diff --git a/src/pricehist/cli.py b/src/pricehist/cli.py
index 274b073..2f1ce5d 100644
--- a/src/pricehist/cli.py
+++ b/src/pricehist/cli.py
@@ -4,11 +4,10 @@ import shutil
 import sys
 from datetime import datetime, timedelta
 
-from pricehist import __version__, outputs, sources
+from pricehist import __version__, logger, outputs, sources
 from pricehist.fetch import fetch
 from pricehist.format import Format
 from pricehist.series import Series
-from pricehist import logger
 
 
 def cli(args=None, output_file=sys.stdout):
@@ -34,7 +33,7 @@ def cli(args=None, output_file=sys.stdout):
             print(result, file=output_file)
         elif args.command == "source" and args.symbols:
             result = sources.by_id[args.source].format_symbols()
-            print(result, file=output_file)
+            print(result, file=output_file, end="")
         elif args.command == "source":
             total_width = shutil.get_terminal_size().columns
             result = sources.by_id[args.source].format_info(total_width)
@@ -72,9 +71,6 @@ def build_parser():
         if base == "":
             msg = f"No base found in the requested pair '{s}'."
             raise argparse.ArgumentTypeError(msg)
-        if quote == "":
-            msg = f"No quote found in the requested pair '{s}'."
-            raise argparse.ArgumentTypeError(msg)
         return (base, quote)
 
     def valid_date(s):
diff --git a/src/pricehist/outputs/gnucashsql.py b/src/pricehist/outputs/gnucashsql.py
index 005d0ad..0d6b3e0 100644
--- a/src/pricehist/outputs/gnucashsql.py
+++ b/src/pricehist/outputs/gnucashsql.py
@@ -1,9 +1,8 @@
 import hashlib
 import logging
-import re
 from datetime import datetime
-from importlib.resources import read_text
 from decimal import Decimal
+from importlib.resources import read_text
 
 from pricehist import __version__
 from pricehist.format import Format
@@ -115,7 +114,7 @@ class GnuCashSQL(BaseOutput):
         # - https://mariadb.com/kb/en/string-literals/
         # - https://dev.mysql.com/doc/refman/8.0/en/string-literals.html
         # - https://www.postgresql.org/docs/devel/sql-syntax-lexical.html
-        escaped = re.sub("'", "''", s)
+        escaped = s.replace("'", "''")
         quoted = f"'{escaped}'"
         return quoted
 
diff --git a/src/pricehist/sources/__init__.py b/src/pricehist/sources/__init__.py
index 54d4bd8..b133b34 100644
--- a/src/pricehist/sources/__init__.py
+++ b/src/pricehist/sources/__init__.py
@@ -1,8 +1,11 @@
 from .coindesk import CoinDesk
 from .coinmarketcap import CoinMarketCap
 from .ecb import ECB
+from .yahoo import Yahoo
 
-by_id = {source.id(): source for source in [CoinDesk(), CoinMarketCap(), ECB()]}
+by_id = {
+    source.id(): source for source in [CoinDesk(), CoinMarketCap(), ECB(), Yahoo()]
+}
 
 
 def formatted():
diff --git a/src/pricehist/sources/basesource.py b/src/pricehist/sources/basesource.py
index c8bbca8..fd9aefe 100644
--- a/src/pricehist/sources/basesource.py
+++ b/src/pricehist/sources/basesource.py
@@ -51,9 +51,9 @@ class BaseSource(ABC):
 
     def format_symbols(self) -> str:
         symbols = self.symbols()
-        width = max([len(sym) for sym, desc in symbols])
-        lines = [sym.ljust(width + 4) + desc for sym, desc in symbols]
-        return "\n".join(lines)
+        width = max([len(sym) for sym, desc in symbols] + [0])
+        lines = [sym.ljust(width + 4) + desc + "\n" for sym, desc in symbols]
+        return "".join(lines)
 
     def format_info(self, total_width=80) -> str:
         k_width = 11
@@ -82,7 +82,7 @@ class BaseSource(ABC):
         first, *rest = value.split("\n")
         first_output = wrapper.wrap(first)
         wrapper.initial_indent = subsequent_indent
-        rest_output = sum([wrapper.wrap(line) if line else ["\n"] for line in rest], [])
+        rest_output = sum([wrapper.wrap(line) if line else [""] for line in rest], [])
         output = "\n".join(first_output + rest_output)
         if output != "":
             return output
diff --git a/src/pricehist/sources/coinmarketcap.py b/src/pricehist/sources/coinmarketcap.py
index 4a06bc3..3b4626f 100644
--- a/src/pricehist/sources/coinmarketcap.py
+++ b/src/pricehist/sources/coinmarketcap.py
@@ -77,7 +77,7 @@ class CoinMarketCap(BaseSource):
             params["convert"] = series.quote
 
         params["time_start"] = int(
-            datetime.strptime(series.start, "%Y-%m-%d").timestamp()
+            int(datetime.strptime(series.start, "%Y-%m-%d").timestamp())
         )
         params["time_end"] = (
             int(datetime.strptime(series.end, "%Y-%m-%d").timestamp()) + 24 * 60 * 60
diff --git a/src/pricehist/sources/yahoo.py b/src/pricehist/sources/yahoo.py
new file mode 100644
index 0000000..72d0b4f
--- /dev/null
+++ b/src/pricehist/sources/yahoo.py
@@ -0,0 +1,128 @@
+import csv
+import dataclasses
+import json
+import logging
+from datetime import datetime
+from decimal import Decimal
+
+import requests
+
+from pricehist.price import Price
+
+from .basesource import BaseSource
+
+
+class Yahoo(BaseSource):
+    """Price source backed by the Yahoo! Finance web endpoints."""
+
+    def id(self):
+        return "yahoo"
+
+    def name(self):
+        return "Yahoo! Finance"
+
+    def description(self):
+        return (
+            "Historical data for most Yahoo! Finance symbols, "
+            "as available on the web page"
+        )
+
+    def source_url(self):
+        return "https://finance.yahoo.com/"
+
+    def start(self):
+        return "1970-01-01"
+
+    def types(self):
+        return ["adjclose", "open", "high", "low", "close", "mid"]
+
+    def notes(self):
+        return (
+            "Yahoo! Finance decommissioned its historical data API in 2017 but "
+            "some historical data is available via its web page, as described in: "
+            "https://help.yahoo.com/kb/"
+            "download-historical-data-yahoo-finance-sln2311.html\n"
+            f"{self._symbols_message()}\n"
+            "In output the base and quote will be the Yahoo! symbol and its "
+            "corresponding currency. Some symbols include the name of the quote "
+            "currency (e.g. BTC-USD), so you may wish to use --fmt-base to "
+            "remove the redundant information.\n"
+            "When a symbol's historical data is unavailable due to data licensing "
+            "restrictions, its web page will show no download button and "
+            "pricehist will only find the current day's price."
+        )
+
+    def _symbols_message(self):
+        return (
+            "Find the symbol of interest on https://finance.yahoo.com/ and use "
+            "that as the PAIR in your pricehist command. Prices for each symbol "
+            "are given in its native currency."
+        )
+
+    def symbols(self):
+        logging.info(self._symbols_message())
+        return []
+
+    def fetch(self, series):
+        """Return a copy of series with the quote currency and prices filled in."""
+        spark, history = self._data(series)
+
+        output_quote = spark["spark"]["result"][0]["response"][0]["meta"]["currency"]
+
+        prices = [
+            Price(row["date"], amount)
+            for row in history
+            # Compare with None so that a genuine price of zero is kept.
+            if (amount := self._amount(row, series.type)) is not None
+        ]
+
+        return dataclasses.replace(series, quote=output_quote, prices=prices)
+
+    def _amount(self, row, type):
+        """Return the price of the given type from a row, or None if unavailable."""
+        if type != "mid" and row[type] != "null":
+            return Decimal(row[type])
+        elif type == "mid" and row["high"] != "null" and row["low"] != "null":
+            return sum([Decimal(row["high"]), Decimal(row["low"])]) / 2
+        else:
+            return None
+
+    def _data(self, series) -> (dict, csv.DictReader):
+        """Fetch the spark metadata (parsed JSON) and the CSV price history."""
+        base_url = "https://query1.finance.yahoo.com/v7/finance"
+
+        spark_url = f"{base_url}/spark"
+        spark_params = {
+            "symbols": series.base,
+            "range": "1d",
+            "interval": "1d",
+            "indicators": "close",
+            "includeTimestamps": "false",
+            "includePrePost": "false",
+        }
+        spark_response = self.log_curl(requests.get(spark_url, params=spark_params))
+        spark = json.loads(spark_response.content)
+
+        start_ts = int(datetime.strptime(series.start, "%Y-%m-%d").timestamp())
+        end_ts = int(datetime.strptime(series.end, "%Y-%m-%d").timestamp()) + (
+            24 * 60 * 60
+        )  # round up to include the last day
+
+        history_url = f"{base_url}/download/{series.base}"
+        history_params = {
+            "period1": start_ts,
+            "period2": end_ts,
+            "interval": "1d",
+            "events": "history",
+            "includeAdjustedClose": "true",
+        }
+        history_response = self.log_curl(
+            requests.get(history_url, params=history_params)
+        )
+        history_lines = history_response.content.decode("utf-8").splitlines()
+        if history_lines:
+            # Lower-case the header and strip spaces so columns match type names.
+            history_lines[0] = history_lines[0].lower().replace(" ", "")
+        history = csv.DictReader(history_lines, delimiter=",")
+
+        return (spark, history)