Add support for Yahoo! Finance.

2021-06-01 19:03:32 +02:00 · 2021-06-01 19:03:32 +02:00 · 10982b72d4
commit 10982b72d4
parent 925ed42b86
6 changed files with 133 additions and 15 deletions
--- a/src/pricehist/cli.py
+++ b/src/pricehist/cli.py
@ -4,11 +4,10 @@ import shutil
 import sys
 from datetime import datetime, timedelta

-from pricehist import __version__, outputs, sources
+from pricehist import __version__, logger, outputs, sources
 from pricehist.fetch import fetch
 from pricehist.format import Format
 from pricehist.series import Series
-from pricehist import logger


 def cli(args=None, output_file=sys.stdout):
@ -34,7 +33,7 @@ def cli(args=None, output_file=sys.stdout):
            print(result, file=output_file)
        elif args.command == "source" and args.symbols:
            result = sources.by_id[args.source].format_symbols()
-            print(result, file=output_file)
+            print(result, file=output_file, end="")
        elif args.command == "source":
            total_width = shutil.get_terminal_size().columns
            result = sources.by_id[args.source].format_info(total_width)
@ -72,9 +71,6 @@ def build_parser():
        if base == "":
            msg = f"No base found in the requested pair '{s}'."
            raise argparse.ArgumentTypeError(msg)
-        if quote == "":
-            msg = f"No quote found in the requested pair '{s}'."
-            raise argparse.ArgumentTypeError(msg)
        return (base, quote)

    def valid_date(s):
--- a/src/pricehist/outputs/gnucashsql.py
+++ b/src/pricehist/outputs/gnucashsql.py
@ -1,9 +1,8 @@
 import hashlib
 import logging
-import re
 from datetime import datetime
-from importlib.resources import read_text
 from decimal import Decimal
+from importlib.resources import read_text

 from pricehist import __version__
 from pricehist.format import Format
@ -115,7 +114,7 @@ class GnuCashSQL(BaseOutput):
        # - https://mariadb.com/kb/en/string-literals/
        # - https://dev.mysql.com/doc/refman/8.0/en/string-literals.html
        # - https://www.postgresql.org/docs/devel/sql-syntax-lexical.html
-        escaped = re.sub("'", "''", s)
+        escaped = s.replace("'", "''")
        quoted = f"'{escaped}'"
        return quoted

--- a/src/pricehist/sources/init.py
+++ b/src/pricehist/sources/init.py
@ -1,8 +1,11 @@
 from .coindesk import CoinDesk
 from .coinmarketcap import CoinMarketCap
 from .ecb import ECB
+from .yahoo import Yahoo

-by_id = {source.id(): source for source in [CoinDesk(), CoinMarketCap(), ECB()]}
+by_id = {
+    source.id(): source for source in [CoinDesk(), CoinMarketCap(), ECB(), Yahoo()]
+}


 def formatted():
--- a/src/pricehist/sources/basesource.py
+++ b/src/pricehist/sources/basesource.py
@ -51,9 +51,9 @@ class BaseSource(ABC):

    def format_symbols(self) -> str:
        symbols = self.symbols()
-        width = max([len(sym) for sym, desc in symbols])
-        lines = [sym.ljust(width + 4) + desc for sym, desc in symbols]
-        return "\n".join(lines)
+        width = max([len(sym) for sym, desc in symbols] + [0])
+        lines = [sym.ljust(width + 4) + desc + "\n" for sym, desc in symbols]
+        return "".join(lines)

    def format_info(self, total_width=80) -> str:
        k_width = 11
@ -82,7 +82,7 @@ class BaseSource(ABC):
        first, *rest = value.split("\n")
        first_output = wrapper.wrap(first)
        wrapper.initial_indent = subsequent_indent
-        rest_output = sum([wrapper.wrap(line) if line else ["\n"] for line in rest], [])
+        rest_output = sum([wrapper.wrap(line) if line else [""] for line in rest], [])
        output = "\n".join(first_output + rest_output)
        if output != "":
            return output
--- a/src/pricehist/sources/coinmarketcap.py
+++ b/src/pricehist/sources/coinmarketcap.py
@ -77,7 +77,7 @@ class CoinMarketCap(BaseSource):
            params["convert"] = series.quote

        params["time_start"] = int(
-            datetime.strptime(series.start, "%Y-%m-%d").timestamp()
+            int(datetime.strptime(series.start, "%Y-%m-%d").timestamp())
        )
        params["time_end"] = (
            int(datetime.strptime(series.end, "%Y-%m-%d").timestamp()) + 24 * 60 * 60
--- a/src/pricehist/sources/yahoo.py
+++ b/src/pricehist/sources/yahoo.py
@ -0,0 +1,120 @@
+import csv
+import dataclasses
+import json
+import logging
+from datetime import datetime
+from decimal import Decimal
+
+import requests
+
+from pricehist.price import Price
+
+from .basesource import BaseSource
+
+
+class Yahoo(BaseSource):
+    """Price source that reads historical prices from Yahoo! Finance.
+
+    Fetching a series makes two HTTP requests: one to the ``spark`` endpoint,
+    from which the symbol's currency is read, and one to the ``download``
+    endpoint, which returns the daily price history as CSV.
+    """
+
+    def id(self):
+        """Return the short identifier of this source."""
+        return "yahoo"
+
+    def name(self):
+        """Return the human-readable name of this source."""
+        return "Yahoo! Finance"
+
+    def description(self):
+        """Return a one-sentence description of the data offered."""
+        return (
+            "Historical data for most Yahoo! Finance symbols, "
+            "as available on the web page"
+        )
+
+    def source_url(self):
+        """Return the URL of the Yahoo! Finance website."""
+        return "https://finance.yahoo.com/"
+
+    def start(self):
+        """Return the earliest supported date as a ``YYYY-MM-DD`` string."""
+        return "1970-01-01"
+
+    def types(self):
+        """Return the supported price types.
+
+        All of them except "mid" are read directly from a CSV column of the
+        same name; "mid" is derived from "high" and "low" (see ``_amount``).
+        """
+        return ["adjclose", "open", "high", "low", "close", "mid"]
+
+    def notes(self):
+        """Return free-form usage notes, with paragraphs separated by newlines."""
+        return (
+            "Yahoo! Finance decommissioned its historical data API in 2017 but "
+            "some historical data is available via its web page, as described in: "
+            "https://help.yahoo.com/kb/"
+            "download-historical-data-yahoo-finance-sln2311.html\n"
+            f"{self._symbols_message()}\n"
+            "In output the base and quote will be the Yahoo! symbol and its "
+            "corresponding currency. Some symbols include the name of the quote "
+            "currency (e.g. BTC-USD), so you may wish to use --fmt-base to "
+            "remove the redundant information.\n"
+            "When a symbol's historical data is unavailable due to data licensing "
+            "restrictions, its web page will show no download button and "
+            "pricehist will only find the current day's price."
+        )
+
+    def _symbols_message(self):
+        """Return the hint on finding symbols, shared by notes() and symbols()."""
+        return (
+            "Find the symbol of interest on https://finance.yahoo.com/ and use "
+            "that as the PAIR in your pricehist command. Prices for each symbol "
+            "are given in its native currency."
+        )
+
+    def symbols(self):
+        """Log where symbols can be found and return an empty list.
+
+        No symbol listing is produced by this source, so the user is pointed
+        to the website instead.
+        """
+        logging.info(self._symbols_message())
+        return []
+
+    def fetch(self, series):
+        """Return a copy of ``series`` filled with prices from Yahoo! Finance.
+
+        The quote of the returned series is the currency found in the spark
+        response. Rows for which ``_amount`` yields no value are skipped.
+        """
+        spark, history = self._data(series)
+
+        output_quote = spark["spark"]["result"][0]["response"][0]["meta"]["currency"]
+
+        prices = [
+            Price(row["date"], amount)
+            for row in history
+            # Compare against None rather than relying on truthiness, so that
+            # a genuine amount of zero is kept instead of silently dropped.
+            if (amount := self._amount(row, series.type)) is not None
+        ]
+
+        return dataclasses.replace(series, quote=output_quote, prices=prices)
+
+    def _amount(self, row, type):
+        """Return the amount of the given price type for one CSV row.
+
+        For "mid" the result is the mean of the row's "high" and "low";
+        for any other type it is the value of the column named ``type``.
+        Returns None when a needed field holds the string "null".
+        """
+        if type == "mid":
+            high, low = row["high"], row["low"]
+            if high == "null" or low == "null":
+                return None
+            return (Decimal(high) + Decimal(low)) / 2
+
+        value = row[type]
+        if value == "null":
+            return None
+        return Decimal(value)
+
+    def _data(self, series) -> (dict, csv.DictReader):
+        """Request the spark metadata and the price history for ``series``.
+
+        Returns a tuple of the parsed spark JSON and a ``csv.DictReader`` over
+        the history rows, whose column names are lower-cased with spaces
+        removed.
+        """
+        # Function-scope import: the file-level import only brings in datetime.
+        from datetime import timezone
+
+        base_url = "https://query1.finance.yahoo.com/v7/finance"
+
+        spark_url = f"{base_url}/spark"
+        spark_params = {
+            "symbols": series.base,
+            "range": "1d",
+            "interval": "1d",
+            "indicators": "close",
+            "includeTimestamps": "false",
+            "includePrePost": "false",
+        }
+        # log_curl is used here as returning the response it is given.
+        spark_response = self.log_curl(requests.get(spark_url, params=spark_params))
+        spark = json.loads(spark_response.content)
+
+        # Interpret the requested dates as UTC; a naive datetime would be
+        # converted using the local time zone, shifting the requested range
+        # depending on where pricehist is run.
+        start_ts = int(
+            datetime.strptime(series.start, "%Y-%m-%d")
+            .replace(tzinfo=timezone.utc)
+            .timestamp()
+        )
+        end_ts = int(
+            datetime.strptime(series.end, "%Y-%m-%d")
+            .replace(tzinfo=timezone.utc)
+            .timestamp()
+        ) + (
+            24 * 60 * 60
+        )  # round up to include the last day
+
+        history_url = f"{base_url}/download/{series.base}"
+        history_params = {
+            "period1": start_ts,
+            "period2": end_ts,
+            "interval": "1d",
+            "events": "history",
+            "includeAdjustedClose": "true",
+        }
+        history_response = self.log_curl(
+            requests.get(history_url, params=history_params)
+        )
+        history_lines = history_response.content.decode("utf-8").splitlines()
+        if history_lines:
+            # Normalise the header row so the column names match the keys used
+            # in fetch() and _amount(), e.g. "Adj Close" becomes "adjclose".
+            # An empty body has no header row and simply yields no prices.
+            history_lines[0] = history_lines[0].lower().replace(" ", "")
+        history = csv.DictReader(history_lines, delimiter=",")
+
+        return (spark, history)