Add support for Yahoo! Finance.

This commit is contained in:
Chris Berkhout 2021-06-01 19:03:32 +02:00
parent 925ed42b86
commit 10982b72d4
6 changed files with 133 additions and 15 deletions

View file

@ -4,11 +4,10 @@ import shutil
import sys
from datetime import datetime, timedelta
from pricehist import __version__, outputs, sources
from pricehist import __version__, logger, outputs, sources
from pricehist.fetch import fetch
from pricehist.format import Format
from pricehist.series import Series
from pricehist import logger
def cli(args=None, output_file=sys.stdout):
@ -34,7 +33,7 @@ def cli(args=None, output_file=sys.stdout):
print(result, file=output_file)
elif args.command == "source" and args.symbols:
result = sources.by_id[args.source].format_symbols()
print(result, file=output_file)
print(result, file=output_file, end="")
elif args.command == "source":
total_width = shutil.get_terminal_size().columns
result = sources.by_id[args.source].format_info(total_width)
@ -72,9 +71,6 @@ def build_parser():
if base == "":
msg = f"No base found in the requested pair '{s}'."
raise argparse.ArgumentTypeError(msg)
if quote == "":
msg = f"No quote found in the requested pair '{s}'."
raise argparse.ArgumentTypeError(msg)
return (base, quote)
def valid_date(s):

View file

@ -1,9 +1,8 @@
import hashlib
import logging
import re
from datetime import datetime
from importlib.resources import read_text
from decimal import Decimal
from importlib.resources import read_text
from pricehist import __version__
from pricehist.format import Format
@ -115,7 +114,7 @@ class GnuCashSQL(BaseOutput):
# - https://mariadb.com/kb/en/string-literals/
# - https://dev.mysql.com/doc/refman/8.0/en/string-literals.html
# - https://www.postgresql.org/docs/devel/sql-syntax-lexical.html
escaped = re.sub("'", "''", s)
escaped = s.replace("'", "''")
quoted = f"'{escaped}'"
return quoted

View file

@ -1,8 +1,11 @@
from .coindesk import CoinDesk
from .coinmarketcap import CoinMarketCap
from .ecb import ECB
from .yahoo import Yahoo
by_id = {source.id(): source for source in [CoinDesk(), CoinMarketCap(), ECB()]}
by_id = {
source.id(): source for source in [CoinDesk(), CoinMarketCap(), ECB(), Yahoo()]
}
def formatted():

View file

@ -51,9 +51,9 @@ class BaseSource(ABC):
def format_symbols(self) -> str:
symbols = self.symbols()
width = max([len(sym) for sym, desc in symbols])
lines = [sym.ljust(width + 4) + desc for sym, desc in symbols]
return "\n".join(lines)
width = max([len(sym) for sym, desc in symbols] + [0])
lines = [sym.ljust(width + 4) + desc + "\n" for sym, desc in symbols]
return "".join(lines)
def format_info(self, total_width=80) -> str:
k_width = 11
@ -82,7 +82,7 @@ class BaseSource(ABC):
first, *rest = value.split("\n")
first_output = wrapper.wrap(first)
wrapper.initial_indent = subsequent_indent
rest_output = sum([wrapper.wrap(line) if line else ["\n"] for line in rest], [])
rest_output = sum([wrapper.wrap(line) if line else [""] for line in rest], [])
output = "\n".join(first_output + rest_output)
if output != "":
return output

View file

@ -77,7 +77,7 @@ class CoinMarketCap(BaseSource):
params["convert"] = series.quote
params["time_start"] = int(
datetime.strptime(series.start, "%Y-%m-%d").timestamp()
int(datetime.strptime(series.start, "%Y-%m-%d").timestamp())
)
params["time_end"] = (
int(datetime.strptime(series.end, "%Y-%m-%d").timestamp()) + 24 * 60 * 60

View file

@ -0,0 +1,120 @@
import csv
import dataclasses
import json
import logging
from datetime import datetime
from decimal import Decimal
import requests
from pricehist.price import Price
from .basesource import BaseSource
class Yahoo(BaseSource):
    """Price source that reads historical data from Yahoo! Finance.

    Two HTTP requests are made per fetch: a "spark" request, whose metadata
    supplies the currency of the symbol, and a CSV "download" request that
    supplies the daily price rows.
    """

    def id(self):
        """Return the short identifier of this source."""
        return "yahoo"

    def name(self):
        """Return the human-readable name of this source."""
        return "Yahoo! Finance"

    def description(self):
        """Return a one-sentence description of this source."""
        return (
            "Historical data for most Yahoo! Finance symbols, "
            "as available on the web page"
        )

    def source_url(self):
        """Return the URL of the Yahoo! Finance website."""
        return "https://finance.yahoo.com/"

    def start(self):
        """Return the start date string reported for this source."""
        return "1970-01-01"

    def types(self):
        """Return the price types accepted by ``_amount``.

        All but ``"mid"`` are columns of the history CSV; ``"mid"`` is derived
        from the ``high`` and ``low`` columns.
        """
        return ["adjclose", "open", "high", "low", "close", "mid"]

    def notes(self):
        """Return free-form usage notes, including the symbol lookup hint."""
        return (
            "Yahoo! Finance decommissioned its historical data API in 2017 but "
            "some historical data is available via its web page, as described in: "
            "https://help.yahoo.com/kb/"
            "download-historical-data-yahoo-finance-sln2311.html\n"
            f"{self._symbols_message()}\n"
            "In output the base and quote will be the Yahoo! symbol and its "
            "corresponding currency. Some symbols include the name of the quote "
            "currency (e.g. BTC-USD), so you may wish to use --fmt-base to "
            "remove the redundant information.\n"
            "When a symbol's historical data is unavailable due to data licensing "
            "restrictions, its web page will show no download button and "
            "pricehist will only find the current day's price."
        )

    def _symbols_message(self):
        """Return the hint explaining how to find a symbol to use as PAIR."""
        return (
            "Find the symbol of interest on https://finance.yahoo.com/ and use "
            "that as the PAIR in your pricehist command. Prices for each symbol "
            "are given in its native currency."
        )

    def symbols(self):
        """Log the symbol lookup hint and return an empty list.

        No symbol listing is produced by this source; the user is pointed at
        the website instead.
        """
        logging.info(self._symbols_message())
        return []

    def fetch(self, series):
        """Fetch prices for ``series`` and return an updated copy of it.

        The returned copy has ``quote`` replaced by the currency found in the
        spark metadata and ``prices`` filled from the history rows. Rows for
        which ``_amount`` yields ``None`` are skipped.
        """
        spark, history = self._data(series)

        output_quote = spark["spark"]["result"][0]["response"][0]["meta"]["currency"]

        prices = []
        for row in history:
            amount = self._amount(row, series.type)
            # Compare against None explicitly: a genuine amount of zero is
            # falsy and must not be discarded along with missing values.
            if amount is not None:
                prices.append(Price(row["date"], amount))

        return dataclasses.replace(series, quote=output_quote, prices=prices)

    def _amount(self, row, type):
        """Return the ``Decimal`` amount of the given type for one CSV row.

        ``"mid"`` is the mean of the ``high`` and ``low`` columns; any other
        type is read directly from the column of that name. Returns ``None``
        when a required column holds the literal string ``"null"``.
        """
        if type == "mid":
            if row["high"] == "null" or row["low"] == "null":
                return None
            return (Decimal(row["high"]) + Decimal(row["low"])) / 2

        if row[type] == "null":
            return None
        return Decimal(row[type])

    @staticmethod
    def _utc_timestamp(date_str):
        """Return the Unix timestamp of midnight UTC on a ``YYYY-MM-DD`` date."""
        # Parsing with an explicit "+0000" offset yields an aware datetime.
        # A naive datetime would be interpreted in the machine's local
        # timezone, making the requested range depend on where the code runs.
        parsed = datetime.strptime(f"{date_str} +0000", "%Y-%m-%d %z")
        return int(parsed.timestamp())

    def _data(self, series) -> "tuple[dict, csv.DictReader]":
        """Request the spark metadata and the history CSV for ``series``.

        Returns a tuple of the decoded spark JSON and a ``csv.DictReader``
        over the history rows. The CSV header is lowercased and stripped of
        spaces so that column names line up with the names in ``types``.
        """
        base_url = "https://query1.finance.yahoo.com/v7/finance"

        spark_url = f"{base_url}/spark"
        spark_params = {
            "symbols": series.base,
            "range": "1d",
            "interval": "1d",
            "indicators": "close",
            "includeTimestamps": "false",
            "includePrePost": "false",
        }
        spark_response = self.log_curl(requests.get(spark_url, params=spark_params))
        spark = json.loads(spark_response.content)

        start_ts = self._utc_timestamp(series.start)
        # Round up by one day to include the last day.
        end_ts = self._utc_timestamp(series.end) + 24 * 60 * 60

        history_url = f"{base_url}/download/{series.base}"
        history_params = {
            "period1": start_ts,
            "period2": end_ts,
            "interval": "1d",
            "events": "history",
            "includeAdjustedClose": "true",
        }
        history_response = self.log_curl(
            requests.get(history_url, params=history_params)
        )
        history_lines = history_response.content.decode("utf-8").splitlines()
        # An empty body has no header line to normalize; the reader then
        # simply yields no rows.
        if history_lines:
            history_lines[0] = history_lines[0].lower().replace(" ", "")
        history = csv.DictReader(history_lines, delimiter=",")

        return (spark, history)