diff --git a/src/pricehist/sources/yahoo.py b/src/pricehist/sources/yahoo.py index 6997301..ce5cf7d 100644 --- a/src/pricehist/sources/yahoo.py +++ b/src/pricehist/sources/yahoo.py @@ -1,4 +1,3 @@ -import csv import dataclasses import json import logging @@ -71,63 +70,36 @@ class Yahoo(BaseSource): series.base, series.quote, self, "Don't specify the quote currency." ) - quote, history = self._data(series) + data = self._data(series) + quote = data["chart"]["result"][0]["meta"]["currency"] + + timestamps = data["chart"]["result"][0]["timestamp"] + adjclose_data = data["chart"]["result"][0]["indicators"]["adjclose"][0] + rest_data = data["chart"]["result"][0]["indicators"]["quote"][0] + amounts = {**adjclose_data, **rest_data} prices = [ - Price(row["date"], amount) - for row in history - if (amount := self._amount(row, series.type)) + Price(ts, amount) + for i in range(len(timestamps)) + if (ts := datetime.fromtimestamp(timestamps[i]).strftime("%Y-%m-%d")) + <= series.end + if (amount := self._amount(amounts, series.type, i)) is not None ] return dataclasses.replace(series, quote=quote, prices=prices) - def _amount(self, row, type): - if type == "mid" and row["high"] != "null" and row["low"] != "null": - return sum([Decimal(row["high"]), Decimal(row["low"])]) / 2 - elif row[type] != "null": - return Decimal(row[type]) + def _amount(self, amounts, type, i): + if type == "mid" and amounts["high"] != "null" and amounts["low"] != "null": + return sum([Decimal(amounts["high"][i]), Decimal(amounts["low"][i])]) / 2 + elif amounts[type] != "null": + return Decimal(amounts[type][i]) else: return None - def _data(self, series) -> (dict, csv.DictReader): - base_url = "https://query1.finance.yahoo.com/v7/finance" + def _data(self, series) -> dict: + base_url = "https://query1.finance.yahoo.com/v8/finance/chart" headers = {"User-Agent": f"pricehist/{__version__}"} - - spark_url = f"{base_url}/spark" - spark_params = { - "symbols": series.base, - "range": "1d", - "interval": "1d", - "indicators": "close", - "includeTimestamps": "false", - "includePrePost": "false", - } - try: - spark_response = self.log_curl( - requests.get(spark_url, params=spark_params, headers=headers) - ) - except Exception as e: - raise exceptions.RequestError(str(e)) from e - - code = spark_response.status_code - text = spark_response.text - if code == 404 and "No data found for spark symbols" in text: - raise exceptions.InvalidPair( - series.base, series.quote, self, "Symbol not found." - ) - - try: - spark_response.raise_for_status() - except Exception as e: - raise exceptions.BadResponse(str(e)) from e - - try: - spark = json.loads(spark_response.content) - quote = spark["spark"]["result"][0]["response"][0]["meta"]["currency"] - except Exception as e: - raise exceptions.ResponseParsingError( - "The spark data couldn't be parsed. " - ) from e + url = f"{base_url}/{series.base}" start_ts = int( datetime.strptime(series.start, "%Y-%m-%d") @@ -142,24 +114,26 @@ class Yahoo(BaseSource): 24 * 60 * 60 ) # some symbols require padding on the end timestamp - history_url = f"{base_url}/download/{series.base}" - history_params = { + params = { + "symbol": series.base, "period1": start_ts, "period2": end_ts, "interval": "1d", - "events": "history", + "events": "capitalGain%7Cdiv%7Csplit", "includeAdjustedClose": "true", + "formatted": "true", + "userYfid": "true", + "lang": "en-US", + "region": "US", } try: - history_response = self.log_curl( - requests.get(history_url, params=history_params, headers=headers) - ) + response = self.log_curl(requests.get(url, params=params, headers=headers)) except Exception as e: raise exceptions.RequestError(str(e)) from e - code = history_response.status_code - text = history_response.text + code = response.status_code + text = response.text if code == 404 and "No data found, symbol may be delisted" in text: raise exceptions.InvalidPair( @@ -177,20 +151,15 @@ class Yahoo(BaseSource): ) try: - history_response.raise_for_status() + response.raise_for_status() except Exception as e: raise exceptions.BadResponse(str(e)) from e try: - history_lines = history_response.content.decode("utf-8").splitlines() - history_lines[0] = history_lines[0].lower().replace(" ", "") - history = csv.DictReader(history_lines, delimiter=",") + data = json.loads(response.content) except Exception as e: - raise exceptions.ResponseParsingError(str(e)) from e + raise exceptions.ResponseParsingError( + "The data couldn't be parsed. " + ) from e - if history_lines[0] != "date,open,high,low,close,adjclose,volume": - raise exceptions.ResponseParsingError("Unexpected CSV format") - - requested_history = [row for row in history if row["date"] <= series.end] - - return (quote, requested_history) + return data diff --git a/tests/live.sh b/tests/live.sh index dd5f626..fc24c08 100755 --- a/tests/live.sh +++ b/tests/live.sh @@ -116,21 +116,20 @@ date,base,quote,amount,source,type 2021-01-07,BTC,EUR,31208.49,coinbasepro,mid 2021-01-08,BTC,EUR,32019,coinbasepro,mid END -run_test "$name" "$cmd" "$expected" - -name="CoinDesk Bitcoin Price Index" -cmd="pricehist fetch coindesk BTC/EUR -s 2021-01-04 -e 2021-01-08" -read -r -d '' expected < 9 -def test_fetch_skips_dates_with_nulls(src, type, spark_ok, date_with_nulls_ok): - series = src.fetch(Series("IBM", "", type, "2021-01-05", "2021-01-07")) - assert series.prices[0] == Price("2021-01-05", Decimal("123.101204")) - assert series.prices[1] == Price("2021-01-07", Decimal("125.882545")) - assert len(series.prices) == 2 - - -def test_fetch_to_future(src, type, spark_ok, recent_ok): +def test_fetch_to_future(src, type, recent_ok): series = src.fetch(Series("TSLA", "", type, "2021-01-04", "2100-01-08")) assert len(series.prices) > 0 -def test_fetch_no_data_in_past(src, type, spark_ok, requests_mock): +def test_fetch_no_data_in_past(src, type, requests_mock): requests_mock.add( responses.GET, - history_url("TSLA"), + url("TSLA"), status=400, body=( "400 Bad Request: Data doesn't exist for " @@ -203,10 +177,10 @@ def test_fetch_no_data_in_past(src, type, spark_ok, requests_mock): assert "No data for the given interval" in str(e.value) -def test_fetch_no_data_in_future(src, type, spark_ok, requests_mock): +def test_fetch_no_data_in_future(src, type, requests_mock): requests_mock.add( responses.GET, - history_url("TSLA"), + url("TSLA"), status=400, body=( "400 Bad Request: Data doesn't exist for " @@ -218,10 +192,10 @@ def test_fetch_no_data_in_future(src, type, spark_ok, requests_mock): assert "No data for the given interval" in str(e.value) -def test_fetch_no_data_on_weekend(src, type, spark_ok, requests_mock): +def test_fetch_no_data_on_weekend(src, type, requests_mock): requests_mock.add( responses.GET, - history_url("TSLA"), + url("TSLA"), status=404, body="404 Not Found: Timestamp data missing.", ) @@ -233,30 +207,7 @@ def test_fetch_no_data_on_weekend(src, type, spark_ok, requests_mock): def test_fetch_bad_sym(src, type, requests_mock): requests_mock.add( responses.GET, - spark_url, - status=404, - body="""{ - "spark": { - "result": null, - "error": { - "code": "Not Found", - "description": "No data found for spark symbols" - } - } - }""", - ) - with pytest.raises(exceptions.InvalidPair) as e: - src.fetch(Series("NOTABASE", "", type, "2021-01-04", "2021-01-08")) - assert "Symbol not found" in str(e.value) - - -def test_fetch_bad_sym_history(src, type, spark_ok, requests_mock): - # In practice the spark history requests should succeed or fail together. - # This extra test ensures that a failure of the the history part is handled - # correctly even if the spark part succeeds. - requests_mock.add( - responses.GET, - history_url("NOTABASE"), + url("NOTABASE"), status=404, body="404 Not Found: No data found, symbol may be delisted", ) @@ -271,61 +222,23 @@ def test_fetch_giving_quote(src, type): assert "quote currency" in str(e.value) -def test_fetch_spark_network_issue(src, type, requests_mock): +def test_fetch_network_issue(src, type, requests_mock): body = requests.exceptions.ConnectionError("Network issue") - requests_mock.add(responses.GET, spark_url, body=body) + requests_mock.add(responses.GET, url("TSLA"), body=body) with pytest.raises(exceptions.RequestError) as e: src.fetch(Series("TSLA", "", type, "2021-01-04", "2021-01-08")) assert "Network issue" in str(e.value) -def test_fetch_spark_bad_status(src, type, requests_mock): - requests_mock.add(responses.GET, spark_url, status=500, body="Some other reason") +def test_fetch_bad_status(src, type, requests_mock): + requests_mock.add(responses.GET, url("TSLA"), status=500, body="Some other reason") with pytest.raises(exceptions.BadResponse) as e: src.fetch(Series("TSLA", "", type, "2021-01-04", "2021-01-08")) assert "Internal Server Error" in str(e.value) -def test_fetch_spark_parsing_error(src, type, requests_mock): - requests_mock.add(responses.GET, spark_url, body="NOT JSON") - with pytest.raises(exceptions.ResponseParsingError) as e: - src.fetch(Series("TSLA", "", type, "2021-01-04", "2021-01-08")) - assert "spark data couldn't be parsed" in str(e.value) - - -def test_fetch_spark_unexpected_json(src, type, requests_mock): - requests_mock.add(responses.GET, spark_url, body='{"notdata": []}') - with pytest.raises(exceptions.ResponseParsingError) as e: - src.fetch(Series("TSLA", "", type, "2021-01-04", "2021-01-08")) - assert "spark data couldn't be parsed" in str(e.value) - - -def test_fetch_history_network_issue(src, type, spark_ok, requests_mock): - body = requests.exceptions.ConnectionError("Network issue") - requests_mock.add(responses.GET, history_url("TSLA"), body=body) - with pytest.raises(exceptions.RequestError) as e: - src.fetch(Series("TSLA", "", type, "2021-01-04", "2021-01-08")) - assert "Network issue" in str(e.value) - - -def test_fetch_history_bad_status(src, type, spark_ok, requests_mock): - requests_mock.add( - responses.GET, history_url("TSLA"), status=500, body="Some other reason" - ) - with pytest.raises(exceptions.BadResponse) as e: - src.fetch(Series("TSLA", "", type, "2021-01-04", "2021-01-08")) - assert "Internal Server Error" in str(e.value) - - -def test_fetch_history_parsing_error(src, type, spark_ok, requests_mock): - requests_mock.add(responses.GET, history_url("TSLA"), body="") +def test_fetch_parsing_error(src, type, requests_mock): + requests_mock.add(responses.GET, url("TSLA"), body="") with pytest.raises(exceptions.ResponseParsingError) as e: src.fetch(Series("TSLA", "", type, "2021-01-04", "2021-01-08")) assert "error occurred while parsing data from the source" in str(e.value) - - -def test_fetch_history_unexpected_csv_format(src, type, spark_ok, requests_mock): - requests_mock.add(responses.GET, history_url("TSLA"), body="BAD HEADER\nBAD DATA") - with pytest.raises(exceptions.ResponseParsingError) as e: - src.fetch(Series("TSLA", "", type, "2021-01-04", "2021-01-08")) - assert "Unexpected CSV format" in str(e.value) diff --git a/tests/pricehist/sources/test_yahoo/ibm-date-with-nulls.csv b/tests/pricehist/sources/test_yahoo/ibm-date-with-nulls.csv deleted file mode 100644 index 601b395..0000000 --- a/tests/pricehist/sources/test_yahoo/ibm-date-with-nulls.csv +++ /dev/null @@ -1,4 +0,0 @@ -Date,Open,High,Low,Close,Adj Close,Volume -2021-01-05,125.010002,126.680000,124.610001,126.139999,123.101204,6114600 -2021-01-06,null,null,null,null,null,null -2021-01-07,130.039993,130.460007,128.259995,128.990005,125.882545,4507400 diff --git a/tests/pricehist/sources/test_yahoo/ibm-long-partial.csv b/tests/pricehist/sources/test_yahoo/ibm-long-partial.csv deleted file mode 100644 index 98149ad..0000000 --- a/tests/pricehist/sources/test_yahoo/ibm-long-partial.csv +++ /dev/null @@ -1,11 +0,0 @@ -Date,Open,High,Low,Close,Adj Close,Volume -1962-01-02,7.713333,7.713333,7.626667,7.626667,1.837710,390000 -1962-01-03,7.626667,7.693333,7.626667,7.693333,1.853774,292500 -1962-01-04,7.693333,7.693333,7.613333,7.616667,1.835299,262500 -1962-01-05,7.606667,7.606667,7.453333,7.466667,1.799155,367500 -1962-01-08,7.460000,7.460000,7.266667,7.326667,1.765422,547500 -2021-01-04,125.849998,125.919998,123.040001,123.940002,120.954201,5179200 -2021-01-05,125.010002,126.680000,124.610001,126.139999,123.101204,6114600 -2021-01-06,126.900002,131.880005,126.720001,129.289993,126.175316,7956700 -2021-01-07,130.039993,130.460007,128.259995,128.990005,125.882545,4507400 -2021-01-08,128.570007,129.320007,126.980003,128.529999,125.433624,4676200 diff --git a/tests/pricehist/sources/test_yahoo/ibm-long-partial.json b/tests/pricehist/sources/test_yahoo/ibm-long-partial.json new file mode 100644 index 0000000..df98efa --- /dev/null +++ b/tests/pricehist/sources/test_yahoo/ibm-long-partial.json @@ -0,0 +1,249 @@ +{ + "chart": { + "result": [ + { + "meta": { + "currency": "USD", + "symbol": "IBM", + "exchangeName": "NYQ", + "fullExchangeName": "NYSE", + "instrumentType": "EQUITY", + "firstTradeDate": -252322200, + "regularMarketTime": 1726257602, + "hasPrePostMarketData": true, + "gmtoffset": -14400, + "timezone": "EDT", + "exchangeTimezoneName": "America/New_York", + "regularMarketPrice": 214.79, + "fiftyTwoWeekHigh": 216.08, + "fiftyTwoWeekLow": 212.13, + "regularMarketDayHigh": 216.08, + "regularMarketDayLow": 212.13, + "regularMarketVolume": 4553547, + "longName": "International Business Machines Corporation", + "shortName": "International Business Machines", + "chartPreviousClose": 7.291, + "priceHint": 2, + "currentTradingPeriod": { + "pre": { + "timezone": "EDT", + "end": 1726234200, + "start": 1726214400, + "gmtoffset": -14400 + }, + "regular": { + "timezone": "EDT", + "end": 1726257600, + "start": 1726234200, + "gmtoffset": -14400 + }, + "post": { + "timezone": "EDT", + "end": 1726272000, + "start": 1726257600, + "gmtoffset": -14400 + } + }, + "dataGranularity": "1d", + "range": "", + "validRanges": [ + "1d", + "5d", + "1mo", + "3mo", + "6mo", + "1y", + "2y", + "5y", + "10y", + "ytd", + "max" + ] + }, + "timestamp": [ + -252322200, + -252235800, + -252149400, + -252063000, + -251803800, + 1609770600, + 1609857000, + 1609943400, + 1610029800, + 1610116200 + ], + "events": { + "dividends": { + "-249298200": { + "amount": 0.000956, + "date": -249298200 + }, + "-241439400": { + "amount": 0.000956, + "date": -241439400 + }, + "-233577000": { + "amount": 0.000956, + "date": -233577000 + }, + "-225797400": { + "amount": 0.000956, + "date": -225797400 + }, + "-217848600": { + "amount": 0.001275, + "date": -217848600 + }, + "1573137000": { + "amount": 1.548757, + "date": 1573137000 + }, + "1581085800": { + "amount": 1.548757, + "date": 1581085800 + }, + "1588858200": { + "amount": 1.558317, + "date": 1588858200 + }, + "1596807000": { + "amount": 1.558317, + "date": 1596807000 + }, + "1604932200": { + "amount": 1.558317, + "date": 1604932200 + } + }, + "splits": { + "-177417000": { + "date": -177417000, + "numerator": 5.0, + "denominator": 4.0, + "splitRatio": "5:4" + }, + "-114345000": { + "date": -114345000, + "numerator": 3.0, + "denominator": 2.0, + "splitRatio": "3:2" + }, + "-53343000": { + "date": -53343000, + "numerator": 2.0, + "denominator": 1.0, + "splitRatio": "2:1" + }, + "107530200": { + "date": 107530200, + "numerator": 5.0, + "denominator": 4.0, + "splitRatio": "5:4" + }, + "297091800": { + "date": 297091800, + "numerator": 4.0, + "denominator": 1.0, + "splitRatio": "4:1" + }, + "864826200": { + "date": 864826200, + "numerator": 2.0, + "denominator": 1.0, + "splitRatio": "2:1" + }, + "927811800": { + "date": 927811800, + "numerator": 2.0, + "denominator": 1.0, + "splitRatio": "2:1" + } + } + }, + "indicators": { + "quote": [ + { + "close": [ + 7.2912678718566895, + 7.3550028800964355, + 7.281707763671875, + 7.138305187225342, + 7.00446081161499, + 118.48948669433594, + 120.59273529052734, + 123.60420989990234, + 123.31739807128906, + 122.87763214111328 + ], + "low": [ + 7.2912678718566895, + 7.2912678718566895, + 7.2785210609436035, + 7.125557899475098, + 6.9471001625061035, + 117.62906646728516, + 119.13002014160156, + 121.14722442626953, + 122.61949920654297, + 121.39579010009766 + ], + "open": [ + 7.374124050140381, + 7.2912678718566895, + 7.3550028800964355, + 7.272148132324219, + 7.131930828094482, + 120.31549072265625, + 119.5124282836914, + 121.3193130493164, + 124.32122039794922, + 122.9158706665039 + ], + "high": [ + 7.374124050140381, + 7.3550028800964355, + 7.3550028800964355, + 7.272148132324219, + 7.131930828094482, + 120.38240814208984, + 121.1089859008789, + 126.08030700683594, + 124.7227554321289, + 123.63288879394531 + ], + "volume": [ + 407940, + 305955, + 274575, + 384405, + 572685, + 5417443, + 6395872, + 8322708, + 4714740, + 4891305 + ] + } + ], + "adjclose": [ + { + "adjclose": [ + 1.5133211612701416, + 1.5265485048294067, + 1.5113375186920166, + 1.4815733432769775, + 1.4537923336029053, + 99.60364532470703, + 101.37164306640625, + 103.90313720703125, + 103.66202545166016, + 103.29237365722656 + ] + } + ] + } + } + ], + "error": null + } +} diff --git a/tests/pricehist/sources/test_yahoo/tsla-recent.csv b/tests/pricehist/sources/test_yahoo/tsla-recent.csv deleted file mode 100644 index 48b5692..0000000 --- a/tests/pricehist/sources/test_yahoo/tsla-recent.csv +++ /dev/null @@ -1,6 +0,0 @@ -Date,Open,High,Low,Close,Adj Close,Volume -2021-01-04,719.460022,744.489990,717.190002,729.770020,729.770020,48638200 -2021-01-05,723.659973,740.840027,719.200012,735.109985,735.109985,32245200 -2021-01-06,758.489990,774.000000,749.099976,755.979980,755.979980,44700000 -2021-01-07,777.630005,816.989990,775.200012,816.039978,816.039978,51498900 -2021-01-08,856.000000,884.489990,838.390015,880.020020,880.020020,75055500 \ No newline at end of file diff --git a/tests/pricehist/sources/test_yahoo/tsla-recent.json b/tests/pricehist/sources/test_yahoo/tsla-recent.json new file mode 100644 index 0000000..3f35daa --- /dev/null +++ b/tests/pricehist/sources/test_yahoo/tsla-recent.json @@ -0,0 +1,126 @@ +{ + "chart": { + "result": [ + { + "meta": { + "currency": "USD", + "symbol": "TSLA", + "exchangeName": "NMS", + "fullExchangeName": "NasdaqGS", + "instrumentType": "EQUITY", + "firstTradeDate": 1277818200, + "regularMarketTime": 1726257600, + "hasPrePostMarketData": true, + "gmtoffset": -14400, + "timezone": "EDT", + "exchangeTimezoneName": "America/New_York", + "regularMarketPrice": 230.29, + "fiftyTwoWeekHigh": 232.664, + "fiftyTwoWeekLow": 226.32, + "regularMarketDayHigh": 232.664, + "regularMarketDayLow": 226.32, + "regularMarketVolume": 59096538, + "longName": "Tesla, Inc.", + "shortName": "Tesla, Inc.", + "chartPreviousClose": 235.223, + "priceHint": 2, + "currentTradingPeriod": { + "pre": { + "timezone": "EDT", + "start": 1726214400, + "end": 1726234200, + "gmtoffset": -14400 + }, + "regular": { + "timezone": "EDT", + "start": 1726234200, + "end": 1726257600, + "gmtoffset": -14400 + }, + "post": { + "timezone": "EDT", + "start": 1726257600, + "end": 1726272000, + "gmtoffset": -14400 + } + }, + "dataGranularity": "1d", + "range": "", + "validRanges": [ + "1d", + "5d", + "1mo", + "3mo", + "6mo", + "1y", + "2y", + "5y", + "10y", + "ytd", + "max" + ] + }, + "timestamp": [ + 1609770600, + 1609857000, + 1609943400, + 1610029800, + 1610116200 + ], + "indicators": { + "quote": [ + { + "open": [ + 239.82000732421875, + 241.22000122070312, + 252.8300018310547, + 259.2099914550781, + 285.3333435058594 + ], + "close": [ + 243.2566680908203, + 245.0366668701172, + 251.9933319091797, + 272.0133361816406, + 293.3399963378906 + ], + "high": [ + 248.163330078125, + 246.94667053222656, + 258.0, + 272.3299865722656, + 294.8299865722656 + ], + "low": [ + 239.06333923339844, + 239.73333740234375, + 249.6999969482422, + 258.3999938964844, + 279.46331787109375 + ], + "volume": [ + 145914600, + 96735600, + 134100000, + 154496700, + 225166500 + ] + } + ], + "adjclose": [ + { + "adjclose": [ + 243.2566680908203, + 245.0366668701172, + 251.9933319091797, + 272.0133361816406, + 293.3399963378906 + ] + } + ] + } + } + ], + "error": null + } +} diff --git a/tests/pricehist/sources/test_yahoo/tsla-spark.json b/tests/pricehist/sources/test_yahoo/tsla-spark.json deleted file mode 100644 index 53e7585..0000000 --- a/tests/pricehist/sources/test_yahoo/tsla-spark.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "spark": { - "result": [ - { - "symbol": "TSLA", - "response": [ - { - "meta": { - "currency": "USD", - "symbol": "TSLA", - "exchangeName": "NMS", - "instrumentType": "EQUITY", - "firstTradeDate": 1277818200, - "regularMarketTime": 1626465603, - "gmtoffset": -14400, - "timezone": "EDT", - "exchangeTimezoneName": "America/New_York", - "regularMarketPrice": 644.22, - "chartPreviousClose": 650.6, - "priceHint": 2, - "currentTradingPeriod": { - "pre": { - "timezone": "EDT", - "start": 1626422400, - "end": 1626442200, - "gmtoffset": -14400 - }, - "regular": { - "timezone": "EDT", - "start": 1626442200, - "end": 1626465600, - "gmtoffset": -14400 - }, - "post": { - "timezone": "EDT", - "start": 1626465600, - "end": 1626480000, - "gmtoffset": -14400 - } - }, - "dataGranularity": "1d", - "range": "1d", - "validRanges": [ - "1d", - "5d", - "1mo", - "3mo", - "6mo", - "1y", - "2y", - "5y", - "10y", - "ytd", - "max" - ] - }, - "timestamp": [ - 1626442200, - 1626465603 - ], - "indicators": { - "quote": [ - { - "close": [ - 644.22, - 644.22 - ] - } - ] - } - } - ] - } - ], - "error": null - } -}