diff --git a/poetry.lock b/poetry.lock index 33e32d9..d947d1d 100644 --- a/poetry.lock +++ b/poetry.lock @@ -82,6 +82,14 @@ category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +[[package]] +name = "cssselect" +version = "1.1.0" +description = "cssselect parses CSS3 Selectors and translates them to XPath 1.0" +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" + [[package]] name = "flake8" version = "3.9.1" @@ -303,7 +311,7 @@ socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] [metadata] lock-version = "1.1" python-versions = "^3.9" -content-hash = "d43dc5e9f04655ee9faaa2e1fc46c006de7509945c4a0e6c4ea3990081f17d4e" +content-hash = "8674071f4ee6a2bd084bc4aa02e2d3abaca72eeba749ee5ac67de9a8001ac8db" [metadata.files] appdirs = [ @@ -337,6 +345,10 @@ colorama = [ {file = "colorama-0.4.4-py2.py3-none-any.whl", hash = "sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2"}, {file = "colorama-0.4.4.tar.gz", hash = "sha256:5941b2b48a20143d2267e95b1c2a7603ce057ee39fd88e7329b0c292aa16869b"}, ] +cssselect = [ + {file = "cssselect-1.1.0-py2.py3-none-any.whl", hash = "sha256:f612ee47b749c877ebae5bb77035d8f4202c6ad0f0fc1271b3c18ad6c4468ecf"}, + {file = "cssselect-1.1.0.tar.gz", hash = "sha256:f95f8dedd925fd8f54edb3d2dfb44c190d9d18512377d3c1e2388d16126879bc"}, +] flake8 = [ {file = "flake8-3.9.1-py2.py3-none-any.whl", hash = "sha256:3b9f848952dddccf635be78098ca75010f073bfe14d2c6bda867154bea728d2a"}, {file = "flake8-3.9.1.tar.gz", hash = "sha256:1aa8990be1e689d96c745c5682b687ea49f2e05a443aff1f8251092b0014e378"}, diff --git a/pyproject.toml b/pyproject.toml index d23ab68..038f363 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,6 +9,7 @@ license = "MIT" python = "^3.9" requests = "^2.25.1" lxml = "^4.6.2" +cssselect = "^1.1.0" [tool.poetry.dev-dependencies] pytest = "^6.2.2" diff --git a/src/pricehist/sources/ecb.py b/src/pricehist/sources/ecb.py index c2df737..034c7e6 100644 --- a/src/pricehist/sources/ecb.py +++ b/src/pricehist/sources/ecb.py @@ -1,6 +1,7 @@ from datetime import datetime, timedelta from decimal import Decimal -from xml.etree import ElementTree +from lxml import etree +from lxml.cssselect import CSSSelector import requests @@ -90,20 +91,17 @@ class ECB: response = requests.get(source_url) data = response.content - # TODO consider changing from xml.etree to lxml - root = ElementTree.fromstring(data) - namespaces = { - "default": "http://www.ecb.int/vocabulary/2002-08-01/eurofxref", - "gesmes": "http://www.gesmes.org/xml/2002-08-01", - } + root = etree.fromstring(data) + ns = {"default": "http://www.ecb.int/vocabulary/2002-08-01/eurofxref"} + all_rows = [] - for day in root.find("default:Cube", namespaces): + for day in CSSSelector("default|Cube[time]", ns)(root): date = day.attrib["time"] - rate_xpath = f"./*[@currency='{quote}']" # TODO what if it's not found for that day? # (some quotes aren't in the earliest data) - rate = Decimal(day.find(rate_xpath).attrib["rate"]) - all_rows.insert(0, (date, rate)) + for row in CSSSelector(f"default|Cube[currency='{quote}']", ns)(day): + rate = Decimal(row.attrib["rate"]) + all_rows.insert(0, (date, rate)) selected = [ Price(base, quote, d, r) for d, r in all_rows if d >= start and d <= end ]