Switch from xml.etree to lxml and cssselect.

This commit is contained in:
Chris Berkhout 2021-05-04 16:39:47 +02:00
parent 85205eaba9
commit cce33beead
3 changed files with 23 additions and 12 deletions

14
poetry.lock generated
View file

@@ -82,6 +82,14 @@ category = "dev"
optional = false optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
[[package]]
name = "cssselect"
version = "1.1.0"
description = "cssselect parses CSS3 Selectors and translates them to XPath 1.0"
category = "main"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
[[package]] [[package]]
name = "flake8" name = "flake8"
version = "3.9.1" version = "3.9.1"
@@ -303,7 +311,7 @@ socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"]
[metadata] [metadata]
lock-version = "1.1" lock-version = "1.1"
python-versions = "^3.9" python-versions = "^3.9"
content-hash = "d43dc5e9f04655ee9faaa2e1fc46c006de7509945c4a0e6c4ea3990081f17d4e" content-hash = "8674071f4ee6a2bd084bc4aa02e2d3abaca72eeba749ee5ac67de9a8001ac8db"
[metadata.files] [metadata.files]
appdirs = [ appdirs = [
@@ -337,6 +345,10 @@ colorama = [
{file = "colorama-0.4.4-py2.py3-none-any.whl", hash = "sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2"}, {file = "colorama-0.4.4-py2.py3-none-any.whl", hash = "sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2"},
{file = "colorama-0.4.4.tar.gz", hash = "sha256:5941b2b48a20143d2267e95b1c2a7603ce057ee39fd88e7329b0c292aa16869b"}, {file = "colorama-0.4.4.tar.gz", hash = "sha256:5941b2b48a20143d2267e95b1c2a7603ce057ee39fd88e7329b0c292aa16869b"},
] ]
cssselect = [
{file = "cssselect-1.1.0-py2.py3-none-any.whl", hash = "sha256:f612ee47b749c877ebae5bb77035d8f4202c6ad0f0fc1271b3c18ad6c4468ecf"},
{file = "cssselect-1.1.0.tar.gz", hash = "sha256:f95f8dedd925fd8f54edb3d2dfb44c190d9d18512377d3c1e2388d16126879bc"},
]
flake8 = [ flake8 = [
{file = "flake8-3.9.1-py2.py3-none-any.whl", hash = "sha256:3b9f848952dddccf635be78098ca75010f073bfe14d2c6bda867154bea728d2a"}, {file = "flake8-3.9.1-py2.py3-none-any.whl", hash = "sha256:3b9f848952dddccf635be78098ca75010f073bfe14d2c6bda867154bea728d2a"},
{file = "flake8-3.9.1.tar.gz", hash = "sha256:1aa8990be1e689d96c745c5682b687ea49f2e05a443aff1f8251092b0014e378"}, {file = "flake8-3.9.1.tar.gz", hash = "sha256:1aa8990be1e689d96c745c5682b687ea49f2e05a443aff1f8251092b0014e378"},

View file

@@ -9,6 +9,7 @@ license = "MIT"
python = "^3.9" python = "^3.9"
requests = "^2.25.1" requests = "^2.25.1"
lxml = "^4.6.2" lxml = "^4.6.2"
cssselect = "^1.1.0"
[tool.poetry.dev-dependencies] [tool.poetry.dev-dependencies]
pytest = "^6.2.2" pytest = "^6.2.2"

View file

@@ -1,6 +1,7 @@
from datetime import datetime, timedelta from datetime import datetime, timedelta
from decimal import Decimal from decimal import Decimal
from xml.etree import ElementTree from lxml import etree
from lxml.cssselect import CSSSelector
import requests import requests
@@ -90,20 +91,17 @@ class ECB:
response = requests.get(source_url) response = requests.get(source_url)
data = response.content data = response.content
# TODO consider changing from xml.etree to lxml root = etree.fromstring(data)
root = ElementTree.fromstring(data) ns = {"default": "http://www.ecb.int/vocabulary/2002-08-01/eurofxref"}
namespaces = {
"default": "http://www.ecb.int/vocabulary/2002-08-01/eurofxref",
"gesmes": "http://www.gesmes.org/xml/2002-08-01",
}
all_rows = [] all_rows = []
for day in root.find("default:Cube", namespaces): for day in CSSSelector("default|Cube[time]", ns)(root):
date = day.attrib["time"] date = day.attrib["time"]
rate_xpath = f"./*[@currency='{quote}']"
# TODO what if it's not found for that day? # TODO what if it's not found for that day?
# (some quotes aren't in the earliest data) # (some quotes aren't in the earliest data)
rate = Decimal(day.find(rate_xpath).attrib["rate"]) for row in CSSSelector(f"default|Cube[currency='{quote}']", ns)(day):
all_rows.insert(0, (date, rate)) rate = Decimal(row.attrib["rate"])
all_rows.insert(0, (date, rate))
selected = [ selected = [
Price(base, quote, d, r) for d, r in all_rows if d >= start and d <= end Price(base, quote, d, r) for d, r in all_rows if d >= start and d <= end
] ]