Switch from xml.etree to lxml and cssselect.

This commit is contained in:
Chris Berkhout 2021-05-04 16:39:47 +02:00
parent 85205eaba9
commit cce33beead
3 changed files with 23 additions and 12 deletions

14
poetry.lock generated
View file

@@ -82,6 +82,14 @@ category = "dev"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
[[package]]
name = "cssselect"
version = "1.1.0"
description = "cssselect parses CSS3 Selectors and translates them to XPath 1.0"
category = "main"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
[[package]]
name = "flake8"
version = "3.9.1"
@@ -303,7 +311,7 @@ socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"]
[metadata]
lock-version = "1.1"
python-versions = "^3.9"
content-hash = "d43dc5e9f04655ee9faaa2e1fc46c006de7509945c4a0e6c4ea3990081f17d4e"
content-hash = "8674071f4ee6a2bd084bc4aa02e2d3abaca72eeba749ee5ac67de9a8001ac8db"
[metadata.files]
appdirs = [
@@ -337,6 +345,10 @@ colorama = [
{file = "colorama-0.4.4-py2.py3-none-any.whl", hash = "sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2"},
{file = "colorama-0.4.4.tar.gz", hash = "sha256:5941b2b48a20143d2267e95b1c2a7603ce057ee39fd88e7329b0c292aa16869b"},
]
cssselect = [
{file = "cssselect-1.1.0-py2.py3-none-any.whl", hash = "sha256:f612ee47b749c877ebae5bb77035d8f4202c6ad0f0fc1271b3c18ad6c4468ecf"},
{file = "cssselect-1.1.0.tar.gz", hash = "sha256:f95f8dedd925fd8f54edb3d2dfb44c190d9d18512377d3c1e2388d16126879bc"},
]
flake8 = [
{file = "flake8-3.9.1-py2.py3-none-any.whl", hash = "sha256:3b9f848952dddccf635be78098ca75010f073bfe14d2c6bda867154bea728d2a"},
{file = "flake8-3.9.1.tar.gz", hash = "sha256:1aa8990be1e689d96c745c5682b687ea49f2e05a443aff1f8251092b0014e378"},

View file

@@ -9,6 +9,7 @@ license = "MIT"
python = "^3.9"
requests = "^2.25.1"
lxml = "^4.6.2"
cssselect = "^1.1.0"
[tool.poetry.dev-dependencies]
pytest = "^6.2.2"

View file

@@ -1,6 +1,7 @@
from datetime import datetime, timedelta
from decimal import Decimal
from xml.etree import ElementTree
from lxml import etree
from lxml.cssselect import CSSSelector
import requests
@@ -90,19 +91,16 @@ class ECB:
response = requests.get(source_url)
data = response.content
# TODO consider changing from xml.etree to lxml
root = ElementTree.fromstring(data)
namespaces = {
"default": "http://www.ecb.int/vocabulary/2002-08-01/eurofxref",
"gesmes": "http://www.gesmes.org/xml/2002-08-01",
}
root = etree.fromstring(data)
ns = {"default": "http://www.ecb.int/vocabulary/2002-08-01/eurofxref"}
all_rows = []
for day in root.find("default:Cube", namespaces):
for day in CSSSelector("default|Cube[time]", ns)(root):
date = day.attrib["time"]
rate_xpath = f"./*[@currency='{quote}']"
# TODO what if it's not found for that day?
# (some quotes aren't in the earliest data)
rate = Decimal(day.find(rate_xpath).attrib["rate"])
for row in CSSSelector(f"default|Cube[currency='{quote}']", ns)(day):
rate = Decimal(row.attrib["rate"])
all_rows.insert(0, (date, rate))
selected = [
Price(base, quote, d, r) for d, r in all_rows if d >= start and d <= end