my.topcoder: get rid of kjson in favor of using builtin dict methods

2024-08-10 23:42:32 +03:00 · 2024-08-10 23:42:32 +03:00 · 1e1e8d8494
commit 1e1e8d8494
parent 069264ce52
2 changed files with 65 additions and 45 deletions
--- a/my/core/compat.py
+++ b/my/core/compat.py
@ -115,3 +115,9 @@ def test_fromisoformat() -> None:
    # assert isoparse('2017-07-18T18:59:38.21731Z') == datetime(
    #     2017, 7, 18, 18, 59, 38, 217310, timezone.utc,
    # )
+
+
+if sys.version_info[:2] >= (3, 10):
+    from types import NoneType
+else:
+    NoneType = type(None)
--- a/my/topcoder.py
+++ b/my/topcoder.py
@ -1,77 +1,91 @@
 from my.config import topcoder as config  # type: ignore[attr-defined]


-from datetime import datetime
+from dataclasses import dataclass
 from functools import cached_property
 import json
-from typing import NamedTuple, Iterator
+from pathlib import Path
+from typing import Iterator, Sequence
+
+from my.core import get_files, Res, datetime_aware
+from my.core.compat import fromisoformat, NoneType


-from my.core import get_files, Res, Json
-from my.core.konsume import zoom, wrap, ignore
+def inputs() -> Sequence[Path]:
+    return get_files(config.export_path)


-def _get_latest() -> Json:
-    pp = max(get_files(config.export_path))
-    return json.loads(pp.read_text())
-
-
-class Competition(NamedTuple):
+@dataclass
+class Competition:
    contest_id: str
    contest: str
    percentile: float
-    dates: str
+    date_str: str

    @cached_property
    def uid(self) -> str:
        return self.contest_id

-    def __hash__(self):
-        return hash(self.contest_id)
-
    @cached_property
-    def when(self) -> datetime:
-        return datetime.strptime(self.dates, '%Y-%m-%dT%H:%M:%S.%fZ')
+    def when(self) -> datetime_aware:
+        return fromisoformat(self.date_str)

    @cached_property
    def summary(self) -> str:
        return f'participated in {self.contest}: {self.percentile:.0f}'

    @classmethod
-    def make(cls, json) -> Iterator[Res['Competition']]:
-        ignore(json, 'rating', 'placement')
-        cid = json['challengeId'].zoom().value
-        cname = json['challengeName'].zoom().value
-        percentile = json['percentile'].zoom().value
-        dates = json['date'].zoom().value
+    def make(cls, j) -> Iterator[Res['Competition']]:
+        assert isinstance(j.pop('rating'), float)
+        assert isinstance(j.pop('placement'), int)
+
+        cid = j.pop('challengeId')
+        cname = j.pop('challengeName')
+        percentile = j.pop('percentile')
+        date_str = j.pop('date')
+
        yield cls(
            contest_id=cid,
            contest=cname,
            percentile=percentile,
-            dates=dates,
+            date_str=date_str,
        )


+def _parse_one(p: Path) -> Iterator[Res[Competition]]:
+    j = json.loads(p.read_text())
+
+    # this is kind of an experiment to parse it exhaustively, making sure we don't miss any data
+    assert isinstance(j.pop('version'), str)
+    assert isinstance(j.pop('id'), str)
+    [j] = j.values()  # zoom in
+
+    assert j.pop('success') is True, j
+    assert j.pop('status') == 200, j
+    assert j.pop('metadata') is None, j
+    [j] = j.values()  # zoom in
+
+    # TODO: error handling could be nicer, since a failing .pop only raises a bare KeyError;
+    # also, by the time an assertion failure is reported, the key has already been removed from the dict
+    for k in ['handle', 'handleLower', 'userId', 'createdAt', 'updatedAt', 'createdBy', 'updatedBy']:
+        # check it's primitive
+        assert isinstance(j.pop(k), (str, bool, float, int, NoneType)), k
+
+    j.pop('DEVELOP')  # TODO how to handle it?
+    [j] = j.values()  # zoom in, DATA_SCIENCE section
+
+    mm = j.pop('MARATHON_MATCH')
+    [mm] = mm.values()  # zoom into history
+
+    srm = j.pop('SRM')
+    [srm] = srm.values()  # zoom into history
+
+    assert len(j) == 0, j
+
+    for c in mm + srm:
+        yield from Competition.make(j=c)
+
+
 def data() -> Iterator[Res[Competition]]:
-    with wrap(_get_latest()) as j:
-        ignore(j, 'id', 'version')
-
-        res = j['result'].zoom()  # type: ignore[index]
-        ignore(res, 'success', 'status', 'metadata')
-
-        cont = res['content'].zoom()
-        ignore(cont, 'handle', 'handleLower', 'userId', 'createdAt', 'updatedAt', 'createdBy', 'updatedBy')
-
-        cont['DEVELOP'].ignore() # TODO handle it??
-        ds = cont['DATA_SCIENCE'].zoom()
-
-        mar, srm = zoom(ds, 'MARATHON_MATCH', 'SRM')
-
-        mar = mar['history'].zoom()
-        srm = srm['history'].zoom()
-    # TODO right, I guess I could rely on pylint for unused variables??
-
-        for c in mar + srm:
-            yield from Competition.make(json=c)
-            c.consume()
-
+    *_, last = inputs()
+    return _parse_one(last)