From 4e13779ed5ef21a816613ba2432479087b53a8fb Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Sat, 10 Aug 2024 23:42:32 +0300 Subject: [PATCH] my.topcoder: get rid of kjson in favor of using builtin dict methods --- my/core/compat.py | 6 +++ my/topcoder.py | 104 ++++++++++++++++++++++++++-------------------- 2 files changed, 65 insertions(+), 45 deletions(-) diff --git a/my/core/compat.py b/my/core/compat.py index 9cdea27..e984695 100644 --- a/my/core/compat.py +++ b/my/core/compat.py @@ -115,3 +115,9 @@ def test_fromisoformat() -> None: # assert isoparse('2017-07-18T18:59:38.21731Z') == datetime( # 2017, 7, 18, 18, 59, 38, 217310, timezone.utc, # ) + + +if sys.version_info[:2] >= (3, 10): + from types import NoneType +else: + NoneType = type(None) diff --git a/my/topcoder.py b/my/topcoder.py index 7432379..d9631dc 100644 --- a/my/topcoder.py +++ b/my/topcoder.py @@ -1,77 +1,91 @@ from my.config import topcoder as config # type: ignore[attr-defined] -from datetime import datetime +from dataclasses import dataclass from functools import cached_property import json -from typing import NamedTuple, Iterator +from pathlib import Path +from typing import Iterator, Sequence + +from my.core import get_files, Res, datetime_aware +from my.core.compat import fromisoformat, NoneType -from my.core import get_files, Res, Json -from my.core.konsume import zoom, wrap, ignore +def inputs() -> Sequence[Path]: + return get_files(config.export_path) -def _get_latest() -> Json: - pp = max(get_files(config.export_path)) - return json.loads(pp.read_text()) - - -class Competition(NamedTuple): +@dataclass +class Competition: contest_id: str contest: str percentile: float - dates: str + date_str: str @cached_property def uid(self) -> str: return self.contest_id - def __hash__(self): - return hash(self.contest_id) - @cached_property - def when(self) -> datetime: - return datetime.strptime(self.dates, '%Y-%m-%dT%H:%M:%S.%fZ') + def when(self) -> datetime_aware: + return 
fromisoformat(self.date_str) @cached_property def summary(self) -> str: return f'participated in {self.contest}: {self.percentile:.0f}' @classmethod - def make(cls, json) -> Iterator[Res['Competition']]: - ignore(json, 'rating', 'placement') - cid = json['challengeId'].zoom().value - cname = json['challengeName'].zoom().value - percentile = json['percentile'].zoom().value - dates = json['date'].zoom().value + def make(cls, j) -> Iterator[Res['Competition']]: + assert isinstance(j.pop('rating'), float) + assert isinstance(j.pop('placement'), int) + + cid = j.pop('challengeId') + cname = j.pop('challengeName') + percentile = j.pop('percentile') + date_str = j.pop('date') + yield cls( contest_id=cid, contest=cname, percentile=percentile, - dates=dates, + date_str=date_str, ) +def _parse_one(p: Path) -> Iterator[Res[Competition]]: + j = json.loads(p.read_text()) + + # this is kind of an experiment to parse it exhaustively, making sure we don't miss any data + assert isinstance(j.pop('version'), str) + assert isinstance(j.pop('id'), str) + [j] = j.values() # zoom in + + assert j.pop('success') is True, j + assert j.pop('status') == 200, j + assert j.pop('metadata') is None, j + [j] = j.values() # zoom in + + # todo hmm, potentially error handling could be nicer since .pop just reports key error + # also by the time error is reported, key is already removed? + for k in ['handle', 'handleLower', 'userId', 'createdAt', 'updatedAt', 'createdBy', 'updatedBy']: + # check it's primitive + assert isinstance(j.pop(k), (str, bool, float, int, NoneType)), k + + j.pop('DEVELOP') # TODO how to handle it? 
+ [j] = j.values() # zoom in, DATA_SCIENCE section + + mm = j.pop('MARATHON_MATCH') + [mm] = mm.values() # zoom into history + + srm = j.pop('SRM') + [srm] = srm.values() # zoom into history + + assert len(j) == 0, j + + for c in mm + srm: + yield from Competition.make(j=c) + + def data() -> Iterator[Res[Competition]]: - with wrap(_get_latest()) as j: - ignore(j, 'id', 'version') - - res = j['result'].zoom() # type: ignore[index] - ignore(res, 'success', 'status', 'metadata') - - cont = res['content'].zoom() - ignore(cont, 'handle', 'handleLower', 'userId', 'createdAt', 'updatedAt', 'createdBy', 'updatedBy') - - cont['DEVELOP'].ignore() # TODO handle it?? - ds = cont['DATA_SCIENCE'].zoom() - - mar, srm = zoom(ds, 'MARATHON_MATCH', 'SRM') - - mar = mar['history'].zoom() - srm = srm['history'].zoom() - # TODO right, I guess I could rely on pylint for unused variables?? - - for c in mar + srm: - yield from Competition.make(json=c) - c.consume() - + *_, last = inputs() + return _parse_one(last)