my.topcoder: get rid of kjson in favor of using builtin dict methods

This commit is contained in:
Dima Gerasimov 2024-08-10 23:42:32 +03:00 committed by karlicoss
parent 069264ce52
commit 1e1e8d8494
2 changed files with 65 additions and 45 deletions

View file

@ -115,3 +115,9 @@ def test_fromisoformat() -> None:
# assert isoparse('2017-07-18T18:59:38.21731Z') == datetime(
# 2017, 7, 18, 18, 59, 38, 217310, timezone.utc,
# )
# Compatibility alias: `types.NoneType` is only importable on Python 3.10+,
# so older interpreters fall back to deriving it from `None` itself.
if sys.version_info[:2] < (3, 10):
    NoneType = type(None)
else:
    from types import NoneType

View file

@ -1,77 +1,91 @@
from my.config import topcoder as config # type: ignore[attr-defined]
from datetime import datetime
from dataclasses import dataclass
from functools import cached_property
import json
from typing import NamedTuple, Iterator
from pathlib import Path
from typing import Iterator, Sequence
from my.core import get_files, Res, datetime_aware
from my.core.compat import fromisoformat, NoneType
from my.core import get_files, Res, Json
from my.core.konsume import zoom, wrap, ignore
def inputs() -> Sequence[Path]:
    """Return the export files found under ``config.export_path``.

    The lookup itself is delegated to ``get_files``.
    """
    export_path = config.export_path
    return get_files(export_path)
def _get_latest() -> Json:
    """Load and return the parsed JSON of the export whose path compares greatest.

    NOTE(review): presumably export file names sort chronologically, so the
    maximum path is the newest export -- confirm against the exporter.
    """
    candidates = get_files(config.export_path)
    latest = max(candidates)
    return json.loads(latest.read_text())
@dataclass
class Competition:
    """A single Topcoder contest entry taken from an export.

    Instances are normally built via :meth:`make` from one raw JSON record.
    """

    contest_id: str
    contest: str
    percentile: float
    # raw timestamp string exactly as found in the export; parsed lazily by `when`
    date_str: str

    @cached_property
    def uid(self) -> str:
        """Unique identifier of the entry (the contest id)."""
        return self.contest_id

    def __hash__(self):
        # Hash on the contest id only; the explicit definition also keeps the
        # (non-frozen) dataclass hashable.
        return hash(self.contest_id)

    @cached_property
    def when(self) -> 'datetime_aware':
        """Contest timestamp, parsed from ``date_str`` via ``fromisoformat``."""
        return fromisoformat(self.date_str)

    @cached_property
    def summary(self) -> str:
        """Short human-readable description of the participation."""
        return f'participated in {self.contest}: {self.percentile:.0f}'

    @classmethod
    def make(cls, j) -> 'Iterator[Res[Competition]]':
        """Build a ``Competition`` from one raw record and yield it.

        ``j`` is consumed in place: every key that is read is popped from it.

        Raises:
            KeyError: if an expected key is missing.
            AssertionError: if ``rating``/``placement`` have unexpected types
                (only when assertions are enabled).
        """
        # Pop first, assert second: `assert` statements are stripped under
        # `python -O`, so a pop placed inside one would silently not happen.
        rating = j.pop('rating')
        assert isinstance(rating, float), rating
        placement = j.pop('placement')
        assert isinstance(placement, int), placement

        cid = j.pop('challengeId')
        cname = j.pop('challengeName')
        percentile = j.pop('percentile')
        date_str = j.pop('date')

        yield cls(
            contest_id=cid,
            contest=cname,
            percentile=percentile,
            date_str=date_str,
        )
def _parse_one(p: Path) -> Iterator[Res[Competition]]:
    """Parse one Topcoder export file and yield the competitions it contains.

    The JSON is consumed exhaustively: every handled key is popped, and each
    "zoom in" step requires exactly one remaining value, so unexpected extra
    data surfaces as an error instead of being silently dropped.

    This is a generator, so nothing is read or checked until it is iterated.

    Raises:
        KeyError: if an expected key is missing.
        ValueError: if a "zoom in" step finds anything other than one value.
        AssertionError: if a sanity check fails (only when assertions are enabled).
    """
    j = json.loads(p.read_text())
    # this is kind of an experiment to parse it exhaustively, making sure we don't miss any data

    # NOTE: every pop is its own statement and only the check is asserted.
    # `assert` statements are stripped under `python -O`; a pop placed inside
    # one would be skipped and the single-value unpacking below would fail.
    version = j.pop('version')
    assert isinstance(version, str), version
    export_id = j.pop('id')
    assert isinstance(export_id, str), export_id
    [j] = j.values()  # zoom in

    success = j.pop('success')
    assert success is True, j
    status = j.pop('status')
    assert status == 200, j
    metadata = j.pop('metadata')
    assert metadata is None, j
    [j] = j.values()  # zoom in

    # todo hmm, potentially error handling could be nicer since .pop just reports key error
    # also by the time error is reported, key is already removed?
    for k in ['handle', 'handleLower', 'userId', 'createdAt', 'updatedAt', 'createdBy', 'updatedBy']:
        value = j.pop(k)
        # check it's primitive
        assert isinstance(value, (str, bool, float, int, NoneType)), k

    j.pop('DEVELOP')  # TODO how to handle it?
    [j] = j.values()  # zoom in, DATA_SCIENCE section

    mm = j.pop('MARATHON_MATCH')
    [mm] = mm.values()  # zoom into history
    srm = j.pop('SRM')
    [srm] = srm.values()  # zoom into history
    assert len(j) == 0, j

    for c in mm + srm:
        yield from Competition.make(j=c)
def data() -> Iterator[Res[Competition]]:
    """Return an iterator over the competitions in the last export file.

    Only the final path returned by ``inputs()`` is parsed; earlier ones are
    ignored.
    NOTE(review): presumably ``inputs()`` is ordered so that the last entry
    is the most recent export -- confirm against ``get_files``.

    Raises:
        ValueError: if ``inputs()`` returns no files (the unpacking fails).
    """
    *_, last = inputs()
    return _parse_one(last)