my.github: some work in progress on generating consistent ids

sadly it seems that there are several issues:

- gdpr has less detailed data so it's hard to generate a proper ID at times
- sometimes there is a small (1s?) discrepancy in created_at between the same event in GDPR and API
- some API events can have duplicate payload, but different id, which violates uniqueness
This commit is contained in:
Dima Gerasimov 2021-04-02 19:42:12 +01:00
parent 3c7af7d649
commit 6a0b93f3b2
3 changed files with 38 additions and 10 deletions

View file

@ -8,7 +8,7 @@ from typing import Iterable, Dict, Any
from ..core.error import Res
from ..core import get_files
from .common import Event, parse_dt
from .common import Event, parse_dt, EventIds
# TODO later, use a separate user config? (github_gdpr)
from my.config import github as user_config
@ -87,11 +87,14 @@ def _parse_common(d: Dict) -> Dict:
def _parse_repository(d: Dict) -> Event:
pref = 'https://github.com/'
url = d['url']
dts = d['created_at']
rt = d['type']
assert url.startswith(pref); name = url[len(pref):]
eid = EventIds.repo_created(dts=dts, name=name, ref_type=rt, ref=None)
return Event( # type: ignore[misc]
**_parse_common(d),
summary='created ' + name,
eid='created_' + name, # TODO ??
eid=eid,
)
@ -119,6 +122,7 @@ def _parse_issue(d: Dict) -> Event:
def _parse_pull_request(d: Dict) -> Event:
dts = d['created_at']
url = d['url']
title = d['title']
is_bot = "[bot]" in d["user"]
@ -127,7 +131,7 @@ def _parse_pull_request(d: Dict) -> Event:
# TODO distinguish incoming/outgoing?
# TODO action? opened/closed??
summary=f'opened PR {title}',
eid='pull_request_' + url,
eid=EventIds.pr(dts=dts, action='opened', url=url),
is_bot=is_bot,
)
@ -146,6 +150,7 @@ def _parse_project(d: Dict) -> Event:
is_bot=is_bot,
)
def _parse_release(d: Dict) -> Event:
tag = d['tag_name']
return Event( # type: ignore[misc]