Instead, my.github.all should be used (it is still backward compatible). The reasons are: a) I don't feel that grouping (i.e. my.coding.*) makes much sense; b) using the .all pattern (the same way as twitter) allows for a more composable and cleaner separation of GDPR and API data.
111 lines
3.5 KiB
Python
111 lines
3.5 KiB
Python
from pathlib import Path
|
|
from typing import Tuple, Optional, Iterable, Dict, Sequence
|
|
|
|
from ..core import get_files
|
|
from ..core.common import mcachew
|
|
from ..kython.kompress import CPath
|
|
|
|
from .common import Event, parse_dt, Results
|
|
|
|
from my.config import github as config
|
|
import my.config.repos.ghexport.dal as ghexport
|
|
|
|
|
|
def inputs() -> Sequence[Path]:
    """Return the paths of the github export files found under ``config.export_dir``."""
    export_root = config.export_dir
    return get_files(export_root)
|
|
|
|
|
|
def _dal():
    """Construct the ghexport DAL over every discovered export file.

    Each path is wrapped in CPath so compressed exports are read transparently.
    """
    # TODO maybe move it to get_files? e.g. compressed=True arg?
    wrapped = [CPath(src) for src in inputs()]
    return ghexport.DAL(wrapped)
|
|
|
|
|
|
# TODO hmm. not good, need to be lazier?...
# NOTE(review): the `dal=_dal()` default is evaluated once at module import
# time, so the export files are scanned as soon as this module is loaded
# (that is presumably what the TODO above refers to). Switching to a `None`
# default would change what `hashf` below receives, so it is left as-is here
# pending a check of mcachew's argument-binding semantics.
@mcachew(config.cache_dir, hashf=lambda dal: dal.sources)  # cache key derived from the DAL's source files
def events(dal=_dal()) -> Results:
    """Yield parsed github Events from the export data (results cached via mcachew)."""
    for d in dal.events():
        yield _parse_event(d)
|
|
|
|
|
|
# TODO hmm. need some sort of abstract syntax for this...
|
|
# TODO split further, title too
|
|
def _get_summary(e) -> Tuple[str, Optional[str], Optional[str]]:
|
|
# TODO would be nice to give access to raw event withing timeline
|
|
eid = e['id']
|
|
tp = e['type']
|
|
pl = e['payload']
|
|
rname = e['repo']['name']
|
|
|
|
mapping = {
|
|
'CreateEvent': 'created',
|
|
'DeleteEvent': 'deleted',
|
|
}
|
|
|
|
if tp == 'ForkEvent':
|
|
url = e['payload']['forkee']['html_url']
|
|
return f"{rname}: forked", url, None
|
|
elif tp == 'PushEvent':
|
|
commits = pl['commits']
|
|
messages = [c['message'] for c in commits]
|
|
body = '\n'.join(messages)
|
|
return f"{rname}: pushed\n{body}", None, None
|
|
elif tp == 'WatchEvent':
|
|
return f"{rname}: watching", None, None
|
|
elif tp in mapping:
|
|
what = mapping[tp]
|
|
rt = pl['ref_type']
|
|
ref = pl['ref']
|
|
# TODO link to branch? only contains weird API link though
|
|
# TODO hmm. include timestamp instead?
|
|
# breakpoint()
|
|
# TODO combine automatically instead
|
|
return f"{rname}: {what} {rt} {ref}", None, f'{rname}_{what}_{rt}_{ref}_{eid}'
|
|
elif tp == 'PullRequestEvent':
|
|
pr = pl['pull_request']
|
|
action = pl['action']
|
|
link = pr['html_url']
|
|
title = pr['title']
|
|
return f"{rname}: {action} PR {title}", link, f'{rname}_{action}_pr_{link}'
|
|
elif tp == "IssuesEvent":
|
|
action = pl['action']
|
|
iss = pl['issue']
|
|
link = iss['html_url']
|
|
title = iss['title']
|
|
return f"{rname}: {action} issue {title}", link, None
|
|
elif tp == "IssueCommentEvent":
|
|
com = pl['comment']
|
|
link = com['html_url']
|
|
iss = pl['issue']
|
|
title = iss['title']
|
|
return f"{rname}: commented on issue {title}", link, f'issue_comment_' + link
|
|
elif tp == "ReleaseEvent":
|
|
action = pl['action']
|
|
rel = pl['release']
|
|
tag = rel['tag_name']
|
|
link = rel['html_url']
|
|
return f"{rname}: {action} [{tag}]", link, None
|
|
elif tp in 'PublicEvent':
|
|
return f'{tp} {e}', None, None # TODO ???
|
|
else:
|
|
return tp, None, None
|
|
|
|
|
|
def _parse_event(d: Dict) -> Event:
    """Convert a raw event dict into an Event, synthesising an eid if the summary didn't provide one."""
    summary, link, eid = _get_summary(d)
    # Comment bodies are the only 'body' extracted from the raw payload.
    body = d.get('payload', {}).get('comment', {}).get('body')
    return Event(
        dt=parse_dt(d['created_at']),
        summary=summary,
        link=link,
        eid=eid if eid is not None else d['id'],
        body=body,
    )
|
|
|
|
|
|
# TODO mm. ok, not much point in deserializing as github.Event as it's basically a fancy dict wrapper?
|
|
# from github.Event import Event as GEvent # type: ignore
|
|
# # see https://github.com/PyGithub/PyGithub/blob/master/github/GithubObject.py::GithubObject.__init__
|
|
# e = GEvent(None, None, raw_event, True)
|