HPI/my/coding/github.py
2019-12-06 22:41:56 +00:00

229 lines
5.8 KiB
Python

from typing import Dict, List, Union, Any, NamedTuple, Tuple, Optional, Iterator, TypeVar
from datetime import datetime
import json
from pathlib import Path
import logging
import pytz
from ..common import get_files
from my_configuration import paths
import my_configuration.repos.ghexport.model as ghexport
def get_logger():
    """Return the logger this module logs to (registered under ``my.github``)."""
    # TODO __package__???
    logger_name = 'my.github'
    return logging.getLogger(logger_name)
class Event(NamedTuple):
    """One GitHub activity record, as produced by the parsers in this module."""

    # When the activity happened.
    dt: datetime
    # Short human-readable description, e.g. "pushed to <repo>".
    summary: str
    # Identifier string for the event.
    eid: str
    # URL associated with the event, when one is known.
    link: Optional[str]
    # Free-form text attached to the event (e.g. a comment body), if any.
    body: Optional[str] = None
# Placeholder for the success type carried by ``Res``.
T = TypeVar('T')
# Result alias: either a value of type T, or the Exception explaining why it
# could not be produced (errors are passed around as values instead of raised).
Res = Union[T, Exception]
# TODO split further, title too
def _get_summary(e) -> Tuple[str, Optional[str]]:
tp = e['type']
pl = e['payload']
rname = e['repo']['name']
if tp == 'ForkEvent':
url = e['payload']['forkee']['html_url']
return f"forked {rname}", url
elif tp == 'PushEvent':
return f"pushed to {rname}", None
elif tp == 'WatchEvent':
return f"watching {rname}", None
elif tp == 'CreateEvent':
return f"created {rname}", None
elif tp == 'PullRequestEvent':
pr = pl['pull_request']
action = pl['action']
link = pr['html_url']
title = pr['title']
return f"{action} PR {title}", link
elif tp == "IssuesEvent":
action = pl['action']
iss = pl['issue']
link = iss['html_url']
title = iss['title']
return f"{action} issue {title}", link
elif tp == "IssueCommentEvent":
com = pl['comment']
link = com['html_url']
iss = pl['issue']
title = iss['title']
return f"commented on issue {title}", link
elif tp == "ReleaseEvent":
action = pl['action']
rel = pl['release']
tag = rel['tag_name']
link = rel['html_url']
return f"{action} {rname} [{tag}]", link
elif tp in (
"DeleteEvent",
"PublicEvent",
):
return tp, None # TODO ???
else:
return tp, None
def get_model():
    """Build a ghexport ``Model`` over the ``*.json`` files found in the configured export directory."""
    export_files = get_files(paths.github.export_dir, glob='*.json')
    return ghexport.Model(export_files)
def _parse_dt(s: str) -> datetime:
    """Parse a ``YYYY-MM-DDTHH:MM:SSZ`` timestamp string into a UTC-localized datetime."""
    # TODO isoformat?
    naive = datetime.strptime(s, '%Y-%m-%dT%H:%M:%SZ')
    # The trailing 'Z' is matched literally above, so attach UTC explicitly.
    return pytz.utc.localize(naive)
# TODO extract to separate gdpr module?
# TODO typing.TypedDict could be handy here..
def _parse_common(d: Dict) -> Dict:
    """
    Extract the Event fields shared by the GDPR record parsers.

    Returns a dict with keys ``dt``, ``link`` and ``body``, ready to be
    splatted into ``Event(...)``. ``url`` and ``created_at`` are required keys
    of ``d``; ``body`` is optional and becomes None when absent.
    """
    link = d['url']
    text = d.get('body')
    created = _parse_dt(d['created_at'])
    return dict(dt=created, link=link, body=text)
def _parse_repository(d: Dict) -> Event:
    """Turn a GDPR repository record into a "created <name>" Event."""
    repo_name = d['name']
    shared = _parse_common(d)
    summary = 'created ' + repo_name
    event_id = 'created_' + repo_name  # TODO ??
    return Event(summary=summary, eid=event_id, **shared)
def _parse_issue_comment(d: Dict) -> Event:
    """Turn a GDPR issue-comment record into an Event identified by the comment's URL."""
    comment_url = d['url']
    shared = _parse_common(d)
    return Event(
        summary=f'commented on issue {comment_url}',
        eid='issue_comment_' + comment_url,
        **shared,
    )
def _parse_issue(d: Dict) -> Event:
    """
    Turn a GDPR issue record into an "opened issue <title>" Event.

    ``d`` must provide ``url`` and ``title`` in addition to the keys required
    by ``_parse_common``. The event id is derived from the issue URL.
    """
    url = d['url']
    title = d['title']
    return Event(
        **_parse_common(d),
        summary=f'opened issue {title}',
        # Issues get their own id prefix: the previous 'issue_comment_' prefix
        # was copied from _parse_issue_comment and mislabelled issues as comments.
        eid='issue_' + url,
    )
def _parse_pull_request(d: Dict) -> Event:
    """Turn a GDPR pull-request record into a "PR <title>" Event identified by the PR's URL."""
    pr_url = d['url']
    pr_title = d['title']
    shared = _parse_common(d)
    # TODO distinguish incoming/outgoing?
    # TODO action? opened/closed??
    return Event(
        summary=f'PR {pr_title}',
        eid='pull_request_' + pr_url,
        **shared,
    )
def _parse_release(d: Dict) -> Event:
    """Turn a GDPR release record into a "released <tag>" Event."""
    tag_name = d['tag_name']
    shared = _parse_common(d)
    # NOTE(review): the id is built from the tag alone; presumably the same tag
    # can occur in more than one repository -- confirm whether ids must be unique.
    return Event(
        summary=f'released {tag_name}',
        eid='release_' + tag_name,
        **shared,
    )
def _parse_commit_comment(d: Dict) -> Event:
    """Turn a GDPR commit-comment record into an Event identified by the comment's URL."""
    comment_url = d['url']
    shared = _parse_common(d)
    return Event(
        summary=f'commented on {comment_url}',
        # The prefix spelling is part of the emitted id; it is kept verbatim.
        eid='commoit_comment_' + comment_url,
        **shared,
    )
def _parse_event(d: Dict) -> Event:
    """
    Convert one raw event dict (as returned by the ghexport model) into an Event.

    The summary and link come from ``_get_summary``; the body is the text of
    ``payload.comment.body`` when the event carries a comment, otherwise None.
    """
    summary, link = _get_summary(d)
    # Each level may be missing, hence the chained .get() with empty defaults.
    comment = d.get('payload', {}).get('comment', {})
    text = comment.get('body')
    when = _parse_dt(d['created_at'])
    return Event(dt=when, summary=summary, eid=d['id'], link=link, body=text)
def iter_gdpr_events() -> Iterator[Res[Event]]:
    """
    Parses events from GDPR export (https://github.com/settings/admin)

    Walks the ``*.json`` files of the configured GDPR directory in sorted
    order and picks a parser for each file by its name prefix.

    Yields an ``Event`` per successfully parsed record. Problems are yielded
    as exception objects instead of being raised, so one bad file or record
    does not stop the iteration:

    * a file whose name matches no known prefix -> ``RuntimeError``
    * a file that cannot be read or is not valid JSON -> ``RuntimeError``
      (the original error is attached as ``__cause__``)
    * a record its parser fails on -> the exception the parser raised
    """
    # File-name prefix -> parser. None marks a known kind that is skipped on purpose.
    handler_map = {
        'schema'       : None,
        'issue_events_': None, # eh, doesn't seem to have any useful bodies
        'attachments_' : None, # not sure if useful
        'users'        : None, # just contains random users
        'repositories_'  : _parse_repository,
        'issue_comments_': _parse_issue_comment,
        'issues_'        : _parse_issue,
        'pull_requests_' : _parse_pull_request,
        'releases_'      : _parse_release,
        'commit_comments': _parse_commit_comment,
    }
    for f in sorted(paths.github.gdpr_dir.glob('*.json')):
        handler: Any
        for prefix, h in handler_map.items():
            if f.name.startswith(prefix):
                handler = h
                break
        else:
            # No prefix matched: report it rather than silently dropping data.
            yield RuntimeError(f'Unhandled file: {f}')
            continue

        if handler is None:
            # ignored
            continue

        try:
            records = json.loads(f.read_text())
        except (OSError, ValueError) as e:
            # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors.
            # Report the broken file as a value, like every other failure here,
            # instead of aborting the whole generator.
            err = RuntimeError(f'Failed to load file: {f}')
            err.__cause__ = e
            yield err
            continue

        for r in records:
            try:
                yield handler(r)
            except Exception as e:
                yield e
def iter_events():
    """Lazily yield an Event for every raw event exposed by the ghexport model."""
    for raw in get_model().events():
        yield _parse_event(raw)
# TODO load events from GDPR export?
def get_events():
    """Return every event from ``iter_events()`` as a list ordered by ``dt``, oldest first."""
    events = list(iter_events())
    events.sort(key=lambda ev: ev.dt)
    return events
# TODO mm. ok, not much point in deserializing as github.Event as it's basically a fancy dict wrapper?
# from github.Event import Event as GEvent # type: ignore
# # see https://github.com/PyGithub/PyGithub/blob/master/github/GithubObject.py::GithubObject.__init__
# e = GEvent(None, None, raw_event, True)
def test():
    """Smoke test: the export must yield more than 100 events; each one is printed."""
    all_events = get_events()
    assert len(all_events) > 100
    for event in all_events:
        print(event)