164 lines
4 KiB
Python
164 lines
4 KiB
Python
"""
|
|
Github data (uses [[https://github.com/settings/admin][official GDPR export]])
|
|
"""
|
|
|
|
import json
|
|
from typing import Iterable, Dict, Any
|
|
|
|
from ..core.error import Res
|
|
from ..core import get_files
|
|
|
|
from .common import Event, parse_dt
|
|
|
|
# TODO later, use a separate user config? (github_gdpr)
|
|
from my.config import github as user_config
|
|
|
|
from dataclasses import dataclass
|
|
from ..core import PathIsh
|
|
|
|
@dataclass
class github(user_config):
    # Path to the unpacked GDPR archive; events() looks for '*.json' files there.
    gdpr_dir: PathIsh
|
|
|
|
###
|
|
|
|
|
|
from ..core.cfg import make_config
|
|
# Module-level configuration object, produced by make_config() from the
# `github` class; events() reads `gdpr_dir` from it.
config = make_config(github)
|
|
|
|
|
|
def events() -> Iterable[Res[Event]]:
    """
    Yield events parsed from the '*.json' files found under ``config.gdpr_dir``.

    Every file is dispatched on the prefix of its file name:

    - a prefix mapped to ``None`` means the file is skipped;
    - a prefix mapped to a ``_parse_*`` function means that function is called
      on each element of the decoded JSON document;
    - a file matching no known prefix results in a ``RuntimeError`` being
      yielded (not raised), after which processing continues with the next file.

    Exceptions raised by a handler for a single record are yielded in place of
    that record, so one bad record does not stop the iteration.
    """
    # TODO FIXME allow using archive here?
    files = get_files(config.gdpr_dir, glob='*.json')
    handler_map = {
        'schema': None,
        'issue_events_': None,  # eh, doesn't seem to have any useful bodies
        'attachments_': None,  # not sure if useful
        'users': None,  # just contains random users
        'repositories_': _parse_repository,
        'issue_comments_': _parse_issue_comment,
        'issues_': _parse_issue,
        'pull_requests_': _parse_pull_request,
        'projects_': _parse_project,
        'releases_': _parse_release,
        'commit_comments': _parse_commit_comment,
    }
    for f in files:
        handler: Any
        # first matching prefix wins; the for/else branch runs only if none matched
        for prefix, h in handler_map.items():
            if not f.name.startswith(prefix):
                continue
            handler = h
            break
        else:
            yield RuntimeError(f'Unhandled file: {f}')
            continue

        if handler is None:
            # ignored
            continue

        # Decode explicitly as UTF-8: without an encoding argument read_text()
        # falls back to the locale's default, which is not UTF-8 everywhere.
        j = json.loads(f.read_text(encoding='utf-8'))
        for r in j:
            try:
                yield handler(r)
            except Exception as e:
                # report the failure to the consumer and keep going
                yield e
|
|
|
|
|
|
def stats():
    """Return the result of ``core.stat`` applied to ``events``, copied into a new dict."""
    from ..core import stat

    event_stats = stat(events)
    return {**event_stats}
|
|
|
|
|
|
# TODO typing.TypedDict could be handy here..
|
|
def _parse_common(d: Dict) -> Dict:
    """
    Extract the fields shared by all record kinds.

    Returns a dict with keys ``dt`` (``parse_dt`` of ``d['created_at']``),
    ``link`` (``d['url']``) and ``body`` (``d.get('body')``, i.e. ``None``
    when the record has no body). Raises ``KeyError`` if ``'url'`` or
    ``'created_at'`` is missing.
    """
    link = d['url']
    text = d.get('body')
    created = parse_dt(d['created_at'])
    return dict(dt=created, link=link, body=text)
|
|
|
|
|
|
def _parse_repository(d: Dict) -> Event:
    """
    Build a 'created <name>' event from a repository record.

    ``<name>`` is ``d['url']`` with the leading 'https://github.com/' removed.

    Raises ``ValueError`` if the url does not start with that prefix, and
    ``KeyError`` if a required key is missing.
    """
    pref = 'https://github.com/'
    url = d['url']
    if not url.startswith(pref):
        # An explicit check instead of `assert`: asserts are stripped under
        # `python -O`, which would let a bogus name through silently.
        raise ValueError(f'unexpected repository url: {url!r}')
    name = url[len(pref):]
    return Event(  # type: ignore[misc]
        **_parse_common(d),
        summary='created ' + name,
        eid='created_' + name,  # TODO ??
    )
|
|
|
|
|
|
def _parse_issue_comment(d: Dict) -> Event:
    """
    Build a 'commented on issue <url>' event from an issue comment record.

    ``is_bot`` is set when the record's ``'user'`` value contains '[bot]'.
    """
    link = d['url']
    bot_authored = "[bot]" in d["user"]
    common = _parse_common(d)
    return Event(  # type: ignore[misc]
        summary=f'commented on issue {link}',
        eid='issue_comment_' + link,
        is_bot=bot_authored,
        **common,
    )
|
|
|
|
|
|
def _parse_issue(d: Dict) -> Event:
    """
    Build an 'opened issue <title>' event from an issue record.

    ``is_bot`` is set when the record's ``'user'`` value contains '[bot]'.
    The event id is ``'issue_'`` followed by the record's url.
    """
    url = d['url']
    title = d['title']
    is_bot = "[bot]" in d["user"]
    return Event(  # type: ignore[misc]
        **_parse_common(d),
        summary=f'opened issue {title}',
        # 'issue_' rather than 'issue_comment_': that prefix belongs to issue
        # comments and was mislabelling issues here
        eid='issue_' + url,
        is_bot=is_bot,
    )
|
|
|
|
|
|
def _parse_pull_request(d: Dict) -> Event:
    """
    Build an 'opened PR <title>' event from a pull request record.

    ``is_bot`` is set when the record's ``'user'`` value contains '[bot]'.
    """
    link = d['url']
    pr_title = d['title']
    bot_authored = "[bot]" in d["user"]
    common = _parse_common(d)
    # TODO distinguish incoming/outgoing?
    # TODO action? opened/closed??
    return Event(  # type: ignore[misc]
        summary=f'opened PR {pr_title}',
        eid='pull_request_' + link,
        is_bot=bot_authored,
        **common,
    )
|
|
|
|
|
|
def _parse_project(d: Dict) -> Event:
    """
    Build a 'created project <name>' event from a project record.

    ``is_bot`` is set when the record's ``'creator'`` value contains '[bot]'.
    """
    link = d['url']
    project_name = d['name']
    bot_authored = "[bot]" in d["creator"]
    common = _parse_common(d)
    # TODO: use columns somehow?
    # Doesn't fit with Event schema,
    # is a list of each of the boards
    return Event(
        summary=f'created project {project_name}',
        eid='project_' + link,
        is_bot=bot_authored,
        **common,
    )
|
|
|
|
def _parse_release(d: Dict) -> Event:
    """Build a 'released <tag>' event from a release record, keyed by its ``'tag_name'``."""
    tag_name = d['tag_name']
    common = _parse_common(d)
    # NOTE(review): the eid is derived from the tag name alone -- confirm this
    # is distinctive enough if the same tag can appear in several records.
    return Event(  # type: ignore[misc]
        summary=f'released {tag_name}',
        eid='release_' + tag_name,
        **common,
    )
|
|
|
|
|
|
def _parse_commit_comment(d: Dict) -> Event:
    """Build a 'commented on <url>' event from a commit comment record."""
    link = d['url']
    common = _parse_common(d)
    return Event(  # type: ignore[misc]
        summary=f'commented on {link}',
        eid='commit_comment_' + link,
        **common,
    )
|