HPI/commits/__init__.py
2019-03-12 12:00:14 +00:00

119 lines
3.1 KiB
Python

from datetime import datetime, timezone
from typing import List, NamedTuple, Optional, Dict, Any
from os.path import basename, islink, isdir, join
from os import listdir
import git # type: ignore
# TODO do something smarter... later
# TODO def run against bitbucket and gh backups
SOURCES = [
'***REMOVED***',
'***REMOVED***',
'***REMOVED***',
'***REMOVED***',
'***REMOVED***',
'***REMOVED***',
]
THINGS = [
'***REMOVED***',
'***REMOVED***',
'***REMOVED***',
'***REMOVED***',
]
def by_me(c):
actor = c.author
if actor.email in ('***REMOVED***', '***REMOVED***@gmail.com'):
return True
if actor.name in ('***REMOVED***',):
return True
aa = f"{actor.email} {actor.name}"
for thing in THINGS:
if thing in aa:
print("WARNING!!!", actor, c, c.repo)
return True
return False
class Commit(NamedTuple):
dt: datetime
message: str
repo: str
sha: str
ref: Optional[str]=None
# TODO filter so they are authored by me
# TODO not sure, maybe a better idea to move it to timeline?
def fix_datetime(dt) -> datetime:
# git module got it's own tzinfo object.. and it's pretty weird
tz = dt.tzinfo
assert tz._name == 'fixed'
offset = tz._offset
ntz = timezone(offset)
return dt.replace(tzinfo=ntz)
def iter_commits(repo: str, ref=None):
# TODO other branches?
rr = basename(repo)
gr = git.Repo(repo)
for c in gr.iter_commits(rev=ref):
if by_me(c):
yield Commit(
dt=fix_datetime(c.committed_datetime), # TODO authored??
message=c.message.strip(),
repo=rr,
sha=c.hexsha,
ref=ref,
)
def iter_all_ref_commits(repo):
rr = basename(repo)
gr = git.Repo(repo)
for r in gr.references:
yield from iter_commits(repo=repo, ref=r)
def is_git_repo(d: str):
dotgit = join(d, '.git')
return isdir(dotgit)
from pathlib import Path
from typing import Union
PathIsh = Union[str, Path]
def iter_all_git_repos(dd: PathIsh):
dd = Path(dd)
yield from dd.glob('**/.git')
def iter_multi_commits(sources):
for src in sources:
# TODO warn if doesn't exist?
for d in listdir(src):
pr = join(src, d)
if is_git_repo(pr):
try:
for c in iter_commits(pr):
yield c
except ValueError as ve:
if "Reference at 'refs/heads/master' does not exist" in str(ve):
continue # TODO wtf??? log?
else:
raise ve
# TODO eh. traverse all of filesystem?? or only specific dirs for now?
def iter_all_commits():
return iter_multi_commits(SOURCES)
def get_all_commits():
res: Dict[str, Any] = {}
for c in iter_all_commits():
nn = res.get(c.sha, None)
if nn is None:
res[c.sha] = c
else:
res[c.sha] = min(nn, c, key=lambda c: c.sha)
return list(sorted(res.values(), key=lambda c: c.dt))