prettify and simplify my.commits

This commit is contained in:
Dima Gerasimov 2020-03-14 10:11:24 +00:00
parent 59fc098b68
commit d2ef972416

View file

@ -2,28 +2,27 @@
Git commits data: crawls filesystem
"""
from pathlib import Path
from datetime import datetime, timezone
from typing import List, NamedTuple, Optional, Dict, Any, Iterator
from pathlib import Path
from os.path import basename, islink, isdir, join
from os import listdir
from ..common import PathIsh
from ..common import PathIsh, LazyLogger
from mycfg import commits as config
# pip3 install gitpython
import git # type: ignore
from git.repo.fun import is_git_dir # type: ignore
log = LazyLogger('my.commits', level='info')
# TODO do something smarter... later
# TODO def run against bitbucket and gh backups
# TODO github/bitbucket repos?
# TODO FIXME syncthing? or not necessary with coding view??
# All configured e-mails and names, merged into a single set.
_things = set(config.emails).union(config.names)
def by_me(c) -> bool:
actor = c.author
if actor.email in config.emails:
@ -62,47 +61,24 @@ def fix_datetime(dt) -> datetime:
return dt.replace(tzinfo=ntz)
def iter_commits(repo: PathIsh, ref=None):
# TODO other branches?
repo = Path(repo)
rr = repo.name
gr = git.Repo(repo)
def _repo_commits_aux(gr: git.Repo, rev: str) -> Iterator[Commit]:
# without path might not handle pull heads properly
for c in gr.iter_commits(rev=None if ref is None else ref.path):
for c in gr.iter_commits(rev=rev):
if by_me(c):
yield Commit(
commited_dt=fix_datetime(c.committed_datetime),
authored_dt=fix_datetime(c.authored_datetime),
message=c.message.strip(),
repo=rr,
repo=gr.git_dir, # TODO chop off .git?
sha=c.hexsha,
ref=ref,
ref=rev,
)
def iter_all_ref_commits(repo: Path):
def repo_commits(repo: PathIsh):
gr = git.Repo(str(repo))
for r in gr.references:
yield from iter_commits(repo=repo, ref=r)
def is_git_repo(d: str):
    """Return True when directory *d* directly contains a ``.git`` directory."""
    return isdir(join(d, '.git'))
from git.repo.fun import is_git_dir # type: ignore
def iter_all_git_repos(dd: PathIsh) -> Iterator[Path]:
    """Yield a path for every git directory found anywhere under *dd*.

    Each ``HEAD`` file under *dd* is a candidate; its parent directory is kept
    only if ``is_git_dir`` accepts it. When that directory is named ``.git``
    its parent is yielded instead, otherwise the directory itself is yielded.
    """
    # TODO would that cover all repos???
    root = Path(dd)
    assert root.exists()
    for head in root.glob('**/HEAD'):  # ugh
        git_dir = head.parent
        if is_git_dir(git_dir):
            yield git_dir.parent if git_dir.name == '.git' else git_dir
yield from _repo_commits_aux(gr=gr, rev=r.path)
def canonical_name(repo: Path) -> str:
@ -111,52 +87,16 @@ def canonical_name(repo: Path) -> str:
return repo.parent.name
else:
return repo.name
# if r.name == 'repository': # 'repository' thing from github..
# rname = r.parent.name
# else:
# rname = r.name
# if 'backups/github' in repo:
# pass # TODO
pass
# TODO not even used??
def _iter_multi_commits(sources):
    """Yield commits from the git repositories sitting directly inside each of *sources*.

    Every entry of every source directory is joined with its source path and
    checked with ``is_git_repo``; accepted entries are passed to
    ``iter_commits``. A ValueError whose text mentions the missing
    ``refs/heads/master`` reference ends iteration of that entry and moves on
    to the next one; any other ValueError is re-raised.
    """
    missing_master = "Reference at 'refs/heads/master' does not exist"
    for src in sources:
        # TODO warn if doesn't exist?
        for entry in listdir(src):
            path = join(src, entry)
            if not is_git_repo(path):
                continue
            try:
                yield from iter_commits(path)
            except ValueError as ve:
                if missing_master not in str(ve):
                    raise ve
                # TODO wtf??? log?
# TODO eh. traverse all of filesystem?? or only specific dirs for now?
def iter_all_commits():
    """Return the commit iterator built by ``_iter_multi_commits`` over ``config.sources``."""
    sources = config.sources
    return _iter_multi_commits(sources)
def get_all_commits():
    """Return all commits from ``iter_all_commits()`` as a list, one per sha, sorted by ``dt``.

    When a sha is seen more than once, the entry kept is chosen with ``min``
    keyed on the sha itself.
    """
    by_sha: Dict[str, Any] = {}
    for commit in iter_all_commits():
        seen = by_sha.get(commit.sha)
        by_sha[commit.sha] = (
            commit if seen is None else min(seen, commit, key=lambda x: x.sha)
        )
    return sorted(by_sha.values(), key=lambda x: x.dt)
# TODO cachew for all commits?
def repos():
# TODO could reuse in clustergit?..
def repos() -> List[Path]:
from subprocess import check_output
outputs = check_output([
'fdfind',
@ -165,19 +105,27 @@ def repos():
'--full-path',
'--type', 'f',
'/HEAD', # judging by is_git_dir, it should always be here..
*config.roots,
*roots,
]).decode('utf8').splitlines()
candidates = set(Path(o).resolve().absolute().parent for o in outputs)
gits = {c for c in candidates if is_git_dir(c)}
for g in sorted(gits):
print(g)
# print(outputs.decode('utf8').splitlines())
# exclude stuff within .git dirs (can happen for submodules?)
candidates = {c for c in candidates if '.git' not in c.parts[:-1]}
gits = list(sorted(c for c in candidates if is_git_dir(c)))
return gits
# TODO cachew for all commits?
def commits() -> Iterator[Commit]:
    """Yield the commits of every repository returned by ``repos()``.

    An info-level log line is emitted for each repository before its commits
    are produced by ``repo_commits``.
    """
    for repo in repos():
        log.info('processing %s', repo)
        yield from repo_commits(repo)
def commits():
    """Call ``repos()`` and then unconditionally raise RuntimeError."""
    repos()
    raise RuntimeError
def print_all():
    """Print every commit produced by ``commits()``, one per ``print`` call."""
    for commit in commits():
        print(commit)
# TODO enforce read only? although it doesn't touch index