prettify and simplify my.commits
This commit is contained in:
parent
59fc098b68
commit
d2ef972416
1 changed file with 31 additions and 83 deletions
|
@@ -2,28 +2,27 @@
|
||||||
Git commits data: crawls filesystem
|
Git commits data: crawls filesystem
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
from typing import List, NamedTuple, Optional, Dict, Any, Iterator
|
from typing import List, NamedTuple, Optional, Dict, Any, Iterator
|
||||||
from pathlib import Path
|
|
||||||
from os.path import basename, islink, isdir, join
|
|
||||||
from os import listdir
|
|
||||||
|
|
||||||
from ..common import PathIsh
|
from ..common import PathIsh, LazyLogger
|
||||||
from mycfg import commits as config
|
from mycfg import commits as config
|
||||||
|
|
||||||
# pip3 install gitpython
|
# pip3 install gitpython
|
||||||
import git # type: ignore
|
import git # type: ignore
|
||||||
|
from git.repo.fun import is_git_dir # type: ignore
|
||||||
|
|
||||||
|
|
||||||
|
log = LazyLogger('my.commits', level='info')
|
||||||
|
|
||||||
# TODO do something smarter... later
|
|
||||||
# TODO def run against bitbucket and gh backups
|
|
||||||
# TODO github/bitbucket repos?
|
|
||||||
# TODO FIXME syncthing? or not necessary with coding view??
|
|
||||||
|
|
||||||
_things = {
|
_things = {
|
||||||
*config.emails,
|
*config.emails,
|
||||||
*config.names,
|
*config.names,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def by_me(c) -> bool:
|
def by_me(c) -> bool:
|
||||||
actor = c.author
|
actor = c.author
|
||||||
if actor.email in config.emails:
|
if actor.email in config.emails:
|
||||||
|
@@ -62,47 +61,24 @@ def fix_datetime(dt) -> datetime:
|
||||||
return dt.replace(tzinfo=ntz)
|
return dt.replace(tzinfo=ntz)
|
||||||
|
|
||||||
|
|
||||||
def iter_commits(repo: PathIsh, ref=None):
|
def _repo_commits_aux(gr: git.Repo, rev: str) -> Iterator[Commit]:
|
||||||
# TODO other branches?
|
|
||||||
repo = Path(repo)
|
|
||||||
rr = repo.name
|
|
||||||
gr = git.Repo(repo)
|
|
||||||
# without path might not handle pull heads properly
|
# without path might not handle pull heads properly
|
||||||
for c in gr.iter_commits(rev=None if ref is None else ref.path):
|
for c in gr.iter_commits(rev=rev):
|
||||||
if by_me(c):
|
if by_me(c):
|
||||||
yield Commit(
|
yield Commit(
|
||||||
commited_dt=fix_datetime(c.committed_datetime),
|
commited_dt=fix_datetime(c.committed_datetime),
|
||||||
authored_dt=fix_datetime(c.authored_datetime),
|
authored_dt=fix_datetime(c.authored_datetime),
|
||||||
message=c.message.strip(),
|
message=c.message.strip(),
|
||||||
repo=rr,
|
repo=gr.git_dir, # TODO chop off .git?
|
||||||
sha=c.hexsha,
|
sha=c.hexsha,
|
||||||
ref=ref,
|
ref=rev,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def iter_all_ref_commits(repo: Path):
|
def repo_commits(repo: PathIsh):
|
||||||
gr = git.Repo(str(repo))
|
gr = git.Repo(str(repo))
|
||||||
for r in gr.references:
|
for r in gr.references:
|
||||||
yield from iter_commits(repo=repo, ref=r)
|
yield from _repo_commits_aux(gr=gr, rev=r.path)
|
||||||
|
|
||||||
|
|
||||||
def is_git_repo(d: str):
|
|
||||||
dotgit = join(d, '.git')
|
|
||||||
return isdir(dotgit)
|
|
||||||
|
|
||||||
from git.repo.fun import is_git_dir # type: ignore
|
|
||||||
|
|
||||||
def iter_all_git_repos(dd: PathIsh) -> Iterator[Path]:
|
|
||||||
# TODO would that cover all repos???
|
|
||||||
dd = Path(dd)
|
|
||||||
assert dd.exists()
|
|
||||||
for xx in dd.glob('**/HEAD'): # ugh
|
|
||||||
c = xx.parent
|
|
||||||
if not is_git_dir(c):
|
|
||||||
continue
|
|
||||||
if c.name == '.git':
|
|
||||||
c = c.parent
|
|
||||||
yield c
|
|
||||||
|
|
||||||
|
|
||||||
def canonical_name(repo: Path) -> str:
|
def canonical_name(repo: Path) -> str:
|
||||||
|
@@ -111,52 +87,16 @@ def canonical_name(repo: Path) -> str:
|
||||||
return repo.parent.name
|
return repo.parent.name
|
||||||
else:
|
else:
|
||||||
return repo.name
|
return repo.name
|
||||||
|
|
||||||
# if r.name == 'repository': # 'repository' thing from github..
|
# if r.name == 'repository': # 'repository' thing from github..
|
||||||
# rname = r.parent.name
|
# rname = r.parent.name
|
||||||
# else:
|
# else:
|
||||||
# rname = r.name
|
# rname = r.name
|
||||||
# if 'backups/github' in repo:
|
# if 'backups/github' in repo:
|
||||||
# pass # TODO
|
# pass # TODO
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
# TODO not even used??
|
# TODO could reuse in clustergit?..
|
||||||
def _iter_multi_commits(sources):
|
def repos() -> List[Path]:
|
||||||
for src in sources:
|
|
||||||
# TODO warn if doesn't exist?
|
|
||||||
for d in listdir(src):
|
|
||||||
pr = join(src, d)
|
|
||||||
if is_git_repo(pr):
|
|
||||||
try:
|
|
||||||
for c in iter_commits(pr):
|
|
||||||
yield c
|
|
||||||
except ValueError as ve:
|
|
||||||
if "Reference at 'refs/heads/master' does not exist" in str(ve):
|
|
||||||
continue # TODO wtf??? log?
|
|
||||||
else:
|
|
||||||
raise ve
|
|
||||||
|
|
||||||
|
|
||||||
# TODO eh. traverse all of filesystem?? or only specific dirs for now?
|
|
||||||
def iter_all_commits():
|
|
||||||
return _iter_multi_commits(config.sources)
|
|
||||||
|
|
||||||
|
|
||||||
def get_all_commits():
|
|
||||||
res: Dict[str, Any] = {}
|
|
||||||
for c in iter_all_commits():
|
|
||||||
nn = res.get(c.sha, None)
|
|
||||||
if nn is None:
|
|
||||||
res[c.sha] = c
|
|
||||||
else:
|
|
||||||
res[c.sha] = min(nn, c, key=lambda c: c.sha)
|
|
||||||
|
|
||||||
return list(sorted(res.values(), key=lambda c: c.dt))
|
|
||||||
|
|
||||||
# TODO cachew for all commits?
|
|
||||||
|
|
||||||
def repos():
|
|
||||||
from subprocess import check_output
|
from subprocess import check_output
|
||||||
outputs = check_output([
|
outputs = check_output([
|
||||||
'fdfind',
|
'fdfind',
|
||||||
|
@@ -165,19 +105,27 @@ def repos():
|
||||||
'--full-path',
|
'--full-path',
|
||||||
'--type', 'f',
|
'--type', 'f',
|
||||||
'/HEAD', # judging by is_git_dir, it should always be here..
|
'/HEAD', # judging by is_git_dir, it should always be here..
|
||||||
*config.roots,
|
*roots,
|
||||||
]).decode('utf8').splitlines()
|
]).decode('utf8').splitlines()
|
||||||
candidates = set(Path(o).resolve().absolute().parent for o in outputs)
|
candidates = set(Path(o).resolve().absolute().parent for o in outputs)
|
||||||
gits = {c for c in candidates if is_git_dir(c)}
|
|
||||||
for g in sorted(gits):
|
|
||||||
print(g)
|
|
||||||
|
|
||||||
# print(outputs.decode('utf8').splitlines())
|
# exclude stuff within .git dirs (can happen for submodules?)
|
||||||
|
candidates = {c for c in candidates if '.git' not in c.parts[:-1]}
|
||||||
|
|
||||||
|
gits = list(sorted(c for c in candidates if is_git_dir(c)))
|
||||||
|
return gits
|
||||||
|
|
||||||
|
|
||||||
|
# TODO cachew for all commits?
|
||||||
|
def commits() -> Iterator[Commit]:
|
||||||
|
for r in repos():
|
||||||
|
log.info('processing %s', r)
|
||||||
|
yield from repo_commits(r)
|
||||||
|
|
||||||
def commits():
|
|
||||||
repos()
|
|
||||||
raise RuntimeError()
|
|
||||||
|
|
||||||
def print_all():
|
def print_all():
|
||||||
for c in commits():
|
for c in commits():
|
||||||
print(c)
|
print(c)
|
||||||
|
|
||||||
|
|
||||||
|
# TODO enforce read only? although it doesn't touch index
|
||||||
|
|
Loading…
Add table
Reference in a new issue