cleanup coding.commits (#132)
* cleanup coding.commits
  - remove the _things check; it's never activated for me and seems pointless
  - update the mechanism for finding the fdfind/fd path; send a core.warning if it fails (see the sketch below)
  - update the caching mechanism to the new cachew API (remove hashf); cache repos on a per-repo basis
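As a rough illustration of the fd lookup described above (a sketch, not part of the commit itself): it boils down to a shutil.which chain that prefers the Debian/Ubuntu binary name and only warns, rather than raises, when nothing is found. The real module warns via my.core.warnings.high; plain warnings.warn is used here as a stand-in.

import shutil
from typing import Optional

def _fd_path() -> str:
    # try the Debian/Ubuntu name first, then the generic ones
    fd_path: Optional[str] = shutil.which("fdfind") or shutil.which("fd-find") or shutil.which("fd")
    if fd_path is None:
        # stand-in for my.core.warnings.high, which warns without raising
        import warnings
        warnings.warn("my.coding.commits requires 'fd', see https://github.com/sharkdp/fd#installation")
    # fall back to the bare name so the later subprocess call fails loudly if fd really is missing
    return fd_path or "fd"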
parent 44b893a025
commit 4db81ca362
1 changed file with 54 additions and 42 deletions
@@ -2,11 +2,17 @@
 Git commits data for repositories on your filesystem
 """
+
+import shutil
+import string
 from pathlib import Path
 from datetime import datetime, timezone
 from typing import List, NamedTuple, Optional, Dict, Any, Iterator, Set
 
-from ..common import PathIsh, LazyLogger, mcachew
+from ..core.common import PathIsh, LazyLogger, mcachew, Stats
+from ..core.cachew import cache_dir
+from ..core.warnings import high
 
+# TODO: create user_config dataclass?
 from my.config import commits as config
 
 # pip3 install gitpython
@@ -17,23 +23,12 @@ from git.repo.fun import is_git_dir, find_worktree_git_dir # type: ignore
 log = LazyLogger('my.commits', level='info')
 
 
-_things = {
-    *config.emails,
-    *config.names,
-}
-
-
 def by_me(c) -> bool:
     actor = c.author
     if actor.email in config.emails:
         return True
     if actor.name in config.names:
         return True
-    aa = f"{actor.email} {actor.name}"
-    for thing in _things:
-        if thing in aa:
-            # TODO this is probably useless
-            raise RuntimeError("WARNING!!!", actor, c, c.repo)
     return False
 
 
@@ -112,11 +107,19 @@ def canonical_name(repo: Path) -> str:
     # pass # TODO
 
 
+def _fd_path() -> str:
+    fd_path: Optional[str] = shutil.which("fdfind") or shutil.which("fd-find") or shutil.which("fd")
+    if fd_path is None:
+        high(f"my.coding.commits requires 'fd' to be installed, See https://github.com/sharkdp/fd#installation")
+    # TODO: this just causes it to fail if 'fd' can't be found, but the warning is still sent... seems fine?
+    return fd_path or "fd"
+
+
 # TODO could reuse in clustergit?..
 def git_repos_in(roots: List[Path]) -> List[Path]:
     from subprocess import check_output
     outputs = check_output([
-        'fdfind',
+        _fd_path(),
         # '--follow', # right, not so sure about follow... make configurable?
         '--hidden',
         '--full-path',
@@ -124,6 +127,7 @@ def git_repos_in(roots: List[Path]) -> List[Path]:
         '/HEAD', # judging by is_git_dir, it should always be here..
         *roots,
     ]).decode('utf8').splitlines()
+
     candidates = set(Path(o).resolve().absolute().parent for o in outputs)
 
     # exclude stuff within .git dirs (can happen for submodules?)
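To make the candidate-extraction step above concrete, a small illustrative sketch (the paths are made up): fd prints one matching HEAD file per line, and the parent directory of each match becomes a candidate git directory.

from pathlib import Path

# pretend output of the fd invocation above, one matching path per line
outputs = [
    '/home/user/code/HPI/.git/HEAD',
    '/home/user/code/dotfiles/.git/HEAD',
]
# same expression as in the diff: parents of the matches are candidate git dirs
candidates = set(Path(o).resolve().absolute().parent for o in outputs)
print(candidates)  # the two .git directories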
@@ -139,35 +143,43 @@ def repos():
     return git_repos_in(config.roots)
 
 
-def _hashf(_repos: List[Path]):
-    # TODO maybe use smth from git library? ugh..
-    res = []
-    for r in _repos:
-        # TODO just use anything except index? ugh.
-        for pp in {
-            '.git/FETCH_HEAD',
-            '.git/HEAD',
-            'FETCH_HEAD', # bare
-            'HEAD', # bare
-        }:
-            ff = r / pp
-            if ff.exists():
-                updated = ff.stat().st_mtime
-                break
-        else:
-            raise RuntimeError(r)
-        res.append((r, updated))
-    return res
+# returns modification time for an index to use as hash function
+def _repo_depends_on(_repo: Path) -> int:
+    for pp in {
+        ".git/FETCH_HEAD",
+        ".git/HEAD",
+        "FETCH_HEAD", # bare
+        "HEAD", # bare
+    }:
+        ff = _repo / pp
+        if ff.exists():
+            return int(ff.stat().st_mtime)
+    else:
+        raise RuntimeError(f"Could not find a FETCH_HEAD/HEAD file in {_repo}")
 
-# TODO per-repo cache?
-# TODO set default cache path?
-# TODO got similar issue as in photos with a helper method.. figure it out
-@mcachew(hashf=_hashf, logger=log)
-def _commits(_repos) -> Iterator[Commit]:
+def _commits(_repos: List[Path]) -> Iterator[Commit]:
     for r in _repos:
-        log.info('processing %s', r)
-        yield from repo_commits(r)
+        yield from _cached_commits(r)
+
+_allowed_letters: str = string.ascii_letters + string.digits
+
+
+def _cached_commits_path(p: Path) -> str:
+    # compute a reduced simple filepath using the absolute path of the repo
+    simple_path = ''.join(filter(lambda c: c in _allowed_letters, str(p.absolute())))
+    return cache_dir() / simple_path / '_cached_commits'
+
+
+# per-repo commits, to use cachew
+@mcachew(
+    depends_on=_repo_depends_on,
+    logger=log,
+    cache_path=lambda p: _cached_commits_path(p)
+)
+def _cached_commits(repo: Path) -> Iterator[Commit]:
+    log.debug('processing %s', repo)
+    yield from repo_commits(repo)
 
 def commits() -> Iterator[Commit]:
     return _commits(repos())
 
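A rough sketch of how the new per-repo cache is keyed (illustrative only; the repo path and cache root below are made up, and cache_dir() in the real module comes from my.core.cachew): depends_on uses the mtime of one of the repo's HEAD/FETCH_HEAD files, so a repo's cache is invalidated whenever it gets new commits or fetches, while cache_path squashes the repo's absolute path into alphanumerics so every repo gets its own cache file.

import string
from pathlib import Path

_allowed_letters = string.ascii_letters + string.digits

repo = Path('/home/user/code/HPI')          # hypothetical repository
cache_root = Path('/home/user/.cache/my')   # stand-in for my.core.cachew.cache_dir()

# depends_on: mtime of one of the files _repo_depends_on looks at
head = repo / '.git/HEAD'
cache_key = int(head.stat().st_mtime) if head.exists() else 0  # the real code raises if no HEAD/FETCH_HEAD is found

# cache_path: absolute repo path reduced to alphanumerics, as in _cached_commits_path
simple_path = ''.join(c for c in str(repo.absolute()) if c in _allowed_letters)
cache_file = cache_root / simple_path / '_cached_commits'
print(cache_key, cache_file)  # one cache file per repo, invalidated when the mtime changes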