extract photo candidates

This commit is contained in:
Dima Gerasimov 2020-03-12 22:31:35 +00:00
parent c3e1f2c4c0
commit 5698be60fe

View file

@ -42,14 +42,6 @@ CACHE_PATH = "***REMOVED***"
# TODO sokino -- wrong timestamp
def ignore_path(p: str):
# TODO use marker files instead?..
for reg in _REGEXES:
if reg.search(p):
return True
return False
_DT_REGEX = re.compile(r'\D(\d{8})\D*(\d{6})\D')
def dt_from_path(p: str) -> Optional[datetime]:
name = basename(p)
@ -195,16 +187,37 @@ def _try_photo(photo: str, mtype: str, dgeo: Optional[LatLon]) -> Optional[Photo
# plink = "https://upload.wikimedia.org/wikipedia/commons/thumb/1/19/Ichthyornis_Clean.png/800px-Ichthyornis_Clean.png"
# yield (geo, src.color, plink)
# TODO ugh. need something like this, but tedious to reimplement..
# class Walker:
# def __init__(self, root: str) -> None:
# self.root = root
# def walk(self):
import mimetypes # TODO do I need init()?
def fastermime(path: str, mgc=magic.Magic(mime=True)) -> str:
# mimetypes is faster
(mime, _) = mimetypes.guess_type(path)
if mime is not None:
return mime
# maigc is slower but returns more stuff
return mgc.from_file(path)
# def step(self, cur, dirs, files):
# pass
# TODO exclude
def _candidates() -> Iterable[str]:
# TODO that could be a bit slow if there are to many extra files?
from subprocess import Popen, PIPE
with Popen([
'fdfind',
'--follow',
'-t', 'file',
'.',
*config.paths,
], stdout=PIPE) as p:
for line in p.stdout:
path = line.decode('utf8').rstrip('\n')
tp = fastermime(path).split('/')[0]
if tp in {'inode', 'text', 'application', 'audio'}:
continue
if tp not in {'image', 'video'}:
# TODO yield error?
logger.warning('%s: unexpected mime %s', path, tp)
yield path
# if geo information is missing from photo, you can specify it manually in geo.json file
@ -217,7 +230,6 @@ def iter_photos() -> Iterator[Photo]:
geos: List[LatLon] = [] # stack of geos so we could use the most specific one
# TODO could have this for all meta? e.g. time
# TODO just use fd-find? it's gonna be faster.. althrough not iterative?
for d, _, files in itertools.chain.from_iterable((os.walk(pp, followlinks=True) for pp in config.paths)):
logger.info(f"Processing {d}")