extract photo candidates
parent c3e1f2c4c0
commit 5698be60fe

1 changed file with 28 additions and 16 deletions
@@ -42,14 +42,6 @@ CACHE_PATH = "***REMOVED***"
 # TODO sokino -- wrong timestamp
 
-def ignore_path(p: str):
-    # TODO use marker files instead?..
-    for reg in _REGEXES:
-        if reg.search(p):
-            return True
-    return False
-
-
 _DT_REGEX = re.compile(r'\D(\d{8})\D*(\d{6})\D')
 
 def dt_from_path(p: str) -> Optional[datetime]:
     name = basename(p)
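Note on the surviving context: _DT_REGEX captures an 8-digit date and a 6-digit time from a file name, and dt_from_path presumably builds a datetime from them, but its body lies outside this hunk. A minimal standalone sketch of that kind of parse, where the _dt_from_name helper, the strptime format and the ValueError fallback are illustrative assumptions rather than code from this commit:

import re
from datetime import datetime
from typing import Optional

_DT_REGEX = re.compile(r'\D(\d{8})\D*(\d{6})\D')  # YYYYMMDD ... HHMMSS, as in the diff

def _dt_from_name(name: str) -> Optional[datetime]:
    # hypothetical helper, not from the commit: pull out the date/time digit groups
    m = _DT_REGEX.search(name)
    if m is None:
        return None
    try:
        return datetime.strptime(m.group(1) + m.group(2), '%Y%m%d%H%M%S')
    except ValueError:
        # an 8-digit run that is not actually a calendar date
        return None

print(_dt_from_name('IMG_20190825_123456.jpg'))  # 2019-08-25 12:34:56
print(_dt_from_name('random_notes.txt'))         # None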
@@ -195,16 +187,37 @@ def _try_photo(photo: str, mtype: str, dgeo: Optional[LatLon]) -> Optional[Photo
     # plink = "https://upload.wikimedia.org/wikipedia/commons/thumb/1/19/Ichthyornis_Clean.png/800px-Ichthyornis_Clean.png"
     # yield (geo, src.color, plink)
 
-
-# TODO ugh. need something like this, but tedious to reimplement..
-# class Walker:
-#     def __init__(self, root: str) -> None:
-#         self.root = root
-
-#     def walk(self):
-#     def step(self, cur, dirs, files):
-#         # TODO exclude
-#         pass
+import mimetypes  # TODO do I need init()?
+
+def fastermime(path: str, mgc=magic.Magic(mime=True)) -> str:
+    # mimetypes is faster
+    (mime, _) = mimetypes.guess_type(path)
+    if mime is not None:
+        return mime
+    # magic is slower but returns more stuff
+    return mgc.from_file(path)
+
+
+def _candidates() -> Iterable[str]:
+    # TODO that could be a bit slow if there are too many extra files?
+    from subprocess import Popen, PIPE
+    with Popen([
+        'fdfind',
+        '--follow',
+        '-t', 'file',
+        '.',
+        *config.paths,
+    ], stdout=PIPE) as p:
+        for line in p.stdout:
+            path = line.decode('utf8').rstrip('\n')
+            tp = fastermime(path).split('/')[0]
+            if tp in {'inode', 'text', 'application', 'audio'}:
+                continue
+            if tp not in {'image', 'video'}:
+                # TODO yield error?
+                logger.warning('%s: unexpected mime %s', path, tp)
+            yield path
 
 
 # if geo information is missing from photo, you can specify it manually in geo.json file
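The new fastermime tries the cheap, extension-based mimetypes lookup first and only falls back to content sniffing through python-magic, which has to open and read the file; with fdfind emitting many paths, that ordering keeps the filtering in _candidates() cheap in the common case. A standalone sketch of the same idea, with an assumed optional import so it also runs where python-magic is absent (the committed code requires magic unconditionally):

import mimetypes
from typing import Optional

try:
    import magic  # python-magic, as used by fastermime in the diff
    _mgc = magic.Magic(mime=True)
except ImportError:
    _mgc = None  # sketch-only fallback; the original has no such branch

def guess_mime(path: str) -> Optional[str]:
    # cheap guess from the file extension first
    mime, _ = mimetypes.guess_type(path)
    if mime is not None:
        return mime
    # slower content sniffing via libmagic, when available
    return _mgc.from_file(path) if _mgc is not None else None

for p in ['photo.jpg', 'clip.mp4', 'notes.txt']:
    mime = guess_mime(p)
    top = mime.split('/')[0] if mime is not None else None
    print(p, top)  # _candidates() keeps only the 'image' and 'video' toplevel types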
@@ -217,7 +230,6 @@ def iter_photos() -> Iterator[Photo]:
 
     geos: List[LatLon] = [] # stack of geos so we could use the most specific one
     # TODO could have this for all meta? e.g. time
-    # TODO just use fd-find? it's gonna be faster.. although not iterative?
     for d, _, files in itertools.chain.from_iterable((os.walk(pp, followlinks=True) for pp in config.paths)):
         logger.info(f"Processing {d}")
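The geo.json comment and the "stack of geos" comment suggest directory-level coordinates: a geo.json placed in a directory appears to apply to every photo beneath it, with the most specific (deepest) one winning. A minimal sketch of that lookup, in which the LatLon shape, the nearest_geo helper and the {"lat": ..., "lon": ...} key names are assumptions rather than code from this commit:

import json
import os
from typing import NamedTuple, Optional

class LatLon(NamedTuple):
    lat: float
    lon: float

def nearest_geo(d: str, root: str) -> Optional[LatLon]:
    # check d itself, then each ancestor up to (and including) root,
    # so the most specific geo.json wins
    cur = os.path.abspath(d)
    root = os.path.abspath(root)
    while True:
        gj = os.path.join(cur, 'geo.json')
        if os.path.isfile(gj):
            with open(gj) as fo:
                return LatLon(**json.load(fo))  # assumes keys 'lat' and 'lon'
        parent = os.path.dirname(cur)
        if cur == root or parent == cur:
            return None
        cur = parent

A walker like iter_photos could call nearest_geo(d, pp) for each directory it visits instead of maintaining an explicit stack; the result is the same, at the cost of a few extra file-system checks per directory.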