start adapting photos for cachew

This commit is contained in:
Dima Gerasimov 2020-03-12 23:04:11 +00:00
parent 5698be60fe
commit c7dc386258

View file

@ -7,6 +7,7 @@ import os
from os.path import join, basename from os.path import join, basename
import json import json
import re import re
from pathlib import Path
from typing import Tuple, Dict, Optional, NamedTuple, Iterator, Iterable, List from typing import Tuple, Dict, Optional, NamedTuple, Iterator, Iterable, List
from geopy.geocoders import Nominatim # type: ignore from geopy.geocoders import Nominatim # type: ignore
@ -17,10 +18,11 @@ import PIL.Image # type: ignore
from PIL.ExifTags import TAGS, GPSTAGS # type: ignore from PIL.ExifTags import TAGS, GPSTAGS # type: ignore
from ..common import LazyLogger from ..common import LazyLogger, mcachew
logger = LazyLogger('my.photos') logger = LazyLogger('my.photos')
log = logger
from mycfg import photos as config from mycfg import photos as config
@ -52,7 +54,9 @@ def dt_from_path(p: str) -> Optional[datetime]:
return datetime.strptime(dates, "%Y%m%d%H%M%S") return datetime.strptime(dates, "%Y%m%d%H%M%S")
# TODO ignore hidden dirs? # TODO ignore hidden dirs?
LatLon = Tuple[float, float] class LatLon(NamedTuple):
lat: float
lon: float
# TODO PIL.ExifTags.TAGS # TODO PIL.ExifTags.TAGS
@ -132,13 +136,14 @@ class Photo(NamedTuple):
def url(self) -> str: def url(self) -> str:
return PHOTOS_URL + self._basename return PHOTOS_URL + self._basename
def _try_photo(photo: str, mtype: str, dgeo: Optional[LatLon]) -> Optional[Photo]:
def _try_photo(photo: str, mtype: str, dgeo: Optional[LatLon]) -> Photo:
geo: Optional[LatLon] geo: Optional[LatLon]
dt: Optional[datetime] = None dt: Optional[datetime] = None
geo = dgeo geo = dgeo
if any(x in mtype for x in {'image/png', 'image/x-ms-bmp', 'video'}): if any(x in mtype for x in {'image/png', 'image/x-ms-bmp', 'video'}):
logger.info(f"Skipping geo extraction for {photo} due to mime {mtype}") log.debug(f"skipping geo extraction for {photo} due to mime {mtype}")
else: else:
edata: Dict edata: Dict
try: try:
@ -173,6 +178,7 @@ def _try_photo(photo: str, mtype: str, dgeo: Optional[LatLon]) -> Optional[Photo
try: try:
edt = dt_from_path(photo) # ok, last try.. edt = dt_from_path(photo) # ok, last try..
except Exception as e: except Exception as e:
# TODO result type?
logger.error(f"Error while trying to extract date from name {photo}") logger.error(f"Error while trying to extract date from name {photo}")
logger.exception(e) logger.exception(e)
else: else:
@ -194,7 +200,8 @@ def fastermime(path: str, mgc=magic.Magic(mime=True)) -> str:
(mime, _) = mimetypes.guess_type(path) (mime, _) = mimetypes.guess_type(path)
if mime is not None: if mime is not None:
return mime return mime
# maigc is slower but returns more stuff # magic is slower but returns more stuff
# TODO FIXME Result type; it's inherently racey
return mgc.from_file(path) return mgc.from_file(path)
@ -211,22 +218,42 @@ def _candidates() -> Iterable[str]:
], stdout=PIPE) as p: ], stdout=PIPE) as p:
for line in p.stdout: for line in p.stdout:
path = line.decode('utf8').rstrip('\n') path = line.decode('utf8').rstrip('\n')
tp = fastermime(path).split('/')[0] mime = fastermime(path)
tp = mime.split('/')[0]
if tp in {'inode', 'text', 'application', 'audio'}: if tp in {'inode', 'text', 'application', 'audio'}:
continue continue
if tp not in {'image', 'video'}: if tp not in {'image', 'video'}:
# TODO yield error? # TODO yield error?
logger.warning('%s: unexpected mime %s', path, tp) logger.warning('%s: unexpected mime %s', path, tp)
# TODO return mime too? so we don't have to call it again in _photos?
yield path yield path
# if geo information is missing from photo, you can specify it manually in geo.json file def photos() -> Iterator[Photo]:
def iter_photos() -> Iterator[Photo]: candidates = tuple(sorted(_candidates()))
geolocator = Nominatim() # TODO does it cache?? return _photos(candidates)
mime = magic.Magic(mime=True) # TODO figure out how to use cachew without helper function?
# I guess need lazy variables or something?
# if geo information is missing from photo, you can specify it manually in geo.json file
# @mcachew(logger=logger)
def _photos(candidates: Iterable[str]) -> Iterator[Photo]:
geolocator = Nominatim() # TODO does it cache??
# TODO add geos cache??
for path in candidates:
if config.ignored(Path(path)):
log.info('ignoring %s due to config', path)
continue
dgeo = None # TODO
mime = fastermime(path)
p = _try_photo(path, mime, dgeo)
yield p
return
for pp in config.paths:
assert os.path.lexists(pp)
geos: List[LatLon] = [] # stack of geos so we could use the most specific one geos: List[LatLon] = [] # stack of geos so we could use the most specific one
# TODO could have this for all meta? e.g. time # TODO could have this for all meta? e.g. time
@ -252,18 +279,6 @@ def iter_photos() -> Iterator[Photo]:
logger.info(f"Ignoring {photo} due to regex") logger.info(f"Ignoring {photo} due to regex")
continue continue
mtype = mime.from_file(photo)
IGNORED = {
'application',
'audio',
'text',
'inode',
}
if any(i in mtype for i in IGNORED):
logger.info(f"Ignoring {photo} due to mime {mtype}")
continue
try: try:
dgeo = None if len(geos) == 0 else geos[-1] dgeo = None if len(geos) == 0 else geos[-1]
p = _try_photo(photo, mtype, dgeo) p = _try_photo(photo, mtype, dgeo)
@ -291,3 +306,6 @@ def update_cache():
photos = get_photos(cached=False) photos = get_photos(cached=False)
with open(CACHE_PATH, 'wb') as fo: with open(CACHE_PATH, 'wb') as fo:
dill.dump(photos, fo) dill.dump(photos, fo)
# TODO cachew -- improve AttributeError: type object 'tuple' has no attribute '__annotations__' -- improve errors?
# TODO cachew -- invalidate if function code changed?