start adapting photos for cachew
This commit is contained in:
parent
5698be60fe
commit
c7dc386258
1 changed files with 42 additions and 24 deletions
|
@ -7,6 +7,7 @@ import os
|
||||||
from os.path import join, basename
|
from os.path import join, basename
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
|
from pathlib import Path
|
||||||
from typing import Tuple, Dict, Optional, NamedTuple, Iterator, Iterable, List
|
from typing import Tuple, Dict, Optional, NamedTuple, Iterator, Iterable, List
|
||||||
|
|
||||||
from geopy.geocoders import Nominatim # type: ignore
|
from geopy.geocoders import Nominatim # type: ignore
|
||||||
|
@ -17,10 +18,11 @@ import PIL.Image # type: ignore
|
||||||
from PIL.ExifTags import TAGS, GPSTAGS # type: ignore
|
from PIL.ExifTags import TAGS, GPSTAGS # type: ignore
|
||||||
|
|
||||||
|
|
||||||
from ..common import LazyLogger
|
from ..common import LazyLogger, mcachew
|
||||||
|
|
||||||
|
|
||||||
logger = LazyLogger('my.photos')
|
logger = LazyLogger('my.photos')
|
||||||
|
log = logger
|
||||||
|
|
||||||
|
|
||||||
from mycfg import photos as config
|
from mycfg import photos as config
|
||||||
|
@ -52,7 +54,9 @@ def dt_from_path(p: str) -> Optional[datetime]:
|
||||||
return datetime.strptime(dates, "%Y%m%d%H%M%S")
|
return datetime.strptime(dates, "%Y%m%d%H%M%S")
|
||||||
|
|
||||||
# TODO ignore hidden dirs?
|
# TODO ignore hidden dirs?
|
||||||
LatLon = Tuple[float, float]
|
class LatLon(NamedTuple):
|
||||||
|
lat: float
|
||||||
|
lon: float
|
||||||
|
|
||||||
# TODO PIL.ExifTags.TAGS
|
# TODO PIL.ExifTags.TAGS
|
||||||
|
|
||||||
|
@ -132,13 +136,14 @@ class Photo(NamedTuple):
|
||||||
def url(self) -> str:
|
def url(self) -> str:
|
||||||
return PHOTOS_URL + self._basename
|
return PHOTOS_URL + self._basename
|
||||||
|
|
||||||
def _try_photo(photo: str, mtype: str, dgeo: Optional[LatLon]) -> Optional[Photo]:
|
|
||||||
|
def _try_photo(photo: str, mtype: str, dgeo: Optional[LatLon]) -> Photo:
|
||||||
geo: Optional[LatLon]
|
geo: Optional[LatLon]
|
||||||
|
|
||||||
dt: Optional[datetime] = None
|
dt: Optional[datetime] = None
|
||||||
geo = dgeo
|
geo = dgeo
|
||||||
if any(x in mtype for x in {'image/png', 'image/x-ms-bmp', 'video'}):
|
if any(x in mtype for x in {'image/png', 'image/x-ms-bmp', 'video'}):
|
||||||
logger.info(f"Skipping geo extraction for {photo} due to mime {mtype}")
|
log.debug(f"skipping geo extraction for {photo} due to mime {mtype}")
|
||||||
else:
|
else:
|
||||||
edata: Dict
|
edata: Dict
|
||||||
try:
|
try:
|
||||||
|
@ -173,6 +178,7 @@ def _try_photo(photo: str, mtype: str, dgeo: Optional[LatLon]) -> Optional[Photo
|
||||||
try:
|
try:
|
||||||
edt = dt_from_path(photo) # ok, last try..
|
edt = dt_from_path(photo) # ok, last try..
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
# TODO result type?
|
||||||
logger.error(f"Error while trying to extract date from name {photo}")
|
logger.error(f"Error while trying to extract date from name {photo}")
|
||||||
logger.exception(e)
|
logger.exception(e)
|
||||||
else:
|
else:
|
||||||
|
@ -194,7 +200,8 @@ def fastermime(path: str, mgc=magic.Magic(mime=True)) -> str:
|
||||||
(mime, _) = mimetypes.guess_type(path)
|
(mime, _) = mimetypes.guess_type(path)
|
||||||
if mime is not None:
|
if mime is not None:
|
||||||
return mime
|
return mime
|
||||||
# maigc is slower but returns more stuff
|
# magic is slower but returns more stuff
|
||||||
|
# TODO FIXME Result type; it's inherently racy
|
||||||
return mgc.from_file(path)
|
return mgc.from_file(path)
|
||||||
|
|
||||||
|
|
||||||
|
@ -211,22 +218,42 @@ def _candidates() -> Iterable[str]:
|
||||||
], stdout=PIPE) as p:
|
], stdout=PIPE) as p:
|
||||||
for line in p.stdout:
|
for line in p.stdout:
|
||||||
path = line.decode('utf8').rstrip('\n')
|
path = line.decode('utf8').rstrip('\n')
|
||||||
tp = fastermime(path).split('/')[0]
|
mime = fastermime(path)
|
||||||
|
tp = mime.split('/')[0]
|
||||||
if tp in {'inode', 'text', 'application', 'audio'}:
|
if tp in {'inode', 'text', 'application', 'audio'}:
|
||||||
continue
|
continue
|
||||||
if tp not in {'image', 'video'}:
|
if tp not in {'image', 'video'}:
|
||||||
# TODO yield error?
|
# TODO yield error?
|
||||||
logger.warning('%s: unexpected mime %s', path, tp)
|
logger.warning('%s: unexpected mime %s', path, tp)
|
||||||
|
# TODO return mime too? so we don't have to call it again in _photos?
|
||||||
yield path
|
yield path
|
||||||
|
|
||||||
|
|
||||||
# if geo information is missing from photo, you can specify it manually in geo.json file
|
def photos() -> Iterator[Photo]:
|
||||||
def iter_photos() -> Iterator[Photo]:
|
candidates = tuple(sorted(_candidates()))
|
||||||
geolocator = Nominatim() # TODO does it cache??
|
return _photos(candidates)
|
||||||
mime = magic.Magic(mime=True)
|
# TODO figure out how to use cachew without helper function?
|
||||||
|
# I guess need lazy variables or something?
|
||||||
|
|
||||||
|
|
||||||
|
# if geo information is missing from photo, you can specify it manually in geo.json file
|
||||||
|
# @mcachew(logger=logger)
|
||||||
|
def _photos(candidates: Iterable[str]) -> Iterator[Photo]:
|
||||||
|
geolocator = Nominatim() # TODO does it cache??
|
||||||
|
|
||||||
|
# TODO add geos cache??
|
||||||
|
|
||||||
|
for path in candidates:
|
||||||
|
if config.ignored(Path(path)):
|
||||||
|
log.info('ignoring %s due to config', path)
|
||||||
|
continue
|
||||||
|
|
||||||
|
dgeo = None # TODO
|
||||||
|
mime = fastermime(path)
|
||||||
|
p = _try_photo(path, mime, dgeo)
|
||||||
|
yield p
|
||||||
|
return
|
||||||
|
|
||||||
for pp in config.paths:
|
|
||||||
assert os.path.lexists(pp)
|
|
||||||
|
|
||||||
geos: List[LatLon] = [] # stack of geos so we could use the most specific one
|
geos: List[LatLon] = [] # stack of geos so we could use the most specific one
|
||||||
# TODO could have this for all meta? e.g. time
|
# TODO could have this for all meta? e.g. time
|
||||||
|
@ -252,18 +279,6 @@ def iter_photos() -> Iterator[Photo]:
|
||||||
logger.info(f"Ignoring {photo} due to regex")
|
logger.info(f"Ignoring {photo} due to regex")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
mtype = mime.from_file(photo)
|
|
||||||
|
|
||||||
IGNORED = {
|
|
||||||
'application',
|
|
||||||
'audio',
|
|
||||||
'text',
|
|
||||||
'inode',
|
|
||||||
}
|
|
||||||
if any(i in mtype for i in IGNORED):
|
|
||||||
logger.info(f"Ignoring {photo} due to mime {mtype}")
|
|
||||||
continue
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
dgeo = None if len(geos) == 0 else geos[-1]
|
dgeo = None if len(geos) == 0 else geos[-1]
|
||||||
p = _try_photo(photo, mtype, dgeo)
|
p = _try_photo(photo, mtype, dgeo)
|
||||||
|
@ -291,3 +306,6 @@ def update_cache():
|
||||||
photos = get_photos(cached=False)
|
photos = get_photos(cached=False)
|
||||||
with open(CACHE_PATH, 'wb') as fo:
|
with open(CACHE_PATH, 'wb') as fo:
|
||||||
dill.dump(photos, fo)
|
dill.dump(photos, fo)
|
||||||
|
|
||||||
|
# TODO cachew -- improve AttributeError: type object 'tuple' has no attribute '__annotations__' -- improve errors?
|
||||||
|
# TODO cachew -- invalidate if function code changed?
|
||||||
|
|
Loading…
Add table
Reference in a new issue