From 4ed70eee904ab1040a90be2a318acf59b50f9aa9 Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Fri, 13 Mar 2020 00:10:27 +0000 Subject: [PATCH] more cleanup for photos provider --- my/common.py | 19 ++++++++++++++++ my/photos/__init__.py | 51 ++++++++++++++++++------------------------- my/photos/utils.py | 8 ++++--- 3 files changed, 45 insertions(+), 33 deletions(-) diff --git a/my/common.py b/my/common.py index ba8c07b..3d42167 100644 --- a/my/common.py +++ b/my/common.py @@ -126,3 +126,22 @@ def mcachew(*args, **kwargs): import cachew.experimental cachew.experimental.enable_exceptions() # TODO do it only once? return cachew.cachew(*args, **kwargs) + + + +@functools.lru_cache(1) +def _magic(): + import magic # type: ignore + return magic.Magic(mime=True) + + +# TODO could reuse in pdf module? +import mimetypes # TODO do I need init()? +def fastermime(path: str) -> str: + # mimetypes is faster + (mime, _) = mimetypes.guess_type(path) + if mime is not None: + return mime + # magic is slower but returns more stuff + # TODO FIXME Result type; it's inherently racey + return _magic().from_file(path) diff --git a/my/photos/__init__.py b/my/photos/__init__.py index 23a4311..3444692 100644 --- a/my/photos/__init__.py +++ b/my/photos/__init__.py @@ -1,5 +1,5 @@ """ -Module for accessing photos and videos on the filesystem +Module for accessing photos and videos, with their GPS and timestamps """ # pip install geopy magic @@ -10,15 +10,14 @@ from pathlib import Path from typing import Tuple, Dict, Optional, NamedTuple, Iterator, Iterable, List from geopy.geocoders import Nominatim # type: ignore -import magic # type: ignore -from ..common import LazyLogger, mcachew +from ..common import LazyLogger, mcachew, fastermime +from ..error import Res from mycfg import photos as config -logger = LazyLogger('my.photos') -log = logger +log = LazyLogger('my.photos') @@ -27,14 +26,11 @@ class LatLon(NamedTuple): lat: float lon: float -# TODO PIL.ExifTags.TAGS - class Photo(NamedTuple): path: str dt: Optional[datetime] geo: Optional[LatLon] - # TODO can we always extract date? I guess not... @property def tags(self) -> List[str]: # TODO @@ -42,6 +38,7 @@ class Photo(NamedTuple): @property def _basename(self) -> str: + # TODO 'canonical' or something? only makes sense for organized ones for bp in config.paths: if self.path.startswith(bp): return self.path[len(bp):] @@ -54,12 +51,15 @@ class Photo(NamedTuple): @property def url(self) -> str: + PHOTOS_URL = 'TODO FIXME' return PHOTOS_URL + self._basename from .utils import get_exif_from_file, ExifTags, Exif, dt_from_path, convert_ref -def _try_photo(photo: Path, mtype: str, *, parent_geo: Optional[LatLon]) -> Photo: +Result = Res[Photo] + +def _make_photo(photo: Path, mtype: str, *, parent_geo: Optional[LatLon]) -> Iterator[Result]: exif: Exif if any(x in mtype for x in {'image/png', 'image/x-ms-bmp', 'video'}): # TODO don't remember why.. @@ -94,7 +94,6 @@ def _try_photo(photo: Path, mtype: str, *, parent_geo: Optional[LatLon]) -> Phot log.warning('ignoring timestamp extraction for %s, they are stupid for Instagram videos', photo) return None - # TODO FIXME result type here?? edt = dt_from_path(photo) # ok, last try.. if edt is None: @@ -102,7 +101,7 @@ def _try_photo(photo: Path, mtype: str, *, parent_geo: Optional[LatLon]) -> Phot if edt is not None and edt > datetime.now(): # TODO also yield? - logger.error('datetime for %s is too far in future: %s', photo, edt) + log.error('datetime for %s is too far in future: %s', photo, edt) return None return edt @@ -110,24 +109,14 @@ def _try_photo(photo: Path, mtype: str, *, parent_geo: Optional[LatLon]) -> Phot geo = _get_geo() dt = _get_dt() - return Photo(str(photo), dt=dt, geo=geo) - - -import mimetypes # TODO do I need init()? -def fastermime(path: str, mgc=magic.Magic(mime=True)) -> str: - # mimetypes is faster - (mime, _) = mimetypes.guess_type(path) - if mime is not None: - return mime - # magic is slower but returns more stuff - # TODO FIXME Result type; it's inherently racey - return mgc.from_file(path) + yield Photo(str(photo), dt=dt, geo=geo) # TODO exclude def _candidates() -> Iterable[str]: # TODO that could be a bit slow if there are to many extra files? from subprocess import Popen, PIPE + # TODO could extract this to common? with Popen([ 'fdfind', '--follow', @@ -143,12 +132,12 @@ def _candidates() -> Iterable[str]: continue if tp not in {'image', 'video'}: # TODO yield error? - logger.warning('%s: unexpected mime %s', path, tp) + log.warning('%s: unexpected mime %s', path, tp) # TODO return mime too? so we don't have to call it again in _photos? yield path -def photos() -> Iterator[Photo]: +def photos() -> Iterator[Result]: candidates = tuple(sorted(_candidates())) return _photos(candidates) # TODO figure out how to use cachew without helper function? @@ -157,8 +146,8 @@ def photos() -> Iterator[Photo]: # if geo information is missing from photo, you can specify it manually in geo.json file # TODO is there something more standard? -# @mcachew(cache_path=config.cache_path) -def _photos(candidates: Iterable[str]) -> Iterator[Photo]: +@mcachew(cache_path=config.cache_path) +def _photos(candidates: Iterable[str]) -> Iterator[Result]: geolocator = Nominatim() # TODO does it cache?? from functools import lru_cache @@ -189,13 +178,15 @@ def _photos(candidates: Iterable[str]) -> Iterator[Photo]: parent_geo = get_geo(path.parent) mime = fastermime(str(path)) - p = _try_photo(path, mime, parent_geo=parent_geo) - yield p + yield from _make_photo(path, mime, parent_geo=parent_geo) def print_all(): for p in photos(): - print(f"{p.dt} {p.path} {p.tags}") + if isinstance(p, Exception): + print('ERROR!', p) + else: + print(f"{p.dt} {p.path} {p.tags}") # TODO cachew -- improve AttributeError: type object 'tuple' has no attribute '__annotations__' -- improve errors? # TODO cachew -- invalidate if function code changed? diff --git a/my/photos/utils.py b/my/photos/utils.py index c48d879..5b03079 100644 --- a/my/photos/utils.py +++ b/my/photos/utils.py @@ -1,3 +1,4 @@ +from pathlib import Path from typing import Dict import PIL.Image # type: ignore @@ -6,6 +7,8 @@ from PIL.ExifTags import TAGS, GPSTAGS # type: ignore Exif = Dict +# TODO PIL.ExifTags.TAGS + class ExifTags: DATETIME = "DateTimeOriginal" @@ -17,9 +20,9 @@ class ExifTags: # TODO there must be something more standard for this... -def get_exif_from_file(path: str) -> Exif: +def get_exif_from_file(path: Path) -> Exif: # TODO exception handler? - with PIL.Image.open(path) as fo: + with PIL.Image.open(str(path)) as fo: return get_exif_data(fo) @@ -70,7 +73,6 @@ def convert_ref(cstr, ref: str): import re from datetime import datetime -from pathlib import Path from typing import Optional # TODO surely there is a library that does it??