Extract date from photos
This commit is contained in:
parent
0a68e3000d
commit
b5834e5156
3 changed files with 59 additions and 2 deletions
|
@ -1,7 +1,8 @@
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
import itertools
|
import itertools
|
||||||
import os
|
import os
|
||||||
from os.path import join
|
from os.path import join, basename
|
||||||
|
import re
|
||||||
from typing import Tuple, Dict, Optional, NamedTuple, Iterator, Iterable, List
|
from typing import Tuple, Dict, Optional, NamedTuple, Iterator, Iterable, List
|
||||||
|
|
||||||
from geopy.geocoders import Nominatim # type: ignore
|
from geopy.geocoders import Nominatim # type: ignore
|
||||||
|
@ -23,6 +24,33 @@ mime = magic.Magic(mime=True)
|
||||||
|
|
||||||
# TODO hmm, instead geo could be a dynamic property... although a bit wasteful
|
# TODO hmm, instead geo could be a dynamic property... although a bit wasteful
|
||||||
|
|
||||||
|
# TODO insta photos should have instagram tag?
|
||||||
|
|
||||||
|
# TODO sokino -- wrong timestamp
|
||||||
|
|
||||||
|
_REGEXES = [re.compile(rs) for rs in [
|
||||||
|
r'***REMOVED***',
|
||||||
|
r'***REMOVED***',
|
||||||
|
# TODO eh, some photos from ***REMOVED*** -- which is clearly bad datetime! like a default setting
|
||||||
|
# TODO mm. maybe have expected datetime ranges for photos and discard everything else? some cameras look like they got bad timestamps
|
||||||
|
]]
|
||||||
|
|
||||||
|
def ignore_path(p: str):
    """Tell whether the path ``p`` should be skipped.

    Returns True when at least one of the compiled patterns in ``_REGEXES``
    is found anywhere in ``p``, and False otherwise.
    """
    return any(pattern.search(p) is not None for pattern in _REGEXES)
|
||||||
|
|
||||||
|
|
||||||
|
# An 8-digit date followed (after optional non-digit separators) by a 6-digit
# time, e.g. ``IMG_20180101_123456.jpg``. Lookarounds are used instead of
# consuming ``\D`` guards so the digit runs may also sit at the very start or
# the very end of the file name, while still rejecting longer digit runs.
_DT_REGEX = re.compile(r'(?<!\d)(\d{8})\D*(\d{6})(?!\d)')


def dt_from_path(p: str) -> Optional[datetime]:
    """Try to extract a timestamp from the file name of ``p``.

    Only the basename is inspected; directory components are ignored.

    Returns:
        The parsed naive ``datetime`` when the name contains a
        ``YYYYMMDD`` + ``HHMMSS`` digit pattern, or ``None`` when no such
        pattern is present.

    Raises:
        ValueError: if the digits match the pattern but do not form a valid
            date/time (propagated from ``datetime.strptime``).
    """
    name = basename(p)
    mm = _DT_REGEX.search(name)
    if mm is None:
        return None
    # Glue date and time digits together so a single format string parses both.
    dates = mm.group(1) + mm.group(2)
    return datetime.strptime(dates, "%Y%m%d%H%M%S")
|
||||||
|
|
||||||
PATHS = [
|
PATHS = [
|
||||||
"***REMOVED***",
|
"***REMOVED***",
|
||||||
"***REMOVED***",
|
"***REMOVED***",
|
||||||
|
@ -129,7 +157,7 @@ def _try_photo(photo: str, mtype: str, dgeo: Optional[LatLon]) -> Optional[Photo
|
||||||
dt = datetime.strptime(dtimes, '%Y:%m:%d %H:%M:%S')
|
dt = datetime.strptime(dtimes, '%Y:%m:%d %H:%M:%S')
|
||||||
# # TODO timezone is local, should take into account...
|
# # TODO timezone is local, should take into account...
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error while trying to extract date for {photo}")
|
logger.error(f"Error while trying to extract date from EXIF {photo}")
|
||||||
logger.exception(e)
|
logger.exception(e)
|
||||||
|
|
||||||
meta = edata.get(GPSINFO, {})
|
meta = edata.get(GPSINFO, {})
|
||||||
|
@ -137,17 +165,38 @@ def _try_photo(photo: str, mtype: str, dgeo: Optional[LatLon]) -> Optional[Photo
|
||||||
lat = convert(meta[LAT], meta[LAT_REF])
|
lat = convert(meta[LAT], meta[LAT_REF])
|
||||||
lon = convert(meta[LON], meta[LON_REF])
|
lon = convert(meta[LON], meta[LON_REF])
|
||||||
geo = (lat, lon)
|
geo = (lat, lon)
|
||||||
|
if dt is None:
|
||||||
|
try:
|
||||||
|
dt = dt_from_path(photo) # ok, last try..
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error while trying to extract date from name {photo}")
|
||||||
|
logger.exception(e)
|
||||||
|
|
||||||
|
|
||||||
return Photo(photo, dt, geo)
|
return Photo(photo, dt, geo)
|
||||||
# plink = f"file://{photo}"
|
# plink = f"file://{photo}"
|
||||||
# plink = "https://upload.wikimedia.org/wikipedia/commons/thumb/1/19/Ichthyornis_Clean.png/800px-Ichthyornis_Clean.png"
|
# plink = "https://upload.wikimedia.org/wikipedia/commons/thumb/1/19/Ichthyornis_Clean.png/800px-Ichthyornis_Clean.png"
|
||||||
# yield (geo, src.color, plink)
|
# yield (geo, src.color, plink)
|
||||||
|
|
||||||
|
# TODO ugh. need something like this, but tedious to reimplement..
|
||||||
|
# class Walker:
|
||||||
|
# def __init__(self, root: str) -> None:
|
||||||
|
# self.root = root
|
||||||
|
|
||||||
|
# def walk(self):
|
||||||
|
|
||||||
|
|
||||||
|
# def step(self, cur, dirs, files):
|
||||||
|
# pass
|
||||||
|
|
||||||
|
|
||||||
# if geo information is missing from photo, you can specify it manually in geo.json file
|
# if geo information is missing from photo, you can specify it manually in geo.json file
|
||||||
def iter_photos() -> Iterator[Photo]:
|
def iter_photos() -> Iterator[Photo]:
|
||||||
logger = get_logger()
|
logger = get_logger()
|
||||||
|
|
||||||
|
for pp in PATHS:
|
||||||
|
assert os.path.lexists(pp)
|
||||||
|
|
||||||
geos: List[LatLon] = [] # stack of geos so we could use the most specific one
|
geos: List[LatLon] = [] # stack of geos so we could use the most specific one
|
||||||
# TODO could have this for all meta? e.g. time
|
# TODO could have this for all meta? e.g. time
|
||||||
for d, _, files in itertools.chain.from_iterable((os.walk(pp) for pp in PATHS)):
|
for d, _, files in itertools.chain.from_iterable((os.walk(pp) for pp in PATHS)):
|
||||||
|
@ -168,6 +217,10 @@ def iter_photos() -> Iterator[Photo]:
|
||||||
|
|
||||||
for f in sorted(files):
|
for f in sorted(files):
|
||||||
photo = join(d, f)
|
photo = join(d, f)
|
||||||
|
if ignore_path(photo):
|
||||||
|
logger.info(f"Ignoring {photo} due to regex")
|
||||||
|
continue
|
||||||
|
|
||||||
mtype = mime.from_file(photo)
|
mtype = mime.from_file(photo)
|
||||||
|
|
||||||
IGNORED = {
|
IGNORED = {
|
||||||
|
|
|
@ -19,6 +19,7 @@ if len(sys.argv) > 1:
|
||||||
raise RuntimeError(f"Unknown command {cmd}")
|
raise RuntimeError(f"Unknown command {cmd}")
|
||||||
else:
|
else:
|
||||||
for p in iter_photos():
|
for p in iter_photos():
|
||||||
|
print(f"{p.dt} {p.path} {p.tags}")
|
||||||
pass
|
pass
|
||||||
# TODO need datetime!
|
# TODO need datetime!
|
||||||
# print(p)
|
# print(p)
|
||||||
|
|
3
run
Executable file
3
run
Executable file
|
@ -0,0 +1,3 @@
|
||||||
|
#!/bin/bash
|
||||||
|
set -eu
|
||||||
|
python3 -m photos
|
Loading…
Add table
Reference in a new issue