core: add warning when get_files returns no files, my.twitter.archive: make more defensive in case of no archives

This commit is contained in:
Dima Gerasimov 2020-05-24 12:51:23 +01:00
parent b7662378a2
commit b99b2f3cfa
3 changed files with 29 additions and 12 deletions

View file

@ -151,6 +151,12 @@ def get_files(pp: Paths, glob: str=DEFAULT_GLOB, sort: bool=True) -> Tuple[Path,
if sort: if sort:
paths = list(sorted(paths)) paths = list(sorted(paths))
if len(paths) == 0:
    # todo make it conditionally defensive based on some global settings
    # todo stacktrace?
    # Report the caller-supplied input (pp): `paths` is necessarily empty on
    # this branch, so interpolating it would never tell the user what failed to match.
    warnings.warn(f'No paths were matched against {pp}. This might result in missing data.')
return tuple(paths) return tuple(paths)

View file

@ -19,9 +19,8 @@ config = make_config(twitter)
from datetime import datetime from datetime import datetime
from typing import Union, List, Dict, Set, Optional, Iterator, Any, NamedTuple from typing import Union, List, Dict, Set, Optional, Iterable, Any, NamedTuple, Sequence
from pathlib import Path from pathlib import Path
from functools import lru_cache
import json import json
import zipfile import zipfile
@ -35,8 +34,8 @@ from ..kython import kompress
logger = LazyLogger(__name__) logger = LazyLogger(__name__)
def _get_export() -> Path: def inputs() -> Sequence[Path]:
return max(get_files(config.export_path)) return get_files(config.export_path)[-1:]
Tid = str Tid = str
@ -115,9 +114,10 @@ class Like(NamedTuple):
return self.id_str return self.id_str
from functools import lru_cache
class ZipExport: class ZipExport:
def __init__(self) -> None: def __init__(self, archive_path: Path) -> None:
self.epath = _get_export() self.epath = archive_path
self.old_format = False # changed somewhere around 2020.03 self.old_format = False # changed somewhere around 2020.03
if not kompress.kexists(self.epath, 'Your archive.html'): if not kompress.kexists(self.epath, 'Your archive.html'):
@ -149,12 +149,12 @@ class ZipExport:
[acc] = self.raw('account') [acc] = self.raw('account')
return acc['username'] return acc['username']
def tweets(self) -> Iterator[Tweet]: def tweets(self) -> Iterable[Tweet]:
for r in self.raw('tweet'): for r in self.raw('tweet'):
yield Tweet(r, screen_name=self.screen_name()) yield Tweet(r, screen_name=self.screen_name())
def likes(self) -> Iterator[Like]: def likes(self) -> Iterable[Like]:
# TODO ugh. would be nice to unify Tweet/Like interface # TODO ugh. would be nice to unify Tweet/Like interface
# however, takeout only got tweetId, full text and url # however, takeout only got tweetId, full text and url
for r in self.raw('like'): for r in self.raw('like'):
@ -162,9 +162,11 @@ class ZipExport:
# todo not sure about list and sorting? although can't hurt considering json is not iterative? # todo not sure about list and sorting? although can't hurt considering json is not iterative?
def tweets() -> List[Tweet]: def tweets() -> Iterable[Tweet]:
return list(sorted(ZipExport().tweets(), key=lambda t: t.dt)) for inp in inputs():
yield from sorted(ZipExport(inp).tweets(), key=lambda t: t.dt)
def likes() -> List[Like]: def likes() -> Iterable[Like]:
return list(ZipExport().likes()) for inp in inputs():
yield from ZipExport(inp).likes()

View file

@ -97,6 +97,15 @@ def test_implicit_glob():
Path('/tmp/hpi_test/456/file.zip'), Path('/tmp/hpi_test/456/file.zip'),
) )
def test_no_files():
    '''
    Test for empty matches. They work (an empty tuple is returned), but should result in a warning.
    '''
    # Local import: the test module's import block is outside the visible hunk.
    import warnings

    for arg in ([], 'bad*glob'):
        with warnings.catch_warnings(record=True) as caught:
            # 'always' disables the once-per-location suppression, so the
            # warning is observable for every call, not only the first one.
            warnings.simplefilter('always')
            assert get_files(arg) == ()
        assert len(caught) > 0, f'expected a warning for {arg!r}'
# TODO not sure if should uniquify if the filenames end up same? # TODO not sure if should uniquify if the filenames end up same?
# TODO not sure about the symlinks? and hidden files? # TODO not sure about the symlinks? and hidden files?