From b99b2f3cfad3bd1ed7e6c5995ad5d00187b5f3e7 Mon Sep 17 00:00:00 2001
From: Dima Gerasimov
Date: Sun, 24 May 2020 12:51:23 +0100
Subject: [PATCH] core: add warning when get_files returns no files, my.twitter.archive: make more defensive in case of no archives

---
Review note: this patch was reconstructed from a copy whose line breaks had
been collapsed; blank context lines and indentation were re-derived from the
hunk headers and should be checked against the target tree before applying.
One content change versus commit b99b2f3: the new warning in get_files()
interpolates `pp` (the argument the caller passed) instead of `paths`, which
is always empty inside that branch, so the message now names what failed to
match. The post-image blob id on the my/core/common.py index line therefore
no longer matches the resulting file.

 my/core/common.py     |  6 ++++++
 my/twitter/archive.py | 26 ++++++++++++++------------
 tests/get_files.py    |  9 +++++++++
 3 files changed, 29 insertions(+), 12 deletions(-)

diff --git a/my/core/common.py b/my/core/common.py
index dec0b15..cfadc04 100644
--- a/my/core/common.py
+++ b/my/core/common.py
@@ -151,6 +151,12 @@ def get_files(pp: Paths, glob: str=DEFAULT_GLOB, sort: bool=True) -> Tuple[Path,
 
     if sort:
         paths = list(sorted(paths))
+
+    if len(paths) == 0:
+        # todo make it conditionally defensive based on some global settings
+        # todo stacktrace?
+        warnings.warn(f'No paths were matched against {pp}. This might result in missing data.')
+
     return tuple(paths)
 
 
diff --git a/my/twitter/archive.py b/my/twitter/archive.py
index 07721f8..edcf63c 100755
--- a/my/twitter/archive.py
+++ b/my/twitter/archive.py
@@ -19,9 +19,8 @@ config = make_config(twitter)
 
 
 from datetime import datetime
-from typing import Union, List, Dict, Set, Optional, Iterator, Any, NamedTuple
+from typing import Union, List, Dict, Set, Optional, Iterable, Any, NamedTuple, Sequence
 from pathlib import Path
-from functools import lru_cache
 import json
 import zipfile
 
@@ -35,8 +34,8 @@ from ..kython import kompress
 logger = LazyLogger(__name__)
 
 
-def _get_export() -> Path:
-    return max(get_files(config.export_path))
+def inputs() -> Sequence[Path]:
+    return get_files(config.export_path)[-1:]
 
 
 Tid = str
@@ -115,9 +114,10 @@ class Like(NamedTuple):
         return self.id_str
 
 
+from functools import lru_cache
 class ZipExport:
-    def __init__(self) -> None:
-        self.epath = _get_export()
+    def __init__(self, archive_path: Path) -> None:
+        self.epath = archive_path
 
         self.old_format = False # changed somewhere around 2020.03
         if not kompress.kexists(self.epath, 'Your archive.html'):
@@ -149,12 +149,12 @@ class ZipExport:
         [acc] = self.raw('account')
         return acc['username']
 
-    def tweets(self) -> Iterator[Tweet]:
+    def tweets(self) -> Iterable[Tweet]:
         for r in self.raw('tweet'):
             yield Tweet(r, screen_name=self.screen_name())
 
 
-    def likes(self) -> Iterator[Like]:
+    def likes(self) -> Iterable[Like]:
         # TODO ugh. would be nice to unify Tweet/Like interface
         # however, akeout only got tweetId, full text and url
         for r in self.raw('like'):
@@ -162,9 +162,11 @@ class ZipExport:
 
 
 # todo not sure about list and sorting? although can't hurt considering json is not iterative?
-def tweets() -> List[Tweet]:
-    return list(sorted(ZipExport().tweets(), key=lambda t: t.dt))
+def tweets() -> Iterable[Tweet]:
+    for inp in inputs():
+        yield from sorted(ZipExport(inp).tweets(), key=lambda t: t.dt)
 
 
-def likes() -> List[Like]:
-    return list(ZipExport().likes())
+def likes() -> Iterable[Like]:
+    for inp in inputs():
+        yield from ZipExport(inp).likes()
diff --git a/tests/get_files.py b/tests/get_files.py
index 29e0528..14f2711 100644
--- a/tests/get_files.py
+++ b/tests/get_files.py
@@ -97,6 +97,15 @@ def test_implicit_glob():
         Path('/tmp/hpi_test/456/file.zip'),
     )
 
+
+def test_no_files():
+    '''
+    Test for empty matches. They work, but should result in warning
+    '''
+    assert get_files([]) == ()
+    assert get_files('bad*glob') == ()
+
+
 
 # TODO not sure if should uniquify if the filenames end up same?
 # TODO not sure about the symlinks? and hidden files?