core: add warning when get_files returns no files, my.twitter.archive: make more defensive in case of no archives
This commit is contained in:
parent
b7662378a2
commit
b99b2f3cfa
3 changed files with 29 additions and 12 deletions
|
@ -151,6 +151,12 @@ def get_files(pp: Paths, glob: str=DEFAULT_GLOB, sort: bool=True) -> Tuple[Path,
|
||||||
|
|
||||||
if sort:
|
if sort:
|
||||||
paths = list(sorted(paths))
|
paths = list(sorted(paths))
|
||||||
|
|
||||||
|
if len(paths) == 0:
|
||||||
|
# todo make it conditionally defensive based on some global settings
|
||||||
|
# todo stacktrace?
|
||||||
|
warnings.warn(f'No paths were matched against {pp}. This might result in missing data.')  # report the requested input `pp`; `paths` is always empty in this branch
|
||||||
|
|
||||||
return tuple(paths)
|
return tuple(paths)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -19,9 +19,8 @@ config = make_config(twitter)
|
||||||
|
|
||||||
|
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from typing import Union, List, Dict, Set, Optional, Iterator, Any, NamedTuple
|
from typing import Union, List, Dict, Set, Optional, Iterable, Any, NamedTuple, Sequence
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from functools import lru_cache
|
|
||||||
import json
|
import json
|
||||||
import zipfile
|
import zipfile
|
||||||
|
|
||||||
|
@ -35,8 +34,8 @@ from ..kython import kompress
|
||||||
logger = LazyLogger(__name__)
|
logger = LazyLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def _get_export() -> Path:
|
def inputs() -> Sequence[Path]:
|
||||||
return max(get_files(config.export_path))
|
return get_files(config.export_path)[-1:]
|
||||||
|
|
||||||
|
|
||||||
Tid = str
|
Tid = str
|
||||||
|
@ -115,9 +114,10 @@ class Like(NamedTuple):
|
||||||
return self.id_str
|
return self.id_str
|
||||||
|
|
||||||
|
|
||||||
|
from functools import lru_cache
|
||||||
class ZipExport:
|
class ZipExport:
|
||||||
def __init__(self) -> None:
|
def __init__(self, archive_path: Path) -> None:
|
||||||
self.epath = _get_export()
|
self.epath = archive_path
|
||||||
|
|
||||||
self.old_format = False # changed somewhere around 2020.03
|
self.old_format = False # changed somewhere around 2020.03
|
||||||
if not kompress.kexists(self.epath, 'Your archive.html'):
|
if not kompress.kexists(self.epath, 'Your archive.html'):
|
||||||
|
@ -149,12 +149,12 @@ class ZipExport:
|
||||||
[acc] = self.raw('account')
|
[acc] = self.raw('account')
|
||||||
return acc['username']
|
return acc['username']
|
||||||
|
|
||||||
def tweets(self) -> Iterator[Tweet]:
|
def tweets(self) -> Iterable[Tweet]:
|
||||||
for r in self.raw('tweet'):
|
for r in self.raw('tweet'):
|
||||||
yield Tweet(r, screen_name=self.screen_name())
|
yield Tweet(r, screen_name=self.screen_name())
|
||||||
|
|
||||||
|
|
||||||
def likes(self) -> Iterator[Like]:
|
def likes(self) -> Iterable[Like]:
|
||||||
# TODO ugh. would be nice to unify Tweet/Like interface
|
# TODO ugh. would be nice to unify Tweet/Like interface
|
||||||
# however, takeout only got tweetId, full text and url
|
# however, takeout only got tweetId, full text and url
|
||||||
for r in self.raw('like'):
|
for r in self.raw('like'):
|
||||||
|
@ -162,9 +162,11 @@ class ZipExport:
|
||||||
|
|
||||||
|
|
||||||
# todo not sure about list and sorting? although can't hurt considering json is not iterative?
|
# todo not sure about list and sorting? although can't hurt considering json is not iterative?
|
||||||
def tweets() -> List[Tweet]:
|
def tweets() -> Iterable[Tweet]:
|
||||||
return list(sorted(ZipExport().tweets(), key=lambda t: t.dt))
|
for inp in inputs():
|
||||||
|
yield from sorted(ZipExport(inp).tweets(), key=lambda t: t.dt)
|
||||||
|
|
||||||
|
|
||||||
def likes() -> List[Like]:
|
def likes() -> Iterable[Like]:
|
||||||
return list(ZipExport().likes())
|
for inp in inputs():
|
||||||
|
yield from ZipExport(inp).likes()
|
||||||
|
|
|
@ -97,6 +97,15 @@ def test_implicit_glob():
|
||||||
Path('/tmp/hpi_test/456/file.zip'),
|
Path('/tmp/hpi_test/456/file.zip'),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_no_files():
|
||||||
|
'''
|
||||||
|
Test for empty matches. They work, but should result in a warning
|
||||||
|
'''
|
||||||
|
assert get_files([]) == ()
|
||||||
|
assert get_files('bad*glob') == ()
|
||||||
|
|
||||||
|
|
||||||
# TODO not sure if should uniquify if the filenames end up same?
|
# TODO not sure if should uniquify if the filenames end up same?
|
||||||
# TODO not sure about the symlinks? and hidden files?
|
# TODO not sure about the symlinks? and hidden files?
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue