diff --git a/my/core/_deprecated/kompress.py b/my/core/_deprecated/kompress.py
index e4840f6..cd1bd9d 100644
--- a/my/core/_deprecated/kompress.py
+++ b/my/core/_deprecated/kompress.py
@@ -257,4 +257,8 @@ class ZipPath(zipfile_Path):
         )
         return os.stat_result(tuple(params.values()))
 
+    @property
+    def suffix(self) -> str:
+        return Path(self.parts[-1]).suffix
+
     # fmt: on
diff --git a/my/core/structure.py b/my/core/structure.py
index 88b75b8..7a0c2a2 100644
--- a/my/core/structure.py
+++ b/my/core/structure.py
@@ -123,7 +123,8 @@ def match_structure(
 
         searchdir = Path(tempfile.mkdtemp(dir=tdir))
 
-        zf = zipfile.ZipFile(base)
+        # base might already be a ZipPath, and str(base) would end with /
+        zf = zipfile.ZipFile(str(base).rstrip('/'))
         zf.extractall(path=str(searchdir))
 
     else:
diff --git a/my/google/takeout/html.py b/my/google/takeout/html.py
index c01788d..5d65a86 100644
--- a/my/google/takeout/html.py
+++ b/my/google/takeout/html.py
@@ -146,12 +146,11 @@ class TakeoutHTMLParser(HTMLParser):
 
 
 def read_html(tpath: Path, file: str) -> Iterable[Parsed]:
-    from ...core.kompress import kopen
     results: List[Parsed] = []
     def cb(dt: datetime, url: Url, title: Title) -> None:
         results.append((dt, url, title))
     parser = TakeoutHTMLParser(callback=cb)
-    with kopen(tpath, file) as fo:
+    with (tpath / file).open() as fo:
         data = fo.read()
     parser.feed(data)
     return results
diff --git a/my/google/takeout/parser.py b/my/google/takeout/parser.py
index 9a90c8f..96acfff 100644
--- a/my/google/takeout/parser.py
+++ b/my/google/takeout/parser.py
@@ -94,10 +94,9 @@ def events(disable_takeout_cache: bool = DISABLE_TAKEOUT_CACHE) -> CacheResults:
     for path in reversed(inputs()):
         with ExitStack() as exit_stack:
             if config._use_zippath:
-                from my.core.kompress import ZipPath
                 # for later takeouts it's just 'Takeout' dir,
                 # but for older (pre 2015) it contains email/date in the subdir name
-                results = tuple(cast(Sequence[Path], ZipPath(path).iterdir()))
+                results = tuple(cast(Sequence[Path], path.iterdir()))
             else:
                 results = exit_stack.enter_context(match_structure(path, expected=EXPECTED, partial=True))
             for m in results:
diff --git a/my/google/takeout/paths.py b/my/google/takeout/paths.py
index ee3e1e7..5b53149 100644
--- a/my/google/takeout/paths.py
+++ b/my/google/takeout/paths.py
@@ -23,8 +23,6 @@ config = make_config(google)
 from pathlib import Path
 from typing import Optional, Iterable
 
-from ...core.kompress import kexists
-
 
 def get_takeouts(*, path: Optional[str]=None) -> Iterable[Path]:
     """
@@ -33,7 +31,7 @@ def get_takeouts(*, path: Optional[str]=None) -> Iterable[Path]:
     # TODO FIXME zip is not great..
     # allow a lambda expression? that way the user could restrict it
     for takeout in get_files(config.takeout_path, glob='*.zip'):
-        if path is None or kexists(takeout, path):
+        if path is None or (takeout / path).exists():
             yield takeout
 
 
diff --git a/my/instagram/gdpr.py b/my/instagram/gdpr.py
index 348d69d..afa4c96 100644
--- a/my/instagram/gdpr.py
+++ b/my/instagram/gdpr.py
@@ -17,7 +17,6 @@ from my.core import (
     assert_never,
     make_logger,
 )
-from my.core.kompress import ZipPath
 
 from my.config import instagram as user_config
 
@@ -70,7 +69,7 @@ def _decode(s: str) -> str:
 
 
 def _entities() -> Iterator[Res[Union[User, _Message]]]:
-    last = ZipPath(max(inputs()))
+    last = max(inputs())
     # TODO make sure it works both with plan directory
     # idelaly get_files should return the right thing, and we won't have to force ZipPath/match_structure here
     # e.g. possible options are:
diff --git a/my/location/google.py b/my/location/google.py
index fdddd92..ed37231 100644
--- a/my/location/google.py
+++ b/my/location/google.py
@@ -21,7 +21,6 @@ import geopy # type: ignore
 
 from ..core.common import LazyLogger, mcachew
 from ..core.cachew import cache_dir
-from ..core import kompress
 from my.core.warnings import high
 
 
@@ -135,7 +134,7 @@ def _iter_locations(path: Path, start=0, stop=None) -> Iterable[Location]:
         ctx = path.open('r')
     else: # must be a takeout archive
         # todo CPath? although not sure if it can be iterative?
-        ctx = kompress.open(path, _LOCATION_JSON)
+        ctx = (path / _LOCATION_JSON).open()
 
     if USE_GREP:
         unzip = f'unzip -p "{path}" "{_LOCATION_JSON}"'
diff --git a/my/stackexchange/gdpr.py b/my/stackexchange/gdpr.py
index 4a3182b..18b2b4d 100644
--- a/my/stackexchange/gdpr.py
+++ b/my/stackexchange/gdpr.py
@@ -6,7 +6,7 @@ Stackexchange data (uses [[https://stackoverflow.com/legal/gdpr/request][officia
 ### config
 from my.config import stackexchange as user_config
-from ..core import dataclass, PathIsh, make_config
+from ..core import dataclass, PathIsh, make_config, get_files
 
 @dataclass
 class stackexchange(user_config):
     gdpr_path: PathIsh # path to GDPR zip file
@@ -61,12 +61,11 @@ class Vote(NamedTuple):
     # todo expose vote type?
 
 import json
-from ..core.kompress import ZipPath
 from ..core.error import Res
 def votes() -> Iterable[Res[Vote]]:
     # TODO there is also some site specific stuff in qa/ directory.. not sure if its' more detailed
     # todo should be defensive? not sure if present when user has no votes
-    path = ZipPath(config.gdpr_path)
+    path = max(get_files(config.gdpr_path))
     votes_path = path / 'analytics' / 'qa\\vote.submit.json' # yes, it does contain a backslash...
     j = json.loads(votes_path.read_text(encoding='utf-8-sig')) # not sure why, but this encoding seems necessary
     for r in reversed(j): # they seem to be in decreasing order by default
diff --git a/my/twitter/archive.py b/my/twitter/archive.py
index 44ebc5f..22014df 100644
--- a/my/twitter/archive.py
+++ b/my/twitter/archive.py
@@ -26,7 +26,6 @@ from functools import cached_property
 import html
 from ..core.common import Paths, datetime_aware
 from ..core.error import Res
-from ..core.kompress import ZipPath
 
 @dataclass
 class twitter_archive(user_config):
@@ -164,9 +163,7 @@ class Like(NamedTuple):
 
 class ZipExport:
     def __init__(self, archive_path: Path) -> None:
-        # todo maybe this should be insude get_files instead, perhps covered with a flag?
-        self.zpath = ZipPath(archive_path)
-
+        self.zpath = archive_path
         if (self.zpath / 'tweets.csv').exists():
             from ..core.warnings import high
             high("NOTE: CSV format (pre ~Aug 2018) isn't supported yet, this is likely not going to work.")
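
For context: the call sites above all move to the pathlib-style API that the stdlib zipfile.Path exposes (and that the now-deprecated kompress.ZipPath extends), so zip archives can be traversed like plain directories. A minimal sketch of the equivalences, with a hypothetical archive name and member paths:

    import zipfile

    # hypothetical takeout archive; any zip with known members behaves the same
    zpath = zipfile.Path('takeout-20201106.zip')

    # kexists(takeout, path)   ->  (takeout / path).exists()
    print((zpath / 'Takeout' / 'My Activity').exists())

    # kopen(tpath, file)       ->  (tpath / file).open()
    with (zpath / 'Takeout' / 'archive_browser.html').open() as fo:
        data = fo.read()

    # ZipPath(path).iterdir()  ->  path.iterdir(), once get_files hands back a ZipPath
    for entry in zpath.iterdir():
        print(entry.name)

    # the ZipPath.suffix property added in kompress.py just delegates to
    # pathlib on the last path component:
    from pathlib import Path
    print(Path('takeout-20201106.zip').suffix)  # '.zip'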
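
The rstrip('/') in structure.py is needed because str() of a zipfile.Path rooted at the archive joins the archive filename with an empty member path, so it carries a trailing separator, which zipfile.ZipFile then fails to open as a filename. A quick illustration (archive name hypothetical):

    import zipfile

    p = zipfile.Path('takeout.zip')
    print(str(p))  # 'takeout.zip/' -- note the trailing slash
    zf = zipfile.ZipFile(str(p).rstrip('/'))  # strip it before reopening the archive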