diff --git a/my/coding/github.py b/my/coding/github.py index f188475..6bc9bc7 100644 --- a/my/coding/github.py +++ b/my/coding/github.py @@ -5,6 +5,8 @@ from pathlib import Path import pytz +from ..kython.klogging import LazyLogger +from ..kython.kompress import CPath from ..common import get_files, mcachew from ..error import Res @@ -12,9 +14,8 @@ from mycfg import paths import mycfg.repos.ghexport.dal as ghexport -def get_logger(): - import logging - return logging.getLogger('my.github') # TODO __package__??? +logger = LazyLogger('my.github') +# TODO __package__??? class Event(NamedTuple): @@ -74,7 +75,8 @@ def _get_summary(e) -> Tuple[str, Optional[str], Optional[str]]: def get_dal(): - sources = get_files(paths.github.export_dir, glob='*.json') + sources = get_files(paths.github.export_dir, glob='*.json*') + sources = list(map(CPath, sources)) # TODO maybe move it to get_files? e.g. compressed=True arg? return ghexport.DAL(sources) @@ -216,7 +218,6 @@ def iter_backup_events(dal=get_dal()) -> Iterator[Event]: def iter_events() -> Iterator[Res[Event]]: - logger = get_logger() from itertools import chain emitted: Set[Tuple[datetime, str]] = set() for e in chain(iter_gdpr_events(), iter_backup_events()): diff --git a/my/kython/kompress.py b/my/kython/kompress.py new file mode 100644 index 0000000..1dafc5c --- /dev/null +++ b/my/kython/kompress.py @@ -0,0 +1,47 @@ +""" +Various helpers for compression +""" +from pathlib import Path, PosixPath +from typing import Union + +PathIsh = Union[Path, str] + + +def _zstd_open(path: Path): + import zstandard as zstd # type: ignore + fh = path.open('rb') + dctx = zstd.ZstdDecompressor() + reader = dctx.stream_reader(fh) + return reader + + +def kopen(path: PathIsh, *args, **kwargs): # TODO is it bytes stream?? 
+ pp = Path(path) + suf = pp.suffix + if suf in {'.xz'}: + import lzma + return lzma.open(pp, *args, **kwargs) + elif suf in {'.zip'}: + from zipfile import ZipFile + return ZipFile(pp).open(*args, **kwargs) + elif suf in {'.lz4'}: + import lz4.frame # type: ignore + return lz4.frame.open(str(pp)) + elif suf in {'.zstd'}: + return _zstd_open(pp) + else: + return pp.open(*args, **kwargs) + + +class CPath(PosixPath): + """ + Ugh. So, can't override Path because of some _flavour thing. + Path only has _accessor and _closed slots, so can't directly set .open method + _accessor.open has to return file descriptor, doesn't work for compressed stuff. + """ + def open(self, *args, **kwargs): + # TODO assert read only? + return kopen(str(self)) + + +open = kopen # TODO FIXME remove? diff --git a/my/media/youtube.py b/my/media/youtube.py index ba7385f..9b17ce2 100755 --- a/my/media/youtube.py +++ b/my/media/youtube.py @@ -1,11 +1,10 @@ #!/usr/bin/env python3 from datetime import datetime from typing import NamedTuple, List -from pathlib import Path from kython.ktakeout import TakeoutHTMLParser -from kython.kompress import open as kopen +from ..kython.kompress import kopen from ..takeout import get_last_takeout diff --git a/my/reddit.py b/my/reddit.py index 37451c3..692aaa2 100755 --- a/my/reddit.py +++ b/my/reddit.py @@ -1,28 +1,17 @@ #!/usr/bin/env python3 -from pathlib import Path, PosixPath +from pathlib import Path from typing import List, Sequence, Mapping, Iterator +from .kython.kompress import CPath from .common import mcachew, get_files, LazyLogger from mycfg import paths import mycfg.repos.rexport.dal as rexport -# TODO Move this to kython.kompress? -class CPath(PosixPath): - """ - Ugh. So, can't override Path because of some _flavour thing. - Path only has _accessor and _closed slots, so can't directly set .open method - _accessor.open has to return file descriptor, doesn't work for compressed stuff. 
- """ - def open(self, *args, **kwargs): - # TODO assert read only? - from kython import kompress - return kompress.open(str(self)) - - def get_sources() -> Sequence[Path]: # TODO use zstd? + # TODO maybe add assert to get_files? (and allow to suppress it) files = get_files(paths.rexport.export_dir, glob='*.json.xz') res = list(map(CPath, files)); assert len(res) > 0 return tuple(res) diff --git a/my/takeout.py b/my/takeout.py index 03378e2..53129fc 100644 --- a/my/takeout.py +++ b/my/takeout.py @@ -5,7 +5,7 @@ from .common import get_files from mycfg import paths -from kython.kompress import open as kopen +from .kython.kompress import kopen def get_last_takeout(*, path: Optional[str]=None) -> Path: """ @@ -21,6 +21,7 @@ def get_last_takeout(*, path: Optional[str]=None) -> Path: return takeout except: # TODO eh, a bit horrible, but works for now.. + # TODO move to kompress? 'kexists'? continue raise RuntimeError(f'Not found: {path}')