add more stuff to kompress; less kython deps

This commit is contained in:
Dima Gerasimov 2020-02-01 15:17:13 +00:00
parent b852e5a3a3
commit e3b3b12ff1
5 changed files with 59 additions and 22 deletions

View file

@ -5,6 +5,8 @@ from pathlib import Path
import pytz
from ..kython.klogging import LazyLogger
from ..kython.kompress import CPath
from ..common import get_files, mcachew
from ..error import Res
@ -12,9 +14,8 @@ from mycfg import paths
import mycfg.repos.ghexport.dal as ghexport
def get_logger():
import logging
return logging.getLogger('my.github') # TODO __package__???
logger = LazyLogger('my.github')
# TODO __package__???
class Event(NamedTuple):
@ -74,7 +75,8 @@ def _get_summary(e) -> Tuple[str, Optional[str], Optional[str]]:
def get_dal():
sources = get_files(paths.github.export_dir, glob='*.json')
sources = get_files(paths.github.export_dir, glob='*.json*')
sources = list(map(CPath, sources)) # TODO maybe move it to get_files? e.g. compressed=True arg?
return ghexport.DAL(sources)
@ -216,7 +218,6 @@ def iter_backup_events(dal=get_dal()) -> Iterator[Event]:
def iter_events() -> Iterator[Res[Event]]:
logger = get_logger()
from itertools import chain
emitted: Set[Tuple[datetime, str]] = set()
for e in chain(iter_gdpr_events(), iter_backup_events()):

47
my/kython/kompress.py Normal file
View file

@ -0,0 +1,47 @@
"""
Various helpers for compression
"""
from pathlib import Path, PosixPath
from typing import Union
PathIsh = Union[Path, str]


def _zstd_open(path: Path):
    """
    Open a zstd-compressed file and return a decompressing reader over it.

    ``zstandard`` is imported lazily, so the dependency is only needed when a
    zstd file is actually opened. The file is always opened in ``'rb'`` mode.
    """
    import zstandard as zstd # type: ignore
    fh = path.open('rb')
    dctx = zstd.ZstdDecompressor()
    reader = dctx.stream_reader(fh)
    return reader


def kopen(path: PathIsh, *args, **kwargs): # TODO is it bytes stream??
    """
    Open ``path``, choosing a decompressor from the file suffix.

    Suffix handling:

    - ``.xz``: ``lzma.open(path, *args, **kwargs)``
    - ``.zip``: ``ZipFile(path).open(*args, **kwargs)``, so the first
      positional argument is the name of the archive member to open
    - ``.lz4``: ``lz4.frame.open(str(path), *args, **kwargs)``
    - ``.zstd`` / ``.zst``: ``_zstd_open(path)``; extra arguments are ignored
    - anything else: ``Path.open(*args, **kwargs)``

    Decompression modules are imported lazily inside the matching branch.
    What kind of stream comes back (text or bytes) depends on the backend and
    on the arguments passed through to it.
    """
    pp = Path(path)
    suf = pp.suffix
    if suf in {'.xz'}:
        import lzma
        return lzma.open(pp, *args, **kwargs)
    elif suf in {'.zip'}:
        from zipfile import ZipFile
        return ZipFile(pp).open(*args, **kwargs)
    elif suf in {'.lz4'}:
        import lz4.frame # type: ignore
        # Forward the caller's arguments like the other branches do, instead
        # of silently dropping them.
        return lz4.frame.open(str(pp), *args, **kwargs)
    elif suf in {'.zstd', '.zst'}:
        # '.zst' is what the zstd command line tool emits by default; without
        # it such files would be opened as plain (still compressed) files.
        return _zstd_open(pp)
    else:
        return pp.open(*args, **kwargs)
class CPath(PosixPath):
    """
    Path whose ``open`` is routed through ``kopen``, so that it can be handed
    to code that only knows how to call ``path.open()``.

    Notes on why it is implemented as a PosixPath subclass:
    - Path itself can't be overridden because of some ``_flavour`` thing.
    - Path only has ``_accessor`` and ``_closed`` slots, so the ``.open``
      method can't be set directly.
    - ``_accessor.open`` has to return a file descriptor, which doesn't work
      for compressed stuff.
    """
    def open(self, *args, **kwargs):
        # NOTE: args/kwargs are accepted for signature compatibility but are
        # not passed on; only the path itself is given to kopen.
        # TODO assert read only?
        location = str(self)
        return kopen(location)
# Module-level alias for kopen. Note that it shadows the builtin `open` for
# any code placed after this line in the module.
# Presumably kept for call sites that import `open` from here -- confirm before removing.
open = kopen # TODO FIXME remove?

View file

@ -1,11 +1,10 @@
#!/usr/bin/env python3
from datetime import datetime
from typing import NamedTuple, List
from pathlib import Path
from kython.ktakeout import TakeoutHTMLParser
from kython.kompress import open as kopen
from ..kython.kompress import kopen
from ..takeout import get_last_takeout

View file

@ -1,28 +1,17 @@
#!/usr/bin/env python3
from pathlib import Path, PosixPath
from pathlib import Path
from typing import List, Sequence, Mapping, Iterator
from .kython.kompress import CPath
from .common import mcachew, get_files, LazyLogger
from mycfg import paths
import mycfg.repos.rexport.dal as rexport
# TODO Move this to kython.kompress?
class CPath(PosixPath):
"""
Ugh. So, can't override Path because of some _flavour thing.
Path only has _accessor and _closed slots, so can't directly set .open method
_accessor.open has to return file descriptor, doesn't work for compressed stuff.
"""
def open(self, *args, **kwargs):
# TODO assert read only?
from kython import kompress
return kompress.open(str(self))
def get_sources() -> Sequence[Path]:
    """
    Collect the ``*.json.xz`` files found under ``paths.rexport.export_dir``
    and return them as a tuple of ``CPath`` objects.

    Asserts that at least one file was found.
    """
    # TODO use zstd?
    # TODO maybe add assert to get_files? (and allow to suppress it)
    found = get_files(paths.rexport.export_dir, glob='*.json.xz')
    sources = tuple(CPath(f) for f in found)
    assert len(sources) > 0
    return sources

View file

@ -5,7 +5,7 @@ from .common import get_files
from mycfg import paths
from kython.kompress import open as kopen
from .kython.kompress import kopen
def get_last_takeout(*, path: Optional[str]=None) -> Path:
"""
@ -21,6 +21,7 @@ def get_last_takeout(*, path: Optional[str]=None) -> Path:
return takeout
except:
# TODO eh, a bit horrible, but works for now..
# TODO move to kompress? 'kexists'?
continue
raise RuntimeError(f'Not found: {path}')