add more stuff to kompress; less kython deps
This commit is contained in:
parent
b852e5a3a3
commit
e3b3b12ff1
5 changed files with 59 additions and 22 deletions
|
@ -5,6 +5,8 @@ from pathlib import Path
|
|||
|
||||
import pytz
|
||||
|
||||
from ..kython.klogging import LazyLogger
|
||||
from ..kython.kompress import CPath
|
||||
from ..common import get_files, mcachew
|
||||
from ..error import Res
|
||||
|
||||
|
@ -12,9 +14,8 @@ from mycfg import paths
|
|||
import mycfg.repos.ghexport.dal as ghexport
|
||||
|
||||
|
||||
def get_logger():
    """Return the standard-library logger registered under the name 'my.github'."""
    from logging import getLogger
    # TODO __package__???
    return getLogger('my.github')
|
||||
logger = LazyLogger('my.github')
|
||||
# TODO __package__???
|
||||
|
||||
|
||||
class Event(NamedTuple):
|
||||
|
@ -74,7 +75,8 @@ def _get_summary(e) -> Tuple[str, Optional[str], Optional[str]]:
|
|||
|
||||
|
||||
def get_dal():
    """
    Build the ghexport DAL over the export files found in ``paths.github.export_dir``.

    The ``*.json*`` glob also matches exports with an extra suffix after ``.json``
    (e.g. ``.json.xz``). Every match is wrapped in ``CPath`` before being handed
    to ``ghexport.DAL``, so that ``.open()`` on a source goes through ``kopen``.
    """
    # A single scan is enough: '*.json*' is a superset of the earlier '*.json' pattern.
    sources = get_files(paths.github.export_dir, glob='*.json*')
    sources = list(map(CPath, sources)) # TODO maybe move it to get_files? e.g. compressed=True arg?
    return ghexport.DAL(sources)
|
||||
|
||||
|
||||
|
@ -216,7 +218,6 @@ def iter_backup_events(dal=get_dal()) -> Iterator[Event]:
|
|||
|
||||
|
||||
def iter_events() -> Iterator[Res[Event]]:
|
||||
logger = get_logger()
|
||||
from itertools import chain
|
||||
emitted: Set[Tuple[datetime, str]] = set()
|
||||
for e in chain(iter_gdpr_events(), iter_backup_events()):
|
||||
|
|
47
my/kython/kompress.py
Normal file
47
my/kython/kompress.py
Normal file
|
@ -0,0 +1,47 @@
|
|||
"""
|
||||
Various helpers for compression
|
||||
"""
|
||||
from pathlib import Path, PosixPath
|
||||
from typing import Union
|
||||
|
||||
# Anything accepted as a filesystem path here: a pathlib.Path or a plain string.
PathIsh = Union[Path, str]
|
||||
|
||||
|
||||
def _zstd_open(path: Path):
    """
    Open the zstd-compressed file at ``path`` and return a decompressing stream reader over it.

    ``zstandard`` is imported inside the function, so the package is only
    required once a zstd file is actually opened.
    """
    import zstandard as zstd # type: ignore
    # NOTE(review): the raw handle is never closed explicitly here; whether closing
    # the returned reader also closes it depends on the zstandard version -- confirm.
    raw = path.open('rb')
    return zstd.ZstdDecompressor().stream_reader(raw)
|
||||
|
||||
|
||||
def kopen(path: 'PathIsh', *args, **kwargs): # TODO is it bytes stream??
    """
    Open ``path``, transparently decompressing it according to its file suffix.

    Only the last suffix is inspected, so ``dump.json.xz`` is treated as ``.xz``:

    - ``.xz``              -> ``lzma.open``
    - ``.zip``             -> ``ZipFile(path).open``; ``args``/``kwargs`` must name the archive member
    - ``.lz4``             -> ``lz4.frame.open``
    - ``.zst`` / ``.zstd`` -> ``_zstd_open`` (extra arguments are not used)
    - anything else        -> ``Path.open``

    Apart from the zstd case, extra positional and keyword arguments are handed
    to the underlying opener unchanged.

    The result is not uniformly text or bytes: ``lzma.open`` defaults to binary
    mode while ``Path.open`` defaults to text mode, so pass an explicit mode
    when the distinction matters.
    """
    pp = Path(path)
    suf = pp.suffix
    if suf in {'.xz'}:
        import lzma
        return lzma.open(pp, *args, **kwargs)
    elif suf in {'.zip'}:
        from zipfile import ZipFile
        return ZipFile(pp).open(*args, **kwargs)
    elif suf in {'.lz4'}:
        import lz4.frame # type: ignore
        # forward the caller's arguments, same as the other pass-through branches
        return lz4.frame.open(str(pp), *args, **kwargs)
    elif suf in {'.zst', '.zstd'}:
        # '.zst' is the extension the zstd tooling itself produces; '.zstd' is kept for existing files
        return _zstd_open(pp)
    else:
        return pp.open(*args, **kwargs)
|
||||
|
||||
|
||||
class CPath(PosixPath):
    """
    A path whose ``open()`` goes through ``kopen``, so code that only knows how
    to call ``path.open()`` can read compressed files.

    Ugh. Subclassing Path itself doesn't work because of some _flavour thing.
    Path only has _accessor and _closed slots, so the .open method can't be set directly,
    and _accessor.open has to return a file descriptor, which doesn't work for compressed stuff.
    """
    def open(self, *args, **kwargs):
        """Open this path via ``kopen``; extra arguments are accepted but not passed on."""
        # TODO assert read only?
        # NOTE(review): *args/**kwargs (mode, encoding, ...) are dropped here -- confirm that is intended
        location = str(self)
        return kopen(location)
|
||||
|
||||
|
||||
# Module-level alias for kopen. Presumably kept for code that imports it under the
# older spelling (`from ...kompress import open as kopen`) -- confirm before removing.
# NOTE: within this module the name shadows the builtin open().
open = kopen # TODO FIXME remove?
|
|
@ -1,11 +1,10 @@
|
|||
#!/usr/bin/env python3
|
||||
from datetime import datetime
|
||||
from typing import NamedTuple, List
|
||||
from pathlib import Path
|
||||
|
||||
from kython.ktakeout import TakeoutHTMLParser
|
||||
from kython.kompress import open as kopen
|
||||
|
||||
from ..kython.kompress import kopen
|
||||
from ..takeout import get_last_takeout
|
||||
|
||||
|
||||
|
|
17
my/reddit.py
17
my/reddit.py
|
@ -1,28 +1,17 @@
|
|||
#!/usr/bin/env python3
|
||||
from pathlib import Path, PosixPath
|
||||
from pathlib import Path
|
||||
from typing import List, Sequence, Mapping, Iterator
|
||||
|
||||
from .kython.kompress import CPath
|
||||
from .common import mcachew, get_files, LazyLogger
|
||||
|
||||
from mycfg import paths
|
||||
import mycfg.repos.rexport.dal as rexport
|
||||
|
||||
|
||||
# TODO Move this to kython.kompress?
|
||||
class CPath(PosixPath):
    """
    A path whose ``open()`` is routed through ``kython.kompress.open``.

    Ugh. Subclassing Path itself doesn't work because of some _flavour thing.
    Path only has _accessor and _closed slots, so the .open method can't be set directly,
    and _accessor.open has to return a file descriptor, which doesn't work for compressed stuff.
    """
    def open(self, *args, **kwargs):
        """Open this path via ``kompress.open``; extra arguments are accepted but not passed on."""
        # TODO assert read only?
        from kython import kompress
        location = str(self)
        return kompress.open(location)
|
||||
|
||||
|
||||
def get_sources() -> Sequence[Path]:
    """
    Return the ``*.json.xz`` files from ``paths.rexport.export_dir`` as a tuple of ``CPath`` objects.

    Asserts that at least one file was found.
    """
    # TODO use zstd?
    # TODO maybe add assert to get_files? (and allow to suppress it)
    exports = get_files(paths.rexport.export_dir, glob='*.json.xz')
    wrapped = tuple(CPath(export) for export in exports)
    assert len(wrapped) > 0
    return wrapped
|
||||
|
|
|
@ -5,7 +5,7 @@ from .common import get_files
|
|||
|
||||
from mycfg import paths
|
||||
|
||||
from kython.kompress import open as kopen
|
||||
from .kython.kompress import kopen
|
||||
|
||||
def get_last_takeout(*, path: Optional[str]=None) -> Path:
|
||||
"""
|
||||
|
@ -21,6 +21,7 @@ def get_last_takeout(*, path: Optional[str]=None) -> Path:
|
|||
return takeout
|
||||
except:
|
||||
# TODO eh, a bit horrible, but works for now..
|
||||
# TODO move to kompress? 'kexists'?
|
||||
continue
|
||||
raise RuntimeError(f'Not found: {path}')
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue