add more stuff to kompress; less kython deps
This commit is contained in:
parent
b852e5a3a3
commit
e3b3b12ff1
5 changed files with 59 additions and 22 deletions
|
@ -5,6 +5,8 @@ from pathlib import Path
|
||||||
|
|
||||||
import pytz
|
import pytz
|
||||||
|
|
||||||
|
from ..kython.klogging import LazyLogger
|
||||||
|
from ..kython.kompress import CPath
|
||||||
from ..common import get_files, mcachew
|
from ..common import get_files, mcachew
|
||||||
from ..error import Res
|
from ..error import Res
|
||||||
|
|
||||||
|
@ -12,9 +14,8 @@ from mycfg import paths
|
||||||
import mycfg.repos.ghexport.dal as ghexport
|
import mycfg.repos.ghexport.dal as ghexport
|
||||||
|
|
||||||
|
|
||||||
def get_logger():
|
logger = LazyLogger('my.github')
|
||||||
import logging
|
# TODO __package__???
|
||||||
return logging.getLogger('my.github') # TODO __package__???
|
|
||||||
|
|
||||||
|
|
||||||
class Event(NamedTuple):
|
class Event(NamedTuple):
|
||||||
|
@ -74,7 +75,8 @@ def _get_summary(e) -> Tuple[str, Optional[str], Optional[str]]:
|
||||||
|
|
||||||
|
|
||||||
def get_dal():
|
def get_dal():
|
||||||
sources = get_files(paths.github.export_dir, glob='*.json')
|
sources = get_files(paths.github.export_dir, glob='*.json*')
|
||||||
|
sources = list(map(CPath, sources)) # TODO maybe move it to get_files? e.g. compressed=True arg?
|
||||||
return ghexport.DAL(sources)
|
return ghexport.DAL(sources)
|
||||||
|
|
||||||
|
|
||||||
|
@ -216,7 +218,6 @@ def iter_backup_events(dal=get_dal()) -> Iterator[Event]:
|
||||||
|
|
||||||
|
|
||||||
def iter_events() -> Iterator[Res[Event]]:
|
def iter_events() -> Iterator[Res[Event]]:
|
||||||
logger = get_logger()
|
|
||||||
from itertools import chain
|
from itertools import chain
|
||||||
emitted: Set[Tuple[datetime, str]] = set()
|
emitted: Set[Tuple[datetime, str]] = set()
|
||||||
for e in chain(iter_gdpr_events(), iter_backup_events()):
|
for e in chain(iter_gdpr_events(), iter_backup_events()):
|
||||||
|
|
47
my/kython/kompress.py
Normal file
47
my/kython/kompress.py
Normal file
|
@ -0,0 +1,47 @@
|
||||||
|
"""
|
||||||
|
Various helpers for compression
|
||||||
|
"""
|
||||||
|
from pathlib import Path, PosixPath
|
||||||
|
from typing import Union
|
||||||
|
|
||||||
|
PathIsh = Union[Path, str]
|
||||||
|
|
||||||
|
|
||||||
|
def _zstd_open(path: Path):
    """
    Open a zstandard-compressed file for reading.

    The file is opened in binary mode and wrapped in the reader returned by
    ``ZstdDecompressor.stream_reader``; that reader is what gets returned.
    """
    # imported inside the function, so zstandard is only required when this is called
    import zstandard as zstd # type: ignore
    raw = path.open('rb')
    return zstd.ZstdDecompressor().stream_reader(raw)
|
||||||
|
|
||||||
|
|
||||||
|
def kopen(path: PathIsh, *args, **kwargs): # TODO is it bytes stream??
    """
    Open ``path``, picking a decompressing opener based on the file suffix.

    Recognised suffixes: ``.xz`` (lzma), ``.zip``, ``.lz4`` and
    ``.zstd``/``.zst``. Any other suffix falls back to ``Path.open``.

    ``*args`` and ``**kwargs`` are forwarded to the underlying opener, with one
    exception: zstandard files go through ``_zstd_open``, which takes only the path.
    For ``.zip`` the arguments go to ``ZipFile.open``, so the first positional
    argument is the name of the archive member to read.
    """
    pp = Path(path)
    suf = pp.suffix
    if suf == '.xz':
        import lzma
        return lzma.open(pp, *args, **kwargs)
    if suf == '.zip':
        from zipfile import ZipFile
        return ZipFile(pp).open(*args, **kwargs)
    if suf == '.lz4':
        import lz4.frame # type: ignore
        # forward mode & co. like the other branches do, instead of silently dropping them
        return lz4.frame.open(str(pp), *args, **kwargs)
    if suf in {'.zstd', '.zst'}:
        # '.zst' is the extension the zstd command line tool produces
        return _zstd_open(pp)
    return pp.open(*args, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
class CPath(PosixPath):
    """
    Path whose ``open`` is routed through ``kopen`` (suffix-based decompression).

    Ugh. So, can't override Path because of some _flavour thing.
    Path only has _accessor and _closed slots, so can't directly set .open method
    _accessor.open has to return file descriptor, doesn't work for compressed stuff.
    """
    def open(self, *args, **kwargs):
        # TODO assert read only?
        # pass mode/encoding etc. through, so they aren't silently ignored
        return kopen(str(self), *args, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
open = kopen # TODO FIXME remove?
|
|
@ -1,11 +1,10 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from typing import NamedTuple, List
|
from typing import NamedTuple, List
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
from kython.ktakeout import TakeoutHTMLParser
|
from kython.ktakeout import TakeoutHTMLParser
|
||||||
from kython.kompress import open as kopen
|
|
||||||
|
|
||||||
|
from ..kython.kompress import kopen
|
||||||
from ..takeout import get_last_takeout
|
from ..takeout import get_last_takeout
|
||||||
|
|
||||||
|
|
||||||
|
|
17
my/reddit.py
17
my/reddit.py
|
@ -1,28 +1,17 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
from pathlib import Path, PosixPath
|
from pathlib import Path
|
||||||
from typing import List, Sequence, Mapping, Iterator
|
from typing import List, Sequence, Mapping, Iterator
|
||||||
|
|
||||||
|
from .kython.kompress import CPath
|
||||||
from .common import mcachew, get_files, LazyLogger
|
from .common import mcachew, get_files, LazyLogger
|
||||||
|
|
||||||
from mycfg import paths
|
from mycfg import paths
|
||||||
import mycfg.repos.rexport.dal as rexport
|
import mycfg.repos.rexport.dal as rexport
|
||||||
|
|
||||||
|
|
||||||
# TODO Move this to kython.kompress?
|
|
||||||
class CPath(PosixPath):
|
|
||||||
"""
|
|
||||||
Ugh. So, can't override Path because of some _flavour thing.
|
|
||||||
Path only has _accessor and _closed slots, so can't directly set .open method
|
|
||||||
_accessor.open has to return file descriptor, doesn't work for compressed stuff.
|
|
||||||
"""
|
|
||||||
def open(self, *args, **kwargs):
|
|
||||||
# TODO assert read only?
|
|
||||||
from kython import kompress
|
|
||||||
return kompress.open(str(self))
|
|
||||||
|
|
||||||
|
|
||||||
def get_sources() -> Sequence[Path]:
|
def get_sources() -> Sequence[Path]:
|
||||||
# TODO use zstd?
|
# TODO use zstd?
|
||||||
|
# TODO maybe add assert to get_files? (and allow to suppress it)
|
||||||
files = get_files(paths.rexport.export_dir, glob='*.json.xz')
|
files = get_files(paths.rexport.export_dir, glob='*.json.xz')
|
||||||
res = list(map(CPath, files)); assert len(res) > 0
|
res = list(map(CPath, files)); assert len(res) > 0
|
||||||
return tuple(res)
|
return tuple(res)
|
||||||
|
|
|
@ -5,7 +5,7 @@ from .common import get_files
|
||||||
|
|
||||||
from mycfg import paths
|
from mycfg import paths
|
||||||
|
|
||||||
from kython.kompress import open as kopen
|
from .kython.kompress import kopen
|
||||||
|
|
||||||
def get_last_takeout(*, path: Optional[str]=None) -> Path:
|
def get_last_takeout(*, path: Optional[str]=None) -> Path:
|
||||||
"""
|
"""
|
||||||
|
@ -21,6 +21,7 @@ def get_last_takeout(*, path: Optional[str]=None) -> Path:
|
||||||
return takeout
|
return takeout
|
||||||
except:
|
except:
|
||||||
# TODO eh, a bit horrible, but works for now..
|
# TODO eh, a bit horrible, but works for now..
|
||||||
|
# TODO move to kompress? 'kexists'?
|
||||||
continue
|
continue
|
||||||
raise RuntimeError(f'Not found: {path}')
|
raise RuntimeError(f'Not found: {path}')
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue