From 15789a4149994c73fc307ae81e8d367ebfaf6f57 Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Thu, 29 Oct 2020 01:35:43 +0000 Subject: [PATCH] kython.kompress: move to core (with a fallback, used in promnesia) --- my/core/common.py | 2 +- my/core/kompress.py | 94 +++++++++++++++++++++++++++++++++++++ my/google/takeout/html.py | 2 +- my/google/takeout/paths.py | 2 +- my/kython/README | 2 +- my/kython/kompress.py | 95 +------------------------------------- my/location/google.py | 2 +- my/twitter/archive.py | 2 +- tests/misc.py | 2 +- 9 files changed, 102 insertions(+), 101 deletions(-) create mode 100644 my/core/kompress.py mode change 100644 => 120000 my/kython/kompress.py diff --git a/my/core/common.py b/my/core/common.py index 09629bd..aa28056 100644 --- a/my/core/common.py +++ b/my/core/common.py @@ -183,7 +183,7 @@ def get_files( traceback.print_stack() if guess_compression: - from ..kython.kompress import CPath # todo move to core? + from .kompress import CPath paths = [CPath(p) if _is_compressed(p) else p for p in paths] return tuple(paths) diff --git a/my/core/kompress.py b/my/core/kompress.py new file mode 100644 index 0000000..4fa2840 --- /dev/null +++ b/my/core/kompress.py @@ -0,0 +1,94 @@ +""" +Various helpers for compression +""" +import pathlib +from pathlib import Path +from typing import Union, IO +import io + +PathIsh = Union[Path, str] + + +def _zstd_open(path: Path, *args, **kwargs) -> IO[str]: + import zstandard as zstd # type: ignore + fh = path.open('rb') + dctx = zstd.ZstdDecompressor() + reader = dctx.stream_reader(fh) + return io.TextIOWrapper(reader, **kwargs) # meh + + +# TODO returns protocol that we can call 'read' against? +# TODO use the 'dependent type' trick? +def kopen(path: PathIsh, *args, mode: str='rt', **kwargs) -> IO[str]: + # TODO handle mode in *rags? 
+ encoding = kwargs.get('encoding', 'utf8') + kwargs['encoding'] = encoding + + pp = Path(path) + suf = pp.suffix + if suf in {'.xz'}: + import lzma + r = lzma.open(pp, mode, *args, **kwargs) + # should only happen for binary mode? + # file:///usr/share/doc/python3/html/library/lzma.html?highlight=lzma#lzma.open + assert not isinstance(r, lzma.LZMAFile), r + return r + elif suf in {'.zip'}: + # eh. this behaviour is a bit dodgy... + from zipfile import ZipFile + zfile = ZipFile(pp) + + [subpath] = args # meh? + + ## oh god... https://stackoverflow.com/a/5639960/706389 + ifile = zfile.open(subpath, mode='r') + ifile.readable = lambda: True # type: ignore + ifile.writable = lambda: False # type: ignore + ifile.seekable = lambda: False # type: ignore + ifile.read1 = ifile.read # type: ignore + # TODO pass all kwargs here?? + # todo 'expected "BinaryIO"'?? + return io.TextIOWrapper(ifile, encoding=encoding) # type: ignore[arg-type] + elif suf in {'.lz4'}: + import lz4.frame # type: ignore + return lz4.frame.open(str(pp), mode, *args, **kwargs) + elif suf in {'.zstd'}: + return _zstd_open(pp, mode, *args, **kwargs) + else: + return pp.open(mode, *args, **kwargs) + + +import typing +import os + +if typing.TYPE_CHECKING: + # otherwise mypy can't figure out that BasePath is a type alias.. + BasePath = pathlib.Path +else: + BasePath = pathlib.WindowsPath if os.name == 'nt' else pathlib.PosixPath + + +class CPath(BasePath): + """ + Hacky way to support compressed files. + If you can think of a better way to do this, please let me know! https://github.com/karlicoss/HPI/issues/20 + + Ugh. So, can't override Path because of some _flavour thing. + Path only has _accessor and _closed slots, so can't directly set .open method + _accessor.open has to return file descriptor, doesn't work for compressed stuff. + """ + def open(self, *args, **kwargs): + # TODO assert read only? 
+ return kopen(str(self)) + + +open = kopen # TODO deprecate + + +# meh +def kexists(path: PathIsh, subpath: str) -> bool: + try: + kopen(path, subpath) + return True + except Exception: + return False diff --git a/my/google/takeout/html.py b/my/google/takeout/html.py index ddcdd59..a50fb3f 100644 --- a/my/google/takeout/html.py +++ b/my/google/takeout/html.py @@ -130,7 +130,7 @@ class TakeoutHTMLParser(HTMLParser): def read_html(tpath: Path, file: str) -> Iterable[Parsed]: - from ...kython.kompress import kopen + from ...core.kompress import kopen results: List[Parsed] = [] def cb(dt: datetime, url: Url, title: Title) -> None: results.append((dt, url, title)) diff --git a/my/google/takeout/paths.py b/my/google/takeout/paths.py index 36b3e0c..6c42635 100644 --- a/my/google/takeout/paths.py +++ b/my/google/takeout/paths.py @@ -23,7 +23,7 @@ config = make_config(google) from pathlib import Path from typing import Optional, Iterable -from ...kython.kompress import kopen, kexists +from ...core.kompress import kopen, kexists def get_takeouts(*, path: Optional[str]=None) -> Iterable[Path]: diff --git a/my/kython/README b/my/kython/README index 1e7d97e..21c8083 100644 --- a/my/kython/README +++ b/my/kython/README @@ -1 +1 @@ -vendorized kython (https://github.com/karlicoss/kython) stuff +deprecated, please use my.core directly diff --git a/my/kython/kompress.py b/my/kython/kompress.py deleted file mode 100644 index 4fa2840..0000000 --- a/my/kython/kompress.py +++ /dev/null @@ -1,94 +0,0 @@ -""" -Various helpers for compression -""" -import pathlib -from pathlib import Path -from typing import Union, IO -import io - -PathIsh = Union[Path, str] - - -def _zstd_open(path: Path, *args, **kwargs) -> IO[str]: - import zstandard as zstd # type: ignore - fh = path.open('rb') - dctx = zstd.ZstdDecompressor() - reader = dctx.stream_reader(fh) - return io.TextIOWrapper(reader, **kwargs) # meh - - -# TODO returns protocol that we can call 'read' against? 
-# TODO use the 'dependent type' trick? -def kopen(path: PathIsh, *args, mode: str='rt', **kwargs) -> IO[str]: - # TODO handle mode in *rags? - encoding = kwargs.get('encoding', 'utf8') - kwargs['encoding'] = encoding - - pp = Path(path) - suf = pp.suffix - if suf in {'.xz'}: - import lzma - r = lzma.open(pp, mode, *args, **kwargs) - # should only happen for binary mode? - # file:///usr/share/doc/python3/html/library/lzma.html?highlight=lzma#lzma.open - assert not isinstance(r, lzma.LZMAFile), r - return r - elif suf in {'.zip'}: - # eh. this behaviour is a bit dodgy... - from zipfile import ZipFile - zfile = ZipFile(pp) - - [subpath] = args # meh? - - ## oh god... https://stackoverflow.com/a/5639960/706389 - ifile = zfile.open(subpath, mode='r') - ifile.readable = lambda: True # type: ignore - ifile.writable = lambda: False # type: ignore - ifile.seekable = lambda: False # type: ignore - ifile.read1 = ifile.read # type: ignore - # TODO pass all kwargs here?? - # todo 'expected "BinaryIO"'?? - return io.TextIOWrapper(ifile, encoding=encoding) # type: ignore[arg-type] - elif suf in {'.lz4'}: - import lz4.frame # type: ignore - return lz4.frame.open(str(pp), mode, *args, **kwargs) - elif suf in {'.zstd'}: - return _zstd_open(pp, mode, *args, **kwargs) - else: - return pp.open(mode, *args, **kwargs) - - -import typing -import os - -if typing.TYPE_CHECKING: - # otherwise mypy can't figure out that BasePath is a type alias.. - BasePath = pathlib.Path -else: - BasePath = pathlib.WindowsPath if os.name == 'nt' else pathlib.PosixPath - - -class CPath(BasePath): - """ - Hacky way to support compressed files. - If you can think of a better way to do this, please let me know! https://github.com/karlicoss/HPI/issues/20 - - Ugh. So, can't override Path because of some _flavour thing. - Path only has _accessor and _closed slots, so can't directly set .open method - _accessor.open has to return file descriptor, doesn't work for compressed stuff. 
- """ - def open(self, *args, **kwargs): - # TODO assert read only? - return kopen(str(self)) - - -open = kopen # TODO deprecate - - -# meh -def kexists(path: PathIsh, subpath: str) -> bool: - try: - kopen(path, subpath) - return True - except Exception: - return False diff --git a/my/kython/kompress.py b/my/kython/kompress.py new file mode 120000 index 0000000..59edcd1 --- /dev/null +++ b/my/kython/kompress.py @@ -0,0 +1 @@ +../core/kompress.py \ No newline at end of file diff --git a/my/location/google.py b/my/location/google.py index b8cfcfc..96bdf5e 100644 --- a/my/location/google.py +++ b/my/location/google.py @@ -18,7 +18,7 @@ import geopy # type: ignore from ..core.common import LazyLogger, mcachew from ..core.cachew import cache_dir -from ..kython import kompress +from ..core import kompress # otherwise uses ijson diff --git a/my/twitter/archive.py b/my/twitter/archive.py index 8fa93db..edd26d2 100755 --- a/my/twitter/archive.py +++ b/my/twitter/archive.py @@ -40,7 +40,7 @@ import zipfile import pytz from ..common import PathIsh, get_files, LazyLogger, Json -from ..kython import kompress +from ..core import kompress diff --git a/tests/misc.py b/tests/misc.py index 869c276..2e83f28 100644 --- a/tests/misc.py +++ b/tests/misc.py @@ -5,7 +5,7 @@ import lzma import io import zipfile -from my.kython.kompress import kopen, kexists, CPath +from my.core.kompress import kopen, kexists, CPath def test_kopen(tmp_path: Path) -> None: "Plaintext handled transparently"