core.kompress: proper support for read_text/read_bytes against zstd/xz archives
This commit is contained in:
parent
b94904f5ee
commit
4dfc4029c3
1 changed files with 36 additions and 14 deletions
|
@ -28,30 +28,44 @@ def is_compressed(p: Path) -> bool:
|
||||||
return any(p.name.endswith(ext) for ext in {Ext.xz, Ext.zip, Ext.lz4, Ext.zstd, Ext.zst, Ext.targz})
|
return any(p.name.endswith(ext) for ext in {Ext.xz, Ext.zip, Ext.lz4, Ext.zstd, Ext.zst, Ext.targz})
|
||||||
|
|
||||||
|
|
||||||
def _zstd_open(path: Path, *args, **kwargs) -> IO[str]:
|
def _zstd_open(path: Path, *args, **kwargs) -> IO:
|
||||||
import zstandard as zstd # type: ignore
|
import zstandard as zstd # type: ignore
|
||||||
fh = path.open('rb')
|
fh = path.open('rb')
|
||||||
dctx = zstd.ZstdDecompressor()
|
dctx = zstd.ZstdDecompressor()
|
||||||
reader = dctx.stream_reader(fh)
|
reader = dctx.stream_reader(fh)
|
||||||
return io.TextIOWrapper(reader, **kwargs) # meh
|
|
||||||
|
mode = kwargs.get('mode', 'rt')
|
||||||
|
if mode == 'rb':
|
||||||
|
return reader
|
||||||
|
else:
|
||||||
|
# must be text mode
|
||||||
|
kwargs.pop('mode') # TextIOWrapper doesn't like it
|
||||||
|
return io.TextIOWrapper(reader, **kwargs) # meh
|
||||||
|
|
||||||
|
|
||||||
# TODO returns protocol that we can call 'read' against?
|
# TODO use the 'dependent type' trick for return type?
|
||||||
# TODO use the 'dependent type' trick?
|
def kopen(path: PathIsh, *args, mode: str='rt', **kwargs) -> IO:
|
||||||
def kopen(path: PathIsh, *args, mode: str='rt', **kwargs) -> IO[str]:
|
# just in case, but I think this shouldn't be necessary anymore
|
||||||
# TODO handle mode in *args?
|
# since when we call .read_text, encoding is passed already
|
||||||
encoding = kwargs.get('encoding', 'utf8')
|
if mode in {'r', 'rt'}:
|
||||||
|
encoding = kwargs.get('encoding', 'utf8')
|
||||||
|
else:
|
||||||
|
encoding = None
|
||||||
kwargs['encoding'] = encoding
|
kwargs['encoding'] = encoding
|
||||||
|
|
||||||
pp = Path(path)
|
pp = Path(path)
|
||||||
name = pp.name
|
name = pp.name
|
||||||
if name.endswith(Ext.xz):
|
if name.endswith(Ext.xz):
|
||||||
import lzma
|
import lzma
|
||||||
r = lzma.open(pp, mode, *args, **kwargs)
|
|
||||||
# should only happen for binary mode?
|
# ugh. for lzma, 'r' means 'rb'
|
||||||
# file:///usr/share/doc/python3/html/library/lzma.html?highlight=lzma#lzma.open
|
# https://github.com/python/cpython/blob/d01cf5072be5511595b6d0c35ace6c1b07716f8d/Lib/lzma.py#L97
|
||||||
assert not isinstance(r, lzma.LZMAFile), r
|
# whereas for regular open, 'r' means 'rt'
|
||||||
return r
|
# https://docs.python.org/3/library/functions.html#open
|
||||||
|
if mode == 'r':
|
||||||
|
mode = 'rt'
|
||||||
|
kwargs['mode'] = mode
|
||||||
|
return lzma.open(pp, *args, **kwargs)
|
||||||
elif name.endswith(Ext.zip):
|
elif name.endswith(Ext.zip):
|
||||||
# eh. this behaviour is a bit dodgy...
|
# eh. this behaviour is a bit dodgy...
|
||||||
from zipfile import ZipFile
|
from zipfile import ZipFile
|
||||||
|
@ -72,7 +86,8 @@ def kopen(path: PathIsh, *args, mode: str='rt', **kwargs) -> IO[str]:
|
||||||
import lz4.frame # type: ignore
|
import lz4.frame # type: ignore
|
||||||
return lz4.frame.open(str(pp), mode, *args, **kwargs)
|
return lz4.frame.open(str(pp), mode, *args, **kwargs)
|
||||||
elif name.endswith(Ext.zstd) or name.endswith(Ext.zst):
|
elif name.endswith(Ext.zstd) or name.endswith(Ext.zst):
|
||||||
return _zstd_open(pp, mode, *args, **kwargs)
|
kwargs['mode'] = mode
|
||||||
|
return _zstd_open(pp, *args, **kwargs)
|
||||||
elif name.endswith(Ext.targz):
|
elif name.endswith(Ext.targz):
|
||||||
import tarfile
|
import tarfile
|
||||||
# FIXME pass mode?
|
# FIXME pass mode?
|
||||||
|
@ -104,8 +119,15 @@ class CPath(BasePath):
|
||||||
_accessor.open has to return file descriptor, doesn't work for compressed stuff.
|
_accessor.open has to return file descriptor, doesn't work for compressed stuff.
|
||||||
"""
|
"""
|
||||||
def open(self, *args, **kwargs):
|
def open(self, *args, **kwargs):
|
||||||
|
kopen_kwargs = {}
|
||||||
|
mode = kwargs.get('mode')
|
||||||
|
if mode is not None:
|
||||||
|
kopen_kwargs['mode'] = mode
|
||||||
|
encoding = kwargs.get('encoding')
|
||||||
|
if encoding is not None:
|
||||||
|
kopen_kwargs['encoding'] = encoding
|
||||||
# TODO assert read only?
|
# TODO assert read only?
|
||||||
return kopen(str(self))
|
return kopen(str(self), **kopen_kwargs)
|
||||||
|
|
||||||
|
|
||||||
open = kopen # TODO deprecate
|
open = kopen # TODO deprecate
|
||||||
|
|
Loading…
Add table
Reference in a new issue