core: improve support for compressed files, add .tar.gz handling
This commit is contained in:
parent
7493770d4d
commit
73c9e46c4c
2 changed files with 28 additions and 13 deletions
|
@ -163,12 +163,6 @@ from .logging import setup_logger, LazyLogger
|
||||||
Paths = Union[Sequence[PathIsh], PathIsh]
|
Paths = Union[Sequence[PathIsh], PathIsh]
|
||||||
|
|
||||||
|
|
||||||
def _is_compressed(p: Path) -> bool:
|
|
||||||
# todo kinda lame way for now.. use mime ideally?
|
|
||||||
# should cooperate with kompress.kopen?
|
|
||||||
return p.suffix in {'.xz', '.lz4', '.zstd'}
|
|
||||||
|
|
||||||
|
|
||||||
DEFAULT_GLOB = '*'
|
DEFAULT_GLOB = '*'
|
||||||
def get_files(
|
def get_files(
|
||||||
pp: Paths,
|
pp: Paths,
|
||||||
|
@ -233,8 +227,8 @@ def get_files(
|
||||||
traceback.print_stack()
|
traceback.print_stack()
|
||||||
|
|
||||||
if guess_compression:
|
if guess_compression:
|
||||||
from .kompress import CPath
|
from .kompress import CPath, is_compressed
|
||||||
paths = [CPath(p) if _is_compressed(p) else p for p in paths]
|
paths = [CPath(p) if is_compressed(p) else p for p in paths]
|
||||||
return tuple(paths)
|
return tuple(paths)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -9,6 +9,20 @@ import io
|
||||||
PathIsh = Union[Path, str]
|
PathIsh = Union[Path, str]
|
||||||
|
|
||||||
|
|
||||||
|
class Ext:
    """File-name extensions of the compressed formats handled in this module."""

    # Every value carries its leading dot so it can be used directly as a
    # file-name suffix.
    xz = '.xz'
    zip = '.zip'
    lz4 = '.lz4'
    zstd = '.zstd'
    # Two-part extension: ``Path.suffix`` alone would report only '.gz' here.
    targz = '.tar.gz'
|
||||||
|
|
||||||
|
|
||||||
|
def is_compressed(p: Path) -> bool:
    """Return True if the file name of ``p`` ends with a known compressed extension.

    The check is purely name-based (the file is never opened) and looks at the
    whole file name rather than ``p.suffix``, so multi-part extensions such as
    ``.tar.gz`` are matched as well.
    """
    # todo kinda lame way for now.. use mime ideally?
    # should cooperate with kompress.kopen?
    # str.endswith accepts a tuple of candidates, so one call covers every
    # extension without building a set and a generator on each invocation.
    return p.name.endswith((Ext.xz, Ext.zip, Ext.lz4, Ext.zstd, Ext.targz))
|
||||||
|
|
||||||
|
|
||||||
def _zstd_open(path: Path, *args, **kwargs) -> IO[str]:
|
def _zstd_open(path: Path, *args, **kwargs) -> IO[str]:
|
||||||
import zstandard as zstd # type: ignore
|
import zstandard as zstd # type: ignore
|
||||||
fh = path.open('rb')
|
fh = path.open('rb')
|
||||||
|
@ -25,15 +39,15 @@ def kopen(path: PathIsh, *args, mode: str='rt', **kwargs) -> IO[str]:
|
||||||
kwargs['encoding'] = encoding
|
kwargs['encoding'] = encoding
|
||||||
|
|
||||||
pp = Path(path)
|
pp = Path(path)
|
||||||
suf = pp.suffix
|
name = pp.name
|
||||||
if suf in {'.xz'}:
|
if name.endswith(Ext.xz):
|
||||||
import lzma
|
import lzma
|
||||||
r = lzma.open(pp, mode, *args, **kwargs)
|
r = lzma.open(pp, mode, *args, **kwargs)
|
||||||
# should only happen for binary mode?
|
# should only happen for binary mode?
|
||||||
# file:///usr/share/doc/python3/html/library/lzma.html?highlight=lzma#lzma.open
|
# file:///usr/share/doc/python3/html/library/lzma.html?highlight=lzma#lzma.open
|
||||||
assert not isinstance(r, lzma.LZMAFile), r
|
assert not isinstance(r, lzma.LZMAFile), r
|
||||||
return r
|
return r
|
||||||
elif suf in {'.zip'}:
|
elif name.endswith(Ext.zip):
|
||||||
# eh. this behaviour is a bit dodgy...
|
# eh. this behaviour is a bit dodgy...
|
||||||
from zipfile import ZipFile
|
from zipfile import ZipFile
|
||||||
zfile = ZipFile(pp)
|
zfile = ZipFile(pp)
|
||||||
|
@ -49,11 +63,18 @@ def kopen(path: PathIsh, *args, mode: str='rt', **kwargs) -> IO[str]:
|
||||||
# TODO pass all kwargs here??
|
# TODO pass all kwargs here??
|
||||||
# todo 'expected "BinaryIO"'??
|
# todo 'expected "BinaryIO"'??
|
||||||
return io.TextIOWrapper(ifile, encoding=encoding) # type: ignore[arg-type]
|
return io.TextIOWrapper(ifile, encoding=encoding) # type: ignore[arg-type]
|
||||||
elif suf in {'.lz4'}:
|
elif name.endswith(Ext.lz4):
|
||||||
import lz4.frame # type: ignore
|
import lz4.frame # type: ignore
|
||||||
return lz4.frame.open(str(pp), mode, *args, **kwargs)
|
return lz4.frame.open(str(pp), mode, *args, **kwargs)
|
||||||
elif suf in {'.zstd'}:
|
elif name.endswith(Ext.zstd):
|
||||||
return _zstd_open(pp, mode, *args, **kwargs)
|
return _zstd_open(pp, mode, *args, **kwargs)
|
||||||
|
elif name.endswith(Ext.targz):
|
||||||
|
import tarfile
|
||||||
|
# FIXME pass mode?
|
||||||
|
tf = tarfile.open(pp)
|
||||||
|
# TODO pass encoding?
|
||||||
|
x = tf.extractfile(*args); assert x is not None
|
||||||
|
return x # type: ignore[return-value]
|
||||||
else:
|
else:
|
||||||
return pp.open(mode, *args, **kwargs)
|
return pp.open(mode, *args, **kwargs)
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue