core: detect compression, wrap in CPath if necessary

This commit is contained in:
Dima Gerasimov 2020-07-13 22:30:40 +01:00 committed by karlicoss
parent 77deef98de
commit 092aef88ce
4 changed files with 18 additions and 10 deletions

View file

@ -116,9 +116,21 @@ from ..kython.klogging import setup_logger, LazyLogger
Paths = Union[Sequence[PathIsh], PathIsh] Paths = Union[Sequence[PathIsh], PathIsh]
def _is_compressed(p: Path) -> bool:
    """
    Guess from the file name alone whether ``p`` is a compressed file.

    Returns True when the final suffix of ``p`` is one of
    ``.xz``, ``.lz4`` or ``.zstd``; the file itself is never opened.
    The comparison is case-sensitive and only looks at the last suffix.
    """
    # todo kinda lame way for now.. use mime ideally?
    # should cooperate with kompress.kopen?
    return p.suffix in {'.xz', '.lz4', '.zstd'}
# TODO support '' for empty path # TODO support '' for empty path
DEFAULT_GLOB = '*' DEFAULT_GLOB = '*'
def get_files(pp: Paths, glob: str=DEFAULT_GLOB, sort: bool=True) -> Tuple[Path, ...]: def get_files(
pp: Paths,
glob: str=DEFAULT_GLOB,
sort: bool=True,
guess_compression: bool=True,
) -> Tuple[Path, ...]:
""" """
Helper function to avoid boilerplate. Helper function to avoid boilerplate.
@ -170,6 +182,9 @@ def get_files(pp: Paths, glob: str=DEFAULT_GLOB, sort: bool=True) -> Tuple[Path,
warnings.warn(f'{caller()}: no paths were matched against {paths}. This might result in missing data.') warnings.warn(f'{caller()}: no paths were matched against {paths}. This might result in missing data.')
traceback.print_stack() traceback.print_stack()
if guess_compression:
from ..kython.kompress import CPath # todo move to core?
paths = [CPath(p) if _is_compressed(p) else p for p in paths]
return tuple(paths) return tuple(paths)

View file

@ -59,7 +59,6 @@ from typing import Tuple, Iterable, Dict, Sequence
from ..core import get_files from ..core import get_files
from ..core.common import mcachew from ..core.common import mcachew
from ..kython.kompress import CPath
from .common import Event, parse_dt, Results from .common import Event, parse_dt, Results
@ -70,7 +69,6 @@ def inputs() -> Sequence[Path]:
def _dal() -> dal.DAL: def _dal() -> dal.DAL:
sources = inputs() sources = inputs()
sources = list(map(CPath, sources)) # TODO maybe move it to get_files? e.g. compressed=True arg?
return dal.DAL(sources) return dal.DAL(sources)

View file

@ -67,11 +67,7 @@ logger = LazyLogger(__name__, level='debug')
from pathlib import Path from pathlib import Path
def inputs() -> Sequence[Path]: def inputs() -> Sequence[Path]:
files = get_files(config.export_path) return get_files(config.export_path)
# TODO Cpath better be automatic by get_files...
from .kython.kompress import CPath
res = tuple(map(CPath, files))
return res
Sid = dal.Sid Sid = dal.Sid

View file

@ -9,7 +9,6 @@ from typing import Dict, List, Optional, Iterator
from datetime import datetime from datetime import datetime
from .common import LazyLogger, get_files, group_by_key, cproperty, make_dict from .common import LazyLogger, get_files, group_by_key, cproperty, make_dict
from .kython.kompress import CPath
from my.config import rtm as config from my.config import rtm as config
@ -99,7 +98,7 @@ class DAL:
def dal(): def dal():
last = get_files(config.export_path)[-1] last = get_files(config.export_path)[-1]
data = CPath(last).read_text() # TODO make it automatic data = last.read_text()
return DAL(data=data, revision='TODO') return DAL(data=data, revision='TODO')