From 092aef88ce2d1332771eeb9aa2a2572ede92fb69 Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Mon, 13 Jul 2020 22:30:40 +0100 Subject: [PATCH] core: detect compression, wrap in CPath if necessary --- my/core/common.py | 17 ++++++++++++++++- my/github/ghexport.py | 2 -- my/reddit.py | 6 +----- my/rtm.py | 3 +-- 4 files changed, 18 insertions(+), 10 deletions(-) diff --git a/my/core/common.py b/my/core/common.py index bc1e2e2..37a841a 100644 --- a/my/core/common.py +++ b/my/core/common.py @@ -116,9 +116,21 @@ from ..kython.klogging import setup_logger, LazyLogger Paths = Union[Sequence[PathIsh], PathIsh] + +def _is_compressed(p: Path) -> bool: + # todo kinda lame way for now.. use mime ideally? + # should cooperate with kompress.kopen? + return p.suffix in {'.xz', '.lz4', '.zstd'} + + # TODO support '' for emtpy path DEFAULT_GLOB = '*' -def get_files(pp: Paths, glob: str=DEFAULT_GLOB, sort: bool=True) -> Tuple[Path, ...]: +def get_files( + pp: Paths, + glob: str=DEFAULT_GLOB, + sort: bool=True, + guess_compression: bool=True, +) -> Tuple[Path, ...]: """ Helper function to avoid boilerplate. @@ -170,6 +182,9 @@ def get_files(pp: Paths, glob: str=DEFAULT_GLOB, sort: bool=True) -> Tuple[Path, warnings.warn(f'{caller()}: no paths were matched against {paths}. This might result in missing data.') traceback.print_stack() + if guess_compression: + from ..kython.kompress import CPath # todo move to core? + paths = [CPath(p) if _is_compressed(p) else p for p in paths] return tuple(paths) diff --git a/my/github/ghexport.py b/my/github/ghexport.py index 4156628..dd164be 100644 --- a/my/github/ghexport.py +++ b/my/github/ghexport.py @@ -59,7 +59,6 @@ from typing import Tuple, Iterable, Dict, Sequence from ..core import get_files from ..core.common import mcachew -from ..kython.kompress import CPath from .common import Event, parse_dt, Results @@ -70,7 +69,6 @@ def inputs() -> Sequence[Path]: def _dal() -> dal.DAL: sources = inputs() - sources = list(map(CPath, sources)) # TODO maybe move it to get_files? e.g. compressed=True arg? return dal.DAL(sources) diff --git a/my/reddit.py b/my/reddit.py index 1145297..41f7e56 100755 --- a/my/reddit.py +++ b/my/reddit.py @@ -67,11 +67,7 @@ logger = LazyLogger(__name__, level='debug') from pathlib import Path def inputs() -> Sequence[Path]: - files = get_files(config.export_path) - # TODO Cpath better be automatic by get_files... - from .kython.kompress import CPath - res = tuple(map(CPath, files)) - return res + return get_files(config.export_path) Sid = dal.Sid diff --git a/my/rtm.py b/my/rtm.py index 0b23fdd..d527d43 100755 --- a/my/rtm.py +++ b/my/rtm.py @@ -9,7 +9,6 @@ from typing import Dict, List, Optional, Iterator from datetime import datetime from .common import LazyLogger, get_files, group_by_key, cproperty, make_dict -from .kython.kompress import CPath from my.config import rtm as config @@ -99,7 +98,7 @@ class DAL: def dal(): last = get_files(config.export_path)[-1] - data = CPath(last).read_text() # TODO make it automatic + data = last.read_text() return DAL(data=data, revision='TODO')