rely on implicit glob for my.reddit

This commit is contained in:
Dima Gerasimov 2020-05-03 16:56:05 +01:00
parent 5706f690e7
commit 9bd61940b8
2 changed files with 8 additions and 4 deletions

View file

@@ -3,6 +3,7 @@ from pathlib import Path
import functools import functools
import types import types
from typing import Union, Callable, Dict, Iterable, TypeVar, Sequence, List, Optional, Any, cast, Tuple from typing import Union, Callable, Dict, Iterable, TypeVar, Sequence, List, Optional, Any, cast, Tuple
import warnings
from . import init from . import init
@@ -108,7 +109,8 @@ from .kython.klogging import setup_logger, LazyLogger
Paths = Union[Sequence[PathIsh], PathIsh] Paths = Union[Sequence[PathIsh], PathIsh]
def get_files(pp: Paths, glob: str='*', sort: bool=True) -> Tuple[Path, ...]: DEFAULT_GLOB = '*'
def get_files(pp: Paths, glob: str=DEFAULT_GLOB, sort: bool=True) -> Tuple[Path, ...]:
""" """
Helper function to avoid boilerplate. Helper function to avoid boilerplate.
@@ -129,6 +131,8 @@ def get_files(pp: Paths, glob: str='*', sort: bool=True) -> Tuple[Path, ...]:
else: else:
ss = str(src) ss = str(src)
if '*' in ss: if '*' in ss:
if glob != DEFAULT_GLOB:
warnings.warn(f"Treating {ss} as glob path. Explicit glob={glob} argument is ignored!")
paths.extend(map(Path, do_glob(ss))) paths.extend(map(Path, do_glob(ss)))
else: else:
assert src.is_file(), src assert src.is_file(), src
@@ -163,7 +167,6 @@ def mcachew(*args, **kwargs): # type: ignore[no-redef]
try: try:
import cachew import cachew
except ModuleNotFoundError: except ModuleNotFoundError:
import warnings
warnings.warn('cachew library not found. You might want to install it to speed things up. See https://github.com/karlicoss/cachew') warnings.warn('cachew library not found. You might want to install it to speed things up. See https://github.com/karlicoss/cachew')
return lambda orig_func: orig_func return lambda orig_func: orig_func
else: else:

View file

@@ -15,9 +15,10 @@ import my.config.repos.rexport.dal as rexport
def get_sources() -> Sequence[Path]: def get_sources() -> Sequence[Path]:
# TODO use zstd? # TODO use zstd?
# TODO maybe add assert to get_files? (and allow to suppress it) # TODO rename to export_path?
files = get_files(config.export_dir, glob='*.json.xz') files = get_files(config.export_dir)
res = list(map(CPath, files)); assert len(res) > 0 res = list(map(CPath, files)); assert len(res) > 0
# todo move the assert to get_files?
return tuple(res) return tuple(res)