diff --git a/doc/MODULES.org b/doc/MODULES.org index a30e814..4b33143 100644 --- a/doc/MODULES.org +++ b/doc/MODULES.org @@ -46,7 +46,9 @@ Some explanations: - =/a/path/to/directory/=, so the module will consume all files from this directory - a list of files/directories (it will be flattened) - a [[https://docs.python.org/3/library/glob.html?highlight=glob#glob.glob][glob]] string, so you can be flexible about the format of your data on disk (e.g. if you want to keep it compressed) - - empty sequence (e.g. ~export_path = ()~), this is useful for modules that merge multiple data sources (for example, =my.twitter=) + - empty string (e.g. ~export_path = ''~), this will prevent the module from consuming any data + + This can be useful for modules that merge multiple data sources (for example, =my.twitter= or =my.github=) Typically, such variable will be passed to =get_files= to actually extract the list of real files to use. You can see usage examples [[https://github.com/karlicoss/HPI/blob/master/tests/get_files.py][here]]. diff --git a/doc/SETUP.org b/doc/SETUP.org index bacb489..bd4c6fd 100644 --- a/doc/SETUP.org +++ b/doc/SETUP.org @@ -474,8 +474,7 @@ Since you have two different sources of raw data, you need to specify two bits o : class twitter_archive: : export_path = '/backups/twitter-archives/*.zip' -Note that you can also just use =my.twitter.archive= or =my.twitter.twint= directly, or set either of paths to 'empty path': =()= -# TODO empty string? +Note that you can also just use =my.twitter.archive= or =my.twitter.twint= directly, or set either of the paths to an empty string: =''= # (TODO mypy-safe?) 
# #addingmodifying-modules diff --git a/my/core/common.py b/my/core/common.py index 74aac5e..324ae26 100644 --- a/my/core/common.py +++ b/my/core/common.py @@ -125,11 +125,16 @@ def get_files(pp: Paths, glob: str=DEFAULT_GLOB, sort: bool=True) -> Tuple[Path, Tuple as return type is a bit friendlier for hashing/caching, so hopefully makes sense """ # TODO FIXME mm, some wrapper to assert iterator isn't empty? - sources: List[Path] = [] - if isinstance(pp, (str, Path)): - sources.append(Path(pp)) + sources: List[Path] + if isinstance(pp, Path): + sources = [pp] + elif isinstance(pp, str): + if pp == '': + # special case -- makes sense for optional data sources, etc + return () # early return to prevent warnings etc + sources = [Path(pp)] else: - sources.extend(map(Path, pp)) + sources = [Path(p) for p in pp] def caller() -> str: import traceback diff --git a/tests/get_files.py b/tests/get_files.py index 14f2711..aa71e7b 100644 --- a/tests/get_files.py +++ b/tests/get_files.py @@ -102,6 +102,9 @@ def test_no_files(): ''' Test for empty matches. They work, but should result in warning ''' + assert get_files('') == () + + # todo test these for warnings? assert get_files([]) == () assert get_files('bad*glob') == ()