core: support '' for explicitly set empty path set
This commit is contained in:
parent
a267aeec5b
commit
3d7844b711
4 changed files with 16 additions and 7 deletions
|
@ -46,7 +46,9 @@ Some explanations:
|
|||
- =/a/path/to/directory/=, so the module will consume all files from this directory
|
||||
- a list of files/directories (it will be flattened)
|
||||
- a [[https://docs.python.org/3/library/glob.html?highlight=glob#glob.glob][glob]] string, so you can be flexible about the format of your data on disk (e.g. if you want to keep it compressed)
|
||||
- empty sequence (e.g. ~export_path = ()~), this is useful for modules that merge multiple data sources (for example, =my.twitter=)
|
||||
- an empty string (e.g. ~export_path = ''~); this prevents the module from consuming any data
|
||||
|
||||
This can be useful for modules that merge multiple data sources (for example, =my.twitter= or =my.github=)
|
||||
|
||||
Typically, such a variable will be passed to =get_files= to actually extract the list of real files to use. You can see usage examples [[https://github.com/karlicoss/HPI/blob/master/tests/get_files.py][here]].
|
||||
|
||||
|
|
|
@ -474,8 +474,7 @@ Since you have two different sources of raw data, you need to specify two bits o
|
|||
: class twitter_archive:
|
||||
: export_path = '/backups/twitter-archives/*.zip'
|
||||
|
||||
Note that you can also just use =my.twitter.archive= or =my.twitter.twint= directly, or set either of paths to 'empty path': =()=
|
||||
# TODO empty string?
|
||||
Note that you can also just use =my.twitter.archive= or =my.twitter.twint= directly, or set either of the paths to an empty string: =''=
|
||||
# (TODO mypy-safe?)
|
||||
|
||||
# #addingmodifying-modules
|
||||
|
|
|
@ -125,11 +125,16 @@ def get_files(pp: Paths, glob: str=DEFAULT_GLOB, sort: bool=True) -> Tuple[Path,
|
|||
Tuple as return type is a bit friendlier for hashing/caching, so hopefully makes sense
|
||||
"""
|
||||
# TODO FIXME mm, some wrapper to assert iterator isn't empty?
|
||||
sources: List[Path] = []
|
||||
if isinstance(pp, (str, Path)):
|
||||
sources.append(Path(pp))
|
||||
sources: List[Path]
|
||||
if isinstance(pp, Path):
|
||||
sources = [pp]
|
||||
elif isinstance(pp, str):
|
||||
if pp == '':
|
||||
# special case -- makes sense for optional data sources, etc
|
||||
return () # early return to prevent warnings etc
|
||||
sources = [Path(pp)]
|
||||
else:
|
||||
sources.extend(map(Path, pp))
|
||||
sources = [Path(p) for p in pp]
|
||||
|
||||
def caller() -> str:
|
||||
import traceback
|
||||
|
|
|
@ -102,6 +102,9 @@ def test_no_files():
|
|||
'''
|
||||
Test for empty matches. They work, but should result in warning
|
||||
'''
|
||||
assert get_files('') == ()
|
||||
|
||||
# todo test these for warnings?
|
||||
assert get_files([]) == ()
|
||||
assert get_files('bad*glob') == ()
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue