enhance reddit module

This commit is contained in:
Dima Gerasimov 2020-01-17 18:40:11 +00:00
parent 3dabd7ff46
commit 7d56d85731
2 changed files with 35 additions and 21 deletions

View file

@@ -88,6 +88,7 @@ def get_files(pp: PathIsh, glob: str, sort=True) -> List[Path]:
""" """
Helper function to avoid boilerplate. Helper function to avoid boilerplate.
""" """
# TODO FIXME mm, some wrapper to assert iterator isn't empty?
path = Path(pp) path = Path(pp)
if path.is_dir(): if path.is_dir():
gp: Iterable[Path] = path.glob(glob) gp: Iterable[Path] = path.glob(glob)

View file

@@ -1,8 +1,8 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
from pathlib import Path, PosixPath from pathlib import Path, PosixPath
from typing import List, Sequence, Mapping from typing import List, Sequence, Mapping, Iterator
from .common import mcachew from .common import mcachew, get_files, LazyLogger
from mycfg import paths from mycfg import paths
import mycfg.repos.rexport.dal as rexport import mycfg.repos.rexport.dal as rexport
@@ -21,29 +21,44 @@ class CPath(PosixPath):
return kompress.open(str(self)) return kompress.open(str(self))
def get_backup_files() -> Sequence[Path]: def get_sources() -> Sequence[Path]:
export_dir = Path(paths.rexport.export_dir) # TODO use zstd?
res = list(map(CPath, sorted(export_dir.glob('*.json.xz')))) files = get_files(paths.rexport.export_dir, glob='*.json.xz')
assert len(res) > 0 res = list(map(CPath, files)); assert len(res) > 0
return tuple(res) return tuple(res)
def get_model(): def dal():
model = rexport.DAL(get_backup_files()) # TODO lru cache? but be careful when it runs continuously
return model return rexport.DAL(get_sources())
def get_logger(): logger = LazyLogger('my.reddit', level='debug')
import logging
return logging.getLogger('my.reddit')
Save = rexport.Save
Sid = rexport.Sid Sid = rexport.Sid
Save = rexport.Save
Comment = rexport.Comment
Submission = rexport.Submission
Upvote = rexport.Upvote
def get_saves() -> List[Save]: # TODO cachew? wonder how to play nicely with DAL?
return get_model().saved() def saved() -> Iterator[Save]:
return dal().saved()
def comments() -> Iterator[Comment]:
return dal().comments()
def submissions() -> Iterator[Submission]:
return dal().submissions()
def upvoted() -> Iterator[Upvote]:
return dal().upvoted()
from typing import Dict, Union, Iterable, Iterator, NamedTuple, Any from typing import Dict, Union, Iterable, Iterator, NamedTuple, Any
@@ -94,7 +109,6 @@ def _get_bdate(bfile: Path) -> datetime:
def _get_state(bfile: Path) -> Dict[Sid, SaveWithDt]: def _get_state(bfile: Path) -> Dict[Sid, SaveWithDt]:
logger = get_logger()
logger.debug('handling %s', bfile) logger.debug('handling %s', bfile)
bdt = _get_bdate(bfile) bdt = _get_bdate(bfile)
@@ -108,10 +122,9 @@ def _get_state(bfile: Path) -> Dict[Sid, SaveWithDt]:
) )
@mcachew('/L/data/.cache/reddit-events.cache') @mcachew('/L/data/.cache/reddit-events.cache')
def _get_events(backups: Sequence[Path]=get_backup_files(), parallel: bool=True) -> Iterator[Event]: def _get_events(backups: Sequence[Path]=get_sources(), parallel: bool=True) -> Iterator[Event]:
# TODO cachew: let it transform return type? so you don't have to write a wrapper for lists? # TODO cachew: let it transform return type? so you don't have to write a wrapper for lists?
# parallel = False # NOTE: eh, not sure if still necessary? I think glumov didn't like it? # parallel = False # NOTE: eh, not sure if still necessary? I think glumov didn't like it?
logger = get_logger()
prev_saves: Mapping[Sid, SaveWithDt] = {} prev_saves: Mapping[Sid, SaveWithDt] = {}
# TODO suppress first batch?? # TODO suppress first batch??
@@ -167,8 +180,8 @@ def get_events(*args, **kwargs) -> List[Event]:
def test(): def test():
get_events(backups=get_backup_files()[-1:]) get_events(backups=get_sources()[-1:])
get_saves() list(saved())
def test_unfav(): def test_unfav():
@@ -184,7 +197,7 @@ def test_unfav():
def test_get_all_saves(): def test_get_all_saves():
# TODO not sure if this is necessary anymore? # TODO not sure if this is necessary anymore?
saves = get_saves() saves = list(saved())
# just check that they are unique.. # just check that they are unique..
from kython import make_dict from kython import make_dict
make_dict(saves, key=lambda s: s.sid) make_dict(saves, key=lambda s: s.sid)