enhance reddit module
This commit is contained in:
parent
3dabd7ff46
commit
7d56d85731
2 changed files with 35 additions and 21 deletions
|
@ -88,6 +88,7 @@ def get_files(pp: PathIsh, glob: str, sort=True) -> List[Path]:
|
||||||
"""
|
"""
|
||||||
Helper function to avoid boilerplate.
|
Helper function to avoid boilerplate.
|
||||||
"""
|
"""
|
||||||
|
# TODO FIXME mm, some wrapper to assert iterator isn't empty?
|
||||||
path = Path(pp)
|
path = Path(pp)
|
||||||
if path.is_dir():
|
if path.is_dir():
|
||||||
gp: Iterable[Path] = path.glob(glob)
|
gp: Iterable[Path] = path.glob(glob)
|
||||||
|
|
55
my/reddit.py
55
my/reddit.py
|
@ -1,8 +1,8 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
from pathlib import Path, PosixPath
|
from pathlib import Path, PosixPath
|
||||||
from typing import List, Sequence, Mapping
|
from typing import List, Sequence, Mapping, Iterator
|
||||||
|
|
||||||
from .common import mcachew
|
from .common import mcachew, get_files, LazyLogger
|
||||||
|
|
||||||
from mycfg import paths
|
from mycfg import paths
|
||||||
import mycfg.repos.rexport.dal as rexport
|
import mycfg.repos.rexport.dal as rexport
|
||||||
|
@ -21,29 +21,44 @@ class CPath(PosixPath):
|
||||||
return kompress.open(str(self))
|
return kompress.open(str(self))
|
||||||
|
|
||||||
|
|
||||||
def get_backup_files() -> Sequence[Path]:
|
def get_sources() -> Sequence[Path]:
|
||||||
export_dir = Path(paths.rexport.export_dir)
|
# TODO use zstd?
|
||||||
res = list(map(CPath, sorted(export_dir.glob('*.json.xz'))))
|
files = get_files(paths.rexport.export_dir, glob='*.json.xz')
|
||||||
assert len(res) > 0
|
res = list(map(CPath, files)); assert len(res) > 0
|
||||||
return tuple(res)
|
return tuple(res)
|
||||||
|
|
||||||
|
|
||||||
def get_model():
|
def dal():
|
||||||
model = rexport.DAL(get_backup_files())
|
# TODO lru cache? but be careful when it runs continuously
|
||||||
return model
|
return rexport.DAL(get_sources())
|
||||||
|
|
||||||
|
|
||||||
def get_logger():
|
logger = LazyLogger('my.reddit', level='debug')
|
||||||
import logging
|
|
||||||
return logging.getLogger('my.reddit')
|
|
||||||
|
|
||||||
|
|
||||||
Save = rexport.Save
|
|
||||||
Sid = rexport.Sid
|
Sid = rexport.Sid
|
||||||
|
Save = rexport.Save
|
||||||
|
Comment = rexport.Comment
|
||||||
|
Submission = rexport.Submission
|
||||||
|
Upvote = rexport.Upvote
|
||||||
|
|
||||||
|
|
||||||
def get_saves() -> List[Save]:
|
# TODO cachew? wonder how to play nicely with DAL?
|
||||||
return get_model().saved()
|
def saved() -> Iterator[Save]:
|
||||||
|
return dal().saved()
|
||||||
|
|
||||||
|
|
||||||
|
def comments() -> Iterator[Comment]:
|
||||||
|
return dal().comments()
|
||||||
|
|
||||||
|
|
||||||
|
def submissions() -> Iterator[Submission]:
|
||||||
|
return dal().submissions()
|
||||||
|
|
||||||
|
|
||||||
|
def upvoted() -> Iterator[Upvote]:
|
||||||
|
return dal().upvoted()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
from typing import Dict, Union, Iterable, Iterator, NamedTuple, Any
|
from typing import Dict, Union, Iterable, Iterator, NamedTuple, Any
|
||||||
|
@ -94,7 +109,6 @@ def _get_bdate(bfile: Path) -> datetime:
|
||||||
|
|
||||||
|
|
||||||
def _get_state(bfile: Path) -> Dict[Sid, SaveWithDt]:
|
def _get_state(bfile: Path) -> Dict[Sid, SaveWithDt]:
|
||||||
logger = get_logger()
|
|
||||||
logger.debug('handling %s', bfile)
|
logger.debug('handling %s', bfile)
|
||||||
|
|
||||||
bdt = _get_bdate(bfile)
|
bdt = _get_bdate(bfile)
|
||||||
|
@ -108,10 +122,9 @@ def _get_state(bfile: Path) -> Dict[Sid, SaveWithDt]:
|
||||||
)
|
)
|
||||||
|
|
||||||
@mcachew('/L/data/.cache/reddit-events.cache')
|
@mcachew('/L/data/.cache/reddit-events.cache')
|
||||||
def _get_events(backups: Sequence[Path]=get_backup_files(), parallel: bool=True) -> Iterator[Event]:
|
def _get_events(backups: Sequence[Path]=get_sources(), parallel: bool=True) -> Iterator[Event]:
|
||||||
# TODO cachew: let it transform return type? so you don't have to write a wrapper for lists?
|
# TODO cachew: let it transform return type? so you don't have to write a wrapper for lists?
|
||||||
# parallel = False # NOTE: eh, not sure if still necessary? I think glumov didn't like it?
|
# parallel = False # NOTE: eh, not sure if still necessary? I think glumov didn't like it?
|
||||||
logger = get_logger()
|
|
||||||
|
|
||||||
prev_saves: Mapping[Sid, SaveWithDt] = {}
|
prev_saves: Mapping[Sid, SaveWithDt] = {}
|
||||||
# TODO suppress first batch??
|
# TODO suppress first batch??
|
||||||
|
@ -167,8 +180,8 @@ def get_events(*args, **kwargs) -> List[Event]:
|
||||||
|
|
||||||
|
|
||||||
def test():
|
def test():
|
||||||
get_events(backups=get_backup_files()[-1:])
|
get_events(backups=get_sources()[-1:])
|
||||||
get_saves()
|
list(saved())
|
||||||
|
|
||||||
|
|
||||||
def test_unfav():
|
def test_unfav():
|
||||||
|
@ -184,7 +197,7 @@ def test_unfav():
|
||||||
|
|
||||||
def test_get_all_saves():
|
def test_get_all_saves():
|
||||||
# TODO not sure if this is necessary anymore?
|
# TODO not sure if this is necessary anymore?
|
||||||
saves = get_saves()
|
saves = list(saved())
|
||||||
# just check that they are unique..
|
# just check that they are unique..
|
||||||
from kython import make_dict
|
from kython import make_dict
|
||||||
make_dict(saves, key=lambda s: s.sid)
|
make_dict(saves, key=lambda s: s.sid)
|
||||||
|
|
Loading…
Add table
Reference in a new issue