HPI/my/reddit/pushshift.py
Sean Breckenridge 4492e00250 misc fixes
- convert import_source to a decorator which
  wraps the function call in a try block
- fix protocol class when not TYPE_CHECKING
- add id properties to Protocols, remove attributes
  since protocol expects them to be settable but
  NT is read-only
- use id to merge comments
- remove type: ignore's from reddit config
  and just store as 'Any'
2021-10-31 13:15:21 -07:00

44 lines
943 B
Python

"""
Gives you access to older comments, retrieved using pushshift, which are
possibly not accessible with rexport
See https://github.com/seanbreckenridge/pushshift_comment_export
"""
# Extra package(s) this module depends on (the exporter whose `dal` is imported
# below). NOTE(review): presumably read by HPI's module installer -- confirm.
REQUIRES = ["git+https://github.com/seanbreckenridge/pushshift_comment_export"]
from my.core.common import Paths, Stats
from dataclasses import dataclass
from my.core.cfg import make_config
from my.config import reddit as uconfig
@dataclass
class pushshift_config(uconfig.pushshift):
    """Pushshift settings layered on the user's ``my.config.reddit.pushshift``."""

    # Location(s) of the exported comment files; handed to ``get_files`` by
    # ``inputs()``.
    export_path: Paths
# Module-level configuration object, built by my.core.cfg.make_config from the
# pushshift_config dataclass; inputs() reads config.export_path from it.
config = make_config(pushshift_config)
from my.core import get_files
from typing import Sequence, Iterator
from pathlib import Path
from pushshift_comment_export.dal import read_file, PComment
def inputs() -> Sequence[Path]:
    """Return the paths that ``get_files`` resolves from ``config.export_path``."""
    export_files = get_files(config.export_path)
    return export_files
def comments() -> Iterator[PComment]:
    """Yield each comment ``read_file`` produces, for every path from ``inputs()``.

    Files are processed one after another, in the order ``inputs()`` returns
    them; nothing is merged or de-duplicated here.
    """
    for export_file in inputs():
        for comment in read_file(export_file):
            yield comment
def stats() -> Stats:
    """Return the entries ``my.core.stat`` reports for ``comments``, as a new dict."""
    # Function-local import: ``stat`` is only bound when stats() is called.
    from my.core import stat

    # The function object itself is passed to ``stat``, not the result of
    # calling it.
    comment_stats = stat(comments)
    return {**comment_stats}