initial pushshift/rexport merge implementation
This commit is contained in:
parent
b54ec0d7f1
commit
5933711888
8 changed files with 259 additions and 20 deletions
45
my/reddit/pushshift.py
Normal file
45
my/reddit/pushshift.py
Normal file
|
@ -0,0 +1,45 @@
|
|||
"""
|
||||
Gives you access to older comments possibly not accessible with rexport
|
||||
using pushshift
|
||||
See https://github.com/seanbreckenridge/pushshift_comment_export
|
||||
"""
|
||||
|
||||
# External package this module depends on; it provides the
# `pushshift_comment_export.dal` module imported further down in this file.
REQUIRES = [
    "git+https://github.com/seanbreckenridge/pushshift_comment_export",
]
|
||||
|
||||
from my.core.common import Paths, Stats
|
||||
from dataclasses import dataclass
|
||||
from my.core.cfg import make_config
|
||||
|
||||
from my.config import reddit as uconfig
|
||||
|
||||
@dataclass
class pushshift_config(uconfig.pushshift):
    """Settings for this module, layered on top of the user's `my.config.reddit.pushshift` section."""

    # Location(s) of the exported files; `inputs()` hands this value to `get_files`.
    export_path: Paths
|
||||
|
||||
# Module-level configuration object built from `pushshift_config` via
# `make_config`; `inputs()` reads `config.export_path` from it.
config = make_config(pushshift_config)
|
||||
|
||||
from my.core import get_files
|
||||
from typing import Sequence, Iterator
|
||||
from pathlib import Path
|
||||
from .common import Comment
|
||||
|
||||
from pushshift_comment_export.dal import read_file, PComment
|
||||
|
||||
|
||||
def inputs() -> Sequence[Path]:
    """Return the files that `get_files` resolves from `config.export_path`."""
    return get_files(config.export_path)
|
||||
|
||||
|
||||
def comments() -> Iterator[PComment]:
    """Yield every item `read_file` produces, file by file.

    Files are visited in the order `inputs()` returns them; nothing is
    deduplicated or reordered here.
    """
    for export_file in inputs():
        yield from read_file(export_file)
|
||||
|
||||
def stats() -> Stats:
    """Return the statistics that `my.core.stat` reports for `comments`."""
    # Local import: `stat` is only needed inside this function.
    from my.core import stat

    return {
        **stat(comments),
    }
|
||||
|
||||
|
Loading…
Add table
Add a link
Reference in a new issue