extract text, use cproperty, function to get saves

This commit is contained in:
Dima Gerasimov 2019-03-14 20:59:41 +00:00
parent ef270f4d01
commit aac4807b5d

View file

@ -1,11 +1,12 @@
from typing import List, Dict, Union, Iterable, Iterator, NamedTuple from typing import List, Dict, Union, Iterable, Iterator, NamedTuple, Any
import json import json
from collections import OrderedDict
from pathlib import Path from pathlib import Path
import pytz import pytz
import re import re
from datetime import datetime from datetime import datetime
from kython import kompress from kython import kompress, cproperty
BPATH = Path("/L/backups/reddit") BPATH = Path("/L/backups/reddit")
@ -24,6 +25,24 @@ class Save(NamedTuple):
title: str title: str
url: str url: str
sid: str sid: str
json: Any = None
# TODO subreddit-display name?
def __hash__(self):
    """Hash this record by its ``sid`` field only."""
    key = self.sid
    return hash(key)
@cproperty
def text(self) -> str:
    """Return the text stored under ``body`` or ``selftext`` in ``self.json``.

    ``self.json`` must be populated: the field defaults to ``None``, and in
    that case the ``.get`` lookups below fail with ``AttributeError``.

    Raises:
        RuntimeError: if both ``body`` and ``selftext`` are present
            (i.e. neither is ``None``).
    """
    body = self.json.get('body')
    selftext = self.json.get('selftext')
    if body is not None and selftext is not None:
        raise RuntimeError(f'wtf, both body and selftext are not None: {body}; {selftext}')
    # Select by ``is not None`` rather than ``body or selftext``: ``or`` tests
    # truthiness, so an empty-string body would be dropped and ``None``
    # returned in its place.
    # NOTE(review): when neither key is present this still returns None,
    # despite the ``-> str`` annotation.
    return body if body is not None else selftext
@cproperty
def subreddit(self) -> str:
    """Return ``display_name`` from the ``subreddit`` entry of ``self.json``."""
    sub = self.json['subreddit']
    return sub['display_name']
class Misc(NamedTuple): class Misc(NamedTuple):
pass pass
@ -41,6 +60,7 @@ class Event(NamedTuple):
# TODO kython? # TODO kython?
def get_some(d, *keys): def get_some(d, *keys):
# TODO only one should be non None??
for k in keys: for k in keys:
v = d.get(k, None) v = d.get(k, None)
if v is not None: if v is not None:
@ -49,8 +69,11 @@ def get_some(d, *keys):
return None return None
def get_state(bfile: Path): Url = str
saves: Dict[str, Save] = {}
# TODO OrderedDict
def get_state(bfile: Path) -> Dict[Url, Save]:
saves: Dict[Url, Save] = {}
with kompress.open(bfile) as fo: with kompress.open(bfile) as fo:
jj = json.load(fo) jj = json.load(fo)
@ -64,13 +87,14 @@ def get_state(bfile: Path):
title=title, title=title,
url=url, url=url,
sid=s['id'], sid=s['id'],
json=s,
) )
saves[save.url] = save saves[save.url] = save
# "created_utc": 1535055017.0, # "created_utc": 1535055017.0,
# link_title # link_title
# link_text # link_text
return saves return OrderedDict(sorted(saves.items(), key=lambda p: p[1].dt))
def get_events(all_=True): def get_events(all_=True):
@ -123,9 +147,20 @@ def get_events(all_=True):
return list(sorted(events, key=lambda e: e.dt)) return list(sorted(events, key=lambda e: e.dt))
def get_saves(all_=True) -> List[Save]:
    """Return the saves held in a single backup as a list.

    Only ``all_=False`` is supported; any other value (including the default
    ``True``) trips the assertion below.  ``_get_backups`` must yield exactly
    one backup, otherwise the unpacking raises ``ValueError``.  The list
    follows the iteration order of the mapping returned by ``get_state``.
    """
    # TODO hmm.... do we want ALL reddit saves I ever had?
    # TODO for now even last ones would be ok
    assert all_ is False, 'all saves are not supported yet...'
    # Exactly one backup is expected here.
    (backup,) = _get_backups(all_=all_)
    return list(get_state(backup).values())
def test(): def test():
get_events(all_=False) get_events(all_=False)
get_saves(all_=False)
def main(): def main():