fix saves retrieval, use save id instead of url

Dima Gerasimov 2019-03-16 10:56:29 +00:00
parent b9587939ca
commit ee99518cf5
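In short: the per-backup state used to be keyed by each saved item's url; it is now keyed by the reddit save id (sid), and the url is derived from the item's permalink on demand instead of being stored on the Save tuple. A minimal sketch of the new shape (trimmed for illustration; not the actual module):

# Minimal sketch only; the real Save class lives in reddit/__init__.py below.
from collections import OrderedDict
from typing import Any, Dict, Iterable, NamedTuple

class Save(NamedTuple):
    sid: str
    json: Any = None

    @property
    def url(self) -> str:
        # derived from the reddit permalink instead of stored as a field
        return 'https://reddit.com' + self.json['permalink']

def by_sid(saves: Iterable[Save]) -> Dict[str, Save]:
    # state keyed by the stable save id rather than by url
    return OrderedDict((s.sid, s) for s in saves)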

reddit/__init__.py Normal file → Executable file

@@ -1,3 +1,4 @@
+#!/usr/bin/env python3
 from typing import List, Dict, Union, Iterable, Iterator, NamedTuple, Any
 import json
 from collections import OrderedDict
@@ -6,11 +7,15 @@ import pytz
 import re
 from datetime import datetime
-from kython import kompress, cproperty
+from kython import kompress, cproperty, make_dict
+# TODO hmm. apparently decompressing takes quite a bit of time...

 BPATH = Path("/L/backups/reddit")

+def reddit(suffix: str) -> str:
+    return 'https://reddit.com' + suffix
+
 def _get_backups(all_=True) -> List[Path]:
     bfiles = list(sorted(BPATH.glob('reddit-*.json.xz')))
@@ -23,14 +28,18 @@ def _get_backups(all_=True) -> List[Path]:
 class Save(NamedTuple):
     dt: datetime
     title: str
-    url: str
     sid: str
     json: Any = None
+    # TODO subreddit-display name?

     def __hash__(self):
         return hash(self.sid)

+    @cproperty
+    def url(self) -> str:
+        # pylint: disable=unsubscriptable-object
+        pl = self.json['permalink']
+        return reddit(pl)
+
     @cproperty
     def text(self) -> str:
         bb = self.json.get('body', None)
@@ -59,6 +68,10 @@ class Event(NamedTuple):
     title: str
     url: str

+    @property
+    def cmp_key(self):
+        return (self.dt, (1 if 'unfavorited' in self.text else 0))
+
 # TODO kython?
 def get_some(d, *keys):
@@ -73,33 +86,34 @@ def get_some(d, *keys):

 Url = str

-# TODO OrderedDict
+# TODO shit. there does seem to be a difference...
 def get_state(bfile: Path) -> Dict[Url, Save]:
-    saves: Dict[Url, Save] = {}
+    saves: List[Save] = []
     with kompress.open(bfile) as fo:
         jj = json.load(fo)

     saved = jj['saved']
     for s in saved:
         dt = pytz.utc.localize(datetime.utcfromtimestamp(s['created_utc']))
-        url = get_some(s, 'link_permalink', 'url')
+        # TODO need permalink
+        # url = get_some(s, 'link_permalink', 'url') # this was original url...
         title = get_some(s, 'link_title', 'title')
         save = Save(
             dt=dt,
             title=title,
-            url=url,
             sid=s['id'],
             json=s,
         )
-        saves[save.url] = save
+        saves.append(save)

-        # "created_utc": 1535055017.0,
-        # link_title
-        # link_text
-    return OrderedDict(sorted(saves.items(), key=lambda p: p[1].dt))
-    return OrderedDict()
+    return make_dict(
+        sorted(saves, key=lambda p: p.dt), # TODO make helper to create lambda from property?
+        key=lambda s: s.sid,
+    )

-def get_events(all_=True):
+def get_events(all_=True) -> List[Event]:
     backups = _get_backups(all_=all_)
     assert len(backups) > 0
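make_dict comes from the author's private kython helper library, so its implementation is not part of this diff. Assuming it builds an ordered mapping from an iterable via a key function and rejects duplicate keys, a rough stand-in might look like this:

# Hypothetical stand-in for kython.make_dict; signature and behaviour are assumptions.
from collections import OrderedDict
from typing import Callable, Dict, Iterable, TypeVar

K = TypeVar('K')
V = TypeVar('V')

def make_dict(items: Iterable[V], key: Callable[[V], K]) -> Dict[K, V]:
    res: Dict[K, V] = OrderedDict()
    for item in items:
        k = key(item)
        assert k not in res, f'duplicate key: {k}'  # assumed: duplicates are an error
        res[k] = item
    return res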
@@ -123,23 +137,23 @@ def get_events(all_=True):
             else:
                 return btime

-        for l in set(prev_saves.keys()).symmetric_difference(set(saves.keys())):
-            if l in prev_saves:
-                s = prev_saves[l]
+        for key in set(prev_saves.keys()).symmetric_difference(set(saves.keys())):
+            ps = prev_saves.get(key, None)
+            if ps is not None:
                 # TODO use backup date, that is more precise...
                 events.append(Event(
-                    dt=etime(s.dt),
+                    dt=etime(ps.dt),
                     text=f"unfavorited",
-                    kind=s,
-                    eid=f'unf-{s.sid}',
-                    url=s.url,
-                    title=s.title,
+                    kind=ps,
+                    eid=f'unf-{ps.sid}',
+                    url=ps.url,
+                    title=ps.title,
                 ))
             else: # in saves
-                s = saves[l]
+                s = saves[key]
                 events.append(Event(
                     dt=etime(s.dt),
-                    text=f"favorited {' [initial]' if first else ''}",
+                    text=f"favorited {'[initial]' if first else ''}",
                     kind=s,
                     eid=f'fav-{s.sid}',
                     url=s.url,
@@ -147,7 +161,8 @@ def get_events(all_=True):
                 ))
         prev_saves = saves

-    return list(sorted(events, key=lambda e: e.dt))
+    # TODO a bit awkward, favorited should compare lower than unfavorited?
+    return list(sorted(events, key=lambda e: e.cmp_key))

 def get_saves(all_=True) -> List[Save]:
     # TODO hmm.... do we want ALL reddit saves I ever had?
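The new cmp_key orders events by timestamp first, and when a favorite and an unfavorite carry the same save timestamp (see the TODO above), the second tuple element makes 'favorited' sort before 'unfavorited'. A tiny illustration with a hypothetical timestamp:

# Illustration of the (dt, flag) key used by Event.cmp_key; the timestamp is made up.
from datetime import datetime

dt = datetime(2019, 1, 6, 12, 0)
fav_key   = (dt, 0)  # 'unfavorited' not in the event text
unfav_key = (dt, 1)  # 'unfavorited' in the event text
# with equal timestamps the flag breaks the tie, so the 'favorited' event sorts first
assert fav_key < unfav_key
assert sorted([unfav_key, fav_key]) == [fav_key, unfav_key]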
@@ -165,9 +180,27 @@ def test():
     get_saves(all_=False)

+# TODO fuck. pytest is broken??
+def test_unfav():
+    events = get_events(all_=True)
+    url = 'https://reddit.com/r/QuantifiedSelf/comments/acxy1v/personal_dashboard/'
+    uevents = [e for e in events if e.url == url]
+    assert len(uevents) == 2
+    ff = uevents[0]
+    assert ff.text == 'favorited [initial]'
+    uf = uevents[1]
+    assert uf.text == 'unfavorited'
+
 def main():
-    for e in get_events():
-        print(e)
+    events = get_events()
+    print(len(events))
+    for e in events:
+        print(e.text, e.url)
+    # for e in get_
+    # 509 with urls..
+    # for e in get_events():
+    #     print(e)

 if __name__ == '__main__':