pocket: reuse pockexport data access layer

BREAKING CHANGE! Data parsing was switched to pockexport.
This would help to keep it consistent across different apps in the future.

When you update, you'll need to:

- clone pockexport (latest version)
- set pockexport repository in your config (see doc/MODULES.org)
This commit is contained in:
Dima Gerasimov 2020-05-27 08:22:57 +01:00 committed by karlicoss
parent 6453ff415d
commit f175acc848
4 changed files with 87 additions and 50 deletions

View file

@ -19,6 +19,7 @@ If you have some issues with the setup, see [[file:SETUP.org::#troubleshooting][
- [[#mygoogletakeoutpaths][my.google.takeout.paths]]
- [[#myhypothesis][my.hypothesis]]
- [[#myreddit][my.reddit]]
- [[#mypocket][my.pocket]]
- [[#mytwittertwint][my.twitter.twint]]
- [[#mytwitterarchive][my.twitter.archive]]
- [[#mylastfm][my.lastfm]]
@ -67,6 +68,7 @@ modules = [
('google' , 'my.google.takeout.paths'),
('hypothesis' , 'my.hypothesis' ),
('reddit' , 'my.reddit' ),
('pocket' , 'my.pocket' ),
('twint' , 'my.twitter.twint' ),
('twitter_archive', 'my.twitter.archive' ),
('lastfm' , 'my.lastfm' ),
@ -149,6 +151,23 @@ for cls, p in modules:
# alternatively, you can put the repository (or a symlink) in $MY_CONFIG/my/config/repos/rexport
rexport : Optional[PathIsh] = None
#+end_src
** [[file:../my/pocket.py][my.pocket]]
[[https://getpocket.com][Pocket]] bookmarks and highlights
#+begin_src python
class pocket:
'''
Uses [[https://github.com/karlicoss/pockexport][pockexport]] outputs
'''
# paths[s]/glob to the exported JSON data
export_path: Paths
# path to a local clone of pockexport
# alternatively, you can put the repository (or a symlink) in $MY_CONFIG/my/config/repos/pockexport
pockexport : Optional[PathIsh] = None
#+end_src
** [[file:../my/twitter/twint.py][my.twitter.twint]]
Twitter data (tweets and favorites).

View file

@ -427,7 +427,7 @@ Several other HPI modules are following a similar pattern: hypothesis, instapape
** Twitter
Twitter is interesting, because it's an example of a data source that *arbitrates* between several data sources from the same service.
Twitter is interesting, because it's an example of an HPI module that *arbitrates* between several data sources from the same service.
The reason to use multiple in case of Twitter is:

View file

@ -58,8 +58,6 @@ def _dal() -> dal.DAL:
return dal.DAL(sources)
def highlights() -> List[Res[Highlight]]:
return sort_res_by(_dal().highlights(), key=lambda h: h.created)

View file

@ -1,57 +1,77 @@
from datetime import datetime
"""
[[https://getpocket.com][Pocket]] bookmarks and highlights
"""
from dataclasses import dataclass
from typing import Optional
from .core import Paths, PathIsh
from my.config import pocket as user_config
@dataclass
class pocket(user_config):
'''
Uses [[https://github.com/karlicoss/pockexport][pockexport]] outputs
'''
# paths[s]/glob to the exported JSON data
export_path: Paths
# path to a local clone of pockexport
# alternatively, you can put the repository (or a symlink) in $MY_CONFIG/my/config/repos/pockexport
pockexport : Optional[PathIsh] = None
@property
def dal_module(self):
rpath = self.pockexport
if rpath is not None:
from .core.common import import_dir
return import_dir(rpath, '.dal')
else:
import my.config.repos.pockexport.dal as dal
return dal
from .core.cfg import make_config
config = make_config(pocket)
from typing import TYPE_CHECKING
if TYPE_CHECKING:
import my.config.repos.pockexport.dal as dal
else:
dal = config.dal_module
############################
Article = dal.Article
from pathlib import Path
from typing import NamedTuple, Sequence, Any
from .common import get_files
from my.config import pocket as config
from typing import Sequence, Iterable
def _files():
return get_files(config.export_path, glob='*.json')
# todo not sure if should be defensive against empty?
def _dal() -> dal.DAL:
from .core import get_files
inputs = get_files(config.export_path)
return dal.DAL(inputs)
class Highlight(NamedTuple):
json: Any
@property
def text(self) -> str:
return self.json['quote']
@property
def created(self) -> datetime:
return datetime.strptime(self.json['created_at'], '%Y-%m-%d %H:%M:%S')
def articles() -> Iterable[Article]:
yield from _dal().articles()
class Article(NamedTuple):
json: Any
@property
def url(self) -> str:
return self.json['given_url']
@property
def title(self) -> str:
return self.json['given_title']
@property
def pocket_link(self) -> str:
return 'https://app.getpocket.com/read/' + self.json['item_id']
@property
def added(self) -> datetime:
return datetime.fromtimestamp(int(self.json['time_added']))
@property
def highlights(self) -> Sequence[Highlight]:
raw = self.json.get('annotations', [])
return list(map(Highlight, raw))
# TODO add tags?
def stats():
from itertools import chain
from more_itertools import ilen
# todo make stats more defensive?
return {
'articles' : ilen(articles()),
'highlights': ilen(chain.from_iterable(a.highlights for a in articles())),
}
# todo deprecate?
def get_articles() -> Sequence[Article]:
import json
last = _files()[-1]
raw = json.loads(last.read_text())['list']
return list(map(Article, raw.values()))
return list(articles())