pocket: reuse pockexport data access layer
BREAKING CHANGE! Data parsing has been switched to pockexport. This will help keep parsing consistent across different apps in the future. When you update, you'll need to: - clone pockexport (latest version) - set the pockexport repository path in your config (see doc/MODULES.org)
This commit is contained in:
parent
6453ff415d
commit
f175acc848
4 changed files with 87 additions and 50 deletions
|
@ -19,6 +19,7 @@ If you have some issues with the setup, see [[file:SETUP.org::#troubleshooting][
|
|||
- [[#mygoogletakeoutpaths][my.google.takeout.paths]]
|
||||
- [[#myhypothesis][my.hypothesis]]
|
||||
- [[#myreddit][my.reddit]]
|
||||
- [[#mypocket][my.pocket]]
|
||||
- [[#mytwittertwint][my.twitter.twint]]
|
||||
- [[#mytwitterarchive][my.twitter.archive]]
|
||||
- [[#mylastfm][my.lastfm]]
|
||||
|
@ -67,6 +68,7 @@ modules = [
|
|||
('google' , 'my.google.takeout.paths'),
|
||||
('hypothesis' , 'my.hypothesis' ),
|
||||
('reddit' , 'my.reddit' ),
|
||||
('pocket' , 'my.pocket' ),
|
||||
('twint' , 'my.twitter.twint' ),
|
||||
('twitter_archive', 'my.twitter.archive' ),
|
||||
('lastfm' , 'my.lastfm' ),
|
||||
|
@ -149,6 +151,23 @@ for cls, p in modules:
|
|||
# alternatively, you can put the repository (or a symlink) in $MY_CONFIG/my/config/repos/rexport
|
||||
rexport : Optional[PathIsh] = None
|
||||
#+end_src
|
||||
** [[file:../my/pocket.py][my.pocket]]
|
||||
|
||||
[[https://getpocket.com][Pocket]] bookmarks and highlights
|
||||
|
||||
#+begin_src python
|
||||
class pocket:
|
||||
'''
|
||||
Uses [[https://github.com/karlicoss/pockexport][pockexport]] outputs
|
||||
'''
|
||||
|
||||
# path[s]/glob to the exported JSON data
|
||||
export_path: Paths
|
||||
|
||||
# path to a local clone of pockexport
|
||||
# alternatively, you can put the repository (or a symlink) in $MY_CONFIG/my/config/repos/pockexport
|
||||
pockexport : Optional[PathIsh] = None
|
||||
#+end_src
|
||||
** [[file:../my/twitter/twint.py][my.twitter.twint]]
|
||||
|
||||
Twitter data (tweets and favorites).
|
||||
|
|
|
@ -427,7 +427,7 @@ Several other HPI modules are following a similar pattern: hypothesis, instapape
|
|||
|
||||
** Twitter
|
||||
|
||||
Twitter is interesting, because it's an example of a data source that *arbitrates* between several data sources from the same service.
|
||||
Twitter is interesting, because it's an example of an HPI module that *arbitrates* between several data sources from the same service.
|
||||
|
||||
The reason to use multiple data sources in the case of Twitter is:
|
||||
|
||||
|
|
|
@ -58,8 +58,6 @@ def _dal() -> dal.DAL:
|
|||
return dal.DAL(sources)
|
||||
|
||||
|
||||
|
||||
|
||||
def highlights() -> List[Res[Highlight]]:
|
||||
return sort_res_by(_dal().highlights(), key=lambda h: h.created)
|
||||
|
||||
|
|
114
my/pocket.py
114
my/pocket.py
|
@ -1,57 +1,77 @@
|
|||
from datetime import datetime
|
||||
"""
|
||||
[[https://getpocket.com][Pocket]] bookmarks and highlights
|
||||
"""
|
||||
from dataclasses import dataclass
|
||||
from typing import Optional
|
||||
|
||||
from .core import Paths, PathIsh
|
||||
|
||||
from my.config import pocket as user_config
|
||||
|
||||
|
||||
@dataclass
|
||||
class pocket(user_config):
|
||||
'''
|
||||
Uses [[https://github.com/karlicoss/pockexport][pockexport]] outputs
|
||||
'''
|
||||
|
||||
# path[s]/glob to the exported JSON data
|
||||
export_path: Paths
|
||||
|
||||
# path to a local clone of pockexport
|
||||
# alternatively, you can put the repository (or a symlink) in $MY_CONFIG/my/config/repos/pockexport
|
||||
pockexport : Optional[PathIsh] = None
|
||||
|
||||
@property
|
||||
def dal_module(self):
|
||||
rpath = self.pockexport
|
||||
if rpath is not None:
|
||||
from .core.common import import_dir
|
||||
return import_dir(rpath, '.dal')
|
||||
else:
|
||||
import my.config.repos.pockexport.dal as dal
|
||||
return dal
|
||||
|
||||
|
||||
from .core.cfg import make_config
|
||||
config = make_config(pocket)
|
||||
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
if TYPE_CHECKING:
|
||||
import my.config.repos.pockexport.dal as dal
|
||||
else:
|
||||
dal = config.dal_module
|
||||
|
||||
############################
|
||||
|
||||
Article = dal.Article
|
||||
|
||||
from pathlib import Path
|
||||
from typing import NamedTuple, Sequence, Any
|
||||
|
||||
from .common import get_files
|
||||
|
||||
from my.config import pocket as config
|
||||
from typing import Sequence, Iterable
|
||||
|
||||
|
||||
def _files():
|
||||
return get_files(config.export_path, glob='*.json')
|
||||
# todo not sure if should be defensive against empty?
|
||||
def _dal() -> dal.DAL:
|
||||
from .core import get_files
|
||||
inputs = get_files(config.export_path)
|
||||
return dal.DAL(inputs)
|
||||
|
||||
|
||||
class Highlight(NamedTuple):
|
||||
json: Any
|
||||
|
||||
@property
|
||||
def text(self) -> str:
|
||||
return self.json['quote']
|
||||
|
||||
@property
|
||||
def created(self) -> datetime:
|
||||
return datetime.strptime(self.json['created_at'], '%Y-%m-%d %H:%M:%S')
|
||||
def articles() -> Iterable[Article]:
|
||||
yield from _dal().articles()
|
||||
|
||||
|
||||
class Article(NamedTuple):
|
||||
json: Any
|
||||
|
||||
@property
|
||||
def url(self) -> str:
|
||||
return self.json['given_url']
|
||||
|
||||
@property
|
||||
def title(self) -> str:
|
||||
return self.json['given_title']
|
||||
|
||||
@property
|
||||
def pocket_link(self) -> str:
|
||||
return 'https://app.getpocket.com/read/' + self.json['item_id']
|
||||
|
||||
@property
|
||||
def added(self) -> datetime:
|
||||
return datetime.fromtimestamp(int(self.json['time_added']))
|
||||
|
||||
@property
|
||||
def highlights(self) -> Sequence[Highlight]:
|
||||
raw = self.json.get('annotations', [])
|
||||
return list(map(Highlight, raw))
|
||||
|
||||
# TODO add tags?
|
||||
def stats():
|
||||
from itertools import chain
|
||||
from more_itertools import ilen
|
||||
# todo make stats more defensive?
|
||||
return {
|
||||
'articles' : ilen(articles()),
|
||||
'highlights': ilen(chain.from_iterable(a.highlights for a in articles())),
|
||||
}
|
||||
|
||||
|
||||
# todo deprecate?
|
||||
def get_articles() -> Sequence[Article]:
|
||||
import json
|
||||
last = _files()[-1]
|
||||
raw = json.loads(last.read_text())['list']
|
||||
return list(map(Article, raw.values()))
|
||||
return list(articles())
|
||||
|
|
Loading…
Add table
Reference in a new issue