From e883304c17e4e7d0154e61e3ddd79bef3b5bc412 Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Wed, 27 May 2020 08:22:57 +0100 Subject: [PATCH] pocket: reuse pockexport data access layer BREAKING CHANGE! Data parsing was switched to pockexport. This would help to keep it consistent across different apps in the future. When you update, you'll need to: - clone pockexport (latest version) - set pockexport repository in your config (see doc/MODULES.org) --- doc/MODULES.org | 19 ++++++++ doc/SETUP.org | 2 +- my/hypothesis.py | 2 - my/pocket.py | 114 ++++++++++++++++++++++++++++------------------- 4 files changed, 87 insertions(+), 50 deletions(-) diff --git a/doc/MODULES.org b/doc/MODULES.org index d45f8a1..763bebd 100644 --- a/doc/MODULES.org +++ b/doc/MODULES.org @@ -19,6 +19,7 @@ If you have some issues with the setup, see [[file:SETUP.org::#troubleshooting][ - [[#mygoogletakeoutpaths][my.google.takeout.paths]] - [[#myhypothesis][my.hypothesis]] - [[#myreddit][my.reddit]] + - [[#mypocket][my.pocket]] - [[#mytwittertwint][my.twitter.twint]] - [[#mytwitterarchive][my.twitter.archive]] - [[#mylastfm][my.lastfm]] @@ -67,6 +68,7 @@ modules = [ ('google' , 'my.google.takeout.paths'), ('hypothesis' , 'my.hypothesis' ), ('reddit' , 'my.reddit' ), + ('pocket' , 'my.pocket' ), ('twint' , 'my.twitter.twint' ), ('twitter_archive', 'my.twitter.archive' ), ('lastfm' , 'my.lastfm' ), @@ -149,6 +151,23 @@ for cls, p in modules: # alternatively, you can put the repository (or a symlink) in $MY_CONFIG/my/config/repos/rexport rexport : Optional[PathIsh] = None #+end_src +** [[file:../my/pocket.py][my.pocket]] + + [[https://getpocket.com][Pocket]] bookmarks and highlights + + #+begin_src python + class pocket: + ''' + Uses [[https://github.com/karlicoss/pockexport][pockexport]] outputs + ''' + + # path[s]/glob to the exported JSON data + export_path: Paths + + # path to a local clone of pockexport + # alternatively, you can put the repository (or a symlink) in 
$MY_CONFIG/my/config/repos/pockexport + pockexport : Optional[PathIsh] = None + #+end_src ** [[file:../my/twitter/twint.py][my.twitter.twint]] Twitter data (tweets and favorites). diff --git a/doc/SETUP.org b/doc/SETUP.org index ba3ca45..bacb489 100644 --- a/doc/SETUP.org +++ b/doc/SETUP.org @@ -427,7 +427,7 @@ Several other HPI modules are following a similar pattern: hypothesis, instapape ** Twitter -Twitter is interesting, because it's an example of a data source that *arbitrates* between several data sources from the same service. +Twitter is interesting, because it's an example of an HPI module that *arbitrates* between several data sources from the same service. The reason to use multiple in case of Twitter is: diff --git a/my/hypothesis.py b/my/hypothesis.py index d3d95c2..10bec04 100644 --- a/my/hypothesis.py +++ b/my/hypothesis.py @@ -58,8 +58,6 @@ def _dal() -> dal.DAL: return dal.DAL(sources) - - def highlights() -> List[Res[Highlight]]: return sort_res_by(_dal().highlights(), key=lambda h: h.created) diff --git a/my/pocket.py b/my/pocket.py index 137ddeb..f20bb88 100644 --- a/my/pocket.py +++ b/my/pocket.py @@ -1,57 +1,77 @@ -from datetime import datetime +""" +[[https://getpocket.com][Pocket]] bookmarks and highlights +""" +from dataclasses import dataclass +from typing import Optional + +from .core import Paths, PathIsh + +from my.config import pocket as user_config + + +@dataclass +class pocket(user_config): + ''' + Uses [[https://github.com/karlicoss/pockexport][pockexport]] outputs + ''' + + # path[s]/glob to the exported JSON data + export_path: Paths + + # path to a local clone of pockexport + # alternatively, you can put the repository (or a symlink) in $MY_CONFIG/my/config/repos/pockexport + pockexport : Optional[PathIsh] = None + + @property + def dal_module(self): + rpath = self.pockexport + if rpath is not None: + from .core.common import import_dir + return import_dir(rpath, '.dal') + else: + import my.config.repos.pockexport.dal as dal + 
return dal + + +from .core.cfg import make_config +config = make_config(pocket) + + +from typing import TYPE_CHECKING +if TYPE_CHECKING: + import my.config.repos.pockexport.dal as dal +else: + dal = config.dal_module + +############################ + +Article = dal.Article + from pathlib import Path -from typing import NamedTuple, Sequence, Any - -from .common import get_files - -from my.config import pocket as config +from typing import Sequence, Iterable -def _files(): - return get_files(config.export_path, glob='*.json') +# todo not sure if should be defensive against empty? +def _dal() -> dal.DAL: + from .core import get_files + inputs = get_files(config.export_path) + return dal.DAL(inputs) -class Highlight(NamedTuple): - json: Any - - @property - def text(self) -> str: - return self.json['quote'] - - @property - def created(self) -> datetime: - return datetime.strptime(self.json['created_at'], '%Y-%m-%d %H:%M:%S') +def articles() -> Iterable[Article]: + yield from _dal().articles() -class Article(NamedTuple): - json: Any - - @property - def url(self) -> str: - return self.json['given_url'] - - @property - def title(self) -> str: - return self.json['given_title'] - - @property - def pocket_link(self) -> str: - return 'https://app.getpocket.com/read/' + self.json['item_id'] - - @property - def added(self) -> datetime: - return datetime.fromtimestamp(int(self.json['time_added'])) - - @property - def highlights(self) -> Sequence[Highlight]: - raw = self.json.get('annotations', []) - return list(map(Highlight, raw)) - - # TODO add tags? +def stats(): + from itertools import chain + from more_itertools import ilen + # todo make stats more defensive? + return { + 'articles' : ilen(articles()), + 'highlights': ilen(chain.from_iterable(a.highlights for a in articles())), + } +# todo deprecate? 
def get_articles() -> Sequence[Article]: - import json - last = _files()[-1] - raw = json.loads(last.read_text())['list'] - return list(map(Article, raw.values())) + return list(articles())