From 3b8b69d71ef461de66bc0254bbdd04a360aa5b53 Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Mon, 6 Aug 2018 19:17:33 +0100 Subject: [PATCH 1/9] init --- .gititnore | 172 +++++++++++++++++++++++++++++++++++++++++ hypothesis/__init__.py | 0 2 files changed, 172 insertions(+) create mode 100644 .gititnore create mode 100644 hypothesis/__init__.py diff --git a/.gititnore b/.gititnore new file mode 100644 index 0000000..b539013 --- /dev/null +++ b/.gititnore @@ -0,0 +1,172 @@ + +# Created by https://www.gitignore.io/api/python,emacs + +### Emacs ### +# -*- mode: gitignore; -*- +*~ +\#*\# +/.emacs.desktop +/.emacs.desktop.lock +*.elc +auto-save-list +tramp +.\#* + +# Org-mode +.org-id-locations +*_archive + +# flymake-mode +*_flymake.* + +# eshell files +/eshell/history +/eshell/lastdir + +# elpa packages +/elpa/ + +# reftex files +*.rel + +# AUCTeX auto folder +/auto/ + +# cask packages +.cask/ +dist/ + +# Flycheck +flycheck_*.el + +# server auth directory +/server/ + +# projectiles files +.projectile + +# directory configuration +.dir-locals.el + +### Python ### +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ + +### Python Patch ### +.venv/ + +### Python.VirtualEnv Stack ### +# Virtualenv +# http://iamzed.com/2009/05/07/a-primer-on-virtualenv/ +[Bb]in +[Ii]nclude +[Ll]ib +[Ll]ib64 +[Ll]ocal +[Ss]cripts +pyvenv.cfg +pip-selfcheck.json + + +# End of https://www.gitignore.io/api/python,emacs diff --git a/hypothesis/__init__.py b/hypothesis/__init__.py new file mode 100644 index 0000000..e69de29 From fd2715ab74630b0d8de63bee21336d55aff2c0b3 Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Mon, 6 Aug 2018 19:56:31 +0100 Subject: [PATCH 2/9] initial --- hypothesis/__init__.py | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/hypothesis/__init__.py b/hypothesis/__init__.py index e69de29..9bb17f2 100644 --- a/hypothesis/__init__.py +++ b/hypothesis/__init__.py @@ -0,0 +1,33 @@ +from functools import lru_cache +from kython import listdir_abs, json_load, JSONType +from typing import Dict, List, NamedTuple +from pytz import UTC +from datetime import datetime +import os + +# TODO maybe, it should generate some kind of html snippet? + + +_PATH = '/L/backups/hypothesis/' + +class Hypothesis(NamedTuple): + dt: datetime + text: str + tag: str + +# TODO guarantee order? +def _iter(): + last = max(listdir_abs(_PATH)) + j: JSONType + with open(last, 'r') as fo: + j = json_load(fo) + for i in j: + dts = i['created'] + title = ' '.join(i['document']['title']) + dt = datetime.strptime(dts[:-3] + dts[-2:], '%Y-%m-%dT%H:%M:%S.%f%z') + yield Hypothesis(dt, title, 'hyp') + + +@lru_cache() +def get_entries(): + return list(_iter()) From 942c55fe164d1ac86621b22246badc48b9fdb03d Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Fri, 12 Oct 2018 19:54:51 +0100 Subject: [PATCH 3/9] extract highlighted thing --- hypothesis/__init__.py | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/hypothesis/__init__.py b/hypothesis/__init__.py index 9bb17f2..2aa43e6 100644 --- a/hypothesis/__init__.py +++ b/hypothesis/__init__.py @@ -5,15 +5,14 @@ from pytz import UTC from datetime import datetime import os -# TODO maybe, it should generate some kind of html snippet? - - _PATH = '/L/backups/hypothesis/' -class Hypothesis(NamedTuple): +class Entry(NamedTuple): dt: datetime - text: str - tag: str + summary: str + content: str + link: str + eid: str # TODO guarantee order? def _iter(): @@ -24,10 +23,26 @@ def _iter(): for i in j: dts = i['created'] title = ' '.join(i['document']['title']) + selectors = i['target'][0].get('selector', None) + if selectors is None: + # TODO warn?... + selectors = [] + content = None + for s in selectors: + if 'exact' in s: + content = s['exact'] + break + eid = i['id'] + link = i['uri'] dt = datetime.strptime(dts[:-3] + dts[-2:], '%Y-%m-%dT%H:%M:%S.%f%z') - yield Hypothesis(dt, title, 'hyp') + yield Entry( + dt, + title, + content, + link, + eid, + ) -@lru_cache() def get_entries(): return list(_iter()) From 6b74de5493013b4d30bc72c2adeb5b459ec3154e Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Wed, 6 Feb 2019 23:09:19 +0000 Subject: [PATCH 4/9] extract todos, add more stuff --- hypothesis/__init__.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/hypothesis/__init__.py b/hypothesis/__init__.py index 2aa43e6..e5d6ab4 100644 --- a/hypothesis/__init__.py +++ b/hypothesis/__init__.py @@ -1,6 +1,6 @@ from functools import lru_cache from kython import listdir_abs, json_load, JSONType -from typing import Dict, List, NamedTuple +from typing import Dict, List, NamedTuple, Optional from pytz import UTC from datetime import datetime import os @@ -13,6 +13,9 @@ class Entry(NamedTuple): content: str link: str eid: str + annotation: Optional[str] + context: str + tags: List[str] # TODO guarantee order? def _iter(): @@ -35,14 +38,29 @@ def _iter(): eid = i['id'] link = i['uri'] dt = datetime.strptime(dts[:-3] + dts[-2:], '%Y-%m-%dT%H:%M:%S.%f%z') + txt = i['text'] + annotation = None if len(txt.strip()) == 0 else txt + context = i['links']['incontext'] yield Entry( dt, title, content, link, eid, + annotation=annotation, + context=context, + tags=i['tags'], ) def get_entries(): return list(_iter()) + +def get_todos(): + def is_todo(e: Entry) -> bool: + if any(t.lower() == 'todo' for t in e.tags): + return True + if e.annotation is None: + return False + return e.annotation.lstrip().lower().startswith('todo') + return list(filter(is_todo, get_entries())) From c5c71cf7f3f96e08732125f5997c7726d4358c89 Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Tue, 12 Mar 2019 12:06:55 +0000 Subject: [PATCH 5/9] make ruci happy --- hypothesis/__init__.py | 57 +++++++++++++++++++++++++++++++++++++----- 1 file changed, 51 insertions(+), 6 deletions(-) diff --git a/hypothesis/__init__.py b/hypothesis/__init__.py index e5d6ab4..d172b3a 100644 --- a/hypothesis/__init__.py +++ b/hypothesis/__init__.py @@ -1,28 +1,54 @@ from functools import lru_cache -from kython import listdir_abs, json_load, JSONType +from kython import listdir_abs from typing import Dict, List, NamedTuple, Optional +from pathlib import Path +import json from pytz import UTC from datetime import datetime import os +from kython import group_by_key +from kython.misc import the + _PATH = '/L/backups/hypothesis/' class Entry(NamedTuple): dt: datetime summary: str - content: str + content: Optional[str] # might be none if for instance we just marked page with tags. not sure if we want to handle it somehow separately link: str eid: str annotation: Optional[str] context: str tags: List[str] +Url = str + +class Page(NamedTuple): + highlights: List[Entry] + + @property + # @lru_cache() + def url(self): + return the(h.url for h in self.highlights) + + @property + # @lru_cache() + def title(self): + return the(h.title for h in self.highlights) + + @property + # @lru_cache() + # TODO shit. can't be cached because of self, wtf??? how to get around it?? + def dt(self): + return min(h.dt for h in self.highlights) + + # TODO guarantee order? def _iter(): last = max(listdir_abs(_PATH)) - j: JSONType - with open(last, 'r') as fo: - j = json_load(fo) + with Path(last).open() as fo: + j = json.load(fo) for i in j: dts = i['created'] title = ' '.join(i['document']['title']) @@ -30,7 +56,7 @@ def _iter(): if selectors is None: # TODO warn?... selectors = [] - content = None + content: Optional[str] for s in selectors: if 'exact' in s: content = s['exact'] @@ -53,9 +79,21 @@ def _iter(): ) +def get_pages() -> List[Page]: + grouped = group_by_key(_iter(), key=lambda e: e.link) + pages = [] + for link, group in grouped.items(): + group = list(sorted(group, key=lambda e: e.dt)) + pages.append(Page(highlights=group)) + pages = list(sorted(pages, key=lambda p: p.dt)) + # TODO fixme page tag?? + return pages + + def get_entries(): return list(_iter()) + def get_todos(): def is_todo(e: Entry) -> bool: if any(t.lower() == 'todo' for t in e.tags): @@ -64,3 +102,10 @@ def get_todos(): return False return e.annotation.lstrip().lower().startswith('todo') return list(filter(is_todo, get_entries())) + +def _main(): + for page in get_pages(): + print(page) + +if __name__ == '__main__': + _main() From 63fd835386693b2a28da51de439ca1753b5bed8b Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Wed, 13 Mar 2019 23:38:39 +0000 Subject: [PATCH 6/9] better support for pages; hashable, hypothesis link --- hypothesis/__init__.py | 51 ++++++++++++++++++++++-------------------- 1 file changed, 27 insertions(+), 24 deletions(-) diff --git a/hypothesis/__init__.py b/hypothesis/__init__.py index d172b3a..774298c 100644 --- a/hypothesis/__init__.py +++ b/hypothesis/__init__.py @@ -1,6 +1,6 @@ from functools import lru_cache from kython import listdir_abs -from typing import Dict, List, NamedTuple, Optional +from typing import Dict, List, NamedTuple, Optional, Sequence from pathlib import Path import json from pytz import UTC @@ -12,34 +12,36 @@ from kython.misc import the _PATH = '/L/backups/hypothesis/' +Url = str + class Entry(NamedTuple): dt: datetime summary: str content: Optional[str] # might be none if for instance we just marked page with tags. not sure if we want to handle it somehow separately - link: str + link: Url eid: str annotation: Optional[str] context: str - tags: List[str] + tags: Sequence[str] + hyp_link: str -Url = str + +# TODO kython?? +cache = lru_cache() +cproperty = lambda f: property(cache(f)) class Page(NamedTuple): - highlights: List[Entry] + highlights: Sequence[Entry] - @property - # @lru_cache() - def url(self): - return the(h.url for h in self.highlights) + @cproperty + def link(self): + return the(h.link for h in self.highlights) - @property - # @lru_cache() + @cproperty def title(self): - return the(h.title for h in self.highlights) + return the(h.summary for h in self.highlights) - @property - # @lru_cache() - # TODO shit. can't be cached because of self, wtf??? how to get around it?? + @cproperty def dt(self): return min(h.dt for h in self.highlights) @@ -68,14 +70,15 @@ def _iter(): annotation = None if len(txt.strip()) == 0 else txt context = i['links']['incontext'] yield Entry( - dt, - title, - content, - link, - eid, + dt=dt, + summary=title, + content=content, + link=link, + eid=eid, annotation=annotation, - context=context, - tags=i['tags'], + context=context, # TODO FIXME is context used anywhere? + tags=tuple(i['tags']), + hyp_link=context, ) @@ -83,8 +86,8 @@ def get_pages() -> List[Page]: grouped = group_by_key(_iter(), key=lambda e: e.link) pages = [] for link, group in grouped.items(): - group = list(sorted(group, key=lambda e: e.dt)) - pages.append(Page(highlights=group)) + sgroup = tuple(sorted(group, key=lambda e: e.dt)) + pages.append(Page(highlights=sgroup)) pages = list(sorted(pages, key=lambda p: p.dt)) # TODO fixme page tag?? return pages From 9f688cb50ae7a8ddbfcef710dc71e246c9c2021c Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Thu, 14 Mar 2019 20:38:34 +0000 Subject: [PATCH 7/9] use cproperty from kython --- hypothesis/__init__.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/hypothesis/__init__.py b/hypothesis/__init__.py index 774298c..f5e119b 100644 --- a/hypothesis/__init__.py +++ b/hypothesis/__init__.py @@ -7,8 +7,8 @@ from pytz import UTC from datetime import datetime import os -from kython import group_by_key -from kython.misc import the +from kython import group_by_key, the, cproperty + _PATH = '/L/backups/hypothesis/' @@ -26,10 +26,6 @@ class Entry(NamedTuple): hyp_link: str -# TODO kython?? -cache = lru_cache() -cproperty = lambda f: property(cache(f)) - class Page(NamedTuple): highlights: Sequence[Entry] @@ -106,6 +102,13 @@ def get_todos(): return e.annotation.lstrip().lower().startswith('todo') return list(filter(is_todo, get_entries())) + +def test(): + get_pages() + get_todos() + get_entries() + + def _main(): for page in get_pages(): print(page) From ad19c3c7dc9ff29cb6849e0c3bcfc08b40c976db Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Mon, 6 May 2019 21:41:45 +0000 Subject: [PATCH 8/9] fix None error --- hypothesis/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hypothesis/__init__.py b/hypothesis/__init__.py index f5e119b..47c0656 100644 --- a/hypothesis/__init__.py +++ b/hypothesis/__init__.py @@ -54,7 +54,7 @@ def _iter(): if selectors is None: # TODO warn?... selectors = [] - content: Optional[str] + content: Optional[str] = None for s in selectors: if 'exact' in s: content = s['exact'] From 4590f9c1393f9a9de3f66d836e05743177cb4830 Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Sat, 11 May 2019 20:47:07 +0100 Subject: [PATCH 9/9] title property --- hypothesis/__init__.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/hypothesis/__init__.py b/hypothesis/__init__.py index 47c0656..a30fa29 100644 --- a/hypothesis/__init__.py +++ b/hypothesis/__init__.py @@ -21,9 +21,13 @@ class Entry(NamedTuple): link: Url eid: str annotation: Optional[str] - context: str + context: Url tags: Sequence[str] - hyp_link: str + hyp_link: Url + + @property + def title(self): + return self.summary # TODO eh, remove one of them?... class Page(NamedTuple):