diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..b539013
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,172 @@
+
+# Created by https://www.gitignore.io/api/python,emacs
+
+### Emacs ###
+# -*- mode: gitignore; -*-
+*~
+\#*\#
+/.emacs.desktop
+/.emacs.desktop.lock
+*.elc
+auto-save-list
+tramp
+.\#*
+
+# Org-mode
+.org-id-locations
+*_archive
+
+# flymake-mode
+*_flymake.*
+
+# eshell files
+/eshell/history
+/eshell/lastdir
+
+# elpa packages
+/elpa/
+
+# reftex files
+*.rel
+
+# AUCTeX auto folder
+/auto/
+
+# cask packages
+.cask/
+dist/
+
+# Flycheck
+flycheck_*.el
+
+# server auth directory
+/server/
+
+# projectiles files
+.projectile
+
+# directory configuration
+.dir-locals.el
+
+### Python ###
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+
+### Python Patch ###
+.venv/
+
+### Python.VirtualEnv Stack ###
+# Virtualenv
+# http://iamzed.com/2009/05/07/a-primer-on-virtualenv/
+[Bb]in
+[Ii]nclude
+[Ll]ib
+[Ll]ib64
+[Ll]ocal
+[Ss]cripts
+pyvenv.cfg
+pip-selfcheck.json
+
+
+# End of https://www.gitignore.io/api/python,emacs
diff --git a/hypothesis/__init__.py b/hypothesis/__init__.py
new file mode 100644
--- /dev/null
+++ b/hypothesis/__init__.py
@@ -0,0 +1,157 @@
+"""Load annotation backups (JSON under ``_PATH``) as entries and pages."""
+from functools import lru_cache
+from kython import listdir_abs
+from typing import Dict, Iterator, List, NamedTuple, Optional, Sequence
+from pathlib import Path
+import json
+from pytz import UTC
+from datetime import datetime
+import os
+
+from kython import group_by_key, the, cproperty
+
+
+# Directory holding the backups; _iter() reads the entry that sorts last.
+_PATH = '/L/backups/hypothesis/'
+
+Url = str
+
+
+class Entry(NamedTuple):
+    """One annotation record parsed from the backup JSON."""
+
+    dt: datetime  # parsed from the record's 'created' field
+    summary: str  # page title; the page link when the record has no title
+    # Text of the first selector with an 'exact' key. Might be None if for
+    # instance we just marked the page with tags.
+    # TODO not sure if we want to handle that case somehow separately
+    content: Optional[str]
+    link: Url  # the record's 'uri'
+    eid: str  # the record's 'id'
+    annotation: Optional[str]  # the record's 'text'; None when it is blank
+    context: Url  # the record's links.incontext
+    tags: Sequence[str]
+    hyp_link: Url  # currently filled with the same value as ``context``
+
+    @property
+    def title(self):
+        """Return ``summary`` under the name ``Page`` also uses."""
+        return self.summary  # TODO eh, remove one of them?...
+
+
+class Page(NamedTuple):
+    """The entries that share one link (see ``get_pages``)."""
+
+    highlights: Sequence[Entry]
+
+    # NOTE(review): ``cproperty`` and ``the`` are kython helpers; presumably a
+    # cached property and "the one value all items agree on" -- confirm.
+    @cproperty
+    def link(self):
+        """Link shared by every highlight of the page."""
+        return the(h.link for h in self.highlights)
+
+    @cproperty
+    def title(self):
+        """Summary shared by every highlight of the page."""
+        return the(h.summary for h in self.highlights)
+
+    @cproperty
+    def dt(self):
+        """Timestamp of the earliest highlight."""
+        return min(h.dt for h in self.highlights)
+
+
+# TODO guarantee order?
+def _iter() -> Iterator[Entry]:
+    """Yield an ``Entry`` for each record of the newest backup.
+
+    "Newest" means the maximum of ``listdir_abs(_PATH)``; an empty listing
+    makes ``max`` raise ``ValueError``.
+    """
+    last = max(listdir_abs(_PATH))
+    # JSON is UTF-8; do not depend on the platform's default encoding.
+    with Path(last).open(encoding='utf-8') as fo:
+        records = json.load(fo)
+    for i in records:
+        link = i['uri']
+        # 'document' does not always carry a title; fall back to the link so
+        # every entry of a page still reports the same summary.
+        title_parts = i['document'].get('title')
+        title = link if title_parts is None else ' '.join(title_parts)
+
+        selectors = i['target'][0].get('selector', None)
+        if selectors is None:
+            # TODO warn?...
+            selectors = []
+        # Keep the first quoted text, if any selector has one.
+        content: Optional[str] = next(
+            (s['exact'] for s in selectors if 'exact' in s), None)
+
+        # Remove the third-from-last character, presumably the colon of a
+        # "+00:00" offset: strptime's %z only accepts it from Python 3.7 on.
+        dts = i['created']
+        dt = datetime.strptime(dts[:-3] + dts[-2:], '%Y-%m-%dT%H:%M:%S.%f%z')
+
+        txt = i['text']
+        annotation = txt if txt.strip() else None
+        context = i['links']['incontext']
+        yield Entry(
+            dt=dt,
+            summary=title,
+            content=content,
+            link=link,
+            eid=i['id'],
+            annotation=annotation,
+            context=context,  # TODO FIXME is context used anywhere?
+            tags=tuple(i['tags']),
+            hyp_link=context,
+        )
+
+
+def get_pages() -> List[Page]:
+    """Return one ``Page`` per link, ordered by the page's earliest entry."""
+    grouped = group_by_key(_iter(), key=lambda e: e.link)
+    pages = [
+        Page(highlights=tuple(sorted(group, key=lambda e: e.dt)))
+        for group in grouped.values()
+    ]
+    # TODO fixme page tag??
+    return sorted(pages, key=lambda p: p.dt)
+
+
+def get_entries() -> List[Entry]:
+    """Return every entry of the newest backup as a list."""
+    return list(_iter())
+
+
+def get_todos() -> List[Entry]:
+    """Return entries tagged 'todo' or whose annotation starts with 'todo'.
+
+    Both checks ignore case; leading whitespace of the annotation is skipped.
+    """
+    def is_todo(e: Entry) -> bool:
+        if any(t.lower() == 'todo' for t in e.tags):
+            return True
+        if e.annotation is None:
+            return False
+        return e.annotation.lstrip().lower().startswith('todo')
+
+    return [e for e in get_entries() if is_todo(e)]
+
+
+def test():
+    """Smoke test: each public getter runs against the real backup."""
+    get_pages()
+    get_todos()
+    get_entries()
+
+
+def _main():
+    """Print every page."""
+    for page in get_pages():
+        print(page)
+
+
+if __name__ == '__main__':
+    _main()