From 1ecb42c37aca4fd9ebc471ffcf330fa94985f39b Mon Sep 17 00:00:00 2001 From: Sean Breckenridge Date: Sun, 13 Feb 2022 15:04:39 -0800 Subject: [PATCH] split into active_browser and add all.py --- doc/MODULES.org | 10 +++--- my/browser/active_browser.py | 52 +++++++++++++++++++++++++++++++ my/browser/all.py | 35 +++++++++++++++++++++ my/browser/common.py | 11 +++++++ my/browser/export.py | 60 ++++++------------------------------ my/config.py | 3 +- 6 files changed, 115 insertions(+), 56 deletions(-) create mode 100644 my/browser/active_browser.py create mode 100644 my/browser/all.py create mode 100644 my/browser/common.py diff --git a/doc/MODULES.org b/doc/MODULES.org index 78ecf44..1f31931 100644 --- a/doc/MODULES.org +++ b/doc/MODULES.org @@ -88,7 +88,7 @@ You don't have to set up all modules at once, it's recommended to do it graduall export_path: Paths #+end_src -** [[file:../my/browser/export.py][my.browser.export]] +** [[file:../my/browser/][my.browser]] Parses browser history using [[http://github.com/seanbreckenridge/browserexport][browserexport]] @@ -99,12 +99,12 @@ You don't have to set up all modules at once, it's recommended to do it graduall # path[s]/glob to your backed up browser history sqlite files export_path: Paths - # paths to sqlite database files which you - # use actively, which should be combined into your history - # For example: + class active_browser: + # paths to sqlite database files which you use actively + # to read from. For example: # from browserexport.browsers.all import Firefox - # active_databases = Firefox.locate_database() + # export_path = Firefox.locate_database() - active_databases: Paths + export_path: Paths #+end_src # TODO hmm. drawer raw means it can output outlines, but then have to manually erase the generated results. ugh. 
diff --git a/my/browser/active_browser.py b/my/browser/active_browser.py new file mode 100644 index 0000000..38f2657 --- /dev/null +++ b/my/browser/active_browser.py @@ -0,0 +1,52 @@ +""" +Parses active browser history by backing it up with [[http://github.com/seanbreckenridge/sqlite_backup][sqlite_backup]] +""" + +REQUIRES = ["browserexport", "sqlite_backup"] + + +from my.config import browser as user_config +from my.core import Paths, dataclass + + +@dataclass +class config(user_config.active_browser): + # paths to sqlite database files which you use actively + # to read from. For example: + # from browserexport.browsers.all import Firefox + # export_path = Firefox.locate_database() + export_path: Paths + + +from pathlib import Path +from typing import Sequence, Iterator + +from my.core import get_files, Stats, LazyLogger +from browserexport.merge import read_visits, Visit +from sqlite_backup import sqlite_backup + +from .common import _patch_browserexport_logs + +_patch_browserexport_logs() + +logger = LazyLogger(__name__, level="warning") + + +def inputs() -> Sequence[Path]: + return get_files(config.export_path) + + +def history() -> Iterator[Visit]: + for ad in inputs(): + conn = sqlite_backup(ad) + assert conn is not None + try: + yield from read_visits(conn) + finally: + conn.close() + + +def stats() -> Stats: + from my.core import stat + + return {**stat(history)} diff --git a/my/browser/all.py b/my/browser/all.py new file mode 100644 index 0000000..a7d12b4 --- /dev/null +++ b/my/browser/all.py @@ -0,0 +1,35 @@ +from typing import Iterator + +from my.core import Stats +from my.core.source import import_source +from browserexport.merge import merge_visits, Visit + + +src_export = import_source(module_name="my.browser.export") +src_active = import_source(module_name="my.browser.active_browser") + + +@src_export +def _visits_export() -> Iterator[Visit]: + from . 
import export + return export.history() + + +@src_active +def _visits_active() -> Iterator[Visit]: + from . import active_browser + return active_browser.history() + + +# NOTE: you can comment out the sources you don't need +def history() -> Iterator[Visit]: + yield from merge_visits([ + _visits_active(), + _visits_export(), + ]) + + +def stats() -> Stats: + from my.core import stat + + return {**stat(history)} diff --git a/my/browser/common.py b/my/browser/common.py new file mode 100644 index 0000000..9427f61 --- /dev/null +++ b/my/browser/common.py @@ -0,0 +1,11 @@ +import os +from my.core.util import __NOT_HPI_MODULE__ + + +def _patch_browserexport_logs(): + # patch browserexport logs if HPI_LOGS is present + if "HPI_LOGS" in os.environ: + from browserexport.log import setup as setup_browserexport_logger + from my.core.logging import mklevel + + setup_browserexport_logger(mklevel(os.environ["HPI_LOGS"])) diff --git a/my/browser/export.py b/my/browser/export.py index f063a81..3185d53 100644 --- a/my/browser/export.py +++ b/my/browser/export.py @@ -13,74 +13,34 @@ class config(user_config.export): # path[s]/glob to your backed up browser history sqlite files export_path: Paths - # paths to sqlite database files which you - # use actively, which should be combined into your history - # For example: - # from browserexport.browsers.all import Firefox - # active_databases = Firefox.locate_database() - active_databases: Paths - -import os from pathlib import Path -from typing import Iterator, List - -from sqlite_backup import sqlite_backup +from typing import Iterator, Sequence, List from my.core import Stats, get_files, LazyLogger from my.core.common import mcachew +from browserexport.merge import read_and_merge, Visit -# patch browserexport logs if HPI_LOGS is present -if "HPI_LOGS" in os.environ: - from browserexport.log import setup as setup_browserexport_logger - from my.core.logging import mklevel - - setup_browserexport_logger(mklevel(os.environ["HPI_LOGS"])) 
+from .common import _patch_browserexport_logs logger = LazyLogger(__name__, level="warning") - -from browserexport.merge import read_and_merge, merge_visits, Visit -from browserexport.parse import read_visits +_patch_browserexport_logs() # all of my backed up databases -def inputs() -> List[Path]: - return list(get_files(config.export_path)) +def inputs() -> Sequence[Path]: + return get_files(config.export_path) -# return the visits from the active sqlite database, -# copying the active database into memory using -# https://github.com/seanbreckenridge/sqlite_backup -def _active_visits() -> List[Visit]: - visits: List[Visit] = [] - active_dbs = get_files(config.active_databases or "") - logger.debug(f"Reading from active databases: {active_dbs}") - for ad in active_dbs: - conn = sqlite_backup(ad) - assert conn is not None - try: - # read visits, so can close the in-memory connection - visits.extend(list(read_visits(conn))) - finally: - conn.close() - logger.debug(f"Read {len(visits)} visits from active databases") - return visits +def _cachew_depends_on() -> List[str]: + return [str(f) for f in inputs()] -Results = Iterator[Visit] - - -# don't put this behind cachew, since the active history database(s) -# are merged when this is called, whose contents may constantly change -def history() -> Results: - yield from merge_visits([_history_from_backups(), _active_visits()]) - - -@mcachew(depends_on=lambda: sorted(map(str, inputs())), logger=logger) -def _history_from_backups() -> Results: +@mcachew(depends_on=_cachew_depends_on, logger=logger) +def history() -> Iterator[Visit]: yield from read_and_merge(inputs()) diff --git a/my/config.py b/my/config.py index cac9bc6..5bb316f 100644 --- a/my/config.py +++ b/my/config.py @@ -133,4 +133,5 @@ class twitter: class browser: class export: export_path: Paths = '' - active_databases: Paths = '' + class active_browser: + export_path: Paths = ''