split into active_browser and add all.py

This commit is contained in:
Sean Breckenridge 2022-02-13 15:04:39 -08:00
parent 865e8e97a5
commit 1ecb42c37a
6 changed files with 115 additions and 56 deletions

View file

@ -13,74 +13,34 @@ class config(user_config.export):
# path[s]/glob to your backed up browser history sqlite files
export_path: Paths
# paths to the sqlite database files of the browsers you actively use;
# visits from these are combined into your history
# For example:
# from browserexport.browsers.all import Firefox
# active_databases = Firefox.locate_database()
active_databases: Paths
import os
from pathlib import Path
from typing import Iterator, List
from sqlite_backup import sqlite_backup
from typing import Iterator, Sequence, List
from my.core import Stats, get_files, LazyLogger
from my.core.common import mcachew
from browserexport.merge import read_and_merge, Visit
# When HPI_LOGS is set in the environment, hand its level over to
# browserexport's own logger setup; otherwise leave that logger alone.
if "HPI_LOGS" in os.environ:
    from my.core.logging import mklevel
    from browserexport.log import setup as setup_browserexport_logger

    setup_browserexport_logger(mklevel(os.environ["HPI_LOGS"]))
from .common import _patch_browserexport_logs
# module-level logger for this file, created at the "warning" level
logger = LazyLogger(__name__, level="warning")
from browserexport.merge import read_and_merge, merge_visits, Visit
from browserexport.parse import read_visits
# Runs once at import time.
# NOTE(review): presumably configures browserexport's logging (e.g. based on
# the HPI_LOGS environment variable) — confirm against the helper in .common
_patch_browserexport_logs()
def inputs() -> List[Path]:
    """Return, as a list, every backed-up database file matched by ``config.export_path``."""
    backups = get_files(config.export_path)
    return list(backups)
def inputs() -> Sequence[Path]:
    """Return the files that ``get_files`` resolves from ``config.export_path``."""
    backups = get_files(config.export_path)
    return backups
def _active_visits() -> List[Visit]:
    """Collect the visits stored in the actively-used sqlite database(s).

    Every file resolved from ``config.active_databases`` is copied into
    memory with sqlite_backup
    (https://github.com/seanbreckenridge/sqlite_backup), its visits are read
    in full, and the in-memory connection is closed before the next file is
    handled.
    """
    live_paths = get_files(config.active_databases or "")
    logger.debug(f"Reading from active databases: {live_paths}")

    collected: List[Visit] = []
    for db_path in live_paths:
        snapshot = sqlite_backup(db_path)
        assert snapshot is not None
        try:
            # materialise the visits now so the in-memory connection
            # can be closed straight away
            collected += list(read_visits(snapshot))
        finally:
            snapshot.close()

    logger.debug(f"Read {len(collected)} visits from active databases")
    return collected
def _cachew_depends_on() -> List[str]:
    """Return the string form of every path from ``inputs()``, in the same order."""
    return list(map(str, inputs()))
# Alias for an iterator of browserexport Visit objects, used as a return annotation below
Results = Iterator[Visit]
def history() -> Results:
    """Yield the backed-up visits merged with the visits from the active database(s).

    Intentionally not wrapped in cachew: the active history database(s) are
    merged in on every call, and their contents may change constantly.
    """
    sources = [_history_from_backups(), _active_visits()]
    yield from merge_visits(sources)
# NOTE(review): two decorated signatures precede the single body line at the
# bottom; this looks like the old and new versions of one function shown
# together with the diff markers lost — confirm which definition is current.
# Variant 1: depends_on is a callable returning the sorted string paths of inputs().
@mcachew(depends_on=lambda: sorted(map(str, inputs())), logger=logger)
def _history_from_backups() -> Results:
# Variant 2: depends_on is delegated to the _cachew_depends_on helper.
@mcachew(depends_on=_cachew_depends_on, logger=logger)
def history() -> Iterator[Visit]:
# Yield whatever read_and_merge produces for the files returned by inputs().
yield from read_and_merge(inputs())