split into active_browser and add all.py
This commit is contained in:
parent
865e8e97a5
commit
1ecb42c37a
6 changed files with 115 additions and 56 deletions
|
@ -88,7 +88,7 @@ You don't have to set up all modules at once, it's recommended to do it graduall
|
||||||
export_path: Paths
|
export_path: Paths
|
||||||
|
|
||||||
#+end_src
|
#+end_src
|
||||||
** [[file:../my/browser/export.py][my.browser.export]]
|
** [[file:../my/browser/][my.browser]]
|
||||||
|
|
||||||
Parses browser history using [[http://github.com/seanbreckenridge/browserexport][browserexport]]
|
Parses browser history using [[http://github.com/seanbreckenridge/browserexport][browserexport]]
|
||||||
|
|
||||||
|
@ -99,12 +99,12 @@ You don't have to set up all modules at once, it's recommended to do it graduall
|
||||||
# path[s]/glob to your backed up browser history sqlite files
|
# path[s]/glob to your backed up browser history sqlite files
|
||||||
export_path: Paths
|
export_path: Paths
|
||||||
|
|
||||||
# paths to sqlite database files which you
|
class active_browser:
|
||||||
# use actively, which should be combined into your history
|
# paths to sqlite database files which you use actively
|
||||||
# For example:
|
# to read from. For example:
|
||||||
# from browserexport.browsers.all import Firefox
|
# from browserexport.browsers.all import Firefox
|
||||||
# active_databases = Firefox.locate_database()
|
# export_path = Firefox.locate_database()
|
||||||
active_databases: Paths
|
export_path: Paths
|
||||||
#+end_src
|
#+end_src
|
||||||
|
|
||||||
# TODO hmm. drawer raw means it can output outlines, but then have to manually erase the generated results. ugh.
|
# TODO hmm. drawer raw means it can output outlines, but then have to manually erase the generated results. ugh.
|
||||||
|
|
52
my/browser/active_browser.py
Normal file
52
my/browser/active_browser.py
Normal file
|
@ -0,0 +1,52 @@
|
||||||
|
"""
|
||||||
|
Parses active browser history by backing it up with [[http://github.com/seanbreckenridge/sqlite_backup][sqlite_backup]]
|
||||||
|
"""
|
||||||
|
|
||||||
|
REQUIRES = ["browserexport", "sqlite_backup"]
|
||||||
|
|
||||||
|
|
||||||
|
from my.config import browser as user_config
|
||||||
|
from my.core import Paths, dataclass
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class config(user_config.active_browser):
|
||||||
|
# paths to sqlite database files which you use actively
|
||||||
|
# to read from. For example:
|
||||||
|
# from browserexport.browsers.all import Firefox
|
||||||
|
# export_path = Firefox.locate_database()
|
||||||
|
export_path: Paths
|
||||||
|
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Sequence, Iterator
|
||||||
|
|
||||||
|
from my.core import get_files, Stats, LazyLogger
|
||||||
|
from browserexport.merge import read_visits, Visit
|
||||||
|
from sqlite_backup import sqlite_backup
|
||||||
|
|
||||||
|
from .common import _patch_browserexport_logs
|
||||||
|
|
||||||
|
_patch_browserexport_logs()
|
||||||
|
|
||||||
|
logger = LazyLogger(__name__, level="warning")
|
||||||
|
|
||||||
|
|
||||||
|
def inputs() -> Sequence[Path]:
|
||||||
|
return get_files(config.export_path)
|
||||||
|
|
||||||
|
|
||||||
|
def history() -> Iterator[Visit]:
|
||||||
|
for ad in inputs():
|
||||||
|
conn = sqlite_backup(ad)
|
||||||
|
assert conn is not None
|
||||||
|
try:
|
||||||
|
yield from read_visits(conn)
|
||||||
|
finally:
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
def stats() -> Stats:
|
||||||
|
from my.core import stat
|
||||||
|
|
||||||
|
return {**stat(history)}
|
35
my/browser/all.py
Normal file
35
my/browser/all.py
Normal file
|
@ -0,0 +1,35 @@
|
||||||
|
from typing import Iterator
|
||||||
|
|
||||||
|
from my.core import Stats
|
||||||
|
from my.core.source import import_source
|
||||||
|
from browserexport.merge import merge_visits, Visit
|
||||||
|
|
||||||
|
|
||||||
|
src_export = import_source(module_name="my.browser.export")
|
||||||
|
src_active = import_source(module_name="my.browser.active_browser")
|
||||||
|
|
||||||
|
|
||||||
|
@src_export
|
||||||
|
def _visits_export() -> Iterator[Visit]:
|
||||||
|
from . import export
|
||||||
|
return export.history()
|
||||||
|
|
||||||
|
|
||||||
|
@src_active
|
||||||
|
def _visits_active() -> Iterator[Visit]:
|
||||||
|
from . import active_browser
|
||||||
|
return active_browser.history()
|
||||||
|
|
||||||
|
|
||||||
|
# NOTE: you can comment out the sources you don't need
|
||||||
|
def history() -> Iterator[Visit]:
|
||||||
|
yield from merge_visits([
|
||||||
|
_visits_active(),
|
||||||
|
_visits_export(),
|
||||||
|
])
|
||||||
|
|
||||||
|
|
||||||
|
def stats() -> Stats:
|
||||||
|
from my.core import stat
|
||||||
|
|
||||||
|
return {**stat(history)}
|
11
my/browser/common.py
Normal file
11
my/browser/common.py
Normal file
|
@ -0,0 +1,11 @@
|
||||||
|
import os
|
||||||
|
from my.core.util import __NOT_HPI_MODULE__
|
||||||
|
|
||||||
|
|
||||||
|
def _patch_browserexport_logs():
|
||||||
|
# patch browserexport logs if HPI_LOGS is present
|
||||||
|
if "HPI_LOGS" in os.environ:
|
||||||
|
from browserexport.log import setup as setup_browserexport_logger
|
||||||
|
from my.core.logging import mklevel
|
||||||
|
|
||||||
|
setup_browserexport_logger(mklevel(os.environ["HPI_LOGS"]))
|
|
@ -13,74 +13,34 @@ class config(user_config.export):
|
||||||
# path[s]/glob to your backed up browser history sqlite files
|
# path[s]/glob to your backed up browser history sqlite files
|
||||||
export_path: Paths
|
export_path: Paths
|
||||||
|
|
||||||
# paths to sqlite database files which you
|
|
||||||
# use actively, which should be combined into your history
|
|
||||||
# For example:
|
|
||||||
# from browserexport.browsers.all import Firefox
|
|
||||||
# active_databases = Firefox.locate_database()
|
|
||||||
active_databases: Paths
|
|
||||||
|
|
||||||
|
|
||||||
import os
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Iterator, List
|
from typing import Iterator, Sequence, List
|
||||||
|
|
||||||
from sqlite_backup import sqlite_backup
|
|
||||||
|
|
||||||
from my.core import Stats, get_files, LazyLogger
|
from my.core import Stats, get_files, LazyLogger
|
||||||
from my.core.common import mcachew
|
from my.core.common import mcachew
|
||||||
|
|
||||||
|
from browserexport.merge import read_and_merge, Visit
|
||||||
|
|
||||||
# patch browserexport logs if HPI_LOGS is present
|
from .common import _patch_browserexport_logs
|
||||||
if "HPI_LOGS" in os.environ:
|
|
||||||
from browserexport.log import setup as setup_browserexport_logger
|
|
||||||
from my.core.logging import mklevel
|
|
||||||
|
|
||||||
setup_browserexport_logger(mklevel(os.environ["HPI_LOGS"]))
|
|
||||||
|
|
||||||
|
|
||||||
logger = LazyLogger(__name__, level="warning")
|
logger = LazyLogger(__name__, level="warning")
|
||||||
|
|
||||||
|
_patch_browserexport_logs()
|
||||||
from browserexport.merge import read_and_merge, merge_visits, Visit
|
|
||||||
from browserexport.parse import read_visits
|
|
||||||
|
|
||||||
|
|
||||||
# all of my backed up databases
|
# all of my backed up databases
|
||||||
def inputs() -> List[Path]:
|
def inputs() -> Sequence[Path]:
|
||||||
return list(get_files(config.export_path))
|
return get_files(config.export_path)
|
||||||
|
|
||||||
|
|
||||||
# return the visits from the active sqlite database,
|
def _cachew_depends_on() -> List[str]:
|
||||||
# copying the active database into memory using
|
return [str(f) for f in inputs()]
|
||||||
# https://github.com/seanbreckenridge/sqlite_backup
|
|
||||||
def _active_visits() -> List[Visit]:
|
|
||||||
visits: List[Visit] = []
|
|
||||||
active_dbs = get_files(config.active_databases or "")
|
|
||||||
logger.debug(f"Reading from active databases: {active_dbs}")
|
|
||||||
for ad in active_dbs:
|
|
||||||
conn = sqlite_backup(ad)
|
|
||||||
assert conn is not None
|
|
||||||
try:
|
|
||||||
# read visits, so can close the in-memory connection
|
|
||||||
visits.extend(list(read_visits(conn)))
|
|
||||||
finally:
|
|
||||||
conn.close()
|
|
||||||
logger.debug(f"Read {len(visits)} visits from active databases")
|
|
||||||
return visits
|
|
||||||
|
|
||||||
|
|
||||||
Results = Iterator[Visit]
|
@mcachew(depends_on=_cachew_depends_on, logger=logger)
|
||||||
|
def history() -> Iterator[Visit]:
|
||||||
|
|
||||||
# don't put this behind cachew, since the active history database(s)
|
|
||||||
# are merged when this is called, whose contents may constantly change
|
|
||||||
def history() -> Results:
|
|
||||||
yield from merge_visits([_history_from_backups(), _active_visits()])
|
|
||||||
|
|
||||||
|
|
||||||
@mcachew(depends_on=lambda: sorted(map(str, inputs())), logger=logger)
|
|
||||||
def _history_from_backups() -> Results:
|
|
||||||
yield from read_and_merge(inputs())
|
yield from read_and_merge(inputs())
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -133,4 +133,5 @@ class twitter:
|
||||||
class browser:
|
class browser:
|
||||||
class export:
|
class export:
|
||||||
export_path: Paths = ''
|
export_path: Paths = ''
|
||||||
active_databases: Paths = ''
|
class active_browser:
|
||||||
|
export_path: Paths = ''
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue