browser: parse browser history using browserexport (#216)
* browser: parse browser history using browserexport
from seanbreckenridge/HPI module:
1fba8ccf2f/my/browser/export.py
This commit is contained in:
parent
059c4ae791
commit
9e5cd60ff2
7 changed files with 198 additions and 24 deletions
50
my/browser/active_browser.py
Normal file
50
my/browser/active_browser.py
Normal file
|
@ -0,0 +1,50 @@
|
|||
"""
|
||||
Parses active browser history by backing it up with [[http://github.com/seanbreckenridge/sqlite_backup][sqlite_backup]]
|
||||
"""
|
||||
|
||||
# Third-party packages this module imports below (browserexport, sqlite_backup).
# NOTE(review): presumably read by HPI's module tooling to install dependencies -- confirm.
REQUIRES = ["browserexport", "sqlite_backup"]
|
||||
|
||||
|
||||
from my.config import browser as user_config
|
||||
from my.core import Paths, dataclass
|
||||
|
||||
|
||||
@dataclass
class config(user_config.active_browser):
    # Paths to the sqlite database files of the browser(s) you actively use.
    # They are read through sqlite_backup rather than opened directly.
    # For example:
    #     from browserexport.browsers.all import Firefox
    #     export_path = Firefox.locate_database()
    export_path: Paths
|
||||
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Sequence, Iterator
|
||||
|
||||
from my.core import get_files, Stats
|
||||
from browserexport.merge import read_visits, Visit
|
||||
from sqlite_backup import sqlite_backup
|
||||
|
||||
from .common import _patch_browserexport_logs
|
||||
|
||||
# Runs once at import time; the helper only touches browserexport's logging
# when the HPI_LOGS environment variable is set.
_patch_browserexport_logs()
|
||||
|
||||
|
||||
def inputs() -> Sequence[Path]:
    """Return the database files that ``config.export_path`` resolves to."""
    database_files = get_files(config.export_path)
    return database_files
|
||||
|
||||
|
||||
def history() -> Iterator[Visit]:
    """Yield visits from every database returned by ``inputs()``.

    Each database is handed to ``sqlite_backup`` and the resulting connection
    is read with ``read_visits``. The connection is closed once its visits
    are exhausted, or if the consumer stops iterating early.
    """
    for database in inputs():
        snapshot = sqlite_backup(database)
        # sqlite_backup is typed as possibly returning None; narrow it here
        assert snapshot is not None
        try:
            yield from read_visits(snapshot)
        finally:
            # always release the connection, even on early exit or error
            snapshot.close()
|
||||
|
||||
|
||||
def stats() -> Stats:
    """Return the statistics ``my.core.stat`` computes for ``history``."""
    from my.core import stat

    history_stats = stat(history)
    return {**history_stats}
|
35
my/browser/all.py
Normal file
35
my/browser/all.py
Normal file
|
@ -0,0 +1,35 @@
|
|||
from typing import Iterator
|
||||
|
||||
from my.core import Stats
|
||||
from my.core.source import import_source
|
||||
from browserexport.merge import merge_visits, Visit
|
||||
|
||||
|
||||
# Decorator applied to _visits_export below.
# NOTE(review): presumably lets the merged history skip my.browser.export when
# that module cannot be imported -- confirm against my.core.source.import_source.
src_export = import_source(module_name="my.browser.export")
|
||||
# Decorator applied to _visits_active below.
# NOTE(review): presumably lets the merged history skip my.browser.active_browser
# when that module cannot be imported -- confirm against my.core.source.import_source.
src_active = import_source(module_name="my.browser.active_browser")
|
||||
|
||||
|
||||
@src_export
def _visits_export() -> Iterator[Visit]:
    """Return the visits produced by ``my.browser.export``."""
    # imported lazily so this module can be loaded without the source module
    from .export import history as export_history

    return export_history()
|
||||
|
||||
|
||||
@src_active
def _visits_active() -> Iterator[Visit]:
    """Return the visits produced by ``my.browser.active_browser``."""
    # imported lazily so this module can be loaded without the source module
    from .active_browser import history as active_history

    return active_history()
|
||||
|
||||
|
||||
# NOTE: you can comment out the sources you don't need
def history() -> Iterator[Visit]:
    """Yield the visits from all browser sources, combined by ``merge_visits``."""
    sources = [
        _visits_active(),
        _visits_export(),
    ]
    yield from merge_visits(sources)
|
||||
|
||||
|
||||
def stats() -> Stats:
    """Return the statistics ``my.core.stat`` computes for the merged ``history``."""
    from my.core import stat

    merged_stats = stat(history)
    return {**merged_stats}
|
11
my/browser/common.py
Normal file
11
my/browser/common.py
Normal file
|
@ -0,0 +1,11 @@
|
|||
import os
|
||||
from my.core.util import __NOT_HPI_MODULE__
|
||||
|
||||
|
||||
def _patch_browserexport_logs():
    """Configure browserexport's logger from the ``HPI_LOGS`` environment variable.

    Does nothing when ``HPI_LOGS`` is not set. Otherwise its value is converted
    with ``my.core.logging.mklevel`` and passed to ``browserexport.log.setup``.
    """
    requested_level = os.environ.get("HPI_LOGS")
    if requested_level is None:
        return

    # imported only when needed, so the no-op path has no import cost
    from browserexport.log import setup as setup_browserexport_logger
    from my.core.logging import mklevel

    setup_browserexport_logger(mklevel(requested_level))
|
50
my/browser/export.py
Normal file
50
my/browser/export.py
Normal file
|
@ -0,0 +1,50 @@
|
|||
"""
|
||||
Parses browser history using [[http://github.com/seanbreckenridge/browserexport][browserexport]]
|
||||
"""
|
||||
|
||||
# Third-party package this module imports below (browserexport).
# NOTE(review): presumably read by HPI's module tooling to install dependencies -- confirm.
REQUIRES = ["browserexport"]
|
||||
|
||||
from my.config import browser as user_config
|
||||
from my.core import Paths, dataclass
|
||||
|
||||
|
||||
@dataclass
class config(user_config.export):
    # Path(s) or glob pointing at your backed-up browser history sqlite files
    export_path: Paths
|
||||
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Iterator, Sequence, List
|
||||
|
||||
from my.core import Stats, get_files, LazyLogger
|
||||
from my.core.common import mcachew
|
||||
|
||||
from browserexport.merge import read_and_merge, Visit
|
||||
|
||||
from .common import _patch_browserexport_logs
|
||||
|
||||
|
||||
# Module logger created at level "warning"; it is also handed to mcachew on history() below.
logger = LazyLogger(__name__, level="warning")
|
||||
|
||||
# Runs once at import time; the helper only touches browserexport's logging
# when the HPI_LOGS environment variable is set.
_patch_browserexport_logs()
|
||||
|
||||
|
||||
# all of my backed up databases
def inputs() -> Sequence[Path]:
    """Return the backed-up database files that ``config.export_path`` resolves to."""
    backup_files = get_files(config.export_path)
    return backup_files
|
||||
|
||||
|
||||
def _cachew_depends_on() -> List[str]:
    """Return the input file paths as strings, used as the cache dependency for ``history``."""
    return list(map(str, inputs()))
|
||||
|
||||
|
||||
@mcachew(depends_on=_cachew_depends_on, logger=logger)
def history() -> Iterator[Visit]:
    """Yield the visits ``read_and_merge`` produces from all files in ``inputs()``.

    The result is wrapped by ``mcachew``, keyed on the list of input paths.
    """
    backed_up_databases = inputs()
    yield from read_and_merge(backed_up_databases)
|
||||
|
||||
|
||||
def stats() -> Stats:
    """Return the statistics ``my.core.stat`` computes for ``history``."""
    from my.core import stat

    history_stats = stat(history)
    return {**history_stats}
|
|
@ -129,3 +129,9 @@ class fbmessenger:
|
|||
class twitter:
    # settings namespace for the "talon" twitter source
    class talon:
        # declared without a default value
        export_path: Paths
|
||||
|
||||
class browser:
    # settings consumed by my.browser.export (config subclasses user_config.export)
    class export:
        export_path: Paths = ''

    # settings consumed by my.browser.active_browser
    # (config subclasses user_config.active_browser)
    class active_browser:
        export_path: Paths = ''
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue