general: migrate modules to use 3.9 features

Dima Gerasimov 2024-10-19 22:10:40 +01:00 committed by karlicoss
parent d3f9a8e8b6
commit 8496d131e7
125 changed files with 889 additions and 739 deletions
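In short: the migration replaces the deprecated typing aliases (List, Dict, Set, Tuple, Optional) with the PEP 585 builtin generics and collections.abc types that became subscriptable on Python 3.9, and rewrites Optional[X] as the PEP 604 spelling X | None. The | syntax itself only landed in 3.10, so the changed files also add `from __future__ import annotations` (PEP 563), which turns annotations into lazy strings and makes the new spelling safe on 3.9. A minimal before/after sketch, with illustrative names not taken from the diff:

from __future__ import annotations  # PEP 563: annotations are no longer evaluated at runtime

from collections.abc import Iterator, Sequence  # was: from typing import Iterator, Sequence

# before (pre-3.9 style):
#   def positives(xs: Sequence[int]) -> Optional[List[int]]: ...
# after:
def positives(xs: Sequence[int]) -> list[int] | None:  # PEP 585 list[...], PEP 604 `| None`
    out = [x for x in xs if x > 0]
    return out or None

def pairs(xs: Sequence[int]) -> Iterator[tuple[int, int]]:  # tuple[...] replaces typing.Tuple
    yield from zip(xs, xs[1:])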

View file

@ -2,20 +2,22 @@
[[https://github.com/nomeata/arbtt#arbtt-the-automatic-rule-based-time-tracker][Arbtt]] time tracking
'''
from __future__ import annotations
REQUIRES = ['ijson', 'cffi']
# NOTE likely also needs libyajl2 from apt or elsewhere?
from collections.abc import Iterable, Sequence
from dataclasses import dataclass
from pathlib import Path
from typing import Sequence, Iterable, List, Optional
def inputs() -> Sequence[Path]:
try:
from my.config import arbtt as user_config
except ImportError:
from .core.warnings import low
from my.core.warnings import low
low("Couldn't find 'arbtt' config section, falling back to the default capture.log (usually in HOME dir). Add 'arbtt' section with logfiles = '' to suppress this warning.")
return []
else:
@ -55,7 +57,7 @@ class Entry:
return fromisoformat(ds)
@property
def active(self) -> Optional[str]:
def active(self) -> str | None:
# NOTE: WIP, might change this in the future...
ait = (w for w in self.json['windows'] if w['active'])
a = next(ait, None)
@ -74,17 +76,18 @@ class Entry:
def entries() -> Iterable[Entry]:
inps = list(inputs())
base: List[PathIsh] = ['arbtt-dump', '--format=json']
base: list[PathIsh] = ['arbtt-dump', '--format=json']
cmds: List[List[PathIsh]]
cmds: list[list[PathIsh]]
if len(inps) == 0:
cmds = [base] # rely on default
else:
# otherwise, 'merge' them
cmds = [[*base, '--logfile', f] for f in inps]
from subprocess import PIPE, Popen
import ijson.backends.yajl2_cffi as ijson # type: ignore
from subprocess import Popen, PIPE
for cmd in cmds:
with Popen(cmd, stdout=PIPE) as p:
out = p.stdout; assert out is not None
@ -93,8 +96,8 @@ def entries() -> Iterable[Entry]:
def fill_influxdb() -> None:
from .core.influxdb import magic_fill
from .core.freezer import Freezer
from .core.influxdb import magic_fill
freezer = Freezer(Entry)
fit = (freezer.freeze(e) for e in entries())
# TODO crap, influxdb doesn't like None https://github.com/influxdata/influxdb/issues/7722
@ -106,6 +109,8 @@ def fill_influxdb() -> None:
magic_fill(fit, name=f'{entries.__module__}:{entries.__name__}')
from .core import stat, Stats
from .core import Stats, stat
def stats() -> Stats:
return stat(entries)

View file

@ -2,14 +2,17 @@
[[https://bluemaestro.com/products/product-details/bluetooth-environmental-monitor-and-logger][Bluemaestro]] temperature/humidity/pressure monitor
"""
from __future__ import annotations
# todo most of it belongs to DAL... but considering so few people use it I didn't bother for now
import re
import sqlite3
from abc import abstractmethod
from collections.abc import Iterable, Sequence
from dataclasses import dataclass
from datetime import datetime, timedelta
from pathlib import Path
from typing import Iterable, Optional, Protocol, Sequence, Set
from typing import Protocol
import pytz
@ -87,17 +90,17 @@ def measurements() -> Iterable[Res[Measurement]]:
total = len(paths)
width = len(str(total))
last: Optional[datetime] = None
last: datetime | None = None
# tables are immutable, so can save on processing..
processed_tables: Set[str] = set()
processed_tables: set[str] = set()
for idx, path in enumerate(paths):
logger.info(f'processing [{idx:>{width}}/{total:>{width}}] {path}')
tot = 0
new = 0
# todo assert increasing timestamp?
with sqlite_connect_immutable(path) as db:
db_dt: Optional[datetime] = None
db_dt: datetime | None = None
try:
datas = db.execute(
f'SELECT "{path.name}" as name, Time, Temperature, Humidity, Pressure, Dewpoint FROM data ORDER BY log_index'

View file

@ -2,41 +2,42 @@
Blood tracking (manual org-mode entries)
"""
from __future__ import annotations
from collections.abc import Iterable
from datetime import datetime
from typing import Iterable, NamedTuple, Optional
from typing import NamedTuple
from ..core.error import Res
from ..core.orgmode import parse_org_datetime, one_table
import pandas as pd
import orgparse
import pandas as pd
from my.config import blood as config # type: ignore[attr-defined]
from ..core.error import Res
from ..core.orgmode import one_table, parse_org_datetime
class Entry(NamedTuple):
dt: datetime
ketones : Optional[float]=None
glucose : Optional[float]=None
ketones : float | None=None
glucose : float | None=None
vitamin_d : Optional[float]=None
vitamin_b12 : Optional[float]=None
vitamin_d : float | None=None
vitamin_b12 : float | None=None
hdl : Optional[float]=None
ldl : Optional[float]=None
triglycerides: Optional[float]=None
hdl : float | None=None
ldl : float | None=None
triglycerides: float | None=None
source : Optional[str]=None
extra : Optional[str]=None
source : str | None=None
extra : str | None=None
Result = Res[Entry]
def try_float(s: str) -> Optional[float]:
def try_float(s: str) -> float | None:
l = s.split()
if len(l) == 0:
return None
@ -105,6 +106,7 @@ def blood_tests_data() -> Iterable[Result]:
def data() -> Iterable[Result]:
from itertools import chain
from ..core.error import sort_res_by
datas = chain(glucose_ketones_data(), blood_tests_data())
return sort_res_by(datas, key=lambda e: e.dt)

View file

@ -7,10 +7,10 @@ from ...core.pandas import DataFrameT, check_dataframe
@check_dataframe
def dataframe() -> DataFrameT:
# this should be somehow more flexible...
import pandas as pd
from ...endomondo import dataframe as EDF
from ...runnerup import dataframe as RDF
import pandas as pd
return pd.concat([
EDF(),
RDF(),

View file

@ -3,7 +3,6 @@ Cardio data, filtered from various data sources
'''
from ...core.pandas import DataFrameT, check_dataframe
CARDIO = {
'Running',
'Running, treadmill',

View file

@ -5,16 +5,18 @@ This is probably too specific to my needs, so later I will move it away to a per
For now it's worth keeping it here as an example and perhaps utility functions might be useful for other HPI modules.
'''
from datetime import datetime, timedelta
from typing import Optional
from __future__ import annotations
from ...core.pandas import DataFrameT, check_dataframe as cdf
from ...core.orgmode import collect, Table, parse_org_datetime, TypedTable
from datetime import datetime, timedelta
import pytz
from my.config import exercise as config
from ...core.orgmode import Table, TypedTable, collect, parse_org_datetime
from ...core.pandas import DataFrameT
from ...core.pandas import check_dataframe as cdf
import pytz
# FIXME how to attach it properly?
tz = pytz.timezone('Europe/London')
@ -114,7 +116,7 @@ def dataframe() -> DataFrameT:
rows.append(rd) # presumably has an error set
continue
idx: Optional[int]
idx: int | None
close = edf[edf['start_time'].apply(lambda t: pd_date_diff(t, mdate)).abs() < _DELTA]
if len(close) == 0:
idx = None
@ -163,7 +165,9 @@ def dataframe() -> DataFrameT:
# TODO wtf?? where is speed coming from??
from ...core import stat, Stats
from ...core import Stats, stat
def stats() -> Stats:
return stat(cross_trainer_data)

View file

@ -1,5 +1,6 @@
from ...core import stat, Stats
from ...core.pandas import DataFrameT, check_dataframe as cdf
from ...core import Stats, stat
from ...core.pandas import DataFrameT
from ...core.pandas import check_dataframe as cdf
class Combine:

View file

@ -1,7 +1,6 @@
from ... import jawbone
from ... import emfit
from ... import emfit, jawbone
from .common import Combine
_combined = Combine([
jawbone,
emfit,

View file

@ -2,14 +2,14 @@
Weight data (manually logged)
'''
from collections.abc import Iterator
from dataclasses import dataclass
from datetime import datetime
from typing import Any, Iterator
from my.core import make_logger
from my.core.error import Res, extract_error_datetime, set_error_datetime
from typing import Any
from my import orgmode
from my.core import make_logger
from my.core.error import Res, extract_error_datetime, set_error_datetime
config = Any

View file

@ -1,7 +1,6 @@
from ..core import warnings
from my.core import warnings
warnings.high('my.books.kobo is deprecated! Please use my.kobo instead!')
from ..core.util import __NOT_HPI_MODULE__
from ..kobo import * # type: ignore[no-redef]
from my.core.util import __NOT_HPI_MODULE__
from my.kobo import * # type: ignore[no-redef]

View file

@ -19,16 +19,18 @@ class config(user_config.active_browser):
export_path: Paths
from collections.abc import Iterator, Sequence
from pathlib import Path
from typing import Sequence, Iterator
from my.core import get_files, Stats, make_logger
from browserexport.merge import read_visits, Visit
from browserexport.merge import Visit, read_visits
from sqlite_backup import sqlite_backup
from my.core import Stats, get_files, make_logger
logger = make_logger(__name__)
from .common import _patch_browserexport_logs
_patch_browserexport_logs(logger.level)

View file

@ -1,9 +1,9 @@
from typing import Iterator
from collections.abc import Iterator
from browserexport.merge import Visit, merge_visits
from my.core import Stats
from my.core.source import import_source
from browserexport.merge import merge_visits, Visit
src_export = import_source(module_name="my.browser.export")
src_active = import_source(module_name="my.browser.active_browser")

View file

@ -4,11 +4,12 @@ Parses browser history using [[http://github.com/seanbreckenridge/browserexport]
REQUIRES = ["browserexport"]
from collections.abc import Iterator, Sequence
from dataclasses import dataclass
from pathlib import Path
from typing import Iterator, Sequence
import my.config
from browserexport.merge import Visit, read_and_merge
from my.core import (
Paths,
Stats,
@ -18,10 +19,10 @@ from my.core import (
)
from my.core.cachew import mcachew
from browserexport.merge import read_and_merge, Visit
from .common import _patch_browserexport_logs
import my.config # isort: skip
@dataclass
class config(my.config.browser.export):

View file

@ -3,24 +3,24 @@ Bumble data from Android app database (in =/data/data/com.bumble.app/databases/C
"""
from __future__ import annotations
from collections.abc import Iterator, Sequence
from dataclasses import dataclass
from datetime import datetime
from typing import Iterator, Sequence, Optional, Dict
from pathlib import Path
from more_itertools import unique_everseen
from my.config import bumble as user_config
from my.core import Paths, get_files
from my.config import bumble as user_config # isort: skip
from ..core import Paths
@dataclass
class config(user_config.android):
# paths[s]/glob to the exported sqlite databases
export_path: Paths
from ..core import get_files
from pathlib import Path
def inputs() -> Sequence[Path]:
return get_files(config.export_path)
@ -43,22 +43,24 @@ class _BaseMessage:
@dataclass(unsafe_hash=True)
class _Message(_BaseMessage):
conversation_id: str
reply_to_id: Optional[str]
reply_to_id: str | None
@dataclass(unsafe_hash=True)
class Message(_BaseMessage):
person: Person
reply_to: Optional[Message]
reply_to: Message | None
import json
from typing import Union
from ..core import Res
import sqlite3
from ..core.sqlite import sqlite_connect_immutable, select
from typing import Union
from my.core.compat import assert_never
from ..core import Res
from ..core.sqlite import select, sqlite_connect_immutable
EntitiesRes = Res[Union[Person, _Message]]
def _entities() -> Iterator[EntitiesRes]:
@ -120,8 +122,8 @@ _UNKNOWN_PERSON = "UNKNOWN_PERSON"
def messages() -> Iterator[Res[Message]]:
id2person: Dict[str, Person] = {}
id2msg: Dict[str, Message] = {}
id2person: dict[str, Person] = {}
id2msg: dict[str, Message] = {}
for x in unique_everseen(_entities(), key=_key):
if isinstance(x, Exception):
yield x
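A wrinkle visible in this file: `from typing import Union` survives even though the annotations switch to `|`. The future import defers annotations only, so a module-level alias like `EntitiesRes = Res[Union[Person, _Message]]` is still evaluated eagerly at import time, where `Person | _Message` would raise TypeError on 3.9 (types.UnionType only appeared in 3.10). A stripped-down sketch of the distinction; Person and _Message here are stand-in classes rather than the dataclasses above:

from __future__ import annotations

from typing import Union

class Person: ...
class _Message: ...

# executed at import time -> needs Union on 3.9; `Person | _Message` would raise TypeError
EntitiesRes = Union[Person, _Message]

# deferred by the future import -> the 3.10 spelling is fine inside annotations
def classify(x: EntitiesRes) -> Person | None:
    return x if isinstance(x, Person) else None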

View file

@ -16,6 +16,7 @@ from my.core.time import zone_to_countrycode
@lru_cache(1)
def _calendar():
from workalendar.registry import registry # type: ignore
# todo switch to using time.tz.main once _get_tz stabilizes?
from ..time.tz import via_location as LTZ
# TODO would be nice to do it dynamically depending on the past timezones...

View file

@ -1,7 +1,6 @@
import my.config as config
from .core import __NOT_HPI_MODULE__
from .core import warnings as W
# still used in Promnesia, maybe in dashboard?

View file

@ -1,13 +1,12 @@
import json
from collections.abc import Iterator, Sequence
from dataclasses import dataclass
from datetime import datetime, timezone
from functools import cached_property
import json
from pathlib import Path
from typing import Dict, Iterator, Sequence
from my.core import get_files, Res, datetime_aware
from my.config import codeforces as config # type: ignore[attr-defined]
from my.core import Res, datetime_aware, get_files
def inputs() -> Sequence[Path]:
@ -39,7 +38,7 @@ class Competition:
class Parser:
def __init__(self, *, inputs: Sequence[Path]) -> None:
self.inputs = inputs
self.contests: Dict[ContestId, Contest] = {}
self.contests: dict[ContestId, Contest] = {}
def _parse_allcontests(self, p: Path) -> Iterator[Contest]:
j = json.loads(p.read_text())

View file

@ -1,29 +1,32 @@
"""
Git commits data for repositories on your filesystem
"""
from __future__ import annotations
REQUIRES = [
'gitpython',
]
import shutil
from pathlib import Path
from datetime import datetime, timezone
from collections.abc import Iterator, Sequence
from dataclasses import dataclass, field
from typing import List, Optional, Iterator, Set, Sequence, cast
from datetime import datetime, timezone
from pathlib import Path
from typing import Optional, cast
from my.core import PathIsh, LazyLogger, make_config
from my.core import LazyLogger, PathIsh, make_config
from my.core.cachew import cache_dir, mcachew
from my.core.warnings import high
from my.config import commits as user_config # isort: skip
from my.config import commits as user_config
@dataclass
class commits_cfg(user_config):
roots: Sequence[PathIsh] = field(default_factory=list)
emails: Optional[Sequence[str]] = None
names: Optional[Sequence[str]] = None
emails: Sequence[str] | None = None
names: Sequence[str] | None = None
# experiment to make it lazy?
@ -40,7 +43,6 @@ def config() -> commits_cfg:
import git
from git.repo.fun import is_git_dir
log = LazyLogger(__name__, level='info')
@ -93,7 +95,7 @@ def _git_root(git_dir: PathIsh) -> Path:
return gd # must be bare
def _repo_commits_aux(gr: git.Repo, rev: str, emitted: Set[str]) -> Iterator[Commit]:
def _repo_commits_aux(gr: git.Repo, rev: str, emitted: set[str]) -> Iterator[Commit]:
# without path might not handle pull heads properly
for c in gr.iter_commits(rev=rev):
if not by_me(c):
@ -120,7 +122,7 @@ def _repo_commits_aux(gr: git.Repo, rev: str, emitted: Set[str]) -> Iterator[Com
def repo_commits(repo: PathIsh):
gr = git.Repo(str(repo))
emitted: Set[str] = set()
emitted: set[str] = set()
for r in gr.references:
yield from _repo_commits_aux(gr=gr, rev=r.path, emitted=emitted)
@ -141,14 +143,14 @@ def canonical_name(repo: Path) -> str:
def _fd_path() -> str:
# todo move it to core
fd_path: Optional[str] = shutil.which("fdfind") or shutil.which("fd-find") or shutil.which("fd")
fd_path: str | None = shutil.which("fdfind") or shutil.which("fd-find") or shutil.which("fd")
if fd_path is None:
high("my.coding.commits requires 'fd' to be installed, See https://github.com/sharkdp/fd#installation")
assert fd_path is not None
return fd_path
def git_repos_in(roots: List[Path]) -> List[Path]:
def git_repos_in(roots: list[Path]) -> list[Path]:
from subprocess import check_output
outputs = check_output([
_fd_path(),
@ -172,7 +174,7 @@ def git_repos_in(roots: List[Path]) -> List[Path]:
return repos
def repos() -> List[Path]:
def repos() -> list[Path]:
return git_repos_in(list(map(Path, config().roots)))
@ -190,7 +192,7 @@ def _repo_depends_on(_repo: Path) -> int:
raise RuntimeError(f"Could not find a FETCH_HEAD/HEAD file in {_repo}")
def _commits(_repos: List[Path]) -> Iterator[Commit]:
def _commits(_repos: list[Path]) -> Iterator[Commit]:
for r in _repos:
yield from _cached_commits(r)

View file

@ -1,6 +1,6 @@
from .core.warnings import high
high("DEPRECATED! Please use my.core.common instead.")
from .core import __NOT_HPI_MODULE__
from .core.common import *

View file

@ -9,17 +9,18 @@ This file is used for:
- mypy: this file provides some type annotations
- for loading the actual user config
'''
from __future__ import annotations
#### NOTE: you won't need this line VVVV in your personal config
from my.core import init # noqa: F401
from my.core import init # noqa: F401 # isort: skip
###
from datetime import tzinfo
from pathlib import Path
from typing import List
from my.core import Paths, PathIsh
from my.core import PathIsh, Paths
class hypothesis:
@ -75,14 +76,16 @@ class google:
takeout_path: Paths = ''
from typing import Sequence, Union, Tuple
from datetime import datetime, date, timedelta
from collections.abc import Sequence
from datetime import date, datetime, timedelta
from typing import Union
DateIsh = Union[datetime, date, str]
LatLon = Tuple[float, float]
LatLon = tuple[float, float]
class location:
# todo ugh, need to think about it... mypy wants the type here to be general, otherwise it can't deduce
# and we can't import the types from the module itself, otherwise would be circular. common module?
home: Union[LatLon, Sequence[Tuple[DateIsh, LatLon]]] = (1.0, -1.0)
home: LatLon | Sequence[tuple[DateIsh, LatLon]] = (1.0, -1.0)
home_accuracy = 30_000.0
class via_ip:
@ -103,6 +106,8 @@ class location:
from typing import Literal
class time:
class tz:
policy: Literal['keep', 'convert', 'throw']
@ -121,10 +126,9 @@ class arbtt:
logfiles: Paths
from typing import Optional
class commits:
emails: Optional[Sequence[str]]
names: Optional[Sequence[str]]
emails: Sequence[str] | None
names: Sequence[str] | None
roots: Sequence[PathIsh]
@ -150,8 +154,8 @@ class tinder:
class instagram:
class android:
export_path: Paths
username: Optional[str]
full_name: Optional[str]
username: str | None
full_name: str | None
class gdpr:
export_path: Paths
@ -169,7 +173,7 @@ class materialistic:
class fbmessenger:
class fbmessengerexport:
export_db: PathIsh
facebook_id: Optional[str]
facebook_id: str | None
class android:
export_path: Paths
@ -247,7 +251,7 @@ class runnerup:
class emfit:
export_path: Path
timezone: tzinfo
excluded_sids: List[str]
excluded_sids: list[str]
class foursquare:
@ -270,7 +274,7 @@ class roamresearch:
class whatsapp:
class android:
export_path: Paths
my_user_id: Optional[str]
my_user_id: str | None
class harmonic:

View file

@ -11,7 +11,7 @@ from collections.abc import Iterator, Sequence
from datetime import datetime
from functools import total_ordering
from pathlib import Path
from typing import IO, Any, Union
from typing import IO, Union
PathIsh = Union[Path, str]

View file

@ -63,7 +63,7 @@ def get_files(
if '*' in gs:
if glob != DEFAULT_GLOB:
warnings.medium(f"{caller()}: treating {gs} as glob path. Explicit glob={glob} argument is ignored!")
paths.extend(map(Path, do_glob(gs)))
paths.extend(map(Path, do_glob(gs))) # noqa: PTH207
elif os.path.isdir(str(src)): # noqa: PTH112
# NOTE: we're using os.path here on purpose instead of src.is_dir
# the reason is is_dir for archives might return True and then
@ -157,7 +157,7 @@ def get_valid_filename(s: str) -> str:
# TODO deprecate and suggest to use one from my.core directly? not sure
from .utils.itertools import unique_everseen
from .utils.itertools import unique_everseen # noqa: F401
### legacy imports, keeping them here for backwards compatibility
## hiding behind TYPE_CHECKING so it works in runtime
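Besides the typing changes, this hunk picks up two lint suppressions. PTH207 is ruff's flake8-use-pathlib rule against the glob module (the surrounding code intentionally sticks with os.path/glob for archive handling, per the NOTE above), and F401 flags an import that is unused locally but kept as a re-export, per the "legacy imports" comment. Roughly what each marker silences, as a self-contained snippet:

import glob
from pathlib import Path

# PTH207: ruff would otherwise suggest Path('.').glob('*.json') over the glob module
paths = [Path(p) for p in glob.glob('*.json')]  # noqa: PTH207
print(paths)

# F401 on the unique_everseen line above marks it as unused within the module itself,
# but the import is kept so that importing unique_everseen from here still works
# for downstream callers.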

View file

@ -1,12 +1,14 @@
'''
Just a demo module for testing and documentation purposes
'''
from __future__ import annotations
import json
from collections.abc import Iterable, Sequence
from dataclasses import dataclass
from datetime import datetime, timezone, tzinfo
from pathlib import Path
from typing import Iterable, Optional, Protocol, Sequence
from typing import Protocol
from my.core import Json, PathIsh, Paths, get_files
@ -20,7 +22,7 @@ class config(Protocol):
# this is to check optional attribute handling
timezone: tzinfo = timezone.utc
external: Optional[PathIsh] = None
external: PathIsh | None = None
@property
def external_module(self):

View file

@ -4,31 +4,34 @@
Consumes data exported by https://github.com/karlicoss/emfitexport
"""
from __future__ import annotations
REQUIRES = [
'git+https://github.com/karlicoss/emfitexport',
]
from contextlib import contextmanager
import dataclasses
from datetime import datetime, time, timedelta
import inspect
from collections.abc import Iterable, Iterator
from contextlib import contextmanager
from datetime import datetime, time, timedelta
from pathlib import Path
from typing import Any, Dict, Iterable, Iterator, List, Optional
from my.core import (
get_files,
stat,
Res,
Stats,
)
from my.core.cachew import cache_dir, mcachew
from my.core.error import set_error_datetime, extract_error_datetime
from my.core.pandas import DataFrameT
from my.config import emfit as config
from typing import Any
import emfitexport.dal as dal
from my.core import (
Res,
Stats,
get_files,
stat,
)
from my.core.cachew import cache_dir, mcachew
from my.core.error import extract_error_datetime, set_error_datetime
from my.core.pandas import DataFrameT
from my.config import emfit as config # isort: skip
Emfit = dal.Emfit
@ -85,7 +88,7 @@ def datas() -> Iterable[Res[Emfit]]:
# TODO should be used for jawbone data as well?
def pre_dataframe() -> Iterable[Res[Emfit]]:
# TODO shit. I need some sort of interrupted sleep detection?
g: List[Emfit] = []
g: list[Emfit] = []
def flush() -> Iterable[Res[Emfit]]:
if len(g) == 0:
@ -112,10 +115,10 @@ def pre_dataframe() -> Iterable[Res[Emfit]]:
def dataframe() -> DataFrameT:
dicts: List[Dict[str, Any]] = []
last: Optional[Emfit] = None
dicts: list[dict[str, Any]] = []
last: Emfit | None = None
for s in pre_dataframe():
d: Dict[str, Any]
d: dict[str, Any]
if isinstance(s, Exception):
edt = extract_error_datetime(s)
d = {
@ -166,11 +169,12 @@ def stats() -> Stats:
@contextmanager
def fake_data(nights: int = 500) -> Iterator:
from my.core.cfg import tmp_config
from tempfile import TemporaryDirectory
import pytz
from my.core.cfg import tmp_config
with TemporaryDirectory() as td:
tdir = Path(td)
gen = dal.FakeData()
@ -187,7 +191,7 @@ def fake_data(nights: int = 500) -> Iterator:
# TODO remove/deprecate it? I think used by timeline
def get_datas() -> List[Emfit]:
def get_datas() -> list[Emfit]:
# todo ugh. run lint properly
return sorted(datas(), key=lambda e: e.start) # type: ignore
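The other recurring change is import regrouping in isort order: stdlib, third-party, then first-party blocks, each alphabetized, with the user-supplied config import pinned to the end via `# isort: skip`, presumably so that my.config, which users provide and which may fail to import, is only touched after everything else is in place. The shape the emfit imports converge on, abbreviated from the hunk above:

from contextlib import contextmanager            # stdlib, alphabetical
from pathlib import Path
from typing import Any                           # only what PEP 585 can't replace

import emfitexport.dal as dal                    # third party

from my.core import Res, Stats, get_files, stat  # first party, names sorted
from my.core.pandas import DataFrameT

from my.config import emfit as config  # isort: skip  # pinned after the rest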

View file

@ -7,13 +7,14 @@ REQUIRES = [
]
# todo use ast in setup.py or doctor to extract the corresponding pip packages?
from collections.abc import Iterable, Sequence
from dataclasses import dataclass
from pathlib import Path
from typing import Sequence, Iterable
from my.config import endomondo as user_config
from .core import Paths, get_files
from my.config import endomondo as user_config
@dataclass
class endomondo(user_config):
@ -33,15 +34,17 @@ def inputs() -> Sequence[Path]:
import endoexport.dal as dal
from endoexport.dal import Point, Workout # noqa: F401
from .core import Res
# todo cachew?
def workouts() -> Iterable[Res[Workout]]:
_dal = dal.DAL(inputs())
yield from _dal.workouts()
from .core.pandas import check_dataframe, DataFrameT
from .core.pandas import DataFrameT, check_dataframe
@check_dataframe
def dataframe(*, defensive: bool=True) -> DataFrameT:
@ -75,7 +78,9 @@ def dataframe(*, defensive: bool=True) -> DataFrameT:
return df
from .core import stat, Stats
from .core import Stats, stat
def stats() -> Stats:
return {
# todo pretty print stats?
@ -86,13 +91,16 @@ def stats() -> Stats:
# TODO make sure it's possible to 'advise' functions and override stuff
from collections.abc import Iterator
from contextlib import contextmanager
from typing import Iterator
@contextmanager
def fake_data(count: int=100) -> Iterator:
from my.core.cfg import tmp_config
from tempfile import TemporaryDirectory
import json
from tempfile import TemporaryDirectory
from my.core.cfg import tmp_config
with TemporaryDirectory() as td:
tdir = Path(td)
fd = dal.FakeData()

View file

@ -1,6 +1,6 @@
from .core.warnings import high
high("DEPRECATED! Please use my.core.error instead.")
from .core import __NOT_HPI_MODULE__
from .core.error import *

View file

@ -1,5 +1,6 @@
from collections.abc import Iterator
from dataclasses import dataclass
from typing import Any, Iterator, List, Tuple
from typing import Any
from my.core.compat import NoneType, assert_never
@ -9,7 +10,7 @@ from my.core.compat import NoneType, assert_never
class Helper:
manager: 'Manager'
item: Any # todo realistically, list or dict? could at least type as indexable or something
path: Tuple[str, ...]
path: tuple[str, ...]
def pop_if_primitive(self, *keys: str) -> None:
"""
@ -40,9 +41,9 @@ def is_empty(x) -> bool:
class Manager:
def __init__(self) -> None:
self.helpers: List[Helper] = []
self.helpers: list[Helper] = []
def helper(self, item: Any, *, path: Tuple[str, ...] = ()) -> Helper:
def helper(self, item: Any, *, path: tuple[str, ...] = ()) -> Helper:
res = Helper(manager=self, item=item, path=path)
self.helpers.append(res)
return res

View file

@ -20,6 +20,7 @@ REQUIRES = [
from my.core.hpi_compat import handle_legacy_import
is_legacy_import = handle_legacy_import(
parent_module_name=__name__,
legacy_submodule_name='export',

View file

@ -1,10 +1,10 @@
from typing import Iterator
from my.core import Res, stat, Stats
from collections.abc import Iterator
from my.core import Res, Stats
from my.core.source import import_source
from .common import Message, _merge_messages
src_export = import_source(module_name='my.fbmessenger.export')
src_android = import_source(module_name='my.fbmessenger.android')

View file

@ -4,19 +4,20 @@ Messenger data from Android app database (in =/data/data/com.facebook.orca/datab
from __future__ import annotations
import sqlite3
from collections.abc import Iterator, Sequence
from dataclasses import dataclass
from datetime import datetime, timezone
from pathlib import Path
import sqlite3
from typing import Iterator, Sequence, Optional, Dict, Union, List
from typing import Union
from my.core import get_files, Paths, datetime_aware, Res, LazyLogger, make_config
from my.core import LazyLogger, Paths, Res, datetime_aware, get_files, make_config
from my.core.common import unique_everseen
from my.core.compat import assert_never
from my.core.error import echain
from my.core.sqlite import sqlite_connection
from my.config import fbmessenger as user_config
from my.config import fbmessenger as user_config # isort: skip
logger = LazyLogger(__name__)
@ -27,7 +28,7 @@ class Config(user_config.android):
# paths[s]/glob to the exported sqlite databases
export_path: Paths
facebook_id: Optional[str] = None
facebook_id: str | None = None
# hmm. this is necessary for default value (= None) to work
@ -42,13 +43,13 @@ def inputs() -> Sequence[Path]:
@dataclass(unsafe_hash=True)
class Sender:
id: str
name: Optional[str]
name: str | None
@dataclass(unsafe_hash=True)
class Thread:
id: str
name: Optional[str] # isn't set for groups or one to one messages
name: str | None # isn't set for groups or one to one messages
# todo not sure about order of fields...
@ -56,14 +57,14 @@ class Thread:
class _BaseMessage:
id: str
dt: datetime_aware
text: Optional[str]
text: str | None
@dataclass(unsafe_hash=True)
class _Message(_BaseMessage):
thread_id: str
sender_id: str
reply_to_id: Optional[str]
reply_to_id: str | None
# todo hmm, on the one hand would be kinda nice to inherit common.Message protocol here
@ -72,7 +73,7 @@ class _Message(_BaseMessage):
class Message(_BaseMessage):
thread: Thread
sender: Sender
reply_to: Optional[Message]
reply_to: Message | None
Entity = Union[Sender, Thread, _Message]
@ -110,7 +111,7 @@ def _normalise_thread_id(key) -> str:
# NOTE: this is sort of copy pasted from other _process_db method
# maybe later could unify them
def _process_db_msys(db: sqlite3.Connection) -> Iterator[Res[Entity]]:
senders: Dict[str, Sender] = {}
senders: dict[str, Sender] = {}
for r in db.execute('SELECT CAST(id AS TEXT) AS id, name FROM contacts'):
s = Sender(
id=r['id'], # looks like it's server id? same used on facebook site
@ -127,7 +128,7 @@ def _process_db_msys(db: sqlite3.Connection) -> Iterator[Res[Entity]]:
# TODO can we get it from db? could infer as the most common id perhaps?
self_id = config.facebook_id
thread_users: Dict[str, List[Sender]] = {}
thread_users: dict[str, list[Sender]] = {}
for r in db.execute('SELECT CAST(thread_key AS TEXT) AS thread_key, CAST(contact_id AS TEXT) AS contact_id FROM participants'):
thread_key = r['thread_key']
user_key = r['contact_id']
@ -193,7 +194,7 @@ def _process_db_msys(db: sqlite3.Connection) -> Iterator[Res[Entity]]:
def _process_db_threads_db2(db: sqlite3.Connection) -> Iterator[Res[Entity]]:
senders: Dict[str, Sender] = {}
senders: dict[str, Sender] = {}
for r in db.execute('''SELECT * FROM thread_users'''):
# for messaging_actor_type == 'REDUCED_MESSAGING_ACTOR', name is None
# but they are still referenced, so need to keep
@ -207,7 +208,7 @@ def _process_db_threads_db2(db: sqlite3.Connection) -> Iterator[Res[Entity]]:
yield s
self_id = config.facebook_id
thread_users: Dict[str, List[Sender]] = {}
thread_users: dict[str, list[Sender]] = {}
for r in db.execute('SELECT * from thread_participants'):
thread_key = r['thread_key']
user_key = r['user_key']
@ -267,9 +268,9 @@ def contacts() -> Iterator[Res[Sender]]:
def messages() -> Iterator[Res[Message]]:
senders: Dict[str, Sender] = {}
msgs: Dict[str, Message] = {}
threads: Dict[str, Thread] = {}
senders: dict[str, Sender] = {}
msgs: dict[str, Message] = {}
threads: dict[str, Thread] = {}
for x in unique_everseen(_entities):
if isinstance(x, Exception):
yield x

View file

@ -1,6 +1,9 @@
from my.core import __NOT_HPI_MODULE__
from __future__ import annotations
from typing import Iterator, Optional, Protocol
from my.core import __NOT_HPI_MODULE__ # isort: skip
from collections.abc import Iterator
from typing import Protocol
from my.core import datetime_aware
@ -10,7 +13,7 @@ class Thread(Protocol):
def id(self) -> str: ...
@property
def name(self) -> Optional[str]: ...
def name(self) -> str | None: ...
class Sender(Protocol):
@ -18,7 +21,7 @@ class Sender(Protocol):
def id(self) -> str: ...
@property
def name(self) -> Optional[str]: ...
def name(self) -> str | None: ...
class Message(Protocol):
@ -29,7 +32,7 @@ class Message(Protocol):
def dt(self) -> datetime_aware: ...
@property
def text(self) -> Optional[str]: ...
def text(self) -> str | None: ...
@property
def thread(self) -> Thread: ...
@ -39,8 +42,11 @@ class Message(Protocol):
from itertools import chain
from more_itertools import unique_everseen
from my.core import warn_if_empty, Res
from my.core import Res, warn_if_empty
@warn_if_empty
def _merge_messages(*sources: Iterator[Res[Message]]) -> Iterator[Res[Message]]:

View file

@ -7,16 +7,15 @@ REQUIRES = [
'git+https://github.com/karlicoss/fbmessengerexport',
]
from collections.abc import Iterator
from contextlib import ExitStack, contextmanager
from dataclasses import dataclass
from typing import Iterator
from my.core import PathIsh, Res, stat, Stats
from my.core.warnings import high
from my.config import fbmessenger as user_config
import fbmessengerexport.dal as messenger
from my.config import fbmessenger as user_config
from my.core import PathIsh, Res, Stats, stat
from my.core.warnings import high
###
# support old style config

View file

@ -2,15 +2,14 @@
Foursquare/Swarm checkins
'''
from datetime import datetime, timezone, timedelta
from itertools import chain
import json
from datetime import datetime, timedelta, timezone
from itertools import chain
# TODO pytz for timezone???
from my.core import get_files, make_logger
from my.config import foursquare as config
# TODO pytz for timezone???
from my.core import get_files, make_logger
logger = make_logger(__name__)

View file

@ -3,8 +3,7 @@ Unified Github data (merged from GDPR export and periodic API updates)
"""
from . import gdpr, ghexport
from .common import merge_events, Results
from .common import Results, merge_events
def events() -> Results:

View file

@ -1,17 +1,20 @@
"""
Github events and their metadata: comments/issues/pull requests
"""
from ..core import __NOT_HPI_MODULE__
from __future__ import annotations
from my.core import __NOT_HPI_MODULE__ # isort: skip
from collections.abc import Iterable
from datetime import datetime, timezone
from typing import Optional, NamedTuple, Iterable, Set, Tuple
from typing import NamedTuple, Optional
from ..core import warn_if_empty, LazyLogger
from ..core.error import Res
from my.core import make_logger, warn_if_empty
from my.core.error import Res
logger = LazyLogger(__name__)
logger = make_logger(__name__)
class Event(NamedTuple):
dt: datetime
@ -27,7 +30,7 @@ Results = Iterable[Res[Event]]
@warn_if_empty
def merge_events(*sources: Results) -> Results:
from itertools import chain
emitted: Set[Tuple[datetime, str]] = set()
emitted: set[tuple[datetime, str]] = set()
for e in chain(*sources):
if isinstance(e, Exception):
yield e
@ -52,7 +55,7 @@ def parse_dt(s: str) -> datetime:
# experimental way of supporting event ids... not sure
class EventIds:
@staticmethod
def repo_created(*, dts: str, name: str, ref_type: str, ref: Optional[str]) -> str:
def repo_created(*, dts: str, name: str, ref_type: str, ref: str | None) -> str:
return f'{dts}_repocreated_{name}_{ref_type}_{ref}'
@staticmethod

View file

@ -6,8 +6,9 @@ from __future__ import annotations
import json
from abc import abstractmethod
from collections.abc import Iterator, Sequence
from pathlib import Path
from typing import Any, Iterator, Sequence
from typing import Any
from my.core import Paths, Res, Stats, get_files, make_logger, stat, warnings
from my.core.error import echain

View file

@ -1,13 +1,17 @@
"""
Github data: events, comments, etc. (API data)
"""
from __future__ import annotations
REQUIRES = [
'git+https://github.com/karlicoss/ghexport',
]
from dataclasses import dataclass
from my.core import Paths
from my.config import github as user_config
from my.core import Paths
@dataclass
@ -21,7 +25,9 @@ class github(user_config):
###
from my.core.cfg import make_config, Attrs
from my.core.cfg import Attrs, make_config
def migration(attrs: Attrs) -> Attrs:
export_dir = 'export_dir'
if export_dir in attrs: # legacy name
@ -41,15 +47,14 @@ except ModuleNotFoundError as e:
############################
from collections.abc import Sequence
from functools import lru_cache
from pathlib import Path
from typing import Tuple, Dict, Sequence, Optional
from my.core import get_files, LazyLogger
from my.core import LazyLogger, get_files
from my.core.cachew import mcachew
from .common import Event, parse_dt, Results, EventIds
from .common import Event, EventIds, Results, parse_dt
logger = LazyLogger(__name__)
@ -82,7 +87,9 @@ def _events() -> Results:
yield e
from my.core import stat, Stats
from my.core import Stats, stat
def stats() -> Stats:
return {
**stat(events),
@ -99,7 +106,7 @@ def _log_if_unhandled(e) -> None:
Link = str
EventId = str
Body = str
def _get_summary(e) -> Tuple[str, Optional[Link], Optional[EventId], Optional[Body]]:
def _get_summary(e) -> tuple[str, Link | None, EventId | None, Body | None]:
# TODO would be nice to give access to raw event within timeline
dts = e['created_at']
eid = e['id']
@ -195,7 +202,7 @@ def _get_summary(e) -> Tuple[str, Optional[Link], Optional[EventId], Optional[Bo
return tp, None, None, None
def _parse_event(d: Dict) -> Event:
def _parse_event(d: dict) -> Event:
summary, link, eid, body = _get_summary(d)
if eid is None:
eid = d['id'] # meh

View file

@ -7,15 +7,18 @@ REQUIRES = [
from dataclasses import dataclass
from my.core import datetime_aware, Paths
from my.config import goodreads as user_config
from my.core import Paths, datetime_aware
@dataclass
class goodreads(user_config):
# paths[s]/glob to the exported JSON data
export_path: Paths
from my.core.cfg import make_config, Attrs
from my.core.cfg import Attrs, make_config
def _migration(attrs: Attrs) -> Attrs:
export_dir = 'export_dir'
@ -29,18 +32,19 @@ config = make_config(goodreads, migration=_migration)
#############################3
from my.core import get_files
from typing import Sequence, Iterator
from collections.abc import Iterator, Sequence
from pathlib import Path
from my.core import get_files
def inputs() -> Sequence[Path]:
return get_files(config.export_path)
from datetime import datetime
import pytz
from goodrexport import dal

View file

@ -1,8 +1,8 @@
from my.core import __NOT_HPI_MODULE__
from my.core import __NOT_HPI_MODULE__ # isort: skip
# NOTE: this tool was quite useful https://github.com/aj3423/aproto
from google.protobuf import descriptor_pool, descriptor_pb2, message_factory
from google.protobuf import descriptor_pb2, descriptor_pool, message_factory
TYPE_STRING = descriptor_pb2.FieldDescriptorProto.TYPE_STRING
TYPE_BYTES = descriptor_pb2.FieldDescriptorProto.TYPE_BYTES

View file

@ -7,20 +7,20 @@ REQUIRES = [
"protobuf", # for parsing blobs from the database
]
from collections.abc import Iterator, Sequence
from dataclasses import dataclass
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Iterator, Optional, Sequence
from typing import Any
from urllib.parse import quote
from my.core import datetime_aware, get_files, LazyLogger, Paths, Res
from my.core import LazyLogger, Paths, Res, datetime_aware, get_files
from my.core.common import unique_everseen
from my.core.sqlite import sqlite_connection
import my.config
from ._android_protobuf import parse_labeled, parse_list, parse_place
import my.config # isort: skip
logger = LazyLogger(__name__)
@ -59,8 +59,8 @@ class Place:
updated_at: datetime_aware # TODO double check it's utc?
title: str
location: Location
address: Optional[str]
note: Optional[str]
address: str | None
note: str | None
@property
def place_url(self) -> str:

View file

@ -2,18 +2,22 @@
Google Takeout exports: browsing history, search/youtube/google play activity
'''
from enum import Enum
from __future__ import annotations
from my.core import __NOT_HPI_MODULE__ # isort: skip
import re
from pathlib import Path
from collections.abc import Iterable
from datetime import datetime
from enum import Enum
from html.parser import HTMLParser
from typing import List, Optional, Any, Callable, Iterable, Tuple
from pathlib import Path
from typing import Any, Callable
from urllib.parse import unquote
import pytz
from ...core.time import abbr_to_timezone
from my.core.time import abbr_to_timezone
# NOTE: https://bugs.python.org/issue22377 %Z doesn't work properly
_TIME_FORMATS = [
@ -36,7 +40,7 @@ def parse_dt(s: str) -> datetime:
s, tzabbr = s.rsplit(maxsplit=1)
tz = abbr_to_timezone(tzabbr)
dt: Optional[datetime] = None
dt: datetime | None = None
for fmt in _TIME_FORMATS:
try:
dt = datetime.strptime(s, fmt)
@ -73,7 +77,7 @@ class State(Enum):
Url = str
Title = str
Parsed = Tuple[datetime, Url, Title]
Parsed = tuple[datetime, Url, Title]
Callback = Callable[[datetime, Url, Title], None]
@ -83,9 +87,9 @@ class TakeoutHTMLParser(HTMLParser):
super().__init__()
self.state: State = State.OUTSIDE
self.title_parts: List[str] = []
self.title: Optional[str] = None
self.url: Optional[str] = None
self.title_parts: list[str] = []
self.title: str | None = None
self.url: str | None = None
self.callback = callback
@ -148,7 +152,7 @@ class TakeoutHTMLParser(HTMLParser):
def read_html(tpath: Path, file: str) -> Iterable[Parsed]:
results: List[Parsed] = []
results: list[Parsed] = []
def cb(dt: datetime, url: Url, title: Title) -> None:
results.append((dt, url, title))
parser = TakeoutHTMLParser(callback=cb)
@ -156,5 +160,3 @@ def read_html(tpath: Path, file: str) -> Iterable[Parsed]:
data = fo.read()
parser.feed(data)
return results
from ...core import __NOT_HPI_MODULE__
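Several hunks like this one also hoist the `__NOT_HPI_MODULE__` marker (which HPI's module discovery uses to skip helper files) from the bottom of the file to the very top, with `# isort: skip` so the regrouping doesn't fold it into the normal import blocks. A sketch of the resulting file header, module contents illustrative:

from my.core import __NOT_HPI_MODULE__  # isort: skip  # helper file, not a user-facing HPI module

import re
from collections.abc import Iterable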

View file

@ -14,24 +14,27 @@ the cachew cache
REQUIRES = ["git+https://github.com/seanbreckenridge/google_takeout_parser"]
import os
from collections.abc import Sequence
from contextlib import ExitStack
from dataclasses import dataclass
import os
from typing import List, Sequence, cast
from pathlib import Path
from my.core import make_config, stat, Stats, get_files, Paths, make_logger
from typing import cast
from google_takeout_parser.parse_html.html_time_utils import ABBR_TIMEZONES
from my.core import Paths, Stats, get_files, make_config, make_logger, stat
from my.core.cachew import mcachew
from my.core.error import ErrorPolicy
from my.core.structure import match_structure
from my.core.time import user_forced
from google_takeout_parser.parse_html.html_time_utils import ABBR_TIMEZONES
ABBR_TIMEZONES.extend(user_forced())
import google_takeout_parser
from google_takeout_parser.path_dispatch import TakeoutParser
from google_takeout_parser.merge import GoogleEventSet, CacheResults
from google_takeout_parser.merge import CacheResults, GoogleEventSet
from google_takeout_parser.models import BaseEvent
from google_takeout_parser.path_dispatch import TakeoutParser
# see https://github.com/seanbreckenridge/dotfiles/blob/master/.config/my/my/config/__init__.py for an example
from my.config import google as user_config
@ -56,6 +59,7 @@ logger = make_logger(__name__, level="warning")
# patch the takeout parser logger to match the computed loglevel
from google_takeout_parser.log import setup as setup_takeout_logger
setup_takeout_logger(logger.level)
@ -83,7 +87,7 @@ except ImportError:
google_takeout_version = str(getattr(google_takeout_parser, '__version__', 'unknown'))
def _cachew_depends_on() -> List[str]:
def _cachew_depends_on() -> list[str]:
exports = sorted([str(p) for p in inputs()])
# add google takeout parser pip version to hash, so this re-creates on breaking changes
exports.insert(0, f"google_takeout_version: {google_takeout_version}")

View file

@ -2,13 +2,17 @@
Module for locating and accessing [[https://takeout.google.com][Google Takeout]] data
'''
from __future__ import annotations
from my.core import __NOT_HPI_MODULE__ # isort: skip
from abc import abstractmethod
from collections.abc import Iterable
from pathlib import Path
from typing import Iterable, Optional, Protocol
from more_itertools import last
from my.core import __NOT_HPI_MODULE__, Paths, get_files
from my.core import Paths, get_files
class config:
@ -33,7 +37,7 @@ def make_config() -> config:
return combined_config()
def get_takeouts(*, path: Optional[str] = None) -> Iterable[Path]:
def get_takeouts(*, path: str | None = None) -> Iterable[Path]:
"""
Sometimes google splits takeout into multiple archives, so we need to detect the ones that contain the path we need
"""
@ -45,7 +49,7 @@ def get_takeouts(*, path: Optional[str] = None) -> Iterable[Path]:
yield takeout
def get_last_takeout(*, path: Optional[str] = None) -> Optional[Path]:
def get_last_takeout(*, path: str | None = None) -> Path | None:
return last(get_takeouts(path=path), default=None)

View file

@ -3,14 +3,14 @@ Hackernews data via Dogsheep [[hacker-news-to-sqlite][https://github.com/dogshee
"""
from __future__ import annotations
from collections.abc import Iterator, Sequence
from dataclasses import dataclass
from datetime import datetime, timezone
from pathlib import Path
from typing import Iterator, Sequence, Optional
from my.core import get_files, Paths, Res, datetime_aware
from my.core.sqlite import sqlite_connection
import my.config
from my.core import Paths, Res, datetime_aware, get_files
from my.core.sqlite import sqlite_connection
from .common import hackernews_link
@ -33,9 +33,9 @@ class Item:
id: str
type: str
created: datetime_aware # checked and it's utc
title: Optional[str] # only present for Story
text_html: Optional[str] # should be present for Comment and might for Story
url: Optional[str] # might be present for Story
title: str | None # only present for Story
text_html: str | None # should be present for Comment and might for Story
url: str | None # might be present for Story
# todo process 'deleted'? fields?
# todo process 'parent'?

View file

@ -1,17 +1,22 @@
"""
[[https://play.google.com/store/apps/details?id=com.simon.harmonichackernews][Harmonic]] app for Hackernews
"""
from __future__ import annotations
REQUIRES = ['lxml', 'orjson']
from collections.abc import Iterator, Sequence
from dataclasses import dataclass
from datetime import datetime, timezone
import orjson
from pathlib import Path
from typing import Any, Dict, Iterator, List, Optional, Sequence, TypedDict, cast
from typing import Any, TypedDict, cast
import orjson
from lxml import etree
from more_itertools import one
import my.config
from my.core import (
Paths,
Res,
@ -22,8 +27,10 @@ from my.core import (
stat,
)
from my.core.common import unique_everseen
import my.config
from .common import hackernews_link, SavedBase
from .common import SavedBase, hackernews_link
import my.config # isort: skip
logger = make_logger(__name__)
@ -43,7 +50,7 @@ class Cached(TypedDict):
created_at_i: int
id: str
points: int
test: Optional[str]
test: str | None
title: str
type: str # TODO Literal['story', 'comment']? comments are only in 'children' field tho
url: str
@ -94,16 +101,16 @@ def _saved() -> Iterator[Res[Saved]]:
# TODO defensive for each item!
tr = etree.parse(path)
res = one(cast(List[Any], tr.xpath(f'//*[@name="{_PREFIX}_CACHED_STORIES_STRINGS"]')))
res = one(cast(list[Any], tr.xpath(f'//*[@name="{_PREFIX}_CACHED_STORIES_STRINGS"]')))
cached_ids = [x.text.split('-')[0] for x in res]
cached: Dict[str, Cached] = {}
cached: dict[str, Cached] = {}
for sid in cached_ids:
res = one(cast(List[Any], tr.xpath(f'//*[@name="{_PREFIX}_CACHED_STORY{sid}"]')))
res = one(cast(list[Any], tr.xpath(f'//*[@name="{_PREFIX}_CACHED_STORY{sid}"]')))
j = orjson.loads(res.text)
cached[sid] = j
res = one(cast(List[Any], tr.xpath(f'//*[@name="{_PREFIX}_BOOKMARKS"]')))
res = one(cast(list[Any], tr.xpath(f'//*[@name="{_PREFIX}_BOOKMARKS"]')))
for x in res.text.split('-'):
ids, item_timestamp = x.split('q')
# not sure if timestamp is any useful?

View file

@ -1,19 +1,20 @@
"""
[[https://play.google.com/store/apps/details?id=io.github.hidroh.materialistic][Materialistic]] app for Hackernews
"""
from collections.abc import Iterator, Sequence
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, Iterator, NamedTuple, Sequence
from typing import Any, NamedTuple
from more_itertools import unique_everseen
from my.core import get_files, datetime_aware, make_logger
from my.core import datetime_aware, get_files, make_logger
from my.core.sqlite import sqlite_connection
from my.config import materialistic as config # todo migrate config to my.hackernews.materialistic
from .common import hackernews_link
# todo migrate config to my.hackernews.materialistic
from my.config import materialistic as config # isort: skip
logger = make_logger(__name__)
@ -22,7 +23,7 @@ def inputs() -> Sequence[Path]:
return get_files(config.export_path)
Row = Dict[str, Any]
Row = dict[str, Any]
class Saved(NamedTuple):

View file

@ -4,20 +4,22 @@
REQUIRES = [
'git+https://github.com/karlicoss/hypexport',
]
from collections.abc import Iterator, Sequence
from dataclasses import dataclass
from pathlib import Path
from typing import Iterator, Sequence, TYPE_CHECKING
from typing import TYPE_CHECKING
from my.core import (
get_files,
stat,
Paths,
Res,
Stats,
get_files,
stat,
)
from my.core.cfg import make_config
from my.core.hpi_compat import always_supports_sequence
import my.config
import my.config # isort: skip
@dataclass

View file

@ -1,11 +1,10 @@
from typing import Iterator
from collections.abc import Iterator
from my.core import Res, stat, Stats
from my.core import Res, Stats, stat
from my.core.source import import_source
from .common import Message, _merge_messages
src_gdpr = import_source(module_name='my.instagram.gdpr')
@src_gdpr
def _messages_gdpr() -> Iterator[Res[Message]]:

View file

@ -3,30 +3,29 @@ Instagram data from Android app database (in =/data/data/com.instagram.android/data
"""
from __future__ import annotations
import json
import sqlite3
from collections.abc import Iterator, Sequence
from dataclasses import dataclass
from datetime import datetime
import json
from pathlib import Path
import sqlite3
from typing import Iterator, Sequence, Optional, Dict, Union
from my.core import (
get_files,
Paths,
make_config,
make_logger,
datetime_naive,
Json,
Paths,
Res,
assert_never,
datetime_naive,
get_files,
make_config,
make_logger,
)
from my.core.common import unique_everseen
from my.core.cachew import mcachew
from my.core.common import unique_everseen
from my.core.error import echain
from my.core.sqlite import sqlite_connect_immutable, select
from my.config import instagram as user_config
from my.core.sqlite import select, sqlite_connect_immutable
from my.config import instagram as user_config # isort: skip
logger = make_logger(__name__)
@ -38,8 +37,8 @@ class instagram_android_config(user_config.android):
# sadly doesn't seem easy to extract user's own handle/name from the db...
# todo maybe makes more sense to keep in parent class? not sure...
username: Optional[str] = None
full_name: Optional[str] = None
username: str | None = None
full_name: str | None = None
config = make_config(instagram_android_config)
@ -101,13 +100,13 @@ class MessageError(RuntimeError):
return self.rest == other.rest
def _parse_message(j: Json) -> Optional[_Message]:
def _parse_message(j: Json) -> _Message | None:
id = j['item_id']
t = j['item_type']
tid = j['thread_key']['thread_id']
uid = j['user_id']
created = datetime.fromtimestamp(int(j['timestamp']) / 1_000_000)
text: Optional[str] = None
text: str | None = None
if t == 'text':
text = j['text']
elif t == 'reel_share':
@ -133,7 +132,7 @@ def _parse_message(j: Json) -> Optional[_Message]:
)
def _process_db(db: sqlite3.Connection) -> Iterator[Res[Union[User, _Message]]]:
def _process_db(db: sqlite3.Connection) -> Iterator[Res[User | _Message]]:
# TODO ugh. seems like no way to extract username?
# sometimes messages (e.g. media_share) contain it in message field
# but generally it's not present. ugh
@ -175,7 +174,7 @@ def _process_db(db: sqlite3.Connection) -> Iterator[Res[Union[User, _Message]]]:
yield e
def _entities() -> Iterator[Res[Union[User, _Message]]]:
def _entities() -> Iterator[Res[User | _Message]]:
# NOTE: definitely need to merge multiple, app seems to recycle old messages
# TODO: hmm hard to guarantee timestamp ordering when we use synthetic input data...
# todo use TypedDict?
@ -194,7 +193,7 @@ def _entities() -> Iterator[Res[Union[User, _Message]]]:
@mcachew(depends_on=inputs)
def messages() -> Iterator[Res[Message]]:
id2user: Dict[str, User] = {}
id2user: dict[str, User] = {}
for x in unique_everseen(_entities):
if isinstance(x, Exception):
yield x

View file

@ -1,9 +1,10 @@
from collections.abc import Iterator
from dataclasses import replace
from datetime import datetime
from itertools import chain
from typing import Iterator, Dict, Any, Protocol
from typing import Any, Protocol
from my.core import warn_if_empty, Res
from my.core import Res, warn_if_empty
class User(Protocol):
@ -40,7 +41,7 @@ def _merge_messages(*sources: Iterator[Res[Message]]) -> Iterator[Res[Message]]:
# ugh. seems that GDPR thread ids are completely uncorrelated to any android ids (tried searching over all sqlite dump)
# so the only way to correlate is to try and match messages
# we also can't use unique_everseen here, otherwise will never get a chance to unify threads
mmap: Dict[str, Message] = {}
mmap: dict[str, Message] = {}
thread_map = {}
user_map = {}
@ -60,7 +61,7 @@ def _merge_messages(*sources: Iterator[Res[Message]]) -> Iterator[Res[Message]]:
user_map[m.user.id] = mm.user
else:
# not emitted yet, need to emit
repls: Dict[str, Any] = {}
repls: dict[str, Any] = {}
tid = thread_map.get(m.thread_id)
if tid is not None:
repls['thread_id'] = tid

View file

@ -2,26 +2,27 @@
Instagram data (uses [[https://www.instagram.com/download/request][official GDPR export]])
"""
from __future__ import annotations
import json
from collections.abc import Iterator, Sequence
from dataclasses import dataclass
from datetime import datetime
import json
from pathlib import Path
from typing import Iterator, Sequence, Dict, Union
from more_itertools import bucket
from my.core import (
get_files,
Paths,
datetime_naive,
Res,
assert_never,
datetime_naive,
get_files,
make_logger,
)
from my.core.common import unique_everseen
from my.config import instagram as user_config
from my.config import instagram as user_config # isort: skip
logger = make_logger(__name__)
@ -70,7 +71,7 @@ def _decode(s: str) -> str:
return s.encode('latin-1').decode('utf8')
def _entities() -> Iterator[Res[Union[User, _Message]]]:
def _entities() -> Iterator[Res[User | _Message]]:
# it's worth processing all previous export -- sometimes instagram removes some metadata from newer ones
# NOTE: here there are basically two options
# - process inputs as is (from oldest to newest)
@ -84,7 +85,7 @@ def _entities() -> Iterator[Res[Union[User, _Message]]]:
yield from _entitites_from_path(path)
def _entitites_from_path(path: Path) -> Iterator[Res[Union[User, _Message]]]:
def _entitites_from_path(path: Path) -> Iterator[Res[User | _Message]]:
# TODO make sure it works both with plain directory
# ideally get_files should return the right thing, and we won't have to force ZipPath/match_structure here
# e.g. possible options are:
@ -202,7 +203,7 @@ def _entitites_from_path(path: Path) -> Iterator[Res[Union[User, _Message]]]:
# TODO basically copy pasted from android.py... hmm
def messages() -> Iterator[Res[Message]]:
id2user: Dict[str, User] = {}
id2user: dict[str, User] = {}
for x in unique_everseen(_entities):
if isinstance(x, Exception):
yield x

View file

@ -7,10 +7,10 @@ REQUIRES = [
from dataclasses import dataclass
from .core import Paths
from my.config import instapaper as user_config
from .core import Paths
@dataclass
class instapaper(user_config):
@ -22,6 +22,7 @@ class instapaper(user_config):
from .core.cfg import make_config
config = make_config(instapaper)
@ -39,9 +40,12 @@ Bookmark = dal.Bookmark
Page = dal.Page
from typing import Sequence, Iterable
from collections.abc import Iterable, Sequence
from pathlib import Path
from .core import get_files
def inputs() -> Sequence[Path]:
return get_files(config.export_path)

View file

@ -9,10 +9,9 @@ For an example of how this could be used, see https://github.com/seanbreckenridg
REQUIRES = ["git+https://github.com/seanbreckenridge/ipgeocache"]
from typing import Iterator
from collections.abc import Iterator
from my.core import Stats, warn_if_empty
from my.ip.common import IP

View file

@ -2,11 +2,12 @@
Provides location/timezone data from IP addresses, using [[https://github.com/seanbreckenridge/ipgeocache][ipgeocache]]
"""
from my.core import __NOT_HPI_MODULE__
from my.core import __NOT_HPI_MODULE__ # isort: skip
import ipaddress
from typing import NamedTuple, Iterator, Tuple
from collections.abc import Iterator
from datetime import datetime
from typing import NamedTuple
import ipgeocache
@ -22,7 +23,7 @@ class IP(NamedTuple):
return ipgeocache.get(self.addr)
@property
def latlon(self) -> Tuple[float, float]:
def latlon(self) -> tuple[float, float]:
loc: str = self.ipgeocache()["loc"]
lat, _, lon = loc.partition(",")
return float(lat), float(lon)

View file

@ -1,10 +1,11 @@
from __future__ import annotations
from typing import Dict, Any, List, Iterable
import json
from collections.abc import Iterable
from datetime import date, datetime, time, timedelta
from functools import lru_cache
from datetime import datetime, date, time, timedelta
from pathlib import Path
from typing import Any
import pytz
@ -14,7 +15,6 @@ logger = make_logger(__name__)
from my.config import jawbone as config # type: ignore[attr-defined]
BDIR = config.export_dir
PHASES_FILE = BDIR / 'phases.json'
SLEEPS_FILE = BDIR / 'sleeps.json'
@ -24,7 +24,7 @@ GRAPHS_DIR = BDIR / 'graphs'
XID = str # TODO how to shared with backup thing?
Phases = Dict[XID, Any]
Phases = dict[XID, Any]
@lru_cache(1)
def get_phases() -> Phases:
return json.loads(PHASES_FILE.read_text())
@ -89,7 +89,7 @@ class SleepEntry:
# TODO might be useful to cache these??
@property
def phases(self) -> List[datetime]:
def phases(self) -> list[datetime]:
# TODO make sure they are consistent with emfit?
return [self._fromts(i['time']) for i in get_phases()[self.xid]]
@ -100,12 +100,13 @@ class SleepEntry:
return str(self)
def load_sleeps() -> List[SleepEntry]:
def load_sleeps() -> list[SleepEntry]:
sleeps = json.loads(SLEEPS_FILE.read_text())
return [SleepEntry(js) for js in sleeps]
from ..core.error import Res, set_error_datetime, extract_error_datetime
from ..core.error import Res, extract_error_datetime, set_error_datetime
def pre_dataframe() -> Iterable[Res[SleepEntry]]:
from more_itertools import bucket
@ -129,9 +130,9 @@ def pre_dataframe() -> Iterable[Res[SleepEntry]]:
def dataframe():
dicts: List[Dict[str, Any]] = []
dicts: list[dict[str, Any]] = []
for s in pre_dataframe():
d: Dict[str, Any]
d: dict[str, Any]
if isinstance(s, Exception):
dt = extract_error_datetime(s)
d = {
@ -260,8 +261,8 @@ def predicate(sleep: SleepEntry):
# TODO move to dashboard
def plot() -> None:
from matplotlib.figure import Figure # type: ignore[import-not-found]
import matplotlib.pyplot as plt # type: ignore[import-not-found]
from matplotlib.figure import Figure # type: ignore[import-not-found]
# TODO FIXME melatonin data
melatonin_data = {} # type: ignore[var-annotated]


@ -1,10 +1,11 @@
#!/usr/bin/env python3
# TODO this should be in dashboard
from pathlib import Path
# from kython.plotting import *
from csv import DictReader
from pathlib import Path
from typing import Any, NamedTuple
from typing import Dict, Any, NamedTuple
import matplotlib.pylab as pylab # type: ignore
# sleep = []
# with open('2017.csv', 'r') as fo:
@ -12,16 +13,14 @@ from typing import Dict, Any, NamedTuple
# for line in islice(reader, 0, 10):
# sleep
# print(line)
import matplotlib.pyplot as plt # type: ignore
from numpy import genfromtxt
import matplotlib.pylab as pylab # type: ignore
pylab.rcParams['figure.figsize'] = (32.0, 24.0)
pylab.rcParams['font.size'] = 10
jawboneDataFeatures = Path(__file__).parent / 'features.csv' # Data File Path
featureDesc: Dict[str, str] = {}
featureDesc: dict[str, str] = {}
for x in genfromtxt(jawboneDataFeatures, dtype='unicode', delimiter=','):
featureDesc[x[0]] = x[1]
@ -52,7 +51,7 @@ class SleepData(NamedTuple):
quality: float # ???
@classmethod
def from_jawbone_dict(cls, d: Dict[str, Any]):
def from_jawbone_dict(cls, d: dict[str, Any]):
return cls(
date=d['DATE'],
asleep_time=_safe_mins(_safe_float(d['s_asleep_time'])),
@ -75,7 +74,7 @@ class SleepData(NamedTuple):
def iter_useful(data_file: str):
with open(data_file) as fo:
with Path(data_file).open() as fo:
reader = DictReader(fo)
for d in reader:
dt = SleepData.from_jawbone_dict(d)
@ -95,6 +94,7 @@ files = [
]
from kython import concat, parse_date # type: ignore
useful = concat(*(list(iter_useful(str(f))) for f in files))
# for u in useful:
@ -108,6 +108,7 @@ dates = [parse_date(u.date, yearfirst=True, dayfirst=False) for u in useful]
# TODO don't need this anymore? it's gonna be in dashboards package
from kython.plotting import plot_timestamped # type: ignore
for attr, lims, mavg, fig in [
('light', (0, 400), 5, None),
('deep', (0, 600), 5, None),

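An incidental cleanup in the plotting script above: open(data_file) becomes Path(data_file).open(), keeping file handling on pathlib throughout. A tiny sketch (the CSV layout is hypothetical):

import csv
from pathlib import Path

def read_rows(data_file: str) -> list[dict[str, str]]:
    # Path.open() takes the same mode/encoding arguments as builtin open()
    with Path(data_file).open() as fo:
        return list(csv.DictReader(fo))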

@ -7,21 +7,22 @@ REQUIRES = [
'kobuddy',
]
from collections.abc import Iterator
from dataclasses import dataclass
from typing import Iterator
from my.core import (
get_files,
stat,
Paths,
Stats,
)
from my.core.cfg import make_config
import my.config
import kobuddy
from kobuddy import Highlight, get_highlights
from kobuddy import *
from kobuddy import Highlight, get_highlights
from my.core import (
Paths,
Stats,
get_files,
stat,
)
from my.core.cfg import make_config
import my.config # isort: skip
@dataclass
@ -51,7 +52,7 @@ def stats() -> Stats:
## TODO hmm. not sure if all this really belongs here?... perhaps orger?
from typing import Callable, Union, List
from typing import Callable, Union
# TODO maybe type over T?
_Predicate = Callable[[str], bool]
@ -69,17 +70,17 @@ def from_predicatish(p: Predicatish) -> _Predicate:
return p
def by_annotation(predicatish: Predicatish, **kwargs) -> List[Highlight]:
def by_annotation(predicatish: Predicatish, **kwargs) -> list[Highlight]:
pred = from_predicatish(predicatish)
res: List[Highlight] = []
res: list[Highlight] = []
for h in get_highlights(**kwargs):
if pred(h.annotation):
res.append(h)
return res
def get_todos() -> List[Highlight]:
def get_todos() -> list[Highlight]:
def with_todo(ann):
if ann is None:
ann = ''

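The kobo module above pins import my.config with an isort action comment: several modules in this commit rely on import order (config patching, star-imports), and the comment stops isort from moving that one line while the rest of the file gets sorted. A sketch of the convention, with a hypothetical side effect that must run first:

import sys

sys.path.insert(0, '/tmp/overlay')  # hypothetical: must run before the import below

# the skip comment pins this import in place when the file is re-sorted
import json  # isort: skip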

@ -1,5 +1,4 @@
from my.core import __NOT_HPI_MODULE__
from my.core import warnings
from my.core import __NOT_HPI_MODULE__, warnings
warnings.high('my.kython.kompress is deprecated, please use "kompress" library directly. See https://github.com/karlicoss/kompress')


@ -3,9 +3,9 @@ Last.fm scrobbles
'''
from dataclasses import dataclass
from my.core import Paths, Json, make_logger, get_files
from my.config import lastfm as user_config
from my.config import lastfm as user_config
from my.core import Json, Paths, get_files, make_logger
logger = make_logger(__name__)
@ -19,13 +19,15 @@ class lastfm(user_config):
from my.core.cfg import make_config
config = make_config(lastfm)
from datetime import datetime, timezone
import json
from collections.abc import Iterable, Sequence
from datetime import datetime, timezone
from pathlib import Path
from typing import NamedTuple, Sequence, Iterable
from typing import NamedTuple
from my.core.cachew import mcachew
@ -76,7 +78,9 @@ def scrobbles() -> Iterable[Scrobble]:
yield Scrobble(raw=raw)
from my.core import stat, Stats
from my.core import Stats, stat
def stats() -> Stats:
return stat(scrobbles)


@ -2,14 +2,13 @@
Merges location data from multiple sources
"""
from typing import Iterator
from collections.abc import Iterator
from my.core import Stats, LazyLogger
from my.core import LazyLogger, Stats
from my.core.source import import_source
from .common import Location
logger = LazyLogger(__name__, level="warning")


@ -1,12 +1,13 @@
from datetime import date, datetime
from typing import Union, Tuple, Optional, Iterable, TextIO, Iterator, Protocol
from dataclasses import dataclass
from my.core import __NOT_HPI_MODULE__ # isort: skip
from my.core import __NOT_HPI_MODULE__
from collections.abc import Iterable, Iterator
from dataclasses import dataclass
from datetime import date, datetime
from typing import Optional, Protocol, TextIO, Union
DateIsh = Union[datetime, date, str]
LatLon = Tuple[float, float]
LatLon = tuple[float, float]
class LocationProtocol(Protocol):


@ -1,14 +1,16 @@
# TODO: add config here which passes kwargs to estimate_from (under_accuracy)
# overwritable by passing the kwarg name here to the top-level estimate_location
from typing import Iterator, Optional
from __future__ import annotations
from collections.abc import Iterator
from my.core.source import import_source
from my.location.fallback.common import (
estimate_from,
FallbackLocation,
DateExact,
FallbackLocation,
LocationEstimator,
estimate_from,
)
@ -24,7 +26,7 @@ def fallback_estimators() -> Iterator[LocationEstimator]:
yield _home_estimate
def estimate_location(dt: DateExact, *, first_match: bool=False, under_accuracy: Optional[int] = None) -> FallbackLocation:
def estimate_location(dt: DateExact, *, first_match: bool=False, under_accuracy: int | None = None) -> FallbackLocation:
loc = estimate_from(dt, estimators=list(fallback_estimators()), first_match=first_match, under_accuracy=under_accuracy)
# should never happen if the user has home configured
if loc is None:


@ -1,9 +1,12 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import Optional, Callable, Sequence, Iterator, List, Union
from datetime import datetime, timedelta, timezone
from ..common import LocationProtocol, Location
from collections.abc import Iterator, Sequence
from dataclasses import dataclass
from datetime import datetime, timedelta, timezone
from typing import Callable, Union
from ..common import Location, LocationProtocol
DateExact = Union[datetime, float, int] # float/int as epoch timestamps
Second = float
@ -13,10 +16,10 @@ class FallbackLocation(LocationProtocol):
lat: float
lon: float
dt: datetime
duration: Optional[Second] = None
accuracy: Optional[float] = None
elevation: Optional[float] = None
datasource: Optional[str] = None # which module provided this, useful for debugging
duration: Second | None = None
accuracy: float | None = None
elevation: float | None = None
datasource: str | None = None # which module provided this, useful for debugging
def to_location(self, *, end: bool = False) -> Location:
'''
@ -43,9 +46,9 @@ class FallbackLocation(LocationProtocol):
lon: float,
dt: datetime,
end_dt: datetime,
accuracy: Optional[float] = None,
elevation: Optional[float] = None,
datasource: Optional[str] = None,
accuracy: float | None = None,
elevation: float | None = None,
datasource: str | None = None,
) -> FallbackLocation:
'''
Create FallbackLocation from a start date and an end date
@ -93,13 +96,13 @@ def estimate_from(
estimators: LocationEstimators,
*,
first_match: bool = False,
under_accuracy: Optional[int] = None,
) -> Optional[FallbackLocation]:
under_accuracy: int | None = None,
) -> FallbackLocation | None:
'''
first_match: if True, return the first location found
under_accuracy: if set, only return locations with accuracy under this value
'''
found: List[FallbackLocation] = []
found: list[FallbackLocation] = []
for loc in _iter_estimate_from(dt, estimators):
if under_accuracy is not None and loc.accuracy is not None and loc.accuracy > under_accuracy:
continue

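Worth spelling out why the X | None annotations above work on 3.9 at all: PEP 604 union syntax is only runtime-valid from 3.10, but each touched module adds from __future__ import annotations, which turns every annotation into a lazily-evaluated string. A minimal sketch:

from __future__ import annotations  # must be the first import in the module

from dataclasses import dataclass
from datetime import datetime

@dataclass
class Fix:
    lat: float
    lon: float
    dt: datetime
    accuracy: float | None = None  # PEP 604 syntax; fine on 3.9 thanks to the future import

print(Fix(lat=51.5, lon=-0.1, dt=datetime.now()))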

@ -2,25 +2,22 @@
Simple location provider, serving as a fallback when more detailed data isn't available
'''
from __future__ import annotations
from collections.abc import Iterator, Sequence
from dataclasses import dataclass
from datetime import datetime, time, timezone
from functools import lru_cache
from typing import Sequence, Tuple, Union, cast, List, Iterator
from functools import cache
from typing import cast
from my.config import location as user_config
from my.location.common import DateIsh, LatLon
from my.location.fallback.common import DateExact, FallbackLocation
from my.location.common import LatLon, DateIsh
from my.location.fallback.common import FallbackLocation, DateExact
@dataclass
class Config(user_config):
home: Union[
LatLon, # either single, 'current' location
Sequence[Tuple[ # or, a sequence of location history
DateIsh, # date when you moved to
LatLon, # the location
]]
]
home: LatLon | Sequence[tuple[DateIsh, LatLon]]
# default ~30km accuracy
# this is called 'home_accuracy' since it lives on the base location.config object,
@ -29,13 +26,13 @@ class Config(user_config):
# TODO could make current Optional and somehow determine from system settings?
@property
def _history(self) -> Sequence[Tuple[datetime, LatLon]]:
def _history(self) -> Sequence[tuple[datetime, LatLon]]:
home1 = self.home
# todo ugh, can't test for isinstance LatLon, it's a tuple itself
home2: Sequence[Tuple[DateIsh, LatLon]]
home2: Sequence[tuple[DateIsh, LatLon]]
if isinstance(home1[0], tuple):
# already a sequence
home2 = cast(Sequence[Tuple[DateIsh, LatLon]], home1)
home2 = cast(Sequence[tuple[DateIsh, LatLon]], home1)
else:
# must be a pair of coordinates. also doesn't really matter which date to pick?
loc = cast(LatLon, home1)
@ -60,10 +57,11 @@ class Config(user_config):
from ...core.cfg import make_config
config = make_config(Config)
@lru_cache(maxsize=None)
@cache
def get_location(dt: datetime) -> LatLon:
'''
Interpolates the location at dt
@ -74,8 +72,8 @@ def get_location(dt: datetime) -> LatLon:
# TODO: in python3.8, use functools.cached_property instead?
@lru_cache(maxsize=None)
def homes_cached() -> List[Tuple[datetime, LatLon]]:
@cache
def homes_cached() -> list[tuple[datetime, LatLon]]:
return list(config._history)

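functools.cache, which replaces lru_cache(maxsize=None) above, was added in 3.9 and is exactly an unbounded lru_cache. A sketch:

from functools import cache

@cache  # equivalent to @lru_cache(maxsize=None), available since Python 3.9
def fib(n: int) -> int:
    return n if n < 2 else fib(n - 1) + fib(n - 2)

assert fib(30) == 832040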

@ -7,8 +7,8 @@ REQUIRES = ["git+https://github.com/seanbreckenridge/ipgeocache"]
from dataclasses import dataclass
from datetime import timedelta
from my.core import Stats, make_config
from my.config import location
from my.core import Stats, make_config
from my.core.warnings import medium
@ -24,13 +24,13 @@ class ip_config(location.via_ip):
config = make_config(ip_config)
from collections.abc import Iterator
from functools import lru_cache
from typing import Iterator, List
from my.core import make_logger
from my.core.compat import bisect_left
from my.location.common import Location
from my.location.fallback.common import FallbackLocation, DateExact, _datetime_timestamp
from my.location.fallback.common import DateExact, FallbackLocation, _datetime_timestamp
logger = make_logger(__name__, level="warning")
@ -60,7 +60,7 @@ def locations() -> Iterator[Location]:
@lru_cache(1)
def _sorted_fallback_locations() -> List[FallbackLocation]:
def _sorted_fallback_locations() -> list[FallbackLocation]:
fl = list(filter(lambda l: l.duration is not None, fallback_locations()))
logger.debug(f"Fallback locations: {len(fl)}, sorting...:")
fl.sort(key=lambda l: l.dt.timestamp())


@ -3,28 +3,27 @@ Location data from Google Takeout
DEPRECATED: setup my.google.takeout.parser and use my.location.google_takeout instead
"""
from __future__ import annotations
REQUIRES = [
'geopy', # checking that coordinates are valid
'ijson',
]
import re
from collections.abc import Iterable, Sequence
from datetime import datetime, timezone
from itertools import islice
from pathlib import Path
from subprocess import Popen, PIPE
from typing import Iterable, NamedTuple, Optional, Sequence, IO, Tuple
import re
from subprocess import PIPE, Popen
from typing import IO, NamedTuple, Optional
# pip3 install geopy
import geopy # type: ignore
from my.core import stat, Stats, make_logger
from my.core import Stats, make_logger, stat, warnings
from my.core.cachew import cache_dir, mcachew
from my.core import warnings
warnings.high("Please set up my.google.takeout.parser module for better takeout support")
@ -43,7 +42,7 @@ class Location(NamedTuple):
alt: Optional[float]
TsLatLon = Tuple[int, int, int]
TsLatLon = tuple[int, int, int]
def _iter_via_ijson(fo) -> Iterable[TsLatLon]:


@ -4,13 +4,14 @@ Extracts locations using google_takeout_parser -- no shared code with the deprec
REQUIRES = ["git+https://github.com/seanbreckenridge/google_takeout_parser"]
from typing import Iterator
from collections.abc import Iterator
from my.google.takeout.parser import events, _cachew_depends_on
from google_takeout_parser.models import Location as GoogleLocation
from my.core import stat, Stats, LazyLogger
from my.core import LazyLogger, Stats, stat
from my.core.cachew import mcachew
from my.google.takeout.parser import _cachew_depends_on, events
from .common import Location
logger = LazyLogger(__name__)


@ -7,21 +7,24 @@ Extracts semantic location history using google_takeout_parser
REQUIRES = ["git+https://github.com/seanbreckenridge/google_takeout_parser"]
from collections.abc import Iterator
from dataclasses import dataclass
from typing import Iterator, List
from my.google.takeout.parser import events, _cachew_depends_on as _parser_cachew_depends_on
from google_takeout_parser.models import PlaceVisit as SemanticLocation
from my.core import make_config, stat, LazyLogger, Stats
from my.core import LazyLogger, Stats, make_config, stat
from my.core.cachew import mcachew
from my.core.error import Res
from my.google.takeout.parser import _cachew_depends_on as _parser_cachew_depends_on
from my.google.takeout.parser import events
from .common import Location
logger = LazyLogger(__name__)
from my.config import location as user_config
@dataclass
class semantic_locations_config(user_config.google_takeout_semantic):
# a value between 0 and 100, 100 being the most confident
@ -36,7 +39,7 @@ config = make_config(semantic_locations_config)
# add config to cachew dependency so it recomputes on config changes
def _cachew_depends_on() -> List[str]:
def _cachew_depends_on() -> list[str]:
dep = _parser_cachew_depends_on()
dep.insert(0, f"require_confidence={config.require_confidence} accuracy={config.accuracy}")
return dep


@ -20,20 +20,20 @@ class config(location.gpslogger):
accuracy: float = 50.0
from itertools import chain
from collections.abc import Iterator, Sequence
from datetime import datetime, timezone
from itertools import chain
from pathlib import Path
from typing import Iterator, Sequence, List
import gpxpy
from gpxpy.gpx import GPXXMLSyntaxException
from more_itertools import unique_everseen
from my.core import Stats, LazyLogger
from my.core import LazyLogger, Stats
from my.core.cachew import mcachew
from my.core.common import get_files
from .common import Location
from .common import Location
logger = LazyLogger(__name__, level="warning")
@ -49,7 +49,7 @@ def inputs() -> Sequence[Path]:
return sorted(get_files(config.export_path, glob="*.gpx", sort=False), key=_input_sort_key)
def _cachew_depends_on() -> List[float]:
def _cachew_depends_on() -> list[float]:
return [p.stat().st_mtime for p in inputs()]


@ -1,7 +1,7 @@
from .fallback.via_home import *
from my.core.warnings import high
from .fallback.via_home import *
high(
"my.location.home is deprecated, use my.location.fallback.via_home instead, or estimate locations using the higher-level my.location.fallback.all.estimate_location"
)


@ -1,7 +1,7 @@
REQUIRES = ["git+https://github.com/seanbreckenridge/ipgeocache"]
from .fallback.via_ip import *
from my.core.warnings import high
from .fallback.via_ip import *
high("my.location.via_ip is deprecated, use my.location.fallback.via_ip instead")


@ -1,4 +1,5 @@
from .core.warnings import high
high("DEPRECATED! Please use my.hackernews.materialistic instead.")
from .hackernews.materialistic import *


@ -1,10 +1,12 @@
import csv
from collections.abc import Iterator
from datetime import datetime
from typing import Iterator, List, NamedTuple
from typing import NamedTuple
from ..core import get_files
from my.core import get_files
from my.config import imdb as config # isort: skip
from my.config import imdb as config
def _get_last():
return max(get_files(config.export_path))
@ -31,7 +33,7 @@ def iter_movies() -> Iterator[Movie]:
yield Movie(created=created, title=title, rating=rating)
def get_movies() -> List[Movie]:
def get_movies() -> list[Movie]:
return sorted(iter_movies(), key=lambda m: m.created)


@ -1,4 +1,4 @@
from my.core import __NOT_HPI_MODULE__
from my.core import __NOT_HPI_MODULE__ # isort: skip
from typing import TYPE_CHECKING


@ -5,16 +5,17 @@ REQUIRES = [
'git+https://github.com/karlicoss/monzoexport',
]
from collections.abc import Iterator, Sequence
from dataclasses import dataclass
from pathlib import Path
from typing import Sequence, Iterator
from my.core import (
Paths,
get_files,
make_logger,
)
import my.config
import my.config # isort: skip
@dataclass


@ -1,15 +1,17 @@
'''
Programmatic access and queries to org-mode files on the filesystem
'''
from __future__ import annotations
REQUIRES = [
'orgparse',
]
import re
from collections.abc import Iterable, Sequence
from datetime import datetime
from pathlib import Path
from typing import Iterable, List, NamedTuple, Optional, Sequence, Tuple
from typing import NamedTuple, Optional
import orgparse
@ -34,7 +36,7 @@ def make_config() -> config:
class OrgNote(NamedTuple):
created: Optional[datetime]
heading: str
tags: List[str]
tags: list[str]
def inputs() -> Sequence[Path]:
@ -45,7 +47,7 @@ def inputs() -> Sequence[Path]:
_rgx = re.compile(orgparse.date.gene_timestamp_regex(brtype='inactive'), re.VERBOSE)
def _created(n: orgparse.OrgNode) -> Tuple[Optional[datetime], str]:
def _created(n: orgparse.OrgNode) -> tuple[datetime | None, str]:
heading = n.heading
# meh.. support in orgparse?
pp = {} if n.is_root() else n.properties
@ -68,7 +70,7 @@ def _created(n: orgparse.OrgNode) -> Tuple[Optional[datetime], str]:
def to_note(x: orgparse.OrgNode) -> OrgNote:
# ugh. hack to merely make it cacheable
heading = x.heading
created: Optional[datetime]
created: datetime | None
try:
c, heading = _created(x)
if isinstance(c, datetime):


@ -1,6 +1,7 @@
'''
PDF documents and annotations on your filesystem
'''
from __future__ import annotations as _annotations
REQUIRES = [
'git+https://github.com/0xabu/pdfannots',
@ -8,9 +9,10 @@ REQUIRES = [
]
import time
from collections.abc import Iterator, Sequence
from datetime import datetime
from pathlib import Path
from typing import Iterator, List, NamedTuple, Optional, Protocol, Sequence, TYPE_CHECKING
from typing import TYPE_CHECKING, NamedTuple, Optional, Protocol
import pdfannots
from more_itertools import bucket
@ -72,7 +74,7 @@ class Annotation(NamedTuple):
created: Optional[datetime] # note: can be tz unaware in some bad pdfs...
@property
def date(self) -> Optional[datetime]:
def date(self) -> datetime | None:
# legacy name
return self.created
@ -93,7 +95,7 @@ def _as_annotation(*, raw: pdfannots.Annotation, path: str) -> Annotation:
)
def get_annots(p: Path) -> List[Annotation]:
def get_annots(p: Path) -> list[Annotation]:
b = time.time()
with p.open('rb') as fo:
doc = pdfannots.process_file(fo, emit_progress_to=None)
@ -150,17 +152,17 @@ class Pdf(NamedTuple):
annotations: Sequence[Annotation]
@property
def created(self) -> Optional[datetime]:
def created(self) -> datetime | None:
annots = self.annotations
return None if len(annots) == 0 else annots[-1].created
@property
def date(self) -> Optional[datetime]:
def date(self) -> datetime | None:
# legacy
return self.created
def annotated_pdfs(*, filelist: Optional[Sequence[PathIsh]] = None) -> Iterator[Res[Pdf]]:
def annotated_pdfs(*, filelist: Sequence[PathIsh] | None = None) -> Iterator[Res[Pdf]]:
if filelist is not None:
# hacky... keeping it backwards compatible
# https://github.com/karlicoss/HPI/pull/74


@ -1,27 +1,30 @@
"""
Photos and videos on your filesystem, their GPS and timestamps
"""
from __future__ import annotations
REQUIRES = [
'geopy',
'magic',
]
# NOTE: also uses fdfind to search photos
import json
from collections.abc import Iterable, Iterator
from concurrent.futures import ProcessPoolExecutor as Pool
from datetime import datetime
import json
from pathlib import Path
from typing import Optional, NamedTuple, Iterator, Iterable, List
from typing import NamedTuple, Optional
from geopy.geocoders import Nominatim # type: ignore
from my.core import LazyLogger
from my.core.error import Res, sort_res_by
from my.core.cachew import cache_dir, mcachew
from my.core.error import Res, sort_res_by
from my.core.mime import fastermime
from my.config import photos as config # type: ignore[attr-defined]
from my.config import photos as config # type: ignore[attr-defined] # isort: skip
logger = LazyLogger(__name__)
@ -55,15 +58,15 @@ class Photo(NamedTuple):
return f'{config.base_url}{self._basename}'
from .utils import get_exif_from_file, ExifTags, Exif, dt_from_path, convert_ref
from .utils import Exif, ExifTags, convert_ref, dt_from_path, get_exif_from_file
Result = Res[Photo]
def _make_photo_aux(*args, **kwargs) -> List[Result]:
def _make_photo_aux(*args, **kwargs) -> list[Result]:
# for the process pool..
return list(_make_photo(*args, **kwargs))
def _make_photo(photo: Path, mtype: str, *, parent_geo: Optional[LatLon]) -> Iterator[Result]:
def _make_photo(photo: Path, mtype: str, *, parent_geo: LatLon | None) -> Iterator[Result]:
exif: Exif
if any(x in mtype for x in ['image/png', 'image/x-ms-bmp', 'video']):
# TODO don't remember why..
@ -77,7 +80,7 @@ def _make_photo(photo: Path, mtype: str, *, parent_geo: Optional[LatLon]) -> Ite
yield e
exif = {}
def _get_geo() -> Optional[LatLon]:
def _get_geo() -> LatLon | None:
meta = exif.get(ExifTags.GPSINFO, {})
if ExifTags.LAT in meta and ExifTags.LON in meta:
return LatLon(
@ -87,7 +90,7 @@ def _make_photo(photo: Path, mtype: str, *, parent_geo: Optional[LatLon]) -> Ite
return parent_geo
# TODO aware on unaware?
def _get_dt() -> Optional[datetime]:
def _get_dt() -> datetime | None:
edt = exif.get(ExifTags.DATETIME, None)
if edt is not None:
dtimes = edt.replace(' 24', ' 00') # jeez maybe log it?
@ -123,7 +126,7 @@ def _make_photo(photo: Path, mtype: str, *, parent_geo: Optional[LatLon]) -> Ite
def _candidates() -> Iterable[Res[str]]:
# TODO that could be a bit slow if there are too many extra files?
from subprocess import Popen, PIPE
from subprocess import PIPE, Popen
# TODO could extract this to common?
# TODO would be nice to reuse get_files (or even let it use find)
# that way would be easier to exclude
@ -162,7 +165,7 @@ def _photos(candidates: Iterable[Res[str]]) -> Iterator[Result]:
from functools import lru_cache
@lru_cache(None)
def get_geo(d: Path) -> Optional[LatLon]:
def get_geo(d: Path) -> LatLon | None:
geof = d / 'geo.json'
if not geof.exists():
if d == d.parent:
@ -214,5 +217,7 @@ def print_all() -> None:
# todo cachew -- invalidate if function code changed?
from ..core import Stats, stat
def stats() -> Stats:
return stat(photos)


@ -1,11 +1,13 @@
from __future__ import annotations
from ..core import __NOT_HPI_MODULE__ # isort: skip
from pathlib import Path
from typing import Dict
import PIL.Image
from PIL.ExifTags import TAGS, GPSTAGS
from PIL.ExifTags import GPSTAGS, TAGS
Exif = Dict
Exif = dict
# TODO PIL.ExifTags.TAGS
@ -62,18 +64,15 @@ def convert_ref(cstr, ref: str) -> float:
import re
from datetime import datetime
from typing import Optional
# TODO surely there is a library that does it??
# TODO this belongs to a private overlay or something
# basically have a function that patches up dates after the files were yielded..
_DT_REGEX = re.compile(r'\D(\d{8})\D*(\d{6})\D')
def dt_from_path(p: Path) -> Optional[datetime]:
def dt_from_path(p: Path) -> datetime | None:
name = p.stem
mm = _DT_REGEX.search(name)
if mm is None:
return None
dates = mm.group(1) + mm.group(2)
return datetime.strptime(dates, "%Y%m%d%H%M%S")
from ..core import __NOT_HPI_MODULE__


@ -5,15 +5,16 @@ REQUIRES = [
'git+https://github.com/karlicoss/pinbexport',
]
from collections.abc import Iterator, Sequence
from dataclasses import dataclass
from pathlib import Path
from typing import Iterator, Sequence
from my.core import get_files, Paths, Res
import my.config
import pinbexport.dal as pinbexport
from my.core import Paths, Res, get_files
import my.config # isort: skip
@dataclass
class config(my.config.pinboard): # TODO rename to pinboard.pinbexport?


@ -7,10 +7,10 @@ REQUIRES = [
from dataclasses import dataclass
from typing import TYPE_CHECKING
from .core import Paths
from my.config import pocket as user_config
from .core import Paths
@dataclass
class pocket(user_config):
@ -23,6 +23,7 @@ class pocket(user_config):
from .core.cfg import make_config
config = make_config(pocket)
@ -37,7 +38,7 @@ except ModuleNotFoundError as e:
Article = dal.Article
from typing import Sequence, Iterable
from collections.abc import Iterable, Sequence
# todo not sure if should be defensive against empty?
@ -51,9 +52,12 @@ def articles() -> Iterable[Article]:
yield from _dal().articles()
from .core import stat, Stats
from .core import Stats, stat
def stats() -> Stats:
from itertools import chain
from more_itertools import ilen
return {
**stat(articles),


@ -1,11 +1,12 @@
"""
[[https://github.com/burtonator/polar-bookshelf][Polar]] articles and highlights
"""
from __future__ import annotations
from pathlib import Path
from typing import cast, TYPE_CHECKING
from typing import TYPE_CHECKING, cast
import my.config
import my.config # isort: skip
# todo use something similar to tz.via_location for config fallback
if not TYPE_CHECKING:
@ -20,8 +21,11 @@ if user_config is None:
pass
from .core import PathIsh
from dataclasses import dataclass
from .core import PathIsh
@dataclass
class polar(user_config):
'''
@ -32,20 +36,21 @@ class polar(user_config):
from .core import make_config
config = make_config(polar)
# todo not sure where it keeps stuff on Windows?
# https://github.com/burtonator/polar-bookshelf/issues/296
from datetime import datetime
from typing import List, Dict, Iterable, NamedTuple, Sequence, Optional
import json
from collections.abc import Iterable, Sequence
from datetime import datetime
from typing import NamedTuple
from .core import LazyLogger, Json, Res
from .core import Json, LazyLogger, Res
from .core.compat import fromisoformat
from .core.error import echain, sort_res_by
from .core.konsume import wrap, Zoomable, Wdict
from .core.konsume import Wdict, Zoomable, wrap
logger = LazyLogger(__name__)
@ -65,7 +70,7 @@ class Highlight(NamedTuple):
comments: Sequence[Comment]
tags: Sequence[str]
page: int # 1-indexed
color: Optional[str] = None
color: str | None = None
Uid = str
@ -73,7 +78,7 @@ class Book(NamedTuple):
created: datetime
uid: Uid
path: Path
title: Optional[str]
title: str | None
# TODO hmmm. I think this needs to be defensive as well...
# think about it later.
items: Sequence[Highlight]
@ -129,7 +134,7 @@ class Loader:
pi['dimensions'].consume_all()
# TODO how to make it nicer?
cmap: Dict[Hid, List[Comment]] = {}
cmap: dict[Hid, list[Comment]] = {}
vals = list(comments)
for v in vals:
cid = v['id'].zoom()
@ -163,7 +168,7 @@ class Loader:
h['rects'].ignore()
# TODO make it more generic..
htags: List[str] = []
htags: list[str] = []
if 'tags' in h:
ht = h['tags'].zoom()
for _k, v in list(ht.items()):
@ -242,7 +247,7 @@ def iter_entries() -> Iterable[Result]:
yield err
def get_entries() -> List[Result]:
def get_entries() -> list[Result]:
# sorting by first annotation is reasonable I guess???
# todo perhaps worth making it a pattern? X() returns iterable, get_X returns reasonably sorted list?
return list(sort_res_by(iter_entries(), key=lambda e: e.created))


@ -20,6 +20,7 @@ REQUIRES = [
from my.core.hpi_compat import handle_legacy_import
is_legacy_import = handle_legacy_import(
parent_module_name=__name__,
legacy_submodule_name='rexport',


@ -1,8 +1,9 @@
from typing import Iterator
from my.core import stat, Stats
from collections.abc import Iterator
from my.core import Stats, stat
from my.core.source import import_source
from .common import Save, Upvote, Comment, Submission, _merge_comments
from .common import Comment, Save, Submission, Upvote, _merge_comments
# Man... ideally an all.py file isn't this verbose, but
# reddit just feels like that much of a complicated source and


@ -2,12 +2,14 @@
This defines Protocol classes, which make sure that each different
type of shared models have a standardized interface
"""
from my.core import __NOT_HPI_MODULE__
from typing import Set, Iterator, Protocol
from my.core import __NOT_HPI_MODULE__ # isort: skip
from collections.abc import Iterator
from itertools import chain
from typing import Protocol
from my.core import datetime_aware, Json
from my.core import Json, datetime_aware
# common fields across all the Protocol classes, so generic code can be written
@ -49,7 +51,7 @@ class Submission(RedditBase, Protocol):
def _merge_comments(*sources: Iterator[Comment]) -> Iterator[Comment]:
#from .rexport import logger
#ignored = 0
emitted: Set[str] = set()
emitted: set[str] = set()
for e in chain(*sources):
uid = e.id
if uid in emitted:


@ -10,13 +10,13 @@ REQUIRES = [
from dataclasses import dataclass
# note: keeping pushshift import before config import, so it's handled gracefully by import_source
from pushshift_comment_export.dal import PComment, read_file
from my.config import reddit as uconfig
from my.core import Paths, Stats, stat
from my.core.cfg import make_config
# note: keeping pushshift import before config import, so it's handled gracefully by import_source
from pushshift_comment_export.dal import read_file, PComment
from my.config import reddit as uconfig
@dataclass
class pushshift_config(uconfig.pushshift):
@ -29,10 +29,10 @@ class pushshift_config(uconfig.pushshift):
config = make_config(pushshift_config)
from my.core import get_files
from typing import Sequence, Iterator
from collections.abc import Iterator, Sequence
from pathlib import Path
from my.core import get_files
def inputs() -> Sequence[Path]:


@ -7,23 +7,24 @@ REQUIRES = [
'git+https://github.com/karlicoss/rexport',
]
from dataclasses import dataclass
import inspect
from collections.abc import Iterator, Sequence
from dataclasses import dataclass
from pathlib import Path
from typing import TYPE_CHECKING, Iterator, Sequence
from typing import TYPE_CHECKING
from my.core import (
get_files,
make_logger,
warnings,
stat,
Paths,
Stats,
get_files,
make_logger,
stat,
warnings,
)
from my.core.cachew import mcachew
from my.core.cfg import make_config, Attrs
from my.core.cfg import Attrs, make_config
from my.config import reddit as uconfig
from my.config import reddit as uconfig # isort: skip
logger = make_logger(__name__)


@ -5,16 +5,15 @@ REQUIRES = [
'git+https://github.com/karlicoss/rescuexport',
]
from pathlib import Path
from collections.abc import Iterable, Sequence
from datetime import timedelta
from typing import Sequence, Iterable
from pathlib import Path
from my.core import get_files, make_logger, stat, Stats
from my.core import Stats, get_files, make_logger, stat
from my.core.cachew import mcachew
from my.core.error import Res, split_errors
from my.config import rescuetime as config
from my.config import rescuetime as config # isort: skip
logger = make_logger(__name__)
@ -24,6 +23,7 @@ def inputs() -> Sequence[Path]:
import rescuexport.dal as dal
DAL = dal.DAL
Entry = dal.Entry
@ -43,6 +43,8 @@ def groups(gap: timedelta=timedelta(hours=3)) -> Iterable[Res[Sequence[Entry]]]:
# todo automatic dataframe interface?
from .core.pandas import DataFrameT, as_dataframe
def dataframe() -> DataFrameT:
return as_dataframe(entries())
@ -56,16 +58,19 @@ def stats() -> Stats:
# basically, hack config and populate it with fake data? fake data generated by DAL, but the rest is handled by this?
from collections.abc import Iterator
from contextlib import contextmanager
from typing import Iterator
# todo take seed, or what?
@contextmanager
def fake_data(rows: int=1000) -> Iterator:
# todo also disable cachew automatically for such things?
from my.core.cfg import tmp_config
from my.core.cachew import disabled_cachew
from tempfile import TemporaryDirectory
import json
from tempfile import TemporaryDirectory
from my.core.cachew import disabled_cachew
from my.core.cfg import tmp_config
with disabled_cachew(), TemporaryDirectory() as td:
tdir = Path(td)
f = tdir / 'rescuetime.json'

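A hypothetical usage sketch for the fake_data helper defined above: under the context manager, entries() reads DAL-generated data instead of real exports (the exact row-count semantics are assumed, not verified here):

from my.rescuetime import entries, fake_data

with fake_data(rows=100):
    fakes = list(entries())
print(len(fakes))  # entries generated by the DAL, not read from real export files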

@ -1,16 +1,19 @@
"""
[[https://roamresearch.com][Roam]] data
"""
from datetime import datetime, timezone
from pathlib import Path
from itertools import chain
import re
from typing import NamedTuple, Iterator, List, Optional
from __future__ import annotations
from .core import get_files, LazyLogger, Json
import re
from collections.abc import Iterator
from datetime import datetime, timezone
from itertools import chain
from pathlib import Path
from typing import NamedTuple
from my.config import roamresearch as config
from .core import Json, LazyLogger, get_files
logger = LazyLogger(__name__)
@ -57,15 +60,15 @@ class Node(NamedTuple):
return datetime.fromtimestamp(rt / 1000, tz=timezone.utc)
@property
def title(self) -> Optional[str]:
def title(self) -> str | None:
return self.raw.get(Keys.TITLE)
@property
def body(self) -> Optional[str]:
def body(self) -> str | None:
return self.raw.get(Keys.STRING)
@property
def children(self) -> List['Node']:
def children(self) -> list[Node]:
# TODO cache? needs a key argument (because of Json)
ch = self.raw.get(Keys.CHILDREN, [])
return list(map(Node, ch))
@ -95,7 +98,7 @@ class Node(NamedTuple):
# - heading -- notes that haven't been created yet
return len(self.body or '') == 0 and len(self.children) == 0
def traverse(self) -> Iterator['Node']:
def traverse(self) -> Iterator[Node]:
# not sure about __iter__, because might be a bit unintuitive that it's recursive..
yield self
for c in self.children:
@ -120,7 +123,7 @@ class Node(NamedTuple):
return f'Node(created={self.created}, title={self.title}, body={self.body})'
@staticmethod
def make(raw: Json) -> Iterator['Node']:
def make(raw: Json) -> Iterator[Node]:
is_empty = set(raw.keys()) == {Keys.EDITED, Keys.EDIT_EMAIL, Keys.TITLE}
# not sure about that... but daily notes end up like that
if is_empty:
@ -130,11 +133,11 @@ class Node(NamedTuple):
class Roam:
def __init__(self, raw: List[Json]) -> None:
def __init__(self, raw: list[Json]) -> None:
self.raw = raw
@property
def notes(self) -> List[Node]:
def notes(self) -> list[Node]:
return list(chain.from_iterable(map(Node.make, self.raw)))
def traverse(self) -> Iterator[Node]:


@ -3,9 +3,9 @@ Unified RSS data, merged from different services I used historically
'''
# NOTE: you can comment out the sources you're not using
from . import feedbin, feedly
from collections.abc import Iterable
from typing import Iterable
from . import feedbin, feedly
from .common import Subscription, compute_subscriptions


@ -1,10 +1,12 @@
from my.core import __NOT_HPI_MODULE__
from __future__ import annotations
from my.core import __NOT_HPI_MODULE__ # isort: skip
from collections.abc import Iterable, Sequence
from dataclasses import dataclass, replace
from itertools import chain
from typing import Optional, List, Dict, Iterable, Tuple, Sequence
from my.core import warn_if_empty, datetime_aware
from my.core import datetime_aware, warn_if_empty
@dataclass
@ -13,16 +15,16 @@ class Subscription:
url: str
id: str # TODO not sure about it...
# eh, not all of them got reasonable 'created' time
created_at: Optional[datetime_aware]
created_at: datetime_aware | None
subscribed: bool = True
# snapshot of subscriptions at time
SubscriptionState = Tuple[datetime_aware, Sequence[Subscription]]
SubscriptionState = tuple[datetime_aware, Sequence[Subscription]]
@warn_if_empty
def compute_subscriptions(*sources: Iterable[SubscriptionState]) -> List[Subscription]:
def compute_subscriptions(*sources: Iterable[SubscriptionState]) -> list[Subscription]:
"""
Keeps track of everything I ever subscribed to.
In addition, keeps track of unsubscribed as well (so you'd remember when and why you unsubscribed)
@ -30,7 +32,7 @@ def compute_subscriptions(*sources: Iterable[SubscriptionState]) -> List[Subscri
states = list(chain.from_iterable(sources))
# TODO keep 'source'/'provider'/'service' attribute?
by_url: Dict[str, Subscription] = {}
by_url: dict[str, Subscription] = {}
# ah. dates are used for sorting
for _when, state in sorted(states):
# TODO use 'when'?

View file

@ -3,15 +3,15 @@ Feedbin RSS reader
"""
import json
from collections.abc import Iterator, Sequence
from pathlib import Path
from typing import Iterator, Sequence
from my.core import get_files, stat, Stats
from my.core import Stats, get_files, stat
from my.core.compat import fromisoformat
from .common import Subscription, SubscriptionState
from my.config import feedbin as config
from my.config import feedbin as config # isort: skip
def inputs() -> Sequence[Path]:
return get_files(config.export_path)


@ -4,9 +4,10 @@ Feedly RSS reader
import json
from abc import abstractmethod
from collections.abc import Iterator, Sequence
from datetime import datetime, timezone
from pathlib import Path
from typing import Iterator, Protocol, Sequence
from typing import Protocol
from my.core import Paths, get_files


@ -6,21 +6,19 @@ REQUIRES = [
'icalendar',
]
import re
from collections.abc import Iterator
from datetime import datetime
from functools import cached_property
import re
from typing import Dict, List, Iterator
from my.core import make_logger, get_files
from my.core.utils.itertools import make_dict
from my.config import rtm as config
from more_itertools import bucket
import icalendar # type: ignore
from icalendar.cal import Todo # type: ignore
from more_itertools import bucket
from my.core import get_files, make_logger
from my.core.utils.itertools import make_dict
from my.config import rtm as config # isort: skip
logger = make_logger(__name__)
@ -32,14 +30,14 @@ class MyTodo:
self.revision = revision
@cached_property
def notes(self) -> List[str]:
def notes(self) -> list[str]:
# TODO can there be multiple??
desc = self.todo['DESCRIPTION']
notes = re.findall(r'---\n\n(.*?)\n\nUpdated:', desc, flags=re.DOTALL)
return notes
@cached_property
def tags(self) -> List[str]:
def tags(self) -> list[str]:
desc = self.todo['DESCRIPTION']
[tags_str] = re.findall(r'\nTags: (.*?)\n', desc, flags=re.DOTALL)
if tags_str == 'none':
@ -92,11 +90,11 @@ class DAL:
for t in self.cal.walk('VTODO'):
yield MyTodo(t, self.revision)
def get_todos_by_uid(self) -> Dict[str, MyTodo]:
def get_todos_by_uid(self) -> dict[str, MyTodo]:
todos = self.all_todos()
return make_dict(todos, key=lambda t: t.uid)
def get_todos_by_title(self) -> Dict[str, List[MyTodo]]:
def get_todos_by_title(self) -> dict[str, list[MyTodo]]:
todos = self.all_todos()
bucketed = bucket(todos, lambda todo: todo.title)
return {k: list(bucketed[k]) for k in bucketed}


@ -6,17 +6,15 @@ REQUIRES = [
'python-tcxparser',
]
from collections.abc import Iterable
from datetime import timedelta
from pathlib import Path
from typing import Iterable
from my.core import Res, get_files, Json
from my.core.compat import fromisoformat
import tcxparser # type: ignore[import-untyped]
from my.config import runnerup as config
from my.core import Json, Res, get_files
from my.core.compat import fromisoformat
# TODO later, use a proper namedtuple?
Workout = Json
@ -70,6 +68,8 @@ def workouts() -> Iterable[Res[Workout]]:
from .core.pandas import DataFrameT, check_dataframe, error_to_row
@check_dataframe
def dataframe() -> DataFrameT:
def it():
@ -85,6 +85,8 @@ def dataframe() -> DataFrameT:
return df
from .core import stat, Stats
from .core import Stats, stat
def stats() -> Stats:
return stat(dataframe)


@ -1,12 +1,11 @@
'''
Just a demo module for testing and documentation purposes
'''
from collections.abc import Iterator
from dataclasses import dataclass
from typing import Iterator
from my.core import make_config
from my.config import simple as user_config
from my.core import make_config
@dataclass


@ -2,6 +2,7 @@
Phone calls and SMS messages
Exported using https://play.google.com/store/apps/details?id=com.riteshsahu.SMSBackupRestore&hl=en_US
"""
from __future__ import annotations
# See: https://www.synctech.com.au/sms-backup-restore/fields-in-xml-backup-files/ for schema
@ -9,8 +10,9 @@ REQUIRES = ['lxml']
from dataclasses import dataclass
from my.core import get_files, stat, Paths, Stats
from my.config import smscalls as user_config
from my.core import Paths, Stats, get_files, stat
@dataclass
class smscalls(user_config):
@ -18,11 +20,13 @@ class smscalls(user_config):
export_path: Paths
from my.core.cfg import make_config
config = make_config(smscalls)
from collections.abc import Iterator
from datetime import datetime, timezone
from pathlib import Path
from typing import NamedTuple, Iterator, Set, Tuple, Optional, Any, Dict, List
from typing import Any, NamedTuple
import lxml.etree as etree
@ -33,7 +37,7 @@ class Call(NamedTuple):
dt: datetime
dt_readable: str
duration_s: int
who: Optional[str]
who: str | None
# type - 1 = Incoming, 2 = Outgoing, 3 = Missed, 4 = Voicemail, 5 = Rejected, 6 = Refused List.
call_type: int
@ -50,7 +54,7 @@ class Call(NamedTuple):
# All the field values are read as-is from the underlying database and no conversion is done by the app in most cases.
#
# The '(Unknown)' is just what my android phone does, not sure if there are others
UNKNOWN: Set[str] = {'(Unknown)'}
UNKNOWN: set[str] = {'(Unknown)'}
def _extract_calls(path: Path) -> Iterator[Res[Call]]:
@ -83,7 +87,7 @@ def calls() -> Iterator[Res[Call]]:
files = get_files(config.export_path, glob='calls-*.xml')
# TODO always replacing with the latter is good, we get better contact names??
emitted: Set[datetime] = set()
emitted: set[datetime] = set()
for p in files:
for c in _extract_calls(p):
if isinstance(c, Exception):
@ -98,7 +102,7 @@ def calls() -> Iterator[Res[Call]]:
class Message(NamedTuple):
dt: datetime
dt_readable: str
who: Optional[str]
who: str | None
message: str
phone_number: str
# type - 1 = Received, 2 = Sent, 3 = Draft, 4 = Outbox, 5 = Failed, 6 = Queued
@ -112,7 +116,7 @@ class Message(NamedTuple):
def messages() -> Iterator[Res[Message]]:
files = get_files(config.export_path, glob='sms-*.xml')
emitted: Set[Tuple[datetime, Optional[str], bool]] = set()
emitted: set[tuple[datetime, str | None, bool]] = set()
for p in files:
for c in _extract_messages(p):
if isinstance(c, Exception):
@ -155,20 +159,20 @@ class MMSContentPart(NamedTuple):
sequence_index: int
content_type: str
filename: str
text: Optional[str]
data: Optional[str]
text: str | None
data: str | None
class MMS(NamedTuple):
dt: datetime
dt_readable: str
parts: List[MMSContentPart]
parts: list[MMSContentPart]
# NOTE: this is often something like 'Name 1, Name 2', but might be different depending on your client
who: Optional[str]
who: str | None
# NOTE: This can be a single phone number, or multiple, split by '~' or ','. It's better to think
# of this as a 'key' or 'conversation ID', phone numbers are also present in 'addresses'
phone_number: str
addresses: List[Tuple[str, int]]
addresses: list[tuple[str, int]]
# 1 = Received, 2 = Sent, 3 = Draft, 4 = Outbox
message_type: int
@ -194,7 +198,7 @@ class MMS(NamedTuple):
def mms() -> Iterator[Res[MMS]]:
files = get_files(config.export_path, glob='sms-*.xml')
emitted: Set[Tuple[datetime, Optional[str], str]] = set()
emitted: set[tuple[datetime, str | None, str]] = set()
for p in files:
for c in _extract_mms(p):
if isinstance(c, Exception):
@ -207,7 +211,7 @@ def mms() -> Iterator[Res[MMS]]:
yield c
def _resolve_null_str(value: Optional[str]) -> Optional[str]:
def _resolve_null_str(value: str | None) -> str | None:
if value is None:
return None
# hmm.. there's some risk of the text actually being 'null', but there's
@ -235,7 +239,7 @@ def _extract_mms(path: Path) -> Iterator[Res[MMS]]:
yield RuntimeError(f'Missing one or more required attributes [date, readable_date, msg_box, address] in {mxml_str}')
continue
addresses: List[Tuple[str, int]] = []
addresses: list[tuple[str, int]] = []
for addr_parent in mxml.findall('addrs'):
for addr in addr_parent.findall('addr'):
addr_data = addr.attrib
@ -250,7 +254,7 @@ def _extract_mms(path: Path) -> Iterator[Res[MMS]]:
continue
addresses.append((user_address, int(user_type)))
content: List[MMSContentPart] = []
content: list[MMSContentPart] = []
for part_root in mxml.findall('parts'):
@ -267,8 +271,8 @@ def _extract_mms(path: Path) -> Iterator[Res[MMS]]:
#
# man, attrib is some internal cpython ._Attrib type which can't
# be typed by any sort of mappingproxy. maybe a protocol could work..?
part_data: Dict[str, Any] = part.attrib # type: ignore
seq: Optional[str] = part_data.get('seq')
part_data: dict[str, Any] = part.attrib # type: ignore
seq: str | None = part_data.get('seq')
if seq == '-1':
continue
@ -276,13 +280,13 @@ def _extract_mms(path: Path) -> Iterator[Res[MMS]]:
yield RuntimeError(f'seq must be a number, was seq={seq} {type(seq)} in {part_data}')
continue
charset_type: Optional[str] = _resolve_null_str(part_data.get('ct'))
filename: Optional[str] = _resolve_null_str(part_data.get('name'))
charset_type: str | None = _resolve_null_str(part_data.get('ct'))
filename: str | None = _resolve_null_str(part_data.get('name'))
# in some cases (images, cards), the filename is set in 'cl' instead
if filename is None:
filename = _resolve_null_str(part_data.get('cl'))
text: Optional[str] = _resolve_null_str(part_data.get('text'))
data: Optional[str] = _resolve_null_str(part_data.get('data'))
text: str | None = _resolve_null_str(part_data.get('text'))
data: str | None = _resolve_null_str(part_data.get('data'))
if charset_type is None or filename is None or (text is None and data is None):
yield RuntimeError(f'Missing one or more required attributes [ct, name, (text, data)] must be present in {part_data}')

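The emitted-set dedup pattern used above for calls, messages and mms generalizes to any iterator with a hashable key; with the future import, the PEP 604 key type is annotation-only and therefore fine on 3.9. A condensed sketch:

from __future__ import annotations

from collections.abc import Iterator
from datetime import datetime

def dedupe(records: Iterator[tuple[datetime, str | None, str]]) -> Iterator[tuple[datetime, str | None, str]]:
    # overlapping backup files repeat items; keep only the first occurrence of each key
    emitted: set[tuple[datetime, str | None, str]] = set()
    for rec in records:
        if rec in emitted:
            continue
        emitted.add(rec)
        yield rec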

@ -6,8 +6,11 @@ Stackexchange data (uses [[https://stackoverflow.com/legal/gdpr/request][officia
### config
from dataclasses import dataclass
from my.config import stackexchange as user_config
from my.core import PathIsh, make_config, get_files, Json
from my.core import Json, PathIsh, get_files, make_config
@dataclass
class stackexchange(user_config):
gdpr_path: PathIsh # path to GDPR zip file
@ -17,9 +20,13 @@ config = make_config(stackexchange)
# TODO just merge all of them and then filter?.. not sure
from my.core.compat import fromisoformat
from typing import NamedTuple, Iterable
from collections.abc import Iterable
from datetime import datetime
from typing import NamedTuple
from my.core.compat import fromisoformat
class Vote(NamedTuple):
j: Json
# todo ip?
@ -62,7 +69,10 @@ class Vote(NamedTuple):
# todo expose vote type?
import json
from ..core.error import Res
def votes() -> Iterable[Res[Vote]]:
# TODO there is also some site specific stuff in qa/ directory.. not sure if its' more detailed
# todo should be defensive? not sure if present when user has no votes
@ -74,6 +84,8 @@ def votes() -> Iterable[Res[Vote]]:
yield Vote(r)
from ..core import stat, Stats
from ..core import Stats, stat
def stats() -> Stats:
return stat(votes)


@ -16,7 +16,8 @@ from my.core import (
make_config,
stat,
)
import my.config
import my.config # isort: skip
@dataclass


@ -1,24 +1,26 @@
'''
[[https://play.google.com/store/apps/details?id=com.waterbear.taglog][Taplog]] app data
'''
from datetime import datetime
from typing import NamedTuple, Dict, Optional, Iterable
from __future__ import annotations
from my.core import get_files, stat, Stats
from my.core.sqlite import sqlite_connection
from collections.abc import Iterable
from datetime import datetime
from typing import NamedTuple
from my.config import taplog as user_config
from my.core import Stats, get_files, stat
from my.core.sqlite import sqlite_connection
class Entry(NamedTuple):
row: Dict
row: dict
@property
def id(self) -> str:
return str(self.row['_id'])
@property
def number(self) -> Optional[float]:
def number(self) -> float | None:
ns = self.row['number']
# TODO ??
if isinstance(ns, str):

Some files were not shown because too many files have changed in this diff Show more