general: migrate modules to use 3.9 features

This commit is contained in:
Dima Gerasimov 2024-10-19 22:10:40 +01:00 committed by karlicoss
parent d3f9a8e8b6
commit 8496d131e7
125 changed files with 889 additions and 739 deletions

View file

@ -2,18 +2,22 @@
Google Takeout exports: browsing history, search/youtube/google play activity
'''
from enum import Enum
from __future__ import annotations
from my.core import __NOT_HPI_MODULE__ # isort: skip
import re
from pathlib import Path
from collections.abc import Iterable
from datetime import datetime
from enum import Enum
from html.parser import HTMLParser
from typing import List, Optional, Any, Callable, Iterable, Tuple
from pathlib import Path
from typing import Any, Callable
from urllib.parse import unquote
import pytz
from ...core.time import abbr_to_timezone
from my.core.time import abbr_to_timezone
# NOTE: %Z doesn't work properly in strptime, see https://bugs.python.org/issue22377
_TIME_FORMATS = [
@ -36,7 +40,7 @@ def parse_dt(s: str) -> datetime:
s, tzabbr = s.rsplit(maxsplit=1)
tz = abbr_to_timezone(tzabbr)
dt: Optional[datetime] = None
dt: datetime | None = None
for fmt in _TIME_FORMATS:
try:
dt = datetime.strptime(s, fmt)
@ -73,7 +77,7 @@ class State(Enum):
Url = str
Title = str
Parsed = Tuple[datetime, Url, Title]
Parsed = tuple[datetime, Url, Title]
Callback = Callable[[datetime, Url, Title], None]
@ -83,9 +87,9 @@ class TakeoutHTMLParser(HTMLParser):
super().__init__()
self.state: State = State.OUTSIDE
self.title_parts: List[str] = []
self.title: Optional[str] = None
self.url: Optional[str] = None
self.title_parts: list[str] = []
self.title: str | None = None
self.url: str | None = None
self.callback = callback
@ -148,7 +152,7 @@ class TakeoutHTMLParser(HTMLParser):
def read_html(tpath: Path, file: str) -> Iterable[Parsed]:
results: List[Parsed] = []
results: list[Parsed] = []
def cb(dt: datetime, url: Url, title: Title) -> None:
results.append((dt, url, title))
parser = TakeoutHTMLParser(callback=cb)
@ -156,5 +160,3 @@ def read_html(tpath: Path, file: str) -> Iterable[Parsed]:
data = fo.read()
parser.feed(data)
return results
from ...core import __NOT_HPI_MODULE__

View file

@ -14,24 +14,27 @@ the cachew cache
REQUIRES = ["git+https://github.com/seanbreckenridge/google_takeout_parser"]
import os
from collections.abc import Sequence
from contextlib import ExitStack
from dataclasses import dataclass
import os
from typing import List, Sequence, cast
from pathlib import Path
from my.core import make_config, stat, Stats, get_files, Paths, make_logger
from typing import cast
from google_takeout_parser.parse_html.html_time_utils import ABBR_TIMEZONES
from my.core import Paths, Stats, get_files, make_config, make_logger, stat
from my.core.cachew import mcachew
from my.core.error import ErrorPolicy
from my.core.structure import match_structure
from my.core.time import user_forced
from google_takeout_parser.parse_html.html_time_utils import ABBR_TIMEZONES
ABBR_TIMEZONES.extend(user_forced())
import google_takeout_parser
from google_takeout_parser.path_dispatch import TakeoutParser
from google_takeout_parser.merge import GoogleEventSet, CacheResults
from google_takeout_parser.merge import CacheResults, GoogleEventSet
from google_takeout_parser.models import BaseEvent
from google_takeout_parser.path_dispatch import TakeoutParser
# see https://github.com/seanbreckenridge/dotfiles/blob/master/.config/my/my/config/__init__.py for an example
from my.config import google as user_config
@ -56,6 +59,7 @@ logger = make_logger(__name__, level="warning")
# patch the takeout parser logger to match the computed loglevel
from google_takeout_parser.log import setup as setup_takeout_logger
setup_takeout_logger(logger.level)
@ -83,7 +87,7 @@ except ImportError:
google_takeout_version = str(getattr(google_takeout_parser, '__version__', 'unknown'))
def _cachew_depends_on() -> List[str]:
def _cachew_depends_on() -> list[str]:
exports = sorted([str(p) for p in inputs()])
# add the google_takeout_parser pip version to the hash, so the cache is re-created on breaking changes
exports.insert(0, f"google_takeout_version: {google_takeout_version}")

View file

@ -2,13 +2,17 @@
Module for locating and accessing [[https://takeout.google.com][Google Takeout]] data
'''
from __future__ import annotations
from my.core import __NOT_HPI_MODULE__ # isort: skip
from abc import abstractmethod
from collections.abc import Iterable
from pathlib import Path
from typing import Iterable, Optional, Protocol
from more_itertools import last
from my.core import __NOT_HPI_MODULE__, Paths, get_files
from my.core import Paths, get_files
class config:
@ -33,7 +37,7 @@ def make_config() -> config:
return combined_config()
def get_takeouts(*, path: Optional[str] = None) -> Iterable[Path]:
def get_takeouts(*, path: str | None = None) -> Iterable[Path]:
"""
Sometimes Google splits a takeout into multiple archives, so we need to detect the ones that contain the path we need.
"""
@ -45,7 +49,7 @@ def get_takeouts(*, path: Optional[str] = None) -> Iterable[Path]:
yield takeout
def get_last_takeout(*, path: Optional[str] = None) -> Optional[Path]:
def get_last_takeout(*, path: str | None = None) -> Path | None:
return last(get_takeouts(path=path), default=None)