my.google.takeout.parser: speed up event merging on newer google_takeout_parser versions
This commit is contained in:
parent
71fdeca5e1
commit
27178c0939
2 changed files with 21 additions and 10 deletions
|
@ -31,6 +31,7 @@ ABBR_TIMEZONES.extend(user_forced())
|
|||
import google_takeout_parser
|
||||
from google_takeout_parser.path_dispatch import TakeoutParser
|
||||
from google_takeout_parser.merge import GoogleEventSet, CacheResults
|
||||
from google_takeout_parser.models import BaseEvent
|
||||
|
||||
# see https://github.com/seanbreckenridge/dotfiles/blob/master/.config/my/my/config/__init__.py for an example
|
||||
from my.config import google as user_config
|
||||
|
@ -95,6 +96,17 @@ def events(disable_takeout_cache: bool = DISABLE_TAKEOUT_CACHE) -> CacheResults:
|
|||
error_policy = config.error_policy
|
||||
count = 0
|
||||
emitted = GoogleEventSet()
|
||||
|
||||
try:
|
||||
emitted_add = emitted.add_if_not_present
|
||||
except AttributeError:
|
||||
# compat: older versions of google_takeout_parser don't have GoogleEventSet.add_if_not_present, so fall back to a check-then-add helper
|
||||
def emitted_add(other: BaseEvent) -> bool:
|
||||
if other in emitted:
|
||||
return False
|
||||
emitted.add(other)
|
||||
return True
|
||||
|
||||
# the reversed order shouldn't really matter, but the logic is to prefer newer
|
||||
# takeouts if they're named according to date, since JSON Activity
|
||||
# is nicer than HTML Activity
|
||||
|
@ -123,10 +135,9 @@ def events(disable_takeout_cache: bool = DISABLE_TAKEOUT_CACHE) -> CacheResults:
|
|||
elif error_policy == 'drop':
|
||||
pass
|
||||
continue
|
||||
if event in emitted:
|
||||
continue
|
||||
emitted.add(event)
|
||||
yield event # type: ignore[misc]
|
||||
|
||||
if emitted_add(event):
|
||||
yield event # type: ignore[misc]
|
||||
logger.debug(
|
||||
f"HPI Takeout merge: from a total of {count} events, removed {count - len(emitted)} duplicates"
|
||||
)
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
from typing import NamedTuple, List, Iterable, TYPE_CHECKING
|
||||
|
||||
from ..core import datetime_aware, Res, LazyLogger
|
||||
from ..core.compat import removeprefix
|
||||
from my.core import datetime_aware, make_logger, stat, Res, Stats
|
||||
from my.core.compat import deprecated, removeprefix
|
||||
|
||||
|
||||
logger = LazyLogger(__name__)
|
||||
logger = make_logger(__name__)
|
||||
|
||||
|
||||
class Watched(NamedTuple):
|
||||
|
@ -93,7 +93,6 @@ def watched() -> Iterable[Res[Watched]]:
|
|||
)
|
||||
|
||||
|
||||
from ..core import stat, Stats
|
||||
def stats() -> Stats:
|
||||
return stat(watched)
|
||||
|
||||
|
@ -101,8 +100,9 @@ def stats() -> Stats:
|
|||
### deprecated stuff (keep in my.media.youtube)
|
||||
|
||||
if not TYPE_CHECKING:
|
||||
# "deprecate" by hiding from mypy
|
||||
get_watched = watched
|
||||
@deprecated("use 'watched' instead")
|
||||
def get_watched(*args, **kwargs):
|
||||
return watched(*args, **kwargs)
|
||||
|
||||
|
||||
def _watched_legacy() -> Iterable[Watched]:
|
||||
|
|
Loading…
Add table
Reference in a new issue