switch from using dataset to raw sqlite3 module
dataset is kinda unmaintaned and currently broken due to sqlalchemy 2.0 changes resolves https://github.com/karlicoss/HPI/issues/264
This commit is contained in:
parent
1dcb40f840
commit
7c7457ab08
8 changed files with 123 additions and 103 deletions
|
@ -4,31 +4,32 @@ Twitter data from Talon app database (in =/data/data/com.klinker.android.twitter
|
|||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
from datetime import datetime, timezone
|
||||
import re
|
||||
from typing import Iterator, Sequence, Optional, Dict
|
||||
import sqlite3
|
||||
from typing import Iterator, Sequence, Union
|
||||
|
||||
import pytz
|
||||
from more_itertools import unique_everseen
|
||||
|
||||
from my.core import Paths, Res, datetime_aware, get_files
|
||||
from my.core.sqlite import sqlite_connection
|
||||
|
||||
from .common import TweetId, permalink
|
||||
|
||||
from my.config import twitter as user_config
|
||||
|
||||
|
||||
from ..core import Paths, Res, datetime_aware
|
||||
@dataclass
|
||||
class config(user_config.talon):
|
||||
# paths[s]/glob to the exported sqlite databases
|
||||
export_path: Paths
|
||||
|
||||
|
||||
from ..core import get_files
|
||||
from pathlib import Path
|
||||
def inputs() -> Sequence[Path]:
|
||||
return get_files(config.export_path)
|
||||
|
||||
|
||||
from .common import TweetId, permalink
|
||||
|
||||
|
||||
@dataclass(unsafe_hash=True)
|
||||
class Tweet:
|
||||
id_str: TweetId
|
||||
|
@ -51,8 +52,6 @@ class _IsFavorire:
|
|||
tweet: Tweet
|
||||
|
||||
|
||||
from typing import Union
|
||||
from ..core.dataset import connect_readonly
|
||||
Entity = Union[_IsTweet, _IsFavorire]
|
||||
def _entities() -> Iterator[Res[Entity]]:
|
||||
for f in inputs():
|
||||
|
@ -67,35 +66,36 @@ def _process_one(f: Path) -> Iterator[Res[Entity]]:
|
|||
fname = f.name
|
||||
handler = handlers.get(fname)
|
||||
if handler is None:
|
||||
yield RuntimeError(f"Coulnd't find handler for {fname}")
|
||||
yield RuntimeError(f"Could not find handler for {fname}")
|
||||
return
|
||||
with connect_readonly(f) as db:
|
||||
with sqlite_connection(f, immutable=True, row_factory='row') as db:
|
||||
yield from handler(db)
|
||||
|
||||
|
||||
def _process_user_tweets(db) -> Iterator[Res[Entity]]:
|
||||
def _process_user_tweets(db: sqlite3.Connection) -> Iterator[Res[Entity]]:
|
||||
# dunno why it's called 'lists'
|
||||
for r in db['lists'].all(order_by='time'):
|
||||
for r in db.execute('SELECT * FROM lists ORDER BY time'):
|
||||
try:
|
||||
yield _IsTweet(_parse_tweet(r))
|
||||
except Exception as e:
|
||||
yield e
|
||||
|
||||
|
||||
def _process_favorite_tweets(db) -> Iterator[Res[Entity]]:
|
||||
for r in db['favorite_tweets'].all(order_by='time'):
|
||||
def _process_favorite_tweets(db: sqlite3.Connection) -> Iterator[Res[Entity]]:
|
||||
for r in db.execute('SELECT * FROM favorite_tweets ORDER BY time'):
|
||||
try:
|
||||
yield _IsFavorire(_parse_tweet(r))
|
||||
except Exception as e:
|
||||
yield e
|
||||
|
||||
def _parse_tweet(row) -> Tweet:
|
||||
|
||||
def _parse_tweet(row: sqlite3.Row) -> Tweet:
|
||||
# ok so looks like it's tz aware..
|
||||
# https://github.com/klinker24/talon-for-twitter-android/blob/c3b0612717ba3ea93c0cae6d907d7d86d640069e/app/src/main/java/com/klinker/android/twitter_l/data/sq_lite/FavoriteTweetsDataSource.java#L95
|
||||
# uses https://docs.oracle.com/javase/7/docs/api/java/util/Date.html#getTime()
|
||||
# and it's created here, so looks like it's properly parsed from the api
|
||||
# https://github.com/Twitter4J/Twitter4J/blob/8376fade8d557896bb9319fb46e39a55b134b166/twitter4j-core/src/internal-json/java/twitter4j/ParseUtil.java#L69-L79
|
||||
created_at = datetime.fromtimestamp(row['time'] / 1000, tz=pytz.utc)
|
||||
created_at = datetime.fromtimestamp(row['time'] / 1000, tz=timezone.utc)
|
||||
text = row['text']
|
||||
|
||||
# try explanding URLs.. sadly there are no positions in the db
|
||||
|
@ -132,7 +132,6 @@ def _parse_tweet(row) -> Tweet:
|
|||
)
|
||||
|
||||
|
||||
from more_itertools import unique_everseen
|
||||
def tweets() -> Iterator[Res[Tweet]]:
|
||||
for x in unique_everseen(_entities()):
|
||||
if isinstance(x, Exception):
|
||||
|
@ -140,6 +139,7 @@ def tweets() -> Iterator[Res[Tweet]]:
|
|||
elif isinstance(x, _IsTweet):
|
||||
yield x.tweet
|
||||
|
||||
|
||||
def likes() -> Iterator[Res[Tweet]]:
|
||||
for x in unique_everseen(_entities()):
|
||||
if isinstance(x, Exception):
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue