switch from using dataset to raw sqlite3 module
dataset is kinda unmaintained and currently broken due to sqlalchemy 2.0 changes; resolves https://github.com/karlicoss/HPI/issues/264
This commit is contained in:
parent
1dcb40f840
commit
7c7457ab08
8 changed files with 123 additions and 103 deletions
|
@ -1,12 +1,16 @@
|
|||
"""
|
||||
Twitter data (tweets and favorites). Uses [[https://github.com/twintproject/twint][Twint]] data export.
|
||||
"""
|
||||
|
||||
REQUIRES = ['dataset']
|
||||
|
||||
from ..core.common import Paths
|
||||
from ..core.error import Res
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import NamedTuple, Iterator, List
|
||||
|
||||
|
||||
from my.core import Paths, Res, get_files, LazyLogger, Json, datetime_aware, stat, Stats
|
||||
from my.core.cfg import make_config
|
||||
from my.core.sqlite import sqlite_connection
|
||||
|
||||
from my.config import twint as user_config
|
||||
|
||||
# TODO move to twitter.twint config structure
|
||||
|
@ -17,16 +21,9 @@ class twint(user_config):
|
|||
|
||||
####
|
||||
|
||||
from ..core.cfg import make_config
|
||||
config = make_config(twint)
|
||||
|
||||
|
||||
from datetime import datetime, timezone
|
||||
from typing import NamedTuple, Iterator, List
|
||||
from pathlib import Path
|
||||
|
||||
from ..core.common import get_files, LazyLogger, Json, datetime_aware
|
||||
|
||||
log = LazyLogger(__name__)
|
||||
|
||||
|
||||
|
@ -110,25 +107,19 @@ WHERE {where}
|
|||
ORDER BY T.created_at
|
||||
'''
|
||||
|
||||
def _get_db():
|
||||
from ..core.dataset import connect_readonly
|
||||
db_path = get_db_path()
|
||||
return connect_readonly(db_path)
|
||||
|
||||
|
||||
def tweets() -> Iterator[Res[Tweet]]:
|
||||
db = _get_db()
|
||||
res = db.query(_QUERY.format(where='F.tweet_id IS NULL'))
|
||||
yield from map(Tweet, res)
|
||||
with sqlite_connection(get_db_path(), immutable=True, row_factory='row') as db:
|
||||
res = db.execute(_QUERY.format(where='F.tweet_id IS NULL'))
|
||||
yield from map(Tweet, res)
|
||||
|
||||
|
||||
def likes() -> Iterator[Res[Tweet]]:
|
||||
db = _get_db()
|
||||
res = db.query(_QUERY.format(where='F.tweet_id IS NOT NULL'))
|
||||
yield from map(Tweet, res)
|
||||
with sqlite_connection(get_db_path(), immutable=True, row_factory='row') as db:
|
||||
res = db.execute(_QUERY.format(where='F.tweet_id IS NOT NULL'))
|
||||
yield from map(Tweet, res)
|
||||
|
||||
|
||||
from ..core import stat, Stats
|
||||
def stats() -> Stats:
|
||||
return {
|
||||
**stat(tweets),
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue