doc: spelling fixes

parent ad55c5c345
commit a7f05c2cad

22 changed files with 27 additions and 27 deletions
@@ -20,7 +20,7 @@ General/my.core changes:
 - e81dddddf083ffd81aa7e2b715bd34f59949479c properly resolve class properties in make_config + add test

 Modules:
-- some innitial work on filling **InfluxDB** with HPI data
+- some initial work on filling **InfluxDB** with HPI data

 - pinboard
 - 42399f6250d9901d93dcedcfe05f7857babcf834: **breaking backwards compatibility**, use pinbexport module directly
@@ -10,7 +10,7 @@ Relevant discussion about overlays: https://github.com/karlicoss/HPI/issues/102
 # You can see them TODO in overlays dir

-Consider a toy package/module structure with minimal code, wihout any actual data parsing, just for demonstration purposes.
+Consider a toy package/module structure with minimal code, without any actual data parsing, just for demonstration purposes.

 - =main= package structure
 # TODO do links

@@ -19,7 +19,7 @@ Consider a toy package/module structure with minimal code, wihout any actual dat
 Extracts Twitter data from GDPR archive.
 - =my/twitter/all.py=
 Merges twitter data from multiple sources (only =gdpr= in this case), so data consumers are agnostic of specific data sources used.
-This will be overriden by =overlay=.
+This will be overridden by =overlay=.
 - =my/twitter/common.py=
 Contains helper function to merge data, so they can be reused by overlay's =all.py=.
 - =my/reddit.py=
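(Not part of the commit: a minimal sketch of what the toy =my/twitter/all.py= described above might contain; =merge_tweets= and =Tweet= are hypothetical names assumed to live in =my/twitter/common.py=.)

    # my/twitter/all.py -- illustrative sketch only
    from typing import Iterator

    from . import gdpr                        # the only concrete source in this toy setup
    from .common import Tweet, merge_tweets   # hypothetical helper/type from common.py

    def tweets() -> Iterator[Tweet]:
        # consumers import this regardless of which concrete sources exist;
        # an overlay package can shadow this module to add more sources
        yield from merge_tweets(gdpr.tweets())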
@@ -126,7 +126,7 @@ https://github.com/python/mypy/blob/1dd8e7fe654991b01bd80ef7f1f675d9e3910c3a/myp

 For now, I opened an issue in mypy repository https://github.com/python/mypy/issues/16683

-But ok, maybe mypy treats =main= as an external package somhow but still type checks it properly?
+But ok, maybe mypy treats =main= as an external package somehow but still type checks it properly?
 Let's see what's going on with imports:

 : $ mypy --namespace-packages --strict -p my --follow-imports=error
@@ -97,7 +97,7 @@ By default, this just returns the items in the order they were returned by the f
 hpi query my.coding.commits.commits --order-key committed_dt --limit 1 --reverse --output pprint --stream
 Commit(committed_dt=datetime.datetime(2023, 4, 14, 23, 9, 1, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=61200))),
 authored_dt=datetime.datetime(2023, 4, 14, 23, 4, 1, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=61200))),
-message='sources.smscalls: propogate errors if there are breaking '
+message='sources.smscalls: propagate errors if there are breaking '
 'schema changes',
 repo='/home/username/Repos/promnesia-fork',
 sha='22a434fca9a28df9b0915ccf16368df129d2c9ce',
@@ -136,7 +136,7 @@ if TYPE_CHECKING:
     CC = Callable[P, R] # need to give it a name, if inlined into bound=, mypy runs in a bug
     PathProvider = Union[PathIsh, Callable[P, PathIsh]]
     # NOTE: in cachew, HashFunction type returns str
-    # however in practice, cachew alwasy calls str for its result
+    # however in practice, cachew always calls str for its result
     # so perhaps better to switch it to Any in cachew as well
     HashFunction = Callable[P, Any]

@@ -236,7 +236,7 @@ def test_zoom() -> None:
 # - very flexible, easy to adjust behaviour
 # - cons:
 # - can forget to assert about extra entities etc, so error prone
-# - if we do something like =assert j.pop('status') == 200, j=, by the time assert happens we already popped item -- makes erro handling harder
+# - if we do something like =assert j.pop('status') == 200, j=, by the time assert happens we already popped item -- makes error handling harder
 # - a bit verbose.. so probably requires some helper functions though (could be much leaner than current konsume though)
 # - if we assert, then terminates parsing too early, if we're defensive then inflates the code a lot with if statements
 # - TODO perhaps combine warnings somehow or at least only emit once per module?
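(Not part of the commit: the =assert j.pop('status') == 200, j= pitfall noted above is easy to reproduce with a minimal sketch.)

    j = {'status': 500, 'body': 'oops'}
    try:
        # pop() mutates j before the assert fires, so the AssertionError
        # message is built from a dict that no longer contains 'status'
        assert j.pop('status') == 200, j
    except AssertionError as e:
        print(e)  # {'body': 'oops'}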
@@ -250,7 +250,7 @@ if __name__ == '__main__':
     test()


-## legacy/deprecated methods for backwards compatilibity
+## legacy/deprecated methods for backwards compatibility
 if not TYPE_CHECKING:
     from .compat import deprecated

@@ -12,7 +12,7 @@ def _init_default_config() -> None:

 def test_tmp_config() -> None:
     ## ugh. ideally this would be on the top level (would be a better test)
-    ## but pytest imports eveything first, executes hooks, and some reset_modules() fictures mess stuff up
+    ## but pytest imports everything first, executes hooks, and some reset_modules() fictures mess stuff up
     ## later would be nice to be a bit more careful about them
     _init_default_config()
     from my.simple import items
@@ -321,7 +321,7 @@ _UET = TypeVar('_UET')
 _UEU = TypeVar('_UEU')


-# NOTE: for historic reasons, this function had to accept Callable that retuns iterator
+# NOTE: for historic reasons, this function had to accept Callable that returns iterator
 # instead of just iterator
 # TODO maybe deprecated Callable support? not sure
 def unique_everseen(
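(Not part of the commit: a simplified sketch of the signature shape the note above describes, accepting either an iterable or a zero-argument callable producing one; this is not HPI's actual implementation.)

    from typing import Callable, Iterable, Iterator, TypeVar, Union

    T = TypeVar('T')

    def unique_everseen(src: Union[Iterable[T], Callable[[], Iterable[T]]]) -> Iterator[T]:
        it = src() if callable(src) else src
        seen = set()  # simplified: assumes items are hashable
        for item in it:
            if item not in seen:
                seen.add(item)
                yield item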
@@ -358,7 +358,7 @@ def test_unique_everseen() -> None:
     assert list(unique_everseen(fun_good)) == [123]

     with pytest.raises(Exception):
-        # since function retuns a list rather than iterator, check happens immediately
+        # since function returns a list rather than iterator, check happens immediately
         # , even without advancing the iterator
         unique_everseen(fun_bad)

@@ -9,7 +9,7 @@ since that allows for easier overriding using namespace packages
 See https://github.com/karlicoss/HPI/blob/master/doc/MODULE_DESIGN.org#allpy for more info.
 """

-# prevent it from apprearing in modules list/doctor
+# prevent it from appearing in modules list/doctor
 from ..core import __NOT_HPI_MODULE__

 # kinda annoying to keep it, but it's so legacy 'hpi module install my.fbmessenger' works
@@ -174,7 +174,7 @@ def _process_db_msys(db: sqlite3.Connection) -> Iterator[Res[Entity]]:
 However seems that when message is not sent yet it doesn't have this server id yet
 (happened only once, but could be just luck of course!)
 We exclude these messages to avoid duplication.
-However poisitive filter (e.g. message_id LIKE 'mid%') feels a bit wrong, e.g. what if mesage ids change or something
+However poisitive filter (e.g. message_id LIKE 'mid%') feels a bit wrong, e.g. what if message ids change or something
 So instead this excludes only such unsent messages.
 */
 message_id != offline_threading_id
@@ -23,7 +23,7 @@ def messages() -> Iterator[Res[Message]]:
     # TODO in general best to prefer android, it has more data
     # - message ids
     # - usernames are correct for Android data
-    # - thread ids more meaninful?
+    # - thread ids more meaningful?
     # but for now prefer gdpr prefix since it makes a bit things a bit more consistent?
     # e.g. a new batch of android exports can throw off ids if we rely on it for mapping
     yield from _merge_messages(
@@ -76,7 +76,7 @@ def _entities() -> Iterator[Res[User | _Message]]:
     # NOTE: here there are basically two options
     # - process inputs as is (from oldest to newest)
     #   this would be more stable wrt newer exports (e.g. existing thread ids won't change)
-    #   the downside is that newer exports seem to have better thread ids, so might be preferrable to use them
+    #   the downside is that newer exports seem to have better thread ids, so might be preferable to use them
     # - process inputs reversed (from newest to oldest)
     #   the upside is that thread ids/usernames might be better
     #   the downside is that if for example the user renames, thread ids will change _a lot_, might be undesirable..
@@ -137,7 +137,7 @@ def _entitites_from_path(path: Path) -> Iterator[Res[User | _Message]]:
     j = json.loads(ffile.read_text())

     id_len = 10
-    # NOTE: I'm not actually sure it's other user's id.., since it corresponds to the whole converstation
+    # NOTE: I'm not actually sure it's other user's id.., since it corresponds to the whole conversation
     # but I stared a bit at these ids vs database ids and can't see any way to find the correspondence :(
     # so basically the only way to merge is to actually try some magic and correlate timestamps/message texts?
     # another option is perhaps to query user id from username with some free API
@@ -9,7 +9,7 @@ since that allows for easier overriding using namespace packages
 See https://github.com/karlicoss/HPI/blob/master/doc/MODULE_DESIGN.org#allpy for more info.
 """

-# prevent it from apprearing in modules list/doctor
+# prevent it from appearing in modules list/doctor
 from ..core import __NOT_HPI_MODULE__

 # kinda annoying to keep it, but it's so legacy 'hpi module install my.reddit' works
@@ -186,7 +186,7 @@ class MMS(NamedTuple):
         for (addr, _type) in self.addresses:
             if _type == 137:
                 return addr
-        # hmm, maybe return instead? but this probably shouldnt happen, means
+        # hmm, maybe return instead? but this probably shouldn't happen, means
         # something is very broken
         raise RuntimeError(f'No from address matching 137 found in {self.addresses}')

@@ -214,7 +214,7 @@ def mms() -> Iterator[Res[MMS]]:
 def _resolve_null_str(value: str | None) -> str | None:
     if value is None:
         return None
-    # hmm.. theres some risk of the text actually being 'null', but theres
+    # hmm.. there's some risk of the text actually being 'null', but there's
     # no way to distinguish that from XML values
     if value == 'null':
         return None
@@ -49,7 +49,7 @@ class Vote(NamedTuple):
 # hmm, this loads very raw comments without the rest of the page?
 # - https://meta.stackexchange.com/posts/27319/comments#comment-57475
 #
-# parentPostId is the original quesion
+# parentPostId is the original question
 # TODO is not always present? fucking hell
 # seems like there is no way to get a hierarchical comment link.. guess this needs to be handled in Promnesia normalisation...
 # postId is the answer
@@ -245,7 +245,7 @@ def _iter_tzs() -> Iterator[DayWithZone]:
 def _day2zone() -> dict[date, pytz.BaseTzInfo]:
     # NOTE: kinda unfortunate that this will have to process all days before returning result for just one
     # however otherwise cachew cache might never be initialized properly
-    # so we'll always end up recomputing everyting during subsequent runs
+    # so we'll always end up recomputing everything during subsequent runs
     return {dz.day: pytz.timezone(dz.zone) for dz in _iter_tzs()}


@@ -106,7 +106,7 @@ def _handle_db(db: sqlite3.Connection) -> Iterator[Res[_Entity]]:
     user_profile_rows = list(db.execute('SELECT * FROM profile_user_view'))

     if len(user_profile_rows) == 0:
-        # shit, sometime in 2023 profile_user_view stoppped containing user profile..
+        # shit, sometime in 2023 profile_user_view stopped containing user profile..
         # presumably the most common from_id/to_id would be our own username
         counter = Counter([id_ for (id_,) in db.execute('SELECT from_id FROM message UNION ALL SELECT to_id FROM message')])
         if len(counter) > 0: # this might happen if db is empty (e.g. user got logged out)
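(Not part of the commit: the fallback heuristic described above, picking the most frequent id as one's own, could look roughly like this; the ids and the use of most_common are assumptions, since the hunk cuts off before the actual continuation.)

    from collections import Counter

    ids = ['alice', 'me', 'bob', 'me', 'me', 'alice']  # hypothetical from_id/to_id values
    counter = Counter(ids)
    if len(counter) > 0:
        my_id, _ = counter.most_common(1)[0]
        print(my_id)  # 'me': presumably our own username, as it appears most often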
@@ -81,7 +81,7 @@ def _parse_one(p: Path) -> Iterator[Res[Competition]]:
     # but also expects cooperation from .make method (e.g. popping items from the dict)
     # could also wrap in helper and pass to .make .. not sure
     # an argument could be made that .make isn't really a class methond..
-    # it's pretty specific to this parser onl
+    # it's pretty specific to this parser only
     yield from Competition.make(j=c)

     yield from m.check()
@@ -192,7 +192,7 @@ def get_own_user_id(conn) -> str:
 # - timeline_data_type
 # 1 : the bulk of tweets, but also some notifications etc??
 # 2 : who-to-follow/community-to-join. contains a couple of tweets, but their corresponding status_id is NULL
-# 8 : who-to-follow/notfication
+# 8 : who-to-follow/notification
 # 13: semantic-core/who-to-follow
 # 14: cursor
 # 17: trends
@@ -54,7 +54,7 @@ class Tweet(NamedTuple):
         # https://github.com/thomasancheriyil/Red-Tide-Detection-based-on-Twitter/blob/beb200be60cc66dcbc394e670513715509837812/python/twitterGapParse.py#L61-L62
         #
         # twint is also saving 'timezone', but this is local machine timezone at the time of scraping?
-        # perhaps they thought date-time-ms was local time... or just kept it just in case (they are keepin lots on unnecessary stuff in the db)
+        # perhaps they thought date-time-ms was local time... or just kept it just in case (they are keeping lots on unnecessary stuff in the db)
         return datetime.fromtimestamp(seconds, tz=tz)

     @property
@@ -199,7 +199,7 @@ def _process_db(db: sqlite3.Connection) -> Iterator[Entity]:
     sender_row_id = r['sender_jid_row_id']
     if sender_row_id == 0:
         # seems that it's always 0 for 1-1 chats
-        # for group chats our onw id is still 0, but other ids are properly set
+        # for group chats our own id is still 0, but other ids are properly set
         if from_me:
             myself_user_id = config.my_user_id or 'MYSELF_USER_ID'
             sender = Sender(id=myself_user_id, name=None) # TODO set my own name as well?
@@ -36,7 +36,7 @@ def watched() -> Iterator[Res[Watched]]:
 continue

 # older exports (e.g. html) didn't have microseconds
-# wheras newer json ones do have them
+# whereas newer json ones do have them
 # seconds resolution is enough to distinguish watched videos
 # also we're processing takeouts in HPI in reverse order, so first seen watch would contain microseconds, resulting in better data
 without_microsecond = w.when.replace(microsecond=0)
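(Not part of the commit: a sketch of the second-resolution deduplication described above, so an html entry without microseconds and a json entry with them collapse into one watch; the emit helper and the set-based bookkeeping are assumptions.)

    from datetime import datetime, timezone

    emitted: set[datetime] = set()

    def emit(when: datetime) -> bool:
        # truncate to seconds: html and json exports of the same watch then share a key
        without_microsecond = when.replace(microsecond=0)
        if without_microsecond in emitted:
            return False
        emitted.add(without_microsecond)
        return True

    # takeouts are processed newest-first, so the json entry (with microseconds) is seen first
    assert emit(datetime(2023, 4, 14, 23, 9, 1, 123456, tzinfo=timezone.utc))
    assert not emit(datetime(2023, 4, 14, 23, 9, 1, tzinfo=timezone.utc))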