doc: spelling fixes

Authored by purarue on 2024-11-20 00:03:40 -08:00; committed by karlicoss
parent ad55c5c345
commit a7f05c2cad
22 changed files with 27 additions and 27 deletions

@@ -20,7 +20,7 @@ General/my.core changes:
 - e81dddddf083ffd81aa7e2b715bd34f59949479c properly resolve class properties in make_config + add test
 Modules:
-- some innitial work on filling **InfluxDB** with HPI data
+- some initial work on filling **InfluxDB** with HPI data
 - pinboard
 - 42399f6250d9901d93dcedcfe05f7857babcf834: **breaking backwards compatibility**, use pinbexport module directly

@@ -10,7 +10,7 @@ Relevant discussion about overlays: https://github.com/karlicoss/HPI/issues/102
 # You can see them TODO in overlays dir
-Consider a toy package/module structure with minimal code, wihout any actual data parsing, just for demonstration purposes.
+Consider a toy package/module structure with minimal code, without any actual data parsing, just for demonstration purposes.
 - =main= package structure
   # TODO do links
@@ -19,7 +19,7 @@ Consider a toy package/module structure with minimal code, wihout any actual dat
   Extracts Twitter data from GDPR archive.
 - =my/twitter/all.py=
   Merges twitter data from multiple sources (only =gdpr= in this case), so data consumers are agnostic of specific data sources used.
-  This will be overriden by =overlay=.
+  This will be overridden by =overlay=.
 - =my/twitter/common.py=
   Contains helper function to merge data, so they can be reused by overlay's =all.py=.
 - =my/reddit.py=
@@ -126,7 +126,7 @@ https://github.com/python/mypy/blob/1dd8e7fe654991b01bd80ef7f1f675d9e3910c3a/myp
 For now, I opened an issue in mypy repository https://github.com/python/mypy/issues/16683
-But ok, maybe mypy treats =main= as an external package somhow but still type checks it properly?
+But ok, maybe mypy treats =main= as an external package somehow but still type checks it properly?
 Let's see what's going on with imports:
 : $ mypy --namespace-packages --strict -p my --follow-imports=error

@@ -97,7 +97,7 @@ By default, this just returns the items in the order they were returned by the f
 hpi query my.coding.commits.commits --order-key committed_dt --limit 1 --reverse --output pprint --stream
 Commit(committed_dt=datetime.datetime(2023, 4, 14, 23, 9, 1, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=61200))),
        authored_dt=datetime.datetime(2023, 4, 14, 23, 4, 1, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=61200))),
-       message='sources.smscalls: propogate errors if there are breaking '
+       message='sources.smscalls: propagate errors if there are breaking '
                'schema changes',
        repo='/home/username/Repos/promnesia-fork',
        sha='22a434fca9a28df9b0915ccf16368df129d2c9ce',
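
For comparison, the same "most recent commit" lookup in plain Python: a sketch assuming only the commits() function that the command above already queries.

    from my.coding.commits import commits

    # --order-key committed_dt --limit 1 --reverse boils down to a max() by that key
    last = max(commits(), key=lambda c: c.committed_dt)
    print(last)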

@@ -136,7 +136,7 @@ if TYPE_CHECKING:
     CC = Callable[P, R]  # need to give it a name, if inlined into bound=, mypy runs in a bug
     PathProvider = Union[PathIsh, Callable[P, PathIsh]]
     # NOTE: in cachew, HashFunction type returns str
-    # however in practice, cachew alwasy calls str for its result
+    # however in practice, cachew always calls str for its result
     # so perhaps better to switch it to Any in cachew as well
     HashFunction = Callable[P, Any]

@@ -236,7 +236,7 @@ def test_zoom() -> None:
 #   - very flexible, easy to adjust behaviour
 # - cons:
 #   - can forget to assert about extra entities etc, so error prone
-#   - if we do something like =assert j.pop('status') == 200, j=, by the time assert happens we already popped item -- makes erro handling harder
+#   - if we do something like =assert j.pop('status') == 200, j=, by the time assert happens we already popped item -- makes error handling harder
 #   - a bit verbose.. so probably requires some helper functions though (could be much leaner than current konsume though)
 #   - if we assert, then terminates parsing too early, if we're defensive then inflates the code a lot with if statements
 # - TODO perhaps combine warnings somehow or at least only emit once per module?
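
A small illustration of the pop-before-assert pitfall from the list above (plain dict, hypothetical data):

    j = {'status': 500, 'error': 'oops'}
    # j.pop runs first, so by the time the assert fires, the j echoed in the
    # AssertionError message no longer contains the 'status' key
    assert j.pop('status') == 200, j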

@@ -250,7 +250,7 @@ if __name__ == '__main__':
     test()
-## legacy/deprecated methods for backwards compatilibity
+## legacy/deprecated methods for backwards compatibility
 if not TYPE_CHECKING:
     from .compat import deprecated

@@ -12,7 +12,7 @@ def _init_default_config() -> None:
 def test_tmp_config() -> None:
     ## ugh. ideally this would be on the top level (would be a better test)
-    ## but pytest imports eveything first, executes hooks, and some reset_modules() fictures mess stuff up
+    ## but pytest imports everything first, executes hooks, and some reset_modules() fictures mess stuff up
     ## later would be nice to be a bit more careful about them
     _init_default_config()
     from my.simple import items

@@ -321,7 +321,7 @@ _UET = TypeVar('_UET')
 _UEU = TypeVar('_UEU')
-# NOTE: for historic reasons, this function had to accept Callable that retuns iterator
+# NOTE: for historic reasons, this function had to accept Callable that returns iterator
 # instead of just iterator
 # TODO maybe deprecated Callable support? not sure
 def unique_everseen(
@@ -358,7 +358,7 @@ def test_unique_everseen() -> None:
     assert list(unique_everseen(fun_good)) == [123]
     with pytest.raises(Exception):
-        # since function retuns a list rather than iterator, check happens immediately
+        # since function returns a list rather than iterator, check happens immediately
        # , even without advancing the iterator
         unique_everseen(fun_bad)
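
The bodies of fun_good/fun_bad aren't shown in this hunk; judging by the comments they presumably differ along these lines (an assumption, not the actual test fixtures):

    from collections.abc import Iterator

    def fun_good() -> Iterator[int]:
        yield 123  # generator: nothing runs until the result is iterated

    def fun_bad() -> list[int]:
        return [123]  # a list is materialized on call, so it can be rejected immediately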

@@ -9,7 +9,7 @@ since that allows for easier overriding using namespace packages
 See https://github.com/karlicoss/HPI/blob/master/doc/MODULE_DESIGN.org#allpy for more info.
 """
-# prevent it from apprearing in modules list/doctor
+# prevent it from appearing in modules list/doctor
 from ..core import __NOT_HPI_MODULE__
 # kinda annoying to keep it, but it's so legacy 'hpi module install my.fbmessenger' works

@@ -174,7 +174,7 @@ def _process_db_msys(db: sqlite3.Connection) -> Iterator[Res[Entity]]:
 However seems that when message is not sent yet it doesn't have this server id yet
 (happened only once, but could be just luck of course!)
 We exclude these messages to avoid duplication.
-However poisitive filter (e.g. message_id LIKE 'mid%') feels a bit wrong, e.g. what if mesage ids change or something
+However poisitive filter (e.g. message_id LIKE 'mid%') feels a bit wrong, e.g. what if message ids change or something
 So instead this excludes only such unsent messages.
 */
 message_id != offline_threading_id
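
In query form, the two filtering options discussed above might look as follows; the table name and the SELECT are assumptions, only the predicates come from the fragment:

    import sqlite3

    def _without_unsent(db: sqlite3.Connection) -> sqlite3.Cursor:
        # a positive filter (message_id LIKE 'mid%') could break if the id format changes,
        # so instead drop only unsent messages, whose message_id equals offline_threading_id
        return db.execute('SELECT * FROM messages WHERE message_id != offline_threading_id')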

@@ -23,7 +23,7 @@ def messages() -> Iterator[Res[Message]]:
     # TODO in general best to prefer android, it has more data
     # - message ids
     # - usernames are correct for Android data
-    # - thread ids more meaninful?
+    # - thread ids more meaningful?
     # but for now prefer gdpr prefix since it makes a bit things a bit more consistent?
     # e.g. a new batch of android exports can throw off ids if we rely on it for mapping
     yield from _merge_messages(

@@ -76,7 +76,7 @@ def _entities() -> Iterator[Res[User | _Message]]:
     # NOTE: here there are basically two options
     # - process inputs as is (from oldest to newest)
     #   this would be more stable wrt newer exports (e.g. existing thread ids won't change)
-    #   the downside is that newer exports seem to have better thread ids, so might be preferrable to use them
+    #   the downside is that newer exports seem to have better thread ids, so might be preferable to use them
     # - process inputs reversed (from newest to oldest)
     #   the upside is that thread ids/usernames might be better
     #   the downside is that if for example the user renames, thread ids will change _a lot_, might be undesirable..
@@ -137,7 +137,7 @@ def _entitites_from_path(path: Path) -> Iterator[Res[User | _Message]]:
     j = json.loads(ffile.read_text())
     id_len = 10
-    # NOTE: I'm not actually sure it's other user's id.., since it corresponds to the whole converstation
+    # NOTE: I'm not actually sure it's other user's id.., since it corresponds to the whole conversation
     # but I stared a bit at these ids vs database ids and can't see any way to find the correspondence :(
     # so basically the only way to merge is to actually try some magic and correlate timestamps/message texts?
     # another option is perhaps to query user id from username with some free API

@@ -9,7 +9,7 @@ since that allows for easier overriding using namespace packages
 See https://github.com/karlicoss/HPI/blob/master/doc/MODULE_DESIGN.org#allpy for more info.
 """
-# prevent it from apprearing in modules list/doctor
+# prevent it from appearing in modules list/doctor
 from ..core import __NOT_HPI_MODULE__
 # kinda annoying to keep it, but it's so legacy 'hpi module install my.reddit' works

@@ -186,7 +186,7 @@ class MMS(NamedTuple):
         for (addr, _type) in self.addresses:
             if _type == 137:
                 return addr
-        # hmm, maybe return instead? but this probably shouldnt happen, means
+        # hmm, maybe return instead? but this probably shouldn't happen, means
         # something is very broken
         raise RuntimeError(f'No from address matching 137 found in {self.addresses}')
@@ -214,7 +214,7 @@ def mms() -> Iterator[Res[MMS]]:
 def _resolve_null_str(value: str | None) -> str | None:
     if value is None:
         return None
-    # hmm.. theres some risk of the text actually being 'null', but theres
+    # hmm.. there's some risk of the text actually being 'null', but there's
     # no way to distinguish that from XML values
     if value == 'null':
         return None

@@ -49,7 +49,7 @@ class Vote(NamedTuple):
 # hmm, this loads very raw comments without the rest of the page?
 # - https://meta.stackexchange.com/posts/27319/comments#comment-57475
 #
-# parentPostId is the original quesion
+# parentPostId is the original question
 # TODO is not always present? fucking hell
 # seems like there is no way to get a hierarchical comment link.. guess this needs to be handled in Promnesia normalisation...
 # postId is the answer

@@ -245,7 +245,7 @@ def _iter_tzs() -> Iterator[DayWithZone]:
 def _day2zone() -> dict[date, pytz.BaseTzInfo]:
     # NOTE: kinda unfortunate that this will have to process all days before returning result for just one
     # however otherwise cachew cache might never be initialized properly
-    # so we'll always end up recomputing everyting during subsequent runs
+    # so we'll always end up recomputing everything during subsequent runs
     return {dz.day: pytz.timezone(dz.zone) for dz in _iter_tzs()}

@@ -106,7 +106,7 @@ def _handle_db(db: sqlite3.Connection) -> Iterator[Res[_Entity]]:
     user_profile_rows = list(db.execute('SELECT * FROM profile_user_view'))
     if len(user_profile_rows) == 0:
-        # shit, sometime in 2023 profile_user_view stoppped containing user profile..
+        # shit, sometime in 2023 profile_user_view stopped containing user profile..
         # presumably the most common from_id/to_id would be our own username
         counter = Counter([id_ for (id_,) in db.execute('SELECT from_id FROM message UNION ALL SELECT to_id FROM message')])
         if len(counter) > 0:  # this might happen if db is empty (e.g. user got logged out)
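
Spelled out, the fallback these lines start might continue like this; a hypothetical helper, where only the query and the "most common id" reasoning come from the source:

    from collections import Counter
    import sqlite3

    def _guess_own_id(db: sqlite3.Connection) -> str | None:
        counter = Counter(
            id_ for (id_,) in db.execute('SELECT from_id FROM message UNION ALL SELECT to_id FROM message')
        )
        if len(counter) == 0:  # db is empty, e.g. user got logged out
            return None
        # our own id participates in every message, so presumably it's the most common one
        [(own_id, _)] = counter.most_common(1)
        return own_id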

@@ -81,7 +81,7 @@ def _parse_one(p: Path) -> Iterator[Res[Competition]]:
     # but also expects cooperation from .make method (e.g. popping items from the dict)
     # could also wrap in helper and pass to .make .. not sure
     # an argument could be made that .make isn't really a class methond..
-    # it's pretty specific to this parser onl
+    # it's pretty specific to this parser only
     yield from Competition.make(j=c)
     yield from m.check()

@@ -192,7 +192,7 @@ def get_own_user_id(conn) -> str:
 # - timeline_data_type
 #   1 : the bulk of tweets, but also some notifications etc??
 #   2 : who-to-follow/community-to-join. contains a couple of tweets, but their corresponding status_id is NULL
-#   8 : who-to-follow/notfication
+#   8 : who-to-follow/notification
 #   13: semantic-core/who-to-follow
 #   14: cursor
 #   17: trends
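
Read as code, these observations suggest a filter along these lines (a hypothetical sketch; the row shape and the choice to keep only type 1 are assumptions):

    def _is_tweet_row(row: dict) -> bool:
        # 1 is the bulk of tweets; 2/8/13 are recommendations, 14 cursors, 17 trends
        return row['timeline_data_type'] == 1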

@@ -54,7 +54,7 @@ class Tweet(NamedTuple):
         # https://github.com/thomasancheriyil/Red-Tide-Detection-based-on-Twitter/blob/beb200be60cc66dcbc394e670513715509837812/python/twitterGapParse.py#L61-L62
         #
         # twint is also saving 'timezone', but this is local machine timezone at the time of scraping?
-        # perhaps they thought date-time-ms was local time... or just kept it just in case (they are keepin lots on unnecessary stuff in the db)
+        # perhaps they thought date-time-ms was local time... or just kept it just in case (they are keeping lots on unnecessary stuff in the db)
         return datetime.fromtimestamp(seconds, tz=tz)
     @property

@@ -199,7 +199,7 @@ def _process_db(db: sqlite3.Connection) -> Iterator[Entity]:
     sender_row_id = r['sender_jid_row_id']
     if sender_row_id == 0:
         # seems that it's always 0 for 1-1 chats
-        # for group chats our onw id is still 0, but other ids are properly set
+        # for group chats our own id is still 0, but other ids are properly set
         if from_me:
             myself_user_id = config.my_user_id or 'MYSELF_USER_ID'
             sender = Sender(id=myself_user_id, name=None)  # TODO set my own name as well?

@@ -36,7 +36,7 @@ def watched() -> Iterator[Res[Watched]]:
             continue
         # older exports (e.g. html) didn't have microseconds
-        # wheras newer json ones do have them
+        # whereas newer json ones do have them
         # seconds resolution is enough to distinguish watched videos
         # also we're processing takeouts in HPI in reverse order, so first seen watch would contain microseconds, resulting in better data
         without_microsecond = w.when.replace(microsecond=0)
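
Downstream, truncating to second resolution lets watches from old html exports and newer json exports deduplicate; a sketch of the idea (the url component of the key is an assumption):

    from datetime import datetime

    def _dedup_key(when: datetime, url: str) -> tuple[datetime, str]:
        # html exports lack microseconds while json ones have them; dropping
        # microseconds makes the same watch event compare equal across both
        return (when.replace(microsecond=0), url)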