doc: spelling fixes

parent ad55c5c345, commit a7f05c2cad
22 changed files with 27 additions and 27 deletions
@@ -136,7 +136,7 @@ if TYPE_CHECKING:
     CC = Callable[P, R]  # need to give it a name, if inlined into bound=, mypy runs in a bug
     PathProvider = Union[PathIsh, Callable[P, PathIsh]]
     # NOTE: in cachew, HashFunction type returns str
-    # however in practice, cachew alwasy calls str for its result
+    # however in practice, cachew always calls str for its result
     # so perhaps better to switch it to Any in cachew as well
     HashFunction = Callable[P, Any]

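For context, the hash function here is whatever the caller passes for cache invalidation, and cachew stringifies its result. A minimal sketch of that contract; the cache_key helper is hypothetical, not cachew's actual API:

    from typing import Any, Callable, ParamSpec

    P = ParamSpec('P')

    # per the note above: typing the result as Any is harmless because
    # the result goes through str() anyway
    HashFunction = Callable[P, Any]

    def cache_key(hashf: Callable[..., Any], *args: Any) -> str:
        # hypothetical helper mimicking the stringification step
        return str(hashf(*args))

    def mtime_hash(path: str) -> tuple[str, int]:
        # returning a non-str is fine under the Any-typed signature
        return (path, 1234567890)

    assert cache_key(mtime_hash, '/tmp/data.json') == str(('/tmp/data.json', 1234567890))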
@@ -236,7 +236,7 @@ def test_zoom() -> None:
 # - very flexible, easy to adjust behaviour
 # - cons:
 #   - can forget to assert about extra entities etc, so error prone
-#   - if we do something like =assert j.pop('status') == 200, j=, by the time assert happens we already popped item -- makes erro handling harder
+#   - if we do something like =assert j.pop('status') == 200, j=, by the time assert happens we already popped item -- makes error handling harder
 #   - a bit verbose.. so probably requires some helper functions though (could be much leaner than current konsume though)
 #   - if we assert, then terminates parsing too early, if we're defensive then inflates the code a lot with if statements
 # - TODO perhaps combine warnings somehow or at least only emit once per module?
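The pop-before-assert pitfall called out in that list is easy to demonstrate; a minimal sketch:

    j = {'status': 500, 'error': 'oops'}
    try:
        # 'status' is popped before the assert fires, so the failure
        # message shows the dict without the very key being checked
        assert j.pop('status') == 200, j
    except AssertionError as e:
        print(e)  # {'error': 'oops'} -- the status value is already gone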
@@ -250,7 +250,7 @@ if __name__ == '__main__':
     test()


-## legacy/deprecated methods for backwards compatilibity
+## legacy/deprecated methods for backwards compatibility
 if not TYPE_CHECKING:
     from .compat import deprecated

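The `if not TYPE_CHECKING` guard keeps legacy names importable at runtime while hiding them from type checkers; a minimal sketch of the idea, with hypothetical helper names:

    from typing import TYPE_CHECKING

    def new_helper(x: int) -> int:
        return x * 2

    if not TYPE_CHECKING:
        # runtime-only: old `from module import old_helper` keeps working,
        # but type checkers flag the name as missing, steering new code
        # towards the replacement
        def old_helper(x: int) -> int:  # deprecated alias
            return new_helper(x)

    assert old_helper(21) == 42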
@@ -12,7 +12,7 @@ def _init_default_config() -> None:

 def test_tmp_config() -> None:
     ## ugh. ideally this would be on the top level (would be a better test)
-    ## but pytest imports eveything first, executes hooks, and some reset_modules() fictures mess stuff up
+    ## but pytest imports everything first, executes hooks, and some reset_modules() fictures mess stuff up
     ## later would be nice to be a bit more careful about them
     _init_default_config()
     from my.simple import items
@@ -321,7 +321,7 @@ _UET = TypeVar('_UET')
 _UEU = TypeVar('_UEU')


-# NOTE: for historic reasons, this function had to accept Callable that retuns iterator
+# NOTE: for historic reasons, this function had to accept Callable that returns iterator
 # instead of just iterator
 # TODO maybe deprecated Callable support? not sure
 def unique_everseen(
@@ -358,7 +358,7 @@ def test_unique_everseen() -> None:
     assert list(unique_everseen(fun_good)) == [123]

     with pytest.raises(Exception):
-        # since function retuns a list rather than iterator, check happens immediately
+        # since function returns a list rather than iterator, check happens immediately
         # , even without advancing the iterator
         unique_everseen(fun_bad)

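To illustrate what these two hunks describe (accepting a Callable that returns an iterator, and failing immediately when it returns a list instead), a simplified standalone sketch, not HPI's actual implementation:

    from collections.abc import Callable, Iterator
    from typing import TypeVar

    T = TypeVar('T')

    def unique_everseen(fun: Callable[[], Iterator[T]]) -> Iterator[T]:
        produced = fun()
        # validate eagerly, before the caller advances the generator:
        # a function returning a list instead of an iterator fails right here
        if not isinstance(produced, Iterator):
            raise RuntimeError(f'expected iterator, got {type(produced)}')

        def gen() -> Iterator[T]:
            seen: set[T] = set()
            for x in produced:
                if x not in seen:
                    seen.add(x)
                    yield x

        return gen()

    def fun_good() -> Iterator[int]:
        yield 123

    def fun_bad():
        return [1, 2, 1]  # a list, not an iterator

    assert list(unique_everseen(fun_good)) == [123]
    try:
        unique_everseen(fun_bad)  # raises immediately, without advancing anything
    except RuntimeError as e:
        print(e)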
@@ -9,7 +9,7 @@ since that allows for easier overriding using namespace packages
 See https://github.com/karlicoss/HPI/blob/master/doc/MODULE_DESIGN.org#allpy for more info.
 """

-# prevent it from apprearing in modules list/doctor
+# prevent it from appearing in modules list/doctor
 from ..core import __NOT_HPI_MODULE__

 # kinda annoying to keep it, but it's so legacy 'hpi module install my.fbmessenger' works
@@ -174,7 +174,7 @@ def _process_db_msys(db: sqlite3.Connection) -> Iterator[Res[Entity]]:
     However seems that when message is not sent yet it doesn't have this server id yet
     (happened only once, but could be just luck of course!)
     We exclude these messages to avoid duplication.
-    However poisitive filter (e.g. message_id LIKE 'mid%') feels a bit wrong, e.g. what if mesage ids change or something
+    However poisitive filter (e.g. message_id LIKE 'mid%') feels a bit wrong, e.g. what if message ids change or something
     So instead this excludes only such unsent messages.
     */
     message_id != offline_threading_id
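The exclude-only-the-known-bad-case approach from that comment, as a runnable sketch with a hypothetical minimal schema (the real table has many more columns):

    import sqlite3

    conn = sqlite3.connect(':memory:')
    conn.execute('CREATE TABLE messages (message_id TEXT, offline_threading_id TEXT, text TEXT)')
    conn.executemany('INSERT INTO messages VALUES (?, ?, ?)', [
        ('mid.123', 'otid.1', 'sent message'),  # server already assigned an id
        ('otid.2',  'otid.2', 'unsent draft'),  # no server id yet, so both ids coincide
    ])
    # negative filter: drop only unsent messages, rather than a positive
    # filter like message_id LIKE 'mid%' which would silently lose rows
    # if the server id format ever changed
    rows = conn.execute('SELECT text FROM messages WHERE message_id != offline_threading_id').fetchall()
    assert rows == [('sent message',)]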
@@ -23,7 +23,7 @@ def messages() -> Iterator[Res[Message]]:
     # TODO in general best to prefer android, it has more data
     # - message ids
     # - usernames are correct for Android data
-    # - thread ids more meaninful?
+    # - thread ids more meaningful?
     # but for now prefer gdpr prefix since it makes a bit things a bit more consistent?
     # e.g. a new batch of android exports can throw off ids if we rely on it for mapping
     yield from _merge_messages(
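The id-stability concern behind preferring the gdpr prefix can be seen in a tiny merge sketch (data shape and key scheme hypothetical, not the module's actual merge logic):

    def merge_messages(gdpr: list[dict], android: list[dict]) -> dict[str, dict]:
        # key on the gdpr id where possible: those ids don't shift when a
        # new batch of android exports arrives, so merged keys stay stable
        merged: dict[str, dict] = {}
        for m in gdpr:
            merged[f"gdpr-{m['id']}"] = m
        for m in android:
            key = f"gdpr-{m['gdpr_id']}" if m.get('gdpr_id') else f"android-{m['id']}"
            merged.setdefault(key, m)
        return merged

    merged = merge_messages(
        gdpr=[{'id': 'a1', 'text': 'hi'}],
        android=[{'id': 42, 'gdpr_id': 'a1', 'text': 'hi'}, {'id': 43, 'text': 'yo'}],
    )
    assert set(merged) == {'gdpr-a1', 'android-43'}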
@@ -76,7 +76,7 @@ def _entities() -> Iterator[Res[User | _Message]]:
     # NOTE: here there are basically two options
     # - process inputs as is (from oldest to newest)
     #   this would be more stable wrt newer exports (e.g. existing thread ids won't change)
-    #   the downside is that newer exports seem to have better thread ids, so might be preferrable to use them
+    #   the downside is that newer exports seem to have better thread ids, so might be preferable to use them
     # - process inputs reversed (from newest to oldest)
     #   the upside is that thread ids/usernames might be better
     #   the downside is that if for example the user renames, thread ids will change _a lot_, might be undesirable..
@@ -137,7 +137,7 @@ def _entitites_from_path(path: Path) -> Iterator[Res[User | _Message]]:
     j = json.loads(ffile.read_text())

     id_len = 10
-    # NOTE: I'm not actually sure it's other user's id.., since it corresponds to the whole converstation
+    # NOTE: I'm not actually sure it's other user's id.., since it corresponds to the whole conversation
     # but I stared a bit at these ids vs database ids and can't see any way to find the correspondence :(
     # so basically the only way to merge is to actually try some magic and correlate timestamps/message texts?
     # another option is perhaps to query user id from username with some free API
@@ -9,7 +9,7 @@ since that allows for easier overriding using namespace packages
 See https://github.com/karlicoss/HPI/blob/master/doc/MODULE_DESIGN.org#allpy for more info.
 """

-# prevent it from apprearing in modules list/doctor
+# prevent it from appearing in modules list/doctor
 from ..core import __NOT_HPI_MODULE__

 # kinda annoying to keep it, but it's so legacy 'hpi module install my.reddit' works
@@ -186,7 +186,7 @@ class MMS(NamedTuple):
         for (addr, _type) in self.addresses:
             if _type == 137:
                 return addr
-        # hmm, maybe return instead? but this probably shouldnt happen, means
+        # hmm, maybe return instead? but this probably shouldn't happen, means
         # something is very broken
         raise RuntimeError(f'No from address matching 137 found in {self.addresses}')

@@ -214,7 +214,7 @@ def mms() -> Iterator[Res[MMS]]:
 def _resolve_null_str(value: str | None) -> str | None:
     if value is None:
         return None
-    # hmm.. theres some risk of the text actually being 'null', but theres
+    # hmm.. there's some risk of the text actually being 'null', but there's
     # no way to distinguish that from XML values
     if value == 'null':
         return None
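For completeness, the full shape of `_resolve_null_str` with the ambiguity spelled out; the trailing return of the value itself is an assumption, since the hunk cuts off before it:

    def _resolve_null_str(value: str | None) -> str | None:
        if value is None:
            return None
        # a text that is literally 'null' is indistinguishable from a null
        # value serialized into the XML attribute, so both map to None
        if value == 'null':
            return None
        return value  # assumed tail, not shown in the hunk

    assert _resolve_null_str(None) is None
    assert _resolve_null_str('null') is None  # genuine 'null' text is lost
    assert _resolve_null_str('hello') == 'hello'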
@@ -49,7 +49,7 @@ class Vote(NamedTuple):
 # hmm, this loads very raw comments without the rest of the page?
 # - https://meta.stackexchange.com/posts/27319/comments#comment-57475
 #
-# parentPostId is the original quesion
+# parentPostId is the original question
 #   TODO is not always present? fucking hell
 #   seems like there is no way to get a hierarchical comment link.. guess this needs to be handled in Promnesia normalisation...
 # postId is the answer
@@ -245,7 +245,7 @@ def _iter_tzs() -> Iterator[DayWithZone]:
 def _day2zone() -> dict[date, pytz.BaseTzInfo]:
     # NOTE: kinda unfortunate that this will have to process all days before returning result for just one
     # however otherwise cachew cache might never be initialized properly
-    # so we'll always end up recomputing everyting during subsequent runs
+    # so we'll always end up recomputing everything during subsequent runs
     return {dz.day: pytz.timezone(dz.zone) for dz in _iter_tzs()}


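The eager-materialisation trade-off in `_day2zone` (consume everything once so the cache is fully built, then serve point lookups from the dict) as a standalone sketch with hypothetical data:

    from datetime import date

    import pytz

    def _iter_tzs():
        # hypothetical stand-in for the real per-day timezone inference
        yield date(2023, 1, 1), 'Europe/London'
        yield date(2023, 6, 1), 'America/New_York'

    def _day2zone() -> dict[date, pytz.BaseTzInfo]:
        # materialize everything up front: stopping halfway would leave the
        # underlying cache half-built and force recomputation on the next run
        return {day: pytz.timezone(zone) for day, zone in _iter_tzs()}

    day2zone = _day2zone()
    print(day2zone[date(2023, 6, 1)])  # America/New_York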
@@ -106,7 +106,7 @@ def _handle_db(db: sqlite3.Connection) -> Iterator[Res[_Entity]]:
     user_profile_rows = list(db.execute('SELECT * FROM profile_user_view'))

     if len(user_profile_rows) == 0:
-        # shit, sometime in 2023 profile_user_view stoppped containing user profile..
+        # shit, sometime in 2023 profile_user_view stopped containing user profile..
         # presumably the most common from_id/to_id would be our own username
         counter = Counter([id_ for (id_,) in db.execute('SELECT from_id FROM message UNION ALL SELECT to_id FROM message')])
         if len(counter) > 0:  # this might happen if db is empty (e.g. user got logged out)
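The fallback heuristic in that hunk (our own id should be the most frequent participant, since we are on one side of every message) in isolation, with hypothetical data:

    from collections import Counter

    ids = ['me', 'alice', 'me', 'bob', 'me', 'alice']
    counter = Counter(ids)
    if len(counter) > 0:  # empty when the db has no messages at all
        (own_id, _count), = counter.most_common(1)
        assert own_id == 'me'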
@@ -81,7 +81,7 @@ def _parse_one(p: Path) -> Iterator[Res[Competition]]:
     # but also expects cooperation from .make method (e.g. popping items from the dict)
     # could also wrap in helper and pass to .make .. not sure
     # an argument could be made that .make isn't really a class methond..
-    # it's pretty specific to this parser onl
+    # it's pretty specific to this parser only
     yield from Competition.make(j=c)

     yield from m.check()
@@ -192,7 +192,7 @@ def get_own_user_id(conn) -> str:
 # - timeline_data_type
 #   1 : the bulk of tweets, but also some notifications etc??
 #   2 : who-to-follow/community-to-join. contains a couple of tweets, but their corresponding status_id is NULL
-#   8 : who-to-follow/notfication
+#   8 : who-to-follow/notification
 #   13: semantic-core/who-to-follow
 #   14: cursor
 #   17: trends
@@ -54,7 +54,7 @@ class Tweet(NamedTuple):
         # https://github.com/thomasancheriyil/Red-Tide-Detection-based-on-Twitter/blob/beb200be60cc66dcbc394e670513715509837812/python/twitterGapParse.py#L61-L62
         #
         # twint is also saving 'timezone', but this is local machine timezone at the time of scraping?
-        # perhaps they thought date-time-ms was local time... or just kept it just in case (they are keepin lots on unnecessary stuff in the db)
+        # perhaps they thought date-time-ms was local time... or just kept it just in case (they are keeping lots on unnecessary stuff in the db)
         return datetime.fromtimestamp(seconds, tz=tz)

     @property
@@ -199,7 +199,7 @@ def _process_db(db: sqlite3.Connection) -> Iterator[Entity]:
         sender_row_id = r['sender_jid_row_id']
         if sender_row_id == 0:
             # seems that it's always 0 for 1-1 chats
-            # for group chats our onw id is still 0, but other ids are properly set
+            # for group chats our own id is still 0, but other ids are properly set
             if from_me:
                 myself_user_id = config.my_user_id or 'MYSELF_USER_ID'
                 sender = Sender(id=myself_user_id, name=None)  # TODO set my own name as well?
@@ -36,7 +36,7 @@ def watched() -> Iterator[Res[Watched]]:
             continue

         # older exports (e.g. html) didn't have microseconds
-        # wheras newer json ones do have them
+        # whereas newer json ones do have them
         # seconds resolution is enough to distinguish watched videos
         # also we're processing takeouts in HPI in reverse order, so first seen watch would contain microseconds, resulting in better data
         without_microsecond = w.when.replace(microsecond=0)
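The microsecond-stripping deduplication works because the same watch shows up in different exports with different sub-second precision; a minimal sketch:

    from datetime import datetime

    # the same watch, as reported by a newer (json) and an older (html) export
    newer = datetime(2020, 5, 17, 12, 30, 45, 123456)
    older = datetime(2020, 5, 17, 12, 30, 45)

    emitted: set[datetime] = set()
    for when in (newer, older):  # newest first, so the microsecond data wins
        key = when.replace(microsecond=0)  # seconds resolution is enough
        if key in emitted:
            continue  # duplicate of an already-emitted watch
        emitted.add(key)
        print(when)  # only the microsecond-precise record gets printed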