doc: spelling fixes
parent ad55c5c345
commit a7f05c2cad
22 changed files with 27 additions and 27 deletions
@@ -20,7 +20,7 @@ General/my.core changes:
 - e81dddddf083ffd81aa7e2b715bd34f59949479c properly resolve class properties in make_config + add test

 Modules:
-- some innitial work on filling **InfluxDB** with HPI data
+- some initial work on filling **InfluxDB** with HPI data

 - pinboard
 - 42399f6250d9901d93dcedcfe05f7857babcf834: **breaking backwards compatibility**, use pinbexport module directly
@@ -10,7 +10,7 @@ Relevant discussion about overlays: https://github.com/karlicoss/HPI/issues/102

 # You can see them TODO in overlays dir

-Consider a toy package/module structure with minimal code, wihout any actual data parsing, just for demonstration purposes.
+Consider a toy package/module structure with minimal code, without any actual data parsing, just for demonstration purposes.

 - =main= package structure
 # TODO do links
@@ -19,7 +19,7 @@ Consider a toy package/module structure with minimal code, wihout any actual dat
 Extracts Twitter data from GDPR archive.
 - =my/twitter/all.py=
 Merges twitter data from multiple sources (only =gdpr= in this case), so data consumers are agnostic of specific data sources used.
-This will be overriden by =overlay=.
+This will be overridden by =overlay=.
 - =my/twitter/common.py=
 Contains helper function to merge data, so they can be reused by overlay's =all.py=.
 - =my/reddit.py=
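For context, the =all.py= merging pattern this hunk describes looks roughly like the sketch below; =merge_data= and the source module layout are assumed for illustration, not taken from the diff:

    # my/twitter/all.py - sketch of the merging pattern (names illustrative)
    from .common import merge_data  # assumed helper from my/twitter/common.py

    def tweets():
        # only the =gdpr= source exists in the toy example; an overlay package
        # can shadow this module and merge additional sources here
        from . import gdpr
        yield from merge_data(gdpr.tweets())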
@@ -126,7 +126,7 @@ https://github.com/python/mypy/blob/1dd8e7fe654991b01bd80ef7f1f675d9e3910c3a/myp

 For now, I opened an issue in mypy repository https://github.com/python/mypy/issues/16683

-But ok, maybe mypy treats =main= as an external package somhow but still type checks it properly?
+But ok, maybe mypy treats =main= as an external package somehow but still type checks it properly?
 Let's see what's going on with imports:

 : $ mypy --namespace-packages --strict -p my --follow-imports=error
@@ -97,7 +97,7 @@ By default, this just returns the items in the order they were returned by the f
 hpi query my.coding.commits.commits --order-key committed_dt --limit 1 --reverse --output pprint --stream
 Commit(committed_dt=datetime.datetime(2023, 4, 14, 23, 9, 1, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=61200))),
 authored_dt=datetime.datetime(2023, 4, 14, 23, 4, 1, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=61200))),
-message='sources.smscalls: propogate errors if there are breaking '
+message='sources.smscalls: propagate errors if there are breaking '
 'schema changes',
 repo='/home/username/Repos/promnesia-fork',
 sha='22a434fca9a28df9b0915ccf16368df129d2c9ce',
@@ -136,7 +136,7 @@ if TYPE_CHECKING:
 CC = Callable[P, R] # need to give it a name, if inlined into bound=, mypy runs in a bug
 PathProvider = Union[PathIsh, Callable[P, PathIsh]]
 # NOTE: in cachew, HashFunction type returns str
-# however in practice, cachew alwasy calls str for its result
+# however in practice, cachew always calls str for its result
 # so perhaps better to switch it to Any in cachew as well
 HashFunction = Callable[P, Any]

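For context, a self-contained sketch of the typing setup this hunk touches; the =PathIsh= alias is an assumption here (in HPI it is defined elsewhere):

    from pathlib import Path
    from typing import Any, Callable, ParamSpec, TypeVar, Union

    P = ParamSpec('P')
    R = TypeVar('R')
    PathIsh = Union[str, Path]  # assumption: HPI's path-or-string alias

    CC = Callable[P, R]  # named alias; inlining it into bound= trips a mypy bug
    PathProvider = Union[PathIsh, Callable[P, PathIsh]]
    # Any rather than str: cachew stringifies the hash result anyway,
    # so a stricter return type would not buy anything
    HashFunction = Callable[P, Any]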
@@ -236,7 +236,7 @@ def test_zoom() -> None:
 # - very flexible, easy to adjust behaviour
 # - cons:
 # - can forget to assert about extra entities etc, so error prone
-# - if we do something like =assert j.pop('status') == 200, j=, by the time assert happens we already popped item -- makes erro handling harder
+# - if we do something like =assert j.pop('status') == 200, j=, by the time assert happens we already popped item -- makes error handling harder
 # - a bit verbose.. so probably requires some helper functions though (could be much leaner than current konsume though)
 # - if we assert, then terminates parsing too early, if we're defensive then inflates the code a lot with if statements
 # - TODO perhaps combine warnings somehow or at least only emit once per module?
@@ -250,7 +250,7 @@ if __name__ == '__main__':
     test()


-## legacy/deprecated methods for backwards compatilibity
+## legacy/deprecated methods for backwards compatibility
 if not TYPE_CHECKING:
     from .compat import deprecated

@@ -12,7 +12,7 @@ def _init_default_config() -> None:

 def test_tmp_config() -> None:
     ## ugh. ideally this would be on the top level (would be a better test)
-    ## but pytest imports eveything first, executes hooks, and some reset_modules() fictures mess stuff up
+    ## but pytest imports everything first, executes hooks, and some reset_modules() fictures mess stuff up
     ## later would be nice to be a bit more careful about them
     _init_default_config()
     from my.simple import items
@@ -321,7 +321,7 @@ _UET = TypeVar('_UET')
 _UEU = TypeVar('_UEU')


-# NOTE: for historic reasons, this function had to accept Callable that retuns iterator
+# NOTE: for historic reasons, this function had to accept Callable that returns iterator
 # instead of just iterator
 # TODO maybe deprecated Callable support? not sure
 def unique_everseen(
@@ -358,7 +358,7 @@ def test_unique_everseen() -> None:
     assert list(unique_everseen(fun_good)) == [123]

     with pytest.raises(Exception):
-        # since function retuns a list rather than iterator, check happens immediately
+        # since function returns a list rather than iterator, check happens immediately
         # , even without advancing the iterator
         unique_everseen(fun_bad)

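For orientation, the shape being discussed in these two hunks is roughly the following; a minimal sketch assuming hashable items, not HPI's actual implementation (which, per the test above, also eagerly validates non-iterator inputs like lists):

    from collections.abc import Iterable, Iterator
    from typing import Callable, TypeVar, Union

    T = TypeVar('T')

    def unique_everseen_sketch(src: Union[Iterable[T], Callable[[], Iterable[T]]]) -> Iterator[T]:
        # historic signature: accept either an iterable, or a zero-argument
        # callable producing one (older cachew required the callable form)
        it = src() if callable(src) else src
        seen: set = set()
        for item in it:
            if item not in seen:
                seen.add(item)
                yield item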
@@ -9,7 +9,7 @@ since that allows for easier overriding using namespace packages
 See https://github.com/karlicoss/HPI/blob/master/doc/MODULE_DESIGN.org#allpy for more info.
 """

-# prevent it from apprearing in modules list/doctor
+# prevent it from appearing in modules list/doctor
 from ..core import __NOT_HPI_MODULE__

 # kinda annoying to keep it, but it's so legacy 'hpi module install my.fbmessenger' works
@@ -174,7 +174,7 @@ def _process_db_msys(db: sqlite3.Connection) -> Iterator[Res[Entity]]:
 However seems that when message is not sent yet it doesn't have this server id yet
 (happened only once, but could be just luck of course!)
 We exclude these messages to avoid duplication.
-However poisitive filter (e.g. message_id LIKE 'mid%') feels a bit wrong, e.g. what if mesage ids change or something
+However poisitive filter (e.g. message_id LIKE 'mid%') feels a bit wrong, e.g. what if message ids change or something
 So instead this excludes only such unsent messages.
 */
 message_id != offline_threading_id
@@ -23,7 +23,7 @@ def messages() -> Iterator[Res[Message]]:
 # TODO in general best to prefer android, it has more data
 # - message ids
 # - usernames are correct for Android data
-# - thread ids more meaninful?
+# - thread ids more meaningful?
 # but for now prefer gdpr prefix since it makes a bit things a bit more consistent?
 # e.g. a new batch of android exports can throw off ids if we rely on it for mapping
 yield from _merge_messages(
@@ -76,7 +76,7 @@ def _entities() -> Iterator[Res[User | _Message]]:
 # NOTE: here there are basically two options
 # - process inputs as is (from oldest to newest)
 #   this would be more stable wrt newer exports (e.g. existing thread ids won't change)
-#   the downside is that newer exports seem to have better thread ids, so might be preferrable to use them
+#   the downside is that newer exports seem to have better thread ids, so might be preferable to use them
 # - process inputs reversed (from newest to oldest)
 #   the upside is that thread ids/usernames might be better
 #   the downside is that if for example the user renames, thread ids will change _a lot_, might be undesirable..
@@ -137,7 +137,7 @@ def _entitites_from_path(path: Path) -> Iterator[Res[User | _Message]]:
 j = json.loads(ffile.read_text())

 id_len = 10
-# NOTE: I'm not actually sure it's other user's id.., since it corresponds to the whole converstation
+# NOTE: I'm not actually sure it's other user's id.., since it corresponds to the whole conversation
 # but I stared a bit at these ids vs database ids and can't see any way to find the correspondence :(
 # so basically the only way to merge is to actually try some magic and correlate timestamps/message texts?
 # another option is perhaps to query user id from username with some free API
@@ -9,7 +9,7 @@ since that allows for easier overriding using namespace packages
 See https://github.com/karlicoss/HPI/blob/master/doc/MODULE_DESIGN.org#allpy for more info.
 """

-# prevent it from apprearing in modules list/doctor
+# prevent it from appearing in modules list/doctor
 from ..core import __NOT_HPI_MODULE__

 # kinda annoying to keep it, but it's so legacy 'hpi module install my.reddit' works
@@ -186,7 +186,7 @@ class MMS(NamedTuple):
 for (addr, _type) in self.addresses:
     if _type == 137:
         return addr
-# hmm, maybe return instead? but this probably shouldnt happen, means
+# hmm, maybe return instead? but this probably shouldn't happen, means
 # something is very broken
 raise RuntimeError(f'No from address matching 137 found in {self.addresses}')

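For reference, the magic number 137 matched here is the MMS PDU address type for the sender; the common values as stored in Android telephony databases:

    # MMS PDU address types (from the MMS encapsulation spec)
    ADDR_BCC  = 129  # 0x81
    ADDR_CC   = 130  # 0x82
    ADDR_FROM = 137  # 0x89 - the sender, matched in the loop above
    ADDR_TO   = 151  # 0x97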
@@ -214,7 +214,7 @@ def mms() -> Iterator[Res[MMS]]:
 def _resolve_null_str(value: str | None) -> str | None:
     if value is None:
         return None
-    # hmm.. theres some risk of the text actually being 'null', but theres
+    # hmm.. there's some risk of the text actually being 'null', but there's
     # no way to distinguish that from XML values
     if value == 'null':
         return None
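The hunk cuts off before the fall-through; presumably the helper ends by returning the value unchanged. A sketch of the complete function under that assumption:

    def _resolve_null_str(value: str | None) -> str | None:
        if value is None:
            return None
        # the backup XML serializes missing values as the literal string 'null',
        # which is indistinguishable from a genuine 'null' text - accepted risk
        if value == 'null':
            return None
        return value  # assumption: any other value falls through unchanged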
@@ -49,7 +49,7 @@ class Vote(NamedTuple):
 # hmm, this loads very raw comments without the rest of the page?
 # - https://meta.stackexchange.com/posts/27319/comments#comment-57475
 #
-# parentPostId is the original quesion
+# parentPostId is the original question
 # TODO is not always present? fucking hell
 # seems like there is no way to get a hierarchical comment link.. guess this needs to be handled in Promnesia normalisation...
 # postId is the answer
@@ -245,7 +245,7 @@ def _iter_tzs() -> Iterator[DayWithZone]:
 def _day2zone() -> dict[date, pytz.BaseTzInfo]:
     # NOTE: kinda unfortunate that this will have to process all days before returning result for just one
     # however otherwise cachew cache might never be initialized properly
-    # so we'll always end up recomputing everyting during subsequent runs
+    # so we'll always end up recomputing everything during subsequent runs
     return {dz.day: pytz.timezone(dz.zone) for dz in _iter_tzs()}


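The eager dict comprehension matters because of how the cache behaves; a hedged illustration, assuming a cachew-style cache that is finalized only once its iterator is fully consumed:

    import pytz

    # assume iter_day_zones() yields (day, zone_name) pairs through such a cache
    def zone_for_day_bad(day, iter_day_zones):
        for d, zone in iter_day_zones():
            if d == day:
                return pytz.timezone(zone)  # early return: cache never finalized

    def zone_for_day_ok(day, iter_day_zones):
        # consume everything once (as the hunk's dict comprehension does),
        # so the cache is written out and later runs answer lookups cheaply
        day2zone = {d: pytz.timezone(zone) for d, zone in iter_day_zones()}
        return day2zone[day]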
@@ -106,7 +106,7 @@ def _handle_db(db: sqlite3.Connection) -> Iterator[Res[_Entity]]:
 user_profile_rows = list(db.execute('SELECT * FROM profile_user_view'))

 if len(user_profile_rows) == 0:
-    # shit, sometime in 2023 profile_user_view stoppped containing user profile..
+    # shit, sometime in 2023 profile_user_view stopped containing user profile..
     # presumably the most common from_id/to_id would be our own username
     counter = Counter([id_ for (id_,) in db.execute('SELECT from_id FROM message UNION ALL SELECT to_id FROM message')])
     if len(counter) > 0: # this might happen if db is empty (e.g. user got logged out)
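The fallback sketched by this hunk boils down to a frequency count; a hypothetical, self-contained version:

    from collections import Counter

    def guess_own_user_id(ids: list) -> str:
        # our own id participates in every conversation (as sender or recipient),
        # so it should be the most frequent id across all messages
        counter = Counter(ids)
        own_id, _count = counter.most_common(1)[0]
        return own_id

    # e.g. guess_own_user_id(['alice', 'me', 'bob', 'me']) == 'me'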
@@ -81,7 +81,7 @@ def _parse_one(p: Path) -> Iterator[Res[Competition]]:
 # but also expects cooperation from .make method (e.g. popping items from the dict)
 # could also wrap in helper and pass to .make .. not sure
 # an argument could be made that .make isn't really a class methond..
-# it's pretty specific to this parser onl
+# it's pretty specific to this parser only
 yield from Competition.make(j=c)

 yield from m.check()
@@ -192,7 +192,7 @@ def get_own_user_id(conn) -> str:
 # - timeline_data_type
 # 1 : the bulk of tweets, but also some notifications etc??
 # 2 : who-to-follow/community-to-join. contains a couple of tweets, but their corresponding status_id is NULL
-# 8 : who-to-follow/notfication
+# 8 : who-to-follow/notification
 # 13: semantic-core/who-to-follow
 # 14: cursor
 # 17: trends
@@ -54,7 +54,7 @@ class Tweet(NamedTuple):
 # https://github.com/thomasancheriyil/Red-Tide-Detection-based-on-Twitter/blob/beb200be60cc66dcbc394e670513715509837812/python/twitterGapParse.py#L61-L62
 #
 # twint is also saving 'timezone', but this is local machine timezone at the time of scraping?
-# perhaps they thought date-time-ms was local time... or just kept it just in case (they are keepin lots on unnecessary stuff in the db)
+# perhaps they thought date-time-ms was local time... or just kept it just in case (they are keeping lots on unnecessary stuff in the db)
 return datetime.fromtimestamp(seconds, tz=tz)

 @property
@@ -199,7 +199,7 @@ def _process_db(db: sqlite3.Connection) -> Iterator[Entity]:
 sender_row_id = r['sender_jid_row_id']
 if sender_row_id == 0:
     # seems that it's always 0 for 1-1 chats
-    # for group chats our onw id is still 0, but other ids are properly set
+    # for group chats our own id is still 0, but other ids are properly set
     if from_me:
         myself_user_id = config.my_user_id or 'MYSELF_USER_ID'
         sender = Sender(id=myself_user_id, name=None) # TODO set my own name as well?
@@ -36,7 +36,7 @@ def watched() -> Iterator[Res[Watched]]:
 continue

 # older exports (e.g. html) didn't have microseconds
-# wheras newer json ones do have them
+# whereas newer json ones do have them
 # seconds resolution is enough to distinguish watched videos
 # also we're processing takeouts in HPI in reverse order, so first seen watch would contain microseconds, resulting in better data
 without_microsecond = w.when.replace(microsecond=0)
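The truncation trick this hunk documents, as a standalone sketch (timestamps invented for illustration):

    from datetime import datetime, timezone

    # the same watch event from an old html export (second resolution)
    # and from a newer json export (microsecond resolution)
    old = datetime(2019, 1, 1, 12, 0, 5, tzinfo=timezone.utc)
    new = datetime(2019, 1, 1, 12, 0, 5, 123456, tzinfo=timezone.utc)

    seen = set()
    deduped = []
    for when in (new, old):  # takeouts are processed newest-first in HPI
        without_microsecond = when.replace(microsecond=0)
        if without_microsecond in seen:
            continue  # drops the html duplicate, keeping the richer json record
        seen.add(without_microsecond)
        deduped.append(when)

    assert deduped == [new]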