doc: spelling fixes
parent ad55c5c345
commit a7f05c2cad
22 changed files with 27 additions and 27 deletions
@@ -20,7 +20,7 @@ General/my.core changes:
 - e81dddddf083ffd81aa7e2b715bd34f59949479c properly resolve class properties in make_config + add test

 Modules:
-- some innitial work on filling **InfluxDB** with HPI data
+- some initial work on filling **InfluxDB** with HPI data

 - pinboard
 - 42399f6250d9901d93dcedcfe05f7857babcf834: **breaking backwards compatibility**, use pinbexport module directly
@@ -10,7 +10,7 @@ Relevant discussion about overlays: https://github.com/karlicoss/HPI/issues/102

 # You can see them TODO in overlays dir

-Consider a toy package/module structure with minimal code, wihout any actual data parsing, just for demonstration purposes.
+Consider a toy package/module structure with minimal code, without any actual data parsing, just for demonstration purposes.

 - =main= package structure
 # TODO do links
@@ -19,7 +19,7 @@ Consider a toy package/module structure with minimal code, wihout any actual dat
 Extracts Twitter data from GDPR archive.
 - =my/twitter/all.py=
 Merges twitter data from multiple sources (only =gdpr= in this case), so data consumers are agnostic of specific data sources used.
-This will be overriden by =overlay=.
+This will be overridden by =overlay=.
 - =my/twitter/common.py=
 Contains helper function to merge data, so they can be reused by overlay's =all.py=.
 - =my/reddit.py=
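For context, the =all.py= merging pattern this hunk describes looks roughly like the sketch below; =merge_data= and the source module layout are assumed for illustration, not taken from the diff:

    # my/twitter/all.py - sketch of the merging pattern (names illustrative)
    from .common import merge_data  # assumed helper from my/twitter/common.py

    def tweets():
        # only the =gdpr= source exists in the toy example; an overlay package
        # can shadow this module and merge additional sources here
        from . import gdpr
        yield from merge_data(gdpr.tweets())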
@@ -126,7 +126,7 @@ https://github.com/python/mypy/blob/1dd8e7fe654991b01bd80ef7f1f675d9e3910c3a/myp

 For now, I opened an issue in mypy repository https://github.com/python/mypy/issues/16683

-But ok, maybe mypy treats =main= as an external package somhow but still type checks it properly?
+But ok, maybe mypy treats =main= as an external package somehow but still type checks it properly?
 Let's see what's going on with imports:

 : $ mypy --namespace-packages --strict -p my --follow-imports=error
@@ -97,7 +97,7 @@ By default, this just returns the items in the order they were returned by the f
 hpi query my.coding.commits.commits --order-key committed_dt --limit 1 --reverse --output pprint --stream
 Commit(committed_dt=datetime.datetime(2023, 4, 14, 23, 9, 1, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=61200))),
 authored_dt=datetime.datetime(2023, 4, 14, 23, 4, 1, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=61200))),
-message='sources.smscalls: propogate errors if there are breaking '
+message='sources.smscalls: propagate errors if there are breaking '
 'schema changes',
 repo='/home/username/Repos/promnesia-fork',
 sha='22a434fca9a28df9b0915ccf16368df129d2c9ce',
@@ -136,7 +136,7 @@ if TYPE_CHECKING:
 CC = Callable[P, R] # need to give it a name, if inlined into bound=, mypy runs in a bug
 PathProvider = Union[PathIsh, Callable[P, PathIsh]]
 # NOTE: in cachew, HashFunction type returns str
-# however in practice, cachew alwasy calls str for its result
+# however in practice, cachew always calls str for its result
 # so perhaps better to switch it to Any in cachew as well
 HashFunction = Callable[P, Any]

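For context, a self-contained sketch of the typing setup this hunk touches; the =PathIsh= alias is an assumption here (in HPI it is defined elsewhere):

    from pathlib import Path
    from typing import Any, Callable, ParamSpec, TypeVar, Union

    P = ParamSpec('P')
    R = TypeVar('R')
    PathIsh = Union[str, Path]  # assumption: HPI's path-or-string alias

    CC = Callable[P, R]  # named alias; inlining it into bound= trips a mypy bug
    PathProvider = Union[PathIsh, Callable[P, PathIsh]]
    # Any rather than str: cachew stringifies the hash result anyway,
    # so a stricter return type would not buy anything
    HashFunction = Callable[P, Any]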
@@ -236,7 +236,7 @@ def test_zoom() -> None:
 # - very flexible, easy to adjust behaviour
 # - cons:
 # - can forget to assert about extra entities etc, so error prone
-# - if we do something like =assert j.pop('status') == 200, j=, by the time assert happens we already popped item -- makes erro handling harder
+# - if we do something like =assert j.pop('status') == 200, j=, by the time assert happens we already popped item -- makes error handling harder
 # - a bit verbose.. so probably requires some helper functions though (could be much leaner than current konsume though)
 # - if we assert, then terminates parsing too early, if we're defensive then inflates the code a lot with if statements
 # - TODO perhaps combine warnings somehow or at least only emit once per module?
@@ -250,7 +250,7 @@ if __name__ == '__main__':
     test()


-## legacy/deprecated methods for backwards compatilibity
+## legacy/deprecated methods for backwards compatibility
 if not TYPE_CHECKING:
     from .compat import deprecated

@@ -12,7 +12,7 @@ def _init_default_config() -> None:

 def test_tmp_config() -> None:
     ## ugh. ideally this would be on the top level (would be a better test)
-    ## but pytest imports eveything first, executes hooks, and some reset_modules() fictures mess stuff up
+    ## but pytest imports everything first, executes hooks, and some reset_modules() fictures mess stuff up
     ## later would be nice to be a bit more careful about them
     _init_default_config()
     from my.simple import items
@@ -321,7 +321,7 @@ _UET = TypeVar('_UET')
 _UEU = TypeVar('_UEU')


-# NOTE: for historic reasons, this function had to accept Callable that retuns iterator
+# NOTE: for historic reasons, this function had to accept Callable that returns iterator
 # instead of just iterator
 # TODO maybe deprecated Callable support? not sure
 def unique_everseen(
@@ -358,7 +358,7 @@ def test_unique_everseen() -> None:
     assert list(unique_everseen(fun_good)) == [123]

     with pytest.raises(Exception):
-        # since function retuns a list rather than iterator, check happens immediately
+        # since function returns a list rather than iterator, check happens immediately
         # , even without advancing the iterator
         unique_everseen(fun_bad)

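For orientation, the shape being discussed in these two hunks is roughly the following; a minimal sketch assuming hashable items, not HPI's actual implementation (which, per the test above, also eagerly validates non-iterator inputs like lists):

    from collections.abc import Iterable, Iterator
    from typing import Callable, TypeVar, Union

    T = TypeVar('T')

    def unique_everseen_sketch(src: Union[Iterable[T], Callable[[], Iterable[T]]]) -> Iterator[T]:
        # historic signature: accept either an iterable, or a zero-argument
        # callable producing one (older cachew required the callable form)
        it = src() if callable(src) else src
        seen: set = set()
        for item in it:
            if item not in seen:
                seen.add(item)
                yield item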
@@ -9,7 +9,7 @@ since that allows for easier overriding using namespace packages
 See https://github.com/karlicoss/HPI/blob/master/doc/MODULE_DESIGN.org#allpy for more info.
 """

-# prevent it from apprearing in modules list/doctor
+# prevent it from appearing in modules list/doctor
 from ..core import __NOT_HPI_MODULE__

 # kinda annoying to keep it, but it's so legacy 'hpi module install my.fbmessenger' works
@@ -174,7 +174,7 @@ def _process_db_msys(db: sqlite3.Connection) -> Iterator[Res[Entity]]:
 However seems that when message is not sent yet it doesn't have this server id yet
 (happened only once, but could be just luck of course!)
 We exclude these messages to avoid duplication.
-However poisitive filter (e.g. message_id LIKE 'mid%') feels a bit wrong, e.g. what if mesage ids change or something
+However poisitive filter (e.g. message_id LIKE 'mid%') feels a bit wrong, e.g. what if message ids change or something
 So instead this excludes only such unsent messages.
 */
 message_id != offline_threading_id
@@ -23,7 +23,7 @@ def messages() -> Iterator[Res[Message]]:
 # TODO in general best to prefer android, it has more data
 # - message ids
 # - usernames are correct for Android data
-# - thread ids more meaninful?
+# - thread ids more meaningful?
 # but for now prefer gdpr prefix since it makes a bit things a bit more consistent?
 # e.g. a new batch of android exports can throw off ids if we rely on it for mapping
 yield from _merge_messages(
@@ -76,7 +76,7 @@ def _entities() -> Iterator[Res[User | _Message]]:
 # NOTE: here there are basically two options
 # - process inputs as is (from oldest to newest)
 #   this would be more stable wrt newer exports (e.g. existing thread ids won't change)
-#   the downside is that newer exports seem to have better thread ids, so might be preferrable to use them
+#   the downside is that newer exports seem to have better thread ids, so might be preferable to use them
 # - process inputs reversed (from newest to oldest)
 #   the upside is that thread ids/usernames might be better
 #   the downside is that if for example the user renames, thread ids will change _a lot_, might be undesirable..
@@ -137,7 +137,7 @@ def _entitites_from_path(path: Path) -> Iterator[Res[User | _Message]]:
 j = json.loads(ffile.read_text())

 id_len = 10
-# NOTE: I'm not actually sure it's other user's id.., since it corresponds to the whole converstation
+# NOTE: I'm not actually sure it's other user's id.., since it corresponds to the whole conversation
 # but I stared a bit at these ids vs database ids and can't see any way to find the correspondence :(
 # so basically the only way to merge is to actually try some magic and correlate timestamps/message texts?
 # another option is perhaps to query user id from username with some free API
@@ -9,7 +9,7 @@ since that allows for easier overriding using namespace packages
 See https://github.com/karlicoss/HPI/blob/master/doc/MODULE_DESIGN.org#allpy for more info.
 """

-# prevent it from apprearing in modules list/doctor
+# prevent it from appearing in modules list/doctor
 from ..core import __NOT_HPI_MODULE__

 # kinda annoying to keep it, but it's so legacy 'hpi module install my.reddit' works
@@ -186,7 +186,7 @@ class MMS(NamedTuple):
 for (addr, _type) in self.addresses:
     if _type == 137:
         return addr
-# hmm, maybe return instead? but this probably shouldnt happen, means
+# hmm, maybe return instead? but this probably shouldn't happen, means
 # something is very broken
 raise RuntimeError(f'No from address matching 137 found in {self.addresses}')

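For reference, the magic number 137 matched here is the MMS PDU address type for the sender; the common values as stored in Android telephony databases:

    # MMS PDU address types (from the MMS encapsulation spec)
    ADDR_BCC  = 129  # 0x81
    ADDR_CC   = 130  # 0x82
    ADDR_FROM = 137  # 0x89 - the sender, matched in the loop above
    ADDR_TO   = 151  # 0x97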
@@ -214,7 +214,7 @@ def mms() -> Iterator[Res[MMS]]:
 def _resolve_null_str(value: str | None) -> str | None:
     if value is None:
         return None
-    # hmm.. theres some risk of the text actually being 'null', but theres
+    # hmm.. there's some risk of the text actually being 'null', but there's
     # no way to distinguish that from XML values
     if value == 'null':
         return None
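The hunk cuts off before the fall-through; presumably the helper ends by returning the value unchanged. A sketch of the complete function under that assumption:

    def _resolve_null_str(value: str | None) -> str | None:
        if value is None:
            return None
        # the backup XML serializes missing values as the literal string 'null',
        # which is indistinguishable from a genuine 'null' text - accepted risk
        if value == 'null':
            return None
        return value  # assumption: any other value falls through unchanged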
@@ -49,7 +49,7 @@ class Vote(NamedTuple):
 # hmm, this loads very raw comments without the rest of the page?
 # - https://meta.stackexchange.com/posts/27319/comments#comment-57475
 #
-# parentPostId is the original quesion
+# parentPostId is the original question
 # TODO is not always present? fucking hell
 # seems like there is no way to get a hierarchical comment link.. guess this needs to be handled in Promnesia normalisation...
 # postId is the answer
@@ -245,7 +245,7 @@ def _iter_tzs() -> Iterator[DayWithZone]:
 def _day2zone() -> dict[date, pytz.BaseTzInfo]:
     # NOTE: kinda unfortunate that this will have to process all days before returning result for just one
     # however otherwise cachew cache might never be initialized properly
-    # so we'll always end up recomputing everyting during subsequent runs
+    # so we'll always end up recomputing everything during subsequent runs
     return {dz.day: pytz.timezone(dz.zone) for dz in _iter_tzs()}


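The eager dict comprehension matters because of how the cache behaves; a hedged illustration, assuming a cachew-style cache that is finalized only once its iterator is fully consumed:

    import pytz

    # assume iter_day_zones() yields (day, zone_name) pairs through such a cache
    def zone_for_day_bad(day, iter_day_zones):
        for d, zone in iter_day_zones():
            if d == day:
                return pytz.timezone(zone)  # early return: cache never finalized

    def zone_for_day_ok(day, iter_day_zones):
        # consume everything once (as the hunk's dict comprehension does),
        # so the cache is written out and later runs answer lookups cheaply
        day2zone = {d: pytz.timezone(zone) for d, zone in iter_day_zones()}
        return day2zone[day]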
@@ -106,7 +106,7 @@ def _handle_db(db: sqlite3.Connection) -> Iterator[Res[_Entity]]:
 user_profile_rows = list(db.execute('SELECT * FROM profile_user_view'))

 if len(user_profile_rows) == 0:
-    # shit, sometime in 2023 profile_user_view stoppped containing user profile..
+    # shit, sometime in 2023 profile_user_view stopped containing user profile..
     # presumably the most common from_id/to_id would be our own username
     counter = Counter([id_ for (id_,) in db.execute('SELECT from_id FROM message UNION ALL SELECT to_id FROM message')])
     if len(counter) > 0: # this might happen if db is empty (e.g. user got logged out)
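The fallback sketched by this hunk boils down to a frequency count; a hypothetical, self-contained version:

    from collections import Counter

    def guess_own_user_id(ids: list) -> str:
        # our own id participates in every conversation (as sender or recipient),
        # so it should be the most frequent id across all messages
        counter = Counter(ids)
        own_id, _count = counter.most_common(1)[0]
        return own_id

    # e.g. guess_own_user_id(['alice', 'me', 'bob', 'me']) == 'me'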
@@ -81,7 +81,7 @@ def _parse_one(p: Path) -> Iterator[Res[Competition]]:
 # but also expects cooperation from .make method (e.g. popping items from the dict)
 # could also wrap in helper and pass to .make .. not sure
 # an argument could be made that .make isn't really a class methond..
-# it's pretty specific to this parser onl
+# it's pretty specific to this parser only
 yield from Competition.make(j=c)

 yield from m.check()
@@ -192,7 +192,7 @@ def get_own_user_id(conn) -> str:
 # - timeline_data_type
 # 1 : the bulk of tweets, but also some notifications etc??
 # 2 : who-to-follow/community-to-join. contains a couple of tweets, but their corresponding status_id is NULL
-# 8 : who-to-follow/notfication
+# 8 : who-to-follow/notification
 # 13: semantic-core/who-to-follow
 # 14: cursor
 # 17: trends
@@ -54,7 +54,7 @@ class Tweet(NamedTuple):
 # https://github.com/thomasancheriyil/Red-Tide-Detection-based-on-Twitter/blob/beb200be60cc66dcbc394e670513715509837812/python/twitterGapParse.py#L61-L62
 #
 # twint is also saving 'timezone', but this is local machine timezone at the time of scraping?
-# perhaps they thought date-time-ms was local time... or just kept it just in case (they are keepin lots on unnecessary stuff in the db)
+# perhaps they thought date-time-ms was local time... or just kept it just in case (they are keeping lots on unnecessary stuff in the db)
 return datetime.fromtimestamp(seconds, tz=tz)

 @property
@@ -199,7 +199,7 @@ def _process_db(db: sqlite3.Connection) -> Iterator[Entity]:
 sender_row_id = r['sender_jid_row_id']
 if sender_row_id == 0:
     # seems that it's always 0 for 1-1 chats
-    # for group chats our onw id is still 0, but other ids are properly set
+    # for group chats our own id is still 0, but other ids are properly set
     if from_me:
         myself_user_id = config.my_user_id or 'MYSELF_USER_ID'
         sender = Sender(id=myself_user_id, name=None) # TODO set my own name as well?
@@ -36,7 +36,7 @@ def watched() -> Iterator[Res[Watched]]:
 continue

 # older exports (e.g. html) didn't have microseconds
-# wheras newer json ones do have them
+# whereas newer json ones do have them
 # seconds resolution is enough to distinguish watched videos
 # also we're processing takeouts in HPI in reverse order, so first seen watch would contain microseconds, resulting in better data
 without_microsecond = w.when.replace(microsecond=0)
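The truncation trick this hunk documents, as a standalone sketch (timestamps invented for illustration):

    from datetime import datetime, timezone

    # the same watch event from an old html export (second resolution)
    # and from a newer json export (microsecond resolution)
    old = datetime(2019, 1, 1, 12, 0, 5, tzinfo=timezone.utc)
    new = datetime(2019, 1, 1, 12, 0, 5, 123456, tzinfo=timezone.utc)

    seen = set()
    deduped = []
    for when in (new, old):  # takeouts are processed newest-first in HPI
        without_microsecond = when.replace(microsecond=0)
        if without_microsecond in seen:
            continue  # drops the html duplicate, keeping the richer json record
        seen.add(without_microsecond)
        deduped.append(when)

    assert deduped == [new]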