""" Stackexchange data (uses [[https://stackoverflow.com/legal/gdpr/request][official GDPR export]]) """ # TODO need to merge gdpr and stexport ### config from dataclasses import dataclass from my.config import stackexchange as user_config from my.core import PathIsh, make_config, get_files, Json @dataclass class stackexchange(user_config): gdpr_path: PathIsh # path to GDPR zip file config = make_config(stackexchange) # TODO later support unpacked zip too ### # TODO just merge all of them and then filter?.. not sure from my.core.compat import fromisoformat from typing import NamedTuple, Iterable from datetime import datetime class Vote(NamedTuple): j: Json # todo ip? @property def when(self) -> datetime: return fromisoformat(self.j['eventTime']) # todo Url return type? @property def link(self) -> str: # vote target l = f"https://{self.j['siteId']}/" t = self.j['target'] if t == 'Comment': # for comments, these work? # - https://meta.stackexchange.com/posts/comments/943975 # - https://meta.stackexchange.com/questions/5436/direct-link-to-a-comment#comment943975_290757 # ^question id ^comment id ^answer id # hmm, this loads very raw comments without the rest of the page? # - https://meta.stackexchange.com/posts/27319/comments#comment-57475 # # parentPostId is the original quesion # TODO is not always present? fucking hell # seems like there is no way to get a hierarchical comment link.. guess this needs to be handled in Promnesia normalisation... # postId is the answer l += f"posts/comments/{self.j['commentId']}" elif t == 'Post': # https://unix.stackexchange.com/q/14841/180307 # https://unix.stackexchange.com/a/14871/180307 # https://unix.stackexchange.com/a/16756/180307 # shit. links generated by stackexchange are not hierarchical # on the other hand seems that it works without the last bit (/180307) # ok, 'a' works even for questions l += f"a/{self.j['postId']}" else: raise RuntimeError(f'Unexpected type {t}') return l # todo expose vote type? 
import json
from ..core.error import Res


def votes() -> Iterable[Res[Vote]]:
    """Yield a ``Vote`` for every record in the export's vote file, in reverse file order.

    The export is chosen as the greatest path returned by ``get_files(config.gdpr_path)``
    (presumably the most recent export -- TODO confirm against the naming of the export files).
    """
    # TODO there is also some site specific stuff in qa/ directory.. not sure if it's more detailed
    # todo should be defensive? not sure if the file is present when the user has no votes
    export = max(get_files(config.gdpr_path))
    # NOTE(review): gdpr_path is documented as a zip file, so this presumably relies on
    # get_files returning a zip-aware path object -- confirm.
    # Yes, the file name really does contain a backslash.
    votes_path = export / 'analytics' / 'qa\\vote.submit.json'
    # 'utf-8-sig' skips a leading byte order mark; this encoding was found to be
    # necessary in practice (presumably the file starts with a BOM).
    records = json.loads(votes_path.read_text(encoding='utf-8-sig'))
    # Records seem to be stored in decreasing order by default, hence the reversal.
    for record in reversed(records):
        # TODO implement check method that would go through all properties and emit errors?
        yield Vote(record)


from ..core import stat, Stats


def stats() -> Stats:
    """Return the result of ``stat`` applied to the ``votes`` provider."""
    return stat(votes)