""" Stackexchange data (uses [[https://stackoverflow.com/legal/gdpr/request][official GDPR export]]) """

# TODO need to merge gdpr and stexport

### config
from my.config import stackexchange as user_config
from ..core import dataclass, PathIsh, make_config


@dataclass
class stackexchange(user_config):
    # location of the zip archive produced by the GDPR export
    gdpr_path: PathIsh


config = make_config(stackexchange)
# TODO later support unpacked zip too
###

# TODO just merge all of them and then filter?.. not sure

from ..core.common import Json, isoparse
from typing import NamedTuple, Iterable
from datetime import datetime


class Vote(NamedTuple):
    """One vote event, wrapping the raw JSON record it was read from."""

    j: Json  # raw record; the properties below look keys up on demand
    # todo ip?

    @property
    def when(self) -> datetime:
        """Timestamp obtained by parsing the record's 'eventTime' value."""
        event_time = self.j['eventTime']
        return isoparse(event_time)

    # todo Url return type?
    @property
    def link(self) -> str:
        """Build a URL pointing at the target of the vote.

        Raises RuntimeError when 'target' is neither 'Comment' nor 'Post'.
        """
        site_root = f"https://{self.j['siteId']}/"
        target = self.j['target']

        if target == 'Comment':
            # Link shapes that appear to work for comments:
            # - https://meta.stackexchange.com/posts/comments/943975
            # - https://meta.stackexchange.com/questions/5436/direct-link-to-a-comment#comment943975_290757
            #     (question id, then comment id, then answer id)
            # - https://meta.stackexchange.com/posts/27319/comments#comment-57475
            #     (this one seems to load the bare comments without the rest of the page)
            #
            # 'parentPostId' is the original question (TODO: not always present?),
            # 'postId' is the answer.
            # There seems to be no way to build a hierarchical comment link, so that
            # probably has to be handled by Promnesia normalisation instead.
            return site_root + f"posts/comments/{self.j['commentId']}"

        if target == 'Post':
            # Examples of links generated by stackexchange:
            # - https://unix.stackexchange.com/q/14841/180307
            # - https://unix.stackexchange.com/a/14871/180307
            # - https://unix.stackexchange.com/a/16756/180307
            # They are not hierarchical, but they seem to work without the trailing
            # part (/180307), and the 'a' form works even for questions.
            return site_root + f"a/{self.j['postId']}"

        raise RuntimeError(f'Unexpected type {target}')
    # todo expose vote type?
import json
from ..core.kompress import kopen
from ..core.error import Res
from ..core import stat, Stats


def votes() -> Iterable[Res[Vote]]:
    """Yield a Vote for each record in the export's vote file.

    Records are yielded in the reverse of the order they are stored in.
    """
    # TODO there is also some site specific stuff in qa/ directory.. not sure if it's more detailed
    # todo should be defensive? not sure if present when user has no votes

    # TODO figure out why the member name mixes '/' and a backslash as separators
    member = 'analytics/qa\\vote.submit.json'

    # 'utf-8-sig': not sure why, but it seems necessary for this data
    # (presumably the file starts with a BOM -- unconfirmed)
    with kopen(config.gdpr_path, member, encoding='utf-8-sig') as fo:
        records = json.load(fo)
        # records seem to be stored in decreasing order by default, hence the reversal
        for record in reversed(records):
            # TODO implement check method that would go through all properties and emit errors?
            yield Vote(j=record)


def stats() -> Stats:
    """Return whatever `stat` reports for the `votes` function (passed uncalled)."""
    return stat(votes)