91 lines
3.1 KiB
Python
91 lines
3.1 KiB
Python
"""
Stackexchange data (uses [[https://stackoverflow.com/legal/gdpr/request][official GDPR export]])
"""
|
|
|
|
# TODO need to merge gdpr and stexport
|
|
|
|
### config
|
|
from dataclasses import dataclass
|
|
|
|
from my.config import stackexchange as user_config
|
|
from my.core import Json, PathIsh, get_files, make_config
|
|
|
|
|
|
@dataclass
class stackexchange(user_config):
    """Settings for this module, layered on top of the user's ``my.config.stackexchange``."""

    # path to GDPR zip file
    gdpr_path: PathIsh


# Module-level configuration object, built by make_config from the class above.
config = make_config(stackexchange)
|
|
# TODO later support unpacked zip too
|
|
###
|
|
|
|
# TODO just merge all of them and then filter?.. not sure
|
|
|
|
from collections.abc import Iterable
|
|
from datetime import datetime
|
|
from typing import NamedTuple
|
|
|
|
from my.core.compat import fromisoformat
|
|
|
|
|
|
class Vote(NamedTuple):
    """A single vote record; a thin wrapper around one raw JSON entry of the export."""

    # Raw JSON record this vote was built from.
    j: Json
    # todo ip?

    @property
    def when(self) -> datetime:
        """Timestamp of the vote, parsed from the record's ``eventTime`` field."""
        raw_timestamp = self.j['eventTime']
        return fromisoformat(raw_timestamp)

    # todo Url return type?
    @property
    def link(self) -> str:
        """URL of the vote target (a comment or a post).

        Raises:
            RuntimeError: if the record's ``target`` is neither ``'Comment'`` nor ``'Post'``.
        """
        site_root = f"https://{self.j['siteId']}/"
        target = self.j['target']

        if target == 'Comment':
            # Candidate URL shapes for comments (do these work?):
            # - https://meta.stackexchange.com/posts/comments/943975
            # - https://meta.stackexchange.com/questions/5436/direct-link-to-a-comment#comment943975_290757
            #                                             ^question id           ^comment id ^answer id
            # This one seems to load very raw comments without the rest of the page:
            # - https://meta.stackexchange.com/posts/27319/comments#comment-57475
            #
            # parentPostId is the original question
            # TODO parentPostId does not always seem to be present
            # There seems to be no way to build a hierarchical comment link,
            # so that probably has to be handled in Promnesia normalisation.
            # postId is the answer
            return site_root + f"posts/comments/{self.j['commentId']}"

        if target == 'Post':
            # Examples of links generated by stackexchange:
            # - https://unix.stackexchange.com/q/14841/180307
            # - https://unix.stackexchange.com/a/14871/180307
            # - https://unix.stackexchange.com/a/16756/180307
            # They are not hierarchical; on the other hand, they seem to work
            # without the last bit (/180307), and 'a' works even for questions.
            return site_root + f"a/{self.j['postId']}"

        raise RuntimeError(f'Unexpected type {target}')
|
|
|
|
# todo expose vote type?
|
|
|
|
import json
|
|
|
|
from ..core.error import Res
|
|
|
|
|
|
def votes() -> Iterable[Res[Vote]]:
    """Yield a ``Vote`` for every record in the export's vote file, in reverse file order.

    The input is the maximum of the paths returned by ``get_files(config.gdpr_path)``.
    """
    # TODO there is also some site specific stuff in qa/ directory.. not sure if it's more detailed
    # todo should be defensive? not sure if the file is present when the user has no votes
    export = max(get_files(config.gdpr_path))

    # The file name really does contain a literal backslash.
    votes_file = export / 'analytics' / 'qa\\vote.submit.json'

    # 'utf-8-sig' seems to be necessary here (presumably the file starts with a BOM -- unconfirmed).
    raw_text = votes_file.read_text(encoding='utf-8-sig')
    records = json.loads(raw_text)

    # Records seem to be stored in decreasing order by default, hence the reversal.
    # TODO implement check method that would go through all properties and emit errors?
    yield from map(Vote, reversed(records))
|
|
|
|
|
|
from ..core import Stats, stat
|
|
|
|
|
|
def stats() -> Stats:
    """Return whatever ``stat`` reports for the ``votes`` provider."""
    summary = stat(votes)
    return summary
|