Fix; use kython.kompress, nicer code using Path
This commit is contained in:
parent
073c19bf5d
commit
ef270f4d01
4 changed files with 33 additions and 40 deletions
10
ci.sh
10
ci.sh
|
@ -1,10 +0,0 @@
|
||||||
#!/bin/bash
|
|
||||||
|
|
||||||
cd "$(this_dir)" || exit
|
|
||||||
|
|
||||||
. ~/bash_ci
|
|
||||||
|
|
||||||
ci_run mypy reddit
|
|
||||||
ci_run pylint -E reddit
|
|
||||||
|
|
||||||
ci_report_errors
|
|
|
@ -2,22 +2,23 @@ from typing import List, Dict, Union, Iterable, Iterator, NamedTuple
|
||||||
import json
|
import json
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import pytz
|
import pytz
|
||||||
|
|
||||||
BPATH = "/L/backups/reddit"
|
|
||||||
|
|
||||||
|
|
||||||
import re
|
import re
|
||||||
RE = re.compile(r'reddit-(\d{14}).json.xz')
|
|
||||||
|
|
||||||
def iter_backups() -> Iterator[str]:
|
|
||||||
import os
|
|
||||||
for f in sorted(os.listdir(BPATH)):
|
|
||||||
if RE.match(f):
|
|
||||||
yield os.path.join(BPATH, f)
|
|
||||||
|
|
||||||
|
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
|
from kython import kompress
|
||||||
|
|
||||||
|
|
||||||
|
BPATH = Path("/L/backups/reddit")
|
||||||
|
|
||||||
|
|
||||||
|
def _get_backups(all_=True) -> List[Path]:
|
||||||
|
bfiles = list(sorted(BPATH.glob('reddit-*.json.xz')))
|
||||||
|
if all_:
|
||||||
|
return bfiles
|
||||||
|
else:
|
||||||
|
return bfiles[-1:]
|
||||||
|
|
||||||
|
|
||||||
class Save(NamedTuple):
|
class Save(NamedTuple):
|
||||||
dt: datetime
|
dt: datetime
|
||||||
title: str
|
title: str
|
||||||
|
@ -37,8 +38,8 @@ class Event(NamedTuple):
|
||||||
title: str
|
title: str
|
||||||
url: str
|
url: str
|
||||||
|
|
||||||
from kython import JSONType, load_json_file
|
|
||||||
|
|
||||||
|
# TODO kython?
|
||||||
def get_some(d, *keys):
|
def get_some(d, *keys):
|
||||||
for k in keys:
|
for k in keys:
|
||||||
v = d.get(k, None)
|
v = d.get(k, None)
|
||||||
|
@ -48,9 +49,9 @@ def get_some(d, *keys):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
def get_state(bfile: str):
|
def get_state(bfile: Path):
|
||||||
saves: Dict[str, Save] = {}
|
saves: Dict[str, Save] = {}
|
||||||
with Path(bfile).open() as fo:
|
with kompress.open(bfile) as fo:
|
||||||
jj = json.load(fo)
|
jj = json.load(fo)
|
||||||
|
|
||||||
saved = jj['saved']
|
saved = jj['saved']
|
||||||
|
@ -68,12 +69,12 @@ def get_state(bfile: str):
|
||||||
|
|
||||||
# "created_utc": 1535055017.0,
|
# "created_utc": 1535055017.0,
|
||||||
# link_title
|
# link_title
|
||||||
# link_text
|
# link_text
|
||||||
return saves
|
return saves
|
||||||
|
|
||||||
|
|
||||||
def get_events():
|
def get_events(all_=True):
|
||||||
backups = list(iter_backups())
|
backups = _get_backups(all_=all_)
|
||||||
assert len(backups) > 0
|
assert len(backups) > 0
|
||||||
|
|
||||||
events: List[Event] = []
|
events: List[Event] = []
|
||||||
|
@ -81,8 +82,9 @@ def get_events():
|
||||||
# TODO suppress first batch??
|
# TODO suppress first batch??
|
||||||
# TODO for initial batch, treat event time as creation time
|
# TODO for initial batch, treat event time as creation time
|
||||||
|
|
||||||
|
RE = re.compile(r'reddit-(\d{14})')
|
||||||
for i, b in enumerate(backups): # TODO when date...
|
for i, b in enumerate(backups): # TODO when date...
|
||||||
match = RE.search(b)
|
match = RE.search(b.stem)
|
||||||
assert match is not None
|
assert match is not None
|
||||||
btime = pytz.utc.localize(datetime.strptime(match.group(1), "%Y%m%d%H%M%S"))
|
btime = pytz.utc.localize(datetime.strptime(match.group(1), "%Y%m%d%H%M%S"))
|
||||||
|
|
||||||
|
@ -122,3 +124,14 @@ def get_events():
|
||||||
return list(sorted(events, key=lambda e: e.dt))
|
return list(sorted(events, key=lambda e: e.dt))
|
||||||
|
|
||||||
|
|
||||||
|
def test():
|
||||||
|
get_events(all_=False)
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
for e in get_events():
|
||||||
|
print(e)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main()
|
||||||
|
|
|
@ -1,4 +0,0 @@
|
||||||
from reddit import get_events
|
|
||||||
|
|
||||||
for e in get_events():
|
|
||||||
print(e)
|
|
6
run
6
run
|
@ -1,6 +0,0 @@
|
||||||
#!/bin/bash
|
|
||||||
set -eu
|
|
||||||
|
|
||||||
cd "$(dirname "$0")"
|
|
||||||
|
|
||||||
python3 -m reddit
|
|
Loading…
Add table
Reference in a new issue