From 67cf4d0c0416bcf67e076648aec8dd14141b3dc9 Mon Sep 17 00:00:00 2001 From: Matthew Reishus Date: Sun, 31 May 2020 20:35:20 -0500 Subject: [PATCH] my.coding.github ignores some events emitted by bots. I use a service called dependabot ( https://dependabot.com/ ). It automatically creates pull requests in my repositories to upgrade dependencies. The modern front end javascript world moves really quickly; projects have a ton of dependencies that are updating all the time, so there are a lot of these pull requests. Also, the PRs it makes have a lot of info in them. Here's an example one: https://github.com/mreishus/spades/pull/180 . If you hit the arrows, you can see it includes a lot of text in "Changelog" and "Commits". Now check out the list of closed PRs this project has: https://github.com/mreishus/spades/pulls?q=is%3Apr+is%3Aclosed Once I got everything working with my.coding.github, my Github.org (using orger) was huge: 5MB. I wanted to get rid of the dependabot stuff, since it's mostly junk I'm not too interested in, and I got it down to 130K (from 5MB) just from this commit. Here's an example of an event I'm filtering out: I'm looking to see if the "user" contains a "[bot]" tag in it. { "type": "pull_request", "url": "https://github.com/mreishus/spades/pull/96", "user": "https://github.com/dependabot-preview[bot]", "repository": "https://github.com/mreishus/spades", "title": "Bump axios from 0.19.1 to 0.19.2 in /frontend", "body": "Bumps [axios](https://github.com/axios/axios) from 0.19.1 to 0.19.2.\n
\nRelease notes Event: def _parse_issue_comment(d: Dict) -> Event: url = d['url'] + is_bot = "[bot]" in d["user"] return Event( # type: ignore[misc] **_parse_common(d), summary=f'commented on issue {url}', eid='issue_comment_' + url, + is_bot=is_bot, ) def _parse_issue(d: Dict) -> Event: url = d['url'] title = d['title'] + is_bot = "[bot]" in d["user"] return Event( # type: ignore[misc] **_parse_common(d), summary=f'opened issue {title}', eid='issue_comment_' + url, + is_bot=is_bot, ) def _parse_pull_request(d: Dict) -> Event: url = d['url'] title = d['title'] + is_bot = "[bot]" in d["user"] return Event( # type: ignore[misc] **_parse_common(d), # TODO distinguish incoming/outgoing? # TODO action? opened/closed?? summary=f'opened PR {title}', eid='pull_request_' + url, + is_bot=is_bot, ) @@ -245,6 +252,8 @@ def iter_events() -> Iterator[Res[Event]]: if isinstance(e, Exception): yield e continue + if e.is_bot: + continue key = (e.dt, e.eid) # use both just in case # TODO wtf?? some minor (e.g. 1 sec) discrepancies (e.g. create repository events) if key in emitted: