class LocInterval(NamedTuple):
    """A run of consecutive locations that share one tag.

    For a run made of a single location, ``from_`` and ``to`` are the same
    object.
    """
    # First location of the run.
    from_: Location
    # Last location of the run that carried the run's tag.
    to: Location


def get_groups(cached: bool=False, lookahead: int=10) -> List[LocInterval]:
    """Collapse the location history into intervals of equal ``tag``.

    The locations are scanned in the order ``get_locations`` yields them
    (assumed chronological -- TODO confirm, nothing here sorts them).  A group
    is opened at the first location not yet consumed and keeps growing for as
    long as one of the next ``lookahead`` locations carries the same tag;
    locations with a different tag that sit between two matching ones are
    absorbed into the group.  When no match is found inside the window the
    group is closed and the next one opens at the location right after the
    last match, so no location is dropped between groups.

    Args:
        cached: forwarded unchanged to ``get_locations``.
        lookahead: how many upcoming locations are inspected for a matching
            tag before the current group is closed.  Must be at least 1.
            TODO: this should probably be a time distance (e.g. half an hour)
            rather than a sample count.

    Returns:
        One ``LocInterval`` per group, in input order; an empty list when
        there are no locations.

    Raises:
        ValueError: if ``lookahead`` is smaller than 1.
    """
    if lookahead < 1:
        raise ValueError("lookahead must be >= 1, got {}".format(lookahead))

    # get_locations is only promised to return an Iterable; we need len() and
    # random access below, so materialise it.
    locs = list(get_locations(cached=cached))
    total = len(locs)

    groups: List[LocInterval] = []
    start = 0
    while start < total:
        tag = locs[start].tag
        end = start        # index of the last location known to belong here
        nxt = start + 1    # first index not yet assigned to the group
        while True:
            window = range(nxt, min(nxt + lookahead, total))
            hit = next((j for j in window if locs[j].tag == tag), None)
            if hit is None:
                break
            end = hit
            nxt = hit + 1
        groups.append(LocInterval(from_=locs[start], to=locs[end]))
        # Resume exactly at the first unassigned location: skipping one more
        # here would silently drop the sample that opens the next group.
        start = nxt
    return groups