""" EXPERIMENTAL! use with caution Manages 'global' ProcessPoolExecutor which is 'managed' by HPI itself, and can be passed down to DALs to speed up data processing. The reason to have it managed by HPI is because we don't want DALs instantiate pools themselves -- they can't cooperate and it would be hard/infeasible to control how many cores we want to dedicate to the DAL. Enabled by the env variable, specifying how many cores to dedicate e.g. "HPI_CPU_POOL=4 hpi query ..." """ import os from concurrent.futures import ProcessPoolExecutor from typing import Optional, cast _NOT_SET = cast(ProcessPoolExecutor, object()) _INSTANCE: Optional[ProcessPoolExecutor] = _NOT_SET def get_cpu_pool() -> Optional[ProcessPoolExecutor]: global _INSTANCE if _INSTANCE is _NOT_SET: use_cpu_pool = os.environ.get('HPI_CPU_POOL') if use_cpu_pool is None or int(use_cpu_pool) == 0: _INSTANCE = None else: # NOTE: this won't be cleaned up properly, but I guess it's fine? # since this it's basically a singleton for the whole process # , and will be destroyed when python exists _INSTANCE = ProcessPoolExecutor(max_workers=int(use_cpu_pool)) return _INSTANCE