From 661714f1d9d56f8c422fb2a12578ee04ccc96329 Mon Sep 17 00:00:00 2001
From: Dima Gerasimov
Date: Wed, 21 Jun 2023 16:43:20 +0100
Subject: [PATCH] core/logging: overhaul and many improvements -- mainly to deprecate abandoned logzero
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- generally saner/cleaner logger initialization

  In particular, it no longer overrides a logging level that was set by the user code
  prior to instantiating the logger.

  Also removes the `LazyLogger` hack, which doesn't seem necessary once the above is implemented.

- get rid of `logzero`, which is archived and abandoned now; use `colorlog` for the coloured logging formatter

- allow configuring the log level from the shell via `LOGGING_LEVEL_module_name=`

  E.g. `LOGGING_LEVEL_rescuexport_dal=WARNING LOGGING_LEVEL_my_rescuetime=debug ./script.py`

- port `AddExceptionTraceback` from HPI/promnesia

- port `CollapseLogsHandler` from HPI/promnesia

  Also allow configuring it from the shell, e.g. `LOGGING_COLLAPSE=`

- add support for the `enlighten` progress bar, so it can be shared between different projects

  See https://github.com/Rockhopper-Technologies/enlighten#readme

  This allows nice CLI progress bars, e.g. for parallel processing of different files from HPI:

    ghexport.dal[111] 29%|████████████████████████████████████████████████████████████████▏ | 29/100 [00:03<00:07, 10.03 files/s]
    rexport.dal[comments] 17%|████████████████████████████████████▋ | 115/682 [00:03<00:14, 39.15 files/s]
    my.instagram.android 0%|▎ | 3/2631 [00:02<34:50, 1.26 files/s]

  Currently off by default and hidden behind an env variable (`ENLIGHTEN_ENABLE=true`).
---
 doc/SETUP.org       |   5 +-
 my/core/__init__.py |   9 +-
 my/core/logging.py  | 237 +++++++++++++++++++++++++++++---------------
 setup.py            |   3 +-
 4 files changed, 166 insertions(+), 88 deletions(-)

diff --git a/doc/SETUP.org b/doc/SETUP.org
index 6605f66..904331f 100644
--- a/doc/SETUP.org
+++ b/doc/SETUP.org
@@ -105,10 +105,11 @@ You can also install some optional packages
 They aren't necessary, but will improve your experience. At the moment these are:
 
-- [[https://github.com/karlicoss/cachew][cachew]]: automatic caching library, which can greatly speedup data access
-- [[https://github.com/metachris/logzero][logzero]]: a nice logging library, supporting colors
 - [[https://github.com/ijl/orjson][orjson]]: a library for serializing data to JSON, used in ~my.core.serialize~ and the ~hpi query~ interface
+- [[https://github.com/karlicoss/cachew][cachew]]: automatic caching library, which can greatly speedup data access
 - [[https://github.com/python/mypy][mypy]]: mypy is used for checking configs and troubleshooting
+- [[https://github.com/borntyping/python-colorlog][colorlog]]: colored formatter for the ~logging~ module
+- [[https://github.com/Rockhopper-Technologies/enlighten][enlighten]]: console progress bar library
 
 * Setting up modules
 This is an *optional step* as few modules work without extra setup.
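To make the new knobs concrete, here is a minimal usage sketch (the module name and script are made up purely for illustration; only `make_logger`, `LOGGING_LEVEL_*` and `LOGGING_COLLAPSE` come from this patch):

    # my/some_module.py -- hypothetical module
    from my.core import make_logger

    logger = make_logger(__name__)  # a plain logging.Logger; repeated calls are cached via lru_cache

    def process() -> None:
        logger.info('starting')
        logger.debug('hidden by default -- the default level is INFO')

and then, from the shell, the level and collapsing can be tweaked per logger without touching the code, e.g.:

    LOGGING_LEVEL_my_some_module=debug LOGGING_COLLAPSE=debug ./script.py
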
diff --git a/my/core/__init__.py b/my/core/__init__.py
index a7af46c..0f09eef 100644
--- a/my/core/__init__.py
+++ b/my/core/__init__.py
@@ -1,17 +1,15 @@
 # this file only keeps the most common & critical types/utility functions
 from .common import get_files, PathIsh, Paths
 from .common import Json
-from .common import LazyLogger
 from .common import warn_if_empty
 from .common import stat, Stats
 from .common import datetime_naive, datetime_aware
 from .common import assert_never
 
 from .cfg import make_config
-
-from .util import __NOT_HPI_MODULE__
-
 from .error import Res, unwrap
+from .logging import make_logger, LazyLogger
+from .util import __NOT_HPI_MODULE__
 
 
 # just for brevity in modules
@@ -23,7 +21,8 @@ from pathlib import Path
 __all__ = [
     'get_files', 'PathIsh', 'Paths',
     'Json',
-    'LazyLogger',
+    'make_logger',
+    'LazyLogger',  # legacy import
     'warn_if_empty',
     'stat', 'Stats',
     'datetime_aware', 'datetime_naive',
diff --git a/my/core/logging.py b/my/core/logging.py
index e7061fa..47cd135 100644
--- a/my/core/logging.py
+++ b/my/core/logging.py
@@ -1,47 +1,61 @@
-#!/usr/bin/env python3
-'''
-Default logger is a bit meh, see 'test'/run this file for a demo
-'''
+from __future__ import annotations
+
+from functools import lru_cache
+import logging
+import os
+from typing import Union
+import warnings
+
+
 def test() -> None:
-    import logging
     import sys
     from typing import Callable
 
     M: Callable[[str], None] = lambda s: print(s, file=sys.stderr)
-    M(" Logging module's defaults are not great...'")
-    l = logging.getLogger('test_logger')
+    ## prepare exception for later
+    try:
+        None.whatever  # type: ignore[attr-defined]
+    except Exception as e:
+        ex = e
+    ##
+
+    M(" Logging module's defaults are not great:")
+    l = logging.getLogger('default_logger')
     l.error("For example, this should be logged as error. But it's not even formatted properly, doesn't have logger name or level")
-    M(" The reason is that you need to remember to call basicConfig() first")
+    M("\n The reason is that you need to remember to call basicConfig() first. Let's do it now:")
     logging.basicConfig()
     l.error("OK, this is better. But the default format kinda sucks, I prefer having timestamps and the file/line number")
-    M("")
-    M(" With LazyLogger you get a reasonable logging format, colours and other neat things")
+    M("\n Also exception logging is kinda lame, doesn't print traceback by default unless you remember to pass exc_info:")
+    l.exception(ex)  # type: ignore[possibly-undefined]
 
-    ll = LazyLogger('test')  # No need for basicConfig!
+    M("\n\n With make_logger you get a reasonable logging format, colours (via colorlog library) and other neat things:")
+
+    ll = make_logger('test')  # No need for basicConfig!
     ll.info("default level is INFO")
-    ll.debug(".. so this shouldn't be displayed")
+    ll.debug("... so this shouldn't be displayed")
     ll.warning("warnings are easy to spot!")
-    ll.exception(RuntimeError("exceptions as well"))
+
+    M("\n Exceptions print traceback by default now:")
+    ll.exception(ex)
+
+    M("\n You can (and should) use it via regular logging.getLogger after that, e.g. let's set logging level to DEBUG now")
+    logging.getLogger('test').setLevel(logging.DEBUG)
+    ll.debug("... now debug messages are also displayed")
now debug messages are also displayed") -import logging -from typing import Union, Optional, cast -import os -import warnings +DEFAULT_LEVEL = 'INFO' +FORMAT = '{start}[%(levelname)-7s %(asctime)s %(name)s %(filename)s:%(lineno)-4d]{end} %(message)s' +FORMAT_NOCOLOR = FORMAT.format(start='', end='') + Level = int -LevelIsh = Optional[Union[Level, str]] +LevelIsh = Union[Level, str, None] def mklevel(level: LevelIsh) -> Level: - # todo put in some global file, like envvars.py - glevel = os.environ.get('HPI_LOGS', None) - if glevel is not None: - level = glevel if level is None: return logging.NOTSET if isinstance(level, int): @@ -49,69 +63,85 @@ def mklevel(level: LevelIsh) -> Level: return getattr(logging, level.upper()) -FORMAT = '{start}[%(levelname)-7s %(asctime)s %(name)s %(filename)s:%(lineno)d]{end} %(message)s' -FORMAT_COLOR = FORMAT.format(start='%(color)s', end='%(end_color)s') -FORMAT_NOCOLOR = FORMAT.format(start='', end='') -DATEFMT = '%Y-%m-%d %H:%M:%S' +def get_collapse_level() -> Level | None: + # TODO not sure if should be specific to logger name? + cl = os.environ.get('LOGGING_COLLAPSE', None) + if cl is not None: + return mklevel(cl) + # legacy name, maybe deprecate? + cl = os.environ.get('COLLAPSE_DEBUG_LOGS', None) + if cl is not None: + return logging.DEBUG + return None -COLLAPSE_DEBUG_LOGS = os.environ.get('COLLAPSE_DEBUG_LOGS', False) -_init_done = 'lazylogger_init_done' +def get_env_level(name: str) -> Level | None: + PREFIX = 'LOGGING_LEVEL_' # e.g. LOGGING_LEVEL_my_hypothesis=debug + # shell doesn't allow using dots in var names without escaping, so also support underscore syntax + lvl = os.environ.get(PREFIX + name, None) or os.environ.get(PREFIX + name.replace('.', '_'), None) + if lvl is not None: + return mklevel(lvl) + return None -def setup_logger(logger: logging.Logger, level: LevelIsh) -> None: - lvl = mklevel(level) - try: - import logzero # type: ignore[import] - except ModuleNotFoundError: - warnings.warn("You might want to install 'logzero' for nice colored logs!") - formatter = logging.Formatter(fmt=FORMAT_NOCOLOR, datefmt=DATEFMT) - use_logzero = False + +def setup_logger(logger: str | logging.Logger, *, level: LevelIsh = None) -> None: + """ + Wrapper to simplify logging setup. + """ + if isinstance(logger, str): + logger = logging.getLogger(logger) + + if level is None: + level = DEFAULT_LEVEL + + # env level always takes precedence + env_level = get_env_level(logger.name) + if env_level is not None: + lvl = env_level else: - formatter = logzero.LogFormatter( - fmt=FORMAT_COLOR, - datefmt=DATEFMT, - ) - use_logzero = True + lvl = mklevel(level) + + if logger.level == logging.NOTSET: + # if it's already set, the user requested a different logging level, let's respect that + logger.setLevel(lvl) logger.addFilter(AddExceptionTraceback()) - if use_logzero and not COLLAPSE_DEBUG_LOGS: # all set, nothing to do - # 'simple' setup - logzero.setup_logger(logger.name, level=lvl, formatter=formatter) # type: ignore[possibly-undefined] - return - h = CollapseDebugHandler() if COLLAPSE_DEBUG_LOGS else logging.StreamHandler() - logger.setLevel(lvl) - h.setLevel(lvl) - h.setFormatter(formatter) - logger.addHandler(h) - logger.propagate = False # ugh. otherwise it duplicates log messages? not sure about it.. 
+
+    collapse_level = get_collapse_level()
+    ch = logging.StreamHandler() if collapse_level is None else CollapseLogsHandler(maxlevel=collapse_level)
+
+    # default level for the handler is NOTSET, which will make it process all messages
+    # we rely on the logger to actually accept/reject log msgs
+    logger.addHandler(ch)
+
+    # this attribute is set to True by default, which causes log entries to be passed to the root logger as well (e.g. if you call basicConfig beforehand),
+    # even when the entry is already handled by this logger... not sure what the point of that behaviour is??
+    logger.propagate = False
+
+    try:
+        # try colorlog first, so the user gets nice colored logs
+        import colorlog
+    except ModuleNotFoundError:
+        warnings.warn("You might want to 'pip install colorlog' for nice colored logs")
+        formatter = logging.Formatter(FORMAT_NOCOLOR)
+    else:
+        # log_color/reset are specific to colorlog
+        FORMAT_COLOR = FORMAT.format(start='%(log_color)s', end='%(reset)s')
+        # colorlog should detect tty in principle, but doesn't handle everything for some reason,
+        # see https://github.com/borntyping/python-colorlog/issues/71 -- so check isatty ourselves
+        fmt = FORMAT_COLOR if ch.stream.isatty() else FORMAT_NOCOLOR
+        formatter = colorlog.ColoredFormatter(fmt)
+
+    ch.setFormatter(formatter)
 
 
-class LazyLogger(logging.Logger):
-    def __new__(cls, name: str, level: LevelIsh = 'INFO') -> 'LazyLogger':
-        logger = logging.getLogger(name)
-
-        # this is called prior to all _log calls so makes sense to do it here?
-        def isEnabledFor_lazyinit(*args, logger=logger, orig=logger.isEnabledFor, **kwargs) -> bool:
-            if not getattr(logger, _init_done, False):  # init once, if necessary
-                setup_logger(logger, level=level)
-                setattr(logger, _init_done, True)
-                logger.isEnabledFor = orig  # restore the callback
-            return orig(*args, **kwargs)
-
-        # oh god.. otherwise might go into an inf loop
-        if not hasattr(logger, _init_done):
-            setattr(logger, _init_done, False)  # will setup on the first call
-        logger.isEnabledFor = isEnabledFor_lazyinit  # type: ignore[method-assign]
-        return cast(LazyLogger, logger)
-
-
-# by default, logging.exception isn't logging traceback
-# which is a bit annoying since we have to
+# by default, logging.exception doesn't log the traceback unless it's called inside an exception handler,
+# which is a bit annoying since we have to pass exc_info explicitly
 # also see https://stackoverflow.com/questions/75121925/why-doesnt-python-logging-exception-method-log-traceback-by-default
-# tod also amend by post about defensive error handling?
+# todo also amend my post about defensive error handling?
 class AddExceptionTraceback(logging.Filter):
-    def filter(self, record):
+    def filter(self, record: logging.LogRecord) -> bool:
         s = super().filter(record)
         if s is False:
             return False
@@ -125,25 +155,31 @@ class AddExceptionTraceback(logging.Filter):
 
 
 # todo also save full log in a file?
-class CollapseDebugHandler(logging.StreamHandler):
+class CollapseLogsHandler(logging.StreamHandler):
     '''
     Collapses subsequent debug log lines and redraws on the same line.
     Hopefully this gives both a sense of progress and doesn't clutter the terminal as much?
     '''
-    last = False
+
+    last: bool = False
+
+    maxlevel: Level = logging.DEBUG  # everything with less or equal level will be collapsed
+
+    def __init__(self, *args, maxlevel: Level, **kwargs) -> None:
+        super().__init__(*args, **kwargs)
+        self.maxlevel = maxlevel
 
     def emit(self, record: logging.LogRecord) -> None:
         try:
             msg = self.format(record)
-            cur = record.levelno == logging.DEBUG and '\n' not in msg
+            cur = record.levelno <= self.maxlevel and '\n' not in msg
             if cur:
                 if self.last:
-                    self.stream.write('\033[K' + '\r') # clear line + return carriage
+                    self.stream.write('\033[K' + '\r')  # clear line + return carriage
             else:
                 if self.last:
-                    self.stream.write('\n') # clean up after the last debug line
+                    self.stream.write('\n')  # clean up after the last line
             self.last = cur
-            import os
             columns, _ = os.get_terminal_size(0)
             # ugh. the columns thing is meh. dunno I guess ultimately need curses for that
             # TODO also would be cool to have a terminal post-processor? kinda like tail but aware of logging keywords (INFO/DEBUG/etc)
@@ -153,5 +189,46 @@ class CollapseDebugHandler(logging.StreamHandler):
             self.handleError(record)
 
 
+@lru_cache(None)  # cache so it's only initialized once
+def make_logger(name: str, *, level: LevelIsh = None) -> logging.Logger:
+    logger = logging.getLogger(name)
+    setup_logger(logger, level=level)
+    return logger
+
+
+# ughh. hacky way to have a single enlighten instance per interpreter, so it can be shared between modules
+# not sure about this. I guess this should definitely be behind some flag
+# OK, when stdout is not a tty, enlighten doesn't output anything, good
+def get_enlighten():
+    # TODO could add env variable to disable enlighten for a module?
+    from unittest.mock import Mock
+    # a Mock is returned as a stub so clients don't have to think about whether enlighten is enabled
+
+    # for now hidden behind the flag since it's a little experimental
+    if os.environ.get('ENLIGHTEN_ENABLE', None) is None:
+        return Mock()
+
+    try:
+        import enlighten  # type: ignore[import]
+    except ModuleNotFoundError:
+        warnings.warn("You might want to 'pip install enlighten' for a nice progress bar")
+        return Mock()
+
+    # dirty, but otherwise it's a bit unclear how to share the enlighten manager between packages that call each other
+    instance = getattr(enlighten, 'INSTANCE', None)
+    if instance is not None:
+        return instance
+    instance = enlighten.get_manager()
+    setattr(enlighten, 'INSTANCE', instance)
+    return instance
+
+
 if __name__ == '__main__':
     test()
+
+
+## legacy/deprecated aliases for backwards compatibility
+LazyLogger = make_logger
+logger = make_logger
+##
diff --git a/setup.py b/setup.py
index f3f8511..5a4f75b 100644
--- a/setup.py
+++ b/setup.py
@@ -55,11 +55,12 @@ def main() -> None:
         ],
         'optional': [
             # todo document these?
-            'logzero',
             'orjson',  # for my.core.serialize
             'pyfzf_iter',  # for my.core.denylist
             'cachew>=0.8.0',
             'mypy',  # used for config checks
+            'colorlog',  # for colored logs
+            'enlighten',  # for CLI progress bars
         ],
     },
     entry_points={'console_scripts': ['hpi=my.core.__main__:main']},
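
For the enlighten integration, a consumer would look roughly like this (a sketch: `paths` and `process()` are hypothetical placeholders; `get_enlighten`, `ENLIGHTEN_ENABLE` and the standard enlighten counter()/update() API are the real pieces):

    from my.core.logging import get_enlighten

    enlighten = get_enlighten()  # a Mock stub unless ENLIGHTEN_ENABLE=true is set
    pbar = enlighten.counter(total=len(paths), desc='my.instagram.android', unit='files')
    for path in paths:
        process(path)  # hypothetical per-file work
        pbar.update()

Because the manager is cached on the enlighten module itself ('INSTANCE'), bars created by different packages (e.g. rexport.dal and ghexport.dal) render as rows of the same display, which is what produces the output shown in the commit message above.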