From 661714f1d9d56f8c422fb2a12578ee04ccc96329 Mon Sep 17 00:00:00 2001
From: Dima Gerasimov
Date: Wed, 21 Jun 2023 16:43:20 +0100
Subject: [PATCH] core/logging: overhaul and many improvements -- mainly to deprecate abandoned logzero
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- generally saner/cleaner logger initialization

  In particular, it no longer overrides a logging level that was set by the user code
  prior to instantiating the logger.

  Also removes the `LazyLogger` hack, which doesn't seem necessary once the above is implemented.

- get rid of `logzero`, which is archived and abandoned now; use `colorlog` for the coloured logging formatter

- allow configuring the log level from the shell via `LOGGING_LEVEL_module_name=`

  E.g. `LOGGING_LEVEL_rescuexport_dal=WARNING LOGGING_LEVEL_my_rescuetime=debug ./script.py`

- port `AddExceptionTraceback` from HPI/promnesia

- port `CollapseLogsHandler` from HPI/promnesia

  Also allow configuring it from the shell, e.g. `LOGGING_COLLAPSE=`

- add support for the `enlighten` progress bar, so it can be shared between different projects

  See https://github.com/Rockhopper-Technologies/enlighten#readme

  This allows nice CLI progress bars, e.g. for parallel processing of different files from HPI:

    ghexport.dal[111] 29%|████████████████████████████████████████████████████████████████▏ | 29/100 [00:03<00:07, 10.03 files/s]
    rexport.dal[comments] 17%|████████████████████████████████████▋ | 115/682 [00:03<00:14, 39.15 files/s]
    my.instagram.android 0%|▎ | 3/2631 [00:02<34:50, 1.26 files/s]

  Currently off by default and hidden behind an env variable (`ENLIGHTEN_ENABLE=true`).
---
 doc/SETUP.org       |   5 +-
 my/core/__init__.py |   9 +-
 my/core/logging.py  | 237 +++++++++++++++++++++++++++++---------------
 setup.py            |   3 +-
 4 files changed, 166 insertions(+), 88 deletions(-)

diff --git a/doc/SETUP.org b/doc/SETUP.org
index 6605f66..904331f 100644
--- a/doc/SETUP.org
+++ b/doc/SETUP.org
@@ -105,10 +105,11 @@ You can also install some optional packages
 They aren't necessary, but will improve your experience. At the moment these are:
 
-- [[https://github.com/karlicoss/cachew][cachew]]: automatic caching library, which can greatly speedup data access
-- [[https://github.com/metachris/logzero][logzero]]: a nice logging library, supporting colors
 - [[https://github.com/ijl/orjson][orjson]]: a library for serializing data to JSON, used in ~my.core.serialize~ and the ~hpi query~ interface
+- [[https://github.com/karlicoss/cachew][cachew]]: automatic caching library, which can greatly speedup data access
 - [[https://github.com/python/mypy][mypy]]: mypy is used for checking configs and troubleshooting
+- [[https://github.com/borntyping/python-colorlog][colorlog]]: colored formatter for the ~logging~ module
+- [[https://github.com/Rockhopper-Technologies/enlighten][enlighten]]: console progress bar library
 
 * Setting up modules
 This is an *optional step* as few modules work without extra setup.
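To make the new knobs concrete, here is a minimal usage sketch (the module name and script are made up purely for illustration; only `make_logger`, `LOGGING_LEVEL_*` and `LOGGING_COLLAPSE` come from this patch):

    # my/some_module.py -- hypothetical module
    from my.core import make_logger

    logger = make_logger(__name__)  # a plain logging.Logger; repeated calls are cached via lru_cache

    def process() -> None:
        logger.info('starting')
        logger.debug('hidden by default -- the default level is INFO')

and then, from the shell, the level and collapsing can be tweaked per logger without touching the code, e.g.:

    LOGGING_LEVEL_my_some_module=debug LOGGING_COLLAPSE=debug ./script.py
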
diff --git a/my/core/__init__.py b/my/core/__init__.py
index a7af46c..0f09eef 100644
--- a/my/core/__init__.py
+++ b/my/core/__init__.py
@@ -1,17 +1,15 @@
 # this file only keeps the most common & critical types/utility functions
 from .common import get_files, PathIsh, Paths
 from .common import Json
-from .common import LazyLogger
 from .common import warn_if_empty
 from .common import stat, Stats
 from .common import datetime_naive, datetime_aware
 from .common import assert_never
 
 from .cfg import make_config
-
-from .util import __NOT_HPI_MODULE__
-
 from .error import Res, unwrap
+from .logging import make_logger, LazyLogger
+from .util import __NOT_HPI_MODULE__
 
 
 # just for brevity in modules
@@ -23,7 +21,8 @@ from pathlib import Path
 __all__ = [
     'get_files', 'PathIsh', 'Paths',
     'Json',
-    'LazyLogger',
+    'make_logger',
+    'LazyLogger',  # legacy import
     'warn_if_empty',
     'stat', 'Stats',
     'datetime_aware', 'datetime_naive',
diff --git a/my/core/logging.py b/my/core/logging.py
index e7061fa..47cd135 100644
--- a/my/core/logging.py
+++ b/my/core/logging.py
@@ -1,47 +1,61 @@
-#!/usr/bin/env python3
-'''
-Default logger is a bit meh, see 'test'/run this file for a demo
-'''
+from __future__ import annotations
+
+from functools import lru_cache
+import logging
+import os
+from typing import Union
+import warnings
+
+
 def test() -> None:
-    import logging
     import sys
     from typing import Callable
 
     M: Callable[[str], None] = lambda s: print(s, file=sys.stderr)
-    M(" Logging module's defaults are not great...'")
-    l = logging.getLogger('test_logger')
+    ## prepare exception for later
+    try:
+        None.whatever  # type: ignore[attr-defined]
+    except Exception as e:
+        ex = e
+    ##
+
+    M(" Logging module's defaults are not great:")
+    l = logging.getLogger('default_logger')
     l.error("For example, this should be logged as error. But it's not even formatted properly, doesn't have logger name or level")
-    M(" The reason is that you need to remember to call basicConfig() first")
+    M("\n The reason is that you need to remember to call basicConfig() first. Let's do it now:")
     logging.basicConfig()
     l.error("OK, this is better. But the default format kinda sucks, I prefer having timestamps and the file/line number")
-    M("")
-    M(" With LazyLogger you get a reasonable logging format, colours and other neat things")
+    M("\n Also exception logging is kinda lame, doesn't print traceback by default unless you remember to pass exc_info:")
+    l.exception(ex)  # type: ignore[possibly-undefined]
 
-    ll = LazyLogger('test')  # No need for basicConfig!
+    M("\n\n With make_logger you get a reasonable logging format, colours (via colorlog library) and other neat things:")
+
+    ll = make_logger('test')  # No need for basicConfig!
     ll.info("default level is INFO")
-    ll.debug(".. so this shouldn't be displayed")
+    ll.debug("... so this shouldn't be displayed")
     ll.warning("warnings are easy to spot!")
-    ll.exception(RuntimeError("exceptions as well"))
+
+    M("\n Exceptions print traceback by default now:")
+    ll.exception(ex)
+
+    M("\n You can (and should) use it via regular logging.getLogger after that, e.g. let's set logging level to DEBUG now")
+    logging.getLogger('test').setLevel(logging.DEBUG)
+    ll.debug("... now debug messages are also displayed")
now debug messages are also displayed") -import logging -from typing import Union, Optional, cast -import os -import warnings +DEFAULT_LEVEL = 'INFO' +FORMAT = '{start}[%(levelname)-7s %(asctime)s %(name)s %(filename)s:%(lineno)-4d]{end} %(message)s' +FORMAT_NOCOLOR = FORMAT.format(start='', end='') + Level = int -LevelIsh = Optional[Union[Level, str]] +LevelIsh = Union[Level, str, None] def mklevel(level: LevelIsh) -> Level: - # todo put in some global file, like envvars.py - glevel = os.environ.get('HPI_LOGS', None) - if glevel is not None: - level = glevel if level is None: return logging.NOTSET if isinstance(level, int): @@ -49,69 +63,85 @@ def mklevel(level: LevelIsh) -> Level: return getattr(logging, level.upper()) -FORMAT = '{start}[%(levelname)-7s %(asctime)s %(name)s %(filename)s:%(lineno)d]{end} %(message)s' -FORMAT_COLOR = FORMAT.format(start='%(color)s', end='%(end_color)s') -FORMAT_NOCOLOR = FORMAT.format(start='', end='') -DATEFMT = '%Y-%m-%d %H:%M:%S' +def get_collapse_level() -> Level | None: + # TODO not sure if should be specific to logger name? + cl = os.environ.get('LOGGING_COLLAPSE', None) + if cl is not None: + return mklevel(cl) + # legacy name, maybe deprecate? + cl = os.environ.get('COLLAPSE_DEBUG_LOGS', None) + if cl is not None: + return logging.DEBUG + return None -COLLAPSE_DEBUG_LOGS = os.environ.get('COLLAPSE_DEBUG_LOGS', False) -_init_done = 'lazylogger_init_done' +def get_env_level(name: str) -> Level | None: + PREFIX = 'LOGGING_LEVEL_' # e.g. LOGGING_LEVEL_my_hypothesis=debug + # shell doesn't allow using dots in var names without escaping, so also support underscore syntax + lvl = os.environ.get(PREFIX + name, None) or os.environ.get(PREFIX + name.replace('.', '_'), None) + if lvl is not None: + return mklevel(lvl) + return None -def setup_logger(logger: logging.Logger, level: LevelIsh) -> None: - lvl = mklevel(level) - try: - import logzero # type: ignore[import] - except ModuleNotFoundError: - warnings.warn("You might want to install 'logzero' for nice colored logs!") - formatter = logging.Formatter(fmt=FORMAT_NOCOLOR, datefmt=DATEFMT) - use_logzero = False + +def setup_logger(logger: str | logging.Logger, *, level: LevelIsh = None) -> None: + """ + Wrapper to simplify logging setup. + """ + if isinstance(logger, str): + logger = logging.getLogger(logger) + + if level is None: + level = DEFAULT_LEVEL + + # env level always takes precedence + env_level = get_env_level(logger.name) + if env_level is not None: + lvl = env_level else: - formatter = logzero.LogFormatter( - fmt=FORMAT_COLOR, - datefmt=DATEFMT, - ) - use_logzero = True + lvl = mklevel(level) + + if logger.level == logging.NOTSET: + # if it's already set, the user requested a different logging level, let's respect that + logger.setLevel(lvl) logger.addFilter(AddExceptionTraceback()) - if use_logzero and not COLLAPSE_DEBUG_LOGS: # all set, nothing to do - # 'simple' setup - logzero.setup_logger(logger.name, level=lvl, formatter=formatter) # type: ignore[possibly-undefined] - return - h = CollapseDebugHandler() if COLLAPSE_DEBUG_LOGS else logging.StreamHandler() - logger.setLevel(lvl) - h.setLevel(lvl) - h.setFormatter(formatter) - logger.addHandler(h) - logger.propagate = False # ugh. otherwise it duplicates log messages? not sure about it.. 
+
+    collapse_level = get_collapse_level()
+    ch = logging.StreamHandler() if collapse_level is None else CollapseLogsHandler(maxlevel=collapse_level)
+
+    # default level for the handler is NOTSET, which will make it process all messages
+    # we rely on the logger to actually accept/reject log msgs
+    logger.addHandler(ch)
+
+    # this attribute is set to True by default, which causes log entries to be passed to the root logger as well (e.g. if you call basicConfig beforehand),
+    # even when the entry is already handled by this logger... not sure what the point of that behaviour is??
+    logger.propagate = False
+
+    try:
+        # try colorlog first, so the user gets nice colored logs
+        import colorlog
+    except ModuleNotFoundError:
+        warnings.warn("You might want to 'pip install colorlog' for nice colored logs")
+        formatter = logging.Formatter(FORMAT_NOCOLOR)
+    else:
+        # log_color/reset are specific to colorlog
+        FORMAT_COLOR = FORMAT.format(start='%(log_color)s', end='%(reset)s')
+        # colorlog should detect tty in principle, but doesn't handle everything for some reason,
+        # see https://github.com/borntyping/python-colorlog/issues/71 -- so check isatty ourselves
+        fmt = FORMAT_COLOR if ch.stream.isatty() else FORMAT_NOCOLOR
+        formatter = colorlog.ColoredFormatter(fmt)
+
+    ch.setFormatter(formatter)
 
 
-class LazyLogger(logging.Logger):
-    def __new__(cls, name: str, level: LevelIsh = 'INFO') -> 'LazyLogger':
-        logger = logging.getLogger(name)
-
-        # this is called prior to all _log calls so makes sense to do it here?
-        def isEnabledFor_lazyinit(*args, logger=logger, orig=logger.isEnabledFor, **kwargs) -> bool:
-            if not getattr(logger, _init_done, False):  # init once, if necessary
-                setup_logger(logger, level=level)
-                setattr(logger, _init_done, True)
-                logger.isEnabledFor = orig  # restore the callback
-            return orig(*args, **kwargs)
-
-        # oh god.. otherwise might go into an inf loop
-        if not hasattr(logger, _init_done):
-            setattr(logger, _init_done, False)  # will setup on the first call
-        logger.isEnabledFor = isEnabledFor_lazyinit  # type: ignore[method-assign]
-        return cast(LazyLogger, logger)
-
-
-# by default, logging.exception isn't logging traceback
-# which is a bit annoying since we have to
+# by default, logging.exception doesn't log the traceback unless it's called inside an exception handler,
+# which is a bit annoying since we have to pass exc_info explicitly
 # also see https://stackoverflow.com/questions/75121925/why-doesnt-python-logging-exception-method-log-traceback-by-default
-# tod also amend by post about defensive error handling?
+# todo also amend my post about defensive error handling?
 class AddExceptionTraceback(logging.Filter):
-    def filter(self, record):
+    def filter(self, record: logging.LogRecord) -> bool:
         s = super().filter(record)
         if s is False:
             return False
@@ -125,25 +155,31 @@ class AddExceptionTraceback(logging.Filter):
 
 
 # todo also save full log in a file?
-class CollapseDebugHandler(logging.StreamHandler):
+class CollapseLogsHandler(logging.StreamHandler):
     '''
     Collapses subsequent debug log lines and redraws on the same line.
     Hopefully this gives both a sense of progress and doesn't clutter the terminal as much?
     '''
-    last = False
+
+    last: bool = False
+
+    maxlevel: Level = logging.DEBUG  # everything with less or equal level will be collapsed
+
+    def __init__(self, *args, maxlevel: Level, **kwargs) -> None:
+        super().__init__(*args, **kwargs)
+        self.maxlevel = maxlevel
 
     def emit(self, record: logging.LogRecord) -> None:
         try:
             msg = self.format(record)
-            cur = record.levelno == logging.DEBUG and '\n' not in msg
+            cur = record.levelno <= self.maxlevel and '\n' not in msg
             if cur:
                 if self.last:
-                    self.stream.write('\033[K' + '\r') # clear line + return carriage
+                    self.stream.write('\033[K' + '\r')  # clear line + return carriage
             else:
                 if self.last:
-                    self.stream.write('\n') # clean up after the last debug line
+                    self.stream.write('\n')  # clean up after the last line
             self.last = cur
-            import os
             columns, _ = os.get_terminal_size(0)
             # ugh. the columns thing is meh. dunno I guess ultimately need curses for that
             # TODO also would be cool to have a terminal post-processor? kinda like tail but aware of logging keywords (INFO/DEBUG/etc)
@@ -153,5 +189,46 @@ class CollapseDebugHandler(logging.StreamHandler):
             self.handleError(record)
 
 
+@lru_cache(None)  # cache so it's only initialized once
+def make_logger(name: str, *, level: LevelIsh = None) -> logging.Logger:
+    logger = logging.getLogger(name)
+    setup_logger(logger, level=level)
+    return logger
+
+
+# ughh. hacky way to have a single enlighten instance per interpreter, so it can be shared between modules
+# not sure about this. I guess this should definitely be behind some flag
+# OK, when stdout is not a tty, enlighten doesn't output anything, good
+def get_enlighten():
+    # TODO could add env variable to disable enlighten for a module?
+    from unittest.mock import Mock
+    # a Mock is returned as a stub so clients don't have to think about whether enlighten is enabled
+
+    # for now hidden behind the flag since it's a little experimental
+    if os.environ.get('ENLIGHTEN_ENABLE', None) is None:
+        return Mock()
+
+    try:
+        import enlighten  # type: ignore[import]
+    except ModuleNotFoundError:
+        warnings.warn("You might want to 'pip install enlighten' for a nice progress bar")
+        return Mock()
+
+    # dirty, but otherwise it's a bit unclear how to share the enlighten manager between packages that call each other
+    instance = getattr(enlighten, 'INSTANCE', None)
+    if instance is not None:
+        return instance
+    instance = enlighten.get_manager()
+    setattr(enlighten, 'INSTANCE', instance)
+    return instance
+
+
 if __name__ == '__main__':
     test()
+
+
+## legacy/deprecated aliases for backwards compatibility
+LazyLogger = make_logger
+logger = make_logger
+##
diff --git a/setup.py b/setup.py
index f3f8511..5a4f75b 100644
--- a/setup.py
+++ b/setup.py
@@ -55,11 +55,12 @@ def main() -> None:
         ],
         'optional': [
             # todo document these?
-            'logzero',
             'orjson',  # for my.core.serialize
             'pyfzf_iter',  # for my.core.denylist
             'cachew>=0.8.0',
             'mypy',  # used for config checks
+            'colorlog',  # for colored logs
+            'enlighten',  # for CLI progress bars
         ],
     },
     entry_points={'console_scripts': ['hpi=my.core.__main__:main']},
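
For the enlighten integration, a consumer would look roughly like this (a sketch: `paths` and `process()` are hypothetical placeholders; `get_enlighten`, `ENLIGHTEN_ENABLE` and the standard enlighten counter()/update() API are the real pieces):

    from my.core.logging import get_enlighten

    enlighten = get_enlighten()  # a Mock stub unless ENLIGHTEN_ENABLE=true is set
    pbar = enlighten.counter(total=len(paths), desc='my.instagram.android', unit='files')
    for path in paths:
        process(path)  # hypothetical per-file work
        pbar.update()

Because the manager is cached on the enlighten module itself ('INSTANCE'), bars created by different packages (e.g. rexport.dal and ghexport.dal) render as rows of the same display, which is what produces the output shown in the commit message above.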