madzak / python-json-logger

Json Formatter for the standard python logger
BSD 2-Clause "Simplified" License
1.7k stars 231 forks source link

Question: how to have a unique ID per request #191

Open ppi-agray opened 1 month ago

ppi-agray commented 1 month ago

I have a request to have a unique ID in the logs "per request". I can reproduce the problem that I'm trying to solve by this loop:

    for i in range(3):
        update_logger(__name__) # this is where I try to set a unique ID
        # I want each of these messages to have the SAME unique_id
        logger.info('loop start')
        logger.info(f'loop: {i}')
        logger.info('loop end')

I'm hoping to have something like:

{"message": "loop start", "request_id": "XXX"}
{"message": "loop: 0", "request_id": "XXX"}
{"message": "loop end", "request_id": "XXX"}
{"message": "loop start", "request_id": "YYY"}
{"message": "loop: 1", "request_id": "YYY"}
{"message": "loop end", "request_id": "YYY"}
{"message": "loop start", "request_id": "ZZZ"}
{"message": "loop: 2", "request_id": "ZZZ"}
{"message": "loop end", "request_id": "ZZZ"}

But instead I'm getting a unique ID PER message:

{"message": "loop start", "request_id": "aaa1f7986f144601af5a533baee1aca7"}
{"message": "loop: 0", "request_id": "afd7c033339a4030adcfb6eff78a007f"}
{"message": "loop end", "request_id": "63c51ca47afc4f73920c0e6998bf61ac"}
{"message": "loop start", "request_id": "21484da3aa0b46559310cbef3b6c5391"}
{"message": "loop: 1", "request_id": "67bdb9a7944b479aac2c4f86bef2fd4c"}
{"message": "loop end", "request_id": "8d0f7469a34b405ba6adf0cf51118c33"}
{"message": "loop start", "request_id": "89a4387b10634d268b2fbe0fd35ce677"}
{"message": "loop: 2", "request_id": "bf4099ed93f046628baa18a8201ca201"}
{"message": "loop end", "request_id": "60777e3113b84daabd12b003384e4fb8"}

My full code is like this:

#from utilities.logs import get_logger
import logging
from pythonjsonlogger import jsonlogger
import uuid

class CustomJsonFormatter(jsonlogger.JsonFormatter):
    def add_fields(self, log_record, record, message_dict):
        super(CustomJsonFormatter, self).add_fields(log_record, record, message_dict)
        log_record['request_id'] = uuid.uuid4().hex

def get_logger(logger_name):
    handler = logging.StreamHandler()
    logger = logging.getLogger(logger_name)
    logger.setLevel(logging.INFO)
    formatter = CustomJsonFormatter('%(message)s')
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    return logger

def update_logger(logger_name):
    logger = logging.getLogger(logger_name)
    myhandler = logger.handlers[0]
    myhandler.setFormatter(CustomJsonFormatter('%(message)s'))

logger = get_logger(__name__)

def main():
    for i in range(3):
        update_logger(__name__)
        # I want each of these messages to have the SAME unique_id
        logger.info('loop start')
        logger.info(f'loop: {i}')
        logger.info('loop end')

if __name__ == '__main__':
    main()

I was hoping the function add_fields would be called just once, when I'm setting the formatter, but it looks like it's called every time I print a log message.

Is there a way to do this with python-json-logger?

thanks!!!

nhairs commented 1 month ago

Hi @ppi-agray,

Just so you're aware it looks like python-json-logger is currently unmaintained, that said I am working on a maintained fork.

Whilst the below examples have been tested using my fork, I'm pretty sure that this still applies to the original library as well.

You're correct that add_fields is called on every logging call - that's because this is the method that converts the LogRecord into the dict that will be converted to JSON and logged.

There's probably a few ways that you can achieve this (what is preferable depends on your use-case).

import logging
import uuid
# https://github.com/nhairs/python-json-logger v3.1.0.rc2
from pythonjsonlogger.json import JsonFormatter

## Setup
## -----------------------------------------------------------------------------
logger = logging.getLogger("test")
logger.setLevel(logging.INFO)
handler = logging.StreamHandler()
logger.addHandler(handler)

## Solution 1
## -----------------------------------------------------------------------------
formatter = JsonFormatter()
handler.setFormatter(formatter)

def main_1():
    print("========== MAIN 1 ==========")
    for i in range(3):
        request_id = uuid.uuid4().hex
        logger.info("loop start", extra={"request_id": request_id})
        logger.info(f"loop {i}", extra={"request_id": request_id})
        logger.info("loop end", extra={"request_id": request_id})
    return

main_1()

## Solution 2
## -----------------------------------------------------------------------------
REQUEST_ID: str | None = None

def get_request_id() -> str:
    return REQUEST_ID

def generate_request_id():
    global REQUEST_ID
    REQUEST_ID = uuid.uuid4().hex

class RequestIdFilter(logging.Filter):
    # Ref: https://docs.python.org/3/howto/logging-cookbook.html#using-filters-to-impart-contextual-information

    def filter(self, record):
        record.record_id = get_request_id()
        return True

request_id_filter = RequestIdFilter()
logger.addFilter(request_id_filter)

def main_2():
    print("========== MAIN 2 ==========")
    for i in range(3):
        generate_request_id()
        logger.info("loop start")
        logger.info(f"loop {i}")
        logger.info("loop end")
    return

main_2()

logger.removeFilter(request_id_filter)

## Solution 3
## -----------------------------------------------------------------------------
# Reuse REQUEST_ID stuff from solution 2
class MyFormatter(JsonFormatter):
    def process_log_record(self, log_record):
        log_record["request_id"] = get_request_id()
        return log_record

handler.setFormatter(MyFormatter())

def main_3():
    print("========== MAIN 3 ==========")
    for i in range(3):
        generate_request_id()
        logger.info("loop start")
        logger.info(f"loop {i}")
        logger.info("loop end")
    return

main_3()