From f5a5b27eb7e270980560202ed44807764894f7ae Mon Sep 17 00:00:00 2001 From: Hans Andersn Date: Sat, 10 Oct 2020 21:13:45 +0200 Subject: [PATCH 1/4] Added CloudWatchFormatter and CloudWatchJSONFormatter classes to handle the formatting of the record message to CloudWatch which also make it possible to extend the formatting logic --- watchtower/__init__.py | 74 +++++++++++++++++++++++++++++++++++------- 1 file changed, 62 insertions(+), 12 deletions(-) diff --git a/watchtower/__init__.py b/watchtower/__init__.py index 1c503db..f60dbad 100644 --- a/watchtower/__init__.py +++ b/watchtower/__init__.py @@ -49,6 +49,65 @@ class WatchtowerWarning(UserWarning): pass +class CloudWatchFormatter(logging.Formatter): + """ + Log formatter for CloudWatch message. Transforms the logged record message into a compatible message for CloudWatch. + This is the default formatter for CloudWatchLogHandler + + :param json_serialize_default: + The 'default' function to use when serializing dictionaries as JSON. Refer to the Python standard library + documentation on 'json' for more explanation about the 'default' parameter. + https://docs.python.org/3/library/json.html#json.dump + https://docs.python.org/2/library/json.html#json.dump + :type json_serialize_default: Function + """ + + def __init__(self, fmt=None, datefmt=None, style='%', validate=True, json_serialize_default=None): + super().__init__(fmt=fmt, datefmt=datefmt, style=style, validate=validate) + + self.json_serialize_default = json_serialize_default or _json_serialize_default + + def format(self, message): + if isinstance(message.msg, Mapping): + message.msg = json.dumps(message.msg, default=self.json_serialize_default) + + return super().format(message) + + +class CloudWatchJSONFormatter(logging.Formatter): + """ + JSON log formatter for CloudWatch. Transforms the logged record message into a JSON formatted message. + + :param json_serialize_default: + The 'default' function to use when serializing dictionaries as JSON. 
Refer to the Python standard library + documentation on 'json' for more explanation about the 'default' parameter. + https://docs.python.org/3/library/json.html#json.dump + https://docs.python.org/2/library/json.html#json.dump + :type json_serialize_default: Function + + :param fields: A list of fields of the record to include in the CloudWatch Log json object. Defaults to '__all__'. + :type fields: list + """ + def __init__(self, fmt=None, datefmt=None, style='%', validate=True, fields='__all__', json_serialize_default=None): + super().__init__(fmt=fmt, datefmt=datefmt, style=style, validate=validate) + + self.fields = fields + self.json_serialize_default = json_serialize_default or _json_serialize_default + + def format_json(self, message): + if self.fields == '__all__': + return dict(message) + return dict((k, v) for k, v in message.items() if k in self.fields) + + def format(self, message): + message.msg = json.dumps(self.format_json(message.__dict__), default=self.json_serialize_default) + + return super().format(message) + + +_defaultFormatter = CloudWatchFormatter() + + class CloudWatchLogHandler(logging.Handler): """ Create a new CloudWatch log handler object. This is the main entry point to the functionality of the module. See @@ -90,12 +149,6 @@ class CloudWatchLogHandler(logging.Handler): :param create_log_stream: Create CloudWatch Logs log stream if it does not exist. **True** by default. :type create_log_stream: Boolean - :param json_serialize_default: - The 'default' function to use when serializing dictionaries as JSON. Refer to the Python standard library - documentation on 'json' for more explanation about the 'default' parameter. - https://docs.python.org/3/library/json.html#json.dump - https://docs.python.org/2/library/json.html#json.dump - :type json_serialize_default: Function :param max_message_size: Maximum size (in bytes) of a single message. 
:type max_message_size: Integer @@ -124,14 +177,13 @@ def _get_session(boto3_session, boto3_profile_name): def __init__(self, log_group=__name__, stream_name=None, use_queues=True, send_interval=60, max_batch_size=1024 * 1024, max_batch_count=10000, boto3_session=None, boto3_profile_name=None, create_log_group=True, log_group_retention_days=None, - create_log_stream=True, json_serialize_default=None, max_message_size=256 * 1024, - endpoint_url=None, *args, **kwargs): + create_log_stream=True, max_message_size=256 * 1024, + endpoint_url=None, formatter=None, *args, **kwargs): super().__init__(*args, **kwargs) self.log_group = log_group self.stream_name = stream_name self.use_queues = use_queues self.send_interval = send_interval - self.json_serialize_default = json_serialize_default or _json_serialize_default self.max_batch_size = max_batch_size self.max_batch_count = max_batch_count self.max_message_size = max_message_size @@ -140,6 +192,7 @@ def __init__(self, log_group=__name__, stream_name=None, use_queues=True, send_i self.creating_log_stream, self.shutting_down = False, False self.create_log_stream = create_log_stream self.log_group_retention_days = log_group_retention_days + self.formatter = formatter or _defaultFormatter # Creating session should be the final call in __init__, after all instance attributes are set. # This ensures that failing to create the session will not result in any missing attribtues. 
@@ -219,9 +272,6 @@ def emit(self, message): if stream_name not in self.sequence_tokens: self.sequence_tokens[stream_name] = None - if isinstance(message.msg, Mapping): - message.msg = json.dumps(message.msg, default=self.json_serialize_default) - cwl_message = dict(timestamp=int(message.created * 1000), message=self.format(message)) if self.use_queues: From 7dfe4207b7fd77fab77f3145deaf687944f3d607 Mon Sep 17 00:00:00 2001 From: Hans Andersn Date: Sat, 10 Oct 2020 21:18:43 +0200 Subject: [PATCH 2/4] Added limited example of the CloudWatchJSONFormatter in use with the fields argument --- README.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.rst b/README.rst index a23c489..9563e9e 100644 --- a/README.rst +++ b/README.rst @@ -149,6 +149,8 @@ configuration provided by ``boto3``: disable_existing_loggers: False formatters: json: + (): watchtower.CloudWatchJSONFormatter + fields: [msg, levelname] format: "[%(asctime)s] %(process)d %(levelname)s %(name)s:%(funcName)s:%(lineno)s - %(message)s" plaintext: format: "[%(asctime)s] %(process)d %(levelname)s %(name)s:%(funcName)s:%(lineno)s - %(message)s" From 04261d4b0482fe0785551af0ff89dd3a0da94801 Mon Sep 17 00:00:00 2001 From: Hans Andersn Date: Sat, 10 Oct 2020 21:45:16 +0200 Subject: [PATCH 3/4] Fixed pre-3.8 non-supported kwargs for logging formatter --- watchtower/__init__.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/watchtower/__init__.py b/watchtower/__init__.py index f60dbad..db0b940 100644 --- a/watchtower/__init__.py +++ b/watchtower/__init__.py @@ -62,8 +62,8 @@ class CloudWatchFormatter(logging.Formatter): :type json_serialize_default: Function """ - def __init__(self, fmt=None, datefmt=None, style='%', validate=True, json_serialize_default=None): - super().__init__(fmt=fmt, datefmt=datefmt, style=style, validate=validate) + def __init__(self, fmt=None, datefmt=None, json_serialize_default=None, **kwargs): + super().__init__(fmt=fmt, datefmt=datefmt, **kwargs) 
self.json_serialize_default = json_serialize_default or _json_serialize_default @@ -88,8 +88,8 @@ class CloudWatchJSONFormatter(logging.Formatter): :param fields: A list of fields of the record to include in the CloudWatch Log json object. Defaults to '__all__'. :type fields: list """ - def __init__(self, fmt=None, datefmt=None, style='%', validate=True, fields='__all__', json_serialize_default=None): - super().__init__(fmt=fmt, datefmt=datefmt, style=style, validate=validate) + def __init__(self, fmt=None, datefmt=None, fields='__all__', json_serialize_default=None, **kwargs): + super().__init__(fmt=fmt, datefmt=datefmt, **kwargs) self.fields = fields self.json_serialize_default = json_serialize_default or _json_serialize_default From f4fb923d1e252707064f00f4fbceeedb45f85135 Mon Sep 17 00:00:00 2001 From: "Terence D. Honles" Date: Thu, 21 Jan 2021 17:19:33 -0800 Subject: [PATCH 4/4] make changes backwards compatible --- watchtower/__init__.py | 61 +++++++++++++++++++++++++++++------------- 1 file changed, 42 insertions(+), 19 deletions(-) diff --git a/watchtower/__init__.py b/watchtower/__init__.py index db0b940..3cde59d 100644 --- a/watchtower/__init__.py +++ b/watchtower/__init__.py @@ -62,9 +62,8 @@ class CloudWatchFormatter(logging.Formatter): :type json_serialize_default: Function """ - def __init__(self, fmt=None, datefmt=None, json_serialize_default=None, **kwargs): - super().__init__(fmt=fmt, datefmt=datefmt, **kwargs) - + def __init__(self, *args, json_serialize_default=None, **kwargs): + super().__init__(*args, **kwargs) self.json_serialize_default = json_serialize_default or _json_serialize_default def format(self, message): @@ -74,7 +73,7 @@ def format(self, message): return super().format(message) -class CloudWatchJSONFormatter(logging.Formatter): +class CloudWatchJSONFormatter(CloudWatchFormatter): """ JSON log formatter for CloudWatch. Transforms the logged record message into a JSON formatted message. 
@@ -84,28 +83,23 @@ class CloudWatchJSONFormatter(logging.Formatter): https://docs.python.org/3/library/json.html#json.dump https://docs.python.org/2/library/json.html#json.dump :type json_serialize_default: Function - :param fields: A list of fields of the record to include in the CloudWatch Log json object. Defaults to '__all__'. :type fields: list """ - def __init__(self, fmt=None, datefmt=None, fields='__all__', json_serialize_default=None, **kwargs): - super().__init__(fmt=fmt, datefmt=datefmt, **kwargs) - + def __init__(self, *args, fields='__all__', **kwargs): + super().__init__(*args, **kwargs) self.fields = fields - self.json_serialize_default = json_serialize_default or _json_serialize_default - - def format_json(self, message): - if self.fields == '__all__': - return dict(message) - return dict((k, v) for k, v in message.items() if k in self.fields) def format(self, message): - message.msg = json.dumps(self.format_json(message.__dict__), default=self.json_serialize_default) + if self.fields == '__all__': + message.msg = dict(message.__dict__) + else: + message.msg = {k: v for k, v in message.__dict__.items() if k in self.fields} return super().format(message) -_defaultFormatter = CloudWatchFormatter() +_default_formatter = CloudWatchFormatter() class CloudWatchLogHandler(logging.Handler): @@ -149,6 +143,14 @@ class CloudWatchLogHandler(logging.Handler): :param create_log_stream: Create CloudWatch Logs log stream if it does not exist. **True** by default. :type create_log_stream: Boolean + :param json_serialize_default: + **DEPRECATED**: use CloudWatchFormatter for JSON formatting instead. + + The 'default' function to use when serializing dictionaries as JSON. Refer to the Python standard library + documentation on 'json' for more explanation about the 'default' parameter. 
+ https://docs.python.org/3/library/json.html#json.dump + https://docs.python.org/2/library/json.html#json.dump + :type json_serialize_default: Function :param max_message_size: Maximum size (in bytes) of a single message. :type max_message_size: Integer @@ -177,8 +179,8 @@ def _get_session(boto3_session, boto3_profile_name): def __init__(self, log_group=__name__, stream_name=None, use_queues=True, send_interval=60, max_batch_size=1024 * 1024, max_batch_count=10000, boto3_session=None, boto3_profile_name=None, create_log_group=True, log_group_retention_days=None, - create_log_stream=True, max_message_size=256 * 1024, - endpoint_url=None, formatter=None, *args, **kwargs): + create_log_stream=True, json_serialize_default=None, max_message_size=256 * 1024, + endpoint_url=None, *args, **kwargs): super().__init__(*args, **kwargs) self.log_group = log_group self.stream_name = stream_name @@ -192,7 +194,12 @@ def __init__(self, log_group=__name__, stream_name=None, use_queues=True, send_i self.creating_log_stream, self.shutting_down = False, False self.create_log_stream = create_log_stream self.log_group_retention_days = log_group_retention_days - self.formatter = formatter or _defaultFormatter + self.json_serialize_default = json_serialize_default + if json_serialize_default: + warnings.warn( + 'Specifying json_serialize_default is deprecated, please create a CloudWatchFormatter instance ' + 'which accepts a json_serialize_default and set it as a formatter instead', + DeprecationWarning) # Creating session should be the final call in __init__, after all instance attributes are set. # This ensures that failing to create the session will not result in any missing attribtues. @@ -261,6 +268,19 @@ def _submit_batch(self, batch, stream_name, max_retries=5): # from the response self.sequence_tokens[stream_name] = response["nextSequenceToken"] + def format(self, record): + """ + Format the specified record. + + If a formatter is set, use it. 
Otherwise, use the default formatter for the module. This differs from + `logging.Handler.format` as its default is `CloudWatchFormatter`. + """ + if self.formatter: + fmt = self.formatter + else: + fmt = _default_formatter + return fmt.format(record) + def emit(self, message): if self.creating_log_stream: return # Avoid infinite recursion when asked to log a message as our own side effect @@ -272,6 +292,9 @@ def emit(self, message): if stream_name not in self.sequence_tokens: self.sequence_tokens[stream_name] = None + if self.json_serialize_default and isinstance(message.msg, Mapping): + message.msg = json.dumps(message.msg, default=self.json_serialize_default) + cwl_message = dict(timestamp=int(message.created * 1000), message=self.format(message)) if self.use_queues: