Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,15 @@ in development
--------------


* Service start up code has been updated to log a warning if a non-utf-8 encoding / locale is
detected.

Using non-utf-8 locale while working with unicode data will result in various issues so users
are strongly recommended to ensure encoding for all the StackStorm service is
set to ``utf-8``. (#5182)

Contributed by @Kami.

3.4.0 - March 02, 2021
----------------------

Expand Down
58 changes: 56 additions & 2 deletions st2common/st2common/log.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,24 @@
'audit'
]

# True if sys.stdout should be patched and re-opened with utf-8 encoding in situations where
# utf-8 encoding is not used (if we don't do that and a logger tries to log a unicode string,
# log format would go in an infinite loop).
# We only expose this variable for testing purposes
PATCH_STDOUT = os.environ.get("ST2_LOG_PATCH_STDOUT", "true").lower() in [
"true",
"1",
"yes",
]

LOG = logging.getLogger(__name__)

# Note: This attribute is used by "find_caller" so it can correctly exclude this file when looking
# for the logger method caller frame.
_srcfile = get_normalized_file_path(__file__)

_original_stdstderr = sys.stderr


def find_caller(stack_info=False, stacklevel=1):
"""
Expand Down Expand Up @@ -197,6 +211,13 @@ def __init__(self, name, level=logging.ERROR):
self._level = level

def write(self, message):
# Work around for infinite loop issue - ensure we don't log unicode data.
# If message contains unicode sequences and process locale is not set to utf-8, it would
# result in infinite lop in _log on formatting the message.
# This is because of the differences between Python 2.7 and Python 3 with log error
# handlers.
message = message.encode("utf-8", "replace").decode("ascii", "ignore")

self._logger._log(self._level, message, None)

def flush(self):
Expand All @@ -222,8 +243,40 @@ def _redirect_stderr():
sys.stderr = LoggingStream('STDERR')


def setup(config_file, redirect_stderr=True, excludes=None, disable_existing_loggers=False,
st2_conf_path=None):
def _patch_stdout():
"""
This function re-opens sys.stdout using utf-8 encoding.

It's to be used in situations where process encoding / locale is not set to utf-8. In such
situations when unicode sequence is logged, it would cause logging formatter to go in an infite
loop on formatting a record.

This function works around that by ensuring sys.stdout is always opened in utf-8 mode.
"""

stdout_encoding = getattr(sys.stdout, "encoding", "none")

if stdout_encoding not in ["utf8", "utf-8"] and PATCH_STDOUT:
LOG.info(
"Patching sys.stdout and re-opening it with utf-8 encoding (originally opened "
"with %s encoding)..." % (stdout_encoding)
)
sys.stdout = open(
sys.stdout.fileno(),
mode="w",
encoding="utf-8",
errors="replace",
buffering=1,
)


def setup(
config_file,
redirect_stderr=True,
excludes=None,
disable_existing_loggers=False,
st2_conf_path=None,
):
"""
Configure logging from file.

Expand All @@ -246,6 +299,7 @@ def setup(config_file, redirect_stderr=True, excludes=None, disable_existing_log
_add_exclusion_filters(handlers=handlers, excludes=excludes)
if redirect_stderr:
_redirect_stderr()
_patch_stdout()
except Exception as exc:
exc_cls = type(exc)
tb_msg = traceback.format_exc()
Expand Down
76 changes: 70 additions & 6 deletions st2common/st2common/service_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import os
import sys
import traceback
import locale
import logging as stdlib_logging

import six
Expand Down Expand Up @@ -62,6 +63,19 @@
'register_service_in_service_registry'
]

# Message which is logged if non utf-8 locale is detected on startup.
NON_UTF8_LOCALE_WARNING_MSG = """
Detected a non utf-8 locale / encoding (fs encoding: %s, default encoding: %s, locale: %s).
Using non utf-8 locale while working with unicode data will result in exceptions and undefined
behavior.
You are strongly encouraged to configure all the StackStorm services to use utf-8 encoding (e.g.
LANG=en_US.UTF-8).
""".strip().replace(
"\n", " "
)

VALID_UTF8_ENCODINGS = ["utf8", "utf-8"]

LOG = logging.getLogger(__name__)


Expand Down Expand Up @@ -99,20 +113,61 @@ def setup(service, config, setup_db=True, register_mq_exchanges=True,
else:
config.parse_args()

version = '%s.%s.%s' % (sys.version_info[0], sys.version_info[1], sys.version_info[2])
LOG.debug('Using Python: %s (%s)' % (version, sys.executable))
version = "%s.%s.%s" % (
sys.version_info[0],
sys.version_info[1],
sys.version_info[2],
)

# We print locale related info to make it easier to troubleshoot issues where locale is not
# set correctly (e.g. using C / ascii, but services are trying to work with unicode data
# would result in things blowing up)

fs_encoding = sys.getfilesystemencoding()
default_encoding = sys.getdefaultencoding()
lang_env = os.environ.get("LANG", "notset")
pythonioencoding_env = os.environ.get("PYTHONIOENCODING", "notset")

try:
language_code, encoding = locale.getdefaultlocale()

if language_code and encoding:
used_locale = ".".join([language_code, encoding])
else:
used_locale = "unable to retrieve locale"
except Exception as e:
language_code, encoding = "unknown", "unknown"
used_locale = "unable to retrieve locale: %s " % (str(e))

LOG.info("Using Python: %s (%s)" % (version, sys.executable))
LOG.info(
"Using fs encoding: %s, default encoding: %s, locale: %s, LANG env variable: %s, "
"PYTHONIOENCODING env variable: %s"
% (fs_encoding, default_encoding, lang_env, used_locale, pythonioencoding_env)
)

config_file_paths = cfg.CONF.config_file
config_file_paths = [os.path.abspath(path) for path in config_file_paths]
LOG.debug('Using config files: %s', ','.join(config_file_paths))
LOG.info("Using config files: %s", ",".join(config_file_paths))

# Setup logging.
logging_config_path = config.get_logging_config_path()
logging_config_path = os.path.abspath(logging_config_path)

LOG.debug('Using logging config: %s', logging_config_path)
LOG.info("Using logging config: %s", logging_config_path)

# Warn on non utf-8 locale which could cause issues when running under Python 3 and working
# with unicode data
if (
fs_encoding.lower() not in VALID_UTF8_ENCODINGS or
encoding.lower() not in VALID_UTF8_ENCODINGS
):
LOG.warning(
NON_UTF8_LOCALE_WARNING_MSG
% (fs_encoding, default_encoding, used_locale.strip())
)

is_debug_enabled = (cfg.CONF.debug or cfg.CONF.system.debug)
is_debug_enabled = cfg.CONF.debug or cfg.CONF.system.debug

try:
logging.setup(logging_config_path, redirect_stderr=cfg.CONF.log.redirect_stderr,
Expand All @@ -137,7 +192,16 @@ def setup(service, config, setup_db=True, register_mq_exchanges=True,
ignore_audit_log_messages = (handler.level >= stdlib_logging.INFO and
handler.level < stdlib_logging.AUDIT)
if not is_debug_enabled and ignore_audit_log_messages:
LOG.debug('Excluding log messages with level "AUDIT" for handler "%s"' % (handler))
try:
handler_repr = str(handler)
except TypeError:
# In case handler doesn't have name assigned, repr would throw
handler_repr = "unknown"

LOG.debug(
'Excluding log messages with level "AUDIT" for handler "%s"'
% (handler_repr)
)
handler.addFilter(LogLevelFilter(log_levels=exclude_log_levels))

if not is_debug_enabled:
Expand Down
86 changes: 86 additions & 0 deletions st2common/tests/integration/log_unicode_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# -*- coding: utf-8 -*-
# Copyright 2020 The StackStorm Authors.
# Copyright 2019 Extreme Networks, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
This file is used to test edge case with logging unicode data.
"""

from __future__ import absolute_import

import os
import sys

from oslo_config import cfg

BASE_DIR = os.path.dirname(os.path.abspath(__file__))

ST2ACTIONS_PATH = os.path.join(BASE_DIR, "../../../st2actions")
ST2COMMON_PATH = os.path.join(BASE_DIR, "../../")
ST2TESTS_PATH = os.path.join(BASE_DIR, "../../../st2tests")

# Ensure st2actions is in PYTHONPATH.
# This is needed since this script is spawned from inside integration tests
sys.path.append(ST2ACTIONS_PATH)
sys.path.append(ST2COMMON_PATH)
sys.path.append(ST2TESTS_PATH)

from st2actions.notifier import config
from st2common import log as logging
from st2common.service_setup import setup as common_setup

FIXTURES_DIR = os.path.join(ST2TESTS_PATH, "st2tests/fixtures")
ST2_CONFIG_DEBUG_LL_PATH = os.path.join(
FIXTURES_DIR, "conf/st2.tests.api.debug_log_level.conf"
)

LOG = logging.getLogger(__name__)


def main():
cfg.CONF.set_override("debug", True)
common_setup(
service="test",
config=config,
setup_db=False,
run_migrations=False,
register_runners=False,
register_internal_trigger_types=False,
register_mq_exchanges=False,
register_signal_handlers=False,
service_registry=False,
config_args=["--config-file", ST2_CONFIG_DEBUG_LL_PATH],
)

LOG.info("Test info message 1")
LOG.debug("Test debug message 1")

# 1. Actual unicode sequence
LOG.info("Test info message with unicode 1 - 好好好")
LOG.debug("Test debug message with unicode 1 - 好好好")

# 2. Ascii escape sequence
LOG.info(
"Test info message with unicode 1 - " +
"好好好".encode("ascii", "backslashreplace").decode("ascii", "backslashreplace")
)
LOG.debug(
"Test debug message with unicode 1 - " +
"好好好".encode("ascii", "backslashreplace").decode("ascii", "backslashreplace")
)


if __name__ == "__main__":
main()
Loading