diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 5174e90086..c226add53b 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -59,8 +59,17 @@ Fixed * Make sure we don't log auth token and api key inside st2api log file if those values are provided via query parameter and not header (``?x-auth-token=foo``, ``?st2-api-key=bar``). (bug fix) #4592 #4589 -* Fix rendering of config_context in orquesta task that references action in different pack +* Fix rendering of config_context in orquesta task that references action in different pack. (bug fix) #4570 +* Add missing default config location (``/etc/st2/st2.conf``) to the following services: + ``st2actionrunner``, ``st2scheduler``, ``st2workflowengine``. (bug fix) #4596 +* Update statsd metrics driver so any exception thrown by statsd library is treated as non fatal. + + Previously there was an edge case if user used a hostname instead of an IP address for metrics + backend server address. In such scenario, if hostname DNS resolution failed, statsd driver would + throw the exception which would propagate all the way up and break the application. (bug fix) #4597 + + Reported by Chris McKenzie. 2.10.3 - March 06, 2019 ----------------------- diff --git a/st2common/st2common/metrics/drivers/statsd_driver.py b/st2common/st2common/metrics/drivers/statsd_driver.py index 163938f291..2324ab4b71 100644 --- a/st2common/st2common/metrics/drivers/statsd_driver.py +++ b/st2common/st2common/metrics/drivers/statsd_driver.py @@ -13,14 +13,28 @@ # See the License for the specific language governing permissions and # limitations under the License. +import socket +import logging as stdlib_logging from numbers import Number import statsd from oslo_config import cfg +from st2common import log as logging from st2common.metrics.base import BaseMetricsDriver from st2common.metrics.utils import check_key from st2common.metrics.utils import get_full_key_name +from st2common.util.misc import ignore_and_log_exception + + +LOG = logging.getLogger(__name__) + +# Which exceptions thrown by statsd library should be considered as non-fatal +NON_FATAL_EXC_CLASSES = [ + socket.error, + IOError, + OSError +] __all__ = [ 'StatsdDriver' @@ -30,11 +44,22 @@ class StatsdDriver(BaseMetricsDriver): """ StatsD Implementation of the metrics driver + + NOTE: Statsd uses UDP which is "fire and forget" and any kind of send error is not fatal. There + is an issue with python-statsd library though which doesn't ignore DNS resolution related errors + and bubbles them all the way up. + + This of course breaks the application. Any kind of metric related errors should be considered + as non-fatal and not degrade application in any way if an error occurs. That's why we wrap all + the statsd library calls here to ignore the errors and just log them. """ + def __init__(self): statsd.Connection.set_defaults(host=cfg.CONF.metrics.host, port=cfg.CONF.metrics.port, sample_rate=cfg.CONF.metrics.sample_rate) + @ignore_and_log_exception(exc_classes=NON_FATAL_EXC_CLASSES, logger=LOG, + level=stdlib_logging.WARNING) def time(self, key, time): """ Timer metric @@ -46,6 +71,8 @@ def time(self, key, time): timer = statsd.Timer('') timer.send(key, time) + @ignore_and_log_exception(exc_classes=NON_FATAL_EXC_CLASSES, logger=LOG, + level=stdlib_logging.WARNING) def inc_counter(self, key, amount=1): """ Increment counter @@ -57,6 +84,8 @@ def inc_counter(self, key, amount=1): counter = statsd.Counter(key) counter.increment(delta=amount) + @ignore_and_log_exception(exc_classes=NON_FATAL_EXC_CLASSES, logger=LOG, + level=stdlib_logging.WARNING) def dec_counter(self, key, amount=1): """ Decrement metric @@ -68,6 +97,8 @@ def dec_counter(self, key, amount=1): counter = statsd.Counter(key) counter.decrement(delta=amount) + @ignore_and_log_exception(exc_classes=NON_FATAL_EXC_CLASSES, logger=LOG, + level=stdlib_logging.WARNING) def set_gauge(self, key, value): """ Set gauge value. @@ -79,6 +110,8 @@ def set_gauge(self, key, value): gauge = statsd.Gauge(key) gauge.send(None, value) + @ignore_and_log_exception(exc_classes=NON_FATAL_EXC_CLASSES, logger=LOG, + level=stdlib_logging.WARNING) def inc_gauge(self, key, amount=1): """ Increment gauge value. @@ -90,6 +123,8 @@ def inc_gauge(self, key, amount=1): gauge = statsd.Gauge(key) gauge.increment(None, amount) + @ignore_and_log_exception(exc_classes=NON_FATAL_EXC_CLASSES, logger=LOG, + level=stdlib_logging.WARNING) def dec_gauge(self, key, amount=1): """ Decrement gauge value. diff --git a/st2common/st2common/util/misc.py b/st2common/st2common/util/misc.py index 0768e2cb35..02d8f31513 100644 --- a/st2common/st2common/util/misc.py +++ b/st2common/st2common/util/misc.py @@ -15,10 +15,13 @@ from __future__ import absolute_import +import logging + import os import re import sys import collections +import functools import six @@ -26,7 +29,8 @@ 'prefix_dict_keys', 'compare_path_file_name', 'lowercase_value', - 'get_field_name_from_mongoengine_error' + 'get_field_name_from_mongoengine_error', + ] @@ -168,3 +172,28 @@ def get_field_name_from_mongoengine_error(exc): return match.groups()[0] return msg + + +def ignore_and_log_exception(exc_classes=(Exception,), logger=None, level=logging.WARNING): + """ + Decorator which catches the provided exception classes and logs them instead of letting them + bubble all the way up. + """ + exc_classes = tuple(exc_classes) + + def decorator(func): + @functools.wraps(func) + def wrapper(*args, **kwargs): + try: + return func(*args, **kwargs) + except exc_classes as e: + if len(args) >= 1 and getattr(args[0], '__class__', None): + func_name = '%s.%s' % (args[0].__class__.__name__, func.__name__) + else: + func_name = func.__name__ + + message = ('Exception in fuction "%s": %s' % (func_name, str(e))) + logger.log(level, message) + + return wrapper + return decorator diff --git a/st2common/tests/unit/test_metrics.py b/st2common/tests/unit/test_metrics.py index dbd2750432..1c7543ca7b 100644 --- a/st2common/tests/unit/test_metrics.py +++ b/st2common/tests/unit/test_metrics.py @@ -12,9 +12,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from datetime import datetime, timedelta + +import socket +from datetime import datetime +from datetime import timedelta import unittest2 +import mock from mock import patch, MagicMock from oslo_config import cfg @@ -200,6 +204,52 @@ def test_get_full_key_name(self): result = get_full_key_name('api.requests') self.assertEqual(result, 'st2.prod.api.requests') + @patch('st2common.metrics.drivers.statsd_driver.LOG') + @patch('st2common.metrics.drivers.statsd_driver.statsd') + def test_driver_socket_exceptions_are_not_fatal(self, statsd, mock_log): + # Socket errors such as DNS resolution errors should be considered non fatal and ignored + mock_logger = mock.Mock() + StatsdDriver.logger = mock_logger + + # 1. timer + mock_timer = MagicMock(side_effect=socket.error('error 1')) + statsd.Timer('').send.side_effect = mock_timer + params = ('test', 10) + self._driver.time(*params) + statsd.Timer('').send.assert_called_with('st2.test', 10) + + # 2. counter + key = 'test' + mock_counter = MagicMock(side_effect=socket.error('error 2')) + statsd.Counter(key).increment.side_effect = mock_counter + self._driver.inc_counter(key) + mock_counter.assert_called_once_with(delta=1) + + key = 'test' + mock_counter = MagicMock(side_effect=socket.error('error 3')) + statsd.Counter(key).decrement.side_effect = mock_counter + self._driver.dec_counter(key) + mock_counter.assert_called_once_with(delta=1) + + # 3. gauge + params = ('key', 100) + mock_gauge = MagicMock(side_effect=socket.error('error 4')) + statsd.Gauge().send.side_effect = mock_gauge + self._driver.set_gauge(*params) + mock_gauge.assert_called_once_with(None, params[1]) + + params = ('key1',) + mock_gauge = MagicMock(side_effect=socket.error('error 5')) + statsd.Gauge().increment.side_effect = mock_gauge + self._driver.inc_gauge(*params) + mock_gauge.assert_called_once_with(None, 1) + + params = ('key1',) + mock_gauge = MagicMock(side_effect=socket.error('error 6')) + statsd.Gauge().decrement.side_effect = mock_gauge + self._driver.dec_gauge(*params) + mock_gauge.assert_called_once_with(None, 1) + class TestCounterContextManager(unittest2.TestCase): @patch('st2common.metrics.base.METRICS')