Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,17 @@ Fixed
* Make sure we don't log auth token and api key inside st2api log file if those values are provided
via query parameter and not header (``?x-auth-token=foo``, ``?st2-api-key=bar``). (bug fix) #4592
#4589
* Fix rendering of config_context in orquesta task that references action in different pack
* Fix rendering of config_context in orquesta task that references action in different pack.
(bug fix) #4570
* Add missing default config location (``/etc/st2/st2.conf``) to the following services:
``st2actionrunner``, ``st2scheduler``, ``st2workflowengine``. (bug fix) #4596
* Update statsd metrics driver so any exception thrown by statsd library is treated as non fatal.

Previously there was an edge case if user used a hostname instead of an IP address for metrics
backend server address. In such scenario, if hostname DNS resolution failed, statsd driver would
throw the exception which would propagate all the way up and break the application. (bug fix) #4597

Reported by Chris McKenzie.

2.10.3 - March 06, 2019
-----------------------
Expand Down
35 changes: 35 additions & 0 deletions st2common/st2common/metrics/drivers/statsd_driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,28 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import socket
import logging as stdlib_logging
from numbers import Number

import statsd
from oslo_config import cfg

from st2common import log as logging
from st2common.metrics.base import BaseMetricsDriver
from st2common.metrics.utils import check_key
from st2common.metrics.utils import get_full_key_name
from st2common.util.misc import ignore_and_log_exception


LOG = logging.getLogger(__name__)

# Which exceptions thrown by statsd library should be considered as non-fatal
NON_FATAL_EXC_CLASSES = [
socket.error,
IOError,
OSError
]

__all__ = [
'StatsdDriver'
Expand All @@ -30,11 +44,22 @@
class StatsdDriver(BaseMetricsDriver):
"""
StatsD Implementation of the metrics driver

NOTE: Statsd uses UDP which is "fire and forget" and any kind of send error is not fatal. There
is an issue with python-statsd library though which doesn't ignore DNS resolution related errors
and bubbles them all the way up.

This of course breaks the application. Any kind of metric related errors should be considered
as non-fatal and not degrade application in any way if an error occurs. That's why we wrap all
the statsd library calls here to ignore the errors and just log them.
"""

def __init__(self):
statsd.Connection.set_defaults(host=cfg.CONF.metrics.host, port=cfg.CONF.metrics.port,
sample_rate=cfg.CONF.metrics.sample_rate)

@ignore_and_log_exception(exc_classes=NON_FATAL_EXC_CLASSES, logger=LOG,
level=stdlib_logging.WARNING)
def time(self, key, time):
"""
Timer metric
Expand All @@ -46,6 +71,8 @@ def time(self, key, time):
timer = statsd.Timer('')
timer.send(key, time)

@ignore_and_log_exception(exc_classes=NON_FATAL_EXC_CLASSES, logger=LOG,
level=stdlib_logging.WARNING)
def inc_counter(self, key, amount=1):
"""
Increment counter
Expand All @@ -57,6 +84,8 @@ def inc_counter(self, key, amount=1):
counter = statsd.Counter(key)
counter.increment(delta=amount)

@ignore_and_log_exception(exc_classes=NON_FATAL_EXC_CLASSES, logger=LOG,
level=stdlib_logging.WARNING)
def dec_counter(self, key, amount=1):
"""
Decrement metric
Expand All @@ -68,6 +97,8 @@ def dec_counter(self, key, amount=1):
counter = statsd.Counter(key)
counter.decrement(delta=amount)

@ignore_and_log_exception(exc_classes=NON_FATAL_EXC_CLASSES, logger=LOG,
level=stdlib_logging.WARNING)
def set_gauge(self, key, value):
"""
Set gauge value.
Expand All @@ -79,6 +110,8 @@ def set_gauge(self, key, value):
gauge = statsd.Gauge(key)
gauge.send(None, value)

@ignore_and_log_exception(exc_classes=NON_FATAL_EXC_CLASSES, logger=LOG,
level=stdlib_logging.WARNING)
def inc_gauge(self, key, amount=1):
"""
Increment gauge value.
Expand All @@ -90,6 +123,8 @@ def inc_gauge(self, key, amount=1):
gauge = statsd.Gauge(key)
gauge.increment(None, amount)

@ignore_and_log_exception(exc_classes=NON_FATAL_EXC_CLASSES, logger=LOG,
level=stdlib_logging.WARNING)
def dec_gauge(self, key, amount=1):
"""
Decrement gauge value.
Expand Down
31 changes: 30 additions & 1 deletion st2common/st2common/util/misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,22 @@

from __future__ import absolute_import

import logging

import os
import re
import sys
import collections
import functools

import six

__all__ = [
'prefix_dict_keys',
'compare_path_file_name',
'lowercase_value',
'get_field_name_from_mongoengine_error'
'get_field_name_from_mongoengine_error',

]


Expand Down Expand Up @@ -168,3 +172,28 @@ def get_field_name_from_mongoengine_error(exc):
return match.groups()[0]

return msg


def ignore_and_log_exception(exc_classes=(Exception,), logger=None, level=logging.WARNING):
"""
Decorator which catches the provided exception classes and logs them instead of letting them
bubble all the way up.
"""
exc_classes = tuple(exc_classes)

def decorator(func):
@functools.wraps(func)
def wrapper(*args, **kwargs):
try:
return func(*args, **kwargs)
except exc_classes as e:
if len(args) >= 1 and getattr(args[0], '__class__', None):
func_name = '%s.%s' % (args[0].__class__.__name__, func.__name__)
else:
func_name = func.__name__

message = ('Exception in fuction "%s": %s' % (func_name, str(e)))
logger.log(level, message)

return wrapper
return decorator
52 changes: 51 additions & 1 deletion st2common/tests/unit/test_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,13 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from datetime import datetime, timedelta

import socket
from datetime import datetime
from datetime import timedelta

import unittest2
import mock
from mock import patch, MagicMock

from oslo_config import cfg
Expand Down Expand Up @@ -200,6 +204,52 @@ def test_get_full_key_name(self):
result = get_full_key_name('api.requests')
self.assertEqual(result, 'st2.prod.api.requests')

@patch('st2common.metrics.drivers.statsd_driver.LOG')
@patch('st2common.metrics.drivers.statsd_driver.statsd')
def test_driver_socket_exceptions_are_not_fatal(self, statsd, mock_log):
# Socket errors such as DNS resolution errors should be considered non fatal and ignored
mock_logger = mock.Mock()
StatsdDriver.logger = mock_logger

# 1. timer
mock_timer = MagicMock(side_effect=socket.error('error 1'))
statsd.Timer('').send.side_effect = mock_timer
params = ('test', 10)
self._driver.time(*params)
statsd.Timer('').send.assert_called_with('st2.test', 10)

# 2. counter
key = 'test'
mock_counter = MagicMock(side_effect=socket.error('error 2'))
statsd.Counter(key).increment.side_effect = mock_counter
self._driver.inc_counter(key)
mock_counter.assert_called_once_with(delta=1)

key = 'test'
mock_counter = MagicMock(side_effect=socket.error('error 3'))
statsd.Counter(key).decrement.side_effect = mock_counter
self._driver.dec_counter(key)
mock_counter.assert_called_once_with(delta=1)

# 3. gauge
params = ('key', 100)
mock_gauge = MagicMock(side_effect=socket.error('error 4'))
statsd.Gauge().send.side_effect = mock_gauge
self._driver.set_gauge(*params)
mock_gauge.assert_called_once_with(None, params[1])

params = ('key1',)
mock_gauge = MagicMock(side_effect=socket.error('error 5'))
statsd.Gauge().increment.side_effect = mock_gauge
self._driver.inc_gauge(*params)
mock_gauge.assert_called_once_with(None, 1)

params = ('key1',)
mock_gauge = MagicMock(side_effect=socket.error('error 6'))
statsd.Gauge().decrement.side_effect = mock_gauge
self._driver.dec_gauge(*params)
mock_gauge.assert_called_once_with(None, 1)


class TestCounterContextManager(unittest2.TestCase):
@patch('st2common.metrics.base.METRICS')
Expand Down