Skip to content

Commit 84fdd3e

Browse files
authored
Enhance swss-common logging by writing a warning log when a command fails due to Redis loading the dataset. (#986)
Enhance swss-common logging by writing a warning log when a command fails due to Redis loading the dataset. Why I did it Fix issue sonic-net/sonic-buildimage#17472 sonic-db-cli write error log, this is because sonic-db-cli execute PING command when Redis is loading dataset. How I did it When command failed because Redis is loading dataset, write warning log instead of error log. Description for the changelog Enhance swss-common logging by writing a warning log when a command fails due to Redis loading the dataset.
1 parent 8a9e60b commit 84fdd3e

4 files changed

Lines changed: 133 additions & 18 deletions

File tree

common/redisreply.cpp

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
#include <vector>
55
#include <iostream>
66
#include <sstream>
7-
#include <system_error>
87
#include <functional>
98
#include <boost/algorithm/string.hpp>
109

@@ -60,22 +59,6 @@ static set<string> g_strToBoolCommands = {
6059
"SET"
6160
};
6261

63-
template <typename FUNC>
64-
inline void guard(FUNC func, const char* command)
65-
{
66-
try
67-
{
68-
func();
69-
}
70-
catch (const system_error& ex)
71-
{
72-
// Combine more error message and throw again
73-
string errmsg = "RedisReply catches system_error: command: " + string(command) + ", reason: " + ex.what();
74-
SWSS_LOG_ERROR("%s", errmsg.c_str());
75-
throw system_error(ex.code(), errmsg.c_str());
76-
}
77-
}
78-
7962
RedisReply::RedisReply(RedisContext *ctx, const RedisCommand& command)
8063
{
8164
int rc = command.appendTo(ctx->getContext());

common/redisreply.h

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
#include <hiredis/hiredis.h>
55
#include <string>
66
#include <stdexcept>
7+
#include <system_error>
8+
#include "logger.h"
79
#include "rediscommand.h"
810

911
namespace swss {
@@ -114,5 +116,31 @@ class RedisReply
114116
redisReply *m_reply;
115117
};
116118

119+
template <typename FUNC>
120+
inline void guard(FUNC func, const char* command)
121+
{
122+
try
123+
{
124+
func();
125+
}
126+
catch (const std::system_error& ex)
127+
{
128+
// Combine more error message and throw again
129+
std::string reason = ex.what();
130+
std::string errmsg = "RedisReply catches system_error: command: " + std::string(command) + ", reason: " + reason;
131+
// C++ does not have a startswith function. To ensure the search begins at the start of the string 'reason', pass pos = 0
132+
if (reason.rfind("LOADING ", 0) == 0)
133+
{
134+
// The command will fail when Redis is loading the dataset.
135+
SWSS_LOG_WARN("%s", errmsg.c_str());
136+
}
137+
else
138+
{
139+
SWSS_LOG_ERROR("%s", errmsg.c_str());
140+
}
141+
throw std::system_error(ex.code(), errmsg.c_str());
142+
}
143+
}
144+
117145
}
118146
#endif

tests/redis_ut.cpp

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,16 @@
33
#include <thread>
44
#include <algorithm>
55
#include <deque>
6-
#include <gmock/gmock.h>
76
#include <system_error>
7+
#include <gmock/gmock.h>
88
#include "gtest/gtest.h"
99
#include "common/dbconnector.h"
1010
#include "common/producertable.h"
1111
#include "common/consumertable.h"
1212
#include "common/notificationconsumer.h"
1313
#include "common/notificationproducer.h"
1414
#include "common/redisclient.h"
15+
#include "common/redisreply.h"
1516
#include "common/select.h"
1617
#include "common/selectableevent.h"
1718
#include "common/selectabletimer.h"
@@ -1171,3 +1172,32 @@ TEST(Connector, connectFail)
11711172
}
11721173
}, std::system_error);
11731174
}
1175+
1176+
TEST(Redisreply, guard)
1177+
{
1178+
// Improve test coverage for guard() method.
1179+
string command = "test";
1180+
EXPECT_THROW({
1181+
try
1182+
{
1183+
guard([&]{throw system_error(make_error_code(errc::io_error), "LOADING Redis is loading the dataset in memory");}, command.c_str());
1184+
}
1185+
catch(const std::system_error& e)
1186+
{
1187+
EXPECT_THAT(e.what(), HasSubstr("LOADING Redis is loading the dataset in memory"));
1188+
throw;
1189+
}
1190+
}, std::system_error);
1191+
1192+
EXPECT_THROW({
1193+
try
1194+
{
1195+
guard([&]{throw system_error(make_error_code(errc::io_error), "Command failed");}, command.c_str());
1196+
}
1197+
catch(const std::system_error& e)
1198+
{
1199+
EXPECT_THAT(e.what(), HasSubstr("Command failed"));
1200+
throw;
1201+
}
1202+
}, std::system_error);
1203+
}

tests/test_redis_loading.py

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
import os
2+
import time
3+
from swsscommon import swsscommon
4+
5+
TEST_TIMEOUT = 10
6+
REDIS_RECORD_COUNT = 2000000
7+
8+
9+
def wait_redis_ready():
10+
end_time = time.time() + TEST_TIMEOUT
11+
while time.time() < end_time:
12+
result = os.popen(r'redis-cli ping').read()
13+
if "PONG" in result:
14+
break
15+
time.sleep(1)
16+
17+
18+
def generate_redis_dump():
19+
print("generate redis dump")
20+
# generate a large redis dataset to reproduce LOADING exception
21+
db = swsscommon.SonicV2Connector(use_unix_socket_path=True)
22+
db.connect("TEST_DB")
23+
id = REDIS_RECORD_COUNT
24+
while id > 0:
25+
id -= 1
26+
db.set("TEST_DB", "record:{}".format(id), "field", "value{}".format(id))
27+
28+
# create dump file
29+
os.popen(r"redis-cli save")
30+
31+
32+
def test_redis_loading_exception(capfd):
33+
""" Test write warning message to syslog when redis is loading data.
34+
The redis loading data exception is difficult to reproduce, it's only happen when redis service starting and loading massive data.
35+
This test will create one million data dump and restart redis server, however on fast device the test will failed.
36+
Currently the test can pass on developer's devbox.
37+
So this test only report error message when test failed, please developer run this test manually to verify this scenario.
38+
"""
39+
40+
# cleanup redis data
41+
os.popen(r"redis-cli FLUSHALL")
42+
43+
generate_redis_dump()
44+
45+
print("restart redis service")
46+
os.popen(r"sudo service redis-server restart")
47+
48+
# write swss log to stderr, so capfd can capture it
49+
swsscommon.Logger.swssOutputNotify("", "STDERR")
50+
swsscommon.Logger.setMinPrio(swsscommon.Logger.SWSS_NOTICE)
51+
52+
end_time = time.time() + TEST_TIMEOUT
53+
loading_exception = False
54+
while time.time() < end_time:
55+
try:
56+
db = swsscommon.DBConnector("CONFIG_DB", 0, True)
57+
db.keys("test")
58+
except Exception as e:
59+
error = "{}".format(e)
60+
if "LOADING" in error:
61+
break
62+
63+
# wait for redis load dataset finish
64+
wait_redis_ready()
65+
66+
# cleanup test data
67+
os.popen(r"redis-cli FLUSHALL")
68+
os.popen(r"redis-cli save")
69+
70+
captured_log = capfd.readouterr().err
71+
expected = 'WARN:- guard: RedisReply catches system_error: command: *2\\r\\n$4\\r\\nKEYS\\r\\n$4\\r\\ntest\\r\\n, reason: LOADING Redis is loading the dataset in memory'
72+
if expected not in captured_log:
73+
print("Test redis loading data exception failed, could not find expected warning message from swsscommon syslog: {}".format(expected))
74+

0 commit comments

Comments
 (0)