Skip to content

Commit 94ac4a0

Browse files
authored
Fix loganalyzer.py UnicodeDecodeError (sonic-net#6524)
What is the motivation for this PR? Fix "UnicodeDecodeError: 'utf-8' codec can't decode byte 0xc0 in position 2203: invalid start byte", which is raised when running loganalyzer.py on the DUT. Both master and 202205 can hit this issue if syslog contains non-ASCII data that is not valid UTF-8. How did you do it? Added the errors='ignore' parameter when opening the syslog file, so undecodable bytes are dropped instead of raising. How did you verify/test it? Ran platform_tests/api/test_fan_drawer_fans.py::test_get_status with loganalyzer enabled. Signed-off-by: Zhaohui Sun <[email protected]>
1 parent 4c0bb29 commit 94ac4a0

2 files changed

Lines changed: 59 additions & 52 deletions

File tree

ansible/library/extract_log.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -92,9 +92,9 @@ def extract_lines(directory, filename, target_string):
9292
path = os.path.join(directory, filename)
9393
file = None
9494
if 'gz' in path:
95-
file = gzip.open(path, mode='rt')
95+
file = gzip.open(path, mode='rt', errors='ignore')
9696
else:
97-
file = open(path)
97+
file = open(path, errors='ignore')
9898
result = None
9999
with file:
100100
# This might be a gunzip file or logrotate issue, there has
@@ -226,17 +226,17 @@ def combine_logs_and_save(directory, filenames, start_string, target_string, tar
226226
do_copy = False
227227
line_processed = 0
228228
line_copied = 0
229-
with open(target_filename, 'w') as fp:
229+
with open(target_filename, 'w', errors='ignore') as fp:
230230
for filename in reversed(filenames):
231231
path = os.path.join(directory, filename)
232232
dt = datetime.datetime.fromtimestamp(os.path.getctime(path))
233233
sz = os.path.getsize(path)
234234
logger.debug("extract_log combine_logs from file {} create time {}, size {}".format(path, dt, sz))
235235
file = None
236236
if 'gz' in path:
237-
file = gzip.open(path, mode='rt')
237+
file = gzip.open(path, mode='rt', errors='ignore')
238238
else:
239-
file = open(path)
239+
file = open(path, errors='ignore')
240240

241241
with file:
242242
for line in file:

ansible/roles/test/files/tools/loganalyzer/loganalyzer.py

Lines changed: 54 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,9 @@
1313
sudo python loganalyzer.py --out_dir /home/hrachya/projects/loganalyzer/log.analyzer.results --action analyze --run_id myTest114 --logs file3.log -m /home/hrachya/projects/loganalyzer/match.file.1.log,/home/hrachya/projects/loganalyzer/match.file.2.log -i ignore.file.1.log,ignore.file.2.log -v
1414
'''
1515

16-
#---------------------------------------------------------------------
16+
# ---------------------------------------------------------------------
1717
# Global imports
18-
#---------------------------------------------------------------------
18+
# ---------------------------------------------------------------------
1919
from __future__ import print_function
2020
import sys
2121
import getopt
@@ -30,15 +30,15 @@
3030
import subprocess
3131
from datetime import datetime
3232

33-
#---------------------------------------------------------------------
33+
# ---------------------------------------------------------------------
3434
# Global variables
35-
#---------------------------------------------------------------------
35+
# ---------------------------------------------------------------------
3636
tokenizer = ','
3737
comment_key = '#'
3838
system_log_file = '/var/log/syslog'
3939
re_rsyslog_pid = re.compile("PID:\s+(\d+)")
4040

41-
#-- List of ERROR codes to be returned by AnsibleLogAnalyzer
41+
# -- List of ERROR codes to be returned by AnsibleLogAnalyzer
4242
err_duplicate_start_marker = -1
4343
err_duplicate_end_marker = -2
4444
err_no_end_marker = -3
@@ -48,6 +48,7 @@
4848
err_end_ignore_marker = -7
4949
err_start_ignore_marker = -8
5050

51+
5152
class AnsibleLogAnalyzer:
5253
'''
5354
@summary: Overview of functionality
@@ -84,36 +85,36 @@ class AnsibleLogAnalyzer:
8485
def init_sys_logger(self):
8586
logger = logging.getLogger('LogAnalyzer')
8687
logger.setLevel(logging.DEBUG)
87-
handler = logging.handlers.SysLogHandler(address = '/dev/log')
88+
handler = logging.handlers.SysLogHandler(address='/dev/log')
8889
logger.addHandler(handler)
8990
return logger
90-
#---------------------------------------------------------------------
91+
# ---------------------------------------------------------------------
9192

92-
def __init__(self, run_id, verbose, start_marker = None):
93+
def __init__(self, run_id, verbose, start_marker=None):
9394
self.run_id = run_id
9495
self.verbose = verbose
9596
self.start_marker = start_marker
96-
#---------------------------------------------------------------------
97+
# ---------------------------------------------------------------------
9798

9899
def print_diagnostic_message(self, message):
99100
if (not self.verbose):
100101
return
101102

102103
print('[LogAnalyzer][diagnostic]:%s' % message)
103-
#---------------------------------------------------------------------
104+
# ---------------------------------------------------------------------
104105

105106
def create_start_marker(self):
106107
if (self.start_marker is None) or (len(self.start_marker) == 0):
107108
return self.start_marker_prefix + "-" + self.run_id
108109
else:
109110
return self.start_marker
110111

111-
#---------------------------------------------------------------------
112+
# ---------------------------------------------------------------------
112113

113114
def is_filename_stdin(self, file_name):
114115
return file_name == "-"
115116

116-
#---------------------------------------------------------------------
117+
# ---------------------------------------------------------------------
117118

118119
def require_marker_check(self, file_path):
119120
'''
@@ -127,19 +128,19 @@ def require_marker_check(self, file_path):
127128
files_to_skip = ["sairedis.rec", "bgpd.log"]
128129
return not any([target in file_path for target in files_to_skip])
129130

130-
#---------------------------------------------------------------------
131+
# ---------------------------------------------------------------------
131132

132133
def create_end_marker(self):
133134
return self.end_marker_prefix + "-" + self.run_id
134-
#---------------------------------------------------------------------
135+
# ---------------------------------------------------------------------
135136

136137
def create_start_ignore_marker(self):
137138
return self.start_ignore_marker_prefix + "-" + self.run_id
138-
#---------------------------------------------------------------------
139+
# ---------------------------------------------------------------------
139140

140141
def create_end_ignore_marker(self):
141142
return self.end_ignore_marker_prefix + "-" + self.run_id
142-
#---------------------------------------------------------------------
143+
# ---------------------------------------------------------------------
143144

144145
def flush_rsyslogd(self):
145146
'''
@@ -208,11 +209,11 @@ def wait_for_marker(self, marker, timeout=120, polling_interval=10):
208209
prev_syslog_file = "/var/log/syslog.1"
209210
last_dt = os.path.getctime(syslog_file)
210211
while wait_time <= timeout:
211-
with open(syslog_file, 'r') as fp:
212+
with open(syslog_file, 'r', errors='ignore') as fp:
212213
dt = os.path.getctime(syslog_file)
213214
if last_dt != dt:
214215
try:
215-
with open(prev_syslog_file, 'r') as pfp:
216+
with open(prev_syslog_file, 'r', errors='ignore') as pfp:
216217
pfp.seek(last_check_pos)
217218
for l in fp:
218219
if marker in l:
@@ -251,7 +252,7 @@ def place_marker(self, log_file_list, marker, wait_for_marker=False):
251252
raise RuntimeError("cannot find marker {} in /var/log/syslog".format(marker))
252253

253254
return
254-
#---------------------------------------------------------------------
255+
# ---------------------------------------------------------------------
255256

256257
def error_to_regx(self, error_string):
257258
'''
@@ -271,11 +272,11 @@ def error_to_regx(self, error_string):
271272
original_string = error_string
272273
#-- Escapes out of all the meta characters --#
273274
error_string = re.escape(error_string)
274-
#-- Replaces a white space with the white space regular expression
275+
# -- Replaces a white space with the white space regular expression
275276
error_string = re.sub(r"(\\\s+)+", "\\\\s+", error_string)
276-
#-- Replaces a digit number with the digit regular expression
277+
# -- Replaces a digit number with the digit regular expression
277278
error_string = re.sub(r"\b\d+\b", "\\\\d+", error_string)
278-
#-- Replaces a hex number with the hex regular expression
279+
# -- Replaces a hex number with the hex regular expression
279280
error_string = re.sub(r"0x[0-9a-fA-F]+", "0x[0-9a-fA-F]+", error_string)
280281
self.print_diagnostic_message('Built error string: %s' % error_string)
281282

@@ -284,7 +285,7 @@ def error_to_regx(self, error_string):
284285
error_string = '|'.join(map(self.error_to_regx, error_string))
285286

286287
return error_string
287-
#---------------------------------------------------------------------
288+
# ---------------------------------------------------------------------
288289

289290
def create_msg_regex(self, file_lsit):
290291
'''
@@ -310,17 +311,17 @@ def create_msg_regex(self, file_lsit):
310311
for index, row in enumerate(csvreader):
311312
row = [item for item in row if item != ""]
312313
self.print_diagnostic_message('[diagnostic]:processing row:%d' % index)
313-
self.print_diagnostic_message('row:%s'% row)
314+
self.print_diagnostic_message('row:%s' % row)
314315
try:
315-
#-- Ignore Empty Lines
316+
# -- Ignore Empty Lines
316317
if not row:
317318
continue
318-
#-- Ignore commented Lines
319+
# -- Ignore commented Lines
319320
if row[0].startswith(comment_key):
320321
self.print_diagnostic_message('[diagnostic]:skipping row[0]:%s' % row[0])
321322
continue
322323

323-
#-- ('s' | 'r') = (Raw String | Regular Expression)
324+
# -- ('s' | 'r') = (Raw String | Regular Expression)
324325
is_regex = row[0]
325326
if ('s' == row[0]):
326327
is_regex = False
@@ -329,7 +330,7 @@ def create_msg_regex(self, file_lsit):
329330
else:
330331
raise Exception('file:%s, malformed line:%d. '
331332
'must be \'s\'(string) or \'r\'(regex)'
332-
%(filename,index))
333+
% (filename, index))
333334

334335
if (is_regex):
335336
messages_regex.extend(row[1:])
@@ -346,7 +347,7 @@ def create_msg_regex(self, file_lsit):
346347
else:
347348
regex = None
348349
return regex, messages_regex
349-
#---------------------------------------------------------------------
350+
# ---------------------------------------------------------------------
350351

351352
def line_matches(self, str, match_messages_regex, ignore_messages_regex):
352353
'''
@@ -378,7 +379,7 @@ def line_matches(self, str, match_messages_regex, ignore_messages_regex):
378379
ret_code = True
379380

380381
return ret_code
381-
#---------------------------------------------------------------------
382+
# ---------------------------------------------------------------------
382383

383384
def line_is_expected(self, str, expect_messages_regex):
384385
'''
@@ -413,11 +414,10 @@ def analyze_file(self, log_file_path, match_messages_regex, ignore_messages_rege
413414
@return: List of strings match search criteria.
414415
'''
415416

417+
self.print_diagnostic_message('analyzing file: %s' % log_file_path)
416418

417-
self.print_diagnostic_message('analyzing file: %s'% log_file_path)
418-
419-
#-- indicates whether log analyzer currently is in the log range between start
420-
#-- and end marker. see analyze_file method.
419+
# -- indicates whether log analyzer currently is in the log range between start
420+
# -- and end marker. see analyze_file method.
421421
check_marker = self.require_marker_check(log_file_path)
422422
in_analysis_range = not check_marker
423423
stdin_as_input = self.is_filename_stdin(log_file_path)
@@ -503,7 +503,7 @@ def analyze_file(self, log_file_path, match_messages_regex, ignore_messages_rege
503503
sys.exit(err_no_end_marker)
504504

505505
return matching_lines, expected_lines
506-
#---------------------------------------------------------------------
506+
# ---------------------------------------------------------------------
507507

508508
def analyze_file_list(self, log_file_list, match_messages_regex, ignore_messages_regex, expect_messages_regex):
509509
'''
@@ -528,14 +528,16 @@ def analyze_file_list(self, log_file_list, match_messages_regex, ignore_messages
528528
for log_file in log_file_list:
529529
if not len(log_file):
530530
continue
531-
match_strings, expect_strings = self.analyze_file(log_file, match_messages_regex, ignore_messages_regex, expect_messages_regex)
531+
match_strings, expect_strings = self.analyze_file(
532+
log_file, match_messages_regex, ignore_messages_regex, expect_messages_regex)
532533

533534
match_strings.reverse()
534535
expect_strings.reverse()
535-
res[log_file] = [ match_strings, expect_strings ]
536+
res[log_file] = [match_strings, expect_strings]
536537

537538
return res
538-
#---------------------------------------------------------------------
539+
# ---------------------------------------------------------------------
540+
539541

540542
def usage():
541543
print('loganalyzer input parameters:')
@@ -565,7 +567,8 @@ def usage():
565567
print(' in one of specified log files during the analysis. Must be present')
566568
print(' when action == analyze.')
567569

568-
#---------------------------------------------------------------------
570+
# ---------------------------------------------------------------------
571+
569572

570573
def check_action(action, log_files_in, out_dir, match_files_in, ignore_files_in, expect_files_in):
571574
'''
@@ -588,13 +591,13 @@ def check_action(action, log_files_in, out_dir, match_files_in, ignore_files_in,
588591
print('ERROR: missing required match_files_in for analyze action')
589592
ret_code = False
590593

591-
592594
else:
593595
ret_code = False
594596
print('ERROR: invalid action:%s specified' % action)
595597

596598
return ret_code
597-
#---------------------------------------------------------------------
599+
# ---------------------------------------------------------------------
600+
598601

599602
def check_run_id(run_id):
600603
'''
@@ -612,7 +615,8 @@ def check_run_id(run_id):
612615
ret_code = False
613616

614617
return ret_code
615-
#---------------------------------------------------------------------
618+
# ---------------------------------------------------------------------
619+
616620

617621
def write_result_file(run_id, out_dir, analysis_result_per_file, messages_regex_e, unused_regex_messages):
618622
'''
@@ -666,7 +670,8 @@ def write_result_file(run_id, out_dir, analysis_result_per_file, messages_regex_
666670

667671
out_file.write("\n-------------------------------------------------\n\n")
668672
out_file.flush()
669-
#---------------------------------------------------------------------
673+
# ---------------------------------------------------------------------
674+
670675

671676
def write_summary_file(run_id, out_dir, analysis_result_per_file, unused_regex_messages):
672677
'''
@@ -703,7 +708,8 @@ def write_summary_file(run_id, out_dir, analysis_result_per_file, unused_regex_m
703708
out_file.write("-----------------------------------\n")
704709
out_file.flush()
705710
out_file.close()
706-
#---------------------------------------------------------------------
711+
# ---------------------------------------------------------------------
712+
707713

708714
def main(argv):
709715

@@ -718,7 +724,8 @@ def main(argv):
718724
verbose = False
719725

720726
try:
721-
opts, args = getopt.getopt(argv, "a:r:s:l:o:m:i:e:vh", ["action=", "run_id=", "start_marker=", "logs=", "out_dir=", "match_files_in=", "ignore_files_in=", "expect_files_in=", "verbose", "help"])
727+
opts, args = getopt.getopt(argv, "a:r:s:l:o:m:i:e:vh", [
728+
"action=", "run_id=", "start_marker=", "logs=", "out_dir=", "match_files_in=", "ignore_files_in=", "expect_files_in=", "verbose", "help"])
722729

723730
except getopt.GetoptError:
724731
print("Invalid option specified")
@@ -799,11 +806,11 @@ def main(argv):
799806
analyzer.place_marker(log_file_list, analyzer.create_end_ignore_marker(), wait_for_marker=True)
800807
return 0
801808

802-
803809
else:
804810
print('Unknown action:%s specified' % action)
805811
return len(result)
806-
#---------------------------------------------------------------------
812+
# ---------------------------------------------------------------------
813+
807814

808815
if __name__ == "__main__":
809816
main(sys.argv[1:])

0 commit comments

Comments
 (0)