Skip to content

Commit 960ed5e

Browse files
committed
[squashme] An attempt to fix the issue
The idea is to make repr implementation of Remediation aware of utf-8 encoding. The root of the problem is that a mixture of native text / encoded string can appear in a report message and that caused report generating code go crazy while creating actual log files. The test has been refactored as well.
1 parent 2f45be1 commit 960ed5e

3 files changed

Lines changed: 75 additions & 49 deletions

File tree

leapp/reporting/__init__.py

Lines changed: 37 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -238,30 +238,61 @@ def path(self):
238238
return ('detail', 'remediations')
239239

240240

241+
def _guarantee_encoded_str(text, encoding='utf-8'):
242+
"""Guarantees that result is a utf-8 encoded string"""
243+
# Apply conversion only in py2 case
244+
if six.PY3:
245+
return text
246+
# Unless data is already encoded -> encode it
247+
try:
248+
return text.encode(encoding)
249+
except (UnicodeDecodeError, AttributeError):
250+
return text
251+
252+
253+
def _guarantee_decoded_str(text, encoding='utf-8'):
254+
"""Guarantees that result is 'native' text"""
255+
# Apply conversion only in py2 case
256+
if six.PY3:
257+
return text
258+
# Unless data is already decoded -> decode it
259+
try:
260+
return text.decode(encoding)
261+
except (UnicodeEncodeError, AttributeError):
262+
return text
263+
264+
241265
class RemediationCommand(BaseRemediation):
242266
def __init__(self, value=None):
243267
if not isinstance(value, list):
244268
raise TypeError('Value of "RemediationCommand" must be a list')
245-
self._value = {'type': 'command', 'context': value}
269+
self._value = {'type': 'command', 'context': [_guarantee_decoded_str(c) for c in value]}
246270

247271
def __repr__(self):
248-
return "[{}] {}".format(self._value['type'], ' '.join(self._value['context']))
272+
# NOTE(ivasilev) As the message can contain non-ascii characters let's deal with it properly.
273+
# As per python practices repr has to return an encoded string
274+
return "[{}] {}".format(self._value['type'],
275+
' '.join([_guarantee_encoded_str(c) for c in self._value['context']]))
249276

250277

251278
class RemediationHint(BaseRemediation):
252279
def __init__(self, value=None):
253-
self._value = {'type': 'hint', 'context': value}
280+
self._value = {'type': 'hint', 'context': _guarantee_decoded_str(value)}
254281

255282
def __repr__(self):
256-
return "[{}] {}".format(self._value['type'], self._value['context'])
283+
# NOTE(ivasilev) As the message can contain non-ascii characters let's deal with it properly.
284+
# As per python practices repr has to return an encoded string
285+
return "[{}] {}".format(self._value['type'], _guarantee_encoded_str(self._value['context']))
257286

258287

259288
class RemediationPlaybook(BaseRemediation):
260289
def __init__(self, value=None):
261-
self._value = {'type': 'playbook', 'context': value}
290+
self._value = {'type': 'playbook', 'context': _guarantee_decoded_str(value)}
262291

263292
def __repr__(self):
264-
return "[{}] {}".format(self._value['type'], self._value['context'])
293+
# NOTE(ivasilev) As the message can contain non-ascii characters let's deal with it properly.
294+
# As per python practices repr has to return an encoded string
295+
return "[{}] {}".format(self._value['type'], _guarantee_encoded_str(self._value['context']))
265296

266297

267298
class Remediation(object):

leapp/utils/report.py

Lines changed: 25 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
import hashlib
2+
import io
23
import json
34
import os
45

6+
import six
7+
58
from leapp.reporting import (
69
_DEPRECATION_FLAGS,
710
Groups,
@@ -110,21 +113,29 @@ def generate_report_file(messages_to_report, context, path, report_schema='1.1.0
110113
# NOTE(ivasilev) Int conversion should not break as only specific formats of report_schema versions are allowed
111114
report_schema_tuple = tuple(int(x) for x in report_schema.split('.'))
112115
if path.endswith(".txt"):
113-
with open(path, 'w') as f:
116+
with io.open(path, 'w', encoding='utf-8') as f:
114117
for message in sorted(messages_to_report, key=importance):
115-
f.write('Risk Factor: {}{}\n'.format(message['severity'],
116-
' (inhibitor)' if is_inhibitor(message) else ''))
117-
f.write('Title: {}\n'.format(message['title']))
118-
f.write('Summary: {}\n'.format(message['summary']))
118+
f.write(u'Risk Factor: {}{}\n'.format(message['severity'],
119+
' (inhibitor)' if is_inhibitor(message) else ''))
120+
f.write(u'Title: {}\n'.format(message['title']))
121+
f.write(u'Summary: {}\n'.format(message['summary']))
119122
remediation = Remediation.from_dict(message.get('detail', {}))
120123
if remediation:
121-
f.write('Remediation: {}\n'.format(remediation))
124+
# NOTE(ivasilev) Decoding is necessary in case of python2 as remediation is an encoded string,
125+
# while io.open expects "true text" input. For python3 repr will return proper py3 str, no
126+
# decoding will be needed.
127+
# This hassle and clumsiness makes me sad, so suggestions are welcome.
128+
remediation_text = 'Remediation: {}\n'.format(remediation)
129+
if isinstance(remediation_text, six.binary_type):
130+
# This will be true for py2 where repr returns an encoded string
131+
remediation_text = remediation_text.decode('utf-8')
132+
f.write(remediation_text)
122133
if report_schema_tuple > (1, 0, 0):
123134
# report-schema 1.0.0 doesn't have a stable report key
124-
f.write('Key: {}\n'.format(message['key']))
125-
f.write('-' * 40 + '\n')
135+
f.write(u'Key: {}\n'.format(message['key']))
136+
f.write(u'-' * 40 + '\n')
126137
elif path.endswith(".json"):
127-
with open(path, 'w') as f:
138+
with io.open(path, 'w', encoding='utf-8') as f:
128139
# Here all possible convertions will take place
129140
if report_schema_tuple < (1, 1, 0):
130141
# report-schema 1.0.0 doesn't have a stable report key
@@ -150,4 +161,8 @@ def generate_report_file(messages_to_report, context, path, report_schema='1.1.0
150161
# NOTE(ivasilev) As flags field is created only if there is an inhibitor
151162
# a default value to pop has to be specified not to end up with a KeyError
152163
msg.pop('flags', None)
153-
json.dump({'entries': messages_to_report, 'leapp_run_id': context}, f, indent=2)
164+
# NOTE(ivasilev) Has to be done in this way (dumps + py2 conversion + write to file instead of json.dump)
165+
# because of a py2 bug in the json module that can produce a mix of unicode and str objects that will be
166+
# incompatible with write. https://bugs.python.org/issue13769
167+
data = json.dumps({'entries': messages_to_report, 'leapp_run_id': context}, indent=2, ensure_ascii=False)
168+
f.write(data)

tests/scripts/test_reporting.py

Lines changed: 13 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
create_report_from_deprecation,
1515
create_report_from_error,
1616
_create_report_object,
17-
Audience, Flags, Groups, Key, RelatedResource, Summary, Severity, Tags, Title
17+
Audience, Flags, Groups, Key, RelatedResource, Remediation, Summary, Severity, Tags, Title
1818
)
1919
from leapp.utils.report import generate_report_file
2020

@@ -103,39 +103,19 @@ def test_report_tags_and_flags():
103103
assert Flags(["This is a new flag", Groups.INHIBITOR]).value == ["This is a new flag", "inhibitor"]
104104

105105

106-
def test_remediation_with_non_ascii_value():
107-
# report_entries = [Title('Some report title'), Summary('Some summary not used for dynamical key generation'),
108-
# Audience('sysadmin')]
106+
@pytest.mark.parametrize("report_suffix", ('.json', '.txt'))
107+
def test_remediation_with_non_ascii_value(report_suffix):
108+
report_entries = [Title('Some report title'), Summary('Some summary not used for dynamical key generation'),
109+
Audience('sysadmin')]
109110
# Partly related to leapp-repository PR1052
110-
# XXX FIXME Figure out what is missing in the straightforward reproducer
111-
# rem_command = ["ln", "-snf", "root/břeťa", "/bobošík"]
112-
# rem_hint = "Don't forget to check /bobošík directory!"
113-
# rem_playbook = "bobošík.yml"
114-
# rem = Remediation(commands=[rem_command], hint=rem_hint, playbook=rem_playbook)
115-
# report_entries.append(rem)
116-
# report_message = _create_report_object(report_entries).report
117-
report_message = {
118-
u'title': u'Upgrade requires links in root directory to be relative',
119-
u'timeStamp': u'2023-04-05T16:17:08.944687Z',
120-
u'hostname': u'ci-vm-10-0-137-133.hosted.upshift.rdu2.redhat.com',
121-
u'detail': {
122-
u'remediations': [
123-
{u'type': u'command',
124-
u'context': [u'sh', u'-c', u'ln -snf root/ffffuuuub\u0159e\u0165a /ffffuuuucobo\u0161\xedk']}
125-
]
126-
},
127-
u'actor': u'check_root_symlinks',
128-
u'summary': u'Please change these links in / to relative ones.',
129-
u'audience': u'sysadmin',
130-
u'flags': [u'inhibitor'],
131-
u'key': u'3d895ad37ceaf4157864d439edb6bd75562061fa',
132-
u'id': '69091cb0c73d3ec666c2a077f5c2100a23d297f52d2dc8dd8da9edc513d2d7ab',
133-
u'tags': [],
134-
u'severity': u'high'
135-
}
136-
for report_format in ['.json', '.txt']:
137-
with tempfile.NamedTemporaryFile(suffix=report_format) as reportfile:
138-
generate_report_file([report_message], 'leapp-run-id', reportfile.name, '1.1.0')
111+
rem_command = ["ln", "-snf", "root/břeťa", "/bobošík"]
112+
rem_hint = "Don't forget to check /bobošík directory!"
113+
rem_playbook = "bobošík.yml"
114+
rem = Remediation(commands=[rem_command], hint=rem_hint, playbook=rem_playbook)
115+
report_entries.append(rem)
116+
report_message = _create_report_object(report_entries).report
117+
with tempfile.NamedTemporaryFile(suffix=report_suffix) as reportfile:
118+
generate_report_file([report_message], 'leapp-run-id', reportfile.name, '1.1.0')
139119

140120

141121
def test_convert_from_error_to_report():

0 commit comments

Comments
 (0)