-
-
Notifications
You must be signed in to change notification settings - Fork 33.8k
bpo-39337: encodings.normalize_encoding() now ignores non-ASCII characters #22219
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 8 commits
8ae6408
0fcafb8
dea24d8
dbb0062
3fa221f
a69eef8
2e73d13
95c1d98
03bfd9b
38f28bd
5982784
4ecb8a1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3417,7 +3417,7 @@ def test_rot13_func(self): | |
|
|
||
| class CodecNameNormalizationTest(unittest.TestCase): | ||
| """Test codec name normalization""" | ||
| def test_normalized_encoding(self): | ||
| def test_codecs_lookup(self): | ||
| FOUND = (1, 2, 3, 4) | ||
| NOT_FOUND = (None, None, None, None) | ||
| def search_function(encoding): | ||
|
|
@@ -3439,6 +3439,23 @@ def search_function(encoding): | |
| self.assertEqual(NOT_FOUND, codecs.lookup('BBB.8')) | ||
| self.assertEqual(NOT_FOUND, codecs.lookup('a\xe9\u20ac-8')) | ||
|
|
||
| def test_encodings_normalize_encoding(self): | ||
shihai1991 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| # encodings.normalize_encoding() ignores non-ASCII characters. | ||
| out = encodings.normalize_encoding('utf_8') | ||
shihai1991 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| self.assertEqual(out, 'utf_8') | ||
shihai1991 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| out = encodings.normalize_encoding('utf\xE9\u20AC\U0010ffff-8') | ||
| self.assertEqual(out, 'utf_8') | ||
| out = encodings.normalize_encoding('utf 8') | ||
| self.assertEqual(out, 'utf_8') | ||
shihai1991 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| # encodings.normalize_encoding() doesn't convert | ||
| # characters to lower case. | ||
| out = encodings.normalize_encoding('UTF 8') | ||
|
||
| self.assertEqual(out, 'UTF_8') | ||
| out = encodings.normalize_encoding('utf.8') | ||
| self.assertEqual(out, 'utf.8') | ||
| out = encodings.normalize_encoding('utf...8') | ||
shihai1991 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| self.assertEqual(out, 'utf...8') | ||
|
|
||
|
|
||
| if __name__ == "__main__": | ||
| unittest.main() | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| :func:`encodings.normalize_encoding` now ignores non-ASCII characters. | ||
shihai1991 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I wanted to ask you to add a ".. versionchanged:: 3.10" entry in the documentation, but then I noticed that the encodings module was never documented! Oh!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If end users are going to use this function or module, I can try to write the documentation, but I will need some time to do it :)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It can and must be addressed in a separate PR. The lack of documentation should not hold up this change.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ok, copy that.