Skip to content

Commit 4c1be65

Browse files
behnamdjc
authored and committed
[idna] Add unit tests for punycode prefix edge cases
Helps with answering the question asked in #373. The test invariants here are based on and checked against:
* Processing Step 4 of UTS #46 (<http://www.unicode.org/reports/tr46/#ProcessingStepPunycode>)
* Section 6.2, "Decoding procedure", of the Punycode RFC (<https://ietf.org/rfc/rfc3492.txt>)
1 parent 8dedda4 commit 4c1be65

1 file changed

Lines changed: 55 additions & 16 deletions

File tree

idna/tests/unit.rs

Lines changed: 55 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -1,34 +1,73 @@
11
use unicode_normalization::char::is_combining_mark;
22

3-
fn _to_ascii(domain: &str) -> Result<String, idna::Errors> {
4-
idna::Config::default()
3+
/// https://github.com/servo/rust-url/issues/373
4+
#[test]
5+
fn test_punycode_prefix_with_length_check() {
6+
let config = idna::Config::default()
57
.verify_dns_length(true)
6-
.use_std3_ascii_rules(true)
7-
.to_ascii(domain)
8+
.check_hyphens(true)
9+
.use_std3_ascii_rules(true);
10+
11+
assert!(config.to_ascii("xn--").is_err());
12+
assert!(config.to_ascii("xn---").is_err());
13+
assert!(config.to_ascii("xn-----").is_err());
14+
assert!(config.to_ascii("xn--.").is_err());
15+
assert!(config.to_ascii("xn--...").is_err());
16+
assert!(config.to_ascii(".xn--").is_err());
17+
assert!(config.to_ascii("...xn--").is_err());
18+
assert!(config.to_ascii("xn--.xn--").is_err());
19+
assert!(config.to_ascii("xn--.example.org").is_err());
20+
}
21+
22+
/// https://github.com/servo/rust-url/issues/373
23+
#[test]
24+
fn test_punycode_prefix_without_length_check() {
25+
let config = idna::Config::default()
26+
.verify_dns_length(false)
27+
.check_hyphens(true)
28+
.use_std3_ascii_rules(true);
29+
30+
assert_eq!(config.to_ascii("xn--").unwrap(), "");
31+
assert!(config.to_ascii("xn---").is_err());
32+
assert!(config.to_ascii("xn-----").is_err());
33+
assert_eq!(config.to_ascii("xn--.").unwrap(), ".");
34+
assert_eq!(config.to_ascii("xn--...").unwrap(), "...");
35+
assert_eq!(config.to_ascii(".xn--").unwrap(), ".");
36+
assert_eq!(config.to_ascii("...xn--").unwrap(), "...");
37+
assert_eq!(config.to_ascii("xn--.xn--").unwrap(), ".");
38+
assert_eq!(config.to_ascii("xn--.example.org").unwrap(), ".example.org");
839
}
940

1041
#[test]
1142
fn test_v5() {
43+
let config = idna::Config::default()
44+
.verify_dns_length(true)
45+
.use_std3_ascii_rules(true);
46+
1247
// IdnaTest:784 蔏。𑰺
1348
assert!(is_combining_mark('\u{11C3A}'));
14-
assert!(_to_ascii("\u{11C3A}").is_err());
15-
assert!(_to_ascii("\u{850f}.\u{11C3A}").is_err());
16-
assert!(_to_ascii("\u{850f}\u{ff61}\u{11C3A}").is_err());
49+
assert!(config.to_ascii("\u{11C3A}").is_err());
50+
assert!(config.to_ascii("\u{850f}.\u{11C3A}").is_err());
51+
assert!(config.to_ascii("\u{850f}\u{ff61}\u{11C3A}").is_err());
1752
}
1853

1954
#[test]
2055
fn test_v8_bidi_rules() {
21-
assert_eq!(_to_ascii("abc").unwrap(), "abc");
22-
assert_eq!(_to_ascii("123").unwrap(), "123");
23-
assert_eq!(_to_ascii("אבּג").unwrap(), "xn--kdb3bdf");
24-
assert_eq!(_to_ascii("ابج").unwrap(), "xn--mgbcm");
25-
assert_eq!(_to_ascii("abc.ابج").unwrap(), "abc.xn--mgbcm");
26-
assert_eq!(_to_ascii("אבּג.ابج").unwrap(), "xn--kdb3bdf.xn--mgbcm");
56+
let config = idna::Config::default()
57+
.verify_dns_length(true)
58+
.use_std3_ascii_rules(true);
59+
60+
assert_eq!(config.to_ascii("abc").unwrap(), "abc");
61+
assert_eq!(config.to_ascii("123").unwrap(), "123");
62+
assert_eq!(config.to_ascii("אבּג").unwrap(), "xn--kdb3bdf");
63+
assert_eq!(config.to_ascii("ابج").unwrap(), "xn--mgbcm");
64+
assert_eq!(config.to_ascii("abc.ابج").unwrap(), "abc.xn--mgbcm");
65+
assert_eq!(config.to_ascii("אבּג.ابج").unwrap(), "xn--kdb3bdf.xn--mgbcm");
2766

2867
// Bidi domain names cannot start with digits
29-
assert!(_to_ascii("0a.\u{05D0}").is_err());
30-
assert!(_to_ascii("0à.\u{05D0}").is_err());
68+
assert!(config.to_ascii("0a.\u{05D0}").is_err());
69+
assert!(config.to_ascii("0à.\u{05D0}").is_err());
3170

3271
// Bidi chars may be punycode-encoded
33-
assert!(_to_ascii("xn--0ca24w").is_err());
72+
assert!(config.to_ascii("xn--0ca24w").is_err());
3473
}

0 commit comments

Comments
 (0)