diff --git a/lib/ui/hash_codes.dart b/lib/ui/hash_codes.dart index bece69b0887e9..72456863ccdab 100644 --- a/lib/ui/hash_codes.dart +++ b/lib/ui/hash_codes.dart @@ -119,3 +119,18 @@ int hashList(Iterable arguments) { } return _Jenkins.finish(result); } + +/// Combine the [Object.hashCode] values of all keys and values of a [Map] into +/// one value. Keys are visited in sorted order, so they must be mutually +/// comparable. Returns the same value for null as for an empty map. +int hashMap(Map map) { + int result = 0; + if (map != null) { + final List keys = map.keys.toList()..sort(); + for (Object key in keys) { + result = _Jenkins.combine(result, key); + result = _Jenkins.combine(result, map[key]); + } + } + return _Jenkins.finish(result); +} diff --git a/lib/ui/window.dart b/lib/ui/window.dart index 57b40bf1cdffa..d7dada1670ae7 100644 --- a/lib/ui/window.dart +++ b/lib/ui/window.dart @@ -118,9 +118,8 @@ class WindowPadding { /// An identifier used to select a user's language and formatting preferences. /// -/// This represents a [Unicode Language -/// Identifier](https://www.unicode.org/reports/tr35/#Unicode_language_identifier) -/// (i.e. without Locale extensions), except variants are not supported. +/// This represents a [Unicode Locale +/// Identifier](https://www.unicode.org/reports/tr35/#Unicode_locale_identifier). /// /// Locales are canonicalized according to the "preferred value" entries in the /// [IANA Language Subtag @@ -129,6 +128,11 @@ class WindowPadding { /// both have the [languageCode] `he`, because `iw` is a deprecated language /// subtag that was replaced by the subtag `he`. /// +/// When constructed correctly, instances of this Locale class will produce +/// normalized syntactically correct output, although not necessarily valid +/// (because tags are not validated). See constructor and factory method +/// documentation for details. 
+/// /// See also: /// /// * [Window.locale], which specifies the system's currently selected @@ -157,6 +161,10 @@ class Locale { /// Identifier](https://www.unicode.org/reports/tr35/#Unicode_language_identifier) /// specification. /// + /// This method only produces standards-compliant instances if valid language + /// and country codes are provided. Deprecated subtags will be replaced, but + /// incorrectly cased strings are not corrected. + /// /// Validity is not checked by default, but some methods may throw away /// invalid data. /// @@ -168,8 +176,9 @@ class Locale { this._languageCode, [ this._countryCode, ]) : assert(_languageCode != null), - assert(_languageCode != ''), - scriptCode = null; + scriptCode = null, + _variants = null, + _extensions = null; /// Creates a new Locale object. /// @@ -193,7 +202,279 @@ class Locale { _languageCode = languageCode, assert(scriptCode != ''), assert(countryCode != ''), - _countryCode = countryCode; + _countryCode = countryCode, + _variants = null, + _extensions = null; + + // Creates a new Locale object with the specified parts. + // + // This is for internal use only. All fields must already be normalized, must + // already be canonicalized. This method does not modify parameters in any + // way or do any syntax checking. + // + // * language, script and region must be in normalized form. + // * variants need not be sorted, this constructor performs sorting. Each + // variant subtag should already be in normalized form though. + // * The extensions map must contain only valid key/value pairs. "u" and "t" + // keys must be present, with an empty string as value, if there are any + // subtags for those singletons. 
+ Locale._internal( + String languageCode, { + this.scriptCode, + String countryCode, + List variants, + collection.LinkedHashMap extensions, + }) : assert(languageCode != null), + assert(languageCode.length >= 2 && languageCode.length <= 8), + assert(languageCode.length != 4), + assert(scriptCode == null || scriptCode.length == 4), + assert(countryCode == null || (countryCode.length >= 2 && countryCode.length <= 3)), + _languageCode = languageCode, + _countryCode = countryCode, + _variants = (variants != null && variants.isNotEmpty) ? List.from(variants) : null, + _extensions = (extensions != null && extensions.isNotEmpty) ? {} : null + { + _variants?.sort(); + if (extensions != null) { + // Insert extensions in sorted order. + final List keys = extensions.keys.toList()..sort(); + for (String key in keys) { + _extensions[key] = extensions[key]; + } + } + } + + static final RegExp _reSep = RegExp(r'[-_]'); + + /// Parses [Unicode CLDR Locale + /// Identifiers](https://www.unicode.org/reports/tr35/#Identifiers). + /// + /// This method does not parse all BCP 47 tags. See [BCP 47 + /// Conformance](https://www.unicode.org/reports/tr35/#BCP_47_Conformance) for + /// details. + /// + /// TODO: support parsing all BCP 47 tags. + static Locale parse(String localeId) { + assert(localeId != null); + localeId = localeId.toLowerCase(); + if (localeId == 'root') + return Locale._internal('und'); + + final List localeSubtags = localeId.split(_reSep); + int parseIdx = 0; + + String language, script, region; + final List variants = []; + final Map extensions = {}; + + final List problems = []; + if (_isAlphabetic(localeSubtags[0], 2, 8) + && localeSubtags[0].length != 4) { + // language subtag: r'^[a-zA-Z]{2,3}$|^[a-zA-Z]{5,8}$' + language = _replaceDeprecatedLanguageSubtag(localeSubtags[0]); + parseIdx = 1; + } else if (_isAlphabetic(localeSubtags[0], 4, 4)) { + // First subtag is already a script subtag. 
+ // + // Identifiers without language subtags aren't valid BCP 47 tags and + // therefore not intended for general interchange, however they do match + // the LDML spec. + language = 'und'; + } else { + problems.add('"${localeSubtags[0]}" is an invalid language subtag'); + } + if (parseIdx < localeSubtags.length + && _isAlphabetic(localeSubtags[parseIdx], 4, 4)) { + // script subtag: r'^[a-zA-Z]{4}$' + final String s = localeSubtags[parseIdx++]; + script = s.substring(0, 1).toUpperCase() + s.substring(1).toLowerCase(); + } + if (parseIdx < localeSubtags.length + && (_isAlphabetic(localeSubtags[parseIdx], 2, 2) + || _isNumeric(localeSubtags[parseIdx], 3, 3))) { + // region subtag: r'^[a-zA-Z]{2}$|^[0-9]{3}$'; + region = localeSubtags[parseIdx++].toUpperCase(); + } + while (parseIdx < localeSubtags.length + && _isVariantSubtag(localeSubtags[parseIdx])) { + variants.add(localeSubtags[parseIdx++]); + } + + // Now we should be into extension territory, localeSubtags[parseIdx] should + // be a singleton. + if (parseIdx < localeSubtags.length && localeSubtags[parseIdx].length > 1) { + final List mismatched = []; + if (variants.isEmpty && region == null) { + if (script == null) { + mismatched.add('script'); + } + mismatched.add('region'); + } + // A variant is acceptable here even when variants were already parsed, + // so always list it, else the message ends in a dangling "is not a". + mismatched.add('variant'); + problems.add('unrecognised subtag "${localeSubtags[parseIdx]}": is not a ' + '${mismatched.join(", ")}'); + } + _parseExtensions(localeSubtags, parseIdx, extensions, problems); + + if (problems.isNotEmpty) { + throw FormatException('Locale Identifier $localeId is invalid: ' + '${problems.join("; ")}.'); + } + + return Locale._internal( + language, + scriptCode: script, + countryCode: region, + variants: variants, + extensions: extensions, + ); + } + + static Locale tryParse(String localeId) { + try { + final Locale l = parse(localeId); + return l; + } on Exception { + return null; + } + } + + // * All subtags in localeSubtags must already be lowercase. 
+ // + // * extensions receives the parsed entries in sorted key order; being a + // LinkedHashMap, it preserves that insertion order. + static void _parseExtensions(List localeSubtags, + int parseIdx, + collection.LinkedHashMap extensions, + List problems) { + final Map ext = {}; + while (parseIdx < localeSubtags.length) { + final String singleton = localeSubtags[parseIdx++]; + if (singleton == 'u') { + bool empty = true; + // unicode_locale_extensions: collect "(sep attribute)+" attributes. + final List attributes = []; + while (parseIdx < localeSubtags.length + && _isAlphaNumeric(localeSubtags[parseIdx], 3, 8)) { + attributes.add(localeSubtags[parseIdx++]); + } + if (attributes.isNotEmpty) { + empty = false; + } + if (!ext.containsKey(singleton)) { + ext[singleton] = attributes.join('-'); + } else { + problems.add('duplicate singleton: "$singleton"'); + } + // unicode_locale_extensions: collect "(sep keyword)*". + while (parseIdx < localeSubtags.length + && _isUExtensionKey(localeSubtags[parseIdx])) { + empty = false; + final String key = localeSubtags[parseIdx++]; + final List typeParts = []; + while (parseIdx < localeSubtags.length + && _isAlphaNumeric(localeSubtags[parseIdx], 3, 8)) { + typeParts.add(localeSubtags[parseIdx++]); + } + if (!ext.containsKey(key)) { + if (typeParts.length == 1 && typeParts[0] == 'true') { + ext[key] = ''; + } else { + ext[key] = typeParts.join('-'); + } + } else { + problems.add('duplicate key: $key'); + } + } + if (empty) { + problems.add('empty singleton: $singleton'); + } + } else if (singleton == 't') { + bool empty = true; + // transformed_extensions: grab tlang if it exists. 
+ final List tlang = []; + if (parseIdx < localeSubtags.length + && _isAlphabetic(localeSubtags[parseIdx], 2, 8) + && localeSubtags[parseIdx].length != 4) { + // language subtag + empty = false; + tlang.add(localeSubtags[parseIdx++]); + if (parseIdx < localeSubtags.length + && _isAlphabetic(localeSubtags[parseIdx], 4, 4)) + // script subtag + tlang.add(localeSubtags[parseIdx++]); + if (parseIdx < localeSubtags.length + && (_isAlphabetic(localeSubtags[parseIdx], 2, 2) + || _isNumeric(localeSubtags[parseIdx], 3, 3))) + // region subtag: r'^[a-zA-Z]{2}$|^[0-9]{3}$'; + tlang.add(localeSubtags[parseIdx++]); + while (parseIdx < localeSubtags.length + && _isVariantSubtag(localeSubtags[parseIdx])) { + tlang.add(localeSubtags[parseIdx++]); + } + } + if (!ext.containsKey(singleton)) { + ext[singleton] = tlang.join('-'); + } else { + problems.add('duplicate singleton: "$singleton"'); + } + // transformed_extensions: collect "(sep tfield)*". + while (parseIdx < localeSubtags.length + && _isTExtensionKey(localeSubtags[parseIdx])) { + final String tkey = localeSubtags[parseIdx++]; + final List tvalueParts = []; + while (parseIdx < localeSubtags.length + && _isAlphaNumeric(localeSubtags[parseIdx], 3, 8)) { + tvalueParts.add(localeSubtags[parseIdx++]); + } + if (tvalueParts.isNotEmpty) { + empty = false; + if (!ext.containsKey(tkey)) { + ext[tkey] = tvalueParts.join('-'); + } else { + problems.add('duplicate key: $tkey'); + } + } + } + if (empty) { + problems.add('empty singleton: $singleton'); + } + } else if (singleton == 'x') { + // pu_extensions + final List values = []; + while (parseIdx < localeSubtags.length + && _isAlphaNumeric(localeSubtags[parseIdx], 1, 8)) { + values.add(localeSubtags[parseIdx++]); + } + ext[singleton] = values.join('-'); + if (parseIdx < localeSubtags.length) { + problems.add('invalid part of private use subtags: "' + '${localeSubtags.getRange(parseIdx, localeSubtags.length).join('-')}"'); + } + break; + } else if (_isAlphabetic(singleton, 1, 1)) { + 
// other_extensions + final List values = []; + while (parseIdx < localeSubtags.length + && _isAlphaNumeric(localeSubtags[parseIdx], 2, 8)) { + values.add(localeSubtags[parseIdx++]); + } + if (!ext.containsKey(singleton)) { + ext[singleton] = values.join('-'); + } else { + problems.add('duplicate singleton: "$singleton"'); + } + } else { + problems.add('invalid subtag, should be singleton: "$singleton"'); + } + } + final List ks = ext.keys.toList()..sort(); + for (String k in ks) { + extensions[k] = ext[k]; + } + } /// The primary language subtag for the locale. /// @@ -214,6 +495,10 @@ class Locale { /// supplemental /// data](http://unicode.org/cldr/latest/common/validity/language.xml). /// + /// New deprecations in the registry are not automatically picked up by this + /// library, so this class will not make such changes for deprecations that + /// are too recent. + /// /// See also: /// /// * [new Locale.fromSubtags], which describes the conventions for creating @@ -221,6 +506,9 @@ class Locale { String get languageCode => _replaceDeprecatedLanguageSubtag(_languageCode); final String _languageCode; + // Replaces deprecated language subtags. + // + // The subtag must already be lowercase. static String _replaceDeprecatedLanguageSubtag(String languageCode) { // This switch statement is generated by //flutter/tools/gen_locale.dart // Mappings generated for language subtag registry as of 2018-08-08. @@ -336,6 +624,10 @@ class Locale { /// [countryCode] `DE`, because `DD` is a deprecated language subtag that was /// replaced by the subtag `DE`. /// + /// New deprecations in the registry are not automatically picked up by this + /// library, so this class will not make such changes for deprecations that + /// are too recent. 
+ /// /// See also: /// /// * [new Locale.fromSubtags], which describes the conventions for creating @@ -343,6 +635,9 @@ class Locale { String get countryCode => _replaceDeprecatedRegionSubtag(_countryCode); final String _countryCode; + // Replaces deprecated region subtags. + // + // The subtag must already be uppercase. static String _replaceDeprecatedRegionSubtag(String regionCode) { // This switch statement is generated by //flutter/tools/gen_locale.dart // Mappings generated for language subtag registry as of 2018-08-08. @@ -357,6 +652,230 @@ class Locale { } } + // Unicode language variant codes. + // + // This iterable provides variants normalized to lowercase and in sorted + // order, as per the Unicode LDML specification. + // + // FIXME: decide whether this returns a single string or something fancier. + Iterable get variants => _variants ?? const []; + + // The private _variants field must have variants in lowercase and already + // sorted: constructors must construct it as such. + final List _variants; + + // A map representing all Locale Identifier extensions. + // + // Keys in this ordered map must be sorted, and both keys and values must all + // be lowercase: constructors must construct it as such. + // + // This map simultaneously represents T-extensions, U-extensions, other + // extensions and the private use extensions. Implementation details: + // + // * The 't' entry represents the optional "tlang" identifier of the T + // Extension. If the T Extension is present but has no tlang value, the 't' + // map entry's value must be an empty string. + // * The 'u' entry represents the optional attributes of the U Extension. They + // must be sorted in alphabetical order, separated by hyphens, and be + // lowercase. If the U Extension is present but has no attributes, the 'u' + // map entry's value must be an empty string. 
+ // * U-Extension keyword keys and T-Extension fields in the map are directly + // entered into this map: their syntax is enough to distinguish + // them. (_isUExtensionKey and _isTExtensionKey private methods check this + // syntax.) + // * Other singletons are entered directly into the map, with all + // values/attributes associated with that singleton as the map entry's + // value. + final collection.LinkedHashMap _extensions; + + // Produces the Unicode BCP47 Locale Identifier for this locale. + // + // If the unnamed constructor was used with bad parameters, the result might + // not be standards-compliant. + // https://www.unicode.org/reports/tr35/#Unicode_locale_identifier + // + // FIXME/TODO: this must not be submitted as a public function until we've + // made a final decision on toString() behaviour. + String toLanguageTag() { + final StringBuffer out = StringBuffer(languageCode); + if (scriptCode != null) { + out.write('-$scriptCode'); + } + if (_countryCode != null && _countryCode != '') { + out.write('-$countryCode'); + } + for (String v in variants) { + out.write('-$v'); + } + if (_extensions != null && _extensions.isNotEmpty) { + _extensionsToString(_extensions, out); + } + return out.toString(); + } + + // Formats the extension map into a partial Unicode Locale Identifier. + // + // This covers everything after the unicode_language_id. The output, written + // to result, starts with a hyphen. Nothing is written if passed null or an + // empty extensions map. 
+ static void _extensionsToString(collection.LinkedHashMap extensions, StringBuffer result) { + if (extensions == null || extensions.isEmpty) + return; + + String uAttributes; + String tLang; + final StringBuffer uOut = StringBuffer(); + final StringBuffer tOut = StringBuffer(); + final StringBuffer resultVWYZ = StringBuffer(); + + for (MapEntry entry in extensions.entries) { + if (entry.key.length == 1) { + final int letter = entry.key.codeUnitAt(0) - 0x61; // Subtracting 'a' + if (letter < 0 || letter >= 26) { + throw UnimplementedError('Unexpected key in extensions map: $entry'); + } else if (letter < 19) { // 'a' to 's' (19th letter) + result.write('-${entry.key}'); + if (entry.value.isNotEmpty) + result.write('-${entry.value}'); + } else if (letter == 19) { // t + tLang = entry.value; + } else if (letter == 20) { // u + uAttributes = entry.value; + } else if (letter != 23) { // not x: vwyz + resultVWYZ.write('-${entry.key}'); + if (entry.value.isNotEmpty) + resultVWYZ.write('-${entry.value}'); + } + } else if (_isUExtensionKey(entry.key)) { + // unicode_locale_extensions + if (entry.value == 'true' || entry.value == '') { + uOut.write('-${entry.key}'); + } else { + uOut.write('-${entry.key}-${entry.value}'); + } + } else if (_isTExtensionKey(entry.key)) { + // transformed_extensions + tOut.write('-${entry.key}'); + // TODO: this is not standards compliant. What do we want to do with + // this case? Drop entry.key like we drop empty t and u singletons? + // Or simply ensure we don't ever create such an instance? 
+ if (entry.value.isNotEmpty) + tOut.write('-${entry.value}'); + } else { + throw UnimplementedError('Unexpected key in extensions map: $entry'); + } + } + if (tLang != null || tOut.isNotEmpty) { + result.write('-t'); + if (tLang != null && tLang.isNotEmpty) // '' marks an absent tlang. + result.write('-$tLang'); + result.write(tOut.toString()); + } + if (uAttributes != null || uOut.isNotEmpty) { + result.write('-u'); + if (uAttributes != null && uAttributes.isNotEmpty) + result.write('-$uAttributes'); + result.write(uOut.toString()); + } + if (resultVWYZ.isNotEmpty) + result.write(resultVWYZ.toString()); + if (extensions.containsKey('x')) { + result.write('-x'); + if (extensions['x'].isNotEmpty) { + result.write('-${extensions["x"]}'); + } + } + } + + // Returns true if s is a string made of lower-case alphabetic characters + // (a-z) or digits (0-9), with specified min and max lengths. + // + // Benchmarks show that doing this with a lookup map is faster. This function + // chooses to not keep the needed 256 byte string around though. + static bool _isAlphaNumeric(String s, int minLength, int maxLength) { + if (s.length < minLength || s.length > maxLength) + return false; + for (int i = 0; i < s.length; i++) { + final int char = s.codeUnitAt(i); + // 0-9: 0x30-0x39. + if (char ^ 0x30 <= 9) + continue; + // a-z: 0x61-0x7A + if ((char - 0x61) & 0xFFFF >= 26) + return false; + } + return true; + } + + // Returns true if s is a purely lower-case alphabetic (a-z) string with + // specified min and max lengths. + static bool _isAlphabetic(String s, int minLength, int maxLength) { + if (s.length < minLength || s.length > maxLength) + return false; + for (int i = 0; i < s.length; i++) { + final int char = s.codeUnitAt(i); + // a-z: 0x61-0x7A + if ((char - 0x61) & 0xFFFF >= 26) + return false; + } + return true; + } + + // Returns true if s consists only of digits (0-9) within the length bounds. 
+ static bool _isNumeric(String s, int minLength, int maxLength) { + if (s.length < minLength || s.length > maxLength) + return false; + for (int i = 0; i < s.length; i++) { + // codeUnitAt returns a 16-bit number. Dart's default implementation has + // 64-bit ints, so this will be a positive number. + final int char = s.codeUnitAt(i); + // 0-9: 0x30-0x39. + if (char ^ 0x30 > 9) + return false; + } + return true; + } + + // Checks that the specified string matches the variant subtag syntax. Does + // not check the list of valid subtags! + // + // r'^[a-zA-Z0-9]{5,8}$|^[0-9][a-zA-Z0-9]{3}$' + static bool _isVariantSubtag(String s) { + if (!_isAlphaNumeric(s, 4, 8)) + return false; + if (s.length == 4 && !_isNumeric(s[0], 1, 1)) + return false; + return true; + } + + // Checks that the specified string matches the syntax of U extension + // keys. Does not check that it is a valid key! + // + // r'^[a-zA-Z0-9][a-zA-Z]$' + static bool _isUExtensionKey(String s) { + if (s.length != 2) + return false; + if (!_isAlphaNumeric(s[0], 1, 1)) + return false; + if (!_isAlphabetic(s[1], 1, 1)) + return false; + return true; + } + + // Checks that the specified string matches the syntax of T extension + // keys. Does not check that it is a valid key! + // + // r'^[a-zA-Z][0-9]$' + static bool _isTExtensionKey(String s) { + if (s.length != 2) + return false; + if (!_isAlphabetic(s[0], 1, 1)) + return false; + if (!_isNumeric(s[1], 1, 1)) + return false; + return true; + } + @override bool operator ==(dynamic other) { if (identical(this, other)) @@ -364,22 +883,58 @@ class Locale { if (other is! Locale) return false; final Locale typedOther = other; + + // Comparing Lists and Maps requires reimplementing functionality from + // package:collection, so variants and extensions are compared element by + // element with the private _listEquals and _mapEquals helpers. 
return languageCode == typedOther.languageCode && scriptCode == typedOther.scriptCode - && countryCode == typedOther.countryCode; + && countryCode == typedOther.countryCode + && _listEquals(_variants, typedOther._variants) + && _mapEquals(_extensions, typedOther._extensions); + } + + bool _listEquals(List a, List b) { + if (a == null) + return b == null; + if (b == null || a.length != b.length) + return false; + for (int index = 0; index < a.length; index += 1) { + if (a[index] != b[index]) + return false; + } + return true; + } + + bool _mapEquals(Map a, Map b) { + if (a == null) + return b == null; + if (b == null || a.length != b.length) + return false; + for (Object k in a.keys) { + if (a[k] != b[k]) + return false; + } + return true; } @override - int get hashCode => hashValues(languageCode, scriptCode, countryCode); + int get hashCode => hashValues(languageCode, scriptCode, countryCode, hashList(_variants), hashMap(_extensions)); + /// Produces a non-BCP47 Unicode Locale Identifier for this locale. + /// + /// This Locale Identifier uses underscores as separator for historical + /// reasons. Use toLanguageTag() instead, it produces a Unicode BCP47 Locale + /// Identifier as recommended for general interchange. @override String toString() { - final StringBuffer out = StringBuffer(languageCode); - if (scriptCode != null) - out.write('_$scriptCode'); - if (_countryCode != null) - out.write('_$countryCode'); - return out.toString(); + String identifier = toLanguageTag().replaceAll('-', '_'); + if (_countryCode == '' && identifier == languageCode) { + // Not standards-compliant, but kept for legacy reasons. Only the const + // unnamed constructor should be able to create instances like these. 
+ identifier = '${languageCode}_'; + } + return identifier; } } diff --git a/testing/dart/locale_test.dart b/testing/dart/locale_test.dart index 66fd537a72444..83ed7a3ee649e 100644 --- a/testing/dart/locale_test.dart +++ b/testing/dart/locale_test.dart @@ -12,12 +12,93 @@ void main() { expect(const Locale('en').toString(), 'en'); expect(const Locale('en'), new Locale('en', $null)); expect(const Locale('en').hashCode, new Locale('en', $null).hashCode); - expect(const Locale('en'), isNot(new Locale('en', ''))); - expect(const Locale('en').hashCode, isNot(new Locale('en', '').hashCode)); - expect(const Locale('en', 'US').toString(), 'en_US'); - expect(const Locale('iw').toString(), 'he'); - expect(const Locale('iw', 'DD').toString(), 'he_DE'); - expect(const Locale('iw', 'DD'), const Locale('he', 'DE')); + expect(const Locale('en'), isNot(new Locale('en', '')), + reason: 'Legacy. (The semantic difference between Locale("en") and ' + 'Locale("en", "") is not defined.)'); + expect(const Locale('en').hashCode, isNot(new Locale('en', '').hashCode), + reason: 'Legacy. (The semantic difference between Locale("en") and ' + 'Locale("en", "") is not defined.)'); + expect(const Locale('en', 'US').toString(), 'en_US', + reason: 'Legacy. en_US is a valid Unicode Locale Identifier, but ' + 'not a valid Unicode BCP47 Locale Identifier.'); + expect(const Locale('iw').toString(), 'he', + reason: 'The language code for Hebrew was officially changed in 1989.'); + expect(const Locale('iw', 'DD').toString(), 'he_DE', + reason: 'Legacy. 
This is a valid Unicode Locale Identifier, even if ' + 'not a valid Unicode BCP47 Locale Identifier'); + expect(const Locale('iw', 'DD'), const Locale('he', 'DE'), + reason: 'The German Democratic Republic ceased to exist in ' + 'October 1990.'); + + expect(const Locale('en', 'US').toLanguageTag(), 'en-US', + reason: 'Unicode BCP47 Locale Identifier, as recommended for general ' + 'interchange.'); + }); + + test('Locale unnamed constructor idiosyncrasies', () { + expect( + () => Locale.parse(Locale('en', '').toString()), + throwsException, + reason: 'Locale("en", "").toString() produces "en-" which is not ' + 'standards-compliant.', + ); + + // We have: + expect( + const Locale('en'), + isNot(new Locale('en', '')), + reason: 'Legacy. (The semantic difference between Locale("en") and ' + 'Locale("en", "") is not defined.)', + ); + // However we also have: + expect( + Locale.parse(Locale('en', '').toLanguageTag()), + Locale('en'), + reason: 'There is no standards-compliant way for toLanguageTag() to ' + 'represent a zero-length region code.', + ); + // So this class' operator== doesn't match the behaviour we expect from the + // normalized Unicode BCP47 Locale Identifiers perspective. 
+ + expect( + Locale('abcd').toLanguageTag(), + 'abcd', + reason: 'Locale("abcd") is not following instructions in the API ' + 'documentation, so produces standards-uncompliant output.', + ); + expect( + Locale.parse('abcd').toLanguageTag(), + 'und-Abcd', + reason: '', + ); + expect( + Locale('abcd'), + isNot(Locale.parse('abcd')), + reason: '', + ); + expect( + () => Locale.parse(Locale('a').toLanguageTag()), + throwsException, + reason: 'Locale("abcd") is not following instructions in the API ' + 'documentation, so produces standards-uncompliant output.', + ); + expect( + Locale.parse(Locale('EN').toLanguageTag()).languageCode, + 'en', + reason: 'Locale.parse does standards-compliant normalization, whereas ' + 'Locale("EN") is incorrect usage of the API as per API ' + 'documentation.', + ); + + // Syntax is correct. Without validating against CLDR supplemental + // data, this looks like da-u-nu-true. + expect( + Locale.parse('da-u-nu').toLanguageTag(), + equals('da-u-nu'), + reason: 'da-u-nu syntax is correct, this looks like "da-u-nu-true". 
' + 'Only validation against CLDR validity data would show "true" is ' + 'not a valid value for "nu".', + ); }); test('Locale.fromSubtags', () { @@ -39,10 +120,331 @@ void main() { expect(Locale.fromSubtags(languageCode: 'zh', scriptCode: 'Hans', countryCode: 'CN').toString(), 'zh_Hans_CN'); }); - test('Locale equality', () { + test('Locale equality: fromSubtags', () { expect(Locale.fromSubtags(languageCode: 'en'), isNot(Locale.fromSubtags(languageCode: 'en', scriptCode: 'Latn'))); expect(Locale.fromSubtags(languageCode: 'en').hashCode, isNot(Locale.fromSubtags(languageCode: 'en', scriptCode: 'Latn').hashCode)); }); + + group('Locale.parse():', () { + test('languageCode.', () { + expect( + Locale.parse('IW').languageCode, + 'he', + reason: "Case insensitive to input.", + ); + expect( + Locale.parse('Fil').languageCode, + 'fil', + reason: "3-character language codes: Filipino is not Finnish.", + ); + expect( + Locale.parse('abcde').languageCode, + 'abcde', + reason: 'The spec provides for language codes with 5 to 8 ' + 'characters, though as of 2018, there aren\'t any valid tags ' + 'this long.', + ); + expect( + Locale.parse('ROOT').languageCode, + 'und', + reason: 'BCP 47 Language Tag Conversion: ' + 'replace "root" with "und", and case insensitive.', + ); + }); + + test('scriptCode.', () { + expect( + Locale.parse('af_latn').scriptCode, + 'Latn', + ); + expect( + Locale.parse('zh_HANT-CN').scriptCode, + 'Hant', + ); + expect( + Locale.parse('sw-TZ').scriptCode, + null, + ); + }); + + test('countryCode.', () { + expect( + Locale.parse('ar-Arab-EG').countryCode, + 'EG', + ); + expect( + Locale.parse('en-GB_scouse_fonipa').countryCode, + 'GB', + ); + }); + + test('variants.', () { + // Liverpool English, script variant: International Phonetic Alphabet. 
+ // (fonipa is a Latn variant, so only Latn really makes sense as script) + expect( + Locale.parse('en_scouse_fonipa').variants, + orderedEquals(['fonipa', 'scouse']), + reason: 'Variants should be sorted alphabetically.', + ); + expect( + Locale.parse('de-1996').variants, + orderedEquals(['1996']), + ); + expect( + Locale.parse('ja_Jpan').variants, + orderedEquals([]), + reason: 'No variants represented by zero-length Iterable.', + ); + }); + + test('Locale Identifiers with extensions.', () { + expect( + Locale.parse('nl-u-attr2-attr1').toLanguageTag(), + equals('nl-u-attr2-attr1'), + reason: '-u- attributes are ordered, do not sort.', + ); + + expect( + Locale.parse('ar-u-ca-islamic-civil').toLanguageTag(), + equals('ar-u-ca-islamic-civil'), + reason: '-u- attributes are ordered, do not sort.', + ); + + expect( + Locale.parse('RU-T-en-Cyrl-GB-H0-HYBRID').toLanguageTag(), + equals('ru-t-en-cyrl-gb-h0-hybrid'), + reason: 'Language identifiers in t-extensions are also lowercase.', + ); + + expect( + Locale.parse('tr-x-foo-t-hi-h0-hybrid-u-nu').toLanguageTag(), + equals('tr-x-foo-t-hi-h0-hybrid-u-nu'), + reason: 'Everything after -x- belong to the -x- private use ' + 'extension.', + ); + + expect( + Locale.parse('pl-b-h0-hybrid-nu-roman').toLanguageTag(), + equals('pl-b-h0-hybrid-nu-roman'), + reason: 'What looks like tkey and key subtags appearing under ' + 'singletons other than -t- and -u- belong to those singletons.', + ); + + expect( + Locale.parse('ca-u-kb-true').toLanguageTag(), + equals('ca-u-kb'), + reason: 'For true/false keys like kb, true can be ommitted.', + ); + expect( + Locale.parse('ca-u-kb').toLanguageTag(), + equals('ca-u-kb'), + reason: 'For true/false keys like kb, true can be ommitted.', + ); + // TODO: when adding accessor for u-kb: it should return 'true'. 
+ + expect( + Locale.parse('En-LATN-Gb-SCOUSE-FONIPA' + '-U-ATTR2-ATTR1-ca-islamic-civil-nu-thai' + '-A-BC' + '-z-fo-xyzzy' + '-T-HI-H0-hybrid-m0-UNGEGN' + '-x-u-ab') + .toLanguageTag(), + equals('en-Latn-GB-fonipa-scouse' + '-a-bc' + '-t-hi-h0-hybrid-m0-ungegn' + '-u-attr2-attr1-ca-islamic-civil-nu-thai' + '-z-fo-xyzzy' + '-x-u-ab'), + ); + }); + + // These examples are not spec compliant, although we could have been more + // lenient. When choosing to permit uncompliant identifiers, we would need + // to decide "how lenient?", so we prefer to draw the most obvious line by + // being strict. + // TODO: maybe have the exception contain what's necessary to make a "best + // effort" locale? + test('Strict parsing examples.', () { + expect( + () => Locale.parse('nl-u-x'), + throwsException, + reason: 'With no "u" attributes there should be no "u" singleton. ' + 'Could be lenient: "nl-x".', + ); + expect( + () => Locale.parse('fr-t-x'), + throwsException, + reason: 'With no "t" attributes there should be no "t" singleton. ' + 'Could be lenient: "fr-x".', + ); + expect( + () => Locale.parse('it-u-nu-romanlow-a-u-attr'), + throwsException, + reason: 'Duplicate "u" singletons could be merged if only one has ' + 'attributes. Could be lenient: "it-a-u-attr-nu-romanlow".', + ); + expect( + () => Locale.parse('pl-t-cs-b-t-h0-hybrid'), + throwsException, + reason: 'Duplicate "t" singletons could be merged if only one has ' + 'tlang specified. Could be lenient: "pl-b-t-cs-h0-hybrid".', + ); + + expect( + () => Locale.parse('ro-t-hu-HU-cu-ron'), + throwsException, + reason: 'U-extension keywords misplaced under the -t- singleton ' + 'could be moved if unambiguous enough. ' + 'Could be lenient: "ro-t-hu-hu-u-cu-ron".', + ); + // TODO: any point to this test? It's a counter-example of the previous + // "lenient parsing" idea. + expect( + Locale.parse('ro-t-nu-cyrl').toLanguageTag(), + equals('ro-t-nu-cyrl'), + reason: 'This cannot be interpreted as a misplaced -u- keyword. 
' + 'It looks like a tlang tag: "nu-Cyrl", ', + ); + + expect( + () => Locale.parse('pt-BR-u-h0-hybrid-t-pt-PT'), + throwsException, + reason: 'T-extension "tfields" misplaced under the U-extension. ' + 'Could be lenient: "pt-BR-t-pt-pt-h0-hybrid".', + ); + + expect( + () => Locale.parse('pl-t-h0'), + throwsException, + reason: 'Locale tag pl-t-h0 is not spec compliant. How to best fix it ' + 'is unclear: it is underspecified.', + ); + }); + + test('Locale.parse(): invalid identifiers.', () { + expect( + () => Locale.parse('a'), + throwsException, + reason: 'One character language subtags are invalid.', + ); + expect( + Locale.parse('abcd').languageCode, + 'und', + reason: 'Special-use corner case from the specification: ' + 'language subtag can be skipped if a script is specified.', + ); + expect( + Locale.parse('abcd').scriptCode, + 'Abcd', + reason: 'Special-use corner case from the specification: ' + 'language subtag can be skipped if a script is specified.', + ); + expect( + () => Locale.parse('abcdefghi'), + throwsException, + reason: 'Language subtags may not be more than 8 characters.', + ); + expect( + () => Locale.parse(r'e$'), + throwsException, + reason: 'Invalid character for language subtag, only a-z allowed.', + ); + expect( + () => Locale.parse('fr-RU-Hant'), + throwsException, + reason: 'Swapping region and script is not allowed.', + ); + }); + + test('Locale.tryParse().', () { + expect( + Locale.tryParse('a'), + null, + reason: 'One character language subtags are invalid.', + ); + expect( + Locale.tryParse('abcdefghi'), + null, + reason: 'Language subtags may not be more than 8 characters.', + ); + expect( + Locale.tryParse(r'e$'), + null, + reason: 'Invalid character for language subtag, only a-z allowed.', + ); + }); + + test('Locale equality.', () { + expect( + Locale.parse('en'), + isNot(Locale.parse('en-Latn')), + ); + expect( + Locale.parse('en').hashCode, + isNot(Locale.parse('en-Latn').hashCode), + ); + + expect( + Locale.parse('en'), + 
isNot(Locale.parse('en-US')), + ); + expect( + Locale.parse('en').hashCode, + isNot(Locale.parse('en-US').hashCode), + ); + + expect( + Locale.parse('en'), + isNot(Locale.parse('en-fonipa')), + ); + expect( + Locale.parse('en').hashCode, + isNot(Locale.parse('en-fonipa').hashCode), + ); + + expect(Locale.parse('en'), isNot(Locale.parse('en-a'))); + expect( + Locale.parse('en').hashCode, + isNot(Locale.parse('en-a').hashCode), + ); + + expect(Locale.parse('en'), isNot(Locale.parse('en-a'))); + expect( + Locale.parse('en').hashCode, + isNot(Locale.parse('en-a').hashCode), + ); + + expect( + Locale.parse('en-u-attr'), + isNot(Locale.parse('en-u-nu-roman')), + ); + expect( + Locale.parse('en-u-attr').hashCode, + isNot(Locale.parse('en-u-nu-roman').hashCode), + ); + + expect( + Locale.parse('en-u-kb'), + Locale.parse('en-u-kb-true'), + reason: '-u-kb should parse to the same result as -u-kb-true.', + ); + expect( + Locale.parse('en-u-kb').hashCode, + Locale.parse('en-u-kb-true').hashCode, + reason: '-u-kb should parse to the same result as -u-kb-true.', + ); + + expect( + Locale.parse('en-t-hi'), + isNot(Locale.parse('en-t-hi-h0-hybrid')), + ); + expect( + Locale.parse('en-t-hi').hashCode, + isNot(Locale.parse('en-t-hi-h0-hybrid').hashCode), + ); + }); + }); }