From 4b2c1525e3a0bf6bf5096994e0da3a192363feca Mon Sep 17 00:00:00 2001 From: Hugo van der Merwe Date: Thu, 27 Sep 2018 12:25:38 +0200 Subject: [PATCH 01/20] Locale class: toLanguageTag(), parse() factory constructor. Implements Unicode BCP47 Locale Identifiers with syntax checking and normalization. Provides iterator access to variants, does not yet provide access to extensions (other than via toLanguageTag()). Unit tests showing behaviour, including highlighting idiosyncratic behaviour. --- lib/ui/window.dart | 456 ++++++++++++++++++++++++++++++++-- testing/dart/locale_test.dart | 246 +++++++++++++++++- 2 files changed, 671 insertions(+), 31 deletions(-) diff --git a/lib/ui/window.dart b/lib/ui/window.dart index 0b1318c997aaf..3aa8bdbc36564 100644 --- a/lib/ui/window.dart +++ b/lib/ui/window.dart @@ -116,9 +116,19 @@ class WindowPadding { } } -/// An identifier used to select a user's language and formatting preferences, -/// consisting of a language and a country. This is a subset of locale -/// identifiers as defined by BCP 47. +class LocaleParseException implements Exception { + LocaleParseException(this._message); + String _message; +} + +/// An identifier used to select a user's language and formatting preferences. +/// This implements Unicode Locale Identifiers as defined by [Unicode +/// LDML](https://www.unicode.org/reports/tr35/). +/// +/// When constructed correctly, instances of this Locale class will produce +/// normalized syntactically valid output, although not necessarily valid (tags +/// are not validated). See constructor and factory method documentation for +/// details. /// /// Locales are canonicalized according to the "preferred value" entries in the /// [IANA Language Subtag @@ -127,6 +137,8 @@ class WindowPadding { /// both have the [languageCode] `he`, because `iw` is a deprecated language /// subtag that was replaced by the subtag `he`. /// +/// FIXME/WIP: switch to using CLDR directly instead of the IANA registry? +/// /// See also: /// /// * [Window.locale], which specifies the system's currently selected @@ -143,14 +155,229 @@ class Locale { /// ``` /// /// The primary language subtag must not be null. The region subtag is - /// optional. - /// - /// The values are _case sensitive_, and should match the case of the relevant - /// subtags in the [IANA Language Subtag - /// Registry](https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry). - /// Typically this means the primary language subtag should be lowercase and - /// the region subtag should be uppercase. - const Locale(this._languageCode, [ this._countryCode ]) : assert(_languageCode != null); + /// optional. The values are _case sensitive_, and must match the values + /// listed in + /// http://unicode.org/repos/cldr/tags/latest/common/validity/language.xml and + /// http://unicode.org/repos/cldr/tags/latest/common/validity/region.xml. + /// + /// This method only produces standards-compliant instances if valid language + /// and country codes are provided. Deprecated subtags will be replaced, but + /// incorrectly cased strings are not corrected. + const Locale( + this._languageCode, [ + this._countryCode, + ]) : assert(_languageCode != null), + this._scriptCode = null, + this._variants = null, + this._extensions = null; + + /// Creates a new Locale object with the specified parts. + /// + /// This is for internal use only. All fields must already be normalized, must + /// already be canonicalized. This method does not modify parameters in any + /// way or do any syntax checking. + /// + /// * language, script and region must be in canonical form. + /// * iterating over variants must provide variants in alphabetical order. + /// * The extensions map must contain only valid key/value pairs. "u" and "t" + /// keys must be present, with an empty string as value, if there are any + /// subtags for those singletons. + Locale._internal( + String language, { + String script, + String region, + collection.SplayTreeSet variants, + collection.SplayTreeMap extensions, + }) : assert(language != null), + assert(language.length >= 2), + assert(language.length <= 8), + assert(language.length != 4), + assert((script ?? "Xxxx").length == 4), + assert((region ?? "XX").length >= 2), + assert((region ?? "XX").length <= 3), + _languageCode = language, + _scriptCode = script, + _countryCode = region, + _variants = collection.LinkedHashSet.from(variants ?? []), + _extensions = collection.LinkedHashMap.from(extensions ?? {}); + + /// Parses [Unicode Locale + /// Identifiers](https://www.unicode.org/reports/tr35/#Identifiers). + /// + /// This method does not parse all BCP 47 tags. See [BCP 47 + /// Conformance](https://www.unicode.org/reports/tr35/#BCP_47_Conformance) for + /// details. + factory Locale.parse(String localeId) { + assert(localeId != null); + localeId = localeId.toLowerCase(); + if (localeId == 'root') + return Locale._internal('und'); + + List locale_subtags = localeId.split(_re_sep); + String language, script, region; + var variants = collection.SplayTreeSet(); + // Using a SplayTreeMap for its automatic key sorting. + var extensions = collection.SplayTreeMap(); + + List problems = []; + if (_re_language.hasMatch(locale_subtags[0])) { + language = _replaceDeprecatedLanguageSubtag(locale_subtags.removeAt(0)); + } else if (_re_script.hasMatch(locale_subtags[0])) { + // Identifiers without language subtags aren't valid BCP 47 tags and + // therefore not intended for general interchange, however they do match + // the LDML spec. + language = 'und'; + } else { + problems.add('"${locale_subtags[0]}" is an invalid language subtag'); + } + if (locale_subtags.length > 0 && _re_script.hasMatch(locale_subtags[0])) { + script = _capitalize(locale_subtags.removeAt(0)); + } + if (locale_subtags.length > 0 && _re_region.hasMatch(locale_subtags[0])) { + region = locale_subtags.removeAt(0).toUpperCase(); + } + while (locale_subtags.length > 0 && _re_variant.hasMatch(locale_subtags[0])) { + variants.add(locale_subtags.removeAt(0)); + } + + // Now we should be into extension territory, locale_subtags[0] should be a singleton. + if (locale_subtags.length > 0 && locale_subtags[0].length > 1) { + List mismatched = []; + if (variants.length == 0) { + if (region == null) { + if (script == null) { + mismatched.add("script"); + } + mismatched.add("region"); + } + mismatched.add("variant"); + } + problems.add('unrecognised subtag "${locale_subtags[0]}": is not a ' + '${mismatched.join(", ")}'); + } + _ParseExtensions(locale_subtags, extensions, problems); + + if (problems.length > 0) + throw LocaleParseException('Locale Identifier $localeId is invalid: ' + '${problems.join("; ")}.'); + + return Locale._internal(language, + script: script, + region: region, + variants: variants, + extensions: extensions); + } + + /// * All subtags in locale_subtags must already be lowercase. + /// + /// * extensions must be a map with sorted iteration order, SplayTreeMap takes + /// care of that for us. + static void _ParseExtensions(List locale_subtags, + collection.SplayTreeMap extensions, + List problems) { + while (locale_subtags.length > 0) { + String singleton = locale_subtags.removeAt(0); + if (singleton == 'u') { + bool empty = true; + // unicode_locale_extensions: collect "(sep attribute)+" attributes. + var attributes = List(); + while (locale_subtags.length > 0 && + _re_value_subtags.hasMatch(locale_subtags[0])) { + attributes.add(locale_subtags.removeAt(0)); + } + if (attributes.length > 0) { + empty = false; + } + if (!extensions.containsKey(singleton)) { + extensions[singleton] = attributes.join('-'); + } else { + problems.add('duplicate singleton: "${singleton}"'); + } + // unicode_locale_extensions: collect "(sep keyword)*". + while (locale_subtags.length > 0 && + _re_key.hasMatch(locale_subtags[0])) { + empty = false; + String key = locale_subtags.removeAt(0); + var type_parts = List(); + while (locale_subtags.length > 0 && + _re_value_subtags.hasMatch(locale_subtags[0])) { + type_parts.add(locale_subtags.removeAt(0)); + } + if (!extensions.containsKey(key)) { + extensions[key] = type_parts.join('-'); + } else { + problems.add('duplicate key: ${key}'); + } + } + if (empty) { + problems.add('empty singleton: ${singleton}'); + } + } else if (singleton == 't') { + bool empty = true; + // transformed_extensions: grab tlang if it exists. + var tlang = List(); + if (locale_subtags.length > 0 && _re_language.hasMatch(locale_subtags[0])) { + empty = false; + tlang.add(locale_subtags.removeAt(0)); + if (locale_subtags.length > 0 && _re_script.hasMatch(locale_subtags[0])) + tlang.add(locale_subtags.removeAt(0)); + if (locale_subtags.length > 0 && _re_region.hasMatch(locale_subtags[0])) + tlang.add(locale_subtags.removeAt(0)); + while (locale_subtags.length > 0 && _re_variant.hasMatch(locale_subtags[0])) { + tlang.add(locale_subtags.removeAt(0)); + } + } + if (!extensions.containsKey(singleton)) { + extensions[singleton] = tlang.join('-'); + } else { + problems.add('duplicate singleton: "${singleton}"'); + } + // transformed_extensions: collect "(sep tfield)*". + while (locale_subtags.length > 0 && _re_tkey.hasMatch(locale_subtags[0])) { + String tkey = locale_subtags.removeAt(0); + var tvalue_parts = List(); + while (locale_subtags.length > 0 && _re_value_subtags.hasMatch(locale_subtags[0])) { + tvalue_parts.add(locale_subtags.removeAt(0)); + } + if (tvalue_parts.length > 0) { + empty = false; + if (!extensions.containsKey(tkey)) { + extensions[tkey] = tvalue_parts.join('-'); + } else { + problems.add('duplicate key: ${tkey}'); + } + } + } + if (empty) { + problems.add('empty singleton: ${singleton}'); + } + } else if (singleton == 'x') { + // pu_extensions + var values = List(); + while (locale_subtags.length > 0 && _re_all_subtags.hasMatch(locale_subtags[0])) { + values.add(locale_subtags.removeAt(0)); + } + extensions[singleton] = values.join('-'); + if (locale_subtags.length > 0) { + problems.add('invalid part of private use subtags: "${locale_subtags.join('-')}"'); + } + break; + } else if (_re_singleton.hasMatch(singleton)) { + // other_extensions + var values = List(); + while (locale_subtags.length > 0 && _re_other_subtags.hasMatch(locale_subtags[0])) { + values.add(locale_subtags.removeAt(0)); + } + if (!extensions.containsKey(singleton)) { + extensions[singleton] = values.join('-'); + } else { + problems.add('duplicate singleton: "${singleton}"'); + } + } else { + problems.add('invalid subtag, should be singleton: "${singleton}"'); + } + } + } /// The primary language subtag for the locale. /// @@ -166,10 +393,15 @@ class Locale { /// Locale('he')` and `const Locale('iw')` are equal, and both have the /// [languageCode] `he`, because `iw` is a deprecated language subtag that was /// replaced by the subtag `he`. - String get languageCode => _canonicalizeLanguageCode(_languageCode); + String get languageCode => _replaceDeprecatedLanguageSubtag(_languageCode); final String _languageCode; - static String _canonicalizeLanguageCode(String languageCode) { + /// Replaces deprecated language subtags. + /// + /// The subtag must already be lowercase. + /// + /// This method's switch statement periodically needs a manual update. + static String _replaceDeprecatedLanguageSubtag(String languageCode) { // This switch statement is generated by //flutter/tools/gen_locale.dart // Mappings generated for language subtag registry as of 2018-08-08. switch (languageCode) { @@ -255,6 +487,13 @@ class Locale { } } + String get scriptCode => _scriptCode; + final String _scriptCode; + + // ASCII only. (TODO: Check first if already valid). + static String _capitalize(String word) => + word.substring(0, 1).toUpperCase() + word.substring(1).toLowerCase(); + /// The region subtag for the locale. /// /// This can be null. @@ -269,10 +508,15 @@ class Locale { /// 'DE')` and `const Locale('de', 'DD')` are equal, and both have the /// [countryCode] `DE`, because `DD` is a deprecated language subtag that was /// replaced by the subtag `DE`. - String get countryCode => _canonicalizeRegionCode(_countryCode); + String get countryCode => _replaceDeprecatedRegionSubtag(_countryCode); final String _countryCode; - static String _canonicalizeRegionCode(String regionCode) { + /// Replaces deprecated region subtags. + /// + /// The subtag must already be uppercase. + /// + /// This method's switch statement periodically needs a manual update. + static String _replaceDeprecatedRegionSubtag(String regionCode) { // This switch statement is generated by //flutter/tools/gen_locale.dart // Mappings generated for language subtag registry as of 2018-08-08. switch (regionCode) { @@ -286,6 +530,158 @@ class Locale { } } + /// Unicode language variant codes. + /// + /// This iterable provides variants normalized to lowercase and in sorted + /// order, as per the Unicode LDML specification. + Iterable get variants => _variants ?? []; + + // The private _variants field must have variants in lowercase and already + // sorted: constructors must construct it as such. + final collection.LinkedHashSet _variants; + + // A map representing all Locale Identifier extensions. + // + // Keys in this ordered map must be sorted, and both keys and values must all + // be lowercase: constructors must construct it as such. + // + // This map simultaneously reprents T-extensions, U-extensions, other + // extensions and the private use extensions. Implementation detailsf: + // + // * The 't' entry represents the optional "tlang" identifier of the T + // Extension. If the T Extension is present but has no tlang value, the 't' + // map entry's value must be an empty string. + // * The 'u' entry represents the optional attributes of the U Extension. They + // must be sorted in alphabetical order, separated by hyphens, and be + // lowercase. If the U Extension is present but has no attributes, the 'u' + // map entry's value must be an empty string. + // * U-Extension keyword keys, matching _re_key, and T-Extension fields in the + // map, whos field separator subtags match _re_tkey, are directly entered + // into this map. (These regular expressions don't match the same keys.) + // * Other singletons are entered directly into the map, with all + // values/attributes associated with that singleton as the map entry's + // value. + final collection.LinkedHashMap _extensions; + + /// Produces the Unicode BCP47 Locale Identifier for this locale. + /// + /// If the const constructor was used with bad parameters, the result might + /// not be standards-compliant. + String toLanguageTag() { + var out = StringBuffer(languageCode); + if (scriptCode != null) { + out.write('-$scriptCode'); + } + if (_countryCode != null && _countryCode != '') { + out.write('-$countryCode'); + } + for (String v in variants) { + out.write('-$v'); + } + if (_extensions != null && _extensions.isNotEmpty) { + out.write(_ExtensionsToString(_extensions)); + } + return out.toString(); + } + + /// Formats the extension map into a partial Unicode Locale Identifier. + /// + /// This covers everything after the unicode_language_id. + static String _ExtensionsToString( + collection.LinkedHashMap extensions) { + String u_attr; + String t_attr; + var u_out = StringBuffer(); + var t_out = StringBuffer(); + var result = StringBuffer(); + var result_vwyzx = StringBuffer(); + + for (MapEntry entry in extensions.entries) { + if (entry.key.length == 1) { + if (RegExp(r'^[a-s]$').hasMatch(entry.key)) { + result.write('-${entry.key}'); + if (entry.value.length > 0) + result.write('-${entry.value}'); + } else if (entry.key == 't') { + t_attr = entry.value; + } else if (entry.key == 'u') { + u_attr = entry.value; + } else if (RegExp(r'^[vwyz]$').hasMatch(entry.key)) { + result_vwyzx.write('-${entry.key}'); + if (entry.value.length > 0) + result_vwyzx.write('-${entry.value}'); + } else if (entry.key != 'x') { + throw UnimplementedError( + 'Extension not supported/recognised: $entry.'); + } + } else if (_re_key.hasMatch(entry.key)) { + // unicode_locale_extensions + if (entry.value == 'true' || entry.value == '') { + u_out.write('-${entry.key}'); + } else { + u_out.write('-${entry.key}-${entry.value}'); + } + } else if (_re_tkey.hasMatch(entry.key)) { + // transformed_extensions + t_out.write('-${entry.key}'); + // FIXME: this is not standards compliant. What do we want to do with + // this case? Drop entry.key like we drop empty t and u singletons? + // Throw an exception probably. + if (entry.value.length > 0) + t_out.write('-${entry.value}'); + } else { + throw UnimplementedError( + 'Extension not supported/recognised: $entry.'); + } + } + if (t_attr != null || t_out.length > 0) { + result.write('-t'); + if (t_attr != null) + result.write('-$t_attr'); + result.write(t_out.toString()); + } + if (u_attr != null || u_out.length > 0) { + result.write('-u'); + if (u_attr != null && u_attr.length > 0) + result.write('-$u_attr'); + result.write(u_out.toString()); + } + if (result_vwyzx.length > 0) + result.write(result_vwyzx.toString()); + if (extensions.containsKey('x')) { + result.write('-x'); + if (extensions['x'].length > 0) { + result.write('-${extensions["x"]}'); + } + } + return result.toString(); + } + + // Unicode Language Identifier subtags + // TODO/WIP: because we lowercase Locale Identifiers before parsing, typical + // use of these regexps don't actually need to atch capitals too. + static final _re_singleton = RegExp(r'^[a-zA-Z]$'); + + // (https://www.unicode.org/reports/tr35/#Unicode_language_identifier). + static final _re_language = RegExp(r'^[a-zA-Z]{2,3}$|^[a-zA-Z]{5,8}$'); + static final _re_script = RegExp(r'^[a-zA-Z]{4}$'); + static final _re_region = RegExp(r'^[a-zA-Z]{2}$|^[0-9]{3}$'); + static final _re_variant = RegExp(r'^[a-zA-Z0-9]{5,8}$|^[0-9][a-zA-Z0-9]{3}$'); + static final _re_sep = RegExp(r'[-_]'); + + // Covers all subtags possible in Unicode Locale Identifiers, used for + // pu_extensions. + static final _re_all_subtags = RegExp(r'^[a-zA-Z0-9]{1,8}$'); + // Covers all subtags within a particular extension, used for other_extensions. + static final _re_other_subtags = RegExp(r'^[a-zA-Z0-9]{2,8}$'); + // Covers "attribute" and "type" from unicode_locale_extensions, and "tvalue" in + // transformed_extensions. + // (https://www.unicode.org/reports/tr35/#Unicode_locale_identifier). + static final _re_value_subtags = RegExp(r'^[a-zA-Z0-9]{3,8}$'); + + static final _re_key = RegExp('^[a-zA-Z0-9][a-zA-Z]\$'); + static final _re_tkey = RegExp('^[a-zA-Z][0-9]\$'); + @override bool operator ==(dynamic other) { if (identical(this, other)) @@ -293,24 +689,33 @@ class Locale { if (other is! Locale) return false; final Locale typedOther = other; - return languageCode == typedOther.languageCode - && countryCode == typedOther.countryCode; + + // TODO: improve efficiency of this, toLanguageTag() is too expensive. + // Comparing Sets and Maps requires reimplementing functionality in + // package:collection. + return this.toLanguageTag() == typedOther.toLanguageTag() + // toLanguageTag() cannot represent zero-length string as country-code. + && this.countryCode == typedOther.countryCode; } @override int get hashCode { - int result = 373; - result = 37 * result + languageCode.hashCode; - if (_countryCode != null) - result = 37 * result + countryCode.hashCode; - return result; + return this.toLanguageTag().hashCode + + 373 * this.countryCode.hashCode; } + /// Produces a non-BCP47 Unicode Locale Identifier for this locale. + /// + /// This Locale Identifier uses underscores as separator for historical + /// reasons. Use toLanguageTag() instead, it produces a Unicode BCP47 Locale + /// Identifier as recommended for general interchange. @override String toString() { - if (_countryCode == null) - return languageCode; - return '${languageCode}_$countryCode'; + if (_countryCode == '') { + // Not standards-compliant, but kept for legacy reasons. + return '${languageCode}_'; + } + return toLanguageTag().replaceAll('-', '_'); } } @@ -708,6 +1113,7 @@ class Window { if (error != null) throw new Exception(error); } + String _sendPlatformMessage(String name, PlatformMessageResponseCallback callback, ByteData data) native 'Window_sendPlatformMessage'; diff --git a/testing/dart/locale_test.dart b/testing/dart/locale_test.dart index 60a7bf99fe2af..0ea45c396cc3b 100644 --- a/testing/dart/locale_test.dart +++ b/testing/dart/locale_test.dart @@ -12,11 +12,245 @@ void main() { expect(const Locale('en').toString(), 'en'); expect(const Locale('en'), new Locale('en', $null)); expect(const Locale('en').hashCode, new Locale('en', $null).hashCode); - expect(const Locale('en'), isNot(new Locale('en', ''))); - expect(const Locale('en').hashCode, isNot(new Locale('en', '').hashCode)); - expect(const Locale('en', 'US').toString(), 'en_US'); - expect(const Locale('iw').toString(), 'he'); - expect(const Locale('iw', 'DD').toString(), 'he_DE'); - expect(const Locale('iw', 'DD'), const Locale('he', 'DE')); + + expect(const Locale('en'), isNot(new Locale('en', '')), + reason: 'Legacy. (The semantic difference between Locale("en") and ' + 'Locale("en", "") is not defined.)'); + expect(const Locale('en').hashCode, isNot(new Locale('en', '').hashCode), + reason: 'Legacy. (The semantic difference between Locale("en") and ' + 'Locale("en", "") is not defined.)'); + expect(const Locale('en', 'US').toString(), 'en_US', + reason: 'Legacy. en_US is a valid Unicode Locale Identifier, but ' + 'not a valid Unicode BCP47 Locale Identifier.'); + expect(const Locale('en', 'US').toLanguageTag(), 'en-US', + reason: 'Unicode BCP47 Locale Identifier, as recommended for general ' + 'interchange.'); + + expect(const Locale('iw').toString(), 'he', + reason: 'The language code for Hebrew was officially changed in 1989.'); + expect(const Locale('iw', 'DD').toString(), 'he_DE', + reason: 'Legacy. This is a valid Unicode Locale Identifier, even if ' + 'not a valid Unicode BCP47 Locale Identifier'); + expect(const Locale('iw', 'DD'), const Locale('he', 'DE'), + reason: 'The German Democratic Republic ceased to exist in ' + 'October 1990.'); + }); + + test('Locale unnamed constructor idiosyncrasies', () { + expect(() => Locale.parse(Locale('en', '').toString()), throwsException, + reason: 'Locale("en", "").toString() produces "en-" which is not ' + 'standards-compliant.'); + + expect(Locale.parse(Locale('en', '').toLanguageTag()), Locale('en'), + reason: 'There is no standards-compliant way for toLanguageTag() to ' + 'represent a zero-length region code.'); + expect(Locale.parse(Locale('en', '').toLanguageTag()), Locale('en'), + reason: 'There is no standards-compliant way for toLanguageTag() to ' + 'represent a zero-length region code.'); + + expect(Locale('abcd').toLanguageTag(), 'abcd', + reason: 'Locale("abcd") is not following instructions in the API ' + 'documentation, so produces standards-uncompliant output.'); + expect(Locale.parse('abcd').toLanguageTag(), 'und-Abcd', reason: ''); + expect(Locale('abcd'), isNot(Locale.parse('abcd')), reason: ''); + expect(() => Locale.parse(Locale('a').toLanguageTag()), throwsException, + reason: 'Locale("abcd") is not following instructions in the API ' + 'documentation, so produces standards-uncompliant output.'); + expect(Locale.parse(Locale('EN').toLanguageTag()).languageCode, 'en', + reason: 'Locale.parse does standards-compliant normalization, whereas ' + 'Locale("EN") is incorrect usage of the API as per API ' + 'documentation.'); + + // Syntax is correct. Without validating against CLDR supplemental + // data, this looks like da-u-nu-true. + expect(Locale.parse('da-u-nu').toLanguageTag(), equals('da-u-nu'), + reason: 'da-u-nu syntax is correct, this looks like "da-u-nu-true". ' + 'Only validation against CLDR validity data would show "true" is ' + 'not a valid value for "nu".'); + }); + + group('Locale.parse():', () { + test('languageCode.', () { + expect(Locale.parse('IW').languageCode, 'he', + reason: "Case insensitive to input."); + expect(Locale.parse('Fil').languageCode, 'fil', + reason: "3-character language codes: Filipino is not Finnish."); + expect(Locale.parse('abcde').languageCode, 'abcde', + reason: 'The spec provides for language codes with 5 to 8 ' + 'characters, though as of 2018, there aren\'t any valid tags ' + 'this long.'); + expect(Locale.parse('ROOT').languageCode, 'und', + reason: 'BCP 47 Language Tag Conversion: ' + 'replace "root" with "und", and case insensitive.'); + }); + + test('scriptCode.', () { + expect(Locale.parse('af_latn').scriptCode, 'Latn'); + expect(Locale.parse('zh_HANT-CN').scriptCode, 'Hant'); + expect(Locale.parse('sw-TZ').scriptCode, null); + }); + + test('countryCode.', () { + expect(Locale.parse('ar-Arab-EG').countryCode, 'EG'); + expect(Locale.parse('en-GB_scouse_fonipa').countryCode, 'GB'); + }); + + test('variants.', () { + // Liverpool English, script variant: International Phonetic Alphabet. + // (fonipa is a Latn variant, so only Latn really makes sense as script) + expect(Locale.parse('en_scouse_fonipa').variants, + orderedEquals(['fonipa', 'scouse']), + reason: 'Variants should be sorted alphabetically.'); + expect(Locale.parse('de-1996').variants, orderedEquals(['1996'])); + expect(Locale.parse('ja_Jpan').variants, orderedEquals([]), + reason: 'No variants represented by zero-length Iterable.'); + }); + + test('Locale Identifiers with extensions.', () { + expect(Locale.parse('nl-u-attr2-attr1').toLanguageTag(), + equals('nl-u-attr2-attr1'), + reason: '-u- attributes are ordered, do not sort.'); + + expect(Locale.parse('ar-u-ca-islamic-civil').toLanguageTag(), + equals('ar-u-ca-islamic-civil'), + reason: '-u- attributes are ordered, do not sort.'); + + expect(Locale.parse('RU-T-en-Cyrl-GB-H0-HYBRID').toLanguageTag(), + equals('ru-t-en-cyrl-gb-h0-hybrid'), + reason: 'Language identifiers in t-extensions are also lowercase.'); + + expect(Locale.parse('tr-x-foo-t-hi-h0-hybrid-u-nu').toLanguageTag(), + equals('tr-x-foo-t-hi-h0-hybrid-u-nu'), + reason: 'Everything after -x- belong to the -x- private use ' + 'extension.'); + + expect(Locale.parse('pl-b-h0-hybrid-nu-roman').toLanguageTag(), + equals('pl-b-h0-hybrid-nu-roman'), + reason: 'What looks like tkey and key subtags appearing under ' + 'singletons other than -t- and -u- belong to those singletons.'); + + expect(Locale.parse('ca-u-kb-true').toLanguageTag(), equals('ca-u-kb'), + reason: 'For true/false keys like kb, true can be ommitted.'); + expect(Locale.parse('ca-u-kb').toLanguageTag(), equals('ca-u-kb'), + reason: 'For true/false keys like kb, true can be ommitted.'); + // TODO: when adding accessor for u-kb: it should return 'true'. + + expect( + Locale.parse('En-LATN-Gb-SCOUSE-FONIPA' + '-U-ATTR2-ATTR1-ca-islamic-civil-nu-thai' + '-A-BC' + '-z-fo-xyzzy' + '-T-HI-H0-hybrid-m0-UNGEGN' + '-x-u-ab') + .toLanguageTag(), + equals('en-Latn-GB-fonipa-scouse' + '-a-bc' + '-t-hi-h0-hybrid-m0-ungegn' + '-u-attr2-attr1-ca-islamic-civil-nu-thai' + '-z-fo-xyzzy' + '-x-u-ab')); + }); + + // These examples are not spec compliant, although we could have been more + // lenient. When choosing to permit uncompliant identifiers, we would need + // to decide "how lenient?", so we prefer to draw the most obvious line by + // being strict. + // TODO: maybe have the exception contain what's necessary to make a "best + // effort" locale? + test('Strict parsing examples.', () { + expect(() => Locale.parse('nl-u-x'), + throwsA(TypeMatcher()), + reason: 'With no "u" attributes there should be no "u" singleton. ' + 'Could be lenient: "nl-x".'); + expect(() => Locale.parse('fr-t-x'), + throwsA(TypeMatcher()), + reason: 'With no "t" attributes there should be no "t" singleton. ' + 'Could be lenient: "fr-x".'); + expect(() => Locale.parse('it-u-nu-romanlow-a-u-attr'), + throwsA(TypeMatcher()), + reason: 'Duplicate "u" singletons could be merged if only one has ' + 'attributes. Could be lenient: "it-a-u-attr-nu-romanlow".'); + expect(() => Locale.parse('pl-t-cs-b-t-h0-hybrid'), + throwsA(TypeMatcher()), + reason: 'Duplicate "t" singletons could be merged if only one has ' + 'tlang specified. Could be lenient: "pl-b-t-cs-h0-hybrid".'); + + expect(() => Locale.parse('ro-t-hu-HU-cu-ron'), + throwsA(TypeMatcher()), + reason: 'U-extension keywords misplaced under the -t- singleton ' + 'could be moved if unambiguous enough. ' + 'Could be lenient: "ro-t-hu-hu-u-cu-ron".'); + // TODO: any point to this test? It's a counter-example of the previous + // "lenient parsing" idea. + expect( + Locale.parse('ro-t-nu-cyrl').toLanguageTag(), equals('ro-t-nu-cyrl'), + reason: 'This cannot be interpreted as a misplaced -u- keyword. ' + 'It looks like a tlang tag: "nu-Cyrl", '); + + expect(() => Locale.parse('pt-BR-u-h0-hybrid-t-pt-PT'), + throwsA(TypeMatcher()), + reason: 'T-extension "tfields" misplaced under the U-extension. ' + 'Could be lenient: "pt-BR-t-pt-pt-h0-hybrid".'); + + expect(() => Locale.parse('pl-t-h0'), + throwsA(TypeMatcher()), + reason: + 'Locale tag pl-t-h0 is not spec compliant. How to best fix it ' + 'is unclear: it is underspecified.'); + }); + + test('Locale.parse(): invalid identifiers.', () { + expect( + () => Locale.parse('a'), throwsA(TypeMatcher()), + reason: 'One character language subtags are invalid.'); + expect(Locale.parse('abcd').languageCode, 'und', + reason: 'Special-use corner case from the specification: ' + 'language subtag can be skipped if a script is specified.'); + expect(Locale.parse('abcd').scriptCode, 'Abcd', + reason: 'Special-use corner case from the specification: ' + 'language subtag can be skipped if a script is specified.'); + expect(() => Locale.parse('abcdefghi'), + throwsA(TypeMatcher()), + reason: 'Language subtags may not be more than 8 characters.'); + expect(() => Locale.parse(r'e$'), + throwsA(TypeMatcher()), + reason: 'Invalid character for language subtag, only a-z allowed.'); + expect(() => Locale.parse('fr-RU-Hant'), + throwsA(TypeMatcher()), + reason: 'Swapping region and script is not allowed.'); + }); + }); + + test('Equality.', () { + expect(Locale.parse('en'), isNot(Locale.parse('en-Latn'))); + expect( + Locale.parse('en').hashCode, isNot(Locale.parse('en-Latn').hashCode)); + + expect(Locale.parse('en'), isNot(Locale.parse('en-US'))); + expect(Locale.parse('en').hashCode, isNot(Locale.parse('en-US').hashCode)); + + expect(Locale.parse('en'), isNot(Locale.parse('en-fonipa'))); + expect( + Locale.parse('en').hashCode, isNot(Locale.parse('en-fonipa').hashCode)); + + expect(Locale.parse('en'), isNot(Locale.parse('en-a'))); + expect(Locale.parse('en').hashCode, isNot(Locale.parse('en-a').hashCode)); + + expect(Locale.parse('en'), isNot(Locale.parse('en-a'))); + expect(Locale.parse('en').hashCode, isNot(Locale.parse('en-a').hashCode)); + + expect(Locale.parse('en-u-attr'), isNot(Locale.parse('en-u-nu-roman'))); + expect(Locale.parse('en-u-attr').hashCode, + isNot(Locale.parse('en-u-nu-roman').hashCode)); + + expect(Locale.parse('en-u-kb'), Locale.parse('en-u-kb-true'), + reason: '-u-kb should parse to the same result as -u-kb-true.'); + expect( + Locale.parse('en-u-kb').hashCode, Locale.parse('en-u-kb-true').hashCode, + reason: '-u-kb should parse to the same result as -u-kb-true.'); + + expect(Locale.parse('en-t-hi'), isNot(Locale.parse('en-t-hi-h0-hybrid'))); + expect(Locale.parse('en-t-hi').hashCode, + isNot(Locale.parse('en-t-hi-h0-hybrid').hashCode)); }); } From 9b5585e9f23d9a62dc4b1a35d5d01b772a263e78 Mon Sep 17 00:00:00 2001 From: Hugo van der Merwe Date: Tue, 9 Oct 2018 21:34:35 +0200 Subject: [PATCH 02/20] Style improvements. Mention Unicode supplementalMetadata.xml in docs. --- lib/ui/window.dart | 54 ++++++++++++++++++++++++++++------------------ 1 file changed, 33 insertions(+), 21 deletions(-) diff --git a/lib/ui/window.dart b/lib/ui/window.dart index 3aa8bdbc36564..cd4b9570afd3d 100644 --- a/lib/ui/window.dart +++ b/lib/ui/window.dart @@ -137,7 +137,9 @@ class LocaleParseException implements Exception { /// both have the [languageCode] `he`, because `iw` is a deprecated language /// subtag that was replaced by the subtag `he`. /// -/// FIXME/WIP: switch to using CLDR directly instead of the IANA registry? +/// TODO: evalute using CLDR instead of the IANA registry, and determine whether +/// there are more replacement reasons that should be grabbed: +/// https://www.unicode.org/repos/cldr/tags/latest/common/supplemental/supplementalMetadata.xml /// /// See also: /// @@ -166,10 +168,10 @@ class Locale { const Locale( this._languageCode, [ this._countryCode, - ]) : assert(_languageCode != null), - this._scriptCode = null, - this._variants = null, - this._extensions = null; + ]) : assert(_languageCode != null), + this._scriptCode = null, + this._variants = null, + this._extensions = null; /// Creates a new Locale object with the specified parts. /// @@ -207,6 +209,9 @@ class Locale { /// This method does not parse all BCP 47 tags. See [BCP 47 /// Conformance](https://www.unicode.org/reports/tr35/#BCP_47_Conformance) for /// details. + /// + /// TODO: we should try to support parsing all BCP 47 tags, even if we produce + /// only Unicode BCP47 Locale Identifiers as output. factory Locale.parse(String localeId) { assert(localeId != null); localeId = localeId.toLowerCase(); @@ -268,10 +273,10 @@ class Locale { extensions: extensions); } - /// * All subtags in locale_subtags must already be lowercase. - /// - /// * extensions must be a map with sorted iteration order, SplayTreeMap takes - /// care of that for us. + // * All subtags in locale_subtags must already be lowercase. + // + // * extensions must be a map with sorted iteration order, SplayTreeMap takes + // care of that for us. static void _ParseExtensions(List locale_subtags, collection.SplayTreeMap extensions, List problems) { @@ -490,9 +495,11 @@ class Locale { String get scriptCode => _scriptCode; final String _scriptCode; - // ASCII only. (TODO: Check first if already valid). - static String _capitalize(String word) => - word.substring(0, 1).toUpperCase() + word.substring(1).toLowerCase(); + // Uses the language independent Unicode mapping. For use in Locales, we only + // care about ASCII. (TODO: optimize by checking if already valid?) + static String _capitalize(String word) { + return word.substring(0, 1).toUpperCase() + word.substring(1).toLowerCase(); + } /// The region subtag for the locale. /// @@ -624,9 +631,9 @@ class Locale { } else if (_re_tkey.hasMatch(entry.key)) { // transformed_extensions t_out.write('-${entry.key}'); - // FIXME: this is not standards compliant. What do we want to do with + // TODO: this is not standards compliant. What do we want to do with // this case? Drop entry.key like we drop empty t and u singletons? - // Throw an exception probably. + // Or simply ensure we don't ever create such an instance? if (entry.value.length > 0) t_out.write('-${entry.value}'); } else { @@ -690,16 +697,19 @@ class Locale { return false; final Locale typedOther = other; - // TODO: improve efficiency of this, toLanguageTag() is too expensive. + // TODO: improve efficiency of this? // Comparing Sets and Maps requires reimplementing functionality in - // package:collection. + // package:collection, comparing canonical string is simple. return this.toLanguageTag() == typedOther.toLanguageTag() - // toLanguageTag() cannot represent zero-length string as country-code. + // toLanguageTag() cannot represent zero-length string as country-code, + // but we need to distinguish it for backward compatibility reasons. && this.countryCode == typedOther.countryCode; } @override int get hashCode { + // toLanguageTag() cannot represent zero-length string as country-code, + // but we need to distinguish it for backward compatibility reasons. return this.toLanguageTag().hashCode + 373 * this.countryCode.hashCode; } @@ -711,11 +721,13 @@ class Locale { /// Identifier as recommended for general interchange. @override String toString() { - if (_countryCode == '') { - // Not standards-compliant, but kept for legacy reasons. - return '${languageCode}_'; + String identifier = toLanguageTag().replaceAll('-', '_'); + if (_countryCode == '' && identifier == languageCode) { + // Not standards-compliant, but kept for legacy reasons. Only the const + // unnamed constructor should be able to create instances like these. + identifier = '${languageCode}_'; } - return toLanguageTag().replaceAll('-', '_'); + return identifier; } } From 87841128fdad538d3fba002440eed5ce2c2aa8a3 Mon Sep 17 00:00:00 2001 From: Hugo van der Merwe Date: Tue, 9 Oct 2018 23:17:07 +0200 Subject: [PATCH 03/20] Some Lint fixes. --- lib/ui/window.dart | 127 +++++++++++++++++++++++---------------------- 1 file changed, 64 insertions(+), 63 deletions(-) diff --git a/lib/ui/window.dart b/lib/ui/window.dart index cd4b9570afd3d..8d07deafdf586 100644 --- a/lib/ui/window.dart +++ b/lib/ui/window.dart @@ -194,14 +194,14 @@ class Locale { assert(language.length >= 2), assert(language.length <= 8), assert(language.length != 4), - assert((script ?? "Xxxx").length == 4), - assert((region ?? "XX").length >= 2), - assert((region ?? "XX").length <= 3), + assert((script ?? 'Xxxx').length == 4), + assert((region ?? 'XX').length >= 2), + assert((region ?? 'XX').length <= 3), _languageCode = language, _scriptCode = script, _countryCode = region, - _variants = collection.LinkedHashSet.from(variants ?? []), - _extensions = collection.LinkedHashMap.from(extensions ?? {}); + _variants = collection.LinkedHashSet.from(variants ?? []), + _extensions = collection.LinkedHashMap.from(extensions ?? {}); /// Parses [Unicode Locale /// Identifiers](https://www.unicode.org/reports/tr35/#Identifiers). @@ -218,35 +218,36 @@ class Locale { if (localeId == 'root') return Locale._internal('und'); - List locale_subtags = localeId.split(_re_sep); + List localeSubtags = localeId.split(_re_sep); String language, script, region; - var variants = collection.SplayTreeSet(); + String variants = collection.SplayTreeSet(); // Using a SplayTreeMap for its automatic key sorting. - var extensions = collection.SplayTreeMap(); + collection.SplayTreeMap extensions = + collection.SplayTreeMap(); - List problems = []; - if (_re_language.hasMatch(locale_subtags[0])) { - language = _replaceDeprecatedLanguageSubtag(locale_subtags.removeAt(0)); - } else if (_re_script.hasMatch(locale_subtags[0])) { + final List problems = []; + if (_re_language.hasMatch(localeSubtags[0])) { + language = _replaceDeprecatedLanguageSubtag(localeSubtags.removeAt(0)); + } else if (_re_script.hasMatch(localeSubtags[0])) { // Identifiers without language subtags aren't valid BCP 47 tags and // therefore not intended for general interchange, however they do match // the LDML spec. language = 'und'; } else { - problems.add('"${locale_subtags[0]}" is an invalid language subtag'); + problems.add('"${localeSubtags[0]}" is an invalid language subtag'); } - if (locale_subtags.length > 0 && _re_script.hasMatch(locale_subtags[0])) { - script = _capitalize(locale_subtags.removeAt(0)); + if (localeSubtags.isNotEmpty && _re_script.hasMatch(localeSubtags[0])) { + script = _capitalize(localeSubtags.removeAt(0)); } - if (locale_subtags.length > 0 && _re_region.hasMatch(locale_subtags[0])) { - region = locale_subtags.removeAt(0).toUpperCase(); + if (localeSubtags.isNotEmpty && _re_region.hasMatch(localeSubtags[0])) { + region = localeSubtags.removeAt(0).toUpperCase(); } - while (locale_subtags.length > 0 && _re_variant.hasMatch(locale_subtags[0])) { - variants.add(locale_subtags.removeAt(0)); + while (localeSubtags.isNotEmpty && _re_variant.hasMatch(localeSubtags[0])) { + variants.add(localeSubtags.removeAt(0)); } - // Now we should be into extension territory, locale_subtags[0] should be a singleton. - if (locale_subtags.length > 0 && locale_subtags[0].length > 1) { + // Now we should be into extension territory, localeSubtags[0] should be a singleton. + if (localeSubtags.isNotEmpty && localeSubtags[0].length > 1) { List mismatched = []; if (variants.length == 0) { if (region == null) { @@ -257,12 +258,12 @@ class Locale { } mismatched.add("variant"); } - problems.add('unrecognised subtag "${locale_subtags[0]}": is not a ' + problems.add('unrecognised subtag "${localeSubtags[0]}": is not a ' '${mismatched.join(", ")}'); } - _ParseExtensions(locale_subtags, extensions, problems); + _ParseExtensions(localeSubtags, extensions, problems); - if (problems.length > 0) + if (problems.isNotEmpty) throw LocaleParseException('Locale Identifier $localeId is invalid: ' '${problems.join("; ")}.'); @@ -273,24 +274,24 @@ class Locale { extensions: extensions); } - // * All subtags in locale_subtags must already be lowercase. + // * All subtags in localeSubtags must already be lowercase. // // * extensions must be a map with sorted iteration order, SplayTreeMap takes // care of that for us. static void _ParseExtensions(List locale_subtags, collection.SplayTreeMap extensions, List problems) { - while (locale_subtags.length > 0) { + while (locale_subtags.isNotEmpty) { String singleton = locale_subtags.removeAt(0); if (singleton == 'u') { bool empty = true; // unicode_locale_extensions: collect "(sep attribute)+" attributes. - var attributes = List(); - while (locale_subtags.length > 0 && + List attributes = List(); + while (locale_subtags.isNotEmpty && _re_value_subtags.hasMatch(locale_subtags[0])) { attributes.add(locale_subtags.removeAt(0)); } - if (attributes.length > 0) { + if (attributes.isNotEmpty) { empty = false; } if (!extensions.containsKey(singleton)) { @@ -299,12 +300,12 @@ class Locale { problems.add('duplicate singleton: "${singleton}"'); } // unicode_locale_extensions: collect "(sep keyword)*". - while (locale_subtags.length > 0 && + while (locale_subtags.isNotEmpty && _re_key.hasMatch(locale_subtags[0])) { empty = false; String key = locale_subtags.removeAt(0); - var type_parts = List(); - while (locale_subtags.length > 0 && + List type_parts = List(); + while (locale_subtags.isNotEmpty && _re_value_subtags.hasMatch(locale_subtags[0])) { type_parts.add(locale_subtags.removeAt(0)); } @@ -320,15 +321,15 @@ class Locale { } else if (singleton == 't') { bool empty = true; // transformed_extensions: grab tlang if it exists. - var tlang = List(); - if (locale_subtags.length > 0 && _re_language.hasMatch(locale_subtags[0])) { + List tlang = List(); + if (locale_subtags.isNotEmpty && _re_language.hasMatch(locale_subtags[0])) { empty = false; tlang.add(locale_subtags.removeAt(0)); - if (locale_subtags.length > 0 && _re_script.hasMatch(locale_subtags[0])) + if (locale_subtags.isNotEmpty && _re_script.hasMatch(locale_subtags[0])) tlang.add(locale_subtags.removeAt(0)); - if (locale_subtags.length > 0 && _re_region.hasMatch(locale_subtags[0])) + if (locale_subtags.isNotEmpty && _re_region.hasMatch(locale_subtags[0])) tlang.add(locale_subtags.removeAt(0)); - while (locale_subtags.length > 0 && _re_variant.hasMatch(locale_subtags[0])) { + while (locale_subtags.isNotEmpty && _re_variant.hasMatch(locale_subtags[0])) { tlang.add(locale_subtags.removeAt(0)); } } @@ -338,13 +339,13 @@ class Locale { problems.add('duplicate singleton: "${singleton}"'); } // transformed_extensions: collect "(sep tfield)*". - while (locale_subtags.length > 0 && _re_tkey.hasMatch(locale_subtags[0])) { + while (locale_subtags.isNotEmpty && _re_tkey.hasMatch(locale_subtags[0])) { String tkey = locale_subtags.removeAt(0); - var tvalue_parts = List(); - while (locale_subtags.length > 0 && _re_value_subtags.hasMatch(locale_subtags[0])) { + List tvalue_parts = List(); + while (locale_subtags.isNotEmpty && _re_value_subtags.hasMatch(locale_subtags[0])) { tvalue_parts.add(locale_subtags.removeAt(0)); } - if (tvalue_parts.length > 0) { + if (tvalue_parts.isNotEmpty) { empty = false; if (!extensions.containsKey(tkey)) { extensions[tkey] = tvalue_parts.join('-'); @@ -358,19 +359,19 @@ class Locale { } } else if (singleton == 'x') { // pu_extensions - var values = List(); - while (locale_subtags.length > 0 && _re_all_subtags.hasMatch(locale_subtags[0])) { + List values = List(); + while (locale_subtags.isNotEmpty && _re_all_subtags.hasMatch(locale_subtags[0])) { values.add(locale_subtags.removeAt(0)); } extensions[singleton] = values.join('-'); - if (locale_subtags.length > 0) { + if (locale_subtags.isNotEmpty) { problems.add('invalid part of private use subtags: "${locale_subtags.join('-')}"'); } break; } else if (_re_singleton.hasMatch(singleton)) { // other_extensions - var values = List(); - while (locale_subtags.length > 0 && _re_other_subtags.hasMatch(locale_subtags[0])) { + List values = List(); + while (locale_subtags.isNotEmpty && _re_other_subtags.hasMatch(locale_subtags[0])) { values.add(locale_subtags.removeAt(0)); } if (!extensions.containsKey(singleton)) { @@ -575,7 +576,7 @@ class Locale { /// If the const constructor was used with bad parameters, the result might /// not be standards-compliant. String toLanguageTag() { - var out = StringBuffer(languageCode); + StringBuffer out = StringBuffer(languageCode); if (scriptCode != null) { out.write('-$scriptCode'); } @@ -598,16 +599,16 @@ class Locale { collection.LinkedHashMap extensions) { String u_attr; String t_attr; - var u_out = StringBuffer(); - var t_out = StringBuffer(); - var result = StringBuffer(); - var result_vwyzx = StringBuffer(); + StringBuffer u_out = StringBuffer(); + StringBuffer t_out = StringBuffer(); + StringBuffer result = StringBuffer(); + StringBuffer result_vwyzx = StringBuffer(); for (MapEntry entry in extensions.entries) { if (entry.key.length == 1) { if (RegExp(r'^[a-s]$').hasMatch(entry.key)) { result.write('-${entry.key}'); - if (entry.value.length > 0) + if (entry.value.isNotEmpty) result.write('-${entry.value}'); } else if (entry.key == 't') { t_attr = entry.value; @@ -615,7 +616,7 @@ class Locale { u_attr = entry.value; } else if (RegExp(r'^[vwyz]$').hasMatch(entry.key)) { result_vwyzx.write('-${entry.key}'); - if (entry.value.length > 0) + if (entry.value.isNotEmpty) result_vwyzx.write('-${entry.value}'); } else if (entry.key != 'x') { throw UnimplementedError( @@ -634,30 +635,30 @@ class Locale { // TODO: this is not standards compliant. What do we want to do with // this case? Drop entry.key like we drop empty t and u singletons? // Or simply ensure we don't ever create such an instance? - if (entry.value.length > 0) + if (entry.value.isNotEmpty) t_out.write('-${entry.value}'); } else { throw UnimplementedError( 'Extension not supported/recognised: $entry.'); } } - if (t_attr != null || t_out.length > 0) { + if (t_attr != null || t_out.isNotEmpty) { result.write('-t'); if (t_attr != null) result.write('-$t_attr'); result.write(t_out.toString()); } - if (u_attr != null || u_out.length > 0) { + if (u_attr != null || u_out.isNotEmpty) { result.write('-u'); - if (u_attr != null && u_attr.length > 0) + if (u_attr != null && u_attr.isNotEmpty) result.write('-$u_attr'); result.write(u_out.toString()); } - if (result_vwyzx.length > 0) + if (result_vwyzx.isNotEmpty) result.write(result_vwyzx.toString()); if (extensions.containsKey('x')) { result.write('-x'); - if (extensions['x'].length > 0) { + if (extensions['x'].isNotEmpty) { result.write('-${extensions["x"]}'); } } @@ -700,18 +701,18 @@ class Locale { // TODO: improve efficiency of this? // Comparing Sets and Maps requires reimplementing functionality in // package:collection, comparing canonical string is simple. - return this.toLanguageTag() == typedOther.toLanguageTag() + return toLanguageTag() == typedOther.toLanguageTag() // toLanguageTag() cannot represent zero-length string as country-code, // but we need to distinguish it for backward compatibility reasons. - && this.countryCode == typedOther.countryCode; + && countryCode == typedOther.countryCode; } @override int get hashCode { // toLanguageTag() cannot represent zero-length string as country-code, // but we need to distinguish it for backward compatibility reasons. - return this.toLanguageTag().hashCode - + 373 * this.countryCode.hashCode; + return toLanguageTag().hashCode + + 373 * countryCode.hashCode; } /// Produces a non-BCP47 Unicode Locale Identifier for this locale. From d7b948b84d6f2ae28b205c841973ec6f9995c12e Mon Sep 17 00:00:00 2001 From: Hugo van der Merwe Date: Tue, 9 Oct 2018 23:43:34 +0200 Subject: [PATCH 04/20] Second round of lint fixes. --- lib/ui/window.dart | 210 +++++++++++++++++++++++---------------------- 1 file changed, 106 insertions(+), 104 deletions(-) diff --git a/lib/ui/window.dart b/lib/ui/window.dart index 8d07deafdf586..b1afd06621dee 100644 --- a/lib/ui/window.dart +++ b/lib/ui/window.dart @@ -119,6 +119,7 @@ class WindowPadding { class LocaleParseException implements Exception { LocaleParseException(this._message); String _message; + String toString() => 'LocaleParseException: $_message'; } /// An identifier used to select a user's language and formatting preferences. @@ -218,17 +219,18 @@ class Locale { if (localeId == 'root') return Locale._internal('und'); - List localeSubtags = localeId.split(_re_sep); + final List localeSubtags = localeId.split(_reSep); String language, script, region; - String variants = collection.SplayTreeSet(); + final collection.SplayTreeSet variants = + collection.SplayTreeSet(); // Using a SplayTreeMap for its automatic key sorting. - collection.SplayTreeMap extensions = + final collection.SplayTreeMap extensions = collection.SplayTreeMap(); final List problems = []; - if (_re_language.hasMatch(localeSubtags[0])) { + if (_reLanguage.hasMatch(localeSubtags[0])) { language = _replaceDeprecatedLanguageSubtag(localeSubtags.removeAt(0)); - } else if (_re_script.hasMatch(localeSubtags[0])) { + } else if (_reScript.hasMatch(localeSubtags[0])) { // Identifiers without language subtags aren't valid BCP 47 tags and // therefore not intended for general interchange, however they do match // the LDML spec. @@ -236,32 +238,32 @@ class Locale { } else { problems.add('"${localeSubtags[0]}" is an invalid language subtag'); } - if (localeSubtags.isNotEmpty && _re_script.hasMatch(localeSubtags[0])) { + if (localeSubtags.isNotEmpty && _reScript.hasMatch(localeSubtags[0])) { script = _capitalize(localeSubtags.removeAt(0)); } - if (localeSubtags.isNotEmpty && _re_region.hasMatch(localeSubtags[0])) { + if (localeSubtags.isNotEmpty && _reRegion.hasMatch(localeSubtags[0])) { region = localeSubtags.removeAt(0).toUpperCase(); } - while (localeSubtags.isNotEmpty && _re_variant.hasMatch(localeSubtags[0])) { + while (localeSubtags.isNotEmpty && _reVariant.hasMatch(localeSubtags[0])) { variants.add(localeSubtags.removeAt(0)); } // Now we should be into extension territory, localeSubtags[0] should be a singleton. if (localeSubtags.isNotEmpty && localeSubtags[0].length > 1) { - List mismatched = []; - if (variants.length == 0) { + final List mismatched = []; + if (variants.isEmpty) { if (region == null) { if (script == null) { - mismatched.add("script"); + mismatched.add('script'); } - mismatched.add("region"); + mismatched.add('region'); } - mismatched.add("variant"); + mismatched.add('variant'); } problems.add('unrecognised subtag "${localeSubtags[0]}": is not a ' '${mismatched.join(", ")}'); } - _ParseExtensions(localeSubtags, extensions, problems); + _parseExtensions(localeSubtags, extensions, problems); if (problems.isNotEmpty) throw LocaleParseException('Locale Identifier $localeId is invalid: ' @@ -278,18 +280,18 @@ class Locale { // // * extensions must be a map with sorted iteration order, SplayTreeMap takes // care of that for us. - static void _ParseExtensions(List locale_subtags, + static void _parseExtensions(List localeSubtags, collection.SplayTreeMap extensions, List problems) { - while (locale_subtags.isNotEmpty) { - String singleton = locale_subtags.removeAt(0); + while (localeSubtags.isNotEmpty) { + final String singleton = localeSubtags.removeAt(0); if (singleton == 'u') { bool empty = true; // unicode_locale_extensions: collect "(sep attribute)+" attributes. - List attributes = List(); - while (locale_subtags.isNotEmpty && - _re_value_subtags.hasMatch(locale_subtags[0])) { - attributes.add(locale_subtags.removeAt(0)); + final List attributes = []; + while (localeSubtags.isNotEmpty && + _reValueSubtags.hasMatch(localeSubtags[0])) { + attributes.add(localeSubtags.removeAt(0)); } if (attributes.isNotEmpty) { empty = false; @@ -297,90 +299,90 @@ class Locale { if (!extensions.containsKey(singleton)) { extensions[singleton] = attributes.join('-'); } else { - problems.add('duplicate singleton: "${singleton}"'); + problems.add('duplicate singleton: "$singleton"'); } // unicode_locale_extensions: collect "(sep keyword)*". - while (locale_subtags.isNotEmpty && - _re_key.hasMatch(locale_subtags[0])) { + while (localeSubtags.isNotEmpty && + _reKey.hasMatch(localeSubtags[0])) { empty = false; - String key = locale_subtags.removeAt(0); - List type_parts = List(); - while (locale_subtags.isNotEmpty && - _re_value_subtags.hasMatch(locale_subtags[0])) { - type_parts.add(locale_subtags.removeAt(0)); + final String key = localeSubtags.removeAt(0); + final List typeParts = []; + while (localeSubtags.isNotEmpty && + _reValueSubtags.hasMatch(localeSubtags[0])) { + typeParts.add(localeSubtags.removeAt(0)); } if (!extensions.containsKey(key)) { - extensions[key] = type_parts.join('-'); + extensions[key] = typeParts.join('-'); } else { - problems.add('duplicate key: ${key}'); + problems.add('duplicate key: $key'); } } if (empty) { - problems.add('empty singleton: ${singleton}'); + problems.add('empty singleton: $singleton'); } } else if (singleton == 't') { bool empty = true; // transformed_extensions: grab tlang if it exists. - List tlang = List(); - if (locale_subtags.isNotEmpty && _re_language.hasMatch(locale_subtags[0])) { + final List tlang = []; + if (localeSubtags.isNotEmpty && _reLanguage.hasMatch(localeSubtags[0])) { empty = false; - tlang.add(locale_subtags.removeAt(0)); - if (locale_subtags.isNotEmpty && _re_script.hasMatch(locale_subtags[0])) - tlang.add(locale_subtags.removeAt(0)); - if (locale_subtags.isNotEmpty && _re_region.hasMatch(locale_subtags[0])) - tlang.add(locale_subtags.removeAt(0)); - while (locale_subtags.isNotEmpty && _re_variant.hasMatch(locale_subtags[0])) { - tlang.add(locale_subtags.removeAt(0)); + tlang.add(localeSubtags.removeAt(0)); + if (localeSubtags.isNotEmpty && _reScript.hasMatch(localeSubtags[0])) + tlang.add(localeSubtags.removeAt(0)); + if (localeSubtags.isNotEmpty && _reRegion.hasMatch(localeSubtags[0])) + tlang.add(localeSubtags.removeAt(0)); + while (localeSubtags.isNotEmpty && _reVariant.hasMatch(localeSubtags[0])) { + tlang.add(localeSubtags.removeAt(0)); } } if (!extensions.containsKey(singleton)) { extensions[singleton] = tlang.join('-'); } else { - problems.add('duplicate singleton: "${singleton}"'); + problems.add('duplicate singleton: "$singleton"'); } // transformed_extensions: collect "(sep tfield)*". - while (locale_subtags.isNotEmpty && _re_tkey.hasMatch(locale_subtags[0])) { - String tkey = locale_subtags.removeAt(0); - List tvalue_parts = List(); - while (locale_subtags.isNotEmpty && _re_value_subtags.hasMatch(locale_subtags[0])) { - tvalue_parts.add(locale_subtags.removeAt(0)); + while (localeSubtags.isNotEmpty && _reTkey.hasMatch(localeSubtags[0])) { + final String tkey = localeSubtags.removeAt(0); + final List tvalueParts = []; + while (localeSubtags.isNotEmpty && _reValueSubtags.hasMatch(localeSubtags[0])) { + tvalueParts.add(localeSubtags.removeAt(0)); } - if (tvalue_parts.isNotEmpty) { + if (tvalueParts.isNotEmpty) { empty = false; if (!extensions.containsKey(tkey)) { - extensions[tkey] = tvalue_parts.join('-'); + extensions[tkey] = tvalueParts.join('-'); } else { - problems.add('duplicate key: ${tkey}'); + problems.add('duplicate key: $tkey'); } } } if (empty) { - problems.add('empty singleton: ${singleton}'); + problems.add('empty singleton: $singleton'); } } else if (singleton == 'x') { // pu_extensions - List values = List(); - while (locale_subtags.isNotEmpty && _re_all_subtags.hasMatch(locale_subtags[0])) { - values.add(locale_subtags.removeAt(0)); + final List values = []; + while (localeSubtags.isNotEmpty && _reAllSubtags.hasMatch(localeSubtags[0])) { + values.add(localeSubtags.removeAt(0)); } extensions[singleton] = values.join('-'); - if (locale_subtags.isNotEmpty) { - problems.add('invalid part of private use subtags: "${locale_subtags.join('-')}"'); + if (localeSubtags.isNotEmpty) { + problems.add('invalid part of private use subtags: "${localeSubtags.join('-')}"'); } break; } else if (_re_singleton.hasMatch(singleton)) { // other_extensions - List values = List(); - while (locale_subtags.isNotEmpty && _re_other_subtags.hasMatch(locale_subtags[0])) { - values.add(locale_subtags.removeAt(0)); + final List values = []; + while (localeSubtags.isNotEmpty && _reOtherSubtags.hasMatch(localeSubtags[0])) { + values.add(localeSubtags.removeAt(0)); } if (!extensions.containsKey(singleton)) { extensions[singleton] = values.join('-'); } else { - problems.add('duplicate singleton: "${singleton}"'); + problems.add('duplicate singleton: "$singleton"'); } } else { - problems.add('invalid subtag, should be singleton: "${singleton}"'); + problems.add('invalid subtag, should be singleton: "$singleton"'); } } } @@ -542,7 +544,7 @@ class Locale { /// /// This iterable provides variants normalized to lowercase and in sorted /// order, as per the Unicode LDML specification. - Iterable get variants => _variants ?? []; + Iterable get variants => _variants ?? []; // The private _variants field must have variants in lowercase and already // sorted: constructors must construct it as such. @@ -563,8 +565,8 @@ class Locale { // must be sorted in alphabetical order, separated by hyphens, and be // lowercase. If the U Extension is present but has no attributes, the 'u' // map entry's value must be an empty string. - // * U-Extension keyword keys, matching _re_key, and T-Extension fields in the - // map, whos field separator subtags match _re_tkey, are directly entered + // * U-Extension keyword keys, matching _reKey, and T-Extension fields in the + // map, whos field separator subtags match _reTkey, are directly entered // into this map. (These regular expressions don't match the same keys.) // * Other singletons are entered directly into the map, with all // values/attributes associated with that singleton as the map entry's @@ -587,7 +589,7 @@ class Locale { out.write('-$v'); } if (_extensions != null && _extensions.isNotEmpty) { - out.write(_ExtensionsToString(_extensions)); + out.write(_extensionsToString(_extensions)); } return out.toString(); } @@ -595,67 +597,67 @@ class Locale { /// Formats the extension map into a partial Unicode Locale Identifier. /// /// This covers everything after the unicode_language_id. - static String _ExtensionsToString( + static String _extensionsToString( collection.LinkedHashMap extensions) { - String u_attr; - String t_attr; - StringBuffer u_out = StringBuffer(); - StringBuffer t_out = StringBuffer(); - StringBuffer result = StringBuffer(); - StringBuffer result_vwyzx = StringBuffer(); - - for (MapEntry entry in extensions.entries) { + String uAttr; + String tAttr; + final StringBuffer uOut = StringBuffer(); + final StringBuffer tOut = StringBuffer(); + final StringBuffer result = StringBuffer(); + final StringBuffer resultVWYZX = StringBuffer(); + + for (MapEntry entry in extensions.entries) { if (entry.key.length == 1) { if (RegExp(r'^[a-s]$').hasMatch(entry.key)) { result.write('-${entry.key}'); if (entry.value.isNotEmpty) result.write('-${entry.value}'); } else if (entry.key == 't') { - t_attr = entry.value; + tAttr = entry.value; } else if (entry.key == 'u') { - u_attr = entry.value; + uAttr = entry.value; } else if (RegExp(r'^[vwyz]$').hasMatch(entry.key)) { - result_vwyzx.write('-${entry.key}'); + resultVWYZX.write('-${entry.key}'); if (entry.value.isNotEmpty) - result_vwyzx.write('-${entry.value}'); + resultVWYZX.write('-${entry.value}'); } else if (entry.key != 'x') { throw UnimplementedError( 'Extension not supported/recognised: $entry.'); } - } else if (_re_key.hasMatch(entry.key)) { + } else if (_reKey.hasMatch(entry.key)) { // unicode_locale_extensions if (entry.value == 'true' || entry.value == '') { - u_out.write('-${entry.key}'); + uOut.write('-${entry.key}'); } else { - u_out.write('-${entry.key}-${entry.value}'); + uOut.write('-${entry.key}-${entry.value}'); } - } else if (_re_tkey.hasMatch(entry.key)) { + } else if (_reTkey.hasMatch(entry.key)) { // transformed_extensions - t_out.write('-${entry.key}'); + tOut.write('-${entry.key}'); // TODO: this is not standards compliant. What do we want to do with // this case? Drop entry.key like we drop empty t and u singletons? // Or simply ensure we don't ever create such an instance? if (entry.value.isNotEmpty) - t_out.write('-${entry.value}'); + tOut.write('-${entry.value}'); } else { throw UnimplementedError( 'Extension not supported/recognised: $entry.'); } } - if (t_attr != null || t_out.isNotEmpty) { + if (tAttr != null || tOut.isNotEmpty) { result.write('-t'); - if (t_attr != null) - result.write('-$t_attr'); - result.write(t_out.toString()); + if (tAttr != null) + result.write('-$tAttr'); + result.write(tOut.toString()); } - if (u_attr != null || u_out.isNotEmpty) { + if (uAttr != null || uOut.isNotEmpty) { result.write('-u'); - if (u_attr != null && u_attr.isNotEmpty) - result.write('-$u_attr'); - result.write(u_out.toString()); + if (uAttr != null && uAttr.isNotEmpty) + result.write('-$uAttr'); + result.write(uOut.toString()); } - if (result_vwyzx.isNotEmpty) - result.write(result_vwyzx.toString()); + if (resultVWYZX.isNotEmpty) + result.write(resultVWYZX.toString()); if (extensions.containsKey('x')) { result.write('-x'); if (extensions['x'].isNotEmpty) { @@ -671,24 +673,24 @@ class Locale { static final _re_singleton = RegExp(r'^[a-zA-Z]$'); // (https://www.unicode.org/reports/tr35/#Unicode_language_identifier). - static final _re_language = RegExp(r'^[a-zA-Z]{2,3}$|^[a-zA-Z]{5,8}$'); - static final _re_script = RegExp(r'^[a-zA-Z]{4}$'); - static final _re_region = RegExp(r'^[a-zA-Z]{2}$|^[0-9]{3}$'); - static final _re_variant = RegExp(r'^[a-zA-Z0-9]{5,8}$|^[0-9][a-zA-Z0-9]{3}$'); - static final _re_sep = RegExp(r'[-_]'); + static final _reLanguage = RegExp(r'^[a-zA-Z]{2,3}$|^[a-zA-Z]{5,8}$'); + static final _reScript = RegExp(r'^[a-zA-Z]{4}$'); + static final _reRegion = RegExp(r'^[a-zA-Z]{2}$|^[0-9]{3}$'); + static final _reVariant = RegExp(r'^[a-zA-Z0-9]{5,8}$|^[0-9][a-zA-Z0-9]{3}$'); + static final _reSep = RegExp(r'[-_]'); // Covers all subtags possible in Unicode Locale Identifiers, used for // pu_extensions. - static final _re_all_subtags = RegExp(r'^[a-zA-Z0-9]{1,8}$'); + static final _reAllSubtags = RegExp(r'^[a-zA-Z0-9]{1,8}$'); // Covers all subtags within a particular extension, used for other_extensions. - static final _re_other_subtags = RegExp(r'^[a-zA-Z0-9]{2,8}$'); + static final _reOtherSubtags = RegExp(r'^[a-zA-Z0-9]{2,8}$'); // Covers "attribute" and "type" from unicode_locale_extensions, and "tvalue" in // transformed_extensions. // (https://www.unicode.org/reports/tr35/#Unicode_locale_identifier). - static final _re_value_subtags = RegExp(r'^[a-zA-Z0-9]{3,8}$'); + static final _reValueSubtags = RegExp(r'^[a-zA-Z0-9]{3,8}$'); - static final _re_key = RegExp('^[a-zA-Z0-9][a-zA-Z]\$'); - static final _re_tkey = RegExp('^[a-zA-Z][0-9]\$'); + static final _reKey = RegExp('^[a-zA-Z0-9][a-zA-Z]\$'); + static final _reTkey = RegExp('^[a-zA-Z][0-9]\$'); @override bool operator ==(dynamic other) { From 92fed85d7567176fbddd6c4cf7875848d992304f Mon Sep 17 00:00:00 2001 From: Hugo van der Merwe Date: Tue, 9 Oct 2018 23:53:10 +0200 Subject: [PATCH 05/20] Third round of lint fixes. --- lib/ui/window.dart | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/lib/ui/window.dart b/lib/ui/window.dart index b1afd06621dee..f2f608181594a 100644 --- a/lib/ui/window.dart +++ b/lib/ui/window.dart @@ -118,7 +118,10 @@ class WindowPadding { class LocaleParseException implements Exception { LocaleParseException(this._message); + String _message; + + @override String toString() => 'LocaleParseException: $_message'; } @@ -288,7 +291,7 @@ class Locale { if (singleton == 'u') { bool empty = true; // unicode_locale_extensions: collect "(sep attribute)+" attributes. - final List attributes = []; + final List attributes = []; while (localeSubtags.isNotEmpty && _reValueSubtags.hasMatch(localeSubtags[0])) { attributes.add(localeSubtags.removeAt(0)); @@ -306,7 +309,7 @@ class Locale { _reKey.hasMatch(localeSubtags[0])) { empty = false; final String key = localeSubtags.removeAt(0); - final List typeParts = []; + final List typeParts = []; while (localeSubtags.isNotEmpty && _reValueSubtags.hasMatch(localeSubtags[0])) { typeParts.add(localeSubtags.removeAt(0)); @@ -323,7 +326,7 @@ class Locale { } else if (singleton == 't') { bool empty = true; // transformed_extensions: grab tlang if it exists. - final List tlang = []; + final List tlang = []; if (localeSubtags.isNotEmpty && _reLanguage.hasMatch(localeSubtags[0])) { empty = false; tlang.add(localeSubtags.removeAt(0)); @@ -343,7 +346,7 @@ class Locale { // transformed_extensions: collect "(sep tfield)*". while (localeSubtags.isNotEmpty && _reTkey.hasMatch(localeSubtags[0])) { final String tkey = localeSubtags.removeAt(0); - final List tvalueParts = []; + final List tvalueParts = []; while (localeSubtags.isNotEmpty && _reValueSubtags.hasMatch(localeSubtags[0])) { tvalueParts.add(localeSubtags.removeAt(0)); } @@ -361,7 +364,7 @@ class Locale { } } else if (singleton == 'x') { // pu_extensions - final List values = []; + final List values = []; while (localeSubtags.isNotEmpty && _reAllSubtags.hasMatch(localeSubtags[0])) { values.add(localeSubtags.removeAt(0)); } @@ -370,9 +373,9 @@ class Locale { problems.add('invalid part of private use subtags: "${localeSubtags.join('-')}"'); } break; - } else if (_re_singleton.hasMatch(singleton)) { + } else if (_reSingleton.hasMatch(singleton)) { // other_extensions - final List values = []; + final List values = []; while (localeSubtags.isNotEmpty && _reOtherSubtags.hasMatch(localeSubtags[0])) { values.add(localeSubtags.removeAt(0)); } @@ -670,27 +673,27 @@ class Locale { // Unicode Language Identifier subtags // TODO/WIP: because we lowercase Locale Identifiers before parsing, typical // use of these regexps don't actually need to atch capitals too. - static final _re_singleton = RegExp(r'^[a-zA-Z]$'); + static final RegExp _reSingleton = RegExp(r'^[a-zA-Z]$'); // (https://www.unicode.org/reports/tr35/#Unicode_language_identifier). - static final _reLanguage = RegExp(r'^[a-zA-Z]{2,3}$|^[a-zA-Z]{5,8}$'); - static final _reScript = RegExp(r'^[a-zA-Z]{4}$'); - static final _reRegion = RegExp(r'^[a-zA-Z]{2}$|^[0-9]{3}$'); - static final _reVariant = RegExp(r'^[a-zA-Z0-9]{5,8}$|^[0-9][a-zA-Z0-9]{3}$'); - static final _reSep = RegExp(r'[-_]'); + static final RegExp _reLanguage = RegExp(r'^[a-zA-Z]{2,3}$|^[a-zA-Z]{5,8}$'); + static final RegExp _reScript = RegExp(r'^[a-zA-Z]{4}$'); + static final RegExp _reRegion = RegExp(r'^[a-zA-Z]{2}$|^[0-9]{3}$'); + static final RegExp _reVariant = RegExp(r'^[a-zA-Z0-9]{5,8}$|^[0-9][a-zA-Z0-9]{3}$'); + static final RegExp _reSep = RegExp(r'[-_]'); // Covers all subtags possible in Unicode Locale Identifiers, used for // pu_extensions. - static final _reAllSubtags = RegExp(r'^[a-zA-Z0-9]{1,8}$'); + static final RegExp _reAllSubtags = RegExp(r'^[a-zA-Z0-9]{1,8}$'); // Covers all subtags within a particular extension, used for other_extensions. - static final _reOtherSubtags = RegExp(r'^[a-zA-Z0-9]{2,8}$'); + static final RegExp _reOtherSubtags = RegExp(r'^[a-zA-Z0-9]{2,8}$'); // Covers "attribute" and "type" from unicode_locale_extensions, and "tvalue" in // transformed_extensions. // (https://www.unicode.org/reports/tr35/#Unicode_locale_identifier). - static final _reValueSubtags = RegExp(r'^[a-zA-Z0-9]{3,8}$'); + static final RegExp _reValueSubtags = RegExp(r'^[a-zA-Z0-9]{3,8}$'); - static final _reKey = RegExp('^[a-zA-Z0-9][a-zA-Z]\$'); - static final _reTkey = RegExp('^[a-zA-Z][0-9]\$'); + static final RegExp _reKey = RegExp('^[a-zA-Z0-9][a-zA-Z]\$'); + static final RegExp _reTkey = RegExp('^[a-zA-Z][0-9]\$'); @override bool operator ==(dynamic other) { From 49026de569bb423c4319d6d91a91788df7e24c56 Mon Sep 17 00:00:00 2001 From: Hugo van der Merwe Date: Tue, 9 Oct 2018 23:54:59 +0200 Subject: [PATCH 06/20] One stray lint fix. --- lib/ui/window.dart | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/ui/window.dart b/lib/ui/window.dart index f2f608181594a..56dbf90e8f3a3 100644 --- a/lib/ui/window.dart +++ b/lib/ui/window.dart @@ -581,7 +581,7 @@ class Locale { /// If the const constructor was used with bad parameters, the result might /// not be standards-compliant. String toLanguageTag() { - StringBuffer out = StringBuffer(languageCode); + final StringBuffer out = StringBuffer(languageCode); if (scriptCode != null) { out.write('-$scriptCode'); } From 83902a24e9977637a3cb738560a5853de5a003d3 Mon Sep 17 00:00:00 2001 From: Hugo van der Merwe Date: Wed, 10 Oct 2018 18:47:13 +0200 Subject: [PATCH 07/20] First round of PR review fixes. --- lib/ui/window.dart | 147 +++++---- testing/dart/locale_test.dart | 545 ++++++++++++++++++++++------------ 2 files changed, 444 insertions(+), 248 deletions(-) diff --git a/lib/ui/window.dart b/lib/ui/window.dart index 56dbf90e8f3a3..a479e001ebcf0 100644 --- a/lib/ui/window.dart +++ b/lib/ui/window.dart @@ -184,7 +184,10 @@ class Locale { /// way or do any syntax checking. /// /// * language, script and region must be in canonical form. - /// * iterating over variants must provide variants in alphabetical order. + /// * Iterating over variants must provide variants in alphabetical order. We + /// force this by taking a SplayTreeSet as parameter input, which + /// automatically sorts its items. We then convert to a LinkedHashSet for + /// faster access and iteration. /// * The extensions map must contain only valid key/value pairs. "u" and "t" /// keys must be present, with an empty string as value, if there are any /// subtags for those singletons. @@ -204,18 +207,17 @@ class Locale { _languageCode = language, _scriptCode = script, _countryCode = region, - _variants = collection.LinkedHashSet.from(variants ?? []), - _extensions = collection.LinkedHashMap.from(extensions ?? {}); + _variants = List.unmodifiable(variants ?? const []), + _extensions = collection.LinkedHashMap.from(extensions ?? const {}); - /// Parses [Unicode Locale + /// Parses [Unicode CLDR Locale /// Identifiers](https://www.unicode.org/reports/tr35/#Identifiers). /// /// This method does not parse all BCP 47 tags. See [BCP 47 /// Conformance](https://www.unicode.org/reports/tr35/#BCP_47_Conformance) for /// details. /// - /// TODO: we should try to support parsing all BCP 47 tags, even if we produce - /// only Unicode BCP47 Locale Identifiers as output. + /// TODO: support parsing all BCP 47 tags. factory Locale.parse(String localeId) { assert(localeId != null); localeId = localeId.toLowerCase(); @@ -242,7 +244,8 @@ class Locale { problems.add('"${localeSubtags[0]}" is an invalid language subtag'); } if (localeSubtags.isNotEmpty && _reScript.hasMatch(localeSubtags[0])) { - script = _capitalize(localeSubtags.removeAt(0)); + String s = localeSubtags.removeAt(0); + script = s.substring(0, 1).toUpperCase() + s.substring(1).toLowerCase(); } if (localeSubtags.isNotEmpty && _reRegion.hasMatch(localeSubtags[0])) { region = localeSubtags.removeAt(0).toUpperCase(); @@ -268,15 +271,18 @@ class Locale { } _parseExtensions(localeSubtags, extensions, problems); - if (problems.isNotEmpty) + if (problems.isNotEmpty) { throw LocaleParseException('Locale Identifier $localeId is invalid: ' '${problems.join("; ")}.'); + } - return Locale._internal(language, + return Locale._internal( + language, script: script, region: region, variants: variants, - extensions: extensions); + extensions: extensions, + ); } // * All subtags in localeSubtags must already be lowercase. @@ -407,11 +413,11 @@ class Locale { String get languageCode => _replaceDeprecatedLanguageSubtag(_languageCode); final String _languageCode; - /// Replaces deprecated language subtags. - /// - /// The subtag must already be lowercase. - /// - /// This method's switch statement periodically needs a manual update. + // Replaces deprecated language subtags. + // + // The subtag must already be lowercase. + // + // This method's switch statement periodically needs a manual update. static String _replaceDeprecatedLanguageSubtag(String languageCode) { // This switch statement is generated by //flutter/tools/gen_locale.dart // Mappings generated for language subtag registry as of 2018-08-08. @@ -501,12 +507,6 @@ class Locale { String get scriptCode => _scriptCode; final String _scriptCode; - // Uses the language independent Unicode mapping. For use in Locales, we only - // care about ASCII. (TODO: optimize by checking if already valid?) - static String _capitalize(String word) { - return word.substring(0, 1).toUpperCase() + word.substring(1).toLowerCase(); - } - /// The region subtag for the locale. /// /// This can be null. @@ -524,11 +524,11 @@ class Locale { String get countryCode => _replaceDeprecatedRegionSubtag(_countryCode); final String _countryCode; - /// Replaces deprecated region subtags. - /// - /// The subtag must already be uppercase. - /// - /// This method's switch statement periodically needs a manual update. + // Replaces deprecated region subtags. + // + // The subtag must already be uppercase. + // + // This method's switch statement periodically needs a manual update. static String _replaceDeprecatedRegionSubtag(String regionCode) { // This switch statement is generated by //flutter/tools/gen_locale.dart // Mappings generated for language subtag registry as of 2018-08-08. @@ -543,15 +543,17 @@ class Locale { } } - /// Unicode language variant codes. - /// - /// This iterable provides variants normalized to lowercase and in sorted - /// order, as per the Unicode LDML specification. - Iterable get variants => _variants ?? []; + // Unicode language variant codes. + // + // This iterable provides variants normalized to lowercase and in sorted + // order, as per the Unicode LDML specification. + // + // FIXME: decide whether this returns a single string or something fancier. + Iterable get variants => _variants ?? const []; // The private _variants field must have variants in lowercase and already // sorted: constructors must construct it as such. - final collection.LinkedHashSet _variants; + final List _variants; // A map representing all Locale Identifier extensions. // @@ -576,10 +578,13 @@ class Locale { // value. final collection.LinkedHashMap _extensions; - /// Produces the Unicode BCP47 Locale Identifier for this locale. - /// - /// If the const constructor was used with bad parameters, the result might - /// not be standards-compliant. + // Produces the Unicode BCP47 Locale Identifier for this locale. + // + // If the unnamed constructor was used with bad parameters, the result might + // not be standards-compliant. + // + // FIXME/TODO: this must not be submitted as a public function until we've + // made a final decision on toString() behaviour. String toLanguageTag() { final StringBuffer out = StringBuffer(languageCode); if (scriptCode != null) { @@ -597,13 +602,17 @@ class Locale { return out.toString(); } - /// Formats the extension map into a partial Unicode Locale Identifier. - /// - /// This covers everything after the unicode_language_id. - static String _extensionsToString( - collection.LinkedHashMap extensions) { - String uAttr; - String tAttr; + // Formats the extension map into a partial Unicode Locale Identifier. + // + // This covers everything after the unicode_language_id, and returns a string + // starting with a hyphen. Returns '' if passed null or an empty extensions + // map. + static String _extensionsToString(collection.LinkedHashMap extensions) { + if (extensions == null || extensions.isEmpty) { + return ''; + } + String uAttributes; + String tLang; final StringBuffer uOut = StringBuffer(); final StringBuffer tOut = StringBuffer(); final StringBuffer result = StringBuffer(); @@ -616,16 +625,15 @@ class Locale { if (entry.value.isNotEmpty) result.write('-${entry.value}'); } else if (entry.key == 't') { - tAttr = entry.value; + tLang = entry.value; } else if (entry.key == 'u') { - uAttr = entry.value; + uAttributes = entry.value; } else if (RegExp(r'^[vwyz]$').hasMatch(entry.key)) { resultVWYZX.write('-${entry.key}'); if (entry.value.isNotEmpty) resultVWYZX.write('-${entry.value}'); } else if (entry.key != 'x') { - throw UnimplementedError( - 'Extension not supported/recognised: $entry.'); + throw UnimplementedError('Unexpected key in extensions map: $entry'); } } else if (_reKey.hasMatch(entry.key)) { // unicode_locale_extensions @@ -643,20 +651,19 @@ class Locale { if (entry.value.isNotEmpty) tOut.write('-${entry.value}'); } else { - throw UnimplementedError( - 'Extension not supported/recognised: $entry.'); + throw UnimplementedError('Unexpected key in extensions map: $entry'); } } - if (tAttr != null || tOut.isNotEmpty) { + if (tLang != null || tOut.isNotEmpty) { result.write('-t'); - if (tAttr != null) - result.write('-$tAttr'); + if (tLang != null) + result.write('-$tLang'); result.write(tOut.toString()); } - if (uAttr != null || uOut.isNotEmpty) { + if (uAttributes != null || uOut.isNotEmpty) { result.write('-u'); - if (uAttr != null && uAttr.isNotEmpty) - result.write('-$uAttr'); + if (uAttributes != null && uAttributes.isNotEmpty) + result.write('-$uAttributes'); result.write(uOut.toString()); } if (resultVWYZX.isNotEmpty) @@ -703,21 +710,29 @@ class Locale { return false; final Locale typedOther = other; - // TODO: improve efficiency of this? - // Comparing Sets and Maps requires reimplementing functionality in - // package:collection, comparing canonical string is simple. - return toLanguageTag() == typedOther.toLanguageTag() - // toLanguageTag() cannot represent zero-length string as country-code, - // but we need to distinguish it for backward compatibility reasons. - && countryCode == typedOther.countryCode; + // Comparing Lists, Sets and Maps requires reimplementing functionality in + // package:collection. Variants and extensions are rare, so we convert them + // to canonical/normalized strings for comparison. + return languageCode == typedOther.languageCode + && countryCode == typedOther.countryCode + && scriptCode == typedOther.scriptCode + && variants.join('-') == typedOther.variants.join('-') + && _extensionsToString(_extensions) == _extensionsToString(typedOther._extensions); } @override int get hashCode { - // toLanguageTag() cannot represent zero-length string as country-code, - // but we need to distinguish it for backward compatibility reasons. - return toLanguageTag().hashCode - + 373 * countryCode.hashCode; + int result = 373; + result = 37 * result + languageCode.hashCode; + if (_scriptCode != null) + result = 37 * result + scriptCode.hashCode; + if (_countryCode != null) + result = 37 * result + countryCode.hashCode; + if (_variants != null && _variants.isNotEmpty) + result = 37 * result + variants.join('-').hashCode; + if (_extensions != null && _extensions.isNotEmpty) + result = 37 * result + _extensionsToString(_extensions).hashCode; + return result; } /// Produces a non-BCP47 Unicode Locale Identifier for this locale. diff --git a/testing/dart/locale_test.dart b/testing/dart/locale_test.dart index 0ea45c396cc3b..b905544c7edeb 100644 --- a/testing/dart/locale_test.dart +++ b/testing/dart/locale_test.dart @@ -9,146 +9,261 @@ import 'package:test/test.dart'; void main() { test('Locale', () { final Null $null = null; - expect(const Locale('en').toString(), 'en'); - expect(const Locale('en'), new Locale('en', $null)); - expect(const Locale('en').hashCode, new Locale('en', $null).hashCode); - - expect(const Locale('en'), isNot(new Locale('en', '')), - reason: 'Legacy. (The semantic difference between Locale("en") and ' - 'Locale("en", "") is not defined.)'); - expect(const Locale('en').hashCode, isNot(new Locale('en', '').hashCode), - reason: 'Legacy. (The semantic difference between Locale("en") and ' - 'Locale("en", "") is not defined.)'); - expect(const Locale('en', 'US').toString(), 'en_US', - reason: 'Legacy. en_US is a valid Unicode Locale Identifier, but ' - 'not a valid Unicode BCP47 Locale Identifier.'); - expect(const Locale('en', 'US').toLanguageTag(), 'en-US', - reason: 'Unicode BCP47 Locale Identifier, as recommended for general ' - 'interchange.'); - - expect(const Locale('iw').toString(), 'he', - reason: 'The language code for Hebrew was officially changed in 1989.'); - expect(const Locale('iw', 'DD').toString(), 'he_DE', - reason: 'Legacy. This is a valid Unicode Locale Identifier, even if ' - 'not a valid Unicode BCP47 Locale Identifier'); - expect(const Locale('iw', 'DD'), const Locale('he', 'DE'), - reason: 'The German Democratic Republic ceased to exist in ' - 'October 1990.'); + expect( + const Locale('en').toString(), + 'en', + ); + expect( + const Locale('en'), + new Locale('en', $null), + ); + expect( + const Locale('en').hashCode, + new Locale('en', $null).hashCode, + ); + + expect( + const Locale('en'), + isNot(new Locale('en', '')), + reason: 'Legacy. (The semantic difference between Locale("en") and ' + 'Locale("en", "") is not defined.)', + ); + expect( + const Locale('en').hashCode, + isNot(new Locale('en', '').hashCode), + reason: 'Legacy. (The semantic difference between Locale("en") and ' + 'Locale("en", "") is not defined.)', + ); + expect( + const Locale('en', 'US').toString(), + 'en_US', + reason: 'Legacy. en_US is a valid Unicode Locale Identifier, but ' + 'not a valid Unicode BCP47 Locale Identifier.', + ); + expect( + const Locale('en', 'US').toLanguageTag(), + 'en-US', + reason: 'Unicode BCP47 Locale Identifier, as recommended for general ' + 'interchange.', + ); + + expect( + const Locale('iw').toString(), + 'he', + reason: 'The language code for Hebrew was officially changed in 1989.', + ); + expect( + const Locale('iw', 'DD').toString(), + 'he_DE', + reason: 'Legacy. This is a valid Unicode Locale Identifier, even if ' + 'not a valid Unicode BCP47 Locale Identifier', + ); + expect( + const Locale('iw', 'DD'), + const Locale('he', 'DE'), + reason: 'The German Democratic Republic ceased to exist in ' + 'October 1990.', + ); }); test('Locale unnamed constructor idiosyncrasies', () { - expect(() => Locale.parse(Locale('en', '').toString()), throwsException, - reason: 'Locale("en", "").toString() produces "en-" which is not ' - 'standards-compliant.'); - - expect(Locale.parse(Locale('en', '').toLanguageTag()), Locale('en'), - reason: 'There is no standards-compliant way for toLanguageTag() to ' - 'represent a zero-length region code.'); - expect(Locale.parse(Locale('en', '').toLanguageTag()), Locale('en'), - reason: 'There is no standards-compliant way for toLanguageTag() to ' - 'represent a zero-length region code.'); - - expect(Locale('abcd').toLanguageTag(), 'abcd', - reason: 'Locale("abcd") is not following instructions in the API ' - 'documentation, so produces standards-uncompliant output.'); - expect(Locale.parse('abcd').toLanguageTag(), 'und-Abcd', reason: ''); - expect(Locale('abcd'), isNot(Locale.parse('abcd')), reason: ''); - expect(() => Locale.parse(Locale('a').toLanguageTag()), throwsException, - reason: 'Locale("abcd") is not following instructions in the API ' - 'documentation, so produces standards-uncompliant output.'); - expect(Locale.parse(Locale('EN').toLanguageTag()).languageCode, 'en', - reason: 'Locale.parse does standards-compliant normalization, whereas ' - 'Locale("EN") is incorrect usage of the API as per API ' - 'documentation.'); + expect( + () => Locale.parse(Locale('en', '').toString()), + throwsException, + reason: 'Locale("en", "").toString() produces "en-" which is not ' + 'standards-compliant.', + ); + + // We have: + expect( + const Locale('en'), + isNot(new Locale('en', '')), + reason: 'Legacy. (The semantic difference between Locale("en") and ' + 'Locale("en", "") is not defined.)', + ); + // However we also have: + expect( + Locale.parse(Locale('en', '').toLanguageTag()), + Locale('en'), + reason: 'There is no standards-compliant way for toLanguageTag() to ' + 'represent a zero-length region code.', + ); + // So this class' operator== doesn't match the behaviour we expect from the + // normalized Unicode BCP47 Locale Identifiers perspective. + + expect( + Locale('abcd').toLanguageTag(), + 'abcd', + reason: 'Locale("abcd") is not following instructions in the API ' + 'documentation, so produces standards-uncompliant output.', + ); + expect( + Locale.parse('abcd').toLanguageTag(), + 'und-Abcd', + reason: '', + ); + expect( + Locale('abcd'), + isNot(Locale.parse('abcd')), + reason: '', + ); + expect( + () => Locale.parse(Locale('a').toLanguageTag()), + throwsException, + reason: 'Locale("abcd") is not following instructions in the API ' + 'documentation, so produces standards-uncompliant output.', + ); + expect( + Locale.parse(Locale('EN').toLanguageTag()).languageCode, + 'en', + reason: 'Locale.parse does standards-compliant normalization, whereas ' + 'Locale("EN") is incorrect usage of the API as per API ' + 'documentation.', + ); // Syntax is correct. Without validating against CLDR supplemental // data, this looks like da-u-nu-true. - expect(Locale.parse('da-u-nu').toLanguageTag(), equals('da-u-nu'), - reason: 'da-u-nu syntax is correct, this looks like "da-u-nu-true". ' - 'Only validation against CLDR validity data would show "true" is ' - 'not a valid value for "nu".'); + expect( + Locale.parse('da-u-nu').toLanguageTag(), + equals('da-u-nu'), + reason: 'da-u-nu syntax is correct, this looks like "da-u-nu-true". ' + 'Only validation against CLDR validity data would show "true" is ' + 'not a valid value for "nu".', + ); }); group('Locale.parse():', () { test('languageCode.', () { - expect(Locale.parse('IW').languageCode, 'he', - reason: "Case insensitive to input."); - expect(Locale.parse('Fil').languageCode, 'fil', - reason: "3-character language codes: Filipino is not Finnish."); - expect(Locale.parse('abcde').languageCode, 'abcde', - reason: 'The spec provides for language codes with 5 to 8 ' - 'characters, though as of 2018, there aren\'t any valid tags ' - 'this long.'); - expect(Locale.parse('ROOT').languageCode, 'und', - reason: 'BCP 47 Language Tag Conversion: ' - 'replace "root" with "und", and case insensitive.'); + expect( + Locale.parse('IW').languageCode, + 'he', + reason: "Case insensitive to input.", + ); + expect( + Locale.parse('Fil').languageCode, + 'fil', + reason: "3-character language codes: Filipino is not Finnish.", + ); + expect( + Locale.parse('abcde').languageCode, + 'abcde', + reason: 'The spec provides for language codes with 5 to 8 ' + 'characters, though as of 2018, there aren\'t any valid tags ' + 'this long.', + ); + expect( + Locale.parse('ROOT').languageCode, + 'und', + reason: 'BCP 47 Language Tag Conversion: ' + 'replace "root" with "und", and case insensitive.', + ); }); test('scriptCode.', () { - expect(Locale.parse('af_latn').scriptCode, 'Latn'); - expect(Locale.parse('zh_HANT-CN').scriptCode, 'Hant'); - expect(Locale.parse('sw-TZ').scriptCode, null); + expect( + Locale.parse('af_latn').scriptCode, + 'Latn', + ); + expect( + Locale.parse('zh_HANT-CN').scriptCode, + 'Hant', + ); + expect( + Locale.parse('sw-TZ').scriptCode, + null, + ); }); test('countryCode.', () { - expect(Locale.parse('ar-Arab-EG').countryCode, 'EG'); - expect(Locale.parse('en-GB_scouse_fonipa').countryCode, 'GB'); + expect( + Locale.parse('ar-Arab-EG').countryCode, + 'EG', + ); + expect( + Locale.parse('en-GB_scouse_fonipa').countryCode, + 'GB', + ); }); test('variants.', () { // Liverpool English, script variant: International Phonetic Alphabet. // (fonipa is a Latn variant, so only Latn really makes sense as script) - expect(Locale.parse('en_scouse_fonipa').variants, - orderedEquals(['fonipa', 'scouse']), - reason: 'Variants should be sorted alphabetically.'); - expect(Locale.parse('de-1996').variants, orderedEquals(['1996'])); - expect(Locale.parse('ja_Jpan').variants, orderedEquals([]), - reason: 'No variants represented by zero-length Iterable.'); + expect( + Locale.parse('en_scouse_fonipa').variants, + orderedEquals(['fonipa', 'scouse']), + reason: 'Variants should be sorted alphabetically.', + ); + expect( + Locale.parse('de-1996').variants, + orderedEquals(['1996']), + ); + expect( + Locale.parse('ja_Jpan').variants, + orderedEquals([]), + reason: 'No variants represented by zero-length Iterable.', + ); }); test('Locale Identifiers with extensions.', () { - expect(Locale.parse('nl-u-attr2-attr1').toLanguageTag(), - equals('nl-u-attr2-attr1'), - reason: '-u- attributes are ordered, do not sort.'); - - expect(Locale.parse('ar-u-ca-islamic-civil').toLanguageTag(), - equals('ar-u-ca-islamic-civil'), - reason: '-u- attributes are ordered, do not sort.'); - - expect(Locale.parse('RU-T-en-Cyrl-GB-H0-HYBRID').toLanguageTag(), - equals('ru-t-en-cyrl-gb-h0-hybrid'), - reason: 'Language identifiers in t-extensions are also lowercase.'); - - expect(Locale.parse('tr-x-foo-t-hi-h0-hybrid-u-nu').toLanguageTag(), - equals('tr-x-foo-t-hi-h0-hybrid-u-nu'), - reason: 'Everything after -x- belong to the -x- private use ' - 'extension.'); - - expect(Locale.parse('pl-b-h0-hybrid-nu-roman').toLanguageTag(), - equals('pl-b-h0-hybrid-nu-roman'), - reason: 'What looks like tkey and key subtags appearing under ' - 'singletons other than -t- and -u- belong to those singletons.'); - - expect(Locale.parse('ca-u-kb-true').toLanguageTag(), equals('ca-u-kb'), - reason: 'For true/false keys like kb, true can be ommitted.'); - expect(Locale.parse('ca-u-kb').toLanguageTag(), equals('ca-u-kb'), - reason: 'For true/false keys like kb, true can be ommitted.'); + expect( + Locale.parse('nl-u-attr2-attr1').toLanguageTag(), + equals('nl-u-attr2-attr1'), + reason: '-u- attributes are ordered, do not sort.', + ); + + expect( + Locale.parse('ar-u-ca-islamic-civil').toLanguageTag(), + equals('ar-u-ca-islamic-civil'), + reason: '-u- attributes are ordered, do not sort.', + ); + + expect( + Locale.parse('RU-T-en-Cyrl-GB-H0-HYBRID').toLanguageTag(), + equals('ru-t-en-cyrl-gb-h0-hybrid'), + reason: 'Language identifiers in t-extensions are also lowercase.', + ); + + expect( + Locale.parse('tr-x-foo-t-hi-h0-hybrid-u-nu').toLanguageTag(), + equals('tr-x-foo-t-hi-h0-hybrid-u-nu'), + reason: 'Everything after -x- belong to the -x- private use ' + 'extension.', + ); + + expect( + Locale.parse('pl-b-h0-hybrid-nu-roman').toLanguageTag(), + equals('pl-b-h0-hybrid-nu-roman'), + reason: 'What looks like tkey and key subtags appearing under ' + 'singletons other than -t- and -u- belong to those singletons.', + ); + + expect( + Locale.parse('ca-u-kb-true').toLanguageTag(), + equals('ca-u-kb'), + reason: 'For true/false keys like kb, true can be ommitted.', + ); + expect( + Locale.parse('ca-u-kb').toLanguageTag(), + equals('ca-u-kb'), + reason: 'For true/false keys like kb, true can be ommitted.', + ); // TODO: when adding accessor for u-kb: it should return 'true'. expect( - Locale.parse('En-LATN-Gb-SCOUSE-FONIPA' - '-U-ATTR2-ATTR1-ca-islamic-civil-nu-thai' - '-A-BC' - '-z-fo-xyzzy' - '-T-HI-H0-hybrid-m0-UNGEGN' - '-x-u-ab') - .toLanguageTag(), - equals('en-Latn-GB-fonipa-scouse' - '-a-bc' - '-t-hi-h0-hybrid-m0-ungegn' - '-u-attr2-attr1-ca-islamic-civil-nu-thai' - '-z-fo-xyzzy' - '-x-u-ab')); + Locale.parse('En-LATN-Gb-SCOUSE-FONIPA' + '-U-ATTR2-ATTR1-ca-islamic-civil-nu-thai' + '-A-BC' + '-z-fo-xyzzy' + '-T-HI-H0-hybrid-m0-UNGEGN' + '-x-u-ab') + .toLanguageTag(), + equals('en-Latn-GB-fonipa-scouse' + '-a-bc' + '-t-hi-h0-hybrid-m0-ungegn' + '-u-attr2-attr1-ca-islamic-civil-nu-thai' + '-z-fo-xyzzy' + '-x-u-ab'), + ); }); // These examples are not spec compliant, although we could have been more @@ -158,99 +273,165 @@ void main() { // TODO: maybe have the exception contain what's necessary to make a "best // effort" locale? test('Strict parsing examples.', () { - expect(() => Locale.parse('nl-u-x'), - throwsA(TypeMatcher()), - reason: 'With no "u" attributes there should be no "u" singleton. ' - 'Could be lenient: "nl-x".'); - expect(() => Locale.parse('fr-t-x'), - throwsA(TypeMatcher()), - reason: 'With no "t" attributes there should be no "t" singleton. ' - 'Could be lenient: "fr-x".'); - expect(() => Locale.parse('it-u-nu-romanlow-a-u-attr'), - throwsA(TypeMatcher()), - reason: 'Duplicate "u" singletons could be merged if only one has ' - 'attributes. Could be lenient: "it-a-u-attr-nu-romanlow".'); - expect(() => Locale.parse('pl-t-cs-b-t-h0-hybrid'), - throwsA(TypeMatcher()), - reason: 'Duplicate "t" singletons could be merged if only one has ' - 'tlang specified. Could be lenient: "pl-b-t-cs-h0-hybrid".'); - - expect(() => Locale.parse('ro-t-hu-HU-cu-ron'), - throwsA(TypeMatcher()), - reason: 'U-extension keywords misplaced under the -t- singleton ' - 'could be moved if unambiguous enough. ' - 'Could be lenient: "ro-t-hu-hu-u-cu-ron".'); + expect( + () => Locale.parse('nl-u-x'), + throwsA(TypeMatcher()), + reason: 'With no "u" attributes there should be no "u" singleton. ' + 'Could be lenient: "nl-x".', + ); + expect( + () => Locale.parse('fr-t-x'), + throwsA(TypeMatcher()), + reason: 'With no "t" attributes there should be no "t" singleton. ' + 'Could be lenient: "fr-x".', + ); + expect( + () => Locale.parse('it-u-nu-romanlow-a-u-attr'), + throwsA(TypeMatcher()), + reason: 'Duplicate "u" singletons could be merged if only one has ' + 'attributes. Could be lenient: "it-a-u-attr-nu-romanlow".', + ); + expect( + () => Locale.parse('pl-t-cs-b-t-h0-hybrid'), + throwsA(TypeMatcher()), + reason: 'Duplicate "t" singletons could be merged if only one has ' + 'tlang specified. Could be lenient: "pl-b-t-cs-h0-hybrid".', + ); + + expect( + () => Locale.parse('ro-t-hu-HU-cu-ron'), + throwsA(TypeMatcher()), + reason: 'U-extension keywords misplaced under the -t- singleton ' + 'could be moved if unambiguous enough. ' + 'Could be lenient: "ro-t-hu-hu-u-cu-ron".', + ); // TODO: any point to this test? It's a counter-example of the previous // "lenient parsing" idea. expect( - Locale.parse('ro-t-nu-cyrl').toLanguageTag(), equals('ro-t-nu-cyrl'), - reason: 'This cannot be interpreted as a misplaced -u- keyword. ' - 'It looks like a tlang tag: "nu-Cyrl", '); + Locale.parse('ro-t-nu-cyrl').toLanguageTag(), + equals('ro-t-nu-cyrl'), + reason: 'This cannot be interpreted as a misplaced -u- keyword. ' + 'It looks like a tlang tag: "nu-Cyrl", ', + ); - expect(() => Locale.parse('pt-BR-u-h0-hybrid-t-pt-PT'), - throwsA(TypeMatcher()), - reason: 'T-extension "tfields" misplaced under the U-extension. ' - 'Could be lenient: "pt-BR-t-pt-pt-h0-hybrid".'); + expect( + () => Locale.parse('pt-BR-u-h0-hybrid-t-pt-PT'), + throwsA(TypeMatcher()), + reason: 'T-extension "tfields" misplaced under the U-extension. ' + 'Could be lenient: "pt-BR-t-pt-pt-h0-hybrid".', + ); - expect(() => Locale.parse('pl-t-h0'), - throwsA(TypeMatcher()), - reason: - 'Locale tag pl-t-h0 is not spec compliant. How to best fix it ' - 'is unclear: it is underspecified.'); + expect( + () => Locale.parse('pl-t-h0'), + throwsA(TypeMatcher()), + reason: 'Locale tag pl-t-h0 is not spec compliant. How to best fix it ' + 'is unclear: it is underspecified.', + ); }); test('Locale.parse(): invalid identifiers.', () { expect( - () => Locale.parse('a'), throwsA(TypeMatcher()), - reason: 'One character language subtags are invalid.'); - expect(Locale.parse('abcd').languageCode, 'und', - reason: 'Special-use corner case from the specification: ' - 'language subtag can be skipped if a script is specified.'); - expect(Locale.parse('abcd').scriptCode, 'Abcd', - reason: 'Special-use corner case from the specification: ' - 'language subtag can be skipped if a script is specified.'); - expect(() => Locale.parse('abcdefghi'), - throwsA(TypeMatcher()), - reason: 'Language subtags may not be more than 8 characters.'); - expect(() => Locale.parse(r'e$'), - throwsA(TypeMatcher()), - reason: 'Invalid character for language subtag, only a-z allowed.'); - expect(() => Locale.parse('fr-RU-Hant'), - throwsA(TypeMatcher()), - reason: 'Swapping region and script is not allowed.'); + () => Locale.parse('a'), + throwsA(TypeMatcher()), + reason: 'One character language subtags are invalid.', + ); + expect( + Locale.parse('abcd').languageCode, + 'und', + reason: 'Special-use corner case from the specification: ' + 'language subtag can be skipped if a script is specified.', + ); + expect( + Locale.parse('abcd').scriptCode, + 'Abcd', + reason: 'Special-use corner case from the specification: ' + 'language subtag can be skipped if a script is specified.', + ); + expect( + () => Locale.parse('abcdefghi'), + throwsA(TypeMatcher()), + reason: 'Language subtags may not be more than 8 characters.', + ); + expect( + () => Locale.parse(r'e$'), + throwsA(TypeMatcher()), + reason: 'Invalid character for language subtag, only a-z allowed.', + ); + expect( + () => Locale.parse('fr-RU-Hant'), + throwsA(TypeMatcher()), + reason: 'Swapping region and script is not allowed.', + ); }); }); test('Equality.', () { - expect(Locale.parse('en'), isNot(Locale.parse('en-Latn'))); expect( - Locale.parse('en').hashCode, isNot(Locale.parse('en-Latn').hashCode)); + Locale.parse('en'), + isNot(Locale.parse('en-Latn')), + ); + expect( + Locale.parse('en').hashCode, + isNot(Locale.parse('en-Latn').hashCode), + ); - expect(Locale.parse('en'), isNot(Locale.parse('en-US'))); - expect(Locale.parse('en').hashCode, isNot(Locale.parse('en-US').hashCode)); + expect( + Locale.parse('en'), + isNot(Locale.parse('en-US')), + ); + expect( + Locale.parse('en').hashCode, + isNot(Locale.parse('en-US').hashCode), + ); - expect(Locale.parse('en'), isNot(Locale.parse('en-fonipa'))); expect( - Locale.parse('en').hashCode, isNot(Locale.parse('en-fonipa').hashCode)); + Locale.parse('en'), + isNot(Locale.parse('en-fonipa')), + ); + expect( + Locale.parse('en').hashCode, + isNot(Locale.parse('en-fonipa').hashCode), + ); expect(Locale.parse('en'), isNot(Locale.parse('en-a'))); - expect(Locale.parse('en').hashCode, isNot(Locale.parse('en-a').hashCode)); + expect( + Locale.parse('en').hashCode, + isNot(Locale.parse('en-a').hashCode), + ); expect(Locale.parse('en'), isNot(Locale.parse('en-a'))); - expect(Locale.parse('en').hashCode, isNot(Locale.parse('en-a').hashCode)); + expect( + Locale.parse('en').hashCode, + isNot(Locale.parse('en-a').hashCode), + ); - expect(Locale.parse('en-u-attr'), isNot(Locale.parse('en-u-nu-roman'))); - expect(Locale.parse('en-u-attr').hashCode, - isNot(Locale.parse('en-u-nu-roman').hashCode)); + expect( + Locale.parse('en-u-attr'), + isNot(Locale.parse('en-u-nu-roman')), + ); + expect( + Locale.parse('en-u-attr').hashCode, + isNot(Locale.parse('en-u-nu-roman').hashCode), + ); - expect(Locale.parse('en-u-kb'), Locale.parse('en-u-kb-true'), - reason: '-u-kb should parse to the same result as -u-kb-true.'); expect( - Locale.parse('en-u-kb').hashCode, Locale.parse('en-u-kb-true').hashCode, - reason: '-u-kb should parse to the same result as -u-kb-true.'); + Locale.parse('en-u-kb'), + Locale.parse('en-u-kb-true'), + reason: '-u-kb should parse to the same result as -u-kb-true.', + ); + expect( + Locale.parse('en-u-kb').hashCode, + Locale.parse('en-u-kb-true').hashCode, + reason: '-u-kb should parse to the same result as -u-kb-true.', + ); - expect(Locale.parse('en-t-hi'), isNot(Locale.parse('en-t-hi-h0-hybrid'))); - expect(Locale.parse('en-t-hi').hashCode, - isNot(Locale.parse('en-t-hi-h0-hybrid').hashCode)); + expect( + Locale.parse('en-t-hi'), + isNot(Locale.parse('en-t-hi-h0-hybrid')), + ); + expect( + Locale.parse('en-t-hi').hashCode, + isNot(Locale.parse('en-t-hi-h0-hybrid').hashCode), + ); }); } From 2b30a87a99183ceb151ac48f12adf4f6495a9c1e Mon Sep 17 00:00:00 2001 From: Hugo van der Merwe Date: Thu, 11 Oct 2018 18:06:10 +0200 Subject: [PATCH 08/20] PR review fixes: asserts, static method, documentation. --- lib/ui/window.dart | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/lib/ui/window.dart b/lib/ui/window.dart index a479e001ebcf0..7c237d122c553 100644 --- a/lib/ui/window.dart +++ b/lib/ui/window.dart @@ -198,12 +198,10 @@ class Locale { collection.SplayTreeSet variants, collection.SplayTreeMap extensions, }) : assert(language != null), - assert(language.length >= 2), - assert(language.length <= 8), + assert(language.length >= 2 && language.length <= 8), assert(language.length != 4), - assert((script ?? 'Xxxx').length == 4), - assert((region ?? 'XX').length >= 2), - assert((region ?? 'XX').length <= 3), + assert(script == null || script.length == 4), + assert(region == null || (region.length >= 2 && region.length <= 3)), _languageCode = language, _scriptCode = script, _countryCode = region, @@ -218,7 +216,7 @@ class Locale { /// details. /// /// TODO: support parsing all BCP 47 tags. - factory Locale.parse(String localeId) { + static Locale parse(String localeId) { assert(localeId != null); localeId = localeId.toLowerCase(); if (localeId == 'root') @@ -244,7 +242,7 @@ class Locale { problems.add('"${localeSubtags[0]}" is an invalid language subtag'); } if (localeSubtags.isNotEmpty && _reScript.hasMatch(localeSubtags[0])) { - String s = localeSubtags.removeAt(0); + final String s = localeSubtags.removeAt(0); script = s.substring(0, 1).toUpperCase() + s.substring(1).toLowerCase(); } if (localeSubtags.isNotEmpty && _reRegion.hasMatch(localeSubtags[0])) { @@ -410,14 +408,16 @@ class Locale { /// Locale('he')` and `const Locale('iw')` are equal, and both have the /// [languageCode] `he`, because `iw` is a deprecated language subtag that was /// replaced by the subtag `he`. + /// + /// New deprecations in the registry are not automatically picked up by this + /// library, so this class will not make such changes for deprecations that + /// are too recent. String get languageCode => _replaceDeprecatedLanguageSubtag(_languageCode); final String _languageCode; // Replaces deprecated language subtags. // // The subtag must already be lowercase. - // - // This method's switch statement periodically needs a manual update. static String _replaceDeprecatedLanguageSubtag(String languageCode) { // This switch statement is generated by //flutter/tools/gen_locale.dart // Mappings generated for language subtag registry as of 2018-08-08. @@ -521,14 +521,16 @@ class Locale { /// 'DE')` and `const Locale('de', 'DD')` are equal, and both have the /// [countryCode] `DE`, because `DD` is a deprecated language subtag that was /// replaced by the subtag `DE`. + /// + /// New deprecations in the registry are not automatically picked up by this + /// library, so this class will not make such changes for deprecations that + /// are too recent. String get countryCode => _replaceDeprecatedRegionSubtag(_countryCode); final String _countryCode; // Replaces deprecated region subtags. // // The subtag must already be uppercase. - // - // This method's switch statement periodically needs a manual update. static String _replaceDeprecatedRegionSubtag(String regionCode) { // This switch statement is generated by //flutter/tools/gen_locale.dart // Mappings generated for language subtag registry as of 2018-08-08. From 761c842ad58a3d67d76daf54df8d7a88d2736e6e Mon Sep 17 00:00:00 2001 From: Hugo van der Merwe Date: Thu, 11 Oct 2018 18:27:15 +0200 Subject: [PATCH 09/20] PR review fixes: stop using a SplayTreeSet. Add _fromComponents. --- lib/ui/window.dart | 58 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 41 insertions(+), 17 deletions(-) diff --git a/lib/ui/window.dart b/lib/ui/window.dart index 7c237d122c553..70150d71ac5f2 100644 --- a/lib/ui/window.dart +++ b/lib/ui/window.dart @@ -177,25 +177,26 @@ class Locale { this._variants = null, this._extensions = null; - /// Creates a new Locale object with the specified parts. - /// - /// This is for internal use only. All fields must already be normalized, must - /// already be canonicalized. This method does not modify parameters in any - /// way or do any syntax checking. - /// - /// * language, script and region must be in canonical form. - /// * Iterating over variants must provide variants in alphabetical order. We - /// force this by taking a SplayTreeSet as parameter input, which - /// automatically sorts its items. We then convert to a LinkedHashSet for - /// faster access and iteration. - /// * The extensions map must contain only valid key/value pairs. "u" and "t" - /// keys must be present, with an empty string as value, if there are any - /// subtags for those singletons. + // Creates a new Locale object with the specified parts. + // + // This is for internal use only. All fields must already be normalized, must + // already be canonicalized. This method does not modify parameters in any + // way or do any syntax checking. + // + // * language, script and region must be in normalized form. + // * variants must already be sorted, with each subtag in normalized form. + // * Iterating over variants must provide variants in alphabetical order. We + // force this by taking a SplayTreeSet as parameter input, which + // automatically sorts its items. We then convert to a LinkedHashSet for + // faster access and iteration. + // * The extensions map must contain only valid key/value pairs. "u" and "t" + // keys must be present, with an empty string as value, if there are any + // subtags for those singletons. Locale._internal( String language, { String script, String region, - collection.SplayTreeSet variants, + List variants, collection.SplayTreeMap extensions, }) : assert(language != null), assert(language.length >= 2 && language.length <= 8), @@ -208,6 +209,29 @@ class Locale { _variants = List.unmodifiable(variants ?? const []), _extensions = collection.LinkedHashMap.from(extensions ?? const {}); + // TODO: unit-test this, pick the right name, make it public, update this + // documentation. + // + // TODO: might we prefer constructing with a single string as "variants" + // rather than a list of strings? + // + // TODO: this constructor modifies the list passed to variants.Is this the + // right design? "We mutate what you give us!" feels bad. + // + // Extensions are not yet supported by this constructor, it would require + // first finalizing the design of the extensions map we wish to expose. + factory Locale._fromComponents({ + String language, + String script, + String region, + List variants, + }) { + if (variants != null) { + variants.sort(); + } + return Locale._internal(language, script: script, region: region, variants: variants); + } + /// Parses [Unicode CLDR Locale /// Identifiers](https://www.unicode.org/reports/tr35/#Identifiers). /// @@ -224,8 +248,7 @@ class Locale { final List localeSubtags = localeId.split(_reSep); String language, script, region; - final collection.SplayTreeSet variants = - collection.SplayTreeSet(); + final List variants = []; // Using a SplayTreeMap for its automatic key sorting. final collection.SplayTreeMap extensions = collection.SplayTreeMap(); @@ -274,6 +297,7 @@ class Locale { '${problems.join("; ")}.'); } + variants.sort(); return Locale._internal( language, script: script, From 825b0e90526613cf530d56be8e357c57d87810f8 Mon Sep 17 00:00:00 2001 From: Hugo van der Merwe Date: Tue, 16 Oct 2018 17:53:28 +0200 Subject: [PATCH 10/20] Drop LocaleParseException. --- lib/ui/window.dart | 13 ++----------- testing/dart/locale_test.dart | 33 +++++++++++---------------------- 2 files changed, 13 insertions(+), 33 deletions(-) diff --git a/lib/ui/window.dart b/lib/ui/window.dart index 70150d71ac5f2..08e3e926a9082 100644 --- a/lib/ui/window.dart +++ b/lib/ui/window.dart @@ -116,15 +116,6 @@ class WindowPadding { } } -class LocaleParseException implements Exception { - LocaleParseException(this._message); - - String _message; - - @override - String toString() => 'LocaleParseException: $_message'; -} - /// An identifier used to select a user's language and formatting preferences. /// This implements Unicode Locale Identifiers as defined by [Unicode /// LDML](https://www.unicode.org/reports/tr35/). @@ -293,8 +284,8 @@ class Locale { _parseExtensions(localeSubtags, extensions, problems); if (problems.isNotEmpty) { - throw LocaleParseException('Locale Identifier $localeId is invalid: ' - '${problems.join("; ")}.'); + throw FormatException('Locale Identifier $localeId is invalid: ' + '${problems.join("; ")}.'); } variants.sort(); diff --git a/testing/dart/locale_test.dart b/testing/dart/locale_test.dart index b905544c7edeb..a59e93d6a66ba 100644 --- a/testing/dart/locale_test.dart +++ b/testing/dart/locale_test.dart @@ -274,33 +274,28 @@ void main() { // effort" locale? test('Strict parsing examples.', () { expect( - () => Locale.parse('nl-u-x'), - throwsA(TypeMatcher()), + () => Locale.parse('nl-u-x'), throws, reason: 'With no "u" attributes there should be no "u" singleton. ' 'Could be lenient: "nl-x".', ); expect( - () => Locale.parse('fr-t-x'), - throwsA(TypeMatcher()), + () => Locale.parse('fr-t-x'), throws, reason: 'With no "t" attributes there should be no "t" singleton. ' 'Could be lenient: "fr-x".', ); expect( - () => Locale.parse('it-u-nu-romanlow-a-u-attr'), - throwsA(TypeMatcher()), + () => Locale.parse('it-u-nu-romanlow-a-u-attr'), throws, reason: 'Duplicate "u" singletons could be merged if only one has ' 'attributes. Could be lenient: "it-a-u-attr-nu-romanlow".', ); expect( - () => Locale.parse('pl-t-cs-b-t-h0-hybrid'), - throwsA(TypeMatcher()), + () => Locale.parse('pl-t-cs-b-t-h0-hybrid'), throws, reason: 'Duplicate "t" singletons could be merged if only one has ' 'tlang specified. Could be lenient: "pl-b-t-cs-h0-hybrid".', ); expect( - () => Locale.parse('ro-t-hu-HU-cu-ron'), - throwsA(TypeMatcher()), + () => Locale.parse('ro-t-hu-HU-cu-ron'), throws, reason: 'U-extension keywords misplaced under the -t- singleton ' 'could be moved if unambiguous enough. ' 'Could be lenient: "ro-t-hu-hu-u-cu-ron".', @@ -315,15 +310,13 @@ void main() { ); expect( - () => Locale.parse('pt-BR-u-h0-hybrid-t-pt-PT'), - throwsA(TypeMatcher()), + () => Locale.parse('pt-BR-u-h0-hybrid-t-pt-PT'), throws, reason: 'T-extension "tfields" misplaced under the U-extension. ' 'Could be lenient: "pt-BR-t-pt-pt-h0-hybrid".', ); expect( - () => Locale.parse('pl-t-h0'), - throwsA(TypeMatcher()), + () => Locale.parse('pl-t-h0'), throws, reason: 'Locale tag pl-t-h0 is not spec compliant. How to best fix it ' 'is unclear: it is underspecified.', ); @@ -331,8 +324,7 @@ void main() { test('Locale.parse(): invalid identifiers.', () { expect( - () => Locale.parse('a'), - throwsA(TypeMatcher()), + () => Locale.parse('a'), throws, reason: 'One character language subtags are invalid.', ); expect( @@ -348,18 +340,15 @@ void main() { 'language subtag can be skipped if a script is specified.', ); expect( - () => Locale.parse('abcdefghi'), - throwsA(TypeMatcher()), + () => Locale.parse('abcdefghi'), throws, reason: 'Language subtags may not be more than 8 characters.', ); expect( - () => Locale.parse(r'e$'), - throwsA(TypeMatcher()), + () => Locale.parse(r'e$'), throws, reason: 'Invalid character for language subtag, only a-z allowed.', ); expect( - () => Locale.parse('fr-RU-Hant'), - throwsA(TypeMatcher()), + () => Locale.parse('fr-RU-Hant'), throws, reason: 'Swapping region and script is not allowed.', ); }); From 75ed9b0a0ed7639f88751f262fe1d285c569ed49 Mon Sep 17 00:00:00 2001 From: Hugo van der Merwe Date: Tue, 23 Oct 2018 13:09:28 +0200 Subject: [PATCH 11/20] Stop using SplayTreeMap too. Improve operator== and hashCode. Make other adjustments to have operator== and hashCode work correctly. --- lib/ui/hash_codes.dart | 15 +++++ lib/ui/window.dart | 103 ++++++++++++++++++++-------------- testing/dart/locale_test.dart | 12 ++-- 3 files changed, 84 insertions(+), 46 deletions(-) diff --git a/lib/ui/hash_codes.dart b/lib/ui/hash_codes.dart index bece69b0887e9..16e58f78d35f9 100644 --- a/lib/ui/hash_codes.dart +++ b/lib/ui/hash_codes.dart @@ -119,3 +119,18 @@ int hashList(Iterable arguments) { } return _Jenkins.finish(result); } + +/// Combine the [Object.hashCode] values of an arbitrary number of from +/// an [Iterable] into one value. This function will return the same value if +/// given null as if given an empty list. +int hashMap(Map map) { + int result = 0; + if (map != null) { + // FIXME: sort the keys for deterministic iteration order. + for (MapEntry entry in map.entries) { + result = _Jenkins.combine(result, entry.key); + result = _Jenkins.combine(result, entry.value); + } + } + return _Jenkins.finish(result); +} diff --git a/lib/ui/window.dart b/lib/ui/window.dart index 08e3e926a9082..a91d989f018bd 100644 --- a/lib/ui/window.dart +++ b/lib/ui/window.dart @@ -176,19 +176,16 @@ class Locale { // // * language, script and region must be in normalized form. // * variants must already be sorted, with each subtag in normalized form. - // * Iterating over variants must provide variants in alphabetical order. We - // force this by taking a SplayTreeSet as parameter input, which - // automatically sorts its items. We then convert to a LinkedHashSet for - // faster access and iteration. + // * Iterating over variants must provide variants in alphabetical order. // * The extensions map must contain only valid key/value pairs. "u" and "t" // keys must be present, with an empty string as value, if there are any - // subtags for those singletons. + // subtags for those singletons. Takes ownership of the LinkedHashMap. Locale._internal( String language, { String script, String region, List variants, - collection.SplayTreeMap extensions, + collection.LinkedHashMap extensions, }) : assert(language != null), assert(language.length >= 2 && language.length <= 8), assert(language.length != 4), @@ -197,8 +194,8 @@ class Locale { _languageCode = language, _scriptCode = script, _countryCode = region, - _variants = List.unmodifiable(variants ?? const []), - _extensions = collection.LinkedHashMap.from(extensions ?? const {}); + _variants = (variants != null && variants.length > 0) ? List.unmodifiable(variants) : null, + _extensions = (extensions != null && extensions.isNotEmpty) ? extensions : null; // TODO: unit-test this, pick the right name, make it public, update this // documentation. @@ -240,9 +237,8 @@ class Locale { final List localeSubtags = localeId.split(_reSep); String language, script, region; final List variants = []; - // Using a SplayTreeMap for its automatic key sorting. - final collection.SplayTreeMap extensions = - collection.SplayTreeMap(); + final collection.LinkedHashMap extensions = + collection.LinkedHashMap(); final List problems = []; if (_reLanguage.hasMatch(localeSubtags[0])) { @@ -300,11 +296,12 @@ class Locale { // * All subtags in localeSubtags must already be lowercase. // - // * extensions must be a map with sorted iteration order, SplayTreeMap takes - // care of that for us. + // * extensions must be a map with sorted iteration order. LinkedHashMap + // preserves order for us. static void _parseExtensions(List localeSubtags, - collection.SplayTreeMap extensions, + collection.LinkedHashMap extensions, List problems) { + Map ext = {}; while (localeSubtags.isNotEmpty) { final String singleton = localeSubtags.removeAt(0); if (singleton == 'u') { @@ -318,8 +315,8 @@ class Locale { if (attributes.isNotEmpty) { empty = false; } - if (!extensions.containsKey(singleton)) { - extensions[singleton] = attributes.join('-'); + if (!ext.containsKey(singleton)) { + ext[singleton] = attributes.join('-'); } else { problems.add('duplicate singleton: "$singleton"'); } @@ -333,8 +330,12 @@ class Locale { _reValueSubtags.hasMatch(localeSubtags[0])) { typeParts.add(localeSubtags.removeAt(0)); } - if (!extensions.containsKey(key)) { - extensions[key] = typeParts.join('-'); + if (!ext.containsKey(key)) { + if (typeParts.length == 1 && typeParts[0] == 'true') { + ext[key] = ''; + } else { + ext[key] = typeParts.join('-'); + } } else { problems.add('duplicate key: $key'); } @@ -357,8 +358,8 @@ class Locale { tlang.add(localeSubtags.removeAt(0)); } } - if (!extensions.containsKey(singleton)) { - extensions[singleton] = tlang.join('-'); + if (!ext.containsKey(singleton)) { + ext[singleton] = tlang.join('-'); } else { problems.add('duplicate singleton: "$singleton"'); } @@ -371,8 +372,8 @@ class Locale { } if (tvalueParts.isNotEmpty) { empty = false; - if (!extensions.containsKey(tkey)) { - extensions[tkey] = tvalueParts.join('-'); + if (!ext.containsKey(tkey)) { + ext[tkey] = tvalueParts.join('-'); } else { problems.add('duplicate key: $tkey'); } @@ -387,7 +388,7 @@ class Locale { while (localeSubtags.isNotEmpty && _reAllSubtags.hasMatch(localeSubtags[0])) { values.add(localeSubtags.removeAt(0)); } - extensions[singleton] = values.join('-'); + ext[singleton] = values.join('-'); if (localeSubtags.isNotEmpty) { problems.add('invalid part of private use subtags: "${localeSubtags.join('-')}"'); } @@ -398,8 +399,8 @@ class Locale { while (localeSubtags.isNotEmpty && _reOtherSubtags.hasMatch(localeSubtags[0])) { values.add(localeSubtags.removeAt(0)); } - if (!extensions.containsKey(singleton)) { - extensions[singleton] = values.join('-'); + if (!ext.containsKey(singleton)) { + ext[singleton] = values.join('-'); } else { problems.add('duplicate singleton: "$singleton"'); } @@ -407,6 +408,11 @@ class Locale { problems.add('invalid subtag, should be singleton: "$singleton"'); } } + List ks = ext.keys.toList(); + ks.sort(); + for (String k in ks) { + extensions[k] = ext[k]; + } } /// The primary language subtag for the locale. @@ -595,6 +601,9 @@ class Locale { // value. final collection.LinkedHashMap _extensions; + // TMP FIXME + collection.LinkedHashMap get myexthelper => _extensions; + // Produces the Unicode BCP47 Locale Identifier for this locale. // // If the unnamed constructor was used with bad parameters, the result might @@ -733,25 +742,37 @@ class Locale { return languageCode == typedOther.languageCode && countryCode == typedOther.countryCode && scriptCode == typedOther.scriptCode - && variants.join('-') == typedOther.variants.join('-') - && _extensionsToString(_extensions) == _extensionsToString(typedOther._extensions); + && _listEquals(_variants, typedOther._variants) + && _mapEquals(_extensions, typedOther._extensions); } - @override - int get hashCode { - int result = 373; - result = 37 * result + languageCode.hashCode; - if (_scriptCode != null) - result = 37 * result + scriptCode.hashCode; - if (_countryCode != null) - result = 37 * result + countryCode.hashCode; - if (_variants != null && _variants.isNotEmpty) - result = 37 * result + variants.join('-').hashCode; - if (_extensions != null && _extensions.isNotEmpty) - result = 37 * result + _extensionsToString(_extensions).hashCode; - return result; + bool _listEquals(List a, List b) { + if (a == null) + return b == null; + if (b == null || a.length != b.length) + return false; + for (int index = 0; index < a.length; index += 1) { + if (a[index] != b[index]) + return false; + } + return true; } + bool _mapEquals(Map a, Map b) { + if (a == null) + return b == null; + if (b == null || a.length != b.length) + return false; + for (T1 k in a.keys) { + if (a[k] != b[k]) + return false; + } + return true; + } + + @override + int get hashCode => hashValues(languageCode, scriptCode, countryCode, hashList(_variants), hashMap(_extensions)); + /// Produces a non-BCP47 Unicode Locale Identifier for this locale. /// /// This Locale Identifier uses underscores as separator for historical @@ -765,7 +786,7 @@ class Locale { // unnamed constructor should be able to create instances like these. identifier = '${languageCode}_'; } - return identifier; + return '$identifier'; } } diff --git a/testing/dart/locale_test.dart b/testing/dart/locale_test.dart index a59e93d6a66ba..26c665c4e3ad2 100644 --- a/testing/dart/locale_test.dart +++ b/testing/dart/locale_test.dart @@ -403,15 +403,17 @@ void main() { isNot(Locale.parse('en-u-nu-roman').hashCode), ); + Locale a = Locale.parse('en-u-kb'); + Locale b = Locale.parse('en-u-kb-true'); expect( - Locale.parse('en-u-kb'), - Locale.parse('en-u-kb-true'), + a, + b, reason: '-u-kb should parse to the same result as -u-kb-true.', ); expect( - Locale.parse('en-u-kb').hashCode, - Locale.parse('en-u-kb-true').hashCode, - reason: '-u-kb should parse to the same result as -u-kb-true.', + a.hashCode, + b.hashCode, + reason: '-u-kb should parse to the same result as -u-kb-true. ${a.myexthelper}, ${b.myexthelper}.', ); expect( From 3fb5b56700cd181f26c6da1afb9182ffb05757ec Mon Sep 17 00:00:00 2001 From: Hugo van der Merwe Date: Mon, 29 Oct 2018 13:30:41 +0100 Subject: [PATCH 12/20] WIP: drop Regex: drop toLanguageTag-specific regexp. --- lib/ui/window.dart | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/lib/ui/window.dart b/lib/ui/window.dart index a91d989f018bd..b8fce1eb8c9ea 100644 --- a/lib/ui/window.dart +++ b/lib/ui/window.dart @@ -642,24 +642,25 @@ class Locale { final StringBuffer uOut = StringBuffer(); final StringBuffer tOut = StringBuffer(); final StringBuffer result = StringBuffer(); - final StringBuffer resultVWYZX = StringBuffer(); + final StringBuffer resultVWYZ = StringBuffer(); for (MapEntry entry in extensions.entries) { if (entry.key.length == 1) { - if (RegExp(r'^[a-s]$').hasMatch(entry.key)) { + int letter = entry.key.codeUnitAt(0) - 0x61; + if (letter < 0 || letter >= 26) { + throw UnimplementedError('Unexpected key in extensions map: $entry'); + } else if (letter < 19) { // 'a' to 's' (19th letter) result.write('-${entry.key}'); if (entry.value.isNotEmpty) result.write('-${entry.value}'); - } else if (entry.key == 't') { + } else if (letter == 19) { // t tLang = entry.value; - } else if (entry.key == 'u') { + } else if (letter == 20) { // u uAttributes = entry.value; - } else if (RegExp(r'^[vwyz]$').hasMatch(entry.key)) { - resultVWYZX.write('-${entry.key}'); + } else if (letter != 23) { // not x: vwyz + resultVWYZ.write('-${entry.key}'); if (entry.value.isNotEmpty) - resultVWYZX.write('-${entry.value}'); - } else if (entry.key != 'x') { - throw UnimplementedError('Unexpected key in extensions map: $entry'); + resultVWYZ.write('-${entry.value}'); } } else if (_reKey.hasMatch(entry.key)) { // unicode_locale_extensions @@ -692,8 +693,8 @@ class Locale { result.write('-$uAttributes'); result.write(uOut.toString()); } - if (resultVWYZX.isNotEmpty) - result.write(resultVWYZX.toString()); + if (resultVWYZ.isNotEmpty) + result.write(resultVWYZ.toString()); if (extensions.containsKey('x')) { result.write('-x'); if (extensions['x'].isNotEmpty) { From aab141c056480e643b42a3539c311334dd16e233 Mon Sep 17 00:00:00 2001 From: Hugo van der Merwe Date: Mon, 29 Oct 2018 22:04:45 +0100 Subject: [PATCH 13/20] Replace all regexp's, except _reSep which is needed for String.split(). --- lib/ui/window.dart | 186 +++++++++++++++++++++++++++++++++------------ 1 file changed, 137 insertions(+), 49 deletions(-) diff --git a/lib/ui/window.dart b/lib/ui/window.dart index b8fce1eb8c9ea..d0e22967627fe 100644 --- a/lib/ui/window.dart +++ b/lib/ui/window.dart @@ -220,6 +220,8 @@ class Locale { return Locale._internal(language, script: script, region: region, variants: variants); } + static final RegExp _reSep = RegExp(r'[-_]'); + /// Parses [Unicode CLDR Locale /// Identifiers](https://www.unicode.org/reports/tr35/#Identifiers). /// @@ -241,9 +243,13 @@ class Locale { collection.LinkedHashMap(); final List problems = []; - if (_reLanguage.hasMatch(localeSubtags[0])) { + if (_isAlphabetic(localeSubtags[0], 2, 8) + && localeSubtags[0].length != 4) { + // language subtag: r'^[a-zA-Z]{2,3}$|^[a-zA-Z]{5,8}$' language = _replaceDeprecatedLanguageSubtag(localeSubtags.removeAt(0)); - } else if (_reScript.hasMatch(localeSubtags[0])) { + } else if (_isAlphabetic(localeSubtags[0], 4, 4)) { + // First subtag is already a script subtag. + // // Identifiers without language subtags aren't valid BCP 47 tags and // therefore not intended for general interchange, however they do match // the LDML spec. @@ -251,14 +257,19 @@ class Locale { } else { problems.add('"${localeSubtags[0]}" is an invalid language subtag'); } - if (localeSubtags.isNotEmpty && _reScript.hasMatch(localeSubtags[0])) { + if (localeSubtags.isNotEmpty + && _isAlphabetic(localeSubtags[0], 4, 4)) { + // script subtag: r'^[a-zA-Z]{4}$' final String s = localeSubtags.removeAt(0); script = s.substring(0, 1).toUpperCase() + s.substring(1).toLowerCase(); } - if (localeSubtags.isNotEmpty && _reRegion.hasMatch(localeSubtags[0])) { + if (localeSubtags.isNotEmpty + && (_isAlphabetic(localeSubtags[0], 2, 2) + || _isNumeric(localeSubtags[0], 3, 3))) { + // region subtag: r'^[a-zA-Z]{2}$|^[0-9]{3}$'; region = localeSubtags.removeAt(0).toUpperCase(); } - while (localeSubtags.isNotEmpty && _reVariant.hasMatch(localeSubtags[0])) { + while (localeSubtags.isNotEmpty && _isVariantSubtag(localeSubtags[0])) { variants.add(localeSubtags.removeAt(0)); } @@ -308,8 +319,8 @@ class Locale { bool empty = true; // unicode_locale_extensions: collect "(sep attribute)+" attributes. final List attributes = []; - while (localeSubtags.isNotEmpty && - _reValueSubtags.hasMatch(localeSubtags[0])) { + while (localeSubtags.isNotEmpty + && _isAlphaNumeric(localeSubtags[0], 3, 8)) { attributes.add(localeSubtags.removeAt(0)); } if (attributes.isNotEmpty) { @@ -321,13 +332,13 @@ class Locale { problems.add('duplicate singleton: "$singleton"'); } // unicode_locale_extensions: collect "(sep keyword)*". - while (localeSubtags.isNotEmpty && - _reKey.hasMatch(localeSubtags[0])) { + while (localeSubtags.isNotEmpty + && _isUExtensionKey(localeSubtags[0])) { empty = false; final String key = localeSubtags.removeAt(0); final List typeParts = []; - while (localeSubtags.isNotEmpty && - _reValueSubtags.hasMatch(localeSubtags[0])) { + while (localeSubtags.isNotEmpty + && _isAlphaNumeric(localeSubtags[0], 3, 8)) { typeParts.add(localeSubtags.removeAt(0)); } if (!ext.containsKey(key)) { @@ -347,14 +358,22 @@ class Locale { bool empty = true; // transformed_extensions: grab tlang if it exists. final List tlang = []; - if (localeSubtags.isNotEmpty && _reLanguage.hasMatch(localeSubtags[0])) { + if (localeSubtags.isNotEmpty + && _isAlphabetic(localeSubtags[0], 2, 8) + && localeSubtags[0].length != 4) { + // language subtag empty = false; tlang.add(localeSubtags.removeAt(0)); - if (localeSubtags.isNotEmpty && _reScript.hasMatch(localeSubtags[0])) + if (localeSubtags.isNotEmpty + && _isAlphabetic(localeSubtags[0], 4, 4)) + // script subtag tlang.add(localeSubtags.removeAt(0)); - if (localeSubtags.isNotEmpty && _reRegion.hasMatch(localeSubtags[0])) + if (localeSubtags.isNotEmpty + && (_isAlphabetic(localeSubtags[0], 2, 2) + || _isNumeric(localeSubtags[0], 3, 3))) + // region subtag: r'^[a-zA-Z]{2}$|^[0-9]{3}$'; tlang.add(localeSubtags.removeAt(0)); - while (localeSubtags.isNotEmpty && _reVariant.hasMatch(localeSubtags[0])) { + while (localeSubtags.isNotEmpty && _isVariantSubtag(localeSubtags[0])) { tlang.add(localeSubtags.removeAt(0)); } } @@ -364,10 +383,11 @@ class Locale { problems.add('duplicate singleton: "$singleton"'); } // transformed_extensions: collect "(sep tfield)*". - while (localeSubtags.isNotEmpty && _reTkey.hasMatch(localeSubtags[0])) { + while (localeSubtags.isNotEmpty && _isTExtensionKey(localeSubtags[0])) { final String tkey = localeSubtags.removeAt(0); final List tvalueParts = []; - while (localeSubtags.isNotEmpty && _reValueSubtags.hasMatch(localeSubtags[0])) { + while (localeSubtags.isNotEmpty + && _isAlphaNumeric(localeSubtags[0], 3, 8)) { tvalueParts.add(localeSubtags.removeAt(0)); } if (tvalueParts.isNotEmpty) { @@ -385,7 +405,8 @@ class Locale { } else if (singleton == 'x') { // pu_extensions final List values = []; - while (localeSubtags.isNotEmpty && _reAllSubtags.hasMatch(localeSubtags[0])) { + while (localeSubtags.isNotEmpty + && _isAlphaNumeric(localeSubtags[0], 1, 8)) { values.add(localeSubtags.removeAt(0)); } ext[singleton] = values.join('-'); @@ -393,10 +414,11 @@ class Locale { problems.add('invalid part of private use subtags: "${localeSubtags.join('-')}"'); } break; - } else if (_reSingleton.hasMatch(singleton)) { + } else if (_isAlphabetic(singleton, 1, 1)) { // other_extensions final List values = []; - while (localeSubtags.isNotEmpty && _reOtherSubtags.hasMatch(localeSubtags[0])) { + while (localeSubtags.isNotEmpty + && _isAlphaNumeric(localeSubtags[0], 2, 8)) { values.add(localeSubtags.removeAt(0)); } if (!ext.containsKey(singleton)) { @@ -583,7 +605,7 @@ class Locale { // Keys in this ordered map must be sorted, and both keys and values must all // be lowercase: constructors must construct it as such. // - // This map simultaneously reprents T-extensions, U-extensions, other + // This map simultaneously represents T-extensions, U-extensions, other // extensions and the private use extensions. Implementation detailsf: // // * The 't' entry represents the optional "tlang" identifier of the T @@ -593,9 +615,10 @@ class Locale { // must be sorted in alphabetical order, separated by hyphens, and be // lowercase. If the U Extension is present but has no attributes, the 'u' // map entry's value must be an empty string. - // * U-Extension keyword keys, matching _reKey, and T-Extension fields in the - // map, whos field separator subtags match _reTkey, are directly entered - // into this map. (These regular expressions don't match the same keys.) + // * U-Extension keyword keys and T-Extension fields in the map are directly + // entered into this map: their syntax are enough to distinguish + // them. (_isUExtensionKey and _isTExtensionKey private methods check this + // syntax.) // * Other singletons are entered directly into the map, with all // values/attributes associated with that singleton as the map entry's // value. @@ -608,6 +631,7 @@ class Locale { // // If the unnamed constructor was used with bad parameters, the result might // not be standards-compliant. + // https://www.unicode.org/reports/tr35/#Unicode_locale_identifier // // FIXME/TODO: this must not be submitted as a public function until we've // made a final decision on toString() behaviour. @@ -646,7 +670,7 @@ class Locale { for (MapEntry entry in extensions.entries) { if (entry.key.length == 1) { - int letter = entry.key.codeUnitAt(0) - 0x61; + int letter = entry.key.codeUnitAt(0) - 0x61; // Subtracting 'a' if (letter < 0 || letter >= 26) { throw UnimplementedError('Unexpected key in extensions map: $entry'); } else if (letter < 19) { // 'a' to 's' (19th letter) @@ -662,14 +686,14 @@ class Locale { if (entry.value.isNotEmpty) resultVWYZ.write('-${entry.value}'); } - } else if (_reKey.hasMatch(entry.key)) { + } else if (_isUExtensionKey(entry.key)) { // unicode_locale_extensions if (entry.value == 'true' || entry.value == '') { uOut.write('-${entry.key}'); } else { uOut.write('-${entry.key}-${entry.value}'); } - } else if (_reTkey.hasMatch(entry.key)) { + } else if (_isTExtensionKey(entry.key)) { // transformed_extensions tOut.write('-${entry.key}'); // TODO: this is not standards compliant. What do we want to do with @@ -704,30 +728,94 @@ class Locale { return result.toString(); } - // Unicode Language Identifier subtags - // TODO/WIP: because we lowercase Locale Identifiers before parsing, typical - // use of these regexps don't actually need to atch capitals too. - static final RegExp _reSingleton = RegExp(r'^[a-zA-Z]$'); + // Returns true if s is a string made of lower-case alphabetic characters + // (a-z) or digits (0-9), with specified min and max lengths. + // + // Benchmarks show that doing this with a lookup map is faster. This function + // chooses to not keep the needed 256 byte string around though. + static bool _isAlphaNumeric(String s, int minLength, int maxLength) { + if (s.length < minLength || s.length > maxLength) + return false; + for (int i = 0; i < s.length; i++) { + int char = s.codeUnitAt(i); + // 0-9: 0x30-0x39. + if (char ^ 0x30 <= 9) + continue; + // a-z: 0x61-0x7A + if ((char - 0x61) & 0xFFFF >= 26) + return false; + } + return true; + } - // (https://www.unicode.org/reports/tr35/#Unicode_language_identifier). - static final RegExp _reLanguage = RegExp(r'^[a-zA-Z]{2,3}$|^[a-zA-Z]{5,8}$'); - static final RegExp _reScript = RegExp(r'^[a-zA-Z]{4}$'); - static final RegExp _reRegion = RegExp(r'^[a-zA-Z]{2}$|^[0-9]{3}$'); - static final RegExp _reVariant = RegExp(r'^[a-zA-Z0-9]{5,8}$|^[0-9][a-zA-Z0-9]{3}$'); - static final RegExp _reSep = RegExp(r'[-_]'); + // Returns true if s is a purely lower-case alphabetic (a-z) string with + // specified min and max lengths. + static bool _isAlphabetic(String s, int minLength, int maxLength) { + if (s.length < minLength || s.length > maxLength) + return false; + for (int i = 0; i < s.length; i++) { + int char = s.codeUnitAt(i); + // a-z: 0x61-0x7A + if ((char - 0x61) & 0xFFFF >= 26) + return false; + } + return true; + } + + // Returns true if s is a string consisting of 3 digits (0-9). + static bool _isNumeric(String s, int minLength, int maxLength) { + if (s.length < minLength || s.length > maxLength) + return false; + for (int i = 0; i < s.length; i++) { + // codeUnitAt returns a 16-bit number. Dart's default implementation has + // 64-bit ints, so this will be a positive number. + int char = s.codeUnitAt(i); + // 0-9: 0x30-0x39. + if (char ^ 0x30 > 9) + return false; + } + return true; + } - // Covers all subtags possible in Unicode Locale Identifiers, used for - // pu_extensions. - static final RegExp _reAllSubtags = RegExp(r'^[a-zA-Z0-9]{1,8}$'); - // Covers all subtags within a particular extension, used for other_extensions. - static final RegExp _reOtherSubtags = RegExp(r'^[a-zA-Z0-9]{2,8}$'); - // Covers "attribute" and "type" from unicode_locale_extensions, and "tvalue" in - // transformed_extensions. - // (https://www.unicode.org/reports/tr35/#Unicode_locale_identifier). - static final RegExp _reValueSubtags = RegExp(r'^[a-zA-Z0-9]{3,8}$'); - - static final RegExp _reKey = RegExp('^[a-zA-Z0-9][a-zA-Z]\$'); - static final RegExp _reTkey = RegExp('^[a-zA-Z][0-9]\$'); + // Checks that the specified string matches the variant subtag syntax. Does + // not check the list of valid subtags! + // + // r'^[a-zA-Z0-9]{5,8}$|^[0-9][a-zA-Z0-9]{3}$' + static bool _isVariantSubtag(String s) { + if (!_isAlphaNumeric(s, 4, 8)) + return false; + if (s.length == 4 && !_isNumeric(s[0], 1, 1)) + return false; + return true; + } + + // Checks that the specified string matches the syntax of U extension + // keys. Does not check that it is a valid key! + // + // r'^[a-zA-Z0-9][a-zA-Z]\$' + static bool _isUExtensionKey(String s) { + if (s.length != 2) + return false; + if (!_isAlphaNumeric(s[0], 1, 1)) + return false; + if (!_isAlphabetic(s[1], 1, 1)) + return false; + return true; + } + + // Checks that the specified string matches the syntax of T extension + // keys. Does not check that it is a valid key! + // + // r'^[a-zA-Z][0-9]\$' + static bool _isTExtensionKey(String s) { + if (s.length != 2) + return false; + if (!_isAlphabetic(s[0], 1, 1)) + return false; + if (!_isNumeric(s[1], 1, 1)) + return false; + return true; + } @override bool operator ==(dynamic other) { From 44e0939a4804d57184c2d71b6260f12baad149bc Mon Sep 17 00:00:00 2001 From: Hugo van der Merwe Date: Tue, 30 Oct 2018 17:16:13 +0100 Subject: [PATCH 14/20] fixup! Merge branch 'master' into unicodebcp47 --- lib/ui/window.dart | 106 ++++++++++++++++++--------------------------- 1 file changed, 42 insertions(+), 64 deletions(-) diff --git a/lib/ui/window.dart b/lib/ui/window.dart index 647f7adb0796d..2af6f36b236ea 100644 --- a/lib/ui/window.dart +++ b/lib/ui/window.dart @@ -119,8 +119,7 @@ class WindowPadding { /// An identifier used to select a user's language and formatting preferences. /// /// This represents a [Unicode Locale -/// Identifier](https://www.unicode.org/reports/tr35/#Unicode_locale_identifier) -/// (i.e. without Locale extensions), except variants are not supported. +/// Identifier](https://www.unicode.org/reports/tr35/#Unicode_locale_identifier). /// /// Locales are canonicalized according to the "preferred value" entries in the /// [IANA Language Subtag @@ -129,14 +128,10 @@ class WindowPadding { /// both have the [languageCode] `he`, because `iw` is a deprecated language /// subtag that was replaced by the subtag `he`. /// -/// TODO: evalute using CLDR instead of the IANA registry, and determine whether -/// there are more replacement reasons that should be grabbed: -/// https://www.unicode.org/repos/cldr/tags/latest/common/supplemental/supplementalMetadata.xml -/// /// When constructed correctly, instances of this Locale class will produce -/// normalized syntactically valid output, although not necessarily valid (tags -/// are not validated). See constructor and factory method documentation for -/// details. +/// normalized syntactically correct output, although not necessarily valid +/// (because tags are not validated). See constructor and factory method +/// documentation for details. /// /// See also: /// @@ -154,10 +149,17 @@ class Locale { /// ``` /// /// The primary language subtag must not be null. The region subtag is - /// optional. The values are _case sensitive_, and must match the values - /// listed in - /// http://unicode.org/repos/cldr/tags/latest/common/validity/language.xml and - /// http://unicode.org/repos/cldr/tags/latest/common/validity/region.xml. + /// optional. + /// + /// The subtag values are _case sensitive_ and must be one of the valid + /// subtags according to CLDR supplemental data: + /// [language](http://unicode.org/cldr/latest/common/validity/language.xml), + /// [region](http://unicode.org/cldr/latest/common/validity/region.xml). The + /// primary language subtag must be at least two and at most eight lowercase + /// letters, but not four letters. The region region subtag must be two + /// uppercase letters or three digits. See the [Unicode Language + /// Identifier](https://www.unicode.org/reports/tr35/#Unicode_language_identifier) + /// specification. /// /// This method only produces standards-compliant instances if valid language /// and country codes are provided. Deprecated subtags will be replaced, but @@ -200,7 +202,9 @@ class Locale { _languageCode = languageCode, assert(scriptCode != ''), assert(countryCode != ''), - _countryCode = countryCode; + _countryCode = countryCode, + _variants = null, + _extensions = null; // Creates a new Locale object with the specified parts. // @@ -215,45 +219,21 @@ class Locale { // keys must be present, with an empty string as value, if there are any // subtags for those singletons. Takes ownership of the LinkedHashMap. Locale._internal( - String language, { - String script, - String region, + String languageCode, { + this.scriptCode, + String countryCode, List variants, collection.LinkedHashMap extensions, - }) : assert(language != null), - assert(language.length >= 2 && language.length <= 8), - assert(language.length != 4), - assert(script == null || script.length == 4), - assert(region == null || (region.length >= 2 && region.length <= 3)), - _languageCode = language, - _scriptCode = script, - _countryCode = region, - _variants = (variants != null && variants.length > 0) ? List.unmodifiable(variants) : null, + }) : assert(languageCode != null), + assert(languageCode.length >= 2 && languageCode.length <= 8), + assert(languageCode.length != 4), + assert(scriptCode == null || scriptCode.length == 4), + assert(countryCode == null || (countryCode.length >= 2 && countryCode.length <= 3)), + _languageCode = languageCode, + _countryCode = countryCode, + _variants = (variants != null && variants.isNotEmpty) ? List.unmodifiable(variants) : null, _extensions = (extensions != null && extensions.isNotEmpty) ? extensions : null; - // TODO: unit-test this, pick the right name, make it public, update this - // documentation. - // - // TODO: might we prefer constructing with a single string as "variants" - // rather than a list of strings? - // - // TODO: this constructor modifies the list passed to variants.Is this the - // right design? "We mutate what you give us!" feels bad. - // - // Extensions are not yet supported by this constructor, it would require - // first finalizing the design of the extensions map we wish to expose. - factory Locale._fromComponents({ - String language, - String script, - String region, - List variants, - }) { - if (variants != null) { - variants.sort(); - } - return Locale._internal(language, script: script, region: region, variants: variants); - } - static final RegExp _reSep = RegExp(r'[-_]'); /// Parses [Unicode CLDR Locale @@ -332,8 +312,8 @@ class Locale { variants.sort(); return Locale._internal( language, - script: script, - region: region, + scriptCode: script, + countryCode: region, variants: variants, extensions: extensions, ); @@ -346,7 +326,7 @@ class Locale { static void _parseExtensions(List localeSubtags, collection.LinkedHashMap extensions, List problems) { - Map ext = {}; + final Map ext = {}; while (localeSubtags.isNotEmpty) { final String singleton = localeSubtags.removeAt(0); if (singleton == 'u') { @@ -464,8 +444,7 @@ class Locale { problems.add('invalid subtag, should be singleton: "$singleton"'); } } - List ks = ext.keys.toList(); - ks.sort(); + final List ks = ext.keys.toList()..sort(); for (String k in ks) { extensions[k] = ext[k]; } @@ -486,14 +465,14 @@ class Locale { /// [languageCode] `he`, because `iw` is a deprecated language subtag that was /// replaced by the subtag `he`. /// - /// New deprecations in the registry are not automatically picked up by this - /// library, so this class will not make such changes for deprecations that - /// are too recent. - /// /// This must be a valid Unicode Language subtag as listed in [Unicode CLDR /// supplemental /// data](http://unicode.org/cldr/latest/common/validity/language.xml). /// + /// New deprecations in the registry are not automatically picked up by this + /// library, so this class will not make such changes for deprecations that + /// are too recent. + /// /// See also: /// /// * [new Locale.fromSubtags], which describes the conventions for creating @@ -729,7 +708,7 @@ class Locale { for (MapEntry entry in extensions.entries) { if (entry.key.length == 1) { - int letter = entry.key.codeUnitAt(0) - 0x61; // Subtracting 'a' + final int letter = entry.key.codeUnitAt(0) - 0x61; // Subtracting 'a' if (letter < 0 || letter >= 26) { throw UnimplementedError('Unexpected key in extensions map: $entry'); } else if (letter < 19) { // 'a' to 's' (19th letter) @@ -796,7 +775,7 @@ class Locale { if (s.length < minLength || s.length > maxLength) return false; for (int i = 0; i < s.length; i++) { - int char = s.codeUnitAt(i); + final int char = s.codeUnitAt(i); // 0-9: 0x30-0x39. if (char ^ 0x30 <= 9) continue; @@ -813,7 +792,7 @@ class Locale { if (s.length < minLength || s.length > maxLength) return false; for (int i = 0; i < s.length; i++) { - int char = s.codeUnitAt(i); + final int char = s.codeUnitAt(i); // a-z: 0x61-0x7A if ((char - 0x61) & 0xFFFF >= 26) return false; @@ -828,7 +807,7 @@ class Locale { for (int i = 0; i < s.length; i++) { // codeUnitAt returns a 16-bit number. Dart's default implementation has // 64-bit ints, so this will be a positive number. - int char = s.codeUnitAt(i); + final int char = s.codeUnitAt(i); // 0-9: 0x30-0x39. if (char ^ 0x30 > 9) return false; @@ -888,8 +867,8 @@ class Locale { // package:collection. Variants and extensions are rare, so we convert them // to canonical/normalized strings for comparison. return languageCode == typedOther.languageCode - && countryCode == typedOther.countryCode && scriptCode == typedOther.scriptCode + && countryCode == typedOther.countryCode && _listEquals(_variants, typedOther._variants) && _mapEquals(_extensions, typedOther._extensions); } @@ -1362,7 +1341,6 @@ class Window { if (error != null) throw new Exception(error); } - String _sendPlatformMessage(String name, PlatformMessageResponseCallback callback, ByteData data) native 'Window_sendPlatformMessage'; From 5f43846c314f5293dd70b53dda7849bdadbcc6ad Mon Sep 17 00:00:00 2001 From: Hugo van der Merwe Date: Tue, 30 Oct 2018 17:03:59 +0100 Subject: [PATCH 15/20] Make hashMap deterministic by sorting map keys. --- lib/ui/hash_codes.dart | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/lib/ui/hash_codes.dart b/lib/ui/hash_codes.dart index 16e58f78d35f9..72456863ccdab 100644 --- a/lib/ui/hash_codes.dart +++ b/lib/ui/hash_codes.dart @@ -120,16 +120,16 @@ int hashList(Iterable arguments) { return _Jenkins.finish(result); } -/// Combine the [Object.hashCode] values of an arbitrary number of from -/// an [Iterable] into one value. This function will return the same value if -/// given null as if given an empty list. +/// Combine the [Object.hashCode] values of an arbitrary number of key and value +/// objects from a [Map] into one value. This function will return the same +/// value if given null as if given an empty map. int hashMap(Map map) { int result = 0; if (map != null) { - // FIXME: sort the keys for deterministic iteration order. - for (MapEntry entry in map.entries) { - result = _Jenkins.combine(result, entry.key); - result = _Jenkins.combine(result, entry.value); + final List keys = map.keys.toList()..sort(); + for (Object key in keys) { + result = _Jenkins.combine(result, key); + result = _Jenkins.combine(result, map[key]); } } return _Jenkins.finish(result); From b6f9a6cfaa64d68045f4a2816ca6d8812451b6e1 Mon Sep 17 00:00:00 2001 From: Hugo van der Merwe Date: Tue, 30 Oct 2018 20:07:22 +0100 Subject: [PATCH 16/20] Sort variants and _extensions in Locale._internal. --- lib/ui/window.dart | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/lib/ui/window.dart b/lib/ui/window.dart index 2af6f36b236ea..935c30eecf2b0 100644 --- a/lib/ui/window.dart +++ b/lib/ui/window.dart @@ -213,11 +213,11 @@ class Locale { // way or do any syntax checking. // // * language, script and region must be in normalized form. - // * variants must already be sorted, with each subtag in normalized form. - // * Iterating over variants must provide variants in alphabetical order. + // * variants need not be sorted, this constructor performs sorting. Each + // variant subtag should already be in normalized form though. // * The extensions map must contain only valid key/value pairs. "u" and "t" // keys must be present, with an empty string as value, if there are any - // subtags for those singletons. Takes ownership of the LinkedHashMap. + // subtags for those singletons. Locale._internal( String languageCode, { this.scriptCode, @@ -231,8 +231,18 @@ class Locale { assert(countryCode == null || (countryCode.length >= 2 && countryCode.length <= 3)), _languageCode = languageCode, _countryCode = countryCode, - _variants = (variants != null && variants.isNotEmpty) ? List.unmodifiable(variants) : null, - _extensions = (extensions != null && extensions.isNotEmpty) ? extensions : null; + _variants = (variants != null && variants.isNotEmpty) ? List.from(variants) : null, + _extensions = (extensions != null && extensions.isNotEmpty) ? {} : null + { + _variants?.sort(); + if (extensions != null) { + // Insert extensions in sorted order. + final List keys = extensions.keys.toList()..sort(); + for (String key in keys) { + _extensions[key] = extensions[key]; + } + } + } static final RegExp _reSep = RegExp(r'[-_]'); @@ -253,8 +263,7 @@ class Locale { final List localeSubtags = localeId.split(_reSep); String language, script, region; final List variants = []; - final collection.LinkedHashMap extensions = - collection.LinkedHashMap(); + final Map extensions = {}; final List problems = []; if (_isAlphabetic(localeSubtags[0], 2, 8) @@ -309,7 +318,6 @@ class Locale { '${problems.join("; ")}.'); } - variants.sort(); return Locale._internal( language, scriptCode: script, From e75296b95c333bea22e7f855091242719308f0c2 Mon Sep 17 00:00:00 2001 From: Hugo van der Merwe Date: Tue, 30 Oct 2018 20:38:48 +0100 Subject: [PATCH 17/20] Remove unnecessary diffs in locale_test.dart for easier review. --- lib/ui/window.dart | 3 - testing/dart/locale_test.dart | 247 +++++++++++++++------------------- 2 files changed, 107 insertions(+), 143 deletions(-) diff --git a/lib/ui/window.dart b/lib/ui/window.dart index 935c30eecf2b0..3521ed6ce80e5 100644 --- a/lib/ui/window.dart +++ b/lib/ui/window.dart @@ -670,9 +670,6 @@ class Locale { // value. final collection.LinkedHashMap _extensions; - // TMP FIXME - collection.LinkedHashMap get myexthelper => _extensions; - // Produces the Unicode BCP47 Locale Identifier for this locale. // // If the unnamed constructor was used with bad parameters, the result might diff --git a/testing/dart/locale_test.dart b/testing/dart/locale_test.dart index bfb149ae7f760..62ec72fc8bc38 100644 --- a/testing/dart/locale_test.dart +++ b/testing/dart/locale_test.dart @@ -9,61 +9,30 @@ import 'package:test/test.dart'; void main() { test('Locale', () { final Null $null = null; - expect( - const Locale('en').toString(), - 'en', - ); - expect( - const Locale('en'), - new Locale('en', $null), - ); - expect( - const Locale('en').hashCode, - new Locale('en', $null).hashCode, - ); - - expect( - const Locale('en'), - isNot(new Locale('en', '')), + expect(const Locale('en').toString(), 'en'); + expect(const Locale('en'), new Locale('en', $null)); + expect(const Locale('en').hashCode, new Locale('en', $null).hashCode); + expect(const Locale('en'), isNot(new Locale('en', '')), reason: 'Legacy. (The semantic difference between Locale("en") and ' - 'Locale("en", "") is not defined.)', - ); - expect( - const Locale('en').hashCode, - isNot(new Locale('en', '').hashCode), + 'Locale("en", "") is not defined.)'); + expect(const Locale('en').hashCode, isNot(new Locale('en', '').hashCode), reason: 'Legacy. (The semantic difference between Locale("en") and ' - 'Locale("en", "") is not defined.)', - ); - expect( - const Locale('en', 'US').toString(), - 'en_US', + 'Locale("en", "") is not defined.)'); + expect(const Locale('en', 'US').toString(), 'en_US', reason: 'Legacy. en_US is a valid Unicode Locale Identifier, but ' - 'not a valid Unicode BCP47 Locale Identifier.', - ); - expect( - const Locale('en', 'US').toLanguageTag(), - 'en-US', - reason: 'Unicode BCP47 Locale Identifier, as recommended for general ' - 'interchange.', - ); - - expect( - const Locale('iw').toString(), - 'he', - reason: 'The language code for Hebrew was officially changed in 1989.', - ); - expect( - const Locale('iw', 'DD').toString(), - 'he_DE', + 'not a valid Unicode BCP47 Locale Identifier.'); + expect(const Locale('iw').toString(), 'he', + reason: 'The language code for Hebrew was officially changed in 1989.'); + expect(const Locale('iw', 'DD').toString(), 'he_DE', reason: 'Legacy. This is a valid Unicode Locale Identifier, even if ' - 'not a valid Unicode BCP47 Locale Identifier', - ); - expect( - const Locale('iw', 'DD'), - const Locale('he', 'DE'), + 'not a valid Unicode BCP47 Locale Identifier'); + expect(const Locale('iw', 'DD'), const Locale('he', 'DE'), reason: 'The German Democratic Republic ceased to exist in ' - 'October 1990.', - ); + 'October 1990.'); + + expect(const Locale('en', 'US').toLanguageTag(), 'en-US', + reason: 'Unicode BCP47 Locale Identifier, as recommended for general ' + 'interchange.'); }); test('Locale unnamed constructor idiosyncrasies', () { @@ -132,6 +101,32 @@ void main() { ); }); + test('Locale.fromSubtags', () { + expect(const Locale.fromSubtags().languageCode, 'und'); + expect(const Locale.fromSubtags().scriptCode, null); + expect(const Locale.fromSubtags().countryCode, null); + + expect(const Locale.fromSubtags(languageCode: 'en').toString(), 'en'); + expect(const Locale.fromSubtags(languageCode: 'en').languageCode, 'en'); + expect(const Locale.fromSubtags(scriptCode: 'Latn').toString(), 'und_Latn'); + expect(const Locale.fromSubtags(scriptCode: 'Latn').scriptCode, 'Latn'); + expect(const Locale.fromSubtags(countryCode: 'US').toString(), 'und_US'); + expect(const Locale.fromSubtags(countryCode: 'US').countryCode, 'US'); + + expect(Locale.fromSubtags(languageCode: 'es', countryCode: '419').toString(), 'es_419'); + expect(Locale.fromSubtags(languageCode: 'es', countryCode: '419').languageCode, 'es'); + expect(Locale.fromSubtags(languageCode: 'es', countryCode: '419').countryCode, '419'); + + expect(Locale.fromSubtags(languageCode: 'zh', scriptCode: 'Hans', countryCode: 'CN').toString(), 'zh_Hans_CN'); + }); + + test('Locale equality: fromSubtags', () { + expect(Locale.fromSubtags(languageCode: 'en'), + isNot(Locale.fromSubtags(languageCode: 'en', scriptCode: 'Latn'))); + expect(Locale.fromSubtags(languageCode: 'en').hashCode, + isNot(Locale.fromSubtags(languageCode: 'en', scriptCode: 'Latn').hashCode)); + }); + group('Locale.parse():', () { test('languageCode.', () { expect( @@ -352,103 +347,75 @@ void main() { reason: 'Swapping region and script is not allowed.', ); }); - }); - - test('Equality.', () { - expect( - Locale.parse('en'), - isNot(Locale.parse('en-Latn')), - ); - expect( - Locale.parse('en').hashCode, - isNot(Locale.parse('en-Latn').hashCode), - ); - expect( - Locale.parse('en'), - isNot(Locale.parse('en-US')), - ); - expect( - Locale.parse('en').hashCode, - isNot(Locale.parse('en-US').hashCode), - ); - - expect( - Locale.parse('en'), - isNot(Locale.parse('en-fonipa')), - ); - expect( - Locale.parse('en').hashCode, - isNot(Locale.parse('en-fonipa').hashCode), - ); - - expect(Locale.parse('en'), isNot(Locale.parse('en-a'))); - expect( - Locale.parse('en').hashCode, - isNot(Locale.parse('en-a').hashCode), - ); - - expect(Locale.parse('en'), isNot(Locale.parse('en-a'))); - expect( - Locale.parse('en').hashCode, - isNot(Locale.parse('en-a').hashCode), - ); - - expect( - Locale.parse('en-u-attr'), - isNot(Locale.parse('en-u-nu-roman')), - ); - expect( - Locale.parse('en-u-attr').hashCode, - isNot(Locale.parse('en-u-nu-roman').hashCode), - ); + test('Locale equality.', () { + expect( + Locale.parse('en'), + isNot(Locale.parse('en-Latn')), + ); + expect( + Locale.parse('en').hashCode, + isNot(Locale.parse('en-Latn').hashCode), + ); - Locale a = Locale.parse('en-u-kb'); - Locale b = Locale.parse('en-u-kb-true'); - expect( - a, - b, - reason: '-u-kb should parse to the same result as -u-kb-true.', - ); - expect( - a.hashCode, - b.hashCode, - reason: '-u-kb should parse to the same result as -u-kb-true. ${a.myexthelper}, ${b.myexthelper}.', - ); + expect( + Locale.parse('en'), + isNot(Locale.parse('en-US')), + ); + expect( + Locale.parse('en').hashCode, + isNot(Locale.parse('en-US').hashCode), + ); - expect( - Locale.parse('en-t-hi'), - isNot(Locale.parse('en-t-hi-h0-hybrid')), - ); - expect( - Locale.parse('en-t-hi').hashCode, - isNot(Locale.parse('en-t-hi-h0-hybrid').hashCode), - ); - }); + expect( + Locale.parse('en'), + isNot(Locale.parse('en-fonipa')), + ); + expect( + Locale.parse('en').hashCode, + isNot(Locale.parse('en-fonipa').hashCode), + ); - test('Locale.fromSubtags', () { - expect(const Locale.fromSubtags().languageCode, 'und'); - expect(const Locale.fromSubtags().scriptCode, null); - expect(const Locale.fromSubtags().countryCode, null); + expect(Locale.parse('en'), isNot(Locale.parse('en-a'))); + expect( + Locale.parse('en').hashCode, + isNot(Locale.parse('en-a').hashCode), + ); - expect(const Locale.fromSubtags(languageCode: 'en').toString(), 'en'); - expect(const Locale.fromSubtags(languageCode: 'en').languageCode, 'en'); - expect(const Locale.fromSubtags(scriptCode: 'Latn').toString(), 'und_Latn'); - expect(const Locale.fromSubtags(scriptCode: 'Latn').scriptCode, 'Latn'); - expect(const Locale.fromSubtags(countryCode: 'US').toString(), 'und_US'); - expect(const Locale.fromSubtags(countryCode: 'US').countryCode, 'US'); + expect(Locale.parse('en'), isNot(Locale.parse('en-a'))); + expect( + Locale.parse('en').hashCode, + isNot(Locale.parse('en-a').hashCode), + ); - expect(Locale.fromSubtags(languageCode: 'es', countryCode: '419').toString(), 'es_419'); - expect(Locale.fromSubtags(languageCode: 'es', countryCode: '419').languageCode, 'es'); - expect(Locale.fromSubtags(languageCode: 'es', countryCode: '419').countryCode, '419'); + expect( + Locale.parse('en-u-attr'), + isNot(Locale.parse('en-u-nu-roman')), + ); + expect( + Locale.parse('en-u-attr').hashCode, + isNot(Locale.parse('en-u-nu-roman').hashCode), + ); - expect(Locale.fromSubtags(languageCode: 'zh', scriptCode: 'Hans', countryCode: 'CN').toString(), 'zh_Hans_CN'); - }); + expect( + Locale.parse('en-u-kb'), + Locale.parse('en-u-kb-true'), + reason: '-u-kb should parse to the same result as -u-kb-true.', + ); + expect( + Locale.parse('en-u-kb').hashCode, + Locale.parse('en-u-kb-true').hashCode, + reason: '-u-kb should parse to the same result as -u-kb-true.', + ); - test('Locale equality', () { - expect(Locale.fromSubtags(languageCode: 'en'), - isNot(Locale.fromSubtags(languageCode: 'en', scriptCode: 'Latn'))); - expect(Locale.fromSubtags(languageCode: 'en').hashCode, - isNot(Locale.fromSubtags(languageCode: 'en', scriptCode: 'Latn').hashCode)); + expect( + Locale.parse('en-t-hi'), + isNot(Locale.parse('en-t-hi-h0-hybrid')), + ); + expect( + Locale.parse('en-t-hi').hashCode, + isNot(Locale.parse('en-t-hi-h0-hybrid').hashCode), + ); + }); }); } From f03cf6e2987a504357611135c7ef1a8ac510b0b4 Mon Sep 17 00:00:00 2001 From: Hugo van der Merwe Date: Thu, 1 Nov 2018 20:26:50 +0100 Subject: [PATCH 18/20] Pass StringBuffer to _extensionsToString, return void. --- lib/ui/window.dart | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/lib/ui/window.dart b/lib/ui/window.dart index 3521ed6ce80e5..200ac44cb1b39 100644 --- a/lib/ui/window.dart +++ b/lib/ui/window.dart @@ -690,7 +690,7 @@ class Locale { out.write('-$v'); } if (_extensions != null && _extensions.isNotEmpty) { - out.write(_extensionsToString(_extensions)); + _extensionsToString(_extensions, out); } return out.toString(); } @@ -700,15 +700,14 @@ class Locale { // This covers everything after the unicode_language_id, and returns a string // starting with a hyphen. Returns '' if passed null or an empty extensions // map. - static String _extensionsToString(collection.LinkedHashMap extensions) { - if (extensions == null || extensions.isEmpty) { - return ''; - } + static void _extensionsToString(collection.LinkedHashMap extensions, StringBuffer result) { + if (extensions == null || extensions.isEmpty) + return; + String uAttributes; String tLang; final StringBuffer uOut = StringBuffer(); final StringBuffer tOut = StringBuffer(); - final StringBuffer result = StringBuffer(); final StringBuffer resultVWYZ = StringBuffer(); for (MapEntry entry in extensions.entries) { @@ -768,7 +767,6 @@ class Locale { result.write('-${extensions["x"]}'); } } - return result.toString(); } // Returns true if s is a string made of lower-case alphabetic characters From 4b90e4c07f4c5b9cbc319c123cd81507c3289ad6 Mon Sep 17 00:00:00 2001 From: Hugo van der Merwe Date: Fri, 2 Nov 2018 23:20:59 +0100 Subject: [PATCH 19/20] Add tryParse. Improve unit test formatting. --- lib/ui/window.dart | 9 +++++++ testing/dart/locale_test.dart | 51 +++++++++++++++++++++++++++-------- 2 files changed, 49 insertions(+), 11 deletions(-) diff --git a/lib/ui/window.dart b/lib/ui/window.dart index 200ac44cb1b39..76eed8d5f351d 100644 --- a/lib/ui/window.dart +++ b/lib/ui/window.dart @@ -327,6 +327,15 @@ class Locale { ); } + static Locale tryParse(String localeId) { + try { + final Locale l = parse(localeId); + return l; + } on Exception { + return null; + } + } + // * All subtags in localeSubtags must already be lowercase. // // * extensions must be a map with sorted iteration order. LinkedHashMap diff --git a/testing/dart/locale_test.dart b/testing/dart/locale_test.dart index 62ec72fc8bc38..83ed7a3ee649e 100644 --- a/testing/dart/locale_test.dart +++ b/testing/dart/locale_test.dart @@ -269,28 +269,33 @@ void main() { // effort" locale? test('Strict parsing examples.', () { expect( - () => Locale.parse('nl-u-x'), throws, + () => Locale.parse('nl-u-x'), + throwsException, reason: 'With no "u" attributes there should be no "u" singleton. ' 'Could be lenient: "nl-x".', ); expect( - () => Locale.parse('fr-t-x'), throws, + () => Locale.parse('fr-t-x'), + throwsException, reason: 'With no "t" attributes there should be no "t" singleton. ' 'Could be lenient: "fr-x".', ); expect( - () => Locale.parse('it-u-nu-romanlow-a-u-attr'), throws, + () => Locale.parse('it-u-nu-romanlow-a-u-attr'), + throwsException, reason: 'Duplicate "u" singletons could be merged if only one has ' 'attributes. Could be lenient: "it-a-u-attr-nu-romanlow".', ); expect( - () => Locale.parse('pl-t-cs-b-t-h0-hybrid'), throws, + () => Locale.parse('pl-t-cs-b-t-h0-hybrid'), + throwsException, reason: 'Duplicate "t" singletons could be merged if only one has ' 'tlang specified. Could be lenient: "pl-b-t-cs-h0-hybrid".', ); expect( - () => Locale.parse('ro-t-hu-HU-cu-ron'), throws, + () => Locale.parse('ro-t-hu-HU-cu-ron'), + throwsException, reason: 'U-extension keywords misplaced under the -t- singleton ' 'could be moved if unambiguous enough. ' 'Could be lenient: "ro-t-hu-hu-u-cu-ron".', @@ -305,13 +310,15 @@ void main() { ); expect( - () => Locale.parse('pt-BR-u-h0-hybrid-t-pt-PT'), throws, + () => Locale.parse('pt-BR-u-h0-hybrid-t-pt-PT'), + throwsException, reason: 'T-extension "tfields" misplaced under the U-extension. ' 'Could be lenient: "pt-BR-t-pt-pt-h0-hybrid".', ); expect( - () => Locale.parse('pl-t-h0'), throws, + () => Locale.parse('pl-t-h0'), + throwsException, reason: 'Locale tag pl-t-h0 is not spec compliant. How to best fix it ' 'is unclear: it is underspecified.', ); @@ -319,7 +326,8 @@ void main() { test('Locale.parse(): invalid identifiers.', () { expect( - () => Locale.parse('a'), throws, + () => Locale.parse('a'), + throwsException, reason: 'One character language subtags are invalid.', ); expect( @@ -335,19 +343,40 @@ void main() { 'language subtag can be skipped if a script is specified.', ); expect( - () => Locale.parse('abcdefghi'), throws, + () => Locale.parse('abcdefghi'), + throwsException, reason: 'Language subtags may not be more than 8 characters.', ); expect( - () => Locale.parse(r'e$'), throws, + () => Locale.parse(r'e$'), + throwsException, reason: 'Invalid character for language subtag, only a-z allowed.', ); expect( - () => Locale.parse('fr-RU-Hant'), throws, + () => Locale.parse('fr-RU-Hant'), + throwsException, reason: 'Swapping region and script is not allowed.', ); }); + test('Locale.tryParse().', () { + expect( + Locale.tryParse('a'), + null, + reason: 'One character language subtags are invalid.', + ); + expect( + Locale.tryParse('abcdefghi'), + null, + reason: 'Language subtags may not be more than 8 characters.', + ); + expect( + Locale.tryParse(r'e$'), + null, + reason: 'Invalid character for language subtag, only a-z allowed.', + ); + }); + test('Locale equality.', () { expect( Locale.parse('en'), From 1ec498a942a398d79fac3af587a25212ec5a5907 Mon Sep 17 00:00:00 2001 From: Hugo van der Merwe Date: Sat, 3 Nov 2018 01:18:12 +0100 Subject: [PATCH 20/20] Replace inefficient List.removeAt(0) calls by an incrementing index. --- lib/ui/window.dart | 111 ++++++++++++++++++++++++--------------------- 1 file changed, 60 insertions(+), 51 deletions(-) diff --git a/lib/ui/window.dart b/lib/ui/window.dart index 76eed8d5f351d..d7dada1670ae7 100644 --- a/lib/ui/window.dart +++ b/lib/ui/window.dart @@ -261,6 +261,8 @@ class Locale { return Locale._internal('und'); final List localeSubtags = localeId.split(_reSep); + int parseIdx = 0; + String language, script, region; final List variants = []; final Map extensions = {}; @@ -269,7 +271,8 @@ class Locale { if (_isAlphabetic(localeSubtags[0], 2, 8) && localeSubtags[0].length != 4) { // language subtag: r'^[a-zA-Z]{2,3}$|^[a-zA-Z]{5,8}$' - language = _replaceDeprecatedLanguageSubtag(localeSubtags.removeAt(0)); + language = _replaceDeprecatedLanguageSubtag(localeSubtags[0]); + parseIdx = 1; } else if (_isAlphabetic(localeSubtags[0], 4, 4)) { // First subtag is already a script subtag. // @@ -280,24 +283,26 @@ class Locale { } else { problems.add('"${localeSubtags[0]}" is an invalid language subtag'); } - if (localeSubtags.isNotEmpty - && _isAlphabetic(localeSubtags[0], 4, 4)) { + if (parseIdx < localeSubtags.length + && _isAlphabetic(localeSubtags[parseIdx], 4, 4)) { // script subtag: r'^[a-zA-Z]{4}$' - final String s = localeSubtags.removeAt(0); + final String s = localeSubtags[parseIdx++]; script = s.substring(0, 1).toUpperCase() + s.substring(1).toLowerCase(); } - if (localeSubtags.isNotEmpty - && (_isAlphabetic(localeSubtags[0], 2, 2) - || _isNumeric(localeSubtags[0], 3, 3))) { + if (parseIdx < localeSubtags.length + && (_isAlphabetic(localeSubtags[parseIdx], 2, 2) + || _isNumeric(localeSubtags[parseIdx], 3, 3))) { // region subtag: r'^[a-zA-Z]{2}$|^[0-9]{3}$'; - region = localeSubtags.removeAt(0).toUpperCase(); + region = localeSubtags[parseIdx++].toUpperCase(); } - while (localeSubtags.isNotEmpty && _isVariantSubtag(localeSubtags[0])) { - variants.add(localeSubtags.removeAt(0)); + while (parseIdx < localeSubtags.length + && _isVariantSubtag(localeSubtags[parseIdx])) { + variants.add(localeSubtags[parseIdx++]); } - // Now we should be into extension territory, localeSubtags[0] should be a singleton. - if (localeSubtags.isNotEmpty && localeSubtags[0].length > 1) { + // Now we should be into extension territory, localeSubtags[parseIdx] should + // be a singleton. + if (parseIdx < localeSubtags.length && localeSubtags[parseIdx].length > 1) { final List mismatched = []; if (variants.isEmpty) { if (region == null) { @@ -308,10 +313,10 @@ class Locale { } mismatched.add('variant'); } - problems.add('unrecognised subtag "${localeSubtags[0]}": is not a ' + problems.add('unrecognised subtag "${localeSubtags[parseIdx]}": is not a ' '${mismatched.join(", ")}'); } - _parseExtensions(localeSubtags, extensions, problems); + _parseExtensions(localeSubtags, parseIdx, extensions, problems); if (problems.isNotEmpty) { throw FormatException('Locale Identifier $localeId is invalid: ' @@ -341,18 +346,19 @@ class Locale { // * extensions must be a map with sorted iteration order. LinkedHashMap // preserves order for us. static void _parseExtensions(List localeSubtags, + int parseIdx, collection.LinkedHashMap extensions, List problems) { final Map ext = {}; - while (localeSubtags.isNotEmpty) { - final String singleton = localeSubtags.removeAt(0); + while (parseIdx < localeSubtags.length) { + final String singleton = localeSubtags[parseIdx++]; if (singleton == 'u') { bool empty = true; // unicode_locale_extensions: collect "(sep attribute)+" attributes. final List attributes = []; - while (localeSubtags.isNotEmpty - && _isAlphaNumeric(localeSubtags[0], 3, 8)) { - attributes.add(localeSubtags.removeAt(0)); + while (parseIdx < localeSubtags.length + && _isAlphaNumeric(localeSubtags[parseIdx], 3, 8)) { + attributes.add(localeSubtags[parseIdx++]); } if (attributes.isNotEmpty) { empty = false; @@ -363,14 +369,14 @@ class Locale { problems.add('duplicate singleton: "$singleton"'); } // unicode_locale_extensions: collect "(sep keyword)*". - while (localeSubtags.isNotEmpty - && _isUExtensionKey(localeSubtags[0])) { + while (parseIdx < localeSubtags.length + && _isUExtensionKey(localeSubtags[parseIdx])) { empty = false; - final String key = localeSubtags.removeAt(0); + final String key = localeSubtags[parseIdx++]; final List typeParts = []; - while (localeSubtags.isNotEmpty - && _isAlphaNumeric(localeSubtags[0], 3, 8)) { - typeParts.add(localeSubtags.removeAt(0)); + while (parseIdx < localeSubtags.length + && _isAlphaNumeric(localeSubtags[parseIdx], 3, 8)) { + typeParts.add(localeSubtags[parseIdx++]); } if (!ext.containsKey(key)) { if (typeParts.length == 1 && typeParts[0] == 'true') { @@ -389,23 +395,24 @@ class Locale { bool empty = true; // transformed_extensions: grab tlang if it exists. final List tlang = []; - if (localeSubtags.isNotEmpty - && _isAlphabetic(localeSubtags[0], 2, 8) - && localeSubtags[0].length != 4) { + if (parseIdx < localeSubtags.length + && _isAlphabetic(localeSubtags[parseIdx], 2, 8) + && localeSubtags[parseIdx].length != 4) { // language subtag empty = false; - tlang.add(localeSubtags.removeAt(0)); - if (localeSubtags.isNotEmpty - && _isAlphabetic(localeSubtags[0], 4, 4)) + tlang.add(localeSubtags[parseIdx++]); + if (parseIdx < localeSubtags.length + && _isAlphabetic(localeSubtags[parseIdx], 4, 4)) // script subtag - tlang.add(localeSubtags.removeAt(0)); - if (localeSubtags.isNotEmpty - && (_isAlphabetic(localeSubtags[0], 2, 2) - || _isNumeric(localeSubtags[0], 3, 3))) + tlang.add(localeSubtags[parseIdx++]); + if (parseIdx < localeSubtags.length + && (_isAlphabetic(localeSubtags[parseIdx], 2, 2) + || _isNumeric(localeSubtags[parseIdx], 3, 3))) // region subtag: r'^[a-zA-Z]{2}$|^[0-9]{3}$'; - tlang.add(localeSubtags.removeAt(0)); - while (localeSubtags.isNotEmpty && _isVariantSubtag(localeSubtags[0])) { - tlang.add(localeSubtags.removeAt(0)); + tlang.add(localeSubtags[parseIdx++]); + while (parseIdx < localeSubtags.length + && _isVariantSubtag(localeSubtags[parseIdx])) { + tlang.add(localeSubtags[parseIdx++]); } } if (!ext.containsKey(singleton)) { @@ -414,12 +421,13 @@ class Locale { problems.add('duplicate singleton: "$singleton"'); } // transformed_extensions: collect "(sep tfield)*". - while (localeSubtags.isNotEmpty && _isTExtensionKey(localeSubtags[0])) { - final String tkey = localeSubtags.removeAt(0); + while (parseIdx < localeSubtags.length + && _isTExtensionKey(localeSubtags[parseIdx])) { + final String tkey = localeSubtags[parseIdx++]; final List tvalueParts = []; - while (localeSubtags.isNotEmpty - && _isAlphaNumeric(localeSubtags[0], 3, 8)) { - tvalueParts.add(localeSubtags.removeAt(0)); + while (parseIdx < localeSubtags.length + && _isAlphaNumeric(localeSubtags[parseIdx], 3, 8)) { + tvalueParts.add(localeSubtags[parseIdx++]); } if (tvalueParts.isNotEmpty) { empty = false; @@ -436,21 +444,22 @@ class Locale { } else if (singleton == 'x') { // pu_extensions final List values = []; - while (localeSubtags.isNotEmpty - && _isAlphaNumeric(localeSubtags[0], 1, 8)) { - values.add(localeSubtags.removeAt(0)); + while (parseIdx < localeSubtags.length + && _isAlphaNumeric(localeSubtags[parseIdx], 1, 8)) { + values.add(localeSubtags[parseIdx++]); } ext[singleton] = values.join('-'); - if (localeSubtags.isNotEmpty) { - problems.add('invalid part of private use subtags: "${localeSubtags.join('-')}"'); + if (parseIdx < localeSubtags.length) { + problems.add('invalid part of private use subtags: "' + '${localeSubtags.getRange(parseIdx, localeSubtags.length).join('-')}"'); } break; } else if (_isAlphabetic(singleton, 1, 1)) { // other_extensions final List values = []; - while (localeSubtags.isNotEmpty - && _isAlphaNumeric(localeSubtags[0], 2, 8)) { - values.add(localeSubtags.removeAt(0)); + while (parseIdx < localeSubtags.length + && _isAlphaNumeric(localeSubtags[parseIdx], 2, 8)) { + values.add(localeSubtags[parseIdx++]); } if (!ext.containsKey(singleton)) { ext[singleton] = values.join('-');