Skip to content

Commit 89b952d

Browse files
committed
ICU-22362 Fix the name order derivation code in PersonNameFormatter to match the CLDR spec.
1 parent 6ba5a1a commit 89b952d

4 files changed

Lines changed: 98 additions & 21 deletions

File tree

icu4j/main/classes/core/src/com/ibm/icu/impl/ICUResourceBundle.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1285,7 +1285,7 @@ private static String getDefaultScript(String language, String region) {
12851285
return result;
12861286
}
12871287

1288-
private static String getParentLocaleID(String name, String origName, OpenType openType) {
1288+
public static String getParentLocaleID(String name, String origName, OpenType openType) {
12891289
// early out if the locale ID has a variant code or ends with _
12901290
if (name.endsWith("_") || !ULocale.getVariant(name).isEmpty()) {
12911291
int lastUnderbarPos = name.lastIndexOf('_');

icu4j/main/classes/core/src/com/ibm/icu/impl/personname/PersonNameFormatterImpl.java

Lines changed: 60 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -98,11 +98,8 @@ public PersonNameFormatterImpl(Locale locale,
9898
/**
9999
* THIS IS A DUMMY CONSTRUCTOR JUST FOR THE USE OF THE UNIT TESTS TO CHECK SOME OF THE INTERNAL IMPLEMENTATION!
100100
*/
101-
public PersonNameFormatterImpl(Locale locale, String[] patterns) {
101+
public PersonNameFormatterImpl(Locale locale, String[] gnFirstPatterns, String[] snFirstPatterns, String[] gnFirstLocales, String[] snFirstLocales) {
102102
// first, set dummy values for the other fields
103-
snFirstPatterns = null;
104-
gnFirstLocales = null;
105-
snFirstLocales = null;
106103
length = PersonNameFormatter.Length.MEDIUM;
107104
usage = PersonNameFormatter.Usage.REFERRING;
108105
formality = PersonNameFormatter.Formality.FORMAL;
@@ -114,10 +111,22 @@ public PersonNameFormatterImpl(Locale locale, String[] patterns) {
114111
nativeSpaceReplacement = " ";
115112
formatterLocaleUsesSpaces = true;
116113

117-
// then, set values for the fields we actually care about
114+
// then, set values for the fields we actually care about (all but gnFirstPatterns are optional)
118115
this.locale = locale;
119-
gnFirstPatterns = PersonNamePattern.makePatterns(patterns, this);
120-
116+
this.gnFirstPatterns = PersonNamePattern.makePatterns(gnFirstPatterns, this);
117+
this.snFirstPatterns = (snFirstPatterns != null) ? PersonNamePattern.makePatterns(snFirstPatterns, this) : null;
118+
if (gnFirstLocales != null) {
119+
this.gnFirstLocales = new HashSet<>();
120+
Collections.addAll(this.gnFirstLocales, gnFirstLocales);
121+
} else {
122+
this.gnFirstLocales = null;
123+
}
124+
if (snFirstLocales != null) {
125+
this.snFirstLocales = new HashSet<>();
126+
Collections.addAll(this.snFirstLocales, snFirstLocales);
127+
} else {
128+
this.snFirstLocales = null;
129+
}
121130
}
122131

123132
@Override
@@ -193,6 +202,8 @@ public boolean shouldCapitalizeSurname() {
193202

194203
private final Set<String> LOCALES_THAT_DONT_USE_SPACES = new HashSet<>(Arrays.asList("ja", "zh", "yue", "km", "lo", "my"));
195204

205+
static final Set NON_DEFAULT_SCRIPTS = new HashSet<>(Arrays.asList("Hani", "Hira", "Kana"));
206+
196207
/**
197208
* Returns the value of the resource, as a string array.
198209
* @param resource An ICUResourceBundle of type STRING or ARRAY. If ARRAY, this function just returns it
@@ -223,23 +234,57 @@ private boolean nameIsGnFirst(PersonName name) {
223234
return false;
224235
}
225236

226-
String localeStr = getNameLocale(name).toString();
237+
// Otherwise, search the gnFirstLocales and snFirstLocales for the locale's name.
238+
// For our purposes, the "locale's name" is the locale the name itself gives us (if it
239+
// has one), or the locale we guess for the name (if it doesn't).
240+
Locale nameLocale = name.getNameLocale();
241+
if (nameLocale == null) {
242+
nameLocale = getNameLocale(name);
243+
}
244+
245+
// this is a hack to deal with certain script codes that are valid, but not the default, for their locales--
246+
// to make the parent-chain lookup work right, we need to replace any of those script codes (in the name's locale)
247+
// with the appropriate default script for whatever language and region we have
248+
ULocale nameULocale = ULocale.forLocale(nameLocale);
249+
if (NON_DEFAULT_SCRIPTS.contains(nameULocale.getScript())) {
250+
ULocale.Builder builder = new ULocale.Builder();
251+
builder.setLocale(nameULocale);
252+
builder.setScript(null);
253+
nameULocale = ULocale.addLikelySubtags(builder.build());
254+
}
255+
256+
// now search for the locale in the gnFirstLocales and snFirstLocales lists...
257+
String localeStr = nameULocale.getName();
258+
String origLocaleStr = localeStr;
259+
String languageCode = nameULocale.getLanguage();
260+
227261
do {
262+
// first check if the locale is in one of those lists
228263
if (gnFirstLocales.contains(localeStr)) {
229264
return true;
230265
} else if (snFirstLocales.contains(localeStr)) {
231266
return false;
232267
}
233268

234-
int lastUnderbarPos = localeStr.lastIndexOf("_");
235-
if (lastUnderbarPos >= 0) {
236-
localeStr = localeStr.substring(0, lastUnderbarPos);
237-
} else {
238-
localeStr = "root";
269+
// if not, try again with "und" in place of the language code (this lets us use "und_CN" to match
270+
// all locales with a region code of "CN" and makes sure the last thing we try is always "und", which
271+
// is required to be in gnFirstLocales or snFirstLocales)
272+
String undStr = localeStr.replaceAll("^" + languageCode, "und");
273+
if (gnFirstLocales.contains(undStr)) {
274+
return true;
275+
} else if (snFirstLocales.contains(undStr)) {
276+
return false;
239277
}
240-
} while (!localeStr.equals("root"));
241278

242-
// should never get here-- "root" should always be in one of the locales
279+
// if we haven't found the locale ID yet, look up its parent locale ID and try again-- if getParentLocaleID()
280+
// returns null (i.e., we have a locale ID, such as "zh_Hant", that inherits directly from "root"), try again
281+
// with just the locale ID's language code (this fixes it so that "zh_Hant" matches "zh", even though "zh" isn't,
282+
// strictly speaking, its parent locale)
283+
String parentLocaleStr = ICUResourceBundle.getParentLocaleID(localeStr, origLocaleStr, ICUResourceBundle.OpenType.LOCALE_DEFAULT_ROOT);
284+
localeStr = (parentLocaleStr != null) ? parentLocaleStr : languageCode;
285+
} while (localeStr != null);
286+
287+
// should never get here ("und" should always be in gnFirstLocales or snFirstLocales), but if we do...
243288
return true;
244289
}
245290

icu4j/main/classes/core/src/com/ibm/icu/text/PersonNameFormatter.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -319,8 +319,8 @@ private PersonNameFormatter(Locale locale, Length length, Usage usage, Formality
319319
* @deprecated This API is for unit testing only.
320320
*/
321321
@Deprecated
322-
public PersonNameFormatter(Locale locale, String[] patterns) {
323-
this.impl = new PersonNameFormatterImpl(locale, patterns);
322+
public PersonNameFormatter(Locale locale, String[] gnFirstPatterns, String[] snFirstPatterns, String[] gnFirstLocales, String[] snFirstLocales) {
323+
this.impl = new PersonNameFormatterImpl(locale, gnFirstPatterns, snFirstPatterns, gnFirstLocales, snFirstLocales);
324324
}
325325

326326
/**

icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/PersonNameFormatterTest.java

Lines changed: 35 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -440,7 +440,7 @@ public void TestLiteralTextElision2() {
440440
// a more extensive text of the literal text elision logic
441441
PersonNameFormatter pnf = new PersonNameFormatter(Locale.US, new String[] {
442442
"1{title}1 2{given}2 3{given2}3 4{surname}4 5{surname2}5 6{generation}6"
443-
});
443+
}, null, null, null);
444444

445445
String[][] testCases = new String[][] {
446446
{ "locale=en_US,title=Dr.,given=Richard,given2=Theodore,surname=Gillam,surname2=Morgan,generation=III", "1Dr.1 2Richard2 3Theodore3 4Gillam4 5Morgan5 6III6" },
@@ -467,7 +467,7 @@ public void TestPatternSelection() {
467467
"A {title} {given} {given2} {surname} {surname2} {generation}",
468468
"B {given} {given2} {surname} {surname2}",
469469
"C {given} {surname}",
470-
});
470+
}, null, null, null);
471471

472472
String[][] testCases = new String[][] {
473473
// { "locale=en_US,title=Dr.,given=Richard,given2=Theodore,surname=Gillam,surname2=Morgan,generation=III", "A Dr. Richard Theodore Gillam Morgan III" },
@@ -502,7 +502,7 @@ public void TestCapitalization() {
502502
};
503503

504504
for (String[] testCase : testCases) {
505-
PersonNameFormatter pnf = new PersonNameFormatter(new Locale("hu", "HU"), new String[] { testCase[0] } );
505+
PersonNameFormatter pnf = new PersonNameFormatter(new Locale("hu", "HU"), new String[] { testCase[0] }, null, null, null );
506506
String expectedResult = testCase[1];
507507
String actualResult = pnf.formatToString(name);
508508

@@ -566,4 +566,36 @@ public void TestLocaleDerivation() {
566566
}),
567567
}, false);
568568
}
569+
570+
@Test
571+
public void TestNameOrderFromLocale() {
572+
PersonNameFormatter pnf = new PersonNameFormatter(Locale.US,
573+
new String[] { "{given} {surname}" }, // gnFirstPatterns
574+
new String[] { "{surname} {given}" }, // snFirstPatterns
575+
new String[] { "und", "zh_Hant" }, // gnFirstLocales
576+
new String[] { "zh", "und_CN", "und_SG" } // snFirstLocales
577+
);
578+
579+
String[][] testCases = new String[][] {
580+
{ "en", "Given Sur" }, // should match "und"
581+
{ "zh", "Sur Given" }, // should match "zh"
582+
{ "en_US", "Given Sur" }, // should match "und"
583+
{ "zh_CN", "Sur Given" }, // should match "und_CN"
584+
{ "zh_TW", "Given Sur" }, // should match "zh_Hant"
585+
{ "zh_Hans", "Sur Given" }, // should match "zh"
586+
{ "zh_Hant", "Given Sur" }, // should match "zh_Hant"
587+
{ "zh_Hant_CN", "Given Sur" }, // should match "zh_Hant", NOT "und_CN"
588+
{ "en_CN", "Sur Given" }, // should match "und_CN"
589+
{ "de_DE", "Given Sur" }, // should match "und"
590+
};
591+
592+
for (String[] testCase : testCases) {
593+
String localeID = testCase[0];
594+
String expectedResult = testCase[1];
595+
596+
SimplePersonName name = buildPersonName("given=Given,surname=Sur,locale=" + localeID);
597+
String actualResult = pnf.formatToString(name);
598+
assertEquals("Wrong result for " + localeID, expectedResult, actualResult);
599+
}
600+
}
569601
}

0 commit comments

Comments
 (0)