Skip to content

Commit e352a9b

Browse files
[CODEC-249] Fix incorrect transform of CH digraph according to Metaphone basic rules (#423)
* [CODEC-249] Fix incorrect transform of CH digraph according to Metaphone basic rules
* [CODEC-249] Add test for Metaphone CH character translation with max code length of 5
* Remove blank lines.

---------

Co-authored-by: Gary Gregory <garydgregory@users.noreply.github.com>
1 parent 269df1e commit e352a9b

File tree

2 files changed

+25
-12
lines changed

2 files changed

+25
-12
lines changed

src/main/java/org/apache/commons/codec/language/Metaphone.java

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -246,23 +246,19 @@ public String metaphone(final String txt) {
246246
if (isPreviousChar(local, n, 'S') && !isLastChar(wdsz, n) && FRONTV.indexOf(local.charAt(n + 1)) >= 0) {
247247
break;
248248
}
249-
if (regionMatch(local, n, "CIA")) { // "CIA" -> X
249+
if (isPreviousChar(local, n, 'S') && isNextChar(local, n, 'H')) { // SCH->sk
250+
code.append('K');
251+
break;
252+
}
253+
if (regionMatch(local, n, "CIA") || isNextChar(local, n, 'H')) { // "CIA" -> X or CH -> X
250254
code.append('X');
251255
break;
252256
}
253257
if (!isLastChar(wdsz, n) && FRONTV.indexOf(local.charAt(n + 1)) >= 0) {
254258
code.append('S');
255259
break; // CI,CE,CY -> S
256260
}
257-
if (isPreviousChar(local, n, 'S') && isNextChar(local, n, 'H')) { // SCH->sk
258-
code.append('K');
259-
break;
260-
}
261-
if (!isNextChar(local, n, 'H') || n == 0 && wdsz >= 3 && isVowel(local, 2)) { // CH consonant -> K consonant
262-
code.append('K');
263-
} else {
264-
code.append('X'); // CHvowel -> X
265-
}
261+
code.append('K'); // default C -> K
266262
break;
267263
case 'D':
268264
if (!isLastChar(wdsz, n + 1) && isNextChar(local, n, 'G') && FRONTV.indexOf(local.charAt(n + 2)) >= 0) { // DGE DGI DGY -> J

src/test/java/org/apache/commons/codec/language/MetaphoneTest.java

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -272,11 +272,28 @@ void testTIOAndTIAToX() {
272272

273273
@Test
274274
void testTranslateOfSCHAndCH() {
275+
assertEquals("SNS", getStringEncoder().metaphone("SCIENCE"));
276+
assertEquals("SN", getStringEncoder().metaphone("SCENE"));
277+
assertEquals("S", getStringEncoder().metaphone("SCY"));
275278
assertEquals("SKTL", getStringEncoder().metaphone("SCHEDULE"));
276279
assertEquals("SKMT", getStringEncoder().metaphone("SCHEMATIC"));
277-
278-
assertEquals("KRKT", getStringEncoder().metaphone("CHARACTER"));
280+
assertEquals("TSKR", getStringEncoder().metaphone("DISCHARGE"));
281+
assertEquals("EX", getStringEncoder().metaphone("ECHO"));
279282
assertEquals("TX", getStringEncoder().metaphone("TEACH"));
283+
assertEquals("XR", getStringEncoder().metaphone("CHERI"));
284+
assertEquals("XP", getStringEncoder().metaphone("CHIP"));
285+
assertEquals("XRST", getStringEncoder().metaphone("CHRIST"));
286+
assertEquals("X", getStringEncoder().metaphone("CIAO"));
287+
assertEquals("ST", getStringEncoder().metaphone("CITY"));
288+
assertEquals("KT", getStringEncoder().metaphone("CAT"));
289+
290+
}
291+
292+
@Test
293+
void testTranslateOfCHCharacterWithMaxCodeLenFive() {
294+
final Metaphone metaphone = new Metaphone();
295+
metaphone.setMaxCodeLen(5);
296+
assertEquals("XRKTR", metaphone.metaphone("CHARACTER"));
280297
}
281298

282299
@Test

0 commit comments

Comments
 (0)