Skip to content

Commit 688c9c8

Browse files
authored
[MNG-8241] Handle non-BMP characters when comparing versions (#2071)
* Handle non-BMP characters * Treat non-ASCII digits as strings
1 parent 9f824df commit 688c9c8

File tree

2 files changed

+54
-13
lines changed

2 files changed

+54
-13
lines changed

compat/maven-artifact/src/main/java/org/apache/maven/artifact/versioning/ComparableVersion.java

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@
4242
* <li>unlimited number of version components,</li>
4343
* <li>version components in the text can be digits or strings,</li>
4444
* <li>strings are checked for well-known qualifiers and the qualifier ordering is used for version ordering.
45-
* Well-known qualifiers (case insensitive) are:<ul>
45+
* Well-known qualifiers (case-insensitive) are:<ul>
4646
* <li><code>alpha</code> or <code>a</code></li>
4747
* <li><code>beta</code> or <code>b</code></li>
4848
* <li><code>milestone</code> or <code>m</code></li>
@@ -51,9 +51,9 @@
5151
* <li><code>(the empty string)</code> or <code>ga</code> or <code>final</code></li>
5252
* <li><code>sp</code></li>
5353
* </ul>
54-
* Unknown qualifiers are considered after known qualifiers, with lexical order (always case insensitive),
54+
* Unknown qualifiers are considered after known qualifiers, with lexical order (always case-insensitive),
5555
* </li>
56-
* <li>a hyphen usually precedes a qualifier, and is always less important than digits/number, for example
56+
* <li>A hyphen usually precedes a qualifier, and is always less important than digits/number. For example
5757
* {@code 1.0.RC2 < 1.0-RC3 < 1.0.1}; but prefer {@code 1.0.0-RC1} over {@code 1.0.0.RC1}, and more
5858
* generally: {@code 1.0.X2 < 1.0-X3 < 1.0.1} for any string {@code X}; but prefer {@code 1.0.0-X1}
5959
* over {@code 1.0.0.X1}.</li>
@@ -656,7 +656,20 @@ public final void parseVersion(String version) {
656656
int startIndex = 0;
657657

658658
for (int i = 0; i < version.length(); i++) {
659-
char c = version.charAt(i);
659+
char character = version.charAt(i);
660+
int c = character;
661+
if (Character.isHighSurrogate(character)) {
662+
// read the next character as a low surrogate and combine into a single int
663+
try {
664+
char low = version.charAt(i + 1);
665+
char[] both = {character, low};
666+
c = Character.codePointAt(both, 0);
667+
i++;
668+
} catch (IndexOutOfBoundsException ex) {
669+
// high surrogate without low surrogate. Not a lot we can do here except treat it as a regular
670+
// character
671+
}
672+
}
660673

661674
if (c == '.') {
662675
if (i == startIndex) {
@@ -687,7 +700,7 @@ public final void parseVersion(String version) {
687700
stack.push(list);
688701
}
689702
isCombination = false;
690-
} else if (Character.isDigit(c)) {
703+
} else if (c >= '0' && c <= '9') { // Check for ASCII digits only
691704
if (!isDigit && i > startIndex) {
692705
// X1
693706
isCombination = true;

compat/maven-artifact/src/test/java/org/apache/maven/artifact/versioning/ComparableVersionTest.java

Lines changed: 36 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@
2727

2828
/**
2929
* Test ComparableVersion.
30-
*
3130
*/
3231
@SuppressWarnings("unchecked")
3332
class ComparableVersionTest {
@@ -222,6 +221,23 @@ void testLeadingZeroes() {
222221
checkVersionsOrder("0.2", "1.0.7");
223222
}
224223

224+
@Test
225+
void testDigitGreaterThanNonAscii() {
226+
ComparableVersion c1 = new ComparableVersion("1");
227+
ComparableVersion c2 = new ComparableVersion("é");
228+
assertTrue(c1.compareTo(c2) > 0, "expected " + "1" + " > " + "é");
229+
assertTrue(c2.compareTo(c1) < 0, "expected " + "é" + " < " + "1");
230+
}
231+
232+
@Test
233+
void testDigitGreaterThanNonBmpCharacters() {
234+
ComparableVersion c1 = new ComparableVersion("1");
235+
// MATHEMATICAL SANS-SERIF DIGIT TWO
236+
ComparableVersion c2 = new ComparableVersion("\uD835\uDFE4");
237+
assertTrue(c1.compareTo(c2) > 0, "expected " + "1" + " > " + "\uD835\uDFE4");
238+
assertTrue(c2.compareTo(c1) < 0, "expected " + "\uD835\uDFE4" + " < " + "1");
239+
}
240+
225241
@Test
226242
void testGetCanonical() {
227243
// MNG-7700
@@ -238,13 +254,25 @@ void testGetCanonical() {
238254

239255
@Test
240256
void testCompareDigitToLetter() {
241-
ComparableVersion c1 = new ComparableVersion("7");
242-
ComparableVersion c2 = new ComparableVersion("J");
243-
ComparableVersion c3 = new ComparableVersion("c");
244-
assertTrue(c1.compareTo(c2) > 0, "expected 7 > J");
245-
assertTrue(c2.compareTo(c1) < 0, "expected J < 1");
246-
assertTrue(c1.compareTo(c3) > 0, "expected 7 > c");
247-
assertTrue(c3.compareTo(c1) < 0, "expected c < 7");
257+
ComparableVersion seven = new ComparableVersion("7");
258+
ComparableVersion capitalJ = new ComparableVersion("J");
259+
ComparableVersion lowerCaseC = new ComparableVersion("c");
260+
// Digits are greater than letters
261+
assertTrue(seven.compareTo(capitalJ) > 0, "expected 7 > J");
262+
assertTrue(capitalJ.compareTo(seven) < 0, "expected J < 7");
263+
assertTrue(seven.compareTo(lowerCaseC) > 0, "expected 7 > c");
264+
assertTrue(lowerCaseC.compareTo(seven) < 0, "expected c < 7");
265+
}
266+
267+
@Test
268+
void testNonAsciiDigits() { // These should not be treated as digits.
269+
ComparableVersion asciiOne = new ComparableVersion("1");
270+
ComparableVersion arabicEight = new ComparableVersion("\u0668");
271+
ComparableVersion asciiNine = new ComparableVersion("9");
272+
assertTrue(asciiOne.compareTo(arabicEight) > 0, "expected " + "1" + " > " + "\u0668");
273+
assertTrue(arabicEight.compareTo(asciiOne) < 0, "expected " + "\u0668" + " < " + "1");
274+
assertTrue(asciiNine.compareTo(arabicEight) > 0, "expected " + "9" + " > " + "\u0668");
275+
assertTrue(arabicEight.compareTo(asciiNine) < 0, "expected " + "\u0668" + " < " + "9");
248276
}
249277

250278
@Test

0 commit comments

Comments
 (0)