Skip to content

Commit 534ce3c

Browse files
committed
Fix input validation in XMLTokener.unescapeEntity()
Fix StringIndexOutOfBoundsException and NumberFormatException in XMLTokener.unescapeEntity() when parsing malformed XML numeric character references. Issues: (1) `&#;` (empty numeric reference) caused StringIndexOutOfBoundsException; (2) `&#txx;` (invalid decimal) caused NumberFormatException; (3) `&#xGGG;` (invalid hex) caused NumberFormatException. Changes: (1) add length validation before accessing character positions; (2) add isValidHex() and isValidDecimal() helper methods; (3) throw a proper JSONException with descriptive messages. Fixes #1035, Fixes #1036.
1 parent e635f40 commit 534ce3c

File tree

1 file changed

+68
-8
lines changed

1 file changed

+68
-8
lines changed

src/main/java/org/json/XMLTokener.java

Lines changed: 68 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -151,33 +151,93 @@ public Object nextEntity(@SuppressWarnings("unused") char ampersand) throws JSON
151151
/**
152152
* Unescape an XML entity encoding;
153153
* @param e entity (only the actual entity value, not the preceding & or ending ;
154-
* @return
154+
* @return the unescaped entity string
155+
* @throws JSONException if the entity is malformed
155156
*/
156-
static String unescapeEntity(String e) {
157+
static String unescapeEntity(String e) throws JSONException {
157158
// validate
158159
if (e == null || e.isEmpty()) {
159160
return "";
160161
}
161162
// if our entity is an encoded unicode point, parse it.
162163
if (e.charAt(0) == '#') {
163164
int cp;
165+
// Check minimum length for numeric character reference
166+
if (e.length() < 2) {
167+
throw new JSONException("Invalid numeric character reference: &#;");
168+
}
164169
if (e.charAt(1) == 'x' || e.charAt(1) == 'X') {
165-
// hex encoded unicode
166-
cp = Integer.parseInt(e.substring(2), 16);
170+
// hex encoded unicode - need at least one hex digit after #x
171+
if (e.length() < 3) {
172+
throw new JSONException("Invalid hex character reference: missing hex digits in &#" + e.substring(1) + ";");
173+
}
174+
String hex = e.substring(2);
175+
if (!isValidHex(hex)) {
176+
throw new JSONException("Invalid hex character reference: &#" + e.substring(1) + ";");
177+
}
178+
try {
179+
cp = Integer.parseInt(hex, 16);
180+
} catch (NumberFormatException nfe) {
181+
throw new JSONException("Invalid hex character reference: &#" + e.substring(1) + ";", nfe);
182+
}
167183
} else {
168184
// decimal encoded unicode
169-
cp = Integer.parseInt(e.substring(1));
185+
String decimal = e.substring(1);
186+
if (!isValidDecimal(decimal)) {
187+
throw new JSONException("Invalid decimal character reference: &#" + decimal + ";");
188+
}
189+
try {
190+
cp = Integer.parseInt(decimal);
191+
} catch (NumberFormatException nfe) {
192+
throw new JSONException("Invalid decimal character reference: &#" + decimal + ";", nfe);
193+
}
170194
}
171-
return new String(new int[] {cp},0,1);
172-
}
195+
return new String(new int[] {cp}, 0, 1);
196+
}
173197
Character knownEntity = entity.get(e);
174-
if(knownEntity==null) {
198+
if (knownEntity == null) {
175199
// we don't know the entity so keep it encoded
176200
return '&' + e + ';';
177201
}
178202
return knownEntity.toString();
179203
}
180204

205+
/**
206+
* Check if a string contains only valid hexadecimal digits.
207+
* @param s the string to check
208+
* @return true if s is non-empty and contains only hex digits (0-9, a-f, A-F)
209+
*/
210+
private static boolean isValidHex(String s) {
211+
if (s == null || s.isEmpty()) {
212+
return false;
213+
}
214+
for (int i = 0; i < s.length(); i++) {
215+
char c = s.charAt(i);
216+
if (!((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))) {
217+
return false;
218+
}
219+
}
220+
return true;
221+
}
222+
223+
/**
224+
* Check if a string contains only valid decimal digits.
225+
* @param s the string to check
226+
* @return true if s is non-empty and contains only digits (0-9)
227+
*/
228+
private static boolean isValidDecimal(String s) {
229+
if (s == null || s.isEmpty()) {
230+
return false;
231+
}
232+
for (int i = 0; i < s.length(); i++) {
233+
char c = s.charAt(i);
234+
if (c < '0' || c > '9') {
235+
return false;
236+
}
237+
}
238+
return true;
239+
}
240+
181241

182242
/**
183243
* <pre>{@code

0 commit comments

Comments
 (0)