Skip to content

Commit 4e387f6

Browse files
committed
handle html entities when out of range
1 parent 088b47a commit 4e387f6

File tree

3 files changed

+39
-97
lines changed

3 files changed

+39
-97
lines changed

spec/entities_spec.js

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -433,6 +433,28 @@ describe("XMLParser Entities", function() {
433433
expect(result).toEqual(expected);
434434
});
435435

436+
it("should skip HTML numeric or hex entities when htmlEntities:true but entity is out of range", function() { // regression spec: numeric references above U+10FFFF must survive parsing as plain text
437+
const xmlData = `<root attr="&#9999999;">&#xFFFFFF;</root>`; // 9999999 (decimal) and 0xFFFFFF are both greater than 0x10FFFF, the largest Unicode code point
438+
439+
const expected = {
440+
"root": {
441+
"#text": "&#xFFFFFF;", // hex reference is expected back verbatim
442+
"attr": "&#9999999;" // decimal reference is expected back verbatim
443+
}
444+
};
445+
446+
const options = {
447+
attributeNamePrefix: "",
448+
ignoreAttributes: false,
449+
processEntities: true,
450+
htmlEntities: true, // the option named in the spec title; the out-of-range case is exercised with it enabled
451+
};
452+
const parser = new XMLParser(options);
453+
let result = parser.parse(xmlData);
454+
455+
expect(result).toEqual(expected);
456+
});
457+
436458
it("should throw error if an entity name contains special char", function() {
437459
const xmlData = `
438460
<?xml version="1.0" encoding="UTF-8"?>

spec/temp.js

Lines changed: 5 additions & 95 deletions
Original file line numberDiff line numberDiff line change
@@ -4,104 +4,14 @@ import { format } from "path";
44
import {XMLParser, XMLValidator, XMLBuilder} from "../src/fxp.js";
55

66
describe("unpaired and empty tags", function() {
7-
xit("bug test", function() {
7+
fit("bug test", function() {
88

9-
const xmlData = `<?xml version="1.0"?>
10-
<!DOCTYPE softwarelists [
11-
<!ELEMENT softwarelists (softwarelist*)>
12-
<!ELEMENT softwarelist (notes?, software+)>
13-
<!ATTLIST softwarelist name CDATA #REQUIRED>
14-
<!ATTLIST softwarelist description CDATA #IMPLIED>
15-
<!ELEMENT notes (#PCDATA)>
16-
<!ELEMENT software (description, year, publisher, notes?, info*, sharedfeat*, part*)>
17-
<!ATTLIST software name CDATA #REQUIRED>
18-
<!ATTLIST software cloneof CDATA #IMPLIED>
19-
<!ATTLIST software supported (yes|partial|no) "yes">
20-
<!ELEMENT description (#PCDATA)>
21-
<!ELEMENT year (#PCDATA)>
22-
<!ELEMENT publisher (#PCDATA)>
23-
<!ELEMENT notes (#PCDATA)>
24-
<!ELEMENT info EMPTY>
25-
<!ATTLIST info name CDATA #REQUIRED>
26-
<!ATTLIST info value CDATA #IMPLIED>
27-
<!ELEMENT sharedfeat EMPTY>
28-
<!ATTLIST sharedfeat name CDATA #REQUIRED>
29-
<!ATTLIST sharedfeat value CDATA #IMPLIED>
30-
<!ELEMENT part (feature*, dataarea*, diskarea*, dipswitch*)>
31-
<!ATTLIST part name CDATA #REQUIRED>
32-
<!ATTLIST part interface CDATA #REQUIRED>
33-
<!ELEMENT feature EMPTY>
34-
<!ATTLIST feature name CDATA #REQUIRED>
35-
<!ATTLIST feature value CDATA #IMPLIED>
36-
<!ELEMENT dataarea (rom*)>
37-
<!ATTLIST dataarea name CDATA #REQUIRED>
38-
<!ATTLIST dataarea size CDATA #REQUIRED>
39-
<!ATTLIST dataarea databits (8|16|32|64) "8">
40-
<!ATTLIST dataarea endian (big|little) "little">
41-
<!ELEMENT rom EMPTY>
42-
<!ATTLIST rom name CDATA #IMPLIED>
43-
<!ATTLIST rom size CDATA #IMPLIED>
44-
<!ATTLIST rom length CDATA #IMPLIED>
45-
<!ATTLIST rom crc CDATA #IMPLIED>
46-
<!ATTLIST rom sha1 CDATA #IMPLIED>
47-
<!ATTLIST rom offset CDATA #IMPLIED>
48-
<!ATTLIST rom value CDATA #IMPLIED>
49-
<!ATTLIST rom status (baddump|nodump|good) "good">
50-
<!ATTLIST rom loadflag (load16_byte|load16_word|load16_word_swap|load32_byte|load32_word|load32_word_swap|load32_dword|load64_word|load64_word_swap|reload|fill|continue|reload_plain) #IMPLIED>
51-
<!ELEMENT diskarea (disk*)>
52-
<!ATTLIST diskarea name CDATA #REQUIRED>
53-
<!ELEMENT disk EMPTY>
54-
<!ATTLIST disk name CDATA #REQUIRED>
55-
<!ATTLIST disk sha1 CDATA #IMPLIED>
56-
<!ATTLIST disk status (baddump|nodump|good) "good">
57-
<!ATTLIST disk writeable (yes|no) "no">
58-
<!ELEMENT dipswitch (dipvalue*)>
59-
<!ATTLIST dipswitch name CDATA #REQUIRED>
60-
<!ATTLIST dipswitch tag CDATA #REQUIRED>
61-
<!ATTLIST dipswitch mask CDATA #REQUIRED>
62-
<!ELEMENT dipvalue EMPTY>
63-
<!ATTLIST dipvalue name CDATA #REQUIRED>
64-
<!ATTLIST dipvalue value CDATA #REQUIRED>
65-
<!ATTLIST dipvalue default (yes|no) "no">
66-
]>
67-
68-
<softwarelists>
69-
<softwarelist name="snes" description="Nintendo SNES cartridges">
70-
<software name="aokidenp" cloneof="aokiden">
71-
<description>Aoki Densetsu Shoot! (Japan, prototype)</description>
72-
<year>1994</year>
73-
<publisher>KSS</publisher>
74-
<info name="alt_title" value="蒼き伝説シュート!"/>
75-
<part name="cart" interface="snes_cart">
76-
<feature name="battery" value="BATT CR2032" />
77-
<feature name="cart_model" value="no shell" />
78-
<feature name="lockout" value="" />
79-
<feature name="pcb" value="SHVC-4PV5B-01" />
80-
<feature name="slot" value="lorom" />
81-
<feature name="u1" value="U1 EPROM" />
82-
<feature name="u2" value="U2 EPROM" />
83-
<feature name="u3" value="U3 EPROM" />
84-
<feature name="u4" value="U4 EPROM" />
85-
<feature name="u5" value="U5 SRAM" />
86-
<feature name="u6" value="U6 PLD" />
87-
<feature name="u7" value="U7 74LS157" />
88-
<feature name="u8" value="U8 CIC" />
89-
<dataarea name="rom" size="1572864">
90-
<rom name="shoot 1 kss.u1" size="524288" crc="71306e06" sha1="253ec028d68a85209dc3e5846a2a2f5b582fed7b"/>
91-
<rom name="shoot 2 kss.u2" size="524288" crc="d07e1be3" sha1="7a58acb027ca15c1054e58f43156c2d99f62d16c"/>
92-
<rom name="shoot 3 kss.u3" size="524288" crc="380ed94f" sha1="8607ce31748ae73b9aa7aacda80c843622c61a79"/>
93-
</dataarea>
94-
<dataarea name="nvram" size="131072">
95-
</dataarea>
96-
</part>
97-
</software>
98-
</softwarelist>
99-
</softwarelists>
100-
101-
`;
9+
const xmlData = `<root attr="&#9999999;"/>`;
10210
const options = {
10311
ignoreAttributes: false,
10412
attributeNamePrefix: '',
13+
//processEntities: true,
14+
htmlEntities: true
10515
};
10616
const parser = new XMLParser(options);
10717
// console.log(JSON.stringify(parser.parse(xml)));
@@ -112,7 +22,7 @@ describe("unpaired and empty tags", function() {
11222
// expect(result).toEqual(expected);
11323

11424
});
115-
fit("bug test", function() {
25+
xit("bug test", function() {
11626

11727
const xmlData = `<?xml version="1.0" encoding="UTF-8"?>
11828
<!DOCTYPE dmodule [

src/xmlparser/OrderedObjParser.js

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,8 @@ export default class OrderedObjParser{
4141
"copyright" : { regex: /&(copy|#169);/g, val: "©" },
4242
"reg" : { regex: /&(reg|#174);/g, val: "®" },
4343
"inr" : { regex: /&(inr|#8377);/g, val: "₹" },
44-
"num_dec": { regex: /&#([0-9]{1,7});/g, val : (_, str) => String.fromCodePoint(Number.parseInt(str, 10)) },
45-
"num_hex": { regex: /&#x([0-9a-fA-F]{1,6});/g, val : (_, str) => String.fromCodePoint(Number.parseInt(str, 16)) },
44+
"num_dec": { regex: /&#([0-9]{1,7});/g, val : (_, str) => fromCodePoint(str, 10, "&#") },
45+
"num_hex": { regex: /&#x([0-9a-fA-F]{1,6});/g, val : (_, str) => fromCodePoint(str, 16, "&#x") },
4646
};
4747
this.addExternalEntities = addExternalEntities;
4848
this.parseXml = parseXml;
@@ -627,3 +627,13 @@ function parseValue(val, shouldParse, options) {
627627
}
628628
}
629629
}
630+
631+
function fromCodePoint(str, base, prefix){ // Turns the digits of a numeric character reference into the character; str = digit string, base = radix passed to parseInt, prefix = text re-attached in front of str when the value cannot be decoded
632+
const codePoint = Number.parseInt(str, base); // numeric value of the reference in the given radix
633+
634+
if (codePoint >= 0 && codePoint <= 0x10FFFF) { // String.fromCodePoint throws RangeError above 0x10FFFF; a NaN parse result fails both comparisons and takes the else branch
635+
return String.fromCodePoint(codePoint);
636+
} else {
637+
return prefix +str + ";"; // not decodable: return the reference rebuilt as prefix + digits + ";" rather than throwing
638+
}
639+
}

0 commit comments

Comments
 (0)