Skip to content

Commit 16c43b3

Browse files
papandreou authored and ljharb committed
Look for the utf8 sentinel in a separate pass
This means that the utf8 sentinel doesn't have to be the first parameter to affect the parsing of the entire query string. See ljharb#268 (comment).
1 parent 4bfb4d1 commit 16c43b3

File tree

2 files changed

+40
-23
lines changed

2 files changed

+40
-23
lines changed

lib/parse.js

Lines changed: 35 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,15 @@ var defaults = {
88
allowDots: false,
99
allowPrototypes: false,
1010
arrayLimit: 20,
11+
charset: 'utf-8',
1112
decoder: utils.decode,
1213
delimiter: '&',
1314
depth: 5,
15+
interpretNumericEntities: false,
1416
parameterLimit: 1000,
1517
plainObjects: false,
16-
strictNullHandling: false
18+
strictNullHandling: false,
19+
utf8Sentinel: false
1720
};
1821

1922
var interpretNumericEntities = function (str) {
@@ -25,24 +28,41 @@ var interpretNumericEntities = function (str) {
2528
// This is what browsers will submit when the ✓ character occurs in an
2629
// application/x-www-form-urlencoded body and the encoding of the page containing
2730
// the form is iso-8859-1, or when the submitted form has an accept-charset
28-
// attribute of iso-8859-1. Presumably also with other charsets that does no contain
31+
// attribute of iso-8859-1. Presumably also with other charsets that do not contain
2932
// the ✓ character, such as us-ascii.
30-
var numericCheckmark = '✓';
33+
var isoSentinel = 'utf8=%26%2310003%3B'; // encodeURIComponent('&#10003;')
3134

32-
// These are the raw utf-8 bytes of the checkmark as code points in a string.
33-
// It's what we end up with when the utf-8 sentinel parameter is interpreted
34-
// as iso-8859-1. When utf8Sentinel is enabled, we will use it to course-correct
35-
// and interpret the rest of the query string as utf-8.
36-
var misinterpretedCheckmark = '\xe2\x9c\x93';
35+
// These are the percent-encoded utf-8 octets representing a checkmark, indicating
36+
// that the request actually is utf-8 encoded.
37+
var utf8Sentinel = 'utf8=%E2%9C%93'; // encodeURIComponent('✓')
3738

3839
var parseValues = function parseQueryStringValues(str, options) {
3940
var obj = {};
4041
var cleanStr = options.ignoreQueryPrefix ? str.replace(/^\?/, '') : str;
4142
var limit = options.parameterLimit === Infinity ? undefined : options.parameterLimit;
4243
var parts = cleanStr.split(options.delimiter, limit);
4344
var charset = options.charset;
45+
var skipIndex = -1; // Keep track of where the utf8 sentinel was found
46+
var i;
47+
48+
if (options.utf8Sentinel) {
49+
for (i = 0; i < parts.length; ++i) {
50+
if (parts[i].indexOf('utf8=') === 0) {
51+
if (parts[i] === utf8Sentinel) {
52+
charset = 'utf-8';
53+
} else if (parts[i] === isoSentinel) {
54+
charset = 'iso-8859-1';
55+
}
56+
skipIndex = i;
57+
i = parts.length; // The eslint settings do not allow break;
58+
}
59+
}
60+
}
4461

45-
for (var i = 0; i < parts.length; ++i) {
62+
for (i = 0; i < parts.length; ++i) {
63+
if (i === skipIndex) {
64+
continue;
65+
}
4666
var part = parts[i];
4767

4868
var bracketEqualsPos = part.indexOf(']=');
@@ -57,21 +77,13 @@ var parseValues = function parseQueryStringValues(str, options) {
5777
val = options.decoder(part.slice(pos + 1), defaults.decoder, charset);
5878
}
5979

60-
if (key === 'utf8' && options.utf8Sentinel) {
61-
if (val === '✓' || val === misinterpretedCheckmark) {
62-
charset = 'utf-8';
63-
} else if (val === numericCheckmark) {
64-
charset = 'iso-8859-1';
65-
}
80+
if (options.interpretNumericEntities && charset === 'iso-8859-1') {
81+
val = interpretNumericEntities(val);
82+
}
83+
if (has.call(obj, key)) {
84+
obj[key] = [].concat(obj[key]).concat(val);
6685
} else {
67-
if (options.interpretNumericEntities && charset === 'iso-8859-1') {
68-
val = interpretNumericEntities(val);
69-
}
70-
if (has.call(obj, key)) {
71-
obj[key] = [].concat(obj[key]).concat(val);
72-
} else {
73-
obj[key] = val;
74-
}
86+
obj[key] = val;
7587
}
7688
}
7789

test/parse.js

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -597,6 +597,11 @@ test('parse()', function (t) {
597597
st.end();
598598
});
599599

600+
t.test('does not require the utf8 sentinel to be defined before the parameters whose decoding it affects', function (st) {
601+
st.deepEqual(qs.parse('a=' + urlEncodedOSlashInUtf8 + '&utf8=' + urlEncodedNumCheckmark, { utf8Sentinel: true, charset: 'utf-8' }), { a: 'ø' });
602+
st.end();
603+
});
604+
600605
t.test('should ignore an utf8 sentinel with an unknown value', function (st) {
601606
st.deepEqual(qs.parse('utf8=foo&' + urlEncodedOSlashInUtf8 + '=' + urlEncodedOSlashInUtf8, { utf8Sentinel: true, charset: 'utf-8' }), { ø: 'ø' });
602607
st.end();

0 commit comments

Comments
 (0)