@@ -8,12 +8,15 @@ var defaults = {
88 allowDots : false ,
99 allowPrototypes : false ,
1010 arrayLimit : 20 ,
11+ charset : 'utf-8' ,
1112 decoder : utils . decode ,
1213 delimiter : '&' ,
1314 depth : 5 ,
15+ interpretNumericEntities : false ,
1416 parameterLimit : 1000 ,
1517 plainObjects : false ,
16- strictNullHandling : false
18+ strictNullHandling : false ,
19+ utf8Sentinel : false
1720} ;
1821
1922var interpretNumericEntities = function ( str ) {
@@ -25,24 +28,41 @@ var interpretNumericEntities = function (str) {
2528// This is what browsers will submit when the ✓ character occurs in an
2629// application/x-www-form-urlencoded body and the encoding of the page containing
2730// the form is iso-8859-1, or when the submitted form has an accept-charset
28- // attribute of iso-8859-1. Presumably also with other charsets that does no contain
31+ // attribute of iso-8859-1. Presumably also with other charsets that do not contain
2932// the ✓ character, such as us-ascii.
30- var numericCheckmark = '✓' ;
33+ var isoSentinel = 'utf8=%26%2310003%3B' ; // encodeURIComponent('&#10003;')
3134
32- // These are the raw utf-8 bytes of the checkmark as code points in a string.
33- // It's what we end up with when the utf-8 sentinel parameter is interpreted
34- // as iso-8859-1. When utf8Sentinel is enabled, we will use it to course-correct
35- // and interpret the rest of the query string as utf-8.
36- var misinterpretedCheckmark = '\xe2\x9c\x93' ;
35+ // These are the percent-encoded utf-8 octets representing a checkmark, indicating
36+ // that the request actually is utf-8 encoded.
37+ var utf8Sentinel = 'utf8=%E2%9C%93' ; // encodeURIComponent('✓')
3738
3839var parseValues = function parseQueryStringValues ( str , options ) {
3940 var obj = { } ;
4041 var cleanStr = options . ignoreQueryPrefix ? str . replace ( / ^ \? / , '' ) : str ;
4142 var limit = options . parameterLimit === Infinity ? undefined : options . parameterLimit ;
4243 var parts = cleanStr . split ( options . delimiter , limit ) ;
4344 var charset = options . charset ;
45+ var skipIndex = - 1 ; // Keep track of where the utf8 sentinel was found
46+ var i ;
47+
48+ if ( options . utf8Sentinel ) {
49+ for ( i = 0 ; i < parts . length ; ++ i ) {
50+ if ( parts [ i ] . indexOf ( 'utf8=' ) === 0 ) {
51+ if ( parts [ i ] === utf8Sentinel ) {
52+ charset = 'utf-8' ;
53+ } else if ( parts [ i ] === isoSentinel ) {
54+ charset = 'iso-8859-1' ;
55+ }
56+ skipIndex = i ;
57+ i = parts . length ; // The eslint settings do not allow break;
58+ }
59+ }
60+ }
4461
45- for ( var i = 0 ; i < parts . length ; ++ i ) {
62+ for ( i = 0 ; i < parts . length ; ++ i ) {
63+ if ( i === skipIndex ) {
64+ continue ;
65+ }
4666 var part = parts [ i ] ;
4767
4868 var bracketEqualsPos = part . indexOf ( ']=' ) ;
@@ -57,21 +77,13 @@ var parseValues = function parseQueryStringValues(str, options) {
5777 val = options . decoder ( part . slice ( pos + 1 ) , defaults . decoder , charset ) ;
5878 }
5979
60- if ( key === 'utf8' && options . utf8Sentinel ) {
61- if ( val === '✓' || val === misinterpretedCheckmark ) {
62- charset = 'utf-8' ;
63- } else if ( val === numericCheckmark ) {
64- charset = 'iso-8859-1' ;
65- }
80+ if ( options . interpretNumericEntities && charset === 'iso-8859-1' ) {
81+ val = interpretNumericEntities ( val ) ;
82+ }
83+ if ( has . call ( obj , key ) ) {
84+ obj [ key ] = [ ] . concat ( obj [ key ] ) . concat ( val ) ;
6685 } else {
67- if ( options . interpretNumericEntities && charset === 'iso-8859-1' ) {
68- val = interpretNumericEntities ( val ) ;
69- }
70- if ( has . call ( obj , key ) ) {
71- obj [ key ] = [ ] . concat ( obj [ key ] ) . concat ( val ) ;
72- } else {
73- obj [ key ] = val ;
74- }
86+ obj [ key ] = val ;
7587 }
7688 }
7789
0 commit comments