diff --git a/benchmark/querystring/querystring-parse.js b/benchmark/querystring/querystring-parse.js index 6a4d9f5e6169f4..6c912c0ac2e868 100644 --- a/benchmark/querystring/querystring-parse.js +++ b/benchmark/querystring/querystring-parse.js @@ -3,7 +3,13 @@ var querystring = require('querystring'); var v8 = require('v8'); var bench = common.createBenchmark(main, { - type: ['noencode', 'encodemany', 'encodelast', 'multivalue'], + type: ['noencode', + 'multicharsep', + 'encodemany', + 'encodelast', + 'multivalue', + 'multivaluemany', + 'manypairs'], n: [1e6], }); @@ -13,22 +19,38 @@ function main(conf) { var inputs = { noencode: 'foo=bar&baz=quux&xyzzy=thud', + multicharsep: 'foo=bar&&&&&&&&&&baz=quux&&&&&&&&&&xyzzy=thud', encodemany: '%66%6F%6F=bar&%62%61%7A=quux&xyzzy=%74h%75d', encodelast: 'foo=bar&baz=quux&xyzzy=thu%64', - multivalue: 'foo=bar&foo=baz&foo=quux&quuy=quuz' + multivalue: 'foo=bar&foo=baz&foo=quux&quuy=quuz', + multivaluemany: 'foo=bar&foo=baz&foo=quux&quuy=quuz&foo=abc&foo=def&' + + 'foo=ghi&foo=jkl&foo=mno&foo=pqr&foo=stu&foo=vwxyz', + manypairs: 'a&b&c&d&e&f&g&h&i&j&k&l&m&n&o&p&q&r&s&t&u&v&w&x&y&z' }; var input = inputs[type]; // Force-optimize querystring.parse() so that the benchmark doesn't get // disrupted by the optimizer kicking in halfway through. 
- for (var name in inputs) - querystring.parse(inputs[name]); - v8.setFlagsFromString('--allow_natives_syntax'); - eval('%OptimizeFunctionOnNextCall(querystring.parse)'); - - bench.start(); - for (var i = 0; i < n; i += 1) + if (type !== 'multicharsep') { querystring.parse(input); - bench.end(n); + eval('%OptimizeFunctionOnNextCall(querystring.parse)'); + querystring.parse(input); + } else { + querystring.parse(input, '&&&&&&&&&&'); + eval('%OptimizeFunctionOnNextCall(querystring.parse)'); + querystring.parse(input, '&&&&&&&&&&'); + } + + if (type !== 'multicharsep') { + bench.start(); + for (var i = 0; i < n; i += 1) + querystring.parse(input); + bench.end(n); + } else { + bench.start(); + for (var i = 0; i < n; i += 1) + querystring.parse(input, '&&&&&&&&&&'); + bench.end(n); + } } diff --git a/benchmark/querystring/querystring-stringify.js b/benchmark/querystring/querystring-stringify.js index d4bb95c21e5af4..a22c3ea644c0d1 100644 --- a/benchmark/querystring/querystring-stringify.js +++ b/benchmark/querystring/querystring-stringify.js @@ -4,7 +4,7 @@ var v8 = require('v8'); var bench = common.createBenchmark(main, { type: ['noencode', 'encodemany', 'encodelast'], - n: [1e6], + n: [1e7], }); function main(conf) { @@ -37,6 +37,7 @@ function main(conf) { v8.setFlagsFromString('--allow_natives_syntax'); eval('%OptimizeFunctionOnNextCall(querystring.stringify)'); + querystring.stringify(input); bench.start(); for (var i = 0; i < n; i += 1) diff --git a/lib/querystring.js b/lib/querystring.js index 4244d8c18b8122..b56ad77012d035 100644 --- a/lib/querystring.js +++ b/lib/querystring.js @@ -6,29 +6,25 @@ const QueryString = exports; const Buffer = require('buffer').Buffer; -function charCode(c) { - return c.charCodeAt(0); -} - - // a safe fast alternative to decodeURIComponent QueryString.unescapeBuffer = function(s, decodeSpaces) { var out = new Buffer(s.length); - var state = 'CHAR'; // states: CHAR, HEX0, HEX1 + var state = 0; var n, m, hexchar; for (var inIndex = 0, 
outIndex = 0; inIndex <= s.length; inIndex++) { - var c = s.charCodeAt(inIndex); + var c = inIndex < s.length ? s.charCodeAt(inIndex) : NaN; switch (state) { - case 'CHAR': + case 0: // Any character switch (c) { - case charCode('%'): + case 37: // '%' n = 0; m = 0; - state = 'HEX0'; + state = 1; break; - case charCode('+'): - if (decodeSpaces) c = charCode(' '); + case 43: // '+' + if (decodeSpaces) + c = 32; // ' ' // falls through default: out[outIndex++] = c; @@ -36,33 +32,33 @@ QueryString.unescapeBuffer = function(s, decodeSpaces) { } break; - case 'HEX0': - state = 'HEX1'; + case 1: // First hex digit hexchar = c; - if (charCode('0') <= c && c <= charCode('9')) { - n = c - charCode('0'); - } else if (charCode('a') <= c && c <= charCode('f')) { - n = c - charCode('a') + 10; - } else if (charCode('A') <= c && c <= charCode('F')) { - n = c - charCode('A') + 10; + if (c >= 48/*0*/ && c <= 57/*9*/) { + n = c - 48/*0*/; + } else if (c >= 65/*A*/ && c <= 70/*F*/) { + n = c - 65/*A*/ + 10; + } else if (c >= 97/*a*/ && c <= 102/*f*/) { + n = c - 97/*a*/ + 10; } else { - out[outIndex++] = charCode('%'); + out[outIndex++] = 37/*%*/; out[outIndex++] = c; - state = 'CHAR'; + state = 0; break; } + state = 2; break; - case 'HEX1': - state = 'CHAR'; - if (charCode('0') <= c && c <= charCode('9')) { - m = c - charCode('0'); - } else if (charCode('a') <= c && c <= charCode('f')) { - m = c - charCode('a') + 10; - } else if (charCode('A') <= c && c <= charCode('F')) { - m = c - charCode('A') + 10; + case 2: // Second hex digit + state = 0; + if (c >= 48/*0*/ && c <= 57/*9*/) { + m = c - 48/*0*/; + } else if (c >= 65/*A*/ && c <= 70/*F*/) { + m = c - 65/*A*/ + 10; + } else if (c >= 97/*a*/ && c <= 102/*f*/) { + m = c - 97/*a*/ + 10; } else { - out[outIndex++] = charCode('%'); + out[outIndex++] = 37/*%*/; out[outIndex++] = hexchar; out[outIndex++] = c; break; @@ -78,13 +74,14 @@ QueryString.unescapeBuffer = function(s, decodeSpaces) { }; -QueryString.unescape = function(s, 
decodeSpaces) { +function qsUnescape(s, decodeSpaces) { try { return decodeURIComponent(s); } catch (e) { return QueryString.unescapeBuffer(s, decodeSpaces).toString(); } -}; +} +QueryString.unescape = qsUnescape; var hexTable = new Array(256); @@ -93,16 +90,13 @@ for (var i = 0; i < 256; ++i) QueryString.escape = function(str) { // replaces encodeURIComponent // http://www.ecma-international.org/ecma-262/5.1/#sec-15.1.3.4 - str = '' + str; - var len = str.length; + if (typeof str !== 'string') + str += ''; var out = ''; - var i, c; + var lastPos = 0; - if (len === 0) - return str; - - for (i = 0; i < len; ++i) { - c = str.charCodeAt(i); + for (var i = 0; i < str.length; ++i) { + var c = str.charCodeAt(i); // These characters do not need escaping (in order): // ! - . _ ~ @@ -115,22 +109,27 @@ QueryString.escape = function(str) { (c >= 0x30 && c <= 0x39) || (c >= 0x41 && c <= 0x5A) || (c >= 0x61 && c <= 0x7A)) { - out += str[i]; continue; } + if (i - lastPos > 0) + out += str.slice(lastPos, i); + // Other ASCII characters if (c < 0x80) { + lastPos = i + 1; out += hexTable[c]; continue; } // Multi-byte characters ... 
if (c < 0x800) { + lastPos = i + 1; out += hexTable[0xC0 | (c >> 6)] + hexTable[0x80 | (c & 0x3F)]; continue; } if (c < 0xD800 || c >= 0xE000) { + lastPos = i + 1; out += hexTable[0xE0 | (c >> 12)] + hexTable[0x80 | ((c >> 6) & 0x3F)] + hexTable[0x80 | (c & 0x3F)]; @@ -138,12 +137,22 @@ QueryString.escape = function(str) { } // Surrogate pair ++i; - c = 0x10000 + (((c & 0x3FF) << 10) | (str.charCodeAt(i) & 0x3FF)); + var c2; + if (i < str.length) + c2 = str.charCodeAt(i) & 0x3FF; + else + c2 = 0; + lastPos = i + 1; + c = 0x10000 + (((c & 0x3FF) << 10) | c2); out += hexTable[0xF0 | (c >> 18)] + hexTable[0x80 | ((c >> 12) & 0x3F)] + hexTable[0x80 | ((c >> 6) & 0x3F)] + hexTable[0x80 | (c & 0x3F)]; } + if (lastPos === 0) + return str; + if (lastPos < str.length) + return out + str.slice(lastPos); return out; }; @@ -198,63 +207,183 @@ QueryString.stringify = QueryString.encode = function(obj, sep, eq, options) { return ''; }; -// Parse a key=val string. +// Parse a key/val string. QueryString.parse = QueryString.decode = function(qs, sep, eq, options) { sep = sep || '&'; eq = eq || '='; - const eqLen = eq.length; - var obj = {}; + + const obj = {}; if (typeof qs !== 'string' || qs.length === 0) { return obj; } + if (typeof sep !== 'string') + sep += ''; + + const eqLen = eq.length; + const sepLen = sep.length; + var maxKeys = 1000; if (options && typeof options.maxKeys === 'number') { maxKeys = options.maxKeys; } - // maxKeys <= 0 means that we should not limit keys count - if (maxKeys > 0 && isFinite(maxKeys)) { - qs = qs.split(sep, maxKeys); - } else { - qs = qs.split(sep); - } - - var len = qs.length; + var pairs = Infinity; + if (maxKeys > 0) + pairs = maxKeys; var decode = QueryString.unescape; if (options && typeof options.decodeURIComponent === 'function') { decode = options.decodeURIComponent; } - - var keys = []; - for (var i = 0; i < len; ++i) { - // replacePlus() is used instead of a regexp because it is ~15-30% faster - // with v8 4.7 - const x = 
replacePlus(qs[i]); - const idx = x.indexOf(eq); - var k, v; - - if (idx >= 0) { - k = decodeStr(x.substring(0, idx), decode); - v = decodeStr(x.substring(idx + eqLen), decode); + const customDecode = (decode !== qsUnescape); + + const keys = []; + var lastPos = 0; + var sepIdx = 0; + var eqIdx = 0; + var key = ''; + var value = ''; + var keyEncoded = customDecode; + var valEncoded = customDecode; + var encodeCheck = 0; + for (var i = 0; i < qs.length; ++i) { + const code = qs.charCodeAt(i); + + // Try matching key/value pair separator (e.g. '&') + if (code === sep.charCodeAt(sepIdx)) { + if (++sepIdx === sepLen) { + // Key/value pair separator match! + const end = i - sepIdx + 1; + if (eqIdx < eqLen) { + // If we didn't find the key/value separator, treat the substring as + // part of the key instead of the value + if (lastPos < end) + key += qs.slice(lastPos, end); + } else if (lastPos < end) + value += qs.slice(lastPos, end); + if (keyEncoded) + key = decodeStr(key, decode); + if (valEncoded) + value = decodeStr(value, decode); + // Use a key array lookup instead of using hasOwnProperty(), which is + // slower + if (keys.indexOf(key) === -1) { + obj[key] = value; + keys[keys.length] = key; + } else { + const curValue = obj[key]; + // `instanceof Array` is used instead of Array.isArray() because it + // is ~15-20% faster with v8 4.7 and is safe to use because we are + // using it with values being created within this function + if (curValue instanceof Array) + curValue[curValue.length] = value; + else + obj[key] = [curValue, value]; + } + if (--pairs === 0) + break; + keyEncoded = valEncoded = customDecode; + encodeCheck = 0; + key = value = ''; + lastPos = i + 1; + sepIdx = eqIdx = 0; + } + continue; } else { - k = decodeStr(x, decode); - v = ''; + sepIdx = 0; + if (!valEncoded) { + // Try to match an (valid) encoded byte (once) to minimize unnecessary + // calls to string decoding functions + if (code === 37/*%*/) { + encodeCheck = 1; + } else if (encodeCheck > 
0 && + ((code >= 48/*0*/ && code <= 57/*9*/) || + (code >= 65/*A*/ && code <= 70/*F*/) || + (code >= 97/*a*/ && code <= 102/*f*/))) { + if (++encodeCheck === 3) + valEncoded = true; + } else { + encodeCheck = 0; + } + } } - // Use a key array lookup instead of using hasOwnProperty(), which is slower - if (keys.indexOf(k) === -1) { - obj[k] = v; - keys.push(k); - } else if (obj[k] instanceof Array) { - // `instanceof Array` is used instead of Array.isArray() because it is - // ~15-20% faster with v8 4.7 and is safe to use because we are using it - // with values being created within this function - obj[k].push(v); + // Try matching key/value separator (e.g. '=') if we haven't already + if (eqIdx < eqLen) { + if (code === eq.charCodeAt(eqIdx)) { + if (++eqIdx === eqLen) { + // Key/value separator match! + const end = i - eqIdx + 1; + if (lastPos < end) + key += qs.slice(lastPos, end); + encodeCheck = 0; + lastPos = i + 1; + } + continue; + } else { + eqIdx = 0; + if (!keyEncoded) { + // Try to match an (valid) encoded byte once to minimize unnecessary + // calls to string decoding functions + if (code === 37/*%*/) { + encodeCheck = 1; + } else if (encodeCheck > 0 && + ((code >= 48/*0*/ && code <= 57/*9*/) || + (code >= 65/*A*/ && code <= 70/*F*/) || + (code >= 97/*a*/ && code <= 102/*f*/))) { + if (++encodeCheck === 3) + keyEncoded = true; + } else { + encodeCheck = 0; + } + } + } + } + + if (code === 43/*+*/) { + if (eqIdx < eqLen) { + if (i - lastPos > 0) + key += qs.slice(lastPos, i); + key += '%20'; + keyEncoded = true; + } else { + if (i - lastPos > 0) + value += qs.slice(lastPos, i); + value += '%20'; + valEncoded = true; + } + lastPos = i + 1; + } + } + + // Check if we have leftover key or value data + if (pairs > 0 && (lastPos < qs.length || eqIdx > 0)) { + if (lastPos < qs.length) { + if (eqIdx < eqLen) + key += qs.slice(lastPos); + else if (sepIdx < sepLen) + value += qs.slice(lastPos); + } + if (keyEncoded) + key = decodeStr(key, decode); + if 
(valEncoded) + value = decodeStr(value, decode); + // Use a key array lookup instead of using hasOwnProperty(), which is + // slower + if (keys.indexOf(key) === -1) { + obj[key] = value; + keys[keys.length] = key; } else { - obj[k] = [obj[k], v]; + const curValue = obj[key]; + // `instanceof Array` is used instead of Array.isArray() because it + // is ~15-20% faster with v8 4.7 and is safe to use because we are + // using it with values being created within this function + if (curValue instanceof Array) + curValue[curValue.length] = value; + else + obj[key] = [curValue, value]; } } @@ -262,23 +391,6 @@ QueryString.parse = QueryString.decode = function(qs, sep, eq, options) { }; -function replacePlus(str) { - var ret = ''; - var start = 0; - var i = -1; - while ((i = str.indexOf('+', i + 1)) !== -1) { - ret += str.slice(start, i); - ret += '%20'; - start = i + 1; - } - if (start === 0) - return str; - if (start < str.length) - ret += str.slice(start); - return ret; -} - - // v8 does not optimize functions with try-catch blocks, so we isolate them here // to minimize the damage function decodeStr(s, decoder) { diff --git a/test/parallel/test-querystring.js b/test/parallel/test-querystring.js index b4388852499ed1..c8e9cc7050af5b 100644 --- a/test/parallel/test-querystring.js +++ b/test/parallel/test-querystring.js @@ -248,3 +248,6 @@ qs.unescape = function(str) { }; assert.deepEqual(qs.parse('foo=bor'), {f__: 'b_r'}); qs.unescape = prevUnescape; + +// test separator and "equals" parsing order +assert.deepEqual(qs.parse('foo&bar', '&', '&'), { foo: '', bar: '' });