mirror of
https://github.com/nodejs/node.git
synced 2025-08-15 13:48:44 +02:00
querystring: improve parse() performance
This commit improves parse() performance by ~20-200% with the various querystring-parse benchmarks.

Some optimization strategies used in this commit include:

* Combining multiple searches (for '&', '=', and '+') on the same string into a single loop
* Avoiding string.split()
* Minimizing creation of temporary strings
* Avoiding string decoding if no encoded bytes were found and the default string decoder is being used

PR-URL: https://github.com/nodejs/node/pull/5012
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Roman Reiss <me@silverwind.io>
Reviewed-By: Matteo Collina <matteo.collina@gmail.com>
This commit is contained in:
parent
90451a67ca
commit
a2a69a2b63
3 changed files with 191 additions and 62 deletions
|
@ -78,13 +78,14 @@ QueryString.unescapeBuffer = function(s, decodeSpaces) {
|
|||
};
|
||||
|
||||
|
||||
QueryString.unescape = function(s, decodeSpaces) {
|
||||
function qsUnescape(s, decodeSpaces) {
|
||||
try {
|
||||
return decodeURIComponent(s);
|
||||
} catch (e) {
|
||||
return QueryString.unescapeBuffer(s, decodeSpaces).toString();
|
||||
}
|
||||
};
|
||||
}
|
||||
QueryString.unescape = qsUnescape;
|
||||
|
||||
|
||||
var hexTable = new Array(256);
|
||||
|
@ -198,63 +199,183 @@ QueryString.stringify = QueryString.encode = function(obj, sep, eq, options) {
|
|||
return '';
|
||||
};
|
||||
|
||||
// Parse a key=val string.
|
||||
// Parse a key/val string.
|
||||
QueryString.parse = QueryString.decode = function(qs, sep, eq, options) {
|
||||
sep = sep || '&';
|
||||
eq = eq || '=';
|
||||
const eqLen = eq.length;
|
||||
var obj = {};
|
||||
|
||||
const obj = {};
|
||||
|
||||
if (typeof qs !== 'string' || qs.length === 0) {
|
||||
return obj;
|
||||
}
|
||||
|
||||
if (typeof sep !== 'string')
|
||||
sep += '';
|
||||
|
||||
const eqLen = eq.length;
|
||||
const sepLen = sep.length;
|
||||
|
||||
var maxKeys = 1000;
|
||||
if (options && typeof options.maxKeys === 'number') {
|
||||
maxKeys = options.maxKeys;
|
||||
}
|
||||
|
||||
// maxKeys <= 0 means that we should not limit keys count
|
||||
if (maxKeys > 0 && isFinite(maxKeys)) {
|
||||
qs = qs.split(sep, maxKeys);
|
||||
} else {
|
||||
qs = qs.split(sep);
|
||||
}
|
||||
|
||||
var len = qs.length;
|
||||
var pairs = Infinity;
|
||||
if (maxKeys > 0)
|
||||
pairs = maxKeys;
|
||||
|
||||
var decode = QueryString.unescape;
|
||||
if (options && typeof options.decodeURIComponent === 'function') {
|
||||
decode = options.decodeURIComponent;
|
||||
}
|
||||
const customDecode = (decode !== qsUnescape);
|
||||
|
||||
var keys = [];
|
||||
for (var i = 0; i < len; ++i) {
|
||||
// replacePlus() is used instead of a regexp because it is ~15-30% faster
|
||||
// with v8 4.7
|
||||
const x = replacePlus(qs[i]);
|
||||
const idx = x.indexOf(eq);
|
||||
var k, v;
|
||||
const keys = [];
|
||||
var lastPos = 0;
|
||||
var sepIdx = 0;
|
||||
var eqIdx = 0;
|
||||
var key = '';
|
||||
var value = '';
|
||||
var keyEncoded = customDecode;
|
||||
var valEncoded = customDecode;
|
||||
var encodeCheck = 0;
|
||||
for (var i = 0; i < qs.length; ++i) {
|
||||
const code = qs.charCodeAt(i);
|
||||
|
||||
if (idx >= 0) {
|
||||
k = decodeStr(x.substring(0, idx), decode);
|
||||
v = decodeStr(x.substring(idx + eqLen), decode);
|
||||
// Try matching key/value pair separator (e.g. '&')
|
||||
if (code === sep.charCodeAt(sepIdx)) {
|
||||
if (++sepIdx === sepLen) {
|
||||
// Key/value pair separator match!
|
||||
const end = i - sepIdx + 1;
|
||||
if (eqIdx < eqLen) {
|
||||
// If we didn't find the key/value separator, treat the substring as
|
||||
// part of the key instead of the value
|
||||
if (lastPos < end)
|
||||
key += qs.slice(lastPos, end);
|
||||
} else if (lastPos < end)
|
||||
value += qs.slice(lastPos, end);
|
||||
if (keyEncoded)
|
||||
key = decodeStr(key, decode);
|
||||
if (valEncoded)
|
||||
value = decodeStr(value, decode);
|
||||
// Use a key array lookup instead of using hasOwnProperty(), which is
|
||||
// slower
|
||||
if (keys.indexOf(key) === -1) {
|
||||
obj[key] = value;
|
||||
keys[keys.length] = key;
|
||||
} else {
|
||||
const curValue = obj[key];
|
||||
// `instanceof Array` is used instead of Array.isArray() because it
|
||||
// is ~15-20% faster with v8 4.7 and is safe to use because we are
|
||||
// using it with values being created within this function
|
||||
if (curValue instanceof Array)
|
||||
curValue[curValue.length] = value;
|
||||
else
|
||||
obj[key] = [curValue, value];
|
||||
}
|
||||
if (--pairs === 0)
|
||||
break;
|
||||
keyEncoded = valEncoded = customDecode;
|
||||
encodeCheck = 0;
|
||||
key = value = '';
|
||||
lastPos = i + 1;
|
||||
sepIdx = eqIdx = 0;
|
||||
}
|
||||
continue;
|
||||
} else {
|
||||
k = decodeStr(x, decode);
|
||||
v = '';
|
||||
sepIdx = 0;
|
||||
if (!valEncoded) {
|
||||
// Try to match a (valid) encoded byte (once) to minimize unnecessary
|
||||
// calls to string decoding functions
|
||||
if (code === 37/*%*/) {
|
||||
encodeCheck = 1;
|
||||
} else if (encodeCheck > 0 &&
|
||||
((code >= 48/*0*/ && code <= 57/*9*/) ||
|
||||
(code >= 65/*A*/ && code <= 70/*F*/) ||
|
||||
(code >= 97/*a*/ && code <= 102/*f*/))) {
|
||||
if (++encodeCheck === 3)
|
||||
valEncoded = true;
|
||||
} else {
|
||||
encodeCheck = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Use a key array lookup instead of using hasOwnProperty(), which is slower
|
||||
if (keys.indexOf(k) === -1) {
|
||||
obj[k] = v;
|
||||
keys.push(k);
|
||||
} else if (obj[k] instanceof Array) {
|
||||
// `instanceof Array` is used instead of Array.isArray() because it is
|
||||
// ~15-20% faster with v8 4.7 and is safe to use because we are using it
|
||||
// with values being created within this function
|
||||
obj[k].push(v);
|
||||
// Try matching key/value separator (e.g. '=') if we haven't already
|
||||
if (eqIdx < eqLen) {
|
||||
if (code === eq.charCodeAt(eqIdx)) {
|
||||
if (++eqIdx === eqLen) {
|
||||
// Key/value separator match!
|
||||
const end = i - eqIdx + 1;
|
||||
if (lastPos < end)
|
||||
key += qs.slice(lastPos, end);
|
||||
encodeCheck = 0;
|
||||
lastPos = i + 1;
|
||||
}
|
||||
continue;
|
||||
} else {
|
||||
eqIdx = 0;
|
||||
if (!keyEncoded) {
|
||||
// Try to match a (valid) encoded byte once to minimize unnecessary
|
||||
// calls to string decoding functions
|
||||
if (code === 37/*%*/) {
|
||||
encodeCheck = 1;
|
||||
} else if (encodeCheck > 0 &&
|
||||
((code >= 48/*0*/ && code <= 57/*9*/) ||
|
||||
(code >= 65/*A*/ && code <= 70/*F*/) ||
|
||||
(code >= 97/*a*/ && code <= 102/*f*/))) {
|
||||
if (++encodeCheck === 3)
|
||||
keyEncoded = true;
|
||||
} else {
|
||||
encodeCheck = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (code === 43/*+*/) {
|
||||
if (eqIdx < eqLen) {
|
||||
if (i - lastPos > 0)
|
||||
key += qs.slice(lastPos, i);
|
||||
key += '%20';
|
||||
keyEncoded = true;
|
||||
} else {
|
||||
if (i - lastPos > 0)
|
||||
value += qs.slice(lastPos, i);
|
||||
value += '%20';
|
||||
valEncoded = true;
|
||||
}
|
||||
lastPos = i + 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Check if we have leftover key or value data
|
||||
if (pairs > 0 && (lastPos < qs.length || eqIdx > 0)) {
|
||||
if (lastPos < qs.length) {
|
||||
if (eqIdx < eqLen)
|
||||
key += qs.slice(lastPos);
|
||||
else if (sepIdx < sepLen)
|
||||
value += qs.slice(lastPos);
|
||||
}
|
||||
if (keyEncoded)
|
||||
key = decodeStr(key, decode);
|
||||
if (valEncoded)
|
||||
value = decodeStr(value, decode);
|
||||
// Use a key array lookup instead of using hasOwnProperty(), which is
|
||||
// slower
|
||||
if (keys.indexOf(key) === -1) {
|
||||
obj[key] = value;
|
||||
keys[keys.length] = key;
|
||||
} else {
|
||||
obj[k] = [obj[k], v];
|
||||
const curValue = obj[key];
|
||||
// `instanceof Array` is used instead of Array.isArray() because it
|
||||
// is ~15-20% faster with v8 4.7 and is safe to use because we are
|
||||
// using it with values being created within this function
|
||||
if (curValue instanceof Array)
|
||||
curValue[curValue.length] = value;
|
||||
else
|
||||
obj[key] = [curValue, value];
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -262,23 +383,6 @@ QueryString.parse = QueryString.decode = function(qs, sep, eq, options) {
|
|||
};
|
||||
|
||||
|
||||
function replacePlus(str) {
|
||||
var ret = '';
|
||||
var start = 0;
|
||||
var i = -1;
|
||||
while ((i = str.indexOf('+', i + 1)) !== -1) {
|
||||
ret += str.slice(start, i);
|
||||
ret += '%20';
|
||||
start = i + 1;
|
||||
}
|
||||
if (start === 0)
|
||||
return str;
|
||||
if (start < str.length)
|
||||
ret += str.slice(start);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
// v8 does not optimize functions with try-catch blocks, so we isolate them here
|
||||
// to minimize the damage
|
||||
function decodeStr(s, decoder) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue