expressjs / body-parser

Node.js body parsing middleware
MIT License
5.44k stars 727 forks source link

improve performance of strict #468

Closed stepancar closed 2 years ago

stepancar commented 2 years ago

In our project we send long string to our server (2mb). Playing with settings I noticed that, if we set strict false parsing is 5-10 ms faster. Digging dipper I've found that firstCharacter is slow. Looking at regexp it seems like it can be simplified. We are looking for first character which is not \x20\x09\x0a\x0d https://regex101.com/r/X2eWbf/1 Search will be stopped after first match because regexp doesn't use global modifier

How I tested it

function makeRandomString(length) {
    var result           = '';
    var characters       = '  {ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789';
    var charactersLength = characters.length;
    for ( var i = 0; i < length; i++ ) {
        result += characters.charAt(Math.floor(Math.random() * charactersLength));
    }
    return result;
}
var str = makeRandomString(2000000);
var OLD_FIRST_CHAR_REGEXP = /^[\x20\x09\x0a\x0d]*([^\x20\x09\x0a\x0d])/
var NEW_FIRST_CHAR_REGEXP = /[^\x20\x09\x0a\x0d]/

console.time()
console.log(OLD_FIRST_CHAR_REGEXP.exec(str)[0])
console.timeEnd();
console.time();
console.log(NEW_FIRST_CHAR_REGEXP.exec(str)[0])
console.timeEnd();

Time difference 100-1000 times for 2mb string.

If we want to achieve really good performance here we should iterate using javascript. Because probability that first non whitespace character is placed on a first position high it would be fast.

qodesmith commented 2 years ago

I ran the test code as is above and received this as the results:

k
default: 11.370849609375 ms
k
default: 0.0458984375 ms

However, if you swap the console.log lines for the new and old regexes so they look like this:

console.time()
console.log(NEW_FIRST_CHAR_REGEXP.exec(str)[0]) // Notice how the "new" one is first
console.timeEnd();
console.time();
console.log(OLD_FIRST_CHAR_REGEXP.exec(str)[0])
console.timeEnd();

...it still shows that the first console.log is much slower than the 2nd!

O
default: 14.861328125 ms
O
default: 0.02783203125 ms

I don't think the test code is showing what you want.

stepancar commented 2 years ago

@qodesmith , it is very interesting! Firstly I thought that browser caches something for regexp underhood.

But it is not a reason!

function makeRandomString(length) {
    var result           = '';
    var characters       = '  {ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789';
    var charactersLength = characters.length;
    for ( var i = 0; i < length; i++ ) {
        result += characters.charAt(Math.floor(Math.random() * charactersLength));
    }
    return result;
}
var str = makeRandomString(2000000);
var OLD_FIRST_CHAR_REGEXP = /^[\x20\x09\x0a\x0d]*([^\x20\x09\x0a\x0d])/
var NEW_FIRST_CHAR_REGEXP = /[^\x20\x09\x0a\x0d]/
console.log(str[0]) // FIRST access to value of long string is slow!

console.time()
console.log(OLD_FIRST_CHAR_REGEXP.exec(str)[0])
console.timeEnd();
console.time();
console.log(NEW_FIRST_CHAR_REGEXP.exec(str)[0])
console.timeEnd();

Look at this example, I added only one line here before running regexps.

console.log(str[0]) // FIRST access to value of long string is slow!
dougwilson commented 2 years ago

Hi @stepancar 👋 I see what you're saying with it being faster, but maybe I missed something since you closed the PR. Did you realize that the change here is not actually faster? I'm just checking because I'm confused why the PR was closed.