Open Uzlopak opened 5 months ago
How come we can implement startsWith faster than native? I don't quite understand. Seems a bit unintuitive.
The native one seems to not cache the value. So there is no performance gain.
We absolutely can do this, I think I showed this to @anonrig at some point a while ago and we used the trick in bluebird back then.
It's faster than native because it does less work and it's jitted to a specific string value.
Looks really cool! But let's first consider native alternatives ;)
true startsWith x 73,398,707 ops/sec ±0.56% (100 runs sampled)
true indexOf x 968,185,969 ops/sec ±0.17% (97 runs sampled)
true startsWith jit x 959,753,026 ops/sec ±1.18% (98 runs sampled)
true endsWith x 111,526,784 ops/sec ±0.20% (97 runs sampled)
true indexOf end x 969,052,956 ops/sec ±0.08% (96 runs sampled)
true endsWith jit x 178,013,979 ops/sec ±0.28% (98 runs sampled)
"use strict";
const assert = require("assert");
const Benchmark = require("benchmark");

// Fixture string and the two needles shared by every case.
const test = "Lorem Ipsum I dolor";
const Lorem = "Lorem";
const dolor = "dolor";

// Specialize the generated matchers once, outside the timed loops.
const startsWithJit = startsWithJitGen(Lorem);
const endsWithJit = endsWithJitGen(dolor);

// Benchmark cases as [title, body] pairs, registered in one pass below.
const cases = [
  ["true startsWith", () => { test.startsWith(Lorem); }],
  ["true indexOf", () => { test.indexOf(Lorem) === 0; }],
  ["true startsWith jit", () => { startsWithJit(test); }],
  ["true endsWith", () => { test.endsWith(dolor); }],
  ["true indexOf end", () => { test.indexOf(dolor) === test.length - dolor.length; }],
  ["true endsWith jit", () => { endsWithJit(test); }],
];

const suite = new Benchmark.Suite();
for (const [title, body] of cases) {
  suite.add(title, body);
}
suite
  .on("cycle", (event) => {
    console.log(String(event.target));
  })
  .run();
/**
 * Generates a predicate specialized to test whether a string starts with
 * `sequence`. The returned function is code-generated via `new Function`,
 * so each character comparison is inlined against a constant.
 *
 * Comparing char codes (numbers) instead of one-character string literals
 * keeps the generated source valid even when `sequence` contains quotes,
 * backslashes or newlines — interpolating `sequence[i]` into a
 * single-quoted literal produced a syntax error (or arbitrary code
 * injection) for such needles.
 *
 * @param {string} sequence - the prefix to specialize on
 * @returns {(value: *) => boolean} true iff `value` is a string starting with `sequence`
 */
function startsWithJitGen(sequence) {
  const chain = [];
  // Guard first: non-strings short-circuit to false before any indexing.
  chain.push("(typeof value === 'string')");
  chain.push(`(value.length >= ${sequence.length})`);
  for (let i = 0, il = sequence.length; i < il; ++i) {
    // Numeric comparison needs no escaping; the length guard above
    // guarantees every index here is in range.
    chain.push(`(value.charCodeAt(${i}) === ${sequence.charCodeAt(i)})`);
  }
  const fnBody = "return " + chain.join(" && ");
  return new Function("value", fnBody);
}
function endsWithJitGen(sequence) {
const chain = [];
chain.push("(typeof value === 'string')");
chain.push(`(value.length >= ${sequence.length})`);
for (let i = 0, il = sequence.length; i < il; ++i) {
if (i === 0) {
chain.push(`(value[len] === '${sequence[i]}')`);
continue;
}
chain.push(`(value[len + ${i}] === '${sequence[i]}')`);
}
const fnBody =
`const len = value.length - ${sequence.length}; return ` +
chain.join(" && ");
return new Function("value", fnBody);
}
This is not correct. indexOf searches the whole string. So imagine 1 mb of string and at the end is the sequence you are looking for. In that case startsWith is faster as it will only check the beginning. The JIT variant is basically faster than startsWith as it does not have to process the "needle" everytime.
Also, using indexOf for endsWith is not a good idea; you should use the second parameter of indexOf to force it to start searching near the end, or use lastIndexOf.
No, I did not release it as a module.
Sorry I accidentally deleted the comment
No, I did not release it as a module.
You should in my opinion
This is not correct.
Yeah I was in a hurry, but as you pointed out they can be remedied and it would still be at least as performant
The main point was code generation using Function should be avoided because of how easy it is to make a bad mistake, that's why a proper module that does this would be better
How about these? Note that .slice
doesn't create a new string but makes a SlicedString:
true startsWith x 72,533,239 ops/sec ±1.12% (100 runs sampled)
true slice indexOf x 934,900,608 ops/sec ±1.61% (93 runs sampled)
true slice string comparison x 942,870,838 ops/sec ±0.61% (97 runs sampled)
true startsWith jit x 944,179,790 ops/sec ±1.18% (98 runs sampled)
true endsWith x 110,914,809 ops/sec ±0.28% (98 runs sampled)
true slice indexOf end x 952,351,504 ops/sec ±0.94% (101 runs sampled)
true slice string comparison end x 948,044,225 ops/sec ±1.03% (97 runs sampled)
true endsWith jit x 175,161,034 ops/sec ±0.40% (97 runs sampled)
"use strict";
const assert = require("assert");
const Benchmark = require("benchmark");

// Fixture string and the two needles shared by every case.
const test = "Lorem Ipsum I dolor";
const Lorem = "Lorem";
const dolor = "dolor";

// Specialize the generated matchers once, outside the timed loops.
const startsWithJit = startsWithJitGen(Lorem);
const endsWithJit = endsWithJitGen(dolor);

// Benchmark cases as [title, body] pairs, registered in one pass below.
const cases = [
  ["true startsWith", () => { test.startsWith(Lorem); }],
  ["true slice indexOf", () => { test.slice(0, Lorem.length).indexOf(Lorem) === 0; }],
  ["true slice string comparison", () => { test.slice(0, Lorem.length) === Lorem; }],
  ["true startsWith jit", () => { startsWithJit(test); }],
  ["true endsWith", () => { test.endsWith(dolor); }],
  ["true slice indexOf end", () => { test.slice(-dolor.length).indexOf(dolor) === 0; }],
  ["true slice string comparison end", () => { test.slice(-dolor.length) === dolor; }],
  ["true endsWith jit", () => { endsWithJit(test); }],
];

const suite = new Benchmark.Suite();
for (const [title, body] of cases) {
  suite.add(title, body);
}
suite
  .on("cycle", (event) => {
    console.log(String(event.target));
  })
  .run();
/**
 * Generates a predicate specialized to test whether a string starts with
 * `sequence`. The returned function is code-generated via `new Function`,
 * so each character comparison is inlined against a constant.
 *
 * Comparing char codes (numbers) instead of one-character string literals
 * keeps the generated source valid even when `sequence` contains quotes,
 * backslashes or newlines — interpolating `sequence[i]` into a
 * single-quoted literal produced a syntax error (or arbitrary code
 * injection) for such needles.
 *
 * @param {string} sequence - the prefix to specialize on
 * @returns {(value: *) => boolean} true iff `value` is a string starting with `sequence`
 */
function startsWithJitGen(sequence) {
  const chain = [];
  // Guard first: non-strings short-circuit to false before any indexing.
  chain.push("(typeof value === 'string')");
  chain.push(`(value.length >= ${sequence.length})`);
  for (let i = 0, il = sequence.length; i < il; ++i) {
    // Numeric comparison needs no escaping; the length guard above
    // guarantees every index here is in range.
    chain.push(`(value.charCodeAt(${i}) === ${sequence.charCodeAt(i)})`);
  }
  const fnBody = "return " + chain.join(" && ");
  return new Function("value", fnBody);
}
function endsWithJitGen(sequence) {
const chain = [];
chain.push("(typeof value === 'string')");
chain.push(`(value.length >= ${sequence.length})`);
for (let i = 0, il = sequence.length; i < il; ++i) {
if (i === 0) {
chain.push(`(value[len] === '${sequence[i]}')`);
continue;
}
chain.push(`(value[len + ${i}] === '${sequence[i]}')`);
}
const fnBody =
`const len = value.length - ${sequence.length}; return ` +
chain.join(" && ");
return new Function("value", fnBody);
}
These sort of benchmarks are kind of meaningless due to the variety of data (string length, string type (whether tree/rope or flat), string encoding internally etc).
I recommend instead of these microbenchmark fairy tales (+1 if you get the reference) you look at the IR in indicitum (https://v8.github.io/tools/head/system-analyzer/index.html)
Of course I get your point; I wasn't arguing that what I presented is the best performance-wise, just that we don't need to use new Function
to get that level of performance
However, one thing all these benchmarks clearly demonstrate is how startsWith and endsWith are definitely slower than all the alternatives
Replacing them with one of the provided alternatives should be a no-brainer in node or any performance-centric project
Does startsWith
or endsWith
actually show up in the performance profile of any of our APIs that use them?
I don't know, does it matter if it's faster?
I'm not pushing for this change, I simply mentioned my concerns for using new Function
and provided an alternative that also seems to be 100 times faster than starts/endsWith
in microbenchmarks — is that much of a difference realistic? No! Could it improve the performance of hot paths? Maybe
Also, endsWithJIT
doesn't seem on par with what I submitted — I know if this was a lower level language, nothing beats what Uzlopak showed, but as we know, V8 cheats when JIT compiling
Anyway, I don't know either if the results are similar with primordials
Reference:
https://github.com/RafaelGSS/nodejs-bench-operations/blob/main/RESULTS-v20.md#startswith-comparison https://github.com/RafaelGSS/nodejs-bench-operations/blob/main/RESULTS-v20.md#endswith-comparison
I proposed this already in fastify/help long time ago, but maybe it has a use in nodejs core?
https://github.com/fastify/help/issues/711
true startsWith x 57,424,304 ops/sec ±0.61% (95 runs sampled)
true startsWith jit x 1,359,500,378 ops/sec ±0.12% (99 runs sampled)
true endsWith x 55,539,779 ops/sec ±0.73% (93 runs sampled)
true endsWith jit x 136,694,982 ops/sec ±0.92% (95 runs sampled)
false startsWith x 208,705,495 ops/sec ±0.52% (97 runs sampled)
false startsWith jit x 1,360,481,699 ops/sec ±0.15% (97 runs sampled)
false endsWith x 208,323,815 ops/sec ±0.83% (93 runs sampled)
false endsWith jit x 135,154,350 ops/sec ±0.74% (87 runs sampled)