benchmark: add calibrate-n script
This script should help identify the best N when creating/updating benchmarks.

Signed-off-by: RafaelGSS <rafael.nunu@hotmail.com>
PR-URL: https://github.com/nodejs/node/pull/59186
Reviewed-By: Vinícius Lourenço Claro Cardoso <contact@viniciusl.com.br>
Reviewed-By: James M Snell <jasnell@gmail.com>
Commit: 698cbd0f8b (parent: af77e4bf2f)
2 changed files with 333 additions and 0 deletions

benchmark/calibrate-n.js (new file, 292 additions)

@@ -0,0 +1,292 @@
'use strict';

const path = require('node:path');
const { fork } = require('node:child_process');
const fs = require('node:fs');
const { styleText } = require('node:util');

const DEFAULT_RUNS = 30; // Number of runs for each n value
const CV_THRESHOLD = 0.05; // 5% coefficient of variation threshold
const MAX_N_INCREASE = 6; // Maximum number of times to increase n (10**6)
const INCREASE_FACTOR = 10; // Factor by which to increase n

const args = process.argv.slice(2);
if (args.length === 0) {
  console.log(`
Usage: node calibrate-n.js [options] <benchmark_path>

Options:
  --runs=N           Number of runs for each n value (default: ${DEFAULT_RUNS})
  --cv-threshold=N   Target coefficient of variation threshold (default: ${CV_THRESHOLD})
  --max-increases=N  Maximum number of n increases to try (default: ${MAX_N_INCREASE})
  --start-n=N        Initial n value to start with (default: 10)
  --increase=N       Factor by which to increase n (default: ${INCREASE_FACTOR})

Example:
  node calibrate-n.js buffers/buffer-compare.js
  node calibrate-n.js --runs=10 --cv-threshold=0.02 buffers/buffer-compare.js
`);
  process.exit(1);
}

// Extract options
let benchmarkPath;
let runs = DEFAULT_RUNS;
let cvThreshold = CV_THRESHOLD;
let maxIncreases = MAX_N_INCREASE;
let startN = 10;
let increaseFactor = INCREASE_FACTOR;

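// Parse command-line flags; any argument that is not a recognized --flag is
// treated as the path of the benchmark to calibrate.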
for (const arg of args) {
  if (arg.startsWith('--runs=')) {
    runs = parseInt(arg.substring(7), 10);
  } else if (arg.startsWith('--cv-threshold=')) {
    cvThreshold = parseFloat(arg.substring(15));
  } else if (arg.startsWith('--max-increases=')) {
    maxIncreases = parseInt(arg.substring(16), 10);
    if (isNaN(maxIncreases)) {
      console.error(`Error: Invalid value for --max-increases. Using default: ${MAX_N_INCREASE}`);
      maxIncreases = MAX_N_INCREASE;
    }
  } else if (arg.startsWith('--start-n=')) {
    startN = parseInt(arg.substring(10), 10);
    if (isNaN(startN)) {
      console.error(`Error: Invalid value for --start-n. Using default: 10`);
      startN = 10;
    }
  } else if (arg.startsWith('--increase=')) {
    increaseFactor = parseInt(arg.substring(11), 10);
    if (isNaN(increaseFactor)) {
      console.error(`Error: Invalid value for --increase. Using default: ${INCREASE_FACTOR}`);
      increaseFactor = INCREASE_FACTOR;
    }
  } else {
    benchmarkPath = arg;
  }
}

if (!benchmarkPath) {
  console.error('Error: No benchmark path specified');
  process.exit(1);
}

const fullBenchmarkPath = path.resolve(benchmarkPath);
if (!fs.existsSync(fullBenchmarkPath)) {
  console.error(`Error: Benchmark file not found: ${fullBenchmarkPath}`);
  process.exit(1);
}

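// Compute the mean, variance, standard deviation and coefficient of variation
// (stdDev / mean) of a set of observed benchmark rates.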
function calculateStats(values) {
  const mean = values.reduce((sum, val) => sum + val, 0) / values.length;

  const squaredDiffs = values.map((val) => {
    const diff = val - mean;
    const squared = diff ** 2;
    return squared;
  });

  const variance = squaredDiffs.reduce((sum, val) => sum + val, 0) / values.length;
  const stdDev = Math.sqrt(variance);
  const cv = stdDev / mean;

  return { mean, stdDev, cv, variance };
}

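// Run the benchmark once in a child process with the given iteration count and
// collect the 'report' messages it sends back over IPC (one per configuration).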
function runBenchmark(n) {
  return new Promise((resolve, reject) => {
    const child = fork(
      fullBenchmarkPath,
      [`n=${n}`],
      { stdio: ['inherit', 'pipe', 'inherit', 'ipc'] },
    );

    const results = [];
    child.on('message', (data) => {
      if (data.type === 'report' && data.rate && data.conf) {
        results.push({
          rate: data.rate,
          conf: data.conf,
        });
      }
    });

    child.on('close', (code) => {
      if (code !== 0) {
        reject(new Error(`Benchmark exited with code ${code}`));
      } else {
        resolve(results);
      }
    });
  });
}

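// Repeatedly run the benchmark, starting at startN and multiplying n by
// increaseFactor, until the stability criteria are met or maxIncreases is reached.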
async function main(n = startN) {
  let increaseCount = 0;
  let bestN = n;
  let bestCV = Infinity;
  let bestGroupStats = null;

  console.log(`
--------------------------------------------------------
Benchmark: ${benchmarkPath}
--------------------------------------------------------
What we are trying to find: The optimal number of iterations (n)
that produces consistent benchmark results without wasting time.

How it works:
1. Run the benchmark multiple times with a specific n value
2. Group results by configuration
3. If overall CV is above 5% or any configuration has CV above 10%, increase n and try again

Configuration:
- Starting n: ${n.toLocaleString()} iterations
- Runs per n value: ${runs}
- Target CV threshold: ${cvThreshold * 100}% (lower CV = more stable results)
- Max increases: ${maxIncreases}
- Increase factor: ${increaseFactor}x`);

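  // Keep increasing n until the results are stable enough or we run out of increases.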
  while (increaseCount < maxIncreases) {
    console.log(`\nTesting with n=${n}:`);

    const resultsData = [];
    for (let i = 0; i < runs; i++) {
      const results = await runBenchmark(n);
      // Each run might return multiple results (one per configuration)
      if (Array.isArray(results) && results.length > 0) {
        resultsData.push(...results);
      } else if (results) {
        resultsData.push(results);
      }
      process.stdout.write('.');
    }
    process.stdout.write('\n');

    const groupedResults = {};
    resultsData.forEach((result) => {
      if (!result || !result.conf) return;

      const confKey = JSON.stringify(result.conf);
      groupedResults[confKey] ||= {
        conf: result.conf,
        rates: [],
      };

      groupedResults[confKey].rates.push(result.rate);
    });

    const groupStats = [];
    for (const [confKey, group] of Object.entries(groupedResults)) {
      console.log(`\nConfiguration: ${JSON.stringify(group.conf)}`);

      const stats = calculateStats(group.rates);
      console.log(` CV: ${(stats.cv * 100).toFixed(2)}% (lower values mean more stable results)`);

      const isStable = stats.cv <= cvThreshold;
      console.log(` Stability: ${isStable ?
        styleText(['bold', 'green'], '✓ Stable') :
        styleText(['bold', 'red'], '✗ Unstable')}`);

      groupStats.push({
        confKey,
        stats,
        isStable,
      });
    }

    if (groupStats.length > 0) {
      // Check if any configuration has CV > 10% (too unstable)
      const tooUnstableConfigs = groupStats.filter((g) => g.stats.cv > 0.10);

      const avgCV = groupStats.reduce((sum, g) => sum + g.stats.cv, 0) / groupStats.length;
      console.log(`\nOverall average CV: ${(avgCV * 100).toFixed(2)}%`);

      const isOverallStable = avgCV < cvThreshold;
      const hasVeryUnstableConfigs = tooUnstableConfigs.length > 0;

      // Check if overall CV is below the target threshold and no configuration has CV > 10%
      if (isOverallStable && !hasVeryUnstableConfigs) {
        console.log(styleText(['bold', 'green'], ` ✓ Overall CV is below ${cvThreshold * 100}% and no configuration has CV above 10%`));
      } else {
        if (!isOverallStable) {
          console.log(styleText(['bold', 'red'], ` ✗ Overall CV (${(avgCV * 100).toFixed(2)}%) is above ${cvThreshold * 100}%`));
        }
        if (hasVeryUnstableConfigs) {
          console.log(styleText(['bold', 'red'], ` ✗ ${tooUnstableConfigs.length} configuration(s) have CV above 10%`));
        }
      }

      if (avgCV < bestCV || !bestGroupStats) {
        bestN = n;
        bestCV = avgCV;

        bestGroupStats = [];
        for (const group of Object.values(groupedResults)) {
          if (group.rates.length >= 3) {
            const stats = calculateStats(group.rates);
            bestGroupStats.push({
              conf: group.conf,
              stats: stats,
              isStable: stats.cv <= 0.10,
            });
          }
        }
        console.log(` → New best n: ${n} with average CV: ${(avgCV * 100).toFixed(2)}%`);
      } else {
        console.log(` → Current best n remains: ${bestN} with average CV: ${(bestCV * 100).toFixed(2)}%`);
      }
    }

    // Check if we've reached acceptable stability based on two criteria:
    // 1. Overall CV should be below the target threshold
    // 2. No configuration should have a CV greater than 10%
    const avgCV = groupStats.length > 0 ?
      groupStats.reduce((sum, g) => sum + g.stats.cv, 0) / groupStats.length : Infinity;
    const hasUnstableConfig = groupStats.some((g) => g.stats.cv > 0.10);
    const isOverallStable = avgCV < cvThreshold;

    if (isOverallStable && !hasUnstableConfig) {
      console.log(`\n✓ Found optimal n=${n} (Overall CV=${(avgCV * 100).toFixed(2)}% < ${cvThreshold * 100}% and no configuration has CV > 10%)`);
      console.log('\nFinal CV for each configuration:');
      groupStats.forEach((g) => {
        console.log(` ${JSON.stringify(groupedResults[g.confKey].conf)}: ${(g.stats.cv * 100).toFixed(2)}%`);
      });

      return n;
    }

    increaseCount++;
    n *= increaseFactor;
  }

  if (increaseCount >= maxIncreases) {
    const finalAvgCV = bestGroupStats && bestGroupStats.length > 0 ?
      bestGroupStats.reduce((sum, g) => sum + g.stats.cv, 0) / bestGroupStats.length : Infinity;

    console.log(`Maximum number of increases (${maxIncreases}) reached without achieving target stability`);
    console.log(`Best n found: ${bestN} with average CV=${(finalAvgCV * 100).toFixed(2)}%`);
    console.log(`\nCV by configuration at best n:`);

    if (bestGroupStats) {
      bestGroupStats.forEach((g) => {
        if (g.conf) {
          console.log(` ${JSON.stringify(g.conf)}: ${(g.stats.cv * 100).toFixed(2)}%`);
          if (g.stats.cv > cvThreshold) {
            console.log(` ⚠️ This configuration is above the target threshold of ${cvThreshold * 100}%`);
          }
        }
      });
    }
  }

  console.log(`
Recommendation: You might want to try increasing --max-increases to
continue testing with larger n values, or adjust --cv-threshold to
accept the current best result, or investigate if specific configurations
are contributing to instability.`);
  return bestN;
}

main().catch((err) => {
  console.error('Error:', err);
  process.exit(1);
});
benchmark/README.md (41 additions)

@@ -9,6 +9,7 @@
* [Benchmark analysis requirements](#benchmark-analysis-requirements)
* [Running benchmarks](#running-benchmarks)
* [Running individual benchmarks](#running-individual-benchmarks)
* [Calibrating the number of iterations with calibrate-n.js](#calibrating-the-number-of-iterations-with-calibrate-njs)
* [Running all benchmarks](#running-all-benchmarks)
* [Specifying CPU Cores for Benchmarks with run.js](#specifying-cpu-cores-for-benchmarks-with-runjs)
* [Filtering benchmarks](#filtering-benchmarks)
@@ -142,6 +143,46 @@
buffers/buffer-tostring.js n=10000000 len=1024 arg=true: 3498295.68561504
buffers/buffer-tostring.js n=10000000 len=1024 arg=false: 3783071.1678948295
```

### Calibrating the number of iterations with calibrate-n.js

Before running benchmarks, it's often useful to determine the optimal number of iterations (`n`)
that provides statistically stable results. The `calibrate-n.js` tool helps find this value by
running a benchmark multiple times with increasing `n` values until the coefficient of variation (CV)
falls below a target threshold.

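In rough terms, the CV that the tool tracks is the standard deviation of the observed rates divided
by their mean, so lower values mean the repeated runs agree with each other. A tiny illustrative
sketch (the sample rates below are made up and are not real benchmark output):

```js
// Hypothetical ops/sec rates collected from repeated runs of one configuration.
const rates = [100, 102, 98];

const mean = rates.reduce((sum, r) => sum + r, 0) / rates.length; // 100
const variance = rates.reduce((sum, r) => sum + (r - mean) ** 2, 0) / rates.length;
const cv = Math.sqrt(variance) / mean; // ≈ 0.016

// Prints "CV: 1.63%", well below the default 5% threshold, so this n would count as stable.
console.log(`CV: ${(cv * 100).toFixed(2)}%`);
```

A calibration run looks like this:
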
```console
$ node benchmark/calibrate-n.js benchmark/buffers/buffer-compare.js

--------------------------------------------------------
Benchmark: buffers/buffer-compare.js
--------------------------------------------------------
What we are trying to find: The optimal number of iterations (n)
that produces consistent benchmark results without wasting time.

How it works:
1. Run the benchmark multiple times with a specific n value
2. Group results by configuration
3. If overall CV is above 5% or any configuration has CV above 10%, increase n and try again
4. Stop when we have stable results (overall CV < 5% and all configs CV < 10%) or max increases reached

Configuration:
- Starting n: 10 iterations
- Runs per n value: 30
- Target CV threshold: 5% (lower CV = more stable results)
- Max increases: 6
- Increase factor: 10x
```

The tool accepts several options:

* `--runs=N`: Number of runs for each n value (default: 30)
* `--cv-threshold=N`: Target coefficient of variation threshold (default: 0.05)
* `--max-increases=N`: Maximum number of n increases to try (default: 6)
* `--start-n=N`: Initial n value to start with (default: 10)
* `--increase=N`: Factor by which to increase n (default: 10)

Once you've determined a stable `n` value, you can use it when running your benchmarks.

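For example, if calibration settles on `n=1000000` (an illustrative value, not a recommendation),
the benchmark can then be run directly with it:

```console
$ node benchmark/buffers/buffer-compare.js n=1000000
```
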
### Running all benchmarks

Similar to running individual benchmarks, a group of benchmarks can be executed