buffer: use simdutf for atob implementation

Co-authored-by: Daniel Lemire <daniel@lemire.me>
PR-URL: https://github.com/nodejs/node/pull/52381
Refs: https://github.com/nodejs/node/pull/51670
Reviewed-By: Daniel Lemire <daniel@lemire.me>
Reviewed-By: Vinícius Lourenço Claro Cardoso <contact@viniciusl.com.br>
Reviewed-By: Matteo Collina <matteo.collina@gmail.com>
Reviewed-By: Robert Nagy <ronagy@icloud.com>
Reviewed-By: Benjamin Gruenbaum <benjamingr@gmail.com>
Reviewed-By: Filip Skokan <panva.ip@gmail.com>
This commit is contained in:
Yagiz Nizipli 2024-04-04 18:03:17 -04:00 committed by Node.js GitHub Bot
parent cf629366b9
commit 6f504b71ac
3 changed files with 93 additions and 73 deletions

View file

@ -23,10 +23,8 @@
const {
Array,
ArrayFrom,
ArrayIsArray,
ArrayPrototypeForEach,
ArrayPrototypeIndexOf,
MathFloor,
MathMin,
MathTrunc,
@ -70,6 +68,7 @@ const {
swap64: _swap64,
kMaxLength,
kStringMaxLength,
atob: _atob,
} = internalBinding('buffer');
const {
constants: {
@ -1259,85 +1258,26 @@ function btoa(input) {
return buf.toString('base64');
}
// Refs: https://infra.spec.whatwg.org/#forgiving-base64-decode
// Character codes accepted by the forgiving-base64 decoder, in a fixed order:
// the five ASCII whitespace codes come first (indices 0-4), then 'A'-'Z',
// 'a'-'z', '0'-'9', and finally '+', '/' and '='.
// Refs: https://infra.spec.whatwg.org/#ascii-whitespace
const kForgivingBase64AllowedChars = (() => {
  // Builds `count` consecutive char codes starting at the code of `first`.
  const codeRun = (first, count) =>
    ArrayFrom({ length: count },
              (_, offset) => StringPrototypeCharCodeAt(first) + offset);

  return [
    0x09, 0x0A, 0x0C, 0x0D, 0x20, // TAB, LF, FF, CR, SPACE
    ...codeRun('A', 26), // Uppercase letters
    ...codeRun('a', 26), // Lowercase letters
    ...codeRun('0', 10), // Decimal digits
    0x2B, // +
    0x2F, // /
    0x3D, // =
  ];
})();
// Position of '=' inside the table above.
const kEqualSignIndex =
  ArrayPrototypeIndexOf(kForgivingBase64AllowedChars, 0x3D);
/**
 * Decodes a base64-encoded string (see the forgiving-base64 references in
 * the comments below).
 *
 * NOTE(review): as displayed, this span appears to interleave two
 * implementations without diff markers: a JS validation loop that ends in
 * `Buffer.from(input, 'base64').toString('latin1')`, and a newer path that
 * calls `_atob` and switches on its result. The braces do not balance as
 * shown. Confirm against the real file before changing any code here.
 * @param {any} input Value to decode; it is coerced to a string with a
 *   template literal before use.
 * @returns {any} In the `_atob` path, whatever `_atob` returned when it is
 *   not one of the codes -1, -2 or -3 (presumably the decoded string; verify
 *   against the binding).
 * @throws {ERR_MISSING_ARGS} When called with no arguments.
 * @throws An error created by `lazyDOMException(..., 'InvalidCharacterError')`
 *   for invalid characters, incorrectly encoded input, or overflow.
 */
function atob(input) {
// The implementation here has not been performance optimized in any way and
// should not be.
// Refs: https://github.com/nodejs/node/pull/38433#issuecomment-828426932
if (arguments.length === 0) {
throw new ERR_MISSING_ARGS('input');
}
input = `${input}`;
let nonAsciiWhitespaceCharCount = 0;
let equalCharCount = 0;
// Coerce to a string and hand it to `_atob`, the `atob` export of
// internalBinding('buffer').
const result = _atob(`${input}`);
for (let n = 0; n < input.length; n++) {
// Look up the current char code in the allowed-characters table;
// -1 means the character is not allowed at all.
const index = ArrayPrototypeIndexOf(
kForgivingBase64AllowedChars,
StringPrototypeCharCodeAt(input, n));
if (index > 4) {
// The first 5 elements of `kForgivingBase64AllowedChars` are
// ASCII whitespace char codes.
nonAsciiWhitespaceCharCount++;
if (index === kEqualSignIndex) {
equalCharCount++;
} else if (equalCharCount) {
// The `=` char is only allowed at the end.
throw lazyDOMException('Invalid character', 'InvalidCharacterError');
}
if (equalCharCount > 2) {
// Only one more `=` is permitted after the first equal sign.
throw lazyDOMException('Invalid character', 'InvalidCharacterError');
}
} else if (index === -1) {
// Negative numeric results from `_atob` are treated as error codes;
// any other value is returned unchanged.
switch (result) {
case -2: // Invalid character
throw lazyDOMException('Invalid character', 'InvalidCharacterError');
}
case -1: // Single character remained
throw lazyDOMException(
'The string to be decoded is not correctly encoded.',
'InvalidCharacterError');
case -3: // Possible overflow
// TODO(@anonrig): Throw correct error in here.
throw lazyDOMException('The input causes overflow.', 'InvalidCharacterError');
default:
return result;
}
// `reminder` holds the remainder of the non-whitespace character count
// modulo 4.
let reminder = nonAsciiWhitespaceCharCount % 4;
// See #2, #3, #4 - https://infra.spec.whatwg.org/#forgiving-base64
if (!reminder) {
// Discount all trailing `=` characters and recompute the remainder.
reminder = (nonAsciiWhitespaceCharCount - equalCharCount) % 4;
} else if (equalCharCount) {
// `=` should not be in the input if there's a remainder.
throw lazyDOMException('Invalid character', 'InvalidCharacterError');
}
// See #3 - https://infra.spec.whatwg.org/#forgiving-base64
if (reminder === 1) {
throw lazyDOMException(
'The string to be decoded is not correctly encoded.',
'InvalidCharacterError');
}
// Decode with Buffer and expose each byte as one latin1 character.
return Buffer.from(input, 'base64').toString('latin1');
}
function isUtf8(input) {