buffer: use simdutf for atob implementation

Co-authored-by: Daniel Lemire <daniel@lemire.me>
PR-URL: https://github.com/nodejs/node/pull/52381
Refs: https://github.com/nodejs/node/pull/51670
Reviewed-By: Daniel Lemire <daniel@lemire.me>
Reviewed-By: Vinícius Lourenço Claro Cardoso <contact@viniciusl.com.br>
Reviewed-By: Matteo Collina <matteo.collina@gmail.com>
Reviewed-By: Robert Nagy <ronagy@icloud.com>
Reviewed-By: Benjamin Gruenbaum <benjamingr@gmail.com>
Reviewed-By: Filip Skokan <panva.ip@gmail.com>
2025-08-15 13:48:44 +02:00 · 2024-04-04 18:03:17 -04:00 · 2024-04-04 18:03:17 -04:00 · 6f504b71ac
commit 6f504b71ac
parent cf629366b9
3 changed files with 93 additions and 73 deletions
--- a/lib/buffer.js
+++ b/lib/buffer.js
@@ -23,10 +23,8 @@

 const {
  Array,
-  ArrayFrom,
  ArrayIsArray,
  ArrayPrototypeForEach,
-  ArrayPrototypeIndexOf,
  MathFloor,
  MathMin,
  MathTrunc,
@@ -70,6 +68,7 @@ const {
  swap64: _swap64,
  kMaxLength,
  kStringMaxLength,
+  atob: _atob,
 } = internalBinding('buffer');
 const {
  constants: {
@@ -1259,85 +1258,26 @@ function btoa(input) {
  return buf.toString('base64');
 }

-// Refs: https://infra.spec.whatwg.org/#forgiving-base64-decode
-const kForgivingBase64AllowedChars = [
-  // ASCII whitespace
-  // Refs: https://infra.spec.whatwg.org/#ascii-whitespace
-  0x09, 0x0A, 0x0C, 0x0D, 0x20,
-
-  // Uppercase letters
-  ...ArrayFrom({ length: 26 }, (_, i) => StringPrototypeCharCodeAt('A') + i),
-
-  // Lowercase letters
-  ...ArrayFrom({ length: 26 }, (_, i) => StringPrototypeCharCodeAt('a') + i),
-
-  // Decimal digits
-  ...ArrayFrom({ length: 10 }, (_, i) => StringPrototypeCharCodeAt('0') + i),
-
-  0x2B, // +
-  0x2F, // /
-  0x3D, // =
-];
-const kEqualSignIndex = ArrayPrototypeIndexOf(kForgivingBase64AllowedChars,
-                                              0x3D);
-
 function atob(input) {
-  // The implementation here has not been performance optimized in any way and
-  // should not be.
-  // Refs: https://github.com/nodejs/node/pull/38433#issuecomment-828426932
  if (arguments.length === 0) {
    throw new ERR_MISSING_ARGS('input');
  }

-  input = `${input}`;
-  let nonAsciiWhitespaceCharCount = 0;
-  let equalCharCount = 0;
+  const result = _atob(`${input}`);

-  for (let n = 0; n < input.length; n++) {
-    const index = ArrayPrototypeIndexOf(
-      kForgivingBase64AllowedChars,
-      StringPrototypeCharCodeAt(input, n));
-
-    if (index > 4) {
-      // The first 5 elements of `kForgivingBase64AllowedChars` are
-      // ASCII whitespace char codes.
-      nonAsciiWhitespaceCharCount++;
-
-      if (index === kEqualSignIndex) {
-        equalCharCount++;
-      } else if (equalCharCount) {
-        // The `=` char is only allowed at the end.
-        throw lazyDOMException('Invalid character', 'InvalidCharacterError');
-      }
-
-      if (equalCharCount > 2) {
-        // Only one more `=` is permitted after the first equal sign.
-        throw lazyDOMException('Invalid character', 'InvalidCharacterError');
-      }
-    } else if (index === -1) {
+  switch (result) {
+    case -2: // Invalid character
      throw lazyDOMException('Invalid character', 'InvalidCharacterError');
-    }
+    case -1: // Single character remained
+      throw lazyDOMException(
+        'The string to be decoded is not correctly encoded.',
+        'InvalidCharacterError');
+    case -3: // Possible overflow
+      // TODO(@anonrig): Throw correct error in here.
+      throw lazyDOMException('The input causes overflow.', 'InvalidCharacterError');
+    default:
+      return result;
  }
-
-  let reminder = nonAsciiWhitespaceCharCount % 4;
-
-  // See #2, #3, #4 - https://infra.spec.whatwg.org/#forgiving-base64
-  if (!reminder) {
-    // Remove all trailing `=` characters and get the new reminder.
-    reminder = (nonAsciiWhitespaceCharCount - equalCharCount) % 4;
-  } else if (equalCharCount) {
-    // `=` should not in the input if there's a reminder.
-    throw lazyDOMException('Invalid character', 'InvalidCharacterError');
-  }
-
-  // See #3 - https://infra.spec.whatwg.org/#forgiving-base64
-  if (reminder === 1) {
-    throw lazyDOMException(
-      'The string to be decoded is not correctly encoded.',
-      'InvalidCharacterError');
-  }
-
-  return Buffer.from(input, 'base64').toString('latin1');
 }

 function isUtf8(input) {