mirror of
https://github.com/nodejs/node.git
synced 2025-08-15 13:48:44 +02:00
url: add fileURLToPathBuffer API
The existing `fileURLToPath()` does not handle the case where the input URL contains percent-encoded characters that are not valid UTF-8 sequences. This can lead to issues, for instance, when the URL is constructed using file names in non-Unicode encodings (like Shift-JIS). This commit introduces a new API, `fileURLToPathBuffer()`, which returns a `Buffer` representing the path, allowing for accurate conversion of file URLs to paths without attempting to decode the percent-encoded bytes into characters. PR-URL: https://github.com/nodejs/node/pull/58700 Reviewed-By: Matteo Collina <matteo.collina@gmail.com> Reviewed-By: Ethan Arrowood <ethan@arrowood.dev> Reviewed-By: LiviaMedeiros <livia@cirno.name>
This commit is contained in:
parent
f582d4569f
commit
3f6ad56846
6 changed files with 215 additions and 0 deletions
|
@ -1358,6 +1358,26 @@ new URL('file:///hello world').pathname; // Incorrect: /hello%20world
|
|||
fileURLToPath('file:///hello world'); // Correct: /hello world (POSIX)
|
||||
```
|
||||
|
||||
### `url.fileURLToPathBuffer(url[, options])`
|
||||
|
||||
<!-- YAML
|
||||
added: REPLACEME
|
||||
-->
|
||||
|
||||
* `url` {URL | string} The file URL string or URL object to convert to a path.
|
||||
* `options` {Object}
|
||||
* `windows` {boolean|undefined} `true` if the `path` should be
|
||||
returned as a Windows filepath, `false` for POSIX, and
|
||||
`undefined` for the system default.
|
||||
**Default:** `undefined`.
|
||||
* Returns: {Buffer} The fully-resolved platform-specific Node.js file path
|
||||
as a {Buffer}.
|
||||
|
||||
Like `url.fileURLToPath(...)` except that instead of returning a string
|
||||
representation of the path, a `Buffer` is returned. This conversion is
|
||||
helpful when the input URL contains percent-encoded segments that are
|
||||
not valid UTF-8 / Unicode sequences.
|
||||
|
||||
### `url.format(URL[, options])`
|
||||
|
||||
<!-- YAML
|
||||
|
|
|
@ -349,4 +349,5 @@ function isomorphicDecode(input) {
|
|||
|
||||
module.exports = {
|
||||
dataURLProcessor,
|
||||
percentDecode,
|
||||
};
|
||||
|
|
|
@ -29,6 +29,9 @@ const {
|
|||
Symbol,
|
||||
SymbolIterator,
|
||||
SymbolToStringTag,
|
||||
TypedArrayPrototypeGetBuffer,
|
||||
TypedArrayPrototypeGetByteLength,
|
||||
TypedArrayPrototypeGetByteOffset,
|
||||
decodeURIComponent,
|
||||
} = primordials;
|
||||
|
||||
|
@ -81,13 +84,17 @@ const {
|
|||
CHAR_LOWERCASE_Z,
|
||||
CHAR_PERCENT,
|
||||
CHAR_PLUS,
|
||||
CHAR_COLON,
|
||||
} = require('internal/constants');
|
||||
const path = require('path');
|
||||
const { Buffer } = require('buffer');
|
||||
|
||||
const {
|
||||
validateFunction,
|
||||
} = require('internal/validators');
|
||||
|
||||
const { percentDecode } = require('internal/data_url');
|
||||
|
||||
const querystring = require('querystring');
|
||||
|
||||
const bindingUrl = internalBinding('url');
|
||||
|
@ -1482,6 +1489,76 @@ function getPathFromURLWin32(url) {
|
|||
return StringPrototypeSlice(pathname, 1);
|
||||
}
|
||||
|
||||
// Converts a file: URL into a Windows-style path, returned as a Buffer of
// raw bytes rather than a decoded string.
//
// `url` is a URL object whose `hostname` and `pathname` are read.
// Returns a Buffer holding either a UNC path (`\\host\...`) when the URL
// has a hostname, or a drive-letter path (`C:\...`) otherwise.
// Throws ERR_INVALID_FILE_URL_PATH when a host-less URL does not start with
// a drive letter followed by a colon.
function getPathBufferFromURLWin32(url) {
  const { hostname, pathname } = url;

  // The string-returning getPathFromURLWin32 rejects percent-encoded
  // backslash (\) and forward slash (/) characters by scanning for their
  // ASCII/UTF-8 encodings. That check is deliberately NOT performed here,
  // because those byte values are ambiguous in non-UTF-8 encodings: in
  // Shift-JIS, for instance, %5C is the Yen sign and not a backslash, so for
  // a URL such as file:///foo/%5c/bar there is no way to know which one was
  // meant. An encoding option would not help either: Buffer only knows a
  // handful of text encodings, and a single path may mix several encodings
  // across its segments. The input is therefore treated as a plain sequence
  // of bytes and decoded literally. Since the ASCII backslash *is* the
  // Windows path separator, the Shift-JIS ambiguity is a known limitation
  // that is not solved here.

  // Only the explicit (unencoded) forward slashes are turned into Windows
  // separators. This runs before percent-decoding, so percent-encoded
  // forward slashes are intentionally left alone.
  const backslashed =
    SideEffectFreeRegExpPrototypeSymbolReplace(FORWARD_SLASH, pathname, '\\');

  // Percent-decode at the byte level. Malformed escapes such as `%ZZ` are
  // not an error; they are passed through literally. The resulting typed
  // array is then re-wrapped as a Buffer over the same bytes.
  const decodedBytes = percentDecode(Buffer.from(backslashed, 'utf8'));
  const pathBytes = Buffer.from(
    TypedArrayPrototypeGetBuffer(decodedBytes),
    TypedArrayPrototypeGetByteOffset(decodedBytes),
    TypedArrayPrototypeGetByteLength(decodedBytes));

  if (hostname === '') {
    // No host: this is a local path and must carry a drive letter. Index 0
    // is skipped (it is dropped from the result below), index 1 must be an
    // ASCII letter and index 2 must be an ASCII `:`.
    // OR-ing with 0x20 folds 'A'..'Z' onto 'a'..'z' so one range check
    // covers both cases.
    const driveLetter = pathBytes[1] | 0x20;
    const hasDriveLetter =
      driveLetter >= CHAR_LOWERCASE_A && driveLetter <= CHAR_LOWERCASE_Z;
    if (!hasDriveLetter || pathBytes[2] !== CHAR_COLON) {
      throw new ERR_INVALID_FILE_URL_PATH('must be absolute');
    }

    // Return everything except the first byte.
    return pathBytes.subarray(1);
  }

  // A hostname means a UNC path: `\\` + host + path. The hostname goes
  // through domainToUnicode in case it is a punycode-encoded IDN; only
  // labels with an `xn--` prefix are affected. Percent-encoding in the host
  // is not a concern because the URL parser has already dealt with it.
  // Each piece is converted to a Buffer first so they can be concatenated.
  return Buffer.concat([
    Buffer.from('\\\\', 'ascii'),
    Buffer.from(domainToUnicode(hostname), 'utf8'),
    pathBytes,
  ]);
}
|
||||
|
||||
function getPathFromURLPosix(url) {
|
||||
if (url.hostname !== '') {
|
||||
throw new ERR_INVALID_FILE_URL_HOST(platform);
|
||||
|
@ -1500,6 +1577,28 @@ function getPathFromURLPosix(url) {
|
|||
return decodeURIComponent(pathname);
|
||||
}
|
||||
|
||||
// Converts a file: URL into a POSIX-style path, returned as a Buffer of raw
// bytes rather than a decoded string.
//
// `url` is a URL object whose `hostname` and `pathname` are read.
// Returns a Buffer containing the percent-decoded bytes of the pathname.
// Throws ERR_INVALID_FILE_URL_HOST when the URL has a non-empty hostname.
function getPathBufferFromURLPosix(url) {
  if (url.hostname !== '') {
    throw new ERR_INVALID_FILE_URL_HOST(platform);
  }

  // The string-returning getPathFromURLPosix rejects percent-encoded forward
  // slashes by scanning for their ASCII/UTF-8 encoding. That scan is skipped
  // here on purpose: the same byte values can mean something else in a
  // non-UTF-8 encoding, an encoding option would not help (Buffer only knows
  // a few text encodings, and one path may mix several across its segments),
  // so the best available option is to treat the URL as a sequence of bytes
  // and decode it literally into the returned Buffer.
  const decodedBytes = percentDecode(Buffer.from(url.pathname, 'utf8'));

  // Re-wrap the decoded typed array as a Buffer over the same bytes.
  const backingStore = TypedArrayPrototypeGetBuffer(decodedBytes);
  const byteOffset = TypedArrayPrototypeGetByteOffset(decodedBytes);
  const byteLength = TypedArrayPrototypeGetByteLength(decodedBytes);
  return Buffer.from(backingStore, byteOffset, byteLength);
}
|
||||
|
||||
function fileURLToPath(path, options = kEmptyObject) {
|
||||
const windows = options?.windows;
|
||||
if (typeof path === 'string')
|
||||
|
@ -1511,6 +1610,24 @@ function fileURLToPath(path, options = kEmptyObject) {
|
|||
return (windows ?? isWindows) ? getPathFromURLWin32(path) : getPathFromURLPosix(path);
|
||||
}
|
||||
|
||||
// Buffer-returning counterpart of fileURLToPath. fileURLToPath cannot handle
// percent-encoded bytes that are not valid UTF-8, so this variant exists for
// the cases where converting the path to a string would fail.
//
// `path` is a file URL given as a string or a URL object.
// `options.windows` selects Windows (true) or POSIX (false) conversion;
// when it is null or undefined the current platform decides.
// Returns a Buffer holding the path bytes.
// Throws ERR_INVALID_ARG_TYPE if `path` is neither a string nor a URL, and
// ERR_INVALID_URL_SCHEME if the URL's protocol is not `file:`.
function fileURLToPathBuffer(path, options = kEmptyObject) {
  const useWindowsRules = options?.windows ?? isWindows;

  let fileURL = path;
  if (typeof path === 'string') {
    fileURL = new URL(path);
  } else if (!isURL(path)) {
    throw new ERR_INVALID_ARG_TYPE('path', ['string', 'URL'], path);
  }

  if (fileURL.protocol !== 'file:') {
    throw new ERR_INVALID_URL_SCHEME('file');
  }

  if (useWindowsRules) {
    return getPathBufferFromURLWin32(fileURL);
  }
  return getPathBufferFromURLPosix(fileURL);
}
|
||||
|
||||
function pathToFileURL(filepath, options = kEmptyObject) {
|
||||
const windows = options?.windows ?? isWindows;
|
||||
const isUNC = windows && StringPrototypeStartsWith(filepath, '\\\\');
|
||||
|
@ -1571,6 +1688,7 @@ function getURLOrigin(url) {
|
|||
|
||||
module.exports = {
|
||||
fileURLToPath,
|
||||
fileURLToPathBuffer,
|
||||
pathToFileURL,
|
||||
toPathIfFileURL,
|
||||
installObjectURLMethods,
|
||||
|
|
|
@ -60,6 +60,7 @@ const {
|
|||
domainToASCII,
|
||||
domainToUnicode,
|
||||
fileURLToPath,
|
||||
fileURLToPathBuffer,
|
||||
pathToFileURL: _pathToFileURL,
|
||||
urlToHttpOptions,
|
||||
unsafeProtocol,
|
||||
|
@ -1037,5 +1038,6 @@ module.exports = {
|
|||
// Utilities
|
||||
pathToFileURL,
|
||||
fileURLToPath,
|
||||
fileURLToPathBuffer,
|
||||
urlToHttpOptions,
|
||||
};
|
||||
|
|
|
@ -105,6 +105,8 @@ expected.beforePreExec = new Set([
|
|||
'Internal Binding wasm_web_api',
|
||||
'NativeModule internal/events/abort_listener',
|
||||
'NativeModule internal/modules/typescript',
|
||||
'NativeModule internal/data_url',
|
||||
'NativeModule internal/mime',
|
||||
]);
|
||||
|
||||
expected.atRunTime = new Set([
|
||||
|
|
72
test/parallel/test-fileurltopathbuffer.js
Normal file
72
test/parallel/test-fileurltopathbuffer.js
Normal file
|
@ -0,0 +1,72 @@
|
|||
'use strict';

// Checks that url.fileURLToPathBuffer() can turn a file URL whose name is
// percent-encoded Shift-JIS (not valid UTF-8) into a usable path Buffer,
// while url.fileURLToPath() and passing the URL straight to fs both fail.

const common = require('../common');

// Not runnable on OSX because of the way it handles non-Unicode sequences
// in file names.
if (common.isMacOS) {
  common.skip('Test unsupported on OSX');
}

// Not runnable on Windows either: writeFileSync replaces the non-Unicode
// characters with replacement characters when it normalizes the path.
if (common.isWindows) {
  common.skip('Test unsupported on Windows');
}

const tmpdir = require('../common/tmpdir');
const fs = require('node:fs');
const assert = require('node:assert');
const nodePath = require('node:path');

tmpdir.refresh();

const nodeUrl = require('node:url');

// The same six bytes, once percent-encoded for a URL and once raw.
const encodedName = '%82%A0%82%A2%82%A4';
const rawNameBytes = Buffer.from([0x82, 0xA0, 0x82, 0xA2, 0x82, 0xA4]);

// file: URL pointing at <tmpdir>/<encoded name>.
const baseUrl = nodeUrl.pathToFileURL(tmpdir.path + nodePath.sep);
const targetUrl = new URL(encodedName, baseUrl);

assert.ok(targetUrl.pathname.endsWith(`/${encodedName}`));

// The equivalent path expressed as raw bytes.
const baseBytes = Buffer.from(tmpdir.path + nodePath.sep, 'utf8');
const targetBytes = Buffer.concat([baseBytes, rawNameBytes]);

// The URL cannot be handed to fs directly because it contains non-Unicode
// percent-encoded characters ...
assert.throws(() => fs.writeFileSync(targetUrl, 'test'), {
  name: 'URIError',
});

// ... but the Buffer form of the path can create the file and find it again.
fs.writeFileSync(targetBytes, 'test');
assert.ok(fs.existsSync(targetBytes));

// fileURLToPath fails for the same reason: the URL contains non-Unicode
// percent-encoded characters.
assert.throws(() => fs.existsSync(nodeUrl.fileURLToPath(targetUrl)), {
  name: 'URIError',
});

// fileURLToPathBuffer succeeds because it converts the URL to bytes without
// trying to interpret the percent-encoded characters.
assert.ok(fs.existsSync(nodeUrl.fileURLToPathBuffer(targetUrl)));
|
Loading…
Add table
Add a link
Reference in a new issue