zlib: add dictionary support to zstdCompress and zstdDecompress

Adds optional dictionary support to zlib’s zstdCompress and
zstdDecompress APIs. This enables better compression ratios when the
dictionary matches expected input structure or content patterns.

The implementation allows passing a `dictionary` buffer through the
options object. Support was added to both streaming and convenience
methods. Tests and documentation were also updated to reflect this new
capability.

Fixes: https://github.com/nodejs/node/issues/59105
PR-URL: https://github.com/nodejs/node/pull/59240
Reviewed-By: Anna Henningsen <anna@addaleax.net>
This commit is contained in:
lluisemper 2025-08-04 18:13:57 +02:00 committed by GitHub
parent f7c2a7ed4a
commit b8e643259e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 79 additions and 7 deletions

View file

@ -1069,6 +1069,9 @@ Each Zstd-based class takes an `options` object. All options are optional.
* `maxOutputLength` {integer} Limits output size when using
[convenience methods][]. **Default:** [`buffer.kMaxLength`][]
* `info` {boolean} If `true`, returns an object with `buffer` and `engine`. **Default:** `false`
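* `dictionary` {Buffer|TypedArray|DataView} Optional dictionary used to
improve compression efficiency when compressing or decompressing data that
shares common patterns with the dictionary. Data compressed with a
dictionary must be decompressed with the same dictionary.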
For example:

View file

@ -893,12 +893,15 @@ class Zstd extends ZlibBase {
const pledgedSrcSize = opts?.pledgedSrcSize ?? undefined;
const writeState = new Uint32Array(2);
handle.init(
initParamsArray,
pledgedSrcSize,
writeState,
processCallback,
opts?.dictionary && isArrayBufferView(opts.dictionary) ? opts.dictionary : undefined,
);
super(opts, mode, handle, zstdDefaultOpts);
this._writeState = writeState;
}

View file

@ -324,7 +324,8 @@ class ZstdCompressContext final : public ZstdContext {
CompressionError ResetStream();
// Zstd specific:
CompressionError Init(uint64_t pledged_src_size);
CompressionError Init(uint64_t pledged_src_size,
std::string_view dictionary = {});
CompressionError SetParameter(int key, int value);
// Wrap ZSTD_freeCCtx to remove the return type.
@ -349,7 +350,9 @@ class ZstdDecompressContext final : public ZstdContext {
CompressionError ResetStream();
// Zstd specific:
CompressionError Init(uint64_t pledged_src_size);
CompressionError Init(uint64_t pledged_src_size,
std::string_view dictionary = {});
CompressionError SetParameter(int key, int value);
// Wrap ZSTD_freeDCtx to remove the return type.
@ -875,8 +878,10 @@ class ZstdStream final : public CompressionStream<CompressionContext> {
Environment* env = Environment::GetCurrent(args);
Local<Context> context = env->context();
CHECK(args.Length() == 4 &&
"init(params, pledgedSrcSize, writeResult, writeCallback)");
CHECK((args.Length() == 4 || args.Length() == 5) &&
"init(params, pledgedSrcSize, writeResult, writeCallback[, "
"dictionary])");
ZstdStream* wrap;
ASSIGN_OR_RETURN_UNWRAP(&wrap, args.This());
@ -904,7 +909,19 @@ class ZstdStream final : public CompressionStream<CompressionContext> {
}
AllocScope alloc_scope(wrap);
CompressionError err = wrap->context()->Init(pledged_src_size);
std::string_view dictionary;
ArrayBufferViewContents<char> contents;
if (args.Length() == 5 && !args[4]->IsUndefined()) {
if (!args[4]->IsArrayBufferView()) {
THROW_ERR_INVALID_ARG_TYPE(
wrap->env(), "dictionary must be an ArrayBufferView if provided");
return;
}
contents.ReadValue(args[4]);
dictionary = std::string_view(contents.data(), contents.length());
}
CompressionError err = wrap->context()->Init(pledged_src_size, dictionary);
if (err.IsError()) {
wrap->EmitError(err);
THROW_ERR_ZLIB_INITIALIZATION_FAILED(wrap->env(), err.message);
@ -1509,7 +1526,8 @@ CompressionError ZstdCompressContext::SetParameter(int key, int value) {
return {};
}
CompressionError ZstdCompressContext::Init(uint64_t pledged_src_size) {
CompressionError ZstdCompressContext::Init(uint64_t pledged_src_size,
std::string_view dictionary) {
pledged_src_size_ = pledged_src_size;
cctx_.reset(ZSTD_createCCtx());
if (!cctx_) {
@ -1517,6 +1535,17 @@ CompressionError ZstdCompressContext::Init(uint64_t pledged_src_size) {
"ERR_ZLIB_INITIALIZATION_FAILED",
-1);
}
if (!dictionary.empty()) {
size_t ret = ZSTD_CCtx_loadDictionary(
cctx_.get(), dictionary.data(), dictionary.size());
if (ZSTD_isError(ret)) {
return CompressionError("Failed to load zstd dictionary",
"ERR_ZLIB_DICTIONARY_LOAD_FAILED",
-1);
}
}
size_t result = ZSTD_CCtx_setPledgedSrcSize(cctx_.get(), pledged_src_size);
if (ZSTD_isError(result)) {
return CompressionError(
@ -1549,13 +1578,24 @@ CompressionError ZstdDecompressContext::SetParameter(int key, int value) {
return {};
}
CompressionError ZstdDecompressContext::Init(uint64_t pledged_src_size) {
// Creates a fresh zstd decompression context and, when a non-empty
// `dictionary` is supplied, loads that dictionary into the new context.
//
// Parameters:
//   pledged_src_size - not read anywhere in this function body.
//   dictionary       - raw dictionary bytes; an empty view means "no
//                      dictionary" and the load step is skipped entirely.
//
// Returns a default-constructed CompressionError (`{}`) when nothing failed,
// or an error carrying ERR_ZLIB_INITIALIZATION_FAILED /
// ERR_ZLIB_DICTIONARY_LOAD_FAILED otherwise.
CompressionError ZstdDecompressContext::Init(uint64_t pledged_src_size,
std::string_view dictionary) {
// Replace whatever context was held before with a newly created one.
dctx_.reset(ZSTD_createDCtx());
if (!dctx_) {
return CompressionError("Could not initialize zstd instance",
"ERR_ZLIB_INITIALIZATION_FAILED",
-1);
}
if (!dictionary.empty()) {
// NOTE(review): `dictionary` is a non-owning view. This relies on
// ZSTD_DCtx_loadDictionary copying the bytes into the context (as the zstd
// manual describes) rather than referencing them - confirm against the
// bundled zstd version.
size_t ret = ZSTD_DCtx_loadDictionary(
dctx_.get(), dictionary.data(), dictionary.size());
if (ZSTD_isError(ret)) {
// The zstd error code in `ret` is not forwarded; a fixed message is used.
return CompressionError("Failed to load zstd dictionary",
"ERR_ZLIB_DICTIONARY_LOAD_FAILED",
-1);
}
}
return {};
}

View file

@ -0,0 +1,26 @@
'use strict';
// Round-trip test for the zstd `dictionary` option: compress a payload with a
// dictionary through the convenience API, check that the result is smaller
// than the payload, then decompress it with the same dictionary and compare
// the text against the original.
const common = require('../common');
const assert = require('assert');
const zlib = require('zlib');

// Dictionary text that shares several sentences with the payload below.
const sharedDictionary = Buffer.from(
`Lorem ipsum dolor sit amet, consectetur adipiscing elit.
Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.`
);

// Payload to compress.
const payload = Buffer.from(
`Lorem ipsum dolor sit amet, consectetur adipiscing elit.
Lorem ipsum dolor sit amet, consectetur adipiscing elit.
Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur.`
);

// Final step: the decompressed text must equal the original payload text.
function verifyRestored(restored) {
  assert.strictEqual(restored.toString(), payload.toString());
}

// Middle step: the compressed output must be smaller than the payload, and it
// is then fed back through zstdDecompress with the same dictionary.
function verifyCompressed(packed) {
  assert(packed.length < payload.length);
  zlib.zstdDecompress(
    packed,
    { dictionary: sharedDictionary },
    common.mustSucceed(verifyRestored),
  );
}

zlib.zstdCompress(
  payload,
  { dictionary: sharedDictionary },
  common.mustSucceed(verifyCompressed),
);