mirror of
https://github.com/nodejs/node.git
synced 2025-08-15 13:48:44 +02:00
deps,src: simplify base64 encoding
PR-URL: https://github.com/nodejs/node/pull/52714 Reviewed-By: Yagiz Nizipli <yagiz.nizipli@sentry.io> Reviewed-By: Moshe Atlow <moshe@atlow.co.il> Reviewed-By: Michaël Zasso <targos@protonmail.com> Reviewed-By: Ruben Bridgewater <ruben@bridgewater.de> Reviewed-By: Marco Ippolito <marcoippolito54@gmail.com> Reviewed-By: Richard Lau <rlau@redhat.com>
This commit is contained in:
parent
9fda8e29cd
commit
561493d35e
93 changed files with 20 additions and 9194 deletions
9
.github/workflows/tools.yml
vendored
9
.github/workflows/tools.yml
vendored
|
@ -16,7 +16,6 @@ on:
|
|||
- acorn
|
||||
- acorn-walk
|
||||
- ada
|
||||
- base64
|
||||
- brotli
|
||||
- c-ares
|
||||
- cjs-module-lexer
|
||||
|
@ -80,14 +79,6 @@ jobs:
|
|||
cat temp-output
|
||||
tail -n1 temp-output | grep "NEW_VERSION=" >> "$GITHUB_ENV" || true
|
||||
rm temp-output
|
||||
- id: base64
|
||||
subsystem: deps
|
||||
label: dependencies
|
||||
run: |
|
||||
./tools/dep_updaters/update-base64.sh > temp-output
|
||||
cat temp-output
|
||||
tail -n1 temp-output | grep "NEW_VERSION=" >> "$GITHUB_ENV" || true
|
||||
rm temp-output
|
||||
- id: brotli
|
||||
subsystem: deps
|
||||
label: dependencies
|
||||
|
|
32
LICENSE
32
LICENSE
|
@ -2168,35 +2168,3 @@ The externally maintained libraries used by Node.js are:
|
|||
OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
"""
|
||||
|
||||
- base64, located at deps/base64/base64/, is licensed as follows:
|
||||
"""
|
||||
Copyright (c) 2005-2007, Nick Galbreath
|
||||
Copyright (c) 2015-2018, Wojciech Muła
|
||||
Copyright (c) 2016-2017, Matthieu Darbois
|
||||
Copyright (c) 2013-2022, Alfred Klomp
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
||||
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
"""
|
||||
|
|
14
deps/base64/BUILD.gn
vendored
14
deps/base64/BUILD.gn
vendored
|
@ -1,14 +0,0 @@
|
|||
##############################################################################
|
||||
# #
|
||||
# DO NOT EDIT THIS FILE! #
|
||||
# #
|
||||
##############################################################################
|
||||
|
||||
# This file is used by GN for building, which is NOT the build system used for
|
||||
# building official binaries.
|
||||
# Please modify the gyp files if you are making changes to build system.
|
||||
|
||||
import("unofficial.gni")
|
||||
|
||||
base64_gn_build("base64") {
|
||||
}
|
14
deps/base64/README.md
vendored
14
deps/base64/README.md
vendored
|
@ -1,14 +0,0 @@
|
|||
# base64
|
||||
|
||||
This project boosts base64 encoding/decoding performance by utilizing SIMD
|
||||
operations where possible.
|
||||
|
||||
The source is pulled from: https://github.com/aklomp/base64
|
||||
|
||||
Active development occurs in the default branch (currently named `master`).
|
||||
|
||||
## Updating
|
||||
|
||||
```sh
|
||||
$ git clone https://github.com/aklomp/base64
|
||||
```
|
221
deps/base64/base64.gyp
vendored
221
deps/base64/base64.gyp
vendored
|
@ -1,221 +0,0 @@
|
|||
{
|
||||
'variables': {
|
||||
'arm_fpu%': '',
|
||||
'target_arch%': '',
|
||||
'base64_sources_common': [
|
||||
'base64/include/libbase64.h',
|
||||
'base64/lib/arch/generic/codec.c',
|
||||
'base64/lib/tables/tables.c',
|
||||
'base64/lib/codec_choose.c',
|
||||
'base64/lib/codecs.h',
|
||||
'base64/lib/lib.c',
|
||||
],
|
||||
},
|
||||
'targets': [
|
||||
{
|
||||
'target_name': 'base64',
|
||||
'type': 'static_library',
|
||||
'include_dirs': [ 'base64/include', 'base64/lib' ],
|
||||
'direct_dependent_settings': {
|
||||
'include_dirs': [ 'base64/include' ],
|
||||
'defines': [ 'BASE64_STATIC_DEFINE' ],
|
||||
},
|
||||
'defines': [ 'BASE64_STATIC_DEFINE' ],
|
||||
'sources': [
|
||||
'<@(base64_sources_common)',
|
||||
],
|
||||
|
||||
'conditions': [
|
||||
[ 'arm_fpu=="neon" and target_arch=="arm"', {
|
||||
'defines': [ 'HAVE_NEON32=1' ],
|
||||
'dependencies': [ 'base64_neon32' ],
|
||||
}, {
|
||||
'sources': [ 'base64/lib/arch/neon32/codec.c' ],
|
||||
}],
|
||||
|
||||
# arm64 requires NEON, so it's safe to always use it
|
||||
[ 'target_arch=="arm64"', {
|
||||
'defines': [ 'HAVE_NEON64=1' ],
|
||||
'dependencies': [ 'base64_neon64' ],
|
||||
}, {
|
||||
'sources': [ 'base64/lib/arch/neon64/codec.c' ],
|
||||
}],
|
||||
|
||||
# Runtime detection will happen for x86 CPUs
|
||||
[ 'target_arch in "ia32 x64 x32"', {
|
||||
'defines': [
|
||||
'HAVE_SSSE3=1',
|
||||
'HAVE_SSE41=1',
|
||||
'HAVE_SSE42=1',
|
||||
'HAVE_AVX=1',
|
||||
'HAVE_AVX2=1',
|
||||
'HAVE_AVX512=1',
|
||||
],
|
||||
'dependencies': [
|
||||
'base64_ssse3',
|
||||
'base64_sse41',
|
||||
'base64_sse42',
|
||||
'base64_avx',
|
||||
'base64_avx2',
|
||||
'base64_avx512',
|
||||
],
|
||||
}, {
|
||||
'sources': [
|
||||
'base64/lib/arch/ssse3/codec.c',
|
||||
'base64/lib/arch/sse41/codec.c',
|
||||
'base64/lib/arch/sse42/codec.c',
|
||||
'base64/lib/arch/avx/codec.c',
|
||||
'base64/lib/arch/avx2/codec.c',
|
||||
'base64/lib/arch/avx512/codec.c',
|
||||
],
|
||||
}],
|
||||
],
|
||||
},
|
||||
|
||||
{
|
||||
'target_name': 'base64_ssse3',
|
||||
'type': 'static_library',
|
||||
'include_dirs': [ 'base64/include', 'base64/lib' ],
|
||||
'sources': [ 'base64/lib/arch/ssse3/codec.c' ],
|
||||
'defines': [ 'BASE64_STATIC_DEFINE', 'HAVE_SSSE3=1' ],
|
||||
'conditions': [
|
||||
[ 'OS!="win"', {
|
||||
'cflags': [ '-mssse3' ],
|
||||
'xcode_settings': {
|
||||
'OTHER_CFLAGS': [ '-mssse3' ]
|
||||
},
|
||||
}],
|
||||
],
|
||||
},
|
||||
|
||||
{
|
||||
'target_name': 'base64_sse41',
|
||||
'type': 'static_library',
|
||||
'include_dirs': [ 'base64/include', 'base64/lib' ],
|
||||
'sources': [ 'base64/lib/arch/sse41/codec.c' ],
|
||||
'defines': [ 'BASE64_STATIC_DEFINE', 'HAVE_SSE41=1' ],
|
||||
'conditions': [
|
||||
[ 'OS!="win"', {
|
||||
'cflags': [ '-msse4.1' ],
|
||||
'xcode_settings': {
|
||||
'OTHER_CFLAGS': [ '-msse4.1' ]
|
||||
},
|
||||
}],
|
||||
],
|
||||
},
|
||||
|
||||
{
|
||||
'target_name': 'base64_sse42',
|
||||
'type': 'static_library',
|
||||
'include_dirs': [ 'base64/include', 'base64/lib' ],
|
||||
'sources': [ 'base64/lib/arch/sse42/codec.c' ],
|
||||
'defines': [ 'BASE64_STATIC_DEFINE', 'HAVE_SSE42=1' ],
|
||||
'conditions': [
|
||||
[ 'OS!="win"', {
|
||||
'cflags': [ '-msse4.2' ],
|
||||
'xcode_settings': {
|
||||
'OTHER_CFLAGS': [ '-msse4.2' ]
|
||||
},
|
||||
}],
|
||||
],
|
||||
},
|
||||
|
||||
{
|
||||
'target_name': 'base64_avx',
|
||||
'type': 'static_library',
|
||||
'include_dirs': [ 'base64/include', 'base64/lib' ],
|
||||
'sources': [ 'base64/lib/arch/avx/codec.c' ],
|
||||
'defines': [ 'BASE64_STATIC_DEFINE', 'HAVE_AVX=1' ],
|
||||
'conditions': [
|
||||
[ 'OS!="win"', {
|
||||
'cflags': [ '-mavx' ],
|
||||
'xcode_settings': {
|
||||
'OTHER_CFLAGS': [ '-mavx' ]
|
||||
},
|
||||
}, {
|
||||
'msvs_settings': {
|
||||
'VCCLCompilerTool': {
|
||||
'AdditionalOptions': [
|
||||
'/arch:AVX'
|
||||
],
|
||||
},
|
||||
},
|
||||
}],
|
||||
],
|
||||
},
|
||||
|
||||
{
|
||||
'target_name': 'base64_avx2',
|
||||
'type': 'static_library',
|
||||
'include_dirs': [ 'base64/include', 'base64/lib' ],
|
||||
'sources': [ 'base64/lib/arch/avx2/codec.c' ],
|
||||
'defines': [ 'BASE64_STATIC_DEFINE', 'HAVE_AVX2=1' ],
|
||||
'conditions': [
|
||||
[ 'OS!="win"', {
|
||||
'cflags': [ '-mavx2' ],
|
||||
'xcode_settings': {
|
||||
'OTHER_CFLAGS': [ '-mavx2' ]
|
||||
},
|
||||
}, {
|
||||
'msvs_settings': {
|
||||
'VCCLCompilerTool': {
|
||||
'AdditionalOptions': [
|
||||
'/arch:AVX2'
|
||||
],
|
||||
},
|
||||
},
|
||||
}],
|
||||
],
|
||||
},
|
||||
|
||||
{
|
||||
'target_name': 'base64_avx512',
|
||||
'type': 'static_library',
|
||||
'include_dirs': [ 'base64/include', 'base64/lib' ],
|
||||
'sources': [ 'base64/lib/arch/avx512/codec.c' ],
|
||||
'defines': [ 'BASE64_STATIC_DEFINE', 'HAVE_AVX512=1' ],
|
||||
'conditions': [
|
||||
[ 'OS!="win"', {
|
||||
'cflags': [ '-mavx512vl', '-mavx512vbmi' ],
|
||||
'xcode_settings': {
|
||||
'OTHER_CFLAGS': [ '-mavx512vl', '-mavx512vbmi' ]
|
||||
},
|
||||
}, {
|
||||
'msvs_settings': {
|
||||
'VCCLCompilerTool': {
|
||||
'AdditionalOptions': [
|
||||
'/arch:AVX512'
|
||||
],
|
||||
},
|
||||
},
|
||||
}],
|
||||
],
|
||||
},
|
||||
|
||||
{
|
||||
'target_name': 'base64_neon32',
|
||||
'type': 'static_library',
|
||||
'include_dirs': [ 'base64/include', 'base64/lib' ],
|
||||
'sources': [ 'base64/lib/arch/neon32/codec.c' ],
|
||||
'defines': [ 'BASE64_STATIC_DEFINE', 'HAVE_NEON32=1' ],
|
||||
'conditions': [
|
||||
[ 'OS!="win"', {
|
||||
'cflags': [ '-mfpu=neon' ],
|
||||
'xcode_settings': {
|
||||
'OTHER_CFLAGS': [ '-mfpu=neon' ]
|
||||
},
|
||||
}],
|
||||
],
|
||||
},
|
||||
|
||||
{
|
||||
'target_name': 'base64_neon64',
|
||||
'type': 'static_library',
|
||||
'include_dirs': [ 'base64/include', 'base64/lib' ],
|
||||
'sources': [ 'base64/lib/arch/neon64/codec.c' ],
|
||||
'defines': [ 'BASE64_STATIC_DEFINE', 'HAVE_NEON64=1' ],
|
||||
# NEON is required in arm64, so no -mfpu flag is needed
|
||||
}
|
||||
|
||||
]
|
||||
}
|
22
deps/base64/base64/.editorconfig
vendored
22
deps/base64/base64/.editorconfig
vendored
|
@ -1,22 +0,0 @@
|
|||
# https://EditorConfig.org
|
||||
root = true
|
||||
|
||||
[*]
|
||||
charset = utf-8
|
||||
insert_final_newline = true
|
||||
trim_trailing_whitespace = true
|
||||
|
||||
indent_style = tab
|
||||
tab_width = 8
|
||||
indent_size = 8
|
||||
|
||||
[CMakeLists.txt]
|
||||
tab_width = 4
|
||||
indent_style = space
|
||||
[*.cmake]
|
||||
tab_width = 4
|
||||
indent_style = space
|
||||
|
||||
[*.py]
|
||||
tab_width = 4
|
||||
indent_style = space
|
1
deps/base64/base64/.gitignore
vendored
1
deps/base64/base64/.gitignore
vendored
|
@ -1 +0,0 @@
|
|||
# Intentionally empty
|
290
deps/base64/base64/CMakeLists.txt
vendored
290
deps/base64/base64/CMakeLists.txt
vendored
|
@ -1,290 +0,0 @@
|
|||
# Written in 2016-2017, 2021 by Henrik Steffen Gaßmann henrik@gassmann.onl
|
||||
#
|
||||
# To the extent possible under law, the author(s) have dedicated all
|
||||
# copyright and related and neighboring rights to this software to the
|
||||
# public domain worldwide. This software is distributed without any warranty.
|
||||
#
|
||||
# You should have received a copy of the CC0 Public Domain Dedication
|
||||
# along with this software. If not, see
|
||||
#
|
||||
# http://creativecommons.org/publicdomain/zero/1.0/
|
||||
#
|
||||
########################################################################
|
||||
cmake_minimum_required(VERSION 3.10.2)
|
||||
|
||||
# new dependent option syntax. We are already compliant
|
||||
if (POLICY CMP0127)
|
||||
cmake_policy(SET CMP0127 NEW)
|
||||
endif()
|
||||
|
||||
project(base64 LANGUAGES C VERSION 0.5.2)
|
||||
|
||||
include(GNUInstallDirs)
|
||||
include(CMakeDependentOption)
|
||||
include(CheckIncludeFile)
|
||||
include(FeatureSummary)
|
||||
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules")
|
||||
|
||||
#######################################################################
|
||||
# platform detection
|
||||
include(TargetArch)
|
||||
detect_target_architecture(_TARGET_ARCH)
|
||||
|
||||
check_include_file(getopt.h HAVE_GETOPT_H)
|
||||
cmake_dependent_option(BASE64_BUILD_CLI "Build the cli for encoding and decoding" ON "HAVE_GETOPT_H" OFF)
|
||||
add_feature_info(CLI BASE64_BUILD_CLI "enables the CLI executable for encoding and decoding")
|
||||
|
||||
###################################################################
|
||||
# optional/conditional dependencies
|
||||
find_package(OpenMP)
|
||||
set_package_properties(OpenMP PROPERTIES
|
||||
TYPE OPTIONAL
|
||||
PURPOSE "Allows to utilize OpenMP"
|
||||
)
|
||||
|
||||
|
||||
########################################################################
|
||||
# Compilation options
|
||||
option(BASE64_WERROR "Treat warnings as error" ON)
|
||||
option(BASE64_BUILD_TESTS "add test projects" OFF)
|
||||
cmake_dependent_option(BASE64_WITH_OpenMP "use OpenMP" OFF "OpenMP_FOUND" OFF)
|
||||
add_feature_info("OpenMP codec" BASE64_WITH_OpenMP "spreads codec work accross multiple threads")
|
||||
cmake_dependent_option(BASE64_REGENERATE_TABLES "regenerate the codec tables" OFF "NOT CMAKE_CROSSCOMPILING" OFF)
|
||||
|
||||
set(_IS_X86 "_TARGET_ARCH_x86 OR _TARGET_ARCH_x64")
|
||||
cmake_dependent_option(BASE64_WITH_SSSE3 "add SSSE 3 codepath" ON ${_IS_X86} OFF)
|
||||
add_feature_info(SSSE3 BASE64_WITH_SSSE3 "add SSSE 3 codepath")
|
||||
cmake_dependent_option(BASE64_WITH_SSE41 "add SSE 4.1 codepath" ON ${_IS_X86} OFF)
|
||||
add_feature_info(SSE4.1 BASE64_WITH_SSE41 "add SSE 4.1 codepath")
|
||||
cmake_dependent_option(BASE64_WITH_SSE42 "add SSE 4.2 codepath" ON ${_IS_X86} OFF)
|
||||
add_feature_info(SSE4.2 BASE64_WITH_SSE42 "add SSE 4.2 codepath")
|
||||
cmake_dependent_option(BASE64_WITH_AVX "add AVX codepath" ON ${_IS_X86} OFF)
|
||||
add_feature_info(AVX BASE64_WITH_AVX "add AVX codepath")
|
||||
cmake_dependent_option(BASE64_WITH_AVX2 "add AVX 2 codepath" ON ${_IS_X86} OFF)
|
||||
add_feature_info(AVX2 BASE64_WITH_AVX2 "add AVX2 codepath")
|
||||
cmake_dependent_option(BASE64_WITH_AVX512 "add AVX 512 codepath" ON ${_IS_X86} OFF)
|
||||
add_feature_info(AVX512 BASE64_WITH_AVX512 "add AVX512 codepath")
|
||||
|
||||
cmake_dependent_option(BASE64_WITH_NEON32 "add NEON32 codepath" OFF _TARGET_ARCH_arm OFF)
|
||||
add_feature_info(NEON32 BASE64_WITH_NEON32 "add NEON32 codepath")
|
||||
|
||||
cmake_dependent_option(BASE64_WITH_NEON64 "add NEON64 codepath" ON _TARGET_ARCH_arm64 OFF)
|
||||
add_feature_info(NEON64 BASE64_WITH_NEON64 "add NEON64 codepath")
|
||||
|
||||
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/bin")
|
||||
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/bin")
|
||||
|
||||
########################################################################
|
||||
# Regenerate headers
|
||||
|
||||
if (BASE64_REGENERATE_TABLES)
|
||||
# Generate tables in build folder and copy to source tree.
|
||||
# Don't add the tables in the source tree to the outputs, to avoid `make clean` removing them.
|
||||
add_executable(table_generator
|
||||
lib/tables/table_generator.c
|
||||
)
|
||||
|
||||
add_custom_command(OUTPUT table_dec_32bit.h "${CMAKE_CURRENT_SOURCE_DIR}/lib/tables/table_dec_32bit.h"
|
||||
COMMAND table_generator > table_dec_32bit.h
|
||||
COMMAND "${CMAKE_COMMAND}" -E copy table_dec_32bit.h "${CMAKE_CURRENT_SOURCE_DIR}/lib/tables/table_dec_32bit.h"
|
||||
DEPENDS table_generator
|
||||
)
|
||||
set(Python_ADDITIONAL_VERSIONS 3)
|
||||
find_package(PythonInterp REQUIRED)
|
||||
add_custom_command(OUTPUT table_enc_12bit.h "${CMAKE_CURRENT_SOURCE_DIR}/lib/tables/table_enc_12bit.h"
|
||||
COMMAND "${PYTHON_EXECUTABLE}" "${CMAKE_CURRENT_SOURCE_DIR}/lib/tables/table_enc_12bit.py" > table_enc_12bit.h
|
||||
COMMAND "${CMAKE_COMMAND}" -E copy table_enc_12bit.h "${CMAKE_CURRENT_SOURCE_DIR}/lib/tables/table_enc_12bit.h"
|
||||
DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/lib/tables/table_enc_12bit.py"
|
||||
)
|
||||
endif()
|
||||
|
||||
|
||||
########################################################################
|
||||
# library project
|
||||
add_library(base64
|
||||
# library files
|
||||
lib/lib.c
|
||||
lib/codec_choose.c
|
||||
include/libbase64.h
|
||||
|
||||
lib/tables/tables.c
|
||||
# Add generated headers explicitly to target, to insert them in the dependency tree
|
||||
lib/tables/table_dec_32bit.h
|
||||
lib/tables/table_enc_12bit.h
|
||||
|
||||
# codec implementations
|
||||
lib/arch/generic/codec.c
|
||||
|
||||
lib/arch/ssse3/codec.c
|
||||
lib/arch/sse41/codec.c
|
||||
lib/arch/sse42/codec.c
|
||||
lib/arch/avx/codec.c
|
||||
lib/arch/avx2/codec.c
|
||||
lib/arch/avx512/codec.c
|
||||
|
||||
lib/arch/neon32/codec.c
|
||||
lib/arch/neon64/codec.c
|
||||
)
|
||||
|
||||
target_include_directories(base64
|
||||
PUBLIC
|
||||
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
|
||||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
|
||||
PRIVATE
|
||||
"${CMAKE_CURRENT_BINARY_DIR}"
|
||||
)
|
||||
|
||||
####################################################################
|
||||
# platform/compiler specific configuration
|
||||
set_target_properties(base64 PROPERTIES
|
||||
C_STANDARD 99
|
||||
C_STANDARD_REQUIRED YES
|
||||
C_EXTENSIONS OFF
|
||||
DEFINE_SYMBOL BASE64_EXPORTS
|
||||
VERSION ${PROJECT_VERSION}
|
||||
SOVERSION ${PROJECT_VERSION_MAJOR}
|
||||
)
|
||||
|
||||
#generate_export_header(base64)
|
||||
# the following definitions and those in libbase64.h have been
|
||||
# kept forward compatible in case we ever switch to generate_export_header
|
||||
if (BUILD_SHARED_LIBS)
|
||||
set_target_properties(base64 PROPERTIES
|
||||
C_VISIBILITY_PRESET hidden
|
||||
)
|
||||
else()
|
||||
target_compile_definitions(base64
|
||||
PUBLIC
|
||||
BASE64_STATIC_DEFINE
|
||||
)
|
||||
endif()
|
||||
|
||||
target_compile_options(base64 PRIVATE
|
||||
$<$<C_COMPILER_ID:MSVC>:
|
||||
/W4
|
||||
/we4013 # Error warning C4013: 'function' undefined; assuming extern returning int
|
||||
/we4700 # Error warning C4700: uninitialized local variable
|
||||
/we4715 # not all control paths return a value
|
||||
/we4003 # not enough actual parameters for macro
|
||||
/wd4456 # disable warning C4456: declaration of 'xxx' hides previous local declaration
|
||||
>
|
||||
$<$<NOT:$<C_COMPILER_ID:MSVC>>:
|
||||
-Wall
|
||||
-Wextra
|
||||
-Wpedantic
|
||||
>
|
||||
$<$<BOOL:${BASE64_WERROR}>:$<IF:$<C_COMPILER_ID:MSVC>,/WX,-Werror>>
|
||||
)
|
||||
|
||||
target_compile_definitions(base64 PRIVATE
|
||||
$<$<C_COMPILER_ID:MSVC>:
|
||||
# remove unnecessary warnings about unchecked iterators
|
||||
_SCL_SECURE_NO_WARNINGS
|
||||
>
|
||||
)
|
||||
|
||||
########################################################################
|
||||
# SIMD settings
|
||||
include(TargetSIMDInstructionSet)
|
||||
define_SIMD_compile_flags()
|
||||
|
||||
if (_TARGET_ARCH STREQUAL "x86" OR _TARGET_ARCH STREQUAL "x64")
|
||||
macro(configure_codec _TYPE)
|
||||
if (BASE64_WITH_${_TYPE})
|
||||
string(TOLOWER "${_TYPE}" _DIR)
|
||||
set_source_files_properties("lib/arch/${_DIR}/codec.c" PROPERTIES
|
||||
COMPILE_FLAGS "${COMPILE_FLAGS_${_TYPE}}"
|
||||
)
|
||||
|
||||
if (${ARGC} GREATER 1 AND MSVC)
|
||||
set_source_files_properties("lib/arch/${_DIR}/codec.c" PROPERTIES
|
||||
COMPILE_DEFINITIONS ${ARGV1}
|
||||
)
|
||||
endif()
|
||||
endif()
|
||||
endmacro()
|
||||
|
||||
configure_codec(SSSE3 __SSSE3__)
|
||||
configure_codec(SSE41 __SSSE4_1__)
|
||||
configure_codec(SSE42 __SSSE4_2__)
|
||||
configure_codec(AVX)
|
||||
configure_codec(AVX2)
|
||||
configure_codec(AVX512)
|
||||
|
||||
elseif (_TARGET_ARCH STREQUAL "arm")
|
||||
set(BASE64_NEON32_CFLAGS "${COMPILE_FLAGS_NEON32}" CACHE STRING "the NEON32 compile flags (for 'lib/arch/neon32/codec.c')")
|
||||
mark_as_advanced(BASE64_NEON32_CFLAGS)
|
||||
|
||||
if (BASE64_WITH_NEON32)
|
||||
set_source_files_properties("lib/arch/neon32/codec.c" PROPERTIES
|
||||
COMPILE_FLAGS "${BASE64_NEON32_CFLAGS} "
|
||||
)
|
||||
endif()
|
||||
|
||||
#elseif (_TARGET_ARCH STREQUAL "arm64" AND BASE64_WITH_NEON64)
|
||||
|
||||
endif()
|
||||
|
||||
configure_file("${CMAKE_CURRENT_LIST_DIR}/cmake/config.h.in" "${CMAKE_CURRENT_BINARY_DIR}/config.h" @ONLY)
|
||||
|
||||
########################################################################
|
||||
# OpenMP Settings
|
||||
if (BASE64_WITH_OpenMP)
|
||||
target_link_libraries(base64 PRIVATE OpenMP::OpenMP_C)
|
||||
endif()
|
||||
|
||||
########################################################################
|
||||
if (BASE64_BUILD_TESTS)
|
||||
enable_testing()
|
||||
add_subdirectory(test)
|
||||
endif()
|
||||
|
||||
########################################################################
|
||||
# base64
|
||||
if (BASE64_BUILD_CLI)
|
||||
add_executable(base64-bin
|
||||
bin/base64.c
|
||||
)
|
||||
target_link_libraries(base64-bin PRIVATE base64)
|
||||
set_target_properties(base64-bin PROPERTIES
|
||||
OUTPUT_NAME base64
|
||||
)
|
||||
endif()
|
||||
|
||||
########################################################################
|
||||
# cmake install
|
||||
install(DIRECTORY include/ TYPE INCLUDE)
|
||||
install(TARGETS base64
|
||||
EXPORT base64-targets
|
||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
|
||||
)
|
||||
if (BASE64_BUILD_CLI)
|
||||
install(TARGETS base64-bin EXPORT base64-targets DESTINATION ${CMAKE_INSTALL_BINDIR})
|
||||
endif()
|
||||
|
||||
include(CMakePackageConfigHelpers)
|
||||
configure_package_config_file(cmake/base64-config.cmake.in
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/base64-config.cmake"
|
||||
|
||||
INSTALL_DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}"
|
||||
)
|
||||
write_basic_package_version_file(
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/base64-config-version.cmake"
|
||||
VERSION ${BASE64_VERSION}
|
||||
COMPATIBILITY SameMajorVersion
|
||||
)
|
||||
|
||||
install(FILES
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/base64-config.cmake"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/base64-config-version.cmake"
|
||||
DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}"
|
||||
)
|
||||
|
||||
install(EXPORT base64-targets
|
||||
NAMESPACE aklomp::
|
||||
DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}"
|
||||
)
|
||||
|
||||
########################################################################
|
||||
feature_summary(WHAT PACKAGES_FOUND PACKAGES_NOT_FOUND ENABLED_FEATURES DISABLED_FEATURES)
|
28
deps/base64/base64/LICENSE
vendored
28
deps/base64/base64/LICENSE
vendored
|
@ -1,28 +0,0 @@
|
|||
Copyright (c) 2005-2007, Nick Galbreath
|
||||
Copyright (c) 2015-2018, Wojciech Muła
|
||||
Copyright (c) 2016-2017, Matthieu Darbois
|
||||
Copyright (c) 2013-2022, Alfred Klomp
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
||||
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
109
deps/base64/base64/Makefile
vendored
109
deps/base64/base64/Makefile
vendored
|
@ -1,109 +0,0 @@
|
|||
CFLAGS += -std=c99 -O3 -Wall -Wextra -pedantic -DBASE64_STATIC_DEFINE
|
||||
|
||||
# Set OBJCOPY if not defined by environment:
|
||||
OBJCOPY ?= objcopy
|
||||
|
||||
OBJS = \
|
||||
lib/arch/avx512/codec.o \
|
||||
lib/arch/avx2/codec.o \
|
||||
lib/arch/generic/codec.o \
|
||||
lib/arch/neon32/codec.o \
|
||||
lib/arch/neon64/codec.o \
|
||||
lib/arch/ssse3/codec.o \
|
||||
lib/arch/sse41/codec.o \
|
||||
lib/arch/sse42/codec.o \
|
||||
lib/arch/avx/codec.o \
|
||||
lib/lib.o \
|
||||
lib/codec_choose.o \
|
||||
lib/tables/tables.o
|
||||
|
||||
HAVE_AVX512 = 0
|
||||
HAVE_AVX2 = 0
|
||||
HAVE_NEON32 = 0
|
||||
HAVE_NEON64 = 0
|
||||
HAVE_SSSE3 = 0
|
||||
HAVE_SSE41 = 0
|
||||
HAVE_SSE42 = 0
|
||||
HAVE_AVX = 0
|
||||
|
||||
# The user should supply compiler flags for the codecs they want to build.
|
||||
# Check which codecs we're going to include:
|
||||
ifdef AVX512_CFLAGS
|
||||
HAVE_AVX512 = 1
|
||||
endif
|
||||
ifdef AVX2_CFLAGS
|
||||
HAVE_AVX2 = 1
|
||||
endif
|
||||
ifdef NEON32_CFLAGS
|
||||
HAVE_NEON32 = 1
|
||||
endif
|
||||
ifdef NEON64_CFLAGS
|
||||
HAVE_NEON64 = 1
|
||||
endif
|
||||
ifdef SSSE3_CFLAGS
|
||||
HAVE_SSSE3 = 1
|
||||
endif
|
||||
ifdef SSE41_CFLAGS
|
||||
HAVE_SSE41 = 1
|
||||
endif
|
||||
ifdef SSE42_CFLAGS
|
||||
HAVE_SSE42 = 1
|
||||
endif
|
||||
ifdef AVX_CFLAGS
|
||||
HAVE_AVX = 1
|
||||
endif
|
||||
ifdef OPENMP
|
||||
CFLAGS += -fopenmp
|
||||
endif
|
||||
|
||||
TARGET := $(shell $(CC) -dumpmachine)
|
||||
|
||||
.PHONY: all analyze clean
|
||||
|
||||
all: bin/base64 lib/libbase64.o
|
||||
|
||||
bin/base64: bin/base64.o lib/libbase64.o
|
||||
$(CC) $(CFLAGS) -o $@ $^
|
||||
|
||||
# Workaround: mangle exported function names on MinGW32.
|
||||
lib/exports.build.txt: lib/exports.txt
|
||||
ifeq (i686-w64-mingw32, $(TARGET))
|
||||
sed -e 's/^/_/' $< > $@
|
||||
else
|
||||
cp -f $< $@
|
||||
endif
|
||||
|
||||
lib/libbase64.o: lib/exports.build.txt $(OBJS)
|
||||
$(LD) -r -o $@ $(OBJS)
|
||||
$(OBJCOPY) --keep-global-symbols=$< $@
|
||||
|
||||
lib/config.h:
|
||||
@echo "#define HAVE_AVX512 $(HAVE_AVX512)" > $@
|
||||
@echo "#define HAVE_AVX2 $(HAVE_AVX2)" >> $@
|
||||
@echo "#define HAVE_NEON32 $(HAVE_NEON32)" >> $@
|
||||
@echo "#define HAVE_NEON64 $(HAVE_NEON64)" >> $@
|
||||
@echo "#define HAVE_SSSE3 $(HAVE_SSSE3)" >> $@
|
||||
@echo "#define HAVE_SSE41 $(HAVE_SSE41)" >> $@
|
||||
@echo "#define HAVE_SSE42 $(HAVE_SSE42)" >> $@
|
||||
@echo "#define HAVE_AVX $(HAVE_AVX)" >> $@
|
||||
|
||||
$(OBJS): lib/config.h
|
||||
$(OBJS): CFLAGS += -Ilib
|
||||
|
||||
lib/arch/avx512/codec.o: CFLAGS += $(AVX512_CFLAGS)
|
||||
lib/arch/avx2/codec.o: CFLAGS += $(AVX2_CFLAGS)
|
||||
lib/arch/neon32/codec.o: CFLAGS += $(NEON32_CFLAGS)
|
||||
lib/arch/neon64/codec.o: CFLAGS += $(NEON64_CFLAGS)
|
||||
lib/arch/ssse3/codec.o: CFLAGS += $(SSSE3_CFLAGS)
|
||||
lib/arch/sse41/codec.o: CFLAGS += $(SSE41_CFLAGS)
|
||||
lib/arch/sse42/codec.o: CFLAGS += $(SSE42_CFLAGS)
|
||||
lib/arch/avx/codec.o: CFLAGS += $(AVX_CFLAGS)
|
||||
|
||||
%.o: %.c
|
||||
$(CC) $(CFLAGS) -o $@ -c $<
|
||||
|
||||
analyze: clean
|
||||
scan-build --use-analyzer=`which clang` --status-bugs make
|
||||
|
||||
clean:
|
||||
rm -f bin/base64 bin/base64.o lib/libbase64.o lib/config.h lib/exports.build.txt $(OBJS)
|
491
deps/base64/base64/README.md
vendored
491
deps/base64/base64/README.md
vendored
|
@ -1,491 +0,0 @@
|
|||
# Fast Base64 stream encoder/decoder
|
||||
|
||||
[![Build Status](https://github.com/aklomp/base64/actions/workflows/test.yml/badge.svg)](https://github.com/aklomp/base64/actions/workflows/test.yml)
|
||||
|
||||
This is an implementation of a base64 stream encoding/decoding library in C99
|
||||
with SIMD (AVX2, AVX512, NEON, AArch64/NEON, SSSE3, SSE4.1, SSE4.2, AVX) and
|
||||
[OpenMP](http://www.openmp.org) acceleration. It also contains wrapper functions
|
||||
to encode/decode simple length-delimited strings. This library aims to be:
|
||||
|
||||
- FAST;
|
||||
- easy to use;
|
||||
- elegant.
|
||||
|
||||
On x86, the library does runtime feature detection. The first time it's called,
|
||||
the library will determine the appropriate encoding/decoding routines for the
|
||||
machine. It then remembers them for the lifetime of the program. If your
|
||||
processor supports AVX2, SSSE3, SSE4.1, SSE4.2 or AVX instructions, the library
|
||||
will pick an optimized codec that lets it encode/decode 12 or 24 bytes at a
|
||||
time, which gives a speedup of four or more times compared to the "plain"
|
||||
bytewise codec.
|
||||
|
||||
AVX512 support is only for encoding at present, utilizing the AVX512 VL and VBMI
|
||||
instructions. The decoding part reuses the AVX2 implementation. Processors from
|
||||
Cannonlake (manufactured in 2018) onwards support these instructions.
|
||||
|
||||
NEON support is hardcoded to on or off at compile time, because portable
|
||||
runtime feature detection is unavailable on ARM.
|
||||
|
||||
Even if your processor does not support SIMD instructions, this is a very fast
|
||||
library. The fallback routine can process 32 or 64 bits of input in one round,
|
||||
depending on your processor's word width, which still makes it significantly
|
||||
faster than naive bytewise implementations. On some 64-bit machines, the 64-bit
|
||||
routines even outperform the SSSE3 ones.
|
||||
|
||||
To the author's knowledge, at the time of original release, this was the only
|
||||
Base64 library to offer SIMD acceleration. The author wrote
|
||||
[an article](http://www.alfredklomp.com/programming/sse-base64) explaining one
|
||||
possible SIMD approach to encoding/decoding Base64. The article can help figure
|
||||
out what the code is doing, and why.
|
||||
|
||||
Notable features:
|
||||
|
||||
- Really fast on x86 and ARM systems by using SIMD vector processing;
|
||||
- Can use [OpenMP](http://www.openmp.org) for even more parallel speedups;
|
||||
- Really fast on other 32 or 64-bit platforms through optimized routines;
|
||||
- Reads/writes blocks of streaming data;
|
||||
- Does not dynamically allocate memory;
|
||||
- Valid C99 that compiles with pedantic options on;
|
||||
- Re-entrant and threadsafe;
|
||||
- Unit tested;
|
||||
- Uses Duff's Device.
|
||||
|
||||
## Acknowledgements
|
||||
|
||||
The original AVX2, NEON and Aarch64/NEON codecs were generously contributed by
|
||||
[Inkymail](https://github.com/inkymail/base64), who, in their fork, also
|
||||
implemented some additional features. Their work is slowly being backported
|
||||
into this project.
|
||||
|
||||
The SSSE3 and AVX2 codecs were substantially improved by using some very clever
|
||||
optimizations described by Wojciech Muła in a
|
||||
[series](http://0x80.pl/notesen/2016-01-12-sse-base64-encoding.html) of
|
||||
[articles](http://0x80.pl/notesen/2016-01-17-sse-base64-decoding.html).
|
||||
His own code is [here](https://github.com/WojciechMula/toys/tree/master/base64).
|
||||
|
||||
The AVX512 encoder is based on code from Wojciech Muła's
|
||||
[base64simd](https://github.com/WojciechMula/base64simd) library.
|
||||
|
||||
The OpenMP implementation was added by Ferry Toth (@htot) from [Exalon Delft](http://www.exalondelft.nl).
|
||||
|
||||
## Building
|
||||
|
||||
The `lib` directory contains the code for the actual library.
|
||||
Typing `make` in the toplevel directory will build `lib/libbase64.o` and `bin/base64`.
|
||||
The first is a single, self-contained object file that you can link into your own project.
|
||||
The second is a standalone test binary that works similarly to the `base64` system utility.
|
||||
|
||||
The matching header file needed to use this library is in `include/libbase64.h`.
|
||||
|
||||
To compile just the "plain" library without SIMD codecs, type:
|
||||
|
||||
```sh
|
||||
make lib/libbase64.o
|
||||
```
|
||||
|
||||
Optional SIMD codecs can be included by specifying the `AVX2_CFLAGS`, `AVX512_CFLAGS`,
|
||||
`NEON32_CFLAGS`, `NEON64_CFLAGS`, `SSSE3_CFLAGS`, `SSE41_CFLAGS`, `SSE42_CFLAGS` and/or `AVX_CFLAGS` environment variables.
|
||||
A typical build invocation on x86 looks like this:
|
||||
|
||||
```sh
|
||||
AVX2_CFLAGS=-mavx2 SSSE3_CFLAGS=-mssse3 SSE41_CFLAGS=-msse4.1 SSE42_CFLAGS=-msse4.2 AVX_CFLAGS=-mavx make lib/libbase64.o
|
||||
```
|
||||
|
||||
### AVX2
|
||||
|
||||
To build and include the AVX2 codec, set the `AVX2_CFLAGS` environment variable to a value that will turn on AVX2 support in your compiler, typically `-mavx2`.
|
||||
Example:
|
||||
|
||||
```sh
|
||||
AVX2_CFLAGS=-mavx2 make
|
||||
```
|
||||
|
||||
### AVX512
|
||||
|
||||
To build and include the AVX512 codec, set the `AVX512_CFLAGS` environment variable to a value that will turn on AVX512 support in your compiler, typically `-mavx512vl -mavx512vbmi`.
|
||||
Example:
|
||||
|
||||
```sh
|
||||
AVX512_CFLAGS="-mavx512vl -mavx512vbmi" make
|
||||
```
|
||||
|
||||
The codec will only be used if runtime feature detection shows that the target machine supports the required AVX512 instructions.
|
||||
|
||||
### SSSE3
|
||||
|
||||
To build and include the SSSE3 codec, set the `SSSE3_CFLAGS` environment variable to a value that will turn on SSSE3 support in your compiler, typically `-mssse3`.
|
||||
Example:
|
||||
|
||||
```sh
|
||||
SSSE3_CFLAGS=-mssse3 make
|
||||
```
|
||||
|
||||
The codec will only be used if runtime feature detection shows that the target machine supports SSSE3.
|
||||
|
||||
### NEON
|
||||
|
||||
This library includes two NEON codecs: one for regular 32-bit ARM and one for the 64-bit AArch64 with NEON, which has double the amount of SIMD registers and can do full 64-byte table lookups.
|
||||
These codecs encode in 48-byte chunks and decode in massive 64-byte chunks, so they had to be augmented with a uint32/64 codec to stay fast on smaller inputs!
|
||||
|
||||
Use LLVM/Clang for compiling the NEON codecs.
|
||||
The code generation of at least GCC 4.6 (the version shipped with Raspbian and used for testing) contains a bug when compiling `vstq4_u8()`, and the generated assembly code is of low quality.
|
||||
NEON intrinsics are a known weak area of GCC.
|
||||
Clang does a better job.
|
||||
|
||||
NEON support can unfortunately not be portably detected at runtime from userland (the `mrc` instruction is privileged), so the default value for using the NEON codec is determined at compile-time.
|
||||
But you can do your own runtime detection.
|
||||
You can include the NEON codec and make it the default, then do a runtime check if the CPU has NEON support, and if not, force a downgrade to non-NEON with `BASE64_FORCE_PLAIN`.
|
||||
|
||||
These are your options:
|
||||
|
||||
1. Don't include NEON support;
|
||||
2. build NEON support and make it the default, but build all other code without NEON flags so that you can override the default at runtime with `BASE64_FORCE_PLAIN`;
|
||||
3. build everything with NEON support and make it the default;
|
||||
4. build everything with NEON support, but don't make it the default (which makes no sense).
|
||||
|
||||
For option 1, simply don't specify any NEON-specific compiler flags at all, like so:
|
||||
|
||||
```sh
|
||||
CC=clang CFLAGS="-march=armv6" make
|
||||
```
|
||||
|
||||
For option 2, keep your `CFLAGS` plain, but set the `NEON32_CFLAGS` environment variable to a value that will build NEON support.
|
||||
The line below, for instance, will build all the code at ARMv6 level, except for the NEON codec, which is built at ARMv7.
|
||||
It will also make the NEON codec the default.
|
||||
For ARMv6 platforms, override that default at runtime with the `BASE64_FORCE_PLAIN` flag.
|
||||
No ARMv7/NEON code will then be touched.
|
||||
|
||||
```sh
|
||||
CC=clang CFLAGS="-march=armv6" NEON32_CFLAGS="-march=armv7 -mfpu=neon" make
|
||||
```
|
||||
|
||||
For option 3, put everything in your `CFLAGS` and use a stub, but non-empty, `NEON32_CFLAGS`.
|
||||
This example works for the Raspberry Pi 2B V1.1, which has NEON support:
|
||||
|
||||
```sh
|
||||
CC=clang CFLAGS="-march=armv7 -mtune=cortex-a7" NEON32_CFLAGS="-mfpu=neon" make
|
||||
```
|
||||
|
||||
To build and include the NEON64 codec, use `CFLAGS` as usual to define the platform and set `NEON64_CFLAGS` to a nonempty stub.
|
||||
(The AArch64 target has mandatory NEON64 support.)
|
||||
Example:
|
||||
|
||||
```sh
|
||||
CC=clang CFLAGS="--target=aarch64-linux-gnu -march=armv8-a" NEON64_CFLAGS=" " make
|
||||
```
|
||||
|
||||
### OpenMP
|
||||
|
||||
To enable OpenMP on GCC you need to build with `-fopenmp`. This can be done by setting the `OPENMP` environment variable to `1`.
|
||||
|
||||
Example:
|
||||
|
||||
```sh
|
||||
OPENMP=1 make
|
||||
```
|
||||
|
||||
This will let the compiler define `_OPENMP`, which in turn will include the OpenMP optimized `lib_openmp.c` into `lib.c`.
|
||||
|
||||
By default the number of parallel threads will be equal to the number of cores of the processor.
|
||||
On a quad core with hyperthreading eight cores will be detected, but hyperthreading will not increase the performance.
|
||||
|
||||
To get verbose information about OpenMP start the program with `OMP_DISPLAY_ENV=VERBOSE`, for instance
|
||||
|
||||
```sh
|
||||
OMP_DISPLAY_ENV=VERBOSE test/benchmark
|
||||
```
|
||||
|
||||
To put a limit on the number of threads, start the program with `OMP_THREAD_LIMIT=n`, for instance
|
||||
|
||||
```sh
|
||||
OMP_THREAD_LIMIT=2 test/benchmark
|
||||
```
|
||||
|
||||
An example of running a benchmark with OpenMP, SSSE3 and AVX2 enabled:
|
||||
|
||||
```sh
|
||||
make clean && OPENMP=1 SSSE3_CFLAGS=-mssse3 AVX2_CFLAGS=-mavx2 make && OPENMP=1 make -C test
|
||||
```
|
||||
|
||||
## API reference
|
||||
|
||||
Strings are represented as a pointer and a length; they are not
|
||||
zero-terminated. This was a conscious design decision. In the decoding step,
|
||||
relying on zero-termination would make no sense since the output could contain
|
||||
legitimate zero bytes. In the encoding step, returning the length saves the
|
||||
overhead of calling `strlen()` on the output. If you insist on the trailing
|
||||
zero, you can easily add it yourself at the given offset.
|
||||
|
||||
### Flags
|
||||
|
||||
Some API calls take a `flags` argument.
|
||||
That argument can be used to force the use of a specific codec, even if that codec is a no-op in the current build.
|
||||
Mainly there for testing purposes, this is also useful on ARM where the only way to do runtime NEON detection is to ask the OS if it's available.
|
||||
The following constants can be used:
|
||||
|
||||
- `BASE64_FORCE_AVX2`
|
||||
- `BASE64_FORCE_AVX512`
|
||||
- `BASE64_FORCE_NEON32`
|
||||
- `BASE64_FORCE_NEON64`
|
||||
- `BASE64_FORCE_PLAIN`
|
||||
- `BASE64_FORCE_SSSE3`
|
||||
- `BASE64_FORCE_SSE41`
|
||||
- `BASE64_FORCE_SSE42`
|
||||
- `BASE64_FORCE_AVX`
|
||||
|
||||
Set `flags` to `0` for the default behavior, which is runtime feature detection on x86, a compile-time fixed codec on ARM, and the plain codec on other platforms.
|
||||
|
||||
### Encoding
|
||||
|
||||
#### base64_encode
|
||||
|
||||
```c
|
||||
void base64_encode
|
||||
( const char *src
|
||||
, size_t srclen
|
||||
, char *out
|
||||
, size_t *outlen
|
||||
, int flags
|
||||
) ;
|
||||
```
|
||||
|
||||
Wrapper function to encode a plain string of given length.
|
||||
Output is written to `out` without trailing zero.
|
||||
Output length in bytes is written to `outlen`.
|
||||
The buffer in `out` has been allocated by the caller and is at least 4/3 the size of the input.
|
||||
|
||||
#### base64_stream_encode_init
|
||||
|
||||
```c
|
||||
void base64_stream_encode_init
|
||||
( struct base64_state *state
|
||||
, int flags
|
||||
) ;
|
||||
```
|
||||
|
||||
Call this before calling `base64_stream_encode()` to init the state.
|
||||
|
||||
#### base64_stream_encode
|
||||
|
||||
```c
|
||||
void base64_stream_encode
|
||||
( struct base64_state *state
|
||||
, const char *src
|
||||
, size_t srclen
|
||||
, char *out
|
||||
, size_t *outlen
|
||||
) ;
|
||||
```
|
||||
|
||||
Encodes the block of data of given length at `src`, into the buffer at `out`.
|
||||
Caller is responsible for allocating a large enough out-buffer; it must be at least 4/3 the size of the in-buffer, but take some margin.
|
||||
Places the number of new bytes written into `outlen` (which is set to zero when the function starts).
|
||||
Does not zero-terminate or finalize the output.
|
||||
|
||||
#### base64_stream_encode_final
|
||||
|
||||
```c
|
||||
void base64_stream_encode_final
|
||||
( struct base64_state *state
|
||||
, char *out
|
||||
, size_t *outlen
|
||||
) ;
|
||||
```
|
||||
|
||||
Finalizes the output begun by previous calls to `base64_stream_encode()`.
|
||||
Adds the required end-of-stream markers if appropriate.
|
||||
`outlen` is modified and will contain the number of new bytes written at `out` (which will quite often be zero).
|
||||
|
||||
### Decoding
|
||||
|
||||
#### base64_decode
|
||||
|
||||
```c
|
||||
int base64_decode
|
||||
( const char *src
|
||||
, size_t srclen
|
||||
, char *out
|
||||
, size_t *outlen
|
||||
, int flags
|
||||
) ;
|
||||
```
|
||||
|
||||
Wrapper function to decode a plain string of given length.
|
||||
Output is written to `out` without trailing zero. Output length in bytes is written to `outlen`.
|
||||
The buffer in `out` has been allocated by the caller and is at least 3/4 the size of the input.
|
||||
Returns `1` for success, and `0` when a decode error has occurred due to invalid input.
|
||||
Returns `-1` if the chosen codec is not included in the current build.
|
||||
|
||||
#### base64_stream_decode_init
|
||||
|
||||
```c
|
||||
void base64_stream_decode_init
|
||||
( struct base64_state *state
|
||||
, int flags
|
||||
) ;
|
||||
```
|
||||
|
||||
Call this before calling `base64_stream_decode()` to init the state.
|
||||
|
||||
#### base64_stream_decode
|
||||
|
||||
```c
|
||||
int base64_stream_decode
|
||||
( struct base64_state *state
|
||||
, const char *src
|
||||
, size_t srclen
|
||||
, char *out
|
||||
, size_t *outlen
|
||||
) ;
|
||||
```
|
||||
|
||||
Decodes the block of data of given length at `src`, into the buffer at `out`.
|
||||
Caller is responsible for allocating a large enough out-buffer; it must be at least 3/4 the size of the in-buffer, but take some margin.
|
||||
Places the number of new bytes written into `outlen` (which is set to zero when the function starts).
|
||||
Does not zero-terminate the output.
|
||||
Returns 1 if all is well, and 0 if a decoding error was found, such as an invalid character.
|
||||
Returns -1 if the chosen codec is not included in the current build.
|
||||
Used by the test harness to check whether a codec is available for testing.
|
||||
|
||||
## Examples
|
||||
|
||||
A simple example of encoding a static string to base64 and printing the output
|
||||
to stdout:
|
||||
|
||||
```c
|
||||
#include <stdio.h> /* fwrite */
|
||||
#include "libbase64.h"
|
||||
|
||||
int main ()
|
||||
{
|
||||
char src[] = "hello world";
|
||||
char out[20];
|
||||
size_t srclen = sizeof(src) - 1;
|
||||
size_t outlen;
|
||||
|
||||
base64_encode(src, srclen, out, &outlen, 0);
|
||||
|
||||
fwrite(out, outlen, 1, stdout);
|
||||
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
|
||||
A simple example (no error checking, etc) of stream encoding standard input to
|
||||
standard output:
|
||||
|
||||
```c
|
||||
#include <stdio.h>
|
||||
#include "libbase64.h"
|
||||
|
||||
int main ()
|
||||
{
|
||||
size_t nread, nout;
|
||||
char buf[12000], out[16000];
|
||||
struct base64_state state;
|
||||
|
||||
// Initialize stream encoder:
|
||||
base64_stream_encode_init(&state, 0);
|
||||
|
||||
// Read contents of stdin into buffer:
|
||||
while ((nread = fread(buf, 1, sizeof(buf), stdin)) > 0) {
|
||||
|
||||
// Encode buffer:
|
||||
base64_stream_encode(&state, buf, nread, out, &nout);
|
||||
|
||||
// If there's output, print it to stdout:
|
||||
if (nout) {
|
||||
fwrite(out, nout, 1, stdout);
|
||||
}
|
||||
|
||||
// If the end of the input was reached, exit the loop:
|
||||
if (feof(stdin)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Finalize encoding:
|
||||
base64_stream_encode_final(&state, out, &nout);
|
||||
|
||||
// If the finalizing resulted in extra output bytes, print them:
|
||||
if (nout) {
|
||||
fwrite(out, nout, 1, stdout);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
|
||||
Also see `bin/base64.c` for a simple re-implementation of the `base64` utility.
|
||||
A file or standard input is fed through the encoder/decoder, and the output is
|
||||
written to standard output.
|
||||
|
||||
## Tests
|
||||
|
||||
See `test/` for a small test suite. Testing is automated with
|
||||
[GitHub Actions](https://github.com/aklomp/base64/actions), which builds and
|
||||
tests the code across various architectures.
|
||||
|
||||
## Benchmarks
|
||||
|
||||
Benchmarks can be run with the built-in benchmark program as follows:
|
||||
|
||||
```sh
|
||||
make -C test benchmark <buildflags> && test/benchmark
|
||||
```
|
||||
|
||||
It will run an encoding and decoding benchmark for all of the compiled-in codecs.
|
||||
|
||||
The tables below contain some results on random machines. All numbers measured with a 10MB buffer in MB/sec, rounded to the nearest integer.
|
||||
|
||||
\*: Update needed
|
||||
|
||||
x86 processors
|
||||
|
||||
| Processor | Plain enc | Plain dec | SSSE3 enc | SSSE3 dec | AVX enc | AVX dec | AVX2 enc | AVX2 dec |
|
||||
|-------------------------------------------|----------:|----------:|----------:|----------:|--------:|--------:|---------:|---------:|
|
||||
| i7-4771 @ 3.5 GHz | 833\* | 1111\* | 3333\* | 4444\* | TBD | TBD | 4999\* | 6666\* |
|
||||
| i7-4770 @ 3.4 GHz DDR1600 | 1790\* | 3038\* | 4899\* | 4043\* | 4796\* | 5709\* | 4681\* | 6386\* |
|
||||
| i7-4770 @ 3.4 GHz DDR1600 OPENMP 1 thread | 1784\* | 3041\* | 4945\* | 4035\* | 4776\* | 5719\* | 4661\* | 6294\* |
|
||||
| i7-4770 @ 3.4 GHz DDR1600 OPENMP 2 thread | 3401\* | 5729\* | 5489\* | 7444\* | 5003\* | 8624\* | 5105\* | 8558\* |
|
||||
| i7-4770 @ 3.4 GHz DDR1600 OPENMP 4 thread | 4884\* | 7099\* | 4917\* | 7057\* | 4799\* | 7143\* | 4902\* | 7219\* |
|
||||
| i7-4770 @ 3.4 GHz DDR1600 OPENMP 8 thread | 5212\* | 8849\* | 5284\* | 9099\* | 5289\* | 9220\* | 4849\* | 9200\* |
|
||||
| i7-4870HQ @ 2.5 GHz | 1471\* | 3066\* | 6721\* | 6962\* | 7015\* | 8267\* | 8328\* | 11576\* |
|
||||
| i5-4590S @ 3.0 GHz | 3356 | 3197 | 4363 | 6104 | 4243\* | 6233 | 4160\* | 6344 |
|
||||
| Xeon X5570 @ 2.93 GHz | 2161 | 1508 | 3160 | 3915 | - | - | - | - |
|
||||
| Pentium4 @ 3.4 GHz | 896 | 740 | - | - | - | - | - | - |
|
||||
| Atom N270 | 243 | 266 | 508 | 387 | - | - | - | - |
|
||||
| AMD E-450 | 645 | 564 | 625 | 634 | - | - | - | - |
|
||||
| Intel Edison @ 500 MHz | 79\* | 92\* | 152\* | 172\* | - | - | - | - |
|
||||
| Intel Edison @ 500 MHz OPENMP 2 thread | 158\* | 184\* | 300\* | 343\* | - | - | - | - |
|
||||
| Intel Edison @ 500 MHz (x86-64) | 162 | 119 | 209 | 164 | - | - | - | - |
|
||||
| Intel Edison @ 500 MHz (x86-64) 2 thread | 319 | 237 | 412 | 329 | - | - | - | - |
|
||||
|
||||
ARM processors
|
||||
|
||||
| Processor | Plain enc | Plain dec | NEON32 enc | NEON32 dec | NEON64 enc | NEON64 dec |
|
||||
|-------------------------------------------|----------:|----------:|-----------:|-----------:|-----------:|-----------:|
|
||||
| Raspberry PI B+ V1.2 | 46\* | 40\* | - | - | - | - |
|
||||
| Raspberry PI 2 B V1.1 | 85 | 141 | 300 | 225 | - | - |
|
||||
| Apple iPhone SE armv7 | 1056\* | 895\* | 2943\* | 2618\* | - | - |
|
||||
| Apple iPhone SE arm64 | 1061\* | 1239\* | - | - | 4098\* | 3983\* |
|
||||
|
||||
PowerPC processors
|
||||
|
||||
| Processor | Plain enc | Plain dec |
|
||||
|-------------------------------------------|----------:|----------:|
|
||||
| PowerPC E6500 @ 1.8GHz | 270\* | 265\* |
|
||||
|
||||
|
||||
Benchmarks on i7-4770 @ 3.4 GHz DDR1600 with varying buffer sizes:
|
||||

|
||||
|
||||
Note: optimal buffer size to take advantage of the cache is in the range of 100 kB to 1 MB, leading to 12x faster AVX encoding/decoding compared to Plain, or a throughput of 24/27GB/sec.
|
||||
Also note the performance degradation when the buffer size is less than 10 kB due to thread creation overhead.
|
||||
To prevent this from happening `lib_openmp.c` defines `OMP_THRESHOLD 20000`, requiring at least a 20000 byte buffer to enable multithreading.
|
||||
|
||||
## License
|
||||
|
||||
This repository is licensed under the
|
||||
[BSD 2-clause License](http://opensource.org/licenses/BSD-2-Clause). See the
|
||||
LICENSE file.
|
BIN
deps/base64/base64/base64-benchmarks.png
vendored
BIN
deps/base64/base64/base64-benchmarks.png
vendored
Binary file not shown.
Before Width: | Height: | Size: 21 KiB |
591
deps/base64/base64/bin/base64.c
vendored
591
deps/base64/base64/bin/base64.c
vendored
|
@ -1,591 +0,0 @@
|
|||
// Test for MinGW.
|
||||
#if defined(__MINGW32__) || defined(__MINGW64__)
|
||||
# define MINGW
|
||||
#endif
|
||||
|
||||
// Decide if the writev(2) system call needs to be emulated as a series of
|
||||
// write(2) calls. At least MinGW does not support writev(2).
|
||||
#ifdef MINGW
|
||||
# define EMULATE_WRITEV
|
||||
#endif
|
||||
|
||||
// Include the necessary system header when using the system's writev(2).
|
||||
#ifndef EMULATE_WRITEV
|
||||
# define _XOPEN_SOURCE // Unlock IOV_MAX
|
||||
# include <sys/uio.h>
|
||||
#endif
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <getopt.h>
|
||||
#include <errno.h>
|
||||
#include <limits.h>
|
||||
|
||||
#include "../include/libbase64.h"
|
||||
|
||||
// Size of the buffer for the "raw" (not base64-encoded) data in bytes.
|
||||
#define BUFFER_RAW_SIZE (1024 * 1024)
|
||||
|
||||
// Size of the buffer for the base64-encoded data in bytes. The base64-encoded
|
||||
// data is 4/3 the size of the input, with some margin to be sure.
|
||||
#define BUFFER_ENC_SIZE (BUFFER_RAW_SIZE * 4 / 3 + 16)
|
||||
|
||||
// Global config structure.
|
||||
struct config {

	// Program name as given on the command line; used as the prefix of
	// diagnostic messages.
	const char *name;

	// Input file name, kept only so that messages can mention it.
	const char *file;

	// Stream the input is read from.
	FILE *fp;

	// Column at which encoded output is wrapped (encoding only).
	size_t wrap;

	// True when the program decodes rather than encodes.
	bool decode;

	// True when only the help text should be printed before exiting.
	bool print_help;
};
|
||||
|
||||
// Input/output buffer structure.
|
||||
struct buffer {

	// Heap buffer holding raw (not base64-encoded) bytes.
	char *raw;

	// Heap buffer holding base64-encoded bytes.
	char *enc;
};
|
||||
|
||||
// Optionally emulate writev(2) as a series of write calls.
|
||||
#ifdef EMULATE_WRITEV
|
||||
|
||||
// Quick and dirty definition of IOV_MAX as it is probably not defined.
|
||||
#ifndef IOV_MAX
|
||||
# define IOV_MAX 1024
|
||||
#endif
|
||||
|
||||
// Quick and dirty definition of this system struct, for local use only.
|
||||
struct iovec {

	// Start of the data to write (untyped).
	void *iov_base;

	// Number of bytes at `iov_base'.
	size_t iov_len;
};
|
||||
|
||||
static ssize_t
|
||||
writev (const int fd, const struct iovec *iov, int iovcnt)
|
||||
{
|
||||
ssize_t r, nwrite = 0;
|
||||
|
||||
// Reset the error marker.
|
||||
errno = 0;
|
||||
|
||||
while (iovcnt-- > 0) {
|
||||
|
||||
// Write the vector; propagate errors back to the caller. Note
|
||||
// that this loses information about how much vectors have been
|
||||
// successfully written, but that also seems to be the case
|
||||
// with the real function. The API is somewhat flawed.
|
||||
if ((r = write(fd, iov->iov_base, iov->iov_len)) < 0) {
|
||||
return r;
|
||||
}
|
||||
|
||||
// Update the total write count.
|
||||
nwrite += r;
|
||||
|
||||
// Return early after a partial write; the caller should retry.
|
||||
if ((size_t) r != iov->iov_len) {
|
||||
break;
|
||||
}
|
||||
|
||||
// Move to the next vector.
|
||||
iov++;
|
||||
}
|
||||
|
||||
return nwrite;
|
||||
}
|
||||
|
||||
#endif // EMULATE_WRITEV
|
||||
|
||||
static bool
|
||||
buffer_alloc (const struct config *config, struct buffer *buf)
|
||||
{
|
||||
if ((buf->raw = malloc(BUFFER_RAW_SIZE)) == NULL ||
|
||||
(buf->enc = malloc(BUFFER_ENC_SIZE)) == NULL) {
|
||||
free(buf->raw);
|
||||
fprintf(stderr, "%s: malloc: %s\n",
|
||||
config->name, strerror(errno));
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void
|
||||
buffer_free (struct buffer *buf)
|
||||
{
|
||||
free(buf->raw);
|
||||
free(buf->enc);
|
||||
}
|
||||
|
||||
static bool
|
||||
writev_retry (const struct config *config, struct iovec *iov, size_t nvec)
|
||||
{
|
||||
// Writing nothing always succeeds.
|
||||
if (nvec == 0) {
|
||||
return true;
|
||||
}
|
||||
|
||||
while (true) {
|
||||
ssize_t nwrite;
|
||||
|
||||
// Try to write the vectors to stdout.
|
||||
if ((nwrite = writev(1, iov, nvec)) < 0) {
|
||||
|
||||
// Retry on EINTR.
|
||||
if (errno == EINTR) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Quit on other errors.
|
||||
fprintf(stderr, "%s: writev: %s\n",
|
||||
config->name, strerror(errno));
|
||||
return false;
|
||||
}
|
||||
|
||||
// The return value of `writev' is the number of bytes written.
|
||||
// To check for success, we traverse the list and remove all
|
||||
// written vectors. The call succeeded if the list is empty.
|
||||
while (true) {
|
||||
|
||||
// Retry if this vector is not or partially written.
|
||||
if (iov->iov_len > (size_t) nwrite) {
|
||||
char *base = iov->iov_base;
|
||||
|
||||
iov->iov_base = (size_t) nwrite + base;
|
||||
iov->iov_len -= (size_t) nwrite;
|
||||
break;
|
||||
}
|
||||
|
||||
// Move to the next vector.
|
||||
nwrite -= iov->iov_len;
|
||||
iov++;
|
||||
|
||||
// Return successfully if all vectors were written.
|
||||
if (--nvec == 0) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static inline bool
|
||||
iov_append (const struct config *config, struct iovec *iov,
|
||||
size_t *nvec, char *base, const size_t len)
|
||||
{
|
||||
// Add the buffer to the IO vector array.
|
||||
iov[*nvec].iov_base = base;
|
||||
iov[*nvec].iov_len = len;
|
||||
|
||||
// Increment the array index. Flush the array if it is full.
|
||||
if (++(*nvec) == IOV_MAX) {
|
||||
if (writev_retry(config, iov, IOV_MAX) == false) {
|
||||
return false;
|
||||
}
|
||||
*nvec = 0;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
write_stdout (const struct config *config, const char *buf, size_t len)
|
||||
{
|
||||
while (len > 0) {
|
||||
ssize_t nwrite;
|
||||
|
||||
// Try to write the buffer to stdout.
|
||||
if ((nwrite = write(1, buf, len)) < 0) {
|
||||
|
||||
// Retry on EINTR.
|
||||
if (errno == EINTR) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Quit on other errors.
|
||||
fprintf(stderr, "%s: write: %s\n",
|
||||
config->name, strerror(errno));
|
||||
return false;
|
||||
}
|
||||
|
||||
// Update the buffer position.
|
||||
buf += (size_t) nwrite;
|
||||
len -= (size_t) nwrite;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
write_wrapped (const struct config *config, char *buf, size_t len)
|
||||
{
|
||||
static size_t col = 0;
|
||||
|
||||
// Special case: if buf is NULL, print final trailing newline.
|
||||
if (buf == NULL) {
|
||||
if (config->wrap > 0 && col > 0) {
|
||||
return write_stdout(config, "\n", 1);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// If no wrap width is given, write the entire buffer.
|
||||
if (config->wrap == 0) {
|
||||
return write_stdout(config, buf, len);
|
||||
}
|
||||
|
||||
// Statically allocated IO vector buffer.
|
||||
static struct iovec iov[IOV_MAX];
|
||||
size_t nvec = 0;
|
||||
|
||||
while (len > 0) {
|
||||
|
||||
// Number of characters to fill the current line.
|
||||
size_t nwrite = config->wrap - col;
|
||||
|
||||
// Do not write more data than is available.
|
||||
if (nwrite > len) {
|
||||
nwrite = len;
|
||||
}
|
||||
|
||||
// Append the data to the IO vector array.
|
||||
if (iov_append(config, iov, &nvec, buf, nwrite) == false) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Advance the buffer.
|
||||
len -= nwrite;
|
||||
buf += nwrite;
|
||||
col += nwrite;
|
||||
|
||||
// If the line is full, append a newline.
|
||||
if (col == config->wrap) {
|
||||
if (iov_append(config, iov, &nvec, "\n", 1) == false) {
|
||||
return false;
|
||||
}
|
||||
col = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Write the remaining vectors.
|
||||
if (writev_retry(config, iov, nvec) == false) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
encode (const struct config *config, struct buffer *buf)
|
||||
{
|
||||
size_t nread, nout;
|
||||
struct base64_state state;
|
||||
|
||||
// Initialize the encoder's state structure.
|
||||
base64_stream_encode_init(&state, 0);
|
||||
|
||||
// Read raw data into the buffer.
|
||||
while ((nread = fread(buf->raw, 1, BUFFER_RAW_SIZE, config->fp)) > 0) {
|
||||
|
||||
// Encode the raw input into the encoded buffer.
|
||||
base64_stream_encode(&state, buf->raw, nread, buf->enc, &nout);
|
||||
|
||||
// Append the encoded data to the output stream.
|
||||
if (write_wrapped(config, buf->enc, nout) == false) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Check for stream errors.
|
||||
if (ferror(config->fp)) {
|
||||
fprintf(stderr, "%s: %s: read error\n",
|
||||
config->name, config->file);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Finalize the encoding by adding proper stream terminators.
|
||||
base64_stream_encode_final(&state, buf->enc, &nout);
|
||||
|
||||
// Append this tail to the output stream.
|
||||
if (write_wrapped(config, buf->enc, nout) == false) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Print optional trailing newline.
|
||||
if (write_wrapped(config, NULL, 0) == false) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Return the offset of the first newline character within the first `avail'
// bytes at `p', or `avail' if there is none.
static inline size_t
find_newline (const char *p, const size_t avail)
{
	// Delegate the scan to memchr(3) instead of a hand-written byte loop.
	// The length guard keeps an empty range from ever touching `p', as
	// was the case with the loop.
	const char *nl = (avail > 0) ? memchr(p, '\n', avail) : NULL;

	return (nl != NULL) ? (size_t) (nl - p) : avail;
}
|
||||
|
||||
static bool
|
||||
decode (const struct config *config, struct buffer *buf)
|
||||
{
|
||||
size_t avail;
|
||||
struct base64_state state;
|
||||
|
||||
// Initialize the decoder's state structure.
|
||||
base64_stream_decode_init(&state, 0);
|
||||
|
||||
// Read encoded data into the buffer. Use the smallest buffer size to
|
||||
// be on the safe side: the decoded output will fit the raw buffer.
|
||||
while ((avail = fread(buf->enc, 1, BUFFER_RAW_SIZE, config->fp)) > 0) {
|
||||
char *start = buf->enc;
|
||||
char *outbuf = buf->raw;
|
||||
size_t ototal = 0;
|
||||
|
||||
// By popular demand, this utility tries to be bug-compatible
|
||||
// with GNU `base64'. That includes silently ignoring newlines
|
||||
// in the input. Tokenize the input on newline characters.
|
||||
while (avail > 0) {
|
||||
|
||||
// Find the offset of the next newline character, which
|
||||
// is also the length of the next chunk.
|
||||
size_t outlen, len = find_newline(start, avail);
|
||||
|
||||
// Ignore empty chunks.
|
||||
if (len == 0) {
|
||||
start++;
|
||||
avail--;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Decode the chunk into the raw buffer.
|
||||
if (base64_stream_decode(&state, start, len,
|
||||
outbuf, &outlen) == 0) {
|
||||
fprintf(stderr, "%s: %s: decoding error\n",
|
||||
config->name, config->file);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Update the output buffer pointer and total size.
|
||||
outbuf += outlen;
|
||||
ototal += outlen;
|
||||
|
||||
// Bail out if the whole string has been consumed.
|
||||
if (len == avail) {
|
||||
break;
|
||||
}
|
||||
|
||||
// Move the start pointer past the newline.
|
||||
start += len + 1;
|
||||
avail -= len + 1;
|
||||
}
|
||||
|
||||
// Append the raw data to the output stream.
|
||||
if (write_stdout(config, buf->raw, ototal) == false) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Check for stream errors.
|
||||
if (ferror(config->fp)) {
|
||||
fprintf(stderr, "%s: %s: read error\n",
|
||||
config->name, config->file);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void
|
||||
usage (FILE *fp, const struct config *config)
|
||||
{
|
||||
const char *usage =
|
||||
"Usage: %s [OPTION]... [FILE]\n"
|
||||
"If no FILE is given or is specified as '-', "
|
||||
"read from standard input.\n"
|
||||
"Options:\n"
|
||||
" -d, --decode Decode a base64 stream.\n"
|
||||
" -h, --help Print this help text.\n"
|
||||
" -w, --wrap=COLS Wrap encoded lines at this column. "
|
||||
"Default 76, 0 to disable.\n";
|
||||
|
||||
fprintf(fp, usage, config->name);
|
||||
}
|
||||
|
||||
static bool
|
||||
get_wrap (struct config *config, const char *str)
|
||||
{
|
||||
char *eptr;
|
||||
|
||||
// Reject empty strings.
|
||||
if (*str == '\0') {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Convert the input string to a signed long.
|
||||
const long wrap = strtol(str, &eptr, 10);
|
||||
|
||||
// Reject negative numbers.
|
||||
if (wrap < 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Reject strings containing non-digits.
|
||||
if (*eptr != '\0') {
|
||||
return false;
|
||||
}
|
||||
|
||||
config->wrap = (size_t) wrap;
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
parse_opts (int argc, char **argv, struct config *config)
|
||||
{
|
||||
int c;
|
||||
static const struct option opts[] = {
|
||||
{ "decode", no_argument, NULL, 'd' },
|
||||
{ "help", no_argument, NULL, 'h' },
|
||||
{ "wrap", required_argument, NULL, 'w' },
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
// Remember the program's name.
|
||||
config->name = *argv;
|
||||
|
||||
// Parse command line options.
|
||||
while ((c = getopt_long(argc, argv, ":dhw:", opts, NULL)) != -1) {
|
||||
switch (c) {
|
||||
case 'd':
|
||||
config->decode = true;
|
||||
break;
|
||||
|
||||
case 'h':
|
||||
config->print_help = true;
|
||||
return true;
|
||||
|
||||
case 'w':
|
||||
if (get_wrap(config, optarg) == false) {
|
||||
fprintf(stderr,
|
||||
"%s: invalid wrap value '%s'\n",
|
||||
config->name, optarg);
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
|
||||
case ':':
|
||||
fprintf(stderr, "%s: missing argument for '%c'\n",
|
||||
config->name, optopt);
|
||||
return false;
|
||||
|
||||
default:
|
||||
fprintf(stderr, "%s: unknown option '%c'\n",
|
||||
config->name, optopt);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Return successfully if no filename was given.
|
||||
if (optind >= argc) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Return unsuccessfully if more than one filename was given.
|
||||
if (optind + 1 < argc) {
|
||||
fprintf(stderr, "%s: too many files\n", config->name);
|
||||
return false;
|
||||
}
|
||||
|
||||
// For compatibility with GNU Coreutils base64, treat a filename of '-'
|
||||
// as standard input.
|
||||
if (strcmp(argv[optind], "-") == 0) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Save the name of the file.
|
||||
config->file = argv[optind];
|
||||
|
||||
// Open the file.
|
||||
if ((config->fp = fopen(config->file, "rb")) == NULL) {
|
||||
fprintf(stderr, "%s: %s: %s\n",
|
||||
config->name, config->file, strerror(errno));
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
int
|
||||
main (int argc, char **argv)
|
||||
{
|
||||
// Default program config.
|
||||
struct config config = {
|
||||
.file = "stdin",
|
||||
.fp = stdin,
|
||||
.wrap = 76,
|
||||
.decode = false,
|
||||
.print_help = false,
|
||||
};
|
||||
struct buffer buf;
|
||||
|
||||
// Parse options from the command line.
|
||||
if (parse_opts(argc, argv, &config) == false) {
|
||||
usage(stderr, &config);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Return early if the user just wanted the help text.
|
||||
if (config.print_help) {
|
||||
usage(stdout, &config);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Allocate buffers.
|
||||
if (buffer_alloc(&config, &buf) == false) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Encode or decode the input based on the user's choice.
|
||||
const bool ret = config.decode
|
||||
? decode(&config, &buf)
|
||||
: encode(&config, &buf);
|
||||
|
||||
// Free the buffers.
|
||||
buffer_free(&buf);
|
||||
|
||||
// Close the input file.
|
||||
fclose(config.fp);
|
||||
|
||||
// Close the output stream.
|
||||
fclose(stdout);
|
||||
|
||||
// That's all, folks.
|
||||
return ret ? 0 : 1;
|
||||
}
|
|
@ -1,30 +0,0 @@
|
|||
# Written in 2017 by Henrik Steffen Gaßmann henrik@gassmann.onl
|
||||
#
|
||||
# To the extent possible under law, the author(s) have dedicated all
|
||||
# copyright and related and neighboring rights to this software to the
|
||||
# public domain worldwide. This software is distributed without any warranty.
|
||||
#
|
||||
# You should have received a copy of the CC0 Public Domain Dedication
|
||||
# along with this software. If not, see
|
||||
#
|
||||
# http://creativecommons.org/publicdomain/zero/1.0/
|
||||
#
|
||||
########################################################################
|
||||
|
||||
set(TARGET_ARCHITECTURE_TEST_FILE "${CMAKE_CURRENT_LIST_DIR}/../test-arch.c")
|
||||
|
||||
# Detect the architecture the C compiler is targeting and report it to the
# caller's scope.
#
# The probe file named by TARGET_ARCHITECTURE_TEST_FILE is compiled with
# try_compile() and the compiler output is searched for a marker of the form
# "##arch=<name>##".
#
# Sets in the parent scope:
#   <OUTPUT_VARIABLE>          the detected name, or "unknown"
#   <OUTPUT_VARIABLE>_<name>   1
#
# A warning is issued when the architecture is "unknown". That now also covers
# the case where no marker is found in the compiler output at all, which used
# to yield an empty name and a variable called "<OUTPUT_VARIABLE>_" without any
# diagnostic.
function(detect_target_architecture OUTPUT_VARIABLE)
  message(STATUS "${CMAKE_CURRENT_LIST_DIR}")
  try_compile(_IGNORED "${CMAKE_CURRENT_BINARY_DIR}"
    "${TARGET_ARCHITECTURE_TEST_FILE}"
    OUTPUT_VARIABLE _LOG
  )

  # _MARKER receives the whole match and is empty when nothing matched.
  string(REGEX MATCH "##arch=([^#]+)##" _MARKER "${_LOG}")

  if (NOT "${_MARKER}" STREQUAL "")
    set(_ARCH "${CMAKE_MATCH_1}")
  else()
    set(_ARCH "unknown")
  endif()

  set(${OUTPUT_VARIABLE} "${_ARCH}" PARENT_SCOPE)
  set("${OUTPUT_VARIABLE}_${_ARCH}" 1 PARENT_SCOPE)
  if (_ARCH STREQUAL "unknown")
    message(WARNING "could not detect the target architecture.")
  endif()
endfunction()
|
|
@ -1,36 +0,0 @@
|
|||
# Written in 2016-2017 by Henrik Steffen Gaßmann henrik@gassmann.onl
|
||||
#
|
||||
# To the extent possible under law, the author(s) have dedicated all
|
||||
# copyright and related and neighboring rights to this software to the
|
||||
# public domain worldwide. This software is distributed without any warranty.
|
||||
#
|
||||
# You should have received a copy of the CC0 Public Domain Dedication
|
||||
# along with this software. If not, see
|
||||
#
|
||||
# http://creativecommons.org/publicdomain/zero/1.0/
|
||||
#
|
||||
########################################################################
|
||||
|
||||
########################################################################
|
||||
# compiler flags definition
|
||||
# Define the COMPILE_FLAGS_<CODEC> variables holding the compiler options for
# each SIMD codec. Being a macro, the variables are set in the caller's scope.
# GNU, Clang and AppleClang get the matching -m options; otherwise, when MSVC
# is set, the MSVC values below are used. For any other compiler no variable
# is set.
macro(define_SIMD_compile_flags)
  if (CMAKE_C_COMPILER_ID STREQUAL "GNU"
      OR CMAKE_C_COMPILER_ID STREQUAL "Clang"
      OR CMAKE_C_COMPILER_ID STREQUAL "AppleClang")
    # x86 codecs
    set(COMPILE_FLAGS_SSSE3  "-mssse3")
    set(COMPILE_FLAGS_SSE41  "-msse4.1")
    set(COMPILE_FLAGS_SSE42  "-msse4.2")
    set(COMPILE_FLAGS_AVX    "-mavx")
    set(COMPILE_FLAGS_AVX2   "-mavx2")
    set(COMPILE_FLAGS_AVX512 "-mavx512vl -mavx512vbmi")

    # ARM codecs
    set(COMPILE_FLAGS_NEON32 "-mfpu=neon")
  elseif (MSVC)
    # Only the AVX-family codecs get an /arch option here.
    set(COMPILE_FLAGS_SSSE3  " ")
    set(COMPILE_FLAGS_SSE41  " ")
    set(COMPILE_FLAGS_SSE42  " ")
    set(COMPILE_FLAGS_AVX    "/arch:AVX")
    set(COMPILE_FLAGS_AVX2   "/arch:AVX2")
    set(COMPILE_FLAGS_AVX512 "/arch:AVX512")
  endif()
endmacro()
|
|
@ -1,5 +0,0 @@
|
|||
@PACKAGE_INIT@
|
||||
|
||||
include("${CMAKE_CURRENT_LIST_DIR}/base64-targets.cmake")
|
||||
|
||||
check_required_components(base64)
|
28
deps/base64/base64/cmake/config.h.in
vendored
28
deps/base64/base64/cmake/config.h.in
vendored
|
@ -1,28 +0,0 @@
|
|||
#ifndef BASE64_CONFIG_H
|
||||
#define BASE64_CONFIG_H
|
||||
|
||||
#cmakedefine01 BASE64_WITH_SSSE3
|
||||
#define HAVE_SSSE3 BASE64_WITH_SSSE3
|
||||
|
||||
#cmakedefine01 BASE64_WITH_SSE41
|
||||
#define HAVE_SSE41 BASE64_WITH_SSE41
|
||||
|
||||
#cmakedefine01 BASE64_WITH_SSE42
|
||||
#define HAVE_SSE42 BASE64_WITH_SSE42
|
||||
|
||||
#cmakedefine01 BASE64_WITH_AVX
|
||||
#define HAVE_AVX BASE64_WITH_AVX
|
||||
|
||||
#cmakedefine01 BASE64_WITH_AVX2
|
||||
#define HAVE_AVX2 BASE64_WITH_AVX2
|
||||
|
||||
#cmakedefine01 BASE64_WITH_AVX512
|
||||
#define HAVE_AVX512 BASE64_WITH_AVX512
|
||||
|
||||
#cmakedefine01 BASE64_WITH_NEON32
|
||||
#define HAVE_NEON32 BASE64_WITH_NEON32
|
||||
|
||||
#cmakedefine01 BASE64_WITH_NEON64
|
||||
#define HAVE_NEON64 BASE64_WITH_NEON64
|
||||
|
||||
#endif // BASE64_CONFIG_H
|
35
deps/base64/base64/cmake/test-arch.c
vendored
35
deps/base64/base64/cmake/test-arch.c
vendored
|
@ -1,35 +0,0 @@
|
|||
// Written in 2017 by Henrik Steffen Gaßmann henrik@gassmann.onl
|
||||
//
|
||||
// To the extent possible under law, the author(s) have dedicated all
|
||||
// copyright and related and neighboring rights to this software to the
|
||||
// public domain worldwide. This software is distributed without any warranty.
|
||||
//
|
||||
// You should have received a copy of the CC0 Public Domain Dedication
|
||||
// along with this software. If not, see
|
||||
//
|
||||
// http://creativecommons.org/publicdomain/zero/1.0/
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// ARM 64-Bit
|
||||
#if defined(__aarch64__)
|
||||
#error ##arch=arm64##
|
||||
|
||||
// ARM 32-Bit
|
||||
#elif defined(__arm__) \
|
||||
|| defined(_M_ARM)
|
||||
#error ##arch=arm##
|
||||
|
||||
// x86 64-Bit
|
||||
#elif defined(__x86_64__) \
|
||||
|| defined(_M_X64)
|
||||
#error ##arch=x64##
|
||||
|
||||
// x86 32-Bit
|
||||
#elif defined(__i386__) \
|
||||
|| defined(_M_IX86)
|
||||
#error ##arch=x86##
|
||||
|
||||
#else
|
||||
#error ##arch=unknown##
|
||||
#endif
|
146
deps/base64/base64/include/libbase64.h
vendored
146
deps/base64/base64/include/libbase64.h
vendored
|
@ -1,146 +0,0 @@
|
|||
#ifndef LIBBASE64_H
|
||||
#define LIBBASE64_H
|
||||
|
||||
#include <stddef.h> /* size_t */
|
||||
|
||||
|
||||
#if defined(_WIN32) || defined(__CYGWIN__)
|
||||
#define BASE64_SYMBOL_IMPORT __declspec(dllimport)
|
||||
#define BASE64_SYMBOL_EXPORT __declspec(dllexport)
|
||||
#define BASE64_SYMBOL_PRIVATE
|
||||
|
||||
#elif __GNUC__ >= 4
|
||||
#define BASE64_SYMBOL_IMPORT __attribute__ ((visibility ("default")))
|
||||
#define BASE64_SYMBOL_EXPORT __attribute__ ((visibility ("default")))
|
||||
#define BASE64_SYMBOL_PRIVATE __attribute__ ((visibility ("hidden")))
|
||||
|
||||
#else
|
||||
#define BASE64_SYMBOL_IMPORT
|
||||
#define BASE64_SYMBOL_EXPORT
|
||||
#define BASE64_SYMBOL_PRIVATE
|
||||
#endif
|
||||
|
||||
#if defined(BASE64_STATIC_DEFINE)
|
||||
#define BASE64_EXPORT
|
||||
#define BASE64_NO_EXPORT
|
||||
|
||||
#else
|
||||
#if defined(BASE64_EXPORTS) // defined if we are building the shared library
|
||||
#define BASE64_EXPORT BASE64_SYMBOL_EXPORT
|
||||
|
||||
#else
|
||||
#define BASE64_EXPORT BASE64_SYMBOL_IMPORT
|
||||
#endif
|
||||
|
||||
#define BASE64_NO_EXPORT BASE64_SYMBOL_PRIVATE
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* These are the flags that can be passed in the `flags` argument. The values
|
||||
* below force the use of a given codec, even if that codec is a no-op in the
|
||||
* current build. Used in testing. Set to 0 for the default behavior, which is
|
||||
* runtime feature detection on x86, a compile-time fixed codec on ARM, and
|
||||
* the plain codec on other platforms: */
|
||||
#define BASE64_FORCE_AVX2 (1 << 0)
|
||||
#define BASE64_FORCE_NEON32 (1 << 1)
|
||||
#define BASE64_FORCE_NEON64 (1 << 2)
|
||||
#define BASE64_FORCE_PLAIN (1 << 3)
|
||||
#define BASE64_FORCE_SSSE3 (1 << 4)
|
||||
#define BASE64_FORCE_SSE41 (1 << 5)
|
||||
#define BASE64_FORCE_SSE42 (1 << 6)
|
||||
#define BASE64_FORCE_AVX (1 << 7)
|
||||
#define BASE64_FORCE_AVX512 (1 << 8)
|
||||
|
||||
/* State carried between calls of the streaming encoder and decoder. It is
 * set up by base64_stream_encode_init() or base64_stream_decode_init() and
 * then passed to every subsequent base64_stream_*() call.
 * NOTE(review): the field meanings are not documented in this header; the
 * per-field notes below are assumptions to be confirmed against the codecs. */
struct base64_state {
|
||||
/* presumably nonzero once end-of-stream has been seen -- TODO confirm */
int eof;
|
||||
/* presumably the count of pending bytes carried over -- TODO confirm */
int bytes;
|
||||
/* presumably the BASE64_FORCE_* flags given to the init call -- TODO confirm */
int flags;
|
||||
/* presumably leftover bits from a partially processed group -- TODO confirm */
unsigned char carry;
|
||||
};
|
||||
|
||||
/* Wrapper function to encode a plain string of given length. Output is written
|
||||
* to *out without trailing zero. Output length in bytes is written to *outlen.
|
||||
* The buffer in `out` has been allocated by the caller and is at least 4/3 the
|
||||
* size of the input. See above for `flags`; set to 0 for default operation: */
|
||||
void BASE64_EXPORT base64_encode
|
||||
( const char *src
|
||||
, size_t srclen
|
||||
, char *out
|
||||
, size_t *outlen
|
||||
, int flags
|
||||
) ;
|
||||
|
||||
/* Call this before calling base64_stream_encode() to init the state. See above
|
||||
* for `flags`; set to 0 for default operation: */
|
||||
void BASE64_EXPORT base64_stream_encode_init
|
||||
( struct base64_state *state
|
||||
, int flags
|
||||
) ;
|
||||
|
||||
/* Encodes the block of data of given length at `src`, into the buffer at
|
||||
* `out`. Caller is responsible for allocating a large enough out-buffer; it
|
||||
* must be at least 4/3 the size of the in-buffer, but take some margin. Places
|
||||
* the number of new bytes written into `outlen` (which is set to zero when the
|
||||
* function starts). Does not zero-terminate or finalize the output. */
|
||||
void BASE64_EXPORT base64_stream_encode
|
||||
( struct base64_state *state
|
||||
, const char *src
|
||||
, size_t srclen
|
||||
, char *out
|
||||
, size_t *outlen
|
||||
) ;
|
||||
|
||||
/* Finalizes the output begun by previous calls to `base64_stream_encode()`.
|
||||
* Adds the required end-of-stream markers if appropriate. `outlen` is modified
|
||||
* and will contain the number of new bytes written at `out` (which will quite
|
||||
* often be zero). */
|
||||
void BASE64_EXPORT base64_stream_encode_final
|
||||
( struct base64_state *state
|
||||
, char *out
|
||||
, size_t *outlen
|
||||
) ;
|
||||
|
||||
/* Wrapper function to decode a plain string of given length. Output is written
|
||||
* to *out without trailing zero. Output length in bytes is written to *outlen.
|
||||
* The buffer in `out` has been allocated by the caller and is at least 3/4 the
|
||||
* size of the input. See above for `flags`, set to 0 for default operation: */
|
||||
int BASE64_EXPORT base64_decode
|
||||
( const char *src
|
||||
, size_t srclen
|
||||
, char *out
|
||||
, size_t *outlen
|
||||
, int flags
|
||||
) ;
|
||||
|
||||
/* Call this before calling base64_stream_decode() to init the state. See above
|
||||
* for `flags`; set to 0 for default operation: */
|
||||
void BASE64_EXPORT base64_stream_decode_init
|
||||
( struct base64_state *state
|
||||
, int flags
|
||||
) ;
|
||||
|
||||
/* Decodes the block of data of given length at `src`, into the buffer at
|
||||
* `out`. Caller is responsible for allocating a large enough out-buffer; it
|
||||
* must be at least 3/4 the size of the in-buffer, but take some margin. Places
|
||||
* the number of new bytes written into `outlen` (which is set to zero when the
|
||||
* function starts). Does not zero-terminate the output. Returns 1 if all is
|
||||
* well, and 0 if a decoding error was found, such as an invalid character.
|
||||
* Returns -1 if the chosen codec is not included in the current build. Used by
|
||||
* the test harness to check whether a codec is available for testing. */
|
||||
int BASE64_EXPORT base64_stream_decode
|
||||
( struct base64_state *state
|
||||
, const char *src
|
||||
, size_t srclen
|
||||
, char *out
|
||||
, size_t *outlen
|
||||
) ;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* LIBBASE64_H */
|
66
deps/base64/base64/lib/arch/avx/codec.c
vendored
66
deps/base64/base64/lib/arch/avx/codec.c
vendored
|
@ -1,66 +0,0 @@
|
|||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "../../../include/libbase64.h"
|
||||
#include "../../tables/tables.h"
|
||||
#include "../../codecs.h"
|
||||
#include "config.h"
|
||||
#include "../../env.h"
|
||||
|
||||
#if HAVE_AVX
|
||||
#include <immintrin.h>
|
||||
|
||||
// Only enable inline assembly on supported compilers and on 64-bit CPUs.
|
||||
#ifndef BASE64_AVX_USE_ASM
|
||||
# if (defined(__GNUC__) || defined(__clang__)) && BASE64_WORDSIZE == 64
|
||||
# define BASE64_AVX_USE_ASM 1
|
||||
# else
|
||||
# define BASE64_AVX_USE_ASM 0
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#include "../ssse3/dec_reshuffle.c"
|
||||
#include "../ssse3/dec_loop.c"
|
||||
|
||||
#if BASE64_AVX_USE_ASM
|
||||
# include "enc_loop_asm.c"
|
||||
#else
|
||||
# include "../ssse3/enc_translate.c"
|
||||
# include "../ssse3/enc_reshuffle.c"
|
||||
# include "../ssse3/enc_loop.c"
|
||||
#endif
|
||||
|
||||
#endif // HAVE_AVX
|
||||
|
||||
// AVX encoder entry point, declared through the BASE64_ENC_FUNCTION macro.
// With HAVE_AVX set, the body is stitched together from the generic encoder
// head, one of the two bulk loops below, and the generic encoder tail, all
// pulled in by textual #include. Without HAVE_AVX the body is just
// BASE64_ENC_STUB.
// NOTE(review): s, slen, o and olen are not declared in this file; presumably
// they come from the macro or from enc_head.c -- confirm.
BASE64_ENC_FUNCTION(avx)
|
||||
{
|
||||
#if HAVE_AVX
|
||||
#include "../generic/enc_head.c"
|
||||
|
||||
// For supported compilers, use a hand-optimized inline assembly
|
||||
// encoder. Otherwise fall back on the SSSE3 encoder, but compiled with
|
||||
// AVX flags to generate better optimized AVX code.
|
||||
|
||||
#if BASE64_AVX_USE_ASM
|
||||
enc_loop_avx(&s, &slen, &o, &olen);
|
||||
#else
|
||||
enc_loop_ssse3(&s, &slen, &o, &olen);
|
||||
#endif
|
||||
|
||||
#include "../generic/enc_tail.c"
|
||||
#else
|
||||
BASE64_ENC_STUB
|
||||
#endif
|
||||
}
|
||||
|
||||
// AVX decoder entry point, declared through the BASE64_DEC_FUNCTION macro.
// There is no AVX-specific decoder loop: with HAVE_AVX set, the SSSE3 loop
// (included by this file from ../ssse3/dec_loop.c) runs between the generic
// decoder head and tail. Without HAVE_AVX the body is just BASE64_DEC_STUB.
// NOTE(review): s, slen, o and olen are not declared in this file; presumably
// they come from the macro or from dec_head.c -- confirm.
BASE64_DEC_FUNCTION(avx)
|
||||
{
|
||||
#if HAVE_AVX
|
||||
#include "../generic/dec_head.c"
|
||||
dec_loop_ssse3(&s, &slen, &o, &olen);
|
||||
#include "../generic/dec_tail.c"
|
||||
#else
|
||||
BASE64_DEC_STUB
|
||||
#endif
|
||||
}
|
264
deps/base64/base64/lib/arch/avx/enc_loop_asm.c
vendored
264
deps/base64/base64/lib/arch/avx/enc_loop_asm.c
vendored
|
@ -1,264 +0,0 @@
|
|||
// Apologies in advance for combining the preprocessor with inline assembly,
|
||||
// two notoriously gnarly parts of C, but it was necessary to avoid a lot of
|
||||
// code repetition. The preprocessor is used to template large sections of
|
||||
// inline assembly that differ only in the registers used. If the code was
|
||||
// written out by hand, it would become very large and hard to audit.
|
||||
|
||||
// Generate a block of inline assembly that loads register R0 from memory. The
|
||||
// offset at which the register is loaded is set by the given round.
|
||||
#define LOAD(R0, ROUND) \
|
||||
"vlddqu ("#ROUND" * 12)(%[src]), %["R0"] \n\t"
|
||||
|
||||
// Generate a block of inline assembly that deinterleaves and shuffles register
|
||||
// R0 using preloaded constants. Outputs in R1; R2 is used as a temporary.
|
||||
#define SHUF(R0, R1, R2) \
|
||||
"vpshufb %[lut0], %["R0"], %["R1"] \n\t" \
|
||||
"vpand %["R1"], %[msk0], %["R2"] \n\t" \
|
||||
"vpand %["R1"], %[msk2], %["R1"] \n\t" \
|
||||
"vpmulhuw %["R2"], %[msk1], %["R2"] \n\t" \
|
||||
"vpmullw %["R1"], %[msk3], %["R1"] \n\t" \
|
||||
"vpor %["R1"], %["R2"], %["R1"] \n\t"
|
||||
|
||||
// Generate a block of inline assembly that takes R1 and translates its contents
|
||||
// to the base64 alphabet, using preloaded constants. Outputs in R0; clobbers R2.
|
||||
#define TRAN(R0, R1, R2) \
|
||||
"vpsubusb %[n51], %["R1"], %["R0"] \n\t" \
|
||||
"vpcmpgtb %[n25], %["R1"], %["R2"] \n\t" \
|
||||
"vpsubb %["R2"], %["R0"], %["R0"] \n\t" \
|
||||
"vpshufb %["R0"], %[lut1], %["R2"] \n\t" \
|
||||
"vpaddb %["R1"], %["R2"], %["R0"] \n\t"
|
||||
|
||||
// Generate a block of inline assembly that stores the given register R0 at an
|
||||
// offset set by the given round.
|
||||
#define STOR(R0, ROUND) \
|
||||
"vmovdqu %["R0"], ("#ROUND" * 16)(%[dst]) \n\t"
|
||||
|
||||
// Generate a block of inline assembly that generates a single self-contained
|
||||
// encoder round: fetch the data, process it, and store the result. Then update
|
||||
// the source and destination pointers.
|
||||
#define ROUND() \
|
||||
LOAD("a", 0) \
|
||||
SHUF("a", "b", "c") \
|
||||
TRAN("a", "b", "c") \
|
||||
STOR("a", 0) \
|
||||
"add $12, %[src] \n\t" \
|
||||
"add $16, %[dst] \n\t"
|
||||
|
||||
// Define a macro that initiates a three-way interleaved encoding round by
|
||||
// preloading registers a, b and c from memory.
|
||||
// The register graph shows which registers are in use during each step, and
|
||||
// is a visual aid for choosing registers for that step. Symbol index:
|
||||
//
|
||||
// + indicates that a register is loaded by that step.
|
||||
// | indicates that a register is in use and must not be touched.
|
||||
// - indicates that a register is decommissioned by that step.
|
||||
// x indicates that a register is used as a temporary by that step.
|
||||
// V indicates that a register is an input or output to the macro.
|
||||
//
|
||||
#define ROUND_3_INIT() /* a b c d e f */ \
|
||||
LOAD("a", 0) /* + */ \
|
||||
SHUF("a", "d", "e") /* | + x */ \
|
||||
LOAD("b", 1) /* | + | */ \
|
||||
TRAN("a", "d", "e") /* | | - x */ \
|
||||
LOAD("c", 2) /* V V V */
|
||||
|
||||
// Define a macro that translates, shuffles and stores the input registers A, B
|
||||
// and C, and preloads registers D, E and F for the next round.
|
||||
// This macro can be arbitrarily daisy-chained by feeding output registers D, E
|
||||
// and F back into the next round as input registers A, B and C. The macro
|
||||
// carefully interleaves memory operations with data operations for optimal
|
||||
// pipelined performance.
|
||||
|
||||
#define ROUND_3(ROUND, A,B,C,D,E,F) /* A B C D E F */ \
|
||||
LOAD(D, (ROUND + 3)) /* V V V + */ \
|
||||
SHUF(B, E, F) /* | | | | + x */ \
|
||||
STOR(A, (ROUND + 0)) /* - | | | | */ \
|
||||
TRAN(B, E, F) /* | | | - x */ \
|
||||
LOAD(E, (ROUND + 4)) /* | | | + */ \
|
||||
SHUF(C, A, F) /* + | | | | x */ \
|
||||
STOR(B, (ROUND + 1)) /* | - | | | */ \
|
||||
TRAN(C, A, F) /* - | | | x */ \
|
||||
LOAD(F, (ROUND + 5)) /* | | | + */ \
|
||||
SHUF(D, A, B) /* + x | | | | */ \
|
||||
STOR(C, (ROUND + 2)) /* | - | | | */ \
|
||||
TRAN(D, A, B) /* - x V V V */
|
||||
|
||||
// Define a macro that terminates a ROUND_3 macro by taking pre-loaded
|
||||
// registers D, E and F, and translating, shuffling and storing them.
|
||||
#define ROUND_3_END(ROUND, A,B,C,D,E,F) /* A B C D E F */ \
|
||||
SHUF(E, A, B) /* + x V V V */ \
|
||||
STOR(D, (ROUND + 3)) /* | - | | */ \
|
||||
TRAN(E, A, B) /* - x | | */ \
|
||||
SHUF(F, C, D) /* + x | | */ \
|
||||
STOR(E, (ROUND + 4)) /* | - | */ \
|
||||
TRAN(F, C, D) /* - x | */ \
|
||||
STOR(F, (ROUND + 5)) /* - */
|
||||
|
||||
// Define a type A round. Inputs are a, b, and c, outputs are d, e, and f.
|
||||
#define ROUND_3_A(ROUND) \
|
||||
ROUND_3(ROUND, "a", "b", "c", "d", "e", "f")
|
||||
|
||||
// Define a type B round. Inputs and outputs are swapped with regard to type A.
|
||||
#define ROUND_3_B(ROUND) \
|
||||
ROUND_3(ROUND, "d", "e", "f", "a", "b", "c")
|
||||
|
||||
// Terminating macro for a type A round.
|
||||
#define ROUND_3_A_LAST(ROUND) \
|
||||
ROUND_3_A(ROUND) \
|
||||
ROUND_3_END(ROUND, "a", "b", "c", "d", "e", "f")
|
||||
|
||||
// Terminating macro for a type B round.
|
||||
#define ROUND_3_B_LAST(ROUND) \
|
||||
ROUND_3_B(ROUND) \
|
||||
ROUND_3_END(ROUND, "d", "e", "f", "a", "b", "c")
|
||||
|
||||
// Suppress clang's warning that the literal string in the asm statement is
|
||||
// overlong (longer than the ISO-mandated minimum size of 4095 bytes for C99
|
||||
// compilers). It may be true, but the goal here is not C99 portability.
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Woverlength-strings"
|
||||
|
||||
// Bulk AVX encoder loop written in inline assembly. Each round reads from
// *s and writes 16 output bytes to *o, consuming 12 input bytes. On return,
// *s and *o have been advanced past the processed data, *slen has been reduced
// by the number of input bytes consumed and *olen increased by the number of
// output bytes produced. Returns without doing anything when fewer than 16
// input bytes are available; any input that is not a whole round is left
// unprocessed.
static inline void
|
||||
enc_loop_avx (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
|
||||
{
|
||||
// For a clearer explanation of the algorithm used by this function,
|
||||
// please refer to the plain (not inline assembly) implementation. This
|
||||
// function follows the same basic logic.
|
||||
|
||||
if (*slen < 16) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Process blocks of 12 bytes at a time. Input is read in blocks of 16
|
||||
// bytes, so "reserve" four bytes from the input buffer to ensure that
|
||||
// we never read beyond the end of the input buffer.
|
||||
size_t rounds = (*slen - 4) / 12;
|
||||
|
||||
*slen -= rounds * 12; // 12 bytes consumed per round
|
||||
*olen += rounds * 16; // 16 bytes produced per round
|
||||
|
||||
// Number of times to go through the 36x loop.
|
||||
size_t loops = rounds / 36;
|
||||
|
||||
// Number of rounds remaining after the 36x loop.
|
||||
rounds %= 36;
|
||||
|
||||
// Lookup tables.
|
||||
const __m128i lut0 = _mm_set_epi8(
|
||||
10, 11, 9, 10, 7, 8, 6, 7, 4, 5, 3, 4, 1, 2, 0, 1);
|
||||
|
||||
const __m128i lut1 = _mm_setr_epi8(
|
||||
65, 71, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -19, -16, 0, 0);
|
||||
|
||||
// Temporary registers.
|
||||
__m128i a, b, c, d, e, f;
|
||||
|
||||
// The numeric labels below are assembler-local: "18f" jumps forward to
// the next label 18, "36b" jumps back to the previous label 36.
__asm__ volatile (
|
||||
|
||||
// If there are 36 rounds or more, enter a 36x unrolled loop of
|
||||
// interleaved encoding rounds. The rounds interleave memory
|
||||
// operations (load/store) with data operations (table lookups,
|
||||
// etc) to maximize pipeline throughput.
|
||||
" test %[loops], %[loops] \n\t"
|
||||
" jz 18f \n\t"
|
||||
" jmp 36f \n\t"
|
||||
" \n\t"
|
||||
".balign 64 \n\t"
|
||||
"36: " ROUND_3_INIT()
|
||||
" " ROUND_3_A( 0)
|
||||
" " ROUND_3_B( 3)
|
||||
" " ROUND_3_A( 6)
|
||||
" " ROUND_3_B( 9)
|
||||
" " ROUND_3_A(12)
|
||||
" " ROUND_3_B(15)
|
||||
" " ROUND_3_A(18)
|
||||
" " ROUND_3_B(21)
|
||||
" " ROUND_3_A(24)
|
||||
" " ROUND_3_B(27)
|
||||
" " ROUND_3_A_LAST(30)
|
||||
" add $(12 * 36), %[src] \n\t"
|
||||
" add $(16 * 36), %[dst] \n\t"
|
||||
" dec %[loops] \n\t"
|
||||
" jnz 36b \n\t"
|
||||
|
||||
// Enter an 18x unrolled loop for rounds of 18 or more.
|
||||
"18: cmp $18, %[rounds] \n\t"
|
||||
" jl 9f \n\t"
|
||||
" " ROUND_3_INIT()
|
||||
" " ROUND_3_A(0)
|
||||
" " ROUND_3_B(3)
|
||||
" " ROUND_3_A(6)
|
||||
" " ROUND_3_B(9)
|
||||
" " ROUND_3_A_LAST(12)
|
||||
" sub $18, %[rounds] \n\t"
|
||||
" add $(12 * 18), %[src] \n\t"
|
||||
" add $(16 * 18), %[dst] \n\t"
|
||||
|
||||
// Enter a 9x unrolled loop for rounds of 9 or more.
|
||||
"9: cmp $9, %[rounds] \n\t"
|
||||
" jl 6f \n\t"
|
||||
" " ROUND_3_INIT()
|
||||
" " ROUND_3_A(0)
|
||||
" " ROUND_3_B_LAST(3)
|
||||
" sub $9, %[rounds] \n\t"
|
||||
" add $(12 * 9), %[src] \n\t"
|
||||
" add $(16 * 9), %[dst] \n\t"
|
||||
|
||||
// Enter a 6x unrolled loop for rounds of 6 or more.
|
||||
"6: cmp $6, %[rounds] \n\t"
|
||||
" jl 55f \n\t"
|
||||
" " ROUND_3_INIT()
|
||||
" " ROUND_3_A_LAST(0)
|
||||
" sub $6, %[rounds] \n\t"
|
||||
" add $(12 * 6), %[src] \n\t"
|
||||
" add $(16 * 6), %[dst] \n\t"
|
||||
|
||||
// Dispatch the remaining rounds 0..5.
|
||||
"55: cmp $3, %[rounds] \n\t"
|
||||
" jg 45f \n\t"
|
||||
" je 3f \n\t"
|
||||
" cmp $1, %[rounds] \n\t"
|
||||
" jg 2f \n\t"
|
||||
" je 1f \n\t"
|
||||
" jmp 0f \n\t"
|
||||
|
||||
"45: cmp $4, %[rounds] \n\t"
|
||||
" je 4f \n\t"
|
||||
|
||||
// Block of non-interlaced encoding rounds, which can each
|
||||
// individually be jumped to. Rounds fall through to the next.
|
||||
"5: " ROUND()
|
||||
"4: " ROUND()
|
||||
"3: " ROUND()
|
||||
"2: " ROUND()
|
||||
"1: " ROUND()
|
||||
"0: \n\t"
|
||||
|
||||
// Outputs (modified).
|
||||
: [rounds] "+r" (rounds),
|
||||
[loops] "+r" (loops),
|
||||
[src] "+r" (*s),
|
||||
[dst] "+r" (*o),
|
||||
[a] "=&x" (a),
|
||||
[b] "=&x" (b),
|
||||
[c] "=&x" (c),
|
||||
[d] "=&x" (d),
|
||||
[e] "=&x" (e),
|
||||
[f] "=&x" (f)
|
||||
|
||||
// Inputs (not modified).
|
||||
: [lut0] "x" (lut0),
|
||||
[lut1] "x" (lut1),
|
||||
[msk0] "x" (_mm_set1_epi32(0x0FC0FC00)),
|
||||
[msk1] "x" (_mm_set1_epi32(0x04000040)),
|
||||
[msk2] "x" (_mm_set1_epi32(0x003F03F0)),
|
||||
[msk3] "x" (_mm_set1_epi32(0x01000010)),
|
||||
[n51] "x" (_mm_set1_epi8(51)),
|
||||
[n25] "x" (_mm_set1_epi8(25))
|
||||
|
||||
// Clobbers.
|
||||
: "cc", "memory"
|
||||
);
|
||||
}
|
||||
|
||||
#pragma GCC diagnostic pop
|
56
deps/base64/base64/lib/arch/avx2/codec.c
vendored
56
deps/base64/base64/lib/arch/avx2/codec.c
vendored
|
@ -1,56 +0,0 @@
|
|||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "../../../include/libbase64.h"
|
||||
#include "../../tables/tables.h"
|
||||
#include "../../codecs.h"
|
||||
#include "config.h"
|
||||
#include "../../env.h"
|
||||
|
||||
#if HAVE_AVX2
|
||||
#include <immintrin.h>
|
||||
|
||||
// Only enable inline assembly on supported compilers and on 64-bit CPUs.
|
||||
#ifndef BASE64_AVX2_USE_ASM
|
||||
# if (defined(__GNUC__) || defined(__clang__)) && BASE64_WORDSIZE == 64
|
||||
# define BASE64_AVX2_USE_ASM 1
|
||||
# else
|
||||
# define BASE64_AVX2_USE_ASM 0
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#include "dec_reshuffle.c"
|
||||
#include "dec_loop.c"
|
||||
|
||||
#if BASE64_AVX2_USE_ASM
|
||||
# include "enc_loop_asm.c"
|
||||
#else
|
||||
# include "enc_translate.c"
|
||||
# include "enc_reshuffle.c"
|
||||
# include "enc_loop.c"
|
||||
#endif
|
||||
|
||||
#endif // HAVE_AVX2
|
||||
|
||||
// AVX2 encoder entry point, declared through the BASE64_ENC_FUNCTION macro.
// With HAVE_AVX2 set, enc_loop_avx2() runs between the generic encoder head
// and tail, which are pulled in by textual #include. Without HAVE_AVX2 the
// body is just BASE64_ENC_STUB.
// NOTE(review): s, slen, o and olen are not declared in this file; presumably
// they come from the macro or from enc_head.c -- confirm.
BASE64_ENC_FUNCTION(avx2)
|
||||
{
|
||||
#if HAVE_AVX2
|
||||
#include "../generic/enc_head.c"
|
||||
enc_loop_avx2(&s, &slen, &o, &olen);
|
||||
#include "../generic/enc_tail.c"
|
||||
#else
|
||||
BASE64_ENC_STUB
|
||||
#endif
|
||||
}
|
||||
|
||||
// AVX2 decoder entry point, declared through the BASE64_DEC_FUNCTION macro.
// With HAVE_AVX2 set, dec_loop_avx2() runs between the generic decoder head
// and tail, which are pulled in by textual #include. Without HAVE_AVX2 the
// body is just BASE64_DEC_STUB.
// NOTE(review): s, slen, o and olen are not declared in this file; presumably
// they come from the macro or from dec_head.c -- confirm.
BASE64_DEC_FUNCTION(avx2)
|
||||
{
|
||||
#if HAVE_AVX2
|
||||
#include "../generic/dec_head.c"
|
||||
dec_loop_avx2(&s, &slen, &o, &olen);
|
||||
#include "../generic/dec_tail.c"
|
||||
#else
|
||||
BASE64_DEC_STUB
|
||||
#endif
|
||||
}
|
110
deps/base64/base64/lib/arch/avx2/dec_loop.c
vendored
110
deps/base64/base64/lib/arch/avx2/dec_loop.c
vendored
|
@ -1,110 +0,0 @@
|
|||
static inline int
|
||||
dec_loop_avx2_inner (const uint8_t **s, uint8_t **o, size_t *rounds)
|
||||
{
|
||||
const __m256i lut_lo = _mm256_setr_epi8(
|
||||
0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11,
|
||||
0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A,
|
||||
0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11,
|
||||
0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A);
|
||||
|
||||
const __m256i lut_hi = _mm256_setr_epi8(
|
||||
0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08,
|
||||
0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
|
||||
0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08,
|
||||
0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10);
|
||||
|
||||
const __m256i lut_roll = _mm256_setr_epi8(
|
||||
0, 16, 19, 4, -65, -65, -71, -71,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 16, 19, 4, -65, -65, -71, -71,
|
||||
0, 0, 0, 0, 0, 0, 0, 0);
|
||||
|
||||
const __m256i mask_2F = _mm256_set1_epi8(0x2F);
|
||||
|
||||
// Load input:
|
||||
__m256i str = _mm256_loadu_si256((__m256i *) *s);
|
||||
|
||||
// See the SSSE3 decoder for an explanation of the algorithm.
|
||||
const __m256i hi_nibbles = _mm256_and_si256(_mm256_srli_epi32(str, 4), mask_2F);
|
||||
const __m256i lo_nibbles = _mm256_and_si256(str, mask_2F);
|
||||
const __m256i hi = _mm256_shuffle_epi8(lut_hi, hi_nibbles);
|
||||
const __m256i lo = _mm256_shuffle_epi8(lut_lo, lo_nibbles);
|
||||
|
||||
if (!_mm256_testz_si256(lo, hi)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
const __m256i eq_2F = _mm256_cmpeq_epi8(str, mask_2F);
|
||||
const __m256i roll = _mm256_shuffle_epi8(lut_roll, _mm256_add_epi8(eq_2F, hi_nibbles));
|
||||
|
||||
// Now simply add the delta values to the input:
|
||||
str = _mm256_add_epi8(str, roll);
|
||||
|
||||
// Reshuffle the input to packed 12-byte output format:
|
||||
str = dec_reshuffle(str);
|
||||
|
||||
// Store the output:
|
||||
_mm256_storeu_si256((__m256i *) *o, str);
|
||||
|
||||
*s += 32;
|
||||
*o += 24;
|
||||
*rounds -= 1;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Bulk AVX2 decoder loop: consumes 32 input bytes and produces 24 output
// bytes per round, stopping early at the first round the inner decoder
// rejects. Inputs shorter than 45 bytes are left untouched.
static inline void
dec_loop_avx2 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
{
	if (*slen < 45) {
		return;
	}

	// Each round stores 32 bytes while only 24 of them are payload, so 8
	// extra bytes land after the output. Hold back at least 13 input bytes
	// (11 data bytes plus up to two end-of-string markers) so that the
	// remaining output covers that gap.
	size_t rounds = (*slen - 13) / 32;

	// Account for all rounds up front; rounds that end up not being
	// executed are given back below.
	*slen -= rounds * 32;
	*olen += rounds * 24;

	for (;;) {
		int ok;

		if (rounds >= 8) {
			ok = dec_loop_avx2_inner(s, o, &rounds) &&
			     dec_loop_avx2_inner(s, o, &rounds) &&
			     dec_loop_avx2_inner(s, o, &rounds) &&
			     dec_loop_avx2_inner(s, o, &rounds) &&
			     dec_loop_avx2_inner(s, o, &rounds) &&
			     dec_loop_avx2_inner(s, o, &rounds) &&
			     dec_loop_avx2_inner(s, o, &rounds) &&
			     dec_loop_avx2_inner(s, o, &rounds);
		}
		else if (rounds >= 4) {
			ok = dec_loop_avx2_inner(s, o, &rounds) &&
			     dec_loop_avx2_inner(s, o, &rounds) &&
			     dec_loop_avx2_inner(s, o, &rounds) &&
			     dec_loop_avx2_inner(s, o, &rounds);
		}
		else if (rounds >= 2) {
			ok = dec_loop_avx2_inner(s, o, &rounds) &&
			     dec_loop_avx2_inner(s, o, &rounds);
		}
		else {
			// Final single round; its outcome no longer matters for
			// the loop because we stop either way.
			dec_loop_avx2_inner(s, o, &rounds);
			break;
		}

		// Stop on the first rejected block or when all rounds are done.
		if (!ok || rounds == 0) {
			break;
		}
	}

	// Give back the rounds that were not executed:
	*slen += rounds * 32;
	*olen -= rounds * 24;
}
|
34
deps/base64/base64/lib/arch/avx2/dec_reshuffle.c
vendored
34
deps/base64/base64/lib/arch/avx2/dec_reshuffle.c
vendored
|
@ -1,34 +0,0 @@
|
|||
static inline __m256i
|
||||
dec_reshuffle (const __m256i in)
|
||||
{
|
||||
// in, lower lane, bits, upper case are most significant bits, lower
|
||||
// case are least significant bits:
|
||||
// 00llllll 00kkkkLL 00jjKKKK 00JJJJJJ
|
||||
// 00iiiiii 00hhhhII 00ggHHHH 00GGGGGG
|
||||
// 00ffffff 00eeeeFF 00ddEEEE 00DDDDDD
|
||||
// 00cccccc 00bbbbCC 00aaBBBB 00AAAAAA
|
||||
|
||||
const __m256i merge_ab_and_bc = _mm256_maddubs_epi16(in, _mm256_set1_epi32(0x01400140));
|
||||
// 0000kkkk LLllllll 0000JJJJ JJjjKKKK
|
||||
// 0000hhhh IIiiiiii 0000GGGG GGggHHHH
|
||||
// 0000eeee FFffffff 0000DDDD DDddEEEE
|
||||
// 0000bbbb CCcccccc 0000AAAA AAaaBBBB
|
||||
|
||||
__m256i out = _mm256_madd_epi16(merge_ab_and_bc, _mm256_set1_epi32(0x00011000));
|
||||
// 00000000 JJJJJJjj KKKKkkkk LLllllll
|
||||
// 00000000 GGGGGGgg HHHHhhhh IIiiiiii
|
||||
// 00000000 DDDDDDdd EEEEeeee FFffffff
|
||||
// 00000000 AAAAAAaa BBBBbbbb CCcccccc
|
||||
|
||||
// Pack bytes together in each lane:
|
||||
out = _mm256_shuffle_epi8(out, _mm256_setr_epi8(
|
||||
2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1,
|
||||
2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1));
|
||||
// 00000000 00000000 00000000 00000000
|
||||
// LLllllll KKKKkkkk JJJJJJjj IIiiiiii
|
||||
// HHHHhhhh GGGGGGgg FFffffff EEEEeeee
|
||||
// DDDDDDdd CCcccccc BBBBbbbb AAAAAAaa
|
||||
|
||||
// Pack lanes:
|
||||
return _mm256_permutevar8x32_epi32(out, _mm256_setr_epi32(0, 1, 2, 4, 5, 6, -1, -1));
|
||||
}
|
89
deps/base64/base64/lib/arch/avx2/enc_loop.c
vendored
89
deps/base64/base64/lib/arch/avx2/enc_loop.c
vendored
|
@ -1,89 +0,0 @@
|
|||
static inline void
|
||||
enc_loop_avx2_inner_first (const uint8_t **s, uint8_t **o)
|
||||
{
|
||||
// First load is done at s - 0 to not get a segfault:
|
||||
__m256i src = _mm256_loadu_si256((__m256i *) *s);
|
||||
|
||||
// Shift by 4 bytes, as required by enc_reshuffle:
|
||||
src = _mm256_permutevar8x32_epi32(src, _mm256_setr_epi32(0, 0, 1, 2, 3, 4, 5, 6));
|
||||
|
||||
// Reshuffle, translate, store:
|
||||
src = enc_reshuffle(src);
|
||||
src = enc_translate(src);
|
||||
_mm256_storeu_si256((__m256i *) *o, src);
|
||||
|
||||
// Subsequent loads will be done at s - 4, set pointer for next round:
|
||||
*s += 20;
|
||||
*o += 32;
|
||||
}
|
||||
|
||||
static inline void
|
||||
enc_loop_avx2_inner (const uint8_t **s, uint8_t **o)
|
||||
{
|
||||
// Load input:
|
||||
__m256i src = _mm256_loadu_si256((__m256i *) *s);
|
||||
|
||||
// Reshuffle, translate, store:
|
||||
src = enc_reshuffle(src);
|
||||
src = enc_translate(src);
|
||||
_mm256_storeu_si256((__m256i *) *o, src);
|
||||
|
||||
*s += 24;
|
||||
*o += 32;
|
||||
}
|
||||
|
||||
// Bulk AVX2 encoder loop (intrinsics version): encodes 24 input bytes into
// 32 output bytes per round. Inputs shorter than 32 bytes are left untouched.
static inline void
enc_loop_avx2 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
{
	if (*slen < 32) {
		return;
	}

	// Every round loads 32 bytes at an offset of -4 but only consumes 24,
	// so keep at least 4 bytes in reserve after the last round; that way
	// the final load stays inside the input buffer.
	size_t rounds = (*slen - 4) / 24;

	*slen -= rounds * 24;	// 24 bytes consumed per round
	*olen += rounds * 32;	// 32 bytes produced per round

	// The first round is special-cased so that its offset load does not
	// reach in front of the buffer.
	enc_loop_avx2_inner_first(s, o);
	rounds--;

	// Unrolled by 8 for as long as possible...
	for (; rounds >= 8; rounds -= 8) {
		enc_loop_avx2_inner(s, o);
		enc_loop_avx2_inner(s, o);
		enc_loop_avx2_inner(s, o);
		enc_loop_avx2_inner(s, o);
		enc_loop_avx2_inner(s, o);
		enc_loop_avx2_inner(s, o);
		enc_loop_avx2_inner(s, o);
		enc_loop_avx2_inner(s, o);
	}

	// ...then mop up the remaining 0..7 rounds in chunks of 4, 2 and 1.
	if (rounds >= 4) {
		enc_loop_avx2_inner(s, o);
		enc_loop_avx2_inner(s, o);
		enc_loop_avx2_inner(s, o);
		enc_loop_avx2_inner(s, o);
		rounds -= 4;
	}
	if (rounds >= 2) {
		enc_loop_avx2_inner(s, o);
		enc_loop_avx2_inner(s, o);
		rounds -= 2;
	}
	if (rounds > 0) {
		enc_loop_avx2_inner(s, o);
	}

	// Undo the -4 load offset on the source pointer:
	*s += 4;
}
|
291
deps/base64/base64/lib/arch/avx2/enc_loop_asm.c
vendored
291
deps/base64/base64/lib/arch/avx2/enc_loop_asm.c
vendored
|
@ -1,291 +0,0 @@
|
|||
// Apologies in advance for combining the preprocessor with inline assembly,
|
||||
// two notoriously gnarly parts of C, but it was necessary to avoid a lot of
|
||||
// code repetition. The preprocessor is used to template large sections of
|
||||
// inline assembly that differ only in the registers used. If the code was
|
||||
// written out by hand, it would become very large and hard to audit.
|
||||
|
||||
// Generate a block of inline assembly that loads register R0 from memory. The
|
||||
// offset at which the register is loaded is set by the given round and a
|
||||
// constant offset.
|
||||
#define LOAD(R0, ROUND, OFFSET) \
|
||||
"vlddqu ("#ROUND" * 24 + "#OFFSET")(%[src]), %["R0"] \n\t"
|
||||
|
||||
// Generate a block of inline assembly that deinterleaves and shuffles register
|
||||
// R0 using preloaded constants. Outputs in R0 and R1.
|
||||
#define SHUF(R0, R1, R2) \
|
||||
"vpshufb %[lut0], %["R0"], %["R1"] \n\t" \
|
||||
"vpand %["R1"], %[msk0], %["R2"] \n\t" \
|
||||
"vpand %["R1"], %[msk2], %["R1"] \n\t" \
|
||||
"vpmulhuw %["R2"], %[msk1], %["R2"] \n\t" \
|
||||
"vpmullw %["R1"], %[msk3], %["R1"] \n\t" \
|
||||
"vpor %["R1"], %["R2"], %["R1"] \n\t"
|
||||
|
||||
// Generate a block of inline assembly that takes R0 and R1 and translates
|
||||
// their contents to the base64 alphabet, using preloaded constants.
|
||||
#define TRAN(R0, R1, R2) \
|
||||
"vpsubusb %[n51], %["R1"], %["R0"] \n\t" \
|
||||
"vpcmpgtb %[n25], %["R1"], %["R2"] \n\t" \
|
||||
"vpsubb %["R2"], %["R0"], %["R0"] \n\t" \
|
||||
"vpshufb %["R0"], %[lut1], %["R2"] \n\t" \
|
||||
"vpaddb %["R1"], %["R2"], %["R0"] \n\t"
|
||||
|
||||
// Generate a block of inline assembly that stores the given register R0 at an
|
||||
// offset set by the given round.
|
||||
#define STOR(R0, ROUND) \
|
||||
"vmovdqu %["R0"], ("#ROUND" * 32)(%[dst]) \n\t"
|
||||
|
||||
// Generate a block of inline assembly that generates a single self-contained
|
||||
// encoder round: fetch the data, process it, and store the result. Then update
|
||||
// the source and destination pointers.
|
||||
#define ROUND() \
|
||||
LOAD("a", 0, -4) \
|
||||
SHUF("a", "b", "c") \
|
||||
TRAN("a", "b", "c") \
|
||||
STOR("a", 0) \
|
||||
"add $24, %[src] \n\t" \
|
||||
"add $32, %[dst] \n\t"
|
||||
|
||||
// Define a macro that initiates a three-way interleaved encoding round by
|
||||
// preloading registers a, b and c from memory.
|
||||
// The register graph shows which registers are in use during each step, and
|
||||
// is a visual aid for choosing registers for that step. Symbol index:
|
||||
//
|
||||
// + indicates that a register is loaded by that step.
|
||||
// | indicates that a register is in use and must not be touched.
|
||||
// - indicates that a register is decommissioned by that step.
|
||||
// x indicates that a register is used as a temporary by that step.
|
||||
// V indicates that a register is an input or output to the macro.
|
||||
//
|
||||
#define ROUND_3_INIT() /* a b c d e f */ \
|
||||
LOAD("a", 0, -4) /* + */ \
|
||||
SHUF("a", "d", "e") /* | + x */ \
|
||||
LOAD("b", 1, -4) /* | + | */ \
|
||||
TRAN("a", "d", "e") /* | | - x */ \
|
||||
LOAD("c", 2, -4) /* V V V */
|
||||
|
||||
// Define a macro that translates, shuffles and stores the input registers A, B
|
||||
// and C, and preloads registers D, E and F for the next round.
|
||||
// This macro can be arbitrarily daisy-chained by feeding output registers D, E
|
||||
// and F back into the next round as input registers A, B and C. The macro
|
||||
// carefully interleaves memory operations with data operations for optimal
|
||||
// pipelined performance.
|
||||
|
||||
#define ROUND_3(ROUND, A,B,C,D,E,F) /* A B C D E F */ \
|
||||
LOAD(D, (ROUND + 3), -4) /* V V V + */ \
|
||||
SHUF(B, E, F) /* | | | | + x */ \
|
||||
STOR(A, (ROUND + 0)) /* - | | | | */ \
|
||||
TRAN(B, E, F) /* | | | - x */ \
|
||||
LOAD(E, (ROUND + 4), -4) /* | | | + */ \
|
||||
SHUF(C, A, F) /* + | | | | x */ \
|
||||
STOR(B, (ROUND + 1)) /* | - | | | */ \
|
||||
TRAN(C, A, F) /* - | | | x */ \
|
||||
LOAD(F, (ROUND + 5), -4) /* | | | + */ \
|
||||
SHUF(D, A, B) /* + x | | | | */ \
|
||||
STOR(C, (ROUND + 2)) /* | - | | | */ \
|
||||
TRAN(D, A, B) /* - x V V V */
|
||||
|
||||
// Define a macro that terminates a ROUND_3 macro by taking pre-loaded
|
||||
// registers D, E and F, and translating, shuffling and storing them.
|
||||
#define ROUND_3_END(ROUND, A,B,C,D,E,F) /* A B C D E F */ \
|
||||
SHUF(E, A, B) /* + x V V V */ \
|
||||
STOR(D, (ROUND + 3)) /* | - | | */ \
|
||||
TRAN(E, A, B) /* - x | | */ \
|
||||
SHUF(F, C, D) /* + x | | */ \
|
||||
STOR(E, (ROUND + 4)) /* | - | */ \
|
||||
TRAN(F, C, D) /* - x | */ \
|
||||
STOR(F, (ROUND + 5)) /* - */
|
||||
|
||||
// Define a type A round. Inputs are a, b, and c, outputs are d, e, and f.
|
||||
#define ROUND_3_A(ROUND) \
|
||||
ROUND_3(ROUND, "a", "b", "c", "d", "e", "f")
|
||||
|
||||
// Define a type B round. Inputs and outputs are swapped with regard to type A.
|
||||
#define ROUND_3_B(ROUND) \
|
||||
ROUND_3(ROUND, "d", "e", "f", "a", "b", "c")
|
||||
|
||||
// Terminating macro for a type A round.
|
||||
#define ROUND_3_A_LAST(ROUND) \
|
||||
ROUND_3_A(ROUND) \
|
||||
ROUND_3_END(ROUND, "a", "b", "c", "d", "e", "f")
|
||||
|
||||
// Terminating macro for a type B round.
|
||||
#define ROUND_3_B_LAST(ROUND) \
|
||||
ROUND_3_B(ROUND) \
|
||||
ROUND_3_END(ROUND, "d", "e", "f", "a", "b", "c")
|
||||
|
||||
// Suppress clang's warning that the literal string in the asm statement is
|
||||
// overlong (longer than the ISO-mandated minimum size of 4095 bytes for C99
|
||||
// compilers). It may be true, but the goal here is not C99 portability.
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Woverlength-strings"
|
||||
|
||||
static inline void
|
||||
enc_loop_avx2 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
|
||||
{
|
||||
// For a clearer explanation of the algorithm used by this function,
|
||||
// please refer to the plain (not inline assembly) implementation. This
|
||||
// function follows the same basic logic.
|
||||
|
||||
if (*slen < 32) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Process blocks of 24 bytes at a time. Because blocks are loaded 32
|
||||
// bytes at a time an offset of -4, ensure that there will be at least
|
||||
// 4 remaining bytes after the last round, so that the final read will
|
||||
// not pass beyond the bounds of the input buffer.
|
||||
size_t rounds = (*slen - 4) / 24;
|
||||
|
||||
*slen -= rounds * 24; // 24 bytes consumed per round
|
||||
*olen += rounds * 32; // 32 bytes produced per round
|
||||
|
||||
// Pre-decrement the number of rounds to get the number of rounds
|
||||
// *after* the first round, which is handled as a special case.
|
||||
rounds--;
|
||||
|
||||
// Number of times to go through the 36x loop.
|
||||
size_t loops = rounds / 36;
|
||||
|
||||
// Number of rounds remaining after the 36x loop.
|
||||
rounds %= 36;
|
||||
|
||||
// Lookup tables.
|
||||
const __m256i lut0 = _mm256_set_epi8(
|
||||
10, 11, 9, 10, 7, 8, 6, 7, 4, 5, 3, 4, 1, 2, 0, 1,
|
||||
14, 15, 13, 14, 11, 12, 10, 11, 8, 9, 7, 8, 5, 6, 4, 5);
|
||||
|
||||
const __m256i lut1 = _mm256_setr_epi8(
|
||||
65, 71, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -19, -16, 0, 0,
|
||||
65, 71, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -19, -16, 0, 0);
|
||||
|
||||
// Temporary registers.
|
||||
__m256i a, b, c, d, e;
|
||||
|
||||
// Temporary register f doubles as the shift mask for the first round.
|
||||
__m256i f = _mm256_setr_epi32(0, 0, 1, 2, 3, 4, 5, 6);
|
||||
|
||||
__asm__ volatile (
|
||||
|
||||
// The first loop iteration requires special handling to ensure
|
||||
// that the read, which is normally done at an offset of -4,
|
||||
// does not underflow the buffer. Load the buffer at an offset
|
||||
// of 0 and permute the input to achieve the same effect.
|
||||
LOAD("a", 0, 0)
|
||||
"vpermd %[a], %[f], %[a] \n\t"
|
||||
|
||||
// Perform the standard shuffling and translation steps.
|
||||
SHUF("a", "b", "c")
|
||||
TRAN("a", "b", "c")
|
||||
|
||||
// Store the result and increment the source and dest pointers.
|
||||
"vmovdqu %[a], (%[dst]) \n\t"
|
||||
"add $24, %[src] \n\t"
|
||||
"add $32, %[dst] \n\t"
|
||||
|
||||
// If there are 36 rounds or more, enter a 36x unrolled loop of
|
||||
// interleaved encoding rounds. The rounds interleave memory
|
||||
// operations (load/store) with data operations (table lookups,
|
||||
// etc) to maximize pipeline throughput.
|
||||
" test %[loops], %[loops] \n\t"
|
||||
" jz 18f \n\t"
|
||||
" jmp 36f \n\t"
|
||||
" \n\t"
|
||||
".balign 64 \n\t"
|
||||
"36: " ROUND_3_INIT()
|
||||
" " ROUND_3_A( 0)
|
||||
" " ROUND_3_B( 3)
|
||||
" " ROUND_3_A( 6)
|
||||
" " ROUND_3_B( 9)
|
||||
" " ROUND_3_A(12)
|
||||
" " ROUND_3_B(15)
|
||||
" " ROUND_3_A(18)
|
||||
" " ROUND_3_B(21)
|
||||
" " ROUND_3_A(24)
|
||||
" " ROUND_3_B(27)
|
||||
" " ROUND_3_A_LAST(30)
|
||||
" add $(24 * 36), %[src] \n\t"
|
||||
" add $(32 * 36), %[dst] \n\t"
|
||||
" dec %[loops] \n\t"
|
||||
" jnz 36b \n\t"
|
||||
|
||||
// Enter an 18x unrolled loop for rounds of 18 or more.
|
||||
"18: cmp $18, %[rounds] \n\t"
|
||||
" jl 9f \n\t"
|
||||
" " ROUND_3_INIT()
|
||||
" " ROUND_3_A(0)
|
||||
" " ROUND_3_B(3)
|
||||
" " ROUND_3_A(6)
|
||||
" " ROUND_3_B(9)
|
||||
" " ROUND_3_A_LAST(12)
|
||||
" sub $18, %[rounds] \n\t"
|
||||
" add $(24 * 18), %[src] \n\t"
|
||||
" add $(32 * 18), %[dst] \n\t"
|
||||
|
||||
// Enter a 9x unrolled loop for rounds of 9 or more.
|
||||
"9: cmp $9, %[rounds] \n\t"
|
||||
" jl 6f \n\t"
|
||||
" " ROUND_3_INIT()
|
||||
" " ROUND_3_A(0)
|
||||
" " ROUND_3_B_LAST(3)
|
||||
" sub $9, %[rounds] \n\t"
|
||||
" add $(24 * 9), %[src] \n\t"
|
||||
" add $(32 * 9), %[dst] \n\t"
|
||||
|
||||
// Enter a 6x unrolled loop for rounds of 6 or more.
|
||||
"6: cmp $6, %[rounds] \n\t"
|
||||
" jl 55f \n\t"
|
||||
" " ROUND_3_INIT()
|
||||
" " ROUND_3_A_LAST(0)
|
||||
" sub $6, %[rounds] \n\t"
|
||||
" add $(24 * 6), %[src] \n\t"
|
||||
" add $(32 * 6), %[dst] \n\t"
|
||||
|
||||
// Dispatch the remaining rounds 0..5.
|
||||
"55: cmp $3, %[rounds] \n\t"
|
||||
" jg 45f \n\t"
|
||||
" je 3f \n\t"
|
||||
" cmp $1, %[rounds] \n\t"
|
||||
" jg 2f \n\t"
|
||||
" je 1f \n\t"
|
||||
" jmp 0f \n\t"
|
||||
|
||||
"45: cmp $4, %[rounds] \n\t"
|
||||
" je 4f \n\t"
|
||||
|
||||
// Block of non-interlaced encoding rounds, which can each
|
||||
// individually be jumped to. Rounds fall through to the next.
|
||||
"5: " ROUND()
|
||||
"4: " ROUND()
|
||||
"3: " ROUND()
|
||||
"2: " ROUND()
|
||||
"1: " ROUND()
|
||||
"0: \n\t"
|
||||
|
||||
// Outputs (modified).
|
||||
: [rounds] "+r" (rounds),
|
||||
[loops] "+r" (loops),
|
||||
[src] "+r" (*s),
|
||||
[dst] "+r" (*o),
|
||||
[a] "=&x" (a),
|
||||
[b] "=&x" (b),
|
||||
[c] "=&x" (c),
|
||||
[d] "=&x" (d),
|
||||
[e] "=&x" (e),
|
||||
[f] "+x" (f)
|
||||
|
||||
// Inputs (not modified).
|
||||
: [lut0] "x" (lut0),
|
||||
[lut1] "x" (lut1),
|
||||
[msk0] "x" (_mm256_set1_epi32(0x0FC0FC00)),
|
||||
[msk1] "x" (_mm256_set1_epi32(0x04000040)),
|
||||
[msk2] "x" (_mm256_set1_epi32(0x003F03F0)),
|
||||
[msk3] "x" (_mm256_set1_epi32(0x01000010)),
|
||||
[n51] "x" (_mm256_set1_epi8(51)),
|
||||
[n25] "x" (_mm256_set1_epi8(25))
|
||||
|
||||
// Clobbers.
|
||||
: "cc", "memory"
|
||||
);
|
||||
}
|
||||
|
||||
#pragma GCC diagnostic pop
|
83
deps/base64/base64/lib/arch/avx2/enc_reshuffle.c
vendored
83
deps/base64/base64/lib/arch/avx2/enc_reshuffle.c
vendored
|
@ -1,83 +0,0 @@
|
|||
static inline __m256i
|
||||
enc_reshuffle (const __m256i input)
|
||||
{
|
||||
// Translation of the SSSE3 reshuffling algorithm to AVX2. This one
|
||||
// works with shifted (4 bytes) input in order to be able to work
|
||||
// efficiently in the two 128-bit lanes.
|
||||
|
||||
// Input, bytes MSB to LSB:
|
||||
// 0 0 0 0 x w v u t s r q p o n m
|
||||
// l k j i h g f e d c b a 0 0 0 0
|
||||
|
||||
const __m256i in = _mm256_shuffle_epi8(input, _mm256_set_epi8(
|
||||
10, 11, 9, 10,
|
||||
7, 8, 6, 7,
|
||||
4, 5, 3, 4,
|
||||
1, 2, 0, 1,
|
||||
|
||||
14, 15, 13, 14,
|
||||
11, 12, 10, 11,
|
||||
8, 9, 7, 8,
|
||||
5, 6, 4, 5));
|
||||
// in, bytes MSB to LSB:
|
||||
// w x v w
|
||||
// t u s t
|
||||
// q r p q
|
||||
// n o m n
|
||||
// k l j k
|
||||
// h i g h
|
||||
// e f d e
|
||||
// b c a b
|
||||
|
||||
const __m256i t0 = _mm256_and_si256(in, _mm256_set1_epi32(0x0FC0FC00));
|
||||
// bits, upper case are most significant bits, lower case are least
|
||||
// significant bits.
|
||||
// 0000wwww XX000000 VVVVVV00 00000000
|
||||
// 0000tttt UU000000 SSSSSS00 00000000
|
||||
// 0000qqqq RR000000 PPPPPP00 00000000
|
||||
// 0000nnnn OO000000 MMMMMM00 00000000
|
||||
// 0000kkkk LL000000 JJJJJJ00 00000000
|
||||
// 0000hhhh II000000 GGGGGG00 00000000
|
||||
// 0000eeee FF000000 DDDDDD00 00000000
|
||||
// 0000bbbb CC000000 AAAAAA00 00000000
|
||||
|
||||
const __m256i t1 = _mm256_mulhi_epu16(t0, _mm256_set1_epi32(0x04000040));
|
||||
// 00000000 00wwwwXX 00000000 00VVVVVV
|
||||
// 00000000 00ttttUU 00000000 00SSSSSS
|
||||
// 00000000 00qqqqRR 00000000 00PPPPPP
|
||||
// 00000000 00nnnnOO 00000000 00MMMMMM
|
||||
// 00000000 00kkkkLL 00000000 00JJJJJJ
|
||||
// 00000000 00hhhhII 00000000 00GGGGGG
|
||||
// 00000000 00eeeeFF 00000000 00DDDDDD
|
||||
// 00000000 00bbbbCC 00000000 00AAAAAA
|
||||
|
||||
const __m256i t2 = _mm256_and_si256(in, _mm256_set1_epi32(0x003F03F0));
|
||||
// 00000000 00xxxxxx 000000vv WWWW0000
|
||||
// 00000000 00uuuuuu 000000ss TTTT0000
|
||||
// 00000000 00rrrrrr 000000pp QQQQ0000
|
||||
// 00000000 00oooooo 000000mm NNNN0000
|
||||
// 00000000 00llllll 000000jj KKKK0000
|
||||
// 00000000 00iiiiii 000000gg HHHH0000
|
||||
// 00000000 00ffffff 000000dd EEEE0000
|
||||
// 00000000 00cccccc 000000aa BBBB0000
|
||||
|
||||
const __m256i t3 = _mm256_mullo_epi16(t2, _mm256_set1_epi32(0x01000010));
|
||||
// 00xxxxxx 00000000 00vvWWWW 00000000
|
||||
// 00uuuuuu 00000000 00ssTTTT 00000000
|
||||
// 00rrrrrr 00000000 00ppQQQQ 00000000
|
||||
// 00oooooo 00000000 00mmNNNN 00000000
|
||||
// 00llllll 00000000 00jjKKKK 00000000
|
||||
// 00iiiiii 00000000 00ggHHHH 00000000
|
||||
// 00ffffff 00000000 00ddEEEE 00000000
|
||||
// 00cccccc 00000000 00aaBBBB 00000000
|
||||
|
||||
return _mm256_or_si256(t1, t3);
|
||||
// 00xxxxxx 00wwwwXX 00vvWWWW 00VVVVVV
|
||||
// 00uuuuuu 00ttttUU 00ssTTTT 00SSSSSS
|
||||
// 00rrrrrr 00qqqqRR 00ppQQQQ 00PPPPPP
|
||||
// 00oooooo 00nnnnOO 00mmNNNN 00MMMMMM
|
||||
// 00llllll 00kkkkLL 00jjKKKK 00JJJJJJ
|
||||
// 00iiiiii 00hhhhII 00ggHHHH 00GGGGGG
|
||||
// 00ffffff 00eeeeFF 00ddEEEE 00DDDDDD
|
||||
// 00cccccc 00bbbbCC 00aaBBBB 00AAAAAA
|
||||
}
|
30
deps/base64/base64/lib/arch/avx2/enc_translate.c
vendored
30
deps/base64/base64/lib/arch/avx2/enc_translate.c
vendored
|
@ -1,30 +0,0 @@
|
|||
static inline __m256i
|
||||
enc_translate (const __m256i in)
|
||||
{
|
||||
// A lookup table containing the absolute offsets for all ranges:
|
||||
const __m256i lut = _mm256_setr_epi8(
|
||||
65, 71, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -19, -16, 0, 0,
|
||||
65, 71, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -19, -16, 0, 0);
|
||||
|
||||
// Translate values 0..63 to the Base64 alphabet. There are five sets:
|
||||
// # From To Abs Index Characters
|
||||
// 0 [0..25] [65..90] +65 0 ABCDEFGHIJKLMNOPQRSTUVWXYZ
|
||||
// 1 [26..51] [97..122] +71 1 abcdefghijklmnopqrstuvwxyz
|
||||
// 2 [52..61] [48..57] -4 [2..11] 0123456789
|
||||
// 3 [62] [43] -19 12 +
|
||||
// 4 [63] [47] -16 13 /
|
||||
|
||||
// Create LUT indices from the input. The index for range #0 is right,
|
||||
// others are 1 less than expected:
|
||||
__m256i indices = _mm256_subs_epu8(in, _mm256_set1_epi8(51));
|
||||
|
||||
// mask is 0xFF (-1) for range #[1..4] and 0x00 for range #0:
|
||||
const __m256i mask = _mm256_cmpgt_epi8(in, _mm256_set1_epi8(25));
|
||||
|
||||
// Subtract -1, so add 1 to indices for range #[1..4]. All indices are
|
||||
// now correct:
|
||||
indices = _mm256_sub_epi8(indices, mask);
|
||||
|
||||
// Add offsets to input values:
|
||||
return _mm256_add_epi8(in, _mm256_shuffle_epi8(lut, indices));
|
||||
}
|
42
deps/base64/base64/lib/arch/avx512/codec.c
vendored
42
deps/base64/base64/lib/arch/avx512/codec.c
vendored
|
@ -1,42 +0,0 @@
|
|||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "../../../include/libbase64.h"
|
||||
#include "../../tables/tables.h"
|
||||
#include "../../codecs.h"
|
||||
#include "config.h"
|
||||
#include "../../env.h"
|
||||
|
||||
#if HAVE_AVX512
|
||||
#include <immintrin.h>
|
||||
|
||||
#include "../avx2/dec_reshuffle.c"
|
||||
#include "../avx2/dec_loop.c"
|
||||
#include "enc_reshuffle_translate.c"
|
||||
#include "enc_loop.c"
|
||||
|
||||
#endif // HAVE_AVX512
|
||||
|
||||
BASE64_ENC_FUNCTION(avx512)
|
||||
{
|
||||
#if HAVE_AVX512
|
||||
#include "../generic/enc_head.c"
|
||||
enc_loop_avx512(&s, &slen, &o, &olen);
|
||||
#include "../generic/enc_tail.c"
|
||||
#else
|
||||
BASE64_ENC_STUB
|
||||
#endif
|
||||
}
|
||||
|
||||
// Reuse AVX2 decoding. Not supporting AVX512 at present
|
||||
BASE64_DEC_FUNCTION(avx512)
|
||||
{
|
||||
#if HAVE_AVX512
|
||||
#include "../generic/dec_head.c"
|
||||
dec_loop_avx2(&s, &slen, &o, &olen);
|
||||
#include "../generic/dec_tail.c"
|
||||
#else
|
||||
BASE64_DEC_STUB
|
||||
#endif
|
||||
}
|
61
deps/base64/base64/lib/arch/avx512/enc_loop.c
vendored
61
deps/base64/base64/lib/arch/avx512/enc_loop.c
vendored
|
@ -1,61 +0,0 @@
|
|||
static inline void
|
||||
enc_loop_avx512_inner (const uint8_t **s, uint8_t **o)
|
||||
{
|
||||
// Load input.
|
||||
__m512i src = _mm512_loadu_si512((__m512i *) *s);
|
||||
|
||||
// Reshuffle, translate, store.
|
||||
src = enc_reshuffle_translate(src);
|
||||
_mm512_storeu_si512((__m512i *) *o, src);
|
||||
|
||||
*s += 48;
|
||||
*o += 64;
|
||||
}
|
||||
|
||||
// Bulk AVX512 encoder loop: encodes 48 input bytes into 64 output bytes per
// round. Inputs shorter than 64 bytes are left untouched.
static inline void
enc_loop_avx512 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
{
	if (*slen < 64) {
		return;
	}

	// Every round loads 64 bytes but only consumes 48. Keep at least 24
	// bytes in reserve after the last round so that the final load does
	// not run past the end of the input buffer.
	size_t rounds = (*slen - 24) / 48;

	*slen -= rounds * 48;	// 48 bytes consumed per round
	*olen += rounds * 64;	// 64 bytes produced per round

	// Unrolled by 8 for as long as possible...
	for (; rounds >= 8; rounds -= 8) {
		enc_loop_avx512_inner(s, o);
		enc_loop_avx512_inner(s, o);
		enc_loop_avx512_inner(s, o);
		enc_loop_avx512_inner(s, o);
		enc_loop_avx512_inner(s, o);
		enc_loop_avx512_inner(s, o);
		enc_loop_avx512_inner(s, o);
		enc_loop_avx512_inner(s, o);
	}

	// ...then mop up the remaining 0..7 rounds in chunks of 4, 2 and 1.
	if (rounds >= 4) {
		enc_loop_avx512_inner(s, o);
		enc_loop_avx512_inner(s, o);
		enc_loop_avx512_inner(s, o);
		enc_loop_avx512_inner(s, o);
		rounds -= 4;
	}
	if (rounds >= 2) {
		enc_loop_avx512_inner(s, o);
		enc_loop_avx512_inner(s, o);
		rounds -= 2;
	}
	if (rounds > 0) {
		enc_loop_avx512_inner(s, o);
	}
}
|
|
@ -1,50 +0,0 @@
|
|||
// AVX512 algorithm is based on permutevar and multishift. The code is based on
|
||||
// https://github.com/WojciechMula/base64simd which is under BSD-2 license.
|
||||
|
||||
static inline __m512i
|
||||
enc_reshuffle_translate (const __m512i input)
|
||||
{
|
||||
// 32-bit input
|
||||
// [ 0 0 0 0 0 0 0 0|c1 c0 d5 d4 d3 d2 d1 d0|
|
||||
// b3 b2 b1 b0 c5 c4 c3 c2|a5 a4 a3 a2 a1 a0 b5 b4]
|
||||
// output order [1, 2, 0, 1]
|
||||
// [b3 b2 b1 b0 c5 c4 c3 c2|c1 c0 d5 d4 d3 d2 d1 d0|
|
||||
// a5 a4 a3 a2 a1 a0 b5 b4|b3 b2 b1 b0 c3 c2 c1 c0]
|
||||
|
||||
const __m512i shuffle_input = _mm512_setr_epi32(0x01020001,
|
||||
0x04050304,
|
||||
0x07080607,
|
||||
0x0a0b090a,
|
||||
0x0d0e0c0d,
|
||||
0x10110f10,
|
||||
0x13141213,
|
||||
0x16171516,
|
||||
0x191a1819,
|
||||
0x1c1d1b1c,
|
||||
0x1f201e1f,
|
||||
0x22232122,
|
||||
0x25262425,
|
||||
0x28292728,
|
||||
0x2b2c2a2b,
|
||||
0x2e2f2d2e);
|
||||
|
||||
// Reorder bytes
|
||||
// [b3 b2 b1 b0 c5 c4 c3 c2|c1 c0 d5 d4 d3 d2 d1 d0|
|
||||
// a5 a4 a3 a2 a1 a0 b5 b4|b3 b2 b1 b0 c3 c2 c1 c0]
|
||||
const __m512i in = _mm512_permutexvar_epi8(shuffle_input, input);
|
||||
|
||||
// After multishift a single 32-bit lane has following layout
|
||||
// [c1 c0 d5 d4 d3 d2 d1 d0|b1 b0 c5 c4 c3 c2 c1 c0|
|
||||
// a1 a0 b5 b4 b3 b2 b1 b0|d1 d0 a5 a4 a3 a2 a1 a0]
|
||||
// (a = [10:17], b = [4:11], c = [22:27], d = [16:21])
|
||||
|
||||
// 48, 54, 36, 42, 16, 22, 4, 10
|
||||
const __m512i shifts = _mm512_set1_epi64(0x3036242a1016040alu);
|
||||
__m512i shuffled_in = _mm512_multishift_epi64_epi8(shifts, in);
|
||||
|
||||
// Translate immediatedly after reshuffled.
|
||||
const __m512i lookup = _mm512_loadu_si512(base64_table_enc_6bit);
|
||||
|
||||
// Translation 6-bit values to ASCII.
|
||||
return _mm512_permutexvar_epi8(shuffled_in, lookup);
|
||||
}
|
|
@ -1,86 +0,0 @@
|
|||
static inline int
|
||||
dec_loop_generic_32_inner (const uint8_t **s, uint8_t **o, size_t *rounds)
|
||||
{
|
||||
const uint32_t str
|
||||
= base64_table_dec_32bit_d0[(*s)[0]]
|
||||
| base64_table_dec_32bit_d1[(*s)[1]]
|
||||
| base64_table_dec_32bit_d2[(*s)[2]]
|
||||
| base64_table_dec_32bit_d3[(*s)[3]];
|
||||
|
||||
#if BASE64_LITTLE_ENDIAN
|
||||
|
||||
// LUTs for little-endian set MSB in case of invalid character:
|
||||
if (str & UINT32_C(0x80000000)) {
|
||||
return 0;
|
||||
}
|
||||
#else
|
||||
// LUTs for big-endian set LSB in case of invalid character:
|
||||
if (str & UINT32_C(1)) {
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
// Store the output:
|
||||
memcpy(*o, &str, sizeof (str));
|
||||
|
||||
*s += 4;
|
||||
*o += 3;
|
||||
*rounds -= 1;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Bulk generic 32-bit decoder loop: consumes 4 input bytes and produces 3
// output bytes per round, stopping early at the first round the inner
// decoder rejects. Inputs shorter than 8 bytes are left untouched.
static inline void
dec_loop_generic_32 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
{
	if (*slen < 8) {
		return;
	}

	// Each round stores 4 bytes while only 3 of them are payload, so one
	// extra byte lands after the output. Hold back at least 4 input bytes
	// (two data bytes plus up to two end-of-string markers) so that the
	// remaining output covers that gap.
	size_t rounds = (*slen - 4) / 4;

	// Account for all rounds up front; rounds that end up not being
	// executed are given back below.
	*slen -= rounds * 4;
	*olen += rounds * 3;

	for (;;) {
		int ok;

		if (rounds >= 8) {
			ok = dec_loop_generic_32_inner(s, o, &rounds) &&
			     dec_loop_generic_32_inner(s, o, &rounds) &&
			     dec_loop_generic_32_inner(s, o, &rounds) &&
			     dec_loop_generic_32_inner(s, o, &rounds) &&
			     dec_loop_generic_32_inner(s, o, &rounds) &&
			     dec_loop_generic_32_inner(s, o, &rounds) &&
			     dec_loop_generic_32_inner(s, o, &rounds) &&
			     dec_loop_generic_32_inner(s, o, &rounds);
		}
		else if (rounds >= 4) {
			ok = dec_loop_generic_32_inner(s, o, &rounds) &&
			     dec_loop_generic_32_inner(s, o, &rounds) &&
			     dec_loop_generic_32_inner(s, o, &rounds) &&
			     dec_loop_generic_32_inner(s, o, &rounds);
		}
		else if (rounds >= 2) {
			ok = dec_loop_generic_32_inner(s, o, &rounds) &&
			     dec_loop_generic_32_inner(s, o, &rounds);
		}
		else {
			// Final single round; its outcome no longer matters for
			// the loop because we stop either way.
			dec_loop_generic_32_inner(s, o, &rounds);
			break;
		}

		// Stop on the first rejected group or when all rounds are done.
		if (!ok || rounds == 0) {
			break;
		}
	}

	// Give back the rounds that were not executed:
	*slen += rounds * 4;
	*olen -= rounds * 3;
}
|
|
@ -1,73 +0,0 @@
|
|||
static inline void
|
||||
enc_loop_generic_32_inner (const uint8_t **s, uint8_t **o)
|
||||
{
|
||||
uint32_t src;
|
||||
|
||||
// Load input:
|
||||
memcpy(&src, *s, sizeof (src));
|
||||
|
||||
// Reorder to 32-bit big-endian, if not already in that format. The
|
||||
// workset must be in big-endian, otherwise the shifted bits do not
|
||||
// carry over properly among adjacent bytes:
|
||||
src = BASE64_HTOBE32(src);
|
||||
|
||||
// Two indices for the 12-bit lookup table:
|
||||
const size_t index0 = (src >> 20) & 0xFFFU;
|
||||
const size_t index1 = (src >> 8) & 0xFFFU;
|
||||
|
||||
// Table lookup and store:
|
||||
memcpy(*o + 0, base64_table_enc_12bit + index0, 2);
|
||||
memcpy(*o + 2, base64_table_enc_12bit + index1, 2);
|
||||
|
||||
*s += 3;
|
||||
*o += 4;
|
||||
}
|
||||
|
||||
// Bulk generic 32-bit encoder loop: encodes 3 input bytes into 4 output
// bytes per round. Inputs shorter than 4 bytes are left untouched.
static inline void
enc_loop_generic_32 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
{
	if (*slen < 4) {
		return;
	}

	// Every round loads 4 bytes but only consumes 3. Keep at least one
	// byte in reserve after the last round so that the final load does
	// not run past the end of the input buffer.
	size_t rounds = (*slen - 1) / 3;

	*slen -= rounds * 3;	// 3 bytes consumed per round
	*olen += rounds * 4;	// 4 bytes produced per round

	for (;;) {
		if (rounds >= 8) {
			enc_loop_generic_32_inner(s, o);
			enc_loop_generic_32_inner(s, o);
			enc_loop_generic_32_inner(s, o);
			enc_loop_generic_32_inner(s, o);
			enc_loop_generic_32_inner(s, o);
			enc_loop_generic_32_inner(s, o);
			enc_loop_generic_32_inner(s, o);
			enc_loop_generic_32_inner(s, o);
			rounds -= 8;
		}
		else if (rounds >= 4) {
			enc_loop_generic_32_inner(s, o);
			enc_loop_generic_32_inner(s, o);
			enc_loop_generic_32_inner(s, o);
			enc_loop_generic_32_inner(s, o);
			rounds -= 4;
		}
		else if (rounds >= 2) {
			enc_loop_generic_32_inner(s, o);
			enc_loop_generic_32_inner(s, o);
			rounds -= 2;
		}
		else {
			// Final single round.
			enc_loop_generic_32_inner(s, o);
			break;
		}

		if (rounds == 0) {
			break;
		}
	}
}
|
|
@ -1,77 +0,0 @@
|
|||
static inline void
|
||||
enc_loop_generic_64_inner (const uint8_t **s, uint8_t **o)
|
||||
{
|
||||
uint64_t src;
|
||||
|
||||
// Load input:
|
||||
memcpy(&src, *s, sizeof (src));
|
||||
|
||||
// Reorder to 64-bit big-endian, if not already in that format. The
|
||||
// workset must be in big-endian, otherwise the shifted bits do not
|
||||
// carry over properly among adjacent bytes:
|
||||
src = BASE64_HTOBE64(src);
|
||||
|
||||
// Four indices for the 12-bit lookup table:
|
||||
const size_t index0 = (src >> 52) & 0xFFFU;
|
||||
const size_t index1 = (src >> 40) & 0xFFFU;
|
||||
const size_t index2 = (src >> 28) & 0xFFFU;
|
||||
const size_t index3 = (src >> 16) & 0xFFFU;
|
||||
|
||||
// Table lookup and store:
|
||||
memcpy(*o + 0, base64_table_enc_12bit + index0, 2);
|
||||
memcpy(*o + 2, base64_table_enc_12bit + index1, 2);
|
||||
memcpy(*o + 4, base64_table_enc_12bit + index2, 2);
|
||||
memcpy(*o + 6, base64_table_enc_12bit + index3, 2);
|
||||
|
||||
*s += 6;
|
||||
*o += 8;
|
||||
}
|
||||
|
||||
// Bulk-encode as many 6-byte groups as can safely be read with 64-bit loads.
// Updates *s, *o, *slen (bytes left to encode) and *olen (bytes produced).
// Does nothing when fewer than 8 input bytes are available.
static inline void
enc_loop_generic_64 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
{
	// Not enough input for even a single 8-byte load:
	if (*slen < 8) {
		return;
	}

	// Every round consumes 6 bytes but loads 8. Keep at least two bytes
	// in reserve after the final round so that the last load never reads
	// past the end of the input buffer:
	size_t rounds = (*slen - 2) / 6;

	*slen -= rounds * 6;	// 6 bytes consumed per round
	*olen += rounds * 8;	// 8 bytes produced per round

	// Manually unrolled: batches of 8 for as long as possible, then at
	// most one batch of 4, one of 2, and one single round.
	while (rounds >= 8) {
		enc_loop_generic_64_inner(s, o);
		enc_loop_generic_64_inner(s, o);
		enc_loop_generic_64_inner(s, o);
		enc_loop_generic_64_inner(s, o);
		enc_loop_generic_64_inner(s, o);
		enc_loop_generic_64_inner(s, o);
		enc_loop_generic_64_inner(s, o);
		enc_loop_generic_64_inner(s, o);
		rounds -= 8;
	}

	if (rounds >= 4) {
		enc_loop_generic_64_inner(s, o);
		enc_loop_generic_64_inner(s, o);
		enc_loop_generic_64_inner(s, o);
		enc_loop_generic_64_inner(s, o);
		rounds -= 4;
	}

	if (rounds >= 2) {
		enc_loop_generic_64_inner(s, o);
		enc_loop_generic_64_inner(s, o);
		rounds -= 2;
	}

	if (rounds >= 1) {
		enc_loop_generic_64_inner(s, o);
	}
}
|
39
deps/base64/base64/lib/arch/generic/codec.c
vendored
39
deps/base64/base64/lib/arch/generic/codec.c
vendored
|
@ -1,39 +0,0 @@
|
|||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "../../../include/libbase64.h"
|
||||
#include "../../tables/tables.h"
|
||||
#include "../../codecs.h"
|
||||
#include "config.h"
|
||||
#include "../../env.h"
|
||||
|
||||
#if BASE64_WORDSIZE == 32
|
||||
# include "32/enc_loop.c"
|
||||
#elif BASE64_WORDSIZE == 64
|
||||
# include "64/enc_loop.c"
|
||||
#endif
|
||||
|
||||
#if BASE64_WORDSIZE >= 32
|
||||
# include "32/dec_loop.c"
|
||||
#endif
|
||||
|
||||
// Plain (non-SIMD) encoder. The function body is assembled from the included
// head and tail fragments, with the word-sized bulk loop matching
// BASE64_WORDSIZE placed in between. For any other word size no bulk loop is
// compiled in and only the head/tail code runs.
BASE64_ENC_FUNCTION(plain)
{
	#include "enc_head.c"
#if BASE64_WORDSIZE == 64
	enc_loop_generic_64(&s, &slen, &o, &olen);
#elif BASE64_WORDSIZE == 32
	enc_loop_generic_32(&s, &slen, &o, &olen);
#endif
	#include "enc_tail.c"
}
|
||||
|
||||
// Plain (non-SIMD) decoder. The function body is assembled from the included
// head and tail fragments; on targets with a word size of at least 32 bits
// the 32-bit bulk decoding loop runs in between.
BASE64_DEC_FUNCTION(plain)
{
	#include "dec_head.c"
#if BASE64_WORDSIZE < 32
	// No word-sized bulk loop on this target; head/tail code only.
#else
	dec_loop_generic_32(&s, &slen, &o, &olen);
#endif
	#include "dec_tail.c"
}
|
37
deps/base64/base64/lib/arch/generic/dec_head.c
vendored
37
deps/base64/base64/lib/arch/generic/dec_head.c
vendored
|
@ -1,37 +0,0 @@
|
|||
int ret = 0;
|
||||
const uint8_t *s = (const uint8_t *) src;
|
||||
uint8_t *o = (uint8_t *) out;
|
||||
uint8_t q;
|
||||
|
||||
// Use local temporaries to avoid cache thrashing:
|
||||
size_t olen = 0;
|
||||
size_t slen = srclen;
|
||||
struct base64_state st;
|
||||
st.eof = state->eof;
|
||||
st.bytes = state->bytes;
|
||||
st.carry = state->carry;
|
||||
|
||||
// If we previously saw an EOF or an invalid character, bail out:
|
||||
if (st.eof) {
|
||||
*outlen = 0;
|
||||
ret = 0;
|
||||
// If there was a trailing '=' to check, check it:
|
||||
if (slen && (st.eof == BASE64_AEOF)) {
|
||||
state->bytes = 0;
|
||||
state->eof = BASE64_EOF;
|
||||
ret = ((base64_table_dec_8bit[*s++] == 254) && (slen == 1)) ? 1 : 0;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Turn four 6-bit numbers into three bytes:
|
||||
// out[0] = 11111122
|
||||
// out[1] = 22223333
|
||||
// out[2] = 33444444
|
||||
|
||||
// Duff's device again:
|
||||
switch (st.bytes)
|
||||
{
|
||||
for (;;)
|
||||
{
|
||||
case 0:
|
91
deps/base64/base64/lib/arch/generic/dec_tail.c
vendored
91
deps/base64/base64/lib/arch/generic/dec_tail.c
vendored
|
@ -1,91 +0,0 @@
|
|||
if (slen-- == 0) {
|
||||
ret = 1;
|
||||
break;
|
||||
}
|
||||
if ((q = base64_table_dec_8bit[*s++]) >= 254) {
|
||||
st.eof = BASE64_EOF;
|
||||
// Treat character '=' as invalid for byte 0:
|
||||
break;
|
||||
}
|
||||
st.carry = q << 2;
|
||||
st.bytes++;
|
||||
|
||||
// Deliberate fallthrough:
|
||||
BASE64_FALLTHROUGH
|
||||
|
||||
case 1: if (slen-- == 0) {
|
||||
ret = 1;
|
||||
break;
|
||||
}
|
||||
if ((q = base64_table_dec_8bit[*s++]) >= 254) {
|
||||
st.eof = BASE64_EOF;
|
||||
// Treat character '=' as invalid for byte 1:
|
||||
break;
|
||||
}
|
||||
*o++ = st.carry | (q >> 4);
|
||||
st.carry = q << 4;
|
||||
st.bytes++;
|
||||
olen++;
|
||||
|
||||
// Deliberate fallthrough:
|
||||
BASE64_FALLTHROUGH
|
||||
|
||||
case 2: if (slen-- == 0) {
|
||||
ret = 1;
|
||||
break;
|
||||
}
|
||||
if ((q = base64_table_dec_8bit[*s++]) >= 254) {
|
||||
st.bytes++;
|
||||
// When q == 254, the input char is '='.
|
||||
// Check if next byte is also '=':
|
||||
if (q == 254) {
|
||||
if (slen-- != 0) {
|
||||
st.bytes = 0;
|
||||
// EOF:
|
||||
st.eof = BASE64_EOF;
|
||||
q = base64_table_dec_8bit[*s++];
|
||||
ret = ((q == 254) && (slen == 0)) ? 1 : 0;
|
||||
break;
|
||||
}
|
||||
else {
|
||||
// Almost EOF
|
||||
st.eof = BASE64_AEOF;
|
||||
ret = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// If we get here, there was an error:
|
||||
break;
|
||||
}
|
||||
*o++ = st.carry | (q >> 2);
|
||||
st.carry = q << 6;
|
||||
st.bytes++;
|
||||
olen++;
|
||||
|
||||
// Deliberate fallthrough:
|
||||
BASE64_FALLTHROUGH
|
||||
|
||||
case 3: if (slen-- == 0) {
|
||||
ret = 1;
|
||||
break;
|
||||
}
|
||||
if ((q = base64_table_dec_8bit[*s++]) >= 254) {
|
||||
st.bytes = 0;
|
||||
st.eof = BASE64_EOF;
|
||||
// When q == 254, the input char is '='. Return 1 and EOF.
|
||||
// When q == 255, the input char is invalid. Return 0 and EOF.
|
||||
ret = ((q == 254) && (slen == 0)) ? 1 : 0;
|
||||
break;
|
||||
}
|
||||
*o++ = st.carry | q;
|
||||
st.carry = 0;
|
||||
st.bytes = 0;
|
||||
olen++;
|
||||
}
|
||||
}
|
||||
|
||||
state->eof = st.eof;
|
||||
state->bytes = st.bytes;
|
||||
state->carry = st.carry;
|
||||
*outlen = olen;
|
||||
return ret;
|
24
deps/base64/base64/lib/arch/generic/enc_head.c
vendored
24
deps/base64/base64/lib/arch/generic/enc_head.c
vendored
|
@ -1,24 +0,0 @@
|
|||
// Assume that *out is large enough to contain the output.
|
||||
// Theoretically it should be 4/3 the length of src.
|
||||
const uint8_t *s = (const uint8_t *) src;
|
||||
uint8_t *o = (uint8_t *) out;
|
||||
|
||||
// Use local temporaries to avoid cache thrashing:
|
||||
size_t olen = 0;
|
||||
size_t slen = srclen;
|
||||
struct base64_state st;
|
||||
st.bytes = state->bytes;
|
||||
st.carry = state->carry;
|
||||
|
||||
// Turn three bytes into four 6-bit numbers:
|
||||
// in[0] = 00111111
|
||||
// in[1] = 00112222
|
||||
// in[2] = 00222233
|
||||
// in[3] = 00333333
|
||||
|
||||
// Duff's device, a for() loop inside a switch() statement. Legal!
|
||||
switch (st.bytes)
|
||||
{
|
||||
for (;;)
|
||||
{
|
||||
case 0:
|
34
deps/base64/base64/lib/arch/generic/enc_tail.c
vendored
34
deps/base64/base64/lib/arch/generic/enc_tail.c
vendored
|
@ -1,34 +0,0 @@
|
|||
if (slen-- == 0) {
|
||||
break;
|
||||
}
|
||||
*o++ = base64_table_enc_6bit[*s >> 2];
|
||||
st.carry = (*s++ << 4) & 0x30;
|
||||
st.bytes++;
|
||||
olen += 1;
|
||||
|
||||
// Deliberate fallthrough:
|
||||
BASE64_FALLTHROUGH
|
||||
|
||||
case 1: if (slen-- == 0) {
|
||||
break;
|
||||
}
|
||||
*o++ = base64_table_enc_6bit[st.carry | (*s >> 4)];
|
||||
st.carry = (*s++ << 2) & 0x3C;
|
||||
st.bytes++;
|
||||
olen += 1;
|
||||
|
||||
// Deliberate fallthrough:
|
||||
BASE64_FALLTHROUGH
|
||||
|
||||
case 2: if (slen-- == 0) {
|
||||
break;
|
||||
}
|
||||
*o++ = base64_table_enc_6bit[st.carry | (*s >> 6)];
|
||||
*o++ = base64_table_enc_6bit[*s++ & 0x3F];
|
||||
st.bytes = 0;
|
||||
olen += 2;
|
||||
}
|
||||
}
|
||||
state->bytes = st.bytes;
|
||||
state->carry = st.carry;
|
||||
*outlen = olen;
|
77
deps/base64/base64/lib/arch/neon32/codec.c
vendored
77
deps/base64/base64/lib/arch/neon32/codec.c
vendored
|
@ -1,77 +0,0 @@
|
|||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "../../../include/libbase64.h"
|
||||
#include "../../tables/tables.h"
|
||||
#include "../../codecs.h"
|
||||
#include "config.h"
|
||||
#include "../../env.h"
|
||||
|
||||
#ifdef __arm__
|
||||
# if (defined(__ARM_NEON__) || defined(__ARM_NEON)) && HAVE_NEON32
|
||||
# define BASE64_USE_NEON32
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifdef BASE64_USE_NEON32
|
||||
#include <arm_neon.h>
|
||||
|
||||
// Only enable inline assembly on supported compilers.
|
||||
#if defined(__GNUC__) || defined(__clang__)
|
||||
#define BASE64_NEON32_USE_ASM
|
||||
#endif
|
||||
|
||||
// Emulation of the NEON64 `vqtbl1q_u8` intrinsic for NEON32: a 128-bit wide
// lookup of `indices` in the 128-bit table `lut`. NEON32 only offers 64-bit
// wide lookups, so the lookup is done in two halves and recombined.
static inline uint8x16_t
vqtbl1q_u8 (const uint8x16_t lut, const uint8x16_t indices)
{
	// Present the 128-bit table as the pair of 64-bit halves that
	// vtbl2_u8 takes:
	uint8x8x2_t halves;

	halves.val[0] = vget_low_u8(lut);
	halves.val[1] = vget_high_u8(lut);

	// Look up the low and high 8 indices separately and glue the two
	// 64-bit results back together:
	return vcombine_u8(
		vtbl2_u8(halves, vget_low_u8(indices)),
		vtbl2_u8(halves, vget_high_u8(indices)));
}
|
||||
|
||||
#include "../generic/32/dec_loop.c"
|
||||
#include "../generic/32/enc_loop.c"
|
||||
#include "dec_loop.c"
|
||||
#include "enc_reshuffle.c"
|
||||
#include "enc_translate.c"
|
||||
#include "enc_loop.c"
|
||||
|
||||
#endif // BASE64_USE_NEON32
|
||||
|
||||
// Stride size is so large on these NEON 32-bit functions
|
||||
// (48 bytes encode, 64 bytes decode) that we inline the
|
||||
// uint32 codec to stay performant on smaller inputs.
|
||||
|
||||
BASE64_ENC_FUNCTION(neon32)
|
||||
{
|
||||
#ifdef BASE64_USE_NEON32
|
||||
#include "../generic/enc_head.c"
|
||||
enc_loop_neon32(&s, &slen, &o, &olen);
|
||||
enc_loop_generic_32(&s, &slen, &o, &olen);
|
||||
#include "../generic/enc_tail.c"
|
||||
#else
|
||||
BASE64_ENC_STUB
|
||||
#endif
|
||||
}
|
||||
|
||||
BASE64_DEC_FUNCTION(neon32)
|
||||
{
|
||||
#ifdef BASE64_USE_NEON32
|
||||
#include "../generic/dec_head.c"
|
||||
dec_loop_neon32(&s, &slen, &o, &olen);
|
||||
dec_loop_generic_32(&s, &slen, &o, &olen);
|
||||
#include "../generic/dec_tail.c"
|
||||
#else
|
||||
BASE64_DEC_STUB
|
||||
#endif
|
||||
}
|
106
deps/base64/base64/lib/arch/neon32/dec_loop.c
vendored
106
deps/base64/base64/lib/arch/neon32/dec_loop.c
vendored
|
@ -1,106 +0,0 @@
|
|||
// Return 1 if any byte of `v` is nonzero, 0 if the whole vector is zero.
static inline int
is_nonzero (const uint8x16_t v)
{
	// Narrow each 64-bit half of the vector to 32 bits with saturation,
	// so a half that is nonzero stays nonzero after narrowing:
	const uint32x2_t narrowed = vqmovn_u64(vreinterpretq_u64_u8(v));

	// Spill the two narrowed halves into one scalar and test that:
	uint64_t folded;

	vst1_u64(&folded, vreinterpret_u64_u32(narrowed));

	return folded != 0;
}
|
||||
|
||||
// Translate every byte of `v` through an 8-entry table of delta values.
// The lookup is done per 64-bit half and the halves are recombined.
static inline uint8x16_t
delta_lookup (const uint8x16_t v)
{
	const uint8x8_t lut = {
		0, 16, 19, 4, (uint8_t) -65, (uint8_t) -65, (uint8_t) -71, (uint8_t) -71,
	};

	const uint8x8_t lo = vtbl1_u8(lut, vget_low_u8(v));
	const uint8x8_t hi = vtbl1_u8(lut, vget_high_u8(v));

	return vcombine_u8(lo, hi);
}
|
||||
|
||||
// Decode one lane of 16 input characters in place: on return, *lane holds
// the input with per-character delta values added. The return value is a
// classification mask built from two nibble lookups; the caller treats any
// nonzero byte in it as a sign of invalid input.
// See the SSSE3 decoder for an explanation of the algorithm.
static inline uint8x16_t
dec_loop_neon32_lane (uint8x16_t *lane)
{
	// Classification bits indexed by the low nibble of each character:
	const uint8x16_t lut_lo = {
		0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11,
		0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A
	};

	// Classification bits indexed by the high nibble of each character:
	const uint8x16_t lut_hi = {
		0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08,
		0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10
	};

	// Work from a snapshot of the input, since *lane is overwritten:
	const uint8x16_t input = *lane;

	const uint8x16_t nib_hi = vshrq_n_u8(input, 4);
	const uint8x16_t nib_lo = vandq_u8(input, vdupq_n_u8(0x0F));

	// 0xFF (-1) wherever the character is 0x2F ('/'), 0x00 elsewhere:
	const uint8x16_t is_2F = vceqq_u8(input, vdupq_n_u8(0x2F));

	// The delta index is the high nibble, lowered by one for 0x2F. Add
	// the looked-up delta values to the input:
	*lane = vaddq_u8(input, delta_lookup(vaddq_u8(is_2F, nib_hi)));

	// The validity mask is the AND of both nibble classifications:
	return vandq_u8(
		vqtbl1q_u8(lut_lo, nib_lo),
		vqtbl1q_u8(lut_hi, nib_hi));
}
|
||||
|
||||
// Bulk-decode whole 64-byte blocks of Base64 input into 48 output bytes each.
// Updates *s, *o, *slen (bytes left to decode) and *olen (bytes produced).
// Stops at the first block that contains an invalid character, leaving that
// block and everything after it unconsumed. Does nothing when fewer than 64
// input bytes are available.
static inline void
dec_loop_neon32 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
{
	if (*slen < 64) {
		return;
	}

	// One round per 64 input bytes. Unlike the SSE codecs, no extra
	// trailing zero bytes are written, so no input bytes have to be
	// held in reserve:
	size_t rounds = *slen / 64;

	// Account for all rounds up front; corrected below on early exit:
	*slen -= rounds * 64;	// 64 bytes consumed per round
	*olen += rounds * 48;	// 48 bytes produced per round

	while (rounds > 0) {

		// Load 64 bytes and deinterleave into four lanes:
		uint8x16x4_t str = vld4q_u8(*s);

		// Decode each lane in place and merge the validity masks:
		uint8x16_t classified = dec_loop_neon32_lane(&str.val[0]);

		classified = vorrq_u8(classified, dec_loop_neon32_lane(&str.val[1]));
		classified = vorrq_u8(classified, dec_loop_neon32_lane(&str.val[2]));
		classified = vorrq_u8(classified, dec_loop_neon32_lane(&str.val[3]));

		// A nonzero mask means the block holds invalid input. Stop
		// here and leave error checking and reporting to the
		// bytewise code that runs afterwards:
		if (is_nonzero(classified)) {
			break;
		}

		// Compress four 6-bit values into three bytes:
		uint8x16x3_t dec;

		dec.val[0] = vorrq_u8(vshlq_n_u8(str.val[0], 2), vshrq_n_u8(str.val[1], 4));
		dec.val[1] = vorrq_u8(vshlq_n_u8(str.val[1], 4), vshrq_n_u8(str.val[2], 2));
		dec.val[2] = vorrq_u8(vshlq_n_u8(str.val[2], 6), str.val[3]);

		// Interleave and store the 48 decoded bytes:
		vst3q_u8(*o, dec);

		*s += 64;
		*o += 48;
		rounds--;
	}

	// Give back the rounds that were accounted for but not executed:
	*slen += rounds * 64;
	*olen -= rounds * 48;
}
|
170
deps/base64/base64/lib/arch/neon32/enc_loop.c
vendored
170
deps/base64/base64/lib/arch/neon32/enc_loop.c
vendored
|
@ -1,170 +0,0 @@
|
|||
#ifdef BASE64_NEON32_USE_ASM
|
||||
// Encode 48 input bytes at *s into 64 Base64 characters at *o and advance
// both pointers, entirely in inline assembly.
//
// This duplicates the functionality of enc_loop_neon32_inner. It gives a
// significant speedup over the NEON intrinsics, which do not always generate
// very good code. The logic is lifted directly from the intrinsics version,
// which can serve as a guide to this code.
static inline void
enc_loop_neon32_inner_asm (const uint8_t **s, uint8_t **o)
{
	// Numeric constants, broadcast to all 16 byte lanes.
	const uint8x16_t k25 = vdupq_n_u8(25);
	const uint8x16_t k51 = vdupq_n_u8(51);
	const uint8x16_t k63 = vdupq_n_u8(63);

	// A lookup table containing the absolute offsets for all ranges.
	const uint8x16_t offsets = {
		 65U,  71U, 252U, 252U,
		252U, 252U, 252U, 252U,
		252U, 252U, 252U, 252U,
		237U, 240U,   0U,   0U
	};

	// Scratch registers: the four reshuffled 6-bit lanes and the four
	// "greater than 25" masks. They are early-clobber outputs of the asm.
	uint8x16_t six0, six1, six2, six3;
	uint8x16_t gt0, gt1, gt2, gt3;

	__asm__ (

		// Load 48 bytes and deinterleave. The bytes are loaded to
		// hard-coded registers q12, q13 and q14, to ensure that they
		// are contiguous. Increment the source pointer.
		"vld3.8 {d24, d26, d28}, [%[src]]! \n\t"
		"vld3.8 {d25, d27, d29}, [%[src]]! \n\t"

		// Reshuffle the bytes using temporaries.
		"vshr.u8 %q[t0], q12, #2 \n\t"
		"vshr.u8 %q[t1], q13, #4 \n\t"
		"vshr.u8 %q[t2], q14, #6 \n\t"
		"vsli.8 %q[t1], q12, #4 \n\t"
		"vsli.8 %q[t2], q13, #2 \n\t"
		"vand.u8 %q[t1], %q[t1], %q[n63] \n\t"
		"vand.u8 %q[t2], %q[t2], %q[n63] \n\t"
		"vand.u8 %q[t3], q14, %q[n63] \n\t"

		// t0..t3 are the reshuffled inputs. Create LUT indices.
		"vqsub.u8 q12, %q[t0], %q[n51] \n\t"
		"vqsub.u8 q13, %q[t1], %q[n51] \n\t"
		"vqsub.u8 q14, %q[t2], %q[n51] \n\t"
		"vqsub.u8 q15, %q[t3], %q[n51] \n\t"

		// Create the mask for range #0.
		"vcgt.u8 %q[m0], %q[t0], %q[n25] \n\t"
		"vcgt.u8 %q[m1], %q[t1], %q[n25] \n\t"
		"vcgt.u8 %q[m2], %q[t2], %q[n25] \n\t"
		"vcgt.u8 %q[m3], %q[t3], %q[n25] \n\t"

		// Subtract -1 to correct the LUT indices.
		"vsub.u8 q12, %q[m0] \n\t"
		"vsub.u8 q13, %q[m1] \n\t"
		"vsub.u8 q14, %q[m2] \n\t"
		"vsub.u8 q15, %q[m3] \n\t"

		// Lookup the delta values.
		"vtbl.u8 d24, {%q[lut]}, d24 \n\t"
		"vtbl.u8 d25, {%q[lut]}, d25 \n\t"
		"vtbl.u8 d26, {%q[lut]}, d26 \n\t"
		"vtbl.u8 d27, {%q[lut]}, d27 \n\t"
		"vtbl.u8 d28, {%q[lut]}, d28 \n\t"
		"vtbl.u8 d29, {%q[lut]}, d29 \n\t"
		"vtbl.u8 d30, {%q[lut]}, d30 \n\t"
		"vtbl.u8 d31, {%q[lut]}, d31 \n\t"

		// Add the delta values.
		"vadd.u8 q12, %q[t0] \n\t"
		"vadd.u8 q13, %q[t1] \n\t"
		"vadd.u8 q14, %q[t2] \n\t"
		"vadd.u8 q15, %q[t3] \n\t"

		// Store 64 bytes and interleave. Increment the dest pointer.
		"vst4.8 {d24, d26, d28, d30}, [%[dst]]! \n\t"
		"vst4.8 {d25, d27, d29, d31}, [%[dst]]! \n\t"

		// Outputs (modified).
		: [src] "+r"  (*s),
		  [dst] "+r"  (*o),
		  [t0]  "=&w" (six0),
		  [t1]  "=&w" (six1),
		  [t2]  "=&w" (six2),
		  [t3]  "=&w" (six3),
		  [m0]  "=&w" (gt0),
		  [m1]  "=&w" (gt1),
		  [m2]  "=&w" (gt2),
		  [m3]  "=&w" (gt3)

		// Inputs (not modified).
		: [lut] "w" (offsets),
		  [n25] "w" (k25),
		  [n51] "w" (k51),
		  [n63] "w" (k63)

		// Clobbers.
		: "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31",
		  "cc", "memory"
	);
}
|
||||
#endif
|
||||
|
||||
// Encode one block of 48 input bytes at *s into 64 Base64 characters at *o
// and advance both pointers. Uses the inline-assembly implementation when
// BASE64_NEON32_USE_ASM is defined, and NEON intrinsics otherwise.
static inline void
enc_loop_neon32_inner (const uint8_t **s, uint8_t **o)
{
#ifndef BASE64_NEON32_USE_ASM

	// Load 48 bytes deinterleaved, reshuffle them into four lanes of
	// 6-bit values, translate those to the Base64 alphabet, and store
	// the 64 result bytes interleaved:
	vst4q_u8(*o, enc_translate(enc_reshuffle(vld3q_u8(*s))));

	*s += 48;
	*o += 64;
#else
	// The assembly version advances both pointers itself:
	enc_loop_neon32_inner_asm(s, o);
#endif
}
|
||||
|
||||
// Bulk-encode whole 48-byte blocks of input into 64 output bytes each.
// Updates *s, *o, *slen (bytes left to encode) and *olen (bytes produced).
// Does nothing when fewer than 48 input bytes are available.
static inline void
enc_loop_neon32 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
{
	size_t rounds = *slen / 48;

	*slen -= rounds * 48;	// 48 bytes consumed per round
	*olen += rounds * 64;	// 64 bytes produced per round

	// Manually unrolled: batches of 8 for as long as possible, then at
	// most one batch of 4, one of 2, and one single round.
	while (rounds >= 8) {
		enc_loop_neon32_inner(s, o);
		enc_loop_neon32_inner(s, o);
		enc_loop_neon32_inner(s, o);
		enc_loop_neon32_inner(s, o);
		enc_loop_neon32_inner(s, o);
		enc_loop_neon32_inner(s, o);
		enc_loop_neon32_inner(s, o);
		enc_loop_neon32_inner(s, o);
		rounds -= 8;
	}

	if (rounds >= 4) {
		enc_loop_neon32_inner(s, o);
		enc_loop_neon32_inner(s, o);
		enc_loop_neon32_inner(s, o);
		enc_loop_neon32_inner(s, o);
		rounds -= 4;
	}

	if (rounds >= 2) {
		enc_loop_neon32_inner(s, o);
		enc_loop_neon32_inner(s, o);
		rounds -= 2;
	}

	if (rounds >= 1) {
		enc_loop_neon32_inner(s, o);
	}
}
|
|
@ -1,31 +0,0 @@
|
|||
// Split three lanes of 8-bit input bytes into four lanes of 6-bit values.
//
// Input:
//   in[0]  = a7 a6 a5 a4 a3 a2 a1 a0
//   in[1]  = b7 b6 b5 b4 b3 b2 b1 b0
//   in[2]  = c7 c6 c5 c4 c3 c2 c1 c0
//
// Output:
//   out[0] = 00 00 a7 a6 a5 a4 a3 a2
//   out[1] = 00 00 a1 a0 b7 b6 b5 b4
//   out[2] = 00 00 b3 b2 b1 b0 c7 c6
//   out[3] = 00 00 c5 c4 c3 c2 c1 c0
static inline uint8x16x4_t
enc_reshuffle (uint8x16x3_t in)
{
	const uint8x16_t low6 = vdupq_n_u8(0x3F);
	uint8x16x4_t out;

	// First output: the top six bits of a. The shift already leaves the
	// high two bits clear:
	out.val[0] = vshrq_n_u8(in.val[0], 2);

	// Second output: the top four bits of b, with a shifted in above
	// them, then masked down to six bits:
	out.val[1] = vandq_u8(
		vsliq_n_u8(vshrq_n_u8(in.val[1], 4), in.val[0], 4),
		low6);

	// Third output: the top two bits of c, with b shifted in above
	// them, then masked down to six bits:
	out.val[2] = vandq_u8(
		vsliq_n_u8(vshrq_n_u8(in.val[2], 6), in.val[1], 2),
		low6);

	// Fourth output: the low six bits of c:
	out.val[3] = vandq_u8(in.val[2], low6);

	return out;
}
|
|
@ -1,57 +0,0 @@
|
|||
// Translate four lanes of 6-bit values (0..63) to the Base64 alphabet.
//
// There are five ranges, each with its own offset to add:
//   #  From       To         Abs    Index   Characters
//   0  [0..25]    [65..90]   +65        0   ABCDEFGHIJKLMNOPQRSTUVWXYZ
//   1  [26..51]   [97..122]  +71        1   abcdefghijklmnopqrstuvwxyz
//   2  [52..61]   [48..57]    -4   [2..11]  0123456789
//   3  [62]       [43]       -19       12   +
//   4  [63]       [47]       -16       13   /
static inline uint8x16x4_t
enc_translate (const uint8x16x4_t in)
{
	// A lookup table containing the absolute offsets for all ranges:
	const uint8x16_t lut = {
		 65U,  71U, 252U, 252U,
		252U, 252U, 252U, 252U,
		252U, 252U, 252U, 252U,
		237U, 240U,   0U,   0U
	};

	const uint8x16_t n51 = vdupq_n_u8(51);
	const uint8x16_t n25 = vdupq_n_u8(25);

	uint8x16x4_t out;

	for (int lane = 0; lane < 4; lane++) {
		const uint8x16_t value = in.val[lane];

		// Saturating subtract of 51: the index for range #0 is
		// right, the others are 1 less than expected:
		uint8x16_t index = vqsubq_u8(value, n51);

		// The comparison yields 0xFF (-1) for ranges #1..4 and 0x00
		// for range #0; subtracting -1 adds the missing 1, after
		// which all indices are correct:
		index = vsubq_u8(index, vcgtq_u8(value, n25));

		// Look up the offset and add it to the value:
		out.val[lane] = vaddq_u8(value, vqtbl1q_u8(lut, index));
	}

	return out;
}
|
97
deps/base64/base64/lib/arch/neon64/codec.c
vendored
97
deps/base64/base64/lib/arch/neon64/codec.c
vendored
|
@ -1,97 +0,0 @@
|
|||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "../../../include/libbase64.h"
|
||||
#include "../../tables/tables.h"
|
||||
#include "../../codecs.h"
|
||||
#include "config.h"
|
||||
#include "../../env.h"
|
||||
|
||||
#ifdef __aarch64__
|
||||
# if (defined(__ARM_NEON__) || defined(__ARM_NEON)) && HAVE_NEON64
|
||||
# define BASE64_USE_NEON64
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifdef BASE64_USE_NEON64
|
||||
#include <arm_neon.h>
|
||||
|
||||
// Only enable inline assembly on supported compilers.
|
||||
#if defined(__GNUC__) || defined(__clang__)
|
||||
#define BASE64_NEON64_USE_ASM
|
||||
#endif
|
||||
|
||||
// Load the 64-byte table at `p` into four 128-bit vectors.
static inline uint8x16x4_t
load_64byte_table (const uint8_t *p)
{
#ifndef BASE64_NEON64_USE_ASM
	return vld1q_u8_x4(p);
#else
	// Force the table to be loaded into contiguous registers. GCC will not
	// normally allocate contiguous registers for a `uint8x16x4_t'. These
	// registers are chosen to not conflict with the ones in the enc loop.
	register uint8x16_t row0 __asm__ ("v8");
	register uint8x16_t row1 __asm__ ("v9");
	register uint8x16_t row2 __asm__ ("v10");
	register uint8x16_t row3 __asm__ ("v11");

	__asm__ (
		"ld1 {%[t0].16b, %[t1].16b, %[t2].16b, %[t3].16b}, [%[src]], #64 \n\t"
		: [src] "+r" (p),
		  [t0]  "=w" (row0),
		  [t1]  "=w" (row1),
		  [t2]  "=w" (row2),
		  [t3]  "=w" (row3)
	);

	// Package the four registers as the table type callers expect:
	uint8x16x4_t table;

	table.val[0] = row0;
	table.val[1] = row1;
	table.val[2] = row2;
	table.val[3] = row3;

	return table;
#endif
}
|
||||
|
||||
#include "../generic/32/dec_loop.c"
|
||||
#include "../generic/64/enc_loop.c"
|
||||
#include "dec_loop.c"
|
||||
|
||||
#ifdef BASE64_NEON64_USE_ASM
|
||||
# include "enc_loop_asm.c"
|
||||
#else
|
||||
# include "enc_reshuffle.c"
|
||||
# include "enc_loop.c"
|
||||
#endif
|
||||
|
||||
#endif // BASE64_USE_NEON64
|
||||
|
||||
// Stride size is so large on these NEON 64-bit functions
|
||||
// (48 bytes encode, 64 bytes decode) that we inline the
|
||||
// uint64 codec to stay performant on smaller inputs.
|
||||
|
||||
BASE64_ENC_FUNCTION(neon64)
|
||||
{
|
||||
#ifdef BASE64_USE_NEON64
|
||||
#include "../generic/enc_head.c"
|
||||
enc_loop_neon64(&s, &slen, &o, &olen);
|
||||
enc_loop_generic_64(&s, &slen, &o, &olen);
|
||||
#include "../generic/enc_tail.c"
|
||||
#else
|
||||
BASE64_ENC_STUB
|
||||
#endif
|
||||
}
|
||||
|
||||
BASE64_DEC_FUNCTION(neon64)
|
||||
{
|
||||
#ifdef BASE64_USE_NEON64
|
||||
#include "../generic/dec_head.c"
|
||||
dec_loop_neon64(&s, &slen, &o, &olen);
|
||||
dec_loop_generic_32(&s, &slen, &o, &olen);
|
||||
#include "../generic/dec_tail.c"
|
||||
#else
|
||||
BASE64_DEC_STUB
|
||||
#endif
|
||||
}
|
129
deps/base64/base64/lib/arch/neon64/dec_loop.c
vendored
129
deps/base64/base64/lib/arch/neon64/dec_loop.c
vendored
|
@ -1,129 +0,0 @@
|
|||
// The input consists of five valid character sets in the Base64 alphabet,
|
||||
// which we need to map back to the 6-bit values they represent.
|
||||
// There are three ranges, two singles, and then there's the rest.
|
||||
//
|
||||
// # From To LUT Characters
|
||||
// 1 [0..42] [255] #1 invalid input
|
||||
// 2 [43] [62] #1 +
|
||||
// 3 [44..46] [255] #1 invalid input
|
||||
// 4 [47] [63] #1 /
|
||||
// 5 [48..57] [52..61] #1 0..9
|
||||
// 6 [58..63] [255] #1 invalid input
|
||||
// 7 [64] [255] #2 invalid input
|
||||
// 8 [65..90] [0..25] #2 A..Z
|
||||
// 9 [91..96] [255] #2 invalid input
|
||||
// 10 [97..122] [26..51] #2 a..z
|
||||
// 11 [123..126] [255] #2 invalid input
|
||||
// (12) Everything else => invalid input
|
||||
|
||||
// The first LUT will use the VTBL instruction (out of range indices are set to
|
||||
// 0 in destination).
|
||||
static const uint8_t dec_lut1[] = {
|
||||
255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U,
|
||||
255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U,
|
||||
255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 62U, 255U, 255U, 255U, 63U,
|
||||
52U, 53U, 54U, 55U, 56U, 57U, 58U, 59U, 60U, 61U, 255U, 255U, 255U, 255U, 255U, 255U,
|
||||
};
|
||||
|
||||
// The second LUT will use the VTBX instruction (out of range indices will be
|
||||
// unchanged in destination). Input [64..126] will be mapped to index [1..63]
|
||||
// in this LUT. Index 0 means that value comes from LUT #1.
|
||||
static const uint8_t dec_lut2[] = {
|
||||
0U, 255U, 0U, 1U, 2U, 3U, 4U, 5U, 6U, 7U, 8U, 9U, 10U, 11U, 12U, 13U,
|
||||
14U, 15U, 16U, 17U, 18U, 19U, 20U, 21U, 22U, 23U, 24U, 25U, 255U, 255U, 255U, 255U,
|
||||
255U, 255U, 26U, 27U, 28U, 29U, 30U, 31U, 32U, 33U, 34U, 35U, 36U, 37U, 38U, 39U,
|
||||
40U, 41U, 42U, 43U, 44U, 45U, 46U, 47U, 48U, 49U, 50U, 51U, 255U, 255U, 255U, 255U,
|
||||
};
|
||||
|
||||
// All input values in range for the first look-up will be 0U in the second
|
||||
// look-up result. All input values out of range for the first look-up will be
|
||||
// 0U in the first look-up result. Thus, the two results can be ORed without
|
||||
// conflicts.
|
||||
//
|
||||
// Invalid characters that are in the valid range for either look-up will be
|
||||
// set to 255U in the combined result. Other invalid characters will just be
|
||||
// passed through with the second look-up result (using the VTBX instruction).
|
||||
// Since the second LUT is 64 bytes, those passed-through values are guaranteed
|
||||
// to have a value greater than 63U. Therefore, valid characters will be mapped
|
||||
// to the valid [0..63] range and all invalid characters will be mapped to
|
||||
// values greater than 63.
|
||||
|
||||
static inline void
|
||||
dec_loop_neon64 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
|
||||
{
|
||||
if (*slen < 64) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Process blocks of 64 bytes per round. Unlike the SSE codecs, no
|
||||
// extra trailing zero bytes are written, so it is not necessary to
|
||||
// reserve extra input bytes:
|
||||
size_t rounds = *slen / 64;
|
||||
|
||||
*slen -= rounds * 64; // 64 bytes consumed per round
|
||||
*olen += rounds * 48; // 48 bytes produced per round
|
||||
|
||||
const uint8x16x4_t tbl_dec1 = load_64byte_table(dec_lut1);
|
||||
const uint8x16x4_t tbl_dec2 = load_64byte_table(dec_lut2);
|
||||
|
||||
do {
|
||||
const uint8x16_t offset = vdupq_n_u8(63U);
|
||||
uint8x16x4_t dec1, dec2;
|
||||
uint8x16x3_t dec;
|
||||
|
||||
// Load 64 bytes and deinterleave:
|
||||
uint8x16x4_t str = vld4q_u8((uint8_t *) *s);
|
||||
|
||||
// Get indices for second LUT:
|
||||
dec2.val[0] = vqsubq_u8(str.val[0], offset);
|
||||
dec2.val[1] = vqsubq_u8(str.val[1], offset);
|
||||
dec2.val[2] = vqsubq_u8(str.val[2], offset);
|
||||
dec2.val[3] = vqsubq_u8(str.val[3], offset);
|
||||
|
||||
// Get values from first LUT:
|
||||
dec1.val[0] = vqtbl4q_u8(tbl_dec1, str.val[0]);
|
||||
dec1.val[1] = vqtbl4q_u8(tbl_dec1, str.val[1]);
|
||||
dec1.val[2] = vqtbl4q_u8(tbl_dec1, str.val[2]);
|
||||
dec1.val[3] = vqtbl4q_u8(tbl_dec1, str.val[3]);
|
||||
|
||||
// Get values from second LUT:
|
||||
dec2.val[0] = vqtbx4q_u8(dec2.val[0], tbl_dec2, dec2.val[0]);
|
||||
dec2.val[1] = vqtbx4q_u8(dec2.val[1], tbl_dec2, dec2.val[1]);
|
||||
dec2.val[2] = vqtbx4q_u8(dec2.val[2], tbl_dec2, dec2.val[2]);
|
||||
dec2.val[3] = vqtbx4q_u8(dec2.val[3], tbl_dec2, dec2.val[3]);
|
||||
|
||||
// Get final values:
|
||||
str.val[0] = vorrq_u8(dec1.val[0], dec2.val[0]);
|
||||
str.val[1] = vorrq_u8(dec1.val[1], dec2.val[1]);
|
||||
str.val[2] = vorrq_u8(dec1.val[2], dec2.val[2]);
|
||||
str.val[3] = vorrq_u8(dec1.val[3], dec2.val[3]);
|
||||
|
||||
// Check for invalid input, any value larger than 63:
|
||||
const uint8x16_t classified
|
||||
= vcgtq_u8(str.val[0], vdupq_n_u8(63))
|
||||
| vcgtq_u8(str.val[1], vdupq_n_u8(63))
|
||||
| vcgtq_u8(str.val[2], vdupq_n_u8(63))
|
||||
| vcgtq_u8(str.val[3], vdupq_n_u8(63));
|
||||
|
||||
// Check that all bits are zero:
|
||||
if (vmaxvq_u8(classified) != 0U) {
|
||||
break;
|
||||
}
|
||||
|
||||
// Compress four bytes into three:
|
||||
dec.val[0] = vshlq_n_u8(str.val[0], 2) | vshrq_n_u8(str.val[1], 4);
|
||||
dec.val[1] = vshlq_n_u8(str.val[1], 4) | vshrq_n_u8(str.val[2], 2);
|
||||
dec.val[2] = vshlq_n_u8(str.val[2], 6) | str.val[3];
|
||||
|
||||
// Interleave and store decoded result:
|
||||
vst3q_u8((uint8_t *) *o, dec);
|
||||
|
||||
*s += 64;
|
||||
*o += 48;
|
||||
|
||||
} while (--rounds > 0);
|
||||
|
||||
// Adjust for any rounds that were skipped:
|
||||
*slen += rounds * 64;
|
||||
*olen -= rounds * 48;
|
||||
}
|
66
deps/base64/base64/lib/arch/neon64/enc_loop.c
vendored
66
deps/base64/base64/lib/arch/neon64/enc_loop.c
vendored
|
@ -1,66 +0,0 @@
|
|||
static inline void
|
||||
enc_loop_neon64_inner (const uint8_t **s, uint8_t **o, const uint8x16x4_t tbl_enc)
|
||||
{
|
||||
// Load 48 bytes and deinterleave:
|
||||
uint8x16x3_t src = vld3q_u8(*s);
|
||||
|
||||
// Divide bits of three input bytes over four output bytes:
|
||||
uint8x16x4_t out = enc_reshuffle(src);
|
||||
|
||||
// The bits have now been shifted to the right locations;
|
||||
// translate their values 0..63 to the Base64 alphabet.
|
||||
// Use a 64-byte table lookup:
|
||||
out.val[0] = vqtbl4q_u8(tbl_enc, out.val[0]);
|
||||
out.val[1] = vqtbl4q_u8(tbl_enc, out.val[1]);
|
||||
out.val[2] = vqtbl4q_u8(tbl_enc, out.val[2]);
|
||||
out.val[3] = vqtbl4q_u8(tbl_enc, out.val[3]);
|
||||
|
||||
// Interleave and store output:
|
||||
vst4q_u8(*o, out);
|
||||
|
||||
*s += 48;
|
||||
*o += 64;
|
||||
}
|
||||
|
||||
static inline void
|
||||
enc_loop_neon64 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
|
||||
{
|
||||
size_t rounds = *slen / 48;
|
||||
|
||||
*slen -= rounds * 48; // 48 bytes consumed per round
|
||||
*olen += rounds * 64; // 64 bytes produced per round
|
||||
|
||||
// Load the encoding table:
|
||||
const uint8x16x4_t tbl_enc = load_64byte_table(base64_table_enc_6bit);
|
||||
|
||||
while (rounds > 0) {
|
||||
if (rounds >= 8) {
|
||||
enc_loop_neon64_inner(s, o, tbl_enc);
|
||||
enc_loop_neon64_inner(s, o, tbl_enc);
|
||||
enc_loop_neon64_inner(s, o, tbl_enc);
|
||||
enc_loop_neon64_inner(s, o, tbl_enc);
|
||||
enc_loop_neon64_inner(s, o, tbl_enc);
|
||||
enc_loop_neon64_inner(s, o, tbl_enc);
|
||||
enc_loop_neon64_inner(s, o, tbl_enc);
|
||||
enc_loop_neon64_inner(s, o, tbl_enc);
|
||||
rounds -= 8;
|
||||
continue;
|
||||
}
|
||||
if (rounds >= 4) {
|
||||
enc_loop_neon64_inner(s, o, tbl_enc);
|
||||
enc_loop_neon64_inner(s, o, tbl_enc);
|
||||
enc_loop_neon64_inner(s, o, tbl_enc);
|
||||
enc_loop_neon64_inner(s, o, tbl_enc);
|
||||
rounds -= 4;
|
||||
continue;
|
||||
}
|
||||
if (rounds >= 2) {
|
||||
enc_loop_neon64_inner(s, o, tbl_enc);
|
||||
enc_loop_neon64_inner(s, o, tbl_enc);
|
||||
rounds -= 2;
|
||||
continue;
|
||||
}
|
||||
enc_loop_neon64_inner(s, o, tbl_enc);
|
||||
break;
|
||||
}
|
||||
}
|
168
deps/base64/base64/lib/arch/neon64/enc_loop_asm.c
vendored
168
deps/base64/base64/lib/arch/neon64/enc_loop_asm.c
vendored
|
@ -1,168 +0,0 @@
|
|||
// Apologies in advance for combining the preprocessor with inline assembly,
|
||||
// two notoriously gnarly parts of C, but it was necessary to avoid a lot of
|
||||
// code repetition. The preprocessor is used to template large sections of
|
||||
// inline assembly that differ only in the registers used. If the code was
|
||||
// written out by hand, it would become very large and hard to audit.
|
||||
|
||||
// LOAD(A, B, C): one `ld3` that reads 48 bytes from [src] into the three
// named vector registers, deinterleaving as it goes, and post-increments the
// src pointer by 48. The three registers should be sequential.
#define LOAD(A, B, C) \
	"ld3 {"A".16b, "B".16b, "C".16b}, [%[src]], #48 \n\t"

// SHUF(A, B, C): spread the bits of the three deinterleaved registers over
// four 6-bit values, written to the temporaries t0..t3. The shifted pieces
// are combined with `sli`; t1..t3 are then masked with n63 to clear their
// top two bits (t0 needs no mask because `ushr #2` already cleared them).
#define SHUF(A, B, C) \
	"ushr %[t0].16b, "A".16b, #2 \n\t" \
	"ushr %[t1].16b, "B".16b, #4 \n\t" \
	"ushr %[t2].16b, "C".16b, #6 \n\t" \
	"sli %[t1].16b, "A".16b, #4 \n\t" \
	"sli %[t2].16b, "B".16b, #2 \n\t" \
	"and %[t1].16b, %[t1].16b, %[n63].16b \n\t" \
	"and %[t2].16b, %[t2].16b, %[n63].16b \n\t" \
	"and %[t3].16b, "C".16b, %[n63].16b \n\t"

// TRAN(A, B, C, D): translate the 6-bit values in t0..t3 to the base64
// alphabet through the 64-byte table held in v8..v11. Results are written to
// the four named registers A..D.
#define TRAN(A, B, C, D) \
	"tbl "A".16b, {v8.16b-v11.16b}, %[t0].16b \n\t" \
	"tbl "B".16b, {v8.16b-v11.16b}, %[t1].16b \n\t" \
	"tbl "C".16b, {v8.16b-v11.16b}, %[t2].16b \n\t" \
	"tbl "D".16b, {v8.16b-v11.16b}, %[t3].16b \n\t"

// STOR(A, B, C, D): one `st4` that interleaves the four named registers,
// writes 64 bytes to [dst] and post-increments the dst pointer by 64.
#define STOR(A, B, C, D) \
	"st4 {"A".16b, "B".16b, "C".16b, "D".16b}, [%[dst]], #64 \n\t"
|
||||
|
||||
// ROUND(): one self-contained encoder round using v12..v15 only: load the
// data, shuffle it, translate it and store the result.
#define ROUND() \
	LOAD("v12", "v13", "v14") \
	SHUF("v12", "v13", "v14") \
	TRAN("v12", "v13", "v14", "v15") \
	STOR("v12", "v13", "v14", "v15")

// ROUND_A(): type A interleaved round. Processes the data already sitting in
// v2..v4 (loaded by the preceding type B round, or by ROUND_A_FIRST) and,
// between the shuffle and the translation, loads v12..v14 for the type B
// round that follows.
#define ROUND_A() \
	SHUF("v2", "v3", "v4") \
	LOAD("v12", "v13", "v14") \
	TRAN("v2", "v3", "v4", "v5") \
	STOR("v2", "v3", "v4", "v5")

// ROUND_B(): type B interleaved round. Mirror image of type A: processes
// v12..v14 and loads v2..v4 for the next type A round.
#define ROUND_B() \
	SHUF("v12", "v13", "v14") \
	LOAD("v2", "v3", "v4") \
	TRAN("v12", "v13", "v14", "v15") \
	STOR("v12", "v13", "v14", "v15")

// ROUND_A_FIRST(): opening type A round of a chain. Nothing has been
// preloaded yet, so it loads v2..v4 itself before running ROUND_A.
#define ROUND_A_FIRST() \
	LOAD("v2", "v3", "v4") \
	ROUND_A()

// ROUND_B_LAST(): closing type B round of a chain. Identical to ROUND_B
// except that it does not load data for a following round.
#define ROUND_B_LAST() \
	SHUF("v12", "v13", "v14") \
	TRAN("v12", "v13", "v14", "v15") \
	STOR("v12", "v13", "v14", "v15")
|
||||
|
||||
// Suppress clang's warning that the literal string in the asm statement is
|
||||
// overlong (longer than the ISO-mandated minimum size of 4095 bytes for C99
|
||||
// compilers). It may be true, but the goal here is not C99 portability.
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Woverlength-strings"
|
||||
|
||||
static inline void
|
||||
enc_loop_neon64 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
|
||||
{
|
||||
size_t rounds = *slen / 48;
|
||||
|
||||
if (rounds == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
*slen -= rounds * 48; // 48 bytes consumed per round.
|
||||
*olen += rounds * 64; // 64 bytes produced per round.
|
||||
|
||||
// Number of times to go through the 8x loop.
|
||||
size_t loops = rounds / 8;
|
||||
|
||||
// Number of rounds remaining after the 8x loop.
|
||||
rounds %= 8;
|
||||
|
||||
// Temporary registers, used as scratch space.
|
||||
uint8x16_t tmp0, tmp1, tmp2, tmp3;
|
||||
|
||||
__asm__ volatile (
|
||||
|
||||
// Load the encoding table into v8..v11.
|
||||
" ld1 {v8.16b-v11.16b}, [%[tbl]] \n\t"
|
||||
|
||||
// If there are eight rounds or more, enter an 8x unrolled loop
|
||||
// of interleaved encoding rounds. The rounds interleave memory
|
||||
// operations (load/store) with data operations to maximize
|
||||
// pipeline throughput.
|
||||
" cbz %[loops], 4f \n\t"
|
||||
|
||||
// The SIMD instructions do not touch the flags.
|
||||
"88: subs %[loops], %[loops], #1 \n\t"
|
||||
" " ROUND_A_FIRST()
|
||||
" " ROUND_B()
|
||||
" " ROUND_A()
|
||||
" " ROUND_B()
|
||||
" " ROUND_A()
|
||||
" " ROUND_B()
|
||||
" " ROUND_A()
|
||||
" " ROUND_B_LAST()
|
||||
" b.ne 88b \n\t"
|
||||
|
||||
// Enter a 4x unrolled loop for rounds of 4 or more.
|
||||
"4: cmp %[rounds], #4 \n\t"
|
||||
" b.lt 30f \n\t"
|
||||
" " ROUND_A_FIRST()
|
||||
" " ROUND_B()
|
||||
" " ROUND_A()
|
||||
" " ROUND_B_LAST()
|
||||
" sub %[rounds], %[rounds], #4 \n\t"
|
||||
|
||||
// Dispatch the remaining rounds 0..3.
|
||||
"30: cbz %[rounds], 0f \n\t"
|
||||
" cmp %[rounds], #2 \n\t"
|
||||
" b.eq 2f \n\t"
|
||||
" b.lt 1f \n\t"
|
||||
|
||||
// Block of non-interlaced encoding rounds, which can each
|
||||
// individually be jumped to. Rounds fall through to the next.
|
||||
"3: " ROUND()
|
||||
"2: " ROUND()
|
||||
"1: " ROUND()
|
||||
"0: \n\t"
|
||||
|
||||
// Outputs (modified).
|
||||
: [loops] "+r" (loops),
|
||||
[src] "+r" (*s),
|
||||
[dst] "+r" (*o),
|
||||
[t0] "=&w" (tmp0),
|
||||
[t1] "=&w" (tmp1),
|
||||
[t2] "=&w" (tmp2),
|
||||
[t3] "=&w" (tmp3)
|
||||
|
||||
// Inputs (not modified).
|
||||
: [rounds] "r" (rounds),
|
||||
[tbl] "r" (base64_table_enc_6bit),
|
||||
[n63] "w" (vdupq_n_u8(63))
|
||||
|
||||
// Clobbers.
|
||||
: "v2", "v3", "v4", "v5",
|
||||
"v8", "v9", "v10", "v11",
|
||||
"v12", "v13", "v14", "v15",
|
||||
"cc", "memory"
|
||||
);
|
||||
}
|
||||
|
||||
#pragma GCC diagnostic pop
|
|
@ -1,31 +0,0 @@
|
|||
static inline uint8x16x4_t
|
||||
enc_reshuffle (const uint8x16x3_t in)
|
||||
{
|
||||
uint8x16x4_t out;
|
||||
|
||||
// Input:
|
||||
// in[0] = a7 a6 a5 a4 a3 a2 a1 a0
|
||||
// in[1] = b7 b6 b5 b4 b3 b2 b1 b0
|
||||
// in[2] = c7 c6 c5 c4 c3 c2 c1 c0
|
||||
|
||||
// Output:
|
||||
// out[0] = 00 00 a7 a6 a5 a4 a3 a2
|
||||
// out[1] = 00 00 a1 a0 b7 b6 b5 b4
|
||||
// out[2] = 00 00 b3 b2 b1 b0 c7 c6
|
||||
// out[3] = 00 00 c5 c4 c3 c2 c1 c0
|
||||
|
||||
// Move the input bits to where they need to be in the outputs. Except
|
||||
// for the first output, the high two bits are not cleared.
|
||||
out.val[0] = vshrq_n_u8(in.val[0], 2);
|
||||
out.val[1] = vshrq_n_u8(in.val[1], 4);
|
||||
out.val[2] = vshrq_n_u8(in.val[2], 6);
|
||||
out.val[1] = vsliq_n_u8(out.val[1], in.val[0], 4);
|
||||
out.val[2] = vsliq_n_u8(out.val[2], in.val[1], 2);
|
||||
|
||||
// Clear the high two bits in the second, third and fourth output.
|
||||
out.val[1] = vandq_u8(out.val[1], vdupq_n_u8(0x3F));
|
||||
out.val[2] = vandq_u8(out.val[2], vdupq_n_u8(0x3F));
|
||||
out.val[3] = vandq_u8(in.val[2], vdupq_n_u8(0x3F));
|
||||
|
||||
return out;
|
||||
}
|
56
deps/base64/base64/lib/arch/sse41/codec.c
vendored
56
deps/base64/base64/lib/arch/sse41/codec.c
vendored
|
@ -1,56 +0,0 @@
|
|||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "../../../include/libbase64.h"
|
||||
#include "../../tables/tables.h"
|
||||
#include "../../codecs.h"
|
||||
#include "config.h"
|
||||
#include "../../env.h"
|
||||
|
||||
#if HAVE_SSE41
|
||||
#include <smmintrin.h>
|
||||
|
||||
// Only enable inline assembly on supported compilers and on 64-bit CPUs.
|
||||
#ifndef BASE64_SSE41_USE_ASM
|
||||
# if (defined(__GNUC__) || defined(__clang__)) && BASE64_WORDSIZE == 64
|
||||
# define BASE64_SSE41_USE_ASM 1
|
||||
# else
|
||||
# define BASE64_SSE41_USE_ASM 0
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#include "../ssse3/dec_reshuffle.c"
|
||||
#include "../ssse3/dec_loop.c"
|
||||
|
||||
#if BASE64_SSE41_USE_ASM
|
||||
# include "../ssse3/enc_loop_asm.c"
|
||||
#else
|
||||
# include "../ssse3/enc_translate.c"
|
||||
# include "../ssse3/enc_reshuffle.c"
|
||||
# include "../ssse3/enc_loop.c"
|
||||
#endif
|
||||
|
||||
#endif // HAVE_SSE41
|
||||
|
||||
BASE64_ENC_FUNCTION(sse41)
|
||||
{
|
||||
#if HAVE_SSE41
|
||||
#include "../generic/enc_head.c"
|
||||
enc_loop_ssse3(&s, &slen, &o, &olen);
|
||||
#include "../generic/enc_tail.c"
|
||||
#else
|
||||
BASE64_ENC_STUB
|
||||
#endif
|
||||
}
|
||||
|
||||
BASE64_DEC_FUNCTION(sse41)
|
||||
{
|
||||
#if HAVE_SSE41
|
||||
#include "../generic/dec_head.c"
|
||||
dec_loop_ssse3(&s, &slen, &o, &olen);
|
||||
#include "../generic/dec_tail.c"
|
||||
#else
|
||||
BASE64_DEC_STUB
|
||||
#endif
|
||||
}
|
56
deps/base64/base64/lib/arch/sse42/codec.c
vendored
56
deps/base64/base64/lib/arch/sse42/codec.c
vendored
|
@ -1,56 +0,0 @@
|
|||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "../../../include/libbase64.h"
|
||||
#include "../../tables/tables.h"
|
||||
#include "../../codecs.h"
|
||||
#include "config.h"
|
||||
#include "../../env.h"
|
||||
|
||||
#if HAVE_SSE42
|
||||
#include <nmmintrin.h>
|
||||
|
||||
// Only enable inline assembly on supported compilers and on 64-bit CPUs.
|
||||
#ifndef BASE64_SSE42_USE_ASM
|
||||
# if (defined(__GNUC__) || defined(__clang__)) && BASE64_WORDSIZE == 64
|
||||
# define BASE64_SSE42_USE_ASM 1
|
||||
# else
|
||||
# define BASE64_SSE42_USE_ASM 0
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#include "../ssse3/dec_reshuffle.c"
|
||||
#include "../ssse3/dec_loop.c"
|
||||
|
||||
#if BASE64_SSE42_USE_ASM
|
||||
# include "../ssse3/enc_loop_asm.c"
|
||||
#else
|
||||
# include "../ssse3/enc_translate.c"
|
||||
# include "../ssse3/enc_reshuffle.c"
|
||||
# include "../ssse3/enc_loop.c"
|
||||
#endif
|
||||
|
||||
#endif // HAVE_SSE42
|
||||
|
||||
BASE64_ENC_FUNCTION(sse42)
|
||||
{
|
||||
#if HAVE_SSE42
|
||||
#include "../generic/enc_head.c"
|
||||
enc_loop_ssse3(&s, &slen, &o, &olen);
|
||||
#include "../generic/enc_tail.c"
|
||||
#else
|
||||
BASE64_ENC_STUB
|
||||
#endif
|
||||
}
|
||||
|
||||
BASE64_DEC_FUNCTION(sse42)
|
||||
{
|
||||
#if HAVE_SSE42
|
||||
#include "../generic/dec_head.c"
|
||||
dec_loop_ssse3(&s, &slen, &o, &olen);
|
||||
#include "../generic/dec_tail.c"
|
||||
#else
|
||||
BASE64_DEC_STUB
|
||||
#endif
|
||||
}
|
58
deps/base64/base64/lib/arch/ssse3/codec.c
vendored
58
deps/base64/base64/lib/arch/ssse3/codec.c
vendored
|
@ -1,58 +0,0 @@
|
|||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "../../../include/libbase64.h"
|
||||
#include "../../tables/tables.h"
|
||||
#include "../../codecs.h"
|
||||
#include "config.h"
|
||||
#include "../../env.h"
|
||||
|
||||
#if HAVE_SSSE3
|
||||
#include <tmmintrin.h>
|
||||
|
||||
// Only enable inline assembly on supported compilers and on 64-bit CPUs.
|
||||
// 32-bit CPUs with SSSE3 support, such as low-end Atoms, only have eight XMM
|
||||
// registers, which is not enough to run the inline assembly.
|
||||
#ifndef BASE64_SSSE3_USE_ASM
|
||||
# if (defined(__GNUC__) || defined(__clang__)) && BASE64_WORDSIZE == 64
|
||||
# define BASE64_SSSE3_USE_ASM 1
|
||||
# else
|
||||
# define BASE64_SSSE3_USE_ASM 0
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#include "dec_reshuffle.c"
|
||||
#include "dec_loop.c"
|
||||
|
||||
#if BASE64_SSSE3_USE_ASM
|
||||
# include "enc_loop_asm.c"
|
||||
#else
|
||||
# include "enc_reshuffle.c"
|
||||
# include "enc_translate.c"
|
||||
# include "enc_loop.c"
|
||||
#endif
|
||||
|
||||
#endif // HAVE_SSSE3
|
||||
|
||||
BASE64_ENC_FUNCTION(ssse3)
|
||||
{
|
||||
#if HAVE_SSSE3
|
||||
#include "../generic/enc_head.c"
|
||||
enc_loop_ssse3(&s, &slen, &o, &olen);
|
||||
#include "../generic/enc_tail.c"
|
||||
#else
|
||||
BASE64_ENC_STUB
|
||||
#endif
|
||||
}
|
||||
|
||||
BASE64_DEC_FUNCTION(ssse3)
|
||||
{
|
||||
#if HAVE_SSSE3
|
||||
#include "../generic/dec_head.c"
|
||||
dec_loop_ssse3(&s, &slen, &o, &olen);
|
||||
#include "../generic/dec_tail.c"
|
||||
#else
|
||||
BASE64_DEC_STUB
|
||||
#endif
|
||||
}
|
173
deps/base64/base64/lib/arch/ssse3/dec_loop.c
vendored
173
deps/base64/base64/lib/arch/ssse3/dec_loop.c
vendored
|
@ -1,173 +0,0 @@
|
|||
// The input consists of six character sets in the Base64 alphabet, which we
|
||||
// need to map back to the 6-bit values they represent. There are three ranges,
|
||||
// two singles, and then there's the rest.
|
||||
//
|
||||
// # From To Add Characters
|
||||
// 1 [43] [62] +19 +
|
||||
// 2 [47] [63] +16 /
|
||||
// 3 [48..57] [52..61] +4 0..9
|
||||
// 4 [65..90] [0..25] -65 A..Z
|
||||
// 5 [97..122] [26..51] -71 a..z
|
||||
// (6) Everything else => invalid input
|
||||
//
|
||||
// We will use lookup tables for character validation and offset computation.
|
||||
// Remember that 0x2X and 0x0X select the same index in _mm_shuffle_epi8; this
|
||||
// allows masking with 0x2F instead of 0x0F, which saves one constant
|
||||
// declaration (register and/or memory access).
|
||||
//
|
||||
// For offsets:
|
||||
// Perfect hash for lut = ((src >> 4) & 0x2F) + ((src == 0x2F) ? 0xFF : 0x00)
|
||||
// 0000 = garbage
|
||||
// 0001 = /
|
||||
// 0010 = +
|
||||
// 0011 = 0-9
|
||||
// 0100 = A-Z
|
||||
// 0101 = A-Z
|
||||
// 0110 = a-z
|
||||
// 0111 = a-z
|
||||
// 1000 >= garbage
|
||||
//
|
||||
// For validation, here's the table.
|
||||
// A character is valid if and only if the AND of the 2 lookups equals 0:
|
||||
//
|
||||
// hi \ lo 0000 0001 0010 0011 0100 0101 0110 0111 1000 1001 1010 1011 1100 1101 1110 1111
|
||||
// LUT 0x15 0x11 0x11 0x11 0x11 0x11 0x11 0x11 0x11 0x11 0x13 0x1A 0x1B 0x1B 0x1B 0x1A
|
||||
//
|
||||
// 0000 0x10 char NUL SOH STX ETX EOT ENQ ACK BEL BS HT LF VT FF CR SO SI
|
||||
// andlut 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10
|
||||
//
|
||||
// 0001 0x10 char DLE DC1 DC2 DC3 DC4 NAK SYN ETB CAN EM SUB ESC FS GS RS US
|
||||
// andlut 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10
|
||||
//
|
||||
// 0010 0x01 char SP ! " # $ % & ' ( ) * + , - . /
|
||||
// andlut 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x00 0x01 0x01 0x01 0x00
|
||||
//
|
||||
// 0011 0x02 char 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
|
||||
// andlut 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x02 0x02 0x02 0x02 0x02 0x02
|
||||
//
|
||||
// 0100 0x04 char @ A B C D E F G H I J K L M N O
|
||||
// andlut 0x04 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
|
||||
//
|
||||
// 0101 0x08 char P Q R S T U V W X Y Z [ \ ] ^ _
|
||||
// andlut 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x08 0x08 0x08 0x08 0x08
|
||||
//
|
||||
// 0110 0x04 char ` a b c d e f g h i j k l m n o
|
||||
// andlut 0x04 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
|
||||
// 0111 0x08 char p q r s t u v w x y z { | } ~ DEL
|
||||
// andlut 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x08 0x08 0x08 0x08 0x08
|
||||
//
|
||||
// 1000 0x10 andlut 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10
|
||||
// 1001 0x10 andlut 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10
|
||||
// 1010 0x10 andlut 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10
|
||||
// 1011 0x10 andlut 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10
|
||||
// 1100 0x10 andlut 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10
|
||||
// 1101 0x10 andlut 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10
|
||||
// 1110 0x10 andlut 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10
|
||||
// 1111 0x10 andlut 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10
|
||||
|
||||
static inline int
|
||||
dec_loop_ssse3_inner (const uint8_t **s, uint8_t **o, size_t *rounds)
|
||||
{
|
||||
const __m128i lut_lo = _mm_setr_epi8(
|
||||
0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11,
|
||||
0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A);
|
||||
|
||||
const __m128i lut_hi = _mm_setr_epi8(
|
||||
0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08,
|
||||
0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10);
|
||||
|
||||
const __m128i lut_roll = _mm_setr_epi8(
|
||||
0, 16, 19, 4, -65, -65, -71, -71,
|
||||
0, 0, 0, 0, 0, 0, 0, 0);
|
||||
|
||||
const __m128i mask_2F = _mm_set1_epi8(0x2F);
|
||||
|
||||
// Load input:
|
||||
__m128i str = _mm_loadu_si128((__m128i *) *s);
|
||||
|
||||
// Table lookups:
|
||||
const __m128i hi_nibbles = _mm_and_si128(_mm_srli_epi32(str, 4), mask_2F);
|
||||
const __m128i lo_nibbles = _mm_and_si128(str, mask_2F);
|
||||
const __m128i hi = _mm_shuffle_epi8(lut_hi, hi_nibbles);
|
||||
const __m128i lo = _mm_shuffle_epi8(lut_lo, lo_nibbles);
|
||||
|
||||
// Check for invalid input: if any "and" values from lo and hi are not
|
||||
// zero, fall back on bytewise code to do error checking and reporting:
|
||||
if (_mm_movemask_epi8(_mm_cmpgt_epi8(_mm_and_si128(lo, hi), _mm_setzero_si128())) != 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
const __m128i eq_2F = _mm_cmpeq_epi8(str, mask_2F);
|
||||
const __m128i roll = _mm_shuffle_epi8(lut_roll, _mm_add_epi8(eq_2F, hi_nibbles));
|
||||
|
||||
// Now simply add the delta values to the input:
|
||||
str = _mm_add_epi8(str, roll);
|
||||
|
||||
// Reshuffle the input to packed 12-byte output format:
|
||||
str = dec_reshuffle(str);
|
||||
|
||||
// Store the output:
|
||||
_mm_storeu_si128((__m128i *) *o, str);
|
||||
|
||||
*s += 16;
|
||||
*o += 12;
|
||||
*rounds -= 1;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Bulk Base64 decoder for SSSE3: decodes 16 input characters to 12 output
// bytes per round and stops at the first block the per-round helper rejects.
// *slen and *olen are updated for the rounds actually completed. Inputs
// shorter than 24 bytes are left untouched.
static inline void
dec_loop_ssse3 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
{
	if (*slen < 24) {
		return;
	}

	// Each round stores 4 extra zero bytes after its 12 output bytes. To
	// make sure that gap gets covered, always hold back at least 8 bytes
	// of input (6 data bytes and up to two end-of-string markers):
	size_t rounds = (*slen - 8) / 16;

	// Charge all rounds up front; skipped rounds are refunded below.
	*slen -= rounds * 16;	// 16 bytes consumed per round
	*olen += rounds * 12;	// 12 bytes produced per round

	// Manually unrolled in groups of 8, 4, 2 and 1. The helper decrements
	// `rounds` itself on success; the && chains stop at the first failure.
	while (rounds > 0) {
		int ok;

		if (rounds >= 8) {
			ok = dec_loop_ssse3_inner(s, o, &rounds) &&
			     dec_loop_ssse3_inner(s, o, &rounds) &&
			     dec_loop_ssse3_inner(s, o, &rounds) &&
			     dec_loop_ssse3_inner(s, o, &rounds) &&
			     dec_loop_ssse3_inner(s, o, &rounds) &&
			     dec_loop_ssse3_inner(s, o, &rounds) &&
			     dec_loop_ssse3_inner(s, o, &rounds) &&
			     dec_loop_ssse3_inner(s, o, &rounds);
		}
		else if (rounds >= 4) {
			ok = dec_loop_ssse3_inner(s, o, &rounds) &&
			     dec_loop_ssse3_inner(s, o, &rounds) &&
			     dec_loop_ssse3_inner(s, o, &rounds) &&
			     dec_loop_ssse3_inner(s, o, &rounds);
		}
		else if (rounds >= 2) {
			ok = dec_loop_ssse3_inner(s, o, &rounds) &&
			     dec_loop_ssse3_inner(s, o, &rounds);
		}
		else {
			// Final single round: the loop ends whether or not it
			// succeeds, so the result is not inspected.
			dec_loop_ssse3_inner(s, o, &rounds);
			break;
		}

		if (!ok) {
			break;
		}
	}

	// Adjust for any rounds that were skipped:
	*slen += rounds * 16;
	*olen -= rounds * 12;
}
|
|
@ -1,33 +0,0 @@
|
|||
static inline __m128i
|
||||
dec_reshuffle (const __m128i in)
|
||||
{
|
||||
// in, bits, upper case are most significant bits, lower case are least significant bits
|
||||
// 00llllll 00kkkkLL 00jjKKKK 00JJJJJJ
|
||||
// 00iiiiii 00hhhhII 00ggHHHH 00GGGGGG
|
||||
// 00ffffff 00eeeeFF 00ddEEEE 00DDDDDD
|
||||
// 00cccccc 00bbbbCC 00aaBBBB 00AAAAAA
|
||||
|
||||
const __m128i merge_ab_and_bc = _mm_maddubs_epi16(in, _mm_set1_epi32(0x01400140));
|
||||
// 0000kkkk LLllllll 0000JJJJ JJjjKKKK
|
||||
// 0000hhhh IIiiiiii 0000GGGG GGggHHHH
|
||||
// 0000eeee FFffffff 0000DDDD DDddEEEE
|
||||
// 0000bbbb CCcccccc 0000AAAA AAaaBBBB
|
||||
|
||||
const __m128i out = _mm_madd_epi16(merge_ab_and_bc, _mm_set1_epi32(0x00011000));
|
||||
// 00000000 JJJJJJjj KKKKkkkk LLllllll
|
||||
// 00000000 GGGGGGgg HHHHhhhh IIiiiiii
|
||||
// 00000000 DDDDDDdd EEEEeeee FFffffff
|
||||
// 00000000 AAAAAAaa BBBBbbbb CCcccccc
|
||||
|
||||
// Pack bytes together:
|
||||
return _mm_shuffle_epi8(out, _mm_setr_epi8(
|
||||
2, 1, 0,
|
||||
6, 5, 4,
|
||||
10, 9, 8,
|
||||
14, 13, 12,
|
||||
-1, -1, -1, -1));
|
||||
// 00000000 00000000 00000000 00000000
|
||||
// LLllllll KKKKkkkk JJJJJJjj IIiiiiii
|
||||
// HHHHhhhh GGGGGGgg FFffffff EEEEeeee
|
||||
// DDDDDDdd CCcccccc BBBBbbbb AAAAAAaa
|
||||
}
|
67
deps/base64/base64/lib/arch/ssse3/enc_loop.c
vendored
67
deps/base64/base64/lib/arch/ssse3/enc_loop.c
vendored
|
@ -1,67 +0,0 @@
|
|||
static inline void
|
||||
enc_loop_ssse3_inner (const uint8_t **s, uint8_t **o)
|
||||
{
|
||||
// Load input:
|
||||
__m128i str = _mm_loadu_si128((__m128i *) *s);
|
||||
|
||||
// Reshuffle:
|
||||
str = enc_reshuffle(str);
|
||||
|
||||
// Translate reshuffled bytes to the Base64 alphabet:
|
||||
str = enc_translate(str);
|
||||
|
||||
// Store:
|
||||
_mm_storeu_si128((__m128i *) *o, str);
|
||||
|
||||
*s += 12;
|
||||
*o += 16;
|
||||
}
|
||||
|
||||
// Bulk Base64 encoder for SSSE3: encodes 12 input bytes to 16 output
// characters per round. *slen shrinks by 12 and *olen grows by 16 for every
// round. Inputs shorter than 16 bytes are left untouched.
static inline void
enc_loop_ssse3 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
{
	if (*slen < 16) {
		return;
	}

	// A round consumes 12 bytes but loads 16. Keeping at least 4 bytes in
	// reserve after the last round guarantees that the final load stays
	// within the bounds of the input buffer:
	size_t rounds = (*slen - 4) / 12;

	*slen -= rounds * 12;	// 12 bytes consumed per round
	*olen += rounds * 16;	// 16 bytes produced per round

	// Manually unrolled: groups of eight first, then the remainder is
	// peeled off as one group of four, one of two and a single round.
	for (; rounds >= 8; rounds -= 8) {
		enc_loop_ssse3_inner(s, o);
		enc_loop_ssse3_inner(s, o);
		enc_loop_ssse3_inner(s, o);
		enc_loop_ssse3_inner(s, o);
		enc_loop_ssse3_inner(s, o);
		enc_loop_ssse3_inner(s, o);
		enc_loop_ssse3_inner(s, o);
		enc_loop_ssse3_inner(s, o);
	}
	if (rounds >= 4) {
		enc_loop_ssse3_inner(s, o);
		enc_loop_ssse3_inner(s, o);
		enc_loop_ssse3_inner(s, o);
		enc_loop_ssse3_inner(s, o);
		rounds -= 4;
	}
	if (rounds >= 2) {
		enc_loop_ssse3_inner(s, o);
		enc_loop_ssse3_inner(s, o);
		rounds -= 2;
	}
	if (rounds > 0) {
		enc_loop_ssse3_inner(s, o);
	}
}
|
268
deps/base64/base64/lib/arch/ssse3/enc_loop_asm.c
vendored
268
deps/base64/base64/lib/arch/ssse3/enc_loop_asm.c
vendored
|
@ -1,268 +0,0 @@
|
|||
// Apologies in advance for combining the preprocessor with inline assembly,
|
||||
// two notoriously gnarly parts of C, but it was necessary to avoid a lot of
|
||||
// code repetition. The preprocessor is used to template large sections of
|
||||
// inline assembly that differ only in the registers used. If the code was
|
||||
// written out by hand, it would become very large and hard to audit.
|
||||
|
||||
// LOAD(R0, ROUND): load 16 bytes into register R0 from src at byte offset
// ROUND * 12. ROUND is stringized into the address expression, so the
// multiplication is evaluated by the assembler.
#define LOAD(R0, ROUND) \
	"lddqu ("#ROUND" * 12)(%[src]), %["R0"] \n\t"

// SHUF(R0, R1): shuffle the bytes of R0 with lut0 and rearrange their bits
// using the preloaded masks msk0..msk3. The combined result ends up in R0;
// R1 receives a working copy and is overwritten.
#define SHUF(R0, R1) \
	"pshufb %[lut0], %["R0"] \n\t" \
	"movdqa %["R0"], %["R1"] \n\t" \
	"pand %[msk0], %["R0"] \n\t" \
	"pand %[msk2], %["R1"] \n\t" \
	"pmulhuw %[msk1], %["R0"] \n\t" \
	"pmullw %[msk3], %["R1"] \n\t" \
	"por %["R1"], %["R0"] \n\t"

// TRAN(R0, R1, R2): translate the values in R0 to the base64 alphabet in
// place, using the preloaded constants n51, n25 and lut1. R1 and R2 are
// overwritten as working registers.
#define TRAN(R0, R1, R2) \
	"movdqa %["R0"], %["R1"] \n\t" \
	"movdqa %["R0"], %["R2"] \n\t" \
	"psubusb %[n51], %["R1"] \n\t" \
	"pcmpgtb %[n25], %["R2"] \n\t" \
	"psubb %["R2"], %["R1"] \n\t" \
	"movdqa %[lut1], %["R2"] \n\t" \
	"pshufb %["R1"], %["R2"] \n\t" \
	"paddb %["R2"], %["R0"] \n\t"

// STOR(R0, ROUND): store the 16 bytes of register R0 to dst at byte offset
// ROUND * 16.
#define STOR(R0, ROUND) \
	"movdqu %["R0"], ("#ROUND" * 16)(%[dst]) \n\t"
|
||||
|
||||
// ROUND(): one self-contained encoder round using registers a, b and c:
// load the data at offset 0, shuffle it, translate it and store the result at
// offset 0. The src pointer is then advanced by 12 bytes and the dst pointer
// by 16 bytes.
#define ROUND() \
	LOAD("a", 0) \
	SHUF("a", "b") \
	TRAN("a", "b", "c") \
	STOR("a", 0) \
	"add $12, %[src] \n\t" \
	"add $16, %[dst] \n\t"
|
||||
|
||||
// Three-way interleaved encoding rounds.
//
// The register graph next to each step shows which registers are occupied
// while that step runs, as an aid for choosing registers. Legend:
//
//   +  the register is loaded by that step.
//   |  the register is in use and must not be touched.
//   -  the register is decommissioned by that step.
//   x  the register is used as a temporary by that step.
//   V  the register is an input or output of the macro.

// ROUND_3_INIT(): start a chain of interleaved rounds. On exit register a
// holds a fully shuffled and translated block that is ready to be stored,
// while b and c hold freshly loaded, unprocessed input.
#define ROUND_3_INIT()			/*  a b c d e f  */ \
	LOAD("a", 0)			/*  +            */ \
	SHUF("a", "d")			/*  |     +      */ \
	LOAD("b", 1)			/*  | +   |      */ \
	TRAN("a", "d", "e")		/*  | |   - x    */ \
	LOAD("c", 2)			/*  V V V        */

// ROUND_3(ROUND, A,B,C,D,E,F): store A, process and store B and C, and
// prepare D, E and F for the next round (D processed, E and F merely
// loaded). Because the outputs D, E, F are in the same state as the inputs
// A, B, C, the macro can be daisy-chained by feeding D, E, F back in as
// A, B, C. Memory operations are interleaved with data operations for
// pipelined performance.
#define ROUND_3(ROUND, A,B,C,D,E,F)	/*  A B C D E F  */ \
	LOAD(D, (ROUND + 3))		/*  V V V +      */ \
	SHUF(B, E)			/*  | | | | +    */ \
	STOR(A, (ROUND + 0))		/*  - | | | |    */ \
	TRAN(B, E, F)			/*    | | | - x  */ \
	LOAD(E, (ROUND + 4))		/*    | | | +    */ \
	SHUF(C, A)			/*  + | | | |    */ \
	STOR(B, (ROUND + 1))		/*  | - | | |    */ \
	TRAN(C, A, F)			/*  -   | | | x  */ \
	LOAD(F, (ROUND + 5))		/*      | | | +  */ \
	SHUF(D, A)			/*  +   | | | |  */ \
	STOR(C, (ROUND + 2))		/*  |   - | | |  */ \
	TRAN(D, A, B)			/*  - x   V V V  */

// ROUND_3_END(ROUND, A,B,C,D,E,F): finish a chain. Takes the registers
// D, E and F left behind by ROUND_3, stores D, and processes and stores
// E and F. Nothing further is loaded.
#define ROUND_3_END(ROUND, A,B,C,D,E,F)	/*  A B C D E F  */ \
	SHUF(E, A)			/*  +     V V V  */ \
	STOR(D, (ROUND + 3))		/*  |     - | |  */ \
	TRAN(E, A, B)			/*  - x     | |  */ \
	SHUF(F, C)			/*      +   | |  */ \
	STOR(E, (ROUND + 4))		/*      |   - |  */ \
	TRAN(F, C, D)			/*      - x   |  */ \
	STOR(F, (ROUND + 5))		/*            -  */
|
||||
|
||||
// Define a type A round. Inputs are a, b, and c, outputs are d, e, and f.
|
||||
#define ROUND_3_A(ROUND) \
|
||||
ROUND_3(ROUND, "a", "b", "c", "d", "e", "f")
|
||||
|
||||
// Define a type B round. Inputs and outputs are swapped with regard to type A.
|
||||
#define ROUND_3_B(ROUND) \
|
||||
ROUND_3(ROUND, "d", "e", "f", "a", "b", "c")
|
||||
|
||||
// Terminating macro for a type A round.
|
||||
#define ROUND_3_A_LAST(ROUND) \
|
||||
ROUND_3_A(ROUND) \
|
||||
ROUND_3_END(ROUND, "a", "b", "c", "d", "e", "f")
|
||||
|
||||
// Terminating macro for a type B round.
|
||||
#define ROUND_3_B_LAST(ROUND) \
|
||||
ROUND_3_B(ROUND) \
|
||||
ROUND_3_END(ROUND, "d", "e", "f", "a", "b", "c")
|
||||
|
||||
// Suppress clang's warning that the literal string in the asm statement is
|
||||
// overlong (longer than the ISO-mandated minimum size of 4095 bytes for C99
|
||||
// compilers). It may be true, but the goal here is not C99 portability.
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Woverlength-strings"
|
||||
|
||||
// Bulk SSSE3 encoder loop, written as one inline assembly statement.
//
//   s    - in/out: address of the source cursor; the assembly advances it
//          by 12 bytes per encoded round.
//   slen - in/out: remaining source length; reduced by 12 per round.
//   o    - in/out: address of the destination cursor; the assembly advances
//          it by 16 bytes per encoded round.
//   olen - in/out: running output length; increased by 16 per round.
//
// Does nothing when fewer than 16 input bytes are available. Any input not
// covered by a whole round is left in *slen for the caller.
static inline void
enc_loop_ssse3 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
{
	// For a clearer explanation of the algorithm used by this function,
	// please refer to the plain (not inline assembly) implementation. This
	// function follows the same basic logic.

	if (*slen < 16) {
		return;
	}

	// Process blocks of 12 bytes at a time. Input is read in blocks of 16
	// bytes, so "reserve" four bytes from the input buffer to ensure that
	// we never read beyond the end of the input buffer.
	size_t rounds = (*slen - 4) / 12;

	// The length bookkeeping is done up front in C; the assembly below only
	// moves the src/dst pointers.
	*slen -= rounds * 12; // 12 bytes consumed per round
	*olen += rounds * 16; // 16 bytes produced per round

	// Number of times to go through the 36x loop.
	size_t loops = rounds / 36;

	// Number of rounds remaining after the 36x loop.
	rounds %= 36;

	// Lookup tables.
	// lut0 holds the same byte-shuffle indices that enc_reshuffle() uses.
	const __m128i lut0 = _mm_set_epi8(
		10, 11, 9, 10, 7, 8, 6, 7, 4, 5, 3, 4, 1, 2, 0, 1);

	// lut1 holds the same per-range offsets that enc_translate() uses.
	const __m128i lut1 = _mm_setr_epi8(
		65, 71, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -19, -16, 0, 0);

	// Temporary registers.
	__m128i a, b, c, d, e, f;

	__asm__ volatile (

		// If there are 36 rounds or more, enter a 36x unrolled loop of
		// interleaved encoding rounds. The rounds interleave memory
		// operations (load/store) with data operations (table lookups,
		// etc) to maximize pipeline throughput.
		" test %[loops], %[loops] \n\t"
		" jz 18f \n\t"
		" jmp 36f \n\t"
		" \n\t"
		".balign 64 \n\t"
		"36: " ROUND_3_INIT()
		" " ROUND_3_A( 0)
		" " ROUND_3_B( 3)
		" " ROUND_3_A( 6)
		" " ROUND_3_B( 9)
		" " ROUND_3_A(12)
		" " ROUND_3_B(15)
		" " ROUND_3_A(18)
		" " ROUND_3_B(21)
		" " ROUND_3_A(24)
		" " ROUND_3_B(27)
		" " ROUND_3_A_LAST(30)
		" add $(12 * 36), %[src] \n\t"
		" add $(16 * 36), %[dst] \n\t"
		" dec %[loops] \n\t"
		" jnz 36b \n\t"

		// Enter an 18x unrolled loop for rounds of 18 or more.
		// (`rounds' is below 36 here because of the modulo above.)
		"18: cmp $18, %[rounds] \n\t"
		" jl 9f \n\t"
		" " ROUND_3_INIT()
		" " ROUND_3_A(0)
		" " ROUND_3_B(3)
		" " ROUND_3_A(6)
		" " ROUND_3_B(9)
		" " ROUND_3_A_LAST(12)
		" sub $18, %[rounds] \n\t"
		" add $(12 * 18), %[src] \n\t"
		" add $(16 * 18), %[dst] \n\t"

		// Enter a 9x unrolled loop for rounds of 9 or more.
		"9: cmp $9, %[rounds] \n\t"
		" jl 6f \n\t"
		" " ROUND_3_INIT()
		" " ROUND_3_A(0)
		" " ROUND_3_B_LAST(3)
		" sub $9, %[rounds] \n\t"
		" add $(12 * 9), %[src] \n\t"
		" add $(16 * 9), %[dst] \n\t"

		// Enter a 6x unrolled loop for rounds of 6 or more.
		"6: cmp $6, %[rounds] \n\t"
		" jl 55f \n\t"
		" " ROUND_3_INIT()
		" " ROUND_3_A_LAST(0)
		" sub $6, %[rounds] \n\t"
		" add $(12 * 6), %[src] \n\t"
		" add $(16 * 6), %[dst] \n\t"

		// Dispatch the remaining rounds 0..5.
		"55: cmp $3, %[rounds] \n\t"
		" jg 45f \n\t"
		" je 3f \n\t"
		" cmp $1, %[rounds] \n\t"
		" jg 2f \n\t"
		" je 1f \n\t"
		" jmp 0f \n\t"

		"45: cmp $4, %[rounds] \n\t"
		" je 4f \n\t"

		// Block of non-interleaved encoding rounds, which can each
		// individually be jumped to. Rounds fall through to the next.
		"5: " ROUND()
		"4: " ROUND()
		"3: " ROUND()
		"2: " ROUND()
		"1: " ROUND()
		"0: \n\t"

		// Outputs (modified).
		// The cursors *s and *o are bound read-write, so the pointer
		// updates made by the assembly are visible to the caller. The
		// six scratch vectors are early-clobber SSE registers.
		: [rounds] "+r" (rounds),
		  [loops] "+r" (loops),
		  [src] "+r" (*s),
		  [dst] "+r" (*o),
		  [a] "=&x" (a),
		  [b] "=&x" (b),
		  [c] "=&x" (c),
		  [d] "=&x" (d),
		  [e] "=&x" (e),
		  [f] "=&x" (f)

		// Inputs (not modified).
		// The mask and multiplier constants are the same values used by
		// enc_reshuffle(); 51 and 25 are the same values used by
		// enc_translate().
		: [lut0] "x" (lut0),
		  [lut1] "x" (lut1),
		  [msk0] "x" (_mm_set1_epi32(0x0FC0FC00)),
		  [msk1] "x" (_mm_set1_epi32(0x04000040)),
		  [msk2] "x" (_mm_set1_epi32(0x003F03F0)),
		  [msk3] "x" (_mm_set1_epi32(0x01000010)),
		  [n51] "x" (_mm_set1_epi8(51)),
		  [n25] "x" (_mm_set1_epi8(25))

		// Clobbers.
		: "cc", "memory"
	);
}
|
||||
|
||||
#pragma GCC diagnostic pop
|
|
@ -1,48 +0,0 @@
|
|||
static inline __m128i
|
||||
enc_reshuffle (__m128i in)
|
||||
{
|
||||
// Input, bytes MSB to LSB:
|
||||
// 0 0 0 0 l k j i h g f e d c b a
|
||||
|
||||
in = _mm_shuffle_epi8(in, _mm_set_epi8(
|
||||
10, 11, 9, 10,
|
||||
7, 8, 6, 7,
|
||||
4, 5, 3, 4,
|
||||
1, 2, 0, 1));
|
||||
// in, bytes MSB to LSB:
|
||||
// k l j k
|
||||
// h i g h
|
||||
// e f d e
|
||||
// b c a b
|
||||
|
||||
const __m128i t0 = _mm_and_si128(in, _mm_set1_epi32(0x0FC0FC00));
|
||||
// bits, upper case are most significant bits, lower case are least significant bits
|
||||
// 0000kkkk LL000000 JJJJJJ00 00000000
|
||||
// 0000hhhh II000000 GGGGGG00 00000000
|
||||
// 0000eeee FF000000 DDDDDD00 00000000
|
||||
// 0000bbbb CC000000 AAAAAA00 00000000
|
||||
|
||||
const __m128i t1 = _mm_mulhi_epu16(t0, _mm_set1_epi32(0x04000040));
|
||||
// 00000000 00kkkkLL 00000000 00JJJJJJ
|
||||
// 00000000 00hhhhII 00000000 00GGGGGG
|
||||
// 00000000 00eeeeFF 00000000 00DDDDDD
|
||||
// 00000000 00bbbbCC 00000000 00AAAAAA
|
||||
|
||||
const __m128i t2 = _mm_and_si128(in, _mm_set1_epi32(0x003F03F0));
|
||||
// 00000000 00llllll 000000jj KKKK0000
|
||||
// 00000000 00iiiiii 000000gg HHHH0000
|
||||
// 00000000 00ffffff 000000dd EEEE0000
|
||||
// 00000000 00cccccc 000000aa BBBB0000
|
||||
|
||||
const __m128i t3 = _mm_mullo_epi16(t2, _mm_set1_epi32(0x01000010));
|
||||
// 00llllll 00000000 00jjKKKK 00000000
|
||||
// 00iiiiii 00000000 00ggHHHH 00000000
|
||||
// 00ffffff 00000000 00ddEEEE 00000000
|
||||
// 00cccccc 00000000 00aaBBBB 00000000
|
||||
|
||||
return _mm_or_si128(t1, t3);
|
||||
// 00llllll 00kkkkLL 00jjKKKK 00JJJJJJ
|
||||
// 00iiiiii 00hhhhII 00ggHHHH 00GGGGGG
|
||||
// 00ffffff 00eeeeFF 00ddEEEE 00DDDDDD
|
||||
// 00cccccc 00bbbbCC 00aaBBBB 00AAAAAA
|
||||
}
|
|
@ -1,33 +0,0 @@
|
|||
static inline __m128i
|
||||
enc_translate (const __m128i in)
|
||||
{
|
||||
// A lookup table containing the absolute offsets for all ranges:
|
||||
const __m128i lut = _mm_setr_epi8(
|
||||
65, 71, -4, -4,
|
||||
-4, -4, -4, -4,
|
||||
-4, -4, -4, -4,
|
||||
-19, -16, 0, 0
|
||||
);
|
||||
|
||||
// Translate values 0..63 to the Base64 alphabet. There are five sets:
|
||||
// # From To Abs Index Characters
|
||||
// 0 [0..25] [65..90] +65 0 ABCDEFGHIJKLMNOPQRSTUVWXYZ
|
||||
// 1 [26..51] [97..122] +71 1 abcdefghijklmnopqrstuvwxyz
|
||||
// 2 [52..61] [48..57] -4 [2..11] 0123456789
|
||||
// 3 [62] [43] -19 12 +
|
||||
// 4 [63] [47] -16 13 /
|
||||
|
||||
// Create LUT indices from the input. The index for range #0 is right,
|
||||
// others are 1 less than expected:
|
||||
__m128i indices = _mm_subs_epu8(in, _mm_set1_epi8(51));
|
||||
|
||||
// mask is 0xFF (-1) for range #[1..4] and 0x00 for range #0:
|
||||
__m128i mask = _mm_cmpgt_epi8(in, _mm_set1_epi8(25));
|
||||
|
||||
// Subtract -1, so add 1 to indices for range #[1..4]. All indices are
|
||||
// now correct:
|
||||
indices = _mm_sub_epi8(indices, mask);
|
||||
|
||||
// Add offsets to input values:
|
||||
return _mm_add_epi8(in, _mm_shuffle_epi8(lut, indices));
|
||||
}
|
305
deps/base64/base64/lib/codec_choose.c
vendored
305
deps/base64/base64/lib/codec_choose.c
vendored
|
@ -1,305 +0,0 @@
|
|||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "../include/libbase64.h"
|
||||
#include "codecs.h"
|
||||
#include "config.h"
|
||||
#include "env.h"
|
||||
|
||||
#if (__x86_64__ || __i386__ || _M_X86 || _M_X64)
|
||||
#define BASE64_X86
|
||||
#if (HAVE_SSSE3 || HAVE_SSE41 || HAVE_SSE42 || HAVE_AVX || HAVE_AVX2 || HAVE_AVX512)
|
||||
#define BASE64_X86_SIMD
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef BASE64_X86
|
||||
#ifdef _MSC_VER
|
||||
#include <intrin.h>
|
||||
#define __cpuid_count(__level, __count, __eax, __ebx, __ecx, __edx) \
|
||||
{ \
|
||||
int info[4]; \
|
||||
__cpuidex(info, __level, __count); \
|
||||
__eax = info[0]; \
|
||||
__ebx = info[1]; \
|
||||
__ecx = info[2]; \
|
||||
__edx = info[3]; \
|
||||
}
|
||||
#define __cpuid(__level, __eax, __ebx, __ecx, __edx) \
|
||||
__cpuid_count(__level, 0, __eax, __ebx, __ecx, __edx)
|
||||
#else
|
||||
#include <cpuid.h>
|
||||
#if HAVE_AVX512 || HAVE_AVX2 || HAVE_AVX
|
||||
#if ((__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 2) || (__clang_major__ >= 3))
|
||||
static inline uint64_t _xgetbv (uint32_t index)
|
||||
{
|
||||
uint32_t eax, edx;
|
||||
__asm__ __volatile__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(index));
|
||||
return ((uint64_t)edx << 32) | eax;
|
||||
}
|
||||
#else
|
||||
#error "Platform not supported"
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef bit_AVX512vl
|
||||
#define bit_AVX512vl (1 << 31)
|
||||
#endif
|
||||
#ifndef bit_AVX512vbmi
|
||||
#define bit_AVX512vbmi (1 << 1)
|
||||
#endif
|
||||
#ifndef bit_AVX2
|
||||
#define bit_AVX2 (1 << 5)
|
||||
#endif
|
||||
#ifndef bit_SSSE3
|
||||
#define bit_SSSE3 (1 << 9)
|
||||
#endif
|
||||
#ifndef bit_SSE41
|
||||
#define bit_SSE41 (1 << 19)
|
||||
#endif
|
||||
#ifndef bit_SSE42
|
||||
#define bit_SSE42 (1 << 20)
|
||||
#endif
|
||||
#ifndef bit_AVX
|
||||
#define bit_AVX (1 << 28)
|
||||
#endif
|
||||
|
||||
#define bit_XSAVE_XRSTORE (1 << 27)
|
||||
|
||||
#ifndef _XCR_XFEATURE_ENABLED_MASK
|
||||
#define _XCR_XFEATURE_ENABLED_MASK 0
|
||||
#endif
|
||||
|
||||
#define _XCR_XMM_AND_YMM_STATE_ENABLED_BY_OS 0x6
|
||||
#endif
|
||||
|
||||
// Function declarations:
|
||||
#define BASE64_CODEC_FUNCS(arch) \
|
||||
BASE64_ENC_FUNCTION(arch); \
|
||||
BASE64_DEC_FUNCTION(arch); \
|
||||
|
||||
BASE64_CODEC_FUNCS(avx512)
|
||||
BASE64_CODEC_FUNCS(avx2)
|
||||
BASE64_CODEC_FUNCS(neon32)
|
||||
BASE64_CODEC_FUNCS(neon64)
|
||||
BASE64_CODEC_FUNCS(plain)
|
||||
BASE64_CODEC_FUNCS(ssse3)
|
||||
BASE64_CODEC_FUNCS(sse41)
|
||||
BASE64_CODEC_FUNCS(sse42)
|
||||
BASE64_CODEC_FUNCS(avx)
|
||||
|
||||
static bool
|
||||
codec_choose_forced (struct codec *codec, int flags)
|
||||
{
|
||||
// If the user wants to use a certain codec,
|
||||
// always allow it, even if the codec is a no-op.
|
||||
// For testing purposes.
|
||||
|
||||
if (!(flags & 0xFFFF)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (flags & BASE64_FORCE_AVX2) {
|
||||
codec->enc = base64_stream_encode_avx2;
|
||||
codec->dec = base64_stream_decode_avx2;
|
||||
return true;
|
||||
}
|
||||
if (flags & BASE64_FORCE_NEON32) {
|
||||
codec->enc = base64_stream_encode_neon32;
|
||||
codec->dec = base64_stream_decode_neon32;
|
||||
return true;
|
||||
}
|
||||
if (flags & BASE64_FORCE_NEON64) {
|
||||
codec->enc = base64_stream_encode_neon64;
|
||||
codec->dec = base64_stream_decode_neon64;
|
||||
return true;
|
||||
}
|
||||
if (flags & BASE64_FORCE_PLAIN) {
|
||||
codec->enc = base64_stream_encode_plain;
|
||||
codec->dec = base64_stream_decode_plain;
|
||||
return true;
|
||||
}
|
||||
if (flags & BASE64_FORCE_SSSE3) {
|
||||
codec->enc = base64_stream_encode_ssse3;
|
||||
codec->dec = base64_stream_decode_ssse3;
|
||||
return true;
|
||||
}
|
||||
if (flags & BASE64_FORCE_SSE41) {
|
||||
codec->enc = base64_stream_encode_sse41;
|
||||
codec->dec = base64_stream_decode_sse41;
|
||||
return true;
|
||||
}
|
||||
if (flags & BASE64_FORCE_SSE42) {
|
||||
codec->enc = base64_stream_encode_sse42;
|
||||
codec->dec = base64_stream_decode_sse42;
|
||||
return true;
|
||||
}
|
||||
if (flags & BASE64_FORCE_AVX) {
|
||||
codec->enc = base64_stream_encode_avx;
|
||||
codec->dec = base64_stream_decode_avx;
|
||||
return true;
|
||||
}
|
||||
if (flags & BASE64_FORCE_AVX512) {
|
||||
codec->enc = base64_stream_encode_avx512;
|
||||
codec->dec = base64_stream_decode_avx512;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Select a NEON codec when the build was configured for one. Returns true
// and fills in `codec' on success, false when no NEON codec is compiled in.
static bool
codec_choose_arm (struct codec *codec)
{
	// There is no portable userland equivalent of x86's cpuid for
	// detecting NEON at runtime, so the decision is made entirely from
	// the compile-time configuration.

#if (defined(__ARM_NEON__) || defined(__ARM_NEON)) && defined(__aarch64__) && HAVE_NEON64

	codec->enc = base64_stream_encode_neon64;
	codec->dec = base64_stream_decode_neon64;
	return true;

#elif (defined(__ARM_NEON__) || defined(__ARM_NEON)) && HAVE_NEON32

	codec->enc = base64_stream_encode_neon32;
	codec->dec = base64_stream_decode_neon32;
	return true;

#else

	(void)codec;
	return false;

#endif
}
|
||||
|
||||
// Runtime codec selection for x86, using CPUID (and XGETBV for the AVX
// family). Candidates are tried from widest to narrowest: AVX512, AVX2, AVX,
// SSE4.2, SSE4.1, SSSE3, each only when compiled in (HAVE_*).
//
// Returns true and fills in `codec' when a SIMD codec was selected, false
// when none is usable (the caller then falls back to another codec).
static bool
codec_choose_x86 (struct codec *codec)
{
#ifdef BASE64_X86_SIMD

	unsigned int eax, ebx = 0, ecx = 0, edx;
	unsigned int max_level;

	// Highest supported basic CPUID leaf:
	#ifdef _MSC_VER
	int info[4];
	__cpuidex(info, 0, 0);
	max_level = info[0];
	#else
	max_level = __get_cpuid_max(0, NULL);
	#endif

	#if HAVE_AVX512 || HAVE_AVX2 || HAVE_AVX
	// Check for AVX/AVX2/AVX512 support:
	// Checking for AVX requires 3 things:
	// 1) CPUID indicates that the OS uses XSAVE and XRSTORE instructions
	//    (allowing saving YMM registers on context switch)
	// 2) CPUID indicates support for AVX
	// 3) XGETBV indicates the AVX registers will be saved and restored on
	//    context switch
	//
	// Note that XGETBV is only available on 686 or later CPUs, so the
	// instruction needs to be conditionally run.
	if (max_level >= 1) {
		__cpuid_count(1, 0, eax, ebx, ecx, edx);
		if (ecx & bit_XSAVE_XRSTORE) {
			uint64_t xcr_mask;
			xcr_mask = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
			if ((xcr_mask & _XCR_XMM_AND_YMM_STATE_ENABLED_BY_OS) == _XCR_XMM_AND_YMM_STATE_ENABLED_BY_OS) { // check multiple bits at once
				#if HAVE_AVX512
				// AVX-512 needs more OS support than AVX: besides
				// the XMM/YMM state checked above, the OS must also
				// have enabled the opmask, ZMM_Hi256 and Hi16_ZMM
				// state components (XCR0 bits 5, 6 and 7). Without
				// them the instructions fault even though CPUID
				// advertises the feature.
				const uint64_t zmm_state_enabled_by_os = 0xE0;

				if (max_level >= 7 && (xcr_mask & zmm_state_enabled_by_os) == zmm_state_enabled_by_os) {
					__cpuid_count(7, 0, eax, ebx, ecx, edx);
					if ((ebx & bit_AVX512vl) && (ecx & bit_AVX512vbmi)) {
						codec->enc = base64_stream_encode_avx512;
						codec->dec = base64_stream_decode_avx512;
						return true;
					}
				}
				#endif
				#if HAVE_AVX2
				if (max_level >= 7) {
					__cpuid_count(7, 0, eax, ebx, ecx, edx);
					if (ebx & bit_AVX2) {
						codec->enc = base64_stream_encode_avx2;
						codec->dec = base64_stream_decode_avx2;
						return true;
					}
				}
				#endif
				#if HAVE_AVX
				// Leaf 7 may have overwritten the registers above,
				// so query leaf 1 again before testing the AVX bit:
				__cpuid_count(1, 0, eax, ebx, ecx, edx);
				if (ecx & bit_AVX) {
					codec->enc = base64_stream_encode_avx;
					codec->dec = base64_stream_decode_avx;
					return true;
				}
				#endif
			}
		}
	}
	#endif

	#if HAVE_SSE42
	// Check for SSE42 support:
	if (max_level >= 1) {
		__cpuid(1, eax, ebx, ecx, edx);
		if (ecx & bit_SSE42) {
			codec->enc = base64_stream_encode_sse42;
			codec->dec = base64_stream_decode_sse42;
			return true;
		}
	}
	#endif

	#if HAVE_SSE41
	// Check for SSE41 support:
	if (max_level >= 1) {
		__cpuid(1, eax, ebx, ecx, edx);
		if (ecx & bit_SSE41) {
			codec->enc = base64_stream_encode_sse41;
			codec->dec = base64_stream_decode_sse41;
			return true;
		}
	}
	#endif

	#if HAVE_SSSE3
	// Check for SSSE3 support:
	if (max_level >= 1) {
		__cpuid(1, eax, ebx, ecx, edx);
		if (ecx & bit_SSSE3) {
			codec->enc = base64_stream_encode_ssse3;
			codec->dec = base64_stream_decode_ssse3;
			return true;
		}
	}
	#endif

#else
	(void)codec;
#endif

	return false;
}
|
||||
|
||||
void
|
||||
codec_choose (struct codec *codec, int flags)
|
||||
{
|
||||
// User forced a codec:
|
||||
if (codec_choose_forced(codec, flags)) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Runtime feature detection:
|
||||
if (codec_choose_arm(codec)) {
|
||||
return;
|
||||
}
|
||||
if (codec_choose_x86(codec)) {
|
||||
return;
|
||||
}
|
||||
codec->enc = base64_stream_encode_plain;
|
||||
codec->dec = base64_stream_decode_plain;
|
||||
}
|
65
deps/base64/base64/lib/codecs.h
vendored
65
deps/base64/base64/lib/codecs.h
vendored
|
@ -1,65 +0,0 @@
|
|||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
|
||||
#include "../include/libbase64.h"
|
||||
#include "config.h"
|
||||
|
||||
// Function parameters for encoding functions:
|
||||
#define BASE64_ENC_PARAMS \
|
||||
( struct base64_state *state \
|
||||
, const char *src \
|
||||
, size_t srclen \
|
||||
, char *out \
|
||||
, size_t *outlen \
|
||||
)
|
||||
|
||||
// Function parameters for decoding functions:
|
||||
#define BASE64_DEC_PARAMS \
|
||||
( struct base64_state *state \
|
||||
, const char *src \
|
||||
, size_t srclen \
|
||||
, char *out \
|
||||
, size_t *outlen \
|
||||
)
|
||||
|
||||
// Function signature for encoding functions:
|
||||
#define BASE64_ENC_FUNCTION(arch) \
|
||||
void \
|
||||
base64_stream_encode_ ## arch \
|
||||
BASE64_ENC_PARAMS
|
||||
|
||||
// Function signature for decoding functions:
|
||||
#define BASE64_DEC_FUNCTION(arch) \
|
||||
int \
|
||||
base64_stream_decode_ ## arch \
|
||||
BASE64_DEC_PARAMS
|
||||
|
||||
// Cast away unused variable, silence compiler:
|
||||
#define UNUSED(x) ((void)(x))
|
||||
|
||||
// Stub function when encoder arch unsupported:
|
||||
#define BASE64_ENC_STUB \
|
||||
UNUSED(state); \
|
||||
UNUSED(src); \
|
||||
UNUSED(srclen); \
|
||||
UNUSED(out); \
|
||||
\
|
||||
*outlen = 0;
|
||||
|
||||
// Stub function when decoder arch unsupported:
|
||||
#define BASE64_DEC_STUB \
|
||||
UNUSED(state); \
|
||||
UNUSED(src); \
|
||||
UNUSED(srclen); \
|
||||
UNUSED(out); \
|
||||
UNUSED(outlen); \
|
||||
\
|
||||
return -1;
|
||||
|
||||
struct codec
|
||||
{
|
||||
void (* enc) BASE64_ENC_PARAMS;
|
||||
int (* dec) BASE64_DEC_PARAMS;
|
||||
};
|
||||
|
||||
extern void codec_choose (struct codec *, int flags);
|
1
deps/base64/base64/lib/config.h
vendored
1
deps/base64/base64/lib/config.h
vendored
|
@ -1 +0,0 @@
|
|||
// Intentionally empty
|
74
deps/base64/base64/lib/env.h
vendored
74
deps/base64/base64/lib/env.h
vendored
|
@ -1,74 +0,0 @@
|
|||
#ifndef BASE64_ENV_H
|
||||
#define BASE64_ENV_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
// This header file contains macro definitions that describe certain aspects of
|
||||
// the compile-time environment. Compatibility and portability macros go here.
|
||||
|
||||
// Define machine endianness. This is for GCC:
|
||||
#if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
|
||||
# define BASE64_LITTLE_ENDIAN 1
|
||||
#else
|
||||
# define BASE64_LITTLE_ENDIAN 0
|
||||
#endif
|
||||
|
||||
// This is for Clang:
|
||||
#ifdef __LITTLE_ENDIAN__
|
||||
# define BASE64_LITTLE_ENDIAN 1
|
||||
#endif
|
||||
|
||||
#ifdef __BIG_ENDIAN__
|
||||
# define BASE64_LITTLE_ENDIAN 0
|
||||
#endif
|
||||
|
||||
// MSVC++ needs intrin.h for _byteswap_uint64 (issue #68):
|
||||
#if BASE64_LITTLE_ENDIAN && defined(_MSC_VER)
|
||||
# include <intrin.h>
|
||||
#endif
|
||||
|
||||
// Endian conversion functions:
|
||||
#if BASE64_LITTLE_ENDIAN
|
||||
# ifdef _MSC_VER
|
||||
// Microsoft Visual C++:
|
||||
# define BASE64_HTOBE32(x) _byteswap_ulong(x)
|
||||
# define BASE64_HTOBE64(x) _byteswap_uint64(x)
|
||||
# else
|
||||
// GCC and Clang:
|
||||
# define BASE64_HTOBE32(x) __builtin_bswap32(x)
|
||||
# define BASE64_HTOBE64(x) __builtin_bswap64(x)
|
||||
# endif
|
||||
#else
|
||||
// No conversion needed:
|
||||
# define BASE64_HTOBE32(x) (x)
|
||||
# define BASE64_HTOBE64(x) (x)
|
||||
#endif
|
||||
|
||||
// Detect word size:
|
||||
#if defined (__x86_64__)
|
||||
// This also works for the x32 ABI, which has a 64-bit word size.
|
||||
# define BASE64_WORDSIZE 64
|
||||
#elif SIZE_MAX == UINT32_MAX
|
||||
# define BASE64_WORDSIZE 32
|
||||
#elif SIZE_MAX == UINT64_MAX
|
||||
# define BASE64_WORDSIZE 64
|
||||
#else
|
||||
# error BASE64_WORDSIZE_NOT_DEFINED
|
||||
#endif
|
||||
|
||||
// End-of-file definitions.
|
||||
// Almost end-of-file when waiting for the last '=' character:
|
||||
#define BASE64_AEOF 1
|
||||
// End-of-file when stream end has been reached or invalid input provided:
|
||||
#define BASE64_EOF 2
|
||||
|
||||
// GCC 7 defaults to issuing a warning for fallthrough in switch statements,
|
||||
// unless the fallthrough cases are marked with an attribute. As we use
|
||||
// fallthrough deliberately, define an alias for the attribute:
|
||||
#if __GNUC__ >= 7
|
||||
# define BASE64_FALLTHROUGH __attribute__((fallthrough));
|
||||
#else
|
||||
# define BASE64_FALLTHROUGH
|
||||
#endif
|
||||
|
||||
#endif // BASE64_ENV_H
|
7
deps/base64/base64/lib/exports.txt
vendored
7
deps/base64/base64/lib/exports.txt
vendored
|
@ -1,7 +0,0 @@
|
|||
base64_encode
|
||||
base64_stream_encode
|
||||
base64_stream_encode_init
|
||||
base64_stream_encode_final
|
||||
base64_decode
|
||||
base64_stream_decode
|
||||
base64_stream_decode_init
|
164
deps/base64/base64/lib/lib.c
vendored
164
deps/base64/base64/lib/lib.c
vendored
|
@ -1,164 +0,0 @@
|
|||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#ifdef _OPENMP
|
||||
#include <omp.h>
|
||||
#endif
|
||||
|
||||
#include "../include/libbase64.h"
|
||||
#include "tables/tables.h"
|
||||
#include "codecs.h"
|
||||
#include "env.h"
|
||||
|
||||
// These static function pointers are initialized once when the library is
|
||||
// first used, and remain in use for the remaining lifetime of the program.
|
||||
// The idea being that CPU features don't change at runtime.
|
||||
static struct codec codec = { NULL, NULL };
|
||||
|
||||
void
|
||||
base64_stream_encode_init (struct base64_state *state, int flags)
|
||||
{
|
||||
// If any of the codec flags are set, redo choice:
|
||||
if (codec.enc == NULL || flags & 0xFF) {
|
||||
codec_choose(&codec, flags);
|
||||
}
|
||||
state->eof = 0;
|
||||
state->bytes = 0;
|
||||
state->carry = 0;
|
||||
state->flags = flags;
|
||||
}
|
||||
|
||||
// Encode a chunk of a stream by forwarding all arguments unchanged to the
// encoder currently stored in the file-level `codec'.
// NOTE(review): `codec.enc' starts out NULL and is only assigned through
// codec_choose() in the *_init functions of this file, so this presumably
// must not be called before an init call - confirm against callers.
void
base64_stream_encode
	( struct base64_state *state
	, const char *src
	, size_t srclen
	, char *out
	, size_t *outlen
	)
{
	codec.enc(state, src, srclen, out, outlen);
}
|
||||
|
||||
void
|
||||
base64_stream_encode_final
|
||||
( struct base64_state *state
|
||||
, char *out
|
||||
, size_t *outlen
|
||||
)
|
||||
{
|
||||
uint8_t *o = (uint8_t *)out;
|
||||
|
||||
if (state->bytes == 1) {
|
||||
*o++ = base64_table_enc_6bit[state->carry];
|
||||
*o++ = '=';
|
||||
*o++ = '=';
|
||||
*outlen = 3;
|
||||
return;
|
||||
}
|
||||
if (state->bytes == 2) {
|
||||
*o++ = base64_table_enc_6bit[state->carry];
|
||||
*o++ = '=';
|
||||
*outlen = 2;
|
||||
return;
|
||||
}
|
||||
*outlen = 0;
|
||||
}
|
||||
|
||||
void
|
||||
base64_stream_decode_init (struct base64_state *state, int flags)
|
||||
{
|
||||
// If any of the codec flags are set, redo choice:
|
||||
if (codec.dec == NULL || flags & 0xFFFF) {
|
||||
codec_choose(&codec, flags);
|
||||
}
|
||||
state->eof = 0;
|
||||
state->bytes = 0;
|
||||
state->carry = 0;
|
||||
state->flags = flags;
|
||||
}
|
||||
|
||||
// Decode a chunk of a stream by forwarding all arguments unchanged to the
// decoder currently stored in the file-level `codec', and return whatever
// that decoder returns.
// NOTE(review): `codec.dec' starts out NULL and is only assigned through
// codec_choose() in the *_init functions of this file, so this presumably
// must not be called before an init call - confirm against callers.
int
base64_stream_decode
	( struct base64_state *state
	, const char *src
	, size_t srclen
	, char *out
	, size_t *outlen
	)
{
	return codec.dec(state, src, srclen, out, outlen);
}
|
||||
|
||||
#ifdef _OPENMP
|
||||
|
||||
// Due to the overhead of initializing OpenMP and creating a team of
|
||||
// threads, we require the data length to be larger than a threshold:
|
||||
#define OMP_THRESHOLD 20000
|
||||
|
||||
// Conditionally include OpenMP-accelerated codec implementations:
|
||||
#include "lib_openmp.c"
|
||||
#endif
|
||||
|
||||
void
|
||||
base64_encode
|
||||
( const char *src
|
||||
, size_t srclen
|
||||
, char *out
|
||||
, size_t *outlen
|
||||
, int flags
|
||||
)
|
||||
{
|
||||
size_t s;
|
||||
size_t t;
|
||||
struct base64_state state;
|
||||
|
||||
#ifdef _OPENMP
|
||||
if (srclen >= OMP_THRESHOLD) {
|
||||
base64_encode_openmp(src, srclen, out, outlen, flags);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Init the stream reader:
|
||||
base64_stream_encode_init(&state, flags);
|
||||
|
||||
// Feed the whole string to the stream reader:
|
||||
base64_stream_encode(&state, src, srclen, out, &s);
|
||||
|
||||
// Finalize the stream by writing trailer if any:
|
||||
base64_stream_encode_final(&state, out + s, &t);
|
||||
|
||||
// Final output length is stream length plus tail:
|
||||
*outlen = s + t;
|
||||
}
|
||||
|
||||
int
|
||||
base64_decode
|
||||
( const char *src
|
||||
, size_t srclen
|
||||
, char *out
|
||||
, size_t *outlen
|
||||
, int flags
|
||||
)
|
||||
{
|
||||
int ret;
|
||||
struct base64_state state;
|
||||
|
||||
#ifdef _OPENMP
|
||||
if (srclen >= OMP_THRESHOLD) {
|
||||
return base64_decode_openmp(src, srclen, out, outlen, flags);
|
||||
}
|
||||
#endif
|
||||
|
||||
// Init the stream reader:
|
||||
base64_stream_decode_init(&state, flags);
|
||||
|
||||
// Feed the whole string to the stream reader:
|
||||
ret = base64_stream_decode(&state, src, srclen, out, outlen);
|
||||
|
||||
// If when decoding a whole block, we're still waiting for input then fail:
|
||||
if (ret && (state.bytes == 0)) {
|
||||
return ret;
|
||||
}
|
||||
return 0;
|
||||
}
|
149
deps/base64/base64/lib/lib_openmp.c
vendored
149
deps/base64/base64/lib/lib_openmp.c
vendored
|
@ -1,149 +0,0 @@
|
|||
// This code makes some assumptions on the implementation of
|
||||
// base64_stream_encode_init(), base64_stream_encode() and base64_stream_decode().
|
||||
// Basically these assumptions boil down to that when breaking the src into
|
||||
// parts, out parts can be written without side effects.
|
||||
// This is met when:
|
||||
// 1) base64_stream_encode() and base64_stream_decode() don't use globals;
|
||||
// 2) the shared variables src and out are not read or written outside of the
|
||||
// bounds of their parts, i.e. when base64_stream_encode() reads a multiple
|
||||
// of 3 bytes, it must write no more than a multiple of 4 bytes, not even
|
||||
// temporarily;
|
||||
// 3) the state flag can be discarded after base64_stream_encode() and
|
||||
// base64_stream_decode() on the parts.
|
||||
|
||||
// OpenMP variant of the one-shot encoder. The input is cut into one
// equally sized part per thread (each a multiple of 3 bytes) that are encoded
// in parallel; whatever is left over is encoded afterwards on the calling
// thread, followed by the trailer.
//
//   src, srclen - input buffer and its length.
//   out         - output buffer; part i is written at offset i * len * 4 / 3.
//   outlen      - receives the total number of bytes written.
//   flags       - passed on to base64_stream_encode_init().
static inline void
base64_encode_openmp
	( const char *src
	, size_t srclen
	, char *out
	, size_t *outlen
	, int flags
	)
{
	size_t s;
	size_t t;
	size_t sum = 0, len, last_len;
	struct base64_state state, initial_state;
	int num_threads, i;

	// Request a number of threads but not necessarily get them:
	#pragma omp parallel
	{
		// Get the number of threads used from one thread only,
		// as num_threads is a shared var:
		#pragma omp single
		{
			num_threads = omp_get_num_threads();

			// Split the input string into num_threads parts, each
			// part a multiple of 3 bytes. The remaining bytes will
			// be done later:
			len = srclen / (num_threads * 3);
			len *= 3;
			last_len = srclen - num_threads * len;

			// Init the stream reader:
			base64_stream_encode_init(&state, flags);
			// Keep a pristine copy for encoding the remainder later:
			initial_state = state;
		}

		// Single has an implicit barrier for all threads to wait here
		// for the above to complete:
		// (every thread works on its own copy of `state', and the
		// per-part output lengths are summed into `sum')
		#pragma omp for firstprivate(state) private(s) reduction(+:sum) schedule(static,1)
		for (i = 0; i < num_threads; i++)
		{
			// Feed each part of the string to the stream reader:
			base64_stream_encode(&state, src + i * len, len, out + i * len * 4 / 3, &s);
			sum += s;
		}
	}

	// As encoding should never fail and we encode an exact multiple
	// of 3 bytes, we can discard state:
	state = initial_state;

	// Encode the remaining bytes:
	base64_stream_encode(&state, src + num_threads * len, last_len, out + num_threads * len * 4 / 3, &s);

	// Finalize the stream by writing trailer if any:
	base64_stream_encode_final(&state, out + num_threads * len * 4 / 3 + s, &t);

	// Final output length is stream length plus tail:
	sum += s + t;
	*outlen = sum;
}
|
||||
|
||||
// OpenMP variant of the one-shot decoder. The input is cut into one
// equally sized part per thread (each a multiple of 4 bytes) that are decoded
// in parallel; whatever is left over is decoded afterwards on the calling
// thread.
//
//   src, srclen - input buffer and its length.
//   out         - output buffer; part i is written at offset i * len * 3 / 4.
//   outlen      - receives the total number of bytes written; it is only
//                 assigned when all parallel parts decoded successfully.
//   flags       - passed on to base64_stream_decode_init().
//
// Returns -1 when every part returned -1, 0 when any part failed or the
// remainder did not decode cleanly, otherwise the result of decoding the
// remainder.
static inline int
base64_decode_openmp
	( const char *src
	, size_t srclen
	, char *out
	, size_t *outlen
	, int flags
	)
{
	int num_threads, result = 0, i;
	size_t sum = 0, len, last_len, s;
	struct base64_state state, initial_state;

	// Request a number of threads but not necessarily get them:
	#pragma omp parallel
	{
		// Get the number of threads used from one thread only,
		// as num_threads is a shared var:
		#pragma omp single
		{
			num_threads = omp_get_num_threads();

			// Split the input string into num_threads parts, each
			// part a multiple of 4 bytes. The remaining bytes will
			// be done later:
			len = srclen / (num_threads * 4);
			len *= 4;
			last_len = srclen - num_threads * len;

			// Init the stream reader:
			base64_stream_decode_init(&state, flags);

			// Keep a pristine copy for decoding the remainder later:
			initial_state = state;
		}

		// Single has an implicit barrier to wait here for the above to
		// complete:
		// (every thread works on its own copy of `state'; output
		// lengths and return codes are summed into `sum' and `result')
		#pragma omp for firstprivate(state) private(s) reduction(+:sum, result) schedule(static,1)
		for (i = 0; i < num_threads; i++)
		{
			int this_result;

			// Feed each part of the string to the stream reader:
			this_result = base64_stream_decode(&state, src + i * len, len, out + i * len * 3 / 4, &s);
			sum += s;
			result += this_result;
		}
	}

	// If `result' equals `-num_threads', then all threads returned -1,
	// indicating that the requested codec is not available:
	if (result == -num_threads) {
		return -1;
	}

	// If `result' does not equal `num_threads', then at least one of the
	// threads hit a decode error:
	if (result != num_threads) {
		return 0;
	}

	// So far so good, now decode whatever remains in the buffer. Reuse the
	// initial state, since we are at a 4-byte boundary:
	state = initial_state;
	result = base64_stream_decode(&state, src + num_threads * len, last_len, out + num_threads * len * 3 / 4, &s);
	sum += s;
	*outlen = sum;

	// A whole buffer was supplied, so a decoder that is still waiting for
	// more input (bytes pending in the state) counts as a failure:
	if (result && (state.bytes == 0)) {
		return result;
	}
	return 0;
}
|
1
deps/base64/base64/lib/tables/.gitignore
vendored
1
deps/base64/base64/lib/tables/.gitignore
vendored
|
@ -1 +0,0 @@
|
|||
table_generator
|
17
deps/base64/base64/lib/tables/Makefile
vendored
17
deps/base64/base64/lib/tables/Makefile
vendored
|
@ -1,17 +0,0 @@
|
|||
# Regenerates the Base64 lookup-table headers.
#
#   table_dec_32bit.h  is the output of the compiled table_generator program
#   table_enc_12bit.h  is the output of the table_enc_12bit.py script
#
# Both headers are produced by redirecting a program's stdout into the
# target file.

.PHONY: all clean

# The redirection creates the target before the generator has finished, so a
# failing generator would otherwise leave a truncated header behind that make
# considers up to date. Delete the target whenever its recipe fails.
.DELETE_ON_ERROR:

TARGETS := table_dec_32bit.h table_enc_12bit.h table_generator

all: $(TARGETS)

clean:
	$(RM) $(TARGETS)

# Run the generator binary and capture its output.
table_dec_32bit.h: table_generator
	./$< > $@

# The script is executed directly, so it must keep its executable bit.
table_enc_12bit.h: table_enc_12bit.py
	./$< > $@

table_generator: table_generator.c
	$(CC) $(CFLAGS) -o $@ $^
|
393
deps/base64/base64/lib/tables/table_dec_32bit.h
vendored
393
deps/base64/base64/lib/tables/table_dec_32bit.h
vendored
|
@ -1,393 +0,0 @@
|
|||
#include <stdint.h>
|
||||
#define CHAR62 '+'
|
||||
#define CHAR63 '/'
|
||||
#define CHARPAD '='
|
||||
|
||||
|
||||
#if BASE64_LITTLE_ENDIAN
|
||||
|
||||
|
||||
/* SPECIAL DECODE TABLES FOR LITTLE ENDIAN (INTEL) CPUS */
|
||||
|
||||
const uint32_t base64_table_dec_32bit_d0[256] = {
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0x000000f8, 0xffffffff, 0xffffffff, 0xffffffff, 0x000000fc,
|
||||
0x000000d0, 0x000000d4, 0x000000d8, 0x000000dc, 0x000000e0, 0x000000e4,
|
||||
0x000000e8, 0x000000ec, 0x000000f0, 0x000000f4, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000,
|
||||
0x00000004, 0x00000008, 0x0000000c, 0x00000010, 0x00000014, 0x00000018,
|
||||
0x0000001c, 0x00000020, 0x00000024, 0x00000028, 0x0000002c, 0x00000030,
|
||||
0x00000034, 0x00000038, 0x0000003c, 0x00000040, 0x00000044, 0x00000048,
|
||||
0x0000004c, 0x00000050, 0x00000054, 0x00000058, 0x0000005c, 0x00000060,
|
||||
0x00000064, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0x00000068, 0x0000006c, 0x00000070, 0x00000074, 0x00000078,
|
||||
0x0000007c, 0x00000080, 0x00000084, 0x00000088, 0x0000008c, 0x00000090,
|
||||
0x00000094, 0x00000098, 0x0000009c, 0x000000a0, 0x000000a4, 0x000000a8,
|
||||
0x000000ac, 0x000000b0, 0x000000b4, 0x000000b8, 0x000000bc, 0x000000c0,
|
||||
0x000000c4, 0x000000c8, 0x000000cc, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
|
||||
};
|
||||
|
||||
|
||||
const uint32_t base64_table_dec_32bit_d1[256] = {
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0x0000e003, 0xffffffff, 0xffffffff, 0xffffffff, 0x0000f003,
|
||||
0x00004003, 0x00005003, 0x00006003, 0x00007003, 0x00008003, 0x00009003,
|
||||
0x0000a003, 0x0000b003, 0x0000c003, 0x0000d003, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000,
|
||||
0x00001000, 0x00002000, 0x00003000, 0x00004000, 0x00005000, 0x00006000,
|
||||
0x00007000, 0x00008000, 0x00009000, 0x0000a000, 0x0000b000, 0x0000c000,
|
||||
0x0000d000, 0x0000e000, 0x0000f000, 0x00000001, 0x00001001, 0x00002001,
|
||||
0x00003001, 0x00004001, 0x00005001, 0x00006001, 0x00007001, 0x00008001,
|
||||
0x00009001, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0x0000a001, 0x0000b001, 0x0000c001, 0x0000d001, 0x0000e001,
|
||||
0x0000f001, 0x00000002, 0x00001002, 0x00002002, 0x00003002, 0x00004002,
|
||||
0x00005002, 0x00006002, 0x00007002, 0x00008002, 0x00009002, 0x0000a002,
|
||||
0x0000b002, 0x0000c002, 0x0000d002, 0x0000e002, 0x0000f002, 0x00000003,
|
||||
0x00001003, 0x00002003, 0x00003003, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
|
||||
};
|
||||
|
||||
|
||||
const uint32_t base64_table_dec_32bit_d2[256] = {
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0x00800f00, 0xffffffff, 0xffffffff, 0xffffffff, 0x00c00f00,
|
||||
0x00000d00, 0x00400d00, 0x00800d00, 0x00c00d00, 0x00000e00, 0x00400e00,
|
||||
0x00800e00, 0x00c00e00, 0x00000f00, 0x00400f00, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000,
|
||||
0x00400000, 0x00800000, 0x00c00000, 0x00000100, 0x00400100, 0x00800100,
|
||||
0x00c00100, 0x00000200, 0x00400200, 0x00800200, 0x00c00200, 0x00000300,
|
||||
0x00400300, 0x00800300, 0x00c00300, 0x00000400, 0x00400400, 0x00800400,
|
||||
0x00c00400, 0x00000500, 0x00400500, 0x00800500, 0x00c00500, 0x00000600,
|
||||
0x00400600, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0x00800600, 0x00c00600, 0x00000700, 0x00400700, 0x00800700,
|
||||
0x00c00700, 0x00000800, 0x00400800, 0x00800800, 0x00c00800, 0x00000900,
|
||||
0x00400900, 0x00800900, 0x00c00900, 0x00000a00, 0x00400a00, 0x00800a00,
|
||||
0x00c00a00, 0x00000b00, 0x00400b00, 0x00800b00, 0x00c00b00, 0x00000c00,
|
||||
0x00400c00, 0x00800c00, 0x00c00c00, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
|
||||
};
|
||||
|
||||
|
||||
const uint32_t base64_table_dec_32bit_d3[256] = {
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0x003e0000, 0xffffffff, 0xffffffff, 0xffffffff, 0x003f0000,
|
||||
0x00340000, 0x00350000, 0x00360000, 0x00370000, 0x00380000, 0x00390000,
|
||||
0x003a0000, 0x003b0000, 0x003c0000, 0x003d0000, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000,
|
||||
0x00010000, 0x00020000, 0x00030000, 0x00040000, 0x00050000, 0x00060000,
|
||||
0x00070000, 0x00080000, 0x00090000, 0x000a0000, 0x000b0000, 0x000c0000,
|
||||
0x000d0000, 0x000e0000, 0x000f0000, 0x00100000, 0x00110000, 0x00120000,
|
||||
0x00130000, 0x00140000, 0x00150000, 0x00160000, 0x00170000, 0x00180000,
|
||||
0x00190000, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0x001a0000, 0x001b0000, 0x001c0000, 0x001d0000, 0x001e0000,
|
||||
0x001f0000, 0x00200000, 0x00210000, 0x00220000, 0x00230000, 0x00240000,
|
||||
0x00250000, 0x00260000, 0x00270000, 0x00280000, 0x00290000, 0x002a0000,
|
||||
0x002b0000, 0x002c0000, 0x002d0000, 0x002e0000, 0x002f0000, 0x00300000,
|
||||
0x00310000, 0x00320000, 0x00330000, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
|
||||
};
|
||||
|
||||
|
||||
#else
|
||||
|
||||
|
||||
/* SPECIAL DECODE TABLES FOR BIG ENDIAN (IBM/MOTOROLA/SUN) CPUS */
|
||||
|
||||
const uint32_t base64_table_dec_32bit_d0[256] = {
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xf8000000, 0xffffffff, 0xffffffff, 0xffffffff, 0xfc000000,
|
||||
0xd0000000, 0xd4000000, 0xd8000000, 0xdc000000, 0xe0000000, 0xe4000000,
|
||||
0xe8000000, 0xec000000, 0xf0000000, 0xf4000000, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000,
|
||||
0x04000000, 0x08000000, 0x0c000000, 0x10000000, 0x14000000, 0x18000000,
|
||||
0x1c000000, 0x20000000, 0x24000000, 0x28000000, 0x2c000000, 0x30000000,
|
||||
0x34000000, 0x38000000, 0x3c000000, 0x40000000, 0x44000000, 0x48000000,
|
||||
0x4c000000, 0x50000000, 0x54000000, 0x58000000, 0x5c000000, 0x60000000,
|
||||
0x64000000, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0x68000000, 0x6c000000, 0x70000000, 0x74000000, 0x78000000,
|
||||
0x7c000000, 0x80000000, 0x84000000, 0x88000000, 0x8c000000, 0x90000000,
|
||||
0x94000000, 0x98000000, 0x9c000000, 0xa0000000, 0xa4000000, 0xa8000000,
|
||||
0xac000000, 0xb0000000, 0xb4000000, 0xb8000000, 0xbc000000, 0xc0000000,
|
||||
0xc4000000, 0xc8000000, 0xcc000000, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
|
||||
};
|
||||
|
||||
|
||||
const uint32_t base64_table_dec_32bit_d1[256] = {
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0x03e00000, 0xffffffff, 0xffffffff, 0xffffffff, 0x03f00000,
|
||||
0x03400000, 0x03500000, 0x03600000, 0x03700000, 0x03800000, 0x03900000,
|
||||
0x03a00000, 0x03b00000, 0x03c00000, 0x03d00000, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000,
|
||||
0x00100000, 0x00200000, 0x00300000, 0x00400000, 0x00500000, 0x00600000,
|
||||
0x00700000, 0x00800000, 0x00900000, 0x00a00000, 0x00b00000, 0x00c00000,
|
||||
0x00d00000, 0x00e00000, 0x00f00000, 0x01000000, 0x01100000, 0x01200000,
|
||||
0x01300000, 0x01400000, 0x01500000, 0x01600000, 0x01700000, 0x01800000,
|
||||
0x01900000, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0x01a00000, 0x01b00000, 0x01c00000, 0x01d00000, 0x01e00000,
|
||||
0x01f00000, 0x02000000, 0x02100000, 0x02200000, 0x02300000, 0x02400000,
|
||||
0x02500000, 0x02600000, 0x02700000, 0x02800000, 0x02900000, 0x02a00000,
|
||||
0x02b00000, 0x02c00000, 0x02d00000, 0x02e00000, 0x02f00000, 0x03000000,
|
||||
0x03100000, 0x03200000, 0x03300000, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
|
||||
};
|
||||
|
||||
|
||||
const uint32_t base64_table_dec_32bit_d2[256] = {
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0x000f8000, 0xffffffff, 0xffffffff, 0xffffffff, 0x000fc000,
|
||||
0x000d0000, 0x000d4000, 0x000d8000, 0x000dc000, 0x000e0000, 0x000e4000,
|
||||
0x000e8000, 0x000ec000, 0x000f0000, 0x000f4000, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000,
|
||||
0x00004000, 0x00008000, 0x0000c000, 0x00010000, 0x00014000, 0x00018000,
|
||||
0x0001c000, 0x00020000, 0x00024000, 0x00028000, 0x0002c000, 0x00030000,
|
||||
0x00034000, 0x00038000, 0x0003c000, 0x00040000, 0x00044000, 0x00048000,
|
||||
0x0004c000, 0x00050000, 0x00054000, 0x00058000, 0x0005c000, 0x00060000,
|
||||
0x00064000, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0x00068000, 0x0006c000, 0x00070000, 0x00074000, 0x00078000,
|
||||
0x0007c000, 0x00080000, 0x00084000, 0x00088000, 0x0008c000, 0x00090000,
|
||||
0x00094000, 0x00098000, 0x0009c000, 0x000a0000, 0x000a4000, 0x000a8000,
|
||||
0x000ac000, 0x000b0000, 0x000b4000, 0x000b8000, 0x000bc000, 0x000c0000,
|
||||
0x000c4000, 0x000c8000, 0x000cc000, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
|
||||
};
|
||||
|
||||
|
||||
const uint32_t base64_table_dec_32bit_d3[256] = {
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0x00003e00, 0xffffffff, 0xffffffff, 0xffffffff, 0x00003f00,
|
||||
0x00003400, 0x00003500, 0x00003600, 0x00003700, 0x00003800, 0x00003900,
|
||||
0x00003a00, 0x00003b00, 0x00003c00, 0x00003d00, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000,
|
||||
0x00000100, 0x00000200, 0x00000300, 0x00000400, 0x00000500, 0x00000600,
|
||||
0x00000700, 0x00000800, 0x00000900, 0x00000a00, 0x00000b00, 0x00000c00,
|
||||
0x00000d00, 0x00000e00, 0x00000f00, 0x00001000, 0x00001100, 0x00001200,
|
||||
0x00001300, 0x00001400, 0x00001500, 0x00001600, 0x00001700, 0x00001800,
|
||||
0x00001900, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0x00001a00, 0x00001b00, 0x00001c00, 0x00001d00, 0x00001e00,
|
||||
0x00001f00, 0x00002000, 0x00002100, 0x00002200, 0x00002300, 0x00002400,
|
||||
0x00002500, 0x00002600, 0x00002700, 0x00002800, 0x00002900, 0x00002a00,
|
||||
0x00002b00, 0x00002c00, 0x00002d00, 0x00002e00, 0x00002f00, 0x00003000,
|
||||
0x00003100, 0x00003200, 0x00003300, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
|
||||
};
|
||||
|
||||
|
||||
#endif
|
1031
deps/base64/base64/lib/tables/table_enc_12bit.h
vendored
1031
deps/base64/base64/lib/tables/table_enc_12bit.h
vendored
File diff suppressed because it is too large
Load diff
45
deps/base64/base64/lib/tables/table_enc_12bit.py
vendored
45
deps/base64/base64/lib/tables/table_enc_12bit.py
vendored
|
@ -1,45 +0,0 @@
|
|||
#!/usr/bin/python3
|
||||
|
||||
def tr(x):
    """Translate a 6-bit value to the Base64 alphabet.

    Args:
        x: Index into the standard Base64 alphabet, 0..63.

    Returns:
        The integer code point (``ord``) of the alphabet character at
        position ``x``.

    Raises:
        IndexError: If ``x`` is outside 0..63.
    """
    s = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
         'abcdefghijklmnopqrstuvwxyz'
         '0123456789'
         '+/')
    # Plain string indexing accepts negative values and would silently wrap
    # around to the end of the alphabet; treat them like any other
    # out-of-range index instead.
    if x < 0:
        raise IndexError("string index out of range")
    return ord(s[x])
|
||||
|
||||
def table(fn):
    """Generate a 12-bit lookup table.

    Args:
        fn: Callable invoked once for every index ``n`` in ``range(2**12)``,
            in ascending order; it returns the integer to store at ``n``.

    Returns:
        One string holding all 4096 entries, each formatted as
        ``0x%04XU,``. Entries are laid out eight per line; every line
        starts with a tab and entries within a line are separated by a
        single space. There is no trailing newline.
    """
    def prefix(n):
        # Very first entry: just the indent. First entry of every later row
        # of eight: line break plus indent. Anything else: a space.
        if n == 0:
            return "\t"
        return "\n\t" if n % 8 == 0 else " "

    return "".join(
        "{}0x{:04X}U,".format(prefix(n), fn(n)) for n in range(2**12)
    )
|
||||
|
||||
def table_be():
    """Generate a 12-bit big-endian lookup table.

    Every 12-bit index is split into its upper and lower 6 bits. The
    Base64 character for the upper 6 bits is stored in bits 8..15 of the
    entry and the character for the lower 6 bits in bits 0..7.
    """
    def entry(n):
        upper, lower = n >> 6, n & 0x3F
        return (tr(upper) << 8) | tr(lower)

    return table(entry)
|
||||
|
||||
def table_le():
    """Generate a 12-bit little-endian lookup table.

    Every 12-bit index is split into its upper and lower 6 bits. The
    Base64 character for the upper 6 bits is stored in bits 0..7 of the
    entry and the character for the lower 6 bits in bits 8..15, i.e. the
    two bytes are swapped relative to the big-endian table.
    """
    def entry(n):
        upper, lower = n >> 6, n & 0x3F
        return tr(upper) | (tr(lower) << 8)

    return table(entry)
|
||||
|
||||
def main():
    """Entry point.

    Writes the complete C header to standard output: an ``#include`` of
    ``<stdint.h>`` followed by the ``base64_table_enc_12bit`` array, whose
    initialiser is the little-endian table when ``BASE64_LITTLE_ENDIAN`` is
    true at preprocessing time and the big-endian table otherwise.
    """
    header = (
        "#include <stdint.h>",
        "",
        "const uint16_t base64_table_enc_12bit[] = {",
        "#if BASE64_LITTLE_ENDIAN",
        table_le(),
        "#else",
        table_be(),
        "#endif",
        "};",
    )
    # One element per output line; print() supplies the final newline.
    print(*header, sep="\n")


if __name__ == "__main__":
    main()
|
184
deps/base64/base64/lib/tables/table_generator.c
vendored
184
deps/base64/base64/lib/tables/table_generator.c
vendored
|
@ -1,184 +0,0 @@
|
|||
/**
|
||||
*
|
||||
* Copyright 2005, 2006 Nick Galbreath -- nickg [at] modp [dot] com
|
||||
* Copyright 2017 Matthieu Darbois
|
||||
* All rights reserved.
|
||||
*
|
||||
* http://modp.com/release/base64
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met:
|
||||
*
|
||||
* - Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* - Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
* IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
* PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
||||
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
*/
|
||||
|
||||
/****************************/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <inttypes.h>
|
||||
|
||||
static uint8_t b64chars[64] = {
|
||||
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
|
||||
'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
|
||||
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
|
||||
'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
|
||||
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'
|
||||
};
|
||||
|
||||
static uint8_t padchar = '=';
|
||||
|
||||
static void printStart(void)
|
||||
{
|
||||
printf("#include <stdint.h>\n");
|
||||
printf("#define CHAR62 '%c'\n", b64chars[62]);
|
||||
printf("#define CHAR63 '%c'\n", b64chars[63]);
|
||||
printf("#define CHARPAD '%c'\n", padchar);
|
||||
}
|
||||
|
||||
/* Fill all 256 entries of a decode table with 0xFFFFFFFF. */
static void clearDecodeTable(uint32_t* ary)
{
	uint32_t* const end = ary + 256;

	while (ary != end) {
		*ary++ = 0xFFFFFFFF;
	}
}
|
||||
|
||||
/*
 * Dump an array of uint32_t as a C array definition of hex literals.
 *
 * ary:  values to print
 * sz:   number of values in ary
 * name: identifier used for the emitted array
 *
 * Values are printed six per line. An empty array (sz == 0) produces an
 * empty initializer; the previous loop read ary[0] unconditionally and could
 * not terminate in bounds in that case.
 */
void uint32_array_to_c_hex(const uint32_t* ary, size_t sz, const char* name)
{
	size_t i;

	printf("const uint32_t %s[%d] = {\n", name, (int)sz);
	for (i = 0; i < sz; ++i) {
		printf("0x%08" PRIx32, ary[i]);
		if (i + 1 == sz) {
			/* no separator after the last value */
			break;
		}
		fputs((i + 1) % 6 == 0 ? ",\n" : ", ", stdout);
	}
	printf("\n};\n");
}
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
uint32_t x;
|
||||
uint32_t i = 0;
|
||||
uint32_t ary[256];
|
||||
|
||||
/* over-ride standard alphabet */
|
||||
if (argc == 2) {
|
||||
uint8_t* replacements = (uint8_t*)argv[1];
|
||||
if (strlen((char*)replacements) != 3) {
|
||||
fprintf(stderr, "input must be a string of 3 characters '-', '.' or '_'\n");
|
||||
exit(1);
|
||||
}
|
||||
fprintf(stderr, "fusing '%s' as replacements in base64 encoding\n", replacements);
|
||||
b64chars[62] = replacements[0];
|
||||
b64chars[63] = replacements[1];
|
||||
padchar = replacements[2];
|
||||
}
|
||||
|
||||
printStart();
|
||||
|
||||
printf("\n\n#if BASE64_LITTLE_ENDIAN\n");
|
||||
|
||||
printf("\n\n/* SPECIAL DECODE TABLES FOR LITTLE ENDIAN (INTEL) CPUS */\n\n");
|
||||
|
||||
clearDecodeTable(ary);
|
||||
for (i = 0; i < 64; ++i) {
|
||||
x = b64chars[i];
|
||||
ary[x] = i << 2;
|
||||
}
|
||||
uint32_array_to_c_hex(ary, sizeof(ary) / sizeof(uint32_t), "base64_table_dec_32bit_d0");
|
||||
printf("\n\n");
|
||||
|
||||
clearDecodeTable(ary);
|
||||
for (i = 0; i < 64; ++i) {
|
||||
x = b64chars[i];
|
||||
ary[x] = ((i & 0x30) >> 4) | ((i & 0x0F) << 12);
|
||||
}
|
||||
uint32_array_to_c_hex(ary, sizeof(ary) / sizeof(uint32_t), "base64_table_dec_32bit_d1");
|
||||
printf("\n\n");
|
||||
|
||||
clearDecodeTable(ary);
|
||||
for (i = 0; i < 64; ++i) {
|
||||
x = b64chars[i];
|
||||
ary[x] = ((i & 0x03) << 22) | ((i & 0x3c) << 6);
|
||||
}
|
||||
uint32_array_to_c_hex(ary, sizeof(ary) / sizeof(uint32_t), "base64_table_dec_32bit_d2");
|
||||
printf("\n\n");
|
||||
|
||||
clearDecodeTable(ary);
|
||||
for (i = 0; i < 64; ++i) {
|
||||
x = b64chars[i];
|
||||
ary[x] = i << 16;
|
||||
}
|
||||
uint32_array_to_c_hex(ary, sizeof(ary) / sizeof(uint32_t), "base64_table_dec_32bit_d3");
|
||||
printf("\n\n");
|
||||
|
||||
printf("#else\n");
|
||||
|
||||
printf("\n\n/* SPECIAL DECODE TABLES FOR BIG ENDIAN (IBM/MOTOROLA/SUN) CPUS */\n\n");
|
||||
|
||||
clearDecodeTable(ary);
|
||||
for (i = 0; i < 64; ++i) {
|
||||
x = b64chars[i];
|
||||
ary[x] = i << 26;
|
||||
}
|
||||
uint32_array_to_c_hex(ary, sizeof(ary) / sizeof(uint32_t), "base64_table_dec_32bit_d0");
|
||||
printf("\n\n");
|
||||
|
||||
clearDecodeTable(ary);
|
||||
for (i = 0; i < 64; ++i) {
|
||||
x = b64chars[i];
|
||||
ary[x] = i << 20;
|
||||
}
|
||||
uint32_array_to_c_hex(ary, sizeof(ary) / sizeof(uint32_t), "base64_table_dec_32bit_d1");
|
||||
printf("\n\n");
|
||||
|
||||
clearDecodeTable(ary);
|
||||
for (i = 0; i < 64; ++i) {
|
||||
x = b64chars[i];
|
||||
ary[x] = i << 14;
|
||||
}
|
||||
uint32_array_to_c_hex(ary, sizeof(ary) / sizeof(uint32_t), "base64_table_dec_32bit_d2");
|
||||
printf("\n\n");
|
||||
|
||||
clearDecodeTable(ary);
|
||||
for (i = 0; i < 64; ++i) {
|
||||
x = b64chars[i];
|
||||
ary[x] = i << 8;
|
||||
}
|
||||
uint32_array_to_c_hex(ary, sizeof(ary) / sizeof(uint32_t), "base64_table_dec_32bit_d3");
|
||||
printf("\n\n");
|
||||
|
||||
printf("#endif\n");
|
||||
|
||||
return 0;
|
||||
}
|
40
deps/base64/base64/lib/tables/tables.c
vendored
40
deps/base64/base64/lib/tables/tables.c
vendored
|
@ -1,40 +0,0 @@
|
|||
#include "tables.h"
|
||||
|
||||
const uint8_t
|
||||
base64_table_enc_6bit[] =
|
||||
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
||||
"abcdefghijklmnopqrstuvwxyz"
|
||||
"0123456789"
|
||||
"+/";
|
||||
|
||||
// In the lookup table below, note that the value for '=' (character 61) is
|
||||
// 254, not 255. This character is used for in-band signaling of the end of
|
||||
// the datastream, and we will use that later. The characters A-Z, a-z, 0-9
|
||||
// and + / are mapped to their "decoded" values. The other bytes all map to
|
||||
// the value 255, which flags them as "invalid input".
|
||||
|
||||
// Decode table with one entry per input byte value; the trailing comment on
// each row gives the range of byte values that row covers.
const uint8_t
|
||||
base64_table_dec_8bit[] =
|
||||
{
|
||||
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 0..15
|
||||
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 16..31
|
||||
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 62, 255, 255, 255, 63, // 32..47 ('+' -> 62, '/' -> 63)
|
||||
52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 255, 255, 255, 254, 255, 255, // 48..63 ('0'..'9' -> 52..61, '=' -> 254)
|
||||
255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, // 64..79 ('A'..'O' -> 0..14)
|
||||
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 255, 255, 255, 255, 255, // 80..95 ('P'..'Z' -> 15..25)
|
||||
255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, // 96..111 ('a'..'o' -> 26..40)
|
||||
41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 255, 255, 255, 255, 255, // 112..127 ('p'..'z' -> 41..51)
|
||||
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 128..143
|
||||
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 144..159
|
||||
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 160..175
|
||||
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 176..191
|
||||
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 192..207
|
||||
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 208..223
|
||||
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 224..239
|
||||
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 240..255
|
||||
};
|
||||
|
||||
#if BASE64_WORDSIZE >= 32
|
||||
# include "table_dec_32bit.h"
|
||||
# include "table_enc_12bit.h"
|
||||
#endif
|
23
deps/base64/base64/lib/tables/tables.h
vendored
23
deps/base64/base64/lib/tables/tables.h
vendored
|
@ -1,23 +0,0 @@
|
|||
#ifndef BASE64_TABLES_H
|
||||
#define BASE64_TABLES_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "../env.h"
|
||||
|
||||
// These tables are used by all codecs for fallback plain encoding/decoding:
|
||||
extern const uint8_t base64_table_enc_6bit[];
|
||||
extern const uint8_t base64_table_dec_8bit[];
|
||||
|
||||
// These tables are used for the 32-bit and 64-bit generic decoders:
|
||||
#if BASE64_WORDSIZE >= 32
|
||||
extern const uint32_t base64_table_dec_32bit_d0[];
|
||||
extern const uint32_t base64_table_dec_32bit_d1[];
|
||||
extern const uint32_t base64_table_dec_32bit_d2[];
|
||||
extern const uint32_t base64_table_dec_32bit_d3[];
|
||||
|
||||
// This table is used by the 32 and 64-bit generic encoders:
|
||||
extern const uint16_t base64_table_enc_12bit[];
|
||||
#endif
|
||||
|
||||
#endif // BASE64_TABLES_H
|
43
deps/base64/base64/test/CMakeLists.txt
vendored
43
deps/base64/base64/test/CMakeLists.txt
vendored
|
@ -1,43 +0,0 @@
|
|||
# Written in 2016 by Henrik Steffen Gaßmann henrik@gassmann.onl
|
||||
#
|
||||
# To the extent possible under law, the author(s) have dedicated all
|
||||
# copyright and related and neighboring rights to this software to the
|
||||
# public domain worldwide. This software is distributed without any warranty.
|
||||
#
|
||||
# You should have received a copy of the CC0 Public Domain Dedication
|
||||
# along with this software. If not, see
|
||||
#
|
||||
# http://creativecommons.org/publicdomain/zero/1.0/
|
||||
#
|
||||
########################################################################
|
||||
|
||||
# add_base64_test(<name> <source>...)
#
# Builds the executable <name> from the listed sources, links it against the
# base64 target, registers it as a CTest test and installs it into
# ${CMAKE_INSTALL_BINDIR}.
function(add_base64_test TEST_NAME)
  # Assign rather than append: a function scope starts with a copy of the
  # caller's variables, so appending could pick up a SRC_FILES value defined
  # by the caller. (The previous code unset SRC_FILE, the loop variable,
  # which left SRC_FILES untouched.)
  set(SRC_FILES ${ARGN})

  add_executable(${TEST_NAME} ${SRC_FILES})
  target_link_libraries(${TEST_NAME} PRIVATE base64)

  add_test(NAME ${TEST_NAME}
    COMMAND ${TEST_NAME}
  )
  install(TARGETS ${TEST_NAME} DESTINATION ${CMAKE_INSTALL_BINDIR})
endfunction()
|
||||
|
||||
|
||||
add_base64_test(test_base64
|
||||
codec_supported.c
|
||||
test_base64.c
|
||||
)
|
||||
|
||||
add_base64_test(benchmark
|
||||
codec_supported.c
|
||||
benchmark.c
|
||||
)
|
||||
|
||||
if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
|
||||
target_link_libraries(benchmark PRIVATE rt)
|
||||
endif()
|
||||
|
38
deps/base64/base64/test/Makefile
vendored
38
deps/base64/base64/test/Makefile
vendored
|
@ -1,38 +0,0 @@
|
|||
# Build and run the base64 test and benchmark programs.
CFLAGS += -std=c99 -O3 -Wall -Wextra -pedantic -DBASE64_STATIC_DEFINE
ifdef OPENMP
  CFLAGS += -fopenmp
endif

# Pick benchmark link flags from the compiler's target triple.
TARGET := $(shell $(CC) -dumpmachine)
ifneq (, $(findstring darwin, $(TARGET)))
  BENCH_LDFLAGS=
else ifneq (, $(findstring mingw, $(TARGET)))
  BENCH_LDFLAGS=
else
  # default to linux, -lrt needed
  BENCH_LDFLAGS=-lrt
endif

.PHONY: clean test valgrind

test: clean test_base64 benchmark
	./test_base64
	./benchmark

valgrind: clean test_base64
	valgrind --error-exitcode=2 ./test_base64

test_base64: test_base64.c codec_supported.o ../lib/libbase64.o
	$(CC) $(CFLAGS) -o $@ $^

benchmark: benchmark.c codec_supported.o ../lib/libbase64.o
	$(CC) $(CFLAGS) -o $@ $^ $(BENCH_LDFLAGS)

# Anything in the parent directory is built by the parent Makefile. Use
# $(MAKE) so the sub-make is the same make program and inherits command-line
# flags and the jobserver.
../%:
	$(MAKE) -C .. $*

%.o: %.c
	$(CC) $(CFLAGS) -o $@ -c $<

clean:
	rm -f benchmark test_base64 *.o
|
286
deps/base64/base64/test/benchmark.c
vendored
286
deps/base64/base64/test/benchmark.c
vendored
|
@ -1,286 +0,0 @@
|
|||
// For clock_gettime(2):
|
||||
#ifndef _POSIX_C_SOURCE
|
||||
#define _POSIX_C_SOURCE 199309L
|
||||
#endif
|
||||
|
||||
// For CLOCK_REALTIME on FreeBSD:
|
||||
#ifndef _XOPEN_SOURCE
|
||||
#define _XOPEN_SOURCE 600
|
||||
#endif
|
||||
|
||||
// Standard cross-platform includes.
|
||||
#include <stdbool.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
// Platform-specific includes.
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
# include <windows.h>
|
||||
# include <wincrypt.h>
|
||||
#else
|
||||
# include <sys/types.h>
|
||||
# include <sys/stat.h>
|
||||
# include <fcntl.h>
|
||||
# include <unistd.h>
|
||||
# include <time.h>
|
||||
#endif
|
||||
|
||||
#if defined(__MACH__)
|
||||
# include <mach/mach_time.h>
|
||||
#endif
|
||||
|
||||
#include "../include/libbase64.h"
|
||||
#include "codec_supported.h"
|
||||
|
||||
#define KB 1024
|
||||
#define MB (1024 * KB)
|
||||
|
||||
#define RANDOMDEV "/dev/urandom"
|
||||
|
||||
struct buffers {
|
||||
char *reg;
|
||||
char *enc;
|
||||
size_t regsz;
|
||||
size_t encsz;
|
||||
};
|
||||
|
||||
// Define buffer sizes to test with:
|
||||
static struct bufsize {
|
||||
char *label;
|
||||
size_t len;
|
||||
int repeat;
|
||||
int batch;
|
||||
}
|
||||
sizes[] = {
|
||||
{ "10 MB", MB * 10, 10, 1 },
|
||||
{ "1 MB", MB * 1, 10, 10 },
|
||||
{ "100 KB", KB * 100, 10, 100 },
|
||||
{ "10 KB", KB * 10, 100, 100 },
|
||||
{ "1 KB", KB * 1, 100, 1000 },
|
||||
};
|
||||
|
||||
static inline float
|
||||
bytes_to_mb (size_t bytes)
|
||||
{
|
||||
return bytes / (float) MB;
|
||||
}
|
||||
|
||||
static bool
|
||||
get_random_data (struct buffers *b, char **errmsg)
|
||||
{
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
HCRYPTPROV hProvider = 0;
|
||||
|
||||
if (!CryptAcquireContext(&hProvider, 0, 0, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT | CRYPT_SILENT)) {
|
||||
*errmsg = "Error: CryptAcquireContext";
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!CryptGenRandom(hProvider, b->regsz, b->reg)) {
|
||||
CryptReleaseContext(hProvider, 0);
|
||||
*errmsg = "Error: CryptGenRandom";
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!CryptReleaseContext(hProvider, 0)) {
|
||||
*errmsg = "Error: CryptReleaseContext";
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
#else
|
||||
int fd;
|
||||
ssize_t nread;
|
||||
size_t total_read = 0;
|
||||
|
||||
// Open random device for semi-random data:
|
||||
if ((fd = open(RANDOMDEV, O_RDONLY)) < 0) {
|
||||
*errmsg = "Cannot open " RANDOMDEV;
|
||||
return false;
|
||||
}
|
||||
|
||||
printf("Filling buffer with %.1f MB of random data...\n", bytes_to_mb(b->regsz));
|
||||
|
||||
while (total_read < b->regsz) {
|
||||
if ((nread = read(fd, b->reg + total_read, b->regsz - total_read)) < 0) {
|
||||
*errmsg = "Read error";
|
||||
close(fd);
|
||||
return false;
|
||||
}
|
||||
total_read += nread;
|
||||
}
|
||||
|
||||
close(fd);
|
||||
return true;
|
||||
#endif
|
||||
}
|
||||
|
||||
// Platform-specific timer: base64_timespec holds one timestamp,
// base64_gettime() samples the clock and timediff_sec() returns the number of
// seconds between two samples.
#if defined(__MACH__)
typedef uint64_t base64_timespec;

static void
base64_gettime (base64_timespec *t)
{
	*t = mach_absolute_time();
}

static float
timediff_sec (base64_timespec *start, base64_timespec *end)
{
	uint64_t diff = *end - *start;
	mach_timebase_info_data_t tb = { 0, 0 };

	// Scale the tick difference by the timebase ratio, then by 1e9:
	mach_timebase_info(&tb);

	return (float)((diff * tb.numer) / tb.denom) / 1e9f;
}
#elif defined(_WIN32) || defined(_WIN64)
typedef ULARGE_INTEGER base64_timespec;

static void
base64_gettime (base64_timespec *t)
{
	FILETIME current_time_ft;

	GetSystemTimePreciseAsFileTime(&current_time_ft);

	// Copy both halves of the FILETIME so QuadPart holds the full value:
	t->LowPart = current_time_ft.dwLowDateTime;
	t->HighPart = current_time_ft.dwHighDateTime;
}

static float
timediff_sec (base64_timespec *start, base64_timespec *end)
{
	// Timer resolution is 100 nanoseconds (10^-7 sec).
	return (end->QuadPart - start->QuadPart) / 1e7f;
}
#else
typedef struct timespec base64_timespec;

static void
base64_gettime (base64_timespec *t)
{
	clock_gettime(CLOCK_REALTIME, t);
}

static float
timediff_sec (base64_timespec *start, base64_timespec *end)
{
	// Whole seconds plus the nanosecond remainder converted to seconds:
	return (end->tv_sec - start->tv_sec) + (end->tv_nsec - start->tv_nsec) / 1e9f;
}
#endif
|
||||
|
||||
static void
|
||||
codec_bench_enc (struct buffers *b, const struct bufsize *bs, const char *name, unsigned int flags)
|
||||
{
|
||||
float timediff, fastest = -1.0f;
|
||||
base64_timespec start, end;
|
||||
|
||||
// Reset buffer size:
|
||||
b->regsz = bs->len;
|
||||
|
||||
// Repeat benchmark a number of times for a fair test:
|
||||
for (int i = bs->repeat; i; i--) {
|
||||
|
||||
// Timing loop, use batches to increase timer resolution:
|
||||
base64_gettime(&start);
|
||||
for (int j = bs->batch; j; j--)
|
||||
base64_encode(b->reg, b->regsz, b->enc, &b->encsz, flags);
|
||||
base64_gettime(&end);
|
||||
|
||||
// Calculate average time of batch:
|
||||
timediff = timediff_sec(&start, &end) / bs->batch;
|
||||
|
||||
// Update fastest time seen:
|
||||
if (fastest < 0.0f || timediff < fastest)
|
||||
fastest = timediff;
|
||||
}
|
||||
|
||||
printf("%s\tencode\t%.02f MB/sec\n", name, bytes_to_mb(b->regsz) / fastest);
|
||||
}
|
||||
|
||||
static void
|
||||
codec_bench_dec (struct buffers *b, const struct bufsize *bs, const char *name, unsigned int flags)
|
||||
{
|
||||
float timediff, fastest = -1.0f;
|
||||
base64_timespec start, end;
|
||||
|
||||
// Reset buffer size:
|
||||
b->encsz = bs->len;
|
||||
|
||||
// Repeat benchmark a number of times for a fair test:
|
||||
for (int i = bs->repeat; i; i--) {
|
||||
|
||||
// Timing loop, use batches to increase timer resolution:
|
||||
base64_gettime(&start);
|
||||
for (int j = bs->batch; j; j--)
|
||||
base64_decode(b->enc, b->encsz, b->reg, &b->regsz, flags);
|
||||
base64_gettime(&end);
|
||||
|
||||
// Calculate average time of batch:
|
||||
timediff = timediff_sec(&start, &end) / bs->batch;
|
||||
|
||||
// Update fastest time seen:
|
||||
if (fastest < 0.0f || timediff < fastest)
|
||||
fastest = timediff;
|
||||
}
|
||||
|
||||
printf("%s\tdecode\t%.02f MB/sec\n", name, bytes_to_mb(b->encsz) / fastest);
|
||||
}
|
||||
|
||||
static void
|
||||
codec_bench (struct buffers *b, const struct bufsize *bs, const char *name, unsigned int flags)
|
||||
{
|
||||
codec_bench_enc(b, bs, name, flags);
|
||||
codec_bench_dec(b, bs, name, flags);
|
||||
}
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
int ret = 0;
|
||||
char *errmsg = NULL;
|
||||
struct buffers b;
|
||||
|
||||
// Set buffer sizes to largest buffer length:
|
||||
b.regsz = sizes[0].len;
|
||||
b.encsz = sizes[0].len * 5 / 3;
|
||||
|
||||
// Allocate space for megabytes of random data:
|
||||
if ((b.reg = malloc(b.regsz)) == NULL) {
|
||||
errmsg = "Out of memory";
|
||||
ret = 1;
|
||||
goto err0;
|
||||
}
|
||||
|
||||
// Allocate space for encoded output:
|
||||
if ((b.enc = malloc(b.encsz)) == NULL) {
|
||||
errmsg = "Out of memory";
|
||||
ret = 1;
|
||||
goto err1;
|
||||
}
|
||||
|
||||
// Fill buffer with random data:
|
||||
if (get_random_data(&b, &errmsg) == false) {
|
||||
ret = 1;
|
||||
goto err2;
|
||||
}
|
||||
|
||||
// Loop over all buffer sizes:
|
||||
for (size_t i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
|
||||
printf("Testing with buffer size %s, fastest of %d * %d\n",
|
||||
sizes[i].label, sizes[i].repeat, sizes[i].batch);
|
||||
|
||||
// Loop over all codecs:
|
||||
for (size_t j = 0; codecs[j]; j++)
|
||||
if (codec_supported(1 << j))
|
||||
codec_bench(&b, &sizes[i], codecs[j], 1 << j);
|
||||
};
|
||||
|
||||
// Free memory:
|
||||
err2: free(b.enc);
|
||||
err1: free(b.reg);
|
||||
err0: if (errmsg)
|
||||
fputs(errmsg, stderr);
|
||||
|
||||
return ret;
|
||||
}
|
37
deps/base64/base64/test/ci/analysis.sh
vendored
37
deps/base64/base64/test/ci/analysis.sh
vendored
|
@ -1,37 +0,0 @@
|
|||
#!/bin/bash
# CI job: run the static analysis target once, then build the library and run
# the tests under valgrind with the *_USE_ASM defines set to 0 and to 1.
set -ve

MACHINE=$(uname -m)
export CC=gcc

uname -a
clang --version  # make analyse
${CC} --version  # make -C test valgrind

for USE_ASSEMBLY in 0 1; do
	# Per-architecture compiler flags picked up by the Makefile:
	case "${MACHINE}" in
	x86_64)
		export SSSE3_CFLAGS="-mssse3 -DBASE64_SSSE3_USE_ASM=${USE_ASSEMBLY}"
		export SSE41_CFLAGS="-msse4.1 -DBASE64_SSE41_USE_ASM=${USE_ASSEMBLY}"
		export SSE42_CFLAGS="-msse4.2 -DBASE64_SSE42_USE_ASM=${USE_ASSEMBLY}"
		export AVX_CFLAGS="-mavx -DBASE64_AVX_USE_ASM=${USE_ASSEMBLY}"
		export AVX2_CFLAGS="-mavx2 -DBASE64_AVX2_USE_ASM=${USE_ASSEMBLY}"
		# Temporarily disable AVX512; it is not available in CI yet.
		# export AVX512_CFLAGS="-mavx512vl -mavx512vbmi"
		;;
	aarch64)
		export NEON64_CFLAGS="-march=armv8-a"
		;;
	armv7l)
		export NEON32_CFLAGS="-march=armv7-a -mfloat-abi=hard -mfpu=neon"
		;;
	esac

	# The analysis target only runs on the first pass:
	if [ ${USE_ASSEMBLY} -eq 0 ]; then
		echo "::group::analyze"
		make analyze
		echo "::endgroup::"
	fi

	echo "::group::valgrind (USE_ASSEMBLY=${USE_ASSEMBLY})"
	make clean
	make
	make -C test valgrind
	echo "::endgroup::"
done
|
30
deps/base64/base64/test/ci/test.sh
vendored
30
deps/base64/base64/test/ci/test.sh
vendored
|
@ -1,30 +0,0 @@
|
|||
#!/bin/bash
# CI job: select the SIMD compiler flags for the host architecture, then build
# the library and run the test suite.
set -ve

MACHINE=$(uname -m)
if [ "${MACHINE}" == "x86_64" ]; then
	export SSSE3_CFLAGS=-mssse3
	export SSE41_CFLAGS=-msse4.1
	export SSE42_CFLAGS=-msse4.2
	export AVX_CFLAGS=-mavx
	# no AVX2 or AVX512 on GHA macOS
	if [ "$(uname -s)" != "Darwin" ]; then
		export AVX2_CFLAGS=-mavx2
		# Temporarily disable AVX512; it is not available in CI yet.
		# export AVX512_CFLAGS="-mavx512vl -mavx512vbmi"
	fi
elif [ "${MACHINE}" == "aarch64" ]; then
	export NEON64_CFLAGS="-march=armv8-a"
elif [ "${MACHINE}" == "armv7l" ]; then
	export NEON32_CFLAGS="-march=armv7-a -mfloat-abi=hard -mfpu=neon"
fi

# OPENMP=0 means "off": drop the variable entirely.
if [ "${OPENMP:-}" == "0" ]; then
	unset OPENMP
fi

uname -a
# Fall back to make's default compiler name when CC is not set; an empty
# ${CC} would try to run "--version" as a command and abort under `set -e`.
${CC:-cc} --version

make
make -C test
|
29
deps/base64/base64/test/codec_supported.c
vendored
29
deps/base64/base64/test/codec_supported.c
vendored
|
@ -1,29 +0,0 @@
|
|||
#include <string.h>
|
||||
|
||||
#include "../include/libbase64.h"
|
||||
|
||||
static char *_codecs[] =
|
||||
{ "AVX2"
|
||||
, "NEON32"
|
||||
, "NEON64"
|
||||
, "plain"
|
||||
, "SSSE3"
|
||||
, "SSE41"
|
||||
, "SSE42"
|
||||
, "AVX"
|
||||
, "AVX512"
|
||||
, NULL
|
||||
} ;
|
||||
|
||||
char **codecs = _codecs;
|
||||
|
||||
// Report whether the codec selected by `flags` is available, by decoding a
// short test string with it. Returns 0 only when base64_decode() returns -1.
int
codec_supported (int flags)
{
	static const char probe[] = "aGVsbG8=";
	char decoded[10];
	size_t decoded_len;
	const int status = base64_decode(probe, sizeof(probe) - 1, decoded, &decoded_len, flags);

	return (status != -1);
}
|
3
deps/base64/base64/test/codec_supported.h
vendored
3
deps/base64/base64/test/codec_supported.h
vendored
|
@ -1,3 +0,0 @@
|
|||
extern char **codecs;
|
||||
|
||||
int codec_supported (int flags);
|
41
deps/base64/base64/test/moby_dick.h
vendored
41
deps/base64/base64/test/moby_dick.h
vendored
|
@ -1,41 +0,0 @@
|
|||
static const char *moby_dick_plain =
|
||||
"Call me Ishmael. Some years ago--never mind how long precisely--having\n"
|
||||
"little or no money in my purse, and nothing particular to interest me on\n"
|
||||
"shore, I thought I would sail about a little and see the watery part of\n"
|
||||
"the world. It is a way I have of driving off the spleen and regulating\n"
|
||||
"the circulation. Whenever I find myself growing grim about the mouth;\n"
|
||||
"whenever it is a damp, drizzly November in my soul; whenever I find\n"
|
||||
"myself involuntarily pausing before coffin warehouses, and bringing up\n"
|
||||
"the rear of every funeral I meet; and especially whenever my hypos get\n"
|
||||
"such an upper hand of me, that it requires a strong moral principle to\n"
|
||||
"prevent me from deliberately stepping into the street, and methodically\n"
|
||||
"knocking people's hats off--then, I account it high time to get to sea\n"
|
||||
"as soon as I can. This is my substitute for pistol and ball. With a\n"
|
||||
"philosophical flourish Cato throws himself upon his sword; I quietly\n"
|
||||
"take to the ship. There is nothing surprising in this. If they but knew\n"
|
||||
"it, almost all men in their degree, some time or other, cherish very\n"
|
||||
"nearly the same feelings towards the ocean with me.\n";
|
||||
|
||||
static const char *moby_dick_base64 =
|
||||
"Q2FsbCBtZSBJc2htYWVsLiBTb21lIHllYXJzIGFnby0tbmV2ZXIgbWluZCBob3cgbG9uZ"
|
||||
"yBwcmVjaXNlbHktLWhhdmluZwpsaXR0bGUgb3Igbm8gbW9uZXkgaW4gbXkgcHVyc2UsIG"
|
||||
"FuZCBub3RoaW5nIHBhcnRpY3VsYXIgdG8gaW50ZXJlc3QgbWUgb24Kc2hvcmUsIEkgdGh"
|
||||
"vdWdodCBJIHdvdWxkIHNhaWwgYWJvdXQgYSBsaXR0bGUgYW5kIHNlZSB0aGUgd2F0ZXJ5"
|
||||
"IHBhcnQgb2YKdGhlIHdvcmxkLiBJdCBpcyBhIHdheSBJIGhhdmUgb2YgZHJpdmluZyBvZ"
|
||||
"mYgdGhlIHNwbGVlbiBhbmQgcmVndWxhdGluZwp0aGUgY2lyY3VsYXRpb24uIFdoZW5ldm"
|
||||
"VyIEkgZmluZCBteXNlbGYgZ3Jvd2luZyBncmltIGFib3V0IHRoZSBtb3V0aDsKd2hlbmV"
|
||||
"2ZXIgaXQgaXMgYSBkYW1wLCBkcml6emx5IE5vdmVtYmVyIGluIG15IHNvdWw7IHdoZW5l"
|
||||
"dmVyIEkgZmluZApteXNlbGYgaW52b2x1bnRhcmlseSBwYXVzaW5nIGJlZm9yZSBjb2Zma"
|
||||
"W4gd2FyZWhvdXNlcywgYW5kIGJyaW5naW5nIHVwCnRoZSByZWFyIG9mIGV2ZXJ5IGZ1bm"
|
||||
"VyYWwgSSBtZWV0OyBhbmQgZXNwZWNpYWxseSB3aGVuZXZlciBteSBoeXBvcyBnZXQKc3V"
|
||||
"jaCBhbiB1cHBlciBoYW5kIG9mIG1lLCB0aGF0IGl0IHJlcXVpcmVzIGEgc3Ryb25nIG1v"
|
||||
"cmFsIHByaW5jaXBsZSB0bwpwcmV2ZW50IG1lIGZyb20gZGVsaWJlcmF0ZWx5IHN0ZXBwa"
|
||||
"W5nIGludG8gdGhlIHN0cmVldCwgYW5kIG1ldGhvZGljYWxseQprbm9ja2luZyBwZW9wbG"
|
||||
"UncyBoYXRzIG9mZi0tdGhlbiwgSSBhY2NvdW50IGl0IGhpZ2ggdGltZSB0byBnZXQgdG8"
|
||||
"gc2VhCmFzIHNvb24gYXMgSSBjYW4uIFRoaXMgaXMgbXkgc3Vic3RpdHV0ZSBmb3IgcGlz"
|
||||
"dG9sIGFuZCBiYWxsLiBXaXRoIGEKcGhpbG9zb3BoaWNhbCBmbG91cmlzaCBDYXRvIHRoc"
|
||||
"m93cyBoaW1zZWxmIHVwb24gaGlzIHN3b3JkOyBJIHF1aWV0bHkKdGFrZSB0byB0aGUgc2"
|
||||
"hpcC4gVGhlcmUgaXMgbm90aGluZyBzdXJwcmlzaW5nIGluIHRoaXMuIElmIHRoZXkgYnV"
|
||||
"0IGtuZXcKaXQsIGFsbW9zdCBhbGwgbWVuIGluIHRoZWlyIGRlZ3JlZSwgc29tZSB0aW1l"
|
||||
"IG9yIG90aGVyLCBjaGVyaXNoIHZlcnkKbmVhcmx5IHRoZSBzYW1lIGZlZWxpbmdzIHRvd"
|
||||
"2FyZHMgdGhlIG9jZWFuIHdpdGggbWUuCg==";
|
1
deps/base64/base64/test/moby_dick_base64.txt
vendored
1
deps/base64/base64/test/moby_dick_base64.txt
vendored
|
@ -1 +0,0 @@
|
|||
Q2FsbCBtZSBJc2htYWVsLiBTb21lIHllYXJzIGFnby0tbmV2ZXIgbWluZCBob3cgbG9uZyBwcmVjaXNlbHktLWhhdmluZwpsaXR0bGUgb3Igbm8gbW9uZXkgaW4gbXkgcHVyc2UsIGFuZCBub3RoaW5nIHBhcnRpY3VsYXIgdG8gaW50ZXJlc3QgbWUgb24Kc2hvcmUsIEkgdGhvdWdodCBJIHdvdWxkIHNhaWwgYWJvdXQgYSBsaXR0bGUgYW5kIHNlZSB0aGUgd2F0ZXJ5IHBhcnQgb2YKdGhlIHdvcmxkLiBJdCBpcyBhIHdheSBJIGhhdmUgb2YgZHJpdmluZyBvZmYgdGhlIHNwbGVlbiBhbmQgcmVndWxhdGluZwp0aGUgY2lyY3VsYXRpb24uIFdoZW5ldmVyIEkgZmluZCBteXNlbGYgZ3Jvd2luZyBncmltIGFib3V0IHRoZSBtb3V0aDsKd2hlbmV2ZXIgaXQgaXMgYSBkYW1wLCBkcml6emx5IE5vdmVtYmVyIGluIG15IHNvdWw7IHdoZW5ldmVyIEkgZmluZApteXNlbGYgaW52b2x1bnRhcmlseSBwYXVzaW5nIGJlZm9yZSBjb2ZmaW4gd2FyZWhvdXNlcywgYW5kIGJyaW5naW5nIHVwCnRoZSByZWFyIG9mIGV2ZXJ5IGZ1bmVyYWwgSSBtZWV0OyBhbmQgZXNwZWNpYWxseSB3aGVuZXZlciBteSBoeXBvcyBnZXQKc3VjaCBhbiB1cHBlciBoYW5kIG9mIG1lLCB0aGF0IGl0IHJlcXVpcmVzIGEgc3Ryb25nIG1vcmFsIHByaW5jaXBsZSB0bwpwcmV2ZW50IG1lIGZyb20gZGVsaWJlcmF0ZWx5IHN0ZXBwaW5nIGludG8gdGhlIHN0cmVldCwgYW5kIG1ldGhvZGljYWxseQprbm9ja2luZyBwZW9wbGUncyBoYXRzIG9mZi0tdGhlbiwgSSBhY2NvdW50IGl0IGhpZ2ggdGltZSB0byBnZXQgdG8gc2VhCmFzIHNvb24gYXMgSSBjYW4uIFRoaXMgaXMgbXkgc3Vic3RpdHV0ZSBmb3IgcGlzdG9sIGFuZCBiYWxsLiBXaXRoIGEKcGhpbG9zb3BoaWNhbCBmbG91cmlzaCBDYXRvIHRocm93cyBoaW1zZWxmIHVwb24gaGlzIHN3b3JkOyBJIHF1aWV0bHkKdGFrZSB0byB0aGUgc2hpcC4gVGhlcmUgaXMgbm90aGluZyBzdXJwcmlzaW5nIGluIHRoaXMuIElmIHRoZXkgYnV0IGtuZXcKaXQsIGFsbW9zdCBhbGwgbWVuIGluIHRoZWlyIGRlZ3JlZSwgc29tZSB0aW1lIG9yIG90aGVyLCBjaGVyaXNoIHZlcnkKbmVhcmx5IHRoZSBzYW1lIGZlZWxpbmdzIHRvd2FyZHMgdGhlIG9jZWFuIHdpdGggbWUuCg==
|
16
deps/base64/base64/test/moby_dick_plain.txt
vendored
16
deps/base64/base64/test/moby_dick_plain.txt
vendored
|
@ -1,16 +0,0 @@
|
|||
Call me Ishmael. Some years ago--never mind how long precisely--having
|
||||
little or no money in my purse, and nothing particular to interest me on
|
||||
shore, I thought I would sail about a little and see the watery part of
|
||||
the world. It is a way I have of driving off the spleen and regulating
|
||||
the circulation. Whenever I find myself growing grim about the mouth;
|
||||
whenever it is a damp, drizzly November in my soul; whenever I find
|
||||
myself involuntarily pausing before coffin warehouses, and bringing up
|
||||
the rear of every funeral I meet; and especially whenever my hypos get
|
||||
such an upper hand of me, that it requires a strong moral principle to
|
||||
prevent me from deliberately stepping into the street, and methodically
|
||||
knocking people's hats off--then, I account it high time to get to sea
|
||||
as soon as I can. This is my substitute for pistol and ball. With a
|
||||
philosophical flourish Cato throws himself upon his sword; I quietly
|
||||
take to the ship. There is nothing surprising in this. If they but knew
|
||||
it, almost all men in their degree, some time or other, cherish very
|
||||
nearly the same feelings towards the ocean with me.
|
388
deps/base64/base64/test/test_base64.c
vendored
388
deps/base64/base64/test/test_base64.c
vendored
|
@ -1,388 +0,0 @@
|
|||
#include <stdbool.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "../include/libbase64.h"
|
||||
#include "codec_supported.h"
|
||||
#include "moby_dick.h"
|
||||
|
||||
static char out[2000];
|
||||
static size_t outlen;
|
||||
|
||||
static bool
|
||||
assert_enc (int flags, const char *src, const char *dst)
|
||||
{
|
||||
size_t srclen = strlen(src);
|
||||
size_t dstlen = strlen(dst);
|
||||
|
||||
base64_encode(src, srclen, out, &outlen, flags);
|
||||
|
||||
if (outlen != dstlen) {
|
||||
printf("FAIL: encoding of '%s': length expected %lu, got %lu\n", src,
|
||||
(unsigned long)dstlen,
|
||||
(unsigned long)outlen
|
||||
);
|
||||
return true;
|
||||
}
|
||||
if (strncmp(dst, out, outlen) != 0) {
|
||||
out[outlen] = '\0';
|
||||
printf("FAIL: encoding of '%s': expected output '%s', got '%s'\n", src, dst, out);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool
|
||||
assert_dec (int flags, const char *src, const char *dst)
|
||||
{
|
||||
size_t srclen = strlen(src);
|
||||
size_t dstlen = strlen(dst);
|
||||
|
||||
if (!base64_decode(src, srclen, out, &outlen, flags)) {
|
||||
printf("FAIL: decoding of '%s': decoding error\n", src);
|
||||
return true;
|
||||
}
|
||||
if (outlen != dstlen) {
|
||||
printf("FAIL: encoding of '%s': "
|
||||
"length expected %lu, got %lu\n", src,
|
||||
(unsigned long)dstlen,
|
||||
(unsigned long)outlen
|
||||
);
|
||||
return true;
|
||||
}
|
||||
if (strncmp(dst, out, outlen) != 0) {
|
||||
out[outlen] = '\0';
|
||||
printf("FAIL: decoding of '%s': expected output '%s', got '%s'\n", src, dst, out);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static int
|
||||
assert_roundtrip (int flags, const char *src)
|
||||
{
|
||||
char tmp[1500];
|
||||
size_t tmplen;
|
||||
size_t srclen = strlen(src);
|
||||
|
||||
// Encode the input into global buffer:
|
||||
base64_encode(src, srclen, out, &outlen, flags);
|
||||
|
||||
// Decode the global buffer into local temp buffer:
|
||||
if (!base64_decode(out, outlen, tmp, &tmplen, flags)) {
|
||||
printf("FAIL: decoding of '%s': decoding error\n", out);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Check that 'src' is identical to 'tmp':
|
||||
if (srclen != tmplen) {
|
||||
printf("FAIL: roundtrip of '%s': "
|
||||
"length expected %lu, got %lu\n", src,
|
||||
(unsigned long)srclen,
|
||||
(unsigned long)tmplen
|
||||
);
|
||||
return true;
|
||||
}
|
||||
if (strncmp(src, tmp, tmplen) != 0) {
|
||||
tmp[tmplen] = '\0';
|
||||
printf("FAIL: roundtrip of '%s': got '%s'\n", src, tmp);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
 * Round-trip every suffix of the byte table 0..255 through encode + decode.
 *
 * flags:      codec selection flags passed to the library.
 * use_malloc: when true, each suffix is copied into an exactly-sized heap
 *             buffer before encoding (see the inline note on valgrind).
 *
 * Returns nonzero if any round-trip failed, zero otherwise.
 */
static int
test_char_table (int flags, bool use_malloc)
{
    bool fail = false;
    char chr[256];
    char enc[400], dec[400];
    size_t enclen, declen;

    // Fill array with all characters 0..255:
    for (int i = 0; i < 256; i++)
        chr[i] = (unsigned char)i;

    // Loop, using each char as a starting position to increase test coverage:
    for (int i = 0; i < 256; i++) {

        size_t chrlen = 256 - i;
        char* src = &chr[i];
        if (use_malloc) {
            src = malloc(chrlen); /* malloc/copy this so valgrind can find out-of-bound access */
            if (src == NULL) {
                printf(
                    "FAIL: encoding @ %d: allocation of %lu bytes failed\n",
                    i, (unsigned long)chrlen
                );
                fail = true;
                continue;
            }
            memcpy(src, &chr[i], chrlen);
        }

        base64_encode(src, chrlen, enc, &enclen, flags);
        if (use_malloc) {
            free(src);
        }

        if (!base64_decode(enc, enclen, dec, &declen, flags)) {
            printf("FAIL: decoding @ %d: decoding error\n", i);
            fail = true;
            continue;
        }
        if (declen != chrlen) {
            printf("FAIL: roundtrip @ %d: "
                "length expected %lu, got %lu\n", i,
                (unsigned long)chrlen,
                (unsigned long)declen
            );
            fail = true;
            continue;
        }
        // The table is binary data and chr[0] is a NUL byte, so a string
        // comparison would stop after the first byte when i == 0. Compare
        // the full decoded length byte-for-byte instead.
        if (memcmp(&chr[i], dec, declen) != 0) {
            printf("FAIL: roundtrip @ %d: decoded output not same as input\n", i);
            fail = true;
        }
    }

    return fail;
}
|
||||
|
||||
static int
|
||||
test_streaming (int flags)
|
||||
{
|
||||
bool fail = false;
|
||||
char chr[256];
|
||||
char ref[400], enc[400];
|
||||
size_t reflen;
|
||||
struct base64_state state;
|
||||
|
||||
// Fill array with all characters 0..255:
|
||||
for (int i = 0; i < 256; i++)
|
||||
chr[i] = (unsigned char)i;
|
||||
|
||||
// Create reference base64 encoding:
|
||||
base64_encode(chr, 256, ref, &reflen, BASE64_FORCE_PLAIN);
|
||||
|
||||
// Encode the table with various block sizes and compare to reference:
|
||||
for (size_t bs = 1; bs < 255; bs++)
|
||||
{
|
||||
size_t inpos = 0;
|
||||
size_t partlen = 0;
|
||||
size_t enclen = 0;
|
||||
|
||||
base64_stream_encode_init(&state, flags);
|
||||
memset(enc, 0, 400);
|
||||
for (;;) {
|
||||
base64_stream_encode(&state, &chr[inpos], (inpos + bs > 256) ? 256 - inpos : bs, &enc[enclen], &partlen);
|
||||
enclen += partlen;
|
||||
if (inpos + bs > 256) {
|
||||
break;
|
||||
}
|
||||
inpos += bs;
|
||||
}
|
||||
base64_stream_encode_final(&state, &enc[enclen], &partlen);
|
||||
enclen += partlen;
|
||||
|
||||
if (enclen != reflen) {
|
||||
printf("FAIL: stream encoding gave incorrect size: "
|
||||
"%lu instead of %lu\n",
|
||||
(unsigned long)enclen,
|
||||
(unsigned long)reflen
|
||||
);
|
||||
fail = true;
|
||||
}
|
||||
if (strncmp(ref, enc, reflen) != 0) {
|
||||
printf("FAIL: stream encoding with blocksize %lu failed\n",
|
||||
(unsigned long)bs
|
||||
);
|
||||
fail = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Decode the reference encoding with various block sizes and
|
||||
// compare to input char table:
|
||||
for (size_t bs = 1; bs < 255; bs++)
|
||||
{
|
||||
size_t inpos = 0;
|
||||
size_t partlen = 0;
|
||||
size_t enclen = 0;
|
||||
|
||||
base64_stream_decode_init(&state, flags);
|
||||
memset(enc, 0, 400);
|
||||
while (base64_stream_decode(&state, &ref[inpos], (inpos + bs > reflen) ? reflen - inpos : bs, &enc[enclen], &partlen)) {
|
||||
enclen += partlen;
|
||||
inpos += bs;
|
||||
|
||||
// Has the entire buffer been consumed?
|
||||
if (inpos >= 400) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (enclen != 256) {
|
||||
printf("FAIL: stream decoding gave incorrect size: "
|
||||
"%lu instead of 255\n",
|
||||
(unsigned long)enclen
|
||||
);
|
||||
fail = true;
|
||||
}
|
||||
if (strncmp(chr, enc, 256) != 0) {
|
||||
printf("FAIL: stream decoding with blocksize %lu failed\n",
|
||||
(unsigned long)bs
|
||||
);
|
||||
fail = true;
|
||||
}
|
||||
}
|
||||
|
||||
return fail;
|
||||
}
|
||||
|
||||
static int
|
||||
test_invalid_dec_input (int flags)
|
||||
{
|
||||
// Subset of invalid characters to cover all ranges
|
||||
static const char invalid_set[] = { '\0', -1, '!', '-', ';', '_', '|' };
|
||||
static const char* invalid_strings[] = {
|
||||
"Zm9vYg=",
|
||||
"Zm9vYg",
|
||||
"Zm9vY",
|
||||
"Zm9vYmF=Zm9v"
|
||||
};
|
||||
|
||||
bool fail = false;
|
||||
char chr[256];
|
||||
char enc[400], dec[400];
|
||||
size_t enclen, declen;
|
||||
|
||||
// Fill array with all characters 0..255:
|
||||
for (int i = 0; i < 256; i++)
|
||||
chr[i] = (unsigned char)i;
|
||||
|
||||
// Create reference base64 encoding:
|
||||
base64_encode(chr, 256, enc, &enclen, BASE64_FORCE_PLAIN);
|
||||
|
||||
// Test invalid strings returns error.
|
||||
for (size_t i = 0U; i < sizeof(invalid_strings) / sizeof(invalid_strings[0]); ++i) {
|
||||
if (base64_decode(invalid_strings[i], strlen(invalid_strings[i]), dec, &declen, flags)) {
|
||||
printf("FAIL: decoding invalid input \"%s\": no decoding error\n", invalid_strings[i]);
|
||||
fail = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Loop, corrupting each char to increase test coverage:
|
||||
for (size_t c = 0U; c < sizeof(invalid_set); ++c) {
|
||||
for (size_t i = 0U; i < enclen; i++) {
|
||||
char backup = enc[i];
|
||||
|
||||
enc[i] = invalid_set[c];
|
||||
|
||||
if (base64_decode(enc, enclen, dec, &declen, flags)) {
|
||||
printf("FAIL: decoding invalid input @ %d: no decoding error\n", (int)i);
|
||||
fail = true;
|
||||
enc[i] = backup;
|
||||
continue;
|
||||
}
|
||||
enc[i] = backup;
|
||||
}
|
||||
}
|
||||
|
||||
// Loop, corrupting two chars to increase test coverage:
|
||||
for (size_t c = 0U; c < sizeof(invalid_set); ++c) {
|
||||
for (size_t i = 0U; i < enclen - 2U; i++) {
|
||||
char backup = enc[i+0];
|
||||
char backup2 = enc[i+2];
|
||||
|
||||
enc[i+0] = invalid_set[c];
|
||||
enc[i+2] = invalid_set[c];
|
||||
|
||||
if (base64_decode(enc, enclen, dec, &declen, flags)) {
|
||||
printf("FAIL: decoding invalid input @ %d: no decoding error\n", (int)i);
|
||||
fail = true;
|
||||
enc[i+0] = backup;
|
||||
enc[i+2] = backup2;
|
||||
continue;
|
||||
}
|
||||
enc[i+0] = backup;
|
||||
enc[i+2] = backup2;
|
||||
}
|
||||
}
|
||||
|
||||
return fail;
|
||||
}
|
||||
|
||||
static int
|
||||
test_one_codec (const char *codec, int flags)
|
||||
{
|
||||
bool fail = false;
|
||||
|
||||
printf("Codec %s:\n", codec);
|
||||
|
||||
// Skip if this codec is not supported:
|
||||
if (!codec_supported(flags)) {
|
||||
puts(" skipping");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Test vectors:
|
||||
struct {
|
||||
const char *in;
|
||||
const char *out;
|
||||
} vec[] = {
|
||||
|
||||
// These are the test vectors from RFC4648:
|
||||
{ "", "" },
|
||||
{ "f", "Zg==" },
|
||||
{ "fo", "Zm8=" },
|
||||
{ "foo", "Zm9v" },
|
||||
{ "foob", "Zm9vYg==" },
|
||||
{ "fooba", "Zm9vYmE=" },
|
||||
{ "foobar", "Zm9vYmFy" },
|
||||
|
||||
// The first paragraph from Moby Dick,
|
||||
// to test the SIMD codecs with larger blocksize:
|
||||
{ moby_dick_plain, moby_dick_base64 },
|
||||
};
|
||||
|
||||
for (size_t i = 0; i < sizeof(vec) / sizeof(vec[0]); i++) {
|
||||
|
||||
// Encode plain string, check against output:
|
||||
fail |= assert_enc(flags, vec[i].in, vec[i].out);
|
||||
|
||||
// Decode the output string, check if we get the input:
|
||||
fail |= assert_dec(flags, vec[i].out, vec[i].in);
|
||||
|
||||
// Do a roundtrip on the inputs and the outputs:
|
||||
fail |= assert_roundtrip(flags, vec[i].in);
|
||||
fail |= assert_roundtrip(flags, vec[i].out);
|
||||
}
|
||||
|
||||
fail |= test_char_table(flags, false); /* test with unaligned input buffer */
|
||||
fail |= test_char_table(flags, true); /* test for out-of-bound input read */
|
||||
fail |= test_streaming(flags);
|
||||
fail |= test_invalid_dec_input(flags);
|
||||
|
||||
if (!fail)
|
||||
puts(" all tests passed.");
|
||||
|
||||
return fail;
|
||||
}
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
bool fail = false;
|
||||
|
||||
// Loop over all codecs:
|
||||
for (size_t i = 0; codecs[i]; i++) {
|
||||
|
||||
// Flags to invoke this codec:
|
||||
int codec_flags = (1 << i);
|
||||
|
||||
// Test this codec, merge the results:
|
||||
fail |= test_one_codec(codecs[i], codec_flags);
|
||||
}
|
||||
|
||||
return (fail) ? 1 : 0;
|
||||
}
|
153
deps/base64/unofficial.gni
vendored
153
deps/base64/unofficial.gni
vendored
|
@ -1,153 +0,0 @@
|
|||
# This file is used by GN for building, which is NOT the build system used for
|
||||
# building official binaries.
|
||||
# Please edit the gyp files if you are making changes to build system.
|
||||
|
||||
# The actual configurations are put inside a template in unofficial.gni to
|
||||
# prevent accidental edits from contributors.
|
||||
template("base64_gn_build") {
  # Public settings: applied to targets that depend on the library
  # (referenced from public_configs below).
  config("base64_external_config") {
    include_dirs = [ "base64/include" ]
    if (!is_component_build) {
      defines = [ "BASE64_STATIC_DEFINE" ]
    }
  }

  # Private settings: applied to the library itself and to every per-codec
  # source_set below.
  config("base64_internal_config") {
    include_dirs = [ "base64/lib" ]

    defines = []
    if (is_component_build) {
      defines += [ "BASE64_EXPORTS" ]
    }

    # HAVE_* macros are chosen by target CPU.
    if (current_cpu == "x86" || current_cpu == "x64") {
      defines += [
        "HAVE_SSSE3=1",
        "HAVE_SSE41=1",
        "HAVE_SSE42=1",
        "HAVE_AVX=1",
        "HAVE_AVX2=1",
        "HAVE_AVX512=1",
      ]
    } else if (current_cpu == "arm") {
      defines += [ "HAVE_NEON32=1" ]
    } else if (current_cpu == "arm64") {
      defines += [ "HAVE_NEON64=1" ]
    }

    # Warning suppressions use the -W spelling, so they are only passed to
    # clang or to non-Windows toolchains.
    if (!is_win || is_clang) {
      cflags_c = [
        "-Wno-implicit-fallthrough",
        "-Wno-shadow",
        "-Wno-unused-but-set-variable",
      ]
    }
  }

  # Source lists are read from base64.gyp through the gypi_to_gn.py helper.
  gypi_values = exec_script("../../tools/gypi_to_gn.py",
                            [ rebase_path("base64.gyp") ],
                            "scope",
                            [ "base64.gyp" ])

  # The library target: common sources plus one source_set per codec.
  component(target_name) {
    forward_variables_from(invoker, "*")
    configs += [ ":base64_internal_config" ]
    public_configs = [ ":base64_external_config" ]
    sources = gypi_values.base64_sources_common
    deps = [
      ":base64_ssse3",
      ":base64_sse41",
      ":base64_sse42",
      ":base64_avx",
      ":base64_avx2",
      ":base64_avx512",
      ":base64_neon32",
      ":base64_neon64",
    ]
  }

  # Each codec is its own source_set so that its instruction-set flag is
  # applied to that codec's source file only. The flag is added only when
  # targeting the matching CPU family.

  source_set("base64_ssse3") {
    configs += [ ":base64_internal_config" ]
    sources = [ "base64/lib/arch/ssse3/codec.c" ]
    if ((current_cpu == "x86" || current_cpu == "x64") &&
        (!is_win || is_clang)) {
      cflags_c = [ "-mssse3" ]
    }
  }

  source_set("base64_sse41") {
    configs += [ ":base64_internal_config" ]
    sources = [ "base64/lib/arch/sse41/codec.c" ]
    if ((current_cpu == "x86" || current_cpu == "x64") &&
        (!is_win || is_clang)) {
      cflags_c = [ "-msse4.1" ]
    }
  }

  source_set("base64_sse42") {
    configs += [ ":base64_internal_config" ]
    sources = [ "base64/lib/arch/sse42/codec.c" ]
    if ((current_cpu == "x86" || current_cpu == "x64") &&
        (!is_win || is_clang)) {
      cflags_c = [ "-msse4.2" ]
    }
  }

  # For the AVX family, non-clang Windows builds get the /arch: spelling.

  source_set("base64_avx") {
    configs += [ ":base64_internal_config" ]
    sources = [ "base64/lib/arch/avx/codec.c" ]
    if (current_cpu == "x86" || current_cpu == "x64") {
      if (is_win && !is_clang) {
        cflags_c = [ "/arch:AVX" ]
      } else {
        cflags_c = [ "-mavx" ]
      }
    }
  }

  source_set("base64_avx2") {
    configs += [ ":base64_internal_config" ]
    sources = [ "base64/lib/arch/avx2/codec.c" ]
    if (current_cpu == "x86" || current_cpu == "x64") {
      if (is_win && !is_clang) {
        cflags_c = [ "/arch:AVX2" ]
      } else {
        cflags_c = [ "-mavx2" ]
      }
    }
  }

  source_set("base64_avx512") {
    configs += [ ":base64_internal_config" ]
    sources = [ "base64/lib/arch/avx512/codec.c" ]
    if (current_cpu == "x86" || current_cpu == "x64") {
      if (is_win && !is_clang) {
        cflags_c = [ "/arch:AVX512" ]
      } else {
        cflags_c = [
          "-mavx512vl",
          "-mavx512vbmi",
        ]
      }
    }
  }

  source_set("base64_neon32") {
    configs += [ ":base64_internal_config" ]
    sources = [ "base64/lib/arch/neon32/codec.c" ]
    if (current_cpu == "arm" && (!is_win || is_clang)) {
      cflags_c = [ "-mfpu=neon" ]
    }
  }

  source_set("base64_neon64") {
    configs += [ ":base64_internal_config" ]
    sources = [ "base64/lib/arch/neon64/codec.c" ]
    # NEON is required in arm64, so no -mfpu flag is needed
  }
}
|
|
@ -219,13 +219,6 @@ an abstract syntax tree walker for the ESTree format.
|
|||
The [ada](https://github.com/ada-url/ada) dependency is a
|
||||
fast and spec-compliant URL parser written in C++.
|
||||
|
||||
### base64
|
||||
|
||||
The [base64](https://github.com/aklomp/base64) dependency is a base64
|
||||
stream encoding/decoding library in C99 with SIMD and OpenMP acceleration.
|
||||
It also contains wrapper functions to encode/decode simple
|
||||
length-delimited strings.
|
||||
|
||||
### brotli
|
||||
|
||||
The [brotli](https://github.com/google/brotli) dependency is
|
||||
|
|
2
node.gyp
2
node.gyp
|
@ -830,7 +830,6 @@
|
|||
'<(SHARED_INTERMEDIATE_DIR)' # for node_natives.h
|
||||
],
|
||||
'dependencies': [
|
||||
'deps/base64/base64.gyp:base64',
|
||||
'deps/googletest/googletest.gyp:gtest_prod',
|
||||
'deps/histogram/histogram.gyp:histogram',
|
||||
'deps/uvwasi/uvwasi.gyp:uvwasi',
|
||||
|
@ -1148,7 +1147,6 @@
|
|||
|
||||
'dependencies': [
|
||||
'<(node_lib_target_name)',
|
||||
'deps/base64/base64.gyp:base64',
|
||||
'deps/googletest/googletest.gyp:gtest',
|
||||
'deps/googletest/googletest.gyp:gtest_main',
|
||||
'deps/histogram/histogram.gyp:histogram',
|
||||
|
|
|
@ -4,7 +4,6 @@
|
|||
#if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS
|
||||
|
||||
#include "base64.h"
|
||||
#include "libbase64.h"
|
||||
#include "util.h"
|
||||
|
||||
namespace node {
|
||||
|
@ -124,68 +123,6 @@ size_t base64_decode(char* const dst, const size_t dstlen,
|
|||
return base64_decode_fast(dst, dstlen, src, srclen, decoded_size);
|
||||
}
|
||||
|
||||
|
||||
inline size_t base64_encode(const char* src,
|
||||
size_t slen,
|
||||
char* dst,
|
||||
size_t dlen,
|
||||
Base64Mode mode) {
|
||||
// We know how much we'll write, just make sure that there's space.
|
||||
CHECK(dlen >= base64_encoded_size(slen, mode) &&
|
||||
"not enough space provided for base64 encode");
|
||||
|
||||
dlen = base64_encoded_size(slen, mode);
|
||||
|
||||
if (mode == Base64Mode::NORMAL) {
|
||||
::base64_encode(src, slen, dst, &dlen, 0);
|
||||
return dlen;
|
||||
}
|
||||
|
||||
unsigned a;
|
||||
unsigned b;
|
||||
unsigned c;
|
||||
unsigned i;
|
||||
unsigned k;
|
||||
unsigned n;
|
||||
|
||||
const char* table = base64_table_url;
|
||||
|
||||
i = 0;
|
||||
k = 0;
|
||||
n = slen / 3 * 3;
|
||||
|
||||
while (i < n) {
|
||||
a = src[i + 0] & 0xff;
|
||||
b = src[i + 1] & 0xff;
|
||||
c = src[i + 2] & 0xff;
|
||||
|
||||
dst[k + 0] = table[a >> 2];
|
||||
dst[k + 1] = table[((a & 3) << 4) | (b >> 4)];
|
||||
dst[k + 2] = table[((b & 0x0f) << 2) | (c >> 6)];
|
||||
dst[k + 3] = table[c & 0x3f];
|
||||
|
||||
i += 3;
|
||||
k += 4;
|
||||
}
|
||||
|
||||
switch (slen - n) {
|
||||
case 1:
|
||||
a = src[i + 0] & 0xff;
|
||||
dst[k + 0] = table[a >> 2];
|
||||
dst[k + 1] = table[(a & 3) << 4];
|
||||
break;
|
||||
case 2:
|
||||
a = src[i + 0] & 0xff;
|
||||
b = src[i + 1] & 0xff;
|
||||
dst[k + 0] = table[a >> 2];
|
||||
dst[k + 1] = table[((a & 3) << 4) | (b >> 4)];
|
||||
dst[k + 2] = table[(b & 0x0f) << 2];
|
||||
break;
|
||||
}
|
||||
|
||||
return dlen;
|
||||
}
|
||||
|
||||
} // namespace node
|
||||
|
||||
#endif // defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS
|
||||
|
|
12
src/base64.h
12
src/base64.h
|
@ -37,14 +37,10 @@ template <typename TypeName>
|
|||
size_t base64_decoded_size(const TypeName* src, size_t size);
|
||||
|
||||
template <typename TypeName>
|
||||
size_t base64_decode(char* const dst, const size_t dstlen,
|
||||
const TypeName* const src, const size_t srclen);
|
||||
|
||||
inline size_t base64_encode(const char* src,
|
||||
size_t slen,
|
||||
char* dst,
|
||||
size_t dlen,
|
||||
Base64Mode mode = Base64Mode::NORMAL);
|
||||
size_t base64_decode(char* const dst,
|
||||
const size_t dstlen,
|
||||
const TypeName* const src,
|
||||
const size_t srclen);
|
||||
} // namespace node
|
||||
|
||||
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
#include "inspector_socket.h"
|
||||
#include "llhttp.h"
|
||||
|
||||
#include "base64-inl.h"
|
||||
#include "base64.h"
|
||||
#include "simdutf.h"
|
||||
#include "util-inl.h"
|
||||
|
||||
#include "openssl/sha.h" // Sha-1 hash
|
||||
|
@ -147,10 +148,13 @@ static void generate_accept_string(const std::string& client_key,
|
|||
static const char ws_magic[] = "258EAFA5-E914-47DA-95CA-C5AB0DC85B11";
|
||||
std::string input(client_key + ws_magic);
|
||||
char hash[SHA_DIGEST_LENGTH];
|
||||
|
||||
CHECK(ACCEPT_KEY_LENGTH >= base64_encoded_size(SHA_DIGEST_LENGTH) &&
|
||||
"not enough space provided for base64 encode");
|
||||
USE(SHA1(reinterpret_cast<const unsigned char*>(input.data()),
|
||||
input.size(),
|
||||
reinterpret_cast<unsigned char*>(hash)));
|
||||
node::base64_encode(hash, sizeof(hash), *buffer, sizeof(*buffer));
|
||||
simdutf::binary_to_base64(hash, sizeof(hash), *buffer);
|
||||
}
|
||||
|
||||
static std::string TrimPort(const std::string& host) {
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
#include "base64-inl.h"
|
||||
#include "simdutf.h"
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstring>
|
||||
|
@ -6,14 +7,16 @@
|
|||
#include "gtest/gtest.h"
|
||||
|
||||
using node::base64_decode;
|
||||
using node::base64_encode;
|
||||
|
||||
TEST(Base64Test, Encode) {
|
||||
auto test = [](const char* string, const char* base64_string) {
|
||||
const size_t len = strlen(base64_string);
|
||||
const size_t slen = strlen(string);
|
||||
char* const buffer = new char[len + 1];
|
||||
buffer[len] = 0;
|
||||
base64_encode(string, strlen(string), buffer, len);
|
||||
CHECK(len >= simdutf::base64_length_from_binary(slen) &&
|
||||
"not enough space provided for base64 encode");
|
||||
simdutf::binary_to_base64(string, slen, buffer);
|
||||
EXPECT_STREQ(base64_string, buffer);
|
||||
delete[] buffer;
|
||||
};
|
||||
|
@ -47,9 +50,13 @@ TEST(Base64Test, Encode) {
|
|||
TEST(Base64Test, EncodeURL) {
|
||||
auto test = [](const char* string, const char* base64_string) {
|
||||
const size_t len = strlen(base64_string);
|
||||
const size_t slen = strlen(string);
|
||||
char* const buffer = new char[len + 1];
|
||||
buffer[len] = 0;
|
||||
base64_encode(string, strlen(string), buffer, len, node::Base64Mode::URL);
|
||||
CHECK(len >=
|
||||
simdutf::base64_length_from_binary(slen, simdutf::base64_url) &&
|
||||
"not enough space provided for base64 encode");
|
||||
simdutf::binary_to_base64(string, slen, buffer, simdutf::base64_url);
|
||||
EXPECT_STREQ(base64_string, buffer);
|
||||
delete[] buffer;
|
||||
};
|
||||
|
|
|
@ -1,80 +0,0 @@
|
|||
#!/bin/sh
set -e
# Shell script to update base64 in the source tree to a specific version

# Repository root, resolved from this script's own location.
BASE_DIR=$(cd "$(dirname "$0")/../.." && pwd)
DEPS_DIR="$BASE_DIR/deps"

# Use $NODE if set, then the in-tree release build, then `node` from PATH.
[ -z "$NODE" ] && NODE="$BASE_DIR/out/Release/node"
[ -x "$NODE" ] || NODE=$(command -v node)

# shellcheck disable=SC1091
. "$BASE_DIR/tools/dep_updaters/utils.sh"

# Ask the GitHub API for the latest release tag. The request is
# authenticated only when GITHUB_TOKEN is set.
NEW_VERSION="$("$NODE" --input-type=module <<'EOF'
const res = await fetch('https://api.github.com/repos/aklomp/base64/releases/latest',
  process.env.GITHUB_TOKEN && {
    headers: {
      "Authorization": `Bearer ${process.env.GITHUB_TOKEN}`
    },
  });
if (!res.ok) throw new Error(`FetchError: ${res.status} ${res.statusText}`, { cause: res });
const { tag_name } = await res.json();
console.log(tag_name.replace('v', ''));
EOF
)"

# Read the vendored version through $DEPS_DIR, like the rest of this script,
# so the lookup does not depend on the caller's working directory.
CURRENT_VERSION=$(grep "base64 LANGUAGES C VERSION" "$DEPS_DIR/base64/base64/CMakeLists.txt" | sed -n "s/^.*VERSION \(.*\))/\1/p")

# This function exits with 0 if new version and current version are the same
compare_dependency_version "base64" "$NEW_VERSION" "$CURRENT_VERSION"

echo "Making temporary workspace"

WORKSPACE=$(mktemp -d 2> /dev/null || mktemp -d -t 'tmp')

# Remove the workspace on any exit path while preserving the exit status.
cleanup () {
  EXIT_CODE=$?
  [ -d "$WORKSPACE" ] && rm -rf "$WORKSPACE"
  exit $EXIT_CODE
}

trap cleanup INT TERM EXIT

cd "$WORKSPACE"

BASE64_TARBALL="base64-v$NEW_VERSION.tar.gz"

echo "Fetching base64 source archive"
curl -sL -o "$BASE64_TARBALL" "https://api.github.com/repos/aklomp/base64/tarball/v$NEW_VERSION"
log_and_verify_sha256sum "base64" "$BASE64_TARBALL"
gzip -dc "$BASE64_TARBALL" | tar xf -
rm "$BASE64_TARBALL"
mv aklomp-base64-* base64

echo "Replacing existing base64"
rm -rf "$DEPS_DIR/base64/base64"
mv "$WORKSPACE/base64" "$DEPS_DIR/base64/"

# Build configuration is handled by `deps/base64/base64.gyp`, but since `config.h` has to be present for the build
# to work, we create it and leave it empty.
echo "// Intentionally empty" > "$DEPS_DIR/base64/base64/lib/config.h"

# Clear out .gitignore, otherwise config.h is ignored. That's dangerous when
# people check in our tarballs into source control and run `git clean`.
echo "# Intentionally empty" > "$DEPS_DIR/base64/base64/.gitignore"

# update the base64_version.h
cat > "$BASE_DIR/src/base64_version.h" << EOL
// This is an auto generated file, please do not edit.
// Refer to tools/dep_updaters/update-base64.sh
#ifndef SRC_BASE64_VERSION_H_
#define SRC_BASE64_VERSION_H_
#define BASE64_VERSION "$NEW_VERSION"
#endif // SRC_BASE64_VERSION_H_
EOL

# Update the version number on maintaining-dependencies.md
# and print the new version as the last line of the script as we need
# to add it to $GITHUB_ENV variable
finalize_version_update "base64" "$NEW_VERSION" "src/base64_version.h"
|
|
@ -145,6 +145,4 @@ addlicense "nghttp3" "deps/ngtcp2/nghttp3/" "$licenseText"
|
|||
licenseText="$(curl -sL https://raw.githubusercontent.com/jprichardson/node-fs-extra/b34da2762a4865b025cac06d02d6a2f1f1027b65/LICENSE)"
|
||||
addlicense "node-fs-extra" "lib/internal/fs/cp" "$licenseText"
|
||||
|
||||
addlicense "base64" "deps/base64/base64/" "$(cat "${rootdir}/deps/base64/base64/LICENSE" || true)"
|
||||
|
||||
mv "$tmplicense" "$licensefile"
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue