linux/arch/arm/lib/crypto/chacha-glue.c
Herbert Xu ef93f15628 Revert "crypto: run initcalls for generic implementations earlier"
This reverts commit c4741b2305.

Crypto API self-tests no longer run at registration time; they now
run either at late_initcall time or on first use.

The premise of the above commit therefore no longer holds.  Revert
it, along with the subsequent additions of subsys_initcall and
arch_initcall.

Note that lib/crypto initcalls stay at subsys_initcall (or rather
are downgraded to it from arch_initcall) because they may need to
run before Crypto API registration.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2025-05-05 18:20:44 +08:00
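
For context, a minimal sketch of where these levels sit in the boot-time
initcall ordering (levels as defined in include/linux/init.h);
chacha_lib_init is a hypothetical lib/crypto-style hook, not code from
this commit:

/*
 * core -> postcore -> arch -> subsys -> fs -> device -> late
 *
 * Registering at subsys_initcall therefore runs before typical
 * Crypto API driver registration (module_init maps to the device
 * level for built-in code) and well before late_initcall self-tests.
 */
static int __init chacha_lib_init(void)
{
	return 0;	/* e.g. set up static keys based on CPU features */
}
subsys_initcall(chacha_lib_init);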


// SPDX-License-Identifier: GPL-2.0
/*
 * ChaCha and HChaCha functions (ARM optimized)
 *
 * Copyright (C) 2016-2019 Linaro, Ltd. <ard.biesheuvel@linaro.org>
 * Copyright (C) 2015 Martin Willi
 */
#include <crypto/chacha.h>
#include <crypto/internal/simd.h>
#include <linux/jump_label.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <asm/cputype.h>
#include <asm/hwcap.h>
#include <asm/neon.h>
#include <asm/simd.h>
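
/*
 * The routines below are implemented in assembly: scalar versions that
 * run on any CPU, and NEON versions gated by the static branch below.
 */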
asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
				      int nrounds);
asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
				       int nrounds, unsigned int nbytes);
asmlinkage void hchacha_block_arm(const u32 *state, u32 *out, int nrounds);
asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds);
asmlinkage void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes,
			     const u32 *state, int nrounds);

static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_neon);

static inline bool neon_usable(void)
{
	return static_branch_likely(&use_neon) && crypto_simd_usable();
}
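
/*
 * NEON bulk path: while more than one block remains, hand up to four
 * blocks at a time to the 4-way NEON routine.  A final block of at most
 * CHACHA_BLOCK_SIZE bytes goes through chacha_block_xor_neon, bounced
 * through an on-stack buffer when it is shorter than a full block.
 * state[12] is the ChaCha block counter.
 */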
static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
			  unsigned int bytes, int nrounds)
{
	u8 buf[CHACHA_BLOCK_SIZE];

	while (bytes > CHACHA_BLOCK_SIZE) {
		unsigned int l = min(bytes, CHACHA_BLOCK_SIZE * 4U);

		chacha_4block_xor_neon(state, dst, src, nrounds, l);
		bytes -= l;
		src += l;
		dst += l;
		state[12] += DIV_ROUND_UP(l, CHACHA_BLOCK_SIZE);
	}
	if (bytes) {
		const u8 *s = src;
		u8 *d = dst;

		if (bytes != CHACHA_BLOCK_SIZE)
			s = d = memcpy(buf, src, bytes);
		chacha_block_xor_neon(state, d, s, nrounds);
		if (d != dst)
			memcpy(dst, buf, bytes);
		state[12]++;
	}
}
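
/*
 * HChaCha is the core permutation used for XChaCha subkey derivation.
 * It operates on a single block, so no chunking is needed around the
 * NEON section.
 */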
void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
{
	if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable()) {
		hchacha_block_arm(state, stream, nrounds);
	} else {
		kernel_neon_begin();
		hchacha_block_neon(state, stream, nrounds);
		kernel_neon_end();
	}
}
EXPORT_SYMBOL(hchacha_block_arch);
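
/*
 * Bulk ChaCha: use the scalar code for short requests or when NEON is
 * not usable in this context; otherwise feed the NEON path at most
 * SZ_4K bytes at a time, so each kernel_neon_begin()/end() section, and
 * with it scheduling latency, stays bounded.
 */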
void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
		       int nrounds)
{
	if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable() ||
	    bytes <= CHACHA_BLOCK_SIZE) {
		chacha_doarm(dst, src, bytes, state, nrounds);
		state[12] += DIV_ROUND_UP(bytes, CHACHA_BLOCK_SIZE);
		return;
	}

	do {
		unsigned int todo = min_t(unsigned int, bytes, SZ_4K);

		kernel_neon_begin();
		chacha_doneon(state, dst, src, todo, nrounds);
		kernel_neon_end();

		bytes -= todo;
		src += todo;
		dst += todo;
	} while (bytes);
}
EXPORT_SYMBOL(chacha_crypt_arch);

bool chacha_is_arch_optimized(void)
{
	/* We can always use at least the ARM scalar implementation. */
	return true;
}
EXPORT_SYMBOL(chacha_is_arch_optimized);
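
/*
 * Boot-time setup: flip the static key to enable the NEON path, except
 * on cores where the scalar code is known to be the better choice.
 */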
static int __init chacha_arm_mod_init(void)
{
	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) {
		switch (read_cpuid_part()) {
		case ARM_CPU_PART_CORTEX_A7:
		case ARM_CPU_PART_CORTEX_A5:
			/*
			 * The Cortex-A7 and Cortex-A5 do not perform well
			 * with the NEON implementation, but do incredibly
			 * well with the scalar one while using less power.
			 */
			break;
		default:
			static_branch_enable(&use_neon);
		}
	}
	return 0;
}
subsys_initcall(chacha_arm_mod_init);
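
/*
 * There is nothing to unregister, but an exit hook must be present for
 * the module to be unloadable.
 */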
static void __exit chacha_arm_mod_exit(void)
{
}
module_exit(chacha_arm_mod_exit);

MODULE_DESCRIPTION("ChaCha and HChaCha functions (ARM optimized)");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");