1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138
|
// SPDX-License-Identifier: GPL-2.0
/*
* ChaCha and HChaCha functions (ARM optimized)
*
* Copyright (C) 2016-2019 Linaro, Ltd. <ard.biesheuvel@linaro.org>
* Copyright (C) 2015 Martin Willi
*/
#include <crypto/chacha.h>
#include <crypto/internal/simd.h>
#include <linux/jump_label.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <asm/cputype.h>
#include <asm/hwcap.h>
#include <asm/neon.h>
#include <asm/simd.h>
asmlinkage void chacha_block_xor_neon(const struct chacha_state *state,
u8 *dst, const u8 *src, int nrounds);
asmlinkage void chacha_4block_xor_neon(const struct chacha_state *state,
u8 *dst, const u8 *src,
int nrounds, unsigned int nbytes);
asmlinkage void hchacha_block_arm(const struct chacha_state *state,
u32 out[HCHACHA_OUT_WORDS], int nrounds);
asmlinkage void hchacha_block_neon(const struct chacha_state *state,
u32 out[HCHACHA_OUT_WORDS], int nrounds);
asmlinkage void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes,
const struct chacha_state *state, int nrounds);
static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_neon);
static inline bool neon_usable(void)
{
return static_branch_likely(&use_neon) && crypto_simd_usable();
}
static void chacha_doneon(struct chacha_state *state, u8 *dst, const u8 *src,
unsigned int bytes, int nrounds)
{
u8 buf[CHACHA_BLOCK_SIZE];
while (bytes > CHACHA_BLOCK_SIZE) {
unsigned int l = min(bytes, CHACHA_BLOCK_SIZE * 4U);
chacha_4block_xor_neon(state, dst, src, nrounds, l);
bytes -= l;
src += l;
dst += l;
state->x[12] += DIV_ROUND_UP(l, CHACHA_BLOCK_SIZE);
}
if (bytes) {
const u8 *s = src;
u8 *d = dst;
if (bytes != CHACHA_BLOCK_SIZE)
s = d = memcpy(buf, src, bytes);
chacha_block_xor_neon(state, d, s, nrounds);
if (d != dst)
memcpy(dst, buf, bytes);
state->x[12]++;
}
}
void hchacha_block_arch(const struct chacha_state *state,
u32 out[HCHACHA_OUT_WORDS], int nrounds)
{
if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable()) {
hchacha_block_arm(state, out, nrounds);
} else {
kernel_neon_begin();
hchacha_block_neon(state, out, nrounds);
kernel_neon_end();
}
}
EXPORT_SYMBOL(hchacha_block_arch);
void chacha_crypt_arch(struct chacha_state *state, u8 *dst, const u8 *src,
unsigned int bytes, int nrounds)
{
if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable() ||
bytes <= CHACHA_BLOCK_SIZE) {
chacha_doarm(dst, src, bytes, state, nrounds);
state->x[12] += DIV_ROUND_UP(bytes, CHACHA_BLOCK_SIZE);
return;
}
do {
unsigned int todo = min_t(unsigned int, bytes, SZ_4K);
kernel_neon_begin();
chacha_doneon(state, dst, src, todo, nrounds);
kernel_neon_end();
bytes -= todo;
src += todo;
dst += todo;
} while (bytes);
}
EXPORT_SYMBOL(chacha_crypt_arch);
bool chacha_is_arch_optimized(void)
{
/* We always can use at least the ARM scalar implementation. */
return true;
}
EXPORT_SYMBOL(chacha_is_arch_optimized);
static int __init chacha_arm_mod_init(void)
{
if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) {
switch (read_cpuid_part()) {
case ARM_CPU_PART_CORTEX_A7:
case ARM_CPU_PART_CORTEX_A5:
/*
* The Cortex-A7 and Cortex-A5 do not perform well with
* the NEON implementation but do incredibly with the
* scalar one and use less power.
*/
break;
default:
static_branch_enable(&use_neon);
}
}
return 0;
}
subsys_initcall(chacha_arm_mod_init);
static void __exit chacha_arm_mod_exit(void)
{
}
module_exit(chacha_arm_mod_exit);
MODULE_DESCRIPTION("ChaCha and HChaCha functions (ARM optimized)");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
|