diff options
| author | Peter Zijlstra <peterz@infradead.org> | 2025-11-21 11:21:20 +0100 |
|---|---|---|
| committer | Peter Zijlstra <peterz@infradead.org> | 2025-11-21 11:21:20 +0100 |
| commit | 2ace52718376fdb56aca863da2eebe70d7e2ddb1 (patch) | |
| tree | 494517cf79608fa93840a3c84ee5e6118da5f26f /lib/crypto/x86/blake2s.h | |
| parent | a53d0cf7f1cb3182ad533ff5cacfa5fd29c419ad (diff) | |
| parent | 11991999a20145b7f8af21202d0cac6b1f90a6e4 (diff) | |
Merge branch 'objtool/core'
Bring in the UDB and objtool data annotations to avoid conflicts while further extending the bug exceptions.
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Diffstat (limited to 'lib/crypto/x86/blake2s.h')
| -rw-r--r-- | lib/crypto/x86/blake2s.h | 64 |
1 files changed, 64 insertions, 0 deletions
diff --git a/lib/crypto/x86/blake2s.h b/lib/crypto/x86/blake2s.h new file mode 100644 index 000000000000..b6d30d2fa045 --- /dev/null +++ b/lib/crypto/x86/blake2s.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#include <asm/cpufeature.h> +#include <asm/fpu/api.h> +#include <asm/processor.h> +#include <asm/simd.h> +#include <linux/jump_label.h> +#include <linux/kernel.h> +#include <linux/sizes.h> + +asmlinkage void blake2s_compress_ssse3(struct blake2s_state *state, + const u8 *block, const size_t nblocks, + const u32 inc); +asmlinkage void blake2s_compress_avx512(struct blake2s_state *state, + const u8 *block, const size_t nblocks, + const u32 inc); + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(blake2s_use_ssse3); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(blake2s_use_avx512); + +static void blake2s_compress(struct blake2s_state *state, const u8 *block, + size_t nblocks, const u32 inc) +{ + /* SIMD disables preemption, so relax after processing each page. */ + BUILD_BUG_ON(SZ_4K / BLAKE2S_BLOCK_SIZE < 8); + + if (!static_branch_likely(&blake2s_use_ssse3) || !may_use_simd()) { + blake2s_compress_generic(state, block, nblocks, inc); + return; + } + + do { + const size_t blocks = min_t(size_t, nblocks, + SZ_4K / BLAKE2S_BLOCK_SIZE); + + kernel_fpu_begin(); + if (static_branch_likely(&blake2s_use_avx512)) + blake2s_compress_avx512(state, block, blocks, inc); + else + blake2s_compress_ssse3(state, block, blocks, inc); + kernel_fpu_end(); + + nblocks -= blocks; + block += blocks * BLAKE2S_BLOCK_SIZE; + } while (nblocks); +} + +#define blake2s_mod_init_arch blake2s_mod_init_arch +static void blake2s_mod_init_arch(void) +{ + if (boot_cpu_has(X86_FEATURE_SSSE3)) + static_branch_enable(&blake2s_use_ssse3); + + if (boot_cpu_has(X86_FEATURE_AVX) && + boot_cpu_has(X86_FEATURE_AVX2) && + boot_cpu_has(X86_FEATURE_AVX512F) && + boot_cpu_has(X86_FEATURE_AVX512VL) && + cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM | + XFEATURE_MASK_AVX512, NULL)) + static_branch_enable(&blake2s_use_avx512); +} |