summaryrefslogtreecommitdiff
path: root/lib/crypto
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-12-02 18:53:50 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2025-12-02 18:53:50 -0800
commitf617d24606553159a271f43e36d1c71a4c317e48 (patch)
treef39957a87580dcc1835e2c1f0bfad5adeef93483 /lib/crypto
parent906003e15160642658358153e7598302d1b38166 (diff)
parent5dc8d277520be6f0be11f36712e557167b3964c8 (diff)
Merge tag 'fpsimd-on-stack-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux
Pull arm64 FPSIMD on-stack buffer updates from Eric Biggers: "This is a core arm64 change. However, I was asked to take this because most uses of kernel-mode FPSIMD are in crypto or CRC code. In v6.8, the size of task_struct on arm64 increased by 528 bytes due to the new 'kernel_fpsimd_state' field. This field was added to allow kernel-mode FPSIMD code to be preempted. Unfortunately, 528 bytes is kind of a lot for task_struct. This regression in the task_struct size was noticed and reported. Recover that space by making this state be allocated on the stack at the beginning of each kernel-mode FPSIMD section. To make it easier for all the users of kernel-mode FPSIMD to do that correctly, introduce and use a 'scoped_ksimd' abstraction" * tag 'fpsimd-on-stack-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux: (23 commits) lib/crypto: arm64: Move remaining algorithms to scoped ksimd API lib/crypto: arm/blake2b: Move to scoped ksimd API arm64/fpsimd: Allocate kernel mode FP/SIMD buffers on the stack arm64/fpu: Enforce task-context only for generic kernel mode FPU net/mlx5: Switch to more abstract scoped ksimd guard API on arm64 arm64/xorblocks: Switch to 'ksimd' scoped guard API crypto/arm64: sm4 - Switch to 'ksimd' scoped guard API crypto/arm64: sm3 - Switch to 'ksimd' scoped guard API crypto/arm64: sha3 - Switch to 'ksimd' scoped guard API crypto/arm64: polyval - Switch to 'ksimd' scoped guard API crypto/arm64: nhpoly1305 - Switch to 'ksimd' scoped guard API crypto/arm64: aes-gcm - Switch to 'ksimd' scoped guard API crypto/arm64: aes-blk - Switch to 'ksimd' scoped guard API crypto/arm64: aes-ccm - Switch to 'ksimd' scoped guard API raid6: Move to more abstract 'ksimd' guard API crypto: aegis128-neon - Move to more abstract 'ksimd' guard API crypto/arm64: sm4-ce-gcm - Avoid pointless yield of the NEON unit crypto/arm64: sm4-ce-ccm - Avoid pointless yield of the NEON unit crypto/arm64: aes-ce-ccm - Avoid pointless yield of the NEON unit lib/crc: Switch ARM and arm64 to 'ksimd' scoped guard API ...
Diffstat (limited to 'lib/crypto')
-rw-r--r--lib/crypto/arm/blake2b.h5
-rw-r--r--lib/crypto/arm/chacha.h11
-rw-r--r--lib/crypto/arm/curve25519.h5
-rw-r--r--lib/crypto/arm/poly1305.h6
-rw-r--r--lib/crypto/arm/sha1.h13
-rw-r--r--lib/crypto/arm/sha256.h12
-rw-r--r--lib/crypto/arm/sha512.h5
-rw-r--r--lib/crypto/arm64/chacha.h11
-rw-r--r--lib/crypto/arm64/poly1305.h6
-rw-r--r--lib/crypto/arm64/polyval.h24
-rw-r--r--lib/crypto/arm64/sha1.h7
-rw-r--r--lib/crypto/arm64/sha256.h19
-rw-r--r--lib/crypto/arm64/sha3.h13
-rw-r--r--lib/crypto/arm64/sha512.h8
14 files changed, 61 insertions, 84 deletions
diff --git a/lib/crypto/arm/blake2b.h b/lib/crypto/arm/blake2b.h
index 1b9154d119db..5c76498521e6 100644
--- a/lib/crypto/arm/blake2b.h
+++ b/lib/crypto/arm/blake2b.h
@@ -24,9 +24,8 @@ static void blake2b_compress(struct blake2b_ctx *ctx,
const size_t blocks = min_t(size_t, nblocks,
SZ_4K / BLAKE2B_BLOCK_SIZE);
- kernel_neon_begin();
- blake2b_compress_neon(ctx, data, blocks, inc);
- kernel_neon_end();
+ scoped_ksimd()
+ blake2b_compress_neon(ctx, data, blocks, inc);
data += blocks * BLAKE2B_BLOCK_SIZE;
nblocks -= blocks;
diff --git a/lib/crypto/arm/chacha.h b/lib/crypto/arm/chacha.h
index 0cae30f8ee5d..836e49088e98 100644
--- a/lib/crypto/arm/chacha.h
+++ b/lib/crypto/arm/chacha.h
@@ -12,7 +12,6 @@
#include <asm/cputype.h>
#include <asm/hwcap.h>
-#include <asm/neon.h>
#include <asm/simd.h>
asmlinkage void chacha_block_xor_neon(const struct chacha_state *state,
@@ -68,9 +67,8 @@ static void hchacha_block_arch(const struct chacha_state *state,
if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable()) {
hchacha_block_arm(state, out, nrounds);
} else {
- kernel_neon_begin();
- hchacha_block_neon(state, out, nrounds);
- kernel_neon_end();
+ scoped_ksimd()
+ hchacha_block_neon(state, out, nrounds);
}
}
@@ -87,9 +85,8 @@ static void chacha_crypt_arch(struct chacha_state *state, u8 *dst,
do {
unsigned int todo = min_t(unsigned int, bytes, SZ_4K);
- kernel_neon_begin();
- chacha_doneon(state, dst, src, todo, nrounds);
- kernel_neon_end();
+ scoped_ksimd()
+ chacha_doneon(state, dst, src, todo, nrounds);
bytes -= todo;
src += todo;
diff --git a/lib/crypto/arm/curve25519.h b/lib/crypto/arm/curve25519.h
index f6d66494eb8f..b1a566885e95 100644
--- a/lib/crypto/arm/curve25519.h
+++ b/lib/crypto/arm/curve25519.h
@@ -25,9 +25,8 @@ static void curve25519_arch(u8 out[CURVE25519_KEY_SIZE],
const u8 point[CURVE25519_KEY_SIZE])
{
if (static_branch_likely(&have_neon) && crypto_simd_usable()) {
- kernel_neon_begin();
- curve25519_neon(out, scalar, point);
- kernel_neon_end();
+ scoped_ksimd()
+ curve25519_neon(out, scalar, point);
} else {
curve25519_generic(out, scalar, point);
}
diff --git a/lib/crypto/arm/poly1305.h b/lib/crypto/arm/poly1305.h
index 0021cf368307..0fe903d8de55 100644
--- a/lib/crypto/arm/poly1305.h
+++ b/lib/crypto/arm/poly1305.h
@@ -6,7 +6,6 @@
*/
#include <asm/hwcap.h>
-#include <asm/neon.h>
#include <asm/simd.h>
#include <linux/cpufeature.h>
#include <linux/jump_label.h>
@@ -32,9 +31,8 @@ static void poly1305_blocks(struct poly1305_block_state *state, const u8 *src,
do {
unsigned int todo = min_t(unsigned int, len, SZ_4K);
- kernel_neon_begin();
- poly1305_blocks_neon(state, src, todo, padbit);
- kernel_neon_end();
+ scoped_ksimd()
+ poly1305_blocks_neon(state, src, todo, padbit);
len -= todo;
src += todo;
diff --git a/lib/crypto/arm/sha1.h b/lib/crypto/arm/sha1.h
index 29f8bcad0447..3e2d8c7cab9f 100644
--- a/lib/crypto/arm/sha1.h
+++ b/lib/crypto/arm/sha1.h
@@ -4,7 +4,6 @@
*
* Copyright 2025 Google LLC
*/
-#include <asm/neon.h>
#include <asm/simd.h>
static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
@@ -22,12 +21,12 @@ static void sha1_blocks(struct sha1_block_state *state,
{
if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
static_branch_likely(&have_neon) && likely(may_use_simd())) {
- kernel_neon_begin();
- if (static_branch_likely(&have_ce))
- sha1_ce_transform(state, data, nblocks);
- else
- sha1_transform_neon(state, data, nblocks);
- kernel_neon_end();
+ scoped_ksimd() {
+ if (static_branch_likely(&have_ce))
+ sha1_ce_transform(state, data, nblocks);
+ else
+ sha1_transform_neon(state, data, nblocks);
+ }
} else {
sha1_block_data_order(state, data, nblocks);
}
diff --git a/lib/crypto/arm/sha256.h b/lib/crypto/arm/sha256.h
index 7556457b3094..ae7e52dd6e3b 100644
--- a/lib/crypto/arm/sha256.h
+++ b/lib/crypto/arm/sha256.h
@@ -22,12 +22,12 @@ static void sha256_blocks(struct sha256_block_state *state,
{
if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
static_branch_likely(&have_neon) && likely(may_use_simd())) {
- kernel_neon_begin();
- if (static_branch_likely(&have_ce))
- sha256_ce_transform(state, data, nblocks);
- else
- sha256_block_data_order_neon(state, data, nblocks);
- kernel_neon_end();
+ scoped_ksimd() {
+ if (static_branch_likely(&have_ce))
+ sha256_ce_transform(state, data, nblocks);
+ else
+ sha256_block_data_order_neon(state, data, nblocks);
+ }
} else {
sha256_block_data_order(state, data, nblocks);
}
diff --git a/lib/crypto/arm/sha512.h b/lib/crypto/arm/sha512.h
index d1b485dd275d..ed9bd81d6d78 100644
--- a/lib/crypto/arm/sha512.h
+++ b/lib/crypto/arm/sha512.h
@@ -19,9 +19,8 @@ static void sha512_blocks(struct sha512_block_state *state,
{
if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
static_branch_likely(&have_neon) && likely(may_use_simd())) {
- kernel_neon_begin();
- sha512_block_data_order_neon(state, data, nblocks);
- kernel_neon_end();
+ scoped_ksimd()
+ sha512_block_data_order_neon(state, data, nblocks);
} else {
sha512_block_data_order(state, data, nblocks);
}
diff --git a/lib/crypto/arm64/chacha.h b/lib/crypto/arm64/chacha.h
index ba6c22d46086..ca8c6a8b0578 100644
--- a/lib/crypto/arm64/chacha.h
+++ b/lib/crypto/arm64/chacha.h
@@ -23,7 +23,6 @@
#include <linux/kernel.h>
#include <asm/hwcap.h>
-#include <asm/neon.h>
#include <asm/simd.h>
asmlinkage void chacha_block_xor_neon(const struct chacha_state *state,
@@ -65,9 +64,8 @@ static void hchacha_block_arch(const struct chacha_state *state,
if (!static_branch_likely(&have_neon) || !crypto_simd_usable()) {
hchacha_block_generic(state, out, nrounds);
} else {
- kernel_neon_begin();
- hchacha_block_neon(state, out, nrounds);
- kernel_neon_end();
+ scoped_ksimd()
+ hchacha_block_neon(state, out, nrounds);
}
}
@@ -81,9 +79,8 @@ static void chacha_crypt_arch(struct chacha_state *state, u8 *dst,
do {
unsigned int todo = min_t(unsigned int, bytes, SZ_4K);
- kernel_neon_begin();
- chacha_doneon(state, dst, src, todo, nrounds);
- kernel_neon_end();
+ scoped_ksimd()
+ chacha_doneon(state, dst, src, todo, nrounds);
bytes -= todo;
src += todo;
diff --git a/lib/crypto/arm64/poly1305.h b/lib/crypto/arm64/poly1305.h
index aed5921ccd9a..b77669767cd6 100644
--- a/lib/crypto/arm64/poly1305.h
+++ b/lib/crypto/arm64/poly1305.h
@@ -6,7 +6,6 @@
*/
#include <asm/hwcap.h>
-#include <asm/neon.h>
#include <asm/simd.h>
#include <linux/cpufeature.h>
#include <linux/jump_label.h>
@@ -31,9 +30,8 @@ static void poly1305_blocks(struct poly1305_block_state *state, const u8 *src,
do {
unsigned int todo = min_t(unsigned int, len, SZ_4K);
- kernel_neon_begin();
- poly1305_blocks_neon(state, src, todo, padbit);
- kernel_neon_end();
+ scoped_ksimd()
+ poly1305_blocks_neon(state, src, todo, padbit);
len -= todo;
src += todo;
diff --git a/lib/crypto/arm64/polyval.h b/lib/crypto/arm64/polyval.h
index 2486e80750d0..a39763395e9b 100644
--- a/lib/crypto/arm64/polyval.h
+++ b/lib/crypto/arm64/polyval.h
@@ -4,7 +4,6 @@
*
* Copyright 2025 Google LLC
*/
-#include <asm/neon.h>
#include <asm/simd.h>
#include <linux/cpufeature.h>
@@ -24,13 +23,14 @@ static void polyval_preparekey_arch(struct polyval_key *key,
static_assert(ARRAY_SIZE(key->h_powers) == NUM_H_POWERS);
memcpy(&key->h_powers[NUM_H_POWERS - 1], raw_key, POLYVAL_BLOCK_SIZE);
if (static_branch_likely(&have_pmull) && may_use_simd()) {
- kernel_neon_begin();
- for (int i = NUM_H_POWERS - 2; i >= 0; i--) {
- key->h_powers[i] = key->h_powers[i + 1];
- polyval_mul_pmull(&key->h_powers[i],
- &key->h_powers[NUM_H_POWERS - 1]);
+ scoped_ksimd() {
+ for (int i = NUM_H_POWERS - 2; i >= 0; i--) {
+ key->h_powers[i] = key->h_powers[i + 1];
+ polyval_mul_pmull(
+ &key->h_powers[i],
+ &key->h_powers[NUM_H_POWERS - 1]);
+ }
}
- kernel_neon_end();
} else {
for (int i = NUM_H_POWERS - 2; i >= 0; i--) {
key->h_powers[i] = key->h_powers[i + 1];
@@ -44,9 +44,8 @@ static void polyval_mul_arch(struct polyval_elem *acc,
const struct polyval_key *key)
{
if (static_branch_likely(&have_pmull) && may_use_simd()) {
- kernel_neon_begin();
- polyval_mul_pmull(acc, &key->h_powers[NUM_H_POWERS - 1]);
- kernel_neon_end();
+ scoped_ksimd()
+ polyval_mul_pmull(acc, &key->h_powers[NUM_H_POWERS - 1]);
} else {
polyval_mul_generic(acc, &key->h_powers[NUM_H_POWERS - 1]);
}
@@ -62,9 +61,8 @@ static void polyval_blocks_arch(struct polyval_elem *acc,
size_t n = min_t(size_t, nblocks,
4096 / POLYVAL_BLOCK_SIZE);
- kernel_neon_begin();
- polyval_blocks_pmull(acc, key, data, n);
- kernel_neon_end();
+ scoped_ksimd()
+ polyval_blocks_pmull(acc, key, data, n);
data += n * POLYVAL_BLOCK_SIZE;
nblocks -= n;
} while (nblocks);
diff --git a/lib/crypto/arm64/sha1.h b/lib/crypto/arm64/sha1.h
index aaef4ebfc5e3..bc7071f1be09 100644
--- a/lib/crypto/arm64/sha1.h
+++ b/lib/crypto/arm64/sha1.h
@@ -4,7 +4,6 @@
*
* Copyright 2025 Google LLC
*/
-#include <asm/neon.h>
#include <asm/simd.h>
#include <linux/cpufeature.h>
@@ -20,9 +19,9 @@ static void sha1_blocks(struct sha1_block_state *state,
do {
size_t rem;
- kernel_neon_begin();
- rem = __sha1_ce_transform(state, data, nblocks);
- kernel_neon_end();
+ scoped_ksimd()
+ rem = __sha1_ce_transform(state, data, nblocks);
+
data += (nblocks - rem) * SHA1_BLOCK_SIZE;
nblocks = rem;
} while (nblocks);
diff --git a/lib/crypto/arm64/sha256.h b/lib/crypto/arm64/sha256.h
index 80d06df27d3a..568dff0f276a 100644
--- a/lib/crypto/arm64/sha256.h
+++ b/lib/crypto/arm64/sha256.h
@@ -4,7 +4,6 @@
*
* Copyright 2025 Google LLC
*/
-#include <asm/neon.h>
#include <asm/simd.h>
#include <linux/cpufeature.h>
@@ -27,17 +26,16 @@ static void sha256_blocks(struct sha256_block_state *state,
do {
size_t rem;
- kernel_neon_begin();
- rem = __sha256_ce_transform(state,
- data, nblocks);
- kernel_neon_end();
+ scoped_ksimd()
+ rem = __sha256_ce_transform(state, data,
+ nblocks);
+
data += (nblocks - rem) * SHA256_BLOCK_SIZE;
nblocks = rem;
} while (nblocks);
} else {
- kernel_neon_begin();
- sha256_block_neon(state, data, nblocks);
- kernel_neon_end();
+ scoped_ksimd()
+ sha256_block_neon(state, data, nblocks);
}
} else {
sha256_block_data_order(state, data, nblocks);
@@ -66,9 +64,8 @@ static bool sha256_finup_2x_arch(const struct __sha256_ctx *ctx,
if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
static_branch_likely(&have_ce) && len >= SHA256_BLOCK_SIZE &&
len <= 65536 && likely(may_use_simd())) {
- kernel_neon_begin();
- sha256_ce_finup2x(ctx, data1, data2, len, out1, out2);
- kernel_neon_end();
+ scoped_ksimd()
+ sha256_ce_finup2x(ctx, data1, data2, len, out1, out2);
kmsan_unpoison_memory(out1, SHA256_DIGEST_SIZE);
kmsan_unpoison_memory(out2, SHA256_DIGEST_SIZE);
return true;
diff --git a/lib/crypto/arm64/sha3.h b/lib/crypto/arm64/sha3.h
index 6dd5183056da..b602f1b3b282 100644
--- a/lib/crypto/arm64/sha3.h
+++ b/lib/crypto/arm64/sha3.h
@@ -7,7 +7,6 @@
* published by the Free Software Foundation.
*/
-#include <asm/neon.h>
#include <asm/simd.h>
#include <linux/cpufeature.h>
@@ -23,10 +22,9 @@ static void sha3_absorb_blocks(struct sha3_state *state, const u8 *data,
do {
size_t rem;
- kernel_neon_begin();
- rem = sha3_ce_transform(state, data, nblocks,
- block_size);
- kernel_neon_end();
+ scoped_ksimd()
+ rem = sha3_ce_transform(state, data, nblocks,
+ block_size);
data += (nblocks - rem) * block_size;
nblocks = rem;
} while (nblocks);
@@ -46,9 +44,8 @@ static void sha3_keccakf(struct sha3_state *state)
*/
static const u8 zeroes[SHA3_512_BLOCK_SIZE];
- kernel_neon_begin();
- sha3_ce_transform(state, zeroes, 1, sizeof(zeroes));
- kernel_neon_end();
+ scoped_ksimd()
+ sha3_ce_transform(state, zeroes, 1, sizeof(zeroes));
} else {
sha3_keccakf_generic(state);
}
diff --git a/lib/crypto/arm64/sha512.h b/lib/crypto/arm64/sha512.h
index ddb0d256f73a..7eb7ef04d268 100644
--- a/lib/crypto/arm64/sha512.h
+++ b/lib/crypto/arm64/sha512.h
@@ -4,7 +4,7 @@
*
* Copyright 2025 Google LLC
*/
-#include <asm/neon.h>
+
#include <asm/simd.h>
#include <linux/cpufeature.h>
@@ -24,9 +24,9 @@ static void sha512_blocks(struct sha512_block_state *state,
do {
size_t rem;
- kernel_neon_begin();
- rem = __sha512_ce_transform(state, data, nblocks);
- kernel_neon_end();
+ scoped_ksimd()
+ rem = __sha512_ce_transform(state, data, nblocks);
+
data += (nblocks - rem) * SHA512_BLOCK_SIZE;
nblocks = rem;
} while (nblocks);