diff options
| author | Eric Biggers <ebiggers@kernel.org> | 2025-10-25 22:50:26 -0700 |
|---|---|---|
| committer | Eric Biggers <ebiggers@kernel.org> | 2025-11-05 20:02:35 -0800 |
| commit | 1e29a750572a25200fcea995d91e5f6448f340c0 (patch) | |
| tree | d19698b26f79f9af8bc3bb8ef8d655681650e9cc /arch/arm64 | |
| parent | be755eb2b021495e1935813abaf9e1f1de1d8c78 (diff) | |
lib/crypto: arm64/sha3: Migrate optimized code into library
Instead of exposing the arm64-optimized SHA-3 code via arm64-specific
crypto_shash algorithms, instead just implement the sha3_absorb_blocks()
and sha3_keccakf() library functions. This is much simpler, it makes
the SHA-3 library functions be arm64-optimized, and it fixes the
longstanding issue where the arm64-optimized SHA-3 code was disabled by
default. SHA-3 still remains available through crypto_shash, but
individual architectures no longer need to handle it.
Note: to see the diff from arch/arm64/crypto/sha3-ce-glue.c to
lib/crypto/arm64/sha3.h, view this commit with 'git show -M10'.
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20251026055032.1413733-10-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
Diffstat (limited to 'arch/arm64')
| -rw-r--r-- | arch/arm64/configs/defconfig | 2 | ||||
| -rw-r--r-- | arch/arm64/crypto/Kconfig | 11 | ||||
| -rw-r--r-- | arch/arm64/crypto/Makefile | 3 | ||||
| -rw-r--r-- | arch/arm64/crypto/sha3-ce-core.S | 213 | ||||
| -rw-r--r-- | arch/arm64/crypto/sha3-ce-glue.c | 150 |
5 files changed, 1 insertions, 378 deletions
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index e3a2d37bd104..20dd3a39faea 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -1783,10 +1783,10 @@ CONFIG_CRYPTO_CHACHA20=m CONFIG_CRYPTO_BENCHMARK=m CONFIG_CRYPTO_ECHAINIV=y CONFIG_CRYPTO_MICHAEL_MIC=m +CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_ANSI_CPRNG=y CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_GHASH_ARM64_CE=y -CONFIG_CRYPTO_SHA3_ARM64=m CONFIG_CRYPTO_SM3_ARM64_CE=m CONFIG_CRYPTO_AES_ARM64_CE_BLK=y CONFIG_CRYPTO_AES_ARM64_BS=m diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index 91f3093eee6a..376d6b50743f 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -25,17 +25,6 @@ config CRYPTO_NHPOLY1305_NEON Architecture: arm64 using: - NEON (Advanced SIMD) extensions -config CRYPTO_SHA3_ARM64 - tristate "Hash functions: SHA-3 (ARMv8.2 Crypto Extensions)" - depends on KERNEL_MODE_NEON - select CRYPTO_HASH - select CRYPTO_SHA3 - help - SHA-3 secure hash algorithms (FIPS 202) - - Architecture: arm64 using: - - ARMv8.2 Crypto Extensions - config CRYPTO_SM3_NEON tristate "Hash functions: SM3 (NEON)" depends on KERNEL_MODE_NEON diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile index a8b2cdbe202c..fd3d590fa113 100644 --- a/arch/arm64/crypto/Makefile +++ b/arch/arm64/crypto/Makefile @@ -5,9 +5,6 @@ # Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org> # -obj-$(CONFIG_CRYPTO_SHA3_ARM64) += sha3-ce.o -sha3-ce-y := sha3-ce-glue.o sha3-ce-core.o - obj-$(CONFIG_CRYPTO_SM3_NEON) += sm3-neon.o sm3-neon-y := sm3-neon-glue.o sm3-neon-core.o diff --git a/arch/arm64/crypto/sha3-ce-core.S b/arch/arm64/crypto/sha3-ce-core.S deleted file mode 100644 index b62bd714839b..000000000000 --- a/arch/arm64/crypto/sha3-ce-core.S +++ /dev/null @@ -1,213 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * sha3-ce-core.S - core SHA-3 transform using v8.2 Crypto Extensions - * - * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/linkage.h> -#include <asm/assembler.h> - - .irp b,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 - .set .Lv\b\().2d, \b - .set .Lv\b\().16b, \b - .endr - - /* - * ARMv8.2 Crypto Extensions instructions - */ - .macro eor3, rd, rn, rm, ra - .inst 0xce000000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16) - .endm - - .macro rax1, rd, rn, rm - .inst 0xce608c00 | .L\rd | (.L\rn << 5) | (.L\rm << 16) - .endm - - .macro bcax, rd, rn, rm, ra - .inst 0xce200000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16) - .endm - - .macro xar, rd, rn, rm, imm6 - .inst 0xce800000 | .L\rd | (.L\rn << 5) | ((\imm6) << 10) | (.L\rm << 16) - .endm - - /* - * size_t sha3_ce_transform(struct sha3_state *state, const u8 *data, - * size_t nblocks, size_t block_size) - * - * block_size is assumed to be one of 72 (SHA3-512), 104 (SHA3-384), 136 - * (SHA3-256 and SHAKE256), 144 (SHA3-224), or 168 (SHAKE128). - */ - .text -SYM_FUNC_START(sha3_ce_transform) - /* load state */ - add x8, x0, #32 - ld1 { v0.1d- v3.1d}, [x0] - ld1 { v4.1d- v7.1d}, [x8], #32 - ld1 { v8.1d-v11.1d}, [x8], #32 - ld1 {v12.1d-v15.1d}, [x8], #32 - ld1 {v16.1d-v19.1d}, [x8], #32 - ld1 {v20.1d-v23.1d}, [x8], #32 - ld1 {v24.1d}, [x8] - -0: sub x2, x2, #1 - mov w8, #24 - adr_l x9, .Lsha3_rcon - - /* load input */ - ld1 {v25.8b-v28.8b}, [x1], #32 - ld1 {v29.8b}, [x1], #8 - eor v0.8b, v0.8b, v25.8b - eor v1.8b, v1.8b, v26.8b - eor v2.8b, v2.8b, v27.8b - eor v3.8b, v3.8b, v28.8b - eor v4.8b, v4.8b, v29.8b - - ld1 {v25.8b-v28.8b}, [x1], #32 - eor v5.8b, v5.8b, v25.8b - eor v6.8b, v6.8b, v26.8b - eor v7.8b, v7.8b, v27.8b - eor v8.8b, v8.8b, v28.8b - cmp x3, #72 - b.eq 3f /* SHA3-512 (block_size=72)? */ - - ld1 {v25.8b-v28.8b}, [x1], #32 - eor v9.8b, v9.8b, v25.8b - eor v10.8b, v10.8b, v26.8b - eor v11.8b, v11.8b, v27.8b - eor v12.8b, v12.8b, v28.8b - cmp x3, #104 - b.eq 3f /* SHA3-384 (block_size=104)? */ - - ld1 {v25.8b-v28.8b}, [x1], #32 - eor v13.8b, v13.8b, v25.8b - eor v14.8b, v14.8b, v26.8b - eor v15.8b, v15.8b, v27.8b - eor v16.8b, v16.8b, v28.8b - cmp x3, #144 - b.lt 3f /* SHA3-256 or SHAKE256 (block_size=136)? */ - b.eq 2f /* SHA3-224 (block_size=144)? */ - - /* SHAKE128 (block_size=168) */ - ld1 {v25.8b-v28.8b}, [x1], #32 - eor v17.8b, v17.8b, v25.8b - eor v18.8b, v18.8b, v26.8b - eor v19.8b, v19.8b, v27.8b - eor v20.8b, v20.8b, v28.8b - b 3f -2: - /* SHA3-224 (block_size=144) */ - ld1 {v25.8b}, [x1], #8 - eor v17.8b, v17.8b, v25.8b - -3: sub w8, w8, #1 - - eor3 v29.16b, v4.16b, v9.16b, v14.16b - eor3 v26.16b, v1.16b, v6.16b, v11.16b - eor3 v28.16b, v3.16b, v8.16b, v13.16b - eor3 v25.16b, v0.16b, v5.16b, v10.16b - eor3 v27.16b, v2.16b, v7.16b, v12.16b - eor3 v29.16b, v29.16b, v19.16b, v24.16b - eor3 v26.16b, v26.16b, v16.16b, v21.16b - eor3 v28.16b, v28.16b, v18.16b, v23.16b - eor3 v25.16b, v25.16b, v15.16b, v20.16b - eor3 v27.16b, v27.16b, v17.16b, v22.16b - - rax1 v30.2d, v29.2d, v26.2d // bc[0] - rax1 v26.2d, v26.2d, v28.2d // bc[2] - rax1 v28.2d, v28.2d, v25.2d // bc[4] - rax1 v25.2d, v25.2d, v27.2d // bc[1] - rax1 v27.2d, v27.2d, v29.2d // bc[3] - - eor v0.16b, v0.16b, v30.16b - xar v29.2d, v1.2d, v25.2d, (64 - 1) - xar v1.2d, v6.2d, v25.2d, (64 - 44) - xar v6.2d, v9.2d, v28.2d, (64 - 20) - xar v9.2d, v22.2d, v26.2d, (64 - 61) - xar v22.2d, v14.2d, v28.2d, (64 - 39) - xar v14.2d, v20.2d, v30.2d, (64 - 18) - xar v31.2d, v2.2d, v26.2d, (64 - 62) - xar v2.2d, v12.2d, v26.2d, (64 - 43) - xar v12.2d, v13.2d, v27.2d, (64 - 25) - xar v13.2d, v19.2d, v28.2d, (64 - 8) - xar v19.2d, v23.2d, v27.2d, (64 - 56) - xar v23.2d, v15.2d, v30.2d, (64 - 41) - xar v15.2d, v4.2d, v28.2d, (64 - 27) - xar v28.2d, v24.2d, v28.2d, (64 - 14) - xar v24.2d, v21.2d, v25.2d, (64 - 2) - xar v8.2d, v8.2d, v27.2d, (64 - 55) - xar v4.2d, v16.2d, v25.2d, (64 - 45) - xar v16.2d, v5.2d, v30.2d, (64 - 36) - xar v5.2d, v3.2d, v27.2d, (64 - 28) - xar v27.2d, v18.2d, v27.2d, (64 - 21) - xar v3.2d, v17.2d, v26.2d, (64 - 15) - xar v25.2d, v11.2d, v25.2d, (64 - 10) - xar v26.2d, v7.2d, v26.2d, (64 - 6) - xar v30.2d, v10.2d, v30.2d, (64 - 3) - - bcax v20.16b, v31.16b, v22.16b, v8.16b - bcax v21.16b, v8.16b, v23.16b, v22.16b - bcax v22.16b, v22.16b, v24.16b, v23.16b - bcax v23.16b, v23.16b, v31.16b, v24.16b - bcax v24.16b, v24.16b, v8.16b, v31.16b - - ld1r {v31.2d}, [x9], #8 - - bcax v17.16b, v25.16b, v19.16b, v3.16b - bcax v18.16b, v3.16b, v15.16b, v19.16b - bcax v19.16b, v19.16b, v16.16b, v15.16b - bcax v15.16b, v15.16b, v25.16b, v16.16b - bcax v16.16b, v16.16b, v3.16b, v25.16b - - bcax v10.16b, v29.16b, v12.16b, v26.16b - bcax v11.16b, v26.16b, v13.16b, v12.16b - bcax v12.16b, v12.16b, v14.16b, v13.16b - bcax v13.16b, v13.16b, v29.16b, v14.16b - bcax v14.16b, v14.16b, v26.16b, v29.16b - - bcax v7.16b, v30.16b, v9.16b, v4.16b - bcax v8.16b, v4.16b, v5.16b, v9.16b - bcax v9.16b, v9.16b, v6.16b, v5.16b - bcax v5.16b, v5.16b, v30.16b, v6.16b - bcax v6.16b, v6.16b, v4.16b, v30.16b - - bcax v3.16b, v27.16b, v0.16b, v28.16b - bcax v4.16b, v28.16b, v1.16b, v0.16b - bcax v0.16b, v0.16b, v2.16b, v1.16b - bcax v1.16b, v1.16b, v27.16b, v2.16b - bcax v2.16b, v2.16b, v28.16b, v27.16b - - eor v0.16b, v0.16b, v31.16b - - cbnz w8, 3b - cond_yield 4f, x8, x9 - cbnz x2, 0b - - /* save state */ -4: st1 { v0.1d- v3.1d}, [x0], #32 - st1 { v4.1d- v7.1d}, [x0], #32 - st1 { v8.1d-v11.1d}, [x0], #32 - st1 {v12.1d-v15.1d}, [x0], #32 - st1 {v16.1d-v19.1d}, [x0], #32 - st1 {v20.1d-v23.1d}, [x0], #32 - st1 {v24.1d}, [x0] - mov x0, x2 - ret -SYM_FUNC_END(sha3_ce_transform) - - .section ".rodata", "a" - .align 8 -.Lsha3_rcon: - .quad 0x0000000000000001, 0x0000000000008082, 0x800000000000808a - .quad 0x8000000080008000, 0x000000000000808b, 0x0000000080000001 - .quad 0x8000000080008081, 0x8000000000008009, 0x000000000000008a - .quad 0x0000000000000088, 0x0000000080008009, 0x000000008000000a - .quad 0x000000008000808b, 0x800000000000008b, 0x8000000000008089 - .quad 0x8000000000008003, 0x8000000000008002, 0x8000000000000080 - .quad 0x000000000000800a, 0x800000008000000a, 0x8000000080008081 - .quad 0x8000000000008080, 0x0000000080000001, 0x8000000080008008 diff --git a/arch/arm64/crypto/sha3-ce-glue.c b/arch/arm64/crypto/sha3-ce-glue.c deleted file mode 100644 index 250f4fb76b47..000000000000 --- a/arch/arm64/crypto/sha3-ce-glue.c +++ /dev/null @@ -1,150 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * sha3-ce-glue.c - core SHA-3 transform using v8.2 Crypto Extensions - * - * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <asm/hwcap.h> -#include <asm/neon.h> -#include <asm/simd.h> -#include <crypto/internal/hash.h> -#include <crypto/sha3.h> -#include <linux/cpufeature.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/string.h> -#include <linux/unaligned.h> - -MODULE_DESCRIPTION("SHA3 secure hash using ARMv8 Crypto Extensions"); -MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); -MODULE_LICENSE("GPL v2"); -MODULE_ALIAS_CRYPTO("sha3-224"); -MODULE_ALIAS_CRYPTO("sha3-256"); -MODULE_ALIAS_CRYPTO("sha3-384"); -MODULE_ALIAS_CRYPTO("sha3-512"); - -asmlinkage size_t sha3_ce_transform(struct sha3_state *state, const u8 *data, - size_t nblocks, size_t block_size); - -static int arm64_sha3_update(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - struct sha3_state *sctx = shash_desc_ctx(desc); - struct crypto_shash *tfm = desc->tfm; - unsigned int bs; - int blocks; - - bs = crypto_shash_blocksize(tfm); - blocks = len / bs; - len -= blocks * bs; - do { - int rem; - - kernel_neon_begin(); - rem = sha3_ce_transform(sctx, data, blocks, bs); - kernel_neon_end(); - data += (blocks - rem) * bs; - blocks = rem; - } while (blocks); - return len; -} - -static int sha3_finup(struct shash_desc *desc, const u8 *src, unsigned int len, - u8 *out) -{ - struct sha3_state *sctx = shash_desc_ctx(desc); - struct crypto_shash *tfm = desc->tfm; - __le64 *digest = (__le64 *)out; - u8 block[SHA3_224_BLOCK_SIZE]; - unsigned int bs, ds; - int i; - - ds = crypto_shash_digestsize(tfm); - bs = crypto_shash_blocksize(tfm); - memcpy(block, src, len); - - block[len++] = 0x06; - memset(block + len, 0, bs - len); - block[bs - 1] |= 0x80; - - kernel_neon_begin(); - sha3_ce_transform(sctx, block, 1, bs); - kernel_neon_end(); - memzero_explicit(block , sizeof(block)); - - for (i = 0; i < ds / 8; i++) - put_unaligned_le64(sctx->st[i], digest++); - - if (ds & 4) - put_unaligned_le32(sctx->st[i], (__le32 *)digest); - - return 0; -} - -static struct shash_alg algs[] = { { - .digestsize = SHA3_224_DIGEST_SIZE, - .init = crypto_sha3_init, - .update = arm64_sha3_update, - .finup = sha3_finup, - .descsize = SHA3_STATE_SIZE, - .base.cra_name = "sha3-224", - .base.cra_driver_name = "sha3-224-ce", - .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, - .base.cra_blocksize = SHA3_224_BLOCK_SIZE, - .base.cra_module = THIS_MODULE, - .base.cra_priority = 200, -}, { - .digestsize = SHA3_256_DIGEST_SIZE, - .init = crypto_sha3_init, - .update = arm64_sha3_update, - .finup = sha3_finup, - .descsize = SHA3_STATE_SIZE, - .base.cra_name = "sha3-256", - .base.cra_driver_name = "sha3-256-ce", - .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, - .base.cra_blocksize = SHA3_256_BLOCK_SIZE, - .base.cra_module = THIS_MODULE, - .base.cra_priority = 200, -}, { - .digestsize = SHA3_384_DIGEST_SIZE, - .init = crypto_sha3_init, - .update = arm64_sha3_update, - .finup = sha3_finup, - .descsize = SHA3_STATE_SIZE, - .base.cra_name = "sha3-384", - .base.cra_driver_name = "sha3-384-ce", - .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, - .base.cra_blocksize = SHA3_384_BLOCK_SIZE, - .base.cra_module = THIS_MODULE, - .base.cra_priority = 200, -}, { - .digestsize = SHA3_512_DIGEST_SIZE, - .init = crypto_sha3_init, - .update = arm64_sha3_update, - .finup = sha3_finup, - .descsize = SHA3_STATE_SIZE, - .base.cra_name = "sha3-512", - .base.cra_driver_name = "sha3-512-ce", - .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, - .base.cra_blocksize = SHA3_512_BLOCK_SIZE, - .base.cra_module = THIS_MODULE, - .base.cra_priority = 200, -} }; - -static int __init sha3_neon_mod_init(void) -{ - return crypto_register_shashes(algs, ARRAY_SIZE(algs)); -} - -static void __exit sha3_neon_mod_fini(void) -{ - crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); -} - -module_cpu_feature_match(SHA3, sha3_neon_mod_init); -module_exit(sha3_neon_mod_fini); |