diff options
| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | arch/arm/crypto/Kconfig | 16 |
| -rw-r--r-- | arch/arm/crypto/Makefile | 2 |
| -rw-r--r-- | arch/arm/crypto/blake2b-neon-glue.c | 104 |
| -rw-r--r-- | lib/crypto/Kconfig | 1 |
| -rw-r--r-- | lib/crypto/Makefile | 1 |
| -rw-r--r-- | lib/crypto/arm/blake2b-neon-core.S (renamed from arch/arm/crypto/blake2b-neon-core.S) | 29 |
| -rw-r--r-- | lib/crypto/arm/blake2b.h | 41 |
7 files changed, 59 insertions, 135 deletions
diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index c436eec22d86..f30d743df264 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -33,22 +33,6 @@ config CRYPTO_NHPOLY1305_NEON Architecture: arm using: - NEON (Advanced SIMD) extensions -config CRYPTO_BLAKE2B_NEON - tristate "Hash functions: BLAKE2b (NEON)" - depends on KERNEL_MODE_NEON - select CRYPTO_BLAKE2B - help - BLAKE2b cryptographic hash function (RFC 7693) - - Architecture: arm using - - NEON (Advanced SIMD) extensions - - BLAKE2b digest algorithm optimized with ARM NEON instructions. - On ARM processors that have NEON support but not the ARMv8 - Crypto Extensions, typically this BLAKE2b implementation is - much faster than the SHA-2 family and slightly faster than - SHA-1. - config CRYPTO_AES_ARM tristate "Ciphers: AES" select CRYPTO_ALGAPI diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index 6346a73effc0..86dd43313dbf 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -5,7 +5,6 @@ obj-$(CONFIG_CRYPTO_AES_ARM) += aes-arm.o obj-$(CONFIG_CRYPTO_AES_ARM_BS) += aes-arm-bs.o -obj-$(CONFIG_CRYPTO_BLAKE2B_NEON) += blake2b-neon.o obj-$(CONFIG_CRYPTO_NHPOLY1305_NEON) += nhpoly1305-neon.o obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o @@ -13,7 +12,6 @@ obj-$(CONFIG_CRYPTO_GHASH_ARM_CE) += ghash-arm-ce.o aes-arm-y := aes-cipher-core.o aes-cipher-glue.o aes-arm-bs-y := aes-neonbs-core.o aes-neonbs-glue.o -blake2b-neon-y := blake2b-neon-core.o blake2b-neon-glue.o aes-arm-ce-y := aes-ce-core.o aes-ce-glue.o ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o diff --git a/arch/arm/crypto/blake2b-neon-glue.c b/arch/arm/crypto/blake2b-neon-glue.c deleted file mode 100644 index 2ff443a91724..000000000000 --- a/arch/arm/crypto/blake2b-neon-glue.c +++ /dev/null @@ -1,104 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * BLAKE2b digest algorithm, NEON accelerated - * - * Copyright 2020 
Google LLC - */ - -#include <crypto/internal/blake2b.h> -#include <crypto/internal/hash.h> - -#include <linux/module.h> -#include <linux/sizes.h> - -#include <asm/neon.h> -#include <asm/simd.h> - -asmlinkage void blake2b_compress_neon(struct blake2b_state *state, - const u8 *block, size_t nblocks, u32 inc); - -static void blake2b_compress_arch(struct blake2b_state *state, - const u8 *block, size_t nblocks, u32 inc) -{ - do { - const size_t blocks = min_t(size_t, nblocks, - SZ_4K / BLAKE2B_BLOCK_SIZE); - - kernel_neon_begin(); - blake2b_compress_neon(state, block, blocks, inc); - kernel_neon_end(); - - nblocks -= blocks; - block += blocks * BLAKE2B_BLOCK_SIZE; - } while (nblocks); -} - -static int crypto_blake2b_update_neon(struct shash_desc *desc, - const u8 *in, unsigned int inlen) -{ - return crypto_blake2b_update_bo(desc, in, inlen, blake2b_compress_arch); -} - -static int crypto_blake2b_finup_neon(struct shash_desc *desc, const u8 *in, - unsigned int inlen, u8 *out) -{ - return crypto_blake2b_finup(desc, in, inlen, out, - blake2b_compress_arch); -} - -#define BLAKE2B_ALG(name, driver_name, digest_size) \ - { \ - .base.cra_name = name, \ - .base.cra_driver_name = driver_name, \ - .base.cra_priority = 200, \ - .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY | \ - CRYPTO_AHASH_ALG_BLOCK_ONLY | \ - CRYPTO_AHASH_ALG_FINAL_NONZERO, \ - .base.cra_blocksize = BLAKE2B_BLOCK_SIZE, \ - .base.cra_ctxsize = sizeof(struct blake2b_tfm_ctx), \ - .base.cra_module = THIS_MODULE, \ - .digestsize = digest_size, \ - .setkey = crypto_blake2b_setkey, \ - .init = crypto_blake2b_init, \ - .update = crypto_blake2b_update_neon, \ - .finup = crypto_blake2b_finup_neon, \ - .descsize = sizeof(struct blake2b_state), \ - .statesize = BLAKE2B_STATE_SIZE, \ - } - -static struct shash_alg blake2b_neon_algs[] = { - BLAKE2B_ALG("blake2b-160", "blake2b-160-neon", BLAKE2B_160_HASH_SIZE), - BLAKE2B_ALG("blake2b-256", "blake2b-256-neon", BLAKE2B_256_HASH_SIZE), - BLAKE2B_ALG("blake2b-384", 
"blake2b-384-neon", BLAKE2B_384_HASH_SIZE), - BLAKE2B_ALG("blake2b-512", "blake2b-512-neon", BLAKE2B_512_HASH_SIZE), -}; - -static int __init blake2b_neon_mod_init(void) -{ - if (!(elf_hwcap & HWCAP_NEON)) - return -ENODEV; - - return crypto_register_shashes(blake2b_neon_algs, - ARRAY_SIZE(blake2b_neon_algs)); -} - -static void __exit blake2b_neon_mod_exit(void) -{ - crypto_unregister_shashes(blake2b_neon_algs, - ARRAY_SIZE(blake2b_neon_algs)); -} - -module_init(blake2b_neon_mod_init); -module_exit(blake2b_neon_mod_exit); - -MODULE_DESCRIPTION("BLAKE2b digest algorithm, NEON accelerated"); -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>"); -MODULE_ALIAS_CRYPTO("blake2b-160"); -MODULE_ALIAS_CRYPTO("blake2b-160-neon"); -MODULE_ALIAS_CRYPTO("blake2b-256"); -MODULE_ALIAS_CRYPTO("blake2b-256-neon"); -MODULE_ALIAS_CRYPTO("blake2b-384"); -MODULE_ALIAS_CRYPTO("blake2b-384-neon"); -MODULE_ALIAS_CRYPTO("blake2b-512"); -MODULE_ALIAS_CRYPTO("blake2b-512-neon"); diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig index 918378b7e833..280b888153bf 100644 --- a/lib/crypto/Kconfig +++ b/lib/crypto/Kconfig @@ -37,6 +37,7 @@ config CRYPTO_LIB_BLAKE2B config CRYPTO_LIB_BLAKE2B_ARCH bool depends on CRYPTO_LIB_BLAKE2B && !UML + default y if ARM && KERNEL_MODE_NEON # BLAKE2s support is always built-in, so there's no CRYPTO_LIB_BLAKE2S option. 
diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile index f863417b1681..bc26777d08e9 100644 --- a/lib/crypto/Makefile +++ b/lib/crypto/Makefile @@ -36,6 +36,7 @@ libblake2b-y := blake2b.o CFLAGS_blake2b.o := -Wframe-larger-than=4096 # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105930 ifeq ($(CONFIG_CRYPTO_LIB_BLAKE2B_ARCH),y) CFLAGS_blake2b.o += -I$(src)/$(SRCARCH) +libblake2b-$(CONFIG_ARM) += arm/blake2b-neon-core.o endif # CONFIG_CRYPTO_LIB_BLAKE2B_ARCH ################################################################################ diff --git a/arch/arm/crypto/blake2b-neon-core.S b/lib/crypto/arm/blake2b-neon-core.S index 0406a186377f..b55c37f0b88f 100644 --- a/arch/arm/crypto/blake2b-neon-core.S +++ b/lib/crypto/arm/blake2b-neon-core.S @@ -1,6 +1,9 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ /* - * BLAKE2b digest algorithm, NEON accelerated + * BLAKE2b digest algorithm optimized with ARM NEON instructions. On ARM + * processors that have NEON support but not the ARMv8 Crypto Extensions, + * typically this BLAKE2b implementation is much faster than the SHA-2 family + * and slightly faster than SHA-1. * * Copyright 2020 Google LLC * @@ -13,8 +16,8 @@ .fpu neon // The arguments to blake2b_compress_neon() - STATE .req r0 - BLOCK .req r1 + CTX .req r0 + DATA .req r1 NBLOCKS .req r2 INC .req r3 @@ -234,10 +237,10 @@ .endm // -// void blake2b_compress_neon(struct blake2b_state *state, -// const u8 *block, size_t nblocks, u32 inc); +// void blake2b_compress_neon(struct blake2b_ctx *ctx, +// const u8 *data, size_t nblocks, u32 inc); // -// Only the first three fields of struct blake2b_state are used: +// Only the first three fields of struct blake2b_ctx are used: // u64 h[8]; (inout) // u64 t[2]; (inout) // u64 f[2]; (in) @@ -255,7 +258,7 @@ ENTRY(blake2b_compress_neon) adr ROR24_TABLE, .Lror24_table adr ROR16_TABLE, .Lror16_table - mov ip, STATE + mov ip, CTX vld1.64 {q0-q1}, [ip]! // Load h[0..3] vld1.64 {q2-q3}, [ip]! 
// Load h[4..7] .Lnext_block: @@ -281,14 +284,14 @@ ENTRY(blake2b_compress_neon) // (q8-q9) in an aligned buffer on the stack so that they can be // reloaded when needed. (We could just reload directly from the // message buffer, but it's faster to use aligned loads.) - vld1.8 {q8-q9}, [BLOCK]! + vld1.8 {q8-q9}, [DATA]! veor q6, q6, q14 // v[12..13] = IV[4..5] ^ t[0..1] - vld1.8 {q10-q11}, [BLOCK]! + vld1.8 {q10-q11}, [DATA]! veor q7, q7, q15 // v[14..15] = IV[6..7] ^ f[0..1] - vld1.8 {q12-q13}, [BLOCK]! + vld1.8 {q12-q13}, [DATA]! vst1.8 {q8-q9}, [sp, :256] - mov ip, STATE - vld1.8 {q14-q15}, [BLOCK]! + mov ip, CTX + vld1.8 {q14-q15}, [DATA]! // Execute the rounds. Each round is provided the order in which it // needs to use the message words. @@ -319,7 +322,7 @@ ENTRY(blake2b_compress_neon) veor q3, q3, q7 // v[6..7] ^= v[14..15] veor q0, q0, q8 // v[0..1] ^= h[0..1] veor q1, q1, q9 // v[2..3] ^= h[2..3] - mov ip, STATE + mov ip, CTX subs NBLOCKS, NBLOCKS, #1 // nblocks-- vst1.64 {q0-q1}, [ip]! 
// Store new h[0..3] veor q2, q2, q10 // v[4..5] ^= h[4..5] diff --git a/lib/crypto/arm/blake2b.h b/lib/crypto/arm/blake2b.h new file mode 100644 index 000000000000..1b9154d119db --- /dev/null +++ b/lib/crypto/arm/blake2b.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * BLAKE2b digest algorithm, NEON accelerated + * + * Copyright 2020 Google LLC + */ + +#include <asm/neon.h> +#include <asm/simd.h> + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); + +asmlinkage void blake2b_compress_neon(struct blake2b_ctx *ctx, + const u8 *data, size_t nblocks, u32 inc); + +static void blake2b_compress(struct blake2b_ctx *ctx, + const u8 *data, size_t nblocks, u32 inc) +{ + if (!static_branch_likely(&have_neon) || !may_use_simd()) { + blake2b_compress_generic(ctx, data, nblocks, inc); + return; + } + do { + const size_t blocks = min_t(size_t, nblocks, + SZ_4K / BLAKE2B_BLOCK_SIZE); + + kernel_neon_begin(); + blake2b_compress_neon(ctx, data, blocks, inc); + kernel_neon_end(); + + data += blocks * BLAKE2B_BLOCK_SIZE; + nblocks -= blocks; + } while (nblocks); +} + +#define blake2b_mod_init_arch blake2b_mod_init_arch +static void blake2b_mod_init_arch(void) +{ + if (elf_hwcap & HWCAP_NEON) + static_branch_enable(&have_neon); +} |