summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arch/arm/crypto/Kconfig16
-rw-r--r--arch/arm/crypto/Makefile2
-rw-r--r--arch/arm/crypto/blake2b-neon-glue.c104
-rw-r--r--lib/crypto/Kconfig1
-rw-r--r--lib/crypto/Makefile1
-rw-r--r--lib/crypto/arm/blake2b-neon-core.S (renamed from arch/arm/crypto/blake2b-neon-core.S)29
-rw-r--r--lib/crypto/arm/blake2b.h41
7 files changed, 59 insertions, 135 deletions
diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
index c436eec22d86..f30d743df264 100644
--- a/arch/arm/crypto/Kconfig
+++ b/arch/arm/crypto/Kconfig
@@ -33,22 +33,6 @@ config CRYPTO_NHPOLY1305_NEON
Architecture: arm using:
- NEON (Advanced SIMD) extensions
-config CRYPTO_BLAKE2B_NEON
- tristate "Hash functions: BLAKE2b (NEON)"
- depends on KERNEL_MODE_NEON
- select CRYPTO_BLAKE2B
- help
- BLAKE2b cryptographic hash function (RFC 7693)
-
- Architecture: arm using
- - NEON (Advanced SIMD) extensions
-
- BLAKE2b digest algorithm optimized with ARM NEON instructions.
- On ARM processors that have NEON support but not the ARMv8
- Crypto Extensions, typically this BLAKE2b implementation is
- much faster than the SHA-2 family and slightly faster than
- SHA-1.
-
config CRYPTO_AES_ARM
tristate "Ciphers: AES"
select CRYPTO_ALGAPI
diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile
index 6346a73effc0..86dd43313dbf 100644
--- a/arch/arm/crypto/Makefile
+++ b/arch/arm/crypto/Makefile
@@ -5,7 +5,6 @@
obj-$(CONFIG_CRYPTO_AES_ARM) += aes-arm.o
obj-$(CONFIG_CRYPTO_AES_ARM_BS) += aes-arm-bs.o
-obj-$(CONFIG_CRYPTO_BLAKE2B_NEON) += blake2b-neon.o
obj-$(CONFIG_CRYPTO_NHPOLY1305_NEON) += nhpoly1305-neon.o
obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o
@@ -13,7 +12,6 @@ obj-$(CONFIG_CRYPTO_GHASH_ARM_CE) += ghash-arm-ce.o
aes-arm-y := aes-cipher-core.o aes-cipher-glue.o
aes-arm-bs-y := aes-neonbs-core.o aes-neonbs-glue.o
-blake2b-neon-y := blake2b-neon-core.o blake2b-neon-glue.o
aes-arm-ce-y := aes-ce-core.o aes-ce-glue.o
ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o
nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o
diff --git a/arch/arm/crypto/blake2b-neon-glue.c b/arch/arm/crypto/blake2b-neon-glue.c
deleted file mode 100644
index 2ff443a91724..000000000000
--- a/arch/arm/crypto/blake2b-neon-glue.c
+++ /dev/null
@@ -1,104 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * BLAKE2b digest algorithm, NEON accelerated
- *
- * Copyright 2020 Google LLC
- */
-
-#include <crypto/internal/blake2b.h>
-#include <crypto/internal/hash.h>
-
-#include <linux/module.h>
-#include <linux/sizes.h>
-
-#include <asm/neon.h>
-#include <asm/simd.h>
-
-asmlinkage void blake2b_compress_neon(struct blake2b_state *state,
- const u8 *block, size_t nblocks, u32 inc);
-
-static void blake2b_compress_arch(struct blake2b_state *state,
- const u8 *block, size_t nblocks, u32 inc)
-{
- do {
- const size_t blocks = min_t(size_t, nblocks,
- SZ_4K / BLAKE2B_BLOCK_SIZE);
-
- kernel_neon_begin();
- blake2b_compress_neon(state, block, blocks, inc);
- kernel_neon_end();
-
- nblocks -= blocks;
- block += blocks * BLAKE2B_BLOCK_SIZE;
- } while (nblocks);
-}
-
-static int crypto_blake2b_update_neon(struct shash_desc *desc,
- const u8 *in, unsigned int inlen)
-{
- return crypto_blake2b_update_bo(desc, in, inlen, blake2b_compress_arch);
-}
-
-static int crypto_blake2b_finup_neon(struct shash_desc *desc, const u8 *in,
- unsigned int inlen, u8 *out)
-{
- return crypto_blake2b_finup(desc, in, inlen, out,
- blake2b_compress_arch);
-}
-
-#define BLAKE2B_ALG(name, driver_name, digest_size) \
- { \
- .base.cra_name = name, \
- .base.cra_driver_name = driver_name, \
- .base.cra_priority = 200, \
- .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY | \
- CRYPTO_AHASH_ALG_BLOCK_ONLY | \
- CRYPTO_AHASH_ALG_FINAL_NONZERO, \
- .base.cra_blocksize = BLAKE2B_BLOCK_SIZE, \
- .base.cra_ctxsize = sizeof(struct blake2b_tfm_ctx), \
- .base.cra_module = THIS_MODULE, \
- .digestsize = digest_size, \
- .setkey = crypto_blake2b_setkey, \
- .init = crypto_blake2b_init, \
- .update = crypto_blake2b_update_neon, \
- .finup = crypto_blake2b_finup_neon, \
- .descsize = sizeof(struct blake2b_state), \
- .statesize = BLAKE2B_STATE_SIZE, \
- }
-
-static struct shash_alg blake2b_neon_algs[] = {
- BLAKE2B_ALG("blake2b-160", "blake2b-160-neon", BLAKE2B_160_HASH_SIZE),
- BLAKE2B_ALG("blake2b-256", "blake2b-256-neon", BLAKE2B_256_HASH_SIZE),
- BLAKE2B_ALG("blake2b-384", "blake2b-384-neon", BLAKE2B_384_HASH_SIZE),
- BLAKE2B_ALG("blake2b-512", "blake2b-512-neon", BLAKE2B_512_HASH_SIZE),
-};
-
-static int __init blake2b_neon_mod_init(void)
-{
- if (!(elf_hwcap & HWCAP_NEON))
- return -ENODEV;
-
- return crypto_register_shashes(blake2b_neon_algs,
- ARRAY_SIZE(blake2b_neon_algs));
-}
-
-static void __exit blake2b_neon_mod_exit(void)
-{
- crypto_unregister_shashes(blake2b_neon_algs,
- ARRAY_SIZE(blake2b_neon_algs));
-}
-
-module_init(blake2b_neon_mod_init);
-module_exit(blake2b_neon_mod_exit);
-
-MODULE_DESCRIPTION("BLAKE2b digest algorithm, NEON accelerated");
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>");
-MODULE_ALIAS_CRYPTO("blake2b-160");
-MODULE_ALIAS_CRYPTO("blake2b-160-neon");
-MODULE_ALIAS_CRYPTO("blake2b-256");
-MODULE_ALIAS_CRYPTO("blake2b-256-neon");
-MODULE_ALIAS_CRYPTO("blake2b-384");
-MODULE_ALIAS_CRYPTO("blake2b-384-neon");
-MODULE_ALIAS_CRYPTO("blake2b-512");
-MODULE_ALIAS_CRYPTO("blake2b-512-neon");
diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig
index 918378b7e833..280b888153bf 100644
--- a/lib/crypto/Kconfig
+++ b/lib/crypto/Kconfig
@@ -37,6 +37,7 @@ config CRYPTO_LIB_BLAKE2B
config CRYPTO_LIB_BLAKE2B_ARCH
bool
depends on CRYPTO_LIB_BLAKE2B && !UML
+ default y if ARM && KERNEL_MODE_NEON
# BLAKE2s support is always built-in, so there's no CRYPTO_LIB_BLAKE2S option.
diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile
index f863417b1681..bc26777d08e9 100644
--- a/lib/crypto/Makefile
+++ b/lib/crypto/Makefile
@@ -36,6 +36,7 @@ libblake2b-y := blake2b.o
CFLAGS_blake2b.o := -Wframe-larger-than=4096 # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105930
ifeq ($(CONFIG_CRYPTO_LIB_BLAKE2B_ARCH),y)
CFLAGS_blake2b.o += -I$(src)/$(SRCARCH)
+libblake2b-$(CONFIG_ARM) += arm/blake2b-neon-core.o
endif # CONFIG_CRYPTO_LIB_BLAKE2B_ARCH
################################################################################
diff --git a/arch/arm/crypto/blake2b-neon-core.S b/lib/crypto/arm/blake2b-neon-core.S
index 0406a186377f..b55c37f0b88f 100644
--- a/arch/arm/crypto/blake2b-neon-core.S
+++ b/lib/crypto/arm/blake2b-neon-core.S
@@ -1,6 +1,9 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
- * BLAKE2b digest algorithm, NEON accelerated
+ * BLAKE2b digest algorithm optimized with ARM NEON instructions. On ARM
+ * processors that have NEON support but not the ARMv8 Crypto Extensions,
+ * typically this BLAKE2b implementation is much faster than the SHA-2 family
+ * and slightly faster than SHA-1.
*
* Copyright 2020 Google LLC
*
@@ -13,8 +16,8 @@
.fpu neon
// The arguments to blake2b_compress_neon()
- STATE .req r0
- BLOCK .req r1
+ CTX .req r0
+ DATA .req r1
NBLOCKS .req r2
INC .req r3
@@ -234,10 +237,10 @@
.endm
//
-// void blake2b_compress_neon(struct blake2b_state *state,
-// const u8 *block, size_t nblocks, u32 inc);
+// void blake2b_compress_neon(struct blake2b_ctx *ctx,
+// const u8 *data, size_t nblocks, u32 inc);
//
-// Only the first three fields of struct blake2b_state are used:
+// Only the first three fields of struct blake2b_ctx are used:
// u64 h[8]; (inout)
// u64 t[2]; (inout)
// u64 f[2]; (in)
@@ -255,7 +258,7 @@ ENTRY(blake2b_compress_neon)
adr ROR24_TABLE, .Lror24_table
adr ROR16_TABLE, .Lror16_table
- mov ip, STATE
+ mov ip, CTX
vld1.64 {q0-q1}, [ip]! // Load h[0..3]
vld1.64 {q2-q3}, [ip]! // Load h[4..7]
.Lnext_block:
@@ -281,14 +284,14 @@ ENTRY(blake2b_compress_neon)
// (q8-q9) in an aligned buffer on the stack so that they can be
// reloaded when needed. (We could just reload directly from the
// message buffer, but it's faster to use aligned loads.)
- vld1.8 {q8-q9}, [BLOCK]!
+ vld1.8 {q8-q9}, [DATA]!
veor q6, q6, q14 // v[12..13] = IV[4..5] ^ t[0..1]
- vld1.8 {q10-q11}, [BLOCK]!
+ vld1.8 {q10-q11}, [DATA]!
veor q7, q7, q15 // v[14..15] = IV[6..7] ^ f[0..1]
- vld1.8 {q12-q13}, [BLOCK]!
+ vld1.8 {q12-q13}, [DATA]!
vst1.8 {q8-q9}, [sp, :256]
- mov ip, STATE
- vld1.8 {q14-q15}, [BLOCK]!
+ mov ip, CTX
+ vld1.8 {q14-q15}, [DATA]!
// Execute the rounds. Each round is provided the order in which it
// needs to use the message words.
@@ -319,7 +322,7 @@ ENTRY(blake2b_compress_neon)
veor q3, q3, q7 // v[6..7] ^= v[14..15]
veor q0, q0, q8 // v[0..1] ^= h[0..1]
veor q1, q1, q9 // v[2..3] ^= h[2..3]
- mov ip, STATE
+ mov ip, CTX
subs NBLOCKS, NBLOCKS, #1 // nblocks--
vst1.64 {q0-q1}, [ip]! // Store new h[0..3]
veor q2, q2, q10 // v[4..5] ^= h[4..5]
diff --git a/lib/crypto/arm/blake2b.h b/lib/crypto/arm/blake2b.h
new file mode 100644
index 000000000000..1b9154d119db
--- /dev/null
+++ b/lib/crypto/arm/blake2b.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * BLAKE2b digest algorithm, NEON accelerated
+ *
+ * Copyright 2020 Google LLC
+ */
+
+#include <asm/neon.h>
+#include <asm/simd.h>
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
+
+asmlinkage void blake2b_compress_neon(struct blake2b_ctx *ctx,
+ const u8 *data, size_t nblocks, u32 inc);
+
+static void blake2b_compress(struct blake2b_ctx *ctx,
+ const u8 *data, size_t nblocks, u32 inc)
+{
+ if (!static_branch_likely(&have_neon) || !may_use_simd()) {
+ blake2b_compress_generic(ctx, data, nblocks, inc);
+ return;
+ }
+ do {
+ const size_t blocks = min_t(size_t, nblocks,
+ SZ_4K / BLAKE2B_BLOCK_SIZE);
+
+ kernel_neon_begin();
+ blake2b_compress_neon(ctx, data, blocks, inc);
+ kernel_neon_end();
+
+ data += blocks * BLAKE2B_BLOCK_SIZE;
+ nblocks -= blocks;
+ } while (nblocks);
+}
+
+#define blake2b_mod_init_arch blake2b_mod_init_arch
+static void blake2b_mod_init_arch(void)
+{
+ if (elf_hwcap & HWCAP_NEON)
+ static_branch_enable(&have_neon);
+}