summaryrefslogtreecommitdiff
path: root/lib/crypto
diff options
context:
space:
mode:
authorEric Biggers <ebiggers@kernel.org>2025-11-02 15:42:09 -0800
committerEric Biggers <ebiggers@kernel.org>2025-11-05 20:30:52 -0800
commit8ba60c5914f25a44f10189c6919a737b199f6dbf (patch)
treeea6f5dde3fa02e6ba66547cd982c6ccbfdf27483 /lib/crypto
parentcd5528621abb01664a477392cd3e76be2ef6296b (diff)
lib/crypto: x86/blake2s: Use vpternlogd for 3-input XORs
AVX-512 supports 3-input XORs via the vpternlogd (or vpternlogq) instruction with immediate 0x96. This approach, vs. the alternative of two vpxor instructions, is already used in the CRC, AES-GCM, and AES-XTS code, since it reduces the instruction count and is faster on some CPUs. Make blake2s_compress_avx512() take advantage of it too. Reviewed-by: Ard Biesheuvel <ardb@kernel.org> Link: https://lore.kernel.org/r/20251102234209.62133-7-ebiggers@kernel.org Signed-off-by: Eric Biggers <ebiggers@kernel.org>
Diffstat (limited to 'lib/crypto')
-rw-r--r--lib/crypto/x86/blake2s-core.S6
1 files changed, 2 insertions, 4 deletions
diff --git a/lib/crypto/x86/blake2s-core.S b/lib/crypto/x86/blake2s-core.S
index 869064f6ac16..7b1d98ca7482 100644
--- a/lib/crypto/x86/blake2s-core.S
+++ b/lib/crypto/x86/blake2s-core.S
@@ -278,10 +278,8 @@ SYM_FUNC_START(blake2s_compress_avx512)
jne .Lavx512_roundloop
// Compute the new h: h[0..7] ^= v[0..7] ^ v[8..15]
- vpxor %xmm10,%xmm0,%xmm0
- vpxor %xmm11,%xmm1,%xmm1
- vpxor %xmm2,%xmm0,%xmm0
- vpxor %xmm3,%xmm1,%xmm1
+ vpternlogd $0x96,%xmm10,%xmm2,%xmm0
+ vpternlogd $0x96,%xmm11,%xmm3,%xmm1
decq NBLOCKS
jne .Lavx512_mainloop