| field | value | date |
|---|---|---|
| author | Herbert Xu <herbert@gondor.apana.org.au> | 2025-03-17 16:33:59 +0800 |
| committer | Herbert Xu <herbert@gondor.apana.org.au> | 2025-04-07 13:22:25 +0800 |
| commit | c47e1f4142a3823b6e963e14db295a8a733804b5 (patch) | |
| tree | 53b32f282ddba99c35212b498d18c452873d3f30 /crypto/scompress.c | |
| parent | 138804c2c18ca8bd1443dea173b3cc2643995919 (diff) | |
crypto: scomp - Allocate per-cpu buffer on first use of each CPU
Per-cpu buffers can be wasteful when the number of CPUs is large,
especially if the buffers are unlikely ever to be used. Reduce such
wastage by allocating each CPU's buffer only on that CPU's first use.
On start-up, allocate a single buffer for the first possible CPU. For
every other CPU, a work struct is scheduled on first use to allocate
that CPU's buffer. Until that allocation succeeds, requests simply
fall back to the first CPU's buffer, which is protected by a spin
lock (see the sketch below).
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
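For readers skimming the patch, the heart of the scheme is the scratch lock helper. The following is a condensed sketch distilled from the diff below (the `__acquires()` annotation and surrounding code are trimmed; the diff itself is authoritative):

```c
/*
 * Condensed sketch of the lazy per-CPU scratch scheme (from the diff
 * below, trimmed): a CPU whose buffer does not exist yet flags itself
 * in scomp_scratch_want, kicks the allocation work item, and borrows
 * the first possible CPU's buffer in the meantime.
 */
static struct scomp_scratch *scomp_lock_scratch_bh(void)
{
	int cpu = raw_smp_processor_id();
	struct scomp_scratch *scratch;

	scratch = per_cpu_ptr(&scomp_scratch, cpu);
	spin_lock_bh(&scratch->lock);
	if (likely(scratch->src))	/* fast path: buffer already exists */
		return scratch;
	spin_unlock(&scratch->lock);	/* plain unlock: BHs stay disabled */

	/* Ask the work item to allocate this CPU's buffer for next time. */
	cpumask_set_cpu(cpu, &scomp_scratch_want);
	schedule_work(&scomp_scratch_work);

	/* Meanwhile, share the first CPU's buffer under its lock. */
	scratch = per_cpu_ptr(&scomp_scratch, cpumask_first(cpu_possible_mask));
	spin_lock(&scratch->lock);
	return scratch;
}
```

Note the asymmetric unlock: the slow path uses plain spin_unlock() so bottom halves remain disabled across the switch to the fallback buffer, and the matching scomp_unlock_scratch_bh() re-enables them at the end.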
Diffstat (limited to 'crypto/scompress.c')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | crypto/scompress.c | 199 |

1 file changed, 158 insertions(+), 41 deletions(-)
```diff
diff --git a/crypto/scompress.c b/crypto/scompress.c
index d435d4b24469..f4e3376ca7d3 100644
--- a/crypto/scompress.c
+++ b/crypto/scompress.c
@@ -10,6 +10,7 @@
 #include <crypto/internal/acompress.h>
 #include <crypto/internal/scompress.h>
 #include <crypto/scatterwalk.h>
+#include <linux/cpumask.h>
 #include <linux/cryptouser.h>
 #include <linux/err.h>
 #include <linux/highmem.h>
@@ -20,7 +21,7 @@
 #include <linux/seq_file.h>
 #include <linux/slab.h>
 #include <linux/string.h>
-#include <linux/vmalloc.h>
+#include <linux/workqueue.h>
 #include <net/netlink.h>
 
 #include "compress.h"
@@ -44,6 +45,10 @@ static const struct crypto_type crypto_scomp_type;
 static int scomp_scratch_users;
 static DEFINE_MUTEX(scomp_lock);
 
+static cpumask_t scomp_scratch_want;
+static void scomp_scratch_workfn(struct work_struct *work);
+static DECLARE_WORK(scomp_scratch_work, scomp_scratch_workfn);
+
 static int __maybe_unused crypto_scomp_report(
 	struct sk_buff *skb, struct crypto_alg *alg)
 {
@@ -74,36 +79,57 @@ static void crypto_scomp_free_scratches(void)
 		scratch = per_cpu_ptr(&scomp_scratch, i);
 
 		free_page(scratch->saddr);
-		vfree(scratch->dst);
+		kvfree(scratch->dst);
 		scratch->src = NULL;
 		scratch->dst = NULL;
 	}
 }
 
-static int crypto_scomp_alloc_scratches(void)
+static int scomp_alloc_scratch(struct scomp_scratch *scratch, int cpu)
 {
-	struct scomp_scratch *scratch;
-	int i;
+	int node = cpu_to_node(cpu);
+	struct page *page;
+	void *mem;
 
-	for_each_possible_cpu(i) {
-		struct page *page;
-		void *mem;
+	mem = kvmalloc_node(SCOMP_SCRATCH_SIZE, GFP_KERNEL, node);
+	if (!mem)
+		return -ENOMEM;
+	page = alloc_pages_node(node, GFP_KERNEL, 0);
+	if (!page) {
+		kvfree(mem);
+		return -ENOMEM;
+	}
+	spin_lock_bh(&scratch->lock);
+	scratch->src = page_address(page);
+	scratch->dst = mem;
+	spin_unlock_bh(&scratch->lock);
+	return 0;
+}
 
-		scratch = per_cpu_ptr(&scomp_scratch, i);
+static void scomp_scratch_workfn(struct work_struct *work)
+{
+	int cpu;
 
-		page = alloc_pages_node(cpu_to_node(i), GFP_KERNEL, 0);
-		if (!page)
-			goto error;
-		scratch->src = page_address(page);
-		mem = vmalloc_node(SCOMP_SCRATCH_SIZE, cpu_to_node(i));
-		if (!mem)
-			goto error;
-		scratch->dst = mem;
+	for_each_cpu(cpu, &scomp_scratch_want) {
+		struct scomp_scratch *scratch;
+
+		scratch = per_cpu_ptr(&scomp_scratch, cpu);
+		if (scratch->src)
+			continue;
+		if (scomp_alloc_scratch(scratch, cpu))
+			break;
+
+		cpumask_clear_cpu(cpu, &scomp_scratch_want);
 	}
-	return 0;
-error:
-	crypto_scomp_free_scratches();
-	return -ENOMEM;
+}
+
+static int crypto_scomp_alloc_scratches(void)
+{
+	unsigned int i = cpumask_first(cpu_possible_mask);
+	struct scomp_scratch *scratch;
+
+	scratch = per_cpu_ptr(&scomp_scratch, i);
+	return scomp_alloc_scratch(scratch, i);
 }
 
 static void scomp_free_streams(struct scomp_alg *alg)
@@ -115,7 +141,7 @@ static void scomp_free_streams(struct scomp_alg *alg)
 		struct crypto_acomp_stream *ps = per_cpu_ptr(stream, i);
 
 		if (!ps->ctx)
-			break;
+			continue;
 
 		alg->free_ctx(ps->ctx);
 	}
@@ -126,21 +152,26 @@ static void scomp_free_streams(struct scomp_alg *alg)
 static int scomp_alloc_streams(struct scomp_alg *alg)
 {
 	struct crypto_acomp_stream __percpu *stream;
-	int i;
+	struct crypto_acomp_stream *ps;
+	unsigned int i;
+	void *ctx;
 
 	stream = alloc_percpu(struct crypto_acomp_stream);
 	if (!stream)
 		return -ENOMEM;
 
-	for_each_possible_cpu(i) {
-		struct crypto_acomp_stream *ps = per_cpu_ptr(stream, i);
+	ctx = alg->alloc_ctx();
+	if (IS_ERR(ctx)) {
+		free_percpu(stream);
+		return PTR_ERR(ctx);
+	}
 
-		ps->ctx = alg->alloc_ctx();
-		if (IS_ERR(ps->ctx)) {
-			scomp_free_streams(alg);
-			return PTR_ERR(ps->ctx);
-		}
+	i = cpumask_first(cpu_possible_mask);
+	ps = per_cpu_ptr(stream, i);
+	ps->ctx = ctx;
 
+	for_each_possible_cpu(i) {
+		ps = per_cpu_ptr(stream, i);
 		spin_lock_init(&ps->lock);
 	}
 
@@ -148,6 +179,33 @@ static int scomp_alloc_streams(struct scomp_alg *alg)
 	return 0;
 }
 
+static void scomp_stream_workfn(struct work_struct *work)
+{
+	struct scomp_alg *alg = container_of(work, struct scomp_alg,
+					     stream_work);
+	struct crypto_acomp_stream __percpu *stream = alg->stream;
+	int cpu;
+
+	for_each_cpu(cpu, &alg->stream_want) {
+		struct crypto_acomp_stream *ps;
+		void *ctx;
+
+		ps = per_cpu_ptr(stream, cpu);
+		if (ps->ctx)
+			continue;
+
+		ctx = alg->alloc_ctx();
+		if (IS_ERR(ctx))
+			break;
+
+		spin_lock_bh(&ps->lock);
+		ps->ctx = ctx;
+		spin_unlock_bh(&ps->lock);
+
+		cpumask_clear_cpu(cpu, &alg->stream_want);
+	}
+}
+
 static int crypto_scomp_init_tfm(struct crypto_tfm *tfm)
 {
 	struct scomp_alg *alg = crypto_scomp_alg(__crypto_scomp_tfm(tfm));
@@ -171,13 +229,67 @@ unlock:
 	return ret;
 }
 
+static struct scomp_scratch *scomp_lock_scratch_bh(void) __acquires(scratch)
+{
+	int cpu = raw_smp_processor_id();
+	struct scomp_scratch *scratch;
+
+	scratch = per_cpu_ptr(&scomp_scratch, cpu);
+	spin_lock_bh(&scratch->lock);
+	if (likely(scratch->src))
+		return scratch;
+	spin_unlock(&scratch->lock);
+
+	cpumask_set_cpu(cpu, &scomp_scratch_want);
+	schedule_work(&scomp_scratch_work);
+
+	scratch = per_cpu_ptr(&scomp_scratch, cpumask_first(cpu_possible_mask));
+	spin_lock(&scratch->lock);
+	return scratch;
+}
+
+static inline void scomp_unlock_scratch_bh(struct scomp_scratch *scratch)
+	__releases(scratch)
+{
+	spin_unlock_bh(&scratch->lock);
+}
+
+static struct crypto_acomp_stream *scomp_lock_stream(struct crypto_scomp *tfm)
+	__acquires(stream)
+{
+	struct scomp_alg *alg = crypto_scomp_alg(tfm);
+	struct crypto_acomp_stream __percpu *stream;
+	int cpu = raw_smp_processor_id();
+	struct crypto_acomp_stream *ps;
+
+	stream = alg->stream;
+	ps = per_cpu_ptr(stream, cpu);
+	spin_lock(&ps->lock);
+	if (likely(ps->ctx))
+		return ps;
+	spin_unlock(&ps->lock);
+
+	cpumask_set_cpu(cpu, &alg->stream_want);
+	schedule_work(&alg->stream_work);
+
+	ps = per_cpu_ptr(stream, cpumask_first(cpu_possible_mask));
+	spin_lock(&ps->lock);
+	return ps;
+}
+
+static inline void scomp_unlock_stream(struct crypto_acomp_stream *stream)
+	__releases(stream)
+{
+	spin_unlock(&stream->lock);
+}
+
 static int scomp_acomp_comp_decomp(struct acomp_req *req, int dir)
 {
-	struct scomp_scratch *scratch = raw_cpu_ptr(&scomp_scratch);
 	struct crypto_acomp *tfm = crypto_acomp_reqtfm(req);
 	struct crypto_scomp **tfm_ctx = acomp_tfm_ctx(tfm);
 	struct crypto_scomp *scomp = *tfm_ctx;
 	struct crypto_acomp_stream *stream;
+	struct scomp_scratch *scratch;
 	unsigned int slen = req->slen;
 	unsigned int dlen = req->dlen;
 	struct page *spage, *dpage;
@@ -194,6 +306,8 @@ static int scomp_acomp_comp_decomp(struct acomp_req *req, int dir)
 	if (!req->dst || !dlen)
 		return -EINVAL;
 
+	scratch = scomp_lock_scratch_bh();
+
 	if (acomp_request_src_isvirt(req))
 		src = req->svirt;
 	else {
@@ -218,6 +332,9 @@ static int scomp_acomp_comp_decomp(struct acomp_req *req, int dir)
 				break;
 			src = kmap_local_page(spage) + soff;
 		} while (0);
+
+		if (src == scratch->src)
+			memcpy_from_sglist(scratch->src, req->src, 0, slen);
 	}
 
 	if (acomp_request_dst_isvirt(req))
@@ -250,13 +367,7 @@ static int scomp_acomp_comp_decomp(struct acomp_req *req, int dir)
 			dlen = min(dlen, max);
 	}
 
-	spin_lock_bh(&scratch->lock);
-
-	if (src == scratch->src)
-		memcpy_from_sglist(scratch->src, req->src, 0, slen);
-
-	stream = raw_cpu_ptr(crypto_scomp_alg(scomp)->stream);
-	spin_lock(&stream->lock);
+	stream = scomp_lock_stream(scomp);
 	if (dir)
 		ret = crypto_scomp_compress(scomp, src, slen,
 					    dst, &dlen, stream->ctx);
@@ -267,8 +378,8 @@ static int scomp_acomp_comp_decomp(struct acomp_req *req, int dir)
 	if (dst == scratch->dst)
 		memcpy_to_sglist(req->dst, 0, dst, dlen);
 
-	spin_unlock(&stream->lock);
-	spin_unlock_bh(&scratch->lock);
+	scomp_unlock_stream(stream);
+	scomp_unlock_scratch_bh(scratch);
 
 	req->dlen = dlen;
 
@@ -319,6 +430,7 @@ static void crypto_exit_scomp_ops_async(struct crypto_tfm *tfm)
 
 	crypto_free_scomp(*ctx);
 
+	flush_work(&scomp_scratch_work);
 	mutex_lock(&scomp_lock);
 	if (!--scomp_scratch_users)
 		crypto_scomp_free_scratches();
@@ -352,7 +464,10 @@ int crypto_init_scomp_ops_async(struct crypto_tfm *tfm)
 
 static void crypto_scomp_destroy(struct crypto_alg *alg)
 {
-	scomp_free_streams(__crypto_scomp_alg(alg));
+	struct scomp_alg *scomp = __crypto_scomp_alg(alg);
+
+	cancel_work_sync(&scomp->stream_work);
+	scomp_free_streams(scomp);
 }
 
 static const struct crypto_type crypto_scomp_type = {
@@ -378,6 +493,8 @@ static void scomp_prepare_alg(struct scomp_alg *alg)
 	comp_prepare_alg(&alg->calg);
 
 	base->cra_flags |= CRYPTO_ALG_REQ_CHAIN;
+
+	INIT_WORK(&alg->stream_work, scomp_stream_workfn);
 }
 
 int crypto_register_scomp(struct scomp_alg *alg)
```
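One consequence of allocating from a work item is that both teardown paths must quiesce that work before freeing its products. The ordering below is condensed from the two teardown hunks above (unrelated lines elided; the mutex_unlock() pairing is assumed from the surrounding file, not shown in the diff):

```c
/* Condensed from the diff above: quiesce the allocator work item
 * before freeing, so it cannot install a buffer or ctx afterwards. */

static void crypto_exit_scomp_ops_async(struct crypto_tfm *tfm)
{
	/* ... */
	flush_work(&scomp_scratch_work);	/* let pending allocations land */
	mutex_lock(&scomp_lock);
	if (!--scomp_scratch_users)
		crypto_scomp_free_scratches();	/* frees every allocated buffer */
	mutex_unlock(&scomp_lock);
}

static void crypto_scomp_destroy(struct crypto_alg *alg)
{
	struct scomp_alg *scomp = __crypto_scomp_alg(alg);

	cancel_work_sync(&scomp->stream_work);	/* stop lazy ctx allocation */
	scomp_free_streams(scomp);
}
```

The switch from break to continue in scomp_free_streams() is part of the same story: with lazy allocation the populated per-CPU slots can be sparse, so freeing can no longer stop at the first NULL ctx.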