author     Linus Torvalds <torvalds@linux-foundation.org>  2025-12-03 16:54:54 -0800
committer  Linus Torvalds <torvalds@linux-foundation.org>  2025-12-03 16:54:54 -0800
commit     015e7b0b0e8e51f7321ec2aafc1d7fc0a8a5536f (patch)
tree       258f719e59946c733dd03198eba404e85f9d0945 /kernel/bpf/stream.c
parent     b6d993310a65b994f37e3347419d9ed398ee37a3 (diff)
parent     ff34657aa72a4dab9c2fd38e1b31a506951f4b1c (diff)
Merge tag 'bpf-next-6.19' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Pull bpf updates from Alexei Starovoitov:

 - Convert selftests/bpf/test_tc_edt and test_tc_tunnel from .sh to
   test_progs runner (Alexis Lothoré)

 - Convert selftests/bpf/test_xsk to test_progs runner (Bastien Curutchet)

 - Replace bpf memory allocator with kmalloc_nolock() in bpf_local_storage
   (Amery Hung), and in bpf streams and range tree (Puranjay Mohan); a
   sketch of the stream.c side of this change follows this log

 - Introduce support for indirect jumps in BPF verifier and x86 JIT
   (Anton Protopopov) and arm64 JIT (Puranjay Mohan)

 - Remove runqslower bpf tool (Hoyeon Lee)

 - Fix corner cases in the verifier to close several syzbot reports
   (Eduard Zingerman, KaFai Wan)

 - Several improvements in deadlock detection in rqspinlock
   (Kumar Kartikeya Dwivedi)

 - Implement "jmp" mode for BPF trampoline and corresponding
   DYNAMIC_FTRACE_WITH_JMP. It improves "fexit" program type performance
   from 80 M/s to 136 M/s. With Steven's Ack. (Menglong Dong)

 - Add ability to test non-linear skbs in BPF_PROG_TEST_RUN (Paul Chaignon)

 - Do not let BPF_PROG_TEST_RUN emit invalid GSO types to stack
   (Daniel Borkmann)

 - Generalize buildid reader into bpf_dynptr (Mykyta Yatsenko)

 - Optimize bpf_map_update_elem() for map-in-map types
   (Ritesh Oedayrajsingh Varma)

 - Introduce overwrite mode for BPF ring buffer (Xu Kuohai)

* tag 'bpf-next-6.19' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (169 commits)
  bpf: optimize bpf_map_update_elem() for map-in-map types
  bpf: make kprobe_multi_link_prog_run always_inline
  selftests/bpf: do not hardcode target rate in test_tc_edt BPF program
  selftests/bpf: remove test_tc_edt.sh
  selftests/bpf: integrate test_tc_edt into test_progs
  selftests/bpf: rename test_tc_edt.bpf.c section to expose program type
  selftests/bpf: Add success stats to rqspinlock stress test
  rqspinlock: Precede non-head waiter queueing with AA check
  rqspinlock: Disable spinning for trylock fallback
  rqspinlock: Use trylock fallback when per-CPU rqnode is busy
  rqspinlock: Perform AA checks immediately
  rqspinlock: Enclose lock/unlock within lock entry acquisitions
  bpf: Remove runqslower tool
  selftests/bpf: Remove usage of lsm/file_alloc_security in selftest
  bpf: Disable file_alloc_security hook
  bpf: check for insn arrays in check_ptr_alignment
  bpf: force BPF_F_RDONLY_PROG on insn array creation
  bpf: Fix exclusive map memory leak
  selftests/bpf: Make CS length configurable for rqspinlock stress test
  selftests/bpf: Add lock wait time stats to rqspinlock stress test
  ...
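For the bpf streams piece of the kmalloc_nolock() conversion, here is a
minimal sketch of the post-merge allocation path, lifted from the stream.c
hunks below. The wrapper name alloc_stream_elem_sketch is hypothetical;
struct bpf_stream_elem, kmalloc_nolock(), and kfree_nolock() are taken
from the diff itself.

/*
 * Sketch only, not a drop-in patch. kmalloc_nolock() is callable from
 * any context (including NMI) and may fail, so callers must handle a
 * NULL return.
 */
static struct bpf_stream_elem *alloc_stream_elem_sketch(int len)
{
	struct bpf_stream_elem *elem;
	size_t alloc_size;

	/* Header plus 'len' bytes of message text (flexible array). */
	alloc_size = offsetof(struct bpf_stream_elem, str[len]);
	/* __GFP_ZERO zeroes the element; -1 selects any NUMA node. */
	elem = kmalloc_nolock(alloc_size, __GFP_ZERO, -1);
	if (!elem)
		return NULL;
	bpf_stream_elem_init(elem, len);
	return elem;	/* later released with kfree_nolock(elem) */
}

Because each element is now an independent allocation with its own
lifetime, the per-page refcounting that the diff below removes is no
longer needed.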
Diffstat (limited to 'kernel/bpf/stream.c')
-rw-r--r--  kernel/bpf/stream.c  159
1 files changed, 8 insertions, 151 deletions
diff --git a/kernel/bpf/stream.c b/kernel/bpf/stream.c
index ff16c631951b..0b6bc3f30335 100644
--- a/kernel/bpf/stream.c
+++ b/kernel/bpf/stream.c
@@ -4,111 +4,10 @@
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/bpf_mem_alloc.h>
-#include <linux/percpu.h>
-#include <linux/refcount.h>
#include <linux/gfp.h>
#include <linux/memory.h>
-#include <linux/local_lock.h>
#include <linux/mutex.h>
-/*
- * Simple per-CPU NMI-safe bump allocation mechanism, backed by the NMI-safe
- * try_alloc_pages()/free_pages_nolock() primitives. We allocate a page and
- * stash it in a local per-CPU variable, and bump allocate from the page
- * whenever items need to be printed to a stream. Each page holds a global
- * atomic refcount in its first 4 bytes, and then records of variable length
- * that describe the printed messages. Once the global refcount has dropped to
- * zero, it is a signal to free the page back to the kernel's page allocator,
- * given all the individual records in it have been consumed.
- *
- * It is possible the same page is used to serve allocations across different
- * programs, which may be consumed at different times individually, hence
- * maintaining a reference count per-page is critical for correct lifetime
- * tracking.
- *
- * The bpf_stream_page code will be replaced to use kmalloc_nolock() once it
- * lands.
- */
-struct bpf_stream_page {
- refcount_t ref;
- u32 consumed;
- char buf[];
-};
-
-/* Available room to add data to a refcounted page. */
-#define BPF_STREAM_PAGE_SZ (PAGE_SIZE - offsetofend(struct bpf_stream_page, consumed))
-
-static DEFINE_PER_CPU(local_trylock_t, stream_local_lock) = INIT_LOCAL_TRYLOCK(stream_local_lock);
-static DEFINE_PER_CPU(struct bpf_stream_page *, stream_pcpu_page);
-
-static bool bpf_stream_page_local_lock(unsigned long *flags)
-{
- return local_trylock_irqsave(&stream_local_lock, *flags);
-}
-
-static void bpf_stream_page_local_unlock(unsigned long *flags)
-{
- local_unlock_irqrestore(&stream_local_lock, *flags);
-}
-
-static void bpf_stream_page_free(struct bpf_stream_page *stream_page)
-{
- struct page *p;
-
- if (!stream_page)
- return;
- p = virt_to_page(stream_page);
- free_pages_nolock(p, 0);
-}
-
-static void bpf_stream_page_get(struct bpf_stream_page *stream_page)
-{
- refcount_inc(&stream_page->ref);
-}
-
-static void bpf_stream_page_put(struct bpf_stream_page *stream_page)
-{
- if (refcount_dec_and_test(&stream_page->ref))
- bpf_stream_page_free(stream_page);
-}
-
-static void bpf_stream_page_init(struct bpf_stream_page *stream_page)
-{
- refcount_set(&stream_page->ref, 1);
- stream_page->consumed = 0;
-}
-
-static struct bpf_stream_page *bpf_stream_page_replace(void)
-{
- struct bpf_stream_page *stream_page, *old_stream_page;
- struct page *page;
-
- page = alloc_pages_nolock(/* Don't account */ 0, NUMA_NO_NODE, 0);
- if (!page)
- return NULL;
- stream_page = page_address(page);
- bpf_stream_page_init(stream_page);
-
- old_stream_page = this_cpu_read(stream_pcpu_page);
- if (old_stream_page)
- bpf_stream_page_put(old_stream_page);
- this_cpu_write(stream_pcpu_page, stream_page);
- return stream_page;
-}
-
-static int bpf_stream_page_check_room(struct bpf_stream_page *stream_page, int len)
-{
- int min = offsetof(struct bpf_stream_elem, str[0]);
- int consumed = stream_page->consumed;
- int total = BPF_STREAM_PAGE_SZ;
- int rem = max(0, total - consumed - min);
-
- /* Let's give room of at least 8 bytes. */
- WARN_ON_ONCE(rem % 8 != 0);
- rem = rem < 8 ? 0 : rem;
- return min(len, rem);
-}
-
static void bpf_stream_elem_init(struct bpf_stream_elem *elem, int len)
{
init_llist_node(&elem->node);
@@ -116,54 +15,12 @@ static void bpf_stream_elem_init(struct bpf_stream_elem *elem, int len)
elem->consumed_len = 0;
}
-static struct bpf_stream_page *bpf_stream_page_from_elem(struct bpf_stream_elem *elem)
-{
- unsigned long addr = (unsigned long)elem;
-
- return (struct bpf_stream_page *)PAGE_ALIGN_DOWN(addr);
-}
-
-static struct bpf_stream_elem *bpf_stream_page_push_elem(struct bpf_stream_page *stream_page, int len)
-{
- u32 consumed = stream_page->consumed;
-
- stream_page->consumed += round_up(offsetof(struct bpf_stream_elem, str[len]), 8);
- return (struct bpf_stream_elem *)&stream_page->buf[consumed];
-}
-
-static struct bpf_stream_elem *bpf_stream_page_reserve_elem(int len)
-{
- struct bpf_stream_elem *elem = NULL;
- struct bpf_stream_page *page;
- int room = 0;
-
- page = this_cpu_read(stream_pcpu_page);
- if (!page)
- page = bpf_stream_page_replace();
- if (!page)
- return NULL;
-
- room = bpf_stream_page_check_room(page, len);
- if (room != len)
- page = bpf_stream_page_replace();
- if (!page)
- return NULL;
- bpf_stream_page_get(page);
- room = bpf_stream_page_check_room(page, len);
- WARN_ON_ONCE(room != len);
-
- elem = bpf_stream_page_push_elem(page, room);
- bpf_stream_elem_init(elem, room);
- return elem;
-}
-
static struct bpf_stream_elem *bpf_stream_elem_alloc(int len)
{
const int max_len = ARRAY_SIZE((struct bpf_bprintf_buffers){}.buf);
struct bpf_stream_elem *elem;
- unsigned long flags;
+ size_t alloc_size;
- BUILD_BUG_ON(max_len > BPF_STREAM_PAGE_SZ);
/*
* Length denotes the amount of data to be written as part of stream element,
* thus includes '\0' byte. We're capped by how much bpf_bprintf_buffers can
@@ -172,10 +29,13 @@ static struct bpf_stream_elem *bpf_stream_elem_alloc(int len)
if (len < 0 || len > max_len)
return NULL;
- if (!bpf_stream_page_local_lock(&flags))
+ alloc_size = offsetof(struct bpf_stream_elem, str[len]);
+ elem = kmalloc_nolock(alloc_size, __GFP_ZERO, -1);
+ if (!elem)
return NULL;
- elem = bpf_stream_page_reserve_elem(len);
- bpf_stream_page_local_unlock(&flags);
+
+ bpf_stream_elem_init(elem, len);
+
return elem;
}
@@ -231,10 +91,7 @@ static struct bpf_stream *bpf_stream_get(enum bpf_stream_id stream_id, struct bp
static void bpf_stream_free_elem(struct bpf_stream_elem *elem)
{
- struct bpf_stream_page *p;
-
- p = bpf_stream_page_from_elem(elem);
- bpf_stream_page_put(p);
+ kfree_nolock(elem);
}
static void bpf_stream_free_list(struct llist_node *list)
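As background for the code this diff removes: a standalone userspace
sketch of the per-page refcount lifetime the old bump allocator relied
on, reconstructed only from the deleted comment and helpers above. The
plain-int refcount and the names here are illustrative stand-ins for
refcount_t and the removed kernel helpers, not kernel code.

#include <stdio.h>
#include <stdlib.h>

/* Stand-in for the removed struct bpf_stream_page header. */
struct stream_page { int ref; };

static void page_put(struct stream_page *p)
{
	/* Mirrors bpf_stream_page_put(): dropping the last reference
	 * frees the page (free_pages_nolock() in the kernel). */
	if (--p->ref == 0) {
		printf("page freed\n");
		free(p);
	}
}

int main(void)
{
	struct stream_page *p = calloc(1, sizeof(*p));

	if (!p)
		return 1;
	p->ref = 1;	/* bpf_stream_page_init(): per-CPU slot's reference */
	p->ref++;	/* element A reserved (bpf_stream_page_get) */
	p->ref++;	/* element B reserved */
	page_put(p);	/* per-CPU slot replaced (bpf_stream_page_replace) */
	page_put(p);	/* element A consumed */
	page_put(p);	/* element B consumed: count hits zero, page freed */
	return 0;
}

With kmalloc_nolock(), none of this bookkeeping remains:
kfree_nolock(elem) releases each element directly.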