| author | Linus Torvalds <torvalds@linux-foundation.org> | 2025-12-03 16:54:54 -0800 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2025-12-03 16:54:54 -0800 |
| commit | 015e7b0b0e8e51f7321ec2aafc1d7fc0a8a5536f | |
| tree | 258f719e59946c733dd03198eba404e85f9d0945 /kernel/bpf/stream.c | |
| parent | b6d993310a65b994f37e3347419d9ed398ee37a3 | |
| parent | ff34657aa72a4dab9c2fd38e1b31a506951f4b1c | |
Merge tag 'bpf-next-6.19' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Pull bpf updates from Alexei Starovoitov:
- Convert selftests/bpf/test_tc_edt and test_tc_tunnel from .sh to
test_progs runner (Alexis Lothoré)
- Convert selftests/bpf/test_xsk to test_progs runner (Bastien
Curutchet)
- Replace bpf memory allocator with kmalloc_nolock() in
  bpf_local_storage (Amery Hung), and in bpf streams and range tree
  (Puranjay Mohan); see the allocation sketch after this list
- Introduce support for indirect jumps in BPF verifier and x86 JIT
(Anton Protopopov) and arm64 JIT (Puranjay Mohan)
- Remove runqslower bpf tool (Hoyeon Lee)
- Fix corner cases in the verifier to close several syzbot reports
(Eduard Zingerman, KaFai Wan)
- Several improvements in deadlock detection in rqspinlock (Kumar
Kartikeya Dwivedi)
- Implement "jmp" mode for BPF trampoline and corresponding
DYNAMIC_FTRACE_WITH_JMP. It improves "fexit" program type performance
from 80 M/s to 136 M/s. With Steven's Ack. (Menglong Dong)
- Add ability to test non-linear skbs in BPF_PROG_TEST_RUN (Paul
Chaignon)
- Do not let BPF_PROG_TEST_RUN emit invalid GSO types to stack (Daniel
Borkmann)
- Generalize buildid reader into bpf_dynptr (Mykyta Yatsenko)
- Optimize bpf_map_update_elem() for map-in-map types (Ritesh
Oedayrajsingh Varma)
- Introduce overwrite mode for BPF ring buffer (Xu Kuohai)
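The kmalloc_nolock() conversions above all follow the same shape: a bespoke
NMI-safe allocation scheme is replaced by a direct any-context allocation.
Below is a minimal sketch of that pattern, assuming only the
kmalloc_nolock()/kfree_nolock() signatures visible in the stream.c diff
further down; struct demo_rec and its helpers are illustrative names, not
kernel APIs.

```c
#include <linux/slab.h>
#include <linux/stddef.h>

/* Illustrative variable-length record, mirroring bpf_stream_elem's shape. */
struct demo_rec {
	int total_len;
	char str[];		/* payload, length fixed at allocation time */
};

static struct demo_rec *demo_rec_alloc(int len)
{
	/* Size of the header plus 'len' payload bytes. */
	size_t alloc_size = offsetof(struct demo_rec, str[len]);

	/*
	 * kmalloc_nolock() is usable from any context, including NMI,
	 * and may fail; -1 requests any NUMA node (as in the diff below).
	 */
	return kmalloc_nolock(alloc_size, __GFP_ZERO, -1);
}

static void demo_rec_free(struct demo_rec *rec)
{
	/* Matching any-context free. */
	kfree_nolock(rec);
}
```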
* tag 'bpf-next-6.19' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (169 commits)
bpf: optimize bpf_map_update_elem() for map-in-map types
bpf: make kprobe_multi_link_prog_run always_inline
selftests/bpf: do not hardcode target rate in test_tc_edt BPF program
selftests/bpf: remove test_tc_edt.sh
selftests/bpf: integrate test_tc_edt into test_progs
selftests/bpf: rename test_tc_edt.bpf.c section to expose program type
selftests/bpf: Add success stats to rqspinlock stress test
rqspinlock: Precede non-head waiter queueing with AA check
rqspinlock: Disable spinning for trylock fallback
rqspinlock: Use trylock fallback when per-CPU rqnode is busy
rqspinlock: Perform AA checks immediately
rqspinlock: Enclose lock/unlock within lock entry acquisitions
bpf: Remove runqslower tool
selftests/bpf: Remove usage of lsm/file_alloc_security in selftest
bpf: Disable file_alloc_security hook
bpf: check for insn arrays in check_ptr_alignment
bpf: force BPF_F_RDONLY_PROG on insn array creation
bpf: Fix exclusive map memory leak
selftests/bpf: Make CS length configurable for rqspinlock stress test
selftests/bpf: Add lock wait time stats to rqspinlock stress test
...
Diffstat (limited to 'kernel/bpf/stream.c')
-rw-r--r--  kernel/bpf/stream.c | 159
1 file changed, 8 insertions(+), 151 deletions(-)
```diff
diff --git a/kernel/bpf/stream.c b/kernel/bpf/stream.c
index ff16c631951b..0b6bc3f30335 100644
--- a/kernel/bpf/stream.c
+++ b/kernel/bpf/stream.c
@@ -4,111 +4,10 @@
 #include <linux/bpf.h>
 #include <linux/filter.h>
 #include <linux/bpf_mem_alloc.h>
-#include <linux/percpu.h>
-#include <linux/refcount.h>
 #include <linux/gfp.h>
 #include <linux/memory.h>
-#include <linux/local_lock.h>
 #include <linux/mutex.h>
 
-/*
- * Simple per-CPU NMI-safe bump allocation mechanism, backed by the NMI-safe
- * try_alloc_pages()/free_pages_nolock() primitives. We allocate a page and
- * stash it in a local per-CPU variable, and bump allocate from the page
- * whenever items need to be printed to a stream. Each page holds a global
- * atomic refcount in its first 4 bytes, and then records of variable length
- * that describe the printed messages. Once the global refcount has dropped to
- * zero, it is a signal to free the page back to the kernel's page allocator,
- * given all the individual records in it have been consumed.
- *
- * It is possible the same page is used to serve allocations across different
- * programs, which may be consumed at different times individually, hence
- * maintaining a reference count per-page is critical for correct lifetime
- * tracking.
- *
- * The bpf_stream_page code will be replaced to use kmalloc_nolock() once it
- * lands.
- */
-struct bpf_stream_page {
-	refcount_t ref;
-	u32 consumed;
-	char buf[];
-};
-
-/* Available room to add data to a refcounted page. */
-#define BPF_STREAM_PAGE_SZ (PAGE_SIZE - offsetofend(struct bpf_stream_page, consumed))
-
-static DEFINE_PER_CPU(local_trylock_t, stream_local_lock) = INIT_LOCAL_TRYLOCK(stream_local_lock);
-static DEFINE_PER_CPU(struct bpf_stream_page *, stream_pcpu_page);
-
-static bool bpf_stream_page_local_lock(unsigned long *flags)
-{
-	return local_trylock_irqsave(&stream_local_lock, *flags);
-}
-
-static void bpf_stream_page_local_unlock(unsigned long *flags)
-{
-	local_unlock_irqrestore(&stream_local_lock, *flags);
-}
-
-static void bpf_stream_page_free(struct bpf_stream_page *stream_page)
-{
-	struct page *p;
-
-	if (!stream_page)
-		return;
-	p = virt_to_page(stream_page);
-	free_pages_nolock(p, 0);
-}
-
-static void bpf_stream_page_get(struct bpf_stream_page *stream_page)
-{
-	refcount_inc(&stream_page->ref);
-}
-
-static void bpf_stream_page_put(struct bpf_stream_page *stream_page)
-{
-	if (refcount_dec_and_test(&stream_page->ref))
-		bpf_stream_page_free(stream_page);
-}
-
-static void bpf_stream_page_init(struct bpf_stream_page *stream_page)
-{
-	refcount_set(&stream_page->ref, 1);
-	stream_page->consumed = 0;
-}
-
-static struct bpf_stream_page *bpf_stream_page_replace(void)
-{
-	struct bpf_stream_page *stream_page, *old_stream_page;
-	struct page *page;
-
-	page = alloc_pages_nolock(/* Don't account */ 0, NUMA_NO_NODE, 0);
-	if (!page)
-		return NULL;
-	stream_page = page_address(page);
-	bpf_stream_page_init(stream_page);
-
-	old_stream_page = this_cpu_read(stream_pcpu_page);
-	if (old_stream_page)
-		bpf_stream_page_put(old_stream_page);
-	this_cpu_write(stream_pcpu_page, stream_page);
-	return stream_page;
-}
-
-static int bpf_stream_page_check_room(struct bpf_stream_page *stream_page, int len)
-{
-	int min = offsetof(struct bpf_stream_elem, str[0]);
-	int consumed = stream_page->consumed;
-	int total = BPF_STREAM_PAGE_SZ;
-	int rem = max(0, total - consumed - min);
-
-	/* Let's give room of at least 8 bytes. */
-	WARN_ON_ONCE(rem % 8 != 0);
-	rem = rem < 8 ? 0 : rem;
-	return min(len, rem);
-}
-
 static void bpf_stream_elem_init(struct bpf_stream_elem *elem, int len)
 {
 	init_llist_node(&elem->node);
@@ -116,54 +15,12 @@ static void bpf_stream_elem_init(struct bpf_stream_elem *elem, int len)
 	elem->consumed_len = 0;
 }
 
-static struct bpf_stream_page *bpf_stream_page_from_elem(struct bpf_stream_elem *elem)
-{
-	unsigned long addr = (unsigned long)elem;
-
-	return (struct bpf_stream_page *)PAGE_ALIGN_DOWN(addr);
-}
-
-static struct bpf_stream_elem *bpf_stream_page_push_elem(struct bpf_stream_page *stream_page, int len)
-{
-	u32 consumed = stream_page->consumed;
-
-	stream_page->consumed += round_up(offsetof(struct bpf_stream_elem, str[len]), 8);
-	return (struct bpf_stream_elem *)&stream_page->buf[consumed];
-}
-
-static struct bpf_stream_elem *bpf_stream_page_reserve_elem(int len)
-{
-	struct bpf_stream_elem *elem = NULL;
-	struct bpf_stream_page *page;
-	int room = 0;
-
-	page = this_cpu_read(stream_pcpu_page);
-	if (!page)
-		page = bpf_stream_page_replace();
-	if (!page)
-		return NULL;
-
-	room = bpf_stream_page_check_room(page, len);
-	if (room != len)
-		page = bpf_stream_page_replace();
-	if (!page)
-		return NULL;
-	bpf_stream_page_get(page);
-	room = bpf_stream_page_check_room(page, len);
-	WARN_ON_ONCE(room != len);
-
-	elem = bpf_stream_page_push_elem(page, room);
-	bpf_stream_elem_init(elem, room);
-	return elem;
-}
-
 static struct bpf_stream_elem *bpf_stream_elem_alloc(int len)
 {
 	const int max_len = ARRAY_SIZE((struct bpf_bprintf_buffers){}.buf);
 	struct bpf_stream_elem *elem;
-	unsigned long flags;
+	size_t alloc_size;
 
-	BUILD_BUG_ON(max_len > BPF_STREAM_PAGE_SZ);
 	/*
 	 * Length denotes the amount of data to be written as part of stream element,
 	 * thus includes '\0' byte. We're capped by how much bpf_bprintf_buffers can
@@ -172,10 +29,13 @@ static struct bpf_stream_elem *bpf_stream_elem_alloc(int len)
 	if (len < 0 || len > max_len)
 		return NULL;
 
-	if (!bpf_stream_page_local_lock(&flags))
+	alloc_size = offsetof(struct bpf_stream_elem, str[len]);
+	elem = kmalloc_nolock(alloc_size, __GFP_ZERO, -1);
+	if (!elem)
 		return NULL;
-	elem = bpf_stream_page_reserve_elem(len);
-	bpf_stream_page_local_unlock(&flags);
+
+	bpf_stream_elem_init(elem, len);
+
 	return elem;
 }
 
@@ -231,10 +91,7 @@ static struct bpf_stream *bpf_stream_get(enum bpf_stream_id stream_id, struct bp
 
 static void bpf_stream_free_elem(struct bpf_stream_elem *elem)
 {
-	struct bpf_stream_page *p;
-
-	p = bpf_stream_page_from_elem(elem);
-	bpf_stream_page_put(p);
+	kfree_nolock(elem);
 }
 
 static void bpf_stream_free_list(struct llist_node *list)
```
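For readability, here is how the allocation and free paths read after this
patch, reassembled from the + and context lines of the hunks above. The tail
of the length comment falls outside the diff context, so it is elided with an
ellipsis rather than guessed.

```c
static struct bpf_stream_elem *bpf_stream_elem_alloc(int len)
{
	const int max_len = ARRAY_SIZE((struct bpf_bprintf_buffers){}.buf);
	struct bpf_stream_elem *elem;
	size_t alloc_size;

	/*
	 * Length denotes the amount of data to be written as part of stream element,
	 * thus includes '\0' byte. We're capped by how much bpf_bprintf_buffers can
	 * ... (comment continues beyond the diff context)
	 */
	if (len < 0 || len > max_len)
		return NULL;

	/* Header plus 'len' bytes of string payload, in one allocation. */
	alloc_size = offsetof(struct bpf_stream_elem, str[len]);
	elem = kmalloc_nolock(alloc_size, __GFP_ZERO, -1);
	if (!elem)
		return NULL;

	bpf_stream_elem_init(elem, len);

	return elem;
}

static void bpf_stream_free_elem(struct bpf_stream_elem *elem)
{
	kfree_nolock(elem);
}
```

The net effect is that the per-CPU page pool, its trylock, and the per-page
refcount disappear entirely; element lifetime is now tracked by the allocator
itself, exactly as the removed comment anticipated ("will be replaced to use
kmalloc_nolock() once it lands").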