summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-12-03 16:54:54 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2025-12-03 16:54:54 -0800
commit015e7b0b0e8e51f7321ec2aafc1d7fc0a8a5536f (patch)
tree258f719e59946c733dd03198eba404e85f9d0945 /net
parentb6d993310a65b994f37e3347419d9ed398ee37a3 (diff)
parentff34657aa72a4dab9c2fd38e1b31a506951f4b1c (diff)
Merge tag 'bpf-next-6.19' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Pull bpf updates from Alexei Starovoitov: - Convert selftests/bpf/test_tc_edt and test_tc_tunnel from .sh to test_progs runner (Alexis Lothoré) - Convert selftests/bpf/test_xsk to test_progs runner (Bastien Curutchet) - Replace bpf memory allocator with kmalloc_nolock() in bpf_local_storage (Amery Hung), and in bpf streams and range tree (Puranjay Mohan) - Introduce support for indirect jumps in BPF verifier and x86 JIT (Anton Protopopov) and arm64 JIT (Puranjay Mohan) - Remove runqslower bpf tool (Hoyeon Lee) - Fix corner cases in the verifier to close several syzbot reports (Eduard Zingerman, KaFai Wan) - Several improvements in deadlock detection in rqspinlock (Kumar Kartikeya Dwivedi) - Implement "jmp" mode for BPF trampoline and corresponding DYNAMIC_FTRACE_WITH_JMP. It improves "fexit" program type performance from 80 M/s to 136 M/s. With Steven's Ack. (Menglong Dong) - Add ability to test non-linear skbs in BPF_PROG_TEST_RUN (Paul Chaignon) - Do not let BPF_PROG_TEST_RUN emit invalid GSO types to stack (Daniel Borkmann) - Generalize buildid reader into bpf_dynptr (Mykyta Yatsenko) - Optimize bpf_map_update_elem() for map-in-map types (Ritesh Oedayrajsingh Varma) - Introduce overwrite mode for BPF ring buffer (Xu Kuohai) * tag 'bpf-next-6.19' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (169 commits) bpf: optimize bpf_map_update_elem() for map-in-map types bpf: make kprobe_multi_link_prog_run always_inline selftests/bpf: do not hardcode target rate in test_tc_edt BPF program selftests/bpf: remove test_tc_edt.sh selftests/bpf: integrate test_tc_edt into test_progs selftests/bpf: rename test_tc_edt.bpf.c section to expose program type selftests/bpf: Add success stats to rqspinlock stress test rqspinlock: Precede non-head waiter queueing with AA check rqspinlock: Disable spinning for trylock fallback rqspinlock: Use trylock fallback when per-CPU rqnode is busy rqspinlock: Perform AA checks immediately rqspinlock: Enclose lock/unlock within lock entry acquisitions bpf: Remove runqslower tool selftests/bpf: Remove usage of lsm/file_alloc_security in selftest bpf: Disable file_alloc_security hook bpf: check for insn arrays in check_ptr_alignment bpf: force BPF_F_RDONLY_PROG on insn array creation bpf: Fix exclusive map memory leak selftests/bpf: Make CS length configurable for rqspinlock stress test selftests/bpf: Add lock wait time stats to rqspinlock stress test ...
Diffstat (limited to 'net')
-rw-r--r--net/bpf/test_run.c148
-rw-r--r--net/core/bpf_sk_storage.c16
-rw-r--r--net/core/filter.c16
3 files changed, 127 insertions, 53 deletions
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 8b7d0b90fea7..655efac6f133 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -436,7 +436,7 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
static int bpf_test_finish(const union bpf_attr *kattr,
union bpf_attr __user *uattr, const void *data,
- struct skb_shared_info *sinfo, u32 size,
+ struct skb_shared_info *sinfo, u32 size, u32 frag_size,
u32 retval, u32 duration)
{
void __user *data_out = u64_to_user_ptr(kattr->test.data_out);
@@ -453,7 +453,7 @@ static int bpf_test_finish(const union bpf_attr *kattr,
}
if (data_out) {
- int len = sinfo ? copy_size - sinfo->xdp_frags_size : copy_size;
+ int len = sinfo ? copy_size - frag_size : copy_size;
if (len < 0) {
err = -ENOSPC;
@@ -899,6 +899,12 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb)
/* cb is allowed */
if (!range_is_zero(__skb, offsetofend(struct __sk_buff, cb),
+ offsetof(struct __sk_buff, data_end)))
+ return -EINVAL;
+
+ /* data_end is allowed, but not copied to skb */
+
+ if (!range_is_zero(__skb, offsetofend(struct __sk_buff, data_end),
offsetof(struct __sk_buff, tstamp)))
return -EINVAL;
@@ -939,6 +945,11 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb)
if (__skb->gso_segs > GSO_MAX_SEGS)
return -EINVAL;
+
+ /* Currently GSO type is zero/unset. If this gets extended with
+ * a small list of accepted GSO types in future, the filter for
+ * an unset GSO type in bpf_clone_redirect() can be lifted.
+ */
skb_shinfo(skb)->gso_segs = __skb->gso_segs;
skb_shinfo(skb)->gso_size = __skb->gso_size;
skb_shinfo(skb)->hwtstamps.hwtstamp = __skb->hwtstamp;
@@ -973,46 +984,39 @@ static struct proto bpf_dummy_proto = {
int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
union bpf_attr __user *uattr)
{
- bool is_l2 = false, is_direct_pkt_access = false;
+ bool is_l2 = false, is_direct_pkt_access = false, is_lwt = false;
+ u32 tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
struct net *net = current->nsproxy->net_ns;
struct net_device *dev = net->loopback_dev;
- u32 size = kattr->test.data_size_in;
+ u32 headroom = NET_SKB_PAD + NET_IP_ALIGN;
+ u32 linear_sz = kattr->test.data_size_in;
u32 repeat = kattr->test.repeat;
struct __sk_buff *ctx = NULL;
+ struct sk_buff *skb = NULL;
+ struct sock *sk = NULL;
u32 retval, duration;
int hh_len = ETH_HLEN;
- struct sk_buff *skb;
- struct sock *sk;
- void *data;
+ void *data = NULL;
int ret;
if ((kattr->test.flags & ~BPF_F_TEST_SKB_CHECKSUM_COMPLETE) ||
kattr->test.cpu || kattr->test.batch_size)
return -EINVAL;
- if (size < ETH_HLEN)
+ if (kattr->test.data_size_in < ETH_HLEN)
return -EINVAL;
- data = bpf_test_init(kattr, kattr->test.data_size_in,
- size, NET_SKB_PAD + NET_IP_ALIGN,
- SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
- if (IS_ERR(data))
- return PTR_ERR(data);
-
- ctx = bpf_ctx_init(kattr, sizeof(struct __sk_buff));
- if (IS_ERR(ctx)) {
- kfree(data);
- return PTR_ERR(ctx);
- }
-
switch (prog->type) {
case BPF_PROG_TYPE_SCHED_CLS:
case BPF_PROG_TYPE_SCHED_ACT:
+ is_direct_pkt_access = true;
is_l2 = true;
- fallthrough;
+ break;
case BPF_PROG_TYPE_LWT_IN:
case BPF_PROG_TYPE_LWT_OUT:
case BPF_PROG_TYPE_LWT_XMIT:
+ is_lwt = true;
+ fallthrough;
case BPF_PROG_TYPE_CGROUP_SKB:
is_direct_pkt_access = true;
break;
@@ -1020,25 +1024,88 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
break;
}
+ ctx = bpf_ctx_init(kattr, sizeof(struct __sk_buff));
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
+
+ if (ctx) {
+ if (ctx->data_end > kattr->test.data_size_in || ctx->data || ctx->data_meta) {
+ ret = -EINVAL;
+ goto out;
+ }
+ if (ctx->data_end) {
+ /* Non-linear LWT test_run is unsupported for now. */
+ if (is_lwt) {
+ ret = -EINVAL;
+ goto out;
+ }
+ linear_sz = max(ETH_HLEN, ctx->data_end);
+ }
+ }
+
+ linear_sz = min_t(u32, linear_sz, PAGE_SIZE - headroom - tailroom);
+
+ data = bpf_test_init(kattr, linear_sz, linear_sz, headroom, tailroom);
+ if (IS_ERR(data)) {
+ ret = PTR_ERR(data);
+ data = NULL;
+ goto out;
+ }
+
sk = sk_alloc(net, AF_UNSPEC, GFP_USER, &bpf_dummy_proto, 1);
if (!sk) {
- kfree(data);
- kfree(ctx);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto out;
}
sock_init_data(NULL, sk);
skb = slab_build_skb(data);
if (!skb) {
- kfree(data);
- kfree(ctx);
- sk_free(sk);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto out;
}
skb->sk = sk;
+ data = NULL; /* data released via kfree_skb */
+
skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
- __skb_put(skb, size);
+ __skb_put(skb, linear_sz);
+
+ if (unlikely(kattr->test.data_size_in > linear_sz)) {
+ void __user *data_in = u64_to_user_ptr(kattr->test.data_in);
+ struct skb_shared_info *sinfo = skb_shinfo(skb);
+ u32 copied = linear_sz;
+
+ while (copied < kattr->test.data_size_in) {
+ struct page *page;
+ u32 data_len;
+
+ if (sinfo->nr_frags == MAX_SKB_FRAGS) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ page = alloc_page(GFP_KERNEL);
+ if (!page) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ data_len = min_t(u32, kattr->test.data_size_in - copied,
+ PAGE_SIZE);
+ skb_fill_page_desc(skb, sinfo->nr_frags, page, 0, data_len);
+
+ if (copy_from_user(page_address(page), data_in + copied,
+ data_len)) {
+ ret = -EFAULT;
+ goto out;
+ }
+ skb->data_len += data_len;
+ skb->truesize += PAGE_SIZE;
+ skb->len += data_len;
+ copied += data_len;
+ }
+ }
if (ctx && ctx->ifindex > 1) {
dev = dev_get_by_index(net, ctx->ifindex);
@@ -1118,12 +1185,11 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
convert_skb_to___skb(skb, ctx);
- size = skb->len;
- /* bpf program can never convert linear skb to non-linear */
- if (WARN_ON_ONCE(skb_is_nonlinear(skb)))
- size = skb_headlen(skb);
- ret = bpf_test_finish(kattr, uattr, skb->data, NULL, size, retval,
- duration);
+ if (skb_is_nonlinear(skb))
+ /* bpf program can never convert linear skb to non-linear */
+ WARN_ON_ONCE(linear_sz == kattr->test.data_size_in);
+ ret = bpf_test_finish(kattr, uattr, skb->data, skb_shinfo(skb), skb->len,
+ skb->data_len, retval, duration);
if (!ret)
ret = bpf_ctx_finish(kattr, uattr, ctx,
sizeof(struct __sk_buff));
@@ -1131,7 +1197,9 @@ out:
if (dev && dev != net->loopback_dev)
dev_put(dev);
kfree_skb(skb);
- sk_free(sk);
+ kfree(data);
+ if (sk)
+ sk_free(sk);
kfree(ctx);
return ret;
}
@@ -1329,7 +1397,7 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
goto out;
size = xdp.data_end - xdp.data_meta + sinfo->xdp_frags_size;
- ret = bpf_test_finish(kattr, uattr, xdp.data_meta, sinfo, size,
+ ret = bpf_test_finish(kattr, uattr, xdp.data_meta, sinfo, size, sinfo->xdp_frags_size,
retval, duration);
if (!ret)
ret = bpf_ctx_finish(kattr, uattr, ctx,
@@ -1420,7 +1488,7 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
goto out;
ret = bpf_test_finish(kattr, uattr, &flow_keys, NULL,
- sizeof(flow_keys), retval, duration);
+ sizeof(flow_keys), 0, retval, duration);
if (!ret)
ret = bpf_ctx_finish(kattr, uattr, user_ctx,
sizeof(struct bpf_flow_keys));
@@ -1521,7 +1589,7 @@ int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kat
user_ctx->cookie = sock_gen_cookie(ctx.selected_sk);
}
- ret = bpf_test_finish(kattr, uattr, NULL, NULL, 0, retval, duration);
+ ret = bpf_test_finish(kattr, uattr, NULL, NULL, 0, 0, retval, duration);
if (!ret)
ret = bpf_ctx_finish(kattr, uattr, user_ctx, sizeof(*user_ctx));
@@ -1721,7 +1789,7 @@ int bpf_prog_test_run_nf(struct bpf_prog *prog,
if (ret)
goto out;
- ret = bpf_test_finish(kattr, uattr, NULL, NULL, 0, retval, duration);
+ ret = bpf_test_finish(kattr, uattr, NULL, NULL, 0, 0, retval, duration);
out:
kfree(user_ctx);
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index 2e538399757f..850dd736ccd1 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -50,16 +50,14 @@ void bpf_sk_storage_free(struct sock *sk)
{
struct bpf_local_storage *sk_storage;
- migrate_disable();
- rcu_read_lock();
+ rcu_read_lock_dont_migrate();
sk_storage = rcu_dereference(sk->sk_bpf_storage);
if (!sk_storage)
goto out;
bpf_local_storage_destroy(sk_storage);
out:
- rcu_read_unlock();
- migrate_enable();
+ rcu_read_unlock_migrate();
}
static void bpf_sk_storage_map_free(struct bpf_map *map)
@@ -138,7 +136,7 @@ bpf_sk_storage_clone_elem(struct sock *newsk,
{
struct bpf_local_storage_elem *copy_selem;
- copy_selem = bpf_selem_alloc(smap, newsk, NULL, true, false, GFP_ATOMIC);
+ copy_selem = bpf_selem_alloc(smap, newsk, NULL, false, GFP_ATOMIC);
if (!copy_selem)
return NULL;
@@ -161,8 +159,7 @@ int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
RCU_INIT_POINTER(newsk->sk_bpf_storage, NULL);
- migrate_disable();
- rcu_read_lock();
+ rcu_read_lock_dont_migrate();
sk_storage = rcu_dereference(sk->sk_bpf_storage);
if (!sk_storage || hlist_empty(&sk_storage->list))
@@ -199,7 +196,7 @@ int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
} else {
ret = bpf_local_storage_alloc(newsk, smap, copy_selem, GFP_ATOMIC);
if (ret) {
- bpf_selem_free(copy_selem, smap, true);
+ bpf_selem_free(copy_selem, true);
atomic_sub(smap->elem_size,
&newsk->sk_omem_alloc);
bpf_map_put(map);
@@ -213,8 +210,7 @@ int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
}
out:
- rcu_read_unlock();
- migrate_enable();
+ rcu_read_unlock_migrate();
/* In case of an error, don't free anything explicitly here, the
* caller is responsible to call bpf_sk_storage_free.
diff --git a/net/core/filter.c b/net/core/filter.c
index fa06c5a08e22..df6ce85e48dc 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2458,6 +2458,13 @@ BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags)
if (unlikely(flags & (~(BPF_F_INGRESS) | BPF_F_REDIRECT_INTERNAL)))
return -EINVAL;
+ /* BPF test infra's convert___skb_to_skb() can create type-less
+ * GSO packets. gso_features_check() will detect this as a bad
+ * offload. However, lets not leak them out in the first place.
+ */
+ if (unlikely(skb_is_gso(skb) && !skb_shinfo(skb)->gso_type))
+ return -EBADMSG;
+
dev = dev_get_by_index_rcu(dev_net(skb->dev), ifindex);
if (unlikely(!dev))
return -EINVAL;
@@ -6422,9 +6429,12 @@ BPF_CALL_5(bpf_skb_check_mtu, struct sk_buff *, skb,
*/
if (skb_is_gso(skb)) {
ret = BPF_MTU_CHK_RET_SUCCESS;
- if (flags & BPF_MTU_CHK_SEGS &&
- !skb_gso_validate_network_len(skb, mtu))
- ret = BPF_MTU_CHK_RET_SEGS_TOOBIG;
+ if (flags & BPF_MTU_CHK_SEGS) {
+ if (!skb_transport_header_was_set(skb))
+ return -EINVAL;
+ if (!skb_gso_validate_network_len(skb, mtu))
+ ret = BPF_MTU_CHK_RET_SEGS_TOOBIG;
+ }
}
out:
*mtu_len = mtu;