summaryrefslogtreecommitdiff
path: root/kernel/bpf/hashtab.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-11-21 08:11:04 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2024-11-21 08:11:04 -0800
commit6e95ef0258ff4ee23ae3b06bf6b00b33dbbd5ef7 (patch)
tree07f66723c602ab3b085d890d7fef898a61bb539c /kernel/bpf/hashtab.c
parent43fb83c17ba2d63dfb798f0be7453ed55ca3f9c2 (diff)
parent2c8b09ac2537299511c898bc71b1a5f2756c831c (diff)
Merge tag 'bpf-next-6.13' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Pull bpf updates from Alexei Starovoitov: - Add BPF uprobe session support (Jiri Olsa) - Optimize uprobe performance (Andrii Nakryiko) - Add bpf_fastcall support to helpers and kfuncs (Eduard Zingerman) - Avoid calling free_htab_elem() under hash map bucket lock (Hou Tao) - Prevent tailcall infinite loop caused by freplace (Leon Hwang) - Mark raw_tracepoint arguments as nullable (Kumar Kartikeya Dwivedi) - Introduce uptr support in the task local storage map (Martin KaFai Lau) - Stringify errno log messages in libbpf (Mykyta Yatsenko) - Add kmem_cache BPF iterator for perf's lock profiling (Namhyung Kim) - Support BPF objects of either endianness in libbpf (Tony Ambardar) - Add ksym to struct_ops trampoline to fix stack trace (Xu Kuohai) - Introduce private stack for eligible BPF programs (Yonghong Song) - Migrate samples/bpf tests to selftests/bpf test_progs (Daniel T. Lee) - Migrate test_sock to selftests/bpf test_progs (Jordan Rife) * tag 'bpf-next-6.13' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (152 commits) libbpf: Change hash_combine parameters from long to unsigned long selftests/bpf: Fix build error with llvm 19 libbpf: Fix memory leak in bpf_program__attach_uprobe_multi bpf: use common instruction history across all states bpf: Add necessary migrate_disable to range_tree. bpf: Do not alloc arena on unsupported arches selftests/bpf: Set test path for token/obj_priv_implicit_token_envvar selftests/bpf: Add a test for arena range tree algorithm bpf: Introduce range_tree data structure and use it in bpf arena samples/bpf: Remove unused variable in xdp2skb_meta_kern.c samples/bpf: Remove unused variables in tc_l2_redirect_kern.c bpftool: Cast variable `var` to long long bpf, x86: Propagate tailcall info only for subprogs bpf: Add kernel symbol for struct_ops trampoline bpf: Use function pointers count as struct_ops links count bpf: Remove unused member rcu from bpf_struct_ops_map selftests/bpf: Add struct_ops prog private stack tests bpf: Support private stack for struct_ops progs selftests/bpf: Add tracing prog private stack tests bpf, x86: Support private stack in jit ...
Diffstat (limited to 'kernel/bpf/hashtab.c')
-rw-r--r--kernel/bpf/hashtab.c56
1 files changed, 39 insertions, 17 deletions
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index b14b87463ee0..3ec941a0ea41 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -896,9 +896,12 @@ find_first_elem:
static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l)
{
check_and_free_fields(htab, l);
+
+ migrate_disable();
if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH)
bpf_mem_cache_free(&htab->pcpu_ma, l->ptr_to_pptr);
bpf_mem_cache_free(&htab->ma, l);
+ migrate_enable();
}
static void htab_put_fd_value(struct bpf_htab *htab, struct htab_elem *l)
@@ -948,7 +951,7 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
if (htab_is_prealloc(htab)) {
bpf_map_dec_elem_count(&htab->map);
check_and_free_fields(htab, l);
- __pcpu_freelist_push(&htab->freelist, &l->fnode);
+ pcpu_freelist_push(&htab->freelist, &l->fnode);
} else {
dec_elem_count(htab);
htab_elem_free(htab, l);
@@ -1018,7 +1021,6 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
*/
pl_new = this_cpu_ptr(htab->extra_elems);
l_new = *pl_new;
- htab_put_fd_value(htab, old_elem);
*pl_new = old_elem;
} else {
struct pcpu_freelist_node *l;
@@ -1105,6 +1107,7 @@ static long htab_map_update_elem(struct bpf_map *map, void *key, void *value,
struct htab_elem *l_new = NULL, *l_old;
struct hlist_nulls_head *head;
unsigned long flags;
+ void *old_map_ptr;
struct bucket *b;
u32 key_size, hash;
int ret;
@@ -1183,12 +1186,27 @@ static long htab_map_update_elem(struct bpf_map *map, void *key, void *value,
hlist_nulls_add_head_rcu(&l_new->hash_node, head);
if (l_old) {
hlist_nulls_del_rcu(&l_old->hash_node);
+
+ /* l_old has already been stashed in htab->extra_elems, free
+ * its special fields before it is available for reuse. Also
+ * save the old map pointer in htab of maps before unlock
+ * and release it after unlock.
+ */
+ old_map_ptr = NULL;
+ if (htab_is_prealloc(htab)) {
+ if (map->ops->map_fd_put_ptr)
+ old_map_ptr = fd_htab_map_get_ptr(map, l_old);
+ check_and_free_fields(htab, l_old);
+ }
+ }
+ htab_unlock_bucket(htab, b, hash, flags);
+ if (l_old) {
+ if (old_map_ptr)
+ map->ops->map_fd_put_ptr(map, old_map_ptr, true);
if (!htab_is_prealloc(htab))
free_htab_elem(htab, l_old);
- else
- check_and_free_fields(htab, l_old);
}
- ret = 0;
+ return 0;
err:
htab_unlock_bucket(htab, b, hash, flags);
return ret;
@@ -1432,15 +1450,15 @@ static long htab_map_delete_elem(struct bpf_map *map, void *key)
return ret;
l = lookup_elem_raw(head, hash, key, key_size);
-
- if (l) {
+ if (l)
hlist_nulls_del_rcu(&l->hash_node);
- free_htab_elem(htab, l);
- } else {
+ else
ret = -ENOENT;
- }
htab_unlock_bucket(htab, b, hash, flags);
+
+ if (l)
+ free_htab_elem(htab, l);
return ret;
}
@@ -1853,13 +1871,14 @@ again_nocopy:
* may cause deadlock. See comments in function
* prealloc_lru_pop(). Let us do bpf_lru_push_free()
* after releasing the bucket lock.
+ *
+ * For htab of maps, htab_put_fd_value() in
+ * free_htab_elem() may acquire a spinlock with bucket
+ * lock being held and it violates the lock rule, so
+ * invoke free_htab_elem() after unlock as well.
*/
- if (is_lru_map) {
- l->batch_flink = node_to_free;
- node_to_free = l;
- } else {
- free_htab_elem(htab, l);
- }
+ l->batch_flink = node_to_free;
+ node_to_free = l;
}
dst_key += key_size;
dst_val += value_size;
@@ -1871,7 +1890,10 @@ again_nocopy:
while (node_to_free) {
l = node_to_free;
node_to_free = node_to_free->batch_flink;
- htab_lru_push_free(htab, l);
+ if (is_lru_map)
+ htab_lru_push_free(htab, l);
+ else
+ free_htab_elem(htab, l);
}
next_batch: