diff options
Diffstat (limited to 'kernel/bpf/btf.c')
| -rw-r--r-- | kernel/bpf/btf.c | 808 |
1 files changed, 599 insertions, 209 deletions
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 520f49f422fe..0de8fc8a0e0b 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -26,6 +26,7 @@ #include <linux/bsearch.h> #include <linux/kobject.h> #include <linux/sysfs.h> +#include <linux/overflow.h> #include <net/netfilter/nf_bpf_link.h> @@ -212,7 +213,7 @@ enum btf_kfunc_hook { BTF_KFUNC_HOOK_TRACING, BTF_KFUNC_HOOK_SYSCALL, BTF_KFUNC_HOOK_FMODRET, - BTF_KFUNC_HOOK_CGROUP_SKB, + BTF_KFUNC_HOOK_CGROUP, BTF_KFUNC_HOOK_SCHED_ACT, BTF_KFUNC_HOOK_SK_SKB, BTF_KFUNC_HOOK_SOCKET_FILTER, @@ -498,11 +499,6 @@ bool btf_type_is_void(const struct btf_type *t) return t == &btf_void; } -static bool btf_type_is_fwd(const struct btf_type *t) -{ - return BTF_INFO_KIND(t->info) == BTF_KIND_FWD; -} - static bool btf_type_is_datasec(const struct btf_type *t) { return BTF_INFO_KIND(t->info) == BTF_KIND_DATASEC; @@ -611,6 +607,7 @@ s32 bpf_find_btf_id(const char *name, u32 kind, struct btf **btf_p) spin_unlock_bh(&btf_idr_lock); return ret; } +EXPORT_SYMBOL_GPL(bpf_find_btf_id); const struct btf_type *btf_type_skip_modifiers(const struct btf *btf, u32 id, u32 *res_id) @@ -790,7 +787,7 @@ const char *btf_str_by_offset(const struct btf *btf, u32 offset) return NULL; } -static bool __btf_name_valid(const struct btf *btf, u32 offset) +static bool btf_name_valid_identifier(const struct btf *btf, u32 offset) { /* offset must be valid */ const char *src = btf_str_by_offset(btf, offset); @@ -811,11 +808,6 @@ static bool __btf_name_valid(const struct btf *btf, u32 offset) return !*src; } -static bool btf_name_valid_identifier(const struct btf *btf, u32 offset) -{ - return __btf_name_valid(btf, offset); -} - /* Allow any printable character in DATASEC names */ static bool btf_name_valid_section(const struct btf *btf, u32 offset) { @@ -823,9 +815,11 @@ static bool btf_name_valid_section(const struct btf *btf, u32 offset) const char *src = btf_str_by_offset(btf, offset); const char *src_limit; + if (!*src) + return false; + /* set a limit on identifier length */ src_limit = src + KSYM_NAME_LEN; - src++; while (*src && src < src_limit) { if (!isprint(*src)) return false; @@ -864,26 +858,43 @@ const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id) EXPORT_SYMBOL_GPL(btf_type_by_id); /* - * Regular int is not a bit field and it must be either - * u8/u16/u32/u64 or __int128. + * Check that the type @t is a regular int. This means that @t is not + * a bit field and it has the same size as either of u8/u16/u32/u64 + * or __int128. If @expected_size is not zero, then size of @t should + * be the same. A caller should already have checked that the type @t + * is an integer. */ +static bool __btf_type_int_is_regular(const struct btf_type *t, size_t expected_size) +{ + u32 int_data = btf_type_int(t); + u8 nr_bits = BTF_INT_BITS(int_data); + u8 nr_bytes = BITS_ROUNDUP_BYTES(nr_bits); + + return BITS_PER_BYTE_MASKED(nr_bits) == 0 && + BTF_INT_OFFSET(int_data) == 0 && + (nr_bytes <= 16 && is_power_of_2(nr_bytes)) && + (expected_size == 0 || nr_bytes == expected_size); +} + static bool btf_type_int_is_regular(const struct btf_type *t) { - u8 nr_bits, nr_bytes; - u32 int_data; + return __btf_type_int_is_regular(t, 0); +} - int_data = btf_type_int(t); - nr_bits = BTF_INT_BITS(int_data); - nr_bytes = BITS_ROUNDUP_BYTES(nr_bits); - if (BITS_PER_BYTE_MASKED(nr_bits) || - BTF_INT_OFFSET(int_data) || - (nr_bytes != sizeof(u8) && nr_bytes != sizeof(u16) && - nr_bytes != sizeof(u32) && nr_bytes != sizeof(u64) && - nr_bytes != (2 * sizeof(u64)))) { - return false; - } +bool btf_type_is_i32(const struct btf_type *t) +{ + return btf_type_is_int(t) && __btf_type_int_is_regular(t, 4); +} - return true; +bool btf_type_is_i64(const struct btf_type *t) +{ + return btf_type_is_int(t) && __btf_type_int_is_regular(t, 8); +} + +bool btf_type_is_primitive(const struct btf_type *t) +{ + return (btf_type_is_int(t) && btf_type_int_is_regular(t)) || + btf_is_any_enum(t); } /* @@ -2583,7 +2594,7 @@ static int btf_ref_type_check_meta(struct btf_verifier_env *env, return -EINVAL; } - if (btf_type_kflag(t)) { + if (btf_type_kflag(t) && !btf_type_is_type_tag(t)) { btf_verifier_log_type(env, t, "Invalid btf_info kind_flag"); return -EINVAL; } @@ -2811,7 +2822,7 @@ static void btf_ref_type_log(struct btf_verifier_env *env, btf_verifier_log(env, "type_id=%u", t->type); } -static struct btf_kind_operations modifier_ops = { +static const struct btf_kind_operations modifier_ops = { .check_meta = btf_ref_type_check_meta, .resolve = btf_modifier_resolve, .check_member = btf_modifier_check_member, @@ -2820,7 +2831,7 @@ static struct btf_kind_operations modifier_ops = { .show = btf_modifier_show, }; -static struct btf_kind_operations ptr_ops = { +static const struct btf_kind_operations ptr_ops = { .check_meta = btf_ref_type_check_meta, .resolve = btf_ptr_resolve, .check_member = btf_ptr_check_member, @@ -2861,7 +2872,7 @@ static void btf_fwd_type_log(struct btf_verifier_env *env, btf_verifier_log(env, "%s", btf_type_kflag(t) ? "union" : "struct"); } -static struct btf_kind_operations fwd_ops = { +static const struct btf_kind_operations fwd_ops = { .check_meta = btf_fwd_check_meta, .resolve = btf_df_resolve, .check_member = btf_df_check_member, @@ -3112,7 +3123,7 @@ static void btf_array_show(const struct btf *btf, const struct btf_type *t, __btf_array_show(btf, t, type_id, data, bits_offset, show); } -static struct btf_kind_operations array_ops = { +static const struct btf_kind_operations array_ops = { .check_meta = btf_array_check_meta, .resolve = btf_array_resolve, .check_member = btf_array_check_member, @@ -3337,9 +3348,11 @@ static int btf_find_struct(const struct btf *btf, const struct btf_type *t, } static int btf_find_kptr(const struct btf *btf, const struct btf_type *t, - u32 off, int sz, struct btf_field_info *info) + u32 off, int sz, struct btf_field_info *info, u32 field_mask) { enum btf_field_type type; + const char *tag_value; + bool is_type_tag; u32 res_id; /* Permit modifiers on the pointer itself */ @@ -3349,21 +3362,27 @@ static int btf_find_kptr(const struct btf *btf, const struct btf_type *t, if (!btf_type_is_ptr(t)) return BTF_FIELD_IGNORE; t = btf_type_by_id(btf, t->type); - - if (!btf_type_is_type_tag(t)) + is_type_tag = btf_type_is_type_tag(t) && !btf_type_kflag(t); + if (!is_type_tag) return BTF_FIELD_IGNORE; /* Reject extra tags */ if (btf_type_is_type_tag(btf_type_by_id(btf, t->type))) return -EINVAL; - if (!strcmp("kptr_untrusted", __btf_name_by_offset(btf, t->name_off))) + tag_value = __btf_name_by_offset(btf, t->name_off); + if (!strcmp("kptr_untrusted", tag_value)) type = BPF_KPTR_UNREF; - else if (!strcmp("kptr", __btf_name_by_offset(btf, t->name_off))) + else if (!strcmp("kptr", tag_value)) type = BPF_KPTR_REF; - else if (!strcmp("percpu_kptr", __btf_name_by_offset(btf, t->name_off))) + else if (!strcmp("percpu_kptr", tag_value)) type = BPF_KPTR_PERCPU; + else if (!strcmp("uptr", tag_value)) + type = BPF_UPTR; else return -EINVAL; + if (!(type & field_mask)) + return BTF_FIELD_IGNORE; + /* Get the base type */ t = btf_type_skip_modifiers(btf, t->type, &res_id); /* Only pointer to struct is allowed */ @@ -3441,7 +3460,8 @@ btf_find_graph_root(const struct btf *btf, const struct btf_type *pt, node_field_name = strstr(value_type, ":"); if (!node_field_name) return -EINVAL; - value_type = kstrndup(value_type, node_field_name - value_type, GFP_KERNEL | __GFP_NOWARN); + value_type = kstrndup(value_type, node_field_name - value_type, + GFP_KERNEL_ACCOUNT | __GFP_NOWARN); if (!value_type) return -ENOMEM; id = btf_find_by_name_kind(btf, value_type, BTF_KIND_STRUCT); @@ -3458,54 +3478,48 @@ btf_find_graph_root(const struct btf *btf, const struct btf_type *pt, return BTF_FIELD_FOUND; } -#define field_mask_test_name(field_type, field_type_str) \ - if (field_mask & field_type && !strcmp(name, field_type_str)) { \ - type = field_type; \ - goto end; \ - } - static int btf_get_field_type(const struct btf *btf, const struct btf_type *var_type, - u32 field_mask, u32 *seen_mask, - int *align, int *sz) -{ - int type = 0; + u32 field_mask, u32 *seen_mask, int *align, int *sz) +{ + const struct { + enum btf_field_type type; + const char *const name; + const bool is_unique; + } field_types[] = { + { BPF_SPIN_LOCK, "bpf_spin_lock", true }, + { BPF_RES_SPIN_LOCK, "bpf_res_spin_lock", true }, + { BPF_TIMER, "bpf_timer", true }, + { BPF_WORKQUEUE, "bpf_wq", true }, + { BPF_TASK_WORK, "bpf_task_work", true }, + { BPF_LIST_HEAD, "bpf_list_head", false }, + { BPF_LIST_NODE, "bpf_list_node", false }, + { BPF_RB_ROOT, "bpf_rb_root", false }, + { BPF_RB_NODE, "bpf_rb_node", false }, + { BPF_REFCOUNT, "bpf_refcount", false }, + }; + int type = 0, i; const char *name = __btf_name_by_offset(btf, var_type->name_off); - - if (field_mask & BPF_SPIN_LOCK) { - if (!strcmp(name, "bpf_spin_lock")) { - if (*seen_mask & BPF_SPIN_LOCK) - return -E2BIG; - *seen_mask |= BPF_SPIN_LOCK; - type = BPF_SPIN_LOCK; - goto end; - } - } - if (field_mask & BPF_TIMER) { - if (!strcmp(name, "bpf_timer")) { - if (*seen_mask & BPF_TIMER) - return -E2BIG; - *seen_mask |= BPF_TIMER; - type = BPF_TIMER; - goto end; - } - } - if (field_mask & BPF_WORKQUEUE) { - if (!strcmp(name, "bpf_wq")) { - if (*seen_mask & BPF_WORKQUEUE) + const char *field_type_name; + enum btf_field_type field_type; + bool is_unique; + + for (i = 0; i < ARRAY_SIZE(field_types); ++i) { + field_type = field_types[i].type; + field_type_name = field_types[i].name; + is_unique = field_types[i].is_unique; + if (!(field_mask & field_type) || strcmp(name, field_type_name)) + continue; + if (is_unique) { + if (*seen_mask & field_type) return -E2BIG; - *seen_mask |= BPF_WORKQUEUE; - type = BPF_WORKQUEUE; - goto end; + *seen_mask |= field_type; } + type = field_type; + goto end; } - field_mask_test_name(BPF_LIST_HEAD, "bpf_list_head"); - field_mask_test_name(BPF_LIST_NODE, "bpf_list_node"); - field_mask_test_name(BPF_RB_ROOT, "bpf_rb_root"); - field_mask_test_name(BPF_RB_NODE, "bpf_rb_node"); - field_mask_test_name(BPF_REFCOUNT, "bpf_refcount"); /* Only return BPF_KPTR when all other types with matchable names fail */ - if (field_mask & BPF_KPTR && !__btf_type_is_struct(var_type)) { + if (field_mask & (BPF_KPTR | BPF_UPTR) && !__btf_type_is_struct(var_type)) { type = BPF_KPTR_REF; goto end; } @@ -3516,8 +3530,6 @@ end: return type; } -#undef field_mask_test_name - /* Repeat a number of fields for a specified number of times. * * Copy the fields starting from the first field and repeat them for @@ -3526,7 +3538,7 @@ end: * (i + 1) * elem_size * where i is the repeat index and elem_size is the size of an element. */ -static int btf_repeat_fields(struct btf_field_info *info, +static int btf_repeat_fields(struct btf_field_info *info, int info_cnt, u32 field_cnt, u32 repeat_cnt, u32 elem_size) { u32 i, j; @@ -3538,6 +3550,7 @@ static int btf_repeat_fields(struct btf_field_info *info, case BPF_KPTR_UNREF: case BPF_KPTR_REF: case BPF_KPTR_PERCPU: + case BPF_UPTR: case BPF_LIST_HEAD: case BPF_RB_ROOT: break; @@ -3546,6 +3559,12 @@ static int btf_repeat_fields(struct btf_field_info *info, } } + /* The type of struct size or variable size is u32, + * so the multiplication will not overflow. + */ + if (field_cnt * (repeat_cnt + 1) > info_cnt) + return -E2BIG; + cur = field_cnt; for (i = 0; i < repeat_cnt; i++) { memcpy(&info[cur], &info[0], field_cnt * sizeof(info[0])); @@ -3590,7 +3609,7 @@ static int btf_find_nested_struct(const struct btf *btf, const struct btf_type * info[i].off += off; if (nelems > 1) { - err = btf_repeat_fields(info, ret, nelems - 1, t->size); + err = btf_repeat_fields(info, info_cnt, ret, nelems - 1, t->size); if (err == 0) ret *= nelems; else @@ -3651,11 +3670,13 @@ static int btf_find_field_one(const struct btf *btf, switch (field_type) { case BPF_SPIN_LOCK: + case BPF_RES_SPIN_LOCK: case BPF_TIMER: case BPF_WORKQUEUE: case BPF_LIST_NODE: case BPF_RB_NODE: case BPF_REFCOUNT: + case BPF_TASK_WORK: ret = btf_find_struct(btf, var_type, off, sz, field_type, info_cnt ? &info[0] : &tmp); if (ret < 0) @@ -3664,8 +3685,9 @@ static int btf_find_field_one(const struct btf *btf, case BPF_KPTR_UNREF: case BPF_KPTR_REF: case BPF_KPTR_PERCPU: + case BPF_UPTR: ret = btf_find_kptr(btf, var_type, off, sz, - info_cnt ? &info[0] : &tmp); + info_cnt ? &info[0] : &tmp, field_mask); if (ret < 0) return ret; break; @@ -3684,10 +3706,10 @@ static int btf_find_field_one(const struct btf *btf, if (ret == BTF_FIELD_IGNORE) return 0; - if (nelems > info_cnt) + if (!info_cnt) return -E2BIG; if (nelems > 1) { - ret = btf_repeat_fields(info, 1, nelems - 1, sz); + ret = btf_repeat_fields(info, info_cnt, 1, nelems - 1, sz); if (ret < 0) return ret; } @@ -3759,6 +3781,7 @@ static int btf_find_field(const struct btf *btf, const struct btf_type *t, return -EINVAL; } +/* Callers have to ensure the life cycle of btf if it is program BTF */ static int btf_parse_kptr(const struct btf *btf, struct btf_field *field, struct btf_field_info *info) { @@ -3787,7 +3810,6 @@ static int btf_parse_kptr(const struct btf *btf, struct btf_field *field, field->kptr.dtor = NULL; id = info->kptr.type_id; kptr_btf = (struct btf *)btf; - btf_get(kptr_btf); goto found_dtor; } if (id < 0) @@ -3938,14 +3960,16 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type /* This needs to be kzalloc to zero out padding and unused fields, see * comment in btf_record_equal. */ - rec = kzalloc(offsetof(struct btf_record, fields[cnt]), GFP_KERNEL | __GFP_NOWARN); + rec = kzalloc(struct_size(rec, fields, cnt), GFP_KERNEL_ACCOUNT | __GFP_NOWARN); if (!rec) return ERR_PTR(-ENOMEM); rec->spin_lock_off = -EINVAL; + rec->res_spin_lock_off = -EINVAL; rec->timer_off = -EINVAL; rec->wq_off = -EINVAL; rec->refcount_off = -EINVAL; + rec->task_work_off = -EINVAL; for (i = 0; i < cnt; i++) { field_type_size = btf_field_type_size(info_arr[i].type); if (info_arr[i].off + field_type_size > value_size) { @@ -3970,6 +3994,11 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type /* Cache offset for faster lookup at runtime */ rec->spin_lock_off = rec->fields[i].offset; break; + case BPF_RES_SPIN_LOCK: + WARN_ON_ONCE(rec->spin_lock_off >= 0); + /* Cache offset for faster lookup at runtime */ + rec->res_spin_lock_off = rec->fields[i].offset; + break; case BPF_TIMER: WARN_ON_ONCE(rec->timer_off >= 0); /* Cache offset for faster lookup at runtime */ @@ -3980,6 +4009,10 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type /* Cache offset for faster lookup at runtime */ rec->wq_off = rec->fields[i].offset; break; + case BPF_TASK_WORK: + WARN_ON_ONCE(rec->task_work_off >= 0); + rec->task_work_off = rec->fields[i].offset; + break; case BPF_REFCOUNT: WARN_ON_ONCE(rec->refcount_off >= 0); /* Cache offset for faster lookup at runtime */ @@ -3988,6 +4021,7 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type case BPF_KPTR_UNREF: case BPF_KPTR_REF: case BPF_KPTR_PERCPU: + case BPF_UPTR: ret = btf_parse_kptr(btf, &rec->fields[i], &info_arr[i]); if (ret < 0) goto end; @@ -4012,9 +4046,15 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type rec->cnt++; } + if (rec->spin_lock_off >= 0 && rec->res_spin_lock_off >= 0) { + ret = -EINVAL; + goto end; + } + /* bpf_{list_head, rb_node} require bpf_spin_lock */ if ((btf_record_has_field(rec, BPF_LIST_HEAD) || - btf_record_has_field(rec, BPF_RB_ROOT)) && rec->spin_lock_off < 0) { + btf_record_has_field(rec, BPF_RB_ROOT)) && + (rec->spin_lock_off < 0 && rec->res_spin_lock_off < 0)) { ret = -EINVAL; goto end; } @@ -4047,12 +4087,28 @@ int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec) * Hence we only need to ensure that bpf_{list_head,rb_root} ownership * does not form cycles. */ - if (IS_ERR_OR_NULL(rec) || !(rec->field_mask & BPF_GRAPH_ROOT)) + if (IS_ERR_OR_NULL(rec) || !(rec->field_mask & (BPF_GRAPH_ROOT | BPF_UPTR))) return 0; for (i = 0; i < rec->cnt; i++) { struct btf_struct_meta *meta; + const struct btf_type *t; u32 btf_id; + if (rec->fields[i].type == BPF_UPTR) { + /* The uptr only supports pinning one page and cannot + * point to a kernel struct + */ + if (btf_is_kernel(rec->fields[i].kptr.btf)) + return -EINVAL; + t = btf_type_by_id(rec->fields[i].kptr.btf, + rec->fields[i].kptr.btf_id); + if (!t->size) + return -EINVAL; + if (t->size > PAGE_SIZE) + return -E2BIG; + continue; + } + if (!(rec->fields[i].type & BPF_GRAPH_ROOT)) continue; btf_id = rec->fields[i].graph_root.value_btf_id; @@ -4188,7 +4244,7 @@ static void btf_struct_show(const struct btf *btf, const struct btf_type *t, __btf_struct_show(btf, t, type_id, data, bits_offset, show); } -static struct btf_kind_operations struct_ops = { +static const struct btf_kind_operations struct_ops = { .check_meta = btf_struct_check_meta, .resolve = btf_struct_resolve, .check_member = btf_struct_check_member, @@ -4356,7 +4412,7 @@ static void btf_enum_show(const struct btf *btf, const struct btf_type *t, btf_show_end_type(show); } -static struct btf_kind_operations enum_ops = { +static const struct btf_kind_operations enum_ops = { .check_meta = btf_enum_check_meta, .resolve = btf_df_resolve, .check_member = btf_enum_check_member, @@ -4459,7 +4515,7 @@ static void btf_enum64_show(const struct btf *btf, const struct btf_type *t, btf_show_end_type(show); } -static struct btf_kind_operations enum64_ops = { +static const struct btf_kind_operations enum64_ops = { .check_meta = btf_enum64_check_meta, .resolve = btf_df_resolve, .check_member = btf_enum_check_member, @@ -4537,7 +4593,7 @@ done: btf_verifier_log(env, ")"); } -static struct btf_kind_operations func_proto_ops = { +static const struct btf_kind_operations func_proto_ops = { .check_meta = btf_func_proto_check_meta, .resolve = btf_df_resolve, /* @@ -4595,7 +4651,7 @@ static int btf_func_resolve(struct btf_verifier_env *env, return 0; } -static struct btf_kind_operations func_ops = { +static const struct btf_kind_operations func_ops = { .check_meta = btf_func_check_meta, .resolve = btf_func_resolve, .check_member = btf_df_check_member, @@ -4629,7 +4685,7 @@ static s32 btf_var_check_meta(struct btf_verifier_env *env, } if (!t->name_off || - !__btf_name_valid(env->btf, t->name_off)) { + !btf_name_valid_identifier(env->btf, t->name_off)) { btf_verifier_log_type(env, t, "Invalid name"); return -EINVAL; } @@ -4922,11 +4978,6 @@ static s32 btf_decl_tag_check_meta(struct btf_verifier_env *env, return -EINVAL; } - if (btf_type_kflag(t)) { - btf_verifier_log_type(env, t, "Invalid btf_info kind_flag"); - return -EINVAL; - } - component_idx = btf_type_decl_tag(t)->component_idx; if (component_idx < -1) { btf_verifier_log_type(env, t, "Invalid component_idx"); @@ -5517,36 +5568,72 @@ static const char *alloc_obj_fields[] = { static struct btf_struct_metas * btf_parse_struct_metas(struct bpf_verifier_log *log, struct btf *btf) { - union { - struct btf_id_set set; - struct { - u32 _cnt; - u32 _ids[ARRAY_SIZE(alloc_obj_fields)]; - } _arr; - } aof; struct btf_struct_metas *tab = NULL; + struct btf_id_set *aof; int i, n, id, ret; BUILD_BUG_ON(offsetof(struct btf_id_set, cnt) != 0); BUILD_BUG_ON(sizeof(struct btf_id_set) != sizeof(u32)); - memset(&aof, 0, sizeof(aof)); + aof = kmalloc(sizeof(*aof), GFP_KERNEL | __GFP_NOWARN); + if (!aof) + return ERR_PTR(-ENOMEM); + aof->cnt = 0; + for (i = 0; i < ARRAY_SIZE(alloc_obj_fields); i++) { /* Try to find whether this special type exists in user BTF, and * if so remember its ID so we can easily find it among members * of structs that we iterate in the next loop. */ + struct btf_id_set *new_aof; + id = btf_find_by_name_kind(btf, alloc_obj_fields[i], BTF_KIND_STRUCT); if (id < 0) continue; - aof.set.ids[aof.set.cnt++] = id; + + new_aof = krealloc(aof, struct_size(new_aof, ids, aof->cnt + 1), + GFP_KERNEL | __GFP_NOWARN); + if (!new_aof) { + ret = -ENOMEM; + goto free_aof; + } + aof = new_aof; + aof->ids[aof->cnt++] = id; + } + + n = btf_nr_types(btf); + for (i = 1; i < n; i++) { + /* Try to find if there are kptrs in user BTF and remember their ID */ + struct btf_id_set *new_aof; + struct btf_field_info tmp; + const struct btf_type *t; + + t = btf_type_by_id(btf, i); + if (!t) { + ret = -EINVAL; + goto free_aof; + } + + ret = btf_find_kptr(btf, t, 0, 0, &tmp, BPF_KPTR); + if (ret != BTF_FIELD_FOUND) + continue; + + new_aof = krealloc(aof, struct_size(new_aof, ids, aof->cnt + 1), + GFP_KERNEL | __GFP_NOWARN); + if (!new_aof) { + ret = -ENOMEM; + goto free_aof; + } + aof = new_aof; + aof->ids[aof->cnt++] = i; } - if (!aof.set.cnt) + if (!aof->cnt) { + kfree(aof); return NULL; - sort(&aof.set.ids, aof.set.cnt, sizeof(aof.set.ids[0]), btf_id_cmp_func, NULL); + } + sort(&aof->ids, aof->cnt, sizeof(aof->ids[0]), btf_id_cmp_func, NULL); - n = btf_nr_types(btf); for (i = 1; i < n; i++) { struct btf_struct_metas *new_tab; const struct btf_member *member; @@ -5556,23 +5643,19 @@ btf_parse_struct_metas(struct bpf_verifier_log *log, struct btf *btf) int j, tab_cnt; t = btf_type_by_id(btf, i); - if (!t) { - ret = -EINVAL; - goto free; - } if (!__btf_type_is_struct(t)) continue; cond_resched(); for_each_member(j, t, member) { - if (btf_id_set_contains(&aof.set, member->type)) + if (btf_id_set_contains(aof, member->type)) goto parse; } continue; parse: tab_cnt = tab ? tab->cnt : 0; - new_tab = krealloc(tab, offsetof(struct btf_struct_metas, types[tab_cnt + 1]), + new_tab = krealloc(tab, struct_size(new_tab, types, tab_cnt + 1), GFP_KERNEL | __GFP_NOWARN); if (!new_tab) { ret = -ENOMEM; @@ -5584,8 +5667,9 @@ btf_parse_struct_metas(struct bpf_verifier_log *log, struct btf *btf) type = &tab->types[tab->cnt]; type->btf_id = i; - record = btf_parse_fields(btf, t, BPF_SPIN_LOCK | BPF_LIST_HEAD | BPF_LIST_NODE | - BPF_RB_ROOT | BPF_RB_NODE | BPF_REFCOUNT, t->size); + record = btf_parse_fields(btf, t, BPF_SPIN_LOCK | BPF_RES_SPIN_LOCK | BPF_LIST_HEAD | BPF_LIST_NODE | + BPF_RB_ROOT | BPF_RB_NODE | BPF_REFCOUNT | + BPF_KPTR, t->size); /* The record cannot be unset, treat it as an error if so */ if (IS_ERR_OR_NULL(record)) { ret = PTR_ERR_OR_ZERO(record) ?: -EFAULT; @@ -5594,9 +5678,12 @@ btf_parse_struct_metas(struct bpf_verifier_log *log, struct btf *btf) type->record = record; tab->cnt++; } + kfree(aof); return tab; free: btf_struct_metas_free(tab); +free_aof: + kfree(aof); return ERR_PTR(ret); } @@ -6102,8 +6189,7 @@ int get_kern_ctx_btf_id(struct bpf_verifier_log *log, enum bpf_prog_type prog_ty return kctx_type_id; } -BTF_ID_LIST(bpf_ctx_convert_btf_id) -BTF_ID(struct, bpf_ctx_convert) +BTF_ID_LIST_SINGLE(bpf_ctx_convert_btf_id, struct, bpf_ctx_convert) static struct btf *btf_parse_base(struct btf_verifier_env *env, const char *name, void *data, unsigned int data_size) @@ -6243,12 +6329,11 @@ static struct btf *btf_parse_module(const char *module_name, const void *data, btf->kernel_btf = true; snprintf(btf->name, sizeof(btf->name), "%s", module_name); - btf->data = kvmalloc(data_size, GFP_KERNEL | __GFP_NOWARN); + btf->data = kvmemdup(data, data_size, GFP_KERNEL | __GFP_NOWARN); if (!btf->data) { err = -ENOMEM; goto errout; } - memcpy(btf->data, data, data_size); btf->data_size = data_size; err = btf_parse_hdr(env); @@ -6283,7 +6368,7 @@ static struct btf *btf_parse_module(const char *module_name, const void *data, errout: btf_verifier_env_free(env); - if (base_btf != vmlinux_btf) + if (!IS_ERR(base_btf) && base_btf != vmlinux_btf) btf_free(base_btf); if (btf) { kvfree(btf->data); @@ -6305,16 +6390,15 @@ struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog) return prog->aux->attach_btf; } -static bool is_int_ptr(struct btf *btf, const struct btf_type *t) +static bool is_void_or_int_ptr(struct btf *btf, const struct btf_type *t) { /* skip modifiers */ t = btf_type_skip_modifiers(btf, t->type, NULL); - - return btf_type_is_int(t); + return btf_type_is_void(t) || btf_type_is_int(t); } -static u32 get_ctx_arg_idx(struct btf *btf, const struct btf_type *func_proto, - int off) +u32 btf_ctx_arg_idx(struct btf *btf, const struct btf_type *func_proto, + int off) { const struct btf_param *args; const struct btf_type *t; @@ -6377,6 +6461,203 @@ int btf_ctx_arg_offset(const struct btf *btf, const struct btf_type *func_proto, return off; } +struct bpf_raw_tp_null_args { + const char *func; + u64 mask; +}; + +static const struct bpf_raw_tp_null_args raw_tp_null_args[] = { + /* sched */ + { "sched_pi_setprio", 0x10 }, + /* ... from sched_numa_pair_template event class */ + { "sched_stick_numa", 0x100 }, + { "sched_swap_numa", 0x100 }, + /* afs */ + { "afs_make_fs_call", 0x10 }, + { "afs_make_fs_calli", 0x10 }, + { "afs_make_fs_call1", 0x10 }, + { "afs_make_fs_call2", 0x10 }, + { "afs_protocol_error", 0x1 }, + { "afs_flock_ev", 0x10 }, + /* cachefiles */ + { "cachefiles_lookup", 0x1 | 0x200 }, + { "cachefiles_unlink", 0x1 }, + { "cachefiles_rename", 0x1 }, + { "cachefiles_prep_read", 0x1 }, + { "cachefiles_mark_active", 0x1 }, + { "cachefiles_mark_failed", 0x1 }, + { "cachefiles_mark_inactive", 0x1 }, + { "cachefiles_vfs_error", 0x1 }, + { "cachefiles_io_error", 0x1 }, + { "cachefiles_ondemand_open", 0x1 }, + { "cachefiles_ondemand_copen", 0x1 }, + { "cachefiles_ondemand_close", 0x1 }, + { "cachefiles_ondemand_read", 0x1 }, + { "cachefiles_ondemand_cread", 0x1 }, + { "cachefiles_ondemand_fd_write", 0x1 }, + { "cachefiles_ondemand_fd_release", 0x1 }, + /* ext4, from ext4__mballoc event class */ + { "ext4_mballoc_discard", 0x10 }, + { "ext4_mballoc_free", 0x10 }, + /* fib */ + { "fib_table_lookup", 0x100 }, + /* filelock */ + /* ... from filelock_lock event class */ + { "posix_lock_inode", 0x10 }, + { "fcntl_setlk", 0x10 }, + { "locks_remove_posix", 0x10 }, + { "flock_lock_inode", 0x10 }, + /* ... from filelock_lease event class */ + { "break_lease_noblock", 0x10 }, + { "break_lease_block", 0x10 }, + { "break_lease_unblock", 0x10 }, + { "generic_delete_lease", 0x10 }, + { "time_out_leases", 0x10 }, + /* host1x */ + { "host1x_cdma_push_gather", 0x10000 }, + /* huge_memory */ + { "mm_khugepaged_scan_pmd", 0x10 }, + { "mm_collapse_huge_page_isolate", 0x1 }, + { "mm_khugepaged_scan_file", 0x10 }, + { "mm_khugepaged_collapse_file", 0x10 }, + /* kmem */ + { "mm_page_alloc", 0x1 }, + { "mm_page_pcpu_drain", 0x1 }, + /* .. from mm_page event class */ + { "mm_page_alloc_zone_locked", 0x1 }, + /* netfs */ + { "netfs_failure", 0x10 }, + /* power */ + { "device_pm_callback_start", 0x10 }, + /* qdisc */ + { "qdisc_dequeue", 0x1000 }, + /* rxrpc */ + { "rxrpc_recvdata", 0x1 }, + { "rxrpc_resend", 0x10 }, + { "rxrpc_tq", 0x10 }, + { "rxrpc_client", 0x1 }, + /* skb */ + {"kfree_skb", 0x1000}, + /* sunrpc */ + { "xs_stream_read_data", 0x1 }, + /* ... from xprt_cong_event event class */ + { "xprt_reserve_cong", 0x10 }, + { "xprt_release_cong", 0x10 }, + { "xprt_get_cong", 0x10 }, + { "xprt_put_cong", 0x10 }, + /* tcp */ + { "tcp_send_reset", 0x11 }, + { "tcp_sendmsg_locked", 0x100 }, + /* tegra_apb_dma */ + { "tegra_dma_tx_status", 0x100 }, + /* timer_migration */ + { "tmigr_update_events", 0x1 }, + /* writeback, from writeback_folio_template event class */ + { "writeback_dirty_folio", 0x10 }, + { "folio_wait_writeback", 0x10 }, + /* rdma */ + { "mr_integ_alloc", 0x2000 }, + /* bpf_testmod */ + { "bpf_testmod_test_read", 0x0 }, + /* amdgpu */ + { "amdgpu_vm_bo_map", 0x1 }, + { "amdgpu_vm_bo_unmap", 0x1 }, + /* netfs */ + { "netfs_folioq", 0x1 }, + /* xfs from xfs_defer_pending_class */ + { "xfs_defer_create_intent", 0x1 }, + { "xfs_defer_cancel_list", 0x1 }, + { "xfs_defer_pending_finish", 0x1 }, + { "xfs_defer_pending_abort", 0x1 }, + { "xfs_defer_relog_intent", 0x1 }, + { "xfs_defer_isolate_paused", 0x1 }, + { "xfs_defer_item_pause", 0x1 }, + { "xfs_defer_item_unpause", 0x1 }, + /* xfs from xfs_defer_pending_item_class */ + { "xfs_defer_add_item", 0x1 }, + { "xfs_defer_cancel_item", 0x1 }, + { "xfs_defer_finish_item", 0x1 }, + /* xfs from xfs_icwalk_class */ + { "xfs_ioc_free_eofblocks", 0x10 }, + { "xfs_blockgc_free_space", 0x10 }, + /* xfs from xfs_btree_cur_class */ + { "xfs_btree_updkeys", 0x100 }, + { "xfs_btree_overlapped_query_range", 0x100 }, + /* xfs from xfs_imap_class*/ + { "xfs_map_blocks_found", 0x10000 }, + { "xfs_map_blocks_alloc", 0x10000 }, + { "xfs_iomap_alloc", 0x1000 }, + { "xfs_iomap_found", 0x1000 }, + /* xfs from xfs_fs_class */ + { "xfs_inodegc_flush", 0x1 }, + { "xfs_inodegc_push", 0x1 }, + { "xfs_inodegc_start", 0x1 }, + { "xfs_inodegc_stop", 0x1 }, + { "xfs_inodegc_queue", 0x1 }, + { "xfs_inodegc_throttle", 0x1 }, + { "xfs_fs_sync_fs", 0x1 }, + { "xfs_blockgc_start", 0x1 }, + { "xfs_blockgc_stop", 0x1 }, + { "xfs_blockgc_worker", 0x1 }, + { "xfs_blockgc_flush_all", 0x1 }, + /* xfs_scrub */ + { "xchk_nlinks_live_update", 0x10 }, + /* xfs_scrub from xchk_metapath_class */ + { "xchk_metapath_lookup", 0x100 }, + /* nfsd */ + { "nfsd_dirent", 0x1 }, + { "nfsd_file_acquire", 0x1001 }, + { "nfsd_file_insert_err", 0x1 }, + { "nfsd_file_cons_err", 0x1 }, + /* nfs4 */ + { "nfs4_setup_sequence", 0x1 }, + { "pnfs_update_layout", 0x10000 }, + { "nfs4_inode_callback_event", 0x200 }, + { "nfs4_inode_stateid_callback_event", 0x200 }, + /* nfs from pnfs_layout_event */ + { "pnfs_mds_fallback_pg_init_read", 0x10000 }, + { "pnfs_mds_fallback_pg_init_write", 0x10000 }, + { "pnfs_mds_fallback_pg_get_mirror_count", 0x10000 }, + { "pnfs_mds_fallback_read_done", 0x10000 }, + { "pnfs_mds_fallback_write_done", 0x10000 }, + { "pnfs_mds_fallback_read_pagelist", 0x10000 }, + { "pnfs_mds_fallback_write_pagelist", 0x10000 }, + /* coda */ + { "coda_dec_pic_run", 0x10 }, + { "coda_dec_pic_done", 0x10 }, + /* cfg80211 */ + { "cfg80211_scan_done", 0x11 }, + { "rdev_set_coalesce", 0x10 }, + { "cfg80211_report_wowlan_wakeup", 0x100 }, + { "cfg80211_inform_bss_frame", 0x100 }, + { "cfg80211_michael_mic_failure", 0x10000 }, + /* cfg80211 from wiphy_work_event */ + { "wiphy_work_queue", 0x10 }, + { "wiphy_work_run", 0x10 }, + { "wiphy_work_cancel", 0x10 }, + { "wiphy_work_flush", 0x10 }, + /* hugetlbfs */ + { "hugetlbfs_alloc_inode", 0x10 }, + /* spufs */ + { "spufs_context", 0x10 }, + /* kvm_hv */ + { "kvm_page_fault_enter", 0x100 }, + /* dpu */ + { "dpu_crtc_setup_mixer", 0x100 }, + /* binder */ + { "binder_transaction", 0x100 }, + /* bcachefs */ + { "btree_path_free", 0x100 }, + /* hfi1_tx */ + { "hfi1_sdma_progress", 0x1000 }, + /* iptfs */ + { "iptfs_ingress_postq_event", 0x1000 }, + /* neigh */ + { "neigh_update", 0x10 }, + /* snd_firewire_lib */ + { "amdtp_packet", 0x100 }, +}; + bool btf_ctx_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, struct bpf_insn_access_aux *info) @@ -6387,6 +6668,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, const char *tname = prog->aux->attach_func_name; struct bpf_verifier_log *log = info->log; const struct btf_param *args; + bool ptr_err_raw_tp = false; const char *tag_value; u32 nr_args, arg; int i, ret; @@ -6396,7 +6678,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, tname, off); return false; } - arg = get_ctx_arg_idx(btf, t, off); + arg = btf_ctx_arg_idx(btf, t, off); args = (const struct btf_param *)(t + 1); /* if (t == NULL) Fall back to default BPF prog with * MAX_BPF_FUNC_REG_ARGS u64 arguments. @@ -6416,8 +6698,11 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, if (arg == nr_args) { switch (prog->expected_attach_type) { - case BPF_LSM_CGROUP: case BPF_LSM_MAC: + /* mark we are accessing the return value */ + info->is_retval = true; + fallthrough; + case BPF_LSM_CGROUP: case BPF_TRACE_FEXIT: /* When LSM programs are attached to void LSM hooks * they use FEXIT trampolines and when attached to @@ -6466,7 +6751,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, /* skip modifiers */ while (btf_type_is_modifier(t)) t = btf_type_by_id(btf, t->type); - if (btf_type_is_small_int(t) || btf_is_any_enum(t) || __btf_type_is_struct(t)) + if (btf_type_is_small_int(t) || btf_is_any_enum(t) || btf_type_is_struct(t)) /* accessing a scalar */ return true; if (!btf_type_is_ptr(t)) { @@ -6478,6 +6763,12 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, return false; } + if (size != sizeof(u64)) { + bpf_log(log, "func '%s' size %d must be 8\n", + tname, size); + return false; + } + /* check for PTR_TO_RDONLY_BUF_OR_NULL or PTR_TO_RDWR_BUF_OR_NULL */ for (i = 0; i < prog->aux->ctx_arg_info_size; i++) { const struct bpf_ctx_arg_aux *ctx_arg_info = &prog->aux->ctx_arg_info[i]; @@ -6492,14 +6783,11 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, } } - if (t->type == 0) - /* This is a pointer to void. - * It is the same as scalar from the verifier safety pov. - * No further pointer walking is allowed. - */ - return true; - - if (is_int_ptr(btf, t)) + /* + * If it's a pointer to void, it's the same as scalar from the verifier + * safety POV. Either way, no futher pointer walking is allowed. + */ + if (is_void_or_int_ptr(btf, t)) return true; /* this is a pointer to another type */ @@ -6515,6 +6803,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, info->reg_type = ctx_arg_info->reg_type; info->btf = ctx_arg_info->btf ? : btf_vmlinux; info->btf_id = ctx_arg_info->btf_id; + info->ref_obj_id = ctx_arg_info->ref_obj_id; return true; } } @@ -6523,6 +6812,42 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, if (prog_args_trusted(prog)) info->reg_type |= PTR_TRUSTED; + if (btf_param_match_suffix(btf, &args[arg], "__nullable")) + info->reg_type |= PTR_MAYBE_NULL; + + if (prog->expected_attach_type == BPF_TRACE_RAW_TP) { + struct btf *btf = prog->aux->attach_btf; + const struct btf_type *t; + const char *tname; + + /* BTF lookups cannot fail, return false on error */ + t = btf_type_by_id(btf, prog->aux->attach_btf_id); + if (!t) + return false; + tname = btf_name_by_offset(btf, t->name_off); + if (!tname) + return false; + /* Checked by bpf_check_attach_target */ + tname += sizeof("btf_trace_") - 1; + for (i = 0; i < ARRAY_SIZE(raw_tp_null_args); i++) { + /* Is this a func with potential NULL args? */ + if (strcmp(tname, raw_tp_null_args[i].func)) + continue; + if (raw_tp_null_args[i].mask & (0x1ULL << (arg * 4))) + info->reg_type |= PTR_MAYBE_NULL; + /* Is the current arg IS_ERR? */ + if (raw_tp_null_args[i].mask & (0x2ULL << (arg * 4))) + ptr_err_raw_tp = true; + break; + } + /* If we don't know NULL-ness specification and the tracepoint + * is coming from a loadable module, be conservative and mark + * argument as PTR_MAYBE_NULL. + */ + if (i == ARRAY_SIZE(raw_tp_null_args) && btf_is_module(btf)) + info->reg_type |= PTR_MAYBE_NULL; + } + if (tgt_prog) { enum bpf_prog_type tgt_type; @@ -6545,7 +6870,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, info->btf_id = t->type; t = btf_type_by_id(btf, t->type); - if (btf_type_is_type_tag(t)) { + if (btf_type_is_type_tag(t) && !btf_type_kflag(t)) { tag_value = __btf_name_by_offset(btf, t->name_off); if (strcmp(tag_value, "user") == 0) info->reg_type |= MEM_USER; @@ -6567,6 +6892,15 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, bpf_log(log, "func '%s' arg%d has btf_id %d type %s '%s'\n", tname, arg, info->btf_id, btf_type_str(t), __btf_name_by_offset(btf, t->name_off)); + + /* Perform all checks on the validity of type for this argument, but if + * we know it can be IS_ERR at runtime, scrub pointer type and mark as + * scalar. + */ + if (ptr_err_raw_tp) { + bpf_log(log, "marking pointer arg%d as scalar as it may encode error", arg); + info->reg_type = SCALAR_VALUE; + } return true; } EXPORT_SYMBOL_GPL(btf_ctx_access); @@ -6575,6 +6909,7 @@ enum bpf_struct_walk_result { /* < 0 error */ WALK_SCALAR = 0, WALK_PTR, + WALK_PTR_UNTRUSTED, WALK_STRUCT, }; @@ -6795,7 +7130,7 @@ error: /* check type tag */ t = btf_type_by_id(btf, mtype->type); - if (btf_type_is_type_tag(t)) { + if (btf_type_is_type_tag(t) && !btf_type_kflag(t)) { tag_value = __btf_name_by_offset(btf, t->name_off); /* check __user tag */ if (strcmp(tag_value, "user") == 0) @@ -6816,6 +7151,8 @@ error: *field_name = mname; return WALK_PTR; } + + return WALK_PTR_UNTRUSTED; } /* Allow more flexible access within an int as long as @@ -6888,6 +7225,9 @@ int btf_struct_access(struct bpf_verifier_log *log, *next_btf_id = id; *flag = tmp_flag; return PTR_TO_BTF_ID; + case WALK_PTR_UNTRUSTED: + *flag = MEM_RDONLY | PTR_UNTRUSTED; + return PTR_TO_MEM; case WALK_SCALAR: return SCALAR_VALUE; case WALK_STRUCT: @@ -6983,7 +7323,7 @@ static int __get_type_size(struct btf *btf, u32 btf_id, if (btf_type_is_ptr(t)) /* kernel size of pointer. Not BPF's size of pointer*/ return sizeof(void *); - if (btf_type_is_int(t) || btf_is_any_enum(t) || __btf_type_is_struct(t)) + if (btf_type_is_int(t) || btf_is_any_enum(t) || btf_type_is_struct(t)) return t->size; return -EINVAL; } @@ -6992,7 +7332,7 @@ static u8 __get_type_fmodel_flags(const struct btf_type *t) { u8 flags = 0; - if (__btf_type_is_struct(t)) + if (btf_type_is_struct(t)) flags |= BTF_FMODEL_STRUCT_ARG; if (btf_type_is_signed_int(t)) flags |= BTF_FMODEL_SIGNED_ARG; @@ -7033,7 +7373,7 @@ int btf_distill_func_proto(struct bpf_verifier_log *log, return -EINVAL; } ret = __get_type_size(btf, func->type, &t); - if (ret < 0 || __btf_type_is_struct(t)) { + if (ret < 0 || btf_type_is_struct(t)) { bpf_log(log, "The function %s return type %s is unsupported.\n", tname, btf_type_str(t)); @@ -7300,11 +7640,12 @@ cand_cache_unlock: } enum btf_arg_tag { - ARG_TAG_CTX = BIT_ULL(0), - ARG_TAG_NONNULL = BIT_ULL(1), - ARG_TAG_TRUSTED = BIT_ULL(2), - ARG_TAG_NULLABLE = BIT_ULL(3), - ARG_TAG_ARENA = BIT_ULL(4), + ARG_TAG_CTX = BIT_ULL(0), + ARG_TAG_NONNULL = BIT_ULL(1), + ARG_TAG_TRUSTED = BIT_ULL(2), + ARG_TAG_UNTRUSTED = BIT_ULL(3), + ARG_TAG_NULLABLE = BIT_ULL(4), + ARG_TAG_ARENA = BIT_ULL(5), }; /* Process BTF of a function to produce high-level expectation of function @@ -7332,7 +7673,7 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog) return 0; if (!prog->aux->func_info) { - bpf_log(log, "Verifier bug\n"); + verifier_bug(env, "func_info undefined"); return -EFAULT; } @@ -7356,7 +7697,7 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog) tname = btf_name_by_offset(btf, fn_t->name_off); if (prog->aux->func_info_aux[subprog].unreliable) { - bpf_log(log, "Verifier bug in function %s()\n", tname); + verifier_bug(env, "unreliable BTF for function %s()", tname); return -EFAULT; } if (prog_type == BPF_PROG_TYPE_EXT) @@ -7412,6 +7753,8 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog) tags |= ARG_TAG_CTX; } else if (strcmp(tag, "trusted") == 0) { tags |= ARG_TAG_TRUSTED; + } else if (strcmp(tag, "untrusted") == 0) { + tags |= ARG_TAG_UNTRUSTED; } else if (strcmp(tag, "nonnull") == 0) { tags |= ARG_TAG_NONNULL; } else if (strcmp(tag, "nullable") == 0) { @@ -7472,6 +7815,38 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog) sub->args[i].btf_id = kern_type_id; continue; } + if (tags & ARG_TAG_UNTRUSTED) { + struct btf *vmlinux_btf; + int kern_type_id; + + if (tags & ~ARG_TAG_UNTRUSTED) { + bpf_log(log, "arg#%d untrusted cannot be combined with any other tags\n", i); + return -EINVAL; + } + + ref_t = btf_type_skip_modifiers(btf, t->type, NULL); + if (btf_type_is_void(ref_t) || btf_type_is_primitive(ref_t)) { + sub->args[i].arg_type = ARG_PTR_TO_MEM | MEM_RDONLY | PTR_UNTRUSTED; + sub->args[i].mem_size = 0; + continue; + } + + kern_type_id = btf_get_ptr_to_btf_id(log, i, btf, t); + if (kern_type_id < 0) + return kern_type_id; + + vmlinux_btf = bpf_get_btf_vmlinux(); + ref_t = btf_type_by_id(vmlinux_btf, kern_type_id); + if (!btf_type_is_struct(ref_t)) { + tname = __btf_name_by_offset(vmlinux_btf, t->name_off); + bpf_log(log, "arg#%d has type %s '%s', but only struct or primitive types are allowed\n", + i, btf_type_str(ref_t), tname); + return -EINVAL; + } + sub->args[i].arg_type = ARG_PTR_TO_BTF_ID | PTR_UNTRUSTED; + sub->args[i].btf_id = kern_type_id; + continue; + } if (tags & ARG_TAG_ARENA) { if (tags & ~ARG_TAG_ARENA) { bpf_log(log, "arg#%d arena cannot be combined with any other tags\n", i); @@ -7673,21 +8048,11 @@ int btf_new_fd(const union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) struct btf *btf_get_by_fd(int fd) { struct btf *btf; - struct fd f; - - f = fdget(fd); + CLASS(fd, f)(fd); - if (!f.file) - return ERR_PTR(-EBADF); - - if (f.file->f_op != &btf_fops) { - fdput(f); - return ERR_PTR(-EINVAL); - } - - btf = f.file->private_data; - refcount_inc(&btf->refcnt); - fdput(f); + btf = __btf_get_by_fd(f); + if (!IS_ERR(btf)) + refcount_inc(&btf->refcnt); return btf; } @@ -7804,17 +8169,6 @@ struct btf_module { static LIST_HEAD(btf_modules); static DEFINE_MUTEX(btf_module_mutex); -static ssize_t -btf_module_read(struct file *file, struct kobject *kobj, - struct bin_attribute *bin_attr, - char *buf, loff_t off, size_t len) -{ - const struct btf *btf = bin_attr->private; - - memcpy(buf, btf->data + off, len); - return len; -} - static void purge_cand_cache(struct btf *btf); static int btf_module_notify(struct notifier_block *nb, unsigned long op, @@ -7875,8 +8229,8 @@ static int btf_module_notify(struct notifier_block *nb, unsigned long op, attr->attr.name = btf->name; attr->attr.mode = 0444; attr->size = btf->data_size; - attr->private = btf; - attr->read = btf_module_read; + attr->private = btf->data; + attr->read = sysfs_bin_attr_simple_read; err = sysfs_create_bin_file(btf_kobj, attr); if (err) { @@ -8049,15 +8403,44 @@ BTF_ID_LIST_GLOBAL(btf_tracing_ids, MAX_BTF_TRACING_TYPE) BTF_TRACING_TYPE_xxx #undef BTF_TRACING_TYPE +/* Validate well-formedness of iter argument type. + * On success, return positive BTF ID of iter state's STRUCT type. + * On error, negative error is returned. + */ +int btf_check_iter_arg(struct btf *btf, const struct btf_type *func, int arg_idx) +{ + const struct btf_param *arg; + const struct btf_type *t; + const char *name; + int btf_id; + + if (btf_type_vlen(func) <= arg_idx) + return -EINVAL; + + arg = &btf_params(func)[arg_idx]; + t = btf_type_skip_modifiers(btf, arg->type, NULL); + if (!t || !btf_type_is_ptr(t)) + return -EINVAL; + t = btf_type_skip_modifiers(btf, t->type, &btf_id); + if (!t || !__btf_type_is_struct(t)) + return -EINVAL; + + name = btf_name_by_offset(btf, t->name_off); + if (!name || strncmp(name, ITER_PREFIX, sizeof(ITER_PREFIX) - 1)) + return -EINVAL; + + return btf_id; +} + static int btf_check_iter_kfuncs(struct btf *btf, const char *func_name, const struct btf_type *func, u32 func_flags) { u32 flags = func_flags & (KF_ITER_NEW | KF_ITER_NEXT | KF_ITER_DESTROY); - const char *name, *sfx, *iter_name; - const struct btf_param *arg; + const char *sfx, *iter_name; const struct btf_type *t; char exp_name[128]; u32 nr_args; + int btf_id; /* exactly one of KF_ITER_{NEW,NEXT,DESTROY} can be set */ if (!flags || (flags & (flags - 1))) @@ -8068,28 +8451,21 @@ static int btf_check_iter_kfuncs(struct btf *btf, const char *func_name, if (nr_args < 1) return -EINVAL; - arg = &btf_params(func)[0]; - t = btf_type_skip_modifiers(btf, arg->type, NULL); - if (!t || !btf_type_is_ptr(t)) - return -EINVAL; - t = btf_type_skip_modifiers(btf, t->type, NULL); - if (!t || !__btf_type_is_struct(t)) - return -EINVAL; - - name = btf_name_by_offset(btf, t->name_off); - if (!name || strncmp(name, ITER_PREFIX, sizeof(ITER_PREFIX) - 1)) - return -EINVAL; + btf_id = btf_check_iter_arg(btf, func, 0); + if (btf_id < 0) + return btf_id; /* sizeof(struct bpf_iter_<type>) should be a multiple of 8 to * fit nicely in stack slots */ + t = btf_type_by_id(btf, btf_id); if (t->size == 0 || (t->size % 8)) return -EINVAL; /* validate bpf_iter_<type>_{new,next,destroy}(struct bpf_iter_<type> *) * naming pattern */ - iter_name = name + sizeof(ITER_PREFIX) - 1; + iter_name = btf_name_by_offset(btf, t->name_off) + sizeof(ITER_PREFIX) - 1; if (flags & KF_ITER_NEW) sfx = "new"; else if (flags & KF_ITER_NEXT) @@ -8232,7 +8608,7 @@ static int btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook, /* Grow set */ set = krealloc(tab->sets[hook], - offsetof(struct btf_id_set8, pairs[set_cnt + add_set->cnt]), + struct_size(set, pairs, set_cnt + add_set->cnt), GFP_KERNEL | __GFP_NOWARN); if (!set) { ret = -ENOMEM; @@ -8304,13 +8680,20 @@ static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type) case BPF_PROG_TYPE_STRUCT_OPS: return BTF_KFUNC_HOOK_STRUCT_OPS; case BPF_PROG_TYPE_TRACING: + case BPF_PROG_TYPE_TRACEPOINT: + case BPF_PROG_TYPE_PERF_EVENT: case BPF_PROG_TYPE_LSM: return BTF_KFUNC_HOOK_TRACING; case BPF_PROG_TYPE_SYSCALL: return BTF_KFUNC_HOOK_SYSCALL; case BPF_PROG_TYPE_CGROUP_SKB: + case BPF_PROG_TYPE_CGROUP_SOCK: + case BPF_PROG_TYPE_CGROUP_DEVICE: case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: - return BTF_KFUNC_HOOK_CGROUP_SKB; + case BPF_PROG_TYPE_CGROUP_SOCKOPT: + case BPF_PROG_TYPE_CGROUP_SYSCTL: + case BPF_PROG_TYPE_SOCK_OPS: + return BTF_KFUNC_HOOK_CGROUP; case BPF_PROG_TYPE_SCHED_ACT: return BTF_KFUNC_HOOK_SCHED_ACT; case BPF_PROG_TYPE_SK_SKB: @@ -8511,7 +8894,7 @@ int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, u32 add_c } tab = krealloc(btf->dtor_kfunc_tab, - offsetof(struct btf_id_dtor_kfunc_tab, dtors[tab_cnt + add_cnt]), + struct_size(tab, dtors, tab_cnt + add_cnt), GFP_KERNEL | __GFP_NOWARN); if (!tab) { ret = -ENOMEM; @@ -8683,7 +9066,7 @@ static struct bpf_cand_cache *populate_cand_cache(struct bpf_cand_cache *cands, bpf_free_cands_from_cache(*cc); *cc = NULL; } - new_cands = kmemdup(cands, sizeof_cands(cands->cnt), GFP_KERNEL); + new_cands = kmemdup(cands, sizeof_cands(cands->cnt), GFP_KERNEL_ACCOUNT); if (!new_cands) { bpf_free_cands(cands); return ERR_PTR(-ENOMEM); @@ -8691,7 +9074,7 @@ static struct bpf_cand_cache *populate_cand_cache(struct bpf_cand_cache *cands, /* strdup the name, since it will stay in cache. * the cands->name points to strings in prog's BTF and the prog can be unloaded. */ - new_cands->name = kmemdup_nul(cands->name, cands->name_len, GFP_KERNEL); + new_cands->name = kmemdup_nul(cands->name, cands->name_len, GFP_KERNEL_ACCOUNT); bpf_free_cands(cands); if (!new_cands->name) { kfree(new_cands); @@ -8775,7 +9158,7 @@ bpf_core_add_cands(struct bpf_cand_cache *cands, const struct btf *targ_btf, continue; /* most of the time there is only one candidate for a given kind+name pair */ - new_cands = kmalloc(sizeof_cands(cands->cnt + 1), GFP_KERNEL); + new_cands = kmalloc(sizeof_cands(cands->cnt + 1), GFP_KERNEL_ACCOUNT); if (!new_cands) { bpf_free_cands(cands); return ERR_PTR(-ENOMEM); @@ -8886,15 +9269,24 @@ int bpf_core_apply(struct bpf_core_ctx *ctx, const struct bpf_core_relo *relo, struct bpf_core_cand_list cands = {}; struct bpf_core_relo_res targ_res; struct bpf_core_spec *specs; + const struct btf_type *type; int err; /* ~4k of temp memory necessary to convert LLVM spec like "0:1:0:5" * into arrays of btf_ids of struct fields and array indices. */ - specs = kcalloc(3, sizeof(*specs), GFP_KERNEL); + specs = kcalloc(3, sizeof(*specs), GFP_KERNEL_ACCOUNT); if (!specs) return -ENOMEM; + type = btf_type_by_id(ctx->btf, relo->type_id); + if (!type) { + bpf_log(ctx->log, "relo #%u: bad type id %u\n", + relo_idx, relo->type_id); + kfree(specs); + return -EINVAL; + } + if (need_cands) { struct bpf_cand_cache *cc; int i; @@ -8908,7 +9300,7 @@ int bpf_core_apply(struct bpf_core_ctx *ctx, const struct bpf_core_relo *relo, goto out; } if (cc->cnt) { - cands.cands = kcalloc(cc->cnt, sizeof(*cands.cands), GFP_KERNEL); + cands.cands = kcalloc(cc->cnt, sizeof(*cands.cands), GFP_KERNEL_ACCOUNT); if (!cands.cands) { err = -ENOMEM; goto out; @@ -9060,8 +9452,7 @@ btf_add_struct_ops(struct btf *btf, struct bpf_struct_ops *st_ops, tab = btf->struct_ops_tab; if (!tab) { - tab = kzalloc(offsetof(struct btf_struct_ops_tab, ops[4]), - GFP_KERNEL); + tab = kzalloc(struct_size(tab, ops, 4), GFP_KERNEL); if (!tab) return -ENOMEM; tab->capacity = 4; @@ -9074,8 +9465,7 @@ btf_add_struct_ops(struct btf *btf, struct bpf_struct_ops *st_ops, if (tab->cnt == tab->capacity) { new_tab = krealloc(tab, - offsetof(struct btf_struct_ops_tab, - ops[tab->capacity * 2]), + struct_size(tab, ops, tab->capacity * 2), GFP_KERNEL); if (!new_tab) return -ENOMEM; |