summaryrefslogtreecommitdiff
path: root/kernel/bpf/btf.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/bpf/btf.c')
-rw-r--r--kernel/bpf/btf.c808
1 files changed, 599 insertions, 209 deletions
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 520f49f422fe..0de8fc8a0e0b 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -26,6 +26,7 @@
#include <linux/bsearch.h>
#include <linux/kobject.h>
#include <linux/sysfs.h>
+#include <linux/overflow.h>
#include <net/netfilter/nf_bpf_link.h>
@@ -212,7 +213,7 @@ enum btf_kfunc_hook {
BTF_KFUNC_HOOK_TRACING,
BTF_KFUNC_HOOK_SYSCALL,
BTF_KFUNC_HOOK_FMODRET,
- BTF_KFUNC_HOOK_CGROUP_SKB,
+ BTF_KFUNC_HOOK_CGROUP,
BTF_KFUNC_HOOK_SCHED_ACT,
BTF_KFUNC_HOOK_SK_SKB,
BTF_KFUNC_HOOK_SOCKET_FILTER,
@@ -498,11 +499,6 @@ bool btf_type_is_void(const struct btf_type *t)
return t == &btf_void;
}
-static bool btf_type_is_fwd(const struct btf_type *t)
-{
- return BTF_INFO_KIND(t->info) == BTF_KIND_FWD;
-}
-
static bool btf_type_is_datasec(const struct btf_type *t)
{
return BTF_INFO_KIND(t->info) == BTF_KIND_DATASEC;
@@ -611,6 +607,7 @@ s32 bpf_find_btf_id(const char *name, u32 kind, struct btf **btf_p)
spin_unlock_bh(&btf_idr_lock);
return ret;
}
+EXPORT_SYMBOL_GPL(bpf_find_btf_id);
const struct btf_type *btf_type_skip_modifiers(const struct btf *btf,
u32 id, u32 *res_id)
@@ -790,7 +787,7 @@ const char *btf_str_by_offset(const struct btf *btf, u32 offset)
return NULL;
}
-static bool __btf_name_valid(const struct btf *btf, u32 offset)
+static bool btf_name_valid_identifier(const struct btf *btf, u32 offset)
{
/* offset must be valid */
const char *src = btf_str_by_offset(btf, offset);
@@ -811,11 +808,6 @@ static bool __btf_name_valid(const struct btf *btf, u32 offset)
return !*src;
}
-static bool btf_name_valid_identifier(const struct btf *btf, u32 offset)
-{
- return __btf_name_valid(btf, offset);
-}
-
/* Allow any printable character in DATASEC names */
static bool btf_name_valid_section(const struct btf *btf, u32 offset)
{
@@ -823,9 +815,11 @@ static bool btf_name_valid_section(const struct btf *btf, u32 offset)
const char *src = btf_str_by_offset(btf, offset);
const char *src_limit;
+ if (!*src)
+ return false;
+
/* set a limit on identifier length */
src_limit = src + KSYM_NAME_LEN;
- src++;
while (*src && src < src_limit) {
if (!isprint(*src))
return false;
@@ -864,26 +858,43 @@ const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id)
EXPORT_SYMBOL_GPL(btf_type_by_id);
/*
- * Regular int is not a bit field and it must be either
- * u8/u16/u32/u64 or __int128.
+ * Check that the type @t is a regular int. This means that @t is not
+ * a bit field and it has the same size as either of u8/u16/u32/u64
+ * or __int128. If @expected_size is not zero, then size of @t should
+ * be the same. A caller should already have checked that the type @t
+ * is an integer.
*/
+static bool __btf_type_int_is_regular(const struct btf_type *t, size_t expected_size)
+{
+ u32 int_data = btf_type_int(t);
+ u8 nr_bits = BTF_INT_BITS(int_data);
+ u8 nr_bytes = BITS_ROUNDUP_BYTES(nr_bits);
+
+ return BITS_PER_BYTE_MASKED(nr_bits) == 0 &&
+ BTF_INT_OFFSET(int_data) == 0 &&
+ (nr_bytes <= 16 && is_power_of_2(nr_bytes)) &&
+ (expected_size == 0 || nr_bytes == expected_size);
+}
+
static bool btf_type_int_is_regular(const struct btf_type *t)
{
- u8 nr_bits, nr_bytes;
- u32 int_data;
+ return __btf_type_int_is_regular(t, 0);
+}
- int_data = btf_type_int(t);
- nr_bits = BTF_INT_BITS(int_data);
- nr_bytes = BITS_ROUNDUP_BYTES(nr_bits);
- if (BITS_PER_BYTE_MASKED(nr_bits) ||
- BTF_INT_OFFSET(int_data) ||
- (nr_bytes != sizeof(u8) && nr_bytes != sizeof(u16) &&
- nr_bytes != sizeof(u32) && nr_bytes != sizeof(u64) &&
- nr_bytes != (2 * sizeof(u64)))) {
- return false;
- }
+bool btf_type_is_i32(const struct btf_type *t)
+{
+ return btf_type_is_int(t) && __btf_type_int_is_regular(t, 4);
+}
- return true;
+bool btf_type_is_i64(const struct btf_type *t)
+{
+ return btf_type_is_int(t) && __btf_type_int_is_regular(t, 8);
+}
+
+bool btf_type_is_primitive(const struct btf_type *t)
+{
+ return (btf_type_is_int(t) && btf_type_int_is_regular(t)) ||
+ btf_is_any_enum(t);
}
/*
@@ -2583,7 +2594,7 @@ static int btf_ref_type_check_meta(struct btf_verifier_env *env,
return -EINVAL;
}
- if (btf_type_kflag(t)) {
+ if (btf_type_kflag(t) && !btf_type_is_type_tag(t)) {
btf_verifier_log_type(env, t, "Invalid btf_info kind_flag");
return -EINVAL;
}
@@ -2811,7 +2822,7 @@ static void btf_ref_type_log(struct btf_verifier_env *env,
btf_verifier_log(env, "type_id=%u", t->type);
}
-static struct btf_kind_operations modifier_ops = {
+static const struct btf_kind_operations modifier_ops = {
.check_meta = btf_ref_type_check_meta,
.resolve = btf_modifier_resolve,
.check_member = btf_modifier_check_member,
@@ -2820,7 +2831,7 @@ static struct btf_kind_operations modifier_ops = {
.show = btf_modifier_show,
};
-static struct btf_kind_operations ptr_ops = {
+static const struct btf_kind_operations ptr_ops = {
.check_meta = btf_ref_type_check_meta,
.resolve = btf_ptr_resolve,
.check_member = btf_ptr_check_member,
@@ -2861,7 +2872,7 @@ static void btf_fwd_type_log(struct btf_verifier_env *env,
btf_verifier_log(env, "%s", btf_type_kflag(t) ? "union" : "struct");
}
-static struct btf_kind_operations fwd_ops = {
+static const struct btf_kind_operations fwd_ops = {
.check_meta = btf_fwd_check_meta,
.resolve = btf_df_resolve,
.check_member = btf_df_check_member,
@@ -3112,7 +3123,7 @@ static void btf_array_show(const struct btf *btf, const struct btf_type *t,
__btf_array_show(btf, t, type_id, data, bits_offset, show);
}
-static struct btf_kind_operations array_ops = {
+static const struct btf_kind_operations array_ops = {
.check_meta = btf_array_check_meta,
.resolve = btf_array_resolve,
.check_member = btf_array_check_member,
@@ -3337,9 +3348,11 @@ static int btf_find_struct(const struct btf *btf, const struct btf_type *t,
}
static int btf_find_kptr(const struct btf *btf, const struct btf_type *t,
- u32 off, int sz, struct btf_field_info *info)
+ u32 off, int sz, struct btf_field_info *info, u32 field_mask)
{
enum btf_field_type type;
+ const char *tag_value;
+ bool is_type_tag;
u32 res_id;
/* Permit modifiers on the pointer itself */
@@ -3349,21 +3362,27 @@ static int btf_find_kptr(const struct btf *btf, const struct btf_type *t,
if (!btf_type_is_ptr(t))
return BTF_FIELD_IGNORE;
t = btf_type_by_id(btf, t->type);
-
- if (!btf_type_is_type_tag(t))
+ is_type_tag = btf_type_is_type_tag(t) && !btf_type_kflag(t);
+ if (!is_type_tag)
return BTF_FIELD_IGNORE;
/* Reject extra tags */
if (btf_type_is_type_tag(btf_type_by_id(btf, t->type)))
return -EINVAL;
- if (!strcmp("kptr_untrusted", __btf_name_by_offset(btf, t->name_off)))
+ tag_value = __btf_name_by_offset(btf, t->name_off);
+ if (!strcmp("kptr_untrusted", tag_value))
type = BPF_KPTR_UNREF;
- else if (!strcmp("kptr", __btf_name_by_offset(btf, t->name_off)))
+ else if (!strcmp("kptr", tag_value))
type = BPF_KPTR_REF;
- else if (!strcmp("percpu_kptr", __btf_name_by_offset(btf, t->name_off)))
+ else if (!strcmp("percpu_kptr", tag_value))
type = BPF_KPTR_PERCPU;
+ else if (!strcmp("uptr", tag_value))
+ type = BPF_UPTR;
else
return -EINVAL;
+ if (!(type & field_mask))
+ return BTF_FIELD_IGNORE;
+
/* Get the base type */
t = btf_type_skip_modifiers(btf, t->type, &res_id);
/* Only pointer to struct is allowed */
@@ -3441,7 +3460,8 @@ btf_find_graph_root(const struct btf *btf, const struct btf_type *pt,
node_field_name = strstr(value_type, ":");
if (!node_field_name)
return -EINVAL;
- value_type = kstrndup(value_type, node_field_name - value_type, GFP_KERNEL | __GFP_NOWARN);
+ value_type = kstrndup(value_type, node_field_name - value_type,
+ GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
if (!value_type)
return -ENOMEM;
id = btf_find_by_name_kind(btf, value_type, BTF_KIND_STRUCT);
@@ -3458,54 +3478,48 @@ btf_find_graph_root(const struct btf *btf, const struct btf_type *pt,
return BTF_FIELD_FOUND;
}
-#define field_mask_test_name(field_type, field_type_str) \
- if (field_mask & field_type && !strcmp(name, field_type_str)) { \
- type = field_type; \
- goto end; \
- }
-
static int btf_get_field_type(const struct btf *btf, const struct btf_type *var_type,
- u32 field_mask, u32 *seen_mask,
- int *align, int *sz)
-{
- int type = 0;
+ u32 field_mask, u32 *seen_mask, int *align, int *sz)
+{
+ const struct {
+ enum btf_field_type type;
+ const char *const name;
+ const bool is_unique;
+ } field_types[] = {
+ { BPF_SPIN_LOCK, "bpf_spin_lock", true },
+ { BPF_RES_SPIN_LOCK, "bpf_res_spin_lock", true },
+ { BPF_TIMER, "bpf_timer", true },
+ { BPF_WORKQUEUE, "bpf_wq", true },
+ { BPF_TASK_WORK, "bpf_task_work", true },
+ { BPF_LIST_HEAD, "bpf_list_head", false },
+ { BPF_LIST_NODE, "bpf_list_node", false },
+ { BPF_RB_ROOT, "bpf_rb_root", false },
+ { BPF_RB_NODE, "bpf_rb_node", false },
+ { BPF_REFCOUNT, "bpf_refcount", false },
+ };
+ int type = 0, i;
const char *name = __btf_name_by_offset(btf, var_type->name_off);
-
- if (field_mask & BPF_SPIN_LOCK) {
- if (!strcmp(name, "bpf_spin_lock")) {
- if (*seen_mask & BPF_SPIN_LOCK)
- return -E2BIG;
- *seen_mask |= BPF_SPIN_LOCK;
- type = BPF_SPIN_LOCK;
- goto end;
- }
- }
- if (field_mask & BPF_TIMER) {
- if (!strcmp(name, "bpf_timer")) {
- if (*seen_mask & BPF_TIMER)
- return -E2BIG;
- *seen_mask |= BPF_TIMER;
- type = BPF_TIMER;
- goto end;
- }
- }
- if (field_mask & BPF_WORKQUEUE) {
- if (!strcmp(name, "bpf_wq")) {
- if (*seen_mask & BPF_WORKQUEUE)
+ const char *field_type_name;
+ enum btf_field_type field_type;
+ bool is_unique;
+
+ for (i = 0; i < ARRAY_SIZE(field_types); ++i) {
+ field_type = field_types[i].type;
+ field_type_name = field_types[i].name;
+ is_unique = field_types[i].is_unique;
+ if (!(field_mask & field_type) || strcmp(name, field_type_name))
+ continue;
+ if (is_unique) {
+ if (*seen_mask & field_type)
return -E2BIG;
- *seen_mask |= BPF_WORKQUEUE;
- type = BPF_WORKQUEUE;
- goto end;
+ *seen_mask |= field_type;
}
+ type = field_type;
+ goto end;
}
- field_mask_test_name(BPF_LIST_HEAD, "bpf_list_head");
- field_mask_test_name(BPF_LIST_NODE, "bpf_list_node");
- field_mask_test_name(BPF_RB_ROOT, "bpf_rb_root");
- field_mask_test_name(BPF_RB_NODE, "bpf_rb_node");
- field_mask_test_name(BPF_REFCOUNT, "bpf_refcount");
/* Only return BPF_KPTR when all other types with matchable names fail */
- if (field_mask & BPF_KPTR && !__btf_type_is_struct(var_type)) {
+ if (field_mask & (BPF_KPTR | BPF_UPTR) && !__btf_type_is_struct(var_type)) {
type = BPF_KPTR_REF;
goto end;
}
@@ -3516,8 +3530,6 @@ end:
return type;
}
-#undef field_mask_test_name
-
/* Repeat a number of fields for a specified number of times.
*
* Copy the fields starting from the first field and repeat them for
@@ -3526,7 +3538,7 @@ end:
* (i + 1) * elem_size
* where i is the repeat index and elem_size is the size of an element.
*/
-static int btf_repeat_fields(struct btf_field_info *info,
+static int btf_repeat_fields(struct btf_field_info *info, int info_cnt,
u32 field_cnt, u32 repeat_cnt, u32 elem_size)
{
u32 i, j;
@@ -3538,6 +3550,7 @@ static int btf_repeat_fields(struct btf_field_info *info,
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
case BPF_KPTR_PERCPU:
+ case BPF_UPTR:
case BPF_LIST_HEAD:
case BPF_RB_ROOT:
break;
@@ -3546,6 +3559,12 @@ static int btf_repeat_fields(struct btf_field_info *info,
}
}
+ /* The type of struct size or variable size is u32,
+ * so the multiplication will not overflow.
+ */
+ if (field_cnt * (repeat_cnt + 1) > info_cnt)
+ return -E2BIG;
+
cur = field_cnt;
for (i = 0; i < repeat_cnt; i++) {
memcpy(&info[cur], &info[0], field_cnt * sizeof(info[0]));
@@ -3590,7 +3609,7 @@ static int btf_find_nested_struct(const struct btf *btf, const struct btf_type *
info[i].off += off;
if (nelems > 1) {
- err = btf_repeat_fields(info, ret, nelems - 1, t->size);
+ err = btf_repeat_fields(info, info_cnt, ret, nelems - 1, t->size);
if (err == 0)
ret *= nelems;
else
@@ -3651,11 +3670,13 @@ static int btf_find_field_one(const struct btf *btf,
switch (field_type) {
case BPF_SPIN_LOCK:
+ case BPF_RES_SPIN_LOCK:
case BPF_TIMER:
case BPF_WORKQUEUE:
case BPF_LIST_NODE:
case BPF_RB_NODE:
case BPF_REFCOUNT:
+ case BPF_TASK_WORK:
ret = btf_find_struct(btf, var_type, off, sz, field_type,
info_cnt ? &info[0] : &tmp);
if (ret < 0)
@@ -3664,8 +3685,9 @@ static int btf_find_field_one(const struct btf *btf,
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
case BPF_KPTR_PERCPU:
+ case BPF_UPTR:
ret = btf_find_kptr(btf, var_type, off, sz,
- info_cnt ? &info[0] : &tmp);
+ info_cnt ? &info[0] : &tmp, field_mask);
if (ret < 0)
return ret;
break;
@@ -3684,10 +3706,10 @@ static int btf_find_field_one(const struct btf *btf,
if (ret == BTF_FIELD_IGNORE)
return 0;
- if (nelems > info_cnt)
+ if (!info_cnt)
return -E2BIG;
if (nelems > 1) {
- ret = btf_repeat_fields(info, 1, nelems - 1, sz);
+ ret = btf_repeat_fields(info, info_cnt, 1, nelems - 1, sz);
if (ret < 0)
return ret;
}
@@ -3759,6 +3781,7 @@ static int btf_find_field(const struct btf *btf, const struct btf_type *t,
return -EINVAL;
}
+/* Callers have to ensure the life cycle of btf if it is program BTF */
static int btf_parse_kptr(const struct btf *btf, struct btf_field *field,
struct btf_field_info *info)
{
@@ -3787,7 +3810,6 @@ static int btf_parse_kptr(const struct btf *btf, struct btf_field *field,
field->kptr.dtor = NULL;
id = info->kptr.type_id;
kptr_btf = (struct btf *)btf;
- btf_get(kptr_btf);
goto found_dtor;
}
if (id < 0)
@@ -3938,14 +3960,16 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
/* This needs to be kzalloc to zero out padding and unused fields, see
* comment in btf_record_equal.
*/
- rec = kzalloc(offsetof(struct btf_record, fields[cnt]), GFP_KERNEL | __GFP_NOWARN);
+ rec = kzalloc(struct_size(rec, fields, cnt), GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
if (!rec)
return ERR_PTR(-ENOMEM);
rec->spin_lock_off = -EINVAL;
+ rec->res_spin_lock_off = -EINVAL;
rec->timer_off = -EINVAL;
rec->wq_off = -EINVAL;
rec->refcount_off = -EINVAL;
+ rec->task_work_off = -EINVAL;
for (i = 0; i < cnt; i++) {
field_type_size = btf_field_type_size(info_arr[i].type);
if (info_arr[i].off + field_type_size > value_size) {
@@ -3970,6 +3994,11 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
/* Cache offset for faster lookup at runtime */
rec->spin_lock_off = rec->fields[i].offset;
break;
+ case BPF_RES_SPIN_LOCK:
+ WARN_ON_ONCE(rec->spin_lock_off >= 0);
+ /* Cache offset for faster lookup at runtime */
+ rec->res_spin_lock_off = rec->fields[i].offset;
+ break;
case BPF_TIMER:
WARN_ON_ONCE(rec->timer_off >= 0);
/* Cache offset for faster lookup at runtime */
@@ -3980,6 +4009,10 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
/* Cache offset for faster lookup at runtime */
rec->wq_off = rec->fields[i].offset;
break;
+ case BPF_TASK_WORK:
+ WARN_ON_ONCE(rec->task_work_off >= 0);
+ rec->task_work_off = rec->fields[i].offset;
+ break;
case BPF_REFCOUNT:
WARN_ON_ONCE(rec->refcount_off >= 0);
/* Cache offset for faster lookup at runtime */
@@ -3988,6 +4021,7 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
case BPF_KPTR_PERCPU:
+ case BPF_UPTR:
ret = btf_parse_kptr(btf, &rec->fields[i], &info_arr[i]);
if (ret < 0)
goto end;
@@ -4012,9 +4046,15 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
rec->cnt++;
}
+ if (rec->spin_lock_off >= 0 && rec->res_spin_lock_off >= 0) {
+ ret = -EINVAL;
+ goto end;
+ }
+
/* bpf_{list_head, rb_node} require bpf_spin_lock */
if ((btf_record_has_field(rec, BPF_LIST_HEAD) ||
- btf_record_has_field(rec, BPF_RB_ROOT)) && rec->spin_lock_off < 0) {
+ btf_record_has_field(rec, BPF_RB_ROOT)) &&
+ (rec->spin_lock_off < 0 && rec->res_spin_lock_off < 0)) {
ret = -EINVAL;
goto end;
}
@@ -4047,12 +4087,28 @@ int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec)
* Hence we only need to ensure that bpf_{list_head,rb_root} ownership
* does not form cycles.
*/
- if (IS_ERR_OR_NULL(rec) || !(rec->field_mask & BPF_GRAPH_ROOT))
+ if (IS_ERR_OR_NULL(rec) || !(rec->field_mask & (BPF_GRAPH_ROOT | BPF_UPTR)))
return 0;
for (i = 0; i < rec->cnt; i++) {
struct btf_struct_meta *meta;
+ const struct btf_type *t;
u32 btf_id;
+ if (rec->fields[i].type == BPF_UPTR) {
+ /* The uptr only supports pinning one page and cannot
+ * point to a kernel struct
+ */
+ if (btf_is_kernel(rec->fields[i].kptr.btf))
+ return -EINVAL;
+ t = btf_type_by_id(rec->fields[i].kptr.btf,
+ rec->fields[i].kptr.btf_id);
+ if (!t->size)
+ return -EINVAL;
+ if (t->size > PAGE_SIZE)
+ return -E2BIG;
+ continue;
+ }
+
if (!(rec->fields[i].type & BPF_GRAPH_ROOT))
continue;
btf_id = rec->fields[i].graph_root.value_btf_id;
@@ -4188,7 +4244,7 @@ static void btf_struct_show(const struct btf *btf, const struct btf_type *t,
__btf_struct_show(btf, t, type_id, data, bits_offset, show);
}
-static struct btf_kind_operations struct_ops = {
+static const struct btf_kind_operations struct_ops = {
.check_meta = btf_struct_check_meta,
.resolve = btf_struct_resolve,
.check_member = btf_struct_check_member,
@@ -4356,7 +4412,7 @@ static void btf_enum_show(const struct btf *btf, const struct btf_type *t,
btf_show_end_type(show);
}
-static struct btf_kind_operations enum_ops = {
+static const struct btf_kind_operations enum_ops = {
.check_meta = btf_enum_check_meta,
.resolve = btf_df_resolve,
.check_member = btf_enum_check_member,
@@ -4459,7 +4515,7 @@ static void btf_enum64_show(const struct btf *btf, const struct btf_type *t,
btf_show_end_type(show);
}
-static struct btf_kind_operations enum64_ops = {
+static const struct btf_kind_operations enum64_ops = {
.check_meta = btf_enum64_check_meta,
.resolve = btf_df_resolve,
.check_member = btf_enum_check_member,
@@ -4537,7 +4593,7 @@ done:
btf_verifier_log(env, ")");
}
-static struct btf_kind_operations func_proto_ops = {
+static const struct btf_kind_operations func_proto_ops = {
.check_meta = btf_func_proto_check_meta,
.resolve = btf_df_resolve,
/*
@@ -4595,7 +4651,7 @@ static int btf_func_resolve(struct btf_verifier_env *env,
return 0;
}
-static struct btf_kind_operations func_ops = {
+static const struct btf_kind_operations func_ops = {
.check_meta = btf_func_check_meta,
.resolve = btf_func_resolve,
.check_member = btf_df_check_member,
@@ -4629,7 +4685,7 @@ static s32 btf_var_check_meta(struct btf_verifier_env *env,
}
if (!t->name_off ||
- !__btf_name_valid(env->btf, t->name_off)) {
+ !btf_name_valid_identifier(env->btf, t->name_off)) {
btf_verifier_log_type(env, t, "Invalid name");
return -EINVAL;
}
@@ -4922,11 +4978,6 @@ static s32 btf_decl_tag_check_meta(struct btf_verifier_env *env,
return -EINVAL;
}
- if (btf_type_kflag(t)) {
- btf_verifier_log_type(env, t, "Invalid btf_info kind_flag");
- return -EINVAL;
- }
-
component_idx = btf_type_decl_tag(t)->component_idx;
if (component_idx < -1) {
btf_verifier_log_type(env, t, "Invalid component_idx");
@@ -5517,36 +5568,72 @@ static const char *alloc_obj_fields[] = {
static struct btf_struct_metas *
btf_parse_struct_metas(struct bpf_verifier_log *log, struct btf *btf)
{
- union {
- struct btf_id_set set;
- struct {
- u32 _cnt;
- u32 _ids[ARRAY_SIZE(alloc_obj_fields)];
- } _arr;
- } aof;
struct btf_struct_metas *tab = NULL;
+ struct btf_id_set *aof;
int i, n, id, ret;
BUILD_BUG_ON(offsetof(struct btf_id_set, cnt) != 0);
BUILD_BUG_ON(sizeof(struct btf_id_set) != sizeof(u32));
- memset(&aof, 0, sizeof(aof));
+ aof = kmalloc(sizeof(*aof), GFP_KERNEL | __GFP_NOWARN);
+ if (!aof)
+ return ERR_PTR(-ENOMEM);
+ aof->cnt = 0;
+
for (i = 0; i < ARRAY_SIZE(alloc_obj_fields); i++) {
/* Try to find whether this special type exists in user BTF, and
* if so remember its ID so we can easily find it among members
* of structs that we iterate in the next loop.
*/
+ struct btf_id_set *new_aof;
+
id = btf_find_by_name_kind(btf, alloc_obj_fields[i], BTF_KIND_STRUCT);
if (id < 0)
continue;
- aof.set.ids[aof.set.cnt++] = id;
+
+ new_aof = krealloc(aof, struct_size(new_aof, ids, aof->cnt + 1),
+ GFP_KERNEL | __GFP_NOWARN);
+ if (!new_aof) {
+ ret = -ENOMEM;
+ goto free_aof;
+ }
+ aof = new_aof;
+ aof->ids[aof->cnt++] = id;
+ }
+
+ n = btf_nr_types(btf);
+ for (i = 1; i < n; i++) {
+ /* Try to find if there are kptrs in user BTF and remember their ID */
+ struct btf_id_set *new_aof;
+ struct btf_field_info tmp;
+ const struct btf_type *t;
+
+ t = btf_type_by_id(btf, i);
+ if (!t) {
+ ret = -EINVAL;
+ goto free_aof;
+ }
+
+ ret = btf_find_kptr(btf, t, 0, 0, &tmp, BPF_KPTR);
+ if (ret != BTF_FIELD_FOUND)
+ continue;
+
+ new_aof = krealloc(aof, struct_size(new_aof, ids, aof->cnt + 1),
+ GFP_KERNEL | __GFP_NOWARN);
+ if (!new_aof) {
+ ret = -ENOMEM;
+ goto free_aof;
+ }
+ aof = new_aof;
+ aof->ids[aof->cnt++] = i;
}
- if (!aof.set.cnt)
+ if (!aof->cnt) {
+ kfree(aof);
return NULL;
- sort(&aof.set.ids, aof.set.cnt, sizeof(aof.set.ids[0]), btf_id_cmp_func, NULL);
+ }
+ sort(&aof->ids, aof->cnt, sizeof(aof->ids[0]), btf_id_cmp_func, NULL);
- n = btf_nr_types(btf);
for (i = 1; i < n; i++) {
struct btf_struct_metas *new_tab;
const struct btf_member *member;
@@ -5556,23 +5643,19 @@ btf_parse_struct_metas(struct bpf_verifier_log *log, struct btf *btf)
int j, tab_cnt;
t = btf_type_by_id(btf, i);
- if (!t) {
- ret = -EINVAL;
- goto free;
- }
if (!__btf_type_is_struct(t))
continue;
cond_resched();
for_each_member(j, t, member) {
- if (btf_id_set_contains(&aof.set, member->type))
+ if (btf_id_set_contains(aof, member->type))
goto parse;
}
continue;
parse:
tab_cnt = tab ? tab->cnt : 0;
- new_tab = krealloc(tab, offsetof(struct btf_struct_metas, types[tab_cnt + 1]),
+ new_tab = krealloc(tab, struct_size(new_tab, types, tab_cnt + 1),
GFP_KERNEL | __GFP_NOWARN);
if (!new_tab) {
ret = -ENOMEM;
@@ -5584,8 +5667,9 @@ btf_parse_struct_metas(struct bpf_verifier_log *log, struct btf *btf)
type = &tab->types[tab->cnt];
type->btf_id = i;
- record = btf_parse_fields(btf, t, BPF_SPIN_LOCK | BPF_LIST_HEAD | BPF_LIST_NODE |
- BPF_RB_ROOT | BPF_RB_NODE | BPF_REFCOUNT, t->size);
+ record = btf_parse_fields(btf, t, BPF_SPIN_LOCK | BPF_RES_SPIN_LOCK | BPF_LIST_HEAD | BPF_LIST_NODE |
+ BPF_RB_ROOT | BPF_RB_NODE | BPF_REFCOUNT |
+ BPF_KPTR, t->size);
/* The record cannot be unset, treat it as an error if so */
if (IS_ERR_OR_NULL(record)) {
ret = PTR_ERR_OR_ZERO(record) ?: -EFAULT;
@@ -5594,9 +5678,12 @@ btf_parse_struct_metas(struct bpf_verifier_log *log, struct btf *btf)
type->record = record;
tab->cnt++;
}
+ kfree(aof);
return tab;
free:
btf_struct_metas_free(tab);
+free_aof:
+ kfree(aof);
return ERR_PTR(ret);
}
@@ -6102,8 +6189,7 @@ int get_kern_ctx_btf_id(struct bpf_verifier_log *log, enum bpf_prog_type prog_ty
return kctx_type_id;
}
-BTF_ID_LIST(bpf_ctx_convert_btf_id)
-BTF_ID(struct, bpf_ctx_convert)
+BTF_ID_LIST_SINGLE(bpf_ctx_convert_btf_id, struct, bpf_ctx_convert)
static struct btf *btf_parse_base(struct btf_verifier_env *env, const char *name,
void *data, unsigned int data_size)
@@ -6243,12 +6329,11 @@ static struct btf *btf_parse_module(const char *module_name, const void *data,
btf->kernel_btf = true;
snprintf(btf->name, sizeof(btf->name), "%s", module_name);
- btf->data = kvmalloc(data_size, GFP_KERNEL | __GFP_NOWARN);
+ btf->data = kvmemdup(data, data_size, GFP_KERNEL | __GFP_NOWARN);
if (!btf->data) {
err = -ENOMEM;
goto errout;
}
- memcpy(btf->data, data, data_size);
btf->data_size = data_size;
err = btf_parse_hdr(env);
@@ -6283,7 +6368,7 @@ static struct btf *btf_parse_module(const char *module_name, const void *data,
errout:
btf_verifier_env_free(env);
- if (base_btf != vmlinux_btf)
+ if (!IS_ERR(base_btf) && base_btf != vmlinux_btf)
btf_free(base_btf);
if (btf) {
kvfree(btf->data);
@@ -6305,16 +6390,15 @@ struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog)
return prog->aux->attach_btf;
}
-static bool is_int_ptr(struct btf *btf, const struct btf_type *t)
+static bool is_void_or_int_ptr(struct btf *btf, const struct btf_type *t)
{
/* skip modifiers */
t = btf_type_skip_modifiers(btf, t->type, NULL);
-
- return btf_type_is_int(t);
+ return btf_type_is_void(t) || btf_type_is_int(t);
}
-static u32 get_ctx_arg_idx(struct btf *btf, const struct btf_type *func_proto,
- int off)
+u32 btf_ctx_arg_idx(struct btf *btf, const struct btf_type *func_proto,
+ int off)
{
const struct btf_param *args;
const struct btf_type *t;
@@ -6377,6 +6461,203 @@ int btf_ctx_arg_offset(const struct btf *btf, const struct btf_type *func_proto,
return off;
}
+struct bpf_raw_tp_null_args {
+ const char *func;
+ u64 mask;
+};
+
+static const struct bpf_raw_tp_null_args raw_tp_null_args[] = {
+ /* sched */
+ { "sched_pi_setprio", 0x10 },
+ /* ... from sched_numa_pair_template event class */
+ { "sched_stick_numa", 0x100 },
+ { "sched_swap_numa", 0x100 },
+ /* afs */
+ { "afs_make_fs_call", 0x10 },
+ { "afs_make_fs_calli", 0x10 },
+ { "afs_make_fs_call1", 0x10 },
+ { "afs_make_fs_call2", 0x10 },
+ { "afs_protocol_error", 0x1 },
+ { "afs_flock_ev", 0x10 },
+ /* cachefiles */
+ { "cachefiles_lookup", 0x1 | 0x200 },
+ { "cachefiles_unlink", 0x1 },
+ { "cachefiles_rename", 0x1 },
+ { "cachefiles_prep_read", 0x1 },
+ { "cachefiles_mark_active", 0x1 },
+ { "cachefiles_mark_failed", 0x1 },
+ { "cachefiles_mark_inactive", 0x1 },
+ { "cachefiles_vfs_error", 0x1 },
+ { "cachefiles_io_error", 0x1 },
+ { "cachefiles_ondemand_open", 0x1 },
+ { "cachefiles_ondemand_copen", 0x1 },
+ { "cachefiles_ondemand_close", 0x1 },
+ { "cachefiles_ondemand_read", 0x1 },
+ { "cachefiles_ondemand_cread", 0x1 },
+ { "cachefiles_ondemand_fd_write", 0x1 },
+ { "cachefiles_ondemand_fd_release", 0x1 },
+ /* ext4, from ext4__mballoc event class */
+ { "ext4_mballoc_discard", 0x10 },
+ { "ext4_mballoc_free", 0x10 },
+ /* fib */
+ { "fib_table_lookup", 0x100 },
+ /* filelock */
+ /* ... from filelock_lock event class */
+ { "posix_lock_inode", 0x10 },
+ { "fcntl_setlk", 0x10 },
+ { "locks_remove_posix", 0x10 },
+ { "flock_lock_inode", 0x10 },
+ /* ... from filelock_lease event class */
+ { "break_lease_noblock", 0x10 },
+ { "break_lease_block", 0x10 },
+ { "break_lease_unblock", 0x10 },
+ { "generic_delete_lease", 0x10 },
+ { "time_out_leases", 0x10 },
+ /* host1x */
+ { "host1x_cdma_push_gather", 0x10000 },
+ /* huge_memory */
+ { "mm_khugepaged_scan_pmd", 0x10 },
+ { "mm_collapse_huge_page_isolate", 0x1 },
+ { "mm_khugepaged_scan_file", 0x10 },
+ { "mm_khugepaged_collapse_file", 0x10 },
+ /* kmem */
+ { "mm_page_alloc", 0x1 },
+ { "mm_page_pcpu_drain", 0x1 },
+ /* .. from mm_page event class */
+ { "mm_page_alloc_zone_locked", 0x1 },
+ /* netfs */
+ { "netfs_failure", 0x10 },
+ /* power */
+ { "device_pm_callback_start", 0x10 },
+ /* qdisc */
+ { "qdisc_dequeue", 0x1000 },
+ /* rxrpc */
+ { "rxrpc_recvdata", 0x1 },
+ { "rxrpc_resend", 0x10 },
+ { "rxrpc_tq", 0x10 },
+ { "rxrpc_client", 0x1 },
+ /* skb */
+ {"kfree_skb", 0x1000},
+ /* sunrpc */
+ { "xs_stream_read_data", 0x1 },
+ /* ... from xprt_cong_event event class */
+ { "xprt_reserve_cong", 0x10 },
+ { "xprt_release_cong", 0x10 },
+ { "xprt_get_cong", 0x10 },
+ { "xprt_put_cong", 0x10 },
+ /* tcp */
+ { "tcp_send_reset", 0x11 },
+ { "tcp_sendmsg_locked", 0x100 },
+ /* tegra_apb_dma */
+ { "tegra_dma_tx_status", 0x100 },
+ /* timer_migration */
+ { "tmigr_update_events", 0x1 },
+ /* writeback, from writeback_folio_template event class */
+ { "writeback_dirty_folio", 0x10 },
+ { "folio_wait_writeback", 0x10 },
+ /* rdma */
+ { "mr_integ_alloc", 0x2000 },
+ /* bpf_testmod */
+ { "bpf_testmod_test_read", 0x0 },
+ /* amdgpu */
+ { "amdgpu_vm_bo_map", 0x1 },
+ { "amdgpu_vm_bo_unmap", 0x1 },
+ /* netfs */
+ { "netfs_folioq", 0x1 },
+ /* xfs from xfs_defer_pending_class */
+ { "xfs_defer_create_intent", 0x1 },
+ { "xfs_defer_cancel_list", 0x1 },
+ { "xfs_defer_pending_finish", 0x1 },
+ { "xfs_defer_pending_abort", 0x1 },
+ { "xfs_defer_relog_intent", 0x1 },
+ { "xfs_defer_isolate_paused", 0x1 },
+ { "xfs_defer_item_pause", 0x1 },
+ { "xfs_defer_item_unpause", 0x1 },
+ /* xfs from xfs_defer_pending_item_class */
+ { "xfs_defer_add_item", 0x1 },
+ { "xfs_defer_cancel_item", 0x1 },
+ { "xfs_defer_finish_item", 0x1 },
+ /* xfs from xfs_icwalk_class */
+ { "xfs_ioc_free_eofblocks", 0x10 },
+ { "xfs_blockgc_free_space", 0x10 },
+ /* xfs from xfs_btree_cur_class */
+ { "xfs_btree_updkeys", 0x100 },
+ { "xfs_btree_overlapped_query_range", 0x100 },
+ /* xfs from xfs_imap_class*/
+ { "xfs_map_blocks_found", 0x10000 },
+ { "xfs_map_blocks_alloc", 0x10000 },
+ { "xfs_iomap_alloc", 0x1000 },
+ { "xfs_iomap_found", 0x1000 },
+ /* xfs from xfs_fs_class */
+ { "xfs_inodegc_flush", 0x1 },
+ { "xfs_inodegc_push", 0x1 },
+ { "xfs_inodegc_start", 0x1 },
+ { "xfs_inodegc_stop", 0x1 },
+ { "xfs_inodegc_queue", 0x1 },
+ { "xfs_inodegc_throttle", 0x1 },
+ { "xfs_fs_sync_fs", 0x1 },
+ { "xfs_blockgc_start", 0x1 },
+ { "xfs_blockgc_stop", 0x1 },
+ { "xfs_blockgc_worker", 0x1 },
+ { "xfs_blockgc_flush_all", 0x1 },
+ /* xfs_scrub */
+ { "xchk_nlinks_live_update", 0x10 },
+ /* xfs_scrub from xchk_metapath_class */
+ { "xchk_metapath_lookup", 0x100 },
+ /* nfsd */
+ { "nfsd_dirent", 0x1 },
+ { "nfsd_file_acquire", 0x1001 },
+ { "nfsd_file_insert_err", 0x1 },
+ { "nfsd_file_cons_err", 0x1 },
+ /* nfs4 */
+ { "nfs4_setup_sequence", 0x1 },
+ { "pnfs_update_layout", 0x10000 },
+ { "nfs4_inode_callback_event", 0x200 },
+ { "nfs4_inode_stateid_callback_event", 0x200 },
+ /* nfs from pnfs_layout_event */
+ { "pnfs_mds_fallback_pg_init_read", 0x10000 },
+ { "pnfs_mds_fallback_pg_init_write", 0x10000 },
+ { "pnfs_mds_fallback_pg_get_mirror_count", 0x10000 },
+ { "pnfs_mds_fallback_read_done", 0x10000 },
+ { "pnfs_mds_fallback_write_done", 0x10000 },
+ { "pnfs_mds_fallback_read_pagelist", 0x10000 },
+ { "pnfs_mds_fallback_write_pagelist", 0x10000 },
+ /* coda */
+ { "coda_dec_pic_run", 0x10 },
+ { "coda_dec_pic_done", 0x10 },
+ /* cfg80211 */
+ { "cfg80211_scan_done", 0x11 },
+ { "rdev_set_coalesce", 0x10 },
+ { "cfg80211_report_wowlan_wakeup", 0x100 },
+ { "cfg80211_inform_bss_frame", 0x100 },
+ { "cfg80211_michael_mic_failure", 0x10000 },
+ /* cfg80211 from wiphy_work_event */
+ { "wiphy_work_queue", 0x10 },
+ { "wiphy_work_run", 0x10 },
+ { "wiphy_work_cancel", 0x10 },
+ { "wiphy_work_flush", 0x10 },
+ /* hugetlbfs */
+ { "hugetlbfs_alloc_inode", 0x10 },
+ /* spufs */
+ { "spufs_context", 0x10 },
+ /* kvm_hv */
+ { "kvm_page_fault_enter", 0x100 },
+ /* dpu */
+ { "dpu_crtc_setup_mixer", 0x100 },
+ /* binder */
+ { "binder_transaction", 0x100 },
+ /* bcachefs */
+ { "btree_path_free", 0x100 },
+ /* hfi1_tx */
+ { "hfi1_sdma_progress", 0x1000 },
+ /* iptfs */
+ { "iptfs_ingress_postq_event", 0x1000 },
+ /* neigh */
+ { "neigh_update", 0x10 },
+ /* snd_firewire_lib */
+ { "amdtp_packet", 0x100 },
+};
+
bool btf_ctx_access(int off, int size, enum bpf_access_type type,
const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
@@ -6387,6 +6668,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
const char *tname = prog->aux->attach_func_name;
struct bpf_verifier_log *log = info->log;
const struct btf_param *args;
+ bool ptr_err_raw_tp = false;
const char *tag_value;
u32 nr_args, arg;
int i, ret;
@@ -6396,7 +6678,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
tname, off);
return false;
}
- arg = get_ctx_arg_idx(btf, t, off);
+ arg = btf_ctx_arg_idx(btf, t, off);
args = (const struct btf_param *)(t + 1);
/* if (t == NULL) Fall back to default BPF prog with
* MAX_BPF_FUNC_REG_ARGS u64 arguments.
@@ -6416,8 +6698,11 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
if (arg == nr_args) {
switch (prog->expected_attach_type) {
- case BPF_LSM_CGROUP:
case BPF_LSM_MAC:
+ /* mark we are accessing the return value */
+ info->is_retval = true;
+ fallthrough;
+ case BPF_LSM_CGROUP:
case BPF_TRACE_FEXIT:
/* When LSM programs are attached to void LSM hooks
* they use FEXIT trampolines and when attached to
@@ -6466,7 +6751,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
/* skip modifiers */
while (btf_type_is_modifier(t))
t = btf_type_by_id(btf, t->type);
- if (btf_type_is_small_int(t) || btf_is_any_enum(t) || __btf_type_is_struct(t))
+ if (btf_type_is_small_int(t) || btf_is_any_enum(t) || btf_type_is_struct(t))
/* accessing a scalar */
return true;
if (!btf_type_is_ptr(t)) {
@@ -6478,6 +6763,12 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
return false;
}
+ if (size != sizeof(u64)) {
+ bpf_log(log, "func '%s' size %d must be 8\n",
+ tname, size);
+ return false;
+ }
+
/* check for PTR_TO_RDONLY_BUF_OR_NULL or PTR_TO_RDWR_BUF_OR_NULL */
for (i = 0; i < prog->aux->ctx_arg_info_size; i++) {
const struct bpf_ctx_arg_aux *ctx_arg_info = &prog->aux->ctx_arg_info[i];
@@ -6492,14 +6783,11 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
}
}
- if (t->type == 0)
- /* This is a pointer to void.
- * It is the same as scalar from the verifier safety pov.
- * No further pointer walking is allowed.
- */
- return true;
-
- if (is_int_ptr(btf, t))
+ /*
+ * If it's a pointer to void, it's the same as scalar from the verifier
+ * safety POV. Either way, no futher pointer walking is allowed.
+ */
+ if (is_void_or_int_ptr(btf, t))
return true;
/* this is a pointer to another type */
@@ -6515,6 +6803,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
info->reg_type = ctx_arg_info->reg_type;
info->btf = ctx_arg_info->btf ? : btf_vmlinux;
info->btf_id = ctx_arg_info->btf_id;
+ info->ref_obj_id = ctx_arg_info->ref_obj_id;
return true;
}
}
@@ -6523,6 +6812,42 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
if (prog_args_trusted(prog))
info->reg_type |= PTR_TRUSTED;
+ if (btf_param_match_suffix(btf, &args[arg], "__nullable"))
+ info->reg_type |= PTR_MAYBE_NULL;
+
+ if (prog->expected_attach_type == BPF_TRACE_RAW_TP) {
+ struct btf *btf = prog->aux->attach_btf;
+ const struct btf_type *t;
+ const char *tname;
+
+ /* BTF lookups cannot fail, return false on error */
+ t = btf_type_by_id(btf, prog->aux->attach_btf_id);
+ if (!t)
+ return false;
+ tname = btf_name_by_offset(btf, t->name_off);
+ if (!tname)
+ return false;
+ /* Checked by bpf_check_attach_target */
+ tname += sizeof("btf_trace_") - 1;
+ for (i = 0; i < ARRAY_SIZE(raw_tp_null_args); i++) {
+ /* Is this a func with potential NULL args? */
+ if (strcmp(tname, raw_tp_null_args[i].func))
+ continue;
+ if (raw_tp_null_args[i].mask & (0x1ULL << (arg * 4)))
+ info->reg_type |= PTR_MAYBE_NULL;
+ /* Is the current arg IS_ERR? */
+ if (raw_tp_null_args[i].mask & (0x2ULL << (arg * 4)))
+ ptr_err_raw_tp = true;
+ break;
+ }
+ /* If we don't know NULL-ness specification and the tracepoint
+ * is coming from a loadable module, be conservative and mark
+ * argument as PTR_MAYBE_NULL.
+ */
+ if (i == ARRAY_SIZE(raw_tp_null_args) && btf_is_module(btf))
+ info->reg_type |= PTR_MAYBE_NULL;
+ }
+
if (tgt_prog) {
enum bpf_prog_type tgt_type;
@@ -6545,7 +6870,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
info->btf_id = t->type;
t = btf_type_by_id(btf, t->type);
- if (btf_type_is_type_tag(t)) {
+ if (btf_type_is_type_tag(t) && !btf_type_kflag(t)) {
tag_value = __btf_name_by_offset(btf, t->name_off);
if (strcmp(tag_value, "user") == 0)
info->reg_type |= MEM_USER;
@@ -6567,6 +6892,15 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
bpf_log(log, "func '%s' arg%d has btf_id %d type %s '%s'\n",
tname, arg, info->btf_id, btf_type_str(t),
__btf_name_by_offset(btf, t->name_off));
+
+ /* Perform all checks on the validity of type for this argument, but if
+ * we know it can be IS_ERR at runtime, scrub pointer type and mark as
+ * scalar.
+ */
+ if (ptr_err_raw_tp) {
+ bpf_log(log, "marking pointer arg%d as scalar as it may encode error", arg);
+ info->reg_type = SCALAR_VALUE;
+ }
return true;
}
EXPORT_SYMBOL_GPL(btf_ctx_access);
@@ -6575,6 +6909,7 @@ enum bpf_struct_walk_result {
/* < 0 error */
WALK_SCALAR = 0,
WALK_PTR,
+ WALK_PTR_UNTRUSTED,
WALK_STRUCT,
};
@@ -6795,7 +7130,7 @@ error:
/* check type tag */
t = btf_type_by_id(btf, mtype->type);
- if (btf_type_is_type_tag(t)) {
+ if (btf_type_is_type_tag(t) && !btf_type_kflag(t)) {
tag_value = __btf_name_by_offset(btf, t->name_off);
/* check __user tag */
if (strcmp(tag_value, "user") == 0)
@@ -6816,6 +7151,8 @@ error:
*field_name = mname;
return WALK_PTR;
}
+
+ return WALK_PTR_UNTRUSTED;
}
/* Allow more flexible access within an int as long as
@@ -6888,6 +7225,9 @@ int btf_struct_access(struct bpf_verifier_log *log,
*next_btf_id = id;
*flag = tmp_flag;
return PTR_TO_BTF_ID;
+ case WALK_PTR_UNTRUSTED:
+ *flag = MEM_RDONLY | PTR_UNTRUSTED;
+ return PTR_TO_MEM;
case WALK_SCALAR:
return SCALAR_VALUE;
case WALK_STRUCT:
@@ -6983,7 +7323,7 @@ static int __get_type_size(struct btf *btf, u32 btf_id,
if (btf_type_is_ptr(t))
/* kernel size of pointer. Not BPF's size of pointer*/
return sizeof(void *);
- if (btf_type_is_int(t) || btf_is_any_enum(t) || __btf_type_is_struct(t))
+ if (btf_type_is_int(t) || btf_is_any_enum(t) || btf_type_is_struct(t))
return t->size;
return -EINVAL;
}
@@ -6992,7 +7332,7 @@ static u8 __get_type_fmodel_flags(const struct btf_type *t)
{
u8 flags = 0;
- if (__btf_type_is_struct(t))
+ if (btf_type_is_struct(t))
flags |= BTF_FMODEL_STRUCT_ARG;
if (btf_type_is_signed_int(t))
flags |= BTF_FMODEL_SIGNED_ARG;
@@ -7033,7 +7373,7 @@ int btf_distill_func_proto(struct bpf_verifier_log *log,
return -EINVAL;
}
ret = __get_type_size(btf, func->type, &t);
- if (ret < 0 || __btf_type_is_struct(t)) {
+ if (ret < 0 || btf_type_is_struct(t)) {
bpf_log(log,
"The function %s return type %s is unsupported.\n",
tname, btf_type_str(t));
@@ -7300,11 +7640,12 @@ cand_cache_unlock:
}
enum btf_arg_tag {
- ARG_TAG_CTX = BIT_ULL(0),
- ARG_TAG_NONNULL = BIT_ULL(1),
- ARG_TAG_TRUSTED = BIT_ULL(2),
- ARG_TAG_NULLABLE = BIT_ULL(3),
- ARG_TAG_ARENA = BIT_ULL(4),
+ ARG_TAG_CTX = BIT_ULL(0),
+ ARG_TAG_NONNULL = BIT_ULL(1),
+ ARG_TAG_TRUSTED = BIT_ULL(2),
+ ARG_TAG_UNTRUSTED = BIT_ULL(3),
+ ARG_TAG_NULLABLE = BIT_ULL(4),
+ ARG_TAG_ARENA = BIT_ULL(5),
};
/* Process BTF of a function to produce high-level expectation of function
@@ -7332,7 +7673,7 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog)
return 0;
if (!prog->aux->func_info) {
- bpf_log(log, "Verifier bug\n");
+ verifier_bug(env, "func_info undefined");
return -EFAULT;
}
@@ -7356,7 +7697,7 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog)
tname = btf_name_by_offset(btf, fn_t->name_off);
if (prog->aux->func_info_aux[subprog].unreliable) {
- bpf_log(log, "Verifier bug in function %s()\n", tname);
+ verifier_bug(env, "unreliable BTF for function %s()", tname);
return -EFAULT;
}
if (prog_type == BPF_PROG_TYPE_EXT)
@@ -7412,6 +7753,8 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog)
tags |= ARG_TAG_CTX;
} else if (strcmp(tag, "trusted") == 0) {
tags |= ARG_TAG_TRUSTED;
+ } else if (strcmp(tag, "untrusted") == 0) {
+ tags |= ARG_TAG_UNTRUSTED;
} else if (strcmp(tag, "nonnull") == 0) {
tags |= ARG_TAG_NONNULL;
} else if (strcmp(tag, "nullable") == 0) {
@@ -7472,6 +7815,38 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog)
sub->args[i].btf_id = kern_type_id;
continue;
}
+ if (tags & ARG_TAG_UNTRUSTED) {
+ struct btf *vmlinux_btf;
+ int kern_type_id;
+
+ if (tags & ~ARG_TAG_UNTRUSTED) {
+ bpf_log(log, "arg#%d untrusted cannot be combined with any other tags\n", i);
+ return -EINVAL;
+ }
+
+ ref_t = btf_type_skip_modifiers(btf, t->type, NULL);
+ if (btf_type_is_void(ref_t) || btf_type_is_primitive(ref_t)) {
+ sub->args[i].arg_type = ARG_PTR_TO_MEM | MEM_RDONLY | PTR_UNTRUSTED;
+ sub->args[i].mem_size = 0;
+ continue;
+ }
+
+ kern_type_id = btf_get_ptr_to_btf_id(log, i, btf, t);
+ if (kern_type_id < 0)
+ return kern_type_id;
+
+ vmlinux_btf = bpf_get_btf_vmlinux();
+ ref_t = btf_type_by_id(vmlinux_btf, kern_type_id);
+ if (!btf_type_is_struct(ref_t)) {
+ tname = __btf_name_by_offset(vmlinux_btf, t->name_off);
+ bpf_log(log, "arg#%d has type %s '%s', but only struct or primitive types are allowed\n",
+ i, btf_type_str(ref_t), tname);
+ return -EINVAL;
+ }
+ sub->args[i].arg_type = ARG_PTR_TO_BTF_ID | PTR_UNTRUSTED;
+ sub->args[i].btf_id = kern_type_id;
+ continue;
+ }
if (tags & ARG_TAG_ARENA) {
if (tags & ~ARG_TAG_ARENA) {
bpf_log(log, "arg#%d arena cannot be combined with any other tags\n", i);
@@ -7673,21 +8048,11 @@ int btf_new_fd(const union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
struct btf *btf_get_by_fd(int fd)
{
struct btf *btf;
- struct fd f;
-
- f = fdget(fd);
+ CLASS(fd, f)(fd);
- if (!f.file)
- return ERR_PTR(-EBADF);
-
- if (f.file->f_op != &btf_fops) {
- fdput(f);
- return ERR_PTR(-EINVAL);
- }
-
- btf = f.file->private_data;
- refcount_inc(&btf->refcnt);
- fdput(f);
+ btf = __btf_get_by_fd(f);
+ if (!IS_ERR(btf))
+ refcount_inc(&btf->refcnt);
return btf;
}
@@ -7804,17 +8169,6 @@ struct btf_module {
static LIST_HEAD(btf_modules);
static DEFINE_MUTEX(btf_module_mutex);
-static ssize_t
-btf_module_read(struct file *file, struct kobject *kobj,
- struct bin_attribute *bin_attr,
- char *buf, loff_t off, size_t len)
-{
- const struct btf *btf = bin_attr->private;
-
- memcpy(buf, btf->data + off, len);
- return len;
-}
-
static void purge_cand_cache(struct btf *btf);
static int btf_module_notify(struct notifier_block *nb, unsigned long op,
@@ -7875,8 +8229,8 @@ static int btf_module_notify(struct notifier_block *nb, unsigned long op,
attr->attr.name = btf->name;
attr->attr.mode = 0444;
attr->size = btf->data_size;
- attr->private = btf;
- attr->read = btf_module_read;
+ attr->private = btf->data;
+ attr->read = sysfs_bin_attr_simple_read;
err = sysfs_create_bin_file(btf_kobj, attr);
if (err) {
@@ -8049,15 +8403,44 @@ BTF_ID_LIST_GLOBAL(btf_tracing_ids, MAX_BTF_TRACING_TYPE)
BTF_TRACING_TYPE_xxx
#undef BTF_TRACING_TYPE
+/* Validate well-formedness of iter argument type.
+ * On success, return positive BTF ID of iter state's STRUCT type.
+ * On error, negative error is returned.
+ */
+int btf_check_iter_arg(struct btf *btf, const struct btf_type *func, int arg_idx)
+{
+ const struct btf_param *arg;
+ const struct btf_type *t;
+ const char *name;
+ int btf_id;
+
+ if (btf_type_vlen(func) <= arg_idx)
+ return -EINVAL;
+
+ arg = &btf_params(func)[arg_idx];
+ t = btf_type_skip_modifiers(btf, arg->type, NULL);
+ if (!t || !btf_type_is_ptr(t))
+ return -EINVAL;
+ t = btf_type_skip_modifiers(btf, t->type, &btf_id);
+ if (!t || !__btf_type_is_struct(t))
+ return -EINVAL;
+
+ name = btf_name_by_offset(btf, t->name_off);
+ if (!name || strncmp(name, ITER_PREFIX, sizeof(ITER_PREFIX) - 1))
+ return -EINVAL;
+
+ return btf_id;
+}
+
static int btf_check_iter_kfuncs(struct btf *btf, const char *func_name,
const struct btf_type *func, u32 func_flags)
{
u32 flags = func_flags & (KF_ITER_NEW | KF_ITER_NEXT | KF_ITER_DESTROY);
- const char *name, *sfx, *iter_name;
- const struct btf_param *arg;
+ const char *sfx, *iter_name;
const struct btf_type *t;
char exp_name[128];
u32 nr_args;
+ int btf_id;
/* exactly one of KF_ITER_{NEW,NEXT,DESTROY} can be set */
if (!flags || (flags & (flags - 1)))
@@ -8068,28 +8451,21 @@ static int btf_check_iter_kfuncs(struct btf *btf, const char *func_name,
if (nr_args < 1)
return -EINVAL;
- arg = &btf_params(func)[0];
- t = btf_type_skip_modifiers(btf, arg->type, NULL);
- if (!t || !btf_type_is_ptr(t))
- return -EINVAL;
- t = btf_type_skip_modifiers(btf, t->type, NULL);
- if (!t || !__btf_type_is_struct(t))
- return -EINVAL;
-
- name = btf_name_by_offset(btf, t->name_off);
- if (!name || strncmp(name, ITER_PREFIX, sizeof(ITER_PREFIX) - 1))
- return -EINVAL;
+ btf_id = btf_check_iter_arg(btf, func, 0);
+ if (btf_id < 0)
+ return btf_id;
/* sizeof(struct bpf_iter_<type>) should be a multiple of 8 to
* fit nicely in stack slots
*/
+ t = btf_type_by_id(btf, btf_id);
if (t->size == 0 || (t->size % 8))
return -EINVAL;
/* validate bpf_iter_<type>_{new,next,destroy}(struct bpf_iter_<type> *)
* naming pattern
*/
- iter_name = name + sizeof(ITER_PREFIX) - 1;
+ iter_name = btf_name_by_offset(btf, t->name_off) + sizeof(ITER_PREFIX) - 1;
if (flags & KF_ITER_NEW)
sfx = "new";
else if (flags & KF_ITER_NEXT)
@@ -8232,7 +8608,7 @@ static int btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook,
/* Grow set */
set = krealloc(tab->sets[hook],
- offsetof(struct btf_id_set8, pairs[set_cnt + add_set->cnt]),
+ struct_size(set, pairs, set_cnt + add_set->cnt),
GFP_KERNEL | __GFP_NOWARN);
if (!set) {
ret = -ENOMEM;
@@ -8304,13 +8680,20 @@ static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type)
case BPF_PROG_TYPE_STRUCT_OPS:
return BTF_KFUNC_HOOK_STRUCT_OPS;
case BPF_PROG_TYPE_TRACING:
+ case BPF_PROG_TYPE_TRACEPOINT:
+ case BPF_PROG_TYPE_PERF_EVENT:
case BPF_PROG_TYPE_LSM:
return BTF_KFUNC_HOOK_TRACING;
case BPF_PROG_TYPE_SYSCALL:
return BTF_KFUNC_HOOK_SYSCALL;
case BPF_PROG_TYPE_CGROUP_SKB:
+ case BPF_PROG_TYPE_CGROUP_SOCK:
+ case BPF_PROG_TYPE_CGROUP_DEVICE:
case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
- return BTF_KFUNC_HOOK_CGROUP_SKB;
+ case BPF_PROG_TYPE_CGROUP_SOCKOPT:
+ case BPF_PROG_TYPE_CGROUP_SYSCTL:
+ case BPF_PROG_TYPE_SOCK_OPS:
+ return BTF_KFUNC_HOOK_CGROUP;
case BPF_PROG_TYPE_SCHED_ACT:
return BTF_KFUNC_HOOK_SCHED_ACT;
case BPF_PROG_TYPE_SK_SKB:
@@ -8511,7 +8894,7 @@ int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, u32 add_c
}
tab = krealloc(btf->dtor_kfunc_tab,
- offsetof(struct btf_id_dtor_kfunc_tab, dtors[tab_cnt + add_cnt]),
+ struct_size(tab, dtors, tab_cnt + add_cnt),
GFP_KERNEL | __GFP_NOWARN);
if (!tab) {
ret = -ENOMEM;
@@ -8683,7 +9066,7 @@ static struct bpf_cand_cache *populate_cand_cache(struct bpf_cand_cache *cands,
bpf_free_cands_from_cache(*cc);
*cc = NULL;
}
- new_cands = kmemdup(cands, sizeof_cands(cands->cnt), GFP_KERNEL);
+ new_cands = kmemdup(cands, sizeof_cands(cands->cnt), GFP_KERNEL_ACCOUNT);
if (!new_cands) {
bpf_free_cands(cands);
return ERR_PTR(-ENOMEM);
@@ -8691,7 +9074,7 @@ static struct bpf_cand_cache *populate_cand_cache(struct bpf_cand_cache *cands,
/* strdup the name, since it will stay in cache.
* the cands->name points to strings in prog's BTF and the prog can be unloaded.
*/
- new_cands->name = kmemdup_nul(cands->name, cands->name_len, GFP_KERNEL);
+ new_cands->name = kmemdup_nul(cands->name, cands->name_len, GFP_KERNEL_ACCOUNT);
bpf_free_cands(cands);
if (!new_cands->name) {
kfree(new_cands);
@@ -8775,7 +9158,7 @@ bpf_core_add_cands(struct bpf_cand_cache *cands, const struct btf *targ_btf,
continue;
/* most of the time there is only one candidate for a given kind+name pair */
- new_cands = kmalloc(sizeof_cands(cands->cnt + 1), GFP_KERNEL);
+ new_cands = kmalloc(sizeof_cands(cands->cnt + 1), GFP_KERNEL_ACCOUNT);
if (!new_cands) {
bpf_free_cands(cands);
return ERR_PTR(-ENOMEM);
@@ -8886,15 +9269,24 @@ int bpf_core_apply(struct bpf_core_ctx *ctx, const struct bpf_core_relo *relo,
struct bpf_core_cand_list cands = {};
struct bpf_core_relo_res targ_res;
struct bpf_core_spec *specs;
+ const struct btf_type *type;
int err;
/* ~4k of temp memory necessary to convert LLVM spec like "0:1:0:5"
* into arrays of btf_ids of struct fields and array indices.
*/
- specs = kcalloc(3, sizeof(*specs), GFP_KERNEL);
+ specs = kcalloc(3, sizeof(*specs), GFP_KERNEL_ACCOUNT);
if (!specs)
return -ENOMEM;
+ type = btf_type_by_id(ctx->btf, relo->type_id);
+ if (!type) {
+ bpf_log(ctx->log, "relo #%u: bad type id %u\n",
+ relo_idx, relo->type_id);
+ kfree(specs);
+ return -EINVAL;
+ }
+
if (need_cands) {
struct bpf_cand_cache *cc;
int i;
@@ -8908,7 +9300,7 @@ int bpf_core_apply(struct bpf_core_ctx *ctx, const struct bpf_core_relo *relo,
goto out;
}
if (cc->cnt) {
- cands.cands = kcalloc(cc->cnt, sizeof(*cands.cands), GFP_KERNEL);
+ cands.cands = kcalloc(cc->cnt, sizeof(*cands.cands), GFP_KERNEL_ACCOUNT);
if (!cands.cands) {
err = -ENOMEM;
goto out;
@@ -9060,8 +9452,7 @@ btf_add_struct_ops(struct btf *btf, struct bpf_struct_ops *st_ops,
tab = btf->struct_ops_tab;
if (!tab) {
- tab = kzalloc(offsetof(struct btf_struct_ops_tab, ops[4]),
- GFP_KERNEL);
+ tab = kzalloc(struct_size(tab, ops, 4), GFP_KERNEL);
if (!tab)
return -ENOMEM;
tab->capacity = 4;
@@ -9074,8 +9465,7 @@ btf_add_struct_ops(struct btf *btf, struct bpf_struct_ops *st_ops,
if (tab->cnt == tab->capacity) {
new_tab = krealloc(tab,
- offsetof(struct btf_struct_ops_tab,
- ops[tab->capacity * 2]),
+ struct_size(tab, ops, tab->capacity * 2),
GFP_KERNEL);
if (!new_tab)
return -ENOMEM;