Diffstat (limited to 'tools')
226 files changed, 10418 insertions, 1226 deletions
diff --git a/tools/arch/arm64/include/asm/cputype.h b/tools/arch/arm64/include/asm/cputype.h index 9afcc6467a09..e09d6908a21d 100644 --- a/tools/arch/arm64/include/asm/cputype.h +++ b/tools/arch/arm64/include/asm/cputype.h @@ -75,6 +75,7 @@ #define ARM_CPU_PART_CORTEX_A77 0xD0D #define ARM_CPU_PART_NEOVERSE_V1 0xD40 #define ARM_CPU_PART_CORTEX_A78 0xD41 +#define ARM_CPU_PART_CORTEX_A78AE 0xD42 #define ARM_CPU_PART_CORTEX_X1 0xD44 #define ARM_CPU_PART_CORTEX_A510 0xD46 #define ARM_CPU_PART_CORTEX_A710 0xD47 @@ -130,6 +131,7 @@ #define MIDR_CORTEX_A77 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A77) #define MIDR_NEOVERSE_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V1) #define MIDR_CORTEX_A78 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78) +#define MIDR_CORTEX_A78AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78AE) #define MIDR_CORTEX_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1) #define MIDR_CORTEX_A510 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A510) #define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710) diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index 0eb90d21049e..ee15311b6be1 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -128,9 +128,9 @@ #define TSX_CTRL_RTM_DISABLE BIT(0) /* Disable RTM feature */ #define TSX_CTRL_CPUID_CLEAR BIT(1) /* Disable TSX enumeration */ -/* SRBDS support */ #define MSR_IA32_MCU_OPT_CTRL 0x00000123 -#define RNGDS_MITG_DIS BIT(0) +#define RNGDS_MITG_DIS BIT(0) /* SRBDS support */ +#define RTM_ALLOW BIT(1) /* TSX development mode */ #define MSR_IA32_SYSENTER_CS 0x00000174 #define MSR_IA32_SYSENTER_ESP 0x00000175 diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index 0c1e06cf50b9..c740142c24d8 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -17,7 +17,6 @@ #include <linux/magic.h> #include <net/if.h> #include <sys/mount.h> -#include <sys/resource.h> #include <sys/stat.h> #include <sys/vfs.h> @@ -119,13 +118,6 @@ static bool is_bpffs(char *path) return (unsigned long)st_fs.f_type == BPF_FS_MAGIC; } -void set_max_rlimit(void) -{ - struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY }; - - setrlimit(RLIMIT_MEMLOCK, &rinf); -} - static int mnt_fs(const char *target, const char *type, char *buff, size_t bufflen) { diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c index 290998c82de1..be130e35462f 100644 --- a/tools/bpf/bpftool/feature.c +++ b/tools/bpf/bpftool/feature.c @@ -567,7 +567,7 @@ probe_prog_type(enum bpf_prog_type prog_type, bool *supported_types, res = probe_prog_type_ifindex(prog_type, ifindex); } else { - res = libbpf_probe_bpf_prog_type(prog_type, NULL); + res = libbpf_probe_bpf_prog_type(prog_type, NULL) > 0; } #ifdef USE_LIBCAP @@ -1136,8 +1136,6 @@ static int do_probe(int argc, char **argv) __u32 ifindex = 0; char *ifname; - set_max_rlimit(); - while (argc) { if (is_prefix(*argv, "kernel")) { if (target != COMPONENT_UNSPEC) { diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c index 91af2850b505..7678af364793 100644 --- a/tools/bpf/bpftool/gen.c +++ b/tools/bpf/bpftool/gen.c @@ -828,8 +828,10 @@ codegen_maps_skeleton(struct bpf_object *obj, size_t map_cnt, bool mmaped) s->map_cnt = %zu; \n\ s->map_skel_sz = sizeof(*s->maps); \n\ s->maps = (struct bpf_map_skeleton *)calloc(s->map_cnt, s->map_skel_sz);\n\ - if (!s->maps) \n\ + if (!s->maps) { \n\ + err = -ENOMEM; \n\ goto err; \n\ + } \n\ 
", map_cnt ); @@ -870,8 +872,10 @@ codegen_progs_skeleton(struct bpf_object *obj, size_t prog_cnt, bool populate_li s->prog_cnt = %zu; \n\ s->prog_skel_sz = sizeof(*s->progs); \n\ s->progs = (struct bpf_prog_skeleton *)calloc(s->prog_cnt, s->prog_skel_sz);\n\ - if (!s->progs) \n\ + if (!s->progs) { \n\ + err = -ENOMEM; \n\ goto err; \n\ + } \n\ ", prog_cnt ); @@ -1182,10 +1186,13 @@ static int do_skeleton(int argc, char **argv) %1$s__create_skeleton(struct %1$s *obj) \n\ { \n\ struct bpf_object_skeleton *s; \n\ + int err; \n\ \n\ s = (struct bpf_object_skeleton *)calloc(1, sizeof(*s));\n\ - if (!s) \n\ + if (!s) { \n\ + err = -ENOMEM; \n\ goto err; \n\ + } \n\ \n\ s->sz = sizeof(*s); \n\ s->name = \"%1$s\"; \n\ @@ -1206,7 +1213,7 @@ static int do_skeleton(int argc, char **argv) return 0; \n\ err: \n\ bpf_object__destroy_skeleton(s); \n\ - return -ENOMEM; \n\ + return err; \n\ } \n\ \n\ static inline const void *%2$s__elf_bytes(size_t *sz) \n\ @@ -1466,12 +1473,12 @@ static int do_subskeleton(int argc, char **argv) \n\ obj = (struct %1$s *)calloc(1, sizeof(*obj)); \n\ if (!obj) { \n\ - errno = ENOMEM; \n\ + err = -ENOMEM; \n\ goto err; \n\ } \n\ s = (struct bpf_object_subskeleton *)calloc(1, sizeof(*s));\n\ if (!s) { \n\ - errno = ENOMEM; \n\ + err = -ENOMEM; \n\ goto err; \n\ } \n\ s->sz = sizeof(*s); \n\ @@ -1483,7 +1490,7 @@ static int do_subskeleton(int argc, char **argv) s->var_cnt = %2$d; \n\ s->vars = (struct bpf_var_skeleton *)calloc(%2$d, sizeof(*s->vars));\n\ if (!s->vars) { \n\ - errno = ENOMEM; \n\ + err = -ENOMEM; \n\ goto err; \n\ } \n\ ", @@ -1538,6 +1545,7 @@ static int do_subskeleton(int argc, char **argv) return obj; \n\ err: \n\ %1$s__destroy(obj); \n\ + errno = -err; \n\ return NULL; \n\ } \n\ \n\ diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c index 97dec81950e5..8fb0116f9136 100644 --- a/tools/bpf/bpftool/link.c +++ b/tools/bpf/bpftool/link.c @@ -20,6 +20,9 @@ static const char * const link_type_name[] = { [BPF_LINK_TYPE_CGROUP] = "cgroup", [BPF_LINK_TYPE_ITER] = "iter", [BPF_LINK_TYPE_NETNS] = "netns", + [BPF_LINK_TYPE_XDP] = "xdp", + [BPF_LINK_TYPE_PERF_EVENT] = "perf_event", + [BPF_LINK_TYPE_KPROBE_MULTI] = "kprobe_multi", }; static struct hashmap *link_table; diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index e81227761f5d..9062ef2b8767 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -507,9 +507,9 @@ int main(int argc, char **argv) * It will still be rejected if users use LIBBPF_STRICT_ALL * mode for loading generated skeleton. 
*/ - ret = libbpf_set_strict_mode(LIBBPF_STRICT_ALL & ~LIBBPF_STRICT_MAP_DEFINITIONS); - if (ret) - p_err("failed to enable libbpf strict mode: %d", ret); + libbpf_set_strict_mode(LIBBPF_STRICT_ALL & ~LIBBPF_STRICT_MAP_DEFINITIONS); + } else { + libbpf_set_strict_mode(LIBBPF_STRICT_AUTO_RLIMIT_MEMLOCK); } argc -= optind; diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 6e9277ffc68c..aa99ffab451a 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -102,8 +102,6 @@ int detect_common_prefix(const char *arg, ...); void fprint_hex(FILE *f, void *arg, unsigned int n, const char *sep); void usage(void) __noreturn; -void set_max_rlimit(void); - int mount_tracefs(const char *target); struct obj_ref { diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index c26378f20831..877387ef79c7 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -1342,8 +1342,6 @@ static int do_create(int argc, char **argv) goto exit; } - set_max_rlimit(); - fd = bpf_map_create(map_type, map_name, key_size, value_size, max_entries, &attr); if (fd < 0) { p_err("map create failed: %s", strerror(errno)); diff --git a/tools/bpf/bpftool/perf.c b/tools/bpf/bpftool/perf.c index 50de087b0db7..226ec2c39052 100644 --- a/tools/bpf/bpftool/perf.c +++ b/tools/bpf/bpftool/perf.c @@ -11,7 +11,7 @@ #include <sys/stat.h> #include <sys/types.h> #include <unistd.h> -#include <ftw.h> +#include <dirent.h> #include <bpf/bpf.h> @@ -147,81 +147,83 @@ static void print_perf_plain(int pid, int fd, __u32 prog_id, __u32 fd_type, } } -static int show_proc(const char *fpath, const struct stat *sb, - int tflag, struct FTW *ftwbuf) +static int show_proc(void) { + struct dirent *proc_de, *pid_fd_de; __u64 probe_offset, probe_addr; __u32 len, prog_id, fd_type; - int err, pid = 0, fd = 0; + DIR *proc, *pid_fd; + int err, pid, fd; const char *pch; char buf[4096]; - /* prefix always /proc */ - pch = fpath + 5; - if (*pch == '\0') - return 0; + proc = opendir("/proc"); + if (!proc) + return -1; - /* pid should be all numbers */ - pch++; - while (isdigit(*pch)) { - pid = pid * 10 + *pch - '0'; - pch++; - } - if (*pch == '\0') - return 0; - if (*pch != '/') - return FTW_SKIP_SUBTREE; - - /* check /proc/<pid>/fd directory */ - pch++; - if (strncmp(pch, "fd", 2)) - return FTW_SKIP_SUBTREE; - pch += 2; - if (*pch == '\0') - return 0; - if (*pch != '/') - return FTW_SKIP_SUBTREE; - - /* check /proc/<pid>/fd/<fd_num> */ - pch++; - while (isdigit(*pch)) { - fd = fd * 10 + *pch - '0'; - pch++; - } - if (*pch != '\0') - return FTW_SKIP_SUBTREE; + while ((proc_de = readdir(proc))) { + pid = 0; + pch = proc_de->d_name; - /* query (pid, fd) for potential perf events */ - len = sizeof(buf); - err = bpf_task_fd_query(pid, fd, 0, buf, &len, &prog_id, &fd_type, - &probe_offset, &probe_addr); - if (err < 0) - return 0; + /* pid should be all numbers */ + while (isdigit(*pch)) { + pid = pid * 10 + *pch - '0'; + pch++; + } + if (*pch != '\0') + continue; - if (json_output) - print_perf_json(pid, fd, prog_id, fd_type, buf, probe_offset, - probe_addr); - else - print_perf_plain(pid, fd, prog_id, fd_type, buf, probe_offset, - probe_addr); + err = snprintf(buf, sizeof(buf), "/proc/%s/fd", proc_de->d_name); + if (err < 0 || err >= (int)sizeof(buf)) + continue; + + pid_fd = opendir(buf); + if (!pid_fd) + continue; + while ((pid_fd_de = readdir(pid_fd))) { + fd = 0; + pch = pid_fd_de->d_name; + + /* fd should be all numbers */ + while (isdigit(*pch)) { + fd = fd * 10 + *pch - '0'; + pch++; + } + if (*pch != '\0') + 
continue; + + /* query (pid, fd) for potential perf events */ + len = sizeof(buf); + err = bpf_task_fd_query(pid, fd, 0, buf, &len, + &prog_id, &fd_type, + &probe_offset, &probe_addr); + if (err < 0) + continue; + + if (json_output) + print_perf_json(pid, fd, prog_id, fd_type, buf, + probe_offset, probe_addr); + else + print_perf_plain(pid, fd, prog_id, fd_type, buf, + probe_offset, probe_addr); + } + closedir(pid_fd); + } + closedir(proc); return 0; } static int do_show(int argc, char **argv) { - int flags = FTW_ACTIONRETVAL | FTW_PHYS; - int err = 0, nopenfd = 16; + int err; if (!has_perf_query_support()) return -1; if (json_output) jsonw_start_array(json_wtr); - if (nftw("/proc", show_proc, nopenfd, flags) == -1) { - p_err("%s", strerror(errno)); - err = -1; - } + err = show_proc(); if (json_output) jsonw_end_array(json_wtr); diff --git a/tools/bpf/bpftool/pids.c b/tools/bpf/bpftool/pids.c index bb6c969a114a..e2d00d3cd868 100644 --- a/tools/bpf/bpftool/pids.c +++ b/tools/bpf/bpftool/pids.c @@ -108,7 +108,6 @@ int build_obj_refs_table(struct hashmap **map, enum bpf_obj_type type) p_err("failed to create hashmap for PID references"); return -1; } - set_max_rlimit(); skel = pid_iter_bpf__open(); if (!skel) { diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index bc4e05542c2b..5c2c63df92e8 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -68,6 +68,7 @@ const char * const prog_type_name[] = { [BPF_PROG_TYPE_EXT] = "ext", [BPF_PROG_TYPE_LSM] = "lsm", [BPF_PROG_TYPE_SK_LOOKUP] = "sk_lookup", + [BPF_PROG_TYPE_SYSCALL] = "syscall", }; const size_t prog_type_name_size = ARRAY_SIZE(prog_type_name); @@ -1603,8 +1604,6 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) } } - set_max_rlimit(); - if (verifier_logs) /* log_level1 + log_level2 + stats, but not stable UAPI */ open_opts.kernel_log_level = 1 + 2 + 4; @@ -2302,7 +2301,6 @@ static int do_profile(int argc, char **argv) } } - set_max_rlimit(); err = profiler_bpf__load(profile_obj); if (err) { p_err("failed to load profile_obj"); diff --git a/tools/bpf/bpftool/struct_ops.c b/tools/bpf/bpftool/struct_ops.c index e08a6ff2866c..2535f079ed67 100644 --- a/tools/bpf/bpftool/struct_ops.c +++ b/tools/bpf/bpftool/struct_ops.c @@ -501,8 +501,6 @@ static int do_register(int argc, char **argv) if (libbpf_get_error(obj)) return -1; - set_max_rlimit(); - if (bpf_object__load(obj)) { bpf_object__close(obj); return -1; diff --git a/tools/bpf/bpftool/tracelog.c b/tools/bpf/bpftool/tracelog.c index e80a5c79b38f..bf1f02212797 100644 --- a/tools/bpf/bpftool/tracelog.c +++ b/tools/bpf/bpftool/tracelog.c @@ -9,7 +9,7 @@ #include <string.h> #include <unistd.h> #include <linux/magic.h> -#include <sys/fcntl.h> +#include <fcntl.h> #include <sys/vfs.h> #include "main.h" diff --git a/tools/bpf/runqslower/runqslower.c b/tools/bpf/runqslower/runqslower.c index d78f4148597f..83c5993a139a 100644 --- a/tools/bpf/runqslower/runqslower.c +++ b/tools/bpf/runqslower/runqslower.c @@ -4,7 +4,6 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> -#include <sys/resource.h> #include <time.h> #include <bpf/libbpf.h> #include <bpf/bpf.h> @@ -88,16 +87,6 @@ int libbpf_print_fn(enum libbpf_print_level level, return vfprintf(stderr, format, args); } -static int bump_memlock_rlimit(void) -{ - struct rlimit rlim_new = { - .rlim_cur = RLIM_INFINITY, - .rlim_max = RLIM_INFINITY, - }; - - return setrlimit(RLIMIT_MEMLOCK, &rlim_new); -} - void handle_event(void *ctx, int cpu, void *data, __u32 data_sz) { const struct 
runq_event *e = data; @@ -133,11 +122,8 @@ int main(int argc, char **argv) libbpf_set_print(libbpf_print_fn); - err = bump_memlock_rlimit(); - if (err) { - fprintf(stderr, "failed to increase rlimit: %d", err); - return 1; - } + /* Use libbpf 1.0 API mode */ + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); obj = runqslower_bpf__open(); if (!obj) { diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 1480910c792e..de66e1cc0734 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -217,9 +217,16 @@ strip-libs = $(filter-out -l%,$(1)) PERL_EMBED_LDOPTS = $(shell perl -MExtUtils::Embed -e ldopts 2>/dev/null) PERL_EMBED_LDFLAGS = $(call strip-libs,$(PERL_EMBED_LDOPTS)) PERL_EMBED_LIBADD = $(call grep-libs,$(PERL_EMBED_LDOPTS)) -PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts 2>/dev/null` +PERL_EMBED_CCOPTS = $(shell perl -MExtUtils::Embed -e ccopts 2>/dev/null) FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS) +ifeq ($(CC_NO_CLANG), 0) + PERL_EMBED_LDOPTS := $(filter-out -specs=%,$(PERL_EMBED_LDOPTS)) + PERL_EMBED_CCOPTS := $(filter-out -flto=auto -ffat-lto-objects, $(PERL_EMBED_CCOPTS)) + PERL_EMBED_CCOPTS := $(filter-out -specs=%,$(PERL_EMBED_CCOPTS)) + FLAGS_PERL_EMBED += -Wno-compound-token-split-by-macro +endif + $(OUTPUT)test-libperl.bin: $(BUILD) $(FLAGS_PERL_EMBED) diff --git a/tools/include/linux/slab.h b/tools/include/linux/slab.h index f41d8a0eb1a4..0616409513eb 100644 --- a/tools/include/linux/slab.h +++ b/tools/include/linux/slab.h @@ -28,7 +28,13 @@ static inline void *kzalloc(size_t size, gfp_t gfp) return kmalloc(size, gfp | __GFP_ZERO); } -void *kmem_cache_alloc(struct kmem_cache *cachep, int flags); +struct list_lru; + +void *kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *, int flags); +static inline void *kmem_cache_alloc(struct kmem_cache *cachep, int flags) +{ + return kmem_cache_alloc_lru(cachep, NULL, flags); +} void kmem_cache_free(struct kmem_cache *cachep, void *objp); struct kmem_cache *kmem_cache_create(const char *name, unsigned int size, diff --git a/tools/include/uapi/asm-generic/socket.h b/tools/include/uapi/asm-generic/socket.h index 77f7c1638eb1..8756df13be50 100644 --- a/tools/include/uapi/asm-generic/socket.h +++ b/tools/include/uapi/asm-generic/socket.h @@ -119,6 +119,8 @@ #define SO_DETACH_REUSEPORT_BPF 68 +#define SO_RCVMARK 75 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) diff --git a/tools/include/uapi/asm/bpf_perf_event.h b/tools/include/uapi/asm/bpf_perf_event.h index 39acc149d843..d7dfeab0d71a 100644 --- a/tools/include/uapi/asm/bpf_perf_event.h +++ b/tools/include/uapi/asm/bpf_perf_event.h @@ -1,5 +1,7 @@ #if defined(__aarch64__) #include "../../arch/arm64/include/uapi/asm/bpf_perf_event.h" +#elif defined(__arc__) +#include "../../arch/arc/include/uapi/asm/bpf_perf_event.h" #elif defined(__s390__) #include "../../arch/s390/include/uapi/asm/bpf_perf_event.h" #elif defined(__riscv) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index d14b10b85e51..444fe6f1cf35 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -5143,6 +5143,17 @@ union bpf_attr { * The **hash_algo** is returned on success, * **-EOPNOTSUP** if the hash calculation failed or **-EINVAL** if * invalid arguments are passed. + * + * void *bpf_kptr_xchg(void *map_value, void *ptr) + * Description + * Exchange kptr at pointer *map_value* with *ptr*, and return the + * old value. 
*ptr* can be NULL, otherwise it must be a referenced + * pointer which will be released when this helper is called. + * Return + * The old value of kptr (which can be NULL). The returned pointer + * if not NULL, is a reference which must be released using its + * corresponding release function, or moved into a BPF map before + * program exit. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5339,6 +5350,7 @@ union bpf_attr { FN(copy_from_user_task), \ FN(skb_set_tstamp), \ FN(ima_file_hash), \ + FN(kptr_xchg), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h index b0d8fea1951d..a9162a6c0284 100644 --- a/tools/include/uapi/linux/btf.h +++ b/tools/include/uapi/linux/btf.h @@ -33,8 +33,8 @@ struct btf_type { /* "info" bits arrangement * bits 0-15: vlen (e.g. # of struct's members) * bits 16-23: unused - * bits 24-27: kind (e.g. int, ptr, array...etc) - * bits 28-30: unused + * bits 24-28: kind (e.g. int, ptr, array...etc) + * bits 29-30: unused * bit 31: kind_flag, currently used by * struct, union and fwd */ diff --git a/tools/include/uapi/linux/vhost.h b/tools/include/uapi/linux/vhost.h index c998860d7bbc..5d99e7c242a2 100644 --- a/tools/include/uapi/linux/vhost.h +++ b/tools/include/uapi/linux/vhost.h @@ -150,4 +150,11 @@ /* Get the valid iova range */ #define VHOST_VDPA_GET_IOVA_RANGE _IOR(VHOST_VIRTIO, 0x78, \ struct vhost_vdpa_iova_range) + +/* Get the config size */ +#define VHOST_VDPA_GET_CONFIG_SIZE _IOR(VHOST_VIRTIO, 0x79, __u32) + +/* Get the count of all virtqueues */ +#define VHOST_VDPA_GET_VQS_COUNT _IOR(VHOST_VIRTIO, 0x80, __u32) + #endif diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build index 94f0a146bb7b..31a1a9015902 100644 --- a/tools/lib/bpf/Build +++ b/tools/lib/bpf/Build @@ -1,3 +1,4 @@ libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \ netlink.o bpf_prog_linfo.o libbpf_probes.o xsk.o hashmap.o \ - btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o + btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o \ + usdt.o diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 064c89e31560..64741c55b8e3 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -239,7 +239,7 @@ install_lib: all_cmd SRC_HDRS := bpf.h libbpf.h btf.h libbpf_common.h libbpf_legacy.h xsk.h \ bpf_helpers.h bpf_tracing.h bpf_endian.h bpf_core_read.h \ - skel_internal.h libbpf_version.h + skel_internal.h libbpf_version.h usdt.bpf.h GEN_HDRS := $(BPF_GENERATED) INSTALL_PFX := $(DESTDIR)$(prefix)/include/bpf diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index cf27251adb92..a9d292c106c2 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -817,7 +817,7 @@ int bpf_link_create(int prog_fd, int target_fd, { __u32 target_btf_id, iter_info_len; union bpf_attr attr; - int fd; + int fd, err; if (!OPTS_VALID(opts, bpf_link_create_opts)) return libbpf_err(-EINVAL); @@ -870,7 +870,37 @@ int bpf_link_create(int prog_fd, int target_fd, } proceed: fd = sys_bpf_fd(BPF_LINK_CREATE, &attr, sizeof(attr)); - return libbpf_err_errno(fd); + if (fd >= 0) + return fd; + /* we'll get EINVAL if LINK_CREATE doesn't support attaching fentry + * and other similar programs + */ + err = -errno; + if (err != -EINVAL) + return libbpf_err(err); + + /* if user used features not supported by + * BPF_RAW_TRACEPOINT_OPEN command, then just give up immediately + */ + if (attr.link_create.target_fd || attr.link_create.target_btf_id) + return 
libbpf_err(err); + if (!OPTS_ZEROED(opts, sz)) + return libbpf_err(err); + + /* otherwise, for few select kinds of programs that can be + * attached using BPF_RAW_TRACEPOINT_OPEN command, try that as + * a fallback for older kernels + */ + switch (attach_type) { + case BPF_TRACE_RAW_TP: + case BPF_LSM_MAC: + case BPF_TRACE_FENTRY: + case BPF_TRACE_FEXIT: + case BPF_MODIFY_RETURN: + return bpf_raw_tracepoint_open(NULL, prog_fd); + default: + return libbpf_err(err); + } } int bpf_link_detach(int link_fd) diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index 44df982d2a5c..5de3eb267125 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -149,6 +149,13 @@ enum libbpf_tristate { #define __kconfig __attribute__((section(".kconfig"))) #define __ksym __attribute__((section(".ksyms"))) +#if __has_attribute(btf_type_tag) +#define __kptr __attribute__((btf_type_tag("kptr"))) +#define __kptr_ref __attribute__((btf_type_tag("kptr_ref"))) +#else +#define __kptr +#define __kptr_ref +#endif #ifndef ___bpf_concat #define ___bpf_concat(a, b) a ## b diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h index e3a8c947e89f..01ce121c302d 100644 --- a/tools/lib/bpf/bpf_tracing.h +++ b/tools/lib/bpf/bpf_tracing.h @@ -27,6 +27,9 @@ #elif defined(__TARGET_ARCH_riscv) #define bpf_target_riscv #define bpf_target_defined +#elif defined(__TARGET_ARCH_arc) + #define bpf_target_arc + #define bpf_target_defined #else /* Fall back to what the compiler says */ @@ -54,6 +57,9 @@ #elif defined(__riscv) && __riscv_xlen == 64 #define bpf_target_riscv #define bpf_target_defined +#elif defined(__arc__) + #define bpf_target_arc + #define bpf_target_defined #endif /* no compiler target */ #endif @@ -233,6 +239,23 @@ struct pt_regs___arm64 { /* riscv does not select ARCH_HAS_SYSCALL_WRAPPER. */ #define PT_REGS_SYSCALL_REGS(ctx) ctx +#elif defined(bpf_target_arc) + +/* arc provides struct user_pt_regs instead of struct pt_regs to userspace */ +#define __PT_REGS_CAST(x) ((const struct user_regs_struct *)(x)) +#define __PT_PARM1_REG scratch.r0 +#define __PT_PARM2_REG scratch.r1 +#define __PT_PARM3_REG scratch.r2 +#define __PT_PARM4_REG scratch.r3 +#define __PT_PARM5_REG scratch.r4 +#define __PT_RET_REG scratch.blink +#define __PT_FP_REG __unsupported__ +#define __PT_RC_REG scratch.r0 +#define __PT_SP_REG scratch.sp +#define __PT_IP_REG scratch.ret +/* arc does not select ARCH_HAS_SYSCALL_WRAPPER. */ +#define PT_REGS_SYSCALL_REGS(ctx) ctx + #endif #if defined(bpf_target_defined) diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 1383e26c5d1f..bb1e06eb1eca 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -2626,6 +2626,7 @@ static int btf_ext_setup_info(struct btf_ext *btf_ext, const struct btf_ext_info_sec *sinfo; struct btf_ext_info *ext_info; __u32 info_left, record_size; + size_t sec_cnt = 0; /* The start of the info sec (including the __u32 record_size). 
*/ void *info; @@ -2689,8 +2690,7 @@ static int btf_ext_setup_info(struct btf_ext *btf_ext, return -EINVAL; } - total_record_size = sec_hdrlen + - (__u64)num_records * record_size; + total_record_size = sec_hdrlen + (__u64)num_records * record_size; if (info_left < total_record_size) { pr_debug("%s section has incorrect num_records in .BTF.ext\n", ext_sec->desc); @@ -2699,12 +2699,14 @@ static int btf_ext_setup_info(struct btf_ext *btf_ext, info_left -= total_record_size; sinfo = (void *)sinfo + total_record_size; + sec_cnt++; } ext_info = ext_sec->ext_info; ext_info->len = ext_sec->len - sizeof(__u32); ext_info->rec_size = record_size; ext_info->info = info + sizeof(__u32); + ext_info->sec_cnt = sec_cnt; return 0; } @@ -2788,6 +2790,9 @@ void btf_ext__free(struct btf_ext *btf_ext) { if (IS_ERR_OR_NULL(btf_ext)) return; + free(btf_ext->func_info.sec_idxs); + free(btf_ext->line_info.sec_idxs); + free(btf_ext->core_relo_info.sec_idxs); free(btf_ext->data); free(btf_ext); } @@ -2826,10 +2831,8 @@ struct btf_ext *btf_ext__new(const __u8 *data, __u32 size) if (err) goto done; - if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, core_relo_len)) { - err = -EINVAL; - goto done; - } + if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, core_relo_len)) + goto done; /* skip core relos parsing */ err = btf_ext_setup_core_relos(btf_ext); if (err) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 809fe209cdcc..73a5192defb3 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -302,7 +302,7 @@ struct bpf_program { void *priv; bpf_program_clear_priv_t clear_priv; - bool load; + bool autoload; bool mark_btf_static; enum bpf_prog_type type; enum bpf_attach_type expected_attach_type; @@ -483,6 +483,8 @@ struct elf_state { int st_ops_shndx; }; +struct usdt_manager; + struct bpf_object { char name[BPF_OBJ_NAME_LEN]; char license[64]; @@ -545,6 +547,8 @@ struct bpf_object { size_t fd_array_cap; size_t fd_array_cnt; + struct usdt_manager *usdt_man; + char path[]; }; @@ -668,7 +672,18 @@ bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog, prog->insns_cnt = prog->sec_insn_cnt; prog->type = BPF_PROG_TYPE_UNSPEC; - prog->load = true; + + /* libbpf's convention for SEC("?abc...") is that it's just like + * SEC("abc...") but the corresponding bpf_program starts out with + * autoload set to false. + */ + if (sec_name[0] == '?') { + prog->autoload = false; + /* from now on forget there was ? 
in section name */ + sec_name++; + } else { + prog->autoload = true; + } prog->instances.fds = NULL; prog->instances.nr = -1; @@ -1218,10 +1233,8 @@ static void bpf_object__elf_finish(struct bpf_object *obj) if (!obj->efile.elf) return; - if (obj->efile.elf) { - elf_end(obj->efile.elf); - obj->efile.elf = NULL; - } + elf_end(obj->efile.elf); + obj->efile.elf = NULL; obj->efile.symbols = NULL; obj->efile.st_ops_data = NULL; @@ -1397,8 +1410,11 @@ static int find_elf_var_offset(const struct bpf_object *obj, const char *name, _ for (si = 0; si < symbols->d_size / sizeof(Elf64_Sym); si++) { Elf64_Sym *sym = elf_sym_by_idx(obj, si); - if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL || - ELF64_ST_TYPE(sym->st_info) != STT_OBJECT) + if (ELF64_ST_TYPE(sym->st_info) != STT_OBJECT) + continue; + + if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL && + ELF64_ST_BIND(sym->st_info) != STB_WEAK) continue; sname = elf_sym_str(obj, sym->st_name); @@ -2749,6 +2765,9 @@ static int bpf_object__init_btf(struct bpf_object *obj, btf__set_pointer_size(obj->btf, 8); } if (btf_ext_data) { + struct btf_ext_info *ext_segs[3]; + int seg_num, sec_num; + if (!obj->btf) { pr_debug("Ignore ELF section %s because its depending ELF section %s is not found.\n", BTF_EXT_ELF_SEC, BTF_ELF_SEC); @@ -2762,6 +2781,43 @@ static int bpf_object__init_btf(struct bpf_object *obj, obj->btf_ext = NULL; goto out; } + + /* setup .BTF.ext to ELF section mapping */ + ext_segs[0] = &obj->btf_ext->func_info; + ext_segs[1] = &obj->btf_ext->line_info; + ext_segs[2] = &obj->btf_ext->core_relo_info; + for (seg_num = 0; seg_num < ARRAY_SIZE(ext_segs); seg_num++) { + struct btf_ext_info *seg = ext_segs[seg_num]; + const struct btf_ext_info_sec *sec; + const char *sec_name; + Elf_Scn *scn; + + if (seg->sec_cnt == 0) + continue; + + seg->sec_idxs = calloc(seg->sec_cnt, sizeof(*seg->sec_idxs)); + if (!seg->sec_idxs) { + err = -ENOMEM; + goto out; + } + + sec_num = 0; + for_each_btf_ext_sec(seg, sec) { + /* preventively increment index to avoid doing + * this before every continue below + */ + sec_num++; + + sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off); + if (str_is_empty(sec_name)) + continue; + scn = elf_sec_by_name(obj, sec_name); + if (!scn) + continue; + + seg->sec_idxs[sec_num - 1] = elf_ndxscn(scn); + } + } } out: if (err && libbpf_needs_btf(obj)) { @@ -2920,7 +2976,7 @@ static bool obj_needs_vmlinux_btf(const struct bpf_object *obj) } bpf_object__for_each_program(prog, obj) { - if (!prog->load) + if (!prog->autoload) continue; if (prog_needs_vmlinux_btf(prog)) return true; @@ -4587,7 +4643,7 @@ static int probe_kern_probe_read_kernel(void) }; int fd, insn_cnt = ARRAY_SIZE(insns); - fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", insns, insn_cnt, NULL); + fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL); return probe_fd(fd); } @@ -4678,6 +4734,18 @@ static int probe_perf_link(void) return link_fd < 0 && err == -EBADF; } +static int probe_kern_bpf_cookie(void) +{ + struct bpf_insn insns[] = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_attach_cookie), + BPF_EXIT_INSN(), + }; + int ret, insn_cnt = ARRAY_SIZE(insns); + + ret = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", insns, insn_cnt, NULL); + return probe_fd(ret); +} + enum kern_feature_result { FEAT_UNKNOWN = 0, FEAT_SUPPORTED = 1, @@ -4740,6 +4808,9 @@ static struct kern_feature_desc { [FEAT_MEMCG_ACCOUNT] = { "memcg-based memory accounting", probe_memcg_account, }, + [FEAT_BPF_COOKIE] = { + "BPF cookie support", 
probe_kern_bpf_cookie, + }, }; bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) @@ -5555,6 +5626,22 @@ static int record_relo_core(struct bpf_program *prog, return 0; } +static const struct bpf_core_relo *find_relo_core(struct bpf_program *prog, int insn_idx) +{ + struct reloc_desc *relo; + int i; + + for (i = 0; i < prog->nr_reloc; i++) { + relo = &prog->reloc_desc[i]; + if (relo->type != RELO_CORE || relo->insn_idx != insn_idx) + continue; + + return relo->core_relo; + } + + return NULL; +} + static int bpf_core_resolve_relo(struct bpf_program *prog, const struct bpf_core_relo *relo, int relo_idx, @@ -5611,7 +5698,7 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) struct bpf_program *prog; struct bpf_insn *insn; const char *sec_name; - int i, err = 0, insn_idx, sec_idx; + int i, err = 0, insn_idx, sec_idx, sec_num; if (obj->btf_ext->core_relo_info.len == 0) return 0; @@ -5632,32 +5719,18 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) } seg = &obj->btf_ext->core_relo_info; + sec_num = 0; for_each_btf_ext_sec(seg, sec) { + sec_idx = seg->sec_idxs[sec_num]; + sec_num++; + sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off); if (str_is_empty(sec_name)) { err = -EINVAL; goto out; } - /* bpf_object's ELF is gone by now so it's not easy to find - * section index by section name, but we can find *any* - * bpf_program within desired section name and use it's - * prog->sec_idx to do a proper search by section index and - * instruction offset - */ - prog = NULL; - for (i = 0; i < obj->nr_programs; i++) { - prog = &obj->programs[i]; - if (strcmp(prog->sec_name, sec_name) == 0) - break; - } - if (!prog) { - pr_warn("sec '%s': failed to find a BPF program\n", sec_name); - return -ENOENT; - } - sec_idx = prog->sec_idx; - pr_debug("sec '%s': found %d CO-RE relocations\n", - sec_name, sec->num_info); + pr_debug("sec '%s': found %d CO-RE relocations\n", sec_name, sec->num_info); for_each_btf_ext_rec(seg, sec, i, rec) { if (rec->insn_off % BPF_INSN_SZ) @@ -5665,15 +5738,22 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) insn_idx = rec->insn_off / BPF_INSN_SZ; prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx); if (!prog) { - pr_warn("sec '%s': failed to find program at insn #%d for CO-RE offset relocation #%d\n", - sec_name, insn_idx, i); - err = -EINVAL; - goto out; + /* When __weak subprog is "overridden" by another instance + * of the subprog from a different object file, linker still + * appends all the .BTF.ext info that used to belong to that + * eliminated subprogram. + * This is similar to what x86-64 linker does for relocations. + * So just ignore such relocations just like we ignore + * subprog instructions when discovering subprograms. 
+ */ + pr_debug("sec '%s': skipping CO-RE relocation #%d for insn #%d belonging to eliminated weak subprogram\n", + sec_name, i, insn_idx); + continue; } /* no need to apply CO-RE relocation if the program is * not going to be loaded */ - if (!prog->load) + if (!prog->autoload) continue; /* adjust insn_idx from section frame of reference to the local @@ -5685,16 +5765,16 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) return -EINVAL; insn = &prog->insns[insn_idx]; - if (prog->obj->gen_loader) { - err = record_relo_core(prog, rec, insn_idx); - if (err) { - pr_warn("prog '%s': relo #%d: failed to record relocation: %d\n", - prog->name, i, err); - goto out; - } - continue; + err = record_relo_core(prog, rec, insn_idx); + if (err) { + pr_warn("prog '%s': relo #%d: failed to record relocation: %d\n", + prog->name, i, err); + goto out; } + if (prog->obj->gen_loader) + continue; + err = bpf_core_resolve_relo(prog, rec, i, obj->btf, cand_cache, &targ_res); if (err) { pr_warn("prog '%s': relo #%d: failed to relocate: %d\n", @@ -5834,14 +5914,13 @@ static int adjust_prog_btf_ext_info(const struct bpf_object *obj, void *rec, *rec_end, *new_prog_info; const struct btf_ext_info_sec *sec; size_t old_sz, new_sz; - const char *sec_name; - int i, off_adj; + int i, sec_num, sec_idx, off_adj; + sec_num = 0; for_each_btf_ext_sec(ext_info, sec) { - sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off); - if (!sec_name) - return -EINVAL; - if (strcmp(sec_name, prog->sec_name) != 0) + sec_idx = ext_info->sec_idxs[sec_num]; + sec_num++; + if (prog->sec_idx != sec_idx) continue; for_each_btf_ext_rec(ext_info, sec, i, rec) { @@ -6236,7 +6315,6 @@ bpf_object__relocate_calls(struct bpf_object *obj, struct bpf_program *prog) if (err) return err; - return 0; } @@ -6297,8 +6375,7 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) err); return err; } - if (obj->gen_loader) - bpf_object__sort_relos(obj); + bpf_object__sort_relos(obj); } /* Before relocating calls pre-process relocations and mark @@ -6334,7 +6411,7 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) */ if (prog_is_subprog(obj, prog)) continue; - if (!prog->load) + if (!prog->autoload) continue; err = bpf_object__relocate_calls(obj, prog); @@ -6349,7 +6426,7 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) prog = &obj->programs[i]; if (prog_is_subprog(obj, prog)) continue; - if (!prog->load) + if (!prog->autoload) continue; err = bpf_object__relocate_data(obj, prog); if (err) { @@ -6358,8 +6435,7 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) return err; } } - if (!obj->gen_loader) - bpf_object__free_relocs(obj); + return 0; } @@ -6636,6 +6712,8 @@ static int libbpf_prepare_prog_load(struct bpf_program *prog, return 0; } +static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz); + static int bpf_object_load_prog_instance(struct bpf_object *obj, struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, const char *license, __u32 kern_version, @@ -6782,6 +6860,10 @@ retry_load: goto retry_load; ret = -errno; + + /* post-process verifier log to improve error descriptions */ + fixup_verifier_log(prog, log_buf, log_buf_size); + cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); pr_warn("prog '%s': BPF program load failed: %s\n", prog->name, cp); pr_perm_msg(ret); @@ -6790,10 +6872,6 @@ retry_load: pr_warn("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n", prog->name, 
log_buf); } - if (insns_cnt >= BPF_MAXINSNS) { - pr_warn("prog '%s': program too large (%d insns), at most %d insns\n", - prog->name, insns_cnt, BPF_MAXINSNS); - } out: if (own_log_buf) @@ -6801,6 +6879,128 @@ out: return ret; } +static char *find_prev_line(char *buf, char *cur) +{ + char *p; + + if (cur == buf) /* end of a log buf */ + return NULL; + + p = cur - 1; + while (p - 1 >= buf && *(p - 1) != '\n') + p--; + + return p; +} + +static void patch_log(char *buf, size_t buf_sz, size_t log_sz, + char *orig, size_t orig_sz, const char *patch) +{ + /* size of the remaining log content to the right from the to-be-replaced part */ + size_t rem_sz = (buf + log_sz) - (orig + orig_sz); + size_t patch_sz = strlen(patch); + + if (patch_sz != orig_sz) { + /* If patch line(s) are longer than original piece of verifier log, + * shift log contents by (patch_sz - orig_sz) bytes to the right + * starting from after to-be-replaced part of the log. + * + * If patch line(s) are shorter than original piece of verifier log, + * shift log contents by (orig_sz - patch_sz) bytes to the left + * starting from after to-be-replaced part of the log + * + * We need to be careful about not overflowing available + * buf_sz capacity. If that's the case, we'll truncate the end + * of the original log, as necessary. + */ + if (patch_sz > orig_sz) { + if (orig + patch_sz >= buf + buf_sz) { + /* patch is big enough to cover remaining space completely */ + patch_sz -= (orig + patch_sz) - (buf + buf_sz) + 1; + rem_sz = 0; + } else if (patch_sz - orig_sz > buf_sz - log_sz) { + /* patch causes part of remaining log to be truncated */ + rem_sz -= (patch_sz - orig_sz) - (buf_sz - log_sz); + } + } + /* shift remaining log to the right by calculated amount */ + memmove(orig + patch_sz, orig + orig_sz, rem_sz); + } + + memcpy(orig, patch, patch_sz); +} + +static void fixup_log_failed_core_relo(struct bpf_program *prog, + char *buf, size_t buf_sz, size_t log_sz, + char *line1, char *line2, char *line3) +{ + /* Expected log for failed and not properly guarded CO-RE relocation: + * line1 -> 123: (85) call unknown#195896080 + * line2 -> invalid func unknown#195896080 + * line3 -> <anything else or end of buffer> + * + * "123" is the index of the instruction that was poisoned. We extract + * instruction index to find corresponding CO-RE relocation and + * replace this part of the log with more relevant information about + * failed CO-RE relocation. 
+ */ + const struct bpf_core_relo *relo; + struct bpf_core_spec spec; + char patch[512], spec_buf[256]; + int insn_idx, err; + + if (sscanf(line1, "%d: (%*d) call unknown#195896080\n", &insn_idx) != 1) + return; + + relo = find_relo_core(prog, insn_idx); + if (!relo) + return; + + err = bpf_core_parse_spec(prog->name, prog->obj->btf, relo, &spec); + if (err) + return; + + bpf_core_format_spec(spec_buf, sizeof(spec_buf), &spec); + snprintf(patch, sizeof(patch), + "%d: <invalid CO-RE relocation>\n" + "failed to resolve CO-RE relocation %s\n", + insn_idx, spec_buf); + + patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch); +} + +static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz) +{ + /* look for familiar error patterns in last N lines of the log */ + const size_t max_last_line_cnt = 10; + char *prev_line, *cur_line, *next_line; + size_t log_sz; + int i; + + if (!buf) + return; + + log_sz = strlen(buf) + 1; + next_line = buf + log_sz - 1; + + for (i = 0; i < max_last_line_cnt; i++, next_line = cur_line) { + cur_line = find_prev_line(buf, next_line); + if (!cur_line) + return; + + /* failed CO-RE relocation case */ + if (str_has_pfx(cur_line, "invalid func unknown#195896080\n")) { + prev_line = find_prev_line(buf, cur_line); + if (!prev_line) + continue; + + fixup_log_failed_core_relo(prog, buf, buf_sz, log_sz, + prev_line, cur_line, next_line); + return; + } + } +} + static int bpf_program_record_relos(struct bpf_program *prog) { struct bpf_object *obj = prog->obj; @@ -6946,7 +7146,7 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level) prog = &obj->programs[i]; if (prog_is_subprog(obj, prog)) continue; - if (!prog->load) { + if (!prog->autoload) { pr_debug("prog '%s': skipped loading\n", prog->name); continue; } @@ -6955,8 +7155,8 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level) if (err) return err; } - if (obj->gen_loader) - bpf_object__free_relocs(obj); + + bpf_object__free_relocs(obj); return 0; } @@ -6976,8 +7176,8 @@ static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object continue; } - bpf_program__set_type(prog, prog->sec_def->prog_type); - bpf_program__set_expected_attach_type(prog, prog->sec_def->expected_attach_type); + prog->type = prog->sec_def->prog_type; + prog->expected_attach_type = prog->sec_def->expected_attach_type; #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wdeprecated-declarations" @@ -8200,6 +8400,9 @@ void bpf_object__close(struct bpf_object *obj) if (obj->clear_priv) obj->clear_priv(obj, obj->priv); + usdt_manager_free(obj->usdt_man); + obj->usdt_man = NULL; + bpf_gen__free(obj->gen_loader); bpf_object__elf_finish(obj); bpf_object_unload(obj); @@ -8423,7 +8626,7 @@ const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy) bool bpf_program__autoload(const struct bpf_program *prog) { - return prog->load; + return prog->autoload; } int bpf_program__set_autoload(struct bpf_program *prog, bool autoload) @@ -8431,7 +8634,7 @@ int bpf_program__set_autoload(struct bpf_program *prog, bool autoload) if (prog->obj->loaded) return libbpf_err(-EINVAL); - prog->load = autoload; + prog->autoload = autoload; return 0; } @@ -8519,9 +8722,13 @@ enum bpf_prog_type bpf_program__type(const struct bpf_program *prog) return prog->type; } -void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type) +int bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type) { + if (prog->obj->loaded) + return libbpf_err(-EBUSY); + prog->type = 
type; + return 0; } static bool bpf_program__is_type(const struct bpf_program *prog, @@ -8535,8 +8742,7 @@ int bpf_program__set_##NAME(struct bpf_program *prog) \ { \ if (!prog) \ return libbpf_err(-EINVAL); \ - bpf_program__set_type(prog, TYPE); \ - return 0; \ + return bpf_program__set_type(prog, TYPE); \ } \ \ bool bpf_program__is_##NAME(const struct bpf_program *prog) \ @@ -8566,10 +8772,14 @@ enum bpf_attach_type bpf_program__expected_attach_type(const struct bpf_program return prog->expected_attach_type; } -void bpf_program__set_expected_attach_type(struct bpf_program *prog, +int bpf_program__set_expected_attach_type(struct bpf_program *prog, enum bpf_attach_type type) { + if (prog->obj->loaded) + return libbpf_err(-EBUSY); + prog->expected_attach_type = type; + return 0; } __u32 bpf_program__flags(const struct bpf_program *prog) @@ -8630,6 +8840,8 @@ int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log } static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link); +static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link); +static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link); static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link); static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link); static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link); @@ -8642,11 +8854,12 @@ static const struct bpf_sec_def section_defs[] = { SEC_DEF("sk_reuseport/migrate", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, SEC_ATTACHABLE | SEC_SLOPPY_PFX), SEC_DEF("sk_reuseport", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT, SEC_ATTACHABLE | SEC_SLOPPY_PFX), SEC_DEF("kprobe/", KPROBE, 0, SEC_NONE, attach_kprobe), - SEC_DEF("uprobe/", KPROBE, 0, SEC_NONE), + SEC_DEF("uprobe+", KPROBE, 0, SEC_NONE, attach_uprobe), SEC_DEF("kretprobe/", KPROBE, 0, SEC_NONE, attach_kprobe), - SEC_DEF("uretprobe/", KPROBE, 0, SEC_NONE), + SEC_DEF("uretprobe+", KPROBE, 0, SEC_NONE, attach_uprobe), SEC_DEF("kprobe.multi/", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi), SEC_DEF("kretprobe.multi/", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi), + SEC_DEF("usdt+", KPROBE, 0, SEC_NONE, attach_usdt), SEC_DEF("tc", SCHED_CLS, 0, SEC_NONE), SEC_DEF("classifier", SCHED_CLS, 0, SEC_NONE | SEC_SLOPPY_PFX | SEC_DEPRECATED), SEC_DEF("action", SCHED_ACT, 0, SEC_NONE | SEC_SLOPPY_PFX), @@ -9636,9 +9849,8 @@ static int bpf_prog_load_xattr2(const struct bpf_prog_load_attr *attr, * bpf_object__open guessed */ if (attr->prog_type != BPF_PROG_TYPE_UNSPEC) { - bpf_program__set_type(prog, attr->prog_type); - bpf_program__set_expected_attach_type(prog, - attach_type); + prog->type = attr->prog_type; + prog->expected_attach_type = attach_type; } if (bpf_program__type(prog) == BPF_PROG_TYPE_UNSPEC) { /* @@ -9692,14 +9904,6 @@ int bpf_prog_load_deprecated(const char *file, enum bpf_prog_type type, return bpf_prog_load_xattr2(&attr, pobj, prog_fd); } -struct bpf_link { - int (*detach)(struct bpf_link *link); - void (*dealloc)(struct bpf_link *link); - char *pin_path; /* NULL, if not pinned */ - int fd; /* hook FD, -1 if not applicable */ - bool disconnected; -}; - /* Replace link's underlying BPF program with the new one */ int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog) { @@ -10517,6 +10721,273 @@ static int perf_event_uprobe_open_legacy(const char 
*probe_name, bool retprobe, return pfd; } +/* uprobes deal in relative offsets; subtract the base address associated with + * the mapped binary. See Documentation/trace/uprobetracer.rst for more + * details. + */ +static long elf_find_relative_offset(const char *filename, Elf *elf, long addr) +{ + size_t n; + int i; + + if (elf_getphdrnum(elf, &n)) { + pr_warn("elf: failed to find program headers for '%s': %s\n", filename, + elf_errmsg(-1)); + return -ENOENT; + } + + for (i = 0; i < n; i++) { + int seg_start, seg_end, seg_offset; + GElf_Phdr phdr; + + if (!gelf_getphdr(elf, i, &phdr)) { + pr_warn("elf: failed to get program header %d from '%s': %s\n", i, filename, + elf_errmsg(-1)); + return -ENOENT; + } + if (phdr.p_type != PT_LOAD || !(phdr.p_flags & PF_X)) + continue; + + seg_start = phdr.p_vaddr; + seg_end = seg_start + phdr.p_memsz; + seg_offset = phdr.p_offset; + if (addr >= seg_start && addr < seg_end) + return addr - seg_start + seg_offset; + } + pr_warn("elf: failed to find prog header containing 0x%lx in '%s'\n", addr, filename); + return -ENOENT; +} + +/* Return next ELF section of sh_type after scn, or first of that type if scn is NULL. */ +static Elf_Scn *elf_find_next_scn_by_type(Elf *elf, int sh_type, Elf_Scn *scn) +{ + while ((scn = elf_nextscn(elf, scn)) != NULL) { + GElf_Shdr sh; + + if (!gelf_getshdr(scn, &sh)) + continue; + if (sh.sh_type == sh_type) + return scn; + } + return NULL; +} + +/* Find offset of function name in object specified by path. "name" matches + * symbol name or name@@LIB for library functions. + */ +static long elf_find_func_offset(const char *binary_path, const char *name) +{ + int fd, i, sh_types[2] = { SHT_DYNSYM, SHT_SYMTAB }; + bool is_shared_lib, is_name_qualified; + char errmsg[STRERR_BUFSIZE]; + long ret = -ENOENT; + size_t name_len; + GElf_Ehdr ehdr; + Elf *elf; + + fd = open(binary_path, O_RDONLY | O_CLOEXEC); + if (fd < 0) { + ret = -errno; + pr_warn("failed to open %s: %s\n", binary_path, + libbpf_strerror_r(ret, errmsg, sizeof(errmsg))); + return ret; + } + elf = elf_begin(fd, ELF_C_READ_MMAP, NULL); + if (!elf) { + pr_warn("elf: could not read elf from %s: %s\n", binary_path, elf_errmsg(-1)); + close(fd); + return -LIBBPF_ERRNO__FORMAT; + } + if (!gelf_getehdr(elf, &ehdr)) { + pr_warn("elf: failed to get ehdr from %s: %s\n", binary_path, elf_errmsg(-1)); + ret = -LIBBPF_ERRNO__FORMAT; + goto out; + } + /* for shared lib case, we do not need to calculate relative offset */ + is_shared_lib = ehdr.e_type == ET_DYN; + + name_len = strlen(name); + /* Does name specify "@@LIB"? */ + is_name_qualified = strstr(name, "@@") != NULL; + + /* Search SHT_DYNSYM, SHT_SYMTAB for symbol. This search order is used because if + * a binary is stripped, it may only have SHT_DYNSYM, and a fully-statically + * linked binary may not have SHT_DYMSYM, so absence of a section should not be + * reported as a warning/error. 
+ */ + for (i = 0; i < ARRAY_SIZE(sh_types); i++) { + size_t nr_syms, strtabidx, idx; + Elf_Data *symbols = NULL; + Elf_Scn *scn = NULL; + int last_bind = -1; + const char *sname; + GElf_Shdr sh; + + scn = elf_find_next_scn_by_type(elf, sh_types[i], NULL); + if (!scn) { + pr_debug("elf: failed to find symbol table ELF sections in '%s'\n", + binary_path); + continue; + } + if (!gelf_getshdr(scn, &sh)) + continue; + strtabidx = sh.sh_link; + symbols = elf_getdata(scn, 0); + if (!symbols) { + pr_warn("elf: failed to get symbols for symtab section in '%s': %s\n", + binary_path, elf_errmsg(-1)); + ret = -LIBBPF_ERRNO__FORMAT; + goto out; + } + nr_syms = symbols->d_size / sh.sh_entsize; + + for (idx = 0; idx < nr_syms; idx++) { + int curr_bind; + GElf_Sym sym; + + if (!gelf_getsym(symbols, idx, &sym)) + continue; + + if (GELF_ST_TYPE(sym.st_info) != STT_FUNC) + continue; + + sname = elf_strptr(elf, strtabidx, sym.st_name); + if (!sname) + continue; + + curr_bind = GELF_ST_BIND(sym.st_info); + + /* User can specify func, func@@LIB or func@@LIB_VERSION. */ + if (strncmp(sname, name, name_len) != 0) + continue; + /* ...but we don't want a search for "foo" to match 'foo2" also, so any + * additional characters in sname should be of the form "@@LIB". + */ + if (!is_name_qualified && sname[name_len] != '\0' && sname[name_len] != '@') + continue; + + if (ret >= 0) { + /* handle multiple matches */ + if (last_bind != STB_WEAK && curr_bind != STB_WEAK) { + /* Only accept one non-weak bind. */ + pr_warn("elf: ambiguous match for '%s', '%s' in '%s'\n", + sname, name, binary_path); + ret = -LIBBPF_ERRNO__FORMAT; + goto out; + } else if (curr_bind == STB_WEAK) { + /* already have a non-weak bind, and + * this is a weak bind, so ignore. + */ + continue; + } + } + ret = sym.st_value; + last_bind = curr_bind; + } + /* For binaries that are not shared libraries, we need relative offset */ + if (ret > 0 && !is_shared_lib) + ret = elf_find_relative_offset(binary_path, elf, ret); + if (ret > 0) + break; + } + + if (ret > 0) { + pr_debug("elf: symbol address match for '%s' in '%s': 0x%lx\n", name, binary_path, + ret); + } else { + if (ret == 0) { + pr_warn("elf: '%s' is 0 in symtab for '%s': %s\n", name, binary_path, + is_shared_lib ? "should not be 0 in a shared library" : + "try using shared library path instead"); + ret = -ENOENT; + } else { + pr_warn("elf: failed to find symbol '%s' in '%s'\n", name, binary_path); + } + } +out: + elf_end(elf); + close(fd); + return ret; +} + +static const char *arch_specific_lib_paths(void) +{ + /* + * Based on https://packages.debian.org/sid/libc6. + * + * Assume that the traced program is built for the same architecture + * as libbpf, which should cover the vast majority of cases. 
+ */ +#if defined(__x86_64__) + return "/lib/x86_64-linux-gnu"; +#elif defined(__i386__) + return "/lib/i386-linux-gnu"; +#elif defined(__s390x__) + return "/lib/s390x-linux-gnu"; +#elif defined(__s390__) + return "/lib/s390-linux-gnu"; +#elif defined(__arm__) && defined(__SOFTFP__) + return "/lib/arm-linux-gnueabi"; +#elif defined(__arm__) && !defined(__SOFTFP__) + return "/lib/arm-linux-gnueabihf"; +#elif defined(__aarch64__) + return "/lib/aarch64-linux-gnu"; +#elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 64 + return "/lib/mips64el-linux-gnuabi64"; +#elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 32 + return "/lib/mipsel-linux-gnu"; +#elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + return "/lib/powerpc64le-linux-gnu"; +#elif defined(__sparc__) && defined(__arch64__) + return "/lib/sparc64-linux-gnu"; +#elif defined(__riscv) && __riscv_xlen == 64 + return "/lib/riscv64-linux-gnu"; +#else + return NULL; +#endif +} + +/* Get full path to program/shared library. */ +static int resolve_full_path(const char *file, char *result, size_t result_sz) +{ + const char *search_paths[3] = {}; + int i; + + if (str_has_sfx(file, ".so") || strstr(file, ".so.")) { + search_paths[0] = getenv("LD_LIBRARY_PATH"); + search_paths[1] = "/usr/lib64:/usr/lib"; + search_paths[2] = arch_specific_lib_paths(); + } else { + search_paths[0] = getenv("PATH"); + search_paths[1] = "/usr/bin:/usr/sbin"; + } + + for (i = 0; i < ARRAY_SIZE(search_paths); i++) { + const char *s; + + if (!search_paths[i]) + continue; + for (s = search_paths[i]; s != NULL; s = strchr(s, ':')) { + char *next_path; + int seg_len; + + if (s[0] == ':') + s++; + next_path = strchr(s, ':'); + seg_len = next_path ? next_path - s : strlen(s); + if (!seg_len) + continue; + snprintf(result, result_sz, "%.*s/%s", seg_len, s, file); + /* ensure it is an executable file/link */ + if (access(result, R_OK | X_OK) < 0) + continue; + pr_debug("resolved '%s' to '%s'\n", file, result); + return 0; + } + } + return -ENOENT; +} + LIBBPF_API struct bpf_link * bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, const char *binary_path, size_t func_offset, @@ -10524,10 +10995,12 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, { DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts); char errmsg[STRERR_BUFSIZE], *legacy_probe = NULL; + char full_binary_path[PATH_MAX]; struct bpf_link *link; size_t ref_ctr_off; int pfd, err; bool retprobe, legacy; + const char *func_name; if (!OPTS_VALID(opts, bpf_uprobe_opts)) return libbpf_err_ptr(-EINVAL); @@ -10536,12 +11009,37 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, ref_ctr_off = OPTS_GET(opts, ref_ctr_offset, 0); pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0); + if (binary_path && !strchr(binary_path, '/')) { + err = resolve_full_path(binary_path, full_binary_path, + sizeof(full_binary_path)); + if (err) { + pr_warn("prog '%s': failed to resolve full path for '%s': %d\n", + prog->name, binary_path, err); + return libbpf_err_ptr(err); + } + binary_path = full_binary_path; + } + func_name = OPTS_GET(opts, func_name, NULL); + if (func_name) { + long sym_off; + + if (!binary_path) { + pr_warn("prog '%s': name-based attach requires binary_path\n", + prog->name); + return libbpf_err_ptr(-EINVAL); + } + sym_off = elf_find_func_offset(binary_path, func_name); + if (sym_off < 0) + return libbpf_err_ptr(sym_off); + func_offset += sym_off; + } + legacy = determine_uprobe_perf_type() < 0; 
if (!legacy) { pfd = perf_event_open_probe(true /* uprobe */, retprobe, binary_path, func_offset, pid, ref_ctr_off); } else { - char probe_name[512]; + char probe_name[PATH_MAX + 64]; if (ref_ctr_off) return libbpf_err_ptr(-EINVAL); @@ -10589,6 +11087,60 @@ err_out: } +/* Format of u[ret]probe section definition supporting auto-attach: + * u[ret]probe/binary:function[+offset] + * + * binary can be an absolute/relative path or a filename; the latter is resolved to a + * full binary path via bpf_program__attach_uprobe_opts. + * + * Specifying uprobe+ ensures we carry out strict matching; either "uprobe" must be + * specified (and auto-attach is not possible) or the above format is specified for + * auto-attach. + */ +static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link) +{ + DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts); + char *probe_type = NULL, *binary_path = NULL, *func_name = NULL; + int n, ret = -EINVAL; + long offset = 0; + + *link = NULL; + + n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[a-zA-Z0-9_.]+%li", + &probe_type, &binary_path, &func_name, &offset); + switch (n) { + case 1: + /* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. */ + ret = 0; + break; + case 2: + pr_warn("prog '%s': section '%s' missing ':function[+offset]' specification\n", + prog->name, prog->sec_name); + break; + case 3: + case 4: + opts.retprobe = strcmp(probe_type, "uretprobe") == 0; + if (opts.retprobe && offset != 0) { + pr_warn("prog '%s': uretprobes do not support offset specification\n", + prog->name); + break; + } + opts.func_name = func_name; + *link = bpf_program__attach_uprobe_opts(prog, -1, binary_path, offset, &opts); + ret = libbpf_get_error(*link); + break; + default: + pr_warn("prog '%s': invalid format of section definition '%s'\n", prog->name, + prog->sec_name); + break; + } + free(probe_type); + free(binary_path); + free(func_name); + + return ret; +} + struct bpf_link *bpf_program__attach_uprobe(const struct bpf_program *prog, bool retprobe, pid_t pid, const char *binary_path, @@ -10599,6 +11151,85 @@ struct bpf_link *bpf_program__attach_uprobe(const struct bpf_program *prog, return bpf_program__attach_uprobe_opts(prog, pid, binary_path, func_offset, &opts); } +struct bpf_link *bpf_program__attach_usdt(const struct bpf_program *prog, + pid_t pid, const char *binary_path, + const char *usdt_provider, const char *usdt_name, + const struct bpf_usdt_opts *opts) +{ + char resolved_path[512]; + struct bpf_object *obj = prog->obj; + struct bpf_link *link; + __u64 usdt_cookie; + int err; + + if (!OPTS_VALID(opts, bpf_uprobe_opts)) + return libbpf_err_ptr(-EINVAL); + + if (bpf_program__fd(prog) < 0) { + pr_warn("prog '%s': can't attach BPF program w/o FD (did you load it?)\n", + prog->name); + return libbpf_err_ptr(-EINVAL); + } + + if (!strchr(binary_path, '/')) { + err = resolve_full_path(binary_path, resolved_path, sizeof(resolved_path)); + if (err) { + pr_warn("prog '%s': failed to resolve full path for '%s': %d\n", + prog->name, binary_path, err); + return libbpf_err_ptr(err); + } + binary_path = resolved_path; + } + + /* USDT manager is instantiated lazily on first USDT attach. It will + * be destroyed together with BPF object in bpf_object__close(). 
+ */
+ if (IS_ERR(obj->usdt_man))
+ return libbpf_ptr(obj->usdt_man);
+ if (!obj->usdt_man) {
+ obj->usdt_man = usdt_manager_new(obj);
+ if (IS_ERR(obj->usdt_man))
+ return libbpf_ptr(obj->usdt_man);
+ }
+
+ usdt_cookie = OPTS_GET(opts, usdt_cookie, 0);
+ link = usdt_manager_attach_usdt(obj->usdt_man, prog, pid, binary_path,
+ usdt_provider, usdt_name, usdt_cookie);
+ err = libbpf_get_error(link);
+ if (err)
+ return libbpf_err_ptr(err);
+ return link;
+}
+
+static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link)
+{
+ char *path = NULL, *provider = NULL, *name = NULL;
+ const char *sec_name;
+ int n, err;
+
+ sec_name = bpf_program__section_name(prog);
+ if (strcmp(sec_name, "usdt") == 0) {
+ /* no auto-attach for just SEC("usdt") */
+ *link = NULL;
+ return 0;
+ }
+
+ n = sscanf(sec_name, "usdt/%m[^:]:%m[^:]:%m[^:]", &path, &provider, &name);
+ if (n != 3) {
+ pr_warn("invalid section '%s', expected SEC(\"usdt/<path>:<provider>:<name>\")\n",
+ sec_name);
+ err = -EINVAL;
+ } else {
+ *link = bpf_program__attach_usdt(prog, -1 /* any process */, path,
+ provider, name, NULL);
+ err = libbpf_get_error(*link);
+ }
+ free(path);
+ free(provider);
+ free(name);
+ return err;
+}
+
 static int determine_tracepoint_id(const char *tp_category,
 const char *tp_name)
 {
@@ -10791,7 +11422,8 @@ static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *pro
 return libbpf_err_ptr(-ENOMEM);
 link->detach = &bpf_link__detach_fd;

- pfd = bpf_raw_tracepoint_open(NULL, prog_fd);
+ /* libbpf is smart enough to redirect to BPF_RAW_TRACEPOINT_OPEN on old kernels */
+ pfd = bpf_link_create(prog_fd, 0, bpf_program__expected_attach_type(prog), NULL);
 if (pfd < 0) {
 pfd = -errno;
 free(link);
@@ -10800,7 +11432,7 @@ static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *pro
 return libbpf_err_ptr(pfd);
 }
 link->fd = pfd;
- return (struct bpf_link *)link;
+ return link;
 }

 struct bpf_link *bpf_program__attach_trace(const struct bpf_program *prog)
@@ -12211,7 +12843,7 @@ int bpf_object__attach_skeleton(struct bpf_object_skeleton *s)
 struct bpf_program *prog = *s->progs[i].prog;
 struct bpf_link **link = s->progs[i].link;

- if (!prog->load)
+ if (!prog->autoload)
 continue;

 /* auto-attaching not supported for this program */
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 05dde85e19a6..cdbfee60ea3e 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -378,7 +378,31 @@ struct bpf_link;
 LIBBPF_API struct bpf_link *bpf_link__open(const char *path);
 LIBBPF_API int bpf_link__fd(const struct bpf_link *link);
 LIBBPF_API const char *bpf_link__pin_path(const struct bpf_link *link);
+/**
+ * @brief **bpf_link__pin()** pins the BPF link to a file
+ * in the BPF FS specified by a path. This increments the link's
+ * reference count, allowing it to stay loaded after the process
+ * which loaded it has exited.
+ *
+ * @param link BPF link to pin, must already be loaded
+ * @param path file path in a BPF file system
+ * @return 0, on success; negative error code, otherwise
+ */
+
 LIBBPF_API int bpf_link__pin(struct bpf_link *link, const char *path);
+
+/**
+ * @brief **bpf_link__unpin()** unpins the BPF link from a file
+ * in the BPF FS specified by a path. This decrements the link's
+ * reference count.
+ *
+ * The file pinning the BPF link can also be unlinked by a different
+ * process in which case this function will return an error.
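+ *
+ * An illustrative pin/unpin round trip (the pin path below is
+ * hypothetical; any path inside a mounted BPF FS works):
+ *
+ *    bpf_link__pin(link, "/sys/fs/bpf/my_link");
+ *    ...
+ *    bpf_link__unpin(link);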
+ *
+ * @param link BPF link to unpin
+ * @param path file path to the pin in a BPF file system
+ * @return 0, on success; negative error code, otherwise
+ */
 LIBBPF_API int bpf_link__unpin(struct bpf_link *link);
 LIBBPF_API int bpf_link__update_program(struct bpf_link *link,
 struct bpf_program *prog);
@@ -386,6 +410,22 @@ LIBBPF_API void bpf_link__disconnect(struct bpf_link *link);
 LIBBPF_API int bpf_link__detach(struct bpf_link *link);
 LIBBPF_API int bpf_link__destroy(struct bpf_link *link);

+/**
+ * @brief **bpf_program__attach()** is a generic function for attaching
+ * a BPF program based on auto-detection of program type, attach type,
+ * and extra parameters, where applicable.
+ *
+ * @param prog BPF program to attach
+ * @return Reference to the newly created BPF link; or NULL on error,
+ * with the error code stored in errno
+ *
+ * This is supported for:
+ * - kprobe/kretprobe (depends on SEC() definition)
+ * - uprobe/uretprobe (depends on SEC() definition)
+ * - tracepoint
+ * - raw tracepoint
+ * - tracing programs (typed raw TP/fentry/fexit/fmod_ret)
+ */
 LIBBPF_API struct bpf_link *
 bpf_program__attach(const struct bpf_program *prog);

@@ -459,9 +499,17 @@ struct bpf_uprobe_opts {
 __u64 bpf_cookie;
 /* uprobe is return probe, invoked at function return time */
 bool retprobe;
+ /* Function name to attach to. Could be an unqualified ("abc") or library-qualified
+ * "abc@LIBXYZ" name. To specify function entry, func_name should be set while
+ * func_offset argument to bpf_program__attach_uprobe_opts() should be 0. To trace an
+ * offset within a function, specify func_name and use func_offset argument to specify
+ * offset within the function. Shared library functions must specify the shared library
+ * binary_path.
+ */
+ const char *func_name;
 size_t :0;
 };
-#define bpf_uprobe_opts__last_field retprobe
+#define bpf_uprobe_opts__last_field func_name

 /**
 * @brief **bpf_program__attach_uprobe()** attaches a BPF program
@@ -503,6 +551,37 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid,
 const char *binary_path, size_t func_offset,
 const struct bpf_uprobe_opts *opts);

+struct bpf_usdt_opts {
+ /* size of this struct, for forward/backward compatibility */
+ size_t sz;
+ /* custom user-provided value accessible through bpf_usdt_cookie() */
+ __u64 usdt_cookie;
+ size_t :0;
+};
+#define bpf_usdt_opts__last_field usdt_cookie
+
+/**
+ * @brief **bpf_program__attach_usdt()** is just like
+ * bpf_program__attach_uprobe_opts() except it covers USDT (User-space
+ * Statically Defined Tracepoint) attachment, instead of attaching to
+ * user-space function entry or exit.
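+ *
+ * For example, attaching to the test:usdt12 probe described in usdt.c's
+ * comments could look like this (the binary path is purely illustrative;
+ * -1 means any process):
+ *
+ *    link = bpf_program__attach_usdt(prog, -1, "./my_app",
+ *                                    "test", "usdt12", NULL);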
+ *
+ * @param prog BPF program to attach
+ * @param pid Process ID to attach the uprobe to, 0 for self (own process),
+ * -1 for all processes
+ * @param binary_path Path to binary that contains provided USDT probe
+ * @param usdt_provider USDT provider name
+ * @param usdt_name USDT probe name
+ * @param opts Options for altering program attachment
+ * @return Reference to the newly created BPF link; or NULL on error,
+ * with the error code stored in errno
+ */
+LIBBPF_API struct bpf_link *
+bpf_program__attach_usdt(const struct bpf_program *prog,
+ pid_t pid, const char *binary_path,
+ const char *usdt_provider, const char *usdt_name,
+ const struct bpf_usdt_opts *opts);
+
 struct bpf_tracepoint_opts {
 /* size of this struct, for forward/backward compatibility */
 size_t sz;
@@ -647,12 +726,37 @@ LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
 LIBBPF_API int bpf_program__set_sk_lookup(struct bpf_program *prog);

 LIBBPF_API enum bpf_prog_type bpf_program__type(const struct bpf_program *prog);
-LIBBPF_API void bpf_program__set_type(struct bpf_program *prog,
- enum bpf_prog_type type);
+
+/**
+ * @brief **bpf_program__set_type()** sets the program
+ * type of the passed BPF program.
+ * @param prog BPF program to set the program type for
+ * @param type program type to set the BPF program to have
+ * @return error code; or 0 if no error. An error occurs
+ * if the object is already loaded.
+ *
+ * This must be called before the BPF object is loaded,
+ * otherwise it has no effect and an error is returned.
+ */
+LIBBPF_API int bpf_program__set_type(struct bpf_program *prog,
+ enum bpf_prog_type type);

 LIBBPF_API enum bpf_attach_type
 bpf_program__expected_attach_type(const struct bpf_program *prog);
-LIBBPF_API void
+
+/**
+ * @brief **bpf_program__set_expected_attach_type()** sets the
+ * attach type of the passed BPF program. This is used for
+ * auto-detection of attachment when programs are loaded.
+ * @param prog BPF program to set the attach type for
+ * @param type attach type to set the BPF program to have
+ * @return error code; or 0 if no error. An error occurs
+ * if the object is already loaded.
+ *
+ * This must be called before the BPF object is loaded,
+ * otherwise it has no effect and an error is returned.
+ */
+LIBBPF_API int
 bpf_program__set_expected_attach_type(struct bpf_program *prog,
 enum bpf_attach_type type);

@@ -668,6 +772,17 @@ LIBBPF_API int bpf_program__set_log_level(struct bpf_program *prog, __u32 log_le
 LIBBPF_API const char *bpf_program__log_buf(const struct bpf_program *prog, size_t *log_size);
 LIBBPF_API int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log_size);

+/**
+ * @brief **bpf_program__set_attach_target()** sets BTF-based attach target
+ * for supported BPF program types:
+ * - BTF-aware raw tracepoints (tp_btf);
+ * - fentry/fexit/fmod_ret;
+ * - lsm;
+ * - freplace.
+ * @param prog BPF program to set the attach target for
+ * @param attach_prog_fd FD of the target BPF program (freplace), or zero
+ * to attach to a kernel function
+ * @param attach_func_name name of the target function to attach to
+ * @return error code; or 0 if no error occurred.
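+ *
+ * For example, re-targeting an fentry program to a different kernel
+ * function before load (the function name here is illustrative):
+ *
+ *    bpf_program__set_attach_target(prog, 0, "tcp_v6_connect");
+ *    bpf_object__load(obj);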
+ */
 LIBBPF_API int
 bpf_program__set_attach_target(struct bpf_program *prog, int attach_prog_fd,
 const char *attach_func_name);
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index dd35ee58bfaa..82f6d62176dd 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -444,6 +444,7 @@ LIBBPF_0.8.0 {
 global:
 bpf_object__destroy_subskeleton;
 bpf_object__open_subskeleton;
+ bpf_program__attach_usdt;
 libbpf_register_prog_handler;
 libbpf_unregister_prog_handler;
 bpf_program__attach_kprobe_multi_opts;
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index b6247dc7f8eb..4abdbe2fea9d 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -103,6 +103,17 @@
 #define str_has_pfx(str, pfx) \
 (strncmp(str, pfx, __builtin_constant_p(pfx) ? sizeof(pfx) - 1 : strlen(pfx)) == 0)

+/* suffix check */
+static inline bool str_has_sfx(const char *str, const char *sfx)
+{
+ size_t str_len = strlen(str);
+ size_t sfx_len = strlen(sfx);
+
+ if (sfx_len <= str_len)
+ return strcmp(str + str_len - sfx_len, sfx) == 0;
+ return false;
+}
+
 /* Symbol versioning is different between static and shared library.
 * Properly versioned symbols are needed for shared library, but
 * only the symbol of the new version is needed for static library.
@@ -148,6 +159,15 @@ do { \
 #ifndef __has_builtin
 #define __has_builtin(x) 0
 #endif
+
+struct bpf_link {
+ int (*detach)(struct bpf_link *link);
+ void (*dealloc)(struct bpf_link *link);
+ char *pin_path; /* NULL, if not pinned */
+ int fd; /* hook FD, -1 if not applicable */
+ bool disconnected;
+};
+
 /*
 * Re-implement glibc's reallocarray() for libbpf internal-only use.
 * reallocarray(), unfortunately, is not available in all versions of glibc,
@@ -329,6 +349,8 @@ enum kern_feature_id {
 FEAT_BTF_TYPE_TAG,
 /* memcg-based accounting for BPF maps and progs */
 FEAT_MEMCG_ACCOUNT,
+ /* BPF cookie (bpf_get_attach_cookie() BPF helper) support */
+ FEAT_BPF_COOKIE,
 __FEAT_CNT,
 };

@@ -354,6 +376,13 @@ struct btf_ext_info {
 void *info;
 __u32 rec_size;
 __u32 len;
+ /* optional (maintained internally by libbpf) mapping between .BTF.ext
+ * section and corresponding ELF section. This is used to join
+ * information like CO-RE relocation records with corresponding BPF
+ * programs defined in ELF sections
+ */
+ __u32 *sec_idxs;
+ int sec_cnt;
 };

 #define for_each_btf_ext_sec(seg, sec) \
@@ -543,4 +572,12 @@ int bpf_core_add_cands(struct bpf_core_cand *local_cand,
 struct bpf_core_cand_list *cands);
 void bpf_core_free_cands(struct bpf_core_cand_list *cands);

+struct usdt_manager *usdt_manager_new(struct bpf_object *obj);
+void usdt_manager_free(struct usdt_manager *man);
+struct bpf_link *usdt_manager_attach_usdt(struct usdt_manager *man,
+ const struct bpf_program *prog,
+ pid_t pid, const char *path,
+ const char *usdt_provider, const char *usdt_name,
+ __u64 usdt_cookie);
+
 #endif /* __LIBBPF_LIBBPF_INTERNAL_H */
diff --git a/tools/lib/bpf/relo_core.c b/tools/lib/bpf/relo_core.c
index f946f23eab20..ba4453dfd1ed 100644
--- a/tools/lib/bpf/relo_core.c
+++ b/tools/lib/bpf/relo_core.c
@@ -178,29 +178,28 @@ static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind)
 * Enum value-based relocations (ENUMVAL_EXISTS/ENUMVAL_VALUE) use access
 * string to specify enumerator's value index that needs to be relocated.
*/ -static int bpf_core_parse_spec(const char *prog_name, const struct btf *btf, - __u32 type_id, - const char *spec_str, - enum bpf_core_relo_kind relo_kind, - struct bpf_core_spec *spec) +int bpf_core_parse_spec(const char *prog_name, const struct btf *btf, + const struct bpf_core_relo *relo, + struct bpf_core_spec *spec) { int access_idx, parsed_len, i; struct bpf_core_accessor *acc; const struct btf_type *t; - const char *name; + const char *name, *spec_str; __u32 id; __s64 sz; + spec_str = btf__name_by_offset(btf, relo->access_str_off); if (str_is_empty(spec_str) || *spec_str == ':') return -EINVAL; memset(spec, 0, sizeof(*spec)); spec->btf = btf; - spec->root_type_id = type_id; - spec->relo_kind = relo_kind; + spec->root_type_id = relo->type_id; + spec->relo_kind = relo->kind; /* type-based relocations don't have a field access string */ - if (core_relo_is_type_based(relo_kind)) { + if (core_relo_is_type_based(relo->kind)) { if (strcmp(spec_str, "0")) return -EINVAL; return 0; @@ -221,7 +220,7 @@ static int bpf_core_parse_spec(const char *prog_name, const struct btf *btf, if (spec->raw_len == 0) return -EINVAL; - t = skip_mods_and_typedefs(btf, type_id, &id); + t = skip_mods_and_typedefs(btf, relo->type_id, &id); if (!t) return -EINVAL; @@ -231,7 +230,7 @@ static int bpf_core_parse_spec(const char *prog_name, const struct btf *btf, acc->idx = access_idx; spec->len++; - if (core_relo_is_enumval_based(relo_kind)) { + if (core_relo_is_enumval_based(relo->kind)) { if (!btf_is_enum(t) || spec->raw_len > 1 || access_idx >= btf_vlen(t)) return -EINVAL; @@ -240,7 +239,7 @@ static int bpf_core_parse_spec(const char *prog_name, const struct btf *btf, return 0; } - if (!core_relo_is_field_based(relo_kind)) + if (!core_relo_is_field_based(relo->kind)) return -EINVAL; sz = btf__resolve_size(btf, id); @@ -301,7 +300,7 @@ static int bpf_core_parse_spec(const char *prog_name, const struct btf *btf, spec->bit_offset += access_idx * sz * 8; } else { pr_warn("prog '%s': relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %s\n", - prog_name, type_id, spec_str, i, id, btf_kind_str(t)); + prog_name, relo->type_id, spec_str, i, id, btf_kind_str(t)); return -EINVAL; } } @@ -1055,51 +1054,66 @@ poison: * [<type-id>] (<type-name>) + <raw-spec> => <offset>@<spec>, * where <spec> is a C-syntax view of recorded field access, e.g.: x.a[3].b */ -static void bpf_core_dump_spec(const char *prog_name, int level, const struct bpf_core_spec *spec) +int bpf_core_format_spec(char *buf, size_t buf_sz, const struct bpf_core_spec *spec) { const struct btf_type *t; const struct btf_enum *e; const char *s; __u32 type_id; - int i; + int i, len = 0; + +#define append_buf(fmt, args...) \ + ({ \ + int r; \ + r = snprintf(buf, buf_sz, fmt, ##args); \ + len += r; \ + if (r >= buf_sz) \ + r = buf_sz; \ + buf += r; \ + buf_sz -= r; \ + }) type_id = spec->root_type_id; t = btf_type_by_id(spec->btf, type_id); s = btf__name_by_offset(spec->btf, t->name_off); - libbpf_print(level, "[%u] %s %s", type_id, btf_kind_str(t), str_is_empty(s) ? "<anon>" : s); + append_buf("<%s> [%u] %s %s", + core_relo_kind_str(spec->relo_kind), + type_id, btf_kind_str(t), str_is_empty(s) ? 
"<anon>" : s); if (core_relo_is_type_based(spec->relo_kind)) - return; + return len; if (core_relo_is_enumval_based(spec->relo_kind)) { t = skip_mods_and_typedefs(spec->btf, type_id, NULL); e = btf_enum(t) + spec->raw_spec[0]; s = btf__name_by_offset(spec->btf, e->name_off); - libbpf_print(level, "::%s = %u", s, e->val); - return; + append_buf("::%s = %u", s, e->val); + return len; } if (core_relo_is_field_based(spec->relo_kind)) { for (i = 0; i < spec->len; i++) { if (spec->spec[i].name) - libbpf_print(level, ".%s", spec->spec[i].name); + append_buf(".%s", spec->spec[i].name); else if (i > 0 || spec->spec[i].idx > 0) - libbpf_print(level, "[%u]", spec->spec[i].idx); + append_buf("[%u]", spec->spec[i].idx); } - libbpf_print(level, " ("); + append_buf(" ("); for (i = 0; i < spec->raw_len; i++) - libbpf_print(level, "%s%d", i == 0 ? "" : ":", spec->raw_spec[i]); + append_buf("%s%d", i == 0 ? "" : ":", spec->raw_spec[i]); if (spec->bit_offset % 8) - libbpf_print(level, " @ offset %u.%u)", - spec->bit_offset / 8, spec->bit_offset % 8); + append_buf(" @ offset %u.%u)", spec->bit_offset / 8, spec->bit_offset % 8); else - libbpf_print(level, " @ offset %u)", spec->bit_offset / 8); - return; + append_buf(" @ offset %u)", spec->bit_offset / 8); + return len; } + + return len; +#undef append_buf } /* @@ -1167,7 +1181,7 @@ int bpf_core_calc_relo_insn(const char *prog_name, const struct btf_type *local_type; const char *local_name; __u32 local_id; - const char *spec_str; + char spec_buf[256]; int i, j, err; local_id = relo->type_id; @@ -1176,24 +1190,20 @@ int bpf_core_calc_relo_insn(const char *prog_name, if (!local_name) return -EINVAL; - spec_str = btf__name_by_offset(local_btf, relo->access_str_off); - if (str_is_empty(spec_str)) - return -EINVAL; - - err = bpf_core_parse_spec(prog_name, local_btf, local_id, spec_str, - relo->kind, local_spec); + err = bpf_core_parse_spec(prog_name, local_btf, relo, local_spec); if (err) { + const char *spec_str; + + spec_str = btf__name_by_offset(local_btf, relo->access_str_off); pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n", prog_name, relo_idx, local_id, btf_kind_str(local_type), str_is_empty(local_name) ? 
"<anon>" : local_name, - spec_str, err); + spec_str ?: "<?>", err); return -EINVAL; } - pr_debug("prog '%s': relo #%d: kind <%s> (%d), spec is ", prog_name, - relo_idx, core_relo_kind_str(relo->kind), relo->kind); - bpf_core_dump_spec(prog_name, LIBBPF_DEBUG, local_spec); - libbpf_print(LIBBPF_DEBUG, "\n"); + bpf_core_format_spec(spec_buf, sizeof(spec_buf), local_spec); + pr_debug("prog '%s': relo #%d: %s\n", prog_name, relo_idx, spec_buf); /* TYPE_ID_LOCAL relo is special and doesn't need candidate search */ if (relo->kind == BPF_CORE_TYPE_ID_LOCAL) { @@ -1207,7 +1217,7 @@ int bpf_core_calc_relo_insn(const char *prog_name, } /* libbpf doesn't support candidate search for anonymous types */ - if (str_is_empty(spec_str)) { + if (str_is_empty(local_name)) { pr_warn("prog '%s': relo #%d: <%s> (%d) relocation doesn't support anonymous types\n", prog_name, relo_idx, core_relo_kind_str(relo->kind), relo->kind); return -EOPNOTSUPP; @@ -1217,17 +1227,15 @@ int bpf_core_calc_relo_insn(const char *prog_name, err = bpf_core_spec_match(local_spec, cands->cands[i].btf, cands->cands[i].id, cand_spec); if (err < 0) { - pr_warn("prog '%s': relo #%d: error matching candidate #%d ", - prog_name, relo_idx, i); - bpf_core_dump_spec(prog_name, LIBBPF_WARN, cand_spec); - libbpf_print(LIBBPF_WARN, ": %d\n", err); + bpf_core_format_spec(spec_buf, sizeof(spec_buf), cand_spec); + pr_warn("prog '%s': relo #%d: error matching candidate #%d %s: %d\n ", + prog_name, relo_idx, i, spec_buf, err); return err; } - pr_debug("prog '%s': relo #%d: %s candidate #%d ", prog_name, - relo_idx, err == 0 ? "non-matching" : "matching", i); - bpf_core_dump_spec(prog_name, LIBBPF_DEBUG, cand_spec); - libbpf_print(LIBBPF_DEBUG, "\n"); + bpf_core_format_spec(spec_buf, sizeof(spec_buf), cand_spec); + pr_debug("prog '%s': relo #%d: %s candidate #%d %s\n", prog_name, + relo_idx, err == 0 ? "non-matching" : "matching", i, spec_buf); if (err == 0) continue; diff --git a/tools/lib/bpf/relo_core.h b/tools/lib/bpf/relo_core.h index a28bf3711ce2..073039d8ca4f 100644 --- a/tools/lib/bpf/relo_core.h +++ b/tools/lib/bpf/relo_core.h @@ -84,4 +84,10 @@ int bpf_core_patch_insn(const char *prog_name, struct bpf_insn *insn, int insn_idx, const struct bpf_core_relo *relo, int relo_idx, const struct bpf_core_relo_res *res); +int bpf_core_parse_spec(const char *prog_name, const struct btf *btf, + const struct bpf_core_relo *relo, + struct bpf_core_spec *spec); + +int bpf_core_format_spec(char *buf, size_t buf_sz, const struct bpf_core_spec *spec); + #endif diff --git a/tools/lib/bpf/usdt.bpf.h b/tools/lib/bpf/usdt.bpf.h new file mode 100644 index 000000000000..4181fddb3687 --- /dev/null +++ b/tools/lib/bpf/usdt.bpf.h @@ -0,0 +1,259 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ +#ifndef __USDT_BPF_H__ +#define __USDT_BPF_H__ + +#include <linux/errno.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_core_read.h> + +/* Below types and maps are internal implementation details of libbpf's USDT + * support and are subjects to change. Also, bpf_usdt_xxx() API helpers should + * be considered an unstable API as well and might be adjusted based on user + * feedback from using libbpf's USDT support in production. + */ + +/* User can override BPF_USDT_MAX_SPEC_CNT to change default size of internal + * map that keeps track of USDT argument specifications. This might be + * necessary if there are a lot of USDT attachments. 
+ */
+#ifndef BPF_USDT_MAX_SPEC_CNT
+#define BPF_USDT_MAX_SPEC_CNT 256
+#endif
+/* User can override BPF_USDT_MAX_IP_CNT to change default size of internal
+ * map that keeps track of IP (memory address) mapping to USDT argument
+ * specification.
+ * Note, if kernel supports BPF cookies, this map is not used and could be
+ * resized all the way to 1 to save a bit of memory.
+ */
+#ifndef BPF_USDT_MAX_IP_CNT
+#define BPF_USDT_MAX_IP_CNT (4 * BPF_USDT_MAX_SPEC_CNT)
+#endif
+/* We use BPF CO-RE to detect support for BPF cookie from BPF side. This is
+ * the only dependency on CO-RE, so if it's undesirable, user can override
+ * BPF_USDT_HAS_BPF_COOKIE to specify whether BPF cookie is supported or not.
+ */
+#ifndef BPF_USDT_HAS_BPF_COOKIE
+#define BPF_USDT_HAS_BPF_COOKIE \
+ bpf_core_enum_value_exists(enum bpf_func_id___usdt, BPF_FUNC_get_attach_cookie___usdt)
+#endif
+
+enum __bpf_usdt_arg_type {
+ BPF_USDT_ARG_CONST,
+ BPF_USDT_ARG_REG,
+ BPF_USDT_ARG_REG_DEREF,
+};
+
+struct __bpf_usdt_arg_spec {
+ /* u64 scalar interpreted depending on arg_type, see below */
+ __u64 val_off;
+ /* arg location case, see bpf_usdt_arg() for details */
+ enum __bpf_usdt_arg_type arg_type;
+ /* offset of referenced register within struct pt_regs */
+ short reg_off;
+ /* whether arg should be interpreted as signed value */
+ bool arg_signed;
+ /* number of bits that need to be cleared and, optionally,
+ * sign-extended to cast arguments that are 1, 2, or 4 bytes
+ * long into final 8-byte u64/s64 value returned to user
+ */
+ char arg_bitshift;
+};
+
+/* should match USDT_MAX_ARG_CNT in usdt.c exactly */
+#define BPF_USDT_MAX_ARG_CNT 12
+struct __bpf_usdt_spec {
+ struct __bpf_usdt_arg_spec args[BPF_USDT_MAX_ARG_CNT];
+ __u64 usdt_cookie;
+ short arg_cnt;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, BPF_USDT_MAX_SPEC_CNT);
+ __type(key, int);
+ __type(value, struct __bpf_usdt_spec);
+} __bpf_usdt_specs SEC(".maps") __weak;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, BPF_USDT_MAX_IP_CNT);
+ __type(key, long);
+ __type(value, __u32);
+} __bpf_usdt_ip_to_spec_id SEC(".maps") __weak;
+
+/* don't rely on user's BPF code to have latest definition of bpf_func_id */
+enum bpf_func_id___usdt {
+ BPF_FUNC_get_attach_cookie___usdt = 0xBAD, /* value doesn't matter */
+};
+
+static __always_inline
+int __bpf_usdt_spec_id(struct pt_regs *ctx)
+{
+ if (!BPF_USDT_HAS_BPF_COOKIE) {
+ long ip = PT_REGS_IP(ctx);
+ int *spec_id_ptr;
+
+ spec_id_ptr = bpf_map_lookup_elem(&__bpf_usdt_ip_to_spec_id, &ip);
+ return spec_id_ptr ? *spec_id_ptr : -ESRCH;
+ }
+
+ return bpf_get_attach_cookie(ctx);
+}
+
+/* Return number of USDT arguments defined for currently traced USDT. */
+__weak __hidden
+int bpf_usdt_arg_cnt(struct pt_regs *ctx)
+{
+ struct __bpf_usdt_spec *spec;
+ int spec_id;
+
+ spec_id = __bpf_usdt_spec_id(ctx);
+ if (spec_id < 0)
+ return -ESRCH;
+
+ spec = bpf_map_lookup_elem(&__bpf_usdt_specs, &spec_id);
+ if (!spec)
+ return -ESRCH;
+
+ return spec->arg_cnt;
+}
+
+/* Fetch USDT argument #*arg_num* (zero-indexed) and put its value into *res.
+ * Returns 0 on success; negative error, otherwise.
+ * On error *res is guaranteed to be set to zero.
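+ *
+ * A minimal usage sketch from a USDT handler (the argument index 0 is just
+ * an example):
+ *
+ *    long x;
+ *
+ *    if (bpf_usdt_arg(ctx, 0, &x) == 0)
+ *        bpf_printk("arg0 = %ld", x);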
+ */ +__weak __hidden +int bpf_usdt_arg(struct pt_regs *ctx, __u64 arg_num, long *res) +{ + struct __bpf_usdt_spec *spec; + struct __bpf_usdt_arg_spec *arg_spec; + unsigned long val; + int err, spec_id; + + *res = 0; + + spec_id = __bpf_usdt_spec_id(ctx); + if (spec_id < 0) + return -ESRCH; + + spec = bpf_map_lookup_elem(&__bpf_usdt_specs, &spec_id); + if (!spec) + return -ESRCH; + + if (arg_num >= BPF_USDT_MAX_ARG_CNT || arg_num >= spec->arg_cnt) + return -ENOENT; + + arg_spec = &spec->args[arg_num]; + switch (arg_spec->arg_type) { + case BPF_USDT_ARG_CONST: + /* Arg is just a constant ("-4@$-9" in USDT arg spec). + * value is recorded in arg_spec->val_off directly. + */ + val = arg_spec->val_off; + break; + case BPF_USDT_ARG_REG: + /* Arg is in a register (e.g, "8@%rax" in USDT arg spec), + * so we read the contents of that register directly from + * struct pt_regs. To keep things simple user-space parts + * record offsetof(struct pt_regs, <regname>) in arg_spec->reg_off. + */ + err = bpf_probe_read_kernel(&val, sizeof(val), (void *)ctx + arg_spec->reg_off); + if (err) + return err; + break; + case BPF_USDT_ARG_REG_DEREF: + /* Arg is in memory addressed by register, plus some offset + * (e.g., "-4@-1204(%rbp)" in USDT arg spec). Register is + * identified like with BPF_USDT_ARG_REG case, and the offset + * is in arg_spec->val_off. We first fetch register contents + * from pt_regs, then do another user-space probe read to + * fetch argument value itself. + */ + err = bpf_probe_read_kernel(&val, sizeof(val), (void *)ctx + arg_spec->reg_off); + if (err) + return err; + err = bpf_probe_read_user(&val, sizeof(val), (void *)val + arg_spec->val_off); + if (err) + return err; +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + val >>= arg_spec->arg_bitshift; +#endif + break; + default: + return -EINVAL; + } + + /* cast arg from 1, 2, or 4 bytes to final 8 byte size clearing + * necessary upper arg_bitshift bits, with sign extension if argument + * is signed + */ + val <<= arg_spec->arg_bitshift; + if (arg_spec->arg_signed) + val = ((long)val) >> arg_spec->arg_bitshift; + else + val = val >> arg_spec->arg_bitshift; + *res = val; + return 0; +} + +/* Retrieve user-specified cookie value provided during attach as + * bpf_usdt_opts.usdt_cookie. This serves the same purpose as BPF cookie + * returned by bpf_get_attach_cookie(). Libbpf's support for USDT is itself + * utilizing BPF cookies internally, so user can't use BPF cookie directly + * for USDT programs and has to use bpf_usdt_cookie() API instead. + */ +__weak __hidden +long bpf_usdt_cookie(struct pt_regs *ctx) +{ + struct __bpf_usdt_spec *spec; + int spec_id; + + spec_id = __bpf_usdt_spec_id(ctx); + if (spec_id < 0) + return 0; + + spec = bpf_map_lookup_elem(&__bpf_usdt_specs, &spec_id); + if (!spec) + return 0; + + return spec->usdt_cookie; +} + +/* we rely on ___bpf_apply() and ___bpf_narg() macros already defined in bpf_tracing.h */ +#define ___bpf_usdt_args0() ctx +#define ___bpf_usdt_args1(x) ___bpf_usdt_args0(), ({ long _x; bpf_usdt_arg(ctx, 0, &_x); (void *)_x; }) +#define ___bpf_usdt_args2(x, args...) ___bpf_usdt_args1(args), ({ long _x; bpf_usdt_arg(ctx, 1, &_x); (void *)_x; }) +#define ___bpf_usdt_args3(x, args...) ___bpf_usdt_args2(args), ({ long _x; bpf_usdt_arg(ctx, 2, &_x); (void *)_x; }) +#define ___bpf_usdt_args4(x, args...) ___bpf_usdt_args3(args), ({ long _x; bpf_usdt_arg(ctx, 3, &_x); (void *)_x; }) +#define ___bpf_usdt_args5(x, args...) 
___bpf_usdt_args4(args), ({ long _x; bpf_usdt_arg(ctx, 4, &_x); (void *)_x; })
+#define ___bpf_usdt_args6(x, args...) ___bpf_usdt_args5(args), ({ long _x; bpf_usdt_arg(ctx, 5, &_x); (void *)_x; })
+#define ___bpf_usdt_args7(x, args...) ___bpf_usdt_args6(args), ({ long _x; bpf_usdt_arg(ctx, 6, &_x); (void *)_x; })
+#define ___bpf_usdt_args8(x, args...) ___bpf_usdt_args7(args), ({ long _x; bpf_usdt_arg(ctx, 7, &_x); (void *)_x; })
+#define ___bpf_usdt_args9(x, args...) ___bpf_usdt_args8(args), ({ long _x; bpf_usdt_arg(ctx, 8, &_x); (void *)_x; })
+#define ___bpf_usdt_args10(x, args...) ___bpf_usdt_args9(args), ({ long _x; bpf_usdt_arg(ctx, 9, &_x); (void *)_x; })
+#define ___bpf_usdt_args11(x, args...) ___bpf_usdt_args10(args), ({ long _x; bpf_usdt_arg(ctx, 10, &_x); (void *)_x; })
+#define ___bpf_usdt_args12(x, args...) ___bpf_usdt_args11(args), ({ long _x; bpf_usdt_arg(ctx, 11, &_x); (void *)_x; })
+#define ___bpf_usdt_args(args...) ___bpf_apply(___bpf_usdt_args, ___bpf_narg(args))(args)
+
+/*
+ * BPF_USDT serves the same purpose for USDT handlers as BPF_PROG for
+ * tp_btf/fentry/fexit BPF programs and BPF_KPROBE for kprobes.
+ * Original struct pt_regs * context is preserved as 'ctx' argument.
+ */
+#define BPF_USDT(name, args...) \
+name(struct pt_regs *ctx); \
+static __attribute__((always_inline)) typeof(name(0)) \
+____##name(struct pt_regs *ctx, ##args); \
+typeof(name(0)) name(struct pt_regs *ctx) \
+{ \
+ _Pragma("GCC diagnostic push") \
+ _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
+ return ____##name(___bpf_usdt_args(args)); \
+ _Pragma("GCC diagnostic pop") \
+} \
+static __attribute__((always_inline)) typeof(name(0)) \
+____##name(struct pt_regs *ctx, ##args)
+
+#endif /* __USDT_BPF_H__ */
diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c
new file mode 100644
index 000000000000..f1c9339cfbbc
--- /dev/null
+++ b/tools/lib/bpf/usdt.c
@@ -0,0 +1,1518 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <libelf.h>
+#include <gelf.h>
+#include <unistd.h>
+#include <linux/ptrace.h>
+#include <linux/kernel.h>
+
+/* s8 will be marked as poison while it's a reg of riscv */
+#if defined(__riscv)
+#define rv_s8 s8
+#endif
+
+#include "bpf.h"
+#include "libbpf.h"
+#include "libbpf_common.h"
+#include "libbpf_internal.h"
+#include "hashmap.h"
+
+/* libbpf's USDT support consists of BPF-side state/code and user-space
+ * state/code working together in concert. BPF-side parts are defined in
+ * usdt.bpf.h header library. User-space state is encapsulated by struct
+ * usdt_manager and all the supporting code centered around usdt_manager.
+ *
+ * usdt.bpf.h defines two BPF maps that usdt_manager expects: USDT spec map
+ * and IP-to-spec-ID map, which is an auxiliary map necessary for kernels that
+ * don't support BPF cookie (see below). These two maps are implicitly
+ * embedded into user's end BPF object file when user's code includes
+ * usdt.bpf.h. This means that libbpf doesn't do anything special to create
+ * these USDT support maps. They are created by normal libbpf logic of
+ * instantiating BPF maps when opening and loading BPF object.
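+ *
+ * Concretely, including usdt.bpf.h pulls in two __weak maps defined there:
+ * __bpf_usdt_specs (spec ID -> struct __bpf_usdt_spec array map) and
+ * __bpf_usdt_ip_to_spec_id (IP -> spec ID hash map), which usdt_manager_new()
+ * below looks up by name.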
+ *
+ * As such, libbpf is basically unaware of the need to do anything
+ * USDT-related until the very first call to bpf_program__attach_usdt(), which
+ * can be called by user explicitly or happen automatically during skeleton
+ * attach (or, equivalently, through generic bpf_program__attach() call). At
+ * this point, libbpf will instantiate and initialize struct usdt_manager and
+ * store it in bpf_object. USDT manager is a per-BPF-object construct, as each
+ * independent BPF object might or might not have USDT programs, and thus all
+ * the expected USDT-related state. There is no coordination between two
+ * bpf_objects in parts of USDT attachment; they are oblivious of each other's
+ * existence, and libbpf just deals with bpf_object-specific USDT state.
+ *
+ * Quick crash course on USDTs.
+ *
+ * From user-space application's point of view, USDT is essentially just
+ * a slightly special function call that normally has zero overhead, unless it
+ * is being traced by some external entity (e.g., BPF-based tool). Here's how
+ * a typical application can trigger USDT probe:
+ *
+ * #include <sys/sdt.h> // provided by systemtap-sdt-devel package
+ * // folly also provides similar functionality in folly/tracing/StaticTracepoint.h
+ *
+ * STAP_PROBE3(my_usdt_provider, my_usdt_probe_name, 123, x, &y);
+ *
+ * USDT is identified by its <provider-name>:<probe-name> pair of names. Each
+ * individual USDT has a fixed number of arguments (3 in the above example)
+ * and specifies values of each argument as if it was a function call.
+ *
+ * USDT call is actually not a function call, but is instead replaced by
+ * a single NOP instruction (thus zero overhead, effectively). But in addition
+ * to that, those USDT macros generate special SHT_NOTE ELF records in
+ * .note.stapsdt ELF section. Here's an example USDT definition as emitted by
+ * `readelf -n <binary>`:
+ *
+ * stapsdt 0x00000089 NT_STAPSDT (SystemTap probe descriptors)
+ * Provider: test
+ * Name: usdt12
+ * Location: 0x0000000000549df3, Base: 0x00000000008effa4, Semaphore: 0x0000000000a4606e
+ * Arguments: -4@-1204(%rbp) -4@%edi -8@-1216(%rbp) -8@%r8 -4@$5 -8@%r9 8@%rdx 8@%r10 -4@$-9 -2@%cx -2@%ax -1@%sil
+ *
+ * In this case we have USDT test:usdt12 with 12 arguments.
+ *
+ * Location and base are offsets used to calculate absolute IP address of that
+ * NOP instruction that kernel can replace with an interrupt instruction to
+ * trigger instrumentation code (BPF program for all that we care about).
+ *
+ * Semaphore above is an optional feature. It records an address of a 2-byte
+ * refcount variable (normally in '.probes' ELF section) used for signaling if
+ * there is anything that is attached to USDT. This is useful for user
+ * applications if, for example, they need to prepare some arguments that are
+ * passed only to USDTs and preparation is expensive. By checking if USDT is
+ * "activated", an application can avoid paying those costs unnecessarily.
+ * A recent enough kernel has built-in support for automatically managing this
+ * refcount, which libbpf expects and relies on. If USDT is defined without
+ * associated semaphore, this value will be zero. See selftests for semaphore
+ * examples.
+ *
+ * Arguments is the most interesting part. This USDT specification string
+ * provides information about all the USDT arguments and their locations. The
+ * part before the @ sign defines the byte size of the argument (1, 2, 4, or 8)
+ * and whether the argument is signed or unsigned (negative size means signed).
+ * The part after the @ sign is an assembly-like definition of argument
+ * location (see [0] for more details). Technically, assembler can provide
+ * some pretty advanced definitions, but libbpf currently supports the three
+ * most common cases:
+ * 1) immediate constant, see 5th and 9th args above (-4@$5 and -4@$-9);
+ * 2) register value, e.g., 8@%rdx, which means "unsigned 8-byte integer
+ * whose value is in register %rdx";
+ * 3) memory dereference addressed by register, e.g., -4@-1204(%rbp), which
+ * specifies signed 32-bit integer stored at offset -1204 bytes from
+ * memory address stored in %rbp.
+ *
+ * [0] https://sourceware.org/systemtap/wiki/UserSpaceProbeImplementation
+ *
+ * During attachment, libbpf parses all the relevant USDT specifications and
+ * prepares `struct usdt_spec` (USDT spec), which is then provided to BPF-side
+ * code through spec map. This allows BPF applications to quickly fetch the
+ * actual value at runtime using simple BPF-side code.
+ *
+ * With basics out of the way, let's go over less immediately obvious aspects
+ * of supporting USDTs.
+ *
+ * First, there is no special USDT BPF program type. It is actually just
+ * a uprobe BPF program (which for kernel, at least currently, is just a kprobe
+ * program, so BPF_PROG_TYPE_KPROBE program type). The only difference is that
+ * a uprobe is usually attached at the function entry, while a USDT will
+ * normally be somewhere inside the function. But it should always be pointing
+ * to a NOP instruction, which makes such uprobes the fastest uprobe kind.
+ *
+ * Second, it's important to realize that such STAP_PROBEn(provider, name, ...)
+ * macro invocations can end up being inlined many, many times, depending on
+ * specifics of each individual user application. So a single conceptual USDT
+ * (identified by provider:name pair of identifiers) is, generally speaking,
+ * multiple uprobe locations (USDT call sites) in different places in user
+ * application. Further, again due to inlining, each USDT call site might end
+ * up having the same argument #N be located in a different place. In one call
+ * site it could be a constant, in another it will end up in a register, and in
+ * yet another could be some other register or even somewhere on the stack.
+ *
+ * As such, "attaching to USDT" means (in general case) attaching the same
+ * uprobe BPF program to multiple target locations in user application, each
+ * potentially having a completely different USDT spec associated with it.
+ * To wire all this up together libbpf allocates a unique integer spec ID for
+ * each unique USDT spec. Spec IDs are allocated as sequential small integers
+ * so that they can be used as keys in array BPF map (for performance reasons).
+ * Spec ID allocation and accounting is a big part of what usdt_manager is
+ * about. This state has to be maintained per-BPF object and coordinated
+ * between different USDT attachments within the same BPF object.
+ *
+ * Spec ID is the key in spec BPF map, value is the actual USDT spec laid out
+ * as struct usdt_spec. Each invocation of BPF program at runtime needs to
+ * know its associated spec ID. It gets it either through BPF cookie, which
+ * libbpf sets to spec ID during attach time, or, if kernel is too old to
+ * support BPF cookie, through IP-to-spec-ID map that libbpf maintains in such
+ * case. The latter means that some modes of operation can't be supported
+ * without BPF cookie. One such mode is attaching to a shared library
+ * "generically", without specifying target process.
In such case, it's impossible to
+ * calculate absolute IP addresses for IP-to-spec-ID map, and thus such mode
+ * is not supported without BPF cookie support.
+ *
+ * Note that libbpf uses BPF cookie functionality for its own internal
+ * needs, so the user can't rely on the BPF cookie feature themselves. To that
+ * end, libbpf provides conceptually equivalent USDT cookie support. It's
+ * still a u64 user-provided value that can be associated with USDT
+ * attachment. Note that this will be the same value for all USDT call sites
+ * within the same single *logical* USDT attachment. This makes sense because
+ * to the user, attaching to a USDT means a single BPF program triggered for
+ * a single USDT probe. The fact that this is done at multiple actual
+ * locations is a mostly hidden implementation detail. This USDT cookie value
+ * can be fetched with bpf_usdt_cookie(ctx) API provided by usdt.bpf.h.
+ *
+ * Lastly, while a single USDT can have tons of USDT call sites, it doesn't
+ * necessarily have that many different USDT specs. It very well might be
+ * that 1000 USDT call sites only need 5 different USDT specs, because all the
+ * arguments are typically contained in a small set of registers or stack
+ * locations. As such, it's wasteful to allocate as many USDT spec IDs as
+ * there are USDT call sites. So libbpf tries to be frugal and performs
+ * on-the-fly deduplication during a single USDT attachment to only allocate
+ * the minimal required amount of unique USDT specs (and thus spec IDs). This
+ * is trivially achieved by using USDT spec string (Arguments string from USDT
+ * note) as a lookup key in a hashmap. USDT spec string uniquely defines
+ * everything about how to fetch USDT arguments, so two USDT call sites
+ * sharing USDT spec string can safely share the same USDT spec and spec ID.
+ * Note, this spec string deduplication happens only within a single USDT
+ * attachment, so each USDT spec shares the same USDT cookie value. This is
+ * not generally true for other USDT attachments within the same BPF object,
+ * as even if USDT spec string is the same, USDT cookie value can be
+ * different. It was deemed excessive to try to deduplicate across independent
+ * USDT attachments by taking into account USDT spec string *and* USDT cookie
+ * value, which would complicate spec ID accounting significantly for little
+ * gain.
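+ *
+ * Putting it all together, a minimal BPF-side handler for the test:usdt12
+ * example above could look like this (a sketch, not part of this patch; the
+ * handler name is arbitrary and only the first two arguments are consumed):
+ *
+ *    SEC("usdt")
+ *    int BPF_USDT(handle_usdt12, int x, long y)
+ *    {
+ *        bpf_printk("x=%d y=%ld cookie=%ld", x, y, bpf_usdt_cookie(ctx));
+ *        return 0;
+ *    }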
+ */ + +#define USDT_BASE_SEC ".stapsdt.base" +#define USDT_SEMA_SEC ".probes" +#define USDT_NOTE_SEC ".note.stapsdt" +#define USDT_NOTE_TYPE 3 +#define USDT_NOTE_NAME "stapsdt" + +/* should match exactly enum __bpf_usdt_arg_type from usdt.bpf.h */ +enum usdt_arg_type { + USDT_ARG_CONST, + USDT_ARG_REG, + USDT_ARG_REG_DEREF, +}; + +/* should match exactly struct __bpf_usdt_arg_spec from usdt.bpf.h */ +struct usdt_arg_spec { + __u64 val_off; + enum usdt_arg_type arg_type; + short reg_off; + bool arg_signed; + char arg_bitshift; +}; + +/* should match BPF_USDT_MAX_ARG_CNT in usdt.bpf.h */ +#define USDT_MAX_ARG_CNT 12 + +/* should match struct __bpf_usdt_spec from usdt.bpf.h */ +struct usdt_spec { + struct usdt_arg_spec args[USDT_MAX_ARG_CNT]; + __u64 usdt_cookie; + short arg_cnt; +}; + +struct usdt_note { + const char *provider; + const char *name; + /* USDT args specification string, e.g.: + * "-4@%esi -4@-24(%rbp) -4@%ecx 2@%ax 8@%rdx" + */ + const char *args; + long loc_addr; + long base_addr; + long sema_addr; +}; + +struct usdt_target { + long abs_ip; + long rel_ip; + long sema_off; + struct usdt_spec spec; + const char *spec_str; +}; + +struct usdt_manager { + struct bpf_map *specs_map; + struct bpf_map *ip_to_spec_id_map; + + int *free_spec_ids; + size_t free_spec_cnt; + size_t next_free_spec_id; + + bool has_bpf_cookie; + bool has_sema_refcnt; +}; + +struct usdt_manager *usdt_manager_new(struct bpf_object *obj) +{ + static const char *ref_ctr_sysfs_path = "/sys/bus/event_source/devices/uprobe/format/ref_ctr_offset"; + struct usdt_manager *man; + struct bpf_map *specs_map, *ip_to_spec_id_map; + + specs_map = bpf_object__find_map_by_name(obj, "__bpf_usdt_specs"); + ip_to_spec_id_map = bpf_object__find_map_by_name(obj, "__bpf_usdt_ip_to_spec_id"); + if (!specs_map || !ip_to_spec_id_map) { + pr_warn("usdt: failed to find USDT support BPF maps, did you forget to include bpf/usdt.bpf.h?\n"); + return ERR_PTR(-ESRCH); + } + + man = calloc(1, sizeof(*man)); + if (!man) + return ERR_PTR(-ENOMEM); + + man->specs_map = specs_map; + man->ip_to_spec_id_map = ip_to_spec_id_map; + + /* Detect if BPF cookie is supported for kprobes. + * We don't need IP-to-ID mapping if we can use BPF cookies. + * Added in: 7adfc6c9b315 ("bpf: Add bpf_get_attach_cookie() BPF helper to access bpf_cookie value") + */ + man->has_bpf_cookie = kernel_supports(obj, FEAT_BPF_COOKIE); + + /* Detect kernel support for automatic refcounting of USDT semaphore. + * If this is not supported, USDTs with semaphores will not be supported. 
+ * Added in: a6ca88b241d5 ("trace_uprobe: support reference counter in fd-based uprobe") + */ + man->has_sema_refcnt = access(ref_ctr_sysfs_path, F_OK) == 0; + + return man; +} + +void usdt_manager_free(struct usdt_manager *man) +{ + if (IS_ERR_OR_NULL(man)) + return; + + free(man->free_spec_ids); + free(man); +} + +static int sanity_check_usdt_elf(Elf *elf, const char *path) +{ + GElf_Ehdr ehdr; + int endianness; + + if (elf_kind(elf) != ELF_K_ELF) { + pr_warn("usdt: unrecognized ELF kind %d for '%s'\n", elf_kind(elf), path); + return -EBADF; + } + + switch (gelf_getclass(elf)) { + case ELFCLASS64: + if (sizeof(void *) != 8) { + pr_warn("usdt: attaching to 64-bit ELF binary '%s' is not supported\n", path); + return -EBADF; + } + break; + case ELFCLASS32: + if (sizeof(void *) != 4) { + pr_warn("usdt: attaching to 32-bit ELF binary '%s' is not supported\n", path); + return -EBADF; + } + break; + default: + pr_warn("usdt: unsupported ELF class for '%s'\n", path); + return -EBADF; + } + + if (!gelf_getehdr(elf, &ehdr)) + return -EINVAL; + + if (ehdr.e_type != ET_EXEC && ehdr.e_type != ET_DYN) { + pr_warn("usdt: unsupported type of ELF binary '%s' (%d), only ET_EXEC and ET_DYN are supported\n", + path, ehdr.e_type); + return -EBADF; + } + +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + endianness = ELFDATA2LSB; +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + endianness = ELFDATA2MSB; +#else +# error "Unrecognized __BYTE_ORDER__" +#endif + if (endianness != ehdr.e_ident[EI_DATA]) { + pr_warn("usdt: ELF endianness mismatch for '%s'\n", path); + return -EBADF; + } + + return 0; +} + +static int find_elf_sec_by_name(Elf *elf, const char *sec_name, GElf_Shdr *shdr, Elf_Scn **scn) +{ + Elf_Scn *sec = NULL; + size_t shstrndx; + + if (elf_getshdrstrndx(elf, &shstrndx)) + return -EINVAL; + + /* check if ELF is corrupted and avoid calling elf_strptr if yes */ + if (!elf_rawdata(elf_getscn(elf, shstrndx), NULL)) + return -EINVAL; + + while ((sec = elf_nextscn(elf, sec)) != NULL) { + char *name; + + if (!gelf_getshdr(sec, shdr)) + return -EINVAL; + + name = elf_strptr(elf, shstrndx, shdr->sh_name); + if (name && strcmp(sec_name, name) == 0) { + *scn = sec; + return 0; + } + } + + return -ENOENT; +} + +struct elf_seg { + long start; + long end; + long offset; + bool is_exec; +}; + +static int cmp_elf_segs(const void *_a, const void *_b) +{ + const struct elf_seg *a = _a; + const struct elf_seg *b = _b; + + return a->start < b->start ? 
-1 : 1; +} + +static int parse_elf_segs(Elf *elf, const char *path, struct elf_seg **segs, size_t *seg_cnt) +{ + GElf_Phdr phdr; + size_t n; + int i, err; + struct elf_seg *seg; + void *tmp; + + *seg_cnt = 0; + + if (elf_getphdrnum(elf, &n)) { + err = -errno; + return err; + } + + for (i = 0; i < n; i++) { + if (!gelf_getphdr(elf, i, &phdr)) { + err = -errno; + return err; + } + + pr_debug("usdt: discovered PHDR #%d in '%s': vaddr 0x%lx memsz 0x%lx offset 0x%lx type 0x%lx flags 0x%lx\n", + i, path, (long)phdr.p_vaddr, (long)phdr.p_memsz, (long)phdr.p_offset, + (long)phdr.p_type, (long)phdr.p_flags); + if (phdr.p_type != PT_LOAD) + continue; + + tmp = libbpf_reallocarray(*segs, *seg_cnt + 1, sizeof(**segs)); + if (!tmp) + return -ENOMEM; + + *segs = tmp; + seg = *segs + *seg_cnt; + (*seg_cnt)++; + + seg->start = phdr.p_vaddr; + seg->end = phdr.p_vaddr + phdr.p_memsz; + seg->offset = phdr.p_offset; + seg->is_exec = phdr.p_flags & PF_X; + } + + if (*seg_cnt == 0) { + pr_warn("usdt: failed to find PT_LOAD program headers in '%s'\n", path); + return -ESRCH; + } + + qsort(*segs, *seg_cnt, sizeof(**segs), cmp_elf_segs); + return 0; +} + +static int parse_lib_segs(int pid, const char *lib_path, struct elf_seg **segs, size_t *seg_cnt) +{ + char path[PATH_MAX], line[PATH_MAX], mode[16]; + size_t seg_start, seg_end, seg_off; + struct elf_seg *seg; + int tmp_pid, i, err; + FILE *f; + + *seg_cnt = 0; + + /* Handle containerized binaries only accessible from + * /proc/<pid>/root/<path>. They will be reported as just /<path> in + * /proc/<pid>/maps. + */ + if (sscanf(lib_path, "/proc/%d/root%s", &tmp_pid, path) == 2 && pid == tmp_pid) + goto proceed; + + if (!realpath(lib_path, path)) { + pr_warn("usdt: failed to get absolute path of '%s' (err %d), using path as is...\n", + lib_path, -errno); + libbpf_strlcpy(path, lib_path, sizeof(path)); + } + +proceed: + sprintf(line, "/proc/%d/maps", pid); + f = fopen(line, "r"); + if (!f) { + err = -errno; + pr_warn("usdt: failed to open '%s' to get base addr of '%s': %d\n", + line, lib_path, err); + return err; + } + + /* We need to handle lines with no path at the end: + * + * 7f5c6f5d1000-7f5c6f5d3000 rw-p 001c7000 08:04 21238613 /usr/lib64/libc-2.17.so + * 7f5c6f5d3000-7f5c6f5d8000 rw-p 00000000 00:00 0 + * 7f5c6f5d8000-7f5c6f5d9000 r-xp 00000000 103:01 362990598 /data/users/andriin/linux/tools/bpf/usdt/libhello_usdt.so + */ + while (fscanf(f, "%zx-%zx %s %zx %*s %*d%[^\n]\n", + &seg_start, &seg_end, mode, &seg_off, line) == 5) { + void *tmp; + + /* to handle no path case (see above) we need to capture line + * without skipping any whitespaces. 
So we need to strip
+ * leading whitespace manually here
+ */
+ i = 0;
+ while (isblank(line[i]))
+ i++;
+ if (strcmp(line + i, path) != 0)
+ continue;
+
+ pr_debug("usdt: discovered segment for lib '%s': addrs %zx-%zx mode %s offset %zx\n",
+ path, seg_start, seg_end, mode, seg_off);
+
+ /* ignore non-executable sections for shared libs */
+ if (mode[2] != 'x')
+ continue;
+
+ tmp = libbpf_reallocarray(*segs, *seg_cnt + 1, sizeof(**segs));
+ if (!tmp) {
+ err = -ENOMEM;
+ goto err_out;
+ }
+
+ *segs = tmp;
+ seg = *segs + *seg_cnt;
+ *seg_cnt += 1;
+
+ seg->start = seg_start;
+ seg->end = seg_end;
+ seg->offset = seg_off;
+ seg->is_exec = true;
+ }
+
+ if (*seg_cnt == 0) {
+ pr_warn("usdt: failed to find '%s' (resolved to '%s') within PID %d memory mappings\n",
+ lib_path, path, pid);
+ err = -ESRCH;
+ goto err_out;
+ }
+
+ qsort(*segs, *seg_cnt, sizeof(**segs), cmp_elf_segs);
+ err = 0;
+err_out:
+ fclose(f);
+ return err;
+}
+
+static struct elf_seg *find_elf_seg(struct elf_seg *segs, size_t seg_cnt, long addr, bool relative)
+{
+ struct elf_seg *seg;
+ int i;
+
+ if (relative) {
+ /* for shared libraries, address is relative offset and thus
+ * should fall within logical offset-based range of
+ * [offset_start, offset_end)
+ */
+ for (i = 0, seg = segs; i < seg_cnt; i++, seg++) {
+ if (seg->offset <= addr && addr < seg->offset + (seg->end - seg->start))
+ return seg;
+ }
+ } else {
+ /* for binaries, address is absolute and thus should be within
+ * absolute address range of [seg_start, seg_end)
+ */
+ for (i = 0, seg = segs; i < seg_cnt; i++, seg++) {
+ if (seg->start <= addr && addr < seg->end)
+ return seg;
+ }
+ }
+
+ return NULL;
+}
+
+static int parse_usdt_note(Elf *elf, const char *path, long base_addr,
+ GElf_Nhdr *nhdr, const char *data, size_t name_off, size_t desc_off,
+ struct usdt_note *usdt_note);
+
+static int parse_usdt_spec(struct usdt_spec *spec, const struct usdt_note *note, __u64 usdt_cookie);
+
+static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char *path, pid_t pid,
+ const char *usdt_provider, const char *usdt_name, __u64 usdt_cookie,
+ struct usdt_target **out_targets, size_t *out_target_cnt)
+{
+ size_t off, name_off, desc_off, seg_cnt = 0, lib_seg_cnt = 0, target_cnt = 0;
+ struct elf_seg *segs = NULL, *lib_segs = NULL;
+ struct usdt_target *targets = NULL, *target;
+ long base_addr = 0;
+ Elf_Scn *notes_scn, *base_scn;
+ GElf_Shdr base_shdr, notes_shdr;
+ GElf_Ehdr ehdr;
+ GElf_Nhdr nhdr;
+ Elf_Data *data;
+ int err;
+
+ *out_targets = NULL;
+ *out_target_cnt = 0;
+
+ err = find_elf_sec_by_name(elf, USDT_NOTE_SEC, &notes_shdr, &notes_scn);
+ if (err) {
+ pr_warn("usdt: no USDT notes section (%s) found in '%s'\n", USDT_NOTE_SEC, path);
+ return err;
+ }
+
+ if (notes_shdr.sh_type != SHT_NOTE || !gelf_getehdr(elf, &ehdr)) {
+ pr_warn("usdt: invalid USDT notes section (%s) in '%s'\n", USDT_NOTE_SEC, path);
+ return -EINVAL;
+ }
+
+ err = parse_elf_segs(elf, path, &segs, &seg_cnt);
+ if (err) {
+ pr_warn("usdt: failed to process ELF program segments for '%s': %d\n", path, err);
+ goto err_out;
+ }
+
+ /* .stapsdt.base ELF section is optional, but is used for prelink
+ * offset compensation (see a big comment further below)
+ */
+ if (find_elf_sec_by_name(elf, USDT_BASE_SEC, &base_shdr, &base_scn) == 0)
+ base_addr = base_shdr.sh_addr;
+
+ data = elf_getdata(notes_scn, 0);
+ off = 0;
+ while ((off = gelf_getnote(data, off, &nhdr, &name_off, &desc_off)) > 0) {
+ long usdt_abs_ip, usdt_rel_ip, usdt_sema_off = 0;
+ struct usdt_note note;
+ struct elf_seg *seg = NULL;
+ void *tmp;
+
+ err = parse_usdt_note(elf, path, base_addr, &nhdr,
+ data->d_buf, name_off, desc_off, &note);
+ if (err)
+ goto err_out;
+
+ if (strcmp(note.provider, usdt_provider) != 0 || strcmp(note.name, usdt_name) != 0)
+ continue;
+
+ /* We need to compensate "prelink effect". See [0] for details,
+ * relevant parts quoted here:
+ *
+ * Each SDT probe also expands into a non-allocated ELF note. You can
+ * find this by looking at SHT_NOTE sections and decoding the format;
+ * see below for details. Because the note is non-allocated, it means
+ * there is no runtime cost, and also preserved in both stripped files
+ * and .debug files.
+ *
+ * However, this means that prelink won't adjust the note's contents
+ * for address offsets. Instead, this is done via the .stapsdt.base
+ * section. This is a special section that is added to the text. We
+ * will only ever have one of these sections in a final link and it
+ * will only ever be one byte long. Nothing about this section itself
+ * matters, we just use it as a marker to detect prelink address
+ * adjustments.
+ *
+ * Each probe note records the link-time address of the .stapsdt.base
+ * section alongside the probe PC address. The decoder compares the
+ * base address stored in the note with the .stapsdt.base section's
+ * sh_addr. Initially these are the same, but the section header will
+ * be adjusted by prelink. So the decoder applies the difference to
+ * the probe PC address to get the correct prelinked PC address; the
+ * same adjustment is applied to the semaphore address, if any.
+ *
+ * [0] https://sourceware.org/systemtap/wiki/UserSpaceProbeImplementation
+ */
+ usdt_rel_ip = usdt_abs_ip = note.loc_addr;
+ if (base_addr) {
+ usdt_abs_ip += base_addr - note.base_addr;
+ usdt_rel_ip += base_addr - note.base_addr;
+ }
+
+ if (ehdr.e_type == ET_EXEC) {
+ /* When attaching uprobes (which is what USDTs basically
+ * are) kernel expects a relative IP to be specified,
+ * so if we are attaching to an executable ELF binary
+ * (i.e., not a shared library), we need to calculate
+ * proper relative IP based on ELF's load address
+ */
+ seg = find_elf_seg(segs, seg_cnt, usdt_abs_ip, false /* relative */);
+ if (!seg) {
+ err = -ESRCH;
+ pr_warn("usdt: failed to find ELF program segment for '%s:%s' in '%s' at IP 0x%lx\n",
+ usdt_provider, usdt_name, path, usdt_abs_ip);
+ goto err_out;
+ }
+ if (!seg->is_exec) {
+ err = -ESRCH;
+ pr_warn("usdt: matched ELF binary '%s' segment [0x%lx, 0x%lx) for '%s:%s' at IP 0x%lx is not executable\n",
+ path, seg->start, seg->end, usdt_provider, usdt_name,
+ usdt_abs_ip);
+ goto err_out;
+ }
+
+ usdt_rel_ip = usdt_abs_ip - (seg->start - seg->offset);
+ } else if (!man->has_bpf_cookie) { /* ehdr.e_type == ET_DYN */
+ /* If we don't have BPF cookie support but need to
+ * attach to a shared library, we'll need to know and
+ * record absolute addresses of attach points due to
+ * the need to lookup USDT spec by absolute IP of
+ * triggered uprobe. Doing this resolution is only
+ * possible when we have a specific PID of the process
+ * that's using specified shared library. BPF cookie
+ * removes the absolute address limitation as we don't
+ * need to do this lookup (we just use BPF cookie as
+ * an index of USDT spec), so for newer kernels with
+ * BPF cookie support libbpf supports USDT attachment
+ * to shared libraries with no PID filter.
+ */ + if (pid < 0) { + pr_warn("usdt: attaching to shared libraries without specific PID is not supported on current kernel\n"); + err = -ENOTSUP; + goto err_out; + } + + /* lib_segs are lazily initialized only if necessary */ + if (lib_seg_cnt == 0) { + err = parse_lib_segs(pid, path, &lib_segs, &lib_seg_cnt); + if (err) { + pr_warn("usdt: failed to get memory segments in PID %d for shared library '%s': %d\n", + pid, path, err); + goto err_out; + } + } + + seg = find_elf_seg(lib_segs, lib_seg_cnt, usdt_rel_ip, true /* relative */); + if (!seg) { + err = -ESRCH; + pr_warn("usdt: failed to find shared lib memory segment for '%s:%s' in '%s' at relative IP 0x%lx\n", + usdt_provider, usdt_name, path, usdt_rel_ip); + goto err_out; + } + + usdt_abs_ip = seg->start + (usdt_rel_ip - seg->offset); + } + + pr_debug("usdt: probe for '%s:%s' in %s '%s': addr 0x%lx base 0x%lx (resolved abs_ip 0x%lx rel_ip 0x%lx) args '%s' in segment [0x%lx, 0x%lx) at offset 0x%lx\n", + usdt_provider, usdt_name, ehdr.e_type == ET_EXEC ? "exec" : "lib ", path, + note.loc_addr, note.base_addr, usdt_abs_ip, usdt_rel_ip, note.args, + seg ? seg->start : 0, seg ? seg->end : 0, seg ? seg->offset : 0); + + /* Adjust semaphore address to be a relative offset */ + if (note.sema_addr) { + if (!man->has_sema_refcnt) { + pr_warn("usdt: kernel doesn't support USDT semaphore refcounting for '%s:%s' in '%s'\n", + usdt_provider, usdt_name, path); + err = -ENOTSUP; + goto err_out; + } + + seg = find_elf_seg(segs, seg_cnt, note.sema_addr, false /* relative */); + if (!seg) { + err = -ESRCH; + pr_warn("usdt: failed to find ELF loadable segment with semaphore of '%s:%s' in '%s' at 0x%lx\n", + usdt_provider, usdt_name, path, note.sema_addr); + goto err_out; + } + if (seg->is_exec) { + err = -ESRCH; + pr_warn("usdt: matched ELF binary '%s' segment [0x%lx, 0x%lx] for semaphore of '%s:%s' at 0x%lx is executable\n", + path, seg->start, seg->end, usdt_provider, usdt_name, + note.sema_addr); + goto err_out; + } + + usdt_sema_off = note.sema_addr - (seg->start - seg->offset); + + pr_debug("usdt: sema for '%s:%s' in %s '%s': addr 0x%lx base 0x%lx (resolved 0x%lx) in segment [0x%lx, 0x%lx] at offset 0x%lx\n", + usdt_provider, usdt_name, ehdr.e_type == ET_EXEC ? 
"exec" : "lib ", + path, note.sema_addr, note.base_addr, usdt_sema_off, + seg->start, seg->end, seg->offset); + } + + /* Record adjusted addresses and offsets and parse USDT spec */ + tmp = libbpf_reallocarray(targets, target_cnt + 1, sizeof(*targets)); + if (!tmp) { + err = -ENOMEM; + goto err_out; + } + targets = tmp; + + target = &targets[target_cnt]; + memset(target, 0, sizeof(*target)); + + target->abs_ip = usdt_abs_ip; + target->rel_ip = usdt_rel_ip; + target->sema_off = usdt_sema_off; + + /* notes->args references strings from Elf itself, so they can + * be referenced safely until elf_end() call + */ + target->spec_str = note.args; + + err = parse_usdt_spec(&target->spec, ¬e, usdt_cookie); + if (err) + goto err_out; + + target_cnt++; + } + + *out_targets = targets; + *out_target_cnt = target_cnt; + err = target_cnt; + +err_out: + free(segs); + free(lib_segs); + if (err < 0) + free(targets); + return err; +} + +struct bpf_link_usdt { + struct bpf_link link; + + struct usdt_manager *usdt_man; + + size_t spec_cnt; + int *spec_ids; + + size_t uprobe_cnt; + struct { + long abs_ip; + struct bpf_link *link; + } *uprobes; +}; + +static int bpf_link_usdt_detach(struct bpf_link *link) +{ + struct bpf_link_usdt *usdt_link = container_of(link, struct bpf_link_usdt, link); + struct usdt_manager *man = usdt_link->usdt_man; + int i; + + for (i = 0; i < usdt_link->uprobe_cnt; i++) { + /* detach underlying uprobe link */ + bpf_link__destroy(usdt_link->uprobes[i].link); + /* there is no need to update specs map because it will be + * unconditionally overwritten on subsequent USDT attaches, + * but if BPF cookies are not used we need to remove entry + * from ip_to_spec_id map, otherwise we'll run into false + * conflicting IP errors + */ + if (!man->has_bpf_cookie) { + /* not much we can do about errors here */ + (void)bpf_map_delete_elem(bpf_map__fd(man->ip_to_spec_id_map), + &usdt_link->uprobes[i].abs_ip); + } + } + + /* try to return the list of previously used spec IDs to usdt_manager + * for future reuse for subsequent USDT attaches + */ + if (!man->free_spec_ids) { + /* if there were no free spec IDs yet, just transfer our IDs */ + man->free_spec_ids = usdt_link->spec_ids; + man->free_spec_cnt = usdt_link->spec_cnt; + usdt_link->spec_ids = NULL; + } else { + /* otherwise concat IDs */ + size_t new_cnt = man->free_spec_cnt + usdt_link->spec_cnt; + int *new_free_ids; + + new_free_ids = libbpf_reallocarray(man->free_spec_ids, new_cnt, + sizeof(*new_free_ids)); + /* If we couldn't resize free_spec_ids, we'll just leak + * a bunch of free IDs; this is very unlikely to happen and if + * system is so exhausted on memory, it's the least of user's + * concerns, probably. + * So just do our best here to return those IDs to usdt_manager. 
+ */ + if (new_free_ids) { + memcpy(new_free_ids + man->free_spec_cnt, usdt_link->spec_ids, + usdt_link->spec_cnt * sizeof(*usdt_link->spec_ids)); + man->free_spec_ids = new_free_ids; + man->free_spec_cnt = new_cnt; + } + } + + return 0; +} + +static void bpf_link_usdt_dealloc(struct bpf_link *link) +{ + struct bpf_link_usdt *usdt_link = container_of(link, struct bpf_link_usdt, link); + + free(usdt_link->spec_ids); + free(usdt_link->uprobes); + free(usdt_link); +} + +static size_t specs_hash_fn(const void *key, void *ctx) +{ + const char *s = key; + + return str_hash(s); +} + +static bool specs_equal_fn(const void *key1, const void *key2, void *ctx) +{ + const char *s1 = key1; + const char *s2 = key2; + + return strcmp(s1, s2) == 0; +} + +static int allocate_spec_id(struct usdt_manager *man, struct hashmap *specs_hash, + struct bpf_link_usdt *link, struct usdt_target *target, + int *spec_id, bool *is_new) +{ + void *tmp; + int err; + + /* check if we already allocated spec ID for this spec string */ + if (hashmap__find(specs_hash, target->spec_str, &tmp)) { + *spec_id = (long)tmp; + *is_new = false; + return 0; + } + + /* otherwise it's a new ID that needs to be set up in specs map and + * returned back to usdt_manager when USDT link is detached + */ + tmp = libbpf_reallocarray(link->spec_ids, link->spec_cnt + 1, sizeof(*link->spec_ids)); + if (!tmp) + return -ENOMEM; + link->spec_ids = tmp; + + /* get next free spec ID, giving preference to free list, if not empty */ + if (man->free_spec_cnt) { + *spec_id = man->free_spec_ids[man->free_spec_cnt - 1]; + + /* cache spec ID for current spec string for future lookups */ + err = hashmap__add(specs_hash, target->spec_str, (void *)(long)*spec_id); + if (err) + return err; + + man->free_spec_cnt--; + } else { + /* don't allocate spec ID bigger than what fits in specs map */ + if (man->next_free_spec_id >= bpf_map__max_entries(man->specs_map)) + return -E2BIG; + + *spec_id = man->next_free_spec_id; + + /* cache spec ID for current spec string for future lookups */ + err = hashmap__add(specs_hash, target->spec_str, (void *)(long)*spec_id); + if (err) + return err; + + man->next_free_spec_id++; + } + + /* remember new spec ID in the link for later return back to free list on detach */ + link->spec_ids[link->spec_cnt] = *spec_id; + link->spec_cnt++; + *is_new = true; + return 0; +} + +struct bpf_link *usdt_manager_attach_usdt(struct usdt_manager *man, const struct bpf_program *prog, + pid_t pid, const char *path, + const char *usdt_provider, const char *usdt_name, + __u64 usdt_cookie) +{ + int i, fd, err, spec_map_fd, ip_map_fd; + LIBBPF_OPTS(bpf_uprobe_opts, opts); + struct hashmap *specs_hash = NULL; + struct bpf_link_usdt *link = NULL; + struct usdt_target *targets = NULL; + size_t target_cnt; + Elf *elf; + + spec_map_fd = bpf_map__fd(man->specs_map); + ip_map_fd = bpf_map__fd(man->ip_to_spec_id_map); + + /* TODO: perform path resolution similar to uprobe's */ + fd = open(path, O_RDONLY); + if (fd < 0) { + err = -errno; + pr_warn("usdt: failed to open ELF binary '%s': %d\n", path, err); + return libbpf_err_ptr(err); + } + + elf = elf_begin(fd, ELF_C_READ_MMAP, NULL); + if (!elf) { + err = -EBADF; + pr_warn("usdt: failed to parse ELF binary '%s': %s\n", path, elf_errmsg(-1)); + goto err_out; + } + + err = sanity_check_usdt_elf(elf, path); + if (err) + goto err_out; + + /* normalize PID filter */ + if (pid < 0) + pid = -1; + else if (pid == 0) + pid = getpid(); + + /* discover USDT in given binary, optionally limiting + * activations to a given PID, 
if pid > 0 + */ + err = collect_usdt_targets(man, elf, path, pid, usdt_provider, usdt_name, + usdt_cookie, &targets, &target_cnt); + if (err <= 0) { + err = (err == 0) ? -ENOENT : err; + goto err_out; + } + + specs_hash = hashmap__new(specs_hash_fn, specs_equal_fn, NULL); + if (IS_ERR(specs_hash)) { + err = PTR_ERR(specs_hash); + goto err_out; + } + + link = calloc(1, sizeof(*link)); + if (!link) { + err = -ENOMEM; + goto err_out; + } + + link->usdt_man = man; + link->link.detach = &bpf_link_usdt_detach; + link->link.dealloc = &bpf_link_usdt_dealloc; + + link->uprobes = calloc(target_cnt, sizeof(*link->uprobes)); + if (!link->uprobes) { + err = -ENOMEM; + goto err_out; + } + + for (i = 0; i < target_cnt; i++) { + struct usdt_target *target = &targets[i]; + struct bpf_link *uprobe_link; + bool is_new; + int spec_id; + + /* Spec ID can be either reused or newly allocated. If it is + * newly allocated, we'll need to fill out spec map, otherwise + * entire spec should be valid and can be just used by a new + * uprobe. We reuse spec when USDT arg spec is identical. We + * also never share specs between two different USDT + * attachments ("links"), so all the reused specs already + * share USDT cookie value implicitly. + */ + err = allocate_spec_id(man, specs_hash, link, target, &spec_id, &is_new); + if (err) + goto err_out; + + if (is_new && bpf_map_update_elem(spec_map_fd, &spec_id, &target->spec, BPF_ANY)) { + err = -errno; + pr_warn("usdt: failed to set USDT spec #%d for '%s:%s' in '%s': %d\n", + spec_id, usdt_provider, usdt_name, path, err); + goto err_out; + } + if (!man->has_bpf_cookie && + bpf_map_update_elem(ip_map_fd, &target->abs_ip, &spec_id, BPF_NOEXIST)) { + err = -errno; + if (err == -EEXIST) { + pr_warn("usdt: IP collision detected for spec #%d for '%s:%s' in '%s'\n", + spec_id, usdt_provider, usdt_name, path); + } else { + pr_warn("usdt: failed to map IP 0x%lx to spec #%d for '%s:%s' in '%s': %d\n", + target->abs_ip, spec_id, usdt_provider, usdt_name, + path, err); + } + goto err_out; + } + + opts.ref_ctr_offset = target->sema_off; + opts.bpf_cookie = man->has_bpf_cookie ? spec_id : 0; + uprobe_link = bpf_program__attach_uprobe_opts(prog, pid, path, + target->rel_ip, &opts); + err = libbpf_get_error(uprobe_link); + if (err) { + pr_warn("usdt: failed to attach uprobe #%d for '%s:%s' in '%s': %d\n", + i, usdt_provider, usdt_name, path, err); + goto err_out; + } + + link->uprobes[i].link = uprobe_link; + link->uprobes[i].abs_ip = target->abs_ip; + link->uprobe_cnt++; + } + + free(targets); + hashmap__free(specs_hash); + elf_end(elf); + close(fd); + + return &link->link; + +err_out: + if (link) + bpf_link__destroy(&link->link); + free(targets); + hashmap__free(specs_hash); + if (elf) + elf_end(elf); + close(fd); + return libbpf_err_ptr(err); +} + +/* Parse out USDT ELF note from '.note.stapsdt' section. + * Logic inspired by perf's code. 
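parse_usdt_note() below decodes a descriptor whose layout is fixed: three native-word addresses (location, base, semaphore) followed by provider, name and args as consecutive NUL-terminated strings. A toy buffer with invented values makes the layout concrete:

```c
#include <stdio.h>
#include <string.h>

int main(void)
{
	long addrs[3] = { 0x401234, 0x400000, 0 }; /* loc, base, sema */
	const char strs[] = "my_prov\0my_probe\0-4@-20(%rbp)";
	char desc[sizeof(addrs) + sizeof(strs)];
	const char *provider, *name, *args;

	memcpy(desc, addrs, sizeof(addrs));
	memcpy(desc + sizeof(addrs), strs, sizeof(strs));

	/* the strings simply follow the address triple */
	provider = desc + sizeof(addrs);
	name = provider + strlen(provider) + 1;
	args = name + strlen(name) + 1;

	printf("%s:%s args='%s' loc=0x%lx\n", provider, name, args,
	       (unsigned long)addrs[0]);
	return 0;
}
```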
+ */ +static int parse_usdt_note(Elf *elf, const char *path, long base_addr, + GElf_Nhdr *nhdr, const char *data, size_t name_off, size_t desc_off, + struct usdt_note *note) +{ + const char *provider, *name, *args; + long addrs[3]; + size_t len; + + /* sanity check USDT note name and type first */ + if (strncmp(data + name_off, USDT_NOTE_NAME, nhdr->n_namesz) != 0) + return -EINVAL; + if (nhdr->n_type != USDT_NOTE_TYPE) + return -EINVAL; + + /* sanity check USDT note contents ("description" in ELF terminology) */ + len = nhdr->n_descsz; + data = data + desc_off; + + /* +3 is the very minimum required to store three empty strings */ + if (len < sizeof(addrs) + 3) + return -EINVAL; + + /* get location, base, and semaphore addrs */ + memcpy(&addrs, data, sizeof(addrs)); + + /* parse string fields: provider, name, args */ + provider = data + sizeof(addrs); + + name = (const char *)memchr(provider, '\0', data + len - provider); + if (!name) /* non-zero-terminated provider */ + return -EINVAL; + name++; + if (name >= data + len || *name == '\0') /* missing or empty name */ + return -EINVAL; + + args = memchr(name, '\0', data + len - name); + if (!args) /* non-zero-terminated name */ + return -EINVAL; + ++args; + if (args >= data + len) /* missing arguments spec */ + return -EINVAL; + + note->provider = provider; + note->name = name; + if (*args == '\0' || *args == ':') + note->args = ""; + else + note->args = args; + note->loc_addr = addrs[0]; + note->base_addr = addrs[1]; + note->sema_addr = addrs[2]; + + return 0; +} + +static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg); + +static int parse_usdt_spec(struct usdt_spec *spec, const struct usdt_note *note, __u64 usdt_cookie) +{ + const char *s; + int len; + + spec->usdt_cookie = usdt_cookie; + spec->arg_cnt = 0; + + s = note->args; + while (s[0]) { + if (spec->arg_cnt >= USDT_MAX_ARG_CNT) { + pr_warn("usdt: too many USDT arguments (> %d) for '%s:%s' with args spec '%s'\n", + USDT_MAX_ARG_CNT, note->provider, note->name, note->args); + return -E2BIG; + } + + len = parse_usdt_arg(s, spec->arg_cnt, &spec->args[spec->arg_cnt]); + if (len < 0) + return len; + + s += len; + spec->arg_cnt++; + } + + return 0; +} + +/* Architecture-specific logic for parsing USDT argument location specs */ + +#if defined(__x86_64__) || defined(__i386__) + +static int calc_pt_regs_off(const char *reg_name) +{ + static struct { + const char *names[4]; + size_t pt_regs_off; + } reg_map[] = { +#ifdef __x86_64__ +#define reg_off(reg64, reg32) offsetof(struct pt_regs, reg64) +#else +#define reg_off(reg64, reg32) offsetof(struct pt_regs, reg32) +#endif + { {"rip", "eip", "", ""}, reg_off(rip, eip) }, + { {"rax", "eax", "ax", "al"}, reg_off(rax, eax) }, + { {"rbx", "ebx", "bx", "bl"}, reg_off(rbx, ebx) }, + { {"rcx", "ecx", "cx", "cl"}, reg_off(rcx, ecx) }, + { {"rdx", "edx", "dx", "dl"}, reg_off(rdx, edx) }, + { {"rsi", "esi", "si", "sil"}, reg_off(rsi, esi) }, + { {"rdi", "edi", "di", "dil"}, reg_off(rdi, edi) }, + { {"rbp", "ebp", "bp", "bpl"}, reg_off(rbp, ebp) }, + { {"rsp", "esp", "sp", "spl"}, reg_off(rsp, esp) }, +#undef reg_off +#ifdef __x86_64__ + { {"r8", "r8d", "r8w", "r8b"}, offsetof(struct pt_regs, r8) }, + { {"r9", "r9d", "r9w", "r9b"}, offsetof(struct pt_regs, r9) }, + { {"r10", "r10d", "r10w", "r10b"}, offsetof(struct pt_regs, r10) }, + { {"r11", "r11d", "r11w", "r11b"}, offsetof(struct pt_regs, r11) }, + { {"r12", "r12d", "r12w", "r12b"}, offsetof(struct pt_regs, r12) }, + { {"r13", "r13d", "r13w", "r13b"}, offsetof(struct 
pt_regs, r13) }, + { {"r14", "r14d", "r14w", "r14b"}, offsetof(struct pt_regs, r14) }, + { {"r15", "r15d", "r15w", "r15b"}, offsetof(struct pt_regs, r15) }, +#endif + }; + int i, j; + + for (i = 0; i < ARRAY_SIZE(reg_map); i++) { + for (j = 0; j < ARRAY_SIZE(reg_map[i].names); j++) { + if (strcmp(reg_name, reg_map[i].names[j]) == 0) + return reg_map[i].pt_regs_off; + } + } + + pr_warn("usdt: unrecognized register '%s'\n", reg_name); + return -ENOENT; +} + +static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg) +{ + char *reg_name = NULL; + int arg_sz, len, reg_off; + long off; + + if (sscanf(arg_str, " %d @ %ld ( %%%m[^)] ) %n", &arg_sz, &off, &reg_name, &len) == 3) { + /* Memory dereference case, e.g., -4@-20(%rbp) */ + arg->arg_type = USDT_ARG_REG_DEREF; + arg->val_off = off; + reg_off = calc_pt_regs_off(reg_name); + free(reg_name); + if (reg_off < 0) + return reg_off; + arg->reg_off = reg_off; + } else if (sscanf(arg_str, " %d @ %%%ms %n", &arg_sz, &reg_name, &len) == 2) { + /* Register read case, e.g., -4@%eax */ + arg->arg_type = USDT_ARG_REG; + arg->val_off = 0; + + reg_off = calc_pt_regs_off(reg_name); + free(reg_name); + if (reg_off < 0) + return reg_off; + arg->reg_off = reg_off; + } else if (sscanf(arg_str, " %d @ $%ld %n", &arg_sz, &off, &len) == 2) { + /* Constant value case, e.g., 4@$71 */ + arg->arg_type = USDT_ARG_CONST; + arg->val_off = off; + arg->reg_off = 0; + } else { + pr_warn("usdt: unrecognized arg #%d spec '%s'\n", arg_num, arg_str); + return -EINVAL; + } + + arg->arg_signed = arg_sz < 0; + if (arg_sz < 0) + arg_sz = -arg_sz; + + switch (arg_sz) { + case 1: case 2: case 4: case 8: + arg->arg_bitshift = 64 - arg_sz * 8; + break; + default: + pr_warn("usdt: unsupported arg #%d (spec '%s') size: %d\n", + arg_num, arg_str, arg_sz); + return -EINVAL; + } + + return len; +} + +#elif defined(__s390x__) + +/* Do not support __s390__ for now, since user_pt_regs is broken with -m31.
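The x86 parser above leans on sscanf()'s `%m` modifier (POSIX.1-2008, long supported by glibc), which allocates the matched register name, while `%n` reports how much of the spec string was consumed so the caller can advance to the next argument. A tiny driver for the dereference form, using a spec string of the shape gcc's <sys/sdt.h> emits:

```c
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	const char *spec = "-4@-20(%rbp)";
	char *reg = NULL;
	int sz, len;
	long off;

	/* same format string as the memory-dereference case above */
	if (sscanf(spec, " %d @ %ld ( %%%m[^)] ) %n", &sz, &off, &reg, &len) == 3)
		printf("size=%d off=%ld reg=%s consumed=%d\n", sz, off, reg, len);
	free(reg); /* %m hands ownership of the buffer to the caller */
	return 0;
}
```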
*/ + +static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg) +{ + unsigned int reg; + int arg_sz, len; + long off; + + if (sscanf(arg_str, " %d @ %ld ( %%r%u ) %n", &arg_sz, &off, &reg, &len) == 3) { + /* Memory dereference case, e.g., -2@-28(%r15) */ + arg->arg_type = USDT_ARG_REG_DEREF; + arg->val_off = off; + if (reg > 15) { + pr_warn("usdt: unrecognized register '%%r%u'\n", reg); + return -EINVAL; + } + arg->reg_off = offsetof(user_pt_regs, gprs[reg]); + } else if (sscanf(arg_str, " %d @ %%r%u %n", &arg_sz, &reg, &len) == 2) { + /* Register read case, e.g., -8@%r0 */ + arg->arg_type = USDT_ARG_REG; + arg->val_off = 0; + if (reg > 15) { + pr_warn("usdt: unrecognized register '%%r%u'\n", reg); + return -EINVAL; + } + arg->reg_off = offsetof(user_pt_regs, gprs[reg]); + } else if (sscanf(arg_str, " %d @ %ld %n", &arg_sz, &off, &len) == 2) { + /* Constant value case, e.g., 4@71 */ + arg->arg_type = USDT_ARG_CONST; + arg->val_off = off; + arg->reg_off = 0; + } else { + pr_warn("usdt: unrecognized arg #%d spec '%s'\n", arg_num, arg_str); + return -EINVAL; + } + + arg->arg_signed = arg_sz < 0; + if (arg_sz < 0) + arg_sz = -arg_sz; + + switch (arg_sz) { + case 1: case 2: case 4: case 8: + arg->arg_bitshift = 64 - arg_sz * 8; + break; + default: + pr_warn("usdt: unsupported arg #%d (spec '%s') size: %d\n", + arg_num, arg_str, arg_sz); + return -EINVAL; + } + + return len; +} + +#elif defined(__aarch64__) + +static int calc_pt_regs_off(const char *reg_name) +{ + int reg_num; + + if (sscanf(reg_name, "x%d", &reg_num) == 1) { + if (reg_num >= 0 && reg_num < 31) + return offsetof(struct user_pt_regs, regs[reg_num]); + } else if (strcmp(reg_name, "sp") == 0) { + return offsetof(struct user_pt_regs, sp); + } + pr_warn("usdt: unrecognized register '%s'\n", reg_name); + return -ENOENT; +} + +static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg) +{ + char *reg_name = NULL; + int arg_sz, len, reg_off; + long off; + + if (sscanf(arg_str, " %d @ \[ %m[a-z0-9], %ld ] %n", &arg_sz, &reg_name, &off, &len) == 3) { + /* Memory dereference case, e.g., -4@[sp, 96] */ + arg->arg_type = USDT_ARG_REG_DEREF; + arg->val_off = off; + reg_off = calc_pt_regs_off(reg_name); + free(reg_name); + if (reg_off < 0) + return reg_off; + arg->reg_off = reg_off; + } else if (sscanf(arg_str, " %d @ \[ %m[a-z0-9] ] %n", &arg_sz, &reg_name, &len) == 2) { + /* Memory dereference case, e.g., -4@[sp] */ + arg->arg_type = USDT_ARG_REG_DEREF; + arg->val_off = 0; + reg_off = calc_pt_regs_off(reg_name); + free(reg_name); + if (reg_off < 0) + return reg_off; + arg->reg_off = reg_off; + } else if (sscanf(arg_str, " %d @ %ld %n", &arg_sz, &off, &len) == 2) { + /* Constant value case, e.g., 4@5 */ + arg->arg_type = USDT_ARG_CONST; + arg->val_off = off; + arg->reg_off = 0; + } else if (sscanf(arg_str, " %d @ %m[a-z0-9] %n", &arg_sz, &reg_name, &len) == 2) { + /* Register read case, e.g., -8@x4 */ + arg->arg_type = USDT_ARG_REG; + arg->val_off = 0; + reg_off = calc_pt_regs_off(reg_name); + free(reg_name); + if (reg_off < 0) + return reg_off; + arg->reg_off = reg_off; + } else { + pr_warn("usdt: unrecognized arg #%d spec '%s'\n", arg_num, arg_str); + return -EINVAL; + } + + arg->arg_signed = arg_sz < 0; + if (arg_sz < 0) + arg_sz = -arg_sz; + + switch (arg_sz) { + case 1: case 2: case 4: case 8: + arg->arg_bitshift = 64 - arg_sz * 8; + break; + default: + pr_warn("usdt: unsupported arg #%d (spec '%s') size: %d\n", + arg_num, arg_str, arg_sz); + return -EINVAL; + } + + return len; +} + +#elif
defined(__riscv) + +static int calc_pt_regs_off(const char *reg_name) +{ + static struct { + const char *name; + size_t pt_regs_off; + } reg_map[] = { + { "ra", offsetof(struct user_regs_struct, ra) }, + { "sp", offsetof(struct user_regs_struct, sp) }, + { "gp", offsetof(struct user_regs_struct, gp) }, + { "tp", offsetof(struct user_regs_struct, tp) }, + { "a0", offsetof(struct user_regs_struct, a0) }, + { "a1", offsetof(struct user_regs_struct, a1) }, + { "a2", offsetof(struct user_regs_struct, a2) }, + { "a3", offsetof(struct user_regs_struct, a3) }, + { "a4", offsetof(struct user_regs_struct, a4) }, + { "a5", offsetof(struct user_regs_struct, a5) }, + { "a6", offsetof(struct user_regs_struct, a6) }, + { "a7", offsetof(struct user_regs_struct, a7) }, + { "s0", offsetof(struct user_regs_struct, s0) }, + { "s1", offsetof(struct user_regs_struct, s1) }, + { "s2", offsetof(struct user_regs_struct, s2) }, + { "s3", offsetof(struct user_regs_struct, s3) }, + { "s4", offsetof(struct user_regs_struct, s4) }, + { "s5", offsetof(struct user_regs_struct, s5) }, + { "s6", offsetof(struct user_regs_struct, s6) }, + { "s7", offsetof(struct user_regs_struct, s7) }, + { "s8", offsetof(struct user_regs_struct, rv_s8) }, + { "s9", offsetof(struct user_regs_struct, s9) }, + { "s10", offsetof(struct user_regs_struct, s10) }, + { "s11", offsetof(struct user_regs_struct, s11) }, + { "t0", offsetof(struct user_regs_struct, t0) }, + { "t1", offsetof(struct user_regs_struct, t1) }, + { "t2", offsetof(struct user_regs_struct, t2) }, + { "t3", offsetof(struct user_regs_struct, t3) }, + { "t4", offsetof(struct user_regs_struct, t4) }, + { "t5", offsetof(struct user_regs_struct, t5) }, + { "t6", offsetof(struct user_regs_struct, t6) }, + }; + int i; + + for (i = 0; i < ARRAY_SIZE(reg_map); i++) { + if (strcmp(reg_name, reg_map[i].name) == 0) + return reg_map[i].pt_regs_off; + } + + pr_warn("usdt: unrecognized register '%s'\n", reg_name); + return -ENOENT; +} + +static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg) +{ + char *reg_name = NULL; + int arg_sz, len, reg_off; + long off; + + if (sscanf(arg_str, " %d @ %ld ( %m[a-z0-9] ) %n", &arg_sz, &off, &reg_name, &len) == 3) { + /* Memory dereference case, e.g., -8@-88(s0) */ + arg->arg_type = USDT_ARG_REG_DEREF; + arg->val_off = off; + reg_off = calc_pt_regs_off(reg_name); + free(reg_name); + if (reg_off < 0) + return reg_off; + arg->reg_off = reg_off; + } else if (sscanf(arg_str, " %d @ %ld %n", &arg_sz, &off, &len) == 2) { + /* Constant value case, e.g., 4@5 */ + arg->arg_type = USDT_ARG_CONST; + arg->val_off = off; + arg->reg_off = 0; + } else if (sscanf(arg_str, " %d @ %m[a-z0-9] %n", &arg_sz, &reg_name, &len) == 2) { + /* Register read case, e.g., -8@a1 */ + arg->arg_type = USDT_ARG_REG; + arg->val_off = 0; + reg_off = calc_pt_regs_off(reg_name); + free(reg_name); + if (reg_off < 0) + return reg_off; + arg->reg_off = reg_off; + } else { + pr_warn("usdt: unrecognized arg #%d spec '%s'\n", arg_num, arg_str); + return -EINVAL; + } + + arg->arg_signed = arg_sz < 0; + if (arg_sz < 0) + arg_sz = -arg_sz; + + switch (arg_sz) { + case 1: case 2: case 4: case 8: + arg->arg_bitshift = 64 - arg_sz * 8; + break; + default: + pr_warn("usdt: unsupported arg #%d (spec '%s') size: %d\n", + arg_num, arg_str, arg_sz); + return -EINVAL; + } + + return len; +} + +#else + +static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg) +{ + pr_warn("usdt: libbpf doesn't support USDTs on current architecture\n"); + return -ENOTSUP; +}
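All four architecture variants finish identically: `arg_bitshift = 64 - arg_sz * 8` is stored so the consumer can narrow a 64-bit register or memory load to the argument's declared width. That consumer is the BPF-side helper, which is not part of this hunk; the sketch below only demonstrates the shift trick itself, including sign extension when the spec's size was negative (signed):

```c
#include <stdio.h>

static unsigned long long extract(unsigned long long val, int sz, int is_signed)
{
	int shift = 64 - sz * 8; /* == arg_bitshift */

	val <<= shift; /* drop the high garbage bits */
	return is_signed ? (unsigned long long)((long long)val >> shift)
			 : val >> shift;
}

int main(void)
{
	/* a "-4@..." spec: 4-byte signed value sitting in a 64-bit slot */
	printf("signed:   0x%llx\n", extract(0x12345678fffffff0ULL, 4, 1));
	printf("unsigned: 0x%llx\n", extract(0x12345678fffffff0ULL, 4, 0));
	return 0;
}
```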
+ +#endif diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c index 1b15ba13c477..a09315538a30 100644 --- a/tools/lib/perf/evlist.c +++ b/tools/lib/perf/evlist.c @@ -577,7 +577,6 @@ int perf_evlist__mmap_ops(struct perf_evlist *evlist, { struct perf_evsel *evsel; const struct perf_cpu_map *cpus = evlist->user_requested_cpus; - const struct perf_thread_map *threads = evlist->threads; if (!ops || !ops->get || !ops->mmap) return -EINVAL; @@ -589,7 +588,7 @@ int perf_evlist__mmap_ops(struct perf_evlist *evlist, perf_evlist__for_each_entry(evlist, evsel) { if ((evsel->attr.read_format & PERF_FORMAT_ID) && evsel->sample_id == NULL && - perf_evsel__alloc_id(evsel, perf_cpu_map__nr(cpus), threads->nr) < 0) + perf_evsel__alloc_id(evsel, evsel->fd->max_x, evsel->fd->max_y) < 0) return -ENOMEM; } diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 6de5085e3e5a..bd0c2c828940 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1155,6 +1155,17 @@ static void annotate_call_site(struct objtool_file *file, : arch_nop_insn(insn->len)); insn->type = sibling ? INSN_RETURN : INSN_NOP; + + if (sibling) { + /* + * We've replaced the tail-call JMP insn by two new + * insn: RET; INT3, except we only have a single struct + * insn here. Mark it retpoline_safe to avoid the SLS + * warning, instead of adding another insn. + */ + insn->retpoline_safe = true; + } + return; } @@ -1239,11 +1250,20 @@ static bool same_function(struct instruction *insn1, struct instruction *insn2) return insn1->func->pfunc == insn2->func->pfunc; } -static bool is_first_func_insn(struct instruction *insn) +static bool is_first_func_insn(struct objtool_file *file, struct instruction *insn) { - return insn->offset == insn->func->offset || - (insn->type == INSN_ENDBR && - insn->offset == insn->func->offset + insn->len); + if (insn->offset == insn->func->offset) + return true; + + if (ibt) { + struct instruction *prev = prev_insn_same_sym(file, insn); + + if (prev && prev->type == INSN_ENDBR && + insn->offset == insn->func->offset + prev->len) + return true; + } + + return false; } /* @@ -1327,7 +1347,7 @@ static int add_jump_destinations(struct objtool_file *file) insn->jump_dest->func->pfunc = insn->func; } else if (!same_function(insn, insn->jump_dest) && - is_first_func_insn(insn->jump_dest)) { + is_first_func_insn(file, insn->jump_dest)) { /* internal sibling call (without reloc) */ add_call_dest(file, insn, insn->jump_dest->func, true); } diff --git a/tools/perf/Documentation/perf.txt b/tools/perf/Documentation/perf.txt index 9c330cdfa973..71ebdf8125de 100644 --- a/tools/perf/Documentation/perf.txt +++ b/tools/perf/Documentation/perf.txt @@ -83,7 +83,7 @@ linkperf:perf-buildid-list[1], linkperf:perf-c2c[1], linkperf:perf-config[1], linkperf:perf-data[1], linkperf:perf-diff[1], linkperf:perf-evlist[1], linkperf:perf-ftrace[1], linkperf:perf-help[1], linkperf:perf-inject[1], -linkperf:perf-intel-pt[1], linkperf:perf-kallsyms[1], +linkperf:perf-intel-pt[1], linkperf:perf-iostat[1], linkperf:perf-kallsyms[1], linkperf:perf-kmem[1], linkperf:perf-kvm[1], linkperf:perf-lock[1], linkperf:perf-mem[1], linkperf:perf-probe[1], linkperf:perf-sched[1], linkperf:perf-script[1], linkperf:perf-test[1], diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 96ad944ca6a8..f3bf9297bcc0 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -272,6 +272,9 @@ ifdef PYTHON_CONFIG PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) -lutil PYTHON_EMBED_CCOPTS := $(shell 
$(PYTHON_CONFIG_SQ) --includes 2>/dev/null) FLAGS_PYTHON_EMBED := $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS) + ifeq ($(CC_NO_CLANG), 0) + PYTHON_EMBED_CCOPTS := $(filter-out -ffat-lto-objects, $(PYTHON_EMBED_CCOPTS)) + endif endif FEATURE_CHECK_CFLAGS-libpython := $(PYTHON_EMBED_CCOPTS) @@ -790,6 +793,9 @@ else LDFLAGS += $(PERL_EMBED_LDFLAGS) EXTLIBS += $(PERL_EMBED_LIBADD) CFLAGS += -DHAVE_LIBPERL_SUPPORT + ifeq ($(CC_NO_CLANG), 0) + CFLAGS += -Wno-compound-token-split-by-macro + endif $(call detected,CONFIG_LIBPERL) endif endif diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c index 86e2e926aa0e..af4d63af8072 100644 --- a/tools/perf/arch/arm64/util/arm-spe.c +++ b/tools/perf/arch/arm64/util/arm-spe.c @@ -239,6 +239,12 @@ static int arm_spe_recording_options(struct auxtrace_record *itr, arm_spe_set_timestamp(itr, arm_spe_evsel); } + /* + * Set this only so that perf report knows that SPE generates memory info. It has no effect + * on the opening of the event or the SPE data produced. + */ + evsel__set_sample_bit(arm_spe_evsel, DATA_SRC); + /* Add dummy event to keep tracking */ err = parse_events(evlist, "dummy:u", NULL); if (err) diff --git a/tools/perf/bench/epoll-ctl.c b/tools/perf/bench/epoll-ctl.c index 134612bde0cb..4256dc5d6236 100644 --- a/tools/perf/bench/epoll-ctl.c +++ b/tools/perf/bench/epoll-ctl.c @@ -222,13 +222,20 @@ static void init_fdmaps(struct worker *w, int pct) static int do_threads(struct worker *worker, struct perf_cpu_map *cpu) { pthread_attr_t thread_attr, *attrp = NULL; - cpu_set_t cpuset; + cpu_set_t *cpuset; unsigned int i, j; int ret = 0; + int nrcpus; + size_t size; if (!noaffinity) pthread_attr_init(&thread_attr); + nrcpus = perf_cpu_map__nr(cpu); + cpuset = CPU_ALLOC(nrcpus); + BUG_ON(!cpuset); + size = CPU_ALLOC_SIZE(nrcpus); + for (i = 0; i < nthreads; i++) { struct worker *w = &worker[i]; @@ -252,22 +259,28 @@ static int do_threads(struct worker *worker, struct perf_cpu_map *cpu) init_fdmaps(w, 50); if (!noaffinity) { - CPU_ZERO(&cpuset); - CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset); + CPU_ZERO_S(size, cpuset); + CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, + size, cpuset); - ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset); - if (ret) + ret = pthread_attr_setaffinity_np(&thread_attr, size, cpuset); + if (ret) { + CPU_FREE(cpuset); err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); + } attrp = &thread_attr; } ret = pthread_create(&w->thread, attrp, workerfn, (void *)(struct worker *) w); - if (ret) + if (ret) { + CPU_FREE(cpuset); err(EXIT_FAILURE, "pthread_create"); + } } + CPU_FREE(cpuset); if (!noaffinity) pthread_attr_destroy(&thread_attr); diff --git a/tools/perf/bench/epoll-wait.c b/tools/perf/bench/epoll-wait.c index 37de970c9743..2728b0140853 100644 --- a/tools/perf/bench/epoll-wait.c +++ b/tools/perf/bench/epoll-wait.c @@ -291,9 +291,11 @@ static void print_summary(void) static int do_threads(struct worker *worker, struct perf_cpu_map *cpu) { pthread_attr_t thread_attr, *attrp = NULL; - cpu_set_t cpuset; + cpu_set_t *cpuset; unsigned int i, j; int ret = 0, events = EPOLLIN; + int nrcpus; + size_t size; if (oneshot) events |= EPOLLONESHOT; @@ -306,6 +308,11 @@ static int do_threads(struct worker *worker, struct perf_cpu_map *cpu) if (!noaffinity) pthread_attr_init(&thread_attr); + nrcpus = perf_cpu_map__nr(cpu); + cpuset = CPU_ALLOC(nrcpus); + BUG_ON(!cpuset); + size = CPU_ALLOC_SIZE(nrcpus); + for (i = 0; i < nthreads; i++) { 
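The epoll and futex bench hunks in this stretch (and the numa.c ones further down) all perform the same conversion: a stack `cpu_set_t`, which is hard-capped at 1024 CPUs, becomes a heap-allocated set sized to the real CPU count. A minimal standalone sketch of the glibc sequence being adopted; 2048 is an arbitrary stand-in for `perf_cpu_map__nr()`:

```c
#define _GNU_SOURCE
#include <sched.h>
#include <pthread.h>
#include <err.h>
#include <stdlib.h>

int main(void)
{
	int nrcpus = 2048; /* may exceed CPU_SETSIZE (1024) */
	cpu_set_t *cpuset = CPU_ALLOC(nrcpus);
	size_t size = CPU_ALLOC_SIZE(nrcpus);
	pthread_attr_t attr;

	if (!cpuset)
		err(EXIT_FAILURE, "CPU_ALLOC");

	pthread_attr_init(&attr);
	CPU_ZERO_S(size, cpuset);
	CPU_SET_S(0, size, cpuset);

	/* pass size, not sizeof(cpu_set_t), or large sets get truncated */
	if (pthread_attr_setaffinity_np(&attr, size, cpuset)) {
		CPU_FREE(cpuset);
		errx(EXIT_FAILURE, "pthread_attr_setaffinity_np");
	}

	CPU_FREE(cpuset);
	pthread_attr_destroy(&attr);
	return 0;
}
```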
struct worker *w = &worker[i]; @@ -341,22 +348,28 @@ static int do_threads(struct worker *worker, struct perf_cpu_map *cpu) } if (!noaffinity) { - CPU_ZERO(&cpuset); - CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset); + CPU_ZERO_S(size, cpuset); + CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, + size, cpuset); - ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset); - if (ret) + ret = pthread_attr_setaffinity_np(&thread_attr, size, cpuset); + if (ret) { + CPU_FREE(cpuset); err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); + } attrp = &thread_attr; } ret = pthread_create(&w->thread, attrp, workerfn, (void *)(struct worker *) w); - if (ret) + if (ret) { + CPU_FREE(cpuset); err(EXIT_FAILURE, "pthread_create"); + } } + CPU_FREE(cpuset); if (!noaffinity) pthread_attr_destroy(&thread_attr); diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c index dbcecec4eeda..f05db4cf983d 100644 --- a/tools/perf/bench/futex-hash.c +++ b/tools/perf/bench/futex-hash.c @@ -122,12 +122,14 @@ static void print_summary(void) int bench_futex_hash(int argc, const char **argv) { int ret = 0; - cpu_set_t cpuset; + cpu_set_t *cpuset; struct sigaction act; unsigned int i; pthread_attr_t thread_attr; struct worker *worker = NULL; struct perf_cpu_map *cpu; + int nrcpus; + size_t size; argc = parse_options(argc, argv, options, bench_futex_hash_usage, 0); if (argc) { @@ -170,25 +172,35 @@ int bench_futex_hash(int argc, const char **argv) threads_starting = params.nthreads; pthread_attr_init(&thread_attr); gettimeofday(&bench__start, NULL); + + nrcpus = perf_cpu_map__nr(cpu); + cpuset = CPU_ALLOC(nrcpus); + BUG_ON(!cpuset); + size = CPU_ALLOC_SIZE(nrcpus); + for (i = 0; i < params.nthreads; i++) { worker[i].tid = i; worker[i].futex = calloc(params.nfutexes, sizeof(*worker[i].futex)); if (!worker[i].futex) goto errmem; - CPU_ZERO(&cpuset); - CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset); + CPU_ZERO_S(size, cpuset); - ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset); - if (ret) + CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset); + ret = pthread_attr_setaffinity_np(&thread_attr, size, cpuset); + if (ret) { + CPU_FREE(cpuset); err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); - + } ret = pthread_create(&worker[i].thread, &thread_attr, workerfn, (void *)(struct worker *) &worker[i]); - if (ret) + if (ret) { + CPU_FREE(cpuset); err(EXIT_FAILURE, "pthread_create"); + } } + CPU_FREE(cpuset); pthread_attr_destroy(&thread_attr); pthread_mutex_lock(&thread_lock); diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c index 6fc9a3d55c1f..0abb3f7ee24f 100644 --- a/tools/perf/bench/futex-lock-pi.c +++ b/tools/perf/bench/futex-lock-pi.c @@ -120,11 +120,17 @@ static void *workerfn(void *arg) static void create_threads(struct worker *w, pthread_attr_t thread_attr, struct perf_cpu_map *cpu) { - cpu_set_t cpuset; + cpu_set_t *cpuset; unsigned int i; + int nrcpus = perf_cpu_map__nr(cpu); + size_t size; threads_starting = params.nthreads; + cpuset = CPU_ALLOC(nrcpus); + BUG_ON(!cpuset); + size = CPU_ALLOC_SIZE(nrcpus); + for (i = 0; i < params.nthreads; i++) { worker[i].tid = i; @@ -135,15 +141,20 @@ static void create_threads(struct worker *w, pthread_attr_t thread_attr, } else worker[i].futex = &global_futex; - CPU_ZERO(&cpuset); - CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset); + CPU_ZERO_S(size, cpuset); + 
CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset); - if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset)) + if (pthread_attr_setaffinity_np(&thread_attr, size, cpuset)) { + CPU_FREE(cpuset); err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); + } - if (pthread_create(&w[i].thread, &thread_attr, workerfn, &worker[i])) + if (pthread_create(&w[i].thread, &thread_attr, workerfn, &worker[i])) { + CPU_FREE(cpuset); err(EXIT_FAILURE, "pthread_create"); + } } + CPU_FREE(cpuset); } int bench_futex_lock_pi(int argc, const char **argv) diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c index 2f59d5d1c509..b6faabfafb8e 100644 --- a/tools/perf/bench/futex-requeue.c +++ b/tools/perf/bench/futex-requeue.c @@ -123,22 +123,33 @@ static void *workerfn(void *arg __maybe_unused) static void block_threads(pthread_t *w, pthread_attr_t thread_attr, struct perf_cpu_map *cpu) { - cpu_set_t cpuset; + cpu_set_t *cpuset; unsigned int i; + int nrcpus = perf_cpu_map__nr(cpu); + size_t size; threads_starting = params.nthreads; + cpuset = CPU_ALLOC(nrcpus); + BUG_ON(!cpuset); + size = CPU_ALLOC_SIZE(nrcpus); + /* create and block all threads */ for (i = 0; i < params.nthreads; i++) { - CPU_ZERO(&cpuset); - CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset); + CPU_ZERO_S(size, cpuset); + CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset); - if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset)) + if (pthread_attr_setaffinity_np(&thread_attr, size, cpuset)) { + CPU_FREE(cpuset); err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); + } - if (pthread_create(&w[i], &thread_attr, workerfn, NULL)) + if (pthread_create(&w[i], &thread_attr, workerfn, NULL)) { + CPU_FREE(cpuset); err(EXIT_FAILURE, "pthread_create"); + } } + CPU_FREE(cpuset); } static void toggle_done(int sig __maybe_unused, diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c index 861deb934745..e47f46a3a47e 100644 --- a/tools/perf/bench/futex-wake-parallel.c +++ b/tools/perf/bench/futex-wake-parallel.c @@ -144,22 +144,33 @@ static void *blocked_workerfn(void *arg __maybe_unused) static void block_threads(pthread_t *w, pthread_attr_t thread_attr, struct perf_cpu_map *cpu) { - cpu_set_t cpuset; + cpu_set_t *cpuset; unsigned int i; + int nrcpus = perf_cpu_map__nr(cpu); + size_t size; threads_starting = params.nthreads; + cpuset = CPU_ALLOC(nrcpus); + BUG_ON(!cpuset); + size = CPU_ALLOC_SIZE(nrcpus); + /* create and block all threads */ for (i = 0; i < params.nthreads; i++) { - CPU_ZERO(&cpuset); - CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset); + CPU_ZERO_S(size, cpuset); + CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset); - if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset)) + if (pthread_attr_setaffinity_np(&thread_attr, size, cpuset)) { + CPU_FREE(cpuset); err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); + } - if (pthread_create(&w[i], &thread_attr, blocked_workerfn, NULL)) + if (pthread_create(&w[i], &thread_attr, blocked_workerfn, NULL)) { + CPU_FREE(cpuset); err(EXIT_FAILURE, "pthread_create"); + } } + CPU_FREE(cpuset); } static void print_run(struct thread_data *waking_worker, unsigned int run_num) diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c index cfda48bef1d7..201a3555f09a 100644 --- a/tools/perf/bench/futex-wake.c +++ b/tools/perf/bench/futex-wake.c @@ -97,22 +97,32 
@@ static void print_summary(void) static void block_threads(pthread_t *w, pthread_attr_t thread_attr, struct perf_cpu_map *cpu) { - cpu_set_t cpuset; + cpu_set_t *cpuset; unsigned int i; - + size_t size; + int nrcpus = perf_cpu_map__nr(cpu); threads_starting = params.nthreads; + cpuset = CPU_ALLOC(nrcpus); + BUG_ON(!cpuset); + size = CPU_ALLOC_SIZE(nrcpus); + /* create and block all threads */ for (i = 0; i < params.nthreads; i++) { - CPU_ZERO(&cpuset); - CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset); + CPU_ZERO_S(size, cpuset); + CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset); - if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset)) + if (pthread_attr_setaffinity_np(&thread_attr, size, cpuset)) { + CPU_FREE(cpuset); err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); + } - if (pthread_create(&w[i], &thread_attr, workerfn, NULL)) + if (pthread_create(&w[i], &thread_attr, workerfn, NULL)) { + CPU_FREE(cpuset); err(EXIT_FAILURE, "pthread_create"); + } } + CPU_FREE(cpuset); } static void toggle_done(int sig __maybe_unused, diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index f2640179ada9..44e1f8a44087 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -34,6 +34,7 @@ #include <linux/numa.h> #include <linux/zalloc.h> +#include "../util/header.h" #include <numa.h> #include <numaif.h> @@ -54,7 +55,7 @@ struct thread_data { int curr_cpu; - cpu_set_t bind_cpumask; + cpu_set_t *bind_cpumask; int bind_node; u8 *process_data; int process_nr; @@ -266,71 +267,115 @@ static bool node_has_cpus(int node) return ret; } -static cpu_set_t bind_to_cpu(int target_cpu) +static cpu_set_t *bind_to_cpu(int target_cpu) { - cpu_set_t orig_mask, mask; - int ret; + int nrcpus = numa_num_possible_cpus(); + cpu_set_t *orig_mask, *mask; + size_t size; - ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask); - BUG_ON(ret); + orig_mask = CPU_ALLOC(nrcpus); + BUG_ON(!orig_mask); + size = CPU_ALLOC_SIZE(nrcpus); + CPU_ZERO_S(size, orig_mask); + + if (sched_getaffinity(0, size, orig_mask)) + goto err_out; + + mask = CPU_ALLOC(nrcpus); + if (!mask) + goto err_out; - CPU_ZERO(&mask); + CPU_ZERO_S(size, mask); if (target_cpu == -1) { int cpu; for (cpu = 0; cpu < g->p.nr_cpus; cpu++) - CPU_SET(cpu, &mask); + CPU_SET_S(cpu, size, mask); } else { - BUG_ON(target_cpu < 0 || target_cpu >= g->p.nr_cpus); - CPU_SET(target_cpu, &mask); + if (target_cpu < 0 || target_cpu >= g->p.nr_cpus) + goto err; + + CPU_SET_S(target_cpu, size, mask); } - ret = sched_setaffinity(0, sizeof(mask), &mask); - BUG_ON(ret); + if (sched_setaffinity(0, size, mask)) + goto err; return orig_mask; + +err: + CPU_FREE(mask); +err_out: + CPU_FREE(orig_mask); + + /* BUG_ON due to failure in allocation of orig_mask/mask */ + BUG_ON(-1); } -static cpu_set_t bind_to_node(int target_node) +static cpu_set_t *bind_to_node(int target_node) { - cpu_set_t orig_mask, mask; + int nrcpus = numa_num_possible_cpus(); + size_t size; + cpu_set_t *orig_mask, *mask; int cpu; - int ret; - ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask); - BUG_ON(ret); + orig_mask = CPU_ALLOC(nrcpus); + BUG_ON(!orig_mask); + size = CPU_ALLOC_SIZE(nrcpus); + CPU_ZERO_S(size, orig_mask); - CPU_ZERO(&mask); + if (sched_getaffinity(0, size, orig_mask)) + goto err_out; + + mask = CPU_ALLOC(nrcpus); + if (!mask) + goto err_out; + + CPU_ZERO_S(size, mask); if (target_node == NUMA_NO_NODE) { for (cpu = 0; cpu < g->p.nr_cpus; cpu++) - CPU_SET(cpu, &mask); + CPU_SET_S(cpu, size, mask); } 
else { struct bitmask *cpumask = numa_allocate_cpumask(); - BUG_ON(!cpumask); + if (!cpumask) + goto err; + if (!numa_node_to_cpus(target_node, cpumask)) { for (cpu = 0; cpu < (int)cpumask->size; cpu++) { if (numa_bitmask_isbitset(cpumask, cpu)) - CPU_SET(cpu, &mask); + CPU_SET_S(cpu, size, mask); } } numa_free_cpumask(cpumask); } - ret = sched_setaffinity(0, sizeof(mask), &mask); - BUG_ON(ret); + if (sched_setaffinity(0, size, mask)) + goto err; return orig_mask; + +err: + CPU_FREE(mask); +err_out: + CPU_FREE(orig_mask); + + /* BUG_ON due to failure in allocation of orig_mask/mask */ + BUG_ON(-1); } -static void bind_to_cpumask(cpu_set_t mask) +static void bind_to_cpumask(cpu_set_t *mask) { int ret; + size_t size = CPU_ALLOC_SIZE(numa_num_possible_cpus()); - ret = sched_setaffinity(0, sizeof(mask), &mask); - BUG_ON(ret); + ret = sched_setaffinity(0, size, mask); + if (ret) { + CPU_FREE(mask); + BUG_ON(ret); + } } static void mempol_restore(void) @@ -376,7 +421,7 @@ do { \ static u8 *alloc_data(ssize_t bytes0, int map_flags, int init_zero, int init_cpu0, int thp, int init_random) { - cpu_set_t orig_mask; + cpu_set_t *orig_mask = NULL; ssize_t bytes; u8 *buf; int ret; @@ -434,6 +479,7 @@ static u8 *alloc_data(ssize_t bytes0, int map_flags, /* Restore affinity: */ if (init_cpu0) { bind_to_cpumask(orig_mask); + CPU_FREE(orig_mask); mempol_restore(); } @@ -585,10 +631,16 @@ static int parse_setup_cpu_list(void) return -1; } + if (is_cpu_online(bind_cpu_0) != 1 || is_cpu_online(bind_cpu_1) != 1) { + printf("\nTest not applicable, bind_cpu_0 or bind_cpu_1 is offline\n"); + return -1; + } + BUG_ON(bind_cpu_0 < 0 || bind_cpu_1 < 0); BUG_ON(bind_cpu_0 > bind_cpu_1); for (bind_cpu = bind_cpu_0; bind_cpu <= bind_cpu_1; bind_cpu += step) { + size_t size = CPU_ALLOC_SIZE(g->p.nr_cpus); int i; for (i = 0; i < mul; i++) { @@ -608,10 +660,15 @@ static int parse_setup_cpu_list(void) tprintf("%2d", bind_cpu); } - CPU_ZERO(&td->bind_cpumask); + td->bind_cpumask = CPU_ALLOC(g->p.nr_cpus); + BUG_ON(!td->bind_cpumask); + CPU_ZERO_S(size, td->bind_cpumask); for (cpu = bind_cpu; cpu < bind_cpu+bind_len; cpu++) { - BUG_ON(cpu < 0 || cpu >= g->p.nr_cpus); - CPU_SET(cpu, &td->bind_cpumask); + if (cpu < 0 || cpu >= g->p.nr_cpus) { + CPU_FREE(td->bind_cpumask); + BUG_ON(-1); + } + CPU_SET_S(cpu, size, td->bind_cpumask); } t++; } @@ -752,8 +809,6 @@ static int parse_nodes_opt(const struct option *opt __maybe_unused, return parse_node_list(arg); } -#define BIT(x) (1ul << x) - static inline uint32_t lfsr_32(uint32_t lfsr) { const uint32_t taps = BIT(1) | BIT(5) | BIT(6) | BIT(31); @@ -1241,7 +1296,7 @@ static void *worker_thread(void *__tdata) * by migrating to CPU#0: */ if (first_task && g->p.perturb_secs && (int)(stop.tv_sec - last_perturbance) >= g->p.perturb_secs) { - cpu_set_t orig_mask; + cpu_set_t *orig_mask; int target_cpu; int this_cpu; @@ -1265,6 +1320,7 @@ static void *worker_thread(void *__tdata) printf(" (injecting perturbalance, moved to CPU#%d)\n", target_cpu); bind_to_cpumask(orig_mask); + CPU_FREE(orig_mask); } if (details >= 3) { @@ -1398,21 +1454,31 @@ static void init_thread_data(void) for (t = 0; t < g->p.nr_tasks; t++) { struct thread_data *td = g->threads + t; + size_t cpuset_size = CPU_ALLOC_SIZE(g->p.nr_cpus); int cpu; /* Allow all nodes by default: */ td->bind_node = NUMA_NO_NODE; /* Allow all CPUs by default: */ - CPU_ZERO(&td->bind_cpumask); + td->bind_cpumask = CPU_ALLOC(g->p.nr_cpus); + BUG_ON(!td->bind_cpumask); + CPU_ZERO_S(cpuset_size, td->bind_cpumask); for (cpu = 0; cpu < g->p.nr_cpus; 
cpu++) - CPU_SET(cpu, &td->bind_cpumask); + CPU_SET_S(cpu, cpuset_size, td->bind_cpumask); } } static void deinit_thread_data(void) { ssize_t size = sizeof(*g->threads)*g->p.nr_tasks; + int t; + + /* Free the bind_cpumask allocated for thread_data */ + for (t = 0; t < g->p.nr_tasks; t++) { + struct thread_data *td = g->threads + t; + CPU_FREE(td->bind_cpumask); + } free_data(g->threads, size); } diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index ba74fab02e62..069825c48d40 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -989,8 +989,11 @@ static int record__thread_data_init_maps(struct record_thread *thread_data, stru struct mmap *overwrite_mmap = evlist->overwrite_mmap; struct perf_cpu_map *cpus = evlist->core.user_requested_cpus; - thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits, - thread_data->mask->maps.nbits); + if (cpu_map__is_dummy(cpus)) + thread_data->nr_mmaps = nr_mmaps; + else + thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits, + thread_data->mask->maps.nbits); if (mmap) { thread_data->maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *)); if (!thread_data->maps) @@ -1007,16 +1010,17 @@ static int record__thread_data_init_maps(struct record_thread *thread_data, stru thread_data->nr_mmaps, thread_data->maps, thread_data->overwrite_maps); for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) { - if (test_bit(cpus->map[m].cpu, thread_data->mask->maps.bits)) { + if (cpu_map__is_dummy(cpus) || + test_bit(cpus->map[m].cpu, thread_data->mask->maps.bits)) { if (thread_data->maps) { thread_data->maps[tm] = &mmap[m]; pr_debug2("thread_data[%p]: cpu%d: maps[%d] -> mmap[%d]\n", - thread_data, cpus->map[m].cpu, tm, m); + thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m); } if (thread_data->overwrite_maps) { thread_data->overwrite_maps[tm] = &overwrite_mmap[m]; pr_debug2("thread_data[%p]: cpu%d: ow_maps[%d] -> ow_mmap[%d]\n", - thread_data, cpus->map[m].cpu, tm, m); + thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m); } tm++; } @@ -3329,6 +3333,9 @@ static void record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_c { int c; + if (cpu_map__is_dummy(cpus)) + return; + for (c = 0; c < cpus->nr; c++) set_bit(cpus->map[c].cpu, mask->bits); } @@ -3680,6 +3687,11 @@ static int record__init_thread_masks(struct record *rec) if (!record__threads_enabled(rec)) return record__init_thread_default_masks(rec, cpus); + if (cpu_map__is_dummy(cpus)) { + pr_err("--per-thread option is mutually exclusive to parallel streaming mode.\n"); + return -EINVAL; + } + switch (rec->opts.threads_spec) { case THREAD_SPEC__CPU: ret = record__init_thread_cpu_masks(rec, cpus); diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 1ad75c7ba074..afe4a5539ecc 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -353,6 +353,7 @@ static int report__setup_sample_type(struct report *rep) struct perf_session *session = rep->session; u64 sample_type = evlist__combined_sample_type(session->evlist); bool is_pipe = perf_data__is_pipe(session->data); + struct evsel *evsel; if (session->itrace_synth_opts->callchain || session->itrace_synth_opts->add_callchain || @@ -407,6 +408,19 @@ static int report__setup_sample_type(struct report *rep) } if (sort__mode == SORT_MODE__MEMORY) { + /* + * FIXUP: prior to kernel 5.18, Arm SPE missed to set + * PERF_SAMPLE_DATA_SRC bit in sample type. For backward + * compatibility, set the bit if it's an old perf data file. 
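The builtin-report.c fixup described in the comment above reduces to OR-ing one bit into each matching event's sample_type. A toy rendition with a stand-in `struct evsel` (perf's real one lives in util/evsel.h; only the bit value is taken from the perf_event UAPI header):

```c
#include <stdio.h>
#include <string.h>

#define PERF_SAMPLE_DATA_SRC (1ULL << 15) /* from <linux/perf_event.h> */

struct evsel { const char *name; unsigned long long sample_type; };

int main(void)
{
	struct evsel evsels[] = {
		{ "arm_spe_0//", 0 }, /* old file: bit missing */
		{ "dummy:u", 0 },
	};
	unsigned long long combined = 0;
	unsigned i;

	for (i = 0; i < sizeof(evsels) / sizeof(evsels[0]); i++) {
		if (strstr(evsels[i].name, "arm_spe") &&
		    !(evsels[i].sample_type & PERF_SAMPLE_DATA_SRC))
			evsels[i].sample_type |= PERF_SAMPLE_DATA_SRC;
		combined |= evsels[i].sample_type;
	}

	printf("combined sample_type: 0x%llx\n", combined);
	return 0;
}
```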
+ */ + evlist__for_each_entry(session->evlist, evsel) { + if (strstr(evsel->name, "arm_spe") && + !(sample_type & PERF_SAMPLE_DATA_SRC)) { + evsel->core.attr.sample_type |= PERF_SAMPLE_DATA_SRC; + sample_type |= PERF_SAMPLE_DATA_SRC; + } + } + if (!is_pipe && !(sample_type & PERF_SAMPLE_DATA_SRC)) { ui__error("Selected --mem-mode but no mem data. " "Did you call perf record without -d?\n"); diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index a2f117936188..cf5eab5431b4 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -461,7 +461,7 @@ static int evsel__check_attr(struct evsel *evsel, struct perf_session *session) return -EINVAL; if (PRINT_FIELD(DATA_SRC) && - evsel__check_stype(evsel, PERF_SAMPLE_DATA_SRC, "DATA_SRC", PERF_OUTPUT_DATA_SRC)) + evsel__do_check_stype(evsel, PERF_SAMPLE_DATA_SRC, "DATA_SRC", PERF_OUTPUT_DATA_SRC, allow_user_set)) return -EINVAL; if (PRINT_FIELD(WEIGHT) && diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 2f6b67189b42..0170cb0819d6 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -55,6 +55,7 @@ struct cmd_struct { }; static struct cmd_struct commands[] = { + { "archive", NULL, 0 }, { "buildid-cache", cmd_buildid_cache, 0 }, { "buildid-list", cmd_buildid_list, 0 }, { "config", cmd_config, 0 }, @@ -62,6 +63,7 @@ static struct cmd_struct commands[] = { { "diff", cmd_diff, 0 }, { "evlist", cmd_evlist, 0 }, { "help", cmd_help, 0 }, + { "iostat", NULL, 0 }, { "kallsyms", cmd_kallsyms, 0 }, { "list", cmd_list, 0 }, { "record", cmd_record, 0 }, @@ -360,6 +362,8 @@ static void handle_internal_command(int argc, const char **argv) for (i = 0; i < ARRAY_SIZE(commands); i++) { struct cmd_struct *p = commands+i; + if (p->fn == NULL) + continue; if (strcmp(p->cmd, cmd)) continue; exit(run_builtin(p, argc, argv)); @@ -434,7 +438,7 @@ void pthread__unblock_sigwinch(void) static int libperf_print(enum libperf_print_level level, const char *fmt, va_list ap) { - return eprintf(level, verbose, fmt, ap); + return veprintf(level, verbose, fmt, ap); } int main(int argc, const char **argv) diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c index 2dab2d262060..afdca7f2959f 100644 --- a/tools/perf/tests/dwarf-unwind.c +++ b/tools/perf/tests/dwarf-unwind.c @@ -122,7 +122,7 @@ NO_TAIL_CALL_ATTRIBUTE noinline int test_dwarf_unwind__thread(struct thread *thr } err = unwind__get_entries(unwind_entry, &cnt, thread, - &sample, MAX_STACK); + &sample, MAX_STACK, false); if (err) pr_debug("unwind failed\n"); else if (cnt != MAX_STACK) { diff --git a/tools/perf/tests/perf-time-to-tsc.c b/tools/perf/tests/perf-time-to-tsc.c index d12d0ad81801..4ad0dfbc8b21 100644 --- a/tools/perf/tests/perf-time-to-tsc.c +++ b/tools/perf/tests/perf-time-to-tsc.c @@ -47,6 +47,17 @@ } \ } +static int test__tsc_is_supported(struct test_suite *test __maybe_unused, + int subtest __maybe_unused) +{ + if (!TSC_IS_SUPPORTED) { + pr_debug("Test not supported on this architecture\n"); + return TEST_SKIP; + } + + return TEST_OK; +} + /** * test__perf_time_to_tsc - test converting perf time to TSC. 
* @@ -70,7 +81,7 @@ static int test__perf_time_to_tsc(struct test_suite *test __maybe_unused, int su struct perf_cpu_map *cpus = NULL; struct evlist *evlist = NULL; struct evsel *evsel = NULL; - int err = -1, ret, i; + int err = TEST_FAIL, ret, i; const char *comm1, *comm2; struct perf_tsc_conversion tc; struct perf_event_mmap_page *pc; @@ -79,10 +90,6 @@ static int test__perf_time_to_tsc(struct test_suite *test __maybe_unused, int su u64 test_time, comm1_time = 0, comm2_time = 0; struct mmap *md; - if (!TSC_IS_SUPPORTED) { - pr_debug("Test not supported on this architecture"); - return TEST_SKIP; - } threads = thread_map__new(-1, getpid(), UINT_MAX); CHECK_NOT_NULL__(threads); @@ -116,6 +123,10 @@ static int test__perf_time_to_tsc(struct test_suite *test __maybe_unused, int su evsel->core.attr.enable_on_exec = 0; } + if (evlist__open(evlist) == -ENOENT) { + err = TEST_SKIP; + goto out_err; + } CHECK__(evlist__open(evlist)); CHECK__(evlist__mmap(evlist, UINT_MAX)); @@ -124,8 +135,8 @@ static int test__perf_time_to_tsc(struct test_suite *test __maybe_unused, int su ret = perf_read_tsc_conversion(pc, &tc); if (ret) { if (ret == -EOPNOTSUPP) { - fprintf(stderr, " (not supported)"); - return 0; + pr_debug("perf_read_tsc_conversion is not supported in current kernel\n"); + err = TEST_SKIP; } goto out_err; } @@ -191,7 +202,7 @@ next_event: test_tsc >= comm2_tsc) goto out_err; - err = 0; + err = TEST_OK; out_err: evlist__delete(evlist); @@ -200,4 +211,15 @@ out_err: return err; } -DEFINE_SUITE("Convert perf time to TSC", perf_time_to_tsc); +static struct test_case time_to_tsc_tests[] = { + TEST_CASE_REASON("TSC support", tsc_is_supported, + "This architecture does not support"), + TEST_CASE_REASON("Perf time to TSC", perf_time_to_tsc, + "perf_read_tsc_conversion is not supported"), + { .name = NULL, } +}; + +struct test_suite suite__perf_time_to_tsc = { + .desc = "Convert perf time to TSC", + .test_cases = time_to_tsc_tests, +}; diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index e4c641b240df..82cc396ef516 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -2047,6 +2047,7 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) objdump_process.argv = objdump_argv; objdump_process.out = -1; objdump_process.err = -1; + objdump_process.no_stderr = 1; if (start_command(&objdump_process)) { pr_err("Failure starting to run %s\n", command); err = -1; diff --git a/tools/perf/util/arm64-frame-pointer-unwind-support.c b/tools/perf/util/arm64-frame-pointer-unwind-support.c index 2242a885fbd7..4940be4a0569 100644 --- a/tools/perf/util/arm64-frame-pointer-unwind-support.c +++ b/tools/perf/util/arm64-frame-pointer-unwind-support.c @@ -53,7 +53,7 @@ u64 get_leaf_frame_caller_aarch64(struct perf_sample *sample, struct thread *thr sample->user_regs.cache_regs[PERF_REG_ARM64_SP] = 0; } - ret = unwind__get_entries(add_entry, &entries, thread, sample, 2); + ret = unwind__get_entries(add_entry, &entries, thread, sample, 2, true); sample->user_regs = old_regs; if (ret || entries.length != 2) diff --git a/tools/perf/util/c++/clang.cpp b/tools/perf/util/c++/clang.cpp index df7b18fb6b6e..1aad7d6d34aa 100644 --- a/tools/perf/util/c++/clang.cpp +++ b/tools/perf/util/c++/clang.cpp @@ -20,7 +20,11 @@ #include "llvm/Option/Option.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/ManagedStatic.h" +#if CLANG_VERSION_MAJOR >= 14 +#include "llvm/MC/TargetRegistry.h" +#else #include "llvm/Support/TargetRegistry.h" +#endif #include 
"llvm/Support/TargetSelect.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index d546ff724dbe..a27132e5a5ef 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -983,6 +983,57 @@ static int write_dir_format(struct feat_fd *ff, return do_write(ff, &data->dir.version, sizeof(data->dir.version)); } +/* + * Check whether a CPU is online + * + * Returns: + * 1 -> if CPU is online + * 0 -> if CPU is offline + * -1 -> error case + */ +int is_cpu_online(unsigned int cpu) +{ + char *str; + size_t strlen; + char buf[256]; + int status = -1; + struct stat statbuf; + + snprintf(buf, sizeof(buf), + "/sys/devices/system/cpu/cpu%d", cpu); + if (stat(buf, &statbuf) != 0) + return 0; + + /* + * Check if /sys/devices/system/cpu/cpux/online file + * exists. Some cases cpu0 won't have online file since + * it is not expected to be turned off generally. + * In kernels without CONFIG_HOTPLUG_CPU, this + * file won't exist + */ + snprintf(buf, sizeof(buf), + "/sys/devices/system/cpu/cpu%d/online", cpu); + if (stat(buf, &statbuf) != 0) + return 1; + + /* + * Read online file using sysfs__read_str. + * If read or open fails, return -1. + * If read succeeds, return value from file + * which gets stored in "str" + */ + snprintf(buf, sizeof(buf), + "devices/system/cpu/cpu%d/online", cpu); + + if (sysfs__read_str(buf, &str, &strlen) < 0) + return status; + + status = atoi(str); + + free(str); + return status; +} + #ifdef HAVE_LIBBPF_SUPPORT static int write_bpf_prog_info(struct feat_fd *ff, struct evlist *evlist __maybe_unused) diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index c9e3265832d9..0eb4bc29a5a4 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -158,6 +158,7 @@ int do_write(struct feat_fd *fd, const void *buf, size_t size); int write_padded(struct feat_fd *fd, const void *bf, size_t count, size_t count_aligned); +int is_cpu_online(unsigned int cpu); /* * arch specific callback */ diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index b80048546451..95391236f5f6 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -2987,7 +2987,7 @@ static int thread__resolve_callchain_unwind(struct thread *thread, return 0; return unwind__get_entries(unwind_entry, cursor, - thread, sample, max_stack); + thread, sample, max_stack, false); } int thread__resolve_callchain(struct thread *thread, diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 24997925ae00..dd84fed698a3 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1523,7 +1523,9 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, bool use_uncore_alias; LIST_HEAD(config_terms); - if (verbose > 1) { + pmu = parse_state->fake_pmu ?: perf_pmu__find(name); + + if (verbose > 1 && !(pmu && pmu->selectable)) { fprintf(stderr, "Attempting to add event pmu '%s' with '", name); if (head_config) { @@ -1536,7 +1538,6 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, fprintf(stderr, "' that may result in non-fatal errors\n"); } - pmu = parse_state->fake_pmu ?: perf_pmu__find(name); if (!pmu) { char *err_str; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 3b8dfe603e50..45a30040ec8d 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -2095,6 +2095,7 @@ prefetch_event(char *buf, u64 head, size_t mmap_size, bool needs_swap, union perf_event *error) { 
union perf_event *event; + u16 event_size; /* * Ensure we have enough space remaining to read @@ -2107,15 +2108,23 @@ prefetch_event(char *buf, u64 head, size_t mmap_size, if (needs_swap) perf_event_header__bswap(&event->header); - if (head + event->header.size <= mmap_size) + event_size = event->header.size; + if (head + event_size <= mmap_size) return event; /* We're not fetching the event so swap back again */ if (needs_swap) perf_event_header__bswap(&event->header); - pr_debug("%s: head=%#" PRIx64 " event->header_size=%#x, mmap_size=%#zx:" - " fuzzed or compressed perf.data?\n",__func__, head, event->header.size, mmap_size); + /* Check if the event fits into the next mmapped buf. */ + if (event_size <= mmap_size - head % page_size) { + /* Remap buf and fetch again. */ + return NULL; + } + + /* Invalid input. Event size should never exceed mmap_size. */ + pr_debug("%s: head=%#" PRIx64 " event->header.size=%#x, mmap_size=%#zx:" + " fuzzed or compressed perf.data?\n", __func__, head, event_size, mmap_size); return error; } diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index 483f05004e68..c255a2c90cd6 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py @@ -1,12 +1,14 @@ -from os import getenv +from os import getenv, path from subprocess import Popen, PIPE from re import sub cc = getenv("CC") cc_is_clang = b"clang version" in Popen([cc.split()[0], "-v"], stderr=PIPE).stderr.readline() +src_feature_tests = getenv('srctree') + '/tools/build/feature' def clang_has_option(option): - return [o for o in Popen([cc, option], stderr=PIPE).stderr.readlines() if b"unknown argument" in o] == [ ] + cc_output = Popen([cc, option, path.join(src_feature_tests, "test-hello.c") ], stderr=PIPE).stderr.readlines() + return [o for o in cc_output if ((b"unknown argument" in o) or (b"is not supported" in o))] == [ ] if cc_is_clang: from distutils.sysconfig import get_config_vars @@ -23,6 +25,8 @@ if cc_is_clang: vars[var] = sub("-fstack-protector-strong", "", vars[var]) if not clang_has_option("-fno-semantic-interposition"): vars[var] = sub("-fno-semantic-interposition", "", vars[var]) + if not clang_has_option("-ffat-lto-objects"): + vars[var] = sub("-ffat-lto-objects", "", vars[var]) from distutils.core import setup, Extension diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index ee6f03481215..817a2de264b4 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <errno.h> +#include <linux/err.h> #include <inttypes.h> #include <math.h> #include <string.h> @@ -311,7 +312,7 @@ static int check_per_pkg(struct evsel *counter, struct perf_counts_values *vals, if (!mask) { mask = hashmap__new(pkg_id_hash, pkg_id_equal, NULL); - if (!mask) + if (IS_ERR(mask)) return -ENOMEM; counter->per_pkg_mask = mask; diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c index a74b517f7497..94aa40f6e348 100644 --- a/tools/perf/util/unwind-libdw.c +++ b/tools/perf/util/unwind-libdw.c @@ -200,7 +200,8 @@ frame_callback(Dwfl_Frame *state, void *arg) bool isactivation; if (!dwfl_frame_pc(state, &pc, NULL)) { - pr_err("%s", dwfl_errmsg(-1)); + if (!ui->best_effort) + pr_err("%s", dwfl_errmsg(-1)); return DWARF_CB_ABORT; } @@ -208,7 +209,8 @@ frame_callback(Dwfl_Frame *state, void *arg) report_module(pc, ui); if (!dwfl_frame_pc(state, &pc, &isactivation)) { - pr_err("%s", dwfl_errmsg(-1)); + if (!ui->best_effort) + pr_err("%s", dwfl_errmsg(-1)); return DWARF_CB_ABORT; } @@ -222,7 +224,8 @@ 
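One subtlety in the util/stat.c hunk above: hashmap__new() signals failure with ERR_PTR(-ENOMEM), never NULL, so the old `if (!mask)` check could never fire and a failed allocation would later be used as a valid pointer. A stripped-down restatement of the corrected idiom (pkg_id_hash/pkg_id_equal as defined in stat.c):

	mask = counter->per_pkg_mask;
	if (!mask) {
		mask = hashmap__new(pkg_id_hash, pkg_id_equal, NULL);
		if (IS_ERR(mask))	/* failure is an encoded errno, not NULL */
			return -ENOMEM;
		counter->per_pkg_mask = mask;
	}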
frame_callback(Dwfl_Frame *state, void *arg) int unwind__get_entries(unwind_entry_cb_t cb, void *arg, struct thread *thread, struct perf_sample *data, - int max_stack) + int max_stack, + bool best_effort) { struct unwind_info *ui, ui_buf = { .sample = data, @@ -231,6 +234,7 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg, .cb = cb, .arg = arg, .max_stack = max_stack, + .best_effort = best_effort }; Dwarf_Word ip; int err = -EINVAL, i; diff --git a/tools/perf/util/unwind-libdw.h b/tools/perf/util/unwind-libdw.h index 0cbd2650e280..8c88bc4f2304 100644 --- a/tools/perf/util/unwind-libdw.h +++ b/tools/perf/util/unwind-libdw.h @@ -20,6 +20,7 @@ struct unwind_info { void *arg; int max_stack; int idx; + bool best_effort; struct unwind_entry entries[]; }; diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c index 71a353349181..41e29fc7648a 100644 --- a/tools/perf/util/unwind-libunwind-local.c +++ b/tools/perf/util/unwind-libunwind-local.c @@ -96,6 +96,7 @@ struct unwind_info { struct perf_sample *sample; struct machine *machine; struct thread *thread; + bool best_effort; }; #define dw_read(ptr, type, end) ({ \ @@ -553,7 +554,8 @@ static int access_reg(unw_addr_space_t __maybe_unused as, ret = perf_reg_value(&val, &ui->sample->user_regs, id); if (ret) { - pr_err("unwind: can't read reg %d\n", regnum); + if (!ui->best_effort) + pr_err("unwind: can't read reg %d\n", regnum); return ret; } @@ -666,7 +668,7 @@ static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb, return -1; ret = unw_init_remote(&c, addr_space, ui); - if (ret) + if (ret && !ui->best_effort) display_error(ret); while (!ret && (unw_step(&c) > 0) && i < max_stack) { @@ -704,12 +706,14 @@ static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb, static int _unwind__get_entries(unwind_entry_cb_t cb, void *arg, struct thread *thread, - struct perf_sample *data, int max_stack) + struct perf_sample *data, int max_stack, + bool best_effort) { struct unwind_info ui = { .sample = data, .thread = thread, .machine = thread->maps->machine, + .best_effort = best_effort }; if (!data->user_regs.regs) diff --git a/tools/perf/util/unwind-libunwind.c b/tools/perf/util/unwind-libunwind.c index e89a5479b361..509c287ee762 100644 --- a/tools/perf/util/unwind-libunwind.c +++ b/tools/perf/util/unwind-libunwind.c @@ -80,9 +80,11 @@ void unwind__finish_access(struct maps *maps) int unwind__get_entries(unwind_entry_cb_t cb, void *arg, struct thread *thread, - struct perf_sample *data, int max_stack) + struct perf_sample *data, int max_stack, + bool best_effort) { if (thread->maps->unwind_libunwind_ops) - return thread->maps->unwind_libunwind_ops->get_entries(cb, arg, thread, data, max_stack); + return thread->maps->unwind_libunwind_ops->get_entries(cb, arg, thread, data, + max_stack, best_effort); return 0; } diff --git a/tools/perf/util/unwind.h b/tools/perf/util/unwind.h index ab8ad469c8de..b2a03fa5289b 100644 --- a/tools/perf/util/unwind.h +++ b/tools/perf/util/unwind.h @@ -23,13 +23,19 @@ struct unwind_libunwind_ops { void (*finish_access)(struct maps *maps); int (*get_entries)(unwind_entry_cb_t cb, void *arg, struct thread *thread, - struct perf_sample *data, int max_stack); + struct perf_sample *data, int max_stack, bool best_effort); }; #ifdef HAVE_DWARF_UNWIND_SUPPORT +/* + * When best_effort is set, don't report errors and fail silently. This could + * be expanded in the future to be more permissive about things other than + * error messages. 
+ */ int unwind__get_entries(unwind_entry_cb_t cb, void *arg, struct thread *thread, - struct perf_sample *data, int max_stack); + struct perf_sample *data, int max_stack, + bool best_effort); /* libunwind specific */ #ifdef HAVE_LIBUNWIND_SUPPORT #ifndef LIBUNWIND__ARCH_REG_ID @@ -65,7 +71,8 @@ unwind__get_entries(unwind_entry_cb_t cb __maybe_unused, void *arg __maybe_unused, struct thread *thread __maybe_unused, struct perf_sample *data __maybe_unused, - int max_stack __maybe_unused) + int max_stack __maybe_unused, + bool best_effort __maybe_unused) { return 0; } diff --git a/tools/power/x86/intel-speed-select/Makefile b/tools/power/x86/intel-speed-select/Makefile index 846f785e278d..7221f2f55e8b 100644 --- a/tools/power/x86/intel-speed-select/Makefile +++ b/tools/power/x86/intel-speed-select/Makefile @@ -42,7 +42,7 @@ ISST_IN := $(OUTPUT)intel-speed-select-in.o $(ISST_IN): prepare FORCE $(Q)$(MAKE) $(build)=intel-speed-select $(OUTPUT)intel-speed-select: $(ISST_IN) - $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@ + $(QUIET_LINK)$(CC) $(CFLAGS) $< $(LDFLAGS) -o $@ clean: rm -f $(ALL_PROGRAMS) diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index 65dbdda3a054..1da76ccde448 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -1842,7 +1842,7 @@ static int nfit_test_dimm_init(struct nfit_test *t) return 0; } -static void security_init(struct nfit_test *t) +static void nfit_security_init(struct nfit_test *t) { int i; @@ -1938,7 +1938,7 @@ static int nfit_test0_alloc(struct nfit_test *t) if (nfit_test_dimm_init(t)) return -ENOMEM; smart_init(t); - security_init(t); + nfit_security_init(t); return ars_state_init(&t->pdev.dev, &t->ars_state); } diff --git a/tools/testing/radix-tree/linux.c b/tools/testing/radix-tree/linux.c index 81539f543954..d5c1bcba86fe 100644 --- a/tools/testing/radix-tree/linux.c +++ b/tools/testing/radix-tree/linux.c @@ -25,7 +25,8 @@ struct kmem_cache { void (*ctor)(void *); }; -void *kmem_cache_alloc(struct kmem_cache *cachep, int gfp) +void *kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *lru, + int gfp) { void *p; diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 3820608faf57..bafdc5373a13 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -168,9 +168,15 @@ $(OUTPUT)/%:%.c $(call msg,BINARY,,$@) $(Q)$(LINK.c) $^ $(LDLIBS) -o $@ -$(OUTPUT)/urandom_read: urandom_read.c +$(OUTPUT)/liburandom_read.so: urandom_read_lib1.c urandom_read_lib2.c + $(call msg,LIB,,$@) + $(Q)$(CC) $(CFLAGS) -fPIC $(LDFLAGS) $^ $(LDLIBS) --shared -o $@ + +$(OUTPUT)/urandom_read: urandom_read.c urandom_read_aux.c $(OUTPUT)/liburandom_read.so $(call msg,BINARY,,$@) - $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $< $(LDLIBS) -Wl,--build-id=sha1 -o $@ + $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.c,$^) \ + liburandom_read.so $(LDLIBS) \ + -Wl,-rpath=. -Wl,--build-id=sha1 -o $@ $(OUTPUT)/bpf_testmod.ko: $(VMLINUX_BTF) $(wildcard bpf_testmod/Makefile bpf_testmod/*.[ch]) $(call msg,MOD,,$@) @@ -328,12 +334,8 @@ SKEL_BLACKLIST := btf__% test_pinning_invalid.c test_sk_assign.c LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h \ linked_vars.skel.h linked_maps.skel.h \ - test_subskeleton.skel.h test_subskeleton_lib.skel.h - -# In the subskeleton case, we want the test_subskeleton_lib.subskel.h file -# but that's created as a side-effect of the skel.h generation. 
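Worth spelling out for the intel-speed-select Makefile hunk above: a single-pass linker only pulls symbols from a library for references that are still undefined at the point the library appears on the command line, so any -l options carried in LDFLAGS must come after the objects that use them. Moving from `$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@` to `$(CC) $(CFLAGS) $< $(LDFLAGS) -o $@` fixes link failures seen with such toolchains (notably when --as-needed is in effect).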
-test_subskeleton.skel.h-deps := test_subskeleton_lib2.o test_subskeleton_lib.o test_subskeleton.o -test_subskeleton_lib.skel.h-deps := test_subskeleton_lib2.o test_subskeleton_lib.o + test_subskeleton.skel.h test_subskeleton_lib.skel.h \ + test_usdt.skel.h LSKELS := kfunc_call_test.c fentry_test.c fexit_test.c fexit_sleep.c \ test_ringbuf.c atomics.c trace_printk.c trace_vprintk.c \ @@ -346,6 +348,11 @@ test_static_linked.skel.h-deps := test_static_linked1.o test_static_linked2.o linked_funcs.skel.h-deps := linked_funcs1.o linked_funcs2.o linked_vars.skel.h-deps := linked_vars1.o linked_vars2.o linked_maps.skel.h-deps := linked_maps1.o linked_maps2.o +# In the subskeleton case, we want the test_subskeleton_lib.subskel.h file +# but that's created as a side-effect of the skel.h generation. +test_subskeleton.skel.h-deps := test_subskeleton_lib2.o test_subskeleton_lib.o test_subskeleton.o +test_subskeleton_lib.skel.h-deps := test_subskeleton_lib2.o test_subskeleton_lib.o +test_usdt.skel.h-deps := test_usdt.o test_usdt_multispec.o LINKED_BPF_SRCS := $(patsubst %.o,%.c,$(foreach skel,$(LINKED_SKELS),$($(skel)-deps))) @@ -400,6 +407,7 @@ $(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.o: \ $(TRUNNER_BPF_PROGS_DIR)/*.h \ $$(INCLUDE_DIR)/vmlinux.h \ $(wildcard $(BPFDIR)/bpf_*.h) \ + $(wildcard $(BPFDIR)/*.bpf.h) \ | $(TRUNNER_OUTPUT) $$(BPFOBJ) $$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@, \ $(TRUNNER_BPF_CFLAGS)) @@ -491,6 +499,7 @@ TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c \ btf_helpers.c flow_dissector_load.h \ cap_helpers.c TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko \ + $(OUTPUT)/liburandom_read.so \ ima_setup.sh \ $(wildcard progs/btf_dump_test_case_*.c) TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c index f973320e6dbf..f061cc20e776 100644 --- a/tools/testing/selftests/bpf/bench.c +++ b/tools/testing/selftests/bpf/bench.c @@ -8,7 +8,6 @@ #include <fcntl.h> #include <pthread.h> #include <sys/sysinfo.h> -#include <sys/resource.h> #include <signal.h> #include "bench.h" #include "testing_helpers.h" diff --git a/tools/testing/selftests/bpf/bpf_rlimit.h b/tools/testing/selftests/bpf/bpf_rlimit.h deleted file mode 100644 index 9dac9b30f8ef..000000000000 --- a/tools/testing/selftests/bpf/bpf_rlimit.h +++ /dev/null @@ -1,28 +0,0 @@ -#include <sys/resource.h> -#include <stdio.h> - -static __attribute__((constructor)) void bpf_rlimit_ctor(void) -{ - struct rlimit rlim_old, rlim_new = { - .rlim_cur = RLIM_INFINITY, - .rlim_max = RLIM_INFINITY, - }; - - getrlimit(RLIMIT_MEMLOCK, &rlim_old); - /* For the sake of running the test cases, we temporarily - * set rlimit to infinity in order for kernel to focus on - * errors from actual test cases and not getting noise - * from hitting memlock limits. The limit is on per-process - * basis and not a global one, hence destructor not really - * needed here. - */ - if (setrlimit(RLIMIT_MEMLOCK, &rlim_new) < 0) { - perror("Unable to lift memlock rlimit"); - /* Trying out lower limit, but expect potential test - * case failures from this! 
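Context for deleting bpf_rlimit.h outright rather than porting its users: since kernel 5.11, BPF maps and programs are charged to the memory cgroup instead of RLIMIT_MEMLOCK, and libbpf's LIBBPF_STRICT_AUTO_RLIMIT_MEMLOCK behavior (part of LIBBPF_STRICT_ALL) raises the memlock limit automatically on older kernels. That is why the converted callers below need nothing more than:

	/* Use libbpf 1.0 API mode */
	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);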
- */ - rlim_new.rlim_cur = rlim_old.rlim_cur + (1UL << 20); - rlim_new.rlim_max = rlim_old.rlim_max + (1UL << 20); - setrlimit(RLIMIT_MEMLOCK, &rlim_new); - } -} diff --git a/tools/testing/selftests/bpf/flow_dissector_load.c b/tools/testing/selftests/bpf/flow_dissector_load.c index 87fd1aa323a9..c8be6406777f 100644 --- a/tools/testing/selftests/bpf/flow_dissector_load.c +++ b/tools/testing/selftests/bpf/flow_dissector_load.c @@ -11,7 +11,6 @@ #include <bpf/bpf.h> #include <bpf/libbpf.h> -#include "bpf_rlimit.h" #include "flow_dissector_load.h" const char *cfg_pin_path = "/sys/fs/bpf/flow_dissector"; @@ -25,9 +24,8 @@ static void load_and_attach_program(void) int prog_fd, ret; struct bpf_object *obj; - ret = libbpf_set_strict_mode(LIBBPF_STRICT_ALL); - if (ret) - error(1, 0, "failed to enable libbpf strict mode: %d", ret); + /* Use libbpf 1.0 API mode */ + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); ret = bpf_flow_load(&obj, cfg_path_name, cfg_prog_name, cfg_map_name, NULL, &prog_fd, NULL); diff --git a/tools/testing/selftests/bpf/get_cgroup_id_user.c b/tools/testing/selftests/bpf/get_cgroup_id_user.c index 3a7b82bd9e94..e021cc67dc02 100644 --- a/tools/testing/selftests/bpf/get_cgroup_id_user.c +++ b/tools/testing/selftests/bpf/get_cgroup_id_user.c @@ -20,7 +20,6 @@ #include "cgroup_helpers.h" #include "testing_helpers.h" -#include "bpf_rlimit.h" #define CHECK(condition, tag, format...) ({ \ int __ret = !!(condition); \ @@ -67,6 +66,9 @@ int main(int argc, char **argv) if (CHECK(cgroup_fd < 0, "cgroup_setup_and_join", "err %d errno %d\n", cgroup_fd, errno)) return 1; + /* Use libbpf 1.0 API mode */ + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); + err = bpf_prog_test_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd); if (CHECK(err, "bpf_prog_test_load", "err %d errno %d\n", err, errno)) goto cleanup_cgroup_env; diff --git a/tools/testing/selftests/bpf/prog_tests/arg_parsing.c b/tools/testing/selftests/bpf/prog_tests/arg_parsing.c new file mode 100644 index 000000000000..b17bfa0e0aac --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/arg_parsing.c @@ -0,0 +1,107 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) + +#include "test_progs.h" +#include "testing_helpers.h" + +static void init_test_filter_set(struct test_filter_set *set) +{ + set->cnt = 0; + set->tests = NULL; +} + +static void free_test_filter_set(struct test_filter_set *set) +{ + int i, j; + + for (i = 0; i < set->cnt; i++) { + for (j = 0; j < set->tests[i].subtest_cnt; j++) + free((void *)set->tests[i].subtests[j]); + free(set->tests[i].subtests); + free(set->tests[i].name); + } + + free(set->tests); + init_test_filter_set(set); +} + +static void test_parse_test_list(void) +{ + struct test_filter_set set; + + init_test_filter_set(&set); + + ASSERT_OK(parse_test_list("arg_parsing", &set, true), "parsing"); + if (!ASSERT_EQ(set.cnt, 1, "test filters count")) + goto error; + if (!ASSERT_OK_PTR(set.tests, "test filters initialized")) + goto error; + ASSERT_EQ(set.tests[0].subtest_cnt, 0, "subtest filters count"); + ASSERT_OK(strcmp("arg_parsing", set.tests[0].name), "subtest name"); + free_test_filter_set(&set); + + ASSERT_OK(parse_test_list("arg_parsing,bpf_cookie", &set, true), + "parsing"); + if (!ASSERT_EQ(set.cnt, 2, "count of test filters")) + goto error; + if (!ASSERT_OK_PTR(set.tests, "test filters initialized")) + goto error; + ASSERT_EQ(set.tests[0].subtest_cnt, 0, "subtest filters count"); + ASSERT_EQ(set.tests[1].subtest_cnt, 0, "subtest filters count"); + ASSERT_OK(strcmp("arg_parsing", 
set.tests[0].name), "test name"); + ASSERT_OK(strcmp("bpf_cookie", set.tests[1].name), "test name"); + free_test_filter_set(&set); + + ASSERT_OK(parse_test_list("arg_parsing/arg_parsing,bpf_cookie", + &set, + true), + "parsing"); + if (!ASSERT_EQ(set.cnt, 2, "count of test filters")) + goto error; + if (!ASSERT_OK_PTR(set.tests, "test filters initialized")) + goto error; + if (!ASSERT_EQ(set.tests[0].subtest_cnt, 1, "subtest filters count")) + goto error; + ASSERT_EQ(set.tests[1].subtest_cnt, 0, "subtest filters count"); + ASSERT_OK(strcmp("arg_parsing", set.tests[0].name), "test name"); + ASSERT_OK(strcmp("arg_parsing", set.tests[0].subtests[0]), + "subtest name"); + ASSERT_OK(strcmp("bpf_cookie", set.tests[1].name), "test name"); + free_test_filter_set(&set); + + ASSERT_OK(parse_test_list("arg_parsing/arg_parsing", &set, true), + "parsing"); + ASSERT_OK(parse_test_list("bpf_cookie", &set, true), "parsing"); + ASSERT_OK(parse_test_list("send_signal", &set, true), "parsing"); + if (!ASSERT_EQ(set.cnt, 3, "count of test filters")) + goto error; + if (!ASSERT_OK_PTR(set.tests, "test filters initialized")) + goto error; + if (!ASSERT_EQ(set.tests[0].subtest_cnt, 1, "subtest filters count")) + goto error; + ASSERT_EQ(set.tests[1].subtest_cnt, 0, "subtest filters count"); + ASSERT_EQ(set.tests[2].subtest_cnt, 0, "subtest filters count"); + ASSERT_OK(strcmp("arg_parsing", set.tests[0].name), "test name"); + ASSERT_OK(strcmp("arg_parsing", set.tests[0].subtests[0]), + "subtest name"); + ASSERT_OK(strcmp("bpf_cookie", set.tests[1].name), "test name"); + ASSERT_OK(strcmp("send_signal", set.tests[2].name), "test name"); + free_test_filter_set(&set); + + ASSERT_OK(parse_test_list("bpf_cookie/trace", &set, false), "parsing"); + if (!ASSERT_EQ(set.cnt, 1, "count of test filters")) + goto error; + if (!ASSERT_OK_PTR(set.tests, "test filters initialized")) + goto error; + if (!ASSERT_EQ(set.tests[0].subtest_cnt, 1, "subtest filters count")) + goto error; + ASSERT_OK(strcmp("*bpf_cookie*", set.tests[0].name), "test name"); + ASSERT_OK(strcmp("*trace*", set.tests[0].subtests[0]), "subtest name"); +error: + free_test_filter_set(&set); +} + +void test_arg_parsing(void) +{ + if (test__start_subtest("test_parse_test_list")) + test_parse_test_list(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c index d48f6e533e1e..c0c6d410751d 100644 --- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c +++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c @@ -11,15 +11,22 @@ static void trigger_func(void) asm volatile (""); } +/* attach point for byname uprobe */ +static void trigger_func2(void) +{ + asm volatile (""); +} + void test_attach_probe(void) { DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts); - int duration = 0; struct bpf_link *kprobe_link, *kretprobe_link; struct bpf_link *uprobe_link, *uretprobe_link; struct test_attach_probe* skel; ssize_t uprobe_offset, ref_ctr_offset; + struct bpf_link *uprobe_err_link; bool legacy; + char *mem; /* Check if new-style kprobe/uprobe API is supported. 
* Kernels that support new FD-based kprobe and uprobe BPF attachment @@ -43,9 +50,9 @@ void test_attach_probe(void) return; skel = test_attach_probe__open_and_load(); - if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) + if (!ASSERT_OK_PTR(skel, "skel_open")) return; - if (CHECK(!skel->bss, "check_bss", ".bss wasn't mmap()-ed\n")) + if (!ASSERT_OK_PTR(skel->bss, "check_bss")) goto cleanup; kprobe_link = bpf_program__attach_kprobe(skel->progs.handle_kprobe, @@ -90,25 +97,73 @@ void test_attach_probe(void) goto cleanup; skel->links.handle_uretprobe = uretprobe_link; - /* trigger & validate kprobe && kretprobe */ - usleep(1); + /* verify auto-attach fails for old-style uprobe definition */ + uprobe_err_link = bpf_program__attach(skel->progs.handle_uprobe_byname); + if (!ASSERT_EQ(libbpf_get_error(uprobe_err_link), -EOPNOTSUPP, + "auto-attach should fail for old-style name")) + goto cleanup; + + uprobe_opts.func_name = "trigger_func2"; + uprobe_opts.retprobe = false; + uprobe_opts.ref_ctr_offset = 0; + skel->links.handle_uprobe_byname = + bpf_program__attach_uprobe_opts(skel->progs.handle_uprobe_byname, + 0 /* this pid */, + "/proc/self/exe", + 0, &uprobe_opts); + if (!ASSERT_OK_PTR(skel->links.handle_uprobe_byname, "attach_uprobe_byname")) + goto cleanup; + + /* verify auto-attach works */ + skel->links.handle_uretprobe_byname = + bpf_program__attach(skel->progs.handle_uretprobe_byname); + if (!ASSERT_OK_PTR(skel->links.handle_uretprobe_byname, "attach_uretprobe_byname")) + goto cleanup; - if (CHECK(skel->bss->kprobe_res != 1, "check_kprobe_res", - "wrong kprobe res: %d\n", skel->bss->kprobe_res)) + /* test attach by name for a library function, using the library + * as the binary argument. libc.so.6 will be resolved via dlopen()/dlinfo(). + */ + uprobe_opts.func_name = "malloc"; + uprobe_opts.retprobe = false; + skel->links.handle_uprobe_byname2 = + bpf_program__attach_uprobe_opts(skel->progs.handle_uprobe_byname2, + 0 /* this pid */, + "libc.so.6", + 0, &uprobe_opts); + if (!ASSERT_OK_PTR(skel->links.handle_uprobe_byname2, "attach_uprobe_byname2")) goto cleanup; - if (CHECK(skel->bss->kretprobe_res != 2, "check_kretprobe_res", - "wrong kretprobe res: %d\n", skel->bss->kretprobe_res)) + + uprobe_opts.func_name = "free"; + uprobe_opts.retprobe = true; + skel->links.handle_uretprobe_byname2 = + bpf_program__attach_uprobe_opts(skel->progs.handle_uretprobe_byname2, + -1 /* any pid */, + "libc.so.6", + 0, &uprobe_opts); + if (!ASSERT_OK_PTR(skel->links.handle_uretprobe_byname2, "attach_uretprobe_byname2")) goto cleanup; + /* trigger & validate kprobe && kretprobe */ + usleep(1); + + /* trigger & validate shared library u[ret]probes attached by name */ + mem = malloc(1); + free(mem); + /* trigger & validate uprobe & uretprobe */ trigger_func(); - if (CHECK(skel->bss->uprobe_res != 3, "check_uprobe_res", - "wrong uprobe res: %d\n", skel->bss->uprobe_res)) - goto cleanup; - if (CHECK(skel->bss->uretprobe_res != 4, "check_uretprobe_res", - "wrong uretprobe res: %d\n", skel->bss->uretprobe_res)) - goto cleanup; + /* trigger & validate uprobe attached by name */ + trigger_func2(); + + ASSERT_EQ(skel->bss->kprobe_res, 1, "check_kprobe_res"); + ASSERT_EQ(skel->bss->kretprobe_res, 2, "check_kretprobe_res"); + ASSERT_EQ(skel->bss->uprobe_res, 3, "check_uprobe_res"); + ASSERT_EQ(skel->bss->uretprobe_res, 4, "check_uretprobe_res"); + ASSERT_EQ(skel->bss->uprobe_byname_res, 5, "check_uprobe_byname_res"); + ASSERT_EQ(skel->bss->uretprobe_byname_res, 6, "check_uretprobe_byname_res"); + 
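The byname2 counters checked just below are bumped by the libc malloc()/free() probes attached above by library name. For the auto-attached byname programs, the attach target is encoded in the BPF program's section name; a sketch of the convention (the section string here is illustrative, the real ones live in progs/test_attach_probe.c):

	SEC("uretprobe//proc/self/exe:trigger_func2")
	int handle_uretprobe_byname(struct pt_regs *ctx)
	{
		return 0;
	}

Old-style section names that do not encode a target cannot be auto-attached, which is the -EOPNOTSUPP case verified above.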
ASSERT_EQ(skel->bss->uprobe_byname2_res, 7, "check_uprobe_byname2_res"); + ASSERT_EQ(skel->bss->uretprobe_byname2_res, 8, "check_uretprobe_byname2_res"); cleanup: test_attach_probe__destroy(skel); diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index 5142a7d130b2..2c403ddc8076 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -1192,8 +1192,6 @@ static void str_strip_first_line(char *str) *dst = '\0'; } -#define min(a, b) ((a) < (b) ? (a) : (b)) - static void test_task_vma(void) { int err, iter_fd = -1, proc_maps_fd = -1; @@ -1229,7 +1227,7 @@ static void test_task_vma(void) len = 0; while (len < CMP_BUFFER_SIZE) { err = read_fd_into_buffer(iter_fd, task_vma_output + len, - min(read_size, CMP_BUFFER_SIZE - len)); + MIN(read_size, CMP_BUFFER_SIZE - len)); if (!err) break; if (CHECK(err < 0, "read_iter_fd", "read_iter_fd failed\n")) diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c b/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c index d43f548c572c..a4d0cc9d3367 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c @@ -36,13 +36,13 @@ struct test_config { void (*bpf_destroy)(void *); }; -enum test_state { +enum bpf_test_state { _TS_INVALID, TS_MODULE_LOAD, TS_MODULE_LOAD_FAIL, }; -static _Atomic enum test_state state = _TS_INVALID; +static _Atomic enum bpf_test_state state = _TS_INVALID; static int sys_finit_module(int fd, const char *param_values, int flags) { diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c index 8f7a1cef7d87..e9a9a31b2ffe 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c @@ -10,8 +10,6 @@ #include "bpf_tcp_nogpl.skel.h" #include "bpf_dctcp_release.skel.h" -#define min(a, b) ((a) < (b) ? 
(a) : (b)) - #ifndef ENOTSUPP #define ENOTSUPP 524 #endif @@ -53,7 +51,7 @@ static void *server(void *arg) while (bytes < total_bytes && !READ_ONCE(stop)) { nr_sent = send(fd, &batch, - min(total_bytes - bytes, sizeof(batch)), 0); + MIN(total_bytes - bytes, sizeof(batch)), 0); if (nr_sent == -1 && errno == EINTR) continue; if (nr_sent == -1) { @@ -146,7 +144,7 @@ static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map) /* recv total_bytes */ while (bytes < total_bytes && !READ_ONCE(stop)) { nr_recv = recv(fd, &batch, - min(total_bytes - bytes, sizeof(batch)), 0); + MIN(total_bytes - bytes, sizeof(batch)), 0); if (nr_recv == -1 && errno == EINTR) continue; if (nr_recv == -1) diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index ec823561b912..ba5bde53d418 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -8,7 +8,6 @@ #include <linux/filter.h> #include <linux/unistd.h> #include <bpf/bpf.h> -#include <sys/resource.h> #include <libelf.h> #include <gelf.h> #include <string.h> @@ -3974,6 +3973,105 @@ static struct btf_raw_test raw_tests[] = { .value_type_id = 1, .max_entries = 1, }, +{ + .descr = "type_tag test #2, type tag order", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_CONST_ENC(3), /* [2] */ + BTF_TYPE_TAG_ENC(NAME_TBD, 1), /* [3] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0tag"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "tag_type_check_btf", + .key_size = sizeof(int), + .value_size = 4, + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 1, + .btf_load_err = true, + .err_str = "Type tags don't precede modifiers", +}, +{ + .descr = "type_tag test #3, type tag order", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_TAG_ENC(NAME_TBD, 3), /* [2] */ + BTF_CONST_ENC(4), /* [3] */ + BTF_TYPE_TAG_ENC(NAME_TBD, 1), /* [4] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0tag\0tag"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "tag_type_check_btf", + .key_size = sizeof(int), + .value_size = 4, + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 1, + .btf_load_err = true, + .err_str = "Type tags don't precede modifiers", +}, +{ + .descr = "type_tag test #4, type tag order", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPEDEF_ENC(NAME_TBD, 3), /* [2] */ + BTF_CONST_ENC(4), /* [3] */ + BTF_TYPE_TAG_ENC(NAME_TBD, 1), /* [4] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0tag\0tag"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "tag_type_check_btf", + .key_size = sizeof(int), + .value_size = 4, + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 1, + .btf_load_err = true, + .err_str = "Type tags don't precede modifiers", +}, +{ + .descr = "type_tag test #5, type tag order", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_TAG_ENC(NAME_TBD, 3), /* [2] */ + BTF_CONST_ENC(1), /* [3] */ + BTF_TYPE_TAG_ENC(NAME_TBD, 2), /* [4] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0tag\0tag"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "tag_type_check_btf", + .key_size = sizeof(int), + .value_size = 4, + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 1, +}, +{ + .descr = "type_tag test #6, type tag order", + .raw_types = { + BTF_PTR_ENC(2), /* [1] */ + BTF_TYPE_TAG_ENC(NAME_TBD, 3), /* [2] */ + BTF_CONST_ENC(4), /* [3] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [4] */ + BTF_PTR_ENC(6), /* [5] */ + 
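(Ordering note for the initializers around here: the first chain, [1] ptr -> [2] tag -> [3] const -> [4] int, is the legal form; the second chain starts at [5] ptr and goes through [6] const, immediately below, back to the [2] tag, so a modifier ends up ahead of the type tag. That is exactly the case the expected "Type tags don't precede modifiers" error string rejects.)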
BTF_CONST_ENC(2), /* [6] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0tag"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "tag_type_check_btf", + .key_size = sizeof(int), + .value_size = 4, + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 1, + .btf_load_err = true, + .err_str = "Type tags don't precede modifiers", +}, }; /* struct btf_raw_test raw_tests[] */ diff --git a/tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c b/tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c index 5aa52cc31dc2..c11832657d2b 100644 --- a/tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c +++ b/tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c @@ -2,6 +2,7 @@ /* Copyright (C) 2021. Huawei Technologies Co., Ltd */ #include <test_progs.h> #include "dummy_st_ops.skel.h" +#include "trace_dummy_st_ops.skel.h" /* Need to keep consistent with definition in include/linux/bpf.h */ struct bpf_dummy_ops_state { @@ -56,6 +57,7 @@ static void test_dummy_init_ptr_arg(void) .ctx_in = args, .ctx_size_in = sizeof(args), ); + struct trace_dummy_st_ops *trace_skel; struct dummy_st_ops *skel; int fd, err; @@ -64,12 +66,33 @@ static void test_dummy_init_ptr_arg(void) return; fd = bpf_program__fd(skel->progs.test_1); + + trace_skel = trace_dummy_st_ops__open(); + if (!ASSERT_OK_PTR(trace_skel, "trace_dummy_st_ops__open")) + goto done; + + err = bpf_program__set_attach_target(trace_skel->progs.fentry_test_1, + fd, "test_1"); + if (!ASSERT_OK(err, "set_attach_target(fentry_test_1)")) + goto done; + + err = trace_dummy_st_ops__load(trace_skel); + if (!ASSERT_OK(err, "load(trace_skel)")) + goto done; + + err = trace_dummy_st_ops__attach(trace_skel); + if (!ASSERT_OK(err, "attach(trace_skel)")) + goto done; + err = bpf_prog_test_run_opts(fd, &attr); ASSERT_OK(err, "test_run"); ASSERT_EQ(in_state.val, 0x5a, "test_ptr_ret"); ASSERT_EQ(attr.retval, exp_retval, "test_ret"); + ASSERT_EQ(trace_skel->bss->val, exp_retval, "fentry_val"); +done: dummy_st_ops__destroy(skel); + trace_dummy_st_ops__destroy(trace_skel); } static void test_dummy_multiple_args(void) diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c index 3ee2107bbf7a..fe1f0f26ea14 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c @@ -53,7 +53,7 @@ void test_fexit_stress(void) &trace_opts); if (!ASSERT_GE(fexit_fd[i], 0, "fexit load")) goto out; - link_fd[i] = bpf_raw_tracepoint_open(NULL, fexit_fd[i]); + link_fd[i] = bpf_link_create(fexit_fd[i], 0, BPF_TRACE_FEXIT, NULL); if (!ASSERT_GE(link_fd[i], 0, "fexit attach")) goto out; } diff --git a/tools/testing/selftests/bpf/prog_tests/for_each.c b/tools/testing/selftests/bpf/prog_tests/for_each.c index 044df13ee069..754e80937e5d 100644 --- a/tools/testing/selftests/bpf/prog_tests/for_each.c +++ b/tools/testing/selftests/bpf/prog_tests/for_each.c @@ -4,6 +4,7 @@ #include <network_helpers.h> #include "for_each_hash_map_elem.skel.h" #include "for_each_array_map_elem.skel.h" +#include "for_each_map_elem_write_key.skel.h" static unsigned int duration; @@ -129,10 +130,21 @@ out: for_each_array_map_elem__destroy(skel); } +static void test_write_map_key(void) +{ + struct for_each_map_elem_write_key *skel; + + skel = for_each_map_elem_write_key__open_and_load(); + if (!ASSERT_ERR_PTR(skel, "for_each_map_elem_write_key__open_and_load")) + for_each_map_elem_write_key__destroy(skel); +} + void test_for_each(void) { if (test__start_subtest("hash_map")) test_hash_map(); if 
(test__start_subtest("array_map")) test_array_map(); + if (test__start_subtest("write_map_key")) + test_write_map_key(); } diff --git a/tools/testing/selftests/bpf/prog_tests/helper_restricted.c b/tools/testing/selftests/bpf/prog_tests/helper_restricted.c index e1de5f80c3b2..0354f9b82c65 100644 --- a/tools/testing/selftests/bpf/prog_tests/helper_restricted.c +++ b/tools/testing/selftests/bpf/prog_tests/helper_restricted.c @@ -6,11 +6,10 @@ void test_helper_restricted(void) { int prog_i = 0, prog_cnt; - int duration = 0; do { struct test_helper_restricted *test; - int maybeOK; + int err; test = test_helper_restricted__open(); if (!ASSERT_OK_PTR(test, "open")) @@ -21,12 +20,11 @@ void test_helper_restricted(void) for (int j = 0; j < prog_cnt; ++j) { struct bpf_program *prog = *test->skeleton->progs[j].prog; - maybeOK = bpf_program__set_autoload(prog, prog_i == j); - ASSERT_OK(maybeOK, "set autoload"); + bpf_program__set_autoload(prog, true); } - maybeOK = test_helper_restricted__load(test); - CHECK(!maybeOK, test->skeleton->progs[prog_i].name, "helper isn't restricted"); + err = test_helper_restricted__load(test); + ASSERT_ERR(err, "load_should_fail"); test_helper_restricted__destroy(test); } while (++prog_i < prog_cnt); diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c index f6933b06daf8..1d7a2f1e0731 100644 --- a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c +++ b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c @@ -138,12 +138,16 @@ cleanup: test_ksyms_weak_lskel__destroy(skel); } -static void test_write_check(void) +static void test_write_check(bool test_handler1) { struct test_ksyms_btf_write_check *skel; - skel = test_ksyms_btf_write_check__open_and_load(); - ASSERT_ERR_PTR(skel, "unexpected load of a prog writing to ksym memory\n"); + skel = test_ksyms_btf_write_check__open(); + if (!ASSERT_OK_PTR(skel, "test_ksyms_btf_write_check__open")) + return; + bpf_program__set_autoload(test_handler1 ? 
skel->progs.handler2 : skel->progs.handler1, false); + ASSERT_ERR(test_ksyms_btf_write_check__load(skel), + "unexpected load of a prog writing to ksym memory\n"); test_ksyms_btf_write_check__destroy(skel); } @@ -179,6 +183,9 @@ void test_ksyms_btf(void) if (test__start_subtest("weak_ksyms_lskel")) test_weak_syms_lskel(); - if (test__start_subtest("write_check")) - test_write_check(); + if (test__start_subtest("write_check1")) + test_write_check(true); + + if (test__start_subtest("write_check2")) + test_write_check(false); } diff --git a/tools/testing/selftests/bpf/prog_tests/linked_funcs.c b/tools/testing/selftests/bpf/prog_tests/linked_funcs.c index e9916f2817ec..cad664546912 100644 --- a/tools/testing/selftests/bpf/prog_tests/linked_funcs.c +++ b/tools/testing/selftests/bpf/prog_tests/linked_funcs.c @@ -14,6 +14,12 @@ void test_linked_funcs(void) if (!ASSERT_OK_PTR(skel, "skel_open")) return; + /* handler1 and handler2 are marked as SEC("?raw_tp/sys_enter") and + * are set to not autoload by default + */ + bpf_program__set_autoload(skel->progs.handler1, true); + bpf_program__set_autoload(skel->progs.handler2, true); + skel->rodata->my_tid = syscall(SYS_gettid); skel->bss->syscall_id = SYS_getpgid; diff --git a/tools/testing/selftests/bpf/prog_tests/log_fixup.c b/tools/testing/selftests/bpf/prog_tests/log_fixup.c new file mode 100644 index 000000000000..be3a956cb3a5 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/log_fixup.c @@ -0,0 +1,114 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ +#include <test_progs.h> +#include <bpf/btf.h> + +#include "test_log_fixup.skel.h" + +enum trunc_type { + TRUNC_NONE, + TRUNC_PARTIAL, + TRUNC_FULL, +}; + +static void bad_core_relo(size_t log_buf_size, enum trunc_type trunc_type) +{ + char log_buf[8 * 1024]; + struct test_log_fixup* skel; + int err; + + skel = test_log_fixup__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + bpf_program__set_autoload(skel->progs.bad_relo, true); + memset(log_buf, 0, sizeof(log_buf)); + bpf_program__set_log_buf(skel->progs.bad_relo, log_buf, log_buf_size ?: sizeof(log_buf)); + + err = test_log_fixup__load(skel); + if (!ASSERT_ERR(err, "load_fail")) + goto cleanup; + + ASSERT_HAS_SUBSTR(log_buf, + "0: <invalid CO-RE relocation>\n" + "failed to resolve CO-RE relocation <byte_sz> ", + "log_buf_part1"); + + switch (trunc_type) { + case TRUNC_NONE: + ASSERT_HAS_SUBSTR(log_buf, + "struct task_struct___bad.fake_field (0:1 @ offset 4)\n", + "log_buf_part2"); + ASSERT_HAS_SUBSTR(log_buf, + "max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0\n", + "log_buf_end"); + break; + case TRUNC_PARTIAL: + /* we should get full libbpf message patch */ + ASSERT_HAS_SUBSTR(log_buf, + "struct task_struct___bad.fake_field (0:1 @ offset 4)\n", + "log_buf_part2"); + /* we shouldn't get full end of BPF verifier log */ + ASSERT_NULL(strstr(log_buf, "max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0\n"), + "log_buf_end"); + break; + case TRUNC_FULL: + /* we shouldn't get second part of libbpf message patch */ + ASSERT_NULL(strstr(log_buf, "struct task_struct___bad.fake_field (0:1 @ offset 4)\n"), + "log_buf_part2"); + /* we shouldn't get full end of BPF verifier log */ + ASSERT_NULL(strstr(log_buf, "max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0\n"), + "log_buf_end"); + break; + } + + if (env.verbosity > VERBOSE_NONE) + printf("LOG: \n=================\n%s=================\n", log_buf); +cleanup: + test_log_fixup__destroy(skel); +} + 
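One detail worth keeping in mind when reading both log_fixup helpers: bpf_program__set_log_buf() points libbpf at a caller-owned buffer, and libbpf appends its own CO-RE fix-up hint after whatever the kernel verifier emitted there, so a small buffer can truncate either part. A stripped-down sketch of the sequence, mirroring bad_core_relo() above:

	static char log_buf[8 * 1024];

	bpf_program__set_autoload(skel->progs.bad_relo, true);
	/* small sizes (300 and 250 in the subtests below) force truncation */
	bpf_program__set_log_buf(skel->progs.bad_relo, log_buf, sizeof(log_buf));
	err = test_log_fixup__load(skel);	/* must fail; log_buf says why */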
+static void bad_core_relo_subprog(void) +{ + char log_buf[8 * 1024]; + struct test_log_fixup* skel; + int err; + + skel = test_log_fixup__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + bpf_program__set_autoload(skel->progs.bad_relo_subprog, true); + bpf_program__set_log_buf(skel->progs.bad_relo_subprog, log_buf, sizeof(log_buf)); + + err = test_log_fixup__load(skel); + if (!ASSERT_ERR(err, "load_fail")) + goto cleanup; + + /* there should be no prog loading log because we specified per-prog log buf */ + ASSERT_HAS_SUBSTR(log_buf, + ": <invalid CO-RE relocation>\n" + "failed to resolve CO-RE relocation <byte_off> ", + "log_buf"); + ASSERT_HAS_SUBSTR(log_buf, + "struct task_struct___bad.fake_field_subprog (0:2 @ offset 8)\n", + "log_buf"); + + if (env.verbosity > VERBOSE_NONE) + printf("LOG: \n=================\n%s=================\n", log_buf); + +cleanup: + test_log_fixup__destroy(skel); +} + +void test_log_fixup(void) +{ + if (test__start_subtest("bad_core_relo_trunc_none")) + bad_core_relo(0, TRUNC_NONE /* full buf */); + if (test__start_subtest("bad_core_relo_trunc_partial")) + bad_core_relo(300, TRUNC_PARTIAL /* truncate original log a bit */); + if (test__start_subtest("bad_core_relo_trunc_full")) + bad_core_relo(250, TRUNC_FULL /* truncate also libbpf's message patch */); + if (test__start_subtest("bad_core_relo_subprog")) + bad_core_relo_subprog(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/map_kptr.c b/tools/testing/selftests/bpf/prog_tests/map_kptr.c new file mode 100644 index 000000000000..9e2fbda64a65 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/map_kptr.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> + +#include "map_kptr.skel.h" + +void test_map_kptr(void) +{ + struct map_kptr *skel; + int key = 0, ret; + char buf[24]; + + skel = map_kptr__open_and_load(); + if (!ASSERT_OK_PTR(skel, "map_kptr__open_and_load")) + return; + + ret = bpf_map_update_elem(bpf_map__fd(skel->maps.array_map), &key, buf, 0); + ASSERT_OK(ret, "array_map update"); + ret = bpf_map_update_elem(bpf_map__fd(skel->maps.array_map), &key, buf, 0); + ASSERT_OK(ret, "array_map update2"); + + ret = bpf_map_update_elem(bpf_map__fd(skel->maps.hash_map), &key, buf, 0); + ASSERT_OK(ret, "hash_map update"); + ret = bpf_map_delete_elem(bpf_map__fd(skel->maps.hash_map), &key); + ASSERT_OK(ret, "hash_map delete"); + + ret = bpf_map_update_elem(bpf_map__fd(skel->maps.hash_malloc_map), &key, buf, 0); + ASSERT_OK(ret, "hash_malloc_map update"); + ret = bpf_map_delete_elem(bpf_map__fd(skel->maps.hash_malloc_map), &key); + ASSERT_OK(ret, "hash_malloc_map delete"); + + ret = bpf_map_update_elem(bpf_map__fd(skel->maps.lru_hash_map), &key, buf, 0); + ASSERT_OK(ret, "lru_hash_map update"); + ret = bpf_map_delete_elem(bpf_map__fd(skel->maps.lru_hash_map), &key); + ASSERT_OK(ret, "lru_hash_map delete"); + + map_kptr__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/netcnt.c b/tools/testing/selftests/bpf/prog_tests/netcnt.c index 954964f0ac3d..d3915c58d0e1 100644 --- a/tools/testing/selftests/bpf/prog_tests/netcnt.c +++ b/tools/testing/selftests/bpf/prog_tests/netcnt.c @@ -25,7 +25,7 @@ void serial_test_netcnt(void) if (!ASSERT_OK_PTR(skel, "netcnt_prog__open_and_load")) return; - nproc = get_nprocs_conf(); + nproc = bpf_num_possible_cpus(); percpu_netcnt = malloc(sizeof(*percpu_netcnt) * nproc); if (!ASSERT_OK_PTR(percpu_netcnt, "malloc(percpu_netcnt)")) goto err; diff --git a/tools/testing/selftests/bpf/prog_tests/prog_tests_framework.c 
b/tools/testing/selftests/bpf/prog_tests/prog_tests_framework.c new file mode 100644 index 000000000000..14f2796076e0 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/prog_tests_framework.c @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) + +#include "test_progs.h" +#include "testing_helpers.h" + +static void clear_test_state(struct test_state *state) +{ + state->error_cnt = 0; + state->sub_succ_cnt = 0; + state->skip_cnt = 0; +} + +void test_prog_tests_framework(void) +{ + struct test_state *state = env.test_state; + + /* in all the ASSERT calls below we need to return on the first + * error due to the fact that we are cleaning the test state after + * each dummy subtest + */ + + /* test we properly count skipped tests with subtests */ + if (test__start_subtest("test_good_subtest")) + test__end_subtest(); + if (!ASSERT_EQ(state->skip_cnt, 0, "skip_cnt_check")) + return; + if (!ASSERT_EQ(state->error_cnt, 0, "error_cnt_check")) + return; + if (!ASSERT_EQ(state->subtest_num, 1, "subtest_num_check")) + return; + clear_test_state(state); + + if (test__start_subtest("test_skip_subtest")) { + test__skip(); + test__end_subtest(); + } + if (test__start_subtest("test_skip_subtest")) { + test__skip(); + test__end_subtest(); + } + if (!ASSERT_EQ(state->skip_cnt, 2, "skip_cnt_check")) + return; + if (!ASSERT_EQ(state->subtest_num, 3, "subtest_num_check")) + return; + clear_test_state(state); + + if (test__start_subtest("test_fail_subtest")) { + test__fail(); + test__end_subtest(); + } + if (!ASSERT_EQ(state->error_cnt, 1, "error_cnt_check")) + return; + if (!ASSERT_EQ(state->subtest_num, 4, "subtest_num_check")) + return; + clear_test_state(state); +} diff --git a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c index 873323fb18ba..739d2ea6ca55 100644 --- a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c +++ b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c @@ -1,21 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <test_progs.h> -static void toggle_object_autoload_progs(const struct bpf_object *obj, - const char *name_load) -{ - struct bpf_program *prog; - - bpf_object__for_each_program(prog, obj) { - const char *name = bpf_program__name(prog); - - if (!strcmp(name_load, name)) - bpf_program__set_autoload(prog, true); - else - bpf_program__set_autoload(prog, false); - } -} - void test_reference_tracking(void) { const char *file = "test_sk_lookup_kern.o"; @@ -39,6 +24,7 @@ void test_reference_tracking(void) goto cleanup; bpf_object__for_each_program(prog, obj_iter) { + struct bpf_program *p; const char *name; name = bpf_program__name(prog); @@ -49,7 +35,12 @@ void test_reference_tracking(void) if (!ASSERT_OK_PTR(obj, "obj_open_file")) goto cleanup; - toggle_object_autoload_progs(obj, name); + /* all programs are not loaded by default, so just set + * autoload to true for the single prog under test + */ + p = bpf_object__find_program_by_name(obj, name); + bpf_program__set_autoload(p, true); + /* Expect verifier failure if test name has 'err' */ if (strncmp(name, "err_", sizeof("err_") - 1) == 0) { libbpf_print_fn_t old_print_fn; diff --git a/tools/testing/selftests/bpf/prog_tests/skb_load_bytes.c b/tools/testing/selftests/bpf/prog_tests/skb_load_bytes.c new file mode 100644 index 000000000000..d7f83c0a40a5 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/skb_load_bytes.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> +#include 
<network_helpers.h> +#include "skb_load_bytes.skel.h" + +void test_skb_load_bytes(void) +{ + struct skb_load_bytes *skel; + int err, prog_fd, test_result; + struct __sk_buff skb = { 0 }; + + LIBBPF_OPTS(bpf_test_run_opts, tattr, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .ctx_in = &skb, + .ctx_size_in = sizeof(skb), + ); + + skel = skb_load_bytes__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) + return; + + prog_fd = bpf_program__fd(skel->progs.skb_process); + if (!ASSERT_GE(prog_fd, 0, "prog_fd")) + goto out; + + skel->bss->load_offset = (uint32_t)(-1); + err = bpf_prog_test_run_opts(prog_fd, &tattr); + if (!ASSERT_OK(err, "bpf_prog_test_run_opts")) + goto out; + test_result = skel->bss->test_result; + if (!ASSERT_EQ(test_result, -EFAULT, "offset -1")) + goto out; + + skel->bss->load_offset = (uint32_t)10; + err = bpf_prog_test_run_opts(prog_fd, &tattr); + if (!ASSERT_OK(err, "bpf_prog_test_run_opts")) + goto out; + test_result = skel->bss->test_result; + if (!ASSERT_EQ(test_result, 0, "offset 10")) + goto out; + +out: + skb_load_bytes__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/snprintf.c b/tools/testing/selftests/bpf/prog_tests/snprintf.c index 394ebfc3bbf3..4be6fdb78c6a 100644 --- a/tools/testing/selftests/bpf/prog_tests/snprintf.c +++ b/tools/testing/selftests/bpf/prog_tests/snprintf.c @@ -83,8 +83,6 @@ cleanup: test_snprintf__destroy(skel); } -#define min(a, b) ((a) < (b) ? (a) : (b)) - /* Loads an eBPF object calling bpf_snprintf with up to 10 characters of fmt */ static int load_single_snprintf(char *fmt) { @@ -95,7 +93,7 @@ static int load_single_snprintf(char *fmt) if (!skel) return -EINVAL; - memcpy(skel->rodata->fmt, fmt, min(strlen(fmt) + 1, 10)); + memcpy(skel->rodata->fmt, fmt, MIN(strlen(fmt) + 1, 10)); ret = test_snprintf_single__load(skel); test_snprintf_single__destroy(skel); diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c index 7ad66a247c02..958dae769c52 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c @@ -949,7 +949,6 @@ fail: return -1; } -#define MAX(a, b) ((a) > (b) ? 
(a) : (b)) enum { SRC_TO_TARGET = 0, TARGET_TO_SRC = 1, diff --git a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c index 509e21d5cb9d..b90ee47d3111 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c +++ b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c @@ -81,6 +81,7 @@ void test_test_global_funcs(void) { "test_global_func14.o", "reference type('FWD S') size cannot be determined" }, { "test_global_func15.o", "At program exit the register R0 has value" }, { "test_global_func16.o", "invalid indirect read from stack" }, + { "test_global_func17.o", "Caller passes invalid args into func#1" }, }; libbpf_print_fn_t old_print_fn = NULL; int err, i, duration = 0; diff --git a/tools/testing/selftests/bpf/prog_tests/test_strncmp.c b/tools/testing/selftests/bpf/prog_tests/test_strncmp.c index b57a3009465f..7ddd6615b7e7 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_strncmp.c +++ b/tools/testing/selftests/bpf/prog_tests/test_strncmp.c @@ -44,16 +44,12 @@ static void strncmp_full_str_cmp(struct strncmp_test *skel, const char *name, static void test_strncmp_ret(void) { struct strncmp_test *skel; - struct bpf_program *prog; int err, got; skel = strncmp_test__open(); if (!ASSERT_OK_PTR(skel, "strncmp_test open")) return; - bpf_object__for_each_program(prog, skel->obj) - bpf_program__set_autoload(prog, false); - bpf_program__set_autoload(skel->progs.do_strncmp, true); err = strncmp_test__load(skel); @@ -91,18 +87,13 @@ out: static void test_strncmp_bad_not_const_str_size(void) { struct strncmp_test *skel; - struct bpf_program *prog; int err; skel = strncmp_test__open(); if (!ASSERT_OK_PTR(skel, "strncmp_test open")) return; - bpf_object__for_each_program(prog, skel->obj) - bpf_program__set_autoload(prog, false); - - bpf_program__set_autoload(skel->progs.strncmp_bad_not_const_str_size, - true); + bpf_program__set_autoload(skel->progs.strncmp_bad_not_const_str_size, true); err = strncmp_test__load(skel); ASSERT_ERR(err, "strncmp_test load bad_not_const_str_size"); @@ -113,18 +104,13 @@ static void test_strncmp_bad_not_const_str_size(void) static void test_strncmp_bad_writable_target(void) { struct strncmp_test *skel; - struct bpf_program *prog; int err; skel = strncmp_test__open(); if (!ASSERT_OK_PTR(skel, "strncmp_test open")) return; - bpf_object__for_each_program(prog, skel->obj) - bpf_program__set_autoload(prog, false); - - bpf_program__set_autoload(skel->progs.strncmp_bad_writable_target, - true); + bpf_program__set_autoload(skel->progs.strncmp_bad_writable_target, true); err = strncmp_test__load(skel); ASSERT_ERR(err, "strncmp_test load bad_writable_target"); @@ -135,18 +121,13 @@ static void test_strncmp_bad_writable_target(void) static void test_strncmp_bad_not_null_term_target(void) { struct strncmp_test *skel; - struct bpf_program *prog; int err; skel = strncmp_test__open(); if (!ASSERT_OK_PTR(skel, "strncmp_test open")) return; - bpf_object__for_each_program(prog, skel->obj) - bpf_program__set_autoload(prog, false); - - bpf_program__set_autoload(skel->progs.strncmp_bad_not_null_term_target, - true); + bpf_program__set_autoload(skel->progs.strncmp_bad_not_null_term_target, true); err = strncmp_test__load(skel); ASSERT_ERR(err, "strncmp_test load bad_not_null_term_target"); diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c b/tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c new file mode 100644 index 000000000000..35b87c7ba5be --- /dev/null +++ 
b/tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022, Oracle and/or its affiliates. */ + +#include <test_progs.h> +#include "test_uprobe_autoattach.skel.h" + +/* uprobe attach point */ +static noinline int autoattach_trigger_func(int arg) +{ + asm volatile (""); + return arg + 1; +} + +void test_uprobe_autoattach(void) +{ + struct test_uprobe_autoattach *skel; + int trigger_val = 100, trigger_ret; + size_t malloc_sz = 1; + char *mem; + + skel = test_uprobe_autoattach__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + if (!ASSERT_OK(test_uprobe_autoattach__attach(skel), "skel_attach")) + goto cleanup; + + skel->bss->test_pid = getpid(); + + /* trigger & validate uprobe & uretprobe */ + trigger_ret = autoattach_trigger_func(trigger_val); + + skel->bss->test_pid = getpid(); + + /* trigger & validate shared library u[ret]probes attached by name */ + mem = malloc(malloc_sz); + + ASSERT_EQ(skel->bss->uprobe_byname_parm1, trigger_val, "check_uprobe_byname_parm1"); + ASSERT_EQ(skel->bss->uprobe_byname_ran, 1, "check_uprobe_byname_ran"); + ASSERT_EQ(skel->bss->uretprobe_byname_rc, trigger_ret, "check_uretprobe_byname_rc"); + ASSERT_EQ(skel->bss->uretprobe_byname_ran, 2, "check_uretprobe_byname_ran"); + ASSERT_EQ(skel->bss->uprobe_byname2_parm1, malloc_sz, "check_uprobe_byname2_parm1"); + ASSERT_EQ(skel->bss->uprobe_byname2_ran, 3, "check_uprobe_byname2_ran"); + ASSERT_EQ(skel->bss->uretprobe_byname2_rc, mem, "check_uretprobe_byname2_rc"); + ASSERT_EQ(skel->bss->uretprobe_byname2_ran, 4, "check_uretprobe_byname2_ran"); + + free(mem); +cleanup: + test_uprobe_autoattach__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/usdt.c b/tools/testing/selftests/bpf/prog_tests/usdt.c new file mode 100644 index 000000000000..a71f51bdc08d --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/usdt.c @@ -0,0 +1,421 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. 
*/ +#include <test_progs.h> + +#define _SDT_HAS_SEMAPHORES 1 +#include "../sdt.h" + +#include "test_usdt.skel.h" +#include "test_urandom_usdt.skel.h" + +int lets_test_this(int); + +static volatile int idx = 2; +static volatile __u64 bla = 0xFEDCBA9876543210ULL; +static volatile short nums[] = {-1, -2, -3, }; + +static volatile struct { + int x; + signed char y; +} t1 = { 1, -127 }; + +#define SEC(name) __attribute__((section(name), used)) + +unsigned short test_usdt0_semaphore SEC(".probes"); +unsigned short test_usdt3_semaphore SEC(".probes"); +unsigned short test_usdt12_semaphore SEC(".probes"); + +static void __always_inline trigger_func(int x) { + long y = 42; + + if (test_usdt0_semaphore) + STAP_PROBE(test, usdt0); + if (test_usdt3_semaphore) + STAP_PROBE3(test, usdt3, x, y, &bla); + if (test_usdt12_semaphore) { + STAP_PROBE12(test, usdt12, + x, x + 1, y, x + y, 5, + y / 7, bla, &bla, -9, nums[x], + nums[idx], t1.y); + } +} + +static void subtest_basic_usdt(void) +{ + LIBBPF_OPTS(bpf_usdt_opts, opts); + struct test_usdt *skel; + struct test_usdt__bss *bss; + int err; + + skel = test_usdt__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + bss = skel->bss; + bss->my_pid = getpid(); + + err = test_usdt__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto cleanup; + + /* usdt0 won't be auto-attached */ + opts.usdt_cookie = 0xcafedeadbeeffeed; + skel->links.usdt0 = bpf_program__attach_usdt(skel->progs.usdt0, + 0 /*self*/, "/proc/self/exe", + "test", "usdt0", &opts); + if (!ASSERT_OK_PTR(skel->links.usdt0, "usdt0_link")) + goto cleanup; + + trigger_func(1); + + ASSERT_EQ(bss->usdt0_called, 1, "usdt0_called"); + ASSERT_EQ(bss->usdt3_called, 1, "usdt3_called"); + ASSERT_EQ(bss->usdt12_called, 1, "usdt12_called"); + + ASSERT_EQ(bss->usdt0_cookie, 0xcafedeadbeeffeed, "usdt0_cookie"); + ASSERT_EQ(bss->usdt0_arg_cnt, 0, "usdt0_arg_cnt"); + ASSERT_EQ(bss->usdt0_arg_ret, -ENOENT, "usdt0_arg_ret"); + + /* auto-attached usdt3 gets default zero cookie value */ + ASSERT_EQ(bss->usdt3_cookie, 0, "usdt3_cookie"); + ASSERT_EQ(bss->usdt3_arg_cnt, 3, "usdt3_arg_cnt"); + + ASSERT_EQ(bss->usdt3_arg_rets[0], 0, "usdt3_arg1_ret"); + ASSERT_EQ(bss->usdt3_arg_rets[1], 0, "usdt3_arg2_ret"); + ASSERT_EQ(bss->usdt3_arg_rets[2], 0, "usdt3_arg3_ret"); + ASSERT_EQ(bss->usdt3_args[0], 1, "usdt3_arg1"); + ASSERT_EQ(bss->usdt3_args[1], 42, "usdt3_arg2"); + ASSERT_EQ(bss->usdt3_args[2], (uintptr_t)&bla, "usdt3_arg3"); + + /* auto-attached usdt12 gets default zero cookie value */ + ASSERT_EQ(bss->usdt12_cookie, 0, "usdt12_cookie"); + ASSERT_EQ(bss->usdt12_arg_cnt, 12, "usdt12_arg_cnt"); + + ASSERT_EQ(bss->usdt12_args[0], 1, "usdt12_arg1"); + ASSERT_EQ(bss->usdt12_args[1], 1 + 1, "usdt12_arg2"); + ASSERT_EQ(bss->usdt12_args[2], 42, "usdt12_arg3"); + ASSERT_EQ(bss->usdt12_args[3], 42 + 1, "usdt12_arg4"); + ASSERT_EQ(bss->usdt12_args[4], 5, "usdt12_arg5"); + ASSERT_EQ(bss->usdt12_args[5], 42 / 7, "usdt12_arg6"); + ASSERT_EQ(bss->usdt12_args[6], bla, "usdt12_arg7"); + ASSERT_EQ(bss->usdt12_args[7], (uintptr_t)&bla, "usdt12_arg8"); + ASSERT_EQ(bss->usdt12_args[8], -9, "usdt12_arg9"); + ASSERT_EQ(bss->usdt12_args[9], nums[1], "usdt12_arg10"); + ASSERT_EQ(bss->usdt12_args[10], nums[idx], "usdt12_arg11"); + ASSERT_EQ(bss->usdt12_args[11], t1.y, "usdt12_arg12"); + + /* trigger_func() is marked __always_inline, so USDT invocations will be + * inlined in two different places, meaning that each USDT will have + * at least 2 different places to be attached to. 
This verifies that + * bpf_program__attach_usdt() handles this properly and attaches to + * all possible places of USDT invocation. + */ + trigger_func(2); + + ASSERT_EQ(bss->usdt0_called, 2, "usdt0_called"); + ASSERT_EQ(bss->usdt3_called, 2, "usdt3_called"); + ASSERT_EQ(bss->usdt12_called, 2, "usdt12_called"); + + /* only check values that depend on trigger_func()'s input value */ + ASSERT_EQ(bss->usdt3_args[0], 2, "usdt3_arg1"); + + ASSERT_EQ(bss->usdt12_args[0], 2, "usdt12_arg1"); + ASSERT_EQ(bss->usdt12_args[1], 2 + 1, "usdt12_arg2"); + ASSERT_EQ(bss->usdt12_args[3], 42 + 2, "usdt12_arg4"); + ASSERT_EQ(bss->usdt12_args[9], nums[2], "usdt12_arg10"); + + /* detach and re-attach usdt3 */ + bpf_link__destroy(skel->links.usdt3); + + opts.usdt_cookie = 0xBADC00C51E; + skel->links.usdt3 = bpf_program__attach_usdt(skel->progs.usdt3, -1 /* any pid */, + "/proc/self/exe", "test", "usdt3", &opts); + if (!ASSERT_OK_PTR(skel->links.usdt3, "usdt3_reattach")) + goto cleanup; + + trigger_func(3); + + ASSERT_EQ(bss->usdt3_called, 3, "usdt3_called"); + /* this time usdt3 has custom cookie */ + ASSERT_EQ(bss->usdt3_cookie, 0xBADC00C51E, "usdt3_cookie"); + ASSERT_EQ(bss->usdt3_arg_cnt, 3, "usdt3_arg_cnt"); + + ASSERT_EQ(bss->usdt3_arg_rets[0], 0, "usdt3_arg1_ret"); + ASSERT_EQ(bss->usdt3_arg_rets[1], 0, "usdt3_arg2_ret"); + ASSERT_EQ(bss->usdt3_arg_rets[2], 0, "usdt3_arg3_ret"); + ASSERT_EQ(bss->usdt3_args[0], 3, "usdt3_arg1"); + ASSERT_EQ(bss->usdt3_args[1], 42, "usdt3_arg2"); + ASSERT_EQ(bss->usdt3_args[2], (uintptr_t)&bla, "usdt3_arg3"); + +cleanup: + test_usdt__destroy(skel); +} + +unsigned short test_usdt_100_semaphore SEC(".probes"); +unsigned short test_usdt_300_semaphore SEC(".probes"); +unsigned short test_usdt_400_semaphore SEC(".probes"); + +#define R10(F, X) F(X+0); F(X+1);F(X+2); F(X+3); F(X+4); \ + F(X+5); F(X+6); F(X+7); F(X+8); F(X+9); +#define R100(F, X) R10(F,X+ 0);R10(F,X+10);R10(F,X+20);R10(F,X+30);R10(F,X+40); \ + R10(F,X+50);R10(F,X+60);R10(F,X+70);R10(F,X+80);R10(F,X+90); + +/* carefully control that we get exactly 100 inlined call sites: f100() is + * force-inlined, while __weak keeps trigger_100_usdts() itself from being + * inlined or optimized away + */ +static void __always_inline f100(int x) +{ + STAP_PROBE1(test, usdt_100, x); +} + +__weak void trigger_100_usdts(void) +{ + R100(f100, 0); +}
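The R10()/R100() macros stamp out one call per textual expansion, which is how the test manufactures an exact number of inlined USDT call sites. Here is the same trick as a standalone, BPF-free sketch, with a hypothetical EMIT() callback standing in for f100():

#include <stdio.h>

#define R10(F, X) F(X+0); F(X+1); F(X+2); F(X+3); F(X+4); \
	F(X+5); F(X+6); F(X+7); F(X+8); F(X+9);
#define R100(F, X) R10(F, X+0); R10(F, X+10); R10(F, X+20); R10(F, X+30); \
	R10(F, X+40); R10(F, X+50); R10(F, X+60); R10(F, X+70); \
	R10(F, X+80); R10(F, X+90);

#define EMIT(x) printf("call site %d\n", (x))

int main(void)
{
	R100(EMIT, 0);	/* expands to exactly 100 distinct EMIT() statements */
	return 0;
}

Because each expansion is a distinct statement with a distinct constant argument, the compiler cannot merge the resulting probe sites, which is exactly what the usdt_100/usdt_300 subtests below depend on.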
/* we shouldn't be able to attach to the test:usdt_300 USDT as we don't have + * as many slots for specs. It's important that each STAP_PROBE1() invocation + * (after unrolling) gets a different arg spec, because the compiler inlines + * each call with a different constant argument + */ +static void __always_inline f300(int x) +{ + STAP_PROBE1(test, usdt_300, x); +} + +__weak void trigger_300_usdts(void) +{ + R100(f300, 0); + R100(f300, 100); + R100(f300, 200); +} + +static void __always_inline f400(int x __attribute__((unused))) +{ + static int y; + + STAP_PROBE1(test, usdt_400, y++); +} + +/* this time we have 400 different USDT call sites, but they have uniform + * argument location, so libbpf's spec string deduplication logic should keep + * spec count use very small and so we should be able to attach to all 400 + * call sites + */ +__weak void trigger_400_usdts(void) +{ + R100(f400, 0); + R100(f400, 100); + R100(f400, 200); + R100(f400, 300); +} + +static void subtest_multispec_usdt(void) +{ + LIBBPF_OPTS(bpf_usdt_opts, opts); + struct test_usdt *skel; + struct test_usdt__bss *bss; + int err, i; + + skel = test_usdt__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + bss = skel->bss; + bss->my_pid = getpid(); + + err = test_usdt__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto cleanup; + + /* usdt_100 is auto-attached and there are 100 inlined call sites, + * let's validate that all of them are properly attached to and + * handled from BPF side + */ + trigger_100_usdts(); + + ASSERT_EQ(bss->usdt_100_called, 100, "usdt_100_called"); + ASSERT_EQ(bss->usdt_100_sum, 99 * 100 / 2, "usdt_100_sum"); + + /* Stress test free spec ID tracking. By default libbpf allows up to + * 256 specs to be used, so if we don't return free spec IDs back + * after a few detachments and re-attachments we should run out of + * available spec IDs. + */ + for (i = 0; i < 2; i++) { + bpf_link__destroy(skel->links.usdt_100); + + skel->links.usdt_100 = bpf_program__attach_usdt(skel->progs.usdt_100, -1, + "/proc/self/exe", + "test", "usdt_100", NULL); + if (!ASSERT_OK_PTR(skel->links.usdt_100, "usdt_100_reattach")) + goto cleanup; + + bss->usdt_100_sum = 0; + trigger_100_usdts(); + + ASSERT_EQ(bss->usdt_100_called, (i + 1) * 100 + 100, "usdt_100_called"); + ASSERT_EQ(bss->usdt_100_sum, 99 * 100 / 2, "usdt_100_sum"); + } + + /* Now let's step it up and try to attach a USDT that requires more than + * 256 attach points with different specs for each. + * Note that we need trigger_300_usdts() only to actually have 300 + * USDT call sites, we are not going to actually trace them. + */ + trigger_300_usdts(); + + /* we'll reuse usdt_100 BPF program for usdt_300 test */ + bpf_link__destroy(skel->links.usdt_100); + skel->links.usdt_100 = bpf_program__attach_usdt(skel->progs.usdt_100, -1 /* any pid */, + "/proc/self/exe", "test", "usdt_300", NULL); + err = -errno; + if (!ASSERT_ERR_PTR(skel->links.usdt_100, "usdt_300_bad_attach")) + goto cleanup; + ASSERT_EQ(err, -E2BIG, "usdt_300_attach_err"); + + /* let's check that there are no "dangling" BPF programs attached due + * to partial success of the above test:usdt_300 attachment + */ + bss->usdt_100_called = 0; + bss->usdt_100_sum = 0; + + f300(777); /* this is 301st instance of usdt_300 */ + + ASSERT_EQ(bss->usdt_100_called, 0, "usdt_301_called"); + ASSERT_EQ(bss->usdt_100_sum, 0, "usdt_301_sum"); + + /* This time we have USDT with 400 inlined invocations, but arg specs + * should be the same across all sites, so libbpf will only need to + * use one spec and thus we'll be able to attach 400 uprobes + * successfully. + * + * Again, we are reusing usdt_100 BPF program. + */
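One detail worth calling out in the subtest above is the err = -errno assignment immediately after bpf_program__attach_usdt(): on failure the call returns NULL and reports the cause through errno, so the value must be captured before any intervening call can clobber it. A minimal sketch of the pattern, assuming prog is a loaded struct bpf_program *:

#include <errno.h>
#include <bpf/libbpf.h>

static int try_usdt_attach(struct bpf_program *prog)
{
	struct bpf_link *link;
	int err;

	link = bpf_program__attach_usdt(prog, -1 /* any pid */,
					"/proc/self/exe", "test", "usdt_300", NULL);
	err = -errno;	/* snapshot before errno can be overwritten */
	if (!link)
		return err;	/* e.g. -E2BIG once free spec IDs run out */

	bpf_link__destroy(link);
	return 0;
}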
+ skel->links.usdt_100 = bpf_program__attach_usdt(skel->progs.usdt_100, -1, + "/proc/self/exe", + "test", "usdt_400", NULL); + if (!ASSERT_OK_PTR(skel->links.usdt_100, "usdt_400_attach")) + goto cleanup; + + trigger_400_usdts(); + + ASSERT_EQ(bss->usdt_100_called, 400, "usdt_400_called"); + ASSERT_EQ(bss->usdt_100_sum, 399 * 400 / 2, "usdt_400_sum"); + +cleanup: + test_usdt__destroy(skel); +} + +static FILE *urand_spawn(int *pid) +{ + FILE *f; + + /* urandom_read's stdout is wired into f */ + f = popen("./urandom_read 1 report-pid", "r"); + if (!f) + return NULL; + + if (fscanf(f, "%d", pid) != 1) { + pclose(f); + return NULL; + } + + return f; +} + +static int urand_trigger(FILE **urand_pipe) +{ + int exit_code; + + /* pclose() waits for the child process to exit and returns its exit code */ + exit_code = pclose(*urand_pipe); + *urand_pipe = NULL; + + return exit_code; +} + +static void subtest_urandom_usdt(bool auto_attach) +{ + struct test_urandom_usdt *skel; + struct test_urandom_usdt__bss *bss; + struct bpf_link *l; + FILE *urand_pipe = NULL; + int err, urand_pid = 0; + + skel = test_urandom_usdt__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + urand_pipe = urand_spawn(&urand_pid); + if (!ASSERT_OK_PTR(urand_pipe, "urand_spawn")) + goto cleanup; + + bss = skel->bss; + bss->urand_pid = urand_pid; + + if (auto_attach) { + err = test_urandom_usdt__attach(skel); + if (!ASSERT_OK(err, "skel_auto_attach")) + goto cleanup; + } else { + l = bpf_program__attach_usdt(skel->progs.urand_read_without_sema, + urand_pid, "./urandom_read", + "urand", "read_without_sema", NULL); + if (!ASSERT_OK_PTR(l, "urand_without_sema_attach")) + goto cleanup; + skel->links.urand_read_without_sema = l; + + l = bpf_program__attach_usdt(skel->progs.urand_read_with_sema, + urand_pid, "./urandom_read", + "urand", "read_with_sema", NULL); + if (!ASSERT_OK_PTR(l, "urand_with_sema_attach")) + goto cleanup; + skel->links.urand_read_with_sema = l; + + l = bpf_program__attach_usdt(skel->progs.urandlib_read_without_sema, + urand_pid, "./liburandom_read.so", + "urandlib", "read_without_sema", NULL); + if (!ASSERT_OK_PTR(l, "urandlib_without_sema_attach")) + goto cleanup; + skel->links.urandlib_read_without_sema = l; + + l = bpf_program__attach_usdt(skel->progs.urandlib_read_with_sema, + urand_pid, "./liburandom_read.so", + "urandlib", "read_with_sema", NULL); + if (!ASSERT_OK_PTR(l, "urandlib_with_sema_attach")) + goto cleanup; + skel->links.urandlib_read_with_sema = l; + } + + /* trigger urandom_read USDTs */ + ASSERT_OK(urand_trigger(&urand_pipe), "urand_exit_code"); + + ASSERT_EQ(bss->urand_read_without_sema_call_cnt, 1, "urand_wo_sema_cnt"); + ASSERT_EQ(bss->urand_read_without_sema_buf_sz_sum, 256, "urand_wo_sema_sum"); + + ASSERT_EQ(bss->urand_read_with_sema_call_cnt, 1, "urand_w_sema_cnt"); + ASSERT_EQ(bss->urand_read_with_sema_buf_sz_sum, 256, "urand_w_sema_sum"); + + ASSERT_EQ(bss->urandlib_read_without_sema_call_cnt, 1, "urandlib_wo_sema_cnt"); + ASSERT_EQ(bss->urandlib_read_without_sema_buf_sz_sum, 256, "urandlib_wo_sema_sum"); + + ASSERT_EQ(bss->urandlib_read_with_sema_call_cnt, 1, "urandlib_w_sema_cnt"); + ASSERT_EQ(bss->urandlib_read_with_sema_buf_sz_sum, 256, "urandlib_w_sema_sum"); + +cleanup: + if (urand_pipe) + pclose(urand_pipe); + test_urandom_usdt__destroy(skel); +}
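All of the BPF programs driven by these subtests filter on the tracee's tgid, because u[ret]probes and USDTs attached with pid == -1 fire for every process that hits the instrumented address. The upper 32 bits of bpf_get_current_pid_tgid() hold the tgid (what userspace calls the pid); a minimal sketch of the idiom used throughout these selftests:

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/usdt.bpf.h>

int target_pid;	/* set from userspace via skel->bss before attaching */

SEC("usdt")
int only_my_process(struct pt_regs *ctx)
{
	/* upper 32 bits of pid_tgid are the tgid, i.e. the userspace "pid" */
	if (target_pid != (bpf_get_current_pid_tgid() >> 32))
		return 0;
	/* probe body runs only for the process under test */
	return 0;
}

char _license[] SEC("license") = "GPL";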
void test_usdt(void) +{ + if (test__start_subtest("basic")) + subtest_basic_usdt(); + if (test__start_subtest("multispec")) + subtest_multispec_usdt(); + if (test__start_subtest("urand_auto_attach")) + subtest_urandom_usdt(true /* auto_attach */); + if (test__start_subtest("urand_pid_attach")) + subtest_urandom_usdt(false /* auto_attach */); +}
diff --git a/tools/testing/selftests/bpf/progs/exhandler_kern.c b/tools/testing/selftests/bpf/progs/exhandler_kern.c index f5ca142abf8f..dd9b30a0f0fc 100644 --- a/tools/testing/selftests/bpf/progs/exhandler_kern.c +++ b/tools/testing/selftests/bpf/progs/exhandler_kern.c @@ -7,6 +7,8 @@ #include <bpf/bpf_tracing.h> #include <bpf/bpf_core_read.h> +#define barrier_var(var) asm volatile("" : "=r"(var) : "0"(var)) + char _license[] SEC("license") = "GPL"; unsigned int exception_triggered; @@ -37,7 +39,16 @@ int BPF_PROG(trace_task_newtask, struct task_struct *task, u64 clone_flags) */ work = task->task_works; func = work->func; - if (!work && !func) - exception_triggered++; + /* Currently the verifier will fail for a `btf_ptr |= btf_ptr` instruction. + * To work around the issue, use barrier_var() and rewrite as below to + * prevent the compiler from generating verifier-unfriendly code. + */ + barrier_var(work); + if (work) + return 0; + barrier_var(func); + if (func) + return 0; + exception_triggered++; return 0; }
diff --git a/tools/testing/selftests/bpf/progs/for_each_map_elem_write_key.c b/tools/testing/selftests/bpf/progs/for_each_map_elem_write_key.c new file mode 100644 index 000000000000..8e545865ea33 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/for_each_map_elem_write_key.c @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} array_map SEC(".maps"); + +static __u64 +check_array_elem(struct bpf_map *map, __u32 *key, __u64 *val, + void *data) +{ + bpf_get_current_comm(key, sizeof(*key)); + return 0; +} + +SEC("raw_tp/sys_enter") +int test_map_key_write(const void *ctx) +{ + bpf_for_each_map_elem(&array_map, check_array_elem, NULL, 0); + return 0; +} + +char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/linked_funcs1.c b/tools/testing/selftests/bpf/progs/linked_funcs1.c index b964ec1390c2..b05571bc67d5 100644 --- a/tools/testing/selftests/bpf/progs/linked_funcs1.c +++ b/tools/testing/selftests/bpf/progs/linked_funcs1.c @@ -4,6 +4,7 @@ #include "vmlinux.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> +#include <bpf/bpf_core_read.h> /* weak and shared between two files */ const volatile int my_tid __weak; @@ -44,6 +45,13 @@ void set_output_ctx1(__u64 *ctx) /* this weak instance should win because it's the first one */ __weak int set_output_weak(int x) { + static volatile int whatever; + + /* make sure we use CO-RE relocations in a weak function, this used to + * cause problems for BPF static linker + */ + whatever = bpf_core_type_size(struct task_struct); + output_weak1 = x; return x; } @@ -53,12 +61,17 @@ extern int set_output_val2(int x); /* here we'll force set_output_ctx2() to be __hidden in the final obj file */ __hidden extern void set_output_ctx2(__u64 *ctx); -SEC("raw_tp/sys_enter") +SEC("?raw_tp/sys_enter") int BPF_PROG(handler1, struct pt_regs *regs, long id) { + static volatile int whatever; + if (my_tid != (u32)bpf_get_current_pid_tgid() || id != syscall_id) return 0; + /* make sure we have CO-RE relocations in main program */ + whatever = bpf_core_type_size(struct task_struct); + set_output_val2(1000); set_output_ctx2(ctx); /* ctx definition is hidden in BPF_PROG macro */ diff --git 
a/tools/testing/selftests/bpf/progs/linked_funcs2.c b/tools/testing/selftests/bpf/progs/linked_funcs2.c index 575e958e60b7..ee7e3848ee4f 100644 --- a/tools/testing/selftests/bpf/progs/linked_funcs2.c +++ b/tools/testing/selftests/bpf/progs/linked_funcs2.c @@ -4,6 +4,7 @@ #include "vmlinux.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> +#include <bpf/bpf_core_read.h> /* weak and shared between both files */ const volatile int my_tid __weak; @@ -44,6 +45,13 @@ void set_output_ctx2(__u64 *ctx) /* this weak instance should lose, because it will be processed second */ __weak int set_output_weak(int x) { + static volatile int whatever; + + /* make sure we use CO-RE relocations in a weak function, this used to + * cause problems for BPF static linker + */ + whatever = 2 * bpf_core_type_size(struct task_struct); + output_weak2 = x; return 2 * x; } @@ -53,12 +61,17 @@ extern int set_output_val1(int x); /* here we'll force set_output_ctx1() to be __hidden in the final obj file */ __hidden extern void set_output_ctx1(__u64 *ctx); -SEC("raw_tp/sys_enter") +SEC("?raw_tp/sys_enter") int BPF_PROG(handler2, struct pt_regs *regs, long id) { + static volatile int whatever; + if (my_tid != (u32)bpf_get_current_pid_tgid() || id != syscall_id) return 0; + /* make sure we have CO-RE relocations in main program */ + whatever = bpf_core_type_size(struct task_struct); + set_output_val1(2000); set_output_ctx1(ctx); /* ctx definition is hidden in BPF_PROG macro */ diff --git a/tools/testing/selftests/bpf/progs/map_kptr.c b/tools/testing/selftests/bpf/progs/map_kptr.c new file mode 100644 index 000000000000..1b0e0409eaa5 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/map_kptr.c @@ -0,0 +1,190 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> + +struct map_value { + struct prog_test_ref_kfunc __kptr *unref_ptr; + struct prog_test_ref_kfunc __kptr_ref *ref_ptr; +}; + +struct array_map { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, struct map_value); + __uint(max_entries, 1); +} array_map SEC(".maps"); + +struct hash_map { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, int); + __type(value, struct map_value); + __uint(max_entries, 1); +} hash_map SEC(".maps"); + +struct hash_malloc_map { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, int); + __type(value, struct map_value); + __uint(max_entries, 1); + __uint(map_flags, BPF_F_NO_PREALLOC); +} hash_malloc_map SEC(".maps"); + +struct lru_hash_map { + __uint(type, BPF_MAP_TYPE_LRU_HASH); + __type(key, int); + __type(value, struct map_value); + __uint(max_entries, 1); +} lru_hash_map SEC(".maps"); + +#define DEFINE_MAP_OF_MAP(map_type, inner_map_type, name) \ + struct { \ + __uint(type, map_type); \ + __uint(max_entries, 1); \ + __uint(key_size, sizeof(int)); \ + __uint(value_size, sizeof(int)); \ + __array(values, struct inner_map_type); \ + } name SEC(".maps") = { \ + .values = { [0] = &inner_map_type }, \ + } + +DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_map, array_of_array_maps); +DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_ARRAY_OF_MAPS, hash_map, array_of_hash_maps); +DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_ARRAY_OF_MAPS, hash_malloc_map, array_of_hash_malloc_maps); +DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_ARRAY_OF_MAPS, lru_hash_map, array_of_lru_hash_maps); +DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, array_map, hash_of_array_maps); +DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, hash_map, hash_of_hash_maps); +DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, 
hash_malloc_map, hash_of_hash_malloc_maps); +DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, lru_hash_map, hash_of_lru_hash_maps); + +extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym; +extern struct prog_test_ref_kfunc * +bpf_kfunc_call_test_kptr_get(struct prog_test_ref_kfunc **p, int a, int b) __ksym; +extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym; + +static void test_kptr_unref(struct map_value *v) +{ + struct prog_test_ref_kfunc *p; + + p = v->unref_ptr; + /* store untrusted_ptr_or_null_ */ + v->unref_ptr = p; + if (!p) + return; + if (p->a + p->b > 100) + return; + /* store untrusted_ptr_ */ + v->unref_ptr = p; + /* store NULL */ + v->unref_ptr = NULL; +} + +static void test_kptr_ref(struct map_value *v) +{ + struct prog_test_ref_kfunc *p; + + p = v->ref_ptr; + /* store ptr_or_null_ */ + v->unref_ptr = p; + if (!p) + return; + if (p->a + p->b > 100) + return; + /* store NULL */ + p = bpf_kptr_xchg(&v->ref_ptr, NULL); + if (!p) + return; + if (p->a + p->b > 100) { + bpf_kfunc_call_test_release(p); + return; + } + /* store ptr_ */ + v->unref_ptr = p; + bpf_kfunc_call_test_release(p); + + p = bpf_kfunc_call_test_acquire(&(unsigned long){0}); + if (!p) + return; + /* store ptr_ */ + p = bpf_kptr_xchg(&v->ref_ptr, p); + if (!p) + return; + if (p->a + p->b > 100) { + bpf_kfunc_call_test_release(p); + return; + } + bpf_kfunc_call_test_release(p); +} + +static void test_kptr_get(struct map_value *v) +{ + struct prog_test_ref_kfunc *p; + + p = bpf_kfunc_call_test_kptr_get(&v->ref_ptr, 0, 0); + if (!p) + return; + if (p->a + p->b > 100) { + bpf_kfunc_call_test_release(p); + return; + } + bpf_kfunc_call_test_release(p); +} + +static void test_kptr(struct map_value *v) +{ + test_kptr_unref(v); + test_kptr_ref(v); + test_kptr_get(v); +} + +SEC("tc") +int test_map_kptr(struct __sk_buff *ctx) +{ + struct map_value *v; + int i, key = 0; + +#define TEST(map) \ + v = bpf_map_lookup_elem(&map, &key); \ + if (!v) \ + return 0; \ + test_kptr(v) + + TEST(array_map); + TEST(hash_map); + TEST(hash_malloc_map); + TEST(lru_hash_map); + +#undef TEST + return 0; +} + +SEC("tc") +int test_map_in_map_kptr(struct __sk_buff *ctx) +{ + struct map_value *v; + int i, key = 0; + void *map; + +#define TEST(map_in_map) \ + map = bpf_map_lookup_elem(&map_in_map, &key); \ + if (!map) \ + return 0; \ + v = bpf_map_lookup_elem(map, &key); \ + if (!v) \ + return 0; \ + test_kptr(v) + + TEST(array_of_array_maps); + TEST(array_of_hash_maps); + TEST(array_of_hash_malloc_maps); + TEST(array_of_lru_hash_maps); + TEST(hash_of_array_maps); + TEST(hash_of_hash_maps); + TEST(hash_of_hash_malloc_maps); + TEST(hash_of_lru_hash_maps); + +#undef TEST + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/map_ptr_kern.c b/tools/testing/selftests/bpf/progs/map_ptr_kern.c index b64df94ec476..db388f593d0a 100644 --- a/tools/testing/selftests/bpf/progs/map_ptr_kern.c +++ b/tools/testing/selftests/bpf/progs/map_ptr_kern.c @@ -367,7 +367,7 @@ static inline int check_array_of_maps(void) VERIFY(check_default(&array_of_maps->map, map)); inner_map = bpf_map_lookup_elem(array_of_maps, &key); - VERIFY(inner_map != 0); + VERIFY(inner_map != NULL); VERIFY(inner_map->map.max_entries == INNER_MAX_ENTRIES); return 1; @@ -394,7 +394,7 @@ static inline int check_hash_of_maps(void) VERIFY(check_default(&hash_of_maps->map, map)); inner_map = bpf_map_lookup_elem(hash_of_maps, &key); - VERIFY(inner_map != 0); + VERIFY(inner_map != NULL); 
VERIFY(inner_map->map.max_entries == INNER_MAX_ENTRIES); return 1;
diff --git a/tools/testing/selftests/bpf/progs/perf_event_stackmap.c b/tools/testing/selftests/bpf/progs/perf_event_stackmap.c index b3fcb5274ee0..f793280a3238 100644 --- a/tools/testing/selftests/bpf/progs/perf_event_stackmap.c +++ b/tools/testing/selftests/bpf/progs/perf_event_stackmap.c @@ -35,10 +35,10 @@ int oncpu(void *ctx) long val; val = bpf_get_stackid(ctx, &stackmap, 0); - if (val > 0) + if (val >= 0) stackid_kernel = 2; val = bpf_get_stackid(ctx, &stackmap, BPF_F_USER_STACK); - if (val > 0) + if (val >= 0) stackid_user = 2; trace = bpf_map_lookup_elem(&stackdata_map, &key);
diff --git a/tools/testing/selftests/bpf/progs/profiler.inc.h b/tools/testing/selftests/bpf/progs/profiler.inc.h index 4896fdf816f7..92331053dba3 100644 --- a/tools/testing/selftests/bpf/progs/profiler.inc.h +++ b/tools/testing/selftests/bpf/progs/profiler.inc.h @@ -826,8 +826,9 @@ out: SEC("kprobe/vfs_link") int BPF_KPROBE(kprobe__vfs_link, - struct dentry* old_dentry, struct inode* dir, - struct dentry* new_dentry, struct inode** delegated_inode) + struct dentry* old_dentry, struct user_namespace *mnt_userns, + struct inode* dir, struct dentry* new_dentry, + struct inode** delegated_inode) { struct bpf_func_stats_ctx stats_ctx; bpf_stats_enter(&stats_ctx, profiler_bpf_vfs_link);
diff --git a/tools/testing/selftests/bpf/progs/pyperf.h b/tools/testing/selftests/bpf/progs/pyperf.h index 1ed28882daf3..5d3dc4d66d47 100644 --- a/tools/testing/selftests/bpf/progs/pyperf.h +++ b/tools/testing/selftests/bpf/progs/pyperf.h @@ -299,7 +299,11 @@ int __on_event(struct bpf_raw_tracepoint_args *ctx) #ifdef NO_UNROLL #pragma clang loop unroll(disable) #else +#ifdef UNROLL_COUNT +#pragma clang loop unroll_count(UNROLL_COUNT) +#else #pragma clang loop unroll(full) +#endif #endif /* NO_UNROLL */ /* Unwind python stack */ for (int i = 0; i < STACK_MAX_LEN; ++i) {
diff --git a/tools/testing/selftests/bpf/progs/pyperf600.c b/tools/testing/selftests/bpf/progs/pyperf600.c index cb49b89e37cd..ce1aa5189cc4 100644 --- a/tools/testing/selftests/bpf/progs/pyperf600.c +++ b/tools/testing/selftests/bpf/progs/pyperf600.c @@ -1,9 +1,12 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2019 Facebook #define STACK_MAX_LEN 600 -/* clang will not unroll the loop 600 times. - * Instead it will unroll it to the amount it deemed - * appropriate, but the loop will still execute 600 times. - * Total program size is around 90k insns +/* A full unroll of 600 iterations would push total + * program size close to 298k insns, which can take + * BPF_JMP instruction offsets out of their 16-bit range. + * So limit the unroll count to 150: the total program + * size stays around 80k insns while the loop still + * executes 600 times. */ +#define UNROLL_COUNT 150 +#include "pyperf.h"
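The UNROLL_COUNT knob defined here feeds the new clang loop pragma added to pyperf.h above, which unrolls a loop by a fixed factor instead of fully. A standalone sketch of the same pragma selection structure, with MAX_LEN standing in for STACK_MAX_LEN:

#define MAX_LEN 600
#define UNROLL_COUNT 150

int sum(const int *vals)
{
	int s = 0;

#ifdef NO_UNROLL
#pragma clang loop unroll(disable)
#else
#ifdef UNROLL_COUNT
#pragma clang loop unroll_count(UNROLL_COUNT)
#else
#pragma clang loop unroll(full)
#endif
#endif /* NO_UNROLL */
	for (int i = 0; i < MAX_LEN; i++)
		s += vals[i];
	return s;
}

With unroll_count(150), the 600-iteration loop is emitted as 150 copies of the body executed four times, trading a little loop overhead for a 4x smaller body and, in the BPF case, jump offsets that stay within range.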
diff --git a/tools/testing/selftests/bpf/progs/skb_load_bytes.c b/tools/testing/selftests/bpf/progs/skb_load_bytes.c new file mode 100644 index 000000000000..e4252fd973be --- /dev/null +++ b/tools/testing/selftests/bpf/progs/skb_load_bytes.c @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +char _license[] SEC("license") = "GPL"; + +__u32 load_offset = 0; +int test_result = 0; + +SEC("tc") +int skb_process(struct __sk_buff *skb) +{ + char buf[16]; + + test_result = bpf_skb_load_bytes(skb, load_offset, buf, 10); + + return 0; +}
diff --git a/tools/testing/selftests/bpf/progs/strncmp_test.c b/tools/testing/selftests/bpf/progs/strncmp_test.c index 900d930d48a8..769668feed48 100644 --- a/tools/testing/selftests/bpf/progs/strncmp_test.c +++ b/tools/testing/selftests/bpf/progs/strncmp_test.c @@ -19,7 +19,7 @@ unsigned int no_const_str_size = STRNCMP_STR_SZ; char _license[] SEC("license") = "GPL"; -SEC("tp/syscalls/sys_enter_nanosleep") +SEC("?tp/syscalls/sys_enter_nanosleep") int do_strncmp(void *ctx) { if ((bpf_get_current_pid_tgid() >> 32) != target_pid) @@ -29,7 +29,7 @@ int do_strncmp(void *ctx) return 0; } -SEC("tp/syscalls/sys_enter_nanosleep") +SEC("?tp/syscalls/sys_enter_nanosleep") int strncmp_bad_not_const_str_size(void *ctx) { /* The value of string size is not const, so will fail */ @@ -37,7 +37,7 @@ int strncmp_bad_not_const_str_size(void *ctx) return 0; } -SEC("tp/syscalls/sys_enter_nanosleep") +SEC("?tp/syscalls/sys_enter_nanosleep") int strncmp_bad_writable_target(void *ctx) { /* Compared target is not read-only, so will fail */ @@ -45,7 +45,7 @@ int strncmp_bad_writable_target(void *ctx) return 0; } -SEC("tp/syscalls/sys_enter_nanosleep") +SEC("?tp/syscalls/sys_enter_nanosleep") int strncmp_bad_not_null_term_target(void *ctx) { /* Compared target is not null-terminated, so will fail */
diff --git a/tools/testing/selftests/bpf/progs/test_attach_probe.c b/tools/testing/selftests/bpf/progs/test_attach_probe.c index 8056a4c6d918..af994d16bb10 100644 --- a/tools/testing/selftests/bpf/progs/test_attach_probe.c +++ b/tools/testing/selftests/bpf/progs/test_attach_probe.c @@ -10,6 +10,10 @@ int kprobe_res = 0; int kretprobe_res = 0; int uprobe_res = 0; int uretprobe_res = 0; +int uprobe_byname_res = 0; +int uretprobe_byname_res = 0; +int uprobe_byname2_res = 0; +int uretprobe_byname2_res = 0; SEC("kprobe/sys_nanosleep") int handle_kprobe(struct pt_regs *ctx) @@ -25,18 +29,51 @@ int BPF_KRETPROBE(handle_kretprobe) return 0; } -SEC("uprobe/trigger_func") +SEC("uprobe") int handle_uprobe(struct pt_regs *ctx) { uprobe_res = 3; return 0; } -SEC("uretprobe/trigger_func") +SEC("uretprobe") int handle_uretprobe(struct pt_regs *ctx) { uretprobe_res = 4; return 0; } +SEC("uprobe") +int handle_uprobe_byname(struct pt_regs *ctx) +{ + uprobe_byname_res = 5; + return 0; +} + +/* use auto-attach format for section definition. 
*/ +SEC("uretprobe//proc/self/exe:trigger_func2") +int handle_uretprobe_byname(struct pt_regs *ctx) +{ + uretprobe_byname_res = 6; + return 0; +} + +SEC("uprobe") +int handle_uprobe_byname2(struct pt_regs *ctx) +{ + unsigned int size = PT_REGS_PARM1(ctx); + + /* verify malloc size */ + if (size == 1) + uprobe_byname2_res = 7; + return 0; +} + +SEC("uretprobe") +int handle_uretprobe_byname2(struct pt_regs *ctx) +{ + uretprobe_byname2_res = 8; + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_bpf_cookie.c b/tools/testing/selftests/bpf/progs/test_bpf_cookie.c index 2d3a7710e2ce..0e2222968918 100644 --- a/tools/testing/selftests/bpf/progs/test_bpf_cookie.c +++ b/tools/testing/selftests/bpf/progs/test_bpf_cookie.c @@ -37,14 +37,14 @@ int handle_kretprobe(struct pt_regs *ctx) return 0; } -SEC("uprobe/trigger_func") +SEC("uprobe") int handle_uprobe(struct pt_regs *ctx) { update(ctx, &uprobe_res); return 0; } -SEC("uretprobe/trigger_func") +SEC("uretprobe") int handle_uretprobe(struct pt_regs *ctx) { update(ctx, &uretprobe_res); diff --git a/tools/testing/selftests/bpf/progs/test_global_func17.c b/tools/testing/selftests/bpf/progs/test_global_func17.c new file mode 100644 index 000000000000..2b8b9b8ba018 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_global_func17.c @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> + +__noinline int foo(int *p) +{ + return p ? (*p = 42) : 0; +} + +const volatile int i; + +SEC("tc") +int test_cls(struct __sk_buff *skb) +{ + return foo((int *)&i); +} diff --git a/tools/testing/selftests/bpf/progs/test_helper_restricted.c b/tools/testing/selftests/bpf/progs/test_helper_restricted.c index 68d64c365f90..20ef9d433b97 100644 --- a/tools/testing/selftests/bpf/progs/test_helper_restricted.c +++ b/tools/testing/selftests/bpf/progs/test_helper_restricted.c @@ -56,7 +56,7 @@ static void spin_lock_work(void) } } -SEC("raw_tp/sys_enter") +SEC("?raw_tp/sys_enter") int raw_tp_timer(void *ctx) { timer_work(); @@ -64,7 +64,7 @@ int raw_tp_timer(void *ctx) return 0; } -SEC("tp/syscalls/sys_enter_nanosleep") +SEC("?tp/syscalls/sys_enter_nanosleep") int tp_timer(void *ctx) { timer_work(); @@ -72,7 +72,7 @@ int tp_timer(void *ctx) return 0; } -SEC("kprobe/sys_nanosleep") +SEC("?kprobe/sys_nanosleep") int kprobe_timer(void *ctx) { timer_work(); @@ -80,7 +80,7 @@ int kprobe_timer(void *ctx) return 0; } -SEC("perf_event") +SEC("?perf_event") int perf_event_timer(void *ctx) { timer_work(); @@ -88,7 +88,7 @@ int perf_event_timer(void *ctx) return 0; } -SEC("raw_tp/sys_enter") +SEC("?raw_tp/sys_enter") int raw_tp_spin_lock(void *ctx) { spin_lock_work(); @@ -96,7 +96,7 @@ int raw_tp_spin_lock(void *ctx) return 0; } -SEC("tp/syscalls/sys_enter_nanosleep") +SEC("?tp/syscalls/sys_enter_nanosleep") int tp_spin_lock(void *ctx) { spin_lock_work(); @@ -104,7 +104,7 @@ int tp_spin_lock(void *ctx) return 0; } -SEC("kprobe/sys_nanosleep") +SEC("?kprobe/sys_nanosleep") int kprobe_spin_lock(void *ctx) { spin_lock_work(); @@ -112,7 +112,7 @@ int kprobe_spin_lock(void *ctx) return 0; } -SEC("perf_event") +SEC("?perf_event") int perf_event_spin_lock(void *ctx) { spin_lock_work(); diff --git a/tools/testing/selftests/bpf/progs/test_ksyms_btf_write_check.c b/tools/testing/selftests/bpf/progs/test_ksyms_btf_write_check.c index 2180c41cd890..a72a5bf3812a 100644 --- a/tools/testing/selftests/bpf/progs/test_ksyms_btf_write_check.c +++ 
b/tools/testing/selftests/bpf/progs/test_ksyms_btf_write_check.c @@ -8,7 +8,7 @@ extern const int bpf_prog_active __ksym; /* int type global var. */ SEC("raw_tp/sys_enter") -int handler(const void *ctx) +int handler1(const void *ctx) { int *active; __u32 cpu; @@ -26,4 +26,20 @@ int handler(const void *ctx) return 0; } +__noinline int write_active(int *p) +{ + return p ? (*p = 42) : 0; +} + +SEC("raw_tp/sys_enter") +int handler2(const void *ctx) +{ + int *active; + __u32 cpu; + + active = bpf_this_cpu_ptr(&bpf_prog_active); + write_active(active); + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c b/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c index 19e4d2071c60..c8bc0c6947aa 100644 --- a/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c +++ b/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c @@ -218,7 +218,7 @@ static __noinline bool get_packet_dst(struct real_definition **real, if (hash != 0x358459b7 /* jhash of ipv4 packet */ && hash != 0x2f4bc6bb /* jhash of ipv6 packet */) - return 0; + return false; real_pos = bpf_map_lookup_elem(&ch_rings, &key); if (!real_pos) diff --git a/tools/testing/selftests/bpf/progs/test_log_fixup.c b/tools/testing/selftests/bpf/progs/test_log_fixup.c new file mode 100644 index 000000000000..a78980d897b3 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_log_fixup.c @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> + +struct task_struct___bad { + int pid; + int fake_field; + void *fake_field_subprog; +} __attribute__((preserve_access_index)); + +SEC("?raw_tp/sys_enter") +int bad_relo(const void *ctx) +{ + static struct task_struct___bad *t; + + return bpf_core_field_size(t->fake_field); +} + +static __noinline int bad_subprog(void) +{ + static struct task_struct___bad *t; + + /* ugliness below is a field offset relocation */ + return (void *)&t->fake_field_subprog - (void *)t; +} + +SEC("?raw_tp/sys_enter") +int bad_relo_subprog(const void *ctx) +{ + static struct task_struct___bad *t; + + return bad_subprog() + bpf_core_field_size(t->pid); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_sk_assign.c b/tools/testing/selftests/bpf/progs/test_sk_assign.c index 02f79356d5eb..98c6493d9b91 100644 --- a/tools/testing/selftests/bpf/progs/test_sk_assign.c +++ b/tools/testing/selftests/bpf/progs/test_sk_assign.c @@ -89,7 +89,6 @@ get_tuple(struct __sk_buff *skb, bool *ipv4, bool *tcp) static inline int handle_udp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4) { - struct bpf_sock_tuple ln = {0}; struct bpf_sock *sk; const int zero = 0; size_t tuple_len; @@ -121,7 +120,6 @@ assign: static inline int handle_tcp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4) { - struct bpf_sock_tuple ln = {0}; struct bpf_sock *sk; const int zero = 0; size_t tuple_len; @@ -161,7 +159,7 @@ assign: SEC("tc") int bpf_sk_assign_test(struct __sk_buff *skb) { - struct bpf_sock_tuple *tuple, ln = {0}; + struct bpf_sock_tuple *tuple; bool ipv4 = false; bool tcp = false; int tuple_len; diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c index 40f161480a2f..b502e5c92e33 100644 --- a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c +++ 
b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c @@ -52,7 +52,7 @@ static struct bpf_sock_tuple *get_tuple(void *data, __u64 nh_off, return result; } -SEC("tc") +SEC("?tc") int sk_lookup_success(struct __sk_buff *skb) { void *data_end = (void *)(long)skb->data_end; @@ -78,7 +78,7 @@ int sk_lookup_success(struct __sk_buff *skb) return sk ? TC_ACT_OK : TC_ACT_UNSPEC; } -SEC("tc") +SEC("?tc") int sk_lookup_success_simple(struct __sk_buff *skb) { struct bpf_sock_tuple tuple = {}; @@ -90,7 +90,7 @@ int sk_lookup_success_simple(struct __sk_buff *skb) return 0; } -SEC("tc") +SEC("?tc") int err_use_after_free(struct __sk_buff *skb) { struct bpf_sock_tuple tuple = {}; @@ -105,7 +105,7 @@ int err_use_after_free(struct __sk_buff *skb) return family; } -SEC("tc") +SEC("?tc") int err_modify_sk_pointer(struct __sk_buff *skb) { struct bpf_sock_tuple tuple = {}; @@ -120,7 +120,7 @@ int err_modify_sk_pointer(struct __sk_buff *skb) return 0; } -SEC("tc") +SEC("?tc") int err_modify_sk_or_null_pointer(struct __sk_buff *skb) { struct bpf_sock_tuple tuple = {}; @@ -134,7 +134,7 @@ int err_modify_sk_or_null_pointer(struct __sk_buff *skb) return 0; } -SEC("tc") +SEC("?tc") int err_no_release(struct __sk_buff *skb) { struct bpf_sock_tuple tuple = {}; @@ -143,7 +143,7 @@ int err_no_release(struct __sk_buff *skb) return 0; } -SEC("tc") +SEC("?tc") int err_release_twice(struct __sk_buff *skb) { struct bpf_sock_tuple tuple = {}; @@ -155,7 +155,7 @@ int err_release_twice(struct __sk_buff *skb) return 0; } -SEC("tc") +SEC("?tc") int err_release_unchecked(struct __sk_buff *skb) { struct bpf_sock_tuple tuple = {}; @@ -172,7 +172,7 @@ void lookup_no_release(struct __sk_buff *skb) bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); } -SEC("tc") +SEC("?tc") int err_no_release_subcall(struct __sk_buff *skb) { lookup_no_release(skb); diff --git a/tools/testing/selftests/bpf/progs/test_task_pt_regs.c b/tools/testing/selftests/bpf/progs/test_task_pt_regs.c index e6cb09259408..1926facba122 100644 --- a/tools/testing/selftests/bpf/progs/test_task_pt_regs.c +++ b/tools/testing/selftests/bpf/progs/test_task_pt_regs.c @@ -14,7 +14,7 @@ char current_regs[PT_REGS_SIZE] = {}; char ctx_regs[PT_REGS_SIZE] = {}; int uprobe_res = 0; -SEC("uprobe/trigger_func") +SEC("uprobe") int handle_uprobe(struct pt_regs *ctx) { struct task_struct *current; diff --git a/tools/testing/selftests/bpf/progs/test_uprobe_autoattach.c b/tools/testing/selftests/bpf/progs/test_uprobe_autoattach.c new file mode 100644 index 000000000000..ab75522e2eeb --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_uprobe_autoattach.c @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022, Oracle and/or its affiliates. */ + +#include "vmlinux.h" + +#include <bpf/bpf_core_read.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +int uprobe_byname_parm1 = 0; +int uprobe_byname_ran = 0; +int uretprobe_byname_rc = 0; +int uretprobe_byname_ran = 0; +size_t uprobe_byname2_parm1 = 0; +int uprobe_byname2_ran = 0; +char *uretprobe_byname2_rc = NULL; +int uretprobe_byname2_ran = 0; + +int test_pid; + +/* This program cannot auto-attach, but that should not stop other + * programs from attaching. 
+ */ +SEC("uprobe") +int handle_uprobe_noautoattach(struct pt_regs *ctx) +{ + return 0; +} + +SEC("uprobe//proc/self/exe:autoattach_trigger_func") +int handle_uprobe_byname(struct pt_regs *ctx) +{ + uprobe_byname_parm1 = PT_REGS_PARM1_CORE(ctx); + uprobe_byname_ran = 1; + return 0; +} + +SEC("uretprobe//proc/self/exe:autoattach_trigger_func") +int handle_uretprobe_byname(struct pt_regs *ctx) +{ + uretprobe_byname_rc = PT_REGS_RC_CORE(ctx); + uretprobe_byname_ran = 2; + return 0; +} + + +SEC("uprobe/libc.so.6:malloc") +int handle_uprobe_byname2(struct pt_regs *ctx) +{ + int pid = bpf_get_current_pid_tgid() >> 32; + + /* ignore irrelevant invocations */ + if (test_pid != pid) + return 0; + uprobe_byname2_parm1 = PT_REGS_PARM1_CORE(ctx); + uprobe_byname2_ran = 3; + return 0; +} + +SEC("uretprobe/libc.so.6:malloc") +int handle_uretprobe_byname2(struct pt_regs *ctx) +{ + int pid = bpf_get_current_pid_tgid() >> 32; + + /* ignore irrelevant invocations */ + if (test_pid != pid) + return 0; + uretprobe_byname2_rc = (char *)PT_REGS_RC_CORE(ctx); + uretprobe_byname2_ran = 4; + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_urandom_usdt.c b/tools/testing/selftests/bpf/progs/test_urandom_usdt.c new file mode 100644 index 000000000000..3539b02bd5f7 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_urandom_usdt.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/usdt.bpf.h> + +int urand_pid; + +int urand_read_without_sema_call_cnt; +int urand_read_without_sema_buf_sz_sum; + +SEC("usdt/./urandom_read:urand:read_without_sema") +int BPF_USDT(urand_read_without_sema, int iter_num, int iter_cnt, int buf_sz) +{ + if (urand_pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; + + __sync_fetch_and_add(&urand_read_without_sema_call_cnt, 1); + __sync_fetch_and_add(&urand_read_without_sema_buf_sz_sum, buf_sz); + + return 0; +} + +int urand_read_with_sema_call_cnt; +int urand_read_with_sema_buf_sz_sum; + +SEC("usdt/./urandom_read:urand:read_with_sema") +int BPF_USDT(urand_read_with_sema, int iter_num, int iter_cnt, int buf_sz) +{ + if (urand_pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; + + __sync_fetch_and_add(&urand_read_with_sema_call_cnt, 1); + __sync_fetch_and_add(&urand_read_with_sema_buf_sz_sum, buf_sz); + + return 0; +} + +int urandlib_read_without_sema_call_cnt; +int urandlib_read_without_sema_buf_sz_sum; + +SEC("usdt/./liburandom_read.so:urandlib:read_without_sema") +int BPF_USDT(urandlib_read_without_sema, int iter_num, int iter_cnt, int buf_sz) +{ + if (urand_pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; + + __sync_fetch_and_add(&urandlib_read_without_sema_call_cnt, 1); + __sync_fetch_and_add(&urandlib_read_without_sema_buf_sz_sum, buf_sz); + + return 0; +} + +int urandlib_read_with_sema_call_cnt; +int urandlib_read_with_sema_buf_sz_sum; + +SEC("usdt/./liburandom_read.so:urandlib:read_with_sema") +int BPF_USDT(urandlib_read_with_sema, int iter_num, int iter_cnt, int buf_sz) +{ + if (urand_pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; + + __sync_fetch_and_add(&urandlib_read_with_sema_call_cnt, 1); + __sync_fetch_and_add(&urandlib_read_with_sema_buf_sz_sum, buf_sz); + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_usdt.c b/tools/testing/selftests/bpf/progs/test_usdt.c new file mode 100644 index 
000000000000..505aab9a5234 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_usdt.c @@ -0,0 +1,96 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/usdt.bpf.h> + +int my_pid; + +int usdt0_called; +u64 usdt0_cookie; +int usdt0_arg_cnt; +int usdt0_arg_ret; + +SEC("usdt") +int usdt0(struct pt_regs *ctx) +{ + long tmp; + + if (my_pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; + + __sync_fetch_and_add(&usdt0_called, 1); + + usdt0_cookie = bpf_usdt_cookie(ctx); + usdt0_arg_cnt = bpf_usdt_arg_cnt(ctx); + /* should return -ENOENT for any arg_num */ + usdt0_arg_ret = bpf_usdt_arg(ctx, bpf_get_prandom_u32(), &tmp); + return 0; +} + +int usdt3_called; +u64 usdt3_cookie; +int usdt3_arg_cnt; +int usdt3_arg_rets[3]; +u64 usdt3_args[3]; + +SEC("usdt//proc/self/exe:test:usdt3") +int usdt3(struct pt_regs *ctx) +{ + long tmp; + + if (my_pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; + + __sync_fetch_and_add(&usdt3_called, 1); + + usdt3_cookie = bpf_usdt_cookie(ctx); + usdt3_arg_cnt = bpf_usdt_arg_cnt(ctx); + + usdt3_arg_rets[0] = bpf_usdt_arg(ctx, 0, &tmp); + usdt3_args[0] = (int)tmp; + + usdt3_arg_rets[1] = bpf_usdt_arg(ctx, 1, &tmp); + usdt3_args[1] = (long)tmp; + + usdt3_arg_rets[2] = bpf_usdt_arg(ctx, 2, &tmp); + usdt3_args[2] = (uintptr_t)tmp; + + return 0; +} + +int usdt12_called; +u64 usdt12_cookie; +int usdt12_arg_cnt; +u64 usdt12_args[12]; + +SEC("usdt//proc/self/exe:test:usdt12") +int BPF_USDT(usdt12, int a1, int a2, long a3, long a4, unsigned a5, + long a6, __u64 a7, uintptr_t a8, int a9, short a10, + short a11, signed char a12) +{ + if (my_pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; + + __sync_fetch_and_add(&usdt12_called, 1); + + usdt12_cookie = bpf_usdt_cookie(ctx); + usdt12_arg_cnt = bpf_usdt_arg_cnt(ctx); + + usdt12_args[0] = a1; + usdt12_args[1] = a2; + usdt12_args[2] = a3; + usdt12_args[3] = a4; + usdt12_args[4] = a5; + usdt12_args[5] = a6; + usdt12_args[6] = a7; + usdt12_args[7] = a8; + usdt12_args[8] = a9; + usdt12_args[9] = a10; + usdt12_args[10] = a11; + usdt12_args[11] = a12; + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_usdt_multispec.c b/tools/testing/selftests/bpf/progs/test_usdt_multispec.c new file mode 100644 index 000000000000..aa6de32b50d1 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_usdt_multispec.c @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. 
*/ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/usdt.bpf.h> + +/* this file is linked together with test_usdt.c to validate that usdt.bpf.h + * can be included in multiple .bpf.c files forming single final BPF object + * file + */ + +extern int my_pid; + +int usdt_100_called; +int usdt_100_sum; + +SEC("usdt//proc/self/exe:test:usdt_100") +int BPF_USDT(usdt_100, int x) +{ + long tmp; + + if (my_pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; + + __sync_fetch_and_add(&usdt_100_called, 1); + __sync_fetch_and_add(&usdt_100_sum, x); + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c index 596c4e71bf3a..125d872d7981 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c @@ -564,22 +564,22 @@ static bool get_packet_dst(struct real_definition **real, hash = get_packet_hash(pckt, hash_16bytes); if (hash != 0x358459b7 /* jhash of ipv4 packet */ && hash != 0x2f4bc6bb /* jhash of ipv6 packet */) - return 0; + return false; key = 2 * vip_info->vip_num + hash % 2; real_pos = bpf_map_lookup_elem(&ch_rings, &key); if (!real_pos) - return 0; + return false; key = *real_pos; *real = bpf_map_lookup_elem(&reals, &key); if (!(*real)) - return 0; + return false; if (!(vip_info->flags & (1 << 1))) { __u32 conn_rate_key = 512 + 2; struct lb_stats *conn_rate_stats = bpf_map_lookup_elem(&stats, &conn_rate_key); if (!conn_rate_stats) - return 1; + return true; cur_time = bpf_ktime_get_ns(); if ((cur_time - conn_rate_stats->v2) >> 32 > 0xffFFFF) { conn_rate_stats->v1 = 1; @@ -587,14 +587,14 @@ static bool get_packet_dst(struct real_definition **real, } else { conn_rate_stats->v1 += 1; if (conn_rate_stats->v1 >= 1) - return 1; + return true; } if (pckt->flow.proto == IPPROTO_UDP) new_dst_lru.atime = cur_time; new_dst_lru.pos = key; bpf_map_update_elem(lru_map, &pckt->flow, &new_dst_lru, 0); } - return 1; + return true; } __attribute__ ((noinline)) diff --git a/tools/testing/selftests/bpf/progs/trace_dummy_st_ops.c b/tools/testing/selftests/bpf/progs/trace_dummy_st_ops.c new file mode 100644 index 000000000000..00a4be9d3074 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/trace_dummy_st_ops.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +int val = 0; + +SEC("fentry/test_1") +int BPF_PROG(fentry_test_1, __u64 *st_ops_ctx) +{ + __u64 state; + + /* Read the traced st_ops arg1 which is a pointer */ + bpf_probe_read_kernel(&state, sizeof(__u64), (void *)st_ops_ctx); + /* Read state->val */ + bpf_probe_read_kernel(&val, sizeof(__u32), (void *)state); + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/trigger_bench.c b/tools/testing/selftests/bpf/progs/trigger_bench.c index 2ab049b54d6c..694e7cec1823 100644 --- a/tools/testing/selftests/bpf/progs/trigger_bench.c +++ b/tools/testing/selftests/bpf/progs/trigger_bench.c @@ -54,7 +54,7 @@ int bench_trigger_fmodret(void *ctx) return -22; } -SEC("uprobe/self/uprobe_target") +SEC("uprobe") int bench_trigger_uprobe(void *ctx) { __sync_add_and_fetch(&hits, 1); diff --git a/tools/testing/selftests/bpf/sdt-config.h b/tools/testing/selftests/bpf/sdt-config.h new file mode 100644 index 000000000000..733045a52771 --- /dev/null +++ b/tools/testing/selftests/bpf/sdt-config.h @@ -0,0 +1,6 @@ +/* 
includes/sys/sdt-config.h. Generated from sdt-config.h.in by configure. + + This file just defines _SDT_ASM_SECTION_AUTOGROUP_SUPPORT to 0 or 1 to + indicate whether the assembler supports "?" in .pushsection directives. */ + +#define _SDT_ASM_SECTION_AUTOGROUP_SUPPORT 1 diff --git a/tools/testing/selftests/bpf/sdt.h b/tools/testing/selftests/bpf/sdt.h new file mode 100644 index 000000000000..ca0162b4dc57 --- /dev/null +++ b/tools/testing/selftests/bpf/sdt.h @@ -0,0 +1,513 @@ +/* <sys/sdt.h> - Systemtap static probe definition macros. + + This file is dedicated to the public domain, pursuant to CC0 + (https://creativecommons.org/publicdomain/zero/1.0/) +*/ + +#ifndef _SYS_SDT_H +#define _SYS_SDT_H 1 + +/* + This file defines a family of macros + + STAP_PROBEn(op1, ..., opn) + + that emit a nop into the instruction stream, and some data into an auxiliary + note section. The data in the note section describes the operands, in terms + of size and location. Each location is encoded as assembler operand string. + Consumer tools such as gdb or systemtap insert breakpoints on top of + the nop, and decode the location operand-strings, like an assembler, + to find the values being passed. + + The operand strings are selected by the compiler for each operand. + They are constrained by gcc inline-assembler codes. The default is: + + #define STAP_SDT_ARG_CONSTRAINT nor + + This is a good default if the operands tend to be integral and + moderate in number (smaller than number of registers). In other + cases, the compiler may report "'asm' requires impossible reload" or + similar. In this case, consider simplifying the macro call (fewer + and simpler operands), reduce optimization, or override the default + constraints string via: + + #define STAP_SDT_ARG_CONSTRAINT g + #include <sys/sdt.h> + + See also: + https://sourceware.org/systemtap/wiki/UserSpaceProbeImplementation + https://gcc.gnu.org/onlinedocs/gcc/Constraints.html + */ + + + +#ifdef __ASSEMBLER__ +# define _SDT_PROBE(provider, name, n, arglist) \ + _SDT_ASM_BODY(provider, name, _SDT_ASM_SUBSTR_1, (_SDT_DEPAREN_##n arglist)) \ + _SDT_ASM_BASE +# define _SDT_ASM_1(x) x; +# define _SDT_ASM_2(a, b) a,b; +# define _SDT_ASM_3(a, b, c) a,b,c; +# define _SDT_ASM_5(a, b, c, d, e) a,b,c,d,e; +# define _SDT_ASM_STRING_1(x) .asciz #x; +# define _SDT_ASM_SUBSTR_1(x) .ascii #x; +# define _SDT_DEPAREN_0() /* empty */ +# define _SDT_DEPAREN_1(a) a +# define _SDT_DEPAREN_2(a,b) a b +# define _SDT_DEPAREN_3(a,b,c) a b c +# define _SDT_DEPAREN_4(a,b,c,d) a b c d +# define _SDT_DEPAREN_5(a,b,c,d,e) a b c d e +# define _SDT_DEPAREN_6(a,b,c,d,e,f) a b c d e f +# define _SDT_DEPAREN_7(a,b,c,d,e,f,g) a b c d e f g +# define _SDT_DEPAREN_8(a,b,c,d,e,f,g,h) a b c d e f g h +# define _SDT_DEPAREN_9(a,b,c,d,e,f,g,h,i) a b c d e f g h i +# define _SDT_DEPAREN_10(a,b,c,d,e,f,g,h,i,j) a b c d e f g h i j +# define _SDT_DEPAREN_11(a,b,c,d,e,f,g,h,i,j,k) a b c d e f g h i j k +# define _SDT_DEPAREN_12(a,b,c,d,e,f,g,h,i,j,k,l) a b c d e f g h i j k l +#else +#if defined _SDT_HAS_SEMAPHORES +#define _SDT_NOTE_SEMAPHORE_USE(provider, name) \ + __asm__ __volatile__ ("" :: "m" (provider##_##name##_semaphore)); +#else +#define _SDT_NOTE_SEMAPHORE_USE(provider, name) +#endif + +# define _SDT_PROBE(provider, name, n, arglist) \ + do { \ + _SDT_NOTE_SEMAPHORE_USE(provider, name); \ + __asm__ __volatile__ (_SDT_ASM_BODY(provider, name, _SDT_ASM_ARGS, (n)) \ + :: _SDT_ASM_OPERANDS_##n arglist); \ + __asm__ __volatile__ (_SDT_ASM_BASE); \ + } while (0) +# define _SDT_S(x) #x +# define 
_SDT_ASM_1(x) _SDT_S(x) "\n" +# define _SDT_ASM_2(a, b) _SDT_S(a) "," _SDT_S(b) "\n" +# define _SDT_ASM_3(a, b, c) _SDT_S(a) "," _SDT_S(b) "," \ + _SDT_S(c) "\n" +# define _SDT_ASM_5(a, b, c, d, e) _SDT_S(a) "," _SDT_S(b) "," \ + _SDT_S(c) "," _SDT_S(d) "," \ + _SDT_S(e) "\n" +# define _SDT_ASM_ARGS(n) _SDT_ASM_TEMPLATE_##n +# define _SDT_ASM_STRING_1(x) _SDT_ASM_1(.asciz #x) +# define _SDT_ASM_SUBSTR_1(x) _SDT_ASM_1(.ascii #x) + +# define _SDT_ARGFMT(no) _SDT_ASM_1(_SDT_SIGN %n[_SDT_S##no]) \ + _SDT_ASM_1(_SDT_SIZE %n[_SDT_S##no]) \ + _SDT_ASM_1(_SDT_TYPE %n[_SDT_S##no]) \ + _SDT_ASM_SUBSTR(_SDT_ARGTMPL(_SDT_A##no)) + + +# ifndef STAP_SDT_ARG_CONSTRAINT +# if defined __powerpc__ +# define STAP_SDT_ARG_CONSTRAINT nZr +# elif defined __arm__ +# define STAP_SDT_ARG_CONSTRAINT g +# else +# define STAP_SDT_ARG_CONSTRAINT nor +# endif +# endif + +# define _SDT_STRINGIFY(x) #x +# define _SDT_ARG_CONSTRAINT_STRING(x) _SDT_STRINGIFY(x) +/* _SDT_S encodes the size and type as 0xSSTT which is decoded by the assembler + macros _SDT_SIZE and _SDT_TYPE */ +# define _SDT_ARG(n, x) \ + [_SDT_S##n] "n" ((_SDT_ARGSIGNED (x) ? (int)-1 : 1) * (-(((int) _SDT_ARGSIZE (x)) << 8) + (-(0x7f & __builtin_classify_type (x))))), \ + [_SDT_A##n] _SDT_ARG_CONSTRAINT_STRING (STAP_SDT_ARG_CONSTRAINT) (_SDT_ARGVAL (x)) +#endif +#define _SDT_ASM_STRING(x) _SDT_ASM_STRING_1(x) +#define _SDT_ASM_SUBSTR(x) _SDT_ASM_SUBSTR_1(x) + +#define _SDT_ARGARRAY(x) (__builtin_classify_type (x) == 14 \ + || __builtin_classify_type (x) == 5) + +#ifdef __cplusplus +# define _SDT_ARGSIGNED(x) (!_SDT_ARGARRAY (x) \ + && __sdt_type<__typeof (x)>::__sdt_signed) +# define _SDT_ARGSIZE(x) (_SDT_ARGARRAY (x) \ + ? sizeof (void *) : sizeof (x)) +# define _SDT_ARGVAL(x) (x) + +# include <cstddef> + +template<typename __sdt_T> +struct __sdt_type +{ + static const bool __sdt_signed = false; +}; + +#define __SDT_ALWAYS_SIGNED(T) \ +template<> struct __sdt_type<T> { static const bool __sdt_signed = true; }; +#define __SDT_COND_SIGNED(T,CT) \ +template<> struct __sdt_type<T> { static const bool __sdt_signed = ((CT)(-1) < 1); }; +__SDT_ALWAYS_SIGNED(signed char) +__SDT_ALWAYS_SIGNED(short) +__SDT_ALWAYS_SIGNED(int) +__SDT_ALWAYS_SIGNED(long) +__SDT_ALWAYS_SIGNED(long long) +__SDT_ALWAYS_SIGNED(volatile signed char) +__SDT_ALWAYS_SIGNED(volatile short) +__SDT_ALWAYS_SIGNED(volatile int) +__SDT_ALWAYS_SIGNED(volatile long) +__SDT_ALWAYS_SIGNED(volatile long long) +__SDT_ALWAYS_SIGNED(const signed char) +__SDT_ALWAYS_SIGNED(const short) +__SDT_ALWAYS_SIGNED(const int) +__SDT_ALWAYS_SIGNED(const long) +__SDT_ALWAYS_SIGNED(const long long) +__SDT_ALWAYS_SIGNED(const volatile signed char) +__SDT_ALWAYS_SIGNED(const volatile short) +__SDT_ALWAYS_SIGNED(const volatile int) +__SDT_ALWAYS_SIGNED(const volatile long) +__SDT_ALWAYS_SIGNED(const volatile long long) +__SDT_COND_SIGNED(char, char) +__SDT_COND_SIGNED(wchar_t, wchar_t) +__SDT_COND_SIGNED(volatile char, char) +__SDT_COND_SIGNED(volatile wchar_t, wchar_t) +__SDT_COND_SIGNED(const char, char) +__SDT_COND_SIGNED(const wchar_t, wchar_t) +__SDT_COND_SIGNED(const volatile char, char) +__SDT_COND_SIGNED(const volatile wchar_t, wchar_t) +#if defined (__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4)) +/* __SDT_COND_SIGNED(char16_t) */ +/* __SDT_COND_SIGNED(char32_t) */ +#endif + +template<typename __sdt_E> +struct __sdt_type<__sdt_E[]> : public __sdt_type<__sdt_E *> {}; + +template<typename __sdt_E, size_t __sdt_N> +struct __sdt_type<__sdt_E[__sdt_N]> : public __sdt_type<__sdt_E *> {}; + +#elif 
!defined(__ASSEMBLER__) +__extension__ extern unsigned long long __sdt_unsp; +# define _SDT_ARGINTTYPE(x) \ + __typeof (__builtin_choose_expr (((__builtin_classify_type (x) \ + + 3) & -4) == 4, (x), 0U)) +# define _SDT_ARGSIGNED(x) \ + (!__extension__ \ + (__builtin_constant_p ((((unsigned long long) \ + (_SDT_ARGINTTYPE (x)) __sdt_unsp) \ + & ((unsigned long long)1 << (sizeof (unsigned long long) \ + * __CHAR_BIT__ - 1))) == 0) \ + || (_SDT_ARGINTTYPE (x)) -1 > (_SDT_ARGINTTYPE (x)) 0)) +# define _SDT_ARGSIZE(x) \ + (_SDT_ARGARRAY (x) ? sizeof (void *) : sizeof (x)) +# define _SDT_ARGVAL(x) (x) +#endif + +#if defined __powerpc__ || defined __powerpc64__ +# define _SDT_ARGTMPL(id) %I[id]%[id] +#elif defined __i386__ +# define _SDT_ARGTMPL(id) %k[id] /* gcc.gnu.org/PR80115 sourceware.org/PR24541 */ +#else +# define _SDT_ARGTMPL(id) %[id] +#endif + +/* NB: gdb PR24541 highlighted an unspecified corner of the sdt.h + operand note format. + + The named register may be a longer or shorter (!) alias for the + storage where the value in question is found. For example, on + i386, 64-bit value may be put in register pairs, and the register + name stored would identify just one of them. Previously, gcc was + asked to emit the %w[id] (16-bit alias of some registers holding + operands), even when a wider 32-bit value was used. + + Bottom line: the byte-width given before the @ sign governs. If + there is a mismatch between that width and that of the named + register, then a sys/sdt.h note consumer may need to employ + architecture-specific heuristics to figure out where the compiler + has actually put the complete value. +*/ + +#ifdef __LP64__ +# define _SDT_ASM_ADDR .8byte +#else +# define _SDT_ASM_ADDR .4byte +#endif + +/* The ia64 and s390 nop instructions take an argument. */ +#if defined(__ia64__) || defined(__s390__) || defined(__s390x__) +#define _SDT_NOP nop 0 +#else +#define _SDT_NOP nop +#endif + +#define _SDT_NOTE_NAME "stapsdt" +#define _SDT_NOTE_TYPE 3 + +/* If the assembler supports the necessary feature, then we can play + nice with code in COMDAT sections, which comes up in C++ code. + Without that assembler support, some combinations of probe placements + in certain kinds of C++ code may produce link-time errors. */ +#include "sdt-config.h" +#if _SDT_ASM_SECTION_AUTOGROUP_SUPPORT +# define _SDT_ASM_AUTOGROUP "?" 
+#else +# define _SDT_ASM_AUTOGROUP "" +#endif + +#define _SDT_DEF_MACROS \ + _SDT_ASM_1(.altmacro) \ + _SDT_ASM_1(.macro _SDT_SIGN x) \ + _SDT_ASM_3(.pushsection .note.stapsdt,"","note") \ + _SDT_ASM_1(.iflt \\x) \ + _SDT_ASM_1(.ascii "-") \ + _SDT_ASM_1(.endif) \ + _SDT_ASM_1(.popsection) \ + _SDT_ASM_1(.endm) \ + _SDT_ASM_1(.macro _SDT_SIZE_ x) \ + _SDT_ASM_3(.pushsection .note.stapsdt,"","note") \ + _SDT_ASM_1(.ascii "\x") \ + _SDT_ASM_1(.popsection) \ + _SDT_ASM_1(.endm) \ + _SDT_ASM_1(.macro _SDT_SIZE x) \ + _SDT_ASM_1(_SDT_SIZE_ %%((-(-\\x*((-\\x>0)-(-\\x<0))))>>8)) \ + _SDT_ASM_1(.endm) \ + _SDT_ASM_1(.macro _SDT_TYPE_ x) \ + _SDT_ASM_3(.pushsection .note.stapsdt,"","note") \ + _SDT_ASM_2(.ifc 8,\\x) \ + _SDT_ASM_1(.ascii "f") \ + _SDT_ASM_1(.endif) \ + _SDT_ASM_1(.ascii "@") \ + _SDT_ASM_1(.popsection) \ + _SDT_ASM_1(.endm) \ + _SDT_ASM_1(.macro _SDT_TYPE x) \ + _SDT_ASM_1(_SDT_TYPE_ %%((\\x)&(0xff))) \ + _SDT_ASM_1(.endm) + +#define _SDT_UNDEF_MACROS \ + _SDT_ASM_1(.purgem _SDT_SIGN) \ + _SDT_ASM_1(.purgem _SDT_SIZE_) \ + _SDT_ASM_1(.purgem _SDT_SIZE) \ + _SDT_ASM_1(.purgem _SDT_TYPE_) \ + _SDT_ASM_1(.purgem _SDT_TYPE) + +#define _SDT_ASM_BODY(provider, name, pack_args, args, ...) \ + _SDT_DEF_MACROS \ + _SDT_ASM_1(990: _SDT_NOP) \ + _SDT_ASM_3( .pushsection .note.stapsdt,_SDT_ASM_AUTOGROUP,"note") \ + _SDT_ASM_1( .balign 4) \ + _SDT_ASM_3( .4byte 992f-991f, 994f-993f, _SDT_NOTE_TYPE) \ + _SDT_ASM_1(991: .asciz _SDT_NOTE_NAME) \ + _SDT_ASM_1(992: .balign 4) \ + _SDT_ASM_1(993: _SDT_ASM_ADDR 990b) \ + _SDT_ASM_1( _SDT_ASM_ADDR _.stapsdt.base) \ + _SDT_SEMAPHORE(provider,name) \ + _SDT_ASM_STRING(provider) \ + _SDT_ASM_STRING(name) \ + pack_args args \ + _SDT_ASM_SUBSTR(\x00) \ + _SDT_UNDEF_MACROS \ + _SDT_ASM_1(994: .balign 4) \ + _SDT_ASM_1( .popsection) + +#define _SDT_ASM_BASE \ + _SDT_ASM_1(.ifndef _.stapsdt.base) \ + _SDT_ASM_5( .pushsection .stapsdt.base,"aG","progbits", \ + .stapsdt.base,comdat) \ + _SDT_ASM_1( .weak _.stapsdt.base) \ + _SDT_ASM_1( .hidden _.stapsdt.base) \ + _SDT_ASM_1( _.stapsdt.base: .space 1) \ + _SDT_ASM_2( .size _.stapsdt.base, 1) \ + _SDT_ASM_1( .popsection) \ + _SDT_ASM_1(.endif) + +#if defined _SDT_HAS_SEMAPHORES +#define _SDT_SEMAPHORE(p,n) \ + _SDT_ASM_1( _SDT_ASM_ADDR p##_##n##_semaphore) +#else +#define _SDT_SEMAPHORE(p,n) _SDT_ASM_1( _SDT_ASM_ADDR 0) +#endif + +#define _SDT_ASM_BLANK _SDT_ASM_SUBSTR(\x20) +#define _SDT_ASM_TEMPLATE_0 /* no arguments */ +#define _SDT_ASM_TEMPLATE_1 _SDT_ARGFMT(1) +#define _SDT_ASM_TEMPLATE_2 _SDT_ASM_TEMPLATE_1 _SDT_ASM_BLANK _SDT_ARGFMT(2) +#define _SDT_ASM_TEMPLATE_3 _SDT_ASM_TEMPLATE_2 _SDT_ASM_BLANK _SDT_ARGFMT(3) +#define _SDT_ASM_TEMPLATE_4 _SDT_ASM_TEMPLATE_3 _SDT_ASM_BLANK _SDT_ARGFMT(4) +#define _SDT_ASM_TEMPLATE_5 _SDT_ASM_TEMPLATE_4 _SDT_ASM_BLANK _SDT_ARGFMT(5) +#define _SDT_ASM_TEMPLATE_6 _SDT_ASM_TEMPLATE_5 _SDT_ASM_BLANK _SDT_ARGFMT(6) +#define _SDT_ASM_TEMPLATE_7 _SDT_ASM_TEMPLATE_6 _SDT_ASM_BLANK _SDT_ARGFMT(7) +#define _SDT_ASM_TEMPLATE_8 _SDT_ASM_TEMPLATE_7 _SDT_ASM_BLANK _SDT_ARGFMT(8) +#define _SDT_ASM_TEMPLATE_9 _SDT_ASM_TEMPLATE_8 _SDT_ASM_BLANK _SDT_ARGFMT(9) +#define _SDT_ASM_TEMPLATE_10 _SDT_ASM_TEMPLATE_9 _SDT_ASM_BLANK _SDT_ARGFMT(10) +#define _SDT_ASM_TEMPLATE_11 _SDT_ASM_TEMPLATE_10 _SDT_ASM_BLANK _SDT_ARGFMT(11) +#define _SDT_ASM_TEMPLATE_12 _SDT_ASM_TEMPLATE_11 _SDT_ASM_BLANK _SDT_ARGFMT(12) +#define _SDT_ASM_OPERANDS_0() [__sdt_dummy] "g" (0) +#define _SDT_ASM_OPERANDS_1(arg1) _SDT_ARG(1, arg1) +#define _SDT_ASM_OPERANDS_2(arg1, arg2) \ + _SDT_ASM_OPERANDS_1(arg1), _SDT_ARG(2, 
arg2) +#define _SDT_ASM_OPERANDS_3(arg1, arg2, arg3) \ + _SDT_ASM_OPERANDS_2(arg1, arg2), _SDT_ARG(3, arg3) +#define _SDT_ASM_OPERANDS_4(arg1, arg2, arg3, arg4) \ + _SDT_ASM_OPERANDS_3(arg1, arg2, arg3), _SDT_ARG(4, arg4) +#define _SDT_ASM_OPERANDS_5(arg1, arg2, arg3, arg4, arg5) \ + _SDT_ASM_OPERANDS_4(arg1, arg2, arg3, arg4), _SDT_ARG(5, arg5) +#define _SDT_ASM_OPERANDS_6(arg1, arg2, arg3, arg4, arg5, arg6) \ + _SDT_ASM_OPERANDS_5(arg1, arg2, arg3, arg4, arg5), _SDT_ARG(6, arg6) +#define _SDT_ASM_OPERANDS_7(arg1, arg2, arg3, arg4, arg5, arg6, arg7) \ + _SDT_ASM_OPERANDS_6(arg1, arg2, arg3, arg4, arg5, arg6), _SDT_ARG(7, arg7) +#define _SDT_ASM_OPERANDS_8(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8) \ + _SDT_ASM_OPERANDS_7(arg1, arg2, arg3, arg4, arg5, arg6, arg7), \ + _SDT_ARG(8, arg8) +#define _SDT_ASM_OPERANDS_9(arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9) \ + _SDT_ASM_OPERANDS_8(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8), \ + _SDT_ARG(9, arg9) +#define _SDT_ASM_OPERANDS_10(arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10) \ + _SDT_ASM_OPERANDS_9(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9), \ + _SDT_ARG(10, arg10) +#define _SDT_ASM_OPERANDS_11(arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10,arg11) \ + _SDT_ASM_OPERANDS_10(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10), \ + _SDT_ARG(11, arg11) +#define _SDT_ASM_OPERANDS_12(arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10,arg11,arg12) \ + _SDT_ASM_OPERANDS_11(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10, arg11), \ + _SDT_ARG(12, arg12) + +/* These macros can be used in C, C++, or assembly code. + In assembly code the arguments should use normal assembly operand syntax. */ + +#define STAP_PROBE(provider, name) \ + _SDT_PROBE(provider, name, 0, ()) +#define STAP_PROBE1(provider, name, arg1) \ + _SDT_PROBE(provider, name, 1, (arg1)) +#define STAP_PROBE2(provider, name, arg1, arg2) \ + _SDT_PROBE(provider, name, 2, (arg1, arg2)) +#define STAP_PROBE3(provider, name, arg1, arg2, arg3) \ + _SDT_PROBE(provider, name, 3, (arg1, arg2, arg3)) +#define STAP_PROBE4(provider, name, arg1, arg2, arg3, arg4) \ + _SDT_PROBE(provider, name, 4, (arg1, arg2, arg3, arg4)) +#define STAP_PROBE5(provider, name, arg1, arg2, arg3, arg4, arg5) \ + _SDT_PROBE(provider, name, 5, (arg1, arg2, arg3, arg4, arg5)) +#define STAP_PROBE6(provider, name, arg1, arg2, arg3, arg4, arg5, arg6) \ + _SDT_PROBE(provider, name, 6, (arg1, arg2, arg3, arg4, arg5, arg6)) +#define STAP_PROBE7(provider, name, arg1, arg2, arg3, arg4, arg5, arg6, arg7) \ + _SDT_PROBE(provider, name, 7, (arg1, arg2, arg3, arg4, arg5, arg6, arg7)) +#define STAP_PROBE8(provider,name,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8) \ + _SDT_PROBE(provider, name, 8, (arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8)) +#define STAP_PROBE9(provider,name,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9)\ + _SDT_PROBE(provider, name, 9, (arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9)) +#define STAP_PROBE10(provider,name,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10) \ + _SDT_PROBE(provider, name, 10, \ + (arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10)) +#define STAP_PROBE11(provider,name,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10,arg11) \ + _SDT_PROBE(provider, name, 11, \ + (arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10,arg11)) +#define STAP_PROBE12(provider,name,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10,arg11,arg12) \ + _SDT_PROBE(provider, name, 12, \ + (arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10,arg11,arg12)) + +/* This STAP_PROBEV macro 
can be used in variadic scenarios, where the + number of probe arguments is not known until compile time. Since + variadic macro support may vary with compiler options, you must + pre-#define SDT_USE_VARIADIC to enable this type of probe. + + The trick to count __VA_ARGS__ was inspired by this post by + Laurent Deniau <laurent.deniau@cern.ch>: + http://groups.google.com/group/comp.std.c/msg/346fc464319b1ee5 + + Note that our _SDT_NARG is called with an extra 0 arg that's not + counted, so we don't have to worry about the behavior of macros + called without any arguments. */ + +#define _SDT_NARG(...) __SDT_NARG(__VA_ARGS__, 12,11,10,9,8,7,6,5,4,3,2,1,0) +#define __SDT_NARG(_0,_1,_2,_3,_4,_5,_6,_7,_8,_9,_10,_11,_12, N, ...) N +#ifdef SDT_USE_VARIADIC +#define _SDT_PROBE_N(provider, name, N, ...) \ + _SDT_PROBE(provider, name, N, (__VA_ARGS__)) +#define STAP_PROBEV(provider, name, ...) \ + _SDT_PROBE_N(provider, name, _SDT_NARG(0, ##__VA_ARGS__), ##__VA_ARGS__) +#endif + +/* These macros are for use in asm statements. You must compile + with -std=gnu99 or -std=c99 to use the STAP_PROBE_ASM macro. + + The STAP_PROBE_ASM macro generates a quoted string to be used in the + template portion of the asm statement, concatenated with strings that + contain the actual assembly code around the probe site. + + For example: + + asm ("before\n" + STAP_PROBE_ASM(provider, fooprobe, %eax 4(%esi)) + "after"); + + emits the assembly code for "before\nafter", with a probe in between. + The probe arguments are the %eax register, and the value of the memory + word located 4 bytes past the address in the %esi register. Note that + because this is a simple asm, not a GNU C extended asm statement, these + % characters do not need to be doubled to generate literal %reg names. + + In a GNU C extended asm statement, the probe arguments can be specified + using the macro STAP_PROBE_ASM_TEMPLATE(n) for n arguments. The paired + macro STAP_PROBE_ASM_OPERANDS gives the C values of these probe arguments, + and appears in the input operand list of the asm statement. For example: + + asm ("someinsn %0,%1\n" // %0 is output operand, %1 is input operand + STAP_PROBE_ASM(provider, fooprobe, STAP_PROBE_ASM_TEMPLATE(3)) + "otherinsn %[namedarg]" + : "r" (outvar) + : "g" (some_value), [namedarg] "i" (1234), + STAP_PROBE_ASM_OPERANDS(3, some_value, some_ptr->field, 1234)); + + This is just like writing: + + STAP_PROBE3(provider, fooprobe, some_value, some_ptr->field, 1234); + + but the probe site is right between "someinsn" and "otherinsn". + + The probe arguments in STAP_PROBE_ASM can be given as assembly + operands instead, even inside a GNU C extended asm statement. + Note that these can use operand templates like %0 or %[name], + and likewise they must write %%reg for a literal operand of %reg. */ + +#define _SDT_ASM_BODY_1(p,n,...) _SDT_ASM_BODY(p,n,_SDT_ASM_SUBSTR,(__VA_ARGS__)) +#define _SDT_ASM_BODY_2(p,n,...) _SDT_ASM_BODY(p,n,/*_SDT_ASM_STRING */,__VA_ARGS__) +#define _SDT_ASM_BODY_N2(p,n,no,...) _SDT_ASM_BODY_ ## no(p,n,__VA_ARGS__) +#define _SDT_ASM_BODY_N1(p,n,no,...) _SDT_ASM_BODY_N2(p,n,no,__VA_ARGS__) +#define _SDT_ASM_BODY_N(p,n,...) _SDT_ASM_BODY_N1(p,n,_SDT_NARG(0, __VA_ARGS__),__VA_ARGS__) + +#if __STDC_VERSION__ >= 199901L +# define STAP_PROBE_ASM(provider, name, ...) \ + _SDT_ASM_BODY_N(provider, name, __VA_ARGS__) \ + _SDT_ASM_BASE +# define STAP_PROBE_ASM_OPERANDS(n, ...) 
_SDT_ASM_OPERANDS_##n(__VA_ARGS__) +#else +# define STAP_PROBE_ASM(provider, name, args) \ + _SDT_ASM_BODY(provider, name, /* _SDT_ASM_STRING */, (args)) \ + _SDT_ASM_BASE +#endif +#define STAP_PROBE_ASM_TEMPLATE(n) _SDT_ASM_TEMPLATE_##n,"use _SDT_ASM_TEMPLATE_" + + +/* DTrace compatible macro names. */ +#define DTRACE_PROBE(provider,probe) \ + STAP_PROBE(provider,probe) +#define DTRACE_PROBE1(provider,probe,parm1) \ + STAP_PROBE1(provider,probe,parm1) +#define DTRACE_PROBE2(provider,probe,parm1,parm2) \ + STAP_PROBE2(provider,probe,parm1,parm2) +#define DTRACE_PROBE3(provider,probe,parm1,parm2,parm3) \ + STAP_PROBE3(provider,probe,parm1,parm2,parm3) +#define DTRACE_PROBE4(provider,probe,parm1,parm2,parm3,parm4) \ + STAP_PROBE4(provider,probe,parm1,parm2,parm3,parm4) +#define DTRACE_PROBE5(provider,probe,parm1,parm2,parm3,parm4,parm5) \ + STAP_PROBE5(provider,probe,parm1,parm2,parm3,parm4,parm5) +#define DTRACE_PROBE6(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6) \ + STAP_PROBE6(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6) +#define DTRACE_PROBE7(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7) \ + STAP_PROBE7(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7) +#define DTRACE_PROBE8(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8) \ + STAP_PROBE8(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8) +#define DTRACE_PROBE9(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9) \ + STAP_PROBE9(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9) +#define DTRACE_PROBE10(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9,parm10) \ + STAP_PROBE10(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9,parm10) +#define DTRACE_PROBE11(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9,parm10,parm11) \ + STAP_PROBE11(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9,parm10,parm11) +#define DTRACE_PROBE12(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9,parm10,parm11,parm12) \ + STAP_PROBE12(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9,parm10,parm11,parm12) + + +#endif /* sys/sdt.h */ diff --git a/tools/testing/selftests/bpf/test_bpftool_synctypes.py b/tools/testing/selftests/bpf/test_bpftool_synctypes.py index 6bf21e47882a..c0e7acd698ed 100755 --- a/tools/testing/selftests/bpf/test_bpftool_synctypes.py +++ b/tools/testing/selftests/bpf/test_bpftool_synctypes.py @@ -180,7 +180,7 @@ class FileExtractor(object): @enum_name: name of the enum to parse """ start_marker = re.compile(f'enum {enum_name} {{\n') - pattern = re.compile('^\s*(BPF_\w+),?$') + pattern = re.compile('^\s*(BPF_\w+),?(\s+/\*.*\*/)?$') end_marker = re.compile('^};') parser = BlockParser(self.reader) parser.search_block(start_marker) diff --git a/tools/testing/selftests/bpf/test_cgroup_storage.c b/tools/testing/selftests/bpf/test_cgroup_storage.c index d6a1be4d8020..0861ea60dcdd 100644 --- a/tools/testing/selftests/bpf/test_cgroup_storage.c +++ b/tools/testing/selftests/bpf/test_cgroup_storage.c @@ -6,7 +6,7 @@ #include <stdlib.h> #include <sys/sysinfo.h> -#include "bpf_rlimit.h" +#include "bpf_util.h" #include "cgroup_helpers.h" #include "testing_helpers.h" @@ -44,13 +44,16 @@ int main(int argc, char **argv) unsigned long long *percpu_value; int cpu, nproc; - nproc = get_nprocs_conf(); + nproc = bpf_num_possible_cpus(); percpu_value = malloc(sizeof(*percpu_value) * nproc); if (!percpu_value) { printf("Not enough memory for per-cpu area (%d 
cpus)\n", nproc); goto err; } + /* Use libbpf 1.0 API mode */ + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); + map_fd = bpf_map_create(BPF_MAP_TYPE_CGROUP_STORAGE, NULL, sizeof(key), sizeof(value), 0, NULL); if (map_fd < 0) { diff --git a/tools/testing/selftests/bpf/test_dev_cgroup.c b/tools/testing/selftests/bpf/test_dev_cgroup.c index c299d3452695..7886265846a0 100644 --- a/tools/testing/selftests/bpf/test_dev_cgroup.c +++ b/tools/testing/selftests/bpf/test_dev_cgroup.c @@ -15,7 +15,6 @@ #include "cgroup_helpers.h" #include "testing_helpers.h" -#include "bpf_rlimit.h" #define DEV_CGROUP_PROG "./dev_cgroup.o" @@ -28,6 +27,9 @@ int main(int argc, char **argv) int prog_fd, cgroup_fd; __u32 prog_cnt; + /* Use libbpf 1.0 API mode */ + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); + if (bpf_prog_test_load(DEV_CGROUP_PROG, BPF_PROG_TYPE_CGROUP_DEVICE, &obj, &prog_fd)) { printf("Failed to load DEV_CGROUP program\n"); diff --git a/tools/testing/selftests/bpf/test_lpm_map.c b/tools/testing/selftests/bpf/test_lpm_map.c index aa294612e0a7..c028d621c744 100644 --- a/tools/testing/selftests/bpf/test_lpm_map.c +++ b/tools/testing/selftests/bpf/test_lpm_map.c @@ -26,7 +26,6 @@ #include <bpf/bpf.h> #include "bpf_util.h" -#include "bpf_rlimit.h" struct tlpm_node { struct tlpm_node *next; @@ -409,16 +408,13 @@ static void test_lpm_ipaddr(void) /* Test some lookups that should not match any entry */ inet_pton(AF_INET, "10.0.0.1", key_ipv4->data); - assert(bpf_map_lookup_elem(map_fd_ipv4, key_ipv4, &value) == -1 && - errno == ENOENT); + assert(bpf_map_lookup_elem(map_fd_ipv4, key_ipv4, &value) == -ENOENT); inet_pton(AF_INET, "11.11.11.11", key_ipv4->data); - assert(bpf_map_lookup_elem(map_fd_ipv4, key_ipv4, &value) == -1 && - errno == ENOENT); + assert(bpf_map_lookup_elem(map_fd_ipv4, key_ipv4, &value) == -ENOENT); inet_pton(AF_INET6, "2a00:ffff::", key_ipv6->data); - assert(bpf_map_lookup_elem(map_fd_ipv6, key_ipv6, &value) == -1 && - errno == ENOENT); + assert(bpf_map_lookup_elem(map_fd_ipv6, key_ipv6, &value) == -ENOENT); close(map_fd_ipv4); close(map_fd_ipv6); @@ -475,18 +471,15 @@ static void test_lpm_delete(void) /* remove non-existent node */ key->prefixlen = 32; inet_pton(AF_INET, "10.0.0.1", key->data); - assert(bpf_map_lookup_elem(map_fd, key, &value) == -1 && - errno == ENOENT); + assert(bpf_map_lookup_elem(map_fd, key, &value) == -ENOENT); key->prefixlen = 30; // unused prefix so far inet_pton(AF_INET, "192.255.0.0", key->data); - assert(bpf_map_delete_elem(map_fd, key) == -1 && - errno == ENOENT); + assert(bpf_map_delete_elem(map_fd, key) == -ENOENT); key->prefixlen = 16; // same prefix as the root node inet_pton(AF_INET, "192.255.0.0", key->data); - assert(bpf_map_delete_elem(map_fd, key) == -1 && - errno == ENOENT); + assert(bpf_map_delete_elem(map_fd, key) == -ENOENT); /* assert initial lookup */ key->prefixlen = 32; @@ -531,8 +524,7 @@ static void test_lpm_delete(void) key->prefixlen = 32; inet_pton(AF_INET, "192.168.128.1", key->data); - assert(bpf_map_lookup_elem(map_fd, key, &value) == -1 && - errno == ENOENT); + assert(bpf_map_lookup_elem(map_fd, key, &value) == -ENOENT); close(map_fd); } @@ -553,8 +545,7 @@ static void test_lpm_get_next_key(void) assert(map_fd >= 0); /* empty tree. get_next_key should return ENOENT */ - assert(bpf_map_get_next_key(map_fd, NULL, key_p) == -1 && - errno == ENOENT); + assert(bpf_map_get_next_key(map_fd, NULL, key_p) == -ENOENT); /* get and verify the first key, get the second one should fail. 
*/ key_p->prefixlen = 16; @@ -566,8 +557,7 @@ static void test_lpm_get_next_key(void) assert(key_p->prefixlen == 16 && key_p->data[0] == 192 && key_p->data[1] == 168); - assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 && - errno == ENOENT); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -ENOENT); /* no exact matching key should get the first one in post order. */ key_p->prefixlen = 8; @@ -591,8 +581,7 @@ static void test_lpm_get_next_key(void) next_key_p->data[1] == 168); memcpy(key_p, next_key_p, key_size); - assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 && - errno == ENOENT); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -ENOENT); /* Add one more element (total three) */ key_p->prefixlen = 24; @@ -615,8 +604,7 @@ static void test_lpm_get_next_key(void) next_key_p->data[1] == 168); memcpy(key_p, next_key_p, key_size); - assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 && - errno == ENOENT); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -ENOENT); /* Add one more element (total four) */ key_p->prefixlen = 24; @@ -644,8 +632,7 @@ static void test_lpm_get_next_key(void) next_key_p->data[1] == 168); memcpy(key_p, next_key_p, key_size); - assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 && - errno == ENOENT); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -ENOENT); /* Add one more element (total five) */ key_p->prefixlen = 28; @@ -679,8 +666,7 @@ static void test_lpm_get_next_key(void) next_key_p->data[1] == 168); memcpy(key_p, next_key_p, key_size); - assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 && - errno == ENOENT); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -ENOENT); /* no exact matching key should return the first one in post order */ key_p->prefixlen = 22; @@ -791,6 +777,9 @@ int main(void) /* we want predictable, pseudo random tests */ srand(0xf00ba1); + /* Use libbpf 1.0 API mode */ + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); + test_lpm_basic(); test_lpm_order(); diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c index 563bbe18c172..4d0650cfb5cd 100644 --- a/tools/testing/selftests/bpf/test_lru_map.c +++ b/tools/testing/selftests/bpf/test_lru_map.c @@ -18,7 +18,6 @@ #include <bpf/libbpf.h> #include "bpf_util.h" -#include "bpf_rlimit.h" #include "../../../include/linux/filter.h" #define LOCAL_FREE_TARGET (128) @@ -176,24 +175,20 @@ static void test_lru_sanity0(int map_type, int map_flags) BPF_NOEXIST)); /* BPF_NOEXIST means: add new element if it doesn't exist */ - assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST) == -1 - /* key=1 already exists */ - && errno == EEXIST); + assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST) == -EEXIST); + /* key=1 already exists */ - assert(bpf_map_update_elem(lru_map_fd, &key, value, -1) == -1 && - errno == EINVAL); + assert(bpf_map_update_elem(lru_map_fd, &key, value, -1) == -EINVAL); /* insert key=2 element */ /* check that key=2 is not found */ key = 2; - assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 && - errno == ENOENT); + assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT); /* BPF_EXIST means: update existing element */ - assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_EXIST) == -1 && - /* key=2 is not there */ - errno == ENOENT); + assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_EXIST) == -ENOENT); + /* key=2 is not there */ assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); 
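The -1/errno checks rewritten in these hunks all follow from one libbpf 1.0 change: once libbpf_set_strict_mode(LIBBPF_STRICT_ALL) is in effect (each converted test now calls it early in main()), the low-level map wrappers return -errno directly instead of -1 with errno set. A minimal standalone sketch of the new convention; the hash map here is illustrative, not one of the selftest maps:

	#include <assert.h>
	#include <errno.h>
	#include <bpf/bpf.h>
	#include <bpf/libbpf.h>

	int main(void)
	{
		int map_fd, key = 1, value;

		/* Opt in to libbpf 1.0 semantics: wrappers return -errno
		 * directly rather than -1 with errno set.
		 */
		libbpf_set_strict_mode(LIBBPF_STRICT_ALL);

		map_fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(key),
					sizeof(value), 1, NULL);
		assert(map_fd >= 0);

		/* Missing key: -ENOENT under strict mode; the legacy
		 * default would have returned -1 with errno == ENOENT.
		 */
		assert(bpf_map_lookup_elem(map_fd, &key, &value) == -ENOENT);
		return 0;
	}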
@@ -201,8 +196,7 @@ static void test_lru_sanity0(int map_type, int map_flags) /* check that key=3 is not found */ key = 3; - assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 && - errno == ENOENT); + assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT); /* check that key=1 can be found and mark the ref bit to * stop LRU from removing key=1 @@ -218,8 +212,7 @@ static void test_lru_sanity0(int map_type, int map_flags) /* key=2 has been removed from the LRU */ key = 2; - assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 && - errno == ENOENT); + assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT); /* lookup elem key=1 and delete it, then check it doesn't exist */ key = 1; @@ -382,8 +375,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free) end_key = 1 + batch_size; value[0] = 4321; for (key = 1; key < end_key; key++) { - assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 && - errno == ENOENT); + assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT); assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); assert(!bpf_map_lookup_elem_with_ref_bit(lru_map_fd, key, value)); @@ -563,8 +555,7 @@ static void do_test_lru_sanity5(unsigned long long last_key, int map_fd) assert(!bpf_map_lookup_elem_with_ref_bit(map_fd, key, value)); /* Cannot find the last key because it was removed by LRU */ - assert(bpf_map_lookup_elem(map_fd, &last_key, value) == -1 && - errno == ENOENT); + assert(bpf_map_lookup_elem(map_fd, &last_key, value) == -ENOENT); } /* Test map with only one element */ @@ -712,21 +703,18 @@ static void test_lru_sanity7(int map_type, int map_flags) BPF_NOEXIST)); /* BPF_NOEXIST means: add new element if it doesn't exist */ - assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST) == -1 - /* key=1 already exists */ - && errno == EEXIST); + assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST) == -EEXIST); + /* key=1 already exists */ /* insert key=2 element */ /* check that key=2 is not found */ key = 2; - assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 && - errno == ENOENT); + assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT); /* BPF_EXIST means: update existing element */ - assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_EXIST) == -1 && - /* key=2 is not there */ - errno == ENOENT); + assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_EXIST) == -ENOENT); + /* key=2 is not there */ assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); @@ -734,8 +722,7 @@ static void test_lru_sanity7(int map_type, int map_flags) /* check that key=3 is not found */ key = 3; - assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 && - errno == ENOENT); + assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT); /* check that key=1 can be found and mark the ref bit to * stop LRU from removing key=1 @@ -758,8 +745,7 @@ static void test_lru_sanity7(int map_type, int map_flags) /* key=2 has been removed from the LRU */ key = 2; - assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 && - errno == ENOENT); + assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT); assert(map_equal(lru_map_fd, expected_map_fd)); @@ -806,21 +792,18 @@ static void test_lru_sanity8(int map_type, int map_flags) assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); /* BPF_NOEXIST means: add new element if it doesn't exist */ - assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST) == -1 - /* key=1 already exists */ - && errno 
== EEXIST); + assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST) == -EEXIST); + /* key=1 already exists */ /* insert key=2 element */ /* check that key=2 is not found */ key = 2; - assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 && - errno == ENOENT); + assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT); /* BPF_EXIST means: update existing element */ - assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_EXIST) == -1 && - /* key=2 is not there */ - errno == ENOENT); + assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_EXIST) == -ENOENT); + /* key=2 is not there */ assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); assert(!bpf_map_update_elem(expected_map_fd, &key, value, @@ -830,8 +813,7 @@ static void test_lru_sanity8(int map_type, int map_flags) /* check that key=3 is not found */ key = 3; - assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 && - errno == ENOENT); + assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT); /* check that key=1 can be found and do _not_ mark ref bit. * this will be evicted on next update. @@ -854,8 +836,7 @@ static void test_lru_sanity8(int map_type, int map_flags) /* key=1 has been removed from the LRU */ key = 1; - assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 && - errno == ENOENT); + assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT); assert(map_equal(lru_map_fd, expected_map_fd)); @@ -878,6 +859,9 @@ int main(int argc, char **argv) assert(nr_cpus != -1); printf("nr_cpus:%d\n\n", nr_cpus); + /* Use libbpf 1.0 API mode */ + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); + for (f = 0; f < ARRAY_SIZE(map_flags); f++) { unsigned int tgt_free = (map_flags[f] & BPF_F_NO_COMMON_LRU) ? PERCPU_FREE_TARGET : LOCAL_FREE_TARGET; diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py index edaffd43da83..6cd6ef9fc20b 100755 --- a/tools/testing/selftests/bpf/test_offload.py +++ b/tools/testing/selftests/bpf/test_offload.py @@ -184,7 +184,7 @@ def bpftool_prog_list(expected=None, ns=""): def bpftool_map_list(expected=None, ns=""): _, maps = bpftool("map show", JSON=True, ns=ns, fail=True) # Remove the base maps - maps = [m for m in maps if m not in base_maps and m.get('name') not in base_map_names] + maps = [m for m in maps if m not in base_maps and m.get('name') and m.get('name') not in base_map_names] if expected is not None: if len(maps) != expected: fail(True, "%d BPF maps loaded, expected %d" % diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 2ecb73a65206..c536d1d29d57 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -3,6 +3,7 @@ */ #define _GNU_SOURCE #include "test_progs.h" +#include "testing_helpers.h" #include "cgroup_helpers.h" #include <argp.h> #include <pthread.h> @@ -50,19 +51,8 @@ struct prog_test_def { int test_num; void (*run_test)(void); void (*run_serial_test)(void); - bool force_log; - int error_cnt; - int skip_cnt; - int sub_succ_cnt; bool should_run; - bool tested; bool need_cgroup_cleanup; - - char *subtest_name; - int subtest_num; - - /* store counts before subtest started */ - int old_error_cnt; }; /* Override C runtime library's usleep() implementation to ensure nanosleep() @@ -84,12 +74,13 @@ static bool should_run(struct test_selector *sel, int num, const char *name) int i; for (i = 0; i < sel->blacklist.cnt; i++) { - if (glob_match(name, sel->blacklist.strs[i])) + if (glob_match(name, 
sel->blacklist.tests[i].name) && + !sel->blacklist.tests[i].subtest_cnt) return false; } for (i = 0; i < sel->whitelist.cnt; i++) { - if (glob_match(name, sel->whitelist.strs[i])) + if (glob_match(name, sel->whitelist.tests[i].name)) return true; } @@ -99,32 +90,69 @@ static bool should_run(struct test_selector *sel, int num, const char *name) return num < sel->num_set_len && sel->num_set[num]; } -static void dump_test_log(const struct prog_test_def *test, bool failed) +static bool should_run_subtest(struct test_selector *sel, + struct test_selector *subtest_sel, + int subtest_num, + const char *test_name, + const char *subtest_name) { - if (stdout == env.stdout) - return; + int i, j; - /* worker always holds log */ - if (env.worker_id != -1) - return; + for (i = 0; i < sel->blacklist.cnt; i++) { + if (glob_match(test_name, sel->blacklist.tests[i].name)) { + if (!sel->blacklist.tests[i].subtest_cnt) + return false; + + for (j = 0; j < sel->blacklist.tests[i].subtest_cnt; j++) { + if (glob_match(subtest_name, + sel->blacklist.tests[i].subtests[j])) + return false; + } + } + } - fflush(stdout); /* exports env.log_buf & env.log_cnt */ + for (i = 0; i < sel->whitelist.cnt; i++) { + if (glob_match(test_name, sel->whitelist.tests[i].name)) { + if (!sel->whitelist.tests[i].subtest_cnt) + return true; - if (env.verbosity > VERBOSE_NONE || test->force_log || failed) { - if (env.log_cnt) { - env.log_buf[env.log_cnt] = '\0'; - fprintf(env.stdout, "%s", env.log_buf); - if (env.log_buf[env.log_cnt - 1] != '\n') - fprintf(env.stdout, "\n"); + for (j = 0; j < sel->whitelist.tests[i].subtest_cnt; j++) { + if (glob_match(subtest_name, + sel->whitelist.tests[i].subtests[j])) + return true; + } } } + + if (!sel->whitelist.cnt && !subtest_sel->num_set) + return true; + + return subtest_num < subtest_sel->num_set_len && subtest_sel->num_set[subtest_num]; } -static void skip_account(void) +static void dump_test_log(const struct prog_test_def *test, + const struct test_state *test_state, + bool force_failed) { - if (env.test->skip_cnt) { - env.skip_cnt++; - env.test->skip_cnt = 0; + bool failed = test_state->error_cnt > 0 || force_failed; + + /* worker always holds log */ + if (env.worker_id != -1) + return; + + fflush(stdout); /* exports test_state->log_buf & test_state->log_cnt */ + + fprintf(env.stdout, "#%-3d %s:%s\n", + test->test_num, test->test_name, + failed ? "FAIL" : (test_state->skip_cnt ? "SKIP" : "OK")); + + if (env.verbosity > VERBOSE_NONE || test_state->force_log || failed) { + if (test_state->log_cnt) { + test_state->log_buf[test_state->log_cnt] = '\0'; + fprintf(env.stdout, "%s", test_state->log_buf); + if (test_state->log_buf[test_state->log_cnt - 1] != '\n') + fprintf(env.stdout, "\n"); + } } } @@ -135,7 +163,6 @@ static void stdio_restore(void); */ static void reset_affinity(void) { - cpu_set_t cpuset; int i, err; @@ -178,68 +205,78 @@ static void restore_netns(void) void test__end_subtest(void) { struct prog_test_def *test = env.test; - int sub_error_cnt = test->error_cnt - test->old_error_cnt; - - dump_test_log(test, sub_error_cnt); + struct test_state *state = env.test_state; + int sub_error_cnt = state->error_cnt - state->old_error_cnt; fprintf(stdout, "#%d/%d %s/%s:%s\n", - test->test_num, test->subtest_num, test->test_name, test->subtest_name, - sub_error_cnt ? "FAIL" : (test->skip_cnt ? "SKIP" : "OK")); + test->test_num, state->subtest_num, test->test_name, state->subtest_name, + sub_error_cnt ? "FAIL" : (state->subtest_skip_cnt ? 
"SKIP" : "OK")); - if (sub_error_cnt) - test->error_cnt++; - else if (test->skip_cnt == 0) - test->sub_succ_cnt++; - skip_account(); + if (sub_error_cnt == 0) { + if (state->subtest_skip_cnt == 0) { + state->sub_succ_cnt++; + } else { + state->subtest_skip_cnt = 0; + state->skip_cnt++; + } + } - free(test->subtest_name); - test->subtest_name = NULL; + free(state->subtest_name); + state->subtest_name = NULL; } -bool test__start_subtest(const char *name) +bool test__start_subtest(const char *subtest_name) { struct prog_test_def *test = env.test; + struct test_state *state = env.test_state; - if (test->subtest_name) + if (state->subtest_name) test__end_subtest(); - test->subtest_num++; + state->subtest_num++; - if (!name || !name[0]) { + if (!subtest_name || !subtest_name[0]) { fprintf(env.stderr, "Subtest #%d didn't provide sub-test name!\n", - test->subtest_num); + state->subtest_num); return false; } - if (!should_run(&env.subtest_selector, test->subtest_num, name)) + if (!should_run_subtest(&env.test_selector, + &env.subtest_selector, + state->subtest_num, + test->test_name, + subtest_name)) return false; - test->subtest_name = strdup(name); - if (!test->subtest_name) { + state->subtest_name = strdup(subtest_name); + if (!state->subtest_name) { fprintf(env.stderr, "Subtest #%d: failed to copy subtest name!\n", - test->subtest_num); + state->subtest_num); return false; } - env.test->old_error_cnt = env.test->error_cnt; + state->old_error_cnt = state->error_cnt; return true; } void test__force_log(void) { - env.test->force_log = true; + env.test_state->force_log = true; } void test__skip(void) { - env.test->skip_cnt++; + if (env.test_state->subtest_name) + env.test_state->subtest_skip_cnt++; + else + env.test_state->skip_cnt++; } void test__fail(void) { - env.test->error_cnt++; + env.test_state->error_cnt++; } int test__join_cgroup(const char *path) @@ -472,8 +509,11 @@ static struct prog_test_def prog_test_defs[] = { #include <prog_tests/tests.h> #undef DEFINE_TEST }; + static const int prog_test_cnt = ARRAY_SIZE(prog_test_defs); +static struct test_state test_states[ARRAY_SIZE(prog_test_defs)]; + const char *argp_program_version = "test_progs 0.1"; const char *argp_program_bug_address = "<bpf@vger.kernel.org>"; static const char argp_program_doc[] = "BPF selftests test runner"; @@ -527,63 +567,29 @@ static int libbpf_print_fn(enum libbpf_print_level level, return 0; } -static void free_str_set(const struct str_set *set) +static void free_test_filter_set(const struct test_filter_set *set) { - int i; + int i, j; if (!set) return; - for (i = 0; i < set->cnt; i++) - free((void *)set->strs[i]); - free(set->strs); -} - -static int parse_str_list(const char *s, struct str_set *set, bool is_glob_pattern) -{ - char *input, *state = NULL, *next, **tmp, **strs = NULL; - int i, cnt = 0; - - input = strdup(s); - if (!input) - return -ENOMEM; - - while ((next = strtok_r(state ? 
NULL : input, ",", &state))) { - tmp = realloc(strs, sizeof(*strs) * (cnt + 1)); - if (!tmp) - goto err; - strs = tmp; - - if (is_glob_pattern) { - strs[cnt] = strdup(next); - if (!strs[cnt]) - goto err; - } else { - strs[cnt] = malloc(strlen(next) + 2 + 1); - if (!strs[cnt]) - goto err; - sprintf(strs[cnt], "*%s*", next); - } + for (i = 0; i < set->cnt; i++) { + free((void *)set->tests[i].name); + for (j = 0; j < set->tests[i].subtest_cnt; j++) + free((void *)set->tests[i].subtests[j]); - cnt++; + free((void *)set->tests[i].subtests); } - tmp = realloc(set->strs, sizeof(*strs) * (cnt + set->cnt)); - if (!tmp) - goto err; - memcpy(tmp + set->cnt, strs, sizeof(*strs) * cnt); - set->strs = (const char **)tmp; - set->cnt += cnt; + free((void *)set->tests); +} - free(input); - free(strs); - return 0; -err: - for (i = 0; i < cnt; i++) - free(strs[i]); - free(strs); - free(input); - return -ENOMEM; +static void free_test_selector(struct test_selector *test_selector) +{ + free_test_filter_set(&test_selector->blacklist); + free_test_filter_set(&test_selector->whitelist); + free(test_selector->num_set); } extern int extra_prog_load_log_flags; @@ -615,33 +621,17 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) } case ARG_TEST_NAME_GLOB_ALLOWLIST: case ARG_TEST_NAME: { - char *subtest_str = strchr(arg, '/'); - - if (subtest_str) { - *subtest_str = '\0'; - if (parse_str_list(subtest_str + 1, - &env->subtest_selector.whitelist, - key == ARG_TEST_NAME_GLOB_ALLOWLIST)) - return -ENOMEM; - } - if (parse_str_list(arg, &env->test_selector.whitelist, - key == ARG_TEST_NAME_GLOB_ALLOWLIST)) + if (parse_test_list(arg, + &env->test_selector.whitelist, + key == ARG_TEST_NAME_GLOB_ALLOWLIST)) return -ENOMEM; break; } case ARG_TEST_NAME_GLOB_DENYLIST: case ARG_TEST_NAME_BLACKLIST: { - char *subtest_str = strchr(arg, '/'); - - if (subtest_str) { - *subtest_str = '\0'; - if (parse_str_list(subtest_str + 1, - &env->subtest_selector.blacklist, - key == ARG_TEST_NAME_GLOB_DENYLIST)) - return -ENOMEM; - } - if (parse_str_list(arg, &env->test_selector.blacklist, - key == ARG_TEST_NAME_GLOB_DENYLIST)) + if (parse_test_list(arg, + &env->test_selector.blacklist, + key == ARG_TEST_NAME_GLOB_DENYLIST)) return -ENOMEM; break; } @@ -706,7 +696,7 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) return 0; } -static void stdio_hijack(void) +static void stdio_hijack(char **log_buf, size_t *log_cnt) { #ifdef __GLIBC__ env.stdout = stdout; @@ -720,7 +710,7 @@ static void stdio_hijack(void) /* stdout and stderr -> buffer */ fflush(stdout); - stdout = open_memstream(&env.log_buf, &env.log_cnt); + stdout = open_memstream(log_buf, log_cnt); if (!stdout) { stdout = env.stdout; perror("open_memstream"); @@ -761,8 +751,10 @@ int cd_flavor_subdir(const char *exec_name) const char *flavor = strrchr(exec_name, '/'); if (!flavor) - return 0; - flavor++; + flavor = exec_name; + else + flavor++; + flavor = strrchr(flavor, '-'); if (!flavor) return 0; @@ -821,7 +813,7 @@ void crash_handler(int signum) sz = backtrace(bt, ARRAY_SIZE(bt)); if (env.test) - dump_test_log(env.test, true); + dump_test_log(env.test, env.test_state, true); if (env.stdout) stdio_restore(); if (env.worker_id != -1) @@ -843,17 +835,6 @@ static int current_test_idx; static pthread_mutex_t current_test_lock; static pthread_mutex_t stdout_output_lock; -struct test_result { - int error_cnt; - int skip_cnt; - int sub_succ_cnt; - - size_t log_cnt; - char *log_buf; -}; - -static struct test_result 
test_results[ARRAY_SIZE(prog_test_defs)]; - static inline const char *str_msg(const struct msg *msg, char *buf) { switch (msg->type) { @@ -907,8 +888,12 @@ static int recv_message(int sock, struct msg *msg) static void run_one_test(int test_num) { struct prog_test_def *test = &prog_test_defs[test_num]; + struct test_state *state = &test_states[test_num]; env.test = test; + env.test_state = state; + + stdio_hijack(&state->log_buf, &state->log_cnt); if (test->run_test) test->run_test(); @@ -916,17 +901,19 @@ static void run_one_test(int test_num) test->run_serial_test(); /* ensure last sub-test is finalized properly */ - if (test->subtest_name) + if (state->subtest_name) test__end_subtest(); - test->tested = true; + state->tested = true; - dump_test_log(test, test->error_cnt); + dump_test_log(test, state, false); reset_affinity(); restore_netns(); if (test->need_cgroup_cleanup) cleanup_cgroup_environment(); + + stdio_restore(); } struct dispatch_data { @@ -945,7 +932,7 @@ static void *dispatch_thread(void *ctx) while (true) { int test_to_run = -1; struct prog_test_def *test; - struct test_result *result; + struct test_state *state; /* grab a test */ { @@ -992,16 +979,15 @@ static void *dispatch_thread(void *ctx) if (test_to_run != msg_test_done.test_done.test_num) goto error; - test->tested = true; - result = &test_results[test_to_run]; - - result->error_cnt = msg_test_done.test_done.error_cnt; - result->skip_cnt = msg_test_done.test_done.skip_cnt; - result->sub_succ_cnt = msg_test_done.test_done.sub_succ_cnt; + state = &test_states[test_to_run]; + state->tested = true; + state->error_cnt = msg_test_done.test_done.error_cnt; + state->skip_cnt = msg_test_done.test_done.skip_cnt; + state->sub_succ_cnt = msg_test_done.test_done.sub_succ_cnt; /* collect all logs */ if (msg_test_done.test_done.have_log) { - log_fp = open_memstream(&result->log_buf, &result->log_cnt); + log_fp = open_memstream(&state->log_buf, &state->log_cnt); if (!log_fp) goto error; @@ -1020,25 +1006,11 @@ static void *dispatch_thread(void *ctx) fclose(log_fp); log_fp = NULL; } - /* output log */ - { - pthread_mutex_lock(&stdout_output_lock); - - if (result->log_cnt) { - result->log_buf[result->log_cnt] = '\0'; - fprintf(stdout, "%s", result->log_buf); - if (result->log_buf[result->log_cnt - 1] != '\n') - fprintf(stdout, "\n"); - } - - fprintf(stdout, "#%d %s:%s\n", - test->test_num, test->test_name, - result->error_cnt ? "FAIL" : (result->skip_cnt ? 
"SKIP" : "OK")); - - pthread_mutex_unlock(&stdout_output_lock); - } - } /* wait for test done */ + + pthread_mutex_lock(&stdout_output_lock); + dump_test_log(test, state, false); + pthread_mutex_unlock(&stdout_output_lock); } /* while (true) */ error: if (env.debug) @@ -1060,38 +1032,50 @@ done: return NULL; } -static void print_all_error_logs(void) +static void calculate_summary_and_print_errors(struct test_env *env) { int i; + int succ_cnt = 0, fail_cnt = 0, sub_succ_cnt = 0, skip_cnt = 0; + + for (i = 0; i < prog_test_cnt; i++) { + struct test_state *state = &test_states[i]; - if (env.fail_cnt) - fprintf(stdout, "\nAll error logs:\n"); + if (!state->tested) + continue; + + sub_succ_cnt += state->sub_succ_cnt; + skip_cnt += state->skip_cnt; + + if (state->error_cnt) + fail_cnt++; + else + succ_cnt++; + } + + if (fail_cnt) + printf("\nAll error logs:\n"); /* print error logs again */ for (i = 0; i < prog_test_cnt; i++) { - struct prog_test_def *test; - struct test_result *result; - - test = &prog_test_defs[i]; - result = &test_results[i]; + struct prog_test_def *test = &prog_test_defs[i]; + struct test_state *state = &test_states[i]; - if (!test->tested || !result->error_cnt) + if (!state->tested || !state->error_cnt) continue; - fprintf(stdout, "\n#%d %s:%s\n", - test->test_num, test->test_name, - result->error_cnt ? "FAIL" : (result->skip_cnt ? "SKIP" : "OK")); - - if (result->log_cnt) { - result->log_buf[result->log_cnt] = '\0'; - fprintf(stdout, "%s", result->log_buf); - if (result->log_buf[result->log_cnt - 1] != '\n') - fprintf(stdout, "\n"); - } + dump_test_log(test, state, true); } + + printf("Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n", + succ_cnt, sub_succ_cnt, skip_cnt, fail_cnt); + + env->succ_cnt = succ_cnt; + env->sub_succ_cnt = sub_succ_cnt; + env->fail_cnt = fail_cnt; + env->skip_cnt = skip_cnt; } -static int server_main(void) +static void server_main(void) { pthread_t *dispatcher_threads; struct dispatch_data *data; @@ -1147,60 +1131,18 @@ static int server_main(void) for (int i = 0; i < prog_test_cnt; i++) { struct prog_test_def *test = &prog_test_defs[i]; - struct test_result *result = &test_results[i]; if (!test->should_run || !test->run_serial_test) continue; - stdio_hijack(); - run_one_test(i); - - stdio_restore(); - if (env.log_buf) { - result->log_cnt = env.log_cnt; - result->log_buf = strdup(env.log_buf); - - free(env.log_buf); - env.log_buf = NULL; - env.log_cnt = 0; - } - restore_netns(); - - fprintf(stdout, "#%d %s:%s\n", - test->test_num, test->test_name, - test->error_cnt ? "FAIL" : (test->skip_cnt ? "SKIP" : "OK")); - - result->error_cnt = test->error_cnt; - result->skip_cnt = test->skip_cnt; - result->sub_succ_cnt = test->sub_succ_cnt; } /* generate summary */ fflush(stderr); fflush(stdout); - for (i = 0; i < prog_test_cnt; i++) { - struct prog_test_def *current_test; - struct test_result *result; - - current_test = &prog_test_defs[i]; - result = &test_results[i]; - - if (!current_test->tested) - continue; - - env.succ_cnt += result->error_cnt ? 
0 : 1; - env.skip_cnt += result->skip_cnt; - if (result->error_cnt) - env.fail_cnt++; - env.sub_succ_cnt += result->sub_succ_cnt; - } - - print_all_error_logs(); - - fprintf(stdout, "Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n", - env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt); + calculate_summary_and_print_errors(&env); /* reap all workers */ for (i = 0; i < env.workers; i++) { @@ -1210,8 +1152,6 @@ static int server_main(void) if (pid != env.worker_pids[i]) perror("Unable to reap worker"); } - - return 0; } static int worker_main(int sock) @@ -1232,35 +1172,29 @@ static int worker_main(int sock) env.worker_id); goto out; case MSG_DO_TEST: { - int test_to_run; - struct prog_test_def *test; + int test_to_run = msg.do_test.test_num; + struct prog_test_def *test = &prog_test_defs[test_to_run]; + struct test_state *state = &test_states[test_to_run]; struct msg msg_done; - test_to_run = msg.do_test.test_num; - test = &prog_test_defs[test_to_run]; - if (env.debug) fprintf(stderr, "[%d]: #%d:%s running.\n", env.worker_id, test_to_run + 1, test->test_name); - stdio_hijack(); - run_one_test(test_to_run); - stdio_restore(); - memset(&msg_done, 0, sizeof(msg_done)); msg_done.type = MSG_TEST_DONE; msg_done.test_done.test_num = test_to_run; - msg_done.test_done.error_cnt = test->error_cnt; - msg_done.test_done.skip_cnt = test->skip_cnt; - msg_done.test_done.sub_succ_cnt = test->sub_succ_cnt; + msg_done.test_done.error_cnt = state->error_cnt; + msg_done.test_done.skip_cnt = state->skip_cnt; + msg_done.test_done.sub_succ_cnt = state->sub_succ_cnt; msg_done.test_done.have_log = false; - if (env.verbosity > VERBOSE_NONE || test->force_log || test->error_cnt) { - if (env.log_cnt) + if (env.verbosity > VERBOSE_NONE || state->force_log || state->error_cnt) { + if (state->log_cnt) msg_done.test_done.have_log = true; } if (send_message(sock, &msg_done) < 0) { @@ -1273,8 +1207,8 @@ static int worker_main(int sock) char *src; size_t slen; - src = env.log_buf; - slen = env.log_cnt; + src = state->log_buf; + slen = state->log_cnt; while (slen) { struct msg msg_log; char *dest; @@ -1294,10 +1228,10 @@ static int worker_main(int sock) assert(send_message(sock, &msg_log) >= 0); } } - if (env.log_buf) { - free(env.log_buf); - env.log_buf = NULL; - env.log_cnt = 0; + if (state->log_buf) { + free(state->log_buf); + state->log_buf = NULL; + state->log_cnt = 0; } if (env.debug) fprintf(stderr, "[%d]: #%d:%s done.\n", @@ -1428,7 +1362,6 @@ int main(int argc, char **argv) for (i = 0; i < prog_test_cnt; i++) { struct prog_test_def *test = &prog_test_defs[i]; - struct test_result *result; if (!test->should_run) continue; @@ -1444,34 +1377,7 @@ int main(int argc, char **argv) continue; } - stdio_hijack(); - run_one_test(i); - - stdio_restore(); - - fprintf(env.stdout, "#%d %s:%s\n", - test->test_num, test->test_name, - test->error_cnt ? "FAIL" : (test->skip_cnt ? 
"SKIP" : "OK")); - - result = &test_results[i]; - result->error_cnt = test->error_cnt; - if (env.log_buf) { - result->log_buf = strdup(env.log_buf); - result->log_cnt = env.log_cnt; - - free(env.log_buf); - env.log_buf = NULL; - env.log_cnt = 0; - } - - if (test->error_cnt) - env.fail_cnt++; - else - env.succ_cnt++; - - skip_account(); - env.sub_succ_cnt += test->sub_succ_cnt; } if (env.get_test_cnt) { @@ -1482,21 +1388,14 @@ int main(int argc, char **argv) if (env.list_test_names) goto out; - print_all_error_logs(); - - fprintf(stdout, "Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n", - env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt); + calculate_summary_and_print_errors(&env); close(env.saved_netns_fd); out: if (!env.list_test_names && env.has_testmod) unload_bpf_testmod(); - free_str_set(&env.test_selector.blacklist); - free_str_set(&env.test_selector.whitelist); - free(env.test_selector.num_set); - free_str_set(&env.subtest_selector.blacklist); - free_str_set(&env.subtest_selector.whitelist); - free(env.subtest_selector.num_set); + + free_test_selector(&env.test_selector); if (env.succ_cnt + env.fail_cnt + env.skip_cnt == 0) return EXIT_NO_TEST; diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index 93c1ff705533..d3fee3b98888 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -25,6 +25,7 @@ typedef __u16 __sum16; #include <sys/wait.h> #include <sys/types.h> #include <sys/time.h> +#include <sys/param.h> #include <fcntl.h> #include <pthread.h> #include <linux/bpf.h> @@ -37,7 +38,6 @@ typedef __u16 __sum16; #include <bpf/bpf_endian.h> #include "trace_helpers.h" #include "testing_helpers.h" -#include "flow_dissector_load.h" enum verbosity { VERBOSE_NONE, @@ -46,18 +46,43 @@ enum verbosity { VERBOSE_SUPER, }; -struct str_set { - const char **strs; +struct test_filter { + char *name; + char **subtests; + int subtest_cnt; +}; + +struct test_filter_set { + struct test_filter *tests; int cnt; }; struct test_selector { - struct str_set whitelist; - struct str_set blacklist; + struct test_filter_set whitelist; + struct test_filter_set blacklist; bool *num_set; int num_set_len; }; +struct test_state { + bool tested; + bool force_log; + + int error_cnt; + int skip_cnt; + int subtest_skip_cnt; + int sub_succ_cnt; + + char *subtest_name; + int subtest_num; + + /* store counts before subtest started */ + int old_error_cnt; + + size_t log_cnt; + char *log_buf; +}; + struct test_env { struct test_selector test_selector; struct test_selector subtest_selector; @@ -70,12 +95,11 @@ struct test_env { bool get_test_cnt; bool list_test_names; - struct prog_test_def *test; /* current running tests */ + struct prog_test_def *test; /* current running test */ + struct test_state *test_state; /* current running test result */ FILE *stdout; FILE *stderr; - char *log_buf; - size_t log_cnt; int nr_cpus; int succ_cnt; /* successful tests */ @@ -120,11 +144,12 @@ struct msg { extern struct test_env env; -extern void test__force_log(); -extern bool test__start_subtest(const char *name); -extern void test__skip(void); -extern void test__fail(void); -extern int test__join_cgroup(const char *path); +void test__force_log(void); +bool test__start_subtest(const char *name); +void test__end_subtest(void); +void test__skip(void); +void test__fail(void); +int test__join_cgroup(const char *path); #define PRINT_FAIL(format...) 
\ ({ \ @@ -267,6 +292,17 @@ extern int test__join_cgroup(const char *path); ___ok; \ }) +#define ASSERT_HAS_SUBSTR(str, substr, name) ({ \ + static int duration = 0; \ + const char *___str = str; \ + const char *___substr = substr; \ + bool ___ok = strstr(___str, ___substr) != NULL; \ + CHECK(!___ok, (name), \ + "unexpected %s: '%s' is not a substring of '%s'\n", \ + (name), ___substr, ___str); \ + ___ok; \ +}) + #define ASSERT_OK(res, name) ({ \ static int duration = 0; \ long long ___res = (res); \ @@ -332,6 +368,8 @@ int trigger_module_test_write(int write_sz); #define SYS_NANOSLEEP_KPROBE_NAME "__x64_sys_nanosleep" #elif defined(__s390x__) #define SYS_NANOSLEEP_KPROBE_NAME "__s390x_sys_nanosleep" +#elif defined(__aarch64__) +#define SYS_NANOSLEEP_KPROBE_NAME "__arm64_sys_nanosleep" #else #define SYS_NANOSLEEP_KPROBE_NAME "sys_nanosleep" #endif diff --git a/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c b/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c index 4a64306728ab..3256de30f563 100644 --- a/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c +++ b/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c @@ -15,7 +15,6 @@ #include <bpf/bpf.h> #include <bpf/libbpf.h> -#include "bpf_rlimit.h" #include "cgroup_helpers.h" #define CGROUP_PATH "/skb_cgroup_test" @@ -160,6 +159,9 @@ int main(int argc, char **argv) exit(EXIT_FAILURE); } + /* Use libbpf 1.0 API mode */ + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); + cgfd = cgroup_setup_and_join(CGROUP_PATH); if (cgfd < 0) goto err; diff --git a/tools/testing/selftests/bpf/test_sock.c b/tools/testing/selftests/bpf/test_sock.c index fe10f8134278..810c3740b2cc 100644 --- a/tools/testing/selftests/bpf/test_sock.c +++ b/tools/testing/selftests/bpf/test_sock.c @@ -14,7 +14,6 @@ #include "cgroup_helpers.h" #include <bpf/bpf_endian.h> -#include "bpf_rlimit.h" #include "bpf_util.h" #define CG_PATH "/foo" @@ -493,7 +492,7 @@ static int run_test_case(int cgfd, const struct sock_test *test) goto err; } - if (attach_sock_prog(cgfd, progfd, test->attach_type) == -1) { + if (attach_sock_prog(cgfd, progfd, test->attach_type) < 0) { if (test->result == ATTACH_REJECT) goto out; else @@ -541,6 +540,9 @@ int main(int argc, char **argv) if (cgfd < 0) goto err; + /* Use libbpf 1.0 API mode */ + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); + if (run_tests(cgfd)) goto err; diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c index f3d5d7ac6505..458564fcfc82 100644 --- a/tools/testing/selftests/bpf/test_sock_addr.c +++ b/tools/testing/selftests/bpf/test_sock_addr.c @@ -19,7 +19,6 @@ #include <bpf/libbpf.h> #include "cgroup_helpers.h" -#include "bpf_rlimit.h" #include "bpf_util.h" #ifndef ENOTSUPP @@ -1418,6 +1417,9 @@ int main(int argc, char **argv) if (cgfd < 0) goto err; + /* Use libbpf 1.0 API mode */ + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); + if (run_tests(cgfd)) goto err; diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c index dfb4f5c0fcb9..0fbaccdc8861 100644 --- a/tools/testing/selftests/bpf/test_sockmap.c +++ b/tools/testing/selftests/bpf/test_sockmap.c @@ -18,7 +18,6 @@ #include <sched.h> #include <sys/time.h> -#include <sys/resource.h> #include <sys/types.h> #include <sys/sendfile.h> @@ -37,7 +36,6 @@ #include <bpf/libbpf.h> #include "bpf_util.h" -#include "bpf_rlimit.h" #include "cgroup_helpers.h" int running; @@ -2017,6 +2015,9 @@ int main(int argc, char **argv) cg_created = 1; } + /* Use libbpf 1.0 API mode */ + 
libbpf_set_strict_mode(LIBBPF_STRICT_ALL); + if (test == SELFTESTS) { err = test_selftest(cg_fd, &options); goto out; diff --git a/tools/testing/selftests/bpf/test_sysctl.c b/tools/testing/selftests/bpf/test_sysctl.c index 4f6cf833b522..57620e7c9048 100644 --- a/tools/testing/selftests/bpf/test_sysctl.c +++ b/tools/testing/selftests/bpf/test_sysctl.c @@ -14,7 +14,6 @@ #include <bpf/libbpf.h> #include <bpf/bpf_endian.h> -#include "bpf_rlimit.h" #include "bpf_util.h" #include "cgroup_helpers.h" #include "testing_helpers.h" @@ -1561,7 +1560,7 @@ static int run_test_case(int cgfd, struct sysctl_test *test) goto err; } - if (bpf_prog_attach(progfd, cgfd, atype, BPF_F_ALLOW_OVERRIDE) == -1) { + if (bpf_prog_attach(progfd, cgfd, atype, BPF_F_ALLOW_OVERRIDE) < 0) { if (test->result == ATTACH_REJECT) goto out; else @@ -1618,6 +1617,9 @@ int main(int argc, char **argv) if (cgfd < 0) goto err; + /* Use libbpf 1.0 API mode */ + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); + if (run_tests(cgfd)) goto err; diff --git a/tools/testing/selftests/bpf/test_tag.c b/tools/testing/selftests/bpf/test_tag.c index 0851c42ee31c..5546b05a0486 100644 --- a/tools/testing/selftests/bpf/test_tag.c +++ b/tools/testing/selftests/bpf/test_tag.c @@ -20,7 +20,6 @@ #include <bpf/bpf.h> #include "../../../include/linux/filter.h" -#include "bpf_rlimit.h" #include "testing_helpers.h" static struct bpf_insn prog[BPF_MAXINSNS]; @@ -189,6 +188,9 @@ int main(void) uint32_t tests = 0; int i, fd_map; + /* Use libbpf 1.0 API mode */ + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); + fd_map = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(int), sizeof(int), 1, &opts); assert(fd_map > 0); diff --git a/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c b/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c index b9e991d43155..5c8ef062f760 100644 --- a/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c +++ b/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c @@ -15,11 +15,11 @@ #include <bpf/bpf.h> #include <bpf/libbpf.h> -#include "bpf_rlimit.h" #include "cgroup_helpers.h" -static int start_server(const struct sockaddr *addr, socklen_t len) +static int start_server(const struct sockaddr *addr, socklen_t len, bool dual) { + int mode = !dual; int fd; fd = socket(addr->sa_family, SOCK_STREAM, 0); @@ -28,6 +28,14 @@ static int start_server(const struct sockaddr *addr, socklen_t len) goto out; } + if (addr->sa_family == AF_INET6) { + if (setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, (char *)&mode, + sizeof(mode)) == -1) { + log_err("Failed to set the dual-stack mode"); + goto close_out; + } + } + if (bind(fd, addr, len) == -1) { log_err("Failed to bind server socket"); goto close_out; @@ -47,24 +55,17 @@ out: return fd; } -static int connect_to_server(int server_fd) +static int connect_to_server(const struct sockaddr *addr, socklen_t len) { - struct sockaddr_storage addr; - socklen_t len = sizeof(addr); int fd = -1; - if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) { - log_err("Failed to get server addr"); - goto out; - } - - fd = socket(addr.ss_family, SOCK_STREAM, 0); + fd = socket(addr->sa_family, SOCK_STREAM, 0); if (fd == -1) { log_err("Failed to create client socket"); goto out; } - if (connect(fd, (const struct sockaddr *)&addr, len) == -1) { + if (connect(fd, (const struct sockaddr *)addr, len) == -1) { log_err("Fail to connect to server"); goto close_out; } @@ -116,7 +117,8 @@ err: return map_fd; } -static int run_test(int server_fd, int results_fd, bool xdp) +static int run_test(int server_fd, int 
results_fd, bool xdp, + const struct sockaddr *addr, socklen_t len) { int client = -1, srv_client = -1; int ret = 0; @@ -142,7 +144,7 @@ static int run_test(int server_fd, int results_fd, bool xdp) goto err; } - client = connect_to_server(server_fd); + client = connect_to_server(addr, len); if (client == -1) goto err; @@ -199,12 +201,30 @@ out: return ret; } +static bool get_port(int server_fd, in_port_t *port) +{ + struct sockaddr_in addr; + socklen_t len = sizeof(addr); + + if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) { + log_err("Failed to get server addr"); + return false; + } + + /* sin_port and sin6_port are located at the same offset. */ + *port = addr.sin_port; + return true; +} + int main(int argc, char **argv) { struct sockaddr_in addr4; struct sockaddr_in6 addr6; + struct sockaddr_in addr4dual; + struct sockaddr_in6 addr6dual; int server = -1; int server_v6 = -1; + int server_dual = -1; int results = -1; int err = 0; bool xdp; @@ -214,6 +234,9 @@ int main(int argc, char **argv) exit(1); } + /* Use libbpf 1.0 API mode */ + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); + results = get_map_fd_by_prog_id(atoi(argv[1]), &xdp); if (results < 0) { log_err("Can't get map"); @@ -224,25 +247,43 @@ int main(int argc, char **argv) addr4.sin_family = AF_INET; addr4.sin_addr.s_addr = htonl(INADDR_LOOPBACK); addr4.sin_port = 0; + memcpy(&addr4dual, &addr4, sizeof(addr4dual)); memset(&addr6, 0, sizeof(addr6)); addr6.sin6_family = AF_INET6; addr6.sin6_addr = in6addr_loopback; addr6.sin6_port = 0; - server = start_server((const struct sockaddr *)&addr4, sizeof(addr4)); - if (server == -1) + memset(&addr6dual, 0, sizeof(addr6dual)); + addr6dual.sin6_family = AF_INET6; + addr6dual.sin6_addr = in6addr_any; + addr6dual.sin6_port = 0; + + server = start_server((const struct sockaddr *)&addr4, sizeof(addr4), + false); + if (server == -1 || !get_port(server, &addr4.sin_port)) goto err; server_v6 = start_server((const struct sockaddr *)&addr6, - sizeof(addr6)); - if (server_v6 == -1) + sizeof(addr6), false); + if (server_v6 == -1 || !get_port(server_v6, &addr6.sin6_port)) + goto err; + + server_dual = start_server((const struct sockaddr *)&addr6dual, + sizeof(addr6dual), true); + if (server_dual == -1 || !get_port(server_dual, &addr4dual.sin_port)) + goto err; + + if (run_test(server, results, xdp, + (const struct sockaddr *)&addr4, sizeof(addr4))) goto err; - if (run_test(server, results, xdp)) + if (run_test(server_v6, results, xdp, + (const struct sockaddr *)&addr6, sizeof(addr6))) goto err; - if (run_test(server_v6, results, xdp)) + if (run_test(server_dual, results, xdp, + (const struct sockaddr *)&addr4dual, sizeof(addr4dual))) goto err; printf("ok\n"); @@ -252,6 +293,7 @@ err: out: close(server); close(server_v6); + close(server_dual); close(results); return err; } diff --git a/tools/testing/selftests/bpf/test_tcpnotify_user.c b/tools/testing/selftests/bpf/test_tcpnotify_user.c index 4c5114765b23..8284db8b0f13 100644 --- a/tools/testing/selftests/bpf/test_tcpnotify_user.c +++ b/tools/testing/selftests/bpf/test_tcpnotify_user.c @@ -19,7 +19,6 @@ #include <linux/perf_event.h> #include <linux/err.h> -#include "bpf_rlimit.h" #include "bpf_util.h" #include "cgroup_helpers.h" diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index a2cd236c32eb..372579c9f45e 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -53,7 +53,7 @@ #define MAX_INSNS BPF_MAXINSNS #define MAX_TEST_INSNS 
1000000 #define MAX_FIXUPS 8 -#define MAX_NR_MAPS 22 +#define MAX_NR_MAPS 23 #define MAX_TEST_RUNS 8 #define POINTER_VALUE 0xcafe4all #define TEST_DATA_LEN 64 @@ -101,6 +101,7 @@ struct bpf_test { int fixup_map_reuseport_array[MAX_FIXUPS]; int fixup_map_ringbuf[MAX_FIXUPS]; int fixup_map_timer[MAX_FIXUPS]; + int fixup_map_kptr[MAX_FIXUPS]; struct kfunc_btf_id_pair fixup_kfunc_btf_id[MAX_FIXUPS]; /* Expected verifier log output for result REJECT or VERBOSE_ACCEPT. * Can be a tab-separated sequence of expected strings. An empty string @@ -621,8 +622,15 @@ static int create_cgroup_storage(bool percpu) * struct timer { * struct bpf_timer t; * }; + * struct btf_ptr { + * struct prog_test_ref_kfunc __kptr *ptr; + * struct prog_test_ref_kfunc __kptr_ref *ptr; + * struct prog_test_member __kptr_ref *ptr; + * } */ -static const char btf_str_sec[] = "\0bpf_spin_lock\0val\0cnt\0l\0bpf_timer\0timer\0t"; +static const char btf_str_sec[] = "\0bpf_spin_lock\0val\0cnt\0l\0bpf_timer\0timer\0t" + "\0btf_ptr\0prog_test_ref_kfunc\0ptr\0kptr\0kptr_ref" + "\0prog_test_member"; static __u32 btf_raw_types[] = { /* int */ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ @@ -638,6 +646,22 @@ static __u32 btf_raw_types[] = { /* struct timer */ /* [5] */ BTF_TYPE_ENC(35, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 16), BTF_MEMBER_ENC(41, 4, 0), /* struct bpf_timer t; */ + /* struct prog_test_ref_kfunc */ /* [6] */ + BTF_STRUCT_ENC(51, 0, 0), + BTF_STRUCT_ENC(89, 0, 0), /* [7] */ + /* type tag "kptr" */ + BTF_TYPE_TAG_ENC(75, 6), /* [8] */ + /* type tag "kptr_ref" */ + BTF_TYPE_TAG_ENC(80, 6), /* [9] */ + BTF_TYPE_TAG_ENC(80, 7), /* [10] */ + BTF_PTR_ENC(8), /* [11] */ + BTF_PTR_ENC(9), /* [12] */ + BTF_PTR_ENC(10), /* [13] */ + /* struct btf_ptr */ /* [14] */ + BTF_STRUCT_ENC(43, 3, 24), + BTF_MEMBER_ENC(71, 11, 0), /* struct prog_test_ref_kfunc __kptr *ptr; */ + BTF_MEMBER_ENC(71, 12, 64), /* struct prog_test_ref_kfunc __kptr_ref *ptr; */ + BTF_MEMBER_ENC(71, 13, 128), /* struct prog_test_member __kptr_ref *ptr; */ }; static int load_btf(void) @@ -727,6 +751,25 @@ static int create_map_timer(void) return fd; } +static int create_map_kptr(void) +{ + LIBBPF_OPTS(bpf_map_create_opts, opts, + .btf_key_type_id = 1, + .btf_value_type_id = 14, + ); + int fd, btf_fd; + + btf_fd = load_btf(); + if (btf_fd < 0) + return -1; + + opts.btf_fd = btf_fd; + fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "test_map", 4, 24, 1, &opts); + if (fd < 0) + printf("Failed to create map with btf_id pointer\n"); + return fd; +} + static char bpf_vlog[UINT_MAX >> 8]; static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, @@ -754,6 +797,7 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, int *fixup_map_reuseport_array = test->fixup_map_reuseport_array; int *fixup_map_ringbuf = test->fixup_map_ringbuf; int *fixup_map_timer = test->fixup_map_timer; + int *fixup_map_kptr = test->fixup_map_kptr; struct kfunc_btf_id_pair *fixup_kfunc_btf_id = test->fixup_kfunc_btf_id; if (test->fill_helper) { @@ -947,6 +991,13 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, fixup_map_timer++; } while (*fixup_map_timer); } + if (*fixup_map_kptr) { + map_fds[22] = create_map_kptr(); + do { + prog[*fixup_map_kptr].imm = map_fds[22]; + fixup_map_kptr++; + } while (*fixup_map_kptr); + } /* Patch in kfunc BTF IDs */ if (fixup_kfunc_btf_id->kfunc) { diff --git a/tools/testing/selftests/bpf/test_verifier_log.c b/tools/testing/selftests/bpf/test_verifier_log.c index 8d6918c3b4a2..70feda97cee5 
100644 --- a/tools/testing/selftests/bpf/test_verifier_log.c +++ b/tools/testing/selftests/bpf/test_verifier_log.c @@ -11,8 +11,6 @@ #include <bpf/bpf.h> -#include "bpf_rlimit.h" - #define LOG_SIZE (1 << 20) #define err(str...) printf("ERROR: " str) @@ -141,6 +139,9 @@ int main(int argc, char **argv) memset(log, 1, LOG_SIZE); + /* Use libbpf 1.0 API mode */ + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); + /* Test incorrect attr */ printf("Test log_level 0...\n"); test_log_bad(log, LOG_SIZE, 0); diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c index 795b6798ccee..9695318e8132 100644 --- a/tools/testing/selftests/bpf/testing_helpers.c +++ b/tools/testing/selftests/bpf/testing_helpers.c @@ -6,6 +6,7 @@ #include <errno.h> #include <bpf/bpf.h> #include <bpf/libbpf.h> +#include "test_progs.h" #include "testing_helpers.h" int parse_num_list(const char *s, bool **num_set, int *num_set_len) @@ -60,7 +61,7 @@ int parse_num_list(const char *s, bool **num_set, int *num_set_len) set[i] = true; } - if (!set) + if (!set || parsing_end) return -EINVAL; *num_set = set; @@ -69,6 +70,94 @@ int parse_num_list(const char *s, bool **num_set, int *num_set_len) return 0; } +int parse_test_list(const char *s, + struct test_filter_set *set, + bool is_glob_pattern) +{ + char *input, *state = NULL, *next; + struct test_filter *tmp, *tests = NULL; + int i, j, cnt = 0; + + input = strdup(s); + if (!input) + return -ENOMEM; + + while ((next = strtok_r(state ? NULL : input, ",", &state))) { + char *subtest_str = strchr(next, '/'); + char *pattern = NULL; + int glob_chars = 0; + + tmp = realloc(tests, sizeof(*tests) * (cnt + 1)); + if (!tmp) + goto err; + tests = tmp; + + tests[cnt].subtest_cnt = 0; + tests[cnt].subtests = NULL; + + if (is_glob_pattern) { + pattern = "%s"; + } else { + pattern = "*%s*"; + glob_chars = 2; + } + + if (subtest_str) { + char **tmp_subtests = NULL; + int subtest_cnt = tests[cnt].subtest_cnt; + + *subtest_str = '\0'; + subtest_str += 1; + tmp_subtests = realloc(tests[cnt].subtests, + sizeof(*tmp_subtests) * + (subtest_cnt + 1)); + if (!tmp_subtests) + goto err; + tests[cnt].subtests = tmp_subtests; + + tests[cnt].subtests[subtest_cnt] = + malloc(strlen(subtest_str) + glob_chars + 1); + if (!tests[cnt].subtests[subtest_cnt]) + goto err; + sprintf(tests[cnt].subtests[subtest_cnt], + pattern, + subtest_str); + + tests[cnt].subtest_cnt++; + } + + tests[cnt].name = malloc(strlen(next) + glob_chars + 1); + if (!tests[cnt].name) + goto err; + sprintf(tests[cnt].name, pattern, next); + + cnt++; + } + + tmp = realloc(set->tests, sizeof(*tests) * (cnt + set->cnt)); + if (!tmp) + goto err; + + memcpy(tmp + set->cnt, tests, sizeof(*tests) * cnt); + set->tests = tmp; + set->cnt += cnt; + + free(tests); + free(input); + return 0; + +err: + for (i = 0; i < cnt; i++) { + for (j = 0; j < tests[i].subtest_cnt; j++) + free(tests[i].subtests[j]); + + free(tests[i].name); + } + free(tests); + free(input); + return -ENOMEM; +} + __u32 link_info_prog_id(const struct bpf_link *link, struct bpf_link_info *info) { __u32 info_len = sizeof(*info); diff --git a/tools/testing/selftests/bpf/testing_helpers.h b/tools/testing/selftests/bpf/testing_helpers.h index f46ebc476ee8..6ec00bf79cb5 100644 --- a/tools/testing/selftests/bpf/testing_helpers.h +++ b/tools/testing/selftests/bpf/testing_helpers.h @@ -12,3 +12,11 @@ int bpf_test_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, size_t insns_cnt, const char *license, __u32 kern_version, char *log_buf, 
size_t log_buf_sz); + +/* + * below function is exported for testing in prog_test test + */ +struct test_filter_set; +int parse_test_list(const char *s, + struct test_filter_set *test_set, + bool is_glob_pattern); diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c index 3d6217e3aff7..9c4be2cdb21a 100644 --- a/tools/testing/selftests/bpf/trace_helpers.c +++ b/tools/testing/selftests/bpf/trace_helpers.c @@ -25,15 +25,12 @@ static int ksym_cmp(const void *p1, const void *p2) int load_kallsyms(void) { - FILE *f = fopen("/proc/kallsyms", "r"); + FILE *f; char func[256], buf[256]; char symbol; void *addr; int i = 0; - if (!f) - return -ENOENT; - /* * This is called/used from multiplace places, * load symbols just once. @@ -41,6 +38,10 @@ int load_kallsyms(void) if (sym_cnt) return 0; + f = fopen("/proc/kallsyms", "r"); + if (!f) + return -ENOENT; + while (fgets(buf, sizeof(buf), f)) { if (sscanf(buf, "%p %c %s", &addr, &symbol, func) != 3) break; diff --git a/tools/testing/selftests/bpf/urandom_read.c b/tools/testing/selftests/bpf/urandom_read.c index db781052758d..e92644d0fa75 100644 --- a/tools/testing/selftests/bpf/urandom_read.c +++ b/tools/testing/selftests/bpf/urandom_read.c @@ -1,32 +1,85 @@ +#include <stdbool.h> #include <stdio.h> #include <unistd.h> +#include <errno.h> #include <sys/types.h> #include <sys/stat.h> #include <fcntl.h> #include <stdlib.h> +#include <signal.h> + +#define _SDT_HAS_SEMAPHORES 1 +#include "sdt.h" + +#define SEC(name) __attribute__((section(name), used)) #define BUF_SIZE 256 +/* defined in urandom_read_aux.c */ +void urand_read_without_sema(int iter_num, int iter_cnt, int read_sz); +/* these are coming from urandom_read_lib{1,2}.c */ +void urandlib_read_with_sema(int iter_num, int iter_cnt, int read_sz); +void urandlib_read_without_sema(int iter_num, int iter_cnt, int read_sz); + +unsigned short urand_read_with_sema_semaphore SEC(".probes"); + static __attribute__((noinline)) void urandom_read(int fd, int count) { - char buf[BUF_SIZE]; - int i; + char buf[BUF_SIZE]; + int i; + + for (i = 0; i < count; ++i) { + read(fd, buf, BUF_SIZE); + + /* trigger USDTs defined in executable itself */ + urand_read_without_sema(i, count, BUF_SIZE); + STAP_PROBE3(urand, read_with_sema, i, count, BUF_SIZE); - for (i = 0; i < count; ++i) - read(fd, buf, BUF_SIZE); + /* trigger USDTs defined in shared lib */ + urandlib_read_without_sema(i, count, BUF_SIZE); + urandlib_read_with_sema(i, count, BUF_SIZE); + } +} + +static volatile bool parent_ready; + +static void handle_sigpipe(int sig) +{ + parent_ready = true; } int main(int argc, char *argv[]) { int fd = open("/dev/urandom", O_RDONLY); int count = 4; + bool report_pid = false; if (fd < 0) return 1; - if (argc == 2) + if (argc >= 2) count = atoi(argv[1]); + if (argc >= 3) { + report_pid = true; + /* install SIGPIPE handler to catch when parent closes their + * end of the pipe (on the other side of our stdout) + */ + signal(SIGPIPE, handle_sigpipe); + } + + /* report PID and wait for parent process to send us "signal" by + * closing stdout + */ + if (report_pid) { + while (!parent_ready) { + fprintf(stdout, "%d\n", getpid()); + fflush(stdout); + } + /* at this point stdout is closed, parent process knows our + * PID and is ready to trace us + */ + } urandom_read(fd, count); diff --git a/tools/testing/selftests/bpf/urandom_read_aux.c b/tools/testing/selftests/bpf/urandom_read_aux.c new file mode 100644 index 000000000000..6132edcfea74 --- /dev/null +++ 
b/tools/testing/selftests/bpf/urandom_read_aux.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ +#include "sdt.h" + +void urand_read_without_sema(int iter_num, int iter_cnt, int read_sz) +{ + /* semaphore-less USDT */ + STAP_PROBE3(urand, read_without_sema, iter_num, iter_cnt, read_sz); +} diff --git a/tools/testing/selftests/bpf/urandom_read_lib1.c b/tools/testing/selftests/bpf/urandom_read_lib1.c new file mode 100644 index 000000000000..86186e24b740 --- /dev/null +++ b/tools/testing/selftests/bpf/urandom_read_lib1.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ +#define _SDT_HAS_SEMAPHORES 1 +#include "sdt.h" + +#define SEC(name) __attribute__((section(name), used)) + +unsigned short urandlib_read_with_sema_semaphore SEC(".probes"); + +void urandlib_read_with_sema(int iter_num, int iter_cnt, int read_sz) +{ + STAP_PROBE3(urandlib, read_with_sema, iter_num, iter_cnt, read_sz); +} diff --git a/tools/testing/selftests/bpf/urandom_read_lib2.c b/tools/testing/selftests/bpf/urandom_read_lib2.c new file mode 100644 index 000000000000..9d401ad9838f --- /dev/null +++ b/tools/testing/selftests/bpf/urandom_read_lib2.c @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ +#include "sdt.h" + +void urandlib_read_without_sema(int iter_num, int iter_cnt, int read_sz) +{ + STAP_PROBE3(urandlib, read_without_sema, iter_num, iter_cnt, read_sz); +} diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c index 2e03decb11b6..743ed34c1238 100644 --- a/tools/testing/selftests/bpf/verifier/calls.c +++ b/tools/testing/selftests/bpf/verifier/calls.c @@ -139,6 +139,26 @@ }, }, { + "calls: invalid kfunc call: don't match first member type when passed to release kfunc", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "kernel function bpf_kfunc_call_memb1_release args#0 expected pointer", + .fixup_kfunc_btf_id = { + { "bpf_kfunc_call_memb_acquire", 1 }, + { "bpf_kfunc_call_memb1_release", 5 }, + }, +}, +{ "calls: invalid kfunc call: PTR_TO_BTF_ID with negative offset", .insns = { BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), diff --git a/tools/testing/selftests/bpf/verifier/map_kptr.c b/tools/testing/selftests/bpf/verifier/map_kptr.c new file mode 100644 index 000000000000..9113834640e6 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/map_kptr.c @@ -0,0 +1,469 @@ +/* Common tests */ +{ + "map_kptr: BPF_ST imm != 0", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_LD_MAP_FD(BPF_REG_6, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map_kptr = { 1 }, + .result = REJECT, + .errstr = "BPF_ST imm must be 0 when storing to kptr at off=0", +}, +{ + "map_kptr: size != 
bpf_size_to_bytes(BPF_DW)", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_LD_MAP_FD(BPF_REG_6, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_ST_MEM(BPF_W, BPF_REG_0, 0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map_kptr = { 1 }, + .result = REJECT, + .errstr = "kptr access size must be BPF_DW", +}, +{ + "map_kptr: map_value non-const var_off", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_LD_MAP_FD(BPF_REG_6, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_0), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_2, 0), + BPF_JMP_IMM(BPF_JLE, BPF_REG_2, 4, 1), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_2), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map_kptr = { 1 }, + .result = REJECT, + .errstr = "kptr access cannot have variable offset", +}, +{ + "map_kptr: bpf_kptr_xchg non-const var_off", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_LD_MAP_FD(BPF_REG_6, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_0), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_2, 0), + BPF_JMP_IMM(BPF_JLE, BPF_REG_2, 4, 1), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_3), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_kptr_xchg), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map_kptr = { 1 }, + .result = REJECT, + .errstr = "R1 doesn't have constant offset. 
kptr has to be at the constant offset", +}, +{ + "map_kptr: unaligned boundary load/store", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_LD_MAP_FD(BPF_REG_6, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 7), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map_kptr = { 1 }, + .result = REJECT, + .errstr = "kptr access misaligned expected=0 off=7", +}, +{ + "map_kptr: reject var_off != 0", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_LD_MAP_FD(BPF_REG_6, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0), + BPF_JMP_IMM(BPF_JLE, BPF_REG_2, 4, 1), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2), + BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map_kptr = { 1 }, + .result = REJECT, + .errstr = "variable untrusted_ptr_ access var_off=(0x0; 0x7) disallowed", +}, +/* Tests for unreferenced PTR_TO_BTF_ID */ +{ + "map_kptr: unref: reject btf_struct_ids_match == false", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_LD_MAP_FD(BPF_REG_6, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4), + BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map_kptr = { 1 }, + .result = REJECT, + .errstr = "invalid kptr access, R1 type=untrusted_ptr_prog_test_ref_kfunc expected=ptr_prog_test", +}, +{ + "map_kptr: unref: loaded pointer marked as untrusted", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_LD_MAP_FD(BPF_REG_6, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map_kptr = { 1 }, + .result = REJECT, + .errstr = "R0 invalid mem access 'untrusted_ptr_or_null_'", +}, +{ + "map_kptr: unref: correct in kernel type size", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_LD_MAP_FD(BPF_REG_6, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_RAW_INSN(BPF_JMP
| BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 24), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map_kptr = { 1 }, + .result = REJECT, + .errstr = "access beyond struct prog_test_ref_kfunc at off 24 size 8", +}, +{ + "map_kptr: unref: inherit PTR_UNTRUSTED on struct walk", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_LD_MAP_FD(BPF_REG_6, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_this_cpu_ptr), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map_kptr = { 1 }, + .result = REJECT, + .errstr = "R1 type=untrusted_ptr_ expected=percpu_ptr_", +}, +{ + "map_kptr: unref: no reference state created", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_LD_MAP_FD(BPF_REG_6, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map_kptr = { 1 }, + .result = ACCEPT, +}, +{ + "map_kptr: unref: bpf_kptr_xchg rejected", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_LD_MAP_FD(BPF_REG_6, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_kptr_xchg), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map_kptr = { 1 }, + .result = REJECT, + .errstr = "off=0 kptr isn't referenced kptr", +}, +{ + "map_kptr: unref: bpf_kfunc_call_test_kptr_get rejected", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_LD_MAP_FD(BPF_REG_6, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map_kptr = { 1 }, + .result = REJECT, + .errstr = "arg#0 no referenced kptr at map value offset=0", + .fixup_kfunc_btf_id = { + { "bpf_kfunc_call_test_kptr_get", 13 }, + } +}, +/* Tests for 
referenced PTR_TO_BTF_ID */ +{ + "map_kptr: ref: loaded pointer marked as untrusted", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_LD_MAP_FD(BPF_REG_6, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_1, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_this_cpu_ptr), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map_kptr = { 1 }, + .result = REJECT, + .errstr = "R1 type=untrusted_ptr_or_null_ expected=percpu_ptr_", +}, +{ + "map_kptr: ref: reject off != 0", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_LD_MAP_FD(BPF_REG_6, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_kptr_xchg), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_kptr_xchg), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map_kptr = { 1 }, + .result = REJECT, + .errstr = "invalid kptr access, R2 type=ptr_prog_test_ref_kfunc expected=ptr_prog_test_member", +}, +{ + "map_kptr: ref: reference state created and released on xchg", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_LD_MAP_FD(BPF_REG_6, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_kptr_xchg), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map_kptr = { 1 }, + .result = REJECT, + .errstr = "Unreleased reference id=5 alloc_insn=20", + .fixup_kfunc_btf_id = { + { "bpf_kfunc_call_test_acquire", 15 }, + } +}, +{ + "map_kptr: ref: reject STX", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_LD_MAP_FD(BPF_REG_6, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_1, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 8), + 
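+	/* Direct store to the referenced kptr field at offset 8 must be
+	 * rejected by the verifier; a referenced kptr may only be updated
+	 * through bpf_kptr_xchg().
+	 */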
BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map_kptr = { 1 }, + .result = REJECT, + .errstr = "store to referenced kptr disallowed", +}, +{ + "map_kptr: ref: reject ST", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_LD_MAP_FD(BPF_REG_6, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 8, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map_kptr = { 1 }, + .result = REJECT, + .errstr = "store to referenced kptr disallowed", +}, +{ + "map_kptr: reject helper access to kptr", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_LD_MAP_FD(BPF_REG_6, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 2), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_delete_elem), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map_kptr = { 1 }, + .result = REJECT, + .errstr = "kptr cannot be accessed indirectly by helper", +}, diff --git a/tools/testing/selftests/bpf/verifier/ref_tracking.c b/tools/testing/selftests/bpf/verifier/ref_tracking.c index fbd682520e47..57a83d763ec1 100644 --- a/tools/testing/selftests/bpf/verifier/ref_tracking.c +++ b/tools/testing/selftests/bpf/verifier/ref_tracking.c @@ -796,7 +796,7 @@ }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, - .errstr = "reference has not been acquired before", + .errstr = "R1 must be referenced when passed to release function", }, { /* !bpf_sk_fullsock(sk) is checked but !bpf_tcp_sock(sk) is not checked */ diff --git a/tools/testing/selftests/bpf/verifier/sock.c b/tools/testing/selftests/bpf/verifier/sock.c index 86b24cad27a7..d11d0b28be41 100644 --- a/tools/testing/selftests/bpf/verifier/sock.c +++ b/tools/testing/selftests/bpf/verifier/sock.c @@ -417,7 +417,7 @@ }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, - .errstr = "reference has not been acquired before", + .errstr = "R1 must be referenced when passed to release function", }, { "bpf_sk_release(bpf_sk_fullsock(skb->sk))", @@ -436,7 +436,7 @@ }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, - .errstr = "reference has not been acquired before", + .errstr = "R1 must be referenced when passed to release function", }, { "bpf_sk_release(bpf_tcp_sock(skb->sk))", @@ -455,7 +455,7 @@ }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, - .errstr = "reference has not been acquired before", + .errstr = "R1 must be referenced when passed to release function", }, { "sk_storage_get(map, skb->sk, NULL, 0): value == NULL", diff --git a/tools/testing/selftests/bpf/xdp_redirect_multi.c b/tools/testing/selftests/bpf/xdp_redirect_multi.c index aaedbf4955c3..c03b3a75991f 100644 --- a/tools/testing/selftests/bpf/xdp_redirect_multi.c +++ b/tools/testing/selftests/bpf/xdp_redirect_multi.c @@ -10,7 +10,6 @@ #include <net/if.h> #include <unistd.h> #include <libgen.h> -#include <sys/resource.h> #include <sys/ioctl.h> #include <sys/types.h> #include <sys/socket.h> diff --git 
a/tools/testing/selftests/bpf/xdping.c b/tools/testing/selftests/bpf/xdping.c index c567856fd1bc..5b6f977870f8 100644 --- a/tools/testing/selftests/bpf/xdping.c +++ b/tools/testing/selftests/bpf/xdping.c @@ -12,7 +12,6 @@ #include <string.h> #include <unistd.h> #include <libgen.h> -#include <sys/resource.h> #include <net/if.h> #include <sys/types.h> #include <sys/socket.h> @@ -89,7 +88,6 @@ int main(int argc, char **argv) { __u32 mode_flags = XDP_FLAGS_DRV_MODE | XDP_FLAGS_SKB_MODE; struct addrinfo *a, hints = { .ai_family = AF_INET }; - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; __u16 count = XDPING_DEFAULT_COUNT; struct pinginfo pinginfo = { 0 }; const char *optstr = "c:I:NsS"; @@ -167,10 +165,8 @@ int main(int argc, char **argv) freeaddrinfo(a); } - if (setrlimit(RLIMIT_MEMLOCK, &r)) { - perror("setrlimit(RLIMIT_MEMLOCK)"); - return 1; - } + /* Use libbpf 1.0 API mode */ + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c index 5f8296d29e77..cfcb031323c5 100644 --- a/tools/testing/selftests/bpf/xdpxceiver.c +++ b/tools/testing/selftests/bpf/xdpxceiver.c @@ -90,7 +90,6 @@ #include <string.h> #include <stddef.h> #include <sys/mman.h> -#include <sys/resource.h> #include <sys/types.h> #include <sys/queue.h> #include <time.h> @@ -1448,14 +1447,13 @@ static void ifobject_delete(struct ifobject *ifobj) int main(int argc, char **argv) { - struct rlimit _rlim = { RLIM_INFINITY, RLIM_INFINITY }; struct pkt_stream *pkt_stream_default; struct ifobject *ifobj_tx, *ifobj_rx; struct test_spec test; u32 i, j; - if (setrlimit(RLIMIT_MEMLOCK, &_rlim)) - exit_with_error(errno); + /* Use libbpf 1.0 API mode */ + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); ifobj_tx = ifobject_create(); if (!ifobj_tx) diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_locked_port.sh b/tools/testing/selftests/drivers/net/dsa/bridge_locked_port.sh new file mode 120000 index 000000000000..f5eb940c4c7c --- /dev/null +++ b/tools/testing/selftests/drivers/net/dsa/bridge_locked_port.sh @@ -0,0 +1 @@ +../../../net/forwarding/bridge_locked_port.sh
\ No newline at end of file diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_mdb.sh b/tools/testing/selftests/drivers/net/dsa/bridge_mdb.sh new file mode 120000 index 000000000000..76492da525f7 --- /dev/null +++ b/tools/testing/selftests/drivers/net/dsa/bridge_mdb.sh @@ -0,0 +1 @@ +../../../net/forwarding/bridge_mdb.sh
\ No newline at end of file diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_mld.sh b/tools/testing/selftests/drivers/net/dsa/bridge_mld.sh new file mode 120000 index 000000000000..81a7e0df0474 --- /dev/null +++ b/tools/testing/selftests/drivers/net/dsa/bridge_mld.sh @@ -0,0 +1 @@ +../../../net/forwarding/bridge_mld.sh
\ No newline at end of file diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_vlan_aware.sh b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_aware.sh new file mode 120000 index 000000000000..9831ed74376a --- /dev/null +++ b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_aware.sh @@ -0,0 +1 @@ +../../../net/forwarding/bridge_vlan_aware.sh
\ No newline at end of file diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_vlan_mcast.sh b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_mcast.sh new file mode 120000 index 000000000000..7f3c3f0bf719 --- /dev/null +++ b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_mcast.sh @@ -0,0 +1 @@ +../../../net/forwarding/bridge_vlan_mcast.sh
\ No newline at end of file diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_vlan_unaware.sh b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_unaware.sh new file mode 120000 index 000000000000..bf1a57e6bde1 --- /dev/null +++ b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_unaware.sh @@ -0,0 +1 @@ +../../../net/forwarding/bridge_vlan_unaware.sh
\ No newline at end of file diff --git a/tools/testing/selftests/drivers/net/dsa/forwarding.config b/tools/testing/selftests/drivers/net/dsa/forwarding.config new file mode 100644 index 000000000000..7adc1396fae0 --- /dev/null +++ b/tools/testing/selftests/drivers/net/dsa/forwarding.config @@ -0,0 +1,2 @@ +NETIF_CREATE=no +STABLE_MAC_ADDRS=yes diff --git a/tools/testing/selftests/drivers/net/dsa/lib.sh b/tools/testing/selftests/drivers/net/dsa/lib.sh new file mode 120000 index 000000000000..39c96828c5ef --- /dev/null +++ b/tools/testing/selftests/drivers/net/dsa/lib.sh @@ -0,0 +1 @@ +../../../net/forwarding/lib.sh
\ No newline at end of file diff --git a/tools/testing/selftests/drivers/net/dsa/local_termination.sh b/tools/testing/selftests/drivers/net/dsa/local_termination.sh new file mode 120000 index 000000000000..c08166f84501 --- /dev/null +++ b/tools/testing/selftests/drivers/net/dsa/local_termination.sh @@ -0,0 +1 @@ +../../../net/forwarding/local_termination.sh
\ No newline at end of file diff --git a/tools/testing/selftests/drivers/net/dsa/no_forwarding.sh b/tools/testing/selftests/drivers/net/dsa/no_forwarding.sh new file mode 120000 index 000000000000..b9757466bc97 --- /dev/null +++ b/tools/testing/selftests/drivers/net/dsa/no_forwarding.sh @@ -0,0 +1 @@ +../../../net/forwarding/no_forwarding.sh
\ No newline at end of file diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh new file mode 100755 index 000000000000..53a65f416770 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh @@ -0,0 +1,341 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# In addition to the common variables, the user might use: +# LC_SLOT - If not set, all probed line cards are going to be tested, +# with the exception of the "activation_16x100G_test". +# If set, only the selected line card is going to be used +# for tests, including "activation_16x100G_test". + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + unprovision_test + provision_test + activation_16x100G_test +" + +NUM_NETIFS=0 + +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh + +until_lc_state_is() +{ + local state=$1; shift + local current=$("$@") + + echo "$current" + [ "$current" == "$state" ] +} + +until_lc_state_is_not() +{ + ! until_lc_state_is "$@" +} + +lc_state_get() +{ + local lc=$1 + + devlink lc show $DEVLINK_DEV lc $lc -j | jq -e -r ".[][][].state" +} + +lc_wait_until_state_changes() +{ + local lc=$1 + local state=$2 + local timeout=$3 # ms + + busywait "$timeout" until_lc_state_is_not "$state" lc_state_get "$lc" +} + +lc_wait_until_state_becomes() +{ + local lc=$1 + local state=$2 + local timeout=$3 # ms + + busywait "$timeout" until_lc_state_is "$state" lc_state_get "$lc" +} + +until_lc_port_count_is() +{ + local port_count=$1; shift + local current=$("$@") + + echo "$current" + [ $current == $port_count ] +} + +lc_port_count_get() +{ + local lc=$1 + + devlink port -j | jq -e -r ".[][] | select(.lc==$lc) | .port" | wc -l +} + +lc_wait_until_port_count_is() +{ + local lc=$1 + local port_count=$2 + local timeout=$3 # ms + + busywait "$timeout" until_lc_port_count_is "$port_count" lc_port_count_get "$lc" +} + +PROV_UNPROV_TIMEOUT=8000 # ms +POST_PROV_ACT_TIMEOUT=2000 # ms +PROV_PORTS_INSTANTIATION_TIMEOUT=15000 # ms + +unprovision_one() +{ + local lc=$1 + local state + + state=$(lc_state_get $lc) + check_err $? "Failed to get state of linecard $lc" + if [[ "$state" == "unprovisioned" ]]; then + return + fi + + log_info "Unprovisioning linecard $lc" + + devlink lc set $DEVLINK_DEV lc $lc notype + check_err $? "Failed to trigger linecard $lc unprovisioning" + + state=$(lc_wait_until_state_changes $lc "unprovisioning" \ + $PROV_UNPROV_TIMEOUT) + check_err $? "Failed to unprovision linecard $lc (timeout)" + + [ "$state" == "unprovisioned" ] + check_err $? "Failed to unprovision linecard $lc (state=$state)" +} + +provision_one() +{ + local lc=$1 + local type=$2 + local state + + log_info "Provisioning linecard $lc" + + devlink lc set $DEVLINK_DEV lc $lc type $type + check_err $? "Failed to trigger linecard $lc provisioning" + + state=$(lc_wait_until_state_changes $lc "provisioning" \ + $PROV_UNPROV_TIMEOUT) + check_err $? "Failed to provision linecard $lc (timeout)" + + [ "$state" == "provisioned" ] || [ "$state" == "active" ] + check_err $? "Failed to provision linecard $lc (state=$state)" + + provisioned_type=$(devlink lc show $DEVLINK_DEV lc $lc -j | jq -e -r ".[][][].type") + [ "$provisioned_type" == "$type" ] + check_err $? "Wrong provision type returned for linecard $lc (got \"$provisioned_type\", expected \"$type\")" + + # Wait for possible activation to make sure the state + # won't change after return from this function.
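+	# "provisioned" may transition to "active" shortly afterwards; a
+	# timeout here is tolerated on purpose, which is why the result is
+	# not passed to check_err.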
+ state=$(lc_wait_until_state_becomes $lc "active" \ + $POST_PROV_ACT_TIMEOUT) +} + +unprovision_test() +{ + RET=0 + local lc + + lc=$LC_SLOT + unprovision_one $lc + log_test "Unprovision" +} + +LC_16X100G_TYPE="16x100G" +LC_16X100G_PORT_COUNT=16 +LC_16X100G_DEVICE_COUNT=4 + +supported_types_check() +{ + local lc=$1 + local supported_types_count + local type_index + local lc_16x100_found=false + + supported_types_count=$(devlink lc show $DEVLINK_DEV lc $lc -j | \ + jq -e -r ".[][][].supported_types | length") + [ $supported_types_count != 0 ] + check_err $? "No supported types found for linecard $lc" + for (( type_index=0; type_index<$supported_types_count; type_index++ )) + do + type=$(devlink lc show $DEVLINK_DEV lc $lc -j | \ + jq -e -r ".[][][].supported_types[$type_index]") + if [[ "$type" == "$LC_16X100G_TYPE" ]]; then + lc_16x100_found=true + break + fi + done + [ $lc_16x100_found = true ] + check_err $? "16X100G not found among the supported types of linecard $lc" +} + +lc_info_check() +{ + local lc=$1 + local fixed_hw_revision + local running_ini_version + + fixed_hw_revision=$(devlink lc -v info $DEVLINK_DEV lc $lc -j | \ + jq -e -r '.[][][].versions.fixed."hw.revision"') + check_err $? "Failed to get linecard $lc fixed.hw.revision" + log_info "Linecard $lc fixed.hw.revision: \"$fixed_hw_revision\"" + running_ini_version=$(devlink lc -v info $DEVLINK_DEV lc $lc -j | \ + jq -e -r '.[][][].versions.running."ini.version"') + check_err $? "Failed to get linecard $lc running.ini.version" + log_info "Linecard $lc running.ini.version: \"$running_ini_version\"" +} + +lc_devices_check() +{ + local lc=$1 + local expected_device_count=$2 + local device_count + local device + + device_count=$(devlink lc show $DEVLINK_DEV lc $lc -j | \ + jq -e -r ".[][][].devices |length") + check_err $? "Failed to get linecard $lc device count" + [ $device_count != 0 ] + check_err $? "No device found on linecard $lc" + [ $device_count == $expected_device_count ] + check_err $? "Unexpected device count on linecard $lc (got $device_count, expected $expected_device_count)" + for (( device=0; device<device_count; device++ )) + do + log_info "Linecard $lc device $device" + done +} + +ports_check() +{ + local lc=$1 + local expected_port_count=$2 + local port_count + + port_count=$(lc_wait_until_port_count_is $lc $expected_port_count \ + $PROV_PORTS_INSTANTIATION_TIMEOUT) + [ $port_count != 0 ] + check_err $? "No port associated with linecard $lc" + [ $port_count == $expected_port_count ] + check_err $? "Unexpected port count on linecard $lc (got $port_count, expected $expected_port_count)" +} + +provision_test() +{ + RET=0 + local lc + local type + local state + + lc=$LC_SLOT + supported_types_check $lc + state=$(lc_state_get $lc) + check_err $? "Failed to get state of linecard $lc" + if [[ "$state" != "unprovisioned" ]]; then + unprovision_one $lc + fi + provision_one $lc $LC_16X100G_TYPE + lc_devices_check $lc $LC_16X100G_DEVICE_COUNT + lc_info_check $lc + ports_check $lc $LC_16X100G_PORT_COUNT + log_test "Provision" +} + +ACTIVATION_TIMEOUT=20000 # ms + +interface_check() +{ + ip link set $h1 up + ip link set $h2 up + ifaces_upped=true + setup_wait +} + +lc_devices_info_check() +{ + local lc=$1 + local expected_device_count=$2 + local device_count + local device + local running_device_fw + + device_count=$(devlink lc info $DEVLINK_DEV lc $lc -j | \ + jq -e -r ".[][][].devices |length") + check_err $?
"Failed to get linecard $lc device count" + for (( device=0; device<device_count; device++ )) + do + running_device_fw=$(devlink lc -v info $DEVLINK_DEV lc $lc -j | \ + jq -e -r ".[][][].devices[$device].versions.running.fw") + check_err $? "Failed to get linecard $lc device $device running fw version" + log_info "Linecard $lc device $device running.fw: \"$running_device_fw\"" + done +} + +activation_16x100G_test() +{ + RET=0 + local lc + local type + local state + + lc=$LC_SLOT + type=$LC_16X100G_TYPE + + unprovision_one $lc + provision_one $lc $type + state=$(lc_wait_until_state_becomes $lc "active" \ + $ACTIVATION_TIMEOUT) + check_err $? "Failed to get linecard $lc activated (timeout)" + + lc_devices_info_check $lc $LC_16X100G_DEVICE_COUNT + + interface_check + + log_test "Activation 16x100G" +} + +setup_prepare() +{ + local lc_num=$(devlink lc show -j | jq -e -r ".[][\"$DEVLINK_DEV\"] |length") + if [[ $? -ne 0 ]] || [[ $lc_num -eq 0 ]]; then + echo "SKIP: No linecard support found" + exit $ksft_skip + fi + + if [ -z "$LC_SLOT" ]; then + echo "SKIP: \"LC_SLOT\" variable not provided" + exit $ksft_skip + fi + + # Interfaces are not present during the script start, + # that's why we define NUM_NETIFS here so dummy + # implicit veth pairs are not created. + NUM_NETIFS=2 + h1=${NETIFS[p1]} + h2=${NETIFS[p2]} + ifaces_upped=false +} + +cleanup() +{ + if [ "$ifaces_upped" = true ] ; then + ip link set $h1 down + ip link set $h2 down + fi +} + +trap cleanup EXIT + +setup_prepare + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_burst.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_burst.sh new file mode 100755 index 000000000000..82a47b903f92 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_burst.sh @@ -0,0 +1,480 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# This test sends 1Gbps of traffic through the switch, into which it then +# injects a burst of traffic and tests that there are no drops. +# +# The 1Gbps stream is created by sending >1Gbps stream from H1. This stream +# ingresses through $swp1, and is forwarded thtrough a small temporary pool to a +# 1Gbps $swp3. +# +# Thus a 1Gbps stream enters $swp4, and is forwarded through a large pool to +# $swp2, and eventually to H2. Since $swp2 is a 1Gbps port as well, no backlog +# is generated. +# +# At this point, a burst of traffic is forwarded from H3. This enters $swp5, is +# forwarded to $swp2, which is fully subscribed by the 1Gbps stream. The +# expectation is that the burst is wholly absorbed by the large pool and no +# drops are caused. After the burst, there should be a backlog that is hard to +# get rid of, because $sw2 is fully subscribed. But because each individual +# packet is scheduled soon after getting enqueued, SLL and HLL do not impact the +# test. 
+# +# +-----------------------+ +-----------------------+ +# | H1 | | H3 | +# | + $h1.111 | | $h3.111 + | +# | | 192.0.2.33/28 | | 192.0.2.35/28 | | +# | | | | | | +# | + $h1 | | $h3 + | +# +---|-------------------+ +--------------------+ +------------------|----+ +# | | | | +# +---|----------------------|--------------------|----------------------|----+ +# | + $swp1 $swp3 + + $swp4 $swp5 | | +# | | iPOOL1 iPOOL0 | | iPOOL2 iPOOL2 | | +# | | ePOOL4 ePOOL5 | | ePOOL4 ePOOL4 | | +# | | 1Gbps | | 1Gbps | | +# | +-|----------------------|-+ +-|----------------------|-+ | +# | | + $swp1.111 $swp3.111 + | | + $swp4.111 $swp5.111 + | | +# | | | | | | +# | | BR1 | | BR2 | | +# | | | | | | +# | | | | + $swp2.111 | | +# | +--------------------------+ +---------|----------------+ | +# | | | +# | iPOOL0: 500KB dynamic | | +# | iPOOL1: 500KB dynamic | | +# | iPOOL2: 10MB dynamic + $swp2 | +# | ePOOL4: 500KB dynamic | iPOOL0 | +# | ePOOL5: 500KB dnamic | ePOOL6 | +# | ePOOL6: 10MB dynamic | 1Gbps | +# +-------------------------------------------------------|-------------------+ +# | +# +---|-------------------+ +# | + $h2 H2 | +# | | 1Gbps | +# | | | +# | + $h2.111 | +# | 192.0.2.34/28 | +# +-----------------------+ +# +# iPOOL0+ePOOL4 are helper pools for control traffic etc. +# iPOOL1+ePOOL5 are helper pools for modeling the 1Gbps stream +# iPOOL2+ePOOL6 are pools for soaking the burst traffic + +ALL_TESTS=" + ping_ipv4 + test_8K + test_800 +" + +lib_dir=$(dirname $0)/../../../net/forwarding + +NUM_NETIFS=8 +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh +source qos_lib.sh +source mlxsw_lib.sh + +_1KB=1000 +_500KB=$((500 * _1KB)) +_1MB=$((1000 * _1KB)) + +# The failure mode that this specifically tests is exhaustion of descriptor +# buffer. The point is to produce a burst that shared buffer should be able +# to accommodate, but produce it with small enough packets that the machine +# runs out of the descriptor buffer space with default configuration. +# +# The machine therefore needs to be able to produce line rate with as small +# packets as possible, and at the same time have large enough buffer that +# when filled with these small packets, it runs out of descriptors. +# Spectrum-2 is very close, but cannot perform this test. Therefore use +# Spectrum-3 as a minimum, and permit larger burst size, and therefore +# larger packets, to reduce spurious failures. 
+# +mlxsw_only_on_spectrum 3+ || exit + +BURST_SIZE=$((50000000)) +POOL_SIZE=$BURST_SIZE + +h1_create() +{ + simple_if_init $h1 + mtu_set $h1 10000 + + vlan_create $h1 111 v$h1 192.0.2.33/28 + ip link set dev $h1.111 type vlan egress-qos-map 0:1 +} + +h1_destroy() +{ + vlan_destroy $h1 111 + + mtu_restore $h1 + simple_if_fini $h1 +} + +h2_create() +{ + simple_if_init $h2 + mtu_set $h2 10000 + ethtool -s $h2 speed 1000 autoneg off + + vlan_create $h2 111 v$h2 192.0.2.34/28 +} + +h2_destroy() +{ + vlan_destroy $h2 111 + + ethtool -s $h2 autoneg on + mtu_restore $h2 + simple_if_fini $h2 +} + +h3_create() +{ + simple_if_init $h3 + mtu_set $h3 10000 + + vlan_create $h3 111 v$h3 192.0.2.35/28 +} + +h3_destroy() +{ + vlan_destroy $h3 111 + + mtu_restore $h3 + simple_if_fini $h3 +} + +switch_create() +{ + # pools + # ----- + + devlink_pool_size_thtype_save 0 + devlink_pool_size_thtype_save 4 + devlink_pool_size_thtype_save 1 + devlink_pool_size_thtype_save 5 + devlink_pool_size_thtype_save 2 + devlink_pool_size_thtype_save 6 + + devlink_port_pool_th_save $swp1 1 + devlink_port_pool_th_save $swp2 6 + devlink_port_pool_th_save $swp3 5 + devlink_port_pool_th_save $swp4 2 + devlink_port_pool_th_save $swp5 2 + + devlink_tc_bind_pool_th_save $swp1 1 ingress + devlink_tc_bind_pool_th_save $swp2 1 egress + devlink_tc_bind_pool_th_save $swp3 1 egress + devlink_tc_bind_pool_th_save $swp4 1 ingress + devlink_tc_bind_pool_th_save $swp5 1 ingress + + # Control traffic pools. Just reduce the size. + devlink_pool_size_thtype_set 0 dynamic $_500KB + devlink_pool_size_thtype_set 4 dynamic $_500KB + + # Stream modeling pools. + devlink_pool_size_thtype_set 1 dynamic $_500KB + devlink_pool_size_thtype_set 5 dynamic $_500KB + + # Burst soak pools. + devlink_pool_size_thtype_set 2 static $POOL_SIZE + devlink_pool_size_thtype_set 6 static $POOL_SIZE + + # $swp1 + # ----- + + ip link set dev $swp1 up + mtu_set $swp1 10000 + vlan_create $swp1 111 + ip link set dev $swp1.111 type vlan ingress-qos-map 0:0 1:1 + + devlink_port_pool_th_set $swp1 1 16 + devlink_tc_bind_pool_th_set $swp1 1 ingress 1 16 + + # Configure qdisc... + tc qdisc replace dev $swp1 root handle 1: \ + ets bands 8 strict 8 priomap 7 6 + # ... so that we can assign prio1 traffic to PG1. + dcb buffer set dev $swp1 prio-buffer all:0 1:1 + + # $swp2 + # ----- + + ip link set dev $swp2 up + mtu_set $swp2 10000 + ethtool -s $swp2 speed 1000 autoneg off + vlan_create $swp2 111 + ip link set dev $swp2.111 type vlan egress-qos-map 0:0 1:1 + + devlink_port_pool_th_set $swp2 6 $POOL_SIZE + devlink_tc_bind_pool_th_set $swp2 1 egress 6 $POOL_SIZE + + # prio 0->TC0 (band 7), 1->TC1 (band 6) + tc qdisc replace dev $swp2 root handle 1: \ + ets bands 8 strict 8 priomap 7 6 + + # $swp3 + # ----- + + ip link set dev $swp3 up + mtu_set $swp3 10000 + ethtool -s $swp3 speed 1000 autoneg off + vlan_create $swp3 111 + ip link set dev $swp3.111 type vlan egress-qos-map 0:0 1:1 + + devlink_port_pool_th_set $swp3 5 16 + devlink_tc_bind_pool_th_set $swp3 1 egress 5 16 + + # prio 0->TC0 (band 7), 1->TC1 (band 6) + tc qdisc replace dev $swp3 root handle 1: \ + ets bands 8 strict 8 priomap 7 6 + + # $swp4 + # ----- + + ip link set dev $swp4 up + mtu_set $swp4 10000 + ethtool -s $swp4 speed 1000 autoneg off + vlan_create $swp4 111 + ip link set dev $swp4.111 type vlan ingress-qos-map 0:0 1:1 + + devlink_port_pool_th_set $swp4 2 $POOL_SIZE + devlink_tc_bind_pool_th_set $swp4 1 ingress 2 $POOL_SIZE + + # Configure qdisc... 
+ tc qdisc replace dev $swp4 root handle 1: \ + ets bands 8 strict 8 priomap 7 6 + # ... so that we can assign prio1 traffic to PG1. + dcb buffer set dev $swp4 prio-buffer all:0 1:1 + + # $swp5 + # ----- + + ip link set dev $swp5 up + mtu_set $swp5 10000 + vlan_create $swp5 111 + ip link set dev $swp5.111 type vlan ingress-qos-map 0:0 1:1 + + devlink_port_pool_th_set $swp5 2 $POOL_SIZE + devlink_tc_bind_pool_th_set $swp5 1 ingress 2 $POOL_SIZE + + # Configure qdisc... + tc qdisc replace dev $swp5 root handle 1: \ + ets bands 8 strict 8 priomap 7 6 + # ... so that we can assign prio1 traffic to PG1. + dcb buffer set dev $swp5 prio-buffer all:0 1:1 + + # bridges + # ------- + + ip link add name br1 type bridge vlan_filtering 0 + ip link set dev $swp1.111 master br1 + ip link set dev $swp3.111 master br1 + ip link set dev br1 up + + ip link add name br2 type bridge vlan_filtering 0 + ip link set dev $swp2.111 master br2 + ip link set dev $swp4.111 master br2 + ip link set dev $swp5.111 master br2 + ip link set dev br2 up +} + +switch_destroy() +{ + # Do this first so that we can reset the limits to values that are only + # valid for the original static / dynamic setting. + devlink_pool_size_thtype_restore 6 + devlink_pool_size_thtype_restore 5 + devlink_pool_size_thtype_restore 4 + devlink_pool_size_thtype_restore 2 + devlink_pool_size_thtype_restore 1 + devlink_pool_size_thtype_restore 0 + + # bridges + # ------- + + ip link set dev br2 down + ip link set dev $swp5.111 nomaster + ip link set dev $swp4.111 nomaster + ip link set dev $swp2.111 nomaster + ip link del dev br2 + + ip link set dev br1 down + ip link set dev $swp3.111 nomaster + ip link set dev $swp1.111 nomaster + ip link del dev br1 + + # $swp5 + # ----- + + dcb buffer set dev $swp5 prio-buffer all:0 + tc qdisc del dev $swp5 root + + devlink_tc_bind_pool_th_restore $swp5 1 ingress + devlink_port_pool_th_restore $swp5 2 + + vlan_destroy $swp5 111 + mtu_restore $swp5 + ip link set dev $swp5 down + + # $swp4 + # ----- + + dcb buffer set dev $swp4 prio-buffer all:0 + tc qdisc del dev $swp4 root + + devlink_tc_bind_pool_th_restore $swp4 1 ingress + devlink_port_pool_th_restore $swp4 2 + + vlan_destroy $swp4 111 + ethtool -s $swp4 autoneg on + mtu_restore $swp4 + ip link set dev $swp4 down + + # $swp3 + # ----- + + tc qdisc del dev $swp3 root + + devlink_tc_bind_pool_th_restore $swp3 1 egress + devlink_port_pool_th_restore $swp3 5 + + vlan_destroy $swp3 111 + ethtool -s $swp3 autoneg on + mtu_restore $swp3 + ip link set dev $swp3 down + + # $swp2 + # ----- + + tc qdisc del dev $swp2 root + + devlink_tc_bind_pool_th_restore $swp2 1 egress + devlink_port_pool_th_restore $swp2 6 + + vlan_destroy $swp2 111 + ethtool -s $swp2 autoneg on + mtu_restore $swp2 + ip link set dev $swp2 down + + # $swp1 + # ----- + + dcb buffer set dev $swp1 prio-buffer all:0 + tc qdisc del dev $swp1 root + + devlink_tc_bind_pool_th_restore $swp1 1 ingress + devlink_port_pool_th_restore $swp1 1 + + vlan_destroy $swp1 111 + mtu_restore $swp1 + ip link set dev $swp1 down +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + swp4=${NETIFS[p6]} + + swp5=${NETIFS[p7]} + h3=${NETIFS[p8]} + + h2mac=$(mac_get $h2) + + vrf_prepare + + h1_create + h2_create + h3_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h3_destroy + h2_destroy + h1_destroy + + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1 192.0.2.34 " h1->h2" + ping_test $h3 192.0.2.34 " h3->h2" +} + 
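+# Send the >1Gbps background stream from H1, then inject a burst of $pktsize
+# packets from H3 that is sized to fit the shared-buffer budget, and verify
+# that the egress port shows no buffer discards while its queue fills to at
+# least 90% of BURST_SIZE.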
+__test_qos_burst() +{ + local pktsize=$1; shift + + RET=0 + + start_traffic_pktsize $pktsize $h1.111 192.0.2.33 192.0.2.34 $h2mac + sleep 1 + + local q0=$(ethtool_stats_get $swp2 tc_transmit_queue_tc_1) + ((q0 == 0)) + check_err $? "Transmit queue non-zero?" + + local d0=$(ethtool_stats_get $swp2 tc_no_buffer_discard_uc_tc_1) + + local cell_size=$(devlink_cell_size_get) + local cells=$((BURST_SIZE / cell_size)) + # Each packet is $pktsize of payload + headers. + local pkt_cells=$(((pktsize + 50 + cell_size - 1) / cell_size)) + # How many packets can we admit: + local pkts=$((cells / pkt_cells)) + + $MZ $h3 -p $pktsize -Q 1:111 -A 192.0.2.35 -B 192.0.2.34 \ + -a own -b $h2mac -c $pkts -t udp -q + sleep 1 + + local d1=$(ethtool_stats_get $swp2 tc_no_buffer_discard_uc_tc_1) + ((d1 == d0)) + check_err $? "Drops seen on egress port: $d0 -> $d1 ($((d1 - d0)))" + + # Check that the queue is somewhat close to the burst size. This + # makes sure that the lack of drops above was not due to port + # undersubscription. + local q0=$(ethtool_stats_get $swp2 tc_transmit_queue_tc_1) + local qe=$((90 * BURST_SIZE / 100)) + ((q0 > qe)) + check_err $? "Queue size expected >$qe, got $q0" + + stop_traffic + sleep 2 + + log_test "Burst: absorb $pkts ${pktsize}-B packets" +} + +test_8K() +{ + __test_qos_burst 8000 +} + +test_800() +{ + __test_qos_burst 800 +} + +bail_on_lldpad + +trap cleanup EXIT +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/vxlan_flooding_ipv6.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/vxlan_flooding_ipv6.sh index 429f7ee735cf..fd23c80eba31 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/vxlan_flooding_ipv6.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/vxlan_flooding_ipv6.sh @@ -159,6 +159,17 @@ flooding_remotes_add() local lsb local i + # Prevent unwanted packets from entering the bridge and interfering + # with the test. + tc qdisc add dev br0 clsact + tc filter add dev br0 egress protocol all pref 1 handle 1 \ + matchall skip_hw action drop + tc qdisc add dev $h1 clsact + tc filter add dev $h1 egress protocol all pref 1 handle 1 \ + flower skip_hw dst_mac de:ad:be:ef:13:37 action pass + tc filter add dev $h1 egress protocol all pref 2 handle 2 \ + matchall skip_hw action drop + for i in $(eval echo {1..$num_remotes}); do lsb=$((i + 1)) @@ -195,6 +206,12 @@ flooding_filters_del() done tc qdisc del dev $rp2 clsact + + tc filter del dev $h1 egress protocol all pref 2 handle 2 matchall + tc filter del dev $h1 egress protocol all pref 1 handle 1 flower + tc qdisc del dev $h1 clsact + tc filter del dev br0 egress protocol all pref 1 handle 1 matchall + tc qdisc del dev br0 clsact } flooding_check_packets() diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh index fedcb7b35af9..af5ea50ed5c0 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh @@ -172,6 +172,17 @@ flooding_filters_add() local lsb local i + # Prevent unwanted packets from entering the bridge and interfering + # with the test.
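+	# Only packets to the test destination MAC (de:ad:be:ef:13:37) may
+	# egress $h1; everything else, including packets generated by the
+	# bridge itself, is dropped before it can skew the counters.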
+ tc qdisc add dev br0 clsact + tc filter add dev br0 egress protocol all pref 1 handle 1 \ + matchall skip_hw action drop + tc qdisc add dev $h1 clsact + tc filter add dev $h1 egress protocol all pref 1 handle 1 \ + flower skip_hw dst_mac de:ad:be:ef:13:37 action pass + tc filter add dev $h1 egress protocol all pref 2 handle 2 \ + matchall skip_hw action drop + tc qdisc add dev $rp2 clsact for i in $(eval echo {1..$num_remotes}); do @@ -194,6 +205,12 @@ flooding_filters_del() done tc qdisc del dev $rp2 clsact + + tc filter del dev $h1 egress protocol all pref 2 handle 2 matchall + tc filter del dev $h1 egress protocol all pref 1 handle 1 flower + tc qdisc del dev $h1 clsact + tc filter del dev br0 egress protocol all pref 1 handle 1 matchall + tc qdisc del dev br0 clsact } flooding_check_packets() diff --git a/tools/testing/selftests/drivers/net/ocelot/basic_qos.sh b/tools/testing/selftests/drivers/net/ocelot/basic_qos.sh new file mode 100755 index 000000000000..c51c83421c61 --- /dev/null +++ b/tools/testing/selftests/drivers/net/ocelot/basic_qos.sh @@ -0,0 +1,253 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright 2022 NXP + +# The script is mostly generic, with the exception of the +# ethtool per-TC counter names ("rx_green_prio_${tc}") + +WAIT_TIME=1 +NUM_NETIFS=4 +STABLE_MAC_ADDRS=yes +NETIF_CREATE=no +lib_dir=$(dirname $0)/../../../net/forwarding +source $lib_dir/tc_common.sh +source $lib_dir/lib.sh + +require_command dcb + +h1=${NETIFS[p1]} +swp1=${NETIFS[p2]} +swp2=${NETIFS[p3]} +h2=${NETIFS[p4]} + +H1_IPV4="192.0.2.1" +H2_IPV4="192.0.2.2" +H1_IPV6="2001:db8:1::1" +H2_IPV6="2001:db8:1::2" + +h1_create() +{ + simple_if_init $h1 $H1_IPV4/24 $H1_IPV6/64 +} + +h1_destroy() +{ + simple_if_fini $h1 $H1_IPV4/24 $H1_IPV6/64 +} + +h2_create() +{ + simple_if_init $h2 $H2_IPV4/24 $H2_IPV6/64 +} + +h2_destroy() +{ + simple_if_fini $h2 $H2_IPV4/24 $H2_IPV6/64 +} + +h1_vlan_create() +{ + local vid=$1 + + vlan_create $h1 $vid + simple_if_init $h1.$vid $H1_IPV4/24 $H1_IPV6/64 + ip link set $h1.$vid type vlan \ + egress-qos-map 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7 \ + ingress-qos-map 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7 +} + +h1_vlan_destroy() +{ + local vid=$1 + + simple_if_fini $h1.$vid $H1_IPV4/24 $H1_IPV6/64 + vlan_destroy $h1 $vid +} + +h2_vlan_create() +{ + local vid=$1 + + vlan_create $h2 $vid + simple_if_init $h2.$vid $H2_IPV4/24 $H2_IPV6/64 + ip link set $h2.$vid type vlan \ + egress-qos-map 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7 \ + ingress-qos-map 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7 +} + +h2_vlan_destroy() +{ + local vid=$1 + + simple_if_fini $h2.$vid $H2_IPV4/24 $H2_IPV6/64 + vlan_destroy $h2 $vid +} + +vlans_prepare() +{ + h1_vlan_create 100 + h2_vlan_create 100 + + tc qdisc add dev ${h1}.100 clsact + tc filter add dev ${h1}.100 egress protocol ipv4 \ + flower ip_proto icmp action skbedit priority 3 + tc filter add dev ${h1}.100 egress protocol ipv6 \ + flower ip_proto icmpv6 action skbedit priority 3 +} + +vlans_destroy() +{ + tc qdisc del dev ${h1}.100 clsact + + h1_vlan_destroy 100 + h2_vlan_destroy 100 +} + +switch_create() +{ + ip link set ${swp1} up + ip link set ${swp2} up + + # Ports should trust VLAN PCP even with vlan_filtering=0 + ip link add br0 type bridge + ip link set ${swp1} master br0 + ip link set ${swp2} master br0 + ip link set br0 up +} + +switch_destroy() +{ + ip link del br0 +} + +setup_prepare() +{ + vrf_prepare + + h1_create + h2_create + switch_create +} + +cleanup() +{ + pre_cleanup + + h2_destroy + h1_destroy + switch_destroy + + vrf_cleanup +} + +dscp_cs_to_tos() 
+{ + local dscp_cs=$1 + + # https://datatracker.ietf.org/doc/html/rfc2474 + # 4.2.2.1 The Class Selector Codepoints + echo $((${dscp_cs} << 5)) +} + +run_test() +{ + local test_name=$1; shift + local if_name=$1; shift + local tc=$1; shift + local tos=$1; shift + local counter_name="rx_green_prio_${tc}" + local ipv4_before + local ipv4_after + local ipv6_before + local ipv6_after + + ipv4_before=$(ethtool_stats_get ${swp1} "${counter_name}") + ping_do ${if_name} $H2_IPV4 "-Q ${tos}" + ipv4_after=$(ethtool_stats_get ${swp1} "${counter_name}") + + if [ $((${ipv4_after} - ${ipv4_before})) -lt ${PING_COUNT} ]; then + RET=1 + else + RET=0 + fi + log_test "IPv4 ${test_name}" + + ipv6_before=$(ethtool_stats_get ${swp1} "${counter_name}") + ping_do ${if_name} $H2_IPV6 "-Q ${tos}" + ipv6_after=$(ethtool_stats_get ${swp1} "${counter_name}") + + if [ $((${ipv6_after} - ${ipv6_before})) -lt ${PING_COUNT} ]; then + RET=1 + else + RET=0 + fi + log_test "IPv6 ${test_name}" +} + +port_default_prio_get() +{ + local if_name=$1 + local prio + + prio="$(dcb -j app show dev ${if_name} default-prio | \ + jq '.default_prio[]')" + if [ -z "${prio}" ]; then + prio=0 + fi + + echo ${prio} +} + +test_port_default() +{ + local orig=$(port_default_prio_get ${swp1}) + local dmac=$(mac_get ${h2}) + + dcb app replace dev ${swp1} default-prio 5 + + run_test "Port-default QoS classification" ${h1} 5 0 + + dcb app replace dev ${swp1} default-prio ${orig} +} + +test_vlan_pcp() +{ + vlans_prepare + + run_test "Trusted VLAN PCP QoS classification" ${h1}.100 3 0 + + vlans_destroy +} + +test_ip_dscp() +{ + local port_default=$(port_default_prio_get ${swp1}) + local tos=$(dscp_cs_to_tos 4) + + dcb app add dev ${swp1} dscp-prio CS4:4 + run_test "Trusted DSCP QoS classification" ${h1} 4 ${tos} + dcb app del dev ${swp1} dscp-prio CS4:4 + + vlans_prepare + run_test "Untrusted DSCP QoS classification follows VLAN PCP" \ + ${h1}.100 3 ${tos} + vlans_destroy + + run_test "Untrusted DSCP QoS classification follows port default" \ + ${h1} ${port_default} ${tos} +} + +trap cleanup EXIT + +ALL_TESTS=" + test_port_default + test_vlan_pcp + test_ip_dscp +" + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/ocelot/psfp.sh b/tools/testing/selftests/drivers/net/ocelot/psfp.sh new file mode 100755 index 000000000000..5a5cee92c665 --- /dev/null +++ b/tools/testing/selftests/drivers/net/ocelot/psfp.sh @@ -0,0 +1,327 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright 2021-2022 NXP + +# Note: On LS1028A, in lack of enough user ports, this setup requires patching +# the device tree to use the second CPU port as a user port + +WAIT_TIME=1 +NUM_NETIFS=4 +STABLE_MAC_ADDRS=yes +NETIF_CREATE=no +lib_dir=$(dirname $0)/../../../net/forwarding +source $lib_dir/tc_common.sh +source $lib_dir/lib.sh +source $lib_dir/tsn_lib.sh + +UDS_ADDRESS_H1="/var/run/ptp4l_h1" +UDS_ADDRESS_SWP1="/var/run/ptp4l_swp1" + +# Tunables +NUM_PKTS=1000 +STREAM_VID=100 +STREAM_PRIO=6 +# Use a conservative cycle of 10 ms to allow the test to still pass when the +# kernel has some extra overhead like lockdep etc +CYCLE_TIME_NS=10000000 +# Create two Gate Control List entries, one OPEN and one CLOSE, of equal +# durations +GATE_DURATION_NS=$((${CYCLE_TIME_NS} / 2)) +# Give 2/3 of the cycle time to user space and 1/3 to the kernel +FUDGE_FACTOR=$((${CYCLE_TIME_NS} / 3)) +# Shift the isochron base time by half the gate time, so that packets are +# always received by swp1 close to the middle of the time slot, to 
minimize +# inaccuracies due to network sync +SHIFT_TIME_NS=$((${GATE_DURATION_NS} / 2)) + +h1=${NETIFS[p1]} +swp1=${NETIFS[p2]} +swp2=${NETIFS[p3]} +h2=${NETIFS[p4]} + +H1_IPV4="192.0.2.1" +H2_IPV4="192.0.2.2" +H1_IPV6="2001:db8:1::1" +H2_IPV6="2001:db8:1::2" + +# Chain number exported by the ocelot driver for +# Per-Stream Filtering and Policing filters +PSFP() +{ + echo 30000 +} + +psfp_chain_create() +{ + local if_name=$1 + + tc qdisc add dev $if_name clsact + + tc filter add dev $if_name ingress chain 0 pref 49152 flower \ + skip_sw action goto chain $(PSFP) +} + +psfp_chain_destroy() +{ + local if_name=$1 + + tc qdisc del dev $if_name clsact +} + +psfp_filter_check() +{ + local expected=$1 + local packets="" + local drops="" + local stats="" + + stats=$(tc -j -s filter show dev ${swp1} ingress chain $(PSFP) pref 1) + packets=$(echo ${stats} | jq ".[1].options.actions[].stats.packets") + drops=$(echo ${stats} | jq ".[1].options.actions[].stats.drops") + + if ! [ "${packets}" = "${expected}" ]; then + printf "Expected filter to match on %d packets but matched on %d instead\n" \ + "${expected}" "${packets}" + fi + + echo "Hardware filter reports ${drops} drops" +} + +h1_create() +{ + simple_if_init $h1 $H1_IPV4/24 $H1_IPV6/64 +} + +h1_destroy() +{ + simple_if_fini $h1 $H1_IPV4/24 $H1_IPV6/64 +} + +h2_create() +{ + simple_if_init $h2 $H2_IPV4/24 $H2_IPV6/64 +} + +h2_destroy() +{ + simple_if_fini $h2 $H2_IPV4/24 $H2_IPV6/64 +} + +switch_create() +{ + local h2_mac_addr=$(mac_get $h2) + + ip link set ${swp1} up + ip link set ${swp2} up + + ip link add br0 type bridge vlan_filtering 1 + ip link set ${swp1} master br0 + ip link set ${swp2} master br0 + ip link set br0 up + + bridge vlan add dev ${swp2} vid ${STREAM_VID} + bridge vlan add dev ${swp1} vid ${STREAM_VID} + # PSFP on Ocelot requires the filter to also be added to the bridge + # FDB, and not be removed + bridge fdb add dev ${swp2} \ + ${h2_mac_addr} vlan ${STREAM_VID} static master + + psfp_chain_create ${swp1} + + tc filter add dev ${swp1} ingress chain $(PSFP) pref 1 \ + protocol 802.1Q flower skip_sw \ + dst_mac ${h2_mac_addr} vlan_id ${STREAM_VID} \ + action gate base-time 0.000000000 \ + sched-entry OPEN ${GATE_DURATION_NS} -1 -1 \ + sched-entry CLOSE ${GATE_DURATION_NS} -1 -1 +} + +switch_destroy() +{ + psfp_chain_destroy ${swp1} + ip link del br0 +} + +txtime_setup() +{ + local if_name=$1 + + tc qdisc add dev ${if_name} clsact + # Classify PTP on TC 7 and isochron on TC 6 + tc filter add dev ${if_name} egress protocol 0x88f7 \ + flower action skbedit priority 7 + tc filter add dev ${if_name} egress protocol 802.1Q \ + flower vlan_ethtype 0xdead action skbedit priority 6 + tc qdisc add dev ${if_name} handle 100: parent root mqprio num_tc 8 \ + queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 \ + map 0 1 2 3 4 5 6 7 \ + hw 1 + # Set up TC 6 for SO_TXTIME. tc-mqprio queues count from 1. + tc qdisc replace dev ${if_name} parent 100:$((${STREAM_PRIO} + 1)) etf \ + clockid CLOCK_TAI offload delta ${FUDGE_FACTOR} +} + +txtime_cleanup() +{ + local if_name=$1 + + tc qdisc del dev ${if_name} root + tc qdisc del dev ${if_name} clsact +} + +setup_prepare() +{ + vrf_prepare + + h1_create + h2_create + switch_create + + txtime_setup ${h1} + + # Set up swp1 as a master PHC for h1, synchronized to the local + # CLOCK_REALTIME. + phc2sys_start ${swp1} ${UDS_ADDRESS_SWP1} + + # Assumption true for LS1028A: h1 and h2 use the same PHC. So by + # synchronizing h1 to swp1 via PTP, h2 is also implicitly synchronized + # to swp1 (and both to CLOCK_REALTIME). 
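The gate action installed in switch_create() cycles through one OPEN and one CLOSE entry of equal duration, so whether a frame passes depends only on where its hardware arrival time falls within the cycle. A minimal C sketch of that admission rule, assuming perfect time synchronization and the zero base-time used above (a simplification of the schedule arithmetic, not the driver implementation):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Mirrors the tunables at the top of this script. */
#define CYCLE_TIME_NS		10000000ULL		/* 10 ms cycle */
#define GATE_DURATION_NS	(CYCLE_TIME_NS / 2)	/* OPEN half, then CLOSE half */

/* With base-time 0, a frame is admitted iff it arrives during the OPEN
 * half of the cycle. SHIFT_TIME_NS exists to keep arrivals near the
 * middle of a slot, away from the edges where sync error matters. */
static bool gate_admits(uint64_t arrival_ns)
{
	return (arrival_ns % CYCLE_TIME_NS) < GATE_DURATION_NS;
}

int main(void)
{
	/* 2.5 ms into the cycle: mid-OPEN, as in the in-band test. */
	printf("t = 2.5 ms: %s\n", gate_admits(2500000) ? "pass" : "drop");
	/* 7.5 ms into the cycle: mid-CLOSE, as in the out-of-band test. */
	printf("t = 7.5 ms: %s\n", gate_admits(7500000) ? "pass" : "drop");
	return 0;
}

This is also why the out-of-band test later passes a base time of 0.005000000: it slides transmissions by half a cycle, straight into the CLOSE window.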
+ ptp4l_start ${h1} true ${UDS_ADDRESS_H1} + ptp4l_start ${swp1} false ${UDS_ADDRESS_SWP1} + + # Make sure there are no filter matches at the beginning of the test + psfp_filter_check 0 +} + +cleanup() +{ + pre_cleanup + + ptp4l_stop ${swp1} + ptp4l_stop ${h1} + phc2sys_stop + isochron_recv_stop + + txtime_cleanup ${h1} + + h2_destroy + h1_destroy + switch_destroy + + vrf_cleanup +} + +debug_incorrectly_dropped_packets() +{ + local isochron_dat=$1 + local dropped_seqids + local seqid + + echo "Packets incorrectly dropped:" + + dropped_seqids=$(isochron report \ + --input-file "${isochron_dat}" \ + --printf-format "%u RX hw %T\n" \ + --printf-args "qR" | \ + grep 'RX hw 0.000000000' | \ + awk '{print $1}') + + for seqid in ${dropped_seqids}; do + isochron report \ + --input-file "${isochron_dat}" \ + --start ${seqid} --stop ${seqid} \ + --printf-format "seqid %u scheduled for %T, HW TX timestamp %T\n" \ + --printf-args "qST" + done +} + +debug_incorrectly_received_packets() +{ + local isochron_dat=$1 + + echo "Packets incorrectly received:" + + isochron report \ + --input-file "${isochron_dat}" \ + --printf-format "seqid %u scheduled for %T, HW TX timestamp %T, HW RX timestamp %T\n" \ + --printf-args "qSTR" | + grep -v 'HW RX timestamp 0.000000000' +} + +run_test() +{ + local base_time=$1 + local expected=$2 + local test_name=$3 + local debug=$4 + local isochron_dat="$(mktemp)" + local extra_args="" + local received + + isochron_do \ + "${h1}" \ + "${h2}" \ + "${UDS_ADDRESS_H1}" \ + "" \ + "${base_time}" \ + "${CYCLE_TIME_NS}" \ + "${SHIFT_TIME_NS}" \ + "${NUM_PKTS}" \ + "${STREAM_VID}" \ + "${STREAM_PRIO}" \ + "" \ + "${isochron_dat}" + + # Count all received packets by looking at the non-zero RX timestamps + received=$(isochron report \ + --input-file "${isochron_dat}" \ + --printf-format "%u\n" --printf-args "R" | \ + grep -w -v '0' | wc -l) + + if [ "${received}" = "${expected}" ]; then + RET=0 + else + RET=1 + echo "Expected isochron to receive ${expected} packets but received ${received}" + fi + + log_test "${test_name}" + + if [ "$RET" = "1" ]; then + ${debug} "${isochron_dat}" + fi + + rm ${isochron_dat} 2> /dev/null +} + +test_gate_in_band() +{ + # Send packets in-band with the OPEN gate entry + run_test 0.000000000 ${NUM_PKTS} "In band" \ + debug_incorrectly_dropped_packets + + psfp_filter_check ${NUM_PKTS} +} + +test_gate_out_of_band() +{ + # Send packets in-band with the CLOSE gate entry + run_test 0.005000000 0 "Out of band" \ + debug_incorrectly_received_packets + + psfp_filter_check $((2 * ${NUM_PKTS})) +} + +trap cleanup EXIT + +ALL_TESTS=" + test_gate_in_band + test_gate_out_of_band +" + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh index eaf8a04a7ca5..7e684e27a682 100755 --- a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh +++ b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh @@ -215,15 +215,15 @@ test_vlan_pop() sleep 1 - tcpdump_stop + tcpdump_stop $eth2 - if tcpdump_show | grep -q "$eth3_mac > $eth2_mac, ethertype IPv4"; then + if tcpdump_show $eth2 | grep -q "$eth3_mac > $eth2_mac, ethertype IPv4"; then echo "OK" else echo "FAIL" fi - tcpdump_cleanup + tcpdump_cleanup $eth2 } test_vlan_push() @@ -236,15 +236,15 @@ test_vlan_push() sleep 1 - tcpdump_stop + tcpdump_stop $eth3.100 - if tcpdump_show | grep -q "$eth2_mac > $eth3_mac"; then + if tcpdump_show $eth3.100 | grep -q "$eth2_mac > 
$eth3_mac"; then echo "OK" else echo "FAIL" fi - tcpdump_cleanup + tcpdump_cleanup $eth3.100 } test_vlan_ingress_modify() @@ -267,15 +267,15 @@ test_vlan_ingress_modify() sleep 1 - tcpdump_stop + tcpdump_stop $eth2 - if tcpdump_show | grep -q "$eth3_mac > $eth2_mac, .* vlan 300"; then + if tcpdump_show $eth2 | grep -q "$eth3_mac > $eth2_mac, .* vlan 300"; then echo "OK" else echo "FAIL" fi - tcpdump_cleanup + tcpdump_cleanup $eth2 tc filter del dev $eth0 ingress chain $(IS1 2) pref 3 @@ -305,15 +305,15 @@ test_vlan_egress_modify() sleep 1 - tcpdump_stop + tcpdump_stop $eth2 - if tcpdump_show | grep -q "$eth3_mac > $eth2_mac, .* vlan 300"; then + if tcpdump_show $eth2 | grep -q "$eth3_mac > $eth2_mac, .* vlan 300"; then echo "OK" else echo "FAIL" fi - tcpdump_cleanup + tcpdump_cleanup $eth2 tc filter del dev $eth1 egress chain $(ES0) pref 3 tc qdisc del dev $eth1 clsact diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index 11779405dc80..25f4d54067c0 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -64,6 +64,7 @@ #include <sys/types.h> #include <sys/wait.h> #include <unistd.h> +#include <setjmp.h> #include "kselftest.h" @@ -183,7 +184,10 @@ struct __test_metadata *_metadata, \ struct __fixture_variant_metadata *variant) \ { \ - test_name(_metadata); \ + _metadata->setup_completed = true; \ + if (setjmp(_metadata->env) == 0) \ + test_name(_metadata); \ + __test_check_assert(_metadata); \ } \ static struct __test_metadata _##test_name##_object = \ { .name = #test_name, \ @@ -287,7 +291,9 @@ #define FIXTURE_TEARDOWN(fixture_name) \ void fixture_name##_teardown( \ struct __test_metadata __attribute__((unused)) *_metadata, \ - FIXTURE_DATA(fixture_name) __attribute__((unused)) *self) + FIXTURE_DATA(fixture_name) __attribute__((unused)) *self, \ + const FIXTURE_VARIANT(fixture_name) \ + __attribute__((unused)) *variant) /** * FIXTURE_VARIANT() - Optionally called once per fixture @@ -302,9 +308,9 @@ * ... * }; * - * Defines type of constant parameters provided to FIXTURE_SETUP() and TEST_F() - * as *variant*. Variants allow the same tests to be run with different - * arguments. + * Defines type of constant parameters provided to FIXTURE_SETUP(), TEST_F() and + * FIXTURE_TEARDOWN as *variant*. Variants allow the same tests to be run with + * different arguments. */ #define FIXTURE_VARIANT(fixture_name) struct _fixture_variant_##fixture_name @@ -356,10 +362,7 @@ * Defines a test that depends on a fixture (e.g., is part of a test case). * Very similar to TEST() except that *self* is the setup instance of fixture's * datatype exposed for use by the implementation. - * - * Warning: use of ASSERT_* here will skip TEARDOWN. */ -/* TODO(wad) register fixtures on dedicated test lists. */ #define TEST_F(fixture_name, test_name) \ __TEST_F_IMPL(fixture_name, test_name, -1, TEST_TIMEOUT_DEFAULT) @@ -381,12 +384,17 @@ /* fixture data is alloced, setup, and torn down per call. */ \ FIXTURE_DATA(fixture_name) self; \ memset(&self, 0, sizeof(FIXTURE_DATA(fixture_name))); \ - fixture_name##_setup(_metadata, &self, variant->data); \ - /* Let setup failure terminate early. */ \ - if (!_metadata->passed) \ - return; \ - fixture_name##_##test_name(_metadata, &self, variant->data); \ - fixture_name##_teardown(_metadata, &self); \ + if (setjmp(_metadata->env) == 0) { \ + fixture_name##_setup(_metadata, &self, variant->data); \ + /* Let setup failure terminate early. 
*/ \ + if (!_metadata->passed) \ + return; \ + _metadata->setup_completed = true; \ + fixture_name##_##test_name(_metadata, &self, variant->data); \ + } \ + if (_metadata->setup_completed) \ + fixture_name##_teardown(_metadata, &self, variant->data); \ + __test_check_assert(_metadata); \ } \ static struct __test_metadata \ _##fixture_name##_##test_name##_object = { \ @@ -683,7 +691,7 @@ */ #define OPTIONAL_HANDLER(_assert) \ for (; _metadata->trigger; _metadata->trigger = \ - __bail(_assert, _metadata->no_print, _metadata->step)) + __bail(_assert, _metadata)) #define __INC_STEP(_metadata) \ /* Keep "step" below 255 (which is used for "SKIP" reporting). */ \ @@ -830,6 +838,9 @@ struct __test_metadata { bool timed_out; /* did this test timeout instead of exiting? */ __u8 step; bool no_print; /* manual trigger when TH_LOG_STREAM is not available */ + bool aborted; /* stopped test due to failed ASSERT */ + bool setup_completed; /* did setup finish? */ + jmp_buf env; /* for exiting out of test early */ struct __test_results *results; struct __test_metadata *prev, *next; }; @@ -848,16 +859,26 @@ static inline void __register_test(struct __test_metadata *t) __LIST_APPEND(t->fixture->tests, t); } -static inline int __bail(int for_realz, bool no_print, __u8 step) +static inline int __bail(int for_realz, struct __test_metadata *t) { + /* if this is ASSERT, return immediately. */ if (for_realz) { - if (no_print) - _exit(step); - abort(); + t->aborted = true; + longjmp(t->env, 1); } + /* otherwise, end the for loop and continue. */ return 0; } +static inline void __test_check_assert(struct __test_metadata *t) +{ + if (t->aborted) { + if (t->no_print) + _exit(t->step); + abort(); + } +} + struct __test_metadata *__active_test; static void __timeout_handler(int sig, siginfo_t *info, void *ucontext) { diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index d1e8f5237469..0b0e4402bba6 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -3,6 +3,7 @@ /aarch64/debug-exceptions /aarch64/get-reg-list /aarch64/psci_cpu_on_test +/aarch64/vcpu_width_config /aarch64/vgic_init /aarch64/vgic_irq /s390x/memop @@ -33,6 +34,7 @@ /x86_64/state_test /x86_64/svm_vmcall_test /x86_64/svm_int_ctl_test +/x86_64/tsc_scaling_sync /x86_64/sync_regs_test /x86_64/tsc_msrs_test /x86_64/userspace_io_test diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 21c2dbd21a81..681b173aa87c 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -106,6 +106,7 @@ TEST_GEN_PROGS_aarch64 += aarch64/arch_timer TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list TEST_GEN_PROGS_aarch64 += aarch64/psci_cpu_on_test +TEST_GEN_PROGS_aarch64 += aarch64/vcpu_width_config TEST_GEN_PROGS_aarch64 += aarch64/vgic_init TEST_GEN_PROGS_aarch64 += aarch64/vgic_irq TEST_GEN_PROGS_aarch64 += demand_paging_test diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer.c b/tools/testing/selftests/kvm/aarch64/arch_timer.c index b08d30bf71c5..3b940a101bc0 100644 --- a/tools/testing/selftests/kvm/aarch64/arch_timer.c +++ b/tools/testing/selftests/kvm/aarch64/arch_timer.c @@ -362,11 +362,12 @@ static void test_init_timer_irq(struct kvm_vm *vm) pr_debug("ptimer_irq: %d; vtimer_irq: %d\n", ptimer_irq, vtimer_irq); } +static int gic_fd; + static struct kvm_vm *test_vm_create(void) { struct kvm_vm *vm; unsigned int i; - int ret; int nr_vcpus = 
test_args.nr_vcpus;
 
 	vm = vm_create_default_with_vcpus(nr_vcpus, 0, 0, guest_code, NULL);
@@ -383,8 +384,8 @@ static struct kvm_vm *test_vm_create(void)
 	ucall_init(vm, NULL);
 	test_init_timer_irq(vm);
 
-	ret = vgic_v3_setup(vm, nr_vcpus, 64, GICD_BASE_GPA, GICR_BASE_GPA);
-	if (ret < 0) {
+	gic_fd = vgic_v3_setup(vm, nr_vcpus, 64, GICD_BASE_GPA, GICR_BASE_GPA);
+	if (gic_fd < 0) {
 		print_skip("Failed to create vgic-v3");
 		exit(KSFT_SKIP);
 	}
@@ -395,6 +396,12 @@ static struct kvm_vm *test_vm_create(void)
 	return vm;
 }
 
+static void test_vm_cleanup(struct kvm_vm *vm)
+{
+	close(gic_fd);
+	kvm_vm_free(vm);
+}
+
 static void test_print_help(char *name)
 {
 	pr_info("Usage: %s [-h] [-n nr_vcpus] [-i iterations] [-p timer_period_ms]\n",
@@ -478,7 +485,7 @@ int main(int argc, char *argv[])
 
 	vm = test_vm_create();
 	test_run(vm);
-	kvm_vm_free(vm);
+	test_vm_cleanup(vm);
 
 	return 0;
 }
diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
index f12147c43464..0b571f3fe64c 100644
--- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c
+++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
@@ -503,8 +503,13 @@ static void run_test(struct vcpu_config *c)
 			++missing_regs;
 
 	if (new_regs || missing_regs) {
+		n = 0;
+		for_each_reg_filtered(i)
+			++n;
+
 		printf("%s: Number blessed registers: %5lld\n", config_name(c), blessed_n);
-		printf("%s: Number registers:         %5lld\n", config_name(c), reg_list->n);
+		printf("%s: Number registers:         %5lld (includes %lld filtered registers)\n",
+		       config_name(c), reg_list->n, reg_list->n - n);
 	}
 
 	if (new_regs) {
@@ -683,9 +688,10 @@ static __u64 base_regs[] = {
 	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[4]),
 	KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.fpsr),
 	KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.fpcr),
-	KVM_REG_ARM_FW_REG(0),
-	KVM_REG_ARM_FW_REG(1),
-	KVM_REG_ARM_FW_REG(2),
+	KVM_REG_ARM_FW_REG(0),		/* KVM_REG_ARM_PSCI_VERSION */
+	KVM_REG_ARM_FW_REG(1),		/* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1 */
+	KVM_REG_ARM_FW_REG(2),		/* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2 */
+	KVM_REG_ARM_FW_REG(3),		/* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3 */
 	ARM64_SYS_REG(3, 3, 14, 3, 1),	/* CNTV_CTL_EL0 */
 	ARM64_SYS_REG(3, 3, 14, 3, 2),	/* CNTV_CVAL_EL0 */
 	ARM64_SYS_REG(3, 3, 14, 0, 2),
diff --git a/tools/testing/selftests/kvm/aarch64/vcpu_width_config.c b/tools/testing/selftests/kvm/aarch64/vcpu_width_config.c
new file mode 100644
index 000000000000..6e9402679229
--- /dev/null
+++ b/tools/testing/selftests/kvm/aarch64/vcpu_width_config.c
@@ -0,0 +1,122 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * vcpu_width_config - Test KVM_ARM_VCPU_INIT() with KVM_ARM_VCPU_EL1_32BIT.
+ *
+ * Copyright (c) 2022 Google LLC.
+ *
+ * This is a test that ensures that non-mixed-width vCPUs (all 64bit vCPUs
+ * or all 32bit vCPUs) can be configured and mixed-width vCPUs cannot be
+ * configured.
+ */
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+
+
+/*
+ * Add a vCPU, run KVM_ARM_VCPU_INIT with @init1, and then
+ * add another vCPU, and run KVM_ARM_VCPU_INIT with @init2.
+ */
+static int add_init_2vcpus(struct kvm_vcpu_init *init1,
+			   struct kvm_vcpu_init *init2)
+{
+	struct kvm_vm *vm;
+	int ret;
+
+	vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
+
+	vm_vcpu_add(vm, 0);
+	ret = _vcpu_ioctl(vm, 0, KVM_ARM_VCPU_INIT, init1);
+	if (ret)
+		goto free_exit;
+
+	vm_vcpu_add(vm, 1);
+	ret = _vcpu_ioctl(vm, 1, KVM_ARM_VCPU_INIT, init2);
+
+free_exit:
+	kvm_vm_free(vm);
+	return ret;
+}
+
+/*
+ * Add two vCPUs, then run KVM_ARM_VCPU_INIT for one vCPU with @init1,
+ * and run KVM_ARM_VCPU_INIT for another vCPU with @init2.
+ */
+static int add_2vcpus_init_2vcpus(struct kvm_vcpu_init *init1,
+				  struct kvm_vcpu_init *init2)
+{
+	struct kvm_vm *vm;
+	int ret;
+
+	vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
+
+	vm_vcpu_add(vm, 0);
+	vm_vcpu_add(vm, 1);
+
+	ret = _vcpu_ioctl(vm, 0, KVM_ARM_VCPU_INIT, init1);
+	if (ret)
+		goto free_exit;
+
+	ret = _vcpu_ioctl(vm, 1, KVM_ARM_VCPU_INIT, init2);
+
+free_exit:
+	kvm_vm_free(vm);
+	return ret;
+}
+
+/*
+ * Tests that two 64bit vCPUs can be configured, that two 32bit vCPUs can
+ * be configured, and that two mixed-width vCPUs cannot be configured.
+ * Each of the three cases configures the vCPUs in two different orders:
+ * one runs KVM_CREATE_VCPU for both vCPUs and then KVM_ARM_VCPU_INIT for
+ * each of them; the other runs KVM_CREATE_VCPU and KVM_ARM_VCPU_INIT for
+ * one vCPU, and then runs those commands for the other vCPU.
+ */
+int main(void)
+{
+	struct kvm_vcpu_init init1, init2;
+	struct kvm_vm *vm;
+	int ret;
+
+	if (!kvm_check_cap(KVM_CAP_ARM_EL1_32BIT)) {
+		print_skip("KVM_CAP_ARM_EL1_32BIT is not supported");
+		exit(KSFT_SKIP);
+	}
+
+	/* Get the preferred target type and copy that to init2 for later use */
+	vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
+	vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init1);
+	kvm_vm_free(vm);
+	init2 = init1;
+
+	/* Test with 64bit vCPUs */
+	ret = add_init_2vcpus(&init1, &init1);
+	TEST_ASSERT(ret == 0,
+		    "Configuring 64bit EL1 vCPUs failed unexpectedly");
+	ret = add_2vcpus_init_2vcpus(&init1, &init1);
+	TEST_ASSERT(ret == 0,
+		    "Configuring 64bit EL1 vCPUs failed unexpectedly");
+
+	/* Test with 32bit vCPUs */
+	init1.features[0] = (1 << KVM_ARM_VCPU_EL1_32BIT);
+	ret = add_init_2vcpus(&init1, &init1);
+	TEST_ASSERT(ret == 0,
+		    "Configuring 32bit EL1 vCPUs failed unexpectedly");
+	ret = add_2vcpus_init_2vcpus(&init1, &init1);
+	TEST_ASSERT(ret == 0,
+		    "Configuring 32bit EL1 vCPUs failed unexpectedly");
+
+	/* Test with mixed-width vCPUs */
+	init1.features[0] = 0;
+	init2.features[0] = (1 << KVM_ARM_VCPU_EL1_32BIT);
+	ret = add_init_2vcpus(&init1, &init2);
+	TEST_ASSERT(ret != 0,
+		    "Configuring mixed-width vCPUs worked unexpectedly");
+	ret = add_2vcpus_init_2vcpus(&init1, &init2);
+	TEST_ASSERT(ret != 0,
+		    "Configuring mixed-width vCPUs worked unexpectedly");
+
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c
index c9d9e513ca04..7b47ae4f952e 100644
--- a/tools/testing/selftests/kvm/dirty_log_perf_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c
@@ -18,11 +18,40 @@
 #include "test_util.h"
 #include "perf_test_util.h"
 #include "guest_modes.h"
+
 #ifdef __aarch64__
 #include "aarch64/vgic.h"
 
 #define GICD_BASE_GPA			0x8000000ULL
 #define GICR_BASE_GPA			0x80A0000ULL
+
+static int gic_fd;
+
+static void arch_setup_vm(struct kvm_vm *vm, unsigned int nr_vcpus)
+{
+	/*
+	 * The test can still run even if hardware does not support GICv3, as it
+	 * 
is only an optimization to reduce guest exits. + */ + gic_fd = vgic_v3_setup(vm, nr_vcpus, 64, GICD_BASE_GPA, GICR_BASE_GPA); +} + +static void arch_cleanup_vm(struct kvm_vm *vm) +{ + if (gic_fd > 0) + close(gic_fd); +} + +#else /* __aarch64__ */ + +static void arch_setup_vm(struct kvm_vm *vm, unsigned int nr_vcpus) +{ +} + +static void arch_cleanup_vm(struct kvm_vm *vm) +{ +} + #endif /* How many host loops to run by default (one KVM_GET_DIRTY_LOG for each loop)*/ @@ -206,9 +235,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) vm_enable_cap(vm, &cap); } -#ifdef __aarch64__ - vgic_v3_setup(vm, nr_vcpus, 64, GICD_BASE_GPA, GICR_BASE_GPA); -#endif + arch_setup_vm(vm, nr_vcpus); /* Start the iterations */ iteration = 0; @@ -302,6 +329,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) } free_bitmaps(bitmaps, p->slots); + arch_cleanup_vm(vm); perf_test_destroy_vm(vm); } diff --git a/tools/testing/selftests/kvm/include/riscv/processor.h b/tools/testing/selftests/kvm/include/riscv/processor.h index dc284c6bdbc3..eca5c622efd2 100644 --- a/tools/testing/selftests/kvm/include/riscv/processor.h +++ b/tools/testing/selftests/kvm/include/riscv/processor.h @@ -101,7 +101,9 @@ static inline void set_reg(struct kvm_vm *vm, uint32_t vcpuid, uint64_t id, #define PGTBL_PTE_WRITE_SHIFT 2 #define PGTBL_PTE_READ_MASK 0x0000000000000002ULL #define PGTBL_PTE_READ_SHIFT 1 -#define PGTBL_PTE_PERM_MASK (PGTBL_PTE_EXECUTE_MASK | \ +#define PGTBL_PTE_PERM_MASK (PGTBL_PTE_ACCESSED_MASK | \ + PGTBL_PTE_DIRTY_MASK | \ + PGTBL_PTE_EXECUTE_MASK | \ PGTBL_PTE_WRITE_MASK | \ PGTBL_PTE_READ_MASK) #define PGTBL_PTE_VALID_MASK 0x0000000000000001ULL diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 37db341d4cc5..d0d51adec76e 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -60,6 +60,23 @@ /* CPUID.0x8000_0001.EDX */ #define CPUID_GBPAGES (1ul << 26) +/* Page table bitfield declarations */ +#define PTE_PRESENT_MASK BIT_ULL(0) +#define PTE_WRITABLE_MASK BIT_ULL(1) +#define PTE_USER_MASK BIT_ULL(2) +#define PTE_ACCESSED_MASK BIT_ULL(5) +#define PTE_DIRTY_MASK BIT_ULL(6) +#define PTE_LARGE_MASK BIT_ULL(7) +#define PTE_GLOBAL_MASK BIT_ULL(8) +#define PTE_NX_MASK BIT_ULL(63) + +#define PAGE_SHIFT 12 +#define PAGE_SIZE (1ULL << PAGE_SHIFT) +#define PAGE_MASK (~(PAGE_SIZE-1)) + +#define PHYSICAL_PAGE_MASK GENMASK_ULL(51, 12) +#define PTE_GET_PFN(pte) (((pte) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT) + /* General Registers in 64-Bit Mode */ struct gpr64_regs { u64 rax; diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c b/tools/testing/selftests/kvm/kvm_page_table_test.c index ba1fdc3dcf4a..2c4a7563a4f8 100644 --- a/tools/testing/selftests/kvm/kvm_page_table_test.c +++ b/tools/testing/selftests/kvm/kvm_page_table_test.c @@ -278,7 +278,7 @@ static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg) else guest_test_phys_mem = p->phys_offset; #ifdef __s390x__ - alignment = max(0x100000, alignment); + alignment = max(0x100000UL, alignment); #endif guest_test_phys_mem = align_down(guest_test_phys_mem, alignment); diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c index d377f2603d98..3961487a4870 100644 --- a/tools/testing/selftests/kvm/lib/riscv/processor.c +++ b/tools/testing/selftests/kvm/lib/riscv/processor.c @@ -268,7 +268,7 @@ void vcpu_dump(FILE *stream, struct 
kvm_vm *vm, uint32_t vcpuid, uint8_t indent) core.regs.t3, core.regs.t4, core.regs.t5, core.regs.t6); } -static void guest_hang(void) +static void __aligned(16) guest_hang(void) { while (1) ; diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 9f000dfb5594..33ea5e9955d9 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -19,38 +19,6 @@ vm_vaddr_t exception_handlers; -/* Virtual translation table structure declarations */ -struct pageUpperEntry { - uint64_t present:1; - uint64_t writable:1; - uint64_t user:1; - uint64_t write_through:1; - uint64_t cache_disable:1; - uint64_t accessed:1; - uint64_t ignored_06:1; - uint64_t page_size:1; - uint64_t ignored_11_08:4; - uint64_t pfn:40; - uint64_t ignored_62_52:11; - uint64_t execute_disable:1; -}; - -struct pageTableEntry { - uint64_t present:1; - uint64_t writable:1; - uint64_t user:1; - uint64_t write_through:1; - uint64_t cache_disable:1; - uint64_t accessed:1; - uint64_t dirty:1; - uint64_t reserved_07:1; - uint64_t global:1; - uint64_t ignored_11_09:3; - uint64_t pfn:40; - uint64_t ignored_62_52:11; - uint64_t execute_disable:1; -}; - void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent) { @@ -195,23 +163,21 @@ static void *virt_get_pte(struct kvm_vm *vm, uint64_t pt_pfn, uint64_t vaddr, return &page_table[index]; } -static struct pageUpperEntry *virt_create_upper_pte(struct kvm_vm *vm, - uint64_t pt_pfn, - uint64_t vaddr, - uint64_t paddr, - int level, - enum x86_page_size page_size) +static uint64_t *virt_create_upper_pte(struct kvm_vm *vm, + uint64_t pt_pfn, + uint64_t vaddr, + uint64_t paddr, + int level, + enum x86_page_size page_size) { - struct pageUpperEntry *pte = virt_get_pte(vm, pt_pfn, vaddr, level); - - if (!pte->present) { - pte->writable = true; - pte->present = true; - pte->page_size = (level == page_size); - if (pte->page_size) - pte->pfn = paddr >> vm->page_shift; + uint64_t *pte = virt_get_pte(vm, pt_pfn, vaddr, level); + + if (!(*pte & PTE_PRESENT_MASK)) { + *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK; + if (level == page_size) + *pte |= PTE_LARGE_MASK | (paddr & PHYSICAL_PAGE_MASK); else - pte->pfn = vm_alloc_page_table(vm) >> vm->page_shift; + *pte |= vm_alloc_page_table(vm) & PHYSICAL_PAGE_MASK; } else { /* * Entry already present. 
Assert that the caller doesn't want @@ -221,7 +187,7 @@ static struct pageUpperEntry *virt_create_upper_pte(struct kvm_vm *vm, TEST_ASSERT(level != page_size, "Cannot create hugepage at level: %u, vaddr: 0x%lx\n", page_size, vaddr); - TEST_ASSERT(!pte->page_size, + TEST_ASSERT(!(*pte & PTE_LARGE_MASK), "Cannot create page table at level: %u, vaddr: 0x%lx\n", level, vaddr); } @@ -232,8 +198,8 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, enum x86_page_size page_size) { const uint64_t pg_size = 1ull << ((page_size * 9) + 12); - struct pageUpperEntry *pml4e, *pdpe, *pde; - struct pageTableEntry *pte; + uint64_t *pml4e, *pdpe, *pde; + uint64_t *pte; TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Unknown or unsupported guest mode, mode: 0x%x", vm->mode); @@ -257,24 +223,22 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, */ pml4e = virt_create_upper_pte(vm, vm->pgd >> vm->page_shift, vaddr, paddr, 3, page_size); - if (pml4e->page_size) + if (*pml4e & PTE_LARGE_MASK) return; - pdpe = virt_create_upper_pte(vm, pml4e->pfn, vaddr, paddr, 2, page_size); - if (pdpe->page_size) + pdpe = virt_create_upper_pte(vm, PTE_GET_PFN(*pml4e), vaddr, paddr, 2, page_size); + if (*pdpe & PTE_LARGE_MASK) return; - pde = virt_create_upper_pte(vm, pdpe->pfn, vaddr, paddr, 1, page_size); - if (pde->page_size) + pde = virt_create_upper_pte(vm, PTE_GET_PFN(*pdpe), vaddr, paddr, 1, page_size); + if (*pde & PTE_LARGE_MASK) return; /* Fill in page table entry. */ - pte = virt_get_pte(vm, pde->pfn, vaddr, 0); - TEST_ASSERT(!pte->present, + pte = virt_get_pte(vm, PTE_GET_PFN(*pde), vaddr, 0); + TEST_ASSERT(!(*pte & PTE_PRESENT_MASK), "PTE already present for 4k page at vaddr: 0x%lx\n", vaddr); - pte->pfn = paddr >> vm->page_shift; - pte->writable = true; - pte->present = 1; + *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PHYSICAL_PAGE_MASK); } void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) @@ -282,22 +246,22 @@ void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) __virt_pg_map(vm, vaddr, paddr, X86_PAGE_SIZE_4K); } -static struct pageTableEntry *_vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid, +static uint64_t *_vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr) { uint16_t index[4]; - struct pageUpperEntry *pml4e, *pdpe, *pde; - struct pageTableEntry *pte; + uint64_t *pml4e, *pdpe, *pde; + uint64_t *pte; struct kvm_cpuid_entry2 *entry; struct kvm_sregs sregs; int max_phy_addr; - /* Set the bottom 52 bits. */ - uint64_t rsvd_mask = 0x000fffffffffffff; + uint64_t rsvd_mask = 0; entry = kvm_get_supported_cpuid_index(0x80000008, 0); max_phy_addr = entry->eax & 0x000000ff; - /* Clear the bottom bits of the reserved mask. */ - rsvd_mask = (rsvd_mask >> max_phy_addr) << max_phy_addr; + /* Set the high bits in the reserved mask. 
*/ + if (max_phy_addr < 52) + rsvd_mask = GENMASK_ULL(51, max_phy_addr); /* * SDM vol 3, fig 4-11 "Formats of CR3 and Paging-Structure Entries @@ -307,7 +271,7 @@ static struct pageTableEntry *_vm_get_page_table_entry(struct kvm_vm *vm, int vc */ vcpu_sregs_get(vm, vcpuid, &sregs); if ((sregs.efer & EFER_NX) == 0) { - rsvd_mask |= (1ull << 63); + rsvd_mask |= PTE_NX_MASK; } TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " @@ -329,30 +293,29 @@ static struct pageTableEntry *_vm_get_page_table_entry(struct kvm_vm *vm, int vc index[3] = (vaddr >> 39) & 0x1ffu; pml4e = addr_gpa2hva(vm, vm->pgd); - TEST_ASSERT(pml4e[index[3]].present, + TEST_ASSERT(pml4e[index[3]] & PTE_PRESENT_MASK, "Expected pml4e to be present for gva: 0x%08lx", vaddr); - TEST_ASSERT((*(uint64_t*)(&pml4e[index[3]]) & - (rsvd_mask | (1ull << 7))) == 0, + TEST_ASSERT((pml4e[index[3]] & (rsvd_mask | PTE_LARGE_MASK)) == 0, "Unexpected reserved bits set."); - pdpe = addr_gpa2hva(vm, pml4e[index[3]].pfn * vm->page_size); - TEST_ASSERT(pdpe[index[2]].present, + pdpe = addr_gpa2hva(vm, PTE_GET_PFN(pml4e[index[3]]) * vm->page_size); + TEST_ASSERT(pdpe[index[2]] & PTE_PRESENT_MASK, "Expected pdpe to be present for gva: 0x%08lx", vaddr); - TEST_ASSERT(pdpe[index[2]].page_size == 0, + TEST_ASSERT(!(pdpe[index[2]] & PTE_LARGE_MASK), "Expected pdpe to map a pde not a 1-GByte page."); - TEST_ASSERT((*(uint64_t*)(&pdpe[index[2]]) & rsvd_mask) == 0, + TEST_ASSERT((pdpe[index[2]] & rsvd_mask) == 0, "Unexpected reserved bits set."); - pde = addr_gpa2hva(vm, pdpe[index[2]].pfn * vm->page_size); - TEST_ASSERT(pde[index[1]].present, + pde = addr_gpa2hva(vm, PTE_GET_PFN(pdpe[index[2]]) * vm->page_size); + TEST_ASSERT(pde[index[1]] & PTE_PRESENT_MASK, "Expected pde to be present for gva: 0x%08lx", vaddr); - TEST_ASSERT(pde[index[1]].page_size == 0, + TEST_ASSERT(!(pde[index[1]] & PTE_LARGE_MASK), "Expected pde to map a pte not a 2-MByte page."); - TEST_ASSERT((*(uint64_t*)(&pde[index[1]]) & rsvd_mask) == 0, + TEST_ASSERT((pde[index[1]] & rsvd_mask) == 0, "Unexpected reserved bits set."); - pte = addr_gpa2hva(vm, pde[index[1]].pfn * vm->page_size); - TEST_ASSERT(pte[index[0]].present, + pte = addr_gpa2hva(vm, PTE_GET_PFN(pde[index[1]]) * vm->page_size); + TEST_ASSERT(pte[index[0]] & PTE_PRESENT_MASK, "Expected pte to be present for gva: 0x%08lx", vaddr); return &pte[index[0]]; @@ -360,7 +323,7 @@ static struct pageTableEntry *_vm_get_page_table_entry(struct kvm_vm *vm, int vc uint64_t vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr) { - struct pageTableEntry *pte = _vm_get_page_table_entry(vm, vcpuid, vaddr); + uint64_t *pte = _vm_get_page_table_entry(vm, vcpuid, vaddr); return *(uint64_t *)pte; } @@ -368,18 +331,17 @@ uint64_t vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr) void vm_set_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr, uint64_t pte) { - struct pageTableEntry *new_pte = _vm_get_page_table_entry(vm, vcpuid, - vaddr); + uint64_t *new_pte = _vm_get_page_table_entry(vm, vcpuid, vaddr); *(uint64_t *)new_pte = pte; } void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) { - struct pageUpperEntry *pml4e, *pml4e_start; - struct pageUpperEntry *pdpe, *pdpe_start; - struct pageUpperEntry *pde, *pde_start; - struct pageTableEntry *pte, *pte_start; + uint64_t *pml4e, *pml4e_start; + uint64_t *pdpe, *pdpe_start; + uint64_t *pde, *pde_start; + uint64_t *pte, *pte_start; if (!vm->pgd_created) return; @@ -389,58 +351,58 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, 
uint8_t indent) fprintf(stream, "%*s index hvaddr gpaddr " "addr w exec dirty\n", indent, ""); - pml4e_start = (struct pageUpperEntry *) addr_gpa2hva(vm, vm->pgd); + pml4e_start = (uint64_t *) addr_gpa2hva(vm, vm->pgd); for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) { pml4e = &pml4e_start[n1]; - if (!pml4e->present) + if (!(*pml4e & PTE_PRESENT_MASK)) continue; - fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10lx %u " + fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10llx %u " " %u\n", indent, "", pml4e - pml4e_start, pml4e, - addr_hva2gpa(vm, pml4e), (uint64_t) pml4e->pfn, - pml4e->writable, pml4e->execute_disable); + addr_hva2gpa(vm, pml4e), PTE_GET_PFN(*pml4e), + !!(*pml4e & PTE_WRITABLE_MASK), !!(*pml4e & PTE_NX_MASK)); - pdpe_start = addr_gpa2hva(vm, pml4e->pfn * vm->page_size); + pdpe_start = addr_gpa2hva(vm, *pml4e & PHYSICAL_PAGE_MASK); for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) { pdpe = &pdpe_start[n2]; - if (!pdpe->present) + if (!(*pdpe & PTE_PRESENT_MASK)) continue; - fprintf(stream, "%*spdpe 0x%-3zx %p 0x%-12lx 0x%-10lx " + fprintf(stream, "%*spdpe 0x%-3zx %p 0x%-12lx 0x%-10llx " "%u %u\n", indent, "", pdpe - pdpe_start, pdpe, addr_hva2gpa(vm, pdpe), - (uint64_t) pdpe->pfn, pdpe->writable, - pdpe->execute_disable); + PTE_GET_PFN(*pdpe), !!(*pdpe & PTE_WRITABLE_MASK), + !!(*pdpe & PTE_NX_MASK)); - pde_start = addr_gpa2hva(vm, pdpe->pfn * vm->page_size); + pde_start = addr_gpa2hva(vm, *pdpe & PHYSICAL_PAGE_MASK); for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) { pde = &pde_start[n3]; - if (!pde->present) + if (!(*pde & PTE_PRESENT_MASK)) continue; fprintf(stream, "%*spde 0x%-3zx %p " - "0x%-12lx 0x%-10lx %u %u\n", + "0x%-12lx 0x%-10llx %u %u\n", indent, "", pde - pde_start, pde, addr_hva2gpa(vm, pde), - (uint64_t) pde->pfn, pde->writable, - pde->execute_disable); + PTE_GET_PFN(*pde), !!(*pde & PTE_WRITABLE_MASK), + !!(*pde & PTE_NX_MASK)); - pte_start = addr_gpa2hva(vm, pde->pfn * vm->page_size); + pte_start = addr_gpa2hva(vm, *pde & PHYSICAL_PAGE_MASK); for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) { pte = &pte_start[n4]; - if (!pte->present) + if (!(*pte & PTE_PRESENT_MASK)) continue; fprintf(stream, "%*spte 0x%-3zx %p " - "0x%-12lx 0x%-10lx %u %u " + "0x%-12lx 0x%-10llx %u %u " " %u 0x%-10lx\n", indent, "", pte - pte_start, pte, addr_hva2gpa(vm, pte), - (uint64_t) pte->pfn, - pte->writable, - pte->execute_disable, - pte->dirty, + PTE_GET_PFN(*pte), + !!(*pte & PTE_WRITABLE_MASK), + !!(*pte & PTE_NX_MASK), + !!(*pte & PTE_DIRTY_MASK), ((uint64_t) n1 << 27) | ((uint64_t) n2 << 18) | ((uint64_t) n3 << 9) @@ -558,8 +520,8 @@ static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector, vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) { uint16_t index[4]; - struct pageUpperEntry *pml4e, *pdpe, *pde; - struct pageTableEntry *pte; + uint64_t *pml4e, *pdpe, *pde; + uint64_t *pte; TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " "unknown or unsupported guest mode, mode: 0x%x", vm->mode); @@ -572,22 +534,22 @@ vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) if (!vm->pgd_created) goto unmapped_gva; pml4e = addr_gpa2hva(vm, vm->pgd); - if (!pml4e[index[3]].present) + if (!(pml4e[index[3]] & PTE_PRESENT_MASK)) goto unmapped_gva; - pdpe = addr_gpa2hva(vm, pml4e[index[3]].pfn * vm->page_size); - if (!pdpe[index[2]].present) + pdpe = addr_gpa2hva(vm, PTE_GET_PFN(pml4e[index[3]]) * vm->page_size); + if (!(pdpe[index[2]] & PTE_PRESENT_MASK)) goto unmapped_gva; - pde = addr_gpa2hva(vm, pdpe[index[2]].pfn * vm->page_size); - if (!pde[index[1]].present) + 
pde = addr_gpa2hva(vm, PTE_GET_PFN(pdpe[index[2]]) * vm->page_size); + if (!(pde[index[1]] & PTE_PRESENT_MASK)) goto unmapped_gva; - pte = addr_gpa2hva(vm, pde[index[1]].pfn * vm->page_size); - if (!pte[index[0]].present) + pte = addr_gpa2hva(vm, PTE_GET_PFN(pde[index[1]]) * vm->page_size); + if (!(pte[index[0]] & PTE_PRESENT_MASK)) goto unmapped_gva; - return (pte[index[0]].pfn * vm->page_size) + (gva & 0xfffu); + return (PTE_GET_PFN(pte[index[0]]) * vm->page_size) + (gva & ~PAGE_MASK); unmapped_gva: TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva); diff --git a/tools/testing/selftests/kvm/x86_64/amx_test.c b/tools/testing/selftests/kvm/x86_64/amx_test.c index 52a3ef6629e8..76f65c22796f 100644 --- a/tools/testing/selftests/kvm/x86_64/amx_test.c +++ b/tools/testing/selftests/kvm/x86_64/amx_test.c @@ -29,7 +29,6 @@ #define X86_FEATURE_XSAVE (1 << 26) #define X86_FEATURE_OSXSAVE (1 << 27) -#define PAGE_SIZE (1 << 12) #define NUM_TILES 8 #define TILE_SIZE 1024 #define XSAVE_SIZE ((NUM_TILES * TILE_SIZE) + PAGE_SIZE) diff --git a/tools/testing/selftests/kvm/x86_64/emulator_error_test.c b/tools/testing/selftests/kvm/x86_64/emulator_error_test.c index f070ff0224fa..aeb3850f81bd 100644 --- a/tools/testing/selftests/kvm/x86_64/emulator_error_test.c +++ b/tools/testing/selftests/kvm/x86_64/emulator_error_test.c @@ -12,7 +12,6 @@ #include "vmx.h" #define VCPU_ID 1 -#define PAGE_SIZE 4096 #define MAXPHYADDR 36 #define MEM_REGION_GVA 0x0000123456789000 diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86_64/smm_test.c index a626d40fdb48..b4e0c860769e 100644 --- a/tools/testing/selftests/kvm/x86_64/smm_test.c +++ b/tools/testing/selftests/kvm/x86_64/smm_test.c @@ -21,8 +21,6 @@ #define VCPU_ID 1 -#define PAGE_SIZE 4096 - #define SMRAM_SIZE 65536 #define SMRAM_MEMSLOT ((1 << 16) | 1) #define SMRAM_PAGES (SMRAM_SIZE / PAGE_SIZE) diff --git a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c index e683d0ac3e45..19b35c607dc6 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c @@ -32,7 +32,6 @@ #define MSR_IA32_TSC_ADJUST 0x3b #endif -#define PAGE_SIZE 4096 #define VCPU_ID 5 #define TSC_ADJUST_VALUE (1ll << 32) diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c index 865e17146815..bcd370827859 100644 --- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c +++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c @@ -23,7 +23,6 @@ #define SHINFO_REGION_GVA 0xc0000000ULL #define SHINFO_REGION_GPA 0xc0000000ULL #define SHINFO_REGION_SLOT 10 -#define PAGE_SIZE 4096 #define DUMMY_REGION_GPA (SHINFO_REGION_GPA + (2 * PAGE_SIZE)) #define DUMMY_REGION_SLOT 11 diff --git a/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c b/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c index adc94452b57c..b30fe9de1d4f 100644 --- a/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c +++ b/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c @@ -15,7 +15,6 @@ #define HCALL_REGION_GPA 0xc0000000ULL #define HCALL_REGION_SLOT 10 -#define PAGE_SIZE 4096 static struct kvm_vm *vm; diff --git a/tools/testing/selftests/mqueue/mq_perf_tests.c b/tools/testing/selftests/mqueue/mq_perf_tests.c index b019e0b8221c..84fda3b49073 100644 --- a/tools/testing/selftests/mqueue/mq_perf_tests.c +++ b/tools/testing/selftests/mqueue/mq_perf_tests.c @@ -180,6 +180,9 @@ void 
shutdown(int exit_val, char *err_cause, int line_no) if (in_shutdown++) return; + /* Free the cpu_set allocated using CPU_ALLOC in main function */ + CPU_FREE(cpu_set); + for (i = 0; i < num_cpus_to_pin; i++) if (cpu_threads[i]) { pthread_kill(cpu_threads[i], SIGUSR1); @@ -551,6 +554,12 @@ int main(int argc, char *argv[]) perror("sysconf(_SC_NPROCESSORS_ONLN)"); exit(1); } + + if (getuid() != 0) + ksft_exit_skip("Not running as root, but almost all tests " + "require root in order to modify\nsystem settings. " + "Exiting.\n"); + cpus_online = min(MAX_CPUS, sysconf(_SC_NPROCESSORS_ONLN)); cpu_set = CPU_ALLOC(cpus_online); if (cpu_set == NULL) { @@ -589,7 +598,7 @@ int main(int argc, char *argv[]) cpu_set)) { fprintf(stderr, "Any given CPU may " "only be given once.\n"); - exit(1); + goto err_code; } else CPU_SET_S(cpus_to_pin[cpu], cpu_set_size, cpu_set); @@ -607,7 +616,7 @@ int main(int argc, char *argv[]) queue_path = malloc(strlen(option) + 2); if (!queue_path) { perror("malloc()"); - exit(1); + goto err_code; } queue_path[0] = '/'; queue_path[1] = 0; @@ -622,17 +631,12 @@ int main(int argc, char *argv[]) fprintf(stderr, "Must pass at least one CPU to continuous " "mode.\n"); poptPrintUsage(popt_context, stderr, 0); - exit(1); + goto err_code; } else if (!continuous_mode) { num_cpus_to_pin = 1; cpus_to_pin[0] = cpus_online - 1; } - if (getuid() != 0) - ksft_exit_skip("Not running as root, but almost all tests " - "require root in order to modify\nsystem settings. " - "Exiting.\n"); - max_msgs = fopen(MAX_MSGS, "r+"); max_msgsize = fopen(MAX_MSGSIZE, "r+"); if (!max_msgs) @@ -740,4 +744,9 @@ int main(int argc, char *argv[]) sleep(1); } shutdown(0, "", 0); + +err_code: + CPU_FREE(cpu_set); + exit(1); + } diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 3fe2515aa616..af7f6e6ff182 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -36,6 +36,7 @@ TEST_PROGS += srv6_end_dt4_l3vpn_test.sh TEST_PROGS += srv6_end_dt6_l3vpn_test.sh TEST_PROGS += vrf_strict_mode_test.sh TEST_PROGS += arp_ndisc_evict_nocarrier.sh +TEST_PROGS += ndisc_unsolicited_na_test.sh TEST_PROGS_EXTENDED := in_netns.sh setup_loopback.sh setup_veth.sh TEST_PROGS_EXTENDED += toeplitz_client.sh toeplitz.sh TEST_GEN_FILES = socket nettest diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index d444ee6aa3cb..b3bf5319bb0e 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh @@ -1208,6 +1208,20 @@ ipv4_fcnal() set +e check_nexthop "dev veth1" "" log_test $? 0 "Nexthops removed on admin down" + + # nexthop route delete warning: route add with nhid and delete + # using device + run_cmd "$IP li set dev veth1 up" + run_cmd "$IP nexthop add id 12 via 172.16.1.3 dev veth1" + out1=`dmesg | grep "WARNING:.*fib_nh_match.*" | wc -l` + run_cmd "$IP route add 172.16.101.1/32 nhid 12" + run_cmd "$IP route delete 172.16.101.1/32 dev veth1" + out2=`dmesg | grep "WARNING:.*fib_nh_match.*" | wc -l` + [ $out1 -eq $out2 ] + rc=$? 
+ log_test $rc 0 "Delete nexthop route warning" + run_cmd "$IP route delete 172.16.101.1/32 nhid 12" + run_cmd "$IP nexthop del id 12" } ipv4_grp_fcnal() diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh index 4f70baad867d..bbe3b379927a 100755 --- a/tools/testing/selftests/net/fib_rule_tests.sh +++ b/tools/testing/selftests/net/fib_rule_tests.sh @@ -20,6 +20,7 @@ SRC_IP6=2001:db8:1::3 DEV_ADDR=192.51.100.1 DEV_ADDR6=2001:db8:1::1 DEV=dummy0 +TESTS="fib_rule6 fib_rule4" log_test() { @@ -316,7 +317,16 @@ fi # start clean cleanup &> /dev/null setup -run_fibrule_tests +for t in $TESTS +do + case $t in + fib_rule6_test|fib_rule6) fib_rule6_test;; + fib_rule4_test|fib_rule4) fib_rule4_test;; + + help) echo "Test names: $TESTS"; exit 0;; + + esac +done cleanup if [ "$TESTS" != "none" ]; then diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile index 8fa97ae9af9e..ae80c2aef577 100644 --- a/tools/testing/selftests/net/forwarding/Makefile +++ b/tools/testing/selftests/net/forwarding/Makefile @@ -2,6 +2,7 @@ TEST_PROGS = bridge_igmp.sh \ bridge_locked_port.sh \ + bridge_mdb.sh \ bridge_port_isolation.sh \ bridge_sticky_fdb.sh \ bridge_vlan_aware.sh \ diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb.sh b/tools/testing/selftests/net/forwarding/bridge_mdb.sh new file mode 100755 index 000000000000..b1ba6876dd86 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/bridge_mdb.sh @@ -0,0 +1,103 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Verify that adding host mdb entries work as intended for all types of +# multicast filters: ipv4, ipv6, and mac + +ALL_TESTS="mdb_add_del_test" +NUM_NETIFS=2 + +TEST_GROUP_IP4="225.1.2.3" +TEST_GROUP_IP6="ff02::42" +TEST_GROUP_MAC="01:00:01:c0:ff:ee" + +source lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64 +} + +switch_create() +{ + # Enable multicast filtering + ip link add dev br0 type bridge mcast_snooping 1 + + ip link set dev $swp1 master br0 + + ip link set dev br0 up + ip link set dev $swp1 up +} + +switch_destroy() +{ + ip link set dev $swp1 down + ip link del dev br0 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + vrf_prepare + + h1_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h1_destroy + + vrf_cleanup +} + +do_mdb_add_del() +{ + local group=$1 + local flag=$2 + + RET=0 + bridge mdb add dev br0 port br0 grp $group $flag 2>/dev/null + check_err $? "Failed adding $group to br0, port br0" + + if [ -z "$flag" ]; then + flag="temp" + fi + + bridge mdb show dev br0 | grep $group | grep -q $flag 2>/dev/null + check_err $? "$group not added with $flag flag" + + bridge mdb del dev br0 port br0 grp $group 2>/dev/null + check_err $? "Failed deleting $group from br0, port br0" + + bridge mdb show dev br0 | grep -q $group >/dev/null + check_err_fail 1 $? 
"$group still in mdb after delete" + + log_test "MDB add/del group $group to bridge port br0" +} + +mdb_add_del_test() +{ + do_mdb_add_del $TEST_GROUP_MAC permanent + do_mdb_add_del $TEST_GROUP_IP4 + do_mdb_add_del $TEST_GROUP_IP6 +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 664b9ecaf228..66681a2bcdd3 100644..100755 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -27,6 +27,9 @@ INTERFACE_TIMEOUT=${INTERFACE_TIMEOUT:=600} LOW_AGEING_TIME=${LOW_AGEING_TIME:=1000} REQUIRE_JQ=${REQUIRE_JQ:=yes} REQUIRE_MZ=${REQUIRE_MZ:=yes} +REQUIRE_MTOOLS=${REQUIRE_MTOOLS:=no} +STABLE_MAC_ADDRS=${STABLE_MAC_ADDRS:=no} +TCPDUMP_EXTRA_FLAGS=${TCPDUMP_EXTRA_FLAGS:=} relative_path="${BASH_SOURCE%/*}" if [[ "$relative_path" == "${BASH_SOURCE}" ]]; then @@ -159,6 +162,12 @@ fi if [[ "$REQUIRE_MZ" = "yes" ]]; then require_command $MZ fi +if [[ "$REQUIRE_MTOOLS" = "yes" ]]; then + # https://github.com/vladimiroltean/mtools/ + # patched for IPv6 support + require_command msend + require_command mreceive +fi if [[ ! -v NUM_NETIFS ]]; then echo "SKIP: importer does not define \"NUM_NETIFS\"" @@ -214,10 +223,41 @@ create_netif() esac } +declare -A MAC_ADDR_ORIG +mac_addr_prepare() +{ + local new_addr= + local dev= + + for ((i = 1; i <= NUM_NETIFS; ++i)); do + dev=${NETIFS[p$i]} + new_addr=$(printf "00:01:02:03:04:%02x" $i) + + MAC_ADDR_ORIG["$dev"]=$(ip -j link show dev $dev | jq -e '.[].address') + # Strip quotes + MAC_ADDR_ORIG["$dev"]=${MAC_ADDR_ORIG["$dev"]//\"/} + ip link set dev $dev address $new_addr + done +} + +mac_addr_restore() +{ + local dev= + + for ((i = 1; i <= NUM_NETIFS; ++i)); do + dev=${NETIFS[p$i]} + ip link set dev $dev address ${MAC_ADDR_ORIG["$dev"]} + done +} + if [[ "$NETIF_CREATE" = "yes" ]]; then create_netif fi +if [[ "$STABLE_MAC_ADDRS" = "yes" ]]; then + mac_addr_prepare +fi + for ((i = 1; i <= NUM_NETIFS; ++i)); do ip link show dev ${NETIFS[p$i]} &> /dev/null if [[ $? 
-ne 0 ]]; then @@ -503,6 +543,10 @@ pre_cleanup() echo "Pausing before cleanup, hit any key to continue" read fi + + if [[ "$STABLE_MAC_ADDRS" = "yes" ]]; then + mac_addr_restore + fi } vrf_prepare() @@ -824,6 +868,15 @@ mac_get() ip -j link show dev $if_name | jq -r '.[]["address"]' } +ipv6_lladdr_get() +{ + local if_name=$1 + + ip -j addr show dev $if_name | \ + jq -r '.[]["addr_info"][] | select(.scope == "link").local' | \ + head -1 +} + bridge_ageing_time_get() { local bridge=$1 @@ -1322,25 +1375,40 @@ flood_test() __start_traffic() { + local pktsize=$1; shift local proto=$1; shift local h_in=$1; shift # Where the traffic egresses the host local sip=$1; shift local dip=$1; shift local dmac=$1; shift - $MZ $h_in -p 8000 -A $sip -B $dip -c 0 \ + $MZ $h_in -p $pktsize -A $sip -B $dip -c 0 \ -a own -b $dmac -t "$proto" -q "$@" & sleep 1 } +start_traffic_pktsize() +{ + local pktsize=$1; shift + + __start_traffic $pktsize udp "$@" +} + +start_tcp_traffic_pktsize() +{ + local pktsize=$1; shift + + __start_traffic $pktsize tcp "$@" +} + start_traffic() { - __start_traffic udp "$@" + start_traffic_pktsize 8000 "$@" } start_tcp_traffic() { - __start_traffic tcp "$@" + start_tcp_traffic_pktsize 8000 "$@" } stop_traffic() @@ -1349,13 +1417,17 @@ stop_traffic() { kill %% && wait %%; } 2>/dev/null } +declare -A cappid +declare -A capfile +declare -A capout + tcpdump_start() { local if_name=$1; shift local ns=$1; shift - capfile=$(mktemp) - capout=$(mktemp) + capfile[$if_name]=$(mktemp) + capout[$if_name]=$(mktemp) if [ -z $ns ]; then ns_cmd="" @@ -1369,27 +1441,35 @@ tcpdump_start() capuser="-Z $SUDO_USER" fi - $ns_cmd tcpdump -e -n -Q in -i $if_name \ - -s 65535 -B 32768 $capuser -w $capfile > "$capout" 2>&1 & - cappid=$! + $ns_cmd tcpdump $TCPDUMP_EXTRA_FLAGS -e -n -Q in -i $if_name \ + -s 65535 -B 32768 $capuser -w ${capfile[$if_name]} \ + > "${capout[$if_name]}" 2>&1 & + cappid[$if_name]=$! sleep 1 } tcpdump_stop() { - $ns_cmd kill $cappid + local if_name=$1 + local pid=${cappid[$if_name]} + + $ns_cmd kill "$pid" && wait "$pid" sleep 1 } tcpdump_cleanup() { - rm $capfile $capout + local if_name=$1 + + rm ${capfile[$if_name]} ${capout[$if_name]} } tcpdump_show() { - tcpdump -e -n -r $capfile 2>&1 + local if_name=$1 + + tcpdump -e -n -r ${capfile[$if_name]} 2>&1 } # return 0 if the packet wasn't seen on host2_if or 1 if it was @@ -1499,6 +1579,37 @@ brmcast_check_sg_state() done } +mc_join() +{ + local if_name=$1 + local group=$2 + local vrf_name=$(master_name_get $if_name) + + # We don't care about actual reception, just about joining the + # IP multicast group and adding the L2 address to the device's + # MAC filtering table + ip vrf exec $vrf_name \ + mreceive -g $group -I $if_name > /dev/null 2>&1 & + mreceive_pid=$! 
+ + sleep 1 +} + +mc_leave() +{ + kill "$mreceive_pid" && wait "$mreceive_pid" +} + +mc_send() +{ + local if_name=$1 + local groups=$2 + local vrf_name=$(master_name_get $if_name) + + ip vrf exec $vrf_name \ + msend -g $groups -I $if_name -c 1 > /dev/null 2>&1 +} + start_ip_monitor() { local mtype=$1; shift diff --git a/tools/testing/selftests/net/forwarding/local_termination.sh b/tools/testing/selftests/net/forwarding/local_termination.sh new file mode 100755 index 000000000000..c5b0cbc85b3e --- /dev/null +++ b/tools/testing/selftests/net/forwarding/local_termination.sh @@ -0,0 +1,299 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS="standalone bridge" +NUM_NETIFS=2 +PING_COUNT=1 +REQUIRE_MTOOLS=yes +REQUIRE_MZ=no + +source lib.sh + +H1_IPV4="192.0.2.1" +H2_IPV4="192.0.2.2" +H1_IPV6="2001:db8:1::1" +H2_IPV6="2001:db8:1::2" + +BRIDGE_ADDR="00:00:de:ad:be:ee" +MACVLAN_ADDR="00:00:de:ad:be:ef" +UNKNOWN_UC_ADDR1="de:ad:be:ef:ee:03" +UNKNOWN_UC_ADDR2="de:ad:be:ef:ee:04" +UNKNOWN_UC_ADDR3="de:ad:be:ef:ee:05" +JOINED_IPV4_MC_ADDR="225.1.2.3" +UNKNOWN_IPV4_MC_ADDR1="225.1.2.4" +UNKNOWN_IPV4_MC_ADDR2="225.1.2.5" +UNKNOWN_IPV4_MC_ADDR3="225.1.2.6" +JOINED_IPV6_MC_ADDR="ff2e::0102:0304" +UNKNOWN_IPV6_MC_ADDR1="ff2e::0102:0305" +UNKNOWN_IPV6_MC_ADDR2="ff2e::0102:0306" +UNKNOWN_IPV6_MC_ADDR3="ff2e::0102:0307" + +JOINED_MACV4_MC_ADDR="01:00:5e:01:02:03" +UNKNOWN_MACV4_MC_ADDR1="01:00:5e:01:02:04" +UNKNOWN_MACV4_MC_ADDR2="01:00:5e:01:02:05" +UNKNOWN_MACV4_MC_ADDR3="01:00:5e:01:02:06" +JOINED_MACV6_MC_ADDR="33:33:01:02:03:04" +UNKNOWN_MACV6_MC_ADDR1="33:33:01:02:03:05" +UNKNOWN_MACV6_MC_ADDR2="33:33:01:02:03:06" +UNKNOWN_MACV6_MC_ADDR3="33:33:01:02:03:07" + +NON_IP_MC="01:02:03:04:05:06" +NON_IP_PKT="00:04 48:45:4c:4f" +BC="ff:ff:ff:ff:ff:ff" + +# Disable promisc to ensure we don't receive unknown MAC DA packets +export TCPDUMP_EXTRA_FLAGS="-pl" + +h1=${NETIFS[p1]} +h2=${NETIFS[p2]} + +send_non_ip() +{ + local if_name=$1 + local smac=$2 + local dmac=$3 + + $MZ -q $if_name "$dmac $smac $NON_IP_PKT" +} + +send_uc_ipv4() +{ + local if_name=$1 + local dmac=$2 + + ip neigh add $H2_IPV4 lladdr $dmac dev $if_name + ping_do $if_name $H2_IPV4 + ip neigh del $H2_IPV4 dev $if_name +} + +check_rcv() +{ + local if_name=$1 + local type=$2 + local pattern=$3 + local should_receive=$4 + local should_fail= + + [ $should_receive = true ] && should_fail=0 || should_fail=1 + RET=0 + + tcpdump_show $if_name | grep -q "$pattern" + + check_err_fail "$should_fail" "$?" 
"reception" + + log_test "$if_name: $type" +} + +mc_route_prepare() +{ + local if_name=$1 + local vrf_name=$(master_name_get $if_name) + + ip route add 225.100.1.0/24 dev $if_name vrf $vrf_name + ip -6 route add ff2e::/64 dev $if_name vrf $vrf_name +} + +mc_route_destroy() +{ + local if_name=$1 + local vrf_name=$(master_name_get $if_name) + + ip route del 225.100.1.0/24 dev $if_name vrf $vrf_name + ip -6 route del ff2e::/64 dev $if_name vrf $vrf_name +} + +run_test() +{ + local rcv_if_name=$1 + local smac=$(mac_get $h1) + local rcv_dmac=$(mac_get $rcv_if_name) + + tcpdump_start $rcv_if_name + + mc_route_prepare $h1 + mc_route_prepare $rcv_if_name + + send_uc_ipv4 $h1 $rcv_dmac + send_uc_ipv4 $h1 $MACVLAN_ADDR + send_uc_ipv4 $h1 $UNKNOWN_UC_ADDR1 + + ip link set dev $rcv_if_name promisc on + send_uc_ipv4 $h1 $UNKNOWN_UC_ADDR2 + mc_send $h1 $UNKNOWN_IPV4_MC_ADDR2 + mc_send $h1 $UNKNOWN_IPV6_MC_ADDR2 + ip link set dev $rcv_if_name promisc off + + mc_join $rcv_if_name $JOINED_IPV4_MC_ADDR + mc_send $h1 $JOINED_IPV4_MC_ADDR + mc_leave + + mc_join $rcv_if_name $JOINED_IPV6_MC_ADDR + mc_send $h1 $JOINED_IPV6_MC_ADDR + mc_leave + + mc_send $h1 $UNKNOWN_IPV4_MC_ADDR1 + mc_send $h1 $UNKNOWN_IPV6_MC_ADDR1 + + ip link set dev $rcv_if_name allmulticast on + send_uc_ipv4 $h1 $UNKNOWN_UC_ADDR3 + mc_send $h1 $UNKNOWN_IPV4_MC_ADDR3 + mc_send $h1 $UNKNOWN_IPV6_MC_ADDR3 + ip link set dev $rcv_if_name allmulticast off + + mc_route_destroy $rcv_if_name + mc_route_destroy $h1 + + sleep 1 + + tcpdump_stop $rcv_if_name + + check_rcv $rcv_if_name "Unicast IPv4 to primary MAC address" \ + "$smac > $rcv_dmac, ethertype IPv4 (0x0800)" \ + true + + check_rcv $rcv_if_name "Unicast IPv4 to macvlan MAC address" \ + "$smac > $MACVLAN_ADDR, ethertype IPv4 (0x0800)" \ + true + + check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address" \ + "$smac > $UNKNOWN_UC_ADDR1, ethertype IPv4 (0x0800)" \ + false + + check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address, promisc" \ + "$smac > $UNKNOWN_UC_ADDR2, ethertype IPv4 (0x0800)" \ + true + + check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address, allmulti" \ + "$smac > $UNKNOWN_UC_ADDR3, ethertype IPv4 (0x0800)" \ + false + + check_rcv $rcv_if_name "Multicast IPv4 to joined group" \ + "$smac > $JOINED_MACV4_MC_ADDR, ethertype IPv4 (0x0800)" \ + true + + check_rcv $rcv_if_name "Multicast IPv4 to unknown group" \ + "$smac > $UNKNOWN_MACV4_MC_ADDR1, ethertype IPv4 (0x0800)" \ + false + + check_rcv $rcv_if_name "Multicast IPv4 to unknown group, promisc" \ + "$smac > $UNKNOWN_MACV4_MC_ADDR2, ethertype IPv4 (0x0800)" \ + true + + check_rcv $rcv_if_name "Multicast IPv4 to unknown group, allmulti" \ + "$smac > $UNKNOWN_MACV4_MC_ADDR3, ethertype IPv4 (0x0800)" \ + true + + check_rcv $rcv_if_name "Multicast IPv6 to joined group" \ + "$smac > $JOINED_MACV6_MC_ADDR, ethertype IPv6 (0x86dd)" \ + true + + check_rcv $rcv_if_name "Multicast IPv6 to unknown group" \ + "$smac > $UNKNOWN_MACV6_MC_ADDR1, ethertype IPv6 (0x86dd)" \ + false + + check_rcv $rcv_if_name "Multicast IPv6 to unknown group, promisc" \ + "$smac > $UNKNOWN_MACV6_MC_ADDR2, ethertype IPv6 (0x86dd)" \ + true + + check_rcv $rcv_if_name "Multicast IPv6 to unknown group, allmulti" \ + "$smac > $UNKNOWN_MACV6_MC_ADDR3, ethertype IPv6 (0x86dd)" \ + true + + tcpdump_cleanup $rcv_if_name +} + +h1_create() +{ + simple_if_init $h1 $H1_IPV4/24 $H1_IPV6/64 +} + +h1_destroy() +{ + simple_if_fini $h1 $H1_IPV4/24 $H1_IPV6/64 +} + +h2_create() +{ + simple_if_init $h2 $H2_IPV4/24 $H2_IPV6/64 +} + +h2_destroy() +{ + simple_if_fini 
$h2 $H2_IPV4/24 $H2_IPV6/64 +} + +bridge_create() +{ + ip link add br0 type bridge + ip link set br0 address $BRIDGE_ADDR + ip link set br0 up + + ip link set $h2 master br0 + ip link set $h2 up + + simple_if_init br0 $H2_IPV4/24 $H2_IPV6/64 +} + +bridge_destroy() +{ + simple_if_fini br0 $H2_IPV4/24 $H2_IPV6/64 + + ip link del br0 +} + +standalone() +{ + h1_create + h2_create + + ip link add link $h2 name macvlan0 type macvlan mode private + ip link set macvlan0 address $MACVLAN_ADDR + ip link set macvlan0 up + + run_test $h2 + + ip link del macvlan0 + + h2_destroy + h1_destroy +} + +bridge() +{ + h1_create + bridge_create + + ip link add link br0 name macvlan0 type macvlan mode private + ip link set macvlan0 address $MACVLAN_ADDR + ip link set macvlan0 up + + run_test br0 + + ip link del macvlan0 + + bridge_destroy + h1_destroy +} + +cleanup() +{ + pre_cleanup + vrf_cleanup +} + +setup_prepare() +{ + vrf_prepare + # setup_wait() needs this + ip link set $h1 up + ip link set $h2 up +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/no_forwarding.sh b/tools/testing/selftests/net/forwarding/no_forwarding.sh new file mode 100755 index 000000000000..af3b398d13f0 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/no_forwarding.sh @@ -0,0 +1,261 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS="standalone two_bridges one_bridge_two_pvids" +NUM_NETIFS=4 + +source lib.sh + +h1=${NETIFS[p1]} +h2=${NETIFS[p3]} +swp1=${NETIFS[p2]} +swp2=${NETIFS[p4]} + +H1_IPV4="192.0.2.1" +H2_IPV4="192.0.2.2" +H1_IPV6="2001:db8:1::1" +H2_IPV6="2001:db8:1::2" + +IPV4_ALLNODES="224.0.0.1" +IPV6_ALLNODES="ff02::1" +MACV4_ALLNODES="01:00:5e:00:00:01" +MACV6_ALLNODES="33:33:00:00:00:01" +NON_IP_MC="01:02:03:04:05:06" +NON_IP_PKT="00:04 48:45:4c:4f" +BC="ff:ff:ff:ff:ff:ff" + +# The full 4K VLAN space is too much to check, so strategically pick some +# values which should provide reasonable coverage +vids=(0 1 2 5 10 20 50 100 200 500 1000 1000 2000 4000 4094) + +send_non_ip() +{ + local if_name=$1 + local smac=$2 + local dmac=$3 + + $MZ -q $if_name "$dmac $smac $NON_IP_PKT" +} + +send_uc_ipv4() +{ + local if_name=$1 + local dmac=$2 + + ip neigh add $H2_IPV4 lladdr $dmac dev $if_name + ping_do $if_name $H2_IPV4 + ip neigh del $H2_IPV4 dev $if_name +} + +send_mc_ipv4() +{ + local if_name=$1 + + ping_do $if_name $IPV4_ALLNODES "-I $if_name" +} + +send_uc_ipv6() +{ + local if_name=$1 + local dmac=$2 + + ip -6 neigh add $H2_IPV6 lladdr $dmac dev $if_name + ping6_do $if_name $H2_IPV6 + ip -6 neigh del $H2_IPV6 dev $if_name +} + +send_mc_ipv6() +{ + local if_name=$1 + + ping6_do $if_name $IPV6_ALLNODES%$if_name +} + +check_rcv() +{ + local if_name=$1 + local type=$2 + local pattern=$3 + local should_fail=1 + + RET=0 + + tcpdump_show $if_name | grep -q "$pattern" + + check_err_fail "$should_fail" "$?" 
"reception" + + log_test "$type" +} + +run_test() +{ + local test_name="$1" + local smac=$(mac_get $h1) + local dmac=$(mac_get $h2) + local h1_ipv6_lladdr=$(ipv6_lladdr_get $h1) + local vid= + + echo "$test_name: Sending packets" + + tcpdump_start $h2 + + send_non_ip $h1 $smac $dmac + send_non_ip $h1 $smac $NON_IP_MC + send_non_ip $h1 $smac $BC + send_uc_ipv4 $h1 $dmac + send_mc_ipv4 $h1 + send_uc_ipv6 $h1 $dmac + send_mc_ipv6 $h1 + + for vid in "${vids[@]}"; do + vlan_create $h1 $vid + simple_if_init $h1.$vid $H1_IPV4/24 $H1_IPV6/64 + + send_non_ip $h1.$vid $smac $dmac + send_non_ip $h1.$vid $smac $NON_IP_MC + send_non_ip $h1.$vid $smac $BC + send_uc_ipv4 $h1.$vid $dmac + send_mc_ipv4 $h1.$vid + send_uc_ipv6 $h1.$vid $dmac + send_mc_ipv6 $h1.$vid + + simple_if_fini $h1.$vid $H1_IPV4/24 $H1_IPV6/64 + vlan_destroy $h1 $vid + done + + sleep 1 + + echo "$test_name: Checking which packets were received" + + tcpdump_stop $h2 + + check_rcv $h2 "$test_name: Unicast non-IP untagged" \ + "$smac > $dmac, 802.3, length 4:" + + check_rcv $h2 "$test_name: Multicast non-IP untagged" \ + "$smac > $NON_IP_MC, 802.3, length 4:" + + check_rcv $h2 "$test_name: Broadcast non-IP untagged" \ + "$smac > $BC, 802.3, length 4:" + + check_rcv $h2 "$test_name: Unicast IPv4 untagged" \ + "$smac > $dmac, ethertype IPv4 (0x0800)" + + check_rcv $h2 "$test_name: Multicast IPv4 untagged" \ + "$smac > $MACV4_ALLNODES, ethertype IPv4 (0x0800).*: $H1_IPV4 > $IPV4_ALLNODES" + + check_rcv $h2 "$test_name: Unicast IPv6 untagged" \ + "$smac > $dmac, ethertype IPv6 (0x86dd).*8: $H1_IPV6 > $H2_IPV6" + + check_rcv $h2 "$test_name: Multicast IPv6 untagged" \ + "$smac > $MACV6_ALLNODES, ethertype IPv6 (0x86dd).*: $h1_ipv6_lladdr > $IPV6_ALLNODES" + + for vid in "${vids[@]}"; do + check_rcv $h2 "$test_name: Unicast non-IP VID $vid" \ + "$smac > $dmac, ethertype 802.1Q (0x8100).*vlan $vid,.*length 4" + + check_rcv $h2 "$test_name: Multicast non-IP VID $vid" \ + "$smac > $NON_IP_MC, ethertype 802.1Q (0x8100).*vlan $vid,.*length 4" + + check_rcv $h2 "$test_name: Broadcast non-IP VID $vid" \ + "$smac > $BC, ethertype 802.1Q (0x8100).*vlan $vid,.*length 4" + + check_rcv $h2 "$test_name: Unicast IPv4 VID $vid" \ + "$smac > $dmac, ethertype 802.1Q (0x8100).*vlan $vid,.*ethertype IPv4 (0x0800), $H1_IPV4 > $H2_IPV4" + + check_rcv $h2 "$test_name: Multicast IPv4 VID $vid" \ + "$smac > $MACV4_ALLNODES, ethertype 802.1Q (0x8100).*vlan $vid,.*ethertype IPv4 (0x0800), $H1_IPV4 > $IPV4_ALLNODES" + + check_rcv $h2 "$test_name: Unicast IPv6 VID $vid" \ + "$smac > $dmac, ethertype 802.1Q (0x8100).*vlan $vid,.*ethertype IPv6 (0x86dd), $H1_IPV6 > $H2_IPV6" + + check_rcv $h2 "$test_name: Multicast IPv6 VID $vid" \ + "$smac > $MACV6_ALLNODES, ethertype 802.1Q (0x8100).*vlan $vid,.*ethertype IPv6 (0x86dd), $h1_ipv6_lladdr > $IPV6_ALLNODES" + done + + tcpdump_cleanup $h2 +} + +standalone() +{ + run_test "Standalone switch ports" +} + +two_bridges() +{ + ip link add br0 type bridge && ip link set br0 up + ip link add br1 type bridge && ip link set br1 up + ip link set $swp1 master br0 + ip link set $swp2 master br1 + + run_test "Switch ports in different bridges" + + ip link del br1 + ip link del br0 +} + +one_bridge_two_pvids() +{ + ip link add br0 type bridge vlan_filtering 1 vlan_default_pvid 0 + ip link set br0 up + ip link set $swp1 master br0 + ip link set $swp2 master br0 + + bridge vlan add dev $swp1 vid 1 pvid untagged + bridge vlan add dev $swp1 vid 2 pvid untagged + + run_test "Switch ports in VLAN-aware bridge with different PVIDs" + + ip link del 
br0 +} + +h1_create() +{ + simple_if_init $h1 $H1_IPV4/24 $H1_IPV6/64 +} + +h1_destroy() +{ + simple_if_fini $h1 $H1_IPV4/24 $H1_IPV6/64 +} + +h2_create() +{ + simple_if_init $h2 $H2_IPV4/24 $H2_IPV6/64 +} + +h2_destroy() +{ + simple_if_fini $h2 $H2_IPV4/24 $H2_IPV6/64 +} + +cleanup() +{ + pre_cleanup + + h2_destroy + h1_destroy + + vrf_cleanup +} + +setup_prepare() +{ + vrf_prepare + + h1_create + h2_create + # we call simple_if_init from the test itself, but setup_wait expects + # that we call it from here, and waits until the interfaces are up + ip link set dev $swp1 up + ip link set dev $swp2 up +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/tsn_lib.sh b/tools/testing/selftests/net/forwarding/tsn_lib.sh new file mode 100644 index 000000000000..60a1423e8116 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/tsn_lib.sh @@ -0,0 +1,235 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright 2021-2022 NXP + +REQUIRE_ISOCHRON=${REQUIRE_ISOCHRON:=yes} +REQUIRE_LINUXPTP=${REQUIRE_LINUXPTP:=yes} + +# Tunables +UTC_TAI_OFFSET=37 +ISOCHRON_CPU=1 + +if [[ "$REQUIRE_ISOCHRON" = "yes" ]]; then + # https://github.com/vladimiroltean/tsn-scripts + # WARNING: isochron versions pre-1.0 are unstable, + # always use the latest version + require_command isochron +fi +if [[ "$REQUIRE_LINUXPTP" = "yes" ]]; then + require_command phc2sys + require_command ptp4l +fi + +phc2sys_start() +{ + local if_name=$1 + local uds_address=$2 + local extra_args="" + + if ! [ -z "${uds_address}" ]; then + extra_args="${extra_args} -z ${uds_address}" + fi + + phc2sys_log="$(mktemp)" + + chrt -f 10 phc2sys -m \ + -c ${if_name} \ + -s CLOCK_REALTIME \ + -O ${UTC_TAI_OFFSET} \ + --step_threshold 0.00002 \ + --first_step_threshold 0.00002 \ + ${extra_args} \ + > "${phc2sys_log}" 2>&1 & + phc2sys_pid=$! + + echo "phc2sys logs to ${phc2sys_log} and has pid ${phc2sys_pid}" + + sleep 1 +} + +phc2sys_stop() +{ + { kill ${phc2sys_pid} && wait ${phc2sys_pid}; } 2> /dev/null + rm "${phc2sys_log}" 2> /dev/null +} + +ptp4l_start() +{ + local if_name=$1 + local slave_only=$2 + local uds_address=$3 + local log="ptp4l_log_${if_name}" + local pid="ptp4l_pid_${if_name}" + local extra_args="" + + if [ "${slave_only}" = true ]; then + extra_args="${extra_args} -s" + fi + + # declare dynamic variables ptp4l_log_${if_name} and ptp4l_pid_${if_name} + # as global, so that they can be referenced later + declare -g "${log}=$(mktemp)" + + chrt -f 10 ptp4l -m -2 -P \ + -i ${if_name} \ + --step_threshold 0.00002 \ + --first_step_threshold 0.00002 \ + --tx_timestamp_timeout 100 \ + --uds_address="${uds_address}" \ + ${extra_args} \ + > "${!log}" 2>&1 & + declare -g "${pid}=$!" + + echo "ptp4l for interface ${if_name} logs to ${!log} and has pid ${!pid}" + + sleep 1 +} + +ptp4l_stop() +{ + local if_name=$1 + local log="ptp4l_log_${if_name}" + local pid="ptp4l_pid_${if_name}" + + { kill ${!pid} && wait ${!pid}; } 2> /dev/null + rm "${!log}" 2> /dev/null +} + +cpufreq_max() +{ + local cpu=$1 + local freq="cpu${cpu}_freq" + local governor="cpu${cpu}_governor" + + # Kernel may be compiled with CONFIG_CPU_FREQ disabled + if ! 
[ -d /sys/bus/cpu/devices/cpu${cpu}/cpufreq ]; then + return + fi + + # declare dynamic variables cpu${cpu}_freq and cpu${cpu}_governor as + # global, so they can be referenced later + declare -g "${freq}=$(cat /sys/bus/cpu/devices/cpu${cpu}/cpufreq/scaling_min_freq)" + declare -g "${governor}=$(cat /sys/bus/cpu/devices/cpu${cpu}/cpufreq/scaling_governor)" + + cat /sys/bus/cpu/devices/cpu${cpu}/cpufreq/scaling_max_freq > \ + /sys/bus/cpu/devices/cpu${cpu}/cpufreq/scaling_min_freq + echo -n "performance" > \ + /sys/bus/cpu/devices/cpu${cpu}/cpufreq/scaling_governor +} + +cpufreq_restore() +{ + local cpu=$1 + local freq="cpu${cpu}_freq" + local governor="cpu${cpu}_governor" + + if ! [ -d /sys/bus/cpu/devices/cpu${cpu}/cpufreq ]; then + return + fi + + echo "${!freq}" > /sys/bus/cpu/devices/cpu${cpu}/cpufreq/scaling_min_freq + echo -n "${!governor}" > \ + /sys/bus/cpu/devices/cpu${cpu}/cpufreq/scaling_governor +} + +isochron_recv_start() +{ + local if_name=$1 + local uds=$2 + local extra_args=$3 + + if ! [ -z "${uds}" ]; then + extra_args="${extra_args} --unix-domain-socket ${uds}" + fi + + isochron rcv \ + --interface ${if_name} \ + --sched-priority 98 \ + --sched-fifo \ + --utc-tai-offset ${UTC_TAI_OFFSET} \ + --quiet \ + ${extra_args} & \ + isochron_pid=$! + + sleep 1 +} + +isochron_recv_stop() +{ + { kill ${isochron_pid} && wait ${isochron_pid}; } 2> /dev/null +} + +isochron_do() +{ + local sender_if_name=$1; shift + local receiver_if_name=$1; shift + local sender_uds=$1; shift + local receiver_uds=$1; shift + local base_time=$1; shift + local cycle_time=$1; shift + local shift_time=$1; shift + local num_pkts=$1; shift + local vid=$1; shift + local priority=$1; shift + local dst_ip=$1; shift + local isochron_dat=$1; shift + local extra_args="" + local receiver_extra_args="" + local vrf="$(master_name_get ${sender_if_name})" + local use_l2="true" + + if ! [ -z "${dst_ip}" ]; then + use_l2="false" + fi + + if ! [ -z "${vrf}" ]; then + dst_ip="${dst_ip}%${vrf}" + fi + + if ! [ -z "${vid}" ]; then + vid="--vid=${vid}" + fi + + if [ -z "${receiver_uds}" ]; then + extra_args="${extra_args} --omit-remote-sync" + fi + + if !
[ -z "${shift_time}" ]; then + extra_args="${extra_args} --shift-time=${shift_time}" + fi + + if [ "${use_l2}" = "true" ]; then + extra_args="${extra_args} --l2 --etype=0xdead ${vid}" + receiver_extra_args="--l2 --etype=0xdead" + else + extra_args="${extra_args} --l4 --ip-destination=${dst_ip}" + receiver_extra_args="--l4" + fi + + cpufreq_max ${ISOCHRON_CPU} + + isochron_recv_start "${h2}" "${receiver_uds}" "${receiver_extra_args}" + + isochron send \ + --interface ${sender_if_name} \ + --unix-domain-socket ${sender_uds} \ + --priority ${priority} \ + --base-time ${base_time} \ + --cycle-time ${cycle_time} \ + --num-frames ${num_pkts} \ + --frame-size 64 \ + --txtime \ + --utc-tai-offset ${UTC_TAI_OFFSET} \ + --cpu-mask $((1 << ${ISOCHRON_CPU})) \ + --sched-fifo \ + --sched-priority 98 \ + --client 127.0.0.1 \ + --sync-threshold 5000 \ + --output-file ${isochron_dat} \ + ${extra_args} \ + --quiet + + isochron_recv_stop + + cpufreq_restore ${ISOCHRON_CPU} +} diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config index d36b7da5082a..38021a0dd527 100644 --- a/tools/testing/selftests/net/mptcp/config +++ b/tools/testing/selftests/net/mptcp/config @@ -12,6 +12,9 @@ CONFIG_NF_TABLES=m CONFIG_NFT_COMPAT=m CONFIG_NETFILTER_XTABLES=m CONFIG_NETFILTER_XT_MATCH_BPF=m +CONFIG_NETFILTER_XT_MATCH_LENGTH=m +CONFIG_NETFILTER_XT_MATCH_STATISTIC=m +CONFIG_NETFILTER_XT_TARGET_MARK=m CONFIG_NF_TABLES_INET=y CONFIG_NFT_TPROXY=m CONFIG_NFT_SOCKET=m @@ -19,3 +22,8 @@ CONFIG_IP_ADVANCED_ROUTER=y CONFIG_IP_MULTIPLE_TABLES=y CONFIG_IP_NF_TARGET_REJECT=m CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_NET_ACT_CSUM=m +CONFIG_NET_ACT_PEDIT=m +CONFIG_NET_CLS_ACT=y +CONFIG_NET_CLS_FW=m +CONFIG_NET_SCH_INGRESS=m diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index ff821025d309..9dd43d7d957b 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -71,6 +71,43 @@ chk_msk_remote_key_nr() __chk_nr "grep -c remote_key" $* } +__chk_listen() +{ + local filter="$1" + local expected=$2 + + shift 2 + msg=$* + + nr=$(ss -N $ns -Ml "$filter" | grep -c LISTEN) + printf "%-50s" "$msg" + + if [ $nr != $expected ]; then + echo "[ fail ] expected $expected found $nr" + ret=$test_cnt + else + echo "[ ok ]" + fi +} + +chk_msk_listen() +{ + lport=$1 + local msg="check for listen socket" + + # destination port search should always return empty list + __chk_listen "dport $lport" 0 "listen match for dport $lport" + + # should return 'our' mptcp listen socket + __chk_listen "sport $lport" 1 "listen match for sport $lport" + + __chk_listen "src inet:0.0.0.0:$lport" 1 "listen match for saddr and sport" + + __chk_listen "" 1 "all listen sockets" + + nr=$(ss -Ml $filter | wc -l) +} + # $1: ns, $2: port wait_local_port_listen() { @@ -113,6 +150,7 @@ echo "a" | \ 0.0.0.0 >/dev/null & wait_local_port_listen $ns 10000 chk_msk_nr 0 "no msk on netns creation" +chk_msk_listen 10000 echo "b" | \ timeout ${timeout_test} \ diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 7314257d248a..d1de1e7702fb 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -70,6 +70,7 @@ init_partial() ip netns add $netns || exit $ksft_skip ip -net $netns link set lo up ip netns exec $netns sysctl -q net.mptcp.enabled=1 + ip netns exec $netns sysctl -q net.mptcp.pm_type=0 ip netns exec $netns sysctl -q 
net.ipv4.conf.all.rp_filter=0 ip netns exec $netns sysctl -q net.ipv4.conf.default.rp_filter=0 if [ $checksum -eq 1 ]; then @@ -266,6 +267,58 @@ reset_with_allow_join_id0() ip netns exec $ns2 sysctl -q net.mptcp.allow_join_initial_addr_port=$ns2_enable } +# Modify TCP payload without corrupting the TCP packet +# +# This rule inverts an 8-bit word at byte offset 148 for the 2nd TCP ACK packet +# carrying enough data. +# Once it is done, the TCP Checksum field is updated so the packet is still +# considered as valid at the TCP level. +# Because the MPTCP checksum, covering the TCP options and data, has not been +# updated, the modification will be detected and an MP_FAIL will be emitted: +# what we want to validate here without corrupting "random" MPTCP options. +# +# To avoid having tc produce this pr_info() message for each TCP ACK packet +# not carrying enough data: +# +# tc action pedit offset 162 out of bounds +# +# Netfilter is used to mark packets with enough data. +reset_with_fail() +{ + reset "${1}" || return 1 + + ip netns exec $ns1 sysctl -q net.mptcp.checksum_enabled=1 + ip netns exec $ns2 sysctl -q net.mptcp.checksum_enabled=1 + + check_invert=1 + validate_checksum=1 + local i="$2" + local ip="${3:-4}" + local tables + + tables="iptables" + if [ $ip -eq 6 ]; then + tables="ip6tables" + fi + + ip netns exec $ns2 $tables \ + -t mangle \ + -A OUTPUT \ + -o ns2eth$i \ + -p tcp \ + -m length --length 150:9999 \ + -m statistic --mode nth --packet 1 --every 99999 \ + -j MARK --set-mark 42 || exit 1 + + tc -n $ns2 qdisc add dev ns2eth$i clsact || exit 1 + tc -n $ns2 filter add dev ns2eth$i egress \ + protocol ip prio 1000 \ + handle 42 fw \ + action pedit munge offset 148 u8 invert \ + pipe csum tcp \ + index 100 || exit 1 +} + fail_test() { ret=1 @@ -961,6 +1014,7 @@ chk_csum_nr() local csum_ns2=${2:-0} local count local dump_stats + local extra_msg="" local allow_multi_errors_ns1=0 local allow_multi_errors_ns2=0 @@ -976,6 +1030,9 @@ chk_csum_nr() printf "%-${nr_blank}s %s" " " "sum" count=$(ip netns exec $ns1 nstat -as | grep MPTcpExtDataCsumErr | awk '{print $2}') [ -z "$count" ] && count=0 + if [ "$count" != "$csum_ns1" ]; then + extra_msg="$extra_msg ns1=$count" + fi if { [ "$count" != $csum_ns1 ] && [ $allow_multi_errors_ns1 -eq 0 ]; } || { [ "$count" -lt $csum_ns1 ] && [ $allow_multi_errors_ns1 -eq 1 ]; }; then echo "[fail] got $count data checksum error[s] expected $csum_ns1" @@ -987,28 +1044,58 @@ chk_csum_nr() echo -n " - csum " count=$(ip netns exec $ns2 nstat -as | grep MPTcpExtDataCsumErr | awk '{print $2}') [ -z "$count" ] && count=0 + if [ "$count" != "$csum_ns2" ]; then + extra_msg="$extra_msg ns2=$count" + fi if { [ "$count" != $csum_ns2 ] && [ $allow_multi_errors_ns2 -eq 0 ]; } || { [ "$count" -lt $csum_ns2 ] && [ $allow_multi_errors_ns2 -eq 1 ]; }; then echo "[fail] got $count data checksum error[s] expected $csum_ns2" fail_test dump_stats=1 else - echo "[ ok ]" + echo -n "[ ok ]" fi [ "${dump_stats}" = 1 ] && dump_stats + + echo "$extra_msg" } chk_fail_nr() { local fail_tx=$1 local fail_rx=$2 + local ns_invert=${3:-""} local count local dump_stats + local ns_tx=$ns1 + local ns_rx=$ns2 + local extra_msg="" + local allow_tx_lost=0 + local allow_rx_lost=0 + + if [[ $ns_invert = "invert" ]]; then + ns_tx=$ns2 + ns_rx=$ns1 + extra_msg=" invert" + fi + + if [[ "${fail_tx}" = "-"* ]]; then + allow_tx_lost=1 + fail_tx=${fail_tx:1} + fi + if [[ "${fail_rx}" = "-"* ]]; then + allow_rx_lost=1 + fail_rx=${fail_rx:1} + fi printf "%-${nr_blank}s %s" " " "ftx" - count=$(ip netns
exec $ns1 nstat -as | grep MPTcpExtMPFailTx | awk '{print $2}') + count=$(ip netns exec $ns_tx nstat -as | grep MPTcpExtMPFailTx | awk '{print $2}') [ -z "$count" ] && count=0 if [ "$count" != "$fail_tx" ]; then + extra_msg="$extra_msg,tx=$count" + fi + if { [ "$count" != "$fail_tx" ] && [ $allow_tx_lost -eq 0 ]; } || + { [ "$count" -gt "$fail_tx" ] && [ $allow_tx_lost -eq 1 ]; }; then echo "[fail] got $count MP_FAIL[s] TX expected $fail_tx" fail_test dump_stats=1 @@ -1017,17 +1104,23 @@ chk_fail_nr() fi echo -n " - failrx" - count=$(ip netns exec $ns2 nstat -as | grep MPTcpExtMPFailRx | awk '{print $2}') + count=$(ip netns exec $ns_rx nstat -as | grep MPTcpExtMPFailRx | awk '{print $2}') [ -z "$count" ] && count=0 if [ "$count" != "$fail_rx" ]; then + extra_msg="$extra_msg,rx=$count" + fi + if { [ "$count" != "$fail_rx" ] && [ $allow_rx_lost -eq 0 ]; } || + { [ "$count" -gt "$fail_rx" ] && [ $allow_rx_lost -eq 1 ]; }; then echo "[fail] got $count MP_FAIL[s] RX expected $fail_rx" fail_test dump_stats=1 else - echo "[ ok ]" + echo -n "[ ok ]" fi [ "${dump_stats}" = 1 ] && dump_stats + + echo "$extra_msg" } chk_fclose_nr() @@ -1106,6 +1199,38 @@ chk_rst_nr() echo "$extra_msg" } +chk_infi_nr() +{ + local infi_tx=$1 + local infi_rx=$2 + local count + local dump_stats + + printf "%-${nr_blank}s %s" " " "itx" + count=$(ip netns exec $ns2 nstat -as | grep InfiniteMapTx | awk '{print $2}') + [ -z "$count" ] && count=0 + if [ "$count" != "$infi_tx" ]; then + echo "[fail] got $count infinite map[s] TX expected $infi_tx" + fail_test + dump_stats=1 + else + echo -n "[ ok ]" + fi + + echo -n " - infirx" + count=$(ip netns exec $ns1 nstat -as | grep InfiniteMapRx | awk '{print $2}') + [ -z "$count" ] && count=0 + if [ "$count" != "$infi_rx" ]; then + echo "[fail] got $count infinite map[s] RX expected $infi_rx" + fail_test + dump_stats=1 + else + echo "[ ok ]" + fi + + [ "${dump_stats}" = 1 ] && dump_stats +} + chk_join_nr() { local syn_nr=$1 @@ -1115,7 +1240,8 @@ chk_join_nr() local csum_ns2=${5:-0} local fail_nr=${6:-0} local rst_nr=${7:-0} - local corrupted_pkts=${8:-0} + local infi_nr=${8:-0} + local corrupted_pkts=${9:-0} local count local dump_stats local with_cookie @@ -1166,10 +1292,11 @@ chk_join_nr() echo "[ ok ]" fi [ "${dump_stats}" = 1 ] && dump_stats - if [ $checksum -eq 1 ]; then + if [ $validate_checksum -eq 1 ]; then chk_csum_nr $csum_ns1 $csum_ns2 chk_fail_nr $fail_nr $fail_nr chk_rst_nr $rst_nr $rst_nr + chk_infi_nr $infi_nr $infi_nr fi } @@ -1485,6 +1612,13 @@ wait_attempt_fail() return 1 } +set_userspace_pm() +{ + local ns=$1 + + ip netns exec $ns sysctl -q net.mptcp.pm_type=1 +} + subflows_tests() { if reset "no JOIN"; then @@ -2556,6 +2690,90 @@ fastclose_tests() fi } +pedit_action_pkts() +{ + tc -n $ns2 -j -s action show action pedit index 100 | \ + sed 's/.*"packets":\([0-9]\+\),.*/\1/' +} + +fail_tests() +{ + # single subflow + if reset_with_fail "Infinite map" 1; then + run_tests $ns1 $ns2 10.0.1.1 128 + chk_join_nr 0 0 0 +1 +0 1 0 1 "$(pedit_action_pkts)" + chk_fail_nr 1 -1 invert + fi +} + +userspace_tests() +{ + # userspace pm type prevents add_addr + if reset "userspace pm type prevents add_addr"; then + set_userspace_pm $ns1 + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 0 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 0 0 0 + chk_add_nr 0 0 + fi + + # userspace pm type does not echo add_addr without daemon + if reset "userspace pm no echo w/o daemon"; then + set_userspace_pm $ns2 + pm_nl_set_limits $ns1 0 2 + 
pm_nl_set_limits $ns2 0 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 0 0 0 + chk_add_nr 1 0 + fi + + # userspace pm type rejects join + if reset "userspace pm type rejects join"; then + set_userspace_pm $ns1 + pm_nl_set_limits $ns1 1 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 0 + fi + + # userspace pm type does not send join + if reset "userspace pm type does not send join"; then + set_userspace_pm $ns2 + pm_nl_set_limits $ns1 1 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 0 0 0 + fi + + # userspace pm type prevents mp_prio + if reset "userspace pm type prevents mp_prio"; then + set_userspace_pm $ns1 + pm_nl_set_limits $ns1 1 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow backup + chk_join_nr 1 1 0 + chk_prio_nr 0 0 + fi + + # userspace pm type prevents rm_addr + if reset "userspace pm type prevents rm_addr"; then + set_userspace_pm $ns1 + set_userspace_pm $ns2 + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 0 0 -1 slow + chk_join_nr 0 0 0 + chk_rm_nr 0 0 + fi +} + implicit_tests() { # userspace pm type prevents add_addr @@ -2624,6 +2842,8 @@ all_tests_sorted=( d@deny_join_id0_tests m@fullmesh_tests z@fastclose_tests + F@fail_tests + u@userspace_tests I@implicit_tests ) diff --git a/tools/testing/selftests/net/ndisc_unsolicited_na_test.sh b/tools/testing/selftests/net/ndisc_unsolicited_na_test.sh new file mode 100755 index 000000000000..f508657ee126 --- /dev/null +++ b/tools/testing/selftests/net/ndisc_unsolicited_na_test.sh @@ -0,0 +1,255 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test is for the accept_unsolicited_na feature to +# enable RFC9131 behaviour. The following is the test-matrix. +# drop accept fwding behaviour +# ---- ------ ------ ---------------------------------------------- +# 1 X X Drop NA packet and don't pass up the stack +# 0 0 X Pass NA packet up the stack, don't update NC +# 0 1 0 Pass NA packet up the stack, don't update NC +# 0 1 1 Pass NA packet up the stack, and add a STALE +# NC entry + +ret=0 +# Kselftest framework requirement - SKIP code is 4. 
+ksft_skip=4 + +PAUSE_ON_FAIL=no +PAUSE=no + +HOST_NS="ns-host" +ROUTER_NS="ns-router" + +HOST_INTF="veth-host" +ROUTER_INTF="veth-router" + +ROUTER_ADDR="2000:20::1" +HOST_ADDR="2000:20::2" +SUBNET_WIDTH=64 +ROUTER_ADDR_WITH_MASK="${ROUTER_ADDR}/${SUBNET_WIDTH}" +HOST_ADDR_WITH_MASK="${HOST_ADDR}/${SUBNET_WIDTH}" + +IP_HOST="ip -6 -netns ${HOST_NS}" +IP_HOST_EXEC="ip netns exec ${HOST_NS}" +IP_ROUTER="ip -6 -netns ${ROUTER_NS}" +IP_ROUTER_EXEC="ip netns exec ${ROUTER_NS}" + +tcpdump_stdout= +tcpdump_stderr= + +log_test() +{ + local rc=$1 + local expected=$2 + local msg="$3" + + if [ ${rc} -eq ${expected} ]; then + printf " TEST: %-60s [ OK ]\n" "${msg}" + nsuccess=$((nsuccess+1)) + else + ret=1 + nfail=$((nfail+1)) + printf " TEST: %-60s [FAIL]\n" "${msg}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi + + if [ "${PAUSE}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi +} + +setup() +{ + set -e + + local drop_unsolicited_na=$1 + local accept_unsolicited_na=$2 + local forwarding=$3 + + # Setup two namespaces and a veth tunnel across them. + # One end of the tunnel is a router and the other end is a host. + ip netns add ${HOST_NS} + ip netns add ${ROUTER_NS} + ${IP_ROUTER} link add ${ROUTER_INTF} type veth \ + peer name ${HOST_INTF} netns ${HOST_NS} + + # Enable IPv6 on both router and host, and configure static addresses. + # The router here is the DUT + # Setup router configuration as specified by the arguments. + # forwarding=0 case is to check that a non-router + # doesn't add neighbour entries. + ROUTER_CONF=net.ipv6.conf.${ROUTER_INTF} + ${IP_ROUTER_EXEC} sysctl -qw \ + ${ROUTER_CONF}.forwarding=${forwarding} + ${IP_ROUTER_EXEC} sysctl -qw \ + ${ROUTER_CONF}.drop_unsolicited_na=${drop_unsolicited_na} + ${IP_ROUTER_EXEC} sysctl -qw \ + ${ROUTER_CONF}.accept_unsolicited_na=${accept_unsolicited_na} + ${IP_ROUTER_EXEC} sysctl -qw ${ROUTER_CONF}.disable_ipv6=0 + ${IP_ROUTER} addr add ${ROUTER_ADDR_WITH_MASK} dev ${ROUTER_INTF} + + # Turn on ndisc_notify on host interface so that + # the host sends unsolicited NAs. + HOST_CONF=net.ipv6.conf.${HOST_INTF} + ${IP_HOST_EXEC} sysctl -qw ${HOST_CONF}.ndisc_notify=1 + ${IP_HOST_EXEC} sysctl -qw ${HOST_CONF}.disable_ipv6=0 + ${IP_HOST} addr add ${HOST_ADDR_WITH_MASK} dev ${HOST_INTF} + + set +e +} + +start_tcpdump() { + set -e + tcpdump_stdout=`mktemp` + tcpdump_stderr=`mktemp` + ${IP_ROUTER_EXEC} timeout 15s \ + tcpdump --immediate-mode -tpni ${ROUTER_INTF} -c 1 \ + "icmp6 && icmp6[0] == 136 && src ${HOST_ADDR}" \ + > ${tcpdump_stdout} 2> /dev/null + set +e +} + +cleanup_tcpdump() +{ + set -e + [[ ! -z ${tcpdump_stdout} ]] && rm -f ${tcpdump_stdout} + [[ !
-z ${tcpdump_stderr} ]] && rm -f ${tcpdump_stderr} + tcpdump_stdout= + tcpdump_stderr= + set +e +} + +cleanup() +{ + cleanup_tcpdump + ip netns del ${HOST_NS} + ip netns del ${ROUTER_NS} +} + +link_up() { + set -e + ${IP_ROUTER} link set dev ${ROUTER_INTF} up + ${IP_HOST} link set dev ${HOST_INTF} up + set +e +} + +verify_ndisc() { + local drop_unsolicited_na=$1 + local accept_unsolicited_na=$2 + local forwarding=$3 + + neigh_show_output=$(${IP_ROUTER} neigh show \ + to ${HOST_ADDR} dev ${ROUTER_INTF} nud stale) + if [ ${drop_unsolicited_na} -eq 0 ] && \ + [ ${accept_unsolicited_na} -eq 1 ] && \ + [ ${forwarding} -eq 1 ]; then + # Neighbour entry expected to be present for 011 case + [[ ${neigh_show_output} ]] + else + # Neighbour entry expected to be absent for all other cases + [[ -z ${neigh_show_output} ]] + fi +} + +test_unsolicited_na_common() +{ + # Setup the test bed, but keep links down + setup $1 $2 $3 + + # Bring the link up, wait for the NA, + # and add a delay to ensure neighbour processing is done. + link_up + start_tcpdump + + # Verify the neighbour table + verify_ndisc $1 $2 $3 + +} + +test_unsolicited_na_combination() { + test_msg=("test_unsolicited_na: " + "drop_unsolicited_na=$1 " + "accept_unsolicited_na=$2 " + "forwarding=$3") + test_unsolicited_na_common $1 $2 $3 + log_test $? 0 "${test_msg[*]}" + cleanup +} + +test_unsolicited_na_combinations() { + # Args: drop_unsolicited_na accept_unsolicited_na forwarding + + # Expect entry + test_unsolicited_na_combination 0 1 1 + + # Expect no entry + test_unsolicited_na_combination 0 0 0 + test_unsolicited_na_combination 0 0 1 + test_unsolicited_na_combination 0 1 0 + test_unsolicited_na_combination 1 0 0 + test_unsolicited_na_combination 1 0 1 + test_unsolicited_na_combination 1 1 0 + test_unsolicited_na_combination 1 1 1 +} + +############################################################################### +# usage + +usage() +{ + cat <<EOF +usage: ${0##*/} OPTS + -p Pause on fail + -P Pause after each test before cleanup +EOF +} + +############################################################################### +# main + +while getopts :pPh o +do + case $o in + p) PAUSE_ON_FAIL=yes;; + P) PAUSE=yes;; + h) usage; exit 0;; + *) usage; exit 1;; + esac +done + +# make sure we don't pause twice +[ "${PAUSE}" = "yes" ] && PAUSE_ON_FAIL=no + +if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit $ksft_skip; +fi + +if [ ! -x "$(command -v ip)" ]; then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +if [ !
-x "$(command -v tcpdump)" ]; then + echo "SKIP: Could not run test without tcpdump tool" + exit $ksft_skip +fi + +# start clean +cleanup &> /dev/null + +test_unsolicited_na_combinations + +printf "\nTests passed: %3d\n" ${nsuccess} +printf "Tests failed: %3d\n" ${nfail} + +exit $ret diff --git a/tools/testing/selftests/net/vrf_strict_mode_test.sh b/tools/testing/selftests/net/vrf_strict_mode_test.sh index 865d53c1781c..417d214264f3 100755 --- a/tools/testing/selftests/net/vrf_strict_mode_test.sh +++ b/tools/testing/selftests/net/vrf_strict_mode_test.sh @@ -14,6 +14,8 @@ INIT_NETNS_NAME="init" PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} +TESTS="init testns mix" + log_test() { local rc=$1 @@ -262,6 +264,8 @@ cleanup() vrf_strict_mode_tests_init() { + log_section "VRF strict_mode test on init network namespace" + vrf_strict_mode_check_support init strict_mode_check_default init @@ -292,6 +296,8 @@ vrf_strict_mode_tests_init() vrf_strict_mode_tests_testns() { + log_section "VRF strict_mode test on testns network namespace" + vrf_strict_mode_check_support testns strict_mode_check_default testns @@ -318,6 +324,8 @@ vrf_strict_mode_tests_testns() vrf_strict_mode_tests_mix() { + log_section "VRF strict_mode test mixing init and testns network namespaces" + read_strict_mode_compare_and_check init 1 read_strict_mode_compare_and_check testns 0 @@ -341,18 +349,30 @@ vrf_strict_mode_tests_mix() read_strict_mode_compare_and_check testns 0 } -vrf_strict_mode_tests() -{ - log_section "VRF strict_mode test on init network namespace" - vrf_strict_mode_tests_init +################################################################################ +# usage - log_section "VRF strict_mode test on testns network namespace" - vrf_strict_mode_tests_testns +usage() +{ + cat <<EOF +usage: ${0##*/} OPTS - log_section "VRF strict_mode test mixing init and testns network namespaces" - vrf_strict_mode_tests_mix + -t <test> Test(s) to run (default: all) + (options: $TESTS) +EOF } +################################################################################ +# main + +while getopts ":t:h" opt; do + case $opt in + t) TESTS=$OPTARG;; + h) usage; exit 0;; + *) usage; exit 1;; + esac +done + vrf_strict_mode_check_support() { local nsname=$1 @@ -391,7 +411,17 @@ fi cleanup &> /dev/null setup -vrf_strict_mode_tests +for t in $TESTS +do + case $t in + vrf_strict_mode_tests_init|init) vrf_strict_mode_tests_init;; + vrf_strict_mode_tests_testns|testns) vrf_strict_mode_tests_testns;; + vrf_strict_mode_tests_mix|mix) vrf_strict_mode_tests_mix;; + + help) echo "Test names: $TESTS"; exit 0;; + + esac +done cleanup print_log_test_results diff --git a/tools/testing/selftests/netfilter/nft_fib.sh b/tools/testing/selftests/netfilter/nft_fib.sh index 695a1958723f..fd76b69635a4 100755 --- a/tools/testing/selftests/netfilter/nft_fib.sh +++ b/tools/testing/selftests/netfilter/nft_fib.sh @@ -66,6 +66,20 @@ table inet filter { EOF } +load_pbr_ruleset() { + local netns=$1 + +ip netns exec ${netns} nft -f /dev/stdin <<EOF +table inet filter { + chain forward { + type filter hook forward priority raw; + fib saddr . 
iif oif gt 0 accept + log drop + } +} +EOF +} + load_ruleset_count() { local netns=$1 @@ -219,4 +233,40 @@ sleep 2 ip netns exec ${ns1} ping -c 3 -q 1c3::c01d > /dev/null check_fib_counter 3 ${nsrouter} 1c3::c01d || exit 1 +# delete all rules +ip netns exec ${ns1} nft flush ruleset +ip netns exec ${ns2} nft flush ruleset +ip netns exec ${nsrouter} nft flush ruleset + +ip -net ${ns1} addr add 10.0.1.99/24 dev eth0 +ip -net ${ns1} addr add dead:1::99/64 dev eth0 + +ip -net ${ns1} addr del 10.0.2.99/24 dev eth0 +ip -net ${ns1} addr del dead:2::99/64 dev eth0 + +ip -net ${nsrouter} addr del dead:2::1/64 dev veth0 + +# ... pbr ruleset for the router, check iif+oif. +load_pbr_ruleset ${nsrouter} +if [ $? -ne 0 ] ; then + echo "SKIP: Could not load fib forward ruleset" + exit $ksft_skip +fi + +ip -net ${nsrouter} rule add from all table 128 +ip -net ${nsrouter} rule add from all iif veth0 table 129 +ip -net ${nsrouter} route add table 128 to 10.0.1.0/24 dev veth0 +ip -net ${nsrouter} route add table 129 to 10.0.2.0/24 dev veth1 + +# drop main ipv4 table +ip -net ${nsrouter} -4 rule delete table main + +test_ping 10.0.2.99 dead:2::99 +if [ $? -ne 0 ] ; then + ip -net ${nsrouter} nft list ruleset + echo "FAIL: fib mismatch in pbr setup" + exit 1 +fi + +echo "PASS: fib expression forward check with policy based routing" exit 0 diff --git a/tools/testing/selftests/pid_namespace/Makefile b/tools/testing/selftests/pid_namespace/Makefile index dcaefa224ca0..edafaca1aeb3 100644 --- a/tools/testing/selftests/pid_namespace/Makefile +++ b/tools/testing/selftests/pid_namespace/Makefile @@ -1,8 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 CFLAGS += -g -I../../../../usr/include/ -TEST_GEN_PROGS := regression_enomem +TEST_GEN_PROGS = regression_enomem -include ../lib.mk +LOCAL_HDRS += $(selfdir)/pidfd/pidfd.h -$(OUTPUT)/regression_enomem: regression_enomem.c ../pidfd/pidfd.h +include ../lib.mk diff --git a/tools/testing/selftests/pidfd/pidfd_wait.c b/tools/testing/selftests/pidfd/pidfd_wait.c index 17999e082aa7..070c1c876df1 100644 --- a/tools/testing/selftests/pidfd/pidfd_wait.c +++ b/tools/testing/selftests/pidfd/pidfd_wait.c @@ -95,7 +95,6 @@ TEST(wait_states) .flags = CLONE_PIDFD | CLONE_PARENT_SETTID, .exit_signal = SIGCHLD, }; - int ret; pid_t pid; siginfo_t info = { .si_signo = 0, diff --git a/tools/testing/selftests/proc/proc-pid-vm.c b/tools/testing/selftests/proc/proc-pid-vm.c index 18a3bde8bc96..28604c9f805c 100644 --- a/tools/testing/selftests/proc/proc-pid-vm.c +++ b/tools/testing/selftests/proc/proc-pid-vm.c @@ -46,6 +46,8 @@ #include <sys/time.h> #include <sys/resource.h> +#include "../kselftest.h" + static inline long sys_execveat(int dirfd, const char *pathname, char **argv, char **envp, int flags) { return syscall(SYS_execveat, dirfd, pathname, argv, envp, flags); @@ -368,7 +370,7 @@ int main(void) }; int i; - for (i = 0; i < sizeof(S)/sizeof(S[0]); i++) { + for (i = 0; i < ARRAY_SIZE(S); i++) { assert(memmem(buf, rv, S[i], strlen(S[i]))); } @@ -417,7 +419,7 @@ int main(void) }; int i; - for (i = 0; i < sizeof(S)/sizeof(S[0]); i++) { + for (i = 0; i < ARRAY_SIZE(S); i++) { assert(memmem(buf, rv, S[i], strlen(S[i]))); } } diff --git a/tools/testing/selftests/sysctl/sysctl.sh b/tools/testing/selftests/sysctl/sysctl.sh index 19515dcb7d04..f50778a3d744 100755 --- a/tools/testing/selftests/sysctl/sysctl.sh +++ b/tools/testing/selftests/sysctl/sysctl.sh @@ -40,6 +40,7 @@ ALL_TESTS="$ALL_TESTS 0004:1:1:uint_0001" ALL_TESTS="$ALL_TESTS 0005:3:1:int_0003" ALL_TESTS="$ALL_TESTS 0006:50:1:bitmap_0001" 
ALL_TESTS="$ALL_TESTS 0007:1:1:boot_int" +ALL_TESTS="$ALL_TESTS 0008:1:1:match_int" function allow_user_defaults() { @@ -785,6 +786,27 @@ sysctl_test_0007() return $ksft_skip } +sysctl_test_0008() +{ + TARGET="${SYSCTL}/match_int" + if [ ! -f $TARGET ]; then + echo "Skipping test for $TARGET as it is not present ..." + return $ksft_skip + fi + + echo -n "Testing if $TARGET is matched in kernel" + ORIG_VALUE=$(cat "${TARGET}") + + if [ $ORIG_VALUE -ne 1 ]; then + echo "TEST FAILED" + rc=1 + test_rc + fi + + echo "ok" + return 0 +} + list_tests() { echo "Test ID list:" @@ -800,6 +822,7 @@ list_tests() echo "0005 x $(get_test_count 0005) - tests proc_douintvec() array" echo "0006 x $(get_test_count 0006) - tests proc_do_large_bitmap()" echo "0007 x $(get_test_count 0007) - tests setting sysctl from kernel boot param" + echo "0008 x $(get_test_count 0008) - tests sysctl macro values match" } usage() diff --git a/tools/testing/selftests/vDSO/vdso_test_correctness.c b/tools/testing/selftests/vDSO/vdso_test_correctness.c index c4aea794725a..e691a3cf1491 100644 --- a/tools/testing/selftests/vDSO/vdso_test_correctness.c +++ b/tools/testing/selftests/vDSO/vdso_test_correctness.c @@ -20,6 +20,7 @@ #include <limits.h> #include "vdso_config.h" +#include "../kselftest.h" static const char **name; @@ -306,10 +307,8 @@ static void test_clock_gettime(void) return; } - for (int clock = 0; clock < sizeof(clocknames) / sizeof(clocknames[0]); - clock++) { + for (int clock = 0; clock < ARRAY_SIZE(clocknames); clock++) test_one_clock_gettime(clock, clocknames[clock]); - } /* Also test some invalid clock ids */ test_one_clock_gettime(-1, "invalid"); @@ -370,10 +369,8 @@ static void test_clock_gettime64(void) return; } - for (int clock = 0; clock < sizeof(clocknames) / sizeof(clocknames[0]); - clock++) { + for (int clock = 0; clock < ARRAY_SIZE(clocknames); clock++) test_one_clock_gettime64(clock, clocknames[clock]); - } /* Also test some invalid clock ids */ test_one_clock_gettime64(-1, "invalid"); diff --git a/tools/testing/selftests/vm/mremap_test.c b/tools/testing/selftests/vm/mremap_test.c index 7c0b0617b9f8..db0270127aeb 100644 --- a/tools/testing/selftests/vm/mremap_test.c +++ b/tools/testing/selftests/vm/mremap_test.c @@ -6,9 +6,11 @@ #include <errno.h> #include <stdlib.h> +#include <stdio.h> #include <string.h> #include <sys/mman.h> #include <time.h> +#include <stdbool.h> #include "../kselftest.h" @@ -64,6 +66,59 @@ enum { } /* + * Returns false if the requested remap region overlaps with an + * existing mapping (e.g text, stack) else returns true. 
+ */ +static bool is_remap_region_valid(void *addr, unsigned long long size) +{ + void *remap_addr = NULL; + bool ret = true; + + /* Use MAP_FIXED_NOREPLACE flag to ensure region is not mapped */ + remap_addr = mmap(addr, size, PROT_READ | PROT_WRITE, + MAP_FIXED_NOREPLACE | MAP_ANONYMOUS | MAP_SHARED, + -1, 0); + + if (remap_addr == MAP_FAILED) { + if (errno == EEXIST) + ret = false; + } else { + munmap(remap_addr, size); + } + + return ret; +} + +/* Returns mmap_min_addr sysctl tunable from procfs */ +static unsigned long long get_mmap_min_addr(void) +{ + FILE *fp; + int n_matched; + static unsigned long long addr; + + if (addr) + return addr; + + fp = fopen("/proc/sys/vm/mmap_min_addr", "r"); + if (fp == NULL) { + ksft_print_msg("Failed to open /proc/sys/vm/mmap_min_addr: %s\n", + strerror(errno)); + exit(KSFT_SKIP); + } + + n_matched = fscanf(fp, "%llu", &addr); + if (n_matched != 1) { + ksft_print_msg("Failed to read /proc/sys/vm/mmap_min_addr: %s\n", + strerror(errno)); + fclose(fp); + exit(KSFT_SKIP); + } + + fclose(fp); + return addr; +} + +/* * Returns the start address of the mapping on success, else returns * NULL on failure. */ @@ -71,11 +126,18 @@ static void *get_source_mapping(struct config c) { unsigned long long addr = 0ULL; void *src_addr = NULL; + unsigned long long mmap_min_addr; + + mmap_min_addr = get_mmap_min_addr(); + retry: addr += c.src_alignment; + if (addr < mmap_min_addr) + goto retry; + src_addr = mmap((void *) addr, c.region_size, PROT_READ | PROT_WRITE, - MAP_FIXED_NOREPLACE | MAP_ANONYMOUS | MAP_SHARED, - -1, 0); + MAP_FIXED_NOREPLACE | MAP_ANONYMOUS | MAP_SHARED, + -1, 0); if (src_addr == MAP_FAILED) { if (errno == EPERM || errno == EEXIST) goto retry; @@ -90,8 +152,10 @@ retry: * alignment in the tests. */ if (((unsigned long long) src_addr & (c.src_alignment - 1)) || - !((unsigned long long) src_addr & c.src_alignment)) + !((unsigned long long) src_addr & c.src_alignment)) { + munmap(src_addr, c.region_size); goto retry; + } if (!src_addr) goto error; @@ -140,9 +204,20 @@ static long long remap_region(struct config c, unsigned int threshold_mb, if (!((unsigned long long) addr & c.dest_alignment)) addr = (void *) ((unsigned long long) addr | c.dest_alignment); + /* Don't destroy existing mappings unless expected to overlap */ + while (!is_remap_region_valid(addr, c.region_size) && !c.overlapping) { + /* Check for unsigned overflow */ + if (addr + c.dest_alignment < addr) { + ksft_print_msg("Couldn't find a valid region to remap to\n"); + ret = -1; + goto out; + } + addr += c.dest_alignment; + } + clock_gettime(CLOCK_MONOTONIC, &t_start); dest_addr = mremap(src_addr, c.region_size, c.region_size, - MREMAP_MAYMOVE|MREMAP_FIXED, (char *) addr); + MREMAP_MAYMOVE|MREMAP_FIXED, (char *) addr); clock_gettime(CLOCK_MONOTONIC, &t_end); if (dest_addr == MAP_FAILED) { @@ -193,7 +268,7 @@ static void run_mremap_test_case(struct test test_case, int *failures, if (remap_time < 0) { if (test_case.expect_failure) - ksft_test_result_pass("%s\n\tExpected mremap failure\n", + ksft_test_result_xfail("%s\n\tExpected mremap failure\n", test_case.name); else { ksft_test_result_fail("%s\n", test_case.name); diff --git a/tools/testing/selftests/vm/run_vmtests.sh b/tools/testing/selftests/vm/run_vmtests.sh index 3b265f140c25..352ba00cf26b 100755 --- a/tools/testing/selftests/vm/run_vmtests.sh +++ b/tools/testing/selftests/vm/run_vmtests.sh @@ -291,11 +291,16 @@ echo "-------------------" echo "running mremap_test" echo "-------------------" ./mremap_test -if [ $? 
-ne 0 ]; then +ret_val=$? + +if [ $ret_val -eq 0 ]; then + echo "[PASS]" +elif [ $ret_val -eq $ksft_skip ]; then + echo "[SKIP]" + exitcode=$ksft_skip +else echo "[FAIL]" exitcode=1 -else - echo "[PASS]" fi echo "-----------------" diff --git a/tools/testing/selftests/wireguard/qemu/arch/i686.config b/tools/testing/selftests/wireguard/qemu/arch/i686.config index a85025d7206e..a9b4fe795048 100644 --- a/tools/testing/selftests/wireguard/qemu/arch/i686.config +++ b/tools/testing/selftests/wireguard/qemu/arch/i686.config @@ -1,3 +1,4 @@ +CONFIG_ACPI=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_CMDLINE_BOOL=y diff --git a/tools/testing/selftests/wireguard/qemu/arch/x86_64.config b/tools/testing/selftests/wireguard/qemu/arch/x86_64.config index 00a1ef4869d5..45dd53a0d760 100644 --- a/tools/testing/selftests/wireguard/qemu/arch/x86_64.config +++ b/tools/testing/selftests/wireguard/qemu/arch/x86_64.config @@ -1,3 +1,4 @@ +CONFIG_ACPI=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_CMDLINE_BOOL=y diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index 53df7d3893d3..0388c4d60af0 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -92,6 +92,10 @@ warn_32bit_failure: echo "If you are using a Fedora-like distribution, try:"; \ echo ""; \ echo " yum install glibc-devel.*i686"; \ + echo ""; \ + echo "If you are using a SUSE-like distribution, try:"; \ + echo ""; \ + echo " zypper install gcc-32bit glibc-devel-static-32bit"; \ exit 0; endif
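
The isochron_do() helper added in tsn_lib.sh above takes twelve positional arguments, which makes call sites hard to read. The sketch below shows roughly how a test built on tsn_lib.sh might drive it; it is illustrative only, and the interface names, UDS paths, times and priorities are invented assumptions, not part of the patch.

# Hypothetical caller of tsn_lib.sh's isochron_do(). All concrete values
# here are assumptions for illustration; only the argument order and the
# helper names (ptp4l_start/stop, isochron_do) come from tsn_lib.sh.
run_isochron_sketch()
{
	local sender=swp1                  # assumed sender port
	local receiver=swp2                # assumed receiver port
	local sender_uds=/var/run/ptp4l.0  # assumed ptp4l UDS addresses
	local receiver_uds=/var/run/ptp4l.1
	local base_time=0                  # assumed acceptable base time
	local cycle_time=10000000          # 10 ms cycle, in nanoseconds
	local num_pkts=100
	local priority=7
	local isochron_dat="$(mktemp)"

	# One ptp4l instance per port; the receiver side runs slave-only.
	ptp4l_start "$sender" false "$sender_uds"
	ptp4l_start "$receiver" true "$receiver_uds"

	# Empty shift_time, vid and dst_ip arguments select the defaults:
	# no --shift-time, untagged traffic, and L2 (--l2 --etype) mode.
	isochron_do "$sender" "$receiver" "$sender_uds" "$receiver_uds" \
		"$base_time" "$cycle_time" "" "$num_pkts" \
		"" "$priority" "" "$isochron_dat"

	# A real test would post-process $isochron_dat here, e.g. to check
	# per-packet latencies against the schedule.
	rm "$isochron_dat"

	ptp4l_stop "$receiver"
	ptp4l_stop "$sender"
}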