summaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rw-r--r--tools/Makefile40
-rw-r--r--tools/arch/arm64/include/asm/cputype.h2
-rw-r--r--tools/arch/x86/include/asm/amd-ibs.h2
-rw-r--r--tools/arch/x86/include/asm/disabled-features.h8
-rw-r--r--tools/arch/x86/include/asm/msr-index.h4
-rw-r--r--tools/bpf/bpftool/gen.c22
-rw-r--r--tools/build/Makefile.feature1
-rw-r--r--tools/build/feature/Makefile13
-rw-r--r--tools/build/feature/test-libbpf-btf__load_from_kernel_by_id.c7
-rwxr-xr-xtools/certs/print-cert-tbs-hash.sh91
-rw-r--r--tools/include/linux/objtool.h10
-rw-r--r--tools/include/linux/slab.h8
-rw-r--r--tools/include/nolibc/Makefile42
-rw-r--r--tools/include/nolibc/arch-aarch64.h199
-rw-r--r--tools/include/nolibc/arch-arm.h204
-rw-r--r--tools/include/nolibc/arch-i386.h219
-rw-r--r--tools/include/nolibc/arch-mips.h215
-rw-r--r--tools/include/nolibc/arch-riscv.h204
-rw-r--r--tools/include/nolibc/arch-x86_64.h215
-rw-r--r--tools/include/nolibc/arch.h32
-rw-r--r--tools/include/nolibc/ctype.h99
-rw-r--r--tools/include/nolibc/errno.h27
-rw-r--r--tools/include/nolibc/nolibc.h2540
-rw-r--r--tools/include/nolibc/signal.h22
-rw-r--r--tools/include/nolibc/std.h49
-rw-r--r--tools/include/nolibc/stdio.h306
-rw-r--r--tools/include/nolibc/stdlib.h423
-rw-r--r--tools/include/nolibc/string.h285
-rw-r--r--tools/include/nolibc/sys.h1247
-rw-r--r--tools/include/nolibc/time.h28
-rw-r--r--tools/include/nolibc/types.h205
-rw-r--r--tools/include/nolibc/unistd.h54
-rw-r--r--tools/include/uapi/linux/kvm.h10
-rw-r--r--tools/include/uapi/linux/vhost.h7
-rw-r--r--tools/lib/perf/evlist.c3
-rw-r--r--tools/lib/subcmd/parse-options.c17
-rw-r--r--tools/lib/thermal/.gitignore2
-rw-r--r--tools/lib/thermal/Build5
-rw-r--r--tools/lib/thermal/Makefile165
-rw-r--r--tools/lib/thermal/commands.c349
-rw-r--r--tools/lib/thermal/events.c164
-rw-r--r--tools/lib/thermal/include/thermal.h142
-rw-r--r--tools/lib/thermal/libthermal.map25
-rw-r--r--tools/lib/thermal/libthermal.pc.template12
-rw-r--r--tools/lib/thermal/sampling.c75
-rw-r--r--tools/lib/thermal/thermal.c135
-rw-r--r--tools/lib/thermal/thermal_nl.c215
-rw-r--r--tools/lib/thermal/thermal_nl.h46
-rw-r--r--tools/memory-model/README3
-rw-r--r--tools/objtool/Build12
-rw-r--r--tools/objtool/Documentation/objtool.txt (renamed from tools/objtool/Documentation/stack-validation.txt)122
-rw-r--r--tools/objtool/Makefile10
-rw-r--r--tools/objtool/arch/x86/decode.c2
-rw-r--r--tools/objtool/arch/x86/special.c2
-rw-r--r--tools/objtool/builtin-check.c156
-rw-r--r--tools/objtool/builtin-orc.c73
-rw-r--r--tools/objtool/check.c565
-rw-r--r--tools/objtool/elf.c264
-rw-r--r--tools/objtool/include/objtool/builtin.h34
-rw-r--r--tools/objtool/include/objtool/elf.h16
-rw-r--r--tools/objtool/include/objtool/objtool.h2
-rw-r--r--tools/objtool/include/objtool/warn.h35
-rw-r--r--tools/objtool/objtool.c104
-rw-r--r--tools/objtool/weak.c9
-rw-r--r--tools/perf/Documentation/perf.txt2
-rw-r--r--tools/perf/Makefile.config13
-rw-r--r--tools/perf/arch/arm64/util/arm-spe.c16
-rw-r--r--tools/perf/arch/arm64/util/machine.c21
-rw-r--r--tools/perf/arch/powerpc/util/Build1
-rw-r--r--tools/perf/arch/powerpc/util/machine.c25
-rw-r--r--tools/perf/arch/s390/util/machine.c16
-rw-r--r--tools/perf/arch/x86/util/perf_regs.c12
-rw-r--r--tools/perf/bench/epoll-ctl.c25
-rw-r--r--tools/perf/bench/epoll-wait.c25
-rw-r--r--tools/perf/bench/futex-hash.c26
-rw-r--r--tools/perf/bench/futex-lock-pi.c21
-rw-r--r--tools/perf/bench/futex-requeue.c21
-rw-r--r--tools/perf/bench/futex-wake-parallel.c21
-rw-r--r--tools/perf/bench/futex-wake.c22
-rw-r--r--tools/perf/bench/numa.c140
-rw-r--r--tools/perf/builtin-record.c22
-rw-r--r--tools/perf/builtin-report.c14
-rw-r--r--tools/perf/builtin-script.c2
-rw-r--r--tools/perf/perf.c6
-rw-r--r--tools/perf/tests/attr/README1
-rw-r--r--tools/perf/tests/attr/test-record-spe-physical-address12
-rw-r--r--tools/perf/tests/bpf.c10
-rw-r--r--tools/perf/tests/builtin-test.c8
-rw-r--r--tools/perf/tests/dwarf-unwind.c2
-rw-r--r--tools/perf/tests/perf-time-to-tsc.c40
-rwxr-xr-xtools/perf/tests/shell/stat_all_pmu.sh10
-rwxr-xr-xtools/perf/tests/shell/test_arm_coresight.sh1
-rw-r--r--tools/perf/tests/topology.c11
-rw-r--r--tools/perf/util/annotate.c1
-rw-r--r--tools/perf/util/arm-spe.c5
-rw-r--r--tools/perf/util/arm64-frame-pointer-unwind-support.c2
-rw-r--r--tools/perf/util/bpf-event.c4
-rw-r--r--tools/perf/util/c++/clang.cpp4
-rw-r--r--tools/perf/util/header.c51
-rw-r--r--tools/perf/util/header.h1
-rw-r--r--tools/perf/util/machine.c2
-rw-r--r--tools/perf/util/parse-events.c5
-rw-r--r--tools/perf/util/session.c43
-rw-r--r--tools/perf/util/setup.py8
-rw-r--r--tools/perf/util/stat.c20
-rw-r--r--tools/perf/util/symbol-elf.c2
-rw-r--r--tools/perf/util/symbol.c37
-rw-r--r--tools/perf/util/symbol.h3
-rw-r--r--tools/perf/util/unwind-libdw.c10
-rw-r--r--tools/perf/util/unwind-libdw.h1
-rw-r--r--tools/perf/util/unwind-libunwind-local.c10
-rw-r--r--tools/perf/util/unwind-libunwind.c6
-rw-r--r--tools/perf/util/unwind.h13
-rw-r--r--tools/power/acpi/common/cmfsize.c2
-rw-r--r--tools/power/acpi/common/getopt.c2
-rw-r--r--tools/power/acpi/os_specific/service_layers/oslinuxtbl.c2
-rw-r--r--tools/power/acpi/os_specific/service_layers/osunixdir.c2
-rw-r--r--tools/power/acpi/os_specific/service_layers/osunixmap.c2
-rw-r--r--tools/power/acpi/os_specific/service_layers/osunixxf.c2
-rw-r--r--tools/power/acpi/tools/acpidump/acpidump.h2
-rw-r--r--tools/power/acpi/tools/acpidump/apdump.c2
-rw-r--r--tools/power/acpi/tools/acpidump/apfiles.c2
-rw-r--r--tools/power/acpi/tools/acpidump/apmain.c2
-rw-r--r--tools/power/x86/intel-speed-select/Makefile2
-rw-r--r--tools/power/x86/intel-speed-select/hfi-events.c2
-rw-r--r--tools/power/x86/intel-speed-select/isst-config.c6
-rw-r--r--tools/power/x86/turbostat/Makefile2
-rw-r--r--tools/power/x86/turbostat/turbostat.82
-rw-r--r--tools/power/x86/turbostat/turbostat.c594
-rw-r--r--tools/testing/nvdimm/test/nfit.c4
-rw-r--r--tools/testing/radix-tree/linux.c3
-rw-r--r--tools/testing/selftests/Makefile2
-rw-r--r--tools/testing/selftests/arm64/Makefile11
-rw-r--r--tools/testing/selftests/arm64/abi/.gitignore1
-rw-r--r--tools/testing/selftests/arm64/abi/Makefile9
-rw-r--r--tools/testing/selftests/arm64/abi/syscall-abi-asm.S79
-rw-r--r--tools/testing/selftests/arm64/abi/syscall-abi.c204
-rw-r--r--tools/testing/selftests/arm64/abi/syscall-abi.h15
-rw-r--r--tools/testing/selftests/arm64/abi/tpidr2.c298
-rw-r--r--tools/testing/selftests/arm64/bti/Makefile6
-rw-r--r--tools/testing/selftests/arm64/fp/.gitignore5
-rw-r--r--tools/testing/selftests/arm64/fp/Makefile46
-rw-r--r--tools/testing/selftests/arm64/fp/rdvl-sme.c14
-rw-r--r--tools/testing/selftests/arm64/fp/rdvl.S10
-rw-r--r--tools/testing/selftests/arm64/fp/rdvl.h1
-rw-r--r--tools/testing/selftests/arm64/fp/sme-inst.h51
-rw-r--r--tools/testing/selftests/arm64/fp/ssve-stress59
-rw-r--r--tools/testing/selftests/arm64/fp/sve-ptrace.c175
-rw-r--r--tools/testing/selftests/arm64/fp/sve-test.S20
-rw-r--r--tools/testing/selftests/arm64/fp/vec-syscfg.c10
-rw-r--r--tools/testing/selftests/arm64/fp/vlset.c10
-rw-r--r--tools/testing/selftests/arm64/fp/za-fork-asm.S61
-rw-r--r--tools/testing/selftests/arm64/fp/za-fork.c156
-rw-r--r--tools/testing/selftests/arm64/fp/za-ptrace.c356
-rw-r--r--tools/testing/selftests/arm64/fp/za-stress59
-rw-r--r--tools/testing/selftests/arm64/fp/za-test.S388
-rw-r--r--tools/testing/selftests/arm64/mte/.gitignore1
-rw-r--r--tools/testing/selftests/arm64/mte/check_child_memory.c8
-rw-r--r--tools/testing/selftests/arm64/mte/check_prctl.c119
-rw-r--r--tools/testing/selftests/arm64/mte/check_tags_inclusion.c54
-rw-r--r--tools/testing/selftests/arm64/mte/mte_common_util.c42
-rw-r--r--tools/testing/selftests/arm64/mte/mte_common_util.h15
-rw-r--r--tools/testing/selftests/arm64/signal/.gitignore3
-rw-r--r--tools/testing/selftests/arm64/signal/test_signals.h4
-rw-r--r--tools/testing/selftests/arm64/signal/test_signals_utils.c6
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sme_change_vl.c92
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/sme_trap_no_sm.c38
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/sme_trap_non_streaming.c45
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/sme_trap_za.c36
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/sme_vl.c68
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/ssve_regs.c135
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/testcases.c36
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/testcases.h3
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/za_regs.c128
-rw-r--r--tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c23
-rw-r--r--tools/testing/selftests/bpf/progs/map_ptr_kern.c4
-rw-r--r--tools/testing/selftests/bpf/progs/trace_dummy_st_ops.c21
-rw-r--r--tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c78
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/spectrum-2/vxlan_flooding_ipv6.sh17
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh17
-rwxr-xr-xtools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh2
-rw-r--r--tools/testing/selftests/filesystems/binderfs/binderfs_test.c3
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc3
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc4
-rwxr-xr-xtools/testing/selftests/ir/ir_loopback.sh2
-rw-r--r--tools/testing/selftests/kselftest.h15
-rw-r--r--tools/testing/selftests/kselftest_harness.h59
-rw-r--r--tools/testing/selftests/kvm/.gitignore2
-rw-r--r--tools/testing/selftests/kvm/Makefile1
-rw-r--r--tools/testing/selftests/kvm/aarch64/arch_timer.c15
-rw-r--r--tools/testing/selftests/kvm/aarch64/get-reg-list.c14
-rw-r--r--tools/testing/selftests/kvm/aarch64/vcpu_width_config.c122
-rw-r--r--tools/testing/selftests/kvm/dirty_log_perf_test.c34
-rw-r--r--tools/testing/selftests/kvm/include/riscv/processor.h4
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/processor.h17
-rw-r--r--tools/testing/selftests/kvm/kvm_page_table_test.c2
-rw-r--r--tools/testing/selftests/kvm/lib/riscv/processor.c2
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/processor.c202
-rw-r--r--tools/testing/selftests/kvm/x86_64/amx_test.c1
-rw-r--r--tools/testing/selftests/kvm/x86_64/emulator_error_test.c1
-rw-r--r--tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c37
-rw-r--r--tools/testing/selftests/kvm/x86_64/smm_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c1
-rw-r--r--tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c1
-rw-r--r--tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c1
-rw-r--r--tools/testing/selftests/landlock/base_test.c179
-rw-r--r--tools/testing/selftests/landlock/common.h66
-rw-r--r--tools/testing/selftests/landlock/fs_test.c1511
-rw-r--r--tools/testing/selftests/landlock/ptrace_test.c40
-rw-r--r--tools/testing/selftests/mqueue/mq_perf_tests.c29
-rw-r--r--tools/testing/selftests/net/Makefile6
-rw-r--r--tools/testing/selftests/net/bpf/Makefile14
-rw-r--r--tools/testing/selftests/net/bpf/nat6to4.c285
-rwxr-xr-xtools/testing/selftests/net/fcnal-test.sh12
-rwxr-xr-xtools/testing/selftests/net/fib_nexthops.sh14
-rw-r--r--tools/testing/selftests/net/forwarding/Makefile33
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh3
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh48
-rw-r--r--tools/testing/selftests/net/so_txtime.c4
-rwxr-xr-xtools/testing/selftests/net/udpgro_frglist.sh101
-rw-r--r--tools/testing/selftests/pid_namespace/Makefile6
-rw-r--r--tools/testing/selftests/pidfd/pidfd_wait.c1
-rw-r--r--tools/testing/selftests/proc/proc-pid-vm.c6
-rw-r--r--tools/testing/selftests/rcutorture/bin/functions.sh2
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-find-errors.sh2
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck.sh7
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-remote.sh14
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm.sh10
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/torture.sh29
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/RUDE012
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/SRCU-N2
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TASKS011
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TASKS024
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TASKS02.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TASKS032
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TRACE013
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TRACE022
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE045
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE071
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE092
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE101
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh16
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon7
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcuscale/TREE2
-rw-r--r--tools/testing/selftests/rcutorture/configs/refscale/CFcommon4
-rw-r--r--tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT2
-rw-r--r--tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT2
-rw-r--r--tools/testing/selftests/rcutorture/configs/scf/PREEMPT1
-rw-r--r--tools/testing/selftests/rcutorture/configs/scf/ver_functions.sh3
-rw-r--r--tools/testing/selftests/resctrl/Makefile19
-rw-r--r--tools/testing/selftests/resctrl/README39
-rw-r--r--tools/testing/selftests/resctrl/cat_test.c2
-rw-r--r--tools/testing/selftests/resctrl/fill_buf.c4
-rw-r--r--tools/testing/selftests/resctrl/resctrl.h5
-rw-r--r--tools/testing/selftests/resctrl/resctrl_tests.c49
-rw-r--r--tools/testing/selftests/resctrl/resctrl_val.c1
-rw-r--r--tools/testing/selftests/resctrl/resctrlfs.c2
-rw-r--r--tools/testing/selftests/resctrl/settings3
-rw-r--r--tools/testing/selftests/seccomp/Makefile1
-rw-r--r--tools/testing/selftests/seccomp/seccomp_bpf.c437
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_correctness.c9
-rw-r--r--tools/testing/selftests/vm/Makefile10
-rw-r--r--tools/testing/selftests/vm/mremap_test.c85
-rw-r--r--tools/testing/selftests/vm/pkey-x86.h21
-rwxr-xr-xtools/testing/selftests/vm/run_vmtests.sh11
-rwxr-xr-xtools/testing/selftests/wireguard/netns.sh34
-rw-r--r--tools/testing/selftests/wireguard/qemu/.gitignore1
-rw-r--r--tools/testing/selftests/wireguard/qemu/Makefile205
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/aarch64.config5
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config5
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/arm.config5
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/armeb.config5
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/i686.config3
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/m68k.config2
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/mips.config2
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/mips64.config2
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/mips64el.config2
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/mipsel.config2
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/powerpc.config2
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/powerpc64.config13
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config2
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/riscv32.config12
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/riscv64.config12
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/s390x.config6
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/x86_64.config3
-rw-r--r--tools/testing/selftests/wireguard/qemu/init.c6
-rw-r--r--tools/testing/selftests/x86/Makefile4
-rw-r--r--tools/testing/selftests/x86/amx.c24
-rw-r--r--tools/testing/selftests/x86/corrupt_xstate_header.c16
-rw-r--r--tools/thermal/lib/Build3
-rw-r--r--tools/thermal/lib/Makefile158
-rw-r--r--tools/thermal/lib/libthermal_tools.pc.template12
-rw-r--r--tools/thermal/lib/log.c77
-rw-r--r--tools/thermal/lib/log.h31
-rw-r--r--tools/thermal/lib/mainloop.c120
-rw-r--r--tools/thermal/lib/mainloop.h15
-rw-r--r--tools/thermal/lib/thermal-tools.h10
-rw-r--r--tools/thermal/lib/uptimeofday.c40
-rw-r--r--tools/thermal/lib/uptimeofday.h12
-rw-r--r--tools/thermal/thermal-engine/Build1
-rw-r--r--tools/thermal/thermal-engine/Makefile28
-rw-r--r--tools/thermal/thermal-engine/thermal-engine.c341
-rw-r--r--tools/thermal/thermometer/Build1
-rw-r--r--tools/thermal/thermometer/Makefile26
-rw-r--r--tools/thermal/thermometer/thermometer.892
-rw-r--r--tools/thermal/thermometer/thermometer.c572
-rw-r--r--tools/thermal/thermometer/thermometer.conf5
307 files changed, 15272 insertions, 4427 deletions
diff --git a/tools/Makefile b/tools/Makefile
index db2f7b8ebed5..c074e42fd92f 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -24,6 +24,7 @@ help:
@echo ' intel-speed-select - Intel Speed Select tool'
@echo ' kvm_stat - top-like utility for displaying kvm statistics'
@echo ' leds - LEDs tools'
+ @echo ' nolibc - nolibc headers testing and installation'
@echo ' objtool - an ELF object analysis tool'
@echo ' pci - PCI tools'
@echo ' perf - Linux performance measurement and analysis tool'
@@ -31,6 +32,9 @@ help:
@echo ' bootconfig - boot config tool'
@echo ' spi - spi tools'
@echo ' tmon - thermal monitoring and tuning tool'
+ @echo ' thermometer - temperature capture tool'
+ @echo ' thermal-engine - thermal monitoring tool'
+ @echo ' thermal - thermal library'
@echo ' tracing - misc tracing tools'
@echo ' turbostat - Intel CPU idle stats and freq reporting tool'
@echo ' usb - USB testing tools'
@@ -74,6 +78,9 @@ bpf/%: FORCE
libapi: FORCE
$(call descend,lib/api)
+nolibc_%: FORCE
+ $(call descend,include/nolibc,$(patsubst nolibc_%,%,$@))
+
# The perf build does not follow the descend function setup,
# invoking it via it's own make rule.
PERF_O = $(if $(O),$(O)/tools/perf,)
@@ -85,12 +92,21 @@ perf: FORCE
selftests: FORCE
$(call descend,testing/$@)
+thermal: FORCE
+ $(call descend,lib/$@)
+
turbostat x86_energy_perf_policy intel-speed-select: FORCE
$(call descend,power/x86/$@)
tmon: FORCE
$(call descend,thermal/$@)
+thermometer: FORCE
+ $(call descend,thermal/$@)
+
+thermal-engine: FORCE thermal
+ $(call descend,thermal/$@)
+
freefall: FORCE
$(call descend,laptop/$@)
@@ -101,7 +117,7 @@ all: acpi cgroup counter cpupower gpio hv firewire \
perf selftests bootconfig spi turbostat usb \
virtio vm bpf x86_energy_perf_policy \
tmon freefall iio objtool kvm_stat wmi \
- pci debugging tracing
+ pci debugging tracing thermal thermometer thermal-engine
acpi_install:
$(call descend,power/$(@:_install=),install)
@@ -115,12 +131,21 @@ cgroup_install counter_install firewire_install gpio_install hv_install iio_inst
selftests_install:
$(call descend,testing/$(@:_install=),install)
+thermal_install:
+ $(call descend,lib/$(@:_install=),install)
+
turbostat_install x86_energy_perf_policy_install intel-speed-select_install:
$(call descend,power/x86/$(@:_install=),install)
tmon_install:
$(call descend,thermal/$(@:_install=),install)
+thermometer_install:
+ $(call descend,thermal/$(@:_install=),install)
+
+thermal-engine_install:
+ $(call descend,thermal/$(@:_install=),install)
+
freefall_install:
$(call descend,laptop/$(@:_install=),install)
@@ -133,7 +158,7 @@ install: acpi_install cgroup_install counter_install cpupower_install gpio_insta
virtio_install vm_install bpf_install x86_energy_perf_policy_install \
tmon_install freefall_install objtool_install kvm_stat_install \
wmi_install pci_install debugging_install intel-speed-select_install \
- tracing_install
+ tracing_install thermometer_install thermal-engine_install
acpi_clean:
$(call descend,power/acpi,clean)
@@ -160,9 +185,18 @@ perf_clean:
selftests_clean:
$(call descend,testing/$(@:_clean=),clean)
+thermal_clean:
+ $(call descend,lib/thermal,clean)
+
turbostat_clean x86_energy_perf_policy_clean intel-speed-select_clean:
$(call descend,power/x86/$(@:_clean=),clean)
+thermometer_clean:
+ $(call descend,thermal/thermometer,clean)
+
+thermal-engine_clean:
+ $(call descend,thermal/thermal-engine,clean)
+
tmon_clean:
$(call descend,thermal/tmon,clean)
@@ -177,6 +211,6 @@ clean: acpi_clean cgroup_clean counter_clean cpupower_clean hv_clean firewire_cl
vm_clean bpf_clean iio_clean x86_energy_perf_policy_clean tmon_clean \
freefall_clean build_clean libbpf_clean libsubcmd_clean \
gpio_clean objtool_clean leds_clean wmi_clean pci_clean firmware_clean debugging_clean \
- intel-speed-select_clean tracing_clean
+ intel-speed-select_clean tracing_clean thermal_clean thermometer_clean thermal-engine_clean
.PHONY: FORCE
diff --git a/tools/arch/arm64/include/asm/cputype.h b/tools/arch/arm64/include/asm/cputype.h
index 9afcc6467a09..e09d6908a21d 100644
--- a/tools/arch/arm64/include/asm/cputype.h
+++ b/tools/arch/arm64/include/asm/cputype.h
@@ -75,6 +75,7 @@
#define ARM_CPU_PART_CORTEX_A77 0xD0D
#define ARM_CPU_PART_NEOVERSE_V1 0xD40
#define ARM_CPU_PART_CORTEX_A78 0xD41
+#define ARM_CPU_PART_CORTEX_A78AE 0xD42
#define ARM_CPU_PART_CORTEX_X1 0xD44
#define ARM_CPU_PART_CORTEX_A510 0xD46
#define ARM_CPU_PART_CORTEX_A710 0xD47
@@ -130,6 +131,7 @@
#define MIDR_CORTEX_A77 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A77)
#define MIDR_NEOVERSE_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V1)
#define MIDR_CORTEX_A78 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78)
+#define MIDR_CORTEX_A78AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78AE)
#define MIDR_CORTEX_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1)
#define MIDR_CORTEX_A510 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A510)
#define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710)
diff --git a/tools/arch/x86/include/asm/amd-ibs.h b/tools/arch/x86/include/asm/amd-ibs.h
index 174e7d83fcbd..765e9e752d03 100644
--- a/tools/arch/x86/include/asm/amd-ibs.h
+++ b/tools/arch/x86/include/asm/amd-ibs.h
@@ -49,7 +49,7 @@ union ibs_op_ctl {
};
};
-/* MSR 0xc0011035: IBS Op Data 2 */
+/* MSR 0xc0011035: IBS Op Data 1 */
union ibs_op_data {
__u64 val;
struct {
diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h
index 1231d63f836d..1ae0fab7d902 100644
--- a/tools/arch/x86/include/asm/disabled-features.h
+++ b/tools/arch/x86/include/asm/disabled-features.h
@@ -10,12 +10,6 @@
* cpu_feature_enabled().
*/
-#ifdef CONFIG_X86_SMAP
-# define DISABLE_SMAP 0
-#else
-# define DISABLE_SMAP (1<<(X86_FEATURE_SMAP & 31))
-#endif
-
#ifdef CONFIG_X86_UMIP
# define DISABLE_UMIP 0
#else
@@ -80,7 +74,7 @@
#define DISABLED_MASK6 0
#define DISABLED_MASK7 (DISABLE_PTI)
#define DISABLED_MASK8 0
-#define DISABLED_MASK9 (DISABLE_SMAP|DISABLE_SGX)
+#define DISABLED_MASK9 (DISABLE_SGX)
#define DISABLED_MASK10 0
#define DISABLED_MASK11 0
#define DISABLED_MASK12 0
diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
index 0eb90d21049e..ee15311b6be1 100644
--- a/tools/arch/x86/include/asm/msr-index.h
+++ b/tools/arch/x86/include/asm/msr-index.h
@@ -128,9 +128,9 @@
#define TSX_CTRL_RTM_DISABLE BIT(0) /* Disable RTM feature */
#define TSX_CTRL_CPUID_CLEAR BIT(1) /* Disable TSX enumeration */
-/* SRBDS support */
#define MSR_IA32_MCU_OPT_CTRL 0x00000123
-#define RNGDS_MITG_DIS BIT(0)
+#define RNGDS_MITG_DIS BIT(0) /* SRBDS support */
+#define RTM_ALLOW BIT(1) /* TSX development mode */
#define MSR_IA32_SYSENTER_CS 0x00000174
#define MSR_IA32_SYSENTER_ESP 0x00000175
diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
index 91af2850b505..7678af364793 100644
--- a/tools/bpf/bpftool/gen.c
+++ b/tools/bpf/bpftool/gen.c
@@ -828,8 +828,10 @@ codegen_maps_skeleton(struct bpf_object *obj, size_t map_cnt, bool mmaped)
s->map_cnt = %zu; \n\
s->map_skel_sz = sizeof(*s->maps); \n\
s->maps = (struct bpf_map_skeleton *)calloc(s->map_cnt, s->map_skel_sz);\n\
- if (!s->maps) \n\
+ if (!s->maps) { \n\
+ err = -ENOMEM; \n\
goto err; \n\
+ } \n\
",
map_cnt
);
@@ -870,8 +872,10 @@ codegen_progs_skeleton(struct bpf_object *obj, size_t prog_cnt, bool populate_li
s->prog_cnt = %zu; \n\
s->prog_skel_sz = sizeof(*s->progs); \n\
s->progs = (struct bpf_prog_skeleton *)calloc(s->prog_cnt, s->prog_skel_sz);\n\
- if (!s->progs) \n\
+ if (!s->progs) { \n\
+ err = -ENOMEM; \n\
goto err; \n\
+ } \n\
",
prog_cnt
);
@@ -1182,10 +1186,13 @@ static int do_skeleton(int argc, char **argv)
%1$s__create_skeleton(struct %1$s *obj) \n\
{ \n\
struct bpf_object_skeleton *s; \n\
+ int err; \n\
\n\
s = (struct bpf_object_skeleton *)calloc(1, sizeof(*s));\n\
- if (!s) \n\
+ if (!s) { \n\
+ err = -ENOMEM; \n\
goto err; \n\
+ } \n\
\n\
s->sz = sizeof(*s); \n\
s->name = \"%1$s\"; \n\
@@ -1206,7 +1213,7 @@ static int do_skeleton(int argc, char **argv)
return 0; \n\
err: \n\
bpf_object__destroy_skeleton(s); \n\
- return -ENOMEM; \n\
+ return err; \n\
} \n\
\n\
static inline const void *%2$s__elf_bytes(size_t *sz) \n\
@@ -1466,12 +1473,12 @@ static int do_subskeleton(int argc, char **argv)
\n\
obj = (struct %1$s *)calloc(1, sizeof(*obj)); \n\
if (!obj) { \n\
- errno = ENOMEM; \n\
+ err = -ENOMEM; \n\
goto err; \n\
} \n\
s = (struct bpf_object_subskeleton *)calloc(1, sizeof(*s));\n\
if (!s) { \n\
- errno = ENOMEM; \n\
+ err = -ENOMEM; \n\
goto err; \n\
} \n\
s->sz = sizeof(*s); \n\
@@ -1483,7 +1490,7 @@ static int do_subskeleton(int argc, char **argv)
s->var_cnt = %2$d; \n\
s->vars = (struct bpf_var_skeleton *)calloc(%2$d, sizeof(*s->vars));\n\
if (!s->vars) { \n\
- errno = ENOMEM; \n\
+ err = -ENOMEM; \n\
goto err; \n\
} \n\
",
@@ -1538,6 +1545,7 @@ static int do_subskeleton(int argc, char **argv)
return obj; \n\
err: \n\
%1$s__destroy(obj); \n\
+ errno = -err; \n\
return NULL; \n\
} \n\
\n\
diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature
index ae61f464043a..c6a48d0ef9ff 100644
--- a/tools/build/Makefile.feature
+++ b/tools/build/Makefile.feature
@@ -98,6 +98,7 @@ FEATURE_TESTS_EXTRA := \
llvm-version \
clang \
libbpf \
+ libbpf-btf__load_from_kernel_by_id \
libpfm4 \
libdebuginfod \
clang-bpf-co-re
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index 1480910c792e..cb4a2a4fa2e4 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -57,6 +57,7 @@ FILES= \
test-lzma.bin \
test-bpf.bin \
test-libbpf.bin \
+ test-libbpf-btf__load_from_kernel_by_id.bin \
test-get_cpuid.bin \
test-sdt.bin \
test-cxx.bin \
@@ -217,9 +218,16 @@ strip-libs = $(filter-out -l%,$(1))
PERL_EMBED_LDOPTS = $(shell perl -MExtUtils::Embed -e ldopts 2>/dev/null)
PERL_EMBED_LDFLAGS = $(call strip-libs,$(PERL_EMBED_LDOPTS))
PERL_EMBED_LIBADD = $(call grep-libs,$(PERL_EMBED_LDOPTS))
-PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts 2>/dev/null`
+PERL_EMBED_CCOPTS = $(shell perl -MExtUtils::Embed -e ccopts 2>/dev/null)
FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS)
+ifeq ($(CC_NO_CLANG), 0)
+ PERL_EMBED_LDOPTS := $(filter-out -specs=%,$(PERL_EMBED_LDOPTS))
+ PERL_EMBED_CCOPTS := $(filter-out -flto=auto -ffat-lto-objects, $(PERL_EMBED_CCOPTS))
+ PERL_EMBED_CCOPTS := $(filter-out -specs=%,$(PERL_EMBED_CCOPTS))
+ FLAGS_PERL_EMBED += -Wno-compound-token-split-by-macro
+endif
+
$(OUTPUT)test-libperl.bin:
$(BUILD) $(FLAGS_PERL_EMBED)
@@ -280,6 +288,9 @@ $(OUTPUT)test-bpf.bin:
$(OUTPUT)test-libbpf.bin:
$(BUILD) -lbpf
+$(OUTPUT)test-libbpf-btf__load_from_kernel_by_id.bin:
+ $(BUILD) -lbpf
+
$(OUTPUT)test-sdt.bin:
$(BUILD)
diff --git a/tools/build/feature/test-libbpf-btf__load_from_kernel_by_id.c b/tools/build/feature/test-libbpf-btf__load_from_kernel_by_id.c
new file mode 100644
index 000000000000..f7c084428735
--- /dev/null
+++ b/tools/build/feature/test-libbpf-btf__load_from_kernel_by_id.c
@@ -0,0 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <bpf/libbpf.h>
+
+int main(void)
+{
+ return btf__load_from_kernel_by_id(20151128, NULL);
+}
diff --git a/tools/certs/print-cert-tbs-hash.sh b/tools/certs/print-cert-tbs-hash.sh
new file mode 100755
index 000000000000..c93df5387ec9
--- /dev/null
+++ b/tools/certs/print-cert-tbs-hash.sh
@@ -0,0 +1,91 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright © 2020, Microsoft Corporation. All rights reserved.
+#
+# Author: Mickaël Salaün <mic@linux.microsoft.com>
+#
+# Compute and print the To Be Signed (TBS) hash of a certificate. This is used
+# as description of keys in the blacklist keyring to identify certificates.
+# This output should be redirected, without newline, in a file (hash0.txt) and
+# signed to create a PKCS#7 file (hash0.p7s). Both of these files can then be
+# loaded in the kernel with.
+#
+# Exemple on a workstation:
+# ./print-cert-tbs-hash.sh certificate-to-invalidate.pem > hash0.txt
+# openssl smime -sign -in hash0.txt -inkey builtin-private-key.pem \
+# -signer builtin-certificate.pem -certfile certificate-chain.pem \
+# -noattr -binary -outform DER -out hash0.p7s
+#
+# Exemple on a managed system:
+# keyctl padd blacklist "$(< hash0.txt)" %:.blacklist < hash0.p7s
+
+set -u -e -o pipefail
+
+CERT="${1:-}"
+BASENAME="$(basename -- "${BASH_SOURCE[0]}")"
+
+if [ $# -ne 1 ] || [ ! -f "${CERT}" ]; then
+ echo "usage: ${BASENAME} <certificate>" >&2
+ exit 1
+fi
+
+# Checks that it is indeed a certificate (PEM or DER encoded) and exclude the
+# optional PEM text header.
+if ! PEM="$(openssl x509 -inform DER -in "${CERT}" 2>/dev/null || openssl x509 -in "${CERT}")"; then
+ echo "ERROR: Failed to parse certificate" >&2
+ exit 1
+fi
+
+# TBSCertificate starts at the second entry.
+# Cf. https://tools.ietf.org/html/rfc3280#section-4.1
+#
+# Exemple of first lines printed by openssl asn1parse:
+# 0:d=0 hl=4 l= 763 cons: SEQUENCE
+# 4:d=1 hl=4 l= 483 cons: SEQUENCE
+# 8:d=2 hl=2 l= 3 cons: cont [ 0 ]
+# 10:d=3 hl=2 l= 1 prim: INTEGER :02
+# 13:d=2 hl=2 l= 20 prim: INTEGER :3CEB2CB8818D968AC00EEFE195F0DF9665328B7B
+# 35:d=2 hl=2 l= 13 cons: SEQUENCE
+# 37:d=3 hl=2 l= 9 prim: OBJECT :sha256WithRSAEncryption
+RANGE_AND_DIGEST_RE='
+2s/^\s*\([0-9]\+\):d=\s*[0-9]\+\s\+hl=\s*[0-9]\+\s\+l=\s*\([0-9]\+\)\s\+cons:\s*SEQUENCE\s*$/\1 \2/p;
+7s/^\s*[0-9]\+:d=\s*[0-9]\+\s\+hl=\s*[0-9]\+\s\+l=\s*[0-9]\+\s\+prim:\s*OBJECT\s*:\(.*\)$/\1/p;
+'
+
+RANGE_AND_DIGEST=($(echo "${PEM}" | \
+ openssl asn1parse -in - | \
+ sed -n -e "${RANGE_AND_DIGEST_RE}"))
+
+if [ "${#RANGE_AND_DIGEST[@]}" != 3 ]; then
+ echo "ERROR: Failed to parse TBSCertificate." >&2
+ exit 1
+fi
+
+OFFSET="${RANGE_AND_DIGEST[0]}"
+END="$(( OFFSET + RANGE_AND_DIGEST[1] ))"
+DIGEST="${RANGE_AND_DIGEST[2]}"
+
+# The signature hash algorithm is used by Linux to blacklist certificates.
+# Cf. crypto/asymmetric_keys/x509_cert_parser.c:x509_note_pkey_algo()
+DIGEST_MATCH=""
+while read -r DIGEST_ITEM; do
+ if [ -z "${DIGEST_ITEM}" ]; then
+ break
+ fi
+ if echo "${DIGEST}" | grep -qiF "${DIGEST_ITEM}"; then
+ DIGEST_MATCH="${DIGEST_ITEM}"
+ break
+ fi
+done < <(openssl list -digest-commands | tr ' ' '\n' | sort -ur)
+
+if [ -z "${DIGEST_MATCH}" ]; then
+ echo "ERROR: Unknown digest algorithm: ${DIGEST}" >&2
+ exit 1
+fi
+
+echo "${PEM}" | \
+ openssl x509 -in - -outform DER | \
+ dd "bs=1" "skip=${OFFSET}" "count=${END}" "status=none" | \
+ openssl dgst "-${DIGEST_MATCH}" - | \
+ awk '{printf "tbs:" $2}'
diff --git a/tools/include/linux/objtool.h b/tools/include/linux/objtool.h
index 586d35720f13..6491fa8fba6d 100644
--- a/tools/include/linux/objtool.h
+++ b/tools/include/linux/objtool.h
@@ -38,7 +38,9 @@ struct unwind_hint {
#define UNWIND_HINT_TYPE_REGS_PARTIAL 2
#define UNWIND_HINT_TYPE_FUNC 3
-#ifdef CONFIG_STACK_VALIDATION
+#ifdef CONFIG_OBJTOOL
+
+#include <asm/asm.h>
#ifndef __ASSEMBLY__
@@ -137,7 +139,7 @@ struct unwind_hint {
.macro STACK_FRAME_NON_STANDARD func:req
.pushsection .discard.func_stack_frame_non_standard, "aw"
- .long \func - .
+ _ASM_PTR \func
.popsection
.endm
@@ -157,7 +159,7 @@ struct unwind_hint {
#endif /* __ASSEMBLY__ */
-#else /* !CONFIG_STACK_VALIDATION */
+#else /* !CONFIG_OBJTOOL */
#ifndef __ASSEMBLY__
@@ -179,6 +181,6 @@ struct unwind_hint {
.endm
#endif
-#endif /* CONFIG_STACK_VALIDATION */
+#endif /* CONFIG_OBJTOOL */
#endif /* _LINUX_OBJTOOL_H */
diff --git a/tools/include/linux/slab.h b/tools/include/linux/slab.h
index f41d8a0eb1a4..0616409513eb 100644
--- a/tools/include/linux/slab.h
+++ b/tools/include/linux/slab.h
@@ -28,7 +28,13 @@ static inline void *kzalloc(size_t size, gfp_t gfp)
return kmalloc(size, gfp | __GFP_ZERO);
}
-void *kmem_cache_alloc(struct kmem_cache *cachep, int flags);
+struct list_lru;
+
+void *kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *, int flags);
+static inline void *kmem_cache_alloc(struct kmem_cache *cachep, int flags)
+{
+ return kmem_cache_alloc_lru(cachep, NULL, flags);
+}
void kmem_cache_free(struct kmem_cache *cachep, void *objp);
struct kmem_cache *kmem_cache_create(const char *name, unsigned int size,
diff --git a/tools/include/nolibc/Makefile b/tools/include/nolibc/Makefile
new file mode 100644
index 000000000000..7a16d917c185
--- /dev/null
+++ b/tools/include/nolibc/Makefile
@@ -0,0 +1,42 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for nolibc installation and tests
+include ../../scripts/Makefile.include
+
+# we're in ".../tools/include/nolibc"
+ifeq ($(srctree),)
+srctree := $(patsubst %/tools/include/,%,$(dir $(CURDIR)))
+endif
+
+nolibc_arch := $(patsubst arm64,aarch64,$(ARCH))
+arch_file := arch-$(nolibc_arch).h
+all_files := ctype.h errno.h nolibc.h signal.h std.h stdio.h stdlib.h string.h \
+ sys.h time.h types.h unistd.h
+
+# install all headers needed to support a bare-metal compiler
+all:
+
+# Note: when ARCH is "x86" we concatenate both x86_64 and i386
+headers:
+ $(Q)mkdir -p $(OUTPUT)sysroot
+ $(Q)mkdir -p $(OUTPUT)sysroot/include
+ $(Q)cp $(all_files) $(OUTPUT)sysroot/include/
+ $(Q)if [ "$(ARCH)" = "x86" ]; then \
+ sed -e \
+ 's,^#ifndef _NOLIBC_ARCH_X86_64_H,#if !defined(_NOLIBC_ARCH_X86_64_H) \&\& defined(__x86_64__),' \
+ arch-x86_64.h; \
+ sed -e \
+ 's,^#ifndef _NOLIBC_ARCH_I386_H,#if !defined(_NOLIBC_ARCH_I386_H) \&\& !defined(__x86_64__),' \
+ arch-i386.h; \
+ elif [ -e "$(arch_file)" ]; then \
+ cat $(arch_file); \
+ else \
+ echo "Fatal: architecture $(ARCH) not yet supported by nolibc." >&2; \
+ exit 1; \
+ fi > $(OUTPUT)sysroot/include/arch.h
+
+headers_standalone: headers
+ $(Q)$(MAKE) -C $(srctree) headers
+ $(Q)$(MAKE) -C $(srctree) headers_install INSTALL_HDR_PATH=$(OUTPUT)/sysroot
+
+clean:
+ $(call QUIET_CLEAN, nolibc) rm -rf "$(OUTPUT)sysroot"
diff --git a/tools/include/nolibc/arch-aarch64.h b/tools/include/nolibc/arch-aarch64.h
new file mode 100644
index 000000000000..f68baf8f395f
--- /dev/null
+++ b/tools/include/nolibc/arch-aarch64.h
@@ -0,0 +1,199 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * AARCH64 specific definitions for NOLIBC
+ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_ARCH_AARCH64_H
+#define _NOLIBC_ARCH_AARCH64_H
+
+/* O_* macros for fcntl/open are architecture-specific */
+#define O_RDONLY 0
+#define O_WRONLY 1
+#define O_RDWR 2
+#define O_CREAT 0x40
+#define O_EXCL 0x80
+#define O_NOCTTY 0x100
+#define O_TRUNC 0x200
+#define O_APPEND 0x400
+#define O_NONBLOCK 0x800
+#define O_DIRECTORY 0x4000
+
+/* The struct returned by the newfstatat() syscall. Differs slightly from the
+ * x86_64's stat one by field ordering, so be careful.
+ */
+struct sys_stat_struct {
+ unsigned long st_dev;
+ unsigned long st_ino;
+ unsigned int st_mode;
+ unsigned int st_nlink;
+ unsigned int st_uid;
+ unsigned int st_gid;
+
+ unsigned long st_rdev;
+ unsigned long __pad1;
+ long st_size;
+ int st_blksize;
+ int __pad2;
+
+ long st_blocks;
+ long st_atime;
+ unsigned long st_atime_nsec;
+ long st_mtime;
+
+ unsigned long st_mtime_nsec;
+ long st_ctime;
+ unsigned long st_ctime_nsec;
+ unsigned int __unused[2];
+};
+
+/* Syscalls for AARCH64 :
+ * - registers are 64-bit
+ * - stack is 16-byte aligned
+ * - syscall number is passed in x8
+ * - arguments are in x0, x1, x2, x3, x4, x5
+ * - the system call is performed by calling svc 0
+ * - syscall return comes in x0.
+ * - the arguments are cast to long and assigned into the target registers
+ * which are then simply passed as registers to the asm code, so that we
+ * don't have to experience issues with register constraints.
+ *
+ * On aarch64, select() is not implemented so we have to use pselect6().
+ */
+#define __ARCH_WANT_SYS_PSELECT6
+
+#define my_syscall0(num) \
+({ \
+ register long _num __asm__ ("x8") = (num); \
+ register long _arg1 __asm__ ("x0"); \
+ \
+ __asm__ volatile ( \
+ "svc #0\n" \
+ : "=r"(_arg1) \
+ : "r"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall1(num, arg1) \
+({ \
+ register long _num __asm__ ("x8") = (num); \
+ register long _arg1 __asm__ ("x0") = (long)(arg1); \
+ \
+ __asm__ volatile ( \
+ "svc #0\n" \
+ : "=r"(_arg1) \
+ : "r"(_arg1), \
+ "r"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall2(num, arg1, arg2) \
+({ \
+ register long _num __asm__ ("x8") = (num); \
+ register long _arg1 __asm__ ("x0") = (long)(arg1); \
+ register long _arg2 __asm__ ("x1") = (long)(arg2); \
+ \
+ __asm__ volatile ( \
+ "svc #0\n" \
+ : "=r"(_arg1) \
+ : "r"(_arg1), "r"(_arg2), \
+ "r"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall3(num, arg1, arg2, arg3) \
+({ \
+ register long _num __asm__ ("x8") = (num); \
+ register long _arg1 __asm__ ("x0") = (long)(arg1); \
+ register long _arg2 __asm__ ("x1") = (long)(arg2); \
+ register long _arg3 __asm__ ("x2") = (long)(arg3); \
+ \
+ __asm__ volatile ( \
+ "svc #0\n" \
+ : "=r"(_arg1) \
+ : "r"(_arg1), "r"(_arg2), "r"(_arg3), \
+ "r"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall4(num, arg1, arg2, arg3, arg4) \
+({ \
+ register long _num __asm__ ("x8") = (num); \
+ register long _arg1 __asm__ ("x0") = (long)(arg1); \
+ register long _arg2 __asm__ ("x1") = (long)(arg2); \
+ register long _arg3 __asm__ ("x2") = (long)(arg3); \
+ register long _arg4 __asm__ ("x3") = (long)(arg4); \
+ \
+ __asm__ volatile ( \
+ "svc #0\n" \
+ : "=r"(_arg1) \
+ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \
+ "r"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \
+({ \
+ register long _num __asm__ ("x8") = (num); \
+ register long _arg1 __asm__ ("x0") = (long)(arg1); \
+ register long _arg2 __asm__ ("x1") = (long)(arg2); \
+ register long _arg3 __asm__ ("x2") = (long)(arg3); \
+ register long _arg4 __asm__ ("x3") = (long)(arg4); \
+ register long _arg5 __asm__ ("x4") = (long)(arg5); \
+ \
+ __asm__ volatile ( \
+ "svc #0\n" \
+ : "=r" (_arg1) \
+ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
+ "r"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \
+({ \
+ register long _num __asm__ ("x8") = (num); \
+ register long _arg1 __asm__ ("x0") = (long)(arg1); \
+ register long _arg2 __asm__ ("x1") = (long)(arg2); \
+ register long _arg3 __asm__ ("x2") = (long)(arg3); \
+ register long _arg4 __asm__ ("x3") = (long)(arg4); \
+ register long _arg5 __asm__ ("x4") = (long)(arg5); \
+ register long _arg6 __asm__ ("x5") = (long)(arg6); \
+ \
+ __asm__ volatile ( \
+ "svc #0\n" \
+ : "=r" (_arg1) \
+ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
+ "r"(_arg6), "r"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+/* startup code */
+__asm__ (".section .text\n"
+ ".weak _start\n"
+ "_start:\n"
+ "ldr x0, [sp]\n" // argc (x0) was in the stack
+ "add x1, sp, 8\n" // argv (x1) = sp
+ "lsl x2, x0, 3\n" // envp (x2) = 8*argc ...
+ "add x2, x2, 8\n" // + 8 (skip null)
+ "add x2, x2, x1\n" // + argv
+ "and sp, x1, -16\n" // sp must be 16-byte aligned in the callee
+ "bl main\n" // main() returns the status code, we'll exit with it.
+ "mov x8, 93\n" // NR_exit == 93
+ "svc #0\n"
+ "");
+
+#endif // _NOLIBC_ARCH_AARCH64_H
diff --git a/tools/include/nolibc/arch-arm.h b/tools/include/nolibc/arch-arm.h
new file mode 100644
index 000000000000..f31be8e967d6
--- /dev/null
+++ b/tools/include/nolibc/arch-arm.h
@@ -0,0 +1,204 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * ARM specific definitions for NOLIBC
+ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_ARCH_ARM_H
+#define _NOLIBC_ARCH_ARM_H
+
+/* O_* macros for fcntl/open are architecture-specific */
+#define O_RDONLY 0
+#define O_WRONLY 1
+#define O_RDWR 2
+#define O_CREAT 0x40
+#define O_EXCL 0x80
+#define O_NOCTTY 0x100
+#define O_TRUNC 0x200
+#define O_APPEND 0x400
+#define O_NONBLOCK 0x800
+#define O_DIRECTORY 0x4000
+
+/* The struct returned by the stat() syscall, 32-bit only, the syscall returns
+ * exactly 56 bytes (stops before the unused array). In big endian, the format
+ * differs as devices are returned as short only.
+ */
+struct sys_stat_struct {
+#if defined(__ARMEB__)
+ unsigned short st_dev;
+ unsigned short __pad1;
+#else
+ unsigned long st_dev;
+#endif
+ unsigned long st_ino;
+ unsigned short st_mode;
+ unsigned short st_nlink;
+ unsigned short st_uid;
+ unsigned short st_gid;
+
+#if defined(__ARMEB__)
+ unsigned short st_rdev;
+ unsigned short __pad2;
+#else
+ unsigned long st_rdev;
+#endif
+ unsigned long st_size;
+ unsigned long st_blksize;
+ unsigned long st_blocks;
+
+ unsigned long st_atime;
+ unsigned long st_atime_nsec;
+ unsigned long st_mtime;
+ unsigned long st_mtime_nsec;
+
+ unsigned long st_ctime;
+ unsigned long st_ctime_nsec;
+ unsigned long __unused[2];
+};
+
+/* Syscalls for ARM in ARM or Thumb modes :
+ * - registers are 32-bit
+ * - stack is 8-byte aligned
+ * ( http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.faqs/ka4127.html)
+ * - syscall number is passed in r7
+ * - arguments are in r0, r1, r2, r3, r4, r5
+ * - the system call is performed by calling svc #0
+ * - syscall return comes in r0.
+ * - only lr is clobbered.
+ * - the arguments are cast to long and assigned into the target registers
+ * which are then simply passed as registers to the asm code, so that we
+ * don't have to experience issues with register constraints.
+ * - the syscall number is always specified last in order to allow to force
+ * some registers before (gcc refuses a %-register at the last position).
+ *
+ * Also, ARM supports the old_select syscall if newselect is not available
+ */
+#define __ARCH_WANT_SYS_OLD_SELECT
+
+#define my_syscall0(num) \
+({ \
+ register long _num __asm__ ("r7") = (num); \
+ register long _arg1 __asm__ ("r0"); \
+ \
+ __asm__ volatile ( \
+ "svc #0\n" \
+ : "=r"(_arg1) \
+ : "r"(_num) \
+ : "memory", "cc", "lr" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall1(num, arg1) \
+({ \
+ register long _num __asm__ ("r7") = (num); \
+ register long _arg1 __asm__ ("r0") = (long)(arg1); \
+ \
+ __asm__ volatile ( \
+ "svc #0\n" \
+ : "=r"(_arg1) \
+ : "r"(_arg1), \
+ "r"(_num) \
+ : "memory", "cc", "lr" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall2(num, arg1, arg2) \
+({ \
+ register long _num __asm__ ("r7") = (num); \
+ register long _arg1 __asm__ ("r0") = (long)(arg1); \
+ register long _arg2 __asm__ ("r1") = (long)(arg2); \
+ \
+ __asm__ volatile ( \
+ "svc #0\n" \
+ : "=r"(_arg1) \
+ : "r"(_arg1), "r"(_arg2), \
+ "r"(_num) \
+ : "memory", "cc", "lr" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall3(num, arg1, arg2, arg3) \
+({ \
+ register long _num __asm__ ("r7") = (num); \
+ register long _arg1 __asm__ ("r0") = (long)(arg1); \
+ register long _arg2 __asm__ ("r1") = (long)(arg2); \
+ register long _arg3 __asm__ ("r2") = (long)(arg3); \
+ \
+ __asm__ volatile ( \
+ "svc #0\n" \
+ : "=r"(_arg1) \
+ : "r"(_arg1), "r"(_arg2), "r"(_arg3), \
+ "r"(_num) \
+ : "memory", "cc", "lr" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall4(num, arg1, arg2, arg3, arg4) \
+({ \
+ register long _num __asm__ ("r7") = (num); \
+ register long _arg1 __asm__ ("r0") = (long)(arg1); \
+ register long _arg2 __asm__ ("r1") = (long)(arg2); \
+ register long _arg3 __asm__ ("r2") = (long)(arg3); \
+ register long _arg4 __asm__ ("r3") = (long)(arg4); \
+ \
+ __asm__ volatile ( \
+ "svc #0\n" \
+ : "=r"(_arg1) \
+ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \
+ "r"(_num) \
+ : "memory", "cc", "lr" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \
+({ \
+ register long _num __asm__ ("r7") = (num); \
+ register long _arg1 __asm__ ("r0") = (long)(arg1); \
+ register long _arg2 __asm__ ("r1") = (long)(arg2); \
+ register long _arg3 __asm__ ("r2") = (long)(arg3); \
+ register long _arg4 __asm__ ("r3") = (long)(arg4); \
+ register long _arg5 __asm__ ("r4") = (long)(arg5); \
+ \
+ __asm__ volatile ( \
+ "svc #0\n" \
+ : "=r" (_arg1) \
+ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
+ "r"(_num) \
+ : "memory", "cc", "lr" \
+ ); \
+ _arg1; \
+})
+
+/* startup code */
+__asm__ (".section .text\n"
+ ".weak _start\n"
+ "_start:\n"
+#if defined(__THUMBEB__) || defined(__THUMBEL__)
+ /* We enter here in 32-bit mode but if some previous functions were in
+ * 16-bit mode, the assembler cannot know, so we need to tell it we're in
+ * 32-bit now, then switch to 16-bit (is there a better way to do it than
+ * adding 1 by hand ?) and tell the asm we're now in 16-bit mode so that
+ * it generates correct instructions. Note that we do not support thumb1.
+ */
+ ".code 32\n"
+ "add r0, pc, #1\n"
+ "bx r0\n"
+ ".code 16\n"
+#endif
+ "pop {%r0}\n" // argc was in the stack
+ "mov %r1, %sp\n" // argv = sp
+ "add %r2, %r1, %r0, lsl #2\n" // envp = argv + 4*argc ...
+ "add %r2, %r2, $4\n" // ... + 4
+ "and %r3, %r1, $-8\n" // AAPCS : sp must be 8-byte aligned in the
+ "mov %sp, %r3\n" // callee, an bl doesn't push (lr=pc)
+ "bl main\n" // main() returns the status code, we'll exit with it.
+ "movs r7, $1\n" // NR_exit == 1
+ "svc $0x00\n"
+ "");
+
+#endif // _NOLIBC_ARCH_ARM_H
diff --git a/tools/include/nolibc/arch-i386.h b/tools/include/nolibc/arch-i386.h
new file mode 100644
index 000000000000..d7e7212346e2
--- /dev/null
+++ b/tools/include/nolibc/arch-i386.h
@@ -0,0 +1,219 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * i386 specific definitions for NOLIBC
+ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_ARCH_I386_H
+#define _NOLIBC_ARCH_I386_H
+
+/* O_* macros for fcntl/open are architecture-specific */
+#define O_RDONLY 0
+#define O_WRONLY 1
+#define O_RDWR 2
+#define O_CREAT 0x40
+#define O_EXCL 0x80
+#define O_NOCTTY 0x100
+#define O_TRUNC 0x200
+#define O_APPEND 0x400
+#define O_NONBLOCK 0x800
+#define O_DIRECTORY 0x10000
+
+/* The struct returned by the stat() syscall, 32-bit only, the syscall returns
+ * exactly 56 bytes (stops before the unused array).
+ */
+struct sys_stat_struct {
+ unsigned long st_dev;
+ unsigned long st_ino;
+ unsigned short st_mode;
+ unsigned short st_nlink;
+ unsigned short st_uid;
+ unsigned short st_gid;
+
+ unsigned long st_rdev;
+ unsigned long st_size;
+ unsigned long st_blksize;
+ unsigned long st_blocks;
+
+ unsigned long st_atime;
+ unsigned long st_atime_nsec;
+ unsigned long st_mtime;
+ unsigned long st_mtime_nsec;
+
+ unsigned long st_ctime;
+ unsigned long st_ctime_nsec;
+ unsigned long __unused[2];
+};
+
+/* Syscalls for i386 :
+ * - mostly similar to x86_64
+ * - registers are 32-bit
+ * - syscall number is passed in eax
+ * - arguments are in ebx, ecx, edx, esi, edi, ebp respectively
+ * - all registers are preserved (except eax of course)
+ * - the system call is performed by calling int $0x80
+ * - syscall return comes in eax
+ * - the arguments are cast to long and assigned into the target registers
+ * which are then simply passed as registers to the asm code, so that we
+ * don't have to experience issues with register constraints.
+ * - the syscall number is always specified last in order to allow to force
+ * some registers before (gcc refuses a %-register at the last position).
+ *
+ * Also, i386 supports the old_select syscall if newselect is not available
+ */
+#define __ARCH_WANT_SYS_OLD_SELECT
+
+#define my_syscall0(num) \
+({ \
+ long _ret; \
+ register long _num __asm__ ("eax") = (num); \
+ \
+ __asm__ volatile ( \
+ "int $0x80\n" \
+ : "=a" (_ret) \
+ : "0"(_num) \
+ : "memory", "cc" \
+ ); \
+ _ret; \
+})
+
+#define my_syscall1(num, arg1) \
+({ \
+ long _ret; \
+ register long _num __asm__ ("eax") = (num); \
+ register long _arg1 __asm__ ("ebx") = (long)(arg1); \
+ \
+ __asm__ volatile ( \
+ "int $0x80\n" \
+ : "=a" (_ret) \
+ : "r"(_arg1), \
+ "0"(_num) \
+ : "memory", "cc" \
+ ); \
+ _ret; \
+})
+
+#define my_syscall2(num, arg1, arg2) \
+({ \
+ long _ret; \
+ register long _num __asm__ ("eax") = (num); \
+ register long _arg1 __asm__ ("ebx") = (long)(arg1); \
+ register long _arg2 __asm__ ("ecx") = (long)(arg2); \
+ \
+ __asm__ volatile ( \
+ "int $0x80\n" \
+ : "=a" (_ret) \
+ : "r"(_arg1), "r"(_arg2), \
+ "0"(_num) \
+ : "memory", "cc" \
+ ); \
+ _ret; \
+})
+
+#define my_syscall3(num, arg1, arg2, arg3) \
+({ \
+ long _ret; \
+ register long _num __asm__ ("eax") = (num); \
+ register long _arg1 __asm__ ("ebx") = (long)(arg1); \
+ register long _arg2 __asm__ ("ecx") = (long)(arg2); \
+ register long _arg3 __asm__ ("edx") = (long)(arg3); \
+ \
+ __asm__ volatile ( \
+ "int $0x80\n" \
+ : "=a" (_ret) \
+ : "r"(_arg1), "r"(_arg2), "r"(_arg3), \
+ "0"(_num) \
+ : "memory", "cc" \
+ ); \
+ _ret; \
+})
+
+#define my_syscall4(num, arg1, arg2, arg3, arg4) \
+({ \
+ long _ret; \
+ register long _num __asm__ ("eax") = (num); \
+ register long _arg1 __asm__ ("ebx") = (long)(arg1); \
+ register long _arg2 __asm__ ("ecx") = (long)(arg2); \
+ register long _arg3 __asm__ ("edx") = (long)(arg3); \
+ register long _arg4 __asm__ ("esi") = (long)(arg4); \
+ \
+ __asm__ volatile ( \
+ "int $0x80\n" \
+ : "=a" (_ret) \
+ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \
+ "0"(_num) \
+ : "memory", "cc" \
+ ); \
+ _ret; \
+})
+
+#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \
+({ \
+ long _ret; \
+ register long _num __asm__ ("eax") = (num); \
+ register long _arg1 __asm__ ("ebx") = (long)(arg1); \
+ register long _arg2 __asm__ ("ecx") = (long)(arg2); \
+ register long _arg3 __asm__ ("edx") = (long)(arg3); \
+ register long _arg4 __asm__ ("esi") = (long)(arg4); \
+ register long _arg5 __asm__ ("edi") = (long)(arg5); \
+ \
+ __asm__ volatile ( \
+ "int $0x80\n" \
+ : "=a" (_ret) \
+ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
+ "0"(_num) \
+ : "memory", "cc" \
+ ); \
+ _ret; \
+})
+
+#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \
+({ \
+ long _eax = (long)(num); \
+ long _arg6 = (long)(arg6); /* Always in memory */ \
+ __asm__ volatile ( \
+ "pushl %[_arg6]\n\t" \
+ "pushl %%ebp\n\t" \
+ "movl 4(%%esp),%%ebp\n\t" \
+ "int $0x80\n\t" \
+ "popl %%ebp\n\t" \
+ "addl $4,%%esp\n\t" \
+ : "+a"(_eax) /* %eax */ \
+ : "b"(arg1), /* %ebx */ \
+ "c"(arg2), /* %ecx */ \
+ "d"(arg3), /* %edx */ \
+ "S"(arg4), /* %esi */ \
+ "D"(arg5), /* %edi */ \
+ [_arg6]"m"(_arg6) /* memory */ \
+ : "memory", "cc" \
+ ); \
+ _eax; \
+})
+
+/* startup code */
+/*
+ * i386 System V ABI mandates:
+ * 1) last pushed argument must be 16-byte aligned.
+ * 2) The deepest stack frame should be set to zero
+ *
+ */
+__asm__ (".section .text\n"
+ ".weak _start\n"
+ "_start:\n"
+ "pop %eax\n" // argc (first arg, %eax)
+ "mov %esp, %ebx\n" // argv[] (second arg, %ebx)
+ "lea 4(%ebx,%eax,4),%ecx\n" // then a NULL then envp (third arg, %ecx)
+ "xor %ebp, %ebp\n" // zero the stack frame
+ "and $-16, %esp\n" // x86 ABI : esp must be 16-byte aligned before
+ "sub $4, %esp\n" // the call instruction (args are aligned)
+ "push %ecx\n" // push all registers on the stack so that we
+ "push %ebx\n" // support both regparm and plain stack modes
+ "push %eax\n"
+ "call main\n" // main() returns the status code in %eax
+ "mov %eax, %ebx\n" // retrieve exit code (32-bit int)
+ "movl $1, %eax\n" // NR_exit == 1
+ "int $0x80\n" // exit now
+ "hlt\n" // ensure it does not
+ "");
+
+#endif // _NOLIBC_ARCH_I386_H
diff --git a/tools/include/nolibc/arch-mips.h b/tools/include/nolibc/arch-mips.h
new file mode 100644
index 000000000000..5fc5b8029bff
--- /dev/null
+++ b/tools/include/nolibc/arch-mips.h
@@ -0,0 +1,215 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * MIPS specific definitions for NOLIBC
+ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_ARCH_MIPS_H
+#define _NOLIBC_ARCH_MIPS_H
+
+/* O_* macros for fcntl/open are architecture-specific */
+#define O_RDONLY 0
+#define O_WRONLY 1
+#define O_RDWR 2
+#define O_APPEND 0x0008
+#define O_NONBLOCK 0x0080
+#define O_CREAT 0x0100
+#define O_TRUNC 0x0200
+#define O_EXCL 0x0400
+#define O_NOCTTY 0x0800
+#define O_DIRECTORY 0x10000
+
+/* The struct returned by the stat() syscall. 88 bytes are returned by the
+ * syscall.
+ */
+struct sys_stat_struct {
+ unsigned int st_dev;
+ long st_pad1[3];
+ unsigned long st_ino;
+ unsigned int st_mode;
+ unsigned int st_nlink;
+ unsigned int st_uid;
+ unsigned int st_gid;
+ unsigned int st_rdev;
+ long st_pad2[2];
+ long st_size;
+ long st_pad3;
+
+ long st_atime;
+ long st_atime_nsec;
+ long st_mtime;
+ long st_mtime_nsec;
+
+ long st_ctime;
+ long st_ctime_nsec;
+ long st_blksize;
+ long st_blocks;
+ long st_pad4[14];
+};
+
+/* Syscalls for MIPS ABI O32 :
+ * - WARNING! there's always a delayed slot!
+ * - WARNING again, the syntax is different, registers take a '$' and numbers
+ * do not.
+ * - registers are 32-bit
+ * - stack is 8-byte aligned
+ * - syscall number is passed in v0 (starts at 0xfa0).
+ * - arguments are in a0, a1, a2, a3, then the stack. The caller needs to
+ * leave some room in the stack for the callee to save a0..a3 if needed.
+ * - Many registers are clobbered, in fact only a0..a2 and s0..s8 are
+ * preserved. See: https://www.linux-mips.org/wiki/Syscall as well as
+ * scall32-o32.S in the kernel sources.
+ * - the system call is performed by calling "syscall"
+ * - syscall return comes in v0, and register a3 needs to be checked to know
+ * if an error occurred, in which case errno is in v0.
+ * - the arguments are cast to long and assigned into the target registers
+ * which are then simply passed as registers to the asm code, so that we
+ * don't have to experience issues with register constraints.
+ */
+
+#define my_syscall0(num) \
+({ \
+ register long _num __asm__ ("v0") = (num); \
+ register long _arg4 __asm__ ("a3"); \
+ \
+ __asm__ volatile ( \
+ "addiu $sp, $sp, -32\n" \
+ "syscall\n" \
+ "addiu $sp, $sp, 32\n" \
+ : "=r"(_num), "=r"(_arg4) \
+ : "r"(_num) \
+ : "memory", "cc", "at", "v1", "hi", "lo", \
+ "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \
+ ); \
+ _arg4 ? -_num : _num; \
+})
+
+#define my_syscall1(num, arg1) \
+({ \
+ register long _num __asm__ ("v0") = (num); \
+ register long _arg1 __asm__ ("a0") = (long)(arg1); \
+ register long _arg4 __asm__ ("a3"); \
+ \
+ __asm__ volatile ( \
+ "addiu $sp, $sp, -32\n" \
+ "syscall\n" \
+ "addiu $sp, $sp, 32\n" \
+ : "=r"(_num), "=r"(_arg4) \
+ : "0"(_num), \
+ "r"(_arg1) \
+ : "memory", "cc", "at", "v1", "hi", "lo", \
+ "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \
+ ); \
+ _arg4 ? -_num : _num; \
+})
+
+#define my_syscall2(num, arg1, arg2) \
+({ \
+ register long _num __asm__ ("v0") = (num); \
+ register long _arg1 __asm__ ("a0") = (long)(arg1); \
+ register long _arg2 __asm__ ("a1") = (long)(arg2); \
+ register long _arg4 __asm__ ("a3"); \
+ \
+ __asm__ volatile ( \
+ "addiu $sp, $sp, -32\n" \
+ "syscall\n" \
+ "addiu $sp, $sp, 32\n" \
+ : "=r"(_num), "=r"(_arg4) \
+ : "0"(_num), \
+ "r"(_arg1), "r"(_arg2) \
+ : "memory", "cc", "at", "v1", "hi", "lo", \
+ "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \
+ ); \
+ _arg4 ? -_num : _num; \
+})
+
+#define my_syscall3(num, arg1, arg2, arg3) \
+({ \
+ register long _num __asm__ ("v0") = (num); \
+ register long _arg1 __asm__ ("a0") = (long)(arg1); \
+ register long _arg2 __asm__ ("a1") = (long)(arg2); \
+ register long _arg3 __asm__ ("a2") = (long)(arg3); \
+ register long _arg4 __asm__ ("a3"); \
+ \
+ __asm__ volatile ( \
+ "addiu $sp, $sp, -32\n" \
+ "syscall\n" \
+ "addiu $sp, $sp, 32\n" \
+ : "=r"(_num), "=r"(_arg4) \
+ : "0"(_num), \
+ "r"(_arg1), "r"(_arg2), "r"(_arg3) \
+ : "memory", "cc", "at", "v1", "hi", "lo", \
+ "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \
+ ); \
+ _arg4 ? -_num : _num; \
+})
+
+#define my_syscall4(num, arg1, arg2, arg3, arg4) \
+({ \
+ register long _num __asm__ ("v0") = (num); \
+ register long _arg1 __asm__ ("a0") = (long)(arg1); \
+ register long _arg2 __asm__ ("a1") = (long)(arg2); \
+ register long _arg3 __asm__ ("a2") = (long)(arg3); \
+ register long _arg4 __asm__ ("a3") = (long)(arg4); \
+ \
+ __asm__ volatile ( \
+ "addiu $sp, $sp, -32\n" \
+ "syscall\n" \
+ "addiu $sp, $sp, 32\n" \
+ : "=r" (_num), "=r"(_arg4) \
+ : "0"(_num), \
+ "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4) \
+ : "memory", "cc", "at", "v1", "hi", "lo", \
+ "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \
+ ); \
+ _arg4 ? -_num : _num; \
+})
+
+#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \
+({ \
+ register long _num __asm__ ("v0") = (num); \
+ register long _arg1 __asm__ ("a0") = (long)(arg1); \
+ register long _arg2 __asm__ ("a1") = (long)(arg2); \
+ register long _arg3 __asm__ ("a2") = (long)(arg3); \
+ register long _arg4 __asm__ ("a3") = (long)(arg4); \
+ register long _arg5 = (long)(arg5); \
+ \
+ __asm__ volatile ( \
+ "addiu $sp, $sp, -32\n" \
+ "sw %7, 16($sp)\n" \
+ "syscall\n " \
+ "addiu $sp, $sp, 32\n" \
+ : "=r" (_num), "=r"(_arg4) \
+ : "0"(_num), \
+ "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5) \
+ : "memory", "cc", "at", "v1", "hi", "lo", \
+ "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \
+ ); \
+ _arg4 ? -_num : _num; \
+})
+
+/* startup code, note that it's called __start on MIPS */
+__asm__ (".section .text\n"
+ ".weak __start\n"
+ ".set nomips16\n"
+ ".set noreorder\n"
+ ".option pic0\n"
+ ".ent __start\n"
+ "__start:\n"
+ "lw $a0,($sp)\n" // argc was in the stack
+ "addiu $a1, $sp, 4\n" // argv = sp + 4
+ "sll $a2, $a0, 2\n" // a2 = argc * 4
+ "add $a2, $a2, $a1\n" // envp = argv + 4*argc ...
+ "addiu $a2, $a2, 4\n" // ... + 4
+ "li $t0, -8\n"
+ "and $sp, $sp, $t0\n" // sp must be 8-byte aligned
+ "addiu $sp,$sp,-16\n" // the callee expects to save a0..a3 there!
+ "jal main\n" // main() returns the status code, we'll exit with it.
+ "nop\n" // delayed slot
+ "move $a0, $v0\n" // retrieve 32-bit exit code from v0
+ "li $v0, 4001\n" // NR_exit == 4001
+ "syscall\n"
+ ".end __start\n"
+ "");
+
+#endif // _NOLIBC_ARCH_MIPS_H
diff --git a/tools/include/nolibc/arch-riscv.h b/tools/include/nolibc/arch-riscv.h
new file mode 100644
index 000000000000..95e2b7924925
--- /dev/null
+++ b/tools/include/nolibc/arch-riscv.h
@@ -0,0 +1,204 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * RISCV (32 and 64) specific definitions for NOLIBC
+ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_ARCH_RISCV_H
+#define _NOLIBC_ARCH_RISCV_H
+
+/* O_* macros for fcntl/open are architecture-specific */
+#define O_RDONLY 0
+#define O_WRONLY 1
+#define O_RDWR 2
+#define O_CREAT 0x100
+#define O_EXCL 0x200
+#define O_NOCTTY 0x400
+#define O_TRUNC 0x1000
+#define O_APPEND 0x2000
+#define O_NONBLOCK 0x4000
+#define O_DIRECTORY 0x200000
+
+struct sys_stat_struct {
+ unsigned long st_dev; /* Device. */
+ unsigned long st_ino; /* File serial number. */
+ unsigned int st_mode; /* File mode. */
+ unsigned int st_nlink; /* Link count. */
+ unsigned int st_uid; /* User ID of the file's owner. */
+ unsigned int st_gid; /* Group ID of the file's group. */
+ unsigned long st_rdev; /* Device number, if device. */
+ unsigned long __pad1;
+ long st_size; /* Size of file, in bytes. */
+ int st_blksize; /* Optimal block size for I/O. */
+ int __pad2;
+ long st_blocks; /* Number 512-byte blocks allocated. */
+ long st_atime; /* Time of last access. */
+ unsigned long st_atime_nsec;
+ long st_mtime; /* Time of last modification. */
+ unsigned long st_mtime_nsec;
+ long st_ctime; /* Time of last status change. */
+ unsigned long st_ctime_nsec;
+ unsigned int __unused4;
+ unsigned int __unused5;
+};
+
+#if __riscv_xlen == 64
+#define PTRLOG "3"
+#define SZREG "8"
+#elif __riscv_xlen == 32
+#define PTRLOG "2"
+#define SZREG "4"
+#endif
+
+/* Syscalls for RISCV :
+ * - stack is 16-byte aligned
+ * - syscall number is passed in a7
+ * - arguments are in a0, a1, a2, a3, a4, a5
+ * - the system call is performed by calling ecall
+ * - syscall return comes in a0
+ * - the arguments are cast to long and assigned into the target
+ * registers which are then simply passed as registers to the asm code,
+ * so that we don't have to experience issues with register constraints.
+ *
+ * On riscv, select() is not implemented so we have to use pselect6().
+ */
+#define __ARCH_WANT_SYS_PSELECT6
+
+#define my_syscall0(num) \
+({ \
+ register long _num __asm__ ("a7") = (num); \
+ register long _arg1 __asm__ ("a0"); \
+ \
+ __asm__ volatile ( \
+ "ecall\n\t" \
+ : "=r"(_arg1) \
+ : "r"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall1(num, arg1) \
+({ \
+ register long _num __asm__ ("a7") = (num); \
+ register long _arg1 __asm__ ("a0") = (long)(arg1); \
+ \
+ __asm__ volatile ( \
+ "ecall\n" \
+ : "+r"(_arg1) \
+ : "r"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall2(num, arg1, arg2) \
+({ \
+ register long _num __asm__ ("a7") = (num); \
+ register long _arg1 __asm__ ("a0") = (long)(arg1); \
+ register long _arg2 __asm__ ("a1") = (long)(arg2); \
+ \
+ __asm__ volatile ( \
+ "ecall\n" \
+ : "+r"(_arg1) \
+ : "r"(_arg2), \
+ "r"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall3(num, arg1, arg2, arg3) \
+({ \
+ register long _num __asm__ ("a7") = (num); \
+ register long _arg1 __asm__ ("a0") = (long)(arg1); \
+ register long _arg2 __asm__ ("a1") = (long)(arg2); \
+ register long _arg3 __asm__ ("a2") = (long)(arg3); \
+ \
+ __asm__ volatile ( \
+ "ecall\n\t" \
+ : "+r"(_arg1) \
+ : "r"(_arg2), "r"(_arg3), \
+ "r"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall4(num, arg1, arg2, arg3, arg4) \
+({ \
+ register long _num __asm__ ("a7") = (num); \
+ register long _arg1 __asm__ ("a0") = (long)(arg1); \
+ register long _arg2 __asm__ ("a1") = (long)(arg2); \
+ register long _arg3 __asm__ ("a2") = (long)(arg3); \
+ register long _arg4 __asm__ ("a3") = (long)(arg4); \
+ \
+ __asm__ volatile ( \
+ "ecall\n" \
+ : "+r"(_arg1) \
+ : "r"(_arg2), "r"(_arg3), "r"(_arg4), \
+ "r"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \
+({ \
+ register long _num __asm__ ("a7") = (num); \
+ register long _arg1 __asm__ ("a0") = (long)(arg1); \
+ register long _arg2 __asm__ ("a1") = (long)(arg2); \
+ register long _arg3 __asm__ ("a2") = (long)(arg3); \
+ register long _arg4 __asm__ ("a3") = (long)(arg4); \
+ register long _arg5 __asm__ ("a4") = (long)(arg5); \
+ \
+ __asm__ volatile ( \
+ "ecall\n" \
+ : "+r"(_arg1) \
+ : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
+ "r"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \
+({ \
+ register long _num __asm__ ("a7") = (num); \
+ register long _arg1 __asm__ ("a0") = (long)(arg1); \
+ register long _arg2 __asm__ ("a1") = (long)(arg2); \
+ register long _arg3 __asm__ ("a2") = (long)(arg3); \
+ register long _arg4 __asm__ ("a3") = (long)(arg4); \
+ register long _arg5 __asm__ ("a4") = (long)(arg5); \
+ register long _arg6 __asm__ ("a5") = (long)(arg6); \
+ \
+ __asm__ volatile ( \
+ "ecall\n" \
+ : "+r"(_arg1) \
+ : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), "r"(_arg6), \
+ "r"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+/* startup code */
+__asm__ (".section .text\n"
+ ".weak _start\n"
+ "_start:\n"
+ ".option push\n"
+ ".option norelax\n"
+ "lla gp, __global_pointer$\n"
+ ".option pop\n"
+ "ld a0, 0(sp)\n" // argc (a0) was in the stack
+ "add a1, sp, "SZREG"\n" // argv (a1) = sp
+ "slli a2, a0, "PTRLOG"\n" // envp (a2) = SZREG*argc ...
+ "add a2, a2, "SZREG"\n" // + SZREG (skip null)
+ "add a2,a2,a1\n" // + argv
+ "andi sp,a1,-16\n" // sp must be 16-byte aligned
+ "call main\n" // main() returns the status code, we'll exit with it.
+ "li a7, 93\n" // NR_exit == 93
+ "ecall\n"
+ "");
+
+#endif // _NOLIBC_ARCH_RISCV_H
diff --git a/tools/include/nolibc/arch-x86_64.h b/tools/include/nolibc/arch-x86_64.h
new file mode 100644
index 000000000000..0e1e9eb8545d
--- /dev/null
+++ b/tools/include/nolibc/arch-x86_64.h
@@ -0,0 +1,215 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * x86_64 specific definitions for NOLIBC
+ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_ARCH_X86_64_H
+#define _NOLIBC_ARCH_X86_64_H
+
+/* O_* macros for fcntl/open are architecture-specific */
+#define O_RDONLY 0
+#define O_WRONLY 1
+#define O_RDWR 2
+#define O_CREAT 0x40
+#define O_EXCL 0x80
+#define O_NOCTTY 0x100
+#define O_TRUNC 0x200
+#define O_APPEND 0x400
+#define O_NONBLOCK 0x800
+#define O_DIRECTORY 0x10000
+
+/* The struct returned by the stat() syscall, equivalent to stat64(). The
+ * syscall returns 116 bytes and stops in the middle of __unused.
+ */
+struct sys_stat_struct {
+ unsigned long st_dev;
+ unsigned long st_ino;
+ unsigned long st_nlink;
+ unsigned int st_mode;
+ unsigned int st_uid;
+
+ unsigned int st_gid;
+ unsigned int __pad0;
+ unsigned long st_rdev;
+ long st_size;
+ long st_blksize;
+
+ long st_blocks;
+ unsigned long st_atime;
+ unsigned long st_atime_nsec;
+ unsigned long st_mtime;
+
+ unsigned long st_mtime_nsec;
+ unsigned long st_ctime;
+ unsigned long st_ctime_nsec;
+ long __unused[3];
+};
+
+/* Syscalls for x86_64 :
+ * - registers are 64-bit
+ * - syscall number is passed in rax
+ * - arguments are in rdi, rsi, rdx, r10, r8, r9 respectively
+ * - the system call is performed by calling the syscall instruction
+ * - syscall return comes in rax
+ * - rcx and r11 are clobbered, others are preserved.
+ * - the arguments are cast to long and assigned into the target registers
+ * which are then simply passed as registers to the asm code, so that we
+ * don't have to experience issues with register constraints.
+ * - the syscall number is always specified last in order to allow to force
+ * some registers before (gcc refuses a %-register at the last position).
+ * - see also x86-64 ABI section A.2 AMD64 Linux Kernel Conventions, A.2.1
+ * Calling Conventions.
+ *
+ * Link x86-64 ABI: https://gitlab.com/x86-psABIs/x86-64-ABI/-/wikis/home
+ *
+ */
+
+#define my_syscall0(num) \
+({ \
+ long _ret; \
+ register long _num __asm__ ("rax") = (num); \
+ \
+ __asm__ volatile ( \
+ "syscall\n" \
+ : "=a"(_ret) \
+ : "0"(_num) \
+ : "rcx", "r11", "memory", "cc" \
+ ); \
+ _ret; \
+})
+
+#define my_syscall1(num, arg1) \
+({ \
+ long _ret; \
+ register long _num __asm__ ("rax") = (num); \
+ register long _arg1 __asm__ ("rdi") = (long)(arg1); \
+ \
+ __asm__ volatile ( \
+ "syscall\n" \
+ : "=a"(_ret) \
+ : "r"(_arg1), \
+ "0"(_num) \
+ : "rcx", "r11", "memory", "cc" \
+ ); \
+ _ret; \
+})
+
+#define my_syscall2(num, arg1, arg2) \
+({ \
+ long _ret; \
+ register long _num __asm__ ("rax") = (num); \
+ register long _arg1 __asm__ ("rdi") = (long)(arg1); \
+ register long _arg2 __asm__ ("rsi") = (long)(arg2); \
+ \
+ __asm__ volatile ( \
+ "syscall\n" \
+ : "=a"(_ret) \
+ : "r"(_arg1), "r"(_arg2), \
+ "0"(_num) \
+ : "rcx", "r11", "memory", "cc" \
+ ); \
+ _ret; \
+})
+
+#define my_syscall3(num, arg1, arg2, arg3) \
+({ \
+ long _ret; \
+ register long _num __asm__ ("rax") = (num); \
+ register long _arg1 __asm__ ("rdi") = (long)(arg1); \
+ register long _arg2 __asm__ ("rsi") = (long)(arg2); \
+ register long _arg3 __asm__ ("rdx") = (long)(arg3); \
+ \
+ __asm__ volatile ( \
+ "syscall\n" \
+ : "=a"(_ret) \
+ : "r"(_arg1), "r"(_arg2), "r"(_arg3), \
+ "0"(_num) \
+ : "rcx", "r11", "memory", "cc" \
+ ); \
+ _ret; \
+})
+
+#define my_syscall4(num, arg1, arg2, arg3, arg4) \
+({ \
+ long _ret; \
+ register long _num __asm__ ("rax") = (num); \
+ register long _arg1 __asm__ ("rdi") = (long)(arg1); \
+ register long _arg2 __asm__ ("rsi") = (long)(arg2); \
+ register long _arg3 __asm__ ("rdx") = (long)(arg3); \
+ register long _arg4 __asm__ ("r10") = (long)(arg4); \
+ \
+ __asm__ volatile ( \
+ "syscall\n" \
+ : "=a"(_ret) \
+ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \
+ "0"(_num) \
+ : "rcx", "r11", "memory", "cc" \
+ ); \
+ _ret; \
+})
+
+#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \
+({ \
+ long _ret; \
+ register long _num __asm__ ("rax") = (num); \
+ register long _arg1 __asm__ ("rdi") = (long)(arg1); \
+ register long _arg2 __asm__ ("rsi") = (long)(arg2); \
+ register long _arg3 __asm__ ("rdx") = (long)(arg3); \
+ register long _arg4 __asm__ ("r10") = (long)(arg4); \
+ register long _arg5 __asm__ ("r8") = (long)(arg5); \
+ \
+ __asm__ volatile ( \
+ "syscall\n" \
+ : "=a"(_ret) \
+ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
+ "0"(_num) \
+ : "rcx", "r11", "memory", "cc" \
+ ); \
+ _ret; \
+})
+
+#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \
+({ \
+ long _ret; \
+ register long _num __asm__ ("rax") = (num); \
+ register long _arg1 __asm__ ("rdi") = (long)(arg1); \
+ register long _arg2 __asm__ ("rsi") = (long)(arg2); \
+ register long _arg3 __asm__ ("rdx") = (long)(arg3); \
+ register long _arg4 __asm__ ("r10") = (long)(arg4); \
+ register long _arg5 __asm__ ("r8") = (long)(arg5); \
+ register long _arg6 __asm__ ("r9") = (long)(arg6); \
+ \
+ __asm__ volatile ( \
+ "syscall\n" \
+ : "=a"(_ret) \
+ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
+ "r"(_arg6), "0"(_num) \
+ : "rcx", "r11", "memory", "cc" \
+ ); \
+ _ret; \
+})
+
+/* startup code */
+/*
+ * x86-64 System V ABI mandates:
+ * 1) %rsp must be 16-byte aligned right before the function call.
+ * 2) The deepest stack frame should be zero (the %rbp).
+ *
+ */
+__asm__ (".section .text\n"
+ ".weak _start\n"
+ "_start:\n"
+ "pop %rdi\n" // argc (first arg, %rdi)
+ "mov %rsp, %rsi\n" // argv[] (second arg, %rsi)
+ "lea 8(%rsi,%rdi,8),%rdx\n" // then a NULL then envp (third arg, %rdx)
+ "xor %ebp, %ebp\n" // zero the stack frame
+ "and $-16, %rsp\n" // x86 ABI : esp must be 16-byte aligned before call
+ "call main\n" // main() returns the status code, we'll exit with it.
+ "mov %eax, %edi\n" // retrieve exit code (32 bit)
+ "mov $60, %eax\n" // NR_exit == 60
+ "syscall\n" // really exit
+ "hlt\n" // ensure it does not return
+ "");
+
+#endif // _NOLIBC_ARCH_X86_64_H
diff --git a/tools/include/nolibc/arch.h b/tools/include/nolibc/arch.h
new file mode 100644
index 000000000000..4c6992321b0d
--- /dev/null
+++ b/tools/include/nolibc/arch.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
+ */
+
+/* Below comes the architecture-specific code. For each architecture, we have
+ * the syscall declarations and the _start code definition. This is the only
+ * global part. On all architectures the kernel puts everything in the stack
+ * before jumping to _start just above us, without any return address (_start
+ * is not a function but an entry pint). So at the stack pointer we find argc.
+ * Then argv[] begins, and ends at the first NULL. Then we have envp which
+ * starts and ends with a NULL as well. So envp=argv+argc+1.
+ */
+
+#ifndef _NOLIBC_ARCH_H
+#define _NOLIBC_ARCH_H
+
+#if defined(__x86_64__)
+#include "arch-x86_64.h"
+#elif defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__)
+#include "arch-i386.h"
+#elif defined(__ARM_EABI__)
+#include "arch-arm.h"
+#elif defined(__aarch64__)
+#include "arch-aarch64.h"
+#elif defined(__mips__) && defined(_ABIO32)
+#include "arch-mips.h"
+#elif defined(__riscv)
+#include "arch-riscv.h"
+#endif
+
+#endif /* _NOLIBC_ARCH_H */
diff --git a/tools/include/nolibc/ctype.h b/tools/include/nolibc/ctype.h
new file mode 100644
index 000000000000..e3000b2992d7
--- /dev/null
+++ b/tools/include/nolibc/ctype.h
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * ctype function definitions for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_CTYPE_H
+#define _NOLIBC_CTYPE_H
+
+#include "std.h"
+
+/*
+ * As much as possible, please keep functions alphabetically sorted.
+ */
+
+static __attribute__((unused))
+int isascii(int c)
+{
+ /* 0x00..0x7f */
+ return (unsigned int)c <= 0x7f;
+}
+
+static __attribute__((unused))
+int isblank(int c)
+{
+ return c == '\t' || c == ' ';
+}
+
+static __attribute__((unused))
+int iscntrl(int c)
+{
+ /* 0x00..0x1f, 0x7f */
+ return (unsigned int)c < 0x20 || c == 0x7f;
+}
+
+static __attribute__((unused))
+int isdigit(int c)
+{
+ return (unsigned int)(c - '0') < 10;
+}
+
+static __attribute__((unused))
+int isgraph(int c)
+{
+ /* 0x21..0x7e */
+ return (unsigned int)(c - 0x21) < 0x5e;
+}
+
+static __attribute__((unused))
+int islower(int c)
+{
+ return (unsigned int)(c - 'a') < 26;
+}
+
+static __attribute__((unused))
+int isprint(int c)
+{
+ /* 0x20..0x7e */
+ return (unsigned int)(c - 0x20) < 0x5f;
+}
+
+static __attribute__((unused))
+int isspace(int c)
+{
+ /* \t is 0x9, \n is 0xA, \v is 0xB, \f is 0xC, \r is 0xD */
+ return ((unsigned int)c == ' ') || (unsigned int)(c - 0x09) < 5;
+}
+
+static __attribute__((unused))
+int isupper(int c)
+{
+ return (unsigned int)(c - 'A') < 26;
+}
+
+static __attribute__((unused))
+int isxdigit(int c)
+{
+ return isdigit(c) || (unsigned int)(c - 'A') < 6 || (unsigned int)(c - 'a') < 6;
+}
+
+static __attribute__((unused))
+int isalpha(int c)
+{
+ return islower(c) || isupper(c);
+}
+
+static __attribute__((unused))
+int isalnum(int c)
+{
+ return isalpha(c) || isdigit(c);
+}
+
+static __attribute__((unused))
+int ispunct(int c)
+{
+ return isgraph(c) && !isalnum(c);
+}
+
+#endif /* _NOLIBC_CTYPE_H */
diff --git a/tools/include/nolibc/errno.h b/tools/include/nolibc/errno.h
new file mode 100644
index 000000000000..06893d6dfb7a
--- /dev/null
+++ b/tools/include/nolibc/errno.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Minimal errno definitions for NOLIBC
+ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_ERRNO_H
+#define _NOLIBC_ERRNO_H
+
+#include <asm/errno.h>
+
+/* this way it will be removed if unused */
+static int errno;
+
+#ifndef NOLIBC_IGNORE_ERRNO
+#define SET_ERRNO(v) do { errno = (v); } while (0)
+#else
+#define SET_ERRNO(v) do { } while (0)
+#endif
+
+
+/* errno codes all ensure that they will not conflict with a valid pointer
+ * because they all correspond to the highest addressable memory page.
+ */
+#define MAX_ERRNO 4095
+
+#endif /* _NOLIBC_ERRNO_H */
diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h
index c1c285fe494a..b2bc48d3cfe4 100644
--- a/tools/include/nolibc/nolibc.h
+++ b/tools/include/nolibc/nolibc.h
@@ -57,22 +57,32 @@
* having to specify anything.
*
* Finally some very common libc-level functions are provided. It is the case
- * for a few functions usually found in string.h, ctype.h, or stdlib.h. Nothing
- * is currently provided regarding stdio emulation.
+ * for a few functions usually found in string.h, ctype.h, or stdlib.h.
*
- * The macro NOLIBC is always defined, so that it is possible for a program to
- * check this macro to know if it is being built against and decide to disable
- * some features or simply not to include some standard libc files.
- *
- * Ideally this file should be split in multiple files for easier long term
- * maintenance, but provided as a single file as it is now, it's quite
- * convenient to use. Maybe some variations involving a set of includes at the
- * top could work.
+ * The nolibc.h file is only a convenient entry point which includes all other
+ * files. It also defines the NOLIBC macro, so that it is possible for a
+ * program to check this macro to know if it is being built against and decide
+ * to disable some features or simply not to include some standard libc files.
*
* A simple static executable may be built this way :
* $ gcc -fno-asynchronous-unwind-tables -fno-ident -s -Os -nostdlib \
* -static -include nolibc.h -o hello hello.c -lgcc
*
+ * Simple programs meant to be reasonably portable to various libc and using
+ * only a few common includes, may also be built by simply making the include
+ * path point to the nolibc directory:
+ * $ gcc -fno-asynchronous-unwind-tables -fno-ident -s -Os -nostdlib \
+ * -I../nolibc -o hello hello.c -lgcc
+ *
+ * The available standard (but limited) include files are:
+ * ctype.h, errno.h, signal.h, stdio.h, stdlib.h, string.h, time.h
+ *
+ * In addition, the following ones are expected to be provided by the compiler:
+ * float.h, stdarg.h, stddef.h
+ *
+ * The following ones which are part to the C standard are not provided:
+ * assert.h, locale.h, math.h, setjmp.h, limits.h
+ *
* A very useful calling convention table may be found here :
* http://man7.org/linux/man-pages/man2/syscall.2.html
*
@@ -80,2502 +90,22 @@
* https://w3challs.com/syscalls/
*
*/
+#ifndef _NOLIBC_H
+#define _NOLIBC_H
-#include <asm/unistd.h>
-#include <asm/ioctls.h>
-#include <asm/errno.h>
-#include <linux/fs.h>
-#include <linux/loop.h>
-#include <linux/time.h>
+#include "std.h"
+#include "arch.h"
+#include "types.h"
+#include "sys.h"
+#include "ctype.h"
+#include "signal.h"
+#include "stdio.h"
+#include "stdlib.h"
+#include "string.h"
+#include "time.h"
+#include "unistd.h"
+/* Used by programs to avoid std includes */
#define NOLIBC
-/* this way it will be removed if unused */
-static int errno;
-
-#ifndef NOLIBC_IGNORE_ERRNO
-#define SET_ERRNO(v) do { errno = (v); } while (0)
-#else
-#define SET_ERRNO(v) do { } while (0)
-#endif
-
-/* errno codes all ensure that they will not conflict with a valid pointer
- * because they all correspond to the highest addressable memory page.
- */
-#define MAX_ERRNO 4095
-
-/* Declare a few quite common macros and types that usually are in stdlib.h,
- * stdint.h, ctype.h, unistd.h and a few other common locations.
- */
-
-#define NULL ((void *)0)
-
-/* stdint types */
-typedef unsigned char uint8_t;
-typedef signed char int8_t;
-typedef unsigned short uint16_t;
-typedef signed short int16_t;
-typedef unsigned int uint32_t;
-typedef signed int int32_t;
-typedef unsigned long long uint64_t;
-typedef signed long long int64_t;
-typedef unsigned long size_t;
-typedef signed long ssize_t;
-typedef unsigned long uintptr_t;
-typedef signed long intptr_t;
-typedef signed long ptrdiff_t;
-
-/* for stat() */
-typedef unsigned int dev_t;
-typedef unsigned long ino_t;
-typedef unsigned int mode_t;
-typedef signed int pid_t;
-typedef unsigned int uid_t;
-typedef unsigned int gid_t;
-typedef unsigned long nlink_t;
-typedef signed long off_t;
-typedef signed long blksize_t;
-typedef signed long blkcnt_t;
-typedef signed long time_t;
-
-/* for poll() */
-struct pollfd {
- int fd;
- short int events;
- short int revents;
-};
-
-/* for getdents64() */
-struct linux_dirent64 {
- uint64_t d_ino;
- int64_t d_off;
- unsigned short d_reclen;
- unsigned char d_type;
- char d_name[];
-};
-
-/* commonly an fd_set represents 256 FDs */
-#define FD_SETSIZE 256
-typedef struct { uint32_t fd32[FD_SETSIZE/32]; } fd_set;
-
-/* needed by wait4() */
-struct rusage {
- struct timeval ru_utime;
- struct timeval ru_stime;
- long ru_maxrss;
- long ru_ixrss;
- long ru_idrss;
- long ru_isrss;
- long ru_minflt;
- long ru_majflt;
- long ru_nswap;
- long ru_inblock;
- long ru_oublock;
- long ru_msgsnd;
- long ru_msgrcv;
- long ru_nsignals;
- long ru_nvcsw;
- long ru_nivcsw;
-};
-
-/* stat flags (WARNING, octal here) */
-#define S_IFDIR 0040000
-#define S_IFCHR 0020000
-#define S_IFBLK 0060000
-#define S_IFREG 0100000
-#define S_IFIFO 0010000
-#define S_IFLNK 0120000
-#define S_IFSOCK 0140000
-#define S_IFMT 0170000
-
-#define S_ISDIR(mode) (((mode) & S_IFDIR) == S_IFDIR)
-#define S_ISCHR(mode) (((mode) & S_IFCHR) == S_IFCHR)
-#define S_ISBLK(mode) (((mode) & S_IFBLK) == S_IFBLK)
-#define S_ISREG(mode) (((mode) & S_IFREG) == S_IFREG)
-#define S_ISFIFO(mode) (((mode) & S_IFIFO) == S_IFIFO)
-#define S_ISLNK(mode) (((mode) & S_IFLNK) == S_IFLNK)
-#define S_ISSOCK(mode) (((mode) & S_IFSOCK) == S_IFSOCK)
-
-#define DT_UNKNOWN 0
-#define DT_FIFO 1
-#define DT_CHR 2
-#define DT_DIR 4
-#define DT_BLK 6
-#define DT_REG 8
-#define DT_LNK 10
-#define DT_SOCK 12
-
-/* all the *at functions */
-#ifndef AT_FDCWD
-#define AT_FDCWD -100
-#endif
-
-/* lseek */
-#define SEEK_SET 0
-#define SEEK_CUR 1
-#define SEEK_END 2
-
-/* reboot */
-#define LINUX_REBOOT_MAGIC1 0xfee1dead
-#define LINUX_REBOOT_MAGIC2 0x28121969
-#define LINUX_REBOOT_CMD_HALT 0xcdef0123
-#define LINUX_REBOOT_CMD_POWER_OFF 0x4321fedc
-#define LINUX_REBOOT_CMD_RESTART 0x01234567
-#define LINUX_REBOOT_CMD_SW_SUSPEND 0xd000fce2
-
-
-/* The format of the struct as returned by the libc to the application, which
- * significantly differs from the format returned by the stat() syscall flavours.
- */
-struct stat {
- dev_t st_dev; /* ID of device containing file */
- ino_t st_ino; /* inode number */
- mode_t st_mode; /* protection */
- nlink_t st_nlink; /* number of hard links */
- uid_t st_uid; /* user ID of owner */
- gid_t st_gid; /* group ID of owner */
- dev_t st_rdev; /* device ID (if special file) */
- off_t st_size; /* total size, in bytes */
- blksize_t st_blksize; /* blocksize for file system I/O */
- blkcnt_t st_blocks; /* number of 512B blocks allocated */
- time_t st_atime; /* time of last access */
- time_t st_mtime; /* time of last modification */
- time_t st_ctime; /* time of last status change */
-};
-
-#define WEXITSTATUS(status) (((status) & 0xff00) >> 8)
-#define WIFEXITED(status) (((status) & 0x7f) == 0)
-
-/* for SIGCHLD */
-#include <asm/signal.h>
-
-/* Below comes the architecture-specific code. For each architecture, we have
- * the syscall declarations and the _start code definition. This is the only
- * global part. On all architectures the kernel puts everything in the stack
- * before jumping to _start just above us, without any return address (_start
- * is not a function but an entry pint). So at the stack pointer we find argc.
- * Then argv[] begins, and ends at the first NULL. Then we have envp which
- * starts and ends with a NULL as well. So envp=argv+argc+1.
- */
-
-#if defined(__x86_64__)
-/* Syscalls for x86_64 :
- * - registers are 64-bit
- * - syscall number is passed in rax
- * - arguments are in rdi, rsi, rdx, r10, r8, r9 respectively
- * - the system call is performed by calling the syscall instruction
- * - syscall return comes in rax
- * - rcx and r11 are clobbered, others are preserved.
- * - the arguments are cast to long and assigned into the target registers
- * which are then simply passed as registers to the asm code, so that we
- * don't have to experience issues with register constraints.
- * - the syscall number is always specified last in order to allow to force
- * some registers before (gcc refuses a %-register at the last position).
- * - see also x86-64 ABI section A.2 AMD64 Linux Kernel Conventions, A.2.1
- * Calling Conventions.
- *
- * Link x86-64 ABI: https://gitlab.com/x86-psABIs/x86-64-ABI/-/wikis/x86-64-psABI
- *
- */
-
-#define my_syscall0(num) \
-({ \
- long _ret; \
- register long _num asm("rax") = (num); \
- \
- asm volatile ( \
- "syscall\n" \
- : "=a"(_ret) \
- : "0"(_num) \
- : "rcx", "r11", "memory", "cc" \
- ); \
- _ret; \
-})
-
-#define my_syscall1(num, arg1) \
-({ \
- long _ret; \
- register long _num asm("rax") = (num); \
- register long _arg1 asm("rdi") = (long)(arg1); \
- \
- asm volatile ( \
- "syscall\n" \
- : "=a"(_ret) \
- : "r"(_arg1), \
- "0"(_num) \
- : "rcx", "r11", "memory", "cc" \
- ); \
- _ret; \
-})
-
-#define my_syscall2(num, arg1, arg2) \
-({ \
- long _ret; \
- register long _num asm("rax") = (num); \
- register long _arg1 asm("rdi") = (long)(arg1); \
- register long _arg2 asm("rsi") = (long)(arg2); \
- \
- asm volatile ( \
- "syscall\n" \
- : "=a"(_ret) \
- : "r"(_arg1), "r"(_arg2), \
- "0"(_num) \
- : "rcx", "r11", "memory", "cc" \
- ); \
- _ret; \
-})
-
-#define my_syscall3(num, arg1, arg2, arg3) \
-({ \
- long _ret; \
- register long _num asm("rax") = (num); \
- register long _arg1 asm("rdi") = (long)(arg1); \
- register long _arg2 asm("rsi") = (long)(arg2); \
- register long _arg3 asm("rdx") = (long)(arg3); \
- \
- asm volatile ( \
- "syscall\n" \
- : "=a"(_ret) \
- : "r"(_arg1), "r"(_arg2), "r"(_arg3), \
- "0"(_num) \
- : "rcx", "r11", "memory", "cc" \
- ); \
- _ret; \
-})
-
-#define my_syscall4(num, arg1, arg2, arg3, arg4) \
-({ \
- long _ret; \
- register long _num asm("rax") = (num); \
- register long _arg1 asm("rdi") = (long)(arg1); \
- register long _arg2 asm("rsi") = (long)(arg2); \
- register long _arg3 asm("rdx") = (long)(arg3); \
- register long _arg4 asm("r10") = (long)(arg4); \
- \
- asm volatile ( \
- "syscall\n" \
- : "=a"(_ret) \
- : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \
- "0"(_num) \
- : "rcx", "r11", "memory", "cc" \
- ); \
- _ret; \
-})
-
-#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \
-({ \
- long _ret; \
- register long _num asm("rax") = (num); \
- register long _arg1 asm("rdi") = (long)(arg1); \
- register long _arg2 asm("rsi") = (long)(arg2); \
- register long _arg3 asm("rdx") = (long)(arg3); \
- register long _arg4 asm("r10") = (long)(arg4); \
- register long _arg5 asm("r8") = (long)(arg5); \
- \
- asm volatile ( \
- "syscall\n" \
- : "=a"(_ret) \
- : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
- "0"(_num) \
- : "rcx", "r11", "memory", "cc" \
- ); \
- _ret; \
-})
-
-#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \
-({ \
- long _ret; \
- register long _num asm("rax") = (num); \
- register long _arg1 asm("rdi") = (long)(arg1); \
- register long _arg2 asm("rsi") = (long)(arg2); \
- register long _arg3 asm("rdx") = (long)(arg3); \
- register long _arg4 asm("r10") = (long)(arg4); \
- register long _arg5 asm("r8") = (long)(arg5); \
- register long _arg6 asm("r9") = (long)(arg6); \
- \
- asm volatile ( \
- "syscall\n" \
- : "=a"(_ret) \
- : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
- "r"(_arg6), "0"(_num) \
- : "rcx", "r11", "memory", "cc" \
- ); \
- _ret; \
-})
-
-/* startup code */
-/*
- * x86-64 System V ABI mandates:
- * 1) %rsp must be 16-byte aligned right before the function call.
- * 2) The deepest stack frame should be zero (the %rbp).
- *
- */
-asm(".section .text\n"
- ".global _start\n"
- "_start:\n"
- "pop %rdi\n" // argc (first arg, %rdi)
- "mov %rsp, %rsi\n" // argv[] (second arg, %rsi)
- "lea 8(%rsi,%rdi,8),%rdx\n" // then a NULL then envp (third arg, %rdx)
- "xor %ebp, %ebp\n" // zero the stack frame
- "and $-16, %rsp\n" // x86 ABI : esp must be 16-byte aligned before call
- "call main\n" // main() returns the status code, we'll exit with it.
- "mov %eax, %edi\n" // retrieve exit code (32 bit)
- "mov $60, %eax\n" // NR_exit == 60
- "syscall\n" // really exit
- "hlt\n" // ensure it does not return
- "");
-
-/* fcntl / open */
-#define O_RDONLY 0
-#define O_WRONLY 1
-#define O_RDWR 2
-#define O_CREAT 0x40
-#define O_EXCL 0x80
-#define O_NOCTTY 0x100
-#define O_TRUNC 0x200
-#define O_APPEND 0x400
-#define O_NONBLOCK 0x800
-#define O_DIRECTORY 0x10000
-
-/* The struct returned by the stat() syscall, equivalent to stat64(). The
- * syscall returns 116 bytes and stops in the middle of __unused.
- */
-struct sys_stat_struct {
- unsigned long st_dev;
- unsigned long st_ino;
- unsigned long st_nlink;
- unsigned int st_mode;
- unsigned int st_uid;
-
- unsigned int st_gid;
- unsigned int __pad0;
- unsigned long st_rdev;
- long st_size;
- long st_blksize;
-
- long st_blocks;
- unsigned long st_atime;
- unsigned long st_atime_nsec;
- unsigned long st_mtime;
-
- unsigned long st_mtime_nsec;
- unsigned long st_ctime;
- unsigned long st_ctime_nsec;
- long __unused[3];
-};
-
-#elif defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__)
-/* Syscalls for i386 :
- * - mostly similar to x86_64
- * - registers are 32-bit
- * - syscall number is passed in eax
- * - arguments are in ebx, ecx, edx, esi, edi, ebp respectively
- * - all registers are preserved (except eax of course)
- * - the system call is performed by calling int $0x80
- * - syscall return comes in eax
- * - the arguments are cast to long and assigned into the target registers
- * which are then simply passed as registers to the asm code, so that we
- * don't have to experience issues with register constraints.
- * - the syscall number is always specified last in order to allow to force
- * some registers before (gcc refuses a %-register at the last position).
- *
- * Also, i386 supports the old_select syscall if newselect is not available
- */
-#define __ARCH_WANT_SYS_OLD_SELECT
-
-#define my_syscall0(num) \
-({ \
- long _ret; \
- register long _num asm("eax") = (num); \
- \
- asm volatile ( \
- "int $0x80\n" \
- : "=a" (_ret) \
- : "0"(_num) \
- : "memory", "cc" \
- ); \
- _ret; \
-})
-
-#define my_syscall1(num, arg1) \
-({ \
- long _ret; \
- register long _num asm("eax") = (num); \
- register long _arg1 asm("ebx") = (long)(arg1); \
- \
- asm volatile ( \
- "int $0x80\n" \
- : "=a" (_ret) \
- : "r"(_arg1), \
- "0"(_num) \
- : "memory", "cc" \
- ); \
- _ret; \
-})
-
-#define my_syscall2(num, arg1, arg2) \
-({ \
- long _ret; \
- register long _num asm("eax") = (num); \
- register long _arg1 asm("ebx") = (long)(arg1); \
- register long _arg2 asm("ecx") = (long)(arg2); \
- \
- asm volatile ( \
- "int $0x80\n" \
- : "=a" (_ret) \
- : "r"(_arg1), "r"(_arg2), \
- "0"(_num) \
- : "memory", "cc" \
- ); \
- _ret; \
-})
-
-#define my_syscall3(num, arg1, arg2, arg3) \
-({ \
- long _ret; \
- register long _num asm("eax") = (num); \
- register long _arg1 asm("ebx") = (long)(arg1); \
- register long _arg2 asm("ecx") = (long)(arg2); \
- register long _arg3 asm("edx") = (long)(arg3); \
- \
- asm volatile ( \
- "int $0x80\n" \
- : "=a" (_ret) \
- : "r"(_arg1), "r"(_arg2), "r"(_arg3), \
- "0"(_num) \
- : "memory", "cc" \
- ); \
- _ret; \
-})
-
-#define my_syscall4(num, arg1, arg2, arg3, arg4) \
-({ \
- long _ret; \
- register long _num asm("eax") = (num); \
- register long _arg1 asm("ebx") = (long)(arg1); \
- register long _arg2 asm("ecx") = (long)(arg2); \
- register long _arg3 asm("edx") = (long)(arg3); \
- register long _arg4 asm("esi") = (long)(arg4); \
- \
- asm volatile ( \
- "int $0x80\n" \
- : "=a" (_ret) \
- : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \
- "0"(_num) \
- : "memory", "cc" \
- ); \
- _ret; \
-})
-
-#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \
-({ \
- long _ret; \
- register long _num asm("eax") = (num); \
- register long _arg1 asm("ebx") = (long)(arg1); \
- register long _arg2 asm("ecx") = (long)(arg2); \
- register long _arg3 asm("edx") = (long)(arg3); \
- register long _arg4 asm("esi") = (long)(arg4); \
- register long _arg5 asm("edi") = (long)(arg5); \
- \
- asm volatile ( \
- "int $0x80\n" \
- : "=a" (_ret) \
- : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
- "0"(_num) \
- : "memory", "cc" \
- ); \
- _ret; \
-})
-
-/* startup code */
-/*
- * i386 System V ABI mandates:
- * 1) last pushed argument must be 16-byte aligned.
- * 2) The deepest stack frame should be set to zero
- *
- */
-asm(".section .text\n"
- ".global _start\n"
- "_start:\n"
- "pop %eax\n" // argc (first arg, %eax)
- "mov %esp, %ebx\n" // argv[] (second arg, %ebx)
- "lea 4(%ebx,%eax,4),%ecx\n" // then a NULL then envp (third arg, %ecx)
- "xor %ebp, %ebp\n" // zero the stack frame
- "and $-16, %esp\n" // x86 ABI : esp must be 16-byte aligned before
- "sub $4, %esp\n" // the call instruction (args are aligned)
- "push %ecx\n" // push all registers on the stack so that we
- "push %ebx\n" // support both regparm and plain stack modes
- "push %eax\n"
- "call main\n" // main() returns the status code in %eax
- "mov %eax, %ebx\n" // retrieve exit code (32-bit int)
- "movl $1, %eax\n" // NR_exit == 1
- "int $0x80\n" // exit now
- "hlt\n" // ensure it does not
- "");
-
-/* fcntl / open */
-#define O_RDONLY 0
-#define O_WRONLY 1
-#define O_RDWR 2
-#define O_CREAT 0x40
-#define O_EXCL 0x80
-#define O_NOCTTY 0x100
-#define O_TRUNC 0x200
-#define O_APPEND 0x400
-#define O_NONBLOCK 0x800
-#define O_DIRECTORY 0x10000
-
-/* The struct returned by the stat() syscall, 32-bit only, the syscall returns
- * exactly 56 bytes (stops before the unused array).
- */
-struct sys_stat_struct {
- unsigned long st_dev;
- unsigned long st_ino;
- unsigned short st_mode;
- unsigned short st_nlink;
- unsigned short st_uid;
- unsigned short st_gid;
-
- unsigned long st_rdev;
- unsigned long st_size;
- unsigned long st_blksize;
- unsigned long st_blocks;
-
- unsigned long st_atime;
- unsigned long st_atime_nsec;
- unsigned long st_mtime;
- unsigned long st_mtime_nsec;
-
- unsigned long st_ctime;
- unsigned long st_ctime_nsec;
- unsigned long __unused[2];
-};
-
-#elif defined(__ARM_EABI__)
-/* Syscalls for ARM in ARM or Thumb modes :
- * - registers are 32-bit
- * - stack is 8-byte aligned
- * ( http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.faqs/ka4127.html)
- * - syscall number is passed in r7
- * - arguments are in r0, r1, r2, r3, r4, r5
- * - the system call is performed by calling svc #0
- * - syscall return comes in r0.
- * - only lr is clobbered.
- * - the arguments are cast to long and assigned into the target registers
- * which are then simply passed as registers to the asm code, so that we
- * don't have to experience issues with register constraints.
- * - the syscall number is always specified last in order to allow to force
- * some registers before (gcc refuses a %-register at the last position).
- *
- * Also, ARM supports the old_select syscall if newselect is not available
- */
-#define __ARCH_WANT_SYS_OLD_SELECT
-
-#define my_syscall0(num) \
-({ \
- register long _num asm("r7") = (num); \
- register long _arg1 asm("r0"); \
- \
- asm volatile ( \
- "svc #0\n" \
- : "=r"(_arg1) \
- : "r"(_num) \
- : "memory", "cc", "lr" \
- ); \
- _arg1; \
-})
-
-#define my_syscall1(num, arg1) \
-({ \
- register long _num asm("r7") = (num); \
- register long _arg1 asm("r0") = (long)(arg1); \
- \
- asm volatile ( \
- "svc #0\n" \
- : "=r"(_arg1) \
- : "r"(_arg1), \
- "r"(_num) \
- : "memory", "cc", "lr" \
- ); \
- _arg1; \
-})
-
-#define my_syscall2(num, arg1, arg2) \
-({ \
- register long _num asm("r7") = (num); \
- register long _arg1 asm("r0") = (long)(arg1); \
- register long _arg2 asm("r1") = (long)(arg2); \
- \
- asm volatile ( \
- "svc #0\n" \
- : "=r"(_arg1) \
- : "r"(_arg1), "r"(_arg2), \
- "r"(_num) \
- : "memory", "cc", "lr" \
- ); \
- _arg1; \
-})
-
-#define my_syscall3(num, arg1, arg2, arg3) \
-({ \
- register long _num asm("r7") = (num); \
- register long _arg1 asm("r0") = (long)(arg1); \
- register long _arg2 asm("r1") = (long)(arg2); \
- register long _arg3 asm("r2") = (long)(arg3); \
- \
- asm volatile ( \
- "svc #0\n" \
- : "=r"(_arg1) \
- : "r"(_arg1), "r"(_arg2), "r"(_arg3), \
- "r"(_num) \
- : "memory", "cc", "lr" \
- ); \
- _arg1; \
-})
-
-#define my_syscall4(num, arg1, arg2, arg3, arg4) \
-({ \
- register long _num asm("r7") = (num); \
- register long _arg1 asm("r0") = (long)(arg1); \
- register long _arg2 asm("r1") = (long)(arg2); \
- register long _arg3 asm("r2") = (long)(arg3); \
- register long _arg4 asm("r3") = (long)(arg4); \
- \
- asm volatile ( \
- "svc #0\n" \
- : "=r"(_arg1) \
- : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \
- "r"(_num) \
- : "memory", "cc", "lr" \
- ); \
- _arg1; \
-})
-
-#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \
-({ \
- register long _num asm("r7") = (num); \
- register long _arg1 asm("r0") = (long)(arg1); \
- register long _arg2 asm("r1") = (long)(arg2); \
- register long _arg3 asm("r2") = (long)(arg3); \
- register long _arg4 asm("r3") = (long)(arg4); \
- register long _arg5 asm("r4") = (long)(arg5); \
- \
- asm volatile ( \
- "svc #0\n" \
- : "=r" (_arg1) \
- : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
- "r"(_num) \
- : "memory", "cc", "lr" \
- ); \
- _arg1; \
-})
-
-/* startup code */
-asm(".section .text\n"
- ".global _start\n"
- "_start:\n"
-#if defined(__THUMBEB__) || defined(__THUMBEL__)
- /* We enter here in 32-bit mode but if some previous functions were in
- * 16-bit mode, the assembler cannot know, so we need to tell it we're in
- * 32-bit now, then switch to 16-bit (is there a better way to do it than
- * adding 1 by hand ?) and tell the asm we're now in 16-bit mode so that
- * it generates correct instructions. Note that we do not support thumb1.
- */
- ".code 32\n"
- "add r0, pc, #1\n"
- "bx r0\n"
- ".code 16\n"
-#endif
- "pop {%r0}\n" // argc was in the stack
- "mov %r1, %sp\n" // argv = sp
- "add %r2, %r1, %r0, lsl #2\n" // envp = argv + 4*argc ...
- "add %r2, %r2, $4\n" // ... + 4
- "and %r3, %r1, $-8\n" // AAPCS : sp must be 8-byte aligned in the
- "mov %sp, %r3\n" // callee, an bl doesn't push (lr=pc)
- "bl main\n" // main() returns the status code, we'll exit with it.
- "movs r7, $1\n" // NR_exit == 1
- "svc $0x00\n"
- "");
-
-/* fcntl / open */
-#define O_RDONLY 0
-#define O_WRONLY 1
-#define O_RDWR 2
-#define O_CREAT 0x40
-#define O_EXCL 0x80
-#define O_NOCTTY 0x100
-#define O_TRUNC 0x200
-#define O_APPEND 0x400
-#define O_NONBLOCK 0x800
-#define O_DIRECTORY 0x4000
-
-/* The struct returned by the stat() syscall, 32-bit only, the syscall returns
- * exactly 56 bytes (stops before the unused array). In big endian, the format
- * differs as devices are returned as short only.
- */
-struct sys_stat_struct {
-#if defined(__ARMEB__)
- unsigned short st_dev;
- unsigned short __pad1;
-#else
- unsigned long st_dev;
-#endif
- unsigned long st_ino;
- unsigned short st_mode;
- unsigned short st_nlink;
- unsigned short st_uid;
- unsigned short st_gid;
-#if defined(__ARMEB__)
- unsigned short st_rdev;
- unsigned short __pad2;
-#else
- unsigned long st_rdev;
-#endif
- unsigned long st_size;
- unsigned long st_blksize;
- unsigned long st_blocks;
- unsigned long st_atime;
- unsigned long st_atime_nsec;
- unsigned long st_mtime;
- unsigned long st_mtime_nsec;
- unsigned long st_ctime;
- unsigned long st_ctime_nsec;
- unsigned long __unused[2];
-};
-
-#elif defined(__aarch64__)
-/* Syscalls for AARCH64 :
- * - registers are 64-bit
- * - stack is 16-byte aligned
- * - syscall number is passed in x8
- * - arguments are in x0, x1, x2, x3, x4, x5
- * - the system call is performed by calling svc 0
- * - syscall return comes in x0.
- * - the arguments are cast to long and assigned into the target registers
- * which are then simply passed as registers to the asm code, so that we
- * don't have to experience issues with register constraints.
- *
- * On aarch64, select() is not implemented so we have to use pselect6().
- */
-#define __ARCH_WANT_SYS_PSELECT6
-
-#define my_syscall0(num) \
-({ \
- register long _num asm("x8") = (num); \
- register long _arg1 asm("x0"); \
- \
- asm volatile ( \
- "svc #0\n" \
- : "=r"(_arg1) \
- : "r"(_num) \
- : "memory", "cc" \
- ); \
- _arg1; \
-})
-
-#define my_syscall1(num, arg1) \
-({ \
- register long _num asm("x8") = (num); \
- register long _arg1 asm("x0") = (long)(arg1); \
- \
- asm volatile ( \
- "svc #0\n" \
- : "=r"(_arg1) \
- : "r"(_arg1), \
- "r"(_num) \
- : "memory", "cc" \
- ); \
- _arg1; \
-})
-
-#define my_syscall2(num, arg1, arg2) \
-({ \
- register long _num asm("x8") = (num); \
- register long _arg1 asm("x0") = (long)(arg1); \
- register long _arg2 asm("x1") = (long)(arg2); \
- \
- asm volatile ( \
- "svc #0\n" \
- : "=r"(_arg1) \
- : "r"(_arg1), "r"(_arg2), \
- "r"(_num) \
- : "memory", "cc" \
- ); \
- _arg1; \
-})
-
-#define my_syscall3(num, arg1, arg2, arg3) \
-({ \
- register long _num asm("x8") = (num); \
- register long _arg1 asm("x0") = (long)(arg1); \
- register long _arg2 asm("x1") = (long)(arg2); \
- register long _arg3 asm("x2") = (long)(arg3); \
- \
- asm volatile ( \
- "svc #0\n" \
- : "=r"(_arg1) \
- : "r"(_arg1), "r"(_arg2), "r"(_arg3), \
- "r"(_num) \
- : "memory", "cc" \
- ); \
- _arg1; \
-})
-
-#define my_syscall4(num, arg1, arg2, arg3, arg4) \
-({ \
- register long _num asm("x8") = (num); \
- register long _arg1 asm("x0") = (long)(arg1); \
- register long _arg2 asm("x1") = (long)(arg2); \
- register long _arg3 asm("x2") = (long)(arg3); \
- register long _arg4 asm("x3") = (long)(arg4); \
- \
- asm volatile ( \
- "svc #0\n" \
- : "=r"(_arg1) \
- : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \
- "r"(_num) \
- : "memory", "cc" \
- ); \
- _arg1; \
-})
-
-#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \
-({ \
- register long _num asm("x8") = (num); \
- register long _arg1 asm("x0") = (long)(arg1); \
- register long _arg2 asm("x1") = (long)(arg2); \
- register long _arg3 asm("x2") = (long)(arg3); \
- register long _arg4 asm("x3") = (long)(arg4); \
- register long _arg5 asm("x4") = (long)(arg5); \
- \
- asm volatile ( \
- "svc #0\n" \
- : "=r" (_arg1) \
- : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
- "r"(_num) \
- : "memory", "cc" \
- ); \
- _arg1; \
-})
-
-#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \
-({ \
- register long _num asm("x8") = (num); \
- register long _arg1 asm("x0") = (long)(arg1); \
- register long _arg2 asm("x1") = (long)(arg2); \
- register long _arg3 asm("x2") = (long)(arg3); \
- register long _arg4 asm("x3") = (long)(arg4); \
- register long _arg5 asm("x4") = (long)(arg5); \
- register long _arg6 asm("x5") = (long)(arg6); \
- \
- asm volatile ( \
- "svc #0\n" \
- : "=r" (_arg1) \
- : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
- "r"(_arg6), "r"(_num) \
- : "memory", "cc" \
- ); \
- _arg1; \
-})
-
-/* startup code */
-asm(".section .text\n"
- ".global _start\n"
- "_start:\n"
- "ldr x0, [sp]\n" // argc (x0) was in the stack
- "add x1, sp, 8\n" // argv (x1) = sp
- "lsl x2, x0, 3\n" // envp (x2) = 8*argc ...
- "add x2, x2, 8\n" // + 8 (skip null)
- "add x2, x2, x1\n" // + argv
- "and sp, x1, -16\n" // sp must be 16-byte aligned in the callee
- "bl main\n" // main() returns the status code, we'll exit with it.
- "mov x8, 93\n" // NR_exit == 93
- "svc #0\n"
- "");
-
-/* fcntl / open */
-#define O_RDONLY 0
-#define O_WRONLY 1
-#define O_RDWR 2
-#define O_CREAT 0x40
-#define O_EXCL 0x80
-#define O_NOCTTY 0x100
-#define O_TRUNC 0x200
-#define O_APPEND 0x400
-#define O_NONBLOCK 0x800
-#define O_DIRECTORY 0x4000
-
-/* The struct returned by the newfstatat() syscall. Differs slightly from the
- * x86_64's stat one by field ordering, so be careful.
- */
-struct sys_stat_struct {
- unsigned long st_dev;
- unsigned long st_ino;
- unsigned int st_mode;
- unsigned int st_nlink;
- unsigned int st_uid;
- unsigned int st_gid;
-
- unsigned long st_rdev;
- unsigned long __pad1;
- long st_size;
- int st_blksize;
- int __pad2;
-
- long st_blocks;
- long st_atime;
- unsigned long st_atime_nsec;
- long st_mtime;
-
- unsigned long st_mtime_nsec;
- long st_ctime;
- unsigned long st_ctime_nsec;
- unsigned int __unused[2];
-};
-
-#elif defined(__mips__) && defined(_ABIO32)
-/* Syscalls for MIPS ABI O32 :
- * - WARNING! there's always a delayed slot!
- * - WARNING again, the syntax is different, registers take a '$' and numbers
- * do not.
- * - registers are 32-bit
- * - stack is 8-byte aligned
- * - syscall number is passed in v0 (starts at 0xfa0).
- * - arguments are in a0, a1, a2, a3, then the stack. The caller needs to
- * leave some room in the stack for the callee to save a0..a3 if needed.
- * - Many registers are clobbered, in fact only a0..a2 and s0..s8 are
- * preserved. See: https://www.linux-mips.org/wiki/Syscall as well as
- * scall32-o32.S in the kernel sources.
- * - the system call is performed by calling "syscall"
- * - syscall return comes in v0, and register a3 needs to be checked to know
- * if an error occurred, in which case errno is in v0.
- * - the arguments are cast to long and assigned into the target registers
- * which are then simply passed as registers to the asm code, so that we
- * don't have to experience issues with register constraints.
- */
-
-#define my_syscall0(num) \
-({ \
- register long _num asm("v0") = (num); \
- register long _arg4 asm("a3"); \
- \
- asm volatile ( \
- "addiu $sp, $sp, -32\n" \
- "syscall\n" \
- "addiu $sp, $sp, 32\n" \
- : "=r"(_num), "=r"(_arg4) \
- : "r"(_num) \
- : "memory", "cc", "at", "v1", "hi", "lo", \
- "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \
- ); \
- _arg4 ? -_num : _num; \
-})
-
-#define my_syscall1(num, arg1) \
-({ \
- register long _num asm("v0") = (num); \
- register long _arg1 asm("a0") = (long)(arg1); \
- register long _arg4 asm("a3"); \
- \
- asm volatile ( \
- "addiu $sp, $sp, -32\n" \
- "syscall\n" \
- "addiu $sp, $sp, 32\n" \
- : "=r"(_num), "=r"(_arg4) \
- : "0"(_num), \
- "r"(_arg1) \
- : "memory", "cc", "at", "v1", "hi", "lo", \
- "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \
- ); \
- _arg4 ? -_num : _num; \
-})
-
-#define my_syscall2(num, arg1, arg2) \
-({ \
- register long _num asm("v0") = (num); \
- register long _arg1 asm("a0") = (long)(arg1); \
- register long _arg2 asm("a1") = (long)(arg2); \
- register long _arg4 asm("a3"); \
- \
- asm volatile ( \
- "addiu $sp, $sp, -32\n" \
- "syscall\n" \
- "addiu $sp, $sp, 32\n" \
- : "=r"(_num), "=r"(_arg4) \
- : "0"(_num), \
- "r"(_arg1), "r"(_arg2) \
- : "memory", "cc", "at", "v1", "hi", "lo", \
- "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \
- ); \
- _arg4 ? -_num : _num; \
-})
-
-#define my_syscall3(num, arg1, arg2, arg3) \
-({ \
- register long _num asm("v0") = (num); \
- register long _arg1 asm("a0") = (long)(arg1); \
- register long _arg2 asm("a1") = (long)(arg2); \
- register long _arg3 asm("a2") = (long)(arg3); \
- register long _arg4 asm("a3"); \
- \
- asm volatile ( \
- "addiu $sp, $sp, -32\n" \
- "syscall\n" \
- "addiu $sp, $sp, 32\n" \
- : "=r"(_num), "=r"(_arg4) \
- : "0"(_num), \
- "r"(_arg1), "r"(_arg2), "r"(_arg3) \
- : "memory", "cc", "at", "v1", "hi", "lo", \
- "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \
- ); \
- _arg4 ? -_num : _num; \
-})
-
-#define my_syscall4(num, arg1, arg2, arg3, arg4) \
-({ \
- register long _num asm("v0") = (num); \
- register long _arg1 asm("a0") = (long)(arg1); \
- register long _arg2 asm("a1") = (long)(arg2); \
- register long _arg3 asm("a2") = (long)(arg3); \
- register long _arg4 asm("a3") = (long)(arg4); \
- \
- asm volatile ( \
- "addiu $sp, $sp, -32\n" \
- "syscall\n" \
- "addiu $sp, $sp, 32\n" \
- : "=r" (_num), "=r"(_arg4) \
- : "0"(_num), \
- "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4) \
- : "memory", "cc", "at", "v1", "hi", "lo", \
- "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \
- ); \
- _arg4 ? -_num : _num; \
-})
-
-#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \
-({ \
- register long _num asm("v0") = (num); \
- register long _arg1 asm("a0") = (long)(arg1); \
- register long _arg2 asm("a1") = (long)(arg2); \
- register long _arg3 asm("a2") = (long)(arg3); \
- register long _arg4 asm("a3") = (long)(arg4); \
- register long _arg5 = (long)(arg5); \
- \
- asm volatile ( \
- "addiu $sp, $sp, -32\n" \
- "sw %7, 16($sp)\n" \
- "syscall\n " \
- "addiu $sp, $sp, 32\n" \
- : "=r" (_num), "=r"(_arg4) \
- : "0"(_num), \
- "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5) \
- : "memory", "cc", "at", "v1", "hi", "lo", \
- "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \
- ); \
- _arg4 ? -_num : _num; \
-})
-
-/* startup code, note that it's called __start on MIPS */
-asm(".section .text\n"
- ".set nomips16\n"
- ".global __start\n"
- ".set noreorder\n"
- ".option pic0\n"
- ".ent __start\n"
- "__start:\n"
- "lw $a0,($sp)\n" // argc was in the stack
- "addiu $a1, $sp, 4\n" // argv = sp + 4
- "sll $a2, $a0, 2\n" // a2 = argc * 4
- "add $a2, $a2, $a1\n" // envp = argv + 4*argc ...
- "addiu $a2, $a2, 4\n" // ... + 4
- "li $t0, -8\n"
- "and $sp, $sp, $t0\n" // sp must be 8-byte aligned
- "addiu $sp,$sp,-16\n" // the callee expects to save a0..a3 there!
- "jal main\n" // main() returns the status code, we'll exit with it.
- "nop\n" // delayed slot
- "move $a0, $v0\n" // retrieve 32-bit exit code from v0
- "li $v0, 4001\n" // NR_exit == 4001
- "syscall\n"
- ".end __start\n"
- "");
-
-/* fcntl / open */
-#define O_RDONLY 0
-#define O_WRONLY 1
-#define O_RDWR 2
-#define O_APPEND 0x0008
-#define O_NONBLOCK 0x0080
-#define O_CREAT 0x0100
-#define O_TRUNC 0x0200
-#define O_EXCL 0x0400
-#define O_NOCTTY 0x0800
-#define O_DIRECTORY 0x10000
-
-/* The struct returned by the stat() syscall. 88 bytes are returned by the
- * syscall.
- */
-struct sys_stat_struct {
- unsigned int st_dev;
- long st_pad1[3];
- unsigned long st_ino;
- unsigned int st_mode;
- unsigned int st_nlink;
- unsigned int st_uid;
- unsigned int st_gid;
- unsigned int st_rdev;
- long st_pad2[2];
- long st_size;
- long st_pad3;
- long st_atime;
- long st_atime_nsec;
- long st_mtime;
- long st_mtime_nsec;
- long st_ctime;
- long st_ctime_nsec;
- long st_blksize;
- long st_blocks;
- long st_pad4[14];
-};
-
-#elif defined(__riscv)
-
-#if __riscv_xlen == 64
-#define PTRLOG "3"
-#define SZREG "8"
-#elif __riscv_xlen == 32
-#define PTRLOG "2"
-#define SZREG "4"
-#endif
-
-/* Syscalls for RISCV :
- * - stack is 16-byte aligned
- * - syscall number is passed in a7
- * - arguments are in a0, a1, a2, a3, a4, a5
- * - the system call is performed by calling ecall
- * - syscall return comes in a0
- * - the arguments are cast to long and assigned into the target
- * registers which are then simply passed as registers to the asm code,
- * so that we don't have to experience issues with register constraints.
- */
-
-#define my_syscall0(num) \
-({ \
- register long _num asm("a7") = (num); \
- register long _arg1 asm("a0"); \
- \
- asm volatile ( \
- "ecall\n\t" \
- : "=r"(_arg1) \
- : "r"(_num) \
- : "memory", "cc" \
- ); \
- _arg1; \
-})
-
-#define my_syscall1(num, arg1) \
-({ \
- register long _num asm("a7") = (num); \
- register long _arg1 asm("a0") = (long)(arg1); \
- \
- asm volatile ( \
- "ecall\n" \
- : "+r"(_arg1) \
- : "r"(_num) \
- : "memory", "cc" \
- ); \
- _arg1; \
-})
-
-#define my_syscall2(num, arg1, arg2) \
-({ \
- register long _num asm("a7") = (num); \
- register long _arg1 asm("a0") = (long)(arg1); \
- register long _arg2 asm("a1") = (long)(arg2); \
- \
- asm volatile ( \
- "ecall\n" \
- : "+r"(_arg1) \
- : "r"(_arg2), \
- "r"(_num) \
- : "memory", "cc" \
- ); \
- _arg1; \
-})
-
-#define my_syscall3(num, arg1, arg2, arg3) \
-({ \
- register long _num asm("a7") = (num); \
- register long _arg1 asm("a0") = (long)(arg1); \
- register long _arg2 asm("a1") = (long)(arg2); \
- register long _arg3 asm("a2") = (long)(arg3); \
- \
- asm volatile ( \
- "ecall\n\t" \
- : "+r"(_arg1) \
- : "r"(_arg2), "r"(_arg3), \
- "r"(_num) \
- : "memory", "cc" \
- ); \
- _arg1; \
-})
-
-#define my_syscall4(num, arg1, arg2, arg3, arg4) \
-({ \
- register long _num asm("a7") = (num); \
- register long _arg1 asm("a0") = (long)(arg1); \
- register long _arg2 asm("a1") = (long)(arg2); \
- register long _arg3 asm("a2") = (long)(arg3); \
- register long _arg4 asm("a3") = (long)(arg4); \
- \
- asm volatile ( \
- "ecall\n" \
- : "+r"(_arg1) \
- : "r"(_arg2), "r"(_arg3), "r"(_arg4), \
- "r"(_num) \
- : "memory", "cc" \
- ); \
- _arg1; \
-})
-
-#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \
-({ \
- register long _num asm("a7") = (num); \
- register long _arg1 asm("a0") = (long)(arg1); \
- register long _arg2 asm("a1") = (long)(arg2); \
- register long _arg3 asm("a2") = (long)(arg3); \
- register long _arg4 asm("a3") = (long)(arg4); \
- register long _arg5 asm("a4") = (long)(arg5); \
- \
- asm volatile ( \
- "ecall\n" \
- : "+r"(_arg1) \
- : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
- "r"(_num) \
- : "memory", "cc" \
- ); \
- _arg1; \
-})
-
-#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \
-({ \
- register long _num asm("a7") = (num); \
- register long _arg1 asm("a0") = (long)(arg1); \
- register long _arg2 asm("a1") = (long)(arg2); \
- register long _arg3 asm("a2") = (long)(arg3); \
- register long _arg4 asm("a3") = (long)(arg4); \
- register long _arg5 asm("a4") = (long)(arg5); \
- register long _arg6 asm("a5") = (long)(arg6); \
- \
- asm volatile ( \
- "ecall\n" \
- : "+r"(_arg1) \
- : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), "r"(_arg6), \
- "r"(_num) \
- : "memory", "cc" \
- ); \
- _arg1; \
-})
-
-/* startup code */
-asm(".section .text\n"
- ".global _start\n"
- "_start:\n"
- ".option push\n"
- ".option norelax\n"
- "lla gp, __global_pointer$\n"
- ".option pop\n"
- "ld a0, 0(sp)\n" // argc (a0) was in the stack
- "add a1, sp, "SZREG"\n" // argv (a1) = sp
- "slli a2, a0, "PTRLOG"\n" // envp (a2) = SZREG*argc ...
- "add a2, a2, "SZREG"\n" // + SZREG (skip null)
- "add a2,a2,a1\n" // + argv
- "andi sp,a1,-16\n" // sp must be 16-byte aligned
- "call main\n" // main() returns the status code, we'll exit with it.
- "li a7, 93\n" // NR_exit == 93
- "ecall\n"
- "");
-
-/* fcntl / open */
-#define O_RDONLY 0
-#define O_WRONLY 1
-#define O_RDWR 2
-#define O_CREAT 0x100
-#define O_EXCL 0x200
-#define O_NOCTTY 0x400
-#define O_TRUNC 0x1000
-#define O_APPEND 0x2000
-#define O_NONBLOCK 0x4000
-#define O_DIRECTORY 0x200000
-
-struct sys_stat_struct {
- unsigned long st_dev; /* Device. */
- unsigned long st_ino; /* File serial number. */
- unsigned int st_mode; /* File mode. */
- unsigned int st_nlink; /* Link count. */
- unsigned int st_uid; /* User ID of the file's owner. */
- unsigned int st_gid; /* Group ID of the file's group. */
- unsigned long st_rdev; /* Device number, if device. */
- unsigned long __pad1;
- long st_size; /* Size of file, in bytes. */
- int st_blksize; /* Optimal block size for I/O. */
- int __pad2;
- long st_blocks; /* Number 512-byte blocks allocated. */
- long st_atime; /* Time of last access. */
- unsigned long st_atime_nsec;
- long st_mtime; /* Time of last modification. */
- unsigned long st_mtime_nsec;
- long st_ctime; /* Time of last status change. */
- unsigned long st_ctime_nsec;
- unsigned int __unused4;
- unsigned int __unused5;
-};
-
-#endif
-
-
-/* Below are the C functions used to declare the raw syscalls. They try to be
- * architecture-agnostic, and return either a success or -errno. Declaring them
- * static will lead to them being inlined in most cases, but it's still possible
- * to reference them by a pointer if needed.
- */
-static __attribute__((unused))
-void *sys_brk(void *addr)
-{
- return (void *)my_syscall1(__NR_brk, addr);
-}
-
-static __attribute__((noreturn,unused))
-void sys_exit(int status)
-{
- my_syscall1(__NR_exit, status & 255);
- while(1); // shut the "noreturn" warnings.
-}
-
-static __attribute__((unused))
-int sys_chdir(const char *path)
-{
- return my_syscall1(__NR_chdir, path);
-}
-
-static __attribute__((unused))
-int sys_chmod(const char *path, mode_t mode)
-{
-#ifdef __NR_fchmodat
- return my_syscall4(__NR_fchmodat, AT_FDCWD, path, mode, 0);
-#elif defined(__NR_chmod)
- return my_syscall2(__NR_chmod, path, mode);
-#else
-#error Neither __NR_fchmodat nor __NR_chmod defined, cannot implement sys_chmod()
-#endif
-}
-
-static __attribute__((unused))
-int sys_chown(const char *path, uid_t owner, gid_t group)
-{
-#ifdef __NR_fchownat
- return my_syscall5(__NR_fchownat, AT_FDCWD, path, owner, group, 0);
-#elif defined(__NR_chown)
- return my_syscall3(__NR_chown, path, owner, group);
-#else
-#error Neither __NR_fchownat nor __NR_chown defined, cannot implement sys_chown()
-#endif
-}
-
-static __attribute__((unused))
-int sys_chroot(const char *path)
-{
- return my_syscall1(__NR_chroot, path);
-}
-
-static __attribute__((unused))
-int sys_close(int fd)
-{
- return my_syscall1(__NR_close, fd);
-}
-
-static __attribute__((unused))
-int sys_dup(int fd)
-{
- return my_syscall1(__NR_dup, fd);
-}
-
-#ifdef __NR_dup3
-static __attribute__((unused))
-int sys_dup3(int old, int new, int flags)
-{
- return my_syscall3(__NR_dup3, old, new, flags);
-}
-#endif
-
-static __attribute__((unused))
-int sys_dup2(int old, int new)
-{
-#ifdef __NR_dup3
- return my_syscall3(__NR_dup3, old, new, 0);
-#elif defined(__NR_dup2)
- return my_syscall2(__NR_dup2, old, new);
-#else
-#error Neither __NR_dup3 nor __NR_dup2 defined, cannot implement sys_dup2()
-#endif
-}
-
-static __attribute__((unused))
-int sys_execve(const char *filename, char *const argv[], char *const envp[])
-{
- return my_syscall3(__NR_execve, filename, argv, envp);
-}
-
-static __attribute__((unused))
-pid_t sys_fork(void)
-{
-#ifdef __NR_clone
- /* note: some archs only have clone() and not fork(). Different archs
- * have a different API, but most archs have the flags on first arg and
- * will not use the rest with no other flag.
- */
- return my_syscall5(__NR_clone, SIGCHLD, 0, 0, 0, 0);
-#elif defined(__NR_fork)
- return my_syscall0(__NR_fork);
-#else
-#error Neither __NR_clone nor __NR_fork defined, cannot implement sys_fork()
-#endif
-}
-
-static __attribute__((unused))
-int sys_fsync(int fd)
-{
- return my_syscall1(__NR_fsync, fd);
-}
-
-static __attribute__((unused))
-int sys_getdents64(int fd, struct linux_dirent64 *dirp, int count)
-{
- return my_syscall3(__NR_getdents64, fd, dirp, count);
-}
-
-static __attribute__((unused))
-pid_t sys_getpgid(pid_t pid)
-{
- return my_syscall1(__NR_getpgid, pid);
-}
-
-static __attribute__((unused))
-pid_t sys_getpgrp(void)
-{
- return sys_getpgid(0);
-}
-
-static __attribute__((unused))
-pid_t sys_getpid(void)
-{
- return my_syscall0(__NR_getpid);
-}
-
-static __attribute__((unused))
-pid_t sys_gettid(void)
-{
- return my_syscall0(__NR_gettid);
-}
-
-static __attribute__((unused))
-int sys_gettimeofday(struct timeval *tv, struct timezone *tz)
-{
- return my_syscall2(__NR_gettimeofday, tv, tz);
-}
-
-static __attribute__((unused))
-int sys_ioctl(int fd, unsigned long req, void *value)
-{
- return my_syscall3(__NR_ioctl, fd, req, value);
-}
-
-static __attribute__((unused))
-int sys_kill(pid_t pid, int signal)
-{
- return my_syscall2(__NR_kill, pid, signal);
-}
-
-static __attribute__((unused))
-int sys_link(const char *old, const char *new)
-{
-#ifdef __NR_linkat
- return my_syscall5(__NR_linkat, AT_FDCWD, old, AT_FDCWD, new, 0);
-#elif defined(__NR_link)
- return my_syscall2(__NR_link, old, new);
-#else
-#error Neither __NR_linkat nor __NR_link defined, cannot implement sys_link()
-#endif
-}
-
-static __attribute__((unused))
-off_t sys_lseek(int fd, off_t offset, int whence)
-{
- return my_syscall3(__NR_lseek, fd, offset, whence);
-}
-
-static __attribute__((unused))
-int sys_mkdir(const char *path, mode_t mode)
-{
-#ifdef __NR_mkdirat
- return my_syscall3(__NR_mkdirat, AT_FDCWD, path, mode);
-#elif defined(__NR_mkdir)
- return my_syscall2(__NR_mkdir, path, mode);
-#else
-#error Neither __NR_mkdirat nor __NR_mkdir defined, cannot implement sys_mkdir()
-#endif
-}
-
-static __attribute__((unused))
-long sys_mknod(const char *path, mode_t mode, dev_t dev)
-{
-#ifdef __NR_mknodat
- return my_syscall4(__NR_mknodat, AT_FDCWD, path, mode, dev);
-#elif defined(__NR_mknod)
- return my_syscall3(__NR_mknod, path, mode, dev);
-#else
-#error Neither __NR_mknodat nor __NR_mknod defined, cannot implement sys_mknod()
-#endif
-}
-
-static __attribute__((unused))
-int sys_mount(const char *src, const char *tgt, const char *fst,
- unsigned long flags, const void *data)
-{
- return my_syscall5(__NR_mount, src, tgt, fst, flags, data);
-}
-
-static __attribute__((unused))
-int sys_open(const char *path, int flags, mode_t mode)
-{
-#ifdef __NR_openat
- return my_syscall4(__NR_openat, AT_FDCWD, path, flags, mode);
-#elif defined(__NR_open)
- return my_syscall3(__NR_open, path, flags, mode);
-#else
-#error Neither __NR_openat nor __NR_open defined, cannot implement sys_open()
-#endif
-}
-
-static __attribute__((unused))
-int sys_pivot_root(const char *new, const char *old)
-{
- return my_syscall2(__NR_pivot_root, new, old);
-}
-
-static __attribute__((unused))
-int sys_poll(struct pollfd *fds, int nfds, int timeout)
-{
-#if defined(__NR_ppoll)
- struct timespec t;
-
- if (timeout >= 0) {
- t.tv_sec = timeout / 1000;
- t.tv_nsec = (timeout % 1000) * 1000000;
- }
- return my_syscall4(__NR_ppoll, fds, nfds, (timeout >= 0) ? &t : NULL, NULL);
-#elif defined(__NR_poll)
- return my_syscall3(__NR_poll, fds, nfds, timeout);
-#else
-#error Neither __NR_ppoll nor __NR_poll defined, cannot implement sys_poll()
-#endif
-}
-
-static __attribute__((unused))
-ssize_t sys_read(int fd, void *buf, size_t count)
-{
- return my_syscall3(__NR_read, fd, buf, count);
-}
-
-static __attribute__((unused))
-ssize_t sys_reboot(int magic1, int magic2, int cmd, void *arg)
-{
- return my_syscall4(__NR_reboot, magic1, magic2, cmd, arg);
-}
-
-static __attribute__((unused))
-int sys_sched_yield(void)
-{
- return my_syscall0(__NR_sched_yield);
-}
-
-static __attribute__((unused))
-int sys_select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeval *timeout)
-{
-#if defined(__ARCH_WANT_SYS_OLD_SELECT) && !defined(__NR__newselect)
- struct sel_arg_struct {
- unsigned long n;
- fd_set *r, *w, *e;
- struct timeval *t;
- } arg = { .n = nfds, .r = rfds, .w = wfds, .e = efds, .t = timeout };
- return my_syscall1(__NR_select, &arg);
-#elif defined(__ARCH_WANT_SYS_PSELECT6) && defined(__NR_pselect6)
- struct timespec t;
-
- if (timeout) {
- t.tv_sec = timeout->tv_sec;
- t.tv_nsec = timeout->tv_usec * 1000;
- }
- return my_syscall6(__NR_pselect6, nfds, rfds, wfds, efds, timeout ? &t : NULL, NULL);
-#elif defined(__NR__newselect) || defined(__NR_select)
-#ifndef __NR__newselect
-#define __NR__newselect __NR_select
-#endif
- return my_syscall5(__NR__newselect, nfds, rfds, wfds, efds, timeout);
-#else
-#error None of __NR_select, __NR_pselect6, nor __NR__newselect defined, cannot implement sys_select()
-#endif
-}
-
-static __attribute__((unused))
-int sys_setpgid(pid_t pid, pid_t pgid)
-{
- return my_syscall2(__NR_setpgid, pid, pgid);
-}
-
-static __attribute__((unused))
-pid_t sys_setsid(void)
-{
- return my_syscall0(__NR_setsid);
-}
-
-static __attribute__((unused))
-int sys_stat(const char *path, struct stat *buf)
-{
- struct sys_stat_struct stat;
- long ret;
-
-#ifdef __NR_newfstatat
- /* only solution for arm64 */
- ret = my_syscall4(__NR_newfstatat, AT_FDCWD, path, &stat, 0);
-#elif defined(__NR_stat)
- ret = my_syscall2(__NR_stat, path, &stat);
-#else
-#error Neither __NR_newfstatat nor __NR_stat defined, cannot implement sys_stat()
-#endif
- buf->st_dev = stat.st_dev;
- buf->st_ino = stat.st_ino;
- buf->st_mode = stat.st_mode;
- buf->st_nlink = stat.st_nlink;
- buf->st_uid = stat.st_uid;
- buf->st_gid = stat.st_gid;
- buf->st_rdev = stat.st_rdev;
- buf->st_size = stat.st_size;
- buf->st_blksize = stat.st_blksize;
- buf->st_blocks = stat.st_blocks;
- buf->st_atime = stat.st_atime;
- buf->st_mtime = stat.st_mtime;
- buf->st_ctime = stat.st_ctime;
- return ret;
-}
-
-
-static __attribute__((unused))
-int sys_symlink(const char *old, const char *new)
-{
-#ifdef __NR_symlinkat
- return my_syscall3(__NR_symlinkat, old, AT_FDCWD, new);
-#elif defined(__NR_symlink)
- return my_syscall2(__NR_symlink, old, new);
-#else
-#error Neither __NR_symlinkat nor __NR_symlink defined, cannot implement sys_symlink()
-#endif
-}
-
-static __attribute__((unused))
-mode_t sys_umask(mode_t mode)
-{
- return my_syscall1(__NR_umask, mode);
-}
-
-static __attribute__((unused))
-int sys_umount2(const char *path, int flags)
-{
- return my_syscall2(__NR_umount2, path, flags);
-}
-
-static __attribute__((unused))
-int sys_unlink(const char *path)
-{
-#ifdef __NR_unlinkat
- return my_syscall3(__NR_unlinkat, AT_FDCWD, path, 0);
-#elif defined(__NR_unlink)
- return my_syscall1(__NR_unlink, path);
-#else
-#error Neither __NR_unlinkat nor __NR_unlink defined, cannot implement sys_unlink()
-#endif
-}
-
-static __attribute__((unused))
-pid_t sys_wait4(pid_t pid, int *status, int options, struct rusage *rusage)
-{
- return my_syscall4(__NR_wait4, pid, status, options, rusage);
-}
-
-static __attribute__((unused))
-pid_t sys_waitpid(pid_t pid, int *status, int options)
-{
- return sys_wait4(pid, status, options, 0);
-}
-
-static __attribute__((unused))
-pid_t sys_wait(int *status)
-{
- return sys_waitpid(-1, status, 0);
-}
-
-static __attribute__((unused))
-ssize_t sys_write(int fd, const void *buf, size_t count)
-{
- return my_syscall3(__NR_write, fd, buf, count);
-}
-
-
-/* Below are the libc-compatible syscalls which return x or -1 and set errno.
- * They rely on the functions above. Similarly they're marked static so that it
- * is possible to assign pointers to them if needed.
- */
-
-static __attribute__((unused))
-int brk(void *addr)
-{
- void *ret = sys_brk(addr);
-
- if (!ret) {
- SET_ERRNO(ENOMEM);
- return -1;
- }
- return 0;
-}
-
-static __attribute__((noreturn,unused))
-void exit(int status)
-{
- sys_exit(status);
-}
-
-static __attribute__((unused))
-int chdir(const char *path)
-{
- int ret = sys_chdir(path);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-int chmod(const char *path, mode_t mode)
-{
- int ret = sys_chmod(path, mode);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-int chown(const char *path, uid_t owner, gid_t group)
-{
- int ret = sys_chown(path, owner, group);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-int chroot(const char *path)
-{
- int ret = sys_chroot(path);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-int close(int fd)
-{
- int ret = sys_close(fd);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-int dup(int fd)
-{
- int ret = sys_dup(fd);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-int dup2(int old, int new)
-{
- int ret = sys_dup2(old, new);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-#ifdef __NR_dup3
-static __attribute__((unused))
-int dup3(int old, int new, int flags)
-{
- int ret = sys_dup3(old, new, flags);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-#endif
-
-static __attribute__((unused))
-int execve(const char *filename, char *const argv[], char *const envp[])
-{
- int ret = sys_execve(filename, argv, envp);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-pid_t fork(void)
-{
- pid_t ret = sys_fork();
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-int fsync(int fd)
-{
- int ret = sys_fsync(fd);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-int getdents64(int fd, struct linux_dirent64 *dirp, int count)
-{
- int ret = sys_getdents64(fd, dirp, count);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-pid_t getpgid(pid_t pid)
-{
- pid_t ret = sys_getpgid(pid);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-pid_t getpgrp(void)
-{
- pid_t ret = sys_getpgrp();
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-pid_t getpid(void)
-{
- pid_t ret = sys_getpid();
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-pid_t gettid(void)
-{
- pid_t ret = sys_gettid();
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-int gettimeofday(struct timeval *tv, struct timezone *tz)
-{
- int ret = sys_gettimeofday(tv, tz);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-int ioctl(int fd, unsigned long req, void *value)
-{
- int ret = sys_ioctl(fd, req, value);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-int kill(pid_t pid, int signal)
-{
- int ret = sys_kill(pid, signal);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-int link(const char *old, const char *new)
-{
- int ret = sys_link(old, new);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-off_t lseek(int fd, off_t offset, int whence)
-{
- off_t ret = sys_lseek(fd, offset, whence);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-int mkdir(const char *path, mode_t mode)
-{
- int ret = sys_mkdir(path, mode);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-int mknod(const char *path, mode_t mode, dev_t dev)
-{
- int ret = sys_mknod(path, mode, dev);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-int mount(const char *src, const char *tgt,
- const char *fst, unsigned long flags,
- const void *data)
-{
- int ret = sys_mount(src, tgt, fst, flags, data);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-int open(const char *path, int flags, mode_t mode)
-{
- int ret = sys_open(path, flags, mode);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-int pivot_root(const char *new, const char *old)
-{
- int ret = sys_pivot_root(new, old);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-int poll(struct pollfd *fds, int nfds, int timeout)
-{
- int ret = sys_poll(fds, nfds, timeout);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-ssize_t read(int fd, void *buf, size_t count)
-{
- ssize_t ret = sys_read(fd, buf, count);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-int reboot(int cmd)
-{
- int ret = sys_reboot(LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, cmd, 0);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-void *sbrk(intptr_t inc)
-{
- void *ret;
-
- /* first call to find current end */
- if ((ret = sys_brk(0)) && (sys_brk(ret + inc) == ret + inc))
- return ret + inc;
-
- SET_ERRNO(ENOMEM);
- return (void *)-1;
-}
-
-static __attribute__((unused))
-int sched_yield(void)
-{
- int ret = sys_sched_yield();
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-int select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeval *timeout)
-{
- int ret = sys_select(nfds, rfds, wfds, efds, timeout);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-int setpgid(pid_t pid, pid_t pgid)
-{
- int ret = sys_setpgid(pid, pgid);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-pid_t setsid(void)
-{
- pid_t ret = sys_setsid();
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-unsigned int sleep(unsigned int seconds)
-{
- struct timeval my_timeval = { seconds, 0 };
-
- if (sys_select(0, 0, 0, 0, &my_timeval) < 0)
- return my_timeval.tv_sec + !!my_timeval.tv_usec;
- else
- return 0;
-}
-
-static __attribute__((unused))
-int msleep(unsigned int msecs)
-{
- struct timeval my_timeval = { msecs / 1000, (msecs % 1000) * 1000 };
-
- if (sys_select(0, 0, 0, 0, &my_timeval) < 0)
- return (my_timeval.tv_sec * 1000) +
- (my_timeval.tv_usec / 1000) +
- !!(my_timeval.tv_usec % 1000);
- else
- return 0;
-}
-
-static __attribute__((unused))
-int stat(const char *path, struct stat *buf)
-{
- int ret = sys_stat(path, buf);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-int symlink(const char *old, const char *new)
-{
- int ret = sys_symlink(old, new);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-int tcsetpgrp(int fd, pid_t pid)
-{
- return ioctl(fd, TIOCSPGRP, &pid);
-}
-
-static __attribute__((unused))
-mode_t umask(mode_t mode)
-{
- return sys_umask(mode);
-}
-
-static __attribute__((unused))
-int umount2(const char *path, int flags)
-{
- int ret = sys_umount2(path, flags);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-int unlink(const char *path)
-{
- int ret = sys_unlink(path);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-pid_t wait4(pid_t pid, int *status, int options, struct rusage *rusage)
-{
- pid_t ret = sys_wait4(pid, status, options, rusage);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-pid_t waitpid(pid_t pid, int *status, int options)
-{
- pid_t ret = sys_waitpid(pid, status, options);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-pid_t wait(int *status)
-{
- pid_t ret = sys_wait(status);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-ssize_t write(int fd, const void *buf, size_t count)
-{
- ssize_t ret = sys_write(fd, buf, count);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-/* some size-optimized reimplementations of a few common str* and mem*
- * functions. They're marked static, except memcpy() and raise() which are used
- * by libgcc on ARM, so they are marked weak instead in order not to cause an
- * error when building a program made of multiple files (not recommended).
- */
-
-static __attribute__((unused))
-void *memmove(void *dst, const void *src, size_t len)
-{
- ssize_t pos = (dst <= src) ? -1 : (long)len;
- void *ret = dst;
-
- while (len--) {
- pos += (dst <= src) ? 1 : -1;
- ((char *)dst)[pos] = ((char *)src)[pos];
- }
- return ret;
-}
-
-static __attribute__((unused))
-void *memset(void *dst, int b, size_t len)
-{
- char *p = dst;
-
- while (len--)
- *(p++) = b;
- return dst;
-}
-
-static __attribute__((unused))
-int memcmp(const void *s1, const void *s2, size_t n)
-{
- size_t ofs = 0;
- char c1 = 0;
-
- while (ofs < n && !(c1 = ((char *)s1)[ofs] - ((char *)s2)[ofs])) {
- ofs++;
- }
- return c1;
-}
-
-static __attribute__((unused))
-char *strcpy(char *dst, const char *src)
-{
- char *ret = dst;
-
- while ((*dst++ = *src++));
- return ret;
-}
-
-static __attribute__((unused))
-char *strchr(const char *s, int c)
-{
- while (*s) {
- if (*s == (char)c)
- return (char *)s;
- s++;
- }
- return NULL;
-}
-
-static __attribute__((unused))
-char *strrchr(const char *s, int c)
-{
- const char *ret = NULL;
-
- while (*s) {
- if (*s == (char)c)
- ret = s;
- s++;
- }
- return (char *)ret;
-}
-
-static __attribute__((unused))
-size_t nolibc_strlen(const char *str)
-{
- size_t len;
-
- for (len = 0; str[len]; len++);
- return len;
-}
-
-#define strlen(str) ({ \
- __builtin_constant_p((str)) ? \
- __builtin_strlen((str)) : \
- nolibc_strlen((str)); \
-})
-
-static __attribute__((unused))
-int isdigit(int c)
-{
- return (unsigned int)(c - '0') <= 9;
-}
-
-static __attribute__((unused))
-long atol(const char *s)
-{
- unsigned long ret = 0;
- unsigned long d;
- int neg = 0;
-
- if (*s == '-') {
- neg = 1;
- s++;
- }
-
- while (1) {
- d = (*s++) - '0';
- if (d > 9)
- break;
- ret *= 10;
- ret += d;
- }
-
- return neg ? -ret : ret;
-}
-
-static __attribute__((unused))
-int atoi(const char *s)
-{
- return atol(s);
-}
-
-static __attribute__((unused))
-const char *ltoa(long in)
-{
- /* large enough for -9223372036854775808 */
- static char buffer[21];
- char *pos = buffer + sizeof(buffer) - 1;
- int neg = in < 0;
- unsigned long n = neg ? -in : in;
-
- *pos-- = '\0';
- do {
- *pos-- = '0' + n % 10;
- n /= 10;
- if (pos < buffer)
- return pos + 1;
- } while (n);
-
- if (neg)
- *pos-- = '-';
- return pos + 1;
-}
-
-__attribute__((weak,unused))
-void *memcpy(void *dst, const void *src, size_t len)
-{
- return memmove(dst, src, len);
-}
-
-/* needed by libgcc for divide by zero */
-__attribute__((weak,unused))
-int raise(int signal)
-{
- return kill(getpid(), signal);
-}
-
-/* Here come a few helper functions */
-
-static __attribute__((unused))
-void FD_ZERO(fd_set *set)
-{
- memset(set, 0, sizeof(*set));
-}
-
-static __attribute__((unused))
-void FD_SET(int fd, fd_set *set)
-{
- if (fd < 0 || fd >= FD_SETSIZE)
- return;
- set->fd32[fd / 32] |= 1 << (fd & 31);
-}
-
-/* WARNING, it only deals with the 4096 first majors and 256 first minors */
-static __attribute__((unused))
-dev_t makedev(unsigned int major, unsigned int minor)
-{
- return ((major & 0xfff) << 8) | (minor & 0xff);
-}
+#endif /* _NOLIBC_H */
diff --git a/tools/include/nolibc/signal.h b/tools/include/nolibc/signal.h
new file mode 100644
index 000000000000..ef47e71e2be3
--- /dev/null
+++ b/tools/include/nolibc/signal.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * signal function definitions for NOLIBC
+ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_SIGNAL_H
+#define _NOLIBC_SIGNAL_H
+
+#include "std.h"
+#include "arch.h"
+#include "types.h"
+#include "sys.h"
+
+/* This one is not marked static as it's needed by libgcc for divide by zero */
+__attribute__((weak,unused,section(".text.nolibc_raise")))
+int raise(int signal)
+{
+ return sys_kill(sys_getpid(), signal);
+}
+
+#endif /* _NOLIBC_SIGNAL_H */
diff --git a/tools/include/nolibc/std.h b/tools/include/nolibc/std.h
new file mode 100644
index 000000000000..1747ae125392
--- /dev/null
+++ b/tools/include/nolibc/std.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Standard definitions and types for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_STD_H
+#define _NOLIBC_STD_H
+
+/* Declare a few quite common macros and types that usually are in stdlib.h,
+ * stdint.h, ctype.h, unistd.h and a few other common locations. Please place
+ * integer type definitions and generic macros here, but avoid OS-specific and
+ * syscall-specific stuff, as this file is expected to be included very early.
+ */
+
+/* note: may already be defined */
+#ifndef NULL
+#define NULL ((void *)0)
+#endif
+
+/* stdint types */
+typedef unsigned char uint8_t;
+typedef signed char int8_t;
+typedef unsigned short uint16_t;
+typedef signed short int16_t;
+typedef unsigned int uint32_t;
+typedef signed int int32_t;
+typedef unsigned long long uint64_t;
+typedef signed long long int64_t;
+typedef unsigned long size_t;
+typedef signed long ssize_t;
+typedef unsigned long uintptr_t;
+typedef signed long intptr_t;
+typedef signed long ptrdiff_t;
+
+/* those are commonly provided by sys/types.h */
+typedef unsigned int dev_t;
+typedef unsigned long ino_t;
+typedef unsigned int mode_t;
+typedef signed int pid_t;
+typedef unsigned int uid_t;
+typedef unsigned int gid_t;
+typedef unsigned long nlink_t;
+typedef signed long off_t;
+typedef signed long blksize_t;
+typedef signed long blkcnt_t;
+typedef signed long time_t;
+
+#endif /* _NOLIBC_STD_H */
diff --git a/tools/include/nolibc/stdio.h b/tools/include/nolibc/stdio.h
new file mode 100644
index 000000000000..15dedf8d0902
--- /dev/null
+++ b/tools/include/nolibc/stdio.h
@@ -0,0 +1,306 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * minimal stdio function definitions for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_STDIO_H
+#define _NOLIBC_STDIO_H
+
+#include <stdarg.h>
+
+#include "std.h"
+#include "arch.h"
+#include "errno.h"
+#include "types.h"
+#include "sys.h"
+#include "stdlib.h"
+#include "string.h"
+
+#ifndef EOF
+#define EOF (-1)
+#endif
+
+/* just define FILE as a non-empty type */
+typedef struct FILE {
+ char dummy[1];
+} FILE;
+
+/* We define the 3 common stdio files as constant invalid pointers that
+ * are easily recognized.
+ */
+static __attribute__((unused)) FILE* const stdin = (FILE*)-3;
+static __attribute__((unused)) FILE* const stdout = (FILE*)-2;
+static __attribute__((unused)) FILE* const stderr = (FILE*)-1;
+
+/* getc(), fgetc(), getchar() */
+
+#define getc(stream) fgetc(stream)
+
+static __attribute__((unused))
+int fgetc(FILE* stream)
+{
+ unsigned char ch;
+ int fd;
+
+ if (stream < stdin || stream > stderr)
+ return EOF;
+
+ fd = 3 + (long)stream;
+
+ if (read(fd, &ch, 1) <= 0)
+ return EOF;
+ return ch;
+}
+
+static __attribute__((unused))
+int getchar(void)
+{
+ return fgetc(stdin);
+}
+
+
+/* putc(), fputc(), putchar() */
+
+#define putc(c, stream) fputc(c, stream)
+
+static __attribute__((unused))
+int fputc(int c, FILE* stream)
+{
+ unsigned char ch = c;
+ int fd;
+
+ if (stream < stdin || stream > stderr)
+ return EOF;
+
+ fd = 3 + (long)stream;
+
+ if (write(fd, &ch, 1) <= 0)
+ return EOF;
+ return ch;
+}
+
+static __attribute__((unused))
+int putchar(int c)
+{
+ return fputc(c, stdout);
+}
+
+
+/* fwrite(), puts(), fputs(). Note that puts() emits '\n' but not fputs(). */
+
+/* internal fwrite()-like function which only takes a size and returns 0 on
+ * success or EOF on error. It automatically retries on short writes.
+ */
+static __attribute__((unused))
+int _fwrite(const void *buf, size_t size, FILE *stream)
+{
+ ssize_t ret;
+ int fd;
+
+ if (stream < stdin || stream > stderr)
+ return EOF;
+
+ fd = 3 + (long)stream;
+
+ while (size) {
+ ret = write(fd, buf, size);
+ if (ret <= 0)
+ return EOF;
+ size -= ret;
+ buf += ret;
+ }
+ return 0;
+}
+
+static __attribute__((unused))
+size_t fwrite(const void *s, size_t size, size_t nmemb, FILE *stream)
+{
+ size_t written;
+
+ for (written = 0; written < nmemb; written++) {
+ if (_fwrite(s, size, stream) != 0)
+ break;
+ s += size;
+ }
+ return written;
+}
+
+static __attribute__((unused))
+int fputs(const char *s, FILE *stream)
+{
+ return _fwrite(s, strlen(s), stream);
+}
+
+static __attribute__((unused))
+int puts(const char *s)
+{
+ if (fputs(s, stdout) == EOF)
+ return EOF;
+ return putchar('\n');
+}
+
+
+/* fgets() */
+static __attribute__((unused))
+char *fgets(char *s, int size, FILE *stream)
+{
+ int ofs;
+ int c;
+
+ for (ofs = 0; ofs + 1 < size;) {
+ c = fgetc(stream);
+ if (c == EOF)
+ break;
+ s[ofs++] = c;
+ if (c == '\n')
+ break;
+ }
+ if (ofs < size)
+ s[ofs] = 0;
+ return ofs ? s : NULL;
+}
+
+
+/* minimal vfprintf(). It supports the following formats:
+ * - %[l*]{d,u,c,x,p}
+ * - %s
+ * - unknown modifiers are ignored.
+ */
+static __attribute__((unused))
+int vfprintf(FILE *stream, const char *fmt, va_list args)
+{
+ char escape, lpref, c;
+ unsigned long long v;
+ unsigned int written;
+ size_t len, ofs;
+ char tmpbuf[21];
+ const char *outstr;
+
+ written = ofs = escape = lpref = 0;
+ while (1) {
+ c = fmt[ofs++];
+
+ if (escape) {
+ /* we're in an escape sequence, ofs == 1 */
+ escape = 0;
+ if (c == 'c' || c == 'd' || c == 'u' || c == 'x' || c == 'p') {
+ char *out = tmpbuf;
+
+ if (c == 'p')
+ v = va_arg(args, unsigned long);
+ else if (lpref) {
+ if (lpref > 1)
+ v = va_arg(args, unsigned long long);
+ else
+ v = va_arg(args, unsigned long);
+ } else
+ v = va_arg(args, unsigned int);
+
+ if (c == 'd') {
+ /* sign-extend the value */
+ if (lpref == 0)
+ v = (long long)(int)v;
+ else if (lpref == 1)
+ v = (long long)(long)v;
+ }
+
+ switch (c) {
+ case 'c':
+ out[0] = v;
+ out[1] = 0;
+ break;
+ case 'd':
+ i64toa_r(v, out);
+ break;
+ case 'u':
+ u64toa_r(v, out);
+ break;
+ case 'p':
+ *(out++) = '0';
+ *(out++) = 'x';
+ /* fall through */
+ default: /* 'x' and 'p' above */
+ u64toh_r(v, out);
+ break;
+ }
+ outstr = tmpbuf;
+ }
+ else if (c == 's') {
+ outstr = va_arg(args, char *);
+ if (!outstr)
+ outstr="(null)";
+ }
+ else if (c == '%') {
+ /* queue it verbatim */
+ continue;
+ }
+ else {
+ /* modifiers or final 0 */
+ if (c == 'l') {
+ /* long format prefix, maintain the escape */
+ lpref++;
+ }
+ escape = 1;
+ goto do_escape;
+ }
+ len = strlen(outstr);
+ goto flush_str;
+ }
+
+ /* not an escape sequence */
+ if (c == 0 || c == '%') {
+ /* flush pending data on escape or end */
+ escape = 1;
+ lpref = 0;
+ outstr = fmt;
+ len = ofs - 1;
+ flush_str:
+ if (_fwrite(outstr, len, stream) != 0)
+ break;
+
+ written += len;
+ do_escape:
+ if (c == 0)
+ break;
+ fmt += ofs;
+ ofs = 0;
+ continue;
+ }
+
+ /* literal char, just queue it */
+ }
+ return written;
+}
+
+static __attribute__((unused))
+int fprintf(FILE *stream, const char *fmt, ...)
+{
+ va_list args;
+ int ret;
+
+ va_start(args, fmt);
+ ret = vfprintf(stream, fmt, args);
+ va_end(args);
+ return ret;
+}
+
+static __attribute__((unused))
+int printf(const char *fmt, ...)
+{
+ va_list args;
+ int ret;
+
+ va_start(args, fmt);
+ ret = vfprintf(stdout, fmt, args);
+ va_end(args);
+ return ret;
+}
+
+static __attribute__((unused))
+void perror(const char *msg)
+{
+ fprintf(stderr, "%s%serrno=%d\n", (msg && *msg) ? msg : "", (msg && *msg) ? ": " : "", errno);
+}
+
+#endif /* _NOLIBC_STDIO_H */
diff --git a/tools/include/nolibc/stdlib.h b/tools/include/nolibc/stdlib.h
new file mode 100644
index 000000000000..8fd32eaf8037
--- /dev/null
+++ b/tools/include/nolibc/stdlib.h
@@ -0,0 +1,423 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * stdlib function definitions for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_STDLIB_H
+#define _NOLIBC_STDLIB_H
+
+#include "std.h"
+#include "arch.h"
+#include "types.h"
+#include "sys.h"
+#include "string.h"
+
+struct nolibc_heap {
+ size_t len;
+ char user_p[] __attribute__((__aligned__));
+};
+
+/* Buffer used to store int-to-ASCII conversions. Will only be implemented if
+ * any of the related functions is implemented. The area is large enough to
+ * store "18446744073709551615" or "-9223372036854775808" and the final zero.
+ */
+static __attribute__((unused)) char itoa_buffer[21];
+
+/*
+ * As much as possible, please keep functions alphabetically sorted.
+ */
+
+/* must be exported, as it's used by libgcc for various divide functions */
+__attribute__((weak,unused,noreturn,section(".text.nolibc_abort")))
+void abort(void)
+{
+ sys_kill(sys_getpid(), SIGABRT);
+ for (;;);
+}
+
+static __attribute__((unused))
+long atol(const char *s)
+{
+ unsigned long ret = 0;
+ unsigned long d;
+ int neg = 0;
+
+ if (*s == '-') {
+ neg = 1;
+ s++;
+ }
+
+ while (1) {
+ d = (*s++) - '0';
+ if (d > 9)
+ break;
+ ret *= 10;
+ ret += d;
+ }
+
+ return neg ? -ret : ret;
+}
+
+static __attribute__((unused))
+int atoi(const char *s)
+{
+ return atol(s);
+}
+
+static __attribute__((unused))
+void free(void *ptr)
+{
+ struct nolibc_heap *heap;
+
+ if (!ptr)
+ return;
+
+ heap = container_of(ptr, struct nolibc_heap, user_p);
+ munmap(heap, heap->len);
+}
+
+/* getenv() tries to find the environment variable named <name> in the
+ * environment array pointed to by global variable "environ" which must be
+ * declared as a char **, and must be terminated by a NULL (it is recommended
+ * to set this variable to the "envp" argument of main()). If the requested
+ * environment variable exists its value is returned otherwise NULL is
+ * returned. getenv() is forcefully inlined so that the reference to "environ"
+ * will be dropped if unused, even at -O0.
+ */
+static __attribute__((unused))
+char *_getenv(const char *name, char **environ)
+{
+ int idx, i;
+
+ if (environ) {
+ for (idx = 0; environ[idx]; idx++) {
+ for (i = 0; name[i] && name[i] == environ[idx][i];)
+ i++;
+ if (!name[i] && environ[idx][i] == '=')
+ return &environ[idx][i+1];
+ }
+ }
+ return NULL;
+}
+
+static inline __attribute__((unused,always_inline))
+char *getenv(const char *name)
+{
+ extern char **environ;
+ return _getenv(name, environ);
+}
+
+static __attribute__((unused))
+void *malloc(size_t len)
+{
+ struct nolibc_heap *heap;
+
+ /* Always allocate memory with size multiple of 4096. */
+ len = sizeof(*heap) + len;
+ len = (len + 4095UL) & -4096UL;
+ heap = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE,
+ -1, 0);
+ if (__builtin_expect(heap == MAP_FAILED, 0))
+ return NULL;
+
+ heap->len = len;
+ return heap->user_p;
+}
+
+static __attribute__((unused))
+void *calloc(size_t size, size_t nmemb)
+{
+ void *orig;
+ size_t res = 0;
+
+ if (__builtin_expect(__builtin_mul_overflow(nmemb, size, &res), 0)) {
+ SET_ERRNO(ENOMEM);
+ return NULL;
+ }
+
+ /*
+ * No need to zero the heap, the MAP_ANONYMOUS in malloc()
+ * already does it.
+ */
+ return malloc(res);
+}
+
+static __attribute__((unused))
+void *realloc(void *old_ptr, size_t new_size)
+{
+ struct nolibc_heap *heap;
+ size_t user_p_len;
+ void *ret;
+
+ if (!old_ptr)
+ return malloc(new_size);
+
+ heap = container_of(old_ptr, struct nolibc_heap, user_p);
+ user_p_len = heap->len - sizeof(*heap);
+ /*
+ * Don't realloc() if @user_p_len >= @new_size, this block of
+ * memory is still enough to handle the @new_size. Just return
+ * the same pointer.
+ */
+ if (user_p_len >= new_size)
+ return old_ptr;
+
+ ret = malloc(new_size);
+ if (__builtin_expect(!ret, 0))
+ return NULL;
+
+ memcpy(ret, heap->user_p, heap->len);
+ munmap(heap, heap->len);
+ return ret;
+}
+
+/* Converts the unsigned long integer <in> to its hex representation into
+ * buffer <buffer>, which must be long enough to store the number and the
+ * trailing zero (17 bytes for "ffffffffffffffff" or 9 for "ffffffff"). The
+ * buffer is filled from the first byte, and the number of characters emitted
+ * (not counting the trailing zero) is returned. The function is constructed
+ * in a way to optimize the code size and avoid any divide that could add a
+ * dependency on large external functions.
+ */
+static __attribute__((unused))
+int utoh_r(unsigned long in, char *buffer)
+{
+ signed char pos = (~0UL > 0xfffffffful) ? 60 : 28;
+ int digits = 0;
+ int dig;
+
+ do {
+ dig = in >> pos;
+ in -= (uint64_t)dig << pos;
+ pos -= 4;
+ if (dig || digits || pos < 0) {
+ if (dig > 9)
+ dig += 'a' - '0' - 10;
+ buffer[digits++] = '0' + dig;
+ }
+ } while (pos >= 0);
+
+ buffer[digits] = 0;
+ return digits;
+}
+
+/* converts unsigned long <in> to an hex string using the static itoa_buffer
+ * and returns the pointer to that string.
+ */
+static inline __attribute__((unused))
+char *utoh(unsigned long in)
+{
+ utoh_r(in, itoa_buffer);
+ return itoa_buffer;
+}
+
+/* Converts the unsigned long integer <in> to its string representation into
+ * buffer <buffer>, which must be long enough to store the number and the
+ * trailing zero (21 bytes for 18446744073709551615 in 64-bit, 11 for
+ * 4294967295 in 32-bit). The buffer is filled from the first byte, and the
+ * number of characters emitted (not counting the trailing zero) is returned.
+ * The function is constructed in a way to optimize the code size and avoid
+ * any divide that could add a dependency on large external functions.
+ */
+static __attribute__((unused))
+int utoa_r(unsigned long in, char *buffer)
+{
+ unsigned long lim;
+ int digits = 0;
+ int pos = (~0UL > 0xfffffffful) ? 19 : 9;
+ int dig;
+
+ do {
+ for (dig = 0, lim = 1; dig < pos; dig++)
+ lim *= 10;
+
+ if (digits || in >= lim || !pos) {
+ for (dig = 0; in >= lim; dig++)
+ in -= lim;
+ buffer[digits++] = '0' + dig;
+ }
+ } while (pos--);
+
+ buffer[digits] = 0;
+ return digits;
+}
+
+/* Converts the signed long integer <in> to its string representation into
+ * buffer <buffer>, which must be long enough to store the number and the
+ * trailing zero (21 bytes for -9223372036854775808 in 64-bit, 12 for
+ * -2147483648 in 32-bit). The buffer is filled from the first byte, and the
+ * number of characters emitted (not counting the trailing zero) is returned.
+ */
+static __attribute__((unused))
+int itoa_r(long in, char *buffer)
+{
+ char *ptr = buffer;
+ int len = 0;
+
+ if (in < 0) {
+ in = -in;
+ *(ptr++) = '-';
+ len++;
+ }
+ len += utoa_r(in, ptr);
+ return len;
+}
+
+/* for historical compatibility, same as above but returns the pointer to the
+ * buffer.
+ */
+static inline __attribute__((unused))
+char *ltoa_r(long in, char *buffer)
+{
+ itoa_r(in, buffer);
+ return buffer;
+}
+
+/* converts long integer <in> to a string using the static itoa_buffer and
+ * returns the pointer to that string.
+ */
+static inline __attribute__((unused))
+char *itoa(long in)
+{
+ itoa_r(in, itoa_buffer);
+ return itoa_buffer;
+}
+
+/* converts long integer <in> to a string using the static itoa_buffer and
+ * returns the pointer to that string. Same as above, for compatibility.
+ */
+static inline __attribute__((unused))
+char *ltoa(long in)
+{
+ itoa_r(in, itoa_buffer);
+ return itoa_buffer;
+}
+
+/* converts unsigned long integer <in> to a string using the static itoa_buffer
+ * and returns the pointer to that string.
+ */
+static inline __attribute__((unused))
+char *utoa(unsigned long in)
+{
+ utoa_r(in, itoa_buffer);
+ return itoa_buffer;
+}
+
+/* Converts the unsigned 64-bit integer <in> to its hex representation into
+ * buffer <buffer>, which must be long enough to store the number and the
+ * trailing zero (17 bytes for "ffffffffffffffff"). The buffer is filled from
+ * the first byte, and the number of characters emitted (not counting the
+ * trailing zero) is returned. The function is constructed in a way to optimize
+ * the code size and avoid any divide that could add a dependency on large
+ * external functions.
+ */
+static __attribute__((unused))
+int u64toh_r(uint64_t in, char *buffer)
+{
+ signed char pos = 60;
+ int digits = 0;
+ int dig;
+
+ do {
+ if (sizeof(long) >= 8) {
+ dig = (in >> pos) & 0xF;
+ } else {
+ /* 32-bit platforms: avoid a 64-bit shift */
+ uint32_t d = (pos >= 32) ? (in >> 32) : in;
+ dig = (d >> (pos & 31)) & 0xF;
+ }
+ if (dig > 9)
+ dig += 'a' - '0' - 10;
+ pos -= 4;
+ if (dig || digits || pos < 0)
+ buffer[digits++] = '0' + dig;
+ } while (pos >= 0);
+
+ buffer[digits] = 0;
+ return digits;
+}
+
+/* converts uint64_t <in> to an hex string using the static itoa_buffer and
+ * returns the pointer to that string.
+ */
+static inline __attribute__((unused))
+char *u64toh(uint64_t in)
+{
+ u64toh_r(in, itoa_buffer);
+ return itoa_buffer;
+}
+
+/* Converts the unsigned 64-bit integer <in> to its string representation into
+ * buffer <buffer>, which must be long enough to store the number and the
+ * trailing zero (21 bytes for 18446744073709551615). The buffer is filled from
+ * the first byte, and the number of characters emitted (not counting the
+ * trailing zero) is returned. The function is constructed in a way to optimize
+ * the code size and avoid any divide that could add a dependency on large
+ * external functions.
+ */
+static __attribute__((unused))
+int u64toa_r(uint64_t in, char *buffer)
+{
+ unsigned long long lim;
+ int digits = 0;
+ int pos = 19; /* start with the highest possible digit */
+ int dig;
+
+ do {
+ for (dig = 0, lim = 1; dig < pos; dig++)
+ lim *= 10;
+
+ if (digits || in >= lim || !pos) {
+ for (dig = 0; in >= lim; dig++)
+ in -= lim;
+ buffer[digits++] = '0' + dig;
+ }
+ } while (pos--);
+
+ buffer[digits] = 0;
+ return digits;
+}
+
+/* Converts the signed 64-bit integer <in> to its string representation into
+ * buffer <buffer>, which must be long enough to store the number and the
+ * trailing zero (21 bytes for -9223372036854775808). The buffer is filled from
+ * the first byte, and the number of characters emitted (not counting the
+ * trailing zero) is returned.
+ */
+static __attribute__((unused))
+int i64toa_r(int64_t in, char *buffer)
+{
+ char *ptr = buffer;
+ int len = 0;
+
+ if (in < 0) {
+ in = -in;
+ *(ptr++) = '-';
+ len++;
+ }
+ len += u64toa_r(in, ptr);
+ return len;
+}
+
+/* converts int64_t <in> to a string using the static itoa_buffer and returns
+ * the pointer to that string.
+ */
+static inline __attribute__((unused))
+char *i64toa(int64_t in)
+{
+ i64toa_r(in, itoa_buffer);
+ return itoa_buffer;
+}
+
+/* converts uint64_t <in> to a string using the static itoa_buffer and returns
+ * the pointer to that string.
+ */
+static inline __attribute__((unused))
+char *u64toa(uint64_t in)
+{
+ u64toa_r(in, itoa_buffer);
+ return itoa_buffer;
+}
+
+#endif /* _NOLIBC_STDLIB_H */
diff --git a/tools/include/nolibc/string.h b/tools/include/nolibc/string.h
new file mode 100644
index 000000000000..bef35bee9c44
--- /dev/null
+++ b/tools/include/nolibc/string.h
@@ -0,0 +1,285 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * string function definitions for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_STRING_H
+#define _NOLIBC_STRING_H
+
+#include "std.h"
+
+static void *malloc(size_t len);
+
+/*
+ * As much as possible, please keep functions alphabetically sorted.
+ */
+
+static __attribute__((unused))
+int memcmp(const void *s1, const void *s2, size_t n)
+{
+ size_t ofs = 0;
+ char c1 = 0;
+
+ while (ofs < n && !(c1 = ((char *)s1)[ofs] - ((char *)s2)[ofs])) {
+ ofs++;
+ }
+ return c1;
+}
+
+static __attribute__((unused))
+void *_nolibc_memcpy_up(void *dst, const void *src, size_t len)
+{
+ size_t pos = 0;
+
+ while (pos < len) {
+ ((char *)dst)[pos] = ((const char *)src)[pos];
+ pos++;
+ }
+ return dst;
+}
+
+static __attribute__((unused))
+void *_nolibc_memcpy_down(void *dst, const void *src, size_t len)
+{
+ while (len) {
+ len--;
+ ((char *)dst)[len] = ((const char *)src)[len];
+ }
+ return dst;
+}
+
+/* might be ignored by the compiler without -ffreestanding, then found as
+ * missing.
+ */
+__attribute__((weak,unused,section(".text.nolibc_memmove")))
+void *memmove(void *dst, const void *src, size_t len)
+{
+ size_t dir, pos;
+
+ pos = len;
+ dir = -1;
+
+ if (dst < src) {
+ pos = -1;
+ dir = 1;
+ }
+
+ while (len) {
+ pos += dir;
+ ((char *)dst)[pos] = ((const char *)src)[pos];
+ len--;
+ }
+ return dst;
+}
+
+/* must be exported, as it's used by libgcc on ARM */
+__attribute__((weak,unused,section(".text.nolibc_memcpy")))
+void *memcpy(void *dst, const void *src, size_t len)
+{
+ return _nolibc_memcpy_up(dst, src, len);
+}
+
+/* might be ignored by the compiler without -ffreestanding, then found as
+ * missing.
+ */
+__attribute__((weak,unused,section(".text.nolibc_memset")))
+void *memset(void *dst, int b, size_t len)
+{
+ char *p = dst;
+
+ while (len--)
+ *(p++) = b;
+ return dst;
+}
+
+static __attribute__((unused))
+char *strchr(const char *s, int c)
+{
+ while (*s) {
+ if (*s == (char)c)
+ return (char *)s;
+ s++;
+ }
+ return NULL;
+}
+
+static __attribute__((unused))
+int strcmp(const char *a, const char *b)
+{
+ unsigned int c;
+ int diff;
+
+ while (!(diff = (unsigned char)*a++ - (c = (unsigned char)*b++)) && c)
+ ;
+ return diff;
+}
+
+static __attribute__((unused))
+char *strcpy(char *dst, const char *src)
+{
+ char *ret = dst;
+
+ while ((*dst++ = *src++));
+ return ret;
+}
+
+/* this function is only used with arguments that are not constants or when
+ * it's not known because optimizations are disabled.
+ */
+static __attribute__((unused))
+size_t nolibc_strlen(const char *str)
+{
+ size_t len;
+
+ for (len = 0; str[len]; len++);
+ return len;
+}
+
+/* do not trust __builtin_constant_p() at -O0, as clang will emit a test and
+ * the two branches, then will rely on an external definition of strlen().
+ */
+#if defined(__OPTIMIZE__)
+#define strlen(str) ({ \
+ __builtin_constant_p((str)) ? \
+ __builtin_strlen((str)) : \
+ nolibc_strlen((str)); \
+})
+#else
+#define strlen(str) nolibc_strlen((str))
+#endif
+
+static __attribute__((unused))
+size_t strnlen(const char *str, size_t maxlen)
+{
+ size_t len;
+
+ for (len = 0; (len < maxlen) && str[len]; len++);
+ return len;
+}
+
+static __attribute__((unused))
+char *strdup(const char *str)
+{
+ size_t len;
+ char *ret;
+
+ len = strlen(str);
+ ret = malloc(len + 1);
+ if (__builtin_expect(ret != NULL, 1))
+ memcpy(ret, str, len + 1);
+
+ return ret;
+}
+
+static __attribute__((unused))
+char *strndup(const char *str, size_t maxlen)
+{
+ size_t len;
+ char *ret;
+
+ len = strnlen(str, maxlen);
+ ret = malloc(len + 1);
+ if (__builtin_expect(ret != NULL, 1)) {
+ memcpy(ret, str, len);
+ ret[len] = '\0';
+ }
+
+ return ret;
+}
+
+static __attribute__((unused))
+size_t strlcat(char *dst, const char *src, size_t size)
+{
+ size_t len;
+ char c;
+
+ for (len = 0; dst[len]; len++)
+ ;
+
+ for (;;) {
+ c = *src;
+ if (len < size)
+ dst[len] = c;
+ if (!c)
+ break;
+ len++;
+ src++;
+ }
+
+ return len;
+}
+
+static __attribute__((unused))
+size_t strlcpy(char *dst, const char *src, size_t size)
+{
+ size_t len;
+ char c;
+
+ for (len = 0;;) {
+ c = src[len];
+ if (len < size)
+ dst[len] = c;
+ if (!c)
+ break;
+ len++;
+ }
+ return len;
+}
+
+static __attribute__((unused))
+char *strncat(char *dst, const char *src, size_t size)
+{
+ char *orig = dst;
+
+ while (*dst)
+ dst++;
+
+ while (size && (*dst = *src)) {
+ src++;
+ dst++;
+ size--;
+ }
+
+ *dst = 0;
+ return orig;
+}
+
+static __attribute__((unused))
+int strncmp(const char *a, const char *b, size_t size)
+{
+ unsigned int c;
+ int diff = 0;
+
+ while (size-- &&
+ !(diff = (unsigned char)*a++ - (c = (unsigned char)*b++)) && c)
+ ;
+
+ return diff;
+}
+
+static __attribute__((unused))
+char *strncpy(char *dst, const char *src, size_t size)
+{
+ size_t len;
+
+ for (len = 0; len < size; len++)
+ if ((dst[len] = *src))
+ src++;
+ return dst;
+}
+
+static __attribute__((unused))
+char *strrchr(const char *s, int c)
+{
+ const char *ret = NULL;
+
+ while (*s) {
+ if (*s == (char)c)
+ ret = s;
+ s++;
+ }
+ return (char *)ret;
+}
+
+#endif /* _NOLIBC_STRING_H */
diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h
new file mode 100644
index 000000000000..08491070387b
--- /dev/null
+++ b/tools/include/nolibc/sys.h
@@ -0,0 +1,1247 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Syscall definitions for NOLIBC (those in man(2))
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_SYS_H
+#define _NOLIBC_SYS_H
+
+#include <stdarg.h>
+#include "std.h"
+
+/* system includes */
+#include <asm/unistd.h>
+#include <asm/signal.h> // for SIGCHLD
+#include <asm/ioctls.h>
+#include <asm/mman.h>
+#include <linux/fs.h>
+#include <linux/loop.h>
+#include <linux/time.h>
+
+#include "arch.h"
+#include "errno.h"
+#include "types.h"
+
+
+/* Functions in this file only describe syscalls. They're declared static so
+ * that the compiler usually decides to inline them while still being allowed
+ * to pass a pointer to one of their instances. Each syscall exists in two
+ * versions:
+ * - the "internal" ones, which matches the raw syscall interface at the
+ * kernel level, which may sometimes slightly differ from the documented
+ * libc-level ones. For example most of them return either a valid value
+ * or -errno. All of these are prefixed with "sys_". They may be called
+ * by non-portable applications if desired.
+ *
+ * - the "exported" ones, whose interface must closely match the one
+ * documented in man(2), that applications are supposed to expect. These
+ * ones rely on the internal ones, and set errno.
+ *
+ * Each syscall will be defined with the two functions, sorted in alphabetical
+ * order applied to the exported names.
+ *
+ * In case of doubt about the relevance of a function here, only those which
+ * set errno should be defined here. Wrappers like those appearing in man(3)
+ * should not be placed here.
+ */
+
+
+/*
+ * int brk(void *addr);
+ * void *sbrk(intptr_t inc)
+ */
+
+static __attribute__((unused))
+void *sys_brk(void *addr)
+{
+ return (void *)my_syscall1(__NR_brk, addr);
+}
+
+static __attribute__((unused))
+int brk(void *addr)
+{
+ void *ret = sys_brk(addr);
+
+ if (!ret) {
+ SET_ERRNO(ENOMEM);
+ return -1;
+ }
+ return 0;
+}
+
+static __attribute__((unused))
+void *sbrk(intptr_t inc)
+{
+ void *ret;
+
+ /* first call to find current end */
+ if ((ret = sys_brk(0)) && (sys_brk(ret + inc) == ret + inc))
+ return ret + inc;
+
+ SET_ERRNO(ENOMEM);
+ return (void *)-1;
+}
+
+
+/*
+ * int chdir(const char *path);
+ */
+
+static __attribute__((unused))
+int sys_chdir(const char *path)
+{
+ return my_syscall1(__NR_chdir, path);
+}
+
+static __attribute__((unused))
+int chdir(const char *path)
+{
+ int ret = sys_chdir(path);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/*
+ * int chmod(const char *path, mode_t mode);
+ */
+
+static __attribute__((unused))
+int sys_chmod(const char *path, mode_t mode)
+{
+#ifdef __NR_fchmodat
+ return my_syscall4(__NR_fchmodat, AT_FDCWD, path, mode, 0);
+#elif defined(__NR_chmod)
+ return my_syscall2(__NR_chmod, path, mode);
+#else
+#error Neither __NR_fchmodat nor __NR_chmod defined, cannot implement sys_chmod()
+#endif
+}
+
+static __attribute__((unused))
+int chmod(const char *path, mode_t mode)
+{
+ int ret = sys_chmod(path, mode);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/*
+ * int chown(const char *path, uid_t owner, gid_t group);
+ */
+
+static __attribute__((unused))
+int sys_chown(const char *path, uid_t owner, gid_t group)
+{
+#ifdef __NR_fchownat
+ return my_syscall5(__NR_fchownat, AT_FDCWD, path, owner, group, 0);
+#elif defined(__NR_chown)
+ return my_syscall3(__NR_chown, path, owner, group);
+#else
+#error Neither __NR_fchownat nor __NR_chown defined, cannot implement sys_chown()
+#endif
+}
+
+static __attribute__((unused))
+int chown(const char *path, uid_t owner, gid_t group)
+{
+ int ret = sys_chown(path, owner, group);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/*
+ * int chroot(const char *path);
+ */
+
+static __attribute__((unused))
+int sys_chroot(const char *path)
+{
+ return my_syscall1(__NR_chroot, path);
+}
+
+static __attribute__((unused))
+int chroot(const char *path)
+{
+ int ret = sys_chroot(path);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/*
+ * int close(int fd);
+ */
+
+static __attribute__((unused))
+int sys_close(int fd)
+{
+ return my_syscall1(__NR_close, fd);
+}
+
+static __attribute__((unused))
+int close(int fd)
+{
+ int ret = sys_close(fd);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/*
+ * int dup(int fd);
+ */
+
+static __attribute__((unused))
+int sys_dup(int fd)
+{
+ return my_syscall1(__NR_dup, fd);
+}
+
+static __attribute__((unused))
+int dup(int fd)
+{
+ int ret = sys_dup(fd);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/*
+ * int dup2(int old, int new);
+ */
+
+static __attribute__((unused))
+int sys_dup2(int old, int new)
+{
+#ifdef __NR_dup3
+ return my_syscall3(__NR_dup3, old, new, 0);
+#elif defined(__NR_dup2)
+ return my_syscall2(__NR_dup2, old, new);
+#else
+#error Neither __NR_dup3 nor __NR_dup2 defined, cannot implement sys_dup2()
+#endif
+}
+
+static __attribute__((unused))
+int dup2(int old, int new)
+{
+ int ret = sys_dup2(old, new);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/*
+ * int dup3(int old, int new, int flags);
+ */
+
+#ifdef __NR_dup3
+static __attribute__((unused))
+int sys_dup3(int old, int new, int flags)
+{
+ return my_syscall3(__NR_dup3, old, new, flags);
+}
+
+static __attribute__((unused))
+int dup3(int old, int new, int flags)
+{
+ int ret = sys_dup3(old, new, flags);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+#endif
+
+
+/*
+ * int execve(const char *filename, char *const argv[], char *const envp[]);
+ */
+
+static __attribute__((unused))
+int sys_execve(const char *filename, char *const argv[], char *const envp[])
+{
+ return my_syscall3(__NR_execve, filename, argv, envp);
+}
+
+static __attribute__((unused))
+int execve(const char *filename, char *const argv[], char *const envp[])
+{
+ int ret = sys_execve(filename, argv, envp);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/*
+ * void exit(int status);
+ */
+
+static __attribute__((noreturn,unused))
+void sys_exit(int status)
+{
+ my_syscall1(__NR_exit, status & 255);
+ while(1); // shut the "noreturn" warnings.
+}
+
+static __attribute__((noreturn,unused))
+void exit(int status)
+{
+ sys_exit(status);
+}
+
+
+/*
+ * pid_t fork(void);
+ */
+
+static __attribute__((unused))
+pid_t sys_fork(void)
+{
+#ifdef __NR_clone
+ /* note: some archs only have clone() and not fork(). Different archs
+ * have a different API, but most archs have the flags on first arg and
+ * will not use the rest with no other flag.
+ */
+ return my_syscall5(__NR_clone, SIGCHLD, 0, 0, 0, 0);
+#elif defined(__NR_fork)
+ return my_syscall0(__NR_fork);
+#else
+#error Neither __NR_clone nor __NR_fork defined, cannot implement sys_fork()
+#endif
+}
+
+static __attribute__((unused))
+pid_t fork(void)
+{
+ pid_t ret = sys_fork();
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/*
+ * int fsync(int fd);
+ */
+
+static __attribute__((unused))
+int sys_fsync(int fd)
+{
+ return my_syscall1(__NR_fsync, fd);
+}
+
+static __attribute__((unused))
+int fsync(int fd)
+{
+ int ret = sys_fsync(fd);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/*
+ * int getdents64(int fd, struct linux_dirent64 *dirp, int count);
+ */
+
+static __attribute__((unused))
+int sys_getdents64(int fd, struct linux_dirent64 *dirp, int count)
+{
+ return my_syscall3(__NR_getdents64, fd, dirp, count);
+}
+
+static __attribute__((unused))
+int getdents64(int fd, struct linux_dirent64 *dirp, int count)
+{
+ int ret = sys_getdents64(fd, dirp, count);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/*
+ * pid_t getpgid(pid_t pid);
+ */
+
+static __attribute__((unused))
+pid_t sys_getpgid(pid_t pid)
+{
+ return my_syscall1(__NR_getpgid, pid);
+}
+
+static __attribute__((unused))
+pid_t getpgid(pid_t pid)
+{
+ pid_t ret = sys_getpgid(pid);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/*
+ * pid_t getpgrp(void);
+ */
+
+static __attribute__((unused))
+pid_t sys_getpgrp(void)
+{
+ return sys_getpgid(0);
+}
+
+static __attribute__((unused))
+pid_t getpgrp(void)
+{
+ return sys_getpgrp();
+}
+
+
+/*
+ * pid_t getpid(void);
+ */
+
+static __attribute__((unused))
+pid_t sys_getpid(void)
+{
+ return my_syscall0(__NR_getpid);
+}
+
+static __attribute__((unused))
+pid_t getpid(void)
+{
+ return sys_getpid();
+}
+
+
+/*
+ * pid_t getppid(void);
+ */
+
+static __attribute__((unused))
+pid_t sys_getppid(void)
+{
+ return my_syscall0(__NR_getppid);
+}
+
+static __attribute__((unused))
+pid_t getppid(void)
+{
+ return sys_getppid();
+}
+
+
+/*
+ * pid_t gettid(void);
+ */
+
+static __attribute__((unused))
+pid_t sys_gettid(void)
+{
+ return my_syscall0(__NR_gettid);
+}
+
+static __attribute__((unused))
+pid_t gettid(void)
+{
+ return sys_gettid();
+}
+
+
+/*
+ * int gettimeofday(struct timeval *tv, struct timezone *tz);
+ */
+
+static __attribute__((unused))
+int sys_gettimeofday(struct timeval *tv, struct timezone *tz)
+{
+ return my_syscall2(__NR_gettimeofday, tv, tz);
+}
+
+static __attribute__((unused))
+int gettimeofday(struct timeval *tv, struct timezone *tz)
+{
+ int ret = sys_gettimeofday(tv, tz);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/*
+ * int ioctl(int fd, unsigned long req, void *value);
+ */
+
+static __attribute__((unused))
+int sys_ioctl(int fd, unsigned long req, void *value)
+{
+ return my_syscall3(__NR_ioctl, fd, req, value);
+}
+
+static __attribute__((unused))
+int ioctl(int fd, unsigned long req, void *value)
+{
+ int ret = sys_ioctl(fd, req, value);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+/*
+ * int kill(pid_t pid, int signal);
+ */
+
+static __attribute__((unused))
+int sys_kill(pid_t pid, int signal)
+{
+ return my_syscall2(__NR_kill, pid, signal);
+}
+
+static __attribute__((unused))
+int kill(pid_t pid, int signal)
+{
+ int ret = sys_kill(pid, signal);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/*
+ * int link(const char *old, const char *new);
+ */
+
+static __attribute__((unused))
+int sys_link(const char *old, const char *new)
+{
+#ifdef __NR_linkat
+ return my_syscall5(__NR_linkat, AT_FDCWD, old, AT_FDCWD, new, 0);
+#elif defined(__NR_link)
+ return my_syscall2(__NR_link, old, new);
+#else
+#error Neither __NR_linkat nor __NR_link defined, cannot implement sys_link()
+#endif
+}
+
+static __attribute__((unused))
+int link(const char *old, const char *new)
+{
+ int ret = sys_link(old, new);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/*
+ * off_t lseek(int fd, off_t offset, int whence);
+ */
+
+static __attribute__((unused))
+off_t sys_lseek(int fd, off_t offset, int whence)
+{
+ return my_syscall3(__NR_lseek, fd, offset, whence);
+}
+
+static __attribute__((unused))
+off_t lseek(int fd, off_t offset, int whence)
+{
+ off_t ret = sys_lseek(fd, offset, whence);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/*
+ * int mkdir(const char *path, mode_t mode);
+ */
+
+static __attribute__((unused))
+int sys_mkdir(const char *path, mode_t mode)
+{
+#ifdef __NR_mkdirat
+ return my_syscall3(__NR_mkdirat, AT_FDCWD, path, mode);
+#elif defined(__NR_mkdir)
+ return my_syscall2(__NR_mkdir, path, mode);
+#else
+#error Neither __NR_mkdirat nor __NR_mkdir defined, cannot implement sys_mkdir()
+#endif
+}
+
+static __attribute__((unused))
+int mkdir(const char *path, mode_t mode)
+{
+ int ret = sys_mkdir(path, mode);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/*
+ * int mknod(const char *path, mode_t mode, dev_t dev);
+ */
+
+static __attribute__((unused))
+long sys_mknod(const char *path, mode_t mode, dev_t dev)
+{
+#ifdef __NR_mknodat
+ return my_syscall4(__NR_mknodat, AT_FDCWD, path, mode, dev);
+#elif defined(__NR_mknod)
+ return my_syscall3(__NR_mknod, path, mode, dev);
+#else
+#error Neither __NR_mknodat nor __NR_mknod defined, cannot implement sys_mknod()
+#endif
+}
+
+static __attribute__((unused))
+int mknod(const char *path, mode_t mode, dev_t dev)
+{
+ int ret = sys_mknod(path, mode, dev);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+#ifndef MAP_SHARED
+#define MAP_SHARED 0x01 /* Share changes */
+#define MAP_PRIVATE 0x02 /* Changes are private */
+#define MAP_SHARED_VALIDATE 0x03 /* share + validate extension flags */
+#endif
+
+#ifndef MAP_FAILED
+#define MAP_FAILED ((void *)-1)
+#endif
+
+static __attribute__((unused))
+void *sys_mmap(void *addr, size_t length, int prot, int flags, int fd,
+ off_t offset)
+{
+#ifndef my_syscall6
+ /* Function not implemented. */
+ return -ENOSYS;
+#else
+
+ int n;
+
+#if defined(__i386__)
+ n = __NR_mmap2;
+ offset >>= 12;
+#else
+ n = __NR_mmap;
+#endif
+
+ return (void *)my_syscall6(n, addr, length, prot, flags, fd, offset);
+#endif
+}
+
+static __attribute__((unused))
+void *mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset)
+{
+ void *ret = sys_mmap(addr, length, prot, flags, fd, offset);
+
+ if ((unsigned long)ret >= -4095UL) {
+ SET_ERRNO(-(long)ret);
+ ret = MAP_FAILED;
+ }
+ return ret;
+}
+
+static __attribute__((unused))
+int sys_munmap(void *addr, size_t length)
+{
+ return my_syscall2(__NR_munmap, addr, length);
+}
+
+static __attribute__((unused))
+int munmap(void *addr, size_t length)
+{
+ int ret = sys_munmap(addr, length);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+/*
+ * int mount(const char *source, const char *target,
+ * const char *fstype, unsigned long flags,
+ * const void *data);
+ */
+static __attribute__((unused))
+int sys_mount(const char *src, const char *tgt, const char *fst,
+ unsigned long flags, const void *data)
+{
+ return my_syscall5(__NR_mount, src, tgt, fst, flags, data);
+}
+
+static __attribute__((unused))
+int mount(const char *src, const char *tgt,
+ const char *fst, unsigned long flags,
+ const void *data)
+{
+ int ret = sys_mount(src, tgt, fst, flags, data);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/*
+ * int open(const char *path, int flags[, mode_t mode]);
+ */
+
+static __attribute__((unused))
+int sys_open(const char *path, int flags, mode_t mode)
+{
+#ifdef __NR_openat
+ return my_syscall4(__NR_openat, AT_FDCWD, path, flags, mode);
+#elif defined(__NR_open)
+ return my_syscall3(__NR_open, path, flags, mode);
+#else
+#error Neither __NR_openat nor __NR_open defined, cannot implement sys_open()
+#endif
+}
+
+static __attribute__((unused))
+int open(const char *path, int flags, ...)
+{
+ mode_t mode = 0;
+ int ret;
+
+ if (flags & O_CREAT) {
+ va_list args;
+
+ va_start(args, flags);
+ mode = va_arg(args, mode_t);
+ va_end(args);
+ }
+
+ ret = sys_open(path, flags, mode);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/*
+ * int pivot_root(const char *new, const char *old);
+ */
+
+static __attribute__((unused))
+int sys_pivot_root(const char *new, const char *old)
+{
+ return my_syscall2(__NR_pivot_root, new, old);
+}
+
+static __attribute__((unused))
+int pivot_root(const char *new, const char *old)
+{
+ int ret = sys_pivot_root(new, old);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/*
+ * int poll(struct pollfd *fds, int nfds, int timeout);
+ */
+
+static __attribute__((unused))
+int sys_poll(struct pollfd *fds, int nfds, int timeout)
+{
+#if defined(__NR_ppoll)
+ struct timespec t;
+
+ if (timeout >= 0) {
+ t.tv_sec = timeout / 1000;
+ t.tv_nsec = (timeout % 1000) * 1000000;
+ }
+ return my_syscall4(__NR_ppoll, fds, nfds, (timeout >= 0) ? &t : NULL, NULL);
+#elif defined(__NR_poll)
+ return my_syscall3(__NR_poll, fds, nfds, timeout);
+#else
+#error Neither __NR_ppoll nor __NR_poll defined, cannot implement sys_poll()
+#endif
+}
+
+static __attribute__((unused))
+int poll(struct pollfd *fds, int nfds, int timeout)
+{
+ int ret = sys_poll(fds, nfds, timeout);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/*
+ * ssize_t read(int fd, void *buf, size_t count);
+ */
+
+static __attribute__((unused))
+ssize_t sys_read(int fd, void *buf, size_t count)
+{
+ return my_syscall3(__NR_read, fd, buf, count);
+}
+
+static __attribute__((unused))
+ssize_t read(int fd, void *buf, size_t count)
+{
+ ssize_t ret = sys_read(fd, buf, count);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/*
+ * int reboot(int cmd);
+ * <cmd> is among LINUX_REBOOT_CMD_*
+ */
+
+static __attribute__((unused))
+ssize_t sys_reboot(int magic1, int magic2, int cmd, void *arg)
+{
+ return my_syscall4(__NR_reboot, magic1, magic2, cmd, arg);
+}
+
+static __attribute__((unused))
+int reboot(int cmd)
+{
+ int ret = sys_reboot(LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, cmd, 0);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/*
+ * int sched_yield(void);
+ */
+
+static __attribute__((unused))
+int sys_sched_yield(void)
+{
+ return my_syscall0(__NR_sched_yield);
+}
+
+static __attribute__((unused))
+int sched_yield(void)
+{
+ int ret = sys_sched_yield();
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/*
+ * int select(int nfds, fd_set *read_fds, fd_set *write_fds,
+ * fd_set *except_fds, struct timeval *timeout);
+ */
+
+static __attribute__((unused))
+int sys_select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeval *timeout)
+{
+#if defined(__ARCH_WANT_SYS_OLD_SELECT) && !defined(__NR__newselect)
+ struct sel_arg_struct {
+ unsigned long n;
+ fd_set *r, *w, *e;
+ struct timeval *t;
+ } arg = { .n = nfds, .r = rfds, .w = wfds, .e = efds, .t = timeout };
+ return my_syscall1(__NR_select, &arg);
+#elif defined(__ARCH_WANT_SYS_PSELECT6) && defined(__NR_pselect6)
+ struct timespec t;
+
+ if (timeout) {
+ t.tv_sec = timeout->tv_sec;
+ t.tv_nsec = timeout->tv_usec * 1000;
+ }
+ return my_syscall6(__NR_pselect6, nfds, rfds, wfds, efds, timeout ? &t : NULL, NULL);
+#elif defined(__NR__newselect) || defined(__NR_select)
+#ifndef __NR__newselect
+#define __NR__newselect __NR_select
+#endif
+ return my_syscall5(__NR__newselect, nfds, rfds, wfds, efds, timeout);
+#else
+#error None of __NR_select, __NR_pselect6, nor __NR__newselect defined, cannot implement sys_select()
+#endif
+}
+
+static __attribute__((unused))
+int select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeval *timeout)
+{
+ int ret = sys_select(nfds, rfds, wfds, efds, timeout);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/*
+ * int setpgid(pid_t pid, pid_t pgid);
+ */
+
+static __attribute__((unused))
+int sys_setpgid(pid_t pid, pid_t pgid)
+{
+ return my_syscall2(__NR_setpgid, pid, pgid);
+}
+
+static __attribute__((unused))
+int setpgid(pid_t pid, pid_t pgid)
+{
+ int ret = sys_setpgid(pid, pgid);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/*
+ * pid_t setsid(void);
+ */
+
+static __attribute__((unused))
+pid_t sys_setsid(void)
+{
+ return my_syscall0(__NR_setsid);
+}
+
+static __attribute__((unused))
+pid_t setsid(void)
+{
+ pid_t ret = sys_setsid();
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/*
+ * int stat(const char *path, struct stat *buf);
+ * Warning: the struct stat's layout is arch-dependent.
+ */
+
+static __attribute__((unused))
+int sys_stat(const char *path, struct stat *buf)
+{
+ struct sys_stat_struct stat;
+ long ret;
+
+#ifdef __NR_newfstatat
+ /* only solution for arm64 */
+ ret = my_syscall4(__NR_newfstatat, AT_FDCWD, path, &stat, 0);
+#elif defined(__NR_stat)
+ ret = my_syscall2(__NR_stat, path, &stat);
+#else
+#error Neither __NR_newfstatat nor __NR_stat defined, cannot implement sys_stat()
+#endif
+ buf->st_dev = stat.st_dev;
+ buf->st_ino = stat.st_ino;
+ buf->st_mode = stat.st_mode;
+ buf->st_nlink = stat.st_nlink;
+ buf->st_uid = stat.st_uid;
+ buf->st_gid = stat.st_gid;
+ buf->st_rdev = stat.st_rdev;
+ buf->st_size = stat.st_size;
+ buf->st_blksize = stat.st_blksize;
+ buf->st_blocks = stat.st_blocks;
+ buf->st_atime = stat.st_atime;
+ buf->st_mtime = stat.st_mtime;
+ buf->st_ctime = stat.st_ctime;
+ return ret;
+}
+
+static __attribute__((unused))
+int stat(const char *path, struct stat *buf)
+{
+ int ret = sys_stat(path, buf);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/*
+ * int symlink(const char *old, const char *new);
+ */
+
+static __attribute__((unused))
+int sys_symlink(const char *old, const char *new)
+{
+#ifdef __NR_symlinkat
+ return my_syscall3(__NR_symlinkat, old, AT_FDCWD, new);
+#elif defined(__NR_symlink)
+ return my_syscall2(__NR_symlink, old, new);
+#else
+#error Neither __NR_symlinkat nor __NR_symlink defined, cannot implement sys_symlink()
+#endif
+}
+
+static __attribute__((unused))
+int symlink(const char *old, const char *new)
+{
+ int ret = sys_symlink(old, new);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/*
+ * mode_t umask(mode_t mode);
+ */
+
+static __attribute__((unused))
+mode_t sys_umask(mode_t mode)
+{
+ return my_syscall1(__NR_umask, mode);
+}
+
+static __attribute__((unused))
+mode_t umask(mode_t mode)
+{
+ return sys_umask(mode);
+}
+
+
+/*
+ * int umount2(const char *path, int flags);
+ */
+
+static __attribute__((unused))
+int sys_umount2(const char *path, int flags)
+{
+ return my_syscall2(__NR_umount2, path, flags);
+}
+
+static __attribute__((unused))
+int umount2(const char *path, int flags)
+{
+ int ret = sys_umount2(path, flags);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/*
+ * int unlink(const char *path);
+ */
+
+static __attribute__((unused))
+int sys_unlink(const char *path)
+{
+#ifdef __NR_unlinkat
+ return my_syscall3(__NR_unlinkat, AT_FDCWD, path, 0);
+#elif defined(__NR_unlink)
+ return my_syscall1(__NR_unlink, path);
+#else
+#error Neither __NR_unlinkat nor __NR_unlink defined, cannot implement sys_unlink()
+#endif
+}
+
+static __attribute__((unused))
+int unlink(const char *path)
+{
+ int ret = sys_unlink(path);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/*
+ * pid_t wait(int *status);
+ * pid_t wait4(pid_t pid, int *status, int options, struct rusage *rusage);
+ * pid_t waitpid(pid_t pid, int *status, int options);
+ */
+
+static __attribute__((unused))
+pid_t sys_wait4(pid_t pid, int *status, int options, struct rusage *rusage)
+{
+ return my_syscall4(__NR_wait4, pid, status, options, rusage);
+}
+
+static __attribute__((unused))
+pid_t wait(int *status)
+{
+ pid_t ret = sys_wait4(-1, status, 0, NULL);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+static __attribute__((unused))
+pid_t wait4(pid_t pid, int *status, int options, struct rusage *rusage)
+{
+ pid_t ret = sys_wait4(pid, status, options, rusage);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+static __attribute__((unused))
+pid_t waitpid(pid_t pid, int *status, int options)
+{
+ pid_t ret = sys_wait4(pid, status, options, NULL);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/*
+ * ssize_t write(int fd, const void *buf, size_t count);
+ */
+
+static __attribute__((unused))
+ssize_t sys_write(int fd, const void *buf, size_t count)
+{
+ return my_syscall3(__NR_write, fd, buf, count);
+}
+
+static __attribute__((unused))
+ssize_t write(int fd, const void *buf, size_t count)
+{
+ ssize_t ret = sys_write(fd, buf, count);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+#endif /* _NOLIBC_SYS_H */
diff --git a/tools/include/nolibc/time.h b/tools/include/nolibc/time.h
new file mode 100644
index 000000000000..d18b7661fdd7
--- /dev/null
+++ b/tools/include/nolibc/time.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * time function definitions for NOLIBC
+ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_TIME_H
+#define _NOLIBC_TIME_H
+
+#include "std.h"
+#include "arch.h"
+#include "types.h"
+#include "sys.h"
+
+static __attribute__((unused))
+time_t time(time_t *tptr)
+{
+ struct timeval tv;
+
+ /* note, cannot fail here */
+ sys_gettimeofday(&tv, NULL);
+
+ if (tptr)
+ *tptr = tv.tv_sec;
+ return tv.tv_sec;
+}
+
+#endif /* _NOLIBC_TIME_H */
diff --git a/tools/include/nolibc/types.h b/tools/include/nolibc/types.h
new file mode 100644
index 000000000000..959997034e55
--- /dev/null
+++ b/tools/include/nolibc/types.h
@@ -0,0 +1,205 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Special types used by various syscalls for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_TYPES_H
+#define _NOLIBC_TYPES_H
+
+#include "std.h"
+#include <linux/time.h>
+
+
+/* Only the generic macros and types may be defined here. The arch-specific
+ * ones such as the O_RDONLY and related macros used by fcntl() and open(), or
+ * the layout of sys_stat_struct must not be defined here.
+ */
+
+/* stat flags (WARNING, octal here) */
+#define S_IFDIR 0040000
+#define S_IFCHR 0020000
+#define S_IFBLK 0060000
+#define S_IFREG 0100000
+#define S_IFIFO 0010000
+#define S_IFLNK 0120000
+#define S_IFSOCK 0140000
+#define S_IFMT 0170000
+
+#define S_ISDIR(mode) (((mode) & S_IFDIR) == S_IFDIR)
+#define S_ISCHR(mode) (((mode) & S_IFCHR) == S_IFCHR)
+#define S_ISBLK(mode) (((mode) & S_IFBLK) == S_IFBLK)
+#define S_ISREG(mode) (((mode) & S_IFREG) == S_IFREG)
+#define S_ISFIFO(mode) (((mode) & S_IFIFO) == S_IFIFO)
+#define S_ISLNK(mode) (((mode) & S_IFLNK) == S_IFLNK)
+#define S_ISSOCK(mode) (((mode) & S_IFSOCK) == S_IFSOCK)
+
+/* dirent types */
+#define DT_UNKNOWN 0x0
+#define DT_FIFO 0x1
+#define DT_CHR 0x2
+#define DT_DIR 0x4
+#define DT_BLK 0x6
+#define DT_REG 0x8
+#define DT_LNK 0xa
+#define DT_SOCK 0xc
+
+/* commonly an fd_set represents 256 FDs */
+#ifndef FD_SETSIZE
+#define FD_SETSIZE 256
+#endif
+
+/* PATH_MAX and MAXPATHLEN are often used and found with plenty of different
+ * values.
+ */
+#ifndef PATH_MAX
+#define PATH_MAX 4096
+#endif
+
+#ifndef MAXPATHLEN
+#define MAXPATHLEN (PATH_MAX)
+#endif
+
+/* Special FD used by all the *at functions */
+#ifndef AT_FDCWD
+#define AT_FDCWD (-100)
+#endif
+
+/* whence values for lseek() */
+#define SEEK_SET 0
+#define SEEK_CUR 1
+#define SEEK_END 2
+
+/* cmd for reboot() */
+#define LINUX_REBOOT_MAGIC1 0xfee1dead
+#define LINUX_REBOOT_MAGIC2 0x28121969
+#define LINUX_REBOOT_CMD_HALT 0xcdef0123
+#define LINUX_REBOOT_CMD_POWER_OFF 0x4321fedc
+#define LINUX_REBOOT_CMD_RESTART 0x01234567
+#define LINUX_REBOOT_CMD_SW_SUSPEND 0xd000fce2
+
+/* Macros used on waitpid()'s return status */
+#define WEXITSTATUS(status) (((status) & 0xff00) >> 8)
+#define WIFEXITED(status) (((status) & 0x7f) == 0)
+
+/* waitpid() flags */
+#define WNOHANG 1
+
+/* standard exit() codes */
+#define EXIT_SUCCESS 0
+#define EXIT_FAILURE 1
+
+/* for select() */
+typedef struct {
+ uint32_t fd32[(FD_SETSIZE + 31) / 32];
+} fd_set;
+
+#define FD_CLR(fd, set) do { \
+ fd_set *__set = (set); \
+ int __fd = (fd); \
+ if (__fd >= 0) \
+ __set->fd32[__fd / 32] &= ~(1U << (__fd & 31)); \
+ } while (0)
+
+#define FD_SET(fd, set) do { \
+ fd_set *__set = (set); \
+ int __fd = (fd); \
+ if (__fd >= 0) \
+ __set->fd32[__fd / 32] |= 1U << (__fd & 31); \
+ } while (0)
+
+#define FD_ISSET(fd, set) ({ \
+ fd_set *__set = (set); \
+ int __fd = (fd); \
+ int __r = 0; \
+ if (__fd >= 0) \
+ __r = !!(__set->fd32[__fd / 32] & 1U << (__fd & 31)); \
+ __r; \
+ })
+
+#define FD_ZERO(set) do { \
+ fd_set *__set = (set); \
+ int __idx; \
+ for (__idx = 0; __idx < (FD_SETSIZE+31) / 32; __idx ++) \
+ __set->fd32[__idx] = 0; \
+ } while (0)
+
+/* for poll() */
+#define POLLIN 0x0001
+#define POLLPRI 0x0002
+#define POLLOUT 0x0004
+#define POLLERR 0x0008
+#define POLLHUP 0x0010
+#define POLLNVAL 0x0020
+
+struct pollfd {
+ int fd;
+ short int events;
+ short int revents;
+};
+
+/* for getdents64() */
+struct linux_dirent64 {
+ uint64_t d_ino;
+ int64_t d_off;
+ unsigned short d_reclen;
+ unsigned char d_type;
+ char d_name[];
+};
+
+/* needed by wait4() */
+struct rusage {
+ struct timeval ru_utime;
+ struct timeval ru_stime;
+ long ru_maxrss;
+ long ru_ixrss;
+ long ru_idrss;
+ long ru_isrss;
+ long ru_minflt;
+ long ru_majflt;
+ long ru_nswap;
+ long ru_inblock;
+ long ru_oublock;
+ long ru_msgsnd;
+ long ru_msgrcv;
+ long ru_nsignals;
+ long ru_nvcsw;
+ long ru_nivcsw;
+};
+
+/* The format of the struct as returned by the libc to the application, which
+ * significantly differs from the format returned by the stat() syscall flavours.
+ */
+struct stat {
+ dev_t st_dev; /* ID of device containing file */
+ ino_t st_ino; /* inode number */
+ mode_t st_mode; /* protection */
+ nlink_t st_nlink; /* number of hard links */
+ uid_t st_uid; /* user ID of owner */
+ gid_t st_gid; /* group ID of owner */
+ dev_t st_rdev; /* device ID (if special file) */
+ off_t st_size; /* total size, in bytes */
+ blksize_t st_blksize; /* blocksize for file system I/O */
+ blkcnt_t st_blocks; /* number of 512B blocks allocated */
+ time_t st_atime; /* time of last access */
+ time_t st_mtime; /* time of last modification */
+ time_t st_ctime; /* time of last status change */
+};
+
+/* WARNING, it only deals with the 4096 first majors and 256 first minors */
+#define makedev(major, minor) ((dev_t)((((major) & 0xfff) << 8) | ((minor) & 0xff)))
+#define major(dev) ((unsigned int)(((dev) >> 8) & 0xfff))
+#define minor(dev) ((unsigned int)(((dev) & 0xff))
+
+#ifndef offsetof
+#define offsetof(TYPE, FIELD) ((size_t) &((TYPE *)0)->FIELD)
+#endif
+
+#ifndef container_of
+#define container_of(PTR, TYPE, FIELD) ({ \
+ __typeof__(((TYPE *)0)->FIELD) *__FIELD_PTR = (PTR); \
+ (TYPE *)((char *) __FIELD_PTR - offsetof(TYPE, FIELD)); \
+})
+#endif
+
+#endif /* _NOLIBC_TYPES_H */
diff --git a/tools/include/nolibc/unistd.h b/tools/include/nolibc/unistd.h
new file mode 100644
index 000000000000..1c25e20ee360
--- /dev/null
+++ b/tools/include/nolibc/unistd.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * unistd function definitions for NOLIBC
+ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_UNISTD_H
+#define _NOLIBC_UNISTD_H
+
+#include "std.h"
+#include "arch.h"
+#include "types.h"
+#include "sys.h"
+
+
+static __attribute__((unused))
+int msleep(unsigned int msecs)
+{
+ struct timeval my_timeval = { msecs / 1000, (msecs % 1000) * 1000 };
+
+ if (sys_select(0, 0, 0, 0, &my_timeval) < 0)
+ return (my_timeval.tv_sec * 1000) +
+ (my_timeval.tv_usec / 1000) +
+ !!(my_timeval.tv_usec % 1000);
+ else
+ return 0;
+}
+
+static __attribute__((unused))
+unsigned int sleep(unsigned int seconds)
+{
+ struct timeval my_timeval = { seconds, 0 };
+
+ if (sys_select(0, 0, 0, 0, &my_timeval) < 0)
+ return my_timeval.tv_sec + !!my_timeval.tv_usec;
+ else
+ return 0;
+}
+
+static __attribute__((unused))
+int usleep(unsigned int usecs)
+{
+ struct timeval my_timeval = { usecs / 1000000, usecs % 1000000 };
+
+ return sys_select(0, 0, 0, 0, &my_timeval);
+}
+
+static __attribute__((unused))
+int tcsetpgrp(int fd, pid_t pid)
+{
+ return ioctl(fd, TIOCSPGRP, &pid);
+}
+
+#endif /* _NOLIBC_UNISTD_H */
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 91a6fe4e02c0..6a184d260c7f 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -445,7 +445,13 @@ struct kvm_run {
#define KVM_SYSTEM_EVENT_RESET 2
#define KVM_SYSTEM_EVENT_CRASH 3
__u32 type;
- __u64 flags;
+ __u32 ndata;
+ union {
+#ifndef __KERNEL__
+ __u64 flags;
+#endif
+ __u64 data[16];
+ };
} system_event;
/* KVM_EXIT_S390_STSI */
struct {
@@ -1144,6 +1150,8 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_S390_MEM_OP_EXTENSION 211
#define KVM_CAP_PMU_CAPABILITY 212
#define KVM_CAP_DISABLE_QUIRKS2 213
+/* #define KVM_CAP_VM_TSC_CONTROL 214 */
+#define KVM_CAP_SYSTEM_EVENT_DATA 215
#ifdef KVM_CAP_IRQ_ROUTING
diff --git a/tools/include/uapi/linux/vhost.h b/tools/include/uapi/linux/vhost.h
index c998860d7bbc..5d99e7c242a2 100644
--- a/tools/include/uapi/linux/vhost.h
+++ b/tools/include/uapi/linux/vhost.h
@@ -150,4 +150,11 @@
/* Get the valid iova range */
#define VHOST_VDPA_GET_IOVA_RANGE _IOR(VHOST_VIRTIO, 0x78, \
struct vhost_vdpa_iova_range)
+
+/* Get the config size */
+#define VHOST_VDPA_GET_CONFIG_SIZE _IOR(VHOST_VIRTIO, 0x79, __u32)
+
+/* Get the count of all virtqueues */
+#define VHOST_VDPA_GET_VQS_COUNT _IOR(VHOST_VIRTIO, 0x80, __u32)
+
#endif
diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c
index 1b15ba13c477..a09315538a30 100644
--- a/tools/lib/perf/evlist.c
+++ b/tools/lib/perf/evlist.c
@@ -577,7 +577,6 @@ int perf_evlist__mmap_ops(struct perf_evlist *evlist,
{
struct perf_evsel *evsel;
const struct perf_cpu_map *cpus = evlist->user_requested_cpus;
- const struct perf_thread_map *threads = evlist->threads;
if (!ops || !ops->get || !ops->mmap)
return -EINVAL;
@@ -589,7 +588,7 @@ int perf_evlist__mmap_ops(struct perf_evlist *evlist,
perf_evlist__for_each_entry(evlist, evsel) {
if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
evsel->sample_id == NULL &&
- perf_evsel__alloc_id(evsel, perf_cpu_map__nr(cpus), threads->nr) < 0)
+ perf_evsel__alloc_id(evsel, evsel->fd->max_x, evsel->fd->max_y) < 0)
return -ENOMEM;
}
diff --git a/tools/lib/subcmd/parse-options.c b/tools/lib/subcmd/parse-options.c
index 39ebf6192016..9fa75943f2ed 100644
--- a/tools/lib/subcmd/parse-options.c
+++ b/tools/lib/subcmd/parse-options.c
@@ -806,9 +806,9 @@ static int option__cmp(const void *va, const void *vb)
static struct option *options__order(const struct option *opts)
{
- int nr_opts = 0, len;
+ int nr_opts = 0, nr_group = 0, len;
const struct option *o = opts;
- struct option *ordered;
+ struct option *opt, *ordered, *group;
for (o = opts; o->type != OPTION_END; o++)
++nr_opts;
@@ -819,7 +819,18 @@ static struct option *options__order(const struct option *opts)
goto out;
memcpy(ordered, opts, len);
- qsort(ordered, nr_opts, sizeof(*o), option__cmp);
+ /* sort each option group individually */
+ for (opt = group = ordered; opt->type != OPTION_END; opt++) {
+ if (opt->type == OPTION_GROUP) {
+ qsort(group, nr_group, sizeof(*opt), option__cmp);
+ group = opt + 1;
+ nr_group = 0;
+ continue;
+ }
+ nr_group++;
+ }
+ qsort(group, nr_group, sizeof(*opt), option__cmp);
+
out:
return ordered;
}
diff --git a/tools/lib/thermal/.gitignore b/tools/lib/thermal/.gitignore
new file mode 100644
index 000000000000..5d2aeda80fea
--- /dev/null
+++ b/tools/lib/thermal/.gitignore
@@ -0,0 +1,2 @@
+libthermal.so*
+libthermal.pc
diff --git a/tools/lib/thermal/Build b/tools/lib/thermal/Build
new file mode 100644
index 000000000000..4a892d9e24f9
--- /dev/null
+++ b/tools/lib/thermal/Build
@@ -0,0 +1,5 @@
+libthermal-y += commands.o
+libthermal-y += events.o
+libthermal-y += thermal_nl.o
+libthermal-y += sampling.o
+libthermal-y += thermal.o
diff --git a/tools/lib/thermal/Makefile b/tools/lib/thermal/Makefile
new file mode 100644
index 000000000000..2d0d255fd0e1
--- /dev/null
+++ b/tools/lib/thermal/Makefile
@@ -0,0 +1,165 @@
+# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+# Most of this file is copied from tools/lib/perf/Makefile
+
+LIBTHERMAL_VERSION = 0
+LIBTHERMAL_PATCHLEVEL = 0
+LIBTHERMAL_EXTRAVERSION = 1
+
+MAKEFLAGS += --no-print-directory
+
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(CURDIR)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+# $(info Determined 'srctree' to be $(srctree))
+endif
+
+INSTALL = install
+
+# Use DESTDIR for installing into a different root directory.
+# This is useful for building a package. The program will be
+# installed in this directory as if it was the root directory.
+# Then the build tool can move it later.
+DESTDIR ?=
+DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))'
+
+include $(srctree)/tools/scripts/Makefile.include
+include $(srctree)/tools/scripts/Makefile.arch
+
+ifeq ($(LP64), 1)
+ libdir_relative = lib64
+else
+ libdir_relative = lib
+endif
+
+prefix ?=
+libdir = $(prefix)/$(libdir_relative)
+
+# Shell quotes
+libdir_SQ = $(subst ','\'',$(libdir))
+libdir_relative_SQ = $(subst ','\'',$(libdir_relative))
+
+ifeq ("$(origin V)", "command line")
+ VERBOSE = $(V)
+endif
+ifndef VERBOSE
+ VERBOSE = 0
+endif
+
+ifeq ($(VERBOSE),1)
+ Q =
+else
+ Q = @
+endif
+
+# Set compile option CFLAGS
+ifdef EXTRA_CFLAGS
+ CFLAGS := $(EXTRA_CFLAGS)
+else
+ CFLAGS := -g -Wall
+endif
+
+INCLUDES = \
+-I/usr/include/libnl3 \
+-I$(srctree)/tools/lib/thermal/include \
+-I$(srctree)/tools/lib/ \
+-I$(srctree)/tools/include \
+-I$(srctree)/tools/arch/$(SRCARCH)/include/ \
+-I$(srctree)/tools/arch/$(SRCARCH)/include/uapi \
+-I$(srctree)/tools/include/uapi
+
+# Append required CFLAGS
+override CFLAGS += $(EXTRA_WARNINGS)
+override CFLAGS += -Werror -Wall
+override CFLAGS += -fPIC
+override CFLAGS += $(INCLUDES)
+override CFLAGS += -fvisibility=hidden
+override CFGLAS += -Wl,-L.
+override CFGLAS += -Wl,-lthermal
+
+all:
+
+export srctree OUTPUT CC LD CFLAGS V
+export DESTDIR DESTDIR_SQ
+
+include $(srctree)/tools/build/Makefile.include
+
+VERSION_SCRIPT := libthermal.map
+
+PATCHLEVEL = $(LIBTHERMAL_PATCHLEVEL)
+EXTRAVERSION = $(LIBTHERMAL_EXTRAVERSION)
+VERSION = $(LIBTHERMAL_VERSION).$(LIBTHERMAL_PATCHLEVEL).$(LIBTHERMAL_EXTRAVERSION)
+
+LIBTHERMAL_SO := $(OUTPUT)libthermal.so.$(VERSION)
+LIBTHERMAL_A := $(OUTPUT)libthermal.a
+LIBTHERMAL_IN := $(OUTPUT)libthermal-in.o
+LIBTHERMAL_PC := $(OUTPUT)libthermal.pc
+LIBTHERMAL_ALL := $(LIBTHERMAL_A) $(OUTPUT)libthermal.so*
+
+THERMAL_UAPI := include/uapi/linux/thermal.h
+
+$(THERMAL_UAPI): FORCE
+ ln -sf $(srctree)/$@ $(srctree)/tools/$@
+
+$(LIBTHERMAL_IN): FORCE
+ $(Q)$(MAKE) $(build)=libthermal
+
+$(LIBTHERMAL_A): $(LIBTHERMAL_IN)
+ $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIBTHERMAL_IN)
+
+$(LIBTHERMAL_SO): $(LIBTHERMAL_IN)
+ $(QUIET_LINK)$(CC) --shared -Wl,-soname,libthermal.so \
+ -Wl,--version-script=$(VERSION_SCRIPT) $^ -o $@
+ @ln -sf $(@F) $(OUTPUT)libthermal.so
+ @ln -sf $(@F) $(OUTPUT)libthermal.so.$(LIBTHERMAL_VERSION)
+
+
+libs: $(THERMAL_UAPI) $(LIBTHERMAL_A) $(LIBTHERMAL_SO) $(LIBTHERMAL_PC)
+
+all: fixdep
+ $(Q)$(MAKE) libs
+
+clean:
+ $(call QUIET_CLEAN, libthermal) $(RM) $(LIBTHERMAL_A) \
+ *.o *~ *.a *.so *.so.$(VERSION) *.so.$(LIBTHERMAL_VERSION) .*.d .*.cmd LIBTHERMAL-CFLAGS $(LIBTHERMAL_PC)
+
+$(LIBTHERMAL_PC):
+ $(QUIET_GEN)sed -e "s|@PREFIX@|$(prefix)|" \
+ -e "s|@LIBDIR@|$(libdir_SQ)|" \
+ -e "s|@VERSION@|$(VERSION)|" \
+ < libthermal.pc.template > $@
+
+define do_install_mkdir
+ if [ ! -d '$(DESTDIR_SQ)$1' ]; then \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$1'; \
+ fi
+endef
+
+define do_install
+ if [ ! -d '$(DESTDIR_SQ)$2' ]; then \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \
+ fi; \
+ $(INSTALL) $1 $(if $3,-m $3,) '$(DESTDIR_SQ)$2'
+endef
+
+install_lib: libs
+ $(call QUIET_INSTALL, $(LIBTHERMAL_ALL)) \
+ $(call do_install_mkdir,$(libdir_SQ)); \
+ cp -fpR $(LIBTHERMAL_ALL) $(DESTDIR)$(libdir_SQ)
+
+install_headers:
+ $(call QUIET_INSTALL, headers) \
+ $(call do_install,include/thermal.h,$(prefix)/include/thermal,644); \
+
+install_pkgconfig: $(LIBTHERMAL_PC)
+ $(call QUIET_INSTALL, $(LIBTHERMAL_PC)) \
+ $(call do_install,$(LIBTHERMAL_PC),$(libdir_SQ)/pkgconfig,644)
+
+install_doc:
+ $(Q)$(MAKE) -C Documentation install-man install-html install-examples
+
+install: install_lib install_headers install_pkgconfig
+
+FORCE:
+
+.PHONY: all install clean FORCE
diff --git a/tools/lib/thermal/commands.c b/tools/lib/thermal/commands.c
new file mode 100644
index 000000000000..73d4d4e8d6ec
--- /dev/null
+++ b/tools/lib/thermal/commands.c
@@ -0,0 +1,349 @@
+// SPDX-License-Identifier: LGPL-2.1+
+// Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org>
+#define _GNU_SOURCE
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <thermal.h>
+#include "thermal_nl.h"
+
+static struct nla_policy thermal_genl_policy[THERMAL_GENL_ATTR_MAX + 1] = {
+ /* Thermal zone */
+ [THERMAL_GENL_ATTR_TZ] = { .type = NLA_NESTED },
+ [THERMAL_GENL_ATTR_TZ_ID] = { .type = NLA_U32 },
+ [THERMAL_GENL_ATTR_TZ_TEMP] = { .type = NLA_U32 },
+ [THERMAL_GENL_ATTR_TZ_TRIP] = { .type = NLA_NESTED },
+ [THERMAL_GENL_ATTR_TZ_TRIP_ID] = { .type = NLA_U32 },
+ [THERMAL_GENL_ATTR_TZ_TRIP_TEMP] = { .type = NLA_U32 },
+ [THERMAL_GENL_ATTR_TZ_TRIP_TYPE] = { .type = NLA_U32 },
+ [THERMAL_GENL_ATTR_TZ_TRIP_HYST] = { .type = NLA_U32 },
+ [THERMAL_GENL_ATTR_TZ_MODE] = { .type = NLA_U32 },
+ [THERMAL_GENL_ATTR_TZ_CDEV_WEIGHT] = { .type = NLA_U32 },
+ [THERMAL_GENL_ATTR_TZ_NAME] = { .type = NLA_STRING },
+
+ /* Governor(s) */
+ [THERMAL_GENL_ATTR_TZ_GOV] = { .type = NLA_NESTED },
+ [THERMAL_GENL_ATTR_TZ_GOV_NAME] = { .type = NLA_STRING },
+
+ /* Cooling devices */
+ [THERMAL_GENL_ATTR_CDEV] = { .type = NLA_NESTED },
+ [THERMAL_GENL_ATTR_CDEV_ID] = { .type = NLA_U32 },
+ [THERMAL_GENL_ATTR_CDEV_CUR_STATE] = { .type = NLA_U32 },
+ [THERMAL_GENL_ATTR_CDEV_MAX_STATE] = { .type = NLA_U32 },
+ [THERMAL_GENL_ATTR_CDEV_NAME] = { .type = NLA_STRING },
+};
+
+static int parse_tz_get(struct genl_info *info, struct thermal_zone **tz)
+{
+ struct nlattr *attr;
+ struct thermal_zone *__tz = NULL;
+ size_t size = 0;
+ int rem;
+
+ nla_for_each_nested(attr, info->attrs[THERMAL_GENL_ATTR_TZ], rem) {
+
+ if (nla_type(attr) == THERMAL_GENL_ATTR_TZ_ID) {
+
+ size++;
+
+ __tz = realloc(__tz, sizeof(*__tz) * (size + 2));
+ if (!__tz)
+ return THERMAL_ERROR;
+
+ __tz[size - 1].id = nla_get_u32(attr);
+ }
+
+
+ if (nla_type(attr) == THERMAL_GENL_ATTR_TZ_NAME)
+ nla_strlcpy(__tz[size - 1].name, attr,
+ THERMAL_NAME_LENGTH);
+ }
+
+ if (__tz)
+ __tz[size].id = -1;
+
+ *tz = __tz;
+
+ return THERMAL_SUCCESS;
+}
+
+static int parse_cdev_get(struct genl_info *info, struct thermal_cdev **cdev)
+{
+ struct nlattr *attr;
+ struct thermal_cdev *__cdev = NULL;
+ size_t size = 0;
+ int rem;
+
+ nla_for_each_nested(attr, info->attrs[THERMAL_GENL_ATTR_CDEV], rem) {
+
+ if (nla_type(attr) == THERMAL_GENL_ATTR_CDEV_ID) {
+
+ size++;
+
+ __cdev = realloc(__cdev, sizeof(*__cdev) * (size + 2));
+ if (!__cdev)
+ return THERMAL_ERROR;
+
+ __cdev[size - 1].id = nla_get_u32(attr);
+ }
+
+ if (nla_type(attr) == THERMAL_GENL_ATTR_CDEV_NAME) {
+ nla_strlcpy(__cdev[size - 1].name, attr,
+ THERMAL_NAME_LENGTH);
+ }
+
+ if (nla_type(attr) == THERMAL_GENL_ATTR_CDEV_CUR_STATE)
+ __cdev[size - 1].cur_state = nla_get_u32(attr);
+
+ if (nla_type(attr) == THERMAL_GENL_ATTR_CDEV_MAX_STATE)
+ __cdev[size - 1].max_state = nla_get_u32(attr);
+ }
+
+ if (__cdev)
+ __cdev[size].id = -1;
+
+ *cdev = __cdev;
+
+ return THERMAL_SUCCESS;
+}
+
+static int parse_tz_get_trip(struct genl_info *info, struct thermal_zone *tz)
+{
+ struct nlattr *attr;
+ struct thermal_trip *__tt = NULL;
+ size_t size = 0;
+ int rem;
+
+ nla_for_each_nested(attr, info->attrs[THERMAL_GENL_ATTR_TZ_TRIP], rem) {
+
+ if (nla_type(attr) == THERMAL_GENL_ATTR_TZ_TRIP_ID) {
+
+ size++;
+
+ __tt = realloc(__tt, sizeof(*__tt) * (size + 2));
+ if (!__tt)
+ return THERMAL_ERROR;
+
+ __tt[size - 1].id = nla_get_u32(attr);
+ }
+
+ if (nla_type(attr) == THERMAL_GENL_ATTR_TZ_TRIP_TYPE)
+ __tt[size - 1].type = nla_get_u32(attr);
+
+ if (nla_type(attr) == THERMAL_GENL_ATTR_TZ_TRIP_TEMP)
+ __tt[size - 1].temp = nla_get_u32(attr);
+
+ if (nla_type(attr) == THERMAL_GENL_ATTR_TZ_TRIP_HYST)
+ __tt[size - 1].hyst = nla_get_u32(attr);
+ }
+
+ if (__tt)
+ __tt[size].id = -1;
+
+ tz->trip = __tt;
+
+ return THERMAL_SUCCESS;
+}
+
+static int parse_tz_get_temp(struct genl_info *info, struct thermal_zone *tz)
+{
+ int id = -1;
+
+ if (info->attrs[THERMAL_GENL_ATTR_TZ_ID])
+ id = nla_get_u32(info->attrs[THERMAL_GENL_ATTR_TZ_ID]);
+
+ if (tz->id != id)
+ return THERMAL_ERROR;
+
+ if (info->attrs[THERMAL_GENL_ATTR_TZ_TEMP])
+ tz->temp = nla_get_u32(info->attrs[THERMAL_GENL_ATTR_TZ_TEMP]);
+
+ return THERMAL_SUCCESS;
+}
+
+static int parse_tz_get_gov(struct genl_info *info, struct thermal_zone *tz)
+{
+ int id = -1;
+
+ if (info->attrs[THERMAL_GENL_ATTR_TZ_ID])
+ id = nla_get_u32(info->attrs[THERMAL_GENL_ATTR_TZ_ID]);
+
+ if (tz->id != id)
+ return THERMAL_ERROR;
+
+ if (info->attrs[THERMAL_GENL_ATTR_TZ_GOV_NAME]) {
+ nla_strlcpy(tz->governor,
+ info->attrs[THERMAL_GENL_ATTR_TZ_GOV_NAME],
+ THERMAL_NAME_LENGTH);
+ }
+
+ return THERMAL_SUCCESS;
+}
+
+static int handle_netlink(struct nl_cache_ops *unused,
+ struct genl_cmd *cmd,
+ struct genl_info *info, void *arg)
+{
+ int ret;
+
+ switch (cmd->c_id) {
+
+ case THERMAL_GENL_CMD_TZ_GET_ID:
+ ret = parse_tz_get(info, arg);
+ break;
+
+ case THERMAL_GENL_CMD_CDEV_GET:
+ ret = parse_cdev_get(info, arg);
+ break;
+
+ case THERMAL_GENL_CMD_TZ_GET_TEMP:
+ ret = parse_tz_get_temp(info, arg);
+ break;
+
+ case THERMAL_GENL_CMD_TZ_GET_TRIP:
+ ret = parse_tz_get_trip(info, arg);
+ break;
+
+ case THERMAL_GENL_CMD_TZ_GET_GOV:
+ ret = parse_tz_get_gov(info, arg);
+ break;
+
+ default:
+ return THERMAL_ERROR;
+ }
+
+ return ret;
+}
+
+static struct genl_cmd thermal_cmds[] = {
+ {
+ .c_id = THERMAL_GENL_CMD_TZ_GET_ID,
+ .c_name = (char *)"List thermal zones",
+ .c_msg_parser = handle_netlink,
+ .c_maxattr = THERMAL_GENL_ATTR_MAX,
+ .c_attr_policy = thermal_genl_policy,
+ },
+ {
+ .c_id = THERMAL_GENL_CMD_TZ_GET_GOV,
+ .c_name = (char *)"Get governor",
+ .c_msg_parser = handle_netlink,
+ .c_maxattr = THERMAL_GENL_ATTR_MAX,
+ .c_attr_policy = thermal_genl_policy,
+ },
+ {
+ .c_id = THERMAL_GENL_CMD_TZ_GET_TEMP,
+ .c_name = (char *)"Get thermal zone temperature",
+ .c_msg_parser = handle_netlink,
+ .c_maxattr = THERMAL_GENL_ATTR_MAX,
+ .c_attr_policy = thermal_genl_policy,
+ },
+ {
+ .c_id = THERMAL_GENL_CMD_TZ_GET_TRIP,
+ .c_name = (char *)"Get thermal zone trip points",
+ .c_msg_parser = handle_netlink,
+ .c_maxattr = THERMAL_GENL_ATTR_MAX,
+ .c_attr_policy = thermal_genl_policy,
+ },
+ {
+ .c_id = THERMAL_GENL_CMD_CDEV_GET,
+ .c_name = (char *)"Get cooling devices",
+ .c_msg_parser = handle_netlink,
+ .c_maxattr = THERMAL_GENL_ATTR_MAX,
+ .c_attr_policy = thermal_genl_policy,
+ },
+};
+
+static struct genl_ops thermal_cmd_ops = {
+ .o_name = (char *)"thermal",
+ .o_cmds = thermal_cmds,
+ .o_ncmds = ARRAY_SIZE(thermal_cmds),
+};
+
+static thermal_error_t thermal_genl_auto(struct thermal_handler *th, int id, int cmd,
+ int flags, void *arg)
+{
+ struct nl_msg *msg;
+ void *hdr;
+
+ msg = nlmsg_alloc();
+ if (!msg)
+ return THERMAL_ERROR;
+
+ hdr = genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, thermal_cmd_ops.o_id,
+ 0, flags, cmd, THERMAL_GENL_VERSION);
+ if (!hdr)
+ return THERMAL_ERROR;
+
+ if (id >= 0 && nla_put_u32(msg, THERMAL_GENL_ATTR_TZ_ID, id))
+ return THERMAL_ERROR;
+
+ if (nl_send_msg(th->sk_cmd, th->cb_cmd, msg, genl_handle_msg, arg))
+ return THERMAL_ERROR;
+
+ nlmsg_free(msg);
+
+ return THERMAL_SUCCESS;
+}
+
+thermal_error_t thermal_cmd_get_tz(struct thermal_handler *th, struct thermal_zone **tz)
+{
+ return thermal_genl_auto(th, -1, THERMAL_GENL_CMD_TZ_GET_ID,
+ NLM_F_DUMP | NLM_F_ACK, tz);
+}
+
+thermal_error_t thermal_cmd_get_cdev(struct thermal_handler *th, struct thermal_cdev **tc)
+{
+ return thermal_genl_auto(th, -1, THERMAL_GENL_CMD_CDEV_GET,
+ NLM_F_DUMP | NLM_F_ACK, tc);
+}
+
+thermal_error_t thermal_cmd_get_trip(struct thermal_handler *th, struct thermal_zone *tz)
+{
+ return thermal_genl_auto(th, tz->id, THERMAL_GENL_CMD_TZ_GET_TRIP,
+ 0, tz);
+}
+
+thermal_error_t thermal_cmd_get_governor(struct thermal_handler *th, struct thermal_zone *tz)
+{
+ return thermal_genl_auto(th, tz->id, THERMAL_GENL_CMD_TZ_GET_GOV, 0, tz);
+}
+
+thermal_error_t thermal_cmd_get_temp(struct thermal_handler *th, struct thermal_zone *tz)
+{
+ return thermal_genl_auto(th, tz->id, THERMAL_GENL_CMD_TZ_GET_TEMP, 0, tz);
+}
+
+thermal_error_t thermal_cmd_exit(struct thermal_handler *th)
+{
+ if (genl_unregister_family(&thermal_cmd_ops))
+ return THERMAL_ERROR;
+
+ nl_thermal_disconnect(th->sk_cmd, th->cb_cmd);
+
+ return THERMAL_SUCCESS;
+}
+
+thermal_error_t thermal_cmd_init(struct thermal_handler *th)
+{
+ int ret;
+ int family;
+
+ if (nl_thermal_connect(&th->sk_cmd, &th->cb_cmd))
+ return THERMAL_ERROR;
+
+ ret = genl_register_family(&thermal_cmd_ops);
+ if (ret)
+ return THERMAL_ERROR;
+
+ ret = genl_ops_resolve(th->sk_cmd, &thermal_cmd_ops);
+ if (ret)
+ return THERMAL_ERROR;
+
+ family = genl_ctrl_resolve(th->sk_cmd, "nlctrl");
+ if (family != GENL_ID_CTRL)
+ return THERMAL_ERROR;
+
+ return THERMAL_SUCCESS;
+}
diff --git a/tools/lib/thermal/events.c b/tools/lib/thermal/events.c
new file mode 100644
index 000000000000..a7a55d1a0c4c
--- /dev/null
+++ b/tools/lib/thermal/events.c
@@ -0,0 +1,164 @@
+// SPDX-License-Identifier: LGPL-2.1+
+// Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org>
+#include <linux/netlink.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+
+#include <thermal.h>
+#include "thermal_nl.h"
+
+/*
+ * Optimization: fill this array to tell which event we do want to pay
+ * attention to. That happens at init time with the ops
+ * structure. Each ops will enable the event and the general handler
+ * will be able to discard the event if there is not ops associated
+ * with it.
+ */
+static int enabled_ops[__THERMAL_GENL_EVENT_MAX];
+
+static int handle_thermal_event(struct nl_msg *n, void *arg)
+{
+ struct nlmsghdr *nlh = nlmsg_hdr(n);
+ struct genlmsghdr *genlhdr = genlmsg_hdr(nlh);
+ struct nlattr *attrs[THERMAL_GENL_ATTR_MAX + 1];
+ struct thermal_handler_param *thp = arg;
+ struct thermal_events_ops *ops = &thp->th->ops->events;
+
+ genlmsg_parse(nlh, 0, attrs, THERMAL_GENL_ATTR_MAX, NULL);
+
+ arg = thp->arg;
+
+ /*
+ * This is an event we don't care of, bail out.
+ */
+ if (!enabled_ops[genlhdr->cmd])
+ return THERMAL_SUCCESS;
+
+ switch (genlhdr->cmd) {
+
+ case THERMAL_GENL_EVENT_TZ_CREATE:
+ return ops->tz_create(nla_get_string(attrs[THERMAL_GENL_ATTR_TZ_NAME]),
+ nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), arg);
+
+ case THERMAL_GENL_EVENT_TZ_DELETE:
+ return ops->tz_delete(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), arg);
+
+ case THERMAL_GENL_EVENT_TZ_ENABLE:
+ return ops->tz_enable(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), arg);
+
+ case THERMAL_GENL_EVENT_TZ_DISABLE:
+ return ops->tz_disable(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), arg);
+
+ case THERMAL_GENL_EVENT_TZ_TRIP_CHANGE:
+ return ops->trip_change(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]),
+ nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_ID]),
+ nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_TYPE]),
+ nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_TEMP]),
+ nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_HYST]), arg);
+
+ case THERMAL_GENL_EVENT_TZ_TRIP_ADD:
+ return ops->trip_add(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]),
+ nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_ID]),
+ nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_TYPE]),
+ nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_TEMP]),
+ nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_HYST]), arg);
+
+ case THERMAL_GENL_EVENT_TZ_TRIP_DELETE:
+ return ops->trip_delete(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]),
+ nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_ID]), arg);
+
+ case THERMAL_GENL_EVENT_TZ_TRIP_UP:
+ return ops->trip_high(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]),
+ nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_ID]),
+ nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TEMP]), arg);
+
+ case THERMAL_GENL_EVENT_TZ_TRIP_DOWN:
+ return ops->trip_low(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]),
+ nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_ID]),
+ nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TEMP]), arg);
+
+ case THERMAL_GENL_EVENT_CDEV_ADD:
+ return ops->cdev_add(nla_get_string(attrs[THERMAL_GENL_ATTR_CDEV_NAME]),
+ nla_get_u32(attrs[THERMAL_GENL_ATTR_CDEV_ID]),
+ nla_get_u32(attrs[THERMAL_GENL_ATTR_CDEV_MAX_STATE]), arg);
+
+ case THERMAL_GENL_EVENT_CDEV_DELETE:
+ return ops->cdev_delete(nla_get_u32(attrs[THERMAL_GENL_ATTR_CDEV_ID]), arg);
+
+ case THERMAL_GENL_EVENT_CDEV_STATE_UPDATE:
+ return ops->cdev_update(nla_get_u32(attrs[THERMAL_GENL_ATTR_CDEV_ID]),
+ nla_get_u32(attrs[THERMAL_GENL_ATTR_CDEV_CUR_STATE]), arg);
+
+ case THERMAL_GENL_EVENT_TZ_GOV_CHANGE:
+ return ops->gov_change(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]),
+ nla_get_string(attrs[THERMAL_GENL_ATTR_GOV_NAME]), arg);
+ default:
+ return -1;
+ }
+}
+
+static void thermal_events_ops_init(struct thermal_events_ops *ops)
+{
+ enabled_ops[THERMAL_GENL_EVENT_TZ_CREATE] = !!ops->tz_create;
+ enabled_ops[THERMAL_GENL_EVENT_TZ_DELETE] = !!ops->tz_delete;
+ enabled_ops[THERMAL_GENL_EVENT_TZ_DISABLE] = !!ops->tz_disable;
+ enabled_ops[THERMAL_GENL_EVENT_TZ_ENABLE] = !!ops->tz_enable;
+ enabled_ops[THERMAL_GENL_EVENT_TZ_TRIP_UP] = !!ops->trip_high;
+ enabled_ops[THERMAL_GENL_EVENT_TZ_TRIP_DOWN] = !!ops->trip_low;
+ enabled_ops[THERMAL_GENL_EVENT_TZ_TRIP_CHANGE] = !!ops->trip_change;
+ enabled_ops[THERMAL_GENL_EVENT_TZ_TRIP_ADD] = !!ops->trip_add;
+ enabled_ops[THERMAL_GENL_EVENT_TZ_TRIP_DELETE] = !!ops->trip_delete;
+ enabled_ops[THERMAL_GENL_EVENT_CDEV_ADD] = !!ops->cdev_add;
+ enabled_ops[THERMAL_GENL_EVENT_CDEV_DELETE] = !!ops->cdev_delete;
+ enabled_ops[THERMAL_GENL_EVENT_CDEV_STATE_UPDATE] = !!ops->cdev_update;
+ enabled_ops[THERMAL_GENL_EVENT_TZ_GOV_CHANGE] = !!ops->gov_change;
+}
+
+thermal_error_t thermal_events_handle(struct thermal_handler *th, void *arg)
+{
+ struct thermal_handler_param thp = { .th = th, .arg = arg };
+
+ if (!th)
+ return THERMAL_ERROR;
+
+ if (nl_cb_set(th->cb_event, NL_CB_VALID, NL_CB_CUSTOM,
+ handle_thermal_event, &thp))
+ return THERMAL_ERROR;
+
+ return nl_recvmsgs(th->sk_event, th->cb_event);
+}
+
+int thermal_events_fd(struct thermal_handler *th)
+{
+ if (!th)
+ return -1;
+
+ return nl_socket_get_fd(th->sk_event);
+}
+
+thermal_error_t thermal_events_exit(struct thermal_handler *th)
+{
+ if (nl_unsubscribe_thermal(th->sk_event, th->cb_event,
+ THERMAL_GENL_EVENT_GROUP_NAME))
+ return THERMAL_ERROR;
+
+ nl_thermal_disconnect(th->sk_event, th->cb_event);
+
+ return THERMAL_SUCCESS;
+}
+
+thermal_error_t thermal_events_init(struct thermal_handler *th)
+{
+ thermal_events_ops_init(&th->ops->events);
+
+ if (nl_thermal_connect(&th->sk_event, &th->cb_event))
+ return THERMAL_ERROR;
+
+ if (nl_subscribe_thermal(th->sk_event, th->cb_event,
+ THERMAL_GENL_EVENT_GROUP_NAME))
+ return THERMAL_ERROR;
+
+ return THERMAL_SUCCESS;
+}
diff --git a/tools/lib/thermal/include/thermal.h b/tools/lib/thermal/include/thermal.h
new file mode 100644
index 000000000000..1abc560602cf
--- /dev/null
+++ b/tools/lib/thermal/include/thermal.h
@@ -0,0 +1,142 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/* Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> */
+#ifndef __LIBTHERMAL_H
+#define __LIBTHERMAL_H
+
+#include <linux/thermal.h>
+
+#ifndef LIBTHERMAL_API
+#define LIBTHERMAL_API __attribute__((visibility("default")))
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct thermal_sampling_ops {
+ int (*tz_temp)(int tz_id, int temp, void *arg);
+};
+
+struct thermal_events_ops {
+ int (*tz_create)(const char *name, int tz_id, void *arg);
+ int (*tz_delete)(int tz_id, void *arg);
+ int (*tz_enable)(int tz_id, void *arg);
+ int (*tz_disable)(int tz_id, void *arg);
+ int (*trip_high)(int tz_id, int trip_id, int temp, void *arg);
+ int (*trip_low)(int tz_id, int trip_id, int temp, void *arg);
+ int (*trip_add)(int tz_id, int trip_id, int type, int temp, int hyst, void *arg);
+ int (*trip_change)(int tz_id, int trip_id, int type, int temp, int hyst, void *arg);
+ int (*trip_delete)(int tz_id, int trip_id, void *arg);
+ int (*cdev_add)(const char *name, int cdev_id, int max_state, void *arg);
+ int (*cdev_delete)(int cdev_id, void *arg);
+ int (*cdev_update)(int cdev_id, int cur_state, void *arg);
+ int (*gov_change)(int tz_id, const char *gov_name, void *arg);
+};
+
+struct thermal_ops {
+ struct thermal_sampling_ops sampling;
+ struct thermal_events_ops events;
+};
+
+struct thermal_trip {
+ int id;
+ int type;
+ int temp;
+ int hyst;
+};
+
+struct thermal_zone {
+ int id;
+ int temp;
+ char name[THERMAL_NAME_LENGTH];
+ char governor[THERMAL_NAME_LENGTH];
+ struct thermal_trip *trip;
+};
+
+struct thermal_cdev {
+ int id;
+ char name[THERMAL_NAME_LENGTH];
+ int max_state;
+ int min_state;
+ int cur_state;
+};
+
+typedef enum {
+ THERMAL_ERROR = -1,
+ THERMAL_SUCCESS = 0,
+} thermal_error_t;
+
+struct thermal_handler;
+
+typedef int (*cb_tz_t)(struct thermal_zone *, void *);
+
+typedef int (*cb_tt_t)(struct thermal_trip *, void *);
+
+typedef int (*cb_tc_t)(struct thermal_cdev *, void *);
+
+LIBTHERMAL_API int for_each_thermal_zone(struct thermal_zone *tz, cb_tz_t cb, void *arg);
+
+LIBTHERMAL_API int for_each_thermal_trip(struct thermal_trip *tt, cb_tt_t cb, void *arg);
+
+LIBTHERMAL_API int for_each_thermal_cdev(struct thermal_cdev *cdev, cb_tc_t cb, void *arg);
+
+LIBTHERMAL_API struct thermal_zone *thermal_zone_find_by_name(struct thermal_zone *tz,
+ const char *name);
+
+LIBTHERMAL_API struct thermal_zone *thermal_zone_find_by_id(struct thermal_zone *tz, int id);
+
+LIBTHERMAL_API struct thermal_zone *thermal_zone_discover(struct thermal_handler *th);
+
+LIBTHERMAL_API struct thermal_handler *thermal_init(struct thermal_ops *ops);
+
+LIBTHERMAL_API void thermal_exit(struct thermal_handler *th);
+
+/*
+ * Netlink thermal events
+ */
+LIBTHERMAL_API thermal_error_t thermal_events_exit(struct thermal_handler *th);
+
+LIBTHERMAL_API thermal_error_t thermal_events_init(struct thermal_handler *th);
+
+LIBTHERMAL_API thermal_error_t thermal_events_handle(struct thermal_handler *th, void *arg);
+
+LIBTHERMAL_API int thermal_events_fd(struct thermal_handler *th);
+
+/*
+ * Netlink thermal commands
+ */
+LIBTHERMAL_API thermal_error_t thermal_cmd_exit(struct thermal_handler *th);
+
+LIBTHERMAL_API thermal_error_t thermal_cmd_init(struct thermal_handler *th);
+
+LIBTHERMAL_API thermal_error_t thermal_cmd_get_tz(struct thermal_handler *th,
+ struct thermal_zone **tz);
+
+LIBTHERMAL_API thermal_error_t thermal_cmd_get_cdev(struct thermal_handler *th,
+ struct thermal_cdev **tc);
+
+LIBTHERMAL_API thermal_error_t thermal_cmd_get_trip(struct thermal_handler *th,
+ struct thermal_zone *tz);
+
+LIBTHERMAL_API thermal_error_t thermal_cmd_get_governor(struct thermal_handler *th,
+ struct thermal_zone *tz);
+
+LIBTHERMAL_API thermal_error_t thermal_cmd_get_temp(struct thermal_handler *th,
+ struct thermal_zone *tz);
+
+/*
+ * Netlink thermal samples
+ */
+LIBTHERMAL_API thermal_error_t thermal_sampling_exit(struct thermal_handler *th);
+
+LIBTHERMAL_API thermal_error_t thermal_sampling_init(struct thermal_handler *th);
+
+LIBTHERMAL_API thermal_error_t thermal_sampling_handle(struct thermal_handler *th, void *arg);
+
+LIBTHERMAL_API int thermal_sampling_fd(struct thermal_handler *th);
+
+#endif /* __LIBTHERMAL_H */
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/tools/lib/thermal/libthermal.map b/tools/lib/thermal/libthermal.map
new file mode 100644
index 000000000000..d5e77738c7a4
--- /dev/null
+++ b/tools/lib/thermal/libthermal.map
@@ -0,0 +1,25 @@
+LIBTHERMAL_0.0.1 {
+ global:
+ thermal_init;
+ for_each_thermal_zone;
+ for_each_thermal_trip;
+ for_each_thermal_cdev;
+ thermal_zone_find_by_name;
+ thermal_zone_find_by_id;
+ thermal_zone_discover;
+ thermal_init;
+ thermal_events_init;
+ thermal_events_handle;
+ thermal_events_fd;
+ thermal_cmd_init;
+ thermal_cmd_get_tz;
+ thermal_cmd_get_cdev;
+ thermal_cmd_get_trip;
+ thermal_cmd_get_governor;
+ thermal_cmd_get_temp;
+ thermal_sampling_init;
+ thermal_sampling_handle;
+ thermal_sampling_fd;
+local:
+ *;
+};
diff --git a/tools/lib/thermal/libthermal.pc.template b/tools/lib/thermal/libthermal.pc.template
new file mode 100644
index 000000000000..6f3769731b59
--- /dev/null
+++ b/tools/lib/thermal/libthermal.pc.template
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+prefix=@PREFIX@
+libdir=@LIBDIR@
+includedir=${prefix}/include
+
+Name: libthermal
+Description: thermal library
+Requires: libnl-3.0 libnl-genl-3.0
+Version: @VERSION@
+Libs: -L${libdir} -lnl-genl-3 -lnl-3
+Cflags: -I${includedir} -I{include}/libnl3
diff --git a/tools/lib/thermal/sampling.c b/tools/lib/thermal/sampling.c
new file mode 100644
index 000000000000..ee818f4e9654
--- /dev/null
+++ b/tools/lib/thermal/sampling.c
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: LGPL-2.1+
+// Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <thermal.h>
+#include "thermal_nl.h"
+
+static int handle_thermal_sample(struct nl_msg *n, void *arg)
+{
+ struct nlmsghdr *nlh = nlmsg_hdr(n);
+ struct genlmsghdr *genlhdr = genlmsg_hdr(nlh);
+ struct nlattr *attrs[THERMAL_GENL_ATTR_MAX + 1];
+ struct thermal_handler_param *thp = arg;
+ struct thermal_handler *th = thp->th;
+
+ genlmsg_parse(nlh, 0, attrs, THERMAL_GENL_ATTR_MAX, NULL);
+
+ switch (genlhdr->cmd) {
+
+ case THERMAL_GENL_SAMPLING_TEMP:
+ return th->ops->sampling.tz_temp(
+ nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]),
+ nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TEMP]), arg);
+ default:
+ return THERMAL_ERROR;
+ }
+}
+
+thermal_error_t thermal_sampling_handle(struct thermal_handler *th, void *arg)
+{
+ struct thermal_handler_param thp = { .th = th, .arg = arg };
+
+ if (!th)
+ return THERMAL_ERROR;
+
+ if (nl_cb_set(th->cb_sampling, NL_CB_VALID, NL_CB_CUSTOM,
+ handle_thermal_sample, &thp))
+ return THERMAL_ERROR;
+
+ return nl_recvmsgs(th->sk_sampling, th->cb_sampling);
+}
+
+int thermal_sampling_fd(struct thermal_handler *th)
+{
+ if (!th)
+ return -1;
+
+ return nl_socket_get_fd(th->sk_sampling);
+}
+
+thermal_error_t thermal_sampling_exit(struct thermal_handler *th)
+{
+ if (nl_unsubscribe_thermal(th->sk_sampling, th->cb_sampling,
+ THERMAL_GENL_EVENT_GROUP_NAME))
+ return THERMAL_ERROR;
+
+ nl_thermal_disconnect(th->sk_sampling, th->cb_sampling);
+
+ return THERMAL_SUCCESS;
+}
+
+thermal_error_t thermal_sampling_init(struct thermal_handler *th)
+{
+ if (nl_thermal_connect(&th->sk_sampling, &th->cb_sampling))
+ return THERMAL_ERROR;
+
+ if (nl_subscribe_thermal(th->sk_sampling, th->cb_sampling,
+ THERMAL_GENL_SAMPLING_GROUP_NAME))
+ return THERMAL_ERROR;
+
+ return THERMAL_SUCCESS;
+}
diff --git a/tools/lib/thermal/thermal.c b/tools/lib/thermal/thermal.c
new file mode 100644
index 000000000000..72a76dc205bc
--- /dev/null
+++ b/tools/lib/thermal/thermal.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: LGPL-2.1+
+// Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org>
+#include <stdio.h>
+#include <thermal.h>
+
+#include "thermal_nl.h"
+
+int for_each_thermal_cdev(struct thermal_cdev *cdev, cb_tc_t cb, void *arg)
+{
+ int i, ret = 0;
+
+ if (!cdev)
+ return 0;
+
+ for (i = 0; cdev[i].id != -1; i++)
+ ret |= cb(&cdev[i], arg);
+
+ return ret;
+}
+
+int for_each_thermal_trip(struct thermal_trip *tt, cb_tt_t cb, void *arg)
+{
+ int i, ret = 0;
+
+ if (!tt)
+ return 0;
+
+ for (i = 0; tt[i].id != -1; i++)
+ ret |= cb(&tt[i], arg);
+
+ return ret;
+}
+
+int for_each_thermal_zone(struct thermal_zone *tz, cb_tz_t cb, void *arg)
+{
+ int i, ret = 0;
+
+ if (!tz)
+ return 0;
+
+ for (i = 0; tz[i].id != -1; i++)
+ ret |= cb(&tz[i], arg);
+
+ return ret;
+}
+
+struct thermal_zone *thermal_zone_find_by_name(struct thermal_zone *tz,
+ const char *name)
+{
+ int i;
+
+ if (!tz || !name)
+ return NULL;
+
+ for (i = 0; tz[i].id != -1; i++) {
+ if (!strcmp(tz[i].name, name))
+ return &tz[i];
+ }
+
+ return NULL;
+}
+
+struct thermal_zone *thermal_zone_find_by_id(struct thermal_zone *tz, int id)
+{
+ int i;
+
+ if (!tz || id < 0)
+ return NULL;
+
+ for (i = 0; tz[i].id != -1; i++) {
+ if (tz[i].id == id)
+ return &tz[i];
+ }
+
+ return NULL;
+}
+
+static int __thermal_zone_discover(struct thermal_zone *tz, void *th)
+{
+ if (thermal_cmd_get_trip(th, tz) < 0)
+ return -1;
+
+ if (thermal_cmd_get_governor(th, tz))
+ return -1;
+
+ return 0;
+}
+
+struct thermal_zone *thermal_zone_discover(struct thermal_handler *th)
+{
+ struct thermal_zone *tz;
+
+ if (thermal_cmd_get_tz(th, &tz) < 0)
+ return NULL;
+
+ if (for_each_thermal_zone(tz, __thermal_zone_discover, th))
+ return NULL;
+
+ return tz;
+}
+
+void thermal_exit(struct thermal_handler *th)
+{
+ thermal_cmd_exit(th);
+ thermal_events_exit(th);
+ thermal_sampling_exit(th);
+
+ free(th);
+}
+
+struct thermal_handler *thermal_init(struct thermal_ops *ops)
+{
+ struct thermal_handler *th;
+
+ th = malloc(sizeof(*th));
+ if (!th)
+ return NULL;
+ th->ops = ops;
+
+ if (thermal_events_init(th))
+ goto out_free;
+
+ if (thermal_sampling_init(th))
+ goto out_free;
+
+ if (thermal_cmd_init(th))
+ goto out_free;
+
+ return th;
+
+out_free:
+ free(th);
+
+ return NULL;
+}
diff --git a/tools/lib/thermal/thermal_nl.c b/tools/lib/thermal/thermal_nl.c
new file mode 100644
index 000000000000..b05cf9569858
--- /dev/null
+++ b/tools/lib/thermal/thermal_nl.c
@@ -0,0 +1,215 @@
+// SPDX-License-Identifier: LGPL-2.1+
+// Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <thermal.h>
+#include "thermal_nl.h"
+
+struct handler_args {
+ const char *group;
+ int id;
+};
+
+static __thread int err;
+static __thread int done;
+
+static int nl_seq_check_handler(struct nl_msg *msg, void *arg)
+{
+ return NL_OK;
+}
+
+static int nl_error_handler(struct sockaddr_nl *nla, struct nlmsgerr *nl_err,
+ void *arg)
+{
+ int *ret = arg;
+
+ if (ret)
+ *ret = nl_err->error;
+
+ return NL_STOP;
+}
+
+static int nl_finish_handler(struct nl_msg *msg, void *arg)
+{
+ int *ret = arg;
+
+ if (ret)
+ *ret = 1;
+
+ return NL_OK;
+}
+
+static int nl_ack_handler(struct nl_msg *msg, void *arg)
+{
+ int *ret = arg;
+
+ if (ret)
+ *ret = 1;
+
+ return NL_OK;
+}
+
+int nl_send_msg(struct nl_sock *sock, struct nl_cb *cb, struct nl_msg *msg,
+ int (*rx_handler)(struct nl_msg *, void *), void *data)
+{
+ if (!rx_handler)
+ return THERMAL_ERROR;
+
+ err = nl_send_auto_complete(sock, msg);
+ if (err < 0)
+ return err;
+
+ nl_cb_set(cb, NL_CB_VALID, NL_CB_CUSTOM, rx_handler, data);
+
+ err = done = 0;
+
+ while (err == 0 && done == 0)
+ nl_recvmsgs(sock, cb);
+
+ return err;
+}
+
+static int nl_family_handler(struct nl_msg *msg, void *arg)
+{
+ struct handler_args *grp = arg;
+ struct nlattr *tb[CTRL_ATTR_MAX + 1];
+ struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg));
+ struct nlattr *mcgrp;
+ int rem_mcgrp;
+
+ nla_parse(tb, CTRL_ATTR_MAX, genlmsg_attrdata(gnlh, 0),
+ genlmsg_attrlen(gnlh, 0), NULL);
+
+ if (!tb[CTRL_ATTR_MCAST_GROUPS])
+ return THERMAL_ERROR;
+
+ nla_for_each_nested(mcgrp, tb[CTRL_ATTR_MCAST_GROUPS], rem_mcgrp) {
+
+ struct nlattr *tb_mcgrp[CTRL_ATTR_MCAST_GRP_MAX + 1];
+
+ nla_parse(tb_mcgrp, CTRL_ATTR_MCAST_GRP_MAX,
+ nla_data(mcgrp), nla_len(mcgrp), NULL);
+
+ if (!tb_mcgrp[CTRL_ATTR_MCAST_GRP_NAME] ||
+ !tb_mcgrp[CTRL_ATTR_MCAST_GRP_ID])
+ continue;
+
+ if (strncmp(nla_data(tb_mcgrp[CTRL_ATTR_MCAST_GRP_NAME]),
+ grp->group,
+ nla_len(tb_mcgrp[CTRL_ATTR_MCAST_GRP_NAME])))
+ continue;
+
+ grp->id = nla_get_u32(tb_mcgrp[CTRL_ATTR_MCAST_GRP_ID]);
+
+ break;
+ }
+
+ return THERMAL_SUCCESS;
+}
+
+static int nl_get_multicast_id(struct nl_sock *sock, struct nl_cb *cb,
+ const char *family, const char *group)
+{
+ struct nl_msg *msg;
+ int ret = 0, ctrlid;
+ struct handler_args grp = {
+ .group = group,
+ .id = -ENOENT,
+ };
+
+ msg = nlmsg_alloc();
+ if (!msg)
+ return THERMAL_ERROR;
+
+ ctrlid = genl_ctrl_resolve(sock, "nlctrl");
+
+ genlmsg_put(msg, 0, 0, ctrlid, 0, 0, CTRL_CMD_GETFAMILY, 0);
+
+ nla_put_string(msg, CTRL_ATTR_FAMILY_NAME, family);
+
+ ret = nl_send_msg(sock, cb, msg, nl_family_handler, &grp);
+ if (ret)
+ goto nla_put_failure;
+
+ ret = grp.id;
+
+nla_put_failure:
+ nlmsg_free(msg);
+ return ret;
+}
+
+int nl_thermal_connect(struct nl_sock **nl_sock, struct nl_cb **nl_cb)
+{
+ struct nl_cb *cb;
+ struct nl_sock *sock;
+
+ cb = nl_cb_alloc(NL_CB_DEFAULT);
+ if (!cb)
+ return THERMAL_ERROR;
+
+ sock = nl_socket_alloc();
+ if (!sock)
+ goto out_cb_free;
+
+ if (genl_connect(sock))
+ goto out_socket_free;
+
+ if (nl_cb_err(cb, NL_CB_CUSTOM, nl_error_handler, &err) ||
+ nl_cb_set(cb, NL_CB_FINISH, NL_CB_CUSTOM, nl_finish_handler, &done) ||
+ nl_cb_set(cb, NL_CB_ACK, NL_CB_CUSTOM, nl_ack_handler, &done) ||
+ nl_cb_set(cb, NL_CB_SEQ_CHECK, NL_CB_CUSTOM, nl_seq_check_handler, &done))
+ return THERMAL_ERROR;
+
+ *nl_sock = sock;
+ *nl_cb = cb;
+
+ return THERMAL_SUCCESS;
+
+out_socket_free:
+ nl_socket_free(sock);
+out_cb_free:
+ nl_cb_put(cb);
+ return THERMAL_ERROR;
+}
+
+void nl_thermal_disconnect(struct nl_sock *nl_sock, struct nl_cb *nl_cb)
+{
+ nl_close(nl_sock);
+ nl_socket_free(nl_sock);
+ nl_cb_put(nl_cb);
+}
+
+int nl_unsubscribe_thermal(struct nl_sock *nl_sock, struct nl_cb *nl_cb,
+ const char *group)
+{
+ int mcid;
+
+ mcid = nl_get_multicast_id(nl_sock, nl_cb, THERMAL_GENL_FAMILY_NAME,
+ group);
+ if (mcid < 0)
+ return THERMAL_ERROR;
+
+ if (nl_socket_drop_membership(nl_sock, mcid))
+ return THERMAL_ERROR;
+
+ return THERMAL_SUCCESS;
+}
+
+int nl_subscribe_thermal(struct nl_sock *nl_sock, struct nl_cb *nl_cb,
+ const char *group)
+{
+ int mcid;
+
+ mcid = nl_get_multicast_id(nl_sock, nl_cb, THERMAL_GENL_FAMILY_NAME,
+ group);
+ if (mcid < 0)
+ return THERMAL_ERROR;
+
+ if (nl_socket_add_membership(nl_sock, mcid))
+ return THERMAL_ERROR;
+
+ return THERMAL_SUCCESS;
+}
diff --git a/tools/lib/thermal/thermal_nl.h b/tools/lib/thermal/thermal_nl.h
new file mode 100644
index 000000000000..ddf635642f07
--- /dev/null
+++ b/tools/lib/thermal/thermal_nl.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/* Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> */
+#ifndef __THERMAL_H
+#define __THERMAL_H
+
+#include <netlink/netlink.h>
+#include <netlink/genl/genl.h>
+#include <netlink/genl/mngt.h>
+#include <netlink/genl/ctrl.h>
+
+struct thermal_handler {
+ int done;
+ int error;
+ struct thermal_ops *ops;
+ struct nl_msg *msg;
+ struct nl_sock *sk_event;
+ struct nl_sock *sk_sampling;
+ struct nl_sock *sk_cmd;
+ struct nl_cb *cb_cmd;
+ struct nl_cb *cb_event;
+ struct nl_cb *cb_sampling;
+};
+
+struct thermal_handler_param {
+ struct thermal_handler *th;
+ void *arg;
+};
+
+/*
+ * Low level netlink
+ */
+extern int nl_subscribe_thermal(struct nl_sock *nl_sock, struct nl_cb *nl_cb,
+ const char *group);
+
+extern int nl_unsubscribe_thermal(struct nl_sock *nl_sock, struct nl_cb *nl_cb,
+ const char *group);
+
+extern int nl_thermal_connect(struct nl_sock **nl_sock, struct nl_cb **nl_cb);
+
+extern void nl_thermal_disconnect(struct nl_sock *nl_sock, struct nl_cb *nl_cb);
+
+extern int nl_send_msg(struct nl_sock *sock, struct nl_cb *nl_cb, struct nl_msg *msg,
+ int (*rx_handler)(struct nl_msg *, void *),
+ void *data);
+
+#endif /* __THERMAL_H */
diff --git a/tools/memory-model/README b/tools/memory-model/README
index 9edd402704c4..dab38904206a 100644
--- a/tools/memory-model/README
+++ b/tools/memory-model/README
@@ -54,7 +54,8 @@ klitmus7 Compatibility Table
-- 4.14 7.48 --
4.15 -- 4.19 7.49 --
4.20 -- 5.5 7.54 --
- 5.6 -- 7.56 --
+ 5.6 -- 5.16 7.56 --
+ 5.17 -- 7.56.1 --
============ ==========
diff --git a/tools/objtool/Build b/tools/objtool/Build
index b7222d5cc7bc..33f2ee5a46d3 100644
--- a/tools/objtool/Build
+++ b/tools/objtool/Build
@@ -2,17 +2,15 @@ objtool-y += arch/$(SRCARCH)/
objtool-y += weak.o
-objtool-$(SUBCMD_CHECK) += check.o
-objtool-$(SUBCMD_CHECK) += special.o
-objtool-$(SUBCMD_ORC) += check.o
-objtool-$(SUBCMD_ORC) += orc_gen.o
-objtool-$(SUBCMD_ORC) += orc_dump.o
-
+objtool-y += check.o
+objtool-y += special.o
objtool-y += builtin-check.o
-objtool-y += builtin-orc.o
objtool-y += elf.o
objtool-y += objtool.o
+objtool-$(BUILD_ORC) += orc_gen.o
+objtool-$(BUILD_ORC) += orc_dump.o
+
objtool-y += libstring.o
objtool-y += libctype.o
objtool-y += str_error_r.o
diff --git a/tools/objtool/Documentation/stack-validation.txt b/tools/objtool/Documentation/objtool.txt
index 30f38fdc0d56..8a671902a187 100644
--- a/tools/objtool/Documentation/stack-validation.txt
+++ b/tools/objtool/Documentation/objtool.txt
@@ -1,15 +1,103 @@
-Compile-time stack metadata validation
-======================================
+Objtool
+=======
+The kernel CONFIG_OBJTOOL option enables a host tool named 'objtool'
+which runs at compile time. It can do various validations and
+transformations on .o files.
-Overview
+Objtool has become an integral part of the x86-64 kernel toolchain. The
+kernel depends on it for a variety of security and performance features
+(and other types of features as well).
+
+
+Features
--------
-The kernel CONFIG_STACK_VALIDATION option enables a host tool named
-objtool which runs at compile time. It has a "check" subcommand which
-analyzes every .o file and ensures the validity of its stack metadata.
-It enforces a set of rules on asm code and C inline assembly code so
-that stack traces can be reliable.
+Objtool has the following features:
+
+- Stack unwinding metadata validation -- useful for helping to ensure
+ stack traces are reliable for live patching
+
+- ORC unwinder metadata generation -- a faster and more precise
+ alternative to frame pointer based unwinding
+
+- Retpoline validation -- ensures that all indirect calls go through
+ retpoline thunks, for Spectre v2 mitigations
+
+- Retpoline call site annotation -- annotates all retpoline thunk call
+ sites, enabling the kernel to patch them inline, to prevent "thunk
+ funneling" for both security and performance reasons
+
+- Non-instrumentation validation -- validates non-instrumentable
+ ("noinstr") code rules, preventing instrumentation in low-level C
+ entry code
+
+- Static call annotation -- annotates static call sites, enabling the
+ kernel to implement inline static calls, a faster alternative to some
+ indirect branches
+
+- Uaccess validation -- validates uaccess rules for a proper
+ implementation of Supervisor Mode Access Protection (SMAP)
+
+- Straight Line Speculation validation -- validates certain SLS
+ mitigations
+
+- Indirect Branch Tracking validation -- validates Intel CET IBT rules
+ to ensure that all functions referenced by function pointers have
+ corresponding ENDBR instructions
+
+- Indirect Branch Tracking annotation -- annotates unused ENDBR
+ instruction sites, enabling the kernel to "seal" them (replace them
+ with NOPs) to further harden IBT
+
+- Function entry annotation -- annotates function entries, enabling
+ kernel function tracing
+
+- Other toolchain hacks which will go unmentioned at this time...
+
+Each feature can be enabled individually or in combination using the
+objtool cmdline.
+
+
+Objects
+-------
+
+Typically, objtool runs on every translation unit (TU, aka ".o file") in
+the kernel. If a TU is part of a kernel module, the '--module' option
+is added.
+
+However:
+
+- If noinstr validation is enabled, it also runs on vmlinux.o, with all
+ options removed and '--noinstr' added.
+
+- If IBT or LTO is enabled, it doesn't run on TUs at all. Instead it
+ runs on vmlinux.o and linked modules, with all options.
+
+In summary:
+
+ A) Legacy mode:
+ TU: objtool [--module] <options>
+ vmlinux: N/A
+ module: N/A
+
+ B) CONFIG_NOINSTR_VALIDATION=y && !(CONFIG_X86_KERNEL_IBT=y || CONFIG_LTO=y):
+ TU: objtool [--module] <options> // no --noinstr
+ vmlinux: objtool --noinstr // other options removed
+ module: N/A
+
+ C) CONFIG_X86_KERNEL_IBT=y || CONFIG_LTO=y:
+ TU: N/A
+ vmlinux: objtool --noinstr <options>
+ module: objtool --module --noinstr <options>
+
+
+Stack validation
+----------------
+
+Objtool's stack validation feature analyzes every .o file and ensures
+the validity of its stack metadata. It enforces a set of rules on asm
+code and C inline assembly code so that stack traces can be reliable.
For each function, it recursively follows all possible code paths and
validates the correct frame pointer state at each instruction.
@@ -20,14 +108,6 @@ alternative execution paths to a given instruction (or set of
instructions). Similarly, it knows how to follow switch statements, for
which gcc sometimes uses jump tables.
-(Objtool also has an 'orc generate' subcommand which generates debuginfo
-for the ORC unwinder. See Documentation/x86/orc-unwinder.rst in the
-kernel tree for more details.)
-
-
-Why do we need stack metadata validation?
------------------------------------------
-
Here are some of the benefits of validating stack metadata:
a) More reliable stack traces for frame pointer enabled kernels
@@ -113,9 +193,6 @@ c) Higher live patching compatibility rate
For more details, see the livepatch documentation in the Linux kernel
source tree at Documentation/livepatch/livepatch.rst.
-Rules
------
-
To achieve the validation, objtool enforces the following rules:
1. Each callable function must be annotated as such with the ELF
@@ -177,7 +254,8 @@ Another possible cause for errors in C code is if the Makefile removes
-fno-omit-frame-pointer or adds -fomit-frame-pointer to the gcc options.
Here are some examples of common warnings reported by objtool, what
-they mean, and suggestions for how to fix them.
+they mean, and suggestions for how to fix them. When in doubt, ping
+the objtool maintainers.
1. file.o: warning: objtool: func()+0x128: call without frame pointer save/setup
@@ -358,3 +436,7 @@ ignore it:
OBJECT_FILES_NON_STANDARD := y
to the Makefile.
+
+NOTE: OBJECT_FILES_NON_STANDARD doesn't work for link time validation of
+vmlinux.o or a linked module. So it should only be used for files which
+aren't linked into vmlinux or a module.
diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile
index 0dbd397f319d..e66d717c245d 100644
--- a/tools/objtool/Makefile
+++ b/tools/objtool/Makefile
@@ -39,15 +39,13 @@ CFLAGS += $(if $(elfshdr),,-DLIBELF_USE_DEPRECATED)
AWK = awk
-SUBCMD_CHECK := n
-SUBCMD_ORC := n
+BUILD_ORC := n
ifeq ($(SRCARCH),x86)
- SUBCMD_CHECK := y
- SUBCMD_ORC := y
+ BUILD_ORC := y
endif
-export SUBCMD_CHECK SUBCMD_ORC
+export BUILD_ORC
export srctree OUTPUT CFLAGS SRCARCH AWK
include $(srctree)/tools/build/Makefile.include
@@ -65,7 +63,7 @@ $(LIBSUBCMD): fixdep FORCE
clean:
$(call QUIET_CLEAN, objtool) $(RM) $(OBJTOOL)
$(Q)find $(OUTPUT) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
- $(Q)$(RM) $(OUTPUT)arch/x86/inat-tables.c $(OUTPUT)fixdep
+ $(Q)$(RM) $(OUTPUT)arch/x86/lib/inat-tables.c $(OUTPUT)fixdep $(LIBSUBCMD)
FORCE:
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 943cb41cddf7..8b990a52aada 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -581,7 +581,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
break;
case 0xc7: /* mov imm, r/m */
- if (!noinstr)
+ if (!opts.noinstr)
break;
if (insn.length == 3+4+4 && !strncmp(sec->name, ".init.text", 10)) {
diff --git a/tools/objtool/arch/x86/special.c b/tools/objtool/arch/x86/special.c
index e707d9bcd161..7c97b7391279 100644
--- a/tools/objtool/arch/x86/special.c
+++ b/tools/objtool/arch/x86/special.c
@@ -20,7 +20,7 @@ void arch_handle_alternative(unsigned short feature, struct special_alt *alt)
* find paths that see the STAC but take the NOP instead of
* CLAC and the other way around.
*/
- if (uaccess)
+ if (opts.uaccess)
alt->skip_orig = true;
else
alt->skip_alt = true;
diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
index fc6975ab8b06..f4c3a5091737 100644
--- a/tools/objtool/builtin-check.c
+++ b/tools/objtool/builtin-check.c
@@ -3,28 +3,21 @@
* Copyright (C) 2015-2017 Josh Poimboeuf <jpoimboe@redhat.com>
*/
-/*
- * objtool check:
- *
- * This command analyzes every .o file and ensures the validity of its stack
- * trace metadata. It enforces a set of rules on asm code and C inline
- * assembly code so that stack traces can be reliable.
- *
- * For more information, see tools/objtool/Documentation/stack-validation.txt.
- */
-
#include <subcmd/parse-options.h>
#include <string.h>
#include <stdlib.h>
#include <objtool/builtin.h>
#include <objtool/objtool.h>
-bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats,
- lto, vmlinux, mcount, noinstr, backup, sls, dryrun,
- ibt;
+#define ERROR(format, ...) \
+ fprintf(stderr, \
+ "error: objtool: " format "\n", \
+ ##__VA_ARGS__)
+
+struct opts opts;
static const char * const check_usage[] = {
- "objtool check [<options>] file.o",
+ "objtool <actions> [<options>] file.o",
NULL,
};
@@ -33,22 +26,64 @@ static const char * const env_usage[] = {
NULL,
};
+static int parse_dump(const struct option *opt, const char *str, int unset)
+{
+ if (!str || !strcmp(str, "orc")) {
+ opts.dump_orc = true;
+ return 0;
+ }
+
+ return -1;
+}
+
+static int parse_hacks(const struct option *opt, const char *str, int unset)
+{
+ bool found = false;
+
+ /*
+ * Use strstr() as a lazy method of checking for comma-separated
+ * options.
+ *
+ * No string provided == enable all options.
+ */
+
+ if (!str || strstr(str, "jump_label")) {
+ opts.hack_jump_label = true;
+ found = true;
+ }
+
+ if (!str || strstr(str, "noinstr")) {
+ opts.hack_noinstr = true;
+ found = true;
+ }
+
+ return found ? 0 : -1;
+}
+
const struct option check_options[] = {
- OPT_BOOLEAN('f', "no-fp", &no_fp, "Skip frame pointer validation"),
- OPT_BOOLEAN('u', "no-unreachable", &no_unreachable, "Skip 'unreachable instruction' warnings"),
- OPT_BOOLEAN('r', "retpoline", &retpoline, "Validate retpoline assumptions"),
- OPT_BOOLEAN('m', "module", &module, "Indicates the object will be part of a kernel module"),
- OPT_BOOLEAN('b', "backtrace", &backtrace, "unwind on error"),
- OPT_BOOLEAN('a', "uaccess", &uaccess, "enable uaccess checking"),
- OPT_BOOLEAN('s', "stats", &stats, "print statistics"),
- OPT_BOOLEAN(0, "lto", &lto, "whole-archive like runs"),
- OPT_BOOLEAN('n', "noinstr", &noinstr, "noinstr validation for vmlinux.o"),
- OPT_BOOLEAN('l', "vmlinux", &vmlinux, "vmlinux.o validation"),
- OPT_BOOLEAN('M', "mcount", &mcount, "generate __mcount_loc"),
- OPT_BOOLEAN('B', "backup", &backup, "create .orig files before modification"),
- OPT_BOOLEAN('S', "sls", &sls, "validate straight-line-speculation"),
- OPT_BOOLEAN(0, "dry-run", &dryrun, "don't write the modifications"),
- OPT_BOOLEAN(0, "ibt", &ibt, "validate ENDBR placement"),
+ OPT_GROUP("Actions:"),
+ OPT_CALLBACK_OPTARG('h', "hacks", NULL, NULL, "jump_label,noinstr", "patch toolchain bugs/limitations", parse_hacks),
+ OPT_BOOLEAN('i', "ibt", &opts.ibt, "validate and annotate IBT"),
+ OPT_BOOLEAN('m', "mcount", &opts.mcount, "annotate mcount/fentry calls for ftrace"),
+ OPT_BOOLEAN('n', "noinstr", &opts.noinstr, "validate noinstr rules"),
+ OPT_BOOLEAN('o', "orc", &opts.orc, "generate ORC metadata"),
+ OPT_BOOLEAN('r', "retpoline", &opts.retpoline, "validate and annotate retpoline usage"),
+ OPT_BOOLEAN('l', "sls", &opts.sls, "validate straight-line-speculation mitigations"),
+ OPT_BOOLEAN('s', "stackval", &opts.stackval, "validate frame pointer rules"),
+ OPT_BOOLEAN('t', "static-call", &opts.static_call, "annotate static calls"),
+ OPT_BOOLEAN('u', "uaccess", &opts.uaccess, "validate uaccess rules for SMAP"),
+ OPT_CALLBACK_OPTARG(0, "dump", NULL, NULL, "orc", "dump metadata", parse_dump),
+
+ OPT_GROUP("Options:"),
+ OPT_BOOLEAN(0, "backtrace", &opts.backtrace, "unwind on error"),
+ OPT_BOOLEAN(0, "backup", &opts.backup, "create .orig files before modification"),
+ OPT_BOOLEAN(0, "dry-run", &opts.dryrun, "don't write modifications"),
+ OPT_BOOLEAN(0, "link", &opts.link, "object is a linked object"),
+ OPT_BOOLEAN(0, "module", &opts.module, "object is part of a kernel module"),
+ OPT_BOOLEAN(0, "no-unreachable", &opts.no_unreachable, "skip 'unreachable instruction' warnings"),
+ OPT_BOOLEAN(0, "sec-address", &opts.sec_address, "print section addresses in warnings"),
+ OPT_BOOLEAN(0, "stats", &opts.stats, "print statistics"),
+
OPT_END(),
};
@@ -79,7 +114,59 @@ int cmd_parse_options(int argc, const char **argv, const char * const usage[])
return argc;
}
-int cmd_check(int argc, const char **argv)
+static bool opts_valid(void)
+{
+ if (opts.hack_jump_label ||
+ opts.hack_noinstr ||
+ opts.ibt ||
+ opts.mcount ||
+ opts.noinstr ||
+ opts.orc ||
+ opts.retpoline ||
+ opts.sls ||
+ opts.stackval ||
+ opts.static_call ||
+ opts.uaccess) {
+ if (opts.dump_orc) {
+ ERROR("--dump can't be combined with other options");
+ return false;
+ }
+
+ return true;
+ }
+
+ if (opts.dump_orc)
+ return true;
+
+ ERROR("At least one command required");
+ return false;
+}
+
+static bool link_opts_valid(struct objtool_file *file)
+{
+ if (opts.link)
+ return true;
+
+ if (has_multiple_files(file->elf)) {
+ ERROR("Linked object detected, forcing --link");
+ opts.link = true;
+ return true;
+ }
+
+ if (opts.noinstr) {
+ ERROR("--noinstr requires --link");
+ return false;
+ }
+
+ if (opts.ibt) {
+ ERROR("--ibt requires --link");
+ return false;
+ }
+
+ return true;
+}
+
+int objtool_run(int argc, const char **argv)
{
const char *objname;
struct objtool_file *file;
@@ -88,10 +175,19 @@ int cmd_check(int argc, const char **argv)
argc = cmd_parse_options(argc, argv, check_usage);
objname = argv[0];
+ if (!opts_valid())
+ return 1;
+
+ if (opts.dump_orc)
+ return orc_dump(objname);
+
file = objtool_open_read(objname);
if (!file)
return 1;
+ if (!link_opts_valid(file))
+ return 1;
+
ret = check(file);
if (ret)
return ret;
diff --git a/tools/objtool/builtin-orc.c b/tools/objtool/builtin-orc.c
deleted file mode 100644
index 17f8b9307738..000000000000
--- a/tools/objtool/builtin-orc.c
+++ /dev/null
@@ -1,73 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com>
- */
-
-/*
- * objtool orc:
- *
- * This command analyzes a .o file and adds .orc_unwind and .orc_unwind_ip
- * sections to it, which is used by the in-kernel ORC unwinder.
- *
- * This command is a superset of "objtool check".
- */
-
-#include <string.h>
-#include <objtool/builtin.h>
-#include <objtool/objtool.h>
-
-static const char *orc_usage[] = {
- "objtool orc generate [<options>] file.o",
- "objtool orc dump file.o",
- NULL,
-};
-
-int cmd_orc(int argc, const char **argv)
-{
- const char *objname;
-
- argc--; argv++;
- if (argc <= 0)
- usage_with_options(orc_usage, check_options);
-
- if (!strncmp(argv[0], "gen", 3)) {
- struct objtool_file *file;
- int ret;
-
- argc = cmd_parse_options(argc, argv, orc_usage);
- objname = argv[0];
-
- file = objtool_open_read(objname);
- if (!file)
- return 1;
-
- ret = check(file);
- if (ret)
- return ret;
-
- if (list_empty(&file->insn_list))
- return 0;
-
- ret = orc_create(file);
- if (ret)
- return ret;
-
- if (!file->elf->changed)
- return 0;
-
- return elf_write(file->elf);
- }
-
- if (!strcmp(argv[0], "dump")) {
- if (argc != 2)
- usage_with_options(orc_usage, check_options);
-
- objname = argv[1];
-
- return orc_dump(objname);
- }
-
- usage_with_options(orc_usage, check_options);
-
- return 0;
-}
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 6de5085e3e5a..190b2f6e360a 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -5,6 +5,7 @@
#include <string.h>
#include <stdlib.h>
+#include <inttypes.h>
#include <sys/mman.h>
#include <arch/elf.h>
@@ -184,6 +185,7 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func,
"do_group_exit",
"stop_this_cpu",
"__invalid_creds",
+ "cpu_startup_entry",
};
if (!func)
@@ -262,7 +264,8 @@ static void init_cfi_state(struct cfi_state *cfi)
cfi->drap_offset = -1;
}
-static void init_insn_state(struct insn_state *state, struct section *sec)
+static void init_insn_state(struct objtool_file *file, struct insn_state *state,
+ struct section *sec)
{
memset(state, 0, sizeof(*state));
init_cfi_state(&state->cfi);
@@ -272,7 +275,7 @@ static void init_insn_state(struct insn_state *state, struct section *sec)
* not correctly determine insn->call_dest->sec (external symbols do
* not have a section).
*/
- if (vmlinux && noinstr && sec)
+ if (opts.link && opts.noinstr && sec)
state->noinstr = sec->noinstr;
}
@@ -338,7 +341,7 @@ static void *cfi_hash_alloc(unsigned long size)
if (cfi_hash == (void *)-1L) {
WARN("mmap fail cfi_hash");
cfi_hash = NULL;
- } else if (stats) {
+ } else if (opts.stats) {
printf("cfi_bits: %d\n", cfi_bits);
}
@@ -433,7 +436,7 @@ static int decode_instructions(struct objtool_file *file)
}
}
- if (stats)
+ if (opts.stats)
printf("nr_insns: %lu\n", nr_insns);
return 0;
@@ -496,7 +499,7 @@ static int init_pv_ops(struct objtool_file *file)
struct symbol *sym;
int idx, nr;
- if (!noinstr)
+ if (!opts.noinstr)
return 0;
file->pv_ops = NULL;
@@ -559,12 +562,12 @@ static int add_dead_ends(struct objtool_file *file)
else if (reloc->addend == reloc->sym->sec->sh.sh_size) {
insn = find_last_insn(file, reloc->sym->sec);
if (!insn) {
- WARN("can't find unreachable insn at %s+0x%x",
+ WARN("can't find unreachable insn at %s+0x%" PRIx64,
reloc->sym->sec->name, reloc->addend);
return -1;
}
} else {
- WARN("can't find unreachable insn at %s+0x%x",
+ WARN("can't find unreachable insn at %s+0x%" PRIx64,
reloc->sym->sec->name, reloc->addend);
return -1;
}
@@ -594,12 +597,12 @@ reachable:
else if (reloc->addend == reloc->sym->sec->sh.sh_size) {
insn = find_last_insn(file, reloc->sym->sec);
if (!insn) {
- WARN("can't find reachable insn at %s+0x%x",
+ WARN("can't find reachable insn at %s+0x%" PRIx64,
reloc->sym->sec->name, reloc->addend);
return -1;
}
} else {
- WARN("can't find reachable insn at %s+0x%x",
+ WARN("can't find reachable insn at %s+0x%" PRIx64,
reloc->sym->sec->name, reloc->addend);
return -1;
}
@@ -667,7 +670,7 @@ static int create_static_call_sections(struct objtool_file *file)
key_sym = find_symbol_by_name(file->elf, tmp);
if (!key_sym) {
- if (!module) {
+ if (!opts.module) {
WARN("static_call: can't find static_call_key symbol: %s", tmp);
return -1;
}
@@ -760,7 +763,7 @@ static int create_ibt_endbr_seal_sections(struct objtool_file *file)
list_for_each_entry(insn, &file->endbr_list, call_node)
idx++;
- if (stats) {
+ if (opts.stats) {
printf("ibt: ENDBR at function start: %d\n", file->nr_endbr);
printf("ibt: ENDBR inside functions: %d\n", file->nr_endbr_int);
printf("ibt: superfluous ENDBR: %d\n", idx);
@@ -1027,7 +1030,7 @@ static void add_uaccess_safe(struct objtool_file *file)
struct symbol *func;
const char **name;
- if (!uaccess)
+ if (!opts.uaccess)
return;
for (name = uaccess_safe_builtin; *name; name++) {
@@ -1143,7 +1146,7 @@ static void annotate_call_site(struct objtool_file *file,
* attribute so they need a little help, NOP out any such calls from
* noinstr text.
*/
- if (insn->sec->noinstr && sym->profiling_func) {
+ if (opts.hack_noinstr && insn->sec->noinstr && sym->profiling_func) {
if (reloc) {
reloc->type = R_NONE;
elf_write_reloc(file->elf, reloc);
@@ -1155,10 +1158,21 @@ static void annotate_call_site(struct objtool_file *file,
: arch_nop_insn(insn->len));
insn->type = sibling ? INSN_RETURN : INSN_NOP;
+
+ if (sibling) {
+ /*
+ * We've replaced the tail-call JMP insn by two new
+ * insn: RET; INT3, except we only have a single struct
+ * insn here. Mark it retpoline_safe to avoid the SLS
+ * warning, instead of adding another insn.
+ */
+ insn->retpoline_safe = true;
+ }
+
return;
}
- if (mcount && sym->fentry) {
+ if (opts.mcount && sym->fentry) {
if (sibling)
WARN_FUNC("Tail call to __fentry__ !?!?", insn->sec, insn->offset);
@@ -1239,11 +1253,20 @@ static bool same_function(struct instruction *insn1, struct instruction *insn2)
return insn1->func->pfunc == insn2->func->pfunc;
}
-static bool is_first_func_insn(struct instruction *insn)
+static bool is_first_func_insn(struct objtool_file *file, struct instruction *insn)
{
- return insn->offset == insn->func->offset ||
- (insn->type == INSN_ENDBR &&
- insn->offset == insn->func->offset + insn->len);
+ if (insn->offset == insn->func->offset)
+ return true;
+
+ if (opts.ibt) {
+ struct instruction *prev = prev_insn_same_sym(file, insn);
+
+ if (prev && prev->type == INSN_ENDBR &&
+ insn->offset == insn->func->offset + prev->len)
+ return true;
+ }
+
+ return false;
}
/*
@@ -1251,12 +1274,19 @@ static bool is_first_func_insn(struct instruction *insn)
*/
static int add_jump_destinations(struct objtool_file *file)
{
- struct instruction *insn;
+ struct instruction *insn, *jump_dest;
struct reloc *reloc;
struct section *dest_sec;
unsigned long dest_off;
for_each_insn(file, insn) {
+ if (insn->jump_dest) {
+ /*
+ * handle_group_alt() may have previously set
+ * 'jump_dest' for some alternatives.
+ */
+ continue;
+ }
if (!is_static_jump(insn))
continue;
@@ -1271,7 +1301,10 @@ static int add_jump_destinations(struct objtool_file *file)
add_retpoline_call(file, insn);
continue;
} else if (insn->func) {
- /* internal or external sibling call (with reloc) */
+ /*
+ * External sibling call or internal sibling call with
+ * STT_FUNC reloc.
+ */
add_call_dest(file, insn, reloc->sym, true);
continue;
} else if (reloc->sym->sec->idx) {
@@ -1283,17 +1316,8 @@ static int add_jump_destinations(struct objtool_file *file)
continue;
}
- insn->jump_dest = find_insn(file, dest_sec, dest_off);
- if (!insn->jump_dest) {
-
- /*
- * This is a special case where an alt instruction
- * jumps past the end of the section. These are
- * handled later in handle_group_alt().
- */
- if (!strcmp(insn->sec->name, ".altinstr_replacement"))
- continue;
-
+ jump_dest = find_insn(file, dest_sec, dest_off);
+ if (!jump_dest) {
WARN_FUNC("can't find jump dest instruction at %s+0x%lx",
insn->sec, insn->offset, dest_sec->name,
dest_off);
@@ -1303,8 +1327,8 @@ static int add_jump_destinations(struct objtool_file *file)
/*
* Cross-function jump.
*/
- if (insn->func && insn->jump_dest->func &&
- insn->func != insn->jump_dest->func) {
+ if (insn->func && jump_dest->func &&
+ insn->func != jump_dest->func) {
/*
* For GCC 8+, create parent/child links for any cold
@@ -1322,16 +1346,22 @@ static int add_jump_destinations(struct objtool_file *file)
* subfunction is through a jump table.
*/
if (!strstr(insn->func->name, ".cold") &&
- strstr(insn->jump_dest->func->name, ".cold")) {
- insn->func->cfunc = insn->jump_dest->func;
- insn->jump_dest->func->pfunc = insn->func;
+ strstr(jump_dest->func->name, ".cold")) {
+ insn->func->cfunc = jump_dest->func;
+ jump_dest->func->pfunc = insn->func;
- } else if (!same_function(insn, insn->jump_dest) &&
- is_first_func_insn(insn->jump_dest)) {
- /* internal sibling call (without reloc) */
- add_call_dest(file, insn, insn->jump_dest->func, true);
+ } else if (!same_function(insn, jump_dest) &&
+ is_first_func_insn(file, jump_dest)) {
+ /*
+ * Internal sibling call without reloc or with
+ * STT_SECTION reloc.
+ */
+ add_call_dest(file, insn, jump_dest->func, true);
+ continue;
}
}
+
+ insn->jump_dest = jump_dest;
}
return 0;
@@ -1520,13 +1550,13 @@ static int handle_group_alt(struct objtool_file *file,
continue;
dest_off = arch_jump_destination(insn);
- if (dest_off == special_alt->new_off + special_alt->new_len)
+ if (dest_off == special_alt->new_off + special_alt->new_len) {
insn->jump_dest = next_insn_same_sec(file, last_orig_insn);
-
- if (!insn->jump_dest) {
- WARN_FUNC("can't find alternative jump destination",
- insn->sec, insn->offset);
- return -1;
+ if (!insn->jump_dest) {
+ WARN_FUNC("can't find alternative jump destination",
+ insn->sec, insn->offset);
+ return -1;
+ }
}
}
@@ -1564,7 +1594,7 @@ static int handle_jump_alt(struct objtool_file *file,
return -1;
}
- if (special_alt->key_addend & 2) {
+ if (opts.hack_jump_label && special_alt->key_addend & 2) {
struct reloc *reloc = insn_reloc(file, orig_insn);
if (reloc) {
@@ -1671,7 +1701,7 @@ static int add_special_section_alts(struct objtool_file *file)
free(special_alt);
}
- if (stats) {
+ if (opts.stats) {
printf("jl\\\tNOP\tJMP\n");
printf("short:\t%ld\t%ld\n", file->jl_nop_short, file->jl_short);
printf("long:\t%ld\t%ld\n", file->jl_nop_long, file->jl_long);
@@ -1917,7 +1947,7 @@ static int read_unwind_hints(struct objtool_file *file)
insn->hint = true;
- if (ibt && hint->type == UNWIND_HINT_TYPE_REGS_PARTIAL) {
+ if (opts.ibt && hint->type == UNWIND_HINT_TYPE_REGS_PARTIAL) {
struct symbol *sym = find_symbol_by_offset(insn->sec, insn->offset);
if (sym && sym->bind == STB_GLOBAL &&
@@ -2225,14 +2255,14 @@ static int decode_sections(struct objtool_file *file)
return ret;
/*
- * Must be before add_special_section_alts() as that depends on
- * jump_dest being set.
+ * Must be before add_jump_destinations(), which depends on 'func'
+ * being set for alternatives, to enable proper sibling call detection.
*/
- ret = add_jump_destinations(file);
+ ret = add_special_section_alts(file);
if (ret)
return ret;
- ret = add_special_section_alts(file);
+ ret = add_jump_destinations(file);
if (ret)
return ret;
@@ -2778,7 +2808,7 @@ static int update_cfi_state(struct instruction *insn,
}
/* detect when asm code uses rbp as a scratch register */
- if (!no_fp && insn->func && op->src.reg == CFI_BP &&
+ if (opts.stackval && insn->func && op->src.reg == CFI_BP &&
cfa->base != CFI_BP)
cfi->bp_scratch = true;
break;
@@ -3154,115 +3184,6 @@ static struct instruction *next_insn_to_validate(struct objtool_file *file,
return next_insn_same_sec(file, insn);
}
-static struct instruction *
-validate_ibt_reloc(struct objtool_file *file, struct reloc *reloc)
-{
- struct instruction *dest;
- struct section *sec;
- unsigned long off;
-
- sec = reloc->sym->sec;
- off = reloc->sym->offset;
-
- if ((reloc->sec->base->sh.sh_flags & SHF_EXECINSTR) &&
- (reloc->type == R_X86_64_PC32 || reloc->type == R_X86_64_PLT32))
- off += arch_dest_reloc_offset(reloc->addend);
- else
- off += reloc->addend;
-
- dest = find_insn(file, sec, off);
- if (!dest)
- return NULL;
-
- if (dest->type == INSN_ENDBR) {
- if (!list_empty(&dest->call_node))
- list_del_init(&dest->call_node);
-
- return NULL;
- }
-
- if (reloc->sym->static_call_tramp)
- return NULL;
-
- return dest;
-}
-
-static void warn_noendbr(const char *msg, struct section *sec, unsigned long offset,
- struct instruction *dest)
-{
- WARN_FUNC("%srelocation to !ENDBR: %s+0x%lx", sec, offset, msg,
- dest->func ? dest->func->name : dest->sec->name,
- dest->func ? dest->offset - dest->func->offset : dest->offset);
-}
-
-static void validate_ibt_dest(struct objtool_file *file, struct instruction *insn,
- struct instruction *dest)
-{
- if (dest->func && dest->func == insn->func) {
- /*
- * Anything from->to self is either _THIS_IP_ or IRET-to-self.
- *
- * There is no sane way to annotate _THIS_IP_ since the compiler treats the
- * relocation as a constant and is happy to fold in offsets, skewing any
- * annotation we do, leading to vast amounts of false-positives.
- *
- * There's also compiler generated _THIS_IP_ through KCOV and
- * such which we have no hope of annotating.
- *
- * As such, blanket accept self-references without issue.
- */
- return;
- }
-
- if (dest->noendbr)
- return;
-
- warn_noendbr("", insn->sec, insn->offset, dest);
-}
-
-static void validate_ibt_insn(struct objtool_file *file, struct instruction *insn)
-{
- struct instruction *dest;
- struct reloc *reloc;
-
- switch (insn->type) {
- case INSN_CALL:
- case INSN_CALL_DYNAMIC:
- case INSN_JUMP_CONDITIONAL:
- case INSN_JUMP_UNCONDITIONAL:
- case INSN_JUMP_DYNAMIC:
- case INSN_JUMP_DYNAMIC_CONDITIONAL:
- case INSN_RETURN:
- /*
- * We're looking for code references setting up indirect code
- * flow. As such, ignore direct code flow and the actual
- * dynamic branches.
- */
- return;
-
- case INSN_NOP:
- /*
- * handle_group_alt() will create INSN_NOP instruction that
- * don't belong to any section, ignore all NOP since they won't
- * carry a (useful) relocation anyway.
- */
- return;
-
- default:
- break;
- }
-
- for (reloc = insn_reloc(file, insn);
- reloc;
- reloc = find_reloc_by_dest_range(file->elf, insn->sec,
- reloc->offset + 1,
- (insn->offset + insn->len) - (reloc->offset + 1))) {
- dest = validate_ibt_reloc(file, reloc);
- if (dest)
- validate_ibt_dest(file, insn, dest);
- }
-}
-
/*
* Follow the branch starting at the given instruction, and recursively follow
* any other branches (jumps). Meanwhile, track the frame pointer state at
@@ -3283,7 +3204,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
while (1) {
next_insn = next_insn_to_validate(file, insn);
- if (file->c_file && func && insn->func && func != insn->func->pfunc) {
+ if (func && insn->func && func != insn->func->pfunc) {
WARN("%s() falls through to next function %s()",
func->name, insn->func->name);
return 1;
@@ -3336,7 +3257,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
ret = validate_branch(file, func, alt->insn, state);
if (ret) {
- if (backtrace)
+ if (opts.backtrace)
BT_FUNC("(alt)", insn);
return ret;
}
@@ -3352,11 +3273,6 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
switch (insn->type) {
case INSN_RETURN:
- if (sls && !insn->retpoline_safe &&
- next_insn && next_insn->type != INSN_TRAP) {
- WARN_FUNC("missing int3 after ret",
- insn->sec, insn->offset);
- }
return validate_return(func, insn, &state);
case INSN_CALL:
@@ -3365,7 +3281,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
if (ret)
return ret;
- if (!no_fp && func && !is_fentry_call(insn) &&
+ if (opts.stackval && func && !is_fentry_call(insn) &&
!has_valid_stack_frame(&state)) {
WARN_FUNC("call without frame pointer save/setup",
sec, insn->offset);
@@ -3388,7 +3304,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
ret = validate_branch(file, func,
insn->jump_dest, state);
if (ret) {
- if (backtrace)
+ if (opts.backtrace)
BT_FUNC("(branch)", insn);
return ret;
}
@@ -3400,13 +3316,6 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
break;
case INSN_JUMP_DYNAMIC:
- if (sls && !insn->retpoline_safe &&
- next_insn && next_insn->type != INSN_TRAP) {
- WARN_FUNC("missing int3 after indirect jump",
- insn->sec, insn->offset);
- }
-
- /* fallthrough */
case INSN_JUMP_DYNAMIC_CONDITIONAL:
if (is_sibling_call(insn)) {
ret = validate_sibling_call(file, insn, &state);
@@ -3472,9 +3381,6 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
break;
}
- if (ibt)
- validate_ibt_insn(file, insn);
-
if (insn->dead_end)
return 0;
@@ -3501,7 +3407,7 @@ static int validate_unwind_hints(struct objtool_file *file, struct section *sec)
if (!file->hints)
return 0;
- init_insn_state(&state, sec);
+ init_insn_state(file, &state, sec);
if (sec) {
insn = find_insn(file, sec, 0);
@@ -3514,7 +3420,7 @@ static int validate_unwind_hints(struct objtool_file *file, struct section *sec)
while (&insn->list != &file->insn_list && (!sec || insn->sec == sec)) {
if (insn->hint && !insn->visited && !insn->ignore) {
ret = validate_branch(file, insn->func, insn, state);
- if (ret && backtrace)
+ if (ret && opts.backtrace)
BT_FUNC("<=== (hint)", insn);
warnings += ret;
}
@@ -3544,7 +3450,7 @@ static int validate_retpoline(struct objtool_file *file)
* loaded late, they very much do need retpoline in their
* .init.text
*/
- if (!strcmp(insn->sec->name, ".init.text") && !module)
+ if (!strcmp(insn->sec->name, ".init.text") && !opts.module)
continue;
WARN_FUNC("indirect %s found in RETPOLINE build",
@@ -3587,14 +3493,14 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio
return true;
/*
- * Whole archive runs might encounder dead code from weak symbols.
+ * Whole archive runs might encounter dead code from weak symbols.
* This is where the linker will have dropped the weak symbol in
* favour of a regular symbol, but leaves the code in place.
*
* In this case we'll find a piece of code (whole function) that is not
* covered by a !section symbol. Ignore them.
*/
- if (!insn->func && lto) {
+ if (opts.link && !insn->func) {
int size = find_symbol_hole_containing(insn->sec, insn->offset);
unsigned long end = insn->offset + size;
@@ -3701,7 +3607,7 @@ static int validate_symbol(struct objtool_file *file, struct section *sec,
state->uaccess = sym->uaccess_safe;
ret = validate_branch(file, insn->func, insn, *state);
- if (ret && backtrace)
+ if (ret && opts.backtrace)
BT_FUNC("<=== (sym)", insn);
return ret;
}
@@ -3716,7 +3622,7 @@ static int validate_section(struct objtool_file *file, struct section *sec)
if (func->type != STT_FUNC)
continue;
- init_insn_state(&state, sec);
+ init_insn_state(file, &state, sec);
set_func_state(&state.cfi);
warnings += validate_symbol(file, sec, func, &state);
@@ -3725,7 +3631,7 @@ static int validate_section(struct objtool_file *file, struct section *sec)
return warnings;
}
-static int validate_vmlinux_functions(struct objtool_file *file)
+static int validate_noinstr_sections(struct objtool_file *file)
{
struct section *sec;
int warnings = 0;
@@ -3760,51 +3666,208 @@ static int validate_functions(struct objtool_file *file)
return warnings;
}
+static void mark_endbr_used(struct instruction *insn)
+{
+ if (!list_empty(&insn->call_node))
+ list_del_init(&insn->call_node);
+}
+
+static int validate_ibt_insn(struct objtool_file *file, struct instruction *insn)
+{
+ struct instruction *dest;
+ struct reloc *reloc;
+ unsigned long off;
+ int warnings = 0;
+
+ /*
+ * Looking for function pointer load relocations. Ignore
+ * direct/indirect branches:
+ */
+ switch (insn->type) {
+ case INSN_CALL:
+ case INSN_CALL_DYNAMIC:
+ case INSN_JUMP_CONDITIONAL:
+ case INSN_JUMP_UNCONDITIONAL:
+ case INSN_JUMP_DYNAMIC:
+ case INSN_JUMP_DYNAMIC_CONDITIONAL:
+ case INSN_RETURN:
+ case INSN_NOP:
+ return 0;
+ default:
+ break;
+ }
+
+ for (reloc = insn_reloc(file, insn);
+ reloc;
+ reloc = find_reloc_by_dest_range(file->elf, insn->sec,
+ reloc->offset + 1,
+ (insn->offset + insn->len) - (reloc->offset + 1))) {
+
+ /*
+ * static_call_update() references the trampoline, which
+ * doesn't have (or need) ENDBR. Skip warning in that case.
+ */
+ if (reloc->sym->static_call_tramp)
+ continue;
+
+ off = reloc->sym->offset;
+ if (reloc->type == R_X86_64_PC32 || reloc->type == R_X86_64_PLT32)
+ off += arch_dest_reloc_offset(reloc->addend);
+ else
+ off += reloc->addend;
+
+ dest = find_insn(file, reloc->sym->sec, off);
+ if (!dest)
+ continue;
+
+ if (dest->type == INSN_ENDBR) {
+ mark_endbr_used(dest);
+ continue;
+ }
+
+ if (dest->func && dest->func == insn->func) {
+ /*
+ * Anything from->to self is either _THIS_IP_ or
+ * IRET-to-self.
+ *
+ * There is no sane way to annotate _THIS_IP_ since the
+ * compiler treats the relocation as a constant and is
+ * happy to fold in offsets, skewing any annotation we
+ * do, leading to vast amounts of false-positives.
+ *
+ * There's also compiler generated _THIS_IP_ through
+ * KCOV and such which we have no hope of annotating.
+ *
+ * As such, blanket accept self-references without
+ * issue.
+ */
+ continue;
+ }
+
+ if (dest->noendbr)
+ continue;
+
+ WARN_FUNC("relocation to !ENDBR: %s",
+ insn->sec, insn->offset,
+ offstr(dest->sec, dest->offset));
+
+ warnings++;
+ }
+
+ return warnings;
+}
+
+static int validate_ibt_data_reloc(struct objtool_file *file,
+ struct reloc *reloc)
+{
+ struct instruction *dest;
+
+ dest = find_insn(file, reloc->sym->sec,
+ reloc->sym->offset + reloc->addend);
+ if (!dest)
+ return 0;
+
+ if (dest->type == INSN_ENDBR) {
+ mark_endbr_used(dest);
+ return 0;
+ }
+
+ if (dest->noendbr)
+ return 0;
+
+ WARN_FUNC("data relocation to !ENDBR: %s",
+ reloc->sec->base, reloc->offset,
+ offstr(dest->sec, dest->offset));
+
+ return 1;
+}
+
+/*
+ * Validate IBT rules and remove used ENDBR instructions from the seal list.
+ * Unused ENDBR instructions will be annotated for sealing (i.e., replaced with
+ * NOPs) later, in create_ibt_endbr_seal_sections().
+ */
static int validate_ibt(struct objtool_file *file)
{
struct section *sec;
struct reloc *reloc;
+ struct instruction *insn;
+ int warnings = 0;
+
+ for_each_insn(file, insn)
+ warnings += validate_ibt_insn(file, insn);
for_each_sec(file, sec) {
- bool is_data;
- /* already done in validate_branch() */
+ /* Already done by validate_ibt_insn() */
if (sec->sh.sh_flags & SHF_EXECINSTR)
continue;
if (!sec->reloc)
continue;
- if (!strncmp(sec->name, ".orc", 4))
+ /*
+ * These sections can reference text addresses, but not with
+ * the intent to indirect branch to them.
+ */
+ if (!strncmp(sec->name, ".discard", 8) ||
+ !strncmp(sec->name, ".debug", 6) ||
+ !strcmp(sec->name, ".altinstructions") ||
+ !strcmp(sec->name, ".ibt_endbr_seal") ||
+ !strcmp(sec->name, ".orc_unwind_ip") ||
+ !strcmp(sec->name, ".parainstructions") ||
+ !strcmp(sec->name, ".retpoline_sites") ||
+ !strcmp(sec->name, ".smp_locks") ||
+ !strcmp(sec->name, ".static_call_sites") ||
+ !strcmp(sec->name, "_error_injection_whitelist") ||
+ !strcmp(sec->name, "_kprobe_blacklist") ||
+ !strcmp(sec->name, "__bug_table") ||
+ !strcmp(sec->name, "__ex_table") ||
+ !strcmp(sec->name, "__jump_table") ||
+ !strcmp(sec->name, "__mcount_loc") ||
+ !strcmp(sec->name, "__tracepoints"))
continue;
- if (!strncmp(sec->name, ".discard", 8))
- continue;
+ list_for_each_entry(reloc, &sec->reloc->reloc_list, list)
+ warnings += validate_ibt_data_reloc(file, reloc);
+ }
- if (!strncmp(sec->name, ".debug", 6))
- continue;
+ return warnings;
+}
- if (!strcmp(sec->name, "_error_injection_whitelist"))
- continue;
+static int validate_sls(struct objtool_file *file)
+{
+ struct instruction *insn, *next_insn;
+ int warnings = 0;
- if (!strcmp(sec->name, "_kprobe_blacklist"))
- continue;
+ for_each_insn(file, insn) {
+ next_insn = next_insn_same_sec(file, insn);
- is_data = strstr(sec->name, ".data") || strstr(sec->name, ".rodata");
+ if (insn->retpoline_safe)
+ continue;
- list_for_each_entry(reloc, &sec->reloc->reloc_list, list) {
- struct instruction *dest;
+ switch (insn->type) {
+ case INSN_RETURN:
+ if (!next_insn || next_insn->type != INSN_TRAP) {
+ WARN_FUNC("missing int3 after ret",
+ insn->sec, insn->offset);
+ warnings++;
+ }
- dest = validate_ibt_reloc(file, reloc);
- if (is_data && dest && !dest->noendbr) {
- warn_noendbr("data ", reloc->sym->sec,
- reloc->sym->offset + reloc->addend,
- dest);
+ break;
+ case INSN_JUMP_DYNAMIC:
+ if (!next_insn || next_insn->type != INSN_TRAP) {
+ WARN_FUNC("missing int3 after indirect jump",
+ insn->sec, insn->offset);
+ warnings++;
}
+ break;
+ default:
+ break;
}
}
- return 0;
+ return warnings;
}
static int validate_reachable_instructions(struct objtool_file *file)
@@ -3829,16 +3892,6 @@ int check(struct objtool_file *file)
{
int ret, warnings = 0;
- if (lto && !(vmlinux || module)) {
- fprintf(stderr, "--lto requires: --vmlinux or --module\n");
- return 1;
- }
-
- if (ibt && !lto) {
- fprintf(stderr, "--ibt requires: --lto\n");
- return 1;
- }
-
arch_initial_func_cfi_state(&initial_func_cfi);
init_cfi_state(&init_cfi);
init_cfi_state(&func_cfi);
@@ -3859,73 +3912,89 @@ int check(struct objtool_file *file)
if (list_empty(&file->insn_list))
goto out;
- if (vmlinux && !lto) {
- ret = validate_vmlinux_functions(file);
+ if (opts.retpoline) {
+ ret = validate_retpoline(file);
if (ret < 0)
- goto out;
-
+ return ret;
warnings += ret;
- goto out;
}
- if (retpoline) {
- ret = validate_retpoline(file);
+ if (opts.stackval || opts.orc || opts.uaccess) {
+ ret = validate_functions(file);
if (ret < 0)
- return ret;
+ goto out;
warnings += ret;
- }
- ret = validate_functions(file);
- if (ret < 0)
- goto out;
- warnings += ret;
+ ret = validate_unwind_hints(file, NULL);
+ if (ret < 0)
+ goto out;
+ warnings += ret;
- ret = validate_unwind_hints(file, NULL);
- if (ret < 0)
- goto out;
- warnings += ret;
+ if (!warnings) {
+ ret = validate_reachable_instructions(file);
+ if (ret < 0)
+ goto out;
+ warnings += ret;
+ }
- if (ibt) {
+ } else if (opts.noinstr) {
+ ret = validate_noinstr_sections(file);
+ if (ret < 0)
+ goto out;
+ warnings += ret;
+ }
+
+ if (opts.ibt) {
ret = validate_ibt(file);
if (ret < 0)
goto out;
warnings += ret;
}
- if (!warnings) {
- ret = validate_reachable_instructions(file);
+ if (opts.sls) {
+ ret = validate_sls(file);
if (ret < 0)
goto out;
warnings += ret;
}
- ret = create_static_call_sections(file);
- if (ret < 0)
- goto out;
- warnings += ret;
+ if (opts.static_call) {
+ ret = create_static_call_sections(file);
+ if (ret < 0)
+ goto out;
+ warnings += ret;
+ }
- if (retpoline) {
+ if (opts.retpoline) {
ret = create_retpoline_sites_sections(file);
if (ret < 0)
goto out;
warnings += ret;
}
- if (mcount) {
+ if (opts.mcount) {
ret = create_mcount_loc_sections(file);
if (ret < 0)
goto out;
warnings += ret;
}
- if (ibt) {
+ if (opts.ibt) {
ret = create_ibt_endbr_seal_sections(file);
if (ret < 0)
goto out;
warnings += ret;
}
- if (stats) {
+ if (opts.orc && !list_empty(&file->insn_list)) {
+ ret = orc_create(file);
+ if (ret < 0)
+ goto out;
+ warnings += ret;
+ }
+
+
+ if (opts.stats) {
printf("nr_insns_visited: %ld\n", nr_insns_visited);
printf("nr_cfi: %ld\n", nr_cfi);
printf("nr_cfi_reused: %ld\n", nr_cfi_reused);
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index d7b99a737496..c25e957c1e52 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -355,7 +355,7 @@ static int read_sections(struct elf *elf)
elf_hash_add(section_name, &sec->name_hash, str_hash(sec->name));
}
- if (stats) {
+ if (opts.stats) {
printf("nr_sections: %lu\n", (unsigned long)sections_nr);
printf("section_bits: %d\n", elf->section_bits);
}
@@ -374,9 +374,15 @@ static void elf_add_symbol(struct elf *elf, struct symbol *sym)
struct list_head *entry;
struct rb_node *pnode;
+ INIT_LIST_HEAD(&sym->pv_target);
+ sym->alias = sym;
+
sym->type = GELF_ST_TYPE(sym->sym.st_info);
sym->bind = GELF_ST_BIND(sym->sym.st_info);
+ if (sym->type == STT_FILE)
+ elf->num_files++;
+
sym->offset = sym->sym.st_value;
sym->len = sym->sym.st_size;
@@ -435,8 +441,6 @@ static int read_symbols(struct elf *elf)
return -1;
}
memset(sym, 0, sizeof(*sym));
- INIT_LIST_HEAD(&sym->pv_target);
- sym->alias = sym;
sym->idx = i;
@@ -475,7 +479,7 @@ static int read_symbols(struct elf *elf)
elf_add_symbol(elf, sym);
}
- if (stats) {
+ if (opts.stats) {
printf("nr_symbols: %lu\n", (unsigned long)symbols_nr);
printf("symbol_bits: %d\n", elf->symbol_bits);
}
@@ -546,7 +550,7 @@ static struct section *elf_create_reloc_section(struct elf *elf,
int reltype);
int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset,
- unsigned int type, struct symbol *sym, int addend)
+ unsigned int type, struct symbol *sym, s64 addend)
{
struct reloc *reloc;
@@ -575,37 +579,239 @@ int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset,
return 0;
}
+/*
+ * Ensure that any reloc section containing references to @sym is marked
+ * changed such that it will get re-generated in elf_rebuild_reloc_sections()
+ * with the new symbol index.
+ */
+static void elf_dirty_reloc_sym(struct elf *elf, struct symbol *sym)
+{
+ struct section *sec;
+
+ list_for_each_entry(sec, &elf->sections, list) {
+ struct reloc *reloc;
+
+ if (sec->changed)
+ continue;
+
+ list_for_each_entry(reloc, &sec->reloc_list, list) {
+ if (reloc->sym == sym) {
+ sec->changed = true;
+ break;
+ }
+ }
+ }
+}
+
+/*
+ * The libelf API is terrible; gelf_update_sym*() takes a data block relative
+ * index value, *NOT* the symbol index. As such, iterate the data blocks and
+ * adjust index until it fits.
+ *
+ * If no data block is found, allow adding a new data block provided the index
+ * is only one past the end.
+ */
+static int elf_update_symbol(struct elf *elf, struct section *symtab,
+ struct section *symtab_shndx, struct symbol *sym)
+{
+ Elf32_Word shndx = sym->sec ? sym->sec->idx : SHN_UNDEF;
+ Elf_Data *symtab_data = NULL, *shndx_data = NULL;
+ Elf64_Xword entsize = symtab->sh.sh_entsize;
+ int max_idx, idx = sym->idx;
+ Elf_Scn *s, *t = NULL;
+
+ s = elf_getscn(elf->elf, symtab->idx);
+ if (!s) {
+ WARN_ELF("elf_getscn");
+ return -1;
+ }
+
+ if (symtab_shndx) {
+ t = elf_getscn(elf->elf, symtab_shndx->idx);
+ if (!t) {
+ WARN_ELF("elf_getscn");
+ return -1;
+ }
+ }
+
+ for (;;) {
+ /* get next data descriptor for the relevant sections */
+ symtab_data = elf_getdata(s, symtab_data);
+ if (t)
+ shndx_data = elf_getdata(t, shndx_data);
+
+ /* end-of-list */
+ if (!symtab_data) {
+ void *buf;
+
+ if (idx) {
+ /* we don't do holes in symbol tables */
+ WARN("index out of range");
+ return -1;
+ }
+
+ /* if @idx == 0, it's the next contiguous entry, create it */
+ symtab_data = elf_newdata(s);
+ if (t)
+ shndx_data = elf_newdata(t);
+
+ buf = calloc(1, entsize);
+ if (!buf) {
+ WARN("malloc");
+ return -1;
+ }
+
+ symtab_data->d_buf = buf;
+ symtab_data->d_size = entsize;
+ symtab_data->d_align = 1;
+ symtab_data->d_type = ELF_T_SYM;
+
+ symtab->sh.sh_size += entsize;
+ symtab->changed = true;
+
+ if (t) {
+ shndx_data->d_buf = &sym->sec->idx;
+ shndx_data->d_size = sizeof(Elf32_Word);
+ shndx_data->d_align = sizeof(Elf32_Word);
+ shndx_data->d_type = ELF_T_WORD;
+
+ symtab_shndx->sh.sh_size += sizeof(Elf32_Word);
+ symtab_shndx->changed = true;
+ }
+
+ break;
+ }
+
+ /* empty blocks should not happen */
+ if (!symtab_data->d_size) {
+ WARN("zero size data");
+ return -1;
+ }
+
+ /* is this the right block? */
+ max_idx = symtab_data->d_size / entsize;
+ if (idx < max_idx)
+ break;
+
+ /* adjust index and try again */
+ idx -= max_idx;
+ }
+
+ /* something went side-ways */
+ if (idx < 0) {
+ WARN("negative index");
+ return -1;
+ }
+
+ /* setup extended section index magic and write the symbol */
+ if (shndx >= SHN_UNDEF && shndx < SHN_LORESERVE) {
+ sym->sym.st_shndx = shndx;
+ if (!shndx_data)
+ shndx = 0;
+ } else {
+ sym->sym.st_shndx = SHN_XINDEX;
+ if (!shndx_data) {
+ WARN("no .symtab_shndx");
+ return -1;
+ }
+ }
+
+ if (!gelf_update_symshndx(symtab_data, shndx_data, idx, &sym->sym, shndx)) {
+ WARN_ELF("gelf_update_symshndx");
+ return -1;
+ }
+
+ return 0;
+}
+
+static struct symbol *
+elf_create_section_symbol(struct elf *elf, struct section *sec)
+{
+ struct section *symtab, *symtab_shndx;
+ Elf32_Word first_non_local, new_idx;
+ struct symbol *sym, *old;
+
+ symtab = find_section_by_name(elf, ".symtab");
+ if (symtab) {
+ symtab_shndx = find_section_by_name(elf, ".symtab_shndx");
+ } else {
+ WARN("no .symtab");
+ return NULL;
+ }
+
+ sym = calloc(1, sizeof(*sym));
+ if (!sym) {
+ perror("malloc");
+ return NULL;
+ }
+
+ sym->name = sec->name;
+ sym->sec = sec;
+
+ // st_name 0
+ sym->sym.st_info = GELF_ST_INFO(STB_LOCAL, STT_SECTION);
+ // st_other 0
+ // st_value 0
+ // st_size 0
+
+ /*
+ * Move the first global symbol, as per sh_info, into a new, higher
+ * symbol index. This fees up a spot for a new local symbol.
+ */
+ first_non_local = symtab->sh.sh_info;
+ new_idx = symtab->sh.sh_size / symtab->sh.sh_entsize;
+ old = find_symbol_by_index(elf, first_non_local);
+ if (old) {
+ old->idx = new_idx;
+
+ hlist_del(&old->hash);
+ elf_hash_add(symbol, &old->hash, old->idx);
+
+ elf_dirty_reloc_sym(elf, old);
+
+ if (elf_update_symbol(elf, symtab, symtab_shndx, old)) {
+ WARN("elf_update_symbol move");
+ return NULL;
+ }
+
+ new_idx = first_non_local;
+ }
+
+ sym->idx = new_idx;
+ if (elf_update_symbol(elf, symtab, symtab_shndx, sym)) {
+ WARN("elf_update_symbol");
+ return NULL;
+ }
+
+ /*
+ * Either way, we added a LOCAL symbol.
+ */
+ symtab->sh.sh_info += 1;
+
+ elf_add_symbol(elf, sym);
+
+ return sym;
+}
+
int elf_add_reloc_to_insn(struct elf *elf, struct section *sec,
unsigned long offset, unsigned int type,
struct section *insn_sec, unsigned long insn_off)
{
- struct symbol *sym;
- int addend;
-
- if (insn_sec->sym) {
- sym = insn_sec->sym;
- addend = insn_off;
+ struct symbol *sym = insn_sec->sym;
+ int addend = insn_off;
- } else {
+ if (!sym) {
/*
- * The Clang assembler strips section symbols, so we have to
- * reference the function symbol instead:
+ * Due to how weak functions work, we must use section based
+ * relocations. Symbol based relocations would result in the
+ * weak and non-weak function annotations being overlaid on the
+ * non-weak function after linking.
*/
- sym = find_symbol_containing(insn_sec, insn_off);
- if (!sym) {
- /*
- * Hack alert. This happens when we need to reference
- * the NOP pad insn immediately after the function.
- */
- sym = find_symbol_containing(insn_sec, insn_off - 1);
- }
-
- if (!sym) {
- WARN("can't find symbol containing %s+0x%lx", insn_sec->name, insn_off);
+ sym = elf_create_section_symbol(elf, insn_sec);
+ if (!sym)
return -1;
- }
- addend = insn_off - sym->offset;
+ insn_sec->sym = sym;
}
return elf_add_reloc(elf, sec, offset, type, sym, addend);
@@ -700,7 +906,7 @@ static int read_relocs(struct elf *elf)
tot_reloc += nr_reloc;
}
- if (stats) {
+ if (opts.stats) {
printf("max_reloc: %lu\n", max_reloc);
printf("tot_reloc: %lu\n", tot_reloc);
printf("reloc_bits: %d\n", elf->reloc_bits);
@@ -1079,7 +1285,7 @@ int elf_write(struct elf *elf)
struct section *sec;
Elf_Scn *s;
- if (dryrun)
+ if (opts.dryrun)
return 0;
/* Update changed relocation sections and section headers: */
diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h
index c39dbfaef6dc..280ea18b7f2b 100644
--- a/tools/objtool/include/objtool/builtin.h
+++ b/tools/objtool/include/objtool/builtin.h
@@ -8,13 +8,37 @@
#include <subcmd/parse-options.h>
extern const struct option check_options[];
-extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats,
- lto, vmlinux, mcount, noinstr, backup, sls, dryrun,
- ibt;
+
+struct opts {
+ /* actions: */
+ bool dump_orc;
+ bool hack_jump_label;
+ bool hack_noinstr;
+ bool ibt;
+ bool mcount;
+ bool noinstr;
+ bool orc;
+ bool retpoline;
+ bool sls;
+ bool stackval;
+ bool static_call;
+ bool uaccess;
+
+ /* options: */
+ bool backtrace;
+ bool backup;
+ bool dryrun;
+ bool link;
+ bool module;
+ bool no_unreachable;
+ bool sec_address;
+ bool stats;
+};
+
+extern struct opts opts;
extern int cmd_parse_options(int argc, const char **argv, const char * const usage[]);
-extern int cmd_check(int argc, const char **argv);
-extern int cmd_orc(int argc, const char **argv);
+extern int objtool_run(int argc, const char **argv);
#endif /* _BUILTIN_H */
diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h
index 22ba7e2b816e..adebfbc2b518 100644
--- a/tools/objtool/include/objtool/elf.h
+++ b/tools/objtool/include/objtool/elf.h
@@ -73,7 +73,7 @@ struct reloc {
struct symbol *sym;
unsigned long offset;
unsigned int type;
- int addend;
+ s64 addend;
int idx;
bool jump_table_start;
};
@@ -86,7 +86,7 @@ struct elf {
int fd;
bool changed;
char *name;
- unsigned int text_size;
+ unsigned int text_size, num_files;
struct list_head sections;
int symbol_bits;
@@ -131,11 +131,21 @@ static inline u32 reloc_hash(struct reloc *reloc)
return sec_offset_hash(reloc->sec, reloc->offset);
}
+/*
+ * Try to see if it's a whole archive (vmlinux.o or module).
+ *
+ * Note this will miss the case where a module only has one source file.
+ */
+static inline bool has_multiple_files(struct elf *elf)
+{
+ return elf->num_files > 1;
+}
+
struct elf *elf_open_read(const char *name, int flags);
struct section *elf_create_section(struct elf *elf, const char *name, unsigned int sh_flags, size_t entsize, int nr);
int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset,
- unsigned int type, struct symbol *sym, int addend);
+ unsigned int type, struct symbol *sym, s64 addend);
int elf_add_reloc_to_insn(struct elf *elf, struct section *sec,
unsigned long offset, unsigned int type,
struct section *insn_sec, unsigned long insn_off);
diff --git a/tools/objtool/include/objtool/objtool.h b/tools/objtool/include/objtool/objtool.h
index 7a5c13a78f87..a6e72d916807 100644
--- a/tools/objtool/include/objtool/objtool.h
+++ b/tools/objtool/include/objtool/objtool.h
@@ -27,7 +27,7 @@ struct objtool_file {
struct list_head static_call_list;
struct list_head mcount_loc_list;
struct list_head endbr_list;
- bool ignore_unreachables, c_file, hints, rodata;
+ bool ignore_unreachables, hints, rodata;
unsigned int nr_endbr;
unsigned int nr_endbr_int;
diff --git a/tools/objtool/include/objtool/warn.h b/tools/objtool/include/objtool/warn.h
index 802cfda0a6f6..a3e79ae75f2e 100644
--- a/tools/objtool/include/objtool/warn.h
+++ b/tools/objtool/include/objtool/warn.h
@@ -11,34 +11,33 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
+#include <objtool/builtin.h>
#include <objtool/elf.h>
extern const char *objname;
static inline char *offstr(struct section *sec, unsigned long offset)
{
- struct symbol *func;
- char *name, *str;
- unsigned long name_off;
+ bool is_text = (sec->sh.sh_flags & SHF_EXECINSTR);
+ struct symbol *sym = NULL;
+ char *str;
+ int len;
- func = find_func_containing(sec, offset);
- if (!func)
- func = find_symbol_containing(sec, offset);
- if (func) {
- name = func->name;
- name_off = offset - func->offset;
+ if (is_text)
+ sym = find_func_containing(sec, offset);
+ if (!sym)
+ sym = find_symbol_containing(sec, offset);
+
+ if (sym) {
+ str = malloc(strlen(sym->name) + strlen(sec->name) + 40);
+ len = sprintf(str, "%s+0x%lx", sym->name, offset - sym->offset);
+ if (opts.sec_address)
+ sprintf(str+len, " (%s+0x%lx)", sec->name, offset);
} else {
- name = sec->name;
- name_off = offset;
+ str = malloc(strlen(sec->name) + 20);
+ sprintf(str, "%s+0x%lx", sec->name, offset);
}
- str = malloc(strlen(name) + 20);
-
- if (func)
- sprintf(str, "%s()+0x%lx", name, name_off);
- else
- sprintf(str, "%s+0x%lx", name, name_off);
-
return str;
}
diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c
index b09946f4e1d6..512669ce064c 100644
--- a/tools/objtool/objtool.c
+++ b/tools/objtool/objtool.c
@@ -3,16 +3,6 @@
* Copyright (C) 2015 Josh Poimboeuf <jpoimboe@redhat.com>
*/
-/*
- * objtool:
- *
- * The 'check' subcmd analyzes every .o file and ensures the validity of its
- * stack trace metadata. It enforces a set of rules on asm code and C inline
- * assembly code so that stack traces can be reliable.
- *
- * For more information, see tools/objtool/Documentation/stack-validation.txt.
- */
-
#include <stdio.h>
#include <stdbool.h>
#include <string.h>
@@ -26,20 +16,6 @@
#include <objtool/objtool.h>
#include <objtool/warn.h>
-struct cmd_struct {
- const char *name;
- int (*fn)(int, const char **);
- const char *help;
-};
-
-static const char objtool_usage_string[] =
- "objtool COMMAND [ARGS]";
-
-static struct cmd_struct objtool_cmds[] = {
- {"check", cmd_check, "Perform stack metadata validation on an object file" },
- {"orc", cmd_orc, "Generate in-place ORC unwind tables for an object file" },
-};
-
bool help;
const char *objname;
@@ -118,7 +94,7 @@ struct objtool_file *objtool_open_read(const char *_objname)
if (!file.elf)
return NULL;
- if (backup && !objtool_create_backup(objname)) {
+ if (opts.backup && !objtool_create_backup(objname)) {
WARN("can't create backup file");
return NULL;
}
@@ -129,8 +105,7 @@ struct objtool_file *objtool_open_read(const char *_objname)
INIT_LIST_HEAD(&file.static_call_list);
INIT_LIST_HEAD(&file.mcount_loc_list);
INIT_LIST_HEAD(&file.endbr_list);
- file.c_file = !vmlinux && find_section_by_name(file.elf, ".comment");
- file.ignore_unreachables = no_unreachable;
+ file.ignore_unreachables = opts.no_unreachable;
file.hints = false;
return &file;
@@ -138,7 +113,7 @@ struct objtool_file *objtool_open_read(const char *_objname)
void objtool_pv_add(struct objtool_file *f, int idx, struct symbol *func)
{
- if (!noinstr)
+ if (!opts.noinstr)
return;
if (!f->pv_ops) {
@@ -162,70 +137,6 @@ void objtool_pv_add(struct objtool_file *f, int idx, struct symbol *func)
f->pv_ops[idx].clean = false;
}
-static void cmd_usage(void)
-{
- unsigned int i, longest = 0;
-
- printf("\n usage: %s\n\n", objtool_usage_string);
-
- for (i = 0; i < ARRAY_SIZE(objtool_cmds); i++) {
- if (longest < strlen(objtool_cmds[i].name))
- longest = strlen(objtool_cmds[i].name);
- }
-
- puts(" Commands:");
- for (i = 0; i < ARRAY_SIZE(objtool_cmds); i++) {
- printf(" %-*s ", longest, objtool_cmds[i].name);
- puts(objtool_cmds[i].help);
- }
-
- printf("\n");
-
- if (!help)
- exit(129);
- exit(0);
-}
-
-static void handle_options(int *argc, const char ***argv)
-{
- while (*argc > 0) {
- const char *cmd = (*argv)[0];
-
- if (cmd[0] != '-')
- break;
-
- if (!strcmp(cmd, "--help") || !strcmp(cmd, "-h")) {
- help = true;
- break;
- } else {
- fprintf(stderr, "Unknown option: %s\n", cmd);
- cmd_usage();
- }
-
- (*argv)++;
- (*argc)--;
- }
-}
-
-static void handle_internal_command(int argc, const char **argv)
-{
- const char *cmd = argv[0];
- unsigned int i, ret;
-
- for (i = 0; i < ARRAY_SIZE(objtool_cmds); i++) {
- struct cmd_struct *p = objtool_cmds+i;
-
- if (strcmp(p->name, cmd))
- continue;
-
- ret = p->fn(argc, argv);
-
- exit(ret);
- }
-
- cmd_usage();
-}
-
int main(int argc, const char **argv)
{
static const char *UNUSED = "OBJTOOL_NOT_IMPLEMENTED";
@@ -234,14 +145,7 @@ int main(int argc, const char **argv)
exec_cmd_init("objtool", UNUSED, UNUSED, UNUSED);
pager_init(UNUSED);
- argv++;
- argc--;
- handle_options(&argc, &argv);
-
- if (!argc || help)
- cmd_usage();
-
- handle_internal_command(argc, argv);
+ objtool_run(argc, argv);
return 0;
}
diff --git a/tools/objtool/weak.c b/tools/objtool/weak.c
index 8314e824db4a..d83f607733b0 100644
--- a/tools/objtool/weak.c
+++ b/tools/objtool/weak.c
@@ -15,17 +15,12 @@
return ENOSYS; \
})
-int __weak check(struct objtool_file *file)
-{
- UNSUPPORTED("check subcommand");
-}
-
int __weak orc_dump(const char *_objname)
{
- UNSUPPORTED("orc");
+ UNSUPPORTED("ORC");
}
int __weak orc_create(struct objtool_file *file)
{
- UNSUPPORTED("orc");
+ UNSUPPORTED("ORC");
}
diff --git a/tools/perf/Documentation/perf.txt b/tools/perf/Documentation/perf.txt
index 9c330cdfa973..71ebdf8125de 100644
--- a/tools/perf/Documentation/perf.txt
+++ b/tools/perf/Documentation/perf.txt
@@ -83,7 +83,7 @@ linkperf:perf-buildid-list[1], linkperf:perf-c2c[1],
linkperf:perf-config[1], linkperf:perf-data[1], linkperf:perf-diff[1],
linkperf:perf-evlist[1], linkperf:perf-ftrace[1],
linkperf:perf-help[1], linkperf:perf-inject[1],
-linkperf:perf-intel-pt[1], linkperf:perf-kallsyms[1],
+linkperf:perf-intel-pt[1], linkperf:perf-iostat[1], linkperf:perf-kallsyms[1],
linkperf:perf-kmem[1], linkperf:perf-kvm[1], linkperf:perf-lock[1],
linkperf:perf-mem[1], linkperf:perf-probe[1], linkperf:perf-sched[1],
linkperf:perf-script[1], linkperf:perf-test[1],
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 96ad944ca6a8..1bd64e7404b9 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -272,6 +272,9 @@ ifdef PYTHON_CONFIG
PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) -lutil
PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --includes 2>/dev/null)
FLAGS_PYTHON_EMBED := $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS)
+ ifeq ($(CC_NO_CLANG), 0)
+ PYTHON_EMBED_CCOPTS := $(filter-out -ffat-lto-objects, $(PYTHON_EMBED_CCOPTS))
+ endif
endif
FEATURE_CHECK_CFLAGS-libpython := $(PYTHON_EMBED_CCOPTS)
@@ -550,9 +553,16 @@ ifndef NO_LIBELF
ifeq ($(feature-libbpf), 1)
EXTLIBS += -lbpf
$(call detected,CONFIG_LIBBPF_DYNAMIC)
+
+ $(call feature_check,libbpf-btf__load_from_kernel_by_id)
+ ifeq ($(feature-libbpf-btf__load_from_kernel_by_id), 1)
+ CFLAGS += -DHAVE_LIBBPF_BTF__LOAD_FROM_KERNEL_BY_ID
+ endif
else
dummy := $(error Error: No libbpf devel library found, please install libbpf-devel);
endif
+ else
+ CFLAGS += -DHAVE_LIBBPF_BTF__LOAD_FROM_KERNEL_BY_ID
endif
endif
@@ -790,6 +800,9 @@ else
LDFLAGS += $(PERL_EMBED_LDFLAGS)
EXTLIBS += $(PERL_EMBED_LIBADD)
CFLAGS += -DHAVE_LIBPERL_SUPPORT
+ ifeq ($(CC_NO_CLANG), 0)
+ CFLAGS += -Wno-compound-token-split-by-macro
+ endif
$(call detected,CONFIG_LIBPERL)
endif
endif
diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c
index 86e2e926aa0e..e8b577d33e53 100644
--- a/tools/perf/arch/arm64/util/arm-spe.c
+++ b/tools/perf/arch/arm64/util/arm-spe.c
@@ -148,6 +148,7 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
bool privileged = perf_event_paranoid_check(-1);
struct evsel *tracking_evsel;
int err;
+ u64 bit;
sper->evlist = evlist;
@@ -239,6 +240,21 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
arm_spe_set_timestamp(itr, arm_spe_evsel);
}
+ /*
+ * Set this only so that perf report knows that SPE generates memory info. It has no effect
+ * on the opening of the event or the SPE data produced.
+ */
+ evsel__set_sample_bit(arm_spe_evsel, DATA_SRC);
+
+ /*
+ * The PHYS_ADDR flag does not affect the driver behaviour, it is used to
+ * inform that the resulting output's SPE samples contain physical addresses
+ * where applicable.
+ */
+ bit = perf_pmu__format_bits(&arm_spe_pmu->format, "pa_enable");
+ if (arm_spe_evsel->core.attr.config & bit)
+ evsel__set_sample_bit(arm_spe_evsel, PHYS_ADDR);
+
/* Add dummy event to keep tracking */
err = parse_events(evlist, "dummy:u", NULL);
if (err)
diff --git a/tools/perf/arch/arm64/util/machine.c b/tools/perf/arch/arm64/util/machine.c
index d2ce31e28cd7..41c1596e5207 100644
--- a/tools/perf/arch/arm64/util/machine.c
+++ b/tools/perf/arch/arm64/util/machine.c
@@ -8,27 +8,6 @@
#include "callchain.h"
#include "record.h"
-/* On arm64, kernel text segment starts at high memory address,
- * for example 0xffff 0000 8xxx xxxx. Modules start at a low memory
- * address, like 0xffff 0000 00ax xxxx. When only small amount of
- * memory is used by modules, gap between end of module's text segment
- * and start of kernel text segment may reach 2G.
- * Therefore do not fill this gap and do not assign it to the kernel dso map.
- */
-
-#define SYMBOL_LIMIT (1 << 12) /* 4K */
-
-void arch__symbols__fixup_end(struct symbol *p, struct symbol *c)
-{
- if ((strchr(p->name, '[') && strchr(c->name, '[') == NULL) ||
- (strchr(p->name, '[') == NULL && strchr(c->name, '[')))
- /* Limit range of last symbol in module and kernel */
- p->end += SYMBOL_LIMIT;
- else
- p->end = c->start;
- pr_debug4("%s sym:%s end:%#" PRIx64 "\n", __func__, p->name, p->end);
-}
-
void arch__add_leaf_frame_record_opts(struct record_opts *opts)
{
opts->sample_user_regs |= sample_reg_masks[PERF_REG_ARM64_LR].mask;
diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build
index 8a79c4126e5b..0115f3166568 100644
--- a/tools/perf/arch/powerpc/util/Build
+++ b/tools/perf/arch/powerpc/util/Build
@@ -1,5 +1,4 @@
perf-y += header.o
-perf-y += machine.o
perf-y += kvm-stat.o
perf-y += perf_regs.o
perf-y += mem-events.o
diff --git a/tools/perf/arch/powerpc/util/machine.c b/tools/perf/arch/powerpc/util/machine.c
deleted file mode 100644
index e652a1aa8132..000000000000
--- a/tools/perf/arch/powerpc/util/machine.c
+++ /dev/null
@@ -1,25 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-#include <inttypes.h>
-#include <stdio.h>
-#include <string.h>
-#include <internal/lib.h> // page_size
-#include "debug.h"
-#include "symbol.h"
-
-/* On powerpc kernel text segment start at memory addresses, 0xc000000000000000
- * whereas the modules are located at very high memory addresses,
- * for example 0xc00800000xxxxxxx. The gap between end of kernel text segment
- * and beginning of first module's text segment is very high.
- * Therefore do not fill this gap and do not assign it to the kernel dso map.
- */
-
-void arch__symbols__fixup_end(struct symbol *p, struct symbol *c)
-{
- if (strchr(p->name, '[') == NULL && strchr(c->name, '['))
- /* Limit the range of last kernel symbol */
- p->end += page_size;
- else
- p->end = c->start;
- pr_debug4("%s sym:%s end:%#" PRIx64 "\n", __func__, p->name, p->end);
-}
diff --git a/tools/perf/arch/s390/util/machine.c b/tools/perf/arch/s390/util/machine.c
index 7644a4f6d4a4..98bc3f39d5f3 100644
--- a/tools/perf/arch/s390/util/machine.c
+++ b/tools/perf/arch/s390/util/machine.c
@@ -35,19 +35,3 @@ int arch__fix_module_text_start(u64 *start, u64 *size, const char *name)
return 0;
}
-
-/* On s390 kernel text segment start is located at very low memory addresses,
- * for example 0x10000. Modules are located at very high memory addresses,
- * for example 0x3ff xxxx xxxx. The gap between end of kernel text segment
- * and beginning of first module's text segment is very big.
- * Therefore do not fill this gap and do not assign it to the kernel dso map.
- */
-void arch__symbols__fixup_end(struct symbol *p, struct symbol *c)
-{
- if (strchr(p->name, '[') == NULL && strchr(c->name, '['))
- /* Last kernel symbol mapped to end of page */
- p->end = roundup(p->end, page_size);
- else
- p->end = c->start;
- pr_debug4("%s sym:%s end:%#" PRIx64 "\n", __func__, p->name, p->end);
-}
diff --git a/tools/perf/arch/x86/util/perf_regs.c b/tools/perf/arch/x86/util/perf_regs.c
index 207c56805c55..0ed177991ad0 100644
--- a/tools/perf/arch/x86/util/perf_regs.c
+++ b/tools/perf/arch/x86/util/perf_regs.c
@@ -9,6 +9,8 @@
#include "../../../util/perf_regs.h"
#include "../../../util/debug.h"
#include "../../../util/event.h"
+#include "../../../util/pmu.h"
+#include "../../../util/pmu-hybrid.h"
const struct sample_reg sample_reg_masks[] = {
SMPL_REG(AX, PERF_REG_X86_AX),
@@ -284,12 +286,22 @@ uint64_t arch__intr_reg_mask(void)
.disabled = 1,
.exclude_kernel = 1,
};
+ struct perf_pmu *pmu;
int fd;
/*
* In an unnamed union, init it here to build on older gcc versions
*/
attr.sample_period = 1;
+ if (perf_pmu__has_hybrid()) {
+ /*
+ * The same register set is supported among different hybrid PMUs.
+ * Only check the first available one.
+ */
+ pmu = list_first_entry(&perf_pmu__hybrid_pmus, typeof(*pmu), hybrid_list);
+ attr.config |= (__u64)pmu->type << PERF_PMU_TYPE_SHIFT;
+ }
+
event_attr_init(&attr);
fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
diff --git a/tools/perf/bench/epoll-ctl.c b/tools/perf/bench/epoll-ctl.c
index 134612bde0cb..4256dc5d6236 100644
--- a/tools/perf/bench/epoll-ctl.c
+++ b/tools/perf/bench/epoll-ctl.c
@@ -222,13 +222,20 @@ static void init_fdmaps(struct worker *w, int pct)
static int do_threads(struct worker *worker, struct perf_cpu_map *cpu)
{
pthread_attr_t thread_attr, *attrp = NULL;
- cpu_set_t cpuset;
+ cpu_set_t *cpuset;
unsigned int i, j;
int ret = 0;
+ int nrcpus;
+ size_t size;
if (!noaffinity)
pthread_attr_init(&thread_attr);
+ nrcpus = perf_cpu_map__nr(cpu);
+ cpuset = CPU_ALLOC(nrcpus);
+ BUG_ON(!cpuset);
+ size = CPU_ALLOC_SIZE(nrcpus);
+
for (i = 0; i < nthreads; i++) {
struct worker *w = &worker[i];
@@ -252,22 +259,28 @@ static int do_threads(struct worker *worker, struct perf_cpu_map *cpu)
init_fdmaps(w, 50);
if (!noaffinity) {
- CPU_ZERO(&cpuset);
- CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset);
+ CPU_ZERO_S(size, cpuset);
+ CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu,
+ size, cpuset);
- ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset);
- if (ret)
+ ret = pthread_attr_setaffinity_np(&thread_attr, size, cpuset);
+ if (ret) {
+ CPU_FREE(cpuset);
err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
+ }
attrp = &thread_attr;
}
ret = pthread_create(&w->thread, attrp, workerfn,
(void *)(struct worker *) w);
- if (ret)
+ if (ret) {
+ CPU_FREE(cpuset);
err(EXIT_FAILURE, "pthread_create");
+ }
}
+ CPU_FREE(cpuset);
if (!noaffinity)
pthread_attr_destroy(&thread_attr);
diff --git a/tools/perf/bench/epoll-wait.c b/tools/perf/bench/epoll-wait.c
index 37de970c9743..2728b0140853 100644
--- a/tools/perf/bench/epoll-wait.c
+++ b/tools/perf/bench/epoll-wait.c
@@ -291,9 +291,11 @@ static void print_summary(void)
static int do_threads(struct worker *worker, struct perf_cpu_map *cpu)
{
pthread_attr_t thread_attr, *attrp = NULL;
- cpu_set_t cpuset;
+ cpu_set_t *cpuset;
unsigned int i, j;
int ret = 0, events = EPOLLIN;
+ int nrcpus;
+ size_t size;
if (oneshot)
events |= EPOLLONESHOT;
@@ -306,6 +308,11 @@ static int do_threads(struct worker *worker, struct perf_cpu_map *cpu)
if (!noaffinity)
pthread_attr_init(&thread_attr);
+ nrcpus = perf_cpu_map__nr(cpu);
+ cpuset = CPU_ALLOC(nrcpus);
+ BUG_ON(!cpuset);
+ size = CPU_ALLOC_SIZE(nrcpus);
+
for (i = 0; i < nthreads; i++) {
struct worker *w = &worker[i];
@@ -341,22 +348,28 @@ static int do_threads(struct worker *worker, struct perf_cpu_map *cpu)
}
if (!noaffinity) {
- CPU_ZERO(&cpuset);
- CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset);
+ CPU_ZERO_S(size, cpuset);
+ CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu,
+ size, cpuset);
- ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset);
- if (ret)
+ ret = pthread_attr_setaffinity_np(&thread_attr, size, cpuset);
+ if (ret) {
+ CPU_FREE(cpuset);
err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
+ }
attrp = &thread_attr;
}
ret = pthread_create(&w->thread, attrp, workerfn,
(void *)(struct worker *) w);
- if (ret)
+ if (ret) {
+ CPU_FREE(cpuset);
err(EXIT_FAILURE, "pthread_create");
+ }
}
+ CPU_FREE(cpuset);
if (!noaffinity)
pthread_attr_destroy(&thread_attr);
diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c
index dbcecec4eeda..f05db4cf983d 100644
--- a/tools/perf/bench/futex-hash.c
+++ b/tools/perf/bench/futex-hash.c
@@ -122,12 +122,14 @@ static void print_summary(void)
int bench_futex_hash(int argc, const char **argv)
{
int ret = 0;
- cpu_set_t cpuset;
+ cpu_set_t *cpuset;
struct sigaction act;
unsigned int i;
pthread_attr_t thread_attr;
struct worker *worker = NULL;
struct perf_cpu_map *cpu;
+ int nrcpus;
+ size_t size;
argc = parse_options(argc, argv, options, bench_futex_hash_usage, 0);
if (argc) {
@@ -170,25 +172,35 @@ int bench_futex_hash(int argc, const char **argv)
threads_starting = params.nthreads;
pthread_attr_init(&thread_attr);
gettimeofday(&bench__start, NULL);
+
+ nrcpus = perf_cpu_map__nr(cpu);
+ cpuset = CPU_ALLOC(nrcpus);
+ BUG_ON(!cpuset);
+ size = CPU_ALLOC_SIZE(nrcpus);
+
for (i = 0; i < params.nthreads; i++) {
worker[i].tid = i;
worker[i].futex = calloc(params.nfutexes, sizeof(*worker[i].futex));
if (!worker[i].futex)
goto errmem;
- CPU_ZERO(&cpuset);
- CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset);
+ CPU_ZERO_S(size, cpuset);
- ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset);
- if (ret)
+ CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset);
+ ret = pthread_attr_setaffinity_np(&thread_attr, size, cpuset);
+ if (ret) {
+ CPU_FREE(cpuset);
err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
-
+ }
ret = pthread_create(&worker[i].thread, &thread_attr, workerfn,
(void *)(struct worker *) &worker[i]);
- if (ret)
+ if (ret) {
+ CPU_FREE(cpuset);
err(EXIT_FAILURE, "pthread_create");
+ }
}
+ CPU_FREE(cpuset);
pthread_attr_destroy(&thread_attr);
pthread_mutex_lock(&thread_lock);
diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c
index 6fc9a3d55c1f..0abb3f7ee24f 100644
--- a/tools/perf/bench/futex-lock-pi.c
+++ b/tools/perf/bench/futex-lock-pi.c
@@ -120,11 +120,17 @@ static void *workerfn(void *arg)
static void create_threads(struct worker *w, pthread_attr_t thread_attr,
struct perf_cpu_map *cpu)
{
- cpu_set_t cpuset;
+ cpu_set_t *cpuset;
unsigned int i;
+ int nrcpus = perf_cpu_map__nr(cpu);
+ size_t size;
threads_starting = params.nthreads;
+ cpuset = CPU_ALLOC(nrcpus);
+ BUG_ON(!cpuset);
+ size = CPU_ALLOC_SIZE(nrcpus);
+
for (i = 0; i < params.nthreads; i++) {
worker[i].tid = i;
@@ -135,15 +141,20 @@ static void create_threads(struct worker *w, pthread_attr_t thread_attr,
} else
worker[i].futex = &global_futex;
- CPU_ZERO(&cpuset);
- CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset);
+ CPU_ZERO_S(size, cpuset);
+ CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset);
- if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))
+ if (pthread_attr_setaffinity_np(&thread_attr, size, cpuset)) {
+ CPU_FREE(cpuset);
err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
+ }
- if (pthread_create(&w[i].thread, &thread_attr, workerfn, &worker[i]))
+ if (pthread_create(&w[i].thread, &thread_attr, workerfn, &worker[i])) {
+ CPU_FREE(cpuset);
err(EXIT_FAILURE, "pthread_create");
+ }
}
+ CPU_FREE(cpuset);
}
int bench_futex_lock_pi(int argc, const char **argv)
diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c
index 2f59d5d1c509..b6faabfafb8e 100644
--- a/tools/perf/bench/futex-requeue.c
+++ b/tools/perf/bench/futex-requeue.c
@@ -123,22 +123,33 @@ static void *workerfn(void *arg __maybe_unused)
static void block_threads(pthread_t *w,
pthread_attr_t thread_attr, struct perf_cpu_map *cpu)
{
- cpu_set_t cpuset;
+ cpu_set_t *cpuset;
unsigned int i;
+ int nrcpus = perf_cpu_map__nr(cpu);
+ size_t size;
threads_starting = params.nthreads;
+ cpuset = CPU_ALLOC(nrcpus);
+ BUG_ON(!cpuset);
+ size = CPU_ALLOC_SIZE(nrcpus);
+
/* create and block all threads */
for (i = 0; i < params.nthreads; i++) {
- CPU_ZERO(&cpuset);
- CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset);
+ CPU_ZERO_S(size, cpuset);
+ CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset);
- if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))
+ if (pthread_attr_setaffinity_np(&thread_attr, size, cpuset)) {
+ CPU_FREE(cpuset);
err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
+ }
- if (pthread_create(&w[i], &thread_attr, workerfn, NULL))
+ if (pthread_create(&w[i], &thread_attr, workerfn, NULL)) {
+ CPU_FREE(cpuset);
err(EXIT_FAILURE, "pthread_create");
+ }
}
+ CPU_FREE(cpuset);
}
static void toggle_done(int sig __maybe_unused,
diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c
index 861deb934745..e47f46a3a47e 100644
--- a/tools/perf/bench/futex-wake-parallel.c
+++ b/tools/perf/bench/futex-wake-parallel.c
@@ -144,22 +144,33 @@ static void *blocked_workerfn(void *arg __maybe_unused)
static void block_threads(pthread_t *w, pthread_attr_t thread_attr,
struct perf_cpu_map *cpu)
{
- cpu_set_t cpuset;
+ cpu_set_t *cpuset;
unsigned int i;
+ int nrcpus = perf_cpu_map__nr(cpu);
+ size_t size;
threads_starting = params.nthreads;
+ cpuset = CPU_ALLOC(nrcpus);
+ BUG_ON(!cpuset);
+ size = CPU_ALLOC_SIZE(nrcpus);
+
/* create and block all threads */
for (i = 0; i < params.nthreads; i++) {
- CPU_ZERO(&cpuset);
- CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset);
+ CPU_ZERO_S(size, cpuset);
+ CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset);
- if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))
+ if (pthread_attr_setaffinity_np(&thread_attr, size, cpuset)) {
+ CPU_FREE(cpuset);
err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
+ }
- if (pthread_create(&w[i], &thread_attr, blocked_workerfn, NULL))
+ if (pthread_create(&w[i], &thread_attr, blocked_workerfn, NULL)) {
+ CPU_FREE(cpuset);
err(EXIT_FAILURE, "pthread_create");
+ }
}
+ CPU_FREE(cpuset);
}
static void print_run(struct thread_data *waking_worker, unsigned int run_num)
diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c
index cfda48bef1d7..201a3555f09a 100644
--- a/tools/perf/bench/futex-wake.c
+++ b/tools/perf/bench/futex-wake.c
@@ -97,22 +97,32 @@ static void print_summary(void)
static void block_threads(pthread_t *w,
pthread_attr_t thread_attr, struct perf_cpu_map *cpu)
{
- cpu_set_t cpuset;
+ cpu_set_t *cpuset;
unsigned int i;
-
+ size_t size;
+ int nrcpus = perf_cpu_map__nr(cpu);
threads_starting = params.nthreads;
+ cpuset = CPU_ALLOC(nrcpus);
+ BUG_ON(!cpuset);
+ size = CPU_ALLOC_SIZE(nrcpus);
+
/* create and block all threads */
for (i = 0; i < params.nthreads; i++) {
- CPU_ZERO(&cpuset);
- CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset);
+ CPU_ZERO_S(size, cpuset);
+ CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset);
- if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))
+ if (pthread_attr_setaffinity_np(&thread_attr, size, cpuset)) {
+ CPU_FREE(cpuset);
err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
+ }
- if (pthread_create(&w[i], &thread_attr, workerfn, NULL))
+ if (pthread_create(&w[i], &thread_attr, workerfn, NULL)) {
+ CPU_FREE(cpuset);
err(EXIT_FAILURE, "pthread_create");
+ }
}
+ CPU_FREE(cpuset);
}
static void toggle_done(int sig __maybe_unused,
diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c
index f2640179ada9..20eed1e53f80 100644
--- a/tools/perf/bench/numa.c
+++ b/tools/perf/bench/numa.c
@@ -34,6 +34,7 @@
#include <linux/numa.h>
#include <linux/zalloc.h>
+#include "../util/header.h"
#include <numa.h>
#include <numaif.h>
@@ -54,7 +55,7 @@
struct thread_data {
int curr_cpu;
- cpu_set_t bind_cpumask;
+ cpu_set_t *bind_cpumask;
int bind_node;
u8 *process_data;
int process_nr;
@@ -266,71 +267,117 @@ static bool node_has_cpus(int node)
return ret;
}
-static cpu_set_t bind_to_cpu(int target_cpu)
+static cpu_set_t *bind_to_cpu(int target_cpu)
{
- cpu_set_t orig_mask, mask;
- int ret;
+ int nrcpus = numa_num_possible_cpus();
+ cpu_set_t *orig_mask, *mask;
+ size_t size;
- ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask);
- BUG_ON(ret);
+ orig_mask = CPU_ALLOC(nrcpus);
+ BUG_ON(!orig_mask);
+ size = CPU_ALLOC_SIZE(nrcpus);
+ CPU_ZERO_S(size, orig_mask);
+
+ if (sched_getaffinity(0, size, orig_mask))
+ goto err_out;
- CPU_ZERO(&mask);
+ mask = CPU_ALLOC(nrcpus);
+ if (!mask)
+ goto err_out;
+
+ CPU_ZERO_S(size, mask);
if (target_cpu == -1) {
int cpu;
for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
- CPU_SET(cpu, &mask);
+ CPU_SET_S(cpu, size, mask);
} else {
- BUG_ON(target_cpu < 0 || target_cpu >= g->p.nr_cpus);
- CPU_SET(target_cpu, &mask);
+ if (target_cpu < 0 || target_cpu >= g->p.nr_cpus)
+ goto err;
+
+ CPU_SET_S(target_cpu, size, mask);
}
- ret = sched_setaffinity(0, sizeof(mask), &mask);
- BUG_ON(ret);
+ if (sched_setaffinity(0, size, mask))
+ goto err;
return orig_mask;
+
+err:
+ CPU_FREE(mask);
+err_out:
+ CPU_FREE(orig_mask);
+
+ /* BUG_ON due to failure in allocation of orig_mask/mask */
+ BUG_ON(-1);
+ return NULL;
}
-static cpu_set_t bind_to_node(int target_node)
+static cpu_set_t *bind_to_node(int target_node)
{
- cpu_set_t orig_mask, mask;
+ int nrcpus = numa_num_possible_cpus();
+ size_t size;
+ cpu_set_t *orig_mask, *mask;
int cpu;
- int ret;
- ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask);
- BUG_ON(ret);
+ orig_mask = CPU_ALLOC(nrcpus);
+ BUG_ON(!orig_mask);
+ size = CPU_ALLOC_SIZE(nrcpus);
+ CPU_ZERO_S(size, orig_mask);
- CPU_ZERO(&mask);
+ if (sched_getaffinity(0, size, orig_mask))
+ goto err_out;
+
+ mask = CPU_ALLOC(nrcpus);
+ if (!mask)
+ goto err_out;
+
+ CPU_ZERO_S(size, mask);
if (target_node == NUMA_NO_NODE) {
for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
- CPU_SET(cpu, &mask);
+ CPU_SET_S(cpu, size, mask);
} else {
struct bitmask *cpumask = numa_allocate_cpumask();
- BUG_ON(!cpumask);
+ if (!cpumask)
+ goto err;
+
if (!numa_node_to_cpus(target_node, cpumask)) {
for (cpu = 0; cpu < (int)cpumask->size; cpu++) {
if (numa_bitmask_isbitset(cpumask, cpu))
- CPU_SET(cpu, &mask);
+ CPU_SET_S(cpu, size, mask);
}
}
numa_free_cpumask(cpumask);
}
- ret = sched_setaffinity(0, sizeof(mask), &mask);
- BUG_ON(ret);
+ if (sched_setaffinity(0, size, mask))
+ goto err;
return orig_mask;
+
+err:
+ CPU_FREE(mask);
+err_out:
+ CPU_FREE(orig_mask);
+
+ /* BUG_ON due to failure in allocation of orig_mask/mask */
+ BUG_ON(-1);
+ return NULL;
}
-static void bind_to_cpumask(cpu_set_t mask)
+static void bind_to_cpumask(cpu_set_t *mask)
{
int ret;
+ size_t size = CPU_ALLOC_SIZE(numa_num_possible_cpus());
- ret = sched_setaffinity(0, sizeof(mask), &mask);
- BUG_ON(ret);
+ ret = sched_setaffinity(0, size, mask);
+ if (ret) {
+ CPU_FREE(mask);
+ BUG_ON(ret);
+ }
}
static void mempol_restore(void)
@@ -376,7 +423,7 @@ do { \
static u8 *alloc_data(ssize_t bytes0, int map_flags,
int init_zero, int init_cpu0, int thp, int init_random)
{
- cpu_set_t orig_mask;
+ cpu_set_t *orig_mask = NULL;
ssize_t bytes;
u8 *buf;
int ret;
@@ -434,6 +481,7 @@ static u8 *alloc_data(ssize_t bytes0, int map_flags,
/* Restore affinity: */
if (init_cpu0) {
bind_to_cpumask(orig_mask);
+ CPU_FREE(orig_mask);
mempol_restore();
}
@@ -585,10 +633,16 @@ static int parse_setup_cpu_list(void)
return -1;
}
+ if (is_cpu_online(bind_cpu_0) != 1 || is_cpu_online(bind_cpu_1) != 1) {
+ printf("\nTest not applicable, bind_cpu_0 or bind_cpu_1 is offline\n");
+ return -1;
+ }
+
BUG_ON(bind_cpu_0 < 0 || bind_cpu_1 < 0);
BUG_ON(bind_cpu_0 > bind_cpu_1);
for (bind_cpu = bind_cpu_0; bind_cpu <= bind_cpu_1; bind_cpu += step) {
+ size_t size = CPU_ALLOC_SIZE(g->p.nr_cpus);
int i;
for (i = 0; i < mul; i++) {
@@ -608,10 +662,15 @@ static int parse_setup_cpu_list(void)
tprintf("%2d", bind_cpu);
}
- CPU_ZERO(&td->bind_cpumask);
+ td->bind_cpumask = CPU_ALLOC(g->p.nr_cpus);
+ BUG_ON(!td->bind_cpumask);
+ CPU_ZERO_S(size, td->bind_cpumask);
for (cpu = bind_cpu; cpu < bind_cpu+bind_len; cpu++) {
- BUG_ON(cpu < 0 || cpu >= g->p.nr_cpus);
- CPU_SET(cpu, &td->bind_cpumask);
+ if (cpu < 0 || cpu >= g->p.nr_cpus) {
+ CPU_FREE(td->bind_cpumask);
+ BUG_ON(-1);
+ }
+ CPU_SET_S(cpu, size, td->bind_cpumask);
}
t++;
}
@@ -752,8 +811,6 @@ static int parse_nodes_opt(const struct option *opt __maybe_unused,
return parse_node_list(arg);
}
-#define BIT(x) (1ul << x)
-
static inline uint32_t lfsr_32(uint32_t lfsr)
{
const uint32_t taps = BIT(1) | BIT(5) | BIT(6) | BIT(31);
@@ -1241,7 +1298,7 @@ static void *worker_thread(void *__tdata)
* by migrating to CPU#0:
*/
if (first_task && g->p.perturb_secs && (int)(stop.tv_sec - last_perturbance) >= g->p.perturb_secs) {
- cpu_set_t orig_mask;
+ cpu_set_t *orig_mask;
int target_cpu;
int this_cpu;
@@ -1265,6 +1322,7 @@ static void *worker_thread(void *__tdata)
printf(" (injecting perturbalance, moved to CPU#%d)\n", target_cpu);
bind_to_cpumask(orig_mask);
+ CPU_FREE(orig_mask);
}
if (details >= 3) {
@@ -1398,21 +1456,31 @@ static void init_thread_data(void)
for (t = 0; t < g->p.nr_tasks; t++) {
struct thread_data *td = g->threads + t;
+ size_t cpuset_size = CPU_ALLOC_SIZE(g->p.nr_cpus);
int cpu;
/* Allow all nodes by default: */
td->bind_node = NUMA_NO_NODE;
/* Allow all CPUs by default: */
- CPU_ZERO(&td->bind_cpumask);
+ td->bind_cpumask = CPU_ALLOC(g->p.nr_cpus);
+ BUG_ON(!td->bind_cpumask);
+ CPU_ZERO_S(cpuset_size, td->bind_cpumask);
for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
- CPU_SET(cpu, &td->bind_cpumask);
+ CPU_SET_S(cpu, cpuset_size, td->bind_cpumask);
}
}
static void deinit_thread_data(void)
{
ssize_t size = sizeof(*g->threads)*g->p.nr_tasks;
+ int t;
+
+ /* Free the bind_cpumask allocated for thread_data */
+ for (t = 0; t < g->p.nr_tasks; t++) {
+ struct thread_data *td = g->threads + t;
+ CPU_FREE(td->bind_cpumask);
+ }
free_data(g->threads, size);
}
@@ -1672,7 +1740,7 @@ static int __bench_numa(const char *name)
"GB/sec,", "total-speed", "GB/sec total speed");
if (g->p.show_details >= 2) {
- char tname[14 + 2 * 10 + 1];
+ char tname[14 + 2 * 11 + 1];
struct thread_data *td;
for (p = 0; p < g->p.nr_proc; p++) {
for (t = 0; t < g->p.nr_threads; t++) {
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index ba74fab02e62..069825c48d40 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -989,8 +989,11 @@ static int record__thread_data_init_maps(struct record_thread *thread_data, stru
struct mmap *overwrite_mmap = evlist->overwrite_mmap;
struct perf_cpu_map *cpus = evlist->core.user_requested_cpus;
- thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits,
- thread_data->mask->maps.nbits);
+ if (cpu_map__is_dummy(cpus))
+ thread_data->nr_mmaps = nr_mmaps;
+ else
+ thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits,
+ thread_data->mask->maps.nbits);
if (mmap) {
thread_data->maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
if (!thread_data->maps)
@@ -1007,16 +1010,17 @@ static int record__thread_data_init_maps(struct record_thread *thread_data, stru
thread_data->nr_mmaps, thread_data->maps, thread_data->overwrite_maps);
for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) {
- if (test_bit(cpus->map[m].cpu, thread_data->mask->maps.bits)) {
+ if (cpu_map__is_dummy(cpus) ||
+ test_bit(cpus->map[m].cpu, thread_data->mask->maps.bits)) {
if (thread_data->maps) {
thread_data->maps[tm] = &mmap[m];
pr_debug2("thread_data[%p]: cpu%d: maps[%d] -> mmap[%d]\n",
- thread_data, cpus->map[m].cpu, tm, m);
+ thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
}
if (thread_data->overwrite_maps) {
thread_data->overwrite_maps[tm] = &overwrite_mmap[m];
pr_debug2("thread_data[%p]: cpu%d: ow_maps[%d] -> ow_mmap[%d]\n",
- thread_data, cpus->map[m].cpu, tm, m);
+ thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
}
tm++;
}
@@ -3329,6 +3333,9 @@ static void record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_c
{
int c;
+ if (cpu_map__is_dummy(cpus))
+ return;
+
for (c = 0; c < cpus->nr; c++)
set_bit(cpus->map[c].cpu, mask->bits);
}
@@ -3680,6 +3687,11 @@ static int record__init_thread_masks(struct record *rec)
if (!record__threads_enabled(rec))
return record__init_thread_default_masks(rec, cpus);
+ if (cpu_map__is_dummy(cpus)) {
+ pr_err("--per-thread option is mutually exclusive to parallel streaming mode.\n");
+ return -EINVAL;
+ }
+
switch (rec->opts.threads_spec) {
case THREAD_SPEC__CPU:
ret = record__init_thread_cpu_masks(rec, cpus);
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 1ad75c7ba074..afe4a5539ecc 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -353,6 +353,7 @@ static int report__setup_sample_type(struct report *rep)
struct perf_session *session = rep->session;
u64 sample_type = evlist__combined_sample_type(session->evlist);
bool is_pipe = perf_data__is_pipe(session->data);
+ struct evsel *evsel;
if (session->itrace_synth_opts->callchain ||
session->itrace_synth_opts->add_callchain ||
@@ -407,6 +408,19 @@ static int report__setup_sample_type(struct report *rep)
}
if (sort__mode == SORT_MODE__MEMORY) {
+ /*
+ * FIXUP: prior to kernel 5.18, Arm SPE missed to set
+ * PERF_SAMPLE_DATA_SRC bit in sample type. For backward
+ * compatibility, set the bit if it's an old perf data file.
+ */
+ evlist__for_each_entry(session->evlist, evsel) {
+ if (strstr(evsel->name, "arm_spe") &&
+ !(sample_type & PERF_SAMPLE_DATA_SRC)) {
+ evsel->core.attr.sample_type |= PERF_SAMPLE_DATA_SRC;
+ sample_type |= PERF_SAMPLE_DATA_SRC;
+ }
+ }
+
if (!is_pipe && !(sample_type & PERF_SAMPLE_DATA_SRC)) {
ui__error("Selected --mem-mode but no mem data. "
"Did you call perf record without -d?\n");
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index a2f117936188..cf5eab5431b4 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -461,7 +461,7 @@ static int evsel__check_attr(struct evsel *evsel, struct perf_session *session)
return -EINVAL;
if (PRINT_FIELD(DATA_SRC) &&
- evsel__check_stype(evsel, PERF_SAMPLE_DATA_SRC, "DATA_SRC", PERF_OUTPUT_DATA_SRC))
+ evsel__do_check_stype(evsel, PERF_SAMPLE_DATA_SRC, "DATA_SRC", PERF_OUTPUT_DATA_SRC, allow_user_set))
return -EINVAL;
if (PRINT_FIELD(WEIGHT) &&
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index 2f6b67189b42..0170cb0819d6 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -55,6 +55,7 @@ struct cmd_struct {
};
static struct cmd_struct commands[] = {
+ { "archive", NULL, 0 },
{ "buildid-cache", cmd_buildid_cache, 0 },
{ "buildid-list", cmd_buildid_list, 0 },
{ "config", cmd_config, 0 },
@@ -62,6 +63,7 @@ static struct cmd_struct commands[] = {
{ "diff", cmd_diff, 0 },
{ "evlist", cmd_evlist, 0 },
{ "help", cmd_help, 0 },
+ { "iostat", NULL, 0 },
{ "kallsyms", cmd_kallsyms, 0 },
{ "list", cmd_list, 0 },
{ "record", cmd_record, 0 },
@@ -360,6 +362,8 @@ static void handle_internal_command(int argc, const char **argv)
for (i = 0; i < ARRAY_SIZE(commands); i++) {
struct cmd_struct *p = commands+i;
+ if (p->fn == NULL)
+ continue;
if (strcmp(p->cmd, cmd))
continue;
exit(run_builtin(p, argc, argv));
@@ -434,7 +438,7 @@ void pthread__unblock_sigwinch(void)
static int libperf_print(enum libperf_print_level level,
const char *fmt, va_list ap)
{
- return eprintf(level, verbose, fmt, ap);
+ return veprintf(level, verbose, fmt, ap);
}
int main(int argc, const char **argv)
diff --git a/tools/perf/tests/attr/README b/tools/perf/tests/attr/README
index 454505d343fa..eb3f7d4bb324 100644
--- a/tools/perf/tests/attr/README
+++ b/tools/perf/tests/attr/README
@@ -60,6 +60,7 @@ Following tests are defined (with perf commands):
perf record -R kill (test-record-raw)
perf record -c 2 -e arm_spe_0// -- kill (test-record-spe-period)
perf record -e arm_spe_0/period=3/ -- kill (test-record-spe-period-term)
+ perf record -e arm_spe_0/pa_enable=1/ -- kill (test-record-spe-physical-address)
perf stat -e cycles kill (test-stat-basic)
perf stat kill (test-stat-default)
perf stat -d kill (test-stat-detailed-1)
diff --git a/tools/perf/tests/attr/test-record-spe-physical-address b/tools/perf/tests/attr/test-record-spe-physical-address
new file mode 100644
index 000000000000..7ebcf5012ce3
--- /dev/null
+++ b/tools/perf/tests/attr/test-record-spe-physical-address
@@ -0,0 +1,12 @@
+[config]
+command = record
+args = --no-bpf-event -e arm_spe_0/pa_enable=1/ -- kill >/dev/null 2>&1
+ret = 1
+arch = aarch64
+
+[event-10:base-record-spe]
+# 622727 is the decimal of IP|TID|TIME|CPU|IDENTIFIER|DATA_SRC|PHYS_ADDR
+sample_type=622727
+
+# dummy event
+[event-1:base-record-spe] \ No newline at end of file
diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c
index 57b9591f7cbb..17c023823713 100644
--- a/tools/perf/tests/bpf.c
+++ b/tools/perf/tests/bpf.c
@@ -222,11 +222,11 @@ static int __test__bpf(int idx)
ret = test_llvm__fetch_bpf_obj(&obj_buf, &obj_buf_sz,
bpf_testcase_table[idx].prog_id,
- true, NULL);
+ false, NULL);
if (ret != TEST_OK || !obj_buf || !obj_buf_sz) {
pr_debug("Unable to get BPF object, %s\n",
bpf_testcase_table[idx].msg_compile_fail);
- if (idx == 0)
+ if ((idx == 0) || (ret == TEST_SKIP))
return TEST_SKIP;
else
return TEST_FAIL;
@@ -364,9 +364,11 @@ static int test__bpf_prologue_test(struct test_suite *test __maybe_unused,
static struct test_case bpf_tests[] = {
#ifdef HAVE_LIBBPF_SUPPORT
TEST_CASE("Basic BPF filtering", basic_bpf_test),
- TEST_CASE("BPF pinning", bpf_pinning),
+ TEST_CASE_REASON("BPF pinning", bpf_pinning,
+ "clang isn't installed or environment missing BPF support"),
#ifdef HAVE_BPF_PROLOGUE
- TEST_CASE("BPF prologue generation", bpf_prologue_test),
+ TEST_CASE_REASON("BPF prologue generation", bpf_prologue_test,
+ "clang isn't installed or environment missing BPF support"),
#else
TEST_CASE_REASON("BPF prologue generation", bpf_prologue_test, "not compiled in"),
#endif
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index fac3717d9ba1..d336cda94a11 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -279,6 +279,7 @@ static const char *shell_test__description(char *description, size_t size,
{
FILE *fp;
char filename[PATH_MAX];
+ int ch;
path__join(filename, sizeof(filename), path, name);
fp = fopen(filename, "r");
@@ -286,7 +287,9 @@ static const char *shell_test__description(char *description, size_t size,
return NULL;
/* Skip shebang */
- while (fgetc(fp) != '\n');
+ do {
+ ch = fgetc(fp);
+ } while (ch != EOF && ch != '\n');
description = fgets(description, size, fp);
fclose(fp);
@@ -417,7 +420,8 @@ static int run_shell_tests(int argc, const char *argv[], int i, int width,
.priv = &st,
};
- if (!perf_test__matches(test_suite.desc, curr, argc, argv))
+ if (test_suite.desc == NULL ||
+ !perf_test__matches(test_suite.desc, curr, argc, argv))
continue;
st.file = ent->d_name;
diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c
index 2dab2d262060..afdca7f2959f 100644
--- a/tools/perf/tests/dwarf-unwind.c
+++ b/tools/perf/tests/dwarf-unwind.c
@@ -122,7 +122,7 @@ NO_TAIL_CALL_ATTRIBUTE noinline int test_dwarf_unwind__thread(struct thread *thr
}
err = unwind__get_entries(unwind_entry, &cnt, thread,
- &sample, MAX_STACK);
+ &sample, MAX_STACK, false);
if (err)
pr_debug("unwind failed\n");
else if (cnt != MAX_STACK) {
diff --git a/tools/perf/tests/perf-time-to-tsc.c b/tools/perf/tests/perf-time-to-tsc.c
index d12d0ad81801..4ad0dfbc8b21 100644
--- a/tools/perf/tests/perf-time-to-tsc.c
+++ b/tools/perf/tests/perf-time-to-tsc.c
@@ -47,6 +47,17 @@
} \
}
+static int test__tsc_is_supported(struct test_suite *test __maybe_unused,
+ int subtest __maybe_unused)
+{
+ if (!TSC_IS_SUPPORTED) {
+ pr_debug("Test not supported on this architecture\n");
+ return TEST_SKIP;
+ }
+
+ return TEST_OK;
+}
+
/**
* test__perf_time_to_tsc - test converting perf time to TSC.
*
@@ -70,7 +81,7 @@ static int test__perf_time_to_tsc(struct test_suite *test __maybe_unused, int su
struct perf_cpu_map *cpus = NULL;
struct evlist *evlist = NULL;
struct evsel *evsel = NULL;
- int err = -1, ret, i;
+ int err = TEST_FAIL, ret, i;
const char *comm1, *comm2;
struct perf_tsc_conversion tc;
struct perf_event_mmap_page *pc;
@@ -79,10 +90,6 @@ static int test__perf_time_to_tsc(struct test_suite *test __maybe_unused, int su
u64 test_time, comm1_time = 0, comm2_time = 0;
struct mmap *md;
- if (!TSC_IS_SUPPORTED) {
- pr_debug("Test not supported on this architecture");
- return TEST_SKIP;
- }
threads = thread_map__new(-1, getpid(), UINT_MAX);
CHECK_NOT_NULL__(threads);
@@ -116,6 +123,10 @@ static int test__perf_time_to_tsc(struct test_suite *test __maybe_unused, int su
evsel->core.attr.enable_on_exec = 0;
}
+ if (evlist__open(evlist) == -ENOENT) {
+ err = TEST_SKIP;
+ goto out_err;
+ }
CHECK__(evlist__open(evlist));
CHECK__(evlist__mmap(evlist, UINT_MAX));
@@ -124,8 +135,8 @@ static int test__perf_time_to_tsc(struct test_suite *test __maybe_unused, int su
ret = perf_read_tsc_conversion(pc, &tc);
if (ret) {
if (ret == -EOPNOTSUPP) {
- fprintf(stderr, " (not supported)");
- return 0;
+ pr_debug("perf_read_tsc_conversion is not supported in current kernel\n");
+ err = TEST_SKIP;
}
goto out_err;
}
@@ -191,7 +202,7 @@ next_event:
test_tsc >= comm2_tsc)
goto out_err;
- err = 0;
+ err = TEST_OK;
out_err:
evlist__delete(evlist);
@@ -200,4 +211,15 @@ out_err:
return err;
}
-DEFINE_SUITE("Convert perf time to TSC", perf_time_to_tsc);
+static struct test_case time_to_tsc_tests[] = {
+ TEST_CASE_REASON("TSC support", tsc_is_supported,
+ "This architecture does not support"),
+ TEST_CASE_REASON("Perf time to TSC", perf_time_to_tsc,
+ "perf_read_tsc_conversion is not supported"),
+ { .name = NULL, }
+};
+
+struct test_suite suite__perf_time_to_tsc = {
+ .desc = "Convert perf time to TSC",
+ .test_cases = time_to_tsc_tests,
+};
diff --git a/tools/perf/tests/shell/stat_all_pmu.sh b/tools/perf/tests/shell/stat_all_pmu.sh
index b30dba455f36..9c9ef33e0b3c 100755
--- a/tools/perf/tests/shell/stat_all_pmu.sh
+++ b/tools/perf/tests/shell/stat_all_pmu.sh
@@ -5,6 +5,16 @@
set -e
for p in $(perf list --raw-dump pmu); do
+ # In powerpc, skip the events for hv_24x7 and hv_gpci.
+ # These events needs input values to be filled in for
+ # core, chip, partition id based on system.
+ # Example: hv_24x7/CPM_ADJUNCT_INST,domain=?,core=?/
+ # hv_gpci/event,partition_id=?/
+ # Hence skip these events for ppc.
+ if echo "$p" |grep -Eq 'hv_24x7|hv_gpci' ; then
+ echo "Skipping: Event '$p' in powerpc"
+ continue
+ fi
echo "Testing $p"
result=$(perf stat -e "$p" true 2>&1)
if ! echo "$result" | grep -q "$p" && ! echo "$result" | grep -q "<not supported>" ; then
diff --git a/tools/perf/tests/shell/test_arm_coresight.sh b/tools/perf/tests/shell/test_arm_coresight.sh
index 6de53b7ef5ff..e4cb4f1806ff 100755
--- a/tools/perf/tests/shell/test_arm_coresight.sh
+++ b/tools/perf/tests/shell/test_arm_coresight.sh
@@ -29,7 +29,6 @@ cleanup_files()
rm -f ${file}
rm -f "${perfdata}.old"
trap - exit term int
- kill -2 $$
exit $glb_err
}
diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c
index ee1e3dcbc0bd..d23a9e322ff5 100644
--- a/tools/perf/tests/topology.c
+++ b/tools/perf/tests/topology.c
@@ -109,6 +109,17 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
&& strncmp(session->header.env.arch, "aarch64", 7))
return TEST_SKIP;
+ /*
+ * In powerpc pSeries platform, not all the topology information
+ * are exposed via sysfs. Due to restriction, detail like
+ * physical_package_id will be set to -1. Hence skip this
+ * test if physical_package_id returns -1 for cpu from perf_cpu_map.
+ */
+ if (strncmp(session->header.env.arch, "powerpc", 7)) {
+ if (cpu__get_socket_id(perf_cpu_map__cpu(map, 0)) == -1)
+ return TEST_SKIP;
+ }
+
TEST_ASSERT_VAL("Session header CPU map not set", session->header.env.cpu);
for (i = 0; i < session->header.env.nr_cpus_avail; i++) {
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index e4c641b240df..82cc396ef516 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -2047,6 +2047,7 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
objdump_process.argv = objdump_argv;
objdump_process.out = -1;
objdump_process.err = -1;
+ objdump_process.no_stderr = 1;
if (start_command(&objdump_process)) {
pr_err("Failure starting to run %s\n", command);
err = -1;
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
index d2b64e3f588b..1a80151baed9 100644
--- a/tools/perf/util/arm-spe.c
+++ b/tools/perf/util/arm-spe.c
@@ -1033,10 +1033,11 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
memset(&attr, 0, sizeof(struct perf_event_attr));
attr.size = sizeof(struct perf_event_attr);
attr.type = PERF_TYPE_HARDWARE;
- attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
+ attr.sample_type = evsel->core.attr.sample_type &
+ (PERF_SAMPLE_MASK | PERF_SAMPLE_PHYS_ADDR);
attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC |
- PERF_SAMPLE_WEIGHT;
+ PERF_SAMPLE_WEIGHT | PERF_SAMPLE_ADDR;
if (spe->timeless_decoding)
attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
else
diff --git a/tools/perf/util/arm64-frame-pointer-unwind-support.c b/tools/perf/util/arm64-frame-pointer-unwind-support.c
index 2242a885fbd7..4940be4a0569 100644
--- a/tools/perf/util/arm64-frame-pointer-unwind-support.c
+++ b/tools/perf/util/arm64-frame-pointer-unwind-support.c
@@ -53,7 +53,7 @@ u64 get_leaf_frame_caller_aarch64(struct perf_sample *sample, struct thread *thr
sample->user_regs.cache_regs[PERF_REG_ARM64_SP] = 0;
}
- ret = unwind__get_entries(add_entry, &entries, thread, sample, 2);
+ ret = unwind__get_entries(add_entry, &entries, thread, sample, 2, true);
sample->user_regs = old_regs;
if (ret || entries.length != 2)
diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c
index 94624733af7e..8271ab764eb5 100644
--- a/tools/perf/util/bpf-event.c
+++ b/tools/perf/util/bpf-event.c
@@ -22,7 +22,8 @@
#include "record.h"
#include "util/synthetic-events.h"
-struct btf * __weak btf__load_from_kernel_by_id(__u32 id)
+#ifndef HAVE_LIBBPF_BTF__LOAD_FROM_KERNEL_BY_ID
+struct btf *btf__load_from_kernel_by_id(__u32 id)
{
struct btf *btf;
#pragma GCC diagnostic push
@@ -32,6 +33,7 @@ struct btf * __weak btf__load_from_kernel_by_id(__u32 id)
return err ? ERR_PTR(err) : btf;
}
+#endif
int __weak bpf_prog_load(enum bpf_prog_type prog_type,
const char *prog_name __maybe_unused,
diff --git a/tools/perf/util/c++/clang.cpp b/tools/perf/util/c++/clang.cpp
index df7b18fb6b6e..1aad7d6d34aa 100644
--- a/tools/perf/util/c++/clang.cpp
+++ b/tools/perf/util/c++/clang.cpp
@@ -20,7 +20,11 @@
#include "llvm/Option/Option.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/ManagedStatic.h"
+#if CLANG_VERSION_MAJOR >= 14
+#include "llvm/MC/TargetRegistry.h"
+#else
#include "llvm/Support/TargetRegistry.h"
+#endif
#include "llvm/Support/TargetSelect.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index d546ff724dbe..a27132e5a5ef 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -983,6 +983,57 @@ static int write_dir_format(struct feat_fd *ff,
return do_write(ff, &data->dir.version, sizeof(data->dir.version));
}
+/*
+ * Check whether a CPU is online
+ *
+ * Returns:
+ * 1 -> if CPU is online
+ * 0 -> if CPU is offline
+ * -1 -> error case
+ */
+int is_cpu_online(unsigned int cpu)
+{
+ char *str;
+ size_t strlen;
+ char buf[256];
+ int status = -1;
+ struct stat statbuf;
+
+ snprintf(buf, sizeof(buf),
+ "/sys/devices/system/cpu/cpu%d", cpu);
+ if (stat(buf, &statbuf) != 0)
+ return 0;
+
+ /*
+ * Check if /sys/devices/system/cpu/cpux/online file
+ * exists. Some cases cpu0 won't have online file since
+ * it is not expected to be turned off generally.
+ * In kernels without CONFIG_HOTPLUG_CPU, this
+ * file won't exist
+ */
+ snprintf(buf, sizeof(buf),
+ "/sys/devices/system/cpu/cpu%d/online", cpu);
+ if (stat(buf, &statbuf) != 0)
+ return 1;
+
+ /*
+ * Read online file using sysfs__read_str.
+ * If read or open fails, return -1.
+ * If read succeeds, return value from file
+ * which gets stored in "str"
+ */
+ snprintf(buf, sizeof(buf),
+ "devices/system/cpu/cpu%d/online", cpu);
+
+ if (sysfs__read_str(buf, &str, &strlen) < 0)
+ return status;
+
+ status = atoi(str);
+
+ free(str);
+ return status;
+}
+
#ifdef HAVE_LIBBPF_SUPPORT
static int write_bpf_prog_info(struct feat_fd *ff,
struct evlist *evlist __maybe_unused)
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index c9e3265832d9..0eb4bc29a5a4 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -158,6 +158,7 @@ int do_write(struct feat_fd *fd, const void *buf, size_t size);
int write_padded(struct feat_fd *fd, const void *bf,
size_t count, size_t count_aligned);
+int is_cpu_online(unsigned int cpu);
/*
* arch specific callback
*/
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index b80048546451..95391236f5f6 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -2987,7 +2987,7 @@ static int thread__resolve_callchain_unwind(struct thread *thread,
return 0;
return unwind__get_entries(unwind_entry, cursor,
- thread, sample, max_stack);
+ thread, sample, max_stack, false);
}
int thread__resolve_callchain(struct thread *thread,
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 24997925ae00..dd84fed698a3 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -1523,7 +1523,9 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
bool use_uncore_alias;
LIST_HEAD(config_terms);
- if (verbose > 1) {
+ pmu = parse_state->fake_pmu ?: perf_pmu__find(name);
+
+ if (verbose > 1 && !(pmu && pmu->selectable)) {
fprintf(stderr, "Attempting to add event pmu '%s' with '",
name);
if (head_config) {
@@ -1536,7 +1538,6 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
fprintf(stderr, "' that may result in non-fatal errors\n");
}
- pmu = parse_state->fake_pmu ?: perf_pmu__find(name);
if (!pmu) {
char *err_str;
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 3b8dfe603e50..a7f93f5a1ac8 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1151,9 +1151,20 @@ static void branch_stack__printf(struct perf_sample *sample, bool callstack)
struct branch_entry *entries = perf_sample__branch_entries(sample);
uint64_t i;
- printf("%s: nr:%" PRIu64 "\n",
- !callstack ? "... branch stack" : "... branch callstack",
- sample->branch_stack->nr);
+ if (!callstack) {
+ printf("%s: nr:%" PRIu64 "\n", "... branch stack", sample->branch_stack->nr);
+ } else {
+ /* the reason of adding 1 to nr is because after expanding
+ * branch stack it generates nr + 1 callstack records. e.g.,
+ * B()->C()
+ * A()->B()
+ * the final callstack should be:
+ * C()
+ * B()
+ * A()
+ */
+ printf("%s: nr:%" PRIu64 "\n", "... branch callstack", sample->branch_stack->nr+1);
+ }
for (i = 0; i < sample->branch_stack->nr; i++) {
struct branch_entry *e = &entries[i];
@@ -1169,8 +1180,13 @@ static void branch_stack__printf(struct perf_sample *sample, bool callstack)
(unsigned)e->flags.reserved,
e->flags.type ? branch_type_name(e->flags.type) : "");
} else {
- printf("..... %2"PRIu64": %016" PRIx64 "\n",
- i, i > 0 ? e->from : e->to);
+ if (i == 0) {
+ printf("..... %2"PRIu64": %016" PRIx64 "\n"
+ "..... %2"PRIu64": %016" PRIx64 "\n",
+ i, e->to, i+1, e->from);
+ } else {
+ printf("..... %2"PRIu64": %016" PRIx64 "\n", i+1, e->from);
+ }
}
}
}
@@ -2095,6 +2111,7 @@ prefetch_event(char *buf, u64 head, size_t mmap_size,
bool needs_swap, union perf_event *error)
{
union perf_event *event;
+ u16 event_size;
/*
* Ensure we have enough space remaining to read
@@ -2107,15 +2124,23 @@ prefetch_event(char *buf, u64 head, size_t mmap_size,
if (needs_swap)
perf_event_header__bswap(&event->header);
- if (head + event->header.size <= mmap_size)
+ event_size = event->header.size;
+ if (head + event_size <= mmap_size)
return event;
/* We're not fetching the event so swap back again */
if (needs_swap)
perf_event_header__bswap(&event->header);
- pr_debug("%s: head=%#" PRIx64 " event->header_size=%#x, mmap_size=%#zx:"
- " fuzzed or compressed perf.data?\n",__func__, head, event->header.size, mmap_size);
+ /* Check if the event fits into the next mmapped buf. */
+ if (event_size <= mmap_size - head % page_size) {
+ /* Remap buf and fetch again. */
+ return NULL;
+ }
+
+ /* Invalid input. Event size should never exceed mmap_size. */
+ pr_debug("%s: head=%#" PRIx64 " event->header.size=%#x, mmap_size=%#zx:"
+ " fuzzed or compressed perf.data?\n", __func__, head, event_size, mmap_size);
return error;
}
@@ -2567,7 +2592,7 @@ int perf_session__process_events(struct perf_session *session)
if (perf_data__is_pipe(session->data))
return __perf_session__process_pipe_events(session);
- if (perf_data__is_dir(session->data))
+ if (perf_data__is_dir(session->data) && session->data->dir.nr)
return __perf_session__process_dir_events(session);
return __perf_session__process_events(session);
diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py
index 483f05004e68..c255a2c90cd6 100644
--- a/tools/perf/util/setup.py
+++ b/tools/perf/util/setup.py
@@ -1,12 +1,14 @@
-from os import getenv
+from os import getenv, path
from subprocess import Popen, PIPE
from re import sub
cc = getenv("CC")
cc_is_clang = b"clang version" in Popen([cc.split()[0], "-v"], stderr=PIPE).stderr.readline()
+src_feature_tests = getenv('srctree') + '/tools/build/feature'
def clang_has_option(option):
- return [o for o in Popen([cc, option], stderr=PIPE).stderr.readlines() if b"unknown argument" in o] == [ ]
+ cc_output = Popen([cc, option, path.join(src_feature_tests, "test-hello.c") ], stderr=PIPE).stderr.readlines()
+ return [o for o in cc_output if ((b"unknown argument" in o) or (b"is not supported" in o))] == [ ]
if cc_is_clang:
from distutils.sysconfig import get_config_vars
@@ -23,6 +25,8 @@ if cc_is_clang:
vars[var] = sub("-fstack-protector-strong", "", vars[var])
if not clang_has_option("-fno-semantic-interposition"):
vars[var] = sub("-fno-semantic-interposition", "", vars[var])
+ if not clang_has_option("-ffat-lto-objects"):
+ vars[var] = sub("-ffat-lto-objects", "", vars[var])
from distutils.core import setup, Extension
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index ee6f03481215..c1af37e11f98 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <errno.h>
+#include <linux/err.h>
#include <inttypes.h>
#include <math.h>
#include <string.h>
@@ -311,7 +312,7 @@ static int check_per_pkg(struct evsel *counter, struct perf_counts_values *vals,
if (!mask) {
mask = hashmap__new(pkg_id_hash, pkg_id_equal, NULL);
- if (!mask)
+ if (IS_ERR(mask))
return -ENOMEM;
counter->per_pkg_mask = mask;
@@ -471,9 +472,10 @@ int perf_stat_process_counter(struct perf_stat_config *config,
int perf_event__process_stat_event(struct perf_session *session,
union perf_event *event)
{
- struct perf_counts_values count;
+ struct perf_counts_values count, *ptr;
struct perf_record_stat *st = &event->stat;
struct evsel *counter;
+ int cpu_map_idx;
count.val = st->val;
count.ena = st->ena;
@@ -484,8 +486,18 @@ int perf_event__process_stat_event(struct perf_session *session,
pr_err("Failed to resolve counter for stat event.\n");
return -EINVAL;
}
-
- *perf_counts(counter->counts, st->cpu, st->thread) = count;
+ cpu_map_idx = perf_cpu_map__idx(evsel__cpus(counter), (struct perf_cpu){.cpu = st->cpu});
+ if (cpu_map_idx == -1) {
+ pr_err("Invalid CPU %d for event %s.\n", st->cpu, evsel__name(counter));
+ return -EINVAL;
+ }
+ ptr = perf_counts(counter->counts, cpu_map_idx, st->thread);
+ if (ptr == NULL) {
+ pr_err("Failed to find perf count for CPU %d thread %d on event %s.\n",
+ st->cpu, st->thread, evsel__name(counter));
+ return -EINVAL;
+ }
+ *ptr = count;
counter->supported = true;
return 0;
}
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 31cd59a2b66e..ecd377938eea 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -1290,7 +1290,7 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss,
* For misannotated, zeroed, ASM function sizes.
*/
if (nr > 0) {
- symbols__fixup_end(&dso->symbols);
+ symbols__fixup_end(&dso->symbols, false);
symbols__fixup_duplicate(&dso->symbols);
if (kmap) {
/*
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index dea0fc495185..f72baf636724 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -101,11 +101,6 @@ static int prefix_underscores_count(const char *str)
return tail - str;
}
-void __weak arch__symbols__fixup_end(struct symbol *p, struct symbol *c)
-{
- p->end = c->start;
-}
-
const char * __weak arch__normalize_symbol_name(const char *name)
{
return name;
@@ -217,7 +212,8 @@ again:
}
}
-void symbols__fixup_end(struct rb_root_cached *symbols)
+/* Update zero-sized symbols using the address of the next symbol */
+void symbols__fixup_end(struct rb_root_cached *symbols, bool is_kallsyms)
{
struct rb_node *nd, *prevnd = rb_first_cached(symbols);
struct symbol *curr, *prev;
@@ -231,8 +227,29 @@ void symbols__fixup_end(struct rb_root_cached *symbols)
prev = curr;
curr = rb_entry(nd, struct symbol, rb_node);
- if (prev->end == prev->start || prev->end != curr->start)
- arch__symbols__fixup_end(prev, curr);
+ /*
+ * On some architecture kernel text segment start is located at
+ * some low memory address, while modules are located at high
+ * memory addresses (or vice versa). The gap between end of
+ * kernel text segment and beginning of first module's text
+ * segment is very big. Therefore do not fill this gap and do
+ * not assign it to the kernel dso map (kallsyms).
+ *
+ * In kallsyms, it determines module symbols using '[' character
+ * like in:
+ * ffffffffc1937000 T hdmi_driver_init [snd_hda_codec_hdmi]
+ */
+ if (prev->end == prev->start) {
+ /* Last kernel/module symbol mapped to end of page */
+ if (is_kallsyms && (!strchr(prev->name, '[') !=
+ !strchr(curr->name, '[')))
+ prev->end = roundup(prev->end + 4096, 4096);
+ else
+ prev->end = curr->start;
+
+ pr_debug4("%s sym:%s end:%#" PRIx64 "\n",
+ __func__, prev->name, prev->end);
+ }
}
/* Last entry */
@@ -1467,7 +1484,7 @@ int __dso__load_kallsyms(struct dso *dso, const char *filename,
if (kallsyms__delta(kmap, filename, &delta))
return -1;
- symbols__fixup_end(&dso->symbols);
+ symbols__fixup_end(&dso->symbols, true);
symbols__fixup_duplicate(&dso->symbols);
if (dso->kernel == DSO_SPACE__KERNEL_GUEST)
@@ -1659,7 +1676,7 @@ int dso__load_bfd_symbols(struct dso *dso, const char *debugfile)
#undef bfd_asymbol_section
#endif
- symbols__fixup_end(&dso->symbols);
+ symbols__fixup_end(&dso->symbols, false);
symbols__fixup_duplicate(&dso->symbols);
dso->adjust_symbols = 1;
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index fbf866d82dcc..0b893dcc8ea6 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -203,7 +203,7 @@ void __symbols__insert(struct rb_root_cached *symbols, struct symbol *sym,
bool kernel);
void symbols__insert(struct rb_root_cached *symbols, struct symbol *sym);
void symbols__fixup_duplicate(struct rb_root_cached *symbols);
-void symbols__fixup_end(struct rb_root_cached *symbols);
+void symbols__fixup_end(struct rb_root_cached *symbols, bool is_kallsyms);
void maps__fixup_end(struct maps *maps);
typedef int (*mapfn_t)(u64 start, u64 len, u64 pgoff, void *data);
@@ -241,7 +241,6 @@ const char *arch__normalize_symbol_name(const char *name);
#define SYMBOL_A 0
#define SYMBOL_B 1
-void arch__symbols__fixup_end(struct symbol *p, struct symbol *c);
int arch__compare_symbol_names(const char *namea, const char *nameb);
int arch__compare_symbol_names_n(const char *namea, const char *nameb,
unsigned int n);
diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c
index a74b517f7497..94aa40f6e348 100644
--- a/tools/perf/util/unwind-libdw.c
+++ b/tools/perf/util/unwind-libdw.c
@@ -200,7 +200,8 @@ frame_callback(Dwfl_Frame *state, void *arg)
bool isactivation;
if (!dwfl_frame_pc(state, &pc, NULL)) {
- pr_err("%s", dwfl_errmsg(-1));
+ if (!ui->best_effort)
+ pr_err("%s", dwfl_errmsg(-1));
return DWARF_CB_ABORT;
}
@@ -208,7 +209,8 @@ frame_callback(Dwfl_Frame *state, void *arg)
report_module(pc, ui);
if (!dwfl_frame_pc(state, &pc, &isactivation)) {
- pr_err("%s", dwfl_errmsg(-1));
+ if (!ui->best_effort)
+ pr_err("%s", dwfl_errmsg(-1));
return DWARF_CB_ABORT;
}
@@ -222,7 +224,8 @@ frame_callback(Dwfl_Frame *state, void *arg)
int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
struct thread *thread,
struct perf_sample *data,
- int max_stack)
+ int max_stack,
+ bool best_effort)
{
struct unwind_info *ui, ui_buf = {
.sample = data,
@@ -231,6 +234,7 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
.cb = cb,
.arg = arg,
.max_stack = max_stack,
+ .best_effort = best_effort
};
Dwarf_Word ip;
int err = -EINVAL, i;
diff --git a/tools/perf/util/unwind-libdw.h b/tools/perf/util/unwind-libdw.h
index 0cbd2650e280..8c88bc4f2304 100644
--- a/tools/perf/util/unwind-libdw.h
+++ b/tools/perf/util/unwind-libdw.h
@@ -20,6 +20,7 @@ struct unwind_info {
void *arg;
int max_stack;
int idx;
+ bool best_effort;
struct unwind_entry entries[];
};
diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c
index 71a353349181..41e29fc7648a 100644
--- a/tools/perf/util/unwind-libunwind-local.c
+++ b/tools/perf/util/unwind-libunwind-local.c
@@ -96,6 +96,7 @@ struct unwind_info {
struct perf_sample *sample;
struct machine *machine;
struct thread *thread;
+ bool best_effort;
};
#define dw_read(ptr, type, end) ({ \
@@ -553,7 +554,8 @@ static int access_reg(unw_addr_space_t __maybe_unused as,
ret = perf_reg_value(&val, &ui->sample->user_regs, id);
if (ret) {
- pr_err("unwind: can't read reg %d\n", regnum);
+ if (!ui->best_effort)
+ pr_err("unwind: can't read reg %d\n", regnum);
return ret;
}
@@ -666,7 +668,7 @@ static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb,
return -1;
ret = unw_init_remote(&c, addr_space, ui);
- if (ret)
+ if (ret && !ui->best_effort)
display_error(ret);
while (!ret && (unw_step(&c) > 0) && i < max_stack) {
@@ -704,12 +706,14 @@ static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb,
static int _unwind__get_entries(unwind_entry_cb_t cb, void *arg,
struct thread *thread,
- struct perf_sample *data, int max_stack)
+ struct perf_sample *data, int max_stack,
+ bool best_effort)
{
struct unwind_info ui = {
.sample = data,
.thread = thread,
.machine = thread->maps->machine,
+ .best_effort = best_effort
};
if (!data->user_regs.regs)
diff --git a/tools/perf/util/unwind-libunwind.c b/tools/perf/util/unwind-libunwind.c
index e89a5479b361..509c287ee762 100644
--- a/tools/perf/util/unwind-libunwind.c
+++ b/tools/perf/util/unwind-libunwind.c
@@ -80,9 +80,11 @@ void unwind__finish_access(struct maps *maps)
int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
struct thread *thread,
- struct perf_sample *data, int max_stack)
+ struct perf_sample *data, int max_stack,
+ bool best_effort)
{
if (thread->maps->unwind_libunwind_ops)
- return thread->maps->unwind_libunwind_ops->get_entries(cb, arg, thread, data, max_stack);
+ return thread->maps->unwind_libunwind_ops->get_entries(cb, arg, thread, data,
+ max_stack, best_effort);
return 0;
}
diff --git a/tools/perf/util/unwind.h b/tools/perf/util/unwind.h
index ab8ad469c8de..b2a03fa5289b 100644
--- a/tools/perf/util/unwind.h
+++ b/tools/perf/util/unwind.h
@@ -23,13 +23,19 @@ struct unwind_libunwind_ops {
void (*finish_access)(struct maps *maps);
int (*get_entries)(unwind_entry_cb_t cb, void *arg,
struct thread *thread,
- struct perf_sample *data, int max_stack);
+ struct perf_sample *data, int max_stack, bool best_effort);
};
#ifdef HAVE_DWARF_UNWIND_SUPPORT
+/*
+ * When best_effort is set, don't report errors and fail silently. This could
+ * be expanded in the future to be more permissive about things other than
+ * error messages.
+ */
int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
struct thread *thread,
- struct perf_sample *data, int max_stack);
+ struct perf_sample *data, int max_stack,
+ bool best_effort);
/* libunwind specific */
#ifdef HAVE_LIBUNWIND_SUPPORT
#ifndef LIBUNWIND__ARCH_REG_ID
@@ -65,7 +71,8 @@ unwind__get_entries(unwind_entry_cb_t cb __maybe_unused,
void *arg __maybe_unused,
struct thread *thread __maybe_unused,
struct perf_sample *data __maybe_unused,
- int max_stack __maybe_unused)
+ int max_stack __maybe_unused,
+ bool best_effort __maybe_unused)
{
return 0;
}
diff --git a/tools/power/acpi/common/cmfsize.c b/tools/power/acpi/common/cmfsize.c
index 185b8c588e1d..38f9b9da8170 100644
--- a/tools/power/acpi/common/cmfsize.c
+++ b/tools/power/acpi/common/cmfsize.c
@@ -3,7 +3,7 @@
*
* Module Name: cmfsize - Common get file size function
*
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
*
*****************************************************************************/
diff --git a/tools/power/acpi/common/getopt.c b/tools/power/acpi/common/getopt.c
index 3c265bc917a1..96fd6cec78e2 100644
--- a/tools/power/acpi/common/getopt.c
+++ b/tools/power/acpi/common/getopt.c
@@ -3,7 +3,7 @@
*
* Module Name: getopt
*
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
*
*****************************************************************************/
diff --git a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c
index ccabdbaae6a4..bd08f36df4a7 100644
--- a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c
+++ b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c
@@ -3,7 +3,7 @@
*
* Module Name: oslinuxtbl - Linux OSL for obtaining ACPI tables
*
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
*
*****************************************************************************/
diff --git a/tools/power/acpi/os_specific/service_layers/osunixdir.c b/tools/power/acpi/os_specific/service_layers/osunixdir.c
index edd99274cd12..5107892d054b 100644
--- a/tools/power/acpi/os_specific/service_layers/osunixdir.c
+++ b/tools/power/acpi/os_specific/service_layers/osunixdir.c
@@ -3,7 +3,7 @@
*
* Module Name: osunixdir - Unix directory access interfaces
*
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
*
*****************************************************************************/
diff --git a/tools/power/acpi/os_specific/service_layers/osunixmap.c b/tools/power/acpi/os_specific/service_layers/osunixmap.c
index fee0022560d5..6ff4edd8dc3b 100644
--- a/tools/power/acpi/os_specific/service_layers/osunixmap.c
+++ b/tools/power/acpi/os_specific/service_layers/osunixmap.c
@@ -3,7 +3,7 @@
*
* Module Name: osunixmap - Unix OSL for file mappings
*
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
*
*****************************************************************************/
diff --git a/tools/power/acpi/os_specific/service_layers/osunixxf.c b/tools/power/acpi/os_specific/service_layers/osunixxf.c
index 0861728da562..b3651a04d68c 100644
--- a/tools/power/acpi/os_specific/service_layers/osunixxf.c
+++ b/tools/power/acpi/os_specific/service_layers/osunixxf.c
@@ -3,7 +3,7 @@
*
* Module Name: osunixxf - UNIX OSL interfaces
*
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
*
*****************************************************************************/
diff --git a/tools/power/acpi/tools/acpidump/acpidump.h b/tools/power/acpi/tools/acpidump/acpidump.h
index e0ebc1dab1cc..153249c87fd7 100644
--- a/tools/power/acpi/tools/acpidump/acpidump.h
+++ b/tools/power/acpi/tools/acpidump/acpidump.h
@@ -3,7 +3,7 @@
*
* Module Name: acpidump.h - Include file for acpi_dump utility
*
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
*
*****************************************************************************/
diff --git a/tools/power/acpi/tools/acpidump/apdump.c b/tools/power/acpi/tools/acpidump/apdump.c
index 444e3d78bd89..d54dde02b87d 100644
--- a/tools/power/acpi/tools/acpidump/apdump.c
+++ b/tools/power/acpi/tools/acpidump/apdump.c
@@ -3,7 +3,7 @@
*
* Module Name: apdump - Dump routines for ACPI tables (acpidump)
*
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
*
*****************************************************************************/
diff --git a/tools/power/acpi/tools/acpidump/apfiles.c b/tools/power/acpi/tools/acpidump/apfiles.c
index da0c6e13042b..2d9b45a9b526 100644
--- a/tools/power/acpi/tools/acpidump/apfiles.c
+++ b/tools/power/acpi/tools/acpidump/apfiles.c
@@ -3,7 +3,7 @@
*
* Module Name: apfiles - File-related functions for acpidump utility
*
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
*
*****************************************************************************/
diff --git a/tools/power/acpi/tools/acpidump/apmain.c b/tools/power/acpi/tools/acpidump/apmain.c
index a4cf6042fcfd..44b23fc53dd9 100644
--- a/tools/power/acpi/tools/acpidump/apmain.c
+++ b/tools/power/acpi/tools/acpidump/apmain.c
@@ -3,7 +3,7 @@
*
* Module Name: apmain - Main module for the acpidump utility
*
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
*
*****************************************************************************/
diff --git a/tools/power/x86/intel-speed-select/Makefile b/tools/power/x86/intel-speed-select/Makefile
index 846f785e278d..7221f2f55e8b 100644
--- a/tools/power/x86/intel-speed-select/Makefile
+++ b/tools/power/x86/intel-speed-select/Makefile
@@ -42,7 +42,7 @@ ISST_IN := $(OUTPUT)intel-speed-select-in.o
$(ISST_IN): prepare FORCE
$(Q)$(MAKE) $(build)=intel-speed-select
$(OUTPUT)intel-speed-select: $(ISST_IN)
- $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@
+ $(QUIET_LINK)$(CC) $(CFLAGS) $< $(LDFLAGS) -o $@
clean:
rm -f $(ALL_PROGRAMS)
diff --git a/tools/power/x86/intel-speed-select/hfi-events.c b/tools/power/x86/intel-speed-select/hfi-events.c
index e85676711372..761375062505 100644
--- a/tools/power/x86/intel-speed-select/hfi-events.c
+++ b/tools/power/x86/intel-speed-select/hfi-events.c
@@ -190,7 +190,7 @@ static int handle_event(struct nl_msg *n, void *arg)
struct genlmsghdr *genlhdr = genlmsg_hdr(nlh);
struct nlattr *attrs[THERMAL_GENL_ATTR_MAX + 1];
int ret;
- struct perf_cap perf_cap;
+ struct perf_cap perf_cap = {0};
ret = genlmsg_parse(nlh, 0, attrs, THERMAL_GENL_ATTR_MAX, NULL);
diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c
index 060390e88e37..9d35614995ee 100644
--- a/tools/power/x86/intel-speed-select/isst-config.c
+++ b/tools/power/x86/intel-speed-select/isst-config.c
@@ -1892,6 +1892,12 @@ static void set_fact_for_cpu(int cpu, void *arg1, void *arg2, void *arg3,
int ret;
int status = *(int *)arg4;
+ if (status && no_turbo()) {
+ isst_display_error_info_message(1, "Turbo mode is disabled", 0, 0);
+ ret = -1;
+ goto disp_results;
+ }
+
ret = isst_get_ctdp_levels(cpu, &pkg_dev);
if (ret) {
isst_display_error_info_message(1, "Failed to get number of levels", 0, 0);
diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile
index f3e3c94ab9bd..92e139b9c792 100644
--- a/tools/power/x86/turbostat/Makefile
+++ b/tools/power/x86/turbostat/Makefile
@@ -9,7 +9,7 @@ ifeq ("$(origin O)", "command line")
endif
turbostat : turbostat.c
-override CFLAGS += -O2 -Wall -I../../../include
+override CFLAGS += -O2 -Wall -Wextra -I../../../include
override CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/asm/msr-index.h"'
override CFLAGS += -DINTEL_FAMILY_HEADER='"../../../../arch/x86/include/asm/intel-family.h"'
override CFLAGS += -D_FILE_OFFSET_BITS=64
diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8
index 9b17097bc3d7..1e7d3de55a94 100644
--- a/tools/power/x86/turbostat/turbostat.8
+++ b/tools/power/x86/turbostat/turbostat.8
@@ -292,7 +292,7 @@ starts a new interval.
must be run as root.
Alternatively, non-root users can be enabled to run turbostat this way:
-# setcap cap_sys_rawio=ep ./turbostat
+# setcap cap_sys_admin,cap_sys_rawio,cap_sys_nice=+ep ./turbostat
# chmod +r /dev/cpu/*/msr
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index bc5ae0872fed..ede31a4287a0 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -3,7 +3,7 @@
* turbostat -- show CPU frequency and C-state residency
* on modern Intel and AMD processors.
*
- * Copyright (c) 2021 Intel Corporation.
+ * Copyright (c) 2022 Intel Corporation.
* Len Brown <len.brown@intel.com>
*/
@@ -37,6 +37,171 @@
#include <asm/unistd.h>
#include <stdbool.h>
+#define UNUSED(x) (void)(x)
+
+/*
+ * This list matches the column headers, except
+ * 1. built-in only, the sysfs counters are not here -- we learn of those at run-time
+ * 2. Core and CPU are moved to the end, we can't have strings that contain them
+ * matching on them for --show and --hide.
+ */
+
+/*
+ * buffer size used by sscanf() for added column names
+ * Usually truncated to 7 characters, but also handles 18 columns for raw 64-bit counters
+ */
+#define NAME_BYTES 20
+#define PATH_BYTES 128
+
+enum counter_scope { SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE };
+enum counter_type { COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC };
+enum counter_format { FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT };
+
+struct msr_counter {
+ unsigned int msr_num;
+ char name[NAME_BYTES];
+ char path[PATH_BYTES];
+ unsigned int width;
+ enum counter_type type;
+ enum counter_format format;
+ struct msr_counter *next;
+ unsigned int flags;
+#define FLAGS_HIDE (1 << 0)
+#define FLAGS_SHOW (1 << 1)
+#define SYSFS_PERCPU (1 << 1)
+};
+
+struct msr_counter bic[] = {
+ { 0x0, "usec", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "Time_Of_Day_Seconds", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "Package", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "Node", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "Avg_MHz", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "Busy%", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "Bzy_MHz", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "TSC_MHz", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "IRQ", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "SMI", "", 32, 0, FORMAT_DELTA, NULL, 0 },
+ { 0x0, "sysfs", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "CPU%c1", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "CPU%c3", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "CPU%c6", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "CPU%c7", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "ThreadC", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "CoreTmp", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "CoreCnt", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "PkgTmp", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "GFX%rc6", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "GFXMHz", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "Pkg%pc2", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "Pkg%pc3", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "Pkg%pc6", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "Pkg%pc7", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "Pkg%pc8", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "Pkg%pc9", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "Pk%pc10", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "CPU%LPI", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "SYS%LPI", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "PkgWatt", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "CorWatt", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "GFXWatt", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "PkgCnt", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "RAMWatt", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "PKG_%", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "RAM_%", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "Pkg_J", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "Cor_J", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "GFX_J", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "RAM_J", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "Mod%c6", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "Totl%C0", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "Any%C0", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "GFX%C0", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "CPUGFX%", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "Core", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "CPU", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "APIC", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "X2APIC", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "Die", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "GFXAMHz", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "IPC", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "CoreThr", "", 0, 0, 0, NULL, 0 },
+};
+
+#define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))
+#define BIC_USEC (1ULL << 0)
+#define BIC_TOD (1ULL << 1)
+#define BIC_Package (1ULL << 2)
+#define BIC_Node (1ULL << 3)
+#define BIC_Avg_MHz (1ULL << 4)
+#define BIC_Busy (1ULL << 5)
+#define BIC_Bzy_MHz (1ULL << 6)
+#define BIC_TSC_MHz (1ULL << 7)
+#define BIC_IRQ (1ULL << 8)
+#define BIC_SMI (1ULL << 9)
+#define BIC_sysfs (1ULL << 10)
+#define BIC_CPU_c1 (1ULL << 11)
+#define BIC_CPU_c3 (1ULL << 12)
+#define BIC_CPU_c6 (1ULL << 13)
+#define BIC_CPU_c7 (1ULL << 14)
+#define BIC_ThreadC (1ULL << 15)
+#define BIC_CoreTmp (1ULL << 16)
+#define BIC_CoreCnt (1ULL << 17)
+#define BIC_PkgTmp (1ULL << 18)
+#define BIC_GFX_rc6 (1ULL << 19)
+#define BIC_GFXMHz (1ULL << 20)
+#define BIC_Pkgpc2 (1ULL << 21)
+#define BIC_Pkgpc3 (1ULL << 22)
+#define BIC_Pkgpc6 (1ULL << 23)
+#define BIC_Pkgpc7 (1ULL << 24)
+#define BIC_Pkgpc8 (1ULL << 25)
+#define BIC_Pkgpc9 (1ULL << 26)
+#define BIC_Pkgpc10 (1ULL << 27)
+#define BIC_CPU_LPI (1ULL << 28)
+#define BIC_SYS_LPI (1ULL << 29)
+#define BIC_PkgWatt (1ULL << 30)
+#define BIC_CorWatt (1ULL << 31)
+#define BIC_GFXWatt (1ULL << 32)
+#define BIC_PkgCnt (1ULL << 33)
+#define BIC_RAMWatt (1ULL << 34)
+#define BIC_PKG__ (1ULL << 35)
+#define BIC_RAM__ (1ULL << 36)
+#define BIC_Pkg_J (1ULL << 37)
+#define BIC_Cor_J (1ULL << 38)
+#define BIC_GFX_J (1ULL << 39)
+#define BIC_RAM_J (1ULL << 40)
+#define BIC_Mod_c6 (1ULL << 41)
+#define BIC_Totl_c0 (1ULL << 42)
+#define BIC_Any_c0 (1ULL << 43)
+#define BIC_GFX_c0 (1ULL << 44)
+#define BIC_CPUGFX (1ULL << 45)
+#define BIC_Core (1ULL << 46)
+#define BIC_CPU (1ULL << 47)
+#define BIC_APIC (1ULL << 48)
+#define BIC_X2APIC (1ULL << 49)
+#define BIC_Die (1ULL << 50)
+#define BIC_GFXACTMHz (1ULL << 51)
+#define BIC_IPC (1ULL << 52)
+#define BIC_CORE_THROT_CNT (1ULL << 53)
+
+#define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die )
+#define BIC_THERMAL_PWR ( BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__)
+#define BIC_FREQUENCY ( BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz )
+#define BIC_IDLE ( BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX)
+#define BIC_OTHER ( BIC_IRQ | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC)
+
+#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC)
+
+unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT);
+unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC_X2APIC;
+
+#define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME)
+#define DO_BIC_READ(COUNTER_NAME) (bic_present & COUNTER_NAME)
+#define ENABLE_BIC(COUNTER_NAME) (bic_enabled |= COUNTER_NAME)
+#define BIC_PRESENT(COUNTER_BIT) (bic_present |= COUNTER_BIT)
+#define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~COUNTER_BIT)
+#define BIC_IS_ENABLED(COUNTER_BIT) (bic_enabled & COUNTER_BIT)
+
char *proc_stat = "/proc/stat";
FILE *outf;
int *fd_percpu;
@@ -48,6 +213,7 @@ struct timespec interval_ts = { 5, 0 };
unsigned int model_orig;
unsigned int num_iterations;
+unsigned int header_iterations;
unsigned int debug;
unsigned int quiet;
unsigned int shown;
@@ -159,13 +325,6 @@ int ignore_stdin;
#define MAX(a, b) ((a) > (b) ? (a) : (b))
-/*
- * buffer size used by sscanf() for added column names
- * Usually truncated to 7 characters, but also handles 18 columns for raw 64-bit counters
- */
-#define NAME_BYTES 20
-#define PATH_BYTES 128
-
int backwards_count;
char *progname;
@@ -205,6 +364,7 @@ struct core_data {
unsigned int core_temp_c;
unsigned int core_energy; /* MSR_CORE_ENERGY_STAT */
unsigned int core_id;
+ unsigned long long core_throt_cnt;
unsigned long long counter[MAX_ADDED_COUNTERS];
} *core_even, *core_odd;
@@ -255,24 +415,6 @@ struct pkg_data {
#define GET_PKG(pkg_base, pkg_no) (pkg_base + pkg_no)
-enum counter_scope { SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE };
-enum counter_type { COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC };
-enum counter_format { FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT };
-
-struct msr_counter {
- unsigned int msr_num;
- char name[NAME_BYTES];
- char path[PATH_BYTES];
- unsigned int width;
- enum counter_type type;
- enum counter_format format;
- struct msr_counter *next;
- unsigned int flags;
-#define FLAGS_HIDE (1 << 0)
-#define FLAGS_SHOW (1 << 1)
-#define SYSFS_PERCPU (1 << 1)
-};
-
/*
* The accumulated sum of MSR is defined as a monotonic
* increasing MSR, it will be accumulated periodically,
@@ -522,8 +664,10 @@ static int perf_instr_count_open(int cpu_num)
/* counter for cpu_num, including user + kernel and all processes */
fd = perf_event_open(&pea, -1, cpu_num, -1, 0);
- if (fd == -1)
- err(-1, "cpu%d: perf instruction counter\n", cpu_num);
+ if (fd == -1) {
+ warn("cpu%d: perf instruction counter", cpu_num);
+ BIC_NOT_PRESENT(BIC_IPC);
+ }
return fd;
}
@@ -550,143 +694,10 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr)
return 0;
}
-/*
- * This list matches the column headers, except
- * 1. built-in only, the sysfs counters are not here -- we learn of those at run-time
- * 2. Core and CPU are moved to the end, we can't have strings that contain them
- * matching on them for --show and --hide.
- */
-struct msr_counter bic[] = {
- { 0x0, "usec" },
- { 0x0, "Time_Of_Day_Seconds" },
- { 0x0, "Package" },
- { 0x0, "Node" },
- { 0x0, "Avg_MHz" },
- { 0x0, "Busy%" },
- { 0x0, "Bzy_MHz" },
- { 0x0, "TSC_MHz" },
- { 0x0, "IRQ" },
- { 0x0, "SMI", "", 32, 0, FORMAT_DELTA, NULL },
- { 0x0, "sysfs" },
- { 0x0, "CPU%c1" },
- { 0x0, "CPU%c3" },
- { 0x0, "CPU%c6" },
- { 0x0, "CPU%c7" },
- { 0x0, "ThreadC" },
- { 0x0, "CoreTmp" },
- { 0x0, "CoreCnt" },
- { 0x0, "PkgTmp" },
- { 0x0, "GFX%rc6" },
- { 0x0, "GFXMHz" },
- { 0x0, "Pkg%pc2" },
- { 0x0, "Pkg%pc3" },
- { 0x0, "Pkg%pc6" },
- { 0x0, "Pkg%pc7" },
- { 0x0, "Pkg%pc8" },
- { 0x0, "Pkg%pc9" },
- { 0x0, "Pk%pc10" },
- { 0x0, "CPU%LPI" },
- { 0x0, "SYS%LPI" },
- { 0x0, "PkgWatt" },
- { 0x0, "CorWatt" },
- { 0x0, "GFXWatt" },
- { 0x0, "PkgCnt" },
- { 0x0, "RAMWatt" },
- { 0x0, "PKG_%" },
- { 0x0, "RAM_%" },
- { 0x0, "Pkg_J" },
- { 0x0, "Cor_J" },
- { 0x0, "GFX_J" },
- { 0x0, "RAM_J" },
- { 0x0, "Mod%c6" },
- { 0x0, "Totl%C0" },
- { 0x0, "Any%C0" },
- { 0x0, "GFX%C0" },
- { 0x0, "CPUGFX%" },
- { 0x0, "Core" },
- { 0x0, "CPU" },
- { 0x0, "APIC" },
- { 0x0, "X2APIC" },
- { 0x0, "Die" },
- { 0x0, "GFXAMHz" },
- { 0x0, "IPC" },
-};
-
-#define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))
-#define BIC_USEC (1ULL << 0)
-#define BIC_TOD (1ULL << 1)
-#define BIC_Package (1ULL << 2)
-#define BIC_Node (1ULL << 3)
-#define BIC_Avg_MHz (1ULL << 4)
-#define BIC_Busy (1ULL << 5)
-#define BIC_Bzy_MHz (1ULL << 6)
-#define BIC_TSC_MHz (1ULL << 7)
-#define BIC_IRQ (1ULL << 8)
-#define BIC_SMI (1ULL << 9)
-#define BIC_sysfs (1ULL << 10)
-#define BIC_CPU_c1 (1ULL << 11)
-#define BIC_CPU_c3 (1ULL << 12)
-#define BIC_CPU_c6 (1ULL << 13)
-#define BIC_CPU_c7 (1ULL << 14)
-#define BIC_ThreadC (1ULL << 15)
-#define BIC_CoreTmp (1ULL << 16)
-#define BIC_CoreCnt (1ULL << 17)
-#define BIC_PkgTmp (1ULL << 18)
-#define BIC_GFX_rc6 (1ULL << 19)
-#define BIC_GFXMHz (1ULL << 20)
-#define BIC_Pkgpc2 (1ULL << 21)
-#define BIC_Pkgpc3 (1ULL << 22)
-#define BIC_Pkgpc6 (1ULL << 23)
-#define BIC_Pkgpc7 (1ULL << 24)
-#define BIC_Pkgpc8 (1ULL << 25)
-#define BIC_Pkgpc9 (1ULL << 26)
-#define BIC_Pkgpc10 (1ULL << 27)
-#define BIC_CPU_LPI (1ULL << 28)
-#define BIC_SYS_LPI (1ULL << 29)
-#define BIC_PkgWatt (1ULL << 30)
-#define BIC_CorWatt (1ULL << 31)
-#define BIC_GFXWatt (1ULL << 32)
-#define BIC_PkgCnt (1ULL << 33)
-#define BIC_RAMWatt (1ULL << 34)
-#define BIC_PKG__ (1ULL << 35)
-#define BIC_RAM__ (1ULL << 36)
-#define BIC_Pkg_J (1ULL << 37)
-#define BIC_Cor_J (1ULL << 38)
-#define BIC_GFX_J (1ULL << 39)
-#define BIC_RAM_J (1ULL << 40)
-#define BIC_Mod_c6 (1ULL << 41)
-#define BIC_Totl_c0 (1ULL << 42)
-#define BIC_Any_c0 (1ULL << 43)
-#define BIC_GFX_c0 (1ULL << 44)
-#define BIC_CPUGFX (1ULL << 45)
-#define BIC_Core (1ULL << 46)
-#define BIC_CPU (1ULL << 47)
-#define BIC_APIC (1ULL << 48)
-#define BIC_X2APIC (1ULL << 49)
-#define BIC_Die (1ULL << 50)
-#define BIC_GFXACTMHz (1ULL << 51)
-#define BIC_IPC (1ULL << 52)
-
-#define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die )
-#define BIC_THERMAL_PWR ( BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__)
-#define BIC_FREQUENCY ( BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz )
-#define BIC_IDLE ( BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX)
-#define BIC_OTHER ( BIC_IRQ | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC)
-
-#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC)
-
-unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT);
-unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC_X2APIC;
-
-#define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME)
-#define DO_BIC_READ(COUNTER_NAME) (bic_present & COUNTER_NAME)
-#define ENABLE_BIC(COUNTER_NAME) (bic_enabled |= COUNTER_NAME)
-#define BIC_PRESENT(COUNTER_BIT) (bic_present |= COUNTER_BIT)
-#define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~COUNTER_BIT)
-#define BIC_IS_ENABLED(COUNTER_BIT) (bic_enabled & COUNTER_BIT)
-
#define MAX_DEFERRED 16
+char *deferred_add_names[MAX_DEFERRED];
char *deferred_skip_names[MAX_DEFERRED];
+int deferred_add_index;
int deferred_skip_index;
/*
@@ -720,6 +731,8 @@ void help(void)
" -l, --list list column headers only\n"
" -n, --num_iterations num\n"
" number of the measurement iterations\n"
+ " -N, --header_iterations num\n"
+ " print header every num iterations\n"
" -o, --out file\n"
" create or truncate \"file\" for all output\n"
" -q, --quiet skip decoding system configuration header\n"
@@ -741,7 +754,7 @@ void help(void)
*/
unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode)
{
- int i;
+ unsigned int i;
unsigned long long retval = 0;
while (name_list) {
@@ -752,40 +765,51 @@ unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode)
if (comma)
*comma = '\0';
- if (!strcmp(name_list, "all"))
- return ~0;
- if (!strcmp(name_list, "topology"))
- return BIC_TOPOLOGY;
- if (!strcmp(name_list, "power"))
- return BIC_THERMAL_PWR;
- if (!strcmp(name_list, "idle"))
- return BIC_IDLE;
- if (!strcmp(name_list, "frequency"))
- return BIC_FREQUENCY;
- if (!strcmp(name_list, "other"))
- return BIC_OTHER;
- if (!strcmp(name_list, "all"))
- return 0;
-
for (i = 0; i < MAX_BIC; ++i) {
if (!strcmp(name_list, bic[i].name)) {
retval |= (1ULL << i);
break;
}
+ if (!strcmp(name_list, "all")) {
+ retval |= ~0;
+ break;
+ } else if (!strcmp(name_list, "topology")) {
+ retval |= BIC_TOPOLOGY;
+ break;
+ } else if (!strcmp(name_list, "power")) {
+ retval |= BIC_THERMAL_PWR;
+ break;
+ } else if (!strcmp(name_list, "idle")) {
+ retval |= BIC_IDLE;
+ break;
+ } else if (!strcmp(name_list, "frequency")) {
+ retval |= BIC_FREQUENCY;
+ break;
+ } else if (!strcmp(name_list, "other")) {
+ retval |= BIC_OTHER;
+ break;
+ }
+
}
if (i == MAX_BIC) {
if (mode == SHOW_LIST) {
- fprintf(stderr, "Invalid counter name: %s\n", name_list);
- exit(-1);
- }
- deferred_skip_names[deferred_skip_index++] = name_list;
- if (debug)
- fprintf(stderr, "deferred \"%s\"\n", name_list);
- if (deferred_skip_index >= MAX_DEFERRED) {
- fprintf(stderr, "More than max %d un-recognized --skip options '%s'\n",
- MAX_DEFERRED, name_list);
- help();
- exit(1);
+ deferred_add_names[deferred_add_index++] = name_list;
+ if (deferred_add_index >= MAX_DEFERRED) {
+ fprintf(stderr, "More than max %d un-recognized --add options '%s'\n",
+ MAX_DEFERRED, name_list);
+ help();
+ exit(1);
+ }
+ } else {
+ deferred_skip_names[deferred_skip_index++] = name_list;
+ if (debug)
+ fprintf(stderr, "deferred \"%s\"\n", name_list);
+ if (deferred_skip_index >= MAX_DEFERRED) {
+ fprintf(stderr, "More than max %d un-recognized --skip options '%s'\n",
+ MAX_DEFERRED, name_list);
+ help();
+ exit(1);
+ }
}
}
@@ -872,6 +896,9 @@ void print_header(char *delim)
if (DO_BIC(BIC_CoreTmp))
outp += sprintf(outp, "%sCoreTmp", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_CORE_THROT_CNT))
+ outp += sprintf(outp, "%sCoreThr", (printed++ ? delim : ""));
+
if (do_rapl && !rapl_joules) {
if (DO_BIC(BIC_CorWatt) && (do_rapl & RAPL_PER_CORE_ENERGY))
outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : ""));
@@ -1011,6 +1038,7 @@ int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p
outp += sprintf(outp, "c6: %016llX\n", c->c6);
outp += sprintf(outp, "c7: %016llX\n", c->c7);
outp += sprintf(outp, "DTS: %dC\n", c->core_temp_c);
+ outp += sprintf(outp, "cpu_throt_count: %016llX\n", c->core_throt_cnt);
outp += sprintf(outp, "Joules: %0X\n", c->core_energy);
for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
@@ -1225,6 +1253,10 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
if (DO_BIC(BIC_CoreTmp))
outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_temp_c);
+ /* Core throttle count */
+ if (DO_BIC(BIC_CORE_THROT_CNT))
+ outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->core_throt_cnt);
+
for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
if (mp->format == FORMAT_RAW) {
if (mp->width == 32)
@@ -1311,6 +1343,7 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
if (DO_BIC(BIC_PkgWatt))
outp +=
sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units / interval_float);
+
if (DO_BIC(BIC_CorWatt) && !(do_rapl & RAPL_PER_CORE_ENERGY))
outp +=
sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units / interval_float);
@@ -1386,14 +1419,14 @@ void flush_output_stderr(void)
void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
- static int printed;
+ static int count;
- if (!printed || !summary_only)
+ if ((!count || (header_iterations && !(count % header_iterations))) || !summary_only)
print_header("\t");
format_counters(&average.threads, &average.cores, &average.packages);
- printed = 1;
+ count++;
if (summary_only)
return;
@@ -1467,6 +1500,7 @@ void delta_core(struct core_data *new, struct core_data *old)
old->c6 = new->c6 - old->c6;
old->c7 = new->c7 - old->c7;
old->core_temp_c = new->core_temp_c;
+ old->core_throt_cnt = new->core_throt_cnt;
old->mc6_us = new->mc6_us - old->mc6_us;
DELTA_WRAP32(new->core_energy, old->core_energy);
@@ -1626,6 +1660,7 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
c->mc6_us = 0;
c->core_temp_c = 0;
c->core_energy = 0;
+ c->core_throt_cnt = 0;
p->pkg_wtd_core_c0 = 0;
p->pkg_any_core_c0 = 0;
@@ -1710,6 +1745,7 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
average.cores.mc6_us += c->mc6_us;
average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c);
+ average.cores.core_throt_cnt = MAX(average.cores.core_throt_cnt, c->core_throt_cnt);
average.cores.core_energy += c->core_energy;
@@ -1987,6 +2023,26 @@ void get_apic_id(struct thread_data *t)
fprintf(outf, "cpu%d: BIOS BUG: apic 0x%x x2apic 0x%x\n", t->cpu_id, t->apic_id, t->x2apic_id);
}
+int get_core_throt_cnt(int cpu, unsigned long long *cnt)
+{
+ char path[128 + PATH_BYTES];
+ unsigned long long tmp;
+ FILE *fp;
+ int ret;
+
+ sprintf(path, "/sys/devices/system/cpu/cpu%d/thermal_throttle/core_throttle_count", cpu);
+ fp = fopen(path, "r");
+ if (!fp)
+ return -1;
+ ret = fscanf(fp, "%lld", &tmp);
+ if (ret != 1)
+ return -1;
+ fclose(fp);
+ *cnt = tmp;
+
+ return 0;
+}
+
/*
* get_counters(...)
* migrate to cpu
@@ -2129,6 +2185,9 @@ retry:
c->core_temp_c = tj_max - ((msr >> 16) & 0x7F);
}
+ if (DO_BIC(BIC_CORE_THROT_CNT))
+ get_core_throt_cnt(cpu, &c->core_throt_cnt);
+
if (do_rapl & RAPL_AMD_F17H) {
if (get_msr(cpu, MSR_CORE_ENERGY_STAT, &msr))
return -14;
@@ -2428,6 +2487,9 @@ int has_turbo_ratio_group_limits(int family, int model)
if (!genuine_intel)
return 0;
+ if (family != 6)
+ return 0;
+
switch (model) {
case INTEL_FAM6_ATOM_GOLDMONT:
case INTEL_FAM6_SKYLAKE_X:
@@ -2435,8 +2497,9 @@ int has_turbo_ratio_group_limits(int family, int model)
case INTEL_FAM6_ATOM_GOLDMONT_D:
case INTEL_FAM6_ATOM_TREMONT_D:
return 1;
+ default:
+ return 0;
}
- return 0;
}
static void dump_turbo_ratio_limits(int family, int model)
@@ -3027,6 +3090,8 @@ void set_max_cpu_num(void)
*/
int count_cpus(int cpu)
{
+ UNUSED(cpu);
+
topo.num_cpus++;
return 0;
}
@@ -3361,6 +3426,9 @@ static int update_msr_sum(struct thread_data *t, struct core_data *c, struct pkg
int i, ret;
int cpu = t->cpu_id;
+ UNUSED(c);
+ UNUSED(p);
+
for (i = IDX_PKG_ENERGY; i < IDX_COUNT; i++) {
unsigned long long msr_cur, msr_last;
off_t offset;
@@ -3387,6 +3455,8 @@ static int update_msr_sum(struct thread_data *t, struct core_data *c, struct pkg
static void msr_record_handler(union sigval v)
{
+ UNUSED(v);
+
for_all_cpus(update_msr_sum, EVEN_COUNTERS);
}
@@ -3439,6 +3509,9 @@ release_msr:
/*
* set_my_sched_priority(pri)
* return previous
+ *
+ * if non-root, do this:
+ * # /sbin/setcap cap_sys_rawio,cap_sys_nice=+ep /usr/bin/turbostat
*/
int set_my_sched_priority(int priority)
{
@@ -3457,7 +3530,7 @@ int set_my_sched_priority(int priority)
errno = 0;
retval = getpriority(PRIO_PROCESS, 0);
if (retval != priority)
- err(-1, "getpriority(%d) != setpriority(%d)", retval, priority);
+ err(retval, "getpriority(%d) != setpriority(%d)", retval, priority);
return original_priority;
}
@@ -3466,7 +3539,7 @@ void turbostat_loop()
{
int retval;
int restarted = 0;
- int done_iters = 0;
+ unsigned int done_iters = 0;
setup_signal_handler();
@@ -3678,6 +3751,7 @@ int probe_nhm_msrs(unsigned int family, unsigned int model)
break;
case INTEL_FAM6_ATOM_SILVERMONT: /* BYT */
no_MSR_MISC_PWR_MGMT = 1;
+ /* FALLTHRU */
case INTEL_FAM6_ATOM_SILVERMONT_D: /* AVN */
pkg_cstate_limits = slv_pkg_cstate_limits;
break;
@@ -3721,6 +3795,9 @@ int has_slv_msrs(unsigned int family, unsigned int model)
if (!genuine_intel)
return 0;
+ if (family != 6)
+ return 0;
+
switch (model) {
case INTEL_FAM6_ATOM_SILVERMONT:
case INTEL_FAM6_ATOM_SILVERMONT_MID:
@@ -3736,6 +3813,9 @@ int is_dnv(unsigned int family, unsigned int model)
if (!genuine_intel)
return 0;
+ if (family != 6)
+ return 0;
+
switch (model) {
case INTEL_FAM6_ATOM_GOLDMONT_D:
return 1;
@@ -3749,6 +3829,9 @@ int is_bdx(unsigned int family, unsigned int model)
if (!genuine_intel)
return 0;
+ if (family != 6)
+ return 0;
+
switch (model) {
case INTEL_FAM6_BROADWELL_X:
return 1;
@@ -3762,6 +3845,9 @@ int is_skx(unsigned int family, unsigned int model)
if (!genuine_intel)
return 0;
+ if (family != 6)
+ return 0;
+
switch (model) {
case INTEL_FAM6_SKYLAKE_X:
return 1;
@@ -3775,6 +3861,9 @@ int is_icx(unsigned int family, unsigned int model)
if (!genuine_intel)
return 0;
+ if (family != 6)
+ return 0;
+
switch (model) {
case INTEL_FAM6_ICELAKE_X:
return 1;
@@ -3787,6 +3876,9 @@ int is_ehl(unsigned int family, unsigned int model)
if (!genuine_intel)
return 0;
+ if (family != 6)
+ return 0;
+
switch (model) {
case INTEL_FAM6_ATOM_TREMONT:
return 1;
@@ -3799,6 +3891,9 @@ int is_jvl(unsigned int family, unsigned int model)
if (!genuine_intel)
return 0;
+ if (family != 6)
+ return 0;
+
switch (model) {
case INTEL_FAM6_ATOM_TREMONT_D:
return 1;
@@ -3811,6 +3906,9 @@ int has_turbo_ratio_limit(unsigned int family, unsigned int model)
if (has_slv_msrs(family, model))
return 0;
+ if (family != 6)
+ return 0;
+
switch (model) {
/* Nehalem compatible, but do not include turbo-ratio limit support */
case INTEL_FAM6_NEHALEM_EX: /* Nehalem-EX Xeon - Beckton */
@@ -4125,6 +4223,9 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
char *epb_string;
int cpu, epb;
+ UNUSED(c);
+ UNUSED(p);
+
if (!has_epb)
return 0;
@@ -4171,6 +4272,9 @@ int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
unsigned long long msr;
int cpu;
+ UNUSED(c);
+ UNUSED(p);
+
if (!has_hwp)
return 0;
@@ -4254,6 +4358,9 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
unsigned long long msr;
int cpu;
+ UNUSED(c);
+ UNUSED(p);
+
cpu = t->cpu_id;
/* per-package */
@@ -4359,6 +4466,8 @@ double get_tdp_intel(unsigned int model)
double get_tdp_amd(unsigned int family)
{
+ UNUSED(family);
+
/* This is the max stock TDP of HEDT/Server Fam17h+ chips */
return 280.0;
}
@@ -4376,6 +4485,7 @@ static double rapl_dram_energy_units_probe(int model, double rapl_energy_units)
case INTEL_FAM6_BROADWELL_X: /* BDX */
case INTEL_FAM6_SKYLAKE_X: /* SKX */
case INTEL_FAM6_XEON_PHI_KNL: /* KNL */
+ case INTEL_FAM6_ICELAKE_X: /* ICX */
return (rapl_dram_energy_units = 15.3 / 1000000);
default:
return (rapl_energy_units);
@@ -4559,6 +4669,8 @@ void rapl_probe_amd(unsigned int family, unsigned int model)
unsigned int has_rapl = 0;
double tdp;
+ UNUSED(model);
+
if (max_extended_level >= 0x80000007) {
__cpuid(0x80000007, eax, ebx, ecx, edx);
/* RAPL (Fam 17h+) */
@@ -4617,6 +4729,7 @@ void perf_limit_reasons_probe(unsigned int family, unsigned int model)
case INTEL_FAM6_HASWELL_L: /* HSW */
case INTEL_FAM6_HASWELL_G: /* HSW */
do_gfx_perf_limit_reasons = 1;
+ /* FALLTHRU */
case INTEL_FAM6_HASWELL_X: /* HSX */
do_core_perf_limit_reasons = 1;
do_ring_perf_limit_reasons = 1;
@@ -4643,6 +4756,9 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p
unsigned int dts, dts2;
int cpu;
+ UNUSED(c);
+ UNUSED(p);
+
if (!(do_dts || do_ptm))
return 0;
@@ -4698,7 +4814,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p
void print_power_limit_msr(int cpu, unsigned long long msr, char *label)
{
- fprintf(outf, "cpu%d: %s: %sabled (%f Watts, %f sec, clamp %sabled)\n",
+ fprintf(outf, "cpu%d: %s: %sabled (%0.3f Watts, %f sec, clamp %sabled)\n",
cpu, label,
((msr >> 15) & 1) ? "EN" : "DIS",
((msr >> 0) & 0x7FFF) * rapl_power_units,
@@ -4714,6 +4830,9 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
const char *msr_name;
int cpu;
+ UNUSED(c);
+ UNUSED(p);
+
if (!do_rapl)
return 0;
@@ -4762,12 +4881,19 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
cpu, msr, (msr >> 63) & 1 ? "" : "UN");
print_power_limit_msr(cpu, msr, "PKG Limit #1");
- fprintf(outf, "cpu%d: PKG Limit #2: %sabled (%f Watts, %f* sec, clamp %sabled)\n",
+ fprintf(outf, "cpu%d: PKG Limit #2: %sabled (%0.3f Watts, %f* sec, clamp %sabled)\n",
cpu,
((msr >> 47) & 1) ? "EN" : "DIS",
((msr >> 32) & 0x7FFF) * rapl_power_units,
(1.0 + (((msr >> 54) & 0x3) / 4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units,
((msr >> 48) & 1) ? "EN" : "DIS");
+
+ if (get_msr(cpu, MSR_VR_CURRENT_CONFIG, &msr))
+ return -9;
+
+ fprintf(outf, "cpu%d: MSR_VR_CURRENT_CONFIG: 0x%08llx\n", cpu, msr);
+ fprintf(outf, "cpu%d: PKG Limit #4: %f Watts (%slocked)\n",
+ cpu, ((msr >> 0) & 0x1FFF) * rapl_power_units, (msr >> 31) & 1 ? "" : "UN");
}
if (do_rapl & RAPL_DRAM_POWER_INFO) {
@@ -4830,6 +4956,9 @@ int has_snb_msrs(unsigned int family, unsigned int model)
if (!genuine_intel)
return 0;
+ if (family != 6)
+ return 0;
+
switch (model) {
case INTEL_FAM6_SANDYBRIDGE:
case INTEL_FAM6_SANDYBRIDGE_X:
@@ -4873,6 +5002,9 @@ int has_c8910_msrs(unsigned int family, unsigned int model)
if (!genuine_intel)
return 0;
+ if (family != 6)
+ return 0;
+
switch (model) {
case INTEL_FAM6_HASWELL_L: /* HSW */
case INTEL_FAM6_BROADWELL: /* BDW */
@@ -4899,6 +5031,9 @@ int has_skl_msrs(unsigned int family, unsigned int model)
if (!genuine_intel)
return 0;
+ if (family != 6)
+ return 0;
+
switch (model) {
case INTEL_FAM6_SKYLAKE_L: /* SKL */
case INTEL_FAM6_CANNONLAKE_L: /* CNL */
@@ -4911,6 +5046,10 @@ int is_slm(unsigned int family, unsigned int model)
{
if (!genuine_intel)
return 0;
+
+ if (family != 6)
+ return 0;
+
switch (model) {
case INTEL_FAM6_ATOM_SILVERMONT: /* BYT */
case INTEL_FAM6_ATOM_SILVERMONT_D: /* AVN */
@@ -4923,6 +5062,10 @@ int is_knl(unsigned int family, unsigned int model)
{
if (!genuine_intel)
return 0;
+
+ if (family != 6)
+ return 0;
+
switch (model) {
case INTEL_FAM6_XEON_PHI_KNL: /* KNL */
return 1;
@@ -4935,6 +5078,9 @@ int is_cnl(unsigned int family, unsigned int model)
if (!genuine_intel)
return 0;
+ if (family != 6)
+ return 0;
+
switch (model) {
case INTEL_FAM6_CANNONLAKE_L: /* CNL */
return 1;
@@ -4989,6 +5135,9 @@ int get_cpu_type(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
unsigned int eax, ebx, ecx, edx;
+ UNUSED(c);
+ UNUSED(p);
+
if (!genuine_intel)
return 0;
@@ -5025,6 +5174,9 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk
unsigned int tcc_default, tcc_offset;
int cpu;
+ UNUSED(c);
+ UNUSED(p);
+
/* tj_max is used only for dts or ptm */
if (!(do_dts || do_ptm))
return 0;
@@ -5572,6 +5724,11 @@ void process_cpuid()
else
BIC_NOT_PRESENT(BIC_CPU_LPI);
+ if (!access("/sys/devices/system/cpu/cpu0/thermal_throttle/core_throttle_count", R_OK))
+ BIC_PRESENT(BIC_CORE_THROT_CNT);
+ else
+ BIC_NOT_PRESENT(BIC_CORE_THROT_CNT);
+
if (!access(sys_lpi_file_sysfs, R_OK)) {
sys_lpi_file = sys_lpi_file_sysfs;
BIC_PRESENT(BIC_SYS_LPI);
@@ -5601,11 +5758,6 @@ int dir_filter(const struct dirent *dirp)
return 0;
}
-int open_dev_cpu_msr(int dummy1)
-{
- return 0;
-}
-
void topology_probe()
{
int i;
@@ -5896,6 +6048,9 @@ void turbostat_init()
if (!quiet && do_irtl_snb)
print_irtl();
+
+ if (DO_BIC(BIC_IPC))
+ (void)get_instr_count_fd(base_cpu);
}
int fork_it(char **argv)
@@ -5973,7 +6128,7 @@ int get_and_dump_counters(void)
void print_version()
{
- fprintf(outf, "turbostat version 21.05.04" " - Len Brown <lenb@kernel.org>\n");
+ fprintf(outf, "turbostat version 2022.04.16 - Len Brown <lenb@kernel.org>\n");
}
int add_counter(unsigned int msr_num, char *path, char *name,
@@ -6138,6 +6293,16 @@ next:
}
}
+int is_deferred_add(char *name)
+{
+ int i;
+
+ for (i = 0; i < deferred_add_index; ++i)
+ if (!strcmp(name, deferred_add_names[i]))
+ return 1;
+ return 0;
+}
+
int is_deferred_skip(char *name)
{
int i;
@@ -6156,9 +6321,6 @@ void probe_sysfs(void)
int state;
char *sp;
- if (!DO_BIC(BIC_sysfs))
- return;
-
for (state = 10; state >= 0; --state) {
sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state);
@@ -6181,6 +6343,9 @@ void probe_sysfs(void)
sprintf(path, "cpuidle/state%d/time", state);
+ if (!DO_BIC(BIC_sysfs) && !is_deferred_add(name_buf))
+ continue;
+
if (is_deferred_skip(name_buf))
continue;
@@ -6206,6 +6371,9 @@ void probe_sysfs(void)
sprintf(path, "cpuidle/state%d/usage", state);
+ if (!DO_BIC(BIC_sysfs) && !is_deferred_add(name_buf))
+ continue;
+
if (is_deferred_skip(name_buf))
continue;
@@ -6313,6 +6481,7 @@ void cmdline(int argc, char **argv)
{ "interval", required_argument, 0, 'i' },
{ "IPC", no_argument, 0, 'I' },
{ "num_iterations", required_argument, 0, 'n' },
+ { "header_iterations", required_argument, 0, 'N' },
{ "help", no_argument, 0, 'h' },
{ "hide", required_argument, 0, 'H' }, // meh, -h taken by --help
{ "Joules", no_argument, 0, 'J' },
@@ -6394,6 +6563,14 @@ void cmdline(int argc, char **argv)
exit(2);
}
break;
+ case 'N':
+ header_iterations = strtod(optarg, NULL);
+
+ if (header_iterations <= 0) {
+ fprintf(outf, "iterations %d should be positive number\n", header_iterations);
+ exit(2);
+ }
+ break;
case 's':
/*
* --show: show only those specified
@@ -6432,6 +6609,8 @@ int main(int argc, char **argv)
turbostat_init();
+ msr_sum_record();
+
/* dump counters and exit */
if (dump_only)
return get_and_dump_counters();
@@ -6443,7 +6622,6 @@ int main(int argc, char **argv)
return 0;
}
- msr_sum_record();
/*
* if any params left, it must be a command to fork
*/
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index 65dbdda3a054..1da76ccde448 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -1842,7 +1842,7 @@ static int nfit_test_dimm_init(struct nfit_test *t)
return 0;
}
-static void security_init(struct nfit_test *t)
+static void nfit_security_init(struct nfit_test *t)
{
int i;
@@ -1938,7 +1938,7 @@ static int nfit_test0_alloc(struct nfit_test *t)
if (nfit_test_dimm_init(t))
return -ENOMEM;
smart_init(t);
- security_init(t);
+ nfit_security_init(t);
return ars_state_init(&t->pdev.dev, &t->ars_state);
}
diff --git a/tools/testing/radix-tree/linux.c b/tools/testing/radix-tree/linux.c
index 81539f543954..d5c1bcba86fe 100644
--- a/tools/testing/radix-tree/linux.c
+++ b/tools/testing/radix-tree/linux.c
@@ -25,7 +25,8 @@ struct kmem_cache {
void (*ctor)(void *);
};
-void *kmem_cache_alloc(struct kmem_cache *cachep, int gfp)
+void *kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *lru,
+ int gfp)
{
void *p;
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 2319ec87f53d..0aedcd76cf0f 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -9,6 +9,7 @@ TARGETS += clone3
TARGETS += core
TARGETS += cpufreq
TARGETS += cpu-hotplug
+TARGETS += damon
TARGETS += drivers/dma-buf
TARGETS += efivarfs
TARGETS += exec
@@ -52,6 +53,7 @@ TARGETS += proc
TARGETS += pstore
TARGETS += ptrace
TARGETS += openat2
+TARGETS += resctrl
TARGETS += rlimits
TARGETS += rseq
TARGETS += rtc
diff --git a/tools/testing/selftests/arm64/Makefile b/tools/testing/selftests/arm64/Makefile
index 1e8d9a8f59df..9460cbe81bcc 100644
--- a/tools/testing/selftests/arm64/Makefile
+++ b/tools/testing/selftests/arm64/Makefile
@@ -17,16 +17,7 @@ top_srcdir = $(realpath ../../../../)
# Additional include paths needed by kselftest.h and local headers
CFLAGS += -I$(top_srcdir)/tools/testing/selftests/
-# Guessing where the Kernel headers could have been installed
-# depending on ENV config
-ifeq ($(KBUILD_OUTPUT),)
-khdr_dir = $(top_srcdir)/usr/include
-else
-# the KSFT preferred location when KBUILD_OUTPUT is set
-khdr_dir = $(KBUILD_OUTPUT)/kselftest/usr/include
-endif
-
-CFLAGS += -I$(khdr_dir)
+CFLAGS += $(KHDR_INCLUDES)
export CFLAGS
export top_srcdir
diff --git a/tools/testing/selftests/arm64/abi/.gitignore b/tools/testing/selftests/arm64/abi/.gitignore
index b79cf5814c23..b9e54417250d 100644
--- a/tools/testing/selftests/arm64/abi/.gitignore
+++ b/tools/testing/selftests/arm64/abi/.gitignore
@@ -1 +1,2 @@
syscall-abi
+tpidr2
diff --git a/tools/testing/selftests/arm64/abi/Makefile b/tools/testing/selftests/arm64/abi/Makefile
index 96eba974ac8d..c8d7f2495eb2 100644
--- a/tools/testing/selftests/arm64/abi/Makefile
+++ b/tools/testing/selftests/arm64/abi/Makefile
@@ -1,8 +1,15 @@
# SPDX-License-Identifier: GPL-2.0
# Copyright (C) 2021 ARM Limited
-TEST_GEN_PROGS := syscall-abi
+TEST_GEN_PROGS := syscall-abi tpidr2
include ../../lib.mk
$(OUTPUT)/syscall-abi: syscall-abi.c syscall-abi-asm.S
+
+# Build with nolibc since TPIDR2 is intended to be actively managed by
+# libc and we're trying to test the functionality that it depends on here.
+$(OUTPUT)/tpidr2: tpidr2.c
+ $(CC) -fno-asynchronous-unwind-tables -fno-ident -s -Os -nostdlib \
+ -static -include ../../../../include/nolibc/nolibc.h \
+ -ffreestanding -Wall $^ -o $@ -lgcc
diff --git a/tools/testing/selftests/arm64/abi/syscall-abi-asm.S b/tools/testing/selftests/arm64/abi/syscall-abi-asm.S
index 983467cfcee0..b523c21c2278 100644
--- a/tools/testing/selftests/arm64/abi/syscall-abi-asm.S
+++ b/tools/testing/selftests/arm64/abi/syscall-abi-asm.S
@@ -9,15 +9,42 @@
// invoked is configured in x8 of the input GPR data.
//
// x0: SVE VL, 0 for FP only
+// x1: SME VL
//
// GPRs: gpr_in, gpr_out
// FPRs: fpr_in, fpr_out
// Zn: z_in, z_out
// Pn: p_in, p_out
// FFR: ffr_in, ffr_out
+// ZA: za_in, za_out
+// SVCR: svcr_in, svcr_out
+
+#include "syscall-abi.h"
.arch_extension sve
+/*
+ * LDR (vector to ZA array):
+ * LDR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL]
+ */
+.macro _ldr_za nw, nxbase, offset=0
+ .inst 0xe1000000 \
+ | (((\nw) & 3) << 13) \
+ | ((\nxbase) << 5) \
+ | ((\offset) & 7)
+.endm
+
+/*
+ * STR (vector from ZA array):
+ * STR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL]
+ */
+.macro _str_za nw, nxbase, offset=0
+ .inst 0xe1200000 \
+ | (((\nw) & 3) << 13) \
+ | ((\nxbase) << 5) \
+ | ((\offset) & 7)
+.endm
+
.globl do_syscall
do_syscall:
// Store callee saved registers x19-x29 (80 bytes) plus x0 and x1
@@ -30,6 +57,24 @@ do_syscall:
stp x25, x26, [sp, #80]
stp x27, x28, [sp, #96]
+ // Set SVCR if we're doing SME
+ cbz x1, 1f
+ adrp x2, svcr_in
+ ldr x2, [x2, :lo12:svcr_in]
+ msr S3_3_C4_C2_2, x2
+1:
+
+ // Load ZA if it's enabled - uses x12 as scratch due to SME LDR
+ tbz x2, #SVCR_ZA_SHIFT, 1f
+ mov w12, #0
+ ldr x2, =za_in
+2: _ldr_za 12, 2
+ add x2, x2, x1
+ add x12, x12, #1
+ cmp x1, x12
+ bne 2b
+1:
+
// Load GPRs x8-x28, and save our SP/FP for later comparison
ldr x2, =gpr_in
add x2, x2, #64
@@ -68,7 +113,7 @@ do_syscall:
ldp q30, q31, [x2, #16 * 30]
1:
- // Load the SVE registers if we're doing SVE
+ // Load the SVE registers if we're doing SVE/SME
cbz x0, 1f
ldr x2, =z_in
@@ -105,9 +150,14 @@ do_syscall:
ldr z30, [x2, #30, MUL VL]
ldr z31, [x2, #31, MUL VL]
+ // Only set a non-zero FFR, test patterns must be zero since the
+ // syscall should clear it - this lets us handle FA64.
ldr x2, =ffr_in
ldr p0, [x2, #0]
+ ldr x2, [x2, #0]
+ cbz x2, 2f
wrffr p0.b
+2:
ldr x2, =p_in
ldr p0, [x2, #0, MUL VL]
@@ -169,6 +219,24 @@ do_syscall:
stp q28, q29, [x2, #16 * 28]
stp q30, q31, [x2, #16 * 30]
+ // Save SVCR if we're doing SME
+ cbz x1, 1f
+ mrs x2, S3_3_C4_C2_2
+ adrp x3, svcr_out
+ str x2, [x3, :lo12:svcr_out]
+1:
+
+ // Save ZA if it's enabled - uses x12 as scratch due to SME STR
+ tbz x2, #SVCR_ZA_SHIFT, 1f
+ mov w12, #0
+ ldr x2, =za_out
+2: _str_za 12, 2
+ add x2, x2, x1
+ add x12, x12, #1
+ cmp x1, x12
+ bne 2b
+1:
+
// Save the SVE state if we have some
cbz x0, 1f
@@ -224,6 +292,10 @@ do_syscall:
str p14, [x2, #14, MUL VL]
str p15, [x2, #15, MUL VL]
+ // Only save FFR if we wrote a value for SME
+ ldr x2, =ffr_in
+ ldr x2, [x2, #0]
+ cbz x2, 1f
ldr x2, =ffr_out
rdffr p0.b
str p0, [x2, #0]
@@ -237,4 +309,9 @@ do_syscall:
ldp x27, x28, [sp, #96]
ldp x29, x30, [sp], #112
+ // Clear SVCR if we were doing SME so future tests don't have ZA
+ cbz x1, 1f
+ msr S3_3_C4_C2_2, xzr
+1:
+
ret
diff --git a/tools/testing/selftests/arm64/abi/syscall-abi.c b/tools/testing/selftests/arm64/abi/syscall-abi.c
index 1e13b7523918..b632bfe9e022 100644
--- a/tools/testing/selftests/arm64/abi/syscall-abi.c
+++ b/tools/testing/selftests/arm64/abi/syscall-abi.c
@@ -18,9 +18,13 @@
#include "../../kselftest.h"
+#include "syscall-abi.h"
+
#define NUM_VL ((SVE_VQ_MAX - SVE_VQ_MIN) + 1)
-extern void do_syscall(int sve_vl);
+static int default_sme_vl;
+
+extern void do_syscall(int sve_vl, int sme_vl);
static void fill_random(void *buf, size_t size)
{
@@ -48,14 +52,15 @@ static struct syscall_cfg {
uint64_t gpr_in[NUM_GPR];
uint64_t gpr_out[NUM_GPR];
-static void setup_gpr(struct syscall_cfg *cfg, int sve_vl)
+static void setup_gpr(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+ uint64_t svcr)
{
fill_random(gpr_in, sizeof(gpr_in));
gpr_in[8] = cfg->syscall_nr;
memset(gpr_out, 0, sizeof(gpr_out));
}
-static int check_gpr(struct syscall_cfg *cfg, int sve_vl)
+static int check_gpr(struct syscall_cfg *cfg, int sve_vl, int sme_vl, uint64_t svcr)
{
int errors = 0;
int i;
@@ -79,13 +84,15 @@ static int check_gpr(struct syscall_cfg *cfg, int sve_vl)
uint64_t fpr_in[NUM_FPR * 2];
uint64_t fpr_out[NUM_FPR * 2];
-static void setup_fpr(struct syscall_cfg *cfg, int sve_vl)
+static void setup_fpr(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+ uint64_t svcr)
{
fill_random(fpr_in, sizeof(fpr_in));
memset(fpr_out, 0, sizeof(fpr_out));
}
-static int check_fpr(struct syscall_cfg *cfg, int sve_vl)
+static int check_fpr(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+ uint64_t svcr)
{
int errors = 0;
int i;
@@ -109,13 +116,15 @@ static uint8_t z_zero[__SVE_ZREG_SIZE(SVE_VQ_MAX)];
uint8_t z_in[SVE_NUM_PREGS * __SVE_ZREG_SIZE(SVE_VQ_MAX)];
uint8_t z_out[SVE_NUM_PREGS * __SVE_ZREG_SIZE(SVE_VQ_MAX)];
-static void setup_z(struct syscall_cfg *cfg, int sve_vl)
+static void setup_z(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+ uint64_t svcr)
{
fill_random(z_in, sizeof(z_in));
fill_random(z_out, sizeof(z_out));
}
-static int check_z(struct syscall_cfg *cfg, int sve_vl)
+static int check_z(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+ uint64_t svcr)
{
size_t reg_size = sve_vl;
int errors = 0;
@@ -126,13 +135,17 @@ static int check_z(struct syscall_cfg *cfg, int sve_vl)
/*
* After a syscall the low 128 bits of the Z registers should
- * be preserved and the rest be zeroed or preserved.
+ * be preserved and the rest be zeroed or preserved, except if
+ * we were in streaming mode in which case the low 128 bits may
+ * also be cleared by the transition out of streaming mode.
*/
for (i = 0; i < SVE_NUM_ZREGS; i++) {
void *in = &z_in[reg_size * i];
void *out = &z_out[reg_size * i];
- if (memcmp(in, out, SVE_VQ_BYTES) != 0) {
+ if ((memcmp(in, out, SVE_VQ_BYTES) != 0) &&
+ !((svcr & SVCR_SM_MASK) &&
+ memcmp(z_zero, out, SVE_VQ_BYTES) == 0)) {
ksft_print_msg("%s SVE VL %d Z%d low 128 bits changed\n",
cfg->name, sve_vl, i);
errors++;
@@ -145,13 +158,15 @@ static int check_z(struct syscall_cfg *cfg, int sve_vl)
uint8_t p_in[SVE_NUM_PREGS * __SVE_PREG_SIZE(SVE_VQ_MAX)];
uint8_t p_out[SVE_NUM_PREGS * __SVE_PREG_SIZE(SVE_VQ_MAX)];
-static void setup_p(struct syscall_cfg *cfg, int sve_vl)
+static void setup_p(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+ uint64_t svcr)
{
fill_random(p_in, sizeof(p_in));
fill_random(p_out, sizeof(p_out));
}
-static int check_p(struct syscall_cfg *cfg, int sve_vl)
+static int check_p(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+ uint64_t svcr)
{
size_t reg_size = sve_vq_from_vl(sve_vl) * 2; /* 1 bit per VL byte */
@@ -175,9 +190,20 @@ static int check_p(struct syscall_cfg *cfg, int sve_vl)
uint8_t ffr_in[__SVE_PREG_SIZE(SVE_VQ_MAX)];
uint8_t ffr_out[__SVE_PREG_SIZE(SVE_VQ_MAX)];
-static void setup_ffr(struct syscall_cfg *cfg, int sve_vl)
+static void setup_ffr(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+ uint64_t svcr)
{
/*
+ * If we are in streaming mode and do not have FA64 then FFR
+ * is unavailable.
+ */
+ if ((svcr & SVCR_SM_MASK) &&
+ !(getauxval(AT_HWCAP2) & HWCAP2_SME_FA64)) {
+ memset(&ffr_in, 0, sizeof(ffr_in));
+ return;
+ }
+
+ /*
* It is only valid to set a contiguous set of bits starting
* at 0. For now since we're expecting this to be cleared by
* a syscall just set all bits.
@@ -186,7 +212,8 @@ static void setup_ffr(struct syscall_cfg *cfg, int sve_vl)
fill_random(ffr_out, sizeof(ffr_out));
}
-static int check_ffr(struct syscall_cfg *cfg, int sve_vl)
+static int check_ffr(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+ uint64_t svcr)
{
size_t reg_size = sve_vq_from_vl(sve_vl) * 2; /* 1 bit per VL byte */
int errors = 0;
@@ -195,6 +222,10 @@ static int check_ffr(struct syscall_cfg *cfg, int sve_vl)
if (!sve_vl)
return 0;
+ if ((svcr & SVCR_SM_MASK) &&
+ !(getauxval(AT_HWCAP2) & HWCAP2_SME_FA64))
+ return 0;
+
/* After a syscall the P registers should be preserved or zeroed */
for (i = 0; i < reg_size; i++)
if (ffr_out[i] && (ffr_in[i] != ffr_out[i]))
@@ -206,8 +237,65 @@ static int check_ffr(struct syscall_cfg *cfg, int sve_vl)
return errors;
}
-typedef void (*setup_fn)(struct syscall_cfg *cfg, int sve_vl);
-typedef int (*check_fn)(struct syscall_cfg *cfg, int sve_vl);
+uint64_t svcr_in, svcr_out;
+
+static void setup_svcr(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+ uint64_t svcr)
+{
+ svcr_in = svcr;
+}
+
+static int check_svcr(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+ uint64_t svcr)
+{
+ int errors = 0;
+
+ if (svcr_out & SVCR_SM_MASK) {
+ ksft_print_msg("%s Still in SM, SVCR %llx\n",
+ cfg->name, svcr_out);
+ errors++;
+ }
+
+ if ((svcr_in & SVCR_ZA_MASK) != (svcr_out & SVCR_ZA_MASK)) {
+ ksft_print_msg("%s PSTATE.ZA changed, SVCR %llx != %llx\n",
+ cfg->name, svcr_in, svcr_out);
+ errors++;
+ }
+
+ return errors;
+}
+
+uint8_t za_in[SVE_NUM_PREGS * __SVE_ZREG_SIZE(SVE_VQ_MAX)];
+uint8_t za_out[SVE_NUM_PREGS * __SVE_ZREG_SIZE(SVE_VQ_MAX)];
+
+static void setup_za(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+ uint64_t svcr)
+{
+ fill_random(za_in, sizeof(za_in));
+ memset(za_out, 0, sizeof(za_out));
+}
+
+static int check_za(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+ uint64_t svcr)
+{
+ size_t reg_size = sme_vl * sme_vl;
+ int errors = 0;
+
+ if (!(svcr & SVCR_ZA_MASK))
+ return 0;
+
+ if (memcmp(za_in, za_out, reg_size) != 0) {
+ ksft_print_msg("SME VL %d ZA does not match\n", sme_vl);
+ errors++;
+ }
+
+ return errors;
+}
+
+typedef void (*setup_fn)(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+ uint64_t svcr);
+typedef int (*check_fn)(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+ uint64_t svcr);
/*
* Each set of registers has a setup function which is called before
@@ -225,20 +313,23 @@ static struct {
{ setup_z, check_z },
{ setup_p, check_p },
{ setup_ffr, check_ffr },
+ { setup_svcr, check_svcr },
+ { setup_za, check_za },
};
-static bool do_test(struct syscall_cfg *cfg, int sve_vl)
+static bool do_test(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+ uint64_t svcr)
{
int errors = 0;
int i;
for (i = 0; i < ARRAY_SIZE(regset); i++)
- regset[i].setup(cfg, sve_vl);
+ regset[i].setup(cfg, sve_vl, sme_vl, svcr);
- do_syscall(sve_vl);
+ do_syscall(sve_vl, sme_vl);
for (i = 0; i < ARRAY_SIZE(regset); i++)
- errors += regset[i].check(cfg, sve_vl);
+ errors += regset[i].check(cfg, sve_vl, sme_vl, svcr);
return errors == 0;
}
@@ -246,9 +337,10 @@ static bool do_test(struct syscall_cfg *cfg, int sve_vl)
static void test_one_syscall(struct syscall_cfg *cfg)
{
int sve_vq, sve_vl;
+ int sme_vq, sme_vl;
/* FPSIMD only case */
- ksft_test_result(do_test(cfg, 0),
+ ksft_test_result(do_test(cfg, 0, default_sme_vl, 0),
"%s FPSIMD\n", cfg->name);
if (!(getauxval(AT_HWCAP) & HWCAP_SVE))
@@ -265,8 +357,36 @@ static void test_one_syscall(struct syscall_cfg *cfg)
if (sve_vq != sve_vq_from_vl(sve_vl))
sve_vq = sve_vq_from_vl(sve_vl);
- ksft_test_result(do_test(cfg, sve_vl),
+ ksft_test_result(do_test(cfg, sve_vl, default_sme_vl, 0),
"%s SVE VL %d\n", cfg->name, sve_vl);
+
+ if (!(getauxval(AT_HWCAP2) & HWCAP2_SME))
+ continue;
+
+ for (sme_vq = SVE_VQ_MAX; sme_vq > 0; --sme_vq) {
+ sme_vl = prctl(PR_SME_SET_VL, sme_vq * 16);
+ if (sme_vl == -1)
+ ksft_exit_fail_msg("PR_SME_SET_VL failed: %s (%d)\n",
+ strerror(errno), errno);
+
+ sme_vl &= PR_SME_VL_LEN_MASK;
+
+ if (sme_vq != sve_vq_from_vl(sme_vl))
+ sme_vq = sve_vq_from_vl(sme_vl);
+
+ ksft_test_result(do_test(cfg, sve_vl, sme_vl,
+ SVCR_ZA_MASK | SVCR_SM_MASK),
+ "%s SVE VL %d/SME VL %d SM+ZA\n",
+ cfg->name, sve_vl, sme_vl);
+ ksft_test_result(do_test(cfg, sve_vl, sme_vl,
+ SVCR_SM_MASK),
+ "%s SVE VL %d/SME VL %d SM\n",
+ cfg->name, sve_vl, sme_vl);
+ ksft_test_result(do_test(cfg, sve_vl, sme_vl,
+ SVCR_ZA_MASK),
+ "%s SVE VL %d/SME VL %d ZA\n",
+ cfg->name, sve_vl, sme_vl);
+ }
}
}
@@ -299,14 +419,54 @@ int sve_count_vls(void)
return vl_count;
}
+int sme_count_vls(void)
+{
+ unsigned int vq;
+ int vl_count = 0;
+ int vl;
+
+ if (!(getauxval(AT_HWCAP2) & HWCAP2_SME))
+ return 0;
+
+ /* Ensure we configure a SME VL, used to flag if SVCR is set */
+ default_sme_vl = 16;
+
+ /*
+ * Enumerate up to SVE_VQ_MAX vector lengths
+ */
+ for (vq = SVE_VQ_MAX; vq > 0; --vq) {
+ vl = prctl(PR_SME_SET_VL, vq * 16);
+ if (vl == -1)
+ ksft_exit_fail_msg("PR_SME_SET_VL failed: %s (%d)\n",
+ strerror(errno), errno);
+
+ vl &= PR_SME_VL_LEN_MASK;
+
+ if (vq != sve_vq_from_vl(vl))
+ vq = sve_vq_from_vl(vl);
+
+ vl_count++;
+ }
+
+ return vl_count;
+}
+
int main(void)
{
int i;
+ int tests = 1; /* FPSIMD */
srandom(getpid());
ksft_print_header();
- ksft_set_plan(ARRAY_SIZE(syscalls) * (sve_count_vls() + 1));
+ tests += sve_count_vls();
+ tests += (sve_count_vls() * sme_count_vls()) * 3;
+ ksft_set_plan(ARRAY_SIZE(syscalls) * tests);
+
+ if (getauxval(AT_HWCAP2) & HWCAP2_SME_FA64)
+ ksft_print_msg("SME with FA64\n");
+ else if (getauxval(AT_HWCAP2) & HWCAP2_SME)
+ ksft_print_msg("SME without FA64\n");
for (i = 0; i < ARRAY_SIZE(syscalls); i++)
test_one_syscall(&syscalls[i]);
diff --git a/tools/testing/selftests/arm64/abi/syscall-abi.h b/tools/testing/selftests/arm64/abi/syscall-abi.h
new file mode 100644
index 000000000000..bda5a87ad381
--- /dev/null
+++ b/tools/testing/selftests/arm64/abi/syscall-abi.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2021 ARM Limited.
+ */
+
+#ifndef SYSCALL_ABI_H
+#define SYSCALL_ABI_H
+
+#define SVCR_ZA_MASK 2
+#define SVCR_SM_MASK 1
+
+#define SVCR_ZA_SHIFT 1
+#define SVCR_SM_SHIFT 0
+
+#endif
diff --git a/tools/testing/selftests/arm64/abi/tpidr2.c b/tools/testing/selftests/arm64/abi/tpidr2.c
new file mode 100644
index 000000000000..351a098b503a
--- /dev/null
+++ b/tools/testing/selftests/arm64/abi/tpidr2.c
@@ -0,0 +1,298 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/sched.h>
+#include <linux/wait.h>
+
+#define SYS_TPIDR2 "S3_3_C13_C0_5"
+
+#define EXPECTED_TESTS 5
+
+static void putstr(const char *str)
+{
+ write(1, str, strlen(str));
+}
+
+static void putnum(unsigned int num)
+{
+ char c;
+
+ if (num / 10)
+ putnum(num / 10);
+
+ c = '0' + (num % 10);
+ write(1, &c, 1);
+}
+
+static int tests_run;
+static int tests_passed;
+static int tests_failed;
+static int tests_skipped;
+
+static void set_tpidr2(uint64_t val)
+{
+ asm volatile (
+ "msr " SYS_TPIDR2 ", %0\n"
+ :
+ : "r"(val)
+ : "cc");
+}
+
+static uint64_t get_tpidr2(void)
+{
+ uint64_t val;
+
+ asm volatile (
+ "mrs %0, " SYS_TPIDR2 "\n"
+ : "=r"(val)
+ :
+ : "cc");
+
+ return val;
+}
+
+static void print_summary(void)
+{
+ if (tests_passed + tests_failed + tests_skipped != EXPECTED_TESTS)
+ putstr("# UNEXPECTED TEST COUNT: ");
+
+ putstr("# Totals: pass:");
+ putnum(tests_passed);
+ putstr(" fail:");
+ putnum(tests_failed);
+ putstr(" xfail:0 xpass:0 skip:");
+ putnum(tests_skipped);
+ putstr(" error:0\n");
+}
+
+/* Processes should start with TPIDR2 == 0 */
+static int default_value(void)
+{
+ return get_tpidr2() == 0;
+}
+
+/* If we set TPIDR2 we should read that value */
+static int write_read(void)
+{
+ set_tpidr2(getpid());
+
+ return getpid() == get_tpidr2();
+}
+
+/* If we set a value we should read the same value after scheduling out */
+static int write_sleep_read(void)
+{
+ set_tpidr2(getpid());
+
+ msleep(100);
+
+ return getpid() == get_tpidr2();
+}
+
+/*
+ * If we fork the value in the parent should be unchanged and the
+ * child should start with the same value and be able to set its own
+ * value.
+ */
+static int write_fork_read(void)
+{
+ pid_t newpid, waiting, oldpid;
+ int status;
+
+ set_tpidr2(getpid());
+
+ oldpid = getpid();
+ newpid = fork();
+ if (newpid == 0) {
+ /* In child */
+ if (get_tpidr2() != oldpid) {
+ putstr("# TPIDR2 changed in child: ");
+ putnum(get_tpidr2());
+ putstr("\n");
+ exit(0);
+ }
+
+ set_tpidr2(getpid());
+ if (get_tpidr2() == getpid()) {
+ exit(1);
+ } else {
+ putstr("# Failed to set TPIDR2 in child\n");
+ exit(0);
+ }
+ }
+ if (newpid < 0) {
+ putstr("# fork() failed: -");
+ putnum(-newpid);
+ putstr("\n");
+ return 0;
+ }
+
+ for (;;) {
+ waiting = waitpid(newpid, &status, 0);
+
+ if (waiting < 0) {
+ if (errno == EINTR)
+ continue;
+ putstr("# waitpid() failed: ");
+ putnum(errno);
+ putstr("\n");
+ return 0;
+ }
+ if (waiting != newpid) {
+ putstr("# waitpid() returned wrong PID\n");
+ return 0;
+ }
+
+ if (!WIFEXITED(status)) {
+ putstr("# child did not exit\n");
+ return 0;
+ }
+
+ if (getpid() != get_tpidr2()) {
+ putstr("# TPIDR2 corrupted in parent\n");
+ return 0;
+ }
+
+ return WEXITSTATUS(status);
+ }
+}
+
+/*
+ * sys_clone() has a lot of per architecture variation so just define
+ * it here rather than adding it to nolibc, plus the raw API is a
+ * little more convenient for this test.
+ */
+static int sys_clone(unsigned long clone_flags, unsigned long newsp,
+ int *parent_tidptr, unsigned long tls,
+ int *child_tidptr)
+{
+ return my_syscall5(__NR_clone, clone_flags, newsp, parent_tidptr, tls,
+ child_tidptr);
+}
+
+/*
+ * If we clone with CLONE_SETTLS then the value in the parent should
+ * be unchanged and the child should start with zero and be able to
+ * set its own value.
+ */
+static int write_clone_read(void)
+{
+ int parent_tid, child_tid;
+ pid_t parent, waiting;
+ int ret, status;
+
+ parent = getpid();
+ set_tpidr2(parent);
+
+ ret = sys_clone(CLONE_SETTLS, 0, &parent_tid, 0, &child_tid);
+ if (ret == -1) {
+ putstr("# clone() failed\n");
+ putnum(errno);
+ putstr("\n");
+ return 0;
+ }
+
+ if (ret == 0) {
+ /* In child */
+ if (get_tpidr2() != 0) {
+ putstr("# TPIDR2 non-zero in child: ");
+ putnum(get_tpidr2());
+ putstr("\n");
+ exit(0);
+ }
+
+ if (gettid() == 0)
+ putstr("# Child TID==0\n");
+ set_tpidr2(gettid());
+ if (get_tpidr2() == gettid()) {
+ exit(1);
+ } else {
+ putstr("# Failed to set TPIDR2 in child\n");
+ exit(0);
+ }
+ }
+
+ for (;;) {
+ waiting = wait4(ret, &status, __WCLONE, NULL);
+
+ if (waiting < 0) {
+ if (errno == EINTR)
+ continue;
+ putstr("# wait4() failed: ");
+ putnum(errno);
+ putstr("\n");
+ return 0;
+ }
+ if (waiting != ret) {
+ putstr("# wait4() returned wrong PID ");
+ putnum(waiting);
+ putstr("\n");
+ return 0;
+ }
+
+ if (!WIFEXITED(status)) {
+ putstr("# child did not exit\n");
+ return 0;
+ }
+
+ if (parent != get_tpidr2()) {
+ putstr("# TPIDR2 corrupted in parent\n");
+ return 0;
+ }
+
+ return WEXITSTATUS(status);
+ }
+}
+
+#define run_test(name) \
+ if (name()) { \
+ tests_passed++; \
+ } else { \
+ tests_failed++; \
+ putstr("not "); \
+ } \
+ putstr("ok "); \
+ putnum(++tests_run); \
+ putstr(" " #name "\n");
+
+int main(int argc, char **argv)
+{
+ int ret, i;
+
+ putstr("TAP version 13\n");
+ putstr("1..");
+ putnum(EXPECTED_TESTS);
+ putstr("\n");
+
+ putstr("# PID: ");
+ putnum(getpid());
+ putstr("\n");
+
+ /*
+ * This test is run with nolibc which doesn't support hwcap and
+ * it's probably disproportionate to implement so instead check
+ * for the default vector length configuration in /proc.
+ */
+ ret = open("/proc/sys/abi/sme_default_vector_length", O_RDONLY, 0);
+ if (ret >= 0) {
+ run_test(default_value);
+ run_test(write_read);
+ run_test(write_sleep_read);
+ run_test(write_fork_read);
+ run_test(write_clone_read);
+
+ } else {
+ putstr("# SME support not present\n");
+
+ for (i = 0; i < EXPECTED_TESTS; i++) {
+ putstr("ok ");
+ putnum(i);
+ putstr(" skipped, TPIDR2 not supported\n");
+ }
+
+ tests_skipped += EXPECTED_TESTS;
+ }
+
+ print_summary();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/arm64/bti/Makefile b/tools/testing/selftests/arm64/bti/Makefile
index 73e013c082a6..ccdac414ad94 100644
--- a/tools/testing/selftests/arm64/bti/Makefile
+++ b/tools/testing/selftests/arm64/bti/Makefile
@@ -10,7 +10,7 @@ PROGS := $(patsubst %,gen/%,$(TEST_GEN_PROGS))
# cases for statically linked and dynamically lined binaries are
# slightly different.
-CFLAGS_NOBTI = -DBTI=0
+CFLAGS_NOBTI = -mbranch-protection=none -DBTI=0
CFLAGS_BTI = -mbranch-protection=standard -DBTI=1
CFLAGS_COMMON = -ffreestanding -Wall -Wextra $(CFLAGS)
@@ -39,7 +39,7 @@ BTI_OBJS = \
teststubs-bti.o \
trampoline-bti.o
gen/btitest: $(BTI_OBJS)
- $(CC) $(CFLAGS_BTI) $(CFLAGS_COMMON) -nostdlib -o $@ $^
+ $(CC) $(CFLAGS_BTI) $(CFLAGS_COMMON) -nostdlib -static -o $@ $^
NOBTI_OBJS = \
test-nobti.o \
@@ -50,7 +50,7 @@ NOBTI_OBJS = \
teststubs-nobti.o \
trampoline-nobti.o
gen/nobtitest: $(NOBTI_OBJS)
- $(CC) $(CFLAGS_BTI) $(CFLAGS_COMMON) -nostdlib -o $@ $^
+ $(CC) $(CFLAGS_BTI) $(CFLAGS_COMMON) -nostdlib -static -o $@ $^
# Including KSFT lib.mk here will also mangle the TEST_GEN_PROGS list
# to account for any OUTPUT target-dirs optionally provided by
diff --git a/tools/testing/selftests/arm64/fp/.gitignore b/tools/testing/selftests/arm64/fp/.gitignore
index c50d86331ed2..ea947af63882 100644
--- a/tools/testing/selftests/arm64/fp/.gitignore
+++ b/tools/testing/selftests/arm64/fp/.gitignore
@@ -1,8 +1,13 @@
fp-pidbench
fpsimd-test
+rdvl-sme
rdvl-sve
sve-probe-vls
sve-ptrace
sve-test
+ssve-test
vec-syscfg
vlset
+za-fork
+za-ptrace
+za-test
diff --git a/tools/testing/selftests/arm64/fp/Makefile b/tools/testing/selftests/arm64/fp/Makefile
index 95f0b877a060..a7c2286bf65b 100644
--- a/tools/testing/selftests/arm64/fp/Makefile
+++ b/tools/testing/selftests/arm64/fp/Makefile
@@ -1,24 +1,42 @@
# SPDX-License-Identifier: GPL-2.0
-CFLAGS += -I../../../../../usr/include/
-TEST_GEN_PROGS := sve-ptrace sve-probe-vls vec-syscfg
-TEST_PROGS_EXTENDED := fp-pidbench fpsimd-test fpsimd-stress \
- rdvl-sve \
- sve-test sve-stress \
+# A proper top_srcdir is needed by KSFT(lib.mk)
+top_srcdir = $(realpath ../../../../../)
+
+CFLAGS += -I$(top_srcdir)/usr/include/
+
+TEST_GEN_PROGS := sve-ptrace sve-probe-vls vec-syscfg za-fork za-ptrace
+TEST_GEN_PROGS_EXTENDED := fp-pidbench fpsimd-test \
+ rdvl-sme rdvl-sve \
+ sve-test \
+ ssve-test \
+ za-test \
vlset
+TEST_PROGS_EXTENDED := fpsimd-stress sve-stress ssve-stress za-stress
-all: $(TEST_GEN_PROGS) $(TEST_PROGS_EXTENDED)
+EXTRA_CLEAN += $(OUTPUT)/asm-utils.o $(OUTPUT)/rdvl.o $(OUTPUT)/za-fork-asm.o
-fp-pidbench: fp-pidbench.S asm-utils.o
+# Build with nolibc to avoid effects due to libc's clone() support
+$(OUTPUT)/fp-pidbench: fp-pidbench.S $(OUTPUT)/asm-utils.o
+ $(CC) -nostdlib $^ -o $@
+$(OUTPUT)/fpsimd-test: fpsimd-test.S $(OUTPUT)/asm-utils.o
$(CC) -nostdlib $^ -o $@
-fpsimd-test: fpsimd-test.o asm-utils.o
+$(OUTPUT)/rdvl-sve: rdvl-sve.c $(OUTPUT)/rdvl.o
+$(OUTPUT)/rdvl-sme: rdvl-sme.c $(OUTPUT)/rdvl.o
+$(OUTPUT)/sve-ptrace: sve-ptrace.c
+$(OUTPUT)/sve-probe-vls: sve-probe-vls.c $(OUTPUT)/rdvl.o
+$(OUTPUT)/sve-test: sve-test.S $(OUTPUT)/asm-utils.o
$(CC) -nostdlib $^ -o $@
-rdvl-sve: rdvl-sve.o rdvl.o
-sve-ptrace: sve-ptrace.o
-sve-probe-vls: sve-probe-vls.o rdvl.o
-sve-test: sve-test.o asm-utils.o
+$(OUTPUT)/ssve-test: sve-test.S $(OUTPUT)/asm-utils.o
+ $(CC) -DSSVE -nostdlib $^ -o $@
+$(OUTPUT)/vec-syscfg: vec-syscfg.c $(OUTPUT)/rdvl.o
+$(OUTPUT)/vlset: vlset.c
+$(OUTPUT)/za-fork: za-fork.c $(OUTPUT)/za-fork-asm.o
+ $(CC) -fno-asynchronous-unwind-tables -fno-ident -s -Os -nostdlib \
+ -include ../../../../include/nolibc/nolibc.h \
+ -static -ffreestanding -Wall $^ -o $@
+$(OUTPUT)/za-ptrace: za-ptrace.c
+$(OUTPUT)/za-test: za-test.S $(OUTPUT)/asm-utils.o
$(CC) -nostdlib $^ -o $@
-vec-syscfg: vec-syscfg.o rdvl.o
-vlset: vlset.o
include ../../lib.mk
diff --git a/tools/testing/selftests/arm64/fp/rdvl-sme.c b/tools/testing/selftests/arm64/fp/rdvl-sme.c
new file mode 100644
index 000000000000..49b0b2e08bac
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/rdvl-sme.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <stdio.h>
+
+#include "rdvl.h"
+
+int main(void)
+{
+ int vl = rdvl_sme();
+
+ printf("%d\n", vl);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/arm64/fp/rdvl.S b/tools/testing/selftests/arm64/fp/rdvl.S
index c916c1c9defd..20dc29996dc6 100644
--- a/tools/testing/selftests/arm64/fp/rdvl.S
+++ b/tools/testing/selftests/arm64/fp/rdvl.S
@@ -1,6 +1,8 @@
// SPDX-License-Identifier: GPL-2.0-only
// Copyright (C) 2021 ARM Limited.
+#include "sme-inst.h"
+
.arch_extension sve
.globl rdvl_sve
@@ -8,3 +10,11 @@ rdvl_sve:
hint 34 // BTI C
rdvl x0, #1
ret
+
+.globl rdvl_sme
+rdvl_sme:
+ hint 34 // BTI C
+
+ rdsvl 0, 1
+
+ ret
diff --git a/tools/testing/selftests/arm64/fp/rdvl.h b/tools/testing/selftests/arm64/fp/rdvl.h
index 7c9d953fc9e7..5d323679fbc9 100644
--- a/tools/testing/selftests/arm64/fp/rdvl.h
+++ b/tools/testing/selftests/arm64/fp/rdvl.h
@@ -3,6 +3,7 @@
#ifndef RDVL_H
#define RDVL_H
+int rdvl_sme(void);
int rdvl_sve(void);
#endif
diff --git a/tools/testing/selftests/arm64/fp/sme-inst.h b/tools/testing/selftests/arm64/fp/sme-inst.h
new file mode 100644
index 000000000000..7191e53ca1c0
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/sme-inst.h
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2021-2 ARM Limited.
+// Original author: Mark Brown <broonie@kernel.org>
+
+#ifndef SME_INST_H
+#define SME_INST_H
+
+/*
+ * RDSVL X\nx, #\imm
+ */
+.macro rdsvl nx, imm
+ .inst 0x4bf5800 \
+ | (\imm << 5) \
+ | (\nx)
+.endm
+
+.macro smstop
+ msr S0_3_C4_C6_3, xzr
+.endm
+
+.macro smstart_za
+ msr S0_3_C4_C5_3, xzr
+.endm
+
+.macro smstart_sm
+ msr S0_3_C4_C3_3, xzr
+.endm
+
+/*
+ * LDR (vector to ZA array):
+ * LDR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL]
+ */
+.macro _ldr_za nw, nxbase, offset=0
+ .inst 0xe1000000 \
+ | (((\nw) & 3) << 13) \
+ | ((\nxbase) << 5) \
+ | ((\offset) & 7)
+.endm
+
+/*
+ * STR (vector from ZA array):
+ * STR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL]
+ */
+.macro _str_za nw, nxbase, offset=0
+ .inst 0xe1200000 \
+ | (((\nw) & 3) << 13) \
+ | ((\nxbase) << 5) \
+ | ((\offset) & 7)
+.endm
+
+#endif
diff --git a/tools/testing/selftests/arm64/fp/ssve-stress b/tools/testing/selftests/arm64/fp/ssve-stress
new file mode 100644
index 000000000000..e2bd2cc184ad
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/ssve-stress
@@ -0,0 +1,59 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+# Copyright (C) 2015-2019 ARM Limited.
+# Original author: Dave Martin <Dave.Martin@arm.com>
+
+set -ue
+
+NR_CPUS=`nproc`
+
+pids=
+logs=
+
+cleanup () {
+ trap - INT TERM CHLD
+ set +e
+
+ if [ -n "$pids" ]; then
+ kill $pids
+ wait $pids
+ pids=
+ fi
+
+ if [ -n "$logs" ]; then
+ cat $logs
+ rm $logs
+ logs=
+ fi
+}
+
+interrupt () {
+ cleanup
+ exit 0
+}
+
+child_died () {
+ cleanup
+ exit 1
+}
+
+trap interrupt INT TERM EXIT
+
+for x in `seq 0 $((NR_CPUS * 4))`; do
+ log=`mktemp`
+ logs=$logs\ $log
+ ./ssve-test >$log &
+ pids=$pids\ $!
+done
+
+# Wait for all child processes to be created:
+sleep 10
+
+while :; do
+ kill -USR1 $pids
+done &
+pids=$pids\ $!
+
+wait
+
+exit 1
diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c
index 4c418b2021e0..8c4847977583 100644
--- a/tools/testing/selftests/arm64/fp/sve-ptrace.c
+++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c
@@ -26,6 +26,10 @@
#define NT_ARM_SVE 0x405
#endif
+#ifndef NT_ARM_SSVE
+#define NT_ARM_SSVE 0x40b
+#endif
+
struct vec_type {
const char *name;
unsigned long hwcap_type;
@@ -42,11 +46,18 @@ static const struct vec_type vec_types[] = {
.regset = NT_ARM_SVE,
.prctl_set = PR_SVE_SET_VL,
},
+ {
+ .name = "Streaming SVE",
+ .hwcap_type = AT_HWCAP2,
+ .hwcap = HWCAP2_SME,
+ .regset = NT_ARM_SSVE,
+ .prctl_set = PR_SME_SET_VL,
+ },
};
-#define VL_TESTS (((SVE_VQ_MAX - SVE_VQ_MIN) + 1) * 3)
+#define VL_TESTS (((SVE_VQ_MAX - SVE_VQ_MIN) + 1) * 4)
#define FLAG_TESTS 2
-#define FPSIMD_TESTS 3
+#define FPSIMD_TESTS 2
#define EXPECTED_TESTS ((VL_TESTS + FLAG_TESTS + FPSIMD_TESTS) * ARRAY_SIZE(vec_types))
@@ -78,6 +89,15 @@ static int get_fpsimd(pid_t pid, struct user_fpsimd_state *fpsimd)
return ptrace(PTRACE_GETREGSET, pid, NT_PRFPREG, &iov);
}
+static int set_fpsimd(pid_t pid, struct user_fpsimd_state *fpsimd)
+{
+ struct iovec iov;
+
+ iov.iov_base = fpsimd;
+ iov.iov_len = sizeof(*fpsimd);
+ return ptrace(PTRACE_SETREGSET, pid, NT_PRFPREG, &iov);
+}
+
static struct user_sve_header *get_sve(pid_t pid, const struct vec_type *type,
void **buf, size_t *size)
{
@@ -240,28 +260,24 @@ static void check_u32(unsigned int vl, const char *reg,
/* Access the FPSIMD registers via the SVE regset */
static void ptrace_sve_fpsimd(pid_t child, const struct vec_type *type)
{
- void *svebuf = NULL;
- size_t svebufsz = 0;
+ void *svebuf;
struct user_sve_header *sve;
struct user_fpsimd_state *fpsimd, new_fpsimd;
unsigned int i, j;
unsigned char *p;
+ int ret;
- /* New process should start with FPSIMD registers only */
- sve = get_sve(child, type, &svebuf, &svebufsz);
- if (!sve) {
- ksft_test_result_fail("get_sve(%s): %s\n",
- type->name, strerror(errno));
-
+ svebuf = malloc(SVE_PT_SIZE(0, SVE_PT_REGS_FPSIMD));
+ if (!svebuf) {
+ ksft_test_result_fail("Failed to allocate FPSIMD buffer\n");
return;
- } else {
- ksft_test_result_pass("get_sve(%s FPSIMD)\n", type->name);
}
- ksft_test_result((sve->flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD,
- "Got FPSIMD registers via %s\n", type->name);
- if ((sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_FPSIMD)
- goto out;
+ memset(svebuf, 0, SVE_PT_SIZE(0, SVE_PT_REGS_FPSIMD));
+ sve = svebuf;
+ sve->flags = SVE_PT_REGS_FPSIMD;
+ sve->size = SVE_PT_SIZE(0, SVE_PT_REGS_FPSIMD);
+ sve->vl = 16; /* We don't care what the VL is */
/* Try to set a known FPSIMD state via PT_REGS_SVE */
fpsimd = (struct user_fpsimd_state *)((char *)sve +
@@ -273,12 +289,11 @@ static void ptrace_sve_fpsimd(pid_t child, const struct vec_type *type)
p[j] = j;
}
- if (set_sve(child, type, sve)) {
- ksft_test_result_fail("set_sve(%s FPSIMD): %s\n",
- type->name, strerror(errno));
-
+ ret = set_sve(child, type, sve);
+ ksft_test_result(ret == 0, "%s FPSIMD set via SVE: %d\n",
+ type->name, ret);
+ if (ret)
goto out;
- }
/* Verify via the FPSIMD regset */
if (get_fpsimd(child, &new_fpsimd)) {
@@ -395,7 +410,7 @@ out:
free(write_buf);
}
-/* Validate attempting to set SVE data and read SVE data */
+/* Validate attempting to set SVE data and read it via the FPSIMD regset */
static void ptrace_set_sve_get_fpsimd_data(pid_t child,
const struct vec_type *type,
unsigned int vl)
@@ -478,6 +493,115 @@ out:
free(write_buf);
}
+/* Validate attempting to set FPSIMD data and read it via the SVE regset */
+static void ptrace_set_fpsimd_get_sve_data(pid_t child,
+ const struct vec_type *type,
+ unsigned int vl)
+{
+ void *read_buf = NULL;
+ unsigned char *p;
+ struct user_sve_header *read_sve;
+ unsigned int vq = sve_vq_from_vl(vl);
+ struct user_fpsimd_state write_fpsimd;
+ int ret, i, j;
+ size_t read_sve_size = 0;
+ size_t expected_size;
+ int errors = 0;
+
+ if (__BYTE_ORDER == __BIG_ENDIAN) {
+ ksft_test_result_skip("Big endian not supported\n");
+ return;
+ }
+
+ for (i = 0; i < 32; ++i) {
+ p = (unsigned char *)&write_fpsimd.vregs[i];
+
+ for (j = 0; j < sizeof(write_fpsimd.vregs[i]); ++j)
+ p[j] = j;
+ }
+
+ ret = set_fpsimd(child, &write_fpsimd);
+ if (ret != 0) {
+ ksft_test_result_fail("Failed to set FPSIMD state: %d\n)",
+ ret);
+ return;
+ }
+
+ if (!get_sve(child, type, (void **)&read_buf, &read_sve_size)) {
+ ksft_test_result_fail("Failed to read %s VL %u data\n",
+ type->name, vl);
+ return;
+ }
+ read_sve = read_buf;
+
+ if (read_sve->vl != vl) {
+ ksft_test_result_fail("Child VL != expected VL %d\n",
+ read_sve->vl, vl);
+ goto out;
+ }
+
+ /* The kernel may return either SVE or FPSIMD format */
+ switch (read_sve->flags & SVE_PT_REGS_MASK) {
+ case SVE_PT_REGS_FPSIMD:
+ expected_size = SVE_PT_FPSIMD_SIZE(vq, SVE_PT_REGS_FPSIMD);
+ if (read_sve_size < expected_size) {
+ ksft_test_result_fail("Read %d bytes, expected %d\n",
+ read_sve_size, expected_size);
+ goto out;
+ }
+
+ ret = memcmp(&write_fpsimd, read_buf + SVE_PT_FPSIMD_OFFSET,
+ sizeof(write_fpsimd));
+ if (ret != 0) {
+ ksft_print_msg("Read FPSIMD data mismatch\n");
+ errors++;
+ }
+ break;
+
+ case SVE_PT_REGS_SVE:
+ expected_size = SVE_PT_SVE_SIZE(vq, SVE_PT_REGS_SVE);
+ if (read_sve_size < expected_size) {
+ ksft_test_result_fail("Read %d bytes, expected %d\n",
+ read_sve_size, expected_size);
+ goto out;
+ }
+
+ for (i = 0; i < __SVE_NUM_ZREGS; i++) {
+ __uint128_t tmp = 0;
+
+ /*
+ * Z regs are stored endianness invariant, this won't
+ * work for big endian
+ */
+ memcpy(&tmp, read_buf + SVE_PT_SVE_ZREG_OFFSET(vq, i),
+ sizeof(tmp));
+
+ if (tmp != write_fpsimd.vregs[i]) {
+ ksft_print_msg("Mismatch in FPSIMD for %s VL %u Z%d/V%d\n",
+ type->name, vl, i, i);
+ errors++;
+ }
+ }
+
+ check_u32(vl, "FPSR", &write_fpsimd.fpsr,
+ read_buf + SVE_PT_SVE_FPSR_OFFSET(vq), &errors);
+ check_u32(vl, "FPCR", &write_fpsimd.fpcr,
+ read_buf + SVE_PT_SVE_FPCR_OFFSET(vq), &errors);
+ break;
+ default:
+ ksft_print_msg("Unexpected regs type %d\n",
+ read_sve->flags & SVE_PT_REGS_MASK);
+ errors++;
+ break;
+ }
+
+ ksft_test_result(errors == 0, "Set FPSIMD, read via SVE for %s VL %u\n",
+ type->name, vl);
+
+out:
+ free(read_buf);
+}
+
static int do_parent(pid_t child)
{
int ret = EXIT_FAILURE;
@@ -548,11 +672,9 @@ static int do_parent(pid_t child)
if (getauxval(vec_types[i].hwcap_type) & vec_types[i].hwcap) {
ptrace_sve_fpsimd(child, &vec_types[i]);
} else {
- ksft_test_result_skip("%s FPSIMD get via SVE\n",
- vec_types[i].name);
ksft_test_result_skip("%s FPSIMD set via SVE\n",
vec_types[i].name);
- ksft_test_result_skip("%s set read via FPSIMD\n",
+ ksft_test_result_skip("%s FPSIMD read\n",
vec_types[i].name);
}
@@ -585,11 +707,14 @@ static int do_parent(pid_t child)
if (vl_supported) {
ptrace_set_sve_get_sve_data(child, &vec_types[i], vl);
ptrace_set_sve_get_fpsimd_data(child, &vec_types[i], vl);
+ ptrace_set_fpsimd_get_sve_data(child, &vec_types[i], vl);
} else {
ksft_test_result_skip("%s set SVE get SVE for VL %d\n",
vec_types[i].name, vl);
ksft_test_result_skip("%s set SVE get FPSIMD for VL %d\n",
vec_types[i].name, vl);
+ ksft_test_result_skip("%s set FPSIMD get SVE for VL %d\n",
+ vec_types[i].name, vl);
}
}
}
diff --git a/tools/testing/selftests/arm64/fp/sve-test.S b/tools/testing/selftests/arm64/fp/sve-test.S
index f5b1b48ffff2..589264231a2d 100644
--- a/tools/testing/selftests/arm64/fp/sve-test.S
+++ b/tools/testing/selftests/arm64/fp/sve-test.S
@@ -13,6 +13,7 @@
#include <asm/unistd.h>
#include "assembler.h"
#include "asm-offsets.h"
+#include "sme-inst.h"
#define NZR 32
#define NPR 16
@@ -156,6 +157,7 @@ endfunction
// We fill the upper lanes of FFR with zeros.
// Beware: corrupts P0.
function setup_ffr
+#ifndef SSVE
mov x4, x30
and w0, w0, #0x3
@@ -178,6 +180,9 @@ function setup_ffr
wrffr p0.b
ret x4
+#else
+ ret
+#endif
endfunction
// Trivial memory compare: compare x2 bytes starting at address x0 with
@@ -260,6 +265,7 @@ endfunction
// Beware -- corrupts P0.
// Clobbers x0-x5.
function check_ffr
+#ifndef SSVE
mov x3, x30
ldr x4, =scratch
@@ -280,6 +286,9 @@ function check_ffr
mov x2, x5
mov x30, x3
b memcmp
+#else
+ ret
+#endif
endfunction
// Any SVE register modified here can cause corruption in the main
@@ -295,10 +304,12 @@ function irritator_handler
movi v0.8b, #1
movi v9.16b, #2
movi v31.8b, #3
+#ifndef SSVE
// And P0
rdffr p0.b
// And FFR
wrffr p15.b
+#endif
ret
endfunction
@@ -359,6 +370,11 @@ endfunction
.globl _start
function _start
_start:
+#ifdef SSVE
+ puts "Streaming mode "
+ smstart_sm
+#endif
+
// Sanity-check and report the vector length
rdvl x19, #8
@@ -407,6 +423,10 @@ _start:
orr w2, w2, #SA_NODEFER
bl setsignal
+#ifdef SSVE
+ smstart_sm // syscalls will have exited streaming mode
+#endif
+
mov x22, #0 // generation number, increments per iteration
.Ltest_loop:
rdvl x0, #8
diff --git a/tools/testing/selftests/arm64/fp/vec-syscfg.c b/tools/testing/selftests/arm64/fp/vec-syscfg.c
index c90658811a83..9bcfcdc34ee9 100644
--- a/tools/testing/selftests/arm64/fp/vec-syscfg.c
+++ b/tools/testing/selftests/arm64/fp/vec-syscfg.c
@@ -51,6 +51,16 @@ static struct vec_data vec_data[] = {
.prctl_set = PR_SVE_SET_VL,
.default_vl_file = "/proc/sys/abi/sve_default_vector_length",
},
+ {
+ .name = "SME",
+ .hwcap_type = AT_HWCAP2,
+ .hwcap = HWCAP2_SME,
+ .rdvl = rdvl_sme,
+ .rdvl_binary = "./rdvl-sme",
+ .prctl_get = PR_SME_GET_VL,
+ .prctl_set = PR_SME_SET_VL,
+ .default_vl_file = "/proc/sys/abi/sme_default_vector_length",
+ },
};
static int stdio_read_integer(FILE *f, const char *what, int *val)
diff --git a/tools/testing/selftests/arm64/fp/vlset.c b/tools/testing/selftests/arm64/fp/vlset.c
index 308d27a68226..76912a581a95 100644
--- a/tools/testing/selftests/arm64/fp/vlset.c
+++ b/tools/testing/selftests/arm64/fp/vlset.c
@@ -22,12 +22,15 @@ static int inherit = 0;
static int no_inherit = 0;
static int force = 0;
static unsigned long vl;
+static int set_ctl = PR_SVE_SET_VL;
+static int get_ctl = PR_SVE_GET_VL;
static const struct option options[] = {
{ "force", no_argument, NULL, 'f' },
{ "inherit", no_argument, NULL, 'i' },
{ "max", no_argument, NULL, 'M' },
{ "no-inherit", no_argument, &no_inherit, 1 },
+ { "sme", no_argument, NULL, 's' },
{ "help", no_argument, NULL, '?' },
{}
};
@@ -50,6 +53,9 @@ static int parse_options(int argc, char **argv)
case 'M': vl = SVE_VL_MAX; break;
case 'f': force = 1; break;
case 'i': inherit = 1; break;
+ case 's': set_ctl = PR_SME_SET_VL;
+ get_ctl = PR_SME_GET_VL;
+ break;
case 0: break;
default: goto error;
}
@@ -125,14 +131,14 @@ int main(int argc, char **argv)
if (inherit)
flags |= PR_SVE_VL_INHERIT;
- t = prctl(PR_SVE_SET_VL, vl | flags);
+ t = prctl(set_ctl, vl | flags);
if (t < 0) {
fprintf(stderr, "%s: PR_SVE_SET_VL: %s\n",
program_name, strerror(errno));
goto error;
}
- t = prctl(PR_SVE_GET_VL);
+ t = prctl(get_ctl);
if (t == -1) {
fprintf(stderr, "%s: PR_SVE_GET_VL: %s\n",
program_name, strerror(errno));
diff --git a/tools/testing/selftests/arm64/fp/za-fork-asm.S b/tools/testing/selftests/arm64/fp/za-fork-asm.S
new file mode 100644
index 000000000000..2fafadd491c3
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/za-fork-asm.S
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2021 ARM Limited.
+
+#include "sme-inst.h"
+
+.arch_extension sve
+
+#define MAGIC 42
+
+#define MAXVL 2048
+#define MAXVL_B (MAXVL / 8)
+
+.pushsection .text
+.data
+.align 4
+scratch:
+ .space MAXVL_B
+.popsection
+
+.globl fork_test
+fork_test:
+ smstart_za
+
+ // For simplicity just set one word in one vector, other tests
+ // cover general data corruption issues.
+ ldr x0, =scratch
+ mov x1, #MAGIC
+ str x1, [x0]
+ mov w12, wzr
+ _ldr_za 12, 0 // ZA.H[W12] loaded from [X0]
+
+ // Tail call into the C portion that does the fork & verify
+ b fork_test_c
+
+.globl verify_fork
+verify_fork:
+ // SVCR should have ZA=1, SM=0
+ mrs x0, S3_3_C4_C2_2
+ and x1, x0, #3
+ cmp x1, #2
+ beq 1f
+ mov x0, xzr
+ b 100f
+1:
+
+ // ZA should still have the value we loaded
+ ldr x0, =scratch
+ mov w12, wzr
+ _str_za 12, 0 // ZA.H[W12] stored to [X0]
+ ldr x1, [x0]
+ cmp x1, #MAGIC
+ beq 2f
+ mov x0, xzr
+ b 100f
+
+2:
+ // All tests passed
+ mov x0, #1
+100:
+ ret
+
diff --git a/tools/testing/selftests/arm64/fp/za-fork.c b/tools/testing/selftests/arm64/fp/za-fork.c
new file mode 100644
index 000000000000..ff475c649e96
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/za-fork.c
@@ -0,0 +1,156 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2022 ARM Limited.
+ * Original author: Mark Brown <broonie@kernel.org>
+ */
+
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/sched.h>
+#include <linux/wait.h>
+
+#define EXPECTED_TESTS 1
+
+static void putstr(const char *str)
+{
+ write(1, str, strlen(str));
+}
+
+static void putnum(unsigned int num)
+{
+ char c;
+
+ if (num / 10)
+ putnum(num / 10);
+
+ c = '0' + (num % 10);
+ write(1, &c, 1);
+}
+
+static int tests_run;
+static int tests_passed;
+static int tests_failed;
+static int tests_skipped;
+
+static void print_summary(void)
+{
+ if (tests_passed + tests_failed + tests_skipped != EXPECTED_TESTS)
+ putstr("# UNEXPECTED TEST COUNT: ");
+
+ putstr("# Totals: pass:");
+ putnum(tests_passed);
+ putstr(" fail:");
+ putnum(tests_failed);
+ putstr(" xfail:0 xpass:0 skip:");
+ putnum(tests_skipped);
+ putstr(" error:0\n");
+}
+
+int fork_test(void);
+int verify_fork(void);
+
+/*
+ * If we fork the value in the parent should be unchanged and the
+ * child should start with the same value. This is called from the
+ * fork_test() asm function.
+ */
+int fork_test_c(void)
+{
+ pid_t newpid, waiting;
+ int child_status, parent_result;
+
+ newpid = fork();
+ if (newpid == 0) {
+ /* In child */
+ if (!verify_fork()) {
+ putstr("# ZA state invalid in child\n");
+ exit(0);
+ } else {
+ exit(1);
+ }
+ }
+ if (newpid < 0) {
+ putstr("# fork() failed: -");
+ putnum(-newpid);
+ putstr("\n");
+ return 0;
+ }
+
+ parent_result = verify_fork();
+ if (!parent_result)
+ putstr("# ZA state invalid in parent\n");
+
+ for (;;) {
+ waiting = waitpid(newpid, &child_status, 0);
+
+ if (waiting < 0) {
+ if (errno == EINTR)
+ continue;
+ putstr("# waitpid() failed: ");
+ putnum(errno);
+ putstr("\n");
+ return 0;
+ }
+ if (waiting != newpid) {
+ putstr("# waitpid() returned wrong PID\n");
+ return 0;
+ }
+
+ if (!WIFEXITED(child_status)) {
+ putstr("# child did not exit\n");
+ return 0;
+ }
+
+ return WEXITSTATUS(child_status) && parent_result;
+ }
+}
+
+#define run_test(name) \
+ if (name()) { \
+ tests_passed++; \
+ } else { \
+ tests_failed++; \
+ putstr("not "); \
+ } \
+ putstr("ok "); \
+ putnum(++tests_run); \
+ putstr(" " #name "\n");
+
+int main(int argc, char **argv)
+{
+ int ret, i;
+
+ putstr("TAP version 13\n");
+ putstr("1..");
+ putnum(EXPECTED_TESTS);
+ putstr("\n");
+
+ putstr("# PID: ");
+ putnum(getpid());
+ putstr("\n");
+
+ /*
+ * This test is run with nolibc which doesn't support hwcap and
+ * it's probably disproportionate to implement so instead check
+ * for the default vector length configuration in /proc.
+ */
+ ret = open("/proc/sys/abi/sme_default_vector_length", O_RDONLY, 0);
+ if (ret >= 0) {
+ run_test(fork_test);
+
+ } else {
+ putstr("# SME support not present\n");
+
+ for (i = 0; i < EXPECTED_TESTS; i++) {
+ putstr("ok ");
+ putnum(i);
+ putstr(" skipped\n");
+ }
+
+ tests_skipped += EXPECTED_TESTS;
+ }
+
+ print_summary();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/arm64/fp/za-ptrace.c b/tools/testing/selftests/arm64/fp/za-ptrace.c
new file mode 100644
index 000000000000..bf6158654056
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/za-ptrace.c
@@ -0,0 +1,356 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021 ARM Limited.
+ */
+#include <errno.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+#include <sys/ptrace.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <sys/wait.h>
+#include <asm/sigcontext.h>
+#include <asm/ptrace.h>
+
+#include "../../kselftest.h"
+
+/* <linux/elf.h> and <sys/auxv.h> don't like each other, so: */
+#ifndef NT_ARM_ZA
+#define NT_ARM_ZA 0x40c
+#endif
+
+#define EXPECTED_TESTS (((SVE_VQ_MAX - SVE_VQ_MIN) + 1) * 3)
+
+static void fill_buf(char *buf, size_t size)
+{
+ int i;
+
+ for (i = 0; i < size; i++)
+ buf[i] = random();
+}
+
+static int do_child(void)
+{
+ if (ptrace(PTRACE_TRACEME, -1, NULL, NULL))
+ ksft_exit_fail_msg("PTRACE_TRACEME", strerror(errno));
+
+ if (raise(SIGSTOP))
+ ksft_exit_fail_msg("raise(SIGSTOP)", strerror(errno));
+
+ return EXIT_SUCCESS;
+}
+
+static struct user_za_header *get_za(pid_t pid, void **buf, size_t *size)
+{
+ struct user_za_header *za;
+ void *p;
+ size_t sz = sizeof(*za);
+ struct iovec iov;
+
+ while (1) {
+ if (*size < sz) {
+ p = realloc(*buf, sz);
+ if (!p) {
+ errno = ENOMEM;
+ goto error;
+ }
+
+ *buf = p;
+ *size = sz;
+ }
+
+ iov.iov_base = *buf;
+ iov.iov_len = sz;
+ if (ptrace(PTRACE_GETREGSET, pid, NT_ARM_ZA, &iov))
+ goto error;
+
+ za = *buf;
+ if (za->size <= sz)
+ break;
+
+ sz = za->size;
+ }
+
+ return za;
+
+error:
+ return NULL;
+}
+
+static int set_za(pid_t pid, const struct user_za_header *za)
+{
+ struct iovec iov;
+
+ iov.iov_base = (void *)za;
+ iov.iov_len = za->size;
+ return ptrace(PTRACE_SETREGSET, pid, NT_ARM_ZA, &iov);
+}
+
+/* Validate attempting to set the specfied VL via ptrace */
+static void ptrace_set_get_vl(pid_t child, unsigned int vl, bool *supported)
+{
+ struct user_za_header za;
+ struct user_za_header *new_za = NULL;
+ size_t new_za_size = 0;
+ int ret, prctl_vl;
+
+ *supported = false;
+
+ /* Check if the VL is supported in this process */
+ prctl_vl = prctl(PR_SME_SET_VL, vl);
+ if (prctl_vl == -1)
+ ksft_exit_fail_msg("prctl(PR_SME_SET_VL) failed: %s (%d)\n",
+ strerror(errno), errno);
+
+ /* If the VL is not supported then a supported VL will be returned */
+ *supported = (prctl_vl == vl);
+
+ /* Set the VL by doing a set with no register payload */
+ memset(&za, 0, sizeof(za));
+ za.size = sizeof(za);
+ za.vl = vl;
+ ret = set_za(child, &za);
+ if (ret != 0) {
+ ksft_test_result_fail("Failed to set VL %u\n", vl);
+ return;
+ }
+
+ /*
+ * Read back the new register state and verify that we have the
+ * same VL that we got from prctl() on ourselves.
+ */
+ if (!get_za(child, (void **)&new_za, &new_za_size)) {
+ ksft_test_result_fail("Failed to read VL %u\n", vl);
+ return;
+ }
+
+ ksft_test_result(new_za->vl = prctl_vl, "Set VL %u\n", vl);
+
+ free(new_za);
+}
+
+/* Validate attempting to set no ZA data and read it back */
+static void ptrace_set_no_data(pid_t child, unsigned int vl)
+{
+ void *read_buf = NULL;
+ struct user_za_header write_za;
+ struct user_za_header *read_za;
+ size_t read_za_size = 0;
+ int ret;
+
+ /* Set up some data and write it out */
+ memset(&write_za, 0, sizeof(write_za));
+ write_za.size = ZA_PT_ZA_OFFSET;
+ write_za.vl = vl;
+
+ ret = set_za(child, &write_za);
+ if (ret != 0) {
+ ksft_test_result_fail("Failed to set VL %u no data\n", vl);
+ return;
+ }
+
+ /* Read the data back */
+ if (!get_za(child, (void **)&read_buf, &read_za_size)) {
+ ksft_test_result_fail("Failed to read VL %u no data\n", vl);
+ return;
+ }
+ read_za = read_buf;
+
+ /* We might read more data if there's extensions we don't know */
+ if (read_za->size < write_za.size) {
+ ksft_test_result_fail("VL %u wrote %d bytes, only read %d\n",
+ vl, write_za.size, read_za->size);
+ goto out_read;
+ }
+
+ ksft_test_result(read_za->size == write_za.size,
+ "Disabled ZA for VL %u\n", vl);
+
+out_read:
+ free(read_buf);
+}
+
+/* Validate attempting to set data and read it back */
+static void ptrace_set_get_data(pid_t child, unsigned int vl)
+{
+ void *write_buf;
+ void *read_buf = NULL;
+ struct user_za_header *write_za;
+ struct user_za_header *read_za;
+ size_t read_za_size = 0;
+ unsigned int vq = sve_vq_from_vl(vl);
+ int ret;
+ size_t data_size;
+
+ data_size = ZA_PT_SIZE(vq);
+ write_buf = malloc(data_size);
+ if (!write_buf) {
+ ksft_test_result_fail("Error allocating %d byte buffer for VL %u\n",
+ data_size, vl);
+ return;
+ }
+ write_za = write_buf;
+
+ /* Set up some data and write it out */
+ memset(write_za, 0, data_size);
+ write_za->size = data_size;
+ write_za->vl = vl;
+
+ fill_buf(write_buf + ZA_PT_ZA_OFFSET, ZA_PT_ZA_SIZE(vq));
+
+ ret = set_za(child, write_za);
+ if (ret != 0) {
+ ksft_test_result_fail("Failed to set VL %u data\n", vl);
+ goto out;
+ }
+
+ /* Read the data back */
+ if (!get_za(child, (void **)&read_buf, &read_za_size)) {
+ ksft_test_result_fail("Failed to read VL %u data\n", vl);
+ goto out;
+ }
+ read_za = read_buf;
+
+ /* We might read more data if there's extensions we don't know */
+ if (read_za->size < write_za->size) {
+ ksft_test_result_fail("VL %u wrote %d bytes, only read %d\n",
+ vl, write_za->size, read_za->size);
+ goto out_read;
+ }
+
+ ksft_test_result(memcmp(write_buf + ZA_PT_ZA_OFFSET,
+ read_buf + ZA_PT_ZA_OFFSET,
+ ZA_PT_ZA_SIZE(vq)) == 0,
+ "Data match for VL %u\n", vl);
+
+out_read:
+ free(read_buf);
+out:
+ free(write_buf);
+}
+
+static int do_parent(pid_t child)
+{
+ int ret = EXIT_FAILURE;
+ pid_t pid;
+ int status;
+ siginfo_t si;
+ unsigned int vq, vl;
+ bool vl_supported;
+
+ /* Attach to the child */
+ while (1) {
+ int sig;
+
+ pid = wait(&status);
+ if (pid == -1) {
+ perror("wait");
+ goto error;
+ }
+
+ /*
+ * This should never happen but it's hard to flag in
+ * the framework.
+ */
+ if (pid != child)
+ continue;
+
+ if (WIFEXITED(status) || WIFSIGNALED(status))
+ ksft_exit_fail_msg("Child died unexpectedly\n");
+
+ if (!WIFSTOPPED(status))
+ goto error;
+
+ sig = WSTOPSIG(status);
+
+ if (ptrace(PTRACE_GETSIGINFO, pid, NULL, &si)) {
+ if (errno == ESRCH)
+ goto disappeared;
+
+ if (errno == EINVAL) {
+ sig = 0; /* bust group-stop */
+ goto cont;
+ }
+
+ ksft_test_result_fail("PTRACE_GETSIGINFO: %s\n",
+ strerror(errno));
+ goto error;
+ }
+
+ if (sig == SIGSTOP && si.si_code == SI_TKILL &&
+ si.si_pid == pid)
+ break;
+
+ cont:
+ if (ptrace(PTRACE_CONT, pid, NULL, sig)) {
+ if (errno == ESRCH)
+ goto disappeared;
+
+ ksft_test_result_fail("PTRACE_CONT: %s\n",
+ strerror(errno));
+ goto error;
+ }
+ }
+
+ ksft_print_msg("Parent is %d, child is %d\n", getpid(), child);
+
+ /* Step through every possible VQ */
+ for (vq = SVE_VQ_MIN; vq <= SVE_VQ_MAX; vq++) {
+ vl = sve_vl_from_vq(vq);
+
+ /* First, try to set this vector length */
+ ptrace_set_get_vl(child, vl, &vl_supported);
+
+ /* If the VL is supported validate data set/get */
+ if (vl_supported) {
+ ptrace_set_no_data(child, vl);
+ ptrace_set_get_data(child, vl);
+ } else {
+ ksft_test_result_skip("Disabled ZA for VL %u\n", vl);
+ ksft_test_result_skip("Get and set data for VL %u\n",
+ vl);
+ }
+ }
+
+ ret = EXIT_SUCCESS;
+
+error:
+ kill(child, SIGKILL);
+
+disappeared:
+ return ret;
+}
+
+int main(void)
+{
+ int ret = EXIT_SUCCESS;
+ pid_t child;
+
+ srandom(getpid());
+
+ ksft_print_header();
+
+ if (!(getauxval(AT_HWCAP2) & HWCAP2_SME)) {
+ ksft_set_plan(1);
+ ksft_exit_skip("SME not available\n");
+ }
+
+ ksft_set_plan(EXPECTED_TESTS);
+
+ child = fork();
+ if (!child)
+ return do_child();
+
+ if (do_parent(child))
+ ret = EXIT_FAILURE;
+
+ ksft_print_cnts();
+
+ return ret;
+}
diff --git a/tools/testing/selftests/arm64/fp/za-stress b/tools/testing/selftests/arm64/fp/za-stress
new file mode 100644
index 000000000000..5ac386b55b95
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/za-stress
@@ -0,0 +1,59 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+# Copyright (C) 2015-2019 ARM Limited.
+# Original author: Dave Martin <Dave.Martin@arm.com>
+
+set -ue
+
+NR_CPUS=`nproc`
+
+pids=
+logs=
+
+cleanup () {
+ trap - INT TERM CHLD
+ set +e
+
+ if [ -n "$pids" ]; then
+ kill $pids
+ wait $pids
+ pids=
+ fi
+
+ if [ -n "$logs" ]; then
+ cat $logs
+ rm $logs
+ logs=
+ fi
+}
+
+interrupt () {
+ cleanup
+ exit 0
+}
+
+child_died () {
+ cleanup
+ exit 1
+}
+
+trap interrupt INT TERM EXIT
+
+for x in `seq 0 $((NR_CPUS * 4))`; do
+ log=`mktemp`
+ logs=$logs\ $log
+ ./za-test >$log &
+ pids=$pids\ $!
+done
+
+# Wait for all child processes to be created:
+sleep 10
+
+while :; do
+ kill -USR1 $pids
+done &
+pids=$pids\ $!
+
+wait
+
+exit 1
diff --git a/tools/testing/selftests/arm64/fp/za-test.S b/tools/testing/selftests/arm64/fp/za-test.S
new file mode 100644
index 000000000000..9ab6f9cd9623
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/za-test.S
@@ -0,0 +1,388 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2021 ARM Limited.
+// Original author: Mark Brown <broonie@kernel.org>
+//
+// Scalable Matrix Extension ZA context switch test
+// Repeatedly writes unique test patterns into each ZA tile
+// and reads them back to verify integrity.
+//
+// for x in `seq 1 NR_CPUS`; do sve-test & pids=$pids\ $! ; done
+// (leave it running for as long as you want...)
+// kill $pids
+
+#include <asm/unistd.h>
+#include "assembler.h"
+#include "asm-offsets.h"
+#include "sme-inst.h"
+
+.arch_extension sve
+
+#define MAXVL 2048
+#define MAXVL_B (MAXVL / 8)
+
+// Declare some storage space to shadow ZA register contents and a
+// scratch buffer for a vector.
+.pushsection .text
+.data
+.align 4
+zaref:
+ .space MAXVL_B * MAXVL_B
+scratch:
+ .space MAXVL_B
+.popsection
+
+// Trivial memory copy: copy x2 bytes, starting at address x1, to address x0.
+// Clobbers x0-x3
+function memcpy
+ cmp x2, #0
+ b.eq 1f
+0: ldrb w3, [x1], #1
+ strb w3, [x0], #1
+ subs x2, x2, #1
+ b.ne 0b
+1: ret
+endfunction
+
+// Generate a test pattern for storage in ZA
+// x0: pid
+// x1: row in ZA
+// x2: generation
+
+// These values are used to constuct a 32-bit pattern that is repeated in the
+// scratch buffer as many times as will fit:
+// bits 31:28 generation number (increments once per test_loop)
+// bits 27:16 pid
+// bits 15: 8 row number
+// bits 7: 0 32-bit lane index
+
+function pattern
+ mov w3, wzr
+ bfi w3, w0, #16, #12 // PID
+ bfi w3, w1, #8, #8 // Row
+ bfi w3, w2, #28, #4 // Generation
+
+ ldr x0, =scratch
+ mov w1, #MAXVL_B / 4
+
+0: str w3, [x0], #4
+ add w3, w3, #1 // Lane
+ subs w1, w1, #1
+ b.ne 0b
+
+ ret
+endfunction
+
+// Get the address of shadow data for ZA horizontal vector xn
+.macro _adrza xd, xn, nrtmp
+ ldr \xd, =zaref
+ rdsvl \nrtmp, 1
+ madd \xd, x\nrtmp, \xn, \xd
+.endm
+
+// Set up test pattern in a ZA horizontal vector
+// x0: pid
+// x1: row number
+// x2: generation
+function setup_za
+ mov x4, x30
+ mov x12, x1 // Use x12 for vector select
+
+ bl pattern // Get pattern in scratch buffer
+ _adrza x0, x12, 2 // Shadow buffer pointer to x0 and x5
+ mov x5, x0
+ ldr x1, =scratch
+ bl memcpy // length set up in x2 by _adrza
+
+ _ldr_za 12, 5 // load vector w12 from pointer x5
+
+ ret x4
+endfunction
+
+// Trivial memory compare: compare x2 bytes starting at address x0 with
+// bytes starting at address x1.
+// Returns only if all bytes match; otherwise, the program is aborted.
+// Clobbers x0-x5.
+function memcmp
+ cbz x2, 2f
+
+ stp x0, x1, [sp, #-0x20]!
+ str x2, [sp, #0x10]
+
+ mov x5, #0
+0: ldrb w3, [x0, x5]
+ ldrb w4, [x1, x5]
+ add x5, x5, #1
+ cmp w3, w4
+ b.ne 1f
+ subs x2, x2, #1
+ b.ne 0b
+
+1: ldr x2, [sp, #0x10]
+ ldp x0, x1, [sp], #0x20
+ b.ne barf
+
+2: ret
+endfunction
+
+// Verify that a ZA vector matches its shadow in memory, else abort
+// x0: row number
+// Clobbers x0-x7 and x12.
+function check_za
+ mov x3, x30
+
+ mov x12, x0
+ _adrza x5, x0, 6 // pointer to expected value in x5
+ mov x4, x0
+ ldr x7, =scratch // x7 is scratch
+
+ mov x0, x7 // Poison scratch
+ mov x1, x6
+ bl memfill_ae
+
+ _str_za 12, 7 // save vector w12 to pointer x7
+
+ mov x0, x5
+ mov x1, x7
+ mov x2, x6
+ mov x30, x3
+ b memcmp
+endfunction
+
+// Any SME register modified here can cause corruption in the main
+// thread -- but *only* the locations modified here.
+function irritator_handler
+ // Increment the irritation signal count (x23):
+ ldr x0, [x2, #ucontext_regs + 8 * 23]
+ add x0, x0, #1
+ str x0, [x2, #ucontext_regs + 8 * 23]
+
+ // Corrupt some random ZA data
+#if 0
+ adr x0, .text + (irritator_handler - .text) / 16 * 16
+ movi v0.8b, #1
+ movi v9.16b, #2
+ movi v31.8b, #3
+#endif
+
+ ret
+endfunction
+
+function terminate_handler
+ mov w21, w0
+ mov x20, x2
+
+ puts "Terminated by signal "
+ mov w0, w21
+ bl putdec
+ puts ", no error, iterations="
+ ldr x0, [x20, #ucontext_regs + 8 * 22]
+ bl putdec
+ puts ", signals="
+ ldr x0, [x20, #ucontext_regs + 8 * 23]
+ bl putdecn
+
+ mov x0, #0
+ mov x8, #__NR_exit
+ svc #0
+endfunction
+
+// w0: signal number
+// x1: sa_action
+// w2: sa_flags
+// Clobbers x0-x6,x8
+function setsignal
+ str x30, [sp, #-((sa_sz + 15) / 16 * 16 + 16)]!
+
+ mov w4, w0
+ mov x5, x1
+ mov w6, w2
+
+ add x0, sp, #16
+ mov x1, #sa_sz
+ bl memclr
+
+ mov w0, w4
+ add x1, sp, #16
+ str w6, [x1, #sa_flags]
+ str x5, [x1, #sa_handler]
+ mov x2, #0
+ mov x3, #sa_mask_sz
+ mov x8, #__NR_rt_sigaction
+ svc #0
+
+ cbz w0, 1f
+
+ puts "sigaction failure\n"
+ b .Labort
+
+1: ldr x30, [sp], #((sa_sz + 15) / 16 * 16 + 16)
+ ret
+endfunction
+
+// Main program entry point
+.globl _start
+function _start
+_start:
+ puts "Streaming mode "
+ smstart_za
+
+ // Sanity-check and report the vector length
+
+ rdsvl 19, 8
+ cmp x19, #128
+ b.lo 1f
+ cmp x19, #2048
+ b.hi 1f
+ tst x19, #(8 - 1)
+ b.eq 2f
+
+1: puts "bad vector length: "
+ mov x0, x19
+ bl putdecn
+ b .Labort
+
+2: puts "vector length:\t"
+ mov x0, x19
+ bl putdec
+ puts " bits\n"
+
+ // Obtain our PID, to ensure test pattern uniqueness between processes
+ mov x8, #__NR_getpid
+ svc #0
+ mov x20, x0
+
+ puts "PID:\t"
+ mov x0, x20
+ bl putdecn
+
+ mov x23, #0 // Irritation signal count
+
+ mov w0, #SIGINT
+ adr x1, terminate_handler
+ mov w2, #SA_SIGINFO
+ bl setsignal
+
+ mov w0, #SIGTERM
+ adr x1, terminate_handler
+ mov w2, #SA_SIGINFO
+ bl setsignal
+
+ mov w0, #SIGUSR1
+ adr x1, irritator_handler
+ mov w2, #SA_SIGINFO
+ orr w2, w2, #SA_NODEFER
+ bl setsignal
+
+ mov x22, #0 // generation number, increments per iteration
+.Ltest_loop:
+ rdsvl 0, 8
+ cmp x0, x19
+ b.ne vl_barf
+
+ rdsvl 21, 1 // Set up ZA & shadow with test pattern
+0: mov x0, x20
+ sub x1, x21, #1
+ mov x2, x22
+ bl setup_za
+ subs x21, x21, #1
+ b.ne 0b
+
+ and x8, x22, #127 // Every 128 interations...
+ cbz x8, 0f
+ mov x8, #__NR_getpid // (otherwise minimal syscall)
+ b 1f
+0:
+ mov x8, #__NR_sched_yield // ...encourage preemption
+1:
+ svc #0
+
+ mrs x0, S3_3_C4_C2_2 // SVCR should have ZA=1,SM=0
+ and x1, x0, #3
+ cmp x1, #2
+ b.ne svcr_barf
+
+ rdsvl 21, 1 // Verify that the data made it through
+ rdsvl 24, 1 // Verify that the data made it through
+0: sub x0, x24, x21
+ bl check_za
+ subs x21, x21, #1
+ bne 0b
+
+ add x22, x22, #1 // Everything still working
+ b .Ltest_loop
+
+.Labort:
+ mov x0, #0
+ mov x1, #SIGABRT
+ mov x8, #__NR_kill
+ svc #0
+endfunction
+
+function barf
+// fpsimd.c acitivty log dump hack
+// ldr w0, =0xdeadc0de
+// mov w8, #__NR_exit
+// svc #0
+// end hack
+ smstop
+ mov x10, x0 // expected data
+ mov x11, x1 // actual data
+ mov x12, x2 // data size
+
+ puts "Mismatch: PID="
+ mov x0, x20
+ bl putdec
+ puts ", iteration="
+ mov x0, x22
+ bl putdec
+ puts ", row="
+ mov x0, x21
+ bl putdecn
+ puts "\tExpected ["
+ mov x0, x10
+ mov x1, x12
+ bl dumphex
+ puts "]\n\tGot ["
+ mov x0, x11
+ mov x1, x12
+ bl dumphex
+ puts "]\n"
+
+ mov x8, #__NR_getpid
+ svc #0
+// fpsimd.c acitivty log dump hack
+// ldr w0, =0xdeadc0de
+// mov w8, #__NR_exit
+// svc #0
+// ^ end of hack
+ mov x1, #SIGABRT
+ mov x8, #__NR_kill
+ svc #0
+// mov x8, #__NR_exit
+// mov x1, #1
+// svc #0
+endfunction
+
+function vl_barf
+ mov x10, x0
+
+ puts "Bad active VL: "
+ mov x0, x10
+ bl putdecn
+
+ mov x8, #__NR_exit
+ mov x1, #1
+ svc #0
+endfunction
+
+function svcr_barf
+ mov x10, x0
+
+ puts "Bad SVCR: "
+ mov x0, x10
+ bl putdecn
+
+ mov x8, #__NR_exit
+ mov x1, #1
+ svc #0
+endfunction
diff --git a/tools/testing/selftests/arm64/mte/.gitignore b/tools/testing/selftests/arm64/mte/.gitignore
index d1fe4ddf1669..052d0f9f92b3 100644
--- a/tools/testing/selftests/arm64/mte/.gitignore
+++ b/tools/testing/selftests/arm64/mte/.gitignore
@@ -3,5 +3,6 @@ check_gcr_el1_cswitch
check_tags_inclusion
check_child_memory
check_mmap_options
+check_prctl
check_ksm_options
check_user_mem
diff --git a/tools/testing/selftests/arm64/mte/check_child_memory.c b/tools/testing/selftests/arm64/mte/check_child_memory.c
index 43bd94f853ba..7597fc632cad 100644
--- a/tools/testing/selftests/arm64/mte/check_child_memory.c
+++ b/tools/testing/selftests/arm64/mte/check_child_memory.c
@@ -85,9 +85,9 @@ static int check_child_memory_mapping(int mem_type, int mode, int mapping)
{
char *ptr;
int run, result;
- int item = sizeof(sizes)/sizeof(int);
+ int item = ARRAY_SIZE(sizes);
- item = sizeof(sizes)/sizeof(int);
+ item = ARRAY_SIZE(sizes);
mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
for (run = 0; run < item; run++) {
ptr = (char *)mte_allocate_memory_tag_range(sizes[run], mem_type, mapping,
@@ -107,7 +107,7 @@ static int check_child_file_mapping(int mem_type, int mode, int mapping)
{
char *ptr, *map_ptr;
int run, fd, map_size, result = KSFT_PASS;
- int total = sizeof(sizes)/sizeof(int);
+ int total = ARRAY_SIZE(sizes);
mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
for (run = 0; run < total; run++) {
@@ -144,7 +144,7 @@ static int check_child_file_mapping(int mem_type, int mode, int mapping)
int main(int argc, char *argv[])
{
int err;
- int item = sizeof(sizes)/sizeof(int);
+ int item = ARRAY_SIZE(sizes);
page_size = getpagesize();
if (!page_size) {
diff --git a/tools/testing/selftests/arm64/mte/check_prctl.c b/tools/testing/selftests/arm64/mte/check_prctl.c
new file mode 100644
index 000000000000..f139a33a43ef
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/check_prctl.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2022 ARM Limited
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+
+#include <asm/hwcap.h>
+
+#include "kselftest.h"
+
+static int set_tagged_addr_ctrl(int val)
+{
+ int ret;
+
+ ret = prctl(PR_SET_TAGGED_ADDR_CTRL, val, 0, 0, 0);
+ if (ret < 0)
+ ksft_print_msg("PR_SET_TAGGED_ADDR_CTRL: failed %d %d (%s)\n",
+ ret, errno, strerror(errno));
+ return ret;
+}
+
+static int get_tagged_addr_ctrl(void)
+{
+ int ret;
+
+ ret = prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0);
+ if (ret < 0)
+ ksft_print_msg("PR_GET_TAGGED_ADDR_CTRL failed: %d %d (%s)\n",
+ ret, errno, strerror(errno));
+ return ret;
+}
+
+/*
+ * Read the current mode without having done any configuration, should
+ * run first.
+ */
+void check_basic_read(void)
+{
+ int ret;
+
+ ret = get_tagged_addr_ctrl();
+ if (ret < 0) {
+ ksft_test_result_fail("check_basic_read\n");
+ return;
+ }
+
+ if (ret & PR_MTE_TCF_SYNC)
+ ksft_print_msg("SYNC enabled\n");
+ if (ret & PR_MTE_TCF_ASYNC)
+ ksft_print_msg("ASYNC enabled\n");
+
+ /* Any configuration is valid */
+ ksft_test_result_pass("check_basic_read\n");
+}
+
+/*
+ * Attempt to set a specified combination of modes.
+ */
+void set_mode_test(const char *name, int hwcap2, int mask)
+{
+ int ret;
+
+ if ((getauxval(AT_HWCAP2) & hwcap2) != hwcap2) {
+ ksft_test_result_skip("%s\n", name);
+ return;
+ }
+
+ ret = set_tagged_addr_ctrl(mask);
+ if (ret < 0) {
+ ksft_test_result_fail("%s\n", name);
+ return;
+ }
+
+ ret = get_tagged_addr_ctrl();
+ if (ret < 0) {
+ ksft_test_result_fail("%s\n", name);
+ return;
+ }
+
+ if ((ret & PR_MTE_TCF_MASK) == mask) {
+ ksft_test_result_pass("%s\n", name);
+ } else {
+ ksft_print_msg("Got %x, expected %x\n",
+ (ret & PR_MTE_TCF_MASK), mask);
+ ksft_test_result_fail("%s\n", name);
+ }
+}
+
+struct mte_mode {
+ int mask;
+ int hwcap2;
+ const char *name;
+} mte_modes[] = {
+ { PR_MTE_TCF_NONE, 0, "NONE" },
+ { PR_MTE_TCF_SYNC, HWCAP2_MTE, "SYNC" },
+ { PR_MTE_TCF_ASYNC, HWCAP2_MTE, "ASYNC" },
+ { PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC, HWCAP2_MTE, "SYNC+ASYNC" },
+};
+
+int main(void)
+{
+ int i;
+
+ ksft_print_header();
+ ksft_set_plan(5);
+
+ check_basic_read();
+ for (i = 0; i < ARRAY_SIZE(mte_modes); i++)
+ set_mode_test(mte_modes[i].name, mte_modes[i].hwcap2,
+ mte_modes[i].mask);
+
+ ksft_print_cnts();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/arm64/mte/check_tags_inclusion.c b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c
index deaef1f61076..2b1425b92b69 100644
--- a/tools/testing/selftests/arm64/mte/check_tags_inclusion.c
+++ b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c
@@ -23,10 +23,13 @@ static int verify_mte_pointer_validity(char *ptr, int mode)
{
mte_initialize_current_context(mode, (uintptr_t)ptr, BUFFER_SIZE);
/* Check the validity of the tagged pointer */
- memset((void *)ptr, '1', BUFFER_SIZE);
+ memset(ptr, '1', BUFFER_SIZE);
mte_wait_after_trig();
- if (cur_mte_cxt.fault_valid)
+ if (cur_mte_cxt.fault_valid) {
+ ksft_print_msg("Unexpected fault recorded for %p-%p in mode %x\n",
+ ptr, ptr + BUFFER_SIZE, mode);
return KSFT_FAIL;
+ }
/* Proceed further for nonzero tags */
if (!MT_FETCH_TAG((uintptr_t)ptr))
return KSFT_PASS;
@@ -34,27 +37,32 @@ static int verify_mte_pointer_validity(char *ptr, int mode)
/* Check the validity outside the range */
ptr[BUFFER_SIZE] = '2';
mte_wait_after_trig();
- if (!cur_mte_cxt.fault_valid)
+ if (!cur_mte_cxt.fault_valid) {
+ ksft_print_msg("No valid fault recorded for %p in mode %x\n",
+ ptr, mode);
return KSFT_FAIL;
- else
+ } else {
return KSFT_PASS;
+ }
}
static int check_single_included_tags(int mem_type, int mode)
{
char *ptr;
- int tag, run, result = KSFT_PASS;
+ int tag, run, ret, result = KSFT_PASS;
- ptr = (char *)mte_allocate_memory(BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, 0, false);
+ ptr = mte_allocate_memory(BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, 0, false);
if (check_allocated_memory(ptr, BUFFER_SIZE + MT_GRANULE_SIZE,
mem_type, false) != KSFT_PASS)
return KSFT_FAIL;
for (tag = 0; (tag < MT_TAG_COUNT) && (result == KSFT_PASS); tag++) {
- mte_switch_mode(mode, MT_INCLUDE_VALID_TAG(tag));
+ ret = mte_switch_mode(mode, MT_INCLUDE_VALID_TAG(tag));
+ if (ret != 0)
+ result = KSFT_FAIL;
/* Try to catch a excluded tag by a number of tries. */
for (run = 0; (run < RUNS) && (result == KSFT_PASS); run++) {
- ptr = (char *)mte_insert_tags(ptr, BUFFER_SIZE);
+ ptr = mte_insert_tags(ptr, BUFFER_SIZE);
/* Check tag value */
if (MT_FETCH_TAG((uintptr_t)ptr) == tag) {
ksft_print_msg("FAIL: wrong tag = 0x%x with include mask=0x%x\n",
@@ -66,7 +74,7 @@ static int check_single_included_tags(int mem_type, int mode)
result = verify_mte_pointer_validity(ptr, mode);
}
}
- mte_free_memory_tag_range((void *)ptr, BUFFER_SIZE, mem_type, 0, MT_GRANULE_SIZE);
+ mte_free_memory_tag_range(ptr, BUFFER_SIZE, mem_type, 0, MT_GRANULE_SIZE);
return result;
}
@@ -76,7 +84,7 @@ static int check_multiple_included_tags(int mem_type, int mode)
int tag, run, result = KSFT_PASS;
unsigned long excl_mask = 0;
- ptr = (char *)mte_allocate_memory(BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, 0, false);
+ ptr = mte_allocate_memory(BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, 0, false);
if (check_allocated_memory(ptr, BUFFER_SIZE + MT_GRANULE_SIZE,
mem_type, false) != KSFT_PASS)
return KSFT_FAIL;
@@ -86,7 +94,7 @@ static int check_multiple_included_tags(int mem_type, int mode)
mte_switch_mode(mode, MT_INCLUDE_VALID_TAGS(excl_mask));
/* Try to catch a excluded tag by a number of tries. */
for (run = 0; (run < RUNS) && (result == KSFT_PASS); run++) {
- ptr = (char *)mte_insert_tags(ptr, BUFFER_SIZE);
+ ptr = mte_insert_tags(ptr, BUFFER_SIZE);
/* Check tag value */
if (MT_FETCH_TAG((uintptr_t)ptr) < tag) {
ksft_print_msg("FAIL: wrong tag = 0x%x with include mask=0x%x\n",
@@ -98,21 +106,23 @@ static int check_multiple_included_tags(int mem_type, int mode)
result = verify_mte_pointer_validity(ptr, mode);
}
}
- mte_free_memory_tag_range((void *)ptr, BUFFER_SIZE, mem_type, 0, MT_GRANULE_SIZE);
+ mte_free_memory_tag_range(ptr, BUFFER_SIZE, mem_type, 0, MT_GRANULE_SIZE);
return result;
}
static int check_all_included_tags(int mem_type, int mode)
{
char *ptr;
- int run, result = KSFT_PASS;
+ int run, ret, result = KSFT_PASS;
- ptr = (char *)mte_allocate_memory(BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, 0, false);
+ ptr = mte_allocate_memory(BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, 0, false);
if (check_allocated_memory(ptr, BUFFER_SIZE + MT_GRANULE_SIZE,
mem_type, false) != KSFT_PASS)
return KSFT_FAIL;
- mte_switch_mode(mode, MT_INCLUDE_TAG_MASK);
+ ret = mte_switch_mode(mode, MT_INCLUDE_TAG_MASK);
+ if (ret != 0)
+ return KSFT_FAIL;
/* Try to catch a excluded tag by a number of tries. */
for (run = 0; (run < RUNS) && (result == KSFT_PASS); run++) {
ptr = (char *)mte_insert_tags(ptr, BUFFER_SIZE);
@@ -122,20 +132,22 @@ static int check_all_included_tags(int mem_type, int mode)
*/
result = verify_mte_pointer_validity(ptr, mode);
}
- mte_free_memory_tag_range((void *)ptr, BUFFER_SIZE, mem_type, 0, MT_GRANULE_SIZE);
+ mte_free_memory_tag_range(ptr, BUFFER_SIZE, mem_type, 0, MT_GRANULE_SIZE);
return result;
}
static int check_none_included_tags(int mem_type, int mode)
{
char *ptr;
- int run;
+ int run, ret;
- ptr = (char *)mte_allocate_memory(BUFFER_SIZE, mem_type, 0, false);
+ ptr = mte_allocate_memory(BUFFER_SIZE, mem_type, 0, false);
if (check_allocated_memory(ptr, BUFFER_SIZE, mem_type, false) != KSFT_PASS)
return KSFT_FAIL;
- mte_switch_mode(mode, MT_EXCLUDE_TAG_MASK);
+ ret = mte_switch_mode(mode, MT_EXCLUDE_TAG_MASK);
+ if (ret != 0)
+ return KSFT_FAIL;
/* Try to catch a excluded tag by a number of tries. */
for (run = 0; run < RUNS; run++) {
ptr = (char *)mte_insert_tags(ptr, BUFFER_SIZE);
@@ -147,12 +159,12 @@ static int check_none_included_tags(int mem_type, int mode)
}
mte_initialize_current_context(mode, (uintptr_t)ptr, BUFFER_SIZE);
/* Check the write validity of the untagged pointer */
- memset((void *)ptr, '1', BUFFER_SIZE);
+ memset(ptr, '1', BUFFER_SIZE);
mte_wait_after_trig();
if (cur_mte_cxt.fault_valid)
break;
}
- mte_free_memory((void *)ptr, BUFFER_SIZE, mem_type, false);
+ mte_free_memory(ptr, BUFFER_SIZE, mem_type, false);
if (cur_mte_cxt.fault_valid)
return KSFT_FAIL;
else
diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.c b/tools/testing/selftests/arm64/mte/mte_common_util.c
index 0328a1e08f65..00ffd34c66d3 100644
--- a/tools/testing/selftests/arm64/mte/mte_common_util.c
+++ b/tools/testing/selftests/arm64/mte/mte_common_util.c
@@ -37,6 +37,10 @@ void mte_default_handler(int signum, siginfo_t *si, void *uc)
if (si->si_code == SEGV_MTEAERR) {
if (cur_mte_cxt.trig_si_code == si->si_code)
cur_mte_cxt.fault_valid = true;
+ else
+ ksft_print_msg("Got unexpected SEGV_MTEAERR at pc=$lx, fault addr=%lx\n",
+ ((ucontext_t *)uc)->uc_mcontext.pc,
+ addr);
return;
}
/* Compare the context for precise error */
@@ -124,13 +128,16 @@ static void *__mte_allocate_memory_range(size_t size, int mem_type, int mapping,
int prot_flag, map_flag;
size_t entire_size = size + range_before + range_after;
- if (mem_type != USE_MALLOC && mem_type != USE_MMAP &&
- mem_type != USE_MPROTECT) {
+ switch (mem_type) {
+ case USE_MALLOC:
+ return malloc(entire_size) + range_before;
+ case USE_MMAP:
+ case USE_MPROTECT:
+ break;
+ default:
ksft_print_msg("FAIL: Invalid allocate request\n");
return NULL;
}
- if (mem_type == USE_MALLOC)
- return malloc(entire_size) + range_before;
prot_flag = PROT_READ | PROT_WRITE;
if (mem_type == USE_MMAP)
@@ -269,18 +276,33 @@ int mte_switch_mode(int mte_option, unsigned long incl_mask)
{
unsigned long en = 0;
- if (!(mte_option == MTE_SYNC_ERR || mte_option == MTE_ASYNC_ERR ||
- mte_option == MTE_NONE_ERR || incl_mask <= MTE_ALLOW_NON_ZERO_TAG)) {
- ksft_print_msg("FAIL: Invalid mte config option\n");
+ switch (mte_option) {
+ case MTE_NONE_ERR:
+ case MTE_SYNC_ERR:
+ case MTE_ASYNC_ERR:
+ break;
+ default:
+ ksft_print_msg("FAIL: Invalid MTE option %x\n", mte_option);
+ return -EINVAL;
+ }
+
+ if (incl_mask & ~MT_INCLUDE_TAG_MASK) {
+ ksft_print_msg("FAIL: Invalid incl_mask %lx\n", incl_mask);
return -EINVAL;
}
+
en = PR_TAGGED_ADDR_ENABLE;
- if (mte_option == MTE_SYNC_ERR)
+ switch (mte_option) {
+ case MTE_SYNC_ERR:
en |= PR_MTE_TCF_SYNC;
- else if (mte_option == MTE_ASYNC_ERR)
+ break;
+ case MTE_ASYNC_ERR:
en |= PR_MTE_TCF_ASYNC;
- else if (mte_option == MTE_NONE_ERR)
+ break;
+ case MTE_NONE_ERR:
en |= PR_MTE_TCF_NONE;
+ break;
+ }
en |= (incl_mask << PR_MTE_TAG_SHIFT);
/* Enable address tagging ABI, mte error reporting mode and tag inclusion mask. */
diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.h b/tools/testing/selftests/arm64/mte/mte_common_util.h
index 195a7d1879e6..2d3e71724e55 100644
--- a/tools/testing/selftests/arm64/mte/mte_common_util.h
+++ b/tools/testing/selftests/arm64/mte/mte_common_util.h
@@ -75,10 +75,21 @@ unsigned int mte_get_pstate_tco(void);
/* Test framework static inline functions/macros */
static inline void evaluate_test(int err, const char *msg)
{
- if (err == KSFT_PASS)
+ switch (err) {
+ case KSFT_PASS:
ksft_test_result_pass(msg);
- else if (err == KSFT_FAIL)
+ break;
+ case KSFT_FAIL:
ksft_test_result_fail(msg);
+ break;
+ case KSFT_SKIP:
+ ksft_test_result_skip(msg);
+ break;
+ default:
+ ksft_test_result_error("Unknown return code %d from %s",
+ err, msg);
+ break;
+ }
}
static inline int check_allocated_memory(void *ptr, size_t size,
diff --git a/tools/testing/selftests/arm64/signal/.gitignore b/tools/testing/selftests/arm64/signal/.gitignore
index c1742755abb9..e8d2b57f73ec 100644
--- a/tools/testing/selftests/arm64/signal/.gitignore
+++ b/tools/testing/selftests/arm64/signal/.gitignore
@@ -1,5 +1,8 @@
# SPDX-License-Identifier: GPL-2.0-only
mangle_*
fake_sigreturn_*
+sme_*
+ssve_*
sve_*
+za_*
!*.[ch]
diff --git a/tools/testing/selftests/arm64/signal/test_signals.h b/tools/testing/selftests/arm64/signal/test_signals.h
index f909b70d9e98..c70fdec7d7c4 100644
--- a/tools/testing/selftests/arm64/signal/test_signals.h
+++ b/tools/testing/selftests/arm64/signal/test_signals.h
@@ -34,11 +34,15 @@
enum {
FSSBS_BIT,
FSVE_BIT,
+ FSME_BIT,
+ FSME_FA64_BIT,
FMAX_END
};
#define FEAT_SSBS (1UL << FSSBS_BIT)
#define FEAT_SVE (1UL << FSVE_BIT)
+#define FEAT_SME (1UL << FSME_BIT)
+#define FEAT_SME_FA64 (1UL << FSME_FA64_BIT)
/*
* A descriptor used to describe and configure a test case.
diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.c b/tools/testing/selftests/arm64/signal/test_signals_utils.c
index 5743897984b0..b588d10afd5b 100644
--- a/tools/testing/selftests/arm64/signal/test_signals_utils.c
+++ b/tools/testing/selftests/arm64/signal/test_signals_utils.c
@@ -27,6 +27,8 @@ static int sig_copyctx = SIGTRAP;
static char const *const feats_names[FMAX_END] = {
" SSBS ",
" SVE ",
+ " SME ",
+ " FA64 ",
};
#define MAX_FEATS_SZ 128
@@ -268,6 +270,10 @@ int test_init(struct tdescr *td)
td->feats_supported |= FEAT_SSBS;
if (getauxval(AT_HWCAP) & HWCAP_SVE)
td->feats_supported |= FEAT_SVE;
+ if (getauxval(AT_HWCAP2) & HWCAP2_SME)
+ td->feats_supported |= FEAT_SME;
+ if (getauxval(AT_HWCAP2) & HWCAP2_SME_FA64)
+ td->feats_supported |= FEAT_SME_FA64;
if (feats_ok(td)) {
if (td->feats_required & td->feats_supported)
fprintf(stderr,
diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sme_change_vl.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sme_change_vl.c
new file mode 100644
index 000000000000..7ed762b7202f
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sme_change_vl.c
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 ARM Limited
+ *
+ * Attempt to change the streaming SVE vector length in a signal
+ * handler, this is not supported and is expected to segfault.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+struct fake_sigframe sf;
+static unsigned int vls[SVE_VQ_MAX];
+unsigned int nvls = 0;
+
+static bool sme_get_vls(struct tdescr *td)
+{
+ int vq, vl;
+
+ /*
+ * Enumerate up to SVE_VQ_MAX vector lengths
+ */
+ for (vq = SVE_VQ_MAX; vq > 0; --vq) {
+ vl = prctl(PR_SVE_SET_VL, vq * 16);
+ if (vl == -1)
+ return false;
+
+ vl &= PR_SME_VL_LEN_MASK;
+
+ /* Skip missing VLs */
+ vq = sve_vq_from_vl(vl);
+
+ vls[nvls++] = vl;
+ }
+
+ /* We need at least two VLs */
+ if (nvls < 2) {
+ fprintf(stderr, "Only %d VL supported\n", nvls);
+ return false;
+ }
+
+ return true;
+}
+
+static int fake_sigreturn_ssve_change_vl(struct tdescr *td,
+ siginfo_t *si, ucontext_t *uc)
+{
+ size_t resv_sz, offset;
+ struct _aarch64_ctx *head = GET_SF_RESV_HEAD(sf);
+ struct sve_context *sve;
+
+ /* Get a signal context with a SME ZA frame in it */
+ if (!get_current_context(td, &sf.uc))
+ return 1;
+
+ resv_sz = GET_SF_RESV_SIZE(sf);
+ head = get_header(head, SVE_MAGIC, resv_sz, &offset);
+ if (!head) {
+ fprintf(stderr, "No SVE context\n");
+ return 1;
+ }
+
+ if (head->size != sizeof(struct sve_context)) {
+ fprintf(stderr, "Register data present, aborting\n");
+ return 1;
+ }
+
+ sve = (struct sve_context *)head;
+
+ /* No changes are supported; init left us at minimum VL so go to max */
+ fprintf(stderr, "Attempting to change VL from %d to %d\n",
+ sve->vl, vls[0]);
+ sve->vl = vls[0];
+
+ fake_sigreturn(&sf, sizeof(sf), 0);
+
+ return 1;
+}
+
+struct tdescr tde = {
+ .name = "FAKE_SIGRETURN_SSVE_CHANGE",
+ .descr = "Attempt to change Streaming SVE VL",
+ .feats_required = FEAT_SME,
+ .sig_ok = SIGSEGV,
+ .timeout = 3,
+ .init = sme_get_vls,
+ .run = fake_sigreturn_ssve_change_vl,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/sme_trap_no_sm.c b/tools/testing/selftests/arm64/signal/testcases/sme_trap_no_sm.c
new file mode 100644
index 000000000000..f9d76ae32bba
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/sme_trap_no_sm.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 ARM Limited
+ *
+ * Verify that using a streaming mode instruction without enabling it
+ * generates a SIGILL.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+int sme_trap_no_sm_trigger(struct tdescr *td)
+{
+ /* SMSTART ZA ; ADDHA ZA0.S, P0/M, P0/M, Z0.S */
+ asm volatile(".inst 0xd503457f ; .inst 0xc0900000");
+
+ return 0;
+}
+
+int sme_trap_no_sm_run(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+ return 1;
+}
+
+struct tdescr tde = {
+ .name = "SME trap without SM",
+ .descr = "Check that we get a SIGILL if we use streaming mode without enabling it",
+ .timeout = 3,
+ .feats_required = FEAT_SME, /* We need a SMSTART ZA */
+ .sanity_disabled = true,
+ .trigger = sme_trap_no_sm_trigger,
+ .run = sme_trap_no_sm_run,
+ .sig_ok = SIGILL,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/sme_trap_non_streaming.c b/tools/testing/selftests/arm64/signal/testcases/sme_trap_non_streaming.c
new file mode 100644
index 000000000000..e469ae5348e3
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/sme_trap_non_streaming.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 ARM Limited
+ *
+ * Verify that using an instruction not supported in streaming mode
+ * traps when in streaming mode.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+int sme_trap_non_streaming_trigger(struct tdescr *td)
+{
+ /*
+ * The framework will handle SIGILL so we need to exit SM to
+ * stop any other code triggering a further SIGILL down the
+ * line from using a streaming-illegal instruction.
+ */
+ asm volatile(".inst 0xd503437f; /* SMSTART ZA */ \
+ cnt v0.16b, v0.16b; \
+ .inst 0xd503447f /* SMSTOP ZA */");
+
+ return 0;
+}
+
+int sme_trap_non_streaming_run(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+ return 1;
+}
+
+struct tdescr tde = {
+ .name = "SME SM trap unsupported instruction",
+ .descr = "Check that we get a SIGILL if we use an unsupported instruction in streaming mode",
+ .feats_required = FEAT_SME,
+ .feats_incompatible = FEAT_SME_FA64,
+ .timeout = 3,
+ .sanity_disabled = true,
+ .trigger = sme_trap_non_streaming_trigger,
+ .run = sme_trap_non_streaming_run,
+ .sig_ok = SIGILL,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/sme_trap_za.c b/tools/testing/selftests/arm64/signal/testcases/sme_trap_za.c
new file mode 100644
index 000000000000..3a7747af4715
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/sme_trap_za.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 ARM Limited
+ *
+ * Verify that accessing ZA without enabling it generates a SIGILL.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+int sme_trap_za_trigger(struct tdescr *td)
+{
+ /* ZERO ZA */
+ asm volatile(".inst 0xc00800ff");
+
+ return 0;
+}
+
+int sme_trap_za_run(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+ return 1;
+}
+
+struct tdescr tde = {
+ .name = "SME ZA trap",
+ .descr = "Check that we get a SIGILL if we access ZA without enabling",
+ .timeout = 3,
+ .sanity_disabled = true,
+ .trigger = sme_trap_za_trigger,
+ .run = sme_trap_za_run,
+ .sig_ok = SIGILL,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/sme_vl.c b/tools/testing/selftests/arm64/signal/testcases/sme_vl.c
new file mode 100644
index 000000000000..13ff3b35cbaf
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/sme_vl.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 ARM Limited
+ *
+ * Check that the SME vector length reported in signal contexts is the
+ * expected one.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+struct fake_sigframe sf;
+unsigned int vl;
+
+static bool get_sme_vl(struct tdescr *td)
+{
+ int ret = prctl(PR_SME_GET_VL);
+ if (ret == -1)
+ return false;
+
+ vl = ret;
+
+ return true;
+}
+
+static int sme_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+ size_t resv_sz, offset;
+ struct _aarch64_ctx *head = GET_SF_RESV_HEAD(sf);
+ struct za_context *za;
+
+ /* Get a signal context which should have a ZA frame in it */
+ if (!get_current_context(td, &sf.uc))
+ return 1;
+
+ resv_sz = GET_SF_RESV_SIZE(sf);
+ head = get_header(head, ZA_MAGIC, resv_sz, &offset);
+ if (!head) {
+ fprintf(stderr, "No ZA context\n");
+ return 1;
+ }
+ za = (struct za_context *)head;
+
+ if (za->vl != vl) {
+ fprintf(stderr, "ZA sigframe VL %u, expected %u\n",
+ za->vl, vl);
+ return 1;
+ } else {
+ fprintf(stderr, "got expected VL %u\n", vl);
+ }
+
+ td->pass = 1;
+
+ return 0;
+}
+
+struct tdescr tde = {
+ .name = "SME VL",
+ .descr = "Check that we get the right SME VL reported",
+ .feats_required = FEAT_SME,
+ .timeout = 3,
+ .init = get_sme_vl,
+ .run = sme_vl,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c b/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c
new file mode 100644
index 000000000000..9022a6cab4b3
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 ARM Limited
+ *
+ * Verify that the streaming SVE register context in signal frames is
+ * set up as expected.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+struct fake_sigframe sf;
+static unsigned int vls[SVE_VQ_MAX];
+unsigned int nvls = 0;
+
+static bool sme_get_vls(struct tdescr *td)
+{
+ int vq, vl;
+
+ /*
+ * Enumerate up to SVE_VQ_MAX vector lengths
+ */
+ for (vq = SVE_VQ_MAX; vq > 0; --vq) {
+ vl = prctl(PR_SME_SET_VL, vq * 16);
+ if (vl == -1)
+ return false;
+
+ vl &= PR_SME_VL_LEN_MASK;
+
+ /* Skip missing VLs */
+ vq = sve_vq_from_vl(vl);
+
+ vls[nvls++] = vl;
+ }
+
+ /* We need at least one VL */
+ if (nvls < 1) {
+ fprintf(stderr, "Only %d VL supported\n", nvls);
+ return false;
+ }
+
+ return true;
+}
+
+static void setup_ssve_regs(void)
+{
+ /* smstart sm; real data is TODO */
+ asm volatile(".inst 0xd503437f" : : : );
+}
+
+static int do_one_sme_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc,
+ unsigned int vl)
+{
+ size_t resv_sz, offset;
+ struct _aarch64_ctx *head = GET_SF_RESV_HEAD(sf);
+ struct sve_context *ssve;
+ int ret;
+
+ fprintf(stderr, "Testing VL %d\n", vl);
+
+ ret = prctl(PR_SME_SET_VL, vl);
+ if (ret != vl) {
+ fprintf(stderr, "Failed to set VL, got %d\n", ret);
+ return 1;
+ }
+
+ /*
+ * Get a signal context which should have a SVE frame and registers
+ * in it.
+ */
+ setup_ssve_regs();
+ if (!get_current_context(td, &sf.uc))
+ return 1;
+
+ resv_sz = GET_SF_RESV_SIZE(sf);
+ head = get_header(head, SVE_MAGIC, resv_sz, &offset);
+ if (!head) {
+ fprintf(stderr, "No SVE context\n");
+ return 1;
+ }
+
+ ssve = (struct sve_context *)head;
+ if (ssve->vl != vl) {
+ fprintf(stderr, "Got VL %d, expected %d\n", ssve->vl, vl);
+ return 1;
+ }
+
+ /* The actual size validation is done in get_current_context() */
+ fprintf(stderr, "Got expected size %u and VL %d\n",
+ head->size, ssve->vl);
+
+ return 0;
+}
+
+static int sme_regs(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+ int i;
+
+ for (i = 0; i < nvls; i++) {
+ /*
+ * TODO: the signal test helpers can't currently cope
+ * with signal frames bigger than struct sigcontext,
+ * skip VLs that will trigger that.
+ */
+ if (vls[i] > 64) {
+ printf("Skipping VL %u due to stack size\n", vls[i]);
+ continue;
+ }
+
+ if (do_one_sme_vl(td, si, uc, vls[i]))
+ return 1;
+ }
+
+ td->pass = 1;
+
+ return 0;
+}
+
+struct tdescr tde = {
+ .name = "Streaming SVE registers",
+ .descr = "Check that we get the right Streaming SVE registers reported",
+ /*
+ * We shouldn't require FA64 but things like memset() used in the
+ * helpers might use unsupported instructions so for now disable
+ * the test unless we've got the full instruction set.
+ */
+ .feats_required = FEAT_SME | FEAT_SME_FA64,
+ .timeout = 3,
+ .init = sme_get_vls,
+ .run = sme_regs,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.c b/tools/testing/selftests/arm64/signal/testcases/testcases.c
index 8c2a57fc2f9c..84c36bee4d82 100644
--- a/tools/testing/selftests/arm64/signal/testcases/testcases.c
+++ b/tools/testing/selftests/arm64/signal/testcases/testcases.c
@@ -75,6 +75,31 @@ bool validate_sve_context(struct sve_context *sve, char **err)
return true;
}
+bool validate_za_context(struct za_context *za, char **err)
+{
+ /* Size will be rounded up to a multiple of 16 bytes */
+ size_t regs_size
+ = ((ZA_SIG_CONTEXT_SIZE(sve_vq_from_vl(za->vl)) + 15) / 16) * 16;
+
+ if (!za || !err)
+ return false;
+
+ /* Either a bare za_context or a za_context followed by regs data */
+ if ((za->head.size != sizeof(struct za_context)) &&
+ (za->head.size != regs_size)) {
+ *err = "bad size for ZA context";
+ return false;
+ }
+
+ if (!sve_vl_valid(za->vl)) {
+ *err = "SME VL in ZA context invalid";
+
+ return false;
+ }
+
+ return true;
+}
+
bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err)
{
bool terminated = false;
@@ -82,6 +107,7 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err)
int flags = 0;
struct extra_context *extra = NULL;
struct sve_context *sve = NULL;
+ struct za_context *za = NULL;
struct _aarch64_ctx *head =
(struct _aarch64_ctx *)uc->uc_mcontext.__reserved;
@@ -120,6 +146,13 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err)
sve = (struct sve_context *)head;
flags |= SVE_CTX;
break;
+ case ZA_MAGIC:
+ if (flags & ZA_CTX)
+ *err = "Multiple ZA_MAGIC";
+ /* Size is validated in validate_za_context() */
+ za = (struct za_context *)head;
+ flags |= ZA_CTX;
+ break;
case EXTRA_MAGIC:
if (flags & EXTRA_CTX)
*err = "Multiple EXTRA_MAGIC";
@@ -165,6 +198,9 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err)
if (flags & SVE_CTX)
if (!validate_sve_context(sve, err))
return false;
+ if (flags & ZA_CTX)
+ if (!validate_za_context(za, err))
+ return false;
head = GET_RESV_NEXT_HEAD(head);
}
diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.h b/tools/testing/selftests/arm64/signal/testcases/testcases.h
index ad884c135314..49f1d5de7b5b 100644
--- a/tools/testing/selftests/arm64/signal/testcases/testcases.h
+++ b/tools/testing/selftests/arm64/signal/testcases/testcases.h
@@ -16,7 +16,8 @@
#define FPSIMD_CTX (1 << 0)
#define SVE_CTX (1 << 1)
-#define EXTRA_CTX (1 << 2)
+#define ZA_CTX (1 << 2)
+#define EXTRA_CTX (1 << 3)
#define KSFT_BAD_MAGIC 0xdeadbeef
diff --git a/tools/testing/selftests/arm64/signal/testcases/za_regs.c b/tools/testing/selftests/arm64/signal/testcases/za_regs.c
new file mode 100644
index 000000000000..b94e4f99fcac
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/za_regs.c
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 ARM Limited
+ *
+ * Verify that the ZA register context in signal frames is set up as
+ * expected.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+struct fake_sigframe sf;
+static unsigned int vls[SVE_VQ_MAX];
+unsigned int nvls = 0;
+
+static bool sme_get_vls(struct tdescr *td)
+{
+ int vq, vl;
+
+ /*
+ * Enumerate up to SVE_VQ_MAX vector lengths
+ */
+ for (vq = SVE_VQ_MAX; vq > 0; --vq) {
+ vl = prctl(PR_SVE_SET_VL, vq * 16);
+ if (vl == -1)
+ return false;
+
+ vl &= PR_SME_VL_LEN_MASK;
+
+ /* Skip missing VLs */
+ vq = sve_vq_from_vl(vl);
+
+ vls[nvls++] = vl;
+ }
+
+ /* We need at least one VL */
+ if (nvls < 1) {
+ fprintf(stderr, "Only %d VL supported\n", nvls);
+ return false;
+ }
+
+ return true;
+}
+
+static void setup_za_regs(void)
+{
+ /* smstart za; real data is TODO */
+ asm volatile(".inst 0xd503457f" : : : );
+}
+
+static int do_one_sme_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc,
+ unsigned int vl)
+{
+ size_t resv_sz, offset;
+ struct _aarch64_ctx *head = GET_SF_RESV_HEAD(sf);
+ struct za_context *za;
+
+ fprintf(stderr, "Testing VL %d\n", vl);
+
+ if (prctl(PR_SME_SET_VL, vl) != vl) {
+ fprintf(stderr, "Failed to set VL\n");
+ return 1;
+ }
+
+ /*
+ * Get a signal context which should have a SVE frame and registers
+ * in it.
+ */
+ setup_za_regs();
+ if (!get_current_context(td, &sf.uc))
+ return 1;
+
+ resv_sz = GET_SF_RESV_SIZE(sf);
+ head = get_header(head, ZA_MAGIC, resv_sz, &offset);
+ if (!head) {
+ fprintf(stderr, "No ZA context\n");
+ return 1;
+ }
+
+ za = (struct za_context *)head;
+ if (za->vl != vl) {
+ fprintf(stderr, "Got VL %d, expected %d\n", za->vl, vl);
+ return 1;
+ }
+
+ /* The actual size validation is done in get_current_context() */
+ fprintf(stderr, "Got expected size %u and VL %d\n",
+ head->size, za->vl);
+
+ return 0;
+}
+
+static int sme_regs(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+ int i;
+
+ for (i = 0; i < nvls; i++) {
+ /*
+ * TODO: the signal test helpers can't currently cope
+ * with signal frames bigger than struct sigcontext,
+ * skip VLs that will trigger that.
+ */
+ if (vls[i] > 32) {
+ printf("Skipping VL %u due to stack size\n", vls[i]);
+ continue;
+ }
+
+ if (do_one_sme_vl(td, si, uc, vls[i]))
+ return 1;
+ }
+
+ td->pass = 1;
+
+ return 0;
+}
+
+struct tdescr tde = {
+ .name = "ZA register",
+ .descr = "Check that we get the right ZA registers reported",
+ .feats_required = FEAT_SME,
+ .timeout = 3,
+ .init = sme_get_vls,
+ .run = sme_regs,
+};
diff --git a/tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c b/tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c
index 5aa52cc31dc2..c11832657d2b 100644
--- a/tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c
+++ b/tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c
@@ -2,6 +2,7 @@
/* Copyright (C) 2021. Huawei Technologies Co., Ltd */
#include <test_progs.h>
#include "dummy_st_ops.skel.h"
+#include "trace_dummy_st_ops.skel.h"
/* Need to keep consistent with definition in include/linux/bpf.h */
struct bpf_dummy_ops_state {
@@ -56,6 +57,7 @@ static void test_dummy_init_ptr_arg(void)
.ctx_in = args,
.ctx_size_in = sizeof(args),
);
+ struct trace_dummy_st_ops *trace_skel;
struct dummy_st_ops *skel;
int fd, err;
@@ -64,12 +66,33 @@ static void test_dummy_init_ptr_arg(void)
return;
fd = bpf_program__fd(skel->progs.test_1);
+
+ trace_skel = trace_dummy_st_ops__open();
+ if (!ASSERT_OK_PTR(trace_skel, "trace_dummy_st_ops__open"))
+ goto done;
+
+ err = bpf_program__set_attach_target(trace_skel->progs.fentry_test_1,
+ fd, "test_1");
+ if (!ASSERT_OK(err, "set_attach_target(fentry_test_1)"))
+ goto done;
+
+ err = trace_dummy_st_ops__load(trace_skel);
+ if (!ASSERT_OK(err, "load(trace_skel)"))
+ goto done;
+
+ err = trace_dummy_st_ops__attach(trace_skel);
+ if (!ASSERT_OK(err, "attach(trace_skel)"))
+ goto done;
+
err = bpf_prog_test_run_opts(fd, &attr);
ASSERT_OK(err, "test_run");
ASSERT_EQ(in_state.val, 0x5a, "test_ptr_ret");
ASSERT_EQ(attr.retval, exp_retval, "test_ret");
+ ASSERT_EQ(trace_skel->bss->val, exp_retval, "fentry_val");
+done:
dummy_st_ops__destroy(skel);
+ trace_dummy_st_ops__destroy(trace_skel);
}
static void test_dummy_multiple_args(void)
diff --git a/tools/testing/selftests/bpf/progs/map_ptr_kern.c b/tools/testing/selftests/bpf/progs/map_ptr_kern.c
index b64df94ec476..db388f593d0a 100644
--- a/tools/testing/selftests/bpf/progs/map_ptr_kern.c
+++ b/tools/testing/selftests/bpf/progs/map_ptr_kern.c
@@ -367,7 +367,7 @@ static inline int check_array_of_maps(void)
VERIFY(check_default(&array_of_maps->map, map));
inner_map = bpf_map_lookup_elem(array_of_maps, &key);
- VERIFY(inner_map != 0);
+ VERIFY(inner_map != NULL);
VERIFY(inner_map->map.max_entries == INNER_MAX_ENTRIES);
return 1;
@@ -394,7 +394,7 @@ static inline int check_hash_of_maps(void)
VERIFY(check_default(&hash_of_maps->map, map));
inner_map = bpf_map_lookup_elem(hash_of_maps, &key);
- VERIFY(inner_map != 0);
+ VERIFY(inner_map != NULL);
VERIFY(inner_map->map.max_entries == INNER_MAX_ENTRIES);
return 1;
diff --git a/tools/testing/selftests/bpf/progs/trace_dummy_st_ops.c b/tools/testing/selftests/bpf/progs/trace_dummy_st_ops.c
new file mode 100644
index 000000000000..00a4be9d3074
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/trace_dummy_st_ops.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+int val = 0;
+
+SEC("fentry/test_1")
+int BPF_PROG(fentry_test_1, __u64 *st_ops_ctx)
+{
+ __u64 state;
+
+ /* Read the traced st_ops arg1 which is a pointer */
+ bpf_probe_read_kernel(&state, sizeof(__u64), (void *)st_ops_ctx);
+ /* Read state->val */
+ bpf_probe_read_kernel(&val, sizeof(__u32), (void *)state);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c b/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c
index b9e991d43155..e7775d3bbe08 100644
--- a/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c
+++ b/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c
@@ -18,8 +18,9 @@
#include "bpf_rlimit.h"
#include "cgroup_helpers.h"
-static int start_server(const struct sockaddr *addr, socklen_t len)
+static int start_server(const struct sockaddr *addr, socklen_t len, bool dual)
{
+ int mode = !dual;
int fd;
fd = socket(addr->sa_family, SOCK_STREAM, 0);
@@ -28,6 +29,14 @@ static int start_server(const struct sockaddr *addr, socklen_t len)
goto out;
}
+ if (addr->sa_family == AF_INET6) {
+ if (setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, (char *)&mode,
+ sizeof(mode)) == -1) {
+ log_err("Failed to set the dual-stack mode");
+ goto close_out;
+ }
+ }
+
if (bind(fd, addr, len) == -1) {
log_err("Failed to bind server socket");
goto close_out;
@@ -47,24 +56,17 @@ out:
return fd;
}
-static int connect_to_server(int server_fd)
+static int connect_to_server(const struct sockaddr *addr, socklen_t len)
{
- struct sockaddr_storage addr;
- socklen_t len = sizeof(addr);
int fd = -1;
- if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) {
- log_err("Failed to get server addr");
- goto out;
- }
-
- fd = socket(addr.ss_family, SOCK_STREAM, 0);
+ fd = socket(addr->sa_family, SOCK_STREAM, 0);
if (fd == -1) {
log_err("Failed to create client socket");
goto out;
}
- if (connect(fd, (const struct sockaddr *)&addr, len) == -1) {
+ if (connect(fd, (const struct sockaddr *)addr, len) == -1) {
log_err("Fail to connect to server");
goto close_out;
}
@@ -116,7 +118,8 @@ err:
return map_fd;
}
-static int run_test(int server_fd, int results_fd, bool xdp)
+static int run_test(int server_fd, int results_fd, bool xdp,
+ const struct sockaddr *addr, socklen_t len)
{
int client = -1, srv_client = -1;
int ret = 0;
@@ -142,7 +145,7 @@ static int run_test(int server_fd, int results_fd, bool xdp)
goto err;
}
- client = connect_to_server(server_fd);
+ client = connect_to_server(addr, len);
if (client == -1)
goto err;
@@ -199,12 +202,30 @@ out:
return ret;
}
+static bool get_port(int server_fd, in_port_t *port)
+{
+ struct sockaddr_in addr;
+ socklen_t len = sizeof(addr);
+
+ if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) {
+ log_err("Failed to get server addr");
+ return false;
+ }
+
+ /* sin_port and sin6_port are located at the same offset. */
+ *port = addr.sin_port;
+ return true;
+}
+
int main(int argc, char **argv)
{
struct sockaddr_in addr4;
struct sockaddr_in6 addr6;
+ struct sockaddr_in addr4dual;
+ struct sockaddr_in6 addr6dual;
int server = -1;
int server_v6 = -1;
+ int server_dual = -1;
int results = -1;
int err = 0;
bool xdp;
@@ -224,25 +245,43 @@ int main(int argc, char **argv)
addr4.sin_family = AF_INET;
addr4.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
addr4.sin_port = 0;
+ memcpy(&addr4dual, &addr4, sizeof(addr4dual));
memset(&addr6, 0, sizeof(addr6));
addr6.sin6_family = AF_INET6;
addr6.sin6_addr = in6addr_loopback;
addr6.sin6_port = 0;
- server = start_server((const struct sockaddr *)&addr4, sizeof(addr4));
- if (server == -1)
+ memset(&addr6dual, 0, sizeof(addr6dual));
+ addr6dual.sin6_family = AF_INET6;
+ addr6dual.sin6_addr = in6addr_any;
+ addr6dual.sin6_port = 0;
+
+ server = start_server((const struct sockaddr *)&addr4, sizeof(addr4),
+ false);
+ if (server == -1 || !get_port(server, &addr4.sin_port))
goto err;
server_v6 = start_server((const struct sockaddr *)&addr6,
- sizeof(addr6));
- if (server_v6 == -1)
+ sizeof(addr6), false);
+ if (server_v6 == -1 || !get_port(server_v6, &addr6.sin6_port))
+ goto err;
+
+ server_dual = start_server((const struct sockaddr *)&addr6dual,
+ sizeof(addr6dual), true);
+ if (server_dual == -1 || !get_port(server_dual, &addr4dual.sin_port))
+ goto err;
+
+ if (run_test(server, results, xdp,
+ (const struct sockaddr *)&addr4, sizeof(addr4)))
goto err;
- if (run_test(server, results, xdp))
+ if (run_test(server_v6, results, xdp,
+ (const struct sockaddr *)&addr6, sizeof(addr6)))
goto err;
- if (run_test(server_v6, results, xdp))
+ if (run_test(server_dual, results, xdp,
+ (const struct sockaddr *)&addr4dual, sizeof(addr4dual)))
goto err;
printf("ok\n");
@@ -252,6 +291,7 @@ err:
out:
close(server);
close(server_v6);
+ close(server_dual);
close(results);
return err;
}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/vxlan_flooding_ipv6.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/vxlan_flooding_ipv6.sh
index 429f7ee735cf..fd23c80eba31 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/vxlan_flooding_ipv6.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/vxlan_flooding_ipv6.sh
@@ -159,6 +159,17 @@ flooding_remotes_add()
local lsb
local i
+ # Prevent unwanted packets from entering the bridge and interfering
+ # with the test.
+ tc qdisc add dev br0 clsact
+ tc filter add dev br0 egress protocol all pref 1 handle 1 \
+ matchall skip_hw action drop
+ tc qdisc add dev $h1 clsact
+ tc filter add dev $h1 egress protocol all pref 1 handle 1 \
+ flower skip_hw dst_mac de:ad:be:ef:13:37 action pass
+ tc filter add dev $h1 egress protocol all pref 2 handle 2 \
+ matchall skip_hw action drop
+
for i in $(eval echo {1..$num_remotes}); do
lsb=$((i + 1))
@@ -195,6 +206,12 @@ flooding_filters_del()
done
tc qdisc del dev $rp2 clsact
+
+ tc filter del dev $h1 egress protocol all pref 2 handle 2 matchall
+ tc filter del dev $h1 egress protocol all pref 1 handle 1 flower
+ tc qdisc del dev $h1 clsact
+ tc filter del dev br0 egress protocol all pref 1 handle 1 matchall
+ tc qdisc del dev br0 clsact
}
flooding_check_packets()
diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh
index fedcb7b35af9..af5ea50ed5c0 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh
@@ -172,6 +172,17 @@ flooding_filters_add()
local lsb
local i
+ # Prevent unwanted packets from entering the bridge and interfering
+ # with the test.
+ tc qdisc add dev br0 clsact
+ tc filter add dev br0 egress protocol all pref 1 handle 1 \
+ matchall skip_hw action drop
+ tc qdisc add dev $h1 clsact
+ tc filter add dev $h1 egress protocol all pref 1 handle 1 \
+ flower skip_hw dst_mac de:ad:be:ef:13:37 action pass
+ tc filter add dev $h1 egress protocol all pref 2 handle 2 \
+ matchall skip_hw action drop
+
tc qdisc add dev $rp2 clsact
for i in $(eval echo {1..$num_remotes}); do
@@ -194,6 +205,12 @@ flooding_filters_del()
done
tc qdisc del dev $rp2 clsact
+
+ tc filter del dev $h1 egress protocol all pref 2 handle 2 matchall
+ tc filter del dev $h1 egress protocol all pref 1 handle 1 flower
+ tc qdisc del dev $h1 clsact
+ tc filter del dev br0 egress protocol all pref 1 handle 1 matchall
+ tc qdisc del dev br0 clsact
}
flooding_check_packets()
diff --git a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
index eaf8a04a7ca5..10e54bcca7a9 100755
--- a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
+++ b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
@@ -190,7 +190,7 @@ setup_prepare()
tc filter add dev $eth0 ingress chain $(IS2 0 0) pref 1 \
protocol ipv4 flower skip_sw ip_proto udp dst_port 5201 \
- action police rate 50mbit burst 64k \
+ action police rate 50mbit burst 64k conform-exceed drop/pipe \
action goto chain $(IS2 1 0)
}
diff --git a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c
index 0315955ff0f4..bc1c407651fc 100644
--- a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c
+++ b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c
@@ -412,7 +412,8 @@ TEST(binderfs_stress)
ret = mount(NULL, binderfs_mntpt, "binder", 0, 0);
ASSERT_EQ(ret, 0) {
- TH_LOG("%s - Failed to mount binderfs", strerror(errno));
+ TH_LOG("%s - Failed to mount binderfs, check if CONFIG_ANDROID_BINDERFS is enabled in the running kernel",
+ strerror(errno));
}
for (int i = 0; i < ARRAY_SIZE(fds); i++) {
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
index dc7ade196798..459741565222 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
@@ -25,6 +25,9 @@ ppc*)
s390*)
ARG1=%r2
;;
+mips*)
+ ARG1=%r4
+;;
*)
echo "Please implement other architecture here"
exit_untested
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc
index 47d84b5cb6ca..d4662c8cf407 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc
@@ -36,6 +36,10 @@ s390*)
GOODREG=%r2
BADREG=%s2
;;
+mips*)
+ GOODREG=%r4
+ BADREG=%r12
+;;
*)
echo "Please implement other architecture here"
exit_untested
diff --git a/tools/testing/selftests/ir/ir_loopback.sh b/tools/testing/selftests/ir/ir_loopback.sh
index b90dc9939f45..aff9299c9416 100755
--- a/tools/testing/selftests/ir/ir_loopback.sh
+++ b/tools/testing/selftests/ir/ir_loopback.sh
@@ -10,7 +10,7 @@ if [ $UID != 0 ]; then
fi
if ! /sbin/modprobe -q -n rc-loopback; then
- echo "ir_loopback: module rc-loopback is not found [SKIP]"
+ echo "ir_loopback: module rc-loopback is not found in /lib/modules/`uname -r` [SKIP]"
exit $ksft_skip
fi
diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h
index b8f248018174..33a0dbd26bd3 100644
--- a/tools/testing/selftests/kselftest.h
+++ b/tools/testing/selftests/kselftest.h
@@ -53,6 +53,21 @@
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
#endif
+/*
+ * gcc cpuid.h provides __cpuid_count() since v4.4.
+ * Clang/LLVM cpuid.h provides __cpuid_count() since v3.4.0.
+ *
+ * Provide local define for tests needing __cpuid_count() because
+ * selftests need to work in older environments that do not yet
+ * have __cpuid_count().
+ */
+#ifndef __cpuid_count
+#define __cpuid_count(level, count, a, b, c, d) \
+ __asm__ __volatile__ ("cpuid\n\t" \
+ : "=a" (a), "=b" (b), "=c" (c), "=d" (d) \
+ : "0" (level), "2" (count))
+#endif
+
/* define kselftest exit codes */
#define KSFT_PASS 0
#define KSFT_FAIL 1
diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h
index 11779405dc80..25f4d54067c0 100644
--- a/tools/testing/selftests/kselftest_harness.h
+++ b/tools/testing/selftests/kselftest_harness.h
@@ -64,6 +64,7 @@
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
+#include <setjmp.h>
#include "kselftest.h"
@@ -183,7 +184,10 @@
struct __test_metadata *_metadata, \
struct __fixture_variant_metadata *variant) \
{ \
- test_name(_metadata); \
+ _metadata->setup_completed = true; \
+ if (setjmp(_metadata->env) == 0) \
+ test_name(_metadata); \
+ __test_check_assert(_metadata); \
} \
static struct __test_metadata _##test_name##_object = \
{ .name = #test_name, \
@@ -287,7 +291,9 @@
#define FIXTURE_TEARDOWN(fixture_name) \
void fixture_name##_teardown( \
struct __test_metadata __attribute__((unused)) *_metadata, \
- FIXTURE_DATA(fixture_name) __attribute__((unused)) *self)
+ FIXTURE_DATA(fixture_name) __attribute__((unused)) *self, \
+ const FIXTURE_VARIANT(fixture_name) \
+ __attribute__((unused)) *variant)
/**
* FIXTURE_VARIANT() - Optionally called once per fixture
@@ -302,9 +308,9 @@
* ...
* };
*
- * Defines type of constant parameters provided to FIXTURE_SETUP() and TEST_F()
- * as *variant*. Variants allow the same tests to be run with different
- * arguments.
+ * Defines type of constant parameters provided to FIXTURE_SETUP(), TEST_F() and
+ * FIXTURE_TEARDOWN as *variant*. Variants allow the same tests to be run with
+ * different arguments.
*/
#define FIXTURE_VARIANT(fixture_name) struct _fixture_variant_##fixture_name
@@ -356,10 +362,7 @@
* Defines a test that depends on a fixture (e.g., is part of a test case).
* Very similar to TEST() except that *self* is the setup instance of fixture's
* datatype exposed for use by the implementation.
- *
- * Warning: use of ASSERT_* here will skip TEARDOWN.
*/
-/* TODO(wad) register fixtures on dedicated test lists. */
#define TEST_F(fixture_name, test_name) \
__TEST_F_IMPL(fixture_name, test_name, -1, TEST_TIMEOUT_DEFAULT)
@@ -381,12 +384,17 @@
/* fixture data is alloced, setup, and torn down per call. */ \
FIXTURE_DATA(fixture_name) self; \
memset(&self, 0, sizeof(FIXTURE_DATA(fixture_name))); \
- fixture_name##_setup(_metadata, &self, variant->data); \
- /* Let setup failure terminate early. */ \
- if (!_metadata->passed) \
- return; \
- fixture_name##_##test_name(_metadata, &self, variant->data); \
- fixture_name##_teardown(_metadata, &self); \
+ if (setjmp(_metadata->env) == 0) { \
+ fixture_name##_setup(_metadata, &self, variant->data); \
+ /* Let setup failure terminate early. */ \
+ if (!_metadata->passed) \
+ return; \
+ _metadata->setup_completed = true; \
+ fixture_name##_##test_name(_metadata, &self, variant->data); \
+ } \
+ if (_metadata->setup_completed) \
+ fixture_name##_teardown(_metadata, &self, variant->data); \
+ __test_check_assert(_metadata); \
} \
static struct __test_metadata \
_##fixture_name##_##test_name##_object = { \
@@ -683,7 +691,7 @@
*/
#define OPTIONAL_HANDLER(_assert) \
for (; _metadata->trigger; _metadata->trigger = \
- __bail(_assert, _metadata->no_print, _metadata->step))
+ __bail(_assert, _metadata))
#define __INC_STEP(_metadata) \
/* Keep "step" below 255 (which is used for "SKIP" reporting). */ \
@@ -830,6 +838,9 @@ struct __test_metadata {
bool timed_out; /* did this test timeout instead of exiting? */
__u8 step;
bool no_print; /* manual trigger when TH_LOG_STREAM is not available */
+ bool aborted; /* stopped test due to failed ASSERT */
+ bool setup_completed; /* did setup finish? */
+ jmp_buf env; /* for exiting out of test early */
struct __test_results *results;
struct __test_metadata *prev, *next;
};
@@ -848,16 +859,26 @@ static inline void __register_test(struct __test_metadata *t)
__LIST_APPEND(t->fixture->tests, t);
}
-static inline int __bail(int for_realz, bool no_print, __u8 step)
+static inline int __bail(int for_realz, struct __test_metadata *t)
{
+ /* if this is ASSERT, return immediately. */
if (for_realz) {
- if (no_print)
- _exit(step);
- abort();
+ t->aborted = true;
+ longjmp(t->env, 1);
}
+ /* otherwise, end the for loop and continue. */
return 0;
}
+static inline void __test_check_assert(struct __test_metadata *t)
+{
+ if (t->aborted) {
+ if (t->no_print)
+ _exit(t->step);
+ abort();
+ }
+}
+
struct __test_metadata *__active_test;
static void __timeout_handler(int sig, siginfo_t *info, void *ucontext)
{
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index d1e8f5237469..0b0e4402bba6 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -3,6 +3,7 @@
/aarch64/debug-exceptions
/aarch64/get-reg-list
/aarch64/psci_cpu_on_test
+/aarch64/vcpu_width_config
/aarch64/vgic_init
/aarch64/vgic_irq
/s390x/memop
@@ -33,6 +34,7 @@
/x86_64/state_test
/x86_64/svm_vmcall_test
/x86_64/svm_int_ctl_test
+/x86_64/tsc_scaling_sync
/x86_64/sync_regs_test
/x86_64/tsc_msrs_test
/x86_64/userspace_io_test
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 21c2dbd21a81..681b173aa87c 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -106,6 +106,7 @@ TEST_GEN_PROGS_aarch64 += aarch64/arch_timer
TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions
TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list
TEST_GEN_PROGS_aarch64 += aarch64/psci_cpu_on_test
+TEST_GEN_PROGS_aarch64 += aarch64/vcpu_width_config
TEST_GEN_PROGS_aarch64 += aarch64/vgic_init
TEST_GEN_PROGS_aarch64 += aarch64/vgic_irq
TEST_GEN_PROGS_aarch64 += demand_paging_test
diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer.c b/tools/testing/selftests/kvm/aarch64/arch_timer.c
index b08d30bf71c5..3b940a101bc0 100644
--- a/tools/testing/selftests/kvm/aarch64/arch_timer.c
+++ b/tools/testing/selftests/kvm/aarch64/arch_timer.c
@@ -362,11 +362,12 @@ static void test_init_timer_irq(struct kvm_vm *vm)
pr_debug("ptimer_irq: %d; vtimer_irq: %d\n", ptimer_irq, vtimer_irq);
}
+static int gic_fd;
+
static struct kvm_vm *test_vm_create(void)
{
struct kvm_vm *vm;
unsigned int i;
- int ret;
int nr_vcpus = test_args.nr_vcpus;
vm = vm_create_default_with_vcpus(nr_vcpus, 0, 0, guest_code, NULL);
@@ -383,8 +384,8 @@ static struct kvm_vm *test_vm_create(void)
ucall_init(vm, NULL);
test_init_timer_irq(vm);
- ret = vgic_v3_setup(vm, nr_vcpus, 64, GICD_BASE_GPA, GICR_BASE_GPA);
- if (ret < 0) {
+ gic_fd = vgic_v3_setup(vm, nr_vcpus, 64, GICD_BASE_GPA, GICR_BASE_GPA);
+ if (gic_fd < 0) {
print_skip("Failed to create vgic-v3");
exit(KSFT_SKIP);
}
@@ -395,6 +396,12 @@ static struct kvm_vm *test_vm_create(void)
return vm;
}
+static void test_vm_cleanup(struct kvm_vm *vm)
+{
+ close(gic_fd);
+ kvm_vm_free(vm);
+}
+
static void test_print_help(char *name)
{
pr_info("Usage: %s [-h] [-n nr_vcpus] [-i iterations] [-p timer_period_ms]\n",
@@ -478,7 +485,7 @@ int main(int argc, char *argv[])
vm = test_vm_create();
test_run(vm);
- kvm_vm_free(vm);
+ test_vm_cleanup(vm);
return 0;
}
diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
index f12147c43464..0b571f3fe64c 100644
--- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c
+++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
@@ -503,8 +503,13 @@ static void run_test(struct vcpu_config *c)
++missing_regs;
if (new_regs || missing_regs) {
+ n = 0;
+ for_each_reg_filtered(i)
+ ++n;
+
printf("%s: Number blessed registers: %5lld\n", config_name(c), blessed_n);
- printf("%s: Number registers: %5lld\n", config_name(c), reg_list->n);
+ printf("%s: Number registers: %5lld (includes %lld filtered registers)\n",
+ config_name(c), reg_list->n, reg_list->n - n);
}
if (new_regs) {
@@ -683,9 +688,10 @@ static __u64 base_regs[] = {
KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[4]),
KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.fpsr),
KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.fpcr),
- KVM_REG_ARM_FW_REG(0),
- KVM_REG_ARM_FW_REG(1),
- KVM_REG_ARM_FW_REG(2),
+ KVM_REG_ARM_FW_REG(0), /* KVM_REG_ARM_PSCI_VERSION */
+ KVM_REG_ARM_FW_REG(1), /* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1 */
+ KVM_REG_ARM_FW_REG(2), /* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2 */
+ KVM_REG_ARM_FW_REG(3), /* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3 */
ARM64_SYS_REG(3, 3, 14, 3, 1), /* CNTV_CTL_EL0 */
ARM64_SYS_REG(3, 3, 14, 3, 2), /* CNTV_CVAL_EL0 */
ARM64_SYS_REG(3, 3, 14, 0, 2),
diff --git a/tools/testing/selftests/kvm/aarch64/vcpu_width_config.c b/tools/testing/selftests/kvm/aarch64/vcpu_width_config.c
new file mode 100644
index 000000000000..6e9402679229
--- /dev/null
+++ b/tools/testing/selftests/kvm/aarch64/vcpu_width_config.c
@@ -0,0 +1,122 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * vcpu_width_config - Test KVM_ARM_VCPU_INIT() with KVM_ARM_VCPU_EL1_32BIT.
+ *
+ * Copyright (c) 2022 Google LLC.
+ *
+ * This is a test that ensures that non-mixed-width vCPUs (all 64bit vCPUs
+ * or all 32bit vcPUs) can be configured and mixed-width vCPUs cannot be
+ * configured.
+ */
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+
+
+/*
+ * Add a vCPU, run KVM_ARM_VCPU_INIT with @init1, and then
+ * add another vCPU, and run KVM_ARM_VCPU_INIT with @init2.
+ */
+static int add_init_2vcpus(struct kvm_vcpu_init *init1,
+ struct kvm_vcpu_init *init2)
+{
+ struct kvm_vm *vm;
+ int ret;
+
+ vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
+
+ vm_vcpu_add(vm, 0);
+ ret = _vcpu_ioctl(vm, 0, KVM_ARM_VCPU_INIT, init1);
+ if (ret)
+ goto free_exit;
+
+ vm_vcpu_add(vm, 1);
+ ret = _vcpu_ioctl(vm, 1, KVM_ARM_VCPU_INIT, init2);
+
+free_exit:
+ kvm_vm_free(vm);
+ return ret;
+}
+
+/*
+ * Add two vCPUs, then run KVM_ARM_VCPU_INIT for one vCPU with @init1,
+ * and run KVM_ARM_VCPU_INIT for another vCPU with @init2.
+ */
+static int add_2vcpus_init_2vcpus(struct kvm_vcpu_init *init1,
+ struct kvm_vcpu_init *init2)
+{
+ struct kvm_vm *vm;
+ int ret;
+
+ vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
+
+ vm_vcpu_add(vm, 0);
+ vm_vcpu_add(vm, 1);
+
+ ret = _vcpu_ioctl(vm, 0, KVM_ARM_VCPU_INIT, init1);
+ if (ret)
+ goto free_exit;
+
+ ret = _vcpu_ioctl(vm, 1, KVM_ARM_VCPU_INIT, init2);
+
+free_exit:
+ kvm_vm_free(vm);
+ return ret;
+}
+
+/*
+ * Tests that two 64bit vCPUs can be configured, two 32bit vCPUs can be
+ * configured, and two mixed-width vCPUs cannot be configured.
+ * Each of those three cases, configure vCPUs in two different orders.
+ * The one is running KVM_CREATE_VCPU for 2 vCPUs, and then running
+ * KVM_ARM_VCPU_INIT for them.
+ * The other is running KVM_CREATE_VCPU and KVM_ARM_VCPU_INIT for a vCPU,
+ * and then run those commands for another vCPU.
+ */
+int main(void)
+{
+ struct kvm_vcpu_init init1, init2;
+ struct kvm_vm *vm;
+ int ret;
+
+ if (!kvm_check_cap(KVM_CAP_ARM_EL1_32BIT)) {
+ print_skip("KVM_CAP_ARM_EL1_32BIT is not supported");
+ exit(KSFT_SKIP);
+ }
+
+ /* Get the preferred target type and copy that to init2 for later use */
+ vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
+ vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init1);
+ kvm_vm_free(vm);
+ init2 = init1;
+
+ /* Test with 64bit vCPUs */
+ ret = add_init_2vcpus(&init1, &init1);
+ TEST_ASSERT(ret == 0,
+ "Configuring 64bit EL1 vCPUs failed unexpectedly");
+ ret = add_2vcpus_init_2vcpus(&init1, &init1);
+ TEST_ASSERT(ret == 0,
+ "Configuring 64bit EL1 vCPUs failed unexpectedly");
+
+ /* Test with 32bit vCPUs */
+ init1.features[0] = (1 << KVM_ARM_VCPU_EL1_32BIT);
+ ret = add_init_2vcpus(&init1, &init1);
+ TEST_ASSERT(ret == 0,
+ "Configuring 32bit EL1 vCPUs failed unexpectedly");
+ ret = add_2vcpus_init_2vcpus(&init1, &init1);
+ TEST_ASSERT(ret == 0,
+ "Configuring 32bit EL1 vCPUs failed unexpectedly");
+
+ /* Test with mixed-width vCPUs */
+ init1.features[0] = 0;
+ init2.features[0] = (1 << KVM_ARM_VCPU_EL1_32BIT);
+ ret = add_init_2vcpus(&init1, &init2);
+ TEST_ASSERT(ret != 0,
+ "Configuring mixed-width vCPUs worked unexpectedly");
+ ret = add_2vcpus_init_2vcpus(&init1, &init2);
+ TEST_ASSERT(ret != 0,
+ "Configuring mixed-width vCPUs worked unexpectedly");
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c
index c9d9e513ca04..7b47ae4f952e 100644
--- a/tools/testing/selftests/kvm/dirty_log_perf_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c
@@ -18,11 +18,40 @@
#include "test_util.h"
#include "perf_test_util.h"
#include "guest_modes.h"
+
#ifdef __aarch64__
#include "aarch64/vgic.h"
#define GICD_BASE_GPA 0x8000000ULL
#define GICR_BASE_GPA 0x80A0000ULL
+
+static int gic_fd;
+
+static void arch_setup_vm(struct kvm_vm *vm, unsigned int nr_vcpus)
+{
+ /*
+ * The test can still run even if hardware does not support GICv3, as it
+ * is only an optimization to reduce guest exits.
+ */
+ gic_fd = vgic_v3_setup(vm, nr_vcpus, 64, GICD_BASE_GPA, GICR_BASE_GPA);
+}
+
+static void arch_cleanup_vm(struct kvm_vm *vm)
+{
+ if (gic_fd > 0)
+ close(gic_fd);
+}
+
+#else /* __aarch64__ */
+
+static void arch_setup_vm(struct kvm_vm *vm, unsigned int nr_vcpus)
+{
+}
+
+static void arch_cleanup_vm(struct kvm_vm *vm)
+{
+}
+
#endif
/* How many host loops to run by default (one KVM_GET_DIRTY_LOG for each loop)*/
@@ -206,9 +235,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
vm_enable_cap(vm, &cap);
}
-#ifdef __aarch64__
- vgic_v3_setup(vm, nr_vcpus, 64, GICD_BASE_GPA, GICR_BASE_GPA);
-#endif
+ arch_setup_vm(vm, nr_vcpus);
/* Start the iterations */
iteration = 0;
@@ -302,6 +329,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
}
free_bitmaps(bitmaps, p->slots);
+ arch_cleanup_vm(vm);
perf_test_destroy_vm(vm);
}
diff --git a/tools/testing/selftests/kvm/include/riscv/processor.h b/tools/testing/selftests/kvm/include/riscv/processor.h
index dc284c6bdbc3..eca5c622efd2 100644
--- a/tools/testing/selftests/kvm/include/riscv/processor.h
+++ b/tools/testing/selftests/kvm/include/riscv/processor.h
@@ -101,7 +101,9 @@ static inline void set_reg(struct kvm_vm *vm, uint32_t vcpuid, uint64_t id,
#define PGTBL_PTE_WRITE_SHIFT 2
#define PGTBL_PTE_READ_MASK 0x0000000000000002ULL
#define PGTBL_PTE_READ_SHIFT 1
-#define PGTBL_PTE_PERM_MASK (PGTBL_PTE_EXECUTE_MASK | \
+#define PGTBL_PTE_PERM_MASK (PGTBL_PTE_ACCESSED_MASK | \
+ PGTBL_PTE_DIRTY_MASK | \
+ PGTBL_PTE_EXECUTE_MASK | \
PGTBL_PTE_WRITE_MASK | \
PGTBL_PTE_READ_MASK)
#define PGTBL_PTE_VALID_MASK 0x0000000000000001ULL
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 37db341d4cc5..d0d51adec76e 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -60,6 +60,23 @@
/* CPUID.0x8000_0001.EDX */
#define CPUID_GBPAGES (1ul << 26)
+/* Page table bitfield declarations */
+#define PTE_PRESENT_MASK BIT_ULL(0)
+#define PTE_WRITABLE_MASK BIT_ULL(1)
+#define PTE_USER_MASK BIT_ULL(2)
+#define PTE_ACCESSED_MASK BIT_ULL(5)
+#define PTE_DIRTY_MASK BIT_ULL(6)
+#define PTE_LARGE_MASK BIT_ULL(7)
+#define PTE_GLOBAL_MASK BIT_ULL(8)
+#define PTE_NX_MASK BIT_ULL(63)
+
+#define PAGE_SHIFT 12
+#define PAGE_SIZE (1ULL << PAGE_SHIFT)
+#define PAGE_MASK (~(PAGE_SIZE-1))
+
+#define PHYSICAL_PAGE_MASK GENMASK_ULL(51, 12)
+#define PTE_GET_PFN(pte) (((pte) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT)
+
/* General Registers in 64-Bit Mode */
struct gpr64_regs {
u64 rax;
diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c b/tools/testing/selftests/kvm/kvm_page_table_test.c
index ba1fdc3dcf4a..2c4a7563a4f8 100644
--- a/tools/testing/selftests/kvm/kvm_page_table_test.c
+++ b/tools/testing/selftests/kvm/kvm_page_table_test.c
@@ -278,7 +278,7 @@ static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg)
else
guest_test_phys_mem = p->phys_offset;
#ifdef __s390x__
- alignment = max(0x100000, alignment);
+ alignment = max(0x100000UL, alignment);
#endif
guest_test_phys_mem = align_down(guest_test_phys_mem, alignment);
diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c
index d377f2603d98..3961487a4870 100644
--- a/tools/testing/selftests/kvm/lib/riscv/processor.c
+++ b/tools/testing/selftests/kvm/lib/riscv/processor.c
@@ -268,7 +268,7 @@ void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent)
core.regs.t3, core.regs.t4, core.regs.t5, core.regs.t6);
}
-static void guest_hang(void)
+static void __aligned(16) guest_hang(void)
{
while (1)
;
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index 9f000dfb5594..33ea5e9955d9 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -19,38 +19,6 @@
vm_vaddr_t exception_handlers;
-/* Virtual translation table structure declarations */
-struct pageUpperEntry {
- uint64_t present:1;
- uint64_t writable:1;
- uint64_t user:1;
- uint64_t write_through:1;
- uint64_t cache_disable:1;
- uint64_t accessed:1;
- uint64_t ignored_06:1;
- uint64_t page_size:1;
- uint64_t ignored_11_08:4;
- uint64_t pfn:40;
- uint64_t ignored_62_52:11;
- uint64_t execute_disable:1;
-};
-
-struct pageTableEntry {
- uint64_t present:1;
- uint64_t writable:1;
- uint64_t user:1;
- uint64_t write_through:1;
- uint64_t cache_disable:1;
- uint64_t accessed:1;
- uint64_t dirty:1;
- uint64_t reserved_07:1;
- uint64_t global:1;
- uint64_t ignored_11_09:3;
- uint64_t pfn:40;
- uint64_t ignored_62_52:11;
- uint64_t execute_disable:1;
-};
-
void regs_dump(FILE *stream, struct kvm_regs *regs,
uint8_t indent)
{
@@ -195,23 +163,21 @@ static void *virt_get_pte(struct kvm_vm *vm, uint64_t pt_pfn, uint64_t vaddr,
return &page_table[index];
}
-static struct pageUpperEntry *virt_create_upper_pte(struct kvm_vm *vm,
- uint64_t pt_pfn,
- uint64_t vaddr,
- uint64_t paddr,
- int level,
- enum x86_page_size page_size)
+static uint64_t *virt_create_upper_pte(struct kvm_vm *vm,
+ uint64_t pt_pfn,
+ uint64_t vaddr,
+ uint64_t paddr,
+ int level,
+ enum x86_page_size page_size)
{
- struct pageUpperEntry *pte = virt_get_pte(vm, pt_pfn, vaddr, level);
-
- if (!pte->present) {
- pte->writable = true;
- pte->present = true;
- pte->page_size = (level == page_size);
- if (pte->page_size)
- pte->pfn = paddr >> vm->page_shift;
+ uint64_t *pte = virt_get_pte(vm, pt_pfn, vaddr, level);
+
+ if (!(*pte & PTE_PRESENT_MASK)) {
+ *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK;
+ if (level == page_size)
+ *pte |= PTE_LARGE_MASK | (paddr & PHYSICAL_PAGE_MASK);
else
- pte->pfn = vm_alloc_page_table(vm) >> vm->page_shift;
+ *pte |= vm_alloc_page_table(vm) & PHYSICAL_PAGE_MASK;
} else {
/*
* Entry already present. Assert that the caller doesn't want
@@ -221,7 +187,7 @@ static struct pageUpperEntry *virt_create_upper_pte(struct kvm_vm *vm,
TEST_ASSERT(level != page_size,
"Cannot create hugepage at level: %u, vaddr: 0x%lx\n",
page_size, vaddr);
- TEST_ASSERT(!pte->page_size,
+ TEST_ASSERT(!(*pte & PTE_LARGE_MASK),
"Cannot create page table at level: %u, vaddr: 0x%lx\n",
level, vaddr);
}
@@ -232,8 +198,8 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
enum x86_page_size page_size)
{
const uint64_t pg_size = 1ull << ((page_size * 9) + 12);
- struct pageUpperEntry *pml4e, *pdpe, *pde;
- struct pageTableEntry *pte;
+ uint64_t *pml4e, *pdpe, *pde;
+ uint64_t *pte;
TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K,
"Unknown or unsupported guest mode, mode: 0x%x", vm->mode);
@@ -257,24 +223,22 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
*/
pml4e = virt_create_upper_pte(vm, vm->pgd >> vm->page_shift,
vaddr, paddr, 3, page_size);
- if (pml4e->page_size)
+ if (*pml4e & PTE_LARGE_MASK)
return;
- pdpe = virt_create_upper_pte(vm, pml4e->pfn, vaddr, paddr, 2, page_size);
- if (pdpe->page_size)
+ pdpe = virt_create_upper_pte(vm, PTE_GET_PFN(*pml4e), vaddr, paddr, 2, page_size);
+ if (*pdpe & PTE_LARGE_MASK)
return;
- pde = virt_create_upper_pte(vm, pdpe->pfn, vaddr, paddr, 1, page_size);
- if (pde->page_size)
+ pde = virt_create_upper_pte(vm, PTE_GET_PFN(*pdpe), vaddr, paddr, 1, page_size);
+ if (*pde & PTE_LARGE_MASK)
return;
/* Fill in page table entry. */
- pte = virt_get_pte(vm, pde->pfn, vaddr, 0);
- TEST_ASSERT(!pte->present,
+ pte = virt_get_pte(vm, PTE_GET_PFN(*pde), vaddr, 0);
+ TEST_ASSERT(!(*pte & PTE_PRESENT_MASK),
"PTE already present for 4k page at vaddr: 0x%lx\n", vaddr);
- pte->pfn = paddr >> vm->page_shift;
- pte->writable = true;
- pte->present = 1;
+ *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PHYSICAL_PAGE_MASK);
}
void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
@@ -282,22 +246,22 @@ void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
__virt_pg_map(vm, vaddr, paddr, X86_PAGE_SIZE_4K);
}
-static struct pageTableEntry *_vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid,
+static uint64_t *_vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid,
uint64_t vaddr)
{
uint16_t index[4];
- struct pageUpperEntry *pml4e, *pdpe, *pde;
- struct pageTableEntry *pte;
+ uint64_t *pml4e, *pdpe, *pde;
+ uint64_t *pte;
struct kvm_cpuid_entry2 *entry;
struct kvm_sregs sregs;
int max_phy_addr;
- /* Set the bottom 52 bits. */
- uint64_t rsvd_mask = 0x000fffffffffffff;
+ uint64_t rsvd_mask = 0;
entry = kvm_get_supported_cpuid_index(0x80000008, 0);
max_phy_addr = entry->eax & 0x000000ff;
- /* Clear the bottom bits of the reserved mask. */
- rsvd_mask = (rsvd_mask >> max_phy_addr) << max_phy_addr;
+ /* Set the high bits in the reserved mask. */
+ if (max_phy_addr < 52)
+ rsvd_mask = GENMASK_ULL(51, max_phy_addr);
/*
* SDM vol 3, fig 4-11 "Formats of CR3 and Paging-Structure Entries
@@ -307,7 +271,7 @@ static struct pageTableEntry *_vm_get_page_table_entry(struct kvm_vm *vm, int vc
*/
vcpu_sregs_get(vm, vcpuid, &sregs);
if ((sregs.efer & EFER_NX) == 0) {
- rsvd_mask |= (1ull << 63);
+ rsvd_mask |= PTE_NX_MASK;
}
TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
@@ -329,30 +293,29 @@ static struct pageTableEntry *_vm_get_page_table_entry(struct kvm_vm *vm, int vc
index[3] = (vaddr >> 39) & 0x1ffu;
pml4e = addr_gpa2hva(vm, vm->pgd);
- TEST_ASSERT(pml4e[index[3]].present,
+ TEST_ASSERT(pml4e[index[3]] & PTE_PRESENT_MASK,
"Expected pml4e to be present for gva: 0x%08lx", vaddr);
- TEST_ASSERT((*(uint64_t*)(&pml4e[index[3]]) &
- (rsvd_mask | (1ull << 7))) == 0,
+ TEST_ASSERT((pml4e[index[3]] & (rsvd_mask | PTE_LARGE_MASK)) == 0,
"Unexpected reserved bits set.");
- pdpe = addr_gpa2hva(vm, pml4e[index[3]].pfn * vm->page_size);
- TEST_ASSERT(pdpe[index[2]].present,
+ pdpe = addr_gpa2hva(vm, PTE_GET_PFN(pml4e[index[3]]) * vm->page_size);
+ TEST_ASSERT(pdpe[index[2]] & PTE_PRESENT_MASK,
"Expected pdpe to be present for gva: 0x%08lx", vaddr);
- TEST_ASSERT(pdpe[index[2]].page_size == 0,
+ TEST_ASSERT(!(pdpe[index[2]] & PTE_LARGE_MASK),
"Expected pdpe to map a pde not a 1-GByte page.");
- TEST_ASSERT((*(uint64_t*)(&pdpe[index[2]]) & rsvd_mask) == 0,
+ TEST_ASSERT((pdpe[index[2]] & rsvd_mask) == 0,
"Unexpected reserved bits set.");
- pde = addr_gpa2hva(vm, pdpe[index[2]].pfn * vm->page_size);
- TEST_ASSERT(pde[index[1]].present,
+ pde = addr_gpa2hva(vm, PTE_GET_PFN(pdpe[index[2]]) * vm->page_size);
+ TEST_ASSERT(pde[index[1]] & PTE_PRESENT_MASK,
"Expected pde to be present for gva: 0x%08lx", vaddr);
- TEST_ASSERT(pde[index[1]].page_size == 0,
+ TEST_ASSERT(!(pde[index[1]] & PTE_LARGE_MASK),
"Expected pde to map a pte not a 2-MByte page.");
- TEST_ASSERT((*(uint64_t*)(&pde[index[1]]) & rsvd_mask) == 0,
+ TEST_ASSERT((pde[index[1]] & rsvd_mask) == 0,
"Unexpected reserved bits set.");
- pte = addr_gpa2hva(vm, pde[index[1]].pfn * vm->page_size);
- TEST_ASSERT(pte[index[0]].present,
+ pte = addr_gpa2hva(vm, PTE_GET_PFN(pde[index[1]]) * vm->page_size);
+ TEST_ASSERT(pte[index[0]] & PTE_PRESENT_MASK,
"Expected pte to be present for gva: 0x%08lx", vaddr);
return &pte[index[0]];
@@ -360,7 +323,7 @@ static struct pageTableEntry *_vm_get_page_table_entry(struct kvm_vm *vm, int vc
uint64_t vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr)
{
- struct pageTableEntry *pte = _vm_get_page_table_entry(vm, vcpuid, vaddr);
+ uint64_t *pte = _vm_get_page_table_entry(vm, vcpuid, vaddr);
return *(uint64_t *)pte;
}
@@ -368,18 +331,17 @@ uint64_t vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr)
void vm_set_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr,
uint64_t pte)
{
- struct pageTableEntry *new_pte = _vm_get_page_table_entry(vm, vcpuid,
- vaddr);
+ uint64_t *new_pte = _vm_get_page_table_entry(vm, vcpuid, vaddr);
*(uint64_t *)new_pte = pte;
}
void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
{
- struct pageUpperEntry *pml4e, *pml4e_start;
- struct pageUpperEntry *pdpe, *pdpe_start;
- struct pageUpperEntry *pde, *pde_start;
- struct pageTableEntry *pte, *pte_start;
+ uint64_t *pml4e, *pml4e_start;
+ uint64_t *pdpe, *pdpe_start;
+ uint64_t *pde, *pde_start;
+ uint64_t *pte, *pte_start;
if (!vm->pgd_created)
return;
@@ -389,58 +351,58 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
fprintf(stream, "%*s index hvaddr gpaddr "
"addr w exec dirty\n",
indent, "");
- pml4e_start = (struct pageUpperEntry *) addr_gpa2hva(vm, vm->pgd);
+ pml4e_start = (uint64_t *) addr_gpa2hva(vm, vm->pgd);
for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) {
pml4e = &pml4e_start[n1];
- if (!pml4e->present)
+ if (!(*pml4e & PTE_PRESENT_MASK))
continue;
- fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10lx %u "
+ fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10llx %u "
" %u\n",
indent, "",
pml4e - pml4e_start, pml4e,
- addr_hva2gpa(vm, pml4e), (uint64_t) pml4e->pfn,
- pml4e->writable, pml4e->execute_disable);
+ addr_hva2gpa(vm, pml4e), PTE_GET_PFN(*pml4e),
+ !!(*pml4e & PTE_WRITABLE_MASK), !!(*pml4e & PTE_NX_MASK));
- pdpe_start = addr_gpa2hva(vm, pml4e->pfn * vm->page_size);
+ pdpe_start = addr_gpa2hva(vm, *pml4e & PHYSICAL_PAGE_MASK);
for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) {
pdpe = &pdpe_start[n2];
- if (!pdpe->present)
+ if (!(*pdpe & PTE_PRESENT_MASK))
continue;
- fprintf(stream, "%*spdpe 0x%-3zx %p 0x%-12lx 0x%-10lx "
+ fprintf(stream, "%*spdpe 0x%-3zx %p 0x%-12lx 0x%-10llx "
"%u %u\n",
indent, "",
pdpe - pdpe_start, pdpe,
addr_hva2gpa(vm, pdpe),
- (uint64_t) pdpe->pfn, pdpe->writable,
- pdpe->execute_disable);
+ PTE_GET_PFN(*pdpe), !!(*pdpe & PTE_WRITABLE_MASK),
+ !!(*pdpe & PTE_NX_MASK));
- pde_start = addr_gpa2hva(vm, pdpe->pfn * vm->page_size);
+ pde_start = addr_gpa2hva(vm, *pdpe & PHYSICAL_PAGE_MASK);
for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) {
pde = &pde_start[n3];
- if (!pde->present)
+ if (!(*pde & PTE_PRESENT_MASK))
continue;
fprintf(stream, "%*spde 0x%-3zx %p "
- "0x%-12lx 0x%-10lx %u %u\n",
+ "0x%-12lx 0x%-10llx %u %u\n",
indent, "", pde - pde_start, pde,
addr_hva2gpa(vm, pde),
- (uint64_t) pde->pfn, pde->writable,
- pde->execute_disable);
+ PTE_GET_PFN(*pde), !!(*pde & PTE_WRITABLE_MASK),
+ !!(*pde & PTE_NX_MASK));
- pte_start = addr_gpa2hva(vm, pde->pfn * vm->page_size);
+ pte_start = addr_gpa2hva(vm, *pde & PHYSICAL_PAGE_MASK);
for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) {
pte = &pte_start[n4];
- if (!pte->present)
+ if (!(*pte & PTE_PRESENT_MASK))
continue;
fprintf(stream, "%*spte 0x%-3zx %p "
- "0x%-12lx 0x%-10lx %u %u "
+ "0x%-12lx 0x%-10llx %u %u "
" %u 0x%-10lx\n",
indent, "",
pte - pte_start, pte,
addr_hva2gpa(vm, pte),
- (uint64_t) pte->pfn,
- pte->writable,
- pte->execute_disable,
- pte->dirty,
+ PTE_GET_PFN(*pte),
+ !!(*pte & PTE_WRITABLE_MASK),
+ !!(*pte & PTE_NX_MASK),
+ !!(*pte & PTE_DIRTY_MASK),
((uint64_t) n1 << 27)
| ((uint64_t) n2 << 18)
| ((uint64_t) n3 << 9)
@@ -558,8 +520,8 @@ static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector,
vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
{
uint16_t index[4];
- struct pageUpperEntry *pml4e, *pdpe, *pde;
- struct pageTableEntry *pte;
+ uint64_t *pml4e, *pdpe, *pde;
+ uint64_t *pte;
TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
@@ -572,22 +534,22 @@ vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
if (!vm->pgd_created)
goto unmapped_gva;
pml4e = addr_gpa2hva(vm, vm->pgd);
- if (!pml4e[index[3]].present)
+ if (!(pml4e[index[3]] & PTE_PRESENT_MASK))
goto unmapped_gva;
- pdpe = addr_gpa2hva(vm, pml4e[index[3]].pfn * vm->page_size);
- if (!pdpe[index[2]].present)
+ pdpe = addr_gpa2hva(vm, PTE_GET_PFN(pml4e[index[3]]) * vm->page_size);
+ if (!(pdpe[index[2]] & PTE_PRESENT_MASK))
goto unmapped_gva;
- pde = addr_gpa2hva(vm, pdpe[index[2]].pfn * vm->page_size);
- if (!pde[index[1]].present)
+ pde = addr_gpa2hva(vm, PTE_GET_PFN(pdpe[index[2]]) * vm->page_size);
+ if (!(pde[index[1]] & PTE_PRESENT_MASK))
goto unmapped_gva;
- pte = addr_gpa2hva(vm, pde[index[1]].pfn * vm->page_size);
- if (!pte[index[0]].present)
+ pte = addr_gpa2hva(vm, PTE_GET_PFN(pde[index[1]]) * vm->page_size);
+ if (!(pte[index[0]] & PTE_PRESENT_MASK))
goto unmapped_gva;
- return (pte[index[0]].pfn * vm->page_size) + (gva & 0xfffu);
+ return (PTE_GET_PFN(pte[index[0]]) * vm->page_size) + (gva & ~PAGE_MASK);
unmapped_gva:
TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva);
diff --git a/tools/testing/selftests/kvm/x86_64/amx_test.c b/tools/testing/selftests/kvm/x86_64/amx_test.c
index 52a3ef6629e8..76f65c22796f 100644
--- a/tools/testing/selftests/kvm/x86_64/amx_test.c
+++ b/tools/testing/selftests/kvm/x86_64/amx_test.c
@@ -29,7 +29,6 @@
#define X86_FEATURE_XSAVE (1 << 26)
#define X86_FEATURE_OSXSAVE (1 << 27)
-#define PAGE_SIZE (1 << 12)
#define NUM_TILES 8
#define TILE_SIZE 1024
#define XSAVE_SIZE ((NUM_TILES * TILE_SIZE) + PAGE_SIZE)
diff --git a/tools/testing/selftests/kvm/x86_64/emulator_error_test.c b/tools/testing/selftests/kvm/x86_64/emulator_error_test.c
index f070ff0224fa..aeb3850f81bd 100644
--- a/tools/testing/selftests/kvm/x86_64/emulator_error_test.c
+++ b/tools/testing/selftests/kvm/x86_64/emulator_error_test.c
@@ -12,7 +12,6 @@
#include "vmx.h"
#define VCPU_ID 1
-#define PAGE_SIZE 4096
#define MAXPHYADDR 36
#define MEM_REGION_GVA 0x0000123456789000
diff --git a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
index 0d06ffa95d9d..93d77574b255 100644
--- a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
+++ b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
@@ -208,7 +208,7 @@ static bool sanity_check_pmu(struct kvm_vm *vm)
return success;
}
-static struct kvm_pmu_event_filter *make_pmu_event_filter(uint32_t nevents)
+static struct kvm_pmu_event_filter *alloc_pmu_event_filter(uint32_t nevents)
{
struct kvm_pmu_event_filter *f;
int size = sizeof(*f) + nevents * sizeof(f->events[0]);
@@ -220,19 +220,29 @@ static struct kvm_pmu_event_filter *make_pmu_event_filter(uint32_t nevents)
return f;
}
-static struct kvm_pmu_event_filter *event_filter(uint32_t action)
+
+static struct kvm_pmu_event_filter *
+create_pmu_event_filter(const uint64_t event_list[],
+ int nevents, uint32_t action)
{
struct kvm_pmu_event_filter *f;
int i;
- f = make_pmu_event_filter(ARRAY_SIZE(event_list));
+ f = alloc_pmu_event_filter(nevents);
f->action = action;
- for (i = 0; i < ARRAY_SIZE(event_list); i++)
+ for (i = 0; i < nevents; i++)
f->events[i] = event_list[i];
return f;
}
+static struct kvm_pmu_event_filter *event_filter(uint32_t action)
+{
+ return create_pmu_event_filter(event_list,
+ ARRAY_SIZE(event_list),
+ action);
+}
+
/*
* Remove the first occurrence of 'event' (if any) from the filter's
* event list.
@@ -271,6 +281,22 @@ static uint64_t test_with_filter(struct kvm_vm *vm,
return run_vm_to_sync(vm);
}
+static void test_amd_deny_list(struct kvm_vm *vm)
+{
+ uint64_t event = EVENT(0x1C2, 0);
+ struct kvm_pmu_event_filter *f;
+ uint64_t count;
+
+ f = create_pmu_event_filter(&event, 1, KVM_PMU_EVENT_DENY);
+ count = test_with_filter(vm, f);
+
+ free(f);
+ if (count != NUM_BRANCHES)
+ pr_info("%s: Branch instructions retired = %lu (expected %u)\n",
+ __func__, count, NUM_BRANCHES);
+ TEST_ASSERT(count, "Allowed PMU event is not counting");
+}
+
static void test_member_deny_list(struct kvm_vm *vm)
{
struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_DENY);
@@ -453,6 +479,9 @@ int main(int argc, char *argv[])
exit(KSFT_SKIP);
}
+ if (use_amd_pmu())
+ test_amd_deny_list(vm);
+
test_without_filter(vm);
test_member_deny_list(vm);
test_member_allow_list(vm);
diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86_64/smm_test.c
index a626d40fdb48..b4e0c860769e 100644
--- a/tools/testing/selftests/kvm/x86_64/smm_test.c
+++ b/tools/testing/selftests/kvm/x86_64/smm_test.c
@@ -21,8 +21,6 @@
#define VCPU_ID 1
-#define PAGE_SIZE 4096
-
#define SMRAM_SIZE 65536
#define SMRAM_MEMSLOT ((1 << 16) | 1)
#define SMRAM_PAGES (SMRAM_SIZE / PAGE_SIZE)
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
index e683d0ac3e45..19b35c607dc6 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
@@ -32,7 +32,6 @@
#define MSR_IA32_TSC_ADJUST 0x3b
#endif
-#define PAGE_SIZE 4096
#define VCPU_ID 5
#define TSC_ADJUST_VALUE (1ll << 32)
diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
index 865e17146815..bcd370827859 100644
--- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
@@ -23,7 +23,6 @@
#define SHINFO_REGION_GVA 0xc0000000ULL
#define SHINFO_REGION_GPA 0xc0000000ULL
#define SHINFO_REGION_SLOT 10
-#define PAGE_SIZE 4096
#define DUMMY_REGION_GPA (SHINFO_REGION_GPA + (2 * PAGE_SIZE))
#define DUMMY_REGION_SLOT 11
diff --git a/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c b/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c
index adc94452b57c..b30fe9de1d4f 100644
--- a/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c
@@ -15,7 +15,6 @@
#define HCALL_REGION_GPA 0xc0000000ULL
#define HCALL_REGION_SLOT 10
-#define PAGE_SIZE 4096
static struct kvm_vm *vm;
diff --git a/tools/testing/selftests/landlock/base_test.c b/tools/testing/selftests/landlock/base_test.c
index ca40abe9daa8..da9290817866 100644
--- a/tools/testing/selftests/landlock/base_test.c
+++ b/tools/testing/selftests/landlock/base_test.c
@@ -18,10 +18,11 @@
#include "common.h"
#ifndef O_PATH
-#define O_PATH 010000000
+#define O_PATH 010000000
#endif
-TEST(inconsistent_attr) {
+TEST(inconsistent_attr)
+{
const long page_size = sysconf(_SC_PAGESIZE);
char *const buf = malloc(page_size + 1);
struct landlock_ruleset_attr *const ruleset_attr = (void *)buf;
@@ -34,20 +35,26 @@ TEST(inconsistent_attr) {
ASSERT_EQ(EINVAL, errno);
ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, 1, 0));
ASSERT_EQ(EINVAL, errno);
+ ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, 7, 0));
+ ASSERT_EQ(EINVAL, errno);
ASSERT_EQ(-1, landlock_create_ruleset(NULL, 1, 0));
/* The size if less than sizeof(struct landlock_attr_enforce). */
ASSERT_EQ(EFAULT, errno);
- ASSERT_EQ(-1, landlock_create_ruleset(NULL,
- sizeof(struct landlock_ruleset_attr), 0));
+ ASSERT_EQ(-1, landlock_create_ruleset(
+ NULL, sizeof(struct landlock_ruleset_attr), 0));
ASSERT_EQ(EFAULT, errno);
ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, page_size + 1, 0));
ASSERT_EQ(E2BIG, errno);
- ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr,
- sizeof(struct landlock_ruleset_attr), 0));
+ /* Checks minimal valid attribute size. */
+ ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, 8, 0));
+ ASSERT_EQ(ENOMSG, errno);
+ ASSERT_EQ(-1, landlock_create_ruleset(
+ ruleset_attr,
+ sizeof(struct landlock_ruleset_attr), 0));
ASSERT_EQ(ENOMSG, errno);
ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, page_size, 0));
ASSERT_EQ(ENOMSG, errno);
@@ -63,38 +70,44 @@ TEST(inconsistent_attr) {
free(buf);
}
-TEST(abi_version) {
+TEST(abi_version)
+{
const struct landlock_ruleset_attr ruleset_attr = {
.handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE,
};
- ASSERT_EQ(1, landlock_create_ruleset(NULL, 0,
- LANDLOCK_CREATE_RULESET_VERSION));
+ ASSERT_EQ(2, landlock_create_ruleset(NULL, 0,
+ LANDLOCK_CREATE_RULESET_VERSION));
ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, 0,
- LANDLOCK_CREATE_RULESET_VERSION));
+ LANDLOCK_CREATE_RULESET_VERSION));
ASSERT_EQ(EINVAL, errno);
ASSERT_EQ(-1, landlock_create_ruleset(NULL, sizeof(ruleset_attr),
- LANDLOCK_CREATE_RULESET_VERSION));
+ LANDLOCK_CREATE_RULESET_VERSION));
ASSERT_EQ(EINVAL, errno);
- ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr,
- sizeof(ruleset_attr),
- LANDLOCK_CREATE_RULESET_VERSION));
+ ASSERT_EQ(-1,
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr),
+ LANDLOCK_CREATE_RULESET_VERSION));
ASSERT_EQ(EINVAL, errno);
ASSERT_EQ(-1, landlock_create_ruleset(NULL, 0,
- LANDLOCK_CREATE_RULESET_VERSION | 1 << 31));
+ LANDLOCK_CREATE_RULESET_VERSION |
+ 1 << 31));
ASSERT_EQ(EINVAL, errno);
}
-TEST(inval_create_ruleset_flags) {
+/* Tests ordering of syscall argument checks. */
+TEST(create_ruleset_checks_ordering)
+{
const int last_flag = LANDLOCK_CREATE_RULESET_VERSION;
const int invalid_flag = last_flag << 1;
+ int ruleset_fd;
const struct landlock_ruleset_attr ruleset_attr = {
.handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE,
};
+ /* Checks priority for invalid flags. */
ASSERT_EQ(-1, landlock_create_ruleset(NULL, 0, invalid_flag));
ASSERT_EQ(EINVAL, errno);
@@ -102,44 +115,121 @@ TEST(inval_create_ruleset_flags) {
ASSERT_EQ(EINVAL, errno);
ASSERT_EQ(-1, landlock_create_ruleset(NULL, sizeof(ruleset_attr),
- invalid_flag));
+ invalid_flag));
+ ASSERT_EQ(EINVAL, errno);
+
+ ASSERT_EQ(-1,
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr),
+ invalid_flag));
ASSERT_EQ(EINVAL, errno);
- ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr,
- sizeof(ruleset_attr), invalid_flag));
+ /* Checks too big ruleset_attr size. */
+ ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, -1, 0));
+ ASSERT_EQ(E2BIG, errno);
+
+ /* Checks too small ruleset_attr size. */
+ ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, 0, 0));
+ ASSERT_EQ(EINVAL, errno);
+ ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, 1, 0));
ASSERT_EQ(EINVAL, errno);
+
+ /* Checks valid call. */
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
}
-TEST(empty_path_beneath_attr) {
+/* Tests ordering of syscall argument checks. */
+TEST(add_rule_checks_ordering)
+{
const struct landlock_ruleset_attr ruleset_attr = {
.handled_access_fs = LANDLOCK_ACCESS_FS_EXECUTE,
};
- const int ruleset_fd = landlock_create_ruleset(&ruleset_attr,
- sizeof(ruleset_attr), 0);
+ struct landlock_path_beneath_attr path_beneath_attr = {
+ .allowed_access = LANDLOCK_ACCESS_FS_EXECUTE,
+ .parent_fd = -1,
+ };
+ const int ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
ASSERT_LE(0, ruleset_fd);
- /* Similar to struct landlock_path_beneath_attr.parent_fd = 0 */
+ /* Checks invalid flags. */
+ ASSERT_EQ(-1, landlock_add_rule(-1, 0, NULL, 1));
+ ASSERT_EQ(EINVAL, errno);
+
+ /* Checks invalid ruleset FD. */
+ ASSERT_EQ(-1, landlock_add_rule(-1, 0, NULL, 0));
+ ASSERT_EQ(EBADF, errno);
+
+ /* Checks invalid rule type. */
+ ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, 0, NULL, 0));
+ ASSERT_EQ(EINVAL, errno);
+
+ /* Checks invalid rule attr. */
ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
- NULL, 0));
+ NULL, 0));
ASSERT_EQ(EFAULT, errno);
+
+ /* Checks invalid path_beneath.parent_fd. */
+ ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+ &path_beneath_attr, 0));
+ ASSERT_EQ(EBADF, errno);
+
+ /* Checks valid call. */
+ path_beneath_attr.parent_fd =
+ open("/tmp", O_PATH | O_NOFOLLOW | O_DIRECTORY | O_CLOEXEC);
+ ASSERT_LE(0, path_beneath_attr.parent_fd);
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+ &path_beneath_attr, 0));
+ ASSERT_EQ(0, close(path_beneath_attr.parent_fd));
ASSERT_EQ(0, close(ruleset_fd));
}
-TEST(inval_fd_enforce) {
+/* Tests ordering of syscall argument and permission checks. */
+TEST(restrict_self_checks_ordering)
+{
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_fs = LANDLOCK_ACCESS_FS_EXECUTE,
+ };
+ struct landlock_path_beneath_attr path_beneath_attr = {
+ .allowed_access = LANDLOCK_ACCESS_FS_EXECUTE,
+ .parent_fd = -1,
+ };
+ const int ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+
+ ASSERT_LE(0, ruleset_fd);
+ path_beneath_attr.parent_fd =
+ open("/tmp", O_PATH | O_NOFOLLOW | O_DIRECTORY | O_CLOEXEC);
+ ASSERT_LE(0, path_beneath_attr.parent_fd);
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+ &path_beneath_attr, 0));
+ ASSERT_EQ(0, close(path_beneath_attr.parent_fd));
+
+ /* Checks unprivileged enforcement without no_new_privs. */
+ drop_caps(_metadata);
+ ASSERT_EQ(-1, landlock_restrict_self(-1, -1));
+ ASSERT_EQ(EPERM, errno);
+ ASSERT_EQ(-1, landlock_restrict_self(-1, 0));
+ ASSERT_EQ(EPERM, errno);
+ ASSERT_EQ(-1, landlock_restrict_self(ruleset_fd, 0));
+ ASSERT_EQ(EPERM, errno);
+
ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
+ /* Checks invalid flags. */
+ ASSERT_EQ(-1, landlock_restrict_self(-1, -1));
+ ASSERT_EQ(EINVAL, errno);
+
+ /* Checks invalid ruleset FD. */
ASSERT_EQ(-1, landlock_restrict_self(-1, 0));
ASSERT_EQ(EBADF, errno);
-}
-
-TEST(unpriv_enforce_without_no_new_privs) {
- int err;
- drop_caps(_metadata);
- err = landlock_restrict_self(-1, 0);
- ASSERT_EQ(EPERM, errno);
- ASSERT_EQ(err, -1);
+ /* Checks valid call. */
+ ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, 0));
+ ASSERT_EQ(0, close(ruleset_fd));
}
TEST(ruleset_fd_io)
@@ -151,8 +241,8 @@ TEST(ruleset_fd_io)
char buf;
drop_caps(_metadata);
- ruleset_fd = landlock_create_ruleset(&ruleset_attr,
- sizeof(ruleset_attr), 0);
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
ASSERT_LE(0, ruleset_fd);
ASSERT_EQ(-1, write(ruleset_fd, ".", 1));
@@ -197,14 +287,15 @@ TEST(ruleset_fd_transfer)
drop_caps(_metadata);
/* Creates a test ruleset with a simple rule. */
- ruleset_fd_tx = landlock_create_ruleset(&ruleset_attr,
- sizeof(ruleset_attr), 0);
+ ruleset_fd_tx =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
ASSERT_LE(0, ruleset_fd_tx);
- path_beneath_attr.parent_fd = open("/tmp", O_PATH | O_NOFOLLOW |
- O_DIRECTORY | O_CLOEXEC);
+ path_beneath_attr.parent_fd =
+ open("/tmp", O_PATH | O_NOFOLLOW | O_DIRECTORY | O_CLOEXEC);
ASSERT_LE(0, path_beneath_attr.parent_fd);
- ASSERT_EQ(0, landlock_add_rule(ruleset_fd_tx, LANDLOCK_RULE_PATH_BENEATH,
- &path_beneath_attr, 0));
+ ASSERT_EQ(0,
+ landlock_add_rule(ruleset_fd_tx, LANDLOCK_RULE_PATH_BENEATH,
+ &path_beneath_attr, 0));
ASSERT_EQ(0, close(path_beneath_attr.parent_fd));
cmsg = CMSG_FIRSTHDR(&msg);
@@ -215,7 +306,8 @@ TEST(ruleset_fd_transfer)
memcpy(CMSG_DATA(cmsg), &ruleset_fd_tx, sizeof(ruleset_fd_tx));
/* Sends the ruleset FD over a socketpair and then close it. */
- ASSERT_EQ(0, socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, socket_fds));
+ ASSERT_EQ(0, socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0,
+ socket_fds));
ASSERT_EQ(sizeof(data_tx), sendmsg(socket_fds[0], &msg, 0));
ASSERT_EQ(0, close(socket_fds[0]));
ASSERT_EQ(0, close(ruleset_fd_tx));
@@ -226,7 +318,8 @@ TEST(ruleset_fd_transfer)
int ruleset_fd_rx;
*(char *)msg.msg_iov->iov_base = '\0';
- ASSERT_EQ(sizeof(data_tx), recvmsg(socket_fds[1], &msg, MSG_CMSG_CLOEXEC));
+ ASSERT_EQ(sizeof(data_tx),
+ recvmsg(socket_fds[1], &msg, MSG_CMSG_CLOEXEC));
ASSERT_EQ('.', *(char *)msg.msg_iov->iov_base);
ASSERT_EQ(0, close(socket_fds[1]));
cmsg = CMSG_FIRSTHDR(&msg);
diff --git a/tools/testing/selftests/landlock/common.h b/tools/testing/selftests/landlock/common.h
index 183b7e8e1b95..7ba18eb23783 100644
--- a/tools/testing/selftests/landlock/common.h
+++ b/tools/testing/selftests/landlock/common.h
@@ -25,6 +25,7 @@
* this to be possible, we must not call abort() but instead exit smoothly
* (hence the step print).
*/
+/* clang-format off */
#define TEST_F_FORK(fixture_name, test_name) \
static void fixture_name##_##test_name##_child( \
struct __test_metadata *_metadata, \
@@ -71,11 +72,12 @@
FIXTURE_DATA(fixture_name) __attribute__((unused)) *self, \
const FIXTURE_VARIANT(fixture_name) \
__attribute__((unused)) *variant)
+/* clang-format on */
#ifndef landlock_create_ruleset
-static inline int landlock_create_ruleset(
- const struct landlock_ruleset_attr *const attr,
- const size_t size, const __u32 flags)
+static inline int
+landlock_create_ruleset(const struct landlock_ruleset_attr *const attr,
+ const size_t size, const __u32 flags)
{
return syscall(__NR_landlock_create_ruleset, attr, size, flags);
}
@@ -83,17 +85,18 @@ static inline int landlock_create_ruleset(
#ifndef landlock_add_rule
static inline int landlock_add_rule(const int ruleset_fd,
- const enum landlock_rule_type rule_type,
- const void *const rule_attr, const __u32 flags)
+ const enum landlock_rule_type rule_type,
+ const void *const rule_attr,
+ const __u32 flags)
{
- return syscall(__NR_landlock_add_rule, ruleset_fd, rule_type,
- rule_attr, flags);
+ return syscall(__NR_landlock_add_rule, ruleset_fd, rule_type, rule_attr,
+ flags);
}
#endif
#ifndef landlock_restrict_self
static inline int landlock_restrict_self(const int ruleset_fd,
- const __u32 flags)
+ const __u32 flags)
{
return syscall(__NR_landlock_restrict_self, ruleset_fd, flags);
}
@@ -111,69 +114,76 @@ static void _init_caps(struct __test_metadata *const _metadata, bool drop_all)
};
cap_p = cap_get_proc();
- EXPECT_NE(NULL, cap_p) {
+ EXPECT_NE(NULL, cap_p)
+ {
TH_LOG("Failed to cap_get_proc: %s", strerror(errno));
}
- EXPECT_NE(-1, cap_clear(cap_p)) {
+ EXPECT_NE(-1, cap_clear(cap_p))
+ {
TH_LOG("Failed to cap_clear: %s", strerror(errno));
}
if (!drop_all) {
EXPECT_NE(-1, cap_set_flag(cap_p, CAP_PERMITTED,
- ARRAY_SIZE(caps), caps, CAP_SET)) {
+ ARRAY_SIZE(caps), caps, CAP_SET))
+ {
TH_LOG("Failed to cap_set_flag: %s", strerror(errno));
}
}
- EXPECT_NE(-1, cap_set_proc(cap_p)) {
+ EXPECT_NE(-1, cap_set_proc(cap_p))
+ {
TH_LOG("Failed to cap_set_proc: %s", strerror(errno));
}
- EXPECT_NE(-1, cap_free(cap_p)) {
+ EXPECT_NE(-1, cap_free(cap_p))
+ {
TH_LOG("Failed to cap_free: %s", strerror(errno));
}
}
/* We cannot put such helpers in a library because of kselftest_harness.h . */
-__attribute__((__unused__))
-static void disable_caps(struct __test_metadata *const _metadata)
+__attribute__((__unused__)) static void
+disable_caps(struct __test_metadata *const _metadata)
{
_init_caps(_metadata, false);
}
-__attribute__((__unused__))
-static void drop_caps(struct __test_metadata *const _metadata)
+__attribute__((__unused__)) static void
+drop_caps(struct __test_metadata *const _metadata)
{
_init_caps(_metadata, true);
}
static void _effective_cap(struct __test_metadata *const _metadata,
- const cap_value_t caps, const cap_flag_value_t value)
+ const cap_value_t caps, const cap_flag_value_t value)
{
cap_t cap_p;
cap_p = cap_get_proc();
- EXPECT_NE(NULL, cap_p) {
+ EXPECT_NE(NULL, cap_p)
+ {
TH_LOG("Failed to cap_get_proc: %s", strerror(errno));
}
- EXPECT_NE(-1, cap_set_flag(cap_p, CAP_EFFECTIVE, 1, &caps, value)) {
+ EXPECT_NE(-1, cap_set_flag(cap_p, CAP_EFFECTIVE, 1, &caps, value))
+ {
TH_LOG("Failed to cap_set_flag: %s", strerror(errno));
}
- EXPECT_NE(-1, cap_set_proc(cap_p)) {
+ EXPECT_NE(-1, cap_set_proc(cap_p))
+ {
TH_LOG("Failed to cap_set_proc: %s", strerror(errno));
}
- EXPECT_NE(-1, cap_free(cap_p)) {
+ EXPECT_NE(-1, cap_free(cap_p))
+ {
TH_LOG("Failed to cap_free: %s", strerror(errno));
}
}
-__attribute__((__unused__))
-static void set_cap(struct __test_metadata *const _metadata,
- const cap_value_t caps)
+__attribute__((__unused__)) static void
+set_cap(struct __test_metadata *const _metadata, const cap_value_t caps)
{
_effective_cap(_metadata, caps, CAP_SET);
}
-__attribute__((__unused__))
-static void clear_cap(struct __test_metadata *const _metadata,
- const cap_value_t caps)
+__attribute__((__unused__)) static void
+clear_cap(struct __test_metadata *const _metadata, const cap_value_t caps)
{
_effective_cap(_metadata, caps, CAP_CLEAR);
}
diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c
index 10c9a1e4ebd9..21a2ce8fa739 100644
--- a/tools/testing/selftests/landlock/fs_test.c
+++ b/tools/testing/selftests/landlock/fs_test.c
@@ -22,8 +22,21 @@
#include "common.h"
-#define TMP_DIR "tmp"
-#define BINARY_PATH "./true"
+#ifndef renameat2
+int renameat2(int olddirfd, const char *oldpath, int newdirfd,
+ const char *newpath, unsigned int flags)
+{
+ return syscall(__NR_renameat2, olddirfd, oldpath, newdirfd, newpath,
+ flags);
+}
+#endif
+
+#ifndef RENAME_EXCHANGE
+#define RENAME_EXCHANGE (1 << 1)
+#endif
+
+#define TMP_DIR "tmp"
+#define BINARY_PATH "./true"
/* Paths (sibling number and depth) */
static const char dir_s1d1[] = TMP_DIR "/s1d1";
@@ -75,7 +88,7 @@ static const char dir_s3d3[] = TMP_DIR "/s3d1/s3d2/s3d3";
*/
static void mkdir_parents(struct __test_metadata *const _metadata,
- const char *const path)
+ const char *const path)
{
char *walker;
const char *parent;
@@ -90,9 +103,10 @@ static void mkdir_parents(struct __test_metadata *const _metadata,
continue;
walker[i] = '\0';
err = mkdir(parent, 0700);
- ASSERT_FALSE(err && errno != EEXIST) {
- TH_LOG("Failed to create directory \"%s\": %s",
- parent, strerror(errno));
+ ASSERT_FALSE(err && errno != EEXIST)
+ {
+ TH_LOG("Failed to create directory \"%s\": %s", parent,
+ strerror(errno));
}
walker[i] = '/';
}
@@ -100,22 +114,24 @@ static void mkdir_parents(struct __test_metadata *const _metadata,
}
static void create_directory(struct __test_metadata *const _metadata,
- const char *const path)
+ const char *const path)
{
mkdir_parents(_metadata, path);
- ASSERT_EQ(0, mkdir(path, 0700)) {
+ ASSERT_EQ(0, mkdir(path, 0700))
+ {
TH_LOG("Failed to create directory \"%s\": %s", path,
- strerror(errno));
+ strerror(errno));
}
}
static void create_file(struct __test_metadata *const _metadata,
- const char *const path)
+ const char *const path)
{
mkdir_parents(_metadata, path);
- ASSERT_EQ(0, mknod(path, S_IFREG | 0700, 0)) {
+ ASSERT_EQ(0, mknod(path, S_IFREG | 0700, 0))
+ {
TH_LOG("Failed to create file \"%s\": %s", path,
- strerror(errno));
+ strerror(errno));
}
}
@@ -130,7 +146,7 @@ static int remove_path(const char *const path)
goto out;
}
if (unlink(path) && rmdir(path)) {
- if (errno != ENOENT)
+ if (errno != ENOENT && errno != ENOTDIR)
err = errno;
goto out;
}
@@ -221,8 +237,9 @@ static void remove_layout1(struct __test_metadata *const _metadata)
EXPECT_EQ(0, remove_path(dir_s3d2));
}
-FIXTURE(layout1) {
-};
+/* clang-format off */
+FIXTURE(layout1) {};
+/* clang-format on */
FIXTURE_SETUP(layout1)
{
@@ -242,7 +259,8 @@ FIXTURE_TEARDOWN(layout1)
* This helper enables to use the ASSERT_* macros and print the line number
* pointing to the test caller.
*/
-static int test_open_rel(const int dirfd, const char *const path, const int flags)
+static int test_open_rel(const int dirfd, const char *const path,
+ const int flags)
{
int fd;
@@ -291,23 +309,23 @@ TEST_F_FORK(layout1, inval)
{
struct landlock_path_beneath_attr path_beneath = {
.allowed_access = LANDLOCK_ACCESS_FS_READ_FILE |
- LANDLOCK_ACCESS_FS_WRITE_FILE,
+ LANDLOCK_ACCESS_FS_WRITE_FILE,
.parent_fd = -1,
};
struct landlock_ruleset_attr ruleset_attr = {
.handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE |
- LANDLOCK_ACCESS_FS_WRITE_FILE,
+ LANDLOCK_ACCESS_FS_WRITE_FILE,
};
int ruleset_fd;
- path_beneath.parent_fd = open(dir_s1d2, O_PATH | O_DIRECTORY |
- O_CLOEXEC);
+ path_beneath.parent_fd =
+ open(dir_s1d2, O_PATH | O_DIRECTORY | O_CLOEXEC);
ASSERT_LE(0, path_beneath.parent_fd);
ruleset_fd = open(dir_s1d1, O_PATH | O_DIRECTORY | O_CLOEXEC);
ASSERT_LE(0, ruleset_fd);
ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
- &path_beneath, 0));
+ &path_beneath, 0));
/* Returns EBADF because ruleset_fd is not a landlock-ruleset FD. */
ASSERT_EQ(EBADF, errno);
ASSERT_EQ(0, close(ruleset_fd));
@@ -315,55 +333,55 @@ TEST_F_FORK(layout1, inval)
ruleset_fd = open(dir_s1d1, O_DIRECTORY | O_CLOEXEC);
ASSERT_LE(0, ruleset_fd);
ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
- &path_beneath, 0));
+ &path_beneath, 0));
/* Returns EBADFD because ruleset_fd is not a valid ruleset. */
ASSERT_EQ(EBADFD, errno);
ASSERT_EQ(0, close(ruleset_fd));
/* Gets a real ruleset. */
- ruleset_fd = landlock_create_ruleset(&ruleset_attr,
- sizeof(ruleset_attr), 0);
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
ASSERT_LE(0, ruleset_fd);
ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
- &path_beneath, 0));
+ &path_beneath, 0));
ASSERT_EQ(0, close(path_beneath.parent_fd));
/* Tests without O_PATH. */
path_beneath.parent_fd = open(dir_s1d2, O_DIRECTORY | O_CLOEXEC);
ASSERT_LE(0, path_beneath.parent_fd);
ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
- &path_beneath, 0));
+ &path_beneath, 0));
ASSERT_EQ(0, close(path_beneath.parent_fd));
/* Tests with a ruleset FD. */
path_beneath.parent_fd = ruleset_fd;
ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
- &path_beneath, 0));
+ &path_beneath, 0));
ASSERT_EQ(EBADFD, errno);
/* Checks unhandled allowed_access. */
- path_beneath.parent_fd = open(dir_s1d2, O_PATH | O_DIRECTORY |
- O_CLOEXEC);
+ path_beneath.parent_fd =
+ open(dir_s1d2, O_PATH | O_DIRECTORY | O_CLOEXEC);
ASSERT_LE(0, path_beneath.parent_fd);
/* Test with legitimate values. */
path_beneath.allowed_access |= LANDLOCK_ACCESS_FS_EXECUTE;
ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
- &path_beneath, 0));
+ &path_beneath, 0));
ASSERT_EQ(EINVAL, errno);
path_beneath.allowed_access &= ~LANDLOCK_ACCESS_FS_EXECUTE;
/* Test with unknown (64-bits) value. */
path_beneath.allowed_access |= (1ULL << 60);
ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
- &path_beneath, 0));
+ &path_beneath, 0));
ASSERT_EQ(EINVAL, errno);
path_beneath.allowed_access &= ~(1ULL << 60);
/* Test with no access. */
path_beneath.allowed_access = 0;
ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
- &path_beneath, 0));
+ &path_beneath, 0));
ASSERT_EQ(ENOMSG, errno);
path_beneath.allowed_access &= ~(1ULL << 60);
@@ -376,12 +394,14 @@ TEST_F_FORK(layout1, inval)
ASSERT_EQ(0, close(ruleset_fd));
}
+/* clang-format off */
+
#define ACCESS_FILE ( \
LANDLOCK_ACCESS_FS_EXECUTE | \
LANDLOCK_ACCESS_FS_WRITE_FILE | \
LANDLOCK_ACCESS_FS_READ_FILE)
-#define ACCESS_LAST LANDLOCK_ACCESS_FS_MAKE_SYM
+#define ACCESS_LAST LANDLOCK_ACCESS_FS_REFER
#define ACCESS_ALL ( \
ACCESS_FILE | \
@@ -394,55 +414,90 @@ TEST_F_FORK(layout1, inval)
LANDLOCK_ACCESS_FS_MAKE_SOCK | \
LANDLOCK_ACCESS_FS_MAKE_FIFO | \
LANDLOCK_ACCESS_FS_MAKE_BLOCK | \
+ LANDLOCK_ACCESS_FS_MAKE_SYM | \
ACCESS_LAST)
-TEST_F_FORK(layout1, file_access_rights)
+/* clang-format on */
+
+TEST_F_FORK(layout1, file_and_dir_access_rights)
{
__u64 access;
int err;
- struct landlock_path_beneath_attr path_beneath = {};
+ struct landlock_path_beneath_attr path_beneath_file = {},
+ path_beneath_dir = {};
struct landlock_ruleset_attr ruleset_attr = {
.handled_access_fs = ACCESS_ALL,
};
- const int ruleset_fd = landlock_create_ruleset(&ruleset_attr,
- sizeof(ruleset_attr), 0);
+ const int ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
ASSERT_LE(0, ruleset_fd);
/* Tests access rights for files. */
- path_beneath.parent_fd = open(file1_s1d2, O_PATH | O_CLOEXEC);
- ASSERT_LE(0, path_beneath.parent_fd);
+ path_beneath_file.parent_fd = open(file1_s1d2, O_PATH | O_CLOEXEC);
+ ASSERT_LE(0, path_beneath_file.parent_fd);
+
+ /* Tests access rights for directories. */
+ path_beneath_dir.parent_fd =
+ open(dir_s1d2, O_PATH | O_DIRECTORY | O_CLOEXEC);
+ ASSERT_LE(0, path_beneath_dir.parent_fd);
+
for (access = 1; access <= ACCESS_LAST; access <<= 1) {
- path_beneath.allowed_access = access;
+ path_beneath_dir.allowed_access = access;
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd,
+ LANDLOCK_RULE_PATH_BENEATH,
+ &path_beneath_dir, 0));
+
+ path_beneath_file.allowed_access = access;
err = landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
- &path_beneath, 0);
- if ((access | ACCESS_FILE) == ACCESS_FILE) {
+ &path_beneath_file, 0);
+ if (access & ACCESS_FILE) {
ASSERT_EQ(0, err);
} else {
ASSERT_EQ(-1, err);
ASSERT_EQ(EINVAL, errno);
}
}
- ASSERT_EQ(0, close(path_beneath.parent_fd));
+ ASSERT_EQ(0, close(path_beneath_file.parent_fd));
+ ASSERT_EQ(0, close(path_beneath_dir.parent_fd));
+ ASSERT_EQ(0, close(ruleset_fd));
+}
+
+TEST_F_FORK(layout1, unknown_access_rights)
+{
+ __u64 access_mask;
+
+ for (access_mask = 1ULL << 63; access_mask != ACCESS_LAST;
+ access_mask >>= 1) {
+ struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_fs = access_mask,
+ };
+
+ ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr,
+ sizeof(ruleset_attr), 0));
+ ASSERT_EQ(EINVAL, errno);
+ }
}
static void add_path_beneath(struct __test_metadata *const _metadata,
- const int ruleset_fd, const __u64 allowed_access,
- const char *const path)
+ const int ruleset_fd, const __u64 allowed_access,
+ const char *const path)
{
struct landlock_path_beneath_attr path_beneath = {
.allowed_access = allowed_access,
};
path_beneath.parent_fd = open(path, O_PATH | O_CLOEXEC);
- ASSERT_LE(0, path_beneath.parent_fd) {
+ ASSERT_LE(0, path_beneath.parent_fd)
+ {
TH_LOG("Failed to open directory \"%s\": %s", path,
- strerror(errno));
+ strerror(errno));
}
ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
- &path_beneath, 0)) {
+ &path_beneath, 0))
+ {
TH_LOG("Failed to update the ruleset with \"%s\": %s", path,
- strerror(errno));
+ strerror(errno));
}
ASSERT_EQ(0, close(path_beneath.parent_fd));
}
@@ -452,6 +507,8 @@ struct rule {
__u64 access;
};
+/* clang-format off */
+
#define ACCESS_RO ( \
LANDLOCK_ACCESS_FS_READ_FILE | \
LANDLOCK_ACCESS_FS_READ_DIR)
@@ -460,39 +517,46 @@ struct rule {
ACCESS_RO | \
LANDLOCK_ACCESS_FS_WRITE_FILE)
+/* clang-format on */
+
static int create_ruleset(struct __test_metadata *const _metadata,
- const __u64 handled_access_fs, const struct rule rules[])
+ const __u64 handled_access_fs,
+ const struct rule rules[])
{
int ruleset_fd, i;
struct landlock_ruleset_attr ruleset_attr = {
.handled_access_fs = handled_access_fs,
};
- ASSERT_NE(NULL, rules) {
+ ASSERT_NE(NULL, rules)
+ {
TH_LOG("No rule list");
}
- ASSERT_NE(NULL, rules[0].path) {
+ ASSERT_NE(NULL, rules[0].path)
+ {
TH_LOG("Empty rule list");
}
- ruleset_fd = landlock_create_ruleset(&ruleset_attr,
- sizeof(ruleset_attr), 0);
- ASSERT_LE(0, ruleset_fd) {
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd)
+ {
TH_LOG("Failed to create a ruleset: %s", strerror(errno));
}
for (i = 0; rules[i].path; i++) {
add_path_beneath(_metadata, ruleset_fd, rules[i].access,
- rules[i].path);
+ rules[i].path);
}
return ruleset_fd;
}
static void enforce_ruleset(struct __test_metadata *const _metadata,
- const int ruleset_fd)
+ const int ruleset_fd)
{
ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
- ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, 0)) {
+ ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, 0))
+ {
TH_LOG("Failed to enforce ruleset: %s", strerror(errno));
}
}
@@ -503,13 +567,14 @@ TEST_F_FORK(layout1, proc_nsfs)
{
.path = "/dev/null",
.access = LANDLOCK_ACCESS_FS_READ_FILE |
- LANDLOCK_ACCESS_FS_WRITE_FILE,
+ LANDLOCK_ACCESS_FS_WRITE_FILE,
},
- {}
+ {},
};
struct landlock_path_beneath_attr path_beneath;
- const int ruleset_fd = create_ruleset(_metadata, rules[0].access |
- LANDLOCK_ACCESS_FS_READ_DIR, rules);
+ const int ruleset_fd = create_ruleset(
+ _metadata, rules[0].access | LANDLOCK_ACCESS_FS_READ_DIR,
+ rules);
ASSERT_LE(0, ruleset_fd);
ASSERT_EQ(0, test_open("/proc/self/ns/mnt", O_RDONLY));
@@ -536,22 +601,23 @@ TEST_F_FORK(layout1, proc_nsfs)
* references to a ruleset.
*/
path_beneath.allowed_access = LANDLOCK_ACCESS_FS_READ_FILE |
- LANDLOCK_ACCESS_FS_WRITE_FILE,
+ LANDLOCK_ACCESS_FS_WRITE_FILE,
path_beneath.parent_fd = open("/proc/self/ns/mnt", O_PATH | O_CLOEXEC);
ASSERT_LE(0, path_beneath.parent_fd);
ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
- &path_beneath, 0));
+ &path_beneath, 0));
ASSERT_EQ(EBADFD, errno);
ASSERT_EQ(0, close(path_beneath.parent_fd));
}
-TEST_F_FORK(layout1, unpriv) {
+TEST_F_FORK(layout1, unpriv)
+{
const struct rule rules[] = {
{
.path = dir_s1d2,
.access = ACCESS_RO,
},
- {}
+ {},
};
int ruleset_fd;
@@ -577,9 +643,9 @@ TEST_F_FORK(layout1, effective_access)
{
.path = file1_s2d2,
.access = LANDLOCK_ACCESS_FS_READ_FILE |
- LANDLOCK_ACCESS_FS_WRITE_FILE,
+ LANDLOCK_ACCESS_FS_WRITE_FILE,
},
- {}
+ {},
};
const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
char buf;
@@ -589,17 +655,23 @@ TEST_F_FORK(layout1, effective_access)
enforce_ruleset(_metadata, ruleset_fd);
ASSERT_EQ(0, close(ruleset_fd));
- /* Tests on a directory. */
+ /* Tests on a directory (with or without O_PATH). */
ASSERT_EQ(EACCES, test_open("/", O_RDONLY));
+ ASSERT_EQ(0, test_open("/", O_RDONLY | O_PATH));
ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY));
+ ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY | O_PATH));
ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY));
+ ASSERT_EQ(0, test_open(file1_s1d1, O_RDONLY | O_PATH));
+
ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY));
ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY));
ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY));
ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
- /* Tests on a file. */
+ /* Tests on a file (with or without O_PATH). */
ASSERT_EQ(EACCES, test_open(dir_s2d2, O_RDONLY));
+ ASSERT_EQ(0, test_open(dir_s2d2, O_RDONLY | O_PATH));
+
ASSERT_EQ(0, test_open(file1_s2d2, O_RDONLY));
/* Checks effective read and write actions. */
@@ -626,7 +698,7 @@ TEST_F_FORK(layout1, unhandled_access)
.path = dir_s1d2,
.access = ACCESS_RO,
},
- {}
+ {},
};
/* Here, we only handle read accesses, not write accesses. */
const int ruleset_fd = create_ruleset(_metadata, ACCESS_RO, rules);
@@ -653,14 +725,14 @@ TEST_F_FORK(layout1, ruleset_overlap)
{
.path = dir_s1d2,
.access = LANDLOCK_ACCESS_FS_READ_FILE |
- LANDLOCK_ACCESS_FS_WRITE_FILE,
+ LANDLOCK_ACCESS_FS_WRITE_FILE,
},
{
.path = dir_s1d2,
.access = LANDLOCK_ACCESS_FS_READ_FILE |
- LANDLOCK_ACCESS_FS_READ_DIR,
+ LANDLOCK_ACCESS_FS_READ_DIR,
},
- {}
+ {},
};
const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
@@ -687,6 +759,113 @@ TEST_F_FORK(layout1, ruleset_overlap)
ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY | O_DIRECTORY));
}
+TEST_F_FORK(layout1, layer_rule_unions)
+{
+ const struct rule layer1[] = {
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ /* dir_s1d3 should allow READ_FILE and WRITE_FILE (O_RDWR). */
+ {
+ .path = dir_s1d3,
+ .access = LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+ {},
+ };
+ const struct rule layer2[] = {
+ /* Doesn't change anything from layer1. */
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+ {},
+ };
+ const struct rule layer3[] = {
+ /* Only allows write (but not read) to dir_s1d3. */
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+ {},
+ };
+ int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer1);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks s1d1 hierarchy with layer1. */
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDWR));
+ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
+
+ /* Checks s1d2 hierarchy with layer1. */
+ ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_RDWR));
+ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
+
+ /* Checks s1d3 hierarchy with layer1. */
+ ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
+ ASSERT_EQ(0, test_open(file1_s1d3, O_WRONLY));
+ /* dir_s1d3 should allow READ_FILE and WRITE_FILE (O_RDWR). */
+ ASSERT_EQ(0, test_open(file1_s1d3, O_RDWR));
+ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
+
+ /* Doesn't change anything from layer1. */
+ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer2);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks s1d1 hierarchy with layer2. */
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDWR));
+ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
+
+ /* Checks s1d2 hierarchy with layer2. */
+ ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_RDWR));
+ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
+
+ /* Checks s1d3 hierarchy with layer2. */
+ ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
+ ASSERT_EQ(0, test_open(file1_s1d3, O_WRONLY));
+ /* dir_s1d3 should allow READ_FILE and WRITE_FILE (O_RDWR). */
+ ASSERT_EQ(0, test_open(file1_s1d3, O_RDWR));
+ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
+
+ /* Only allows write (but not read) to dir_s1d3. */
+ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer3);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks s1d1 hierarchy with layer3. */
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDWR));
+ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
+
+ /* Checks s1d2 hierarchy with layer3. */
+ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_RDWR));
+ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
+
+ /* Checks s1d3 hierarchy with layer3. */
+ ASSERT_EQ(EACCES, test_open(file1_s1d3, O_RDONLY));
+ ASSERT_EQ(0, test_open(file1_s1d3, O_WRONLY));
+ /* dir_s1d3 should now deny READ_FILE and WRITE_FILE (O_RDWR). */
+ ASSERT_EQ(EACCES, test_open(file1_s1d3, O_RDWR));
+ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
+}
+
TEST_F_FORK(layout1, non_overlapping_accesses)
{
const struct rule layer1[] = {
@@ -694,22 +873,22 @@ TEST_F_FORK(layout1, non_overlapping_accesses)
.path = dir_s1d2,
.access = LANDLOCK_ACCESS_FS_MAKE_REG,
},
- {}
+ {},
};
const struct rule layer2[] = {
{
.path = dir_s1d3,
.access = LANDLOCK_ACCESS_FS_REMOVE_FILE,
},
- {}
+ {},
};
int ruleset_fd;
ASSERT_EQ(0, unlink(file1_s1d1));
ASSERT_EQ(0, unlink(file1_s1d2));
- ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_MAKE_REG,
- layer1);
+ ruleset_fd =
+ create_ruleset(_metadata, LANDLOCK_ACCESS_FS_MAKE_REG, layer1);
ASSERT_LE(0, ruleset_fd);
enforce_ruleset(_metadata, ruleset_fd);
ASSERT_EQ(0, close(ruleset_fd));
@@ -720,7 +899,7 @@ TEST_F_FORK(layout1, non_overlapping_accesses)
ASSERT_EQ(0, unlink(file1_s1d2));
ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_REMOVE_FILE,
- layer2);
+ layer2);
ASSERT_LE(0, ruleset_fd);
enforce_ruleset(_metadata, ruleset_fd);
ASSERT_EQ(0, close(ruleset_fd));
@@ -758,7 +937,7 @@ TEST_F_FORK(layout1, interleaved_masked_accesses)
.path = file1_s1d3,
.access = LANDLOCK_ACCESS_FS_READ_FILE,
},
- {}
+ {},
};
/* First rule with write restrictions. */
const struct rule layer2_read_write[] = {
@@ -766,14 +945,14 @@ TEST_F_FORK(layout1, interleaved_masked_accesses)
{
.path = dir_s1d3,
.access = LANDLOCK_ACCESS_FS_READ_FILE |
- LANDLOCK_ACCESS_FS_WRITE_FILE,
+ LANDLOCK_ACCESS_FS_WRITE_FILE,
},
/* ...but also denies read access via its grandparent directory. */
{
.path = dir_s1d2,
.access = LANDLOCK_ACCESS_FS_WRITE_FILE,
},
- {}
+ {},
};
const struct rule layer3_read[] = {
/* Allows read access via its great-grandparent directory. */
@@ -781,7 +960,7 @@ TEST_F_FORK(layout1, interleaved_masked_accesses)
.path = dir_s1d1,
.access = LANDLOCK_ACCESS_FS_READ_FILE,
},
- {}
+ {},
};
const struct rule layer4_read_write[] = {
/*
@@ -792,7 +971,7 @@ TEST_F_FORK(layout1, interleaved_masked_accesses)
.path = dir_s1d2,
.access = LANDLOCK_ACCESS_FS_READ_FILE,
},
- {}
+ {},
};
const struct rule layer5_read[] = {
/*
@@ -803,7 +982,7 @@ TEST_F_FORK(layout1, interleaved_masked_accesses)
.path = dir_s1d2,
.access = LANDLOCK_ACCESS_FS_READ_FILE,
},
- {}
+ {},
};
const struct rule layer6_execute[] = {
/*
@@ -814,7 +993,7 @@ TEST_F_FORK(layout1, interleaved_masked_accesses)
.path = dir_s2d1,
.access = LANDLOCK_ACCESS_FS_EXECUTE,
},
- {}
+ {},
};
const struct rule layer7_read_write[] = {
/*
@@ -825,12 +1004,12 @@ TEST_F_FORK(layout1, interleaved_masked_accesses)
.path = dir_s1d2,
.access = LANDLOCK_ACCESS_FS_WRITE_FILE,
},
- {}
+ {},
};
int ruleset_fd;
ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE,
- layer1_read);
+ layer1_read);
ASSERT_LE(0, ruleset_fd);
enforce_ruleset(_metadata, ruleset_fd);
ASSERT_EQ(0, close(ruleset_fd));
@@ -840,8 +1019,10 @@ TEST_F_FORK(layout1, interleaved_masked_accesses)
ASSERT_EQ(EACCES, test_open(file2_s1d3, O_RDONLY));
ASSERT_EQ(0, test_open(file2_s1d3, O_WRONLY));
- ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE |
- LANDLOCK_ACCESS_FS_WRITE_FILE, layer2_read_write);
+ ruleset_fd = create_ruleset(_metadata,
+ LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_WRITE_FILE,
+ layer2_read_write);
ASSERT_LE(0, ruleset_fd);
enforce_ruleset(_metadata, ruleset_fd);
ASSERT_EQ(0, close(ruleset_fd));
@@ -852,7 +1033,7 @@ TEST_F_FORK(layout1, interleaved_masked_accesses)
ASSERT_EQ(0, test_open(file2_s1d3, O_WRONLY));
ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE,
- layer3_read);
+ layer3_read);
ASSERT_LE(0, ruleset_fd);
enforce_ruleset(_metadata, ruleset_fd);
ASSERT_EQ(0, close(ruleset_fd));
@@ -863,8 +1044,10 @@ TEST_F_FORK(layout1, interleaved_masked_accesses)
ASSERT_EQ(0, test_open(file2_s1d3, O_WRONLY));
/* This time, denies write access for the file hierarchy. */
- ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE |
- LANDLOCK_ACCESS_FS_WRITE_FILE, layer4_read_write);
+ ruleset_fd = create_ruleset(_metadata,
+ LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_WRITE_FILE,
+ layer4_read_write);
ASSERT_LE(0, ruleset_fd);
enforce_ruleset(_metadata, ruleset_fd);
ASSERT_EQ(0, close(ruleset_fd));
@@ -879,7 +1062,7 @@ TEST_F_FORK(layout1, interleaved_masked_accesses)
ASSERT_EQ(EACCES, test_open(file2_s1d3, O_WRONLY));
ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE,
- layer5_read);
+ layer5_read);
ASSERT_LE(0, ruleset_fd);
enforce_ruleset(_metadata, ruleset_fd);
ASSERT_EQ(0, close(ruleset_fd));
@@ -891,7 +1074,7 @@ TEST_F_FORK(layout1, interleaved_masked_accesses)
ASSERT_EQ(EACCES, test_open(file2_s1d3, O_RDONLY));
ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_EXECUTE,
- layer6_execute);
+ layer6_execute);
ASSERT_LE(0, ruleset_fd);
enforce_ruleset(_metadata, ruleset_fd);
ASSERT_EQ(0, close(ruleset_fd));
@@ -902,8 +1085,10 @@ TEST_F_FORK(layout1, interleaved_masked_accesses)
ASSERT_EQ(EACCES, test_open(file2_s1d3, O_WRONLY));
ASSERT_EQ(EACCES, test_open(file2_s1d3, O_RDONLY));
- ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE |
- LANDLOCK_ACCESS_FS_WRITE_FILE, layer7_read_write);
+ ruleset_fd = create_ruleset(_metadata,
+ LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_WRITE_FILE,
+ layer7_read_write);
ASSERT_LE(0, ruleset_fd);
enforce_ruleset(_metadata, ruleset_fd);
ASSERT_EQ(0, close(ruleset_fd));
@@ -921,9 +1106,9 @@ TEST_F_FORK(layout1, inherit_subset)
{
.path = dir_s1d2,
.access = LANDLOCK_ACCESS_FS_READ_FILE |
- LANDLOCK_ACCESS_FS_READ_DIR,
+ LANDLOCK_ACCESS_FS_READ_DIR,
},
- {}
+ {},
};
const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
@@ -949,7 +1134,7 @@ TEST_F_FORK(layout1, inherit_subset)
* ANDed with the previous ones.
*/
add_path_beneath(_metadata, ruleset_fd, LANDLOCK_ACCESS_FS_WRITE_FILE,
- dir_s1d2);
+ dir_s1d2);
/*
* According to ruleset_fd, dir_s1d2 should now have the
* LANDLOCK_ACCESS_FS_READ_FILE and LANDLOCK_ACCESS_FS_WRITE_FILE
@@ -1004,7 +1189,7 @@ TEST_F_FORK(layout1, inherit_subset)
* that there was no rule tied to it before.
*/
add_path_beneath(_metadata, ruleset_fd, LANDLOCK_ACCESS_FS_WRITE_FILE,
- dir_s1d3);
+ dir_s1d3);
enforce_ruleset(_metadata, ruleset_fd);
ASSERT_EQ(0, close(ruleset_fd));
@@ -1039,7 +1224,7 @@ TEST_F_FORK(layout1, inherit_superset)
.path = dir_s1d3,
.access = ACCESS_RO,
},
- {}
+ {},
};
const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
@@ -1054,8 +1239,10 @@ TEST_F_FORK(layout1, inherit_superset)
ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
/* Now dir_s1d2, parent of dir_s1d3, gets a new rule tied to it. */
- add_path_beneath(_metadata, ruleset_fd, LANDLOCK_ACCESS_FS_READ_FILE |
- LANDLOCK_ACCESS_FS_READ_DIR, dir_s1d2);
+ add_path_beneath(_metadata, ruleset_fd,
+ LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_READ_DIR,
+ dir_s1d2);
enforce_ruleset(_metadata, ruleset_fd);
ASSERT_EQ(0, close(ruleset_fd));
@@ -1075,12 +1262,12 @@ TEST_F_FORK(layout1, max_layers)
.path = dir_s1d2,
.access = ACCESS_RO,
},
- {}
+ {},
};
const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
ASSERT_LE(0, ruleset_fd);
- for (i = 0; i < 64; i++)
+ for (i = 0; i < 16; i++)
enforce_ruleset(_metadata, ruleset_fd);
for (i = 0; i < 2; i++) {
@@ -1097,15 +1284,15 @@ TEST_F_FORK(layout1, empty_or_same_ruleset)
int ruleset_fd;
/* Tests empty handled_access_fs. */
- ruleset_fd = landlock_create_ruleset(&ruleset_attr,
- sizeof(ruleset_attr), 0);
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
ASSERT_LE(-1, ruleset_fd);
ASSERT_EQ(ENOMSG, errno);
/* Enforces policy which deny read access to all files. */
ruleset_attr.handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE;
- ruleset_fd = landlock_create_ruleset(&ruleset_attr,
- sizeof(ruleset_attr), 0);
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
ASSERT_LE(0, ruleset_fd);
enforce_ruleset(_metadata, ruleset_fd);
ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY));
@@ -1113,8 +1300,8 @@ TEST_F_FORK(layout1, empty_or_same_ruleset)
/* Nests a policy which deny read access to all directories. */
ruleset_attr.handled_access_fs = LANDLOCK_ACCESS_FS_READ_DIR;
- ruleset_fd = landlock_create_ruleset(&ruleset_attr,
- sizeof(ruleset_attr), 0);
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
ASSERT_LE(0, ruleset_fd);
enforce_ruleset(_metadata, ruleset_fd);
ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY));
@@ -1137,7 +1324,7 @@ TEST_F_FORK(layout1, rule_on_mountpoint)
.path = dir_s3d2,
.access = ACCESS_RO,
},
- {}
+ {},
};
const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
@@ -1166,7 +1353,7 @@ TEST_F_FORK(layout1, rule_over_mountpoint)
.path = dir_s3d1,
.access = ACCESS_RO,
},
- {}
+ {},
};
const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
@@ -1194,7 +1381,7 @@ TEST_F_FORK(layout1, rule_over_root_allow_then_deny)
.path = "/",
.access = ACCESS_RO,
},
- {}
+ {},
};
int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
@@ -1224,7 +1411,7 @@ TEST_F_FORK(layout1, rule_over_root_deny)
.path = "/",
.access = LANDLOCK_ACCESS_FS_READ_FILE,
},
- {}
+ {},
};
const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
@@ -1244,12 +1431,13 @@ TEST_F_FORK(layout1, rule_inside_mount_ns)
.path = "s3d3",
.access = ACCESS_RO,
},
- {}
+ {},
};
int ruleset_fd;
set_cap(_metadata, CAP_SYS_ADMIN);
- ASSERT_EQ(0, syscall(SYS_pivot_root, dir_s3d2, dir_s3d3)) {
+ ASSERT_EQ(0, syscall(__NR_pivot_root, dir_s3d2, dir_s3d3))
+ {
TH_LOG("Failed to pivot root: %s", strerror(errno));
};
ASSERT_EQ(0, chdir("/"));
@@ -1271,7 +1459,7 @@ TEST_F_FORK(layout1, mount_and_pivot)
.path = dir_s3d2,
.access = ACCESS_RO,
},
- {}
+ {},
};
const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
@@ -1282,7 +1470,7 @@ TEST_F_FORK(layout1, mount_and_pivot)
set_cap(_metadata, CAP_SYS_ADMIN);
ASSERT_EQ(-1, mount(NULL, dir_s3d2, NULL, MS_RDONLY, NULL));
ASSERT_EQ(EPERM, errno);
- ASSERT_EQ(-1, syscall(SYS_pivot_root, dir_s3d2, dir_s3d3));
+ ASSERT_EQ(-1, syscall(__NR_pivot_root, dir_s3d2, dir_s3d3));
ASSERT_EQ(EPERM, errno);
clear_cap(_metadata, CAP_SYS_ADMIN);
}
@@ -1294,28 +1482,29 @@ TEST_F_FORK(layout1, move_mount)
.path = dir_s3d2,
.access = ACCESS_RO,
},
- {}
+ {},
};
const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
ASSERT_LE(0, ruleset_fd);
set_cap(_metadata, CAP_SYS_ADMIN);
- ASSERT_EQ(0, syscall(SYS_move_mount, AT_FDCWD, dir_s3d2, AT_FDCWD,
- dir_s1d2, 0)) {
+ ASSERT_EQ(0, syscall(__NR_move_mount, AT_FDCWD, dir_s3d2, AT_FDCWD,
+ dir_s1d2, 0))
+ {
TH_LOG("Failed to move mount: %s", strerror(errno));
}
- ASSERT_EQ(0, syscall(SYS_move_mount, AT_FDCWD, dir_s1d2, AT_FDCWD,
- dir_s3d2, 0));
+ ASSERT_EQ(0, syscall(__NR_move_mount, AT_FDCWD, dir_s1d2, AT_FDCWD,
+ dir_s3d2, 0));
clear_cap(_metadata, CAP_SYS_ADMIN);
enforce_ruleset(_metadata, ruleset_fd);
ASSERT_EQ(0, close(ruleset_fd));
set_cap(_metadata, CAP_SYS_ADMIN);
- ASSERT_EQ(-1, syscall(SYS_move_mount, AT_FDCWD, dir_s3d2, AT_FDCWD,
- dir_s1d2, 0));
+ ASSERT_EQ(-1, syscall(__NR_move_mount, AT_FDCWD, dir_s3d2, AT_FDCWD,
+ dir_s1d2, 0));
ASSERT_EQ(EPERM, errno);
clear_cap(_metadata, CAP_SYS_ADMIN);
}
@@ -1335,7 +1524,7 @@ TEST_F_FORK(layout1, release_inodes)
.path = dir_s3d3,
.access = ACCESS_RO,
},
- {}
+ {},
};
const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
@@ -1362,7 +1551,7 @@ enum relative_access {
};
static void test_relative_path(struct __test_metadata *const _metadata,
- const enum relative_access rel)
+ const enum relative_access rel)
{
/*
* Common layer to check that chroot doesn't ignore it (i.e. a chroot
@@ -1373,7 +1562,7 @@ static void test_relative_path(struct __test_metadata *const _metadata,
.path = TMP_DIR,
.access = ACCESS_RO,
},
- {}
+ {},
};
const struct rule layer2_subs[] = {
{
@@ -1384,7 +1573,7 @@ static void test_relative_path(struct __test_metadata *const _metadata,
.path = dir_s2d2,
.access = ACCESS_RO,
},
- {}
+ {},
};
int dirfd, ruleset_fd;
@@ -1425,14 +1614,16 @@ static void test_relative_path(struct __test_metadata *const _metadata,
break;
case REL_CHROOT_ONLY:
/* Do chroot into dir_s1d2 (relative to dir_s2d2). */
- ASSERT_EQ(0, chroot("../../s1d1/s1d2")) {
+ ASSERT_EQ(0, chroot("../../s1d1/s1d2"))
+ {
TH_LOG("Failed to chroot: %s", strerror(errno));
}
dirfd = AT_FDCWD;
break;
case REL_CHROOT_CHDIR:
/* Do chroot into dir_s1d2. */
- ASSERT_EQ(0, chroot(".")) {
+ ASSERT_EQ(0, chroot("."))
+ {
TH_LOG("Failed to chroot: %s", strerror(errno));
}
dirfd = AT_FDCWD;
@@ -1440,7 +1631,7 @@ static void test_relative_path(struct __test_metadata *const _metadata,
}
ASSERT_EQ((rel == REL_CHROOT_CHDIR) ? 0 : EACCES,
- test_open_rel(dirfd, "..", O_RDONLY));
+ test_open_rel(dirfd, "..", O_RDONLY));
ASSERT_EQ(0, test_open_rel(dirfd, ".", O_RDONLY));
if (rel == REL_CHROOT_ONLY) {
@@ -1462,11 +1653,13 @@ static void test_relative_path(struct __test_metadata *const _metadata,
if (rel != REL_CHROOT_CHDIR) {
ASSERT_EQ(EACCES, test_open_rel(dirfd, "../../s1d1", O_RDONLY));
ASSERT_EQ(0, test_open_rel(dirfd, "../../s1d1/s1d2", O_RDONLY));
- ASSERT_EQ(0, test_open_rel(dirfd, "../../s1d1/s1d2/s1d3", O_RDONLY));
+ ASSERT_EQ(0, test_open_rel(dirfd, "../../s1d1/s1d2/s1d3",
+ O_RDONLY));
ASSERT_EQ(EACCES, test_open_rel(dirfd, "../../s2d1", O_RDONLY));
ASSERT_EQ(0, test_open_rel(dirfd, "../../s2d1/s2d2", O_RDONLY));
- ASSERT_EQ(0, test_open_rel(dirfd, "../../s2d1/s2d2/s2d3", O_RDONLY));
+ ASSERT_EQ(0, test_open_rel(dirfd, "../../s2d1/s2d2/s2d3",
+ O_RDONLY));
}
if (rel == REL_OPEN)
@@ -1495,40 +1688,42 @@ TEST_F_FORK(layout1, relative_chroot_chdir)
}
static void copy_binary(struct __test_metadata *const _metadata,
- const char *const dst_path)
+ const char *const dst_path)
{
int dst_fd, src_fd;
struct stat statbuf;
dst_fd = open(dst_path, O_WRONLY | O_TRUNC | O_CLOEXEC);
- ASSERT_LE(0, dst_fd) {
- TH_LOG("Failed to open \"%s\": %s", dst_path,
- strerror(errno));
+ ASSERT_LE(0, dst_fd)
+ {
+ TH_LOG("Failed to open \"%s\": %s", dst_path, strerror(errno));
}
src_fd = open(BINARY_PATH, O_RDONLY | O_CLOEXEC);
- ASSERT_LE(0, src_fd) {
+ ASSERT_LE(0, src_fd)
+ {
TH_LOG("Failed to open \"" BINARY_PATH "\": %s",
- strerror(errno));
+ strerror(errno));
}
ASSERT_EQ(0, fstat(src_fd, &statbuf));
- ASSERT_EQ(statbuf.st_size, sendfile(dst_fd, src_fd, 0,
- statbuf.st_size));
+ ASSERT_EQ(statbuf.st_size,
+ sendfile(dst_fd, src_fd, 0, statbuf.st_size));
ASSERT_EQ(0, close(src_fd));
ASSERT_EQ(0, close(dst_fd));
}
-static void test_execute(struct __test_metadata *const _metadata,
- const int err, const char *const path)
+static void test_execute(struct __test_metadata *const _metadata, const int err,
+ const char *const path)
{
int status;
- char *const argv[] = {(char *)path, NULL};
+ char *const argv[] = { (char *)path, NULL };
const pid_t child = fork();
ASSERT_LE(0, child);
if (child == 0) {
- ASSERT_EQ(err ? -1 : 0, execve(path, argv, NULL)) {
+ ASSERT_EQ(err ? -1 : 0, execve(path, argv, NULL))
+ {
TH_LOG("Failed to execute \"%s\": %s", path,
- strerror(errno));
+ strerror(errno));
};
ASSERT_EQ(err, errno);
_exit(_metadata->passed ? 2 : 1);
@@ -1536,9 +1731,10 @@ static void test_execute(struct __test_metadata *const _metadata,
}
ASSERT_EQ(child, waitpid(child, &status, 0));
ASSERT_EQ(1, WIFEXITED(status));
- ASSERT_EQ(err ? 2 : 0, WEXITSTATUS(status)) {
+ ASSERT_EQ(err ? 2 : 0, WEXITSTATUS(status))
+ {
TH_LOG("Unexpected return code for \"%s\": %s", path,
- strerror(errno));
+ strerror(errno));
};
}
@@ -1549,10 +1745,10 @@ TEST_F_FORK(layout1, execute)
.path = dir_s1d2,
.access = LANDLOCK_ACCESS_FS_EXECUTE,
},
- {}
+ {},
};
- const int ruleset_fd = create_ruleset(_metadata, rules[0].access,
- rules);
+ const int ruleset_fd =
+ create_ruleset(_metadata, rules[0].access, rules);
ASSERT_LE(0, ruleset_fd);
copy_binary(_metadata, file1_s1d1);
@@ -1577,15 +1773,21 @@ TEST_F_FORK(layout1, execute)
TEST_F_FORK(layout1, link)
{
- const struct rule rules[] = {
+ const struct rule layer1[] = {
{
.path = dir_s1d2,
.access = LANDLOCK_ACCESS_FS_MAKE_REG,
},
- {}
+ {},
};
- const int ruleset_fd = create_ruleset(_metadata, rules[0].access,
- rules);
+ const struct rule layer2[] = {
+ {
+ .path = dir_s1d3,
+ .access = LANDLOCK_ACCESS_FS_REMOVE_FILE,
+ },
+ {},
+ };
+ int ruleset_fd = create_ruleset(_metadata, layer1[0].access, layer1);
ASSERT_LE(0, ruleset_fd);
@@ -1598,14 +1800,30 @@ TEST_F_FORK(layout1, link)
ASSERT_EQ(-1, link(file2_s1d1, file1_s1d1));
ASSERT_EQ(EACCES, errno);
+
/* Denies linking because of reparenting. */
ASSERT_EQ(-1, link(file1_s2d1, file1_s1d2));
ASSERT_EQ(EXDEV, errno);
ASSERT_EQ(-1, link(file2_s1d2, file1_s1d3));
ASSERT_EQ(EXDEV, errno);
+ ASSERT_EQ(-1, link(file2_s1d3, file1_s1d2));
+ ASSERT_EQ(EXDEV, errno);
ASSERT_EQ(0, link(file2_s1d2, file1_s1d2));
ASSERT_EQ(0, link(file2_s1d3, file1_s1d3));
+
+ /* Prepares for next unlinks. */
+ ASSERT_EQ(0, unlink(file2_s1d2));
+ ASSERT_EQ(0, unlink(file2_s1d3));
+
+ ruleset_fd = create_ruleset(_metadata, layer2[0].access, layer2);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks that linkind doesn't require the ability to delete a file. */
+ ASSERT_EQ(0, link(file1_s1d2, file2_s1d2));
+ ASSERT_EQ(0, link(file1_s1d3, file2_s1d3));
}
TEST_F_FORK(layout1, rename_file)
@@ -1619,14 +1837,13 @@ TEST_F_FORK(layout1, rename_file)
.path = dir_s2d2,
.access = LANDLOCK_ACCESS_FS_REMOVE_FILE,
},
- {}
+ {},
};
- const int ruleset_fd = create_ruleset(_metadata, rules[0].access,
- rules);
+ const int ruleset_fd =
+ create_ruleset(_metadata, rules[0].access, rules);
ASSERT_LE(0, ruleset_fd);
- ASSERT_EQ(0, unlink(file1_s1d1));
ASSERT_EQ(0, unlink(file1_s1d2));
enforce_ruleset(_metadata, ruleset_fd);
@@ -1662,9 +1879,15 @@ TEST_F_FORK(layout1, rename_file)
ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_s2d2, AT_FDCWD, file1_s2d1,
RENAME_EXCHANGE));
ASSERT_EQ(EACCES, errno);
+ /* Checks that file1_s2d1 cannot be removed (instead of ENOTDIR). */
+ ASSERT_EQ(-1, rename(dir_s2d2, file1_s2d1));
+ ASSERT_EQ(EACCES, errno);
ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d1, AT_FDCWD, dir_s2d2,
RENAME_EXCHANGE));
ASSERT_EQ(EACCES, errno);
+ /* Checks that file1_s1d1 cannot be removed (instead of EISDIR). */
+ ASSERT_EQ(-1, rename(file1_s1d1, dir_s1d2));
+ ASSERT_EQ(EACCES, errno);
/* Renames files with different parents. */
ASSERT_EQ(-1, rename(file1_s2d2, file1_s1d2));
@@ -1675,14 +1898,14 @@ TEST_F_FORK(layout1, rename_file)
/* Exchanges and renames files with same parent. */
ASSERT_EQ(0, renameat2(AT_FDCWD, file2_s2d3, AT_FDCWD, file1_s2d3,
- RENAME_EXCHANGE));
+ RENAME_EXCHANGE));
ASSERT_EQ(0, rename(file2_s2d3, file1_s2d3));
/* Exchanges files and directories with same parent, twice. */
ASSERT_EQ(0, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_s2d3,
- RENAME_EXCHANGE));
+ RENAME_EXCHANGE));
ASSERT_EQ(0, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_s2d3,
- RENAME_EXCHANGE));
+ RENAME_EXCHANGE));
}
TEST_F_FORK(layout1, rename_dir)
@@ -1696,10 +1919,10 @@ TEST_F_FORK(layout1, rename_dir)
.path = dir_s2d1,
.access = LANDLOCK_ACCESS_FS_REMOVE_DIR,
},
- {}
+ {},
};
- const int ruleset_fd = create_ruleset(_metadata, rules[0].access,
- rules);
+ const int ruleset_fd =
+ create_ruleset(_metadata, rules[0].access, rules);
ASSERT_LE(0, ruleset_fd);
@@ -1727,22 +1950,743 @@ TEST_F_FORK(layout1, rename_dir)
ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_s1d1, AT_FDCWD, dir_s2d1,
RENAME_EXCHANGE));
ASSERT_EQ(EACCES, errno);
+ /* Checks that dir_s1d2 cannot be removed (instead of ENOTDIR). */
+ ASSERT_EQ(-1, rename(dir_s1d2, file1_s1d1));
+ ASSERT_EQ(EACCES, errno);
ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s1d1, AT_FDCWD, dir_s1d2,
RENAME_EXCHANGE));
ASSERT_EQ(EACCES, errno);
+ /* Checks that dir_s1d2 cannot be removed (instead of EISDIR). */
+ ASSERT_EQ(-1, rename(file1_s1d1, dir_s1d2));
+ ASSERT_EQ(EACCES, errno);
/*
* Exchanges and renames directory to the same parent, which allows
* directory removal.
*/
ASSERT_EQ(0, renameat2(AT_FDCWD, dir_s1d3, AT_FDCWD, file1_s1d2,
- RENAME_EXCHANGE));
+ RENAME_EXCHANGE));
ASSERT_EQ(0, unlink(dir_s1d3));
ASSERT_EQ(0, mkdir(dir_s1d3, 0700));
ASSERT_EQ(0, rename(file1_s1d2, dir_s1d3));
ASSERT_EQ(0, rmdir(dir_s1d3));
}
+TEST_F_FORK(layout1, reparent_refer)
+{
+ const struct rule layer1[] = {
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_REFER,
+ },
+ {
+ .path = dir_s2d2,
+ .access = LANDLOCK_ACCESS_FS_REFER,
+ },
+ {},
+ };
+ int ruleset_fd =
+ create_ruleset(_metadata, LANDLOCK_ACCESS_FS_REFER, layer1);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ ASSERT_EQ(-1, rename(dir_s1d2, dir_s2d1));
+ ASSERT_EQ(EXDEV, errno);
+ ASSERT_EQ(-1, rename(dir_s1d2, dir_s2d2));
+ ASSERT_EQ(EXDEV, errno);
+ ASSERT_EQ(-1, rename(dir_s1d2, dir_s2d3));
+ ASSERT_EQ(EXDEV, errno);
+
+ ASSERT_EQ(-1, rename(dir_s1d3, dir_s2d1));
+ ASSERT_EQ(EXDEV, errno);
+ ASSERT_EQ(-1, rename(dir_s1d3, dir_s2d2));
+ ASSERT_EQ(EXDEV, errno);
+ /*
+ * Moving should only be allowed when the source and the destination
+ * parent directory have REFER.
+ */
+ ASSERT_EQ(-1, rename(dir_s1d3, dir_s2d3));
+ ASSERT_EQ(ENOTEMPTY, errno);
+ ASSERT_EQ(0, unlink(file1_s2d3));
+ ASSERT_EQ(0, unlink(file2_s2d3));
+ ASSERT_EQ(0, rename(dir_s1d3, dir_s2d3));
+}
+
+TEST_F_FORK(layout1, reparent_link)
+{
+ const struct rule layer1[] = {
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_MAKE_REG,
+ },
+ {
+ .path = dir_s1d3,
+ .access = LANDLOCK_ACCESS_FS_REFER,
+ },
+ {
+ .path = dir_s2d2,
+ .access = LANDLOCK_ACCESS_FS_REFER,
+ },
+ {
+ .path = dir_s2d3,
+ .access = LANDLOCK_ACCESS_FS_MAKE_REG,
+ },
+ {},
+ };
+ const int ruleset_fd = create_ruleset(
+ _metadata,
+ LANDLOCK_ACCESS_FS_MAKE_REG | LANDLOCK_ACCESS_FS_REFER, layer1);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ ASSERT_EQ(0, unlink(file1_s1d1));
+ ASSERT_EQ(0, unlink(file1_s1d2));
+ ASSERT_EQ(0, unlink(file1_s1d3));
+
+ /* Denies linking because of missing MAKE_REG. */
+ ASSERT_EQ(-1, link(file2_s1d1, file1_s1d1));
+ ASSERT_EQ(EACCES, errno);
+ /* Denies linking because of missing source and destination REFER. */
+ ASSERT_EQ(-1, link(file1_s2d1, file1_s1d2));
+ ASSERT_EQ(EXDEV, errno);
+ /* Denies linking because of missing source REFER. */
+ ASSERT_EQ(-1, link(file1_s2d1, file1_s1d3));
+ ASSERT_EQ(EXDEV, errno);
+
+ /* Denies linking because of missing MAKE_REG. */
+ ASSERT_EQ(-1, link(file1_s2d2, file1_s1d1));
+ ASSERT_EQ(EACCES, errno);
+ /* Denies linking because of missing destination REFER. */
+ ASSERT_EQ(-1, link(file1_s2d2, file1_s1d2));
+ ASSERT_EQ(EXDEV, errno);
+
+ /* Allows linking because of REFER and MAKE_REG. */
+ ASSERT_EQ(0, link(file1_s2d2, file1_s1d3));
+ ASSERT_EQ(0, unlink(file1_s2d2));
+ /* Reverse linking denied because of missing MAKE_REG. */
+ ASSERT_EQ(-1, link(file1_s1d3, file1_s2d2));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(0, unlink(file1_s2d3));
+ /* Checks reverse linking. */
+ ASSERT_EQ(0, link(file1_s1d3, file1_s2d3));
+ ASSERT_EQ(0, unlink(file1_s1d3));
+
+ /*
+ * This is OK for a file link, but it should not be allowed for a
+ * directory rename (because of the superset of access rights.
+ */
+ ASSERT_EQ(0, link(file1_s2d3, file1_s1d3));
+ ASSERT_EQ(0, unlink(file1_s1d3));
+
+ ASSERT_EQ(-1, link(file2_s1d2, file1_s1d3));
+ ASSERT_EQ(EXDEV, errno);
+ ASSERT_EQ(-1, link(file2_s1d3, file1_s1d2));
+ ASSERT_EQ(EXDEV, errno);
+
+ ASSERT_EQ(0, link(file2_s1d2, file1_s1d2));
+ ASSERT_EQ(0, link(file2_s1d3, file1_s1d3));
+}
+
+TEST_F_FORK(layout1, reparent_rename)
+{
+ /* Same rules as for reparent_link. */
+ const struct rule layer1[] = {
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_MAKE_REG,
+ },
+ {
+ .path = dir_s1d3,
+ .access = LANDLOCK_ACCESS_FS_REFER,
+ },
+ {
+ .path = dir_s2d2,
+ .access = LANDLOCK_ACCESS_FS_REFER,
+ },
+ {
+ .path = dir_s2d3,
+ .access = LANDLOCK_ACCESS_FS_MAKE_REG,
+ },
+ {},
+ };
+ const int ruleset_fd = create_ruleset(
+ _metadata,
+ LANDLOCK_ACCESS_FS_MAKE_REG | LANDLOCK_ACCESS_FS_REFER, layer1);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ ASSERT_EQ(0, unlink(file1_s1d2));
+ ASSERT_EQ(0, unlink(file1_s1d3));
+
+ /* Denies renaming because of missing MAKE_REG. */
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file2_s1d1, AT_FDCWD, file1_s1d1,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s1d1, AT_FDCWD, file2_s1d1,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(0, unlink(file1_s1d1));
+ ASSERT_EQ(-1, rename(file2_s1d1, file1_s1d1));
+ ASSERT_EQ(EACCES, errno);
+ /* Even denies same file exchange. */
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file2_s1d1, AT_FDCWD, file2_s1d1,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+
+ /* Denies renaming because of missing source and destination REFER. */
+ ASSERT_EQ(-1, rename(file1_s2d1, file1_s1d2));
+ ASSERT_EQ(EXDEV, errno);
+ /*
+ * Denies renaming because of missing MAKE_REG, source and destination
+ * REFER.
+ */
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d1, AT_FDCWD, file2_s1d1,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file2_s1d1, AT_FDCWD, file1_s2d1,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+
+ /* Denies renaming because of missing source REFER. */
+ ASSERT_EQ(-1, rename(file1_s2d1, file1_s1d3));
+ ASSERT_EQ(EXDEV, errno);
+ /* Denies renaming because of missing MAKE_REG. */
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d1, AT_FDCWD, file2_s1d3,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+
+ /* Denies renaming because of missing MAKE_REG. */
+ ASSERT_EQ(-1, rename(file1_s2d2, file1_s1d1));
+ ASSERT_EQ(EACCES, errno);
+ /* Denies renaming because of missing destination REFER*/
+ ASSERT_EQ(-1, rename(file1_s2d2, file1_s1d2));
+ ASSERT_EQ(EXDEV, errno);
+
+ /* Denies exchange because of one missing MAKE_REG. */
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, file2_s1d3,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+ /* Allows renaming because of REFER and MAKE_REG. */
+ ASSERT_EQ(0, rename(file1_s2d2, file1_s1d3));
+
+ /* Reverse renaming denied because of missing MAKE_REG. */
+ ASSERT_EQ(-1, rename(file1_s1d3, file1_s2d2));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(0, unlink(file1_s2d3));
+ ASSERT_EQ(0, rename(file1_s1d3, file1_s2d3));
+
+ /* Tests reverse renaming. */
+ ASSERT_EQ(0, rename(file1_s2d3, file1_s1d3));
+ ASSERT_EQ(0, renameat2(AT_FDCWD, file2_s2d3, AT_FDCWD, file1_s1d3,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(0, rename(file1_s1d3, file1_s2d3));
+
+ /*
+ * This is OK for a file rename, but it should not be allowed for a
+ * directory rename (because of the superset of access rights).
+ */
+ ASSERT_EQ(0, rename(file1_s2d3, file1_s1d3));
+ ASSERT_EQ(0, rename(file1_s1d3, file1_s2d3));
+
+ /*
+ * Tests superset restrictions applied to directories. Not only the
+ * dir_s2d3's parent (dir_s2d2) should be taken into account but also
+ * access rights tied to dir_s2d3. dir_s2d2 is missing one access right
+ * compared to dir_s1d3/file1_s1d3 (MAKE_REG) but it is provided
+ * directly by the moved dir_s2d3.
+ */
+ ASSERT_EQ(0, rename(dir_s2d3, file1_s1d3));
+ ASSERT_EQ(0, rename(file1_s1d3, dir_s2d3));
+ /*
+ * The first rename is allowed but not the exchange because dir_s1d3's
+ * parent (dir_s1d2) doesn't have REFER.
+ */
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d3, AT_FDCWD, dir_s1d3,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EXDEV, errno);
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_s1d3, AT_FDCWD, file1_s2d3,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EXDEV, errno);
+ ASSERT_EQ(-1, rename(file1_s2d3, dir_s1d3));
+ ASSERT_EQ(EXDEV, errno);
+
+ ASSERT_EQ(-1, rename(file2_s1d2, file1_s1d3));
+ ASSERT_EQ(EXDEV, errno);
+ ASSERT_EQ(-1, rename(file2_s1d3, file1_s1d2));
+ ASSERT_EQ(EXDEV, errno);
+
+ /* Renaming in the same directory is always allowed. */
+ ASSERT_EQ(0, rename(file2_s1d2, file1_s1d2));
+ ASSERT_EQ(0, rename(file2_s1d3, file1_s1d3));
+
+ ASSERT_EQ(0, unlink(file1_s1d2));
+ /* Denies because of missing source MAKE_REG and destination REFER. */
+ ASSERT_EQ(-1, rename(dir_s2d3, file1_s1d2));
+ ASSERT_EQ(EXDEV, errno);
+
+ ASSERT_EQ(0, unlink(file1_s1d3));
+ /* Denies because of missing source MAKE_REG and REFER. */
+ ASSERT_EQ(-1, rename(dir_s2d2, file1_s1d3));
+ ASSERT_EQ(EXDEV, errno);
+}
+
+static void
+reparent_exdev_layers_enforce1(struct __test_metadata *const _metadata)
+{
+ const struct rule layer1[] = {
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_REFER,
+ },
+ {
+ /* Interesting for the layer2 tests. */
+ .path = dir_s1d3,
+ .access = LANDLOCK_ACCESS_FS_MAKE_REG,
+ },
+ {
+ .path = dir_s2d2,
+ .access = LANDLOCK_ACCESS_FS_REFER,
+ },
+ {
+ .path = dir_s2d3,
+ .access = LANDLOCK_ACCESS_FS_MAKE_REG,
+ },
+ {},
+ };
+ const int ruleset_fd = create_ruleset(
+ _metadata,
+ LANDLOCK_ACCESS_FS_MAKE_REG | LANDLOCK_ACCESS_FS_REFER, layer1);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+}
+
+static void
+reparent_exdev_layers_enforce2(struct __test_metadata *const _metadata)
+{
+ const struct rule layer2[] = {
+ {
+ .path = dir_s2d3,
+ .access = LANDLOCK_ACCESS_FS_MAKE_DIR,
+ },
+ {},
+ };
+ /*
+ * Same checks as before but with a second layer and a new MAKE_DIR
+ * rule (and no explicit handling of REFER).
+ */
+ const int ruleset_fd =
+ create_ruleset(_metadata, LANDLOCK_ACCESS_FS_MAKE_DIR, layer2);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+}
+
+TEST_F_FORK(layout1, reparent_exdev_layers_rename1)
+{
+ ASSERT_EQ(0, unlink(file1_s2d2));
+ ASSERT_EQ(0, unlink(file1_s2d3));
+
+ reparent_exdev_layers_enforce1(_metadata);
+
+ /*
+ * Moving the dir_s1d3 directory below dir_s2d2 is allowed by Landlock
+ * because it doesn't inherit new access rights.
+ */
+ ASSERT_EQ(0, rename(dir_s1d3, file1_s2d2));
+ ASSERT_EQ(0, rename(file1_s2d2, dir_s1d3));
+
+ /*
+ * Moving the dir_s1d3 directory below dir_s2d3 is allowed, even if it
+ * gets a new inherited access rights (MAKE_REG), because MAKE_REG is
+ * already allowed for dir_s1d3.
+ */
+ ASSERT_EQ(0, rename(dir_s1d3, file1_s2d3));
+ ASSERT_EQ(0, rename(file1_s2d3, dir_s1d3));
+
+ /*
+ * However, moving the file1_s1d3 file below dir_s2d3 is allowed
+ * because it cannot inherit MAKE_REG right (which is dedicated to
+ * directories).
+ */
+ ASSERT_EQ(0, rename(file1_s1d3, file1_s2d3));
+
+ reparent_exdev_layers_enforce2(_metadata);
+
+ /*
+ * Moving the dir_s1d3 directory below dir_s2d2 is now denied because
+ * MAKE_DIR is not tied to dir_s2d2.
+ */
+ ASSERT_EQ(-1, rename(dir_s1d3, file1_s2d2));
+ ASSERT_EQ(EACCES, errno);
+
+ /*
+ * Moving the dir_s1d3 directory below dir_s2d3 is forbidden because it
+ * would grants MAKE_REG and MAKE_DIR rights to it.
+ */
+ ASSERT_EQ(-1, rename(dir_s1d3, file1_s2d3));
+ ASSERT_EQ(EXDEV, errno);
+
+ /*
+ * However, moving the file2_s1d3 file below dir_s2d3 is allowed
+ * because it cannot inherit MAKE_REG nor MAKE_DIR rights (which are
+ * dedicated to directories).
+ */
+ ASSERT_EQ(0, rename(file2_s1d3, file1_s2d3));
+}
+
+TEST_F_FORK(layout1, reparent_exdev_layers_rename2)
+{
+ reparent_exdev_layers_enforce1(_metadata);
+
+ /* Checks EACCES predominance over EXDEV. */
+ ASSERT_EQ(-1, rename(file1_s1d1, file1_s2d2));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, rename(file1_s1d2, file1_s2d2));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, rename(file1_s1d1, file1_s2d3));
+ ASSERT_EQ(EXDEV, errno);
+ /* Modify layout! */
+ ASSERT_EQ(0, rename(file1_s1d2, file1_s2d3));
+
+ /* Without REFER source. */
+ ASSERT_EQ(-1, rename(dir_s1d1, file1_s2d2));
+ ASSERT_EQ(EXDEV, errno);
+ ASSERT_EQ(-1, rename(dir_s1d2, file1_s2d2));
+ ASSERT_EQ(EXDEV, errno);
+
+ reparent_exdev_layers_enforce2(_metadata);
+
+ /* Checks EACCES predominance over EXDEV. */
+ ASSERT_EQ(-1, rename(file1_s1d1, file1_s2d2));
+ ASSERT_EQ(EACCES, errno);
+ /* Checks with actual file2_s1d2. */
+ ASSERT_EQ(-1, rename(file2_s1d2, file1_s2d2));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, rename(file1_s1d1, file1_s2d3));
+ ASSERT_EQ(EXDEV, errno);
+ /* Modify layout! */
+ ASSERT_EQ(0, rename(file2_s1d2, file1_s2d3));
+
+ /* Without REFER source, EACCES wins over EXDEV. */
+ ASSERT_EQ(-1, rename(dir_s1d1, file1_s2d2));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, rename(dir_s1d2, file1_s2d2));
+ ASSERT_EQ(EACCES, errno);
+}
+
+TEST_F_FORK(layout1, reparent_exdev_layers_exchange1)
+{
+ const char *const dir_file1_s1d2 = file1_s1d2, *const dir_file2_s2d3 =
+ file2_s2d3;
+
+ ASSERT_EQ(0, unlink(file1_s1d2));
+ ASSERT_EQ(0, mkdir(file1_s1d2, 0700));
+ ASSERT_EQ(0, unlink(file2_s2d3));
+ ASSERT_EQ(0, mkdir(file2_s2d3, 0700));
+
+ reparent_exdev_layers_enforce1(_metadata);
+
+ /* Error predominance with file exchange: returns EXDEV and EACCES. */
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s1d1, AT_FDCWD, file1_s2d3,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d3, AT_FDCWD, file1_s1d1,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+
+ /*
+ * Checks with directories which creation could be allowed, but denied
+ * because of access rights that would be inherited.
+ */
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_file1_s1d2, AT_FDCWD,
+ dir_file2_s2d3, RENAME_EXCHANGE));
+ ASSERT_EQ(EXDEV, errno);
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_file2_s2d3, AT_FDCWD,
+ dir_file1_s1d2, RENAME_EXCHANGE));
+ ASSERT_EQ(EXDEV, errno);
+
+ /* Checks with same access rights. */
+ ASSERT_EQ(0, renameat2(AT_FDCWD, dir_s1d3, AT_FDCWD, dir_s2d3,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(0, renameat2(AT_FDCWD, dir_s2d3, AT_FDCWD, dir_s1d3,
+ RENAME_EXCHANGE));
+
+ /* Checks with different (child-only) access rights. */
+ ASSERT_EQ(0, renameat2(AT_FDCWD, dir_s2d3, AT_FDCWD, dir_file1_s1d2,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(0, renameat2(AT_FDCWD, dir_file1_s1d2, AT_FDCWD, dir_s2d3,
+ RENAME_EXCHANGE));
+
+ /*
+ * Checks that exchange between file and directory are consistent.
+ *
+ * Moving a file (file1_s2d2) to a directory which only grants more
+ * directory-related access rights is allowed, and at the same time
+ * moving a directory (dir_file2_s2d3) to another directory which
+ * grants less access rights is allowed too.
+ *
+ * See layout1.reparent_exdev_layers_exchange3 for inverted arguments.
+ */
+ ASSERT_EQ(0, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_file2_s2d3,
+ RENAME_EXCHANGE));
+ /*
+ * However, moving back the directory is denied because it would get
+ * more access rights than the current state and because file creation
+ * is forbidden (in dir_s2d2).
+ */
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_file2_s2d3, AT_FDCWD, file1_s2d2,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_file2_s2d3,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+
+ reparent_exdev_layers_enforce2(_metadata);
+
+ /* Error predominance with file exchange: returns EXDEV and EACCES. */
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s1d1, AT_FDCWD, file1_s2d3,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d3, AT_FDCWD, file1_s1d1,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+
+ /* Checks with directories which creation is now denied. */
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_file1_s1d2, AT_FDCWD,
+ dir_file2_s2d3, RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_file2_s2d3, AT_FDCWD,
+ dir_file1_s1d2, RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+
+ /* Checks with different (child-only) access rights. */
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_s1d3, AT_FDCWD, dir_s2d3,
+ RENAME_EXCHANGE));
+ /* Denied because of MAKE_DIR. */
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_s2d3, AT_FDCWD, dir_s1d3,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+
+ /* Checks with different (child-only) access rights. */
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_s2d3, AT_FDCWD, dir_file1_s1d2,
+ RENAME_EXCHANGE));
+ /* Denied because of MAKE_DIR. */
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_file1_s1d2, AT_FDCWD, dir_s2d3,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+
+ /* See layout1.reparent_exdev_layers_exchange2 for complement. */
+}
+
+TEST_F_FORK(layout1, reparent_exdev_layers_exchange2)
+{
+ const char *const dir_file2_s2d3 = file2_s2d3;
+
+ ASSERT_EQ(0, unlink(file2_s2d3));
+ ASSERT_EQ(0, mkdir(file2_s2d3, 0700));
+
+ reparent_exdev_layers_enforce1(_metadata);
+ reparent_exdev_layers_enforce2(_metadata);
+
+ /* Checks that exchange between file and directory are consistent. */
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_file2_s2d3,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_file2_s2d3, AT_FDCWD, file1_s2d2,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+}
+
+TEST_F_FORK(layout1, reparent_exdev_layers_exchange3)
+{
+ const char *const dir_file2_s2d3 = file2_s2d3;
+
+ ASSERT_EQ(0, unlink(file2_s2d3));
+ ASSERT_EQ(0, mkdir(file2_s2d3, 0700));
+
+ reparent_exdev_layers_enforce1(_metadata);
+
+ /*
+ * Checks that exchange between file and directory are consistent,
+ * including with inverted arguments (see
+ * layout1.reparent_exdev_layers_exchange1).
+ */
+ ASSERT_EQ(0, renameat2(AT_FDCWD, dir_file2_s2d3, AT_FDCWD, file1_s2d2,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_file2_s2d3,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_file2_s2d3, AT_FDCWD, file1_s2d2,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+}
+
+TEST_F_FORK(layout1, reparent_remove)
+{
+ const struct rule layer1[] = {
+ {
+ .path = dir_s1d1,
+ .access = LANDLOCK_ACCESS_FS_REFER |
+ LANDLOCK_ACCESS_FS_REMOVE_DIR,
+ },
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_REMOVE_FILE,
+ },
+ {
+ .path = dir_s2d1,
+ .access = LANDLOCK_ACCESS_FS_REFER |
+ LANDLOCK_ACCESS_FS_REMOVE_FILE,
+ },
+ {},
+ };
+ const int ruleset_fd = create_ruleset(
+ _metadata,
+ LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_REMOVE_DIR |
+ LANDLOCK_ACCESS_FS_REMOVE_FILE,
+ layer1);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Access denied because of wrong/swapped remove file/dir. */
+ ASSERT_EQ(-1, rename(file1_s1d1, dir_s2d2));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, rename(dir_s2d2, file1_s1d1));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s1d1, AT_FDCWD, dir_s2d2,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s1d1, AT_FDCWD, dir_s2d3,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+
+ /* Access allowed thanks to the matching rights. */
+ ASSERT_EQ(-1, rename(file1_s2d1, dir_s1d2));
+ ASSERT_EQ(EISDIR, errno);
+ ASSERT_EQ(-1, rename(dir_s1d2, file1_s2d1));
+ ASSERT_EQ(ENOTDIR, errno);
+ ASSERT_EQ(-1, rename(dir_s1d3, file1_s2d1));
+ ASSERT_EQ(ENOTDIR, errno);
+ ASSERT_EQ(0, unlink(file1_s2d1));
+ ASSERT_EQ(0, unlink(file1_s1d3));
+ ASSERT_EQ(0, unlink(file2_s1d3));
+ ASSERT_EQ(0, rename(dir_s1d3, file1_s2d1));
+
+ /* Effectively removes a file and a directory by exchanging them. */
+ ASSERT_EQ(0, mkdir(dir_s1d3, 0700));
+ ASSERT_EQ(0, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_s1d3,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_s1d3,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+}
+
+TEST_F_FORK(layout1, reparent_dom_superset)
+{
+ const struct rule layer1[] = {
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_REFER,
+ },
+ {
+ .path = file1_s1d2,
+ .access = LANDLOCK_ACCESS_FS_EXECUTE,
+ },
+ {
+ .path = dir_s1d3,
+ .access = LANDLOCK_ACCESS_FS_MAKE_SOCK |
+ LANDLOCK_ACCESS_FS_EXECUTE,
+ },
+ {
+ .path = dir_s2d2,
+ .access = LANDLOCK_ACCESS_FS_REFER |
+ LANDLOCK_ACCESS_FS_EXECUTE |
+ LANDLOCK_ACCESS_FS_MAKE_SOCK,
+ },
+ {
+ .path = dir_s2d3,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_MAKE_FIFO,
+ },
+ {},
+ };
+ int ruleset_fd = create_ruleset(_metadata,
+ LANDLOCK_ACCESS_FS_REFER |
+ LANDLOCK_ACCESS_FS_EXECUTE |
+ LANDLOCK_ACCESS_FS_MAKE_SOCK |
+ LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_MAKE_FIFO,
+ layer1);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ ASSERT_EQ(-1, rename(file1_s1d2, file1_s2d1));
+ ASSERT_EQ(EXDEV, errno);
+ /*
+ * Moving file1_s1d2 beneath dir_s2d3 would grant it the READ_FILE
+ * access right.
+ */
+ ASSERT_EQ(-1, rename(file1_s1d2, file1_s2d3));
+ ASSERT_EQ(EXDEV, errno);
+ /*
+ * Moving file1_s1d2 should be allowed even if dir_s2d2 grants a
+ * superset of access rights compared to dir_s1d2, because file1_s1d2
+ * already has these access rights anyway.
+ */
+ ASSERT_EQ(0, rename(file1_s1d2, file1_s2d2));
+ ASSERT_EQ(0, rename(file1_s2d2, file1_s1d2));
+
+ ASSERT_EQ(-1, rename(dir_s1d3, file1_s2d1));
+ ASSERT_EQ(EXDEV, errno);
+ /*
+ * Moving dir_s1d3 beneath dir_s2d3 would grant it the MAKE_FIFO access
+ * right.
+ */
+ ASSERT_EQ(-1, rename(dir_s1d3, file1_s2d3));
+ ASSERT_EQ(EXDEV, errno);
+ /*
+ * Moving dir_s1d3 should be allowed even if dir_s2d2 grants a superset
+ * of access rights compared to dir_s1d2, because dir_s1d3 already has
+ * these access rights anyway.
+ */
+ ASSERT_EQ(0, rename(dir_s1d3, file1_s2d2));
+ ASSERT_EQ(0, rename(file1_s2d2, dir_s1d3));
+
+ /*
+ * Moving file1_s2d3 beneath dir_s1d2 is allowed, but moving it back
+ * will be denied because the new inherited access rights from dir_s1d2
+ * will be less than the destination (original) dir_s2d3. This is a
+ * sinkhole scenario where we cannot move back files or directories.
+ */
+ ASSERT_EQ(0, rename(file1_s2d3, file2_s1d2));
+ ASSERT_EQ(-1, rename(file2_s1d2, file1_s2d3));
+ ASSERT_EQ(EXDEV, errno);
+ ASSERT_EQ(0, unlink(file2_s1d2));
+ ASSERT_EQ(0, unlink(file2_s2d3));
+ /*
+ * Checks similar directory one-way move: dir_s2d3 loses EXECUTE and
+ * MAKE_SOCK which were inherited from dir_s1d3.
+ */
+ ASSERT_EQ(0, rename(dir_s2d3, file2_s1d2));
+ ASSERT_EQ(-1, rename(file2_s1d2, dir_s2d3));
+ ASSERT_EQ(EXDEV, errno);
+}
+
TEST_F_FORK(layout1, remove_dir)
{
const struct rule rules[] = {
@@ -1750,10 +2694,10 @@ TEST_F_FORK(layout1, remove_dir)
.path = dir_s1d2,
.access = LANDLOCK_ACCESS_FS_REMOVE_DIR,
},
- {}
+ {},
};
- const int ruleset_fd = create_ruleset(_metadata, rules[0].access,
- rules);
+ const int ruleset_fd =
+ create_ruleset(_metadata, rules[0].access, rules);
ASSERT_LE(0, ruleset_fd);
@@ -1787,10 +2731,10 @@ TEST_F_FORK(layout1, remove_file)
.path = dir_s1d2,
.access = LANDLOCK_ACCESS_FS_REMOVE_FILE,
},
- {}
+ {},
};
- const int ruleset_fd = create_ruleset(_metadata, rules[0].access,
- rules);
+ const int ruleset_fd =
+ create_ruleset(_metadata, rules[0].access, rules);
ASSERT_LE(0, ruleset_fd);
enforce_ruleset(_metadata, ruleset_fd);
@@ -1805,14 +2749,15 @@ TEST_F_FORK(layout1, remove_file)
}
static void test_make_file(struct __test_metadata *const _metadata,
- const __u64 access, const mode_t mode, const dev_t dev)
+ const __u64 access, const mode_t mode,
+ const dev_t dev)
{
const struct rule rules[] = {
{
.path = dir_s1d2,
.access = access,
},
- {}
+ {},
};
const int ruleset_fd = create_ruleset(_metadata, access, rules);
@@ -1820,9 +2765,10 @@ static void test_make_file(struct __test_metadata *const _metadata,
ASSERT_EQ(0, unlink(file1_s1d1));
ASSERT_EQ(0, unlink(file2_s1d1));
- ASSERT_EQ(0, mknod(file2_s1d1, mode | 0400, dev)) {
- TH_LOG("Failed to make file \"%s\": %s",
- file2_s1d1, strerror(errno));
+ ASSERT_EQ(0, mknod(file2_s1d1, mode | 0400, dev))
+ {
+ TH_LOG("Failed to make file \"%s\": %s", file2_s1d1,
+ strerror(errno));
};
ASSERT_EQ(0, unlink(file1_s1d2));
@@ -1841,9 +2787,10 @@ static void test_make_file(struct __test_metadata *const _metadata,
ASSERT_EQ(-1, rename(file2_s1d1, file1_s1d1));
ASSERT_EQ(EACCES, errno);
- ASSERT_EQ(0, mknod(file1_s1d2, mode | 0400, dev)) {
- TH_LOG("Failed to make file \"%s\": %s",
- file1_s1d2, strerror(errno));
+ ASSERT_EQ(0, mknod(file1_s1d2, mode | 0400, dev))
+ {
+ TH_LOG("Failed to make file \"%s\": %s", file1_s1d2,
+ strerror(errno));
};
ASSERT_EQ(0, link(file1_s1d2, file2_s1d2));
ASSERT_EQ(0, unlink(file2_s1d2));
@@ -1860,7 +2807,7 @@ TEST_F_FORK(layout1, make_char)
/* Creates a /dev/null device. */
set_cap(_metadata, CAP_MKNOD);
test_make_file(_metadata, LANDLOCK_ACCESS_FS_MAKE_CHAR, S_IFCHR,
- makedev(1, 3));
+ makedev(1, 3));
}
TEST_F_FORK(layout1, make_block)
@@ -1868,7 +2815,7 @@ TEST_F_FORK(layout1, make_block)
/* Creates a /dev/loop0 device. */
set_cap(_metadata, CAP_MKNOD);
test_make_file(_metadata, LANDLOCK_ACCESS_FS_MAKE_BLOCK, S_IFBLK,
- makedev(7, 0));
+ makedev(7, 0));
}
TEST_F_FORK(layout1, make_reg_1)
@@ -1898,10 +2845,10 @@ TEST_F_FORK(layout1, make_sym)
.path = dir_s1d2,
.access = LANDLOCK_ACCESS_FS_MAKE_SYM,
},
- {}
+ {},
};
- const int ruleset_fd = create_ruleset(_metadata, rules[0].access,
- rules);
+ const int ruleset_fd =
+ create_ruleset(_metadata, rules[0].access, rules);
ASSERT_LE(0, ruleset_fd);
@@ -1943,10 +2890,10 @@ TEST_F_FORK(layout1, make_dir)
.path = dir_s1d2,
.access = LANDLOCK_ACCESS_FS_MAKE_DIR,
},
- {}
+ {},
};
- const int ruleset_fd = create_ruleset(_metadata, rules[0].access,
- rules);
+ const int ruleset_fd =
+ create_ruleset(_metadata, rules[0].access, rules);
ASSERT_LE(0, ruleset_fd);
@@ -1965,12 +2912,12 @@ TEST_F_FORK(layout1, make_dir)
}
static int open_proc_fd(struct __test_metadata *const _metadata, const int fd,
- const int open_flags)
+ const int open_flags)
{
static const char path_template[] = "/proc/self/fd/%d";
char procfd_path[sizeof(path_template) + 10];
- const int procfd_path_size = snprintf(procfd_path, sizeof(procfd_path),
- path_template, fd);
+ const int procfd_path_size =
+ snprintf(procfd_path, sizeof(procfd_path), path_template, fd);
ASSERT_LT(procfd_path_size, sizeof(procfd_path));
return open(procfd_path, open_flags);
@@ -1983,12 +2930,13 @@ TEST_F_FORK(layout1, proc_unlinked_file)
.path = file1_s1d2,
.access = LANDLOCK_ACCESS_FS_READ_FILE,
},
- {}
+ {},
};
int reg_fd, proc_fd;
- const int ruleset_fd = create_ruleset(_metadata,
- LANDLOCK_ACCESS_FS_READ_FILE |
- LANDLOCK_ACCESS_FS_WRITE_FILE, rules);
+ const int ruleset_fd = create_ruleset(
+ _metadata,
+ LANDLOCK_ACCESS_FS_READ_FILE | LANDLOCK_ACCESS_FS_WRITE_FILE,
+ rules);
ASSERT_LE(0, ruleset_fd);
enforce_ruleset(_metadata, ruleset_fd);
@@ -2005,9 +2953,10 @@ TEST_F_FORK(layout1, proc_unlinked_file)
ASSERT_EQ(0, close(proc_fd));
proc_fd = open_proc_fd(_metadata, reg_fd, O_RDWR | O_CLOEXEC);
- ASSERT_EQ(-1, proc_fd) {
- TH_LOG("Successfully opened /proc/self/fd/%d: %s",
- reg_fd, strerror(errno));
+ ASSERT_EQ(-1, proc_fd)
+ {
+ TH_LOG("Successfully opened /proc/self/fd/%d: %s", reg_fd,
+ strerror(errno));
}
ASSERT_EQ(EACCES, errno);
@@ -2023,13 +2972,13 @@ TEST_F_FORK(layout1, proc_pipe)
{
.path = dir_s1d2,
.access = LANDLOCK_ACCESS_FS_READ_FILE |
- LANDLOCK_ACCESS_FS_WRITE_FILE,
+ LANDLOCK_ACCESS_FS_WRITE_FILE,
},
- {}
+ {},
};
/* Limits read and write access to files tied to the filesystem. */
- const int ruleset_fd = create_ruleset(_metadata, rules[0].access,
- rules);
+ const int ruleset_fd =
+ create_ruleset(_metadata, rules[0].access, rules);
ASSERT_LE(0, ruleset_fd);
enforce_ruleset(_metadata, ruleset_fd);
@@ -2041,7 +2990,8 @@ TEST_F_FORK(layout1, proc_pipe)
/* Checks access to pipes through FD. */
ASSERT_EQ(0, pipe2(pipe_fds, O_CLOEXEC));
- ASSERT_EQ(1, write(pipe_fds[1], ".", 1)) {
+ ASSERT_EQ(1, write(pipe_fds[1], ".", 1))
+ {
TH_LOG("Failed to write in pipe: %s", strerror(errno));
}
ASSERT_EQ(1, read(pipe_fds[0], &buf, 1));
@@ -2050,9 +3000,10 @@ TEST_F_FORK(layout1, proc_pipe)
/* Checks write access to pipe through /proc/self/fd . */
proc_fd = open_proc_fd(_metadata, pipe_fds[1], O_WRONLY | O_CLOEXEC);
ASSERT_LE(0, proc_fd);
- ASSERT_EQ(1, write(proc_fd, ".", 1)) {
+ ASSERT_EQ(1, write(proc_fd, ".", 1))
+ {
TH_LOG("Failed to write through /proc/self/fd/%d: %s",
- pipe_fds[1], strerror(errno));
+ pipe_fds[1], strerror(errno));
}
ASSERT_EQ(0, close(proc_fd));
@@ -2060,9 +3011,10 @@ TEST_F_FORK(layout1, proc_pipe)
proc_fd = open_proc_fd(_metadata, pipe_fds[0], O_RDONLY | O_CLOEXEC);
ASSERT_LE(0, proc_fd);
buf = '\0';
- ASSERT_EQ(1, read(proc_fd, &buf, 1)) {
+ ASSERT_EQ(1, read(proc_fd, &buf, 1))
+ {
TH_LOG("Failed to read through /proc/self/fd/%d: %s",
- pipe_fds[1], strerror(errno));
+ pipe_fds[1], strerror(errno));
}
ASSERT_EQ(0, close(proc_fd));
@@ -2070,8 +3022,9 @@ TEST_F_FORK(layout1, proc_pipe)
ASSERT_EQ(0, close(pipe_fds[1]));
}
-FIXTURE(layout1_bind) {
-};
+/* clang-format off */
+FIXTURE(layout1_bind) {};
+/* clang-format on */
FIXTURE_SETUP(layout1_bind)
{
@@ -2161,7 +3114,7 @@ TEST_F_FORK(layout1_bind, same_content_same_file)
.path = dir_s2d1,
.access = ACCESS_RW,
},
- {}
+ {},
};
/*
* Sets access rights on the same bind-mounted directories. The result
@@ -2177,7 +3130,7 @@ TEST_F_FORK(layout1_bind, same_content_same_file)
.path = dir_s2d2,
.access = ACCESS_RW,
},
- {}
+ {},
};
/* Only allow read-access to the s1d3 hierarchies. */
const struct rule layer3_source[] = {
@@ -2185,7 +3138,7 @@ TEST_F_FORK(layout1_bind, same_content_same_file)
.path = dir_s1d3,
.access = LANDLOCK_ACCESS_FS_READ_FILE,
},
- {}
+ {},
};
/* Removes all access rights. */
const struct rule layer4_destination[] = {
@@ -2193,7 +3146,7 @@ TEST_F_FORK(layout1_bind, same_content_same_file)
.path = bind_file1_s1d3,
.access = LANDLOCK_ACCESS_FS_WRITE_FILE,
},
- {}
+ {},
};
int ruleset_fd;
@@ -2282,8 +3235,46 @@ TEST_F_FORK(layout1_bind, same_content_same_file)
ASSERT_EQ(EACCES, test_open(bind_file1_s1d3, O_WRONLY));
}
-#define LOWER_BASE TMP_DIR "/lower"
-#define LOWER_DATA LOWER_BASE "/data"
+TEST_F_FORK(layout1_bind, reparent_cross_mount)
+{
+ const struct rule layer1[] = {
+ {
+ /* dir_s2d1 is beneath the dir_s2d2 mount point. */
+ .path = dir_s2d1,
+ .access = LANDLOCK_ACCESS_FS_REFER,
+ },
+ {
+ .path = bind_dir_s1d3,
+ .access = LANDLOCK_ACCESS_FS_EXECUTE,
+ },
+ {},
+ };
+ int ruleset_fd = create_ruleset(
+ _metadata,
+ LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_EXECUTE, layer1);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks basic denied move. */
+ ASSERT_EQ(-1, rename(file1_s1d1, file1_s1d2));
+ ASSERT_EQ(EXDEV, errno);
+
+ /* Checks real cross-mount move (Landlock is not involved). */
+ ASSERT_EQ(-1, rename(file1_s2d1, file1_s2d2));
+ ASSERT_EQ(EXDEV, errno);
+
+ /* Checks move that will give more accesses. */
+ ASSERT_EQ(-1, rename(file1_s2d2, bind_file1_s1d3));
+ ASSERT_EQ(EXDEV, errno);
+
+ /* Checks legitimate downgrade move. */
+ ASSERT_EQ(0, rename(bind_file1_s1d3, file1_s2d2));
+}
+
+#define LOWER_BASE TMP_DIR "/lower"
+#define LOWER_DATA LOWER_BASE "/data"
static const char lower_fl1[] = LOWER_DATA "/fl1";
static const char lower_dl1[] = LOWER_DATA "/dl1";
static const char lower_dl1_fl2[] = LOWER_DATA "/dl1/fl2";
@@ -2295,23 +3286,23 @@ static const char lower_do1_fl3[] = LOWER_DATA "/do1/fl3";
static const char (*lower_base_files[])[] = {
&lower_fl1,
&lower_fo1,
- NULL
+ NULL,
};
static const char (*lower_base_directories[])[] = {
&lower_dl1,
&lower_do1,
- NULL
+ NULL,
};
static const char (*lower_sub_files[])[] = {
&lower_dl1_fl2,
&lower_do1_fo2,
&lower_do1_fl3,
- NULL
+ NULL,
};
-#define UPPER_BASE TMP_DIR "/upper"
-#define UPPER_DATA UPPER_BASE "/data"
-#define UPPER_WORK UPPER_BASE "/work"
+#define UPPER_BASE TMP_DIR "/upper"
+#define UPPER_DATA UPPER_BASE "/data"
+#define UPPER_WORK UPPER_BASE "/work"
static const char upper_fu1[] = UPPER_DATA "/fu1";
static const char upper_du1[] = UPPER_DATA "/du1";
static const char upper_du1_fu2[] = UPPER_DATA "/du1/fu2";
@@ -2323,22 +3314,22 @@ static const char upper_do1_fu3[] = UPPER_DATA "/do1/fu3";
static const char (*upper_base_files[])[] = {
&upper_fu1,
&upper_fo1,
- NULL
+ NULL,
};
static const char (*upper_base_directories[])[] = {
&upper_du1,
&upper_do1,
- NULL
+ NULL,
};
static const char (*upper_sub_files[])[] = {
&upper_du1_fu2,
&upper_do1_fo2,
&upper_do1_fu3,
- NULL
+ NULL,
};
-#define MERGE_BASE TMP_DIR "/merge"
-#define MERGE_DATA MERGE_BASE "/data"
+#define MERGE_BASE TMP_DIR "/merge"
+#define MERGE_DATA MERGE_BASE "/data"
static const char merge_fl1[] = MERGE_DATA "/fl1";
static const char merge_dl1[] = MERGE_DATA "/dl1";
static const char merge_dl1_fl2[] = MERGE_DATA "/dl1/fl2";
@@ -2355,21 +3346,17 @@ static const char (*merge_base_files[])[] = {
&merge_fl1,
&merge_fu1,
&merge_fo1,
- NULL
+ NULL,
};
static const char (*merge_base_directories[])[] = {
&merge_dl1,
&merge_du1,
&merge_do1,
- NULL
+ NULL,
};
static const char (*merge_sub_files[])[] = {
- &merge_dl1_fl2,
- &merge_du1_fu2,
- &merge_do1_fo2,
- &merge_do1_fl3,
- &merge_do1_fu3,
- NULL
+ &merge_dl1_fl2, &merge_du1_fu2, &merge_do1_fo2,
+ &merge_do1_fl3, &merge_do1_fu3, NULL,
};
/*
@@ -2411,8 +3398,9 @@ static const char (*merge_sub_files[])[] = {
* └── work
*/
-FIXTURE(layout2_overlay) {
-};
+/* clang-format off */
+FIXTURE(layout2_overlay) {};
+/* clang-format on */
FIXTURE_SETUP(layout2_overlay)
{
@@ -2444,9 +3432,8 @@ FIXTURE_SETUP(layout2_overlay)
set_cap(_metadata, CAP_SYS_ADMIN);
set_cap(_metadata, CAP_DAC_OVERRIDE);
ASSERT_EQ(0, mount("overlay", MERGE_DATA, "overlay", 0,
- "lowerdir=" LOWER_DATA
- ",upperdir=" UPPER_DATA
- ",workdir=" UPPER_WORK));
+ "lowerdir=" LOWER_DATA ",upperdir=" UPPER_DATA
+ ",workdir=" UPPER_WORK));
clear_cap(_metadata, CAP_DAC_OVERRIDE);
clear_cap(_metadata, CAP_SYS_ADMIN);
}
@@ -2513,9 +3500,9 @@ TEST_F_FORK(layout2_overlay, no_restriction)
ASSERT_EQ(0, test_open(merge_do1_fu3, O_RDONLY));
}
-#define for_each_path(path_list, path_entry, i) \
- for (i = 0, path_entry = *path_list[i]; path_list[i]; \
- path_entry = *path_list[++i])
+#define for_each_path(path_list, path_entry, i) \
+ for (i = 0, path_entry = *path_list[i]; path_list[i]; \
+ path_entry = *path_list[++i])
TEST_F_FORK(layout2_overlay, same_content_different_file)
{
@@ -2533,7 +3520,7 @@ TEST_F_FORK(layout2_overlay, same_content_different_file)
.path = MERGE_BASE,
.access = ACCESS_RW,
},
- {}
+ {},
};
const struct rule layer2_data[] = {
{
@@ -2548,7 +3535,7 @@ TEST_F_FORK(layout2_overlay, same_content_different_file)
.path = MERGE_DATA,
.access = ACCESS_RW,
},
- {}
+ {},
};
/* Sets access right on directories inside both layers. */
const struct rule layer3_subdirs[] = {
@@ -2580,7 +3567,7 @@ TEST_F_FORK(layout2_overlay, same_content_different_file)
.path = merge_do1,
.access = ACCESS_RW,
},
- {}
+ {},
};
/* Tighten access rights to the files. */
const struct rule layer4_files[] = {
@@ -2611,37 +3598,37 @@ TEST_F_FORK(layout2_overlay, same_content_different_file)
{
.path = merge_dl1_fl2,
.access = LANDLOCK_ACCESS_FS_READ_FILE |
- LANDLOCK_ACCESS_FS_WRITE_FILE,
+ LANDLOCK_ACCESS_FS_WRITE_FILE,
},
{
.path = merge_du1_fu2,
.access = LANDLOCK_ACCESS_FS_READ_FILE |
- LANDLOCK_ACCESS_FS_WRITE_FILE,
+ LANDLOCK_ACCESS_FS_WRITE_FILE,
},
{
.path = merge_do1_fo2,
.access = LANDLOCK_ACCESS_FS_READ_FILE |
- LANDLOCK_ACCESS_FS_WRITE_FILE,
+ LANDLOCK_ACCESS_FS_WRITE_FILE,
},
{
.path = merge_do1_fl3,
.access = LANDLOCK_ACCESS_FS_READ_FILE |
- LANDLOCK_ACCESS_FS_WRITE_FILE,
+ LANDLOCK_ACCESS_FS_WRITE_FILE,
},
{
.path = merge_do1_fu3,
.access = LANDLOCK_ACCESS_FS_READ_FILE |
- LANDLOCK_ACCESS_FS_WRITE_FILE,
+ LANDLOCK_ACCESS_FS_WRITE_FILE,
},
- {}
+ {},
};
const struct rule layer5_merge_only[] = {
{
.path = MERGE_DATA,
.access = LANDLOCK_ACCESS_FS_READ_FILE |
- LANDLOCK_ACCESS_FS_WRITE_FILE,
+ LANDLOCK_ACCESS_FS_WRITE_FILE,
},
- {}
+ {},
};
int ruleset_fd;
size_t i;
@@ -2659,7 +3646,8 @@ TEST_F_FORK(layout2_overlay, same_content_different_file)
ASSERT_EQ(EACCES, test_open(path_entry, O_WRONLY));
}
for_each_path(lower_base_directories, path_entry, i) {
- ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY | O_DIRECTORY));
+ ASSERT_EQ(EACCES,
+ test_open(path_entry, O_RDONLY | O_DIRECTORY));
}
for_each_path(lower_sub_files, path_entry, i) {
ASSERT_EQ(0, test_open(path_entry, O_RDONLY));
@@ -2671,7 +3659,8 @@ TEST_F_FORK(layout2_overlay, same_content_different_file)
ASSERT_EQ(EACCES, test_open(path_entry, O_WRONLY));
}
for_each_path(upper_base_directories, path_entry, i) {
- ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY | O_DIRECTORY));
+ ASSERT_EQ(EACCES,
+ test_open(path_entry, O_RDONLY | O_DIRECTORY));
}
for_each_path(upper_sub_files, path_entry, i) {
ASSERT_EQ(0, test_open(path_entry, O_RDONLY));
@@ -2756,7 +3745,8 @@ TEST_F_FORK(layout2_overlay, same_content_different_file)
ASSERT_EQ(EACCES, test_open(path_entry, O_RDWR));
}
for_each_path(merge_base_directories, path_entry, i) {
- ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY | O_DIRECTORY));
+ ASSERT_EQ(EACCES,
+ test_open(path_entry, O_RDONLY | O_DIRECTORY));
}
for_each_path(merge_sub_files, path_entry, i) {
ASSERT_EQ(0, test_open(path_entry, O_RDWR));
@@ -2781,7 +3771,8 @@ TEST_F_FORK(layout2_overlay, same_content_different_file)
ASSERT_EQ(EACCES, test_open(path_entry, O_RDWR));
}
for_each_path(merge_base_directories, path_entry, i) {
- ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY | O_DIRECTORY));
+ ASSERT_EQ(EACCES,
+ test_open(path_entry, O_RDONLY | O_DIRECTORY));
}
for_each_path(merge_sub_files, path_entry, i) {
ASSERT_EQ(0, test_open(path_entry, O_RDWR));
diff --git a/tools/testing/selftests/landlock/ptrace_test.c b/tools/testing/selftests/landlock/ptrace_test.c
index 15fbef9cc849..c28ef98ff3ac 100644
--- a/tools/testing/selftests/landlock/ptrace_test.c
+++ b/tools/testing/selftests/landlock/ptrace_test.c
@@ -26,9 +26,10 @@ static void create_domain(struct __test_metadata *const _metadata)
.handled_access_fs = LANDLOCK_ACCESS_FS_MAKE_BLOCK,
};
- ruleset_fd = landlock_create_ruleset(&ruleset_attr,
- sizeof(ruleset_attr), 0);
- EXPECT_LE(0, ruleset_fd) {
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ EXPECT_LE(0, ruleset_fd)
+ {
TH_LOG("Failed to create a ruleset: %s", strerror(errno));
}
EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
@@ -43,7 +44,7 @@ static int test_ptrace_read(const pid_t pid)
int procenv_path_size, fd;
procenv_path_size = snprintf(procenv_path, sizeof(procenv_path),
- path_template, pid);
+ path_template, pid);
if (procenv_path_size >= sizeof(procenv_path))
return E2BIG;
@@ -59,9 +60,12 @@ static int test_ptrace_read(const pid_t pid)
return 0;
}
-FIXTURE(hierarchy) { };
+/* clang-format off */
+FIXTURE(hierarchy) {};
+/* clang-format on */
-FIXTURE_VARIANT(hierarchy) {
+FIXTURE_VARIANT(hierarchy)
+{
const bool domain_both;
const bool domain_parent;
const bool domain_child;
@@ -83,7 +87,9 @@ FIXTURE_VARIANT(hierarchy) {
* \ P2 -> P1 : allow
* 'P2
*/
+/* clang-format off */
FIXTURE_VARIANT_ADD(hierarchy, allow_without_domain) {
+ /* clang-format on */
.domain_both = false,
.domain_parent = false,
.domain_child = false,
@@ -98,7 +104,9 @@ FIXTURE_VARIANT_ADD(hierarchy, allow_without_domain) {
* | P2 |
* '------'
*/
+/* clang-format off */
FIXTURE_VARIANT_ADD(hierarchy, allow_with_one_domain) {
+ /* clang-format on */
.domain_both = false,
.domain_parent = false,
.domain_child = true,
@@ -112,7 +120,9 @@ FIXTURE_VARIANT_ADD(hierarchy, allow_with_one_domain) {
* '
* P2
*/
+/* clang-format off */
FIXTURE_VARIANT_ADD(hierarchy, deny_with_parent_domain) {
+ /* clang-format on */
.domain_both = false,
.domain_parent = true,
.domain_child = false,
@@ -127,7 +137,9 @@ FIXTURE_VARIANT_ADD(hierarchy, deny_with_parent_domain) {
* | P2 |
* '------'
*/
+/* clang-format off */
FIXTURE_VARIANT_ADD(hierarchy, deny_with_sibling_domain) {
+ /* clang-format on */
.domain_both = false,
.domain_parent = true,
.domain_child = true,
@@ -142,7 +154,9 @@ FIXTURE_VARIANT_ADD(hierarchy, deny_with_sibling_domain) {
* | P2 |
* '-------------'
*/
+/* clang-format off */
FIXTURE_VARIANT_ADD(hierarchy, allow_sibling_domain) {
+ /* clang-format on */
.domain_both = true,
.domain_parent = false,
.domain_child = false,
@@ -158,7 +172,9 @@ FIXTURE_VARIANT_ADD(hierarchy, allow_sibling_domain) {
* | '------' |
* '-----------------'
*/
+/* clang-format off */
FIXTURE_VARIANT_ADD(hierarchy, allow_with_nested_domain) {
+ /* clang-format on */
.domain_both = true,
.domain_parent = false,
.domain_child = true,
@@ -174,7 +190,9 @@ FIXTURE_VARIANT_ADD(hierarchy, allow_with_nested_domain) {
* | P2 |
* '-----------------'
*/
+/* clang-format off */
FIXTURE_VARIANT_ADD(hierarchy, deny_with_nested_and_parent_domain) {
+ /* clang-format on */
.domain_both = true,
.domain_parent = true,
.domain_child = false,
@@ -192,17 +210,21 @@ FIXTURE_VARIANT_ADD(hierarchy, deny_with_nested_and_parent_domain) {
* | '------' |
* '-----------------'
*/
+/* clang-format off */
FIXTURE_VARIANT_ADD(hierarchy, deny_with_forked_domain) {
+ /* clang-format on */
.domain_both = true,
.domain_parent = true,
.domain_child = true,
};
FIXTURE_SETUP(hierarchy)
-{ }
+{
+}
FIXTURE_TEARDOWN(hierarchy)
-{ }
+{
+}
/* Test PTRACE_TRACEME and PTRACE_ATTACH for parent and child. */
TEST_F(hierarchy, trace)
@@ -330,7 +352,7 @@ TEST_F(hierarchy, trace)
ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
ASSERT_EQ(child, waitpid(child, &status, 0));
if (WIFSIGNALED(status) || !WIFEXITED(status) ||
- WEXITSTATUS(status) != EXIT_SUCCESS)
+ WEXITSTATUS(status) != EXIT_SUCCESS)
_metadata->passed = 0;
}
diff --git a/tools/testing/selftests/mqueue/mq_perf_tests.c b/tools/testing/selftests/mqueue/mq_perf_tests.c
index b019e0b8221c..5c16159d0bcd 100644
--- a/tools/testing/selftests/mqueue/mq_perf_tests.c
+++ b/tools/testing/selftests/mqueue/mq_perf_tests.c
@@ -35,6 +35,7 @@
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/stat.h>
+#include <sys/param.h>
#include <mqueue.h>
#include <popt.h>
#include <error.h>
@@ -73,7 +74,6 @@ static char *usage =
char *MAX_MSGS = "/proc/sys/fs/mqueue/msg_max";
char *MAX_MSGSIZE = "/proc/sys/fs/mqueue/msgsize_max";
-#define min(a, b) ((a) < (b) ? (a) : (b))
#define MAX_CPUS 64
char *cpu_option_string;
int cpus_to_pin[MAX_CPUS];
@@ -180,6 +180,9 @@ void shutdown(int exit_val, char *err_cause, int line_no)
if (in_shutdown++)
return;
+ /* Free the cpu_set allocated using CPU_ALLOC in main function */
+ CPU_FREE(cpu_set);
+
for (i = 0; i < num_cpus_to_pin; i++)
if (cpu_threads[i]) {
pthread_kill(cpu_threads[i], SIGUSR1);
@@ -551,7 +554,13 @@ int main(int argc, char *argv[])
perror("sysconf(_SC_NPROCESSORS_ONLN)");
exit(1);
}
- cpus_online = min(MAX_CPUS, sysconf(_SC_NPROCESSORS_ONLN));
+
+ if (getuid() != 0)
+ ksft_exit_skip("Not running as root, but almost all tests "
+ "require root in order to modify\nsystem settings. "
+ "Exiting.\n");
+
+ cpus_online = MIN(MAX_CPUS, sysconf(_SC_NPROCESSORS_ONLN));
cpu_set = CPU_ALLOC(cpus_online);
if (cpu_set == NULL) {
perror("CPU_ALLOC()");
@@ -589,7 +598,7 @@ int main(int argc, char *argv[])
cpu_set)) {
fprintf(stderr, "Any given CPU may "
"only be given once.\n");
- exit(1);
+ goto err_code;
} else
CPU_SET_S(cpus_to_pin[cpu],
cpu_set_size, cpu_set);
@@ -607,7 +616,7 @@ int main(int argc, char *argv[])
queue_path = malloc(strlen(option) + 2);
if (!queue_path) {
perror("malloc()");
- exit(1);
+ goto err_code;
}
queue_path[0] = '/';
queue_path[1] = 0;
@@ -622,17 +631,12 @@ int main(int argc, char *argv[])
fprintf(stderr, "Must pass at least one CPU to continuous "
"mode.\n");
poptPrintUsage(popt_context, stderr, 0);
- exit(1);
+ goto err_code;
} else if (!continuous_mode) {
num_cpus_to_pin = 1;
cpus_to_pin[0] = cpus_online - 1;
}
- if (getuid() != 0)
- ksft_exit_skip("Not running as root, but almost all tests "
- "require root in order to modify\nsystem settings. "
- "Exiting.\n");
-
max_msgs = fopen(MAX_MSGS, "r+");
max_msgsize = fopen(MAX_MSGSIZE, "r+");
if (!max_msgs)
@@ -740,4 +744,9 @@ int main(int argc, char *argv[])
sleep(1);
}
shutdown(0, "", 0);
+
+err_code:
+ CPU_FREE(cpu_set);
+ exit(1);
+
}
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 3fe2515aa616..e1f998defd10 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -25,12 +25,13 @@ TEST_PROGS += bareudp.sh
TEST_PROGS += amt.sh
TEST_PROGS += unicast_extensions.sh
TEST_PROGS += udpgro_fwd.sh
+TEST_PROGS += udpgro_frglist.sh
TEST_PROGS += veth.sh
TEST_PROGS += ioam6.sh
TEST_PROGS += gro.sh
TEST_PROGS += gre_gso.sh
TEST_PROGS += cmsg_so_mark.sh
-TEST_PROGS += cmsg_time.sh
+TEST_PROGS += cmsg_time.sh cmsg_ipv6.sh
TEST_PROGS += srv6_end_dt46_l3vpn_test.sh
TEST_PROGS += srv6_end_dt4_l3vpn_test.sh
TEST_PROGS += srv6_end_dt6_l3vpn_test.sh
@@ -54,12 +55,15 @@ TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls
TEST_GEN_FILES += toeplitz
TEST_GEN_FILES += cmsg_sender
+TEST_PROGS += test_vxlan_vnifiltering.sh
TEST_FILES := settings
KSFT_KHDR_INSTALL := 1
include ../lib.mk
+include bpf/Makefile
+
$(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma
$(OUTPUT)/tcp_mmap: LDLIBS += -lpthread
$(OUTPUT)/tcp_inq: LDLIBS += -lpthread
diff --git a/tools/testing/selftests/net/bpf/Makefile b/tools/testing/selftests/net/bpf/Makefile
new file mode 100644
index 000000000000..f91bf14bbee7
--- /dev/null
+++ b/tools/testing/selftests/net/bpf/Makefile
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0
+
+CLANG ?= clang
+CCINCLUDE += -I../../bpf
+CCINCLUDE += -I../../../../../usr/include/
+
+TEST_CUSTOM_PROGS = $(OUTPUT)/bpf/nat6to4.o
+all: $(TEST_CUSTOM_PROGS)
+
+$(OUTPUT)/%.o: %.c
+ $(CLANG) -O2 -target bpf -c $< $(CCINCLUDE) -o $@
+
+clean:
+ rm -f $(TEST_CUSTOM_PROGS)
diff --git a/tools/testing/selftests/net/bpf/nat6to4.c b/tools/testing/selftests/net/bpf/nat6to4.c
new file mode 100644
index 000000000000..ac54c36b25fc
--- /dev/null
+++ b/tools/testing/selftests/net/bpf/nat6to4.c
@@ -0,0 +1,285 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * This code is taken from the Android Open Source Project and the author
+ * (Maciej Żenczykowski) has gave permission to relicense it under the
+ * GPLv2. Therefore this program is free software;
+ * You can redistribute it and/or modify it under the terms of the GNU
+ * General Public License version 2 as published by the Free Software
+ * Foundation
+
+ * The original headers, including the original license headers, are
+ * included below for completeness.
+ *
+ * Copyright (C) 2019 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <linux/bpf.h>
+#include <linux/if.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/pkt_cls.h>
+#include <linux/swab.h>
+#include <stdbool.h>
+#include <stdint.h>
+
+
+#include <linux/udp.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#define IP_DF 0x4000 // Flag: "Don't Fragment"
+
+SEC("schedcls/ingress6/nat_6")
+int sched_cls_ingress6_nat_6_prog(struct __sk_buff *skb)
+{
+ const int l2_header_size = sizeof(struct ethhdr);
+ void *data = (void *)(long)skb->data;
+ const void *data_end = (void *)(long)skb->data_end;
+ const struct ethhdr * const eth = data; // used iff is_ethernet
+ const struct ipv6hdr * const ip6 = (void *)(eth + 1);
+
+ // Require ethernet dst mac address to be our unicast address.
+ if (skb->pkt_type != PACKET_HOST)
+ return TC_ACT_OK;
+
+ // Must be meta-ethernet IPv6 frame
+ if (skb->protocol != bpf_htons(ETH_P_IPV6))
+ return TC_ACT_OK;
+
+ // Must have (ethernet and) ipv6 header
+ if (data + l2_header_size + sizeof(*ip6) > data_end)
+ return TC_ACT_OK;
+
+ // Ethertype - if present - must be IPv6
+ if (eth->h_proto != bpf_htons(ETH_P_IPV6))
+ return TC_ACT_OK;
+
+ // IP version must be 6
+ if (ip6->version != 6)
+ return TC_ACT_OK;
+ // Maximum IPv6 payload length that can be translated to IPv4
+ if (bpf_ntohs(ip6->payload_len) > 0xFFFF - sizeof(struct iphdr))
+ return TC_ACT_OK;
+ switch (ip6->nexthdr) {
+ case IPPROTO_TCP: // For TCP & UDP the checksum neutrality of the chosen IPv6
+ case IPPROTO_UDP: // address means there is no need to update their checksums.
+ case IPPROTO_GRE: // We do not need to bother looking at GRE/ESP headers,
+ case IPPROTO_ESP: // since there is never a checksum to update.
+ break;
+ default: // do not know how to handle anything else
+ return TC_ACT_OK;
+ }
+
+ struct ethhdr eth2; // used iff is_ethernet
+
+ eth2 = *eth; // Copy over the ethernet header (src/dst mac)
+ eth2.h_proto = bpf_htons(ETH_P_IP); // But replace the ethertype
+
+ struct iphdr ip = {
+ .version = 4, // u4
+ .ihl = sizeof(struct iphdr) / sizeof(__u32), // u4
+ .tos = (ip6->priority << 4) + (ip6->flow_lbl[0] >> 4), // u8
+ .tot_len = bpf_htons(bpf_ntohs(ip6->payload_len) + sizeof(struct iphdr)), // u16
+ .id = 0, // u16
+ .frag_off = bpf_htons(IP_DF), // u16
+ .ttl = ip6->hop_limit, // u8
+ .protocol = ip6->nexthdr, // u8
+ .check = 0, // u16
+ .saddr = 0x0201a8c0, // u32
+ .daddr = 0x0101a8c0, // u32
+ };
+
+ // Calculate the IPv4 one's complement checksum of the IPv4 header.
+ __wsum sum4 = 0;
+
+ for (int i = 0; i < sizeof(ip) / sizeof(__u16); ++i)
+ sum4 += ((__u16 *)&ip)[i];
+
+ // Note that sum4 is guaranteed to be non-zero by virtue of ip.version == 4
+ sum4 = (sum4 & 0xFFFF) + (sum4 >> 16); // collapse u32 into range 1 .. 0x1FFFE
+ sum4 = (sum4 & 0xFFFF) + (sum4 >> 16); // collapse any potential carry into u16
+ ip.check = (__u16)~sum4; // sum4 cannot be zero, so this is never 0xFFFF
+
+ // Calculate the *negative* IPv6 16-bit one's complement checksum of the IPv6 header.
+ __wsum sum6 = 0;
+ // We'll end up with a non-zero sum due to ip6->version == 6 (which has '0' bits)
+ for (int i = 0; i < sizeof(*ip6) / sizeof(__u16); ++i)
+ sum6 += ~((__u16 *)ip6)[i]; // note the bitwise negation
+
+ // Note that there is no L4 checksum update: we are relying on the checksum neutrality
+ // of the ipv6 address chosen by netd's ClatdController.
+
+ // Packet mutations begin - point of no return, but if this first modification fails
+ // the packet is probably still pristine, so let clatd handle it.
+ if (bpf_skb_change_proto(skb, bpf_htons(ETH_P_IP), 0))
+ return TC_ACT_OK;
+ bpf_csum_update(skb, sum6);
+
+ data = (void *)(long)skb->data;
+ data_end = (void *)(long)skb->data_end;
+ if (data + l2_header_size + sizeof(struct iphdr) > data_end)
+ return TC_ACT_SHOT;
+
+ struct ethhdr *new_eth = data;
+
+ // Copy over the updated ethernet header
+ *new_eth = eth2;
+
+ // Copy over the new ipv4 header.
+ *(struct iphdr *)(new_eth + 1) = ip;
+ return bpf_redirect(skb->ifindex, BPF_F_INGRESS);
+}
+
+SEC("schedcls/egress4/snat4")
+int sched_cls_egress4_snat4_prog(struct __sk_buff *skb)
+{
+ const int l2_header_size = sizeof(struct ethhdr);
+ void *data = (void *)(long)skb->data;
+ const void *data_end = (void *)(long)skb->data_end;
+ const struct ethhdr *const eth = data; // used iff is_ethernet
+ const struct iphdr *const ip4 = (void *)(eth + 1);
+
+ // Must be meta-ethernet IPv4 frame
+ if (skb->protocol != bpf_htons(ETH_P_IP))
+ return TC_ACT_OK;
+
+ // Must have ipv4 header
+ if (data + l2_header_size + sizeof(struct ipv6hdr) > data_end)
+ return TC_ACT_OK;
+
+ // Ethertype - if present - must be IPv4
+ if (eth->h_proto != bpf_htons(ETH_P_IP))
+ return TC_ACT_OK;
+
+ // IP version must be 4
+ if (ip4->version != 4)
+ return TC_ACT_OK;
+
+ // We cannot handle IP options, just standard 20 byte == 5 dword minimal IPv4 header
+ if (ip4->ihl != 5)
+ return TC_ACT_OK;
+
+ // Maximum IPv6 payload length that can be translated to IPv4
+ if (bpf_htons(ip4->tot_len) > 0xFFFF - sizeof(struct ipv6hdr))
+ return TC_ACT_OK;
+
+ // Calculate the IPv4 one's complement checksum of the IPv4 header.
+ __wsum sum4 = 0;
+
+ for (int i = 0; i < sizeof(*ip4) / sizeof(__u16); ++i)
+ sum4 += ((__u16 *)ip4)[i];
+
+ // Note that sum4 is guaranteed to be non-zero by virtue of ip4->version == 4
+ sum4 = (sum4 & 0xFFFF) + (sum4 >> 16); // collapse u32 into range 1 .. 0x1FFFE
+ sum4 = (sum4 & 0xFFFF) + (sum4 >> 16); // collapse any potential carry into u16
+ // for a correct checksum we should get *a* zero, but sum4 must be positive, ie 0xFFFF
+ if (sum4 != 0xFFFF)
+ return TC_ACT_OK;
+
+ // Minimum IPv4 total length is the size of the header
+ if (bpf_ntohs(ip4->tot_len) < sizeof(*ip4))
+ return TC_ACT_OK;
+
+ // We are incapable of dealing with IPv4 fragments
+ if (ip4->frag_off & ~bpf_htons(IP_DF))
+ return TC_ACT_OK;
+
+ switch (ip4->protocol) {
+ case IPPROTO_TCP: // For TCP & UDP the checksum neutrality of the chosen IPv6
+ case IPPROTO_GRE: // address means there is no need to update their checksums.
+ case IPPROTO_ESP: // We do not need to bother looking at GRE/ESP headers,
+ break; // since there is never a checksum to update.
+
+ case IPPROTO_UDP: // See above comment, but must also have UDP header...
+ if (data + sizeof(*ip4) + sizeof(struct udphdr) > data_end)
+ return TC_ACT_OK;
+ const struct udphdr *uh = (const struct udphdr *)(ip4 + 1);
+ // If IPv4/UDP checksum is 0 then fallback to clatd so it can calculate the
+ // checksum. Otherwise the network or more likely the NAT64 gateway might
+ // drop the packet because in most cases IPv6/UDP packets with a zero checksum
+ // are invalid. See RFC 6935. TODO: calculate checksum via bpf_csum_diff()
+ if (!uh->check)
+ return TC_ACT_OK;
+ break;
+
+ default: // do not know how to handle anything else
+ return TC_ACT_OK;
+ }
+ struct ethhdr eth2; // used iff is_ethernet
+
+ eth2 = *eth; // Copy over the ethernet header (src/dst mac)
+ eth2.h_proto = bpf_htons(ETH_P_IPV6); // But replace the ethertype
+
+ struct ipv6hdr ip6 = {
+ .version = 6, // __u8:4
+ .priority = ip4->tos >> 4, // __u8:4
+ .flow_lbl = {(ip4->tos & 0xF) << 4, 0, 0}, // __u8[3]
+ .payload_len = bpf_htons(bpf_ntohs(ip4->tot_len) - 20), // __be16
+ .nexthdr = ip4->protocol, // __u8
+ .hop_limit = ip4->ttl, // __u8
+ };
+ ip6.saddr.in6_u.u6_addr32[0] = bpf_htonl(0x20010db8);
+ ip6.saddr.in6_u.u6_addr32[1] = 0;
+ ip6.saddr.in6_u.u6_addr32[2] = 0;
+ ip6.saddr.in6_u.u6_addr32[3] = bpf_htonl(1);
+ ip6.daddr.in6_u.u6_addr32[0] = bpf_htonl(0x20010db8);
+ ip6.daddr.in6_u.u6_addr32[1] = 0;
+ ip6.daddr.in6_u.u6_addr32[2] = 0;
+ ip6.daddr.in6_u.u6_addr32[3] = bpf_htonl(2);
+
+ // Calculate the IPv6 16-bit one's complement checksum of the IPv6 header.
+ __wsum sum6 = 0;
+ // We'll end up with a non-zero sum due to ip6.version == 6
+ for (int i = 0; i < sizeof(ip6) / sizeof(__u16); ++i)
+ sum6 += ((__u16 *)&ip6)[i];
+
+ // Packet mutations begin - point of no return, but if this first modification fails
+ // the packet is probably still pristine, so let clatd handle it.
+ if (bpf_skb_change_proto(skb, bpf_htons(ETH_P_IPV6), 0))
+ return TC_ACT_OK;
+
+ // This takes care of updating the skb->csum field for a CHECKSUM_COMPLETE packet.
+ // In such a case, skb->csum is a 16-bit one's complement sum of the entire payload,
+ // thus we need to subtract out the ipv4 header's sum, and add in the ipv6 header's sum.
+ // However, we've already verified the ipv4 checksum is correct and thus 0.
+ // Thus we only need to add the ipv6 header's sum.
+ //
+ // bpf_csum_update() always succeeds if the skb is CHECKSUM_COMPLETE and returns an error
+ // (-ENOTSUPP) if it isn't. So we just ignore the return code (see above for more details).
+ bpf_csum_update(skb, sum6);
+
+ // bpf_skb_change_proto() invalidates all pointers - reload them.
+ data = (void *)(long)skb->data;
+ data_end = (void *)(long)skb->data_end;
+
+ // I cannot think of any valid way for this error condition to trigger, however I do
+ // believe the explicit check is required to keep the in kernel ebpf verifier happy.
+ if (data + l2_header_size + sizeof(ip6) > data_end)
+ return TC_ACT_SHOT;
+
+ struct ethhdr *new_eth = data;
+
+ // Copy over the updated ethernet header
+ *new_eth = eth2;
+ // Copy over the new ipv4 header.
+ *(struct ipv6hdr *)(new_eth + 1) = ip6;
+ return TC_ACT_OK;
+}
+
+char _license[] SEC("license") = ("GPL");
diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh
index 47c4d4b4a44a..54701c8b0cd7 100755
--- a/tools/testing/selftests/net/fcnal-test.sh
+++ b/tools/testing/selftests/net/fcnal-test.sh
@@ -810,10 +810,16 @@ ipv4_ping()
setup
set_sysctl net.ipv4.raw_l3mdev_accept=1 2>/dev/null
ipv4_ping_novrf
+ setup
+ set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null
+ ipv4_ping_novrf
log_subsection "With VRF"
setup "yes"
ipv4_ping_vrf
+ setup "yes"
+ set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null
+ ipv4_ping_vrf
}
################################################################################
@@ -2348,10 +2354,16 @@ ipv6_ping()
log_subsection "No VRF"
setup
ipv6_ping_novrf
+ setup
+ set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null
+ ipv6_ping_novrf
log_subsection "With VRF"
setup "yes"
ipv6_ping_vrf
+ setup "yes"
+ set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null
+ ipv6_ping_vrf
}
################################################################################
diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
index d444ee6aa3cb..b3bf5319bb0e 100755
--- a/tools/testing/selftests/net/fib_nexthops.sh
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -1208,6 +1208,20 @@ ipv4_fcnal()
set +e
check_nexthop "dev veth1" ""
log_test $? 0 "Nexthops removed on admin down"
+
+ # nexthop route delete warning: route add with nhid and delete
+ # using device
+ run_cmd "$IP li set dev veth1 up"
+ run_cmd "$IP nexthop add id 12 via 172.16.1.3 dev veth1"
+ out1=`dmesg | grep "WARNING:.*fib_nh_match.*" | wc -l`
+ run_cmd "$IP route add 172.16.101.1/32 nhid 12"
+ run_cmd "$IP route delete 172.16.101.1/32 dev veth1"
+ out2=`dmesg | grep "WARNING:.*fib_nh_match.*" | wc -l`
+ [ $out1 -eq $out2 ]
+ rc=$?
+ log_test $rc 0 "Delete nexthop route warning"
+ run_cmd "$IP route delete 172.16.101.1/32 nhid 12"
+ run_cmd "$IP nexthop del id 12"
}
ipv4_grp_fcnal()
diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile
index 8fa97ae9af9e..e811090f7748 100644
--- a/tools/testing/selftests/net/forwarding/Makefile
+++ b/tools/testing/selftests/net/forwarding/Makefile
@@ -2,15 +2,31 @@
TEST_PROGS = bridge_igmp.sh \
bridge_locked_port.sh \
+ bridge_mld.sh \
bridge_port_isolation.sh \
bridge_sticky_fdb.sh \
bridge_vlan_aware.sh \
+ bridge_vlan_mcast.sh \
bridge_vlan_unaware.sh \
+ custom_multipath_hash.sh \
+ dual_vxlan_bridge.sh \
+ ethtool_extended_state.sh \
ethtool.sh \
+ gre_custom_multipath_hash.sh \
gre_inner_v4_multipath.sh \
gre_inner_v6_multipath.sh \
+ gre_multipath_nh_res.sh \
+ gre_multipath_nh.sh \
gre_multipath.sh \
+ hw_stats_l3.sh \
ip6_forward_instats_vrf.sh \
+ ip6gre_custom_multipath_hash.sh \
+ ip6gre_flat_key.sh \
+ ip6gre_flat_keys.sh \
+ ip6gre_flat.sh \
+ ip6gre_hier_key.sh \
+ ip6gre_hier_keys.sh \
+ ip6gre_hier.sh \
ip6gre_inner_v4_multipath.sh \
ip6gre_inner_v6_multipath.sh \
ipip_flat_gre_key.sh \
@@ -34,36 +50,53 @@ TEST_PROGS = bridge_igmp.sh \
mirror_gre_vlan_bridge_1q.sh \
mirror_gre_vlan.sh \
mirror_vlan.sh \
+ pedit_dsfield.sh \
+ pedit_ip.sh \
+ pedit_l4port.sh \
+ q_in_vni_ipv6.sh \
+ q_in_vni.sh \
router_bridge.sh \
router_bridge_vlan.sh \
router_broadcast.sh \
+ router_mpath_nh_res.sh \
router_mpath_nh.sh \
router_multicast.sh \
router_multipath.sh \
+ router_nh.sh \
router.sh \
router_vid_1.sh \
sch_ets.sh \
+ sch_red.sh \
sch_tbf_ets.sh \
sch_tbf_prio.sh \
sch_tbf_root.sh \
+ skbedit_priority.sh \
tc_actions.sh \
tc_chains.sh \
tc_flower_router.sh \
tc_flower.sh \
tc_mpls_l2vpn.sh \
+ tc_police.sh \
tc_shblocks.sh \
tc_vlan_modify.sh \
+ vxlan_asymmetric_ipv6.sh \
vxlan_asymmetric.sh \
+ vxlan_bridge_1d_ipv6.sh \
+ vxlan_bridge_1d_port_8472_ipv6.sh \
vxlan_bridge_1d_port_8472.sh \
vxlan_bridge_1d.sh \
+ vxlan_bridge_1q_ipv6.sh \
+ vxlan_bridge_1q_port_8472_ipv6.sh \
vxlan_bridge_1q_port_8472.sh \
vxlan_bridge_1q.sh \
+ vxlan_symmetric_ipv6.sh \
vxlan_symmetric.sh
TEST_PROGS_EXTENDED := devlink_lib.sh \
ethtool_lib.sh \
fib_offload_lib.sh \
forwarding.config.sample \
+ ip6gre_lib.sh \
ipip_lib.sh \
lib.sh \
mirror_gre_lib.sh \
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh
index a3402cd8d5b6..9ff22f28032d 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh
@@ -61,9 +61,12 @@ setup_prepare()
vrf_prepare
mirror_gre_topo_create
+ # Avoid changing br1's PVID while it is operational as a L3 interface.
+ ip link set dev br1 down
ip link set dev $swp3 master br1
bridge vlan add dev br1 vid 555 pvid untagged self
+ ip link set dev br1 up
ip address add dev br1 192.0.2.129/28
ip address add dev br1 2001:db8:2::1/64
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 7314257d248a..48ef112f42c2 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -1444,6 +1444,33 @@ chk_prio_nr()
[ "${dump_stats}" = 1 ] && dump_stats
}
+chk_subflow_nr()
+{
+ local need_title="$1"
+ local msg="$2"
+ local subflow_nr=$3
+ local cnt1
+ local cnt2
+
+ if [ -n "${need_title}" ]; then
+ printf "%03u %-36s %s" "${TEST_COUNT}" "${TEST_NAME}" "${msg}"
+ else
+ printf "%-${nr_blank}s %s" " " "${msg}"
+ fi
+
+ cnt1=$(ss -N $ns1 -tOni | grep -c token)
+ cnt2=$(ss -N $ns2 -tOni | grep -c token)
+ if [ "$cnt1" != "$subflow_nr" -o "$cnt2" != "$subflow_nr" ]; then
+ echo "[fail] got $cnt1:$cnt2 subflows expected $subflow_nr"
+ fail_test
+ dump_stats=1
+ else
+ echo "[ ok ]"
+ fi
+
+ [ "${dump_stats}" = 1 ] && ( ss -N $ns1 -tOni ; ss -N $ns1 -tOni | grep token; ip -n $ns1 mptcp endpoint )
+}
+
chk_link_usage()
{
local ns=$1
@@ -2556,7 +2583,7 @@ fastclose_tests()
fi
}
-implicit_tests()
+endpoint_tests()
{
# userspace pm type prevents add_addr
if reset "implicit EP"; then
@@ -2578,6 +2605,23 @@ implicit_tests()
$ns2 10.0.2.2 id 1 flags signal
wait
fi
+
+ if reset "delete and re-add"; then
+ pm_nl_set_limits $ns1 1 1
+ pm_nl_set_limits $ns2 1 1
+ pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow
+ run_tests $ns1 $ns2 10.0.1.1 4 0 0 slow &
+
+ wait_mpj $ns2
+ pm_nl_del_endpoint $ns2 2 10.0.2.2
+ sleep 0.5
+ chk_subflow_nr needtitle "after delete" 1
+
+ pm_nl_add_endpoint $ns2 10.0.2.2 dev ns2eth2 flags subflow
+ wait_mpj $ns2
+ chk_subflow_nr "" "after re-add" 2
+ wait
+ fi
}
# [$1: error message]
@@ -2624,7 +2668,7 @@ all_tests_sorted=(
d@deny_join_id0_tests
m@fullmesh_tests
z@fastclose_tests
- I@implicit_tests
+ I@endpoint_tests
)
all_tests_args=""
diff --git a/tools/testing/selftests/net/so_txtime.c b/tools/testing/selftests/net/so_txtime.c
index 59067f64b775..2672ac0b6d1f 100644
--- a/tools/testing/selftests/net/so_txtime.c
+++ b/tools/testing/selftests/net/so_txtime.c
@@ -421,7 +421,7 @@ static void usage(const char *progname)
"Options:\n"
" -4 only IPv4\n"
" -6 only IPv6\n"
- " -c <clock> monotonic (default) or tai\n"
+ " -c <clock> monotonic or tai (default)\n"
" -D <addr> destination IP address (server)\n"
" -S <addr> source IP address (client)\n"
" -r run rx mode\n"
@@ -475,7 +475,7 @@ static void parse_opts(int argc, char **argv)
cfg_rx = true;
break;
case 't':
- cfg_start_time_ns = strtol(optarg, NULL, 0);
+ cfg_start_time_ns = strtoll(optarg, NULL, 0);
break;
case 'm':
cfg_mark = strtol(optarg, NULL, 0);
diff --git a/tools/testing/selftests/net/udpgro_frglist.sh b/tools/testing/selftests/net/udpgro_frglist.sh
new file mode 100755
index 000000000000..807b74c8fd80
--- /dev/null
+++ b/tools/testing/selftests/net/udpgro_frglist.sh
@@ -0,0 +1,101 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Run a series of udpgro benchmarks
+
+readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
+
+cleanup() {
+ local -r jobs="$(jobs -p)"
+ local -r ns="$(ip netns list|grep $PEER_NS)"
+
+ [ -n "${jobs}" ] && kill -INT ${jobs} 2>/dev/null
+ [ -n "$ns" ] && ip netns del $ns 2>/dev/null
+}
+trap cleanup EXIT
+
+run_one() {
+ # use 'rx' as separator between sender args and receiver args
+ local -r all="$@"
+ local -r tx_args=${all%rx*}
+ local rx_args=${all#*rx}
+
+
+
+ ip netns add "${PEER_NS}"
+ ip -netns "${PEER_NS}" link set lo up
+ ip link add type veth
+ ip link set dev veth0 up
+ ip addr add dev veth0 192.168.1.2/24
+ ip addr add dev veth0 2001:db8::2/64 nodad
+
+ ip link set dev veth1 netns "${PEER_NS}"
+ ip -netns "${PEER_NS}" addr add dev veth1 192.168.1.1/24
+ ip -netns "${PEER_NS}" addr add dev veth1 2001:db8::1/64 nodad
+ ip -netns "${PEER_NS}" link set dev veth1 up
+ ip netns exec "${PEER_NS}" ethtool -K veth1 rx-gro-list on
+
+
+ ip -n "${PEER_NS}" link set veth1 xdp object ../bpf/xdp_dummy.o section xdp_dummy
+ tc -n "${PEER_NS}" qdisc add dev veth1 clsact
+ tc -n "${PEER_NS}" filter add dev veth1 ingress prio 4 protocol ipv6 bpf object-file ../bpf/nat6to4.o section schedcls/ingress6/nat_6 direct-action
+ tc -n "${PEER_NS}" filter add dev veth1 egress prio 4 protocol ip bpf object-file ../bpf/nat6to4.o section schedcls/egress4/snat4 direct-action
+ echo ${rx_args}
+ ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} -r &
+
+ # Hack: let bg programs complete the startup
+ sleep 0.1
+ ./udpgso_bench_tx ${tx_args}
+}
+
+run_in_netns() {
+ local -r args=$@
+ echo ${args}
+ ./in_netns.sh $0 __subprocess ${args}
+}
+
+run_udp() {
+ local -r args=$@
+
+ echo "udp gso - over veth touching data"
+ run_in_netns ${args} -u -S 0 rx -4 -v
+
+ echo "udp gso and gro - over veth touching data"
+ run_in_netns ${args} -S 0 rx -4 -G
+}
+
+run_tcp() {
+ local -r args=$@
+
+ echo "tcp - over veth touching data"
+ run_in_netns ${args} -t rx -4 -t
+}
+
+run_all() {
+ local -r core_args="-l 4"
+ local -r ipv4_args="${core_args} -4 -D 192.168.1.1"
+ local -r ipv6_args="${core_args} -6 -D 2001:db8::1"
+
+ echo "ipv6"
+ run_tcp "${ipv6_args}"
+ run_udp "${ipv6_args}"
+}
+
+if [ ! -f ../bpf/xdp_dummy.o ]; then
+ echo "Missing xdp_dummy helper. Build bpf selftest first"
+ exit -1
+fi
+
+if [ ! -f bpf/nat6to4.o ]; then
+ echo "Missing nat6to4 helper. Build bpfnat6to4.o selftest first"
+ exit -1
+fi
+
+if [[ $# -eq 0 ]]; then
+ run_all
+elif [[ $1 == "__subprocess" ]]; then
+ shift
+ run_one $@
+else
+ run_in_netns $@
+fi
diff --git a/tools/testing/selftests/pid_namespace/Makefile b/tools/testing/selftests/pid_namespace/Makefile
index dcaefa224ca0..edafaca1aeb3 100644
--- a/tools/testing/selftests/pid_namespace/Makefile
+++ b/tools/testing/selftests/pid_namespace/Makefile
@@ -1,8 +1,8 @@
# SPDX-License-Identifier: GPL-2.0
CFLAGS += -g -I../../../../usr/include/
-TEST_GEN_PROGS := regression_enomem
+TEST_GEN_PROGS = regression_enomem
-include ../lib.mk
+LOCAL_HDRS += $(selfdir)/pidfd/pidfd.h
-$(OUTPUT)/regression_enomem: regression_enomem.c ../pidfd/pidfd.h
+include ../lib.mk
diff --git a/tools/testing/selftests/pidfd/pidfd_wait.c b/tools/testing/selftests/pidfd/pidfd_wait.c
index 17999e082aa7..070c1c876df1 100644
--- a/tools/testing/selftests/pidfd/pidfd_wait.c
+++ b/tools/testing/selftests/pidfd/pidfd_wait.c
@@ -95,7 +95,6 @@ TEST(wait_states)
.flags = CLONE_PIDFD | CLONE_PARENT_SETTID,
.exit_signal = SIGCHLD,
};
- int ret;
pid_t pid;
siginfo_t info = {
.si_signo = 0,
diff --git a/tools/testing/selftests/proc/proc-pid-vm.c b/tools/testing/selftests/proc/proc-pid-vm.c
index 18a3bde8bc96..28604c9f805c 100644
--- a/tools/testing/selftests/proc/proc-pid-vm.c
+++ b/tools/testing/selftests/proc/proc-pid-vm.c
@@ -46,6 +46,8 @@
#include <sys/time.h>
#include <sys/resource.h>
+#include "../kselftest.h"
+
static inline long sys_execveat(int dirfd, const char *pathname, char **argv, char **envp, int flags)
{
return syscall(SYS_execveat, dirfd, pathname, argv, envp, flags);
@@ -368,7 +370,7 @@ int main(void)
};
int i;
- for (i = 0; i < sizeof(S)/sizeof(S[0]); i++) {
+ for (i = 0; i < ARRAY_SIZE(S); i++) {
assert(memmem(buf, rv, S[i], strlen(S[i])));
}
@@ -417,7 +419,7 @@ int main(void)
};
int i;
- for (i = 0; i < sizeof(S)/sizeof(S[0]); i++) {
+ for (i = 0; i < ARRAY_SIZE(S); i++) {
assert(memmem(buf, rv, S[i], strlen(S[i])));
}
}
diff --git a/tools/testing/selftests/rcutorture/bin/functions.sh b/tools/testing/selftests/rcutorture/bin/functions.sh
index c35ba24f994c..66d0414d8e4b 100644
--- a/tools/testing/selftests/rcutorture/bin/functions.sh
+++ b/tools/testing/selftests/rcutorture/bin/functions.sh
@@ -301,7 +301,7 @@ specify_qemu_cpus () {
echo $2 -smp $3
;;
qemu-system-ppc64)
- nt="`lscpu | grep '^NUMA node0' | sed -e 's/^[^,]*,\([0-9]*\),.*$/\1/'`"
+ nt="`lscpu | sed -n 's/^Thread(s) per core:\s*//p'`"
echo $2 -smp cores=`expr \( $3 + $nt - 1 \) / $nt`,threads=$nt
;;
esac
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
index 5f682fc892dd..88983cba7956 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
@@ -36,7 +36,7 @@ do
then
egrep "error:|warning:|^ld: .*undefined reference to" < $i > $i.diags
files="$files $i.diags $i"
- elif ! test -f ${scenariobasedir}/vmlinux
+ elif ! test -f ${scenariobasedir}/vmlinux && ! test -f "${rundir}/re-run"
then
echo No ${scenariobasedir}/vmlinux file > $i.diags
files="$files $i.diags $i"
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
index 0a5419982ab3..0789c5606d2a 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
@@ -33,7 +33,12 @@ do
TORTURE_SUITE="`cat $i/../torture_suite`"
configfile=`echo $i | sed -e 's,^.*/,,'`
rm -f $i/console.log.*.diags
- kvm-recheck-${TORTURE_SUITE}.sh $i
+ case "${TORTURE_SUITE}" in
+ X*)
+ ;;
+ *)
+ kvm-recheck-${TORTURE_SUITE}.sh $i
+ esac
if test -f "$i/qemu-retval" && test "`cat $i/qemu-retval`" -ne 0 && test "`cat $i/qemu-retval`" -ne 137
then
echo QEMU error, output:
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
index 8c4c1e4792d0..0ff59bd8b640 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
@@ -138,14 +138,14 @@ chmod +x $T/bin/kvm-remote-*.sh
# Check first to avoid the need for cleanup for system-name typos
for i in $systems
do
- ncpus="`ssh $i getconf _NPROCESSORS_ONLN 2> /dev/null`"
- echo $i: $ncpus CPUs " " `date` | tee -a "$oldrun/remote-log"
+ ncpus="`ssh -o BatchMode=yes $i getconf _NPROCESSORS_ONLN 2> /dev/null`"
ret=$?
if test "$ret" -ne 0
then
echo System $i unreachable, giving up. | tee -a "$oldrun/remote-log"
exit 4
fi
+ echo $i: $ncpus CPUs " " `date` | tee -a "$oldrun/remote-log"
done
# Download and expand the tarball on all systems.
@@ -153,14 +153,14 @@ echo Build-products tarball: `du -h $T/binres.tgz` | tee -a "$oldrun/remote-log"
for i in $systems
do
echo Downloading tarball to $i `date` | tee -a "$oldrun/remote-log"
- cat $T/binres.tgz | ssh $i "cd /tmp; tar -xzf -"
+ cat $T/binres.tgz | ssh -o BatchMode=yes $i "cd /tmp; tar -xzf -"
ret=$?
tries=0
while test "$ret" -ne 0
do
echo Unable to download $T/binres.tgz to system $i, waiting and then retrying. $tries prior retries. | tee -a "$oldrun/remote-log"
sleep 60
- cat $T/binres.tgz | ssh $i "cd /tmp; tar -xzf -"
+ cat $T/binres.tgz | ssh -o BatchMode=yes $i "cd /tmp; tar -xzf -"
ret=$?
if test "$ret" -ne 0
then
@@ -185,7 +185,7 @@ checkremotefile () {
while :
do
- ssh $1 "test -f \"$2\""
+ ssh -o BatchMode=yes $1 "test -f \"$2\""
ret=$?
if test "$ret" -eq 255
then
@@ -228,7 +228,7 @@ startbatches () {
then
continue # System still running last test, skip.
fi
- ssh "$i" "cd \"$resdir/$ds\"; touch remote.run; PATH=\"$T/bin:$PATH\" nohup kvm-remote-$curbatch.sh > kvm-remote-$curbatch.sh.out 2>&1 &" 1>&2
+ ssh -o BatchMode=yes "$i" "cd \"$resdir/$ds\"; touch remote.run; PATH=\"$T/bin:$PATH\" nohup kvm-remote-$curbatch.sh > kvm-remote-$curbatch.sh.out 2>&1 &" 1>&2
ret=$?
if test "$ret" -ne 0
then
@@ -267,7 +267,7 @@ do
sleep 30
done
echo " ---" Collecting results from $i `date` | tee -a "$oldrun/remote-log"
- ( cd "$oldrun"; ssh $i "cd $rundir; tar -czf - kvm-remote-*.sh.out */console.log */kvm-test-1-run*.sh.out */qemu[_-]pid */qemu-retval */qemu-affinity; rm -rf $T > /dev/null 2>&1" | tar -xzf - )
+ ( cd "$oldrun"; ssh -o BatchMode=yes $i "cd $rundir; tar -czf - kvm-remote-*.sh.out */console.log */kvm-test-1-run*.sh.out */qemu[_-]pid */qemu-retval */qemu-affinity; rm -rf $T > /dev/null 2>&1" | tar -xzf - )
done
( kvm-end-run-stats.sh "$oldrun" "$starttime"; echo $? > $T/exitcode ) | tee -a "$oldrun/remote-log"
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index 55b2c1533282..263e16aeca0e 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -44,6 +44,7 @@ TORTURE_KCONFIG_KASAN_ARG=""
TORTURE_KCONFIG_KCSAN_ARG=""
TORTURE_KMAKE_ARG=""
TORTURE_QEMU_MEM=512
+torture_qemu_mem_default=1
TORTURE_REMOTE=
TORTURE_SHUTDOWN_GRACE=180
TORTURE_SUITE=rcu
@@ -86,7 +87,7 @@ usage () {
echo " --remote"
echo " --results absolute-pathname"
echo " --shutdown-grace seconds"
- echo " --torture lock|rcu|rcuscale|refscale|scf"
+ echo " --torture lock|rcu|rcuscale|refscale|scf|X*"
echo " --trust-make"
exit 1
}
@@ -180,6 +181,10 @@ do
;;
--kasan)
TORTURE_KCONFIG_KASAN_ARG="CONFIG_DEBUG_INFO=y CONFIG_KASAN=y"; export TORTURE_KCONFIG_KASAN_ARG
+ if test -n "$torture_qemu_mem_default"
+ then
+ TORTURE_QEMU_MEM=2G
+ fi
;;
--kconfig|--kconfigs)
checkarg --kconfig "(Kconfig options)" $# "$2" '^CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\)\( CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\)\)*$' '^error$'
@@ -202,6 +207,7 @@ do
--memory)
checkarg --memory "(memory size)" $# "$2" '^[0-9]\+[MG]\?$' error
TORTURE_QEMU_MEM=$2
+ torture_qemu_mem_default=
shift
;;
--no-initrd)
@@ -231,7 +237,7 @@ do
shift
;;
--torture)
- checkarg --torture "(suite name)" "$#" "$2" '^\(lock\|rcu\|rcuscale\|refscale\|scf\)$' '^--'
+ checkarg --torture "(suite name)" "$#" "$2" '^\(lock\|rcu\|rcuscale\|refscale\|scf\|X.*\)$' '^--'
TORTURE_SUITE=$2
TORTURE_MOD="`echo $TORTURE_SUITE | sed -e 's/^\(lock\|rcu\|scf\)$/\1torture/'`"
shift
diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh
index bfe09e2829c8..d477618e7261 100755
--- a/tools/testing/selftests/rcutorture/bin/torture.sh
+++ b/tools/testing/selftests/rcutorture/bin/torture.sh
@@ -54,6 +54,7 @@ do_kvfree=yes
do_kasan=yes
do_kcsan=no
do_clocksourcewd=yes
+do_rt=yes
# doyesno - Helper function for yes/no arguments
function doyesno () {
@@ -82,6 +83,7 @@ usage () {
echo " --do-rcuscale / --do-no-rcuscale"
echo " --do-rcutorture / --do-no-rcutorture"
echo " --do-refscale / --do-no-refscale"
+ echo " --do-rt / --do-no-rt"
echo " --do-scftorture / --do-no-scftorture"
echo " --duration [ <minutes> | <hours>h | <days>d ]"
echo " --kcsan-kmake-arg kernel-make-arguments"
@@ -118,6 +120,7 @@ do
do_scftorture=yes
do_rcuscale=yes
do_refscale=yes
+ do_rt=yes
do_kvfree=yes
do_kasan=yes
do_kcsan=yes
@@ -148,6 +151,7 @@ do
do_scftorture=no
do_rcuscale=no
do_refscale=no
+ do_rt=no
do_kvfree=no
do_kasan=no
do_kcsan=no
@@ -162,6 +166,9 @@ do
--do-refscale|--do-no-refscale)
do_refscale=`doyesno "$1" --do-refscale`
;;
+ --do-rt|--do-no-rt)
+ do_rt=`doyesno "$1" --do-rt`
+ ;;
--do-scftorture|--do-no-scftorture)
do_scftorture=`doyesno "$1" --do-scftorture`
;;
@@ -322,6 +329,7 @@ then
echo " --- make clean" > "$amcdir/Make.out" 2>&1
make -j$MAKE_ALLOTED_CPUS clean >> "$amcdir/Make.out" 2>&1
echo " --- make allmodconfig" >> "$amcdir/Make.out" 2>&1
+ cp .config $amcdir
make -j$MAKE_ALLOTED_CPUS allmodconfig >> "$amcdir/Make.out" 2>&1
echo " --- make " >> "$amcdir/Make.out" 2>&1
make -j$MAKE_ALLOTED_CPUS >> "$amcdir/Make.out" 2>&1
@@ -350,8 +358,19 @@ fi
if test "$do_scftorture" = "yes"
then
- torture_bootargs="scftorture.nthreads=$HALF_ALLOTED_CPUS torture.disable_onoff_at_boot"
- torture_set "scftorture" tools/testing/selftests/rcutorture/bin/kvm.sh --torture scf --allcpus --duration "$duration_scftorture" --configs "$configs_scftorture" --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory 1G --trust-make
+ torture_bootargs="scftorture.nthreads=$HALF_ALLOTED_CPUS torture.disable_onoff_at_boot csdlock_debug=1"
+ torture_set "scftorture" tools/testing/selftests/rcutorture/bin/kvm.sh --torture scf --allcpus --duration "$duration_scftorture" --configs "$configs_scftorture" --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory 2G --trust-make
+fi
+
+if test "$do_rt" = "yes"
+then
+ # With all post-boot grace periods forced to normal.
+ torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000 rcupdate.rcu_normal=1"
+ torture_set "rcurttorture" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration "$duration_rcutorture" --configs "TREE03" --trust-make
+
+ # With all post-boot grace periods forced to expedited.
+ torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000 rcupdate.rcu_expedited=1"
+ torture_set "rcurttorture-exp" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration "$duration_rcutorture" --configs "TREE03" --trust-make
fi
if test "$do_refscale" = yes
@@ -363,7 +382,7 @@ fi
for prim in $primlist
do
torture_bootargs="refscale.scale_type="$prim" refscale.nreaders=$HALF_ALLOTED_CPUS refscale.loops=10000 refscale.holdoff=20 torture.disable_onoff_at_boot"
- torture_set "refscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture refscale --allcpus --duration 5 --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --bootargs "verbose_batched=$VERBOSE_BATCH_CPUS torture.verbose_sleep_frequency=8 torture.verbose_sleep_duration=$VERBOSE_BATCH_CPUS" --trust-make
+ torture_set "refscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture refscale --allcpus --duration 5 --kconfig "CONFIG_TASKS_TRACE_RCU=y CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --bootargs "verbose_batched=$VERBOSE_BATCH_CPUS torture.verbose_sleep_frequency=8 torture.verbose_sleep_duration=$VERBOSE_BATCH_CPUS" --trust-make
done
if test "$do_rcuscale" = yes
@@ -375,13 +394,13 @@ fi
for prim in $primlist
do
torture_bootargs="rcuscale.scale_type="$prim" rcuscale.nwriters=$HALF_ALLOTED_CPUS rcuscale.holdoff=20 torture.disable_onoff_at_boot"
- torture_set "rcuscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration 5 --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --trust-make
+ torture_set "rcuscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration 5 --kconfig "CONFIG_TASKS_TRACE_RCU=y CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --trust-make
done
if test "$do_kvfree" = "yes"
then
torture_bootargs="rcuscale.kfree_rcu_test=1 rcuscale.kfree_nthreads=16 rcuscale.holdoff=20 rcuscale.kfree_loops=10000 torture.disable_onoff_at_boot"
- torture_set "rcuscale-kvfree" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration 10 --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory 1G --trust-make
+ torture_set "rcuscale-kvfree" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration 10 --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory 2G --trust-make
fi
if test "$do_clocksourcewd" = "yes"
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/RUDE01 b/tools/testing/selftests/rcutorture/configs/rcu/RUDE01
index 7093422050f6..6fd6acb94518 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/RUDE01
+++ b/tools/testing/selftests/rcutorture/configs/rcu/RUDE01
@@ -8,3 +8,5 @@ CONFIG_DEBUG_LOCK_ALLOC=y
CONFIG_PROVE_LOCKING=y
#CHECK#CONFIG_PROVE_RCU=y
CONFIG_RCU_EXPERT=y
+CONFIG_FORCE_TASKS_RUDE_RCU=y
+#CHECK#CONFIG_TASKS_RUDE_RCU=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N
index 2da8b49589a0..07f5e0a70ae7 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N
@@ -6,3 +6,5 @@ CONFIG_PREEMPT_NONE=y
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=n
#CHECK#CONFIG_RCU_EXPERT=n
+CONFIG_KPROBES=n
+CONFIG_FTRACE=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS01 b/tools/testing/selftests/rcutorture/configs/rcu/TASKS01
index 3ca112444ce7..d84801b9a7ae 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TASKS01
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS01
@@ -7,4 +7,5 @@ CONFIG_PREEMPT=y
CONFIG_DEBUG_LOCK_ALLOC=y
CONFIG_PROVE_LOCKING=y
#CHECK#CONFIG_PROVE_RCU=y
+CONFIG_TASKS_RCU=y
CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS02 b/tools/testing/selftests/rcutorture/configs/rcu/TASKS02
index ad2be91e5ee7..2f9fcffff5ae 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TASKS02
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS02
@@ -2,3 +2,7 @@ CONFIG_SMP=n
CONFIG_PREEMPT_NONE=y
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=n
+CONFIG_PREEMPT_DYNAMIC=n
+#CHECK#CONFIG_TASKS_RCU=y
+CONFIG_FORCE_TASKS_RCU=y
+CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS02.boot b/tools/testing/selftests/rcutorture/configs/rcu/TASKS02.boot
index cd2a188eeb6d..b9b6d67cbc5f 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TASKS02.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS02.boot
@@ -1 +1,2 @@
rcutorture.torture_type=tasks
+rcutorture.stat_interval=60
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS03 b/tools/testing/selftests/rcutorture/configs/rcu/TASKS03
index dc02083803ce..dea26c568678 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TASKS03
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS03
@@ -7,3 +7,5 @@ CONFIG_HZ_PERIODIC=n
CONFIG_NO_HZ_IDLE=n
CONFIG_NO_HZ_FULL=y
#CHECK#CONFIG_RCU_EXPERT=n
+CONFIG_TASKS_RCU=y
+CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRACE01 b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01
index e4d74e5fc1d0..85b407467454 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TRACE01
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01
@@ -4,8 +4,11 @@ CONFIG_HOTPLUG_CPU=y
CONFIG_PREEMPT_NONE=y
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=n
+CONFIG_PREEMPT_DYNAMIC=n
CONFIG_DEBUG_LOCK_ALLOC=n
CONFIG_PROVE_LOCKING=n
#CHECK#CONFIG_PROVE_RCU=n
+CONFIG_FORCE_TASKS_TRACE_RCU=y
+#CHECK#CONFIG_TASKS_TRACE_RCU=y
CONFIG_TASKS_TRACE_RCU_READ_MB=y
CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRACE02 b/tools/testing/selftests/rcutorture/configs/rcu/TRACE02
index 77541eeb4e9f..093ea6e8e65c 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TRACE02
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TRACE02
@@ -7,5 +7,7 @@ CONFIG_PREEMPT=y
CONFIG_DEBUG_LOCK_ALLOC=y
CONFIG_PROVE_LOCKING=y
#CHECK#CONFIG_PROVE_RCU=y
+CONFIG_FORCE_TASKS_TRACE_RCU=y
+#CHECK#CONFIG_TASKS_TRACE_RCU=y
CONFIG_TASKS_TRACE_RCU_READ_MB=n
CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE04 b/tools/testing/selftests/rcutorture/configs/rcu/TREE04
index 22ad0261728d..ae395981b5e5 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE04
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE04
@@ -1,8 +1,9 @@
CONFIG_SMP=y
CONFIG_NR_CPUS=8
-CONFIG_PREEMPT_NONE=y
-CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=y
CONFIG_PREEMPT=n
+CONFIG_PREEMPT_DYNAMIC=n
#CHECK#CONFIG_TREE_RCU=y
CONFIG_HZ_PERIODIC=n
CONFIG_NO_HZ_IDLE=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE07 b/tools/testing/selftests/rcutorture/configs/rcu/TREE07
index 2789b47e4ecd..d30922d8c883 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE07
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE07
@@ -3,6 +3,7 @@ CONFIG_NR_CPUS=16
CONFIG_PREEMPT_NONE=y
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=n
+CONFIG_PREEMPT_DYNAMIC=n
#CHECK#CONFIG_TREE_RCU=y
CONFIG_HZ_PERIODIC=n
CONFIG_NO_HZ_IDLE=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE09 b/tools/testing/selftests/rcutorture/configs/rcu/TREE09
index 8523a7515cbf..fc45645bb5f4 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE09
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE09
@@ -13,3 +13,5 @@ CONFIG_DEBUG_LOCK_ALLOC=n
CONFIG_RCU_BOOST=n
CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
#CHECK#CONFIG_RCU_EXPERT=n
+CONFIG_KPROBES=n
+CONFIG_FTRACE=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE10 b/tools/testing/selftests/rcutorture/configs/rcu/TREE10
index 4a00539bfdd7..a323d8948b7c 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE10
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE10
@@ -3,6 +3,7 @@ CONFIG_NR_CPUS=56
CONFIG_PREEMPT_NONE=y
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=n
+CONFIG_PREEMPT_DYNAMIC=n
#CHECK#CONFIG_TREE_RCU=y
CONFIG_HZ_PERIODIC=n
CONFIG_NO_HZ_IDLE=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh
index effa415f9b92..e2bc99c785e7 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh
+++ b/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh
@@ -9,7 +9,7 @@
# rcutorture_param_n_barrier_cbs bootparam-string
#
-# Adds n_barrier_cbs rcutorture module parameter to kernels having it.
+# Adds n_barrier_cbs rcutorture module parameter if not already specified.
rcutorture_param_n_barrier_cbs () {
if echo $1 | grep -q "rcutorture\.n_barrier_cbs"
then
@@ -30,13 +30,25 @@ rcutorture_param_onoff () {
fi
}
+# rcutorture_param_stat_interval bootparam-string
+#
+# Adds stat_interval rcutorture module parameter if not already specified.
+rcutorture_param_stat_interval () {
+ if echo $1 | grep -q "rcutorture\.stat_interval"
+ then
+ :
+ else
+ echo rcutorture.stat_interval=15
+ fi
+}
+
# per_version_boot_params bootparam-string config-file seconds
#
# Adds per-version torture-module parameters to kernels supporting them.
per_version_boot_params () {
echo $1 `rcutorture_param_onoff "$1" "$2"` \
`rcutorture_param_n_barrier_cbs "$1"` \
- rcutorture.stat_interval=15 \
+ `rcutorture_param_stat_interval "$1"` \
rcutorture.shutdown_secs=$3 \
rcutorture.test_no_idle_hz=1 \
rcutorture.verbose=1
diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon b/tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon
index 90942bb5bebc..6a00157bee5b 100644
--- a/tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon
+++ b/tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon
@@ -1,5 +1,6 @@
CONFIG_RCU_SCALE_TEST=y
CONFIG_PRINTK_TIME=y
-CONFIG_TASKS_RCU_GENERIC=y
-CONFIG_TASKS_RCU=y
-CONFIG_TASKS_TRACE_RCU=y
+CONFIG_FORCE_TASKS_RCU=y
+#CHECK#CONFIG_TASKS_RCU=y
+CONFIG_FORCE_TASKS_TRACE_RCU=y
+#CHECK#CONFIG_TASKS_TRACE_RCU=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/TREE b/tools/testing/selftests/rcutorture/configs/rcuscale/TREE
index f110d9ffbe4c..b10706fd03a4 100644
--- a/tools/testing/selftests/rcutorture/configs/rcuscale/TREE
+++ b/tools/testing/selftests/rcutorture/configs/rcuscale/TREE
@@ -16,3 +16,5 @@ CONFIG_RCU_BOOST=n
CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
CONFIG_RCU_EXPERT=y
CONFIG_RCU_TRACE=y
+CONFIG_KPROBES=n
+CONFIG_FTRACE=n
diff --git a/tools/testing/selftests/rcutorture/configs/refscale/CFcommon b/tools/testing/selftests/rcutorture/configs/refscale/CFcommon
index a98b58b54bb1..fbea3b13baba 100644
--- a/tools/testing/selftests/rcutorture/configs/refscale/CFcommon
+++ b/tools/testing/selftests/rcutorture/configs/refscale/CFcommon
@@ -1,2 +1,6 @@
CONFIG_RCU_REF_SCALE_TEST=y
CONFIG_PRINTK_TIME=y
+CONFIG_FORCE_TASKS_RCU=y
+#CHECK#CONFIG_TASKS_RCU=y
+CONFIG_FORCE_TASKS_TRACE_RCU=y
+#CHECK#CONFIG_TASKS_TRACE_RCU=y
diff --git a/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT b/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT
index 7f06838a91e6..ef2b501a6971 100644
--- a/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT
+++ b/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT
@@ -15,3 +15,5 @@ CONFIG_PROVE_LOCKING=n
CONFIG_RCU_BOOST=n
CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
CONFIG_RCU_EXPERT=y
+CONFIG_KPROBES=n
+CONFIG_FTRACE=n
diff --git a/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT b/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT
index b8429d6c6ebc..3a59346b3de7 100644
--- a/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT
+++ b/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT
@@ -7,3 +7,5 @@ CONFIG_NO_HZ_IDLE=n
CONFIG_NO_HZ_FULL=y
CONFIG_DEBUG_LOCK_ALLOC=n
CONFIG_PROVE_LOCKING=n
+CONFIG_KPROBES=n
+CONFIG_FTRACE=n
diff --git a/tools/testing/selftests/rcutorture/configs/scf/PREEMPT b/tools/testing/selftests/rcutorture/configs/scf/PREEMPT
index ae4992b141b0..cb37e08037d6 100644
--- a/tools/testing/selftests/rcutorture/configs/scf/PREEMPT
+++ b/tools/testing/selftests/rcutorture/configs/scf/PREEMPT
@@ -7,3 +7,4 @@ CONFIG_NO_HZ_IDLE=y
CONFIG_NO_HZ_FULL=n
CONFIG_DEBUG_LOCK_ALLOC=y
CONFIG_PROVE_LOCKING=y
+CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/scf/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/scf/ver_functions.sh
index d3d9e35d3d55..2d949e58f5a5 100644
--- a/tools/testing/selftests/rcutorture/configs/scf/ver_functions.sh
+++ b/tools/testing/selftests/rcutorture/configs/scf/ver_functions.sh
@@ -25,6 +25,5 @@ per_version_boot_params () {
echo $1 `scftorture_param_onoff "$1" "$2"` \
scftorture.stat_interval=15 \
scftorture.shutdown_secs=$3 \
- scftorture.verbose=1 \
- scf
+ scftorture.verbose=1
}
diff --git a/tools/testing/selftests/resctrl/Makefile b/tools/testing/selftests/resctrl/Makefile
index 6bcee2ec91a9..73d53257df42 100644
--- a/tools/testing/selftests/resctrl/Makefile
+++ b/tools/testing/selftests/resctrl/Makefile
@@ -1,17 +1,10 @@
-CC = $(CROSS_COMPILE)gcc
-CFLAGS = -g -Wall -O2 -D_FORTIFY_SOURCE=2
-SRCS=$(wildcard *.c)
-OBJS=$(SRCS:.c=.o)
-
-all: resctrl_tests
+# SPDX-License-Identifier: GPL-2.0
-$(OBJS): $(SRCS)
- $(CC) $(CFLAGS) -c $(SRCS)
+CFLAGS = -g -Wall -O2 -D_FORTIFY_SOURCE=2
+CFLAGS += $(KHDR_INCLUDES)
-resctrl_tests: $(OBJS)
- $(CC) $(CFLAGS) -o $@ $^
+TEST_GEN_PROGS := resctrl_tests
-.PHONY: clean
+include ../lib.mk
-clean:
- $(RM) $(OBJS) resctrl_tests
+$(OUTPUT)/resctrl_tests: $(wildcard *.c)
diff --git a/tools/testing/selftests/resctrl/README b/tools/testing/selftests/resctrl/README
index 3d2bbd4fa3aa..8d11ce7c2ee5 100644
--- a/tools/testing/selftests/resctrl/README
+++ b/tools/testing/selftests/resctrl/README
@@ -12,24 +12,49 @@ Allocation test on Intel RDT hardware. More tests will be added in the future.
And the test suit can be extended to cover AMD QoS and ARM MPAM hardware
as well.
+resctrl_tests can be run with or without kselftest framework.
+
+WITH KSELFTEST FRAMEWORK
+=======================
+
BUILD
-----
-Run "make" to build executable file "resctrl_tests".
+Build executable file "resctrl_tests" from top level directory of the kernel source:
+ $ make -C tools/testing/selftests TARGETS=resctrl
RUN
---
-To use resctrl_tests, root or sudoer privileges are required. This is because
-the test needs to mount resctrl file system and change contents in the file
-system.
+Run resctrl_tests as sudo or root since the test needs to mount resctrl file
+system and change contents in the file system.
+Using kselftest framework will run all supported tests within resctrl_tests:
+
+ $ sudo make -C tools/testing/selftests TARGETS=resctrl run_tests
+
+More details about kselftest framework can be found in
+Documentation/dev-tools/kselftest.rst.
+
+WITHOUT KSELFTEST FRAMEWORK
+===========================
+
+BUILD
+-----
+
+Build executable file "resctrl_tests" from this directory(tools/testing/selftests/resctrl/):
+ $ make
+
+RUN
+---
+Run resctrl_tests as sudo or root since the test needs to mount resctrl file
+system and change contents in the file system.
Executing the test without any parameter will run all supported tests:
- sudo ./resctrl_tests
+ $ sudo ./resctrl_tests
OVERVIEW OF EXECUTION
----------------------
+=====================
A test case has four stages:
@@ -41,7 +66,7 @@ A test case has four stages:
- teardown: umount resctrl and clear temporary files.
ARGUMENTS
----------
+=========
Parameter '-h' shows usage information.
diff --git a/tools/testing/selftests/resctrl/cat_test.c b/tools/testing/selftests/resctrl/cat_test.c
index cd4f68388e0f..1c5e90c63254 100644
--- a/tools/testing/selftests/resctrl/cat_test.c
+++ b/tools/testing/selftests/resctrl/cat_test.c
@@ -89,7 +89,7 @@ static int check_results(struct resctrl_val_param *param)
return show_cache_info(sum_llc_perf_miss, no_of_bits, param->span / 64,
MAX_DIFF, MAX_DIFF_PERCENT, NUM_OF_RUNS,
- !is_amd, false);
+ get_vendor() == ARCH_INTEL, false);
}
void cat_test_cleanup(void)
diff --git a/tools/testing/selftests/resctrl/fill_buf.c b/tools/testing/selftests/resctrl/fill_buf.c
index 51e5cf22632f..56ccbeae0638 100644
--- a/tools/testing/selftests/resctrl/fill_buf.c
+++ b/tools/testing/selftests/resctrl/fill_buf.c
@@ -121,8 +121,10 @@ static int fill_cache_read(unsigned char *start_ptr, unsigned char *end_ptr,
/* Consume read result so that reading memory is not optimized out. */
fp = fopen("/dev/null", "w");
- if (!fp)
+ if (!fp) {
perror("Unable to write to /dev/null");
+ return -1;
+ }
fprintf(fp, "Sum: %d ", ret);
fclose(fp);
diff --git a/tools/testing/selftests/resctrl/resctrl.h b/tools/testing/selftests/resctrl/resctrl.h
index 1ad10c47e31d..f0ded31fb3c7 100644
--- a/tools/testing/selftests/resctrl/resctrl.h
+++ b/tools/testing/selftests/resctrl/resctrl.h
@@ -34,6 +34,9 @@
#define L3_MON_PATH "/sys/fs/resctrl/info/L3_MON"
#define L3_MON_FEATURES_PATH "/sys/fs/resctrl/info/L3_MON/mon_features"
+#define ARCH_INTEL 1
+#define ARCH_AMD 2
+
#define PARENT_EXIT(err_msg) \
do { \
perror(err_msg); \
@@ -75,8 +78,8 @@ struct resctrl_val_param {
extern pid_t bm_pid, ppid;
extern char llc_occup_path[1024];
-extern bool is_amd;
+int get_vendor(void);
bool check_resctrlfs_support(void);
int filter_dmesg(void);
int remount_resctrlfs(bool mum_resctrlfs);
diff --git a/tools/testing/selftests/resctrl/resctrl_tests.c b/tools/testing/selftests/resctrl/resctrl_tests.c
index 973f09a66e1e..df0d8d8526fc 100644
--- a/tools/testing/selftests/resctrl/resctrl_tests.c
+++ b/tools/testing/selftests/resctrl/resctrl_tests.c
@@ -13,25 +13,41 @@
#define BENCHMARK_ARGS 64
#define BENCHMARK_ARG_SIZE 64
-bool is_amd;
-
-void detect_amd(void)
+static int detect_vendor(void)
{
FILE *inf = fopen("/proc/cpuinfo", "r");
+ int vendor_id = 0;
+ char *s = NULL;
char *res;
if (!inf)
- return;
+ return vendor_id;
res = fgrep(inf, "vendor_id");
- if (res) {
- char *s = strchr(res, ':');
+ if (res)
+ s = strchr(res, ':');
+
+ if (s && !strcmp(s, ": GenuineIntel\n"))
+ vendor_id = ARCH_INTEL;
+ else if (s && !strcmp(s, ": AuthenticAMD\n"))
+ vendor_id = ARCH_AMD;
- is_amd = s && !strcmp(s, ": AuthenticAMD\n");
- free(res);
- }
fclose(inf);
+ free(res);
+ return vendor_id;
+}
+
+int get_vendor(void)
+{
+ static int vendor = -1;
+
+ if (vendor == -1)
+ vendor = detect_vendor();
+ if (vendor == 0)
+ ksft_print_msg("Can not get vendor info...\n");
+
+ return vendor;
}
static void cmd_help(void)
@@ -70,6 +86,8 @@ static void run_mbm_test(bool has_ben, char **benchmark_cmd, int span,
sprintf(benchmark_cmd[5], "%s", MBA_STR);
res = mbm_bw_change(span, cpu_no, bw_report, benchmark_cmd);
ksft_test_result(!res, "MBM: bw change\n");
+ if ((get_vendor() == ARCH_INTEL) && res)
+ ksft_print_msg("Intel MBM may be inaccurate when Sub-NUMA Clustering is enabled. Check BIOS configuration.\n");
mbm_test_cleanup();
}
@@ -106,6 +124,8 @@ static void run_cmt_test(bool has_ben, char **benchmark_cmd, int cpu_no)
sprintf(benchmark_cmd[5], "%s", CMT_STR);
res = cmt_resctrl_val(cpu_no, 5, benchmark_cmd);
ksft_test_result(!res, "CMT: test\n");
+ if ((get_vendor() == ARCH_INTEL) && res)
+ ksft_print_msg("Intel CMT may be inaccurate when Sub-NUMA Clustering is enabled. Check BIOS configuration.\n");
cmt_test_cleanup();
}
@@ -205,10 +225,7 @@ int main(int argc, char **argv)
* 2. We execute perf commands
*/
if (geteuid() != 0)
- return ksft_exit_fail_msg("Not running as root, abort testing.\n");
-
- /* Detect AMD vendor */
- detect_amd();
+ return ksft_exit_skip("Not running as root. Skipping...\n");
if (has_ben) {
/* Extract benchmark command from command line. */
@@ -235,16 +252,16 @@ int main(int argc, char **argv)
sprintf(bm_type, "fill_buf");
if (!check_resctrlfs_support())
- return ksft_exit_fail_msg("resctrl FS does not exist\n");
+ return ksft_exit_skip("resctrl FS does not exist. Enable X86_CPU_RESCTRL config option.\n");
filter_dmesg();
ksft_set_plan(tests ? : 4);
- if (!is_amd && mbm_test)
+ if ((get_vendor() == ARCH_INTEL) && mbm_test)
run_mbm_test(has_ben, benchmark_cmd, span, cpu_no, bw_report);
- if (!is_amd && mba_test)
+ if ((get_vendor() == ARCH_INTEL) && mba_test)
run_mba_test(has_ben, benchmark_cmd, span, cpu_no, bw_report);
if (cmt_test)
diff --git a/tools/testing/selftests/resctrl/resctrl_val.c b/tools/testing/selftests/resctrl/resctrl_val.c
index 95224345c78e..b32b96356ec7 100644
--- a/tools/testing/selftests/resctrl/resctrl_val.c
+++ b/tools/testing/selftests/resctrl/resctrl_val.c
@@ -678,6 +678,7 @@ int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param)
sigemptyset(&sigact.sa_mask);
sigact.sa_flags = SA_SIGINFO;
if (sigaction(SIGINT, &sigact, NULL) ||
+ sigaction(SIGTERM, &sigact, NULL) ||
sigaction(SIGHUP, &sigact, NULL)) {
perror("# sigaction");
ret = errno;
diff --git a/tools/testing/selftests/resctrl/resctrlfs.c b/tools/testing/selftests/resctrl/resctrlfs.c
index 5f5a166ade60..6f543e470ad4 100644
--- a/tools/testing/selftests/resctrl/resctrlfs.c
+++ b/tools/testing/selftests/resctrl/resctrlfs.c
@@ -106,7 +106,7 @@ int get_resource_id(int cpu_no, int *resource_id)
char phys_pkg_path[1024];
FILE *fp;
- if (is_amd)
+ if (get_vendor() == ARCH_AMD)
sprintf(phys_pkg_path, "%s%d/cache/index3/id",
PHYS_ID_PATH, cpu_no);
else
diff --git a/tools/testing/selftests/resctrl/settings b/tools/testing/selftests/resctrl/settings
new file mode 100644
index 000000000000..a383f3d4565b
--- /dev/null
+++ b/tools/testing/selftests/resctrl/settings
@@ -0,0 +1,3 @@
+# If running time is longer than 120 seconds when new tests are added in
+# the future, increase timeout here.
+timeout=120
diff --git a/tools/testing/selftests/seccomp/Makefile b/tools/testing/selftests/seccomp/Makefile
index 585f7a0c10cb..f017c382c036 100644
--- a/tools/testing/selftests/seccomp/Makefile
+++ b/tools/testing/selftests/seccomp/Makefile
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
CFLAGS += -Wl,-no-as-needed -Wall -isystem ../../../../usr/include/
LDFLAGS += -lpthread
+LDLIBS += -lcap
TEST_GEN_PROGS := seccomp_bpf seccomp_benchmark
include ../lib.mk
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 9d126d7fabdb..136df5b76319 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -46,6 +46,7 @@
#include <sys/ioctl.h>
#include <linux/kcmp.h>
#include <sys/resource.h>
+#include <sys/capability.h>
#include <unistd.h>
#include <sys/syscall.h>
@@ -59,6 +60,8 @@
#define SKIP(s, ...) XFAIL(s, ##__VA_ARGS__)
#endif
+#define MIN(X, Y) ((X) < (Y) ? (X) : (Y))
+
#ifndef PR_SET_PTRACER
# define PR_SET_PTRACER 0x59616d61
#endif
@@ -268,6 +271,10 @@ struct seccomp_notif_addfd_big {
#define SECCOMP_FILTER_FLAG_TSYNC_ESRCH (1UL << 4)
#endif
+#ifndef SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV
+#define SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV (1UL << 5)
+#endif
+
#ifndef seccomp
int seccomp(unsigned int op, unsigned int flags, void *args)
{
@@ -955,7 +962,7 @@ TEST(ERRNO_valid)
ASSERT_EQ(0, ret);
EXPECT_EQ(parent, syscall(__NR_getppid));
- EXPECT_EQ(-1, read(0, NULL, 0));
+ EXPECT_EQ(-1, read(-1, NULL, 0));
EXPECT_EQ(E2BIG, errno);
}
@@ -974,7 +981,7 @@ TEST(ERRNO_zero)
EXPECT_EQ(parent, syscall(__NR_getppid));
/* "errno" of 0 is ok. */
- EXPECT_EQ(0, read(0, NULL, 0));
+ EXPECT_EQ(0, read(-1, NULL, 0));
}
/*
@@ -995,7 +1002,7 @@ TEST(ERRNO_capped)
ASSERT_EQ(0, ret);
EXPECT_EQ(parent, syscall(__NR_getppid));
- EXPECT_EQ(-1, read(0, NULL, 0));
+ EXPECT_EQ(-1, read(-1, NULL, 0));
EXPECT_EQ(4095, errno);
}
@@ -1026,7 +1033,7 @@ TEST(ERRNO_order)
ASSERT_EQ(0, ret);
EXPECT_EQ(parent, syscall(__NR_getppid));
- EXPECT_EQ(-1, read(0, NULL, 0));
+ EXPECT_EQ(-1, read(-1, NULL, 0));
EXPECT_EQ(12, errno);
}
@@ -2623,7 +2630,7 @@ void *tsync_sibling(void *data)
ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
if (!ret)
return (void *)SIBLING_EXIT_NEWPRIVS;
- read(0, NULL, 0);
+ read(-1, NULL, 0);
return (void *)SIBLING_EXIT_UNKILLED;
}
@@ -3742,7 +3749,10 @@ TEST(user_notification_fault_recv)
struct seccomp_notif req = {};
struct seccomp_notif_resp resp = {};
- ASSERT_EQ(unshare(CLONE_NEWUSER), 0);
+ ASSERT_EQ(unshare(CLONE_NEWUSER), 0) {
+ if (errno == EINVAL)
+ SKIP(return, "kernel missing CLONE_NEWUSER support");
+ }
listener = user_notif_syscall(__NR_getppid,
SECCOMP_FILTER_FLAG_NEW_LISTENER);
@@ -4231,6 +4241,421 @@ TEST(user_notification_addfd_rlimit)
close(memfd);
}
+/* Make sure PTRACE_O_SUSPEND_SECCOMP requires CAP_SYS_ADMIN. */
+FIXTURE(O_SUSPEND_SECCOMP) {
+ pid_t pid;
+};
+
+FIXTURE_SETUP(O_SUSPEND_SECCOMP)
+{
+ ERRNO_FILTER(block_read, E2BIG);
+ cap_value_t cap_list[] = { CAP_SYS_ADMIN };
+ cap_t caps;
+
+ self->pid = 0;
+
+ /* make sure we don't have CAP_SYS_ADMIN */
+ caps = cap_get_proc();
+ ASSERT_NE(NULL, caps);
+ ASSERT_EQ(0, cap_set_flag(caps, CAP_EFFECTIVE, 1, cap_list, CAP_CLEAR));
+ ASSERT_EQ(0, cap_set_proc(caps));
+ cap_free(caps);
+
+ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
+ ASSERT_EQ(0, prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_block_read));
+
+ self->pid = fork();
+ ASSERT_GE(self->pid, 0);
+
+ if (self->pid == 0) {
+ while (1)
+ pause();
+ _exit(127);
+ }
+}
+
+FIXTURE_TEARDOWN(O_SUSPEND_SECCOMP)
+{
+ if (self->pid)
+ kill(self->pid, SIGKILL);
+}
+
+TEST_F(O_SUSPEND_SECCOMP, setoptions)
+{
+ int wstatus;
+
+ ASSERT_EQ(0, ptrace(PTRACE_ATTACH, self->pid, NULL, 0));
+ ASSERT_EQ(self->pid, wait(&wstatus));
+ ASSERT_EQ(-1, ptrace(PTRACE_SETOPTIONS, self->pid, NULL, PTRACE_O_SUSPEND_SECCOMP));
+ if (errno == EINVAL)
+ SKIP(return, "Kernel does not support PTRACE_O_SUSPEND_SECCOMP (missing CONFIG_CHECKPOINT_RESTORE?)");
+ ASSERT_EQ(EPERM, errno);
+}
+
+TEST_F(O_SUSPEND_SECCOMP, seize)
+{
+ int ret;
+
+ ret = ptrace(PTRACE_SEIZE, self->pid, NULL, PTRACE_O_SUSPEND_SECCOMP);
+ ASSERT_EQ(-1, ret);
+ if (errno == EINVAL)
+ SKIP(return, "Kernel does not support PTRACE_O_SUSPEND_SECCOMP (missing CONFIG_CHECKPOINT_RESTORE?)");
+ ASSERT_EQ(EPERM, errno);
+}
+
+/*
+ * get_nth - Get the nth, space separated entry in a file.
+ *
+ * Returns the length of the read field.
+ * Throws error if field is zero-lengthed.
+ */
+static ssize_t get_nth(struct __test_metadata *_metadata, const char *path,
+ const unsigned int position, char **entry)
+{
+ char *line = NULL;
+ unsigned int i;
+ ssize_t nread;
+ size_t len = 0;
+ FILE *f;
+
+ f = fopen(path, "r");
+ ASSERT_NE(f, NULL) {
+ TH_LOG("Could not open %s: %s", path, strerror(errno));
+ }
+
+ for (i = 0; i < position; i++) {
+ nread = getdelim(&line, &len, ' ', f);
+ ASSERT_GE(nread, 0) {
+ TH_LOG("Failed to read %d entry in file %s", i, path);
+ }
+ }
+ fclose(f);
+
+ ASSERT_GT(nread, 0) {
+ TH_LOG("Entry in file %s had zero length", path);
+ }
+
+ *entry = line;
+ return nread - 1;
+}
+
+/* For a given PID, get the task state (D, R, etc...) */
+static char get_proc_stat(struct __test_metadata *_metadata, pid_t pid)
+{
+ char proc_path[100] = {0};
+ char status;
+ char *line;
+
+ snprintf(proc_path, sizeof(proc_path), "/proc/%d/stat", pid);
+ ASSERT_EQ(get_nth(_metadata, proc_path, 3, &line), 1);
+
+ status = *line;
+ free(line);
+
+ return status;
+}
+
+TEST(user_notification_fifo)
+{
+ struct seccomp_notif_resp resp = {};
+ struct seccomp_notif req = {};
+ int i, status, listener;
+ pid_t pid, pids[3];
+ __u64 baseid;
+ long ret;
+ /* 100 ms */
+ struct timespec delay = { .tv_nsec = 100000000 };
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ /* Setup a listener */
+ listener = user_notif_syscall(__NR_getppid,
+ SECCOMP_FILTER_FLAG_NEW_LISTENER);
+ ASSERT_GE(listener, 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ ret = syscall(__NR_getppid);
+ exit(ret != USER_NOTIF_MAGIC);
+ }
+
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
+ baseid = req.id + 1;
+
+ resp.id = req.id;
+ resp.error = 0;
+ resp.val = USER_NOTIF_MAGIC;
+
+ /* check that we make sure flags == 0 */
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
+
+ EXPECT_EQ(waitpid(pid, &status, 0), pid);
+ EXPECT_EQ(true, WIFEXITED(status));
+ EXPECT_EQ(0, WEXITSTATUS(status));
+
+ /* Start children, and generate notifications */
+ for (i = 0; i < ARRAY_SIZE(pids); i++) {
+ pid = fork();
+ if (pid == 0) {
+ ret = syscall(__NR_getppid);
+ exit(ret != USER_NOTIF_MAGIC);
+ }
+ pids[i] = pid;
+ }
+
+ /* This spins until all of the children are sleeping */
+restart_wait:
+ for (i = 0; i < ARRAY_SIZE(pids); i++) {
+ if (get_proc_stat(_metadata, pids[i]) != 'S') {
+ nanosleep(&delay, NULL);
+ goto restart_wait;
+ }
+ }
+
+ /* Read the notifications in order (and respond) */
+ for (i = 0; i < ARRAY_SIZE(pids); i++) {
+ memset(&req, 0, sizeof(req));
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
+ EXPECT_EQ(req.id, baseid + i);
+ resp.id = req.id;
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
+ }
+
+ /* Make sure notifications were received */
+ for (i = 0; i < ARRAY_SIZE(pids); i++) {
+ EXPECT_EQ(waitpid(pids[i], &status, 0), pids[i]);
+ EXPECT_EQ(true, WIFEXITED(status));
+ EXPECT_EQ(0, WEXITSTATUS(status));
+ }
+}
+
+/* get_proc_syscall - Get the syscall in progress for a given pid
+ *
+ * Returns the current syscall number for a given process
+ * Returns -1 if not in syscall (running or blocked)
+ */
+static long get_proc_syscall(struct __test_metadata *_metadata, int pid)
+{
+ char proc_path[100] = {0};
+ long ret = -1;
+ ssize_t nread;
+ char *line;
+
+ snprintf(proc_path, sizeof(proc_path), "/proc/%d/syscall", pid);
+ nread = get_nth(_metadata, proc_path, 1, &line);
+ ASSERT_GT(nread, 0);
+
+ if (!strncmp("running", line, MIN(7, nread)))
+ ret = strtol(line, NULL, 16);
+
+ free(line);
+ return ret;
+}
+
+/* Ensure non-fatal signals prior to receive are unmodified */
+TEST(user_notification_wait_killable_pre_notification)
+{
+ struct sigaction new_action = {
+ .sa_handler = signal_handler,
+ };
+ int listener, status, sk_pair[2];
+ pid_t pid;
+ long ret;
+ char c;
+ /* 100 ms */
+ struct timespec delay = { .tv_nsec = 100000000 };
+
+ ASSERT_EQ(sigemptyset(&new_action.sa_mask), 0);
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret)
+ {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ ASSERT_EQ(socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair), 0);
+
+ listener = user_notif_syscall(
+ __NR_getppid, SECCOMP_FILTER_FLAG_NEW_LISTENER |
+ SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV);
+ ASSERT_GE(listener, 0);
+
+ /*
+ * Check that we can kill the process with SIGUSR1 prior to receiving
+ * the notification. SIGUSR1 is wired up to a custom signal handler,
+ * and make sure it gets called.
+ */
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ close(sk_pair[0]);
+ handled = sk_pair[1];
+
+ /* Setup the non-fatal sigaction without SA_RESTART */
+ if (sigaction(SIGUSR1, &new_action, NULL)) {
+ perror("sigaction");
+ exit(1);
+ }
+
+ ret = syscall(__NR_getppid);
+ /* Make sure we got a return from a signal interruption */
+ exit(ret != -1 || errno != EINTR);
+ }
+
+ /*
+ * Make sure we've gotten to the seccomp user notification wait
+ * from getppid prior to sending any signals
+ */
+ while (get_proc_syscall(_metadata, pid) != __NR_getppid &&
+ get_proc_stat(_metadata, pid) != 'S')
+ nanosleep(&delay, NULL);
+
+ /* Send non-fatal kill signal */
+ EXPECT_EQ(kill(pid, SIGUSR1), 0);
+
+ /* wait for process to exit (exit checks for EINTR) */
+ EXPECT_EQ(waitpid(pid, &status, 0), pid);
+ EXPECT_EQ(true, WIFEXITED(status));
+ EXPECT_EQ(0, WEXITSTATUS(status));
+
+ EXPECT_EQ(read(sk_pair[0], &c, 1), 1);
+}
+
+/* Ensure non-fatal signals after receive are blocked */
+TEST(user_notification_wait_killable)
+{
+ struct sigaction new_action = {
+ .sa_handler = signal_handler,
+ };
+ struct seccomp_notif_resp resp = {};
+ struct seccomp_notif req = {};
+ int listener, status, sk_pair[2];
+ pid_t pid;
+ long ret;
+ char c;
+ /* 100 ms */
+ struct timespec delay = { .tv_nsec = 100000000 };
+
+ ASSERT_EQ(sigemptyset(&new_action.sa_mask), 0);
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret)
+ {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ ASSERT_EQ(socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair), 0);
+
+ listener = user_notif_syscall(
+ __NR_getppid, SECCOMP_FILTER_FLAG_NEW_LISTENER |
+ SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV);
+ ASSERT_GE(listener, 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ close(sk_pair[0]);
+ handled = sk_pair[1];
+
+ /* Setup the sigaction without SA_RESTART */
+ if (sigaction(SIGUSR1, &new_action, NULL)) {
+ perror("sigaction");
+ exit(1);
+ }
+
+ /* Make sure that the syscall is completed (no EINTR) */
+ ret = syscall(__NR_getppid);
+ exit(ret != USER_NOTIF_MAGIC);
+ }
+
+ /*
+ * Get the notification, to make move the notifying process into a
+ * non-preemptible (TASK_KILLABLE) state.
+ */
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
+ /* Send non-fatal kill signal */
+ EXPECT_EQ(kill(pid, SIGUSR1), 0);
+
+ /*
+ * Make sure the task enters moves to TASK_KILLABLE by waiting for
+ * D (Disk Sleep) state after receiving non-fatal signal.
+ */
+ while (get_proc_stat(_metadata, pid) != 'D')
+ nanosleep(&delay, NULL);
+
+ resp.id = req.id;
+ resp.val = USER_NOTIF_MAGIC;
+ /* Make sure the notification is found and able to be replied to */
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
+
+ /*
+ * Make sure that the signal handler does get called once we're back in
+ * userspace.
+ */
+ EXPECT_EQ(read(sk_pair[0], &c, 1), 1);
+ /* wait for process to exit (exit checks for USER_NOTIF_MAGIC) */
+ EXPECT_EQ(waitpid(pid, &status, 0), pid);
+ EXPECT_EQ(true, WIFEXITED(status));
+ EXPECT_EQ(0, WEXITSTATUS(status));
+}
+
+/* Ensure fatal signals after receive are not blocked */
+TEST(user_notification_wait_killable_fatal)
+{
+ struct seccomp_notif req = {};
+ int listener, status;
+ pid_t pid;
+ long ret;
+ /* 100 ms */
+ struct timespec delay = { .tv_nsec = 100000000 };
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret)
+ {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ listener = user_notif_syscall(
+ __NR_getppid, SECCOMP_FILTER_FLAG_NEW_LISTENER |
+ SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV);
+ ASSERT_GE(listener, 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* This should never complete as it should get a SIGTERM */
+ syscall(__NR_getppid);
+ exit(1);
+ }
+
+ while (get_proc_stat(_metadata, pid) != 'S')
+ nanosleep(&delay, NULL);
+
+ /*
+ * Get the notification, to make move the notifying process into a
+ * non-preemptible (TASK_KILLABLE) state.
+ */
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
+ /* Kill the process with a fatal signal */
+ EXPECT_EQ(kill(pid, SIGTERM), 0);
+
+ /*
+ * Wait for the process to exit, and make sure the process terminated
+ * due to the SIGTERM signal.
+ */
+ EXPECT_EQ(waitpid(pid, &status, 0), pid);
+ EXPECT_EQ(true, WIFSIGNALED(status));
+ EXPECT_EQ(SIGTERM, WTERMSIG(status));
+}
+
/*
* TODO:
* - expand NNP testing
diff --git a/tools/testing/selftests/vDSO/vdso_test_correctness.c b/tools/testing/selftests/vDSO/vdso_test_correctness.c
index c4aea794725a..e691a3cf1491 100644
--- a/tools/testing/selftests/vDSO/vdso_test_correctness.c
+++ b/tools/testing/selftests/vDSO/vdso_test_correctness.c
@@ -20,6 +20,7 @@
#include <limits.h>
#include "vdso_config.h"
+#include "../kselftest.h"
static const char **name;
@@ -306,10 +307,8 @@ static void test_clock_gettime(void)
return;
}
- for (int clock = 0; clock < sizeof(clocknames) / sizeof(clocknames[0]);
- clock++) {
+ for (int clock = 0; clock < ARRAY_SIZE(clocknames); clock++)
test_one_clock_gettime(clock, clocknames[clock]);
- }
/* Also test some invalid clock ids */
test_one_clock_gettime(-1, "invalid");
@@ -370,10 +369,8 @@ static void test_clock_gettime64(void)
return;
}
- for (int clock = 0; clock < sizeof(clocknames) / sizeof(clocknames[0]);
- clock++) {
+ for (int clock = 0; clock < ARRAY_SIZE(clocknames); clock++)
test_one_clock_gettime64(clock, clocknames[clock]);
- }
/* Also test some invalid clock ids */
test_one_clock_gettime64(-1, "invalid");
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index 04a49e876a46..5b1ecd00695b 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -57,9 +57,9 @@ CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_32bit_prog
CAN_BUILD_X86_64 := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_64bit_program.c)
CAN_BUILD_WITH_NOPIE := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_program.c -no-pie)
-TARGETS := protection_keys
-BINARIES_32 := $(TARGETS:%=%_32)
-BINARIES_64 := $(TARGETS:%=%_64)
+VMTARGETS := protection_keys
+BINARIES_32 := $(VMTARGETS:%=%_32)
+BINARIES_64 := $(VMTARGETS:%=%_64)
ifeq ($(CAN_BUILD_WITH_NOPIE),1)
CFLAGS += -no-pie
@@ -112,7 +112,7 @@ $(BINARIES_32): CFLAGS += -m32 -mxsave
$(BINARIES_32): LDLIBS += -lrt -ldl -lm
$(BINARIES_32): $(OUTPUT)/%_32: %.c
$(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(notdir $^) $(LDLIBS) -o $@
-$(foreach t,$(TARGETS),$(eval $(call gen-target-rule-32,$(t))))
+$(foreach t,$(VMTARGETS),$(eval $(call gen-target-rule-32,$(t))))
endif
ifeq ($(CAN_BUILD_X86_64),1)
@@ -120,7 +120,7 @@ $(BINARIES_64): CFLAGS += -m64 -mxsave
$(BINARIES_64): LDLIBS += -lrt -ldl
$(BINARIES_64): $(OUTPUT)/%_64: %.c
$(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(notdir $^) $(LDLIBS) -o $@
-$(foreach t,$(TARGETS),$(eval $(call gen-target-rule-64,$(t))))
+$(foreach t,$(VMTARGETS),$(eval $(call gen-target-rule-64,$(t))))
endif
# x86_64 users should be encouraged to install 32-bit libraries
diff --git a/tools/testing/selftests/vm/mremap_test.c b/tools/testing/selftests/vm/mremap_test.c
index 7c0b0617b9f8..db0270127aeb 100644
--- a/tools/testing/selftests/vm/mremap_test.c
+++ b/tools/testing/selftests/vm/mremap_test.c
@@ -6,9 +6,11 @@
#include <errno.h>
#include <stdlib.h>
+#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <time.h>
+#include <stdbool.h>
#include "../kselftest.h"
@@ -64,6 +66,59 @@ enum {
}
/*
+ * Returns false if the requested remap region overlaps with an
+ * existing mapping (e.g text, stack) else returns true.
+ */
+static bool is_remap_region_valid(void *addr, unsigned long long size)
+{
+ void *remap_addr = NULL;
+ bool ret = true;
+
+ /* Use MAP_FIXED_NOREPLACE flag to ensure region is not mapped */
+ remap_addr = mmap(addr, size, PROT_READ | PROT_WRITE,
+ MAP_FIXED_NOREPLACE | MAP_ANONYMOUS | MAP_SHARED,
+ -1, 0);
+
+ if (remap_addr == MAP_FAILED) {
+ if (errno == EEXIST)
+ ret = false;
+ } else {
+ munmap(remap_addr, size);
+ }
+
+ return ret;
+}
+
+/* Returns mmap_min_addr sysctl tunable from procfs */
+static unsigned long long get_mmap_min_addr(void)
+{
+ FILE *fp;
+ int n_matched;
+ static unsigned long long addr;
+
+ if (addr)
+ return addr;
+
+ fp = fopen("/proc/sys/vm/mmap_min_addr", "r");
+ if (fp == NULL) {
+ ksft_print_msg("Failed to open /proc/sys/vm/mmap_min_addr: %s\n",
+ strerror(errno));
+ exit(KSFT_SKIP);
+ }
+
+ n_matched = fscanf(fp, "%llu", &addr);
+ if (n_matched != 1) {
+ ksft_print_msg("Failed to read /proc/sys/vm/mmap_min_addr: %s\n",
+ strerror(errno));
+ fclose(fp);
+ exit(KSFT_SKIP);
+ }
+
+ fclose(fp);
+ return addr;
+}
+
+/*
* Returns the start address of the mapping on success, else returns
* NULL on failure.
*/
@@ -71,11 +126,18 @@ static void *get_source_mapping(struct config c)
{
unsigned long long addr = 0ULL;
void *src_addr = NULL;
+ unsigned long long mmap_min_addr;
+
+ mmap_min_addr = get_mmap_min_addr();
+
retry:
addr += c.src_alignment;
+ if (addr < mmap_min_addr)
+ goto retry;
+
src_addr = mmap((void *) addr, c.region_size, PROT_READ | PROT_WRITE,
- MAP_FIXED_NOREPLACE | MAP_ANONYMOUS | MAP_SHARED,
- -1, 0);
+ MAP_FIXED_NOREPLACE | MAP_ANONYMOUS | MAP_SHARED,
+ -1, 0);
if (src_addr == MAP_FAILED) {
if (errno == EPERM || errno == EEXIST)
goto retry;
@@ -90,8 +152,10 @@ retry:
* alignment in the tests.
*/
if (((unsigned long long) src_addr & (c.src_alignment - 1)) ||
- !((unsigned long long) src_addr & c.src_alignment))
+ !((unsigned long long) src_addr & c.src_alignment)) {
+ munmap(src_addr, c.region_size);
goto retry;
+ }
if (!src_addr)
goto error;
@@ -140,9 +204,20 @@ static long long remap_region(struct config c, unsigned int threshold_mb,
if (!((unsigned long long) addr & c.dest_alignment))
addr = (void *) ((unsigned long long) addr | c.dest_alignment);
+ /* Don't destroy existing mappings unless expected to overlap */
+ while (!is_remap_region_valid(addr, c.region_size) && !c.overlapping) {
+ /* Check for unsigned overflow */
+ if (addr + c.dest_alignment < addr) {
+ ksft_print_msg("Couldn't find a valid region to remap to\n");
+ ret = -1;
+ goto out;
+ }
+ addr += c.dest_alignment;
+ }
+
clock_gettime(CLOCK_MONOTONIC, &t_start);
dest_addr = mremap(src_addr, c.region_size, c.region_size,
- MREMAP_MAYMOVE|MREMAP_FIXED, (char *) addr);
+ MREMAP_MAYMOVE|MREMAP_FIXED, (char *) addr);
clock_gettime(CLOCK_MONOTONIC, &t_end);
if (dest_addr == MAP_FAILED) {
@@ -193,7 +268,7 @@ static void run_mremap_test_case(struct test test_case, int *failures,
if (remap_time < 0) {
if (test_case.expect_failure)
- ksft_test_result_pass("%s\n\tExpected mremap failure\n",
+ ksft_test_result_xfail("%s\n\tExpected mremap failure\n",
test_case.name);
else {
ksft_test_result_fail("%s\n", test_case.name);
diff --git a/tools/testing/selftests/vm/pkey-x86.h b/tools/testing/selftests/vm/pkey-x86.h
index e4a4ce2b826d..b078ce9c6d2a 100644
--- a/tools/testing/selftests/vm/pkey-x86.h
+++ b/tools/testing/selftests/vm/pkey-x86.h
@@ -80,19 +80,6 @@ static inline void __write_pkey_reg(u64 pkey_reg)
assert(pkey_reg == __read_pkey_reg());
}
-static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
- unsigned int *ecx, unsigned int *edx)
-{
- /* ecx is often an input as well as an output. */
- asm volatile(
- "cpuid;"
- : "=a" (*eax),
- "=b" (*ebx),
- "=c" (*ecx),
- "=d" (*edx)
- : "0" (*eax), "2" (*ecx));
-}
-
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx) */
#define X86_FEATURE_PKU (1<<3) /* Protection Keys for Userspace */
#define X86_FEATURE_OSPKE (1<<4) /* OS Protection Keys Enable */
@@ -104,9 +91,7 @@ static inline int cpu_has_pkeys(void)
unsigned int ecx;
unsigned int edx;
- eax = 0x7;
- ecx = 0x0;
- __cpuid(&eax, &ebx, &ecx, &edx);
+ __cpuid_count(0x7, 0x0, eax, ebx, ecx, edx);
if (!(ecx & X86_FEATURE_PKU)) {
dprintf2("cpu does not have PKU\n");
@@ -142,9 +127,7 @@ int pkey_reg_xstate_offset(void)
/* assume that XSTATE_PKEY is set in XCR0 */
leaf = XSTATE_PKEY_BIT;
{
- eax = XSTATE_CPUID;
- ecx = leaf;
- __cpuid(&eax, &ebx, &ecx, &edx);
+ __cpuid_count(XSTATE_CPUID, leaf, eax, ebx, ecx, edx);
if (leaf == XSTATE_PKEY_BIT) {
xstate_offset = ebx;
diff --git a/tools/testing/selftests/vm/run_vmtests.sh b/tools/testing/selftests/vm/run_vmtests.sh
index 3b265f140c25..352ba00cf26b 100755
--- a/tools/testing/selftests/vm/run_vmtests.sh
+++ b/tools/testing/selftests/vm/run_vmtests.sh
@@ -291,11 +291,16 @@ echo "-------------------"
echo "running mremap_test"
echo "-------------------"
./mremap_test
-if [ $? -ne 0 ]; then
+ret_val=$?
+
+if [ $ret_val -eq 0 ]; then
+ echo "[PASS]"
+elif [ $ret_val -eq $ksft_skip ]; then
+ echo "[SKIP]"
+ exitcode=$ksft_skip
+else
echo "[FAIL]"
exitcode=1
-else
- echo "[PASS]"
fi
echo "-----------------"
diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh
index 8a9461aa0878..69c7796c7ca9 100755
--- a/tools/testing/selftests/wireguard/netns.sh
+++ b/tools/testing/selftests/wireguard/netns.sh
@@ -22,10 +22,12 @@
# interfaces in $ns1 and $ns2. See https://www.wireguard.com/netns/ for further
# details on how this is accomplished.
set -e
+shopt -s extglob
exec 3>&1
export LANG=C
export WG_HIDE_KEYS=never
+NPROC=( /sys/devices/system/cpu/cpu+([0-9]) ); NPROC=${#NPROC[@]}
netns0="wg-test-$$-0"
netns1="wg-test-$$-1"
netns2="wg-test-$$-2"
@@ -143,17 +145,15 @@ tests() {
n1 iperf3 -Z -t 3 -b 0 -u -c fd00::2
# TCP over IPv4, in parallel
- for max in 4 5 50; do
- local pids=( )
- for ((i=0; i < max; ++i)) do
- n2 iperf3 -p $(( 5200 + i )) -s -1 -B 192.168.241.2 &
- pids+=( $! ); waitiperf $netns2 $! $(( 5200 + i ))
- done
- for ((i=0; i < max; ++i)) do
- n1 iperf3 -Z -t 3 -p $(( 5200 + i )) -c 192.168.241.2 &
- done
- wait "${pids[@]}"
+ local pids=( ) i
+ for ((i=0; i < NPROC; ++i)) do
+ n2 iperf3 -p $(( 5200 + i )) -s -1 -B 192.168.241.2 &
+ pids+=( $! ); waitiperf $netns2 $! $(( 5200 + i ))
done
+ for ((i=0; i < NPROC; ++i)) do
+ n1 iperf3 -Z -t 3 -p $(( 5200 + i )) -c 192.168.241.2 &
+ done
+ wait "${pids[@]}"
}
[[ $(ip1 link show dev wg0) =~ mtu\ ([0-9]+) ]] && orig_mtu="${BASH_REMATCH[1]}"
@@ -280,7 +280,19 @@ read _ _ tx_bytes_before < <(n0 wg show wg1 transfer)
! n0 ping -W 1 -c 10 -f 192.168.241.2 || false
sleep 1
read _ _ tx_bytes_after < <(n0 wg show wg1 transfer)
-(( tx_bytes_after - tx_bytes_before < 70000 ))
+if ! (( tx_bytes_after - tx_bytes_before < 70000 )); then
+ errstart=$'\x1b[37m\x1b[41m\x1b[1m'
+ errend=$'\x1b[0m'
+ echo "${errstart} ${errend}"
+ echo "${errstart} E R R O R ${errend}"
+ echo "${errstart} ${errend}"
+ echo "${errstart} This architecture does not do the right thing ${errend}"
+ echo "${errstart} with cross-namespace routing loops. This test ${errend}"
+ echo "${errstart} has thus technically failed but, as this issue ${errend}"
+ echo "${errstart} is as yet unsolved, these tests will continue ${errend}"
+ echo "${errstart} onward. :( ${errend}"
+ echo "${errstart} ${errend}"
+fi
ip0 link del wg1
ip1 link del wg0
diff --git a/tools/testing/selftests/wireguard/qemu/.gitignore b/tools/testing/selftests/wireguard/qemu/.gitignore
index bfa15e6feb2f..42ab9d72b37b 100644
--- a/tools/testing/selftests/wireguard/qemu/.gitignore
+++ b/tools/testing/selftests/wireguard/qemu/.gitignore
@@ -1,3 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
build/
distfiles/
+ccache/
diff --git a/tools/testing/selftests/wireguard/qemu/Makefile b/tools/testing/selftests/wireguard/qemu/Makefile
index 4bdd6c1a19d3..bca07b93eeb0 100644
--- a/tools/testing/selftests/wireguard/qemu/Makefile
+++ b/tools/testing/selftests/wireguard/qemu/Makefile
@@ -4,26 +4,24 @@
PWD := $(shell pwd)
-CHOST := $(shell gcc -dumpmachine)
-HOST_ARCH := $(firstword $(subst -, ,$(CHOST)))
-ifneq (,$(ARCH))
-CBUILD := $(subst -gcc,,$(lastword $(subst /, ,$(firstword $(wildcard $(foreach bindir,$(subst :, ,$(PATH)),$(bindir)/$(ARCH)-*-gcc))))))
-ifeq (,$(CBUILD))
-$(error The toolchain for $(ARCH) is not installed)
-endif
-else
-CBUILD := $(CHOST)
-ARCH := $(firstword $(subst -, ,$(CBUILD)))
-endif
-
# Set these from the environment to override
KERNEL_PATH ?= $(PWD)/../../../../..
BUILD_PATH ?= $(PWD)/build/$(ARCH)
DISTFILES_PATH ?= $(PWD)/distfiles
NR_CPUS ?= 4
+ARCH ?=
+CBUILD := $(shell gcc -dumpmachine)
+HOST_ARCH := $(firstword $(subst -, ,$(CBUILD)))
+ifeq ($(ARCH),)
+ARCH := $(HOST_ARCH)
+endif
MIRROR := https://download.wireguard.com/qemu-test/distfiles/
+KERNEL_BUILD_PATH := $(BUILD_PATH)/kernel$(if $(findstring yes,$(DEBUG_KERNEL)),-debug)
+rwildcard=$(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2) $(filter $(subst *,%,$2),$d))
+WIREGUARD_SOURCES := $(call rwildcard,$(KERNEL_PATH)/drivers/net/wireguard/,*)
+
default: qemu
# variable name, tarball project name, version, tarball extension, default URI base
@@ -36,42 +34,33 @@ $(call file_download,$$($(1)_NAME)$(4),$(5),$(6))
endef
define file_download =
-$(DISTFILES_PATH)/$(1):
+$(DISTFILES_PATH)/$(1): | $(4)
mkdir -p $(DISTFILES_PATH)
- flock -x $$@.lock -c '[ -f $$@ ] && exit 0; wget -O $$@.tmp $(MIRROR)$(1) || wget -O $$@.tmp $(2)$(1) || rm -f $$@.tmp; [ -f $$@.tmp ] || exit 1; if echo "$(3) $$@.tmp" | sha256sum -c -; then mv $$@.tmp $$@; else rm -f $$@.tmp; exit 71; fi'
+ flock -x $$@.lock -c '[ -f $$@ ] && exit 0; wget -O $$@.tmp $(MIRROR)$(1) || wget -O $$@.tmp $(2)$(1) || rm -f $$@.tmp; [ -f $$@.tmp ] || exit 1; if ([ -n "$(4)" ] && sed -n "s#^\([a-f0-9]\{64\}\) \($(1)\)\$$$$#\1 $(DISTFILES_PATH)/\2.tmp#p" "$(4)" || echo "$(3) $$@.tmp") | sha256sum -c -; then mv $$@.tmp $$@; else rm -f $$@.tmp; exit 71; fi'
endef
-$(eval $(call tar_download,MUSL,musl,1.2.0,.tar.gz,https://musl.libc.org/releases/,c6de7b191139142d3f9a7b5b702c9cae1b5ee6e7f57e582da9328629408fd4e8))
-$(eval $(call tar_download,IPERF,iperf,3.7,.tar.gz,https://downloads.es.net/pub/iperf/,d846040224317caf2f75c843d309a950a7db23f9b44b94688ccbe557d6d1710c))
-$(eval $(call tar_download,BASH,bash,5.0,.tar.gz,https://ftp.gnu.org/gnu/bash/,b4a80f2ac66170b2913efbfb9f2594f1f76c7b1afd11f799e22035d63077fb4d))
-$(eval $(call tar_download,IPROUTE2,iproute2,5.6.0,.tar.xz,https://www.kernel.org/pub/linux/utils/net/iproute2/,1b5b0e25ce6e23da7526ea1da044e814ad85ba761b10dd29c2b027c056b04692))
-$(eval $(call tar_download,IPTABLES,iptables,1.8.4,.tar.bz2,https://www.netfilter.org/projects/iptables/files/,993a3a5490a544c2cbf2ef15cf7e7ed21af1845baf228318d5c36ef8827e157c))
-$(eval $(call tar_download,NMAP,nmap,7.80,.tar.bz2,https://nmap.org/dist/,fcfa5a0e42099e12e4bf7a68ebe6fde05553383a682e816a7ec9256ab4773faa))
+$(eval $(call tar_download,IPERF,iperf,3.11,.tar.gz,https://downloads.es.net/pub/iperf/,de8cb409fad61a0574f4cb07eb19ce1159707403ac2dc01b5d175e91240b7e5f))
+$(eval $(call tar_download,BASH,bash,5.1.16,.tar.gz,https://ftp.gnu.org/gnu/bash/,5bac17218d3911834520dad13cd1f85ab944e1c09ae1aba55906be1f8192f558))
+$(eval $(call tar_download,IPROUTE2,iproute2,5.17.0,.tar.gz,https://www.kernel.org/pub/linux/utils/net/iproute2/,bda331d5c4606138892f23a565d78fca18919b4d508a0b7ca8391c2da2db68b9))
+$(eval $(call tar_download,IPTABLES,iptables,1.8.7,.tar.bz2,https://www.netfilter.org/projects/iptables/files/,c109c96bb04998cd44156622d36f8e04b140701ec60531a10668cfdff5e8d8f0))
+$(eval $(call tar_download,NMAP,nmap,7.92,.tgz,https://nmap.org/dist/,064183ea642dc4c12b1ab3b5358ce1cef7d2e7e11ffa2849f16d339f5b717117))
$(eval $(call tar_download,IPUTILS,iputils,s20190709,.tar.gz,https://github.com/iputils/iputils/archive/s20190709.tar.gz/#,a15720dd741d7538dd2645f9f516d193636ae4300ff7dbc8bfca757bf166490a))
-$(eval $(call tar_download,WIREGUARD_TOOLS,wireguard-tools,1.0.20200206,.tar.xz,https://git.zx2c4.com/wireguard-tools/snapshot/,f5207248c6a3c3e3bfc9ab30b91c1897b00802ed861e1f9faaed873366078c64))
-
-KERNEL_BUILD_PATH := $(BUILD_PATH)/kernel$(if $(findstring yes,$(DEBUG_KERNEL)),-debug)
-rwildcard=$(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2) $(filter $(subst *,%,$2),$d))
-WIREGUARD_SOURCES := $(call rwildcard,$(KERNEL_PATH)/drivers/net/wireguard/,*)
-
-export CFLAGS ?= -O3 -pipe
-export LDFLAGS ?=
-export CPPFLAGS := -I$(BUILD_PATH)/include
+$(eval $(call tar_download,WIREGUARD_TOOLS,wireguard-tools,1.0.20210914,.tar.xz,https://git.zx2c4.com/wireguard-tools/snapshot/,97ff31489217bb265b7ae850d3d0f335ab07d2652ba1feec88b734bc96bd05ac))
+export CFLAGS := -O3 -pipe
ifeq ($(HOST_ARCH),$(ARCH))
-CROSS_COMPILE_FLAG := --host=$(CHOST)
CFLAGS += -march=native
-STRIP := strip
-else
-$(info Cross compilation: building for $(CBUILD) using $(CHOST))
-CROSS_COMPILE_FLAG := --build=$(CBUILD) --host=$(CHOST)
-export CROSS_COMPILE=$(CBUILD)-
-STRIP := $(CBUILD)-strip
endif
+export LDFLAGS :=
+export CPPFLAGS :=
+
+QEMU_VPORT_RESULT :=
ifeq ($(ARCH),aarch64)
+CHOST := aarch64-linux-musl
QEMU_ARCH := aarch64
KERNEL_ARCH := arm64
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm64/boot/Image
+QEMU_VPORT_RESULT := virtio-serial-device
ifeq ($(HOST_ARCH),$(ARCH))
QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm
else
@@ -79,9 +68,11 @@ QEMU_MACHINE := -cpu cortex-a53 -machine virt
CFLAGS += -march=armv8-a -mtune=cortex-a53
endif
else ifeq ($(ARCH),aarch64_be)
+CHOST := aarch64_be-linux-musl
QEMU_ARCH := aarch64
KERNEL_ARCH := arm64
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm64/boot/Image
+QEMU_VPORT_RESULT := virtio-serial-device
ifeq ($(HOST_ARCH),$(ARCH))
QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm
else
@@ -89,9 +80,11 @@ QEMU_MACHINE := -cpu cortex-a53 -machine virt
CFLAGS += -march=armv8-a -mtune=cortex-a53
endif
else ifeq ($(ARCH),arm)
+CHOST := arm-linux-musleabi
QEMU_ARCH := arm
KERNEL_ARCH := arm
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm/boot/zImage
+QEMU_VPORT_RESULT := virtio-serial-device
ifeq ($(HOST_ARCH),$(ARCH))
QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm
else
@@ -99,9 +92,11 @@ QEMU_MACHINE := -cpu cortex-a15 -machine virt
CFLAGS += -march=armv7-a -mtune=cortex-a15 -mabi=aapcs-linux
endif
else ifeq ($(ARCH),armeb)
+CHOST := armeb-linux-musleabi
QEMU_ARCH := arm
KERNEL_ARCH := arm
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm/boot/zImage
+QEMU_VPORT_RESULT := virtio-serial-device
ifeq ($(HOST_ARCH),$(ARCH))
QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm
else
@@ -110,6 +105,7 @@ CFLAGS += -march=armv7-a -mabi=aapcs-linux # We don't pass -mtune=cortex-a15 due
LDFLAGS += -Wl,--be8
endif
else ifeq ($(ARCH),x86_64)
+CHOST := x86_64-linux-musl
QEMU_ARCH := x86_64
KERNEL_ARCH := x86_64
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage
@@ -120,6 +116,7 @@ QEMU_MACHINE := -cpu Skylake-Server -machine q35
CFLAGS += -march=skylake-avx512
endif
else ifeq ($(ARCH),i686)
+CHOST := i686-linux-musl
QEMU_ARCH := i386
KERNEL_ARCH := x86
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage
@@ -130,6 +127,7 @@ QEMU_MACHINE := -cpu coreduo -machine q35
CFLAGS += -march=prescott
endif
else ifeq ($(ARCH),mips64)
+CHOST := mips64-linux-musl
QEMU_ARCH := mips64
KERNEL_ARCH := mips
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
@@ -141,6 +139,7 @@ QEMU_MACHINE := -cpu MIPS64R2-generic -machine malta -smp 1
CFLAGS += -march=mips64r2 -EB
endif
else ifeq ($(ARCH),mips64el)
+CHOST := mips64el-linux-musl
QEMU_ARCH := mips64el
KERNEL_ARCH := mips
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
@@ -152,6 +151,7 @@ QEMU_MACHINE := -cpu MIPS64R2-generic -machine malta -smp 1
CFLAGS += -march=mips64r2 -EL
endif
else ifeq ($(ARCH),mips)
+CHOST := mips-linux-musl
QEMU_ARCH := mips
KERNEL_ARCH := mips
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
@@ -163,6 +163,7 @@ QEMU_MACHINE := -cpu 24Kf -machine malta -smp 1
CFLAGS += -march=mips32r2 -EB
endif
else ifeq ($(ARCH),mipsel)
+CHOST := mipsel-linux-musl
QEMU_ARCH := mipsel
KERNEL_ARCH := mips
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
@@ -173,7 +174,18 @@ else
QEMU_MACHINE := -cpu 24Kf -machine malta -smp 1
CFLAGS += -march=mips32r2 -EL
endif
+else ifeq ($(ARCH),powerpc64)
+CHOST := powerpc64-linux-musl
+QEMU_ARCH := ppc64
+KERNEL_ARCH := powerpc
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
+ifeq ($(HOST_ARCH),$(ARCH))
+QEMU_MACHINE := -cpu host,accel=kvm -machine pseries
+else
+QEMU_MACHINE := -machine pseries
+endif
else ifeq ($(ARCH),powerpc64le)
+CHOST := powerpc64le-linux-musl
QEMU_ARCH := ppc64
KERNEL_ARCH := powerpc
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
@@ -182,8 +194,8 @@ QEMU_MACHINE := -cpu host,accel=kvm -machine pseries
else
QEMU_MACHINE := -machine pseries
endif
-CFLAGS += -mcpu=powerpc64le -mlong-double-64
else ifeq ($(ARCH),powerpc)
+CHOST := powerpc-linux-musl
QEMU_ARCH := ppc
KERNEL_ARCH := powerpc
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/powerpc/boot/uImage
@@ -192,26 +204,79 @@ QEMU_MACHINE := -cpu host,accel=kvm -machine ppce500
else
QEMU_MACHINE := -machine ppce500
endif
-CFLAGS += -mcpu=powerpc -mlong-double-64 -msecure-plt
else ifeq ($(ARCH),m68k)
+CHOST := m68k-linux-musl
QEMU_ARCH := m68k
KERNEL_ARCH := m68k
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
KERNEL_CMDLINE := $(shell sed -n 's/CONFIG_CMDLINE=\(.*\)/\1/p' arch/m68k.config)
ifeq ($(HOST_ARCH),$(ARCH))
-QEMU_MACHINE := -cpu host,accel=kvm -machine q800 -smp 1 -append $(KERNEL_CMDLINE)
+QEMU_MACHINE := -cpu host,accel=kvm -machine q800 -append $(KERNEL_CMDLINE)
else
QEMU_MACHINE := -machine q800 -smp 1 -append $(KERNEL_CMDLINE)
endif
+else ifeq ($(ARCH),riscv64)
+CHOST := riscv64-linux-musl
+QEMU_ARCH := riscv64
+KERNEL_ARCH := riscv
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/riscv/boot/Image
+QEMU_VPORT_RESULT := virtio-serial-device
+ifeq ($(HOST_ARCH),$(ARCH))
+QEMU_MACHINE := -cpu host,accel=kvm -machine virt
+else
+QEMU_MACHINE := -cpu rv64 -machine virt
+endif
+else ifeq ($(ARCH),riscv32)
+CHOST := riscv32-linux-musl
+QEMU_ARCH := riscv32
+KERNEL_ARCH := riscv
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/riscv/boot/Image
+QEMU_VPORT_RESULT := virtio-serial-device
+ifeq ($(HOST_ARCH),$(ARCH))
+QEMU_MACHINE := -cpu host,accel=kvm -machine virt
+else
+QEMU_MACHINE := -cpu rv32 -machine virt
+endif
+else ifeq ($(ARCH),s390x)
+CHOST := s390x-linux-musl
+QEMU_ARCH := s390x
+KERNEL_ARCH := s390
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/s390/boot/bzImage
+KERNEL_CMDLINE := $(shell sed -n 's/CONFIG_CMDLINE=\(.*\)/\1/p' arch/s390x.config)
+QEMU_VPORT_RESULT := virtio-serial-ccw
+ifeq ($(HOST_ARCH),$(ARCH))
+QEMU_MACHINE := -cpu host,accel=kvm -machine s390-ccw-virtio -append $(KERNEL_CMDLINE)
+else
+QEMU_MACHINE := -machine s390-ccw-virtio -append $(KERNEL_CMDLINE)
+endif
else
-$(error I only build: x86_64, i686, arm, armeb, aarch64, aarch64_be, mips, mipsel, mips64, mips64el, powerpc64le, powerpc, m68k)
+$(error I only build: x86_64, i686, arm, armeb, aarch64, aarch64_be, mips, mipsel, mips64, mips64el, powerpc64, powerpc64le, powerpc, m68k, riscv64, riscv32, s390x)
+endif
+
+TOOLCHAIN_FILENAME := $(CHOST)-cross.tgz
+TOOLCHAIN_TAR := $(DISTFILES_PATH)/$(TOOLCHAIN_FILENAME)
+TOOLCHAIN_PATH := $(BUILD_PATH)/$(CHOST)-cross
+TOOLCHAIN_DIR := https://download.wireguard.com/qemu-test/toolchains/20211123/
+$(eval $(call file_download,toolchain-sha256sums-20211123,$(TOOLCHAIN_DIR)SHA256SUMS#,83da033fd8c798df476c21d9612da2dfb896ec62fbed4ceec5eefc0e56b3f0c8))
+$(eval $(call file_download,$(TOOLCHAIN_FILENAME),$(TOOLCHAIN_DIR),,$(DISTFILES_PATH)/toolchain-sha256sums-20211123))
+
+STRIP := $(CHOST)-strip
+CROSS_COMPILE_FLAG := --build=$(CBUILD) --host=$(CHOST)
+$(info Building for $(CHOST) using $(CBUILD))
+export CROSS_COMPILE := $(CHOST)-
+export PATH := $(TOOLCHAIN_PATH)/bin:$(PATH)
+export CC := $(CHOST)-gcc
+CCACHE_PATH := $(shell which ccache 2>/dev/null)
+ifneq ($(CCACHE_PATH),)
+export KBUILD_BUILD_TIMESTAMP := Fri Jun 5 15:58:00 CEST 2015
+export PATH := $(TOOLCHAIN_PATH)/bin/ccache:$(PATH)
+export CCACHE_SLOPPINESS := file_macro,time_macros
+export CCACHE_DIR ?= $(PWD)/ccache
endif
-REAL_CC := $(CBUILD)-gcc
-MUSL_CC := $(BUILD_PATH)/musl-gcc
-export CC := $(MUSL_CC)
-USERSPACE_DEPS := $(MUSL_CC) $(BUILD_PATH)/include/.installed $(BUILD_PATH)/include/linux/.installed
+USERSPACE_DEPS := $(TOOLCHAIN_PATH)/.installed $(TOOLCHAIN_PATH)/$(CHOST)/include/linux/.installed
+comma := ,
build: $(KERNEL_BZIMAGE)
qemu: $(KERNEL_BZIMAGE)
rm -f $(BUILD_PATH)/result
@@ -222,13 +287,14 @@ qemu: $(KERNEL_BZIMAGE)
$(QEMU_MACHINE) \
-m $$(grep -q CONFIG_DEBUG_KMEMLEAK=y $(KERNEL_BUILD_PATH)/.config && echo 1G || echo 256M) \
-serial stdio \
- -serial file:$(BUILD_PATH)/result \
+ -chardev file,path=$(BUILD_PATH)/result,id=result \
+ $(if $(QEMU_VPORT_RESULT),-device $(QEMU_VPORT_RESULT) -device virtserialport$(comma)chardev=result,-serial chardev:result) \
-no-reboot \
-monitor none \
-kernel $<
grep -Fq success $(BUILD_PATH)/result
-$(BUILD_PATH)/init-cpio-spec.txt:
+$(BUILD_PATH)/init-cpio-spec.txt: $(TOOLCHAIN_PATH)/.installed $(BUILD_PATH)/init
mkdir -p $(BUILD_PATH)
echo "file /init $(BUILD_PATH)/init 755 0 0" > $@
echo "file /init.sh $(PWD)/../netns.sh 755 0 0" >> $@
@@ -246,10 +312,10 @@ $(BUILD_PATH)/init-cpio-spec.txt:
echo "slink /bin/iptables xtables-legacy-multi 777 0 0" >> $@
echo "slink /bin/ping6 ping 777 0 0" >> $@
echo "dir /lib 755 0 0" >> $@
- echo "file /lib/libc.so $(MUSL_PATH)/lib/libc.so 755 0 0" >> $@
- echo "slink /lib/ld-linux.so.1 libc.so 777 0 0" >> $@
+ echo "file /lib/libc.so $(TOOLCHAIN_PATH)/$(CHOST)/lib/libc.so 755 0 0" >> $@
+ echo "slink $$($(CHOST)-readelf -p .interp '$(BUILD_PATH)/init'| grep -o '/lib/.*') libc.so 777 0 0" >> $@
-$(KERNEL_BUILD_PATH)/.config: kernel.config arch/$(ARCH).config
+$(KERNEL_BUILD_PATH)/.config: $(TOOLCHAIN_PATH)/.installed kernel.config arch/$(ARCH).config
mkdir -p $(KERNEL_BUILD_PATH)
cp kernel.config $(KERNEL_BUILD_PATH)/minimal.config
printf 'CONFIG_NR_CPUS=$(NR_CPUS)\nCONFIG_INITRAMFS_SOURCE="$(BUILD_PATH)/init-cpio-spec.txt"\n' >> $(KERNEL_BUILD_PATH)/minimal.config
@@ -258,29 +324,24 @@ $(KERNEL_BUILD_PATH)/.config: kernel.config arch/$(ARCH).config
cd $(KERNEL_BUILD_PATH) && ARCH=$(KERNEL_ARCH) $(KERNEL_PATH)/scripts/kconfig/merge_config.sh -n $(KERNEL_BUILD_PATH)/.config $(KERNEL_BUILD_PATH)/minimal.config
$(if $(findstring yes,$(DEBUG_KERNEL)),cp debug.config $(KERNEL_BUILD_PATH) && cd $(KERNEL_BUILD_PATH) && ARCH=$(KERNEL_ARCH) $(KERNEL_PATH)/scripts/kconfig/merge_config.sh -n $(KERNEL_BUILD_PATH)/.config debug.config,)
-$(KERNEL_BZIMAGE): $(KERNEL_BUILD_PATH)/.config $(BUILD_PATH)/init-cpio-spec.txt $(MUSL_PATH)/lib/libc.so $(IPERF_PATH)/src/iperf3 $(IPUTILS_PATH)/ping $(BASH_PATH)/bash $(IPROUTE2_PATH)/misc/ss $(IPROUTE2_PATH)/ip/ip $(IPTABLES_PATH)/iptables/xtables-legacy-multi $(NMAP_PATH)/ncat/ncat $(WIREGUARD_TOOLS_PATH)/src/wg $(BUILD_PATH)/init ../netns.sh $(WIREGUARD_SOURCES)
+$(KERNEL_BZIMAGE): $(TOOLCHAIN_PATH)/.installed $(KERNEL_BUILD_PATH)/.config $(BUILD_PATH)/init-cpio-spec.txt $(IPERF_PATH)/src/iperf3 $(IPUTILS_PATH)/ping $(BASH_PATH)/bash $(IPROUTE2_PATH)/misc/ss $(IPROUTE2_PATH)/ip/ip $(IPTABLES_PATH)/iptables/xtables-legacy-multi $(NMAP_PATH)/ncat/ncat $(WIREGUARD_TOOLS_PATH)/src/wg $(BUILD_PATH)/init ../netns.sh $(WIREGUARD_SOURCES)
$(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE)
-$(BUILD_PATH)/include/linux/.installed: | $(KERNEL_BUILD_PATH)/.config
- $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) INSTALL_HDR_PATH=$(BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) headers_install
+$(TOOLCHAIN_PATH)/$(CHOST)/include/linux/.installed: | $(KERNEL_BUILD_PATH)/.config $(TOOLCHAIN_PATH)/.installed
+ rm -rf $(TOOLCHAIN_PATH)/$(CHOST)/include/linux
+ $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) INSTALL_HDR_PATH=$(TOOLCHAIN_PATH)/$(CHOST) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) headers_install
touch $@
-$(MUSL_PATH)/lib/libc.so: $(MUSL_TAR)
+$(TOOLCHAIN_PATH)/.installed: $(TOOLCHAIN_TAR)
mkdir -p $(BUILD_PATH)
flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
- cd $(MUSL_PATH) && CC=$(REAL_CC) ./configure --prefix=/ --disable-static --build=$(CBUILD)
- $(MAKE) -C $(MUSL_PATH)
- $(STRIP) -s $@
-
-$(BUILD_PATH)/include/.installed: $(MUSL_PATH)/lib/libc.so
- $(MAKE) -C $(MUSL_PATH) DESTDIR=$(BUILD_PATH) install-headers
+ $(STRIP) -s $(TOOLCHAIN_PATH)/$(CHOST)/lib/libc.so
+ifneq ($(CCACHE_PATH),)
+ mkdir -p $(TOOLCHAIN_PATH)/bin/ccache
+ ln -s $(CCACHE_PATH) $(TOOLCHAIN_PATH)/bin/ccache/$(CC)
+endif
touch $@
-$(MUSL_CC): $(MUSL_PATH)/lib/libc.so
- sh $(MUSL_PATH)/tools/musl-gcc.specs.sh $(BUILD_PATH)/include $(MUSL_PATH)/lib /lib/ld-linux.so.1 > $(BUILD_PATH)/musl-gcc.specs
- printf '#!/bin/sh\nexec "$(REAL_CC)" --specs="$(BUILD_PATH)/musl-gcc.specs" "$$@"\n' > $(BUILD_PATH)/musl-gcc
- chmod +x $(BUILD_PATH)/musl-gcc
-
$(IPERF_PATH)/.installed: $(IPERF_TAR)
mkdir -p $(BUILD_PATH)
flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
@@ -289,6 +350,7 @@ $(IPERF_PATH)/.installed: $(IPERF_TAR)
touch $@
$(IPERF_PATH)/src/iperf3: | $(IPERF_PATH)/.installed $(USERSPACE_DEPS)
+ cd $(IPERF_PATH) && autoreconf -fi
cd $(IPERF_PATH) && CFLAGS="$(CFLAGS) -D_GNU_SOURCE" ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --with-openssl=no
$(MAKE) -C $(IPERF_PATH)
$(STRIP) -s $@
@@ -304,7 +366,7 @@ $(WIREGUARD_TOOLS_PATH)/src/wg: | $(WIREGUARD_TOOLS_PATH)/.installed $(USERSPACE
$(BUILD_PATH)/init: init.c | $(USERSPACE_DEPS)
mkdir -p $(BUILD_PATH)
- $(MUSL_CC) -o $@ $(CFLAGS) $(LDFLAGS) -std=gnu11 $<
+ $(CC) -o $@ $(CFLAGS) $(LDFLAGS) -std=gnu11 $<
$(STRIP) -s $@
$(IPUTILS_PATH)/.installed: $(IPUTILS_TAR)
@@ -323,15 +385,15 @@ $(BASH_PATH)/.installed: $(BASH_TAR)
touch $@
$(BASH_PATH)/bash: | $(BASH_PATH)/.installed $(USERSPACE_DEPS)
- cd $(BASH_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --without-bash-malloc --disable-debugger --disable-help-builtin --disable-history --disable-multibyte --disable-progcomp --disable-readline --disable-mem-scramble
+ cd $(BASH_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --without-bash-malloc --disable-debugger --disable-help-builtin --disable-history --disable-progcomp --disable-readline --disable-mem-scramble
$(MAKE) -C $(BASH_PATH)
$(STRIP) -s $@
$(IPROUTE2_PATH)/.installed: $(IPROUTE2_TAR)
mkdir -p $(BUILD_PATH)
flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
- printf 'CC:=$(CC)\nPKG_CONFIG:=pkg-config\nTC_CONFIG_XT:=n\nTC_CONFIG_ATM:=n\nTC_CONFIG_IPSET:=n\nIP_CONFIG_SETNS:=y\nHAVE_ELF:=n\nHAVE_MNL:=n\nHAVE_BERKELEY_DB:=n\nHAVE_LATEX:=n\nHAVE_PDFLATEX:=n\nCFLAGS+=-DHAVE_SETNS\n' > $(IPROUTE2_PATH)/config.mk
- printf 'lib: snapshot\n\t$$(MAKE) -C lib\nip/ip: lib\n\t$$(MAKE) -C ip ip\nmisc/ss: lib\n\t$$(MAKE) -C misc ss\n' >> $(IPROUTE2_PATH)/Makefile
+ printf 'CC:=$(CC)\nPKG_CONFIG:=pkg-config\nTC_CONFIG_XT:=n\nTC_CONFIG_ATM:=n\nTC_CONFIG_IPSET:=n\nIP_CONFIG_SETNS:=y\nHAVE_ELF:=n\nHAVE_MNL:=n\nHAVE_BERKELEY_DB:=n\nHAVE_LATEX:=n\nHAVE_PDFLATEX:=n\nCFLAGS+=-DHAVE_SETNS -DHAVE_HANDLE_AT\n' > $(IPROUTE2_PATH)/config.mk
+ printf 'libutil.a.done:\n\tflock -x $$@.lock $$(MAKE) -C lib\n\ttouch $$@\nip/ip: libutil.a.done\n\t$$(MAKE) -C ip ip\nmisc/ss: libutil.a.done\n\t$$(MAKE) -C misc ss\n' >> $(IPROUTE2_PATH)/Makefile
touch $@
$(IPROUTE2_PATH)/ip/ip: | $(IPROUTE2_PATH)/.installed $(USERSPACE_DEPS)
@@ -370,8 +432,13 @@ clean:
distclean: clean
rm -rf $(DISTFILES_PATH)
+cacheclean: clean
+ifneq ($(CCACHE_DIR),)
+ rm -rf $(CCACHE_DIR)
+endif
+
menuconfig: $(KERNEL_BUILD_PATH)/.config
$(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) menuconfig
-.PHONY: qemu build clean distclean menuconfig
+.PHONY: qemu build clean distclean cacheclean menuconfig
.DELETE_ON_ERROR:
diff --git a/tools/testing/selftests/wireguard/qemu/arch/aarch64.config b/tools/testing/selftests/wireguard/qemu/arch/aarch64.config
index 3d063bb247bb..09016880ce03 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/aarch64.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/aarch64.config
@@ -1,5 +1,8 @@
CONFIG_SERIAL_AMBA_PL011=y
CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
+CONFIG_VIRTIO_MENU=y
+CONFIG_VIRTIO_MMIO=y
+CONFIG_VIRTIO_CONSOLE=y
CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1"
+CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1 panic_on_warn=1"
CONFIG_FRAME_WARN=1280
diff --git a/tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config b/tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config
index dbdc7e406a7b..19ff66e4c602 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config
@@ -1,6 +1,9 @@
CONFIG_CPU_BIG_ENDIAN=y
CONFIG_SERIAL_AMBA_PL011=y
CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
+CONFIG_VIRTIO_MENU=y
+CONFIG_VIRTIO_MMIO=y
+CONFIG_VIRTIO_CONSOLE=y
CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1"
+CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1 panic_on_warn=1"
CONFIG_FRAME_WARN=1280
diff --git a/tools/testing/selftests/wireguard/qemu/arch/arm.config b/tools/testing/selftests/wireguard/qemu/arch/arm.config
index 148f49905418..fc7959bef9c2 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/arm.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/arm.config
@@ -4,6 +4,9 @@ CONFIG_ARCH_VIRT=y
CONFIG_THUMB2_KERNEL=n
CONFIG_SERIAL_AMBA_PL011=y
CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
+CONFIG_VIRTIO_MENU=y
+CONFIG_VIRTIO_MMIO=y
+CONFIG_VIRTIO_CONSOLE=y
CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1"
+CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1 panic_on_warn=1"
CONFIG_FRAME_WARN=1024
diff --git a/tools/testing/selftests/wireguard/qemu/arch/armeb.config b/tools/testing/selftests/wireguard/qemu/arch/armeb.config
index bd76b07d00a2..f3066be81c19 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/armeb.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/armeb.config
@@ -4,7 +4,10 @@ CONFIG_ARCH_VIRT=y
CONFIG_THUMB2_KERNEL=n
CONFIG_SERIAL_AMBA_PL011=y
CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
+CONFIG_VIRTIO_MENU=y
+CONFIG_VIRTIO_MMIO=y
+CONFIG_VIRTIO_CONSOLE=y
CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1"
+CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1 panic_on_warn=1"
CONFIG_CPU_BIG_ENDIAN=y
CONFIG_FRAME_WARN=1024
diff --git a/tools/testing/selftests/wireguard/qemu/arch/i686.config b/tools/testing/selftests/wireguard/qemu/arch/i686.config
index a85025d7206e..6d90892a85a2 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/i686.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/i686.config
@@ -1,5 +1,6 @@
+CONFIG_ACPI=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1"
CONFIG_FRAME_WARN=1024
diff --git a/tools/testing/selftests/wireguard/qemu/arch/m68k.config b/tools/testing/selftests/wireguard/qemu/arch/m68k.config
index 62a15bdb877e..82c925e49beb 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/m68k.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/m68k.config
@@ -5,5 +5,5 @@ CONFIG_MAC=y
CONFIG_SERIAL_PMACZILOG=y
CONFIG_SERIAL_PMACZILOG_TTYS=y
CONFIG_SERIAL_PMACZILOG_CONSOLE=y
-CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1"
CONFIG_FRAME_WARN=1024
diff --git a/tools/testing/selftests/wireguard/qemu/arch/mips.config b/tools/testing/selftests/wireguard/qemu/arch/mips.config
index df71d6b95546..d7ec63c17b30 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/mips.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/mips.config
@@ -7,5 +7,5 @@ CONFIG_POWER_RESET_SYSCON=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1"
CONFIG_FRAME_WARN=1024
diff --git a/tools/testing/selftests/wireguard/qemu/arch/mips64.config b/tools/testing/selftests/wireguard/qemu/arch/mips64.config
index 90c783f725c4..0994947e3392 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/mips64.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/mips64.config
@@ -10,5 +10,5 @@ CONFIG_POWER_RESET_SYSCON=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1"
CONFIG_FRAME_WARN=1280
diff --git a/tools/testing/selftests/wireguard/qemu/arch/mips64el.config b/tools/testing/selftests/wireguard/qemu/arch/mips64el.config
index 435b0b43e00c..591184342f47 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/mips64el.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/mips64el.config
@@ -11,5 +11,5 @@ CONFIG_POWER_RESET_SYSCON=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1"
CONFIG_FRAME_WARN=1280
diff --git a/tools/testing/selftests/wireguard/qemu/arch/mipsel.config b/tools/testing/selftests/wireguard/qemu/arch/mipsel.config
index 62bb50c4a85f..18a498293737 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/mipsel.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/mipsel.config
@@ -8,5 +8,5 @@ CONFIG_POWER_RESET_SYSCON=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1"
CONFIG_FRAME_WARN=1024
diff --git a/tools/testing/selftests/wireguard/qemu/arch/powerpc.config b/tools/testing/selftests/wireguard/qemu/arch/powerpc.config
index 57957093b71b..5e04882e8e35 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/powerpc.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/powerpc.config
@@ -6,5 +6,5 @@ CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_MATH_EMULATION=y
CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1"
CONFIG_FRAME_WARN=1024
diff --git a/tools/testing/selftests/wireguard/qemu/arch/powerpc64.config b/tools/testing/selftests/wireguard/qemu/arch/powerpc64.config
new file mode 100644
index 000000000000..737194b7619e
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/arch/powerpc64.config
@@ -0,0 +1,13 @@
+CONFIG_PPC64=y
+CONFIG_PPC_PSERIES=y
+CONFIG_ALTIVEC=y
+CONFIG_VSX=y
+CONFIG_PPC_OF_BOOT_TRAMPOLINE=y
+CONFIG_PPC_RADIX_MMU=y
+CONFIG_HVC_CONSOLE=y
+CONFIG_CPU_BIG_ENDIAN=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="console=hvc0 wg.success=hvc1 panic_on_warn=1"
+CONFIG_SECTION_MISMATCH_WARN_ONLY=y
+CONFIG_FRAME_WARN=1280
+CONFIG_THREAD_SHIFT=14
diff --git a/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config b/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config
index f52f1e2bc7f6..8148b9d1220a 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config
@@ -7,7 +7,7 @@ CONFIG_PPC_RADIX_MMU=y
CONFIG_HVC_CONSOLE=y
CONFIG_CPU_LITTLE_ENDIAN=y
CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=hvc0 wg.success=hvc1"
+CONFIG_CMDLINE="console=hvc0 wg.success=hvc1 panic_on_warn=1"
CONFIG_SECTION_MISMATCH_WARN_ONLY=y
CONFIG_FRAME_WARN=1280
CONFIG_THREAD_SHIFT=14
diff --git a/tools/testing/selftests/wireguard/qemu/arch/riscv32.config b/tools/testing/selftests/wireguard/qemu/arch/riscv32.config
new file mode 100644
index 000000000000..0bd0e72d95d4
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/arch/riscv32.config
@@ -0,0 +1,12 @@
+CONFIG_ARCH_RV32I=y
+CONFIG_MMU=y
+CONFIG_FPU=y
+CONFIG_SOC_VIRT=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_OF_PLATFORM=y
+CONFIG_VIRTIO_MENU=y
+CONFIG_VIRTIO_MMIO=y
+CONFIG_VIRTIO_CONSOLE=y
+CONFIG_CMDLINE="console=ttyS0 wg.success=vport0p1 panic_on_warn=1"
+CONFIG_CMDLINE_FORCE=y
diff --git a/tools/testing/selftests/wireguard/qemu/arch/riscv64.config b/tools/testing/selftests/wireguard/qemu/arch/riscv64.config
new file mode 100644
index 000000000000..dc266f3b1915
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/arch/riscv64.config
@@ -0,0 +1,12 @@
+CONFIG_ARCH_RV64I=y
+CONFIG_MMU=y
+CONFIG_FPU=y
+CONFIG_SOC_VIRT=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_OF_PLATFORM=y
+CONFIG_VIRTIO_MENU=y
+CONFIG_VIRTIO_MMIO=y
+CONFIG_VIRTIO_CONSOLE=y
+CONFIG_CMDLINE="console=ttyS0 wg.success=vport0p1 panic_on_warn=1"
+CONFIG_CMDLINE_FORCE=y
diff --git a/tools/testing/selftests/wireguard/qemu/arch/s390x.config b/tools/testing/selftests/wireguard/qemu/arch/s390x.config
new file mode 100644
index 000000000000..a7b44dca0b0a
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/arch/s390x.config
@@ -0,0 +1,6 @@
+CONFIG_SCLP_VT220_TTY=y
+CONFIG_SCLP_VT220_CONSOLE=y
+CONFIG_VIRTIO_MENU=y
+CONFIG_VIRTIO_CONSOLE=y
+CONFIG_S390_GUEST=y
+CONFIG_CMDLINE="console=ttysclp0 wg.success=vport0p1 panic_on_warn=1"
diff --git a/tools/testing/selftests/wireguard/qemu/arch/x86_64.config b/tools/testing/selftests/wireguard/qemu/arch/x86_64.config
index 00a1ef4869d5..efa00693e08b 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/x86_64.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/x86_64.config
@@ -1,5 +1,6 @@
+CONFIG_ACPI=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1"
CONFIG_FRAME_WARN=1280
diff --git a/tools/testing/selftests/wireguard/qemu/init.c b/tools/testing/selftests/wireguard/qemu/init.c
index 0b45055d9de0..2a0f48fac925 100644
--- a/tools/testing/selftests/wireguard/qemu/init.c
+++ b/tools/testing/selftests/wireguard/qemu/init.c
@@ -110,12 +110,6 @@ static void enable_logging(void)
panic("write(exception-trace)");
close(fd);
}
- fd = open("/proc/sys/kernel/panic_on_warn", O_WRONLY);
- if (fd >= 0) {
- if (write(fd, "1\n", 2) != 2)
- panic("write(panic_on_warn)");
- close(fd);
- }
}
static void kmod_selftests(void)
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index 53df7d3893d3..0388c4d60af0 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -92,6 +92,10 @@ warn_32bit_failure:
echo "If you are using a Fedora-like distribution, try:"; \
echo ""; \
echo " yum install glibc-devel.*i686"; \
+ echo ""; \
+ echo "If you are using a SUSE-like distribution, try:"; \
+ echo ""; \
+ echo " zypper install gcc-32bit glibc-devel-static-32bit"; \
exit 0;
endif
diff --git a/tools/testing/selftests/x86/amx.c b/tools/testing/selftests/x86/amx.c
index 2189f0322d8b..625e42901237 100644
--- a/tools/testing/selftests/x86/amx.c
+++ b/tools/testing/selftests/x86/amx.c
@@ -17,6 +17,8 @@
#include <sys/syscall.h>
#include <sys/wait.h>
+#include "../kselftest.h" /* For __cpuid_count() */
+
#ifndef __x86_64__
# error This test is 64-bit only
#endif
@@ -45,13 +47,6 @@ static inline uint64_t xgetbv(uint32_t index)
return eax + ((uint64_t)edx << 32);
}
-static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
-{
- asm volatile("cpuid;"
- : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx)
- : "0" (*eax), "2" (*ecx));
-}
-
static inline void xsave(struct xsave_buffer *xbuf, uint64_t rfbm)
{
uint32_t rfbm_lo = rfbm;
@@ -115,9 +110,7 @@ static inline void check_cpuid_xsave(void)
* support for the XSAVE feature set, including
* XGETBV.
*/
- eax = 1;
- ecx = 0;
- cpuid(&eax, &ebx, &ecx, &edx);
+ __cpuid_count(1, 0, eax, ebx, ecx, edx);
if (!(ecx & CPUID_LEAF1_ECX_XSAVE_MASK))
fatal_error("cpuid: no CPU xsave support");
if (!(ecx & CPUID_LEAF1_ECX_OSXSAVE_MASK))
@@ -140,9 +133,8 @@ static void check_cpuid_xtiledata(void)
{
uint32_t eax, ebx, ecx, edx;
- eax = CPUID_LEAF_XSTATE;
- ecx = CPUID_SUBLEAF_XSTATE_USER;
- cpuid(&eax, &ebx, &ecx, &edx);
+ __cpuid_count(CPUID_LEAF_XSTATE, CPUID_SUBLEAF_XSTATE_USER,
+ eax, ebx, ecx, edx);
/*
* EBX enumerates the size (in bytes) required by the XSAVE
@@ -153,10 +145,8 @@ static void check_cpuid_xtiledata(void)
*/
xbuf_size = ebx;
- eax = CPUID_LEAF_XSTATE;
- ecx = XFEATURE_XTILEDATA;
-
- cpuid(&eax, &ebx, &ecx, &edx);
+ __cpuid_count(CPUID_LEAF_XSTATE, XFEATURE_XTILEDATA,
+ eax, ebx, ecx, edx);
/*
* eax: XTILEDATA state component size
* ebx: XTILEDATA state component offset in user buffer
diff --git a/tools/testing/selftests/x86/corrupt_xstate_header.c b/tools/testing/selftests/x86/corrupt_xstate_header.c
index ab8599c10ce5..cf9ce8fbb656 100644
--- a/tools/testing/selftests/x86/corrupt_xstate_header.c
+++ b/tools/testing/selftests/x86/corrupt_xstate_header.c
@@ -17,25 +17,13 @@
#include <stdint.h>
#include <sys/wait.h>
-static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
- unsigned int *ecx, unsigned int *edx)
-{
- asm volatile(
- "cpuid;"
- : "=a" (*eax),
- "=b" (*ebx),
- "=c" (*ecx),
- "=d" (*edx)
- : "0" (*eax), "2" (*ecx));
-}
+#include "../kselftest.h" /* For __cpuid_count() */
static inline int xsave_enabled(void)
{
unsigned int eax, ebx, ecx, edx;
- eax = 0x1;
- ecx = 0x0;
- __cpuid(&eax, &ebx, &ecx, &edx);
+ __cpuid_count(0x1, 0x0, eax, ebx, ecx, edx);
/* Is CR4.OSXSAVE enabled ? */
return ecx & (1U << 27);
diff --git a/tools/thermal/lib/Build b/tools/thermal/lib/Build
new file mode 100644
index 000000000000..06f22760a272
--- /dev/null
+++ b/tools/thermal/lib/Build
@@ -0,0 +1,3 @@
+libthermal_tools-y += mainloop.o
+libthermal_tools-y += log.o
+libthermal_tools-y += uptimeofday.o
diff --git a/tools/thermal/lib/Makefile b/tools/thermal/lib/Makefile
new file mode 100644
index 000000000000..82db451935c5
--- /dev/null
+++ b/tools/thermal/lib/Makefile
@@ -0,0 +1,158 @@
+# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+# Most of this file is copied from tools/lib/perf/Makefile
+
+LIBTHERMAL_TOOLS_VERSION = 0
+LIBTHERMAL_TOOLS_PATCHLEVEL = 0
+LIBTHERMAL_TOOLS_EXTRAVERSION = 1
+
+MAKEFLAGS += --no-print-directory
+
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(CURDIR)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+# $(info Determined 'srctree' to be $(srctree))
+endif
+
+INSTALL = install
+
+# Use DESTDIR for installing into a different root directory.
+# This is useful for building a package. The program will be
+# installed in this directory as if it was the root directory.
+# Then the build tool can move it later.
+DESTDIR ?=
+DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))'
+
+include $(srctree)/tools/scripts/Makefile.include
+include $(srctree)/tools/scripts/Makefile.arch
+
+ifeq ($(LP64), 1)
+ libdir_relative = lib64
+else
+ libdir_relative = lib
+endif
+
+prefix ?=
+libdir = $(prefix)/$(libdir_relative)
+
+# Shell quotes
+libdir_SQ = $(subst ','\'',$(libdir))
+libdir_relative_SQ = $(subst ','\'',$(libdir_relative))
+
+ifeq ("$(origin V)", "command line")
+ VERBOSE = $(V)
+endif
+ifndef VERBOSE
+ VERBOSE = 0
+endif
+
+ifeq ($(VERBOSE),1)
+ Q =
+else
+ Q = @
+endif
+
+# Set compile option CFLAGS
+ifdef EXTRA_CFLAGS
+ CFLAGS := $(EXTRA_CFLAGS)
+else
+ CFLAGS := -g -Wall
+endif
+
+INCLUDES = \
+-I/usr/include/libnl3 \
+-I$(srctree)/tools/lib/thermal/include \
+-I$(srctree)/tools/lib/ \
+-I$(srctree)/tools/include \
+-I$(srctree)/tools/arch/$(SRCARCH)/include/ \
+-I$(srctree)/tools/arch/$(SRCARCH)/include/uapi \
+-I$(srctree)/tools/include/uapi
+
+# Append required CFLAGS
+override CFLAGS += $(EXTRA_WARNINGS)
+override CFLAGS += -Werror -Wall
+override CFLAGS += -fPIC
+override CFLAGS += $(INCLUDES)
+override CFGLAS += -Wl,-L.
+override CFGLAS += -Wl,-lthermal
+
+all:
+
+export srctree OUTPUT CC LD CFLAGS V
+export DESTDIR DESTDIR_SQ
+
+include $(srctree)/tools/build/Makefile.include
+
+PATCHLEVEL = $(LIBTHERMAL_TOOLS_PATCHLEVEL)
+EXTRAVERSION = $(LIBTHERMAL_TOOLS_EXTRAVERSION)
+VERSION = $(LIBTHERMAL_TOOLS_VERSION).$(LIBTHERMAL_TOOLS_PATCHLEVEL).$(LIBTHERMAL_TOOLS_EXTRAVERSION)
+
+LIBTHERMAL_TOOLS_SO := $(OUTPUT)libthermal_tools.so.$(VERSION)
+LIBTHERMAL_TOOLS_A := $(OUTPUT)libthermal_tools.a
+LIBTHERMAL_TOOLS_IN := $(OUTPUT)libthermal_tools-in.o
+LIBTHERMAL_TOOLS_PC := $(OUTPUT)libthermal_tools.pc
+
+LIBTHERMAL_TOOLS_ALL := $(LIBTHERMAL_TOOLS_A) $(OUTPUT)libthermal_tools.so*
+
+$(LIBTHERMAL_TOOLS_IN): FORCE
+ $(Q)$(MAKE) $(build)=libthermal_tools
+
+$(LIBTHERMAL_TOOLS_A): $(LIBTHERMAL_TOOLS_IN)
+ $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIBTHERMAL_TOOLS_IN)
+
+$(LIBTHERMAL_TOOLS_SO): $(LIBTHERMAL_TOOLS_IN)
+ $(QUIET_LINK)$(CC) --shared -Wl,-soname,libthermal_tools.so $^ -o $@
+ @ln -sf $(@F) $(OUTPUT)libthermal_tools.so
+ @ln -sf $(@F) $(OUTPUT)libthermal_tools.so.$(LIBTHERMAL_TOOLS_VERSION)
+
+
+libs: $(LIBTHERMAL_TOOLS_A) $(LIBTHERMAL_TOOLS_SO) $(LIBTHERMAL_TOOLS_PC)
+
+all: fixdep
+ $(Q)$(MAKE) libs
+
+clean:
+ $(call QUIET_CLEAN, libthermal_tools) $(RM) $(LIBTHERMAL_TOOLS_A) \
+ *.o *~ *.a *.so *.so.$(VERSION) *.so.$(LIBTHERMAL_TOOLS_VERSION) .*.d .*.cmd LIBTHERMAL_TOOLS-CFLAGS $(LIBTHERMAL_TOOLS_PC)
+
+$(LIBTHERMAL_TOOLS_PC):
+ $(QUIET_GEN)sed -e "s|@PREFIX@|$(prefix)|" \
+ -e "s|@LIBDIR@|$(libdir_SQ)|" \
+ -e "s|@VERSION@|$(VERSION)|" \
+ < libthermal_tools.pc.template > $@
+
+define do_install_mkdir
+ if [ ! -d '$(DESTDIR_SQ)$1' ]; then \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$1'; \
+ fi
+endef
+
+define do_install
+ if [ ! -d '$(DESTDIR_SQ)$2' ]; then \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \
+ fi; \
+ $(INSTALL) $1 $(if $3,-m $3,) '$(DESTDIR_SQ)$2'
+endef
+
+install_lib: libs
+ $(call QUIET_INSTALL, $(LIBTHERMAL_TOOLS_ALL)) \
+ $(call do_install_mkdir,$(libdir_SQ)); \
+ cp -fpR $(LIBTHERMAL_TOOLS_ALL) $(DESTDIR)$(libdir_SQ)
+
+install_headers:
+ $(call QUIET_INSTALL, headers) \
+ $(call do_install,include/thermal.h,$(prefix)/include/thermal,644); \
+
+install_pkgconfig: $(LIBTHERMAL_TOOLS_PC)
+ $(call QUIET_INSTALL, $(LIBTHERMAL_TOOLS_PC)) \
+ $(call do_install,$(LIBTHERMAL_TOOLS_PC),$(libdir_SQ)/pkgconfig,644)
+
+install_doc:
+ $(Q)$(MAKE) -C Documentation install-man install-html install-examples
+
+#install: install_lib install_headers install_pkgconfig install_doc
+install: install_lib install_headers install_pkgconfig
+
+FORCE:
+
+.PHONY: all install clean FORCE
diff --git a/tools/thermal/lib/libthermal_tools.pc.template b/tools/thermal/lib/libthermal_tools.pc.template
new file mode 100644
index 000000000000..6f3769731b59
--- /dev/null
+++ b/tools/thermal/lib/libthermal_tools.pc.template
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+prefix=@PREFIX@
+libdir=@LIBDIR@
+includedir=${prefix}/include
+
+Name: libthermal
+Description: thermal library
+Requires: libnl-3.0 libnl-genl-3.0
+Version: @VERSION@
+Libs: -L${libdir} -lnl-genl-3 -lnl-3
+Cflags: -I${includedir} -I{include}/libnl3
diff --git a/tools/thermal/lib/log.c b/tools/thermal/lib/log.c
new file mode 100644
index 000000000000..597d6e7f7858
--- /dev/null
+++ b/tools/thermal/lib/log.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: LGPL-2.1+
+// Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org>
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+#include <syslog.h>
+#include "log.h"
+
+static const char *__ident = "unknown";
+static int __options;
+
+static const char * const loglvl[] = {
+ [LOG_DEBUG] = "DEBUG",
+ [LOG_INFO] = "INFO",
+ [LOG_NOTICE] = "NOTICE",
+ [LOG_WARNING] = "WARN",
+ [LOG_ERR] = "ERROR",
+ [LOG_CRIT] = "CRITICAL",
+ [LOG_ALERT] = "ALERT",
+ [LOG_EMERG] = "EMERG",
+};
+
+int log_str2level(const char *lvl)
+{
+ int i;
+
+ for (i = 0; i < sizeof(loglvl) / sizeof(loglvl[LOG_DEBUG]); i++)
+ if (!strcmp(lvl, loglvl[i]))
+ return i;
+
+ return LOG_DEBUG;
+}
+
+extern void logit(int level, const char *format, ...)
+{
+ va_list args;
+
+ va_start(args, format);
+
+ if (__options & TO_SYSLOG)
+ vsyslog(level, format, args);
+
+ if (__options & TO_STDERR)
+ vfprintf(stderr, format, args);
+
+ if (__options & TO_STDOUT)
+ vfprintf(stdout, format, args);
+
+ va_end(args);
+}
+
+int log_init(int level, const char *ident, int options)
+{
+ if (!options)
+ return -1;
+
+ if (level > LOG_DEBUG)
+ return -1;
+
+ if (!ident)
+ return -1;
+
+ __ident = ident;
+ __options = options;
+
+ if (options & TO_SYSLOG) {
+ openlog(__ident, options | LOG_NDELAY, LOG_USER);
+ setlogmask(LOG_UPTO(level));
+ }
+
+ return 0;
+}
+
+void log_exit(void)
+{
+ closelog();
+}
diff --git a/tools/thermal/lib/log.h b/tools/thermal/lib/log.h
new file mode 100644
index 000000000000..be8ab5144938
--- /dev/null
+++ b/tools/thermal/lib/log.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/* Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> */
+#ifndef __THERMAL_TOOLS_LOG_H
+#define __THERMAL_TOOLS_LOG_H
+
+#include <syslog.h>
+
+#ifndef __maybe_unused
+#define __maybe_unused __attribute__((__unused__))
+#endif
+
+#define TO_SYSLOG 0x1
+#define TO_STDOUT 0x2
+#define TO_STDERR 0x4
+
+extern void logit(int level, const char *format, ...);
+
+#define DEBUG(fmt, ...) logit(LOG_DEBUG, "%s:%d: " fmt, __func__, __LINE__, ##__VA_ARGS__)
+#define INFO(fmt, ...) logit(LOG_INFO, fmt, ##__VA_ARGS__)
+#define NOTICE(fmt, ...) logit(LOG_NOTICE, fmt, ##__VA_ARGS__)
+#define WARN(fmt, ...) logit(LOG_WARNING, fmt, ##__VA_ARGS__)
+#define ERROR(fmt, ...) logit(LOG_ERR, fmt, ##__VA_ARGS__)
+#define CRITICAL(fmt, ...) logit(LOG_CRIT, fmt, ##__VA_ARGS__)
+#define ALERT(fmt, ...) logit(LOG_ALERT, fmt, ##__VA_ARGS__)
+#define EMERG(fmt, ...) logit(LOG_EMERG, fmt, ##__VA_ARGS__)
+
+int log_init(int level, const char *ident, int options);
+int log_str2level(const char *lvl);
+void log_exit(void);
+
+#endif
diff --git a/tools/thermal/lib/mainloop.c b/tools/thermal/lib/mainloop.c
new file mode 100644
index 000000000000..94cbbcbd1c14
--- /dev/null
+++ b/tools/thermal/lib/mainloop.c
@@ -0,0 +1,120 @@
+// SPDX-License-Identifier: LGPL-2.1+
+// Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org>
+#include <stdlib.h>
+#include <errno.h>
+#include <unistd.h>
+#include <signal.h>
+#include <sys/epoll.h>
+#include "mainloop.h"
+#include "log.h"
+
+static int epfd = -1;
+static unsigned short nrhandler;
+static sig_atomic_t exit_mainloop;
+
+struct mainloop_data {
+ mainloop_callback_t cb;
+ void *data;
+ int fd;
+};
+
+static struct mainloop_data **mds;
+
+#define MAX_EVENTS 10
+
+int mainloop(unsigned int timeout)
+{
+ int i, nfds;
+ struct epoll_event events[MAX_EVENTS];
+ struct mainloop_data *md;
+
+ if (epfd < 0)
+ return -1;
+
+ for (;;) {
+
+ nfds = epoll_wait(epfd, events, MAX_EVENTS, timeout);
+
+ if (exit_mainloop || !nfds)
+ return 0;
+
+ if (nfds < 0) {
+ if (errno == EINTR)
+ continue;
+ return -1;
+ }
+
+ for (i = 0; i < nfds; i++) {
+ md = events[i].data.ptr;
+
+ if (md->cb(md->fd, md->data) > 0)
+ return 0;
+ }
+ }
+}
+
+int mainloop_add(int fd, mainloop_callback_t cb, void *data)
+{
+ struct epoll_event ev = {
+ .events = EPOLLIN,
+ };
+
+ struct mainloop_data *md;
+
+ if (fd >= nrhandler) {
+ mds = realloc(mds, sizeof(*mds) * (fd + 1));
+ if (!mds)
+ return -1;
+ nrhandler = fd + 1;
+ }
+
+ md = malloc(sizeof(*md));
+ if (!md)
+ return -1;
+
+ md->data = data;
+ md->cb = cb;
+ md->fd = fd;
+
+ mds[fd] = md;
+ ev.data.ptr = md;
+
+ if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev) < 0) {
+ free(md);
+ return -1;
+ }
+
+ return 0;
+}
+
+int mainloop_del(int fd)
+{
+ if (fd >= nrhandler)
+ return -1;
+
+ if (epoll_ctl(epfd, EPOLL_CTL_DEL, fd, NULL) < 0)
+ return -1;
+
+ free(mds[fd]);
+
+ return 0;
+}
+
+int mainloop_init(void)
+{
+ epfd = epoll_create(2);
+ if (epfd < 0)
+ return -1;
+
+ return 0;
+}
+
+void mainloop_exit(void)
+{
+ exit_mainloop = 1;
+}
+
+void mainloop_fini(void)
+{
+ close(epfd);
+}
diff --git a/tools/thermal/lib/mainloop.h b/tools/thermal/lib/mainloop.h
new file mode 100644
index 000000000000..89b61e89d905
--- /dev/null
+++ b/tools/thermal/lib/mainloop.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/* Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> */
+#ifndef __THERMAL_TOOLS_MAINLOOP_H
+#define __THERMAL_TOOLS_MAINLOOP_H
+
+typedef int (*mainloop_callback_t)(int fd, void *data);
+
+extern int mainloop(unsigned int timeout);
+extern int mainloop_add(int fd, mainloop_callback_t cb, void *data);
+extern int mainloop_del(int fd);
+extern void mainloop_exit(void);
+extern int mainloop_init(void);
+extern void mainloop_fini(void);
+
+#endif
diff --git a/tools/thermal/lib/thermal-tools.h b/tools/thermal/lib/thermal-tools.h
new file mode 100644
index 000000000000..f43939a468a3
--- /dev/null
+++ b/tools/thermal/lib/thermal-tools.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/* Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> */
+#ifndef __THERMAL_TOOLS
+#define __THERMAL_TOOLS
+
+#include "log.h"
+#include "mainloop.h"
+#include "uptimeofday.h"
+
+#endif
diff --git a/tools/thermal/lib/uptimeofday.c b/tools/thermal/lib/uptimeofday.c
new file mode 100644
index 000000000000..dacb02956a68
--- /dev/null
+++ b/tools/thermal/lib/uptimeofday.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: LGPL-2.1+
+// Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org>
+#include <stdio.h>
+#include <sys/time.h>
+#include <linux/sysinfo.h>
+#include "thermal-tools.h"
+
+static unsigned long __offset;
+static struct timeval __tv;
+
+int uptimeofday_init(void)
+{
+ struct sysinfo info;
+
+ if (sysinfo(&info))
+ return -1;
+
+ gettimeofday(&__tv, NULL);
+
+ __offset = __tv.tv_sec - info.uptime;
+
+ return 0;
+}
+
+unsigned long getuptimeofday_ms(void)
+{
+ gettimeofday(&__tv, NULL);
+
+ return ((__tv.tv_sec - __offset) * 1000) + (__tv.tv_usec / 1000);
+}
+
+struct timespec msec_to_timespec(int msec)
+{
+ struct timespec tv = {
+ .tv_sec = (msec / 1000),
+ .tv_nsec = (msec % 1000) * 1000000,
+ };
+
+ return tv;
+}
diff --git a/tools/thermal/lib/uptimeofday.h b/tools/thermal/lib/uptimeofday.h
new file mode 100644
index 000000000000..c0da5de41325
--- /dev/null
+++ b/tools/thermal/lib/uptimeofday.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/* Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> */
+#ifndef __THERMAL_TOOLS_UPTIMEOFDAY_H
+#define __THERMAL_TOOLS_UPTIMEOFDAY_H
+#include <sys/sysinfo.h>
+#include <sys/time.h>
+
+int uptimeofday_init(void);
+unsigned long getuptimeofday_ms(void);
+struct timespec msec_to_timespec(int msec);
+
+#endif
diff --git a/tools/thermal/thermal-engine/Build b/tools/thermal/thermal-engine/Build
new file mode 100644
index 000000000000..20c3c478b88d
--- /dev/null
+++ b/tools/thermal/thermal-engine/Build
@@ -0,0 +1 @@
+thermal-engine-y += thermal-engine.o
diff --git a/tools/thermal/thermal-engine/Makefile b/tools/thermal/thermal-engine/Makefile
new file mode 100644
index 000000000000..6bd05ff89485
--- /dev/null
+++ b/tools/thermal/thermal-engine/Makefile
@@ -0,0 +1,28 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for thermal tools
+
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(CURDIR)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+# $(info Determined 'srctree' to be $(srctree))
+endif
+
+CFLAGS = -Wall -Wextra
+CFLAGS += -I$(srctree)/tools/thermal/lib
+CFLAGS += -I$(srctree)/tools/lib/thermal/include
+
+LDFLAGS = -L$(srctree)/tools/thermal/lib
+LDFLAGS += -L$(srctree)/tools/lib/thermal
+LDFLAGS += -lthermal_tools
+LDFLAGS += -lthermal
+LDFLAGS += -lconfig
+LDFLAGS += -lnl-genl-3 -lnl-3
+
+VERSION = 0.0.1
+
+all: thermal-engine
+%: %.c
+ $(CC) $(CFLAGS) -D VERSION=\"$(VERSION)\" -o $@ $^ $(LDFLAGS)
+clean:
+ $(RM) thermal-engine
diff --git a/tools/thermal/thermal-engine/thermal-engine.c b/tools/thermal/thermal-engine/thermal-engine.c
new file mode 100644
index 000000000000..9b1476a2680f
--- /dev/null
+++ b/tools/thermal/thermal-engine/thermal-engine.c
@@ -0,0 +1,341 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Thermal monitoring tool based on the thermal netlink events.
+ *
+ * Copyright (C) 2022 Linaro Ltd.
+ *
+ * Author: Daniel Lezcano <daniel.lezcano@kernel.org>
+ */
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <libgen.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+
+#include <syslog.h>
+
+#include <sys/epoll.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include <thermal.h>
+#include "thermal-tools.h"
+
+struct options {
+ int loglevel;
+ int logopt;
+ int interactive;
+ int daemonize;
+};
+
+struct thermal_data {
+ struct thermal_zone *tz;
+ struct thermal_handler *th;
+};
+
+static int show_trip(struct thermal_trip *tt, __maybe_unused void *arg)
+{
+ INFO("trip id=%d, type=%d, temp=%d, hyst=%d\n",
+ tt->id, tt->type, tt->temp, tt->hyst);
+
+ return 0;
+}
+
+static int show_temp(struct thermal_zone *tz, __maybe_unused void *arg)
+{
+ thermal_cmd_get_temp(arg, tz);
+
+ INFO("temperature: %d\n", tz->temp);
+
+ return 0;
+}
+
+static int show_governor(struct thermal_zone *tz, __maybe_unused void *arg)
+{
+ thermal_cmd_get_governor(arg, tz);
+
+ INFO("governor: '%s'\n", tz->governor);
+
+ return 0;
+}
+
+static int show_tz(struct thermal_zone *tz, __maybe_unused void *arg)
+{
+ INFO("thermal zone '%s', id=%d\n", tz->name, tz->id);
+
+ for_each_thermal_trip(tz->trip, show_trip, NULL);
+
+ show_temp(tz, arg);
+
+ show_governor(tz, arg);
+
+ return 0;
+}
+
+static int tz_create(const char *name, int tz_id, __maybe_unused void *arg)
+{
+ INFO("Thermal zone '%s'/%d created\n", name, tz_id);
+
+ return 0;
+}
+
+static int tz_delete(int tz_id, __maybe_unused void *arg)
+{
+ INFO("Thermal zone %d deleted\n", tz_id);
+
+ return 0;
+}
+
+static int tz_disable(int tz_id, void *arg)
+{
+ struct thermal_data *td = arg;
+ struct thermal_zone *tz = thermal_zone_find_by_id(td->tz, tz_id);
+
+ INFO("Thermal zone %d ('%s') disabled\n", tz_id, tz->name);
+
+ return 0;
+}
+
+static int tz_enable(int tz_id, void *arg)
+{
+ struct thermal_data *td = arg;
+ struct thermal_zone *tz = thermal_zone_find_by_id(td->tz, tz_id);
+
+ INFO("Thermal zone %d ('%s') enabled\n", tz_id, tz->name);
+
+ return 0;
+}
+
+static int trip_high(int tz_id, int trip_id, int temp, void *arg)
+{
+ struct thermal_data *td = arg;
+ struct thermal_zone *tz = thermal_zone_find_by_id(td->tz, tz_id);
+
+ INFO("Thermal zone %d ('%s'): trip point %d crossed way up with %d °C\n",
+ tz_id, tz->name, trip_id, temp);
+
+ return 0;
+}
+
+static int trip_low(int tz_id, int trip_id, int temp, void *arg)
+{
+ struct thermal_data *td = arg;
+ struct thermal_zone *tz = thermal_zone_find_by_id(td->tz, tz_id);
+
+ INFO("Thermal zone %d ('%s'): trip point %d crossed way down with %d °C\n",
+ tz_id, tz->name, trip_id, temp);
+
+ return 0;
+}
+
+static int trip_add(int tz_id, int trip_id, int type, int temp, int hyst, __maybe_unused void *arg)
+{
+ INFO("Trip point added %d: id=%d, type=%d, temp=%d, hyst=%d\n",
+ tz_id, trip_id, type, temp, hyst);
+
+ return 0;
+}
+
+static int trip_delete(int tz_id, int trip_id, __maybe_unused void *arg)
+{
+ INFO("Trip point deleted %d: id=%d\n", tz_id, trip_id);
+
+ return 0;
+}
+
+static int trip_change(int tz_id, int trip_id, int type, int temp,
+ int hyst, __maybe_unused void *arg)
+{
+ struct thermal_data *td = arg;
+ struct thermal_zone *tz = thermal_zone_find_by_id(td->tz, tz_id);
+
+ INFO("Trip point changed %d: id=%d, type=%d, temp=%d, hyst=%d\n",
+ tz_id, trip_id, type, temp, hyst);
+
+ tz->trip[trip_id].type = type;
+ tz->trip[trip_id].temp = temp;
+ tz->trip[trip_id].hyst = hyst;
+
+ return 0;
+}
+
+static int cdev_add(const char *name, int cdev_id, int max_state, __maybe_unused void *arg)
+{
+ INFO("Cooling device '%s'/%d (max state=%d) added\n", name, cdev_id, max_state);
+
+ return 0;
+}
+
+static int cdev_delete(int cdev_id, __maybe_unused void *arg)
+{
+ INFO("Cooling device %d deleted", cdev_id);
+
+ return 0;
+}
+
+static int cdev_update(int cdev_id, int cur_state, __maybe_unused void *arg)
+{
+ INFO("cdev:%d state:%d\n", cdev_id, cur_state);
+
+ return 0;
+}
+
+static int gov_change(int tz_id, const char *name, __maybe_unused void *arg)
+{
+ struct thermal_data *td = arg;
+ struct thermal_zone *tz = thermal_zone_find_by_id(td->tz, tz_id);
+
+ INFO("%s: governor changed %s -> %s\n", tz->name, tz->governor, name);
+
+ strcpy(tz->governor, name);
+
+ return 0;
+}
+
+static struct thermal_ops ops = {
+ .events.tz_create = tz_create,
+ .events.tz_delete = tz_delete,
+ .events.tz_disable = tz_disable,
+ .events.tz_enable = tz_enable,
+ .events.trip_high = trip_high,
+ .events.trip_low = trip_low,
+ .events.trip_add = trip_add,
+ .events.trip_delete = trip_delete,
+ .events.trip_change = trip_change,
+ .events.cdev_add = cdev_add,
+ .events.cdev_delete = cdev_delete,
+ .events.cdev_update = cdev_update,
+ .events.gov_change = gov_change
+};
+
+static int thermal_event(__maybe_unused int fd, __maybe_unused void *arg)
+{
+ struct thermal_data *td = arg;
+
+ return thermal_events_handle(td->th, td);
+}
+
+static void usage(const char *cmd)
+{
+ printf("%s : A thermal monitoring engine based on notifications\n", cmd);
+ printf("Usage: %s [options]\n", cmd);
+ printf("\t-h, --help\t\tthis help\n");
+ printf("\t-d, --daemonize\n");
+ printf("\t-l <level>, --loglevel <level>\tlog level: ");
+ printf("DEBUG, INFO, NOTICE, WARN, ERROR\n");
+ printf("\t-s, --syslog\t\toutput to syslog\n");
+ printf("\n");
+ exit(0);
+}
+
+static int options_init(int argc, char *argv[], struct options *options)
+{
+ int opt;
+
+ struct option long_options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "daemonize", no_argument, NULL, 'd' },
+ { "syslog", no_argument, NULL, 's' },
+ { "loglevel", required_argument, NULL, 'l' },
+ { 0, 0, 0, 0 }
+ };
+
+ while (1) {
+
+ int optindex = 0;
+
+ opt = getopt_long(argc, argv, "l:dhs", long_options, &optindex);
+ if (opt == -1)
+ break;
+
+ switch (opt) {
+ case 'l':
+ options->loglevel = log_str2level(optarg);
+ break;
+ case 'd':
+ options->daemonize = 1;
+ break;
+ case 's':
+ options->logopt = TO_SYSLOG;
+ break;
+ case 'h':
+ usage(basename(argv[0]));
+ break;
+ default: /* '?' */
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+enum {
+ THERMAL_ENGINE_SUCCESS = 0,
+ THERMAL_ENGINE_OPTION_ERROR,
+ THERMAL_ENGINE_DAEMON_ERROR,
+ THERMAL_ENGINE_LOG_ERROR,
+ THERMAL_ENGINE_THERMAL_ERROR,
+ THERMAL_ENGINE_MAINLOOP_ERROR,
+};
+
+int main(int argc, char *argv[])
+{
+ struct thermal_data td;
+ struct options options = {
+ .loglevel = LOG_INFO,
+ .logopt = TO_STDOUT,
+ };
+
+ if (options_init(argc, argv, &options)) {
+ ERROR("Usage: %s --help\n", argv[0]);
+ return THERMAL_ENGINE_OPTION_ERROR;
+ }
+
+ if (options.daemonize && daemon(0, 0)) {
+ ERROR("Failed to daemonize: %p\n");
+ return THERMAL_ENGINE_DAEMON_ERROR;
+ }
+
+ if (log_init(options.loglevel, basename(argv[0]), options.logopt)) {
+ ERROR("Failed to initialize logging facility\n");
+ return THERMAL_ENGINE_LOG_ERROR;
+ }
+
+ td.th = thermal_init(&ops);
+ if (!td.th) {
+ ERROR("Failed to initialize the thermal library\n");
+ return THERMAL_ENGINE_THERMAL_ERROR;
+ }
+
+ td.tz = thermal_zone_discover(td.th);
+ if (!td.tz) {
+ ERROR("No thermal zone available\n");
+ return THERMAL_ENGINE_THERMAL_ERROR;
+ }
+
+ for_each_thermal_zone(td.tz, show_tz, td.th);
+
+ if (mainloop_init()) {
+ ERROR("Failed to initialize the mainloop\n");
+ return THERMAL_ENGINE_MAINLOOP_ERROR;
+ }
+
+ if (mainloop_add(thermal_events_fd(td.th), thermal_event, &td)) {
+ ERROR("Failed to setup the mainloop\n");
+ return THERMAL_ENGINE_MAINLOOP_ERROR;
+ }
+
+ INFO("Waiting for thermal events ...\n");
+
+ if (mainloop(-1)) {
+ ERROR("Mainloop failed\n");
+ return THERMAL_ENGINE_MAINLOOP_ERROR;
+ }
+
+ return THERMAL_ENGINE_SUCCESS;
+}
diff --git a/tools/thermal/thermometer/Build b/tools/thermal/thermometer/Build
new file mode 100644
index 000000000000..1b96c159c3c8
--- /dev/null
+++ b/tools/thermal/thermometer/Build
@@ -0,0 +1 @@
+thermometer-y += thermometer.o
diff --git a/tools/thermal/thermometer/Makefile b/tools/thermal/thermometer/Makefile
new file mode 100644
index 000000000000..d8f8bc82fe3b
--- /dev/null
+++ b/tools/thermal/thermometer/Makefile
@@ -0,0 +1,26 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for cgroup tools
+
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(CURDIR)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+# $(info Determined 'srctree' to be $(srctree))
+endif
+
+CFLAGS = -Wall -Wextra
+CFLAGS += -I$(srctree)/tools/thermal/lib
+
+LDFLAGS = -L$(srctree)/tools/thermal/lib
+LDFLAGS += -lthermal_tools
+LDFLAGS += -lconfig
+
+VERSION = 0.0.1
+TARGET=thermometer
+
+all: $(TARGET)
+%: %.c
+ $(CC) $(CFLAGS) -D VERSION=\"$(VERSION)\" -o $@ $^ $(LDFLAGS)
+
+clean:
+ $(RM) $(TARGET)
diff --git a/tools/thermal/thermometer/thermometer.8 b/tools/thermal/thermometer/thermometer.8
new file mode 100644
index 000000000000..d090fbca4cba
--- /dev/null
+++ b/tools/thermal/thermometer/thermometer.8
@@ -0,0 +1,92 @@
+.TH THERMOMETER 8
+# SPDX-License-Identifier: GPL-2.0
+.SH NAME
+\fBthermometer\fP - A thermal profiling tool
+
+.SH SYNOPSIS
+.ft B
+.B thermometer
+.RB [ options ]
+.RB [ command ]
+.br
+.SH DESCRIPTION
+\fBthermometer \fP captures the thermal zones temperature at a
+specified sampling period. It is optimized to reduce as much as
+possible the overhead while doing the temperature acquisition in order
+to prevent disrupting the running application we may want to profile.
+
+This low overhead also allows a high rate sampling for the temperature
+which could be necessary to spot overshots and undershots.
+
+If no configuration file is specified, then all the thermal zones will
+be monitored at 4Hz, so every 250ms. A configuration file specifies
+the thermal zone names and the desired sampling period. A thermal zone
+name can be a regular expression to specify a group of thermal zone.
+
+The sampling of the different thermal zones will be written into
+separate files with the thermal zone name. It is possible to specify a
+postfix to identify them for example for a specific scenario. The
+output directory can be specified in addition.
+
+Without any parameters, \fBthermometer \fP captures all the thermal
+zone temperatures every 250ms and write to the current directory the
+captured files postfixed with the current date.
+
+If a running \fBduration\fP is specified or a \fBcommand\fP, the
+capture ends at the end of the duration if the command did not
+finished before. The \fBduration\fP can be specified alone as well as
+the \fBcommand\fP. If none is specified, the capture will continue
+indefinitively until interrupted by \fBSIGINT\fP or \fBSIGQUIT\fP.
+.PP
+
+.SS Options
+.PP
+The \fB-h, --help\fP option shows a short usage help
+.PP
+The \fB-o <dir>, --output <dir>\fP option defines the output directory to put the
+sampling files
+.PP
+The \fB-c <config>, --config <config>\fP option specifies the configuration file to use
+.PP
+The \fB-d <seconds>, --duration <seconds>\fP option specifies the duration of the capture
+.PP
+The \fB-l <loglevel>, --loglevel <loglevel>\fP option sets the loglevel [DEBUG,INFO,NOTICE,WARN,ERROR]
+.PP
+The \fB-p <string>, --postfix <string>\fP option appends \fBstring\fP at the end of the capture filenames
+.PP
+The \fB-s, --syslog\fP option sets the output to syslog, default is \fBstdout\fP
+.PP
+The \fB-w, --overwrite\fP overwrites the output files if they exist
+.PP
+
+.PP
+
+.SS "Exit status:"
+.TP
+0
+if OK,
+.TP
+1
+Error with the options specified as parameters
+.TP
+2
+Error when configuring the logging facility
+.TP
+3
+Error when configuring the time
+.TP
+4
+Error in the initialization routine
+.TP
+5
+Error during the runtime
+
+.SH Capture file format
+
+Every file contains two columns. The first one is the uptime timestamp
+in order to find a point in time since the system started up if there
+is any thermal event. The second one is the temperature in milli
+degree. The first line contains the label of each column.
+
+.SH AUTHOR
+Daniel Lezcano <daniel.lezcano@kernel.org>
diff --git a/tools/thermal/thermometer/thermometer.c b/tools/thermal/thermometer/thermometer.c
new file mode 100644
index 000000000000..1a87a0a77f9f
--- /dev/null
+++ b/tools/thermal/thermometer/thermometer.c
@@ -0,0 +1,572 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org>
+#define _GNU_SOURCE
+#include <dirent.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <regex.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/signalfd.h>
+#include <sys/timerfd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <time.h>
+#include <unistd.h>
+#include <linux/thermal.h>
+
+#include <libconfig.h>
+#include "thermal-tools.h"
+
+#define CLASS_THERMAL "/sys/class/thermal"
+
+enum {
+ THERMOMETER_SUCCESS = 0,
+ THERMOMETER_OPTION_ERROR,
+ THERMOMETER_LOG_ERROR,
+ THERMOMETER_CONFIG_ERROR,
+ THERMOMETER_TIME_ERROR,
+ THERMOMETER_INIT_ERROR,
+ THERMOMETER_RUNTIME_ERROR
+};
+
+struct options {
+ int loglvl;
+ int logopt;
+ int overwrite;
+ int duration;
+ const char *config;
+ char postfix[PATH_MAX];
+ char output[PATH_MAX];
+};
+
+struct tz_regex {
+ regex_t regex;
+ int polling;
+};
+
+struct configuration {
+ struct tz_regex *tz_regex;
+ int nr_tz_regex;
+
+};
+
+struct tz {
+ FILE *file_out;
+ int fd_temp;
+ int fd_timer;
+ int polling;
+ const char *name;
+};
+
+struct thermometer {
+ struct tz *tz;
+ int nr_tz;
+};
+
+static struct tz_regex *configuration_tz_match(const char *expr,
+ struct configuration *config)
+{
+ int i;
+
+ for (i = 0; i < config->nr_tz_regex; i++) {
+
+ if (!regexec(&config->tz_regex[i].regex, expr, 0, NULL, 0))
+ return &config->tz_regex[i];
+ }
+
+ return NULL;
+}
+
+static int configuration_default_init(struct configuration *config)
+{
+ config->tz_regex = realloc(config->tz_regex, sizeof(*config->tz_regex) *
+ (config->nr_tz_regex + 1));
+
+ if (regcomp(&config->tz_regex[config->nr_tz_regex].regex, ".*",
+ REG_NOSUB | REG_EXTENDED)) {
+ ERROR("Invalid regular expression\n");
+ return -1;
+ }
+
+ config->tz_regex[config->nr_tz_regex].polling = 250;
+ config->nr_tz_regex = 1;
+
+ return 0;
+}
+
+static int configuration_init(const char *path, struct configuration *config)
+{
+ config_t cfg;
+
+ config_setting_t *tz;
+ int i, length;
+
+ if (path && access(path, F_OK)) {
+ ERROR("'%s' is not accessible\n", path);
+ return -1;
+ }
+
+ if (!path && !config->nr_tz_regex) {
+ INFO("No thermal zones configured, using wildcard for all of them\n");
+ return configuration_default_init(config);
+ }
+
+ config_init(&cfg);
+
+ if (!config_read_file(&cfg, path)) {
+ ERROR("Failed to parse %s:%d - %s\n", config_error_file(&cfg),
+ config_error_line(&cfg), config_error_text(&cfg));
+
+ return -1;
+ }
+
+ tz = config_lookup(&cfg, "thermal-zones");
+ if (!tz) {
+ ERROR("No thermal zone configured to be monitored\n");
+ return -1;
+ }
+
+ length = config_setting_length(tz);
+
+ INFO("Found %d thermal zone(s) regular expression\n", length);
+
+ for (i = 0; i < length; i++) {
+
+ config_setting_t *node;
+ const char *name;
+ int polling;
+
+ node = config_setting_get_elem(tz, i);
+ if (!node) {
+ ERROR("Missing node name '%d'\n", i);
+ return -1;
+ }
+
+ if (!config_setting_lookup_string(node, "name", &name)) {
+ ERROR("Thermal zone name not found\n");
+ return -1;
+ }
+
+ if (!config_setting_lookup_int(node, "polling", &polling)) {
+ ERROR("Polling value not found");
+ return -1;
+ }
+
+ config->tz_regex = realloc(config->tz_regex, sizeof(*config->tz_regex) *
+ (config->nr_tz_regex + 1));
+
+ if (regcomp(&config->tz_regex[config->nr_tz_regex].regex, name,
+ REG_NOSUB | REG_EXTENDED)) {
+ ERROR("Invalid regular expression '%s'\n", name);
+ continue;
+ }
+
+ config->tz_regex[config->nr_tz_regex].polling = polling;
+ config->nr_tz_regex++;
+
+ INFO("Thermal zone regular expression '%s' with polling %d\n",
+ name, polling);
+ }
+
+ return 0;
+}
+
+static void usage(const char *cmd)
+{
+ printf("%s Version: %s\n", cmd, VERSION);
+ printf("Usage: %s [options]\n", cmd);
+ printf("\t-h, --help\t\tthis help\n");
+ printf("\t-o, --output <dir>\toutput directory for temperature capture\n");
+ printf("\t-c, --config <file>\tconfiguration file\n");
+ printf("\t-d, --duration <seconds>\tcapture duration\n");
+ printf("\t-l, --loglevel <level>\tlog level: ");
+ printf("DEBUG, INFO, NOTICE, WARN, ERROR\n");
+ printf("\t-p, --postfix <string>\tpostfix to be happened at the end of the files\n");
+ printf("\t-s, --syslog\t\toutput to syslog\n");
+ printf("\t-w, --overwrite\t\toverwrite the temperature capture files if they exist\n");
+ printf("\n");
+ exit(0);
+}
+
+static int options_init(int argc, char *argv[], struct options *options)
+{
+ int opt;
+ time_t now = time(NULL);
+
+ struct option long_options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "config", required_argument, NULL, 'c' },
+ { "duration", required_argument, NULL, 'd' },
+ { "loglevel", required_argument, NULL, 'l' },
+ { "postfix", required_argument, NULL, 'p' },
+ { "output", required_argument, NULL, 'o' },
+ { "syslog", required_argument, NULL, 's' },
+ { "overwrite", no_argument, NULL, 'w' },
+ { 0, 0, 0, 0 }
+ };
+
+ strftime(options->postfix, sizeof(options->postfix),
+ "-%Y-%m-%d_%H:%M:%S", gmtime(&now));
+
+ while (1) {
+
+ int optindex = 0;
+
+ opt = getopt_long(argc, argv, "ho:c:d:l:p:sw", long_options, &optindex);
+ if (opt == -1)
+ break;
+
+ switch (opt) {
+ case 'c':
+ options->config = optarg;
+ break;
+ case 'd':
+ options->duration = atoi(optarg) * 1000;
+ break;
+ case 'l':
+ options->loglvl = log_str2level(optarg);
+ break;
+ case 'h':
+ usage(basename(argv[0]));
+ break;
+ case 'p':
+ strcpy(options->postfix, optarg);
+ break;
+ case 'o':
+ strcpy(options->output, optarg);
+ break;
+ case 's':
+ options->logopt = TO_SYSLOG;
+ break;
+ case 'w':
+ options->overwrite = 1;
+ break;
+ default: /* '?' */
+ ERROR("Usage: %s --help\n", argv[0]);
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static int thermometer_add_tz(const char *path, const char *name, int polling,
+ struct thermometer *thermometer)
+{
+ int fd;
+ char tz_path[PATH_MAX];
+
+ sprintf(tz_path, CLASS_THERMAL"/%s/temp", path);
+
+ fd = open(tz_path, O_RDONLY);
+ if (fd < 0) {
+ ERROR("Failed to open '%s': %m\n", tz_path);
+ return -1;
+ }
+
+ thermometer->tz = realloc(thermometer->tz,
+ sizeof(*thermometer->tz) * (thermometer->nr_tz + 1));
+ if (!thermometer->tz) {
+ ERROR("Failed to allocate thermometer->tz\n");
+ return -1;
+ }
+
+ thermometer->tz[thermometer->nr_tz].fd_temp = fd;
+ thermometer->tz[thermometer->nr_tz].name = strdup(name);
+ thermometer->tz[thermometer->nr_tz].polling = polling;
+ thermometer->nr_tz++;
+
+ INFO("Added thermal zone '%s->%s (polling:%d)'\n", path, name, polling);
+
+ return 0;
+}
+
+static int thermometer_init(struct configuration *config,
+ struct thermometer *thermometer)
+{
+ DIR *dir;
+ struct dirent *dirent;
+ struct tz_regex *tz_regex;
+ const char *tz_dirname = "thermal_zone";
+
+ if (mainloop_init()) {
+ ERROR("Failed to start mainloop\n");
+ return -1;
+ }
+
+ dir = opendir(CLASS_THERMAL);
+ if (!dir) {
+ ERROR("failed to open '%s'\n", CLASS_THERMAL);
+ return -1;
+ }
+
+ while ((dirent = readdir(dir))) {
+ char tz_type[THERMAL_NAME_LENGTH];
+ char tz_path[PATH_MAX];
+ FILE *tz_file;
+
+ if (strncmp(dirent->d_name, tz_dirname, strlen(tz_dirname)))
+ continue;
+
+ sprintf(tz_path, CLASS_THERMAL"/%s/type", dirent->d_name);
+
+ tz_file = fopen(tz_path, "r");
+ if (!tz_file) {
+ ERROR("Failed to open '%s': %m", tz_path);
+ continue;
+ }
+
+ fscanf(tz_file, "%s", tz_type);
+
+ fclose(tz_file);
+
+ tz_regex = configuration_tz_match(tz_type, config);
+ if (!tz_regex)
+ continue;
+
+ if (thermometer_add_tz(dirent->d_name, tz_type,
+ tz_regex->polling, thermometer))
+ continue;
+ }
+
+ closedir(dir);
+
+ return 0;
+}
+
+static int timer_temperature_callback(int fd, void *arg)
+{
+ struct tz *tz = arg;
+ char buf[16] = { 0 };
+
+ pread(tz->fd_temp, buf, sizeof(buf), 0);
+
+ fprintf(tz->file_out, "%ld %s", getuptimeofday_ms(), buf);
+
+ read(fd, buf, sizeof(buf));
+
+ return 0;
+}
+
+static int thermometer_start(struct thermometer *thermometer,
+ struct options *options)
+{
+ struct itimerspec timer_it = { 0 };
+ char *path;
+ FILE *f;
+ int i;
+
+ INFO("Capturing %d thermal zone(s) temperature...\n", thermometer->nr_tz);
+
+ if (access(options->output, F_OK) && mkdir(options->output, 0700)) {
+ ERROR("Failed to create directory '%s'\n", options->output);
+ return -1;
+ }
+
+ for (i = 0; i < thermometer->nr_tz; i++) {
+
+ asprintf(&path, "%s/%s%s", options->output,
+ thermometer->tz[i].name, options->postfix);
+
+ if (!options->overwrite && !access(path, F_OK)) {
+ ERROR("'%s' already exists\n", path);
+ return -1;
+ }
+
+ f = fopen(path, "w");
+ if (!f) {
+ ERROR("Failed to create '%s':%m\n", path);
+ return -1;
+ }
+
+ fprintf(f, "timestamp(ms) %s(°mC)\n", thermometer->tz[i].name);
+
+ thermometer->tz[i].file_out = f;
+
+ DEBUG("Created '%s' file for thermal zone '%s'\n", path, thermometer->tz[i].name);
+
+ /*
+ * Create polling timer
+ */
+ thermometer->tz[i].fd_timer = timerfd_create(CLOCK_MONOTONIC, 0);
+ if (thermometer->tz[i].fd_timer < 0) {
+ ERROR("Failed to create timer for '%s': %m\n",
+ thermometer->tz[i].name);
+ return -1;
+ }
+
+ DEBUG("Watching '%s' every %d ms\n",
+ thermometer->tz[i].name, thermometer->tz[i].polling);
+
+ timer_it.it_interval = timer_it.it_value =
+ msec_to_timespec(thermometer->tz[i].polling);
+
+ if (timerfd_settime(thermometer->tz[i].fd_timer, 0,
+ &timer_it, NULL) < 0)
+ return -1;
+
+ if (mainloop_add(thermometer->tz[i].fd_timer,
+ timer_temperature_callback,
+ &thermometer->tz[i]))
+ return -1;
+ }
+
+ return 0;
+}
+
+static int thermometer_execute(int argc, char *argv[], char *const envp[], pid_t *pid)
+{
+ if (!argc)
+ return 0;
+
+ *pid = fork();
+ if (*pid < 0) {
+ ERROR("Failed to fork process: %m");
+ return -1;
+ }
+
+ if (!(*pid)) {
+ execvpe(argv[0], argv, envp);
+ exit(1);
+ }
+
+ return 0;
+}
+
+static int kill_process(__maybe_unused int fd, void *arg)
+{
+ pid_t pid = *(pid_t *)arg;
+
+ if (kill(pid, SIGTERM))
+ ERROR("Failed to send SIGTERM signal to '%d': %p\n", pid);
+ else if (waitpid(pid, NULL, 0))
+ ERROR("Failed to wait pid '%d': %p\n", pid);
+
+ mainloop_exit();
+
+ return 0;
+}
+
+static int exit_mainloop(__maybe_unused int fd, __maybe_unused void *arg)
+{
+ mainloop_exit();
+ return 0;
+}
+
+static int thermometer_wait(struct options *options, pid_t pid)
+{
+ int fd;
+ sigset_t mask;
+
+ /*
+ * If there is a duration specified, we will exit the mainloop
+ * and gracefully close all the files which will flush the
+ * file system cache
+ */
+ if (options->duration) {
+ struct itimerspec timer_it = { 0 };
+
+ timer_it.it_value = msec_to_timespec(options->duration);
+
+ fd = timerfd_create(CLOCK_MONOTONIC, 0);
+ if (fd < 0) {
+ ERROR("Failed to create duration timer: %m\n");
+ return -1;
+ }
+
+ if (timerfd_settime(fd, 0, &timer_it, NULL)) {
+ ERROR("Failed to set timer time: %m\n");
+ return -1;
+ }
+
+ if (mainloop_add(fd, pid < 0 ? exit_mainloop : kill_process, &pid)) {
+ ERROR("Failed to set timer exit mainloop callback\n");
+ return -1;
+ }
+ }
+
+ /*
+ * We want to catch any keyboard interrupt, as well as child
+ * signals if any in order to exit properly
+ */
+ sigemptyset(&mask);
+ sigaddset(&mask, SIGINT);
+ sigaddset(&mask, SIGQUIT);
+ sigaddset(&mask, SIGCHLD);
+
+ if (sigprocmask(SIG_BLOCK, &mask, NULL)) {
+ ERROR("Failed to set sigprocmask: %m\n");
+ return -1;
+ }
+
+ fd = signalfd(-1, &mask, 0);
+ if (fd < 0) {
+ ERROR("Failed to set the signalfd: %m\n");
+ return -1;
+ }
+
+ if (mainloop_add(fd, exit_mainloop, NULL)) {
+ ERROR("Failed to set timer exit mainloop callback\n");
+ return -1;
+ }
+
+ return mainloop(-1);
+}
+
+static int thermometer_stop(struct thermometer *thermometer)
+{
+ int i;
+
+ INFO("Closing/flushing output files\n");
+
+ for (i = 0; i < thermometer->nr_tz; i++)
+ fclose(thermometer->tz[i].file_out);
+
+ return 0;
+}
+
+int main(int argc, char *argv[], char *const envp[])
+{
+ struct options options = {
+ .loglvl = LOG_DEBUG,
+ .logopt = TO_STDOUT,
+ .output = ".",
+ };
+ struct configuration config = { 0 };
+ struct thermometer thermometer = { 0 };
+
+ pid_t pid = -1;
+
+ if (options_init(argc, argv, &options))
+ return THERMOMETER_OPTION_ERROR;
+
+ if (log_init(options.loglvl, argv[0], options.logopt))
+ return THERMOMETER_LOG_ERROR;
+
+ if (configuration_init(options.config, &config))
+ return THERMOMETER_CONFIG_ERROR;
+
+ if (uptimeofday_init())
+ return THERMOMETER_TIME_ERROR;
+
+ if (thermometer_init(&config, &thermometer))
+ return THERMOMETER_INIT_ERROR;
+
+ if (thermometer_start(&thermometer, &options))
+ return THERMOMETER_RUNTIME_ERROR;
+
+ if (thermometer_execute(argc - optind, &argv[optind], envp, &pid))
+ return THERMOMETER_RUNTIME_ERROR;
+
+ if (thermometer_wait(&options, pid))
+ return THERMOMETER_RUNTIME_ERROR;
+
+ if (thermometer_stop(&thermometer))
+ return THERMOMETER_RUNTIME_ERROR;
+
+ return THERMOMETER_SUCCESS;
+}
diff --git a/tools/thermal/thermometer/thermometer.conf b/tools/thermal/thermometer/thermometer.conf
new file mode 100644
index 000000000000..02c6dab3b1b3
--- /dev/null
+++ b/tools/thermal/thermometer/thermometer.conf
@@ -0,0 +1,5 @@
+
+thermal-zones = (
+ { name = "cpu[0-9]-thermal";
+ polling = 100; }
+ )