summaryrefslogtreecommitdiff
path: root/tools/testing
diff options
context:
space:
mode:
Diffstat (limited to 'tools/testing')
-rw-r--r--tools/testing/cxl/Kbuild7
-rw-r--r--tools/testing/cxl/cxl_core_exports.c22
-rw-r--r--tools/testing/cxl/exports.h13
-rw-r--r--tools/testing/cxl/test/cxl.c133
-rw-r--r--tools/testing/cxl/test/mock.c96
-rw-r--r--tools/testing/cxl/test/mock.h9
-rw-r--r--tools/testing/nvdimm/test/ndtest.c13
-rw-r--r--tools/testing/selftests/arm64/fp/fp-ptrace.c5
-rw-r--r--tools/testing/selftests/arm64/fp/sve-ptrace.c61
-rw-r--r--tools/testing/selftests/arm64/fp/zt-test.S2
-rw-r--r--tools/testing/selftests/bpf/config3
-rw-r--r--tools/testing/selftests/bpf/prog_tests/arg_parsing.c12
-rw-r--r--tools/testing/selftests/bpf/prog_tests/livepatch_trampoline.c107
-rw-r--r--tools/testing/selftests/bpf/prog_tests/mptcp.c140
-rw-r--r--tools/testing/selftests/bpf/prog_tests/pinning_htab.c36
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c150
-rw-r--r--tools/testing/selftests/bpf/progs/iters_looping.c53
-rw-r--r--tools/testing/selftests/bpf/progs/livepatch_trampoline.c30
-rw-r--r--tools/testing/selftests/bpf/progs/mptcp_sockmap.c43
-rw-r--r--tools/testing/selftests/bpf/progs/stacktrace_ips.c49
-rw-r--r--tools/testing/selftests/bpf/progs/stream_fail.c6
-rw-r--r--tools/testing/selftests/bpf/progs/task_work.c6
-rw-r--r--tools/testing/selftests/bpf/progs/task_work_fail.c8
-rw-r--r--tools/testing/selftests/bpf/progs/task_work_stress.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_pinning_htab.c25
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c14
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_vfs_accept.c2
-rw-r--r--tools/testing/selftests/bpf/test_kmods/bpf_testmod.c26
-rw-r--r--tools/testing/selftests/cachestat/.gitignore1
-rw-r--r--tools/testing/selftests/cachestat/test_cachestat.c4
-rw-r--r--tools/testing/selftests/cgroup/lib/include/cgroup_util.h20
-rw-r--r--tools/testing/selftests/cgroup/test_cpu.c18
-rw-r--r--tools/testing/selftests/coredump/.gitignore4
-rw-r--r--tools/testing/selftests/coredump/Makefile8
-rw-r--r--tools/testing/selftests/coredump/coredump_socket_protocol_test.c1568
-rw-r--r--tools/testing/selftests/coredump/coredump_socket_test.c742
-rw-r--r--tools/testing/selftests/coredump/coredump_test.h59
-rw-r--r--tools/testing/selftests/coredump/coredump_test_helpers.c383
-rw-r--r--tools/testing/selftests/coredump/stackdump_test.c1662
-rw-r--r--tools/testing/selftests/drivers/net/Makefile5
-rw-r--r--tools/testing/selftests/drivers/net/bonding/Makefile23
-rw-r--r--tools/testing/selftests/drivers/net/bonding/config12
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/netcons_over_bonding.sh361
-rw-r--r--tools/testing/selftests/drivers/net/config2
-rw-r--r--tools/testing/selftests/drivers/net/dsa/Makefile12
-rwxr-xr-xtools/testing/selftests/drivers/net/hds.py3
-rw-r--r--tools/testing/selftests/drivers/net/hw/Makefile8
-rw-r--r--tools/testing/selftests/drivers/net/hw/config4
-rw-r--r--tools/testing/selftests/drivers/net/hw/lib/py/__init__.py40
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/pp_alloc_fail.py36
-rw-r--r--tools/testing/selftests/drivers/net/lib/py/__init__.py43
-rw-r--r--tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh78
-rwxr-xr-xtools/testing/selftests/drivers/net/netcons_torture.sh130
-rw-r--r--tools/testing/selftests/drivers/net/netdevsim/Makefile8
-rw-r--r--tools/testing/selftests/drivers/net/team/Makefile11
-rw-r--r--tools/testing/selftests/drivers/net/virtio_net/Makefile13
-rwxr-xr-xtools/testing/selftests/drivers/net/xdp.py99
-rw-r--r--tools/testing/selftests/filesystems/binderfs/binderfs_test.c1
-rw-r--r--tools/testing/selftests/filesystems/utils.c2
-rw-r--r--tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc4
-rw-r--r--tools/testing/selftests/hid/hid_common.h6
-rw-r--r--tools/testing/selftests/hid/hidraw.c473
-rw-r--r--tools/testing/selftests/hid/tests/test_multitouch.py55
-rwxr-xr-xtools/testing/selftests/hid/vmtest.sh668
-rw-r--r--tools/testing/selftests/iommu/iommufd.c2
-rw-r--r--tools/testing/selftests/iommu/iommufd_utils.h4
-rw-r--r--tools/testing/selftests/kvm/Makefile.kvm1
-rw-r--r--tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c2
-rw-r--r--tools/testing/selftests/kvm/arm64/external_aborts.c43
-rw-r--r--tools/testing/selftests/kvm/arm64/get-reg-list.c102
-rw-r--r--tools/testing/selftests/kvm/arm64/set_id_regs.c13
-rw-r--r--tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c3
-rw-r--r--tools/testing/selftests/kvm/guest_memfd_test.c169
-rw-r--r--tools/testing/selftests/kvm/include/arm64/processor.h12
-rw-r--r--tools/testing/selftests/kvm/include/kvm_util.h27
-rw-r--r--tools/testing/selftests/kvm/include/test_util.h19
-rw-r--r--tools/testing/selftests/kvm/include/x86/processor.h5
-rw-r--r--tools/testing/selftests/kvm/irqfd_test.c14
-rw-r--r--tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c9
-rw-r--r--tools/testing/selftests/kvm/lib/arm64/processor.c5
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util.c49
-rw-r--r--tools/testing/selftests/kvm/lib/s390/processor.c5
-rw-r--r--tools/testing/selftests/kvm/lib/test_util.c7
-rw-r--r--tools/testing/selftests/kvm/lib/x86/processor.c5
-rw-r--r--tools/testing/selftests/kvm/mmu_stress_test.c5
-rw-r--r--tools/testing/selftests/kvm/pre_fault_memory_test.c131
-rw-r--r--tools/testing/selftests/kvm/s390/ucontrol_test.c16
-rw-r--r--tools/testing/selftests/kvm/set_memory_region_test.c17
-rw-r--r--tools/testing/selftests/kvm/x86/msrs_test.c489
-rw-r--r--tools/testing/selftests/kvm/x86/pmu_counters_test.c8
-rw-r--r--tools/testing/selftests/mm/uffd-unit-tests.c15
-rw-r--r--tools/testing/selftests/namespaces/.gitignore9
-rw-r--r--tools/testing/selftests/namespaces/Makefile24
-rw-r--r--tools/testing/selftests/namespaces/cred_change_test.c814
-rw-r--r--tools/testing/selftests/namespaces/listns_efault_test.c530
-rw-r--r--tools/testing/selftests/namespaces/listns_pagination_bug.c138
-rw-r--r--tools/testing/selftests/namespaces/listns_permissions_test.c759
-rw-r--r--tools/testing/selftests/namespaces/listns_test.c679
-rw-r--r--tools/testing/selftests/namespaces/ns_active_ref_test.c2672
-rw-r--r--tools/testing/selftests/namespaces/nsid_test.c107
-rw-r--r--tools/testing/selftests/namespaces/regression_pidfd_setns_test.c113
-rw-r--r--tools/testing/selftests/namespaces/siocgskns_test.c1824
-rw-r--r--tools/testing/selftests/namespaces/stress_test.c626
-rw-r--r--tools/testing/selftests/namespaces/wrappers.h35
-rw-r--r--tools/testing/selftests/net/.gitignore1
-rw-r--r--tools/testing/selftests/net/Makefile313
-rw-r--r--tools/testing/selftests/net/af_unix/Makefile11
-rw-r--r--tools/testing/selftests/net/af_unix/config2
-rw-r--r--tools/testing/selftests/net/af_unix/so_peek_off.c162
-rwxr-xr-xtools/testing/selftests/net/bareudp.sh2
-rw-r--r--tools/testing/selftests/net/config140
-rw-r--r--tools/testing/selftests/net/forwarding/Makefile56
-rw-r--r--tools/testing/selftests/net/forwarding/config30
-rwxr-xr-xtools/testing/selftests/net/forwarding/lib_sh_test.sh7
-rwxr-xr-xtools/testing/selftests/net/forwarding/local_termination.sh2
-rw-r--r--tools/testing/selftests/net/gro.c12
-rw-r--r--tools/testing/selftests/net/hsr/Makefile6
-rw-r--r--tools/testing/selftests/net/hsr/config4
-rw-r--r--tools/testing/selftests/net/lib.sh2
-rw-r--r--tools/testing/selftests/net/lib/Makefile14
-rw-r--r--tools/testing/selftests/net/lib/py/__init__.py29
-rw-r--r--tools/testing/selftests/net/mptcp/Makefile32
-rw-r--r--tools/testing/selftests/net/mptcp/config44
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_connect.c18
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect.sh2
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh117
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_lib.sh21
-rw-r--r--tools/testing/selftests/net/netfilter/Makefile89
-rw-r--r--tools/testing/selftests/net/netfilter/config52
-rwxr-xr-xtools/testing/selftests/net/netfilter/nf_nat_edemux.sh58
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_fib.sh13
-rw-r--r--tools/testing/selftests/net/ovpn/Makefile12
-rw-r--r--tools/testing/selftests/net/ovpn/config12
-rw-r--r--tools/testing/selftests/net/ovpn/ovpn-cli.c2
-rw-r--r--tools/testing/selftests/net/packetdrill/Makefile10
-rw-r--r--tools/testing/selftests/net/packetdrill/config4
-rw-r--r--tools/testing/selftests/net/rds/Makefile10
-rwxr-xr-xtools/testing/selftests/net/rtnetlink.sh2
-rw-r--r--tools/testing/selftests/net/sctp_hello.c17
-rwxr-xr-xtools/testing/selftests/net/sctp_vrf.sh73
-rw-r--r--tools/testing/selftests/net/tcp_ao/config2
-rw-r--r--tools/testing/selftests/net/tls.c65
-rwxr-xr-xtools/testing/selftests/net/vlan_bridge_binding.sh2
-rw-r--r--tools/testing/selftests/nolibc/Makefile.nolibc6
-rw-r--r--tools/testing/selftests/nolibc/nolibc-test.c13
-rwxr-xr-xtools/testing/selftests/nolibc/run-tests.sh8
-rw-r--r--tools/testing/selftests/pci_endpoint/pci_endpoint_test.c4
-rw-r--r--tools/testing/selftests/pidfd/pidfd.h15
-rw-r--r--tools/testing/selftests/pidfd/pidfd_info_test.c73
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/jitter.sh27
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-again.sh56
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-series.sh116
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm.sh2
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/torture.sh1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE041
-rw-r--r--tools/testing/selftests/rseq/rseq-s390.h39
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json44
-rw-r--r--tools/testing/selftests/timers/nanosleep.c55
-rw-r--r--tools/testing/selftests/timers/posix_timers.c32
-rw-r--r--tools/testing/selftests/user_events/perf_test.c2
-rw-r--r--tools/testing/selftests/vDSO/vdso_config.h4
-rw-r--r--tools/testing/selftests/vfio/lib/include/vfio_util.h46
-rw-r--r--tools/testing/selftests/vfio/lib/vfio_pci_device.c350
-rw-r--r--tools/testing/selftests/vfio/vfio_dma_mapping_test.c111
-rw-r--r--tools/testing/selftests/vfio/vfio_pci_driver_test.c12
-rwxr-xr-xtools/testing/selftests/vsock/vmtest.sh8
-rw-r--r--tools/testing/selftests/x86/test_vsyscall.c21
167 files changed, 16654 insertions, 2990 deletions
diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild
index d07f14cb7aa4..0d5ce4b74b9f 100644
--- a/tools/testing/cxl/Kbuild
+++ b/tools/testing/cxl/Kbuild
@@ -5,22 +5,19 @@ ldflags-y += --wrap=acpi_evaluate_integer
ldflags-y += --wrap=acpi_pci_find_root
ldflags-y += --wrap=nvdimm_bus_register
ldflags-y += --wrap=devm_cxl_port_enumerate_dports
-ldflags-y += --wrap=devm_cxl_setup_hdm
-ldflags-y += --wrap=devm_cxl_add_passthrough_decoder
-ldflags-y += --wrap=devm_cxl_enumerate_decoders
ldflags-y += --wrap=cxl_await_media_ready
-ldflags-y += --wrap=cxl_hdm_decode_init
-ldflags-y += --wrap=cxl_dvsec_rr_decode
ldflags-y += --wrap=devm_cxl_add_rch_dport
ldflags-y += --wrap=cxl_rcd_component_reg_phys
ldflags-y += --wrap=cxl_endpoint_parse_cdat
ldflags-y += --wrap=cxl_dport_init_ras_reporting
+ldflags-y += --wrap=devm_cxl_endpoint_decoders_setup
DRIVERS := ../../../drivers
CXL_SRC := $(DRIVERS)/cxl
CXL_CORE_SRC := $(DRIVERS)/cxl/core
ccflags-y := -I$(srctree)/drivers/cxl/
ccflags-y += -D__mock=__weak
+ccflags-y += -DCXL_TEST_ENABLE=1
ccflags-y += -DTRACE_INCLUDE_PATH=$(CXL_CORE_SRC) -I$(srctree)/drivers/cxl/core/
obj-m += cxl_acpi.o
diff --git a/tools/testing/cxl/cxl_core_exports.c b/tools/testing/cxl/cxl_core_exports.c
index f088792a8925..6754de35598d 100644
--- a/tools/testing/cxl/cxl_core_exports.c
+++ b/tools/testing/cxl/cxl_core_exports.c
@@ -2,6 +2,28 @@
/* Copyright(c) 2022 Intel Corporation. All rights reserved. */
#include "cxl.h"
+#include "exports.h"
/* Exporting of cxl_core symbols that are only used by cxl_test */
EXPORT_SYMBOL_NS_GPL(cxl_num_decoders_committed, "CXL");
+
+cxl_add_dport_by_dev_fn _devm_cxl_add_dport_by_dev =
+ __devm_cxl_add_dport_by_dev;
+EXPORT_SYMBOL_NS_GPL(_devm_cxl_add_dport_by_dev, "CXL");
+
+struct cxl_dport *devm_cxl_add_dport_by_dev(struct cxl_port *port,
+ struct device *dport_dev)
+{
+ return _devm_cxl_add_dport_by_dev(port, dport_dev);
+}
+EXPORT_SYMBOL_NS_GPL(devm_cxl_add_dport_by_dev, "CXL");
+
+cxl_switch_decoders_setup_fn _devm_cxl_switch_port_decoders_setup =
+ __devm_cxl_switch_port_decoders_setup;
+EXPORT_SYMBOL_NS_GPL(_devm_cxl_switch_port_decoders_setup, "CXL");
+
+int devm_cxl_switch_port_decoders_setup(struct cxl_port *port)
+{
+ return _devm_cxl_switch_port_decoders_setup(port);
+}
+EXPORT_SYMBOL_NS_GPL(devm_cxl_switch_port_decoders_setup, "CXL");
diff --git a/tools/testing/cxl/exports.h b/tools/testing/cxl/exports.h
new file mode 100644
index 000000000000..7ebee7c0bd67
--- /dev/null
+++ b/tools/testing/cxl/exports.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2025 Intel Corporation */
+#ifndef __MOCK_CXL_EXPORTS_H_
+#define __MOCK_CXL_EXPORTS_H_
+
+typedef struct cxl_dport *(*cxl_add_dport_by_dev_fn)(struct cxl_port *port,
+ struct device *dport_dev);
+extern cxl_add_dport_by_dev_fn _devm_cxl_add_dport_by_dev;
+
+typedef int(*cxl_switch_decoders_setup_fn)(struct cxl_port *port);
+extern cxl_switch_decoders_setup_fn _devm_cxl_switch_port_decoders_setup;
+
+#endif
diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c
index 6a25cca5636f..2d135ca533d0 100644
--- a/tools/testing/cxl/test/cxl.c
+++ b/tools/testing/cxl/test/cxl.c
@@ -210,7 +210,7 @@ static struct {
},
.interleave_ways = 0,
.granularity = 4,
- .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
+ .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM |
ACPI_CEDT_CFMWS_RESTRICT_VOLATILE,
.qtg_id = FAKE_QTG_ID,
.window_size = SZ_256M * 4UL,
@@ -225,7 +225,7 @@ static struct {
},
.interleave_ways = 1,
.granularity = 4,
- .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
+ .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM |
ACPI_CEDT_CFMWS_RESTRICT_VOLATILE,
.qtg_id = FAKE_QTG_ID,
.window_size = SZ_256M * 8UL,
@@ -240,7 +240,7 @@ static struct {
},
.interleave_ways = 0,
.granularity = 4,
- .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
+ .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM |
ACPI_CEDT_CFMWS_RESTRICT_PMEM,
.qtg_id = FAKE_QTG_ID,
.window_size = SZ_256M * 4UL,
@@ -255,7 +255,7 @@ static struct {
},
.interleave_ways = 1,
.granularity = 4,
- .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
+ .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM |
ACPI_CEDT_CFMWS_RESTRICT_PMEM,
.qtg_id = FAKE_QTG_ID,
.window_size = SZ_256M * 8UL,
@@ -270,7 +270,7 @@ static struct {
},
.interleave_ways = 0,
.granularity = 4,
- .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
+ .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM |
ACPI_CEDT_CFMWS_RESTRICT_PMEM,
.qtg_id = FAKE_QTG_ID,
.window_size = SZ_256M * 4UL,
@@ -285,7 +285,7 @@ static struct {
},
.interleave_ways = 0,
.granularity = 4,
- .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
+ .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM |
ACPI_CEDT_CFMWS_RESTRICT_VOLATILE,
.qtg_id = FAKE_QTG_ID,
.window_size = SZ_256M,
@@ -302,7 +302,7 @@ static struct {
.interleave_arithmetic = ACPI_CEDT_CFMWS_ARITHMETIC_XOR,
.interleave_ways = 0,
.granularity = 4,
- .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
+ .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM |
ACPI_CEDT_CFMWS_RESTRICT_PMEM,
.qtg_id = FAKE_QTG_ID,
.window_size = SZ_256M * 8UL,
@@ -318,7 +318,7 @@ static struct {
.interleave_arithmetic = ACPI_CEDT_CFMWS_ARITHMETIC_XOR,
.interleave_ways = 1,
.granularity = 0,
- .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
+ .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM |
ACPI_CEDT_CFMWS_RESTRICT_PMEM,
.qtg_id = FAKE_QTG_ID,
.window_size = SZ_256M * 8UL,
@@ -334,7 +334,7 @@ static struct {
.interleave_arithmetic = ACPI_CEDT_CFMWS_ARITHMETIC_XOR,
.interleave_ways = 8,
.granularity = 1,
- .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
+ .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM |
ACPI_CEDT_CFMWS_RESTRICT_PMEM,
.qtg_id = FAKE_QTG_ID,
.window_size = SZ_512M * 6UL,
@@ -643,15 +643,8 @@ static struct cxl_hdm *mock_cxl_setup_hdm(struct cxl_port *port,
return cxlhdm;
}
-static int mock_cxl_add_passthrough_decoder(struct cxl_port *port)
-{
- dev_err(&port->dev, "unexpected passthrough decoder for cxl_test\n");
- return -EOPNOTSUPP;
-}
-
-
struct target_map_ctx {
- int *target_map;
+ u32 *target_map;
int index;
int target_count;
};
@@ -818,15 +811,21 @@ static void mock_init_hdm_decoder(struct cxl_decoder *cxld)
*/
if (WARN_ON(!dev))
continue;
+
cxlsd = to_cxl_switch_decoder(dev);
if (i == 0) {
/* put cxl_mem.4 second in the decode order */
- if (pdev->id == 4)
+ if (pdev->id == 4) {
cxlsd->target[1] = dport;
- else
+ cxld->target_map[1] = dport->port_id;
+ } else {
cxlsd->target[0] = dport;
- } else
+ cxld->target_map[0] = dport->port_id;
+ }
+ } else {
cxlsd->target[0] = dport;
+ cxld->target_map[0] = dport->port_id;
+ }
cxld = &cxlsd->cxld;
cxld->target_type = CXL_DECODER_HOSTONLYMEM;
cxld->flags = CXL_DECODER_F_ENABLE;
@@ -863,9 +862,7 @@ static int mock_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm,
target_count = NR_CXL_SWITCH_PORTS;
for (i = 0; i < NR_CXL_PORT_DECODERS; i++) {
- int target_map[CXL_DECODER_MAX_INTERLEAVE] = { 0 };
struct target_map_ctx ctx = {
- .target_map = target_map,
.target_count = target_count,
};
struct cxl_decoder *cxld;
@@ -894,6 +891,8 @@ static int mock_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm,
cxld = &cxled->cxld;
}
+ ctx.target_map = cxld->target_map;
+
mock_init_hdm_decoder(cxld);
if (target_count) {
@@ -905,7 +904,7 @@ static int mock_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm,
}
}
- rc = cxl_decoder_add_locked(cxld, target_map);
+ rc = cxl_decoder_add_locked(cxld);
if (rc) {
put_device(&cxld->dev);
dev_err(&port->dev, "Failed to add decoder\n");
@@ -921,10 +920,42 @@ static int mock_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm,
return 0;
}
-static int mock_cxl_port_enumerate_dports(struct cxl_port *port)
+static int __mock_cxl_decoders_setup(struct cxl_port *port)
+{
+ struct cxl_hdm *cxlhdm;
+
+ cxlhdm = mock_cxl_setup_hdm(port, NULL);
+ if (IS_ERR(cxlhdm)) {
+ if (PTR_ERR(cxlhdm) != -ENODEV)
+ dev_err(&port->dev, "Failed to map HDM decoder capability\n");
+ return PTR_ERR(cxlhdm);
+ }
+
+ return mock_cxl_enumerate_decoders(cxlhdm, NULL);
+}
+
+static int mock_cxl_switch_port_decoders_setup(struct cxl_port *port)
+{
+ if (is_cxl_root(port) || is_cxl_endpoint(port))
+ return -EOPNOTSUPP;
+
+ return __mock_cxl_decoders_setup(port);
+}
+
+static int mock_cxl_endpoint_decoders_setup(struct cxl_port *port)
+{
+ if (!is_cxl_endpoint(port))
+ return -EOPNOTSUPP;
+
+ return __mock_cxl_decoders_setup(port);
+}
+
+static int get_port_array(struct cxl_port *port,
+ struct platform_device ***port_array,
+ int *port_array_size)
{
struct platform_device **array;
- int i, array_size;
+ int array_size;
if (port->depth == 1) {
if (is_multi_bridge(port->uport_dev)) {
@@ -958,6 +989,22 @@ static int mock_cxl_port_enumerate_dports(struct cxl_port *port)
return -ENXIO;
}
+ *port_array = array;
+ *port_array_size = array_size;
+
+ return 0;
+}
+
+static int mock_cxl_port_enumerate_dports(struct cxl_port *port)
+{
+ struct platform_device **array;
+ int i, array_size;
+ int rc;
+
+ rc = get_port_array(port, &array, &array_size);
+ if (rc)
+ return rc;
+
for (i = 0; i < array_size; i++) {
struct platform_device *pdev = array[i];
struct cxl_dport *dport;
@@ -979,6 +1026,36 @@ static int mock_cxl_port_enumerate_dports(struct cxl_port *port)
return 0;
}
+static struct cxl_dport *mock_cxl_add_dport_by_dev(struct cxl_port *port,
+ struct device *dport_dev)
+{
+ struct platform_device **array;
+ int rc, i, array_size;
+
+ rc = get_port_array(port, &array, &array_size);
+ if (rc)
+ return ERR_PTR(rc);
+
+ for (i = 0; i < array_size; i++) {
+ struct platform_device *pdev = array[i];
+
+ if (pdev->dev.parent != port->uport_dev) {
+ dev_dbg(&port->dev, "%s: mismatch parent %s\n",
+ dev_name(port->uport_dev),
+ dev_name(pdev->dev.parent));
+ continue;
+ }
+
+ if (&pdev->dev != dport_dev)
+ continue;
+
+ return devm_cxl_add_dport(port, &pdev->dev, pdev->id,
+ CXL_RESOURCE_NONE);
+ }
+
+ return ERR_PTR(-ENODEV);
+}
+
/*
* Faking the cxl_dpa_perf for the memdev when appropriate.
*/
@@ -1035,11 +1112,11 @@ static struct cxl_mock_ops cxl_mock_ops = {
.acpi_table_parse_cedt = mock_acpi_table_parse_cedt,
.acpi_evaluate_integer = mock_acpi_evaluate_integer,
.acpi_pci_find_root = mock_acpi_pci_find_root,
+ .devm_cxl_switch_port_decoders_setup = mock_cxl_switch_port_decoders_setup,
+ .devm_cxl_endpoint_decoders_setup = mock_cxl_endpoint_decoders_setup,
.devm_cxl_port_enumerate_dports = mock_cxl_port_enumerate_dports,
- .devm_cxl_setup_hdm = mock_cxl_setup_hdm,
- .devm_cxl_add_passthrough_decoder = mock_cxl_add_passthrough_decoder,
- .devm_cxl_enumerate_decoders = mock_cxl_enumerate_decoders,
.cxl_endpoint_parse_cdat = mock_cxl_endpoint_parse_cdat,
+ .devm_cxl_add_dport_by_dev = mock_cxl_add_dport_by_dev,
.list = LIST_HEAD_INIT(cxl_mock_ops.list),
};
diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c
index 1989ae020df3..995269a75cbd 100644
--- a/tools/testing/cxl/test/mock.c
+++ b/tools/testing/cxl/test/mock.c
@@ -10,12 +10,21 @@
#include <cxlmem.h>
#include <cxlpci.h>
#include "mock.h"
+#include "../exports.h"
static LIST_HEAD(mock);
+static struct cxl_dport *
+redirect_devm_cxl_add_dport_by_dev(struct cxl_port *port,
+ struct device *dport_dev);
+static int redirect_devm_cxl_switch_port_decoders_setup(struct cxl_port *port);
+
void register_cxl_mock_ops(struct cxl_mock_ops *ops)
{
list_add_rcu(&ops->list, &mock);
+ _devm_cxl_add_dport_by_dev = redirect_devm_cxl_add_dport_by_dev;
+ _devm_cxl_switch_port_decoders_setup =
+ redirect_devm_cxl_switch_port_decoders_setup;
}
EXPORT_SYMBOL_GPL(register_cxl_mock_ops);
@@ -23,6 +32,9 @@ DEFINE_STATIC_SRCU(cxl_mock_srcu);
void unregister_cxl_mock_ops(struct cxl_mock_ops *ops)
{
+ _devm_cxl_switch_port_decoders_setup =
+ __devm_cxl_switch_port_decoders_setup;
+ _devm_cxl_add_dport_by_dev = __devm_cxl_add_dport_by_dev;
list_del_rcu(&ops->list);
synchronize_srcu(&cxl_mock_srcu);
}
@@ -131,55 +143,34 @@ __wrap_nvdimm_bus_register(struct device *dev,
}
EXPORT_SYMBOL_GPL(__wrap_nvdimm_bus_register);
-struct cxl_hdm *__wrap_devm_cxl_setup_hdm(struct cxl_port *port,
- struct cxl_endpoint_dvsec_info *info)
-
-{
- int index;
- struct cxl_hdm *cxlhdm;
- struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
-
- if (ops && ops->is_mock_port(port->uport_dev))
- cxlhdm = ops->devm_cxl_setup_hdm(port, info);
- else
- cxlhdm = devm_cxl_setup_hdm(port, info);
- put_cxl_mock_ops(index);
-
- return cxlhdm;
-}
-EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_setup_hdm, "CXL");
-
-int __wrap_devm_cxl_add_passthrough_decoder(struct cxl_port *port)
+int redirect_devm_cxl_switch_port_decoders_setup(struct cxl_port *port)
{
int rc, index;
struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
if (ops && ops->is_mock_port(port->uport_dev))
- rc = ops->devm_cxl_add_passthrough_decoder(port);
+ rc = ops->devm_cxl_switch_port_decoders_setup(port);
else
- rc = devm_cxl_add_passthrough_decoder(port);
+ rc = __devm_cxl_switch_port_decoders_setup(port);
put_cxl_mock_ops(index);
return rc;
}
-EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_add_passthrough_decoder, "CXL");
-int __wrap_devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm,
- struct cxl_endpoint_dvsec_info *info)
+int __wrap_devm_cxl_endpoint_decoders_setup(struct cxl_port *port)
{
int rc, index;
- struct cxl_port *port = cxlhdm->port;
struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
if (ops && ops->is_mock_port(port->uport_dev))
- rc = ops->devm_cxl_enumerate_decoders(cxlhdm, info);
+ rc = ops->devm_cxl_endpoint_decoders_setup(port);
else
- rc = devm_cxl_enumerate_decoders(cxlhdm, info);
+ rc = devm_cxl_endpoint_decoders_setup(port);
put_cxl_mock_ops(index);
return rc;
}
-EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_enumerate_decoders, "CXL");
+EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_endpoint_decoders_setup, "CXL");
int __wrap_devm_cxl_port_enumerate_dports(struct cxl_port *port)
{
@@ -211,39 +202,6 @@ int __wrap_cxl_await_media_ready(struct cxl_dev_state *cxlds)
}
EXPORT_SYMBOL_NS_GPL(__wrap_cxl_await_media_ready, "CXL");
-int __wrap_cxl_hdm_decode_init(struct cxl_dev_state *cxlds,
- struct cxl_hdm *cxlhdm,
- struct cxl_endpoint_dvsec_info *info)
-{
- int rc = 0, index;
- struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
-
- if (ops && ops->is_mock_dev(cxlds->dev))
- rc = 0;
- else
- rc = cxl_hdm_decode_init(cxlds, cxlhdm, info);
- put_cxl_mock_ops(index);
-
- return rc;
-}
-EXPORT_SYMBOL_NS_GPL(__wrap_cxl_hdm_decode_init, "CXL");
-
-int __wrap_cxl_dvsec_rr_decode(struct cxl_dev_state *cxlds,
- struct cxl_endpoint_dvsec_info *info)
-{
- int rc = 0, index;
- struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
-
- if (ops && ops->is_mock_dev(cxlds->dev))
- rc = 0;
- else
- rc = cxl_dvsec_rr_decode(cxlds, info);
- put_cxl_mock_ops(index);
-
- return rc;
-}
-EXPORT_SYMBOL_NS_GPL(__wrap_cxl_dvsec_rr_decode, "CXL");
-
struct cxl_dport *__wrap_devm_cxl_add_rch_dport(struct cxl_port *port,
struct device *dport_dev,
int port_id,
@@ -311,6 +269,22 @@ void __wrap_cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device
}
EXPORT_SYMBOL_NS_GPL(__wrap_cxl_dport_init_ras_reporting, "CXL");
+struct cxl_dport *redirect_devm_cxl_add_dport_by_dev(struct cxl_port *port,
+ struct device *dport_dev)
+{
+ int index;
+ struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
+ struct cxl_dport *dport;
+
+ if (ops && ops->is_mock_port(port->uport_dev))
+ dport = ops->devm_cxl_add_dport_by_dev(port, dport_dev);
+ else
+ dport = __devm_cxl_add_dport_by_dev(port, dport_dev);
+ put_cxl_mock_ops(index);
+
+ return dport;
+}
+
MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("cxl_test: emulation module");
MODULE_IMPORT_NS("ACPI");
diff --git a/tools/testing/cxl/test/mock.h b/tools/testing/cxl/test/mock.h
index d1b0271d2822..4ed932e76aae 100644
--- a/tools/testing/cxl/test/mock.h
+++ b/tools/testing/cxl/test/mock.h
@@ -20,12 +20,11 @@ struct cxl_mock_ops {
bool (*is_mock_port)(struct device *dev);
bool (*is_mock_dev)(struct device *dev);
int (*devm_cxl_port_enumerate_dports)(struct cxl_port *port);
- struct cxl_hdm *(*devm_cxl_setup_hdm)(
- struct cxl_port *port, struct cxl_endpoint_dvsec_info *info);
- int (*devm_cxl_add_passthrough_decoder)(struct cxl_port *port);
- int (*devm_cxl_enumerate_decoders)(
- struct cxl_hdm *hdm, struct cxl_endpoint_dvsec_info *info);
+ int (*devm_cxl_switch_port_decoders_setup)(struct cxl_port *port);
+ int (*devm_cxl_endpoint_decoders_setup)(struct cxl_port *port);
void (*cxl_endpoint_parse_cdat)(struct cxl_port *port);
+ struct cxl_dport *(*devm_cxl_add_dport_by_dev)(struct cxl_port *port,
+ struct device *dport_dev);
};
void register_cxl_mock_ops(struct cxl_mock_ops *ops);
diff --git a/tools/testing/nvdimm/test/ndtest.c b/tools/testing/nvdimm/test/ndtest.c
index 68a064ce598c..8e3b6be53839 100644
--- a/tools/testing/nvdimm/test/ndtest.c
+++ b/tools/testing/nvdimm/test/ndtest.c
@@ -850,11 +850,22 @@ static int ndtest_probe(struct platform_device *pdev)
p->dcr_dma = devm_kcalloc(&p->pdev.dev, NUM_DCR,
sizeof(dma_addr_t), GFP_KERNEL);
+ if (!p->dcr_dma) {
+ rc = -ENOMEM;
+ goto err;
+ }
p->label_dma = devm_kcalloc(&p->pdev.dev, NUM_DCR,
sizeof(dma_addr_t), GFP_KERNEL);
+ if (!p->label_dma) {
+ rc = -ENOMEM;
+ goto err;
+ }
p->dimm_dma = devm_kcalloc(&p->pdev.dev, NUM_DCR,
sizeof(dma_addr_t), GFP_KERNEL);
-
+ if (!p->dimm_dma) {
+ rc = -ENOMEM;
+ goto err;
+ }
rc = ndtest_nvdimm_init(p);
if (rc)
goto err;
diff --git a/tools/testing/selftests/arm64/fp/fp-ptrace.c b/tools/testing/selftests/arm64/fp/fp-ptrace.c
index a85c19e9524e..0114108ab25f 100644
--- a/tools/testing/selftests/arm64/fp/fp-ptrace.c
+++ b/tools/testing/selftests/arm64/fp/fp-ptrace.c
@@ -1071,7 +1071,7 @@ static bool sve_write_supported(struct test_config *config)
static bool sve_write_fpsimd_supported(struct test_config *config)
{
- if (!sve_supported())
+ if (!sve_supported() && !sme_supported())
return false;
if ((config->svcr_in & SVCR_ZA) != (config->svcr_expected & SVCR_ZA))
@@ -1231,9 +1231,6 @@ static void sve_write_fpsimd(pid_t child, struct test_config *config)
vl = vl_expected(config);
vq = __sve_vq_from_vl(vl);
- if (!vl)
- return;
-
iov.iov_len = SVE_PT_SIZE(vq, SVE_PT_REGS_FPSIMD);
iov.iov_base = malloc(iov.iov_len);
if (!iov.iov_base) {
diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c
index e0fc3a001e28..f44d44618575 100644
--- a/tools/testing/selftests/arm64/fp/sve-ptrace.c
+++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c
@@ -394,6 +394,58 @@ out:
free(svebuf);
}
+/* Write the FPSIMD registers via the SVE regset when SVE is not supported */
+static void ptrace_sve_fpsimd_no_sve(pid_t child)
+{
+ void *svebuf;
+ struct user_sve_header *sve;
+ struct user_fpsimd_state *fpsimd, new_fpsimd;
+ unsigned int i, j;
+ unsigned char *p;
+ int ret;
+
+ svebuf = malloc(SVE_PT_SIZE(0, SVE_PT_REGS_FPSIMD));
+ if (!svebuf) {
+ ksft_test_result_fail("Failed to allocate FPSIMD buffer\n");
+ return;
+ }
+
+ /* On a system without SVE the VL should be set to 0 */
+ memset(svebuf, 0, SVE_PT_SIZE(0, SVE_PT_REGS_FPSIMD));
+ sve = svebuf;
+ sve->flags = SVE_PT_REGS_FPSIMD;
+ sve->size = SVE_PT_SIZE(0, SVE_PT_REGS_FPSIMD);
+ sve->vl = 0;
+
+ /* Try to set a known FPSIMD state via PT_REGS_SVE */
+ fpsimd = (struct user_fpsimd_state *)((char *)sve +
+ SVE_PT_FPSIMD_OFFSET);
+ for (i = 0; i < 32; ++i) {
+ p = (unsigned char *)&fpsimd->vregs[i];
+
+ for (j = 0; j < sizeof(fpsimd->vregs[i]); ++j)
+ p[j] = j;
+ }
+
+ ret = set_sve(child, &vec_types[0], sve);
+ ksft_test_result(ret == 0, "FPSIMD write via SVE\n");
+ if (ret) {
+ ksft_test_result_skip("Verify FPSIMD write via SVE\n");
+ goto out;
+ }
+
+ /* Verify via the FPSIMD regset */
+ if (get_fpsimd(child, &new_fpsimd)) {
+ ksft_test_result_skip("Verify FPSIMD write via SVE\n");
+ goto out;
+ }
+ ksft_test_result(memcmp(fpsimd, &new_fpsimd, sizeof(*fpsimd)) == 0,
+ "Verify FPSIMD write via SVE\n");
+
+out:
+ free(svebuf);
+}
+
/* Validate attempting to set SVE data and read SVE data */
static void ptrace_set_sve_get_sve_data(pid_t child,
const struct vec_type *type,
@@ -826,6 +878,15 @@ static int do_parent(pid_t child)
}
}
+ /* We support SVE writes of FPSMID format on SME only systems */
+ if (!(getauxval(AT_HWCAP) & HWCAP_SVE) &&
+ (getauxval(AT_HWCAP2) & HWCAP2_SME)) {
+ ptrace_sve_fpsimd_no_sve(child);
+ } else {
+ ksft_test_result_skip("FPSIMD write via SVE\n");
+ ksft_test_result_skip("Verify FPSIMD write via SVE\n");
+ }
+
ret = EXIT_SUCCESS;
error:
diff --git a/tools/testing/selftests/arm64/fp/zt-test.S b/tools/testing/selftests/arm64/fp/zt-test.S
index 38080f3c3280..a8df05771670 100644
--- a/tools/testing/selftests/arm64/fp/zt-test.S
+++ b/tools/testing/selftests/arm64/fp/zt-test.S
@@ -276,7 +276,7 @@ function barf
bl putdec
puts ", iteration="
mov x0, x22
- bl putdec
+ bl putdecn
puts "\tExpected ["
mov x0, x10
mov x1, x12
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 70b28c1e653e..f2a2fd236ca8 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -50,6 +50,7 @@ CONFIG_IPV6_SIT=y
CONFIG_IPV6_TUNNEL=y
CONFIG_KEYS=y
CONFIG_LIRC=y
+CONFIG_LIVEPATCH=y
CONFIG_LWTUNNEL=y
CONFIG_MODULE_SIG=y
CONFIG_MODULE_SRCVERSION_ALL=y
@@ -111,6 +112,8 @@ CONFIG_IP6_NF_FILTER=y
CONFIG_NF_NAT=y
CONFIG_PACKET=y
CONFIG_RC_CORE=y
+CONFIG_SAMPLES=y
+CONFIG_SAMPLE_LIVEPATCH=m
CONFIG_SECURITY=y
CONFIG_SECURITYFS=y
CONFIG_SYN_COOKIES=y
diff --git a/tools/testing/selftests/bpf/prog_tests/arg_parsing.c b/tools/testing/selftests/bpf/prog_tests/arg_parsing.c
index bb143de68875..e27d66b75fb1 100644
--- a/tools/testing/selftests/bpf/prog_tests/arg_parsing.c
+++ b/tools/testing/selftests/bpf/prog_tests/arg_parsing.c
@@ -144,11 +144,17 @@ static void test_parse_test_list_file(void)
if (!ASSERT_OK(ferror(fp), "prepare tmp"))
goto out_fclose;
+ if (!ASSERT_OK(fsync(fileno(fp)), "fsync tmp"))
+ goto out_fclose;
+
init_test_filter_set(&set);
- ASSERT_OK(parse_test_list_file(tmpfile, &set, true), "parse file");
+ if (!ASSERT_OK(parse_test_list_file(tmpfile, &set, true), "parse file"))
+ goto out_fclose;
+
+ if (!ASSERT_EQ(set.cnt, 4, "test count"))
+ goto out_free_set;
- ASSERT_EQ(set.cnt, 4, "test count");
ASSERT_OK(strcmp("test_with_spaces", set.tests[0].name), "test 0 name");
ASSERT_EQ(set.tests[0].subtest_cnt, 0, "test 0 subtest count");
ASSERT_OK(strcmp("testA", set.tests[1].name), "test 1 name");
@@ -158,8 +164,8 @@ static void test_parse_test_list_file(void)
ASSERT_OK(strcmp("testB", set.tests[2].name), "test 2 name");
ASSERT_OK(strcmp("testC_no_eof_newline", set.tests[3].name), "test 3 name");
+out_free_set:
free_test_filter_set(&set);
-
out_fclose:
fclose(fp);
out_remove:
diff --git a/tools/testing/selftests/bpf/prog_tests/livepatch_trampoline.c b/tools/testing/selftests/bpf/prog_tests/livepatch_trampoline.c
new file mode 100644
index 000000000000..72aa5376c30e
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/livepatch_trampoline.c
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <test_progs.h>
+#include "testing_helpers.h"
+#include "livepatch_trampoline.skel.h"
+
+static int load_livepatch(void)
+{
+ char path[4096];
+
+ /* CI will set KBUILD_OUTPUT */
+ snprintf(path, sizeof(path), "%s/samples/livepatch/livepatch-sample.ko",
+ getenv("KBUILD_OUTPUT") ? : "../../../..");
+
+ return load_module(path, env_verbosity > VERBOSE_NONE);
+}
+
+static void unload_livepatch(void)
+{
+ /* Disable the livepatch before unloading the module */
+ system("echo 0 > /sys/kernel/livepatch/livepatch_sample/enabled");
+
+ unload_module("livepatch_sample", env_verbosity > VERBOSE_NONE);
+}
+
+static void read_proc_cmdline(void)
+{
+ char buf[4096];
+ int fd, ret;
+
+ fd = open("/proc/cmdline", O_RDONLY);
+ if (!ASSERT_OK_FD(fd, "open /proc/cmdline"))
+ return;
+
+ ret = read(fd, buf, sizeof(buf));
+ if (!ASSERT_GT(ret, 0, "read /proc/cmdline"))
+ goto out;
+
+ ASSERT_OK(strncmp(buf, "this has been live patched", 26), "strncmp");
+
+out:
+ close(fd);
+}
+
+static void __test_livepatch_trampoline(bool fexit_first)
+{
+ struct livepatch_trampoline *skel = NULL;
+ int err;
+
+ skel = livepatch_trampoline__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ goto out;
+
+ skel->bss->my_pid = getpid();
+
+ if (!fexit_first) {
+ /* fentry program is loaded first by default */
+ err = livepatch_trampoline__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto out;
+ } else {
+ /* Manually load fexit program first. */
+ skel->links.fexit_cmdline = bpf_program__attach(skel->progs.fexit_cmdline);
+ if (!ASSERT_OK_PTR(skel->links.fexit_cmdline, "attach_fexit"))
+ goto out;
+
+ skel->links.fentry_cmdline = bpf_program__attach(skel->progs.fentry_cmdline);
+ if (!ASSERT_OK_PTR(skel->links.fentry_cmdline, "attach_fentry"))
+ goto out;
+ }
+
+ read_proc_cmdline();
+
+ ASSERT_EQ(skel->bss->fentry_hit, 1, "fentry_hit");
+ ASSERT_EQ(skel->bss->fexit_hit, 1, "fexit_hit");
+out:
+ livepatch_trampoline__destroy(skel);
+}
+
+void test_livepatch_trampoline(void)
+{
+ int retry_cnt = 0;
+
+retry:
+ if (load_livepatch()) {
+ if (retry_cnt) {
+ ASSERT_OK(1, "load_livepatch");
+ goto out;
+ }
+ /*
+ * Something else (previous run of the same test?) loaded
+ * the KLP module. Unload the KLP module and retry.
+ */
+ unload_livepatch();
+ retry_cnt++;
+ goto retry;
+ }
+
+ if (test__start_subtest("fentry_first"))
+ __test_livepatch_trampoline(false);
+
+ if (test__start_subtest("fexit_first"))
+ __test_livepatch_trampoline(true);
+out:
+ unload_livepatch();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c
index f8eb7f9d4fd2..8fade8bdc451 100644
--- a/tools/testing/selftests/bpf/prog_tests/mptcp.c
+++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c
@@ -6,11 +6,13 @@
#include <netinet/in.h>
#include <test_progs.h>
#include <unistd.h>
+#include <errno.h>
#include "cgroup_helpers.h"
#include "network_helpers.h"
#include "mptcp_sock.skel.h"
#include "mptcpify.skel.h"
#include "mptcp_subflow.skel.h"
+#include "mptcp_sockmap.skel.h"
#define NS_TEST "mptcp_ns"
#define ADDR_1 "10.0.1.1"
@@ -436,6 +438,142 @@ close_cgroup:
close(cgroup_fd);
}
+/* Test sockmap on MPTCP server handling non-mp-capable clients. */
+static void test_sockmap_with_mptcp_fallback(struct mptcp_sockmap *skel)
+{
+ int listen_fd = -1, client_fd1 = -1, client_fd2 = -1;
+ int server_fd1 = -1, server_fd2 = -1, sent, recvd;
+ char snd[9] = "123456789";
+ char rcv[10];
+
+ /* start server with MPTCP enabled */
+ listen_fd = start_mptcp_server(AF_INET, NULL, 0, 0);
+ if (!ASSERT_OK_FD(listen_fd, "sockmap-fb:start_mptcp_server"))
+ return;
+
+ skel->bss->trace_port = ntohs(get_socket_local_port(listen_fd));
+ skel->bss->sk_index = 0;
+ /* create client without MPTCP enabled */
+ client_fd1 = connect_to_fd_opts(listen_fd, NULL);
+ if (!ASSERT_OK_FD(client_fd1, "sockmap-fb:connect_to_fd"))
+ goto end;
+
+ server_fd1 = accept(listen_fd, NULL, 0);
+ skel->bss->sk_index = 1;
+ client_fd2 = connect_to_fd_opts(listen_fd, NULL);
+ if (!ASSERT_OK_FD(client_fd2, "sockmap-fb:connect_to_fd"))
+ goto end;
+
+ server_fd2 = accept(listen_fd, NULL, 0);
+ /* test normal redirect behavior: data sent by client_fd1 can be
+ * received by client_fd2
+ */
+ skel->bss->redirect_idx = 1;
+ sent = send(client_fd1, snd, sizeof(snd), 0);
+ if (!ASSERT_EQ(sent, sizeof(snd), "sockmap-fb:send(client_fd1)"))
+ goto end;
+
+ /* try to recv more bytes to avoid truncation check */
+ recvd = recv(client_fd2, rcv, sizeof(rcv), 0);
+ if (!ASSERT_EQ(recvd, sizeof(snd), "sockmap-fb:recv(client_fd2)"))
+ goto end;
+
+end:
+ if (client_fd1 >= 0)
+ close(client_fd1);
+ if (client_fd2 >= 0)
+ close(client_fd2);
+ if (server_fd1 >= 0)
+ close(server_fd1);
+ if (server_fd2 >= 0)
+ close(server_fd2);
+ close(listen_fd);
+}
+
+/* Test sockmap rejection of MPTCP sockets - both server and client sides. */
+static void test_sockmap_reject_mptcp(struct mptcp_sockmap *skel)
+{
+ int listen_fd = -1, server_fd = -1, client_fd1 = -1;
+ int err, zero = 0;
+
+ /* start server with MPTCP enabled */
+ listen_fd = start_mptcp_server(AF_INET, NULL, 0, 0);
+ if (!ASSERT_OK_FD(listen_fd, "start_mptcp_server"))
+ return;
+
+ skel->bss->trace_port = ntohs(get_socket_local_port(listen_fd));
+ skel->bss->sk_index = 0;
+ /* create client with MPTCP enabled */
+ client_fd1 = connect_to_fd(listen_fd, 0);
+ if (!ASSERT_OK_FD(client_fd1, "connect_to_fd client_fd1"))
+ goto end;
+
+ /* bpf_sock_map_update() called from sockops should reject MPTCP sk */
+ if (!ASSERT_EQ(skel->bss->helper_ret, -EOPNOTSUPP, "should reject"))
+ goto end;
+
+ server_fd = accept(listen_fd, NULL, 0);
+ err = bpf_map_update_elem(bpf_map__fd(skel->maps.sock_map),
+ &zero, &server_fd, BPF_NOEXIST);
+ if (!ASSERT_EQ(err, -EOPNOTSUPP, "server should be disallowed"))
+ goto end;
+
+ /* MPTCP client should also be disallowed */
+ err = bpf_map_update_elem(bpf_map__fd(skel->maps.sock_map),
+ &zero, &client_fd1, BPF_NOEXIST);
+ if (!ASSERT_EQ(err, -EOPNOTSUPP, "client should be disallowed"))
+ goto end;
+end:
+ if (client_fd1 >= 0)
+ close(client_fd1);
+ if (server_fd >= 0)
+ close(server_fd);
+ close(listen_fd);
+}
+
+static void test_mptcp_sockmap(void)
+{
+ struct mptcp_sockmap *skel;
+ struct netns_obj *netns;
+ int cgroup_fd, err;
+
+ cgroup_fd = test__join_cgroup("/mptcp_sockmap");
+ if (!ASSERT_OK_FD(cgroup_fd, "join_cgroup: mptcp_sockmap"))
+ return;
+
+ skel = mptcp_sockmap__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_load: mptcp_sockmap"))
+ goto close_cgroup;
+
+ skel->links.mptcp_sockmap_inject =
+ bpf_program__attach_cgroup(skel->progs.mptcp_sockmap_inject, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links.mptcp_sockmap_inject, "attach sockmap"))
+ goto skel_destroy;
+
+ err = bpf_prog_attach(bpf_program__fd(skel->progs.mptcp_sockmap_redirect),
+ bpf_map__fd(skel->maps.sock_map),
+ BPF_SK_SKB_STREAM_VERDICT, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach stream verdict"))
+ goto skel_destroy;
+
+ netns = netns_new(NS_TEST, true);
+ if (!ASSERT_OK_PTR(netns, "netns_new: mptcp_sockmap"))
+ goto skel_destroy;
+
+ if (endpoint_init("subflow") < 0)
+ goto close_netns;
+
+ test_sockmap_with_mptcp_fallback(skel);
+ test_sockmap_reject_mptcp(skel);
+
+close_netns:
+ netns_free(netns);
+skel_destroy:
+ mptcp_sockmap__destroy(skel);
+close_cgroup:
+ close(cgroup_fd);
+}
+
void test_mptcp(void)
{
if (test__start_subtest("base"))
@@ -444,4 +582,6 @@ void test_mptcp(void)
test_mptcpify();
if (test__start_subtest("subflow"))
test_subflow();
+ if (test__start_subtest("sockmap"))
+ test_mptcp_sockmap();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/pinning_htab.c b/tools/testing/selftests/bpf/prog_tests/pinning_htab.c
new file mode 100644
index 000000000000..16bd74be3dbe
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/pinning_htab.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "test_pinning_htab.skel.h"
+
+static void unpin_map(const char *map_name, const char *pin_path)
+{
+ struct test_pinning_htab *skel;
+ struct bpf_map *map;
+ int err;
+
+ skel = test_pinning_htab__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel open_and_load"))
+ return;
+
+ map = bpf_object__find_map_by_name(skel->obj, map_name);
+ if (!ASSERT_OK_PTR(map, "bpf_object__find_map_by_name"))
+ goto out;
+
+ err = bpf_map__pin(map, pin_path);
+ if (!ASSERT_OK(err, "bpf_map__pin"))
+ goto out;
+
+ err = bpf_map__unpin(map, pin_path);
+ ASSERT_OK(err, "bpf_map__unpin");
+out:
+ test_pinning_htab__destroy(skel);
+}
+
+void test_pinning_htab(void)
+{
+ if (test__start_subtest("timer_prealloc"))
+ unpin_map("timer_prealloc", "/sys/fs/bpf/timer_prealloc");
+ if (test__start_subtest("timer_no_prealloc"))
+ unpin_map("timer_no_prealloc", "/sys/fs/bpf/timer_no_prealloc");
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c
new file mode 100644
index 000000000000..c9efdd2a5b18
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c
@@ -0,0 +1,150 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include "stacktrace_ips.skel.h"
+
+#ifdef __x86_64__
+static int check_stacktrace_ips(int fd, __u32 key, int cnt, ...)
+{
+ __u64 ips[PERF_MAX_STACK_DEPTH];
+ struct ksyms *ksyms = NULL;
+ int i, err = 0;
+ va_list args;
+
+ /* sorted by addr */
+ ksyms = load_kallsyms_local();
+ if (!ASSERT_OK_PTR(ksyms, "load_kallsyms_local"))
+ return -1;
+
+ /* unlikely, but... */
+ if (!ASSERT_LT(cnt, PERF_MAX_STACK_DEPTH, "check_max"))
+ return -1;
+
+ err = bpf_map_lookup_elem(fd, &key, ips);
+ if (err)
+ goto out;
+
+ /*
+ * Compare all symbols provided via arguments with stacktrace ips,
+ * and their related symbol addresses.t
+ */
+ va_start(args, cnt);
+
+ for (i = 0; i < cnt; i++) {
+ unsigned long val;
+ struct ksym *ksym;
+
+ val = va_arg(args, unsigned long);
+ ksym = ksym_search_local(ksyms, ips[i]);
+ if (!ASSERT_OK_PTR(ksym, "ksym_search_local"))
+ break;
+ ASSERT_EQ(ksym->addr, val, "stack_cmp");
+ }
+
+ va_end(args);
+
+out:
+ free_kallsyms_local(ksyms);
+ return err;
+}
+
+static void test_stacktrace_ips_kprobe_multi(bool retprobe)
+{
+ LIBBPF_OPTS(bpf_kprobe_multi_opts, opts,
+ .retprobe = retprobe
+ );
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ struct stacktrace_ips *skel;
+
+ skel = stacktrace_ips__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "stacktrace_ips__open_and_load"))
+ return;
+
+ if (!skel->kconfig->CONFIG_UNWINDER_ORC) {
+ test__skip();
+ goto cleanup;
+ }
+
+ skel->links.kprobe_multi_test = bpf_program__attach_kprobe_multi_opts(
+ skel->progs.kprobe_multi_test,
+ "bpf_testmod_stacktrace_test", &opts);
+ if (!ASSERT_OK_PTR(skel->links.kprobe_multi_test, "bpf_program__attach_kprobe_multi_opts"))
+ goto cleanup;
+
+ trigger_module_test_read(1);
+
+ load_kallsyms();
+
+ check_stacktrace_ips(bpf_map__fd(skel->maps.stackmap), skel->bss->stack_key, 4,
+ ksym_get_addr("bpf_testmod_stacktrace_test_3"),
+ ksym_get_addr("bpf_testmod_stacktrace_test_2"),
+ ksym_get_addr("bpf_testmod_stacktrace_test_1"),
+ ksym_get_addr("bpf_testmod_test_read"));
+
+cleanup:
+ stacktrace_ips__destroy(skel);
+}
+
+static void test_stacktrace_ips_raw_tp(void)
+{
+ __u32 info_len = sizeof(struct bpf_prog_info);
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ struct bpf_prog_info info = {};
+ struct stacktrace_ips *skel;
+ __u64 bpf_prog_ksym = 0;
+ int err;
+
+ skel = stacktrace_ips__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "stacktrace_ips__open_and_load"))
+ return;
+
+ if (!skel->kconfig->CONFIG_UNWINDER_ORC) {
+ test__skip();
+ goto cleanup;
+ }
+
+ skel->links.rawtp_test = bpf_program__attach_raw_tracepoint(
+ skel->progs.rawtp_test,
+ "bpf_testmod_test_read");
+ if (!ASSERT_OK_PTR(skel->links.rawtp_test, "bpf_program__attach_raw_tracepoint"))
+ goto cleanup;
+
+ /* get bpf program address */
+ info.jited_ksyms = ptr_to_u64(&bpf_prog_ksym);
+ info.nr_jited_ksyms = 1;
+ err = bpf_prog_get_info_by_fd(bpf_program__fd(skel->progs.rawtp_test),
+ &info, &info_len);
+ if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd"))
+ goto cleanup;
+
+ trigger_module_test_read(1);
+
+ load_kallsyms();
+
+ check_stacktrace_ips(bpf_map__fd(skel->maps.stackmap), skel->bss->stack_key, 2,
+ bpf_prog_ksym,
+ ksym_get_addr("bpf_trace_run2"));
+
+cleanup:
+ stacktrace_ips__destroy(skel);
+}
+
+static void __test_stacktrace_ips(void)
+{
+ if (test__start_subtest("kprobe_multi"))
+ test_stacktrace_ips_kprobe_multi(false);
+ if (test__start_subtest("kretprobe_multi"))
+ test_stacktrace_ips_kprobe_multi(true);
+ if (test__start_subtest("raw_tp"))
+ test_stacktrace_ips_raw_tp();
+}
+#else
+static void __test_stacktrace_ips(void)
+{
+ test__skip();
+}
+#endif
+
+void test_stacktrace_ips(void)
+{
+ __test_stacktrace_ips();
+}
diff --git a/tools/testing/selftests/bpf/progs/iters_looping.c b/tools/testing/selftests/bpf/progs/iters_looping.c
index 05fa5ce7fc59..d00fd570255a 100644
--- a/tools/testing/selftests/bpf/progs/iters_looping.c
+++ b/tools/testing/selftests/bpf/progs/iters_looping.c
@@ -161,3 +161,56 @@ int simplest_loop(void *ctx)
return 0;
}
+
+__used
+static void iterator_with_diff_stack_depth(int x)
+{
+ struct bpf_iter_num iter;
+
+ asm volatile (
+ "if r1 == 42 goto 0f;"
+ "*(u64 *)(r10 - 128) = 0;"
+ "0:"
+ /* create iterator */
+ "r1 = %[iter];"
+ "r2 = 0;"
+ "r3 = 10;"
+ "call %[bpf_iter_num_new];"
+ "1:"
+ /* consume next item */
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_next];"
+ "if r0 == 0 goto 2f;"
+ "goto 1b;"
+ "2:"
+ /* destroy iterator */
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_destroy];"
+ :
+ : __imm_ptr(iter), ITER_HELPERS
+ : __clobber_common, "r6"
+ );
+}
+
+SEC("socket")
+__success
+__naked int widening_stack_size_bug(void *ctx)
+{
+ /*
+ * Depending on iterator_with_diff_stack_depth() parameter value,
+ * subprogram stack depth is either 8 or 128 bytes. Arrange values so
+ * that it is 128 on a first call and 8 on a second. This triggered a
+ * bug in verifier's widen_imprecise_scalars() logic.
+ */
+ asm volatile (
+ "r6 = 0;"
+ "r1 = 0;"
+ "1:"
+ "call iterator_with_diff_stack_depth;"
+ "r1 = 42;"
+ "r6 += 1;"
+ "if r6 < 2 goto 1b;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
diff --git a/tools/testing/selftests/bpf/progs/livepatch_trampoline.c b/tools/testing/selftests/bpf/progs/livepatch_trampoline.c
new file mode 100644
index 000000000000..15579d5bcd91
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/livepatch_trampoline.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+int fentry_hit;
+int fexit_hit;
+int my_pid;
+
+SEC("fentry/cmdline_proc_show")
+int BPF_PROG(fentry_cmdline)
+{
+ if (my_pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ fentry_hit = 1;
+ return 0;
+}
+
+SEC("fexit/cmdline_proc_show")
+int BPF_PROG(fexit_cmdline)
+{
+ if (my_pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ fexit_hit = 1;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/mptcp_sockmap.c b/tools/testing/selftests/bpf/progs/mptcp_sockmap.c
new file mode 100644
index 000000000000..d4eef0cbadb9
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/mptcp_sockmap.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bpf_tracing_net.h"
+
+char _license[] SEC("license") = "GPL";
+
+int sk_index;
+int redirect_idx;
+int trace_port;
+int helper_ret;
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+ __uint(max_entries, 100);
+} sock_map SEC(".maps");
+
+SEC("sockops")
+int mptcp_sockmap_inject(struct bpf_sock_ops *skops)
+{
+ struct bpf_sock *sk;
+
+ /* only accept specified connection */
+ if (skops->local_port != trace_port ||
+ skops->op != BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB)
+ return 1;
+
+ sk = skops->sk;
+ if (!sk)
+ return 1;
+
+ /* update sk handler */
+ helper_ret = bpf_sock_map_update(skops, &sock_map, &sk_index, BPF_NOEXIST);
+
+ return 1;
+}
+
+SEC("sk_skb/stream_verdict")
+int mptcp_sockmap_redirect(struct __sk_buff *skb)
+{
+ /* redirect skb to the sk under sock_map[redirect_idx] */
+ return bpf_sk_redirect_map(skb, &sock_map, redirect_idx, 0);
+}
diff --git a/tools/testing/selftests/bpf/progs/stacktrace_ips.c b/tools/testing/selftests/bpf/progs/stacktrace_ips.c
new file mode 100644
index 000000000000..a96c8150d7f5
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/stacktrace_ips.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#ifndef PERF_MAX_STACK_DEPTH
+#define PERF_MAX_STACK_DEPTH 127
+#endif
+
+typedef __u64 stack_trace_t[PERF_MAX_STACK_DEPTH];
+
+struct {
+ __uint(type, BPF_MAP_TYPE_STACK_TRACE);
+ __uint(max_entries, 16384);
+ __type(key, __u32);
+ __type(value, stack_trace_t);
+} stackmap SEC(".maps");
+
+extern bool CONFIG_UNWINDER_ORC __kconfig __weak;
+
+/*
+ * This function is here to have CONFIG_UNWINDER_ORC
+ * used and added to object BTF.
+ */
+int unused(void)
+{
+ return CONFIG_UNWINDER_ORC ? 0 : 1;
+}
+
+__u32 stack_key;
+
+SEC("kprobe.multi")
+int kprobe_multi_test(struct pt_regs *ctx)
+{
+ stack_key = bpf_get_stackid(ctx, &stackmap, 0);
+ return 0;
+}
+
+SEC("raw_tp/bpf_testmod_test_read")
+int rawtp_test(void *ctx)
+{
+ /* Skip ebpf program entry in the stack. */
+ stack_key = bpf_get_stackid(ctx, &stackmap, 0);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/stream_fail.c b/tools/testing/selftests/bpf/progs/stream_fail.c
index b4a0d0cc8ec8..3662515f0107 100644
--- a/tools/testing/selftests/bpf/progs/stream_fail.c
+++ b/tools/testing/selftests/bpf/progs/stream_fail.c
@@ -10,7 +10,7 @@ SEC("syscall")
__failure __msg("Possibly NULL pointer passed")
int stream_vprintk_null_arg(void *ctx)
{
- bpf_stream_vprintk(BPF_STDOUT, "", NULL, 0, NULL);
+ bpf_stream_vprintk_impl(BPF_STDOUT, "", NULL, 0, NULL);
return 0;
}
@@ -18,7 +18,7 @@ SEC("syscall")
__failure __msg("R3 type=scalar expected=")
int stream_vprintk_scalar_arg(void *ctx)
{
- bpf_stream_vprintk(BPF_STDOUT, "", (void *)46, 0, NULL);
+ bpf_stream_vprintk_impl(BPF_STDOUT, "", (void *)46, 0, NULL);
return 0;
}
@@ -26,7 +26,7 @@ SEC("syscall")
__failure __msg("arg#1 doesn't point to a const string")
int stream_vprintk_string_arg(void *ctx)
{
- bpf_stream_vprintk(BPF_STDOUT, ctx, NULL, 0, NULL);
+ bpf_stream_vprintk_impl(BPF_STDOUT, ctx, NULL, 0, NULL);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/task_work.c b/tools/testing/selftests/bpf/progs/task_work.c
index 23217f06a3ec..663a80990f8f 100644
--- a/tools/testing/selftests/bpf/progs/task_work.c
+++ b/tools/testing/selftests/bpf/progs/task_work.c
@@ -66,7 +66,7 @@ int oncpu_hash_map(struct pt_regs *args)
if (!work)
return 0;
- bpf_task_work_schedule_resume(task, &work->tw, &hmap, process_work, NULL);
+ bpf_task_work_schedule_resume_impl(task, &work->tw, &hmap, process_work, NULL);
return 0;
}
@@ -80,7 +80,7 @@ int oncpu_array_map(struct pt_regs *args)
work = bpf_map_lookup_elem(&arrmap, &key);
if (!work)
return 0;
- bpf_task_work_schedule_signal(task, &work->tw, &arrmap, process_work, NULL);
+ bpf_task_work_schedule_signal_impl(task, &work->tw, &arrmap, process_work, NULL);
return 0;
}
@@ -102,6 +102,6 @@ int oncpu_lru_map(struct pt_regs *args)
work = bpf_map_lookup_elem(&lrumap, &key);
if (!work || work->data[0])
return 0;
- bpf_task_work_schedule_resume(task, &work->tw, &lrumap, process_work, NULL);
+ bpf_task_work_schedule_resume_impl(task, &work->tw, &lrumap, process_work, NULL);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/task_work_fail.c b/tools/testing/selftests/bpf/progs/task_work_fail.c
index 77fe8f28facd..1270953fd092 100644
--- a/tools/testing/selftests/bpf/progs/task_work_fail.c
+++ b/tools/testing/selftests/bpf/progs/task_work_fail.c
@@ -53,7 +53,7 @@ int mismatch_map(struct pt_regs *args)
work = bpf_map_lookup_elem(&arrmap, &key);
if (!work)
return 0;
- bpf_task_work_schedule_resume(task, &work->tw, &hmap, process_work, NULL);
+ bpf_task_work_schedule_resume_impl(task, &work->tw, &hmap, process_work, NULL);
return 0;
}
@@ -65,7 +65,7 @@ int no_map_task_work(struct pt_regs *args)
struct bpf_task_work tw;
task = bpf_get_current_task_btf();
- bpf_task_work_schedule_resume(task, &tw, &hmap, process_work, NULL);
+ bpf_task_work_schedule_resume_impl(task, &tw, &hmap, process_work, NULL);
return 0;
}
@@ -76,7 +76,7 @@ int task_work_null(struct pt_regs *args)
struct task_struct *task;
task = bpf_get_current_task_btf();
- bpf_task_work_schedule_resume(task, NULL, &hmap, process_work, NULL);
+ bpf_task_work_schedule_resume_impl(task, NULL, &hmap, process_work, NULL);
return 0;
}
@@ -91,6 +91,6 @@ int map_null(struct pt_regs *args)
work = bpf_map_lookup_elem(&arrmap, &key);
if (!work)
return 0;
- bpf_task_work_schedule_resume(task, &work->tw, NULL, process_work, NULL);
+ bpf_task_work_schedule_resume_impl(task, &work->tw, NULL, process_work, NULL);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/task_work_stress.c b/tools/testing/selftests/bpf/progs/task_work_stress.c
index 90fca06fff56..55e555f7f41b 100644
--- a/tools/testing/selftests/bpf/progs/task_work_stress.c
+++ b/tools/testing/selftests/bpf/progs/task_work_stress.c
@@ -51,8 +51,8 @@ int schedule_task_work(void *ctx)
if (!work)
return 0;
}
- err = bpf_task_work_schedule_signal(bpf_get_current_task_btf(), &work->tw, &hmap,
- process_work, NULL);
+ err = bpf_task_work_schedule_signal_impl(bpf_get_current_task_btf(), &work->tw, &hmap,
+ process_work, NULL);
if (err)
__sync_fetch_and_add(&schedule_error, 1);
else
diff --git a/tools/testing/selftests/bpf/progs/test_pinning_htab.c b/tools/testing/selftests/bpf/progs/test_pinning_htab.c
new file mode 100644
index 000000000000..ae227930c73c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_pinning_htab.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct timer_val {
+ struct bpf_timer timer;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, __u32);
+ __type(value, struct timer_val);
+ __uint(max_entries, 1);
+} timer_prealloc SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, __u32);
+ __type(value, struct timer_val);
+ __uint(max_entries, 1);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+} timer_no_prealloc SEC(".maps");
diff --git a/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c b/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c
index 6630a92b1b47..1204fbc58178 100644
--- a/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c
+++ b/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c
@@ -225,7 +225,7 @@ int trusted_to_untrusted(void *ctx)
}
char mem[16];
-u32 off;
+u32 offset;
SEC("tp_btf/sys_enter")
__success
@@ -240,9 +240,9 @@ int anything_to_untrusted(void *ctx)
/* scalar to untrusted */
subprog_untrusted(0);
/* variable offset to untrusted (map) */
- subprog_untrusted((void *)mem + off);
+ subprog_untrusted((void *)mem + offset);
/* variable offset to untrusted (trusted) */
- subprog_untrusted((void *)bpf_get_current_task_btf() + off);
+ subprog_untrusted((void *)bpf_get_current_task_btf() + offset);
return 0;
}
@@ -298,12 +298,12 @@ int anything_to_untrusted_mem(void *ctx)
/* scalar to untrusted mem */
subprog_void_untrusted(0);
/* variable offset to untrusted mem (map) */
- subprog_void_untrusted((void *)mem + off);
+ subprog_void_untrusted((void *)mem + offset);
/* variable offset to untrusted mem (trusted) */
- subprog_void_untrusted(bpf_get_current_task_btf() + off);
+ subprog_void_untrusted(bpf_get_current_task_btf() + offset);
/* variable offset to untrusted char/enum (map) */
- subprog_char_untrusted(mem + off);
- subprog_enum_untrusted((void *)mem + off);
+ subprog_char_untrusted(mem + offset);
+ subprog_enum_untrusted((void *)mem + offset);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/verifier_vfs_accept.c b/tools/testing/selftests/bpf/progs/verifier_vfs_accept.c
index 3e2d76ee8050..55398c04290a 100644
--- a/tools/testing/selftests/bpf/progs/verifier_vfs_accept.c
+++ b/tools/testing/selftests/bpf/progs/verifier_vfs_accept.c
@@ -70,7 +70,7 @@ __success
int BPF_PROG(path_d_path_from_file_argument, struct file *file)
{
int ret;
- struct path *path;
+ const struct path *path;
/* The f_path member is a path which is embedded directly within a
* file. Therefore, a pointer to such embedded members are still
diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c
index 8074bc5f6f20..ed0a4721d8fd 100644
--- a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c
+++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c
@@ -417,6 +417,30 @@ noinline int bpf_testmod_fentry_test11(u64 a, void *b, short c, int d,
return a + (long)b + c + d + (long)e + f + g + h + i + j + k;
}
+noinline void bpf_testmod_stacktrace_test(void)
+{
+ /* used for stacktrace test as attach function */
+ asm volatile ("");
+}
+
+noinline void bpf_testmod_stacktrace_test_3(void)
+{
+ bpf_testmod_stacktrace_test();
+ asm volatile ("");
+}
+
+noinline void bpf_testmod_stacktrace_test_2(void)
+{
+ bpf_testmod_stacktrace_test_3();
+ asm volatile ("");
+}
+
+noinline void bpf_testmod_stacktrace_test_1(void)
+{
+ bpf_testmod_stacktrace_test_2();
+ asm volatile ("");
+}
+
int bpf_testmod_fentry_ok;
noinline ssize_t
@@ -497,6 +521,8 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj,
21, 22, 23, 24, 25, 26) != 231)
goto out;
+ bpf_testmod_stacktrace_test_1();
+
bpf_testmod_fentry_ok = 1;
out:
return -EIO; /* always fail */
diff --git a/tools/testing/selftests/cachestat/.gitignore b/tools/testing/selftests/cachestat/.gitignore
index d6c30b43a4bb..abbb13b6e96b 100644
--- a/tools/testing/selftests/cachestat/.gitignore
+++ b/tools/testing/selftests/cachestat/.gitignore
@@ -1,2 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
test_cachestat
+tmpshmcstat
diff --git a/tools/testing/selftests/cachestat/test_cachestat.c b/tools/testing/selftests/cachestat/test_cachestat.c
index c952640f163b..ab838bcb9ec5 100644
--- a/tools/testing/selftests/cachestat/test_cachestat.c
+++ b/tools/testing/selftests/cachestat/test_cachestat.c
@@ -226,7 +226,7 @@ bool run_cachestat_test(enum file_type type)
int syscall_ret;
size_t compute_len = PS * 512;
struct cachestat_range cs_range = { PS, compute_len };
- char *filename = "tmpshmcstat";
+ char *filename = "tmpshmcstat", *map;
struct cachestat cs;
bool ret = true;
int fd;
@@ -257,7 +257,7 @@ bool run_cachestat_test(enum file_type type)
}
break;
case FILE_MMAP:
- char *map = mmap(NULL, filesize, PROT_READ | PROT_WRITE,
+ map = mmap(NULL, filesize, PROT_READ | PROT_WRITE,
MAP_SHARED, fd, 0);
if (map == MAP_FAILED) {
diff --git a/tools/testing/selftests/cgroup/lib/include/cgroup_util.h b/tools/testing/selftests/cgroup/lib/include/cgroup_util.h
index 9dc90a1b386d..7ab2824ed7b5 100644
--- a/tools/testing/selftests/cgroup/lib/include/cgroup_util.h
+++ b/tools/testing/selftests/cgroup/lib/include/cgroup_util.h
@@ -25,6 +25,26 @@ static inline int values_close(long a, long b, int err)
return labs(a - b) <= (a + b) / 100 * err;
}
+/*
+ * Checks if two given values differ by less than err% of their sum and assert
+ * with detailed debug info if not.
+ */
+static inline int values_close_report(long a, long b, int err)
+{
+ long diff = labs(a - b);
+ long limit = (a + b) / 100 * err;
+ double actual_err = (a + b) ? (100.0 * diff / (a + b)) : 0.0;
+ int close = diff <= limit;
+
+ if (!close)
+ fprintf(stderr,
+ "[FAIL] actual=%ld expected=%ld | diff=%ld | limit=%ld | "
+ "tolerance=%d%% | actual_error=%.2f%%\n",
+ a, b, diff, limit, err, actual_err);
+
+ return close;
+}
+
extern ssize_t read_text(const char *path, char *buf, size_t max_len);
extern ssize_t write_text(const char *path, char *buf, ssize_t len);
diff --git a/tools/testing/selftests/cgroup/test_cpu.c b/tools/testing/selftests/cgroup/test_cpu.c
index 2a60e6c41940..d54e2317efff 100644
--- a/tools/testing/selftests/cgroup/test_cpu.c
+++ b/tools/testing/selftests/cgroup/test_cpu.c
@@ -219,7 +219,7 @@ static int test_cpucg_stats(const char *root)
if (user_usec <= 0)
goto cleanup;
- if (!values_close(usage_usec, expected_usage_usec, 1))
+ if (!values_close_report(usage_usec, expected_usage_usec, 1))
goto cleanup;
ret = KSFT_PASS;
@@ -291,7 +291,7 @@ static int test_cpucg_nice(const char *root)
user_usec = cg_read_key_long(cpucg, "cpu.stat", "user_usec");
nice_usec = cg_read_key_long(cpucg, "cpu.stat", "nice_usec");
- if (!values_close(nice_usec, expected_nice_usec, 1))
+ if (!values_close_report(nice_usec, expected_nice_usec, 1))
goto cleanup;
ret = KSFT_PASS;
@@ -404,7 +404,7 @@ overprovision_validate(const struct cpu_hogger *children, int num_children)
goto cleanup;
delta = children[i + 1].usage - children[i].usage;
- if (!values_close(delta, children[0].usage, 35))
+ if (!values_close_report(delta, children[0].usage, 35))
goto cleanup;
}
@@ -444,7 +444,7 @@ underprovision_validate(const struct cpu_hogger *children, int num_children)
int ret = KSFT_FAIL, i;
for (i = 0; i < num_children - 1; i++) {
- if (!values_close(children[i + 1].usage, children[0].usage, 15))
+ if (!values_close_report(children[i + 1].usage, children[0].usage, 15))
goto cleanup;
}
@@ -573,16 +573,16 @@ run_cpucg_nested_weight_test(const char *root, bool overprovisioned)
nested_leaf_usage = leaf[1].usage + leaf[2].usage;
if (overprovisioned) {
- if (!values_close(leaf[0].usage, nested_leaf_usage, 15))
+ if (!values_close_report(leaf[0].usage, nested_leaf_usage, 15))
goto cleanup;
- } else if (!values_close(leaf[0].usage * 2, nested_leaf_usage, 15))
+ } else if (!values_close_report(leaf[0].usage * 2, nested_leaf_usage, 15))
goto cleanup;
child_usage = cg_read_key_long(child, "cpu.stat", "usage_usec");
if (child_usage <= 0)
goto cleanup;
- if (!values_close(child_usage, nested_leaf_usage, 1))
+ if (!values_close_report(child_usage, nested_leaf_usage, 1))
goto cleanup;
ret = KSFT_PASS;
@@ -691,7 +691,7 @@ static int test_cpucg_max(const char *root)
expected_usage_usec
= n_periods * quota_usec + MIN(remainder_usec, quota_usec);
- if (!values_close(usage_usec, expected_usage_usec, 10))
+ if (!values_close_report(usage_usec, expected_usage_usec, 10))
goto cleanup;
ret = KSFT_PASS;
@@ -762,7 +762,7 @@ static int test_cpucg_max_nested(const char *root)
expected_usage_usec
= n_periods * quota_usec + MIN(remainder_usec, quota_usec);
- if (!values_close(usage_usec, expected_usage_usec, 10))
+ if (!values_close_report(usage_usec, expected_usage_usec, 10))
goto cleanup;
ret = KSFT_PASS;
diff --git a/tools/testing/selftests/coredump/.gitignore b/tools/testing/selftests/coredump/.gitignore
new file mode 100644
index 000000000000..097f52db0be9
--- /dev/null
+++ b/tools/testing/selftests/coredump/.gitignore
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
+stackdump_test
+coredump_socket_test
+coredump_socket_protocol_test
diff --git a/tools/testing/selftests/coredump/Makefile b/tools/testing/selftests/coredump/Makefile
index 77b3665c73c7..dece1a31d561 100644
--- a/tools/testing/selftests/coredump/Makefile
+++ b/tools/testing/selftests/coredump/Makefile
@@ -1,7 +1,13 @@
# SPDX-License-Identifier: GPL-2.0-only
CFLAGS += -Wall -O0 -g $(KHDR_INCLUDES) $(TOOLS_INCLUDES)
-TEST_GEN_PROGS := stackdump_test
+TEST_GEN_PROGS := stackdump_test \
+ coredump_socket_test \
+ coredump_socket_protocol_test
TEST_FILES := stackdump
include ../lib.mk
+
+$(OUTPUT)/stackdump_test: coredump_test_helpers.c
+$(OUTPUT)/coredump_socket_test: coredump_test_helpers.c
+$(OUTPUT)/coredump_socket_protocol_test: coredump_test_helpers.c
diff --git a/tools/testing/selftests/coredump/coredump_socket_protocol_test.c b/tools/testing/selftests/coredump/coredump_socket_protocol_test.c
new file mode 100644
index 000000000000..d19b6717c53e
--- /dev/null
+++ b/tools/testing/selftests/coredump/coredump_socket_protocol_test.c
@@ -0,0 +1,1568 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <sys/stat.h>
+#include <sys/epoll.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+
+#include "coredump_test.h"
+
+#define NUM_CRASHING_COREDUMPS 5
+
+FIXTURE_SETUP(coredump)
+{
+ FILE *file;
+ int ret;
+
+ self->pid_coredump_server = -ESRCH;
+ self->fd_tmpfs_detached = -1;
+ file = fopen("/proc/sys/kernel/core_pattern", "r");
+ ASSERT_NE(NULL, file);
+
+ ret = fread(self->original_core_pattern, 1, sizeof(self->original_core_pattern), file);
+ ASSERT_TRUE(ret || feof(file));
+ ASSERT_LT(ret, sizeof(self->original_core_pattern));
+
+ self->original_core_pattern[ret] = '\0';
+ self->fd_tmpfs_detached = create_detached_tmpfs();
+ ASSERT_GE(self->fd_tmpfs_detached, 0);
+
+ ret = fclose(file);
+ ASSERT_EQ(0, ret);
+}
+
+FIXTURE_TEARDOWN(coredump)
+{
+ const char *reason;
+ FILE *file;
+ int ret, status;
+
+ if (self->pid_coredump_server > 0) {
+ kill(self->pid_coredump_server, SIGTERM);
+ waitpid(self->pid_coredump_server, &status, 0);
+ }
+ unlink("/tmp/coredump.file");
+ unlink("/tmp/coredump.socket");
+
+ file = fopen("/proc/sys/kernel/core_pattern", "w");
+ if (!file) {
+ reason = "Unable to open core_pattern";
+ goto fail;
+ }
+
+ ret = fprintf(file, "%s", self->original_core_pattern);
+ if (ret < 0) {
+ reason = "Unable to write to core_pattern";
+ goto fail;
+ }
+
+ ret = fclose(file);
+ if (ret) {
+ reason = "Unable to close core_pattern";
+ goto fail;
+ }
+
+ if (self->fd_tmpfs_detached >= 0) {
+ ret = close(self->fd_tmpfs_detached);
+ if (ret < 0) {
+ reason = "Unable to close detached tmpfs";
+ goto fail;
+ }
+ self->fd_tmpfs_detached = -1;
+ }
+
+ return;
+fail:
+ /* This should never happen */
+ fprintf(stderr, "Failed to cleanup coredump test: %s\n", reason);
+}
+
+TEST_F(coredump, socket_request_kernel)
+{
+ int pidfd, ret, status;
+ pid_t pid, pid_coredump_server;
+ struct stat st;
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ struct coredump_req req = {};
+ int fd_server = -1, fd_coredump = -1, fd_core_file = -1, fd_peer_pidfd = -1;
+ int exit_code = EXIT_FAILURE;
+
+ close(ipc_sockets[0]);
+
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0) {
+ fprintf(stderr, "socket_request_kernel: create_and_listen_unix_socket failed: %m\n");
+ goto out;
+ }
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+ fprintf(stderr, "socket_request_kernel: write_nointr to ipc socket failed: %m\n");
+ goto out;
+ }
+
+ close(ipc_sockets[1]);
+
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0) {
+ fprintf(stderr, "socket_request_kernel: accept4 failed: %m\n");
+ goto out;
+ }
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0) {
+ fprintf(stderr, "socket_request_kernel: get_peer_pidfd failed\n");
+ goto out;
+ }
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info)) {
+ fprintf(stderr, "socket_request_kernel: get_pidfd_info failed\n");
+ goto out;
+ }
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP)) {
+ fprintf(stderr, "socket_request_kernel: PIDFD_INFO_COREDUMP not set in mask\n");
+ goto out;
+ }
+
+ if (!(info.coredump_mask & PIDFD_COREDUMPED)) {
+ fprintf(stderr, "socket_request_kernel: PIDFD_COREDUMPED not set in coredump_mask\n");
+ goto out;
+ }
+
+ fd_core_file = creat("/tmp/coredump.file", 0644);
+ if (fd_core_file < 0) {
+ fprintf(stderr, "socket_request_kernel: creat coredump file failed: %m\n");
+ goto out;
+ }
+
+ if (!read_coredump_req(fd_coredump, &req)) {
+ fprintf(stderr, "socket_request_kernel: read_coredump_req failed\n");
+ goto out;
+ }
+
+ if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
+ COREDUMP_KERNEL | COREDUMP_USERSPACE |
+ COREDUMP_REJECT | COREDUMP_WAIT)) {
+ fprintf(stderr, "socket_request_kernel: check_coredump_req failed\n");
+ goto out;
+ }
+
+ if (!send_coredump_ack(fd_coredump, &req,
+ COREDUMP_KERNEL | COREDUMP_WAIT, 0)) {
+ fprintf(stderr, "socket_request_kernel: send_coredump_ack failed\n");
+ goto out;
+ }
+
+ if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK)) {
+ fprintf(stderr, "socket_request_kernel: read_marker COREDUMP_MARK_REQACK failed\n");
+ goto out;
+ }
+
+ for (;;) {
+ char buffer[4096];
+ ssize_t bytes_read, bytes_write;
+
+ bytes_read = read(fd_coredump, buffer, sizeof(buffer));
+ if (bytes_read < 0) {
+ fprintf(stderr, "socket_request_kernel: read from coredump socket failed: %m\n");
+ goto out;
+ }
+
+ if (bytes_read == 0)
+ break;
+
+ bytes_write = write(fd_core_file, buffer, bytes_read);
+ if (bytes_read != bytes_write) {
+ if (bytes_write < 0 && errno == ENOSPC)
+ continue;
+ fprintf(stderr, "socket_request_kernel: write to core file failed (read=%zd, write=%zd): %m\n",
+ bytes_read, bytes_write);
+ goto out;
+ }
+ }
+
+ exit_code = EXIT_SUCCESS;
+ fprintf(stderr, "socket_request_kernel: completed successfully\n");
+out:
+ if (fd_core_file >= 0)
+ close(fd_core_file);
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ crashing_child();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_TRUE(WCOREDUMP(status));
+
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+ ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+ ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+
+ ASSERT_EQ(stat("/tmp/coredump.file", &st), 0);
+ ASSERT_GT(st.st_size, 0);
+ system("file /tmp/coredump.file");
+}
+
+TEST_F(coredump, socket_request_userspace)
+{
+ int pidfd, ret, status;
+ pid_t pid, pid_coredump_server;
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ struct coredump_req req = {};
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
+ int exit_code = EXIT_FAILURE;
+
+ close(ipc_sockets[0]);
+
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0) {
+ fprintf(stderr, "socket_request_userspace: create_and_listen_unix_socket failed: %m\n");
+ goto out;
+ }
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+ fprintf(stderr, "socket_request_userspace: write_nointr to ipc socket failed: %m\n");
+ goto out;
+ }
+
+ close(ipc_sockets[1]);
+
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0) {
+ fprintf(stderr, "socket_request_userspace: accept4 failed: %m\n");
+ goto out;
+ }
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0) {
+ fprintf(stderr, "socket_request_userspace: get_peer_pidfd failed\n");
+ goto out;
+ }
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info)) {
+ fprintf(stderr, "socket_request_userspace: get_pidfd_info failed\n");
+ goto out;
+ }
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP)) {
+ fprintf(stderr, "socket_request_userspace: PIDFD_INFO_COREDUMP not set in mask\n");
+ goto out;
+ }
+
+ if (!(info.coredump_mask & PIDFD_COREDUMPED)) {
+ fprintf(stderr, "socket_request_userspace: PIDFD_COREDUMPED not set in coredump_mask\n");
+ goto out;
+ }
+
+ if (!read_coredump_req(fd_coredump, &req)) {
+ fprintf(stderr, "socket_request_userspace: read_coredump_req failed\n");
+ goto out;
+ }
+
+ if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
+ COREDUMP_KERNEL | COREDUMP_USERSPACE |
+ COREDUMP_REJECT | COREDUMP_WAIT)) {
+ fprintf(stderr, "socket_request_userspace: check_coredump_req failed\n");
+ goto out;
+ }
+
+ if (!send_coredump_ack(fd_coredump, &req,
+ COREDUMP_USERSPACE | COREDUMP_WAIT, 0)) {
+ fprintf(stderr, "socket_request_userspace: send_coredump_ack failed\n");
+ goto out;
+ }
+
+ if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK)) {
+ fprintf(stderr, "socket_request_userspace: read_marker COREDUMP_MARK_REQACK failed\n");
+ goto out;
+ }
+
+ for (;;) {
+ char buffer[4096];
+ ssize_t bytes_read;
+
+ bytes_read = read(fd_coredump, buffer, sizeof(buffer));
+ if (bytes_read > 0) {
+ fprintf(stderr, "socket_request_userspace: unexpected data received (expected no coredump data)\n");
+ goto out;
+ }
+
+ if (bytes_read < 0) {
+ fprintf(stderr, "socket_request_userspace: read from coredump socket failed: %m\n");
+ goto out;
+ }
+
+ if (bytes_read == 0)
+ break;
+ }
+
+ exit_code = EXIT_SUCCESS;
+ fprintf(stderr, "socket_request_userspace: completed successfully\n");
+out:
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ crashing_child();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_TRUE(WCOREDUMP(status));
+
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+ ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+ ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+TEST_F(coredump, socket_request_reject)
+{
+ int pidfd, ret, status;
+ pid_t pid, pid_coredump_server;
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ struct coredump_req req = {};
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
+ int exit_code = EXIT_FAILURE;
+
+ close(ipc_sockets[0]);
+
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0) {
+ fprintf(stderr, "socket_request_reject: create_and_listen_unix_socket failed: %m\n");
+ goto out;
+ }
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+ fprintf(stderr, "socket_request_reject: write_nointr to ipc socket failed: %m\n");
+ goto out;
+ }
+
+ close(ipc_sockets[1]);
+
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0) {
+ fprintf(stderr, "socket_request_reject: accept4 failed: %m\n");
+ goto out;
+ }
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0) {
+ fprintf(stderr, "socket_request_reject: get_peer_pidfd failed\n");
+ goto out;
+ }
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info)) {
+ fprintf(stderr, "socket_request_reject: get_pidfd_info failed\n");
+ goto out;
+ }
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP)) {
+ fprintf(stderr, "socket_request_reject: PIDFD_INFO_COREDUMP not set in mask\n");
+ goto out;
+ }
+
+ if (!(info.coredump_mask & PIDFD_COREDUMPED)) {
+ fprintf(stderr, "socket_request_reject: PIDFD_COREDUMPED not set in coredump_mask\n");
+ goto out;
+ }
+
+ if (!read_coredump_req(fd_coredump, &req)) {
+ fprintf(stderr, "socket_request_reject: read_coredump_req failed\n");
+ goto out;
+ }
+
+ if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
+ COREDUMP_KERNEL | COREDUMP_USERSPACE |
+ COREDUMP_REJECT | COREDUMP_WAIT)) {
+ fprintf(stderr, "socket_request_reject: check_coredump_req failed\n");
+ goto out;
+ }
+
+ if (!send_coredump_ack(fd_coredump, &req,
+ COREDUMP_REJECT | COREDUMP_WAIT, 0)) {
+ fprintf(stderr, "socket_request_reject: send_coredump_ack failed\n");
+ goto out;
+ }
+
+ if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK)) {
+ fprintf(stderr, "socket_request_reject: read_marker COREDUMP_MARK_REQACK failed\n");
+ goto out;
+ }
+
+ for (;;) {
+ char buffer[4096];
+ ssize_t bytes_read;
+
+ bytes_read = read(fd_coredump, buffer, sizeof(buffer));
+ if (bytes_read > 0) {
+ fprintf(stderr, "socket_request_reject: unexpected data received (expected no coredump data for REJECT)\n");
+ goto out;
+ }
+
+ if (bytes_read < 0) {
+ fprintf(stderr, "socket_request_reject: read from coredump socket failed: %m\n");
+ goto out;
+ }
+
+ if (bytes_read == 0)
+ break;
+ }
+
+ exit_code = EXIT_SUCCESS;
+ fprintf(stderr, "socket_request_reject: completed successfully\n");
+out:
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ crashing_child();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_FALSE(WCOREDUMP(status));
+
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+ ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+ ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+TEST_F(coredump, socket_request_invalid_flag_combination)
+{
+ int pidfd, ret, status;
+ pid_t pid, pid_coredump_server;
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ struct coredump_req req = {};
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
+ int exit_code = EXIT_FAILURE;
+
+ close(ipc_sockets[0]);
+
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0) {
+ fprintf(stderr, "socket_request_invalid_flag_combination: create_and_listen_unix_socket failed: %m\n");
+ goto out;
+ }
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+ fprintf(stderr, "socket_request_invalid_flag_combination: write_nointr to ipc socket failed: %m\n");
+ goto out;
+ }
+
+ close(ipc_sockets[1]);
+
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0) {
+ fprintf(stderr, "socket_request_invalid_flag_combination: accept4 failed: %m\n");
+ goto out;
+ }
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0) {
+ fprintf(stderr, "socket_request_invalid_flag_combination: get_peer_pidfd failed\n");
+ goto out;
+ }
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info)) {
+ fprintf(stderr, "socket_request_invalid_flag_combination: get_pidfd_info failed\n");
+ goto out;
+ }
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP)) {
+ fprintf(stderr, "socket_request_invalid_flag_combination: PIDFD_INFO_COREDUMP not set in mask\n");
+ goto out;
+ }
+
+ if (!(info.coredump_mask & PIDFD_COREDUMPED)) {
+ fprintf(stderr, "socket_request_invalid_flag_combination: PIDFD_COREDUMPED not set in coredump_mask\n");
+ goto out;
+ }
+
+ if (!read_coredump_req(fd_coredump, &req)) {
+ fprintf(stderr, "socket_request_invalid_flag_combination: read_coredump_req failed\n");
+ goto out;
+ }
+
+ if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
+ COREDUMP_KERNEL | COREDUMP_USERSPACE |
+ COREDUMP_REJECT | COREDUMP_WAIT)) {
+ fprintf(stderr, "socket_request_invalid_flag_combination: check_coredump_req failed\n");
+ goto out;
+ }
+
+ if (!send_coredump_ack(fd_coredump, &req,
+ COREDUMP_KERNEL | COREDUMP_REJECT | COREDUMP_WAIT, 0)) {
+ fprintf(stderr, "socket_request_invalid_flag_combination: send_coredump_ack failed\n");
+ goto out;
+ }
+
+ if (!read_marker(fd_coredump, COREDUMP_MARK_CONFLICTING)) {
+ fprintf(stderr, "socket_request_invalid_flag_combination: read_marker COREDUMP_MARK_CONFLICTING failed\n");
+ goto out;
+ }
+
+ exit_code = EXIT_SUCCESS;
+ fprintf(stderr, "socket_request_invalid_flag_combination: completed successfully\n");
+out:
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ crashing_child();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_FALSE(WCOREDUMP(status));
+
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+ ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+ ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+TEST_F(coredump, socket_request_unknown_flag)
+{
+ int pidfd, ret, status;
+ pid_t pid, pid_coredump_server;
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ struct coredump_req req = {};
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
+ int exit_code = EXIT_FAILURE;
+
+ close(ipc_sockets[0]);
+
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0) {
+ fprintf(stderr, "socket_request_unknown_flag: create_and_listen_unix_socket failed: %m\n");
+ goto out;
+ }
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+ fprintf(stderr, "socket_request_unknown_flag: write_nointr to ipc socket failed: %m\n");
+ goto out;
+ }
+
+ close(ipc_sockets[1]);
+
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0) {
+ fprintf(stderr, "socket_request_unknown_flag: accept4 failed: %m\n");
+ goto out;
+ }
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0) {
+ fprintf(stderr, "socket_request_unknown_flag: get_peer_pidfd failed\n");
+ goto out;
+ }
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info)) {
+ fprintf(stderr, "socket_request_unknown_flag: get_pidfd_info failed\n");
+ goto out;
+ }
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP)) {
+ fprintf(stderr, "socket_request_unknown_flag: PIDFD_INFO_COREDUMP not set in mask\n");
+ goto out;
+ }
+
+ if (!(info.coredump_mask & PIDFD_COREDUMPED)) {
+ fprintf(stderr, "socket_request_unknown_flag: PIDFD_COREDUMPED not set in coredump_mask\n");
+ goto out;
+ }
+
+ if (!read_coredump_req(fd_coredump, &req)) {
+ fprintf(stderr, "socket_request_unknown_flag: read_coredump_req failed\n");
+ goto out;
+ }
+
+ if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
+ COREDUMP_KERNEL | COREDUMP_USERSPACE |
+ COREDUMP_REJECT | COREDUMP_WAIT)) {
+ fprintf(stderr, "socket_request_unknown_flag: check_coredump_req failed\n");
+ goto out;
+ }
+
+ if (!send_coredump_ack(fd_coredump, &req, (1ULL << 63), 0)) {
+ fprintf(stderr, "socket_request_unknown_flag: send_coredump_ack failed\n");
+ goto out;
+ }
+
+ if (!read_marker(fd_coredump, COREDUMP_MARK_UNSUPPORTED)) {
+ fprintf(stderr, "socket_request_unknown_flag: read_marker COREDUMP_MARK_UNSUPPORTED failed\n");
+ goto out;
+ }
+
+ exit_code = EXIT_SUCCESS;
+ fprintf(stderr, "socket_request_unknown_flag: completed successfully\n");
+out:
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ crashing_child();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_FALSE(WCOREDUMP(status));
+
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+ ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+ ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+TEST_F(coredump, socket_request_invalid_size_small)
+{
+ int pidfd, ret, status;
+ pid_t pid, pid_coredump_server;
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ struct coredump_req req = {};
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
+ int exit_code = EXIT_FAILURE;
+
+ close(ipc_sockets[0]);
+
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0) {
+ fprintf(stderr, "socket_request_invalid_size_small: create_and_listen_unix_socket failed: %m\n");
+ goto out;
+ }
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+ fprintf(stderr, "socket_request_invalid_size_small: write_nointr to ipc socket failed: %m\n");
+ goto out;
+ }
+
+ close(ipc_sockets[1]);
+
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0) {
+ fprintf(stderr, "socket_request_invalid_size_small: accept4 failed: %m\n");
+ goto out;
+ }
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0) {
+ fprintf(stderr, "socket_request_invalid_size_small: get_peer_pidfd failed\n");
+ goto out;
+ }
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info)) {
+ fprintf(stderr, "socket_request_invalid_size_small: get_pidfd_info failed\n");
+ goto out;
+ }
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP)) {
+ fprintf(stderr, "socket_request_invalid_size_small: PIDFD_INFO_COREDUMP not set in mask\n");
+ goto out;
+ }
+
+ if (!(info.coredump_mask & PIDFD_COREDUMPED)) {
+ fprintf(stderr, "socket_request_invalid_size_small: PIDFD_COREDUMPED not set in coredump_mask\n");
+ goto out;
+ }
+
+ if (!read_coredump_req(fd_coredump, &req)) {
+ fprintf(stderr, "socket_request_invalid_size_small: read_coredump_req failed\n");
+ goto out;
+ }
+
+ if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
+ COREDUMP_KERNEL | COREDUMP_USERSPACE |
+ COREDUMP_REJECT | COREDUMP_WAIT)) {
+ fprintf(stderr, "socket_request_invalid_size_small: check_coredump_req failed\n");
+ goto out;
+ }
+
+ if (!send_coredump_ack(fd_coredump, &req,
+ COREDUMP_REJECT | COREDUMP_WAIT,
+ COREDUMP_ACK_SIZE_VER0 / 2)) {
+ fprintf(stderr, "socket_request_invalid_size_small: send_coredump_ack failed\n");
+ goto out;
+ }
+
+ if (!read_marker(fd_coredump, COREDUMP_MARK_MINSIZE)) {
+ fprintf(stderr, "socket_request_invalid_size_small: read_marker COREDUMP_MARK_MINSIZE failed\n");
+ goto out;
+ }
+
+ exit_code = EXIT_SUCCESS;
+ fprintf(stderr, "socket_request_invalid_size_small: completed successfully\n");
+out:
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ crashing_child();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_FALSE(WCOREDUMP(status));
+
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+ ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+ ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+TEST_F(coredump, socket_request_invalid_size_large)
+{
+ int pidfd, ret, status;
+ pid_t pid, pid_coredump_server;
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ struct coredump_req req = {};
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
+ int exit_code = EXIT_FAILURE;
+
+ close(ipc_sockets[0]);
+
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0) {
+ fprintf(stderr, "socket_request_invalid_size_large: create_and_listen_unix_socket failed: %m\n");
+ goto out;
+ }
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+ fprintf(stderr, "socket_request_invalid_size_large: write_nointr to ipc socket failed: %m\n");
+ goto out;
+ }
+
+ close(ipc_sockets[1]);
+
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0) {
+ fprintf(stderr, "socket_request_invalid_size_large: accept4 failed: %m\n");
+ goto out;
+ }
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0) {
+ fprintf(stderr, "socket_request_invalid_size_large: get_peer_pidfd failed\n");
+ goto out;
+ }
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info)) {
+ fprintf(stderr, "socket_request_invalid_size_large: get_pidfd_info failed\n");
+ goto out;
+ }
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP)) {
+ fprintf(stderr, "socket_request_invalid_size_large: PIDFD_INFO_COREDUMP not set in mask\n");
+ goto out;
+ }
+
+ if (!(info.coredump_mask & PIDFD_COREDUMPED)) {
+ fprintf(stderr, "socket_request_invalid_size_large: PIDFD_COREDUMPED not set in coredump_mask\n");
+ goto out;
+ }
+
+ if (!read_coredump_req(fd_coredump, &req)) {
+ fprintf(stderr, "socket_request_invalid_size_large: read_coredump_req failed\n");
+ goto out;
+ }
+
+ if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
+ COREDUMP_KERNEL | COREDUMP_USERSPACE |
+ COREDUMP_REJECT | COREDUMP_WAIT)) {
+ fprintf(stderr, "socket_request_invalid_size_large: check_coredump_req failed\n");
+ goto out;
+ }
+
+ if (!send_coredump_ack(fd_coredump, &req,
+ COREDUMP_REJECT | COREDUMP_WAIT,
+ COREDUMP_ACK_SIZE_VER0 + PAGE_SIZE)) {
+ fprintf(stderr, "socket_request_invalid_size_large: send_coredump_ack failed\n");
+ goto out;
+ }
+
+ if (!read_marker(fd_coredump, COREDUMP_MARK_MAXSIZE)) {
+ fprintf(stderr, "socket_request_invalid_size_large: read_marker COREDUMP_MARK_MAXSIZE failed\n");
+ goto out;
+ }
+
+ exit_code = EXIT_SUCCESS;
+ fprintf(stderr, "socket_request_invalid_size_large: completed successfully\n");
+out:
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ crashing_child();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_FALSE(WCOREDUMP(status));
+
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+ ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+ ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+/*
+ * Test: PIDFD_INFO_COREDUMP_SIGNAL via socket coredump with SIGSEGV
+ *
+ * Verify that when using socket-based coredump protocol,
+ * the coredump_signal field is correctly exposed as SIGSEGV.
+ */
+TEST_F(coredump, socket_coredump_signal_sigsegv)
+{
+ int pidfd, ret, status;
+ pid_t pid, pid_coredump_server;
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ struct coredump_req req = {};
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
+ int exit_code = EXIT_FAILURE;
+
+ close(ipc_sockets[0]);
+
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: create_and_listen_unix_socket failed: %m\n");
+ goto out;
+ }
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: write_nointr to ipc socket failed: %m\n");
+ goto out;
+ }
+
+ close(ipc_sockets[1]);
+
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: accept4 failed: %m\n");
+ goto out;
+ }
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: get_peer_pidfd failed\n");
+ goto out;
+ }
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info)) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: get_pidfd_info failed\n");
+ goto out;
+ }
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP)) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: PIDFD_INFO_COREDUMP not set in mask\n");
+ goto out;
+ }
+
+ if (!(info.coredump_mask & PIDFD_COREDUMPED)) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: PIDFD_COREDUMPED not set in coredump_mask\n");
+ goto out;
+ }
+
+ /* Verify coredump_signal is available and correct */
+ if (!(info.mask & PIDFD_INFO_COREDUMP_SIGNAL)) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: PIDFD_INFO_COREDUMP_SIGNAL not set in mask\n");
+ goto out;
+ }
+
+ if (info.coredump_signal != SIGSEGV) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: coredump_signal=%d, expected SIGSEGV=%d\n",
+ info.coredump_signal, SIGSEGV);
+ goto out;
+ }
+
+ if (!read_coredump_req(fd_coredump, &req)) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: read_coredump_req failed\n");
+ goto out;
+ }
+
+ if (!send_coredump_ack(fd_coredump, &req,
+ COREDUMP_REJECT | COREDUMP_WAIT, 0)) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: send_coredump_ack failed\n");
+ goto out;
+ }
+
+ if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK)) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: read_marker COREDUMP_MARK_REQACK failed\n");
+ goto out;
+ }
+
+ exit_code = EXIT_SUCCESS;
+ fprintf(stderr, "socket_coredump_signal_sigsegv: completed successfully\n");
+out:
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ crashing_child();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_EQ(WTERMSIG(status), SIGSEGV);
+
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+ ASSERT_TRUE(!!(info.mask & PIDFD_INFO_COREDUMP));
+ ASSERT_TRUE(!!(info.mask & PIDFD_INFO_COREDUMP_SIGNAL));
+ ASSERT_EQ(info.coredump_signal, SIGSEGV);
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+/*
+ * Test: PIDFD_INFO_COREDUMP_SIGNAL via socket coredump with SIGABRT
+ *
+ * Verify that when using socket-based coredump protocol,
+ * the coredump_signal field is correctly exposed as SIGABRT.
+ */
+TEST_F(coredump, socket_coredump_signal_sigabrt)
+{
+ int pidfd, ret, status;
+ pid_t pid, pid_coredump_server;
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ struct coredump_req req = {};
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
+ int exit_code = EXIT_FAILURE;
+
+ close(ipc_sockets[0]);
+
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: create_and_listen_unix_socket failed: %m\n");
+ goto out;
+ }
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: write_nointr to ipc socket failed: %m\n");
+ goto out;
+ }
+
+ close(ipc_sockets[1]);
+
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: accept4 failed: %m\n");
+ goto out;
+ }
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: get_peer_pidfd failed\n");
+ goto out;
+ }
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info)) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: get_pidfd_info failed\n");
+ goto out;
+ }
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP)) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: PIDFD_INFO_COREDUMP not set in mask\n");
+ goto out;
+ }
+
+ if (!(info.coredump_mask & PIDFD_COREDUMPED)) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: PIDFD_COREDUMPED not set in coredump_mask\n");
+ goto out;
+ }
+
+ /* Verify coredump_signal is available and correct */
+ if (!(info.mask & PIDFD_INFO_COREDUMP_SIGNAL)) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: PIDFD_INFO_COREDUMP_SIGNAL not set in mask\n");
+ goto out;
+ }
+
+ if (info.coredump_signal != SIGABRT) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: coredump_signal=%d, expected SIGABRT=%d\n",
+ info.coredump_signal, SIGABRT);
+ goto out;
+ }
+
+ if (!read_coredump_req(fd_coredump, &req)) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: read_coredump_req failed\n");
+ goto out;
+ }
+
+ if (!send_coredump_ack(fd_coredump, &req,
+ COREDUMP_REJECT | COREDUMP_WAIT, 0)) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: send_coredump_ack failed\n");
+ goto out;
+ }
+
+ if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK)) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: read_marker COREDUMP_MARK_REQACK failed\n");
+ goto out;
+ }
+
+ exit_code = EXIT_SUCCESS;
+ fprintf(stderr, "socket_coredump_signal_sigabrt: completed successfully\n");
+out:
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ abort();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_EQ(WTERMSIG(status), SIGABRT);
+
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+ ASSERT_TRUE(!!(info.mask & PIDFD_INFO_COREDUMP));
+ ASSERT_TRUE(!!(info.mask & PIDFD_INFO_COREDUMP_SIGNAL));
+ ASSERT_EQ(info.coredump_signal, SIGABRT);
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+TEST_F_TIMEOUT(coredump, socket_multiple_crashing_coredumps, 500)
+{
+ int pidfd[NUM_CRASHING_COREDUMPS], status[NUM_CRASHING_COREDUMPS];
+ pid_t pid[NUM_CRASHING_COREDUMPS], pid_coredump_server;
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets), 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1, fd_core_file = -1;
+ int exit_code = EXIT_FAILURE;
+ struct coredump_req req = {};
+
+ close(ipc_sockets[0]);
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0) {
+ fprintf(stderr, "Failed to create and listen on unix socket\n");
+ goto out;
+ }
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+ fprintf(stderr, "Failed to notify parent via ipc socket\n");
+ goto out;
+ }
+ close(ipc_sockets[1]);
+
+ for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) {
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0) {
+ fprintf(stderr, "accept4 failed: %m\n");
+ goto out;
+ }
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0) {
+ fprintf(stderr, "get_peer_pidfd failed for fd %d: %m\n", fd_coredump);
+ goto out;
+ }
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info)) {
+ fprintf(stderr, "get_pidfd_info failed for fd %d\n", fd_peer_pidfd);
+ goto out;
+ }
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP)) {
+ fprintf(stderr, "pidfd info missing PIDFD_INFO_COREDUMP for fd %d\n", fd_peer_pidfd);
+ goto out;
+ }
+ if (!(info.coredump_mask & PIDFD_COREDUMPED)) {
+ fprintf(stderr, "pidfd info missing PIDFD_COREDUMPED for fd %d\n", fd_peer_pidfd);
+ goto out;
+ }
+
+ if (!read_coredump_req(fd_coredump, &req)) {
+ fprintf(stderr, "read_coredump_req failed for fd %d\n", fd_coredump);
+ goto out;
+ }
+
+ if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
+ COREDUMP_KERNEL | COREDUMP_USERSPACE |
+ COREDUMP_REJECT | COREDUMP_WAIT)) {
+ fprintf(stderr, "check_coredump_req failed for fd %d\n", fd_coredump);
+ goto out;
+ }
+
+ if (!send_coredump_ack(fd_coredump, &req,
+ COREDUMP_KERNEL | COREDUMP_WAIT, 0)) {
+ fprintf(stderr, "send_coredump_ack failed for fd %d\n", fd_coredump);
+ goto out;
+ }
+
+ if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK)) {
+ fprintf(stderr, "read_marker failed for fd %d\n", fd_coredump);
+ goto out;
+ }
+
+ fd_core_file = open_coredump_tmpfile(self->fd_tmpfs_detached);
+ if (fd_core_file < 0) {
+ fprintf(stderr, "%m - open_coredump_tmpfile failed for fd %d\n", fd_coredump);
+ goto out;
+ }
+
+ for (;;) {
+ char buffer[4096];
+ ssize_t bytes_read, bytes_write;
+
+ bytes_read = read(fd_coredump, buffer, sizeof(buffer));
+ if (bytes_read < 0) {
+ fprintf(stderr, "read failed for fd %d: %m\n", fd_coredump);
+ goto out;
+ }
+
+ if (bytes_read == 0)
+ break;
+
+ bytes_write = write(fd_core_file, buffer, bytes_read);
+ if (bytes_read != bytes_write) {
+ if (bytes_write < 0 && errno == ENOSPC)
+ continue;
+ fprintf(stderr, "write failed for fd %d: %m\n", fd_core_file);
+ goto out;
+ }
+ }
+
+ close(fd_core_file);
+ close(fd_peer_pidfd);
+ close(fd_coredump);
+ fd_peer_pidfd = -1;
+ fd_coredump = -1;
+ }
+
+ exit_code = EXIT_SUCCESS;
+out:
+ if (fd_core_file >= 0)
+ close(fd_core_file);
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) {
+ pid[i] = fork();
+ ASSERT_GE(pid[i], 0);
+ if (pid[i] == 0)
+ crashing_child();
+ pidfd[i] = sys_pidfd_open(pid[i], 0);
+ ASSERT_GE(pidfd[i], 0);
+ }
+
+ for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) {
+ waitpid(pid[i], &status[i], 0);
+ ASSERT_TRUE(WIFSIGNALED(status[i]));
+ ASSERT_TRUE(WCOREDUMP(status[i]));
+ }
+
+ for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) {
+ info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP;
+ ASSERT_EQ(ioctl(pidfd[i], PIDFD_GET_INFO, &info), 0);
+ ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+ ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+ }
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+TEST_F_TIMEOUT(coredump, socket_multiple_crashing_coredumps_epoll_workers, 500)
+{
+ int pidfd[NUM_CRASHING_COREDUMPS], status[NUM_CRASHING_COREDUMPS];
+ pid_t pid[NUM_CRASHING_COREDUMPS], pid_coredump_server, worker_pids[NUM_CRASHING_COREDUMPS];
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets), 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ int fd_server = -1, exit_code = EXIT_FAILURE, n_conns = 0;
+ fd_server = -1;
+ exit_code = EXIT_FAILURE;
+ n_conns = 0;
+ close(ipc_sockets[0]);
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0) {
+ fprintf(stderr, "socket_multiple_crashing_coredumps_epoll_workers: create_and_listen_unix_socket failed: %m\n");
+ goto out;
+ }
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+ fprintf(stderr, "socket_multiple_crashing_coredumps_epoll_workers: write_nointr to ipc socket failed: %m\n");
+ goto out;
+ }
+ close(ipc_sockets[1]);
+
+ while (n_conns < NUM_CRASHING_COREDUMPS) {
+ int fd_coredump = -1, fd_peer_pidfd = -1, fd_core_file = -1;
+ struct coredump_req req = {};
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0) {
+ if (errno == EAGAIN || errno == EWOULDBLOCK)
+ continue;
+ fprintf(stderr, "socket_multiple_crashing_coredumps_epoll_workers: accept4 failed: %m\n");
+ goto out;
+ }
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0) {
+ fprintf(stderr, "socket_multiple_crashing_coredumps_epoll_workers: get_peer_pidfd failed\n");
+ goto out;
+ }
+ if (!get_pidfd_info(fd_peer_pidfd, &info)) {
+ fprintf(stderr, "socket_multiple_crashing_coredumps_epoll_workers: get_pidfd_info failed\n");
+ goto out;
+ }
+ if (!(info.mask & PIDFD_INFO_COREDUMP) || !(info.coredump_mask & PIDFD_COREDUMPED)) {
+ fprintf(stderr, "socket_multiple_crashing_coredumps_epoll_workers: missing PIDFD_INFO_COREDUMP or PIDFD_COREDUMPED\n");
+ goto out;
+ }
+ if (!read_coredump_req(fd_coredump, &req)) {
+ fprintf(stderr, "socket_multiple_crashing_coredumps_epoll_workers: read_coredump_req failed\n");
+ goto out;
+ }
+ if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
+ COREDUMP_KERNEL | COREDUMP_USERSPACE |
+ COREDUMP_REJECT | COREDUMP_WAIT)) {
+ fprintf(stderr, "socket_multiple_crashing_coredumps_epoll_workers: check_coredump_req failed\n");
+ goto out;
+ }
+ if (!send_coredump_ack(fd_coredump, &req, COREDUMP_KERNEL | COREDUMP_WAIT, 0)) {
+ fprintf(stderr, "socket_multiple_crashing_coredumps_epoll_workers: send_coredump_ack failed\n");
+ goto out;
+ }
+ if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK)) {
+ fprintf(stderr, "socket_multiple_crashing_coredumps_epoll_workers: read_marker failed\n");
+ goto out;
+ }
+ fd_core_file = open_coredump_tmpfile(self->fd_tmpfs_detached);
+ if (fd_core_file < 0) {
+ fprintf(stderr, "socket_multiple_crashing_coredumps_epoll_workers: open_coredump_tmpfile failed: %m\n");
+ goto out;
+ }
+ pid_t worker = fork();
+ if (worker == 0) {
+ close(fd_server);
+ process_coredump_worker(fd_coredump, fd_peer_pidfd, fd_core_file);
+ }
+ worker_pids[n_conns] = worker;
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_core_file >= 0)
+ close(fd_core_file);
+ n_conns++;
+ }
+ exit_code = EXIT_SUCCESS;
+out:
+ if (fd_server >= 0)
+ close(fd_server);
+
+ // Reap all worker processes
+ for (int i = 0; i < n_conns; i++) {
+ int wstatus;
+ if (waitpid(worker_pids[i], &wstatus, 0) < 0) {
+ fprintf(stderr, "Failed to wait for worker %d: %m\n", worker_pids[i]);
+ } else if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) != EXIT_SUCCESS) {
+ fprintf(stderr, "Worker %d exited with error code %d\n", worker_pids[i], WEXITSTATUS(wstatus));
+ exit_code = EXIT_FAILURE;
+ }
+ }
+
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) {
+ pid[i] = fork();
+ ASSERT_GE(pid[i], 0);
+ if (pid[i] == 0)
+ crashing_child();
+ pidfd[i] = sys_pidfd_open(pid[i], 0);
+ ASSERT_GE(pidfd[i], 0);
+ }
+
+ for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) {
+ ASSERT_GE(waitpid(pid[i], &status[i], 0), 0);
+ ASSERT_TRUE(WIFSIGNALED(status[i]));
+ ASSERT_TRUE(WCOREDUMP(status[i]));
+ }
+
+ for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) {
+ info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP;
+ ASSERT_EQ(ioctl(pidfd[i], PIDFD_GET_INFO, &info), 0);
+ ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+ ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+ }
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/coredump/coredump_socket_test.c b/tools/testing/selftests/coredump/coredump_socket_test.c
new file mode 100644
index 000000000000..7e26d4a6a15d
--- /dev/null
+++ b/tools/testing/selftests/coredump/coredump_socket_test.c
@@ -0,0 +1,742 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <sys/stat.h>
+#include <sys/epoll.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+
+#include "coredump_test.h"
+
+FIXTURE_SETUP(coredump)
+{
+ FILE *file;
+ int ret;
+
+ self->pid_coredump_server = -ESRCH;
+ self->fd_tmpfs_detached = -1;
+ file = fopen("/proc/sys/kernel/core_pattern", "r");
+ ASSERT_NE(NULL, file);
+
+ ret = fread(self->original_core_pattern, 1, sizeof(self->original_core_pattern), file);
+ ASSERT_TRUE(ret || feof(file));
+ ASSERT_LT(ret, sizeof(self->original_core_pattern));
+
+ self->original_core_pattern[ret] = '\0';
+ self->fd_tmpfs_detached = create_detached_tmpfs();
+ ASSERT_GE(self->fd_tmpfs_detached, 0);
+
+ ret = fclose(file);
+ ASSERT_EQ(0, ret);
+}
+
+FIXTURE_TEARDOWN(coredump)
+{
+ const char *reason;
+ FILE *file;
+ int ret, status;
+
+ if (self->pid_coredump_server > 0) {
+ kill(self->pid_coredump_server, SIGTERM);
+ waitpid(self->pid_coredump_server, &status, 0);
+ }
+ unlink("/tmp/coredump.file");
+ unlink("/tmp/coredump.socket");
+
+ file = fopen("/proc/sys/kernel/core_pattern", "w");
+ if (!file) {
+ reason = "Unable to open core_pattern";
+ goto fail;
+ }
+
+ ret = fprintf(file, "%s", self->original_core_pattern);
+ if (ret < 0) {
+ reason = "Unable to write to core_pattern";
+ goto fail;
+ }
+
+ ret = fclose(file);
+ if (ret) {
+ reason = "Unable to close core_pattern";
+ goto fail;
+ }
+
+ if (self->fd_tmpfs_detached >= 0) {
+ ret = close(self->fd_tmpfs_detached);
+ if (ret < 0) {
+ reason = "Unable to close detached tmpfs";
+ goto fail;
+ }
+ self->fd_tmpfs_detached = -1;
+ }
+
+ return;
+fail:
+ /* This should never happen */
+ fprintf(stderr, "Failed to cleanup coredump test: %s\n", reason);
+}
+
+TEST_F(coredump, socket)
+{
+ int pidfd, ret, status;
+ pid_t pid, pid_coredump_server;
+ struct stat st;
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@/tmp/coredump.socket"));
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1, fd_core_file = -1;
+ int exit_code = EXIT_FAILURE;
+
+ close(ipc_sockets[0]);
+
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0) {
+ fprintf(stderr, "socket test: create_and_listen_unix_socket failed: %m\n");
+ goto out;
+ }
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+ fprintf(stderr, "socket test: write_nointr to ipc socket failed: %m\n");
+ goto out;
+ }
+
+ close(ipc_sockets[1]);
+
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0) {
+ fprintf(stderr, "socket test: accept4 failed: %m\n");
+ goto out;
+ }
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0) {
+ fprintf(stderr, "socket test: get_peer_pidfd failed\n");
+ goto out;
+ }
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info)) {
+ fprintf(stderr, "socket test: get_pidfd_info failed\n");
+ goto out;
+ }
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP)) {
+ fprintf(stderr, "socket test: PIDFD_INFO_COREDUMP not set in mask\n");
+ goto out;
+ }
+
+ if (!(info.coredump_mask & PIDFD_COREDUMPED)) {
+ fprintf(stderr, "socket test: PIDFD_COREDUMPED not set in coredump_mask\n");
+ goto out;
+ }
+
+ fd_core_file = creat("/tmp/coredump.file", 0644);
+ if (fd_core_file < 0) {
+ fprintf(stderr, "socket test: creat coredump file failed: %m\n");
+ goto out;
+ }
+
+ for (;;) {
+ char buffer[4096];
+ ssize_t bytes_read, bytes_write;
+
+ bytes_read = read(fd_coredump, buffer, sizeof(buffer));
+ if (bytes_read < 0) {
+ fprintf(stderr, "socket test: read from coredump socket failed: %m\n");
+ goto out;
+ }
+
+ if (bytes_read == 0)
+ break;
+
+ bytes_write = write(fd_core_file, buffer, bytes_read);
+ if (bytes_read != bytes_write) {
+ if (bytes_write < 0 && errno == ENOSPC)
+ continue;
+ fprintf(stderr, "socket test: write to core file failed (read=%zd, write=%zd): %m\n", bytes_read, bytes_write);
+ goto out;
+ }
+ }
+
+ exit_code = EXIT_SUCCESS;
+ fprintf(stderr, "socket test: completed successfully\n");
+out:
+ if (fd_core_file >= 0)
+ close(fd_core_file);
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ crashing_child();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_TRUE(WCOREDUMP(status));
+
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+ ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+ ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+
+ ASSERT_EQ(stat("/tmp/coredump.file", &st), 0);
+ ASSERT_GT(st.st_size, 0);
+}
+
+TEST_F(coredump, socket_detect_userspace_client)
+{
+ int pidfd, ret, status;
+ pid_t pid, pid_coredump_server;
+ struct stat st;
+ struct pidfd_info info = {
+ .mask = PIDFD_INFO_COREDUMP,
+ };
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@/tmp/coredump.socket"));
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
+ int exit_code = EXIT_FAILURE;
+
+ close(ipc_sockets[0]);
+
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0) {
+ fprintf(stderr, "socket_detect_userspace_client: create_and_listen_unix_socket failed: %m\n");
+ goto out;
+ }
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+ fprintf(stderr, "socket_detect_userspace_client: write_nointr to ipc socket failed: %m\n");
+ goto out;
+ }
+
+ close(ipc_sockets[1]);
+
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0) {
+ fprintf(stderr, "socket_detect_userspace_client: accept4 failed: %m\n");
+ goto out;
+ }
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0) {
+ fprintf(stderr, "socket_detect_userspace_client: get_peer_pidfd failed\n");
+ goto out;
+ }
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info)) {
+ fprintf(stderr, "socket_detect_userspace_client: get_pidfd_info failed\n");
+ goto out;
+ }
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP)) {
+ fprintf(stderr, "socket_detect_userspace_client: PIDFD_INFO_COREDUMP not set in mask\n");
+ goto out;
+ }
+
+ if (info.coredump_mask & PIDFD_COREDUMPED) {
+ fprintf(stderr, "socket_detect_userspace_client: PIDFD_COREDUMPED incorrectly set (should be userspace client)\n");
+ goto out;
+ }
+
+ exit_code = EXIT_SUCCESS;
+ fprintf(stderr, "socket_detect_userspace_client: completed successfully\n");
+out:
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0) {
+ int fd_socket;
+ ssize_t ret;
+ const struct sockaddr_un coredump_sk = {
+ .sun_family = AF_UNIX,
+ .sun_path = "/tmp/coredump.socket",
+ };
+ size_t coredump_sk_len =
+ offsetof(struct sockaddr_un, sun_path) +
+ sizeof("/tmp/coredump.socket");
+
+ fd_socket = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (fd_socket < 0) {
+ fprintf(stderr, "socket_detect_userspace_client (client): socket failed: %m\n");
+ _exit(EXIT_FAILURE);
+ }
+
+ ret = connect(fd_socket, (const struct sockaddr *)&coredump_sk, coredump_sk_len);
+ if (ret < 0) {
+ fprintf(stderr, "socket_detect_userspace_client (client): connect failed: %m\n");
+ _exit(EXIT_FAILURE);
+ }
+
+ close(fd_socket);
+ pause();
+ fprintf(stderr, "socket_detect_userspace_client (client): completed successfully\n");
+ _exit(EXIT_SUCCESS);
+ }
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+ ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+ ASSERT_EQ((info.coredump_mask & PIDFD_COREDUMPED), 0);
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+
+ ASSERT_EQ(sys_pidfd_send_signal(pidfd, SIGKILL, NULL, 0), 0);
+ ASSERT_EQ(close(pidfd), 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_EQ(WTERMSIG(status), SIGKILL);
+
+ ASSERT_NE(stat("/tmp/coredump.file", &st), 0);
+ ASSERT_EQ(errno, ENOENT);
+}
+
+TEST_F(coredump, socket_enoent)
+{
+ int pidfd, status;
+ pid_t pid;
+
+ ASSERT_TRUE(set_core_pattern("@/tmp/coredump.socket"));
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ crashing_child();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_FALSE(WCOREDUMP(status));
+}
+
+TEST_F(coredump, socket_no_listener)
+{
+ int pidfd, ret, status;
+ pid_t pid, pid_coredump_server;
+ int ipc_sockets[2];
+ char c;
+ const struct sockaddr_un coredump_sk = {
+ .sun_family = AF_UNIX,
+ .sun_path = "/tmp/coredump.socket",
+ };
+ size_t coredump_sk_len = offsetof(struct sockaddr_un, sun_path) +
+ sizeof("/tmp/coredump.socket");
+
+ ASSERT_TRUE(set_core_pattern("@/tmp/coredump.socket"));
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ int fd_server = -1;
+ int exit_code = EXIT_FAILURE;
+
+ close(ipc_sockets[0]);
+
+ fd_server = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0);
+ if (fd_server < 0) {
+ fprintf(stderr, "socket_no_listener: socket failed: %m\n");
+ goto out;
+ }
+
+ ret = bind(fd_server, (const struct sockaddr *)&coredump_sk, coredump_sk_len);
+ if (ret < 0) {
+ fprintf(stderr, "socket_no_listener: bind failed: %m\n");
+ goto out;
+ }
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+ fprintf(stderr, "socket_no_listener: write_nointr to ipc socket failed: %m\n");
+ goto out;
+ }
+
+ exit_code = EXIT_SUCCESS;
+ fprintf(stderr, "socket_no_listener: completed successfully\n");
+out:
+ if (fd_server >= 0)
+ close(fd_server);
+ close(ipc_sockets[1]);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ crashing_child();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_FALSE(WCOREDUMP(status));
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+/*
+ * Test: PIDFD_INFO_COREDUMP_SIGNAL via simple socket coredump
+ *
+ * Verify that when using simple socket-based coredump (@ pattern),
+ * the coredump_signal field is correctly exposed as SIGSEGV.
+ */
+TEST_F(coredump, socket_coredump_signal_sigsegv)
+{
+ int pidfd, ret, status;
+ pid_t pid, pid_coredump_server;
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@/tmp/coredump.socket"));
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1, fd_core_file = -1;
+ int exit_code = EXIT_FAILURE;
+
+ close(ipc_sockets[0]);
+
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: create_and_listen_unix_socket failed: %m\n");
+ goto out;
+ }
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: write_nointr to ipc socket failed: %m\n");
+ goto out;
+ }
+
+ close(ipc_sockets[1]);
+
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: accept4 failed: %m\n");
+ goto out;
+ }
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: get_peer_pidfd failed\n");
+ goto out;
+ }
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info)) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: get_pidfd_info failed\n");
+ goto out;
+ }
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP)) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: PIDFD_INFO_COREDUMP not set in mask\n");
+ goto out;
+ }
+
+ if (!(info.coredump_mask & PIDFD_COREDUMPED)) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: PIDFD_COREDUMPED not set in coredump_mask\n");
+ goto out;
+ }
+
+ /* Verify coredump_signal is available and correct */
+ if (!(info.mask & PIDFD_INFO_COREDUMP_SIGNAL)) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: PIDFD_INFO_COREDUMP_SIGNAL not set in mask\n");
+ goto out;
+ }
+
+ if (info.coredump_signal != SIGSEGV) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: coredump_signal=%d, expected SIGSEGV=%d\n",
+ info.coredump_signal, SIGSEGV);
+ goto out;
+ }
+
+ fd_core_file = open_coredump_tmpfile(self->fd_tmpfs_detached);
+ if (fd_core_file < 0) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: open_coredump_tmpfile failed: %m\n");
+ goto out;
+ }
+
+ for (;;) {
+ char buffer[4096];
+ ssize_t bytes_read, bytes_write;
+
+ bytes_read = read(fd_coredump, buffer, sizeof(buffer));
+ if (bytes_read < 0) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: read from coredump socket failed: %m\n");
+ goto out;
+ }
+
+ if (bytes_read == 0)
+ break;
+
+ bytes_write = write(fd_core_file, buffer, bytes_read);
+ if (bytes_read != bytes_write) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: write to core file failed (read=%zd, write=%zd): %m\n",
+ bytes_read, bytes_write);
+ goto out;
+ }
+ }
+
+ exit_code = EXIT_SUCCESS;
+ fprintf(stderr, "socket_coredump_signal_sigsegv: completed successfully\n");
+out:
+ if (fd_core_file >= 0)
+ close(fd_core_file);
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ crashing_child();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_EQ(WTERMSIG(status), SIGSEGV);
+ ASSERT_TRUE(WCOREDUMP(status));
+
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+ ASSERT_TRUE(!!(info.mask & PIDFD_INFO_COREDUMP));
+ ASSERT_TRUE(!!(info.mask & PIDFD_INFO_COREDUMP_SIGNAL));
+ ASSERT_EQ(info.coredump_signal, SIGSEGV);
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+/*
+ * Test: PIDFD_INFO_COREDUMP_SIGNAL via simple socket coredump with SIGABRT
+ *
+ * Verify that when using simple socket-based coredump (@ pattern),
+ * the coredump_signal field is correctly exposed as SIGABRT.
+ */
+TEST_F(coredump, socket_coredump_signal_sigabrt)
+{
+ int pidfd, ret, status;
+ pid_t pid, pid_coredump_server;
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@/tmp/coredump.socket"));
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1, fd_core_file = -1;
+ int exit_code = EXIT_FAILURE;
+
+ close(ipc_sockets[0]);
+
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: create_and_listen_unix_socket failed: %m\n");
+ goto out;
+ }
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: write_nointr to ipc socket failed: %m\n");
+ goto out;
+ }
+
+ close(ipc_sockets[1]);
+
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: accept4 failed: %m\n");
+ goto out;
+ }
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: get_peer_pidfd failed\n");
+ goto out;
+ }
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info)) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: get_pidfd_info failed\n");
+ goto out;
+ }
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP)) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: PIDFD_INFO_COREDUMP not set in mask\n");
+ goto out;
+ }
+
+ if (!(info.coredump_mask & PIDFD_COREDUMPED)) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: PIDFD_COREDUMPED not set in coredump_mask\n");
+ goto out;
+ }
+
+ /* Verify coredump_signal is available and correct */
+ if (!(info.mask & PIDFD_INFO_COREDUMP_SIGNAL)) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: PIDFD_INFO_COREDUMP_SIGNAL not set in mask\n");
+ goto out;
+ }
+
+ if (info.coredump_signal != SIGABRT) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: coredump_signal=%d, expected SIGABRT=%d\n",
+ info.coredump_signal, SIGABRT);
+ goto out;
+ }
+
+ fd_core_file = open_coredump_tmpfile(self->fd_tmpfs_detached);
+ if (fd_core_file < 0) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: open_coredump_tmpfile failed: %m\n");
+ goto out;
+ }
+
+ for (;;) {
+ char buffer[4096];
+ ssize_t bytes_read, bytes_write;
+
+ bytes_read = read(fd_coredump, buffer, sizeof(buffer));
+ if (bytes_read < 0) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: read from coredump socket failed: %m\n");
+ goto out;
+ }
+
+ if (bytes_read == 0)
+ break;
+
+ bytes_write = write(fd_core_file, buffer, bytes_read);
+ if (bytes_read != bytes_write) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: write to core file failed (read=%zd, write=%zd): %m\n",
+ bytes_read, bytes_write);
+ goto out;
+ }
+ }
+
+ exit_code = EXIT_SUCCESS;
+ fprintf(stderr, "socket_coredump_signal_sigabrt: completed successfully\n");
+out:
+ if (fd_core_file >= 0)
+ close(fd_core_file);
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ abort();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_EQ(WTERMSIG(status), SIGABRT);
+ ASSERT_TRUE(WCOREDUMP(status));
+
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+ ASSERT_TRUE(!!(info.mask & PIDFD_INFO_COREDUMP));
+ ASSERT_TRUE(!!(info.mask & PIDFD_INFO_COREDUMP_SIGNAL));
+ ASSERT_EQ(info.coredump_signal, SIGABRT);
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+TEST_F(coredump, socket_invalid_paths)
+{
+ ASSERT_FALSE(set_core_pattern("@ /tmp/coredump.socket"));
+ ASSERT_FALSE(set_core_pattern("@/tmp/../coredump.socket"));
+ ASSERT_FALSE(set_core_pattern("@../coredump.socket"));
+ ASSERT_FALSE(set_core_pattern("@/tmp/coredump.socket/.."));
+ ASSERT_FALSE(set_core_pattern("@.."));
+
+ ASSERT_FALSE(set_core_pattern("@@ /tmp/coredump.socket"));
+ ASSERT_FALSE(set_core_pattern("@@/tmp/../coredump.socket"));
+ ASSERT_FALSE(set_core_pattern("@@../coredump.socket"));
+ ASSERT_FALSE(set_core_pattern("@@/tmp/coredump.socket/.."));
+ ASSERT_FALSE(set_core_pattern("@@.."));
+
+ ASSERT_FALSE(set_core_pattern("@@@/tmp/coredump.socket"));
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/coredump/coredump_test.h b/tools/testing/selftests/coredump/coredump_test.h
new file mode 100644
index 000000000000..ed47f01fa53c
--- /dev/null
+++ b/tools/testing/selftests/coredump/coredump_test.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __COREDUMP_TEST_H
+#define __COREDUMP_TEST_H
+
+#include <stdbool.h>
+#include <sys/types.h>
+#include <linux/coredump.h>
+
+#include "../kselftest_harness.h"
+#include "../pidfd/pidfd.h"
+
+#ifndef PAGE_SIZE
+#define PAGE_SIZE 4096
+#endif
+
+#define NUM_THREAD_SPAWN 128
+
+/* Coredump fixture */
+FIXTURE(coredump)
+{
+ char original_core_pattern[256];
+ pid_t pid_coredump_server;
+ int fd_tmpfs_detached;
+};
+
+/* Shared helper function declarations */
+void *do_nothing(void *arg);
+void crashing_child(void);
+int create_detached_tmpfs(void);
+int create_and_listen_unix_socket(const char *path);
+bool set_core_pattern(const char *pattern);
+int get_peer_pidfd(int fd);
+bool get_pidfd_info(int fd_peer_pidfd, struct pidfd_info *info);
+
+/* Inline helper that uses harness types */
+static inline void wait_and_check_coredump_server(pid_t pid_coredump_server,
+ struct __test_metadata *const _metadata,
+ FIXTURE_DATA(coredump) *self)
+{
+ int status;
+ waitpid(pid_coredump_server, &status, 0);
+ self->pid_coredump_server = -ESRCH;
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+}
+
+/* Protocol helper function declarations */
+ssize_t recv_marker(int fd);
+bool read_marker(int fd, enum coredump_mark mark);
+bool read_coredump_req(int fd, struct coredump_req *req);
+bool send_coredump_ack(int fd, const struct coredump_req *req,
+ __u64 mask, size_t size_ack);
+bool check_coredump_req(const struct coredump_req *req, size_t min_size,
+ __u64 required_mask);
+int open_coredump_tmpfile(int fd_tmpfs_detached);
+void process_coredump_worker(int fd_coredump, int fd_peer_pidfd, int fd_core_file);
+
+#endif /* __COREDUMP_TEST_H */
diff --git a/tools/testing/selftests/coredump/coredump_test_helpers.c b/tools/testing/selftests/coredump/coredump_test_helpers.c
new file mode 100644
index 000000000000..a6f6d5f2ae07
--- /dev/null
+++ b/tools/testing/selftests/coredump/coredump_test_helpers.c
@@ -0,0 +1,383 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <linux/coredump.h>
+#include <linux/fs.h>
+#include <pthread.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/epoll.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/un.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "../filesystems/wrappers.h"
+#include "../pidfd/pidfd.h"
+
+/* Forward declarations to avoid including harness header */
+struct __test_metadata;
+
+/* Match the fixture definition from coredump_test.h */
+struct _fixture_coredump_data {
+ char original_core_pattern[256];
+ pid_t pid_coredump_server;
+ int fd_tmpfs_detached;
+};
+
+#ifndef PAGE_SIZE
+#define PAGE_SIZE 4096
+#endif
+
+#define NUM_THREAD_SPAWN 128
+
+void *do_nothing(void *arg)
+{
+ (void)arg;
+ while (1)
+ pause();
+
+ return NULL;
+}
+
+void crashing_child(void)
+{
+ pthread_t thread;
+ int i;
+
+ for (i = 0; i < NUM_THREAD_SPAWN; ++i)
+ pthread_create(&thread, NULL, do_nothing, NULL);
+
+ /* crash on purpose */
+ i = *(int *)NULL;
+}
+
+int create_detached_tmpfs(void)
+{
+ int fd_context, fd_tmpfs;
+
+ fd_context = sys_fsopen("tmpfs", 0);
+ if (fd_context < 0)
+ return -1;
+
+ if (sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0) < 0)
+ return -1;
+
+ fd_tmpfs = sys_fsmount(fd_context, 0, 0);
+ close(fd_context);
+ return fd_tmpfs;
+}
+
+int create_and_listen_unix_socket(const char *path)
+{
+ struct sockaddr_un addr = {
+ .sun_family = AF_UNIX,
+ };
+ assert(strlen(path) < sizeof(addr.sun_path) - 1);
+ strncpy(addr.sun_path, path, sizeof(addr.sun_path) - 1);
+ size_t addr_len =
+ offsetof(struct sockaddr_un, sun_path) + strlen(path) + 1;
+ int fd, ret;
+
+ fd = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0);
+ if (fd < 0)
+ goto out;
+
+ ret = bind(fd, (const struct sockaddr *)&addr, addr_len);
+ if (ret < 0)
+ goto out;
+
+ ret = listen(fd, 128);
+ if (ret < 0)
+ goto out;
+
+ return fd;
+
+out:
+ if (fd >= 0)
+ close(fd);
+ return -1;
+}
+
+bool set_core_pattern(const char *pattern)
+{
+ int fd;
+ ssize_t ret;
+
+ fd = open("/proc/sys/kernel/core_pattern", O_WRONLY | O_CLOEXEC);
+ if (fd < 0)
+ return false;
+
+ ret = write(fd, pattern, strlen(pattern));
+ close(fd);
+ if (ret < 0)
+ return false;
+
+ fprintf(stderr, "Set core_pattern to '%s' | %zu == %zu\n", pattern, ret, strlen(pattern));
+ return ret == strlen(pattern);
+}
+
+int get_peer_pidfd(int fd)
+{
+ int fd_peer_pidfd;
+ socklen_t fd_peer_pidfd_len = sizeof(fd_peer_pidfd);
+ int ret = getsockopt(fd, SOL_SOCKET, SO_PEERPIDFD, &fd_peer_pidfd,
+ &fd_peer_pidfd_len);
+ if (ret < 0) {
+ fprintf(stderr, "get_peer_pidfd: getsockopt(SO_PEERPIDFD) failed: %m\n");
+ return -1;
+ }
+ fprintf(stderr, "get_peer_pidfd: successfully retrieved pidfd %d\n", fd_peer_pidfd);
+ return fd_peer_pidfd;
+}
+
+bool get_pidfd_info(int fd_peer_pidfd, struct pidfd_info *info)
+{
+ int ret;
+ memset(info, 0, sizeof(*info));
+ info->mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP | PIDFD_INFO_COREDUMP_SIGNAL;
+ ret = ioctl(fd_peer_pidfd, PIDFD_GET_INFO, info);
+ if (ret < 0) {
+ fprintf(stderr, "get_pidfd_info: ioctl(PIDFD_GET_INFO) failed: %m\n");
+ return false;
+ }
+ fprintf(stderr, "get_pidfd_info: mask=0x%llx, coredump_mask=0x%x, coredump_signal=%d\n",
+ (unsigned long long)info->mask, info->coredump_mask, info->coredump_signal);
+ return true;
+}
+
+/* Protocol helper functions */
+
+ssize_t recv_marker(int fd)
+{
+ enum coredump_mark mark = COREDUMP_MARK_REQACK;
+ ssize_t ret;
+
+ ret = recv(fd, &mark, sizeof(mark), MSG_WAITALL);
+ if (ret != sizeof(mark))
+ return -1;
+
+ switch (mark) {
+ case COREDUMP_MARK_REQACK:
+ fprintf(stderr, "Received marker: ReqAck\n");
+ return COREDUMP_MARK_REQACK;
+ case COREDUMP_MARK_MINSIZE:
+ fprintf(stderr, "Received marker: MinSize\n");
+ return COREDUMP_MARK_MINSIZE;
+ case COREDUMP_MARK_MAXSIZE:
+ fprintf(stderr, "Received marker: MaxSize\n");
+ return COREDUMP_MARK_MAXSIZE;
+ case COREDUMP_MARK_UNSUPPORTED:
+ fprintf(stderr, "Received marker: Unsupported\n");
+ return COREDUMP_MARK_UNSUPPORTED;
+ case COREDUMP_MARK_CONFLICTING:
+ fprintf(stderr, "Received marker: Conflicting\n");
+ return COREDUMP_MARK_CONFLICTING;
+ default:
+ fprintf(stderr, "Received unknown marker: %u\n", mark);
+ break;
+ }
+ return -1;
+}
+
+bool read_marker(int fd, enum coredump_mark mark)
+{
+ ssize_t ret;
+
+ ret = recv_marker(fd);
+ if (ret < 0)
+ return false;
+ return ret == mark;
+}
+
+bool read_coredump_req(int fd, struct coredump_req *req)
+{
+ ssize_t ret;
+ size_t field_size, user_size, ack_size, kernel_size, remaining_size;
+
+ memset(req, 0, sizeof(*req));
+ field_size = sizeof(req->size);
+
+ /* Peek the size of the coredump request. */
+ ret = recv(fd, req, field_size, MSG_PEEK | MSG_WAITALL);
+ if (ret != field_size) {
+ fprintf(stderr, "read_coredump_req: peek failed (got %zd, expected %zu): %m\n",
+ ret, field_size);
+ return false;
+ }
+ kernel_size = req->size;
+
+ if (kernel_size < COREDUMP_ACK_SIZE_VER0) {
+ fprintf(stderr, "read_coredump_req: kernel_size %zu < min %d\n",
+ kernel_size, COREDUMP_ACK_SIZE_VER0);
+ return false;
+ }
+ if (kernel_size >= PAGE_SIZE) {
+ fprintf(stderr, "read_coredump_req: kernel_size %zu >= PAGE_SIZE %d\n",
+ kernel_size, PAGE_SIZE);
+ return false;
+ }
+
+ /* Use the minimum of user and kernel size to read the full request. */
+ user_size = sizeof(struct coredump_req);
+ ack_size = user_size < kernel_size ? user_size : kernel_size;
+ ret = recv(fd, req, ack_size, MSG_WAITALL);
+ if (ret != ack_size)
+ return false;
+
+ fprintf(stderr, "Read coredump request with size %u and mask 0x%llx\n",
+ req->size, (unsigned long long)req->mask);
+
+ if (user_size > kernel_size)
+ remaining_size = user_size - kernel_size;
+ else
+ remaining_size = kernel_size - user_size;
+
+ if (PAGE_SIZE <= remaining_size)
+ return false;
+
+ /*
+ * Discard any additional data if the kernel's request was larger than
+ * what we knew about or cared about.
+ */
+ if (remaining_size) {
+ char buffer[PAGE_SIZE];
+
+ ret = recv(fd, buffer, sizeof(buffer), MSG_WAITALL);
+ if (ret != remaining_size)
+ return false;
+ fprintf(stderr, "Discarded %zu bytes of data after coredump request\n", remaining_size);
+ }
+
+ return true;
+}
+
+bool send_coredump_ack(int fd, const struct coredump_req *req,
+ __u64 mask, size_t size_ack)
+{
+ ssize_t ret;
+ /*
+ * Wrap struct coredump_ack in a larger struct so we can
+ * simulate sending to much data to the kernel.
+ */
+ struct large_ack_for_size_testing {
+ struct coredump_ack ack;
+ char buffer[PAGE_SIZE];
+ } large_ack = {};
+
+ if (!size_ack)
+ size_ack = sizeof(struct coredump_ack) < req->size_ack ?
+ sizeof(struct coredump_ack) :
+ req->size_ack;
+ large_ack.ack.mask = mask;
+ large_ack.ack.size = size_ack;
+ ret = send(fd, &large_ack, size_ack, MSG_NOSIGNAL);
+ if (ret != size_ack)
+ return false;
+
+ fprintf(stderr, "Sent coredump ack with size %zu and mask 0x%llx\n",
+ size_ack, (unsigned long long)mask);
+ return true;
+}
+
+bool check_coredump_req(const struct coredump_req *req, size_t min_size,
+ __u64 required_mask)
+{
+ if (req->size < min_size)
+ return false;
+ if ((req->mask & required_mask) != required_mask)
+ return false;
+ if (req->mask & ~required_mask)
+ return false;
+ return true;
+}
+
+int open_coredump_tmpfile(int fd_tmpfs_detached)
+{
+ return openat(fd_tmpfs_detached, ".", O_TMPFILE | O_RDWR | O_EXCL, 0600);
+}
+
+void process_coredump_worker(int fd_coredump, int fd_peer_pidfd, int fd_core_file)
+{
+ int epfd = -1;
+ int exit_code = EXIT_FAILURE;
+ struct epoll_event ev;
+ int flags;
+
+ /* Set socket to non-blocking mode for edge-triggered epoll */
+ flags = fcntl(fd_coredump, F_GETFL, 0);
+ if (flags < 0) {
+ fprintf(stderr, "Worker: fcntl(F_GETFL) failed: %m\n");
+ goto out;
+ }
+ if (fcntl(fd_coredump, F_SETFL, flags | O_NONBLOCK) < 0) {
+ fprintf(stderr, "Worker: fcntl(F_SETFL, O_NONBLOCK) failed: %m\n");
+ goto out;
+ }
+
+ epfd = epoll_create1(0);
+ if (epfd < 0) {
+ fprintf(stderr, "Worker: epoll_create1() failed: %m\n");
+ goto out;
+ }
+
+ ev.events = EPOLLIN | EPOLLRDHUP | EPOLLET;
+ ev.data.fd = fd_coredump;
+ if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd_coredump, &ev) < 0) {
+ fprintf(stderr, "Worker: epoll_ctl(EPOLL_CTL_ADD) failed: %m\n");
+ goto out;
+ }
+
+ for (;;) {
+ struct epoll_event events[1];
+ int n = epoll_wait(epfd, events, 1, -1);
+ if (n < 0) {
+ fprintf(stderr, "Worker: epoll_wait() failed: %m\n");
+ break;
+ }
+
+ if (events[0].events & (EPOLLIN | EPOLLRDHUP)) {
+ for (;;) {
+ char buffer[4096];
+ ssize_t bytes_read = read(fd_coredump, buffer, sizeof(buffer));
+ if (bytes_read < 0) {
+ if (errno == EAGAIN || errno == EWOULDBLOCK)
+ break;
+ fprintf(stderr, "Worker: read() failed: %m\n");
+ goto out;
+ }
+ if (bytes_read == 0)
+ goto done;
+ ssize_t bytes_write = write(fd_core_file, buffer, bytes_read);
+ if (bytes_write != bytes_read) {
+ if (bytes_write < 0 && errno == ENOSPC)
+ continue;
+ fprintf(stderr, "Worker: write() failed (read=%zd, write=%zd): %m\n",
+ bytes_read, bytes_write);
+ goto out;
+ }
+ }
+ }
+ }
+
+done:
+ exit_code = EXIT_SUCCESS;
+ fprintf(stderr, "Worker: completed successfully\n");
+out:
+ if (epfd >= 0)
+ close(epfd);
+ if (fd_core_file >= 0)
+ close(fd_core_file);
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ _exit(exit_code);
+}
diff --git a/tools/testing/selftests/coredump/stackdump_test.c b/tools/testing/selftests/coredump/stackdump_test.c
index a4ac80bb1003..c2e895bcc160 100644
--- a/tools/testing/selftests/coredump/stackdump_test.c
+++ b/tools/testing/selftests/coredump/stackdump_test.c
@@ -23,57 +23,15 @@
#include "../filesystems/wrappers.h"
#include "../pidfd/pidfd.h"
+#include "coredump_test.h"
+
#define STACKDUMP_FILE "stack_values"
#define STACKDUMP_SCRIPT "stackdump"
-#define NUM_THREAD_SPAWN 128
#ifndef PAGE_SIZE
#define PAGE_SIZE 4096
#endif
-static void *do_nothing(void *)
-{
- while (1)
- pause();
-
- return NULL;
-}
-
-static void crashing_child(void)
-{
- pthread_t thread;
- int i;
-
- for (i = 0; i < NUM_THREAD_SPAWN; ++i)
- pthread_create(&thread, NULL, do_nothing, NULL);
-
- /* crash on purpose */
- i = *(int *)NULL;
-}
-
-FIXTURE(coredump)
-{
- char original_core_pattern[256];
- pid_t pid_coredump_server;
- int fd_tmpfs_detached;
-};
-
-static int create_detached_tmpfs(void)
-{
- int fd_context, fd_tmpfs;
-
- fd_context = sys_fsopen("tmpfs", 0);
- if (fd_context < 0)
- return -1;
-
- if (sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0) < 0)
- return -1;
-
- fd_tmpfs = sys_fsmount(fd_context, 0, 0);
- close(fd_context);
- return fd_tmpfs;
-}
-
FIXTURE_SETUP(coredump)
{
FILE *file;
@@ -208,1620 +166,4 @@ TEST_F_TIMEOUT(coredump, stackdump, 120)
fclose(file);
}
-static int create_and_listen_unix_socket(const char *path)
-{
- struct sockaddr_un addr = {
- .sun_family = AF_UNIX,
- };
- assert(strlen(path) < sizeof(addr.sun_path) - 1);
- strncpy(addr.sun_path, path, sizeof(addr.sun_path) - 1);
- size_t addr_len =
- offsetof(struct sockaddr_un, sun_path) + strlen(path) + 1;
- int fd, ret;
-
- fd = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0);
- if (fd < 0)
- goto out;
-
- ret = bind(fd, (const struct sockaddr *)&addr, addr_len);
- if (ret < 0)
- goto out;
-
- ret = listen(fd, 128);
- if (ret < 0)
- goto out;
-
- return fd;
-
-out:
- if (fd >= 0)
- close(fd);
- return -1;
-}
-
-static bool set_core_pattern(const char *pattern)
-{
- int fd;
- ssize_t ret;
-
- fd = open("/proc/sys/kernel/core_pattern", O_WRONLY | O_CLOEXEC);
- if (fd < 0)
- return false;
-
- ret = write(fd, pattern, strlen(pattern));
- close(fd);
- if (ret < 0)
- return false;
-
- fprintf(stderr, "Set core_pattern to '%s' | %zu == %zu\n", pattern, ret, strlen(pattern));
- return ret == strlen(pattern);
-}
-
-static int get_peer_pidfd(int fd)
-{
- int fd_peer_pidfd;
- socklen_t fd_peer_pidfd_len = sizeof(fd_peer_pidfd);
- int ret = getsockopt(fd, SOL_SOCKET, SO_PEERPIDFD, &fd_peer_pidfd,
- &fd_peer_pidfd_len);
- if (ret < 0) {
- fprintf(stderr, "%m - Failed to retrieve peer pidfd for coredump socket connection\n");
- return -1;
- }
- return fd_peer_pidfd;
-}
-
-static bool get_pidfd_info(int fd_peer_pidfd, struct pidfd_info *info)
-{
- memset(info, 0, sizeof(*info));
- info->mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP;
- return ioctl(fd_peer_pidfd, PIDFD_GET_INFO, info) == 0;
-}
-
-static void
-wait_and_check_coredump_server(pid_t pid_coredump_server,
- struct __test_metadata *const _metadata,
- FIXTURE_DATA(coredump)* self)
-{
- int status;
- waitpid(pid_coredump_server, &status, 0);
- self->pid_coredump_server = -ESRCH;
- ASSERT_TRUE(WIFEXITED(status));
- ASSERT_EQ(WEXITSTATUS(status), 0);
-}
-
-TEST_F(coredump, socket)
-{
- int pidfd, ret, status;
- pid_t pid, pid_coredump_server;
- struct stat st;
- struct pidfd_info info = {};
- int ipc_sockets[2];
- char c;
-
- ASSERT_TRUE(set_core_pattern("@/tmp/coredump.socket"));
-
- ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
- ASSERT_EQ(ret, 0);
-
- pid_coredump_server = fork();
- ASSERT_GE(pid_coredump_server, 0);
- if (pid_coredump_server == 0) {
- int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1, fd_core_file = -1;
- int exit_code = EXIT_FAILURE;
-
- close(ipc_sockets[0]);
-
- fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
- if (fd_server < 0)
- goto out;
-
- if (write_nointr(ipc_sockets[1], "1", 1) < 0)
- goto out;
-
- close(ipc_sockets[1]);
-
- fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
- if (fd_coredump < 0)
- goto out;
-
- fd_peer_pidfd = get_peer_pidfd(fd_coredump);
- if (fd_peer_pidfd < 0)
- goto out;
-
- if (!get_pidfd_info(fd_peer_pidfd, &info))
- goto out;
-
- if (!(info.mask & PIDFD_INFO_COREDUMP))
- goto out;
-
- if (!(info.coredump_mask & PIDFD_COREDUMPED))
- goto out;
-
- fd_core_file = creat("/tmp/coredump.file", 0644);
- if (fd_core_file < 0)
- goto out;
-
- for (;;) {
- char buffer[4096];
- ssize_t bytes_read, bytes_write;
-
- bytes_read = read(fd_coredump, buffer, sizeof(buffer));
- if (bytes_read < 0)
- goto out;
-
- if (bytes_read == 0)
- break;
-
- bytes_write = write(fd_core_file, buffer, bytes_read);
- if (bytes_read != bytes_write)
- goto out;
- }
-
- exit_code = EXIT_SUCCESS;
-out:
- if (fd_core_file >= 0)
- close(fd_core_file);
- if (fd_peer_pidfd >= 0)
- close(fd_peer_pidfd);
- if (fd_coredump >= 0)
- close(fd_coredump);
- if (fd_server >= 0)
- close(fd_server);
- _exit(exit_code);
- }
- self->pid_coredump_server = pid_coredump_server;
-
- EXPECT_EQ(close(ipc_sockets[1]), 0);
- ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
- EXPECT_EQ(close(ipc_sockets[0]), 0);
-
- pid = fork();
- ASSERT_GE(pid, 0);
- if (pid == 0)
- crashing_child();
-
- pidfd = sys_pidfd_open(pid, 0);
- ASSERT_GE(pidfd, 0);
-
- waitpid(pid, &status, 0);
- ASSERT_TRUE(WIFSIGNALED(status));
- ASSERT_TRUE(WCOREDUMP(status));
-
- ASSERT_TRUE(get_pidfd_info(pidfd, &info));
- ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
- ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
-
- wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
-
- ASSERT_EQ(stat("/tmp/coredump.file", &st), 0);
- ASSERT_GT(st.st_size, 0);
- system("file /tmp/coredump.file");
-}
-
-TEST_F(coredump, socket_detect_userspace_client)
-{
- int pidfd, ret, status;
- pid_t pid, pid_coredump_server;
- struct stat st;
- struct pidfd_info info = {};
- int ipc_sockets[2];
- char c;
-
- ASSERT_TRUE(set_core_pattern("@/tmp/coredump.socket"));
-
- ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
- ASSERT_EQ(ret, 0);
-
- pid_coredump_server = fork();
- ASSERT_GE(pid_coredump_server, 0);
- if (pid_coredump_server == 0) {
- int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
- int exit_code = EXIT_FAILURE;
-
- close(ipc_sockets[0]);
-
- fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
- if (fd_server < 0)
- goto out;
-
- if (write_nointr(ipc_sockets[1], "1", 1) < 0)
- goto out;
-
- close(ipc_sockets[1]);
-
- fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
- if (fd_coredump < 0)
- goto out;
-
- fd_peer_pidfd = get_peer_pidfd(fd_coredump);
- if (fd_peer_pidfd < 0)
- goto out;
-
- if (!get_pidfd_info(fd_peer_pidfd, &info))
- goto out;
-
- if (!(info.mask & PIDFD_INFO_COREDUMP))
- goto out;
-
- if (info.coredump_mask & PIDFD_COREDUMPED)
- goto out;
-
- exit_code = EXIT_SUCCESS;
-out:
- if (fd_peer_pidfd >= 0)
- close(fd_peer_pidfd);
- if (fd_coredump >= 0)
- close(fd_coredump);
- if (fd_server >= 0)
- close(fd_server);
- _exit(exit_code);
- }
- self->pid_coredump_server = pid_coredump_server;
-
- EXPECT_EQ(close(ipc_sockets[1]), 0);
- ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
- EXPECT_EQ(close(ipc_sockets[0]), 0);
-
- pid = fork();
- ASSERT_GE(pid, 0);
- if (pid == 0) {
- int fd_socket;
- ssize_t ret;
- const struct sockaddr_un coredump_sk = {
- .sun_family = AF_UNIX,
- .sun_path = "/tmp/coredump.socket",
- };
- size_t coredump_sk_len =
- offsetof(struct sockaddr_un, sun_path) +
- sizeof("/tmp/coredump.socket");
-
- fd_socket = socket(AF_UNIX, SOCK_STREAM, 0);
- if (fd_socket < 0)
- _exit(EXIT_FAILURE);
-
- ret = connect(fd_socket, (const struct sockaddr *)&coredump_sk, coredump_sk_len);
- if (ret < 0)
- _exit(EXIT_FAILURE);
-
- close(fd_socket);
- _exit(EXIT_SUCCESS);
- }
-
- pidfd = sys_pidfd_open(pid, 0);
- ASSERT_GE(pidfd, 0);
-
- waitpid(pid, &status, 0);
- ASSERT_TRUE(WIFEXITED(status));
- ASSERT_EQ(WEXITSTATUS(status), 0);
-
- ASSERT_TRUE(get_pidfd_info(pidfd, &info));
- ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
- ASSERT_EQ((info.coredump_mask & PIDFD_COREDUMPED), 0);
-
- wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
-
- ASSERT_NE(stat("/tmp/coredump.file", &st), 0);
- ASSERT_EQ(errno, ENOENT);
-}
-
-TEST_F(coredump, socket_enoent)
-{
- int pidfd, status;
- pid_t pid;
-
- ASSERT_TRUE(set_core_pattern("@/tmp/coredump.socket"));
-
- pid = fork();
- ASSERT_GE(pid, 0);
- if (pid == 0)
- crashing_child();
-
- pidfd = sys_pidfd_open(pid, 0);
- ASSERT_GE(pidfd, 0);
-
- waitpid(pid, &status, 0);
- ASSERT_TRUE(WIFSIGNALED(status));
- ASSERT_FALSE(WCOREDUMP(status));
-}
-
-TEST_F(coredump, socket_no_listener)
-{
- int pidfd, ret, status;
- pid_t pid, pid_coredump_server;
- int ipc_sockets[2];
- char c;
- const struct sockaddr_un coredump_sk = {
- .sun_family = AF_UNIX,
- .sun_path = "/tmp/coredump.socket",
- };
- size_t coredump_sk_len = offsetof(struct sockaddr_un, sun_path) +
- sizeof("/tmp/coredump.socket");
-
- ASSERT_TRUE(set_core_pattern("@/tmp/coredump.socket"));
-
- ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
- ASSERT_EQ(ret, 0);
-
- pid_coredump_server = fork();
- ASSERT_GE(pid_coredump_server, 0);
- if (pid_coredump_server == 0) {
- int fd_server = -1;
- int exit_code = EXIT_FAILURE;
-
- close(ipc_sockets[0]);
-
- fd_server = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0);
- if (fd_server < 0)
- goto out;
-
- ret = bind(fd_server, (const struct sockaddr *)&coredump_sk, coredump_sk_len);
- if (ret < 0)
- goto out;
-
- if (write_nointr(ipc_sockets[1], "1", 1) < 0)
- goto out;
-
- exit_code = EXIT_SUCCESS;
-out:
- if (fd_server >= 0)
- close(fd_server);
- close(ipc_sockets[1]);
- _exit(exit_code);
- }
- self->pid_coredump_server = pid_coredump_server;
-
- EXPECT_EQ(close(ipc_sockets[1]), 0);
- ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
- EXPECT_EQ(close(ipc_sockets[0]), 0);
-
- pid = fork();
- ASSERT_GE(pid, 0);
- if (pid == 0)
- crashing_child();
-
- pidfd = sys_pidfd_open(pid, 0);
- ASSERT_GE(pidfd, 0);
-
- waitpid(pid, &status, 0);
- ASSERT_TRUE(WIFSIGNALED(status));
- ASSERT_FALSE(WCOREDUMP(status));
-
- wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
-}
-
-static ssize_t recv_marker(int fd)
-{
- enum coredump_mark mark = COREDUMP_MARK_REQACK;
- ssize_t ret;
-
- ret = recv(fd, &mark, sizeof(mark), MSG_WAITALL);
- if (ret != sizeof(mark))
- return -1;
-
- switch (mark) {
- case COREDUMP_MARK_REQACK:
- fprintf(stderr, "Received marker: ReqAck\n");
- return COREDUMP_MARK_REQACK;
- case COREDUMP_MARK_MINSIZE:
- fprintf(stderr, "Received marker: MinSize\n");
- return COREDUMP_MARK_MINSIZE;
- case COREDUMP_MARK_MAXSIZE:
- fprintf(stderr, "Received marker: MaxSize\n");
- return COREDUMP_MARK_MAXSIZE;
- case COREDUMP_MARK_UNSUPPORTED:
- fprintf(stderr, "Received marker: Unsupported\n");
- return COREDUMP_MARK_UNSUPPORTED;
- case COREDUMP_MARK_CONFLICTING:
- fprintf(stderr, "Received marker: Conflicting\n");
- return COREDUMP_MARK_CONFLICTING;
- default:
- fprintf(stderr, "Received unknown marker: %u\n", mark);
- break;
- }
- return -1;
-}
-
-static bool read_marker(int fd, enum coredump_mark mark)
-{
- ssize_t ret;
-
- ret = recv_marker(fd);
- if (ret < 0)
- return false;
- return ret == mark;
-}
-
-static bool read_coredump_req(int fd, struct coredump_req *req)
-{
- ssize_t ret;
- size_t field_size, user_size, ack_size, kernel_size, remaining_size;
-
- memset(req, 0, sizeof(*req));
- field_size = sizeof(req->size);
-
- /* Peek the size of the coredump request. */
- ret = recv(fd, req, field_size, MSG_PEEK | MSG_WAITALL);
- if (ret != field_size)
- return false;
- kernel_size = req->size;
-
- if (kernel_size < COREDUMP_ACK_SIZE_VER0)
- return false;
- if (kernel_size >= PAGE_SIZE)
- return false;
-
- /* Use the minimum of user and kernel size to read the full request. */
- user_size = sizeof(struct coredump_req);
- ack_size = user_size < kernel_size ? user_size : kernel_size;
- ret = recv(fd, req, ack_size, MSG_WAITALL);
- if (ret != ack_size)
- return false;
-
- fprintf(stderr, "Read coredump request with size %u and mask 0x%llx\n",
- req->size, (unsigned long long)req->mask);
-
- if (user_size > kernel_size)
- remaining_size = user_size - kernel_size;
- else
- remaining_size = kernel_size - user_size;
-
- if (PAGE_SIZE <= remaining_size)
- return false;
-
- /*
- * Discard any additional data if the kernel's request was larger than
- * what we knew about or cared about.
- */
- if (remaining_size) {
- char buffer[PAGE_SIZE];
-
- ret = recv(fd, buffer, sizeof(buffer), MSG_WAITALL);
- if (ret != remaining_size)
- return false;
- fprintf(stderr, "Discarded %zu bytes of data after coredump request\n", remaining_size);
- }
-
- return true;
-}
-
-static bool send_coredump_ack(int fd, const struct coredump_req *req,
- __u64 mask, size_t size_ack)
-{
- ssize_t ret;
- /*
- * Wrap struct coredump_ack in a larger struct so we can
- * simulate sending to much data to the kernel.
- */
- struct large_ack_for_size_testing {
- struct coredump_ack ack;
- char buffer[PAGE_SIZE];
- } large_ack = {};
-
- if (!size_ack)
- size_ack = sizeof(struct coredump_ack) < req->size_ack ?
- sizeof(struct coredump_ack) :
- req->size_ack;
- large_ack.ack.mask = mask;
- large_ack.ack.size = size_ack;
- ret = send(fd, &large_ack, size_ack, MSG_NOSIGNAL);
- if (ret != size_ack)
- return false;
-
- fprintf(stderr, "Sent coredump ack with size %zu and mask 0x%llx\n",
- size_ack, (unsigned long long)mask);
- return true;
-}
-
-static bool check_coredump_req(const struct coredump_req *req, size_t min_size,
- __u64 required_mask)
-{
- if (req->size < min_size)
- return false;
- if ((req->mask & required_mask) != required_mask)
- return false;
- if (req->mask & ~required_mask)
- return false;
- return true;
-}
-
-TEST_F(coredump, socket_request_kernel)
-{
- int pidfd, ret, status;
- pid_t pid, pid_coredump_server;
- struct stat st;
- struct pidfd_info info = {};
- int ipc_sockets[2];
- char c;
-
- ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
-
- ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
- ASSERT_EQ(ret, 0);
-
- pid_coredump_server = fork();
- ASSERT_GE(pid_coredump_server, 0);
- if (pid_coredump_server == 0) {
- struct coredump_req req = {};
- int fd_server = -1, fd_coredump = -1, fd_core_file = -1, fd_peer_pidfd = -1;
- int exit_code = EXIT_FAILURE;
-
- close(ipc_sockets[0]);
-
- fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
- if (fd_server < 0)
- goto out;
-
- if (write_nointr(ipc_sockets[1], "1", 1) < 0)
- goto out;
-
- close(ipc_sockets[1]);
-
- fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
- if (fd_coredump < 0)
- goto out;
-
- fd_peer_pidfd = get_peer_pidfd(fd_coredump);
- if (fd_peer_pidfd < 0)
- goto out;
-
- if (!get_pidfd_info(fd_peer_pidfd, &info))
- goto out;
-
- if (!(info.mask & PIDFD_INFO_COREDUMP))
- goto out;
-
- if (!(info.coredump_mask & PIDFD_COREDUMPED))
- goto out;
-
- fd_core_file = creat("/tmp/coredump.file", 0644);
- if (fd_core_file < 0)
- goto out;
-
- if (!read_coredump_req(fd_coredump, &req))
- goto out;
-
- if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
- COREDUMP_KERNEL | COREDUMP_USERSPACE |
- COREDUMP_REJECT | COREDUMP_WAIT))
- goto out;
-
- if (!send_coredump_ack(fd_coredump, &req,
- COREDUMP_KERNEL | COREDUMP_WAIT, 0))
- goto out;
-
- if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK))
- goto out;
-
- for (;;) {
- char buffer[4096];
- ssize_t bytes_read, bytes_write;
-
- bytes_read = read(fd_coredump, buffer, sizeof(buffer));
- if (bytes_read < 0)
- goto out;
-
- if (bytes_read == 0)
- break;
-
- bytes_write = write(fd_core_file, buffer, bytes_read);
- if (bytes_read != bytes_write)
- goto out;
- }
-
- exit_code = EXIT_SUCCESS;
-out:
- if (fd_core_file >= 0)
- close(fd_core_file);
- if (fd_peer_pidfd >= 0)
- close(fd_peer_pidfd);
- if (fd_coredump >= 0)
- close(fd_coredump);
- if (fd_server >= 0)
- close(fd_server);
- _exit(exit_code);
- }
- self->pid_coredump_server = pid_coredump_server;
-
- EXPECT_EQ(close(ipc_sockets[1]), 0);
- ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
- EXPECT_EQ(close(ipc_sockets[0]), 0);
-
- pid = fork();
- ASSERT_GE(pid, 0);
- if (pid == 0)
- crashing_child();
-
- pidfd = sys_pidfd_open(pid, 0);
- ASSERT_GE(pidfd, 0);
-
- waitpid(pid, &status, 0);
- ASSERT_TRUE(WIFSIGNALED(status));
- ASSERT_TRUE(WCOREDUMP(status));
-
- ASSERT_TRUE(get_pidfd_info(pidfd, &info));
- ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
- ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
-
- wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
-
- ASSERT_EQ(stat("/tmp/coredump.file", &st), 0);
- ASSERT_GT(st.st_size, 0);
- system("file /tmp/coredump.file");
-}
-
-TEST_F(coredump, socket_request_userspace)
-{
- int pidfd, ret, status;
- pid_t pid, pid_coredump_server;
- struct pidfd_info info = {};
- int ipc_sockets[2];
- char c;
-
- ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
-
- ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
- ASSERT_EQ(ret, 0);
-
- pid_coredump_server = fork();
- ASSERT_GE(pid_coredump_server, 0);
- if (pid_coredump_server == 0) {
- struct coredump_req req = {};
- int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
- int exit_code = EXIT_FAILURE;
-
- close(ipc_sockets[0]);
-
- fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
- if (fd_server < 0)
- goto out;
-
- if (write_nointr(ipc_sockets[1], "1", 1) < 0)
- goto out;
-
- close(ipc_sockets[1]);
-
- fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
- if (fd_coredump < 0)
- goto out;
-
- fd_peer_pidfd = get_peer_pidfd(fd_coredump);
- if (fd_peer_pidfd < 0)
- goto out;
-
- if (!get_pidfd_info(fd_peer_pidfd, &info))
- goto out;
-
- if (!(info.mask & PIDFD_INFO_COREDUMP))
- goto out;
-
- if (!(info.coredump_mask & PIDFD_COREDUMPED))
- goto out;
-
- if (!read_coredump_req(fd_coredump, &req))
- goto out;
-
- if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
- COREDUMP_KERNEL | COREDUMP_USERSPACE |
- COREDUMP_REJECT | COREDUMP_WAIT))
- goto out;
-
- if (!send_coredump_ack(fd_coredump, &req,
- COREDUMP_USERSPACE | COREDUMP_WAIT, 0))
- goto out;
-
- if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK))
- goto out;
-
- for (;;) {
- char buffer[4096];
- ssize_t bytes_read;
-
- bytes_read = read(fd_coredump, buffer, sizeof(buffer));
- if (bytes_read > 0)
- goto out;
-
- if (bytes_read < 0)
- goto out;
-
- if (bytes_read == 0)
- break;
- }
-
- exit_code = EXIT_SUCCESS;
-out:
- if (fd_peer_pidfd >= 0)
- close(fd_peer_pidfd);
- if (fd_coredump >= 0)
- close(fd_coredump);
- if (fd_server >= 0)
- close(fd_server);
- _exit(exit_code);
- }
- self->pid_coredump_server = pid_coredump_server;
-
- EXPECT_EQ(close(ipc_sockets[1]), 0);
- ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
- EXPECT_EQ(close(ipc_sockets[0]), 0);
-
- pid = fork();
- ASSERT_GE(pid, 0);
- if (pid == 0)
- crashing_child();
-
- pidfd = sys_pidfd_open(pid, 0);
- ASSERT_GE(pidfd, 0);
-
- waitpid(pid, &status, 0);
- ASSERT_TRUE(WIFSIGNALED(status));
- ASSERT_TRUE(WCOREDUMP(status));
-
- ASSERT_TRUE(get_pidfd_info(pidfd, &info));
- ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
- ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
-
- wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
-}
-
-TEST_F(coredump, socket_request_reject)
-{
- int pidfd, ret, status;
- pid_t pid, pid_coredump_server;
- struct pidfd_info info = {};
- int ipc_sockets[2];
- char c;
-
- ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
-
- ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
- ASSERT_EQ(ret, 0);
-
- pid_coredump_server = fork();
- ASSERT_GE(pid_coredump_server, 0);
- if (pid_coredump_server == 0) {
- struct coredump_req req = {};
- int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
- int exit_code = EXIT_FAILURE;
-
- close(ipc_sockets[0]);
-
- fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
- if (fd_server < 0)
- goto out;
-
- if (write_nointr(ipc_sockets[1], "1", 1) < 0)
- goto out;
-
- close(ipc_sockets[1]);
-
- fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
- if (fd_coredump < 0)
- goto out;
-
- fd_peer_pidfd = get_peer_pidfd(fd_coredump);
- if (fd_peer_pidfd < 0)
- goto out;
-
- if (!get_pidfd_info(fd_peer_pidfd, &info))
- goto out;
-
- if (!(info.mask & PIDFD_INFO_COREDUMP))
- goto out;
-
- if (!(info.coredump_mask & PIDFD_COREDUMPED))
- goto out;
-
- if (!read_coredump_req(fd_coredump, &req))
- goto out;
-
- if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
- COREDUMP_KERNEL | COREDUMP_USERSPACE |
- COREDUMP_REJECT | COREDUMP_WAIT))
- goto out;
-
- if (!send_coredump_ack(fd_coredump, &req,
- COREDUMP_REJECT | COREDUMP_WAIT, 0))
- goto out;
-
- if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK))
- goto out;
-
- for (;;) {
- char buffer[4096];
- ssize_t bytes_read;
-
- bytes_read = read(fd_coredump, buffer, sizeof(buffer));
- if (bytes_read > 0)
- goto out;
-
- if (bytes_read < 0)
- goto out;
-
- if (bytes_read == 0)
- break;
- }
-
- exit_code = EXIT_SUCCESS;
-out:
- if (fd_peer_pidfd >= 0)
- close(fd_peer_pidfd);
- if (fd_coredump >= 0)
- close(fd_coredump);
- if (fd_server >= 0)
- close(fd_server);
- _exit(exit_code);
- }
- self->pid_coredump_server = pid_coredump_server;
-
- EXPECT_EQ(close(ipc_sockets[1]), 0);
- ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
- EXPECT_EQ(close(ipc_sockets[0]), 0);
-
- pid = fork();
- ASSERT_GE(pid, 0);
- if (pid == 0)
- crashing_child();
-
- pidfd = sys_pidfd_open(pid, 0);
- ASSERT_GE(pidfd, 0);
-
- waitpid(pid, &status, 0);
- ASSERT_TRUE(WIFSIGNALED(status));
- ASSERT_FALSE(WCOREDUMP(status));
-
- ASSERT_TRUE(get_pidfd_info(pidfd, &info));
- ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
- ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
-
- wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
-}
-
-TEST_F(coredump, socket_request_invalid_flag_combination)
-{
- int pidfd, ret, status;
- pid_t pid, pid_coredump_server;
- struct pidfd_info info = {};
- int ipc_sockets[2];
- char c;
-
- ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
-
- ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
- ASSERT_EQ(ret, 0);
-
- pid_coredump_server = fork();
- ASSERT_GE(pid_coredump_server, 0);
- if (pid_coredump_server == 0) {
- struct coredump_req req = {};
- int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
- int exit_code = EXIT_FAILURE;
-
- close(ipc_sockets[0]);
-
- fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
- if (fd_server < 0)
- goto out;
-
- if (write_nointr(ipc_sockets[1], "1", 1) < 0)
- goto out;
-
- close(ipc_sockets[1]);
-
- fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
- if (fd_coredump < 0)
- goto out;
-
- fd_peer_pidfd = get_peer_pidfd(fd_coredump);
- if (fd_peer_pidfd < 0)
- goto out;
-
- if (!get_pidfd_info(fd_peer_pidfd, &info))
- goto out;
-
- if (!(info.mask & PIDFD_INFO_COREDUMP))
- goto out;
-
- if (!(info.coredump_mask & PIDFD_COREDUMPED))
- goto out;
-
- if (!read_coredump_req(fd_coredump, &req))
- goto out;
-
- if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
- COREDUMP_KERNEL | COREDUMP_USERSPACE |
- COREDUMP_REJECT | COREDUMP_WAIT))
- goto out;
-
- if (!send_coredump_ack(fd_coredump, &req,
- COREDUMP_KERNEL | COREDUMP_REJECT | COREDUMP_WAIT, 0))
- goto out;
-
- if (!read_marker(fd_coredump, COREDUMP_MARK_CONFLICTING))
- goto out;
-
- exit_code = EXIT_SUCCESS;
-out:
- if (fd_peer_pidfd >= 0)
- close(fd_peer_pidfd);
- if (fd_coredump >= 0)
- close(fd_coredump);
- if (fd_server >= 0)
- close(fd_server);
- _exit(exit_code);
- }
- self->pid_coredump_server = pid_coredump_server;
-
- EXPECT_EQ(close(ipc_sockets[1]), 0);
- ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
- EXPECT_EQ(close(ipc_sockets[0]), 0);
-
- pid = fork();
- ASSERT_GE(pid, 0);
- if (pid == 0)
- crashing_child();
-
- pidfd = sys_pidfd_open(pid, 0);
- ASSERT_GE(pidfd, 0);
-
- waitpid(pid, &status, 0);
- ASSERT_TRUE(WIFSIGNALED(status));
- ASSERT_FALSE(WCOREDUMP(status));
-
- ASSERT_TRUE(get_pidfd_info(pidfd, &info));
- ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
- ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
-
- wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
-}
-
-TEST_F(coredump, socket_request_unknown_flag)
-{
- int pidfd, ret, status;
- pid_t pid, pid_coredump_server;
- struct pidfd_info info = {};
- int ipc_sockets[2];
- char c;
-
- ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
-
- ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
- ASSERT_EQ(ret, 0);
-
- pid_coredump_server = fork();
- ASSERT_GE(pid_coredump_server, 0);
- if (pid_coredump_server == 0) {
- struct coredump_req req = {};
- int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
- int exit_code = EXIT_FAILURE;
-
- close(ipc_sockets[0]);
-
- fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
- if (fd_server < 0)
- goto out;
-
- if (write_nointr(ipc_sockets[1], "1", 1) < 0)
- goto out;
-
- close(ipc_sockets[1]);
-
- fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
- if (fd_coredump < 0)
- goto out;
-
- fd_peer_pidfd = get_peer_pidfd(fd_coredump);
- if (fd_peer_pidfd < 0)
- goto out;
-
- if (!get_pidfd_info(fd_peer_pidfd, &info))
- goto out;
-
- if (!(info.mask & PIDFD_INFO_COREDUMP))
- goto out;
-
- if (!(info.coredump_mask & PIDFD_COREDUMPED))
- goto out;
-
- if (!read_coredump_req(fd_coredump, &req))
- goto out;
-
- if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
- COREDUMP_KERNEL | COREDUMP_USERSPACE |
- COREDUMP_REJECT | COREDUMP_WAIT))
- goto out;
-
- if (!send_coredump_ack(fd_coredump, &req, (1ULL << 63), 0))
- goto out;
-
- if (!read_marker(fd_coredump, COREDUMP_MARK_UNSUPPORTED))
- goto out;
-
- exit_code = EXIT_SUCCESS;
-out:
- if (fd_peer_pidfd >= 0)
- close(fd_peer_pidfd);
- if (fd_coredump >= 0)
- close(fd_coredump);
- if (fd_server >= 0)
- close(fd_server);
- _exit(exit_code);
- }
- self->pid_coredump_server = pid_coredump_server;
-
- EXPECT_EQ(close(ipc_sockets[1]), 0);
- ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
- EXPECT_EQ(close(ipc_sockets[0]), 0);
-
- pid = fork();
- ASSERT_GE(pid, 0);
- if (pid == 0)
- crashing_child();
-
- pidfd = sys_pidfd_open(pid, 0);
- ASSERT_GE(pidfd, 0);
-
- waitpid(pid, &status, 0);
- ASSERT_TRUE(WIFSIGNALED(status));
- ASSERT_FALSE(WCOREDUMP(status));
-
- ASSERT_TRUE(get_pidfd_info(pidfd, &info));
- ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
- ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
-
- wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
-}
-
-TEST_F(coredump, socket_request_invalid_size_small)
-{
- int pidfd, ret, status;
- pid_t pid, pid_coredump_server;
- struct pidfd_info info = {};
- int ipc_sockets[2];
- char c;
-
- ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
-
- ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
- ASSERT_EQ(ret, 0);
-
- pid_coredump_server = fork();
- ASSERT_GE(pid_coredump_server, 0);
- if (pid_coredump_server == 0) {
- struct coredump_req req = {};
- int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
- int exit_code = EXIT_FAILURE;
-
- close(ipc_sockets[0]);
-
- fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
- if (fd_server < 0)
- goto out;
-
- if (write_nointr(ipc_sockets[1], "1", 1) < 0)
- goto out;
-
- close(ipc_sockets[1]);
-
- fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
- if (fd_coredump < 0)
- goto out;
-
- fd_peer_pidfd = get_peer_pidfd(fd_coredump);
- if (fd_peer_pidfd < 0)
- goto out;
-
- if (!get_pidfd_info(fd_peer_pidfd, &info))
- goto out;
-
- if (!(info.mask & PIDFD_INFO_COREDUMP))
- goto out;
-
- if (!(info.coredump_mask & PIDFD_COREDUMPED))
- goto out;
-
- if (!read_coredump_req(fd_coredump, &req))
- goto out;
-
- if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
- COREDUMP_KERNEL | COREDUMP_USERSPACE |
- COREDUMP_REJECT | COREDUMP_WAIT))
- goto out;
-
- if (!send_coredump_ack(fd_coredump, &req,
- COREDUMP_REJECT | COREDUMP_WAIT,
- COREDUMP_ACK_SIZE_VER0 / 2))
- goto out;
-
- if (!read_marker(fd_coredump, COREDUMP_MARK_MINSIZE))
- goto out;
-
- exit_code = EXIT_SUCCESS;
-out:
- if (fd_peer_pidfd >= 0)
- close(fd_peer_pidfd);
- if (fd_coredump >= 0)
- close(fd_coredump);
- if (fd_server >= 0)
- close(fd_server);
- _exit(exit_code);
- }
- self->pid_coredump_server = pid_coredump_server;
-
- EXPECT_EQ(close(ipc_sockets[1]), 0);
- ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
- EXPECT_EQ(close(ipc_sockets[0]), 0);
-
- pid = fork();
- ASSERT_GE(pid, 0);
- if (pid == 0)
- crashing_child();
-
- pidfd = sys_pidfd_open(pid, 0);
- ASSERT_GE(pidfd, 0);
-
- waitpid(pid, &status, 0);
- ASSERT_TRUE(WIFSIGNALED(status));
- ASSERT_FALSE(WCOREDUMP(status));
-
- ASSERT_TRUE(get_pidfd_info(pidfd, &info));
- ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
- ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
-
- wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
-}
-
-TEST_F(coredump, socket_request_invalid_size_large)
-{
- int pidfd, ret, status;
- pid_t pid, pid_coredump_server;
- struct pidfd_info info = {};
- int ipc_sockets[2];
- char c;
-
- ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
-
- ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
- ASSERT_EQ(ret, 0);
-
- pid_coredump_server = fork();
- ASSERT_GE(pid_coredump_server, 0);
- if (pid_coredump_server == 0) {
- struct coredump_req req = {};
- int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
- int exit_code = EXIT_FAILURE;
-
- close(ipc_sockets[0]);
-
- fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
- if (fd_server < 0)
- goto out;
-
- if (write_nointr(ipc_sockets[1], "1", 1) < 0)
- goto out;
-
- close(ipc_sockets[1]);
-
- fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
- if (fd_coredump < 0)
- goto out;
-
- fd_peer_pidfd = get_peer_pidfd(fd_coredump);
- if (fd_peer_pidfd < 0)
- goto out;
-
- if (!get_pidfd_info(fd_peer_pidfd, &info))
- goto out;
-
- if (!(info.mask & PIDFD_INFO_COREDUMP))
- goto out;
-
- if (!(info.coredump_mask & PIDFD_COREDUMPED))
- goto out;
-
- if (!read_coredump_req(fd_coredump, &req))
- goto out;
-
- if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
- COREDUMP_KERNEL | COREDUMP_USERSPACE |
- COREDUMP_REJECT | COREDUMP_WAIT))
- goto out;
-
- if (!send_coredump_ack(fd_coredump, &req,
- COREDUMP_REJECT | COREDUMP_WAIT,
- COREDUMP_ACK_SIZE_VER0 + PAGE_SIZE))
- goto out;
-
- if (!read_marker(fd_coredump, COREDUMP_MARK_MAXSIZE))
- goto out;
-
- exit_code = EXIT_SUCCESS;
-out:
- if (fd_peer_pidfd >= 0)
- close(fd_peer_pidfd);
- if (fd_coredump >= 0)
- close(fd_coredump);
- if (fd_server >= 0)
- close(fd_server);
- _exit(exit_code);
- }
- self->pid_coredump_server = pid_coredump_server;
-
- EXPECT_EQ(close(ipc_sockets[1]), 0);
- ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
- EXPECT_EQ(close(ipc_sockets[0]), 0);
-
- pid = fork();
- ASSERT_GE(pid, 0);
- if (pid == 0)
- crashing_child();
-
- pidfd = sys_pidfd_open(pid, 0);
- ASSERT_GE(pidfd, 0);
-
- waitpid(pid, &status, 0);
- ASSERT_TRUE(WIFSIGNALED(status));
- ASSERT_FALSE(WCOREDUMP(status));
-
- ASSERT_TRUE(get_pidfd_info(pidfd, &info));
- ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
- ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
-
- wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
-}
-
-static int open_coredump_tmpfile(int fd_tmpfs_detached)
-{
- return openat(fd_tmpfs_detached, ".", O_TMPFILE | O_RDWR | O_EXCL, 0600);
-}
-
-#define NUM_CRASHING_COREDUMPS 5
-
-TEST_F_TIMEOUT(coredump, socket_multiple_crashing_coredumps, 500)
-{
- int pidfd[NUM_CRASHING_COREDUMPS], status[NUM_CRASHING_COREDUMPS];
- pid_t pid[NUM_CRASHING_COREDUMPS], pid_coredump_server;
- struct pidfd_info info = {};
- int ipc_sockets[2];
- char c;
-
- ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
-
- ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets), 0);
-
- pid_coredump_server = fork();
- ASSERT_GE(pid_coredump_server, 0);
- if (pid_coredump_server == 0) {
- int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1, fd_core_file = -1;
- int exit_code = EXIT_FAILURE;
- struct coredump_req req = {};
-
- close(ipc_sockets[0]);
- fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
- if (fd_server < 0) {
- fprintf(stderr, "Failed to create and listen on unix socket\n");
- goto out;
- }
-
- if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
- fprintf(stderr, "Failed to notify parent via ipc socket\n");
- goto out;
- }
- close(ipc_sockets[1]);
-
- for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) {
- fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
- if (fd_coredump < 0) {
- fprintf(stderr, "accept4 failed: %m\n");
- goto out;
- }
-
- fd_peer_pidfd = get_peer_pidfd(fd_coredump);
- if (fd_peer_pidfd < 0) {
- fprintf(stderr, "get_peer_pidfd failed for fd %d: %m\n", fd_coredump);
- goto out;
- }
-
- if (!get_pidfd_info(fd_peer_pidfd, &info)) {
- fprintf(stderr, "get_pidfd_info failed for fd %d\n", fd_peer_pidfd);
- goto out;
- }
-
- if (!(info.mask & PIDFD_INFO_COREDUMP)) {
- fprintf(stderr, "pidfd info missing PIDFD_INFO_COREDUMP for fd %d\n", fd_peer_pidfd);
- goto out;
- }
- if (!(info.coredump_mask & PIDFD_COREDUMPED)) {
- fprintf(stderr, "pidfd info missing PIDFD_COREDUMPED for fd %d\n", fd_peer_pidfd);
- goto out;
- }
-
- if (!read_coredump_req(fd_coredump, &req)) {
- fprintf(stderr, "read_coredump_req failed for fd %d\n", fd_coredump);
- goto out;
- }
-
- if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
- COREDUMP_KERNEL | COREDUMP_USERSPACE |
- COREDUMP_REJECT | COREDUMP_WAIT)) {
- fprintf(stderr, "check_coredump_req failed for fd %d\n", fd_coredump);
- goto out;
- }
-
- if (!send_coredump_ack(fd_coredump, &req,
- COREDUMP_KERNEL | COREDUMP_WAIT, 0)) {
- fprintf(stderr, "send_coredump_ack failed for fd %d\n", fd_coredump);
- goto out;
- }
-
- if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK)) {
- fprintf(stderr, "read_marker failed for fd %d\n", fd_coredump);
- goto out;
- }
-
- fd_core_file = open_coredump_tmpfile(self->fd_tmpfs_detached);
- if (fd_core_file < 0) {
- fprintf(stderr, "%m - open_coredump_tmpfile failed for fd %d\n", fd_coredump);
- goto out;
- }
-
- for (;;) {
- char buffer[4096];
- ssize_t bytes_read, bytes_write;
-
- bytes_read = read(fd_coredump, buffer, sizeof(buffer));
- if (bytes_read < 0) {
- fprintf(stderr, "read failed for fd %d: %m\n", fd_coredump);
- goto out;
- }
-
- if (bytes_read == 0)
- break;
-
- bytes_write = write(fd_core_file, buffer, bytes_read);
- if (bytes_read != bytes_write) {
- fprintf(stderr, "write failed for fd %d: %m\n", fd_core_file);
- goto out;
- }
- }
-
- close(fd_core_file);
- close(fd_peer_pidfd);
- close(fd_coredump);
- fd_peer_pidfd = -1;
- fd_coredump = -1;
- }
-
- exit_code = EXIT_SUCCESS;
-out:
- if (fd_core_file >= 0)
- close(fd_core_file);
- if (fd_peer_pidfd >= 0)
- close(fd_peer_pidfd);
- if (fd_coredump >= 0)
- close(fd_coredump);
- if (fd_server >= 0)
- close(fd_server);
- _exit(exit_code);
- }
- self->pid_coredump_server = pid_coredump_server;
-
- EXPECT_EQ(close(ipc_sockets[1]), 0);
- ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
- EXPECT_EQ(close(ipc_sockets[0]), 0);
-
- for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) {
- pid[i] = fork();
- ASSERT_GE(pid[i], 0);
- if (pid[i] == 0)
- crashing_child();
- pidfd[i] = sys_pidfd_open(pid[i], 0);
- ASSERT_GE(pidfd[i], 0);
- }
-
- for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) {
- waitpid(pid[i], &status[i], 0);
- ASSERT_TRUE(WIFSIGNALED(status[i]));
- ASSERT_TRUE(WCOREDUMP(status[i]));
- }
-
- for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) {
- info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP;
- ASSERT_EQ(ioctl(pidfd[i], PIDFD_GET_INFO, &info), 0);
- ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
- ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
- }
-
- wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
-}
-
-#define MAX_EVENTS 128
-
-static void process_coredump_worker(int fd_coredump, int fd_peer_pidfd, int fd_core_file)
-{
- int epfd = -1;
- int exit_code = EXIT_FAILURE;
-
- epfd = epoll_create1(0);
- if (epfd < 0)
- goto out;
-
- struct epoll_event ev;
- ev.events = EPOLLIN | EPOLLRDHUP | EPOLLET;
- ev.data.fd = fd_coredump;
- if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd_coredump, &ev) < 0)
- goto out;
-
- for (;;) {
- struct epoll_event events[1];
- int n = epoll_wait(epfd, events, 1, -1);
- if (n < 0)
- break;
-
- if (events[0].events & (EPOLLIN | EPOLLRDHUP)) {
- for (;;) {
- char buffer[4096];
- ssize_t bytes_read = read(fd_coredump, buffer, sizeof(buffer));
- if (bytes_read < 0) {
- if (errno == EAGAIN || errno == EWOULDBLOCK)
- break;
- goto out;
- }
- if (bytes_read == 0)
- goto done;
- ssize_t bytes_write = write(fd_core_file, buffer, bytes_read);
- if (bytes_write != bytes_read)
- goto out;
- }
- }
- }
-
-done:
- exit_code = EXIT_SUCCESS;
-out:
- if (epfd >= 0)
- close(epfd);
- if (fd_core_file >= 0)
- close(fd_core_file);
- if (fd_peer_pidfd >= 0)
- close(fd_peer_pidfd);
- if (fd_coredump >= 0)
- close(fd_coredump);
- _exit(exit_code);
-}
-
-TEST_F_TIMEOUT(coredump, socket_multiple_crashing_coredumps_epoll_workers, 500)
-{
- int pidfd[NUM_CRASHING_COREDUMPS], status[NUM_CRASHING_COREDUMPS];
- pid_t pid[NUM_CRASHING_COREDUMPS], pid_coredump_server, worker_pids[NUM_CRASHING_COREDUMPS];
- struct pidfd_info info = {};
- int ipc_sockets[2];
- char c;
-
- ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
- ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets), 0);
-
- pid_coredump_server = fork();
- ASSERT_GE(pid_coredump_server, 0);
- if (pid_coredump_server == 0) {
- int fd_server = -1, exit_code = EXIT_FAILURE, n_conns = 0;
- fd_server = -1;
- exit_code = EXIT_FAILURE;
- n_conns = 0;
- close(ipc_sockets[0]);
- fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
- if (fd_server < 0)
- goto out;
-
- if (write_nointr(ipc_sockets[1], "1", 1) < 0)
- goto out;
- close(ipc_sockets[1]);
-
- while (n_conns < NUM_CRASHING_COREDUMPS) {
- int fd_coredump = -1, fd_peer_pidfd = -1, fd_core_file = -1;
- struct coredump_req req = {};
- fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
- if (fd_coredump < 0) {
- if (errno == EAGAIN || errno == EWOULDBLOCK)
- continue;
- goto out;
- }
- fd_peer_pidfd = get_peer_pidfd(fd_coredump);
- if (fd_peer_pidfd < 0)
- goto out;
- if (!get_pidfd_info(fd_peer_pidfd, &info))
- goto out;
- if (!(info.mask & PIDFD_INFO_COREDUMP) || !(info.coredump_mask & PIDFD_COREDUMPED))
- goto out;
- if (!read_coredump_req(fd_coredump, &req))
- goto out;
- if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
- COREDUMP_KERNEL | COREDUMP_USERSPACE |
- COREDUMP_REJECT | COREDUMP_WAIT))
- goto out;
- if (!send_coredump_ack(fd_coredump, &req, COREDUMP_KERNEL | COREDUMP_WAIT, 0))
- goto out;
- if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK))
- goto out;
- fd_core_file = open_coredump_tmpfile(self->fd_tmpfs_detached);
- if (fd_core_file < 0)
- goto out;
- pid_t worker = fork();
- if (worker == 0) {
- close(fd_server);
- process_coredump_worker(fd_coredump, fd_peer_pidfd, fd_core_file);
- }
- worker_pids[n_conns] = worker;
- if (fd_coredump >= 0)
- close(fd_coredump);
- if (fd_peer_pidfd >= 0)
- close(fd_peer_pidfd);
- if (fd_core_file >= 0)
- close(fd_core_file);
- n_conns++;
- }
- exit_code = EXIT_SUCCESS;
-out:
- if (fd_server >= 0)
- close(fd_server);
-
- // Reap all worker processes
- for (int i = 0; i < n_conns; i++) {
- int wstatus;
- if (waitpid(worker_pids[i], &wstatus, 0) < 0) {
- fprintf(stderr, "Failed to wait for worker %d: %m\n", worker_pids[i]);
- } else if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) != EXIT_SUCCESS) {
- fprintf(stderr, "Worker %d exited with error code %d\n", worker_pids[i], WEXITSTATUS(wstatus));
- exit_code = EXIT_FAILURE;
- }
- }
-
- _exit(exit_code);
- }
- self->pid_coredump_server = pid_coredump_server;
-
- EXPECT_EQ(close(ipc_sockets[1]), 0);
- ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
- EXPECT_EQ(close(ipc_sockets[0]), 0);
-
- for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) {
- pid[i] = fork();
- ASSERT_GE(pid[i], 0);
- if (pid[i] == 0)
- crashing_child();
- pidfd[i] = sys_pidfd_open(pid[i], 0);
- ASSERT_GE(pidfd[i], 0);
- }
-
- for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) {
- ASSERT_GE(waitpid(pid[i], &status[i], 0), 0);
- ASSERT_TRUE(WIFSIGNALED(status[i]));
- ASSERT_TRUE(WCOREDUMP(status[i]));
- }
-
- for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) {
- info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP;
- ASSERT_EQ(ioctl(pidfd[i], PIDFD_GET_INFO, &info), 0);
- ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
- ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
- }
-
- wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
-}
-
-TEST_F(coredump, socket_invalid_paths)
-{
- ASSERT_FALSE(set_core_pattern("@ /tmp/coredump.socket"));
- ASSERT_FALSE(set_core_pattern("@/tmp/../coredump.socket"));
- ASSERT_FALSE(set_core_pattern("@../coredump.socket"));
- ASSERT_FALSE(set_core_pattern("@/tmp/coredump.socket/.."));
- ASSERT_FALSE(set_core_pattern("@.."));
-
- ASSERT_FALSE(set_core_pattern("@@ /tmp/coredump.socket"));
- ASSERT_FALSE(set_core_pattern("@@/tmp/../coredump.socket"));
- ASSERT_FALSE(set_core_pattern("@@../coredump.socket"));
- ASSERT_FALSE(set_core_pattern("@@/tmp/coredump.socket/.."));
- ASSERT_FALSE(set_core_pattern("@@.."));
-
- ASSERT_FALSE(set_core_pattern("@@@/tmp/coredump.socket"));
-}
-
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile
index bd3af9a34e2f..71ee69e524d7 100644
--- a/tools/testing/selftests/drivers/net/Makefile
+++ b/tools/testing/selftests/drivers/net/Makefile
@@ -10,6 +10,7 @@ TEST_GEN_FILES := \
# end of TEST_GEN_FILES
TEST_PROGS := \
+ hds.py \
napi_id.py \
napi_threaded.py \
netcons_basic.sh \
@@ -17,13 +18,13 @@ TEST_PROGS := \
netcons_fragmented_msg.sh \
netcons_overflow.sh \
netcons_sysdata.sh \
+ netcons_torture.sh \
netpoll_basic.py \
ping.py \
psp.py \
queues.py \
- stats.py \
shaper.py \
- hds.py \
+ stats.py \
xdp.py \
# end of TEST_PROGS
diff --git a/tools/testing/selftests/drivers/net/bonding/Makefile b/tools/testing/selftests/drivers/net/bonding/Makefile
index 2f095cf67d9a..6c5c60adb5e8 100644
--- a/tools/testing/selftests/drivers/net/bonding/Makefile
+++ b/tools/testing/selftests/drivers/net/bonding/Makefile
@@ -4,24 +4,29 @@
TEST_PROGS := \
bond-arp-interval-causes-panic.sh \
bond-break-lacpdu-tx.sh \
+ bond-eth-type-change.sh \
bond-lladdr-target.sh \
+ bond_ipsec_offload.sh \
+ bond_lacp_prio.sh \
+ bond_macvlan_ipvlan.sh \
+ bond_options.sh \
+ bond_passive_lacp.sh \
dev_addr_lists.sh \
mode-1-recovery-updelay.sh \
mode-2-recovery-updelay.sh \
- bond_options.sh \
- bond-eth-type-change.sh \
- bond_macvlan_ipvlan.sh \
- bond_passive_lacp.sh \
- bond_lacp_prio.sh
- bond_ipsec_offload.sh
+ netcons_over_bonding.sh \
+# end of TEST_PROGS
TEST_FILES := \
- lag_lib.sh \
bond_topo_2d1c.sh \
- bond_topo_3d1c.sh
+ bond_topo_3d1c.sh \
+ lag_lib.sh \
+# end of TEST_FILES
TEST_INCLUDES := \
+ ../../../net/lib.sh \
+ ../lib/sh/lib_netcons.sh \
../../../net/forwarding/lib.sh \
- ../../../net/lib.sh
+# end of TEST_INCLUDES
include ../../../lib.mk
diff --git a/tools/testing/selftests/drivers/net/bonding/config b/tools/testing/selftests/drivers/net/bonding/config
index e5b7a8db4dfa..991494376223 100644
--- a/tools/testing/selftests/drivers/net/bonding/config
+++ b/tools/testing/selftests/drivers/net/bonding/config
@@ -1,17 +1,21 @@
CONFIG_BONDING=y
CONFIG_BRIDGE=y
+CONFIG_CONFIGFS_FS=y
CONFIG_DUMMY=y
+CONFIG_INET_ESP=y
+CONFIG_INET_ESP_OFFLOAD=y
CONFIG_IPV6=y
-CONFIG_MACVLAN=y
CONFIG_IPVLAN=y
+CONFIG_MACVLAN=y
CONFIG_NET_ACT_GACT=y
CONFIG_NET_CLS_FLOWER=y
CONFIG_NET_CLS_MATCHALL=m
+CONFIG_NETCONSOLE=m
+CONFIG_NETCONSOLE_DYNAMIC=y
+CONFIG_NETCONSOLE_EXTENDED_LOG=y
+CONFIG_NETDEVSIM=m
CONFIG_NET_SCH_INGRESS=y
CONFIG_NLMON=y
CONFIG_VETH=y
CONFIG_VLAN_8021Q=m
-CONFIG_INET_ESP=y
-CONFIG_INET_ESP_OFFLOAD=y
CONFIG_XFRM_USER=m
-CONFIG_NETDEVSIM=m
diff --git a/tools/testing/selftests/drivers/net/bonding/netcons_over_bonding.sh b/tools/testing/selftests/drivers/net/bonding/netcons_over_bonding.sh
new file mode 100755
index 000000000000..477cc9379500
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/netcons_over_bonding.sh
@@ -0,0 +1,361 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This selftest exercises trying to have multiple netpoll users at the same
+# time.
+#
+# This selftest has multiple smalls test inside, and the goal is to
+# get interfaces with bonding and netconsole in different orders in order
+# to catch any possible issue.
+#
+# The main test composes of four interfaces being created using netdevsim; two
+# of them are bonded to serve as the netconsole's transmit interface. The
+# remaining two interfaces are similarly bonded and assigned to a separate
+# network namespace, which acts as the receive interface, where socat monitors
+# for incoming messages.
+#
+# A netconsole message is then sent to ensure it is properly received across
+# this configuration.
+#
+# Later, run a few other tests, to make sure that bonding and netconsole
+# cannot coexist.
+#
+# The test's objective is to exercise netpoll usage when managed simultaneously
+# by multiple subsystems (netconsole and bonding).
+#
+# Author: Breno Leitao <leitao@debian.org>
+
+set -euo pipefail
+
+SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
+
+source "${SCRIPTDIR}"/../lib/sh/lib_netcons.sh
+
+modprobe netdevsim 2> /dev/null || true
+modprobe netconsole 2> /dev/null || true
+modprobe bonding 2> /dev/null || true
+modprobe veth 2> /dev/null || true
+
+# The content of kmsg will be save to the following file
+OUTPUT_FILE="/tmp/${TARGET}"
+
+# Check for basic system dependency and exit if not found
+check_for_dependencies
+# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5)
+echo "6 5" > /proc/sys/kernel/printk
+# Remove the namespace, interfaces and netconsole target on exit
+trap cleanup_bond EXIT
+
+FORMAT="extended"
+IP_VERSION="ipv4"
+VETH0="veth"$(( RANDOM % 256))
+VETH1="veth"$((256 + RANDOM % 256))
+TXNS=""
+RXNS=""
+
+# Create "bond_tx_XX" and "bond_rx_XX" interfaces, and set DSTIF and SRCIF with
+# the bonding interfaces
+function setup_bonding_ifaces() {
+ local RAND=$(( RANDOM % 100 ))
+ BOND_TX_MAIN_IF="bond_tx_$RAND"
+ BOND_RX_MAIN_IF="bond_rx_$RAND"
+
+ # Setup TX
+ if ! ip -n "${TXNS}" link add "${BOND_TX_MAIN_IF}" type bond mode balance-rr
+ then
+ echo "Failed to create bond TX interface. Is CONFIG_BONDING set?" >&2
+ # only clean nsim ifaces and namespace. Nothing else has been
+ # initialized
+ cleanup_bond_nsim
+ trap - EXIT
+ exit "${ksft_skip}"
+ fi
+
+ # create_netdevsim() got the interface up, but it needs to be down
+ # before being enslaved.
+ ip -n "${TXNS}" \
+ link set "${BOND_TX1_SLAVE_IF}" down
+ ip -n "${TXNS}" \
+ link set "${BOND_TX2_SLAVE_IF}" down
+ ip -n "${TXNS}" \
+ link set "${BOND_TX1_SLAVE_IF}" master "${BOND_TX_MAIN_IF}"
+ ip -n "${TXNS}" \
+ link set "${BOND_TX2_SLAVE_IF}" master "${BOND_TX_MAIN_IF}"
+ ip -n "${TXNS}" \
+ link set "${BOND_TX_MAIN_IF}" up
+
+ # Setup RX
+ ip -n "${RXNS}" \
+ link add "${BOND_RX_MAIN_IF}" type bond mode balance-rr
+ ip -n "${RXNS}" \
+ link set "${BOND_RX1_SLAVE_IF}" down
+ ip -n "${RXNS}" \
+ link set "${BOND_RX2_SLAVE_IF}" down
+ ip -n "${RXNS}" \
+ link set "${BOND_RX1_SLAVE_IF}" master "${BOND_RX_MAIN_IF}"
+ ip -n "${RXNS}" \
+ link set "${BOND_RX2_SLAVE_IF}" master "${BOND_RX_MAIN_IF}"
+ ip -n "${RXNS}" \
+ link set "${BOND_RX_MAIN_IF}" up
+
+ export DSTIF="${BOND_RX_MAIN_IF}"
+ export SRCIF="${BOND_TX_MAIN_IF}"
+}
+
+# Create 4 netdevsim interfaces. Two of them will be bound to TX bonding iface
+# and the other two will be bond to the RX interface (on the other namespace)
+function create_ifaces_bond() {
+ BOND_TX1_SLAVE_IF=$(create_netdevsim "${NSIM_BOND_TX_1}" "${TXNS}")
+ BOND_TX2_SLAVE_IF=$(create_netdevsim "${NSIM_BOND_TX_2}" "${TXNS}")
+ BOND_RX1_SLAVE_IF=$(create_netdevsim "${NSIM_BOND_RX_1}" "${RXNS}")
+ BOND_RX2_SLAVE_IF=$(create_netdevsim "${NSIM_BOND_RX_2}" "${RXNS}")
+}
+
+# netdevsim link BOND_TX to BOND_RX interfaces
+function link_ifaces_bond() {
+ local BOND_TX1_SLAVE_IFIDX
+ local BOND_TX2_SLAVE_IFIDX
+ local BOND_RX1_SLAVE_IFIDX
+ local BOND_RX2_SLAVE_IFIDX
+ local TXNS_FD
+ local RXNS_FD
+
+ BOND_TX1_SLAVE_IFIDX=$(ip netns exec "${TXNS}" \
+ cat /sys/class/net/"$BOND_TX1_SLAVE_IF"/ifindex)
+ BOND_TX2_SLAVE_IFIDX=$(ip netns exec "${TXNS}" \
+ cat /sys/class/net/"$BOND_TX2_SLAVE_IF"/ifindex)
+ BOND_RX1_SLAVE_IFIDX=$(ip netns exec "${RXNS}" \
+ cat /sys/class/net/"$BOND_RX1_SLAVE_IF"/ifindex)
+ BOND_RX2_SLAVE_IFIDX=$(ip netns exec "${RXNS}" \
+ cat /sys/class/net/"$BOND_RX2_SLAVE_IF"/ifindex)
+
+ exec {TXNS_FD}</var/run/netns/"${TXNS}"
+ exec {RXNS_FD}</var/run/netns/"${RXNS}"
+
+ # Linking TX ifaces to the RX ones (on the other namespace)
+ echo "${TXNS_FD}:$BOND_TX1_SLAVE_IFIDX $RXNS_FD:$BOND_RX1_SLAVE_IFIDX" \
+ > "$NSIM_DEV_SYS_LINK"
+ echo "${TXNS_FD}:$BOND_TX2_SLAVE_IFIDX $RXNS_FD:$BOND_RX2_SLAVE_IFIDX" \
+ > "$NSIM_DEV_SYS_LINK"
+
+ exec {TXNS_FD}<&-
+ exec {RXNS_FD}<&-
+}
+
+function create_all_ifaces() {
+ # setup_ns function is coming from lib.sh
+ setup_ns TXNS RXNS
+ export NAMESPACE="${RXNS}"
+
+ # Create two interfaces for RX and two for TX
+ create_ifaces_bond
+ # Link netlink ifaces
+ link_ifaces_bond
+}
+
+# configure DSTIF and SRCIF IPs
+function configure_ifaces_ips() {
+ local IP_VERSION=${1:-"ipv4"}
+ select_ipv4_or_ipv6 "${IP_VERSION}"
+
+ ip -n "${RXNS}" addr add "${DSTIP}"/24 dev "${DSTIF}"
+ ip -n "${RXNS}" link set "${DSTIF}" up
+
+ ip -n "${TXNS}" addr add "${SRCIP}"/24 dev "${SRCIF}"
+ ip -n "${TXNS}" link set "${SRCIF}" up
+}
+
+function test_enable_netpoll_on_enslaved_iface() {
+ echo 0 > "${NETCONS_PATH}"/enabled
+
+ # At this stage, BOND_TX1_SLAVE_IF is enslaved to BOND_TX_MAIN_IF, and
+ # linked to BOND_RX1_SLAVE_IF inside the namespace.
+ echo "${BOND_TX1_SLAVE_IF}" > "${NETCONS_PATH}"/dev_name
+
+ # This should fail with the following message in dmesg:
+ # netpoll: netconsole: ethX is a slave device, aborting
+ set +e
+ enable_netcons_ns 2> /dev/null
+ set -e
+
+ if [[ $(cat "${NETCONS_PATH}"/enabled) -eq 1 ]]
+ then
+ echo "test failed: Bonding and netpoll cannot co-exists." >&2
+ exit "${ksft_fail}"
+ fi
+}
+
+function test_delete_bond_and_reenable_target() {
+ ip -n "${TXNS}" \
+ link delete "${BOND_TX_MAIN_IF}" type bond
+
+ # BOND_TX1_SLAVE_IF is not attached to a bond interface anymore
+ # netpoll can be plugged in there
+ echo "${BOND_TX1_SLAVE_IF}" > "${NETCONS_PATH}"/dev_name
+
+ # this should work, since the interface is not enslaved
+ enable_netcons_ns
+
+ if [[ $(cat "${NETCONS_PATH}"/enabled) -eq 0 ]]
+ then
+ echo "test failed: Unable to start netpoll on an unbond iface." >&2
+ exit "${ksft_fail}"
+ fi
+}
+
+# Send a netconsole message to the netconsole target
+function test_send_netcons_msg_through_bond_iface() {
+ # Listen for netconsole port inside the namespace and
+ # destination interface
+ listen_port_and_save_to "${OUTPUT_FILE}" "${IP_VERSION}" &
+ # Wait for socat to start and listen to the port.
+ wait_for_port "${RXNS}" "${PORT}" "${IP_VERSION}"
+ # Send the message
+ echo "${MSG}: ${TARGET}" > /dev/kmsg
+ # Wait until socat saves the file to disk
+ busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}"
+ # Make sure the message was received in the dst part
+ # and exit
+ validate_result "${OUTPUT_FILE}" "${FORMAT}"
+ # kill socat in case it is still running
+ pkill_socat
+}
+
+# BOND_TX1_SLAVE_IF has netconsole enabled on it, bind it to BOND_TX_MAIN_IF.
+# Given BOND_TX_MAIN_IF was deleted, recreate it first
+function test_enslave_netcons_enabled_iface {
+ # netconsole got disabled while the interface was down
+ if [[ $(cat "${NETCONS_PATH}"/enabled) -eq 0 ]]
+ then
+ echo "test failed: netconsole expected to be enabled against BOND_TX1_SLAVE_IF" >&2
+ exit "${ksft_fail}"
+ fi
+
+ # recreate the bonding iface. it got deleted by previous
+ # test (test_delete_bond_and_reenable_target)
+ ip -n "${TXNS}" \
+ link add "${BOND_TX_MAIN_IF}" type bond mode balance-rr
+
+ # sub-interface need to be down before attaching to bonding
+ # This will also disable netconsole.
+ ip -n "${TXNS}" \
+ link set "${BOND_TX1_SLAVE_IF}" down
+ ip -n "${TXNS}" \
+ link set "${BOND_TX1_SLAVE_IF}" master "${BOND_TX_MAIN_IF}"
+ ip -n "${TXNS}" \
+ link set "${BOND_TX_MAIN_IF}" up
+
+ # netconsole got disabled while the interface was down
+ if [[ $(cat "${NETCONS_PATH}"/enabled) -eq 1 ]]
+ then
+ echo "test failed: Device is part of a bond iface, cannot have netcons enabled" >&2
+ exit "${ksft_fail}"
+ fi
+}
+
+# Get netconsole enabled on a bonding interface and attach a second
+# sub-interface.
+function test_enslave_iface_to_bond {
+ # BOND_TX_MAIN_IF has only BOND_TX1_SLAVE_IF right now
+ echo "${BOND_TX_MAIN_IF}" > "${NETCONS_PATH}"/dev_name
+ enable_netcons_ns
+
+ # netcons is attached to bond0 and BOND_TX1_SLAVE_IF is
+ # part of BOND_TX_MAIN_IF. Attach BOND_TX2_SLAVE_IF to BOND_TX_MAIN_IF.
+ ip -n "${TXNS}" \
+ link set "${BOND_TX2_SLAVE_IF}" master "${BOND_TX_MAIN_IF}"
+ if [[ $(cat "${NETCONS_PATH}"/enabled) -eq 0 ]]
+ then
+ echo "test failed: Netconsole should be enabled on bonding interface. Failed" >&2
+ exit "${ksft_fail}"
+ fi
+}
+
+function test_enslave_iff_disabled_netpoll_iface {
+ local ret
+
+ # Create two interfaces. veth interfaces it known to have
+ # IFF_DISABLE_NETPOLL set
+ if ! ip link add "${VETH0}" type veth peer name "${VETH1}"
+ then
+ echo "Failed to create veth TX interface. Is CONFIG_VETH set?" >&2
+ exit "${ksft_skip}"
+ fi
+ set +e
+ # This will print RTNETLINK answers: Device or resource busy
+ ip link set "${VETH0}" master "${BOND_TX_MAIN_IF}" 2> /dev/null
+ ret=$?
+ set -e
+ if [[ $ret -eq 0 ]]
+ then
+ echo "test failed: veth interface could not be enslaved"
+ exit "${ksft_fail}"
+ fi
+}
+
+# Given that netconsole picks the current net namespace, we need to enable it
+# from inside the TXNS namespace
+function enable_netcons_ns() {
+ ip netns exec "${TXNS}" sh -c \
+ "mount -t configfs configfs /sys/kernel/config && echo 1 > $NETCONS_PATH/enabled"
+}
+
+####################
+# Tests start here #
+####################
+
+# Create regular interfaces using netdevsim and link them
+create_all_ifaces
+
+# Setup the bonding interfaces
+# BOND_RX_MAIN_IF has BOND_RX{1,2}_SLAVE_IF
+# BOND_TX_MAIN_IF has BOND_TX{1,2}_SLAVE_IF
+setup_bonding_ifaces
+
+# Configure the ips as BOND_RX1_SLAVE_IF and BOND_TX1_SLAVE_IF
+configure_ifaces_ips "${IP_VERSION}"
+
+_create_dynamic_target "${FORMAT}" "${NETCONS_PATH}"
+enable_netcons_ns
+set_user_data
+
+# Test #1 : Create an bonding interface and attach netpoll into
+# the bonding interface. Netconsole/netpoll should work on
+# the bonding interface.
+test_send_netcons_msg_through_bond_iface
+echo "test #1: netpoll on bonding interface worked. Test passed" >&2
+
+# Test #2: Attach netpoll to an enslaved interface
+# Try to attach netpoll to an enslaved sub-interface (while still being part of
+# a bonding interface), which shouldn't be allowed
+test_enable_netpoll_on_enslaved_iface
+echo "test #2: netpoll correctly rejected enslaved interface (expected behavior). Test passed." >&2
+
+# Test #3: Unplug the sub-interface from bond and enable netconsole
+# Detach the interface from a bonding interface and attach netpoll again
+test_delete_bond_and_reenable_target
+echo "test #3: Able to attach to an unbound interface. Test passed." >&2
+
+# Test #4: Enslave a sub-interface that had netconsole enabled
+# Try to enslave an interface that has netconsole/netpoll enabled.
+# Previous test has netconsole enabled in BOND_TX1_SLAVE_IF, try to enslave it
+test_enslave_netcons_enabled_iface
+echo "test #4: Enslaving an interface with netpoll attached. Test passed." >&2
+
+# Test #5: Enslave a sub-interface to a bonding interface
+# Enslave an interface to a bond interface that has netpoll attached
+# At this stage, BOND_TX_MAIN_IF is created and BOND_TX1_SLAVE_IF is part of
+# it. Netconsole is currently disabled
+test_enslave_iface_to_bond
+echo "test #5: Enslaving an interface to bond+netpoll. Test passed." >&2
+
+# Test #6: Enslave a IFF_DISABLE_NETPOLL sub-interface to a bonding interface
+# At this stage, BOND_TX_MAIN_IF has both sub interface and netconsole is
+# enabled. This test will try to enslave an a veth (IFF_DISABLE_NETPOLL) interface
+# and it should fail, with netpoll: veth0 doesn't support polling
+test_enslave_iff_disabled_netpoll_iface
+echo "test #6: Enslaving IFF_DISABLE_NETPOLL ifaces to bond iface is not supported. Test passed." >&2
+
+cleanup_bond
+trap - EXIT
+exit "${EXIT_STATUS}"
diff --git a/tools/testing/selftests/drivers/net/config b/tools/testing/selftests/drivers/net/config
index 601431248d5b..77ccf83d87e0 100644
--- a/tools/testing/selftests/drivers/net/config
+++ b/tools/testing/selftests/drivers/net/config
@@ -3,8 +3,8 @@ CONFIG_DEBUG_INFO_BTF=y
CONFIG_DEBUG_INFO_BTF_MODULES=n
CONFIG_INET_PSP=y
CONFIG_IPV6=y
-CONFIG_NETDEVSIM=m
CONFIG_NETCONSOLE=m
CONFIG_NETCONSOLE_DYNAMIC=y
CONFIG_NETCONSOLE_EXTENDED_LOG=y
+CONFIG_NETDEVSIM=m
CONFIG_XDP_SOCKETS=y
diff --git a/tools/testing/selftests/drivers/net/dsa/Makefile b/tools/testing/selftests/drivers/net/dsa/Makefile
index cd6817fe5be6..7994bd0e5c44 100644
--- a/tools/testing/selftests/drivers/net/dsa/Makefile
+++ b/tools/testing/selftests/drivers/net/dsa/Makefile
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: GPL-2.0+ OR MIT
-TEST_PROGS = bridge_locked_port.sh \
+TEST_PROGS := \
+ bridge_locked_port.sh \
bridge_mdb.sh \
bridge_mld.sh \
bridge_vlan_aware.sh \
@@ -9,11 +10,13 @@ TEST_PROGS = bridge_locked_port.sh \
local_termination.sh \
no_forwarding.sh \
tc_actions.sh \
- test_bridge_fdb_stress.sh
+ test_bridge_fdb_stress.sh \
+# end of TEST_PROGS
TEST_FILES := \
+ forwarding.config \
run_net_forwarding_test.sh \
- forwarding.config
+# end of TEST_FILES
TEST_INCLUDES := \
../../../net/forwarding/bridge_locked_port.sh \
@@ -27,6 +30,7 @@ TEST_INCLUDES := \
../../../net/forwarding/no_forwarding.sh \
../../../net/forwarding/tc_actions.sh \
../../../net/forwarding/tc_common.sh \
- ../../../net/lib.sh
+ ../../../net/lib.sh \
+# end of TEST_INCLUDES
include ../../../lib.mk
diff --git a/tools/testing/selftests/drivers/net/hds.py b/tools/testing/selftests/drivers/net/hds.py
index a2011474e625..c4fe049e9baa 100755
--- a/tools/testing/selftests/drivers/net/hds.py
+++ b/tools/testing/selftests/drivers/net/hds.py
@@ -3,11 +3,12 @@
import errno
import os
+import random
from typing import Union
from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_raises, KsftSkipEx
from lib.py import CmdExitFailure, EthtoolFamily, NlError
from lib.py import NetDrvEnv
-from lib.py import defer, ethtool, ip, random
+from lib.py import defer, ethtool, ip
def _get_hds_mode(cfg, netnl) -> str:
diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile
index ee09a40d532c..8133d1a0051c 100644
--- a/tools/testing/selftests/drivers/net/hw/Makefile
+++ b/tools/testing/selftests/drivers/net/hw/Makefile
@@ -32,8 +32,8 @@ TEST_FILES := \
TEST_INCLUDES := \
$(wildcard lib/py/*.py ../lib/py/*.py) \
../../../net/lib.sh \
- ../../../net/forwarding/lib.sh \
../../../net/forwarding/ipip_lib.sh \
+ ../../../net/forwarding/lib.sh \
../../../net/forwarding/tc_common.sh \
#
@@ -45,7 +45,11 @@ TEST_GEN_FILES += $(patsubst %.c,%.o,$(wildcard *.bpf.c))
include ../../../lib.mk
# YNL build
-YNL_GENS := ethtool netdev
+YNL_GENS := \
+ ethtool \
+ netdev \
+# end of YNL_GENS
+
include ../../../net/ynl.mk
include ../../../net/bpf.mk
diff --git a/tools/testing/selftests/drivers/net/hw/config b/tools/testing/selftests/drivers/net/hw/config
index e8a06aa1471c..2307aa001be1 100644
--- a/tools/testing/selftests/drivers/net/hw/config
+++ b/tools/testing/selftests/drivers/net/hw/config
@@ -1,3 +1,7 @@
+CONFIG_FAIL_FUNCTION=y
+CONFIG_FAULT_INJECTION=y
+CONFIG_FAULT_INJECTION_DEBUG_FS=y
+CONFIG_FUNCTION_ERROR_INJECTION=y
CONFIG_IO_URING=y
CONFIG_IPV6=y
CONFIG_IPV6_GRE=y
diff --git a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py
index 0ceb297e7757..fb010a48a5a1 100644
--- a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py
+++ b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py
@@ -1,5 +1,13 @@
# SPDX-License-Identifier: GPL-2.0
+"""
+Driver test environment (hardware-only tests).
+NetDrvEnv and NetDrvEpEnv are the main environment classes.
+Former is for local host only tests, latter creates / connects
+to a remote endpoint. See NIPA wiki for more information about
+running and writing driver tests.
+"""
+
import sys
from pathlib import Path
@@ -8,26 +16,36 @@ KSFT_DIR = (Path(__file__).parent / "../../../../..").resolve()
try:
sys.path.append(KSFT_DIR.as_posix())
- from net.lib.py import *
- from drivers.net.lib.py import *
-
# Import one by one to avoid pylint false positives
+ from net.lib.py import NetNS, NetNSEnter, NetdevSimDev
from net.lib.py import EthtoolFamily, NetdevFamily, NetshaperFamily, \
NlError, RtnlFamily, DevlinkFamily, PSPFamily
from net.lib.py import CmdExitFailure
- from net.lib.py import bkg, cmd, defer, ethtool, fd_read_timeout, ip, \
- rand_port, tool, wait_port_listen
- from net.lib.py import fd_read_timeout
+ from net.lib.py import bkg, cmd, bpftool, bpftrace, defer, ethtool, \
+ fd_read_timeout, ip, rand_port, wait_port_listen, wait_file
from net.lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx
from net.lib.py import ksft_disruptive, ksft_exit, ksft_pr, ksft_run, \
ksft_setup
from net.lib.py import ksft_eq, ksft_ge, ksft_in, ksft_is, ksft_lt, \
ksft_ne, ksft_not_in, ksft_raises, ksft_true, ksft_gt, ksft_not_none
- from net.lib.py import NetNSEnter
- from drivers.net.lib.py import GenerateTraffic
+ from drivers.net.lib.py import GenerateTraffic, Remote
from drivers.net.lib.py import NetDrvEnv, NetDrvEpEnv
+
+ __all__ = ["NetNS", "NetNSEnter", "NetdevSimDev",
+ "EthtoolFamily", "NetdevFamily", "NetshaperFamily",
+ "NlError", "RtnlFamily", "DevlinkFamily", "PSPFamily",
+ "CmdExitFailure",
+ "bkg", "cmd", "bpftool", "bpftrace", "defer", "ethtool",
+ "fd_read_timeout", "ip", "rand_port",
+ "wait_port_listen", "wait_file",
+ "KsftSkipEx", "KsftFailEx", "KsftXfailEx",
+ "ksft_disruptive", "ksft_exit", "ksft_pr", "ksft_run",
+ "ksft_setup",
+ "ksft_eq", "ksft_ge", "ksft_in", "ksft_is", "ksft_lt",
+ "ksft_ne", "ksft_not_in", "ksft_raises", "ksft_true", "ksft_gt",
+ "ksft_not_none", "ksft_not_none",
+ "NetDrvEnv", "NetDrvEpEnv", "GenerateTraffic", "Remote"]
except ModuleNotFoundError as e:
- ksft_pr("Failed importing `net` library from kernel sources")
- ksft_pr(str(e))
- ktap_result(True, comment="SKIP")
+ print("Failed importing `net` library from kernel sources")
+ print(str(e))
sys.exit(4)
diff --git a/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py b/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py
index ad192fef3117..2a51b60df8a1 100755
--- a/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py
+++ b/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py
@@ -1,8 +1,13 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
+"""
+Test driver resilience vs page pool allocation failures.
+"""
+
import errno
import time
+import math
import os
from lib.py import ksft_run, ksft_exit, ksft_pr
from lib.py import KsftSkipEx, KsftFailEx
@@ -13,7 +18,8 @@ from lib.py import cmd, tool, GenerateTraffic
def _write_fail_config(config):
for key, value in config.items():
- with open("/sys/kernel/debug/fail_function/" + key, "w") as fp:
+ path = "/sys/kernel/debug/fail_function/"
+ with open(path + key, "w", encoding='ascii') as fp:
fp.write(str(value) + "\n")
@@ -22,8 +28,7 @@ def _enable_pp_allocation_fail():
raise KsftSkipEx("Kernel built without function error injection (or DebugFS)")
if not os.path.exists("/sys/kernel/debug/fail_function/page_pool_alloc_netmems"):
- with open("/sys/kernel/debug/fail_function/inject", "w") as fp:
- fp.write("page_pool_alloc_netmems\n")
+ _write_fail_config({"inject": "page_pool_alloc_netmems"})
_write_fail_config({
"verbose": 0,
@@ -38,8 +43,7 @@ def _disable_pp_allocation_fail():
return
if os.path.exists("/sys/kernel/debug/fail_function/page_pool_alloc_netmems"):
- with open("/sys/kernel/debug/fail_function/inject", "w") as fp:
- fp.write("\n")
+ _write_fail_config({"inject": ""})
_write_fail_config({
"probability": 0,
@@ -48,6 +52,10 @@ def _disable_pp_allocation_fail():
def test_pp_alloc(cfg, netdevnl):
+ """
+ Configure page pool allocation fail injection while traffic is running.
+ """
+
def get_stats():
return netdevnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
@@ -55,7 +63,7 @@ def test_pp_alloc(cfg, netdevnl):
stat1 = get_stats()
time.sleep(1)
stat2 = get_stats()
- if stat2['rx-packets'] - stat1['rx-packets'] < 15000:
+ if stat2['rx-packets'] - stat1['rx-packets'] < 4000:
raise KsftFailEx("Traffic seems low:", stat2['rx-packets'] - stat1['rx-packets'])
@@ -82,11 +90,16 @@ def test_pp_alloc(cfg, netdevnl):
time.sleep(3)
s2 = get_stats()
- if s2['rx-alloc-fail'] - s1['rx-alloc-fail'] < 1:
+ seen_fails = s2['rx-alloc-fail'] - s1['rx-alloc-fail']
+ if seen_fails < 1:
raise KsftSkipEx("Allocation failures not increasing")
- if s2['rx-alloc-fail'] - s1['rx-alloc-fail'] < 100:
- raise KsftSkipEx("Allocation increasing too slowly", s2['rx-alloc-fail'] - s1['rx-alloc-fail'],
- "packets:", s2['rx-packets'] - s1['rx-packets'])
+ pkts = s2['rx-packets'] - s1['rx-packets']
+ # Expecting one failure per 512 buffers, 3.1x safety margin
+ want_fails = math.floor(pkts / 512 / 3.1)
+ if seen_fails < want_fails:
+ raise KsftSkipEx("Allocation increasing too slowly", seen_fails,
+ "packets:", pkts)
+ ksft_pr(f"Seen: pkts:{pkts} fails:{seen_fails} (pass thrs:{want_fails})")
# Basic failures are fine, try to wobble some settings to catch extra failures
check_traffic_flowing()
@@ -105,7 +118,7 @@ def test_pp_alloc(cfg, netdevnl):
else:
ksft_pr("ethtool -G change retval: did not succeed", new_g)
else:
- ksft_pr("ethtool -G change retval: did not try")
+ ksft_pr("ethtool -G change retval: did not try")
time.sleep(0.1)
check_traffic_flowing()
@@ -119,6 +132,7 @@ def test_pp_alloc(cfg, netdevnl):
def main() -> None:
+ """ Ksft boiler plate main """
netdevnl = NetdevFamily()
with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
diff --git a/tools/testing/selftests/drivers/net/lib/py/__init__.py b/tools/testing/selftests/drivers/net/lib/py/__init__.py
index 2a645415c4ca..b0c6300150fb 100644
--- a/tools/testing/selftests/drivers/net/lib/py/__init__.py
+++ b/tools/testing/selftests/drivers/net/lib/py/__init__.py
@@ -1,5 +1,13 @@
# SPDX-License-Identifier: GPL-2.0
+"""
+Driver test environment.
+NetDrvEnv and NetDrvEpEnv are the main environment classes.
+Former is for local host only tests, latter creates / connects
+to a remote endpoint. See NIPA wiki for more information about
+running and writing driver tests.
+"""
+
import sys
from pathlib import Path
@@ -8,26 +16,39 @@ KSFT_DIR = (Path(__file__).parent / "../../../..").resolve()
try:
sys.path.append(KSFT_DIR.as_posix())
- from net.lib.py import *
-
# Import one by one to avoid pylint false positives
+ from net.lib.py import NetNS, NetNSEnter, NetdevSimDev
from net.lib.py import EthtoolFamily, NetdevFamily, NetshaperFamily, \
NlError, RtnlFamily, DevlinkFamily, PSPFamily
from net.lib.py import CmdExitFailure
from net.lib.py import bkg, cmd, bpftool, bpftrace, defer, ethtool, \
- fd_read_timeout, ip, rand_port, tool, wait_port_listen, wait_file
- from net.lib.py import fd_read_timeout
+ fd_read_timeout, ip, rand_port, wait_port_listen, wait_file
from net.lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx
from net.lib.py import ksft_disruptive, ksft_exit, ksft_pr, ksft_run, \
ksft_setup
from net.lib.py import ksft_eq, ksft_ge, ksft_in, ksft_is, ksft_lt, \
ksft_ne, ksft_not_in, ksft_raises, ksft_true, ksft_gt, ksft_not_none
+
+ __all__ = ["NetNS", "NetNSEnter", "NetdevSimDev",
+ "EthtoolFamily", "NetdevFamily", "NetshaperFamily",
+ "NlError", "RtnlFamily", "DevlinkFamily", "PSPFamily",
+ "CmdExitFailure",
+ "bkg", "cmd", "bpftool", "bpftrace", "defer", "ethtool",
+ "fd_read_timeout", "ip", "rand_port",
+ "wait_port_listen", "wait_file",
+ "KsftSkipEx", "KsftFailEx", "KsftXfailEx",
+ "ksft_disruptive", "ksft_exit", "ksft_pr", "ksft_run",
+ "ksft_setup",
+ "ksft_eq", "ksft_ge", "ksft_in", "ksft_is", "ksft_lt",
+ "ksft_ne", "ksft_not_in", "ksft_raises", "ksft_true", "ksft_gt",
+ "ksft_not_none", "ksft_not_none"]
+
+ from .env import NetDrvEnv, NetDrvEpEnv
+ from .load import GenerateTraffic
+ from .remote import Remote
+
+ __all__ += ["NetDrvEnv", "NetDrvEpEnv", "GenerateTraffic", "Remote"]
except ModuleNotFoundError as e:
- ksft_pr("Failed importing `net` library from kernel sources")
- ksft_pr(str(e))
- ktap_result(True, comment="SKIP")
+ print("Failed importing `net` library from kernel sources")
+ print(str(e))
sys.exit(4)
-
-from .env import *
-from .load import *
-from .remote import Remote
diff --git a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh
index 8e1085e89647..87f89fd92f8c 100644
--- a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh
+++ b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh
@@ -11,9 +11,11 @@ set -euo pipefail
LIBDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
SRCIF="" # to be populated later
+SRCIP="" # to be populated later
SRCIP4="192.0.2.1"
SRCIP6="fc00::1"
DSTIF="" # to be populated later
+DSTIP="" # to be populated later
DSTIP4="192.0.2.2"
DSTIP6="fc00::2"
@@ -28,17 +30,23 @@ NETCONS_PATH="${NETCONS_CONFIGFS}"/"${TARGET}"
# NAMESPACE will be populated by setup_ns with a random value
NAMESPACE=""
-# IDs for netdevsim
+# IDs for netdevsim. We either use NSIM_DEV_{1,2}_ID for standard test
+# or NSIM_BOND_{T,R}X_{1,2} for the bonding tests. Not both at the
+# same time.
NSIM_DEV_1_ID=$((256 + RANDOM % 256))
NSIM_DEV_2_ID=$((512 + RANDOM % 256))
+NSIM_BOND_TX_1=$((768 + RANDOM % 256))
+NSIM_BOND_TX_2=$((1024 + RANDOM % 256))
+NSIM_BOND_RX_1=$((1280 + RANDOM % 256))
+NSIM_BOND_RX_2=$((1536 + RANDOM % 256))
NSIM_DEV_SYS_NEW="/sys/bus/netdevsim/new_device"
+NSIM_DEV_SYS_LINK="/sys/bus/netdevsim/link_device"
# Used to create and delete namespaces
source "${LIBDIR}"/../../../../net/lib.sh
# Create netdevsim interfaces
create_ifaces() {
-
echo "$NSIM_DEV_2_ID" > "$NSIM_DEV_SYS_NEW"
echo "$NSIM_DEV_1_ID" > "$NSIM_DEV_SYS_NEW"
udevadm settle 2> /dev/null || true
@@ -113,31 +121,38 @@ function set_network() {
configure_ip
}
-function create_dynamic_target() {
- local FORMAT=${1:-"extended"}
+function _create_dynamic_target() {
+ local FORMAT="${1:?FORMAT parameter required}"
+ local NCPATH="${2:?NCPATH parameter required}"
DSTMAC=$(ip netns exec "${NAMESPACE}" \
ip link show "${DSTIF}" | awk '/ether/ {print $2}')
# Create a dynamic target
- mkdir "${NETCONS_PATH}"
+ mkdir "${NCPATH}"
- echo "${DSTIP}" > "${NETCONS_PATH}"/remote_ip
- echo "${SRCIP}" > "${NETCONS_PATH}"/local_ip
- echo "${DSTMAC}" > "${NETCONS_PATH}"/remote_mac
- echo "${SRCIF}" > "${NETCONS_PATH}"/dev_name
+ echo "${DSTIP}" > "${NCPATH}"/remote_ip
+ echo "${SRCIP}" > "${NCPATH}"/local_ip
+ echo "${DSTMAC}" > "${NCPATH}"/remote_mac
+ echo "${SRCIF}" > "${NCPATH}"/dev_name
if [ "${FORMAT}" == "basic" ]
then
# Basic target does not support release
- echo 0 > "${NETCONS_PATH}"/release
- echo 0 > "${NETCONS_PATH}"/extended
+ echo 0 > "${NCPATH}"/release
+ echo 0 > "${NCPATH}"/extended
elif [ "${FORMAT}" == "extended" ]
then
- echo 1 > "${NETCONS_PATH}"/extended
+ echo 1 > "${NCPATH}"/extended
fi
+}
- echo 1 > "${NETCONS_PATH}"/enabled
+function create_dynamic_target() {
+ local FORMAT=${1:-"extended"}
+ local NCPATH=${2:-"$NETCONS_PATH"}
+ _create_dynamic_target "${FORMAT}" "${NCPATH}"
+
+ echo 1 > "${NCPATH}"/enabled
# This will make sure that the kernel was able to
# load the netconsole driver configuration. The console message
@@ -185,14 +200,26 @@ function do_cleanup() {
echo "${DEFAULT_PRINTK_VALUES}" > /proc/sys/kernel/printk
}
-function cleanup() {
+function cleanup_netcons() {
# delete netconsole dynamic reconfiguration
- echo 0 > "${NETCONS_PATH}"/enabled
+ # do not fail if the target is already disabled
+ if [[ ! -d "${NETCONS_PATH}" ]]
+ then
+ # in some cases this is called before netcons path is created
+ return
+ fi
+ if [[ $(cat "${NETCONS_PATH}"/enabled) != 0 ]]
+ then
+ echo 0 > "${NETCONS_PATH}"/enabled || true
+ fi
# Remove all the keys that got created during the selftest
find "${NETCONS_PATH}/userdata/" -mindepth 1 -type d -delete
# Remove the configfs entry
rmdir "${NETCONS_PATH}"
+}
+function cleanup() {
+ cleanup_netcons
do_cleanup
}
@@ -369,3 +396,24 @@ function wait_for_port() {
# more frequently on IPv6
sleep 1
}
+
+# Clean up netdevsim ifaces created for bonding test
+function cleanup_bond_nsim() {
+ ip -n "${TXNS}" \
+ link delete "${BOND_TX_MAIN_IF}" type bond || true
+ ip -n "${RXNS}" \
+ link delete "${BOND_RX_MAIN_IF}" type bond || true
+
+ cleanup_netdevsim "$NSIM_BOND_TX_1"
+ cleanup_netdevsim "$NSIM_BOND_TX_2"
+ cleanup_netdevsim "$NSIM_BOND_RX_1"
+ cleanup_netdevsim "$NSIM_BOND_RX_2"
+}
+
+# cleanup tests that use bonding interfaces
+function cleanup_bond() {
+ cleanup_netcons
+ cleanup_bond_nsim
+ cleanup_all_ns
+ ip link delete "${VETH0}" || true
+}
diff --git a/tools/testing/selftests/drivers/net/netcons_torture.sh b/tools/testing/selftests/drivers/net/netcons_torture.sh
new file mode 100755
index 000000000000..2ce9ee3719d1
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netcons_torture.sh
@@ -0,0 +1,130 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Repeatedly send kernel messages, toggles netconsole targets on and off,
+# creates and deletes targets in parallel, and toggles the source interface to
+# simulate stress conditions.
+#
+# This test aims to verify the robustness of netconsole under dynamic
+# configurations and concurrent operations.
+#
+# The major goal is to run this test with LOCKDEP, Kmemleak and KASAN to make
+# sure no issues is reported.
+#
+# Author: Breno Leitao <leitao@debian.org>
+
+set -euo pipefail
+
+SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
+
+source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh
+
+# Number of times the main loop run
+ITERATIONS=${1:-150}
+
+# Only test extended format
+FORMAT="extended"
+# And ipv6 only
+IP_VERSION="ipv6"
+
+# Create, enable and delete some targets.
+create_and_delete_random_target() {
+ COUNT=2
+ RND_PREFIX=$(mktemp -u netcons_rnd_XXXX_)
+
+ if [ -d "${NETCONS_CONFIGFS}/${RND_PREFIX}${COUNT}" ] || \
+ [ -d "${NETCONS_CONFIGFS}/${RND_PREFIX}0" ]; then
+ echo "Function didn't finish yet, skipping it." >&2
+ return
+ fi
+
+ # enable COUNT targets
+ for i in $(seq ${COUNT})
+ do
+ RND_TARGET="${RND_PREFIX}"${i}
+ RND_TARGET_PATH="${NETCONS_CONFIGFS}"/"${RND_TARGET}"
+
+ # Basic population so the target can come up
+ _create_dynamic_target "${FORMAT}" "${RND_TARGET_PATH}"
+ done
+
+ echo "netconsole selftest: ${COUNT} additional targets were created" > /dev/kmsg
+ # disable them all
+ for i in $(seq ${COUNT})
+ do
+ RND_TARGET="${RND_PREFIX}"${i}
+ RND_TARGET_PATH="${NETCONS_CONFIGFS}"/"${RND_TARGET}"
+ if [[ $(cat "${RND_TARGET_PATH}/enabled") -eq 1 ]]
+ then
+ echo 0 > "${RND_TARGET_PATH}"/enabled
+ fi
+ rmdir "${RND_TARGET_PATH}"
+ done
+}
+
+# Disable and enable the target mid-air, while messages
+# are being transmitted.
+toggle_netcons_target() {
+ for i in $(seq 2)
+ do
+ if [ ! -d "${NETCONS_PATH}" ]
+ then
+ break
+ fi
+ echo 0 > "${NETCONS_PATH}"/enabled 2> /dev/null || true
+ # Try to enable a bit harder, given it might fail to enable
+ # Write to `enabled` might fail depending on the lock, which is
+ # highly contentious here
+ for _ in $(seq 5)
+ do
+ echo 1 > "${NETCONS_PATH}"/enabled 2> /dev/null || true
+ done
+ done
+}
+
+toggle_iface(){
+ ip link set "${SRCIF}" down
+ ip link set "${SRCIF}" up
+}
+
+# Start here
+
+modprobe netdevsim 2> /dev/null || true
+modprobe netconsole 2> /dev/null || true
+
+# Check for basic system dependency and exit if not found
+check_for_dependencies
+# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5)
+echo "6 5" > /proc/sys/kernel/printk
+# Remove the namespace, interfaces and netconsole target on exit
+trap cleanup EXIT
+# Create one namespace and two interfaces
+set_network "${IP_VERSION}"
+# Create a dynamic target for netconsole
+create_dynamic_target "${FORMAT}"
+
+for i in $(seq "$ITERATIONS")
+do
+ for _ in $(seq 10)
+ do
+ echo "${MSG}: ${TARGET} ${i}" > /dev/kmsg
+ done
+ wait
+
+ if (( i % 30 == 0 )); then
+ toggle_netcons_target &
+ fi
+
+ if (( i % 50 == 0 )); then
+ # create some targets, enable them, send msg and disable
+ # all in a parallel thread
+ create_and_delete_random_target &
+ fi
+
+ if (( i % 70 == 0 )); then
+ toggle_iface &
+ fi
+done
+wait
+
+exit "${EXIT_STATUS}"
diff --git a/tools/testing/selftests/drivers/net/netdevsim/Makefile b/tools/testing/selftests/drivers/net/netdevsim/Makefile
index 07b7c46d3311..df10c7243511 100644
--- a/tools/testing/selftests/drivers/net/netdevsim/Makefile
+++ b/tools/testing/selftests/drivers/net/netdevsim/Makefile
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: GPL-2.0+ OR MIT
-TEST_PROGS = devlink.sh \
+TEST_PROGS := \
+ devlink.sh \
devlink_in_netns.sh \
devlink_trap.sh \
ethtool-coalesce.sh \
@@ -17,5 +18,10 @@ TEST_PROGS = devlink.sh \
psample.sh \
tc-mq-visibility.sh \
udp_tunnel_nic.sh \
+# end of TEST_PROGS
+
+TEST_FILES := \
+ ethtool-common.sh
+# end of TEST_FILES
include ../../../lib.mk
diff --git a/tools/testing/selftests/drivers/net/team/Makefile b/tools/testing/selftests/drivers/net/team/Makefile
index 89d854c7e674..1340b3df9c31 100644
--- a/tools/testing/selftests/drivers/net/team/Makefile
+++ b/tools/testing/selftests/drivers/net/team/Makefile
@@ -1,13 +1,18 @@
# SPDX-License-Identifier: GPL-2.0
# Makefile for net selftests
-TEST_PROGS := dev_addr_lists.sh propagation.sh options.sh
+TEST_PROGS := \
+ dev_addr_lists.sh \
+ options.sh \
+ propagation.sh \
+# end of TEST_PROGS
TEST_INCLUDES := \
../bonding/lag_lib.sh \
../../../net/forwarding/lib.sh \
- ../../../net/lib.sh \
../../../net/in_netns.sh \
- ../../../net/lib/sh/defer.sh
+ ../../../net/lib.sh \
+ ../../../net/lib/sh/defer.sh \
+# end of TEST_INCLUDES
include ../../../lib.mk
diff --git a/tools/testing/selftests/drivers/net/virtio_net/Makefile b/tools/testing/selftests/drivers/net/virtio_net/Makefile
index 7ec7cd3ab2cc..868ece3fea1f 100644
--- a/tools/testing/selftests/drivers/net/virtio_net/Makefile
+++ b/tools/testing/selftests/drivers/net/virtio_net/Makefile
@@ -1,15 +1,12 @@
# SPDX-License-Identifier: GPL-2.0+ OR MIT
-TEST_PROGS = basic_features.sh \
- #
+TEST_PROGS = basic_features.sh
-TEST_FILES = \
- virtio_net_common.sh \
- #
+TEST_FILES = virtio_net_common.sh
TEST_INCLUDES = \
- ../../../net/forwarding/lib.sh \
- ../../../net/lib.sh \
- #
+ ../../../net/forwarding/lib.sh \
+ ../../../net/lib.sh \
+# end of TEST_INCLUDES
include ../../../lib.mk
diff --git a/tools/testing/selftests/drivers/net/xdp.py b/tools/testing/selftests/drivers/net/xdp.py
index 08fea4230759..a148004e1c36 100755
--- a/tools/testing/selftests/drivers/net/xdp.py
+++ b/tools/testing/selftests/drivers/net/xdp.py
@@ -11,8 +11,9 @@ import string
from dataclasses import dataclass
from enum import Enum
-from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ne, ksft_pr
-from lib.py import KsftFailEx, NetDrvEpEnv, EthtoolFamily, NlError
+from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ge, ksft_ne, ksft_pr
+from lib.py import KsftFailEx, NetDrvEpEnv
+from lib.py import EthtoolFamily, NetdevFamily, NlError
from lib.py import bkg, cmd, rand_port, wait_port_listen
from lib.py import ip, bpftool, defer
@@ -541,11 +542,11 @@ def get_hds_thresh(cfg):
The HDS threshold value. If the threshold is not supported or an error occurs,
a default value of 1500 is returned.
"""
- netnl = cfg.netnl
+ ethnl = cfg.ethnl
hds_thresh = 1500
try:
- rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+ rings = ethnl.rings_get({'header': {'dev-index': cfg.ifindex}})
if 'hds-thresh' not in rings:
ksft_pr(f'hds-thresh not supported. Using default: {hds_thresh}')
return hds_thresh
@@ -562,7 +563,7 @@ def _test_xdp_native_head_adjst(cfg, prog, pkt_sz_lst, offset_lst):
Args:
cfg: Configuration object containing network settings.
- netnl: Network namespace or link object (not used in this function).
+ ethnl: Network namespace or link object (not used in this function).
This function sets up the packet size and offset lists, then performs
the head adjustment test by sending and receiving UDP packets.
@@ -671,6 +672,88 @@ def test_xdp_native_adjst_head_shrnk_data(cfg):
_validate_res(res, offset_lst, pkt_sz_lst)
+def _test_xdp_native_ifc_stats(cfg, act):
+ cfg.require_cmd("socat")
+
+ bpf_info = BPFProgInfo("xdp_prog", "xdp_native.bpf.o", "xdp", 1500)
+ prog_info = _load_xdp_prog(cfg, bpf_info)
+ port = rand_port()
+
+ _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, act.value)
+ _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port)
+
+ # Discard the input, but we need a listener to avoid ICMP errors
+ rx_udp = f"socat -{cfg.addr_ipver} -T 2 -u UDP-RECV:{port},reuseport " + \
+ "/dev/null"
+ # Listener runs on "remote" in case of XDP_TX
+ rx_host = cfg.remote if act == XDPAction.TX else None
+ # We want to spew 2000 packets quickly, bash seems to do a good enough job
+ tx_udp = f"exec 5<>/dev/udp/{cfg.addr}/{port}; " \
+ "for i in `seq 2000`; do echo a >&5; done; exec 5>&-"
+
+ cfg.wait_hw_stats_settle()
+ # Qstats have more clearly defined semantics than rtnetlink.
+ # XDP is the "first layer of the stack" so XDP packets should be counted
+ # as received and sent as if the decision was made in the routing layer.
+ before = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
+
+ with bkg(rx_udp, host=rx_host, exit_wait=True):
+ wait_port_listen(port, proto="udp", host=rx_host)
+ cmd(tx_udp, host=cfg.remote, shell=True)
+
+ cfg.wait_hw_stats_settle()
+ after = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
+
+ ksft_ge(after['rx-packets'] - before['rx-packets'], 2000)
+ if act == XDPAction.TX:
+ ksft_ge(after['tx-packets'] - before['tx-packets'], 2000)
+
+ expected_pkts = 2000
+ stats = _get_stats(prog_info["maps"]["map_xdp_stats"])
+ ksft_eq(stats[XDPStats.RX.value], expected_pkts, "XDP RX stats mismatch")
+ if act == XDPAction.TX:
+ ksft_eq(stats[XDPStats.TX.value], expected_pkts, "XDP TX stats mismatch")
+
+ # Flip the ring count back and forth to make sure the stats from XDP rings
+ # don't get lost.
+ chans = cfg.ethnl.channels_get({'header': {'dev-index': cfg.ifindex}})
+ if chans.get('combined-count', 0) > 1:
+ cfg.ethnl.channels_set({'header': {'dev-index': cfg.ifindex},
+ 'combined-count': 1})
+ cfg.ethnl.channels_set({'header': {'dev-index': cfg.ifindex},
+ 'combined-count': chans['combined-count']})
+ before = after
+ after = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
+
+ ksft_ge(after['rx-packets'], before['rx-packets'])
+ if act == XDPAction.TX:
+ ksft_ge(after['tx-packets'], before['tx-packets'])
+
+
+def test_xdp_native_qstats_pass(cfg):
+ """
+ Send 2000 messages, expect XDP_PASS, make sure the packets were counted
+ to interface level qstats (Rx).
+ """
+ _test_xdp_native_ifc_stats(cfg, XDPAction.PASS)
+
+
+def test_xdp_native_qstats_drop(cfg):
+ """
+ Send 2000 messages, expect XDP_DROP, make sure the packets were counted
+ to interface level qstats (Rx).
+ """
+ _test_xdp_native_ifc_stats(cfg, XDPAction.DROP)
+
+
+def test_xdp_native_qstats_tx(cfg):
+ """
+ Send 2000 messages, expect XDP_TX, make sure the packets were counted
+ to interface level qstats (Rx and Tx)
+ """
+ _test_xdp_native_ifc_stats(cfg, XDPAction.TX)
+
+
def main():
"""
Main function to execute the XDP tests.
@@ -681,7 +764,8 @@ def main():
function to execute the tests.
"""
with NetDrvEpEnv(__file__) as cfg:
- cfg.netnl = EthtoolFamily()
+ cfg.ethnl = EthtoolFamily()
+ cfg.netnl = NetdevFamily()
ksft_run(
[
test_xdp_native_pass_sb,
@@ -694,6 +778,9 @@ def main():
test_xdp_native_adjst_tail_shrnk_data,
test_xdp_native_adjst_head_grow_data,
test_xdp_native_adjst_head_shrnk_data,
+ test_xdp_native_qstats_pass,
+ test_xdp_native_qstats_drop,
+ test_xdp_native_qstats_tx,
],
args=(cfg,))
ksft_exit()
diff --git a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c
index 81db85a5cc16..39a68078a79b 100644
--- a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c
+++ b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c
@@ -65,6 +65,7 @@ static int __do_binderfs_test(struct __test_metadata *_metadata)
"oneway_spam_detection",
"extended_error",
"freeze_notification",
+ "transaction_report",
};
change_mountns(_metadata);
diff --git a/tools/testing/selftests/filesystems/utils.c b/tools/testing/selftests/filesystems/utils.c
index c43a69dffd83..a0c64f415a7f 100644
--- a/tools/testing/selftests/filesystems/utils.c
+++ b/tools/testing/selftests/filesystems/utils.c
@@ -487,7 +487,7 @@ int setup_userns(void)
uid_t uid = getuid();
gid_t gid = getgid();
- ret = unshare(CLONE_NEWNS|CLONE_NEWUSER|CLONE_NEWPID);
+ ret = unshare(CLONE_NEWNS|CLONE_NEWUSER);
if (ret) {
ksft_exit_fail_msg("unsharing mountns and userns: %s\n",
strerror(errno));
diff --git a/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc b/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc
index c62165fabd0c..cfa16aa1f39a 100644
--- a/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc
+++ b/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc
@@ -20,6 +20,10 @@ sample_events() {
echo 0 > tracing_on
echo 0 > events/enable
+# Clear functions caused by page cache; run sample_events twice
+sample_events
+sample_events
+
echo "Get the most frequently calling function"
echo > trace
sample_events
diff --git a/tools/testing/selftests/hid/hid_common.h b/tools/testing/selftests/hid/hid_common.h
index f77f69c6657d..8085519c47cb 100644
--- a/tools/testing/selftests/hid/hid_common.h
+++ b/tools/testing/selftests/hid/hid_common.h
@@ -230,6 +230,12 @@ static int uhid_event(struct __test_metadata *_metadata, int fd)
break;
case UHID_SET_REPORT:
UHID_LOG("UHID_SET_REPORT from uhid-dev");
+
+ answer.type = UHID_SET_REPORT_REPLY;
+ answer.u.set_report_reply.id = ev.u.set_report.id;
+ answer.u.set_report_reply.err = 0; /* success */
+
+ uhid_write(_metadata, fd, &answer);
break;
default:
TH_LOG("Invalid event from uhid-dev: %u", ev.type);
diff --git a/tools/testing/selftests/hid/hidraw.c b/tools/testing/selftests/hid/hidraw.c
index 821db37ba4bb..d625772f8b7c 100644
--- a/tools/testing/selftests/hid/hidraw.c
+++ b/tools/testing/selftests/hid/hidraw.c
@@ -2,6 +2,9 @@
/* Copyright (c) 2022-2024 Red Hat */
#include "hid_common.h"
+#include <linux/input.h>
+#include <string.h>
+#include <sys/ioctl.h>
/* for older kernels */
#ifndef HIDIOCREVOKE
@@ -215,6 +218,476 @@ TEST_F(hidraw, write_event_revoked)
pthread_mutex_unlock(&uhid_output_mtx);
}
+/*
+ * Test HIDIOCGRDESCSIZE ioctl to get report descriptor size
+ */
+TEST_F(hidraw, ioctl_rdescsize)
+{
+ int desc_size = 0;
+ int err;
+
+ /* call HIDIOCGRDESCSIZE ioctl */
+ err = ioctl(self->hidraw_fd, HIDIOCGRDESCSIZE, &desc_size);
+ ASSERT_EQ(err, 0) TH_LOG("HIDIOCGRDESCSIZE ioctl failed");
+
+ /* verify the size matches our test report descriptor */
+ ASSERT_EQ(desc_size, sizeof(rdesc))
+ TH_LOG("expected size %zu, got %d", sizeof(rdesc), desc_size);
+}
+
+/*
+ * Test HIDIOCGRDESC ioctl to get report descriptor data
+ */
+TEST_F(hidraw, ioctl_rdesc)
+{
+ struct hidraw_report_descriptor desc;
+ int err;
+
+ /* get the full report descriptor */
+ desc.size = sizeof(rdesc);
+ err = ioctl(self->hidraw_fd, HIDIOCGRDESC, &desc);
+ ASSERT_EQ(err, 0) TH_LOG("HIDIOCGRDESC ioctl failed");
+
+ /* verify the descriptor data matches our test descriptor */
+ ASSERT_EQ(memcmp(desc.value, rdesc, sizeof(rdesc)), 0)
+ TH_LOG("report descriptor data mismatch");
+}
+
+/*
+ * Test HIDIOCGRDESC ioctl with smaller buffer size
+ */
+TEST_F(hidraw, ioctl_rdesc_small_buffer)
+{
+ struct hidraw_report_descriptor desc;
+ int err;
+ size_t small_size = sizeof(rdesc) / 2; /* request half the descriptor size */
+
+ /* get partial report descriptor */
+ desc.size = small_size;
+ err = ioctl(self->hidraw_fd, HIDIOCGRDESC, &desc);
+ ASSERT_EQ(err, 0) TH_LOG("HIDIOCGRDESC ioctl failed with small buffer");
+
+ /* verify we got the first part of the descriptor */
+ ASSERT_EQ(memcmp(desc.value, rdesc, small_size), 0)
+ TH_LOG("partial report descriptor data mismatch");
+}
+
+/*
+ * Test HIDIOCGRAWINFO ioctl to get device information
+ */
+TEST_F(hidraw, ioctl_rawinfo)
+{
+ struct hidraw_devinfo devinfo;
+ int err;
+
+ /* get device info */
+ err = ioctl(self->hidraw_fd, HIDIOCGRAWINFO, &devinfo);
+ ASSERT_EQ(err, 0) TH_LOG("HIDIOCGRAWINFO ioctl failed");
+
+ /* verify device info matches our test setup */
+ ASSERT_EQ(devinfo.bustype, BUS_USB)
+ TH_LOG("expected bustype 0x03, got 0x%x", devinfo.bustype);
+ ASSERT_EQ(devinfo.vendor, 0x0001)
+ TH_LOG("expected vendor 0x0001, got 0x%x", devinfo.vendor);
+ ASSERT_EQ(devinfo.product, 0x0a37)
+ TH_LOG("expected product 0x0a37, got 0x%x", devinfo.product);
+}
+
+/*
+ * Test HIDIOCGFEATURE ioctl to get feature report
+ */
+TEST_F(hidraw, ioctl_gfeature)
+{
+ __u8 buf[10] = {0};
+ int err;
+
+ /* set report ID 1 in first byte */
+ buf[0] = 1;
+
+ /* get feature report */
+ err = ioctl(self->hidraw_fd, HIDIOCGFEATURE(sizeof(buf)), buf);
+ ASSERT_EQ(err, sizeof(feature_data)) TH_LOG("HIDIOCGFEATURE ioctl failed, got %d", err);
+
+ /* verify we got the expected feature data */
+ ASSERT_EQ(buf[0], feature_data[0])
+ TH_LOG("expected feature_data[0] = %d, got %d", feature_data[0], buf[0]);
+ ASSERT_EQ(buf[1], feature_data[1])
+ TH_LOG("expected feature_data[1] = %d, got %d", feature_data[1], buf[1]);
+}
+
+/*
+ * Test HIDIOCGFEATURE ioctl with invalid report ID
+ */
+TEST_F(hidraw, ioctl_gfeature_invalid)
+{
+ __u8 buf[10] = {0};
+ int err;
+
+ /* set invalid report ID (not 1) */
+ buf[0] = 2;
+
+ /* try to get feature report */
+ err = ioctl(self->hidraw_fd, HIDIOCGFEATURE(sizeof(buf)), buf);
+ ASSERT_LT(err, 0) TH_LOG("HIDIOCGFEATURE should have failed with invalid report ID");
+ ASSERT_EQ(errno, EIO) TH_LOG("expected EIO, got errno %d", errno);
+}
+
+/*
+ * Test ioctl with incorrect nr bits
+ */
+TEST_F(hidraw, ioctl_invalid_nr)
+{
+ char buf[256] = {0};
+ int err;
+ unsigned int bad_cmd;
+
+ /*
+ * craft an ioctl command with wrong _IOC_NR bits
+ */
+ bad_cmd = _IOC(_IOC_WRITE|_IOC_READ, 'H', 0x00, sizeof(buf)); /* 0 is not valid */
+
+ /* test the ioctl */
+ err = ioctl(self->hidraw_fd, bad_cmd, buf);
+ ASSERT_LT(err, 0) TH_LOG("ioctl read-write with wrong _IOC_NR (0) should have failed");
+ ASSERT_EQ(errno, ENOTTY)
+ TH_LOG("expected ENOTTY for wrong read-write _IOC_NR (0), got errno %d", errno);
+
+ /*
+ * craft an ioctl command with wrong _IOC_NR bits
+ */
+ bad_cmd = _IOC(_IOC_READ, 'H', 0x00, sizeof(buf)); /* 0 is not valid */
+
+ /* test the ioctl */
+ err = ioctl(self->hidraw_fd, bad_cmd, buf);
+ ASSERT_LT(err, 0) TH_LOG("ioctl read-only with wrong _IOC_NR (0) should have failed");
+ ASSERT_EQ(errno, ENOTTY)
+ TH_LOG("expected ENOTTY for wrong read-only _IOC_NR (0), got errno %d", errno);
+
+ /* also test with bigger number */
+ bad_cmd = _IOC(_IOC_READ, 'H', 0x42, sizeof(buf)); /* 0x42 is not valid as well */
+
+ err = ioctl(self->hidraw_fd, bad_cmd, buf);
+ ASSERT_LT(err, 0) TH_LOG("ioctl read-only with wrong _IOC_NR (0x42) should have failed");
+ ASSERT_EQ(errno, ENOTTY)
+ TH_LOG("expected ENOTTY for wrong read-only _IOC_NR (0x42), got errno %d", errno);
+
+ /* also test with bigger number: 0x42 is not valid as well */
+ bad_cmd = _IOC(_IOC_WRITE|_IOC_READ, 'H', 0x42, sizeof(buf));
+
+ err = ioctl(self->hidraw_fd, bad_cmd, buf);
+ ASSERT_LT(err, 0) TH_LOG("ioctl read-write with wrong _IOC_NR (0x42) should have failed");
+ ASSERT_EQ(errno, ENOTTY)
+ TH_LOG("expected ENOTTY for wrong read-write _IOC_NR (0x42), got errno %d", errno);
+}
+
+/*
+ * Test ioctl with incorrect type bits
+ */
+TEST_F(hidraw, ioctl_invalid_type)
+{
+ char buf[256] = {0};
+ int err;
+ unsigned int bad_cmd;
+
+ /*
+ * craft an ioctl command with wrong _IOC_TYPE bits
+ */
+ bad_cmd = _IOC(_IOC_WRITE|_IOC_READ, 'I', 0x01, sizeof(buf)); /* 'I' should be 'H' */
+
+ /* test the ioctl */
+ err = ioctl(self->hidraw_fd, bad_cmd, buf);
+ ASSERT_LT(err, 0) TH_LOG("ioctl with wrong _IOC_TYPE (I) should have failed");
+ ASSERT_EQ(errno, EINVAL) TH_LOG("expected EINVAL for wrong _IOC_NR, got errno %d", errno);
+}
+
+/*
+ * Test HIDIOCGFEATURE ioctl with incorrect _IOC_DIR bits
+ */
+TEST_F(hidraw, ioctl_gfeature_invalid_dir)
+{
+ __u8 buf[10] = {0};
+ int err;
+ unsigned int bad_cmd;
+
+ /* set report ID 1 in first byte */
+ buf[0] = 1;
+
+ /*
+ * craft an ioctl command with wrong _IOC_DIR bits
+ * HIDIOCGFEATURE should have _IOC_WRITE|_IOC_READ, let's use only _IOC_WRITE
+ */
+ bad_cmd = _IOC(_IOC_WRITE, 'H', 0x07, sizeof(buf)); /* should be _IOC_WRITE|_IOC_READ */
+
+ /* try to get feature report with wrong direction bits */
+ err = ioctl(self->hidraw_fd, bad_cmd, buf);
+ ASSERT_LT(err, 0) TH_LOG("HIDIOCGFEATURE with wrong _IOC_DIR should have failed");
+ ASSERT_EQ(errno, EINVAL) TH_LOG("expected EINVAL for wrong _IOC_DIR, got errno %d", errno);
+
+ /* also test with only _IOC_READ */
+ bad_cmd = _IOC(_IOC_READ, 'H', 0x07, sizeof(buf)); /* should be _IOC_WRITE|_IOC_READ */
+
+ err = ioctl(self->hidraw_fd, bad_cmd, buf);
+ ASSERT_LT(err, 0) TH_LOG("HIDIOCGFEATURE with wrong _IOC_DIR should have failed");
+ ASSERT_EQ(errno, EINVAL) TH_LOG("expected EINVAL for wrong _IOC_DIR, got errno %d", errno);
+}
+
+/*
+ * Test read-only ioctl with incorrect _IOC_DIR bits
+ */
+TEST_F(hidraw, ioctl_readonly_invalid_dir)
+{
+ char buf[256] = {0};
+ int err;
+ unsigned int bad_cmd;
+
+ /*
+ * craft an ioctl command with wrong _IOC_DIR bits
+ * HIDIOCGRAWNAME should have _IOC_READ, let's use _IOC_WRITE
+ */
+ bad_cmd = _IOC(_IOC_WRITE, 'H', 0x04, sizeof(buf)); /* should be _IOC_READ */
+
+ /* try to get device name with wrong direction bits */
+ err = ioctl(self->hidraw_fd, bad_cmd, buf);
+ ASSERT_LT(err, 0) TH_LOG("HIDIOCGRAWNAME with wrong _IOC_DIR should have failed");
+ ASSERT_EQ(errno, EINVAL) TH_LOG("expected EINVAL for wrong _IOC_DIR, got errno %d", errno);
+
+ /* also test with _IOC_WRITE|_IOC_READ */
+ bad_cmd = _IOC(_IOC_WRITE|_IOC_READ, 'H', 0x04, sizeof(buf)); /* should be only _IOC_READ */
+
+ err = ioctl(self->hidraw_fd, bad_cmd, buf);
+ ASSERT_LT(err, 0) TH_LOG("HIDIOCGRAWNAME with wrong _IOC_DIR should have failed");
+ ASSERT_EQ(errno, EINVAL) TH_LOG("expected EINVAL for wrong _IOC_DIR, got errno %d", errno);
+}
+
+/*
+ * Test HIDIOCSFEATURE ioctl to set feature report
+ */
+TEST_F(hidraw, ioctl_sfeature)
+{
+ __u8 buf[10] = {0};
+ int err;
+
+ /* prepare feature report data */
+ buf[0] = 1; /* report ID */
+ buf[1] = 0x42;
+ buf[2] = 0x24;
+
+ /* set feature report */
+ err = ioctl(self->hidraw_fd, HIDIOCSFEATURE(3), buf);
+ ASSERT_EQ(err, 3) TH_LOG("HIDIOCSFEATURE ioctl failed, got %d", err);
+
+ /*
+ * Note: The uhid mock doesn't validate the set report data,
+ * so we just verify the ioctl succeeds
+ */
+}
+
+/*
+ * Test HIDIOCGINPUT ioctl to get input report
+ */
+TEST_F(hidraw, ioctl_ginput)
+{
+ __u8 buf[10] = {0};
+ int err;
+
+ /* set report ID 1 in first byte */
+ buf[0] = 1;
+
+ /* get input report */
+ err = ioctl(self->hidraw_fd, HIDIOCGINPUT(sizeof(buf)), buf);
+ ASSERT_EQ(err, sizeof(feature_data)) TH_LOG("HIDIOCGINPUT ioctl failed, got %d", err);
+
+ /* verify we got the expected input data */
+ ASSERT_EQ(buf[0], feature_data[0])
+ TH_LOG("expected feature_data[0] = %d, got %d", feature_data[0], buf[0]);
+ ASSERT_EQ(buf[1], feature_data[1])
+ TH_LOG("expected feature_data[1] = %d, got %d", feature_data[1], buf[1]);
+}
+
+/*
+ * Test HIDIOCGINPUT ioctl with invalid report ID
+ */
+TEST_F(hidraw, ioctl_ginput_invalid)
+{
+ __u8 buf[10] = {0};
+ int err;
+
+ /* set invalid report ID (not 1) */
+ buf[0] = 2;
+
+ /* try to get input report */
+ err = ioctl(self->hidraw_fd, HIDIOCGINPUT(sizeof(buf)), buf);
+ ASSERT_LT(err, 0) TH_LOG("HIDIOCGINPUT should have failed with invalid report ID");
+ ASSERT_EQ(errno, EIO) TH_LOG("expected EIO, got errno %d", errno);
+}
+
+/*
+ * Test HIDIOCSINPUT ioctl to set input report
+ */
+TEST_F(hidraw, ioctl_sinput)
+{
+ __u8 buf[10] = {0};
+ int err;
+
+ /* prepare input report data */
+ buf[0] = 1; /* report ID */
+ buf[1] = 0x55;
+ buf[2] = 0xAA;
+
+ /* set input report */
+ err = ioctl(self->hidraw_fd, HIDIOCSINPUT(3), buf);
+ ASSERT_EQ(err, 3) TH_LOG("HIDIOCSINPUT ioctl failed, got %d", err);
+
+ /*
+ * Note: The uhid mock doesn't validate the set report data,
+ * so we just verify the ioctl succeeds
+ */
+}
+
+/*
+ * Test HIDIOCGOUTPUT ioctl to get output report
+ */
+TEST_F(hidraw, ioctl_goutput)
+{
+ __u8 buf[10] = {0};
+ int err;
+
+ /* set report ID 1 in first byte */
+ buf[0] = 1;
+
+ /* get output report */
+ err = ioctl(self->hidraw_fd, HIDIOCGOUTPUT(sizeof(buf)), buf);
+ ASSERT_EQ(err, sizeof(feature_data)) TH_LOG("HIDIOCGOUTPUT ioctl failed, got %d", err);
+
+ /* verify we got the expected output data */
+ ASSERT_EQ(buf[0], feature_data[0])
+ TH_LOG("expected feature_data[0] = %d, got %d", feature_data[0], buf[0]);
+ ASSERT_EQ(buf[1], feature_data[1])
+ TH_LOG("expected feature_data[1] = %d, got %d", feature_data[1], buf[1]);
+}
+
+/*
+ * Test HIDIOCGOUTPUT ioctl with invalid report ID
+ */
+TEST_F(hidraw, ioctl_goutput_invalid)
+{
+ __u8 buf[10] = {0};
+ int err;
+
+ /* set invalid report ID (not 1) */
+ buf[0] = 2;
+
+ /* try to get output report */
+ err = ioctl(self->hidraw_fd, HIDIOCGOUTPUT(sizeof(buf)), buf);
+ ASSERT_LT(err, 0) TH_LOG("HIDIOCGOUTPUT should have failed with invalid report ID");
+ ASSERT_EQ(errno, EIO) TH_LOG("expected EIO, got errno %d", errno);
+}
+
+/*
+ * Test HIDIOCSOUTPUT ioctl to set output report
+ */
+TEST_F(hidraw, ioctl_soutput)
+{
+ __u8 buf[10] = {0};
+ int err;
+
+ /* prepare output report data */
+ buf[0] = 1; /* report ID */
+ buf[1] = 0x33;
+ buf[2] = 0xCC;
+
+ /* set output report */
+ err = ioctl(self->hidraw_fd, HIDIOCSOUTPUT(3), buf);
+ ASSERT_EQ(err, 3) TH_LOG("HIDIOCSOUTPUT ioctl failed, got %d", err);
+
+ /*
+ * Note: The uhid mock doesn't validate the set report data,
+ * so we just verify the ioctl succeeds
+ */
+}
+
+/*
+ * Test HIDIOCGRAWNAME ioctl to get device name string
+ */
+TEST_F(hidraw, ioctl_rawname)
+{
+ char name[256] = {0};
+ char expected_name[64];
+ int err;
+
+ /* get device name */
+ err = ioctl(self->hidraw_fd, HIDIOCGRAWNAME(sizeof(name)), name);
+ ASSERT_GT(err, 0) TH_LOG("HIDIOCGRAWNAME ioctl failed, got %d", err);
+
+ /* construct expected name based on device id */
+ snprintf(expected_name, sizeof(expected_name), "test-uhid-device-%d", self->hid.dev_id);
+
+ /* verify the name matches expected pattern */
+ ASSERT_EQ(strcmp(name, expected_name), 0)
+ TH_LOG("expected name '%s', got '%s'", expected_name, name);
+}
+
+/*
+ * Test HIDIOCGRAWPHYS ioctl to get device physical address string
+ */
+TEST_F(hidraw, ioctl_rawphys)
+{
+ char phys[256] = {0};
+ char expected_phys[64];
+ int err;
+
+ /* get device physical address */
+ err = ioctl(self->hidraw_fd, HIDIOCGRAWPHYS(sizeof(phys)), phys);
+ ASSERT_GT(err, 0) TH_LOG("HIDIOCGRAWPHYS ioctl failed, got %d", err);
+
+ /* construct expected phys based on device id */
+ snprintf(expected_phys, sizeof(expected_phys), "%d", self->hid.dev_id);
+
+ /* verify the phys matches expected value */
+ ASSERT_EQ(strcmp(phys, expected_phys), 0)
+ TH_LOG("expected phys '%s', got '%s'", expected_phys, phys);
+}
+
+/*
+ * Test HIDIOCGRAWUNIQ ioctl to get device unique identifier string
+ */
+TEST_F(hidraw, ioctl_rawuniq)
+{
+ char uniq[256] = {0};
+ int err;
+
+ /* get device unique identifier */
+ err = ioctl(self->hidraw_fd, HIDIOCGRAWUNIQ(sizeof(uniq)), uniq);
+ ASSERT_GE(err, 0) TH_LOG("HIDIOCGRAWUNIQ ioctl failed, got %d", err);
+
+ /* uniq is typically empty in our test setup */
+ ASSERT_EQ(strlen(uniq), 0) TH_LOG("expected empty uniq, got '%s'", uniq);
+}
+
+/*
+ * Test device string ioctls with small buffer sizes
+ */
+TEST_F(hidraw, ioctl_strings_small_buffer)
+{
+ char small_buf[8] = {0};
+ char expected_name[64];
+ int err;
+
+ /* test HIDIOCGRAWNAME with small buffer */
+ err = ioctl(self->hidraw_fd, HIDIOCGRAWNAME(sizeof(small_buf)), small_buf);
+ ASSERT_EQ(err, sizeof(small_buf))
+ TH_LOG("HIDIOCGRAWNAME with small buffer failed, got %d", err);
+
+ /* construct expected truncated name */
+ snprintf(expected_name, sizeof(expected_name), "test-uhid-device-%d", self->hid.dev_id);
+
+ /* verify we got truncated name (first 8 chars, no null terminator guaranteed) */
+ ASSERT_EQ(strncmp(small_buf, expected_name, sizeof(small_buf)), 0)
+ TH_LOG("expected truncated name to match first %zu chars", sizeof(small_buf));
+
+ /* Note: hidraw driver doesn't guarantee null termination when buffer is too small */
+}
+
int main(int argc, char **argv)
{
return test_harness_run(argc, argv);
diff --git a/tools/testing/selftests/hid/tests/test_multitouch.py b/tools/testing/selftests/hid/tests/test_multitouch.py
index 5d2ffa3d5977..ece0ba8e7d34 100644
--- a/tools/testing/selftests/hid/tests/test_multitouch.py
+++ b/tools/testing/selftests/hid/tests/test_multitouch.py
@@ -1752,6 +1752,52 @@ class TestWin8TSConfidence(BaseTest.TestWin8Multitouch):
assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+ @pytest.mark.skip_if_uhdev(
+ lambda uhdev: "Confidence" not in uhdev.fields,
+ "Device not compatible, missing Confidence usage",
+ )
+ def test_mt_confidence_bad_multi_release(self):
+ """Check for the sticky finger being properly detected.
+
+ We first inject 3 fingers, then release only the second.
+ After 100 ms, we should receive a generated event about the
+ 2 missing fingers being released.
+ """
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+
+ # send 3 touches
+ t0 = Touch(1, 50, 10)
+ t1 = Touch(2, 150, 100)
+ t2 = Touch(3, 250, 200)
+ r = uhdev.event([t0, t1, t2])
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+
+ # release the second
+ t1.tipswitch = False
+ r = uhdev.event([t1])
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+
+ # only the second is released
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] != -1
+ assert evdev.slots[1][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+ assert evdev.slots[2][libevdev.EV_ABS.ABS_MT_TRACKING_ID] != -1
+
+ # wait for the timer to kick in
+ time.sleep(0.2)
+
+ events = uhdev.next_sync_events()
+ self.debug_reports([], uhdev, events)
+
+ # now all 3 fingers are released
+ assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH, 0) in events
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+ assert evdev.slots[1][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+ assert evdev.slots[2][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+
+
class TestElanXPS9360(BaseTest.TestWin8Multitouch):
def create_device(self):
return Digitizer(
@@ -2086,3 +2132,12 @@ class Testsynaptics_06cb_ce08(BaseTest.TestPTP):
input_info=(BusType.I2C, 0x06CB, 0xCE08),
rdesc="05 01 09 02 a1 01 85 02 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 75 01 95 02 81 02 95 06 81 01 05 01 09 30 09 31 15 81 25 7f 75 08 95 02 81 06 c0 c0 05 01 09 02 a1 01 85 18 09 01 a1 00 05 09 19 01 29 03 46 00 00 15 00 25 01 75 01 95 03 81 02 95 05 81 01 05 01 09 30 09 31 15 81 25 7f 75 08 95 02 81 06 c0 c0 06 00 ff 09 02 a1 01 85 20 09 01 a1 00 09 03 15 00 26 ff 00 35 00 46 ff 00 75 08 95 05 81 02 c0 c0 05 0d 09 05 a1 01 85 03 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 f8 04 75 10 55 0e 65 11 09 30 35 00 46 24 04 95 01 81 02 46 30 02 26 a0 02 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 f8 04 75 10 55 0e 65 11 09 30 35 00 46 24 04 95 01 81 02 46 30 02 26 a0 02 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 f8 04 75 10 55 0e 65 11 09 30 35 00 46 24 04 95 01 81 02 46 30 02 26 a0 02 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 f8 04 75 10 55 0e 65 11 09 30 35 00 46 24 04 95 01 81 02 46 30 02 26 a0 02 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 f8 04 75 10 55 0e 65 11 09 30 35 00 46 24 04 95 01 81 02 46 30 02 26 a0 02 09 31 81 02 c0 05 0d 55 0c 66 01 10 47 ff ff 00 00 27 ff ff 00 00 75 10 95 01 09 56 81 02 09 54 25 7f 95 01 75 08 81 02 05 09 09 01 25 01 75 01 95 01 81 02 95 07 81 03 05 0d 85 08 09 55 09 59 75 04 95 02 25 0f b1 02 85 0d 09 60 75 01 95 01 15 00 25 01 b1 02 95 07 b1 03 85 07 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 0d 09 0e a1 01 85 04 09 22 a1 02 09 52 15 00 25 0a 75 08 95 01 b1 02 c0 09 22 a1 00 85 06 09 57 09 58 75 01 95 02 25 01 b1 02 95 06 b1 03 c0 c0 06 00 ff 09 01 a1 01 85 09 09 02 15 00 26 ff 00 75 08 95 14 91 02 85 0a 09 03 15 00 26 ff 00 75 08 95 14 91 02 85 0b 09 04 15 00 26 ff 00 75 08 95 45 81 02 85 0c 09 05 15 00 26 ff 00 75 08 95 45 81 02 85 0f 09 06 15 00 26 ff 00 75 08 95 03 b1 02 85 0e 09 07 15 00 26 ff 00 75 08 95 01 b1 02 c0",
)
+
+class Testsynaptics_06cb_ce26(TestWin8TSConfidence):
+ def create_device(self):
+ return PTP(
+ "uhid test synaptics_06cb_ce26",
+ max_contacts=5,
+ input_info=(BusType.I2C, 0x06CB, 0xCE26),
+ rdesc="05 01 09 02 a1 01 85 02 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 75 01 95 02 81 02 95 06 81 01 05 01 09 30 09 31 15 81 25 7f 75 08 95 02 81 06 c0 c0 05 0d 09 05 a1 01 85 03 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 45 05 75 10 55 0e 65 11 09 30 35 00 46 64 04 95 01 81 02 46 a2 02 26 29 03 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 45 05 75 10 55 0e 65 11 09 30 35 00 46 64 04 95 01 81 02 46 a2 02 26 29 03 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 45 05 75 10 55 0e 65 11 09 30 35 00 46 64 04 95 01 81 02 46 a2 02 26 29 03 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 45 05 75 10 55 0e 65 11 09 30 35 00 46 64 04 95 01 81 02 46 a2 02 26 29 03 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 45 05 75 10 55 0e 65 11 09 30 35 00 46 64 04 95 01 81 02 46 a2 02 26 29 03 09 31 81 02 c0 05 0d 55 0c 66 01 10 47 ff ff 00 00 27 ff ff 00 00 75 10 95 01 09 56 81 02 09 54 25 7f 95 01 75 08 81 02 05 09 09 01 25 01 75 01 95 01 81 02 95 07 81 03 05 0d 85 08 09 55 09 59 75 04 95 02 25 0f b1 02 85 0d 09 60 75 01 95 01 15 00 25 01 b1 02 95 07 b1 03 85 07 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 0d 09 0e a1 01 85 04 09 22 a1 02 09 52 15 00 25 0a 75 08 95 01 b1 02 c0 09 22 a1 00 85 06 09 57 09 58 75 01 95 02 25 01 b1 02 95 06 b1 03 c0 c0 06 00 ff 09 01 a1 01 85 09 09 02 15 00 26 ff 00 75 08 95 14 91 02 85 0a 09 03 15 00 26 ff 00 75 08 95 14 91 02 85 0b 09 04 15 00 26 ff 00 75 08 95 3d 81 02 85 0c 09 05 15 00 26 ff 00 75 08 95 3d 81 02 85 0f 09 06 15 00 26 ff 00 75 08 95 03 b1 02 85 0e 09 07 15 00 26 ff 00 75 08 95 01 b1 02 c0",
+ )
diff --git a/tools/testing/selftests/hid/vmtest.sh b/tools/testing/selftests/hid/vmtest.sh
index db534e9099a8..ecbd57f775a0 100755
--- a/tools/testing/selftests/hid/vmtest.sh
+++ b/tools/testing/selftests/hid/vmtest.sh
@@ -1,296 +1,474 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (c) 2025 Red Hat
+# Copyright (c) 2025 Meta Platforms, Inc. and affiliates
+#
+# Dependencies:
+# * virtme-ng
+# * busybox-static (used by virtme-ng)
+# * qemu (used by virtme-ng)
+
+readonly SCRIPT_DIR="$(cd -P -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P)"
+readonly KERNEL_CHECKOUT=$(realpath "${SCRIPT_DIR}"/../../../../)
+
+source "${SCRIPT_DIR}"/../kselftest/ktap_helpers.sh
+
+readonly HID_BPF_TEST="${SCRIPT_DIR}"/hid_bpf
+readonly HIDRAW_TEST="${SCRIPT_DIR}"/hidraw
+readonly HID_BPF_PROGS="${KERNEL_CHECKOUT}/drivers/hid/bpf/progs"
+readonly SSH_GUEST_PORT=22
+readonly WAIT_PERIOD=3
+readonly WAIT_PERIOD_MAX=60
+readonly WAIT_TOTAL=$(( WAIT_PERIOD * WAIT_PERIOD_MAX ))
+readonly QEMU_PIDFILE=$(mktemp /tmp/qemu_hid_vmtest_XXXX.pid)
+
+readonly QEMU_OPTS="\
+ --pidfile ${QEMU_PIDFILE} \
+"
+readonly KERNEL_CMDLINE=""
+readonly LOG=$(mktemp /tmp/hid_vmtest_XXXX.log)
+readonly TEST_NAMES=(vm_hid_bpf vm_hidraw vm_pytest)
+readonly TEST_DESCS=(
+ "Run hid_bpf tests in the VM."
+ "Run hidraw tests in the VM."
+ "Run the hid-tools test-suite in the VM."
+)
+
+VERBOSE=0
+SHELL_MODE=0
+BUILD_HOST=""
+BUILD_HOST_PODMAN_CONTAINER_NAME=""
+
+usage() {
+ local name
+ local desc
+ local i
+
+ echo
+ echo "$0 [OPTIONS] [TEST]... [-- tests-args]"
+ echo "If no TEST argument is given, all tests will be run."
+ echo
+ echo "Options"
+ echo " -b: build the kernel from the current source tree and use it for guest VMs"
+ echo " -H: hostname for remote build host (used with -b)"
+ echo " -p: podman container name for remote build host (used with -b)"
+ echo " Example: -H beefyserver -p vng"
+ echo " -q: set the path to or name of qemu binary"
+ echo " -s: start a shell in the VM instead of running tests"
+ echo " -v: more verbose output (can be repeated multiple times)"
+ echo
+ echo "Available tests"
+
+ for ((i = 0; i < ${#TEST_NAMES[@]}; i++)); do
+ name=${TEST_NAMES[${i}]}
+ desc=${TEST_DESCS[${i}]}
+ printf "\t%-35s%-35s\n" "${name}" "${desc}"
+ done
+ echo
-set -u
-set -e
-
-# This script currently only works for x86_64
-ARCH="$(uname -m)"
-case "${ARCH}" in
-x86_64)
- QEMU_BINARY=qemu-system-x86_64
- BZIMAGE="arch/x86/boot/bzImage"
- ;;
-*)
- echo "Unsupported architecture"
exit 1
- ;;
-esac
-SCRIPT_DIR="$(dirname $(realpath $0))"
-OUTPUT_DIR="$SCRIPT_DIR/results"
-KCONFIG_REL_PATHS=("${SCRIPT_DIR}/config" "${SCRIPT_DIR}/config.common" "${SCRIPT_DIR}/config.${ARCH}")
-B2C_URL="https://gitlab.freedesktop.org/gfx-ci/boot2container/-/raw/main/vm2c.py"
-NUM_COMPILE_JOBS="$(nproc)"
-LOG_FILE_BASE="$(date +"hid_selftests.%Y-%m-%d_%H-%M-%S")"
-LOG_FILE="${LOG_FILE_BASE}.log"
-EXIT_STATUS_FILE="${LOG_FILE_BASE}.exit_status"
-CONTAINER_IMAGE="registry.freedesktop.org/bentiss/hid/fedora/39:2023-11-22.1"
-
-TARGETS="${TARGETS:=$(basename ${SCRIPT_DIR})}"
-DEFAULT_COMMAND="pip3 install hid-tools; make -C tools/testing/selftests TARGETS=${TARGETS} run_tests"
-
-usage()
-{
- cat <<EOF
-Usage: $0 [-j N] [-s] [-b] [-d <output_dir>] -- [<command>]
-
-<command> is the command you would normally run when you are in
-the source kernel direcory. e.g:
-
- $0 -- ./tools/testing/selftests/hid/hid_bpf
-
-If no command is specified and a debug shell (-s) is not requested,
-"${DEFAULT_COMMAND}" will be run by default.
-
-If you build your kernel using KBUILD_OUTPUT= or O= options, these
-can be passed as environment variables to the script:
-
- O=<kernel_build_path> $0 -- ./tools/testing/selftests/hid/hid_bpf
-
-or
-
- KBUILD_OUTPUT=<kernel_build_path> $0 -- ./tools/testing/selftests/hid/hid_bpf
-
-Options:
-
- -u) Update the boot2container script to a newer version.
- -d) Update the output directory (default: ${OUTPUT_DIR})
- -b) Run only the build steps for the kernel and the selftests
- -j) Number of jobs for compilation, similar to -j in make
- (default: ${NUM_COMPILE_JOBS})
- -s) Instead of powering off the VM, start an interactive
- shell. If <command> is specified, the shell runs after
- the command finishes executing
-EOF
}
-download()
-{
- local file="$1"
+die() {
+ echo "$*" >&2
+ exit "${KSFT_FAIL}"
+}
- echo "Downloading $file..." >&2
- curl -Lsf "$file" -o "${@:2}"
+vm_ssh() {
+ # vng --ssh-client keeps shouting "Warning: Permanently added 'virtme-ng%22'
+ # (ED25519) to the list of known hosts.",
+ # So replace the command with what's actually called and add the "-q" option
+ stdbuf -oL ssh -q \
+ -F ${HOME}/.cache/virtme-ng/.ssh/virtme-ng-ssh.conf \
+ -l root virtme-ng%${SSH_GUEST_PORT} \
+ "$@"
+ return $?
}
-recompile_kernel()
-{
- local kernel_checkout="$1"
- local make_command="$2"
+cleanup() {
+ if [[ -s "${QEMU_PIDFILE}" ]]; then
+ pkill -SIGTERM -F "${QEMU_PIDFILE}" > /dev/null 2>&1
+ fi
- cd "${kernel_checkout}"
+ # If failure occurred during or before qemu start up, then we need
+ # to clean this up ourselves.
+ if [[ -e "${QEMU_PIDFILE}" ]]; then
+ rm "${QEMU_PIDFILE}"
+ fi
+}
+
+check_args() {
+ local found
- ${make_command} olddefconfig
- ${make_command} headers
- ${make_command}
+ for arg in "$@"; do
+ found=0
+ for name in "${TEST_NAMES[@]}"; do
+ if [[ "${name}" = "${arg}" ]]; then
+ found=1
+ break
+ fi
+ done
+
+ if [[ "${found}" -eq 0 ]]; then
+ echo "${arg} is not an available test" >&2
+ usage
+ fi
+ done
+
+ for arg in "$@"; do
+ if ! command -v > /dev/null "test_${arg}"; then
+ echo "Test ${arg} not found" >&2
+ usage
+ fi
+ done
+}
+
+check_deps() {
+ for dep in vng ${QEMU} busybox pkill ssh pytest; do
+ if [[ ! -x $(command -v "${dep}") ]]; then
+ echo -e "skip: dependency ${dep} not found!\n"
+ exit "${KSFT_SKIP}"
+ fi
+ done
+
+ if [[ ! -x $(command -v "${HID_BPF_TEST}") ]]; then
+ printf "skip: %s not found!" "${HID_BPF_TEST}"
+ printf " Please build the kselftest hid_bpf target.\n"
+ exit "${KSFT_SKIP}"
+ fi
+
+ if [[ ! -x $(command -v "${HIDRAW_TEST}") ]]; then
+ printf "skip: %s not found!" "${HIDRAW_TEST}"
+ printf " Please build the kselftest hidraw target.\n"
+ exit "${KSFT_SKIP}"
+ fi
}
-update_selftests()
-{
- local kernel_checkout="$1"
- local selftests_dir="${kernel_checkout}/tools/testing/selftests/hid"
+check_vng() {
+ local tested_versions
+ local version
+ local ok
- cd "${selftests_dir}"
- ${make_command}
+ tested_versions=("1.36" "1.37")
+ version="$(vng --version)"
+
+ ok=0
+ for tv in "${tested_versions[@]}"; do
+ if [[ "${version}" == *"${tv}"* ]]; then
+ ok=1
+ break
+ fi
+ done
+
+ if [[ ! "${ok}" -eq 1 ]]; then
+ printf "warning: vng version '%s' has not been tested and may " "${version}" >&2
+ printf "not function properly.\n\tThe following versions have been tested: " >&2
+ echo "${tested_versions[@]}" >&2
+ fi
}
-run_vm()
-{
- local run_dir="$1"
- local b2c="$2"
- local kernel_bzimage="$3"
- local command="$4"
- local post_command=""
-
- cd "${run_dir}"
-
- if ! which "${QEMU_BINARY}" &> /dev/null; then
- cat <<EOF
-Could not find ${QEMU_BINARY}
-Please install qemu or set the QEMU_BINARY environment variable.
-EOF
+handle_build() {
+ if [[ ! "${BUILD}" -eq 1 ]]; then
+ return
+ fi
+
+ if [[ ! -d "${KERNEL_CHECKOUT}" ]]; then
+ echo "-b requires vmtest.sh called from the kernel source tree" >&2
exit 1
fi
- # alpine (used in post-container requires the PATH to have /bin
- export PATH=$PATH:/bin
+ pushd "${KERNEL_CHECKOUT}" &>/dev/null
- if [[ "${debug_shell}" != "yes" ]]
- then
- touch ${OUTPUT_DIR}/${LOG_FILE}
- command="mount bpffs -t bpf /sys/fs/bpf/; set -o pipefail ; ${command} 2>&1 | tee ${OUTPUT_DIR}/${LOG_FILE}"
- post_command="cat ${OUTPUT_DIR}/${LOG_FILE}"
- else
- command="mount bpffs -t bpf /sys/fs/bpf/; ${command}"
+ if ! vng --kconfig --config "${SCRIPT_DIR}"/config; then
+ die "failed to generate .config for kernel source tree (${KERNEL_CHECKOUT})"
fi
- set +e
- $b2c --command "${command}" \
- --kernel ${kernel_bzimage} \
- --workdir ${OUTPUT_DIR} \
- --image ${CONTAINER_IMAGE}
+ local vng_args=("-v" "--config" "${SCRIPT_DIR}/config" "--build")
- echo $? > ${OUTPUT_DIR}/${EXIT_STATUS_FILE}
+ if [[ -n "${BUILD_HOST}" ]]; then
+ vng_args+=("--build-host" "${BUILD_HOST}")
+ fi
- set -e
+ if [[ -n "${BUILD_HOST_PODMAN_CONTAINER_NAME}" ]]; then
+ vng_args+=("--build-host-exec-prefix" \
+ "podman exec -ti ${BUILD_HOST_PODMAN_CONTAINER_NAME}")
+ fi
- ${post_command}
-}
+ if ! vng "${vng_args[@]}"; then
+ die "failed to build kernel from source tree (${KERNEL_CHECKOUT})"
+ fi
-is_rel_path()
-{
- local path="$1"
+ if ! make -j$(nproc) -C "${HID_BPF_PROGS}"; then
+ die "failed to build HID bpf objects from source tree (${HID_BPF_PROGS})"
+ fi
- [[ ${path:0:1} != "/" ]]
+ if ! make -j$(nproc) -C "${SCRIPT_DIR}"; then
+ die "failed to build HID selftests from source tree (${SCRIPT_DIR})"
+ fi
+
+ popd &>/dev/null
}
-do_update_kconfig()
-{
- local kernel_checkout="$1"
- local kconfig_file="$2"
+vm_start() {
+ local logfile=/dev/null
+ local verbose_opt=""
+ local kernel_opt=""
+ local qemu
- rm -f "$kconfig_file" 2> /dev/null
+ qemu=$(command -v "${QEMU}")
- for config in "${KCONFIG_REL_PATHS[@]}"; do
- local kconfig_src="${config}"
- cat "$kconfig_src" >> "$kconfig_file"
- done
-}
+ if [[ "${VERBOSE}" -eq 2 ]]; then
+ verbose_opt="--verbose"
+ logfile=/dev/stdout
+ fi
-update_kconfig()
-{
- local kernel_checkout="$1"
- local kconfig_file="$2"
-
- if [[ -f "${kconfig_file}" ]]; then
- local local_modified="$(stat -c %Y "${kconfig_file}")"
-
- for config in "${KCONFIG_REL_PATHS[@]}"; do
- local kconfig_src="${config}"
- local src_modified="$(stat -c %Y "${kconfig_src}")"
- # Only update the config if it has been updated after the
- # previously cached config was created. This avoids
- # unnecessarily compiling the kernel and selftests.
- if [[ "${src_modified}" -gt "${local_modified}" ]]; then
- do_update_kconfig "$kernel_checkout" "$kconfig_file"
- # Once we have found one outdated configuration
- # there is no need to check other ones.
- break
- fi
- done
- else
- do_update_kconfig "$kernel_checkout" "$kconfig_file"
+ # If we are running from within the kernel source tree, use the kernel source tree
+ # as the kernel to boot, otherwise use the currently running kernel.
+ if [[ "$(realpath "$(pwd)")" == "${KERNEL_CHECKOUT}"* ]]; then
+ kernel_opt="${KERNEL_CHECKOUT}"
fi
-}
-main()
-{
- local script_dir="$(cd -P -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P)"
- local kernel_checkout=$(realpath "${script_dir}"/../../../../)
- # By default the script searches for the kernel in the checkout directory but
- # it also obeys environment variables O= and KBUILD_OUTPUT=
- local kernel_bzimage="${kernel_checkout}/${BZIMAGE}"
- local command="${DEFAULT_COMMAND}"
- local update_b2c="no"
- local debug_shell="no"
- local build_only="no"
-
- while getopts ':hsud:j:b' opt; do
- case ${opt} in
- u)
- update_b2c="yes"
- ;;
- d)
- OUTPUT_DIR="$OPTARG"
- ;;
- j)
- NUM_COMPILE_JOBS="$OPTARG"
- ;;
- s)
- command="/bin/sh"
- debug_shell="yes"
- ;;
- b)
- build_only="yes"
- ;;
- h)
- usage
- exit 0
- ;;
- \? )
- echo "Invalid Option: -$OPTARG"
- usage
- exit 1
- ;;
- : )
- echo "Invalid Option: -$OPTARG requires an argument"
- usage
- exit 1
- ;;
- esac
- done
- shift $((OPTIND -1))
-
- # trap 'catch "$?"' EXIT
- if [[ "${build_only}" == "no" && "${debug_shell}" == "no" ]]; then
- if [[ $# -eq 0 ]]; then
- echo "No command specified, will run ${DEFAULT_COMMAND} in the vm"
- else
- command="$@"
-
- if [[ "${command}" == "/bin/bash" || "${command}" == "bash" ]]
- then
- debug_shell="yes"
- fi
+ vng \
+ --run \
+ ${kernel_opt} \
+ ${verbose_opt} \
+ --qemu-opts="${QEMU_OPTS}" \
+ --qemu="${qemu}" \
+ --user root \
+ --append "${KERNEL_CMDLINE}" \
+ --ssh "${SSH_GUEST_PORT}" \
+ --rw &> ${logfile} &
+
+ local vng_pid=$!
+ local elapsed=0
+
+ while [[ ! -s "${QEMU_PIDFILE}" ]]; do
+ if ! kill -0 "${vng_pid}" 2>/dev/null; then
+ echo "vng process (PID ${vng_pid}) exited early, check logs for details" >&2
+ die "failed to boot VM"
fi
- fi
- local kconfig_file="${OUTPUT_DIR}/latest.config"
- local make_command="make -j ${NUM_COMPILE_JOBS} KCONFIG_CONFIG=${kconfig_file}"
+ if [[ ${elapsed} -ge ${WAIT_TOTAL} ]]; then
+ echo "Timed out after ${WAIT_TOTAL} seconds waiting for VM to boot" >&2
+ die "failed to boot VM"
+ fi
- # Figure out where the kernel is being built.
- # O takes precedence over KBUILD_OUTPUT.
- if [[ "${O:=""}" != "" ]]; then
- if is_rel_path "${O}"; then
- O="$(realpath "${PWD}/${O}")"
+ sleep 1
+ elapsed=$((elapsed + 1))
+ done
+}
+
+vm_wait_for_ssh() {
+ local i
+
+ i=0
+ while true; do
+ if [[ ${i} -gt ${WAIT_PERIOD_MAX} ]]; then
+ die "Timed out waiting for guest ssh"
fi
- kernel_bzimage="${O}/${BZIMAGE}"
- make_command="${make_command} O=${O}"
- elif [[ "${KBUILD_OUTPUT:=""}" != "" ]]; then
- if is_rel_path "${KBUILD_OUTPUT}"; then
- KBUILD_OUTPUT="$(realpath "${PWD}/${KBUILD_OUTPUT}")"
+ if vm_ssh -- true; then
+ break
fi
- kernel_bzimage="${KBUILD_OUTPUT}/${BZIMAGE}"
- make_command="${make_command} KBUILD_OUTPUT=${KBUILD_OUTPUT}"
+ i=$(( i + 1 ))
+ sleep ${WAIT_PERIOD}
+ done
+}
+
+vm_mount_bpffs() {
+ vm_ssh -- mount bpffs -t bpf /sys/fs/bpf
+}
+
+__log_stdin() {
+ stdbuf -oL awk '{ printf "%s:\t%s\n","'"${prefix}"'", $0; fflush() }'
+}
+
+__log_args() {
+ echo "$*" | awk '{ printf "%s:\t%s\n","'"${prefix}"'", $0 }'
+}
+
+log() {
+ local verbose="$1"
+ shift
+
+ local prefix="$1"
+
+ shift
+ local redirect=
+ if [[ ${verbose} -le 0 ]]; then
+ redirect=/dev/null
+ else
+ redirect=/dev/stdout
+ fi
+
+ if [[ "$#" -eq 0 ]]; then
+ __log_stdin | tee -a "${LOG}" > ${redirect}
+ else
+ __log_args "$@" | tee -a "${LOG}" > ${redirect}
fi
+}
- local b2c="${OUTPUT_DIR}/vm2c.py"
+log_setup() {
+ log $((VERBOSE-1)) "setup" "$@"
+}
- echo "Output directory: ${OUTPUT_DIR}"
+log_host() {
+ local testname=$1
- mkdir -p "${OUTPUT_DIR}"
- update_kconfig "${kernel_checkout}" "${kconfig_file}"
+ shift
+ log $((VERBOSE-1)) "test:${testname}:host" "$@"
+}
- recompile_kernel "${kernel_checkout}" "${make_command}"
- update_selftests "${kernel_checkout}" "${make_command}"
+log_guest() {
+ local testname=$1
- if [[ "${build_only}" == "no" ]]; then
- if [[ "${update_b2c}" == "no" && ! -f "${b2c}" ]]; then
- echo "vm2c script not found in ${b2c}"
- update_b2c="yes"
- fi
+ shift
+ log ${VERBOSE} "# test:${testname}" "$@"
+}
- if [[ "${update_b2c}" == "yes" ]]; then
- download $B2C_URL $b2c
- chmod +x $b2c
- fi
+test_vm_hid_bpf() {
+ local testname="${FUNCNAME[0]#test_}"
- run_vm "${kernel_checkout}" $b2c "${kernel_bzimage}" "${command}"
- if [[ "${debug_shell}" != "yes" ]]; then
- echo "Logs saved in ${OUTPUT_DIR}/${LOG_FILE}"
- fi
+ vm_ssh -- "${HID_BPF_TEST}" \
+ 2>&1 | log_guest "${testname}"
+
+ return ${PIPESTATUS[0]}
+}
+
+test_vm_hidraw() {
+ local testname="${FUNCNAME[0]#test_}"
+
+ vm_ssh -- "${HIDRAW_TEST}" \
+ 2>&1 | log_guest "${testname}"
+
+ return ${PIPESTATUS[0]}
+}
+
+test_vm_pytest() {
+ local testname="${FUNCNAME[0]#test_}"
- exit $(cat ${OUTPUT_DIR}/${EXIT_STATUS_FILE})
+ shift
+
+ vm_ssh -- pytest ${SCRIPT_DIR}/tests --color=yes "$@" \
+ 2>&1 | log_guest "${testname}"
+
+ return ${PIPESTATUS[0]}
+}
+
+run_test() {
+ local vm_oops_cnt_before
+ local vm_warn_cnt_before
+ local vm_oops_cnt_after
+ local vm_warn_cnt_after
+ local name
+ local rc
+
+ vm_oops_cnt_before=$(vm_ssh -- dmesg | grep -c -i 'Oops')
+ vm_error_cnt_before=$(vm_ssh -- dmesg --level=err | wc -l)
+
+ name=$(echo "${1}" | awk '{ print $1 }')
+ eval test_"${name}" "$@"
+ rc=$?
+
+ vm_oops_cnt_after=$(vm_ssh -- dmesg | grep -i 'Oops' | wc -l)
+ if [[ ${vm_oops_cnt_after} -gt ${vm_oops_cnt_before} ]]; then
+ echo "FAIL: kernel oops detected on vm" | log_host "${name}"
+ rc=$KSFT_FAIL
+ fi
+
+ vm_error_cnt_after=$(vm_ssh -- dmesg --level=err | wc -l)
+ if [[ ${vm_error_cnt_after} -gt ${vm_error_cnt_before} ]]; then
+ echo "FAIL: kernel error detected on vm" | log_host "${name}"
+ vm_ssh -- dmesg --level=err | log_host "${name}"
+ rc=$KSFT_FAIL
fi
+
+ return "${rc}"
}
-main "$@"
+QEMU="qemu-system-$(uname -m)"
+
+while getopts :hvsbq:H:p: o
+do
+ case $o in
+ v) VERBOSE=$((VERBOSE+1));;
+ s) SHELL_MODE=1;;
+ b) BUILD=1;;
+ q) QEMU=$OPTARG;;
+ H) BUILD_HOST=$OPTARG;;
+ p) BUILD_HOST_PODMAN_CONTAINER_NAME=$OPTARG;;
+ h|*) usage;;
+ esac
+done
+shift $((OPTIND-1))
+
+trap cleanup EXIT
+
+PARAMS=""
+
+if [[ ${#} -eq 0 ]]; then
+ ARGS=("${TEST_NAMES[@]}")
+else
+ ARGS=()
+ COUNT=0
+ for arg in $@; do
+ COUNT=$((COUNT+1))
+ if [[ x"$arg" == x"--" ]]; then
+ break
+ fi
+ ARGS+=($arg)
+ done
+ shift $COUNT
+ PARAMS="$@"
+fi
+
+if [[ "${SHELL_MODE}" -eq 0 ]]; then
+ check_args "${ARGS[@]}"
+ echo "1..${#ARGS[@]}"
+fi
+check_deps
+check_vng
+handle_build
+
+log_setup "Booting up VM"
+vm_start
+vm_wait_for_ssh
+vm_mount_bpffs
+log_setup "VM booted up"
+
+if [[ "${SHELL_MODE}" -eq 1 ]]; then
+ log_setup "Starting interactive shell in VM"
+ echo "Starting shell in VM. Use 'exit' to quit and shutdown the VM."
+ CURRENT_DIR="$(pwd)"
+ vm_ssh -t -- "cd '${CURRENT_DIR}' && exec bash -l"
+ exit "$KSFT_PASS"
+fi
+
+cnt_pass=0
+cnt_fail=0
+cnt_skip=0
+cnt_total=0
+for arg in "${ARGS[@]}"; do
+ run_test "${arg}" "${PARAMS}"
+ rc=$?
+ if [[ ${rc} -eq $KSFT_PASS ]]; then
+ cnt_pass=$(( cnt_pass + 1 ))
+ echo "ok ${cnt_total} ${arg}"
+ elif [[ ${rc} -eq $KSFT_SKIP ]]; then
+ cnt_skip=$(( cnt_skip + 1 ))
+ echo "ok ${cnt_total} ${arg} # SKIP"
+ elif [[ ${rc} -eq $KSFT_FAIL ]]; then
+ cnt_fail=$(( cnt_fail + 1 ))
+ echo "not ok ${cnt_total} ${arg} # exit=$rc"
+ fi
+ cnt_total=$(( cnt_total + 1 ))
+done
+
+echo "SUMMARY: PASS=${cnt_pass} SKIP=${cnt_skip} FAIL=${cnt_fail}"
+echo "Log: ${LOG}"
+
+if [ $((cnt_pass + cnt_skip)) -eq ${cnt_total} ]; then
+ exit "$KSFT_PASS"
+else
+ exit "$KSFT_FAIL"
+fi
diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c
index 3eebf5e3b974..bb4d33dde3c8 100644
--- a/tools/testing/selftests/iommu/iommufd.c
+++ b/tools/testing/selftests/iommu/iommufd.c
@@ -2638,6 +2638,8 @@ TEST_F(vfio_compat_mock_domain, map)
ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_MAP_DMA, &map_cmd));
ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, &unmap_cmd));
ASSERT_EQ(BUFFER_SIZE, unmap_cmd.size);
+ /* Unmap of empty is success */
+ ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, &unmap_cmd));
/* UNMAP_FLAG_ALL requires 0 iova/size */
ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_MAP_DMA, &map_cmd));
diff --git a/tools/testing/selftests/iommu/iommufd_utils.h b/tools/testing/selftests/iommu/iommufd_utils.h
index 772ca1db6e59..9f472c20c190 100644
--- a/tools/testing/selftests/iommu/iommufd_utils.h
+++ b/tools/testing/selftests/iommu/iommufd_utils.h
@@ -1044,8 +1044,8 @@ static int _test_cmd_trigger_vevents(int fd, __u32 dev_id, __u32 nvevents)
};
while (nvevents--) {
- if (!ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_TRIGGER_VEVENT),
- &trigger_vevent_cmd))
+ if (ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_TRIGGER_VEVENT),
+ &trigger_vevent_cmd))
return -1;
}
return 0;
diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm
index 8926ff6808cf..148d427ff24b 100644
--- a/tools/testing/selftests/kvm/Makefile.kvm
+++ b/tools/testing/selftests/kvm/Makefile.kvm
@@ -87,6 +87,7 @@ TEST_GEN_PROGS_x86 += x86/kvm_clock_test
TEST_GEN_PROGS_x86 += x86/kvm_pv_test
TEST_GEN_PROGS_x86 += x86/kvm_buslock_test
TEST_GEN_PROGS_x86 += x86/monitor_mwait_test
+TEST_GEN_PROGS_x86 += x86/msrs_test
TEST_GEN_PROGS_x86 += x86/nested_emulation_test
TEST_GEN_PROGS_x86 += x86/nested_exceptions_test
TEST_GEN_PROGS_x86 += x86/platform_info_test
diff --git a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c
index 91906414a474..993c9e38e729 100644
--- a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c
+++ b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c
@@ -1020,7 +1020,7 @@ static void set_counter_defaults(void)
{
const uint64_t MIN_ROLLOVER_SECS = 40ULL * 365 * 24 * 3600;
uint64_t freq = read_sysreg(CNTFRQ_EL0);
- uint64_t width = ilog2(MIN_ROLLOVER_SECS * freq);
+ int width = ilog2(MIN_ROLLOVER_SECS * freq);
width = clamp(width, 56, 64);
CVAL_MAX = GENMASK_ULL(width - 1, 0);
diff --git a/tools/testing/selftests/kvm/arm64/external_aborts.c b/tools/testing/selftests/kvm/arm64/external_aborts.c
index 592b26ded779..d8fe17a6cc59 100644
--- a/tools/testing/selftests/kvm/arm64/external_aborts.c
+++ b/tools/testing/selftests/kvm/arm64/external_aborts.c
@@ -359,6 +359,44 @@ static void test_mmio_ease(void)
kvm_vm_free(vm);
}
+static void test_serror_amo_guest(void)
+{
+ /*
+ * The ISB is entirely unnecessary (and highlights how FEAT_NV2 is borked)
+ * since the write is redirected to memory. But don't write (intentionally)
+ * broken code!
+ */
+ sysreg_clear_set(hcr_el2, HCR_EL2_AMO | HCR_EL2_TGE, 0);
+ isb();
+
+ GUEST_SYNC(0);
+ GUEST_ASSERT(read_sysreg(isr_el1) & ISR_EL1_A);
+
+ /*
+ * KVM treats the effective value of AMO as 1 when
+ * HCR_EL2.{E2H,TGE} = {1, 0}, meaning the SError will be taken when
+ * unmasked.
+ */
+ local_serror_enable();
+ isb();
+ local_serror_disable();
+
+ GUEST_FAIL("Should've taken pending SError exception");
+}
+
+static void test_serror_amo(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_serror_amo_guest,
+ unexpected_dabt_handler);
+
+ vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, expect_serror_handler);
+ vcpu_run_expect_sync(vcpu);
+ vcpu_inject_serror(vcpu);
+ vcpu_run_expect_done(vcpu);
+ kvm_vm_free(vm);
+}
+
int main(void)
{
test_mmio_abort();
@@ -369,4 +407,9 @@ int main(void)
test_serror_emulated();
test_mmio_ease();
test_s1ptw_abort();
+
+ if (!test_supports_el2())
+ return 0;
+
+ test_serror_amo();
}
diff --git a/tools/testing/selftests/kvm/arm64/get-reg-list.c b/tools/testing/selftests/kvm/arm64/get-reg-list.c
index 011fad95dd02..0a3a94c4cca1 100644
--- a/tools/testing/selftests/kvm/arm64/get-reg-list.c
+++ b/tools/testing/selftests/kvm/arm64/get-reg-list.c
@@ -63,8 +63,13 @@ static struct feature_id_reg feat_id_regs[] = {
REG_FEAT(HDFGWTR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2),
REG_FEAT(ZCR_EL2, ID_AA64PFR0_EL1, SVE, IMP),
REG_FEAT(SCTLR2_EL1, ID_AA64MMFR3_EL1, SCTLRX, IMP),
+ REG_FEAT(SCTLR2_EL2, ID_AA64MMFR3_EL1, SCTLRX, IMP),
REG_FEAT(VDISR_EL2, ID_AA64PFR0_EL1, RAS, IMP),
REG_FEAT(VSESR_EL2, ID_AA64PFR0_EL1, RAS, IMP),
+ REG_FEAT(VNCR_EL2, ID_AA64MMFR4_EL1, NV_frac, NV2_ONLY),
+ REG_FEAT(CNTHV_CTL_EL2, ID_AA64MMFR1_EL1, VH, IMP),
+ REG_FEAT(CNTHV_CVAL_EL2,ID_AA64MMFR1_EL1, VH, IMP),
+ REG_FEAT(ZCR_EL2, ID_AA64PFR0_EL1, SVE, IMP),
};
bool filter_reg(__u64 reg)
@@ -345,9 +350,20 @@ static __u64 base_regs[] = {
KVM_REG_ARM_FW_FEAT_BMAP_REG(1), /* KVM_REG_ARM_STD_HYP_BMAP */
KVM_REG_ARM_FW_FEAT_BMAP_REG(2), /* KVM_REG_ARM_VENDOR_HYP_BMAP */
KVM_REG_ARM_FW_FEAT_BMAP_REG(3), /* KVM_REG_ARM_VENDOR_HYP_BMAP_2 */
- ARM64_SYS_REG(3, 3, 14, 3, 1), /* CNTV_CTL_EL0 */
- ARM64_SYS_REG(3, 3, 14, 3, 2), /* CNTV_CVAL_EL0 */
- ARM64_SYS_REG(3, 3, 14, 0, 2),
+
+ /*
+ * EL0 Virtual Timer Registers
+ *
+ * WARNING:
+ * KVM_REG_ARM_TIMER_CVAL and KVM_REG_ARM_TIMER_CNT are not defined
+ * with the appropriate register encodings. Their values have been
+ * accidentally swapped. As this is set API, the definitions here
+ * must be used, rather than ones derived from the encodings.
+ */
+ KVM_ARM64_SYS_REG(SYS_CNTV_CTL_EL0),
+ KVM_REG_ARM_TIMER_CVAL,
+ KVM_REG_ARM_TIMER_CNT,
+
ARM64_SYS_REG(3, 0, 0, 0, 0), /* MIDR_EL1 */
ARM64_SYS_REG(3, 0, 0, 0, 6), /* REVIDR_EL1 */
ARM64_SYS_REG(3, 1, 0, 0, 1), /* CLIDR_EL1 */
@@ -704,6 +720,7 @@ static __u64 el2_regs[] = {
SYS_REG(VMPIDR_EL2),
SYS_REG(SCTLR_EL2),
SYS_REG(ACTLR_EL2),
+ SYS_REG(SCTLR2_EL2),
SYS_REG(HCR_EL2),
SYS_REG(MDCR_EL2),
SYS_REG(CPTR_EL2),
@@ -755,6 +772,10 @@ static __u64 el2_regs[] = {
SYS_REG(VSESR_EL2),
};
+static __u64 el2_e2h0_regs[] = {
+ /* Empty */
+};
+
#define BASE_SUBLIST \
{ "base", .regs = base_regs, .regs_n = ARRAY_SIZE(base_regs), }
#define VREGS_SUBLIST \
@@ -789,6 +810,15 @@ static __u64 el2_regs[] = {
.regs = el2_regs, \
.regs_n = ARRAY_SIZE(el2_regs), \
}
+#define EL2_E2H0_SUBLIST \
+ EL2_SUBLIST, \
+ { \
+ .name = "EL2 E2H0", \
+ .capability = KVM_CAP_ARM_EL2_E2H0, \
+ .feature = KVM_ARM_VCPU_HAS_EL2_E2H0, \
+ .regs = el2_e2h0_regs, \
+ .regs_n = ARRAY_SIZE(el2_e2h0_regs), \
+ }
static struct vcpu_reg_list vregs_config = {
.sublists = {
@@ -897,6 +927,65 @@ static struct vcpu_reg_list el2_pauth_pmu_config = {
},
};
+static struct vcpu_reg_list el2_e2h0_vregs_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_E2H0_SUBLIST,
+ VREGS_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list el2_e2h0_vregs_pmu_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_E2H0_SUBLIST,
+ VREGS_SUBLIST,
+ PMU_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list el2_e2h0_sve_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_E2H0_SUBLIST,
+ SVE_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list el2_e2h0_sve_pmu_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_E2H0_SUBLIST,
+ SVE_SUBLIST,
+ PMU_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list el2_e2h0_pauth_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_E2H0_SUBLIST,
+ VREGS_SUBLIST,
+ PAUTH_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list el2_e2h0_pauth_pmu_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_E2H0_SUBLIST,
+ VREGS_SUBLIST,
+ PAUTH_SUBLIST,
+ PMU_SUBLIST,
+ {0},
+ },
+};
+
struct vcpu_reg_list *vcpu_configs[] = {
&vregs_config,
&vregs_pmu_config,
@@ -911,5 +1000,12 @@ struct vcpu_reg_list *vcpu_configs[] = {
&el2_sve_pmu_config,
&el2_pauth_config,
&el2_pauth_pmu_config,
+
+ &el2_e2h0_vregs_config,
+ &el2_e2h0_vregs_pmu_config,
+ &el2_e2h0_sve_config,
+ &el2_e2h0_sve_pmu_config,
+ &el2_e2h0_pauth_config,
+ &el2_e2h0_pauth_pmu_config,
};
int vcpu_configs_n = ARRAY_SIZE(vcpu_configs);
diff --git a/tools/testing/selftests/kvm/arm64/set_id_regs.c b/tools/testing/selftests/kvm/arm64/set_id_regs.c
index 8ff1e853f7f8..c4815d365816 100644
--- a/tools/testing/selftests/kvm/arm64/set_id_regs.c
+++ b/tools/testing/selftests/kvm/arm64/set_id_regs.c
@@ -249,11 +249,14 @@ static void guest_code(void)
GUEST_REG_SYNC(SYS_ID_AA64ISAR2_EL1);
GUEST_REG_SYNC(SYS_ID_AA64ISAR3_EL1);
GUEST_REG_SYNC(SYS_ID_AA64PFR0_EL1);
+ GUEST_REG_SYNC(SYS_ID_AA64PFR1_EL1);
GUEST_REG_SYNC(SYS_ID_AA64MMFR0_EL1);
GUEST_REG_SYNC(SYS_ID_AA64MMFR1_EL1);
GUEST_REG_SYNC(SYS_ID_AA64MMFR2_EL1);
GUEST_REG_SYNC(SYS_ID_AA64MMFR3_EL1);
GUEST_REG_SYNC(SYS_ID_AA64ZFR0_EL1);
+ GUEST_REG_SYNC(SYS_MPIDR_EL1);
+ GUEST_REG_SYNC(SYS_CLIDR_EL1);
GUEST_REG_SYNC(SYS_CTR_EL0);
GUEST_REG_SYNC(SYS_MIDR_EL1);
GUEST_REG_SYNC(SYS_REVIDR_EL1);
@@ -265,7 +268,9 @@ static void guest_code(void)
/* Return a safe value to a given ftr_bits an ftr value */
uint64_t get_safe_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr)
{
- uint64_t ftr_max = GENMASK_ULL(ARM64_FEATURE_FIELD_BITS - 1, 0);
+ uint64_t ftr_max = ftr_bits->mask >> ftr_bits->shift;
+
+ TEST_ASSERT(ftr_max > 1, "This test doesn't support single bit features");
if (ftr_bits->sign == FTR_UNSIGNED) {
switch (ftr_bits->type) {
@@ -317,7 +322,9 @@ uint64_t get_safe_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr)
/* Return an invalid value to a given ftr_bits an ftr value */
uint64_t get_invalid_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr)
{
- uint64_t ftr_max = GENMASK_ULL(ARM64_FEATURE_FIELD_BITS - 1, 0);
+ uint64_t ftr_max = ftr_bits->mask >> ftr_bits->shift;
+
+ TEST_ASSERT(ftr_max > 1, "This test doesn't support single bit features");
if (ftr_bits->sign == FTR_UNSIGNED) {
switch (ftr_bits->type) {
@@ -669,7 +676,7 @@ static void test_clidr(struct kvm_vcpu *vcpu)
clidr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CLIDR_EL1));
/* find the first empty level in the cache hierarchy */
- for (level = 1; level < 7; level++) {
+ for (level = 1; level <= 7; level++) {
if (!CLIDR_CTYPE(clidr, level))
break;
}
diff --git a/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c b/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c
index 87922a89b134..687d04463983 100644
--- a/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c
+++ b/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c
@@ -123,6 +123,7 @@ static void guest_setup_gic(void)
static void guest_code(size_t nr_lpis)
{
guest_setup_gic();
+ local_irq_enable();
GUEST_SYNC(0);
@@ -331,7 +332,7 @@ static void setup_vm(void)
{
int i;
- vcpus = malloc(test_data.nr_cpus * sizeof(struct kvm_vcpu));
+ vcpus = malloc(test_data.nr_cpus * sizeof(struct kvm_vcpu *));
TEST_ASSERT(vcpus, "Failed to allocate vCPU array");
vm = vm_create_with_vcpus(test_data.nr_cpus, guest_code, vcpus);
diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c
index b3ca6737f304..e7d9aeb418d3 100644
--- a/tools/testing/selftests/kvm/guest_memfd_test.c
+++ b/tools/testing/selftests/kvm/guest_memfd_test.c
@@ -14,8 +14,6 @@
#include <linux/bitmap.h>
#include <linux/falloc.h>
#include <linux/sizes.h>
-#include <setjmp.h>
-#include <signal.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
@@ -24,7 +22,9 @@
#include "test_util.h"
#include "ucall_common.h"
-static void test_file_read_write(int fd)
+static size_t page_size;
+
+static void test_file_read_write(int fd, size_t total_size)
{
char buf[64];
@@ -38,18 +38,22 @@ static void test_file_read_write(int fd)
"pwrite on a guest_mem fd should fail");
}
-static void test_mmap_supported(int fd, size_t page_size, size_t total_size)
+static void test_mmap_cow(int fd, size_t size)
+{
+ void *mem;
+
+ mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+ TEST_ASSERT(mem == MAP_FAILED, "Copy-on-write not allowed by guest_memfd.");
+}
+
+static void test_mmap_supported(int fd, size_t total_size)
{
const char val = 0xaa;
char *mem;
size_t i;
int ret;
- mem = mmap(NULL, total_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
- TEST_ASSERT(mem == MAP_FAILED, "Copy-on-write not allowed by guest_memfd.");
-
- mem = mmap(NULL, total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
- TEST_ASSERT(mem != MAP_FAILED, "mmap() for guest_memfd should succeed.");
+ mem = kvm_mmap(total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);
memset(mem, val, total_size);
for (i = 0; i < total_size; i++)
@@ -68,45 +72,37 @@ static void test_mmap_supported(int fd, size_t page_size, size_t total_size)
for (i = 0; i < total_size; i++)
TEST_ASSERT_EQ(READ_ONCE(mem[i]), val);
- ret = munmap(mem, total_size);
- TEST_ASSERT(!ret, "munmap() should succeed.");
-}
-
-static sigjmp_buf jmpbuf;
-void fault_sigbus_handler(int signum)
-{
- siglongjmp(jmpbuf, 1);
+ kvm_munmap(mem, total_size);
}
-static void test_fault_overflow(int fd, size_t page_size, size_t total_size)
+static void test_fault_sigbus(int fd, size_t accessible_size, size_t map_size)
{
- struct sigaction sa_old, sa_new = {
- .sa_handler = fault_sigbus_handler,
- };
- size_t map_size = total_size * 4;
const char val = 0xaa;
char *mem;
size_t i;
- int ret;
- mem = mmap(NULL, map_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
- TEST_ASSERT(mem != MAP_FAILED, "mmap() for guest_memfd should succeed.");
+ mem = kvm_mmap(map_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);
- sigaction(SIGBUS, &sa_new, &sa_old);
- if (sigsetjmp(jmpbuf, 1) == 0) {
- memset(mem, 0xaa, map_size);
- TEST_ASSERT(false, "memset() should have triggered SIGBUS.");
- }
- sigaction(SIGBUS, &sa_old, NULL);
+ TEST_EXPECT_SIGBUS(memset(mem, val, map_size));
+ TEST_EXPECT_SIGBUS((void)READ_ONCE(mem[accessible_size]));
- for (i = 0; i < total_size; i++)
+ for (i = 0; i < accessible_size; i++)
TEST_ASSERT_EQ(READ_ONCE(mem[i]), val);
- ret = munmap(mem, map_size);
- TEST_ASSERT(!ret, "munmap() should succeed.");
+ kvm_munmap(mem, map_size);
+}
+
+static void test_fault_overflow(int fd, size_t total_size)
+{
+ test_fault_sigbus(fd, total_size, total_size * 4);
+}
+
+static void test_fault_private(int fd, size_t total_size)
+{
+ test_fault_sigbus(fd, 0, total_size);
}
-static void test_mmap_not_supported(int fd, size_t page_size, size_t total_size)
+static void test_mmap_not_supported(int fd, size_t total_size)
{
char *mem;
@@ -117,7 +113,7 @@ static void test_mmap_not_supported(int fd, size_t page_size, size_t total_size)
TEST_ASSERT_EQ(mem, MAP_FAILED);
}
-static void test_file_size(int fd, size_t page_size, size_t total_size)
+static void test_file_size(int fd, size_t total_size)
{
struct stat sb;
int ret;
@@ -128,7 +124,7 @@ static void test_file_size(int fd, size_t page_size, size_t total_size)
TEST_ASSERT_EQ(sb.st_blksize, page_size);
}
-static void test_fallocate(int fd, size_t page_size, size_t total_size)
+static void test_fallocate(int fd, size_t total_size)
{
int ret;
@@ -165,7 +161,7 @@ static void test_fallocate(int fd, size_t page_size, size_t total_size)
TEST_ASSERT(!ret, "fallocate to restore punched hole should succeed");
}
-static void test_invalid_punch_hole(int fd, size_t page_size, size_t total_size)
+static void test_invalid_punch_hole(int fd, size_t total_size)
{
struct {
off_t offset;
@@ -196,8 +192,7 @@ static void test_invalid_punch_hole(int fd, size_t page_size, size_t total_size)
}
static void test_create_guest_memfd_invalid_sizes(struct kvm_vm *vm,
- uint64_t guest_memfd_flags,
- size_t page_size)
+ uint64_t guest_memfd_flags)
{
size_t size;
int fd;
@@ -214,7 +209,6 @@ static void test_create_guest_memfd_multiple(struct kvm_vm *vm)
{
int fd1, fd2, ret;
struct stat st1, st2;
- size_t page_size = getpagesize();
fd1 = __vm_create_guest_memfd(vm, page_size, 0);
TEST_ASSERT(fd1 != -1, "memfd creation should succeed");
@@ -239,9 +233,9 @@ static void test_create_guest_memfd_multiple(struct kvm_vm *vm)
close(fd1);
}
-static void test_guest_memfd_flags(struct kvm_vm *vm, uint64_t valid_flags)
+static void test_guest_memfd_flags(struct kvm_vm *vm)
{
- size_t page_size = getpagesize();
+ uint64_t valid_flags = vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS);
uint64_t flag;
int fd;
@@ -260,43 +254,57 @@ static void test_guest_memfd_flags(struct kvm_vm *vm, uint64_t valid_flags)
}
}
-static void test_guest_memfd(unsigned long vm_type)
+#define gmem_test(__test, __vm, __flags) \
+do { \
+ int fd = vm_create_guest_memfd(__vm, page_size * 4, __flags); \
+ \
+ test_##__test(fd, page_size * 4); \
+ close(fd); \
+} while (0)
+
+static void __test_guest_memfd(struct kvm_vm *vm, uint64_t flags)
{
- uint64_t flags = 0;
- struct kvm_vm *vm;
- size_t total_size;
- size_t page_size;
- int fd;
+ test_create_guest_memfd_multiple(vm);
+ test_create_guest_memfd_invalid_sizes(vm, flags);
- page_size = getpagesize();
- total_size = page_size * 4;
+ gmem_test(file_read_write, vm, flags);
- vm = vm_create_barebones_type(vm_type);
+ if (flags & GUEST_MEMFD_FLAG_MMAP) {
+ if (flags & GUEST_MEMFD_FLAG_INIT_SHARED) {
+ gmem_test(mmap_supported, vm, flags);
+ gmem_test(fault_overflow, vm, flags);
+ } else {
+ gmem_test(fault_private, vm, flags);
+ }
- if (vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_MMAP))
- flags |= GUEST_MEMFD_FLAG_MMAP;
+ gmem_test(mmap_cow, vm, flags);
+ } else {
+ gmem_test(mmap_not_supported, vm, flags);
+ }
- test_create_guest_memfd_multiple(vm);
- test_create_guest_memfd_invalid_sizes(vm, flags, page_size);
+ gmem_test(file_size, vm, flags);
+ gmem_test(fallocate, vm, flags);
+ gmem_test(invalid_punch_hole, vm, flags);
+}
- fd = vm_create_guest_memfd(vm, total_size, flags);
+static void test_guest_memfd(unsigned long vm_type)
+{
+ struct kvm_vm *vm = vm_create_barebones_type(vm_type);
+ uint64_t flags;
- test_file_read_write(fd);
+ test_guest_memfd_flags(vm);
- if (flags & GUEST_MEMFD_FLAG_MMAP) {
- test_mmap_supported(fd, page_size, total_size);
- test_fault_overflow(fd, page_size, total_size);
- } else {
- test_mmap_not_supported(fd, page_size, total_size);
- }
+ __test_guest_memfd(vm, 0);
- test_file_size(fd, page_size, total_size);
- test_fallocate(fd, page_size, total_size);
- test_invalid_punch_hole(fd, page_size, total_size);
+ flags = vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS);
+ if (flags & GUEST_MEMFD_FLAG_MMAP)
+ __test_guest_memfd(vm, GUEST_MEMFD_FLAG_MMAP);
- test_guest_memfd_flags(vm, flags);
+ /* MMAP should always be supported if INIT_SHARED is supported. */
+ if (flags & GUEST_MEMFD_FLAG_INIT_SHARED)
+ __test_guest_memfd(vm, GUEST_MEMFD_FLAG_MMAP |
+ GUEST_MEMFD_FLAG_INIT_SHARED);
- close(fd);
kvm_vm_free(vm);
}
@@ -328,22 +336,26 @@ static void test_guest_memfd_guest(void)
size_t size;
int fd, i;
- if (!kvm_has_cap(KVM_CAP_GUEST_MEMFD_MMAP))
+ if (!kvm_check_cap(KVM_CAP_GUEST_MEMFD_FLAGS))
return;
vm = __vm_create_shape_with_one_vcpu(VM_SHAPE_DEFAULT, &vcpu, 1, guest_code);
- TEST_ASSERT(vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_MMAP),
- "Default VM type should always support guest_memfd mmap()");
+ TEST_ASSERT(vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS) & GUEST_MEMFD_FLAG_MMAP,
+ "Default VM type should support MMAP, supported flags = 0x%x",
+ vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS));
+ TEST_ASSERT(vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS) & GUEST_MEMFD_FLAG_INIT_SHARED,
+ "Default VM type should support INIT_SHARED, supported flags = 0x%x",
+ vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS));
size = vm->page_size;
- fd = vm_create_guest_memfd(vm, size, GUEST_MEMFD_FLAG_MMAP);
+ fd = vm_create_guest_memfd(vm, size, GUEST_MEMFD_FLAG_MMAP |
+ GUEST_MEMFD_FLAG_INIT_SHARED);
vm_set_user_memory_region2(vm, slot, KVM_MEM_GUEST_MEMFD, gpa, size, NULL, fd, 0);
- mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
- TEST_ASSERT(mem != MAP_FAILED, "mmap() on guest_memfd failed");
+ mem = kvm_mmap(size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);
memset(mem, 0xaa, size);
- munmap(mem, size);
+ kvm_munmap(mem, size);
virt_pg_map(vm, gpa, gpa);
vcpu_args_set(vcpu, 2, gpa, size);
@@ -351,8 +363,7 @@ static void test_guest_memfd_guest(void)
TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
- mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
- TEST_ASSERT(mem != MAP_FAILED, "mmap() on guest_memfd failed");
+ mem = kvm_mmap(size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);
for (i = 0; i < size; i++)
TEST_ASSERT_EQ(mem[i], 0xff);
@@ -366,6 +377,8 @@ int main(int argc, char *argv[])
TEST_REQUIRE(kvm_has_cap(KVM_CAP_GUEST_MEMFD));
+ page_size = getpagesize();
+
/*
* Not all architectures support KVM_CAP_VM_TYPES. However, those that
* support guest_memfd have that support for the default VM type.
diff --git a/tools/testing/selftests/kvm/include/arm64/processor.h b/tools/testing/selftests/kvm/include/arm64/processor.h
index 6f481475c135..ff928716574d 100644
--- a/tools/testing/selftests/kvm/include/arm64/processor.h
+++ b/tools/testing/selftests/kvm/include/arm64/processor.h
@@ -305,7 +305,17 @@ void test_wants_mte(void);
void test_disable_default_vgic(void);
bool vm_supports_el2(struct kvm_vm *vm);
-static bool vcpu_has_el2(struct kvm_vcpu *vcpu)
+
+static inline bool test_supports_el2(void)
+{
+ struct kvm_vm *vm = vm_create(1);
+ bool supported = vm_supports_el2(vm);
+
+ kvm_vm_free(vm);
+ return supported;
+}
+
+static inline bool vcpu_has_el2(struct kvm_vcpu *vcpu)
{
return vcpu->init.features[0] & BIT(KVM_ARM_VCPU_HAS_EL2);
}
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index 26cc30290e76..d3f3e455c031 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -286,6 +286,31 @@ static inline bool kvm_has_cap(long cap)
#define __KVM_SYSCALL_ERROR(_name, _ret) \
"%s failed, rc: %i errno: %i (%s)", (_name), (_ret), errno, strerror(errno)
+static inline void *__kvm_mmap(size_t size, int prot, int flags, int fd,
+ off_t offset)
+{
+ void *mem;
+
+ mem = mmap(NULL, size, prot, flags, fd, offset);
+ TEST_ASSERT(mem != MAP_FAILED, __KVM_SYSCALL_ERROR("mmap()",
+ (int)(unsigned long)MAP_FAILED));
+
+ return mem;
+}
+
+static inline void *kvm_mmap(size_t size, int prot, int flags, int fd)
+{
+ return __kvm_mmap(size, prot, flags, fd, 0);
+}
+
+static inline void kvm_munmap(void *mem, size_t size)
+{
+ int ret;
+
+ ret = munmap(mem, size);
+ TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
+}
+
/*
* Use the "inner", double-underscore macro when reporting errors from within
* other macros so that the name of ioctl() and not its literal numeric value
@@ -1273,4 +1298,6 @@ bool vm_is_gpa_protected(struct kvm_vm *vm, vm_paddr_t paddr);
uint32_t guest_get_vcpuid(void);
+bool kvm_arch_has_default_irqchip(void);
+
#endif /* SELFTEST_KVM_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
index c6ef895fbd9a..b4872ba8ed12 100644
--- a/tools/testing/selftests/kvm/include/test_util.h
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -8,6 +8,8 @@
#ifndef SELFTEST_KVM_TEST_UTIL_H
#define SELFTEST_KVM_TEST_UTIL_H
+#include <setjmp.h>
+#include <signal.h>
#include <stdlib.h>
#include <stdarg.h>
#include <stdbool.h>
@@ -78,6 +80,23 @@ do { \
__builtin_unreachable(); \
} while (0)
+extern sigjmp_buf expect_sigbus_jmpbuf;
+void expect_sigbus_handler(int signum);
+
+#define TEST_EXPECT_SIGBUS(action) \
+do { \
+ struct sigaction sa_old, sa_new = { \
+ .sa_handler = expect_sigbus_handler, \
+ }; \
+ \
+ sigaction(SIGBUS, &sa_new, &sa_old); \
+ if (sigsetjmp(expect_sigbus_jmpbuf, 1) == 0) { \
+ action; \
+ TEST_FAIL("'%s' should have triggered SIGBUS", #action); \
+ } \
+ sigaction(SIGBUS, &sa_old, NULL); \
+} while (0)
+
size_t parse_size(const char *size);
int64_t timespec_to_ns(struct timespec ts);
diff --git a/tools/testing/selftests/kvm/include/x86/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h
index fbe875eafca5..51cd84b9ca66 100644
--- a/tools/testing/selftests/kvm/include/x86/processor.h
+++ b/tools/testing/selftests/kvm/include/x86/processor.h
@@ -1362,6 +1362,11 @@ static inline bool kvm_is_unrestricted_guest_enabled(void)
return get_kvm_intel_param_bool("unrestricted_guest");
}
+static inline bool kvm_is_ignore_msrs(void)
+{
+ return get_kvm_param_bool("ignore_msrs");
+}
+
uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr,
int *level);
uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr);
diff --git a/tools/testing/selftests/kvm/irqfd_test.c b/tools/testing/selftests/kvm/irqfd_test.c
index 7c301b4c7005..5d7590d01868 100644
--- a/tools/testing/selftests/kvm/irqfd_test.c
+++ b/tools/testing/selftests/kvm/irqfd_test.c
@@ -89,11 +89,19 @@ static void juggle_eventfd_primary(struct kvm_vm *vm, int eventfd)
int main(int argc, char *argv[])
{
pthread_t racing_thread;
+ struct kvm_vcpu *unused;
int r, i;
- /* Create "full" VMs, as KVM_IRQFD requires an in-kernel IRQ chip. */
- vm1 = vm_create(1);
- vm2 = vm_create(1);
+ TEST_REQUIRE(kvm_arch_has_default_irqchip());
+
+ /*
+ * Create "full" VMs, as KVM_IRQFD requires an in-kernel IRQ chip. Also
+ * create an unused vCPU as certain architectures (like arm64) need to
+ * complete IRQ chip initialization after all possible vCPUs for a VM
+ * have been created.
+ */
+ vm1 = vm_create_with_one_vcpu(&unused, NULL);
+ vm2 = vm_create_with_one_vcpu(&unused, NULL);
WRITE_ONCE(__eventfd, kvm_new_eventfd());
diff --git a/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c b/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c
index 09f270545646..0e2f8ed90f30 100644
--- a/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c
+++ b/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c
@@ -15,6 +15,8 @@
#include "gic_v3.h"
#include "processor.h"
+#define GITS_COLLECTION_TARGET_SHIFT 16
+
static u64 its_read_u64(unsigned long offset)
{
return readq_relaxed(GITS_BASE_GVA + offset);
@@ -163,6 +165,11 @@ static void its_encode_collection(struct its_cmd_block *cmd, u16 col)
its_mask_encode(&cmd->raw_cmd[2], col, 15, 0);
}
+static u64 procnum_to_rdbase(u32 vcpu_id)
+{
+ return vcpu_id << GITS_COLLECTION_TARGET_SHIFT;
+}
+
#define GITS_CMDQ_POLL_ITERATIONS 0
static void its_send_cmd(void *cmdq_base, struct its_cmd_block *cmd)
@@ -217,7 +224,7 @@ void its_send_mapc_cmd(void *cmdq_base, u32 vcpu_id, u32 collection_id, bool val
its_encode_cmd(&cmd, GITS_CMD_MAPC);
its_encode_collection(&cmd, collection_id);
- its_encode_target(&cmd, vcpu_id);
+ its_encode_target(&cmd, procnum_to_rdbase(vcpu_id));
its_encode_valid(&cmd, valid);
its_send_cmd(cmdq_base, &cmd);
diff --git a/tools/testing/selftests/kvm/lib/arm64/processor.c b/tools/testing/selftests/kvm/lib/arm64/processor.c
index 369a4c87dd8f..54f6d17c78f7 100644
--- a/tools/testing/selftests/kvm/lib/arm64/processor.c
+++ b/tools/testing/selftests/kvm/lib/arm64/processor.c
@@ -725,3 +725,8 @@ void kvm_arch_vm_release(struct kvm_vm *vm)
if (vm->arch.has_gic)
close(vm->arch.gic_fd);
}
+
+bool kvm_arch_has_default_irqchip(void)
+{
+ return request_vgic && kvm_supports_vgic_v3();
+}
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 6743fbd9bd67..1a93d6361671 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -741,13 +741,11 @@ static void vm_vcpu_rm(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
int ret;
if (vcpu->dirty_gfns) {
- ret = munmap(vcpu->dirty_gfns, vm->dirty_ring_size);
- TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
+ kvm_munmap(vcpu->dirty_gfns, vm->dirty_ring_size);
vcpu->dirty_gfns = NULL;
}
- ret = munmap(vcpu->run, vcpu_mmap_sz());
- TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
+ kvm_munmap(vcpu->run, vcpu_mmap_sz());
ret = close(vcpu->fd);
TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret));
@@ -783,20 +781,16 @@ void kvm_vm_release(struct kvm_vm *vmp)
static void __vm_mem_region_delete(struct kvm_vm *vm,
struct userspace_mem_region *region)
{
- int ret;
-
rb_erase(&region->gpa_node, &vm->regions.gpa_tree);
rb_erase(&region->hva_node, &vm->regions.hva_tree);
hash_del(&region->slot_node);
sparsebit_free(&region->unused_phy_pages);
sparsebit_free(&region->protected_phy_pages);
- ret = munmap(region->mmap_start, region->mmap_size);
- TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
+ kvm_munmap(region->mmap_start, region->mmap_size);
if (region->fd >= 0) {
/* There's an extra map when using shared memory. */
- ret = munmap(region->mmap_alias, region->mmap_size);
- TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
+ kvm_munmap(region->mmap_alias, region->mmap_size);
close(region->fd);
}
if (region->region.guest_memfd >= 0)
@@ -1053,12 +1047,9 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
region->fd = kvm_memfd_alloc(region->mmap_size,
src_type == VM_MEM_SRC_SHARED_HUGETLB);
- region->mmap_start = mmap(NULL, region->mmap_size,
- PROT_READ | PROT_WRITE,
- vm_mem_backing_src_alias(src_type)->flag,
- region->fd, 0);
- TEST_ASSERT(region->mmap_start != MAP_FAILED,
- __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED));
+ region->mmap_start = kvm_mmap(region->mmap_size, PROT_READ | PROT_WRITE,
+ vm_mem_backing_src_alias(src_type)->flag,
+ region->fd);
TEST_ASSERT(!is_backing_src_hugetlb(src_type) ||
region->mmap_start == align_ptr_up(region->mmap_start, backing_src_pagesz),
@@ -1129,12 +1120,10 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
/* If shared memory, create an alias. */
if (region->fd >= 0) {
- region->mmap_alias = mmap(NULL, region->mmap_size,
- PROT_READ | PROT_WRITE,
- vm_mem_backing_src_alias(src_type)->flag,
- region->fd, 0);
- TEST_ASSERT(region->mmap_alias != MAP_FAILED,
- __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED));
+ region->mmap_alias = kvm_mmap(region->mmap_size,
+ PROT_READ | PROT_WRITE,
+ vm_mem_backing_src_alias(src_type)->flag,
+ region->fd);
/* Align host alias address */
region->host_alias = align_ptr_up(region->mmap_alias, alignment);
@@ -1344,10 +1333,8 @@ struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->run), "vcpu mmap size "
"smaller than expected, vcpu_mmap_sz: %zi expected_min: %zi",
vcpu_mmap_sz(), sizeof(*vcpu->run));
- vcpu->run = (struct kvm_run *) mmap(NULL, vcpu_mmap_sz(),
- PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0);
- TEST_ASSERT(vcpu->run != MAP_FAILED,
- __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED));
+ vcpu->run = kvm_mmap(vcpu_mmap_sz(), PROT_READ | PROT_WRITE,
+ MAP_SHARED, vcpu->fd);
if (kvm_has_cap(KVM_CAP_BINARY_STATS_FD))
vcpu->stats.fd = vcpu_get_stats_fd(vcpu);
@@ -1794,9 +1781,8 @@ void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu)
page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped exec");
- addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd,
- page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
- TEST_ASSERT(addr != MAP_FAILED, "Dirty ring map failed");
+ addr = __kvm_mmap(size, PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd,
+ page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
vcpu->dirty_gfns = addr;
vcpu->dirty_gfns_count = size / sizeof(struct kvm_dirty_gfn);
@@ -2344,3 +2330,8 @@ bool vm_is_gpa_protected(struct kvm_vm *vm, vm_paddr_t paddr)
pg = paddr >> vm->page_shift;
return sparsebit_is_set(region->protected_phy_pages, pg);
}
+
+__weak bool kvm_arch_has_default_irqchip(void)
+{
+ return false;
+}
diff --git a/tools/testing/selftests/kvm/lib/s390/processor.c b/tools/testing/selftests/kvm/lib/s390/processor.c
index 20cfe970e3e3..8ceeb17c819a 100644
--- a/tools/testing/selftests/kvm/lib/s390/processor.c
+++ b/tools/testing/selftests/kvm/lib/s390/processor.c
@@ -221,3 +221,8 @@ void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
{
}
+
+bool kvm_arch_has_default_irqchip(void)
+{
+ return true;
+}
diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c
index 03eb99af9b8d..8a1848586a85 100644
--- a/tools/testing/selftests/kvm/lib/test_util.c
+++ b/tools/testing/selftests/kvm/lib/test_util.c
@@ -18,6 +18,13 @@
#include "test_util.h"
+sigjmp_buf expect_sigbus_jmpbuf;
+
+void __attribute__((used)) expect_sigbus_handler(int signum)
+{
+ siglongjmp(expect_sigbus_jmpbuf, 1);
+}
+
/*
* Random number generator that is usable from guest code. This is the
* Park-Miller LCG using standard constants.
diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c
index c748cd9b2eef..b418502c5ecc 100644
--- a/tools/testing/selftests/kvm/lib/x86/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86/processor.c
@@ -1318,3 +1318,8 @@ bool sys_clocksource_is_based_on_tsc(void)
return ret;
}
+
+bool kvm_arch_has_default_irqchip(void)
+{
+ return true;
+}
diff --git a/tools/testing/selftests/kvm/mmu_stress_test.c b/tools/testing/selftests/kvm/mmu_stress_test.c
index 6a437d2be9fa..37b7e6524533 100644
--- a/tools/testing/selftests/kvm/mmu_stress_test.c
+++ b/tools/testing/selftests/kvm/mmu_stress_test.c
@@ -339,8 +339,7 @@ int main(int argc, char *argv[])
TEST_ASSERT(max_gpa > (4 * slot_size), "MAXPHYADDR <4gb ");
fd = kvm_memfd_alloc(slot_size, hugepages);
- mem = mmap(NULL, slot_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
- TEST_ASSERT(mem != MAP_FAILED, "mmap() failed");
+ mem = kvm_mmap(slot_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);
TEST_ASSERT(!madvise(mem, slot_size, MADV_NOHUGEPAGE), "madvise() failed");
@@ -413,7 +412,7 @@ int main(int argc, char *argv[])
for (slot = (slot - 1) & ~1ull; slot >= first_slot; slot -= 2)
vm_set_user_memory_region(vm, slot, 0, 0, 0, NULL);
- munmap(mem, slot_size / 2);
+ kvm_munmap(mem, slot_size / 2);
/* Sanity check that the vCPUs actually ran. */
for (i = 0; i < nr_vcpus; i++)
diff --git a/tools/testing/selftests/kvm/pre_fault_memory_test.c b/tools/testing/selftests/kvm/pre_fault_memory_test.c
index 0350a8896a2f..f04768c1d2e4 100644
--- a/tools/testing/selftests/kvm/pre_fault_memory_test.c
+++ b/tools/testing/selftests/kvm/pre_fault_memory_test.c
@@ -10,6 +10,7 @@
#include <test_util.h>
#include <kvm_util.h>
#include <processor.h>
+#include <pthread.h>
/* Arbitrarily chosen values */
#define TEST_SIZE (SZ_2M + PAGE_SIZE)
@@ -30,18 +31,66 @@ static void guest_code(uint64_t base_gpa)
GUEST_DONE();
}
-static void pre_fault_memory(struct kvm_vcpu *vcpu, u64 gpa, u64 size,
- u64 left)
+struct slot_worker_data {
+ struct kvm_vm *vm;
+ u64 gpa;
+ uint32_t flags;
+ bool worker_ready;
+ bool prefault_ready;
+ bool recreate_slot;
+};
+
+static void *delete_slot_worker(void *__data)
+{
+ struct slot_worker_data *data = __data;
+ struct kvm_vm *vm = data->vm;
+
+ WRITE_ONCE(data->worker_ready, true);
+
+ while (!READ_ONCE(data->prefault_ready))
+ cpu_relax();
+
+ vm_mem_region_delete(vm, TEST_SLOT);
+
+ while (!READ_ONCE(data->recreate_slot))
+ cpu_relax();
+
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, data->gpa,
+ TEST_SLOT, TEST_NPAGES, data->flags);
+
+ return NULL;
+}
+
+static void pre_fault_memory(struct kvm_vcpu *vcpu, u64 base_gpa, u64 offset,
+ u64 size, u64 expected_left, bool private)
{
struct kvm_pre_fault_memory range = {
- .gpa = gpa,
+ .gpa = base_gpa + offset,
.size = size,
.flags = 0,
};
- u64 prev;
+ struct slot_worker_data data = {
+ .vm = vcpu->vm,
+ .gpa = base_gpa,
+ .flags = private ? KVM_MEM_GUEST_MEMFD : 0,
+ };
+ bool slot_recreated = false;
+ pthread_t slot_worker;
int ret, save_errno;
+ u64 prev;
+
+ /*
+ * Concurrently delete (and recreate) the slot to test KVM's handling
+ * of a racing memslot deletion with prefaulting.
+ */
+ pthread_create(&slot_worker, NULL, delete_slot_worker, &data);
- do {
+ while (!READ_ONCE(data.worker_ready))
+ cpu_relax();
+
+ WRITE_ONCE(data.prefault_ready, true);
+
+ for (;;) {
prev = range.size;
ret = __vcpu_ioctl(vcpu, KVM_PRE_FAULT_MEMORY, &range);
save_errno = errno;
@@ -49,18 +98,65 @@ static void pre_fault_memory(struct kvm_vcpu *vcpu, u64 gpa, u64 size,
"%sexpecting range.size to change on %s",
ret < 0 ? "not " : "",
ret < 0 ? "failure" : "success");
- } while (ret >= 0 ? range.size : save_errno == EINTR);
- TEST_ASSERT(range.size == left,
- "Completed with %lld bytes left, expected %" PRId64,
- range.size, left);
+ /*
+ * Immediately retry prefaulting if KVM was interrupted by an
+ * unrelated signal/event.
+ */
+ if (ret < 0 && save_errno == EINTR)
+ continue;
+
+ /*
+ * Tell the worker to recreate the slot in order to complete
+ * prefaulting (if prefault didn't already succeed before the
+ * slot was deleted) and/or to prepare for the next testcase.
+ * Wait for the worker to exit so that the next invocation of
+ * prefaulting is guaranteed to complete (assuming no KVM bugs).
+ */
+ if (!slot_recreated) {
+ WRITE_ONCE(data.recreate_slot, true);
+ pthread_join(slot_worker, NULL);
+ slot_recreated = true;
+
+ /*
+ * Retry prefaulting to get a stable result, i.e. to
+ * avoid seeing random EAGAIN failures. Don't retry if
+ * prefaulting already succeeded, as KVM disallows
+ * prefaulting with size=0, i.e. blindly retrying would
+ * result in test failures due to EINVAL. KVM should
+ * always return success if all bytes are prefaulted,
+ * i.e. there is no need to guard against EAGAIN being
+ * returned.
+ */
+ if (range.size)
+ continue;
+ }
+
+ /*
+ * All done if there are no remaining bytes to prefault, or if
+ * prefaulting failed (EINTR was handled above, and EAGAIN due
+ * to prefaulting a memslot that's being actively deleted should
+ * be impossible since the memslot has already been recreated).
+ */
+ if (!range.size || ret < 0)
+ break;
+ }
- if (left == 0)
- __TEST_ASSERT_VM_VCPU_IOCTL(!ret, "KVM_PRE_FAULT_MEMORY", ret, vcpu->vm);
+ TEST_ASSERT(range.size == expected_left,
+ "Completed with %llu bytes left, expected %lu",
+ range.size, expected_left);
+
+ /*
+ * Assert success if prefaulting the entire range should succeed, i.e.
+ * complete with no bytes remaining. Otherwise prefaulting should have
+ * failed due to ENOENT (due to RET_PF_EMULATE for emulated MMIO when
+ * no memslot exists).
+ */
+ if (!expected_left)
+ TEST_ASSERT_VM_VCPU_IOCTL(!ret, KVM_PRE_FAULT_MEMORY, ret, vcpu->vm);
else
- /* No memory slot causes RET_PF_EMULATE. it results in -ENOENT. */
- __TEST_ASSERT_VM_VCPU_IOCTL(ret && save_errno == ENOENT,
- "KVM_PRE_FAULT_MEMORY", ret, vcpu->vm);
+ TEST_ASSERT_VM_VCPU_IOCTL(ret && save_errno == ENOENT,
+ KVM_PRE_FAULT_MEMORY, ret, vcpu->vm);
}
static void __test_pre_fault_memory(unsigned long vm_type, bool private)
@@ -97,9 +193,10 @@ static void __test_pre_fault_memory(unsigned long vm_type, bool private)
if (private)
vm_mem_set_private(vm, guest_test_phys_mem, TEST_SIZE);
- pre_fault_memory(vcpu, guest_test_phys_mem, SZ_2M, 0);
- pre_fault_memory(vcpu, guest_test_phys_mem + SZ_2M, PAGE_SIZE * 2, PAGE_SIZE);
- pre_fault_memory(vcpu, guest_test_phys_mem + TEST_SIZE, PAGE_SIZE, PAGE_SIZE);
+
+ pre_fault_memory(vcpu, guest_test_phys_mem, 0, SZ_2M, 0, private);
+ pre_fault_memory(vcpu, guest_test_phys_mem, SZ_2M, PAGE_SIZE * 2, PAGE_SIZE, private);
+ pre_fault_memory(vcpu, guest_test_phys_mem, TEST_SIZE, PAGE_SIZE, PAGE_SIZE, private);
vcpu_args_set(vcpu, 1, guest_test_virt_mem);
vcpu_run(vcpu);
diff --git a/tools/testing/selftests/kvm/s390/ucontrol_test.c b/tools/testing/selftests/kvm/s390/ucontrol_test.c
index d265b34c54be..50bc1c38225a 100644
--- a/tools/testing/selftests/kvm/s390/ucontrol_test.c
+++ b/tools/testing/selftests/kvm/s390/ucontrol_test.c
@@ -142,19 +142,17 @@ FIXTURE_SETUP(uc_kvm)
self->kvm_run_size = ioctl(self->kvm_fd, KVM_GET_VCPU_MMAP_SIZE, NULL);
ASSERT_GE(self->kvm_run_size, sizeof(struct kvm_run))
TH_LOG(KVM_IOCTL_ERROR(KVM_GET_VCPU_MMAP_SIZE, self->kvm_run_size));
- self->run = (struct kvm_run *)mmap(NULL, self->kvm_run_size,
- PROT_READ | PROT_WRITE, MAP_SHARED, self->vcpu_fd, 0);
- ASSERT_NE(self->run, MAP_FAILED);
+ self->run = kvm_mmap(self->kvm_run_size, PROT_READ | PROT_WRITE,
+ MAP_SHARED, self->vcpu_fd);
/**
* For virtual cpus that have been created with S390 user controlled
* virtual machines, the resulting vcpu fd can be memory mapped at page
* offset KVM_S390_SIE_PAGE_OFFSET in order to obtain a memory map of
* the virtual cpu's hardware control block.
*/
- self->sie_block = (struct kvm_s390_sie_block *)mmap(NULL, PAGE_SIZE,
- PROT_READ | PROT_WRITE, MAP_SHARED,
- self->vcpu_fd, KVM_S390_SIE_PAGE_OFFSET << PAGE_SHIFT);
- ASSERT_NE(self->sie_block, MAP_FAILED);
+ self->sie_block = __kvm_mmap(PAGE_SIZE, PROT_READ | PROT_WRITE,
+ MAP_SHARED, self->vcpu_fd,
+ KVM_S390_SIE_PAGE_OFFSET << PAGE_SHIFT);
TH_LOG("VM created %p %p", self->run, self->sie_block);
@@ -186,8 +184,8 @@ FIXTURE_SETUP(uc_kvm)
FIXTURE_TEARDOWN(uc_kvm)
{
- munmap(self->sie_block, PAGE_SIZE);
- munmap(self->run, self->kvm_run_size);
+ kvm_munmap(self->sie_block, PAGE_SIZE);
+ kvm_munmap(self->run, self->kvm_run_size);
close(self->vcpu_fd);
close(self->vm_fd);
close(self->kvm_fd);
diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c
index ce3ac0fd6dfb..7fe427ff9b38 100644
--- a/tools/testing/selftests/kvm/set_memory_region_test.c
+++ b/tools/testing/selftests/kvm/set_memory_region_test.c
@@ -433,10 +433,10 @@ static void test_add_max_memory_regions(void)
pr_info("Adding slots 0..%i, each memory region with %dK size\n",
(max_mem_slots - 1), MEM_REGION_SIZE >> 10);
- mem = mmap(NULL, (size_t)max_mem_slots * MEM_REGION_SIZE + alignment,
- PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
- TEST_ASSERT(mem != MAP_FAILED, "Failed to mmap() host");
+
+ mem = kvm_mmap((size_t)max_mem_slots * MEM_REGION_SIZE + alignment,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1);
mem_aligned = (void *)(((size_t) mem + alignment - 1) & ~(alignment - 1));
for (slot = 0; slot < max_mem_slots; slot++)
@@ -446,9 +446,8 @@ static void test_add_max_memory_regions(void)
mem_aligned + (uint64_t)slot * MEM_REGION_SIZE);
/* Check it cannot be added memory slots beyond the limit */
- mem_extra = mmap(NULL, MEM_REGION_SIZE, PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
- TEST_ASSERT(mem_extra != MAP_FAILED, "Failed to mmap() host");
+ mem_extra = kvm_mmap(MEM_REGION_SIZE, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1);
ret = __vm_set_user_memory_region(vm, max_mem_slots, 0,
(uint64_t)max_mem_slots * MEM_REGION_SIZE,
@@ -456,8 +455,8 @@ static void test_add_max_memory_regions(void)
TEST_ASSERT(ret == -1 && errno == EINVAL,
"Adding one more memory slot should fail with EINVAL");
- munmap(mem, (size_t)max_mem_slots * MEM_REGION_SIZE + alignment);
- munmap(mem_extra, MEM_REGION_SIZE);
+ kvm_munmap(mem, (size_t)max_mem_slots * MEM_REGION_SIZE + alignment);
+ kvm_munmap(mem_extra, MEM_REGION_SIZE);
kvm_vm_free(vm);
}
diff --git a/tools/testing/selftests/kvm/x86/msrs_test.c b/tools/testing/selftests/kvm/x86/msrs_test.c
new file mode 100644
index 000000000000..40d918aedce6
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/msrs_test.c
@@ -0,0 +1,489 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <asm/msr-index.h>
+
+#include <stdint.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+
+/* Use HYPERVISOR for MSRs that are emulated unconditionally (as is HYPERVISOR). */
+#define X86_FEATURE_NONE X86_FEATURE_HYPERVISOR
+
+struct kvm_msr {
+ const struct kvm_x86_cpu_feature feature;
+ const struct kvm_x86_cpu_feature feature2;
+ const char *name;
+ const u64 reset_val;
+ const u64 write_val;
+ const u64 rsvd_val;
+ const u32 index;
+ const bool is_kvm_defined;
+};
+
+#define ____MSR_TEST(msr, str, val, rsvd, reset, feat, f2, is_kvm) \
+{ \
+ .index = msr, \
+ .name = str, \
+ .write_val = val, \
+ .rsvd_val = rsvd, \
+ .reset_val = reset, \
+ .feature = X86_FEATURE_ ##feat, \
+ .feature2 = X86_FEATURE_ ##f2, \
+ .is_kvm_defined = is_kvm, \
+}
+
+#define __MSR_TEST(msr, str, val, rsvd, reset, feat) \
+ ____MSR_TEST(msr, str, val, rsvd, reset, feat, feat, false)
+
+#define MSR_TEST_NON_ZERO(msr, val, rsvd, reset, feat) \
+ __MSR_TEST(msr, #msr, val, rsvd, reset, feat)
+
+#define MSR_TEST(msr, val, rsvd, feat) \
+ __MSR_TEST(msr, #msr, val, rsvd, 0, feat)
+
+#define MSR_TEST2(msr, val, rsvd, feat, f2) \
+ ____MSR_TEST(msr, #msr, val, rsvd, 0, feat, f2, false)
+
+/*
+ * Note, use a page aligned value for the canonical value so that the value
+ * is compatible with MSRs that use bits 11:0 for things other than addresses.
+ */
+static const u64 canonical_val = 0x123456789000ull;
+
+/*
+ * Arbitrary value with bits set in every byte, but not all bits set. This is
+ * also a non-canonical value, but that's coincidental (any 64-bit value with
+ * an alternating 0s/1s pattern will be non-canonical).
+ */
+static const u64 u64_val = 0xaaaa5555aaaa5555ull;
+
+#define MSR_TEST_CANONICAL(msr, feat) \
+ __MSR_TEST(msr, #msr, canonical_val, NONCANONICAL, 0, feat)
+
+#define MSR_TEST_KVM(msr, val, rsvd, feat) \
+ ____MSR_TEST(KVM_REG_ ##msr, #msr, val, rsvd, 0, feat, feat, true)
+
+/*
+ * The main struct must be scoped to a function due to the use of structures to
+ * define features. For the global structure, allocate enough space for the
+ * foreseeable future without getting too ridiculous, to minimize maintenance
+ * costs (bumping the array size every time an MSR is added is really annoying).
+ */
+static struct kvm_msr msrs[128];
+static int idx;
+
+static bool ignore_unsupported_msrs;
+
+static u64 fixup_rdmsr_val(u32 msr, u64 want)
+{
+ /*
+ * AMD CPUs drop bits 63:32 on some MSRs that Intel CPUs support. KVM
+ * is supposed to emulate that behavior based on guest vendor model
+ * (which is the same as the host vendor model for this test).
+ */
+ if (!host_cpu_is_amd)
+ return want;
+
+ switch (msr) {
+ case MSR_IA32_SYSENTER_ESP:
+ case MSR_IA32_SYSENTER_EIP:
+ case MSR_TSC_AUX:
+ return want & GENMASK_ULL(31, 0);
+ default:
+ return want;
+ }
+}
+
+static void __rdmsr(u32 msr, u64 want)
+{
+ u64 val;
+ u8 vec;
+
+ vec = rdmsr_safe(msr, &val);
+ __GUEST_ASSERT(!vec, "Unexpected %s on RDMSR(0x%x)", ex_str(vec), msr);
+
+ __GUEST_ASSERT(val == want, "Wanted 0x%lx from RDMSR(0x%x), got 0x%lx",
+ want, msr, val);
+}
+
+static void __wrmsr(u32 msr, u64 val)
+{
+ u8 vec;
+
+ vec = wrmsr_safe(msr, val);
+ __GUEST_ASSERT(!vec, "Unexpected %s on WRMSR(0x%x, 0x%lx)",
+ ex_str(vec), msr, val);
+ __rdmsr(msr, fixup_rdmsr_val(msr, val));
+}
+
+static void guest_test_supported_msr(const struct kvm_msr *msr)
+{
+ __rdmsr(msr->index, msr->reset_val);
+ __wrmsr(msr->index, msr->write_val);
+ GUEST_SYNC(fixup_rdmsr_val(msr->index, msr->write_val));
+
+ __rdmsr(msr->index, msr->reset_val);
+}
+
+static void guest_test_unsupported_msr(const struct kvm_msr *msr)
+{
+ u64 val;
+ u8 vec;
+
+ /*
+ * KVM's ABI with respect to ignore_msrs is a mess and largely beyond
+ * repair, just skip the unsupported MSR tests.
+ */
+ if (ignore_unsupported_msrs)
+ goto skip_wrmsr_gp;
+
+ /*
+ * {S,U}_CET exist if IBT or SHSTK is supported, but with bits that are
+ * writable only if their associated feature is supported. Skip the
+ * RDMSR #GP test if the secondary feature is supported, but perform
+ * the WRMSR #GP test as the to-be-written value is tied to the primary
+ * feature. For all other MSRs, simply do nothing.
+ */
+ if (this_cpu_has(msr->feature2)) {
+ if (msr->index != MSR_IA32_U_CET &&
+ msr->index != MSR_IA32_S_CET)
+ goto skip_wrmsr_gp;
+
+ goto skip_rdmsr_gp;
+ }
+
+ vec = rdmsr_safe(msr->index, &val);
+ __GUEST_ASSERT(vec == GP_VECTOR, "Wanted #GP on RDMSR(0x%x), got %s",
+ msr->index, ex_str(vec));
+
+skip_rdmsr_gp:
+ vec = wrmsr_safe(msr->index, msr->write_val);
+ __GUEST_ASSERT(vec == GP_VECTOR, "Wanted #GP on WRMSR(0x%x, 0x%lx), got %s",
+ msr->index, msr->write_val, ex_str(vec));
+
+skip_wrmsr_gp:
+ GUEST_SYNC(0);
+}
+
+void guest_test_reserved_val(const struct kvm_msr *msr)
+{
+ /* Skip reserved value checks as well, ignore_msrs is trully a mess. */
+ if (ignore_unsupported_msrs)
+ return;
+
+ /*
+ * If the CPU will truncate the written value (e.g. SYSENTER on AMD),
+ * expect success and a truncated value, not #GP.
+ */
+ if (!this_cpu_has(msr->feature) ||
+ msr->rsvd_val == fixup_rdmsr_val(msr->index, msr->rsvd_val)) {
+ u8 vec = wrmsr_safe(msr->index, msr->rsvd_val);
+
+ __GUEST_ASSERT(vec == GP_VECTOR,
+ "Wanted #GP on WRMSR(0x%x, 0x%lx), got %s",
+ msr->index, msr->rsvd_val, ex_str(vec));
+ } else {
+ __wrmsr(msr->index, msr->rsvd_val);
+ __wrmsr(msr->index, msr->reset_val);
+ }
+}
+
+static void guest_main(void)
+{
+ for (;;) {
+ const struct kvm_msr *msr = &msrs[READ_ONCE(idx)];
+
+ if (this_cpu_has(msr->feature))
+ guest_test_supported_msr(msr);
+ else
+ guest_test_unsupported_msr(msr);
+
+ if (msr->rsvd_val)
+ guest_test_reserved_val(msr);
+
+ GUEST_SYNC(msr->reset_val);
+ }
+}
+
+static bool has_one_reg;
+static bool use_one_reg;
+
+#define KVM_X86_MAX_NR_REGS 1
+
+static bool vcpu_has_reg(struct kvm_vcpu *vcpu, u64 reg)
+{
+ struct {
+ struct kvm_reg_list list;
+ u64 regs[KVM_X86_MAX_NR_REGS];
+ } regs = {};
+ int r, i;
+
+ /*
+ * If KVM_GET_REG_LIST succeeds with n=0, i.e. there are no supported
+ * regs, then the vCPU obviously doesn't support the reg.
+ */
+ r = __vcpu_ioctl(vcpu, KVM_GET_REG_LIST, &regs.list);
+ if (!r)
+ return false;
+
+ TEST_ASSERT_EQ(errno, E2BIG);
+
+ /*
+ * KVM x86 is expected to support enumerating a relative small number
+ * of regs. The majority of registers supported by KVM_{G,S}ET_ONE_REG
+ * are enumerated via other ioctls, e.g. KVM_GET_MSR_INDEX_LIST. For
+ * simplicity, hardcode the maximum number of regs and manually update
+ * the test as necessary.
+ */
+ TEST_ASSERT(regs.list.n <= KVM_X86_MAX_NR_REGS,
+ "KVM reports %llu regs, test expects at most %u regs, stale test?",
+ regs.list.n, KVM_X86_MAX_NR_REGS);
+
+ vcpu_ioctl(vcpu, KVM_GET_REG_LIST, &regs.list);
+ for (i = 0; i < regs.list.n; i++) {
+ if (regs.regs[i] == reg)
+ return true;
+ }
+
+ return false;
+}
+
+static void host_test_kvm_reg(struct kvm_vcpu *vcpu)
+{
+ bool has_reg = vcpu_cpuid_has(vcpu, msrs[idx].feature);
+ u64 reset_val = msrs[idx].reset_val;
+ u64 write_val = msrs[idx].write_val;
+ u64 rsvd_val = msrs[idx].rsvd_val;
+ u32 reg = msrs[idx].index;
+ u64 val;
+ int r;
+
+ if (!use_one_reg)
+ return;
+
+ TEST_ASSERT_EQ(vcpu_has_reg(vcpu, KVM_X86_REG_KVM(reg)), has_reg);
+
+ if (!has_reg) {
+ r = __vcpu_get_reg(vcpu, KVM_X86_REG_KVM(reg), &val);
+ TEST_ASSERT(r && errno == EINVAL,
+ "Expected failure on get_reg(0x%x)", reg);
+ rsvd_val = 0;
+ goto out;
+ }
+
+ val = vcpu_get_reg(vcpu, KVM_X86_REG_KVM(reg));
+ TEST_ASSERT(val == reset_val, "Wanted 0x%lx from get_reg(0x%x), got 0x%lx",
+ reset_val, reg, val);
+
+ vcpu_set_reg(vcpu, KVM_X86_REG_KVM(reg), write_val);
+ val = vcpu_get_reg(vcpu, KVM_X86_REG_KVM(reg));
+ TEST_ASSERT(val == write_val, "Wanted 0x%lx from get_reg(0x%x), got 0x%lx",
+ write_val, reg, val);
+
+out:
+ r = __vcpu_set_reg(vcpu, KVM_X86_REG_KVM(reg), rsvd_val);
+ TEST_ASSERT(r, "Expected failure on set_reg(0x%x, 0x%lx)", reg, rsvd_val);
+}
+
+static void host_test_msr(struct kvm_vcpu *vcpu, u64 guest_val)
+{
+ u64 reset_val = msrs[idx].reset_val;
+ u32 msr = msrs[idx].index;
+ u64 val;
+
+ if (!kvm_cpu_has(msrs[idx].feature))
+ return;
+
+ val = vcpu_get_msr(vcpu, msr);
+ TEST_ASSERT(val == guest_val, "Wanted 0x%lx from get_msr(0x%x), got 0x%lx",
+ guest_val, msr, val);
+
+ if (use_one_reg)
+ vcpu_set_reg(vcpu, KVM_X86_REG_MSR(msr), reset_val);
+ else
+ vcpu_set_msr(vcpu, msr, reset_val);
+
+ val = vcpu_get_msr(vcpu, msr);
+ TEST_ASSERT(val == reset_val, "Wanted 0x%lx from get_msr(0x%x), got 0x%lx",
+ reset_val, msr, val);
+
+ if (!has_one_reg)
+ return;
+
+ val = vcpu_get_reg(vcpu, KVM_X86_REG_MSR(msr));
+ TEST_ASSERT(val == reset_val, "Wanted 0x%lx from get_reg(0x%x), got 0x%lx",
+ reset_val, msr, val);
+}
+
+static void do_vcpu_run(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ for (;;) {
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ host_test_msr(vcpu, uc.args[1]);
+ return;
+ case UCALL_PRINTF:
+ pr_info("%s", uc.buffer);
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ case UCALL_DONE:
+ TEST_FAIL("Unexpected UCALL_DONE");
+ default:
+ TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+ }
+ }
+}
+
+static void vcpus_run(struct kvm_vcpu **vcpus, const int NR_VCPUS)
+{
+ int i;
+
+ for (i = 0; i < NR_VCPUS; i++)
+ do_vcpu_run(vcpus[i]);
+}
+
+#define MISC_ENABLES_RESET_VAL (MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL | MSR_IA32_MISC_ENABLE_BTS_UNAVAIL)
+
+static void test_msrs(void)
+{
+ const struct kvm_msr __msrs[] = {
+ MSR_TEST_NON_ZERO(MSR_IA32_MISC_ENABLE,
+ MISC_ENABLES_RESET_VAL | MSR_IA32_MISC_ENABLE_FAST_STRING,
+ MSR_IA32_MISC_ENABLE_FAST_STRING, MISC_ENABLES_RESET_VAL, NONE),
+ MSR_TEST_NON_ZERO(MSR_IA32_CR_PAT, 0x07070707, 0, 0x7040600070406, NONE),
+
+ /*
+ * TSC_AUX is supported if RDTSCP *or* RDPID is supported. Add
+ * entries for each features so that TSC_AUX doesn't exists for
+ * the "unsupported" vCPU, and obviously to test both cases.
+ */
+ MSR_TEST2(MSR_TSC_AUX, 0x12345678, u64_val, RDTSCP, RDPID),
+ MSR_TEST2(MSR_TSC_AUX, 0x12345678, u64_val, RDPID, RDTSCP),
+
+ MSR_TEST(MSR_IA32_SYSENTER_CS, 0x1234, 0, NONE),
+ /*
+ * SYSENTER_{ESP,EIP} are technically non-canonical on Intel,
+ * but KVM doesn't emulate that behavior on emulated writes,
+ * i.e. this test will observe different behavior if the MSR
+ * writes are handed by hardware vs. KVM. KVM's behavior is
+ * intended (though far from ideal), so don't bother testing
+ * non-canonical values.
+ */
+ MSR_TEST(MSR_IA32_SYSENTER_ESP, canonical_val, 0, NONE),
+ MSR_TEST(MSR_IA32_SYSENTER_EIP, canonical_val, 0, NONE),
+
+ MSR_TEST_CANONICAL(MSR_FS_BASE, LM),
+ MSR_TEST_CANONICAL(MSR_GS_BASE, LM),
+ MSR_TEST_CANONICAL(MSR_KERNEL_GS_BASE, LM),
+ MSR_TEST_CANONICAL(MSR_LSTAR, LM),
+ MSR_TEST_CANONICAL(MSR_CSTAR, LM),
+ MSR_TEST(MSR_SYSCALL_MASK, 0xffffffff, 0, LM),
+
+ MSR_TEST2(MSR_IA32_S_CET, CET_SHSTK_EN, CET_RESERVED, SHSTK, IBT),
+ MSR_TEST2(MSR_IA32_S_CET, CET_ENDBR_EN, CET_RESERVED, IBT, SHSTK),
+ MSR_TEST2(MSR_IA32_U_CET, CET_SHSTK_EN, CET_RESERVED, SHSTK, IBT),
+ MSR_TEST2(MSR_IA32_U_CET, CET_ENDBR_EN, CET_RESERVED, IBT, SHSTK),
+ MSR_TEST_CANONICAL(MSR_IA32_PL0_SSP, SHSTK),
+ MSR_TEST(MSR_IA32_PL0_SSP, canonical_val, canonical_val | 1, SHSTK),
+ MSR_TEST_CANONICAL(MSR_IA32_PL1_SSP, SHSTK),
+ MSR_TEST(MSR_IA32_PL1_SSP, canonical_val, canonical_val | 1, SHSTK),
+ MSR_TEST_CANONICAL(MSR_IA32_PL2_SSP, SHSTK),
+ MSR_TEST(MSR_IA32_PL2_SSP, canonical_val, canonical_val | 1, SHSTK),
+ MSR_TEST_CANONICAL(MSR_IA32_PL3_SSP, SHSTK),
+ MSR_TEST(MSR_IA32_PL3_SSP, canonical_val, canonical_val | 1, SHSTK),
+
+ MSR_TEST_KVM(GUEST_SSP, canonical_val, NONCANONICAL, SHSTK),
+ };
+
+ const struct kvm_x86_cpu_feature feat_none = X86_FEATURE_NONE;
+ const struct kvm_x86_cpu_feature feat_lm = X86_FEATURE_LM;
+
+ /*
+ * Create three vCPUs, but run them on the same task, to validate KVM's
+ * context switching of MSR state. Don't pin the task to a pCPU to
+ * also validate KVM's handling of cross-pCPU migration. Use the full
+ * set of features for the first two vCPUs, but clear all features in
+ * third vCPU in order to test both positive and negative paths.
+ */
+ const int NR_VCPUS = 3;
+ struct kvm_vcpu *vcpus[NR_VCPUS];
+ struct kvm_vm *vm;
+ int i;
+
+ kvm_static_assert(sizeof(__msrs) <= sizeof(msrs));
+ kvm_static_assert(ARRAY_SIZE(__msrs) <= ARRAY_SIZE(msrs));
+ memcpy(msrs, __msrs, sizeof(__msrs));
+
+ ignore_unsupported_msrs = kvm_is_ignore_msrs();
+
+ vm = vm_create_with_vcpus(NR_VCPUS, guest_main, vcpus);
+
+ sync_global_to_guest(vm, msrs);
+ sync_global_to_guest(vm, ignore_unsupported_msrs);
+
+ /*
+ * Clear features in the "unsupported features" vCPU. This needs to be
+ * done before the first vCPU run as KVM's ABI is that guest CPUID is
+ * immutable once the vCPU has been run.
+ */
+ for (idx = 0; idx < ARRAY_SIZE(__msrs); idx++) {
+ /*
+ * Don't clear LM; selftests are 64-bit only, and KVM doesn't
+ * honor LM=0 for MSRs that are supposed to exist if and only
+ * if the vCPU is a 64-bit model. Ditto for NONE; clearing a
+ * fake feature flag will result in false failures.
+ */
+ if (memcmp(&msrs[idx].feature, &feat_lm, sizeof(feat_lm)) &&
+ memcmp(&msrs[idx].feature, &feat_none, sizeof(feat_none)))
+ vcpu_clear_cpuid_feature(vcpus[2], msrs[idx].feature);
+ }
+
+ for (idx = 0; idx < ARRAY_SIZE(__msrs); idx++) {
+ struct kvm_msr *msr = &msrs[idx];
+
+ if (msr->is_kvm_defined) {
+ for (i = 0; i < NR_VCPUS; i++)
+ host_test_kvm_reg(vcpus[i]);
+ continue;
+ }
+
+ /*
+ * Verify KVM_GET_SUPPORTED_CPUID and KVM_GET_MSR_INDEX_LIST
+ * are consistent with respect to MSRs whose existence is
+ * enumerated via CPUID. Skip the check for FS/GS.base MSRs,
+ * as they aren't reported in the save/restore list since their
+ * state is managed via SREGS.
+ */
+ TEST_ASSERT(msr->index == MSR_FS_BASE || msr->index == MSR_GS_BASE ||
+ kvm_msr_is_in_save_restore_list(msr->index) ==
+ (kvm_cpu_has(msr->feature) || kvm_cpu_has(msr->feature2)),
+ "%s %s in save/restore list, but %s according to CPUID", msr->name,
+ kvm_msr_is_in_save_restore_list(msr->index) ? "is" : "isn't",
+ (kvm_cpu_has(msr->feature) || kvm_cpu_has(msr->feature2)) ?
+ "supported" : "unsupported");
+
+ sync_global_to_guest(vm, idx);
+
+ vcpus_run(vcpus, NR_VCPUS);
+ vcpus_run(vcpus, NR_VCPUS);
+ }
+
+ kvm_vm_free(vm);
+}
+
+int main(void)
+{
+ has_one_reg = kvm_has_cap(KVM_CAP_ONE_REG);
+
+ test_msrs();
+
+ if (has_one_reg) {
+ use_one_reg = true;
+ test_msrs();
+ }
+}
diff --git a/tools/testing/selftests/kvm/x86/pmu_counters_test.c b/tools/testing/selftests/kvm/x86/pmu_counters_test.c
index bb215230cc8a..3eaa216b96c0 100644
--- a/tools/testing/selftests/kvm/x86/pmu_counters_test.c
+++ b/tools/testing/selftests/kvm/x86/pmu_counters_test.c
@@ -14,10 +14,10 @@
#define NUM_BRANCH_INSNS_RETIRED (NUM_LOOPS)
/*
- * Number of instructions in each loop. 1 CLFLUSH/CLFLUSHOPT/NOP, 1 MFENCE,
- * 1 LOOP.
+ * Number of instructions in each loop. 1 ENTER, 1 CLFLUSH/CLFLUSHOPT/NOP,
+ * 1 MFENCE, 1 MOV, 1 LEAVE, 1 LOOP.
*/
-#define NUM_INSNS_PER_LOOP 4
+#define NUM_INSNS_PER_LOOP 6
/*
* Number of "extra" instructions that will be counted, i.e. the number of
@@ -226,9 +226,11 @@ do { \
__asm__ __volatile__("wrmsr\n\t" \
" mov $" __stringify(NUM_LOOPS) ", %%ecx\n\t" \
"1:\n\t" \
+ FEP "enter $0, $0\n\t" \
clflush "\n\t" \
"mfence\n\t" \
"mov %[m], %%eax\n\t" \
+ FEP "leave\n\t" \
FEP "loop 1b\n\t" \
FEP "mov %%edi, %%ecx\n\t" \
FEP "xor %%eax, %%eax\n\t" \
diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c
index 9e3be2ee7f1b..f917b4c4c943 100644
--- a/tools/testing/selftests/mm/uffd-unit-tests.c
+++ b/tools/testing/selftests/mm/uffd-unit-tests.c
@@ -1758,10 +1758,15 @@ int main(int argc, char *argv[])
uffd_test_ops = mem_type->mem_ops;
uffd_test_case_ops = test->test_case_ops;
- if (mem_type->mem_flag & (MEM_HUGETLB_PRIVATE | MEM_HUGETLB))
+ if (mem_type->mem_flag & (MEM_HUGETLB_PRIVATE | MEM_HUGETLB)) {
gopts.page_size = default_huge_page_size();
- else
+ if (gopts.page_size == 0) {
+ uffd_test_skip("huge page size is 0, feature missing?");
+ continue;
+ }
+ } else {
gopts.page_size = psize();
+ }
/* Ensure we have at least 2 pages */
gopts.nr_pages = MAX(UFFD_TEST_MEM_SIZE, gopts.page_size * 2)
@@ -1776,12 +1781,6 @@ int main(int argc, char *argv[])
continue;
uffd_test_start("%s on %s", test->name, mem_type->name);
- if ((mem_type->mem_flag == MEM_HUGETLB ||
- mem_type->mem_flag == MEM_HUGETLB_PRIVATE) &&
- (default_huge_page_size() == 0)) {
- uffd_test_skip("huge page size is 0, feature missing?");
- continue;
- }
if (!uffd_feature_supported(test)) {
uffd_test_skip("feature missing");
continue;
diff --git a/tools/testing/selftests/namespaces/.gitignore b/tools/testing/selftests/namespaces/.gitignore
index ccfb40837a73..0989e80da457 100644
--- a/tools/testing/selftests/namespaces/.gitignore
+++ b/tools/testing/selftests/namespaces/.gitignore
@@ -1,3 +1,12 @@
nsid_test
file_handle_test
init_ino_test
+ns_active_ref_test
+listns_test
+listns_permissions_test
+listns_efault_test
+siocgskns_test
+cred_change_test
+stress_test
+listns_pagination_bug
+regression_pidfd_setns_test
diff --git a/tools/testing/selftests/namespaces/Makefile b/tools/testing/selftests/namespaces/Makefile
index 5fe4b3dc07d3..fbb821652c17 100644
--- a/tools/testing/selftests/namespaces/Makefile
+++ b/tools/testing/selftests/namespaces/Makefile
@@ -1,7 +1,29 @@
# SPDX-License-Identifier: GPL-2.0-only
CFLAGS += -Wall -O0 -g $(KHDR_INCLUDES) $(TOOLS_INCLUDES)
+LDLIBS += -lcap
-TEST_GEN_PROGS := nsid_test file_handle_test init_ino_test
+TEST_GEN_PROGS := nsid_test \
+ file_handle_test \
+ init_ino_test \
+ ns_active_ref_test \
+ listns_test \
+ listns_permissions_test \
+ listns_efault_test \
+ siocgskns_test \
+ cred_change_test \
+ stress_test \
+ listns_pagination_bug \
+ regression_pidfd_setns_test
include ../lib.mk
+$(OUTPUT)/ns_active_ref_test: ../filesystems/utils.c
+$(OUTPUT)/listns_test: ../filesystems/utils.c
+$(OUTPUT)/listns_permissions_test: ../filesystems/utils.c
+$(OUTPUT)/listns_efault_test: ../filesystems/utils.c
+$(OUTPUT)/siocgskns_test: ../filesystems/utils.c
+$(OUTPUT)/cred_change_test: ../filesystems/utils.c
+$(OUTPUT)/stress_test: ../filesystems/utils.c
+$(OUTPUT)/listns_pagination_bug: ../filesystems/utils.c
+$(OUTPUT)/regression_pidfd_setns_test: ../filesystems/utils.c
+
diff --git a/tools/testing/selftests/namespaces/cred_change_test.c b/tools/testing/selftests/namespaces/cred_change_test.c
new file mode 100644
index 000000000000..7b4f5ad3f725
--- /dev/null
+++ b/tools/testing/selftests/namespaces/cred_change_test.c
@@ -0,0 +1,814 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/capability.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <linux/nsfs.h>
+#include "../kselftest_harness.h"
+#include "../filesystems/utils.h"
+#include "wrappers.h"
+
+/*
+ * Test credential changes and their impact on namespace active references.
+ */
+
+/*
+ * Test setuid() in a user namespace properly swaps active references.
+ * Create a user namespace with multiple UIDs mapped, then setuid() between them.
+ * Verify that the user namespace remains active throughout.
+ */
+TEST(setuid_preserves_active_refs)
+{
+ pid_t pid;
+ int status;
+ __u64 userns_id;
+ struct ns_id_req req = {
+ .size = sizeof(req),
+ .spare = 0,
+ .ns_id = 0,
+ .ns_type = CLONE_NEWUSER,
+ .spare2 = 0,
+ .user_ns_id = 0,
+ };
+ __u64 ns_ids[256];
+ ssize_t ret;
+ int i;
+ bool found = false;
+ int pipefd[2];
+
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child process */
+ int fd, userns_fd;
+ __u64 child_userns_id;
+ uid_t orig_uid = getuid();
+ int setuid_count;
+
+ close(pipefd[0]);
+
+ /* Create new user namespace with multiple UIDs mapped (0-9) */
+ userns_fd = get_userns_fd(0, orig_uid, 10);
+ if (userns_fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ if (setns(userns_fd, CLONE_NEWUSER) < 0) {
+ close(userns_fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(userns_fd);
+
+ /* Get user namespace ID */
+ fd = open("/proc/self/ns/user", O_RDONLY);
+ if (fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ if (ioctl(fd, NS_GET_ID, &child_userns_id) < 0) {
+ close(fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(fd);
+
+ /* Send namespace ID to parent */
+ write(pipefd[1], &child_userns_id, sizeof(child_userns_id));
+
+ /*
+ * Perform multiple setuid() calls.
+ * Each setuid() triggers commit_creds() which should properly
+ * swap active references via switch_cred_namespaces().
+ */
+ for (setuid_count = 0; setuid_count < 50; setuid_count++) {
+ uid_t target_uid = (setuid_count % 10);
+ if (setuid(target_uid) < 0) {
+ if (errno != EPERM) {
+ close(pipefd[1]);
+ exit(1);
+ }
+ }
+ }
+
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Parent process */
+ close(pipefd[1]);
+
+ if (read(pipefd[0], &userns_id, sizeof(userns_id)) != sizeof(userns_id)) {
+ close(pipefd[0]);
+ kill(pid, SIGKILL);
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to get namespace ID from child");
+ }
+ close(pipefd[0]);
+
+ TH_LOG("Child user namespace ID: %llu", (unsigned long long)userns_id);
+
+ /* Verify namespace is active while child is running */
+ ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+ if (ret < 0) {
+ kill(pid, SIGKILL);
+ waitpid(pid, NULL, 0);
+ if (errno == ENOSYS)
+ SKIP(return, "listns() not supported");
+ ASSERT_GE(ret, 0);
+ }
+
+ for (i = 0; i < ret; i++) {
+ if (ns_ids[i] == userns_id) {
+ found = true;
+ break;
+ }
+ }
+ ASSERT_TRUE(found);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ /* Verify namespace becomes inactive after child exits */
+ ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+ ASSERT_GE(ret, 0);
+
+ found = false;
+ for (i = 0; i < ret; i++) {
+ if (ns_ids[i] == userns_id) {
+ found = true;
+ break;
+ }
+ }
+
+ ASSERT_FALSE(found);
+ TH_LOG("setuid() correctly preserved active references (no leak)");
+}
+
+/*
+ * Test setgid() in a user namespace properly handles active references.
+ */
+TEST(setgid_preserves_active_refs)
+{
+ pid_t pid;
+ int status;
+ __u64 userns_id;
+ struct ns_id_req req = {
+ .size = sizeof(req),
+ .spare = 0,
+ .ns_id = 0,
+ .ns_type = CLONE_NEWUSER,
+ .spare2 = 0,
+ .user_ns_id = 0,
+ };
+ __u64 ns_ids[256];
+ ssize_t ret;
+ int i;
+ bool found = false;
+ int pipefd[2];
+
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child process */
+ int fd, userns_fd;
+ __u64 child_userns_id;
+ uid_t orig_uid = getuid();
+ int setgid_count;
+
+ close(pipefd[0]);
+
+ /* Create new user namespace with multiple GIDs mapped */
+ userns_fd = get_userns_fd(0, orig_uid, 10);
+ if (userns_fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ if (setns(userns_fd, CLONE_NEWUSER) < 0) {
+ close(userns_fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(userns_fd);
+
+ /* Get user namespace ID */
+ fd = open("/proc/self/ns/user", O_RDONLY);
+ if (fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ if (ioctl(fd, NS_GET_ID, &child_userns_id) < 0) {
+ close(fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(fd);
+
+ write(pipefd[1], &child_userns_id, sizeof(child_userns_id));
+
+ /* Perform multiple setgid() calls */
+ for (setgid_count = 0; setgid_count < 50; setgid_count++) {
+ gid_t target_gid = (setgid_count % 10);
+ if (setgid(target_gid) < 0) {
+ if (errno != EPERM) {
+ close(pipefd[1]);
+ exit(1);
+ }
+ }
+ }
+
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Parent process */
+ close(pipefd[1]);
+
+ if (read(pipefd[0], &userns_id, sizeof(userns_id)) != sizeof(userns_id)) {
+ close(pipefd[0]);
+ kill(pid, SIGKILL);
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to get namespace ID from child");
+ }
+ close(pipefd[0]);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ /* Verify namespace becomes inactive */
+ ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+ if (ret < 0) {
+ if (errno == ENOSYS)
+ SKIP(return, "listns() not supported");
+ ASSERT_GE(ret, 0);
+ }
+
+ for (i = 0; i < ret; i++) {
+ if (ns_ids[i] == userns_id) {
+ found = true;
+ break;
+ }
+ }
+
+ ASSERT_FALSE(found);
+ TH_LOG("setgid() correctly preserved active references (no leak)");
+}
+
+/*
+ * Test setresuid() which changes real, effective, and saved UIDs.
+ * This should properly swap active references via commit_creds().
+ */
+TEST(setresuid_preserves_active_refs)
+{
+ pid_t pid;
+ int status;
+ __u64 userns_id;
+ struct ns_id_req req = {
+ .size = sizeof(req),
+ .spare = 0,
+ .ns_id = 0,
+ .ns_type = CLONE_NEWUSER,
+ .spare2 = 0,
+ .user_ns_id = 0,
+ };
+ __u64 ns_ids[256];
+ ssize_t ret;
+ int i;
+ bool found = false;
+ int pipefd[2];
+
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child process */
+ int fd, userns_fd;
+ __u64 child_userns_id;
+ uid_t orig_uid = getuid();
+ int setres_count;
+
+ close(pipefd[0]);
+
+ /* Create new user namespace */
+ userns_fd = get_userns_fd(0, orig_uid, 10);
+ if (userns_fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ if (setns(userns_fd, CLONE_NEWUSER) < 0) {
+ close(userns_fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(userns_fd);
+
+ /* Get user namespace ID */
+ fd = open("/proc/self/ns/user", O_RDONLY);
+ if (fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ if (ioctl(fd, NS_GET_ID, &child_userns_id) < 0) {
+ close(fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(fd);
+
+ write(pipefd[1], &child_userns_id, sizeof(child_userns_id));
+
+ /* Perform multiple setresuid() calls */
+ for (setres_count = 0; setres_count < 30; setres_count++) {
+ uid_t uid1 = (setres_count % 5);
+ uid_t uid2 = ((setres_count + 1) % 5);
+ uid_t uid3 = ((setres_count + 2) % 5);
+
+ if (setresuid(uid1, uid2, uid3) < 0) {
+ if (errno != EPERM) {
+ close(pipefd[1]);
+ exit(1);
+ }
+ }
+ }
+
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Parent process */
+ close(pipefd[1]);
+
+ if (read(pipefd[0], &userns_id, sizeof(userns_id)) != sizeof(userns_id)) {
+ close(pipefd[0]);
+ kill(pid, SIGKILL);
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to get namespace ID from child");
+ }
+ close(pipefd[0]);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ /* Verify namespace becomes inactive */
+ ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+ if (ret < 0) {
+ if (errno == ENOSYS)
+ SKIP(return, "listns() not supported");
+ ASSERT_GE(ret, 0);
+ }
+
+ for (i = 0; i < ret; i++) {
+ if (ns_ids[i] == userns_id) {
+ found = true;
+ break;
+ }
+ }
+
+ ASSERT_FALSE(found);
+ TH_LOG("setresuid() correctly preserved active references (no leak)");
+}
+
+/*
+ * Test credential changes across multiple user namespaces.
+ * Create nested user namespaces and verify active reference tracking.
+ */
+TEST(cred_change_nested_userns)
+{
+ pid_t pid;
+ int status;
+ __u64 parent_userns_id, child_userns_id;
+ struct ns_id_req req = {
+ .size = sizeof(req),
+ .spare = 0,
+ .ns_id = 0,
+ .ns_type = CLONE_NEWUSER,
+ .spare2 = 0,
+ .user_ns_id = 0,
+ };
+ __u64 ns_ids[256];
+ ssize_t ret;
+ int i;
+ bool found_parent = false, found_child = false;
+ int pipefd[2];
+
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child process */
+ int fd, userns_fd;
+ __u64 parent_id, child_id;
+ uid_t orig_uid = getuid();
+
+ close(pipefd[0]);
+
+ /* Create first user namespace */
+ userns_fd = get_userns_fd(0, orig_uid, 1);
+ if (userns_fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ if (setns(userns_fd, CLONE_NEWUSER) < 0) {
+ close(userns_fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(userns_fd);
+
+ /* Get first namespace ID */
+ fd = open("/proc/self/ns/user", O_RDONLY);
+ if (fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ if (ioctl(fd, NS_GET_ID, &parent_id) < 0) {
+ close(fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(fd);
+
+ /* Create nested user namespace */
+ userns_fd = get_userns_fd(0, 0, 1);
+ if (userns_fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ if (setns(userns_fd, CLONE_NEWUSER) < 0) {
+ close(userns_fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(userns_fd);
+
+ /* Get nested namespace ID */
+ fd = open("/proc/self/ns/user", O_RDONLY);
+ if (fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ if (ioctl(fd, NS_GET_ID, &child_id) < 0) {
+ close(fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(fd);
+
+ /* Send both IDs to parent */
+ write(pipefd[1], &parent_id, sizeof(parent_id));
+ write(pipefd[1], &child_id, sizeof(child_id));
+
+ /* Perform some credential changes in nested namespace */
+ setuid(0);
+ setgid(0);
+
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Parent process */
+ close(pipefd[1]);
+
+ /* Read both namespace IDs */
+ if (read(pipefd[0], &parent_userns_id, sizeof(parent_userns_id)) != sizeof(parent_userns_id)) {
+ close(pipefd[0]);
+ kill(pid, SIGKILL);
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to get parent namespace ID");
+ }
+
+ if (read(pipefd[0], &child_userns_id, sizeof(child_userns_id)) != sizeof(child_userns_id)) {
+ close(pipefd[0]);
+ kill(pid, SIGKILL);
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to get child namespace ID");
+ }
+ close(pipefd[0]);
+
+ TH_LOG("Parent userns: %llu, Child userns: %llu",
+ (unsigned long long)parent_userns_id,
+ (unsigned long long)child_userns_id);
+
+ /* Verify both namespaces are active */
+ ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+ if (ret < 0) {
+ kill(pid, SIGKILL);
+ waitpid(pid, NULL, 0);
+ if (errno == ENOSYS)
+ SKIP(return, "listns() not supported");
+ ASSERT_GE(ret, 0);
+ }
+
+ for (i = 0; i < ret; i++) {
+ if (ns_ids[i] == parent_userns_id)
+ found_parent = true;
+ if (ns_ids[i] == child_userns_id)
+ found_child = true;
+ }
+
+ ASSERT_TRUE(found_parent);
+ ASSERT_TRUE(found_child);
+
+ /* Wait for child */
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ /* Verify both namespaces become inactive */
+ ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+ ASSERT_GE(ret, 0);
+
+ found_parent = false;
+ found_child = false;
+ for (i = 0; i < ret; i++) {
+ if (ns_ids[i] == parent_userns_id)
+ found_parent = true;
+ if (ns_ids[i] == child_userns_id)
+ found_child = true;
+ }
+
+ ASSERT_FALSE(found_parent);
+ ASSERT_FALSE(found_child);
+ TH_LOG("Nested user namespace credential changes preserved active refs (no leak)");
+}
+
+/*
+ * Test rapid credential changes don't cause refcount imbalances.
+ * This stress-tests the switch_cred_namespaces() logic.
+ */
+TEST(rapid_cred_changes_no_leak)
+{
+ pid_t pid;
+ int status;
+ __u64 userns_id;
+ struct ns_id_req req = {
+ .size = sizeof(req),
+ .spare = 0,
+ .ns_id = 0,
+ .ns_type = CLONE_NEWUSER,
+ .spare2 = 0,
+ .user_ns_id = 0,
+ };
+ __u64 ns_ids[256];
+ ssize_t ret;
+ int i;
+ bool found = false;
+ int pipefd[2];
+
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child process */
+ int fd, userns_fd;
+ __u64 child_userns_id;
+ uid_t orig_uid = getuid();
+ int change_count;
+
+ close(pipefd[0]);
+
+ /* Create new user namespace with wider range of UIDs/GIDs */
+ userns_fd = get_userns_fd(0, orig_uid, 100);
+ if (userns_fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ if (setns(userns_fd, CLONE_NEWUSER) < 0) {
+ close(userns_fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(userns_fd);
+
+ /* Get user namespace ID */
+ fd = open("/proc/self/ns/user", O_RDONLY);
+ if (fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ if (ioctl(fd, NS_GET_ID, &child_userns_id) < 0) {
+ close(fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(fd);
+
+ write(pipefd[1], &child_userns_id, sizeof(child_userns_id));
+
+ /*
+ * Perform many rapid credential changes.
+ * Mix setuid, setgid, setreuid, setregid, setresuid, setresgid.
+ */
+ for (change_count = 0; change_count < 200; change_count++) {
+ switch (change_count % 6) {
+ case 0:
+ setuid(change_count % 50);
+ break;
+ case 1:
+ setgid(change_count % 50);
+ break;
+ case 2:
+ setreuid(change_count % 50, (change_count + 1) % 50);
+ break;
+ case 3:
+ setregid(change_count % 50, (change_count + 1) % 50);
+ break;
+ case 4:
+ setresuid(change_count % 50, (change_count + 1) % 50, (change_count + 2) % 50);
+ break;
+ case 5:
+ setresgid(change_count % 50, (change_count + 1) % 50, (change_count + 2) % 50);
+ break;
+ }
+ }
+
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Parent process */
+ close(pipefd[1]);
+
+ if (read(pipefd[0], &userns_id, sizeof(userns_id)) != sizeof(userns_id)) {
+ close(pipefd[0]);
+ kill(pid, SIGKILL);
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to get namespace ID from child");
+ }
+ close(pipefd[0]);
+
+ TH_LOG("Testing with user namespace ID: %llu", (unsigned long long)userns_id);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ /* Verify namespace becomes inactive (no leaked active refs) */
+ ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+ if (ret < 0) {
+ if (errno == ENOSYS)
+ SKIP(return, "listns() not supported");
+ ASSERT_GE(ret, 0);
+ }
+
+ for (i = 0; i < ret; i++) {
+ if (ns_ids[i] == userns_id) {
+ found = true;
+ break;
+ }
+ }
+
+ ASSERT_FALSE(found);
+ TH_LOG("200 rapid credential changes completed with no active ref leak");
+}
+
+/*
+ * Test setfsuid/setfsgid which change filesystem UID/GID.
+ * These also trigger credential changes but may have different code paths.
+ */
+TEST(setfsuid_preserves_active_refs)
+{
+ pid_t pid;
+ int status;
+ __u64 userns_id;
+ struct ns_id_req req = {
+ .size = sizeof(req),
+ .spare = 0,
+ .ns_id = 0,
+ .ns_type = CLONE_NEWUSER,
+ .spare2 = 0,
+ .user_ns_id = 0,
+ };
+ __u64 ns_ids[256];
+ ssize_t ret;
+ int i;
+ bool found = false;
+ int pipefd[2];
+
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child process */
+ int fd, userns_fd;
+ __u64 child_userns_id;
+ uid_t orig_uid = getuid();
+ int change_count;
+
+ close(pipefd[0]);
+
+ /* Create new user namespace */
+ userns_fd = get_userns_fd(0, orig_uid, 10);
+ if (userns_fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ if (setns(userns_fd, CLONE_NEWUSER) < 0) {
+ close(userns_fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(userns_fd);
+
+ /* Get user namespace ID */
+ fd = open("/proc/self/ns/user", O_RDONLY);
+ if (fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ if (ioctl(fd, NS_GET_ID, &child_userns_id) < 0) {
+ close(fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(fd);
+
+ write(pipefd[1], &child_userns_id, sizeof(child_userns_id));
+
+ /* Perform multiple setfsuid/setfsgid calls */
+ for (change_count = 0; change_count < 50; change_count++) {
+ setfsuid(change_count % 10);
+ setfsgid(change_count % 10);
+ }
+
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Parent process */
+ close(pipefd[1]);
+
+ if (read(pipefd[0], &userns_id, sizeof(userns_id)) != sizeof(userns_id)) {
+ close(pipefd[0]);
+ kill(pid, SIGKILL);
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to get namespace ID from child");
+ }
+ close(pipefd[0]);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ /* Verify namespace becomes inactive */
+ ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+ if (ret < 0) {
+ if (errno == ENOSYS)
+ SKIP(return, "listns() not supported");
+ ASSERT_GE(ret, 0);
+ }
+
+ for (i = 0; i < ret; i++) {
+ if (ns_ids[i] == userns_id) {
+ found = true;
+ break;
+ }
+ }
+
+ ASSERT_FALSE(found);
+ TH_LOG("setfsuid/setfsgid correctly preserved active references (no leak)");
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/namespaces/listns_efault_test.c b/tools/testing/selftests/namespaces/listns_efault_test.c
new file mode 100644
index 000000000000..c7ed4023d7a8
--- /dev/null
+++ b/tools/testing/selftests/namespaces/listns_efault_test.c
@@ -0,0 +1,530 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <linux/nsfs.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/mount.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include "../kselftest_harness.h"
+#include "../filesystems/utils.h"
+#include "../pidfd/pidfd.h"
+#include "wrappers.h"
+
+/*
+ * Test listns() error handling with invalid buffer addresses.
+ *
+ * When the buffer pointer is invalid (e.g., crossing page boundaries
+ * into unmapped memory), listns() returns EINVAL.
+ *
+ * This test also creates mount namespaces that get destroyed during
+ * iteration, testing that namespace cleanup happens outside the RCU
+ * read lock.
+ */
+TEST(listns_partial_fault_with_ns_cleanup)
+{
+ void *map;
+ __u64 *ns_ids;
+ ssize_t ret;
+ long page_size;
+ pid_t pid, iter_pid;
+ int pidfds[5];
+ int sv[5][2];
+ int iter_pidfd;
+ int i, status;
+ char c;
+
+ page_size = sysconf(_SC_PAGESIZE);
+ ASSERT_GT(page_size, 0);
+
+ /*
+ * Map two pages:
+ * - First page: readable and writable
+ * - Second page: will be unmapped to trigger EFAULT
+ */
+ map = mmap(NULL, page_size * 2, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ ASSERT_NE(map, MAP_FAILED);
+
+ /* Unmap the second page */
+ ret = munmap((char *)map + page_size, page_size);
+ ASSERT_EQ(ret, 0);
+
+ /*
+ * Position the buffer pointer so there's room for exactly one u64
+ * before the page boundary. The second u64 would fall into the
+ * unmapped page.
+ */
+ ns_ids = ((__u64 *)((char *)map + page_size)) - 1;
+
+ /*
+ * Create a separate process to run listns() in a loop concurrently
+ * with namespace creation and destruction.
+ */
+ iter_pid = create_child(&iter_pidfd, 0);
+ ASSERT_NE(iter_pid, -1);
+
+ if (iter_pid == 0) {
+ struct ns_id_req req = {
+ .size = sizeof(req),
+ .spare = 0,
+ .ns_id = 0,
+ .ns_type = 0, /* All types */
+ .spare2 = 0,
+ .user_ns_id = 0, /* Global listing */
+ };
+ int iter_ret;
+
+ /*
+ * Loop calling listns() until killed.
+ * The kernel should:
+ * 1. Successfully write the first namespace ID (within valid page)
+ * 2. Fail with EFAULT when trying to write the second ID (unmapped page)
+ * 3. Handle concurrent namespace destruction without deadlock
+ */
+ while (1) {
+ iter_ret = sys_listns(&req, ns_ids, 2, 0);
+
+ if (iter_ret == -1 && errno == ENOSYS)
+ _exit(PIDFD_SKIP);
+ }
+ }
+
+ /* Small delay to let iterator start looping */
+ usleep(50000);
+
+ /*
+ * Create several child processes, each in its own mount namespace.
+ * These will be destroyed while the iterator is running listns().
+ */
+ for (i = 0; i < 5; i++) {
+ /* Create socketpair for synchronization */
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sv[i]), 0);
+
+ pid = create_child(&pidfds[i], CLONE_NEWNS);
+ ASSERT_NE(pid, -1);
+
+ if (pid == 0) {
+ close(sv[i][0]); /* Close parent end */
+
+ if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0))
+ _exit(1);
+
+ /* Child: create a couple of tmpfs mounts */
+ if (mkdir("/tmp/test_mnt1", 0755) == -1 && errno != EEXIST)
+ _exit(1);
+ if (mkdir("/tmp/test_mnt2", 0755) == -1 && errno != EEXIST)
+ _exit(1);
+
+ if (mount("tmpfs", "/tmp/test_mnt1", "tmpfs", 0, NULL) == -1)
+ _exit(1);
+ if (mount("tmpfs", "/tmp/test_mnt2", "tmpfs", 0, NULL) == -1)
+ _exit(1);
+
+ /* Signal parent that setup is complete */
+ if (write_nointr(sv[i][1], "R", 1) != 1)
+ _exit(1);
+
+ /* Wait for parent to signal us to exit */
+ if (read_nointr(sv[i][1], &c, 1) != 1)
+ _exit(1);
+
+ close(sv[i][1]);
+ _exit(0);
+ }
+
+ close(sv[i][1]); /* Close child end */
+ }
+
+ /* Wait for all children to finish setup */
+ for (i = 0; i < 5; i++) {
+ ret = read_nointr(sv[i][0], &c, 1);
+ ASSERT_EQ(ret, 1);
+ ASSERT_EQ(c, 'R');
+ }
+
+ /*
+ * Signal children to exit. This will destroy their mount namespaces
+ * while listns() is iterating the namespace tree.
+ * This tests that cleanup happens outside the RCU read lock.
+ */
+ for (i = 0; i < 5; i++)
+ write_nointr(sv[i][0], "X", 1);
+
+ /* Wait for all mount namespace children to exit and cleanup */
+ for (i = 0; i < 5; i++) {
+ waitpid(-1, NULL, 0);
+ close(sv[i][0]);
+ close(pidfds[i]);
+ }
+
+ /* Kill iterator and wait for it */
+ sys_pidfd_send_signal(iter_pidfd, SIGKILL, NULL, 0);
+ ret = waitpid(iter_pid, &status, 0);
+ ASSERT_EQ(ret, iter_pid);
+ close(iter_pidfd);
+
+ /* Should have been killed */
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_EQ(WTERMSIG(status), SIGKILL);
+
+ /* Clean up */
+ munmap(map, page_size);
+}
+
+/*
+ * Test listns() error handling when the entire buffer is invalid.
+ * This is a sanity check that basic invalid pointer detection works.
+ */
+TEST(listns_complete_fault)
+{
+ struct ns_id_req req = {
+ .size = sizeof(req),
+ .spare = 0,
+ .ns_id = 0,
+ .ns_type = 0,
+ .spare2 = 0,
+ .user_ns_id = 0,
+ };
+ __u64 *ns_ids;
+ ssize_t ret;
+
+ /* Use a clearly invalid pointer */
+ ns_ids = (__u64 *)0xdeadbeef;
+
+ ret = sys_listns(&req, ns_ids, 10, 0);
+
+ if (ret == -1 && errno == ENOSYS)
+ SKIP(return, "listns() not supported");
+
+ /* Should fail with EFAULT */
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, EFAULT);
+}
+
+/*
+ * Test listns() error handling when the buffer is NULL.
+ */
+TEST(listns_null_buffer)
+{
+ struct ns_id_req req = {
+ .size = sizeof(req),
+ .spare = 0,
+ .ns_id = 0,
+ .ns_type = 0,
+ .spare2 = 0,
+ .user_ns_id = 0,
+ };
+ ssize_t ret;
+
+ /* NULL buffer with non-zero count should fail */
+ ret = sys_listns(&req, NULL, 10, 0);
+
+ if (ret == -1 && errno == ENOSYS)
+ SKIP(return, "listns() not supported");
+
+ /* Should fail with EFAULT */
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, EFAULT);
+}
+
+/*
+ * Test listns() with a buffer that becomes invalid mid-iteration
+ * (after several successful writes), combined with mount namespace
+ * destruction to test RCU cleanup logic.
+ */
+TEST(listns_late_fault_with_ns_cleanup)
+{
+ void *map;
+ __u64 *ns_ids;
+ ssize_t ret;
+ long page_size;
+ pid_t pid, iter_pid;
+ int pidfds[10];
+ int sv[10][2];
+ int iter_pidfd;
+ int i, status;
+ char c;
+
+ page_size = sysconf(_SC_PAGESIZE);
+ ASSERT_GT(page_size, 0);
+
+ /* Map two pages */
+ map = mmap(NULL, page_size * 2, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ ASSERT_NE(map, MAP_FAILED);
+
+ /* Unmap the second page */
+ ret = munmap((char *)map + page_size, page_size);
+ ASSERT_EQ(ret, 0);
+
+ /*
+ * Position buffer so we can write several u64s successfully
+ * before hitting the page boundary.
+ */
+ ns_ids = ((__u64 *)((char *)map + page_size)) - 5;
+
+ /*
+ * Create a separate process to run listns() concurrently.
+ */
+ iter_pid = create_child(&iter_pidfd, 0);
+ ASSERT_NE(iter_pid, -1);
+
+ if (iter_pid == 0) {
+ struct ns_id_req req = {
+ .size = sizeof(req),
+ .spare = 0,
+ .ns_id = 0,
+ .ns_type = 0,
+ .spare2 = 0,
+ .user_ns_id = 0,
+ };
+ int iter_ret;
+
+ /*
+ * Loop calling listns() until killed.
+ * Request 10 namespace IDs while namespaces are being destroyed.
+ * This tests:
+ * 1. EFAULT handling when buffer becomes invalid
+ * 2. Namespace cleanup outside RCU read lock during iteration
+ */
+ while (1) {
+ iter_ret = sys_listns(&req, ns_ids, 10, 0);
+
+ if (iter_ret == -1 && errno == ENOSYS)
+ _exit(PIDFD_SKIP);
+ }
+ }
+
+ /* Small delay to let iterator start looping */
+ usleep(50000);
+
+ /*
+ * Create more children with mount namespaces to increase the
+ * likelihood that namespace cleanup happens during iteration.
+ */
+ for (i = 0; i < 10; i++) {
+ /* Create socketpair for synchronization */
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sv[i]), 0);
+
+ pid = create_child(&pidfds[i], CLONE_NEWNS);
+ ASSERT_NE(pid, -1);
+
+ if (pid == 0) {
+ close(sv[i][0]); /* Close parent end */
+
+ if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0))
+ _exit(1);
+
+ /* Child: create tmpfs mounts */
+ if (mkdir("/tmp/test_mnt1", 0755) == -1 && errno != EEXIST)
+ _exit(1);
+ if (mkdir("/tmp/test_mnt2", 0755) == -1 && errno != EEXIST)
+ _exit(1);
+
+ if (mount("tmpfs", "/tmp/test_mnt1", "tmpfs", 0, NULL) == -1)
+ _exit(1);
+ if (mount("tmpfs", "/tmp/test_mnt2", "tmpfs", 0, NULL) == -1)
+ _exit(1);
+
+ /* Signal parent that setup is complete */
+ if (write_nointr(sv[i][1], "R", 1) != 1)
+ _exit(1);
+
+ /* Wait for parent to signal us to exit */
+ if (read_nointr(sv[i][1], &c, 1) != 1)
+ _exit(1);
+
+ close(sv[i][1]);
+ _exit(0);
+ }
+
+ close(sv[i][1]); /* Close child end */
+ }
+
+ /* Wait for all children to finish setup */
+ for (i = 0; i < 10; i++) {
+ ret = read_nointr(sv[i][0], &c, 1);
+ ASSERT_EQ(ret, 1);
+ ASSERT_EQ(c, 'R');
+ }
+
+ /* Kill half the children */
+ for (i = 0; i < 5; i++)
+ write_nointr(sv[i][0], "X", 1);
+
+ /* Small delay to let some exit */
+ usleep(10000);
+
+ /* Kill remaining children */
+ for (i = 5; i < 10; i++)
+ write_nointr(sv[i][0], "X", 1);
+
+ /* Wait for all children and cleanup */
+ for (i = 0; i < 10; i++) {
+ waitpid(-1, NULL, 0);
+ close(sv[i][0]);
+ close(pidfds[i]);
+ }
+
+ /* Kill iterator and wait for it */
+ sys_pidfd_send_signal(iter_pidfd, SIGKILL, NULL, 0);
+ ret = waitpid(iter_pid, &status, 0);
+ ASSERT_EQ(ret, iter_pid);
+ close(iter_pidfd);
+
+ /* Should have been killed */
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_EQ(WTERMSIG(status), SIGKILL);
+
+ /* Clean up */
+ munmap(map, page_size);
+}
+
+/*
+ * Test specifically focused on mount namespace cleanup during EFAULT.
+ * Filter for mount namespaces only.
+ */
+TEST(listns_mnt_ns_cleanup_on_fault)
+{
+ void *map;
+ __u64 *ns_ids;
+ ssize_t ret;
+ long page_size;
+ pid_t pid, iter_pid;
+ int pidfds[8];
+ int sv[8][2];
+ int iter_pidfd;
+ int i, status;
+ char c;
+
+ page_size = sysconf(_SC_PAGESIZE);
+ ASSERT_GT(page_size, 0);
+
+ /* Set up partial fault buffer */
+ map = mmap(NULL, page_size * 2, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ ASSERT_NE(map, MAP_FAILED);
+
+ ret = munmap((char *)map + page_size, page_size);
+ ASSERT_EQ(ret, 0);
+
+ /* Position for 3 successful writes, then fault */
+ ns_ids = ((__u64 *)((char *)map + page_size)) - 3;
+
+ /*
+ * Create a separate process to run listns() concurrently.
+ */
+ iter_pid = create_child(&iter_pidfd, 0);
+ ASSERT_NE(iter_pid, -1);
+
+ if (iter_pid == 0) {
+ struct ns_id_req req = {
+ .size = sizeof(req),
+ .spare = 0,
+ .ns_id = 0,
+ .ns_type = CLONE_NEWNS, /* Only mount namespaces */
+ .spare2 = 0,
+ .user_ns_id = 0,
+ };
+ int iter_ret;
+
+ /*
+ * Loop calling listns() until killed.
+ * Call listns() to race with namespace destruction.
+ */
+ while (1) {
+ iter_ret = sys_listns(&req, ns_ids, 10, 0);
+
+ if (iter_ret == -1 && errno == ENOSYS)
+ _exit(PIDFD_SKIP);
+ }
+ }
+
+ /* Small delay to let iterator start looping */
+ usleep(50000);
+
+ /* Create children with mount namespaces */
+ for (i = 0; i < 8; i++) {
+ /* Create socketpair for synchronization */
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sv[i]), 0);
+
+ pid = create_child(&pidfds[i], CLONE_NEWNS);
+ ASSERT_NE(pid, -1);
+
+ if (pid == 0) {
+ close(sv[i][0]); /* Close parent end */
+
+ if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0))
+ _exit(1);
+
+ /* Do some mount operations to make cleanup more interesting */
+ if (mkdir("/tmp/test_mnt1", 0755) == -1 && errno != EEXIST)
+ _exit(1);
+ if (mkdir("/tmp/test_mnt2", 0755) == -1 && errno != EEXIST)
+ _exit(1);
+
+ if (mount("tmpfs", "/tmp/test_mnt1", "tmpfs", 0, NULL) == -1)
+ _exit(1);
+ if (mount("tmpfs", "/tmp/test_mnt2", "tmpfs", 0, NULL) == -1)
+ _exit(1);
+
+ /* Signal parent that setup is complete */
+ if (write_nointr(sv[i][1], "R", 1) != 1)
+ _exit(1);
+
+ /* Wait for parent to signal us to exit */
+ if (read_nointr(sv[i][1], &c, 1) != 1)
+ _exit(1);
+
+ close(sv[i][1]);
+ _exit(0);
+ }
+
+ close(sv[i][1]); /* Close child end */
+ }
+
+ /* Wait for all children to finish setup */
+ for (i = 0; i < 8; i++) {
+ ret = read_nointr(sv[i][0], &c, 1);
+ ASSERT_EQ(ret, 1);
+ ASSERT_EQ(c, 'R');
+ }
+
+ /* Kill children to trigger namespace destruction during iteration */
+ for (i = 0; i < 8; i++)
+ write_nointr(sv[i][0], "X", 1);
+
+ /* Wait for children and cleanup */
+ for (i = 0; i < 8; i++) {
+ waitpid(-1, NULL, 0);
+ close(sv[i][0]);
+ close(pidfds[i]);
+ }
+
+ /* Kill iterator and wait for it */
+ sys_pidfd_send_signal(iter_pidfd, SIGKILL, NULL, 0);
+ ret = waitpid(iter_pid, &status, 0);
+ ASSERT_EQ(ret, iter_pid);
+ close(iter_pidfd);
+
+ /* Should have been killed */
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_EQ(WTERMSIG(status), SIGKILL);
+
+ munmap(map, page_size);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/namespaces/listns_pagination_bug.c b/tools/testing/selftests/namespaces/listns_pagination_bug.c
new file mode 100644
index 000000000000..da7d33f96397
--- /dev/null
+++ b/tools/testing/selftests/namespaces/listns_pagination_bug.c
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <errno.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/socket.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include "../kselftest_harness.h"
+#include "../filesystems/utils.h"
+#include "wrappers.h"
+
+/*
+ * Minimal test case to reproduce KASAN out-of-bounds in listns pagination.
+ *
+ * The bug occurs when:
+ * 1. Filtering by a specific namespace type (e.g., CLONE_NEWUSER)
+ * 2. Using pagination (req.ns_id != 0)
+ * 3. The lookup_ns_id_at() call in do_listns() passes ns_type=0 instead of
+ * the filtered type, causing it to search the unified tree and potentially
+ * return a namespace of the wrong type.
+ */
+TEST(pagination_with_type_filter)
+{
+ struct ns_id_req req = {
+ .size = sizeof(req),
+ .spare = 0,
+ .ns_id = 0,
+ .ns_type = CLONE_NEWUSER, /* Filter by user namespace */
+ .spare2 = 0,
+ .user_ns_id = 0,
+ };
+ pid_t pids[10];
+ int num_children = 10;
+ int i;
+ int sv[2];
+ __u64 first_batch[3];
+ ssize_t ret;
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sv), 0);
+
+ /* Create children with user namespaces */
+ for (i = 0; i < num_children; i++) {
+ pids[i] = fork();
+ ASSERT_GE(pids[i], 0);
+
+ if (pids[i] == 0) {
+ char c;
+ close(sv[0]);
+
+ if (setup_userns() < 0) {
+ close(sv[1]);
+ exit(1);
+ }
+
+ /* Signal parent we're ready */
+ if (write(sv[1], &c, 1) != 1) {
+ close(sv[1]);
+ exit(1);
+ }
+
+ /* Wait for parent signal to exit */
+ if (read(sv[1], &c, 1) != 1) {
+ close(sv[1]);
+ exit(1);
+ }
+
+ close(sv[1]);
+ exit(0);
+ }
+ }
+
+ close(sv[1]);
+
+ /* Wait for all children to signal ready */
+ for (i = 0; i < num_children; i++) {
+ char c;
+ if (read(sv[0], &c, 1) != 1) {
+ close(sv[0]);
+ for (int j = 0; j < num_children; j++)
+ kill(pids[j], SIGKILL);
+ for (int j = 0; j < num_children; j++)
+ waitpid(pids[j], NULL, 0);
+ ASSERT_TRUE(false);
+ }
+ }
+
+ /* First batch - this should work */
+ ret = sys_listns(&req, first_batch, 3, 0);
+ if (ret < 0) {
+ if (errno == ENOSYS) {
+ close(sv[0]);
+ for (i = 0; i < num_children; i++)
+ kill(pids[i], SIGKILL);
+ for (i = 0; i < num_children; i++)
+ waitpid(pids[i], NULL, 0);
+ SKIP(return, "listns() not supported");
+ }
+ ASSERT_GE(ret, 0);
+ }
+
+ TH_LOG("First batch returned %zd entries", ret);
+
+ if (ret == 3) {
+ __u64 second_batch[3];
+
+ /* Second batch - pagination triggers the bug */
+ req.ns_id = first_batch[2]; /* Continue from last ID */
+ ret = sys_listns(&req, second_batch, 3, 0);
+
+ TH_LOG("Second batch returned %zd entries", ret);
+ ASSERT_GE(ret, 0);
+ }
+
+ /* Signal all children to exit */
+ for (i = 0; i < num_children; i++) {
+ char c = 'X';
+ if (write(sv[0], &c, 1) != 1) {
+ close(sv[0]);
+ for (int j = i; j < num_children; j++)
+ kill(pids[j], SIGKILL);
+ for (int j = 0; j < num_children; j++)
+ waitpid(pids[j], NULL, 0);
+ ASSERT_TRUE(false);
+ }
+ }
+
+ close(sv[0]);
+
+ /* Cleanup */
+ for (i = 0; i < num_children; i++) {
+ int status;
+ waitpid(pids[i], &status, 0);
+ }
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/namespaces/listns_permissions_test.c b/tools/testing/selftests/namespaces/listns_permissions_test.c
new file mode 100644
index 000000000000..82d818751a5f
--- /dev/null
+++ b/tools/testing/selftests/namespaces/listns_permissions_test.c
@@ -0,0 +1,759 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <linux/nsfs.h>
+#include <sys/capability.h>
+#include <sys/ioctl.h>
+#include <sys/prctl.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include "../kselftest_harness.h"
+#include "../filesystems/utils.h"
+#include "wrappers.h"
+
+/*
+ * Test that unprivileged users can only see namespaces they're currently in.
+ * Create a namespace, drop privileges, verify we can only see our own namespaces.
+ */
+TEST(listns_unprivileged_current_only)
+{
+ struct ns_id_req req = {
+ .size = sizeof(req),
+ .spare = 0,
+ .ns_id = 0,
+ .ns_type = CLONE_NEWNET,
+ .spare2 = 0,
+ .user_ns_id = 0,
+ };
+ __u64 ns_ids[100];
+ ssize_t ret;
+ int pipefd[2];
+ pid_t pid;
+ int status;
+ bool found_ours;
+ int unexpected_count;
+
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ int fd;
+ __u64 our_netns_id;
+ bool found_ours;
+ int unexpected_count;
+
+ close(pipefd[0]);
+
+ /* Create user namespace to be unprivileged */
+ if (setup_userns() < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ /* Create a network namespace */
+ if (unshare(CLONE_NEWNET) < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ /* Get our network namespace ID */
+ fd = open("/proc/self/ns/net", O_RDONLY);
+ if (fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ if (ioctl(fd, NS_GET_ID, &our_netns_id) < 0) {
+ close(fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(fd);
+
+ /* Now we're unprivileged - list all network namespaces */
+ ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+ if (ret < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ /* We should only see our own network namespace */
+ found_ours = false;
+ unexpected_count = 0;
+
+ for (ssize_t i = 0; i < ret; i++) {
+ if (ns_ids[i] == our_netns_id) {
+ found_ours = true;
+ } else {
+ /* This is either init_net (which we can see) or unexpected */
+ unexpected_count++;
+ }
+ }
+
+ /* Send results to parent */
+ write(pipefd[1], &found_ours, sizeof(found_ours));
+ write(pipefd[1], &unexpected_count, sizeof(unexpected_count));
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Parent */
+ close(pipefd[1]);
+
+ found_ours = false;
+ unexpected_count = 0;
+ read(pipefd[0], &found_ours, sizeof(found_ours));
+ read(pipefd[0], &unexpected_count, sizeof(unexpected_count));
+ close(pipefd[0]);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ /* Child should have seen its own namespace */
+ ASSERT_TRUE(found_ours);
+
+ TH_LOG("Unprivileged child saw its own namespace, plus %d others (likely init_net)",
+ unexpected_count);
+}
+
+/*
+ * Test that users with CAP_SYS_ADMIN in a user namespace can see
+ * all namespaces owned by that user namespace.
+ */
+TEST(listns_cap_sys_admin_in_userns)
+{
+ struct ns_id_req req = {
+ .size = sizeof(req),
+ .spare = 0,
+ .ns_id = 0,
+ .ns_type = 0, /* All types */
+ .spare2 = 0,
+ .user_ns_id = 0, /* Will be set to our created user namespace */
+ };
+ __u64 ns_ids[100];
+ int pipefd[2];
+ pid_t pid;
+ int status;
+ bool success;
+ ssize_t count;
+
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ int fd;
+ __u64 userns_id;
+ ssize_t ret;
+ int min_expected;
+ bool success;
+
+ close(pipefd[0]);
+
+ /* Create user namespace - we'll have CAP_SYS_ADMIN in it */
+ if (setup_userns() < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ /* Get the user namespace ID */
+ fd = open("/proc/self/ns/user", O_RDONLY);
+ if (fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ if (ioctl(fd, NS_GET_ID, &userns_id) < 0) {
+ close(fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(fd);
+
+ /* Create several namespaces owned by this user namespace */
+ unshare(CLONE_NEWNET);
+ unshare(CLONE_NEWUTS);
+ unshare(CLONE_NEWIPC);
+
+ /* List namespaces owned by our user namespace */
+ req.user_ns_id = userns_id;
+ ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+ if (ret < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ /*
+ * We have CAP_SYS_ADMIN in this user namespace,
+ * so we should see all namespaces owned by it.
+ * That includes: net, uts, ipc, and the user namespace itself.
+ */
+ min_expected = 4;
+ success = (ret >= min_expected);
+
+ write(pipefd[1], &success, sizeof(success));
+ write(pipefd[1], &ret, sizeof(ret));
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Parent */
+ close(pipefd[1]);
+
+ success = false;
+ count = 0;
+ read(pipefd[0], &success, sizeof(success));
+ read(pipefd[0], &count, sizeof(count));
+ close(pipefd[0]);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ ASSERT_TRUE(success);
+ TH_LOG("User with CAP_SYS_ADMIN saw %zd namespaces owned by their user namespace",
+ count);
+}
+
+/*
+ * Test that users cannot see namespaces from unrelated user namespaces.
+ * Create two sibling user namespaces, verify they can't see each other's
+ * owned namespaces.
+ */
+TEST(listns_cannot_see_sibling_userns_namespaces)
+{
+ int pipefd[2];
+ pid_t pid1, pid2;
+ int status;
+ __u64 netns_a_id;
+ int pipefd2[2];
+ bool found_sibling_netns;
+
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ /* Fork first child - creates user namespace A */
+ pid1 = fork();
+ ASSERT_GE(pid1, 0);
+
+ if (pid1 == 0) {
+ int fd;
+ __u64 netns_a_id;
+ char buf;
+
+ close(pipefd[0]);
+
+ /* Create user namespace A */
+ if (setup_userns() < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ /* Create network namespace owned by user namespace A */
+ if (unshare(CLONE_NEWNET) < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ /* Get network namespace ID */
+ fd = open("/proc/self/ns/net", O_RDONLY);
+ if (fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ if (ioctl(fd, NS_GET_ID, &netns_a_id) < 0) {
+ close(fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(fd);
+
+ /* Send namespace ID to parent */
+ write(pipefd[1], &netns_a_id, sizeof(netns_a_id));
+
+ /* Keep alive for sibling to check */
+ read(pipefd[1], &buf, 1);
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Parent reads namespace A ID */
+ close(pipefd[1]);
+ netns_a_id = 0;
+ read(pipefd[0], &netns_a_id, sizeof(netns_a_id));
+
+ TH_LOG("User namespace A created network namespace with ID %llu",
+ (unsigned long long)netns_a_id);
+
+ /* Fork second child - creates user namespace B */
+ ASSERT_EQ(pipe(pipefd2), 0);
+
+ pid2 = fork();
+ ASSERT_GE(pid2, 0);
+
+ if (pid2 == 0) {
+ struct ns_id_req req = {
+ .size = sizeof(req),
+ .spare = 0,
+ .ns_id = 0,
+ .ns_type = CLONE_NEWNET,
+ .spare2 = 0,
+ .user_ns_id = 0,
+ };
+ __u64 ns_ids[100];
+ ssize_t ret;
+ bool found_sibling_netns;
+
+ close(pipefd[0]);
+ close(pipefd2[0]);
+
+ /* Create user namespace B (sibling to A) */
+ if (setup_userns() < 0) {
+ close(pipefd2[1]);
+ exit(1);
+ }
+
+ /* Try to list all network namespaces */
+ ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+
+ found_sibling_netns = false;
+ if (ret > 0) {
+ for (ssize_t i = 0; i < ret; i++) {
+ if (ns_ids[i] == netns_a_id) {
+ found_sibling_netns = true;
+ break;
+ }
+ }
+ }
+
+ /* We should NOT see the sibling's network namespace */
+ write(pipefd2[1], &found_sibling_netns, sizeof(found_sibling_netns));
+ close(pipefd2[1]);
+ exit(0);
+ }
+
+ /* Parent reads result from second child */
+ close(pipefd2[1]);
+ found_sibling_netns = false;
+ read(pipefd2[0], &found_sibling_netns, sizeof(found_sibling_netns));
+ close(pipefd2[0]);
+
+ /* Signal first child to exit */
+ close(pipefd[0]);
+
+ /* Wait for both children */
+ waitpid(pid2, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+
+ waitpid(pid1, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+
+ /* Second child should NOT have seen first child's namespace */
+ ASSERT_FALSE(found_sibling_netns);
+ TH_LOG("User namespace B correctly could not see sibling namespace A's network namespace");
+}
+
+/*
+ * Test permission checking with LISTNS_CURRENT_USER.
+ * Verify that listing with LISTNS_CURRENT_USER respects permissions.
+ */
+TEST(listns_current_user_permissions)
+{
+ int pipefd[2];
+ pid_t pid;
+ int status;
+ bool success;
+ ssize_t count;
+
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ struct ns_id_req req = {
+ .size = sizeof(req),
+ .spare = 0,
+ .ns_id = 0,
+ .ns_type = 0,
+ .spare2 = 0,
+ .user_ns_id = LISTNS_CURRENT_USER,
+ };
+ __u64 ns_ids[100];
+ ssize_t ret;
+ bool success;
+
+ close(pipefd[0]);
+
+ /* Create user namespace */
+ if (setup_userns() < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ /* Create some namespaces owned by this user namespace */
+ if (unshare(CLONE_NEWNET) < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ if (unshare(CLONE_NEWUTS) < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ /* List with LISTNS_CURRENT_USER - should see our owned namespaces */
+ ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+
+ success = (ret >= 3); /* At least user, net, uts */
+ write(pipefd[1], &success, sizeof(success));
+ write(pipefd[1], &ret, sizeof(ret));
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Parent */
+ close(pipefd[1]);
+
+ success = false;
+ count = 0;
+ read(pipefd[0], &success, sizeof(success));
+ read(pipefd[0], &count, sizeof(count));
+ close(pipefd[0]);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ ASSERT_TRUE(success);
+ TH_LOG("LISTNS_CURRENT_USER returned %zd namespaces", count);
+}
+
+/*
+ * Test that CAP_SYS_ADMIN in parent user namespace allows seeing
+ * child user namespace's owned namespaces.
+ */
+TEST(listns_parent_userns_cap_sys_admin)
+{
+ int pipefd[2];
+ pid_t pid;
+ int status;
+ bool found_child_userns;
+ ssize_t count;
+
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ int fd;
+ __u64 parent_userns_id;
+ __u64 child_userns_id;
+ struct ns_id_req req;
+ __u64 ns_ids[100];
+ ssize_t ret;
+ bool found_child_userns;
+
+ close(pipefd[0]);
+
+ /* Create parent user namespace - we have CAP_SYS_ADMIN in it */
+ if (setup_userns() < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ /* Get parent user namespace ID */
+ fd = open("/proc/self/ns/user", O_RDONLY);
+ if (fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ if (ioctl(fd, NS_GET_ID, &parent_userns_id) < 0) {
+ close(fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(fd);
+
+ /* Create child user namespace */
+ if (setup_userns() < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ /* Get child user namespace ID */
+ fd = open("/proc/self/ns/user", O_RDONLY);
+ if (fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ if (ioctl(fd, NS_GET_ID, &child_userns_id) < 0) {
+ close(fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(fd);
+
+ /* Create namespaces owned by child user namespace */
+ if (unshare(CLONE_NEWNET) < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ /* List namespaces owned by parent user namespace */
+ req.size = sizeof(req);
+ req.spare = 0;
+ req.ns_id = 0;
+ req.ns_type = 0;
+ req.spare2 = 0;
+ req.user_ns_id = parent_userns_id;
+
+ ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+
+ /* Should see child user namespace in the list */
+ found_child_userns = false;
+ if (ret > 0) {
+ for (ssize_t i = 0; i < ret; i++) {
+ if (ns_ids[i] == child_userns_id) {
+ found_child_userns = true;
+ break;
+ }
+ }
+ }
+
+ write(pipefd[1], &found_child_userns, sizeof(found_child_userns));
+ write(pipefd[1], &ret, sizeof(ret));
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Parent */
+ close(pipefd[1]);
+
+ found_child_userns = false;
+ count = 0;
+ read(pipefd[0], &found_child_userns, sizeof(found_child_userns));
+ read(pipefd[0], &count, sizeof(count));
+ close(pipefd[0]);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ ASSERT_TRUE(found_child_userns);
+ TH_LOG("Process with CAP_SYS_ADMIN in parent user namespace saw child user namespace (total: %zd)",
+ count);
+}
+
+/*
+ * Test that we can see user namespaces we have CAP_SYS_ADMIN inside of.
+ * This is different from seeing namespaces owned by a user namespace.
+ */
+TEST(listns_cap_sys_admin_inside_userns)
+{
+ int pipefd[2];
+ pid_t pid;
+ int status;
+ bool found_ours;
+
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ int fd;
+ __u64 our_userns_id;
+ struct ns_id_req req;
+ __u64 ns_ids[100];
+ ssize_t ret;
+ bool found_ours;
+
+ close(pipefd[0]);
+
+ /* Create user namespace - we have CAP_SYS_ADMIN inside it */
+ if (setup_userns() < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ /* Get our user namespace ID */
+ fd = open("/proc/self/ns/user", O_RDONLY);
+ if (fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ if (ioctl(fd, NS_GET_ID, &our_userns_id) < 0) {
+ close(fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(fd);
+
+ /* List all user namespaces globally */
+ req.size = sizeof(req);
+ req.spare = 0;
+ req.ns_id = 0;
+ req.ns_type = CLONE_NEWUSER;
+ req.spare2 = 0;
+ req.user_ns_id = 0;
+
+ ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+
+ /* We should be able to see our own user namespace */
+ found_ours = false;
+ if (ret > 0) {
+ for (ssize_t i = 0; i < ret; i++) {
+ if (ns_ids[i] == our_userns_id) {
+ found_ours = true;
+ break;
+ }
+ }
+ }
+
+ write(pipefd[1], &found_ours, sizeof(found_ours));
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Parent */
+ close(pipefd[1]);
+
+ found_ours = false;
+ read(pipefd[0], &found_ours, sizeof(found_ours));
+ close(pipefd[0]);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ ASSERT_TRUE(found_ours);
+ TH_LOG("Process can see user namespace it has CAP_SYS_ADMIN inside of");
+}
+
+/*
+ * Test that dropping CAP_SYS_ADMIN restricts what we can see.
+ */
+TEST(listns_drop_cap_sys_admin)
+{
+ cap_t caps;
+ cap_value_t cap_list[1] = { CAP_SYS_ADMIN };
+
+ /* This test needs to start with CAP_SYS_ADMIN */
+ caps = cap_get_proc();
+ if (!caps) {
+ SKIP(return, "Cannot get capabilities");
+ }
+
+ cap_flag_value_t cap_val;
+ if (cap_get_flag(caps, CAP_SYS_ADMIN, CAP_EFFECTIVE, &cap_val) < 0) {
+ cap_free(caps);
+ SKIP(return, "Cannot check CAP_SYS_ADMIN");
+ }
+
+ if (cap_val != CAP_SET) {
+ cap_free(caps);
+ SKIP(return, "Test needs CAP_SYS_ADMIN to start");
+ }
+ cap_free(caps);
+
+ int pipefd[2];
+ pid_t pid;
+ int status;
+ bool correct;
+ ssize_t count_before, count_after;
+
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ struct ns_id_req req = {
+ .size = sizeof(req),
+ .spare = 0,
+ .ns_id = 0,
+ .ns_type = CLONE_NEWNET,
+ .spare2 = 0,
+ .user_ns_id = LISTNS_CURRENT_USER,
+ };
+ __u64 ns_ids_before[100];
+ ssize_t count_before;
+ __u64 ns_ids_after[100];
+ ssize_t count_after;
+ bool correct;
+
+ close(pipefd[0]);
+
+ /* Create user namespace */
+ if (setup_userns() < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ /* Count namespaces with CAP_SYS_ADMIN */
+ count_before = sys_listns(&req, ns_ids_before, ARRAY_SIZE(ns_ids_before), 0);
+
+ /* Drop CAP_SYS_ADMIN */
+ caps = cap_get_proc();
+ if (caps) {
+ cap_set_flag(caps, CAP_EFFECTIVE, 1, cap_list, CAP_CLEAR);
+ cap_set_flag(caps, CAP_PERMITTED, 1, cap_list, CAP_CLEAR);
+ cap_set_proc(caps);
+ cap_free(caps);
+ }
+
+ /* Ensure we can't regain the capability */
+ prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+
+ /* Count namespaces without CAP_SYS_ADMIN */
+ count_after = sys_listns(&req, ns_ids_after, ARRAY_SIZE(ns_ids_after), 0);
+
+ /* Without CAP_SYS_ADMIN, we should see same or fewer namespaces */
+ correct = (count_after <= count_before);
+
+ write(pipefd[1], &correct, sizeof(correct));
+ write(pipefd[1], &count_before, sizeof(count_before));
+ write(pipefd[1], &count_after, sizeof(count_after));
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Parent */
+ close(pipefd[1]);
+
+ correct = false;
+ count_before = 0;
+ count_after = 0;
+ read(pipefd[0], &correct, sizeof(correct));
+ read(pipefd[0], &count_before, sizeof(count_before));
+ read(pipefd[0], &count_after, sizeof(count_after));
+ close(pipefd[0]);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ ASSERT_TRUE(correct);
+ TH_LOG("With CAP_SYS_ADMIN: %zd namespaces, without: %zd namespaces",
+ count_before, count_after);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/namespaces/listns_test.c b/tools/testing/selftests/namespaces/listns_test.c
new file mode 100644
index 000000000000..8a95789d6a87
--- /dev/null
+++ b/tools/testing/selftests/namespaces/listns_test.c
@@ -0,0 +1,679 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <linux/nsfs.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include "../kselftest_harness.h"
+#include "../filesystems/utils.h"
+#include "wrappers.h"
+
+/*
+ * Test basic listns() functionality with the unified namespace tree.
+ * List all active namespaces globally.
+ */
+TEST(listns_basic_unified)
+{
+ struct ns_id_req req = {
+ .size = sizeof(req),
+ .spare = 0,
+ .ns_id = 0,
+ .ns_type = 0, /* All types */
+ .spare2 = 0,
+ .user_ns_id = 0, /* Global listing */
+ };
+ __u64 ns_ids[100];
+ ssize_t ret;
+
+ ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+ if (ret < 0) {
+ if (errno == ENOSYS)
+ SKIP(return, "listns() not supported");
+ TH_LOG("listns failed: %s (errno=%d)", strerror(errno), errno);
+ ASSERT_TRUE(false);
+ }
+
+ /* Should find at least the initial namespaces */
+ ASSERT_GT(ret, 0);
+ TH_LOG("Found %zd active namespaces", ret);
+
+ /* Verify all returned IDs are non-zero */
+ for (ssize_t i = 0; i < ret; i++) {
+ ASSERT_NE(ns_ids[i], 0);
+ TH_LOG(" [%zd] ns_id: %llu", i, (unsigned long long)ns_ids[i]);
+ }
+}
+
+/*
+ * Test listns() with type filtering.
+ * List only network namespaces.
+ */
+TEST(listns_filter_by_type)
+{
+ struct ns_id_req req = {
+ .size = sizeof(req),
+ .spare = 0,
+ .ns_id = 0,
+ .ns_type = CLONE_NEWNET, /* Only network namespaces */
+ .spare2 = 0,
+ .user_ns_id = 0,
+ };
+ __u64 ns_ids[100];
+ ssize_t ret;
+
+ ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+ if (ret < 0) {
+ if (errno == ENOSYS)
+ SKIP(return, "listns() not supported");
+ TH_LOG("listns failed: %s (errno=%d)", strerror(errno), errno);
+ ASSERT_TRUE(false);
+ }
+ ASSERT_GE(ret, 0);
+
+ /* Should find at least init_net */
+ ASSERT_GT(ret, 0);
+ TH_LOG("Found %zd active network namespaces", ret);
+
+ /* Verify we can open each namespace and it's actually a network namespace */
+ for (ssize_t i = 0; i < ret && i < 5; i++) {
+ struct nsfs_file_handle nsfh = {
+ .ns_id = ns_ids[i],
+ .ns_type = CLONE_NEWNET,
+ .ns_inum = 0,
+ };
+ struct file_handle *fh;
+ int fd;
+
+ fh = (struct file_handle *)malloc(sizeof(*fh) + sizeof(nsfh));
+ ASSERT_NE(fh, NULL);
+ fh->handle_bytes = sizeof(nsfh);
+ fh->handle_type = 0;
+ memcpy(fh->f_handle, &nsfh, sizeof(nsfh));
+
+ fd = open_by_handle_at(-10003, fh, O_RDONLY);
+ free(fh);
+
+ if (fd >= 0) {
+ int ns_type;
+ /* Verify it's a network namespace via ioctl */
+ ns_type = ioctl(fd, NS_GET_NSTYPE);
+ if (ns_type >= 0) {
+ ASSERT_EQ(ns_type, CLONE_NEWNET);
+ }
+ close(fd);
+ }
+ }
+}
+
+/*
+ * Test listns() pagination.
+ * List namespaces in batches.
+ */
+TEST(listns_pagination)
+{
+ struct ns_id_req req = {
+ .size = sizeof(req),
+ .spare = 0,
+ .ns_id = 0,
+ .ns_type = 0,
+ .spare2 = 0,
+ .user_ns_id = 0,
+ };
+ __u64 batch1[2], batch2[2];
+ ssize_t ret1, ret2;
+
+ /* Get first batch */
+ ret1 = sys_listns(&req, batch1, ARRAY_SIZE(batch1), 0);
+ if (ret1 < 0) {
+ if (errno == ENOSYS)
+ SKIP(return, "listns() not supported");
+ TH_LOG("listns failed: %s (errno=%d)", strerror(errno), errno);
+ ASSERT_TRUE(false);
+ }
+ ASSERT_GE(ret1, 0);
+
+ if (ret1 == 0)
+ SKIP(return, "No namespaces found");
+
+ TH_LOG("First batch: %zd namespaces", ret1);
+
+ /* Get second batch using last ID from first batch */
+ if (ret1 == ARRAY_SIZE(batch1)) {
+ req.ns_id = batch1[ret1 - 1];
+ ret2 = sys_listns(&req, batch2, ARRAY_SIZE(batch2), 0);
+ ASSERT_GE(ret2, 0);
+
+ TH_LOG("Second batch: %zd namespaces (after ns_id=%llu)",
+ ret2, (unsigned long long)req.ns_id);
+
+ /* If we got more results, verify IDs are monotonically increasing */
+ if (ret2 > 0) {
+ ASSERT_GT(batch2[0], batch1[ret1 - 1]);
+ TH_LOG("Pagination working: %llu > %llu",
+ (unsigned long long)batch2[0],
+ (unsigned long long)batch1[ret1 - 1]);
+ }
+ } else {
+ TH_LOG("All namespaces fit in first batch");
+ }
+}
+
+/*
+ * Test listns() with LISTNS_CURRENT_USER.
+ * List namespaces owned by current user namespace.
+ */
+TEST(listns_current_user)
+{
+ struct ns_id_req req = {
+ .size = sizeof(req),
+ .spare = 0,
+ .ns_id = 0,
+ .ns_type = 0,
+ .spare2 = 0,
+ .user_ns_id = LISTNS_CURRENT_USER,
+ };
+ __u64 ns_ids[100];
+ ssize_t ret;
+
+ ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+ if (ret < 0) {
+ if (errno == ENOSYS)
+ SKIP(return, "listns() not supported");
+ TH_LOG("listns failed: %s (errno=%d)", strerror(errno), errno);
+ ASSERT_TRUE(false);
+ }
+ ASSERT_GE(ret, 0);
+
+ /* Should find at least the initial namespaces if we're in init_user_ns */
+ TH_LOG("Found %zd namespaces owned by current user namespace", ret);
+
+ for (ssize_t i = 0; i < ret; i++)
+ TH_LOG(" [%zd] ns_id: %llu", i, (unsigned long long)ns_ids[i]);
+}
+
+/*
+ * Test that listns() only returns active namespaces.
+ * Create a namespace, let it become inactive, verify it's not listed.
+ */
+TEST(listns_only_active)
+{
+ struct ns_id_req req = {
+ .size = sizeof(req),
+ .spare = 0,
+ .ns_id = 0,
+ .ns_type = CLONE_NEWNET,
+ .spare2 = 0,
+ .user_ns_id = 0,
+ };
+ __u64 ns_ids_before[100], ns_ids_after[100];
+ ssize_t ret_before, ret_after;
+ int pipefd[2];
+ pid_t pid;
+ __u64 new_ns_id = 0;
+ int status;
+
+ /* Get initial list */
+ ret_before = sys_listns(&req, ns_ids_before, ARRAY_SIZE(ns_ids_before), 0);
+ if (ret_before < 0) {
+ if (errno == ENOSYS)
+ SKIP(return, "listns() not supported");
+ TH_LOG("listns failed: %s (errno=%d)", strerror(errno), errno);
+ ASSERT_TRUE(false);
+ }
+ ASSERT_GE(ret_before, 0);
+
+ TH_LOG("Before: %zd active network namespaces", ret_before);
+
+ /* Create a new namespace in a child process and get its ID */
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ int fd;
+ __u64 ns_id;
+
+ close(pipefd[0]);
+
+ /* Create new network namespace */
+ if (unshare(CLONE_NEWNET) < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ /* Get its ID */
+ fd = open("/proc/self/ns/net", O_RDONLY);
+ if (fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ if (ioctl(fd, NS_GET_ID, &ns_id) < 0) {
+ close(fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(fd);
+
+ /* Send ID to parent */
+ write(pipefd[1], &ns_id, sizeof(ns_id));
+ close(pipefd[1]);
+
+ /* Keep namespace active briefly */
+ usleep(100000);
+ exit(0);
+ }
+
+ /* Parent reads the new namespace ID */
+ {
+ int bytes;
+
+ close(pipefd[1]);
+ bytes = read(pipefd[0], &new_ns_id, sizeof(new_ns_id));
+ close(pipefd[0]);
+
+ if (bytes == sizeof(new_ns_id)) {
+ __u64 ns_ids_during[100];
+ int ret_during;
+
+ TH_LOG("Child created namespace with ID %llu", (unsigned long long)new_ns_id);
+
+ /* List namespaces while child is still alive - should see new one */
+ ret_during = sys_listns(&req, ns_ids_during, ARRAY_SIZE(ns_ids_during), 0);
+ ASSERT_GE(ret_during, 0);
+ TH_LOG("During: %d active network namespaces", ret_during);
+
+ /* Should have more namespaces than before */
+ ASSERT_GE(ret_during, ret_before);
+ }
+ }
+
+ /* Wait for child to exit */
+ waitpid(pid, &status, 0);
+
+ /* Give time for namespace to become inactive */
+ usleep(100000);
+
+ /* List namespaces after child exits - should not see new one */
+ ret_after = sys_listns(&req, ns_ids_after, ARRAY_SIZE(ns_ids_after), 0);
+ ASSERT_GE(ret_after, 0);
+ TH_LOG("After: %zd active network namespaces", ret_after);
+
+ /* Verify the new namespace ID is not in the after list */
+ if (new_ns_id != 0) {
+ bool found = false;
+
+ for (ssize_t i = 0; i < ret_after; i++) {
+ if (ns_ids_after[i] == new_ns_id) {
+ found = true;
+ break;
+ }
+ }
+ ASSERT_FALSE(found);
+ }
+}
+
+/*
+ * Test listns() with specific user namespace ID.
+ * Create a user namespace and list namespaces it owns.
+ */
+TEST(listns_specific_userns)
+{
+ struct ns_id_req req = {
+ .size = sizeof(req),
+ .spare = 0,
+ .ns_id = 0,
+ .ns_type = 0,
+ .spare2 = 0,
+ .user_ns_id = 0, /* Will be filled with created userns ID */
+ };
+ __u64 ns_ids[100];
+ int sv[2];
+ pid_t pid;
+ int status;
+ __u64 user_ns_id = 0;
+ int bytes;
+ ssize_t ret;
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sv), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ int fd;
+ __u64 ns_id;
+ char buf;
+
+ close(sv[0]);
+
+ /* Create new user namespace */
+ if (setup_userns() < 0) {
+ close(sv[1]);
+ exit(1);
+ }
+
+ /* Get user namespace ID */
+ fd = open("/proc/self/ns/user", O_RDONLY);
+ if (fd < 0) {
+ close(sv[1]);
+ exit(1);
+ }
+
+ if (ioctl(fd, NS_GET_ID, &ns_id) < 0) {
+ close(fd);
+ close(sv[1]);
+ exit(1);
+ }
+ close(fd);
+
+ /* Send ID to parent */
+ if (write(sv[1], &ns_id, sizeof(ns_id)) != sizeof(ns_id)) {
+ close(sv[1]);
+ exit(1);
+ }
+
+ /* Create some namespaces owned by this user namespace */
+ unshare(CLONE_NEWNET);
+ unshare(CLONE_NEWUTS);
+
+ /* Wait for parent signal */
+ if (read(sv[1], &buf, 1) != 1) {
+ close(sv[1]);
+ exit(1);
+ }
+ close(sv[1]);
+ exit(0);
+ }
+
+ /* Parent */
+ close(sv[1]);
+ bytes = read(sv[0], &user_ns_id, sizeof(user_ns_id));
+
+ if (bytes != sizeof(user_ns_id)) {
+ close(sv[0]);
+ kill(pid, SIGKILL);
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to get user namespace ID from child");
+ }
+
+ TH_LOG("Child created user namespace with ID %llu", (unsigned long long)user_ns_id);
+
+ /* List namespaces owned by this user namespace */
+ req.user_ns_id = user_ns_id;
+ ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+
+ if (ret < 0) {
+ TH_LOG("listns failed: %s (errno=%d)", strerror(errno), errno);
+ close(sv[0]);
+ kill(pid, SIGKILL);
+ waitpid(pid, NULL, 0);
+ if (errno == ENOSYS) {
+ SKIP(return, "listns() not supported");
+ }
+ ASSERT_GE(ret, 0);
+ }
+
+ TH_LOG("Found %zd namespaces owned by user namespace %llu", ret,
+ (unsigned long long)user_ns_id);
+
+ /* Should find at least the network and UTS namespaces we created */
+ if (ret > 0) {
+ for (ssize_t i = 0; i < ret && i < 10; i++)
+ TH_LOG(" [%zd] ns_id: %llu", i, (unsigned long long)ns_ids[i]);
+ }
+
+ /* Signal child to exit */
+ if (write(sv[0], "X", 1) != 1) {
+ close(sv[0]);
+ kill(pid, SIGKILL);
+ waitpid(pid, NULL, 0);
+ ASSERT_TRUE(false);
+ }
+ close(sv[0]);
+ waitpid(pid, &status, 0);
+}
+
+/*
+ * Test listns() with multiple namespace types filter.
+ */
+TEST(listns_multiple_types)
+{
+ struct ns_id_req req = {
+ .size = sizeof(req),
+ .spare = 0,
+ .ns_id = 0,
+ .ns_type = CLONE_NEWNET | CLONE_NEWUTS, /* Network and UTS */
+ .spare2 = 0,
+ .user_ns_id = 0,
+ };
+ __u64 ns_ids[100];
+ ssize_t ret;
+
+ ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+ if (ret < 0) {
+ if (errno == ENOSYS)
+ SKIP(return, "listns() not supported");
+ TH_LOG("listns failed: %s (errno=%d)", strerror(errno), errno);
+ ASSERT_TRUE(false);
+ }
+ ASSERT_GE(ret, 0);
+
+ TH_LOG("Found %zd active network/UTS namespaces", ret);
+
+ for (ssize_t i = 0; i < ret; i++)
+ TH_LOG(" [%zd] ns_id: %llu", i, (unsigned long long)ns_ids[i]);
+}
+
+/*
+ * Test that hierarchical active reference propagation keeps parent
+ * user namespaces visible in listns().
+ */
+TEST(listns_hierarchical_visibility)
+{
+ struct ns_id_req req = {
+ .size = sizeof(req),
+ .spare = 0,
+ .ns_id = 0,
+ .ns_type = CLONE_NEWUSER,
+ .spare2 = 0,
+ .user_ns_id = 0,
+ };
+ __u64 parent_ns_id = 0, child_ns_id = 0;
+ int sv[2];
+ pid_t pid;
+ int status;
+ int bytes;
+ __u64 ns_ids[100];
+ ssize_t ret;
+ bool found_parent, found_child;
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sv), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ int fd;
+ char buf;
+
+ close(sv[0]);
+
+ /* Create parent user namespace */
+ if (setup_userns() < 0) {
+ close(sv[1]);
+ exit(1);
+ }
+
+ fd = open("/proc/self/ns/user", O_RDONLY);
+ if (fd < 0) {
+ close(sv[1]);
+ exit(1);
+ }
+
+ if (ioctl(fd, NS_GET_ID, &parent_ns_id) < 0) {
+ close(fd);
+ close(sv[1]);
+ exit(1);
+ }
+ close(fd);
+
+ /* Create child user namespace */
+ if (setup_userns() < 0) {
+ close(sv[1]);
+ exit(1);
+ }
+
+ fd = open("/proc/self/ns/user", O_RDONLY);
+ if (fd < 0) {
+ close(sv[1]);
+ exit(1);
+ }
+
+ if (ioctl(fd, NS_GET_ID, &child_ns_id) < 0) {
+ close(fd);
+ close(sv[1]);
+ exit(1);
+ }
+ close(fd);
+
+ /* Send both IDs to parent */
+ if (write(sv[1], &parent_ns_id, sizeof(parent_ns_id)) != sizeof(parent_ns_id)) {
+ close(sv[1]);
+ exit(1);
+ }
+ if (write(sv[1], &child_ns_id, sizeof(child_ns_id)) != sizeof(child_ns_id)) {
+ close(sv[1]);
+ exit(1);
+ }
+
+ /* Wait for parent signal */
+ if (read(sv[1], &buf, 1) != 1) {
+ close(sv[1]);
+ exit(1);
+ }
+ close(sv[1]);
+ exit(0);
+ }
+
+ /* Parent */
+ close(sv[1]);
+
+ /* Read both namespace IDs */
+ bytes = read(sv[0], &parent_ns_id, sizeof(parent_ns_id));
+ bytes += read(sv[0], &child_ns_id, sizeof(child_ns_id));
+
+ if (bytes != (int)(2 * sizeof(__u64))) {
+ close(sv[0]);
+ kill(pid, SIGKILL);
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to get namespace IDs from child");
+ }
+
+ TH_LOG("Parent user namespace ID: %llu", (unsigned long long)parent_ns_id);
+ TH_LOG("Child user namespace ID: %llu", (unsigned long long)child_ns_id);
+
+ /* List all user namespaces */
+ ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+
+ if (ret < 0 && errno == ENOSYS) {
+ close(sv[0]);
+ kill(pid, SIGKILL);
+ waitpid(pid, NULL, 0);
+ SKIP(return, "listns() not supported");
+ }
+
+ ASSERT_GE(ret, 0);
+ TH_LOG("Found %zd active user namespaces", ret);
+
+ /* Both parent and child should be visible (active due to child process) */
+ found_parent = false;
+ found_child = false;
+ for (ssize_t i = 0; i < ret; i++) {
+ if (ns_ids[i] == parent_ns_id)
+ found_parent = true;
+ if (ns_ids[i] == child_ns_id)
+ found_child = true;
+ }
+
+ TH_LOG("Parent namespace %s, child namespace %s",
+ found_parent ? "found" : "NOT FOUND",
+ found_child ? "found" : "NOT FOUND");
+
+ ASSERT_TRUE(found_child);
+ /* With hierarchical propagation, parent should also be active */
+ ASSERT_TRUE(found_parent);
+
+ /* Signal child to exit */
+ if (write(sv[0], "X", 1) != 1) {
+ close(sv[0]);
+ kill(pid, SIGKILL);
+ waitpid(pid, NULL, 0);
+ ASSERT_TRUE(false);
+ }
+ close(sv[0]);
+ waitpid(pid, &status, 0);
+}
+
+/*
+ * Test error cases for listns().
+ */
+TEST(listns_error_cases)
+{
+ struct ns_id_req req = {
+ .size = sizeof(req),
+ .spare = 0,
+ .ns_id = 0,
+ .ns_type = 0,
+ .spare2 = 0,
+ .user_ns_id = 0,
+ };
+ __u64 ns_ids[10];
+ int ret;
+
+ /* Test with invalid flags */
+ ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0xFFFF);
+ if (errno == ENOSYS) {
+ /* listns() not supported, skip this check */
+ } else {
+ ASSERT_LT(ret, 0);
+ ASSERT_EQ(errno, EINVAL);
+ }
+
+ /* Test with NULL ns_ids array */
+ ret = sys_listns(&req, NULL, 10, 0);
+ ASSERT_LT(ret, 0);
+
+ /* Test with invalid spare field */
+ req.spare = 1;
+ ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+ if (errno == ENOSYS) {
+ /* listns() not supported, skip this check */
+ } else {
+ ASSERT_LT(ret, 0);
+ ASSERT_EQ(errno, EINVAL);
+ }
+ req.spare = 0;
+
+ /* Test with huge nr_ns_ids */
+ ret = sys_listns(&req, ns_ids, 2000000, 0);
+ if (errno == ENOSYS) {
+ /* listns() not supported, skip this check */
+ } else {
+ ASSERT_LT(ret, 0);
+ }
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/namespaces/ns_active_ref_test.c b/tools/testing/selftests/namespaces/ns_active_ref_test.c
new file mode 100644
index 000000000000..093268f0efaa
--- /dev/null
+++ b/tools/testing/selftests/namespaces/ns_active_ref_test.c
@@ -0,0 +1,2672 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <linux/nsfs.h>
+#include <sys/mount.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <pthread.h>
+#include "../kselftest_harness.h"
+#include "../filesystems/utils.h"
+#include "wrappers.h"
+
+#ifndef FD_NSFS_ROOT
+#define FD_NSFS_ROOT -10003 /* Root of the nsfs filesystem */
+#endif
+
+#ifndef FILEID_NSFS
+#define FILEID_NSFS 0xf1
+#endif
+
+/*
+ * Test that initial namespaces can be reopened via file handle.
+ * Initial namespaces should have active ref count of 1 from boot.
+ */
+TEST(init_ns_always_active)
+{
+ struct file_handle *handle;
+ int mount_id;
+ int ret;
+ int fd1, fd2;
+ struct stat st1, st2;
+
+ handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ);
+ ASSERT_NE(handle, NULL);
+
+ /* Open initial network namespace */
+ fd1 = open("/proc/1/ns/net", O_RDONLY);
+ ASSERT_GE(fd1, 0);
+
+ /* Get file handle for initial namespace */
+ handle->handle_bytes = MAX_HANDLE_SZ;
+ ret = name_to_handle_at(fd1, "", handle, &mount_id, AT_EMPTY_PATH);
+ if (ret < 0 && errno == EOPNOTSUPP) {
+ SKIP(free(handle); close(fd1);
+ return, "nsfs doesn't support file handles");
+ }
+ ASSERT_EQ(ret, 0);
+
+ /* Close the namespace fd */
+ close(fd1);
+
+ /* Try to reopen via file handle - should succeed since init ns is always active */
+ fd2 = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+ if (fd2 < 0 && (errno == EINVAL || errno == EOPNOTSUPP)) {
+ SKIP(free(handle);
+ return, "open_by_handle_at with FD_NSFS_ROOT not supported");
+ }
+ ASSERT_GE(fd2, 0);
+
+ /* Verify we opened the same namespace */
+ fd1 = open("/proc/1/ns/net", O_RDONLY);
+ ASSERT_GE(fd1, 0);
+ ASSERT_EQ(fstat(fd1, &st1), 0);
+ ASSERT_EQ(fstat(fd2, &st2), 0);
+ ASSERT_EQ(st1.st_ino, st2.st_ino);
+
+ close(fd1);
+ close(fd2);
+ free(handle);
+}
+
+/*
+ * Test namespace lifecycle: create a namespace in a child process,
+ * get a file handle while it's active, then try to reopen after
+ * the process exits (namespace becomes inactive).
+ */
+TEST(ns_inactive_after_exit)
+{
+ struct file_handle *handle;
+ int mount_id;
+ int ret;
+ int fd;
+ int pipefd[2];
+ pid_t pid;
+ int status;
+ char buf[sizeof(*handle) + MAX_HANDLE_SZ];
+
+ /* Create pipe for passing file handle from child */
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child process */
+ close(pipefd[0]);
+
+ /* Create new network namespace */
+ ret = unshare(CLONE_NEWNET);
+ if (ret < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ /* Open our new namespace */
+ fd = open("/proc/self/ns/net", O_RDONLY);
+ if (fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ /* Get file handle for the namespace */
+ handle = (struct file_handle *)buf;
+ handle->handle_bytes = MAX_HANDLE_SZ;
+ ret = name_to_handle_at(fd, "", handle, &mount_id, AT_EMPTY_PATH);
+ close(fd);
+
+ if (ret < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ /* Send handle to parent */
+ write(pipefd[1], buf, sizeof(*handle) + handle->handle_bytes);
+ close(pipefd[1]);
+
+ /* Exit - namespace should become inactive */
+ exit(0);
+ }
+
+ /* Parent process */
+ close(pipefd[1]);
+
+ /* Read file handle from child */
+ ret = read(pipefd[0], buf, sizeof(buf));
+ close(pipefd[0]);
+
+ /* Wait for child to exit */
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ ASSERT_GT(ret, 0);
+ handle = (struct file_handle *)buf;
+
+ /* Try to reopen namespace - should fail with ENOENT since it's inactive */
+ fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+ ASSERT_LT(fd, 0);
+ /* Should fail with ENOENT (namespace inactive) or ESTALE */
+ ASSERT_TRUE(errno == ENOENT || errno == ESTALE);
+}
+
+/*
+ * Test that a namespace remains active while a process is using it,
+ * even after the creating process exits.
+ */
+TEST(ns_active_with_multiple_processes)
+{
+ struct file_handle *handle;
+ int mount_id;
+ int ret;
+ int fd;
+ int pipefd[2];
+ int syncpipe[2];
+ pid_t pid1, pid2;
+ int status;
+ char buf[sizeof(*handle) + MAX_HANDLE_SZ];
+ char sync_byte;
+
+ /* Create pipes for communication */
+ ASSERT_EQ(pipe(pipefd), 0);
+ ASSERT_EQ(pipe(syncpipe), 0);
+
+ pid1 = fork();
+ ASSERT_GE(pid1, 0);
+
+ if (pid1 == 0) {
+ /* First child - creates namespace */
+ close(pipefd[0]);
+ close(syncpipe[1]);
+
+ /* Create new network namespace */
+ ret = unshare(CLONE_NEWNET);
+ if (ret < 0) {
+ close(pipefd[1]);
+ close(syncpipe[0]);
+ exit(1);
+ }
+
+ /* Open and get handle */
+ fd = open("/proc/self/ns/net", O_RDONLY);
+ if (fd < 0) {
+ close(pipefd[1]);
+ close(syncpipe[0]);
+ exit(1);
+ }
+
+ handle = (struct file_handle *)buf;
+ handle->handle_bytes = MAX_HANDLE_SZ;
+ ret = name_to_handle_at(fd, "", handle, &mount_id, AT_EMPTY_PATH);
+ close(fd);
+
+ if (ret < 0) {
+ close(pipefd[1]);
+ close(syncpipe[0]);
+ exit(1);
+ }
+
+ /* Send handle to parent */
+ write(pipefd[1], buf, sizeof(*handle) + handle->handle_bytes);
+ close(pipefd[1]);
+
+ /* Wait for signal before exiting */
+ read(syncpipe[0], &sync_byte, 1);
+ close(syncpipe[0]);
+ exit(0);
+ }
+
+ /* Parent reads handle */
+ close(pipefd[1]);
+ ret = read(pipefd[0], buf, sizeof(buf));
+ close(pipefd[0]);
+ ASSERT_GT(ret, 0);
+
+ handle = (struct file_handle *)buf;
+
+ /* Create second child that will keep namespace active */
+ pid2 = fork();
+ ASSERT_GE(pid2, 0);
+
+ if (pid2 == 0) {
+ /* Second child - reopens the namespace */
+ close(syncpipe[0]);
+ close(syncpipe[1]);
+
+ /* Open the namespace via handle */
+ fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+ if (fd < 0) {
+ exit(1);
+ }
+
+ /* Join the namespace */
+ ret = setns(fd, CLONE_NEWNET);
+ close(fd);
+ if (ret < 0) {
+ exit(1);
+ }
+
+ /* Sleep to keep namespace active */
+ sleep(1);
+ exit(0);
+ }
+
+ /* Let second child enter the namespace */
+ usleep(100000); /* 100ms */
+
+ /* Signal first child to exit */
+ close(syncpipe[0]);
+ sync_byte = 'X';
+ write(syncpipe[1], &sync_byte, 1);
+ close(syncpipe[1]);
+
+ /* Wait for first child */
+ waitpid(pid1, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+
+ /* Namespace should still be active because second child is using it */
+ fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+ ASSERT_GE(fd, 0);
+ close(fd);
+
+ /* Wait for second child */
+ waitpid(pid2, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+}
+
+/*
+ * Test user namespace active ref tracking via credential lifecycle
+ */
+TEST(userns_active_ref_lifecycle)
+{
+ struct file_handle *handle;
+ int mount_id;
+ int ret;
+ int fd;
+ int pipefd[2];
+ pid_t pid;
+ int status;
+ char buf[sizeof(*handle) + MAX_HANDLE_SZ];
+
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child process */
+ close(pipefd[0]);
+
+ /* Create new user namespace */
+ ret = unshare(CLONE_NEWUSER);
+ if (ret < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ /* Set up uid/gid mappings */
+ int uid_map_fd = open("/proc/self/uid_map", O_WRONLY);
+ int gid_map_fd = open("/proc/self/gid_map", O_WRONLY);
+ int setgroups_fd = open("/proc/self/setgroups", O_WRONLY);
+
+ if (uid_map_fd >= 0 && gid_map_fd >= 0 && setgroups_fd >= 0) {
+ write(setgroups_fd, "deny", 4);
+ close(setgroups_fd);
+
+ char mapping[64];
+ snprintf(mapping, sizeof(mapping), "0 %d 1", getuid());
+ write(uid_map_fd, mapping, strlen(mapping));
+ close(uid_map_fd);
+
+ snprintf(mapping, sizeof(mapping), "0 %d 1", getgid());
+ write(gid_map_fd, mapping, strlen(mapping));
+ close(gid_map_fd);
+ }
+
+ /* Get file handle */
+ fd = open("/proc/self/ns/user", O_RDONLY);
+ if (fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ handle = (struct file_handle *)buf;
+ handle->handle_bytes = MAX_HANDLE_SZ;
+ ret = name_to_handle_at(fd, "", handle, &mount_id, AT_EMPTY_PATH);
+ close(fd);
+
+ if (ret < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ /* Send handle to parent */
+ write(pipefd[1], buf, sizeof(*handle) + handle->handle_bytes);
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Parent */
+ close(pipefd[1]);
+ ret = read(pipefd[0], buf, sizeof(buf));
+ close(pipefd[0]);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ ASSERT_GT(ret, 0);
+ handle = (struct file_handle *)buf;
+
+ /* Namespace should be inactive after all tasks exit */
+ fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+ ASSERT_LT(fd, 0);
+ ASSERT_TRUE(errno == ENOENT || errno == ESTALE);
+}
+
+/*
+ * Test PID namespace active ref tracking
+ */
+TEST(pidns_active_ref_lifecycle)
+{
+ struct file_handle *handle;
+ int mount_id;
+ int ret;
+ int fd;
+ int pipefd[2];
+ pid_t pid;
+ int status;
+ char buf[sizeof(*handle) + MAX_HANDLE_SZ];
+
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child process */
+ close(pipefd[0]);
+
+ /* Create new PID namespace */
+ ret = unshare(CLONE_NEWPID);
+ if (ret < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ /* Fork to actually enter the PID namespace */
+ pid_t child = fork();
+ if (child < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ if (child == 0) {
+ /* Grandchild - in new PID namespace */
+ fd = open("/proc/self/ns/pid", O_RDONLY);
+ if (fd < 0) {
+ exit(1);
+ }
+
+ handle = (struct file_handle *)buf;
+ handle->handle_bytes = MAX_HANDLE_SZ;
+ ret = name_to_handle_at(fd, "", handle, &mount_id, AT_EMPTY_PATH);
+ close(fd);
+
+ if (ret < 0) {
+ exit(1);
+ }
+
+ /* Send handle to grandparent */
+ write(pipefd[1], buf, sizeof(*handle) + handle->handle_bytes);
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Wait for grandchild */
+ waitpid(child, NULL, 0);
+ exit(0);
+ }
+
+ /* Parent */
+ close(pipefd[1]);
+ ret = read(pipefd[0], buf, sizeof(buf));
+ close(pipefd[0]);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ ASSERT_GT(ret, 0);
+ handle = (struct file_handle *)buf;
+
+ /* Namespace should be inactive after all processes exit */
+ fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+ ASSERT_LT(fd, 0);
+ ASSERT_TRUE(errno == ENOENT || errno == ESTALE);
+}
+
+/*
+ * Test that an open file descriptor keeps a namespace active.
+ * Even after the creating process exits, the namespace should remain
+ * active as long as an fd is held open.
+ */
+TEST(ns_fd_keeps_active)
+{
+ struct file_handle *handle;
+ int mount_id;
+ int ret;
+ int nsfd;
+ int pipe_child_ready[2];
+ int pipe_parent_ready[2];
+ pid_t pid;
+ int status;
+ char buf[sizeof(*handle) + MAX_HANDLE_SZ];
+ char sync_byte;
+ char proc_path[64];
+
+ ASSERT_EQ(pipe(pipe_child_ready), 0);
+ ASSERT_EQ(pipe(pipe_parent_ready), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child process */
+ close(pipe_child_ready[0]);
+ close(pipe_parent_ready[1]);
+
+ TH_LOG("Child: creating new network namespace");
+
+ /* Create new network namespace */
+ ret = unshare(CLONE_NEWNET);
+ if (ret < 0) {
+ TH_LOG("Child: unshare(CLONE_NEWNET) failed: %s", strerror(errno));
+ close(pipe_child_ready[1]);
+ close(pipe_parent_ready[0]);
+ exit(1);
+ }
+
+ TH_LOG("Child: network namespace created successfully");
+
+ /* Get file handle for the namespace */
+ nsfd = open("/proc/self/ns/net", O_RDONLY);
+ if (nsfd < 0) {
+ TH_LOG("Child: failed to open /proc/self/ns/net: %s", strerror(errno));
+ close(pipe_child_ready[1]);
+ close(pipe_parent_ready[0]);
+ exit(1);
+ }
+
+ TH_LOG("Child: opened namespace fd %d", nsfd);
+
+ handle = (struct file_handle *)buf;
+ handle->handle_bytes = MAX_HANDLE_SZ;
+ ret = name_to_handle_at(nsfd, "", handle, &mount_id, AT_EMPTY_PATH);
+ close(nsfd);
+
+ if (ret < 0) {
+ TH_LOG("Child: name_to_handle_at failed: %s", strerror(errno));
+ close(pipe_child_ready[1]);
+ close(pipe_parent_ready[0]);
+ exit(1);
+ }
+
+ TH_LOG("Child: got file handle (bytes=%u)", handle->handle_bytes);
+
+ /* Send file handle to parent */
+ ret = write(pipe_child_ready[1], buf, sizeof(*handle) + handle->handle_bytes);
+ TH_LOG("Child: sent %d bytes of file handle to parent", ret);
+ close(pipe_child_ready[1]);
+
+ /* Wait for parent to open the fd */
+ TH_LOG("Child: waiting for parent to open fd");
+ ret = read(pipe_parent_ready[0], &sync_byte, 1);
+ close(pipe_parent_ready[0]);
+
+ TH_LOG("Child: parent signaled (read %d bytes), exiting now", ret);
+ /* Exit - namespace should stay active because parent holds fd */
+ exit(0);
+ }
+
+ /* Parent process */
+ close(pipe_child_ready[1]);
+ close(pipe_parent_ready[0]);
+
+ TH_LOG("Parent: reading file handle from child");
+
+ /* Read file handle from child */
+ ret = read(pipe_child_ready[0], buf, sizeof(buf));
+ close(pipe_child_ready[0]);
+ ASSERT_GT(ret, 0);
+ handle = (struct file_handle *)buf;
+
+ TH_LOG("Parent: received %d bytes, handle size=%u", ret, handle->handle_bytes);
+
+ /* Open the child's namespace while it's still alive */
+ snprintf(proc_path, sizeof(proc_path), "/proc/%d/ns/net", pid);
+ TH_LOG("Parent: opening child's namespace at %s", proc_path);
+ nsfd = open(proc_path, O_RDONLY);
+ if (nsfd < 0) {
+ TH_LOG("Parent: failed to open %s: %s", proc_path, strerror(errno));
+ close(pipe_parent_ready[1]);
+ kill(pid, SIGKILL);
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to open child's namespace");
+ }
+
+ TH_LOG("Parent: opened child's namespace, got fd %d", nsfd);
+
+ /* Signal child that we have the fd */
+ sync_byte = 'G';
+ write(pipe_parent_ready[1], &sync_byte, 1);
+ close(pipe_parent_ready[1]);
+ TH_LOG("Parent: signaled child that we have the fd");
+
+ /* Wait for child to exit */
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ TH_LOG("Child exited, parent holds fd %d to namespace", nsfd);
+
+ /*
+ * Namespace should still be ACTIVE because we hold an fd.
+ * We should be able to reopen it via file handle.
+ */
+ TH_LOG("Attempting to reopen namespace via file handle (should succeed - fd held)");
+ int fd2 = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+ ASSERT_GE(fd2, 0);
+
+ TH_LOG("Successfully reopened namespace via file handle, got fd %d", fd2);
+
+ /* Verify it's the same namespace */
+ struct stat st1, st2;
+ ASSERT_EQ(fstat(nsfd, &st1), 0);
+ ASSERT_EQ(fstat(fd2, &st2), 0);
+ TH_LOG("Namespace inodes: nsfd=%lu, fd2=%lu", st1.st_ino, st2.st_ino);
+ ASSERT_EQ(st1.st_ino, st2.st_ino);
+ close(fd2);
+
+ /* Now close the fd - namespace should become inactive */
+ TH_LOG("Closing fd %d - namespace should become inactive", nsfd);
+ close(nsfd);
+
+ /* Now reopening should fail - namespace is inactive */
+ TH_LOG("Attempting to reopen namespace via file handle (should fail - inactive)");
+ fd2 = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+ ASSERT_LT(fd2, 0);
+ /* Should fail with ENOENT (inactive) or ESTALE (gone) */
+ TH_LOG("Reopen failed as expected: %s (errno=%d)", strerror(errno), errno);
+ ASSERT_TRUE(errno == ENOENT || errno == ESTALE);
+}
+
+/*
+ * Test hierarchical active reference propagation.
+ * When a child namespace is active, its owning user namespace should also
+ * be active automatically due to hierarchical active reference propagation.
+ * This ensures parents are always reachable when children are active.
+ */
+TEST(ns_parent_always_reachable)
+{
+ struct file_handle *parent_handle, *child_handle;
+ int ret;
+ int child_nsfd;
+ int pipefd[2];
+ pid_t pid;
+ int status;
+ __u64 parent_id, child_id;
+ char parent_buf[sizeof(*parent_handle) + MAX_HANDLE_SZ];
+ char child_buf[sizeof(*child_handle) + MAX_HANDLE_SZ];
+
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child process */
+ close(pipefd[0]);
+
+ TH_LOG("Child: creating parent user namespace and setting up mappings");
+
+ /* Create parent user namespace with mappings */
+ ret = setup_userns();
+ if (ret < 0) {
+ TH_LOG("Child: setup_userns() for parent failed: %s", strerror(errno));
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ TH_LOG("Child: parent user namespace created, now uid=%d gid=%d", getuid(), getgid());
+
+ /* Get namespace ID for parent user namespace */
+ int parent_fd = open("/proc/self/ns/user", O_RDONLY);
+ if (parent_fd < 0) {
+ TH_LOG("Child: failed to open parent /proc/self/ns/user: %s", strerror(errno));
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ TH_LOG("Child: opened parent userns fd %d", parent_fd);
+
+ if (ioctl(parent_fd, NS_GET_ID, &parent_id) < 0) {
+ TH_LOG("Child: NS_GET_ID for parent failed: %s", strerror(errno));
+ close(parent_fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(parent_fd);
+
+ TH_LOG("Child: got parent namespace ID %llu", (unsigned long long)parent_id);
+
+ /* Create child user namespace within parent */
+ TH_LOG("Child: creating nested child user namespace");
+ ret = setup_userns();
+ if (ret < 0) {
+ TH_LOG("Child: setup_userns() for child failed: %s", strerror(errno));
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ TH_LOG("Child: nested child user namespace created, uid=%d gid=%d", getuid(), getgid());
+
+ /* Get namespace ID for child user namespace */
+ int child_fd = open("/proc/self/ns/user", O_RDONLY);
+ if (child_fd < 0) {
+ TH_LOG("Child: failed to open child /proc/self/ns/user: %s", strerror(errno));
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ TH_LOG("Child: opened child userns fd %d", child_fd);
+
+ if (ioctl(child_fd, NS_GET_ID, &child_id) < 0) {
+ TH_LOG("Child: NS_GET_ID for child failed: %s", strerror(errno));
+ close(child_fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(child_fd);
+
+ TH_LOG("Child: got child namespace ID %llu", (unsigned long long)child_id);
+
+ /* Send both namespace IDs to parent */
+ TH_LOG("Child: sending both namespace IDs to parent");
+ write(pipefd[1], &parent_id, sizeof(parent_id));
+ write(pipefd[1], &child_id, sizeof(child_id));
+ close(pipefd[1]);
+
+ TH_LOG("Child: exiting - parent userns should become inactive");
+ /* Exit - parent user namespace should become inactive */
+ exit(0);
+ }
+
+ /* Parent process */
+ close(pipefd[1]);
+
+ TH_LOG("Parent: reading both namespace IDs from child");
+
+ /* Read both namespace IDs - fixed size, no parsing needed */
+ ret = read(pipefd[0], &parent_id, sizeof(parent_id));
+ if (ret != sizeof(parent_id)) {
+ close(pipefd[0]);
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to read parent namespace ID from child");
+ }
+
+ ret = read(pipefd[0], &child_id, sizeof(child_id));
+ close(pipefd[0]);
+ if (ret != sizeof(child_id)) {
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to read child namespace ID from child");
+ }
+
+ TH_LOG("Parent: received parent_id=%llu, child_id=%llu",
+ (unsigned long long)parent_id, (unsigned long long)child_id);
+
+ /* Construct file handles from namespace IDs */
+ parent_handle = (struct file_handle *)parent_buf;
+ parent_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+ parent_handle->handle_type = FILEID_NSFS;
+ struct nsfs_file_handle *parent_fh = (struct nsfs_file_handle *)parent_handle->f_handle;
+ parent_fh->ns_id = parent_id;
+ parent_fh->ns_type = 0;
+ parent_fh->ns_inum = 0;
+
+ child_handle = (struct file_handle *)child_buf;
+ child_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+ child_handle->handle_type = FILEID_NSFS;
+ struct nsfs_file_handle *child_fh = (struct nsfs_file_handle *)child_handle->f_handle;
+ child_fh->ns_id = child_id;
+ child_fh->ns_type = 0;
+ child_fh->ns_inum = 0;
+
+ TH_LOG("Parent: opening child namespace BEFORE child exits");
+
+ /* Open child namespace while child is still alive to keep it active */
+ child_nsfd = open_by_handle_at(FD_NSFS_ROOT, child_handle, O_RDONLY);
+ if (child_nsfd < 0) {
+ TH_LOG("Failed to open child namespace: %s (errno=%d)", strerror(errno), errno);
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to open child namespace");
+ }
+
+ TH_LOG("Opened child namespace fd %d", child_nsfd);
+
+ /* Now wait for child to exit */
+ TH_LOG("Parent: waiting for child to exit");
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ TH_LOG("Child process exited, parent holds fd to child namespace");
+
+ /*
+ * With hierarchical active reference propagation:
+ * Since the child namespace is active (parent process holds fd),
+ * the parent user namespace should ALSO be active automatically.
+ * This is because when we took an active reference on the child,
+ * it propagated up to the owning user namespace.
+ */
+ TH_LOG("Attempting to reopen parent namespace (should SUCCEED - hierarchical propagation)");
+ int parent_fd = open_by_handle_at(FD_NSFS_ROOT, parent_handle, O_RDONLY);
+ ASSERT_GE(parent_fd, 0);
+
+ TH_LOG("SUCCESS: Parent namespace is active (fd=%d) due to active child", parent_fd);
+
+ /* Verify we can also get parent via NS_GET_USERNS */
+ TH_LOG("Verifying NS_GET_USERNS also works");
+ int parent_fd2 = ioctl(child_nsfd, NS_GET_USERNS);
+ if (parent_fd2 < 0) {
+ close(parent_fd);
+ close(child_nsfd);
+ TH_LOG("NS_GET_USERNS failed: %s (errno=%d)", strerror(errno), errno);
+ SKIP(return, "NS_GET_USERNS not supported or failed");
+ }
+
+ TH_LOG("NS_GET_USERNS succeeded, got parent fd %d", parent_fd2);
+
+ /* Verify both methods give us the same namespace */
+ struct stat st1, st2;
+ ASSERT_EQ(fstat(parent_fd, &st1), 0);
+ ASSERT_EQ(fstat(parent_fd2, &st2), 0);
+ TH_LOG("Parent namespace inodes: parent_fd=%lu, parent_fd2=%lu", st1.st_ino, st2.st_ino);
+ ASSERT_EQ(st1.st_ino, st2.st_ino);
+
+ /*
+ * Close child fd - parent should remain active because we still
+ * hold direct references to it (parent_fd and parent_fd2).
+ */
+ TH_LOG("Closing child fd - parent should remain active (direct refs held)");
+ close(child_nsfd);
+
+ /* Parent should still be openable */
+ TH_LOG("Verifying parent still active via file handle");
+ int parent_fd3 = open_by_handle_at(FD_NSFS_ROOT, parent_handle, O_RDONLY);
+ ASSERT_GE(parent_fd3, 0);
+ close(parent_fd3);
+
+ TH_LOG("Closing all fds to parent namespace");
+ close(parent_fd);
+ close(parent_fd2);
+
+ /* Both should now be inactive */
+ TH_LOG("Attempting to reopen parent (should fail - inactive, no refs)");
+ parent_fd = open_by_handle_at(FD_NSFS_ROOT, parent_handle, O_RDONLY);
+ ASSERT_LT(parent_fd, 0);
+ TH_LOG("Parent inactive as expected: %s (errno=%d)", strerror(errno), errno);
+ ASSERT_TRUE(errno == ENOENT || errno == ESTALE);
+}
+
+/*
+ * Test that bind mounts keep namespaces in the tree even when inactive
+ */
+TEST(ns_bind_mount_keeps_in_tree)
+{
+ struct file_handle *handle;
+ int mount_id;
+ int ret;
+ int fd;
+ int pipefd[2];
+ pid_t pid;
+ int status;
+ char buf[sizeof(*handle) + MAX_HANDLE_SZ];
+ char tmpfile[] = "/tmp/ns-test-XXXXXX";
+ int tmpfd;
+
+ /* Create temporary file for bind mount */
+ tmpfd = mkstemp(tmpfile);
+ if (tmpfd < 0) {
+ SKIP(return, "Cannot create temporary file");
+ }
+ close(tmpfd);
+
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child process */
+ close(pipefd[0]);
+
+ /* Unshare mount namespace and make mounts private to avoid propagation */
+ ret = unshare(CLONE_NEWNS);
+ if (ret < 0) {
+ close(pipefd[1]);
+ unlink(tmpfile);
+ exit(1);
+ }
+ ret = mount(NULL, "/", NULL, MS_PRIVATE | MS_REC, NULL);
+ if (ret < 0) {
+ close(pipefd[1]);
+ unlink(tmpfile);
+ exit(1);
+ }
+
+ /* Create new network namespace */
+ ret = unshare(CLONE_NEWNET);
+ if (ret < 0) {
+ close(pipefd[1]);
+ unlink(tmpfile);
+ exit(1);
+ }
+
+ /* Bind mount the namespace */
+ ret = mount("/proc/self/ns/net", tmpfile, NULL, MS_BIND, NULL);
+ if (ret < 0) {
+ close(pipefd[1]);
+ unlink(tmpfile);
+ exit(1);
+ }
+
+ /* Get file handle */
+ fd = open("/proc/self/ns/net", O_RDONLY);
+ if (fd < 0) {
+ umount(tmpfile);
+ close(pipefd[1]);
+ unlink(tmpfile);
+ exit(1);
+ }
+
+ handle = (struct file_handle *)buf;
+ handle->handle_bytes = MAX_HANDLE_SZ;
+ ret = name_to_handle_at(fd, "", handle, &mount_id, AT_EMPTY_PATH);
+ close(fd);
+
+ if (ret < 0) {
+ umount(tmpfile);
+ close(pipefd[1]);
+ unlink(tmpfile);
+ exit(1);
+ }
+
+ /* Send handle to parent */
+ write(pipefd[1], buf, sizeof(*handle) + handle->handle_bytes);
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Parent */
+ close(pipefd[1]);
+ ret = read(pipefd[0], buf, sizeof(buf));
+ close(pipefd[0]);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ ASSERT_GT(ret, 0);
+ handle = (struct file_handle *)buf;
+
+ /*
+ * Namespace should be inactive but still in tree due to bind mount.
+ * Reopening should fail with ENOENT (inactive) not ESTALE (not in tree).
+ */
+ fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+ ASSERT_LT(fd, 0);
+ /* Should be ENOENT (inactive) since bind mount keeps it in tree */
+ if (errno != ENOENT && errno != ESTALE) {
+ TH_LOG("Unexpected error: %d", errno);
+ }
+
+ /* Cleanup */
+ umount(tmpfile);
+ unlink(tmpfile);
+}
+
+/*
+ * Test multi-level hierarchy (3+ levels deep).
+ * Grandparent → Parent → Child
+ * When child is active, both parent AND grandparent should be active.
+ */
+TEST(ns_multilevel_hierarchy)
+{
+ struct file_handle *gp_handle, *p_handle, *c_handle;
+ int ret, pipefd[2];
+ pid_t pid;
+ int status;
+ __u64 gp_id, p_id, c_id;
+ char gp_buf[sizeof(*gp_handle) + MAX_HANDLE_SZ];
+ char p_buf[sizeof(*p_handle) + MAX_HANDLE_SZ];
+ char c_buf[sizeof(*c_handle) + MAX_HANDLE_SZ];
+
+ ASSERT_EQ(pipe(pipefd), 0);
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ close(pipefd[0]);
+
+ /* Create grandparent user namespace */
+ if (setup_userns() < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ int gp_fd = open("/proc/self/ns/user", O_RDONLY);
+ if (gp_fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+ if (ioctl(gp_fd, NS_GET_ID, &gp_id) < 0) {
+ close(gp_fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(gp_fd);
+
+ /* Create parent user namespace */
+ if (setup_userns() < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ int p_fd = open("/proc/self/ns/user", O_RDONLY);
+ if (p_fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+ if (ioctl(p_fd, NS_GET_ID, &p_id) < 0) {
+ close(p_fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(p_fd);
+
+ /* Create child user namespace */
+ if (setup_userns() < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ int c_fd = open("/proc/self/ns/user", O_RDONLY);
+ if (c_fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+ if (ioctl(c_fd, NS_GET_ID, &c_id) < 0) {
+ close(c_fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(c_fd);
+
+ /* Send all three namespace IDs */
+ write(pipefd[1], &gp_id, sizeof(gp_id));
+ write(pipefd[1], &p_id, sizeof(p_id));
+ write(pipefd[1], &c_id, sizeof(c_id));
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ close(pipefd[1]);
+
+ /* Read all three namespace IDs - fixed size, no parsing needed */
+ ret = read(pipefd[0], &gp_id, sizeof(gp_id));
+ if (ret != sizeof(gp_id)) {
+ close(pipefd[0]);
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to read grandparent namespace ID from child");
+ }
+
+ ret = read(pipefd[0], &p_id, sizeof(p_id));
+ if (ret != sizeof(p_id)) {
+ close(pipefd[0]);
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to read parent namespace ID from child");
+ }
+
+ ret = read(pipefd[0], &c_id, sizeof(c_id));
+ close(pipefd[0]);
+ if (ret != sizeof(c_id)) {
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to read child namespace ID from child");
+ }
+
+ /* Construct file handles from namespace IDs */
+ gp_handle = (struct file_handle *)gp_buf;
+ gp_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+ gp_handle->handle_type = FILEID_NSFS;
+ struct nsfs_file_handle *gp_fh = (struct nsfs_file_handle *)gp_handle->f_handle;
+ gp_fh->ns_id = gp_id;
+ gp_fh->ns_type = 0;
+ gp_fh->ns_inum = 0;
+
+ p_handle = (struct file_handle *)p_buf;
+ p_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+ p_handle->handle_type = FILEID_NSFS;
+ struct nsfs_file_handle *p_fh = (struct nsfs_file_handle *)p_handle->f_handle;
+ p_fh->ns_id = p_id;
+ p_fh->ns_type = 0;
+ p_fh->ns_inum = 0;
+
+ c_handle = (struct file_handle *)c_buf;
+ c_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+ c_handle->handle_type = FILEID_NSFS;
+ struct nsfs_file_handle *c_fh = (struct nsfs_file_handle *)c_handle->f_handle;
+ c_fh->ns_id = c_id;
+ c_fh->ns_type = 0;
+ c_fh->ns_inum = 0;
+
+ /* Open child before process exits */
+ int c_fd = open_by_handle_at(FD_NSFS_ROOT, c_handle, O_RDONLY);
+ if (c_fd < 0) {
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to open child namespace");
+ }
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ /*
+ * With 3-level hierarchy and child active:
+ * - Child is active (we hold fd)
+ * - Parent should be active (propagated from child)
+ * - Grandparent should be active (propagated from parent)
+ */
+ TH_LOG("Testing parent active when child is active");
+ int p_fd = open_by_handle_at(FD_NSFS_ROOT, p_handle, O_RDONLY);
+ ASSERT_GE(p_fd, 0);
+
+ TH_LOG("Testing grandparent active when child is active");
+ int gp_fd = open_by_handle_at(FD_NSFS_ROOT, gp_handle, O_RDONLY);
+ ASSERT_GE(gp_fd, 0);
+
+ close(c_fd);
+ close(p_fd);
+ close(gp_fd);
+}
+
+/*
+ * Test multiple children sharing same parent.
+ * Parent should stay active as long as ANY child is active.
+ */
+TEST(ns_multiple_children_same_parent)
+{
+ struct file_handle *p_handle, *c1_handle, *c2_handle;
+ int ret, pipefd[2];
+ pid_t pid;
+ int status;
+ __u64 p_id, c1_id, c2_id;
+ char p_buf[sizeof(*p_handle) + MAX_HANDLE_SZ];
+ char c1_buf[sizeof(*c1_handle) + MAX_HANDLE_SZ];
+ char c2_buf[sizeof(*c2_handle) + MAX_HANDLE_SZ];
+
+ ASSERT_EQ(pipe(pipefd), 0);
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ close(pipefd[0]);
+
+ /* Create parent user namespace */
+ if (setup_userns() < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ int p_fd = open("/proc/self/ns/user", O_RDONLY);
+ if (p_fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+ if (ioctl(p_fd, NS_GET_ID, &p_id) < 0) {
+ close(p_fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(p_fd);
+
+ /* Create first child user namespace */
+ if (setup_userns() < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ int c1_fd = open("/proc/self/ns/user", O_RDONLY);
+ if (c1_fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+ if (ioctl(c1_fd, NS_GET_ID, &c1_id) < 0) {
+ close(c1_fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(c1_fd);
+
+ /* Return to parent user namespace and create second child */
+ /* We can't actually do this easily, so let's create a sibling namespace
+ * by creating a network namespace instead */
+ if (unshare(CLONE_NEWNET) < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ int c2_fd = open("/proc/self/ns/net", O_RDONLY);
+ if (c2_fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+ if (ioctl(c2_fd, NS_GET_ID, &c2_id) < 0) {
+ close(c2_fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(c2_fd);
+
+ /* Send all namespace IDs */
+ write(pipefd[1], &p_id, sizeof(p_id));
+ write(pipefd[1], &c1_id, sizeof(c1_id));
+ write(pipefd[1], &c2_id, sizeof(c2_id));
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ close(pipefd[1]);
+
+ /* Read all three namespace IDs - fixed size, no parsing needed */
+ ret = read(pipefd[0], &p_id, sizeof(p_id));
+ if (ret != sizeof(p_id)) {
+ close(pipefd[0]);
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to read parent namespace ID");
+ }
+
+ ret = read(pipefd[0], &c1_id, sizeof(c1_id));
+ if (ret != sizeof(c1_id)) {
+ close(pipefd[0]);
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to read first child namespace ID");
+ }
+
+ ret = read(pipefd[0], &c2_id, sizeof(c2_id));
+ close(pipefd[0]);
+ if (ret != sizeof(c2_id)) {
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to read second child namespace ID");
+ }
+
+ /* Construct file handles from namespace IDs */
+ p_handle = (struct file_handle *)p_buf;
+ p_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+ p_handle->handle_type = FILEID_NSFS;
+ struct nsfs_file_handle *p_fh = (struct nsfs_file_handle *)p_handle->f_handle;
+ p_fh->ns_id = p_id;
+ p_fh->ns_type = 0;
+ p_fh->ns_inum = 0;
+
+ c1_handle = (struct file_handle *)c1_buf;
+ c1_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+ c1_handle->handle_type = FILEID_NSFS;
+ struct nsfs_file_handle *c1_fh = (struct nsfs_file_handle *)c1_handle->f_handle;
+ c1_fh->ns_id = c1_id;
+ c1_fh->ns_type = 0;
+ c1_fh->ns_inum = 0;
+
+ c2_handle = (struct file_handle *)c2_buf;
+ c2_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+ c2_handle->handle_type = FILEID_NSFS;
+ struct nsfs_file_handle *c2_fh = (struct nsfs_file_handle *)c2_handle->f_handle;
+ c2_fh->ns_id = c2_id;
+ c2_fh->ns_type = 0;
+ c2_fh->ns_inum = 0;
+
+ /* Open both children before process exits */
+ int c1_fd = open_by_handle_at(FD_NSFS_ROOT, c1_handle, O_RDONLY);
+ int c2_fd = open_by_handle_at(FD_NSFS_ROOT, c2_handle, O_RDONLY);
+
+ if (c1_fd < 0 || c2_fd < 0) {
+ if (c1_fd >= 0) close(c1_fd);
+ if (c2_fd >= 0) close(c2_fd);
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to open child namespaces");
+ }
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ /* Parent should be active (both children active) */
+ TH_LOG("Both children active - parent should be active");
+ int p_fd = open_by_handle_at(FD_NSFS_ROOT, p_handle, O_RDONLY);
+ ASSERT_GE(p_fd, 0);
+ close(p_fd);
+
+ /* Close first child - parent should STILL be active */
+ TH_LOG("Closing first child - parent should still be active");
+ close(c1_fd);
+ p_fd = open_by_handle_at(FD_NSFS_ROOT, p_handle, O_RDONLY);
+ ASSERT_GE(p_fd, 0);
+ close(p_fd);
+
+ /* Close second child - NOW parent should become inactive */
+ TH_LOG("Closing second child - parent should become inactive");
+ close(c2_fd);
+ p_fd = open_by_handle_at(FD_NSFS_ROOT, p_handle, O_RDONLY);
+ ASSERT_LT(p_fd, 0);
+}
+
+/*
+ * Test that different namespace types with same owner all contribute
+ * active references to the owning user namespace.
+ */
+TEST(ns_different_types_same_owner)
+{
+ struct file_handle *u_handle, *n_handle, *ut_handle;
+ int ret, pipefd[2];
+ pid_t pid;
+ int status;
+ __u64 u_id, n_id, ut_id;
+ char u_buf[sizeof(*u_handle) + MAX_HANDLE_SZ];
+ char n_buf[sizeof(*n_handle) + MAX_HANDLE_SZ];
+ char ut_buf[sizeof(*ut_handle) + MAX_HANDLE_SZ];
+
+ ASSERT_EQ(pipe(pipefd), 0);
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ close(pipefd[0]);
+
+ /* Create user namespace */
+ if (setup_userns() < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ int u_fd = open("/proc/self/ns/user", O_RDONLY);
+ if (u_fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+ if (ioctl(u_fd, NS_GET_ID, &u_id) < 0) {
+ close(u_fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(u_fd);
+
+ /* Create network namespace (owned by user namespace) */
+ if (unshare(CLONE_NEWNET) < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ int n_fd = open("/proc/self/ns/net", O_RDONLY);
+ if (n_fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+ if (ioctl(n_fd, NS_GET_ID, &n_id) < 0) {
+ close(n_fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(n_fd);
+
+ /* Create UTS namespace (also owned by user namespace) */
+ if (unshare(CLONE_NEWUTS) < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ int ut_fd = open("/proc/self/ns/uts", O_RDONLY);
+ if (ut_fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+ if (ioctl(ut_fd, NS_GET_ID, &ut_id) < 0) {
+ close(ut_fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(ut_fd);
+
+ /* Send all namespace IDs */
+ write(pipefd[1], &u_id, sizeof(u_id));
+ write(pipefd[1], &n_id, sizeof(n_id));
+ write(pipefd[1], &ut_id, sizeof(ut_id));
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ close(pipefd[1]);
+
+ /* Read all three namespace IDs - fixed size, no parsing needed */
+ ret = read(pipefd[0], &u_id, sizeof(u_id));
+ if (ret != sizeof(u_id)) {
+ close(pipefd[0]);
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to read user namespace ID");
+ }
+
+ ret = read(pipefd[0], &n_id, sizeof(n_id));
+ if (ret != sizeof(n_id)) {
+ close(pipefd[0]);
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to read network namespace ID");
+ }
+
+ ret = read(pipefd[0], &ut_id, sizeof(ut_id));
+ close(pipefd[0]);
+ if (ret != sizeof(ut_id)) {
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to read UTS namespace ID");
+ }
+
+ /* Construct file handles from namespace IDs */
+ u_handle = (struct file_handle *)u_buf;
+ u_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+ u_handle->handle_type = FILEID_NSFS;
+ struct nsfs_file_handle *u_fh = (struct nsfs_file_handle *)u_handle->f_handle;
+ u_fh->ns_id = u_id;
+ u_fh->ns_type = 0;
+ u_fh->ns_inum = 0;
+
+ n_handle = (struct file_handle *)n_buf;
+ n_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+ n_handle->handle_type = FILEID_NSFS;
+ struct nsfs_file_handle *n_fh = (struct nsfs_file_handle *)n_handle->f_handle;
+ n_fh->ns_id = n_id;
+ n_fh->ns_type = 0;
+ n_fh->ns_inum = 0;
+
+ ut_handle = (struct file_handle *)ut_buf;
+ ut_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+ ut_handle->handle_type = FILEID_NSFS;
+ struct nsfs_file_handle *ut_fh = (struct nsfs_file_handle *)ut_handle->f_handle;
+ ut_fh->ns_id = ut_id;
+ ut_fh->ns_type = 0;
+ ut_fh->ns_inum = 0;
+
+ /* Open both non-user namespaces before process exits */
+ int n_fd = open_by_handle_at(FD_NSFS_ROOT, n_handle, O_RDONLY);
+ int ut_fd = open_by_handle_at(FD_NSFS_ROOT, ut_handle, O_RDONLY);
+
+ if (n_fd < 0 || ut_fd < 0) {
+ if (n_fd >= 0) close(n_fd);
+ if (ut_fd >= 0) close(ut_fd);
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to open namespaces");
+ }
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ /*
+ * Both network and UTS namespaces are active.
+ * User namespace should be active (gets 2 active refs).
+ */
+ TH_LOG("Both net and uts active - user namespace should be active");
+ int u_fd = open_by_handle_at(FD_NSFS_ROOT, u_handle, O_RDONLY);
+ ASSERT_GE(u_fd, 0);
+ close(u_fd);
+
+ /* Close network namespace - user namespace should STILL be active */
+ TH_LOG("Closing network ns - user ns should still be active (uts still active)");
+ close(n_fd);
+ u_fd = open_by_handle_at(FD_NSFS_ROOT, u_handle, O_RDONLY);
+ ASSERT_GE(u_fd, 0);
+ close(u_fd);
+
+ /* Close UTS namespace - user namespace should become inactive */
+ TH_LOG("Closing uts ns - user ns should become inactive");
+ close(ut_fd);
+ u_fd = open_by_handle_at(FD_NSFS_ROOT, u_handle, O_RDONLY);
+ ASSERT_LT(u_fd, 0);
+}
+
+/*
+ * Test hierarchical propagation with deep namespace hierarchy.
+ * Create: init_user_ns -> user_A -> user_B -> net_ns
+ * When net_ns is active, both user_A and user_B should be active.
+ * This verifies the conditional recursion in __ns_ref_active_put() works.
+ */
+TEST(ns_deep_hierarchy_propagation)
+{
+ struct file_handle *ua_handle, *ub_handle, *net_handle;
+ int ret, pipefd[2];
+ pid_t pid;
+ int status;
+ __u64 ua_id, ub_id, net_id;
+ char ua_buf[sizeof(*ua_handle) + MAX_HANDLE_SZ];
+ char ub_buf[sizeof(*ub_handle) + MAX_HANDLE_SZ];
+ char net_buf[sizeof(*net_handle) + MAX_HANDLE_SZ];
+
+ ASSERT_EQ(pipe(pipefd), 0);
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ close(pipefd[0]);
+
+ /* Create user_A -> user_B -> net hierarchy */
+ if (setup_userns() < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ int ua_fd = open("/proc/self/ns/user", O_RDONLY);
+ if (ua_fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+ if (ioctl(ua_fd, NS_GET_ID, &ua_id) < 0) {
+ close(ua_fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(ua_fd);
+
+ if (setup_userns() < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ int ub_fd = open("/proc/self/ns/user", O_RDONLY);
+ if (ub_fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+ if (ioctl(ub_fd, NS_GET_ID, &ub_id) < 0) {
+ close(ub_fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(ub_fd);
+
+ if (unshare(CLONE_NEWNET) < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ int net_fd = open("/proc/self/ns/net", O_RDONLY);
+ if (net_fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+ if (ioctl(net_fd, NS_GET_ID, &net_id) < 0) {
+ close(net_fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(net_fd);
+
+ /* Send all three namespace IDs */
+ write(pipefd[1], &ua_id, sizeof(ua_id));
+ write(pipefd[1], &ub_id, sizeof(ub_id));
+ write(pipefd[1], &net_id, sizeof(net_id));
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ close(pipefd[1]);
+
+ /* Read all three namespace IDs - fixed size, no parsing needed */
+ ret = read(pipefd[0], &ua_id, sizeof(ua_id));
+ if (ret != sizeof(ua_id)) {
+ close(pipefd[0]);
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to read user_A namespace ID");
+ }
+
+ ret = read(pipefd[0], &ub_id, sizeof(ub_id));
+ if (ret != sizeof(ub_id)) {
+ close(pipefd[0]);
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to read user_B namespace ID");
+ }
+
+ ret = read(pipefd[0], &net_id, sizeof(net_id));
+ close(pipefd[0]);
+ if (ret != sizeof(net_id)) {
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to read network namespace ID");
+ }
+
+ /* Construct file handles from namespace IDs */
+ ua_handle = (struct file_handle *)ua_buf;
+ ua_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+ ua_handle->handle_type = FILEID_NSFS;
+ struct nsfs_file_handle *ua_fh = (struct nsfs_file_handle *)ua_handle->f_handle;
+ ua_fh->ns_id = ua_id;
+ ua_fh->ns_type = 0;
+ ua_fh->ns_inum = 0;
+
+ ub_handle = (struct file_handle *)ub_buf;
+ ub_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+ ub_handle->handle_type = FILEID_NSFS;
+ struct nsfs_file_handle *ub_fh = (struct nsfs_file_handle *)ub_handle->f_handle;
+ ub_fh->ns_id = ub_id;
+ ub_fh->ns_type = 0;
+ ub_fh->ns_inum = 0;
+
+ net_handle = (struct file_handle *)net_buf;
+ net_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+ net_handle->handle_type = FILEID_NSFS;
+ struct nsfs_file_handle *net_fh = (struct nsfs_file_handle *)net_handle->f_handle;
+ net_fh->ns_id = net_id;
+ net_fh->ns_type = 0;
+ net_fh->ns_inum = 0;
+
+ /* Open net_ns before child exits to keep it active */
+ int net_fd = open_by_handle_at(FD_NSFS_ROOT, net_handle, O_RDONLY);
+ if (net_fd < 0) {
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to open network namespace");
+ }
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ /* With net_ns active, both user_A and user_B should be active */
+ TH_LOG("Testing user_B active (net_ns active causes propagation)");
+ int ub_fd = open_by_handle_at(FD_NSFS_ROOT, ub_handle, O_RDONLY);
+ ASSERT_GE(ub_fd, 0);
+
+ TH_LOG("Testing user_A active (propagated through user_B)");
+ int ua_fd = open_by_handle_at(FD_NSFS_ROOT, ua_handle, O_RDONLY);
+ ASSERT_GE(ua_fd, 0);
+
+ /* Close net_ns - user_B should stay active (we hold direct ref) */
+ TH_LOG("Closing net_ns, user_B should remain active (direct ref held)");
+ close(net_fd);
+ int ub_fd2 = open_by_handle_at(FD_NSFS_ROOT, ub_handle, O_RDONLY);
+ ASSERT_GE(ub_fd2, 0);
+ close(ub_fd2);
+
+ /* Close user_B - user_A should stay active (we hold direct ref) */
+ TH_LOG("Closing user_B, user_A should remain active (direct ref held)");
+ close(ub_fd);
+ int ua_fd2 = open_by_handle_at(FD_NSFS_ROOT, ua_handle, O_RDONLY);
+ ASSERT_GE(ua_fd2, 0);
+ close(ua_fd2);
+
+ /* Close user_A - everything should become inactive */
+ TH_LOG("Closing user_A, all should become inactive");
+ close(ua_fd);
+
+ /* All should now be inactive */
+ ua_fd = open_by_handle_at(FD_NSFS_ROOT, ua_handle, O_RDONLY);
+ ASSERT_LT(ua_fd, 0);
+}
+
+/*
+ * Test that parent stays active as long as ANY child is active.
+ * Create parent user namespace with two child net namespaces.
+ * Parent should remain active until BOTH children are inactive.
+ */
+TEST(ns_parent_multiple_children_refcount)
+{
+ struct file_handle *parent_handle, *net1_handle, *net2_handle;
+ int ret, pipefd[2], syncpipe[2];
+ pid_t pid;
+ int status;
+ __u64 p_id, n1_id, n2_id;
+ char p_buf[sizeof(*parent_handle) + MAX_HANDLE_SZ];
+ char n1_buf[sizeof(*net1_handle) + MAX_HANDLE_SZ];
+ char n2_buf[sizeof(*net2_handle) + MAX_HANDLE_SZ];
+ char sync_byte;
+
+ ASSERT_EQ(pipe(pipefd), 0);
+ ASSERT_EQ(pipe(syncpipe), 0);
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ close(pipefd[0]);
+ close(syncpipe[1]);
+
+ /* Create parent user namespace */
+ if (setup_userns() < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ int p_fd = open("/proc/self/ns/user", O_RDONLY);
+ if (p_fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+ if (ioctl(p_fd, NS_GET_ID, &p_id) < 0) {
+ close(p_fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(p_fd);
+
+ /* Create first network namespace */
+ if (unshare(CLONE_NEWNET) < 0) {
+ close(pipefd[1]);
+ close(syncpipe[0]);
+ exit(1);
+ }
+
+ int n1_fd = open("/proc/self/ns/net", O_RDONLY);
+ if (n1_fd < 0) {
+ close(pipefd[1]);
+ close(syncpipe[0]);
+ exit(1);
+ }
+ if (ioctl(n1_fd, NS_GET_ID, &n1_id) < 0) {
+ close(n1_fd);
+ close(pipefd[1]);
+ close(syncpipe[0]);
+ exit(1);
+ }
+ /* Keep n1_fd open so first namespace stays active */
+
+ /* Create second network namespace */
+ if (unshare(CLONE_NEWNET) < 0) {
+ close(n1_fd);
+ close(pipefd[1]);
+ close(syncpipe[0]);
+ exit(1);
+ }
+
+ int n2_fd = open("/proc/self/ns/net", O_RDONLY);
+ if (n2_fd < 0) {
+ close(n1_fd);
+ close(pipefd[1]);
+ close(syncpipe[0]);
+ exit(1);
+ }
+ if (ioctl(n2_fd, NS_GET_ID, &n2_id) < 0) {
+ close(n1_fd);
+ close(n2_fd);
+ close(pipefd[1]);
+ close(syncpipe[0]);
+ exit(1);
+ }
+ /* Keep both n1_fd and n2_fd open */
+
+ /* Send all namespace IDs */
+ write(pipefd[1], &p_id, sizeof(p_id));
+ write(pipefd[1], &n1_id, sizeof(n1_id));
+ write(pipefd[1], &n2_id, sizeof(n2_id));
+ close(pipefd[1]);
+
+ /* Wait for parent to signal before exiting */
+ read(syncpipe[0], &sync_byte, 1);
+ close(syncpipe[0]);
+ exit(0);
+ }
+
+ close(pipefd[1]);
+ close(syncpipe[0]);
+
+ /* Read all three namespace IDs - fixed size, no parsing needed */
+ ret = read(pipefd[0], &p_id, sizeof(p_id));
+ if (ret != sizeof(p_id)) {
+ close(pipefd[0]);
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to read parent namespace ID");
+ }
+
+ ret = read(pipefd[0], &n1_id, sizeof(n1_id));
+ if (ret != sizeof(n1_id)) {
+ close(pipefd[0]);
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to read first network namespace ID");
+ }
+
+ ret = read(pipefd[0], &n2_id, sizeof(n2_id));
+ close(pipefd[0]);
+ if (ret != sizeof(n2_id)) {
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to read second network namespace ID");
+ }
+
+ /* Construct file handles from namespace IDs */
+ parent_handle = (struct file_handle *)p_buf;
+ parent_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+ parent_handle->handle_type = FILEID_NSFS;
+ struct nsfs_file_handle *p_fh = (struct nsfs_file_handle *)parent_handle->f_handle;
+ p_fh->ns_id = p_id;
+ p_fh->ns_type = 0;
+ p_fh->ns_inum = 0;
+
+ net1_handle = (struct file_handle *)n1_buf;
+ net1_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+ net1_handle->handle_type = FILEID_NSFS;
+ struct nsfs_file_handle *n1_fh = (struct nsfs_file_handle *)net1_handle->f_handle;
+ n1_fh->ns_id = n1_id;
+ n1_fh->ns_type = 0;
+ n1_fh->ns_inum = 0;
+
+ net2_handle = (struct file_handle *)n2_buf;
+ net2_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+ net2_handle->handle_type = FILEID_NSFS;
+ struct nsfs_file_handle *n2_fh = (struct nsfs_file_handle *)net2_handle->f_handle;
+ n2_fh->ns_id = n2_id;
+ n2_fh->ns_type = 0;
+ n2_fh->ns_inum = 0;
+
+ /* Open both net namespaces while child is still alive */
+ int n1_fd = open_by_handle_at(FD_NSFS_ROOT, net1_handle, O_RDONLY);
+ int n2_fd = open_by_handle_at(FD_NSFS_ROOT, net2_handle, O_RDONLY);
+ if (n1_fd < 0 || n2_fd < 0) {
+ if (n1_fd >= 0) close(n1_fd);
+ if (n2_fd >= 0) close(n2_fd);
+ sync_byte = 'G';
+ write(syncpipe[1], &sync_byte, 1);
+ close(syncpipe[1]);
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to open net namespaces");
+ }
+
+ /* Signal child that we have opened the namespaces */
+ sync_byte = 'G';
+ write(syncpipe[1], &sync_byte, 1);
+ close(syncpipe[1]);
+
+ /* Wait for child to exit */
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ /* Parent should be active (has 2 active children) */
+ TH_LOG("Both net namespaces active - parent should be active");
+ int p_fd = open_by_handle_at(FD_NSFS_ROOT, parent_handle, O_RDONLY);
+ ASSERT_GE(p_fd, 0);
+ close(p_fd);
+
+ /* Close first net namespace - parent should STILL be active */
+ TH_LOG("Closing first net ns - parent should still be active");
+ close(n1_fd);
+ p_fd = open_by_handle_at(FD_NSFS_ROOT, parent_handle, O_RDONLY);
+ ASSERT_GE(p_fd, 0);
+ close(p_fd);
+
+ /* Close second net namespace - parent should become inactive */
+ TH_LOG("Closing second net ns - parent should become inactive");
+ close(n2_fd);
+ p_fd = open_by_handle_at(FD_NSFS_ROOT, parent_handle, O_RDONLY);
+ ASSERT_LT(p_fd, 0);
+}
+
+/*
+ * Test that user namespace as a child also propagates correctly.
+ * Create user_A -> user_B, verify when user_B is active that user_A
+ * is also active. This is different from non-user namespace children.
+ */
+TEST(ns_userns_child_propagation)
+{
+ struct file_handle *ua_handle, *ub_handle;
+ int ret, pipefd[2];
+ pid_t pid;
+ int status;
+ __u64 ua_id, ub_id;
+ char ua_buf[sizeof(*ua_handle) + MAX_HANDLE_SZ];
+ char ub_buf[sizeof(*ub_handle) + MAX_HANDLE_SZ];
+
+ ASSERT_EQ(pipe(pipefd), 0);
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ close(pipefd[0]);
+
+ /* Create user_A */
+ if (setup_userns() < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ int ua_fd = open("/proc/self/ns/user", O_RDONLY);
+ if (ua_fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+ if (ioctl(ua_fd, NS_GET_ID, &ua_id) < 0) {
+ close(ua_fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(ua_fd);
+
+ /* Create user_B (child of user_A) */
+ if (setup_userns() < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ int ub_fd = open("/proc/self/ns/user", O_RDONLY);
+ if (ub_fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+ if (ioctl(ub_fd, NS_GET_ID, &ub_id) < 0) {
+ close(ub_fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(ub_fd);
+
+ /* Send both namespace IDs */
+ write(pipefd[1], &ua_id, sizeof(ua_id));
+ write(pipefd[1], &ub_id, sizeof(ub_id));
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ close(pipefd[1]);
+
+ /* Read both namespace IDs - fixed size, no parsing needed */
+ ret = read(pipefd[0], &ua_id, sizeof(ua_id));
+ if (ret != sizeof(ua_id)) {
+ close(pipefd[0]);
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to read user_A namespace ID");
+ }
+
+ ret = read(pipefd[0], &ub_id, sizeof(ub_id));
+ close(pipefd[0]);
+ if (ret != sizeof(ub_id)) {
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to read user_B namespace ID");
+ }
+
+ /* Construct file handles from namespace IDs */
+ ua_handle = (struct file_handle *)ua_buf;
+ ua_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+ ua_handle->handle_type = FILEID_NSFS;
+ struct nsfs_file_handle *ua_fh = (struct nsfs_file_handle *)ua_handle->f_handle;
+ ua_fh->ns_id = ua_id;
+ ua_fh->ns_type = 0;
+ ua_fh->ns_inum = 0;
+
+ ub_handle = (struct file_handle *)ub_buf;
+ ub_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+ ub_handle->handle_type = FILEID_NSFS;
+ struct nsfs_file_handle *ub_fh = (struct nsfs_file_handle *)ub_handle->f_handle;
+ ub_fh->ns_id = ub_id;
+ ub_fh->ns_type = 0;
+ ub_fh->ns_inum = 0;
+
+ /* Open user_B before child exits */
+ int ub_fd = open_by_handle_at(FD_NSFS_ROOT, ub_handle, O_RDONLY);
+ if (ub_fd < 0) {
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to open user_B");
+ }
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ /* With user_B active, user_A should also be active */
+ TH_LOG("Testing user_A active when child user_B is active");
+ int ua_fd = open_by_handle_at(FD_NSFS_ROOT, ua_handle, O_RDONLY);
+ ASSERT_GE(ua_fd, 0);
+
+ /* Close user_B */
+ TH_LOG("Closing user_B");
+ close(ub_fd);
+
+ /* user_A should remain active (we hold direct ref) */
+ int ua_fd2 = open_by_handle_at(FD_NSFS_ROOT, ua_handle, O_RDONLY);
+ ASSERT_GE(ua_fd2, 0);
+ close(ua_fd2);
+
+ /* Close user_A - should become inactive */
+ TH_LOG("Closing user_A - should become inactive");
+ close(ua_fd);
+
+ ua_fd = open_by_handle_at(FD_NSFS_ROOT, ua_handle, O_RDONLY);
+ ASSERT_LT(ua_fd, 0);
+}
+
+/*
+ * Test different namespace types (net, uts, ipc) all contributing
+ * active references to the same owning user namespace.
+ */
+TEST(ns_mixed_types_same_owner)
+{
+ struct file_handle *user_handle, *net_handle, *uts_handle;
+ int ret, pipefd[2];
+ pid_t pid;
+ int status;
+ __u64 u_id, n_id, ut_id;
+ char u_buf[sizeof(*user_handle) + MAX_HANDLE_SZ];
+ char n_buf[sizeof(*net_handle) + MAX_HANDLE_SZ];
+ char ut_buf[sizeof(*uts_handle) + MAX_HANDLE_SZ];
+
+ ASSERT_EQ(pipe(pipefd), 0);
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ close(pipefd[0]);
+
+ if (setup_userns() < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ int u_fd = open("/proc/self/ns/user", O_RDONLY);
+ if (u_fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+ if (ioctl(u_fd, NS_GET_ID, &u_id) < 0) {
+ close(u_fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(u_fd);
+
+ if (unshare(CLONE_NEWNET) < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ int n_fd = open("/proc/self/ns/net", O_RDONLY);
+ if (n_fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+ if (ioctl(n_fd, NS_GET_ID, &n_id) < 0) {
+ close(n_fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(n_fd);
+
+ if (unshare(CLONE_NEWUTS) < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ int ut_fd = open("/proc/self/ns/uts", O_RDONLY);
+ if (ut_fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+ if (ioctl(ut_fd, NS_GET_ID, &ut_id) < 0) {
+ close(ut_fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(ut_fd);
+
+ /* Send all namespace IDs */
+ write(pipefd[1], &u_id, sizeof(u_id));
+ write(pipefd[1], &n_id, sizeof(n_id));
+ write(pipefd[1], &ut_id, sizeof(ut_id));
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ close(pipefd[1]);
+
+ /* Read all three namespace IDs - fixed size, no parsing needed */
+ ret = read(pipefd[0], &u_id, sizeof(u_id));
+ if (ret != sizeof(u_id)) {
+ close(pipefd[0]);
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to read user namespace ID");
+ }
+
+ ret = read(pipefd[0], &n_id, sizeof(n_id));
+ if (ret != sizeof(n_id)) {
+ close(pipefd[0]);
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to read network namespace ID");
+ }
+
+ ret = read(pipefd[0], &ut_id, sizeof(ut_id));
+ close(pipefd[0]);
+ if (ret != sizeof(ut_id)) {
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to read UTS namespace ID");
+ }
+
+ /* Construct file handles from namespace IDs */
+ user_handle = (struct file_handle *)u_buf;
+ user_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+ user_handle->handle_type = FILEID_NSFS;
+ struct nsfs_file_handle *u_fh = (struct nsfs_file_handle *)user_handle->f_handle;
+ u_fh->ns_id = u_id;
+ u_fh->ns_type = 0;
+ u_fh->ns_inum = 0;
+
+ net_handle = (struct file_handle *)n_buf;
+ net_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+ net_handle->handle_type = FILEID_NSFS;
+ struct nsfs_file_handle *n_fh = (struct nsfs_file_handle *)net_handle->f_handle;
+ n_fh->ns_id = n_id;
+ n_fh->ns_type = 0;
+ n_fh->ns_inum = 0;
+
+ uts_handle = (struct file_handle *)ut_buf;
+ uts_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+ uts_handle->handle_type = FILEID_NSFS;
+ struct nsfs_file_handle *ut_fh = (struct nsfs_file_handle *)uts_handle->f_handle;
+ ut_fh->ns_id = ut_id;
+ ut_fh->ns_type = 0;
+ ut_fh->ns_inum = 0;
+
+ /* Open both non-user namespaces */
+ int n_fd = open_by_handle_at(FD_NSFS_ROOT, net_handle, O_RDONLY);
+ int ut_fd = open_by_handle_at(FD_NSFS_ROOT, uts_handle, O_RDONLY);
+ if (n_fd < 0 || ut_fd < 0) {
+ if (n_fd >= 0) close(n_fd);
+ if (ut_fd >= 0) close(ut_fd);
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to open namespaces");
+ }
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ /* User namespace should be active (2 active children) */
+ TH_LOG("Both net and uts active - user ns should be active");
+ int u_fd = open_by_handle_at(FD_NSFS_ROOT, user_handle, O_RDONLY);
+ ASSERT_GE(u_fd, 0);
+ close(u_fd);
+
+ /* Close net - user ns should STILL be active (uts still active) */
+ TH_LOG("Closing net - user ns should still be active");
+ close(n_fd);
+ u_fd = open_by_handle_at(FD_NSFS_ROOT, user_handle, O_RDONLY);
+ ASSERT_GE(u_fd, 0);
+ close(u_fd);
+
+ /* Close uts - user ns should become inactive */
+ TH_LOG("Closing uts - user ns should become inactive");
+ close(ut_fd);
+ u_fd = open_by_handle_at(FD_NSFS_ROOT, user_handle, O_RDONLY);
+ ASSERT_LT(u_fd, 0);
+}
+
+/* Thread test helpers and structures */
+struct thread_ns_info {
+ __u64 ns_id;
+ int pipefd;
+ int syncfd_read;
+ int syncfd_write;
+ int exit_code;
+};
+
+static void *thread_create_namespace(void *arg)
+{
+ struct thread_ns_info *info = (struct thread_ns_info *)arg;
+ int ret;
+
+ /* Create new network namespace */
+ ret = unshare(CLONE_NEWNET);
+ if (ret < 0) {
+ info->exit_code = 1;
+ return NULL;
+ }
+
+ /* Get namespace ID */
+ int fd = open("/proc/thread-self/ns/net", O_RDONLY);
+ if (fd < 0) {
+ info->exit_code = 2;
+ return NULL;
+ }
+
+ ret = ioctl(fd, NS_GET_ID, &info->ns_id);
+ close(fd);
+ if (ret < 0) {
+ info->exit_code = 3;
+ return NULL;
+ }
+
+ /* Send namespace ID to main thread */
+ if (write(info->pipefd, &info->ns_id, sizeof(info->ns_id)) != sizeof(info->ns_id)) {
+ info->exit_code = 4;
+ return NULL;
+ }
+
+ /* Wait for signal to exit */
+ char sync_byte;
+ if (read(info->syncfd_read, &sync_byte, 1) != 1) {
+ info->exit_code = 5;
+ return NULL;
+ }
+
+ info->exit_code = 0;
+ return NULL;
+}
+
+/*
+ * Test that namespace becomes inactive after thread exits.
+ * This verifies active reference counting works with threads, not just processes.
+ */
+TEST(thread_ns_inactive_after_exit)
+{
+ pthread_t thread;
+ struct thread_ns_info info;
+ struct file_handle *handle;
+ int pipefd[2];
+ int syncpipe[2];
+ int ret;
+ char sync_byte;
+ char buf[sizeof(*handle) + MAX_HANDLE_SZ];
+
+ ASSERT_EQ(pipe(pipefd), 0);
+ ASSERT_EQ(pipe(syncpipe), 0);
+
+ info.pipefd = pipefd[1];
+ info.syncfd_read = syncpipe[0];
+ info.syncfd_write = -1;
+ info.exit_code = -1;
+
+ /* Create thread that will create a namespace */
+ ret = pthread_create(&thread, NULL, thread_create_namespace, &info);
+ ASSERT_EQ(ret, 0);
+
+ /* Read namespace ID from thread */
+ __u64 ns_id;
+ ret = read(pipefd[0], &ns_id, sizeof(ns_id));
+ if (ret != sizeof(ns_id)) {
+ sync_byte = 'X';
+ write(syncpipe[1], &sync_byte, 1);
+ pthread_join(thread, NULL);
+ close(pipefd[0]);
+ close(pipefd[1]);
+ close(syncpipe[0]);
+ close(syncpipe[1]);
+ SKIP(return, "Failed to read namespace ID from thread");
+ }
+
+ TH_LOG("Thread created namespace with ID %llu", (unsigned long long)ns_id);
+
+ /* Construct file handle */
+ handle = (struct file_handle *)buf;
+ handle->handle_bytes = sizeof(struct nsfs_file_handle);
+ handle->handle_type = FILEID_NSFS;
+ struct nsfs_file_handle *fh = (struct nsfs_file_handle *)handle->f_handle;
+ fh->ns_id = ns_id;
+ fh->ns_type = 0;
+ fh->ns_inum = 0;
+
+ /* Namespace should be active while thread is alive */
+ TH_LOG("Attempting to open namespace while thread is alive (should succeed)");
+ int nsfd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+ ASSERT_GE(nsfd, 0);
+ close(nsfd);
+
+ /* Signal thread to exit */
+ TH_LOG("Signaling thread to exit");
+ sync_byte = 'X';
+ ASSERT_EQ(write(syncpipe[1], &sync_byte, 1), 1);
+ close(syncpipe[1]);
+
+ /* Wait for thread to exit */
+ ASSERT_EQ(pthread_join(thread, NULL), 0);
+ close(pipefd[0]);
+ close(pipefd[1]);
+ close(syncpipe[0]);
+
+ if (info.exit_code != 0)
+ SKIP(return, "Thread failed to create namespace");
+
+ TH_LOG("Thread exited, namespace should be inactive");
+
+ /* Namespace should now be inactive */
+ nsfd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+ ASSERT_LT(nsfd, 0);
+ /* Should fail with ENOENT (inactive) or ESTALE (gone) */
+ TH_LOG("Namespace inactive as expected: %s (errno=%d)", strerror(errno), errno);
+ ASSERT_TRUE(errno == ENOENT || errno == ESTALE);
+}
+
+/*
+ * Test that a namespace remains active while a thread holds an fd to it.
+ * Even after the thread exits, the namespace should remain active as long as
+ * another thread holds a file descriptor to it.
+ */
+TEST(thread_ns_fd_keeps_active)
+{
+ pthread_t thread;
+ struct thread_ns_info info;
+ struct file_handle *handle;
+ int pipefd[2];
+ int syncpipe[2];
+ int ret;
+ char sync_byte;
+ char buf[sizeof(*handle) + MAX_HANDLE_SZ];
+
+ ASSERT_EQ(pipe(pipefd), 0);
+ ASSERT_EQ(pipe(syncpipe), 0);
+
+ info.pipefd = pipefd[1];
+ info.syncfd_read = syncpipe[0];
+ info.syncfd_write = -1;
+ info.exit_code = -1;
+
+ /* Create thread that will create a namespace */
+ ret = pthread_create(&thread, NULL, thread_create_namespace, &info);
+ ASSERT_EQ(ret, 0);
+
+ /* Read namespace ID from thread */
+ __u64 ns_id;
+ ret = read(pipefd[0], &ns_id, sizeof(ns_id));
+ if (ret != sizeof(ns_id)) {
+ sync_byte = 'X';
+ write(syncpipe[1], &sync_byte, 1);
+ pthread_join(thread, NULL);
+ close(pipefd[0]);
+ close(pipefd[1]);
+ close(syncpipe[0]);
+ close(syncpipe[1]);
+ SKIP(return, "Failed to read namespace ID from thread");
+ }
+
+ TH_LOG("Thread created namespace with ID %llu", (unsigned long long)ns_id);
+
+ /* Construct file handle */
+ handle = (struct file_handle *)buf;
+ handle->handle_bytes = sizeof(struct nsfs_file_handle);
+ handle->handle_type = FILEID_NSFS;
+ struct nsfs_file_handle *fh = (struct nsfs_file_handle *)handle->f_handle;
+ fh->ns_id = ns_id;
+ fh->ns_type = 0;
+ fh->ns_inum = 0;
+
+ /* Open namespace while thread is alive */
+ TH_LOG("Opening namespace while thread is alive");
+ int nsfd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+ ASSERT_GE(nsfd, 0);
+
+ /* Signal thread to exit */
+ TH_LOG("Signaling thread to exit");
+ sync_byte = 'X';
+ write(syncpipe[1], &sync_byte, 1);
+ close(syncpipe[1]);
+
+ /* Wait for thread to exit */
+ pthread_join(thread, NULL);
+ close(pipefd[0]);
+ close(pipefd[1]);
+ close(syncpipe[0]);
+
+ if (info.exit_code != 0) {
+ close(nsfd);
+ SKIP(return, "Thread failed to create namespace");
+ }
+
+ TH_LOG("Thread exited, but main thread holds fd - namespace should remain active");
+
+ /* Namespace should still be active because we hold an fd */
+ int nsfd2 = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+ ASSERT_GE(nsfd2, 0);
+
+ /* Verify it's the same namespace */
+ struct stat st1, st2;
+ ASSERT_EQ(fstat(nsfd, &st1), 0);
+ ASSERT_EQ(fstat(nsfd2, &st2), 0);
+ ASSERT_EQ(st1.st_ino, st2.st_ino);
+ close(nsfd2);
+
+ TH_LOG("Closing fd - namespace should become inactive");
+ close(nsfd);
+
+ /* Now namespace should be inactive */
+ nsfd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+ ASSERT_LT(nsfd, 0);
+ /* Should fail with ENOENT (inactive) or ESTALE (gone) */
+ TH_LOG("Namespace inactive as expected: %s (errno=%d)", strerror(errno), errno);
+ ASSERT_TRUE(errno == ENOENT || errno == ESTALE);
+}
+
+/* Structure for thread data in subprocess */
+struct thread_sleep_data {
+ int syncfd_read;
+};
+
+static void *thread_sleep_and_wait(void *arg)
+{
+ struct thread_sleep_data *data = (struct thread_sleep_data *)arg;
+ char sync_byte;
+
+ /* Wait for signal to exit - read will unblock when pipe is closed */
+ (void)read(data->syncfd_read, &sync_byte, 1);
+ return NULL;
+}
+
+/*
+ * Test that namespaces become inactive after subprocess with multiple threads exits.
+ * Create a subprocess that unshares user and network namespaces, then creates two
+ * threads that share those namespaces. Verify that after all threads and subprocess
+ * exit, the namespaces are no longer listed by listns() and cannot be opened by
+ * open_by_handle_at().
+ */
+TEST(thread_subprocess_ns_inactive_after_all_exit)
+{
+ int pipefd[2];
+ int sv[2];
+ pid_t pid;
+ int status;
+ __u64 user_id, net_id;
+ struct file_handle *user_handle, *net_handle;
+ char user_buf[sizeof(*user_handle) + MAX_HANDLE_SZ];
+ char net_buf[sizeof(*net_handle) + MAX_HANDLE_SZ];
+ char sync_byte;
+ int ret;
+
+ ASSERT_EQ(pipe(pipefd), 0);
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sv), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child process */
+ close(pipefd[0]);
+ close(sv[0]);
+
+ /* Create user namespace with mappings */
+ if (setup_userns() < 0) {
+ fprintf(stderr, "Child: setup_userns() failed: %s\n", strerror(errno));
+ close(pipefd[1]);
+ close(sv[1]);
+ exit(1);
+ }
+ fprintf(stderr, "Child: setup_userns() succeeded\n");
+
+ /* Get user namespace ID */
+ int user_fd = open("/proc/self/ns/user", O_RDONLY);
+ if (user_fd < 0) {
+ fprintf(stderr, "Child: open(/proc/self/ns/user) failed: %s\n", strerror(errno));
+ close(pipefd[1]);
+ close(sv[1]);
+ exit(1);
+ }
+
+ if (ioctl(user_fd, NS_GET_ID, &user_id) < 0) {
+ fprintf(stderr, "Child: ioctl(NS_GET_ID) for user ns failed: %s\n", strerror(errno));
+ close(user_fd);
+ close(pipefd[1]);
+ close(sv[1]);
+ exit(1);
+ }
+ close(user_fd);
+ fprintf(stderr, "Child: user ns ID = %llu\n", (unsigned long long)user_id);
+
+ /* Unshare network namespace */
+ if (unshare(CLONE_NEWNET) < 0) {
+ fprintf(stderr, "Child: unshare(CLONE_NEWNET) failed: %s\n", strerror(errno));
+ close(pipefd[1]);
+ close(sv[1]);
+ exit(1);
+ }
+ fprintf(stderr, "Child: unshare(CLONE_NEWNET) succeeded\n");
+
+ /* Get network namespace ID */
+ int net_fd = open("/proc/self/ns/net", O_RDONLY);
+ if (net_fd < 0) {
+ fprintf(stderr, "Child: open(/proc/self/ns/net) failed: %s\n", strerror(errno));
+ close(pipefd[1]);
+ close(sv[1]);
+ exit(1);
+ }
+
+ if (ioctl(net_fd, NS_GET_ID, &net_id) < 0) {
+ fprintf(stderr, "Child: ioctl(NS_GET_ID) for net ns failed: %s\n", strerror(errno));
+ close(net_fd);
+ close(pipefd[1]);
+ close(sv[1]);
+ exit(1);
+ }
+ close(net_fd);
+ fprintf(stderr, "Child: net ns ID = %llu\n", (unsigned long long)net_id);
+
+ /* Send namespace IDs to parent */
+ if (write(pipefd[1], &user_id, sizeof(user_id)) != sizeof(user_id)) {
+ fprintf(stderr, "Child: write(user_id) failed: %s\n", strerror(errno));
+ exit(1);
+ }
+ if (write(pipefd[1], &net_id, sizeof(net_id)) != sizeof(net_id)) {
+ fprintf(stderr, "Child: write(net_id) failed: %s\n", strerror(errno));
+ exit(1);
+ }
+ close(pipefd[1]);
+ fprintf(stderr, "Child: sent namespace IDs to parent\n");
+
+ /* Create two threads that share the namespaces */
+ pthread_t thread1, thread2;
+ struct thread_sleep_data data;
+ data.syncfd_read = sv[1];
+
+ int ret_thread = pthread_create(&thread1, NULL, thread_sleep_and_wait, &data);
+ if (ret_thread != 0) {
+ fprintf(stderr, "Child: pthread_create(thread1) failed: %s\n", strerror(ret_thread));
+ close(sv[1]);
+ exit(1);
+ }
+ fprintf(stderr, "Child: created thread1\n");
+
+ ret_thread = pthread_create(&thread2, NULL, thread_sleep_and_wait, &data);
+ if (ret_thread != 0) {
+ fprintf(stderr, "Child: pthread_create(thread2) failed: %s\n", strerror(ret_thread));
+ close(sv[1]);
+ pthread_cancel(thread1);
+ exit(1);
+ }
+ fprintf(stderr, "Child: created thread2\n");
+
+ /* Wait for threads to complete - they will unblock when parent writes */
+ fprintf(stderr, "Child: waiting for threads to exit\n");
+ pthread_join(thread1, NULL);
+ fprintf(stderr, "Child: thread1 exited\n");
+ pthread_join(thread2, NULL);
+ fprintf(stderr, "Child: thread2 exited\n");
+
+ close(sv[1]);
+
+ /* Exit - namespaces should become inactive */
+ fprintf(stderr, "Child: all threads joined, exiting with success\n");
+ exit(0);
+ }
+
+ /* Parent process */
+ close(pipefd[1]);
+ close(sv[1]);
+
+ TH_LOG("Parent: waiting to read namespace IDs from child");
+
+ /* Read namespace IDs from child */
+ ret = read(pipefd[0], &user_id, sizeof(user_id));
+ if (ret != sizeof(user_id)) {
+ TH_LOG("Parent: failed to read user_id, ret=%d, errno=%s", ret, strerror(errno));
+ close(pipefd[0]);
+ sync_byte = 'X';
+ (void)write(sv[0], &sync_byte, 1);
+ close(sv[0]);
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to read user namespace ID from child");
+ }
+
+ ret = read(pipefd[0], &net_id, sizeof(net_id));
+ close(pipefd[0]);
+ if (ret != sizeof(net_id)) {
+ TH_LOG("Parent: failed to read net_id, ret=%d, errno=%s", ret, strerror(errno));
+ sync_byte = 'X';
+ (void)write(sv[0], &sync_byte, 1);
+ close(sv[0]);
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to read network namespace ID from child");
+ }
+
+ TH_LOG("Child created user ns %llu and net ns %llu with 2 threads",
+ (unsigned long long)user_id, (unsigned long long)net_id);
+
+ /* Construct file handles */
+ user_handle = (struct file_handle *)user_buf;
+ user_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+ user_handle->handle_type = FILEID_NSFS;
+ struct nsfs_file_handle *user_fh = (struct nsfs_file_handle *)user_handle->f_handle;
+ user_fh->ns_id = user_id;
+ user_fh->ns_type = 0;
+ user_fh->ns_inum = 0;
+
+ net_handle = (struct file_handle *)net_buf;
+ net_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+ net_handle->handle_type = FILEID_NSFS;
+ struct nsfs_file_handle *net_fh = (struct nsfs_file_handle *)net_handle->f_handle;
+ net_fh->ns_id = net_id;
+ net_fh->ns_type = 0;
+ net_fh->ns_inum = 0;
+
+ /* Verify namespaces are active while subprocess and threads are alive */
+ TH_LOG("Verifying namespaces are active while subprocess with threads is running");
+ int user_fd = open_by_handle_at(FD_NSFS_ROOT, user_handle, O_RDONLY);
+ ASSERT_GE(user_fd, 0);
+
+ int net_fd = open_by_handle_at(FD_NSFS_ROOT, net_handle, O_RDONLY);
+ ASSERT_GE(net_fd, 0);
+
+ close(user_fd);
+ close(net_fd);
+
+ /* Also verify they appear in listns() */
+ TH_LOG("Verifying namespaces appear in listns() while active");
+ struct ns_id_req req = {
+ .size = sizeof(struct ns_id_req),
+ .spare = 0,
+ .ns_id = 0,
+ .ns_type = CLONE_NEWUSER,
+ .spare2 = 0,
+ .user_ns_id = 0,
+ };
+ __u64 ns_ids[256];
+ int nr_ids = sys_listns(&req, ns_ids, 256, 0);
+ if (nr_ids < 0) {
+ TH_LOG("listns() not available, skipping listns verification");
+ } else {
+ /* Check if user_id is in the list */
+ int found_user = 0;
+ for (int i = 0; i < nr_ids; i++) {
+ if (ns_ids[i] == user_id) {
+ found_user = 1;
+ break;
+ }
+ }
+ ASSERT_TRUE(found_user);
+ TH_LOG("User namespace found in listns() as expected");
+
+ /* Check network namespace */
+ req.ns_type = CLONE_NEWNET;
+ nr_ids = sys_listns(&req, ns_ids, 256, 0);
+ if (nr_ids >= 0) {
+ int found_net = 0;
+ for (int i = 0; i < nr_ids; i++) {
+ if (ns_ids[i] == net_id) {
+ found_net = 1;
+ break;
+ }
+ }
+ ASSERT_TRUE(found_net);
+ TH_LOG("Network namespace found in listns() as expected");
+ }
+ }
+
+ /* Signal threads to exit */
+ TH_LOG("Signaling threads to exit");
+ sync_byte = 'X';
+ /* Write two bytes - one for each thread */
+ ASSERT_EQ(write(sv[0], &sync_byte, 1), 1);
+ ASSERT_EQ(write(sv[0], &sync_byte, 1), 1);
+ close(sv[0]);
+
+ /* Wait for child process to exit */
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ if (WEXITSTATUS(status) != 0) {
+ TH_LOG("Child process failed with exit code %d", WEXITSTATUS(status));
+ SKIP(return, "Child process failed");
+ }
+
+ TH_LOG("Subprocess and all threads have exited successfully");
+
+ /* Verify namespaces are now inactive - open_by_handle_at should fail */
+ TH_LOG("Verifying namespaces are inactive after subprocess and threads exit");
+ user_fd = open_by_handle_at(FD_NSFS_ROOT, user_handle, O_RDONLY);
+ ASSERT_LT(user_fd, 0);
+ TH_LOG("User namespace inactive as expected: %s (errno=%d)",
+ strerror(errno), errno);
+ ASSERT_TRUE(errno == ENOENT || errno == ESTALE);
+
+ net_fd = open_by_handle_at(FD_NSFS_ROOT, net_handle, O_RDONLY);
+ ASSERT_LT(net_fd, 0);
+ TH_LOG("Network namespace inactive as expected: %s (errno=%d)",
+ strerror(errno), errno);
+ ASSERT_TRUE(errno == ENOENT || errno == ESTALE);
+
+ /* Verify namespaces do NOT appear in listns() */
+ TH_LOG("Verifying namespaces do NOT appear in listns() when inactive");
+ memset(&req, 0, sizeof(req));
+ req.size = sizeof(struct ns_id_req);
+ req.ns_type = CLONE_NEWUSER;
+ nr_ids = sys_listns(&req, ns_ids, 256, 0);
+ if (nr_ids >= 0) {
+ int found_user = 0;
+ for (int i = 0; i < nr_ids; i++) {
+ if (ns_ids[i] == user_id) {
+ found_user = 1;
+ break;
+ }
+ }
+ ASSERT_FALSE(found_user);
+ TH_LOG("User namespace correctly not listed in listns()");
+
+ /* Check network namespace */
+ req.ns_type = CLONE_NEWNET;
+ nr_ids = sys_listns(&req, ns_ids, 256, 0);
+ if (nr_ids >= 0) {
+ int found_net = 0;
+ for (int i = 0; i < nr_ids; i++) {
+ if (ns_ids[i] == net_id) {
+ found_net = 1;
+ break;
+ }
+ }
+ ASSERT_FALSE(found_net);
+ TH_LOG("Network namespace correctly not listed in listns()");
+ }
+ }
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/namespaces/nsid_test.c b/tools/testing/selftests/namespaces/nsid_test.c
index e28accd74a57..527ade0a8673 100644
--- a/tools/testing/selftests/namespaces/nsid_test.c
+++ b/tools/testing/selftests/namespaces/nsid_test.c
@@ -6,6 +6,7 @@
#include <libgen.h>
#include <limits.h>
#include <pthread.h>
+#include <signal.h>
#include <string.h>
#include <sys/mount.h>
#include <poll.h>
@@ -14,12 +15,30 @@
#include <sys/stat.h>
#include <sys/socket.h>
#include <sys/un.h>
+#include <sys/wait.h>
#include <unistd.h>
#include <linux/fs.h>
#include <linux/limits.h>
#include <linux/nsfs.h>
#include "../kselftest_harness.h"
+/* Fixture for tests that create child processes */
+FIXTURE(nsid) {
+ pid_t child_pid;
+};
+
+FIXTURE_SETUP(nsid) {
+ self->child_pid = 0;
+}
+
+FIXTURE_TEARDOWN(nsid) {
+ /* Clean up any child process that may still be running */
+ if (self->child_pid > 0) {
+ kill(self->child_pid, SIGKILL);
+ waitpid(self->child_pid, NULL, 0);
+ }
+}
+
TEST(nsid_mntns_basic)
{
__u64 mnt_ns_id = 0;
@@ -44,7 +63,7 @@ TEST(nsid_mntns_basic)
close(fd_mntns);
}
-TEST(nsid_mntns_separate)
+TEST_F(nsid, mntns_separate)
{
__u64 parent_mnt_ns_id = 0;
__u64 child_mnt_ns_id = 0;
@@ -90,6 +109,9 @@ TEST(nsid_mntns_separate)
_exit(0);
}
+ /* Track child for cleanup */
+ self->child_pid = pid;
+
/* Parent process */
close(pipefd[1]);
@@ -99,8 +121,6 @@ TEST(nsid_mntns_separate)
if (buf == 'S') {
/* Child couldn't create namespace, skip test */
- kill(pid, SIGTERM);
- waitpid(pid, NULL, 0);
close(fd_parent_mntns);
SKIP(return, "No permission to create mount namespace");
}
@@ -123,10 +143,6 @@ TEST(nsid_mntns_separate)
close(fd_parent_mntns);
close(fd_child_mntns);
-
- /* Clean up child process */
- kill(pid, SIGTERM);
- waitpid(pid, NULL, 0);
}
TEST(nsid_cgroupns_basic)
@@ -153,7 +169,7 @@ TEST(nsid_cgroupns_basic)
close(fd_cgroupns);
}
-TEST(nsid_cgroupns_separate)
+TEST_F(nsid, cgroupns_separate)
{
__u64 parent_cgroup_ns_id = 0;
__u64 child_cgroup_ns_id = 0;
@@ -199,6 +215,9 @@ TEST(nsid_cgroupns_separate)
_exit(0);
}
+ /* Track child for cleanup */
+ self->child_pid = pid;
+
/* Parent process */
close(pipefd[1]);
@@ -208,8 +227,6 @@ TEST(nsid_cgroupns_separate)
if (buf == 'S') {
/* Child couldn't create namespace, skip test */
- kill(pid, SIGTERM);
- waitpid(pid, NULL, 0);
close(fd_parent_cgroupns);
SKIP(return, "No permission to create cgroup namespace");
}
@@ -232,10 +249,6 @@ TEST(nsid_cgroupns_separate)
close(fd_parent_cgroupns);
close(fd_child_cgroupns);
-
- /* Clean up child process */
- kill(pid, SIGTERM);
- waitpid(pid, NULL, 0);
}
TEST(nsid_ipcns_basic)
@@ -262,7 +275,7 @@ TEST(nsid_ipcns_basic)
close(fd_ipcns);
}
-TEST(nsid_ipcns_separate)
+TEST_F(nsid, ipcns_separate)
{
__u64 parent_ipc_ns_id = 0;
__u64 child_ipc_ns_id = 0;
@@ -308,6 +321,9 @@ TEST(nsid_ipcns_separate)
_exit(0);
}
+ /* Track child for cleanup */
+ self->child_pid = pid;
+
/* Parent process */
close(pipefd[1]);
@@ -317,8 +333,6 @@ TEST(nsid_ipcns_separate)
if (buf == 'S') {
/* Child couldn't create namespace, skip test */
- kill(pid, SIGTERM);
- waitpid(pid, NULL, 0);
close(fd_parent_ipcns);
SKIP(return, "No permission to create IPC namespace");
}
@@ -341,10 +355,6 @@ TEST(nsid_ipcns_separate)
close(fd_parent_ipcns);
close(fd_child_ipcns);
-
- /* Clean up child process */
- kill(pid, SIGTERM);
- waitpid(pid, NULL, 0);
}
TEST(nsid_utsns_basic)
@@ -371,7 +381,7 @@ TEST(nsid_utsns_basic)
close(fd_utsns);
}
-TEST(nsid_utsns_separate)
+TEST_F(nsid, utsns_separate)
{
__u64 parent_uts_ns_id = 0;
__u64 child_uts_ns_id = 0;
@@ -417,6 +427,9 @@ TEST(nsid_utsns_separate)
_exit(0);
}
+ /* Track child for cleanup */
+ self->child_pid = pid;
+
/* Parent process */
close(pipefd[1]);
@@ -426,8 +439,6 @@ TEST(nsid_utsns_separate)
if (buf == 'S') {
/* Child couldn't create namespace, skip test */
- kill(pid, SIGTERM);
- waitpid(pid, NULL, 0);
close(fd_parent_utsns);
SKIP(return, "No permission to create UTS namespace");
}
@@ -450,10 +461,6 @@ TEST(nsid_utsns_separate)
close(fd_parent_utsns);
close(fd_child_utsns);
-
- /* Clean up child process */
- kill(pid, SIGTERM);
- waitpid(pid, NULL, 0);
}
TEST(nsid_userns_basic)
@@ -480,7 +487,7 @@ TEST(nsid_userns_basic)
close(fd_userns);
}
-TEST(nsid_userns_separate)
+TEST_F(nsid, userns_separate)
{
__u64 parent_user_ns_id = 0;
__u64 child_user_ns_id = 0;
@@ -526,6 +533,9 @@ TEST(nsid_userns_separate)
_exit(0);
}
+ /* Track child for cleanup */
+ self->child_pid = pid;
+
/* Parent process */
close(pipefd[1]);
@@ -535,8 +545,6 @@ TEST(nsid_userns_separate)
if (buf == 'S') {
/* Child couldn't create namespace, skip test */
- kill(pid, SIGTERM);
- waitpid(pid, NULL, 0);
close(fd_parent_userns);
SKIP(return, "No permission to create user namespace");
}
@@ -559,10 +567,6 @@ TEST(nsid_userns_separate)
close(fd_parent_userns);
close(fd_child_userns);
-
- /* Clean up child process */
- kill(pid, SIGTERM);
- waitpid(pid, NULL, 0);
}
TEST(nsid_timens_basic)
@@ -591,7 +595,7 @@ TEST(nsid_timens_basic)
close(fd_timens);
}
-TEST(nsid_timens_separate)
+TEST_F(nsid, timens_separate)
{
__u64 parent_time_ns_id = 0;
__u64 child_time_ns_id = 0;
@@ -652,6 +656,9 @@ TEST(nsid_timens_separate)
}
}
+ /* Track child for cleanup */
+ self->child_pid = pid;
+
/* Parent process */
close(pipefd[1]);
@@ -660,8 +667,6 @@ TEST(nsid_timens_separate)
if (buf == 'S') {
/* Child couldn't create namespace, skip test */
- kill(pid, SIGTERM);
- waitpid(pid, NULL, 0);
close(fd_parent_timens);
close(pipefd[0]);
SKIP(return, "Cannot create time namespace");
@@ -689,10 +694,6 @@ TEST(nsid_timens_separate)
close(fd_parent_timens);
close(fd_child_timens);
-
- /* Clean up child process */
- kill(pid, SIGTERM);
- waitpid(pid, NULL, 0);
}
TEST(nsid_pidns_basic)
@@ -719,7 +720,7 @@ TEST(nsid_pidns_basic)
close(fd_pidns);
}
-TEST(nsid_pidns_separate)
+TEST_F(nsid, pidns_separate)
{
__u64 parent_pid_ns_id = 0;
__u64 child_pid_ns_id = 0;
@@ -776,6 +777,9 @@ TEST(nsid_pidns_separate)
}
}
+ /* Track child for cleanup */
+ self->child_pid = pid;
+
/* Parent process */
close(pipefd[1]);
@@ -784,8 +788,6 @@ TEST(nsid_pidns_separate)
if (buf == 'S') {
/* Child couldn't create namespace, skip test */
- kill(pid, SIGTERM);
- waitpid(pid, NULL, 0);
close(fd_parent_pidns);
close(pipefd[0]);
SKIP(return, "No permission to create PID namespace");
@@ -813,10 +815,6 @@ TEST(nsid_pidns_separate)
close(fd_parent_pidns);
close(fd_child_pidns);
-
- /* Clean up child process */
- kill(pid, SIGTERM);
- waitpid(pid, NULL, 0);
}
TEST(nsid_netns_basic)
@@ -860,7 +858,7 @@ TEST(nsid_netns_basic)
close(fd_netns);
}
-TEST(nsid_netns_separate)
+TEST_F(nsid, netns_separate)
{
__u64 parent_net_ns_id = 0;
__u64 parent_netns_cookie = 0;
@@ -920,6 +918,9 @@ TEST(nsid_netns_separate)
_exit(0);
}
+ /* Track child for cleanup */
+ self->child_pid = pid;
+
/* Parent process */
close(pipefd[1]);
@@ -929,8 +930,6 @@ TEST(nsid_netns_separate)
if (buf == 'S') {
/* Child couldn't create namespace, skip test */
- kill(pid, SIGTERM);
- waitpid(pid, NULL, 0);
close(fd_parent_netns);
close(parent_sock);
SKIP(return, "No permission to create network namespace");
@@ -977,10 +976,6 @@ TEST(nsid_netns_separate)
close(fd_parent_netns);
close(fd_child_netns);
close(parent_sock);
-
- /* Clean up child process */
- kill(pid, SIGTERM);
- waitpid(pid, NULL, 0);
}
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/namespaces/regression_pidfd_setns_test.c b/tools/testing/selftests/namespaces/regression_pidfd_setns_test.c
new file mode 100644
index 000000000000..753fd29dffd8
--- /dev/null
+++ b/tools/testing/selftests/namespaces/regression_pidfd_setns_test.c
@@ -0,0 +1,113 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <errno.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <unistd.h>
+#include "../pidfd/pidfd.h"
+#include "../kselftest_harness.h"
+
+/*
+ * Regression tests for the setns(pidfd) active reference counting bug.
+ *
+ * These tests are based on the reproducers that triggered the race condition
+ * fixed by commit 1c465d0518dc ("ns: handle setns(pidfd, ...) cleanly").
+ *
+ * The bug: When using setns() with a pidfd, if the target task exits between
+ * prepare_nsset() and commit_nsset(), the namespaces would become inactive.
+ * Then ns_ref_active_get() would increment from 0 without properly resurrecting
+ * the owner chain, causing active reference count underflows.
+ */
+
+/*
+ * Simple pidfd setns test using create_child()+unshare().
+ *
+ * Without the fix, this would trigger active refcount warnings when the
+ * parent exits after doing setns(pidfd) on a child that has already exited.
+ */
+TEST(simple_pidfd_setns)
+{
+ pid_t child_pid;
+ int pidfd = -1;
+ int ret;
+ int sv[2];
+ char c;
+
+ /* Ignore SIGCHLD for autoreap */
+ ASSERT_NE(signal(SIGCHLD, SIG_IGN), SIG_ERR);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sv), 0);
+
+ /* Create a child process without namespaces initially */
+ child_pid = create_child(&pidfd, 0);
+ ASSERT_GE(child_pid, 0);
+
+ if (child_pid == 0) {
+ close(sv[0]);
+
+ if (unshare(CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWNET | CLONE_NEWUSER) < 0) {
+ close(sv[1]);
+ _exit(1);
+ }
+
+ /* Signal parent that namespaces are ready */
+ if (write_nointr(sv[1], "1", 1) < 0) {
+ close(sv[1]);
+ _exit(1);
+ }
+
+ close(sv[1]);
+ _exit(0);
+ }
+ ASSERT_GE(pidfd, 0);
+ EXPECT_EQ(close(sv[1]), 0);
+
+ ret = read_nointr(sv[0], &c, 1);
+ ASSERT_EQ(ret, 1);
+ EXPECT_EQ(close(sv[0]), 0);
+
+ /* Set to child's namespaces via pidfd */
+ ret = setns(pidfd, CLONE_NEWUTS | CLONE_NEWIPC);
+ TH_LOG("setns() returned %d", ret);
+ close(pidfd);
+}
+
+/*
+ * Simple pidfd setns test using create_child().
+ *
+ * This variation uses create_child() with namespace flags directly.
+ * Namespaces are created immediately at clone time.
+ */
+TEST(simple_pidfd_setns_clone)
+{
+ pid_t child_pid;
+ int pidfd = -1;
+ int ret;
+
+ /* Ignore SIGCHLD for autoreap */
+ ASSERT_NE(signal(SIGCHLD, SIG_IGN), SIG_ERR);
+
+ /* Create a child process with new namespaces using create_child() */
+ child_pid = create_child(&pidfd, CLONE_NEWUSER | CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWNET);
+ ASSERT_GE(child_pid, 0);
+
+ if (child_pid == 0) {
+ /* Child: sleep for a while so parent can setns to us */
+ sleep(2);
+ _exit(0);
+ }
+
+ /* Parent: pidfd was already created by create_child() */
+ ASSERT_GE(pidfd, 0);
+
+ /* Set to child's namespaces via pidfd */
+ ret = setns(pidfd, CLONE_NEWUTS | CLONE_NEWIPC);
+ close(pidfd);
+ TH_LOG("setns() returned %d", ret);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/namespaces/siocgskns_test.c b/tools/testing/selftests/namespaces/siocgskns_test.c
new file mode 100644
index 000000000000..ba689a22d82f
--- /dev/null
+++ b/tools/testing/selftests/namespaces/siocgskns_test.c
@@ -0,0 +1,1824 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <linux/if.h>
+#include <linux/sockios.h>
+#include <linux/nsfs.h>
+#include <arpa/inet.h>
+#include "../kselftest_harness.h"
+#include "../filesystems/utils.h"
+#include "wrappers.h"
+
+#ifndef SIOCGSKNS
+#define SIOCGSKNS 0x894C
+#endif
+
+#ifndef FD_NSFS_ROOT
+#define FD_NSFS_ROOT -10003
+#endif
+
+#ifndef FILEID_NSFS
+#define FILEID_NSFS 0xf1
+#endif
+
+/*
+ * Test basic SIOCGSKNS functionality.
+ * Create a socket and verify SIOCGSKNS returns the correct network namespace.
+ */
+TEST(siocgskns_basic)
+{
+ int sock_fd, netns_fd, current_netns_fd;
+ struct stat st1, st2;
+
+ /* Create a TCP socket */
+ sock_fd = socket(AF_INET, SOCK_STREAM, 0);
+ ASSERT_GE(sock_fd, 0);
+
+ /* Use SIOCGSKNS to get network namespace */
+ netns_fd = ioctl(sock_fd, SIOCGSKNS);
+ if (netns_fd < 0) {
+ close(sock_fd);
+ if (errno == ENOTTY || errno == EINVAL)
+ SKIP(return, "SIOCGSKNS not supported");
+ ASSERT_GE(netns_fd, 0);
+ }
+
+ /* Get current network namespace */
+ current_netns_fd = open("/proc/self/ns/net", O_RDONLY);
+ ASSERT_GE(current_netns_fd, 0);
+
+ /* Verify they match */
+ ASSERT_EQ(fstat(netns_fd, &st1), 0);
+ ASSERT_EQ(fstat(current_netns_fd, &st2), 0);
+ ASSERT_EQ(st1.st_ino, st2.st_ino);
+
+ close(sock_fd);
+ close(netns_fd);
+ close(current_netns_fd);
+}
+
+/*
+ * Test that socket file descriptors keep network namespaces active.
+ * Create a network namespace, create a socket in it, then exit the namespace.
+ * The namespace should remain active while the socket FD is held.
+ */
+TEST(siocgskns_keeps_netns_active)
+{
+ int sock_fd, netns_fd, test_fd;
+ int ipc_sockets[2];
+ pid_t pid;
+ int status;
+ struct stat st;
+
+ EXPECT_EQ(socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child: create new netns and socket */
+ close(ipc_sockets[0]);
+
+ if (unshare(CLONE_NEWNET) < 0) {
+ TH_LOG("unshare(CLONE_NEWNET) failed: %s", strerror(errno));
+ close(ipc_sockets[1]);
+ exit(1);
+ }
+
+ /* Create a socket in the new network namespace */
+ sock_fd = socket(AF_INET, SOCK_DGRAM, 0);
+ if (sock_fd < 0) {
+ TH_LOG("socket() failed: %s", strerror(errno));
+ close(ipc_sockets[1]);
+ exit(1);
+ }
+
+ /* Send socket FD to parent via SCM_RIGHTS */
+ struct msghdr msg = {0};
+ struct iovec iov = {0};
+ char buf[1] = {'X'};
+ char cmsg_buf[CMSG_SPACE(sizeof(int))];
+
+ iov.iov_base = buf;
+ iov.iov_len = 1;
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = cmsg_buf;
+ msg.msg_controllen = sizeof(cmsg_buf);
+
+ struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(int));
+ memcpy(CMSG_DATA(cmsg), &sock_fd, sizeof(int));
+
+ if (sendmsg(ipc_sockets[1], &msg, 0) < 0) {
+ close(sock_fd);
+ close(ipc_sockets[1]);
+ exit(1);
+ }
+
+ close(sock_fd);
+ close(ipc_sockets[1]);
+ exit(0);
+ }
+
+ /* Parent: receive socket FD */
+ close(ipc_sockets[1]);
+
+ struct msghdr msg = {0};
+ struct iovec iov = {0};
+ char buf[1];
+ char cmsg_buf[CMSG_SPACE(sizeof(int))];
+
+ iov.iov_base = buf;
+ iov.iov_len = 1;
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = cmsg_buf;
+ msg.msg_controllen = sizeof(cmsg_buf);
+
+ ssize_t n = recvmsg(ipc_sockets[0], &msg, 0);
+ close(ipc_sockets[0]);
+ ASSERT_EQ(n, 1);
+
+ struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
+ ASSERT_NE(cmsg, NULL);
+ ASSERT_EQ(cmsg->cmsg_type, SCM_RIGHTS);
+
+ memcpy(&sock_fd, CMSG_DATA(cmsg), sizeof(int));
+
+ /* Wait for child to exit */
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ /* Get network namespace from socket */
+ netns_fd = ioctl(sock_fd, SIOCGSKNS);
+ if (netns_fd < 0) {
+ close(sock_fd);
+ if (errno == ENOTTY || errno == EINVAL)
+ SKIP(return, "SIOCGSKNS not supported");
+ ASSERT_GE(netns_fd, 0);
+ }
+
+ ASSERT_EQ(fstat(netns_fd, &st), 0);
+
+ /*
+ * Namespace should still be active because socket FD keeps it alive.
+ * Try to access it via /proc/self/fd/<fd>.
+ */
+ char path[64];
+ snprintf(path, sizeof(path), "/proc/self/fd/%d", netns_fd);
+ test_fd = open(path, O_RDONLY);
+ ASSERT_GE(test_fd, 0);
+ close(test_fd);
+ close(netns_fd);
+
+ /* Close socket - namespace should become inactive */
+ close(sock_fd);
+
+ /* Try SIOCGSKNS again - should fail since socket is closed */
+ ASSERT_LT(ioctl(sock_fd, SIOCGSKNS), 0);
+}
+
+/*
+ * Test SIOCGSKNS with different socket types (TCP, UDP, RAW).
+ */
+TEST(siocgskns_socket_types)
+{
+ int sock_tcp, sock_udp, sock_raw;
+ int netns_tcp, netns_udp, netns_raw;
+ struct stat st_tcp, st_udp, st_raw;
+
+ /* TCP socket */
+ sock_tcp = socket(AF_INET, SOCK_STREAM, 0);
+ ASSERT_GE(sock_tcp, 0);
+
+ /* UDP socket */
+ sock_udp = socket(AF_INET, SOCK_DGRAM, 0);
+ ASSERT_GE(sock_udp, 0);
+
+ /* RAW socket (may require privileges) */
+ sock_raw = socket(AF_INET, SOCK_RAW, IPPROTO_ICMP);
+ if (sock_raw < 0 && (errno == EPERM || errno == EACCES)) {
+ sock_raw = -1; /* Skip raw socket test */
+ }
+
+ /* Test SIOCGSKNS on TCP */
+ netns_tcp = ioctl(sock_tcp, SIOCGSKNS);
+ if (netns_tcp < 0) {
+ close(sock_tcp);
+ close(sock_udp);
+ if (sock_raw >= 0) close(sock_raw);
+ if (errno == ENOTTY || errno == EINVAL)
+ SKIP(return, "SIOCGSKNS not supported");
+ ASSERT_GE(netns_tcp, 0);
+ }
+
+ /* Test SIOCGSKNS on UDP */
+ netns_udp = ioctl(sock_udp, SIOCGSKNS);
+ ASSERT_GE(netns_udp, 0);
+
+ /* Test SIOCGSKNS on RAW (if available) */
+ if (sock_raw >= 0) {
+ netns_raw = ioctl(sock_raw, SIOCGSKNS);
+ ASSERT_GE(netns_raw, 0);
+ }
+
+ /* Verify all return the same network namespace */
+ ASSERT_EQ(fstat(netns_tcp, &st_tcp), 0);
+ ASSERT_EQ(fstat(netns_udp, &st_udp), 0);
+ ASSERT_EQ(st_tcp.st_ino, st_udp.st_ino);
+
+ if (sock_raw >= 0) {
+ ASSERT_EQ(fstat(netns_raw, &st_raw), 0);
+ ASSERT_EQ(st_tcp.st_ino, st_raw.st_ino);
+ close(netns_raw);
+ close(sock_raw);
+ }
+
+ close(netns_tcp);
+ close(netns_udp);
+ close(sock_tcp);
+ close(sock_udp);
+}
+
+/*
+ * Test SIOCGSKNS across setns.
+ * Create a socket in netns A, switch to netns B, verify SIOCGSKNS still
+ * returns netns A.
+ */
+TEST(siocgskns_across_setns)
+{
+ int sock_fd, netns_a_fd, netns_b_fd, result_fd;
+ struct stat st_a;
+
+ /* Get current netns (A) */
+ netns_a_fd = open("/proc/self/ns/net", O_RDONLY);
+ ASSERT_GE(netns_a_fd, 0);
+ ASSERT_EQ(fstat(netns_a_fd, &st_a), 0);
+
+ /* Create socket in netns A */
+ sock_fd = socket(AF_INET, SOCK_STREAM, 0);
+ ASSERT_GE(sock_fd, 0);
+
+ /* Create new netns (B) */
+ ASSERT_EQ(unshare(CLONE_NEWNET), 0);
+
+ netns_b_fd = open("/proc/self/ns/net", O_RDONLY);
+ ASSERT_GE(netns_b_fd, 0);
+
+ /* Get netns from socket created in A */
+ result_fd = ioctl(sock_fd, SIOCGSKNS);
+ if (result_fd < 0) {
+ close(sock_fd);
+ setns(netns_a_fd, CLONE_NEWNET);
+ close(netns_a_fd);
+ close(netns_b_fd);
+ if (errno == ENOTTY || errno == EINVAL)
+ SKIP(return, "SIOCGSKNS not supported");
+ ASSERT_GE(result_fd, 0);
+ }
+
+ /* Verify it still points to netns A */
+ struct stat st_result_stat;
+ ASSERT_EQ(fstat(result_fd, &st_result_stat), 0);
+ ASSERT_EQ(st_a.st_ino, st_result_stat.st_ino);
+
+ close(result_fd);
+ close(sock_fd);
+ close(netns_b_fd);
+
+ /* Restore original netns */
+ ASSERT_EQ(setns(netns_a_fd, CLONE_NEWNET), 0);
+ close(netns_a_fd);
+}
+
+/*
+ * Test SIOCGSKNS fails on non-socket file descriptors.
+ */
+TEST(siocgskns_non_socket)
+{
+ int fd;
+ int pipefd[2];
+
+ /* Test on regular file */
+ fd = open("/dev/null", O_RDONLY);
+ ASSERT_GE(fd, 0);
+
+ ASSERT_LT(ioctl(fd, SIOCGSKNS), 0);
+ ASSERT_TRUE(errno == ENOTTY || errno == EINVAL);
+ close(fd);
+
+ /* Test on pipe */
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ ASSERT_LT(ioctl(pipefd[0], SIOCGSKNS), 0);
+ ASSERT_TRUE(errno == ENOTTY || errno == EINVAL);
+
+ close(pipefd[0]);
+ close(pipefd[1]);
+}
+
+/*
+ * Test multiple sockets keep the same network namespace active.
+ * Create multiple sockets, verify closing some doesn't affect others.
+ */
+TEST(siocgskns_multiple_sockets)
+{
+ int socks[5];
+ int netns_fds[5];
+ int i;
+ struct stat st;
+ ino_t netns_ino;
+
+ /* Create new network namespace */
+ ASSERT_EQ(unshare(CLONE_NEWNET), 0);
+
+ /* Create multiple sockets */
+ for (i = 0; i < 5; i++) {
+ socks[i] = socket(AF_INET, SOCK_STREAM, 0);
+ ASSERT_GE(socks[i], 0);
+ }
+
+ /* Get netns from all sockets */
+ for (i = 0; i < 5; i++) {
+ netns_fds[i] = ioctl(socks[i], SIOCGSKNS);
+ if (netns_fds[i] < 0) {
+ int j;
+ for (j = 0; j <= i; j++) {
+ close(socks[j]);
+ if (j < i && netns_fds[j] >= 0)
+ close(netns_fds[j]);
+ }
+ if (errno == ENOTTY || errno == EINVAL)
+ SKIP(return, "SIOCGSKNS not supported");
+ ASSERT_GE(netns_fds[i], 0);
+ }
+ }
+
+ /* Verify all point to same netns */
+ ASSERT_EQ(fstat(netns_fds[0], &st), 0);
+ netns_ino = st.st_ino;
+
+ for (i = 1; i < 5; i++) {
+ ASSERT_EQ(fstat(netns_fds[i], &st), 0);
+ ASSERT_EQ(st.st_ino, netns_ino);
+ }
+
+ /* Close some sockets */
+ for (i = 0; i < 3; i++) {
+ close(socks[i]);
+ }
+
+ /* Remaining netns FDs should still be valid */
+ for (i = 3; i < 5; i++) {
+ char path[64];
+ snprintf(path, sizeof(path), "/proc/self/fd/%d", netns_fds[i]);
+ int test_fd = open(path, O_RDONLY);
+ ASSERT_GE(test_fd, 0);
+ close(test_fd);
+ }
+
+ /* Cleanup */
+ for (i = 0; i < 5; i++) {
+ if (i >= 3)
+ close(socks[i]);
+ close(netns_fds[i]);
+ }
+}
+
+/*
+ * Test socket keeps netns active after creating process exits.
+ * Verify that as long as the socket FD exists, the namespace remains active.
+ */
+TEST(siocgskns_netns_lifecycle)
+{
+ int sock_fd, netns_fd;
+ int ipc_sockets[2];
+ int syncpipe[2];
+ pid_t pid;
+ int status;
+ char sync_byte;
+ struct stat st;
+ ino_t netns_ino;
+
+ EXPECT_EQ(socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets), 0);
+
+ ASSERT_EQ(pipe(syncpipe), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child */
+ close(ipc_sockets[0]);
+ close(syncpipe[1]);
+
+ if (unshare(CLONE_NEWNET) < 0) {
+ close(ipc_sockets[1]);
+ close(syncpipe[0]);
+ exit(1);
+ }
+
+ sock_fd = socket(AF_INET, SOCK_STREAM, 0);
+ if (sock_fd < 0) {
+ close(ipc_sockets[1]);
+ close(syncpipe[0]);
+ exit(1);
+ }
+
+ /* Send socket to parent */
+ struct msghdr msg = {0};
+ struct iovec iov = {0};
+ char buf[1] = {'X'};
+ char cmsg_buf[CMSG_SPACE(sizeof(int))];
+
+ iov.iov_base = buf;
+ iov.iov_len = 1;
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = cmsg_buf;
+ msg.msg_controllen = sizeof(cmsg_buf);
+
+ struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(int));
+ memcpy(CMSG_DATA(cmsg), &sock_fd, sizeof(int));
+
+ if (sendmsg(ipc_sockets[1], &msg, 0) < 0) {
+ close(sock_fd);
+ close(ipc_sockets[1]);
+ close(syncpipe[0]);
+ exit(1);
+ }
+
+ close(sock_fd);
+ close(ipc_sockets[1]);
+
+ /* Wait for parent signal */
+ read(syncpipe[0], &sync_byte, 1);
+ close(syncpipe[0]);
+ exit(0);
+ }
+
+ /* Parent */
+ close(ipc_sockets[1]);
+ close(syncpipe[0]);
+
+ /* Receive socket FD */
+ struct msghdr msg = {0};
+ struct iovec iov = {0};
+ char buf[1];
+ char cmsg_buf[CMSG_SPACE(sizeof(int))];
+
+ iov.iov_base = buf;
+ iov.iov_len = 1;
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = cmsg_buf;
+ msg.msg_controllen = sizeof(cmsg_buf);
+
+ ssize_t n = recvmsg(ipc_sockets[0], &msg, 0);
+ close(ipc_sockets[0]);
+ ASSERT_EQ(n, 1);
+
+ struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
+ ASSERT_NE(cmsg, NULL);
+ memcpy(&sock_fd, CMSG_DATA(cmsg), sizeof(int));
+
+ /* Get netns from socket while child is alive */
+ netns_fd = ioctl(sock_fd, SIOCGSKNS);
+ if (netns_fd < 0) {
+ sync_byte = 'G';
+ write(syncpipe[1], &sync_byte, 1);
+ close(syncpipe[1]);
+ close(sock_fd);
+ waitpid(pid, NULL, 0);
+ if (errno == ENOTTY || errno == EINVAL)
+ SKIP(return, "SIOCGSKNS not supported");
+ ASSERT_GE(netns_fd, 0);
+ }
+ ASSERT_EQ(fstat(netns_fd, &st), 0);
+ netns_ino = st.st_ino;
+
+ /* Signal child to exit */
+ sync_byte = 'G';
+ write(syncpipe[1], &sync_byte, 1);
+ close(syncpipe[1]);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+
+ /*
+ * Socket FD should still keep namespace active even after
+ * the creating process exited.
+ */
+ int test_fd = ioctl(sock_fd, SIOCGSKNS);
+ ASSERT_GE(test_fd, 0);
+
+ struct stat st_test;
+ ASSERT_EQ(fstat(test_fd, &st_test), 0);
+ ASSERT_EQ(st_test.st_ino, netns_ino);
+
+ close(test_fd);
+ close(netns_fd);
+
+ /* Close socket - namespace should become inactive */
+ close(sock_fd);
+}
+
+/*
+ * Test IPv6 sockets also work with SIOCGSKNS.
+ */
+TEST(siocgskns_ipv6)
+{
+ int sock_fd, netns_fd, current_netns_fd;
+ struct stat st1, st2;
+
+ /* Create an IPv6 TCP socket */
+ sock_fd = socket(AF_INET6, SOCK_STREAM, 0);
+ ASSERT_GE(sock_fd, 0);
+
+ /* Use SIOCGSKNS */
+ netns_fd = ioctl(sock_fd, SIOCGSKNS);
+ if (netns_fd < 0) {
+ close(sock_fd);
+ if (errno == ENOTTY || errno == EINVAL)
+ SKIP(return, "SIOCGSKNS not supported");
+ ASSERT_GE(netns_fd, 0);
+ }
+
+ /* Verify it matches current namespace */
+ current_netns_fd = open("/proc/self/ns/net", O_RDONLY);
+ ASSERT_GE(current_netns_fd, 0);
+
+ ASSERT_EQ(fstat(netns_fd, &st1), 0);
+ ASSERT_EQ(fstat(current_netns_fd, &st2), 0);
+ ASSERT_EQ(st1.st_ino, st2.st_ino);
+
+ close(sock_fd);
+ close(netns_fd);
+ close(current_netns_fd);
+}
+
+/*
+ * Test that socket-kept netns appears in listns() output.
+ * Verify that a network namespace kept alive by a socket FD appears in
+ * listns() output even after the creating process exits, and that it
+ * disappears when the socket is closed.
+ */
+TEST(siocgskns_listns_visibility)
+{
+ int sock_fd, netns_fd, owner_fd;
+ int ipc_sockets[2];
+ pid_t pid;
+ int status;
+ __u64 netns_id, owner_id;
+ struct ns_id_req req = {
+ .size = sizeof(req),
+ .spare = 0,
+ .ns_id = 0,
+ .ns_type = CLONE_NEWNET,
+ .spare2 = 0,
+ .user_ns_id = 0,
+ };
+ __u64 ns_ids[256];
+ int ret, i;
+ bool found_netns = false;
+
+ EXPECT_EQ(socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child: create new netns and socket */
+ close(ipc_sockets[0]);
+
+ if (unshare(CLONE_NEWNET) < 0) {
+ close(ipc_sockets[1]);
+ exit(1);
+ }
+
+ sock_fd = socket(AF_INET, SOCK_DGRAM, 0);
+ if (sock_fd < 0) {
+ close(ipc_sockets[1]);
+ exit(1);
+ }
+
+ /* Send socket FD to parent via SCM_RIGHTS */
+ struct msghdr msg = {0};
+ struct iovec iov = {0};
+ char buf[1] = {'X'};
+ char cmsg_buf[CMSG_SPACE(sizeof(int))];
+
+ iov.iov_base = buf;
+ iov.iov_len = 1;
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = cmsg_buf;
+ msg.msg_controllen = sizeof(cmsg_buf);
+
+ struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(int));
+ memcpy(CMSG_DATA(cmsg), &sock_fd, sizeof(int));
+
+ if (sendmsg(ipc_sockets[1], &msg, 0) < 0) {
+ close(sock_fd);
+ close(ipc_sockets[1]);
+ exit(1);
+ }
+
+ close(sock_fd);
+ close(ipc_sockets[1]);
+ exit(0);
+ }
+
+ /* Parent: receive socket FD */
+ close(ipc_sockets[1]);
+
+ struct msghdr msg = {0};
+ struct iovec iov = {0};
+ char buf[1];
+ char cmsg_buf[CMSG_SPACE(sizeof(int))];
+
+ iov.iov_base = buf;
+ iov.iov_len = 1;
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = cmsg_buf;
+ msg.msg_controllen = sizeof(cmsg_buf);
+
+ ssize_t n = recvmsg(ipc_sockets[0], &msg, 0);
+ close(ipc_sockets[0]);
+ ASSERT_EQ(n, 1);
+
+ struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
+ ASSERT_NE(cmsg, NULL);
+ memcpy(&sock_fd, CMSG_DATA(cmsg), sizeof(int));
+
+ /* Wait for child to exit */
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ /* Get network namespace from socket */
+ netns_fd = ioctl(sock_fd, SIOCGSKNS);
+ if (netns_fd < 0) {
+ close(sock_fd);
+ if (errno == ENOTTY || errno == EINVAL)
+ SKIP(return, "SIOCGSKNS not supported");
+ ASSERT_GE(netns_fd, 0);
+ }
+
+ /* Get namespace ID */
+ ret = ioctl(netns_fd, NS_GET_ID, &netns_id);
+ if (ret < 0) {
+ close(sock_fd);
+ close(netns_fd);
+ if (errno == ENOTTY || errno == EINVAL)
+ SKIP(return, "NS_GET_ID not supported");
+ ASSERT_EQ(ret, 0);
+ }
+
+ /* Get owner user namespace */
+ owner_fd = ioctl(netns_fd, NS_GET_USERNS);
+ if (owner_fd < 0) {
+ close(sock_fd);
+ close(netns_fd);
+ if (errno == ENOTTY || errno == EINVAL)
+ SKIP(return, "NS_GET_USERNS not supported");
+ ASSERT_GE(owner_fd, 0);
+ }
+
+ /* Get owner namespace ID */
+ ret = ioctl(owner_fd, NS_GET_ID, &owner_id);
+ if (ret < 0) {
+ close(owner_fd);
+ close(sock_fd);
+ close(netns_fd);
+ ASSERT_EQ(ret, 0);
+ }
+ close(owner_fd);
+
+ /* Namespace should appear in listns() output */
+ ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+ if (ret < 0) {
+ close(sock_fd);
+ close(netns_fd);
+ if (errno == ENOSYS)
+ SKIP(return, "listns() not supported");
+ TH_LOG("listns failed: %s", strerror(errno));
+ ASSERT_GE(ret, 0);
+ }
+
+ /* Search for our network namespace in the list */
+ for (i = 0; i < ret; i++) {
+ if (ns_ids[i] == netns_id) {
+ found_netns = true;
+ break;
+ }
+ }
+
+ ASSERT_TRUE(found_netns);
+ TH_LOG("Found netns %llu in listns() output (kept alive by socket)", netns_id);
+
+ /* Now verify with owner filtering */
+ req.user_ns_id = owner_id;
+ found_netns = false;
+
+ ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+ ASSERT_GE(ret, 0);
+
+ for (i = 0; i < ret; i++) {
+ if (ns_ids[i] == netns_id) {
+ found_netns = true;
+ break;
+ }
+ }
+
+ ASSERT_TRUE(found_netns);
+ TH_LOG("Found netns %llu owned by userns %llu", netns_id, owner_id);
+
+ /* Close socket - namespace should become inactive and disappear from listns() */
+ close(sock_fd);
+ close(netns_fd);
+
+ /* Verify it's no longer in listns() output */
+ req.user_ns_id = 0;
+ found_netns = false;
+
+ ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+ ASSERT_GE(ret, 0);
+
+ for (i = 0; i < ret; i++) {
+ if (ns_ids[i] == netns_id) {
+ found_netns = true;
+ break;
+ }
+ }
+
+ ASSERT_FALSE(found_netns);
+ TH_LOG("Netns %llu correctly disappeared from listns() after socket closed", netns_id);
+}
+
+/*
+ * Test that socket-kept netns can be reopened via file handle.
+ * Verify that a network namespace kept alive by a socket FD can be
+ * reopened using file handles even after the creating process exits.
+ */
+TEST(siocgskns_file_handle)
+{
+ int sock_fd, netns_fd, reopened_fd;
+ int ipc_sockets[2];
+ pid_t pid;
+ int status;
+ struct stat st1, st2;
+ ino_t netns_ino;
+ __u64 netns_id;
+ struct file_handle *handle;
+ struct nsfs_file_handle *nsfs_fh;
+ int ret;
+
+ /* Allocate file_handle structure for nsfs */
+ handle = malloc(sizeof(struct file_handle) + sizeof(struct nsfs_file_handle));
+ ASSERT_NE(handle, NULL);
+ handle->handle_bytes = sizeof(struct nsfs_file_handle);
+ handle->handle_type = FILEID_NSFS;
+
+ EXPECT_EQ(socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child: create new netns and socket */
+ close(ipc_sockets[0]);
+
+ if (unshare(CLONE_NEWNET) < 0) {
+ close(ipc_sockets[1]);
+ exit(1);
+ }
+
+ sock_fd = socket(AF_INET, SOCK_DGRAM, 0);
+ if (sock_fd < 0) {
+ close(ipc_sockets[1]);
+ exit(1);
+ }
+
+ /* Send socket FD to parent via SCM_RIGHTS */
+ struct msghdr msg = {0};
+ struct iovec iov = {0};
+ char buf[1] = {'X'};
+ char cmsg_buf[CMSG_SPACE(sizeof(int))];
+
+ iov.iov_base = buf;
+ iov.iov_len = 1;
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = cmsg_buf;
+ msg.msg_controllen = sizeof(cmsg_buf);
+
+ struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(int));
+ memcpy(CMSG_DATA(cmsg), &sock_fd, sizeof(int));
+
+ if (sendmsg(ipc_sockets[1], &msg, 0) < 0) {
+ close(sock_fd);
+ close(ipc_sockets[1]);
+ exit(1);
+ }
+
+ close(sock_fd);
+ close(ipc_sockets[1]);
+ exit(0);
+ }
+
+ /* Parent: receive socket FD */
+ close(ipc_sockets[1]);
+
+ struct msghdr msg = {0};
+ struct iovec iov = {0};
+ char buf[1];
+ char cmsg_buf[CMSG_SPACE(sizeof(int))];
+
+ iov.iov_base = buf;
+ iov.iov_len = 1;
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = cmsg_buf;
+ msg.msg_controllen = sizeof(cmsg_buf);
+
+ ssize_t n = recvmsg(ipc_sockets[0], &msg, 0);
+ close(ipc_sockets[0]);
+ ASSERT_EQ(n, 1);
+
+ struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
+ ASSERT_NE(cmsg, NULL);
+ memcpy(&sock_fd, CMSG_DATA(cmsg), sizeof(int));
+
+ /* Wait for child to exit */
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ /* Get network namespace from socket */
+ netns_fd = ioctl(sock_fd, SIOCGSKNS);
+ if (netns_fd < 0) {
+ free(handle);
+ close(sock_fd);
+ if (errno == ENOTTY || errno == EINVAL)
+ SKIP(return, "SIOCGSKNS not supported");
+ ASSERT_GE(netns_fd, 0);
+ }
+
+ ASSERT_EQ(fstat(netns_fd, &st1), 0);
+ netns_ino = st1.st_ino;
+
+ /* Get namespace ID */
+ ret = ioctl(netns_fd, NS_GET_ID, &netns_id);
+ if (ret < 0) {
+ free(handle);
+ close(sock_fd);
+ close(netns_fd);
+ if (errno == ENOTTY || errno == EINVAL)
+ SKIP(return, "NS_GET_ID not supported");
+ ASSERT_EQ(ret, 0);
+ }
+
+ /* Construct file handle from namespace ID */
+ nsfs_fh = (struct nsfs_file_handle *)handle->f_handle;
+ nsfs_fh->ns_id = netns_id;
+ nsfs_fh->ns_type = 0; /* Type field not needed for reopening */
+ nsfs_fh->ns_inum = 0; /* Inum field not needed for reopening */
+
+ TH_LOG("Constructed file handle for netns %lu (id=%llu)", netns_ino, netns_id);
+
+ /* Reopen namespace using file handle (while socket still keeps it alive) */
+ reopened_fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+ if (reopened_fd < 0) {
+ free(handle);
+ close(sock_fd);
+ if (errno == EOPNOTSUPP || errno == ENOSYS || errno == EBADF)
+ SKIP(return, "open_by_handle_at with FD_NSFS_ROOT not supported");
+ TH_LOG("open_by_handle_at failed: %s", strerror(errno));
+ ASSERT_GE(reopened_fd, 0);
+ }
+
+ /* Verify it's the same namespace */
+ ASSERT_EQ(fstat(reopened_fd, &st2), 0);
+ ASSERT_EQ(st1.st_ino, st2.st_ino);
+ ASSERT_EQ(st1.st_dev, st2.st_dev);
+
+ TH_LOG("Successfully reopened netns %lu via file handle", netns_ino);
+
+ close(reopened_fd);
+
+ /* Close the netns FD */
+ close(netns_fd);
+
+ /* Try to reopen via file handle - should fail since namespace is now inactive */
+ reopened_fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+ ASSERT_LT(reopened_fd, 0);
+ TH_LOG("Correctly failed to reopen inactive netns: %s", strerror(errno));
+
+ /* Get network namespace from socket */
+ netns_fd = ioctl(sock_fd, SIOCGSKNS);
+ if (netns_fd < 0) {
+ free(handle);
+ close(sock_fd);
+ if (errno == ENOTTY || errno == EINVAL)
+ SKIP(return, "SIOCGSKNS not supported");
+ ASSERT_GE(netns_fd, 0);
+ }
+
+ /* Reopen namespace using file handle (while socket still keeps it alive) */
+ reopened_fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+ if (reopened_fd < 0) {
+ free(handle);
+ close(sock_fd);
+ if (errno == EOPNOTSUPP || errno == ENOSYS || errno == EBADF)
+ SKIP(return, "open_by_handle_at with FD_NSFS_ROOT not supported");
+ TH_LOG("open_by_handle_at failed: %s", strerror(errno));
+ ASSERT_GE(reopened_fd, 0);
+ }
+
+ /* Verify it's the same namespace */
+ ASSERT_EQ(fstat(reopened_fd, &st2), 0);
+ ASSERT_EQ(st1.st_ino, st2.st_ino);
+ ASSERT_EQ(st1.st_dev, st2.st_dev);
+
+ TH_LOG("Successfully reopened netns %lu via file handle", netns_ino);
+
+ /* Close socket - namespace should become inactive */
+ close(sock_fd);
+ free(handle);
+}
+
+/*
+ * Test combined listns() and file handle operations with socket-kept netns.
+ * Create a netns, keep it alive with a socket, verify it appears in listns(),
+ * then reopen it via file handle obtained from listns() entry.
+ */
+TEST(siocgskns_listns_and_file_handle)
+{
+ int sock_fd, netns_fd, userns_fd, reopened_fd;
+ int ipc_sockets[2];
+ pid_t pid;
+ int status;
+ struct stat st;
+ ino_t netns_ino;
+ __u64 netns_id, userns_id;
+ struct ns_id_req req = {
+ .size = sizeof(req),
+ .spare = 0,
+ .ns_id = 0,
+ .ns_type = CLONE_NEWNET | CLONE_NEWUSER,
+ .spare2 = 0,
+ .user_ns_id = 0,
+ };
+ __u64 ns_ids[256];
+ int ret, i;
+ bool found_netns = false, found_userns = false;
+ struct file_handle *handle;
+ struct nsfs_file_handle *nsfs_fh;
+
+ /* Allocate file_handle structure for nsfs */
+ handle = malloc(sizeof(struct file_handle) + sizeof(struct nsfs_file_handle));
+ ASSERT_NE(handle, NULL);
+ handle->handle_bytes = sizeof(struct nsfs_file_handle);
+ handle->handle_type = FILEID_NSFS;
+
+ EXPECT_EQ(socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child: create new userns and netns with socket */
+ close(ipc_sockets[0]);
+
+ if (setup_userns() < 0) {
+ close(ipc_sockets[1]);
+ exit(1);
+ }
+
+ if (unshare(CLONE_NEWNET) < 0) {
+ close(ipc_sockets[1]);
+ exit(1);
+ }
+
+ sock_fd = socket(AF_INET, SOCK_DGRAM, 0);
+ if (sock_fd < 0) {
+ close(ipc_sockets[1]);
+ exit(1);
+ }
+
+ /* Send socket FD to parent via SCM_RIGHTS */
+ struct msghdr msg = {0};
+ struct iovec iov = {0};
+ char buf[1] = {'X'};
+ char cmsg_buf[CMSG_SPACE(sizeof(int))];
+
+ iov.iov_base = buf;
+ iov.iov_len = 1;
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = cmsg_buf;
+ msg.msg_controllen = sizeof(cmsg_buf);
+
+ struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(int));
+ memcpy(CMSG_DATA(cmsg), &sock_fd, sizeof(int));
+
+ if (sendmsg(ipc_sockets[1], &msg, 0) < 0) {
+ close(sock_fd);
+ close(ipc_sockets[1]);
+ exit(1);
+ }
+
+ close(sock_fd);
+ close(ipc_sockets[1]);
+ exit(0);
+ }
+
+ /* Parent: receive socket FD */
+ close(ipc_sockets[1]);
+
+ struct msghdr msg = {0};
+ struct iovec iov = {0};
+ char buf[1];
+ char cmsg_buf[CMSG_SPACE(sizeof(int))];
+
+ iov.iov_base = buf;
+ iov.iov_len = 1;
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = cmsg_buf;
+ msg.msg_controllen = sizeof(cmsg_buf);
+
+ ssize_t n = recvmsg(ipc_sockets[0], &msg, 0);
+ close(ipc_sockets[0]);
+ ASSERT_EQ(n, 1);
+
+ struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
+ ASSERT_NE(cmsg, NULL);
+ memcpy(&sock_fd, CMSG_DATA(cmsg), sizeof(int));
+
+ /* Wait for child to exit */
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ /* Get network namespace from socket */
+ netns_fd = ioctl(sock_fd, SIOCGSKNS);
+ if (netns_fd < 0) {
+ free(handle);
+ close(sock_fd);
+ if (errno == ENOTTY || errno == EINVAL)
+ SKIP(return, "SIOCGSKNS not supported");
+ ASSERT_GE(netns_fd, 0);
+ }
+
+ ASSERT_EQ(fstat(netns_fd, &st), 0);
+ netns_ino = st.st_ino;
+
+ /* Get namespace ID */
+ ret = ioctl(netns_fd, NS_GET_ID, &netns_id);
+ if (ret < 0) {
+ free(handle);
+ close(sock_fd);
+ close(netns_fd);
+ if (errno == ENOTTY || errno == EINVAL)
+ SKIP(return, "NS_GET_ID not supported");
+ ASSERT_EQ(ret, 0);
+ }
+
+ /* Get owner user namespace */
+ userns_fd = ioctl(netns_fd, NS_GET_USERNS);
+ if (userns_fd < 0) {
+ free(handle);
+ close(sock_fd);
+ close(netns_fd);
+ if (errno == ENOTTY || errno == EINVAL)
+ SKIP(return, "NS_GET_USERNS not supported");
+ ASSERT_GE(userns_fd, 0);
+ }
+
+ /* Get owner namespace ID */
+ ret = ioctl(userns_fd, NS_GET_ID, &userns_id);
+ if (ret < 0) {
+ close(userns_fd);
+ free(handle);
+ close(sock_fd);
+ close(netns_fd);
+ ASSERT_EQ(ret, 0);
+ }
+ close(userns_fd);
+
+ TH_LOG("Testing netns %lu (id=%llu) owned by userns id=%llu", netns_ino, netns_id, userns_id);
+
+ /* Verify namespace appears in listns() */
+ ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+ if (ret < 0) {
+ free(handle);
+ close(sock_fd);
+ close(netns_fd);
+ if (errno == ENOSYS)
+ SKIP(return, "listns() not supported");
+ TH_LOG("listns failed: %s", strerror(errno));
+ ASSERT_GE(ret, 0);
+ }
+
+ found_netns = false;
+ found_userns = false;
+ for (i = 0; i < ret; i++) {
+ if (ns_ids[i] == netns_id)
+ found_netns = true;
+ if (ns_ids[i] == userns_id)
+ found_userns = true;
+ }
+ ASSERT_TRUE(found_netns);
+ ASSERT_TRUE(found_userns);
+ TH_LOG("Found netns %llu in listns() output", netns_id);
+
+ /* Construct file handle from namespace ID */
+ nsfs_fh = (struct nsfs_file_handle *)handle->f_handle;
+ nsfs_fh->ns_id = netns_id;
+ nsfs_fh->ns_type = 0;
+ nsfs_fh->ns_inum = 0;
+
+ reopened_fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+ if (reopened_fd < 0) {
+ free(handle);
+ close(sock_fd);
+ if (errno == EOPNOTSUPP || errno == ENOSYS || errno == EBADF)
+ SKIP(return, "open_by_handle_at with FD_NSFS_ROOT not supported");
+ TH_LOG("open_by_handle_at failed: %s", strerror(errno));
+ ASSERT_GE(reopened_fd, 0);
+ }
+
+ struct stat reopened_st;
+ ASSERT_EQ(fstat(reopened_fd, &reopened_st), 0);
+ ASSERT_EQ(reopened_st.st_ino, netns_ino);
+
+ TH_LOG("Successfully reopened netns %lu via file handle (socket-kept)", netns_ino);
+
+ close(reopened_fd);
+ close(netns_fd);
+
+ /* Try to reopen via file handle - should fail since namespace is now inactive */
+ reopened_fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+ ASSERT_LT(reopened_fd, 0);
+ TH_LOG("Correctly failed to reopen inactive netns: %s", strerror(errno));
+
+ /* Get network namespace from socket */
+ netns_fd = ioctl(sock_fd, SIOCGSKNS);
+ if (netns_fd < 0) {
+ free(handle);
+ close(sock_fd);
+ if (errno == ENOTTY || errno == EINVAL)
+ SKIP(return, "SIOCGSKNS not supported");
+ ASSERT_GE(netns_fd, 0);
+ }
+
+ /* Verify namespace appears in listns() */
+ ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+ if (ret < 0) {
+ free(handle);
+ close(sock_fd);
+ close(netns_fd);
+ if (errno == ENOSYS)
+ SKIP(return, "listns() not supported");
+ TH_LOG("listns failed: %s", strerror(errno));
+ ASSERT_GE(ret, 0);
+ }
+
+ found_netns = false;
+ found_userns = false;
+ for (i = 0; i < ret; i++) {
+ if (ns_ids[i] == netns_id)
+ found_netns = true;
+ if (ns_ids[i] == userns_id)
+ found_userns = true;
+ }
+ ASSERT_TRUE(found_netns);
+ ASSERT_TRUE(found_userns);
+ TH_LOG("Found netns %llu in listns() output", netns_id);
+
+ close(netns_fd);
+
+ /* Verify namespace appears in listns() */
+ ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+ if (ret < 0) {
+ free(handle);
+ close(sock_fd);
+ close(netns_fd);
+ if (errno == ENOSYS)
+ SKIP(return, "listns() not supported");
+ TH_LOG("listns failed: %s", strerror(errno));
+ ASSERT_GE(ret, 0);
+ }
+
+ found_netns = false;
+ found_userns = false;
+ for (i = 0; i < ret; i++) {
+ if (ns_ids[i] == netns_id)
+ found_netns = true;
+ if (ns_ids[i] == userns_id)
+ found_userns = true;
+ }
+ ASSERT_FALSE(found_netns);
+ ASSERT_FALSE(found_userns);
+ TH_LOG("Netns %llu correctly disappeared from listns() after socket closed", netns_id);
+
+ close(sock_fd);
+ free(handle);
+}
+
+/*
+ * Test multi-level namespace resurrection across three user namespace levels.
+ *
+ * This test creates a complex namespace hierarchy with three levels of user
+ * namespaces and a network namespace at the deepest level. It verifies that
+ * the resurrection semantics work correctly when SIOCGSKNS is called on a
+ * socket from an inactive namespace tree, and that listns() and
+ * open_by_handle_at() correctly respect visibility rules.
+ *
+ * Hierarchy after child processes exit (all with 0 active refcount):
+ *
+ * net_L3A (0) <- Level 3 network namespace
+ * |
+ * +
+ * userns_L3 (0) <- Level 3 user namespace
+ * |
+ * +
+ * userns_L2 (0) <- Level 2 user namespace
+ * |
+ * +
+ * userns_L1 (0) <- Level 1 user namespace
+ * |
+ * x
+ * init_user_ns
+ *
+ * The test verifies:
+ * 1. SIOCGSKNS on a socket from inactive net_L3A resurrects the entire chain
+ * 2. After resurrection, all namespaces are visible in listns()
+ * 3. Resurrected namespaces can be reopened via file handles
+ * 4. Closing the netns FD cascades down: the entire ownership chain
+ * (userns_L3 -> userns_L2 -> userns_L1) becomes inactive again
+ * 5. Inactive namespaces disappear from listns() and cannot be reopened
+ * 6. Calling SIOCGSKNS again on the same socket resurrects the tree again
+ * 7. After second resurrection, namespaces are visible and can be reopened
+ */
+TEST(siocgskns_multilevel_resurrection)
+{
+ int ipc_sockets[2];
+ pid_t pid_l1, pid_l2, pid_l3;
+ int status;
+
+ /* Namespace file descriptors to be received from child */
+ int sock_L3A_fd = -1;
+ int netns_L3A_fd = -1;
+ __u64 netns_L3A_id;
+ __u64 userns_L1_id, userns_L2_id, userns_L3_id;
+
+ /* For listns() and file handle testing */
+ struct ns_id_req req = {
+ .size = sizeof(req),
+ .spare = 0,
+ .ns_id = 0,
+ .ns_type = CLONE_NEWNET | CLONE_NEWUSER,
+ .spare2 = 0,
+ .user_ns_id = 0,
+ };
+ __u64 ns_ids[256];
+ int ret, i;
+ struct file_handle *handle;
+ struct nsfs_file_handle *nsfs_fh;
+ int reopened_fd;
+
+ /* Allocate file handle for testing */
+ handle = malloc(sizeof(struct file_handle) + sizeof(struct nsfs_file_handle));
+ ASSERT_NE(handle, NULL);
+ handle->handle_bytes = sizeof(struct nsfs_file_handle);
+ handle->handle_type = FILEID_NSFS;
+
+ EXPECT_EQ(socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets), 0);
+
+ /*
+ * Fork level 1 child that creates userns_L1
+ */
+ pid_l1 = fork();
+ ASSERT_GE(pid_l1, 0);
+
+ if (pid_l1 == 0) {
+ /* Level 1 child */
+ int ipc_L2[2];
+ close(ipc_sockets[0]);
+
+ /* Create userns_L1 */
+ if (setup_userns() < 0) {
+ close(ipc_sockets[1]);
+ exit(1);
+ }
+
+ /* Create socketpair for communicating with L2 child */
+ if (socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_L2) < 0) {
+ close(ipc_sockets[1]);
+ exit(1);
+ }
+
+ /*
+ * Fork level 2 child that creates userns_L2
+ */
+ pid_l2 = fork();
+ if (pid_l2 < 0) {
+ close(ipc_sockets[1]);
+ close(ipc_L2[0]);
+ close(ipc_L2[1]);
+ exit(1);
+ }
+
+ if (pid_l2 == 0) {
+ /* Level 2 child */
+ int ipc_L3[2];
+ close(ipc_L2[0]);
+
+ /* Create userns_L2 (nested inside userns_L1) */
+ if (setup_userns() < 0) {
+ close(ipc_L2[1]);
+ exit(1);
+ }
+
+ /* Create socketpair for communicating with L3 child */
+ if (socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_L3) < 0) {
+ close(ipc_L2[1]);
+ exit(1);
+ }
+
+ /*
+ * Fork level 3 child that creates userns_L3 and network namespaces
+ */
+ pid_l3 = fork();
+ if (pid_l3 < 0) {
+ close(ipc_L2[1]);
+ close(ipc_L3[0]);
+ close(ipc_L3[1]);
+ exit(1);
+ }
+
+ if (pid_l3 == 0) {
+ /* Level 3 child - the deepest level */
+ int sock_fd;
+ close(ipc_L3[0]);
+
+ /* Create userns_L3 (nested inside userns_L2) */
+ if (setup_userns() < 0) {
+ close(ipc_L3[1]);
+ exit(1);
+ }
+
+ /* Create network namespace at level 3 */
+ if (unshare(CLONE_NEWNET) < 0) {
+ close(ipc_L3[1]);
+ exit(1);
+ }
+
+ /* Create socket in net_L3A */
+ sock_fd = socket(AF_INET, SOCK_DGRAM, 0);
+ if (sock_fd < 0) {
+ close(ipc_L3[1]);
+ exit(1);
+ }
+
+ /* Send socket FD to L2 parent */
+ struct msghdr msg = {0};
+ struct iovec iov = {0};
+ char buf[1] = {'X'};
+ char cmsg_buf[CMSG_SPACE(sizeof(int))];
+
+ iov.iov_base = buf;
+ iov.iov_len = 1;
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = cmsg_buf;
+ msg.msg_controllen = sizeof(cmsg_buf);
+
+ struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(int));
+ memcpy(CMSG_DATA(cmsg), &sock_fd, sizeof(int));
+
+ if (sendmsg(ipc_L3[1], &msg, 0) < 0) {
+ close(sock_fd);
+ close(ipc_L3[1]);
+ exit(1);
+ }
+
+ close(sock_fd);
+ close(ipc_L3[1]);
+ exit(0);
+ }
+
+ /* Level 2 child - receive from L3 and forward to L1 */
+ close(ipc_L3[1]);
+
+ struct msghdr msg = {0};
+ struct iovec iov = {0};
+ char buf[1];
+ char cmsg_buf[CMSG_SPACE(sizeof(int))];
+ int received_fd;
+
+ iov.iov_base = buf;
+ iov.iov_len = 1;
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = cmsg_buf;
+ msg.msg_controllen = sizeof(cmsg_buf);
+
+ ssize_t n = recvmsg(ipc_L3[0], &msg, 0);
+ close(ipc_L3[0]);
+
+ if (n != 1) {
+ close(ipc_L2[1]);
+ waitpid(pid_l3, NULL, 0);
+ exit(1);
+ }
+
+ struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
+ if (!cmsg) {
+ close(ipc_L2[1]);
+ waitpid(pid_l3, NULL, 0);
+ exit(1);
+ }
+ memcpy(&received_fd, CMSG_DATA(cmsg), sizeof(int));
+
+ /* Wait for L3 child */
+ waitpid(pid_l3, NULL, 0);
+
+ /* Forward the socket FD to L1 parent */
+ memset(&msg, 0, sizeof(msg));
+ buf[0] = 'Y';
+ iov.iov_base = buf;
+ iov.iov_len = 1;
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = cmsg_buf;
+ msg.msg_controllen = sizeof(cmsg_buf);
+
+ cmsg = CMSG_FIRSTHDR(&msg);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(int));
+ memcpy(CMSG_DATA(cmsg), &received_fd, sizeof(int));
+
+ if (sendmsg(ipc_L2[1], &msg, 0) < 0) {
+ close(received_fd);
+ close(ipc_L2[1]);
+ exit(1);
+ }
+
+ close(received_fd);
+ close(ipc_L2[1]);
+ exit(0);
+ }
+
+ /* Level 1 child - receive from L2 and forward to parent */
+ close(ipc_L2[1]);
+
+ struct msghdr msg = {0};
+ struct iovec iov = {0};
+ char buf[1];
+ char cmsg_buf[CMSG_SPACE(sizeof(int))];
+ int received_fd;
+
+ iov.iov_base = buf;
+ iov.iov_len = 1;
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = cmsg_buf;
+ msg.msg_controllen = sizeof(cmsg_buf);
+
+ ssize_t n = recvmsg(ipc_L2[0], &msg, 0);
+ close(ipc_L2[0]);
+
+ if (n != 1) {
+ close(ipc_sockets[1]);
+ waitpid(pid_l2, NULL, 0);
+ exit(1);
+ }
+
+ struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
+ if (!cmsg) {
+ close(ipc_sockets[1]);
+ waitpid(pid_l2, NULL, 0);
+ exit(1);
+ }
+ memcpy(&received_fd, CMSG_DATA(cmsg), sizeof(int));
+
+ /* Wait for L2 child */
+ waitpid(pid_l2, NULL, 0);
+
+ /* Forward the socket FD to parent */
+ memset(&msg, 0, sizeof(msg));
+ buf[0] = 'Z';
+ iov.iov_base = buf;
+ iov.iov_len = 1;
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = cmsg_buf;
+ msg.msg_controllen = sizeof(cmsg_buf);
+
+ cmsg = CMSG_FIRSTHDR(&msg);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(int));
+ memcpy(CMSG_DATA(cmsg), &received_fd, sizeof(int));
+
+ if (sendmsg(ipc_sockets[1], &msg, 0) < 0) {
+ close(received_fd);
+ close(ipc_sockets[1]);
+ exit(1);
+ }
+
+ close(received_fd);
+ close(ipc_sockets[1]);
+ exit(0);
+ }
+
+ /* Parent - receive the socket from the deepest level */
+ close(ipc_sockets[1]);
+
+ struct msghdr msg = {0};
+ struct iovec iov = {0};
+ char buf[1];
+ char cmsg_buf[CMSG_SPACE(sizeof(int))];
+
+ iov.iov_base = buf;
+ iov.iov_len = 1;
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = cmsg_buf;
+ msg.msg_controllen = sizeof(cmsg_buf);
+
+ ssize_t n = recvmsg(ipc_sockets[0], &msg, 0);
+ close(ipc_sockets[0]);
+
+ if (n != 1) {
+ free(handle);
+ waitpid(pid_l1, NULL, 0);
+ SKIP(return, "Failed to receive socket from child");
+ }
+
+ struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
+ if (!cmsg) {
+ free(handle);
+ waitpid(pid_l1, NULL, 0);
+ SKIP(return, "Failed to receive socket from child");
+ }
+ memcpy(&sock_L3A_fd, CMSG_DATA(cmsg), sizeof(int));
+
+ /* Wait for L1 child */
+ waitpid(pid_l1, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ /*
+ * At this point, all child processes have exited. The socket itself
+ * doesn't keep the namespace active - we need to call SIOCGSKNS which
+ * will resurrect the entire namespace tree by taking active references.
+ */
+
+ /* Get network namespace from socket - this resurrects the tree */
+ netns_L3A_fd = ioctl(sock_L3A_fd, SIOCGSKNS);
+ if (netns_L3A_fd < 0) {
+ free(handle);
+ close(sock_L3A_fd);
+ if (errno == ENOTTY || errno == EINVAL)
+ SKIP(return, "SIOCGSKNS not supported");
+ ASSERT_GE(netns_L3A_fd, 0);
+ }
+
+ /* Get namespace ID for net_L3A */
+ ret = ioctl(netns_L3A_fd, NS_GET_ID, &netns_L3A_id);
+ if (ret < 0) {
+ free(handle);
+ close(sock_L3A_fd);
+ close(netns_L3A_fd);
+ if (errno == ENOTTY || errno == EINVAL)
+ SKIP(return, "NS_GET_ID not supported");
+ ASSERT_EQ(ret, 0);
+ }
+
+ /* Get owner user namespace chain: userns_L3 -> userns_L2 -> userns_L1 */
+ int userns_L3_fd = ioctl(netns_L3A_fd, NS_GET_USERNS);
+ if (userns_L3_fd < 0) {
+ free(handle);
+ close(sock_L3A_fd);
+ close(netns_L3A_fd);
+ if (errno == ENOTTY || errno == EINVAL)
+ SKIP(return, "NS_GET_USERNS not supported");
+ ASSERT_GE(userns_L3_fd, 0);
+ }
+
+ ret = ioctl(userns_L3_fd, NS_GET_ID, &userns_L3_id);
+ ASSERT_EQ(ret, 0);
+
+ int userns_L2_fd = ioctl(userns_L3_fd, NS_GET_USERNS);
+ ASSERT_GE(userns_L2_fd, 0);
+ ret = ioctl(userns_L2_fd, NS_GET_ID, &userns_L2_id);
+ ASSERT_EQ(ret, 0);
+
+ int userns_L1_fd = ioctl(userns_L2_fd, NS_GET_USERNS);
+ ASSERT_GE(userns_L1_fd, 0);
+ ret = ioctl(userns_L1_fd, NS_GET_ID, &userns_L1_id);
+ ASSERT_EQ(ret, 0);
+
+ close(userns_L1_fd);
+ close(userns_L2_fd);
+ close(userns_L3_fd);
+
+ TH_LOG("Multi-level hierarchy: net_L3A (id=%llu) -> userns_L3 (id=%llu) -> userns_L2 (id=%llu) -> userns_L1 (id=%llu)",
+ netns_L3A_id, userns_L3_id, userns_L2_id, userns_L1_id);
+
+ /*
+ * Test 1: Verify net_L3A is visible in listns() after resurrection.
+ * The entire ownership chain should be resurrected and visible.
+ */
+ ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+ if (ret < 0) {
+ free(handle);
+ close(sock_L3A_fd);
+ close(netns_L3A_fd);
+ if (errno == ENOSYS)
+ SKIP(return, "listns() not supported");
+ ASSERT_GE(ret, 0);
+ }
+
+ bool found_netns_L3A = false;
+ bool found_userns_L1 = false;
+ bool found_userns_L2 = false;
+ bool found_userns_L3 = false;
+
+ for (i = 0; i < ret; i++) {
+ if (ns_ids[i] == netns_L3A_id)
+ found_netns_L3A = true;
+ if (ns_ids[i] == userns_L1_id)
+ found_userns_L1 = true;
+ if (ns_ids[i] == userns_L2_id)
+ found_userns_L2 = true;
+ if (ns_ids[i] == userns_L3_id)
+ found_userns_L3 = true;
+ }
+
+ ASSERT_TRUE(found_netns_L3A);
+ ASSERT_TRUE(found_userns_L1);
+ ASSERT_TRUE(found_userns_L2);
+ ASSERT_TRUE(found_userns_L3);
+ TH_LOG("Resurrection verified: all namespaces in hierarchy visible in listns()");
+
+ /*
+ * Test 2: Verify net_L3A can be reopened via file handle.
+ */
+ nsfs_fh = (struct nsfs_file_handle *)handle->f_handle;
+ nsfs_fh->ns_id = netns_L3A_id;
+ nsfs_fh->ns_type = 0;
+ nsfs_fh->ns_inum = 0;
+
+ reopened_fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+ if (reopened_fd < 0) {
+ free(handle);
+ close(sock_L3A_fd);
+ close(netns_L3A_fd);
+ if (errno == EOPNOTSUPP || errno == ENOSYS || errno == EBADF)
+ SKIP(return, "open_by_handle_at with FD_NSFS_ROOT not supported");
+ TH_LOG("open_by_handle_at failed: %s", strerror(errno));
+ ASSERT_GE(reopened_fd, 0);
+ }
+
+ close(reopened_fd);
+ TH_LOG("File handle test passed: net_L3A can be reopened");
+
+ /*
+ * Test 3: Verify that when we close the netns FD (dropping the last
+ * active reference), the entire tree becomes inactive and disappears
+ * from listns(). The cascade goes: net_L3A drops -> userns_L3 drops ->
+ * userns_L2 drops -> userns_L1 drops.
+ */
+ close(netns_L3A_fd);
+
+ ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+ ASSERT_GE(ret, 0);
+
+ found_netns_L3A = false;
+ found_userns_L1 = false;
+ found_userns_L2 = false;
+ found_userns_L3 = false;
+
+ for (i = 0; i < ret; i++) {
+ if (ns_ids[i] == netns_L3A_id)
+ found_netns_L3A = true;
+ if (ns_ids[i] == userns_L1_id)
+ found_userns_L1 = true;
+ if (ns_ids[i] == userns_L2_id)
+ found_userns_L2 = true;
+ if (ns_ids[i] == userns_L3_id)
+ found_userns_L3 = true;
+ }
+
+ ASSERT_FALSE(found_netns_L3A);
+ ASSERT_FALSE(found_userns_L1);
+ ASSERT_FALSE(found_userns_L2);
+ ASSERT_FALSE(found_userns_L3);
+ TH_LOG("Cascade test passed: all namespaces disappeared after netns FD closed");
+
+ /*
+ * Test 4: Verify file handle no longer works for inactive namespace.
+ */
+ reopened_fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+ if (reopened_fd >= 0) {
+ close(reopened_fd);
+ free(handle);
+ ASSERT_TRUE(false); /* Should have failed */
+ }
+ TH_LOG("Inactive namespace correctly cannot be reopened via file handle");
+
+ /*
+ * Test 5: Verify that calling SIOCGSKNS again resurrects the tree again.
+ * The socket is still valid, so we can call SIOCGSKNS on it to resurrect
+ * the namespace tree once more.
+ */
+ netns_L3A_fd = ioctl(sock_L3A_fd, SIOCGSKNS);
+ ASSERT_GE(netns_L3A_fd, 0);
+
+ TH_LOG("Called SIOCGSKNS again to resurrect the namespace tree");
+
+ /* Verify the namespace tree is resurrected and visible in listns() */
+ ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+ ASSERT_GE(ret, 0);
+
+ found_netns_L3A = false;
+ found_userns_L1 = false;
+ found_userns_L2 = false;
+ found_userns_L3 = false;
+
+ for (i = 0; i < ret; i++) {
+ if (ns_ids[i] == netns_L3A_id)
+ found_netns_L3A = true;
+ if (ns_ids[i] == userns_L1_id)
+ found_userns_L1 = true;
+ if (ns_ids[i] == userns_L2_id)
+ found_userns_L2 = true;
+ if (ns_ids[i] == userns_L3_id)
+ found_userns_L3 = true;
+ }
+
+ ASSERT_TRUE(found_netns_L3A);
+ ASSERT_TRUE(found_userns_L1);
+ ASSERT_TRUE(found_userns_L2);
+ ASSERT_TRUE(found_userns_L3);
+ TH_LOG("Second resurrection verified: all namespaces in hierarchy visible in listns() again");
+
+ /* Verify we can reopen via file handle again */
+ reopened_fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+ if (reopened_fd < 0) {
+ free(handle);
+ close(sock_L3A_fd);
+ close(netns_L3A_fd);
+ TH_LOG("open_by_handle_at failed after second resurrection: %s", strerror(errno));
+ ASSERT_GE(reopened_fd, 0);
+ }
+
+ close(reopened_fd);
+ TH_LOG("File handle test passed: net_L3A can be reopened after second resurrection");
+
+ /* Final cleanup */
+ close(sock_L3A_fd);
+ close(netns_L3A_fd);
+ free(handle);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/namespaces/stress_test.c b/tools/testing/selftests/namespaces/stress_test.c
new file mode 100644
index 000000000000..dd7df7d6cb27
--- /dev/null
+++ b/tools/testing/selftests/namespaces/stress_test.c
@@ -0,0 +1,626 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <linux/nsfs.h>
+#include "../kselftest_harness.h"
+#include "../filesystems/utils.h"
+#include "wrappers.h"
+
+/*
+ * Stress tests for namespace active reference counting.
+ *
+ * These tests validate that the active reference counting system can handle
+ * high load scenarios including rapid namespace creation/destruction, large
+ * numbers of concurrent namespaces, and various edge cases under stress.
+ */
+
+/*
+ * Test rapid creation and destruction of user namespaces.
+ * Create and destroy namespaces in quick succession to stress the
+ * active reference tracking and ensure no leaks occur.
+ */
+TEST(rapid_namespace_creation_destruction)
+{
+ struct ns_id_req req = {
+ .size = sizeof(req),
+ .spare = 0,
+ .ns_id = 0,
+ .ns_type = CLONE_NEWUSER,
+ .spare2 = 0,
+ .user_ns_id = 0,
+ };
+ __u64 ns_ids_before[256], ns_ids_after[256];
+ ssize_t ret_before, ret_after;
+ int i;
+
+ /* Get baseline count of active user namespaces */
+ ret_before = sys_listns(&req, ns_ids_before, ARRAY_SIZE(ns_ids_before), 0);
+ if (ret_before < 0) {
+ if (errno == ENOSYS)
+ SKIP(return, "listns() not supported");
+ ASSERT_GE(ret_before, 0);
+ }
+
+ TH_LOG("Baseline: %zd active user namespaces", ret_before);
+
+ /* Rapidly create and destroy 100 user namespaces */
+ for (i = 0; i < 100; i++) {
+ pid_t pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child: create user namespace and immediately exit */
+ if (setup_userns() < 0)
+ exit(1);
+ exit(0);
+ }
+
+ /* Parent: wait for child */
+ int status;
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+ }
+
+ /* Verify we're back to baseline (no leaked namespaces) */
+ ret_after = sys_listns(&req, ns_ids_after, ARRAY_SIZE(ns_ids_after), 0);
+ ASSERT_GE(ret_after, 0);
+
+ TH_LOG("After 100 rapid create/destroy cycles: %zd active user namespaces", ret_after);
+ ASSERT_EQ(ret_before, ret_after);
+}
+
+/*
+ * Test creating many concurrent namespaces.
+ * Verify that listns() correctly tracks all of them and that they all
+ * become inactive after processes exit.
+ */
+TEST(many_concurrent_namespaces)
+{
+ struct ns_id_req req = {
+ .size = sizeof(req),
+ .spare = 0,
+ .ns_id = 0,
+ .ns_type = CLONE_NEWUSER,
+ .spare2 = 0,
+ .user_ns_id = 0,
+ };
+ __u64 ns_ids_before[512], ns_ids_during[512], ns_ids_after[512];
+ ssize_t ret_before, ret_during, ret_after;
+ pid_t pids[50];
+ int num_children = 50;
+ int i;
+ int sv[2];
+
+ /* Get baseline */
+ ret_before = sys_listns(&req, ns_ids_before, ARRAY_SIZE(ns_ids_before), 0);
+ if (ret_before < 0) {
+ if (errno == ENOSYS)
+ SKIP(return, "listns() not supported");
+ ASSERT_GE(ret_before, 0);
+ }
+
+ TH_LOG("Baseline: %zd active user namespaces", ret_before);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sv), 0);
+
+ /* Create many children, each with their own user namespace */
+ for (i = 0; i < num_children; i++) {
+ pids[i] = fork();
+ ASSERT_GE(pids[i], 0);
+
+ if (pids[i] == 0) {
+ /* Child: create user namespace and wait for parent signal */
+ char c;
+
+ close(sv[0]);
+
+ if (setup_userns() < 0) {
+ close(sv[1]);
+ exit(1);
+ }
+
+ /* Signal parent we're ready */
+ if (write(sv[1], &c, 1) != 1) {
+ close(sv[1]);
+ exit(1);
+ }
+
+ /* Wait for parent signal to exit */
+ if (read(sv[1], &c, 1) != 1) {
+ close(sv[1]);
+ exit(1);
+ }
+
+ close(sv[1]);
+ exit(0);
+ }
+ }
+
+ close(sv[1]);
+
+ /* Wait for all children to signal ready */
+ for (i = 0; i < num_children; i++) {
+ char c;
+ if (read(sv[0], &c, 1) != 1) {
+ /* If we fail to read, kill all children and exit */
+ close(sv[0]);
+ for (int j = 0; j < num_children; j++)
+ kill(pids[j], SIGKILL);
+ for (int j = 0; j < num_children; j++)
+ waitpid(pids[j], NULL, 0);
+ ASSERT_TRUE(false);
+ }
+ }
+
+ /* List namespaces while all children are running */
+ ret_during = sys_listns(&req, ns_ids_during, ARRAY_SIZE(ns_ids_during), 0);
+ ASSERT_GE(ret_during, 0);
+
+ TH_LOG("With %d children running: %zd active user namespaces", num_children, ret_during);
+
+ /* Should have at least num_children more namespaces than baseline */
+ ASSERT_GE(ret_during, ret_before + num_children);
+
+ /* Signal all children to exit */
+ for (i = 0; i < num_children; i++) {
+ char c = 'X';
+ if (write(sv[0], &c, 1) != 1) {
+ /* If we fail to write, kill remaining children */
+ close(sv[0]);
+ for (int j = i; j < num_children; j++)
+ kill(pids[j], SIGKILL);
+ for (int j = 0; j < num_children; j++)
+ waitpid(pids[j], NULL, 0);
+ ASSERT_TRUE(false);
+ }
+ }
+
+ close(sv[0]);
+
+ /* Wait for all children */
+ for (i = 0; i < num_children; i++) {
+ int status;
+ waitpid(pids[i], &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ }
+
+ /* Verify we're back to baseline */
+ ret_after = sys_listns(&req, ns_ids_after, ARRAY_SIZE(ns_ids_after), 0);
+ ASSERT_GE(ret_after, 0);
+
+ TH_LOG("After all children exit: %zd active user namespaces", ret_after);
+ ASSERT_EQ(ret_before, ret_after);
+}
+
+/*
+ * Test rapid namespace creation with different namespace types.
+ * Create multiple types of namespaces rapidly to stress the tracking system.
+ */
+TEST(rapid_mixed_namespace_creation)
+{
+ struct ns_id_req req = {
+ .size = sizeof(req),
+ .spare = 0,
+ .ns_id = 0,
+ .ns_type = 0, /* All types */
+ .spare2 = 0,
+ .user_ns_id = 0,
+ };
+ __u64 ns_ids_before[512], ns_ids_after[512];
+ ssize_t ret_before, ret_after;
+ int i;
+
+ /* Get baseline count */
+ ret_before = sys_listns(&req, ns_ids_before, ARRAY_SIZE(ns_ids_before), 0);
+ if (ret_before < 0) {
+ if (errno == ENOSYS)
+ SKIP(return, "listns() not supported");
+ ASSERT_GE(ret_before, 0);
+ }
+
+ TH_LOG("Baseline: %zd active namespaces (all types)", ret_before);
+
+ /* Rapidly create and destroy namespaces with multiple types */
+ for (i = 0; i < 50; i++) {
+ pid_t pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child: create multiple namespace types */
+ if (setup_userns() < 0)
+ exit(1);
+
+ /* Create additional namespace types */
+ if (unshare(CLONE_NEWNET) < 0)
+ exit(1);
+ if (unshare(CLONE_NEWUTS) < 0)
+ exit(1);
+ if (unshare(CLONE_NEWIPC) < 0)
+ exit(1);
+
+ exit(0);
+ }
+
+ /* Parent: wait for child */
+ int status;
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ }
+
+ /* Verify we're back to baseline */
+ ret_after = sys_listns(&req, ns_ids_after, ARRAY_SIZE(ns_ids_after), 0);
+ ASSERT_GE(ret_after, 0);
+
+ TH_LOG("After 50 rapid mixed namespace cycles: %zd active namespaces", ret_after);
+ ASSERT_EQ(ret_before, ret_after);
+}
+
+/*
+ * Test nested namespace creation under stress.
+ * Create deeply nested namespace hierarchies and verify proper cleanup.
+ */
+TEST(nested_namespace_stress)
+{
+ struct ns_id_req req = {
+ .size = sizeof(req),
+ .spare = 0,
+ .ns_id = 0,
+ .ns_type = CLONE_NEWUSER,
+ .spare2 = 0,
+ .user_ns_id = 0,
+ };
+ __u64 ns_ids_before[512], ns_ids_after[512];
+ ssize_t ret_before, ret_after;
+ int i;
+
+ /* Get baseline */
+ ret_before = sys_listns(&req, ns_ids_before, ARRAY_SIZE(ns_ids_before), 0);
+ if (ret_before < 0) {
+ if (errno == ENOSYS)
+ SKIP(return, "listns() not supported");
+ ASSERT_GE(ret_before, 0);
+ }
+
+ TH_LOG("Baseline: %zd active user namespaces", ret_before);
+
+ /* Create 20 processes, each with nested user namespaces */
+ for (i = 0; i < 20; i++) {
+ pid_t pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ int userns_fd;
+ uid_t orig_uid = getuid();
+ int depth;
+
+ /* Create nested user namespaces (up to 5 levels) */
+ for (depth = 0; depth < 5; depth++) {
+ userns_fd = get_userns_fd(0, (depth == 0) ? orig_uid : 0, 1);
+ if (userns_fd < 0)
+ exit(1);
+
+ if (setns(userns_fd, CLONE_NEWUSER) < 0) {
+ close(userns_fd);
+ exit(1);
+ }
+ close(userns_fd);
+ }
+
+ exit(0);
+ }
+
+ /* Parent: wait for child */
+ int status;
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ }
+
+ /* Verify we're back to baseline */
+ ret_after = sys_listns(&req, ns_ids_after, ARRAY_SIZE(ns_ids_after), 0);
+ ASSERT_GE(ret_after, 0);
+
+ TH_LOG("After 20 nested namespace hierarchies: %zd active user namespaces", ret_after);
+ ASSERT_EQ(ret_before, ret_after);
+}
+
+/*
+ * Test listns() pagination under stress.
+ * Create many namespaces and verify pagination works correctly.
+ */
+TEST(listns_pagination_stress)
+{
+ struct ns_id_req req = {
+ .size = sizeof(req),
+ .spare = 0,
+ .ns_id = 0,
+ .ns_type = CLONE_NEWUSER,
+ .spare2 = 0,
+ .user_ns_id = 0,
+ };
+ pid_t pids[30];
+ int num_children = 30;
+ int i;
+ int sv[2];
+ __u64 all_ns_ids[512];
+ int total_found = 0;
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sv), 0);
+
+ /* Create many children with user namespaces */
+ for (i = 0; i < num_children; i++) {
+ pids[i] = fork();
+ ASSERT_GE(pids[i], 0);
+
+ if (pids[i] == 0) {
+ char c;
+ close(sv[0]);
+
+ if (setup_userns() < 0) {
+ close(sv[1]);
+ exit(1);
+ }
+
+ /* Signal parent we're ready */
+ if (write(sv[1], &c, 1) != 1) {
+ close(sv[1]);
+ exit(1);
+ }
+
+ /* Wait for parent signal to exit */
+ if (read(sv[1], &c, 1) != 1) {
+ close(sv[1]);
+ exit(1);
+ }
+
+ close(sv[1]);
+ exit(0);
+ }
+ }
+
+ close(sv[1]);
+
+ /* Wait for all children to signal ready */
+ for (i = 0; i < num_children; i++) {
+ char c;
+ if (read(sv[0], &c, 1) != 1) {
+ /* If we fail to read, kill all children and exit */
+ close(sv[0]);
+ for (int j = 0; j < num_children; j++)
+ kill(pids[j], SIGKILL);
+ for (int j = 0; j < num_children; j++)
+ waitpid(pids[j], NULL, 0);
+ ASSERT_TRUE(false);
+ }
+ }
+
+ /* Paginate through all namespaces using small batch sizes */
+ req.ns_id = 0;
+ while (1) {
+ __u64 batch[5]; /* Small batch size to force pagination */
+ ssize_t ret;
+
+ ret = sys_listns(&req, batch, ARRAY_SIZE(batch), 0);
+ if (ret < 0) {
+ if (errno == ENOSYS) {
+ close(sv[0]);
+ for (i = 0; i < num_children; i++)
+ kill(pids[i], SIGKILL);
+ for (i = 0; i < num_children; i++)
+ waitpid(pids[i], NULL, 0);
+ SKIP(return, "listns() not supported");
+ }
+ ASSERT_GE(ret, 0);
+ }
+
+ if (ret == 0)
+ break;
+
+ /* Store results */
+ for (i = 0; i < ret && total_found < 512; i++) {
+ all_ns_ids[total_found++] = batch[i];
+ }
+
+ /* Update cursor for next batch */
+ if (ret == ARRAY_SIZE(batch))
+ req.ns_id = batch[ret - 1];
+ else
+ break;
+ }
+
+ TH_LOG("Paginated through %d user namespaces", total_found);
+
+ /* Verify no duplicates in pagination */
+ for (i = 0; i < total_found; i++) {
+ for (int j = i + 1; j < total_found; j++) {
+ if (all_ns_ids[i] == all_ns_ids[j]) {
+ TH_LOG("Found duplicate ns_id: %llu at positions %d and %d",
+ (unsigned long long)all_ns_ids[i], i, j);
+ ASSERT_TRUE(false);
+ }
+ }
+ }
+
+ /* Signal all children to exit */
+ for (i = 0; i < num_children; i++) {
+ char c = 'X';
+ if (write(sv[0], &c, 1) != 1) {
+ close(sv[0]);
+ for (int j = i; j < num_children; j++)
+ kill(pids[j], SIGKILL);
+ for (int j = 0; j < num_children; j++)
+ waitpid(pids[j], NULL, 0);
+ ASSERT_TRUE(false);
+ }
+ }
+
+ close(sv[0]);
+
+ /* Wait for all children */
+ for (i = 0; i < num_children; i++) {
+ int status;
+ waitpid(pids[i], &status, 0);
+ }
+}
+
+/*
+ * Test concurrent namespace operations.
+ * Multiple processes creating, querying, and destroying namespaces concurrently.
+ */
+TEST(concurrent_namespace_operations)
+{
+ struct ns_id_req req = {
+ .size = sizeof(req),
+ .spare = 0,
+ .ns_id = 0,
+ .ns_type = 0,
+ .spare2 = 0,
+ .user_ns_id = 0,
+ };
+ __u64 ns_ids_before[512], ns_ids_after[512];
+ ssize_t ret_before, ret_after;
+ pid_t pids[20];
+ int num_workers = 20;
+ int i;
+
+ /* Get baseline */
+ ret_before = sys_listns(&req, ns_ids_before, ARRAY_SIZE(ns_ids_before), 0);
+ if (ret_before < 0) {
+ if (errno == ENOSYS)
+ SKIP(return, "listns() not supported");
+ ASSERT_GE(ret_before, 0);
+ }
+
+ TH_LOG("Baseline: %zd active namespaces", ret_before);
+
+ /* Create worker processes that do concurrent operations */
+ for (i = 0; i < num_workers; i++) {
+ pids[i] = fork();
+ ASSERT_GE(pids[i], 0);
+
+ if (pids[i] == 0) {
+ /* Each worker: create namespaces, list them, repeat */
+ int iterations;
+
+ for (iterations = 0; iterations < 10; iterations++) {
+ int userns_fd;
+ __u64 temp_ns_ids[100];
+ ssize_t ret;
+
+ /* Create a user namespace */
+ userns_fd = get_userns_fd(0, getuid(), 1);
+ if (userns_fd < 0)
+ continue;
+
+ /* List namespaces */
+ ret = sys_listns(&req, temp_ns_ids, ARRAY_SIZE(temp_ns_ids), 0);
+ (void)ret;
+
+ close(userns_fd);
+
+ /* Small delay */
+ usleep(1000);
+ }
+
+ exit(0);
+ }
+ }
+
+ /* Wait for all workers */
+ for (i = 0; i < num_workers; i++) {
+ int status;
+ waitpid(pids[i], &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+ }
+
+ /* Verify we're back to baseline */
+ ret_after = sys_listns(&req, ns_ids_after, ARRAY_SIZE(ns_ids_after), 0);
+ ASSERT_GE(ret_after, 0);
+
+ TH_LOG("After concurrent operations: %zd active namespaces", ret_after);
+ ASSERT_EQ(ret_before, ret_after);
+}
+
+/*
+ * Test namespace churn - continuous creation and destruction.
+ * Simulates high-churn scenarios like container orchestration.
+ */
+TEST(namespace_churn)
+{
+ struct ns_id_req req = {
+ .size = sizeof(req),
+ .spare = 0,
+ .ns_id = 0,
+ .ns_type = CLONE_NEWUSER | CLONE_NEWNET | CLONE_NEWUTS,
+ .spare2 = 0,
+ .user_ns_id = 0,
+ };
+ __u64 ns_ids_before[512], ns_ids_after[512];
+ ssize_t ret_before, ret_after;
+ int cycle;
+
+ /* Get baseline */
+ ret_before = sys_listns(&req, ns_ids_before, ARRAY_SIZE(ns_ids_before), 0);
+ if (ret_before < 0) {
+ if (errno == ENOSYS)
+ SKIP(return, "listns() not supported");
+ ASSERT_GE(ret_before, 0);
+ }
+
+ TH_LOG("Baseline: %zd active namespaces", ret_before);
+
+ /* Simulate churn: batches of namespaces created and destroyed */
+ for (cycle = 0; cycle < 10; cycle++) {
+ pid_t batch_pids[10];
+ int i;
+
+ /* Create batch */
+ for (i = 0; i < 10; i++) {
+ batch_pids[i] = fork();
+ ASSERT_GE(batch_pids[i], 0);
+
+ if (batch_pids[i] == 0) {
+ /* Create multiple namespace types */
+ if (setup_userns() < 0)
+ exit(1);
+ if (unshare(CLONE_NEWNET) < 0)
+ exit(1);
+ if (unshare(CLONE_NEWUTS) < 0)
+ exit(1);
+
+ /* Keep namespaces alive briefly */
+ usleep(10000);
+ exit(0);
+ }
+ }
+
+ /* Wait for batch to complete */
+ for (i = 0; i < 10; i++) {
+ int status;
+ waitpid(batch_pids[i], &status, 0);
+ }
+ }
+
+ /* Verify we're back to baseline */
+ ret_after = sys_listns(&req, ns_ids_after, ARRAY_SIZE(ns_ids_after), 0);
+ ASSERT_GE(ret_after, 0);
+
+ TH_LOG("After 10 churn cycles (100 namespace sets): %zd active namespaces", ret_after);
+ ASSERT_EQ(ret_before, ret_after);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/namespaces/wrappers.h b/tools/testing/selftests/namespaces/wrappers.h
new file mode 100644
index 000000000000..9741a64a5b1d
--- /dev/null
+++ b/tools/testing/selftests/namespaces/wrappers.h
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/nsfs.h>
+#include <linux/types.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#ifndef __SELFTESTS_NAMESPACES_WRAPPERS_H__
+#define __SELFTESTS_NAMESPACES_WRAPPERS_H__
+
+#ifndef __NR_listns
+ #if defined __alpha__
+ #define __NR_listns 580
+ #elif defined _MIPS_SIM
+ #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */
+ #define __NR_listns 4470
+ #endif
+ #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */
+ #define __NR_listns 6470
+ #endif
+ #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */
+ #define __NR_listns 5470
+ #endif
+ #else
+ #define __NR_listns 470
+ #endif
+#endif
+
+static inline int sys_listns(const struct ns_id_req *req, __u64 *ns_ids,
+ size_t nr_ns_ids, unsigned int flags)
+{
+ return syscall(__NR_listns, req, ns_ids, nr_ns_ids, flags);
+}
+
+#endif /* __SELFTESTS_NAMESPACES_WRAPPERS_H__ */
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 439101b518ee..8f9850a71f54 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -45,6 +45,7 @@ skf_net_off
socket
so_incoming_cpu
so_netns_cookie
+so_peek_off
so_txtime
so_rcv_listener
stress_reuseport_listen
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 5d9d96515c4a..b5127e968108 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -1,128 +1,201 @@
# SPDX-License-Identifier: GPL-2.0
# Makefile for net selftests
-CFLAGS += -Wall -Wl,--no-as-needed -O2 -g
+CFLAGS += -Wall -Wl,--no-as-needed -O2 -g
CFLAGS += -I../../../../usr/include/ $(KHDR_INCLUDES)
# Additional include paths needed by kselftest.h
CFLAGS += -I../
-TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh \
- rtnetlink.sh xfrm_policy.sh
-TEST_PROGS += fcnal-ipv4.sh fcnal-ipv6.sh fcnal-other.sh
-TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh ip_defrag.sh
-TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh
-TEST_PROGS += udpgro_bench.sh udpgro.sh test_vxlan_under_vrf.sh reuseport_addr_any.sh
-TEST_PROGS += test_vxlan_fdb_changelink.sh so_txtime.sh ipv6_flowlabel.sh
-TEST_PROGS += tcp_fastopen_backup_key.sh l2tp.sh traceroute.sh
-TEST_PROGS += fin_ack_lat.sh fib_nexthop_multiprefix.sh fib_nexthops.sh fib_nexthop_nongw.sh
-TEST_PROGS += altnames.sh icmp.sh icmp_redirect.sh ip6_gre_headroom.sh
-TEST_PROGS += route_localnet.sh
-TEST_PROGS += reuseaddr_ports_exhausted.sh
-TEST_PROGS += txtimestamp.sh
-TEST_PROGS += vrf-xfrm-tests.sh
-TEST_PROGS += rxtimestamp.sh
-TEST_PROGS += drop_monitor_tests.sh
-TEST_PROGS += vrf_route_leaking.sh
-TEST_PROGS += bareudp.sh
-TEST_PROGS += amt.sh
-TEST_PROGS += unicast_extensions.sh
-TEST_PROGS += udpgro_fwd.sh
-TEST_PROGS += udpgro_frglist.sh
-TEST_PROGS += nat6to4.sh
-TEST_PROGS += veth.sh
-TEST_PROGS += ioam6.sh
-TEST_PROGS += gro.sh
-TEST_PROGS += gre_gso.sh
-TEST_PROGS += gre_ipv6_lladdr.sh
-TEST_PROGS += cmsg_so_mark.sh
-TEST_PROGS += cmsg_so_priority.sh
-TEST_PROGS += test_so_rcv.sh
-TEST_PROGS += cmsg_time.sh cmsg_ip.sh
-TEST_PROGS += netns-name.sh
-TEST_PROGS += link_netns.py
-TEST_PROGS += nl_netdev.py
-TEST_PROGS += rtnetlink.py
-TEST_PROGS += rtnetlink_notification.sh
-TEST_PROGS += srv6_end_dt46_l3vpn_test.sh
-TEST_PROGS += srv6_end_dt4_l3vpn_test.sh
-TEST_PROGS += srv6_end_dt6_l3vpn_test.sh
-TEST_PROGS += srv6_hencap_red_l3vpn_test.sh
-TEST_PROGS += srv6_hl2encap_red_l2vpn_test.sh
-TEST_PROGS += srv6_end_next_csid_l3vpn_test.sh
-TEST_PROGS += srv6_end_x_next_csid_l3vpn_test.sh
-TEST_PROGS += srv6_end_flavors_test.sh
-TEST_PROGS += srv6_end_dx4_netfilter_test.sh
-TEST_PROGS += srv6_end_dx6_netfilter_test.sh
-TEST_PROGS += vrf_strict_mode_test.sh
-TEST_PROGS += arp_ndisc_evict_nocarrier.sh
-TEST_PROGS += ndisc_unsolicited_na_test.sh
-TEST_PROGS += arp_ndisc_untracked_subnets.sh
-TEST_PROGS += stress_reuseport_listen.sh
-TEST_PROGS += l2_tos_ttl_inherit.sh
-TEST_PROGS += bind_bhash.sh
-TEST_PROGS += ip_local_port_range.sh
-TEST_PROGS += rps_default_mask.sh
-TEST_PROGS += big_tcp.sh
-TEST_PROGS += netns-sysctl.sh
-TEST_PROGS += netdev-l2addr.sh
-TEST_PROGS_EXTENDED := toeplitz_client.sh toeplitz.sh xfrm_policy_add_speed.sh
-TEST_GEN_FILES = socket nettest
-TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
-TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite
-TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx ip_defrag
-TEST_GEN_FILES += so_txtime ipv6_flowlabel ipv6_flowlabel_mgr so_netns_cookie
-TEST_GEN_FILES += tcp_fastopen_backup_key
-TEST_GEN_FILES += fin_ack_lat
-TEST_GEN_FILES += reuseaddr_ports_exhausted
-TEST_GEN_FILES += hwtstamp_config rxtimestamp timestamping txtimestamp
-TEST_GEN_FILES += ipsec
-TEST_GEN_FILES += ioam6_parser
-TEST_GEN_FILES += gro
-TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
-TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls tun tap epoll_busy_poll
-TEST_GEN_FILES += toeplitz
-TEST_GEN_FILES += cmsg_sender
-TEST_GEN_FILES += stress_reuseport_listen
-TEST_GEN_FILES += so_rcv_listener
-TEST_PROGS += test_vxlan_vnifiltering.sh
-TEST_GEN_FILES += io_uring_zerocopy_tx
-TEST_PROGS += io_uring_zerocopy_tx.sh
-TEST_GEN_FILES += bind_bhash
-TEST_GEN_PROGS += sk_bind_sendto_listen
-TEST_GEN_PROGS += sk_connect_zero_addr
-TEST_GEN_PROGS += sk_so_peek_off
-TEST_PROGS += test_ingress_egress_chaining.sh
-TEST_GEN_PROGS += so_incoming_cpu
-TEST_PROGS += sctp_vrf.sh
-TEST_GEN_FILES += sctp_hello
-TEST_GEN_FILES += ip_local_port_range
-TEST_GEN_PROGS += bind_wildcard
-TEST_GEN_PROGS += bind_timewait
-TEST_PROGS += test_vxlan_mdb.sh
-TEST_PROGS += test_bridge_neigh_suppress.sh
-TEST_PROGS += test_vxlan_nh.sh
-TEST_PROGS += test_vxlan_nolocalbypass.sh
-TEST_PROGS += test_bridge_backup_port.sh
-TEST_PROGS += test_neigh.sh
-TEST_PROGS += fdb_flush.sh fdb_notify.sh
-TEST_PROGS += fq_band_pktlimit.sh
-TEST_PROGS += vlan_hw_filter.sh
-TEST_PROGS += vlan_bridge_binding.sh
-TEST_PROGS += bpf_offload.py
-TEST_PROGS += ipv6_route_update_soft_lockup.sh
-TEST_PROGS += busy_poll_test.sh
-TEST_GEN_PROGS += proc_net_pktgen
-TEST_PROGS += lwt_dst_cache_ref_loop.sh
-TEST_PROGS += skf_net_off.sh
-TEST_GEN_FILES += skf_net_off
-TEST_GEN_FILES += tfo
-TEST_PROGS += tfo_passive.sh
-TEST_PROGS += broadcast_ether_dst.sh
-TEST_PROGS += broadcast_pmtu.sh
-TEST_PROGS += ipv6_force_forwarding.sh
-TEST_GEN_PROGS += ipv6_fragmentation
-TEST_PROGS += route_hint.sh
-TEST_GEN_PROGS += tcp_port_share
+TEST_PROGS := \
+ altnames.sh \
+ amt.sh \
+ arp_ndisc_evict_nocarrier.sh \
+ arp_ndisc_untracked_subnets.sh \
+ bareudp.sh \
+ big_tcp.sh \
+ bind_bhash.sh \
+ bpf_offload.py \
+ broadcast_ether_dst.sh \
+ broadcast_pmtu.sh \
+ busy_poll_test.sh \
+ cmsg_ip.sh \
+ cmsg_so_mark.sh \
+ cmsg_so_priority.sh \
+ cmsg_time.sh \
+ drop_monitor_tests.sh \
+ fcnal-ipv4.sh \
+ fcnal-ipv6.sh \
+ fcnal-other.sh \
+ fdb_flush.sh \
+ fdb_notify.sh \
+ fib-onlink-tests.sh \
+ fib_nexthop_multiprefix.sh \
+ fib_nexthop_nongw.sh \
+ fib_nexthops.sh \
+ fib_rule_tests.sh \
+ fib_tests.sh \
+ fin_ack_lat.sh \
+ fq_band_pktlimit.sh \
+ gre_gso.sh \
+ gre_ipv6_lladdr.sh \
+ gro.sh \
+ icmp.sh \
+ icmp_redirect.sh \
+ io_uring_zerocopy_tx.sh \
+ ioam6.sh \
+ ip6_gre_headroom.sh \
+ ip_defrag.sh \
+ ip_local_port_range.sh \
+ ipv6_flowlabel.sh \
+ ipv6_force_forwarding.sh \
+ ipv6_route_update_soft_lockup.sh \
+ l2_tos_ttl_inherit.sh \
+ l2tp.sh \
+ link_netns.py \
+ lwt_dst_cache_ref_loop.sh \
+ msg_zerocopy.sh \
+ nat6to4.sh \
+ ndisc_unsolicited_na_test.sh \
+ netdev-l2addr.sh \
+ netdevice.sh \
+ netns-name.sh \
+ netns-sysctl.sh \
+ nl_netdev.py \
+ pmtu.sh \
+ psock_snd.sh \
+ reuseaddr_ports_exhausted.sh \
+ reuseport_addr_any.sh \
+ route_hint.sh \
+ route_localnet.sh \
+ rps_default_mask.sh \
+ rtnetlink.py \
+ rtnetlink.sh \
+ rtnetlink_notification.sh \
+ run_afpackettests \
+ run_netsocktests \
+ rxtimestamp.sh \
+ sctp_vrf.sh \
+ skf_net_off.sh \
+ so_txtime.sh \
+ srv6_end_dt46_l3vpn_test.sh \
+ srv6_end_dt4_l3vpn_test.sh \
+ srv6_end_dt6_l3vpn_test.sh \
+ srv6_end_dx4_netfilter_test.sh \
+ srv6_end_dx6_netfilter_test.sh \
+ srv6_end_flavors_test.sh \
+ srv6_end_next_csid_l3vpn_test.sh \
+ srv6_end_x_next_csid_l3vpn_test.sh \
+ srv6_hencap_red_l3vpn_test.sh \
+ srv6_hl2encap_red_l2vpn_test.sh \
+ stress_reuseport_listen.sh \
+ tcp_fastopen_backup_key.sh \
+ test_bpf.sh \
+ test_bridge_backup_port.sh \
+ test_bridge_neigh_suppress.sh \
+ test_ingress_egress_chaining.sh \
+ test_neigh.sh \
+ test_so_rcv.sh \
+ test_vxlan_fdb_changelink.sh \
+ test_vxlan_mdb.sh \
+ test_vxlan_nh.sh \
+ test_vxlan_nolocalbypass.sh \
+ test_vxlan_under_vrf.sh \
+ test_vxlan_vnifiltering.sh \
+ tfo_passive.sh \
+ traceroute.sh \
+ txtimestamp.sh \
+ udpgro.sh \
+ udpgro_bench.sh \
+ udpgro_frglist.sh \
+ udpgro_fwd.sh \
+ udpgso.sh \
+ udpgso_bench.sh \
+ unicast_extensions.sh \
+ veth.sh \
+ vlan_bridge_binding.sh \
+ vlan_hw_filter.sh \
+ vrf-xfrm-tests.sh \
+ vrf_route_leaking.sh \
+ vrf_strict_mode_test.sh \
+ xfrm_policy.sh \
+# end of TEST_PROGS
+
+TEST_PROGS_EXTENDED := \
+ toeplitz.sh \
+ toeplitz_client.sh \
+ xfrm_policy_add_speed.sh \
+# end of TEST_PROGS_EXTENDED
+
+TEST_GEN_FILES := \
+ bind_bhash \
+ cmsg_sender \
+ fin_ack_lat \
+ gro \
+ hwtstamp_config \
+ io_uring_zerocopy_tx \
+ ioam6_parser \
+ ip_defrag \
+ ip_local_port_range \
+ ipsec \
+ ipv6_flowlabel \
+ ipv6_flowlabel_mgr \
+ msg_zerocopy \
+ nettest \
+ psock_fanout \
+ psock_snd \
+ psock_tpacket \
+ reuseaddr_ports_exhausted \
+ reuseport_addr_any \
+ rxtimestamp \
+ sctp_hello \
+ skf_net_off \
+ so_netns_cookie \
+ so_rcv_listener \
+ so_txtime \
+ socket \
+ stress_reuseport_listen \
+ tcp_fastopen_backup_key \
+ tcp_inq \
+ tcp_mmap \
+ tfo \
+ timestamping \
+ toeplitz \
+ txring_overwrite \
+ txtimestamp \
+ udpgso \
+ udpgso_bench_rx \
+ udpgso_bench_tx \
+# end of TEST_GEN_FILES
+
+TEST_GEN_PROGS := \
+ bind_timewait \
+ bind_wildcard \
+ epoll_busy_poll \
+ ipv6_fragmentation \
+ proc_net_pktgen \
+ reuseaddr_conflict \
+ reuseport_bpf \
+ reuseport_bpf_cpu \
+ reuseport_bpf_numa \
+ reuseport_dualstack \
+ sk_bind_sendto_listen \
+ sk_connect_zero_addr \
+ sk_so_peek_off \
+ so_incoming_cpu \
+ tap \
+ tcp_port_share \
+ tls \
+ tun \
+# end of TEST_GEN_PROGS
+
+TEST_FILES := \
+ fcnal-test.sh \
+ in_netns.sh \
+ lib.sh \
+ settings \
+ setup_loopback.sh \
+ setup_veth.sh \
+# end of TEST_FILES
# YNL files, must be before "include ..lib.mk"
YNL_GEN_FILES := busy_poller
@@ -130,10 +203,6 @@ YNL_GEN_PROGS := netlink-dumps
TEST_GEN_FILES += $(YNL_GEN_FILES)
TEST_GEN_PROGS += $(YNL_GEN_PROGS)
-TEST_FILES := settings
-TEST_FILES += fcnal-test.sh
-TEST_FILES += in_netns.sh lib.sh setup_loopback.sh setup_veth.sh
-
TEST_GEN_FILES += $(patsubst %.c,%.o,$(wildcard *.bpf.c))
TEST_INCLUDES := forwarding/lib.sh
diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile
index 0a20c98bbcfd..528d14c598bb 100644
--- a/tools/testing/selftests/net/af_unix/Makefile
+++ b/tools/testing/selftests/net/af_unix/Makefile
@@ -1,4 +1,13 @@
CFLAGS += $(KHDR_INCLUDES) -Wall -Wflex-array-member-not-at-end
-TEST_GEN_PROGS := diag_uid msg_oob scm_inq scm_pidfd scm_rights unix_connect
+
+TEST_GEN_PROGS := \
+ diag_uid \
+ msg_oob \
+ scm_inq \
+ scm_pidfd \
+ scm_rights \
+ so_peek_off \
+ unix_connect \
+# end of TEST_GEN_PROGS
include ../../lib.mk
diff --git a/tools/testing/selftests/net/af_unix/config b/tools/testing/selftests/net/af_unix/config
index 37368567768c..b5429c15a53c 100644
--- a/tools/testing/selftests/net/af_unix/config
+++ b/tools/testing/selftests/net/af_unix/config
@@ -1,3 +1,3 @@
-CONFIG_UNIX=y
CONFIG_AF_UNIX_OOB=y
+CONFIG_UNIX=y
CONFIG_UNIX_DIAG=m
diff --git a/tools/testing/selftests/net/af_unix/so_peek_off.c b/tools/testing/selftests/net/af_unix/so_peek_off.c
new file mode 100644
index 000000000000..1a77728128e5
--- /dev/null
+++ b/tools/testing/selftests/net/af_unix/so_peek_off.c
@@ -0,0 +1,162 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright 2025 Google LLC */
+
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <sys/socket.h>
+
+#include "../../kselftest_harness.h"
+
+FIXTURE(so_peek_off)
+{
+ int fd[2]; /* 0: sender, 1: receiver */
+};
+
+FIXTURE_VARIANT(so_peek_off)
+{
+ int type;
+};
+
+FIXTURE_VARIANT_ADD(so_peek_off, stream)
+{
+ .type = SOCK_STREAM,
+};
+
+FIXTURE_VARIANT_ADD(so_peek_off, dgram)
+{
+ .type = SOCK_DGRAM,
+};
+
+FIXTURE_VARIANT_ADD(so_peek_off, seqpacket)
+{
+ .type = SOCK_SEQPACKET,
+};
+
+FIXTURE_SETUP(so_peek_off)
+{
+ struct timeval timeout = {
+ .tv_sec = 0,
+ .tv_usec = 3000,
+ };
+ int ret;
+
+ ret = socketpair(AF_UNIX, variant->type, 0, self->fd);
+ ASSERT_EQ(0, ret);
+
+ ret = setsockopt(self->fd[1], SOL_SOCKET, SO_RCVTIMEO_NEW,
+ &timeout, sizeof(timeout));
+ ASSERT_EQ(0, ret);
+
+ ret = setsockopt(self->fd[1], SOL_SOCKET, SO_PEEK_OFF,
+ &(int){0}, sizeof(int));
+ ASSERT_EQ(0, ret);
+}
+
+FIXTURE_TEARDOWN(so_peek_off)
+{
+ close_range(self->fd[0], self->fd[1], 0);
+}
+
+#define sendeq(fd, str, flags) \
+ do { \
+ int bytes, len = strlen(str); \
+ \
+ bytes = send(fd, str, len, flags); \
+ ASSERT_EQ(len, bytes); \
+ } while (0)
+
+#define recveq(fd, str, buflen, flags) \
+ do { \
+ char buf[(buflen) + 1] = {}; \
+ int bytes; \
+ \
+ bytes = recv(fd, buf, buflen, flags); \
+ ASSERT_NE(-1, bytes); \
+ ASSERT_STREQ(str, buf); \
+ } while (0)
+
+#define async \
+ for (pid_t pid = (pid = fork(), \
+ pid < 0 ? \
+ __TH_LOG("Failed to start async {}"), \
+ _metadata->exit_code = KSFT_FAIL, \
+ __bail(1, _metadata), \
+ 0xdead : \
+ pid); \
+ !pid; exit(0))
+
+TEST_F(so_peek_off, single_chunk)
+{
+ sendeq(self->fd[0], "aaaabbbb", 0);
+
+ recveq(self->fd[1], "aaaa", 4, MSG_PEEK);
+ recveq(self->fd[1], "bbbb", 100, MSG_PEEK);
+}
+
+TEST_F(so_peek_off, two_chunks)
+{
+ sendeq(self->fd[0], "aaaa", 0);
+ sendeq(self->fd[0], "bbbb", 0);
+
+ recveq(self->fd[1], "aaaa", 4, MSG_PEEK);
+ recveq(self->fd[1], "bbbb", 100, MSG_PEEK);
+}
+
+TEST_F(so_peek_off, two_chunks_blocking)
+{
+ async {
+ usleep(1000);
+ sendeq(self->fd[0], "aaaa", 0);
+ }
+
+ recveq(self->fd[1], "aaaa", 4, MSG_PEEK);
+
+ async {
+ usleep(1000);
+ sendeq(self->fd[0], "bbbb", 0);
+ }
+
+ /* goto again; -> goto redo; in unix_stream_read_generic(). */
+ recveq(self->fd[1], "bbbb", 100, MSG_PEEK);
+}
+
+TEST_F(so_peek_off, two_chunks_overlap)
+{
+ sendeq(self->fd[0], "aaaa", 0);
+ recveq(self->fd[1], "aa", 2, MSG_PEEK);
+
+ sendeq(self->fd[0], "bbbb", 0);
+
+ if (variant->type == SOCK_STREAM) {
+ /* SOCK_STREAM tries to fill the buffer. */
+ recveq(self->fd[1], "aabb", 4, MSG_PEEK);
+ recveq(self->fd[1], "bb", 100, MSG_PEEK);
+ } else {
+ /* SOCK_DGRAM and SOCK_SEQPACKET returns at the skb boundary. */
+ recveq(self->fd[1], "aa", 100, MSG_PEEK);
+ recveq(self->fd[1], "bbbb", 100, MSG_PEEK);
+ }
+}
+
+TEST_F(so_peek_off, two_chunks_overlap_blocking)
+{
+ async {
+ usleep(1000);
+ sendeq(self->fd[0], "aaaa", 0);
+ }
+
+ recveq(self->fd[1], "aa", 2, MSG_PEEK);
+
+ async {
+ usleep(1000);
+ sendeq(self->fd[0], "bbbb", 0);
+ }
+
+ /* Even SOCK_STREAM does not wait if at least one byte is read. */
+ recveq(self->fd[1], "aa", 100, MSG_PEEK);
+
+ recveq(self->fd[1], "bbbb", 100, MSG_PEEK);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/bareudp.sh b/tools/testing/selftests/net/bareudp.sh
index 4046131e7888..d9e5b967f815 100755
--- a/tools/testing/selftests/net/bareudp.sh
+++ b/tools/testing/selftests/net/bareudp.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
# Test various bareudp tunnel configurations.
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index d548611e2698..1e1f253118f5 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -1,130 +1,130 @@
-CONFIG_USER_NS=y
-CONFIG_NET_NS=y
+CONFIG_AMT=m
+CONFIG_BAREUDP=m
CONFIG_BONDING=m
CONFIG_BPF_SYSCALL=y
-CONFIG_TEST_BPF=m
-CONFIG_NUMA=y
-CONFIG_RPS=y
-CONFIG_SYSFS=y
-CONFIG_PROC_SYSCTL=y
-CONFIG_NET_VRF=y
-CONFIG_NET_L3_MASTER_DEV=y
-CONFIG_IPV6=y
-CONFIG_IPV6_MULTIPLE_TABLES=y
-CONFIG_VETH=y
-CONFIG_NET_IPVTI=y
-CONFIG_IPV6_VTI=y
-CONFIG_DUMMY=y
-CONFIG_BRIDGE_VLAN_FILTERING=y
CONFIG_BRIDGE=y
+CONFIG_BRIDGE_VLAN_FILTERING=y
+CONFIG_CAN=m
+CONFIG_CAN_DEV=m
+CONFIG_CAN_VXCAN=m
+CONFIG_CRYPTO_ARIA=y
CONFIG_CRYPTO_CHACHA20POLY1305=m
+CONFIG_CRYPTO_SHA1=y
+CONFIG_CRYPTO_SM4_GENERIC=y
CONFIG_DEBUG_INFO_BTF=y
CONFIG_DEBUG_INFO_BTF_MODULES=n
-CONFIG_VLAN_8021Q=y
+CONFIG_DUMMY=y
CONFIG_GENEVE=m
CONFIG_IFB=y
CONFIG_INET_DIAG=y
CONFIG_INET_ESP=y
CONFIG_INET_ESP_OFFLOAD=y
-CONFIG_CRYPTO_SHA1=y
-CONFIG_NET_FOU=y
-CONFIG_NET_FOU_IP_TUNNELS=y
-CONFIG_NETFILTER=y
-CONFIG_NETFILTER_ADVANCED=y
-CONFIG_NETFILTER_XTABLES_LEGACY=y
-CONFIG_NF_CONNTRACK=m
-CONFIG_IPV6_MROUTE=y
-CONFIG_IPV6_SIT=y
-CONFIG_NF_NAT=m
+CONFIG_IP6_NF_FILTER=m
CONFIG_IP6_NF_IPTABLES=m
CONFIG_IP6_NF_IPTABLES_LEGACY=m
-CONFIG_IP_NF_IPTABLES=m
-CONFIG_IP_NF_IPTABLES_LEGACY=m
CONFIG_IP6_NF_MANGLE=m
-CONFIG_IP6_NF_FILTER=m
+CONFIG_IP6_NF_MATCH_RPFILTER=m
CONFIG_IP6_NF_NAT=m
CONFIG_IP6_NF_RAW=m
-CONFIG_IP_NF_MANGLE=m
+CONFIG_IP6_NF_TARGET_REJECT=m
CONFIG_IP_NF_FILTER=m
+CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP_NF_IPTABLES_LEGACY=m
+CONFIG_IP_NF_MANGLE=m
+CONFIG_IP_NF_MATCH_RPFILTER=m
CONFIG_IP_NF_NAT=m
CONFIG_IP_NF_RAW=m
CONFIG_IP_NF_TARGET_REJECT=m
-CONFIG_IP6_NF_TARGET_REJECT=m
CONFIG_IP_NF_TARGET_TTL=m
+CONFIG_IP_SCTP=m
+CONFIG_IPV6=y
CONFIG_IPV6_GRE=m
+CONFIG_IPV6_ILA=m
+CONFIG_IPV6_IOAM6_LWTUNNEL=y
+CONFIG_IPV6_MROUTE=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_IPV6_RPL_LWTUNNEL=y
CONFIG_IPV6_SEG6_LWTUNNEL=y
+CONFIG_IPV6_SIT=y
+CONFIG_IPV6_VTI=y
+CONFIG_IPVLAN=m
+CONFIG_KALLSYMS=y
+CONFIG_L2TP=m
CONFIG_L2TP_ETH=m
CONFIG_L2TP_IP=m
-CONFIG_L2TP=m
CONFIG_L2TP_V3=y
CONFIG_MACSEC=m
CONFIG_MACVLAN=y
CONFIG_MACVTAP=y
CONFIG_MPLS=y
+CONFIG_MPLS_IPTUNNEL=m
+CONFIG_MPLS_ROUTING=m
CONFIG_MPTCP=y
-CONFIG_NF_TABLES=m
-CONFIG_NF_TABLES_IPV6=y
-CONFIG_NF_TABLES_IPV4=y
-CONFIG_NFT_NAT=m
-CONFIG_NETFILTER_XT_MATCH_LENGTH=m
-CONFIG_NETFILTER_XT_TARGET_HL=m
-CONFIG_NETFILTER_XT_NAT=m
CONFIG_NET_ACT_CSUM=m
CONFIG_NET_ACT_CT=m
CONFIG_NET_ACT_GACT=m
+CONFIG_NET_ACT_MIRRED=m
CONFIG_NET_ACT_PEDIT=m
+CONFIG_NET_ACT_TUNNEL_KEY=m
CONFIG_NET_CLS_BASIC=m
CONFIG_NET_CLS_BPF=m
+CONFIG_NET_CLS_FLOWER=m
CONFIG_NET_CLS_MATCHALL=m
CONFIG_NET_CLS_U32=m
-CONFIG_NET_IPGRE_DEMUX=m
+CONFIG_NETDEVSIM=m
+CONFIG_NET_DROP_MONITOR=m
+CONFIG_NETFILTER=y
+CONFIG_NETFILTER_ADVANCED=y
+CONFIG_NETFILTER_XTABLES_LEGACY=y
+CONFIG_NETFILTER_XT_MATCH_LENGTH=m
+CONFIG_NETFILTER_XT_MATCH_POLICY=m
+CONFIG_NETFILTER_XT_NAT=m
+CONFIG_NETFILTER_XT_TARGET_HL=m
+CONFIG_NET_FOU=y
+CONFIG_NET_FOU_IP_TUNNELS=y
CONFIG_NET_IPGRE=m
+CONFIG_NET_IPGRE_DEMUX=m
CONFIG_NET_IPIP=y
+CONFIG_NET_IPVTI=y
+CONFIG_NETKIT=y
+CONFIG_NET_L3_MASTER_DEV=y
+CONFIG_NET_NS=y
+CONFIG_NET_PKTGEN=m
+CONFIG_NET_SCH_ETF=m
+CONFIG_NET_SCH_FQ=m
CONFIG_NET_SCH_FQ_CODEL=m
CONFIG_NET_SCH_HTB=m
-CONFIG_NET_SCH_FQ=m
-CONFIG_NET_SCH_ETF=m
+CONFIG_NET_SCH_INGRESS=m
CONFIG_NET_SCH_NETEM=y
CONFIG_NET_SCH_PRIO=m
-CONFIG_NFT_COMPAT=m
+CONFIG_NET_VRF=y
+CONFIG_NF_CONNTRACK=m
CONFIG_NF_CONNTRACK_OVS=y
CONFIG_NF_FLOW_TABLE=m
+CONFIG_NF_NAT=m
+CONFIG_NF_TABLES=m
+CONFIG_NF_TABLES_IPV4=y
+CONFIG_NF_TABLES_IPV6=y
+CONFIG_NFT_COMPAT=m
+CONFIG_NFT_NAT=m
+CONFIG_NUMA=y
CONFIG_OPENVSWITCH=m
CONFIG_OPENVSWITCH_GENEVE=m
CONFIG_OPENVSWITCH_GRE=m
CONFIG_OPENVSWITCH_VXLAN=m
+CONFIG_PROC_SYSCTL=y
CONFIG_PSAMPLE=m
+CONFIG_RPS=y
+CONFIG_SYSFS=y
CONFIG_TCP_MD5SIG=y
CONFIG_TEST_BLACKHOLE_DEV=m
-CONFIG_KALLSYMS=y
+CONFIG_TEST_BPF=m
CONFIG_TLS=m
CONFIG_TRACEPOINTS=y
-CONFIG_NET_DROP_MONITOR=m
-CONFIG_NETDEVSIM=m
-CONFIG_MPLS_ROUTING=m
-CONFIG_MPLS_IPTUNNEL=m
-CONFIG_NET_SCH_INGRESS=m
-CONFIG_NET_CLS_FLOWER=m
-CONFIG_NET_ACT_TUNNEL_KEY=m
-CONFIG_NET_ACT_MIRRED=m
-CONFIG_BAREUDP=m
-CONFIG_IPV6_IOAM6_LWTUNNEL=y
-CONFIG_CRYPTO_SM4_GENERIC=y
-CONFIG_AMT=m
CONFIG_TUN=y
+CONFIG_USER_NS=y
+CONFIG_VETH=y
+CONFIG_VLAN_8021Q=y
CONFIG_VXLAN=m
-CONFIG_IP_SCTP=m
-CONFIG_NETFILTER_XT_MATCH_POLICY=m
-CONFIG_CRYPTO_ARIA=y
CONFIG_XFRM_INTERFACE=m
CONFIG_XFRM_USER=m
-CONFIG_IP_NF_MATCH_RPFILTER=m
-CONFIG_IP6_NF_MATCH_RPFILTER=m
-CONFIG_IPVLAN=m
-CONFIG_CAN=m
-CONFIG_CAN_DEV=m
-CONFIG_CAN_VXCAN=m
-CONFIG_NETKIT=y
-CONFIG_NET_PKTGEN=m
-CONFIG_IPV6_ILA=m
-CONFIG_IPV6_RPL_LWTUNNEL=y
diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile
index e6f482a600da..ff4a00d91a26 100644
--- a/tools/testing/selftests/net/forwarding/Makefile
+++ b/tools/testing/selftests/net/forwarding/Makefile
@@ -1,11 +1,11 @@
# SPDX-License-Identifier: GPL-2.0+ OR MIT
-TEST_PROGS = \
+TEST_PROGS := \
bridge_activity_notify.sh \
bridge_fdb_learning_limit.sh \
+ bridge_fdb_local_vlan_0.sh \
bridge_igmp.sh \
bridge_locked_port.sh \
- bridge_fdb_local_vlan_0.sh \
bridge_mdb.sh \
bridge_mdb_host.sh \
bridge_mdb_max.sh \
@@ -21,64 +21,64 @@ TEST_PROGS = \
gre_custom_multipath_hash.sh \
gre_inner_v4_multipath.sh \
gre_inner_v6_multipath.sh \
- gre_multipath_nh_res.sh \
- gre_multipath_nh.sh \
gre_multipath.sh \
+ gre_multipath_nh.sh \
+ gre_multipath_nh_res.sh \
ip6_forward_instats_vrf.sh \
ip6gre_custom_multipath_hash.sh \
+ ip6gre_flat.sh \
ip6gre_flat_key.sh \
ip6gre_flat_keys.sh \
- ip6gre_flat.sh \
+ ip6gre_hier.sh \
ip6gre_hier_key.sh \
ip6gre_hier_keys.sh \
- ip6gre_hier.sh \
ip6gre_inner_v4_multipath.sh \
ip6gre_inner_v6_multipath.sh \
+ ipip_flat_gre.sh \
ipip_flat_gre_key.sh \
ipip_flat_gre_keys.sh \
- ipip_flat_gre.sh \
+ ipip_hier_gre.sh \
ipip_hier_gre_key.sh \
ipip_hier_gre_keys.sh \
- ipip_hier_gre.sh \
lib_sh_test.sh \
local_termination.sh \
min_max_mtu.sh \
+ mirror_gre.sh \
mirror_gre_bound.sh \
mirror_gre_bridge_1d.sh \
mirror_gre_bridge_1d_vlan.sh \
- mirror_gre_bridge_1q_lag.sh \
mirror_gre_bridge_1q.sh \
+ mirror_gre_bridge_1q_lag.sh \
mirror_gre_changes.sh \
mirror_gre_flower.sh \
mirror_gre_lag_lacp.sh \
mirror_gre_neigh.sh \
mirror_gre_nh.sh \
- mirror_gre.sh \
- mirror_gre_vlan_bridge_1q.sh \
mirror_gre_vlan.sh \
+ mirror_gre_vlan_bridge_1q.sh \
mirror_vlan.sh \
no_forwarding.sh \
pedit_dsfield.sh \
pedit_ip.sh \
pedit_l4port.sh \
- q_in_vni_ipv6.sh \
q_in_vni.sh \
+ q_in_vni_ipv6.sh \
+ router.sh \
router_bridge.sh \
router_bridge_1d.sh \
router_bridge_1d_lag.sh \
router_bridge_lag.sh \
+ router_bridge_pvid_vlan_upper.sh \
router_bridge_vlan.sh \
router_bridge_vlan_upper.sh \
- router_bridge_pvid_vlan_upper.sh \
router_bridge_vlan_upper_pvid.sh \
router_broadcast.sh \
- router_mpath_nh_res.sh \
router_mpath_nh.sh \
+ router_mpath_nh_res.sh \
router_mpath_seed.sh \
router_multicast.sh \
router_multipath.sh \
router_nh.sh \
- router.sh \
router_vid_1.sh \
sch_ets.sh \
sch_red.sh \
@@ -88,32 +88,34 @@ TEST_PROGS = \
skbedit_priority.sh \
tc_actions.sh \
tc_chains.sh \
- tc_flower_router.sh \
tc_flower.sh \
- tc_flower_l2_miss.sh \
tc_flower_cfm.sh \
+ tc_flower_l2_miss.sh \
tc_flower_port_range.sh \
+ tc_flower_router.sh \
tc_mpls_l2vpn.sh \
tc_police.sh \
tc_shblocks.sh \
tc_tunnel_key.sh \
tc_vlan_modify.sh \
- vxlan_asymmetric_ipv6.sh \
vxlan_asymmetric.sh \
+ vxlan_asymmetric_ipv6.sh \
+ vxlan_bridge_1d.sh \
vxlan_bridge_1d_ipv6.sh \
- vxlan_bridge_1d_port_8472_ipv6.sh \
vxlan_bridge_1d_port_8472.sh \
- vxlan_bridge_1d.sh \
+ vxlan_bridge_1d_port_8472_ipv6.sh \
+ vxlan_bridge_1q.sh \
vxlan_bridge_1q_ipv6.sh \
vxlan_bridge_1q_mc_ul.sh \
- vxlan_bridge_1q_port_8472_ipv6.sh \
vxlan_bridge_1q_port_8472.sh \
- vxlan_bridge_1q.sh \
+ vxlan_bridge_1q_port_8472_ipv6.sh \
vxlan_reserved.sh \
+ vxlan_symmetric.sh \
vxlan_symmetric_ipv6.sh \
- vxlan_symmetric.sh
+# end of TEST_PROGS
-TEST_FILES := devlink_lib.sh \
+TEST_FILES := \
+ devlink_lib.sh \
fib_offload_lib.sh \
forwarding.config.sample \
ip6gre_lib.sh \
@@ -128,10 +130,12 @@ TEST_FILES := devlink_lib.sh \
sch_ets_tests.sh \
sch_tbf_core.sh \
sch_tbf_etsprio.sh \
- tc_common.sh
+ tc_common.sh \
+# end of TEST_FILES
TEST_INCLUDES := \
+ $(wildcard ../lib/sh/*.sh) \
../lib.sh \
- $(wildcard ../lib/sh/*.sh)
+# end of TEST_INCLUDES
include ../../lib.mk
diff --git a/tools/testing/selftests/net/forwarding/config b/tools/testing/selftests/net/forwarding/config
index 18fd69d8d937..ce64518aaa11 100644
--- a/tools/testing/selftests/net/forwarding/config
+++ b/tools/testing/selftests/net/forwarding/config
@@ -1,24 +1,23 @@
+CONFIG_BPF_SYSCALL=y
CONFIG_BRIDGE=m
-CONFIG_VLAN_8021Q=m
-CONFIG_BRIDGE_VLAN_FILTERING=y
CONFIG_BRIDGE_IGMP_SNOOPING=y
-CONFIG_NET_L3_MASTER_DEV=y
-CONFIG_IPV6_MULTIPLE_TABLES=y
-CONFIG_NET_VRF=m
-CONFIG_BPF_SYSCALL=y
+CONFIG_BRIDGE_VLAN_FILTERING=y
CONFIG_CGROUP_BPF=y
CONFIG_DUMMY=m
+CONFIG_IP_MROUTE=y
+CONFIG_IP_MROUTE_MULTIPLE_TABLES=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
CONFIG_IPV6=y
CONFIG_IPV6_GRE=m
CONFIG_IPV6_MROUTE=y
CONFIG_IPV6_MROUTE_MULTIPLE_TABLES=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
CONFIG_IPV6_PIMSM_V2=y
-CONFIG_IP_MROUTE=y
-CONFIG_IP_MROUTE_MULTIPLE_TABLES=y
-CONFIG_IP_PIMSM_V1=y
-CONFIG_IP_PIMSM_V2=y
CONFIG_MACVLAN=m
+CONFIG_NAMESPACES=y
CONFIG_NET_ACT_CT=m
+CONFIG_NET_ACT_GACT=m
CONFIG_NET_ACT_MIRRED=m
CONFIG_NET_ACT_MPLS=m
CONFIG_NET_ACT_PEDIT=m
@@ -27,29 +26,30 @@ CONFIG_NET_ACT_SAMPLE=m
CONFIG_NET_ACT_SKBEDIT=m
CONFIG_NET_ACT_TUNNEL_KEY=m
CONFIG_NET_ACT_VLAN=m
+CONFIG_NET_CLS_BASIC=m
CONFIG_NET_CLS_FLOWER=m
CONFIG_NET_CLS_MATCHALL=m
-CONFIG_NET_CLS_BASIC=m
CONFIG_NET_EMATCH=y
CONFIG_NET_EMATCH_META=m
+CONFIG_NETFILTER=y
CONFIG_NET_IPGRE=m
CONFIG_NET_IPGRE_DEMUX=m
CONFIG_NET_IPIP=m
+CONFIG_NET_L3_MASTER_DEV=y
+CONFIG_NET_NS=y
CONFIG_NET_SCH_ETS=m
CONFIG_NET_SCH_INGRESS=m
-CONFIG_NET_ACT_GACT=m
CONFIG_NET_SCH_PRIO=m
CONFIG_NET_SCH_RED=m
CONFIG_NET_SCH_TBF=m
CONFIG_NET_TC_SKB_EXT=y
CONFIG_NET_TEAM=y
CONFIG_NET_TEAM_MODE_LOADBALANCE=y
-CONFIG_NETFILTER=y
+CONFIG_NET_VRF=m
CONFIG_NF_CONNTRACK=m
CONFIG_NF_FLOW_TABLE=m
CONFIG_NF_TABLES=m
CONFIG_VETH=m
-CONFIG_NAMESPACES=y
-CONFIG_NET_NS=y
+CONFIG_VLAN_8021Q=m
CONFIG_VXLAN=m
CONFIG_XFRM_USER=m
diff --git a/tools/testing/selftests/net/forwarding/lib_sh_test.sh b/tools/testing/selftests/net/forwarding/lib_sh_test.sh
index ff2accccaf4d..b4eda6c6199e 100755
--- a/tools/testing/selftests/net/forwarding/lib_sh_test.sh
+++ b/tools/testing/selftests/net/forwarding/lib_sh_test.sh
@@ -30,6 +30,11 @@ tfail()
do_test "tfail" false
}
+tfail2()
+{
+ do_test "tfail2" false
+}
+
txfail()
{
FAIL_TO_XFAIL=yes do_test "txfail" false
@@ -132,6 +137,8 @@ test_ret()
ret_subtest $ksft_fail "tfail" txfail tfail
ret_subtest $ksft_xfail "txfail" txfail txfail
+
+ ret_subtest $ksft_fail "tfail2" tfail2 tfail
}
exit_status_tests_run()
diff --git a/tools/testing/selftests/net/forwarding/local_termination.sh b/tools/testing/selftests/net/forwarding/local_termination.sh
index ecd34f364125..892895659c7e 100755
--- a/tools/testing/selftests/net/forwarding/local_termination.sh
+++ b/tools/testing/selftests/net/forwarding/local_termination.sh
@@ -176,6 +176,8 @@ run_test()
local rcv_dmac=$(mac_get $rcv_if_name)
local should_receive
+ setup_wait
+
tcpdump_start $rcv_if_name
mc_route_prepare $send_if_name
diff --git a/tools/testing/selftests/net/gro.c b/tools/testing/selftests/net/gro.c
index 2b1d9f2b3e9e..cfc39f70635d 100644
--- a/tools/testing/selftests/net/gro.c
+++ b/tools/testing/selftests/net/gro.c
@@ -754,11 +754,11 @@ static void send_ipv6_exthdr(int fd, struct sockaddr_ll *daddr, char *ext_data1,
static char exthdr_pck[sizeof(buf) + MIN_EXTHDR_SIZE];
create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
- add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_HOPOPTS, ext_data1);
+ add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_DSTOPTS, ext_data1);
write_packet(fd, exthdr_pck, total_hdr_len + PAYLOAD_LEN + MIN_EXTHDR_SIZE, daddr);
create_packet(buf, PAYLOAD_LEN * 1, 0, PAYLOAD_LEN, 0);
- add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_HOPOPTS, ext_data2);
+ add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_DSTOPTS, ext_data2);
write_packet(fd, exthdr_pck, total_hdr_len + PAYLOAD_LEN + MIN_EXTHDR_SIZE, daddr);
}
@@ -989,6 +989,7 @@ static void check_recv_pkts(int fd, int *correct_payload,
static void gro_sender(void)
{
+ const int fin_delay_us = 100 * 1000;
static char fin_pkt[MAX_HDR_LEN];
struct sockaddr_ll daddr = {};
int txfd = -1;
@@ -1032,15 +1033,22 @@ static void gro_sender(void)
write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
} else if (strcmp(testname, "tcp") == 0) {
send_changed_checksum(txfd, &daddr);
+ /* Adding sleep before sending FIN so that it is not
+ * received prior to other packets.
+ */
+ usleep(fin_delay_us);
write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
send_changed_seq(txfd, &daddr);
+ usleep(fin_delay_us);
write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
send_changed_ts(txfd, &daddr);
+ usleep(fin_delay_us);
write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
send_diff_opt(txfd, &daddr);
+ usleep(fin_delay_us);
write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
} else if (strcmp(testname, "ip") == 0) {
send_changed_ECN(txfd, &daddr);
diff --git a/tools/testing/selftests/net/hsr/Makefile b/tools/testing/selftests/net/hsr/Makefile
index 884cd2cc0681..4b6afc0fe9f8 100644
--- a/tools/testing/selftests/net/hsr/Makefile
+++ b/tools/testing/selftests/net/hsr/Makefile
@@ -2,7 +2,11 @@
top_srcdir = ../../../../..
-TEST_PROGS := hsr_ping.sh hsr_redbox.sh
+TEST_PROGS := \
+ hsr_ping.sh \
+ hsr_redbox.sh \
+# end of TEST_PROGS
+
TEST_FILES += hsr_common.sh
include ../../lib.mk
diff --git a/tools/testing/selftests/net/hsr/config b/tools/testing/selftests/net/hsr/config
index 555a868743f0..205cc4d3d64b 100644
--- a/tools/testing/selftests/net/hsr/config
+++ b/tools/testing/selftests/net/hsr/config
@@ -1,6 +1,6 @@
+CONFIG_BRIDGE=y
+CONFIG_HSR=y
CONFIG_IPV6=y
CONFIG_NET_SCH_NETEM=m
-CONFIG_HSR=y
CONFIG_VETH=y
-CONFIG_BRIDGE=y
CONFIG_VLAN_8021Q=m
diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh
index feba4ef69a54..f448bafb3f20 100644
--- a/tools/testing/selftests/net/lib.sh
+++ b/tools/testing/selftests/net/lib.sh
@@ -43,7 +43,7 @@ __ksft_status_merge()
weights[$i]=$((weight++))
done
- if [[ ${weights[$a]} > ${weights[$b]} ]]; then
+ if [[ ${weights[$a]} -ge ${weights[$b]} ]]; then
echo "$a"
return 0
else
diff --git a/tools/testing/selftests/net/lib/Makefile b/tools/testing/selftests/net/lib/Makefile
index 88c4bc461459..ce795bc0a1af 100644
--- a/tools/testing/selftests/net/lib/Makefile
+++ b/tools/testing/selftests/net/lib/Makefile
@@ -5,12 +5,16 @@ CFLAGS += -I../../../../../usr/include/ $(KHDR_INCLUDES)
# Additional include paths needed by kselftest.h
CFLAGS += -I../../
-TEST_FILES := ../../../../../Documentation/netlink/specs
-TEST_FILES += ../../../../net/ynl
+TEST_FILES := \
+ ../../../../net/ynl \
+ ../../../../../Documentation/netlink/specs \
+# end of TEST_FILES
-TEST_GEN_FILES += csum
-TEST_GEN_FILES += $(patsubst %.c,%.o,$(wildcard *.bpf.c))
-TEST_GEN_FILES += xdp_helper
+TEST_GEN_FILES := \
+ $(patsubst %.c,%.o,$(wildcard *.bpf.c)) \
+ csum \
+ xdp_helper \
+# end of TEST_GEN_FILES
TEST_INCLUDES := $(wildcard py/*.py sh/*.sh)
diff --git a/tools/testing/selftests/net/lib/py/__init__.py b/tools/testing/selftests/net/lib/py/__init__.py
index 997b85cc216a..97b7cf2b20eb 100644
--- a/tools/testing/selftests/net/lib/py/__init__.py
+++ b/tools/testing/selftests/net/lib/py/__init__.py
@@ -1,9 +1,32 @@
# SPDX-License-Identifier: GPL-2.0
+"""
+Python selftest helpers for netdev.
+"""
+
from .consts import KSRC
-from .ksft import *
+from .ksft import KsftFailEx, KsftSkipEx, KsftXfailEx, ksft_pr, ksft_eq, \
+ ksft_ne, ksft_true, ksft_not_none, ksft_in, ksft_not_in, ksft_is, \
+ ksft_ge, ksft_gt, ksft_lt, ksft_raises, ksft_busy_wait, \
+ ktap_result, ksft_disruptive, ksft_setup, ksft_run, ksft_exit
from .netns import NetNS, NetNSEnter
-from .nsim import *
-from .utils import *
+from .nsim import NetdevSim, NetdevSimDev
+from .utils import CmdExitFailure, fd_read_timeout, cmd, bkg, defer, \
+ bpftool, ip, ethtool, bpftrace, rand_port, wait_port_listen, wait_file
from .ynl import NlError, YnlFamily, EthtoolFamily, NetdevFamily, RtnlFamily, RtnlAddrFamily
from .ynl import NetshaperFamily, DevlinkFamily, PSPFamily
+
+__all__ = ["KSRC",
+ "KsftFailEx", "KsftSkipEx", "KsftXfailEx", "ksft_pr", "ksft_eq",
+ "ksft_ne", "ksft_true", "ksft_not_none", "ksft_in", "ksft_not_in",
+ "ksft_is", "ksft_ge", "ksft_gt", "ksft_lt", "ksft_raises",
+ "ksft_busy_wait", "ktap_result", "ksft_disruptive", "ksft_setup",
+ "ksft_run", "ksft_exit",
+ "NetNS", "NetNSEnter",
+ "CmdExitFailure", "fd_read_timeout", "cmd", "bkg", "defer",
+ "bpftool", "ip", "ethtool", "bpftrace", "rand_port",
+ "wait_port_listen", "wait_file",
+ "NetdevSim", "NetdevSimDev",
+ "NetshaperFamily", "DevlinkFamily", "PSPFamily", "NlError",
+ "YnlFamily", "EthtoolFamily", "NetdevFamily", "RtnlFamily",
+ "RtnlAddrFamily"]
diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile
index 4c7e51336ab2..15d144a25d82 100644
--- a/tools/testing/selftests/net/mptcp/Makefile
+++ b/tools/testing/selftests/net/mptcp/Makefile
@@ -4,13 +4,31 @@ top_srcdir = ../../../../..
CFLAGS += -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES)
-TEST_PROGS := mptcp_connect.sh mptcp_connect_mmap.sh mptcp_connect_sendfile.sh \
- mptcp_connect_checksum.sh pm_netlink.sh mptcp_join.sh diag.sh \
- simult_flows.sh mptcp_sockopt.sh userspace_pm.sh
-
-TEST_GEN_FILES = mptcp_connect pm_nl_ctl mptcp_sockopt mptcp_inq mptcp_diag
-
-TEST_FILES := mptcp_lib.sh settings
+TEST_PROGS := \
+ diag.sh \
+ mptcp_connect.sh \
+ mptcp_connect_checksum.sh \
+ mptcp_connect_mmap.sh \
+ mptcp_connect_sendfile.sh \
+ mptcp_join.sh \
+ mptcp_sockopt.sh \
+ pm_netlink.sh \
+ simult_flows.sh \
+ userspace_pm.sh \
+# end of TEST_PROGS
+
+TEST_GEN_FILES := \
+ mptcp_connect \
+ mptcp_diag \
+ mptcp_inq \
+ mptcp_sockopt \
+ pm_nl_ctl \
+# end of TEST_GEN_FILES
+
+TEST_FILES := \
+ mptcp_lib.sh \
+ settings \
+# end of TEST_FILES
TEST_INCLUDES := ../lib.sh $(wildcard ../lib/sh/*.sh)
diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config
index 968d440c03fe..59051ee2a986 100644
--- a/tools/testing/selftests/net/mptcp/config
+++ b/tools/testing/selftests/net/mptcp/config
@@ -1,36 +1,36 @@
+CONFIG_INET_DIAG=m
+CONFIG_INET_MPTCP_DIAG=m
+CONFIG_IP6_NF_FILTER=m
+CONFIG_IP6_NF_TARGET_REJECT=m
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_NF_FILTER=m
+CONFIG_IP_NF_MANGLE=m
+CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IPV6=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
CONFIG_KALLSYMS=y
CONFIG_MPTCP=y
-CONFIG_IPV6=y
CONFIG_MPTCP_IPV6=y
-CONFIG_INET_DIAG=m
-CONFIG_INET_MPTCP_DIAG=m
-CONFIG_VETH=y
-CONFIG_NET_SCH_NETEM=m
-CONFIG_SYN_COOKIES=y
+CONFIG_NET_ACT_CSUM=m
+CONFIG_NET_ACT_PEDIT=m
+CONFIG_NET_CLS_ACT=y
+CONFIG_NET_CLS_FW=m
CONFIG_NETFILTER=y
CONFIG_NETFILTER_ADVANCED=y
CONFIG_NETFILTER_NETLINK=m
-CONFIG_NF_TABLES=m
-CONFIG_NFT_COMPAT=m
CONFIG_NETFILTER_XTABLES=m
CONFIG_NETFILTER_XTABLES_LEGACY=y
CONFIG_NETFILTER_XT_MATCH_BPF=m
CONFIG_NETFILTER_XT_MATCH_LENGTH=m
CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
CONFIG_NETFILTER_XT_TARGET_MARK=m
+CONFIG_NET_SCH_INGRESS=m
+CONFIG_NET_SCH_NETEM=m
+CONFIG_NF_TABLES=m
CONFIG_NF_TABLES_INET=y
-CONFIG_NFT_TPROXY=m
+CONFIG_NFT_COMPAT=m
CONFIG_NFT_SOCKET=m
-CONFIG_IP_ADVANCED_ROUTER=y
-CONFIG_IP_MULTIPLE_TABLES=y
-CONFIG_IP_NF_FILTER=m
-CONFIG_IP_NF_MANGLE=m
-CONFIG_IP_NF_TARGET_REJECT=m
-CONFIG_IP6_NF_TARGET_REJECT=m
-CONFIG_IPV6_MULTIPLE_TABLES=y
-CONFIG_IP6_NF_FILTER=m
-CONFIG_NET_ACT_CSUM=m
-CONFIG_NET_ACT_PEDIT=m
-CONFIG_NET_CLS_ACT=y
-CONFIG_NET_CLS_FW=m
-CONFIG_NET_SCH_INGRESS=m
+CONFIG_NFT_TPROXY=m
+CONFIG_SYN_COOKIES=y
+CONFIG_VETH=y
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c
index b148cadb96d0..fc7e22b503d3 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
@@ -710,8 +710,14 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd,
bw = do_rnd_write(peerfd, winfo->buf + winfo->off, winfo->len);
if (bw < 0) {
- if (cfg_rcv_trunc)
- return 0;
+ /* expected reset, continue to read */
+ if (cfg_rcv_trunc &&
+ (errno == ECONNRESET ||
+ errno == EPIPE)) {
+ fds.events &= ~POLLOUT;
+ continue;
+ }
+
perror("write");
return 111;
}
@@ -737,8 +743,10 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd,
}
if (fds.revents & (POLLERR | POLLNVAL)) {
- if (cfg_rcv_trunc)
- return 0;
+ if (cfg_rcv_trunc) {
+ fds.events &= ~(POLLERR | POLLNVAL);
+ continue;
+ }
fprintf(stderr, "Unexpected revents: "
"POLLERR/POLLNVAL(%x)\n", fds.revents);
return 5;
@@ -1433,7 +1441,7 @@ static void parse_opts(int argc, char **argv)
*/
if (cfg_truncate < 0) {
cfg_rcv_trunc = true;
- signal(SIGPIPE, handle_signal);
+ signal(SIGPIPE, SIG_IGN);
}
break;
case 'j':
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index 47ecb5b3836e..9b7b93f8eb0c 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -492,7 +492,7 @@ do_transfer()
"than expected (${expect_synrx})"
retc=1
fi
- if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} ] && [ ${stat_ooo_now} -eq 0 ]; then
+ if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} ]; then
if [ ${stat_ooo_now} -eq 0 ]; then
mptcp_lib_pr_fail "lower MPC ACK rx (${stat_ackrx_now_l})" \
"than expected (${expect_ackrx})"
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index c90d8e8b95cb..43f31f8d587f 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -2324,7 +2324,7 @@ laminar_endp_tests()
{
# no laminar endpoints: routing rules are used
if reset_with_tcp_filter "without a laminar endpoint" ns1 10.0.2.2 REJECT &&
- mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then
+ continue_if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then
pm_nl_set_limits $ns1 0 2
pm_nl_set_limits $ns2 2 2
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
@@ -2336,7 +2336,7 @@ laminar_endp_tests()
# laminar endpoints: this endpoint is used
if reset_with_tcp_filter "with a laminar endpoint" ns1 10.0.2.2 REJECT &&
- mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then
+ continue_if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then
pm_nl_set_limits $ns1 0 2
pm_nl_set_limits $ns2 2 2
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
@@ -2348,7 +2348,7 @@ laminar_endp_tests()
# laminar endpoints: these endpoints are used
if reset_with_tcp_filter "with multiple laminar endpoints" ns1 10.0.2.2 REJECT &&
- mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then
+ continue_if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then
pm_nl_set_limits $ns1 0 2
pm_nl_set_limits $ns2 2 2
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
@@ -2363,7 +2363,7 @@ laminar_endp_tests()
# laminar endpoints: only one endpoint is used
if reset_with_tcp_filter "single laminar endpoint" ns1 10.0.2.2 REJECT &&
- mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then
+ continue_if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then
pm_nl_set_limits $ns1 0 2
pm_nl_set_limits $ns2 2 2
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
@@ -2376,7 +2376,7 @@ laminar_endp_tests()
# laminar endpoints: subflow and laminar flags
if reset_with_tcp_filter "sublow + laminar endpoints" ns1 10.0.2.2 REJECT &&
- mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then
+ continue_if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then
pm_nl_set_limits $ns1 0 4
pm_nl_set_limits $ns2 2 4
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
@@ -2532,7 +2532,7 @@ remove_tests()
if reset "remove single subflow"; then
pm_nl_set_limits $ns1 0 1
pm_nl_set_limits $ns2 0 1
- pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup
addr_nr_ns2=-1 speed=slow \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 1 1 1
@@ -2545,8 +2545,8 @@ remove_tests()
if reset "remove multiple subflows"; then
pm_nl_set_limits $ns1 0 2
pm_nl_set_limits $ns2 0 2
- pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
- pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+ pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow,backup
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup
addr_nr_ns2=-2 speed=slow \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 2 2 2
@@ -2557,7 +2557,7 @@ remove_tests()
# single address, remove
if reset "remove single address"; then
pm_nl_set_limits $ns1 0 1
- pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup
pm_nl_set_limits $ns2 1 1
addr_nr_ns1=-1 speed=slow \
run_tests $ns1 $ns2 10.0.1.1
@@ -2570,9 +2570,9 @@ remove_tests()
# subflow and signal, remove
if reset "remove subflow and signal"; then
pm_nl_set_limits $ns1 0 2
- pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup
pm_nl_set_limits $ns2 1 2
- pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup
addr_nr_ns1=-1 addr_nr_ns2=-1 speed=slow \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 2 2 2
@@ -2584,10 +2584,10 @@ remove_tests()
# subflows and signal, remove
if reset "remove subflows and signal"; then
pm_nl_set_limits $ns1 0 3
- pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup
pm_nl_set_limits $ns2 1 3
- pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
- pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup
+ pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow,backup
addr_nr_ns1=-1 addr_nr_ns2=-2 speed=10 \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 3 3 3
@@ -2599,9 +2599,9 @@ remove_tests()
# addresses remove
if reset "remove addresses"; then
pm_nl_set_limits $ns1 3 3
- pm_nl_add_endpoint $ns1 10.0.2.1 flags signal id 250
- pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
- pm_nl_add_endpoint $ns1 10.0.4.1 flags signal
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup id 250
+ pm_nl_add_endpoint $ns1 10.0.3.1 flags signal,backup
+ pm_nl_add_endpoint $ns1 10.0.4.1 flags signal,backup
pm_nl_set_limits $ns2 3 3
addr_nr_ns1=-3 speed=10 \
run_tests $ns1 $ns2 10.0.1.1
@@ -2614,10 +2614,10 @@ remove_tests()
# invalid addresses remove
if reset "remove invalid addresses"; then
pm_nl_set_limits $ns1 3 3
- pm_nl_add_endpoint $ns1 10.0.12.1 flags signal
+ pm_nl_add_endpoint $ns1 10.0.12.1 flags signal,backup
# broadcast IP: no packet for this address will be received on ns1
- pm_nl_add_endpoint $ns1 224.0.0.1 flags signal
- pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
+ pm_nl_add_endpoint $ns1 224.0.0.1 flags signal,backup
+ pm_nl_add_endpoint $ns1 10.0.3.1 flags signal,backup
pm_nl_set_limits $ns2 2 2
addr_nr_ns1=-3 speed=10 \
run_tests $ns1 $ns2 10.0.1.1
@@ -2631,10 +2631,10 @@ remove_tests()
# subflows and signal, flush
if reset "flush subflows and signal"; then
pm_nl_set_limits $ns1 0 3
- pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup
pm_nl_set_limits $ns2 1 3
- pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
- pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup
+ pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow,backup
addr_nr_ns1=-8 addr_nr_ns2=-8 speed=slow \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 3 3 3
@@ -2647,9 +2647,9 @@ remove_tests()
if reset "flush subflows"; then
pm_nl_set_limits $ns1 3 3
pm_nl_set_limits $ns2 3 3
- pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow id 150
- pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
- pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
+ pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow,backup id 150
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup
+ pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow,backup
addr_nr_ns1=-8 addr_nr_ns2=-8 speed=slow \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 3 3 3
@@ -2666,9 +2666,9 @@ remove_tests()
# addresses flush
if reset "flush addresses"; then
pm_nl_set_limits $ns1 3 3
- pm_nl_add_endpoint $ns1 10.0.2.1 flags signal id 250
- pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
- pm_nl_add_endpoint $ns1 10.0.4.1 flags signal
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup id 250
+ pm_nl_add_endpoint $ns1 10.0.3.1 flags signal,backup
+ pm_nl_add_endpoint $ns1 10.0.4.1 flags signal,backup
pm_nl_set_limits $ns2 3 3
addr_nr_ns1=-8 addr_nr_ns2=-8 speed=slow \
run_tests $ns1 $ns2 10.0.1.1
@@ -2681,9 +2681,9 @@ remove_tests()
# invalid addresses flush
if reset "flush invalid addresses"; then
pm_nl_set_limits $ns1 3 3
- pm_nl_add_endpoint $ns1 10.0.12.1 flags signal
- pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
- pm_nl_add_endpoint $ns1 10.0.14.1 flags signal
+ pm_nl_add_endpoint $ns1 10.0.12.1 flags signal,backup
+ pm_nl_add_endpoint $ns1 10.0.3.1 flags signal,backup
+ pm_nl_add_endpoint $ns1 10.0.14.1 flags signal,backup
pm_nl_set_limits $ns2 3 3
addr_nr_ns1=-8 speed=slow \
run_tests $ns1 $ns2 10.0.1.1
@@ -3500,7 +3500,6 @@ fullmesh_tests()
fastclose_tests()
{
if reset_check_counter "fastclose test" "MPTcpExtMPFastcloseTx"; then
- MPTCP_LIB_SUBTEST_FLAKY=1
test_linkfail=1024 fastclose=client \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 0 0 0
@@ -3509,7 +3508,6 @@ fastclose_tests()
fi
if reset_check_counter "fastclose server test" "MPTcpExtMPFastcloseRx"; then
- MPTCP_LIB_SUBTEST_FLAKY=1
test_linkfail=1024 fastclose=server \
run_tests $ns1 $ns2 10.0.1.1
join_rst_nr=1 \
@@ -3806,7 +3804,7 @@ userspace_tests()
continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
set_userspace_pm $ns1
pm_nl_set_limits $ns2 2 2
- { speed=5 \
+ { timeout_test=120 test_linkfail=128 speed=5 \
run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
wait_mpj $ns1
@@ -3831,7 +3829,7 @@ userspace_tests()
chk_mptcp_info subflows 0 subflows 0
chk_subflows_total 1 1
kill_events_pids
- mptcp_lib_kill_wait $tests_pid
+ mptcp_lib_kill_group_wait $tests_pid
fi
# userspace pm create destroy subflow
@@ -3839,7 +3837,7 @@ userspace_tests()
continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
set_userspace_pm $ns2
pm_nl_set_limits $ns1 0 1
- { speed=5 \
+ { timeout_test=120 test_linkfail=128 speed=5 \
run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
wait_mpj $ns2
@@ -3859,7 +3857,7 @@ userspace_tests()
chk_mptcp_info subflows 0 subflows 0
chk_subflows_total 1 1
kill_events_pids
- mptcp_lib_kill_wait $tests_pid
+ mptcp_lib_kill_group_wait $tests_pid
fi
# userspace pm create id 0 subflow
@@ -3867,7 +3865,7 @@ userspace_tests()
continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
set_userspace_pm $ns2
pm_nl_set_limits $ns1 0 1
- { speed=5 \
+ { timeout_test=120 test_linkfail=128 speed=5 \
run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
wait_mpj $ns2
@@ -3880,7 +3878,7 @@ userspace_tests()
chk_mptcp_info subflows 1 subflows 1
chk_subflows_total 2 2
kill_events_pids
- mptcp_lib_kill_wait $tests_pid
+ mptcp_lib_kill_group_wait $tests_pid
fi
# userspace pm remove initial subflow
@@ -3888,7 +3886,7 @@ userspace_tests()
continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
set_userspace_pm $ns2
pm_nl_set_limits $ns1 0 1
- { speed=5 \
+ { timeout_test=120 test_linkfail=128 speed=5 \
run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
wait_mpj $ns2
@@ -3904,7 +3902,7 @@ userspace_tests()
chk_mptcp_info subflows 1 subflows 1
chk_subflows_total 1 1
kill_events_pids
- mptcp_lib_kill_wait $tests_pid
+ mptcp_lib_kill_group_wait $tests_pid
fi
# userspace pm send RM_ADDR for ID 0
@@ -3912,7 +3910,7 @@ userspace_tests()
continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
set_userspace_pm $ns1
pm_nl_set_limits $ns2 1 1
- { speed=5 \
+ { timeout_test=120 test_linkfail=128 speed=5 \
run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
wait_mpj $ns1
@@ -3930,7 +3928,7 @@ userspace_tests()
chk_mptcp_info subflows 1 subflows 1
chk_subflows_total 1 1
kill_events_pids
- mptcp_lib_kill_wait $tests_pid
+ mptcp_lib_kill_group_wait $tests_pid
fi
}
@@ -3939,11 +3937,11 @@ endpoint_tests()
# subflow_rebuild_header is needed to support the implicit flag
# userspace pm type prevents add_addr
if reset "implicit EP" &&
- mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
+ continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
pm_nl_set_limits $ns1 2 2
pm_nl_set_limits $ns2 2 2
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
- { speed=slow \
+ { timeout_test=120 test_linkfail=128 speed=slow \
run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
@@ -3960,17 +3958,17 @@ endpoint_tests()
pm_nl_add_endpoint $ns2 10.0.2.2 flags signal
pm_nl_check_endpoint "modif is allowed" \
$ns2 10.0.2.2 id 1 flags signal
- mptcp_lib_kill_wait $tests_pid
+ mptcp_lib_kill_group_wait $tests_pid
fi
if reset_with_tcp_filter "delete and re-add" ns2 10.0.3.2 REJECT OUTPUT &&
- mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
+ continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
start_events
pm_nl_set_limits $ns1 0 3
pm_nl_set_limits $ns2 0 3
pm_nl_add_endpoint $ns2 10.0.1.2 id 1 dev ns2eth1 flags subflow
pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow
- { test_linkfail=4 speed=5 \
+ { timeout_test=120 test_linkfail=128 speed=5 \
run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
@@ -4015,7 +4013,7 @@ endpoint_tests()
chk_mptcp_info subflows 3 subflows 3
done
- mptcp_lib_kill_wait $tests_pid
+ mptcp_lib_kill_group_wait $tests_pid
kill_events_pids
chk_evt_nr ns1 MPTCP_LIB_EVENT_LISTENER_CREATED 1
@@ -4040,7 +4038,7 @@ endpoint_tests()
# remove and re-add
if reset_with_events "delete re-add signal" &&
- mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
+ continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
ip netns exec $ns1 sysctl -q net.mptcp.add_addr_timeout=0
pm_nl_set_limits $ns1 0 3
pm_nl_set_limits $ns2 3 3
@@ -4048,7 +4046,7 @@ endpoint_tests()
# broadcast IP: no packet for this address will be received on ns1
pm_nl_add_endpoint $ns1 224.0.0.1 id 2 flags signal
pm_nl_add_endpoint $ns1 10.0.1.1 id 42 flags signal
- { test_linkfail=4 speed=5 \
+ { timeout_test=120 test_linkfail=128 speed=5 \
run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
@@ -4057,39 +4055,46 @@ endpoint_tests()
$ns1 10.0.2.1 id 1 flags signal
chk_subflow_nr "before delete" 2
chk_mptcp_info subflows 1 subflows 1
+ chk_mptcp_info add_addr_signal 2 add_addr_accepted 1
pm_nl_del_endpoint $ns1 1 10.0.2.1
pm_nl_del_endpoint $ns1 2 224.0.0.1
sleep 0.5
chk_subflow_nr "after delete" 1
chk_mptcp_info subflows 0 subflows 0
+ chk_mptcp_info add_addr_signal 0 add_addr_accepted 0
pm_nl_add_endpoint $ns1 10.0.2.1 id 1 flags signal
pm_nl_add_endpoint $ns1 10.0.3.1 id 2 flags signal
wait_mpj $ns2
chk_subflow_nr "after re-add" 3
chk_mptcp_info subflows 2 subflows 2
+ chk_mptcp_info add_addr_signal 2 add_addr_accepted 2
pm_nl_del_endpoint $ns1 42 10.0.1.1
sleep 0.5
chk_subflow_nr "after delete ID 0" 2
chk_mptcp_info subflows 2 subflows 2
+ chk_mptcp_info add_addr_signal 2 add_addr_accepted 2
pm_nl_add_endpoint $ns1 10.0.1.1 id 99 flags signal
wait_mpj $ns2
chk_subflow_nr "after re-add ID 0" 3
chk_mptcp_info subflows 3 subflows 3
+ chk_mptcp_info add_addr_signal 3 add_addr_accepted 2
pm_nl_del_endpoint $ns1 99 10.0.1.1
sleep 0.5
chk_subflow_nr "after re-delete ID 0" 2
chk_mptcp_info subflows 2 subflows 2
+ chk_mptcp_info add_addr_signal 2 add_addr_accepted 2
pm_nl_add_endpoint $ns1 10.0.1.1 id 88 flags signal
wait_mpj $ns2
chk_subflow_nr "after re-re-add ID 0" 3
chk_mptcp_info subflows 3 subflows 3
- mptcp_lib_kill_wait $tests_pid
+ chk_mptcp_info add_addr_signal 3 add_addr_accepted 2
+ mptcp_lib_kill_group_wait $tests_pid
kill_events_pids
chk_evt_nr ns1 MPTCP_LIB_EVENT_LISTENER_CREATED 1
@@ -4115,13 +4120,13 @@ endpoint_tests()
# flush and re-add
if reset_with_tcp_filter "flush re-add" ns2 10.0.3.2 REJECT OUTPUT &&
- mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
+ continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
pm_nl_set_limits $ns1 0 2
pm_nl_set_limits $ns2 1 2
# broadcast IP: no packet for this address will be received on ns1
pm_nl_add_endpoint $ns1 224.0.0.1 id 2 flags signal
pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow
- { test_linkfail=4 speed=20 \
+ { timeout_test=120 test_linkfail=128 speed=20 \
run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
@@ -4137,7 +4142,7 @@ endpoint_tests()
wait_mpj $ns2
pm_nl_add_endpoint $ns1 10.0.3.1 id 2 flags signal
wait_mpj $ns2
- mptcp_lib_kill_wait $tests_pid
+ mptcp_lib_kill_group_wait $tests_pid
join_syn_tx=3 join_connect_err=1 \
chk_join_nr 2 2 2
diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
index d62e653d48b0..f4388900016a 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
@@ -350,6 +350,27 @@ mptcp_lib_kill_wait() {
wait "${1}" 2>/dev/null
}
+# $1: PID
+mptcp_lib_pid_list_children() {
+ local curr="${1}"
+ # evoke 'ps' only once
+ local pids="${2:-"$(ps o pid,ppid)"}"
+
+ echo "${curr}"
+
+ local pid
+ for pid in $(echo "${pids}" | awk "\$2 == ${curr} { print \$1 }"); do
+ mptcp_lib_pid_list_children "${pid}" "${pids}"
+ done
+}
+
+# $1: PID
+mptcp_lib_kill_group_wait() {
+ # Some users might not have procps-ng: cannot use "kill -- -PID"
+ mptcp_lib_pid_list_children "${1}" | xargs -r kill &>/dev/null
+ wait "${1}" 2>/dev/null
+}
+
# $1: IP address
mptcp_lib_is_v6() {
[ -z "${1##*:*}" ]
diff --git a/tools/testing/selftests/net/netfilter/Makefile b/tools/testing/selftests/net/netfilter/Makefile
index a98ed892f55f..ee2d1a5254f8 100644
--- a/tools/testing/selftests/net/netfilter/Makefile
+++ b/tools/testing/selftests/net/netfilter/Makefile
@@ -6,46 +6,52 @@ HOSTPKG_CONFIG := pkg-config
MNL_CFLAGS := $(shell $(HOSTPKG_CONFIG) --cflags libmnl 2>/dev/null)
MNL_LDLIBS := $(shell $(HOSTPKG_CONFIG) --libs libmnl 2>/dev/null || echo -lmnl)
-TEST_PROGS := br_netfilter.sh bridge_brouter.sh
-TEST_PROGS += br_netfilter_queue.sh
-TEST_PROGS += conntrack_dump_flush.sh
-TEST_PROGS += conntrack_icmp_related.sh
-TEST_PROGS += conntrack_ipip_mtu.sh
-TEST_PROGS += conntrack_tcp_unreplied.sh
-TEST_PROGS += conntrack_resize.sh
-TEST_PROGS += conntrack_sctp_collision.sh
-TEST_PROGS += conntrack_vrf.sh
-TEST_PROGS += conntrack_clash.sh
-TEST_PROGS += conntrack_reverse_clash.sh
-TEST_PROGS += ipvs.sh
-TEST_PROGS += nf_conntrack_packetdrill.sh
-TEST_PROGS += nf_nat_edemux.sh
-TEST_PROGS += nft_audit.sh
-TEST_PROGS += nft_concat_range.sh
-TEST_PROGS += nft_conntrack_helper.sh
-TEST_PROGS += nft_fib.sh
-TEST_PROGS += nft_flowtable.sh
-TEST_PROGS += nft_interface_stress.sh
-TEST_PROGS += nft_meta.sh
-TEST_PROGS += nft_nat.sh
-TEST_PROGS += nft_nat_zones.sh
-TEST_PROGS += nft_queue.sh
-TEST_PROGS += nft_synproxy.sh
-TEST_PROGS += nft_tproxy_tcp.sh
-TEST_PROGS += nft_tproxy_udp.sh
-TEST_PROGS += nft_zones_many.sh
-TEST_PROGS += rpath.sh
-TEST_PROGS += vxlan_mtu_frag.sh
-TEST_PROGS += xt_string.sh
+TEST_PROGS := \
+ br_netfilter.sh \
+ br_netfilter_queue.sh \
+ bridge_brouter.sh \
+ conntrack_clash.sh \
+ conntrack_dump_flush.sh \
+ conntrack_icmp_related.sh \
+ conntrack_ipip_mtu.sh \
+ conntrack_resize.sh \
+ conntrack_reverse_clash.sh \
+ conntrack_sctp_collision.sh \
+ conntrack_tcp_unreplied.sh \
+ conntrack_vrf.sh \
+ ipvs.sh \
+ nf_conntrack_packetdrill.sh \
+ nf_nat_edemux.sh \
+ nft_audit.sh \
+ nft_concat_range.sh \
+ nft_conntrack_helper.sh \
+ nft_fib.sh \
+ nft_flowtable.sh \
+ nft_interface_stress.sh \
+ nft_meta.sh \
+ nft_nat.sh \
+ nft_nat_zones.sh \
+ nft_queue.sh \
+ nft_synproxy.sh \
+ nft_tproxy_tcp.sh \
+ nft_tproxy_udp.sh \
+ nft_zones_many.sh \
+ rpath.sh \
+ vxlan_mtu_frag.sh \
+ xt_string.sh \
+# end of TEST_PROGS
TEST_PROGS_EXTENDED = nft_concat_range_perf.sh
-TEST_GEN_FILES = audit_logread
-TEST_GEN_FILES += connect_close nf_queue
-TEST_GEN_FILES += conntrack_dump_flush
-TEST_GEN_FILES += conntrack_reverse_clash
-TEST_GEN_FILES += sctp_collision
-TEST_GEN_FILES += udpclash
+TEST_GEN_FILES = \
+ audit_logread \
+ connect_close \
+ conntrack_dump_flush \
+ conntrack_reverse_clash \
+ nf_queue \
+ sctp_collision \
+ udpclash \
+# end of TEST_GEN_FILES
include ../../lib.mk
@@ -56,9 +62,12 @@ $(OUTPUT)/conntrack_dump_flush: CFLAGS += $(MNL_CFLAGS)
$(OUTPUT)/conntrack_dump_flush: LDLIBS += $(MNL_LDLIBS)
$(OUTPUT)/udpclash: LDLIBS += -lpthread
-TEST_FILES := lib.sh
-TEST_FILES += packetdrill
+TEST_FILES := \
+ lib.sh \
+ packetdrill \
+# end of TEST_FILES
TEST_INCLUDES := \
+ $(wildcard ../lib/sh/*.sh) \
../lib.sh \
- $(wildcard ../lib/sh/*.sh)
+# end of TEST_INCLUDES
diff --git a/tools/testing/selftests/net/netfilter/config b/tools/testing/selftests/net/netfilter/config
index 305e46b819cb..12ce61fa15a8 100644
--- a/tools/testing/selftests/net/netfilter/config
+++ b/tools/testing/selftests/net/netfilter/config
@@ -1,77 +1,80 @@
CONFIG_AUDIT=y
CONFIG_BPF_SYSCALL=y
CONFIG_BRIDGE=m
-CONFIG_NETFILTER_XTABLES_LEGACY=y
-CONFIG_BRIDGE_NF_EBTABLES_LEGACY=m
CONFIG_BRIDGE_EBT_BROUTE=m
CONFIG_BRIDGE_EBT_IP=m
CONFIG_BRIDGE_EBT_REDIRECT=m
CONFIG_BRIDGE_EBT_T_FILTER=m
CONFIG_BRIDGE_NETFILTER=m
CONFIG_BRIDGE_NF_EBTABLES=m
+CONFIG_BRIDGE_NF_EBTABLES_LEGACY=m
CONFIG_BRIDGE_VLAN_FILTERING=y
CONFIG_CGROUP_BPF=y
+CONFIG_CRYPTO_SHA1=m
CONFIG_DUMMY=m
+CONFIG_INET_DIAG=m
CONFIG_INET_ESP=m
-CONFIG_CRYPTO_SHA1=m
-CONFIG_IP_NF_MATCH_RPFILTER=m
+CONFIG_INET_SCTP_DIAG=m
+CONFIG_IP6_NF_FILTER=m
+CONFIG_IP6_NF_IPTABLES=m
+CONFIG_IP6_NF_IPTABLES_LEGACY=m
CONFIG_IP6_NF_MATCH_RPFILTER=m
+CONFIG_IP6_NF_RAW=m
+CONFIG_IP_NF_FILTER=m
CONFIG_IP_NF_IPTABLES=m
CONFIG_IP_NF_IPTABLES_LEGACY=m
-CONFIG_IP6_NF_IPTABLES=m
-CONFIG_IP6_NF_IPTABLES_LEGACY=m
+CONFIG_IP_NF_MATCH_RPFILTER=m
CONFIG_IP_NF_NAT=m
-CONFIG_IP_NF_FILTER=m
-CONFIG_IP6_NF_FILTER=m
CONFIG_IP_NF_RAW=m
-CONFIG_IP6_NF_RAW=m
CONFIG_IP_SCTP=m
+CONFIG_IPV6=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
CONFIG_IP_VS=m
CONFIG_IP_VS_PROTO_TCP=y
CONFIG_IP_VS_RR=m
-CONFIG_IPV6=y
-CONFIG_IPV6_MULTIPLE_TABLES=y
CONFIG_MACVLAN=m
CONFIG_NAMESPACES=y
CONFIG_NET_CLS_U32=m
-CONFIG_NET_L3_MASTER_DEV=y
-CONFIG_NET_NS=y
-CONFIG_NET_SCH_NETEM=m
-CONFIG_NET_SCH_HTB=m
-CONFIG_NET_IPIP=m
-CONFIG_NET_VRF=y
CONFIG_NETFILTER=y
CONFIG_NETFILTER_ADVANCED=y
CONFIG_NETFILTER_NETLINK=m
CONFIG_NETFILTER_NETLINK_QUEUE=m
CONFIG_NETFILTER_SYNPROXY=m
CONFIG_NETFILTER_XTABLES=m
-CONFIG_NETFILTER_XT_NAT=m
+CONFIG_NETFILTER_XTABLES_LEGACY=y
CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
CONFIG_NETFILTER_XT_MATCH_STATE=m
CONFIG_NETFILTER_XT_MATCH_STRING=m
+CONFIG_NETFILTER_XT_NAT=m
CONFIG_NETFILTER_XT_TARGET_REDIRECT=m
+CONFIG_NET_IPIP=m
+CONFIG_NET_L3_MASTER_DEV=y
+CONFIG_NET_NS=y
+CONFIG_NET_PKTGEN=m
+CONFIG_NET_SCH_HTB=m
+CONFIG_NET_SCH_NETEM=m
+CONFIG_NET_VRF=y
CONFIG_NF_CONNTRACK=m
-CONFIG_NF_CONNTRACK_PROCFS=y
CONFIG_NF_CONNTRACK_EVENTS=y
CONFIG_NF_CONNTRACK_FTP=m
CONFIG_NF_CONNTRACK_MARK=y
+CONFIG_NF_CONNTRACK_PROCFS=y
CONFIG_NF_CONNTRACK_ZONES=y
CONFIG_NF_CT_NETLINK=m
CONFIG_NF_CT_PROTO_SCTP=y
CONFIG_NF_FLOW_TABLE=m
+CONFIG_NF_FLOW_TABLE_INET=m
CONFIG_NF_LOG_IPV4=m
CONFIG_NF_LOG_IPV6=m
CONFIG_NF_NAT=m
-CONFIG_NF_NAT_REDIRECT=y
CONFIG_NF_NAT_MASQUERADE=y
+CONFIG_NF_NAT_REDIRECT=y
CONFIG_NF_TABLES=m
CONFIG_NF_TABLES_BRIDGE=m
CONFIG_NF_TABLES_INET=y
CONFIG_NF_TABLES_IPV4=y
CONFIG_NF_TABLES_IPV6=y
CONFIG_NF_TABLES_NETDEV=y
-CONFIG_NF_FLOW_TABLE_INET=m
CONFIG_NFT_BRIDGE_META=m
CONFIG_NFT_COMPAT=m
CONFIG_NFT_CT=m
@@ -90,12 +93,9 @@ CONFIG_NFT_QUOTA=m
CONFIG_NFT_REDIR=m
CONFIG_NFT_SYNPROXY=m
CONFIG_NFT_TPROXY=m
+CONFIG_TUN=m
CONFIG_VETH=m
CONFIG_VLAN_8021Q=m
CONFIG_VXLAN=m
-CONFIG_XFRM_USER=m
CONFIG_XFRM_STATISTICS=y
-CONFIG_NET_PKTGEN=m
-CONFIG_TUN=m
-CONFIG_INET_DIAG=m
-CONFIG_INET_SCTP_DIAG=m
+CONFIG_XFRM_USER=m
diff --git a/tools/testing/selftests/net/netfilter/nf_nat_edemux.sh b/tools/testing/selftests/net/netfilter/nf_nat_edemux.sh
index 1014551dd769..6731fe1eaf2e 100755
--- a/tools/testing/selftests/net/netfilter/nf_nat_edemux.sh
+++ b/tools/testing/selftests/net/netfilter/nf_nat_edemux.sh
@@ -17,9 +17,31 @@ cleanup()
checktool "socat -h" "run test without socat"
checktool "iptables --version" "run test without iptables"
+checktool "conntrack --version" "run test without conntrack"
trap cleanup EXIT
+connect_done()
+{
+ local ns="$1"
+ local port="$2"
+
+ ip netns exec "$ns" ss -nt -o state established "dport = :$port" | grep -q "$port"
+}
+
+check_ctstate()
+{
+ local ns="$1"
+ local dp="$2"
+
+ if ! ip netns exec "$ns" conntrack --get -s 192.168.1.2 -d 192.168.1.1 -p tcp \
+ --sport 10000 --dport "$dp" --state ESTABLISHED > /dev/null 2>&1;then
+ echo "FAIL: Did not find expected state for dport $2"
+ ip netns exec "$ns" bash -c 'conntrack -L; conntrack -S; ss -nt'
+ ret=1
+ fi
+}
+
setup_ns ns1 ns2
# Connect the namespaces using a veth pair
@@ -44,15 +66,18 @@ socatpid=$!
ip netns exec "$ns2" sysctl -q net.ipv4.ip_local_port_range="10000 10000"
# add a virtual IP using DNAT
-ip netns exec "$ns2" iptables -t nat -A OUTPUT -d 10.96.0.1/32 -p tcp --dport 443 -j DNAT --to-destination 192.168.1.1:5201
+ip netns exec "$ns2" iptables -t nat -A OUTPUT -d 10.96.0.1/32 -p tcp --dport 443 -j DNAT --to-destination 192.168.1.1:5201 || exit 1
# ... and route it to the other namespace
ip netns exec "$ns2" ip route add 10.96.0.1 via 192.168.1.1
-# add a persistent connection from the other namespace
-ip netns exec "$ns2" socat -t 10 - TCP:192.168.1.1:5201 > /dev/null &
+# listener should be up by now, wait if it isn't yet.
+wait_local_port_listen "$ns1" 5201 tcp
-sleep 1
+# add a persistent connection from the other namespace
+sleep 10 | ip netns exec "$ns2" socat -t 10 - TCP:192.168.1.1:5201 > /dev/null &
+cpid0=$!
+busywait "$BUSYWAIT_TIMEOUT" connect_done "$ns2" "5201"
# ip daddr:dport will be rewritten to 192.168.1.1 5201
# NAT must reallocate source port 10000 because
@@ -71,26 +96,25 @@ fi
ip netns exec "$ns1" iptables -t nat -A PREROUTING -p tcp --dport 5202 -j REDIRECT --to-ports 5201
ip netns exec "$ns1" iptables -t nat -A PREROUTING -p tcp --dport 5203 -j REDIRECT --to-ports 5201
-sleep 5 | ip netns exec "$ns2" socat -t 5 -u STDIN TCP:192.168.1.1:5202,connect-timeout=5 >/dev/null &
+sleep 5 | ip netns exec "$ns2" socat -T 5 -u STDIN TCP:192.168.1.1:5202,connect-timeout=5 >/dev/null &
+cpid1=$!
-# if connect succeeds, client closes instantly due to EOF on stdin.
-# if connect hangs, it will time out after 5s.
-echo | ip netns exec "$ns2" socat -t 3 -u STDIN TCP:192.168.1.1:5203,connect-timeout=5 >/dev/null &
+sleep 5 | ip netns exec "$ns2" socat -T 5 -u STDIN TCP:192.168.1.1:5203,connect-timeout=5 >/dev/null &
cpid2=$!
-time_then=$(date +%s)
-wait $cpid2
-rv=$?
-time_now=$(date +%s)
+busywait "$BUSYWAIT_TIMEOUT" connect_done "$ns2" 5202
+busywait "$BUSYWAIT_TIMEOUT" connect_done "$ns2" 5203
-# Check how much time has elapsed, expectation is for
-# 'cpid2' to connect and then exit (and no connect delay).
-delta=$((time_now - time_then))
+check_ctstate "$ns1" 5202
+check_ctstate "$ns1" 5203
-if [ $delta -lt 2 ] && [ $rv -eq 0 ]; then
+kill $socatpid $cpid0 $cpid1 $cpid2
+socatpid=0
+
+if [ $ret -eq 0 ]; then
echo "PASS: could connect to service via redirected ports"
else
- echo "FAIL: socat cannot connect to service via redirect ($delta seconds elapsed, returned $rv)"
+ echo "FAIL: socat cannot connect to service via redirect"
ret=1
fi
diff --git a/tools/testing/selftests/net/netfilter/nft_fib.sh b/tools/testing/selftests/net/netfilter/nft_fib.sh
index 9929a9ffef65..04544905c216 100755
--- a/tools/testing/selftests/net/netfilter/nft_fib.sh
+++ b/tools/testing/selftests/net/netfilter/nft_fib.sh
@@ -256,12 +256,12 @@ test_ping_unreachable() {
local daddr4=$1
local daddr6=$2
- if ip netns exec "$ns1" ping -c 1 -w 1 -q "$daddr4" > /dev/null; then
+ if ip netns exec "$ns1" ping -c 1 -W 0.1 -q "$daddr4" > /dev/null; then
echo "FAIL: ${ns1} could reach $daddr4" 1>&2
return 1
fi
- if ip netns exec "$ns1" ping -c 1 -w 1 -q "$daddr6" > /dev/null; then
+ if ip netns exec "$ns1" ping -c 1 -W 0.1 -q "$daddr6" > /dev/null; then
echo "FAIL: ${ns1} could reach $daddr6" 1>&2
return 1
fi
@@ -437,14 +437,17 @@ check_type()
local addr="$3"
local type="$4"
local count="$5"
+ local lret=0
[ -z "$count" ] && count=1
if ! ip netns exec "$nsrouter" nft get element inet t "$setname" { "$iifname" . "$addr" . "$type" } |grep -q "counter packets $count";then
- echo "FAIL: did not find $iifname . $addr . $type in $setname"
+ echo "FAIL: did not find $iifname . $addr . $type in $setname with $count packets"
ip netns exec "$nsrouter" nft list set inet t "$setname"
ret=1
- return 1
+ # do not fail right away, delete entry if it exists so later test that
+ # checks for unwanted keys don't get confused by this *expected* key.
+ lret=1
fi
# delete the entry, this allows to check if anything unexpected appeared
@@ -456,7 +459,7 @@ check_type()
return 1
fi
- return 0
+ return $lret
}
check_local()
diff --git a/tools/testing/selftests/net/ovpn/Makefile b/tools/testing/selftests/net/ovpn/Makefile
index e0926d76b4c8..dbe0388c8512 100644
--- a/tools/testing/selftests/net/ovpn/Makefile
+++ b/tools/testing/selftests/net/ovpn/Makefile
@@ -19,13 +19,15 @@ LDLIBS += $(VAR_LDLIBS)
TEST_FILES = common.sh
-TEST_PROGS = test.sh \
- test-large-mtu.sh \
+TEST_PROGS := \
test-chachapoly.sh \
- test-tcp.sh \
- test-float.sh \
+ test-close-socket-tcp.sh \
test-close-socket.sh \
- test-close-socket-tcp.sh
+ test-float.sh \
+ test-large-mtu.sh \
+ test-tcp.sh \
+ test.sh \
+# end of TEST_PROGS
TEST_GEN_FILES := ovpn-cli
diff --git a/tools/testing/selftests/net/ovpn/config b/tools/testing/selftests/net/ovpn/config
index 71946ba9fa17..42699740936d 100644
--- a/tools/testing/selftests/net/ovpn/config
+++ b/tools/testing/selftests/net/ovpn/config
@@ -1,10 +1,10 @@
-CONFIG_NET=y
-CONFIG_INET=y
-CONFIG_STREAM_PARSER=y
-CONFIG_NET_UDP_TUNNEL=y
-CONFIG_DST_CACHE=y
CONFIG_CRYPTO=y
CONFIG_CRYPTO_AES=y
-CONFIG_CRYPTO_GCM=y
CONFIG_CRYPTO_CHACHA20POLY1305=y
+CONFIG_CRYPTO_GCM=y
+CONFIG_DST_CACHE=y
+CONFIG_INET=y
+CONFIG_NET=y
+CONFIG_NET_UDP_TUNNEL=y
CONFIG_OVPN=m
+CONFIG_STREAM_PARSER=y
diff --git a/tools/testing/selftests/net/ovpn/ovpn-cli.c b/tools/testing/selftests/net/ovpn/ovpn-cli.c
index 688a5fa6fdac..0a5226196a2e 100644
--- a/tools/testing/selftests/net/ovpn/ovpn-cli.c
+++ b/tools/testing/selftests/net/ovpn/ovpn-cli.c
@@ -1587,6 +1587,7 @@ static int ovpn_listen_mcast(void)
sock = nl_socket_alloc();
if (!sock) {
fprintf(stderr, "cannot allocate netlink socket\n");
+ ret = -ENOMEM;
goto err_free;
}
@@ -2106,6 +2107,7 @@ static int ovpn_run_cmd(struct ovpn_ctx *ovpn)
ret = ovpn_listen_mcast();
break;
case CMD_INVALID:
+ ret = -EINVAL;
break;
}
diff --git a/tools/testing/selftests/net/packetdrill/Makefile b/tools/testing/selftests/net/packetdrill/Makefile
index 31cfb666ba8b..ff54641493e9 100644
--- a/tools/testing/selftests/net/packetdrill/Makefile
+++ b/tools/testing/selftests/net/packetdrill/Makefile
@@ -1,9 +1,11 @@
# SPDX-License-Identifier: GPL-2.0
-TEST_INCLUDES := ksft_runner.sh \
- defaults.sh \
- set_sysctls.py \
- ../../kselftest/ktap_helpers.sh
+TEST_INCLUDES := \
+ defaults.sh \
+ ksft_runner.sh \
+ set_sysctls.py \
+ ../../kselftest/ktap_helpers.sh \
+# end of TEST_INCLUDES
TEST_PROGS := $(wildcard *.pkt)
diff --git a/tools/testing/selftests/net/packetdrill/config b/tools/testing/selftests/net/packetdrill/config
index 0237ed98f3c0..c4a19a785521 100644
--- a/tools/testing/selftests/net/packetdrill/config
+++ b/tools/testing/selftests/net/packetdrill/config
@@ -1,6 +1,6 @@
-CONFIG_IPV6=y
-CONFIG_HZ_1000=y
CONFIG_HZ=1000
+CONFIG_HZ_1000=y
+CONFIG_IPV6=y
CONFIG_NET_NS=y
CONFIG_NET_SCH_FIFO=y
CONFIG_NET_SCH_FQ=y
diff --git a/tools/testing/selftests/net/rds/Makefile b/tools/testing/selftests/net/rds/Makefile
index 612a7219990e..762845cc973c 100644
--- a/tools/testing/selftests/net/rds/Makefile
+++ b/tools/testing/selftests/net/rds/Makefile
@@ -5,8 +5,14 @@ all:
TEST_PROGS := run.sh
-TEST_FILES := include.sh test.py
+TEST_FILES := \
+ include.sh \
+ test.py \
+# end of TEST_FILES
-EXTRA_CLEAN := /tmp/rds_logs include.sh
+EXTRA_CLEAN := \
+ include.sh \
+ /tmp/rds_logs \
+# end of EXTRA_CLEAN
include ../../lib.mk
diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
index dbf77513f617..163a084d525d 100755
--- a/tools/testing/selftests/net/rtnetlink.sh
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -1466,6 +1466,8 @@ usage: ${0##*/} OPTS
EOF
}
+require_command jq
+
#check for needed privileges
if [ "$(id -u)" -ne 0 ];then
end_test "SKIP: Need root privileges"
diff --git a/tools/testing/selftests/net/sctp_hello.c b/tools/testing/selftests/net/sctp_hello.c
index f02f1f95d227..a04dac0b8027 100644
--- a/tools/testing/selftests/net/sctp_hello.c
+++ b/tools/testing/selftests/net/sctp_hello.c
@@ -29,7 +29,6 @@ static void set_addr(struct sockaddr_storage *ss, char *ip, char *port, int *len
static int do_client(int argc, char *argv[])
{
struct sockaddr_storage ss;
- char buf[] = "hello";
int csk, ret, len;
if (argc < 5) {
@@ -56,16 +55,10 @@ static int do_client(int argc, char *argv[])
set_addr(&ss, argv[3], argv[4], &len);
ret = connect(csk, (struct sockaddr *)&ss, len);
- if (ret < 0) {
- printf("failed to connect to peer\n");
+ if (ret < 0)
return -1;
- }
- ret = send(csk, buf, strlen(buf) + 1, 0);
- if (ret < 0) {
- printf("failed to send msg %d\n", ret);
- return -1;
- }
+ recv(csk, NULL, 0, 0);
close(csk);
return 0;
@@ -75,7 +68,6 @@ int main(int argc, char *argv[])
{
struct sockaddr_storage ss;
int lsk, csk, ret, len;
- char buf[20];
if (argc < 2 || (strcmp(argv[1], "server") && strcmp(argv[1], "client"))) {
printf("%s server|client ...\n", argv[0]);
@@ -125,11 +117,6 @@ int main(int argc, char *argv[])
return -1;
}
- ret = recv(csk, buf, sizeof(buf), 0);
- if (ret <= 0) {
- printf("failed to recv msg %d\n", ret);
- return -1;
- }
close(csk);
close(lsk);
diff --git a/tools/testing/selftests/net/sctp_vrf.sh b/tools/testing/selftests/net/sctp_vrf.sh
index c854034b6aa1..667b211aa8a1 100755
--- a/tools/testing/selftests/net/sctp_vrf.sh
+++ b/tools/testing/selftests/net/sctp_vrf.sh
@@ -20,9 +20,9 @@ setup() {
modprobe sctp_diag
setup_ns CLIENT_NS1 CLIENT_NS2 SERVER_NS
- ip net exec $CLIENT_NS1 sysctl -w net.ipv6.conf.default.accept_dad=0 2>&1 >/dev/null
- ip net exec $CLIENT_NS2 sysctl -w net.ipv6.conf.default.accept_dad=0 2>&1 >/dev/null
- ip net exec $SERVER_NS sysctl -w net.ipv6.conf.default.accept_dad=0 2>&1 >/dev/null
+ ip net exec $CLIENT_NS1 sysctl -wq net.ipv6.conf.default.accept_dad=0
+ ip net exec $CLIENT_NS2 sysctl -wq net.ipv6.conf.default.accept_dad=0
+ ip net exec $SERVER_NS sysctl -wq net.ipv6.conf.default.accept_dad=0
ip -n $SERVER_NS link add veth1 type veth peer name veth1 netns $CLIENT_NS1
ip -n $SERVER_NS link add veth2 type veth peer name veth1 netns $CLIENT_NS2
@@ -62,17 +62,40 @@ setup() {
}
cleanup() {
- ip netns exec $SERVER_NS pkill sctp_hello 2>&1 >/dev/null
+ wait_client $CLIENT_NS1
+ wait_client $CLIENT_NS2
+ stop_server
cleanup_ns $CLIENT_NS1 $CLIENT_NS2 $SERVER_NS
}
-wait_server() {
+start_server() {
local IFACE=$1
local CNT=0
- until ip netns exec $SERVER_NS ss -lS src $SERVER_IP:$SERVER_PORT | \
- grep LISTEN | grep "$IFACE" 2>&1 >/dev/null; do
- [ $((CNT++)) = "20" ] && { RET=3; return $RET; }
+ ip netns exec $SERVER_NS ./sctp_hello server $AF $SERVER_IP $SERVER_PORT $IFACE &
+ disown
+ until ip netns exec $SERVER_NS ss -SlH | grep -q "$IFACE"; do
+ [ $((CNT++)) -eq 30 ] && { RET=3; return $RET; }
+ sleep 0.1
+ done
+}
+
+stop_server() {
+ local CNT=0
+
+ ip netns exec $SERVER_NS pkill sctp_hello
+ while ip netns exec $SERVER_NS ss -SaH | grep -q .; do
+ [ $((CNT++)) -eq 30 ] && break
+ sleep 0.1
+ done
+}
+
+wait_client() {
+ local CLIENT_NS=$1
+ local CNT=0
+
+ while ip netns exec $CLIENT_NS ss -SaH | grep -q .; do
+ [ $((CNT++)) -eq 30 ] && break
sleep 0.1
done
}
@@ -81,14 +104,12 @@ do_test() {
local CLIENT_NS=$1
local IFACE=$2
- ip netns exec $SERVER_NS pkill sctp_hello 2>&1 >/dev/null
- ip netns exec $SERVER_NS ./sctp_hello server $AF $SERVER_IP \
- $SERVER_PORT $IFACE 2>&1 >/dev/null &
- disown
- wait_server $IFACE || return $RET
+ start_server $IFACE || return $RET
timeout 3 ip netns exec $CLIENT_NS ./sctp_hello client $AF \
- $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT 2>&1 >/dev/null
+ $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT
RET=$?
+ wait_client $CLIENT_NS
+ stop_server
return $RET
}
@@ -96,25 +117,21 @@ do_testx() {
local IFACE1=$1
local IFACE2=$2
- ip netns exec $SERVER_NS pkill sctp_hello 2>&1 >/dev/null
- ip netns exec $SERVER_NS ./sctp_hello server $AF $SERVER_IP \
- $SERVER_PORT $IFACE1 2>&1 >/dev/null &
- disown
- wait_server $IFACE1 || return $RET
- ip netns exec $SERVER_NS ./sctp_hello server $AF $SERVER_IP \
- $SERVER_PORT $IFACE2 2>&1 >/dev/null &
- disown
- wait_server $IFACE2 || return $RET
+ start_server $IFACE1 || return $RET
+ start_server $IFACE2 || return $RET
timeout 3 ip netns exec $CLIENT_NS1 ./sctp_hello client $AF \
- $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT 2>&1 >/dev/null && \
+ $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT && \
timeout 3 ip netns exec $CLIENT_NS2 ./sctp_hello client $AF \
- $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT 2>&1 >/dev/null
+ $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT
RET=$?
+ wait_client $CLIENT_NS1
+ wait_client $CLIENT_NS2
+ stop_server
return $RET
}
testup() {
- ip netns exec $SERVER_NS sysctl -w net.sctp.l3mdev_accept=1 2>&1 >/dev/null
+ ip netns exec $SERVER_NS sysctl -wq net.sctp.l3mdev_accept=1
echo -n "TEST 01: nobind, connect from client 1, l3mdev_accept=1, Y "
do_test $CLIENT_NS1 || { echo "[FAIL]"; return $RET; }
echo "[PASS]"
@@ -123,7 +140,7 @@ testup() {
do_test $CLIENT_NS2 && { echo "[FAIL]"; return $RET; }
echo "[PASS]"
- ip netns exec $SERVER_NS sysctl -w net.sctp.l3mdev_accept=0 2>&1 >/dev/null
+ ip netns exec $SERVER_NS sysctl -wq net.sctp.l3mdev_accept=0
echo -n "TEST 03: nobind, connect from client 1, l3mdev_accept=0, N "
do_test $CLIENT_NS1 && { echo "[FAIL]"; return $RET; }
echo "[PASS]"
@@ -160,7 +177,7 @@ testup() {
do_testx vrf-1 vrf-2 || { echo "[FAIL]"; return $RET; }
echo "[PASS]"
- echo -n "TEST 12: bind vrf-2 & 1 in server, connect from client 1 & 2, N "
+ echo -n "TEST 12: bind vrf-2 & 1 in server, connect from client 1 & 2, Y "
do_testx vrf-2 vrf-1 || { echo "[FAIL]"; return $RET; }
echo "[PASS]"
}
diff --git a/tools/testing/selftests/net/tcp_ao/config b/tools/testing/selftests/net/tcp_ao/config
index 3605e38711cb..971cb6fa2d63 100644
--- a/tools/testing/selftests/net/tcp_ao/config
+++ b/tools/testing/selftests/net/tcp_ao/config
@@ -1,8 +1,8 @@
CONFIG_CRYPTO_HMAC=y
CONFIG_CRYPTO_RMD160=y
CONFIG_CRYPTO_SHA1=y
-CONFIG_IPV6_MULTIPLE_TABLES=y
CONFIG_IPV6=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
CONFIG_NET_L3_MASTER_DEV=y
CONFIG_NET_VRF=y
CONFIG_TCP_AO=y
diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index e788b84551ca..5c6d8215021c 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -564,6 +564,40 @@ TEST_F(tls, msg_more)
EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
}
+TEST_F(tls, cmsg_msg_more)
+{
+ char *test_str = "test_read";
+ char record_type = 100;
+ int send_len = 10;
+
+ /* we don't allow MSG_MORE with non-DATA records */
+ EXPECT_EQ(tls_send_cmsg(self->fd, record_type, test_str, send_len,
+ MSG_MORE), -1);
+ EXPECT_EQ(errno, EINVAL);
+}
+
+TEST_F(tls, msg_more_then_cmsg)
+{
+ char *test_str = "test_read";
+ char record_type = 100;
+ int send_len = 10;
+ char buf[10 * 2];
+ int ret;
+
+ EXPECT_EQ(send(self->fd, test_str, send_len, MSG_MORE), send_len);
+ EXPECT_EQ(recv(self->cfd, buf, send_len, MSG_DONTWAIT), -1);
+
+ ret = tls_send_cmsg(self->fd, record_type, test_str, send_len, 0);
+ EXPECT_EQ(ret, send_len);
+
+ /* initial DATA record didn't get merged with the non-DATA record */
+ EXPECT_EQ(recv(self->cfd, buf, send_len * 2, 0), send_len);
+
+ EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, record_type,
+ buf, sizeof(buf), MSG_WAITALL),
+ send_len);
+}
+
TEST_F(tls, msg_more_unsent)
{
char const *test_str = "test_read";
@@ -912,6 +946,37 @@ TEST_F(tls, peek_and_splice)
EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
}
+#define MAX_FRAGS 48
+TEST_F(tls, splice_short)
+{
+ struct iovec sendchar_iov;
+ char read_buf[0x10000];
+ char sendbuf[0x100];
+ char sendchar = 'S';
+ int pipefds[2];
+ int i;
+
+ sendchar_iov.iov_base = &sendchar;
+ sendchar_iov.iov_len = 1;
+
+ memset(sendbuf, 's', sizeof(sendbuf));
+
+ ASSERT_GE(pipe2(pipefds, O_NONBLOCK), 0);
+ ASSERT_GE(fcntl(pipefds[0], F_SETPIPE_SZ, (MAX_FRAGS + 1) * 0x1000), 0);
+
+ for (i = 0; i < MAX_FRAGS; i++)
+ ASSERT_GE(vmsplice(pipefds[1], &sendchar_iov, 1, 0), 0);
+
+ ASSERT_EQ(write(pipefds[1], sendbuf, sizeof(sendbuf)), sizeof(sendbuf));
+
+ EXPECT_EQ(splice(pipefds[0], NULL, self->fd, NULL, MAX_FRAGS + 0x1000, 0),
+ MAX_FRAGS + sizeof(sendbuf));
+ EXPECT_EQ(recv(self->cfd, read_buf, sizeof(read_buf), 0), MAX_FRAGS + sizeof(sendbuf));
+ EXPECT_EQ(recv(self->cfd, read_buf, sizeof(read_buf), MSG_DONTWAIT), -1);
+ EXPECT_EQ(errno, EAGAIN);
+}
+#undef MAX_FRAGS
+
TEST_F(tls, recvmsg_single)
{
char const *test_str = "test_recvmsg_single";
diff --git a/tools/testing/selftests/net/vlan_bridge_binding.sh b/tools/testing/selftests/net/vlan_bridge_binding.sh
index db481af9b6b3..e8c02c64e03a 100755
--- a/tools/testing/selftests/net/vlan_bridge_binding.sh
+++ b/tools/testing/selftests/net/vlan_bridge_binding.sh
@@ -249,6 +249,8 @@ test_binding_toggle_off_when_upper_down()
do_test_binding_off : "on->off when upper down"
}
+require_command jq
+
trap defer_scopes_cleanup EXIT
setup_prepare
tests_run
diff --git a/tools/testing/selftests/nolibc/Makefile.nolibc b/tools/testing/selftests/nolibc/Makefile.nolibc
index 330e000baeb1..f9d43cbdc894 100644
--- a/tools/testing/selftests/nolibc/Makefile.nolibc
+++ b/tools/testing/selftests/nolibc/Makefile.nolibc
@@ -87,7 +87,6 @@ IMAGE_riscv = arch/riscv/boot/Image
IMAGE_riscv32 = arch/riscv/boot/Image
IMAGE_riscv64 = arch/riscv/boot/Image
IMAGE_s390x = arch/s390/boot/bzImage
-IMAGE_s390 = arch/s390/boot/bzImage
IMAGE_loongarch = arch/loongarch/boot/vmlinuz.efi
IMAGE_sparc32 = arch/sparc/boot/image
IMAGE_sparc64 = arch/sparc/boot/image
@@ -117,7 +116,6 @@ DEFCONFIG_riscv = defconfig
DEFCONFIG_riscv32 = rv32_defconfig
DEFCONFIG_riscv64 = defconfig
DEFCONFIG_s390x = defconfig
-DEFCONFIG_s390 = defconfig compat.config
DEFCONFIG_loongarch = defconfig
DEFCONFIG_sparc32 = sparc32_defconfig
DEFCONFIG_sparc64 = sparc64_defconfig
@@ -156,7 +154,6 @@ QEMU_ARCH_riscv = riscv64
QEMU_ARCH_riscv32 = riscv32
QEMU_ARCH_riscv64 = riscv64
QEMU_ARCH_s390x = s390x
-QEMU_ARCH_s390 = s390x
QEMU_ARCH_loongarch = loongarch64
QEMU_ARCH_sparc32 = sparc
QEMU_ARCH_sparc64 = sparc64
@@ -197,7 +194,6 @@ QEMU_ARGS_riscv = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_T
QEMU_ARGS_riscv32 = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
QEMU_ARGS_riscv64 = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
QEMU_ARGS_s390x = -M s390-ccw-virtio -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
-QEMU_ARGS_s390 = -M s390-ccw-virtio -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
QEMU_ARGS_loongarch = -M virt -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
QEMU_ARGS_sparc32 = -M SS-5 -m 256M -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
QEMU_ARGS_sparc64 = -M sun4u -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
@@ -223,13 +219,13 @@ CFLAGS_ppc = -m32 -mbig-endian -mno-vsx $(call cc-option,-mmultiple)
CFLAGS_ppc64 = -m64 -mbig-endian -mno-vsx $(call cc-option,-mmultiple)
CFLAGS_ppc64le = -m64 -mlittle-endian -mno-vsx $(call cc-option,-mabi=elfv2)
CFLAGS_s390x = -m64
-CFLAGS_s390 = -m31
CFLAGS_mips32le = -EL -mabi=32 -fPIC
CFLAGS_mips32be = -EB -mabi=32
CFLAGS_mipsn32le = -EL -mabi=n32 -fPIC -march=mips64r2
CFLAGS_mipsn32be = -EB -mabi=n32 -march=mips64r6
CFLAGS_mips64le = -EL -mabi=64 -march=mips64r6
CFLAGS_mips64be = -EB -mabi=64 -march=mips64r2
+CFLAGS_loongarch = $(if $(LLVM),-fuse-ld=lld)
CFLAGS_sparc32 = $(call cc-option,-m32)
CFLAGS_sh4 = -ml -m4
ifeq ($(origin XARCH),command line)
diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
index 29de21595fc9..3c5a226dad3a 100644
--- a/tools/testing/selftests/nolibc/nolibc-test.c
+++ b/tools/testing/selftests/nolibc/nolibc-test.c
@@ -25,6 +25,7 @@
#include <sys/sysmacros.h>
#include <sys/time.h>
#include <sys/timerfd.h>
+#include <sys/uio.h>
#include <sys/utsname.h>
#include <sys/wait.h>
#include <dirent.h>
@@ -1282,6 +1283,10 @@ int run_syscall(int min, int max)
int proc;
int test;
int tmp;
+ struct iovec iov_one = {
+ .iov_base = &tmp,
+ .iov_len = 1,
+ };
int ret = 0;
void *p1, *p2;
int has_gettid = 1;
@@ -1343,6 +1348,8 @@ int run_syscall(int min, int max)
CASE_TEST(dup3_0); tmp = dup3(0, 100, 0); EXPECT_SYSNE(1, tmp, -1); close(tmp); break;
CASE_TEST(dup3_m1); tmp = dup3(-1, 100, 0); EXPECT_SYSER(1, tmp, -1, EBADF); if (tmp != -1) close(tmp); break;
CASE_TEST(execve_root); EXPECT_SYSER(1, execve("/", (char*[]){ [0] = "/", [1] = NULL }, NULL), -1, EACCES); break;
+ CASE_TEST(fchdir_stdin); EXPECT_SYSER(1, fchdir(STDIN_FILENO), -1, ENOTDIR); break;
+ CASE_TEST(fchdir_badfd); EXPECT_SYSER(1, fchdir(-1), -1, EBADF); break;
CASE_TEST(file_stream); EXPECT_SYSZR(1, test_file_stream()); break;
CASE_TEST(fork); EXPECT_SYSZR(1, test_fork(FORK_STANDARD)); break;
CASE_TEST(getdents64_root); EXPECT_SYSNE(1, test_getdents64("/"), -1); break;
@@ -1395,6 +1402,10 @@ int run_syscall(int min, int max)
CASE_TEST(waitpid_child); EXPECT_SYSER(1, waitpid(getpid(), &tmp, WNOHANG), -1, ECHILD); break;
CASE_TEST(write_badf); EXPECT_SYSER(1, write(-1, &tmp, 1), -1, EBADF); break;
CASE_TEST(write_zero); EXPECT_SYSZR(1, write(1, &tmp, 0)); break;
+ CASE_TEST(readv_badf); EXPECT_SYSER(1, readv(-1, &iov_one, 1), -1, EBADF); break;
+ CASE_TEST(readv_zero); EXPECT_SYSZR(1, readv(1, NULL, 0)); break;
+ CASE_TEST(writev_badf); EXPECT_SYSER(1, writev(-1, &iov_one, 1), -1, EBADF); break;
+ CASE_TEST(writev_zero); EXPECT_SYSZR(1, writev(1, NULL, 0)); break;
CASE_TEST(syscall_noargs); EXPECT_SYSEQ(1, syscall(__NR_getpid), getpid()); break;
CASE_TEST(syscall_args); EXPECT_SYSER(1, syscall(__NR_statx, 0, NULL, 0, 0, NULL), -1, EFAULT); break;
CASE_TEST(namespace); EXPECT_SYSZR(euid0 && proc, test_namespace()); break;
@@ -1540,6 +1551,8 @@ int run_stdlib(int min, int max)
CASE_TEST(abs); EXPECT_EQ(1, abs(-10), 10); break;
CASE_TEST(abs_noop); EXPECT_EQ(1, abs(10), 10); break;
CASE_TEST(difftime); EXPECT_ZR(1, test_difftime()); break;
+ CASE_TEST(memchr_foobar6_o); EXPECT_STREQ(1, memchr("foobar", 'o', 6), "oobar"); break;
+ CASE_TEST(memchr_foobar3_b); EXPECT_STRZR(1, memchr("foobar", 'b', 3)); break;
case __LINE__:
return ret; /* must be last */
diff --git a/tools/testing/selftests/nolibc/run-tests.sh b/tools/testing/selftests/nolibc/run-tests.sh
index e8af1fb505cf..3917cfb8fdc4 100755
--- a/tools/testing/selftests/nolibc/run-tests.sh
+++ b/tools/testing/selftests/nolibc/run-tests.sh
@@ -23,7 +23,7 @@ all_archs=(
mips32le mips32be mipsn32le mipsn32be mips64le mips64be
ppc ppc64 ppc64le
riscv32 riscv64
- s390x s390
+ s390x
loongarch
sparc32 sparc64
m68k
@@ -169,7 +169,7 @@ test_arch() {
cross_compile=$(realpath "${download_location}gcc-${crosstool_version}-nolibc/${ct_arch}-${ct_abi}/bin/${ct_arch}-${ct_abi}-")
build_dir="${build_location}/${arch}"
if [ "$werror" -ne 0 ]; then
- CFLAGS_EXTRA="$CFLAGS_EXTRA -Werror"
+ CFLAGS_EXTRA="$CFLAGS_EXTRA -Werror -Wl,--fatal-warnings"
fi
MAKE=(make -f Makefile.nolibc -j"${nproc}" XARCH="${arch}" CROSS_COMPILE="${cross_compile}" LLVM="${llvm}" O="${build_dir}")
@@ -185,10 +185,6 @@ test_arch() {
exit 1
esac
printf '%-15s' "$arch:"
- if [ "$arch" = "s390" ] && ([ "$llvm" = "1" ] || [ "$test_mode" = "user" ]); then
- echo "Unsupported configuration"
- return
- fi
if [ "$arch" = "m68k" -o "$arch" = "sh4" ] && [ "$llvm" = "1" ]; then
echo "Unsupported configuration"
return
diff --git a/tools/testing/selftests/pci_endpoint/pci_endpoint_test.c b/tools/testing/selftests/pci_endpoint/pci_endpoint_test.c
index da0db0e7c969..cd9075444c32 100644
--- a/tools/testing/selftests/pci_endpoint/pci_endpoint_test.c
+++ b/tools/testing/selftests/pci_endpoint/pci_endpoint_test.c
@@ -121,6 +121,8 @@ TEST_F(pci_ep_basic, MSI_TEST)
for (i = 1; i <= 32; i++) {
pci_ep_ioctl(PCITEST_MSI, i);
+ if (ret == -EINVAL)
+ SKIP(return, "MSI%d is disabled", i);
EXPECT_FALSE(ret) TH_LOG("Test failed for MSI%d", i);
}
}
@@ -137,6 +139,8 @@ TEST_F(pci_ep_basic, MSIX_TEST)
for (i = 1; i <= 2048; i++) {
pci_ep_ioctl(PCITEST_MSIX, i);
+ if (ret == -EINVAL)
+ SKIP(return, "MSI-X%d is disabled", i);
EXPECT_FALSE(ret) TH_LOG("Test failed for MSI-X%d", i);
}
}
diff --git a/tools/testing/selftests/pidfd/pidfd.h b/tools/testing/selftests/pidfd/pidfd.h
index f87993def738..d60f10a873bb 100644
--- a/tools/testing/selftests/pidfd/pidfd.h
+++ b/tools/testing/selftests/pidfd/pidfd.h
@@ -148,6 +148,14 @@
#define PIDFD_INFO_COREDUMP (1UL << 4)
#endif
+#ifndef PIDFD_INFO_SUPPORTED_MASK
+#define PIDFD_INFO_SUPPORTED_MASK (1UL << 5)
+#endif
+
+#ifndef PIDFD_INFO_COREDUMP_SIGNAL
+#define PIDFD_INFO_COREDUMP_SIGNAL (1UL << 6)
+#endif
+
#ifndef PIDFD_COREDUMPED
#define PIDFD_COREDUMPED (1U << 0) /* Did crash and... */
#endif
@@ -183,8 +191,11 @@ struct pidfd_info {
__u32 fsuid;
__u32 fsgid;
__s32 exit_code;
- __u32 coredump_mask;
- __u32 __spare1;
+ struct {
+ __u32 coredump_mask;
+ __u32 coredump_signal;
+ };
+ __u64 supported_mask;
};
/*
diff --git a/tools/testing/selftests/pidfd/pidfd_info_test.c b/tools/testing/selftests/pidfd/pidfd_info_test.c
index a0eb6e81eaa2..cb5430a2fd75 100644
--- a/tools/testing/selftests/pidfd/pidfd_info_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_info_test.c
@@ -690,4 +690,77 @@ TEST_F(pidfd_info, thread_group_exec_thread)
EXPECT_EQ(close(pidfd_thread), 0);
}
+/*
+ * Test: PIDFD_INFO_SUPPORTED_MASK field
+ *
+ * Verify that when PIDFD_INFO_SUPPORTED_MASK is requested, the kernel
+ * returns the supported_mask field indicating which flags the kernel supports.
+ */
+TEST(supported_mask_field)
+{
+ struct pidfd_info info = {
+ .mask = PIDFD_INFO_SUPPORTED_MASK,
+ };
+ int pidfd;
+ pid_t pid;
+
+ pid = create_child(&pidfd, 0);
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0)
+ pause();
+
+ /* Request supported_mask field */
+ ASSERT_EQ(ioctl(pidfd, PIDFD_GET_INFO, &info), 0);
+
+ /* Verify PIDFD_INFO_SUPPORTED_MASK is set in the reply */
+ ASSERT_TRUE(!!(info.mask & PIDFD_INFO_SUPPORTED_MASK));
+
+ /* Verify supported_mask contains expected flags */
+ ASSERT_TRUE(!!(info.supported_mask & PIDFD_INFO_PID));
+ ASSERT_TRUE(!!(info.supported_mask & PIDFD_INFO_CREDS));
+ ASSERT_TRUE(!!(info.supported_mask & PIDFD_INFO_CGROUPID));
+ ASSERT_TRUE(!!(info.supported_mask & PIDFD_INFO_EXIT));
+ ASSERT_TRUE(!!(info.supported_mask & PIDFD_INFO_COREDUMP));
+ ASSERT_TRUE(!!(info.supported_mask & PIDFD_INFO_SUPPORTED_MASK));
+ ASSERT_TRUE(!!(info.supported_mask & PIDFD_INFO_COREDUMP_SIGNAL));
+
+ /* Clean up */
+ sys_pidfd_send_signal(pidfd, SIGKILL, NULL, 0);
+ sys_waitid(P_PIDFD, pidfd, NULL, WEXITED);
+ close(pidfd);
+}
+
+/*
+ * Test: PIDFD_INFO_SUPPORTED_MASK always available
+ *
+ * Verify that supported_mask is returned even when other fields are requested.
+ */
+TEST(supported_mask_with_other_fields)
+{
+ struct pidfd_info info = {
+ .mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_SUPPORTED_MASK,
+ };
+ int pidfd;
+ pid_t pid;
+
+ pid = create_child(&pidfd, 0);
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0)
+ pause();
+
+ ASSERT_EQ(ioctl(pidfd, PIDFD_GET_INFO, &info), 0);
+
+ /* Both fields should be present */
+ ASSERT_TRUE(!!(info.mask & PIDFD_INFO_CGROUPID));
+ ASSERT_TRUE(!!(info.mask & PIDFD_INFO_SUPPORTED_MASK));
+ ASSERT_NE(info.supported_mask, 0);
+
+ /* Clean up */
+ sys_pidfd_send_signal(pidfd, SIGKILL, NULL, 0);
+ sys_waitid(P_PIDFD, pidfd, NULL, WEXITED);
+ close(pidfd);
+}
+
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/rcutorture/bin/jitter.sh b/tools/testing/selftests/rcutorture/bin/jitter.sh
index fd1ffaa5a135..3c1e5d3f8805 100755
--- a/tools/testing/selftests/rcutorture/bin/jitter.sh
+++ b/tools/testing/selftests/rcutorture/bin/jitter.sh
@@ -39,6 +39,22 @@ do
fi
done
+# Uses global variables startsecs, startns, endsecs, endns, and limit.
+# Exit code is success for time not yet elapsed and failure otherwise.
+function timecheck {
+ local done=`awk -v limit=$limit \
+ -v startsecs=$startsecs \
+ -v startns=$startns \
+ -v endsecs=$endsecs \
+ -v endns=$endns < /dev/null '
+ BEGIN {
+ delta = (endsecs - startsecs) * 1000 * 1000;
+ delta += int((endns - startns) / 1000);
+ print delta >= limit;
+ }'`
+ return $done
+}
+
while :
do
# Check for done.
@@ -85,15 +101,20 @@ do
n=$(($n+1))
sleep .$sleeptime
- # Spin a random duration
+ # Spin a random duration, but with rather coarse granularity.
limit=`awk -v me=$me -v n=$n -v spinmax=$spinmax 'BEGIN {
srand(n + me + systime());
printf("%06d", int(rand() * spinmax));
}' < /dev/null`
n=$(($n+1))
- for i in {1..$limit}
+ startsecs=`date +%s`
+ startns=`date +%N`
+ endsecs=$startns
+ endns=$endns
+ while timecheck
do
- echo > /dev/null
+ endsecs=`date +%s`
+ endns=`date +%N`
done
done
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-again.sh b/tools/testing/selftests/rcutorture/bin/kvm-again.sh
index 88ca4e368489..b5239b52cb5d 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-again.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-again.sh
@@ -31,7 +31,7 @@ fi
if ! cp "$oldrun/scenarios" $T/scenarios.oldrun
then
# Later on, can reconstitute this from console.log files.
- echo Prior run batches file does not exist: $oldrun/batches
+ echo Prior run scenarios file does not exist: $oldrun/scenarios
exit 1
fi
@@ -68,7 +68,7 @@ usage () {
echo " --datestamp string"
echo " --dryrun"
echo " --duration minutes | <seconds>s | <hours>h | <days>d"
- echo " --link hard|soft|copy"
+ echo " --link hard|soft|copy|inplace|inplace-force"
echo " --remote"
echo " --rundir /new/res/path"
echo "Command line: $scriptname $args"
@@ -121,7 +121,7 @@ do
shift
;;
--link)
- checkarg --link "hard|soft|copy" "$#" "$2" 'hard\|soft\|copy' '^--'
+ checkarg --link "hard|soft|copy|inplace|inplace-force" "$#" "$2" 'hard\|soft\|copy\|inplace\|inplace-force' '^--'
case "$2" in
copy)
arg_link="cp -R"
@@ -132,6 +132,14 @@ do
soft)
arg_link="cp -Rs"
;;
+ inplace)
+ arg_link="inplace"
+ rundir="$oldrun"
+ ;;
+ inplace-force)
+ arg_link="inplace-force"
+ rundir="$oldrun"
+ ;;
esac
shift
;;
@@ -172,21 +180,37 @@ fi
echo ---- Re-run results directory: $rundir
-# Copy old run directory tree over and adjust.
-mkdir -p "`dirname "$rundir"`"
-if ! $arg_link "$oldrun" "$rundir"
-then
- echo "Cannot copy from $oldrun to $rundir."
- usage
-fi
-rm -f "$rundir"/*/{console.log,console.log.diags,qemu_pid,qemu-pid,qemu-retval,Warnings,kvm-test-1-run.sh.out,kvm-test-1-run-qemu.sh.out,vmlinux} "$rundir"/log
-touch "$rundir/log"
-echo $scriptname $args | tee -a "$rundir/log"
-echo $oldrun > "$rundir/re-run"
-if ! test -d "$rundir/../../bin"
+if test "$oldrun" != "$rundir"
then
- $arg_link "$oldrun/../../bin" "$rundir/../.."
+ # Copy old run directory tree over and adjust.
+ mkdir -p "`dirname "$rundir"`"
+ if ! $arg_link "$oldrun" "$rundir"
+ then
+ echo "Cannot copy from $oldrun to $rundir."
+ usage
+ fi
+ rm -f "$rundir"/*/{console.log,console.log.diags,qemu_pid,qemu-pid,qemu-retval,Warnings,kvm-test-1-run.sh.out,kvm-test-1-run-qemu.sh.out,vmlinux} "$rundir"/log
+ touch "$rundir/log"
+ echo $scriptname $args | tee -a "$rundir/log"
+ echo $oldrun > "$rundir/re-run"
+ if ! test -d "$rundir/../../bin"
+ then
+ $arg_link "$oldrun/../../bin" "$rundir/../.."
+ fi
+else
+ # Check for a run having already happened.
+ find "$rundir" -name console.log -print > $T/oldrun-console.log
+ if test -s $T/oldrun-console.log
+ then
+ echo Run already took place in $rundir
+ if test "$arg_link" = inplace
+ then
+ usage
+ fi
+ fi
fi
+
+# Find runs to be done based on their qemu-cmd files.
for i in $rundir/*/qemu-cmd
do
cp "$i" $T
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-series.sh b/tools/testing/selftests/rcutorture/bin/kvm-series.sh
new file mode 100755
index 000000000000..2ff905a1853b
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-series.sh
@@ -0,0 +1,116 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Usage: kvm-series.sh config-list commit-id-list [ kvm.sh parameters ]
+#
+# Tests the specified list of unadorned configs ("TREE01 SRCU-P" but not
+# "CFLIST" or "3*TRACE01") and an indication of a set of commits to test,
+# then runs each commit through the specified list of commits using kvm.sh.
+# The runs are grouped into a -series/config/commit directory tree.
+# Each run defaults to a duration of one minute.
+#
+# Run in top-level Linux source directory. Please note that this is in
+# no way a replacement for "git bisect"!!!
+#
+# This script is intended to replace kvm-check-branches.sh by providing
+# ease of use and faster execution.
+
+T="`mktemp -d ${TMPDIR-/tmp}/kvm-series.sh.XXXXXX`"
+trap 'rm -rf $T' 0
+
+scriptname=$0
+args="$*"
+
+config_list="${1}"
+if test -z "${config_list}"
+then
+ echo "$0: Need a quoted list of --config arguments for first argument."
+ exit 1
+fi
+if test -z "${config_list}" || echo "${config_list}" | grep -q '\*'
+then
+ echo "$0: Repetition ('*') not allowed in config list."
+ exit 1
+fi
+
+commit_list="${2}"
+if test -z "${commit_list}"
+then
+ echo "$0: Need a list of commits (e.g., HEAD^^^..) for second argument."
+ exit 2
+fi
+git log --pretty=format:"%h" "${commit_list}" > $T/commits
+ret=$?
+if test "${ret}" -ne 0
+then
+ echo "$0: Invalid commit list ('${commit_list}')."
+ exit 2
+fi
+sha1_list=`cat $T/commits`
+
+shift
+shift
+
+RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE
+PATH=${RCUTORTURE}/bin:$PATH; export PATH
+. functions.sh
+
+ret=0
+nfail=0
+nsuccess=0
+faillist=
+successlist=
+cursha1="`git rev-parse --abbrev-ref HEAD`"
+ds="`date +%Y.%m.%d-%H.%M.%S`-series"
+startdate="`date`"
+starttime="`get_starttime`"
+
+echo " --- " $scriptname $args | tee -a $T/log
+echo " --- Results directory: " $ds | tee -a $T/log
+
+for config in ${config_list}
+do
+ sha_n=0
+ for sha in ${sha1_list}
+ do
+ sha1=${sha_n}.${sha} # Enable "sort -k1nr" to list commits in order.
+ echo Starting ${config}/${sha1} at `date` | tee -a $T/log
+ git checkout "${sha}"
+ time tools/testing/selftests/rcutorture/bin/kvm.sh --configs "$config" --datestamp "$ds/${config}/${sha1}" --duration 1 "$@"
+ curret=$?
+ if test "${curret}" -ne 0
+ then
+ nfail=$((nfail+1))
+ faillist="$faillist ${config}/${sha1}(${curret})"
+ else
+ nsuccess=$((nsuccess+1))
+ successlist="$successlist ${config}/${sha1}"
+ # Successful run, so remove large files.
+ rm -f ${RCUTORTURE}/$ds/${config}/${sha1}/{vmlinux,bzImage,System.map,Module.symvers}
+ fi
+ if test "${ret}" -eq 0
+ then
+ ret=${curret}
+ fi
+ sha_n=$((sha_n+1))
+ done
+done
+git checkout "${cursha1}"
+
+echo ${nsuccess} SUCCESSES: | tee -a $T/log
+echo ${successlist} | fmt | tee -a $T/log
+echo | tee -a $T/log
+echo ${nfail} FAILURES: | tee -a $T/log
+echo ${faillist} | fmt | tee -a $T/log
+if test -n "${faillist}"
+then
+ echo | tee -a $T/log
+ echo Failures across commits: | tee -a $T/log
+ echo ${faillist} | tr ' ' '\012' | sed -e 's,^[^/]*/,,' -e 's/([0-9]*)//' |
+ sort | uniq -c | sort -k2n | tee -a $T/log
+fi
+echo Started at $startdate, ended at `date`, duration `get_starttime_duration $starttime`. | tee -a $T/log
+echo Summary: Successes: ${nsuccess} Failures: ${nfail} | tee -a $T/log
+cp $T/log tools/testing/selftests/rcutorture/res/${ds}
+
+exit "${ret}"
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index 617cba339d28..fff15821c44c 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -199,7 +199,7 @@ do
fi
;;
--kconfig|--kconfigs)
- checkarg --kconfig "(Kconfig options)" $# "$2" '^\(#CHECK#\)\?CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\|"[^"]*"\)\( \+\(#CHECK#\)\?CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\|"[^"]*"\)\)* *$' '^error$'
+ checkarg --kconfig "(Kconfig options)" $# "$2" '^\(#CHECK#\)\?CONFIG_[A-Z0-9_]\+=\([ynm]\|-\?[0-9]\+\|"[^"]*"\)\( \+\(#CHECK#\)\?CONFIG_[A-Z0-9_]\+=\([ynm]\|-\?[0-9]\+\|"[^"]*"\)\)* *$' '^error$'
TORTURE_KCONFIG_ARG="`echo "$TORTURE_KCONFIG_ARG $2" | sed -e 's/^ *//' -e 's/ *$//'`"
shift
;;
diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh
index 611bc03a8dc7..a33ba109ef0b 100755
--- a/tools/testing/selftests/rcutorture/bin/torture.sh
+++ b/tools/testing/selftests/rcutorture/bin/torture.sh
@@ -94,6 +94,7 @@ usage () {
echo " --do-kvfree / --do-no-kvfree / --no-kvfree"
echo " --do-locktorture / --do-no-locktorture / --no-locktorture"
echo " --do-none"
+ echo " --do-normal / --do-no-normal / --no-normal"
echo " --do-rcuscale / --do-no-rcuscale / --no-rcuscale"
echo " --do-rcutasksflavors / --do-no-rcutasksflavors / --no-rcutasksflavors"
echo " --do-rcutorture / --do-no-rcutorture / --no-rcutorture"
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE04 b/tools/testing/selftests/rcutorture/configs/rcu/TREE04
index dc4985064b3a..67caf4276bb0 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE04
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE04
@@ -16,3 +16,4 @@ CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
CONFIG_RCU_EXPERT=y
CONFIG_RCU_EQS_DEBUG=y
CONFIG_RCU_LAZY=y
+CONFIG_RCU_DYNTICKS_TORTURE=y
diff --git a/tools/testing/selftests/rseq/rseq-s390.h b/tools/testing/selftests/rseq/rseq-s390.h
index 33baaa9f9997..e7b858cd3736 100644
--- a/tools/testing/selftests/rseq/rseq-s390.h
+++ b/tools/testing/selftests/rseq/rseq-s390.h
@@ -28,8 +28,6 @@ do { \
RSEQ_WRITE_ONCE(*(p), v); \
} while (0)
-#ifdef __s390x__
-
#define LONG_L "lg"
#define LONG_S "stg"
#define LONG_LT_R "ltgr"
@@ -63,43 +61,6 @@ do { \
".quad " __rseq_str(start_ip) ", " __rseq_str(exit_ip) "\n\t" \
".popsection\n\t"
-#elif __s390__
-
-#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, \
- start_ip, post_commit_offset, abort_ip) \
- ".pushsection __rseq_cs, \"aw\"\n\t" \
- ".balign 32\n\t" \
- __rseq_str(label) ":\n\t" \
- ".long " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
- ".long 0x0, " __rseq_str(start_ip) ", 0x0, " __rseq_str(post_commit_offset) ", 0x0, " __rseq_str(abort_ip) "\n\t" \
- ".popsection\n\t" \
- ".pushsection __rseq_cs_ptr_array, \"aw\"\n\t" \
- ".long 0x0, " __rseq_str(label) "b\n\t" \
- ".popsection\n\t"
-
-/*
- * Exit points of a rseq critical section consist of all instructions outside
- * of the critical section where a critical section can either branch to or
- * reach through the normal course of its execution. The abort IP and the
- * post-commit IP are already part of the __rseq_cs section and should not be
- * explicitly defined as additional exit points. Knowing all exit points is
- * useful to assist debuggers stepping over the critical section.
- */
-#define RSEQ_ASM_DEFINE_EXIT_POINT(start_ip, exit_ip) \
- ".pushsection __rseq_exit_point_array, \"aw\"\n\t" \
- ".long 0x0, " __rseq_str(start_ip) ", 0x0, " __rseq_str(exit_ip) "\n\t" \
- ".popsection\n\t"
-
-#define LONG_L "l"
-#define LONG_S "st"
-#define LONG_LT_R "ltr"
-#define LONG_CMP "c"
-#define LONG_CMP_R "cr"
-#define LONG_ADDI "ahi"
-#define LONG_ADD_R "ar"
-
-#endif
-
#define RSEQ_ASM_DEFINE_TABLE(label, start_ip, post_commit_ip, abort_ip) \
__RSEQ_ASM_DEFINE_TABLE(label, 0x0, 0x0, start_ip, \
(post_commit_ip - start_ip), abort_ip)
diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
index 998e5a2f4579..0091bcd91c2c 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
@@ -961,5 +961,49 @@
"teardown": [
"$TC qdisc del dev $DUMMY root"
]
+ },
+ {
+ "id": "4989",
+ "name": "Try to add an fq child to an ingress qdisc",
+ "category": [
+ "qdisc",
+ "ingress"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY handle ffff:0 ingress"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY parent ffff:0 handle ffe0:0 fq",
+ "expExitCode": "2",
+ "verifyCmd": "$TC -j qdisc ls dev $DUMMY handle ffe0:",
+ "matchJSON": [],
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY ingress"
+ ]
+ },
+ {
+ "id": "c2b0",
+ "name": "Try to add an fq child to a clsact qdisc",
+ "category": [
+ "qdisc",
+ "ingress"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY handle ffff:0 clsact"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY parent ffff:0 handle ffe0:0 fq",
+ "expExitCode": "2",
+ "verifyCmd": "$TC -j qdisc ls dev $DUMMY handle ffe0:",
+ "matchJSON": [],
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY clsact"
+ ]
}
]
diff --git a/tools/testing/selftests/timers/nanosleep.c b/tools/testing/selftests/timers/nanosleep.c
index 252c6308c569..10badae13ebe 100644
--- a/tools/testing/selftests/timers/nanosleep.c
+++ b/tools/testing/selftests/timers/nanosleep.c
@@ -116,6 +116,56 @@ int nanosleep_test(int clockid, long long ns)
return 0;
}
+static void dummy_event_handler(int val)
+{
+ /* No action needed */
+}
+
+static int nanosleep_test_remaining(int clockid)
+{
+ struct timespec rqtp = {}, rmtp = {};
+ struct itimerspec itimer = {};
+ struct sigaction sa = {};
+ timer_t timer;
+ int ret;
+
+ sa.sa_handler = dummy_event_handler;
+ ret = sigaction(SIGALRM, &sa, NULL);
+ if (ret)
+ return -1;
+
+ ret = timer_create(clockid, NULL, &timer);
+ if (ret)
+ return -1;
+
+ itimer.it_value.tv_nsec = NSEC_PER_SEC / 4;
+ ret = timer_settime(timer, 0, &itimer, NULL);
+ if (ret)
+ return -1;
+
+ rqtp.tv_nsec = NSEC_PER_SEC / 2;
+ ret = clock_nanosleep(clockid, 0, &rqtp, &rmtp);
+ if (ret != EINTR)
+ return -1;
+
+ ret = timer_delete(timer);
+ if (ret)
+ return -1;
+
+ sa.sa_handler = SIG_DFL;
+ ret = sigaction(SIGALRM, &sa, NULL);
+ if (ret)
+ return -1;
+
+ if (!in_order((struct timespec) {}, rmtp))
+ return -1;
+
+ if (!in_order(rmtp, rqtp))
+ return -1;
+
+ return 0;
+}
+
int main(int argc, char **argv)
{
long long length;
@@ -150,6 +200,11 @@ int main(int argc, char **argv)
}
length *= 100;
}
+ ret = nanosleep_test_remaining(clockid);
+ if (ret < 0) {
+ ksft_test_result_fail("%-31s\n", clockstring(clockid));
+ ksft_exit_fail();
+ }
ksft_test_result_pass("%-31s\n", clockstring(clockid));
next:
ret = 0;
diff --git a/tools/testing/selftests/timers/posix_timers.c b/tools/testing/selftests/timers/posix_timers.c
index f0eceb0faf34..a563c438ac79 100644
--- a/tools/testing/selftests/timers/posix_timers.c
+++ b/tools/testing/selftests/timers/posix_timers.c
@@ -18,6 +18,7 @@
#include <time.h>
#include <include/vdso/time64.h>
#include <pthread.h>
+#include <stdbool.h>
#include "../kselftest.h"
@@ -670,8 +671,14 @@ static void check_timer_create_exact(void)
int main(int argc, char **argv)
{
+ bool run_sig_ign_tests = ksft_min_kernel_version(6, 13);
+
ksft_print_header();
- ksft_set_plan(19);
+ if (run_sig_ign_tests) {
+ ksft_set_plan(19);
+ } else {
+ ksft_set_plan(10);
+ }
ksft_print_msg("Testing posix timers. False negative may happen on CPU execution \n");
ksft_print_msg("based timers if other threads run on the CPU...\n");
@@ -695,15 +702,20 @@ int main(int argc, char **argv)
check_timer_create(CLOCK_PROCESS_CPUTIME_ID, "CLOCK_PROCESS_CPUTIME_ID");
check_timer_distribution();
- check_sig_ign(0);
- check_sig_ign(1);
- check_rearm();
- check_delete();
- check_sigev_none(CLOCK_MONOTONIC, "CLOCK_MONOTONIC");
- check_sigev_none(CLOCK_PROCESS_CPUTIME_ID, "CLOCK_PROCESS_CPUTIME_ID");
- check_gettime(CLOCK_MONOTONIC, "CLOCK_MONOTONIC");
- check_gettime(CLOCK_PROCESS_CPUTIME_ID, "CLOCK_PROCESS_CPUTIME_ID");
- check_gettime(CLOCK_THREAD_CPUTIME_ID, "CLOCK_THREAD_CPUTIME_ID");
+ if (run_sig_ign_tests) {
+ check_sig_ign(0);
+ check_sig_ign(1);
+ check_rearm();
+ check_delete();
+ check_sigev_none(CLOCK_MONOTONIC, "CLOCK_MONOTONIC");
+ check_sigev_none(CLOCK_PROCESS_CPUTIME_ID, "CLOCK_PROCESS_CPUTIME_ID");
+ check_gettime(CLOCK_MONOTONIC, "CLOCK_MONOTONIC");
+ check_gettime(CLOCK_PROCESS_CPUTIME_ID, "CLOCK_PROCESS_CPUTIME_ID");
+ check_gettime(CLOCK_THREAD_CPUTIME_ID, "CLOCK_THREAD_CPUTIME_ID");
+ } else {
+ ksft_print_msg("Skipping SIG_IGN tests on kernel < 6.13\n");
+ }
+
check_overrun(CLOCK_MONOTONIC, "CLOCK_MONOTONIC");
check_overrun(CLOCK_PROCESS_CPUTIME_ID, "CLOCK_PROCESS_CPUTIME_ID");
check_overrun(CLOCK_THREAD_CPUTIME_ID, "CLOCK_THREAD_CPUTIME_ID");
diff --git a/tools/testing/selftests/user_events/perf_test.c b/tools/testing/selftests/user_events/perf_test.c
index 5288e768b207..68625362add2 100644
--- a/tools/testing/selftests/user_events/perf_test.c
+++ b/tools/testing/selftests/user_events/perf_test.c
@@ -236,7 +236,7 @@ TEST_F(user, perf_empty_events) {
ASSERT_EQ(1 << reg.enable_bit, self->check);
/* Ensure write shows up at correct offset */
- ASSERT_NE(-1, write(self->data_fd, &reg.write_index,
+ ASSERT_NE(-1, write(self->data_fd, (void *)&reg.write_index,
sizeof(reg.write_index)));
val = (void *)(((char *)perf_page) + perf_page->data_offset);
ASSERT_EQ(PERF_RECORD_SAMPLE, *val);
diff --git a/tools/testing/selftests/vDSO/vdso_config.h b/tools/testing/selftests/vDSO/vdso_config.h
index 5fdd0f362337..50c261005111 100644
--- a/tools/testing/selftests/vDSO/vdso_config.h
+++ b/tools/testing/selftests/vDSO/vdso_config.h
@@ -25,10 +25,6 @@
#define VDSO_VERSION 1
#define VDSO_NAMES 0
#define VDSO_32BIT 1
-#elif defined (__s390__) && !defined(__s390x__)
-#define VDSO_VERSION 2
-#define VDSO_NAMES 0
-#define VDSO_32BIT 1
#elif defined (__s390x__)
#define VDSO_VERSION 2
#define VDSO_NAMES 0
diff --git a/tools/testing/selftests/vfio/lib/include/vfio_util.h b/tools/testing/selftests/vfio/lib/include/vfio_util.h
index ed31606e01b7..69ec0c856481 100644
--- a/tools/testing/selftests/vfio/lib/include/vfio_util.h
+++ b/tools/testing/selftests/vfio/lib/include/vfio_util.h
@@ -4,9 +4,12 @@
#include <fcntl.h>
#include <string.h>
-#include <linux/vfio.h>
+
+#include <uapi/linux/types.h>
+#include <linux/iommufd.h>
#include <linux/list.h>
#include <linux/pci_regs.h>
+#include <linux/vfio.h>
#include "../../../kselftest.h"
@@ -185,6 +188,13 @@ struct vfio_pci_device {
struct vfio_pci_driver driver;
};
+struct iova_allocator {
+ struct iommu_iova_range *ranges;
+ u32 nranges;
+ u32 range_idx;
+ u64 range_offset;
+};
+
/*
* Return the BDF string of the device that the test should use.
*
@@ -206,10 +216,36 @@ struct vfio_pci_device *vfio_pci_device_init(const char *bdf, const char *iommu_
void vfio_pci_device_cleanup(struct vfio_pci_device *device);
void vfio_pci_device_reset(struct vfio_pci_device *device);
-void vfio_pci_dma_map(struct vfio_pci_device *device,
- struct vfio_dma_region *region);
-void vfio_pci_dma_unmap(struct vfio_pci_device *device,
- struct vfio_dma_region *region);
+struct iommu_iova_range *vfio_pci_iova_ranges(struct vfio_pci_device *device,
+ u32 *nranges);
+
+struct iova_allocator *iova_allocator_init(struct vfio_pci_device *device);
+void iova_allocator_cleanup(struct iova_allocator *allocator);
+iova_t iova_allocator_alloc(struct iova_allocator *allocator, size_t size);
+
+int __vfio_pci_dma_map(struct vfio_pci_device *device,
+ struct vfio_dma_region *region);
+int __vfio_pci_dma_unmap(struct vfio_pci_device *device,
+ struct vfio_dma_region *region,
+ u64 *unmapped);
+int __vfio_pci_dma_unmap_all(struct vfio_pci_device *device, u64 *unmapped);
+
+static inline void vfio_pci_dma_map(struct vfio_pci_device *device,
+ struct vfio_dma_region *region)
+{
+ VFIO_ASSERT_EQ(__vfio_pci_dma_map(device, region), 0);
+}
+
+static inline void vfio_pci_dma_unmap(struct vfio_pci_device *device,
+ struct vfio_dma_region *region)
+{
+ VFIO_ASSERT_EQ(__vfio_pci_dma_unmap(device, region, NULL), 0);
+}
+
+static inline void vfio_pci_dma_unmap_all(struct vfio_pci_device *device)
+{
+ VFIO_ASSERT_EQ(__vfio_pci_dma_unmap_all(device, NULL), 0);
+}
void vfio_pci_config_access(struct vfio_pci_device *device, bool write,
size_t config, size_t size, void *data);
diff --git a/tools/testing/selftests/vfio/lib/vfio_pci_device.c b/tools/testing/selftests/vfio/lib/vfio_pci_device.c
index 0921b2451ba5..b479a359da12 100644
--- a/tools/testing/selftests/vfio/lib/vfio_pci_device.c
+++ b/tools/testing/selftests/vfio/lib/vfio_pci_device.c
@@ -2,6 +2,7 @@
#include <dirent.h>
#include <fcntl.h>
#include <libgen.h>
+#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
@@ -11,11 +12,12 @@
#include <sys/mman.h>
#include <uapi/linux/types.h>
+#include <linux/iommufd.h>
#include <linux/limits.h>
#include <linux/mman.h>
+#include <linux/overflow.h>
#include <linux/types.h>
#include <linux/vfio.h>
-#include <linux/iommufd.h>
#include "../../../kselftest.h"
#include <vfio_util.h>
@@ -28,6 +30,249 @@
VFIO_ASSERT_EQ(__ret, 0, "ioctl(%s, %s, %s) returned %d\n", #_fd, #_op, #_arg, __ret); \
} while (0)
+static struct vfio_info_cap_header *next_cap_hdr(void *buf, u32 bufsz,
+ u32 *cap_offset)
+{
+ struct vfio_info_cap_header *hdr;
+
+ if (!*cap_offset)
+ return NULL;
+
+ VFIO_ASSERT_LT(*cap_offset, bufsz);
+ VFIO_ASSERT_GE(bufsz - *cap_offset, sizeof(*hdr));
+
+ hdr = (struct vfio_info_cap_header *)((u8 *)buf + *cap_offset);
+ *cap_offset = hdr->next;
+
+ return hdr;
+}
+
+static struct vfio_info_cap_header *vfio_iommu_info_cap_hdr(struct vfio_iommu_type1_info *info,
+ u16 cap_id)
+{
+ struct vfio_info_cap_header *hdr;
+ u32 cap_offset = info->cap_offset;
+ u32 max_depth;
+ u32 depth = 0;
+
+ if (!(info->flags & VFIO_IOMMU_INFO_CAPS))
+ return NULL;
+
+ if (cap_offset)
+ VFIO_ASSERT_GE(cap_offset, sizeof(*info));
+
+ max_depth = (info->argsz - sizeof(*info)) / sizeof(*hdr);
+
+ while ((hdr = next_cap_hdr(info, info->argsz, &cap_offset))) {
+ depth++;
+ VFIO_ASSERT_LE(depth, max_depth, "Capability chain contains a cycle\n");
+
+ if (hdr->id == cap_id)
+ return hdr;
+ }
+
+ return NULL;
+}
+
+/* Return buffer including capability chain, if present. Free with free() */
+static struct vfio_iommu_type1_info *vfio_iommu_get_info(struct vfio_pci_device *device)
+{
+ struct vfio_iommu_type1_info *info;
+
+ info = malloc(sizeof(*info));
+ VFIO_ASSERT_NOT_NULL(info);
+
+ *info = (struct vfio_iommu_type1_info) {
+ .argsz = sizeof(*info),
+ };
+
+ ioctl_assert(device->container_fd, VFIO_IOMMU_GET_INFO, info);
+ VFIO_ASSERT_GE(info->argsz, sizeof(*info));
+
+ info = realloc(info, info->argsz);
+ VFIO_ASSERT_NOT_NULL(info);
+
+ ioctl_assert(device->container_fd, VFIO_IOMMU_GET_INFO, info);
+ VFIO_ASSERT_GE(info->argsz, sizeof(*info));
+
+ return info;
+}
+
+/*
+ * Return iova ranges for the device's container. Normalize vfio_iommu_type1 to
+ * report iommufd's iommu_iova_range. Free with free().
+ */
+static struct iommu_iova_range *vfio_iommu_iova_ranges(struct vfio_pci_device *device,
+ u32 *nranges)
+{
+ struct vfio_iommu_type1_info_cap_iova_range *cap_range;
+ struct vfio_iommu_type1_info *info;
+ struct vfio_info_cap_header *hdr;
+ struct iommu_iova_range *ranges = NULL;
+
+ info = vfio_iommu_get_info(device);
+ hdr = vfio_iommu_info_cap_hdr(info, VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE);
+ VFIO_ASSERT_NOT_NULL(hdr);
+
+ cap_range = container_of(hdr, struct vfio_iommu_type1_info_cap_iova_range, header);
+ VFIO_ASSERT_GT(cap_range->nr_iovas, 0);
+
+ ranges = calloc(cap_range->nr_iovas, sizeof(*ranges));
+ VFIO_ASSERT_NOT_NULL(ranges);
+
+ for (u32 i = 0; i < cap_range->nr_iovas; i++) {
+ ranges[i] = (struct iommu_iova_range){
+ .start = cap_range->iova_ranges[i].start,
+ .last = cap_range->iova_ranges[i].end,
+ };
+ }
+
+ *nranges = cap_range->nr_iovas;
+
+ free(info);
+ return ranges;
+}
+
+/* Return iova ranges of the device's IOAS. Free with free() */
+static struct iommu_iova_range *iommufd_iova_ranges(struct vfio_pci_device *device,
+ u32 *nranges)
+{
+ struct iommu_iova_range *ranges;
+ int ret;
+
+ struct iommu_ioas_iova_ranges query = {
+ .size = sizeof(query),
+ .ioas_id = device->ioas_id,
+ };
+
+ ret = ioctl(device->iommufd, IOMMU_IOAS_IOVA_RANGES, &query);
+ VFIO_ASSERT_EQ(ret, -1);
+ VFIO_ASSERT_EQ(errno, EMSGSIZE);
+ VFIO_ASSERT_GT(query.num_iovas, 0);
+
+ ranges = calloc(query.num_iovas, sizeof(*ranges));
+ VFIO_ASSERT_NOT_NULL(ranges);
+
+ query.allowed_iovas = (uintptr_t)ranges;
+
+ ioctl_assert(device->iommufd, IOMMU_IOAS_IOVA_RANGES, &query);
+ *nranges = query.num_iovas;
+
+ return ranges;
+}
+
+static int iova_range_comp(const void *a, const void *b)
+{
+ const struct iommu_iova_range *ra = a, *rb = b;
+
+ if (ra->start < rb->start)
+ return -1;
+
+ if (ra->start > rb->start)
+ return 1;
+
+ return 0;
+}
+
+/* Return sorted IOVA ranges of the device. Free with free(). */
+struct iommu_iova_range *vfio_pci_iova_ranges(struct vfio_pci_device *device,
+ u32 *nranges)
+{
+ struct iommu_iova_range *ranges;
+
+ if (device->iommufd)
+ ranges = iommufd_iova_ranges(device, nranges);
+ else
+ ranges = vfio_iommu_iova_ranges(device, nranges);
+
+ if (!ranges)
+ return NULL;
+
+ VFIO_ASSERT_GT(*nranges, 0);
+
+ /* Sort and check that ranges are sane and non-overlapping */
+ qsort(ranges, *nranges, sizeof(*ranges), iova_range_comp);
+ VFIO_ASSERT_LT(ranges[0].start, ranges[0].last);
+
+ for (u32 i = 1; i < *nranges; i++) {
+ VFIO_ASSERT_LT(ranges[i].start, ranges[i].last);
+ VFIO_ASSERT_LT(ranges[i - 1].last, ranges[i].start);
+ }
+
+ return ranges;
+}
+
+struct iova_allocator *iova_allocator_init(struct vfio_pci_device *device)
+{
+ struct iova_allocator *allocator;
+ struct iommu_iova_range *ranges;
+ u32 nranges;
+
+ ranges = vfio_pci_iova_ranges(device, &nranges);
+ VFIO_ASSERT_NOT_NULL(ranges);
+
+ allocator = malloc(sizeof(*allocator));
+ VFIO_ASSERT_NOT_NULL(allocator);
+
+ *allocator = (struct iova_allocator){
+ .ranges = ranges,
+ .nranges = nranges,
+ .range_idx = 0,
+ .range_offset = 0,
+ };
+
+ return allocator;
+}
+
+void iova_allocator_cleanup(struct iova_allocator *allocator)
+{
+ free(allocator->ranges);
+ free(allocator);
+}
+
+iova_t iova_allocator_alloc(struct iova_allocator *allocator, size_t size)
+{
+ VFIO_ASSERT_GT(size, 0, "Invalid size arg, zero\n");
+ VFIO_ASSERT_EQ(size & (size - 1), 0, "Invalid size arg, non-power-of-2\n");
+
+ for (;;) {
+ struct iommu_iova_range *range;
+ iova_t iova, last;
+
+ VFIO_ASSERT_LT(allocator->range_idx, allocator->nranges,
+ "IOVA allocator out of space\n");
+
+ range = &allocator->ranges[allocator->range_idx];
+ iova = range->start + allocator->range_offset;
+
+ /* Check for sufficient space at the current offset */
+ if (check_add_overflow(iova, size - 1, &last) ||
+ last > range->last)
+ goto next_range;
+
+ /* Align iova to size */
+ iova = last & ~(size - 1);
+
+ /* Check for sufficient space at the aligned iova */
+ if (check_add_overflow(iova, size - 1, &last) ||
+ last > range->last)
+ goto next_range;
+
+ if (last == range->last) {
+ allocator->range_idx++;
+ allocator->range_offset = 0;
+ } else {
+ allocator->range_offset = last - range->start + 1;
+ }
+
+ return iova;
+
+next_range:
+ allocator->range_idx++;
+ allocator->range_offset = 0;
+ }
+}
+
iova_t __to_iova(struct vfio_pci_device *device, void *vaddr)
{
struct vfio_dma_region *region;
@@ -141,7 +386,7 @@ static void vfio_pci_irq_get(struct vfio_pci_device *device, u32 index,
ioctl_assert(device->fd, VFIO_DEVICE_GET_IRQ_INFO, irq_info);
}
-static void vfio_iommu_dma_map(struct vfio_pci_device *device,
+static int vfio_iommu_dma_map(struct vfio_pci_device *device,
struct vfio_dma_region *region)
{
struct vfio_iommu_type1_dma_map args = {
@@ -152,10 +397,13 @@ static void vfio_iommu_dma_map(struct vfio_pci_device *device,
.size = region->size,
};
- ioctl_assert(device->container_fd, VFIO_IOMMU_MAP_DMA, &args);
+ if (ioctl(device->container_fd, VFIO_IOMMU_MAP_DMA, &args))
+ return -errno;
+
+ return 0;
}
-static void iommufd_dma_map(struct vfio_pci_device *device,
+static int iommufd_dma_map(struct vfio_pci_device *device,
struct vfio_dma_region *region)
{
struct iommu_ioas_map args = {
@@ -169,54 +417,108 @@ static void iommufd_dma_map(struct vfio_pci_device *device,
.ioas_id = device->ioas_id,
};
- ioctl_assert(device->iommufd, IOMMU_IOAS_MAP, &args);
+ if (ioctl(device->iommufd, IOMMU_IOAS_MAP, &args))
+ return -errno;
+
+ return 0;
}
-void vfio_pci_dma_map(struct vfio_pci_device *device,
+int __vfio_pci_dma_map(struct vfio_pci_device *device,
struct vfio_dma_region *region)
{
+ int ret;
+
if (device->iommufd)
- iommufd_dma_map(device, region);
+ ret = iommufd_dma_map(device, region);
else
- vfio_iommu_dma_map(device, region);
+ ret = vfio_iommu_dma_map(device, region);
+
+ if (ret)
+ return ret;
list_add(&region->link, &device->dma_regions);
+
+ return 0;
}
-static void vfio_iommu_dma_unmap(struct vfio_pci_device *device,
- struct vfio_dma_region *region)
+static int vfio_iommu_dma_unmap(int fd, u64 iova, u64 size, u32 flags,
+ u64 *unmapped)
{
struct vfio_iommu_type1_dma_unmap args = {
.argsz = sizeof(args),
- .iova = region->iova,
- .size = region->size,
+ .iova = iova,
+ .size = size,
+ .flags = flags,
};
- ioctl_assert(device->container_fd, VFIO_IOMMU_UNMAP_DMA, &args);
+ if (ioctl(fd, VFIO_IOMMU_UNMAP_DMA, &args))
+ return -errno;
+
+ if (unmapped)
+ *unmapped = args.size;
+
+ return 0;
}
-static void iommufd_dma_unmap(struct vfio_pci_device *device,
- struct vfio_dma_region *region)
+static int iommufd_dma_unmap(int fd, u64 iova, u64 length, u32 ioas_id,
+ u64 *unmapped)
{
struct iommu_ioas_unmap args = {
.size = sizeof(args),
- .iova = region->iova,
- .length = region->size,
- .ioas_id = device->ioas_id,
+ .iova = iova,
+ .length = length,
+ .ioas_id = ioas_id,
};
- ioctl_assert(device->iommufd, IOMMU_IOAS_UNMAP, &args);
+ if (ioctl(fd, IOMMU_IOAS_UNMAP, &args))
+ return -errno;
+
+ if (unmapped)
+ *unmapped = args.length;
+
+ return 0;
}
-void vfio_pci_dma_unmap(struct vfio_pci_device *device,
- struct vfio_dma_region *region)
+int __vfio_pci_dma_unmap(struct vfio_pci_device *device,
+ struct vfio_dma_region *region, u64 *unmapped)
{
+ int ret;
+
if (device->iommufd)
- iommufd_dma_unmap(device, region);
+ ret = iommufd_dma_unmap(device->iommufd, region->iova,
+ region->size, device->ioas_id,
+ unmapped);
else
- vfio_iommu_dma_unmap(device, region);
+ ret = vfio_iommu_dma_unmap(device->container_fd, region->iova,
+ region->size, 0, unmapped);
+
+ if (ret)
+ return ret;
+
+ list_del_init(&region->link);
+
+ return 0;
+}
+
+int __vfio_pci_dma_unmap_all(struct vfio_pci_device *device, u64 *unmapped)
+{
+ int ret;
+ struct vfio_dma_region *curr, *next;
+
+ if (device->iommufd)
+ ret = iommufd_dma_unmap(device->iommufd, 0, UINT64_MAX,
+ device->ioas_id, unmapped);
+ else
+ ret = vfio_iommu_dma_unmap(device->container_fd, 0, 0,
+ VFIO_DMA_UNMAP_FLAG_ALL, unmapped);
+
+ if (ret)
+ return ret;
+
+ list_for_each_entry_safe(curr, next, &device->dma_regions, link)
+ list_del_init(&curr->link);
- list_del(&region->link);
+ return 0;
}
static void vfio_pci_region_get(struct vfio_pci_device *device, int index,
diff --git a/tools/testing/selftests/vfio/vfio_dma_mapping_test.c b/tools/testing/selftests/vfio/vfio_dma_mapping_test.c
index ab19c54a774d..102603d4407d 100644
--- a/tools/testing/selftests/vfio/vfio_dma_mapping_test.c
+++ b/tools/testing/selftests/vfio/vfio_dma_mapping_test.c
@@ -3,6 +3,8 @@
#include <sys/mman.h>
#include <unistd.h>
+#include <uapi/linux/types.h>
+#include <linux/iommufd.h>
#include <linux/limits.h>
#include <linux/mman.h>
#include <linux/sizes.h>
@@ -93,6 +95,7 @@ static int iommu_mapping_get(const char *bdf, u64 iova,
FIXTURE(vfio_dma_mapping_test) {
struct vfio_pci_device *device;
+ struct iova_allocator *iova_allocator;
};
FIXTURE_VARIANT(vfio_dma_mapping_test) {
@@ -112,13 +115,17 @@ FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(anonymous, 0, 0);
FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(anonymous_hugetlb_2mb, SZ_2M, MAP_HUGETLB | MAP_HUGE_2MB);
FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(anonymous_hugetlb_1gb, SZ_1G, MAP_HUGETLB | MAP_HUGE_1GB);
+#undef FIXTURE_VARIANT_ADD_IOMMU_MODE
+
FIXTURE_SETUP(vfio_dma_mapping_test)
{
self->device = vfio_pci_device_init(device_bdf, variant->iommu_mode);
+ self->iova_allocator = iova_allocator_init(self->device);
}
FIXTURE_TEARDOWN(vfio_dma_mapping_test)
{
+ iova_allocator_cleanup(self->iova_allocator);
vfio_pci_device_cleanup(self->device);
}
@@ -129,6 +136,7 @@ TEST_F(vfio_dma_mapping_test, dma_map_unmap)
struct vfio_dma_region region;
struct iommu_mapping mapping;
u64 mapping_size = size;
+ u64 unmapped;
int rc;
region.vaddr = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, -1, 0);
@@ -139,7 +147,7 @@ TEST_F(vfio_dma_mapping_test, dma_map_unmap)
else
ASSERT_NE(region.vaddr, MAP_FAILED);
- region.iova = (u64)region.vaddr;
+ region.iova = iova_allocator_alloc(self->iova_allocator, size);
region.size = size;
vfio_pci_dma_map(self->device, &region);
@@ -184,7 +192,9 @@ TEST_F(vfio_dma_mapping_test, dma_map_unmap)
}
unmap:
- vfio_pci_dma_unmap(self->device, &region);
+ rc = __vfio_pci_dma_unmap(self->device, &region, &unmapped);
+ ASSERT_EQ(rc, 0);
+ ASSERT_EQ(unmapped, region.size);
printf("Unmapped IOVA 0x%lx\n", region.iova);
ASSERT_EQ(INVALID_IOVA, __to_iova(self->device, region.vaddr));
ASSERT_NE(0, iommu_mapping_get(device_bdf, region.iova, &mapping));
@@ -192,6 +202,103 @@ unmap:
ASSERT_TRUE(!munmap(region.vaddr, size));
}
+FIXTURE(vfio_dma_map_limit_test) {
+ struct vfio_pci_device *device;
+ struct vfio_dma_region region;
+ size_t mmap_size;
+};
+
+FIXTURE_VARIANT(vfio_dma_map_limit_test) {
+ const char *iommu_mode;
+};
+
+#define FIXTURE_VARIANT_ADD_IOMMU_MODE(_iommu_mode) \
+FIXTURE_VARIANT_ADD(vfio_dma_map_limit_test, _iommu_mode) { \
+ .iommu_mode = #_iommu_mode, \
+}
+
+FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES();
+
+#undef FIXTURE_VARIANT_ADD_IOMMU_MODE
+
+FIXTURE_SETUP(vfio_dma_map_limit_test)
+{
+ struct vfio_dma_region *region = &self->region;
+ struct iommu_iova_range *ranges;
+ u64 region_size = getpagesize();
+ iova_t last_iova;
+ u32 nranges;
+
+ /*
+ * Over-allocate mmap by double the size to provide enough backing vaddr
+ * for overflow tests
+ */
+ self->mmap_size = 2 * region_size;
+
+ self->device = vfio_pci_device_init(device_bdf, variant->iommu_mode);
+ region->vaddr = mmap(NULL, self->mmap_size, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ ASSERT_NE(region->vaddr, MAP_FAILED);
+
+ ranges = vfio_pci_iova_ranges(self->device, &nranges);
+ VFIO_ASSERT_NOT_NULL(ranges);
+ last_iova = ranges[nranges - 1].last;
+ free(ranges);
+
+ /* One page prior to the last iova */
+ region->iova = last_iova & ~(region_size - 1);
+ region->size = region_size;
+}
+
+FIXTURE_TEARDOWN(vfio_dma_map_limit_test)
+{
+ vfio_pci_device_cleanup(self->device);
+ ASSERT_EQ(munmap(self->region.vaddr, self->mmap_size), 0);
+}
+
+TEST_F(vfio_dma_map_limit_test, unmap_range)
+{
+ struct vfio_dma_region *region = &self->region;
+ u64 unmapped;
+ int rc;
+
+ vfio_pci_dma_map(self->device, region);
+ ASSERT_EQ(region->iova, to_iova(self->device, region->vaddr));
+
+ rc = __vfio_pci_dma_unmap(self->device, region, &unmapped);
+ ASSERT_EQ(rc, 0);
+ ASSERT_EQ(unmapped, region->size);
+}
+
+TEST_F(vfio_dma_map_limit_test, unmap_all)
+{
+ struct vfio_dma_region *region = &self->region;
+ u64 unmapped;
+ int rc;
+
+ vfio_pci_dma_map(self->device, region);
+ ASSERT_EQ(region->iova, to_iova(self->device, region->vaddr));
+
+ rc = __vfio_pci_dma_unmap_all(self->device, &unmapped);
+ ASSERT_EQ(rc, 0);
+ ASSERT_EQ(unmapped, region->size);
+}
+
+TEST_F(vfio_dma_map_limit_test, overflow)
+{
+ struct vfio_dma_region *region = &self->region;
+ int rc;
+
+ region->iova = ~(iova_t)0 & ~(region->size - 1);
+ region->size = self->mmap_size;
+
+ rc = __vfio_pci_dma_map(self->device, region);
+ ASSERT_EQ(rc, -EOVERFLOW);
+
+ rc = __vfio_pci_dma_unmap(self->device, region, NULL);
+ ASSERT_EQ(rc, -EOVERFLOW);
+}
+
int main(int argc, char *argv[])
{
device_bdf = vfio_selftests_get_bdf(&argc, argv);
diff --git a/tools/testing/selftests/vfio/vfio_pci_driver_test.c b/tools/testing/selftests/vfio/vfio_pci_driver_test.c
index 2dbd70b7db62..f69eec8b928d 100644
--- a/tools/testing/selftests/vfio/vfio_pci_driver_test.c
+++ b/tools/testing/selftests/vfio/vfio_pci_driver_test.c
@@ -19,6 +19,7 @@ static const char *device_bdf;
} while (0)
static void region_setup(struct vfio_pci_device *device,
+ struct iova_allocator *iova_allocator,
struct vfio_dma_region *region, u64 size)
{
const int flags = MAP_SHARED | MAP_ANONYMOUS;
@@ -29,7 +30,7 @@ static void region_setup(struct vfio_pci_device *device,
VFIO_ASSERT_NE(vaddr, MAP_FAILED);
region->vaddr = vaddr;
- region->iova = (u64)vaddr;
+ region->iova = iova_allocator_alloc(iova_allocator, size);
region->size = size;
vfio_pci_dma_map(device, region);
@@ -44,6 +45,7 @@ static void region_teardown(struct vfio_pci_device *device,
FIXTURE(vfio_pci_driver_test) {
struct vfio_pci_device *device;
+ struct iova_allocator *iova_allocator;
struct vfio_dma_region memcpy_region;
void *vaddr;
int msi_fd;
@@ -72,14 +74,15 @@ FIXTURE_SETUP(vfio_pci_driver_test)
struct vfio_pci_driver *driver;
self->device = vfio_pci_device_init(device_bdf, variant->iommu_mode);
+ self->iova_allocator = iova_allocator_init(self->device);
driver = &self->device->driver;
- region_setup(self->device, &self->memcpy_region, SZ_1G);
- region_setup(self->device, &driver->region, SZ_2M);
+ region_setup(self->device, self->iova_allocator, &self->memcpy_region, SZ_1G);
+ region_setup(self->device, self->iova_allocator, &driver->region, SZ_2M);
/* Any IOVA that doesn't overlap memcpy_region and driver->region. */
- self->unmapped_iova = 8UL * SZ_1G;
+ self->unmapped_iova = iova_allocator_alloc(self->iova_allocator, SZ_1G);
vfio_pci_driver_init(self->device);
self->msi_fd = self->device->msi_eventfds[driver->msi];
@@ -108,6 +111,7 @@ FIXTURE_TEARDOWN(vfio_pci_driver_test)
region_teardown(self->device, &self->memcpy_region);
region_teardown(self->device, &driver->region);
+ iova_allocator_cleanup(self->iova_allocator);
vfio_pci_device_cleanup(self->device);
}
diff --git a/tools/testing/selftests/vsock/vmtest.sh b/tools/testing/selftests/vsock/vmtest.sh
index edacebfc1632..8ceeb8a7894f 100755
--- a/tools/testing/selftests/vsock/vmtest.sh
+++ b/tools/testing/selftests/vsock/vmtest.sh
@@ -389,9 +389,9 @@ run_test() {
local rc
host_oops_cnt_before=$(dmesg | grep -c -i 'Oops')
- host_warn_cnt_before=$(dmesg --level=warn | wc -l)
+ host_warn_cnt_before=$(dmesg --level=warn | grep -c -i 'vsock')
vm_oops_cnt_before=$(vm_ssh -- dmesg | grep -c -i 'Oops')
- vm_warn_cnt_before=$(vm_ssh -- dmesg --level=warn | wc -l)
+ vm_warn_cnt_before=$(vm_ssh -- dmesg --level=warn | grep -c -i 'vsock')
name=$(echo "${1}" | awk '{ print $1 }')
eval test_"${name}"
@@ -403,7 +403,7 @@ run_test() {
rc=$KSFT_FAIL
fi
- host_warn_cnt_after=$(dmesg --level=warn | wc -l)
+ host_warn_cnt_after=$(dmesg --level=warn | grep -c -i 'vsock')
if [[ ${host_warn_cnt_after} -gt ${host_warn_cnt_before} ]]; then
echo "FAIL: kernel warning detected on host" | log_host "${name}"
rc=$KSFT_FAIL
@@ -415,7 +415,7 @@ run_test() {
rc=$KSFT_FAIL
fi
- vm_warn_cnt_after=$(vm_ssh -- dmesg --level=warn | wc -l)
+ vm_warn_cnt_after=$(vm_ssh -- dmesg --level=warn | grep -c -i 'vsock')
if [[ ${vm_warn_cnt_after} -gt ${vm_warn_cnt_before} ]]; then
echo "FAIL: kernel warning detected on vm" | log_host "${name}"
rc=$KSFT_FAIL
diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c
index 05e1e6774fba..918eaec8bfbe 100644
--- a/tools/testing/selftests/x86/test_vsyscall.c
+++ b/tools/testing/selftests/x86/test_vsyscall.c
@@ -308,12 +308,13 @@ static void test_getcpu(int cpu)
#ifdef __x86_64__
static jmp_buf jmpbuf;
-static volatile unsigned long segv_err;
+static volatile unsigned long segv_err, segv_trapno;
static void sigsegv(int sig, siginfo_t *info, void *ctx_void)
{
ucontext_t *ctx = (ucontext_t *)ctx_void;
+ segv_trapno = ctx->uc_mcontext.gregs[REG_TRAPNO];
segv_err = ctx->uc_mcontext.gregs[REG_ERR];
siglongjmp(jmpbuf, 1);
}
@@ -336,7 +337,8 @@ static void test_vsys_r(void)
else if (can_read)
ksft_test_result_pass("We have read access\n");
else
- ksft_test_result_pass("We do not have read access: #PF(0x%lx)\n", segv_err);
+ ksft_test_result_pass("We do not have read access (trap=%ld, error=0x%lx)\n",
+ segv_trapno, segv_err);
}
static void test_vsys_x(void)
@@ -347,7 +349,7 @@ static void test_vsys_x(void)
return;
}
- ksft_print_msg("Make sure that vsyscalls really page fault\n");
+ ksft_print_msg("Make sure that vsyscalls really cause a fault\n");
bool can_exec;
if (sigsetjmp(jmpbuf, 1) == 0) {
@@ -358,13 +360,14 @@ static void test_vsys_x(void)
}
if (can_exec)
- ksft_test_result_fail("Executing the vsyscall did not page fault\n");
- else if (segv_err & (1 << 4)) /* INSTR */
- ksft_test_result_pass("Executing the vsyscall page failed: #PF(0x%lx)\n",
- segv_err);
+ ksft_test_result_fail("Executing the vsyscall did not fault\n");
+ /* #GP or #PF (with X86_PF_INSTR) */
+ else if ((segv_trapno == 13) || ((segv_trapno == 14) && (segv_err & (1 << 4))))
+ ksft_test_result_pass("Executing the vsyscall page failed (trap=%ld, error=0x%lx)\n",
+ segv_trapno, segv_err);
else
- ksft_test_result_fail("Execution failed with the wrong error: #PF(0x%lx)\n",
- segv_err);
+ ksft_test_result_fail("Execution failed with the wrong error (trap=%ld, error=0x%lx)\n",
+ segv_trapno, segv_err);
}
/*