diff options
Diffstat (limited to 'tools')
| -rw-r--r-- | tools/include/uapi/linux/bpf.h | 1 | ||||
| -rw-r--r-- | tools/net/ynl/pyynl/lib/ynl.py | 39 | ||||
| -rw-r--r-- | tools/testing/selftests/bpf/prog_tests/sk_bypass_prot_mem.c | 292 | ||||
| -rw-r--r-- | tools/testing/selftests/bpf/progs/sk_bypass_prot_mem.c | 104 | ||||
| -rw-r--r-- | tools/testing/selftests/drivers/net/Makefile | 1 | ||||
| -rw-r--r-- | tools/testing/selftests/drivers/net/netdevsim/Makefile | 1 | ||||
| -rwxr-xr-x | tools/testing/selftests/drivers/net/netdevsim/ethtool-ring.sh | 85 | ||||
| -rwxr-xr-x | tools/testing/selftests/drivers/net/ring_reconfig.py | 167 | ||||
| -rwxr-xr-x | tools/testing/selftests/net/forwarding/bridge_mdb.sh | 100 | ||||
| -rw-r--r-- | tools/testing/selftests/net/io_uring_zerocopy_tx.c | 24 | ||||
| -rw-r--r-- | tools/testing/selftests/net/netfilter/sctp_collision.c | 3 | ||||
| -rw-r--r-- | tools/testing/selftests/net/netlink-dumps.c | 1 | ||||
| -rw-r--r-- | tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt | 6 | ||||
| -rw-r--r-- | tools/testing/selftests/net/tls.c | 141 | ||||
| -rwxr-xr-x | tools/testing/selftests/net/traceroute.sh | 313 |
15 files changed, 1160 insertions, 118 deletions
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 6829936d33f5..9b17d937edf7 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -7200,6 +7200,7 @@ enum { TCP_BPF_SYN_MAC = 1007, /* Copy the MAC, IP[46], and TCP header */ TCP_BPF_SOCK_OPS_CB_FLAGS = 1008, /* Get or Set TCP sock ops flags */ SK_BPF_CB_FLAGS = 1009, /* Get or set sock ops flags in socket */ + SK_BPF_BYPASS_PROT_MEM = 1010, /* Get or Set sk->sk_bypass_prot_mem */ }; enum { diff --git a/tools/net/ynl/pyynl/lib/ynl.py b/tools/net/ynl/pyynl/lib/ynl.py index 62383c70ebb9..225baad3c8f8 100644 --- a/tools/net/ynl/pyynl/lib/ynl.py +++ b/tools/net/ynl/pyynl/lib/ynl.py @@ -100,12 +100,21 @@ class Netlink: 'bitfield32', 'sint', 'uint']) class NlError(Exception): - def __init__(self, nl_msg): - self.nl_msg = nl_msg - self.error = -nl_msg.error - - def __str__(self): - return f"Netlink error: {os.strerror(self.error)}\n{self.nl_msg}" + def __init__(self, nl_msg): + self.nl_msg = nl_msg + self.error = -nl_msg.error + + def __str__(self): + msg = "Netlink error: " + + extack = self.nl_msg.extack.copy() if self.nl_msg.extack else {} + if 'msg' in extack: + msg += extack['msg'] + ': ' + del extack['msg'] + msg += os.strerror(self.error) + if extack: + msg += ' ' + str(extack) + return msg class ConfigError(Exception): @@ -1039,15 +1048,15 @@ class YnlFamily(SpecFamily): self.check_ntf() def operation_do_attributes(self, name): - """ - For a given operation name, find and return a supported - set of attributes (as a dict). - """ - op = self.find_operation(name) - if not op: - return None - - return op['do']['request']['attributes'].copy() + """ + For a given operation name, find and return a supported + set of attributes (as a dict). 
+ """ + op = self.find_operation(name) + if not op: + return None + + return op['do']['request']['attributes'].copy() def _encode_message(self, op, vals, flags, req_seq): nl_flags = Netlink.NLM_F_REQUEST | Netlink.NLM_F_ACK diff --git a/tools/testing/selftests/bpf/prog_tests/sk_bypass_prot_mem.c b/tools/testing/selftests/bpf/prog_tests/sk_bypass_prot_mem.c new file mode 100644 index 000000000000..e4940583924b --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/sk_bypass_prot_mem.c @@ -0,0 +1,292 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright 2025 Google LLC */ + +#include <test_progs.h> +#include "sk_bypass_prot_mem.skel.h" +#include "network_helpers.h" + +#define NR_PAGES 32 +#define NR_SOCKETS 2 +#define BUF_TOTAL (NR_PAGES * 4096 / NR_SOCKETS) +#define BUF_SINGLE 1024 +#define NR_SEND (BUF_TOTAL / BUF_SINGLE) + +struct test_case { + char name[8]; + int family; + int type; + int (*create_sockets)(struct test_case *test_case, int sk[], int len); + long (*get_memory_allocated)(struct test_case *test_case, struct sk_bypass_prot_mem *skel); +}; + +static int tcp_create_sockets(struct test_case *test_case, int sk[], int len) +{ + int server, i, err = 0; + + server = start_server(test_case->family, test_case->type, NULL, 0, 0); + if (!ASSERT_GE(server, 0, "start_server_str")) + return server; + + /* Keep for-loop so we can change NR_SOCKETS easily. */ + for (i = 0; i < len; i += 2) { + sk[i] = connect_to_fd(server, 0); + if (sk[i] < 0) { + ASSERT_GE(sk[i], 0, "connect_to_fd"); + err = sk[i]; + break; + } + + sk[i + 1] = accept(server, NULL, NULL); + if (sk[i + 1] < 0) { + ASSERT_GE(sk[i + 1], 0, "accept"); + err = sk[i + 1]; + break; + } + } + + close(server); + + return err; +} + +static int udp_create_sockets(struct test_case *test_case, int sk[], int len) +{ + int i, j, err, rcvbuf = BUF_TOTAL; + + /* Keep for-loop so we can change NR_SOCKETS easily. 
*/ + for (i = 0; i < len; i += 2) { + sk[i] = start_server(test_case->family, test_case->type, NULL, 0, 0); + if (sk[i] < 0) { + ASSERT_GE(sk[i], 0, "start_server"); + return sk[i]; + } + + sk[i + 1] = connect_to_fd(sk[i], 0); + if (sk[i + 1] < 0) { + ASSERT_GE(sk[i + 1], 0, "connect_to_fd"); + return sk[i + 1]; + } + + err = connect_fd_to_fd(sk[i], sk[i + 1], 0); + if (err) { + ASSERT_EQ(err, 0, "connect_fd_to_fd"); + return err; + } + + for (j = 0; j < 2; j++) { + err = setsockopt(sk[i + j], SOL_SOCKET, SO_RCVBUF, &rcvbuf, sizeof(int)); + if (err) { + ASSERT_EQ(err, 0, "setsockopt(SO_RCVBUF)"); + return err; + } + } + } + + return 0; +} + +static long get_memory_allocated(struct test_case *test_case, + bool *activated, long *memory_allocated) +{ + int sk; + + *activated = true; + + /* AF_INET and AF_INET6 share the same memory_allocated. + * tcp_init_sock() is called by AF_INET and AF_INET6, + * but udp_lib_init_sock() is inline. + */ + sk = socket(AF_INET, test_case->type, 0); + if (!ASSERT_GE(sk, 0, "get_memory_allocated")) + return -1; + + close(sk); + + return *memory_allocated; +} + +static long tcp_get_memory_allocated(struct test_case *test_case, struct sk_bypass_prot_mem *skel) +{ + return get_memory_allocated(test_case, + &skel->bss->tcp_activated, + &skel->bss->tcp_memory_allocated); +} + +static long udp_get_memory_allocated(struct test_case *test_case, struct sk_bypass_prot_mem *skel) +{ + return get_memory_allocated(test_case, + &skel->bss->udp_activated, + &skel->bss->udp_memory_allocated); +} + +static int check_bypass(struct test_case *test_case, + struct sk_bypass_prot_mem *skel, bool bypass) +{ + char buf[BUF_SINGLE] = {}; + long memory_allocated[2]; + int sk[NR_SOCKETS]; + int err, i, j; + + for (i = 0; i < ARRAY_SIZE(sk); i++) + sk[i] = -1; + + err = test_case->create_sockets(test_case, sk, ARRAY_SIZE(sk)); + if (err) + goto close; + + memory_allocated[0] = test_case->get_memory_allocated(test_case, skel); + + /* allocate pages >= NR_PAGES */ 
+ for (i = 0; i < ARRAY_SIZE(sk); i++) { + for (j = 0; j < NR_SEND; j++) { + int bytes = send(sk[i], buf, sizeof(buf), 0); + + /* Avoid too noisy logs when something failed. */ + if (bytes != sizeof(buf)) { + ASSERT_EQ(bytes, sizeof(buf), "send"); + if (bytes < 0) { + err = bytes; + goto drain; + } + } + } + } + + memory_allocated[1] = test_case->get_memory_allocated(test_case, skel); + + if (bypass) + ASSERT_LE(memory_allocated[1], memory_allocated[0] + 10, "bypass"); + else + ASSERT_GT(memory_allocated[1], memory_allocated[0] + NR_PAGES, "no bypass"); + +drain: + if (test_case->type == SOCK_DGRAM) { + /* UDP starts purging sk->sk_receive_queue after one RCU + * grace period, then udp_memory_allocated goes down, + * so drain the queue before close(). + */ + for (i = 0; i < ARRAY_SIZE(sk); i++) { + for (j = 0; j < NR_SEND; j++) { + int bytes = recv(sk[i], buf, 1, MSG_DONTWAIT | MSG_TRUNC); + + if (bytes == sizeof(buf)) + continue; + if (bytes != -1 || errno != EAGAIN) + PRINT_FAIL("bytes: %d, errno: %s\n", bytes, strerror(errno)); + break; + } + } + } + +close: + for (i = 0; i < ARRAY_SIZE(sk); i++) { + if (sk[i] < 0) + break; + + close(sk[i]); + } + + return err; +} + +static void run_test(struct test_case *test_case) +{ + struct sk_bypass_prot_mem *skel; + struct nstoken *nstoken; + int cgroup, err; + + skel = sk_bypass_prot_mem__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + return; + + skel->bss->nr_cpus = libbpf_num_possible_cpus(); + + err = sk_bypass_prot_mem__attach(skel); + if (!ASSERT_OK(err, "attach")) + goto destroy_skel; + + cgroup = test__join_cgroup("/sk_bypass_prot_mem"); + if (!ASSERT_GE(cgroup, 0, "join_cgroup")) + goto destroy_skel; + + err = make_netns("sk_bypass_prot_mem"); + if (!ASSERT_EQ(err, 0, "make_netns")) + goto close_cgroup; + + nstoken = open_netns("sk_bypass_prot_mem"); + if (!ASSERT_OK_PTR(nstoken, "open_netns")) + goto remove_netns; + + err = check_bypass(test_case, skel, false); + if (!ASSERT_EQ(err, 0, 
"test_bypass(false)")) + goto close_netns; + + err = write_sysctl("/proc/sys/net/core/bypass_prot_mem", "1"); + if (!ASSERT_EQ(err, 0, "write_sysctl(1)")) + goto close_netns; + + err = check_bypass(test_case, skel, true); + if (!ASSERT_EQ(err, 0, "test_bypass(true by sysctl)")) + goto close_netns; + + err = write_sysctl("/proc/sys/net/core/bypass_prot_mem", "0"); + if (!ASSERT_EQ(err, 0, "write_sysctl(0)")) + goto close_netns; + + skel->links.sock_create = bpf_program__attach_cgroup(skel->progs.sock_create, cgroup); + if (!ASSERT_OK_PTR(skel->links.sock_create, "attach_cgroup(sock_create)")) + goto close_netns; + + err = check_bypass(test_case, skel, true); + ASSERT_EQ(err, 0, "test_bypass(true by bpf)"); + +close_netns: + close_netns(nstoken); +remove_netns: + remove_netns("sk_bypass_prot_mem"); +close_cgroup: + close(cgroup); +destroy_skel: + sk_bypass_prot_mem__destroy(skel); +} + +static struct test_case test_cases[] = { + { + .name = "TCP ", + .family = AF_INET, + .type = SOCK_STREAM, + .create_sockets = tcp_create_sockets, + .get_memory_allocated = tcp_get_memory_allocated, + }, + { + .name = "UDP ", + .family = AF_INET, + .type = SOCK_DGRAM, + .create_sockets = udp_create_sockets, + .get_memory_allocated = udp_get_memory_allocated, + }, + { + .name = "TCPv6", + .family = AF_INET6, + .type = SOCK_STREAM, + .create_sockets = tcp_create_sockets, + .get_memory_allocated = tcp_get_memory_allocated, + }, + { + .name = "UDPv6", + .family = AF_INET6, + .type = SOCK_DGRAM, + .create_sockets = udp_create_sockets, + .get_memory_allocated = udp_get_memory_allocated, + }, +}; + +void serial_test_sk_bypass_prot_mem(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(test_cases); i++) { + if (test__start_subtest(test_cases[i].name)) + run_test(&test_cases[i]); + } +} diff --git a/tools/testing/selftests/bpf/progs/sk_bypass_prot_mem.c b/tools/testing/selftests/bpf/progs/sk_bypass_prot_mem.c new file mode 100644 index 000000000000..09a00d11ffcc --- /dev/null +++ 
b/tools/testing/selftests/bpf/progs/sk_bypass_prot_mem.c @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright 2025 Google LLC */ + +#include "bpf_tracing_net.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <errno.h> + +extern int tcp_memory_per_cpu_fw_alloc __ksym; +extern int udp_memory_per_cpu_fw_alloc __ksym; + +int nr_cpus; +bool tcp_activated, udp_activated; +long tcp_memory_allocated, udp_memory_allocated; + +struct sk_prot { + long *memory_allocated; + int *memory_per_cpu_fw_alloc; +}; + +static int drain_memory_per_cpu_fw_alloc(__u32 i, struct sk_prot *sk_prot_ctx) +{ + int *memory_per_cpu_fw_alloc; + + memory_per_cpu_fw_alloc = bpf_per_cpu_ptr(sk_prot_ctx->memory_per_cpu_fw_alloc, i); + if (memory_per_cpu_fw_alloc) + *sk_prot_ctx->memory_allocated += *memory_per_cpu_fw_alloc; + + return 0; +} + +static long get_memory_allocated(struct sock *_sk, int *memory_per_cpu_fw_alloc) +{ + struct sock *sk = bpf_core_cast(_sk, struct sock); + struct sk_prot sk_prot_ctx; + long memory_allocated; + + /* net_aligned_data.{tcp,udp}_memory_allocated was not available. 
*/ + memory_allocated = sk->__sk_common.skc_prot->memory_allocated->counter; + + sk_prot_ctx.memory_allocated = &memory_allocated; + sk_prot_ctx.memory_per_cpu_fw_alloc = memory_per_cpu_fw_alloc; + + bpf_loop(nr_cpus, drain_memory_per_cpu_fw_alloc, &sk_prot_ctx, 0); + + return memory_allocated; +} + +static void fentry_init_sock(struct sock *sk, bool *activated, + long *memory_allocated, int *memory_per_cpu_fw_alloc) +{ + if (!*activated) + return; + + *memory_allocated = get_memory_allocated(sk, memory_per_cpu_fw_alloc); + *activated = false; +} + +SEC("fentry/tcp_init_sock") +int BPF_PROG(fentry_tcp_init_sock, struct sock *sk) +{ + fentry_init_sock(sk, &tcp_activated, + &tcp_memory_allocated, &tcp_memory_per_cpu_fw_alloc); + return 0; +} + +SEC("fentry/udp_init_sock") +int BPF_PROG(fentry_udp_init_sock, struct sock *sk) +{ + fentry_init_sock(sk, &udp_activated, + &udp_memory_allocated, &udp_memory_per_cpu_fw_alloc); + return 0; +} + +SEC("cgroup/sock_create") +int sock_create(struct bpf_sock *ctx) +{ + int err, val = 1; + + err = bpf_setsockopt(ctx, SOL_SOCKET, SK_BPF_BYPASS_PROT_MEM, + &val, sizeof(val)); + if (err) + goto err; + + val = 0; + + err = bpf_getsockopt(ctx, SOL_SOCKET, SK_BPF_BYPASS_PROT_MEM, + &val, sizeof(val)); + if (err) + goto err; + + if (val != 1) { + err = -EINVAL; + goto err; + } + + return 1; + +err: + bpf_set_retval(err); + return 0; +} + +char LICENSE[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile index 6e41635bd55a..68e0bb603a9d 100644 --- a/tools/testing/selftests/drivers/net/Makefile +++ b/tools/testing/selftests/drivers/net/Makefile @@ -22,6 +22,7 @@ TEST_PROGS := \ ping.py \ psp.py \ queues.py \ + ring_reconfig.py \ shaper.py \ stats.py \ xdp.py \ diff --git a/tools/testing/selftests/drivers/net/netdevsim/Makefile b/tools/testing/selftests/drivers/net/netdevsim/Makefile index daf51113c827..833abd8e6fdc 100644 --- 
a/tools/testing/selftests/drivers/net/netdevsim/Makefile +++ b/tools/testing/selftests/drivers/net/netdevsim/Makefile @@ -8,7 +8,6 @@ TEST_PROGS := \ ethtool-features.sh \ ethtool-fec.sh \ ethtool-pause.sh \ - ethtool-ring.sh \ fib.sh \ fib_notifications.sh \ hw_stats_l3.sh \ diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-ring.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-ring.sh deleted file mode 100755 index c969559ffa7a..000000000000 --- a/tools/testing/selftests/drivers/net/netdevsim/ethtool-ring.sh +++ /dev/null @@ -1,85 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0-only - -source ethtool-common.sh - -function get_value { - local query="${SETTINGS_MAP[$1]}" - - echo $(ethtool -g $NSIM_NETDEV | \ - tail -n +$CURR_SETT_LINE | \ - awk -F':' -v pattern="$query:" '$0 ~ pattern {gsub(/[\t ]/, "", $2); print $2}') -} - -function update_current_settings { - for key in ${!SETTINGS_MAP[@]}; do - CURRENT_SETTINGS[$key]=$(get_value $key) - done - echo ${CURRENT_SETTINGS[@]} -} - -if ! 
ethtool -h | grep -q set-ring >/dev/null; then - echo "SKIP: No --set-ring support in ethtool" - exit 4 -fi - -NSIM_NETDEV=$(make_netdev) - -set -o pipefail - -declare -A SETTINGS_MAP=( - ["rx"]="RX" - ["rx-mini"]="RX Mini" - ["rx-jumbo"]="RX Jumbo" - ["tx"]="TX" -) - -declare -A EXPECTED_SETTINGS=( - ["rx"]="" - ["rx-mini"]="" - ["rx-jumbo"]="" - ["tx"]="" -) - -declare -A CURRENT_SETTINGS=( - ["rx"]="" - ["rx-mini"]="" - ["rx-jumbo"]="" - ["tx"]="" -) - -MAX_VALUE=$((RANDOM % $((2**32-1)))) -RING_MAX_LIST=$(ls $NSIM_DEV_DFS/ethtool/ring/) - -for ring_max_entry in $RING_MAX_LIST; do - echo $MAX_VALUE > $NSIM_DEV_DFS/ethtool/ring/$ring_max_entry -done - -CURR_SETT_LINE=$(ethtool -g $NSIM_NETDEV | grep -i -m1 -n 'Current hardware settings' | cut -f1 -d:) - -# populate the expected settings map -for key in ${!SETTINGS_MAP[@]}; do - EXPECTED_SETTINGS[$key]=$(get_value $key) -done - -# test -for key in ${!SETTINGS_MAP[@]}; do - value=$((RANDOM % $MAX_VALUE)) - - ethtool -G $NSIM_NETDEV "$key" "$value" - - EXPECTED_SETTINGS[$key]="$value" - expected=${EXPECTED_SETTINGS[@]} - current=$(update_current_settings) - - check $? "$current" "$expected" - set +x -done - -if [ $num_errors -eq 0 ]; then - echo "PASSED all $((num_passes)) checks" - exit 0 -else - echo "FAILED $num_errors/$((num_errors+num_passes)) checks" - exit 1 -fi diff --git a/tools/testing/selftests/drivers/net/ring_reconfig.py b/tools/testing/selftests/drivers/net/ring_reconfig.py new file mode 100755 index 000000000000..f9530a8b0856 --- /dev/null +++ b/tools/testing/selftests/drivers/net/ring_reconfig.py @@ -0,0 +1,167 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +""" +Test channel and ring size configuration via ethtool (-L / -G). 
+""" + +from lib.py import ksft_run, ksft_exit, ksft_pr +from lib.py import ksft_eq +from lib.py import NetDrvEpEnv, EthtoolFamily, GenerateTraffic +from lib.py import defer, NlError + + +def channels(cfg) -> None: + """ + Twiddle channel counts in various combinations of parameters. + We're only looking for driver adhering to the requested config + if the config is accepted and crashes. + """ + ehdr = {'header':{'dev-index': cfg.ifindex}} + chans = cfg.eth.channels_get(ehdr) + + all_keys = ["rx", "tx", "combined"] + mixes = [{"combined"}, {"rx", "tx"}, {"rx", "combined"}, {"tx", "combined"}, + {"rx", "tx", "combined"},] + + # Get the set of keys that device actually supports + restore = {} + supported = set() + for key in all_keys: + if key + "-max" in chans: + supported.add(key) + restore |= {key + "-count": chans[key + "-count"]} + + defer(cfg.eth.channels_set, ehdr | restore) + + def test_config(config): + try: + cfg.eth.channels_set(ehdr | config) + get = cfg.eth.channels_get(ehdr) + for k, v in config.items(): + ksft_eq(get.get(k, 0), v) + except NlError as e: + failed.append(mix) + ksft_pr("Can't set", config, e) + else: + ksft_pr("Okay", config) + + failed = [] + for mix in mixes: + if not mix.issubset(supported): + continue + + # Set all the values in the mix to 1, other supported to 0 + config = {} + for key in all_keys: + config[key + "-count"] = 1 if key in mix else 0 + test_config(config) + + for mix in mixes: + if not mix.issubset(supported): + continue + if mix in failed: + continue + + # Set all the values in the mix to max, other supported to 0 + config = {} + for key in all_keys: + config[key + "-count"] = chans[key + '-max'] if key in mix else 0 + test_config(config) + + +def _configure_min_ring_cnt(cfg) -> None: + """ Try to configure a single Rx/Tx ring. 
""" + ehdr = {'header':{'dev-index': cfg.ifindex}} + chans = cfg.eth.channels_get(ehdr) + + all_keys = ["rx-count", "tx-count", "combined-count"] + restore = {} + config = {} + for key in all_keys: + if key in chans: + restore[key] = chans[key] + config[key] = 0 + + if chans.get('combined-count', 0) > 1: + config['combined-count'] = 1 + elif chans.get('rx-count', 0) > 1 and chans.get('tx-count', 0) > 1: + config['tx-count'] = 1 + config['rx-count'] = 1 + else: + # looks like we're already on 1 channel + return + + cfg.eth.channels_set(ehdr | config) + defer(cfg.eth.channels_set, ehdr | restore) + + +def ringparam(cfg) -> None: + """ + Tweak the ringparam configuration. Try to run some traffic over min + ring size to make sure it actually functions. + """ + ehdr = {'header':{'dev-index': cfg.ifindex}} + rings = cfg.eth.rings_get(ehdr) + + restore = {} + maxes = {} + params = set() + for key in rings.keys(): + if 'max' in key: + param = key[:-4] + maxes[param] = rings[key] + params.add(param) + restore[param] = rings[param] + + defer(cfg.eth.rings_set, ehdr | restore) + + # Speed up the reconfig by configuring just one ring + _configure_min_ring_cnt(cfg) + + # Try to reach min on all settings + for param in params: + val = rings[param] + while True: + try: + cfg.eth.rings_set({'header':{'dev-index': cfg.ifindex}, + param: val // 2}) + if val == 0: + break + val //= 2 + except NlError: + break + + get = cfg.eth.rings_get(ehdr) + ksft_eq(get[param], val) + + ksft_pr(f"Reached min for '{param}' at {val} (max {rings[param]})") + + GenerateTraffic(cfg).wait_pkts_and_stop(10000) + + # Try max across all params, if the driver supports large rings + # this may OOM so we ignore errors + try: + ksft_pr("Applying max settings") + config = {p: maxes[p] for p in params} + cfg.eth.rings_set(ehdr | config) + except NlError as e: + ksft_pr("Can't set max params", config, e) + else: + GenerateTraffic(cfg).wait_pkts_and_stop(10000) + + +def main() -> None: + """ Ksft boiler plate main 
""" + + with NetDrvEpEnv(__file__) as cfg: + cfg.eth = EthtoolFamily() + + ksft_run([channels, + ringparam], + args=(cfg, )) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb.sh b/tools/testing/selftests/net/forwarding/bridge_mdb.sh index 8c1597ebc2d3..e86d77946585 100755 --- a/tools/testing/selftests/net/forwarding/bridge_mdb.sh +++ b/tools/testing/selftests/net/forwarding/bridge_mdb.sh @@ -28,6 +28,7 @@ ALL_TESTS=" cfg_test fwd_test ctrl_test + disable_test " NUM_NETIFS=4 @@ -64,7 +65,10 @@ h2_destroy() switch_create() { - ip link add name br0 type bridge vlan_filtering 1 vlan_default_pvid 0 \ + local vlan_filtering=$1; shift + + ip link add name br0 type bridge \ + vlan_filtering "$vlan_filtering" vlan_default_pvid 0 \ mcast_snooping 1 mcast_igmp_version 3 mcast_mld_version 2 bridge vlan add vid 10 dev br0 self bridge vlan add vid 20 dev br0 self @@ -118,7 +122,7 @@ setup_prepare() h1_create h2_create - switch_create + switch_create 1 } cleanup() @@ -1357,6 +1361,98 @@ ctrl_test() ctrl_mldv2_is_in_test } +check_group() +{ + local group=$1; shift + local vid=$1; shift + local should_fail=$1; shift + local when=$1; shift + local -a vidkws + + if ((vid)); then + vidkws=(vid "$vid") + fi + + bridge mdb get dev br0 grp "$group" "${vidkws[@]}" 2>/dev/null | + grep -q "port $swp1" + check_err_fail "$should_fail" $? 
"$group seen $when snooping disable:" +} + +__disable_test() +{ + local vid=$1; shift + local what=$1; shift + local -a vidkws + + if ((vid)); then + vidkws=(vid "$vid") + fi + + RET=0 + + bridge mdb add dev br0 port "$swp1" grp ff0e::1 permanent \ + "${vidkws[@]}" filter_mode include source_list 2001:db8:1::1 + bridge mdb add dev br0 port "$swp1" grp ff0e::2 permanent \ + "${vidkws[@]}" filter_mode exclude + + bridge mdb add dev br0 port "$swp1" grp ff0e::3 \ + "${vidkws[@]}" filter_mode include source_list 2001:db8:1::2 + bridge mdb add dev br0 port "$swp1" grp ff0e::4 \ + "${vidkws[@]}" filter_mode exclude + + bridge mdb add dev br0 port "$swp1" grp 239.1.1.1 permanent \ + "${vidkws[@]}" filter_mode include source_list 192.0.2.1 + bridge mdb add dev br0 port "$swp1" grp 239.1.1.2 permanent \ + "${vidkws[@]}" filter_mode exclude + + bridge mdb add dev br0 port "$swp1" grp 239.1.1.3 \ + "${vidkws[@]}" filter_mode include source_list 192.0.2.2 + bridge mdb add dev br0 port "$swp1" grp 239.1.1.4 \ + "${vidkws[@]}" filter_mode exclude + + check_group ff0e::1 "$vid" 0 "before" + check_group ff0e::2 "$vid" 0 "before" + check_group ff0e::3 "$vid" 0 "before" + check_group ff0e::4 "$vid" 0 "before" + + check_group 239.1.1.1 "$vid" 0 "before" + check_group 239.1.1.2 "$vid" 0 "before" + check_group 239.1.1.3 "$vid" 0 "before" + check_group 239.1.1.4 "$vid" 0 "before" + + ip link set dev br0 type bridge mcast_snooping 0 + + check_group ff0e::1 "$vid" 0 "after" + check_group ff0e::2 "$vid" 0 "after" + check_group ff0e::3 "$vid" 1 "after" + check_group ff0e::4 "$vid" 1 "after" + + check_group 239.1.1.1 "$vid" 0 "after" + check_group 239.1.1.2 "$vid" 0 "after" + check_group 239.1.1.3 "$vid" 1 "after" + check_group 239.1.1.4 "$vid" 1 "after" + + log_test "$what: Flush after disable" + + ip link set dev br0 type bridge mcast_snooping 1 + sleep 10 +} + +disable_test() +{ + __disable_test 10 802.1q + + switch_destroy + switch_create 0 + setup_wait + + __disable_test 0 802.1d + + 
switch_destroy + switch_create 1 + setup_wait +} + if ! bridge mdb help 2>&1 | grep -q "flush"; then echo "SKIP: iproute2 too old, missing bridge mdb flush support" exit $ksft_skip diff --git a/tools/testing/selftests/net/io_uring_zerocopy_tx.c b/tools/testing/selftests/net/io_uring_zerocopy_tx.c index 76e604e4810e..7bfeeb133705 100644 --- a/tools/testing/selftests/net/io_uring_zerocopy_tx.c +++ b/tools/testing/selftests/net/io_uring_zerocopy_tx.c @@ -106,14 +106,14 @@ static void do_tx(int domain, int type, int protocol) ret = io_uring_queue_init(512, &ring, 0); if (ret) - error(1, ret, "io_uring: queue init"); + error(1, -ret, "io_uring: queue init"); iov.iov_base = payload; iov.iov_len = cfg_payload_len; ret = io_uring_register_buffers(&ring, &iov, 1); if (ret) - error(1, ret, "io_uring: buffer registration"); + error(1, -ret, "io_uring: buffer registration"); tstop = gettimeofday_ms() + cfg_runtime_ms; do { @@ -149,24 +149,24 @@ static void do_tx(int domain, int type, int protocol) ret = io_uring_submit(&ring); if (ret != cfg_nr_reqs) - error(1, ret, "submit"); + error(1, -ret, "submit"); if (cfg_cork) do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 0); for (i = 0; i < cfg_nr_reqs; i++) { ret = io_uring_wait_cqe(&ring, &cqe); if (ret) - error(1, ret, "wait cqe"); + error(1, -ret, "wait cqe"); if (cqe->user_data != NONZC_TAG && cqe->user_data != ZC_TAG) - error(1, -EINVAL, "invalid cqe->user_data"); + error(1, EINVAL, "invalid cqe->user_data"); if (cqe->flags & IORING_CQE_F_NOTIF) { if (cqe->flags & IORING_CQE_F_MORE) - error(1, -EINVAL, "invalid notif flags"); + error(1, EINVAL, "invalid notif flags"); if (compl_cqes <= 0) - error(1, -EINVAL, "notification mismatch"); + error(1, EINVAL, "notification mismatch"); compl_cqes--; i--; io_uring_cqe_seen(&ring); @@ -174,14 +174,14 @@ static void do_tx(int domain, int type, int protocol) } if (cqe->flags & IORING_CQE_F_MORE) { if (cqe->user_data != ZC_TAG) - error(1, cqe->res, "unexpected F_MORE"); + error(1, -cqe->res, 
"unexpected F_MORE"); compl_cqes++; } if (cqe->res >= 0) { packets++; bytes += cqe->res; } else if (cqe->res != -EAGAIN) { - error(1, cqe->res, "send failed"); + error(1, -cqe->res, "send failed"); } io_uring_cqe_seen(&ring); } @@ -190,11 +190,11 @@ static void do_tx(int domain, int type, int protocol) while (compl_cqes) { ret = io_uring_wait_cqe(&ring, &cqe); if (ret) - error(1, ret, "wait cqe"); + error(1, -ret, "wait cqe"); if (cqe->flags & IORING_CQE_F_MORE) - error(1, -EINVAL, "invalid notif flags"); + error(1, EINVAL, "invalid notif flags"); if (!(cqe->flags & IORING_CQE_F_NOTIF)) - error(1, -EINVAL, "missing notif flag"); + error(1, EINVAL, "missing notif flag"); io_uring_cqe_seen(&ring); compl_cqes--; diff --git a/tools/testing/selftests/net/netfilter/sctp_collision.c b/tools/testing/selftests/net/netfilter/sctp_collision.c index 21bb1cfd8a85..b282d1785c9b 100644 --- a/tools/testing/selftests/net/netfilter/sctp_collision.c +++ b/tools/testing/selftests/net/netfilter/sctp_collision.c @@ -9,9 +9,10 @@ int main(int argc, char *argv[]) { struct sockaddr_in saddr = {}, daddr = {}; - int sd, ret, len = sizeof(daddr); + socklen_t len = sizeof(daddr); struct timeval tv = {25, 0}; char buf[] = "hello"; + int sd, ret; if (argc != 6 || (strcmp(argv[1], "server") && strcmp(argv[1], "client"))) { printf("%s <server|client> <LOCAL_IP> <LOCAL_PORT> <REMOTE_IP> <REMOTE_PORT>\n", diff --git a/tools/testing/selftests/net/netlink-dumps.c b/tools/testing/selftests/net/netlink-dumps.c index 7618ebe528a4..679b6c77ace7 100644 --- a/tools/testing/selftests/net/netlink-dumps.c +++ b/tools/testing/selftests/net/netlink-dumps.c @@ -143,6 +143,7 @@ TEST(dump_extack) EXPECT_EQ(n, -1); EXPECT_EQ(errno, ENOBUFS); + ret = NO_CTRL; for (i = 0; i < cnt; i++) { struct ext_ack ea = {}; diff --git a/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt b/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt index 183051ba0cae..6882b8240a8a 
100644 --- a/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt +++ b/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt @@ -23,14 +23,16 @@ // install a qdisc dropping all packets +0 `tc qdisc delete dev tun0 root 2>/dev/null ; tc qdisc add dev tun0 root pfifo limit 0` + +0 write(4, ..., 24) = 24 // When qdisc is congested we retry every 500ms // (TCP_RESOURCE_PROBE_INTERVAL) and therefore // we retry 6 times before hitting 3s timeout. // First verify that the connection is alive: -+3.250 write(4, ..., 24) = 24 ++3 write(4, ..., 24) = 24 + // Now verify that shortly after that the socket is dead: - +.100 write(4, ..., 24) = -1 ETIMEDOUT (Connection timed out) ++1 write(4, ..., 24) = -1 ETIMEDOUT (Connection timed out) +0 %{ assert tcpi_probes == 6, tcpi_probes; \ assert tcpi_backoff == 0, tcpi_backoff }% diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 5c6d8215021c..da1b50b30719 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -2856,6 +2856,147 @@ TEST_F(tls_err, oob_pressure) EXPECT_EQ(send(self->fd2, buf, 5, MSG_OOB), 5); } +/* + * Parse a stream of TLS records and ensure that each record respects + * the specified @max_payload_len. 
+ */ +static size_t parse_tls_records(struct __test_metadata *_metadata, + const __u8 *rx_buf, int rx_len, int overhead, + __u16 max_payload_len) +{ + const __u8 *rec = rx_buf; + size_t total_plaintext_rx = 0; + const __u8 rec_header_len = 5; + + while (rec < rx_buf + rx_len) { + __u16 record_payload_len; + __u16 plaintext_len; + + /* Sanity check that it's a TLS header for application data */ + ASSERT_EQ(rec[0], 23); + ASSERT_EQ(rec[1], 0x3); + ASSERT_EQ(rec[2], 0x3); + + memcpy(&record_payload_len, rec + 3, 2); + record_payload_len = ntohs(record_payload_len); + ASSERT_GE(record_payload_len, overhead); + + plaintext_len = record_payload_len - overhead; + total_plaintext_rx += plaintext_len; + + /* Plaintext must not exceed the specified limit */ + ASSERT_LE(plaintext_len, max_payload_len); + rec += rec_header_len + record_payload_len; + } + + return total_plaintext_rx; +} + +TEST(tls_12_tx_max_payload_len) +{ + struct tls_crypto_info_keys tls12; + int cfd, ret, fd, overhead; + size_t total_plaintext_rx = 0; + __u8 tx[1024], rx[2000]; + __u16 limit = 128; + __u16 opt = 0; + unsigned int optlen = sizeof(opt); + bool notls; + + tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_CCM_128, + &tls12, 0); + + ulp_sock_pair(_metadata, &fd, &cfd, &notls); + + if (notls) + exit(KSFT_SKIP); + + /* Don't install keys on fd, we'll parse raw records */ + ret = setsockopt(cfd, SOL_TLS, TLS_TX, &tls12, tls12.len); + ASSERT_EQ(ret, 0); + + ret = setsockopt(cfd, SOL_TLS, TLS_TX_MAX_PAYLOAD_LEN, &limit, + sizeof(limit)); + ASSERT_EQ(ret, 0); + + ret = getsockopt(cfd, SOL_TLS, TLS_TX_MAX_PAYLOAD_LEN, &opt, &optlen); + EXPECT_EQ(ret, 0); + EXPECT_EQ(limit, opt); + EXPECT_EQ(optlen, sizeof(limit)); + + memset(tx, 0, sizeof(tx)); + ASSERT_EQ(send(cfd, tx, sizeof(tx), 0), sizeof(tx)); + close(cfd); + + ret = recv(fd, rx, sizeof(rx), 0); + + /* + * 16B tag + 8B IV -- record header (5B) is not counted but we'll + * need it to walk the record stream + */ + overhead = 16 + 8; + 
total_plaintext_rx = parse_tls_records(_metadata, rx, ret, overhead, + limit); + + ASSERT_EQ(total_plaintext_rx, sizeof(tx)); + close(fd); +} + +TEST(tls_12_tx_max_payload_len_open_rec) +{ + struct tls_crypto_info_keys tls12; + int cfd, ret, fd, overhead; + size_t total_plaintext_rx = 0; + __u8 tx[1024], rx[2000]; + __u16 tx_partial = 256; + __u16 og_limit = 512, limit = 128; + bool notls; + + tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_CCM_128, + &tls12, 0); + + ulp_sock_pair(_metadata, &fd, &cfd, &notls); + + if (notls) + exit(KSFT_SKIP); + + /* Don't install keys on fd, we'll parse raw records */ + ret = setsockopt(cfd, SOL_TLS, TLS_TX, &tls12, tls12.len); + ASSERT_EQ(ret, 0); + + ret = setsockopt(cfd, SOL_TLS, TLS_TX_MAX_PAYLOAD_LEN, &og_limit, + sizeof(og_limit)); + ASSERT_EQ(ret, 0); + + memset(tx, 0, sizeof(tx)); + ASSERT_EQ(send(cfd, tx, tx_partial, MSG_MORE), tx_partial); + + /* + * Changing the payload limit with a pending open record should + * not be allowed. + */ + ret = setsockopt(cfd, SOL_TLS, TLS_TX_MAX_PAYLOAD_LEN, &limit, + sizeof(limit)); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, EBUSY); + + ASSERT_EQ(send(cfd, tx + tx_partial, sizeof(tx) - tx_partial, MSG_EOR), + sizeof(tx) - tx_partial); + close(cfd); + + ret = recv(fd, rx, sizeof(rx), 0); + + /* + * 16B tag + 8B IV -- record header (5B) is not counted but we'll + * need it to walk the record stream + */ + overhead = 16 + 8; + total_plaintext_rx = parse_tls_records(_metadata, rx, ret, overhead, + og_limit); + ASSERT_EQ(total_plaintext_rx, sizeof(tx)); + close(fd); +} + TEST(non_established) { struct tls12_crypto_info_aes_gcm_256 tls12; struct sockaddr_in addr; diff --git a/tools/testing/selftests/net/traceroute.sh b/tools/testing/selftests/net/traceroute.sh index dbb34c7e09ce..a7c6ab8a0347 100755 --- a/tools/testing/selftests/net/traceroute.sh +++ b/tools/testing/selftests/net/traceroute.sh @@ -36,6 +36,35 @@ run_cmd() return $rc } +__check_traceroute_version() +{ + local cmd=$1; shift + 
local req_ver=$1; shift + local ver + + req_ver=$(echo "$req_ver" | sed 's/\.//g') + ver=$($cmd -V 2>&1 | grep -Eo '[0-9]+.[0-9]+.[0-9]+' | sed 's/\.//g') + if [[ $ver -lt $req_ver ]]; then + return 1 + else + return 0 + fi +} + +check_traceroute6_version() +{ + local req_ver=$1; shift + + __check_traceroute_version traceroute6 "$req_ver" +} + +check_traceroute_version() +{ + local req_ver=$1; shift + + __check_traceroute_version traceroute "$req_ver" +} + ################################################################################ # create namespaces and interconnects @@ -59,6 +88,8 @@ create_ns() ip netns exec ${ns} ip -6 ro add unreachable default metric 8192 ip netns exec ${ns} sysctl -qw net.ipv4.ip_forward=1 + ip netns exec ${ns} sysctl -qw net.ipv4.icmp_ratelimit=0 + ip netns exec ${ns} sysctl -qw net.ipv6.icmp.ratelimit=0 ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1 ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.forwarding=1 ip netns exec ${ns} sysctl -qw net.ipv6.conf.default.forwarding=1 @@ -298,6 +329,144 @@ run_traceroute6_vrf() } ################################################################################ +# traceroute6 with ICMP extensions test +# +# Verify that in this scenario +# +# ---- ---- ---- +# |H1|--------------------------|R1|--------------------------|H2| +# ---- N1 ---- N2 ---- +# +# ICMP extensions are correctly reported. The loopback interfaces on all the +# nodes are assigned global addresses and the interfaces connecting the nodes +# are assigned IPv6 link-local addresses. 
+ +cleanup_traceroute6_ext() +{ + cleanup_all_ns +} + +setup_traceroute6_ext() +{ + # Start clean + cleanup_traceroute6_ext + + setup_ns h1 r1 h2 + create_ns "$h1" + create_ns "$r1" + create_ns "$h2" + + # Setup N1 + connect_ns "$h1" eth1 - fe80::1/64 "$r1" eth1 - fe80::2/64 + # Setup N2 + connect_ns "$r1" eth2 - fe80::3/64 "$h2" eth2 - fe80::4/64 + + # Setup H1 + ip -n "$h1" address add 2001:db8:1::1/128 dev lo + ip -n "$h1" route add ::/0 nexthop via fe80::2 dev eth1 + + # Setup R1 + ip -n "$r1" address add 2001:db8:1::2/128 dev lo + ip -n "$r1" route add 2001:db8:1::1/128 nexthop via fe80::1 dev eth1 + ip -n "$r1" route add 2001:db8:1::3/128 nexthop via fe80::4 dev eth2 + + # Setup H2 + ip -n "$h2" address add 2001:db8:1::3/128 dev lo + ip -n "$h2" route add ::/0 nexthop via fe80::3 dev eth2 + + # Prime the network + ip netns exec "$h1" ping6 -c5 2001:db8:1::3 >/dev/null 2>&1 +} + +traceroute6_ext_iio_iif_test() +{ + local r1_ifindex h2_ifindex + local pkt_len=$1; shift + + # Test that incoming interface info is not appended by default. + run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep INC" + check_fail $? "Incoming interface info appended by default when should not" + + # Test that the extension is appended when enabled. + run_cmd "$r1" "bash -c \"echo 0x01 > /proc/sys/net/ipv6/icmp/errors_extension_mask\"" + check_err $? "Failed to enable incoming interface info extension on R1" + + run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep INC" + check_err $? "Incoming interface info not appended after enable" + + # Test that the extension is not appended when disabled. + run_cmd "$r1" "bash -c \"echo 0x00 > /proc/sys/net/ipv6/icmp/errors_extension_mask\"" + check_err $? "Failed to disable incoming interface info extension on R1" + + run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep INC" + check_fail $? "Incoming interface info appended after disable" + + # Test that the extension is sent correctly from both R1 and H2. 
+ run_cmd "$r1" "sysctl -w net.ipv6.icmp.errors_extension_mask=0x01" + r1_ifindex=$(ip -n "$r1" -j link show dev eth1 | jq '.[]["ifindex"]') + run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep '<INC:$r1_ifindex,\"eth1\",mtu=1500>'" + check_err $? "Wrong incoming interface info reported from R1" + + run_cmd "$h2" "sysctl -w net.ipv6.icmp.errors_extension_mask=0x01" + h2_ifindex=$(ip -n "$h2" -j link show dev eth2 | jq '.[]["ifindex"]') + run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep '<INC:$h2_ifindex,\"eth2\",mtu=1500>'" + check_err $? "Wrong incoming interface info reported from H2" + + # Add a global address on the incoming interface of R1 and check that + # it is reported. + run_cmd "$r1" "ip address add 2001:db8:100::1/64 dev eth1 nodad" + run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep '<INC:$r1_ifindex,2001:db8:100::1,\"eth1\",mtu=1500>'" + check_err $? "Wrong incoming interface info reported from R1 after address addition" + run_cmd "$r1" "ip address del 2001:db8:100::1/64 dev eth1" + + # Change name and MTU and make sure the result is still correct. + run_cmd "$r1" "ip link set dev eth1 name eth1tag mtu 1501" + run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep '<INC:$r1_ifindex,\"eth1tag\",mtu=1501>'" + check_err $? "Wrong incoming interface info reported from R1 after name and MTU change" + run_cmd "$r1" "ip link set dev eth1tag name eth1 mtu 1500" + + run_cmd "$r1" "sysctl -w net.ipv6.icmp.errors_extension_mask=0x00" + run_cmd "$h2" "sysctl -w net.ipv6.icmp.errors_extension_mask=0x00" +} + +run_traceroute6_ext() +{ + # Need at least version 2.1.5 for RFC 5837 support. + if ! check_traceroute6_version 2.1.5; then + log_test_skip "traceroute6 too old, missing ICMP extensions support" + return + fi + + setup_traceroute6_ext + + RET=0 + + ## General ICMP extensions tests + + # Test that ICMP extensions are disabled by default. 
+ run_cmd "$h1" "sysctl net.ipv6.icmp.errors_extension_mask | grep \"= 0$\"" + check_err $? "ICMP extensions are not disabled by default" + + # Test that unsupported values are rejected. Do not use "sysctl" as + # older versions do not return an error code upon failure. + run_cmd "$h1" "bash -c \"echo 0x80 > /proc/sys/net/ipv6/icmp/errors_extension_mask\"" + check_fail $? "Unsupported sysctl value was not rejected" + + ## Extension-specific tests + + # Incoming interface info test. Test with various packet sizes, + # including the default one. + traceroute6_ext_iio_iif_test + traceroute6_ext_iio_iif_test 127 + traceroute6_ext_iio_iif_test 128 + traceroute6_ext_iio_iif_test 129 + + log_test "IPv6 traceroute with ICMP extensions" + + cleanup_traceroute6_ext +} + +################################################################################ # traceroute test # # Verify that traceroute from H1 to H2 shows 1.0.3.1 and 1.0.1.1 when @@ -438,14 +607,157 @@ run_traceroute_vrf() } ################################################################################ +# traceroute with ICMP extensions test +# +# Verify that in this scenario +# +# ---- ---- ---- +# |H1|--------------------------|R1|--------------------------|H2| +# ---- N1 ---- N2 ---- +# +# ICMP extensions are correctly reported. The loopback interfaces on all the +# nodes are assigned global addresses and the interfaces connecting the nodes +# are assigned IPv6 link-local addresses. 
+ +cleanup_traceroute_ext() +{ + cleanup_all_ns +} + +setup_traceroute_ext() +{ + # Start clean + cleanup_traceroute_ext + + setup_ns h1 r1 h2 + create_ns "$h1" + create_ns "$r1" + create_ns "$h2" + + # Setup N1 + connect_ns "$h1" eth1 - fe80::1/64 "$r1" eth1 - fe80::2/64 + # Setup N2 + connect_ns "$r1" eth2 - fe80::3/64 "$h2" eth2 - fe80::4/64 + + # Setup H1 + ip -n "$h1" address add 192.0.2.1/32 dev lo + ip -n "$h1" route add 0.0.0.0/0 nexthop via inet6 fe80::2 dev eth1 + + # Setup R1 + ip -n "$r1" address add 192.0.2.2/32 dev lo + ip -n "$r1" route add 192.0.2.1/32 nexthop via inet6 fe80::1 dev eth1 + ip -n "$r1" route add 192.0.2.3/32 nexthop via inet6 fe80::4 dev eth2 + + # Setup H2 + ip -n "$h2" address add 192.0.2.3/32 dev lo + ip -n "$h2" route add 0.0.0.0/0 nexthop via inet6 fe80::3 dev eth2 + + # Prime the network + ip netns exec "$h1" ping -c5 192.0.2.3 >/dev/null 2>&1 +} + +traceroute_ext_iio_iif_test() +{ + local r1_ifindex h2_ifindex + local pkt_len=$1; shift + + # Test that incoming interface info is not appended by default. + run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep INC" + check_fail $? "Incoming interface info appended by default when should not" + + # Test that the extension is appended when enabled. + run_cmd "$r1" "bash -c \"echo 0x01 > /proc/sys/net/ipv4/icmp_errors_extension_mask\"" + check_err $? "Failed to enable incoming interface info extension on R1" + + run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep INC" + check_err $? "Incoming interface info not appended after enable" + + # Test that the extension is not appended when disabled. + run_cmd "$r1" "bash -c \"echo 0x00 > /proc/sys/net/ipv4/icmp_errors_extension_mask\"" + check_err $? "Failed to disable incoming interface info extension on R1" + + run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep INC" + check_fail $? "Incoming interface info appended after disable" + + # Test that the extension is sent correctly from both R1 and H2. 
+ run_cmd "$r1" "sysctl -w net.ipv4.icmp_errors_extension_mask=0x01" + r1_ifindex=$(ip -n "$r1" -j link show dev eth1 | jq '.[]["ifindex"]') + run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep '<INC:$r1_ifindex,\"eth1\",mtu=1500>'" + check_err $? "Wrong incoming interface info reported from R1" + + run_cmd "$h2" "sysctl -w net.ipv4.icmp_errors_extension_mask=0x01" + h2_ifindex=$(ip -n "$h2" -j link show dev eth2 | jq '.[]["ifindex"]') + run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep '<INC:$h2_ifindex,\"eth2\",mtu=1500>'" + check_err $? "Wrong incoming interface info reported from H2" + + # Add a global address on the incoming interface of R1 and check that + # it is reported. + run_cmd "$r1" "ip address add 198.51.100.1/24 dev eth1" + run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep '<INC:$r1_ifindex,198.51.100.1,\"eth1\",mtu=1500>'" + check_err $? "Wrong incoming interface info reported from R1 after address addition" + run_cmd "$r1" "ip address del 198.51.100.1/24 dev eth1" + + # Change name and MTU and make sure the result is still correct. + # Re-add the route towards H1 since it was deleted when we removed the + # last IPv4 address from eth1 on R1. + run_cmd "$r1" "ip route add 192.0.2.1/32 nexthop via inet6 fe80::1 dev eth1" + run_cmd "$r1" "ip link set dev eth1 name eth1tag mtu 1501" + run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep '<INC:$r1_ifindex,\"eth1tag\",mtu=1501>'" + check_err $? "Wrong incoming interface info reported from R1 after name and MTU change" + run_cmd "$r1" "ip link set dev eth1tag name eth1 mtu 1500" + + run_cmd "$r1" "sysctl -w net.ipv4.icmp_errors_extension_mask=0x00" + run_cmd "$h2" "sysctl -w net.ipv4.icmp_errors_extension_mask=0x00" +} + +run_traceroute_ext() +{ + # Need at least version 2.1.5 for RFC 5837 support. + if ! 
check_traceroute_version 2.1.5; then + log_test_skip "traceroute too old, missing ICMP extensions support" + return + fi + + setup_traceroute_ext + + RET=0 + + ## General ICMP extensions tests + + # Test that ICMP extensions are disabled by default. + run_cmd "$h1" "sysctl net.ipv4.icmp_errors_extension_mask | grep \"= 0$\"" + check_err $? "ICMP extensions are not disabled by default" + + # Test that unsupported values are rejected. Do not use "sysctl" as + # older versions do not return an error code upon failure. + run_cmd "$h1" "bash -c \"echo 0x80 > /proc/sys/net/ipv4/icmp_errors_extension_mask\"" + check_fail $? "Unsupported sysctl value was not rejected" + + ## Extension-specific tests + + # Incoming interface info test. Test with various packet sizes, + # including the default one. + traceroute_ext_iio_iif_test + traceroute_ext_iio_iif_test 127 + traceroute_ext_iio_iif_test 128 + traceroute_ext_iio_iif_test 129 + + log_test "IPv4 traceroute with ICMP extensions" + + cleanup_traceroute_ext +} + +################################################################################ # Run tests run_tests() { run_traceroute6 run_traceroute6_vrf + run_traceroute6_ext run_traceroute run_traceroute_vrf + run_traceroute_ext } ################################################################################ @@ -462,6 +774,7 @@ done require_command traceroute6 require_command traceroute +require_command jq run_tests |