diff options
| author | Kuniyuki Iwashima <kuniyu@google.com> | 2025-10-14 22:42:07 +0000 |
|---|---|---|
| committer | Jakub Kicinski <kuba@kernel.org> | 2025-10-17 16:06:52 -0700 |
| commit | 1c17f4373d4db1e1f0ebd3ddcd8e7a642927a826 (patch) | |
| tree | 5e0f389d4c438634f8ede1c10435739d34cea71c /net/ipv6 | |
| parent | 0746da01767e8a0df97ae5d031d852e932e03682 (diff) | |
ipv6: Move ipv6_fl_list from ipv6_pinfo to inet_sock.
In {tcp6,udp6,raw6}_sock, struct ipv6_pinfo is always placed at
the beginning of a new cache line because:
1. __alignof__(struct tcp_sock) is 64 due to ____cacheline_aligned
of __cacheline_group_begin(tcp_sock_write_tx)
2. __alignof__(struct udp_sock) is 64 due to ____cacheline_aligned
of struct numa_drop_counters
3. in raw6_sock, struct numa_drop_counters is placed before
struct ipv6_pinfo
struct ipv6_pinfo is 136 bytes, but the last cache line is
only used by ipv6_fl_list:
$ pahole -C ipv6_pinfo vmlinux
struct ipv6_pinfo {
...
/* --- cacheline 2 boundary (128 bytes) --- */
struct ipv6_fl_socklist * ipv6_fl_list; /* 128 8 */
/* size: 136, cachelines: 3, members: 23 */
Let's move ipv6_fl_list from struct ipv6_pinfo to struct inet_sock
to save a full cache line for {tcp6,udp6,raw6}_sock.
Now, struct ipv6_pinfo is 128 bytes, and {tcp6,udp6,raw6}_sock are
64 bytes smaller, while {tcp,udp,raw}_sock retain the same size.
Before:
# grep -E "^(RAW|UDP[^L\-]|TCP)" /proc/slabinfo | awk '{print $1, "\t", $4}'
RAWv6 1408
UDPv6 1472
TCPv6 2560
RAW 1152
UDP 1280
TCP 2368
After:
# grep -E "^(RAW|UDP[^L\-]|TCP)" /proc/slabinfo | awk '{print $1, "\t", $4}'
RAWv6 1344
UDPv6 1408
TCPv6 2496
RAW 1152
UDP 1280
TCP 2368
Also, ipv6_fl_list and inet_flags (SNDFLOW bit) are placed in the
same cache line.
$ pahole -C inet_sock vmlinux
...
/* --- cacheline 11 boundary (704 bytes) was 56 bytes ago --- */
struct ipv6_pinfo * pinet6; /* 760 8 */
/* --- cacheline 12 boundary (768 bytes) --- */
struct ipv6_fl_socklist * ipv6_fl_list; /* 768 8 */
unsigned long inet_flags; /* 776 8 */
The doc churn is due to the Type column being too narrow (only 1 space short).
Suggested-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20251014224210.2964778-1-kuniyu@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'net/ipv6')
| -rw-r--r-- | net/ipv6/ip6_flowlabel.c | 44 | ||||
| -rw-r--r-- | net/ipv6/tcp_ipv6.c | 13 |
2 files changed, 28 insertions, 29 deletions
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index a3ff575798dd..60d0be47a9f3 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -66,8 +66,8 @@ EXPORT_SYMBOL(ipv6_flowlabel_exclusive); fl != NULL; \ fl = rcu_dereference(fl->next)) -#define for_each_sk_fl_rcu(np, sfl) \ - for (sfl = rcu_dereference(np->ipv6_fl_list); \ +#define for_each_sk_fl_rcu(sk, sfl) \ + for (sfl = rcu_dereference(inet_sk(sk)->ipv6_fl_list); \ sfl != NULL; \ sfl = rcu_dereference(sfl->next)) @@ -262,12 +262,11 @@ static struct ip6_flowlabel *fl_intern(struct net *net, struct ip6_flowlabel *__fl6_sock_lookup(struct sock *sk, __be32 label) { struct ipv6_fl_socklist *sfl; - struct ipv6_pinfo *np = inet6_sk(sk); label &= IPV6_FLOWLABEL_MASK; rcu_read_lock(); - for_each_sk_fl_rcu(np, sfl) { + for_each_sk_fl_rcu(sk, sfl) { struct ip6_flowlabel *fl = sfl->fl; if (fl->label == label && atomic_inc_not_zero(&fl->users)) { @@ -283,16 +282,16 @@ EXPORT_SYMBOL_GPL(__fl6_sock_lookup); void fl6_free_socklist(struct sock *sk) { - struct ipv6_pinfo *np = inet6_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct ipv6_fl_socklist *sfl; - if (!rcu_access_pointer(np->ipv6_fl_list)) + if (!rcu_access_pointer(inet->ipv6_fl_list)) return; spin_lock_bh(&ip6_sk_fl_lock); - while ((sfl = rcu_dereference_protected(np->ipv6_fl_list, + while ((sfl = rcu_dereference_protected(inet->ipv6_fl_list, lockdep_is_held(&ip6_sk_fl_lock))) != NULL) { - np->ipv6_fl_list = sfl->next; + inet->ipv6_fl_list = sfl->next; spin_unlock_bh(&ip6_sk_fl_lock); fl_release(sfl->fl); @@ -470,16 +469,15 @@ done: static int mem_check(struct sock *sk) { - struct ipv6_pinfo *np = inet6_sk(sk); - struct ipv6_fl_socklist *sfl; int room = FL_MAX_SIZE - atomic_read(&fl_size); + struct ipv6_fl_socklist *sfl; int count = 0; if (room > FL_MAX_SIZE - FL_MAX_PER_SOCK) return 0; rcu_read_lock(); - for_each_sk_fl_rcu(np, sfl) + for_each_sk_fl_rcu(sk, sfl) count++; rcu_read_unlock(); @@ -492,13 +490,15 @@ static int 
mem_check(struct sock *sk) return 0; } -static inline void fl_link(struct ipv6_pinfo *np, struct ipv6_fl_socklist *sfl, - struct ip6_flowlabel *fl) +static inline void fl_link(struct sock *sk, struct ipv6_fl_socklist *sfl, + struct ip6_flowlabel *fl) { + struct inet_sock *inet = inet_sk(sk); + spin_lock_bh(&ip6_sk_fl_lock); sfl->fl = fl; - sfl->next = np->ipv6_fl_list; - rcu_assign_pointer(np->ipv6_fl_list, sfl); + sfl->next = inet->ipv6_fl_list; + rcu_assign_pointer(inet->ipv6_fl_list, sfl); spin_unlock_bh(&ip6_sk_fl_lock); } @@ -520,7 +520,7 @@ int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq, rcu_read_lock(); - for_each_sk_fl_rcu(np, sfl) { + for_each_sk_fl_rcu(sk, sfl) { if (sfl->fl->label == (np->flow_label & IPV6_FLOWLABEL_MASK)) { spin_lock_bh(&ip6_fl_lock); freq->flr_label = sfl->fl->label; @@ -559,7 +559,7 @@ static int ipv6_flowlabel_put(struct sock *sk, struct in6_flowlabel_req *freq) } spin_lock_bh(&ip6_sk_fl_lock); - for (sflp = &np->ipv6_fl_list; + for (sflp = &inet_sk(sk)->ipv6_fl_list; (sfl = socklist_dereference(*sflp)) != NULL; sflp = &sfl->next) { if (sfl->fl->label == freq->flr_label) @@ -579,13 +579,12 @@ found: static int ipv6_flowlabel_renew(struct sock *sk, struct in6_flowlabel_req *freq) { - struct ipv6_pinfo *np = inet6_sk(sk); struct net *net = sock_net(sk); struct ipv6_fl_socklist *sfl; int err; rcu_read_lock(); - for_each_sk_fl_rcu(np, sfl) { + for_each_sk_fl_rcu(sk, sfl) { if (sfl->fl->label == freq->flr_label) { err = fl6_renew(sfl->fl, freq->flr_linger, freq->flr_expires); @@ -614,7 +613,6 @@ static int ipv6_flowlabel_get(struct sock *sk, struct in6_flowlabel_req *freq, { struct ipv6_fl_socklist *sfl, *sfl1 = NULL; struct ip6_flowlabel *fl, *fl1 = NULL; - struct ipv6_pinfo *np = inet6_sk(sk); struct net *net = sock_net(sk); int err; @@ -645,7 +643,7 @@ static int ipv6_flowlabel_get(struct sock *sk, struct in6_flowlabel_req *freq, if (freq->flr_label) { err = -EEXIST; rcu_read_lock(); - for_each_sk_fl_rcu(np, 
sfl) { + for_each_sk_fl_rcu(sk, sfl) { if (sfl->fl->label == freq->flr_label) { if (freq->flr_flags & IPV6_FL_F_EXCL) { rcu_read_unlock(); @@ -682,7 +680,7 @@ recheck: fl1->linger = fl->linger; if ((long)(fl->expires - fl1->expires) > 0) fl1->expires = fl->expires; - fl_link(np, sfl1, fl1); + fl_link(sk, sfl1, fl1); fl_free(fl); return 0; @@ -716,7 +714,7 @@ release: } } - fl_link(np, sfl1, fl); + fl_link(sk, sfl1, fl); return 0; done: fl_free(fl); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 59c4977a811a..6197dd4e6261 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1386,7 +1386,9 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * if (!newsk) return NULL; - inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk); + newinet = inet_sk(newsk); + newinet->pinet6 = tcp_inet6_sk(newsk); + newinet->ipv6_fl_list = NULL; newnp = tcp_inet6_sk(newsk); newtp = tcp_sk(newsk); @@ -1405,7 +1407,6 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * newnp->ipv6_mc_list = NULL; newnp->ipv6_ac_list = NULL; - newnp->ipv6_fl_list = NULL; newnp->pktoptions = NULL; newnp->opt = NULL; newnp->mcast_oif = inet_iif(skb); @@ -1453,10 +1454,12 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * newsk->sk_gso_type = SKB_GSO_TCPV6; inet6_sk_rx_dst_set(newsk, skb); - inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk); + newinet = inet_sk(newsk); + newinet->pinet6 = tcp_inet6_sk(newsk); + newinet->ipv6_fl_list = NULL; + newinet->inet_opt = NULL; newtp = tcp_sk(newsk); - newinet = inet_sk(newsk); newnp = tcp_inet6_sk(newsk); memcpy(newnp, np, sizeof(struct ipv6_pinfo)); @@ -1469,10 +1472,8 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * First: no IPv4 options. */ - newinet->inet_opt = NULL; newnp->ipv6_mc_list = NULL; newnp->ipv6_ac_list = NULL; - newnp->ipv6_fl_list = NULL; /* Clone RX bits */ newnp->rxopt.all = np->rxopt.all; |