summaryrefslogtreecommitdiff
path: root/net/ipv6
diff options
context:
space:
mode:
authorKuniyuki Iwashima <kuniyu@google.com>2025-10-14 22:42:07 +0000
committerJakub Kicinski <kuba@kernel.org>2025-10-17 16:06:52 -0700
commit1c17f4373d4db1e1f0ebd3ddcd8e7a642927a826 (patch)
tree5e0f389d4c438634f8ede1c10435739d34cea71c /net/ipv6
parent0746da01767e8a0df97ae5d031d852e932e03682 (diff)
ipv6: Move ipv6_fl_list from ipv6_pinfo to inet_sock.
In {tcp6,udp6,raw6}_sock, struct ipv6_pinfo is always placed at the beginning of a new cache line because 1. __alignof__(struct tcp_sock) is 64 due to ____cacheline_aligned of __cacheline_group_begin(tcp_sock_write_tx) 2. __alignof__(struct udp_sock) is 64 due to ____cacheline_aligned of struct numa_drop_counters 3. in raw6_sock, struct numa_drop_counters is placed before struct ipv6_pinfo . struct ipv6_pinfo is 136 bytes, but the last cache line is only used by ipv6_fl_list: $ pahole -C ipv6_pinfo vmlinux struct ipv6_pinfo { ... /* --- cacheline 2 boundary (128 bytes) --- */ struct ipv6_fl_socklist * ipv6_fl_list; /* 128 8 */ /* size: 136, cachelines: 3, members: 23 */ Let's move ipv6_fl_list from struct ipv6_pinfo to struct inet_sock to save a full cache line for {tcp6,udp6,raw6}_sock. Now, struct ipv6_pinfo is 128 bytes, and {tcp6,udp6,raw6}_sock have 64 bytes less, while {tcp,udp,raw}_sock retain the same size. Before: # grep -E "^(RAW|UDP[^L\-]|TCP)" /proc/slabinfo | awk '{print $1, "\t", $4}' RAWv6 1408 UDPv6 1472 TCPv6 2560 RAW 1152 UDP 1280 TCP 2368 After: # grep -E "^(RAW|UDP[^L\-]|TCP)" /proc/slabinfo | awk '{print $1, "\t", $4}' RAWv6 1344 UDPv6 1408 TCPv6 2496 RAW 1152 UDP 1280 TCP 2368 Also, ipv6_fl_list and inet_flags (SNDFLOW bit) are placed in the same cache line. $ pahole -C inet_sock vmlinux ... /* --- cacheline 11 boundary (704 bytes) was 56 bytes ago --- */ struct ipv6_pinfo * pinet6; /* 760 8 */ /* --- cacheline 12 boundary (768 bytes) --- */ struct ipv6_fl_socklist * ipv6_fl_list; /* 768 8 */ unsigned long inet_flags; /* 776 8 */ Doc churn is due to the insufficient Type column (only 1 space short). Suggested-by: Eric Dumazet <edumazet@google.com> Signed-off-by: Kuniyuki Iwashima <kuniyu@google.com> Link: https://patch.msgid.link/20251014224210.2964778-1-kuniyu@google.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'net/ipv6')
-rw-r--r--net/ipv6/ip6_flowlabel.c44
-rw-r--r--net/ipv6/tcp_ipv6.c13
2 files changed, 28 insertions, 29 deletions
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index a3ff575798dd..60d0be47a9f3 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -66,8 +66,8 @@ EXPORT_SYMBOL(ipv6_flowlabel_exclusive);
fl != NULL; \
fl = rcu_dereference(fl->next))
-#define for_each_sk_fl_rcu(np, sfl) \
- for (sfl = rcu_dereference(np->ipv6_fl_list); \
+#define for_each_sk_fl_rcu(sk, sfl) \
+ for (sfl = rcu_dereference(inet_sk(sk)->ipv6_fl_list); \
sfl != NULL; \
sfl = rcu_dereference(sfl->next))
@@ -262,12 +262,11 @@ static struct ip6_flowlabel *fl_intern(struct net *net,
struct ip6_flowlabel *__fl6_sock_lookup(struct sock *sk, __be32 label)
{
struct ipv6_fl_socklist *sfl;
- struct ipv6_pinfo *np = inet6_sk(sk);
label &= IPV6_FLOWLABEL_MASK;
rcu_read_lock();
- for_each_sk_fl_rcu(np, sfl) {
+ for_each_sk_fl_rcu(sk, sfl) {
struct ip6_flowlabel *fl = sfl->fl;
if (fl->label == label && atomic_inc_not_zero(&fl->users)) {
@@ -283,16 +282,16 @@ EXPORT_SYMBOL_GPL(__fl6_sock_lookup);
void fl6_free_socklist(struct sock *sk)
{
- struct ipv6_pinfo *np = inet6_sk(sk);
+ struct inet_sock *inet = inet_sk(sk);
struct ipv6_fl_socklist *sfl;
- if (!rcu_access_pointer(np->ipv6_fl_list))
+ if (!rcu_access_pointer(inet->ipv6_fl_list))
return;
spin_lock_bh(&ip6_sk_fl_lock);
- while ((sfl = rcu_dereference_protected(np->ipv6_fl_list,
+ while ((sfl = rcu_dereference_protected(inet->ipv6_fl_list,
lockdep_is_held(&ip6_sk_fl_lock))) != NULL) {
- np->ipv6_fl_list = sfl->next;
+ inet->ipv6_fl_list = sfl->next;
spin_unlock_bh(&ip6_sk_fl_lock);
fl_release(sfl->fl);
@@ -470,16 +469,15 @@ done:
static int mem_check(struct sock *sk)
{
- struct ipv6_pinfo *np = inet6_sk(sk);
- struct ipv6_fl_socklist *sfl;
int room = FL_MAX_SIZE - atomic_read(&fl_size);
+ struct ipv6_fl_socklist *sfl;
int count = 0;
if (room > FL_MAX_SIZE - FL_MAX_PER_SOCK)
return 0;
rcu_read_lock();
- for_each_sk_fl_rcu(np, sfl)
+ for_each_sk_fl_rcu(sk, sfl)
count++;
rcu_read_unlock();
@@ -492,13 +490,15 @@ static int mem_check(struct sock *sk)
return 0;
}
-static inline void fl_link(struct ipv6_pinfo *np, struct ipv6_fl_socklist *sfl,
- struct ip6_flowlabel *fl)
+static inline void fl_link(struct sock *sk, struct ipv6_fl_socklist *sfl,
+ struct ip6_flowlabel *fl)
{
+ struct inet_sock *inet = inet_sk(sk);
+
spin_lock_bh(&ip6_sk_fl_lock);
sfl->fl = fl;
- sfl->next = np->ipv6_fl_list;
- rcu_assign_pointer(np->ipv6_fl_list, sfl);
+ sfl->next = inet->ipv6_fl_list;
+ rcu_assign_pointer(inet->ipv6_fl_list, sfl);
spin_unlock_bh(&ip6_sk_fl_lock);
}
@@ -520,7 +520,7 @@ int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq,
rcu_read_lock();
- for_each_sk_fl_rcu(np, sfl) {
+ for_each_sk_fl_rcu(sk, sfl) {
if (sfl->fl->label == (np->flow_label & IPV6_FLOWLABEL_MASK)) {
spin_lock_bh(&ip6_fl_lock);
freq->flr_label = sfl->fl->label;
@@ -559,7 +559,7 @@ static int ipv6_flowlabel_put(struct sock *sk, struct in6_flowlabel_req *freq)
}
spin_lock_bh(&ip6_sk_fl_lock);
- for (sflp = &np->ipv6_fl_list;
+ for (sflp = &inet_sk(sk)->ipv6_fl_list;
(sfl = socklist_dereference(*sflp)) != NULL;
sflp = &sfl->next) {
if (sfl->fl->label == freq->flr_label)
@@ -579,13 +579,12 @@ found:
static int ipv6_flowlabel_renew(struct sock *sk, struct in6_flowlabel_req *freq)
{
- struct ipv6_pinfo *np = inet6_sk(sk);
struct net *net = sock_net(sk);
struct ipv6_fl_socklist *sfl;
int err;
rcu_read_lock();
- for_each_sk_fl_rcu(np, sfl) {
+ for_each_sk_fl_rcu(sk, sfl) {
if (sfl->fl->label == freq->flr_label) {
err = fl6_renew(sfl->fl, freq->flr_linger,
freq->flr_expires);
@@ -614,7 +613,6 @@ static int ipv6_flowlabel_get(struct sock *sk, struct in6_flowlabel_req *freq,
{
struct ipv6_fl_socklist *sfl, *sfl1 = NULL;
struct ip6_flowlabel *fl, *fl1 = NULL;
- struct ipv6_pinfo *np = inet6_sk(sk);
struct net *net = sock_net(sk);
int err;
@@ -645,7 +643,7 @@ static int ipv6_flowlabel_get(struct sock *sk, struct in6_flowlabel_req *freq,
if (freq->flr_label) {
err = -EEXIST;
rcu_read_lock();
- for_each_sk_fl_rcu(np, sfl) {
+ for_each_sk_fl_rcu(sk, sfl) {
if (sfl->fl->label == freq->flr_label) {
if (freq->flr_flags & IPV6_FL_F_EXCL) {
rcu_read_unlock();
@@ -682,7 +680,7 @@ recheck:
fl1->linger = fl->linger;
if ((long)(fl->expires - fl1->expires) > 0)
fl1->expires = fl->expires;
- fl_link(np, sfl1, fl1);
+ fl_link(sk, sfl1, fl1);
fl_free(fl);
return 0;
@@ -716,7 +714,7 @@ release:
}
}
- fl_link(np, sfl1, fl);
+ fl_link(sk, sfl1, fl);
return 0;
done:
fl_free(fl);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 59c4977a811a..6197dd4e6261 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1386,7 +1386,9 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
if (!newsk)
return NULL;
- inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
+ newinet = inet_sk(newsk);
+ newinet->pinet6 = tcp_inet6_sk(newsk);
+ newinet->ipv6_fl_list = NULL;
newnp = tcp_inet6_sk(newsk);
newtp = tcp_sk(newsk);
@@ -1405,7 +1407,6 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
newnp->ipv6_mc_list = NULL;
newnp->ipv6_ac_list = NULL;
- newnp->ipv6_fl_list = NULL;
newnp->pktoptions = NULL;
newnp->opt = NULL;
newnp->mcast_oif = inet_iif(skb);
@@ -1453,10 +1454,12 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
newsk->sk_gso_type = SKB_GSO_TCPV6;
inet6_sk_rx_dst_set(newsk, skb);
- inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
+ newinet = inet_sk(newsk);
+ newinet->pinet6 = tcp_inet6_sk(newsk);
+ newinet->ipv6_fl_list = NULL;
+ newinet->inet_opt = NULL;
newtp = tcp_sk(newsk);
- newinet = inet_sk(newsk);
newnp = tcp_inet6_sk(newsk);
memcpy(newnp, np, sizeof(struct ipv6_pinfo));
@@ -1469,10 +1472,8 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
First: no IPv4 options.
*/
- newinet->inet_opt = NULL;
newnp->ipv6_mc_list = NULL;
newnp->ipv6_ac_list = NULL;
- newnp->ipv6_fl_list = NULL;
/* Clone RX bits */
newnp->rxopt.all = np->rxopt.all;