diff options
| author | Jakub Kicinski <kuba@kernel.org> | 2025-05-22 09:15:07 -0700 |
|---|---|---|
| committer | Jakub Kicinski <kuba@kernel.org> | 2025-05-22 09:15:07 -0700 |
| commit | 3ccf3f441f93f18edbd482d2ef606e5153c5542c (patch) | |
| tree | e5a890aee3da6824345fee719e443689f815e26e /drivers/infiniband | |
| parent | db807e5ef8eea6948bb4993466623046f52d3056 (diff) | |
| parent | 8f7b00307bf14676b7e7f0210110a36aa0ff3e93 (diff) | |
Merge branch 'net-mlx5-convert-mlx5-to-netdev-instance-locking'
Tariq Toukan says:
====================
net/mlx5: Convert mlx5 to netdev instance locking
Cosmin Ratiu says:
mlx5 manages multiple netdevices, from basic Ethernet to Infiniband
netdevs. This patch series converts the driver to use netdev instance
locking for everything in preparation for TCP devmem Zero Copy.
Because mlx5 is tightly coupled with the ipoib driver, a series of
changes first happen in ipoib to allow it to work with mlx5 netdevs that
use instance locking:
IB/IPoIB: Enqueue separate work_structs for each flushed interface
IB/IPoIB: Replace vlan_rwsem with the netdev instance lock
IB/IPoIB: Allow using netdevs that require the instance lock
A small patch then avoids dropping RTNL during firmware update:
net/mlx5e: Don't drop RTNL during firmware flash
The main patch then converts all mlx5 netdevs to use instance locking:
net/mlx5e: Convert mlx5 netdevs to instance locking
====================
Link: https://patch.msgid.link/1747829342-1018757-1-git-send-email-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'drivers/infiniband')
| -rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib.h | 13 | ||||
| -rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_ib.c | 65 | ||||
| -rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_main.c | 127 | ||||
| -rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 8 | ||||
| -rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_vlan.c | 19 |
5 files changed, 150 insertions, 82 deletions
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index abe0522b7df4..91f866e3fb8b 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -329,14 +329,6 @@ struct ipoib_dev_priv { unsigned long flags; - /* - * This protects access to the child_intfs list. - * To READ from child_intfs the RTNL or vlan_rwsem read side must be - * held. To WRITE RTNL and the vlan_rwsem write side must be held (in - * that order) This lock exists because we have a few contexts where - * we need the child_intfs, but do not want to grab the RTNL. - */ - struct rw_semaphore vlan_rwsem; struct mutex mcast_mutex; struct rb_root path_tree; @@ -399,6 +391,9 @@ struct ipoib_dev_priv { struct ib_event_handler event_handler; struct net_device *parent; + /* 'child_intfs' and 'list' membership of all child devices are + * protected by the netdev instance lock of 'dev'. + */ struct list_head child_intfs; struct list_head list; int child_type; @@ -512,6 +507,8 @@ int ipoib_intf_init(struct ib_device *hca, u32 port, const char *format, void ipoib_ib_dev_flush_light(struct work_struct *work); void ipoib_ib_dev_flush_normal(struct work_struct *work); void ipoib_ib_dev_flush_heavy(struct work_struct *work); +void ipoib_queue_work(struct ipoib_dev_priv *priv, + enum ipoib_flush_level level); void ipoib_ib_tx_timeout_work(struct work_struct *work); void ipoib_ib_dev_cleanup(struct net_device *dev); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 5cde275daa94..10b0dbda6cd5 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -40,6 +40,7 @@ #include <linux/ip.h> #include <linux/tcp.h> +#include <net/netdev_lock.h> #include <rdma/ib_cache.h> #include "ipoib.h" @@ -781,16 +782,20 @@ static void ipoib_napi_enable(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); - napi_enable(&priv->recv_napi); - napi_enable(&priv->send_napi); + netdev_lock_ops_to_full(dev); + napi_enable_locked(&priv->recv_napi); + napi_enable_locked(&priv->send_napi); + netdev_unlock_full_to_ops(dev); } static void ipoib_napi_disable(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); - napi_disable(&priv->recv_napi); - napi_disable(&priv->send_napi); + netdev_lock_ops_to_full(dev); + napi_disable_locked(&priv->recv_napi); + napi_disable_locked(&priv->send_napi); + netdev_unlock_full_to_ops(dev); } int ipoib_ib_dev_stop_default(struct net_device *dev) @@ -1172,24 +1177,11 @@ out: } static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, - enum ipoib_flush_level level, - int nesting) + enum ipoib_flush_level level) { - struct ipoib_dev_priv *cpriv; struct net_device *dev = priv->dev; int result; - down_read_nested(&priv->vlan_rwsem, nesting); - - /* - * Flush any child interfaces too -- they might be up even if - * the parent is down. - */ - list_for_each_entry(cpriv, &priv->child_intfs, list) - __ipoib_ib_dev_flush(cpriv, level, nesting + 1); - - up_read(&priv->vlan_rwsem); - if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags) && level != IPOIB_FLUSH_HEAVY) { /* Make sure the dev_addr is set even if not flushing */ @@ -1253,10 +1245,14 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, ipoib_ib_dev_down(dev); if (level == IPOIB_FLUSH_HEAVY) { + netdev_lock_ops(dev); if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) ipoib_ib_dev_stop(dev); - if (ipoib_ib_dev_open(dev)) + result = ipoib_ib_dev_open(dev); + netdev_unlock_ops(dev); + + if (result) return; if (netif_queue_stopped(dev)) @@ -1280,7 +1276,7 @@ void ipoib_ib_dev_flush_light(struct work_struct *work) struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, flush_light); - __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_LIGHT, 0); + __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_LIGHT); } void ipoib_ib_dev_flush_normal(struct work_struct *work) @@ -1288,7 +1284,7 @@ void ipoib_ib_dev_flush_normal(struct work_struct *work) struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, flush_normal); - __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_NORMAL, 0); + __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_NORMAL); } void ipoib_ib_dev_flush_heavy(struct work_struct *work) @@ -1297,10 +1293,35 @@ void ipoib_ib_dev_flush_heavy(struct work_struct *work) container_of(work, struct ipoib_dev_priv, flush_heavy); rtnl_lock(); - __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_HEAVY, 0); + __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_HEAVY); rtnl_unlock(); } +void ipoib_queue_work(struct ipoib_dev_priv *priv, + enum ipoib_flush_level level) +{ + if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) { + struct ipoib_dev_priv *cpriv; + + netdev_lock(priv->dev); + list_for_each_entry(cpriv, &priv->child_intfs, list) + ipoib_queue_work(cpriv, level); + netdev_unlock(priv->dev); + } + + switch (level) { + case IPOIB_FLUSH_LIGHT: + queue_work(ipoib_workqueue, &priv->flush_light); + break; + case IPOIB_FLUSH_NORMAL: + queue_work(ipoib_workqueue, &priv->flush_normal); + break; + case IPOIB_FLUSH_HEAVY: + queue_work(ipoib_workqueue, &priv->flush_heavy); + break; + } +} + void ipoib_ib_dev_cleanup(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 3b463db8ce39..f2f5465f2a90 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -49,6 +49,7 @@ #include <linux/jhash.h> #include <net/arp.h> #include <net/addrconf.h> +#include <net/netdev_lock.h> #include <net/pkt_sched.h> #include <linux/inetdevice.h> #include <rdma/ib_cache.h> @@ -132,6 +133,52 @@ static int ipoib_netdev_event(struct notifier_block *this, } #endif +struct ipoib_ifupdown_work { + struct work_struct work; + struct net_device *dev; + netdevice_tracker dev_tracker; + bool up; +}; + +static void ipoib_ifupdown_task(struct work_struct *work) +{ + struct ipoib_ifupdown_work *pwork = + container_of(work, struct ipoib_ifupdown_work, work); + struct net_device *dev = pwork->dev; + unsigned int flags; + + rtnl_lock(); + flags = dev->flags; + if (pwork->up) + flags |= IFF_UP; + else + flags &= ~IFF_UP; + + if (dev->flags != flags) + dev_change_flags(dev, flags, NULL); + rtnl_unlock(); + netdev_put(dev, &pwork->dev_tracker); + kfree(pwork); +} + +static void ipoib_schedule_ifupdown_task(struct net_device *dev, bool up) +{ + struct ipoib_ifupdown_work *work; + + if ((up && (dev->flags & IFF_UP)) || + (!up && !(dev->flags & IFF_UP))) + return; + + work = kmalloc(sizeof(*work), GFP_KERNEL); + if (!work) + return; + work->dev = dev; + netdev_hold(dev, &work->dev_tracker, GFP_KERNEL); + work->up = up; + INIT_WORK(&work->work, ipoib_ifupdown_task); + queue_work(ipoib_workqueue, &work->work); +} + int ipoib_open(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); @@ -154,17 +201,10 @@ int ipoib_open(struct net_device *dev) struct ipoib_dev_priv *cpriv; /* Bring up any child interfaces too */ - down_read(&priv->vlan_rwsem); - list_for_each_entry(cpriv, &priv->child_intfs, list) { - int flags; - - flags = cpriv->dev->flags; - if (flags & IFF_UP) - continue; - - dev_change_flags(cpriv->dev, flags | IFF_UP, NULL); - } - up_read(&priv->vlan_rwsem); + netdev_lock_ops_to_full(dev); + list_for_each_entry(cpriv, &priv->child_intfs, list) + ipoib_schedule_ifupdown_task(cpriv->dev, true); + netdev_unlock_full_to_ops(dev); } else if (priv->parent) { struct ipoib_dev_priv *ppriv = ipoib_priv(priv->parent); @@ -199,17 +239,10 @@ static int ipoib_stop(struct net_device *dev) struct ipoib_dev_priv *cpriv; /* Bring down any child interfaces too */ - down_read(&priv->vlan_rwsem); - list_for_each_entry(cpriv, &priv->child_intfs, list) { - int flags; - - flags = cpriv->dev->flags; - if (!(flags & IFF_UP)) - continue; - - dev_change_flags(cpriv->dev, flags & ~IFF_UP, NULL); - } - up_read(&priv->vlan_rwsem); + netdev_lock_ops_to_full(dev); + list_for_each_entry(cpriv, &priv->child_intfs, list) + ipoib_schedule_ifupdown_task(cpriv->dev, false); + netdev_unlock_full_to_ops(dev); } return 0; @@ -426,17 +459,20 @@ static int ipoib_match_gid_pkey_addr(struct ipoib_dev_priv *priv, } } + if (test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) + return matches; + /* Check child interfaces */ - down_read_nested(&priv->vlan_rwsem, nesting); + netdev_lock(priv->dev); list_for_each_entry(child_priv, &priv->child_intfs, list) { matches += ipoib_match_gid_pkey_addr(child_priv, gid, - pkey_index, addr, - nesting + 1, - found_net_dev); + pkey_index, addr, + nesting + 1, + found_net_dev); if (matches > 1) break; } - up_read(&priv->vlan_rwsem); + netdev_unlock(priv->dev); return matches; } @@ -531,9 +567,11 @@ int ipoib_set_mode(struct net_device *dev, const char *buf) set_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags); ipoib_warn(priv, "enabling connected mode " "will cause multicast packet drops\n"); + netdev_lock_ops(dev); netdev_update_features(dev); - dev_set_mtu(dev, ipoib_cm_max_mtu(dev)); + netif_set_mtu(dev, ipoib_cm_max_mtu(dev)); netif_set_real_num_tx_queues(dev, 1); + netdev_unlock_ops(dev); rtnl_unlock(); priv->tx_wr.wr.send_flags &= ~IB_SEND_IP_CSUM; @@ -543,9 +581,11 @@ int ipoib_set_mode(struct net_device *dev, const char *buf) if (!strcmp(buf, "datagram\n")) { clear_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags); + netdev_lock_ops(dev); netdev_update_features(dev); - dev_set_mtu(dev, min(priv->mcast_mtu, dev->mtu)); + netif_set_mtu(dev, min(priv->mcast_mtu, dev->mtu)); netif_set_real_num_tx_queues(dev, dev->num_tx_queues); + netdev_unlock_ops(dev); rtnl_unlock(); ipoib_flush_paths(dev); return (!rtnl_trylock()) ? -EBUSY : 0; @@ -1212,6 +1252,7 @@ void ipoib_ib_tx_timeout_work(struct work_struct *work) int err; rtnl_lock(); + netdev_lock_ops(priv->dev); if (!test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) goto unlock; @@ -1226,6 +1267,7 @@ void ipoib_ib_tx_timeout_work(struct work_struct *work) netif_tx_wake_all_queues(priv->dev); unlock: + netdev_unlock_ops(priv->dev); rtnl_unlock(); } @@ -1992,9 +2034,9 @@ static int ipoib_ndo_init(struct net_device *ndev) dev_hold(priv->parent); - down_write(&ppriv->vlan_rwsem); + netdev_lock(priv->parent); list_add_tail(&priv->list, &ppriv->child_intfs); - up_write(&ppriv->vlan_rwsem); + netdev_unlock(priv->parent); } return 0; @@ -2004,8 +2046,6 @@ static void ipoib_ndo_uninit(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); - ASSERT_RTNL(); - /* * ipoib_remove_one guarantees the children are removed before the * parent, and that is the only place where a parent can be removed. @@ -2015,9 +2055,9 @@ static void ipoib_ndo_uninit(struct net_device *dev) if (priv->parent) { struct ipoib_dev_priv *ppriv = ipoib_priv(priv->parent); - down_write(&ppriv->vlan_rwsem); + netdev_lock(ppriv->dev); list_del(&priv->list); - up_write(&ppriv->vlan_rwsem); + netdev_unlock(ppriv->dev); } ipoib_neigh_hash_uninit(dev); @@ -2167,7 +2207,6 @@ static void ipoib_build_priv(struct net_device *dev) priv->dev = dev; spin_lock_init(&priv->lock); - init_rwsem(&priv->vlan_rwsem); mutex_init(&priv->mcast_mutex); INIT_LIST_HEAD(&priv->path_list); @@ -2372,10 +2411,10 @@ static void set_base_guid(struct ipoib_dev_priv *priv, union ib_gid *gid) netif_addr_unlock_bh(netdev); if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) { - down_read(&priv->vlan_rwsem); + netdev_lock_ops_to_full(priv->dev); list_for_each_entry(child_priv, &priv->child_intfs, list) set_base_guid(child_priv, gid); - up_read(&priv->vlan_rwsem); + netdev_unlock_full_to_ops(priv->dev); } } @@ -2415,6 +2454,14 @@ static int ipoib_set_mac(struct net_device *dev, void *addr) set_base_guid(priv, (union ib_gid *)(ss->__data + 4)); + if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) { + struct ipoib_dev_priv *cpriv; + + netdev_lock_ops_to_full(dev); + list_for_each_entry(cpriv, &priv->child_intfs, list) + queue_work(ipoib_workqueue, &cpriv->flush_light); + netdev_unlock_full_to_ops(dev); + } queue_work(ipoib_workqueue, &priv->flush_light); return 0; @@ -2526,7 +2573,7 @@ static struct net_device *ipoib_add_port(const char *format, ib_register_event_handler(&priv->event_handler); /* call event handler to ensure pkey in sync */ - queue_work(ipoib_workqueue, &priv->flush_heavy); + ipoib_queue_work(priv, IPOIB_FLUSH_HEAVY); ndev->rtnl_link_ops = ipoib_get_link_ops(); @@ -2624,9 +2671,11 @@ static void ipoib_remove_one(struct ib_device *device, void *client_data) rtnl_lock(); + netdev_lock(priv->dev); list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) unregister_netdevice_queue(cpriv->dev, &head); + netdev_unlock(priv->dev); unregister_netdevice_queue(priv->dev, &head); unregister_netdevice_many(&head); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index 368e5d77416d..86983080d28b 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -280,15 +280,15 @@ void ipoib_event(struct ib_event_handler *handler, dev_name(&record->device->dev), record->element.port_num); if (record->event == IB_EVENT_CLIENT_REREGISTER) { - queue_work(ipoib_workqueue, &priv->flush_light); + ipoib_queue_work(priv, IPOIB_FLUSH_LIGHT); } else if (record->event == IB_EVENT_PORT_ERR || record->event == IB_EVENT_PORT_ACTIVE || record->event == IB_EVENT_LID_CHANGE) { - queue_work(ipoib_workqueue, &priv->flush_normal); + ipoib_queue_work(priv, IPOIB_FLUSH_NORMAL); } else if (record->event == IB_EVENT_PKEY_CHANGE) { - queue_work(ipoib_workqueue, &priv->flush_heavy); + ipoib_queue_work(priv, IPOIB_FLUSH_HEAVY); } else if (record->event == IB_EVENT_GID_CHANGE && !test_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags)) { - queue_work(ipoib_workqueue, &priv->flush_light); + ipoib_queue_work(priv, IPOIB_FLUSH_LIGHT); } } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c index 562df2b3ef18..243e8f555eca 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c @@ -53,8 +53,7 @@ static bool is_child_unique(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv) { struct ipoib_dev_priv *tpriv; - - ASSERT_RTNL(); + bool result = true; /* * Since the legacy sysfs interface uses pkey for deletion it cannot @@ -73,13 +72,17 @@ static bool is_child_unique(struct ipoib_dev_priv *ppriv, if (ppriv->pkey == priv->pkey) return false; + netdev_lock(ppriv->dev); list_for_each_entry(tpriv, &ppriv->child_intfs, list) { if (tpriv->pkey == priv->pkey && - tpriv->child_type == IPOIB_LEGACY_CHILD) - return false; + tpriv->child_type == IPOIB_LEGACY_CHILD) { + result = false; + break; + } } + netdev_unlock(ppriv->dev); - return true; + return result; } /* @@ -98,8 +101,6 @@ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv, int result; struct rdma_netdev *rn = netdev_priv(ndev); - ASSERT_RTNL(); - /* * We do not need to touch priv if register_netdevice fails, so just * always use this flow. @@ -267,6 +268,7 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey) ppriv = ipoib_priv(pdev); rc = -ENODEV; + netdev_lock(ppriv->dev); list_for_each_entry_safe(priv, tpriv, &ppriv->child_intfs, list) { if (priv->pkey == pkey && priv->child_type == IPOIB_LEGACY_CHILD) { @@ -278,9 +280,7 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey) goto out; } - down_write(&ppriv->vlan_rwsem); list_del_init(&priv->list); - up_write(&ppriv->vlan_rwsem); work->dev = priv->dev; INIT_WORK(&work->work, ipoib_vlan_delete_task); queue_work(ipoib_workqueue, &work->work); @@ -291,6 +291,7 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey) } out: + netdev_unlock(ppriv->dev); rtnl_unlock(); return rc; |