| author | Michal Kubiak <michal.kubiak@intel.com> | 2025-09-25 11:22:53 +0200 |
|---|---|---|
| committer | Tony Nguyen <anthony.l.nguyen@intel.com> | 2025-10-29 13:55:16 -0700 |
| commit | 93f53db9f9dc4a16b40ecd18e6d338ad57e4b670 (patch) | |
| tree | 5c8ed63216eff1e22bae4a6daaadfd17f6ab622b /drivers/net/ethernet/intel/ice/ice_base.c | |
| parent | 3a4f419f750946181e3d6a339a1ef1942c5b5685 (diff) | |
ice: switch to Page Pool
This patch completes the transition of the ice driver to use the Page Pool
and libeth APIs, following the same direction as commit 5fa4caff59f2
("iavf: switch to Page Pool"). With the legacy page splitting and recycling
logic already removed, the driver is now in a clean state to adopt the
modern memory model.
The Page Pool integration simplifies buffer management by offloading
DMA mapping and recycling to the core infrastructure. This eliminates
the need for driver-specific handling of headroom, buffer sizing, and
page order. The libeth helper is used for CPU-side processing, while
DMA-for-device is handled by the Page Pool core.
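As an illustrative sketch, the pattern below condenses the libeth fill-queue
setup that the diff at the bottom of this page adds to ice_vsi_cfg_rxq(); the
field values mirror that diff, the helper name is hypothetical, and error
unwinding is omitted.

```c
#include <linux/net/intel/libie/rx.h>

/* Condensed from ice_vsi_cfg_rxq() in the diff below: libeth creates the
 * Page Pool and the per-buffer state, and the Rx ring simply adopts them.
 */
static int example_setup_rx_fq(struct ice_rx_ring *ring)
{
	struct libeth_fq fq = {
		.count	 = ring->count,			/* one buffer per Rx descriptor */
		.nid	 = NUMA_NO_NODE,		/* let the core pick the NUMA node */
		.xdp	 = ice_is_xdp_ena_vsi(ring->vsi),
		.buf_len = LIBIE_MAX_RX_BUF_LEN,
	};
	int err;

	err = libeth_rx_fq_create(&fq, &ring->q_vector->napi);
	if (err)
		return err;

	ring->pp = fq.pp;		/* Page Pool that owns DMA mapping/recycling */
	ring->rx_fqes = fq.fqes;	/* receive fill-queue elements */
	ring->truesize = fq.truesize;
	ring->rx_buf_len = fq.buf_len;

	return 0;
}
```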
Additionally, this patch extends the conversion to cover XDP support.
The driver now uses libeth_xdp helpers for Rx buffer processing and
optimizes XDP_TX by skipping per-frame DMA mapping. Instead, all
buffers are mapped bidirectionally up front, leveraging the Page Pool's
lifecycle management. This significantly reduces overhead in
virtualized environments.
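The up-front bidirectional mapping is a property of the Page Pool core rather
than of the driver. A minimal, hypothetical sketch of that property follows
(in ice the pool is actually created by libeth_rx_fq_create(), not open-coded):
with PP_FLAG_DMA_MAP and DMA_BIDIRECTIONAL, each page is DMA-mapped once when
the pool allocates it and stays mapped while it recycles, so XDP_TX can send
directly out of Rx buffers without a per-frame mapping.

```c
#include <linux/dma-mapping.h>
#include <linux/netdevice.h>
#include <net/page_pool/helpers.h>

/* Hypothetical helper: create a Page Pool whose pages are DMA-mapped once,
 * bidirectionally, by the pool itself. Drivers using such a pool can skip
 * dma_map/dma_unmap on the XDP_TX path; in ice this configuration is done
 * internally by libeth rather than open-coded like this.
 */
static struct page_pool *example_create_bidir_pool(struct device *dev,
						   struct napi_struct *napi,
						   unsigned int pool_size)
{
	struct page_pool_params pp = {
		.flags		= PP_FLAG_DMA_MAP,	/* the pool maps pages for us */
		.order		= 0,			/* plain order-0 pages */
		.pool_size	= pool_size,
		.nid		= NUMA_NO_NODE,
		.dev		= dev,
		.napi		= napi,
		.dma_dir	= DMA_BIDIRECTIONAL,	/* valid for both Rx and XDP_TX */
	};

	return page_pool_create(&pp);
}
```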
Performance observations:
- In typical scenarios (netperf, XDP_PASS, XDP_DROP), performance remains
on par with the previous implementation.
- In XDP_TX mode:
* With IOMMU enabled, performance improves dramatically (over a 5x
increase) due to reduced DMA mapping overhead and better memory reuse.
* With IOMMU disabled, performance remains comparable to the previous
implementation, with no significant changes observed.
- In XDP_DROP mode:
* For small MTUs (where multiple buffers can be allocated on a single
memory page), a performance drop of approximately 20% is observed.
According to 'perf top' analysis, the bottleneck is caused by atomic
reference counter increments in the Page Pool.
* For normal MTUs (where only one buffer can be allocated within a
single memory page), performance remains comparable to baseline
levels.
This change is also a step toward a more modular and unified XDP
implementation across Intel Ethernet drivers, aligning with ongoing
efforts to consolidate and streamline feature support.
Suggested-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Suggested-by: Alexander Lobakin <aleksander.lobakin@intel.com>
Reviewed-by: Alexander Lobakin <aleksander.lobakin@intel.com>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Signed-off-by: Michal Kubiak <michal.kubiak@intel.com>
Tested-by: Alexander Nowlin <alexander.nowlin@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Diffstat (limited to 'drivers/net/ethernet/intel/ice/ice_base.c')
| -rw-r--r-- | drivers/net/ethernet/intel/ice/ice_base.c | 91 |
1 file changed, 40 insertions(+), 51 deletions(-)
diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c
index fee58f879d9e..eabab50fab33 100644
--- a/drivers/net/ethernet/intel/ice/ice_base.c
+++ b/drivers/net/ethernet/intel/ice/ice_base.c
@@ -2,6 +2,7 @@
 /* Copyright (c) 2019, Intel Corporation. */
 
 #include <net/xdp_sock_drv.h>
+#include <linux/net/intel/libie/rx.h>
 #include "ice_base.h"
 #include "ice_lib.h"
 #include "ice_dcb_lib.h"
@@ -495,7 +496,7 @@ static int ice_setup_rx_ctx(struct ice_rx_ring *ring)
 	/* Receive Packet Data Buffer Size.
 	 * The Packet Data Buffer Size is defined in 128 byte units.
 	 */
-	rlan_ctx.dbuf = DIV_ROUND_UP(ICE_RXBUF_3072,
+	rlan_ctx.dbuf = DIV_ROUND_UP(ring->rx_buf_len,
 				     BIT_ULL(ICE_RLAN_CTX_DBUF_S));
 
 	/* use 32 byte descriptors */
@@ -537,7 +538,7 @@ static int ice_setup_rx_ctx(struct ice_rx_ring *ring)
 	 * than 5 x DBUF
 	 */
 	rlan_ctx.rxmax = min_t(u32, vsi->max_frame,
-			       ICE_MAX_CHAINED_RX_BUFS * ICE_RXBUF_3072);
+			       ICE_MAX_CHAINED_RX_BUFS * ring->rx_buf_len);
 
 	/* Rx queue threshold in units of 64 */
 	rlan_ctx.lrxqthresh = 1;
@@ -573,8 +574,6 @@ static int ice_setup_rx_ctx(struct ice_rx_ring *ring)
 	if (vsi->type == ICE_VSI_VF)
 		return 0;
 
-	ring->rx_offset = ICE_SKB_PAD;
-
 	/* init queue specific tail register */
 	ring->tail = hw->hw_addr + QRX_TAIL(pf_q);
 	writel(0, ring->tail);
@@ -582,38 +581,6 @@ static int ice_setup_rx_ctx(struct ice_rx_ring *ring)
 	return 0;
 }
 
-static void ice_xsk_pool_fill_cb(struct ice_rx_ring *ring)
-{
-	void *ctx_ptr = &ring->pkt_ctx;
-	struct xsk_cb_desc desc = {};
-
-	XSK_CHECK_PRIV_TYPE(struct ice_xdp_buff);
-	desc.src = &ctx_ptr;
-	desc.off = offsetof(struct ice_xdp_buff, pkt_ctx) -
-		   sizeof(struct xdp_buff);
-	desc.bytes = sizeof(ctx_ptr);
-	xsk_pool_fill_cb(ring->xsk_pool, &desc);
-}
-
-/**
- * ice_get_frame_sz - calculate xdp_buff::frame_sz
- * @rx_ring: the ring being configured
- *
- * Return frame size based on underlying PAGE_SIZE
- */
-static unsigned int ice_get_frame_sz(struct ice_rx_ring *rx_ring)
-{
-	unsigned int frame_sz;
-
-#if (PAGE_SIZE >= 8192)
-	frame_sz = rx_ring->rx_buf_len;
-#else
-	frame_sz = PAGE_SIZE;
-#endif
-
-	return frame_sz;
-}
-
 /**
  * ice_vsi_cfg_rxq - Configure an Rx queue
  * @ring: the ring being configured
@@ -622,8 +589,14 @@ static unsigned int ice_get_frame_sz(struct ice_rx_ring *rx_ring)
  */
 static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring)
 {
+	struct libeth_fq fq = {
+		.count = ring->count,
+		.nid = NUMA_NO_NODE,
+		.xdp = ice_is_xdp_ena_vsi(ring->vsi),
+		.buf_len = LIBIE_MAX_RX_BUF_LEN,
+	};
 	struct device *dev = ice_pf_to_dev(ring->vsi->back);
-	u32 num_bufs = ICE_RX_DESC_UNUSED(ring);
+	u32 num_bufs = ICE_DESC_UNUSED(ring);
 	u32 rx_buf_len;
 	int err;
 
@@ -632,12 +605,16 @@ static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring)
 			err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
 						 ring->q_index,
 						 ring->q_vector->napi.napi_id,
-						 ICE_RXBUF_3072);
+						 ring->rx_buf_len);
 			if (err)
 				return err;
 		}
 
 		ice_rx_xsk_pool(ring);
+		err = ice_realloc_rx_xdp_bufs(ring, ring->xsk_pool);
+		if (err)
+			return err;
+
 		if (ring->xsk_pool) {
 			xdp_rxq_info_unreg(&ring->xdp_rxq);
 
@@ -655,36 +632,38 @@ static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring)
 			if (err)
 				return err;
 			xsk_pool_set_rxq_info(ring->xsk_pool, &ring->xdp_rxq);
-			ice_xsk_pool_fill_cb(ring);
 
 			dev_info(dev, "Registered XDP mem model MEM_TYPE_XSK_BUFF_POOL on Rx ring %d\n",
 				 ring->q_index);
 		} else {
+			err = libeth_rx_fq_create(&fq, &ring->q_vector->napi);
+			if (err)
+				return err;
+
+			ring->pp = fq.pp;
+			ring->rx_fqes = fq.fqes;
+			ring->truesize = fq.truesize;
+			ring->rx_buf_len = fq.buf_len;
+
 			if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) {
 				err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
 							 ring->q_index,
 							 ring->q_vector->napi.napi_id,
-							 ICE_RXBUF_3072);
+							 ring->rx_buf_len);
 				if (err)
-					return err;
+					goto err_destroy_fq;
 			}
-
-			err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
-							 MEM_TYPE_PAGE_SHARED,
-							 NULL);
-			if (err)
-				return err;
+			xdp_rxq_info_attach_page_pool(&ring->xdp_rxq,
+						      ring->pp);
 		}
 	}
 
-	xdp_init_buff(&ring->xdp, ice_get_frame_sz(ring), &ring->xdp_rxq);
 	ring->xdp.data = NULL;
-	ring->xdp_ext.pkt_ctx = &ring->pkt_ctx;
 	err = ice_setup_rx_ctx(ring);
 	if (err) {
 		dev_err(dev, "ice_setup_rx_ctx failed for RxQ %d, err %d\n",
 			ring->q_index, err);
-		return err;
+		goto err_destroy_fq;
 	}
 
 	if (ring->xsk_pool) {
@@ -712,9 +691,19 @@ static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring)
 	if (ring->vsi->type == ICE_VSI_CTRL)
 		ice_init_ctrl_rx_descs(ring, num_bufs);
 	else
-		ice_alloc_rx_bufs(ring, num_bufs);
+		err = ice_alloc_rx_bufs(ring, num_bufs);
+
+	if (err)
+		goto err_destroy_fq;
 
 	return 0;
+
+err_destroy_fq:
+	libeth_rx_fq_destroy(&fq);
+	ring->rx_fqes = NULL;
+	ring->pp = NULL;
+
+	return err;
 }
 
 int ice_vsi_cfg_single_rxq(struct ice_vsi *vsi, u16 q_idx)