diff options
| author | Michal Kubiak <michal.kubiak@intel.com> | 2025-09-25 11:22:53 +0200 |
|---|---|---|
| committer | Tony Nguyen <anthony.l.nguyen@intel.com> | 2025-10-29 13:55:16 -0700 |
| commit | 93f53db9f9dc4a16b40ecd18e6d338ad57e4b670 (patch) | |
| tree | 5c8ed63216eff1e22bae4a6daaadfd17f6ab622b /drivers/net/ethernet/intel/ice/ice_txrx_lib.c | |
| parent | 3a4f419f750946181e3d6a339a1ef1942c5b5685 (diff) | |
ice: switch to Page Pool
This patch completes the transition of the ice driver to use the Page Pool
and libeth APIs, following the same direction as commit 5fa4caff59f2
("iavf: switch to Page Pool"). With the legacy page splitting and recycling
logic already removed, the driver is now in a clean state to adopt the
modern memory model.
The Page Pool integration simplifies buffer management by offloading
DMA mapping and recycling to the core infrastructure. This eliminates
the need for driver-specific handling of headroom, buffer sizing, and
page order. The libeth helper is used for CPU-side processing, while
DMA-for-device is handled by the Page Pool core.
Additionally, this patch extends the conversion to cover XDP support.
The driver now uses libeth_xdp helpers for Rx buffer processing,
and optimizes XDP_TX by skipping per-frame DMA mapping. Instead, all
buffers are mapped as bi-directional up front, leveraging Page Pool's
lifecycle management. This significantly reduces overhead in virtualized
environments.
Performance observations:
- In typical scenarios (netperf, XDP_PASS, XDP_DROP), performance remains
on par with the previous implementation.
- In XDP_TX mode:
* With IOMMU enabled, performance improves dramatically - over 5x
increase - due to reduced DMA mapping overhead and better memory reuse.
* With IOMMU disabled, performance remains comparable to the previous
implementation, with no significant changes observed.
- In XDP_DROP mode:
* For small MTUs (where multiple buffers can be allocated on a single
memory page), a performance drop of approximately 20% is observed.
According to 'perf top' analysis, the bottleneck is caused by atomic
reference counter increments in the Page Pool.
* For normal MTUs (where only one buffer can be allocated within a
single memory page), performance remains comparable to baseline
levels.
This change is also a step toward a more modular and unified XDP
implementation across Intel Ethernet drivers, aligning with ongoing
efforts to consolidate and streamline feature support.
Suggested-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Suggested-by: Alexander Lobakin <aleksander.lobakin@intel.com>
Reviewed-by: Alexander Lobakin <aleksander.lobakin@intel.com>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Signed-off-by: Michal Kubiak <michal.kubiak@intel.com>
Tested-by: Alexander Nowlin <alexander.nowlin@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Diffstat (limited to 'drivers/net/ethernet/intel/ice/ice_txrx_lib.c')
| -rw-r--r-- | drivers/net/ethernet/intel/ice/ice_txrx_lib.c | 65 |
1 files changed, 41 insertions, 24 deletions
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c index 45cfaabc41cb..956da38d63b0 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c @@ -3,6 +3,7 @@ #include <linux/filter.h> #include <linux/net/intel/libie/rx.h> +#include <net/libeth/xdp.h> #include "ice_txrx_lib.h" #include "ice_eswitch.h" @@ -230,9 +231,12 @@ ice_process_skb_fields(struct ice_rx_ring *rx_ring, if (ice_is_port_repr_netdev(netdev)) ice_repr_inc_rx_stats(netdev, skb->len); + + /* __skb_push() is needed because xdp_build_skb_from_buff() + * calls eth_type_trans() + */ + __skb_push(skb, ETH_HLEN); skb->protocol = eth_type_trans(skb, netdev); - } else { - skb->protocol = eth_type_trans(skb, rx_ring->netdev); } ice_rx_csum(rx_ring, skb, rx_desc, ptype); @@ -270,19 +274,18 @@ static void ice_clean_xdp_tx_buf(struct device *dev, struct ice_tx_buf *tx_buf, struct xdp_frame_bulk *bq) { - dma_unmap_single(dev, dma_unmap_addr(tx_buf, dma), - dma_unmap_len(tx_buf, len), DMA_TO_DEVICE); - dma_unmap_len_set(tx_buf, len, 0); - switch (tx_buf->type) { case ICE_TX_BUF_XDP_TX: - page_frag_free(tx_buf->raw_buf); + libeth_xdp_return_va(tx_buf->raw_buf, true); break; case ICE_TX_BUF_XDP_XMIT: + dma_unmap_single(dev, dma_unmap_addr(tx_buf, dma), + dma_unmap_len(tx_buf, len), DMA_TO_DEVICE); xdp_return_frame_bulk(tx_buf->xdpf, bq); break; } + dma_unmap_len_set(tx_buf, len, 0); tx_buf->type = ICE_TX_BUF_EMPTY; } @@ -377,9 +380,11 @@ int __ice_xmit_xdp_ring(struct xdp_buff *xdp, struct ice_tx_ring *xdp_ring, struct ice_tx_buf *tx_buf; u32 cnt = xdp_ring->count; void *data = xdp->data; + struct page *page; u32 nr_frags = 0; u32 free_space; u32 frag = 0; + u32 offset; free_space = ICE_DESC_UNUSED(xdp_ring); if (free_space < ICE_RING_QUARTER(xdp_ring)) @@ -399,24 +404,28 @@ int __ice_xmit_xdp_ring(struct xdp_buff *xdp, struct ice_tx_ring *xdp_ring, tx_head = &xdp_ring->tx_buf[ntu]; tx_buf = tx_head; + page = 
virt_to_page(data); + offset = offset_in_page(xdp->data); + for (;;) { dma_addr_t dma; - dma = dma_map_single(dev, data, size, DMA_TO_DEVICE); - if (dma_mapping_error(dev, dma)) - goto dma_unmap; - - /* record length, and DMA address */ - dma_unmap_len_set(tx_buf, len, size); - dma_unmap_addr_set(tx_buf, dma, dma); - if (frame) { + dma = dma_map_single(dev, data, size, DMA_TO_DEVICE); + if (dma_mapping_error(dev, dma)) + goto dma_unmap; tx_buf->type = ICE_TX_BUF_FRAG; } else { + dma = page_pool_get_dma_addr(page) + offset; + dma_sync_single_for_device(dev, dma, size, DMA_BIDIRECTIONAL); tx_buf->type = ICE_TX_BUF_XDP_TX; tx_buf->raw_buf = data; } + /* record length, and DMA address */ + dma_unmap_len_set(tx_buf, len, size); + dma_unmap_addr_set(tx_buf, dma, dma); + tx_desc->buf_addr = cpu_to_le64(dma); tx_desc->cmd_type_offset_bsz = ice_build_ctob(0, 0, size, 0); @@ -430,6 +439,8 @@ int __ice_xmit_xdp_ring(struct xdp_buff *xdp, struct ice_tx_ring *xdp_ring, tx_desc = ICE_TX_DESC(xdp_ring, ntu); tx_buf = &xdp_ring->tx_buf[ntu]; + page = skb_frag_page(&sinfo->frags[frag]); + offset = skb_frag_off(&sinfo->frags[frag]); data = skb_frag_address(&sinfo->frags[frag]); size = skb_frag_size(&sinfo->frags[frag]); frag++; @@ -514,10 +525,13 @@ void ice_finalize_xdp_rx(struct ice_tx_ring *xdp_ring, unsigned int xdp_res, */ static int ice_xdp_rx_hw_ts(const struct xdp_md *ctx, u64 *ts_ns) { - const struct ice_xdp_buff *xdp_ext = (void *)ctx; + const struct libeth_xdp_buff *xdp_ext = (void *)ctx; + struct ice_rx_ring *rx_ring; - *ts_ns = ice_ptp_get_rx_hwts(xdp_ext->eop_desc, - xdp_ext->pkt_ctx); + rx_ring = libeth_xdp_buff_to_rq(xdp_ext, typeof(*rx_ring), xdp_rxq); + + *ts_ns = ice_ptp_get_rx_hwts(xdp_ext->desc, + &rx_ring->pkt_ctx); if (!*ts_ns) return -ENODATA; @@ -545,10 +559,10 @@ ice_xdp_rx_hash_type(const union ice_32b_rx_flex_desc *eop_desc) static int ice_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash, enum xdp_rss_hash_type *rss_type) { - const struct ice_xdp_buff 
*xdp_ext = (void *)ctx; + const struct libeth_xdp_buff *xdp_ext = (void *)ctx; - *hash = ice_get_rx_hash(xdp_ext->eop_desc); - *rss_type = ice_xdp_rx_hash_type(xdp_ext->eop_desc); + *hash = ice_get_rx_hash(xdp_ext->desc); + *rss_type = ice_xdp_rx_hash_type(xdp_ext->desc); if (!likely(*hash)) return -ENODATA; @@ -567,13 +581,16 @@ static int ice_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash, static int ice_xdp_rx_vlan_tag(const struct xdp_md *ctx, __be16 *vlan_proto, u16 *vlan_tci) { - const struct ice_xdp_buff *xdp_ext = (void *)ctx; + const struct libeth_xdp_buff *xdp_ext = (void *)ctx; + struct ice_rx_ring *rx_ring; + + rx_ring = libeth_xdp_buff_to_rq(xdp_ext, typeof(*rx_ring), xdp_rxq); - *vlan_proto = xdp_ext->pkt_ctx->vlan_proto; + *vlan_proto = rx_ring->pkt_ctx.vlan_proto; if (!*vlan_proto) return -ENODATA; - *vlan_tci = ice_get_vlan_tci(xdp_ext->eop_desc); + *vlan_tci = ice_get_vlan_tci(xdp_ext->desc); if (!*vlan_tci) return -ENODATA; |