path: root/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
author     Michal Kubiak <michal.kubiak@intel.com>   2025-09-25 11:22:53 +0200
committer  Tony Nguyen <anthony.l.nguyen@intel.com>  2025-10-29 13:55:16 -0700
commit     93f53db9f9dc4a16b40ecd18e6d338ad57e4b670 (patch)
tree       5c8ed63216eff1e22bae4a6daaadfd17f6ab622b /drivers/net/ethernet/intel/ice/ice_txrx_lib.c
parent     3a4f419f750946181e3d6a339a1ef1942c5b5685 (diff)
ice: switch to Page Pool
This patch completes the transition of the ice driver to use the Page Pool
and libeth APIs, following the same direction as commit 5fa4caff59f2
("iavf: switch to Page Pool").

With the legacy page splitting and recycling logic already removed, the
driver is now in a clean state to adopt the modern memory model. The Page
Pool integration simplifies buffer management by offloading DMA mapping
and recycling to the core infrastructure. This eliminates the need for
driver-specific handling of headroom, buffer sizing, and page order. The
libeth helper is used for CPU-side processing, while DMA-for-device is
handled by the Page Pool core.

Additionally, this patch extends the conversion to cover XDP support. The
driver now uses libeth_xdp helpers for Rx buffer processing, and optimizes
XDP_TX by skipping per-frame DMA mapping. Instead, all buffers are mapped
as bi-directional up front, leveraging Page Pool's lifecycle management.
This significantly reduces overhead in virtualized environments.

Performance observations:

- In typical scenarios (netperf, XDP_PASS, XDP_DROP), performance remains
  on par with the previous implementation.

- In XDP_TX mode:
  * With IOMMU enabled, performance improves dramatically - over 5x
    increase - due to reduced DMA mapping overhead and better memory
    reuse.
  * With IOMMU disabled, performance remains comparable to the previous
    implementation, with no significant changes observed.

- In XDP_DROP mode:
  * For small MTUs (where multiple buffers can be allocated on a single
    memory page), a performance drop of approximately 20% is observed.
    According to 'perf top' analysis, the bottleneck is caused by atomic
    reference counter increments in the Page Pool.
  * For normal MTUs (where only one buffer can be allocated within a
    single memory page), performance remains comparable to baseline
    levels.

This change is also a step toward a more modular and unified XDP
implementation across Intel Ethernet drivers, aligning with ongoing
efforts to consolidate and streamline feature support.

Suggested-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Suggested-by: Alexander Lobakin <aleksander.lobakin@intel.com>
Reviewed-by: Alexander Lobakin <aleksander.lobakin@intel.com>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Signed-off-by: Michal Kubiak <michal.kubiak@intel.com>
Tested-by: Alexander Nowlin <alexander.nowlin@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
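[Editor's note] The central mechanic described above shows up in the
__ice_xmit_xdp_ring() hunk further down: XDP_TX frames no longer go through
dma_map_single(), but instead reuse the DMA address the Page Pool
established when the page was allocated, followed by a
dma_sync_single_for_device() call. The sketch below illustrates that
"map once at pool creation, sync per frame at XDP_TX" pattern under the
assumption of a pool created with PP_FLAG_DMA_MAP and DMA_BIDIRECTIONAL;
the helpers xdp_tx_pool_create() and xdp_tx_get_dma() are hypothetical
names for illustration only and are not part of this patch or of the ice
driver.

  /*
   * Minimal sketch, not the driver's actual setup: a pool whose pages are
   * DMA-mapped bi-directionally up front, plus the per-frame lookup used
   * instead of dma_map_single() on the XDP_TX path.
   */
  #include <linux/dma-mapping.h>
  #include <linux/mm.h>
  #include <net/page_pool/helpers.h>

  /* Create a pool whose pages are mapped once, for both Rx and XDP_TX. */
  static struct page_pool *xdp_tx_pool_create(struct device *dev, u32 ring_size)
  {
          struct page_pool_params pp = {
                  .flags          = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV,
                  .order          = 0,
                  .pool_size      = ring_size,
                  .nid            = NUMA_NO_NODE,
                  .dev            = dev,
                  .dma_dir        = DMA_BIDIRECTIONAL, /* Rx and XDP_TX share one mapping */
                  .max_len        = PAGE_SIZE,
          };

          return page_pool_create(&pp);
  }

  /*
   * Per-frame XDP_TX path: no dma_map_single(), just look up the page's
   * pre-established mapping and make CPU writes visible to the device.
   */
  static dma_addr_t xdp_tx_get_dma(struct device *dev, void *data, u32 size)
  {
          struct page *page = virt_to_page(data);
          dma_addr_t dma;

          dma = page_pool_get_dma_addr(page) + offset_in_page(data);
          dma_sync_single_for_device(dev, dma, size, DMA_BIDIRECTIONAL);

          return dma;
  }

This is also where the IOMMU numbers in the performance notes come from:
with an IOMMU enabled, every dma_map_single()/dma_unmap_single() pair pays
for IOVA allocation and page-table updates, so replacing it with a lookup
of the pre-established mapping plus a per-frame cache sync is what yields
the reported 5x XDP_TX improvement.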
Diffstat (limited to 'drivers/net/ethernet/intel/ice/ice_txrx_lib.c')
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_txrx_lib.c  65
1 file changed, 41 insertions(+), 24 deletions(-)
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
index 45cfaabc41cb..956da38d63b0 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
@@ -3,6 +3,7 @@
#include <linux/filter.h>
#include <linux/net/intel/libie/rx.h>
+#include <net/libeth/xdp.h>
#include "ice_txrx_lib.h"
#include "ice_eswitch.h"
@@ -230,9 +231,12 @@ ice_process_skb_fields(struct ice_rx_ring *rx_ring,
if (ice_is_port_repr_netdev(netdev))
ice_repr_inc_rx_stats(netdev, skb->len);
+
+ /* __skb_push() is needed because xdp_build_skb_from_buff()
+ * calls eth_type_trans()
+ */
+ __skb_push(skb, ETH_HLEN);
skb->protocol = eth_type_trans(skb, netdev);
- } else {
- skb->protocol = eth_type_trans(skb, rx_ring->netdev);
}
ice_rx_csum(rx_ring, skb, rx_desc, ptype);
@@ -270,19 +274,18 @@ static void
ice_clean_xdp_tx_buf(struct device *dev, struct ice_tx_buf *tx_buf,
struct xdp_frame_bulk *bq)
{
- dma_unmap_single(dev, dma_unmap_addr(tx_buf, dma),
- dma_unmap_len(tx_buf, len), DMA_TO_DEVICE);
- dma_unmap_len_set(tx_buf, len, 0);
-
switch (tx_buf->type) {
case ICE_TX_BUF_XDP_TX:
- page_frag_free(tx_buf->raw_buf);
+ libeth_xdp_return_va(tx_buf->raw_buf, true);
break;
case ICE_TX_BUF_XDP_XMIT:
+ dma_unmap_single(dev, dma_unmap_addr(tx_buf, dma),
+ dma_unmap_len(tx_buf, len), DMA_TO_DEVICE);
xdp_return_frame_bulk(tx_buf->xdpf, bq);
break;
}
+ dma_unmap_len_set(tx_buf, len, 0);
tx_buf->type = ICE_TX_BUF_EMPTY;
}
@@ -377,9 +380,11 @@ int __ice_xmit_xdp_ring(struct xdp_buff *xdp, struct ice_tx_ring *xdp_ring,
struct ice_tx_buf *tx_buf;
u32 cnt = xdp_ring->count;
void *data = xdp->data;
+ struct page *page;
u32 nr_frags = 0;
u32 free_space;
u32 frag = 0;
+ u32 offset;
free_space = ICE_DESC_UNUSED(xdp_ring);
if (free_space < ICE_RING_QUARTER(xdp_ring))
@@ -399,24 +404,28 @@ int __ice_xmit_xdp_ring(struct xdp_buff *xdp, struct ice_tx_ring *xdp_ring,
tx_head = &xdp_ring->tx_buf[ntu];
tx_buf = tx_head;
+ page = virt_to_page(data);
+ offset = offset_in_page(xdp->data);
+
for (;;) {
dma_addr_t dma;
- dma = dma_map_single(dev, data, size, DMA_TO_DEVICE);
- if (dma_mapping_error(dev, dma))
- goto dma_unmap;
-
- /* record length, and DMA address */
- dma_unmap_len_set(tx_buf, len, size);
- dma_unmap_addr_set(tx_buf, dma, dma);
-
if (frame) {
+ dma = dma_map_single(dev, data, size, DMA_TO_DEVICE);
+ if (dma_mapping_error(dev, dma))
+ goto dma_unmap;
tx_buf->type = ICE_TX_BUF_FRAG;
} else {
+ dma = page_pool_get_dma_addr(page) + offset;
+ dma_sync_single_for_device(dev, dma, size, DMA_BIDIRECTIONAL);
tx_buf->type = ICE_TX_BUF_XDP_TX;
tx_buf->raw_buf = data;
}
+ /* record length, and DMA address */
+ dma_unmap_len_set(tx_buf, len, size);
+ dma_unmap_addr_set(tx_buf, dma, dma);
+
tx_desc->buf_addr = cpu_to_le64(dma);
tx_desc->cmd_type_offset_bsz = ice_build_ctob(0, 0, size, 0);
@@ -430,6 +439,8 @@ int __ice_xmit_xdp_ring(struct xdp_buff *xdp, struct ice_tx_ring *xdp_ring,
tx_desc = ICE_TX_DESC(xdp_ring, ntu);
tx_buf = &xdp_ring->tx_buf[ntu];
+ page = skb_frag_page(&sinfo->frags[frag]);
+ offset = skb_frag_off(&sinfo->frags[frag]);
data = skb_frag_address(&sinfo->frags[frag]);
size = skb_frag_size(&sinfo->frags[frag]);
frag++;
@@ -514,10 +525,13 @@ void ice_finalize_xdp_rx(struct ice_tx_ring *xdp_ring, unsigned int xdp_res,
*/
static int ice_xdp_rx_hw_ts(const struct xdp_md *ctx, u64 *ts_ns)
{
- const struct ice_xdp_buff *xdp_ext = (void *)ctx;
+ const struct libeth_xdp_buff *xdp_ext = (void *)ctx;
+ struct ice_rx_ring *rx_ring;
- *ts_ns = ice_ptp_get_rx_hwts(xdp_ext->eop_desc,
- xdp_ext->pkt_ctx);
+ rx_ring = libeth_xdp_buff_to_rq(xdp_ext, typeof(*rx_ring), xdp_rxq);
+
+ *ts_ns = ice_ptp_get_rx_hwts(xdp_ext->desc,
+ &rx_ring->pkt_ctx);
if (!*ts_ns)
return -ENODATA;
@@ -545,10 +559,10 @@ ice_xdp_rx_hash_type(const union ice_32b_rx_flex_desc *eop_desc)
static int ice_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash,
enum xdp_rss_hash_type *rss_type)
{
- const struct ice_xdp_buff *xdp_ext = (void *)ctx;
+ const struct libeth_xdp_buff *xdp_ext = (void *)ctx;
- *hash = ice_get_rx_hash(xdp_ext->eop_desc);
- *rss_type = ice_xdp_rx_hash_type(xdp_ext->eop_desc);
+ *hash = ice_get_rx_hash(xdp_ext->desc);
+ *rss_type = ice_xdp_rx_hash_type(xdp_ext->desc);
if (!likely(*hash))
return -ENODATA;
@@ -567,13 +581,16 @@ static int ice_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash,
static int ice_xdp_rx_vlan_tag(const struct xdp_md *ctx, __be16 *vlan_proto,
u16 *vlan_tci)
{
- const struct ice_xdp_buff *xdp_ext = (void *)ctx;
+ const struct libeth_xdp_buff *xdp_ext = (void *)ctx;
+ struct ice_rx_ring *rx_ring;
+
+ rx_ring = libeth_xdp_buff_to_rq(xdp_ext, typeof(*rx_ring), xdp_rxq);
- *vlan_proto = xdp_ext->pkt_ctx->vlan_proto;
+ *vlan_proto = rx_ring->pkt_ctx.vlan_proto;
if (!*vlan_proto)
return -ENODATA;
- *vlan_tci = ice_get_vlan_tci(xdp_ext->eop_desc);
+ *vlan_tci = ice_get_vlan_tci(xdp_ext->desc);
if (!*vlan_tci)
return -ENODATA;