-rw-r--r--Documentation/Makefile17
-rw-r--r--Documentation/admin-guide/sysctl/net.rst4
-rw-r--r--Documentation/conf.py20
-rw-r--r--Documentation/devicetree/bindings/net/airoha,en7581-npu.yaml22
-rw-r--r--Documentation/devicetree/bindings/net/allwinner,sun4i-a10-emac.yaml9
-rw-r--r--Documentation/devicetree/bindings/net/altr,socfpga-stmmac.yaml7
-rw-r--r--Documentation/devicetree/bindings/net/ethernet-controller.yaml5
-rw-r--r--Documentation/devicetree/bindings/net/fsl,fman-dtsec.yaml4
-rw-r--r--Documentation/devicetree/bindings/net/litex,liteeth.yaml12
-rw-r--r--Documentation/devicetree/bindings/net/microchip,sparx5-switch.yaml1
-rw-r--r--Documentation/devicetree/bindings/net/nfc/ti,trf7970a.yaml4
-rw-r--r--Documentation/devicetree/bindings/net/pse-pd/skyworks,si3474.yaml144
-rw-r--r--Documentation/devicetree/bindings/net/qcom,ipq9574-ppe.yaml533
-rw-r--r--Documentation/devicetree/bindings/net/realtek,rtl82xx.yaml6
-rw-r--r--Documentation/devicetree/bindings/net/renesas,rzn1-gmac.yaml9
-rw-r--r--Documentation/devicetree/bindings/ptp/nxp,ptp-netc.yaml63
-rw-r--r--Documentation/netlink/specs/devlink.yaml7
-rw-r--r--Documentation/netlink/specs/ethtool.yaml3
-rw-r--r--Documentation/netlink/specs/fou.yaml4
-rw-r--r--Documentation/netlink/specs/index.rst13
-rw-r--r--Documentation/networking/bonding.rst79
-rw-r--r--Documentation/networking/device_drivers/ethernet/index.rst1
-rw-r--r--Documentation/networking/device_drivers/ethernet/meta/fbnic.rst11
-rw-r--r--Documentation/networking/device_drivers/ethernet/qualcomm/ppe/ppe.rst194
-rw-r--r--Documentation/networking/devlink/devlink-health.rst2
-rw-r--r--Documentation/networking/index.rst2
-rw-r--r--Documentation/networking/ip-sysctl.rst16
-rw-r--r--Documentation/networking/netlink_spec/.gitignore1
-rw-r--r--Documentation/networking/netlink_spec/readme.txt4
-rw-r--r--Documentation/process/maintainer-netdev.rst2
-rwxr-xr-xDocumentation/sphinx/parser_yaml.py123
-rw-r--r--Documentation/userspace-api/netlink/index.rst2
-rw-r--r--Documentation/userspace-api/netlink/netlink-raw.rst6
-rw-r--r--Documentation/userspace-api/netlink/specs.rst2
-rw-r--r--MAINTAINERS21
-rw-r--r--arch/m68k/coldfire/m5272.c4
-rw-r--r--arch/mips/bcm47xx/setup.c4
-rw-r--r--drivers/infiniband/hw/mlx5/std_types.c27
-rw-r--r--drivers/net/Space.c3
-rw-r--r--drivers/net/amt.c6
-rw-r--r--drivers/net/bonding/bond_main.c113
-rw-r--r--drivers/net/bonding/bond_netlink.c14
-rw-r--r--drivers/net/bonding/bond_options.c7
-rw-r--r--drivers/net/bonding/bond_sysfs.c6
-rw-r--r--drivers/net/dsa/Kconfig16
-rw-r--r--drivers/net/dsa/Makefile3
-rw-r--r--drivers/net/dsa/b53/b53_mmap.c35
-rw-r--r--drivers/net/dsa/dsa_loop.c12
-rw-r--r--drivers/net/dsa/ks8995.c (renamed from drivers/net/phy/spi_ks8995.c)453
-rw-r--r--drivers/net/dsa/lantiq/Kconfig7
-rw-r--r--drivers/net/dsa/lantiq/Makefile1
-rw-r--r--drivers/net/dsa/lantiq/lantiq_gswip.c (renamed from drivers/net/dsa/lantiq_gswip.c)469
-rw-r--r--drivers/net/dsa/lantiq/lantiq_gswip.h276
-rw-r--r--drivers/net/dsa/lantiq/lantiq_pce.h (renamed from drivers/net/dsa/lantiq_pce.h)9
-rw-r--r--drivers/net/dsa/realtek/realtek.h3
-rw-r--r--drivers/net/ethernet/airoha/airoha_eth.c7
-rw-r--r--drivers/net/ethernet/airoha/airoha_eth.h23
-rw-r--r--drivers/net/ethernet/airoha/airoha_npu.c174
-rw-r--r--drivers/net/ethernet/airoha/airoha_npu.h36
-rw-r--r--drivers/net/ethernet/airoha/airoha_ppe.c215
-rw-r--r--drivers/net/ethernet/amd/pds_core/main.c2
-rw-r--r--drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c30
-rw-r--r--drivers/net/ethernet/amd/xgbe/xgbe-i2c.c2
-rw-r--r--drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c2
-rw-r--r--drivers/net/ethernet/amd/xgbe/xgbe.h1
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.c18
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.h3
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c2
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c104
-rw-r--r--drivers/net/ethernet/cadence/macb.h67
-rw-r--r--drivers/net/ethernet/cadence/macb_main.c291
-rw-r--r--drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c20
-rw-r--r--drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.h7
-rw-r--r--drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c2
-rw-r--r--drivers/net/ethernet/freescale/enetc/Kconfig3
-rw-r--r--drivers/net/ethernet/freescale/enetc/enetc.c209
-rw-r--r--drivers/net/ethernet/freescale/enetc/enetc.h21
-rw-r--r--drivers/net/ethernet/freescale/enetc/enetc4_hw.h6
-rw-r--r--drivers/net/ethernet/freescale/enetc/enetc4_pf.c6
-rw-r--r--drivers/net/ethernet/freescale/enetc/enetc_ethtool.c91
-rw-r--r--drivers/net/ethernet/freescale/enetc/enetc_hw.h1
-rw-r--r--drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c5
-rw-r--r--drivers/net/ethernet/google/gve/gve_rx_dqo.c35
-rw-r--r--drivers/net/ethernet/hisilicon/hibmcge/hbg_main.c2
-rw-r--r--drivers/net/ethernet/hisilicon/hibmcge/hbg_mdio.c3
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c2
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c33
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c4
-rw-r--r--drivers/net/ethernet/huawei/hinic/hinic_devlink.c10
-rw-r--r--drivers/net/ethernet/huawei/hinic3/Makefile4
-rw-r--r--drivers/net/ethernet/huawei/hinic3/hinic3_cmdq.c915
-rw-r--r--drivers/net/ethernet/huawei/hinic3/hinic3_cmdq.h156
-rw-r--r--drivers/net/ethernet/huawei/hinic3/hinic3_common.c23
-rw-r--r--drivers/net/ethernet/huawei/hinic3/hinic3_common.h27
-rw-r--r--drivers/net/ethernet/huawei/hinic3/hinic3_csr.h79
-rw-r--r--drivers/net/ethernet/huawei/hinic3/hinic3_eqs.c776
-rw-r--r--drivers/net/ethernet/huawei/hinic3/hinic3_eqs.h122
-rw-r--r--drivers/net/ethernet/huawei/hinic3/hinic3_hw_cfg.c43
-rw-r--r--drivers/net/ethernet/huawei/hinic3/hinic3_hw_comm.c31
-rw-r--r--drivers/net/ethernet/huawei/hinic3/hinic3_hw_comm.h13
-rw-r--r--drivers/net/ethernet/huawei/hinic3/hinic3_hw_intf.h36
-rw-r--r--drivers/net/ethernet/huawei/hinic3/hinic3_hwif.c148
-rw-r--r--drivers/net/ethernet/huawei/hinic3/hinic3_hwif.h16
-rw-r--r--drivers/net/ethernet/huawei/hinic3/hinic3_irq.c136
-rw-r--r--drivers/net/ethernet/huawei/hinic3/hinic3_main.c61
-rw-r--r--drivers/net/ethernet/huawei/hinic3/hinic3_mbox.c848
-rw-r--r--drivers/net/ethernet/huawei/hinic3/hinic3_mbox.h126
-rw-r--r--drivers/net/ethernet/huawei/hinic3/hinic3_nic_dev.h14
-rw-r--r--drivers/net/ethernet/huawei/hinic3/hinic3_tx.c6
-rw-r--r--drivers/net/ethernet/huawei/hinic3/hinic3_wq.c109
-rw-r--r--drivers/net/ethernet/huawei/hinic3/hinic3_wq.h19
-rw-r--r--drivers/net/ethernet/ibm/ibmvnic.c59
-rw-r--r--drivers/net/ethernet/ibm/ibmvnic.h6
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c2
-rw-r--r--drivers/net/ethernet/intel/ice/Makefile8
-rw-r--r--drivers/net/ethernet/intel/ice/devlink/health.c3
-rw-r--r--drivers/net/ethernet/intel/ice/ice.h1
-rw-r--r--drivers/net/ethernet/intel/ice/ice_adminq_cmd.h4
-rw-r--r--drivers/net/ethernet/intel/ice/ice_common.c19
-rw-r--r--drivers/net/ethernet/intel/ice/ice_common.h2
-rw-r--r--drivers/net/ethernet/intel/ice/ice_lag.c975
-rw-r--r--drivers/net/ethernet/intel/ice/ice_lag.h21
-rw-r--r--drivers/net/ethernet/intel/ice/ice_main.c2
-rw-r--r--drivers/net/ethernet/intel/ice/ice_sriov.c2
-rw-r--r--drivers/net/ethernet/intel/ice/ice_sriov.h4
-rw-r--r--drivers/net/ethernet/intel/ice/ice_trace.h10
-rw-r--r--drivers/net/ethernet/intel/ice/ice_type.h6
-rw-r--r--drivers/net/ethernet/intel/ice/ice_vf_lib.c2
-rw-r--r--drivers/net/ethernet/intel/ice/ice_vf_lib.h2
-rw-r--r--drivers/net/ethernet/intel/ice/virt/allowlist.c (renamed from drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c)2
-rw-r--r--drivers/net/ethernet/intel/ice/virt/allowlist.h (renamed from drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.h)0
-rw-r--r--drivers/net/ethernet/intel/ice/virt/fdir.c (renamed from drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c)0
-rw-r--r--drivers/net/ethernet/intel/ice/virt/fdir.h (renamed from drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.h)0
-rw-r--r--drivers/net/ethernet/intel/ice/virt/queues.c975
-rw-r--r--drivers/net/ethernet/intel/ice/virt/queues.h20
-rw-r--r--drivers/net/ethernet/intel/ice/virt/rss.c719
-rw-r--r--drivers/net/ethernet/intel/ice/virt/rss.h18
-rw-r--r--drivers/net/ethernet/intel/ice/virt/virtchnl.c (renamed from drivers/net/ethernet/intel/ice/ice_virtchnl.c)1683
-rw-r--r--drivers/net/ethernet/intel/ice/virt/virtchnl.h (renamed from drivers/net/ethernet/intel/ice/ice_virtchnl.h)0
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf.h2
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_lib.c102
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_txrx.c129
-rw-r--r--drivers/net/ethernet/intel/igb/igb_ethtool.c8
-rw-r--r--drivers/net/ethernet/intel/igc/igc_ethtool.c8
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c2
-rw-r--r--drivers/net/ethernet/intel/ixgbevf/ethtool.c6
-rw-r--r--drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c7
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/cgx.h4
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu.c3
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu.h1
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c32
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c16
-rw-r--r--drivers/net/ethernet/mediatek/mtk_wed.c33
-rw-r--r--drivers/net/ethernet/mediatek/mtk_wed.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/Kconfig1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/Makefile6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c12
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c12
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c9
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c33
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rep.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/adj_vport.c209
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c11
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.c234
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.h45
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c105
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.c183
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.h18
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fw.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/health.c41
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sf/dev/diag/dev_tracepoint.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c30
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_cmd.c30
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/vport.c58
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core.c2
-rw-r--r--drivers/net/ethernet/meta/fbnic/fbnic.h3
-rw-r--r--drivers/net/ethernet/meta/fbnic/fbnic_csr.h19
-rw-r--r--drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c147
-rw-r--r--drivers/net/ethernet/meta/fbnic/fbnic_fw.c106
-rw-r--r--drivers/net/ethernet/meta/fbnic/fbnic_fw.h23
-rw-r--r--drivers/net/ethernet/meta/fbnic/fbnic_hw_stats.c66
-rw-r--r--drivers/net/ethernet/meta/fbnic/fbnic_hw_stats.h28
-rw-r--r--drivers/net/ethernet/meta/fbnic/fbnic_mac.c57
-rw-r--r--drivers/net/ethernet/meta/fbnic/fbnic_mac.h6
-rw-r--r--drivers/net/ethernet/meta/fbnic/fbnic_netdev.c148
-rw-r--r--drivers/net/ethernet/meta/fbnic/fbnic_netdev.h13
-rw-r--r--drivers/net/ethernet/meta/fbnic/fbnic_pci.c22
-rw-r--r--drivers/net/ethernet/meta/fbnic/fbnic_rpc.c88
-rw-r--r--drivers/net/ethernet/meta/fbnic/fbnic_rpc.h4
-rw-r--r--drivers/net/ethernet/meta/fbnic/fbnic_txrx.c955
-rw-r--r--drivers/net/ethernet/meta/fbnic/fbnic_txrx.h33
-rw-r--r--drivers/net/ethernet/microchip/lan865x/lan865x.c1
-rw-r--r--drivers/net/ethernet/microsoft/mana/mana_bpf.c46
-rw-r--r--drivers/net/ethernet/microsoft/mana/mana_en.c151
-rw-r--r--drivers/net/ethernet/netronome/nfp/crypto/tls.c9
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/metadata.c4
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfd3/dp.c16
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfdk/dp.c16
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_devlink.c9
-rw-r--r--drivers/net/ethernet/qualcomm/Kconfig15
-rw-r--r--drivers/net/ethernet/qualcomm/Makefile1
-rw-r--r--drivers/net/ethernet/qualcomm/ppe/Makefile7
-rw-r--r--drivers/net/ethernet/qualcomm/ppe/ppe.c239
-rw-r--r--drivers/net/ethernet/qualcomm/ppe/ppe.h39
-rw-r--r--drivers/net/ethernet/qualcomm/ppe/ppe_config.c2034
-rw-r--r--drivers/net/ethernet/qualcomm/ppe/ppe_config.h317
-rw-r--r--drivers/net/ethernet/qualcomm/ppe/ppe_debugfs.c847
-rw-r--r--drivers/net/ethernet/qualcomm/ppe/ppe_debugfs.h16
-rw-r--r--drivers/net/ethernet/qualcomm/ppe/ppe_regs.h591
-rw-r--r--drivers/net/ethernet/renesas/Makefile1
-rw-r--r--drivers/net/ethernet/renesas/rswitch.h43
-rw-r--r--drivers/net/ethernet/renesas/rswitch_l2.c316
-rw-r--r--drivers/net/ethernet/renesas/rswitch_l2.h15
-rw-r--r--drivers/net/ethernet/renesas/rswitch_main.c (renamed from drivers/net/ethernet/renesas/rswitch.c)86
-rw-r--r--drivers/net/ethernet/sfc/efx_channels.c4
-rw-r--r--drivers/net/ethernet/sfc/falcon/efx.c5
-rw-r--r--drivers/net/ethernet/sfc/siena/efx_channels.c4
-rw-r--r--drivers/net/ethernet/sfc/tc_encap_actions.c4
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/common.h1
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c10
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c74
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c80
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c10
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c71
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c68
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c47
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac4.h1
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c8
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/hwif.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac.h13
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c31
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_main.c73
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c54
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c73
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c68
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c34
-rw-r--r--drivers/net/ethernet/wangxun/Kconfig1
-rw-r--r--drivers/net/ethernet/wangxun/libwx/wx_ethtool.c55
-rw-r--r--drivers/net/ethernet/wangxun/libwx/wx_lib.c103
-rw-r--r--drivers/net/ethernet/wangxun/libwx/wx_type.h5
-rw-r--r--drivers/net/ethernet/wangxun/libwx/wx_vf.h72
-rw-r--r--drivers/net/ethernet/wangxun/libwx/wx_vf_lib.c2
-rw-r--r--drivers/net/ethernet/wangxun/libwx/wx_vf_lib.h1
-rw-r--r--drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c3
-rw-r--r--drivers/net/ethernet/wangxun/ngbe/ngbe_main.c6
-rw-r--r--drivers/net/ethernet/wangxun/ngbevf/ngbevf_main.c1
-rw-r--r--drivers/net/ethernet/wangxun/txgbe/txgbe_ethtool.c3
-rw-r--r--drivers/net/ethernet/wangxun/txgbe/txgbe_main.c1
-rw-r--r--drivers/net/ethernet/wangxun/txgbevf/txgbevf_main.c1
-rw-r--r--drivers/net/gtp.c7
-rw-r--r--drivers/net/ipvlan/ipvlan_core.c4
-rw-r--r--drivers/net/macsec.c173
-rw-r--r--drivers/net/mdio/mdio-bcm-unimac.c4
-rw-r--r--drivers/net/mdio/of_mdio.c1
-rw-r--r--drivers/net/netconsole.c91
-rw-r--r--drivers/net/netdevsim/health.c4
-rw-r--r--drivers/net/phy/Kconfig4
-rw-r--r--drivers/net/phy/Makefile1
-rw-r--r--drivers/net/phy/aquantia/aquantia.h28
-rw-r--r--drivers/net/phy/aquantia/aquantia_main.c531
-rw-r--r--drivers/net/phy/as21xxx.c7
-rw-r--r--drivers/net/phy/fixed_phy.c26
-rw-r--r--drivers/net/phy/mediatek/mtk-2p5ge.c104
-rw-r--r--drivers/net/phy/micrel.c902
-rw-r--r--drivers/net/phy/motorcomm.c117
-rw-r--r--drivers/net/phy/mscc/mscc.h3
-rw-r--r--drivers/net/phy/mscc/mscc_main.c40
-rw-r--r--drivers/net/phy/mxl-86110.c392
-rw-r--r--drivers/net/phy/realtek/realtek_main.c263
-rw-r--r--drivers/net/ppp/Kconfig3
-rw-r--r--drivers/net/ppp/bsd_comp.c4
-rw-r--r--drivers/net/ppp/ppp_generic.c120
-rw-r--r--drivers/net/ppp/ppp_mppe.c108
-rw-r--r--drivers/net/ppp/pppoe.c129
-rw-r--r--drivers/net/pse-pd/Kconfig11
-rw-r--r--drivers/net/pse-pd/Makefile1
-rw-r--r--drivers/net/pse-pd/si3474.c578
-rw-r--r--drivers/net/tun.c4
-rw-r--r--drivers/net/usb/Kconfig1
-rw-r--r--drivers/net/usb/lan78xx.c6
-rw-r--r--drivers/net/vrf.c4
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7915/mmio.c6
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7996/mmio.c12
-rw-r--r--drivers/net/wwan/iosm/iosm_ipc_pcie.c2
-rw-r--r--drivers/nfc/pn533/pn533.c12
-rw-r--r--drivers/nfc/s3fwrn5/Kconfig3
-rw-r--r--drivers/nfc/s3fwrn5/firmware.c17
-rw-r--r--drivers/ptp/Kconfig11
-rw-r--r--drivers/ptp/Makefile1
-rw-r--r--drivers/ptp/ptp_clock.c66
-rw-r--r--drivers/ptp/ptp_clockmatrix.c2
-rw-r--r--drivers/ptp/ptp_netc.c1017
-rw-r--r--drivers/staging/octeon/ethernet-tx.c3
-rw-r--r--include/linux/bnxt/hsi.h315
-rw-r--r--include/linux/bpf.h7
-rw-r--r--include/linux/filter.h6
-rw-r--r--include/linux/if_pppox.h2
-rw-r--r--include/linux/inet_diag.h20
-rw-r--r--include/linux/ipv6.h2
-rw-r--r--include/linux/memcontrol.h45
-rw-r--r--include/linux/micrel_phy.h1
-rw-r--r--include/linux/mlx5/device.h4
-rw-r--r--include/linux/mlx5/mlx5_ifc.h228
-rw-r--r--include/linux/mlx5/vport.h2
-rw-r--r--include/linux/net/intel/libie/adminq.h5
-rw-r--r--include/linux/phy.h23
-rw-r--r--include/linux/phy_fixed.h8
-rw-r--r--include/linux/phylink.h7
-rw-r--r--include/linux/ptp_clock_kernel.h22
-rw-r--r--include/linux/skbuff.h41
-rw-r--r--include/linux/skmsg.h2
-rw-r--r--include/linux/soc/airoha/airoha_offload.h315
-rw-r--r--include/linux/soc/mediatek/mtk_wed.h2
-rw-r--r--include/linux/stmmac.h4
-rw-r--r--include/linux/udp.h1
-rw-r--r--include/net/act_api.h14
-rw-r--r--include/net/bonding.h1
-rw-r--r--include/net/devlink.h16
-rw-r--r--include/net/dst.h16
-rw-r--r--include/net/flow.h11
-rw-r--r--include/net/genetlink.h2
-rw-r--r--include/net/inet6_hashtables.h18
-rw-r--r--include/net/inet_connection_sock.h8
-rw-r--r--include/net/inet_dscp.h6
-rw-r--r--include/net/inet_hashtables.h37
-rw-r--r--include/net/ip.h6
-rw-r--r--include/net/ip6_route.h2
-rw-r--r--include/net/ip_fib.h2
-rw-r--r--include/net/ip_tunnels.h4
-rw-r--r--include/net/mana/mana.h4
-rw-r--r--include/net/netdev_queues.h9
-rw-r--r--include/net/netfilter/ipv4/nf_reject.h8
-rw-r--r--include/net/netfilter/ipv6/nf_reject.h10
-rw-r--r--include/net/netfilter/nf_tables.h2
-rw-r--r--include/net/netfilter/nf_tables_core.h2
-rw-r--r--include/net/netns/ipv4.h1
-rw-r--r--include/net/netns/sctp.h4
-rw-r--r--include/net/nfc/nci_core.h2
-rw-r--r--include/net/page_pool/helpers.h17
-rw-r--r--include/net/ping.h1
-rw-r--r--include/net/proto_memory.h4
-rw-r--r--include/net/raw.h1
-rw-r--r--include/net/route.h4
-rw-r--r--include/net/rps.h92
-rw-r--r--include/net/sctp/auth.h17
-rw-r--r--include/net/sctp/constants.h9
-rw-r--r--include/net/sctp/structs.h35
-rw-r--r--include/net/seg6_hmac.h20
-rw-r--r--include/net/sock.h108
-rw-r--r--include/net/tc_act/tc_skbmod.h1
-rw-r--r--include/net/tc_act/tc_tunnel_key.h1
-rw-r--r--include/net/tc_act/tc_vlan.h1
-rw-r--r--include/net/tcp.h16
-rw-r--r--include/net/timewait_sock.h7
-rw-r--r--include/net/udp.h3
-rw-r--r--include/trace/events/fib.h4
-rw-r--r--include/uapi/linux/devlink.h2
-rw-r--r--include/uapi/linux/ethtool.h1
-rw-r--r--include/uapi/linux/stddef.h2
-rw-r--r--io_uring/zcrx.c3
-rw-r--r--kernel/bpf/helpers.c11
-rw-r--r--kernel/bpf/log.c2
-rw-r--r--kernel/bpf/verifier.c15
-rw-r--r--kernel/time/time.c1
-rw-r--r--mm/memcontrol.c40
-rw-r--r--net/bridge/br_forward.c3
-rw-r--r--net/bridge/br_multicast.c9
-rw-r--r--net/bridge/netfilter/ebtables.c14
-rw-r--r--net/caif/cfctrl.c4
-rw-r--r--net/core/Makefile1
-rw-r--r--net/core/datagram.c2
-rw-r--r--net/core/dev.c71
-rw-r--r--net/core/devmem.c8
-rw-r--r--net/core/devmem.h2
-rw-r--r--net/core/dst.c2
-rw-r--r--net/core/filter.c66
-rw-r--r--net/core/lwt_bpf.c4
-rw-r--r--net/core/net-sysfs.c4
-rw-r--r--net/core/netdev-genl.c122
-rw-r--r--net/core/netdev_queues.c27
-rw-r--r--net/core/netdev_rx_queue.c9
-rw-r--r--net/core/pktgen.c7
-rw-r--r--net/core/skbuff.c4
-rw-r--r--net/core/sock.c78
-rw-r--r--net/devlink/health.c109
-rw-r--r--net/devlink/netlink_gen.c5
-rw-r--r--net/devlink/port.c33
-rw-r--r--net/ethtool/ioctl.c25
-rw-r--r--net/ethtool/rss.c27
-rw-r--r--net/ipv4/arp.c2
-rw-r--r--net/ipv4/esp4.c4
-rw-r--r--net/ipv4/fib_frontend.c7
-rw-r--r--net/ipv4/fib_rules.c4
-rw-r--r--net/ipv4/fou_nl.c4
-rw-r--r--net/ipv4/icmp.c18
-rw-r--r--net/ipv4/inet_connection_sock.c30
-rw-r--r--net/ipv4/inet_diag.c570
-rw-r--r--net/ipv4/inet_hashtables.c36
-rw-r--r--net/ipv4/inet_timewait_sock.c5
-rw-r--r--net/ipv4/ip_fragment.c6
-rw-r--r--net/ipv4/ip_gre.c4
-rw-r--r--net/ipv4/ip_input.c11
-rw-r--r--net/ipv4/ip_options.c5
-rw-r--r--net/ipv4/ip_output.c3
-rw-r--r--net/ipv4/ipmr.c9
-rw-r--r--net/ipv4/netfilter.c9
-rw-r--r--net/ipv4/netfilter/ipt_rpfilter.c4
-rw-r--r--net/ipv4/netfilter/nf_dup_ipv4.c4
-rw-r--r--net/ipv4/netfilter/nf_reject_ipv4.c27
-rw-r--r--net/ipv4/netfilter/nf_socket_ipv4.c3
-rw-r--r--net/ipv4/netfilter/nf_tproxy_ipv4.c5
-rw-r--r--net/ipv4/netfilter/nft_fib_ipv4.c4
-rw-r--r--net/ipv4/nexthop.c42
-rw-r--r--net/ipv4/ping.c68
-rw-r--r--net/ipv4/raw.c7
-rw-r--r--net/ipv4/raw_diag.c10
-rw-r--r--net/ipv4/route.c18
-rw-r--r--net/ipv4/tcp.c21
-rw-r--r--net/ipv4/tcp_cdg.c2
-rw-r--r--net/ipv4/tcp_diag.c461
-rw-r--r--net/ipv4/tcp_fastopen.c7
-rw-r--r--net/ipv4/tcp_input.c18
-rw-r--r--net/ipv4/tcp_ipv4.c26
-rw-r--r--net/ipv4/tcp_metrics.c6
-rw-r--r--net/ipv4/tcp_minisocks.c1
-rw-r--r--net/ipv4/tcp_offload.c3
-rw-r--r--net/ipv4/tcp_output.c19
-rw-r--r--net/ipv4/tcp_timer.c6
-rw-r--r--net/ipv4/udp.c14
-rw-r--r--net/ipv4/udp_diag.c10
-rw-r--r--net/ipv4/udp_tunnel_core.c3
-rw-r--r--net/ipv4/xfrm4_policy.c4
-rw-r--r--net/ipv6/Kconfig7
-rw-r--r--net/ipv6/addrconf.c4
-rw-r--r--net/ipv6/anycast.c2
-rw-r--r--net/ipv6/datagram.c2
-rw-r--r--net/ipv6/esp6.c4
-rw-r--r--net/ipv6/icmp.c6
-rw-r--r--net/ipv6/inet6_hashtables.c51
-rw-r--r--net/ipv6/ip6_gre.c10
-rw-r--r--net/ipv6/ip6_output.c64
-rw-r--r--net/ipv6/mcast.c67
-rw-r--r--net/ipv6/ndisc.c4
-rw-r--r--net/ipv6/netfilter.c5
-rw-r--r--net/ipv6/netfilter/nf_reject_ipv6.c37
-rw-r--r--net/ipv6/netfilter/nf_socket_ipv6.c3
-rw-r--r--net/ipv6/netfilter/nf_tproxy_ipv6.c5
-rw-r--r--net/ipv6/output_core.c8
-rw-r--r--net/ipv6/ping.c1
-rw-r--r--net/ipv6/raw.c9
-rw-r--r--net/ipv6/route.c7
-rw-r--r--net/ipv6/seg6.c7
-rw-r--r--net/ipv6/seg6_hmac.c211
-rw-r--r--net/ipv6/sit.c104
-rw-r--r--net/ipv6/tcp_ipv6.c24
-rw-r--r--net/ipv6/tcpv6_offload.c3
-rw-r--r--net/ipv6/udp.c6
-rw-r--r--net/iucv/af_iucv.c4
-rw-r--r--net/mptcp/crypto.c35
-rw-r--r--net/mptcp/mptcp_diag.c15
-rw-r--r--net/mptcp/protocol.c26
-rw-r--r--net/mptcp/protocol.h4
-rw-r--r--net/mptcp/subflow.c11
-rw-r--r--net/netfilter/nf_conntrack_netlink.c39
-rw-r--r--net/netfilter/nf_tables_api.c47
-rw-r--r--net/netfilter/nft_flow_offload.c4
-rw-r--r--net/netfilter/nft_payload.c20
-rw-r--r--net/netfilter/nft_set_hash.c100
-rw-r--r--net/netfilter/nft_set_pipapo.c89
-rw-r--r--net/netfilter/nft_set_pipapo.h8
-rw-r--r--net/netfilter/nft_set_pipapo_avx2.c140
-rw-r--r--net/netfilter/nft_set_pipapo_avx2.h4
-rw-r--r--net/netfilter/nft_set_rbtree.c35
-rw-r--r--net/netlink/af_netlink.c4
-rw-r--r--net/openvswitch/flow.c12
-rw-r--r--net/openvswitch/flow_table.c7
-rw-r--r--net/packet/af_packet.c2
-rw-r--r--net/phonet/af_phonet.c4
-rw-r--r--net/phonet/pep.c6
-rw-r--r--net/phonet/socket.c25
-rw-r--r--net/rds/af_rds.c2
-rw-r--r--net/rds/connection.c9
-rw-r--r--net/rds/ib_recv.c2
-rw-r--r--net/rds/message.c4
-rw-r--r--net/rds/rds.h2
-rw-r--r--net/rds/recv.c4
-rw-r--r--net/rds/send.c4
-rw-r--r--net/sched/act_api.c12
-rw-r--r--net/sched/act_simple.c1
-rw-r--r--net/sched/act_skbmod.c22
-rw-r--r--net/sched/act_tunnel_key.c16
-rw-r--r--net/sched/act_vlan.c16
-rw-r--r--net/sched/sch_api.c4
-rw-r--r--net/sctp/Kconfig47
-rw-r--r--net/sctp/auth.c166
-rw-r--r--net/sctp/chunk.c3
-rw-r--r--net/sctp/diag.c2
-rw-r--r--net/sctp/endpointola.c23
-rw-r--r--net/sctp/protocol.c14
-rw-r--r--net/sctp/sm_make_chunk.c60
-rw-r--r--net/sctp/sm_statefuns.c5
-rw-r--r--net/sctp/socket.c41
-rw-r--r--net/sctp/sysctl.c49
-rw-r--r--net/smc/smc_ib.c18
-rw-r--r--net/smc/smc_ism.c13
-rw-r--r--net/smc/smc_pnet.c2
-rw-r--r--net/tipc/socket.c6
-rw-r--r--net/vmw_vsock/af_vsock.c7
-rw-r--r--net/xfrm/xfrm_policy.c16
-rw-r--r--rust/kernel/net/phy.rs7
-rwxr-xr-xscripts/headers_install.sh2
-rw-r--r--tools/net/ynl/pyynl/lib/__init__.py2
-rw-r--r--tools/net/ynl/pyynl/lib/doc_generator.py398
-rwxr-xr-xtools/net/ynl/pyynl/ynl_gen_c.py2
-rwxr-xr-xtools/net/ynl/pyynl/ynl_gen_rst.py384
-rw-r--r--tools/testing/selftests/bpf/bpf_kfuncs.h3
-rw-r--r--tools/testing/selftests/bpf/config1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/dynptr.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c218
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_udp4.c3
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_udp6.c4
-rw-r--r--tools/testing/selftests/bpf/progs/dynptr_fail.c258
-rw-r--r--tools/testing/selftests/bpf/progs/dynptr_success.c55
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_meta.c419
-rwxr-xr-xtools/testing/selftests/drivers/net/hds.py39
-rw-r--r--tools/testing/selftests/drivers/net/hw/Makefile1
-rw-r--r--tools/testing/selftests/drivers/net/hw/config2
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/devmem.py14
-rw-r--r--tools/testing/selftests/drivers/net/hw/ncdevmem.c850
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/rss_ctx.py18
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/rss_flow_label.py167
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/tso.py11
-rw-r--r--tools/testing/selftests/drivers/net/lib/py/__init__.py2
-rw-r--r--tools/testing/selftests/drivers/net/lib/py/env.py41
-rw-r--r--tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh10
-rwxr-xr-xtools/testing/selftests/drivers/net/napi_threaded.py34
-rwxr-xr-xtools/testing/selftests/drivers/net/netcons_cmdline.sh55
-rwxr-xr-xtools/testing/selftests/drivers/net/xdp.py75
-rw-r--r--tools/testing/selftests/net/Makefile1
-rw-r--r--tools/testing/selftests/net/af_unix/Makefile2
-rw-r--r--tools/testing/selftests/net/af_unix/scm_inq.c26
-rw-r--r--tools/testing/selftests/net/af_unix/scm_pidfd.c2
-rw-r--r--tools/testing/selftests/net/af_unix/scm_rights.c28
-rwxr-xr-xtools/testing/selftests/net/bpf_offload.py4
-rw-r--r--tools/testing/selftests/net/cmsg_sender.c10
-rw-r--r--tools/testing/selftests/net/config1
-rw-r--r--tools/testing/selftests/net/forwarding/Makefile4
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_activity_notify.sh173
-rw-r--r--tools/testing/selftests/net/lib/py/utils.py45
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh123
-rw-r--r--tools/testing/selftests/net/netfilter/config1
-rw-r--r--tools/testing/selftests/net/openvswitch/ovs-dpctl.py2
-rwxr-xr-xtools/testing/selftests/net/packetdrill/ksft_runner.sh47
-rw-r--r--tools/testing/selftests/net/psock_tpacket.c4
-rwxr-xr-xtools/testing/selftests/net/route_hint.sh79
-rwxr-xr-xtools/testing/selftests/net/rps_default_mask.sh12
-rwxr-xr-xtools/testing/selftests/net/rtnetlink.sh13
-rw-r--r--tools/testing/selftests/net/socket.c11
-rwxr-xr-xtools/testing/selftests/net/test_bridge_backup_port.sh31
-rw-r--r--tools/testing/selftests/net/tls.c5
-rw-r--r--tools/testing/vsock/util.c1
568 files changed, 27247 insertions, 8422 deletions
diff --git a/Documentation/Makefile b/Documentation/Makefile
index b98477df5ddf..820f07e0afe6 100644
--- a/Documentation/Makefile
+++ b/Documentation/Makefile
@@ -104,22 +104,6 @@ quiet_cmd_sphinx = SPHINX $@ --> file://$(abspath $(BUILDDIR)/$3/$4)
cp $(if $(patsubst /%,,$(DOCS_CSS)),$(abspath $(srctree)/$(DOCS_CSS)),$(DOCS_CSS)) $(BUILDDIR)/$3/_static/; \
fi
-YNL_INDEX:=$(srctree)/Documentation/networking/netlink_spec/index.rst
-YNL_RST_DIR:=$(srctree)/Documentation/networking/netlink_spec
-YNL_YAML_DIR:=$(srctree)/Documentation/netlink/specs
-YNL_TOOL:=$(srctree)/tools/net/ynl/pyynl/ynl_gen_rst.py
-
-YNL_RST_FILES_TMP := $(patsubst %.yaml,%.rst,$(wildcard $(YNL_YAML_DIR)/*.yaml))
-YNL_RST_FILES := $(patsubst $(YNL_YAML_DIR)%,$(YNL_RST_DIR)%, $(YNL_RST_FILES_TMP))
-
-$(YNL_INDEX): $(YNL_RST_FILES)
- $(Q)$(YNL_TOOL) -o $@ -x
-
-$(YNL_RST_DIR)/%.rst: $(YNL_YAML_DIR)/%.yaml $(YNL_TOOL)
- $(Q)$(YNL_TOOL) -i $< -o $@
-
-htmldocs texinfodocs latexdocs epubdocs xmldocs: $(YNL_INDEX)
-
htmldocs:
@$(srctree)/scripts/sphinx-pre-install --version-check
@+$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,html,$(var),,$(var)))
@@ -186,7 +170,6 @@ refcheckdocs:
$(Q)cd $(srctree);scripts/documentation-file-ref-check
cleandocs:
- $(Q)rm -f $(YNL_INDEX) $(YNL_RST_FILES)
$(Q)rm -rf $(BUILDDIR)
$(Q)$(MAKE) BUILDDIR=$(abspath $(BUILDDIR)) $(build)=Documentation/userspace-api/media clean
diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst
index 7b0c4291c686..2ef50828aff1 100644
--- a/Documentation/admin-guide/sysctl/net.rst
+++ b/Documentation/admin-guide/sysctl/net.rst
@@ -222,6 +222,8 @@ rmem_max
The maximum receive socket buffer size in bytes.
+Default: 4194304
+
rps_default_mask
----------------
@@ -247,6 +249,8 @@ wmem_max
The maximum send socket buffer size in bytes.
+Default: 4194304
+
message_burst and message_cost
------------------------------
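
To spot-check these defaults on a running kernel, a minimal Python sketch
(assuming a Linux host exposing the usual procfs entries)::

    from pathlib import Path

    # Both sysctls live under /proc/sys/net/core/; with this change they
    # should read back 4194304 (4 MiB) unless overridden by the admin.
    for name in ("rmem_max", "wmem_max"):
        value = int(Path("/proc/sys/net/core", name).read_text())
        print(f"net.core.{name} = {value}")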
diff --git a/Documentation/conf.py b/Documentation/conf.py
index 700516238d3f..f9828f3862f9 100644
--- a/Documentation/conf.py
+++ b/Documentation/conf.py
@@ -42,6 +42,15 @@ exclude_patterns = []
dyn_include_patterns = []
dyn_exclude_patterns = ["output"]
+# Currently, only netlink/specs has a parser for YAML.
+# Prefer using include patterns when available, as they are faster.
+if has_include_patterns:
+ dyn_include_patterns.append("netlink/specs/*.yaml")
+else:
+ dyn_exclude_patterns.append("netlink/*.yaml")
+ dyn_exclude_patterns.append("devicetree/bindings/**.yaml")
+ dyn_exclude_patterns.append("core-api/kho/bindings/**.yaml")
+
# Properly handle include/exclude patterns
# ----------------------------------------
@@ -102,12 +111,12 @@ extensions = [
"kernel_include",
"kfigure",
"maintainers_include",
+ "parser_yaml",
"rstFlatTable",
"sphinx.ext.autosectionlabel",
"sphinx.ext.ifconfig",
"translations",
]
-
# Since Sphinx version 3, the C function parser is more pedantic with regards
# to type checking. Due to that, having macros at c:function cause problems.
# Those needed to be escaped by using c_id_attributes[] array
@@ -204,10 +213,11 @@ else:
# Add any paths that contain templates here, relative to this directory.
templates_path = ["sphinx/templates"]
-# The suffix(es) of source filenames.
-# You can specify multiple suffix as a list of string:
-# source_suffix = ['.rst', '.md']
-source_suffix = '.rst'
+# The suffixes of source filenames that will be automatically parsed
+source_suffix = {
+ ".rst": "restructuredtext",
+ ".yaml": "yaml",
+}
# The encoding of source files.
# source_encoding = 'utf-8-sig'
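
The source_suffix mapping only ties the ".yaml" extension to a source type
named "yaml"; the parser itself comes from the new parser_yaml extension
added above. A minimal sketch of how such an extension plugs into Sphinx
(simplified, not the literal contents of Documentation/sphinx/parser_yaml.py)::

    from docutils import nodes
    from sphinx.parsers import Parser

    class YamlParser(Parser):
        """Handle source files mapped to the "yaml" source type."""

        supported = ("yaml",)

        def parse(self, inputstring, document):
            # A real implementation renders the netlink spec to RST and
            # feeds it through the RST parser; this stub just emits the
            # raw text as a literal block.
            document.append(nodes.literal_block(inputstring, inputstring))

    def setup(app):
        app.add_source_parser(YamlParser)
        return {"parallel_read_safe": True}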
diff --git a/Documentation/devicetree/bindings/net/airoha,en7581-npu.yaml b/Documentation/devicetree/bindings/net/airoha,en7581-npu.yaml
index 76dd97c3fb40..c7644e6586d3 100644
--- a/Documentation/devicetree/bindings/net/airoha,en7581-npu.yaml
+++ b/Documentation/devicetree/bindings/net/airoha,en7581-npu.yaml
@@ -41,9 +41,21 @@ properties:
- description: wlan irq line5
memory-region:
- maxItems: 1
- description:
- Memory used to store NPU firmware binary.
+ oneOf:
+ - items:
+ - description: NPU firmware binary region
+ - items:
+ - description: NPU firmware binary region
+ - description: NPU wlan offload RX buffers region
+ - description: NPU wlan offload TX buffers region
+ - description: NPU wlan offload TX packet identifiers region
+
+ memory-region-names:
+ items:
+ - const: firmware
+ - const: pkt
+ - const: tx-pkt
+ - const: tx-bufid
required:
- compatible
@@ -79,6 +91,8 @@ examples:
<GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 122 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 123 IRQ_TYPE_LEVEL_HIGH>;
- memory-region = <&npu_binary>;
+ memory-region = <&npu_firmware>, <&npu_pkt>, <&npu_txpkt>,
+ <&npu_txbufid>;
+ memory-region-names = "firmware", "pkt", "tx-pkt", "tx-bufid";
};
};
diff --git a/Documentation/devicetree/bindings/net/allwinner,sun4i-a10-emac.yaml b/Documentation/devicetree/bindings/net/allwinner,sun4i-a10-emac.yaml
index eb26623dab51..d4d8f3a7918e 100644
--- a/Documentation/devicetree/bindings/net/allwinner,sun4i-a10-emac.yaml
+++ b/Documentation/devicetree/bindings/net/allwinner,sun4i-a10-emac.yaml
@@ -33,6 +33,15 @@ properties:
- items:
- description: phandle to SRAM
- description: register value for device
+ dmas:
+ items:
+ - description: RX DMA Channel
+ - description: TX DMA Channel
+
+ dma-names:
+ items:
+ - const: rx
+ - const: tx
required:
- compatible
diff --git a/Documentation/devicetree/bindings/net/altr,socfpga-stmmac.yaml b/Documentation/devicetree/bindings/net/altr,socfpga-stmmac.yaml
index 3a22d35db778..fc445ad5a1f1 100644
--- a/Documentation/devicetree/bindings/net/altr,socfpga-stmmac.yaml
+++ b/Documentation/devicetree/bindings/net/altr,socfpga-stmmac.yaml
@@ -62,6 +62,13 @@ properties:
- const: stmmaceth
- const: ptp_ref
+ interrupts:
+ maxItems: 1
+
+ interrupt-names:
+ items:
+ - const: macirq
+
iommus:
minItems: 1
maxItems: 2
diff --git a/Documentation/devicetree/bindings/net/ethernet-controller.yaml b/Documentation/devicetree/bindings/net/ethernet-controller.yaml
index 66b1cfbbfe22..2c924d296a8f 100644
--- a/Documentation/devicetree/bindings/net/ethernet-controller.yaml
+++ b/Documentation/devicetree/bindings/net/ethernet-controller.yaml
@@ -108,6 +108,11 @@ properties:
$ref: "#/properties/phy-handle"
deprecated: true
+ ptp-timer:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description:
+ Specifies a reference to a node representing an IEEE 1588 PTP device.
+
rx-fifo-depth:
$ref: /schemas/types.yaml#/definitions/uint32
description:
diff --git a/Documentation/devicetree/bindings/net/fsl,fman-dtsec.yaml b/Documentation/devicetree/bindings/net/fsl,fman-dtsec.yaml
index 60aaf30d68ed..ef1e30a48c91 100644
--- a/Documentation/devicetree/bindings/net/fsl,fman-dtsec.yaml
+++ b/Documentation/devicetree/bindings/net/fsl,fman-dtsec.yaml
@@ -81,10 +81,6 @@ properties:
An array of two references: the first is the FMan RX port and the second
is the TX port used by this MAC.
- ptp-timer:
- $ref: /schemas/types.yaml#/definitions/phandle
- description: A reference to the IEEE1588 timer
-
phys:
description: A reference to the SerDes lane(s)
maxItems: 1
diff --git a/Documentation/devicetree/bindings/net/litex,liteeth.yaml b/Documentation/devicetree/bindings/net/litex,liteeth.yaml
index ebf4e360f8dd..200b198b0d9b 100644
--- a/Documentation/devicetree/bindings/net/litex,liteeth.yaml
+++ b/Documentation/devicetree/bindings/net/litex,liteeth.yaml
@@ -86,14 +86,12 @@ examples:
phy-handle = <&eth_phy>;
mdio {
- #address-cells = <1>;
- #size-cells = <0>;
+ #address-cells = <1>;
+ #size-cells = <0>;
- eth_phy: ethernet-phy@0 {
- reg = <0>;
- };
+ eth_phy: ethernet-phy@0 {
+ reg = <0>;
+ };
};
};
...
-
-# vim: set ts=2 sw=2 sts=2 tw=80 et cc=80 ft=yaml :
diff --git a/Documentation/devicetree/bindings/net/microchip,sparx5-switch.yaml b/Documentation/devicetree/bindings/net/microchip,sparx5-switch.yaml
index a73fc5036905..082982c59a55 100644
--- a/Documentation/devicetree/bindings/net/microchip,sparx5-switch.yaml
+++ b/Documentation/devicetree/bindings/net/microchip,sparx5-switch.yaml
@@ -245,4 +245,3 @@ examples:
};
...
-# vim: set ts=2 sw=2 sts=2 tw=80 et cc=80 ft=yaml :
diff --git a/Documentation/devicetree/bindings/net/nfc/ti,trf7970a.yaml b/Documentation/devicetree/bindings/net/nfc/ti,trf7970a.yaml
index 5f49bd9ac5e6..7e96a625f0cf 100644
--- a/Documentation/devicetree/bindings/net/nfc/ti,trf7970a.yaml
+++ b/Documentation/devicetree/bindings/net/nfc/ti,trf7970a.yaml
@@ -56,10 +56,10 @@ properties:
Regulator for supply voltage to VIN pin
ti,rx-gain-reduction-db:
- $ref: /schemas/types.yaml#/definitions/uint32
description: |
Specify an RX gain reduction to reduce antenna sensitivity with 5dB per
- increment, with a maximum of 15dB. Supported values: [0, 5, 10, 15].
+ increment, with a maximum of 15dB.
+      enum: [0, 5, 10, 15]
required:
- compatible
diff --git a/Documentation/devicetree/bindings/net/pse-pd/skyworks,si3474.yaml b/Documentation/devicetree/bindings/net/pse-pd/skyworks,si3474.yaml
new file mode 100644
index 000000000000..edd36a43a387
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/pse-pd/skyworks,si3474.yaml
@@ -0,0 +1,144 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/pse-pd/skyworks,si3474.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Skyworks Si3474 Power Sourcing Equipment controller
+
+maintainers:
+ - Piotr Kubik <piotr.kubik@adtran.com>
+
+allOf:
+ - $ref: pse-controller.yaml#
+
+properties:
+ compatible:
+ enum:
+ - skyworks,si3474
+
+ reg:
+ maxItems: 2
+
+ reg-names:
+ items:
+ - const: main
+ - const: secondary
+
+ channels:
+ description: The Si3474 is a single-chip PoE PSE controller managing
+ 8 physical power delivery channels. Internally, it's structured
+ into two logical "Quads".
+      Quad 0 manages physical channels ('ports' in datasheet) 0, 1, 2 and 3.
+      Quad 1 manages physical channels ('ports' in datasheet) 4, 5, 6 and 7.
+
+ type: object
+ additionalProperties: false
+
+ properties:
+ "#address-cells":
+ const: 1
+
+ "#size-cells":
+ const: 0
+
+ patternProperties:
+ '^channel@[0-7]$':
+ type: object
+ additionalProperties: false
+
+ properties:
+ reg:
+ maxItems: 1
+
+ required:
+ - reg
+
+ required:
+ - "#address-cells"
+ - "#size-cells"
+
+required:
+ - compatible
+ - reg
+ - pse-pis
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ ethernet-pse@26 {
+ compatible = "skyworks,si3474";
+ reg-names = "main", "secondary";
+ reg = <0x26>, <0x27>;
+
+ channels {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ phys0_0: channel@0 {
+ reg = <0>;
+ };
+ phys0_1: channel@1 {
+ reg = <1>;
+ };
+ phys0_2: channel@2 {
+ reg = <2>;
+ };
+ phys0_3: channel@3 {
+ reg = <3>;
+ };
+ phys0_4: channel@4 {
+ reg = <4>;
+ };
+ phys0_5: channel@5 {
+ reg = <5>;
+ };
+ phys0_6: channel@6 {
+ reg = <6>;
+ };
+ phys0_7: channel@7 {
+ reg = <7>;
+ };
+ };
+ pse-pis {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ pse_pi0: pse-pi@0 {
+ reg = <0>;
+ #pse-cells = <0>;
+ pairset-names = "alternative-a", "alternative-b";
+ pairsets = <&phys0_0>, <&phys0_1>;
+ polarity-supported = "MDI-X", "S";
+ vpwr-supply = <&reg_pse>;
+ };
+ pse_pi1: pse-pi@1 {
+ reg = <1>;
+ #pse-cells = <0>;
+ pairset-names = "alternative-a", "alternative-b";
+ pairsets = <&phys0_2>, <&phys0_3>;
+ polarity-supported = "MDI-X", "S";
+ vpwr-supply = <&reg_pse>;
+ };
+ pse_pi2: pse-pi@2 {
+ reg = <2>;
+ #pse-cells = <0>;
+ pairset-names = "alternative-a", "alternative-b";
+ pairsets = <&phys0_4>, <&phys0_5>;
+ polarity-supported = "MDI-X", "S";
+ vpwr-supply = <&reg_pse>;
+ };
+ pse_pi3: pse-pi@3 {
+ reg = <3>;
+ #pse-cells = <0>;
+ pairset-names = "alternative-a", "alternative-b";
+ pairsets = <&phys0_6>, <&phys0_7>;
+ polarity-supported = "MDI-X", "S";
+ vpwr-supply = <&reg_pse>;
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/net/qcom,ipq9574-ppe.yaml b/Documentation/devicetree/bindings/net/qcom,ipq9574-ppe.yaml
new file mode 100644
index 000000000000..753f370b7605
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/qcom,ipq9574-ppe.yaml
@@ -0,0 +1,533 @@
+# SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/qcom,ipq9574-ppe.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm IPQ packet process engine (PPE)
+
+maintainers:
+ - Luo Jie <quic_luoj@quicinc.com>
+ - Lei Wei <quic_leiwei@quicinc.com>
+ - Suruchi Agarwal <quic_suruchia@quicinc.com>
+ - Pavithra R <quic_pavir@quicinc.com>
+
+description: |
+ The Ethernet functionality in the PPE (Packet Process Engine) is comprised
+ of three components, the switch core, port wrapper and Ethernet DMA.
+
+  The Switch core in the IPQ9574 PPE has a maximum of 6 front panel ports and
+  two FIFO interfaces. One of the two FIFO interfaces is used for Ethernet
+  port to host CPU communication using Ethernet DMA. The other is used for
+  communicating with the EIP engine, which is used for IPsec offload. On the
+ IPQ9574, the PPE includes 6 GMAC/XGMACs that can be connected with external
+ Ethernet PHY. Switch core also includes BM (Buffer Management), QM (Queue
+ Management) and SCH (Scheduler) modules for supporting the packet processing.
+
+ The port wrapper provides connections from the 6 GMAC/XGMACS to UNIPHY (PCS)
+ supporting various modes such as SGMII/QSGMII/PSGMII/USXGMII/10G-BASER. There
+ are 3 UNIPHY (PCS) instances supported on the IPQ9574.
+
+ Ethernet DMA is used to transmit and receive packets between the six Ethernet
+ ports and ARM host CPU.
+
+  The following diagram shows the PPE hardware block along with its
+  connectivity to external hardware blocks such as the clock blocks (CMNPLL,
+  GCC, NSS clock controller) and the Ethernet PCS/PHY blocks. To depict the
+  PHY connectivity, one 4x1 Gbps PHY (QCA8075) and two 10 Gbps PHYs are used
+  as an example.
+
+ +---------+
+ | 48 MHZ |
+ +----+----+
+ |(clock)
+ v
+ +----+----+
+ +------| CMN PLL |
+ | +----+----+
+ | |(clock)
+ | v
+ | +----+----+ +----+----+ (clock) +----+----+
+ | +---| NSSCC | | GCC |--------->| MDIO |
+ | | +----+----+ +----+----+ +----+----+
+ | | |(clock & reset) |(clock)
+ | | v v
+ | | +----+---------------------+--+----------+----------+---------+
+ | | | +-----+ |EDMA FIFO | | EIP FIFO|
+ | | | | SCH | +----------+ +---------+
+ | | | +-----+ | | |
+ | | | +------+ +------+ +-------------------+ |
+ | | | | BM | | QM | IPQ9574-PPE | L2/L3 Process | |
+ | | | +------+ +------+ +-------------------+ |
+ | | | | |
+ | | | +-------+ +-------+ +-------+ +-------+ +-------+ +-------+ |
+ | | | | MAC0 | | MAC1 | | MAC2 | | MAC3 | | XGMAC4| |XGMAC5 | |
+ | | | +---+---+ +---+---+ +---+---+ +---+---+ +---+---+ +---+---+ |
+ | | | | | | | | | |
+ | | +-----+---------+---------+---------+---------+---------+-----+
+ | | | | | | | |
+ | | +---+---------+---------+---------+---+ +---+---+ +---+---+
+ +--+---->| PCS0 | | PCS1 | | PCS2 |
+ |(clock) +---+---------+---------+---------+---+ +---+---+ +---+---+
+ | | | | | | |
+ | +---+---------+---------+---------+---+ +---+---+ +---+---+
+ +------->| QCA8075 PHY | | PHY4 | | PHY5 |
+ (clock) +-------------------------------------+ +-------+ +-------+
+
+properties:
+ compatible:
+ enum:
+ - qcom,ipq9574-ppe
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: PPE core clock
+ - description: PPE APB (Advanced Peripheral Bus) clock
+ - description: PPE IPE (Ingress Process Engine) clock
+ - description: PPE BM, QM and scheduler clock
+
+ clock-names:
+ items:
+ - const: ppe
+ - const: apb
+ - const: ipe
+ - const: btq
+
+ resets:
+ maxItems: 1
+ description: PPE reset, which is necessary before configuring PPE hardware
+
+ interrupts:
+ maxItems: 1
+ description: PPE switch miscellaneous interrupt
+
+ interconnects:
+ items:
+ - description: Bus interconnect path leading to PPE switch core function
+ - description: Bus interconnect path leading to PPE register access
+ - description: Bus interconnect path leading to QoS generation
+ - description: Bus interconnect path leading to timeout reference
+ - description: Bus interconnect path leading to NSS NOC from memory NOC
+ - description: Bus interconnect path leading to memory NOC from NSS NOC
+ - description: Bus interconnect path leading to enhanced memory NOC from NSS NOC
+
+ interconnect-names:
+ items:
+ - const: ppe
+ - const: ppe_cfg
+ - const: qos_gen
+ - const: timeout_ref
+ - const: nssnoc_memnoc
+ - const: memnoc_nssnoc
+ - const: memnoc_nssnoc_1
+
+ ethernet-dma:
+ type: object
+ additionalProperties: false
+ description:
+ EDMA (Ethernet DMA) is used to transmit packets between PPE and ARM
+ host CPU. There are 32 TX descriptor rings, 32 TX completion rings,
+ 24 RX descriptor rings and 8 RX fill rings supported.
+
+ properties:
+ clocks:
+ items:
+ - description: EDMA system clock
+ - description: EDMA APB (Advanced Peripheral Bus) clock
+
+ clock-names:
+ items:
+ - const: sys
+ - const: apb
+
+ resets:
+ maxItems: 1
+ description: EDMA reset
+
+ interrupts:
+ minItems: 65
+ maxItems: 65
+
+ interrupt-names:
+ minItems: 65
+ maxItems: 65
+ items:
+ oneOf:
+ - pattern: '^txcmpl_([1-2]?[0-9]|3[01])$'
+ - pattern: '^rxfill_[0-7]$'
+ - pattern: '^rxdesc_(1?[0-9]|2[0-3])$'
+ - const: misc
+ description:
+ Interrupts "txcmpl_[0-31]" are the Ethernet DMA TX completion ring interrupts.
+ Interrupts "rxfill_[0-7]" are the Ethernet DMA RX fill ring interrupts.
+ Interrupts "rxdesc_[0-23]" are the Ethernet DMA RX Descriptor ring interrupts.
+ Interrupt "misc" is the Ethernet DMA miscellaneous error interrupt.
+
+ required:
+ - clocks
+ - clock-names
+ - resets
+ - interrupts
+ - interrupt-names
+
+ ethernet-ports:
+ patternProperties:
+ "^ethernet-port@[1-6]+$":
+ type: object
+ unevaluatedProperties: false
+ $ref: ethernet-switch-port.yaml#
+
+ properties:
+ reg:
+ minimum: 1
+ maximum: 6
+ description: PPE Ethernet port ID
+
+ clocks:
+ items:
+ - description: Port MAC clock
+ - description: Port RX clock
+ - description: Port TX clock
+
+ clock-names:
+ items:
+ - const: mac
+ - const: rx
+ - const: tx
+
+ resets:
+ items:
+ - description: Port MAC reset
+ - description: Port RX reset
+ - description: Port TX reset
+
+ reset-names:
+ items:
+ - const: mac
+ - const: rx
+ - const: tx
+
+ required:
+ - reg
+ - clocks
+ - clock-names
+ - resets
+ - reset-names
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - resets
+ - interconnects
+ - interconnect-names
+ - ethernet-dma
+
+allOf:
+ - $ref: ethernet-switch.yaml
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,ipq9574-gcc.h>
+ #include <dt-bindings/clock/qcom,ipq9574-nsscc.h>
+ #include <dt-bindings/interconnect/qcom,ipq9574.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/reset/qcom,ipq9574-nsscc.h>
+
+ ethernet-switch@3a000000 {
+ compatible = "qcom,ipq9574-ppe";
+ reg = <0x3a000000 0xbef800>;
+ clocks = <&nsscc NSS_CC_PPE_SWITCH_CLK>,
+ <&nsscc NSS_CC_PPE_SWITCH_CFG_CLK>,
+ <&nsscc NSS_CC_PPE_SWITCH_IPE_CLK>,
+ <&nsscc NSS_CC_PPE_SWITCH_BTQ_CLK>;
+ clock-names = "ppe",
+ "apb",
+ "ipe",
+ "btq";
+ resets = <&nsscc PPE_FULL_RESET>;
+ interrupts = <GIC_SPI 498 IRQ_TYPE_LEVEL_HIGH>;
+ interconnects = <&nsscc MASTER_NSSNOC_PPE &nsscc SLAVE_NSSNOC_PPE>,
+ <&nsscc MASTER_NSSNOC_PPE_CFG &nsscc SLAVE_NSSNOC_PPE_CFG>,
+ <&gcc MASTER_NSSNOC_QOSGEN_REF &gcc SLAVE_NSSNOC_QOSGEN_REF>,
+ <&gcc MASTER_NSSNOC_TIMEOUT_REF &gcc SLAVE_NSSNOC_TIMEOUT_REF>,
+ <&gcc MASTER_MEM_NOC_NSSNOC &gcc SLAVE_MEM_NOC_NSSNOC>,
+ <&gcc MASTER_NSSNOC_MEMNOC &gcc SLAVE_NSSNOC_MEMNOC>,
+ <&gcc MASTER_NSSNOC_MEM_NOC_1 &gcc SLAVE_NSSNOC_MEM_NOC_1>;
+ interconnect-names = "ppe",
+ "ppe_cfg",
+ "qos_gen",
+ "timeout_ref",
+ "nssnoc_memnoc",
+ "memnoc_nssnoc",
+ "memnoc_nssnoc_1";
+
+ ethernet-dma {
+ clocks = <&nsscc NSS_CC_PPE_EDMA_CLK>,
+ <&nsscc NSS_CC_PPE_EDMA_CFG_CLK>;
+ clock-names = "sys",
+ "apb";
+ resets = <&nsscc EDMA_HW_RESET>;
+ interrupts = <GIC_SPI 363 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 364 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 365 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 366 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 367 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 368 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 369 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 370 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 371 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 372 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 373 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 374 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 375 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 376 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 377 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 378 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 379 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 380 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 381 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 382 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 383 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 384 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 509 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 508 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 507 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 506 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 505 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 504 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 503 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 502 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 501 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 500 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 355 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 356 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 357 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 358 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 359 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 360 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 361 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 362 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 331 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 332 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 333 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 334 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 335 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 336 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 337 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 338 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 339 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 340 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 341 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 342 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 343 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 344 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 345 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 346 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 347 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 348 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 349 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 350 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 351 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 352 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 353 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 354 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 499 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "txcmpl_0",
+ "txcmpl_1",
+ "txcmpl_2",
+ "txcmpl_3",
+ "txcmpl_4",
+ "txcmpl_5",
+ "txcmpl_6",
+ "txcmpl_7",
+ "txcmpl_8",
+ "txcmpl_9",
+ "txcmpl_10",
+ "txcmpl_11",
+ "txcmpl_12",
+ "txcmpl_13",
+ "txcmpl_14",
+ "txcmpl_15",
+ "txcmpl_16",
+ "txcmpl_17",
+ "txcmpl_18",
+ "txcmpl_19",
+ "txcmpl_20",
+ "txcmpl_21",
+ "txcmpl_22",
+ "txcmpl_23",
+ "txcmpl_24",
+ "txcmpl_25",
+ "txcmpl_26",
+ "txcmpl_27",
+ "txcmpl_28",
+ "txcmpl_29",
+ "txcmpl_30",
+ "txcmpl_31",
+ "rxfill_0",
+ "rxfill_1",
+ "rxfill_2",
+ "rxfill_3",
+ "rxfill_4",
+ "rxfill_5",
+ "rxfill_6",
+ "rxfill_7",
+ "rxdesc_0",
+ "rxdesc_1",
+ "rxdesc_2",
+ "rxdesc_3",
+ "rxdesc_4",
+ "rxdesc_5",
+ "rxdesc_6",
+ "rxdesc_7",
+ "rxdesc_8",
+ "rxdesc_9",
+ "rxdesc_10",
+ "rxdesc_11",
+ "rxdesc_12",
+ "rxdesc_13",
+ "rxdesc_14",
+ "rxdesc_15",
+ "rxdesc_16",
+ "rxdesc_17",
+ "rxdesc_18",
+ "rxdesc_19",
+ "rxdesc_20",
+ "rxdesc_21",
+ "rxdesc_22",
+ "rxdesc_23",
+ "misc";
+ };
+
+ ethernet-ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ ethernet-port@1 {
+ reg = <1>;
+ phy-mode = "qsgmii";
+ managed = "in-band-status";
+ phy-handle = <&phy0>;
+ pcs-handle = <&pcs0_ch0>;
+ clocks = <&nsscc NSS_CC_PORT1_MAC_CLK>,
+ <&nsscc NSS_CC_PORT1_RX_CLK>,
+ <&nsscc NSS_CC_PORT1_TX_CLK>;
+ clock-names = "mac",
+ "rx",
+ "tx";
+ resets = <&nsscc PORT1_MAC_ARES>,
+ <&nsscc PORT1_RX_ARES>,
+ <&nsscc PORT1_TX_ARES>;
+ reset-names = "mac",
+ "rx",
+ "tx";
+ };
+
+ ethernet-port@2 {
+ reg = <2>;
+ phy-mode = "qsgmii";
+ managed = "in-band-status";
+ phy-handle = <&phy1>;
+ pcs-handle = <&pcs0_ch1>;
+ clocks = <&nsscc NSS_CC_PORT2_MAC_CLK>,
+ <&nsscc NSS_CC_PORT2_RX_CLK>,
+ <&nsscc NSS_CC_PORT2_TX_CLK>;
+ clock-names = "mac",
+ "rx",
+ "tx";
+ resets = <&nsscc PORT2_MAC_ARES>,
+ <&nsscc PORT2_RX_ARES>,
+ <&nsscc PORT2_TX_ARES>;
+ reset-names = "mac",
+ "rx",
+ "tx";
+ };
+
+ ethernet-port@3 {
+ reg = <3>;
+ phy-mode = "qsgmii";
+ managed = "in-band-status";
+ phy-handle = <&phy2>;
+ pcs-handle = <&pcs0_ch2>;
+ clocks = <&nsscc NSS_CC_PORT3_MAC_CLK>,
+ <&nsscc NSS_CC_PORT3_RX_CLK>,
+ <&nsscc NSS_CC_PORT3_TX_CLK>;
+ clock-names = "mac",
+ "rx",
+ "tx";
+ resets = <&nsscc PORT3_MAC_ARES>,
+ <&nsscc PORT3_RX_ARES>,
+ <&nsscc PORT3_TX_ARES>;
+ reset-names = "mac",
+ "rx",
+ "tx";
+ };
+
+ ethernet-port@4 {
+ reg = <4>;
+ phy-mode = "qsgmii";
+ managed = "in-band-status";
+ phy-handle = <&phy3>;
+ pcs-handle = <&pcs0_ch3>;
+ clocks = <&nsscc NSS_CC_PORT4_MAC_CLK>,
+ <&nsscc NSS_CC_PORT4_RX_CLK>,
+ <&nsscc NSS_CC_PORT4_TX_CLK>;
+ clock-names = "mac",
+ "rx",
+ "tx";
+ resets = <&nsscc PORT4_MAC_ARES>,
+ <&nsscc PORT4_RX_ARES>,
+ <&nsscc PORT4_TX_ARES>;
+ reset-names = "mac",
+ "rx",
+ "tx";
+ };
+
+ ethernet-port@5 {
+ reg = <5>;
+ phy-mode = "usxgmii";
+ managed = "in-band-status";
+ phy-handle = <&phy4>;
+ pcs-handle = <&pcs1_ch0>;
+ clocks = <&nsscc NSS_CC_PORT5_MAC_CLK>,
+ <&nsscc NSS_CC_PORT5_RX_CLK>,
+ <&nsscc NSS_CC_PORT5_TX_CLK>;
+ clock-names = "mac",
+ "rx",
+ "tx";
+ resets = <&nsscc PORT5_MAC_ARES>,
+ <&nsscc PORT5_RX_ARES>,
+ <&nsscc PORT5_TX_ARES>;
+ reset-names = "mac",
+ "rx",
+ "tx";
+ };
+
+ ethernet-port@6 {
+ reg = <6>;
+ phy-mode = "usxgmii";
+ managed = "in-band-status";
+ phy-handle = <&phy5>;
+ pcs-handle = <&pcs2_ch0>;
+ clocks = <&nsscc NSS_CC_PORT6_MAC_CLK>,
+ <&nsscc NSS_CC_PORT6_RX_CLK>,
+ <&nsscc NSS_CC_PORT6_TX_CLK>;
+ clock-names = "mac",
+ "rx",
+ "tx";
+ resets = <&nsscc PORT6_MAC_ARES>,
+ <&nsscc PORT6_RX_ARES>,
+ <&nsscc PORT6_TX_ARES>;
+ reset-names = "mac",
+ "rx",
+ "tx";
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/net/realtek,rtl82xx.yaml b/Documentation/devicetree/bindings/net/realtek,rtl82xx.yaml
index d248a08a2136..2b5697bd7c5d 100644
--- a/Documentation/devicetree/bindings/net/realtek,rtl82xx.yaml
+++ b/Documentation/devicetree/bindings/net/realtek,rtl82xx.yaml
@@ -45,12 +45,16 @@ properties:
description:
Disable CLKOUT clock, CLKOUT clock default is enabled after hardware reset.
-
realtek,aldps-enable:
type: boolean
description:
Enable ALDPS mode, ALDPS mode default is disabled after hardware reset.
+ wakeup-source:
+ type: boolean
+ description:
+ Enable Wake-on-LAN support for the RTL8211F PHY.
+
unevaluatedProperties: false
allOf:
diff --git a/Documentation/devicetree/bindings/net/renesas,rzn1-gmac.yaml b/Documentation/devicetree/bindings/net/renesas,rzn1-gmac.yaml
index d9a8d586e260..16dd7a2631ab 100644
--- a/Documentation/devicetree/bindings/net/renesas,rzn1-gmac.yaml
+++ b/Documentation/devicetree/bindings/net/renesas,rzn1-gmac.yaml
@@ -30,6 +30,15 @@ properties:
- const: renesas,rzn1-gmac
- const: snps,dwmac
+ interrupts:
+ maxItems: 3
+
+ interrupt-names:
+ items:
+ - const: macirq
+ - const: eth_wake_irq
+ - const: eth_lpi
+
pcs-handle:
description:
phandle pointing to a PCS sub-node compatible with
diff --git a/Documentation/devicetree/bindings/ptp/nxp,ptp-netc.yaml b/Documentation/devicetree/bindings/ptp/nxp,ptp-netc.yaml
new file mode 100644
index 000000000000..042de9d5a92b
--- /dev/null
+++ b/Documentation/devicetree/bindings/ptp/nxp,ptp-netc.yaml
@@ -0,0 +1,63 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/ptp/nxp,ptp-netc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: NXP NETC V4 Timer PTP clock
+
+description:
+ NETC V4 Timer provides current time with nanosecond resolution, precise
+ periodic pulse, pulse on timeout (alarm), and time capture on external
+  pulse support. It also supports time synchronization as required by
+  IEEE 1588 and IEEE 802.1AS-2020.
+
+maintainers:
+ - Wei Fang <wei.fang@nxp.com>
+ - Clark Wang <xiaoning.wang@nxp.com>
+
+properties:
+ compatible:
+ enum:
+ - pci1131,ee02
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+ description:
+      The reference clock of the NETC Timer. It can be selected from 3
+      different clock sources using an integrated hardware mux,
+      TMR_CTRL[CK_SEL].
+ The "ccm" means the reference clock comes from CCM of SoC.
+ The "ext" means the reference clock comes from external IO pins.
+ If not present, indicates that the system clock of NETC IP is selected
+ as the reference clock.
+
+ clock-names:
+ enum:
+ - ccm
+ - ext
+
+required:
+ - compatible
+ - reg
+
+allOf:
+ - $ref: /schemas/pci/pci-device.yaml
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ pcie {
+ #address-cells = <3>;
+ #size-cells = <2>;
+
+ ptp-timer@18,0 {
+ compatible = "pci1131,ee02";
+ reg = <0x00c000 0 0 0 0>;
+ clocks = <&scmi_clk 18>;
+ clock-names = "ccm";
+ };
+ };
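
Once this device probes, it registers a standard PTP clock; reading it from
user space only takes the dynamic-clock trick below (a sketch assuming the
NETC timer came up as /dev/ptp0)::

    import os
    import time

    fd = os.open("/dev/ptp0", os.O_RDONLY)
    # FD_TO_CLOCKID(): turn a /dev/ptpX fd into a dynamic POSIX clock id.
    clkid = ((~fd) << 3) | 3
    print(time.clock_gettime(clkid))
    os.close(fd)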
diff --git a/Documentation/netlink/specs/devlink.yaml b/Documentation/netlink/specs/devlink.yaml
index bb87111d5e16..3db59c965869 100644
--- a/Documentation/netlink/specs/devlink.yaml
+++ b/Documentation/netlink/specs/devlink.yaml
@@ -853,6 +853,10 @@ attribute-sets:
type: nest
multi-attr: true
nested-attributes: dl-rate-tc-bws
+ -
+ name: health-reporter-burst-period
+ type: u64
+ doc: Time (in msec) for recoveries before starting the grace period.
-
name: dl-dev-stats
subset-of: devlink
@@ -1216,6 +1220,8 @@ attribute-sets:
name: health-reporter-dump-ts-ns
-
name: health-reporter-auto-dump
+ -
+ name: health-reporter-burst-period
-
name: dl-attr-stats
@@ -1961,6 +1967,7 @@ operations:
- health-reporter-graceful-period
- health-reporter-auto-recover
- health-reporter-auto-dump
+ - health-reporter-burst-period
-
name: health-reporter-recover
diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml
index 1bc1bd7d33c2..7a7594713f1f 100644
--- a/Documentation/netlink/specs/ethtool.yaml
+++ b/Documentation/netlink/specs/ethtool.yaml
@@ -205,6 +205,9 @@ definitions:
-
name: gtp-teid
-
+ name: ip6-fl
+ doc: IPv6 Flow Label
+ -
name: discard
value: 31
diff --git a/Documentation/netlink/specs/fou.yaml b/Documentation/netlink/specs/fou.yaml
index 57735726262e..8e7974ec453f 100644
--- a/Documentation/netlink/specs/fou.yaml
+++ b/Documentation/netlink/specs/fou.yaml
@@ -52,7 +52,7 @@ attribute-sets:
name: local-v6
type: binary
checks:
- min-len: 16
+ exact-len: 16
-
name: peer-v4
type: u32
@@ -60,7 +60,7 @@ attribute-sets:
name: peer-v6
type: binary
checks:
- min-len: 16
+ exact-len: 16
-
name: peer-port
type: u16
diff --git a/Documentation/netlink/specs/index.rst b/Documentation/netlink/specs/index.rst
new file mode 100644
index 000000000000..7f7cf4a096f2
--- /dev/null
+++ b/Documentation/netlink/specs/index.rst
@@ -0,0 +1,13 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. _specs:
+
+=============================
+Netlink Family Specifications
+=============================
+
+.. toctree::
+ :maxdepth: 1
+ :glob:
+
+ *
diff --git a/Documentation/networking/bonding.rst b/Documentation/networking/bonding.rst
index f8f5766703d4..a2b42ae719d2 100644
--- a/Documentation/networking/bonding.rst
+++ b/Documentation/networking/bonding.rst
@@ -582,10 +582,8 @@ miimon
This determines how often the link state of each slave is
inspected for link failures. A value of zero disables MII
link monitoring. A value of 100 is a good starting point.
- The use_carrier option, below, affects how the link state is
- determined. See the High Availability section for additional
- information. The default value is 100 if arp_interval is not
- set.
+
+ The default value is 100 if arp_interval is not set.
min_links
@@ -896,25 +894,14 @@ updelay
use_carrier
- Specifies whether or not miimon should use MII or ETHTOOL
- ioctls vs. netif_carrier_ok() to determine the link
- status. The MII or ETHTOOL ioctls are less efficient and
- utilize a deprecated calling sequence within the kernel. The
- netif_carrier_ok() relies on the device driver to maintain its
- state with netif_carrier_on/off; at this writing, most, but
- not all, device drivers support this facility.
-
- If bonding insists that the link is up when it should not be,
- it may be that your network device driver does not support
- netif_carrier_on/off. The default state for netif_carrier is
- "carrier on," so if a driver does not support netif_carrier,
- it will appear as if the link is always up. In this case,
- setting use_carrier to 0 will cause bonding to revert to the
- MII / ETHTOOL ioctl method to determine the link state.
-
- A value of 1 enables the use of netif_carrier_ok(), a value of
- 0 will use the deprecated MII / ETHTOOL ioctls. The default
- value is 1.
+ Obsolete option that previously selected between MII /
+ ETHTOOL ioctls and netif_carrier_ok() to determine link
+ state.
+
+ All link state checks are now done with netif_carrier_ok().
+
+ For backwards compatibility, this option's value may be inspected
+ or set. The only valid setting is 1.
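+
+ For example, with a bond named bond0, reading
+ /sys/class/net/bond0/bonding/use_carrier always returns 1, while
+ attempting to set any value other than 1 is rejected with an error.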
xmit_hash_policy
@@ -2036,22 +2023,8 @@ depending upon the device driver to maintain its carrier state, by
querying the device's MII registers, or by making an ethtool query to
the device.
-If the use_carrier module parameter is 1 (the default value),
-then the MII monitor will rely on the driver for carrier state
-information (via the netif_carrier subsystem). As explained in the
-use_carrier parameter information, above, if the MII monitor fails to
-detect carrier loss on the device (e.g., when the cable is physically
-disconnected), it may be that the driver does not support
-netif_carrier.
-
-If use_carrier is 0, then the MII monitor will first query the
-device's (via ioctl) MII registers and check the link state. If that
-request fails (not just that it returns carrier down), then the MII
-monitor will make an ethtool ETHTOOL_GLINK request to attempt to obtain
-the same information. If both methods fail (i.e., the driver either
-does not support or had some error in processing both the MII register
-and ethtool requests), then the MII monitor will assume the link is
-up.
+The MII monitor relies on the driver for carrier state information (via
+the netif_carrier subsystem).
8. Potential Sources of Trouble
===============================
@@ -2135,34 +2108,6 @@ This will load tg3 and e1000 modules before loading the bonding one.
Full documentation on this can be found in the modprobe.d and modprobe
manual pages.
-8.3. Painfully Slow Or No Failed Link Detection By Miimon
----------------------------------------------------------
-
-By default, bonding enables the use_carrier option, which
-instructs bonding to trust the driver to maintain carrier state.
-
-As discussed in the options section, above, some drivers do
-not support the netif_carrier_on/_off link state tracking system.
-With use_carrier enabled, bonding will always see these links as up,
-regardless of their actual state.
-
-Additionally, other drivers do support netif_carrier, but do
-not maintain it in real time, e.g., only polling the link state at
-some fixed interval. In this case, miimon will detect failures, but
-only after some long period of time has expired. If it appears that
-miimon is very slow in detecting link failures, try specifying
-use_carrier=0 to see if that improves the failure detection time. If
-it does, then it may be that the driver checks the carrier state at a
-fixed interval, but does not cache the MII register values (so the
-use_carrier=0 method of querying the registers directly works). If
-use_carrier=0 does not improve the failover, then the driver may cache
-the registers, or the problem may be elsewhere.
-
-Also, remember that miimon only checks for the device's
-carrier state. It has no way to determine the state of devices on or
-beyond other ports of a switch, or if a switch is refusing to pass
-traffic while still maintaining carrier on.
-
9. SNMP agents
===============
diff --git a/Documentation/networking/device_drivers/ethernet/index.rst b/Documentation/networking/device_drivers/ethernet/index.rst
index 40ac552641a3..0b0a3eef6aae 100644
--- a/Documentation/networking/device_drivers/ethernet/index.rst
+++ b/Documentation/networking/device_drivers/ethernet/index.rst
@@ -50,6 +50,7 @@ Contents:
neterion/s2io
netronome/nfp
pensando/ionic
+ qualcomm/ppe/ppe
smsc/smc9
stmicro/stmmac
ti/cpsw
diff --git a/Documentation/networking/device_drivers/ethernet/meta/fbnic.rst b/Documentation/networking/device_drivers/ethernet/meta/fbnic.rst
index afb8353daefd..fb6559fa4be4 100644
--- a/Documentation/networking/device_drivers/ethernet/meta/fbnic.rst
+++ b/Documentation/networking/device_drivers/ethernet/meta/fbnic.rst
@@ -160,3 +160,14 @@ behavior and potential performance bottlenecks.
credit exhaustion
- ``pcie_ob_rd_no_np_cred``: Read requests dropped due to non-posted
credit exhaustion
+
+XDP Length Error:
+~~~~~~~~~~~~~~~~~
+
+For XDP programs without frags support, fbnic tries to make sure that the MTU
+fits into a single buffer. If an oversized frame is received and gets
+fragmented, it is dropped and the following netlink counters are updated:
+
+ - ``rx-length``: number of frames dropped due to lack of fragmentation
+ support in the attached XDP program
+ - ``rx-errors``: total number of packets with errors received on the interface
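+
+Assuming these counters map onto the standard interface statistics, the drops
+should be visible, for example, in the RX errors breakdown printed by
+``ip -s -s link show dev <iface>``.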
diff --git a/Documentation/networking/device_drivers/ethernet/qualcomm/ppe/ppe.rst b/Documentation/networking/device_drivers/ethernet/qualcomm/ppe/ppe.rst
new file mode 100644
index 000000000000..4ab299a28969
--- /dev/null
+++ b/Documentation/networking/device_drivers/ethernet/qualcomm/ppe/ppe.rst
@@ -0,0 +1,194 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===============================================
+PPE Ethernet Driver for Qualcomm IPQ SoC Family
+===============================================
+
+Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
+
+Author: Lei Wei <quic_leiwei@quicinc.com>
+
+
+Contents
+========
+
+- `PPE Overview`_
+- `PPE Driver Overview`_
+- `PPE Driver Supported SoCs`_
+- `Enabling the Driver`_
+- `Debugging`_
+
+
+PPE Overview
+============
+
+The IPQ (Qualcomm Internet Processor) SoC (System-on-Chip) series is Qualcomm's
+family of networking SoCs for Wi-Fi access points. The PPE (Packet Process
+Engine) is the Ethernet packet processing engine in the IPQ SoC.
+
+Below is a simplified hardware diagram of the IPQ9574 SoC. It includes the PPE
+engine and other blocks which are in the SoC but outside the PPE engine. These
+blocks work together to enable Ethernet on the IPQ SoC::
+
+ +------+ +------+ +------+ +------+ +------+ +------+ start +-------+
+ |netdev| |netdev| |netdev| |netdev| |netdev| |netdev|<------|PHYLINK|
+ +------+ +------+ +------+ +------+ +------+ +------+ stop +-+-+-+-+
+ | | | ^
+ +-------+ +-------------------------+--------+----------------------+ | | |
+ | GCC | | | EDMA | | | | |
+ +---+---+ | PPE +---+----+ | | | |
+ | clk | | | | | |
+ +-------->| +-----------------------+------+-----+---------------+ | | | |
+ | | Switch Core |Port0 | |Port7(EIP FIFO)| | | | |
+ | | +---+--+ +------+--------+ | | | |
+ | | | | | | | | |
+ +-------+ | | +------+---------------+----+ | | | | |
+ |CMN PLL| | | +---+ +---+ +----+ | +--------+ | | | | | |
+ +---+---+ | | |BM | |QM | |SCH | | | L2/L3 | ....... | | | | | |
+ | | | | +---+ +---+ +----+ | +--------+ | | | | | |
+ | | | | +------+--------------------+ | | | | |
+ | | | | | | | | | |
+ | v | | +-----+-+-----+-+-----+-+-+---+--+-----+-+-----+ | | | | |
+ | +------+ | | |Port1| |Port2| |Port3| |Port4| |Port5| |Port6| | | | | |
+ | |NSSCC | | | +-----+ +-----+ +-----+ +-----+ +-----+ +-----+ | | mac| | |
+ | +-+-+--+ | | |MAC0 | |MAC1 | |MAC2 | |MAC3 | |MAC4 | |MAC5 | | |<---+ | |
+ | ^ | |clk | | +-----+-+-----+-+-----+-+-----+--+-----+-+-----+ | | ops | |
+ | | | +------>| +----|------|-------|-------|---------|--------|-----+ | | |
+ | | | +---------------------------------------------------------+ | |
+ | | | | | | | | | | |
+ | | | MII clk | QSGMII USXGMII USXGMII | |
+ | | +--------------->| | | | | | | |
+ | | +-------------------------+ +---------+ +---------+ | |
+ | |125/312.5MHz clk| (PCS0) | | (PCS1) | | (PCS2) | pcs ops | |
+ | +----------------+ UNIPHY0 | | UNIPHY1 | | UNIPHY2 |<--------+ |
+ +----------------->| | | | | | |
+ | 31.25MHz ref clk +-------------------------+ +---------+ +---------+ |
+ | | | | | | | |
+ | +-----------------------------------------------------+ |
+ |25/50MHz ref clk| +-------------------------+ +------+ +------+ | link |
+ +--------------->| | QUAD PHY | | PHY4 | | PHY5 | |---------+
+ | +-------------------------+ +------+ +------+ | change
+ | |
+ | MDIO bus |
+ +-----------------------------------------------------+
+
+The CMN (Common) PLL, NSSCC (Networking Sub System Clock Controller) and GCC (Global
+Clock Controller) blocks are in the SoC and act as clock providers.
+
+The UNIPHY block is in the SoC and provides the PCS (Physical Coding Sublayer) and
+XPCS (10-Gigabit Physical Coding Sublayer) functions to support different interface
+modes between the PPE MAC and the external PHY.
+
+This documentation describes the PPE engine and the PPE driver.
+
+The Ethernet functionality in the PPE (Packet Process Engine) comprises three
+components: the switch core, the port wrapper and the Ethernet DMA.
+
+The switch core in the IPQ9574 PPE has a maximum of 6 front panel ports and two
+FIFO interfaces. One of the two FIFO interfaces is used for communication between
+the Ethernet ports and the host CPU using the Ethernet DMA. The other one is used
+to communicate with the EIP engine, which performs IPsec offload. On the IPQ9574,
+the PPE includes 6 GMAC/XGMACs that can be connected to external Ethernet PHYs.
+The switch core also includes the BM (Buffer Management), QM (Queue Management)
+and SCH (Scheduler) modules which support packet processing.
+
+The port wrapper provides connections from the 6 GMAC/XGMACs to the UNIPHY (PCS),
+supporting various modes such as SGMII/QSGMII/PSGMII/USXGMII/10G-BASER. There are
+3 UNIPHY (PCS) instances on the IPQ9574.
+
+The Ethernet DMA is used to transmit and receive packets between the Ethernet
+subsystem and the ARM host CPU.
+
+The following are the main blocks in the PPE engine which are driven by this
+PPE driver:
+
+- BM
+ BM is the hardware buffer manager for the PPE switch ports.
+- QM
+ Queue Manager for managing the egress hardware queues of the PPE switch ports.
+- SCH
+ The scheduler which manages the hardware traffic scheduling for the PPE switch ports.
+- L2
+ The L2 block performs the packet bridging in the switch core. The bridge domain is
+ represented by the VSI (Virtual Switch Instance) domain in PPE. FDB learning can be
+ enabled based on the VSI domain and bridge forwarding occurs within the VSI domain.
+- MAC
+ The PPE in the IPQ9574 supports up to six MACs (MAC0 to MAC5) which correspond
+ to the six switch ports (port1 to port6). The MAC block is connected to the
+ external PHY through the UNIPHY PCS block. Each MAC block includes a GMAC and
+ an XGMAC block, and the switch port can select between the GMAC and the XGMAC
+ through a MUX according to the external PHY's capability.
+- EDMA (Ethernet DMA)
+ The Ethernet DMA is used to transmit and receive Ethernet packets between the PPE
+ ports and the ARM cores.
+
+A packet received on a PPE MAC port can be forwarded to another PPE MAC port. It
+can also be forwarded to the internal switch port0, so that the packet can be
+delivered to the ARM cores using the Ethernet DMA (EDMA) engine. The Ethernet DMA
+driver then delivers the packet to the corresponding 'netdevice' interface.
+
+The software instantiations of the PPE MAC (netdevice), PCS and external PHYs interact
+with the Linux PHYLINK framework to manage the connectivity between the PPE ports and
+the connected PHYs, and the port link states. This is also illustrated in the
+diagram above.
+
+
+PPE Driver Overview
+===================
+The PPE driver is the Ethernet driver for the Qualcomm IPQ SoC. It is a single
+platform driver which includes both the PPE part and the Ethernet DMA part. The
+PPE part initializes and drives the various blocks in the PPE switch core, such
+as the BM/QM/L2 blocks and the PPE MACs. The EDMA part drives the Ethernet DMA
+for packet transfer between the PPE ports and the ARM cores, and provides the
+netdevice driver for the PPE ports.
+
+The PPE driver files in drivers/net/ethernet/qualcomm/ppe/ are listed below:
+
+- Makefile
+- ppe.c
+- ppe.h
+- ppe_config.c
+- ppe_config.h
+- ppe_debugfs.c
+- ppe_debugfs.h
+- ppe_regs.h
+
+The ppe.c file contains the main PPE platform driver and performs the
+initialization of the PPE switch core blocks such as QM, BM and L2. The
+configuration APIs for these hardware blocks are provided in the ppe_config.c file.
+
+The ppe.h file defines the PPE device data structure which is used by the PPE
+driver functions.
+
+The ppe_debugfs.c file exposes the PPE statistics counters, such as the PPE port
+Rx and Tx counters, the CPU code counters and the queue counters.
+
+
+PPE Driver Supported SoCs
+=========================
+
+The PPE driver supports the following IPQ SoCs:
+
+- IPQ9574
+
+
+Enabling the Driver
+===================
+
+The driver is located in the menu structure at::
+
+ -> Device Drivers
+ -> Network device support (NETDEVICES [=y])
+ -> Ethernet driver support
+ -> Qualcomm devices
+ -> Qualcomm Technologies, Inc. PPE Ethernet support
+
+If the driver is built as a module, the module will be called qcom-ppe.
+
+The PPE driver functionally depends on the CMN PLL and NSSCC clock controller drivers.
+Please make sure the dependent modules are installed before installing the PPE driver
+module.
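+
+For example, when built as a module, the driver can be loaded with
+``modprobe qcom-ppe`` once the CMN PLL and NSSCC clock controller modules
+are available.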
+
+
+Debugging
+=========
+
+The PPE hardware counters can be accessed using the debugfs interface, under
+the ``/sys/kernel/debug/ppe/`` directory.
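+
+For example, assuming debugfs is mounted (the counter file name below is
+illustrative only, not taken from the driver)::
+
+ # mount -t debugfs none /sys/kernel/debug
+ # cat /sys/kernel/debug/ppe/packet_counters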
diff --git a/Documentation/networking/devlink/devlink-health.rst b/Documentation/networking/devlink/devlink-health.rst
index e0b8cfed610a..4d10536377ab 100644
--- a/Documentation/networking/devlink/devlink-health.rst
+++ b/Documentation/networking/devlink/devlink-health.rst
@@ -50,7 +50,7 @@ Once an error is reported, devlink health will perform the following actions:
* Auto recovery attempt is being done. Depends on:
- Auto-recovery configuration
- - Grace period vs. time passed since last recover
+ - Grace period (and burst period) vs. time passed since the last recovery
Devlink formatted message
=========================
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index ac90b82f3ce9..b7a4969e9bc9 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -57,7 +57,7 @@ Contents:
filter
generic-hdlc
generic_netlink
- netlink_spec/index
+ ../netlink/specs/index
gen_stats
gtp
ila
diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
index 9756d16e3df1..9f5891c9b07b 100644
--- a/Documentation/networking/ip-sysctl.rst
+++ b/Documentation/networking/ip-sysctl.rst
@@ -209,7 +209,7 @@ neigh/default/unres_qlen_bytes - INTEGER
Setting negative value is meaningless and will return error.
- Default: SK_WMEM_MAX, (same as net.core.wmem_default).
+ Default: SK_WMEM_DEFAULT, (same as net.core.wmem_default).
Exact value depends on architecture and kernel options,
but should be enough to allow queuing 256 packets
@@ -805,8 +805,8 @@ tcp_rmem - vector of 3 INTEGERs: min, default, max
This value results in initial window of 65535.
max: maximal size of receive buffer allowed for automatically
- selected receiver buffers for TCP socket. This value does not override
- net.core.rmem_max. Calling setsockopt() with SO_RCVBUF disables
+ selected receiver buffers for TCP socket.
+ Calling setsockopt() with SO_RCVBUF disables
automatic tuning of that socket's receive buffer size, in which
case this value is ignored.
Default: between 131072 and 32MB, depending on RAM size.
@@ -3508,16 +3508,10 @@ cookie_hmac_alg - STRING
a listening sctp socket to a connecting client in the INIT-ACK chunk.
Valid values are:
- * md5
- * sha1
+ * sha256
* none
- Ability to assign md5 or sha1 as the selected alg is predicated on the
- configuration of those algorithms at build time (CONFIG_CRYPTO_MD5 and
- CONFIG_CRYPTO_SHA1).
-
- Default: Dependent on configuration. MD5 if available, else SHA1 if
- available, else none.
+ Default: sha256
rcvbuf_policy - INTEGER
Determines if the receive buffer is attributed to the socket or to
diff --git a/Documentation/networking/netlink_spec/.gitignore b/Documentation/networking/netlink_spec/.gitignore
deleted file mode 100644
index 30d85567b592..000000000000
--- a/Documentation/networking/netlink_spec/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-*.rst
diff --git a/Documentation/networking/netlink_spec/readme.txt b/Documentation/networking/netlink_spec/readme.txt
deleted file mode 100644
index 030b44aca4e6..000000000000
--- a/Documentation/networking/netlink_spec/readme.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-SPDX-License-Identifier: GPL-2.0
-
-This file is populated during the build of the documentation (htmldocs) by the
-tools/net/ynl/pyynl/ynl_gen_rst.py script.
diff --git a/Documentation/process/maintainer-netdev.rst b/Documentation/process/maintainer-netdev.rst
index e1755610b4bc..989192421cc9 100644
--- a/Documentation/process/maintainer-netdev.rst
+++ b/Documentation/process/maintainer-netdev.rst
@@ -407,7 +407,7 @@ Clean-up patches
Netdev discourages patches which perform simple clean-ups, which are not in
the context of other work. For example:
-* Addressing ``checkpatch.pl`` warnings
+* Addressing ``checkpatch.pl`` and other trivial coding style warnings
* Addressing :ref:`Local variable ordering<rcs>` issues
* Conversions to device-managed APIs (``devm_`` helpers)
diff --git a/Documentation/sphinx/parser_yaml.py b/Documentation/sphinx/parser_yaml.py
new file mode 100755
index 000000000000..634d84a202fc
--- /dev/null
+++ b/Documentation/sphinx/parser_yaml.py
@@ -0,0 +1,123 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright 2025 Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
+
+"""
+Sphinx extension for processing YAML files
+"""
+
+import os
+import re
+import sys
+
+from pprint import pformat
+
+from docutils import statemachine
+from docutils.parsers.rst import Parser as RSTParser
+from docutils.parsers.rst import states
+from docutils.statemachine import ViewList
+
+from sphinx.util import logging
+from sphinx.parsers import Parser
+
+srctree = os.path.abspath(os.environ["srctree"])
+sys.path.insert(0, os.path.join(srctree, "tools/net/ynl/pyynl/lib"))
+
+from doc_generator import YnlDocGenerator # pylint: disable=C0413
+
+logger = logging.getLogger(__name__)
+
+class YamlParser(Parser):
+ """
+ Kernel parser for YAML files.
+
+ This is a simple sphinx.Parser to handle yaml files inside the
+ Kernel tree that will be part of the built documentation.
+
+ The actual parser function is not contained here: the code was
+ written in a way that parsing yaml for different subsystems
+ can be done from a single dispatcher.
+
+ All it takes to parse a new type of YAML file is to add an import line:
+
+ from some_parser_code import NewYamlGenerator
+
+ to this module, then add an instance of the parser with:
+
+ new_parser = NewYamlGenerator()
+
+ and add logic inside parse() to handle it based on the path,
+ like this:
+
+ if "/foo" in fname:
+ msg = self.new_parser.parse_yaml_file(fname)
+ """
+
+ supported = ('yaml', )
+
+ netlink_parser = YnlDocGenerator()
+
+ re_lineno = re.compile(r"\.\. LINENO ([0-9]+)$")
+
+ tab_width = 8
+
+ def rst_parse(self, inputstring, document, msg):
+ """
+ Receives ReST content that was previously converted by the
+ YAML parser and adds it to the document tree.
+ """
+
+ self.setup_parse(inputstring, document)
+
+ result = ViewList()
+
+ self.statemachine = states.RSTStateMachine(state_classes=states.state_classes,
+ initial_state='Body',
+ debug=document.reporter.debug_flag)
+
+ try:
+ # Parse message with RSTParser
+ lineoffset = 0
+
+ lines = statemachine.string2lines(msg, self.tab_width,
+ convert_whitespace=True)
+
+ for line in lines:
+ match = self.re_lineno.match(line)
+ if match:
+ lineoffset = int(match.group(1))
+ continue
+
+ result.append(line, document.current_source, lineoffset)
+
+ self.statemachine.run(result, document)
+
+ except Exception as e:
+ document.reporter.error("YAML parsing error: %s" % pformat(e))
+
+ self.finish_parse()
+
+ # Overrides docutils.parsers.Parser. See sphinx.parsers.RSTParser
+ def parse(self, inputstring, document):
+ """Check if a YAML is meant to be parsed."""
+
+ fname = document.current_source
+
+ # Handle netlink yaml specs
+ if "/netlink/specs/" in fname:
+ msg = self.netlink_parser.parse_yaml_file(fname)
+ self.rst_parse(inputstring, document, msg)
+
+ # All other yaml files are ignored
+
+def setup(app):
+ """Setup function for the Sphinx extension."""
+
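+ # Note: this parser only takes effect once it is listed in the
+ # "extensions" array of Documentation/conf.py (assumed here; that is
+ # how Sphinx loads source parser extensions).
+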
+ # Add YAML parser
+ app.add_source_parser(YamlParser)
+ app.add_source_suffix('.yaml', 'yaml')
+
+ return {
+ 'version': '1.0',
+ 'parallel_read_safe': True,
+ 'parallel_write_safe': True,
+ }
diff --git a/Documentation/userspace-api/netlink/index.rst b/Documentation/userspace-api/netlink/index.rst
index c1b6765cc963..83ae25066591 100644
--- a/Documentation/userspace-api/netlink/index.rst
+++ b/Documentation/userspace-api/netlink/index.rst
@@ -18,4 +18,4 @@ Netlink documentation for users.
See also:
- :ref:`Documentation/core-api/netlink.rst <kernel_netlink>`
- - :ref:`Documentation/networking/netlink_spec/index.rst <specs>`
+ - :ref:`Documentation/netlink/specs/index.rst <specs>`
diff --git a/Documentation/userspace-api/netlink/netlink-raw.rst b/Documentation/userspace-api/netlink/netlink-raw.rst
index 31fc91020eb3..aae296c170c5 100644
--- a/Documentation/userspace-api/netlink/netlink-raw.rst
+++ b/Documentation/userspace-api/netlink/netlink-raw.rst
@@ -62,8 +62,8 @@ Sub-messages
------------
Several raw netlink families such as
-:doc:`rt-link<../../networking/netlink_spec/rt-link>` and
-:doc:`tc<../../networking/netlink_spec/tc>` use attribute nesting as an
+:ref:`rt-link<netlink-rt-link>` and
+:ref:`tc<netlink-tc>` use attribute nesting as an
abstraction to carry module specific information.
Conceptually it looks as follows::
@@ -162,7 +162,7 @@ then this is an error.
Nested struct definitions
-------------------------
-Many raw netlink families such as :doc:`tc<../../networking/netlink_spec/tc>`
+Many raw netlink families such as :ref:`tc<netlink-tc>`
make use of nested struct definitions. The ``netlink-raw`` schema makes it
possible to embed a struct within a struct definition using the ``struct``
property. For example, the following struct definition embeds the
diff --git a/Documentation/userspace-api/netlink/specs.rst b/Documentation/userspace-api/netlink/specs.rst
index 1b50d97d8d7c..debb4bfca5c4 100644
--- a/Documentation/userspace-api/netlink/specs.rst
+++ b/Documentation/userspace-api/netlink/specs.rst
@@ -15,7 +15,7 @@ kernel headers directly.
Internally kernel uses the YAML specs to generate:
- the C uAPI header
- - documentation of the protocol as a ReST file - see :ref:`Documentation/networking/netlink_spec/index.rst <specs>`
+ - documentation of the protocol as a ReST file - see :ref:`Documentation/netlink/specs/index.rst <specs>`
- policy tables for input attribute validation
- operation tables
diff --git a/MAINTAINERS b/MAINTAINERS
index 2df02e4374ed..b81595e9ea95 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7308,6 +7308,7 @@ F: scripts/get_abi.py
F: scripts/kernel-doc*
F: scripts/lib/abi/*
F: scripts/lib/kdoc/*
+F: tools/net/ynl/pyynl/lib/doc_generator.py
F: scripts/sphinx-pre-install
X: Documentation/ABI/
X: Documentation/admin-guide/media/
@@ -13812,8 +13813,7 @@ M: Hauke Mehrtens <hauke@hauke-m.de>
L: netdev@vger.kernel.org
S: Maintained
F: Documentation/devicetree/bindings/net/dsa/lantiq,gswip.yaml
-F: drivers/net/dsa/lantiq_gswip.c
-F: drivers/net/dsa/lantiq_pce.h
+F: drivers/net/dsa/lantiq/*
F: drivers/net/ethernet/lantiq_xrx200.c
F: net/dsa/tag_gswip.c
@@ -18293,6 +18293,15 @@ F: Documentation/devicetree/bindings/clock/*imx*
F: drivers/clk/imx/
F: include/dt-bindings/clock/*imx*
+NXP NETC TIMER PTP CLOCK DRIVER
+M: Wei Fang <wei.fang@nxp.com>
+M: Clark Wang <xiaoning.wang@nxp.com>
+L: imx@lists.linux.dev
+L: netdev@vger.kernel.org
+S: Maintained
+F: Documentation/devicetree/bindings/ptp/nxp,ptp-netc.yaml
+F: drivers/ptp/ptp_netc.c
+
NXP PF8100/PF8121A/PF8200 PMIC REGULATOR DEVICE DRIVER
M: Jagan Teki <jagan@amarulasolutions.com>
S: Maintained
@@ -20866,6 +20875,14 @@ S: Maintained
F: Documentation/devicetree/bindings/power/supply/qcom,pmi8998-charger.yaml
F: drivers/power/supply/qcom_smbx.c
+QUALCOMM PPE DRIVER
+M: Luo Jie <quic_luoj@quicinc.com>
+L: netdev@vger.kernel.org
+S: Supported
+F: Documentation/devicetree/bindings/net/qcom,ipq9574-ppe.yaml
+F: Documentation/networking/device_drivers/ethernet/qualcomm/ppe/ppe.rst
+F: drivers/net/ethernet/qualcomm/ppe/
+
QUALCOMM QSEECOM DRIVER
M: Maximilian Luz <luzmaximilian@gmail.com>
L: linux-arm-msm@vger.kernel.org
diff --git a/arch/m68k/coldfire/m5272.c b/arch/m68k/coldfire/m5272.c
index 5b70dfdab368..918e2a3236c5 100644
--- a/arch/m68k/coldfire/m5272.c
+++ b/arch/m68k/coldfire/m5272.c
@@ -108,7 +108,7 @@ void __init config_BSP(char *commandp, int size)
* an ethernet switch. In this case we need to use the fixed phy type,
* and we need to declare it early in boot.
*/
-static struct fixed_phy_status nettel_fixed_phy_status __initdata = {
+static const struct fixed_phy_status nettel_fixed_phy_status __initconst = {
.link = 1,
.speed = 100,
.duplex = 0,
@@ -119,7 +119,7 @@ static struct fixed_phy_status nettel_fixed_phy_status __initdata = {
static int __init init_BSP(void)
{
m5272_uarts_init();
- fixed_phy_add(0, &nettel_fixed_phy_status);
+ fixed_phy_add(&nettel_fixed_phy_status);
clkdev_add_table(m5272_clk_lookup, ARRAY_SIZE(m5272_clk_lookup));
return 0;
}
diff --git a/arch/mips/bcm47xx/setup.c b/arch/mips/bcm47xx/setup.c
index de426a474b5b..a93a4266dc1e 100644
--- a/arch/mips/bcm47xx/setup.c
+++ b/arch/mips/bcm47xx/setup.c
@@ -256,7 +256,7 @@ static int __init bcm47xx_cpu_fixes(void)
}
arch_initcall(bcm47xx_cpu_fixes);
-static struct fixed_phy_status bcm47xx_fixed_phy_status __initdata = {
+static const struct fixed_phy_status bcm47xx_fixed_phy_status __initconst = {
.link = 1,
.speed = SPEED_100,
.duplex = DUPLEX_FULL,
@@ -282,7 +282,7 @@ static int __init bcm47xx_register_bus_complete(void)
bcm47xx_leds_register();
bcm47xx_workarounds();
- fixed_phy_add(0, &bcm47xx_fixed_phy_status);
+ fixed_phy_add(&bcm47xx_fixed_phy_status);
return 0;
}
device_initcall(bcm47xx_register_bus_complete);
diff --git a/drivers/infiniband/hw/mlx5/std_types.c b/drivers/infiniband/hw/mlx5/std_types.c
index bdb568411091..2fcf553044e1 100644
--- a/drivers/infiniband/hw/mlx5/std_types.c
+++ b/drivers/infiniband/hw/mlx5/std_types.c
@@ -83,33 +83,14 @@ static int fill_vport_icm_addr(struct mlx5_core_dev *mdev, u16 vport,
static int fill_vport_vhca_id(struct mlx5_core_dev *mdev, u16 vport,
struct mlx5_ib_uapi_query_port *info)
{
- size_t out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
- u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {};
- void *out;
- int err;
-
- out = kzalloc(out_sz, GFP_KERNEL);
- if (!out)
- return -ENOMEM;
+ int err = mlx5_vport_get_vhca_id(mdev, vport, &info->vport_vhca_id);
- MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
- MLX5_SET(query_hca_cap_in, in, other_function, true);
- MLX5_SET(query_hca_cap_in, in, function_id, vport);
- MLX5_SET(query_hca_cap_in, in, op_mod,
- MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE |
- HCA_CAP_OPMOD_GET_CUR);
-
- err = mlx5_cmd_exec(mdev, in, sizeof(in), out, out_sz);
if (err)
- goto out;
-
- info->vport_vhca_id = MLX5_GET(query_hca_cap_out, out,
- capability.cmd_hca_cap.vhca_id);
+ return err;
info->flags |= MLX5_IB_UAPI_QUERY_PORT_VPORT_VHCA_ID;
-out:
- kfree(out);
- return err;
+
+ return 0;
}
static int fill_multiport_info(struct mlx5_ib_dev *dev, u32 port_num,
diff --git a/drivers/net/Space.c b/drivers/net/Space.c
index dc50797a2ed0..c01e2c2f7d6c 100644
--- a/drivers/net/Space.c
+++ b/drivers/net/Space.c
@@ -67,8 +67,7 @@ static int netdev_boot_setup_add(char *name, struct ifmap *map)
s = dev_boot_setup;
for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
- memset(s[i].name, 0, sizeof(s[i].name));
- strscpy(s[i].name, name, IFNAMSIZ);
+ strscpy_pad(s[i].name, name);
memcpy(&s[i].map, map, sizeof(s[i].map));
break;
}
diff --git a/drivers/net/amt.c b/drivers/net/amt.c
index ed86537b2f61..902c817a0dea 100644
--- a/drivers/net/amt.c
+++ b/drivers/net/amt.c
@@ -11,6 +11,7 @@
#include <linux/net.h>
#include <linux/igmp.h>
#include <linux/workqueue.h>
+#include <net/flow.h>
#include <net/pkt_sched.h>
#include <net/net_namespace.h>
#include <net/ip.h>
@@ -28,6 +29,7 @@
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/inet_common.h>
+#include <net/inet_dscp.h>
#include <net/ip6_checksum.h>
static struct workqueue_struct *amt_wq;
@@ -1018,7 +1020,7 @@ static bool amt_send_membership_update(struct amt_dev *amt,
fl4.flowi4_oif = amt->stream_dev->ifindex;
fl4.daddr = amt->remote_ip;
fl4.saddr = amt->local_ip;
- fl4.flowi4_tos = AMT_TOS;
+ fl4.flowi4_dscp = inet_dsfield_to_dscp(AMT_TOS);
fl4.flowi4_proto = IPPROTO_UDP;
rt = ip_route_output_key(amt->net, &fl4);
if (IS_ERR(rt)) {
@@ -1133,7 +1135,7 @@ static bool amt_send_membership_query(struct amt_dev *amt,
fl4.flowi4_oif = amt->stream_dev->ifindex;
fl4.daddr = tunnel->ip4;
fl4.saddr = amt->local_ip;
- fl4.flowi4_tos = AMT_TOS;
+ fl4.flowi4_dscp = inet_dsfield_to_dscp(AMT_TOS);
fl4.flowi4_proto = IPPROTO_UDP;
rt = ip_route_output_key(amt->net, &fl4);
if (IS_ERR(rt)) {
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 257333c88710..f25c2d2c9181 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -142,8 +142,7 @@ module_param(downdelay, int, 0);
MODULE_PARM_DESC(downdelay, "Delay before considering link down, "
"in milliseconds");
module_param(use_carrier, int, 0);
-MODULE_PARM_DESC(use_carrier, "Use netif_carrier_ok (vs MII ioctls) in miimon; "
- "0 for off, 1 for on (default)");
+MODULE_PARM_DESC(use_carrier, "option obsolete, use_carrier cannot be disabled");
module_param(mode, charp, 0);
MODULE_PARM_DESC(mode, "Mode of operation; 0 for balance-rr, "
"1 for active-backup, 2 for balance-xor, "
@@ -830,77 +829,6 @@ const char *bond_slave_link_status(s8 link)
}
}
-/* if <dev> supports MII link status reporting, check its link status.
- *
- * We either do MII/ETHTOOL ioctls, or check netif_carrier_ok(),
- * depending upon the setting of the use_carrier parameter.
- *
- * Return either BMSR_LSTATUS, meaning that the link is up (or we
- * can't tell and just pretend it is), or 0, meaning that the link is
- * down.
- *
- * If reporting is non-zero, instead of faking link up, return -1 if
- * both ETHTOOL and MII ioctls fail (meaning the device does not
- * support them). If use_carrier is set, return whatever it says.
- * It'd be nice if there was a good way to tell if a driver supports
- * netif_carrier, but there really isn't.
- */
-static int bond_check_dev_link(struct bonding *bond,
- struct net_device *slave_dev, int reporting)
-{
- const struct net_device_ops *slave_ops = slave_dev->netdev_ops;
- struct mii_ioctl_data *mii;
- struct ifreq ifr;
- int ret;
-
- if (!reporting && !netif_running(slave_dev))
- return 0;
-
- if (bond->params.use_carrier)
- return netif_carrier_ok(slave_dev) ? BMSR_LSTATUS : 0;
-
- /* Try to get link status using Ethtool first. */
- if (slave_dev->ethtool_ops->get_link) {
- netdev_lock_ops(slave_dev);
- ret = slave_dev->ethtool_ops->get_link(slave_dev);
- netdev_unlock_ops(slave_dev);
-
- return ret ? BMSR_LSTATUS : 0;
- }
-
- /* Ethtool can't be used, fallback to MII ioctls. */
- if (slave_ops->ndo_eth_ioctl) {
- /* TODO: set pointer to correct ioctl on a per team member
- * bases to make this more efficient. that is, once
- * we determine the correct ioctl, we will always
- * call it and not the others for that team
- * member.
- */
-
- /* We cannot assume that SIOCGMIIPHY will also read a
- * register; not all network drivers (e.g., e100)
- * support that.
- */
-
- /* Yes, the mii is overlaid on the ifreq.ifr_ifru */
- strscpy_pad(ifr.ifr_name, slave_dev->name, IFNAMSIZ);
- mii = if_mii(&ifr);
-
- if (dev_eth_ioctl(slave_dev, &ifr, SIOCGMIIPHY) == 0) {
- mii->reg_num = MII_BMSR;
- if (dev_eth_ioctl(slave_dev, &ifr, SIOCGMIIREG) == 0)
- return mii->val_out & BMSR_LSTATUS;
- }
- }
-
- /* If reporting, report that either there's no ndo_eth_ioctl,
- * or both SIOCGMIIREG and get_link failed (meaning that we
- * cannot report link status). If not reporting, pretend
- * we're ok.
- */
- return reporting ? -1 : BMSR_LSTATUS;
-}
-
/*----------------------------- Multicast list ------------------------------*/
/* Push the promiscuity flag down to appropriate slaves */
@@ -1966,7 +1894,6 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
const struct net_device_ops *slave_ops = slave_dev->netdev_ops;
struct slave *new_slave = NULL, *prev_slave;
struct sockaddr_storage ss;
- int link_reporting;
int res = 0, i;
if (slave_dev->flags & IFF_MASTER &&
@@ -1976,12 +1903,6 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
return -EPERM;
}
- if (!bond->params.use_carrier &&
- slave_dev->ethtool_ops->get_link == NULL &&
- slave_ops->ndo_eth_ioctl == NULL) {
- slave_warn(bond_dev, slave_dev, "no link monitoring support\n");
- }
-
/* already in-use? */
if (netdev_is_rx_handler_busy(slave_dev)) {
SLAVE_NL_ERR(bond_dev, slave_dev, extack,
@@ -2195,29 +2116,10 @@ skip_mac_set:
new_slave->last_tx = new_slave->last_rx;
- if (bond->params.miimon && !bond->params.use_carrier) {
- link_reporting = bond_check_dev_link(bond, slave_dev, 1);
-
- if ((link_reporting == -1) && !bond->params.arp_interval) {
- /* miimon is set but a bonded network driver
- * does not support ETHTOOL/MII and
- * arp_interval is not set. Note: if
- * use_carrier is enabled, we will never go
- * here (because netif_carrier is always
- * supported); thus, we don't need to change
- * the messages for netif_carrier.
- */
- slave_warn(bond_dev, slave_dev, "MII and ETHTOOL support not available for slave, and arp_interval/arp_ip_target module parameters not specified, thus bonding will not detect link failures! see bonding.txt for details\n");
- } else if (link_reporting == -1) {
- /* unable get link status using mii/ethtool */
- slave_warn(bond_dev, slave_dev, "can't get link status from slave; the network driver associated with this interface does not support MII or ETHTOOL link status reporting, thus miimon has no effect on this interface\n");
- }
- }
-
/* check for initial state */
new_slave->link = BOND_LINK_NOCHANGE;
if (bond->params.miimon) {
- if (bond_check_dev_link(bond, slave_dev, 0) == BMSR_LSTATUS) {
+ if (netif_carrier_ok(slave_dev)) {
if (bond->params.updelay) {
bond_set_slave_link_state(new_slave,
BOND_LINK_BACK,
@@ -2759,7 +2661,7 @@ static int bond_miimon_inspect(struct bonding *bond)
bond_for_each_slave_rcu(bond, slave, iter) {
bond_propose_link_state(slave, BOND_LINK_NOCHANGE);
- link_state = bond_check_dev_link(bond, slave->dev, 0);
+ link_state = netif_carrier_ok(slave->dev);
switch (slave->link) {
case BOND_LINK_UP:
@@ -6257,10 +6159,10 @@ static int __init bond_check_params(struct bond_params *params)
downdelay = 0;
}
- if ((use_carrier != 0) && (use_carrier != 1)) {
- pr_warn("Warning: use_carrier module parameter (%d), not of valid value (0/1), so it was set to 1\n",
- use_carrier);
- use_carrier = 1;
+ if (use_carrier != 1) {
+ pr_err("Error: invalid use_carrier parameter (%d)\n",
+ use_carrier);
+ return -EINVAL;
}
if (num_peer_notif < 0 || num_peer_notif > 255) {
@@ -6507,7 +6409,6 @@ static int __init bond_check_params(struct bond_params *params)
params->updelay = updelay;
params->downdelay = downdelay;
params->peer_notif_delay = 0;
- params->use_carrier = use_carrier;
params->lacp_active = 1;
params->lacp_fast = lacp_fast;
params->primary[0] = 0;
diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c
index 57fff2421f1b..e573b34a1bbc 100644
--- a/drivers/net/bonding/bond_netlink.c
+++ b/drivers/net/bonding/bond_netlink.c
@@ -259,13 +259,11 @@ static int bond_changelink(struct net_device *bond_dev, struct nlattr *tb[],
return err;
}
if (data[IFLA_BOND_USE_CARRIER]) {
- int use_carrier = nla_get_u8(data[IFLA_BOND_USE_CARRIER]);
-
- bond_opt_initval(&newval, use_carrier);
- err = __bond_opt_set(bond, BOND_OPT_USE_CARRIER, &newval,
- data[IFLA_BOND_USE_CARRIER], extack);
- if (err)
- return err;
+ if (nla_get_u8(data[IFLA_BOND_USE_CARRIER]) != 1) {
+ NL_SET_ERR_MSG_ATTR(extack, data[IFLA_BOND_USE_CARRIER],
+ "option obsolete, use_carrier cannot be disabled");
+ return -EINVAL;
+ }
}
if (data[IFLA_BOND_ARP_INTERVAL]) {
int arp_interval = nla_get_u32(data[IFLA_BOND_ARP_INTERVAL]);
@@ -688,7 +686,7 @@ static int bond_fill_info(struct sk_buff *skb,
bond->params.peer_notif_delay * bond->params.miimon))
goto nla_put_failure;
- if (nla_put_u8(skb, IFLA_BOND_USE_CARRIER, bond->params.use_carrier))
+ if (nla_put_u8(skb, IFLA_BOND_USE_CARRIER, 1))
goto nla_put_failure;
if (nla_put_u32(skb, IFLA_BOND_ARP_INTERVAL, bond->params.arp_interval))
diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c
index 3b6f815c55ff..c0a5eb8766b5 100644
--- a/drivers/net/bonding/bond_options.c
+++ b/drivers/net/bonding/bond_options.c
@@ -187,7 +187,6 @@ static const struct bond_opt_value bond_primary_reselect_tbl[] = {
};
static const struct bond_opt_value bond_use_carrier_tbl[] = {
- { "off", 0, 0},
{ "on", 1, BOND_VALFLAG_DEFAULT},
{ NULL, -1, 0}
};
@@ -419,7 +418,7 @@ static const struct bond_option bond_opts[BOND_OPT_LAST] = {
[BOND_OPT_USE_CARRIER] = {
.id = BOND_OPT_USE_CARRIER,
.name = "use_carrier",
- .desc = "Use netif_carrier_ok (vs MII ioctls) in miimon",
+ .desc = "option obsolete, use_carrier cannot be disabled",
.values = bond_use_carrier_tbl,
.set = bond_option_use_carrier_set
},
@@ -1091,10 +1090,6 @@ static int bond_option_peer_notif_delay_set(struct bonding *bond,
static int bond_option_use_carrier_set(struct bonding *bond,
const struct bond_opt_value *newval)
{
- netdev_dbg(bond->dev, "Setting use_carrier to %llu\n",
- newval->value);
- bond->params.use_carrier = newval->value;
-
return 0;
}
diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
index 1e13bb170515..9a75ad3181ab 100644
--- a/drivers/net/bonding/bond_sysfs.c
+++ b/drivers/net/bonding/bond_sysfs.c
@@ -467,14 +467,12 @@ static ssize_t bonding_show_primary_reselect(struct device *d,
static DEVICE_ATTR(primary_reselect, 0644,
bonding_show_primary_reselect, bonding_sysfs_store_option);
-/* Show the use_carrier flag. */
+/* use_carrier is obsolete, but print value for compatibility */
static ssize_t bonding_show_carrier(struct device *d,
struct device_attribute *attr,
char *buf)
{
- struct bonding *bond = to_bond(d);
-
- return sysfs_emit(buf, "%d\n", bond->params.use_carrier);
+ return sysfs_emit(buf, "1\n");
}
static DEVICE_ATTR(use_carrier, 0644,
bonding_show_carrier, bonding_sysfs_store_option);
diff --git a/drivers/net/dsa/Kconfig b/drivers/net/dsa/Kconfig
index ec759f8cb0e2..4d9af691b989 100644
--- a/drivers/net/dsa/Kconfig
+++ b/drivers/net/dsa/Kconfig
@@ -26,13 +26,7 @@ config NET_DSA_LOOP
source "drivers/net/dsa/hirschmann/Kconfig"
-config NET_DSA_LANTIQ_GSWIP
- tristate "Lantiq / Intel GSWIP"
- depends on HAS_IOMEM
- select NET_DSA_TAG_GSWIP
- help
- This enables support for the Lantiq / Intel GSWIP 2.1 found in
- the xrx200 / VR9 SoC.
+source "drivers/net/dsa/lantiq/Kconfig"
config NET_DSA_MT7530
tristate "MediaTek MT7530 and MT7531 Ethernet switch support"
@@ -99,6 +93,14 @@ config NET_DSA_RZN1_A5PSW
This driver supports the A5PSW switch, which is embedded in Renesas
RZ/N1 SoC.
+config NET_DSA_KS8995
+ tristate "Micrel KS8995 family 5-ports 10/100 Ethernet switches"
+ depends on SPI
+ select NET_DSA_TAG_NONE
+ help
+ This driver supports the Micrel KS8995 family of 10/100 Mbit Ethernet
+ switches, managed over SPI.
+
config NET_DSA_SMSC_LAN9303
tristate
select NET_DSA_TAG_LAN9303
diff --git a/drivers/net/dsa/Makefile b/drivers/net/dsa/Makefile
index cb9a97340e58..c0a534fe6eaf 100644
--- a/drivers/net/dsa/Makefile
+++ b/drivers/net/dsa/Makefile
@@ -5,7 +5,7 @@ obj-$(CONFIG_NET_DSA_LOOP) += dsa_loop.o
ifdef CONFIG_NET_DSA_LOOP
obj-$(CONFIG_FIXED_PHY) += dsa_loop_bdinfo.o
endif
-obj-$(CONFIG_NET_DSA_LANTIQ_GSWIP) += lantiq_gswip.o
+obj-$(CONFIG_NET_DSA_KS8995) += ks8995.o
obj-$(CONFIG_NET_DSA_MT7530) += mt7530.o
obj-$(CONFIG_NET_DSA_MT7530_MDIO) += mt7530-mdio.o
obj-$(CONFIG_NET_DSA_MT7530_MMIO) += mt7530-mmio.o
@@ -19,6 +19,7 @@ obj-$(CONFIG_NET_DSA_VITESSE_VSC73XX_PLATFORM) += vitesse-vsc73xx-platform.o
obj-$(CONFIG_NET_DSA_VITESSE_VSC73XX_SPI) += vitesse-vsc73xx-spi.o
obj-y += b53/
obj-y += hirschmann/
+obj-y += lantiq/
obj-y += microchip/
obj-y += mv88e6xxx/
obj-y += ocelot/
diff --git a/drivers/net/dsa/b53/b53_mmap.c b/drivers/net/dsa/b53/b53_mmap.c
index f06c3e0cc42a..f4a59d8fbdd6 100644
--- a/drivers/net/dsa/b53/b53_mmap.c
+++ b/drivers/net/dsa/b53/b53_mmap.c
@@ -29,8 +29,13 @@
#include "b53_priv.h"
#define BCM63XX_EPHY_REG 0x3C
+#define BCM63268_GPHY_REG 0x54
+
+#define GPHY_CTRL_LOW_PWR BIT(3)
+#define GPHY_CTRL_IDDQ_BIAS BIT(0)
struct b53_phy_info {
+ u32 gphy_port_mask;
u32 ephy_enable_mask;
u32 ephy_port_mask;
u32 ephy_bias_bit;
@@ -65,6 +70,7 @@ static const struct b53_phy_info bcm6368_ephy_info = {
static const u32 bcm63268_ephy_offsets[] = {4, 9, 14};
static const struct b53_phy_info bcm63268_ephy_info = {
+ .gphy_port_mask = BIT(3),
.ephy_enable_mask = GENMASK(4, 0),
.ephy_port_mask = GENMASK((ARRAY_SIZE(bcm63268_ephy_offsets) - 1), 0),
.ephy_bias_bit = 24,
@@ -290,13 +296,30 @@ static int bcm63xx_ephy_set(struct b53_device *dev, int port, bool enable)
return regmap_update_bits(gpio_ctrl, BCM63XX_EPHY_REG, mask, val);
}
+static int bcm63268_gphy_set(struct b53_device *dev, bool enable)
+{
+ struct b53_mmap_priv *priv = dev->priv;
+ struct regmap *gpio_ctrl = priv->gpio_ctrl;
+ u32 mask = GPHY_CTRL_IDDQ_BIAS | GPHY_CTRL_LOW_PWR;
+ u32 val = 0;
+
+ if (!enable)
+ val = mask;
+
+ return regmap_update_bits(gpio_ctrl, BCM63268_GPHY_REG, mask, val);
+}
+
static void b53_mmap_phy_enable(struct b53_device *dev, int port)
{
struct b53_mmap_priv *priv = dev->priv;
int ret = 0;
- if (priv->phy_info && (BIT(port) & priv->phy_info->ephy_port_mask))
- ret = bcm63xx_ephy_set(dev, port, true);
+ if (priv->phy_info) {
+ if (BIT(port) & priv->phy_info->ephy_port_mask)
+ ret = bcm63xx_ephy_set(dev, port, true);
+ else if (BIT(port) & priv->phy_info->gphy_port_mask)
+ ret = bcm63268_gphy_set(dev, true);
+ }
if (!ret)
priv->phys_enabled |= BIT(port);
@@ -307,8 +330,12 @@ static void b53_mmap_phy_disable(struct b53_device *dev, int port)
struct b53_mmap_priv *priv = dev->priv;
int ret = 0;
- if (priv->phy_info && (BIT(port) & priv->phy_info->ephy_port_mask))
- ret = bcm63xx_ephy_set(dev, port, false);
+ if (priv->phy_info) {
+ if (BIT(port) & priv->phy_info->ephy_port_mask)
+ ret = bcm63xx_ephy_set(dev, port, false);
+ else if (BIT(port) & priv->phy_info->gphy_port_mask)
+ ret = bcm63268_gphy_set(dev, false);
+ }
if (!ret)
priv->phys_enabled &= ~BIT(port);
diff --git a/drivers/net/dsa/dsa_loop.c b/drivers/net/dsa/dsa_loop.c
index d8a35f25a4c8..8112515d545e 100644
--- a/drivers/net/dsa/dsa_loop.c
+++ b/drivers/net/dsa/dsa_loop.c
@@ -386,13 +386,10 @@ static struct mdio_driver dsa_loop_drv = {
static void dsa_loop_phydevs_unregister(void)
{
- unsigned int i;
-
- for (i = 0; i < NUM_FIXED_PHYS; i++)
- if (!IS_ERR(phydevs[i])) {
+ for (int i = 0; i < NUM_FIXED_PHYS; i++) {
+ if (!IS_ERR(phydevs[i]))
fixed_phy_unregister(phydevs[i]);
- phy_device_free(phydevs[i]);
- }
+ }
}
static int __init dsa_loop_init(void)
@@ -402,7 +399,8 @@ static int __init dsa_loop_init(void)
.speed = SPEED_100,
.duplex = DUPLEX_FULL,
};
- unsigned int i, ret;
+ unsigned int i;
+ int ret;
for (i = 0; i < NUM_FIXED_PHYS; i++)
phydevs[i] = fixed_phy_register(&status, NULL);
diff --git a/drivers/net/phy/spi_ks8995.c b/drivers/net/dsa/ks8995.c
index d135b061d810..5c4c83e00477 100644
--- a/drivers/net/phy/spi_ks8995.c
+++ b/drivers/net/dsa/ks8995.c
@@ -3,6 +3,7 @@
* SPI driver for Micrel/Kendin KS8995M and KSZ8864RMN ethernet switches
*
* Copyright (C) 2008 Gabor Juhos <juhosg at openwrt.org>
+ * Copyright (C) 2025 Linus Walleij <linus.walleij@linaro.org>
*
* This file was based on: drivers/spi/at25.c
* Copyright (C) 2006 David Brownell
@@ -10,6 +11,9 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/bits.h>
+#include <linux/if_bridge.h>
+#include <linux/if_vlan.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/module.h>
@@ -17,8 +21,8 @@
#include <linux/device.h>
#include <linux/gpio/consumer.h>
#include <linux/of.h>
-
#include <linux/spi/spi.h>
+#include <net/dsa.h>
#define DRV_VERSION "0.1.1"
#define DRV_DESC "Micrel KS8995 Ethernet switch SPI driver"
@@ -29,18 +33,59 @@
#define KS8995_REG_ID1 0x01 /* Chip ID1 */
#define KS8995_REG_GC0 0x02 /* Global Control 0 */
+
+#define KS8995_GC0_P5_PHY BIT(3) /* Port 5 PHY enabled */
+
#define KS8995_REG_GC1 0x03 /* Global Control 1 */
#define KS8995_REG_GC2 0x04 /* Global Control 2 */
+
+#define KS8995_GC2_HUGE BIT(2) /* Huge packet support */
+#define KS8995_GC2_LEGAL BIT(1) /* Legal size override */
+
#define KS8995_REG_GC3 0x05 /* Global Control 3 */
#define KS8995_REG_GC4 0x06 /* Global Control 4 */
+
+#define KS8995_GC4_10BT BIT(4) /* Force switch to 10Mbit */
+#define KS8995_GC4_MII_FLOW BIT(5) /* MII full-duplex flow control enable */
+#define KS8995_GC4_MII_HD BIT(6) /* MII half-duplex mode enable */
+
#define KS8995_REG_GC5 0x07 /* Global Control 5 */
#define KS8995_REG_GC6 0x08 /* Global Control 6 */
#define KS8995_REG_GC7 0x09 /* Global Control 7 */
#define KS8995_REG_GC8 0x0a /* Global Control 8 */
#define KS8995_REG_GC9 0x0b /* Global Control 9 */
-#define KS8995_REG_PC(p, r) ((0x10 * p) + r) /* Port Control */
-#define KS8995_REG_PS(p, r) ((0x10 * p) + r + 0xe) /* Port Status */
+#define KS8995_GC9_SPECIAL BIT(0) /* Special tagging mode (DSA) */
+
+/* In DSA the ports 1-4 are numbered 0-3 and the CPU port is port 4 */
+#define KS8995_REG_PC(p, r) (0x10 + (0x10 * (p)) + (r)) /* Port Control */
+#define KS8995_REG_PS(p, r) (0x1e + (0x10 * (p)) + (r)) /* Port Status */
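+
+/* For example, port control register 2 of DSA port 0 (front-panel port 1)
+ * is KS8995_REG_PC(0, KS8995_REG_PC2) = 0x10 + 0x00 + 0x02 = 0x12.
+ */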
+
+#define KS8995_REG_PC0 0x00 /* Port Control 0 */
+#define KS8995_REG_PC1 0x01 /* Port Control 1 */
+#define KS8995_REG_PC2 0x02 /* Port Control 2 */
+#define KS8995_REG_PC3 0x03 /* Port Control 3 */
+#define KS8995_REG_PC4 0x04 /* Port Control 4 */
+#define KS8995_REG_PC5 0x05 /* Port Control 5 */
+#define KS8995_REG_PC6 0x06 /* Port Control 6 */
+#define KS8995_REG_PC7 0x07 /* Port Control 7 */
+#define KS8995_REG_PC8 0x08 /* Port Control 8 */
+#define KS8995_REG_PC9 0x09 /* Port Control 9 */
+#define KS8995_REG_PC10 0x0a /* Port Control 10 */
+#define KS8995_REG_PC11 0x0b /* Port Control 11 */
+#define KS8995_REG_PC12 0x0c /* Port Control 12 */
+#define KS8995_REG_PC13 0x0d /* Port Control 13 */
+
+#define KS8995_PC0_TAG_INS BIT(2) /* Enable tag insertion on port */
+#define KS8995_PC0_TAG_REM BIT(1) /* Enable tag removal on port */
+#define KS8995_PC0_PRIO_EN BIT(0) /* Enable priority handling */
+
+#define KS8995_PC2_TXEN BIT(2) /* Enable TX on port */
+#define KS8995_PC2_RXEN BIT(1) /* Enable RX on port */
+#define KS8995_PC2_LEARN_DIS BIT(0) /* Disable learning on port */
+
+#define KS8995_PC13_TXDIS BIT(6) /* Disable transmitter */
+#define KS8995_PC13_PWDN BIT(3) /* Power down */
#define KS8995_REG_TPC0 0x60 /* TOS Priority Control 0 */
#define KS8995_REG_TPC1 0x61 /* TOS Priority Control 1 */
@@ -91,6 +136,8 @@
#define KS8995_CMD_WRITE 0x02U
#define KS8995_CMD_READ 0x03U
+#define KS8995_CPU_PORT 4
+#define KS8995_NUM_PORTS 5 /* 5 ports including the CPU port */
#define KS8995_RESET_DELAY 10 /* usec */
enum ks8995_chip_variant {
@@ -138,11 +185,14 @@ static const struct ks8995_chip_params ks8995_chip[] = {
struct ks8995_switch {
struct spi_device *spi;
+ struct device *dev;
+ struct dsa_switch *ds;
struct mutex lock;
struct gpio_desc *reset_gpio;
struct bin_attribute regs_attr;
const struct ks8995_chip_params *chip;
int revision_id;
+ unsigned int max_mtu[KS8995_NUM_PORTS];
};
static const struct spi_device_id ks8995_id[] = {
@@ -288,30 +338,6 @@ static int ks8995_reset(struct ks8995_switch *ks)
return ks8995_start(ks);
}
-static ssize_t ks8995_registers_read(struct file *filp, struct kobject *kobj,
- const struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count)
-{
- struct device *dev;
- struct ks8995_switch *ks8995;
-
- dev = kobj_to_dev(kobj);
- ks8995 = dev_get_drvdata(dev);
-
- return ks8995_read(ks8995, buf, off, count);
-}
-
-static ssize_t ks8995_registers_write(struct file *filp, struct kobject *kobj,
- const struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count)
-{
- struct device *dev;
- struct ks8995_switch *ks8995;
-
- dev = kobj_to_dev(kobj);
- ks8995 = dev_get_drvdata(dev);
-
- return ks8995_write(ks8995, buf, off, count);
-}
-
/* ks8995_get_revision - get chip revision
* @ks: pointer to switch instance
*
@@ -395,14 +421,325 @@ err_out:
return err;
}
-static const struct bin_attribute ks8995_registers_attr = {
- .attr = {
- .name = "registers",
- .mode = 0600,
- },
- .size = KS8995_REGS_SIZE,
- .read = ks8995_registers_read,
- .write = ks8995_registers_write,
+static int ks8995_check_config(struct ks8995_switch *ks)
+{
+ int ret;
+ u8 val;
+
+ ret = ks8995_read_reg(ks, KS8995_REG_GC0, &val);
+ if (ret) {
+ dev_err(ks->dev, "failed to read KS8995_REG_GC0\n");
+ return ret;
+ }
+
+ dev_dbg(ks->dev, "port 5 PHY %senabled\n",
+ (val & KS8995_GC0_P5_PHY) ? "" : "not ");
+
+ val |= KS8995_GC0_P5_PHY;
+ ret = ks8995_write_reg(ks, KS8995_REG_GC0, val);
+ if (ret)
+ dev_err(ks->dev, "failed to set KS8995_REG_GC0\n");
+
+ dev_dbg(ks->dev, "set KS8995_REG_GC0 to 0x%02x\n", val);
+
+ return 0;
+}
+
+static void
+ks8995_mac_config(struct phylink_config *config, unsigned int mode,
+ const struct phylink_link_state *state)
+{
+}
+
+static void
+ks8995_mac_link_up(struct phylink_config *config, struct phy_device *phydev,
+ unsigned int mode, phy_interface_t interface,
+ int speed, int duplex, bool tx_pause, bool rx_pause)
+{
+ struct dsa_port *dp = dsa_phylink_to_port(config);
+ struct ks8995_switch *ks = dp->ds->priv;
+ int port = dp->index;
+ int ret;
+ u8 val;
+
+ /* Allow forcing the mode on the fixed CPU port, no autonegotiation.
+ * We assume autonegotiation works on the PHY-facing ports.
+ */
+ if (port != KS8995_CPU_PORT)
+ return;
+
+ dev_dbg(ks->dev, "MAC link up on CPU port (%d)\n", port);
+
+ ret = ks8995_read_reg(ks, KS8995_REG_GC4, &val);
+ if (ret) {
+ dev_err(ks->dev, "failed to read KS8995_REG_GC4\n");
+ return;
+ }
+
+ /* Conjure port config */
+ switch (speed) {
+ case SPEED_10:
+ dev_dbg(ks->dev, "set switch MII to 10Mbit mode\n");
+ val |= KS8995_GC4_10BT;
+ break;
+ case SPEED_100:
+ default:
+ dev_dbg(ks->dev, "set switch MII to 100Mbit mode\n");
+ val &= ~KS8995_GC4_10BT;
+ break;
+ }
+
+ if (duplex == DUPLEX_HALF) {
+ dev_dbg(ks->dev, "set switch MII to half duplex\n");
+ val |= KS8995_GC4_MII_HD;
+ } else {
+ dev_dbg(ks->dev, "set switch MII to full duplex\n");
+ val &= ~KS8995_GC4_MII_HD;
+ }
+
+ dev_dbg(ks->dev, "set KS8995_REG_GC4 to %02x\n", val);
+
+ /* Apply the MII settings for the CPU port */
+ ret = ks8995_write_reg(ks, KS8995_REG_GC4, val);
+ if (ret)
+ dev_err(ks->dev, "failed to set KS8995_REG_GC4\n");
+}
+
+static void
+ks8995_mac_link_down(struct phylink_config *config, unsigned int mode,
+ phy_interface_t interface)
+{
+ struct dsa_port *dp = dsa_phylink_to_port(config);
+ struct ks8995_switch *ks = dp->ds->priv;
+ int port = dp->index;
+
+ if (port != KS8995_CPU_PORT)
+ return;
+
+ dev_dbg(ks->dev, "MAC link down on CPU port (%d)\n", port);
+
+ /* Disable the CPU port */
+}
+
+static const struct phylink_mac_ops ks8995_phylink_mac_ops = {
+ .mac_config = ks8995_mac_config,
+ .mac_link_up = ks8995_mac_link_up,
+ .mac_link_down = ks8995_mac_link_down,
+};
+
+static enum dsa_tag_protocol
+ks8995_get_tag_protocol(struct dsa_switch *ds, int port,
+ enum dsa_tag_protocol mp)
+{
+ /* This switch actually uses the 6 byte KS8995 protocol */
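+ /* No tagging driver for that protocol is hooked up here, so the
+ * switch is used without tags (the Kconfig entry accordingly
+ * selects NET_DSA_TAG_NONE).
+ */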
+ return DSA_TAG_PROTO_NONE;
+}
+
+static int ks8995_setup(struct dsa_switch *ds)
+{
+ return 0;
+}
+
+static int ks8995_port_enable(struct dsa_switch *ds, int port,
+ struct phy_device *phy)
+{
+ struct ks8995_switch *ks = ds->priv;
+
+ dev_dbg(ks->dev, "enable port %d\n", port);
+
+ return 0;
+}
+
+static void ks8995_port_disable(struct dsa_switch *ds, int port)
+{
+ struct ks8995_switch *ks = ds->priv;
+
+ dev_dbg(ks->dev, "disable port %d\n", port);
+}
+
+static int ks8995_port_pre_bridge_flags(struct dsa_switch *ds, int port,
+ struct switchdev_brport_flags flags,
+ struct netlink_ext_ack *extack)
+{
+ /* We support enabling/disabling learning */
+ if (flags.mask & ~(BR_LEARNING))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int ks8995_port_bridge_flags(struct dsa_switch *ds, int port,
+ struct switchdev_brport_flags flags,
+ struct netlink_ext_ack *extack)
+{
+ struct ks8995_switch *ks = ds->priv;
+ int ret;
+ u8 val;
+
+ if (flags.mask & BR_LEARNING) {
+ ret = ks8995_read_reg(ks, KS8995_REG_PC(port, KS8995_REG_PC2), &val);
+ if (ret) {
+ dev_err(ks->dev, "failed to read KS8995_REG_PC2 on port %d\n", port);
+ return ret;
+ }
+
+ if (flags.val & BR_LEARNING)
+ val &= ~KS8995_PC2_LEARN_DIS;
+ else
+ val |= KS8995_PC2_LEARN_DIS;
+
+ ret = ks8995_write_reg(ks, KS8995_REG_PC(port, KS8995_REG_PC2), val);
+ if (ret) {
+ dev_err(ks->dev, "failed to write KS8995_REG_PC2 on port %d\n", port);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static void ks8995_port_stp_state_set(struct dsa_switch *ds, int port, u8 state)
+{
+ struct ks8995_switch *ks = ds->priv;
+ int ret;
+ u8 val;
+
+ ret = ks8995_read_reg(ks, KS8995_REG_PC(port, KS8995_REG_PC2), &val);
+ if (ret) {
+ dev_err(ks->dev, "failed to read KS8995_REG_PC2 on port %d\n", port);
+ return;
+ }
+
+ /* Set the bits for the different STP states in accordance with
+ * the datasheet, pages 36-37 "Spanning tree support".
+ */
+ switch (state) {
+ case BR_STATE_DISABLED:
+ case BR_STATE_BLOCKING:
+ case BR_STATE_LISTENING:
+ val &= ~KS8995_PC2_TXEN;
+ val &= ~KS8995_PC2_RXEN;
+ val |= KS8995_PC2_LEARN_DIS;
+ break;
+ case BR_STATE_LEARNING:
+ val &= ~KS8995_PC2_TXEN;
+ val &= ~KS8995_PC2_RXEN;
+ val &= ~KS8995_PC2_LEARN_DIS;
+ break;
+ case BR_STATE_FORWARDING:
+ val |= KS8995_PC2_TXEN;
+ val |= KS8995_PC2_RXEN;
+ val &= ~KS8995_PC2_LEARN_DIS;
+ break;
+ default:
+ dev_err(ks->dev, "unknown bridge state requested\n");
+ return;
+ }
+
+ ret = ks8995_write_reg(ks, KS8995_REG_PC(port, KS8995_REG_PC2), val);
+ if (ret) {
+ dev_err(ks->dev, "failed to write KS8995_REG_PC2 on port %d\n", port);
+ return;
+ }
+
+ dev_dbg(ks->dev, "set KS8995_REG_PC2 for port %d to %02x\n", port, val);
+}
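
In summary, the switch statement above implements the usual DSA mapping from STP states to the three per-port control bits:

	/* DISABLED/BLOCKING/LISTENING: RX off, TX off, learning off
	 * LEARNING:                    RX off, TX off, learning on
	 * FORWARDING:                  RX on,  TX on,  learning on
	 */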
+
+static void ks8995_phylink_get_caps(struct dsa_switch *dsa, int port,
+ struct phylink_config *config)
+{
+ unsigned long *interfaces = config->supported_interfaces;
+
+ if (port == KS8995_CPU_PORT)
+ __set_bit(PHY_INTERFACE_MODE_MII, interfaces);
+
+ if (port <= 3) {
+ /* Internal PHYs */
+ __set_bit(PHY_INTERFACE_MODE_INTERNAL, interfaces);
+ /* phylib default */
+ __set_bit(PHY_INTERFACE_MODE_MII, interfaces);
+ }
+
+ config->mac_capabilities = MAC_SYM_PAUSE | MAC_10 | MAC_100;
+}
+
+/* Huge packet support covers packets up to 1916 bytes "inclusive",
+ * which means that tags are included in the count. If the bit is not
+ * set the limit is 1536 bytes "inclusive". We present the length
+ * without tags or ethernet headers. The setting affects all ports.
+ */
+static int ks8995_change_mtu(struct dsa_switch *ds, int port, int new_mtu)
+{
+ struct ks8995_switch *ks = ds->priv;
+ unsigned int max_mtu;
+ int ret;
+ u8 val;
+ int i;
+
+ ks->max_mtu[port] = new_mtu;
+
+ /* Cap the MTU for the entire switch: the largest MTU set on
+ * any one port becomes the MTU for the whole switch.
+ */
+ max_mtu = ETH_DATA_LEN;
+ for (i = 0; i < KS8995_NUM_PORTS; i++) {
+ if (ks->max_mtu[i] > max_mtu)
+ max_mtu = ks->max_mtu[i];
+ }
+
+ /* Translate to layer 2 size: add the ethernet header, a possible
+ * VLAN tag, and the frame checksum (FCS). For ETH_DATA_LEN
+ * (1500 bytes) this adds up to 1522 bytes.
+ */
+ max_mtu += VLAN_ETH_HLEN;
+ max_mtu += ETH_FCS_LEN;
+
+ ret = ks8995_read_reg(ks, KS8995_REG_GC2, &val);
+ if (ret) {
+ dev_err(ks->dev, "failed to read KS8995_REG_GC2\n");
+ return ret;
+ }
+
+ if (max_mtu <= 1522) {
+ val &= ~KS8995_GC2_HUGE;
+ val &= ~KS8995_GC2_LEGAL;
+ } else if (max_mtu > 1522 && max_mtu <= 1536) {
+ /* This accepts packets up to 1536 bytes */
+ val &= ~KS8995_GC2_HUGE;
+ val |= KS8995_GC2_LEGAL;
+ } else {
+ /* This accepts packets up to 1916 bytes */
+ val |= KS8995_GC2_HUGE;
+ val |= KS8995_GC2_LEGAL;
+ }
+
+ dev_dbg(ks->dev, "new max MTU %d bytes (inclusive)\n", max_mtu);
+
+ ret = ks8995_write_reg(ks, KS8995_REG_GC2, val);
+ if (ret)
+ dev_err(ks->dev, "failed to set KS8995_REG_GC2\n");
+
+ return ret;
+}
+
+static int ks8995_get_max_mtu(struct dsa_switch *ds, int port)
+{
+ return 1916 - ETH_HLEN - ETH_FCS_LEN;
+}
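
As a worked example of the inclusive-length math above (a sketch using the standard kernel constants ETH_HLEN = 14, VLAN_HLEN = 4, ETH_FCS_LEN = 4, so VLAN_ETH_HLEN = 18):

	/* MTU 1500 -> 1500 + 18 + 4 = 1522 bytes on the wire: fits the
	 * default limit. MTU 1514 -> 1536: needs KS8995_GC2_LEGAL.
	 * Anything larger, up to 1916 - 14 - 4 = 1898 as reported by
	 * ks8995_get_max_mtu(), needs KS8995_GC2_HUGE as well.
	 */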
+
+static const struct dsa_switch_ops ks8995_ds_ops = {
+ .get_tag_protocol = ks8995_get_tag_protocol,
+ .setup = ks8995_setup,
+ .port_pre_bridge_flags = ks8995_port_pre_bridge_flags,
+ .port_bridge_flags = ks8995_port_bridge_flags,
+ .port_enable = ks8995_port_enable,
+ .port_disable = ks8995_port_disable,
+ .port_stp_state_set = ks8995_port_stp_state_set,
+ .port_change_mtu = ks8995_change_mtu,
+ .port_max_mtu = ks8995_get_max_mtu,
+ .phylink_get_caps = ks8995_phylink_get_caps,
};
/* ------------------------------------------------------------------------ */
@@ -423,6 +760,7 @@ static int ks8995_probe(struct spi_device *spi)
mutex_init(&ks->lock);
ks->spi = spi;
+ ks->dev = &spi->dev;
ks->chip = &ks8995_chip[variant];
ks->reset_gpio = devm_gpiod_get_optional(&spi->dev, "reset",
@@ -438,9 +776,15 @@ static int ks8995_probe(struct spi_device *spi)
if (err)
return err;
- /* de-assert switch reset */
- /* FIXME: this likely requires a delay */
- gpiod_set_value_cansleep(ks->reset_gpio, 0);
+ if (ks->reset_gpio) {
+ /*
+ * If a reset line was obtained, wait for 100us after
+ * de-asserting RESET before accessing any registers, see
+ * the KS8995MA datasheet, page 44.
+ */
+ gpiod_set_value_cansleep(ks->reset_gpio, 0);
+ udelay(100);
+ }
spi_set_drvdata(spi, ks);
@@ -456,24 +800,32 @@ static int ks8995_probe(struct spi_device *spi)
if (err)
return err;
- memcpy(&ks->regs_attr, &ks8995_registers_attr, sizeof(ks->regs_attr));
- ks->regs_attr.size = ks->chip->regs_size;
-
err = ks8995_reset(ks);
if (err)
return err;
- sysfs_attr_init(&ks->regs_attr.attr);
- err = sysfs_create_bin_file(&spi->dev.kobj, &ks->regs_attr);
- if (err) {
- dev_err(&spi->dev, "unable to create sysfs file, err=%d\n",
- err);
- return err;
- }
-
dev_info(&spi->dev, "%s device found, Chip ID:%x, Revision:%x\n",
ks->chip->name, ks->chip->chip_id, ks->revision_id);
+ err = ks8995_check_config(ks);
+ if (err)
+ return err;
+
+ ks->ds = devm_kzalloc(&spi->dev, sizeof(*ks->ds), GFP_KERNEL);
+ if (!ks->ds)
+ return -ENOMEM;
+
+ ks->ds->dev = &spi->dev;
+ ks->ds->num_ports = KS8995_NUM_PORTS;
+ ks->ds->ops = &ks8995_ds_ops;
+ ks->ds->phylink_mac_ops = &ks8995_phylink_mac_ops;
+ ks->ds->priv = ks;
+
+ err = dsa_register_switch(ks->ds);
+ if (err)
+ return dev_err_probe(&spi->dev, err,
+ "unable to register DSA switch\n");
+
return 0;
}
@@ -481,8 +833,7 @@ static void ks8995_remove(struct spi_device *spi)
{
struct ks8995_switch *ks = spi_get_drvdata(spi);
- sysfs_remove_bin_file(&spi->dev.kobj, &ks->regs_attr);
-
+ dsa_unregister_switch(ks->ds);
/* assert reset */
gpiod_set_value_cansleep(ks->reset_gpio, 1);
}
diff --git a/drivers/net/dsa/lantiq/Kconfig b/drivers/net/dsa/lantiq/Kconfig
new file mode 100644
index 000000000000..1cb053c823f7
--- /dev/null
+++ b/drivers/net/dsa/lantiq/Kconfig
@@ -0,0 +1,7 @@
+config NET_DSA_LANTIQ_GSWIP
+ tristate "Lantiq / Intel GSWIP"
+ depends on HAS_IOMEM
+ select NET_DSA_TAG_GSWIP
+ help
+ This enables support for the Lantiq / Intel GSWIP 2.1 found in
+ the xrx200 / VR9 SoC.
diff --git a/drivers/net/dsa/lantiq/Makefile b/drivers/net/dsa/lantiq/Makefile
new file mode 100644
index 000000000000..849f85ebebd6
--- /dev/null
+++ b/drivers/net/dsa/lantiq/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_NET_DSA_LANTIQ_GSWIP) += lantiq_gswip.o
diff --git a/drivers/net/dsa/lantiq_gswip.c b/drivers/net/dsa/lantiq/lantiq_gswip.c
index 6eb3140d4044..1e991d7bca0b 100644
--- a/drivers/net/dsa/lantiq_gswip.c
+++ b/drivers/net/dsa/lantiq/lantiq_gswip.c
@@ -25,7 +25,9 @@
* between all LAN ports by default.
*/
-#include <linux/clk.h>
+#include "lantiq_gswip.h"
+#include "lantiq_pce.h"
+
#include <linux/delay.h>
#include <linux/etherdevice.h>
#include <linux/firmware.h>
@@ -39,258 +41,13 @@
#include <linux/of_platform.h>
#include <linux/phy.h>
#include <linux/phylink.h>
-#include <linux/platform_device.h>
-#include <linux/regmap.h>
-#include <linux/reset.h>
-#include <net/dsa.h>
#include <dt-bindings/mips/lantiq_rcu_gphy.h>
-#include "lantiq_pce.h"
-
-/* GSWIP MDIO Registers */
-#define GSWIP_MDIO_GLOB 0x00
-#define GSWIP_MDIO_GLOB_ENABLE BIT(15)
-#define GSWIP_MDIO_CTRL 0x08
-#define GSWIP_MDIO_CTRL_BUSY BIT(12)
-#define GSWIP_MDIO_CTRL_RD BIT(11)
-#define GSWIP_MDIO_CTRL_WR BIT(10)
-#define GSWIP_MDIO_CTRL_PHYAD_MASK 0x1f
-#define GSWIP_MDIO_CTRL_PHYAD_SHIFT 5
-#define GSWIP_MDIO_CTRL_REGAD_MASK 0x1f
-#define GSWIP_MDIO_READ 0x09
-#define GSWIP_MDIO_WRITE 0x0A
-#define GSWIP_MDIO_MDC_CFG0 0x0B
-#define GSWIP_MDIO_MDC_CFG1 0x0C
-#define GSWIP_MDIO_PHYp(p) (0x15 - (p))
-#define GSWIP_MDIO_PHY_LINK_MASK 0x6000
-#define GSWIP_MDIO_PHY_LINK_AUTO 0x0000
-#define GSWIP_MDIO_PHY_LINK_DOWN 0x4000
-#define GSWIP_MDIO_PHY_LINK_UP 0x2000
-#define GSWIP_MDIO_PHY_SPEED_MASK 0x1800
-#define GSWIP_MDIO_PHY_SPEED_AUTO 0x1800
-#define GSWIP_MDIO_PHY_SPEED_M10 0x0000
-#define GSWIP_MDIO_PHY_SPEED_M100 0x0800
-#define GSWIP_MDIO_PHY_SPEED_G1 0x1000
-#define GSWIP_MDIO_PHY_FDUP_MASK 0x0600
-#define GSWIP_MDIO_PHY_FDUP_AUTO 0x0000
-#define GSWIP_MDIO_PHY_FDUP_EN 0x0200
-#define GSWIP_MDIO_PHY_FDUP_DIS 0x0600
-#define GSWIP_MDIO_PHY_FCONTX_MASK 0x0180
-#define GSWIP_MDIO_PHY_FCONTX_AUTO 0x0000
-#define GSWIP_MDIO_PHY_FCONTX_EN 0x0100
-#define GSWIP_MDIO_PHY_FCONTX_DIS 0x0180
-#define GSWIP_MDIO_PHY_FCONRX_MASK 0x0060
-#define GSWIP_MDIO_PHY_FCONRX_AUTO 0x0000
-#define GSWIP_MDIO_PHY_FCONRX_EN 0x0020
-#define GSWIP_MDIO_PHY_FCONRX_DIS 0x0060
-#define GSWIP_MDIO_PHY_ADDR_MASK 0x001f
-#define GSWIP_MDIO_PHY_MASK (GSWIP_MDIO_PHY_ADDR_MASK | \
- GSWIP_MDIO_PHY_FCONRX_MASK | \
- GSWIP_MDIO_PHY_FCONTX_MASK | \
- GSWIP_MDIO_PHY_LINK_MASK | \
- GSWIP_MDIO_PHY_SPEED_MASK | \
- GSWIP_MDIO_PHY_FDUP_MASK)
-
-/* GSWIP MII Registers */
-#define GSWIP_MII_CFGp(p) (0x2 * (p))
-#define GSWIP_MII_CFG_RESET BIT(15)
-#define GSWIP_MII_CFG_EN BIT(14)
-#define GSWIP_MII_CFG_ISOLATE BIT(13)
-#define GSWIP_MII_CFG_LDCLKDIS BIT(12)
-#define GSWIP_MII_CFG_RGMII_IBS BIT(8)
-#define GSWIP_MII_CFG_RMII_CLK BIT(7)
-#define GSWIP_MII_CFG_MODE_MIIP 0x0
-#define GSWIP_MII_CFG_MODE_MIIM 0x1
-#define GSWIP_MII_CFG_MODE_RMIIP 0x2
-#define GSWIP_MII_CFG_MODE_RMIIM 0x3
-#define GSWIP_MII_CFG_MODE_RGMII 0x4
-#define GSWIP_MII_CFG_MODE_GMII 0x9
-#define GSWIP_MII_CFG_MODE_MASK 0xf
-#define GSWIP_MII_CFG_RATE_M2P5 0x00
-#define GSWIP_MII_CFG_RATE_M25 0x10
-#define GSWIP_MII_CFG_RATE_M125 0x20
-#define GSWIP_MII_CFG_RATE_M50 0x30
-#define GSWIP_MII_CFG_RATE_AUTO 0x40
-#define GSWIP_MII_CFG_RATE_MASK 0x70
-#define GSWIP_MII_PCDU0 0x01
-#define GSWIP_MII_PCDU1 0x03
-#define GSWIP_MII_PCDU5 0x05
-#define GSWIP_MII_PCDU_TXDLY_MASK GENMASK(2, 0)
-#define GSWIP_MII_PCDU_RXDLY_MASK GENMASK(9, 7)
-
-/* GSWIP Core Registers */
-#define GSWIP_SWRES 0x000
-#define GSWIP_SWRES_R1 BIT(1) /* GSWIP Software reset */
-#define GSWIP_SWRES_R0 BIT(0) /* GSWIP Hardware reset */
-#define GSWIP_VERSION 0x013
-#define GSWIP_VERSION_REV_SHIFT 0
-#define GSWIP_VERSION_REV_MASK GENMASK(7, 0)
-#define GSWIP_VERSION_MOD_SHIFT 8
-#define GSWIP_VERSION_MOD_MASK GENMASK(15, 8)
-#define GSWIP_VERSION_2_0 0x100
-#define GSWIP_VERSION_2_1 0x021
-#define GSWIP_VERSION_2_2 0x122
-#define GSWIP_VERSION_2_2_ETC 0x022
-
-#define GSWIP_BM_RAM_VAL(x) (0x043 - (x))
-#define GSWIP_BM_RAM_ADDR 0x044
-#define GSWIP_BM_RAM_CTRL 0x045
-#define GSWIP_BM_RAM_CTRL_BAS BIT(15)
-#define GSWIP_BM_RAM_CTRL_OPMOD BIT(5)
-#define GSWIP_BM_RAM_CTRL_ADDR_MASK GENMASK(4, 0)
-#define GSWIP_BM_QUEUE_GCTRL 0x04A
-#define GSWIP_BM_QUEUE_GCTRL_GL_MOD BIT(10)
-/* buffer management Port Configuration Register */
-#define GSWIP_BM_PCFGp(p) (0x080 + ((p) * 2))
-#define GSWIP_BM_PCFG_CNTEN BIT(0) /* RMON Counter Enable */
-#define GSWIP_BM_PCFG_IGCNT BIT(1) /* Ingres Special Tag RMON count */
-/* buffer management Port Control Register */
-#define GSWIP_BM_RMON_CTRLp(p) (0x81 + ((p) * 2))
-#define GSWIP_BM_CTRL_RMON_RAM1_RES BIT(0) /* Software Reset for RMON RAM 1 */
-#define GSWIP_BM_CTRL_RMON_RAM2_RES BIT(1) /* Software Reset for RMON RAM 2 */
-
-/* PCE */
-#define GSWIP_PCE_TBL_KEY(x) (0x447 - (x))
-#define GSWIP_PCE_TBL_MASK 0x448
-#define GSWIP_PCE_TBL_VAL(x) (0x44D - (x))
-#define GSWIP_PCE_TBL_ADDR 0x44E
-#define GSWIP_PCE_TBL_CTRL 0x44F
-#define GSWIP_PCE_TBL_CTRL_BAS BIT(15)
-#define GSWIP_PCE_TBL_CTRL_TYPE BIT(13)
-#define GSWIP_PCE_TBL_CTRL_VLD BIT(12)
-#define GSWIP_PCE_TBL_CTRL_KEYFORM BIT(11)
-#define GSWIP_PCE_TBL_CTRL_GMAP_MASK GENMASK(10, 7)
-#define GSWIP_PCE_TBL_CTRL_OPMOD_MASK GENMASK(6, 5)
-#define GSWIP_PCE_TBL_CTRL_OPMOD_ADRD 0x00
-#define GSWIP_PCE_TBL_CTRL_OPMOD_ADWR 0x20
-#define GSWIP_PCE_TBL_CTRL_OPMOD_KSRD 0x40
-#define GSWIP_PCE_TBL_CTRL_OPMOD_KSWR 0x60
-#define GSWIP_PCE_TBL_CTRL_ADDR_MASK GENMASK(4, 0)
-#define GSWIP_PCE_PMAP1 0x453 /* Monitoring port map */
-#define GSWIP_PCE_PMAP2 0x454 /* Default Multicast port map */
-#define GSWIP_PCE_PMAP3 0x455 /* Default Unknown Unicast port map */
-#define GSWIP_PCE_GCTRL_0 0x456
-#define GSWIP_PCE_GCTRL_0_MTFL BIT(0) /* MAC Table Flushing */
-#define GSWIP_PCE_GCTRL_0_MC_VALID BIT(3)
-#define GSWIP_PCE_GCTRL_0_VLAN BIT(14) /* VLAN aware Switching */
-#define GSWIP_PCE_GCTRL_1 0x457
-#define GSWIP_PCE_GCTRL_1_MAC_GLOCK BIT(2) /* MAC Address table lock */
-#define GSWIP_PCE_GCTRL_1_MAC_GLOCK_MOD BIT(3) /* Mac address table lock forwarding mode */
-#define GSWIP_PCE_PCTRL_0p(p) (0x480 + ((p) * 0xA))
-#define GSWIP_PCE_PCTRL_0_TVM BIT(5) /* Transparent VLAN mode */
-#define GSWIP_PCE_PCTRL_0_VREP BIT(6) /* VLAN Replace Mode */
-#define GSWIP_PCE_PCTRL_0_INGRESS BIT(11) /* Accept special tag in ingress */
-#define GSWIP_PCE_PCTRL_0_PSTATE_LISTEN 0x0
-#define GSWIP_PCE_PCTRL_0_PSTATE_RX 0x1
-#define GSWIP_PCE_PCTRL_0_PSTATE_TX 0x2
-#define GSWIP_PCE_PCTRL_0_PSTATE_LEARNING 0x3
-#define GSWIP_PCE_PCTRL_0_PSTATE_FORWARDING 0x7
-#define GSWIP_PCE_PCTRL_0_PSTATE_MASK GENMASK(2, 0)
-#define GSWIP_PCE_VCTRL(p) (0x485 + ((p) * 0xA))
-#define GSWIP_PCE_VCTRL_UVR BIT(0) /* Unknown VLAN Rule */
-#define GSWIP_PCE_VCTRL_VIMR BIT(3) /* VLAN Ingress Member violation rule */
-#define GSWIP_PCE_VCTRL_VEMR BIT(4) /* VLAN Egress Member violation rule */
-#define GSWIP_PCE_VCTRL_VSR BIT(5) /* VLAN Security */
-#define GSWIP_PCE_VCTRL_VID0 BIT(6) /* Priority Tagged Rule */
-#define GSWIP_PCE_DEFPVID(p) (0x486 + ((p) * 0xA))
-
-#define GSWIP_MAC_FLEN 0x8C5
-#define GSWIP_MAC_CTRL_0p(p) (0x903 + ((p) * 0xC))
-#define GSWIP_MAC_CTRL_0_PADEN BIT(8)
-#define GSWIP_MAC_CTRL_0_FCS_EN BIT(7)
-#define GSWIP_MAC_CTRL_0_FCON_MASK 0x0070
-#define GSWIP_MAC_CTRL_0_FCON_AUTO 0x0000
-#define GSWIP_MAC_CTRL_0_FCON_RX 0x0010
-#define GSWIP_MAC_CTRL_0_FCON_TX 0x0020
-#define GSWIP_MAC_CTRL_0_FCON_RXTX 0x0030
-#define GSWIP_MAC_CTRL_0_FCON_NONE 0x0040
-#define GSWIP_MAC_CTRL_0_FDUP_MASK 0x000C
-#define GSWIP_MAC_CTRL_0_FDUP_AUTO 0x0000
-#define GSWIP_MAC_CTRL_0_FDUP_EN 0x0004
-#define GSWIP_MAC_CTRL_0_FDUP_DIS 0x000C
-#define GSWIP_MAC_CTRL_0_GMII_MASK 0x0003
-#define GSWIP_MAC_CTRL_0_GMII_AUTO 0x0000
-#define GSWIP_MAC_CTRL_0_GMII_MII 0x0001
-#define GSWIP_MAC_CTRL_0_GMII_RGMII 0x0002
-#define GSWIP_MAC_CTRL_2p(p) (0x905 + ((p) * 0xC))
-#define GSWIP_MAC_CTRL_2_LCHKL BIT(2) /* Frame Length Check Long Enable */
-#define GSWIP_MAC_CTRL_2_MLEN BIT(3) /* Maximum Untagged Frame Lnegth */
-
-/* Ethernet Switch Fetch DMA Port Control Register */
-#define GSWIP_FDMA_PCTRLp(p) (0xA80 + ((p) * 0x6))
-#define GSWIP_FDMA_PCTRL_EN BIT(0) /* FDMA Port Enable */
-#define GSWIP_FDMA_PCTRL_STEN BIT(1) /* Special Tag Insertion Enable */
-#define GSWIP_FDMA_PCTRL_VLANMOD_MASK GENMASK(4, 3) /* VLAN Modification Control */
-#define GSWIP_FDMA_PCTRL_VLANMOD_SHIFT 3 /* VLAN Modification Control */
-#define GSWIP_FDMA_PCTRL_VLANMOD_DIS (0x0 << GSWIP_FDMA_PCTRL_VLANMOD_SHIFT)
-#define GSWIP_FDMA_PCTRL_VLANMOD_PRIO (0x1 << GSWIP_FDMA_PCTRL_VLANMOD_SHIFT)
-#define GSWIP_FDMA_PCTRL_VLANMOD_ID (0x2 << GSWIP_FDMA_PCTRL_VLANMOD_SHIFT)
-#define GSWIP_FDMA_PCTRL_VLANMOD_BOTH (0x3 << GSWIP_FDMA_PCTRL_VLANMOD_SHIFT)
-
-/* Ethernet Switch Store DMA Port Control Register */
-#define GSWIP_SDMA_PCTRLp(p) (0xBC0 + ((p) * 0x6))
-#define GSWIP_SDMA_PCTRL_EN BIT(0) /* SDMA Port Enable */
-#define GSWIP_SDMA_PCTRL_FCEN BIT(1) /* Flow Control Enable */
-#define GSWIP_SDMA_PCTRL_PAUFWD BIT(3) /* Pause Frame Forwarding */
-
-#define GSWIP_TABLE_ACTIVE_VLAN 0x01
-#define GSWIP_TABLE_VLAN_MAPPING 0x02
-#define GSWIP_TABLE_MAC_BRIDGE 0x0b
-#define GSWIP_TABLE_MAC_BRIDGE_KEY3_FID GENMASK(5, 0) /* Filtering identifier */
-#define GSWIP_TABLE_MAC_BRIDGE_VAL0_PORT GENMASK(7, 4) /* Port on learned entries */
-#define GSWIP_TABLE_MAC_BRIDGE_VAL1_STATIC BIT(0) /* Static, non-aging entry */
-
-#define XRX200_GPHY_FW_ALIGN (16 * 1024)
-
-/* Maximum packet size supported by the switch. In theory this should be 10240,
- * but long packets currently cause lock-ups with an MTU of over 2526. Medium
- * packets are sometimes dropped (e.g. TCP over 2477, UDP over 2516-2519, ICMP
- * over 2526), hence an MTU value of 2400 seems safe. This issue only affects
- * packet reception. This is probably caused by the PPA engine, which is on the
- * RX part of the device. Packet transmission works properly up to 10240.
- */
-#define GSWIP_MAX_PACKET_LENGTH 2400
-
-struct gswip_hw_info {
- int max_ports;
- int cpu_port;
- const struct dsa_switch_ops *ops;
-};
-
struct xway_gphy_match_data {
char *fe_firmware_name;
char *ge_firmware_name;
};
-struct gswip_gphy_fw {
- struct clk *clk_gate;
- struct reset_control *reset;
- u32 fw_addr_offset;
- char *fw_name;
-};
-
-struct gswip_vlan {
- struct net_device *bridge;
- u16 vid;
- u8 fid;
-};
-
-struct gswip_priv {
- __iomem void *gswip;
- __iomem void *mdio;
- __iomem void *mii;
- const struct gswip_hw_info *hw_info;
- const struct xway_gphy_match_data *gphy_fw_name_cfg;
- struct dsa_switch *ds;
- struct device *dev;
- struct regmap *rcu_regmap;
- struct gswip_vlan vlans[64];
- int num_gphy_fw;
- struct gswip_gphy_fw *gphy_fw;
- u32 port_vlan_filter;
- struct mutex pce_table_lock;
-};
-
struct gswip_pce_table_entry {
u16 index; // PCE_TBL_ADDR.ADDR = pData->table_index
u16 table; // PCE_TBL_CTRL.ADDR = pData->table
@@ -426,15 +183,29 @@ static void gswip_mii_mask(struct gswip_priv *priv, u32 clear, u32 set,
static void gswip_mii_mask_cfg(struct gswip_priv *priv, u32 clear, u32 set,
int port)
{
- /* There's no MII_CFG register for the CPU port */
- if (!dsa_is_cpu_port(priv->ds, port))
- gswip_mii_mask(priv, clear, set, GSWIP_MII_CFGp(port));
+ int reg_port;
+
+ /* MII_CFG register only exists for MII ports */
+ if (!(priv->hw_info->mii_ports & BIT(port)))
+ return;
+
+ reg_port = port + priv->hw_info->mii_port_reg_offset;
+
+ gswip_mii_mask(priv, clear, set, GSWIP_MII_CFGp(reg_port));
}
static void gswip_mii_mask_pcdu(struct gswip_priv *priv, u32 clear, u32 set,
int port)
{
- switch (port) {
+ int reg_port;
+
+ /* MII_PCDU register only exists for MII ports */
+ if (!(priv->hw_info->mii_ports & BIT(port)))
+ return;
+
+ reg_port = port + priv->hw_info->mii_port_reg_offset;
+
+ switch (reg_port) {
case 0:
gswip_mii_mask(priv, clear, set, GSWIP_MII_PCDU0);
break;
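
The two helpers above make the MII register lookup table-driven via the per-SoC mii_ports bitmask and mii_port_reg_offset. A sketch of the mapping, using the xRX200 values from the hw_info entries later in this patch:

	/* mii_ports = BIT(0) | BIT(1) | BIT(5), mii_port_reg_offset = 0:
	 * a call for port 5 touches GSWIP_MII_CFGp(5) / GSWIP_MII_PCDU5,
	 * while a call for the CPU port 6 returns early, since BIT(6) is
	 * not in the mask.
	 */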
@@ -515,6 +286,9 @@ static int gswip_mdio(struct gswip_priv *priv)
int err = 0;
mdio_np = of_get_compatible_child(switch_np, "lantiq,xrx200-mdio");
+ if (!mdio_np)
+ mdio_np = of_get_child_by_name(switch_np, "mdio");
+
if (!of_device_is_available(mdio_np))
goto out_put_node;
@@ -654,7 +428,6 @@ static int gswip_add_single_port_br(struct gswip_priv *priv, int port, bool add)
{
struct gswip_pce_table_entry vlan_active = {0,};
struct gswip_pce_table_entry vlan_mapping = {0,};
- unsigned int cpu_port = priv->hw_info->cpu_port;
int err;
vlan_active.index = port + 1;
@@ -674,7 +447,7 @@ static int gswip_add_single_port_br(struct gswip_priv *priv, int port, bool add)
vlan_mapping.index = port + 1;
vlan_mapping.table = GSWIP_TABLE_VLAN_MAPPING;
vlan_mapping.val[0] = 0 /* vid */;
- vlan_mapping.val[1] = BIT(port) | BIT(cpu_port);
+ vlan_mapping.val[1] = BIT(port) | dsa_cpu_ports(priv->ds);
vlan_mapping.val[2] = 0;
err = gswip_pce_table_entry_write(priv, &vlan_mapping);
if (err) {
@@ -738,15 +511,15 @@ static int gswip_pce_load_microcode(struct gswip_priv *priv)
GSWIP_PCE_TBL_CTRL_OPMOD_ADWR, GSWIP_PCE_TBL_CTRL);
gswip_switch_w(priv, 0, GSWIP_PCE_TBL_MASK);
- for (i = 0; i < ARRAY_SIZE(gswip_pce_microcode); i++) {
+ for (i = 0; i < priv->hw_info->pce_microcode_size; i++) {
gswip_switch_w(priv, i, GSWIP_PCE_TBL_ADDR);
- gswip_switch_w(priv, gswip_pce_microcode[i].val_0,
+ gswip_switch_w(priv, (*priv->hw_info->pce_microcode)[i].val_0,
GSWIP_PCE_TBL_VAL(0));
- gswip_switch_w(priv, gswip_pce_microcode[i].val_1,
+ gswip_switch_w(priv, (*priv->hw_info->pce_microcode)[i].val_1,
GSWIP_PCE_TBL_VAL(1));
- gswip_switch_w(priv, gswip_pce_microcode[i].val_2,
+ gswip_switch_w(priv, (*priv->hw_info->pce_microcode)[i].val_2,
GSWIP_PCE_TBL_VAL(2));
- gswip_switch_w(priv, gswip_pce_microcode[i].val_3,
+ gswip_switch_w(priv, (*priv->hw_info->pce_microcode)[i].val_3,
GSWIP_PCE_TBL_VAL(3));
/* start the table access: */
@@ -804,10 +577,10 @@ static int gswip_port_vlan_filtering(struct dsa_switch *ds, int port,
static int gswip_setup(struct dsa_switch *ds)
{
+ unsigned int cpu_ports = dsa_cpu_ports(ds);
struct gswip_priv *priv = ds->priv;
- unsigned int cpu_port = priv->hw_info->cpu_port;
- int i;
- int err;
+ struct dsa_port *cpu_dp;
+ int err, i;
gswip_switch_w(priv, GSWIP_SWRES_R0, GSWIP_SWRES);
usleep_range(5000, 10000);
@@ -829,9 +602,9 @@ static int gswip_setup(struct dsa_switch *ds)
}
/* Default unknown Broadcast/Multicast/Unicast port maps */
- gswip_switch_w(priv, BIT(cpu_port), GSWIP_PCE_PMAP1);
- gswip_switch_w(priv, BIT(cpu_port), GSWIP_PCE_PMAP2);
- gswip_switch_w(priv, BIT(cpu_port), GSWIP_PCE_PMAP3);
+ gswip_switch_w(priv, cpu_ports, GSWIP_PCE_PMAP1);
+ gswip_switch_w(priv, cpu_ports, GSWIP_PCE_PMAP2);
+ gswip_switch_w(priv, cpu_ports, GSWIP_PCE_PMAP3);
/* Deactivate MDIO PHY auto polling. Some PHYs as the AR8030 have an
* interoperability problem with this auto polling mechanism because
@@ -854,19 +627,28 @@ static int gswip_setup(struct dsa_switch *ds)
/* Configure the MDIO Clock 2.5 MHz */
gswip_mdio_mask(priv, 0xff, 0x09, GSWIP_MDIO_MDC_CFG1);
+ /* bring up the mdio bus */
+ err = gswip_mdio(priv);
+ if (err) {
+ dev_err(priv->dev, "mdio bus setup failed\n");
+ return err;
+ }
+
/* Disable the xMII interface and clear it's isolation bit */
for (i = 0; i < priv->hw_info->max_ports; i++)
gswip_mii_mask_cfg(priv,
GSWIP_MII_CFG_EN | GSWIP_MII_CFG_ISOLATE,
0, i);
- /* enable special tag insertion on cpu port */
- gswip_switch_mask(priv, 0, GSWIP_FDMA_PCTRL_STEN,
- GSWIP_FDMA_PCTRLp(cpu_port));
+ dsa_switch_for_each_cpu_port(cpu_dp, ds) {
+ /* enable special tag insertion on cpu port */
+ gswip_switch_mask(priv, 0, GSWIP_FDMA_PCTRL_STEN,
+ GSWIP_FDMA_PCTRLp(cpu_dp->index));
- /* accept special tag in ingress direction */
- gswip_switch_mask(priv, 0, GSWIP_PCE_PCTRL_0_INGRESS,
- GSWIP_PCE_PCTRL_0p(cpu_port));
+ /* accept special tag in ingress direction */
+ gswip_switch_mask(priv, 0, GSWIP_PCE_PCTRL_0_INGRESS,
+ GSWIP_PCE_PCTRL_0p(cpu_dp->index));
+ }
gswip_switch_mask(priv, 0, GSWIP_BM_QUEUE_GCTRL_GL_MOD,
GSWIP_BM_QUEUE_GCTRL);
@@ -895,7 +677,9 @@ static enum dsa_tag_protocol gswip_get_tag_protocol(struct dsa_switch *ds,
int port,
enum dsa_tag_protocol mp)
{
- return DSA_TAG_PROTO_GSWIP;
+ struct gswip_priv *priv = ds->priv;
+
+ return priv->hw_info->tag_protocol;
}
static int gswip_vlan_active_create(struct gswip_priv *priv,
@@ -962,7 +746,6 @@ static int gswip_vlan_add_unaware(struct gswip_priv *priv,
{
struct gswip_pce_table_entry vlan_mapping = {0,};
unsigned int max_ports = priv->hw_info->max_ports;
- unsigned int cpu_port = priv->hw_info->cpu_port;
bool active_vlan_created = false;
int idx = -1;
int i;
@@ -1002,7 +785,7 @@ static int gswip_vlan_add_unaware(struct gswip_priv *priv,
}
/* Update the VLAN mapping entry and write it to the switch */
- vlan_mapping.val[1] |= BIT(cpu_port);
+ vlan_mapping.val[1] |= dsa_cpu_ports(priv->ds);
vlan_mapping.val[1] |= BIT(port);
err = gswip_pce_table_entry_write(priv, &vlan_mapping);
if (err) {
@@ -1024,7 +807,7 @@ static int gswip_vlan_add_aware(struct gswip_priv *priv,
{
struct gswip_pce_table_entry vlan_mapping = {0,};
unsigned int max_ports = priv->hw_info->max_ports;
- unsigned int cpu_port = priv->hw_info->cpu_port;
+ unsigned int cpu_ports = dsa_cpu_ports(priv->ds);
bool active_vlan_created = false;
int idx = -1;
int fid = -1;
@@ -1071,8 +854,8 @@ static int gswip_vlan_add_aware(struct gswip_priv *priv,
vlan_mapping.val[0] = vid;
/* Update the VLAN mapping entry and write it to the switch */
- vlan_mapping.val[1] |= BIT(cpu_port);
- vlan_mapping.val[2] |= BIT(cpu_port);
+ vlan_mapping.val[1] |= cpu_ports;
+ vlan_mapping.val[2] |= cpu_ports;
vlan_mapping.val[1] |= BIT(port);
if (untagged)
vlan_mapping.val[2] &= ~BIT(port);
@@ -1099,7 +882,6 @@ static int gswip_vlan_remove(struct gswip_priv *priv,
{
struct gswip_pce_table_entry vlan_mapping = {0,};
unsigned int max_ports = priv->hw_info->max_ports;
- unsigned int cpu_port = priv->hw_info->cpu_port;
int idx = -1;
int i;
int err;
@@ -1135,7 +917,7 @@ static int gswip_vlan_remove(struct gswip_priv *priv,
}
/* In case all ports are removed from the bridge, remove the VLAN */
- if ((vlan_mapping.val[1] & ~BIT(cpu_port)) == 0) {
+ if (!(vlan_mapping.val[1] & ~dsa_cpu_ports(priv->ds))) {
err = gswip_vlan_active_remove(priv, idx);
if (err) {
dev_err(priv->dev, "failed to write active VLAN: %d\n",
@@ -1554,6 +1336,14 @@ static void gswip_xrx300_phylink_get_caps(struct dsa_switch *ds, int port,
MAC_10 | MAC_100 | MAC_1000;
}
+static void gswip_phylink_get_caps(struct dsa_switch *ds, int port,
+ struct phylink_config *config)
+{
+ struct gswip_priv *priv = ds->priv;
+
+ priv->hw_info->phylink_get_caps(ds, port, config);
+}
+
static void gswip_port_set_link(struct gswip_priv *priv, int port, bool link)
{
u32 mdio_phy;
@@ -1672,6 +1462,10 @@ static void gswip_phylink_mac_config(struct phylink_config *config,
miicfg |= GSWIP_MII_CFG_LDCLKDIS;
switch (state->interface) {
+ case PHY_INTERFACE_MODE_SGMII:
+ case PHY_INTERFACE_MODE_1000BASEX:
+ case PHY_INTERFACE_MODE_2500BASEX:
+ return;
case PHY_INTERFACE_MODE_MII:
case PHY_INTERFACE_MODE_INTERNAL:
miicfg |= GSWIP_MII_CFG_MODE_MIIM;
@@ -1820,36 +1614,26 @@ static int gswip_get_sset_count(struct dsa_switch *ds, int port, int sset)
return ARRAY_SIZE(gswip_rmon_cnt);
}
-static const struct phylink_mac_ops gswip_phylink_mac_ops = {
- .mac_config = gswip_phylink_mac_config,
- .mac_link_down = gswip_phylink_mac_link_down,
- .mac_link_up = gswip_phylink_mac_link_up,
-};
+static struct phylink_pcs *gswip_phylink_mac_select_pcs(struct phylink_config *config,
+ phy_interface_t interface)
+{
+ struct dsa_port *dp = dsa_phylink_to_port(config);
+ struct gswip_priv *priv = dp->ds->priv;
-static const struct dsa_switch_ops gswip_xrx200_switch_ops = {
- .get_tag_protocol = gswip_get_tag_protocol,
- .setup = gswip_setup,
- .port_enable = gswip_port_enable,
- .port_disable = gswip_port_disable,
- .port_bridge_join = gswip_port_bridge_join,
- .port_bridge_leave = gswip_port_bridge_leave,
- .port_fast_age = gswip_port_fast_age,
- .port_vlan_filtering = gswip_port_vlan_filtering,
- .port_vlan_add = gswip_port_vlan_add,
- .port_vlan_del = gswip_port_vlan_del,
- .port_stp_state_set = gswip_port_stp_state_set,
- .port_fdb_add = gswip_port_fdb_add,
- .port_fdb_del = gswip_port_fdb_del,
- .port_fdb_dump = gswip_port_fdb_dump,
- .port_change_mtu = gswip_port_change_mtu,
- .port_max_mtu = gswip_port_max_mtu,
- .phylink_get_caps = gswip_xrx200_phylink_get_caps,
- .get_strings = gswip_get_strings,
- .get_ethtool_stats = gswip_get_ethtool_stats,
- .get_sset_count = gswip_get_sset_count,
+ if (priv->hw_info->mac_select_pcs)
+ return priv->hw_info->mac_select_pcs(config, interface);
+
+ return NULL;
+}
+
+static const struct phylink_mac_ops gswip_phylink_mac_ops = {
+ .mac_config = gswip_phylink_mac_config,
+ .mac_link_down = gswip_phylink_mac_link_down,
+ .mac_link_up = gswip_phylink_mac_link_up,
+ .mac_select_pcs = gswip_phylink_mac_select_pcs,
};
-static const struct dsa_switch_ops gswip_xrx300_switch_ops = {
+static const struct dsa_switch_ops gswip_switch_ops = {
.get_tag_protocol = gswip_get_tag_protocol,
.setup = gswip_setup,
.port_enable = gswip_port_enable,
@@ -1866,7 +1650,7 @@ static const struct dsa_switch_ops gswip_xrx300_switch_ops = {
.port_fdb_dump = gswip_port_fdb_dump,
.port_change_mtu = gswip_port_change_mtu,
.port_max_mtu = gswip_port_max_mtu,
- .phylink_get_caps = gswip_xrx300_phylink_get_caps,
+ .phylink_get_caps = gswip_phylink_get_caps,
.get_strings = gswip_get_strings,
.get_ethtool_stats = gswip_get_ethtool_stats,
.get_sset_count = gswip_get_sset_count,
@@ -1935,8 +1719,7 @@ static int gswip_gphy_fw_load(struct gswip_priv *priv, struct gswip_gphy_fw *gph
memcpy(fw_addr, fw->data, fw->size);
} else {
release_firmware(fw);
- return dev_err_probe(dev, -ENOMEM,
- "failed to alloc firmware memory\n");
+ return -ENOMEM;
}
release_firmware(fw);
@@ -2093,6 +1876,30 @@ remove_gphy:
return err;
}
+static int gswip_validate_cpu_port(struct dsa_switch *ds)
+{
+ struct gswip_priv *priv = ds->priv;
+ struct dsa_port *cpu_dp;
+ int cpu_port = -1;
+
+ dsa_switch_for_each_cpu_port(cpu_dp, ds) {
+ if (cpu_port != -1)
+ return dev_err_probe(ds->dev, -EINVAL,
+ "only a single CPU port is supported\n");
+
+ cpu_port = cpu_dp->index;
+ }
+
+ if (cpu_port == -1)
+ return dev_err_probe(ds->dev, -EINVAL, "no CPU port defined\n");
+
+ if (BIT(cpu_port) & ~priv->hw_info->allowed_cpu_ports)
+ return dev_err_probe(ds->dev, -EINVAL,
+ "unsupported CPU port defined\n");
+
+ return 0;
+}
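
The validation above is driven by the per-SoC allowed_cpu_ports mask; both variants in this patch allow only port 6 (see the hw_info entries near the end of the file). A sketch of the failure case:

	/* With allowed_cpu_ports = BIT(6), a device tree declaring port 3
	 * as the CPU port fails the check, since BIT(3) & ~BIT(6) is
	 * non-zero, and probe bails out with -EINVAL.
	 */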
+
static int gswip_probe(struct platform_device *pdev)
{
struct device_node *np, *gphy_fw_np;
@@ -2129,12 +1936,22 @@ static int gswip_probe(struct platform_device *pdev)
priv->ds->dev = dev;
priv->ds->num_ports = priv->hw_info->max_ports;
priv->ds->priv = priv;
- priv->ds->ops = priv->hw_info->ops;
+ priv->ds->ops = &gswip_switch_ops;
priv->ds->phylink_mac_ops = &gswip_phylink_mac_ops;
priv->dev = dev;
mutex_init(&priv->pce_table_lock);
version = gswip_switch_r(priv, GSWIP_VERSION);
+ /* The hardware has the 'major/minor' version bytes in the wrong order,
+ * preventing numerical comparisons. Construct a 16-bit unsigned integer
+ * with the REV field as the most significant byte and the MOD field as
+ * the least significant byte. This is effectively a swab16() of the
+ * version value, except that the source variable is left untouched.
+ */
+ priv->version = GSWIP_VERSION_REV(version) << 8 |
+ GSWIP_VERSION_MOD(version);
+
np = dev->of_node;
switch (version) {
case GSWIP_VERSION_2_0:
@@ -2163,30 +1980,20 @@ static int gswip_probe(struct platform_device *pdev)
"gphy fw probe failed\n");
}
- /* bring up the mdio bus */
- err = gswip_mdio(priv);
- if (err) {
- dev_err_probe(dev, err, "mdio probe failed\n");
- goto gphy_fw_remove;
- }
-
err = dsa_register_switch(priv->ds);
if (err) {
dev_err_probe(dev, err, "dsa switch registration failed\n");
goto gphy_fw_remove;
}
- if (!dsa_is_cpu_port(priv->ds, priv->hw_info->cpu_port)) {
- err = dev_err_probe(dev, -EINVAL,
- "wrong CPU port defined, HW only supports port: %i\n",
- priv->hw_info->cpu_port);
+
+ err = gswip_validate_cpu_port(priv->ds);
+ if (err)
goto disable_switch;
- }
platform_set_drvdata(pdev, priv);
dev_info(dev, "probed GSWIP version %lx mod %lx\n",
- (version & GSWIP_VERSION_REV_MASK) >> GSWIP_VERSION_REV_SHIFT,
- (version & GSWIP_VERSION_MOD_MASK) >> GSWIP_VERSION_MOD_SHIFT);
+ GSWIP_VERSION_REV(version), GSWIP_VERSION_MOD(version));
return 0;
disable_switch:
@@ -2229,14 +2036,24 @@ static void gswip_shutdown(struct platform_device *pdev)
static const struct gswip_hw_info gswip_xrx200 = {
.max_ports = 7,
- .cpu_port = 6,
- .ops = &gswip_xrx200_switch_ops,
+ .allowed_cpu_ports = BIT(6),
+ .mii_ports = BIT(0) | BIT(1) | BIT(5),
+ .mii_port_reg_offset = 0,
+ .phylink_get_caps = gswip_xrx200_phylink_get_caps,
+ .pce_microcode = &gswip_pce_microcode,
+ .pce_microcode_size = ARRAY_SIZE(gswip_pce_microcode),
+ .tag_protocol = DSA_TAG_PROTO_GSWIP,
};
static const struct gswip_hw_info gswip_xrx300 = {
.max_ports = 7,
- .cpu_port = 6,
- .ops = &gswip_xrx300_switch_ops,
+ .allowed_cpu_ports = BIT(6),
+ .mii_ports = BIT(0) | BIT(5),
+ .mii_port_reg_offset = 0,
+ .phylink_get_caps = gswip_xrx300_phylink_get_caps,
+ .pce_microcode = &gswip_pce_microcode,
+ .pce_microcode_size = ARRAY_SIZE(gswip_pce_microcode),
+ .tag_protocol = DSA_TAG_PROTO_GSWIP,
};
static const struct of_device_id gswip_of_match[] = {
diff --git a/drivers/net/dsa/lantiq/lantiq_gswip.h b/drivers/net/dsa/lantiq/lantiq_gswip.h
new file mode 100644
index 000000000000..2df9c8e8cfd0
--- /dev/null
+++ b/drivers/net/dsa/lantiq/lantiq_gswip.h
@@ -0,0 +1,276 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LANTIQ_GSWIP_H
+#define __LANTIQ_GSWIP_H
+
+#include <linux/clk.h>
+#include <linux/mutex.h>
+#include <linux/phylink.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/reset.h>
+#include <linux/swab.h>
+#include <net/dsa.h>
+
+/* GSWIP MDIO Registers */
+#define GSWIP_MDIO_GLOB 0x00
+#define GSWIP_MDIO_GLOB_ENABLE BIT(15)
+#define GSWIP_MDIO_CTRL 0x08
+#define GSWIP_MDIO_CTRL_BUSY BIT(12)
+#define GSWIP_MDIO_CTRL_RD BIT(11)
+#define GSWIP_MDIO_CTRL_WR BIT(10)
+#define GSWIP_MDIO_CTRL_PHYAD_MASK 0x1f
+#define GSWIP_MDIO_CTRL_PHYAD_SHIFT 5
+#define GSWIP_MDIO_CTRL_REGAD_MASK 0x1f
+#define GSWIP_MDIO_READ 0x09
+#define GSWIP_MDIO_WRITE 0x0A
+#define GSWIP_MDIO_MDC_CFG0 0x0B
+#define GSWIP_MDIO_MDC_CFG1 0x0C
+#define GSWIP_MDIO_PHYp(p) (0x15 - (p))
+#define GSWIP_MDIO_PHY_LINK_MASK 0x6000
+#define GSWIP_MDIO_PHY_LINK_AUTO 0x0000
+#define GSWIP_MDIO_PHY_LINK_DOWN 0x4000
+#define GSWIP_MDIO_PHY_LINK_UP 0x2000
+#define GSWIP_MDIO_PHY_SPEED_MASK 0x1800
+#define GSWIP_MDIO_PHY_SPEED_AUTO 0x1800
+#define GSWIP_MDIO_PHY_SPEED_M10 0x0000
+#define GSWIP_MDIO_PHY_SPEED_M100 0x0800
+#define GSWIP_MDIO_PHY_SPEED_G1 0x1000
+#define GSWIP_MDIO_PHY_FDUP_MASK 0x0600
+#define GSWIP_MDIO_PHY_FDUP_AUTO 0x0000
+#define GSWIP_MDIO_PHY_FDUP_EN 0x0200
+#define GSWIP_MDIO_PHY_FDUP_DIS 0x0600
+#define GSWIP_MDIO_PHY_FCONTX_MASK 0x0180
+#define GSWIP_MDIO_PHY_FCONTX_AUTO 0x0000
+#define GSWIP_MDIO_PHY_FCONTX_EN 0x0100
+#define GSWIP_MDIO_PHY_FCONTX_DIS 0x0180
+#define GSWIP_MDIO_PHY_FCONRX_MASK 0x0060
+#define GSWIP_MDIO_PHY_FCONRX_AUTO 0x0000
+#define GSWIP_MDIO_PHY_FCONRX_EN 0x0020
+#define GSWIP_MDIO_PHY_FCONRX_DIS 0x0060
+#define GSWIP_MDIO_PHY_ADDR_MASK 0x001f
+#define GSWIP_MDIO_PHY_MASK (GSWIP_MDIO_PHY_ADDR_MASK | \
+ GSWIP_MDIO_PHY_FCONRX_MASK | \
+ GSWIP_MDIO_PHY_FCONTX_MASK | \
+ GSWIP_MDIO_PHY_LINK_MASK | \
+ GSWIP_MDIO_PHY_SPEED_MASK | \
+ GSWIP_MDIO_PHY_FDUP_MASK)
+
+/* GSWIP MII Registers */
+#define GSWIP_MII_CFGp(p) (0x2 * (p))
+#define GSWIP_MII_CFG_RESET BIT(15)
+#define GSWIP_MII_CFG_EN BIT(14)
+#define GSWIP_MII_CFG_ISOLATE BIT(13)
+#define GSWIP_MII_CFG_LDCLKDIS BIT(12)
+#define GSWIP_MII_CFG_RGMII_IBS BIT(8)
+#define GSWIP_MII_CFG_RMII_CLK BIT(7)
+#define GSWIP_MII_CFG_MODE_MIIP 0x0
+#define GSWIP_MII_CFG_MODE_MIIM 0x1
+#define GSWIP_MII_CFG_MODE_RMIIP 0x2
+#define GSWIP_MII_CFG_MODE_RMIIM 0x3
+#define GSWIP_MII_CFG_MODE_RGMII 0x4
+#define GSWIP_MII_CFG_MODE_GMII 0x9
+#define GSWIP_MII_CFG_MODE_MASK 0xf
+#define GSWIP_MII_CFG_RATE_M2P5 0x00
+#define GSWIP_MII_CFG_RATE_M25 0x10
+#define GSWIP_MII_CFG_RATE_M125 0x20
+#define GSWIP_MII_CFG_RATE_M50 0x30
+#define GSWIP_MII_CFG_RATE_AUTO 0x40
+#define GSWIP_MII_CFG_RATE_MASK 0x70
+#define GSWIP_MII_PCDU0 0x01
+#define GSWIP_MII_PCDU1 0x03
+#define GSWIP_MII_PCDU5 0x05
+#define GSWIP_MII_PCDU_TXDLY_MASK GENMASK(2, 0)
+#define GSWIP_MII_PCDU_RXDLY_MASK GENMASK(9, 7)
+
+/* GSWIP Core Registers */
+#define GSWIP_SWRES 0x000
+#define GSWIP_SWRES_R1 BIT(1) /* GSWIP Software reset */
+#define GSWIP_SWRES_R0 BIT(0) /* GSWIP Hardware reset */
+#define GSWIP_VERSION 0x013
+#define GSWIP_VERSION_REV_MASK GENMASK(7, 0)
+#define GSWIP_VERSION_MOD_MASK GENMASK(15, 8)
+#define GSWIP_VERSION_REV(v) FIELD_GET(GSWIP_VERSION_REV_MASK, v)
+#define GSWIP_VERSION_MOD(v) FIELD_GET(GSWIP_VERSION_MOD_MASK, v)
+#define GSWIP_VERSION_2_0 0x100
+#define GSWIP_VERSION_2_1 0x021
+#define GSWIP_VERSION_2_2 0x122
+#define GSWIP_VERSION_2_2_ETC 0x022
+/* The hardware has the 'major/minor' version bytes in the wrong order
+ * preventing numerical comparisons. Swap the bytes of the 16-bit value
+ * to end up with REV being the most significant byte and MOD being the
+ * least significant byte, which then allows comparing it with the
+ * value stored in struct gswip_priv.
+ */
+#define GSWIP_VERSION_GE(priv, ver) ((priv)->version >= swab16(ver))
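+/* Worked example (a sketch): GSWIP 2.1 hardware reads back 0x0021
+ * (MOD = 0x00, REV = 0x21), stored as priv->version = 0x2100; a 2.2
+ * device reads back 0x0122, stored as 0x2201. The macro swaps
+ * GSWIP_VERSION_2_2 (0x122) to 0x2201, so 2.1 yields
+ * 0x2100 >= 0x2201 == false and 2.2 yields 0x2201 >= 0x2201 == true.
+ */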
+
+#define GSWIP_BM_RAM_VAL(x) (0x043 - (x))
+#define GSWIP_BM_RAM_ADDR 0x044
+#define GSWIP_BM_RAM_CTRL 0x045
+#define GSWIP_BM_RAM_CTRL_BAS BIT(15)
+#define GSWIP_BM_RAM_CTRL_OPMOD BIT(5)
+#define GSWIP_BM_RAM_CTRL_ADDR_MASK GENMASK(4, 0)
+#define GSWIP_BM_QUEUE_GCTRL 0x04A
+#define GSWIP_BM_QUEUE_GCTRL_GL_MOD BIT(10)
+/* buffer management Port Configuration Register */
+#define GSWIP_BM_PCFGp(p) (0x080 + ((p) * 2))
+#define GSWIP_BM_PCFG_CNTEN BIT(0) /* RMON Counter Enable */
+#define GSWIP_BM_PCFG_IGCNT BIT(1) /* Ingress Special Tag RMON count */
+/* buffer management Port Control Register */
+#define GSWIP_BM_RMON_CTRLp(p) (0x81 + ((p) * 2))
+#define GSWIP_BM_CTRL_RMON_RAM1_RES BIT(0) /* Software Reset for RMON RAM 1 */
+#define GSWIP_BM_CTRL_RMON_RAM2_RES BIT(1) /* Software Reset for RMON RAM 2 */
+
+/* PCE */
+#define GSWIP_PCE_TBL_KEY(x) (0x447 - (x))
+#define GSWIP_PCE_TBL_MASK 0x448
+#define GSWIP_PCE_TBL_VAL(x) (0x44D - (x))
+#define GSWIP_PCE_TBL_ADDR 0x44E
+#define GSWIP_PCE_TBL_CTRL 0x44F
+#define GSWIP_PCE_TBL_CTRL_BAS BIT(15)
+#define GSWIP_PCE_TBL_CTRL_TYPE BIT(13)
+#define GSWIP_PCE_TBL_CTRL_VLD BIT(12)
+#define GSWIP_PCE_TBL_CTRL_KEYFORM BIT(11)
+#define GSWIP_PCE_TBL_CTRL_GMAP_MASK GENMASK(10, 7)
+#define GSWIP_PCE_TBL_CTRL_OPMOD_MASK GENMASK(6, 5)
+#define GSWIP_PCE_TBL_CTRL_OPMOD_ADRD 0x00
+#define GSWIP_PCE_TBL_CTRL_OPMOD_ADWR 0x20
+#define GSWIP_PCE_TBL_CTRL_OPMOD_KSRD 0x40
+#define GSWIP_PCE_TBL_CTRL_OPMOD_KSWR 0x60
+#define GSWIP_PCE_TBL_CTRL_ADDR_MASK GENMASK(4, 0)
+#define GSWIP_PCE_PMAP1 0x453 /* Monitoring port map */
+#define GSWIP_PCE_PMAP2 0x454 /* Default Multicast port map */
+#define GSWIP_PCE_PMAP3 0x455 /* Default Unknown Unicast port map */
+#define GSWIP_PCE_GCTRL_0 0x456
+#define GSWIP_PCE_GCTRL_0_MTFL BIT(0) /* MAC Table Flushing */
+#define GSWIP_PCE_GCTRL_0_MC_VALID BIT(3)
+#define GSWIP_PCE_GCTRL_0_VLAN BIT(14) /* VLAN aware Switching */
+#define GSWIP_PCE_GCTRL_1 0x457
+#define GSWIP_PCE_GCTRL_1_MAC_GLOCK BIT(2) /* MAC Address table lock */
+#define GSWIP_PCE_GCTRL_1_MAC_GLOCK_MOD BIT(3) /* MAC address table lock forwarding mode */
+#define GSWIP_PCE_PCTRL_0p(p) (0x480 + ((p) * 0xA))
+#define GSWIP_PCE_PCTRL_0_TVM BIT(5) /* Transparent VLAN mode */
+#define GSWIP_PCE_PCTRL_0_VREP BIT(6) /* VLAN Replace Mode */
+#define GSWIP_PCE_PCTRL_0_INGRESS BIT(11) /* Accept special tag in ingress */
+#define GSWIP_PCE_PCTRL_0_PSTATE_LISTEN 0x0
+#define GSWIP_PCE_PCTRL_0_PSTATE_RX 0x1
+#define GSWIP_PCE_PCTRL_0_PSTATE_TX 0x2
+#define GSWIP_PCE_PCTRL_0_PSTATE_LEARNING 0x3
+#define GSWIP_PCE_PCTRL_0_PSTATE_FORWARDING 0x7
+#define GSWIP_PCE_PCTRL_0_PSTATE_MASK GENMASK(2, 0)
+#define GSWIP_PCE_VCTRL(p) (0x485 + ((p) * 0xA))
+#define GSWIP_PCE_VCTRL_UVR BIT(0) /* Unknown VLAN Rule */
+#define GSWIP_PCE_VCTRL_VIMR BIT(3) /* VLAN Ingress Member violation rule */
+#define GSWIP_PCE_VCTRL_VEMR BIT(4) /* VLAN Egress Member violation rule */
+#define GSWIP_PCE_VCTRL_VSR BIT(5) /* VLAN Security */
+#define GSWIP_PCE_VCTRL_VID0 BIT(6) /* Priority Tagged Rule */
+#define GSWIP_PCE_DEFPVID(p) (0x486 + ((p) * 0xA))
+
+#define GSWIP_MAC_FLEN 0x8C5
+#define GSWIP_MAC_CTRL_0p(p) (0x903 + ((p) * 0xC))
+#define GSWIP_MAC_CTRL_0_PADEN BIT(8)
+#define GSWIP_MAC_CTRL_0_FCS_EN BIT(7)
+#define GSWIP_MAC_CTRL_0_FCON_MASK 0x0070
+#define GSWIP_MAC_CTRL_0_FCON_AUTO 0x0000
+#define GSWIP_MAC_CTRL_0_FCON_RX 0x0010
+#define GSWIP_MAC_CTRL_0_FCON_TX 0x0020
+#define GSWIP_MAC_CTRL_0_FCON_RXTX 0x0030
+#define GSWIP_MAC_CTRL_0_FCON_NONE 0x0040
+#define GSWIP_MAC_CTRL_0_FDUP_MASK 0x000C
+#define GSWIP_MAC_CTRL_0_FDUP_AUTO 0x0000
+#define GSWIP_MAC_CTRL_0_FDUP_EN 0x0004
+#define GSWIP_MAC_CTRL_0_FDUP_DIS 0x000C
+#define GSWIP_MAC_CTRL_0_GMII_MASK 0x0003
+#define GSWIP_MAC_CTRL_0_GMII_AUTO 0x0000
+#define GSWIP_MAC_CTRL_0_GMII_MII 0x0001
+#define GSWIP_MAC_CTRL_0_GMII_RGMII 0x0002
+#define GSWIP_MAC_CTRL_2p(p) (0x905 + ((p) * 0xC))
+#define GSWIP_MAC_CTRL_2_LCHKL BIT(2) /* Frame Length Check Long Enable */
+#define GSWIP_MAC_CTRL_2_MLEN BIT(3) /* Maximum Untagged Frame Length */
+
+/* Ethernet Switch Fetch DMA Port Control Register */
+#define GSWIP_FDMA_PCTRLp(p) (0xA80 + ((p) * 0x6))
+#define GSWIP_FDMA_PCTRL_EN BIT(0) /* FDMA Port Enable */
+#define GSWIP_FDMA_PCTRL_STEN BIT(1) /* Special Tag Insertion Enable */
+#define GSWIP_FDMA_PCTRL_VLANMOD_MASK GENMASK(4, 3) /* VLAN Modification Control */
+#define GSWIP_FDMA_PCTRL_VLANMOD_SHIFT 3 /* VLAN Modification Control */
+#define GSWIP_FDMA_PCTRL_VLANMOD_DIS (0x0 << GSWIP_FDMA_PCTRL_VLANMOD_SHIFT)
+#define GSWIP_FDMA_PCTRL_VLANMOD_PRIO (0x1 << GSWIP_FDMA_PCTRL_VLANMOD_SHIFT)
+#define GSWIP_FDMA_PCTRL_VLANMOD_ID (0x2 << GSWIP_FDMA_PCTRL_VLANMOD_SHIFT)
+#define GSWIP_FDMA_PCTRL_VLANMOD_BOTH (0x3 << GSWIP_FDMA_PCTRL_VLANMOD_SHIFT)
+
+/* Ethernet Switch Store DMA Port Control Register */
+#define GSWIP_SDMA_PCTRLp(p) (0xBC0 + ((p) * 0x6))
+#define GSWIP_SDMA_PCTRL_EN BIT(0) /* SDMA Port Enable */
+#define GSWIP_SDMA_PCTRL_FCEN BIT(1) /* Flow Control Enable */
+#define GSWIP_SDMA_PCTRL_PAUFWD BIT(3) /* Pause Frame Forwarding */
+
+#define GSWIP_TABLE_ACTIVE_VLAN 0x01
+#define GSWIP_TABLE_VLAN_MAPPING 0x02
+#define GSWIP_TABLE_MAC_BRIDGE 0x0b
+#define GSWIP_TABLE_MAC_BRIDGE_KEY3_FID GENMASK(5, 0) /* Filtering identifier */
+#define GSWIP_TABLE_MAC_BRIDGE_VAL0_PORT GENMASK(7, 4) /* Port on learned entries */
+#define GSWIP_TABLE_MAC_BRIDGE_VAL1_STATIC BIT(0) /* Static, non-aging entry */
+
+#define XRX200_GPHY_FW_ALIGN (16 * 1024)
+
+/* Maximum packet size supported by the switch. In theory this should be 10240,
+ * but long packets currently cause lock-ups with an MTU of over 2526. Medium
+ * packets are sometimes dropped (e.g. TCP over 2477, UDP over 2516-2519, ICMP
+ * over 2526), hence an MTU value of 2400 seems safe. This issue only affects
+ * packet reception. This is probably caused by the PPA engine, which is on the
+ * RX part of the device. Packet transmission works properly up to 10240.
+ */
+#define GSWIP_MAX_PACKET_LENGTH 2400
+
+struct gswip_pce_microcode {
+ u16 val_3;
+ u16 val_2;
+ u16 val_1;
+ u16 val_0;
+};
+
+struct gswip_hw_info {
+ int max_ports;
+ unsigned int allowed_cpu_ports;
+ unsigned int mii_ports;
+ int mii_port_reg_offset;
+ const struct gswip_pce_microcode (*pce_microcode)[];
+ size_t pce_microcode_size;
+ enum dsa_tag_protocol tag_protocol;
+ void (*phylink_get_caps)(struct dsa_switch *ds, int port,
+ struct phylink_config *config);
+ struct phylink_pcs *(*mac_select_pcs)(struct phylink_config *config,
+ phy_interface_t interface);
+};
+
+struct gswip_gphy_fw {
+ struct clk *clk_gate;
+ struct reset_control *reset;
+ u32 fw_addr_offset;
+ char *fw_name;
+};
+
+struct gswip_vlan {
+ struct net_device *bridge;
+ u16 vid;
+ u8 fid;
+};
+
+struct gswip_priv {
+ __iomem void *gswip;
+ __iomem void *mdio;
+ __iomem void *mii;
+ const struct gswip_hw_info *hw_info;
+ const struct xway_gphy_match_data *gphy_fw_name_cfg;
+ struct dsa_switch *ds;
+ struct device *dev;
+ struct regmap *rcu_regmap;
+ struct gswip_vlan vlans[64];
+ int num_gphy_fw;
+ struct gswip_gphy_fw *gphy_fw;
+ u32 port_vlan_filter;
+ struct mutex pce_table_lock;
+ u16 version;
+};
+
+#endif /* __LANTIQ_GSWIP_H */
diff --git a/drivers/net/dsa/lantiq_pce.h b/drivers/net/dsa/lantiq/lantiq_pce.h
index e2be31f3672a..659f9a0638d9 100644
--- a/drivers/net/dsa/lantiq_pce.h
+++ b/drivers/net/dsa/lantiq/lantiq_pce.h
@@ -7,6 +7,8 @@
* Copyright (C) 2017 - 2018 Hauke Mehrtens <hauke@hauke-m.de>
*/
+#include "lantiq_gswip.h"
+
enum {
OUT_MAC0 = 0,
OUT_MAC1,
@@ -74,13 +76,6 @@ enum {
FLAG_NO, /*13*/
};
-struct gswip_pce_microcode {
- u16 val_3;
- u16 val_2;
- u16 val_1;
- u16 val_0;
-};
-
#define MC_ENTRY(val, msk, ns, out, len, type, flags, ipv4_len) \
{ val, msk, ((ns) << 10 | (out) << 4 | (len) >> 1),\
((len) & 1) << 15 | (type) << 13 | (flags) << 9 | (ipv4_len) << 8 }
diff --git a/drivers/net/dsa/realtek/realtek.h b/drivers/net/dsa/realtek/realtek.h
index a1b2e0b529d5..c03485a80d93 100644
--- a/drivers/net/dsa/realtek/realtek.h
+++ b/drivers/net/dsa/realtek/realtek.h
@@ -19,9 +19,6 @@
struct phylink_mac_ops;
struct realtek_ops;
-struct dentry;
-struct inode;
-struct file;
struct rtl8366_mib_counter {
unsigned int base;
diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c
index e6b802e3d844..81ea01a652b9 100644
--- a/drivers/net/ethernet/airoha/airoha_eth.c
+++ b/drivers/net/ethernet/airoha/airoha_eth.c
@@ -698,7 +698,8 @@ static int airoha_qdma_rx_process(struct airoha_queue *q, int budget)
reason = FIELD_GET(AIROHA_RXD4_PPE_CPU_REASON, msg1);
if (reason == PPE_CPU_REASON_HIT_UNBIND_RATE_REACHED)
- airoha_ppe_check_skb(eth->ppe, q->skb, hash);
+ airoha_ppe_check_skb(&eth->ppe->dev, q->skb, hash,
+ false);
done++;
napi_gro_receive(&q->napi, q->skb);
@@ -2599,13 +2600,15 @@ static int airoha_dev_setup_tc_block_cb(enum tc_setup_type type,
void *type_data, void *cb_priv)
{
struct net_device *dev = cb_priv;
+ struct airoha_gdm_port *port = netdev_priv(dev);
+ struct airoha_eth *eth = port->qdma->eth;
if (!tc_can_offload(dev))
return -EOPNOTSUPP;
switch (type) {
case TC_SETUP_CLSFLOWER:
- return airoha_ppe_setup_tc_block_cb(dev, type_data);
+ return airoha_ppe_setup_tc_block_cb(&eth->ppe->dev, type_data);
case TC_SETUP_CLSMATCHALL:
return airoha_dev_tc_matchall(dev, type_data);
default:
diff --git a/drivers/net/ethernet/airoha/airoha_eth.h b/drivers/net/ethernet/airoha/airoha_eth.h
index a970b789cf23..77fd13d466dc 100644
--- a/drivers/net/ethernet/airoha/airoha_eth.h
+++ b/drivers/net/ethernet/airoha/airoha_eth.h
@@ -13,6 +13,7 @@
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/reset.h>
+#include <linux/soc/airoha/airoha_offload.h>
#include <net/dsa.h>
#define AIROHA_MAX_NUM_GDM_PORTS 4
@@ -229,10 +230,6 @@ struct airoha_hw_stats {
};
enum {
- PPE_CPU_REASON_HIT_UNBIND_RATE_REACHED = 0x0f,
-};
-
-enum {
AIROHA_FOE_STATE_INVALID,
AIROHA_FOE_STATE_UNBIND,
AIROHA_FOE_STATE_BIND,
@@ -252,6 +249,10 @@ enum {
#define AIROHA_FOE_MAC_SMAC_ID GENMASK(20, 16)
#define AIROHA_FOE_MAC_PPPOE_ID GENMASK(15, 0)
+#define AIROHA_FOE_MAC_WDMA_QOS GENMASK(15, 12)
+#define AIROHA_FOE_MAC_WDMA_BAND BIT(11)
+#define AIROHA_FOE_MAC_WDMA_WCID GENMASK(10, 0)
+
struct airoha_foe_mac_info_common {
u16 vlan1;
u16 etype;
@@ -481,6 +482,13 @@ struct airoha_flow_table_entry {
unsigned long cookie;
};
+struct airoha_wdma_info {
+ u8 idx;
+ u8 queue;
+ u16 wcid;
+ u8 bss;
+};
+
/* RX queue to IRQ mapping: BIT(q) in IRQ(n) */
#define RX_IRQ0_BANK_PIN_MASK 0x839f
#define RX_IRQ1_BANK_PIN_MASK 0x7fe00000
@@ -535,6 +543,7 @@ struct airoha_gdm_port {
#define AIROHA_RXD4_FOE_ENTRY GENMASK(15, 0)
struct airoha_ppe {
+ struct airoha_ppe_dev dev;
struct airoha_eth *eth;
void *foe;
@@ -609,9 +618,9 @@ static inline bool airhoa_is_lan_gdm_port(struct airoha_gdm_port *port)
bool airoha_is_valid_gdm_port(struct airoha_eth *eth,
struct airoha_gdm_port *port);
-void airoha_ppe_check_skb(struct airoha_ppe *ppe, struct sk_buff *skb,
- u16 hash);
-int airoha_ppe_setup_tc_block_cb(struct net_device *dev, void *type_data);
+void airoha_ppe_check_skb(struct airoha_ppe_dev *dev, struct sk_buff *skb,
+ u16 hash, bool rx_wlan);
+int airoha_ppe_setup_tc_block_cb(struct airoha_ppe_dev *dev, void *type_data);
int airoha_ppe_init(struct airoha_eth *eth);
void airoha_ppe_deinit(struct airoha_eth *eth);
void airoha_ppe_init_upd_mem(struct airoha_gdm_port *port);
diff --git a/drivers/net/ethernet/airoha/airoha_npu.c b/drivers/net/ethernet/airoha/airoha_npu.c
index a802f95df99d..e1d131d6115c 100644
--- a/drivers/net/ethernet/airoha/airoha_npu.c
+++ b/drivers/net/ethernet/airoha/airoha_npu.c
@@ -13,7 +13,6 @@
#include <linux/regmap.h>
#include "airoha_eth.h"
-#include "airoha_npu.h"
#define NPU_EN7581_FIRMWARE_DATA "airoha/en7581_npu_data.bin"
#define NPU_EN7581_FIRMWARE_RV32 "airoha/en7581_npu_rv32.bin"
@@ -42,6 +41,22 @@
#define REG_CR_MBQ8_CTRL(_n) (NPU_MBOX_BASE_ADDR + 0x0b0 + ((_n) << 2))
#define REG_CR_NPU_MIB(_n) (NPU_MBOX_BASE_ADDR + 0x140 + ((_n) << 2))
+#define NPU_WLAN_BASE_ADDR 0x30d000
+
+#define REG_IRQ_STATUS (NPU_WLAN_BASE_ADDR + 0x030)
+#define REG_IRQ_RXDONE(_n) (NPU_WLAN_BASE_ADDR + ((_n) << 2) + 0x034)
+#define NPU_IRQ_RX_MASK(_n) ((_n) == 1 ? BIT(17) : BIT(16))
+
+#define REG_TX_BASE(_n) (NPU_WLAN_BASE_ADDR + ((_n) << 4) + 0x080)
+#define REG_TX_DSCP_NUM(_n) (NPU_WLAN_BASE_ADDR + ((_n) << 4) + 0x084)
+#define REG_TX_CPU_IDX(_n) (NPU_WLAN_BASE_ADDR + ((_n) << 4) + 0x088)
+#define REG_TX_DMA_IDX(_n) (NPU_WLAN_BASE_ADDR + ((_n) << 4) + 0x08c)
+
+#define REG_RX_BASE(_n) (NPU_WLAN_BASE_ADDR + ((_n) << 4) + 0x180)
+#define REG_RX_DSCP_NUM(_n) (NPU_WLAN_BASE_ADDR + ((_n) << 4) + 0x184)
+#define REG_RX_CPU_IDX(_n) (NPU_WLAN_BASE_ADDR + ((_n) << 4) + 0x188)
+#define REG_RX_DMA_IDX(_n) (NPU_WLAN_BASE_ADDR + ((_n) << 4) + 0x18c)
+
#define NPU_TIMER_BASE_ADDR 0x310100
#define REG_WDT_TIMER_CTRL(_n) (NPU_TIMER_BASE_ADDR + ((_n) * 0x100))
#define WDT_EN_MASK BIT(25)
@@ -124,6 +139,13 @@ struct ppe_mbox_data {
};
};
+struct wlan_mbox_data {
+ u32 ifindex:4;
+ u32 func_type:4;
+ u32 func_id;
+ DECLARE_FLEX_ARRAY(u8, d);
+};
+
static int airoha_npu_send_msg(struct airoha_npu *npu, int func_id,
void *p, int size)
{
@@ -390,6 +412,136 @@ out:
return err;
}
+static int airoha_npu_wlan_msg_send(struct airoha_npu *npu, int ifindex,
+ enum airoha_npu_wlan_set_cmd func_id,
+ void *data, int data_len, gfp_t gfp)
+{
+ struct wlan_mbox_data *wlan_data;
+ int err, len;
+
+ len = sizeof(*wlan_data) + data_len;
+ wlan_data = kzalloc(len, gfp);
+ if (!wlan_data)
+ return -ENOMEM;
+
+ wlan_data->ifindex = ifindex;
+ wlan_data->func_type = NPU_OP_SET;
+ wlan_data->func_id = func_id;
+ memcpy(wlan_data->d, data, data_len);
+
+ err = airoha_npu_send_msg(npu, NPU_FUNC_WIFI, wlan_data, len);
+ kfree(wlan_data);
+
+ return err;
+}
+
+static int airoha_npu_wlan_msg_get(struct airoha_npu *npu, int ifindex,
+ enum airoha_npu_wlan_get_cmd func_id,
+ void *data, int data_len, gfp_t gfp)
+{
+ struct wlan_mbox_data *wlan_data;
+ int err, len;
+
+ len = sizeof(*wlan_data) + data_len;
+ wlan_data = kzalloc(len, gfp);
+ if (!wlan_data)
+ return -ENOMEM;
+
+ wlan_data->ifindex = ifindex;
+ wlan_data->func_type = NPU_OP_GET;
+ wlan_data->func_id = func_id;
+
+ err = airoha_npu_send_msg(npu, NPU_FUNC_WIFI, wlan_data, len);
+ if (!err)
+ memcpy(data, wlan_data->d, data_len);
+ kfree(wlan_data);
+
+ return err;
+}
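
For reference, a minimal, hypothetical call site for the send helper above; the interface index is illustrative, and the command ID is one of those used later in this file:

	u32 val = 0;
	int err;

	/* Push a 32-bit "force to CPU" flag for interface 0; the payload
	 * is copied into the trailing flex array of the mailbox message.
	 */
	err = airoha_npu_wlan_msg_send(npu, 0, WLAN_FUNC_SET_WAIT_IS_FORCE_TO_CPU,
				       &val, sizeof(val), GFP_KERNEL);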
+
+static int
+airoha_npu_wlan_set_reserved_memory(struct airoha_npu *npu,
+ int ifindex, const char *name,
+ enum airoha_npu_wlan_set_cmd func_id)
+{
+ struct device *dev = npu->dev;
+ struct resource res;
+ int err;
+ u32 val;
+
+ err = of_reserved_mem_region_to_resource_byname(dev->of_node, name,
+ &res);
+ if (err)
+ return err;
+
+ val = res.start;
+ return airoha_npu_wlan_msg_send(npu, ifindex, func_id, &val,
+ sizeof(val), GFP_KERNEL);
+}
+
+static int airoha_npu_wlan_init_memory(struct airoha_npu *npu)
+{
+ enum airoha_npu_wlan_set_cmd cmd = WLAN_FUNC_SET_WAIT_NPU_BAND0_ONCPU;
+ u32 val = 0;
+ int err;
+
+ err = airoha_npu_wlan_msg_send(npu, 1, cmd, &val, sizeof(val),
+ GFP_KERNEL);
+ if (err)
+ return err;
+
+ cmd = WLAN_FUNC_SET_WAIT_TX_BUF_CHECK_ADDR;
+ err = airoha_npu_wlan_set_reserved_memory(npu, 0, "tx-bufid", cmd);
+ if (err)
+ return err;
+
+ cmd = WLAN_FUNC_SET_WAIT_PKT_BUF_ADDR;
+ err = airoha_npu_wlan_set_reserved_memory(npu, 0, "pkt", cmd);
+ if (err)
+ return err;
+
+ cmd = WLAN_FUNC_SET_WAIT_TX_PKT_BUF_ADDR;
+ err = airoha_npu_wlan_set_reserved_memory(npu, 0, "tx-pkt", cmd);
+ if (err)
+ return err;
+
+ cmd = WLAN_FUNC_SET_WAIT_IS_FORCE_TO_CPU;
+ return airoha_npu_wlan_msg_send(npu, 0, cmd, &val, sizeof(val),
+ GFP_KERNEL);
+}
+
+static u32 airoha_npu_wlan_queue_addr_get(struct airoha_npu *npu, int qid,
+ bool xmit)
+{
+ if (xmit)
+ return REG_TX_BASE(qid + 2);
+
+ return REG_RX_BASE(qid);
+}
+
+static void airoha_npu_wlan_irq_status_set(struct airoha_npu *npu, u32 val)
+{
+ regmap_write(npu->regmap, REG_IRQ_STATUS, val);
+}
+
+static u32 airoha_npu_wlan_irq_status_get(struct airoha_npu *npu, int q)
+{
+ u32 val;
+
+ regmap_read(npu->regmap, REG_IRQ_STATUS, &val);
+ return val;
+}
+
+static void airoha_npu_wlan_irq_enable(struct airoha_npu *npu, int q)
+{
+ regmap_set_bits(npu->regmap, REG_IRQ_RXDONE(q), NPU_IRQ_RX_MASK(q));
+}
+
+static void airoha_npu_wlan_irq_disable(struct airoha_npu *npu, int q)
+{
+ regmap_clear_bits(npu->regmap, REG_IRQ_RXDONE(q), NPU_IRQ_RX_MASK(q));
+}
+
struct airoha_npu *airoha_npu_get(struct device *dev, dma_addr_t *stats_addr)
{
struct platform_device *pdev;
@@ -493,6 +645,14 @@ static int airoha_npu_probe(struct platform_device *pdev)
npu->ops.ppe_deinit = airoha_npu_ppe_deinit;
npu->ops.ppe_flush_sram_entries = airoha_npu_ppe_flush_sram_entries;
npu->ops.ppe_foe_commit_entry = airoha_npu_foe_commit_entry;
+ npu->ops.wlan_init_reserved_memory = airoha_npu_wlan_init_memory;
+ npu->ops.wlan_send_msg = airoha_npu_wlan_msg_send;
+ npu->ops.wlan_get_msg = airoha_npu_wlan_msg_get;
+ npu->ops.wlan_get_queue_addr = airoha_npu_wlan_queue_addr_get;
+ npu->ops.wlan_set_irq_status = airoha_npu_wlan_irq_status_set;
+ npu->ops.wlan_get_irq_status = airoha_npu_wlan_irq_status_get;
+ npu->ops.wlan_enable_irq = airoha_npu_wlan_irq_enable;
+ npu->ops.wlan_disable_irq = airoha_npu_wlan_irq_disable;
npu->regmap = devm_regmap_init_mmio(dev, base, &regmap_config);
if (IS_ERR(npu->regmap))
@@ -529,6 +689,15 @@ static int airoha_npu_probe(struct platform_device *pdev)
INIT_WORK(&core->wdt_work, airoha_npu_wdt_work);
}
+ /* wlan IRQ lines */
+ for (i = 0; i < ARRAY_SIZE(npu->irqs); i++) {
+ irq = platform_get_irq(pdev, i + ARRAY_SIZE(npu->cores) + 1);
+ if (irq < 0)
+ return irq;
+
+ npu->irqs[i] = irq;
+ }
+
err = dma_set_coherent_mask(dev, DMA_BIT_MASK(32));
if (err)
return err;
@@ -550,8 +719,7 @@ static int airoha_npu_probe(struct platform_device *pdev)
usleep_range(1000, 2000);
/* enable NPU cores */
- /* do not start core3 since it is used for WiFi offloading */
- regmap_write(npu->regmap, REG_CR_BOOT_CONFIG, 0xf7);
+ regmap_write(npu->regmap, REG_CR_BOOT_CONFIG, 0xff);
regmap_write(npu->regmap, REG_CR_BOOT_TRIGGER, 0x1);
msleep(100);
diff --git a/drivers/net/ethernet/airoha/airoha_npu.h b/drivers/net/ethernet/airoha/airoha_npu.h
deleted file mode 100644
index 98ec3be74ce4..000000000000
--- a/drivers/net/ethernet/airoha/airoha_npu.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (c) 2025 AIROHA Inc
- * Author: Lorenzo Bianconi <lorenzo@kernel.org>
- */
-
-#define NPU_NUM_CORES 8
-
-struct airoha_npu {
- struct device *dev;
- struct regmap *regmap;
-
- struct airoha_npu_core {
- struct airoha_npu *npu;
- /* protect concurrent npu memory accesses */
- spinlock_t lock;
- struct work_struct wdt_work;
- } cores[NPU_NUM_CORES];
-
- struct airoha_foe_stats __iomem *stats;
-
- struct {
- int (*ppe_init)(struct airoha_npu *npu);
- int (*ppe_deinit)(struct airoha_npu *npu);
- int (*ppe_flush_sram_entries)(struct airoha_npu *npu,
- dma_addr_t foe_addr,
- int sram_num_entries);
- int (*ppe_foe_commit_entry)(struct airoha_npu *npu,
- dma_addr_t foe_addr,
- u32 entry_size, u32 hash,
- bool ppe2);
- } ops;
-};
-
-struct airoha_npu *airoha_npu_get(struct device *dev, dma_addr_t *stats_addr);
-void airoha_npu_put(struct airoha_npu *npu);
diff --git a/drivers/net/ethernet/airoha/airoha_ppe.c b/drivers/net/ethernet/airoha/airoha_ppe.c
index 88694b08afa1..78473527ff50 100644
--- a/drivers/net/ethernet/airoha/airoha_ppe.c
+++ b/drivers/net/ethernet/airoha/airoha_ppe.c
@@ -6,11 +6,12 @@
#include <linux/ip.h>
#include <linux/ipv6.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
#include <linux/rhashtable.h>
#include <net/ipv6.h>
#include <net/pkt_cls.h>
-#include "airoha_npu.h"
#include "airoha_regs.h"
#include "airoha_eth.h"
@@ -190,6 +191,31 @@ static int airoha_ppe_flow_mangle_ipv4(const struct flow_action_entry *act,
return 0;
}
+static int airoha_ppe_get_wdma_info(struct net_device *dev, const u8 *addr,
+ struct airoha_wdma_info *info)
+{
+ struct net_device_path_stack stack;
+ struct net_device_path *path;
+ int err;
+
+ if (!dev)
+ return -ENODEV;
+
+ err = dev_fill_forward_path(dev, addr, &stack);
+ if (err)
+ return err;
+
+ path = &stack.path[stack.num_paths - 1];
+ if (path->type != DEV_PATH_MTK_WDMA)
+ return -1;
+
+ info->idx = path->mtk_wdma.wdma_idx;
+ info->bss = path->mtk_wdma.bss;
+ info->wcid = path->mtk_wdma.wcid;
+
+ return 0;
+}
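
For context, dev_fill_forward_path() walks the transmit path of the egress device; the DEV_PATH_MTK_WDMA hop checked above is reported by the MediaTek WLAN (mt76) drivers. A hedged summary of what is recovered:

	/* When the last hop is DEV_PATH_MTK_WDMA, the WDMA index, BSS and
	 * WCID identify the wireless band and station; they are later
	 * folded into the FOE entry (IB2 NBQ, ACTDP and the WDMA etype
	 * fields) for hardware forwarding.
	 */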
+
static int airoha_get_dsa_port(struct net_device **dev)
{
#if IS_ENABLED(CONFIG_NET_DSA)
@@ -220,9 +246,9 @@ static int airoha_ppe_foe_entry_prepare(struct airoha_eth *eth,
struct airoha_flow_data *data,
int l4proto)
{
- int dsa_port = airoha_get_dsa_port(&dev);
+ u32 qdata = FIELD_PREP(AIROHA_FOE_SHAPER_ID, 0x7f), ports_pad, val;
+ int wlan_etype = -EINVAL, dsa_port = airoha_get_dsa_port(&dev);
struct airoha_foe_mac_info_common *l2;
- u32 qdata, ports_pad, val;
u8 smac_id = 0xf;
memset(hwe, 0, sizeof(*hwe));
@@ -236,31 +262,47 @@ static int airoha_ppe_foe_entry_prepare(struct airoha_eth *eth,
AIROHA_FOE_IB1_BIND_TTL;
hwe->ib1 = val;
- val = FIELD_PREP(AIROHA_FOE_IB2_PORT_AG, 0x1f) |
- AIROHA_FOE_IB2_PSE_QOS;
- if (dsa_port >= 0)
- val |= FIELD_PREP(AIROHA_FOE_IB2_NBQ, dsa_port);
-
+ val = FIELD_PREP(AIROHA_FOE_IB2_PORT_AG, 0x1f);
if (dev) {
- struct airoha_gdm_port *port = netdev_priv(dev);
- u8 pse_port;
-
- if (!airoha_is_valid_gdm_port(eth, port))
- return -EINVAL;
-
- if (dsa_port >= 0)
- pse_port = port->id == 4 ? FE_PSE_PORT_GDM4 : port->id;
- else
- pse_port = 2; /* uplink relies on GDM2 loopback */
- val |= FIELD_PREP(AIROHA_FOE_IB2_PSE_PORT, pse_port);
-
- /* For downlink traffic consume SRAM memory for hw forwarding
- * descriptors queue.
- */
- if (airhoa_is_lan_gdm_port(port))
- val |= AIROHA_FOE_IB2_FAST_PATH;
-
- smac_id = port->id;
+ struct airoha_wdma_info info = {};
+
+ if (!airoha_ppe_get_wdma_info(dev, data->eth.h_dest, &info)) {
+ val |= FIELD_PREP(AIROHA_FOE_IB2_NBQ, info.idx) |
+ FIELD_PREP(AIROHA_FOE_IB2_PSE_PORT,
+ FE_PSE_PORT_CDM4);
+ qdata |= FIELD_PREP(AIROHA_FOE_ACTDP, info.bss);
+ wlan_etype = FIELD_PREP(AIROHA_FOE_MAC_WDMA_BAND,
+ info.idx) |
+ FIELD_PREP(AIROHA_FOE_MAC_WDMA_WCID,
+ info.wcid);
+ } else {
+ struct airoha_gdm_port *port = netdev_priv(dev);
+ u8 pse_port;
+
+ if (!airoha_is_valid_gdm_port(eth, port))
+ return -EINVAL;
+
+ if (dsa_port >= 0)
+ pse_port = port->id == 4 ? FE_PSE_PORT_GDM4
+ : port->id;
+ else
+ pse_port = 2; /* uplink relies on GDM2
+ * loopback
+ */
+
+ val |= FIELD_PREP(AIROHA_FOE_IB2_PSE_PORT, pse_port) |
+ AIROHA_FOE_IB2_PSE_QOS;
+ /* For downlink traffic, consume SRAM memory for the hw
+ * forwarding descriptor queue.
+ */
+ if (airhoa_is_lan_gdm_port(port))
+ val |= AIROHA_FOE_IB2_FAST_PATH;
+ if (dsa_port >= 0)
+ val |= FIELD_PREP(AIROHA_FOE_IB2_NBQ,
+ dsa_port);
+
+ smac_id = port->id;
+ }
}
if (is_multicast_ether_addr(data->eth.h_dest))
@@ -272,7 +314,6 @@ static int airoha_ppe_foe_entry_prepare(struct airoha_eth *eth,
if (type == PPE_PKT_TYPE_IPV6_ROUTE_3T)
hwe->ipv6.ports = ports_pad;
- qdata = FIELD_PREP(AIROHA_FOE_SHAPER_ID, 0x7f);
if (type == PPE_PKT_TYPE_BRIDGE) {
airoha_ppe_foe_set_bridge_addrs(&hwe->bridge, &data->eth);
hwe->bridge.data = qdata;
@@ -313,7 +354,9 @@ static int airoha_ppe_foe_entry_prepare(struct airoha_eth *eth,
l2->vlan2 = data->vlan.hdr[1].id;
}
- if (dsa_port >= 0) {
+ if (wlan_etype >= 0) {
+ l2->etype = wlan_etype;
+ } else if (dsa_port >= 0) {
l2->etype = BIT(dsa_port);
l2->etype |= !data->vlan.num ? BIT(15) : 0;
} else if (data->pppoe.num) {
@@ -490,6 +533,10 @@ static void airoha_ppe_foe_flow_stats_update(struct airoha_ppe *ppe,
meter = &hwe->ipv4.l2.meter;
}
+ pse_port = FIELD_GET(AIROHA_FOE_IB2_PSE_PORT, *ib2);
+ if (pse_port == FE_PSE_PORT_CDM4)
+ return;
+
airoha_ppe_foe_flow_stat_entry_reset(ppe, npu, index);
val = FIELD_GET(AIROHA_FOE_CHANNEL | AIROHA_FOE_QID, *data);
@@ -500,7 +547,6 @@ static void airoha_ppe_foe_flow_stats_update(struct airoha_ppe *ppe,
AIROHA_FOE_IB2_PSE_QOS | AIROHA_FOE_IB2_FAST_PATH);
*meter |= FIELD_PREP(AIROHA_FOE_TUNNEL_MTU, val);
- pse_port = FIELD_GET(AIROHA_FOE_IB2_PSE_PORT, *ib2);
nbq = pse_port == 1 ? 6 : 5;
*ib2 &= ~(AIROHA_FOE_IB2_NBQ | AIROHA_FOE_IB2_PSE_PORT |
AIROHA_FOE_IB2_PSE_QOS);
@@ -570,7 +616,7 @@ static bool airoha_ppe_foe_compare_entry(struct airoha_flow_table_entry *e,
static int airoha_ppe_foe_commit_entry(struct airoha_ppe *ppe,
struct airoha_foe_entry *e,
- u32 hash)
+ u32 hash, bool rx_wlan)
{
struct airoha_foe_entry *hwe = ppe->foe + hash * sizeof(*hwe);
u32 ts = airoha_ppe_get_timestamp(ppe);
@@ -593,7 +639,8 @@ static int airoha_ppe_foe_commit_entry(struct airoha_ppe *ppe,
goto unlock;
}
- airoha_ppe_foe_flow_stats_update(ppe, npu, hwe, hash);
+ if (!rx_wlan)
+ airoha_ppe_foe_flow_stats_update(ppe, npu, hwe, hash);
if (hash < PPE_SRAM_NUM_ENTRIES) {
dma_addr_t addr = ppe->foe_dma + hash * sizeof(*hwe);
@@ -619,7 +666,7 @@ static void airoha_ppe_foe_remove_flow(struct airoha_ppe *ppe,
e->data.ib1 &= ~AIROHA_FOE_IB1_BIND_STATE;
e->data.ib1 |= FIELD_PREP(AIROHA_FOE_IB1_BIND_STATE,
AIROHA_FOE_STATE_INVALID);
- airoha_ppe_foe_commit_entry(ppe, &e->data, e->hash);
+ airoha_ppe_foe_commit_entry(ppe, &e->data, e->hash, false);
e->hash = 0xffff;
}
if (e->type == FLOW_TYPE_L2_SUBFLOW) {
@@ -658,7 +705,7 @@ static void airoha_ppe_foe_flow_remove_entry(struct airoha_ppe *ppe,
static int
airoha_ppe_foe_commit_subflow_entry(struct airoha_ppe *ppe,
struct airoha_flow_table_entry *e,
- u32 hash)
+ u32 hash, bool rx_wlan)
{
u32 mask = AIROHA_FOE_IB1_BIND_PACKET_TYPE | AIROHA_FOE_IB1_BIND_UDP;
struct airoha_foe_entry *hwe_p, hwe;
@@ -699,14 +746,14 @@ airoha_ppe_foe_commit_subflow_entry(struct airoha_ppe *ppe,
}
hwe.bridge.data = e->data.bridge.data;
- airoha_ppe_foe_commit_entry(ppe, &hwe, hash);
+ airoha_ppe_foe_commit_entry(ppe, &hwe, hash, rx_wlan);
return 0;
}
static void airoha_ppe_foe_insert_entry(struct airoha_ppe *ppe,
struct sk_buff *skb,
- u32 hash)
+ u32 hash, bool rx_wlan)
{
struct airoha_flow_table_entry *e;
struct airoha_foe_bridge br = {};
@@ -739,7 +786,7 @@ static void airoha_ppe_foe_insert_entry(struct airoha_ppe *ppe,
if (!airoha_ppe_foe_compare_entry(e, hwe))
continue;
- airoha_ppe_foe_commit_entry(ppe, &e->data, hash);
+ airoha_ppe_foe_commit_entry(ppe, &e->data, hash, rx_wlan);
commit_done = true;
e->hash = hash;
}
@@ -751,7 +798,7 @@ static void airoha_ppe_foe_insert_entry(struct airoha_ppe *ppe,
e = rhashtable_lookup_fast(&ppe->l2_flows, &br,
airoha_l2_flow_table_params);
if (e)
- airoha_ppe_foe_commit_subflow_entry(ppe, e, hash);
+ airoha_ppe_foe_commit_subflow_entry(ppe, e, hash, rx_wlan);
unlock:
spin_unlock_bh(&ppe_lock);
}
@@ -890,11 +937,10 @@ static int airoha_ppe_entry_idle_time(struct airoha_ppe *ppe,
return airoha_ppe_get_entry_idle_time(ppe, e->data.ib1);
}
-static int airoha_ppe_flow_offload_replace(struct airoha_gdm_port *port,
+static int airoha_ppe_flow_offload_replace(struct airoha_eth *eth,
struct flow_cls_offload *f)
{
struct flow_rule *rule = flow_cls_offload_flow_rule(f);
- struct airoha_eth *eth = port->qdma->eth;
struct airoha_flow_table_entry *e;
struct airoha_flow_data data = {};
struct net_device *odev = NULL;
@@ -1091,10 +1137,9 @@ free_entry:
return err;
}
-static int airoha_ppe_flow_offload_destroy(struct airoha_gdm_port *port,
+static int airoha_ppe_flow_offload_destroy(struct airoha_eth *eth,
struct flow_cls_offload *f)
{
- struct airoha_eth *eth = port->qdma->eth;
struct airoha_flow_table_entry *e;
e = rhashtable_lookup(&eth->flow_table, &f->cookie,
@@ -1137,10 +1182,9 @@ void airoha_ppe_foe_entry_get_stats(struct airoha_ppe *ppe, u32 hash,
rcu_read_unlock();
}
-static int airoha_ppe_flow_offload_stats(struct airoha_gdm_port *port,
+static int airoha_ppe_flow_offload_stats(struct airoha_eth *eth,
struct flow_cls_offload *f)
{
- struct airoha_eth *eth = port->qdma->eth;
struct airoha_flow_table_entry *e;
u32 idle;
@@ -1164,16 +1208,16 @@ static int airoha_ppe_flow_offload_stats(struct airoha_gdm_port *port,
return 0;
}
-static int airoha_ppe_flow_offload_cmd(struct airoha_gdm_port *port,
+static int airoha_ppe_flow_offload_cmd(struct airoha_eth *eth,
struct flow_cls_offload *f)
{
switch (f->command) {
case FLOW_CLS_REPLACE:
- return airoha_ppe_flow_offload_replace(port, f);
+ return airoha_ppe_flow_offload_replace(eth, f);
case FLOW_CLS_DESTROY:
- return airoha_ppe_flow_offload_destroy(port, f);
+ return airoha_ppe_flow_offload_destroy(eth, f);
case FLOW_CLS_STATS:
- return airoha_ppe_flow_offload_stats(port, f);
+ return airoha_ppe_flow_offload_stats(eth, f);
default:
break;
}
@@ -1240,11 +1284,10 @@ error_npu_put:
return err;
}
-int airoha_ppe_setup_tc_block_cb(struct net_device *dev, void *type_data)
+int airoha_ppe_setup_tc_block_cb(struct airoha_ppe_dev *dev, void *type_data)
{
- struct airoha_gdm_port *port = netdev_priv(dev);
- struct flow_cls_offload *cls = type_data;
- struct airoha_eth *eth = port->qdma->eth;
+ struct airoha_ppe *ppe = dev->priv;
+ struct airoha_eth *eth = ppe->eth;
int err = 0;
mutex_lock(&flow_offload_mutex);
@@ -1252,16 +1295,17 @@ int airoha_ppe_setup_tc_block_cb(struct net_device *dev, void *type_data)
if (!eth->npu)
err = airoha_ppe_offload_setup(eth);
if (!err)
- err = airoha_ppe_flow_offload_cmd(port, cls);
+ err = airoha_ppe_flow_offload_cmd(eth, type_data);
mutex_unlock(&flow_offload_mutex);
return err;
}
-void airoha_ppe_check_skb(struct airoha_ppe *ppe, struct sk_buff *skb,
- u16 hash)
+void airoha_ppe_check_skb(struct airoha_ppe_dev *dev, struct sk_buff *skb,
+ u16 hash, bool rx_wlan)
{
+ struct airoha_ppe *ppe = dev->priv;
u16 now, diff;
if (hash > PPE_HASH_MASK)
@@ -1273,7 +1317,7 @@ void airoha_ppe_check_skb(struct airoha_ppe *ppe, struct sk_buff *skb,
return;
ppe->foe_check_time[hash] = now;
- airoha_ppe_foe_insert_entry(ppe, skb, hash);
+ airoha_ppe_foe_insert_entry(ppe, skb, hash, rx_wlan);
}
void airoha_ppe_init_upd_mem(struct airoha_gdm_port *port)
@@ -1297,6 +1341,61 @@ void airoha_ppe_init_upd_mem(struct airoha_gdm_port *port)
PPE_UPDMEM_WR_MASK | PPE_UPDMEM_REQ_MASK);
}
+struct airoha_ppe_dev *airoha_ppe_get_dev(struct device *dev)
+{
+ struct platform_device *pdev;
+ struct device_node *np;
+ struct airoha_eth *eth;
+
+ np = of_parse_phandle(dev->of_node, "airoha,eth", 0);
+ if (!np)
+ return ERR_PTR(-ENODEV);
+
+ pdev = of_find_device_by_node(np);
+ if (!pdev) {
+ dev_err(dev, "cannot find device node %s\n", np->name);
+ of_node_put(np);
+ return ERR_PTR(-ENODEV);
+ }
+ of_node_put(np);
+
+ if (!try_module_get(THIS_MODULE)) {
+ dev_err(dev, "failed to get the device driver module\n");
+ goto error_pdev_put;
+ }
+
+ eth = platform_get_drvdata(pdev);
+ if (!eth)
+ goto error_module_put;
+
+ if (!device_link_add(dev, &pdev->dev, DL_FLAG_AUTOREMOVE_SUPPLIER)) {
+ dev_err(&pdev->dev,
+ "failed to create device link to consumer %s\n",
+ dev_name(dev));
+ goto error_module_put;
+ }
+
+ return &eth->ppe->dev;
+
+error_module_put:
+ module_put(THIS_MODULE);
+error_pdev_put:
+ platform_device_put(pdev);
+
+ return ERR_PTR(-ENODEV);
+}
+EXPORT_SYMBOL_GPL(airoha_ppe_get_dev);
+
+void airoha_ppe_put_dev(struct airoha_ppe_dev *dev)
+{
+ struct airoha_ppe *ppe = dev->priv;
+ struct airoha_eth *eth = ppe->eth;
+
+ module_put(THIS_MODULE);
+ put_device(eth->dev);
+}
+EXPORT_SYMBOL_GPL(airoha_ppe_put_dev);
+
int airoha_ppe_init(struct airoha_eth *eth)
{
struct airoha_ppe *ppe;
@@ -1306,6 +1405,10 @@ int airoha_ppe_init(struct airoha_eth *eth)
if (!ppe)
return -ENOMEM;
+ ppe->dev.ops.setup_tc_block_cb = airoha_ppe_setup_tc_block_cb;
+ ppe->dev.ops.check_skb = airoha_ppe_check_skb;
+ ppe->dev.priv = ppe;
+
foe_size = PPE_NUM_ENTRIES * sizeof(struct airoha_foe_entry);
ppe->foe = dmam_alloc_coherent(eth->dev, foe_size, &ppe->foe_dma,
GFP_KERNEL);
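
airoha_ppe_get_dev()/airoha_ppe_put_dev() hand an external module a refcounted handle to the PPE, resolved through an "airoha,eth" phandle, with a device link tying the consumer's lifetime to the Ethernet driver. A hedged consumer sketch — the probe function and usage comments are hypothetical, only the two exported calls and the ops table come from the patch:

static int example_wlan_probe(struct platform_device *pdev)
{
	struct airoha_ppe_dev *ppe_dev;

	ppe_dev = airoha_ppe_get_dev(&pdev->dev);
	if (IS_ERR(ppe_dev))
		return PTR_ERR(ppe_dev);

	/*
	 * Per received packet the consumer would call
	 * ppe_dev->ops.check_skb(ppe_dev, skb, hash, true), with
	 * rx_wlan=true so the PPE skips the flow-stats path.
	 */

	/* on teardown */
	airoha_ppe_put_dev(ppe_dev);

	return 0;
}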
diff --git a/drivers/net/ethernet/amd/pds_core/main.c b/drivers/net/ethernet/amd/pds_core/main.c
index 9b81e1c260c2..c7a2eff57632 100644
--- a/drivers/net/ethernet/amd/pds_core/main.c
+++ b/drivers/net/ethernet/amd/pds_core/main.c
@@ -280,7 +280,7 @@ static int pdsc_init_pf(struct pdsc *pdsc)
goto err_out_del_dev;
}
- hr = devl_health_reporter_create(dl, &pdsc_fw_reporter_ops, 0, pdsc);
+ hr = devl_health_reporter_create(dl, &pdsc_fw_reporter_ops, pdsc);
if (IS_ERR(hr)) {
devl_unlock(dl);
dev_warn(pdsc->dev, "Failed to create fw reporter: %pe\n", hr);
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c
index be0d2c7d08dc..b6e1b67a2d0e 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c
@@ -329,6 +329,7 @@ static int xgbe_get_coalesce(struct net_device *netdev,
ec->rx_coalesce_usecs = pdata->rx_usecs;
ec->rx_max_coalesced_frames = pdata->rx_frames;
+ ec->tx_coalesce_usecs = pdata->tx_usecs;
ec->tx_max_coalesced_frames = pdata->tx_frames;
return 0;
@@ -342,7 +343,8 @@ static int xgbe_set_coalesce(struct net_device *netdev,
struct xgbe_prv_data *pdata = netdev_priv(netdev);
struct xgbe_hw_if *hw_if = &pdata->hw_if;
unsigned int rx_frames, rx_riwt, rx_usecs;
- unsigned int tx_frames;
+ unsigned int tx_frames, tx_usecs;
+ unsigned int jiffy_us = jiffies_to_usecs(1);
rx_riwt = hw_if->usec_to_riwt(pdata, ec->rx_coalesce_usecs);
rx_usecs = ec->rx_coalesce_usecs;
@@ -364,20 +366,42 @@ static int xgbe_set_coalesce(struct net_device *netdev,
return -EINVAL;
}
+ tx_usecs = ec->tx_coalesce_usecs;
tx_frames = ec->tx_max_coalesced_frames;
/* Check the bounds of values for Tx */
+ if (!tx_usecs) {
+ NL_SET_ERR_MSG_FMT_MOD(extack,
+ "tx-usecs must not be 0");
+ return -EINVAL;
+ }
+ if (tx_usecs > XGMAC_MAX_COAL_TX_TICK) {
+ NL_SET_ERR_MSG_FMT_MOD(extack, "tx-usecs is limited to %d usec",
+ XGMAC_MAX_COAL_TX_TICK);
+ return -EINVAL;
+ }
if (tx_frames > pdata->tx_desc_count) {
netdev_err(netdev, "tx-frames is limited to %d frames\n",
pdata->tx_desc_count);
return -EINVAL;
}
+ /* Round tx-usecs down to the nearest jiffy multiple (min one jiffy) */
+ if (tx_usecs % jiffy_us) {
+ tx_usecs = rounddown(tx_usecs, jiffy_us);
+ if (!tx_usecs)
+ tx_usecs = jiffy_us;
+ NL_SET_ERR_MSG_FMT_MOD(extack,
+ "tx-usecs rounded to %u usec due to jiffy granularity (%u usec)",
+ tx_usecs, jiffy_us);
+ }
+
pdata->rx_riwt = rx_riwt;
pdata->rx_usecs = rx_usecs;
pdata->rx_frames = rx_frames;
hw_if->config_rx_coalesce(pdata);
+ pdata->tx_usecs = tx_usecs;
pdata->tx_frames = tx_frames;
hw_if->config_tx_coalesce(pdata);
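
The rounding rule above is easy to verify in isolation. A user-space sketch with illustrative values, assuming HZ=250 so one jiffy is 4000 us:

#include <stdio.h>

static unsigned int round_tx_usecs(unsigned int tx_usecs,
				   unsigned int jiffy_us)
{
	if (tx_usecs % jiffy_us) {
		tx_usecs -= tx_usecs % jiffy_us;	/* rounddown() */
		if (!tx_usecs)
			tx_usecs = jiffy_us;	/* never below one jiffy */
	}
	return tx_usecs;
}

int main(void)
{
	printf("%u\n", round_tx_usecs(10500, 4000));	/* 8000 */
	printf("%u\n", round_tx_usecs(3000, 4000));	/* 4000 */
	return 0;
}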
@@ -440,7 +464,7 @@ static int xgbe_set_rxfh(struct net_device *netdev,
{
struct xgbe_prv_data *pdata = netdev_priv(netdev);
struct xgbe_hw_if *hw_if = &pdata->hw_if;
- unsigned int ret;
+ int ret;
if (rxfh->hfunc != ETH_RSS_HASH_NO_CHANGE &&
rxfh->hfunc != ETH_RSS_HASH_TOP) {
@@ -709,7 +733,7 @@ out:
}
static const struct ethtool_ops xgbe_ethtool_ops = {
- .supported_coalesce_params = ETHTOOL_COALESCE_RX_USECS |
+ .supported_coalesce_params = ETHTOOL_COALESCE_USECS |
ETHTOOL_COALESCE_MAX_FRAMES,
.get_drvinfo = xgbe_get_drvinfo,
.get_msglevel = xgbe_get_msglevel,
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c b/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c
index d40011e8ddf2..65eb7b577b65 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c
@@ -70,7 +70,7 @@ static int xgbe_i2c_set_enable(struct xgbe_prv_data *pdata, bool enable)
static int xgbe_i2c_disable(struct xgbe_prv_data *pdata)
{
- unsigned int ret;
+ int ret;
ret = xgbe_i2c_set_enable(pdata, false);
if (ret) {
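
The xgbe-ethtool, xgbe-i2c, and xgbe-phy-v2 hunks all convert `unsigned int ret` to `int ret` for the same reason: a negative errno stored in an unsigned variable can never test negative. A standalone illustration (compilers typically warn that the unsigned comparison is always false):

#include <errno.h>
#include <stdio.h>

static int callee(void)
{
	return -EINVAL;
}

int main(void)
{
	unsigned int uret = callee();	/* -22 wraps to 4294967274 */
	int ret = callee();

	if (ret < 0)
		printf("signed ret sees the error\n");	/* printed */
	if (uret < 0)
		printf("never printed: unsigned compare\n");
	return 0;
}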
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
index 23c39e92e783..a56efc1bee33 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
@@ -2902,7 +2902,7 @@ static void xgbe_phy_sfp_setup(struct xgbe_prv_data *pdata)
static int xgbe_phy_int_mdio_reset(struct xgbe_prv_data *pdata)
{
struct xgbe_phy_data *phy_data = pdata->phy_data;
- unsigned int ret;
+ int ret;
ret = pdata->hw_if.set_gpio(pdata, phy_data->mdio_reset_gpio);
if (ret)
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h
index d7e03e292ec4..0fa80a238ac5 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe.h
+++ b/drivers/net/ethernet/amd/xgbe/xgbe.h
@@ -168,6 +168,7 @@
/* Default coalescing parameters */
#define XGMAC_INIT_DMA_TX_USECS 1000
#define XGMAC_INIT_DMA_TX_FRAMES 25
+#define XGMAC_MAX_COAL_TX_TICK 100000
#define XGMAC_MAX_DMA_RIWT 0xff
#define XGMAC_INIT_DMA_RX_USECS 30
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 0daa08cecaf2..5f4f4d99f1e7 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -142,6 +142,7 @@ static const struct {
[NETXTREME_E_P5_VF] = { "Broadcom BCM5750X NetXtreme-E Ethernet Virtual Function" },
[NETXTREME_E_P5_VF_HV] = { "Broadcom BCM5750X NetXtreme-E Virtual Function for Hyper-V" },
[NETXTREME_E_P7_VF] = { "Broadcom BCM5760X Virtual Function" },
+ [NETXTREME_E_P7_VF_HV] = { "Broadcom BCM5760X Virtual Function for Hyper-V" },
};
static const struct pci_device_id bnxt_pci_tbl[] = {
@@ -217,6 +218,7 @@ static const struct pci_device_id bnxt_pci_tbl[] = {
{ PCI_VDEVICE(BROADCOM, 0x1808), .driver_data = NETXTREME_E_P5_VF_HV },
{ PCI_VDEVICE(BROADCOM, 0x1809), .driver_data = NETXTREME_E_P5_VF_HV },
{ PCI_VDEVICE(BROADCOM, 0x1819), .driver_data = NETXTREME_E_P7_VF },
+ { PCI_VDEVICE(BROADCOM, 0x181b), .driver_data = NETXTREME_E_P7_VF_HV },
{ PCI_VDEVICE(BROADCOM, 0xd800), .driver_data = NETXTREME_S_VF },
#endif
{ 0 }
@@ -315,7 +317,8 @@ static bool bnxt_vf_pciid(enum board_idx idx)
return (idx == NETXTREME_C_VF || idx == NETXTREME_E_VF ||
idx == NETXTREME_S_VF || idx == NETXTREME_C_VF_HV ||
idx == NETXTREME_E_VF_HV || idx == NETXTREME_E_P5_VF ||
- idx == NETXTREME_E_P5_VF_HV || idx == NETXTREME_E_P7_VF);
+ idx == NETXTREME_E_P5_VF_HV || idx == NETXTREME_E_P7_VF ||
+ idx == NETXTREME_E_P7_VF_HV);
}
#define DB_CP_REARM_FLAGS (DB_KEY_CP | DB_IDX_VALID)
@@ -3797,8 +3800,7 @@ static void bnxt_free_rx_rings(struct bnxt *bp)
xdp_rxq_info_unreg(&rxr->xdp_rxq);
page_pool_destroy(rxr->page_pool);
- if (bnxt_separate_head_pool(rxr))
- page_pool_destroy(rxr->head_pool);
+ page_pool_destroy(rxr->head_pool);
rxr->page_pool = rxr->head_pool = NULL;
kfree(rxr->rx_agg_bmap);
@@ -3845,6 +3847,8 @@ static int bnxt_alloc_rx_page_pool(struct bnxt *bp,
pool = page_pool_create(&pp);
if (IS_ERR(pool))
goto err_destroy_pp;
+ } else {
+ page_pool_get(pool);
}
rxr->head_pool = pool;
@@ -6969,6 +6973,8 @@ static int bnxt_hwrm_vnic_qcaps(struct bnxt *bp)
bp->rss_cap |= BNXT_RSS_CAP_ESP_V4_RSS_CAP;
if (flags & VNIC_QCAPS_RESP_FLAGS_RSS_IPSEC_ESP_SPI_IPV6_CAP)
bp->rss_cap |= BNXT_RSS_CAP_ESP_V6_RSS_CAP;
+ if (flags & VNIC_QCAPS_RESP_FLAGS_RSS_IPV6_FLOW_LABEL_CAP)
+ bp->rss_cap |= BNXT_RSS_CAP_IPV6_FLOW_LABEL_RSS_CAP;
if (flags & VNIC_QCAPS_RESP_FLAGS_RE_FLUSH_CAP)
bp->fw_cap |= BNXT_FW_CAP_VNIC_RE_FLUSH;
}
@@ -15922,8 +15928,7 @@ err_rxq_info_unreg:
xdp_rxq_info_unreg(&clone->xdp_rxq);
err_page_pool_destroy:
page_pool_destroy(clone->page_pool);
- if (bnxt_separate_head_pool(clone))
- page_pool_destroy(clone->head_pool);
+ page_pool_destroy(clone->head_pool);
clone->page_pool = NULL;
clone->head_pool = NULL;
return rc;
@@ -15941,8 +15946,7 @@ static void bnxt_queue_mem_free(struct net_device *dev, void *qmem)
xdp_rxq_info_unreg(&rxr->xdp_rxq);
page_pool_destroy(rxr->page_pool);
- if (bnxt_separate_head_pool(rxr))
- page_pool_destroy(rxr->head_pool);
+ page_pool_destroy(rxr->head_pool);
rxr->page_pool = NULL;
rxr->head_pool = NULL;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index fda0d3cc6227..1bb2a5de88cd 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -2130,6 +2130,7 @@ enum board_idx {
NETXTREME_E_P5_VF,
NETXTREME_E_P5_VF_HV,
NETXTREME_E_P7_VF,
+ NETXTREME_E_P7_VF_HV,
};
#define BNXT_TRACE_BUF_MAGIC_BYTE ((u8)0xbc)
@@ -2407,6 +2408,7 @@ struct bnxt {
#define BNXT_RSS_CAP_ESP_V4_RSS_CAP BIT(6)
#define BNXT_RSS_CAP_ESP_V6_RSS_CAP BIT(7)
#define BNXT_RSS_CAP_MULTI_RSS_CTX BIT(8)
+#define BNXT_RSS_CAP_IPV6_FLOW_LABEL_RSS_CAP BIT(9)
u8 rss_hash_key[HW_HASH_KEY_SIZE];
u8 rss_hash_key_valid:1;
@@ -2542,6 +2544,7 @@ struct bnxt {
u16 fw_rx_stats_ext_size;
u16 fw_tx_stats_ext_size;
u16 hw_ring_stats_size;
+ u16 pcie_stat_len;
u8 pri2cos_idx[8];
u8 pri2cos_valid;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
index 4c4581b0342e..43fb75806cd6 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
@@ -220,7 +220,7 @@ __bnxt_dl_reporter_create(struct bnxt *bp,
{
struct devlink_health_reporter *reporter;
- reporter = devlink_health_reporter_create(bp->dl, ops, 0, bp);
+ reporter = devlink_health_reporter_create(bp->dl, ops, bp);
if (IS_ERR(reporter)) {
netdev_warn(bp->dev, "Failed to create %s health reporter, rc = %ld\n",
ops->name, PTR_ERR(reporter));
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index 1b37612b1c01..2830a2b17a27 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -1584,6 +1584,8 @@ static u64 get_ethtool_ipv6_rss(struct bnxt *bp)
{
if (bp->rss_hash_cfg & VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6)
return RXH_IP_SRC | RXH_IP_DST;
+ if (bp->rss_hash_cfg & VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6_FLOW_LABEL)
+ return RXH_IP_SRC | RXH_IP_DST | RXH_IP6_FL;
return 0;
}
@@ -1662,13 +1664,18 @@ static int bnxt_set_rxfh_fields(struct net_device *dev,
if (cmd->data == RXH_4TUPLE)
tuple = 4;
- else if (cmd->data == RXH_2TUPLE)
+ else if (cmd->data == RXH_2TUPLE ||
+ cmd->data == (RXH_2TUPLE | RXH_IP6_FL))
tuple = 2;
else if (!cmd->data)
tuple = 0;
else
return -EINVAL;
+ if (cmd->data & RXH_IP6_FL &&
+ !(bp->rss_cap & BNXT_RSS_CAP_IPV6_FLOW_LABEL_RSS_CAP))
+ return -EINVAL;
+
if (cmd->flow_type == TCP_V4_FLOW) {
rss_hash_cfg &= ~VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV4;
if (tuple == 4)
@@ -1732,10 +1739,15 @@ static int bnxt_set_rxfh_fields(struct net_device *dev,
case AH_V6_FLOW:
case ESP_V6_FLOW:
case IPV6_FLOW:
- if (tuple == 2)
+ rss_hash_cfg &= ~(VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6 |
+ VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6_FLOW_LABEL);
+ if (!tuple)
+ break;
+ if (cmd->data & RXH_IP6_FL)
+ rss_hash_cfg |=
+ VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6_FLOW_LABEL;
+ else if (tuple == 2)
rss_hash_cfg |= VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6;
- else if (!tuple)
- rss_hash_cfg &= ~VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6;
break;
}
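
For IPv6 flow types the rewritten branch first clears both hash-type bits, then sets at most one; a recap of the mapping (flow-label hashing additionally requires the BNXT_RSS_CAP_IPV6_FLOW_LABEL_RSS_CAP firmware capability checked earlier):

/*
 * cmd->data                   resulting IPv6 hash type
 * 0                           none (both bits cleared)
 * RXH_2TUPLE                  VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6
 * RXH_2TUPLE | RXH_IP6_FL     VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6_FLOW_LABEL
 */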
@@ -2049,38 +2061,52 @@ static void bnxt_get_drvinfo(struct net_device *dev,
static int bnxt_get_regs_len(struct net_device *dev)
{
struct bnxt *bp = netdev_priv(dev);
- int reg_len;
if (!BNXT_PF(bp))
return -EOPNOTSUPP;
- reg_len = BNXT_PXP_REG_LEN;
+ return BNXT_PXP_REG_LEN + bp->pcie_stat_len;
+}
- if (bp->fw_cap & BNXT_FW_CAP_PCIE_STATS_SUPPORTED)
- reg_len += sizeof(struct pcie_ctx_hw_stats);
+static void *
+__bnxt_hwrm_pcie_qstats(struct bnxt *bp, struct hwrm_pcie_qstats_input *req)
+{
+ struct pcie_ctx_hw_stats_v2 *hw_pcie_stats;
+ dma_addr_t hw_pcie_stats_addr;
+ int rc;
- return reg_len;
+ hw_pcie_stats = hwrm_req_dma_slice(bp, req, sizeof(*hw_pcie_stats),
+ &hw_pcie_stats_addr);
+ if (!hw_pcie_stats)
+ return NULL;
+
+ req->pcie_stat_size = cpu_to_le16(sizeof(*hw_pcie_stats));
+ req->pcie_stat_host_addr = cpu_to_le64(hw_pcie_stats_addr);
+ rc = hwrm_req_send(bp, req);
+
+ return rc ? NULL : hw_pcie_stats;
}
#define BNXT_PCIE_32B_ENTRY(start, end) \
- { offsetof(struct pcie_ctx_hw_stats, start), \
- offsetof(struct pcie_ctx_hw_stats, end) }
+ { offsetof(struct pcie_ctx_hw_stats_v2, start),\
+ offsetof(struct pcie_ctx_hw_stats_v2, end) }
static const struct {
u16 start;
u16 end;
} bnxt_pcie_32b_entries[] = {
BNXT_PCIE_32B_ENTRY(pcie_ltssm_histogram[0], pcie_ltssm_histogram[3]),
+ BNXT_PCIE_32B_ENTRY(pcie_tl_credit_nph_histogram[0], unused_1),
+ BNXT_PCIE_32B_ENTRY(pcie_rd_latency_histogram[0], unused_2),
};
static void bnxt_get_regs(struct net_device *dev, struct ethtool_regs *regs,
void *_p)
{
- struct pcie_ctx_hw_stats *hw_pcie_stats;
+ struct hwrm_pcie_qstats_output *resp;
struct hwrm_pcie_qstats_input *req;
struct bnxt *bp = netdev_priv(dev);
- dma_addr_t hw_pcie_stats_addr;
- int rc;
+ u8 *src;
regs->version = 0;
if (!(bp->fw_dbg_cap & DBG_QCAPS_RESP_FLAGS_REG_ACCESS_RESTRICTED))
@@ -2092,24 +2118,21 @@ static void bnxt_get_regs(struct net_device *dev, struct ethtool_regs *regs,
if (hwrm_req_init(bp, req, HWRM_PCIE_QSTATS))
return;
- hw_pcie_stats = hwrm_req_dma_slice(bp, req, sizeof(*hw_pcie_stats),
- &hw_pcie_stats_addr);
- if (!hw_pcie_stats) {
- hwrm_req_drop(bp, req);
- return;
- }
-
- regs->version = 1;
- hwrm_req_hold(bp, req); /* hold on to slice */
- req->pcie_stat_size = cpu_to_le16(sizeof(*hw_pcie_stats));
- req->pcie_stat_host_addr = cpu_to_le64(hw_pcie_stats_addr);
- rc = hwrm_req_send(bp, req);
- if (!rc) {
+ resp = hwrm_req_hold(bp, req);
+ src = __bnxt_hwrm_pcie_qstats(bp, req);
+ if (src) {
u8 *dst = (u8 *)(_p + BNXT_PXP_REG_LEN);
- u8 *src = (u8 *)hw_pcie_stats;
- int i, j;
+ int i, j, len;
+
+ len = min(bp->pcie_stat_len, le16_to_cpu(resp->pcie_stat_size));
+ if (len <= sizeof(struct pcie_ctx_hw_stats))
+ regs->version = 1;
+ else if (len < sizeof(struct pcie_ctx_hw_stats_v2))
+ regs->version = 2;
+ else
+ regs->version = 3;
- for (i = 0, j = 0; i < sizeof(*hw_pcie_stats); ) {
+ for (i = 0, j = 0; i < len; ) {
if (i >= bnxt_pcie_32b_entries[j].start &&
i <= bnxt_pcie_32b_entries[j].end) {
u32 *dst32 = (u32 *)(dst + i);
@@ -5254,6 +5277,26 @@ static int bnxt_get_ts_info(struct net_device *dev,
return 0;
}
+static void bnxt_hwrm_pcie_qstats(struct bnxt *bp)
+{
+ struct hwrm_pcie_qstats_output *resp;
+ struct hwrm_pcie_qstats_input *req;
+
+ bp->pcie_stat_len = 0;
+ if (!(bp->fw_cap & BNXT_FW_CAP_PCIE_STATS_SUPPORTED))
+ return;
+
+ if (hwrm_req_init(bp, req, HWRM_PCIE_QSTATS))
+ return;
+
+ resp = hwrm_req_hold(bp, req);
+ if (__bnxt_hwrm_pcie_qstats(bp, req))
+ bp->pcie_stat_len = min_t(u16,
+ le16_to_cpu(resp->pcie_stat_size),
+ sizeof(struct pcie_ctx_hw_stats_v2));
+ hwrm_req_drop(bp, req);
+}
+
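bnxt_hwrm_pcie_qstats() probes the firmware once at init to learn how many stats bytes it returns; bnxt_get_regs() then sizes the dump as BNXT_PXP_REG_LEN bytes of PXP registers followed by pcie_stat_len bytes of PCIe stats. The version selection reduces to a length test against the two layouts; a hedged restatement (helper name hypothetical, struct names from the patch):

static int example_regs_version(u16 len)
{
	if (len <= sizeof(struct pcie_ctx_hw_stats))
		return 1;	/* legacy stats payload (or shorter) */
	if (len < sizeof(struct pcie_ctx_hw_stats_v2))
		return 2;	/* truncated v2 payload */
	return 3;		/* full v2 payload */
}
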
void bnxt_ethtool_init(struct bnxt *bp)
{
struct hwrm_selftest_qlist_output *resp;
@@ -5262,6 +5305,7 @@ void bnxt_ethtool_init(struct bnxt *bp)
struct net_device *dev = bp->dev;
int i, rc;
+ bnxt_hwrm_pcie_qstats(bp);
if (!(bp->fw_cap & BNXT_FW_CAP_PKG_VER))
bnxt_get_pkgver(dev);
diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h
index c9a5c8beb2fa..904954610611 100644
--- a/drivers/net/ethernet/cadence/macb.h
+++ b/drivers/net/ethernet/cadence/macb.h
@@ -184,6 +184,13 @@
#define GEM_DCFG8 0x029C /* Design Config 8 */
#define GEM_DCFG10 0x02A4 /* Design Config 10 */
#define GEM_DCFG12 0x02AC /* Design Config 12 */
+#define GEM_ENST_START_TIME_Q0 0x0800 /* ENST Q0 start time */
+#define GEM_ENST_START_TIME_Q1 0x0804 /* ENST Q1 start time */
+#define GEM_ENST_ON_TIME_Q0 0x0820 /* ENST Q0 on time */
+#define GEM_ENST_ON_TIME_Q1 0x0824 /* ENST Q1 on time */
+#define GEM_ENST_OFF_TIME_Q0 0x0840 /* ENST Q0 off time */
+#define GEM_ENST_OFF_TIME_Q1 0x0844 /* ENST Q1 off time */
+#define GEM_ENST_CONTROL 0x0880 /* ENST control register */
#define GEM_USX_CONTROL 0x0A80 /* High speed PCS control register */
#define GEM_USX_STATUS 0x0A88 /* High speed PCS status register */
@@ -221,6 +228,13 @@
#define GEM_IDR(hw_q) (0x0620 + ((hw_q) << 2))
#define GEM_IMR(hw_q) (0x0640 + ((hw_q) << 2))
+#define GEM_ENST_START_TIME(hw_q) (0x0800 + ((hw_q) << 2))
+#define GEM_ENST_ON_TIME(hw_q) (0x0820 + ((hw_q) << 2))
+#define GEM_ENST_OFF_TIME(hw_q) (0x0840 + ((hw_q) << 2))
+
+/* Bitfields in ENST_CONTROL */
+#define GEM_ENST_DISABLE_QUEUE_OFFSET 16
+
/* Bitfields in NCR */
#define MACB_LB_OFFSET 0 /* reserved */
#define MACB_LB_SIZE 1
@@ -554,6 +568,23 @@
#define GEM_HIGH_SPEED_OFFSET 26
#define GEM_HIGH_SPEED_SIZE 1
+/* Bitfields in ENST_START_TIME_Qx. */
+#define GEM_START_TIME_SEC_OFFSET 30
+#define GEM_START_TIME_SEC_SIZE 2
+#define GEM_START_TIME_NSEC_OFFSET 0
+#define GEM_START_TIME_NSEC_SIZE 30
+
+/* Bitfields in ENST_ON_TIME_Qx. */
+#define GEM_ON_TIME_OFFSET 0
+#define GEM_ON_TIME_SIZE 17
+
+/* Bitfields in ENST_OFF_TIME_Qx. */
+#define GEM_OFF_TIME_OFFSET 0
+#define GEM_OFF_TIME_SIZE 17
+
+/* Granularity of the hardware ENST timing registers */
+#define ENST_TIME_GRANULARITY_NS 8
+
/* Bitfields in USX_CONTROL. */
#define GEM_USX_CTRL_SPEED_OFFSET 14
#define GEM_USX_CTRL_SPEED_SIZE 3
@@ -739,6 +770,7 @@
#define MACB_CAPS_MIIONRGMII 0x00000200
#define MACB_CAPS_NEED_TSUCLK 0x00000400
#define MACB_CAPS_QUEUE_DISABLE 0x00000800
+#define MACB_CAPS_QBV 0x00001000
#define MACB_CAPS_PCS 0x01000000
#define MACB_CAPS_HIGH_SPEED 0x02000000
#define MACB_CAPS_CLK_HW_CHG 0x04000000
@@ -1219,6 +1251,11 @@ struct macb_queue {
unsigned int RBQP;
unsigned int RBQPH;
+ /* ENST register offsets for this queue */
+ unsigned int ENST_START_TIME;
+ unsigned int ENST_ON_TIME;
+ unsigned int ENST_OFF_TIME;
+
/* Lock to protect tx_head and tx_tail */
spinlock_t tx_ptr_lock;
unsigned int tx_head, tx_tail;
@@ -1397,6 +1434,19 @@ static inline bool gem_has_ptp(struct macb *bp)
return IS_ENABLED(CONFIG_MACB_USE_HWSTAMP) && (bp->caps & MACB_CAPS_GEM_HAS_PTP);
}
+/* ENST Helper functions */
+static inline u64 enst_ns_to_hw_units(size_t ns, u32 speed_mbps)
+{
+ return DIV_ROUND_UP((ns) * (speed_mbps),
+ (ENST_TIME_GRANULARITY_NS * 1000));
+}
+
+static inline u64 enst_max_hw_interval(u32 speed_mbps)
+{
+ return DIV_ROUND_UP(GENMASK(GEM_ON_TIME_SIZE - 1, 0) *
+ ENST_TIME_GRANULARITY_NS * 1000, (speed_mbps));
+}
+
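A worked check of the two helpers above, assuming a 1000 Mb/s link:

/*
 * enst_ns_to_hw_units(10000, 1000)
 *	= DIV_ROUND_UP(10000 * 1000, 8 * 1000) = 1250 byte times (10 us)
 * enst_max_hw_interval(1000)
 *	= DIV_ROUND_UP(0x1ffff * 8 * 1000, 1000) = 1048568 ns (~1 ms)
 * so the 17-bit ON/OFF registers bound each gate window to roughly a
 * millisecond at gigabit speed.
 */
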
/**
* struct macb_platform_data - platform data for MACB Ethernet used for PCI registration
* @pclk: platform clock
@@ -1407,4 +1457,21 @@ struct macb_platform_data {
struct clk *hclk;
};
+/**
+ * struct macb_queue_enst_config - Configuration for Enhanced Scheduled Traffic
+ * @start_time_mask: Bitmask representing the start time for the queue
+ * @on_time_bytes: "on" time nsec expressed in bytes
+ * @off_time_bytes: "off" time nsec expressed in bytes
+ * @queue_id: Identifier for the queue
+ *
+ * This structure holds the configuration parameters for an ENST queue,
+ * used to control time-based transmission scheduling in the MACB driver.
+ */
+struct macb_queue_enst_config {
+ u32 start_time_mask;
+ u32 on_time_bytes;
+ u32 off_time_bytes;
+ u8 queue_id;
+};
+
#endif /* _MACB_H */
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index c769b7dbd3ba..a6fbab388c19 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -36,6 +36,7 @@
#include <linux/reset.h>
#include <linux/firmware/xlnx-zynqmp.h>
#include <linux/inetdevice.h>
+#include <net/pkt_sched.h>
#include "macb.h"
/* This structure is only used for MACB on SiFive FU540 devices */
@@ -4088,6 +4089,223 @@ static void macb_restore_features(struct macb *bp)
macb_set_rxflow_feature(bp, features);
}
+static int macb_taprio_setup_replace(struct net_device *ndev,
+ struct tc_taprio_qopt_offload *conf)
+{
+ u64 total_on_time = 0, start_time_sec = 0, start_time = conf->base_time;
+ u32 configured_queues = 0, speed = 0, start_time_nsec;
+ struct macb_queue_enst_config *enst_queue;
+ struct tc_taprio_sched_entry *entry;
+ struct macb *bp = netdev_priv(ndev);
+ struct ethtool_link_ksettings kset;
+ struct macb_queue *queue;
+ size_t i;
+ int err;
+
+ if (conf->num_entries > bp->num_queues) {
+ netdev_err(ndev, "Too many TAPRIO entries: %zu > %d queues\n",
+ conf->num_entries, bp->num_queues);
+ return -EINVAL;
+ }
+
+ if (conf->base_time < 0) {
+ netdev_err(ndev, "Invalid base_time: must be 0 or positive, got %lld\n",
+ conf->base_time);
+ return -ERANGE;
+ }
+
+ /* Get the current link speed */
+ err = phylink_ethtool_ksettings_get(bp->phylink, &kset);
+ if (unlikely(err)) {
+ netdev_err(ndev, "Failed to get link settings: %d\n", err);
+ return err;
+ }
+
+ speed = kset.base.speed;
+ if (unlikely(speed <= 0)) {
+ netdev_err(ndev, "Invalid speed: %d\n", speed);
+ return -EINVAL;
+ }
+
+ enst_queue = kcalloc(conf->num_entries, sizeof(*enst_queue), GFP_KERNEL);
+ if (unlikely(!enst_queue))
+ return -ENOMEM;
+
+ /* Pre-validate all entries before making any hardware changes */
+ for (i = 0; i < conf->num_entries; i++) {
+ entry = &conf->entries[i];
+
+ if (entry->command != TC_TAPRIO_CMD_SET_GATES) {
+ netdev_err(ndev, "Entry %zu: unsupported command %d\n",
+ i, entry->command);
+ err = -EOPNOTSUPP;
+ goto cleanup;
+ }
+
+ /* Validate gate_mask: must be nonzero, single queue, and within range */
+ if (!is_power_of_2(entry->gate_mask)) {
+ netdev_err(ndev, "Entry %zu: gate_mask 0x%x is not a power of 2 (only one queue per entry allowed)\n",
+ i, entry->gate_mask);
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ /* gate_mask must not select queues outside the valid queue_mask */
+ if (entry->gate_mask & ~bp->queue_mask) {
+ netdev_err(ndev, "Entry %zu: gate_mask 0x%x exceeds queue range (max_queues=%d)\n",
+ i, entry->gate_mask, bp->num_queues);
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ /* Check for start time limits */
+ start_time_sec = start_time;
+ start_time_nsec = do_div(start_time_sec, NSEC_PER_SEC);
+ if (start_time_sec > GENMASK(GEM_START_TIME_SEC_SIZE - 1, 0)) {
+ netdev_err(ndev, "Entry %zu: Start time %llu s exceeds hardware limit\n",
+ i, start_time_sec);
+ err = -ERANGE;
+ goto cleanup;
+ }
+
+ /* Check for on time limit */
+ if (entry->interval > enst_max_hw_interval(speed)) {
+ netdev_err(ndev, "Entry %zu: interval %u ns exceeds hardware limit %llu ns\n",
+ i, entry->interval, enst_max_hw_interval(speed));
+ err = -ERANGE;
+ goto cleanup;
+ }
+
+ /* Check for off time limit */
+ if ((conf->cycle_time - entry->interval) > enst_max_hw_interval(speed)) {
+ netdev_err(ndev, "Entry %zu: off_time %llu ns exceeds hardware limit %llu ns\n",
+ i, conf->cycle_time - entry->interval,
+ enst_max_hw_interval(speed));
+ err = -ERANGE;
+ goto cleanup;
+ }
+
+ enst_queue[i].queue_id = order_base_2(entry->gate_mask);
+ enst_queue[i].start_time_mask =
+ (start_time_sec << GEM_START_TIME_SEC_OFFSET) |
+ start_time_nsec;
+ enst_queue[i].on_time_bytes =
+ enst_ns_to_hw_units(entry->interval, speed);
+ enst_queue[i].off_time_bytes =
+ enst_ns_to_hw_units(conf->cycle_time - entry->interval, speed);
+
+ configured_queues |= entry->gate_mask;
+ total_on_time += entry->interval;
+ start_time += entry->interval;
+ }
+
+ /* Check total interval doesn't exceed cycle time */
+ if (total_on_time > conf->cycle_time) {
+ netdev_err(ndev, "Total ON %llu ns exceeds cycle time %llu ns\n",
+ total_on_time, conf->cycle_time);
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ netdev_dbg(ndev, "TAPRIO setup: %zu entries, base_time=%lld ns, cycle_time=%llu ns\n",
+ conf->num_entries, conf->base_time, conf->cycle_time);
+
+ /* All validations passed - proceed with hardware configuration */
+ scoped_guard(spinlock_irqsave, &bp->lock) {
+ /* Disable ENST queues if running before configuring */
+ gem_writel(bp, ENST_CONTROL,
+ bp->queue_mask << GEM_ENST_DISABLE_QUEUE_OFFSET);
+
+ for (i = 0; i < conf->num_entries; i++) {
+ queue = &bp->queues[enst_queue[i].queue_id];
+ /* Configure queue timing registers */
+ queue_writel(queue, ENST_START_TIME,
+ enst_queue[i].start_time_mask);
+ queue_writel(queue, ENST_ON_TIME,
+ enst_queue[i].on_time_bytes);
+ queue_writel(queue, ENST_OFF_TIME,
+ enst_queue[i].off_time_bytes);
+ }
+
+ /* Enable ENST for all configured queues in one write */
+ gem_writel(bp, ENST_CONTROL, configured_queues);
+ }
+
+ netdev_info(ndev, "TAPRIO configuration completed successfully: %zu entries, %d queues configured\n",
+ conf->num_entries, hweight32(configured_queues));
+
+cleanup:
+ kfree(enst_queue);
+ return err;
+}
+
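A worked pass through the validation above, assuming a 1 Gb/s link and a 1 ms cycle split into two 500 us gates (gate_mask 0x1, then 0x2):

/*
 * entry 0: queue_id = order_base_2(0x1) = 0
 *	on  = enst_ns_to_hw_units(500000, 1000) = 62500 bytes
 *	off = enst_ns_to_hw_units(1000000 - 500000, 1000) = 62500 bytes
 * entry 1: queue_id = order_base_2(0x2) = 1, same on/off split
 * configured_queues = 0x3, written to ENST_CONTROL in one go after the
 * per-queue START/ON/OFF registers are programmed.
 */
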
+static void macb_taprio_destroy(struct net_device *ndev)
+{
+ struct macb *bp = netdev_priv(ndev);
+ struct macb_queue *queue;
+ u32 enst_disable_mask;
+ unsigned int q;
+
+ netdev_reset_tc(ndev);
+ enst_disable_mask = bp->queue_mask << GEM_ENST_DISABLE_QUEUE_OFFSET;
+
+ scoped_guard(spinlock_irqsave, &bp->lock) {
+ /* Single disable command for all queues */
+ gem_writel(bp, ENST_CONTROL, enst_disable_mask);
+
+ /* Clear all queue ENST registers in batch */
+ for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
+ queue_writel(queue, ENST_START_TIME, 0);
+ queue_writel(queue, ENST_ON_TIME, 0);
+ queue_writel(queue, ENST_OFF_TIME, 0);
+ }
+ }
+ netdev_info(ndev, "TAPRIO destroy: All gates disabled\n");
+}
+
+static int macb_setup_taprio(struct net_device *ndev,
+ struct tc_taprio_qopt_offload *taprio)
+{
+ struct macb *bp = netdev_priv(ndev);
+ int err = 0;
+
+ if (unlikely(!(ndev->hw_features & NETIF_F_HW_TC)))
+ return -EOPNOTSUPP;
+
+ /* Check if the device is in runtime suspend */
+ if (unlikely(pm_runtime_suspended(&bp->pdev->dev))) {
+ netdev_err(ndev, "Device is in runtime suspend\n");
+ return -EOPNOTSUPP;
+ }
+
+ switch (taprio->cmd) {
+ case TAPRIO_CMD_REPLACE:
+ err = macb_taprio_setup_replace(ndev, taprio);
+ break;
+ case TAPRIO_CMD_DESTROY:
+ macb_taprio_destroy(ndev);
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ }
+
+ return err;
+}
+
+static int macb_setup_tc(struct net_device *dev, enum tc_setup_type type,
+ void *type_data)
+{
+ if (!dev || !type_data)
+ return -EINVAL;
+
+ switch (type) {
+ case TC_SETUP_QDISC_TAPRIO:
+ return macb_setup_taprio(dev, type_data);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
static const struct net_device_ops macb_netdev_ops = {
.ndo_open = macb_open,
.ndo_stop = macb_close,
@@ -4105,6 +4323,7 @@ static const struct net_device_ops macb_netdev_ops = {
.ndo_features_check = macb_features_check,
.ndo_hwtstamp_set = macb_hwtstamp_set,
.ndo_hwtstamp_get = macb_hwtstamp_get,
+ .ndo_setup_tc = macb_setup_tc,
};
/* Configure peripheral capabilities according to device tree
@@ -4331,6 +4550,10 @@ static int macb_init(struct platform_device *pdev)
#endif
}
+ queue->ENST_START_TIME = GEM_ENST_START_TIME(hw_q);
+ queue->ENST_ON_TIME = GEM_ENST_ON_TIME(hw_q);
+ queue->ENST_OFF_TIME = GEM_ENST_OFF_TIME(hw_q);
+
/* get irq: here we use the linux queue index, not the hardware
* queue index. the queue irq definitions in the device tree
* must remove the optional gaps that could exist in the
@@ -4383,6 +4606,10 @@ static int macb_init(struct platform_device *pdev)
dev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_RXCSUM;
if (bp->caps & MACB_CAPS_SG_DISABLED)
dev->hw_features &= ~NETIF_F_SG;
+ /* Enable HW_TC if hardware supports QBV */
+ if (bp->caps & MACB_CAPS_QBV)
+ dev->hw_features |= NETIF_F_HW_TC;
+
dev->features = dev->hw_features;
/* Check RX Flow Filters support.
@@ -4826,36 +5053,45 @@ static unsigned long fu540_macb_tx_recalc_rate(struct clk_hw *hw,
return mgmt->rate;
}
-static long fu540_macb_tx_round_rate(struct clk_hw *hw, unsigned long rate,
- unsigned long *parent_rate)
-{
- if (WARN_ON(rate < 2500000))
- return 2500000;
- else if (rate == 2500000)
- return 2500000;
- else if (WARN_ON(rate < 13750000))
- return 2500000;
- else if (WARN_ON(rate < 25000000))
- return 25000000;
- else if (rate == 25000000)
- return 25000000;
- else if (WARN_ON(rate < 75000000))
- return 25000000;
- else if (WARN_ON(rate < 125000000))
- return 125000000;
- else if (rate == 125000000)
- return 125000000;
-
- WARN_ON(rate > 125000000);
+static int fu540_macb_tx_determine_rate(struct clk_hw *hw,
+ struct clk_rate_request *req)
+{
+ if (WARN_ON(req->rate < 2500000))
+ req->rate = 2500000;
+ else if (req->rate == 2500000)
+ req->rate = 2500000;
+ else if (WARN_ON(req->rate < 13750000))
+ req->rate = 2500000;
+ else if (WARN_ON(req->rate < 25000000))
+ req->rate = 25000000;
+ else if (req->rate == 25000000)
+ req->rate = 25000000;
+ else if (WARN_ON(req->rate < 75000000))
+ req->rate = 25000000;
+ else if (WARN_ON(req->rate < 125000000))
+ req->rate = 125000000;
+ else if (req->rate == 125000000)
+ req->rate = 125000000;
+ else if (WARN_ON(req->rate > 125000000))
+ req->rate = 125000000;
+ else
+ req->rate = 125000000;
- return 125000000;
+ return 0;
}
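
The determine_rate conversion preserves the old round_rate ladder; a compact recap of the quantization (WARN_ON still fires for the in-between requests):

/*
 * requested rate        quantized rate
 * < 13.75 MHz        -> 2.5 MHz
 * < 75 MHz           -> 25 MHz
 * anything else      -> 125 MHz
 */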
static int fu540_macb_tx_set_rate(struct clk_hw *hw, unsigned long rate,
unsigned long parent_rate)
{
- rate = fu540_macb_tx_round_rate(hw, rate, &parent_rate);
- if (rate != 125000000)
+ struct clk_rate_request req;
+ int ret;
+
+ clk_hw_init_rate_request(hw, &req, rate);
+ ret = fu540_macb_tx_determine_rate(hw, &req);
+ if (ret != 0)
+ return ret;
+
+ if (req.rate != 125000000)
iowrite32(1, mgmt->reg);
else
iowrite32(0, mgmt->reg);
@@ -4866,7 +5102,7 @@ static int fu540_macb_tx_set_rate(struct clk_hw *hw, unsigned long rate,
static const struct clk_ops fu540_c000_ops = {
.recalc_rate = fu540_macb_tx_recalc_rate,
- .round_rate = fu540_macb_tx_round_rate,
+ .determine_rate = fu540_macb_tx_determine_rate,
.set_rate = fu540_macb_tx_set_rate,
};
@@ -5127,8 +5363,9 @@ static const struct macb_config sama7g5_emac_config = {
static const struct macb_config versal_config = {
.caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE | MACB_CAPS_JUMBO |
- MACB_CAPS_GEM_HAS_PTP | MACB_CAPS_BD_RD_PREFETCH | MACB_CAPS_NEED_TSUCLK |
- MACB_CAPS_QUEUE_DISABLE,
+ MACB_CAPS_GEM_HAS_PTP | MACB_CAPS_BD_RD_PREFETCH |
+ MACB_CAPS_NEED_TSUCLK | MACB_CAPS_QUEUE_DISABLE |
+ MACB_CAPS_QBV,
.dma_burst_length = 16,
.clk_init = macb_clk_init,
.init = init_reset_optional,
diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c
index 6f6525983130..4ee970f3bad6 100644
--- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c
+++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c
@@ -171,7 +171,7 @@ static void chtls_purge_receive_queue(struct sock *sk)
struct sk_buff *skb;
while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
- skb_dst_set(skb, (void *)NULL);
+ skb_dstref_steal(skb);
kfree_skb(skb);
}
}
@@ -194,7 +194,7 @@ static void chtls_purge_recv_queue(struct sock *sk)
struct sk_buff *skb;
while ((skb = __skb_dequeue(&tlsk->sk_recv_queue)) != NULL) {
- skb_dst_set(skb, NULL);
+ skb_dstref_steal(skb);
kfree_skb(skb);
}
}
@@ -505,7 +505,7 @@ static void reset_listen_child(struct sock *child)
chtls_send_reset(child, CPL_ABORT_SEND_RST, skb);
sock_orphan(child);
- INC_ORPHAN_COUNT(child);
+ tcp_orphan_count_inc();
if (child->sk_state == TCP_CLOSE)
inet_csk_destroy_sock(child);
}
@@ -870,7 +870,7 @@ static void do_abort_syn_rcv(struct sock *child, struct sock *parent)
* created only after 3 way handshake is done.
*/
sock_orphan(child);
- INC_ORPHAN_COUNT(child);
+ tcp_orphan_count_inc();
chtls_release_resources(child);
chtls_conn_done(child);
} else {
@@ -951,6 +951,7 @@ static unsigned int chtls_select_mss(const struct chtls_sock *csk,
struct tcp_sock *tp;
unsigned int mss;
struct sock *sk;
+ u16 user_mss;
mss = ntohs(req->tcpopt.mss);
sk = csk->sk;
@@ -969,8 +970,9 @@ static unsigned int chtls_select_mss(const struct chtls_sock *csk,
tcpoptsz += round_up(TCPOLEN_TIMESTAMP, 4);
tp->advmss = dst_metric_advmss(dst);
- if (USER_MSS(tp) && tp->advmss > USER_MSS(tp))
- tp->advmss = USER_MSS(tp);
+ user_mss = USER_MSS(tp);
+ if (user_mss && tp->advmss > user_mss)
+ tp->advmss = user_mss;
if (tp->advmss > pmtu - iphdrsz)
tp->advmss = pmtu - iphdrsz;
if (mss && tp->advmss > mss)
@@ -1734,7 +1736,7 @@ static int chtls_rx_data(struct chtls_dev *cdev, struct sk_buff *skb)
pr_err("can't find conn. for hwtid %u.\n", hwtid);
return -EINVAL;
}
- skb_dst_set(skb, NULL);
+ skb_dstref_steal(skb);
process_cpl_msg(chtls_recv_data, sk, skb);
return 0;
}
@@ -1786,7 +1788,7 @@ static int chtls_rx_pdu(struct chtls_dev *cdev, struct sk_buff *skb)
pr_err("can't find conn. for hwtid %u.\n", hwtid);
return -EINVAL;
}
- skb_dst_set(skb, NULL);
+ skb_dstref_steal(skb);
process_cpl_msg(chtls_recv_pdu, sk, skb);
return 0;
}
@@ -1855,7 +1857,7 @@ static int chtls_rx_cmp(struct chtls_dev *cdev, struct sk_buff *skb)
pr_err("can't find conn. for hwtid %u.\n", hwtid);
return -EINVAL;
}
- skb_dst_set(skb, NULL);
+ skb_dstref_steal(skb);
process_cpl_msg(chtls_rx_hdr, sk, skb);
return 0;
diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.h b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.h
index f61ca657601c..29ceff5a5fcb 100644
--- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.h
+++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.h
@@ -90,12 +90,11 @@ struct deferred_skb_cb {
#define SND_WSCALE(tp) ((tp)->rx_opt.snd_wscale)
#define RCV_WSCALE(tp) ((tp)->rx_opt.rcv_wscale)
-#define USER_MSS(tp) ((tp)->rx_opt.user_mss)
+#define USER_MSS(tp) (READ_ONCE((tp)->rx_opt.user_mss))
#define TS_RECENT_STAMP(tp) ((tp)->rx_opt.ts_recent_stamp)
#define WSCALE_OK(tp) ((tp)->rx_opt.wscale_ok)
#define TSTAMP_OK(tp) ((tp)->rx_opt.tstamp_ok)
#define SACK_OK(tp) ((tp)->rx_opt.sack_ok)
-#define INC_ORPHAN_COUNT(sk) this_cpu_inc(*(sk)->sk_prot->orphan_count)
/* TLS SKB */
#define skb_ulp_tls_inline(skb) (ULP_SKB_CB(skb)->ulp.tls.ofld)
@@ -171,14 +170,14 @@ static inline void chtls_set_req_addr(struct request_sock *oreq,
static inline void chtls_free_skb(struct sock *sk, struct sk_buff *skb)
{
- skb_dst_set(skb, NULL);
+ skb_dstref_steal(skb);
__skb_unlink(skb, &sk->sk_receive_queue);
__kfree_skb(skb);
}
static inline void chtls_kfree_skb(struct sock *sk, struct sk_buff *skb)
{
- skb_dst_set(skb, NULL);
+ skb_dstref_steal(skb);
__skb_unlink(skb, &sk->sk_receive_queue);
kfree_skb(skb);
}
diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c
index 465fa8077964..4036db466e18 100644
--- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c
+++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c
@@ -1434,7 +1434,7 @@ static int chtls_pt_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
continue;
found_ok_skb:
if (!skb->len) {
- skb_dst_set(skb, NULL);
+ skb_dstref_steal(skb);
__skb_unlink(skb, &sk->sk_receive_queue);
kfree_skb(skb);
diff --git a/drivers/net/ethernet/freescale/enetc/Kconfig b/drivers/net/ethernet/freescale/enetc/Kconfig
index 54b0f0a5a6bb..117038104b69 100644
--- a/drivers/net/ethernet/freescale/enetc/Kconfig
+++ b/drivers/net/ethernet/freescale/enetc/Kconfig
@@ -28,6 +28,7 @@ config NXP_NTMP
config FSL_ENETC
tristate "ENETC PF driver"
+ depends on PTP_1588_CLOCK_OPTIONAL
depends on PCI_MSI
select FSL_ENETC_CORE
select FSL_ENETC_IERB
@@ -45,6 +46,7 @@ config FSL_ENETC
config NXP_ENETC4
tristate "ENETC4 PF driver"
+ depends on PTP_1588_CLOCK_OPTIONAL
depends on PCI_MSI
select FSL_ENETC_CORE
select FSL_ENETC_MDIO
@@ -62,6 +64,7 @@ config NXP_ENETC4
config FSL_ENETC_VF
tristate "ENETC VF driver"
+ depends on PTP_1588_CLOCK_OPTIONAL
depends on PCI_MSI
select FSL_ENETC_CORE
select FSL_ENETC_MDIO
diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index e4287725832e..aae462a0cf5a 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -221,22 +221,111 @@ static void enetc_unwind_tx_frame(struct enetc_bdr *tx_ring, int count, int i)
}
}
+static void enetc_set_one_step_ts(struct enetc_si *si, bool udp, int offset)
+{
+ u32 val = ENETC_PM0_SINGLE_STEP_EN;
+
+ val |= ENETC_SET_SINGLE_STEP_OFFSET(offset);
+ if (udp)
+ val |= ENETC_PM0_SINGLE_STEP_CH;
+
+ /* The "Correction" field of a packet is updated based on the
+ * current time and the timestamp provided
+ */
+ enetc_port_mac_wr(si, ENETC_PM0_SINGLE_STEP, val);
+}
+
+static void enetc4_set_one_step_ts(struct enetc_si *si, bool udp, int offset)
+{
+ u32 val = PM_SINGLE_STEP_EN;
+
+ val |= PM_SINGLE_STEP_OFFSET_SET(offset);
+ if (udp)
+ val |= PM_SINGLE_STEP_CH;
+
+ enetc_port_mac_wr(si, ENETC4_PM_SINGLE_STEP(0), val);
+}
+
+static u32 enetc_update_ptp_sync_msg(struct enetc_ndev_priv *priv,
+ struct sk_buff *skb, bool csum_offload)
+{
+ struct enetc_skb_cb *enetc_cb = ENETC_SKB_CB(skb);
+ u16 tstamp_off = enetc_cb->origin_tstamp_off;
+ u16 corr_off = enetc_cb->correction_off;
+ struct enetc_si *si = priv->si;
+ struct enetc_hw *hw = &si->hw;
+ __be32 new_sec_l, new_nsec;
+ __be16 new_sec_h;
+ u32 lo, hi, nsec;
+ u8 *data;
+ u64 sec;
+
+ lo = enetc_rd_hot(hw, ENETC_SICTR0);
+ hi = enetc_rd_hot(hw, ENETC_SICTR1);
+ sec = (u64)hi << 32 | lo;
+ nsec = do_div(sec, 1000000000);
+
+ /* Update the originTimestamp field of the Sync packet
+ * - 48-bit seconds field
+ * - 32-bit nanoseconds field
+ *
+ * In addition, if csum_offload is false, the UDP checksum must
+ * be updated by software after the originTimestamp field is
+ * written, otherwise the hardware will calculate a wrong
+ * checksum when it updates the correction field in the packet.
+ */
+
+ data = skb_mac_header(skb);
+ new_sec_h = htons((sec >> 32) & 0xffff);
+ new_sec_l = htonl(sec & 0xffffffff);
+ new_nsec = htonl(nsec);
+ if (enetc_cb->udp && !csum_offload) {
+ struct udphdr *uh = udp_hdr(skb);
+ __be32 old_sec_l, old_nsec;
+ __be16 old_sec_h;
+
+ old_sec_h = *(__be16 *)(data + tstamp_off);
+ inet_proto_csum_replace2(&uh->check, skb, old_sec_h,
+ new_sec_h, false);
+
+ old_sec_l = *(__be32 *)(data + tstamp_off + 2);
+ inet_proto_csum_replace4(&uh->check, skb, old_sec_l,
+ new_sec_l, false);
+
+ old_nsec = *(__be32 *)(data + tstamp_off + 6);
+ inet_proto_csum_replace4(&uh->check, skb, old_nsec,
+ new_nsec, false);
+ }
+
+ *(__be16 *)(data + tstamp_off) = new_sec_h;
+ *(__be32 *)(data + tstamp_off + 2) = new_sec_l;
+ *(__be32 *)(data + tstamp_off + 6) = new_nsec;
+
+ /* Configure single-step register */
+ if (is_enetc_rev1(si))
+ enetc_set_one_step_ts(si, enetc_cb->udp, corr_off);
+ else
+ enetc4_set_one_step_ts(si, enetc_cb->udp, corr_off);
+
+ return lo & ENETC_TXBD_TSTAMP;
+}
+
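The software checksum fix-up above is incremental: each replaced word is folded out of the UDP checksum and the new word folded in, which is what inet_proto_csum_replace2/4() do. A hedged user-space sketch of the underlying RFC 1624 arithmetic (HC' = ~(~HC + ~m + m')):

#include <stdint.h>
#include <stdio.h>

static uint16_t csum_replace16(uint16_t check, uint16_t old, uint16_t new)
{
	uint32_t sum = (uint32_t)(~check & 0xffff) + (~old & 0xffff) + new;

	/* end-around carry, twice to cover a second overflow */
	sum = (sum & 0xffff) + (sum >> 16);
	sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}

int main(void)
{
	/* one 16-bit word of the packet changes from 0x1234 to 0xabcd */
	printf("0x%04x\n", csum_replace16(0x8f0e, 0x1234, 0xabcd));
	return 0;
}
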
static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb)
{
bool do_vlan, do_onestep_tstamp = false, do_twostep_tstamp = false;
struct enetc_ndev_priv *priv = netdev_priv(tx_ring->ndev);
- struct enetc_hw *hw = &priv->si->hw;
+ struct enetc_skb_cb *enetc_cb = ENETC_SKB_CB(skb);
struct enetc_tx_swbd *tx_swbd;
int len = skb_headlen(skb);
union enetc_tx_bd temp_bd;
- u8 msgtype, twostep, udp;
+ bool csum_offload = false;
union enetc_tx_bd *txbd;
- u16 offset1, offset2;
int i, count = 0;
skb_frag_t *frag;
unsigned int f;
dma_addr_t dma;
u8 flags = 0;
+ u32 tstamp;
enetc_clear_tx_bd(&temp_bd);
if (skb->ip_summed == CHECKSUM_PARTIAL) {
@@ -256,11 +345,19 @@ static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb)
temp_bd.l4_aux = FIELD_PREP(ENETC_TX_BD_L4T,
ENETC_TXBD_L4T_UDP);
flags |= ENETC_TXBD_FLAGS_CSUM_LSO | ENETC_TXBD_FLAGS_L4CS;
+ csum_offload = true;
} else if (skb_checksum_help(skb)) {
return 0;
}
}
+ if (enetc_cb->flag & ENETC_F_TX_ONESTEP_SYNC_TSTAMP) {
+ do_onestep_tstamp = true;
+ tstamp = enetc_update_ptp_sync_msg(priv, skb, csum_offload);
+ } else if (enetc_cb->flag & ENETC_F_TX_TSTAMP) {
+ do_twostep_tstamp = true;
+ }
+
i = tx_ring->next_to_use;
txbd = ENETC_TXBD(*tx_ring, i);
prefetchw(txbd);
@@ -280,17 +377,6 @@ static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb)
count++;
do_vlan = skb_vlan_tag_present(skb);
- if (skb->cb[0] & ENETC_F_TX_ONESTEP_SYNC_TSTAMP) {
- if (enetc_ptp_parse(skb, &udp, &msgtype, &twostep, &offset1,
- &offset2) ||
- msgtype != PTP_MSGTYPE_SYNC || twostep)
- WARN_ONCE(1, "Bad packet for one-step timestamping\n");
- else
- do_onestep_tstamp = true;
- } else if (skb->cb[0] & ENETC_F_TX_TSTAMP) {
- do_twostep_tstamp = true;
- }
-
tx_swbd->do_twostep_tstamp = do_twostep_tstamp;
tx_swbd->qbv_en = !!(priv->active_offloads & ENETC_F_QBV);
tx_swbd->check_wb = tx_swbd->do_twostep_tstamp || tx_swbd->qbv_en;
@@ -333,65 +419,9 @@ static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb)
}
if (do_onestep_tstamp) {
- __be32 new_sec_l, new_nsec;
- u32 lo, hi, nsec, val;
- __be16 new_sec_h;
- u8 *data;
- u64 sec;
-
- lo = enetc_rd_hot(hw, ENETC_SICTR0);
- hi = enetc_rd_hot(hw, ENETC_SICTR1);
- sec = (u64)hi << 32 | lo;
- nsec = do_div(sec, 1000000000);
-
/* Configure extension BD */
- temp_bd.ext.tstamp = cpu_to_le32(lo & 0x3fffffff);
+ temp_bd.ext.tstamp = cpu_to_le32(tstamp);
e_flags |= ENETC_TXBD_E_FLAGS_ONE_STEP_PTP;
-
- /* Update originTimestamp field of Sync packet
- * - 48 bits seconds field
- * - 32 bits nanseconds field
- *
- * In addition, the UDP checksum needs to be updated
- * by software after updating originTimestamp field,
- * otherwise the hardware will calculate the wrong
- * checksum when updating the correction field and
- * update it to the packet.
- */
- data = skb_mac_header(skb);
- new_sec_h = htons((sec >> 32) & 0xffff);
- new_sec_l = htonl(sec & 0xffffffff);
- new_nsec = htonl(nsec);
- if (udp) {
- struct udphdr *uh = udp_hdr(skb);
- __be32 old_sec_l, old_nsec;
- __be16 old_sec_h;
-
- old_sec_h = *(__be16 *)(data + offset2);
- inet_proto_csum_replace2(&uh->check, skb, old_sec_h,
- new_sec_h, false);
-
- old_sec_l = *(__be32 *)(data + offset2 + 2);
- inet_proto_csum_replace4(&uh->check, skb, old_sec_l,
- new_sec_l, false);
-
- old_nsec = *(__be32 *)(data + offset2 + 6);
- inet_proto_csum_replace4(&uh->check, skb, old_nsec,
- new_nsec, false);
- }
-
- *(__be16 *)(data + offset2) = new_sec_h;
- *(__be32 *)(data + offset2 + 2) = new_sec_l;
- *(__be32 *)(data + offset2 + 6) = new_nsec;
-
- /* Configure single-step register */
- val = ENETC_PM0_SINGLE_STEP_EN;
- val |= ENETC_SET_SINGLE_STEP_OFFSET(offset1);
- if (udp)
- val |= ENETC_PM0_SINGLE_STEP_CH;
-
- enetc_port_mac_wr(priv->si, ENETC_PM0_SINGLE_STEP,
- val);
} else if (do_twostep_tstamp) {
skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
e_flags |= ENETC_TXBD_E_FLAGS_TWO_STEP_PTP;
@@ -938,12 +968,13 @@ err_chained_bd:
static netdev_tx_t enetc_start_xmit(struct sk_buff *skb,
struct net_device *ndev)
{
+ struct enetc_skb_cb *enetc_cb = ENETC_SKB_CB(skb);
struct enetc_ndev_priv *priv = netdev_priv(ndev);
struct enetc_bdr *tx_ring;
int count;
/* Queue one-step Sync packet if already locked */
- if (skb->cb[0] & ENETC_F_TX_ONESTEP_SYNC_TSTAMP) {
+ if (enetc_cb->flag & ENETC_F_TX_ONESTEP_SYNC_TSTAMP) {
if (test_and_set_bit_lock(ENETC_TX_ONESTEP_TSTAMP_IN_PROGRESS,
&priv->flags)) {
skb_queue_tail(&priv->tx_skbs, skb);
@@ -1005,24 +1036,29 @@ drop_packet_err:
netdev_tx_t enetc_xmit(struct sk_buff *skb, struct net_device *ndev)
{
+ struct enetc_skb_cb *enetc_cb = ENETC_SKB_CB(skb);
struct enetc_ndev_priv *priv = netdev_priv(ndev);
u8 udp, msgtype, twostep;
u16 offset1, offset2;
- /* Mark tx timestamp type on skb->cb[0] if requires */
+ /* Mark tx timestamp type on enetc_cb->flag if required */
if ((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) &&
- (priv->active_offloads & ENETC_F_TX_TSTAMP_MASK)) {
- skb->cb[0] = priv->active_offloads & ENETC_F_TX_TSTAMP_MASK;
- } else {
- skb->cb[0] = 0;
- }
+ (priv->active_offloads & ENETC_F_TX_TSTAMP_MASK))
+ enetc_cb->flag = priv->active_offloads & ENETC_F_TX_TSTAMP_MASK;
+ else
+ enetc_cb->flag = 0;
/* Fall back to two-step timestamp if not one-step Sync packet */
- if (skb->cb[0] & ENETC_F_TX_ONESTEP_SYNC_TSTAMP) {
+ if (enetc_cb->flag & ENETC_F_TX_ONESTEP_SYNC_TSTAMP) {
if (enetc_ptp_parse(skb, &udp, &msgtype, &twostep,
&offset1, &offset2) ||
- msgtype != PTP_MSGTYPE_SYNC || twostep != 0)
- skb->cb[0] = ENETC_F_TX_TSTAMP;
+ msgtype != PTP_MSGTYPE_SYNC || twostep != 0) {
+ enetc_cb->flag = ENETC_F_TX_TSTAMP;
+ } else {
+ enetc_cb->udp = !!udp;
+ enetc_cb->correction_off = offset1;
+ enetc_cb->origin_tstamp_off = offset2;
+ }
}
return enetc_start_xmit(skb, ndev);
@@ -1214,7 +1250,9 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget)
if (xdp_frame) {
xdp_return_frame(xdp_frame);
} else if (skb) {
- if (unlikely(skb->cb[0] & ENETC_F_TX_ONESTEP_SYNC_TSTAMP)) {
+ struct enetc_skb_cb *enetc_cb = ENETC_SKB_CB(skb);
+
+ if (unlikely(enetc_cb->flag & ENETC_F_TX_ONESTEP_SYNC_TSTAMP)) {
/* Start work to release lock for next one-step
* timestamping packet. And send one skb in
* tx_skbs queue if has.
@@ -1397,8 +1435,7 @@ static void enetc_get_offloads(struct enetc_bdr *rx_ring,
__vlan_hwaccel_put_tag(skb, tpid, le16_to_cpu(rxbd->r.vlan_opt));
}
- if (IS_ENABLED(CONFIG_FSL_ENETC_PTP_CLOCK) &&
- (priv->active_offloads & ENETC_F_RX_TSTAMP))
+ if (priv->active_offloads & ENETC_F_RX_TSTAMP)
enetc_get_rx_tstamp(rx_ring->ndev, rxbd, skb);
}
@@ -3301,7 +3338,7 @@ int enetc_hwtstamp_set(struct net_device *ndev,
struct enetc_ndev_priv *priv = netdev_priv(ndev);
int err, new_offloads = priv->active_offloads;
- if (!IS_ENABLED(CONFIG_FSL_ENETC_PTP_CLOCK))
+ if (!enetc_ptp_clock_is_enabled(priv->si))
return -EOPNOTSUPP;
switch (config->tx_type) {
@@ -3351,7 +3388,7 @@ int enetc_hwtstamp_get(struct net_device *ndev,
{
struct enetc_ndev_priv *priv = netdev_priv(ndev);
- if (!IS_ENABLED(CONFIG_FSL_ENETC_PTP_CLOCK))
+ if (!enetc_ptp_clock_is_enabled(priv->si))
return -EOPNOTSUPP;
if (priv->active_offloads & ENETC_F_TX_ONESTEP_SYNC_TSTAMP)
diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h
index 62e8ee4d2f04..815afdc2ec23 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc.h
@@ -54,6 +54,15 @@ struct enetc_tx_swbd {
u8 qbv_en:1;
};
+struct enetc_skb_cb {
+ u8 flag;
+ bool udp;
+ u16 correction_off;
+ u16 origin_tstamp_off;
+};
+
+#define ENETC_SKB_CB(skb) ((struct enetc_skb_cb *)((skb)->cb))
+
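skb->cb is 48 bytes, so the four fields above fit easily; a hedged compile-time guard (not part of the patch) would pin that invariant down:

static inline struct enetc_skb_cb *enetc_skb_cb_checked(struct sk_buff *skb)
{
	BUILD_BUG_ON(sizeof(struct enetc_skb_cb) >
		     sizeof_field(struct sk_buff, cb));
	return (struct enetc_skb_cb *)skb->cb;
}
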
struct enetc_lso_t {
bool ipv6;
bool tcp;
@@ -217,7 +226,7 @@ static inline union enetc_rx_bd *enetc_rxbd(struct enetc_bdr *rx_ring, int i)
{
int hw_idx = i;
- if (IS_ENABLED(CONFIG_FSL_ENETC_PTP_CLOCK) && rx_ring->ext_en)
+ if (rx_ring->ext_en)
hw_idx = 2 * i;
return &(((union enetc_rx_bd *)rx_ring->bd_base)[hw_idx]);
@@ -231,7 +240,7 @@ static inline void enetc_rxbd_next(struct enetc_bdr *rx_ring,
new_rxbd++;
- if (IS_ENABLED(CONFIG_FSL_ENETC_PTP_CLOCK) && rx_ring->ext_en)
+ if (rx_ring->ext_en)
new_rxbd++;
if (unlikely(++new_index == rx_ring->bd_count)) {
@@ -589,6 +598,14 @@ static inline void enetc_cbd_free_data_mem(struct enetc_si *si, int size,
void enetc_reset_ptcmsdur(struct enetc_hw *hw);
void enetc_set_ptcmsdur(struct enetc_hw *hw, u32 *queue_max_sdu);
+static inline bool enetc_ptp_clock_is_enabled(struct enetc_si *si)
+{
+ if (is_enetc_rev1(si))
+ return IS_ENABLED(CONFIG_FSL_ENETC_PTP_CLOCK);
+
+ return IS_ENABLED(CONFIG_PTP_NETC_V4_TIMER);
+}
+
#ifdef CONFIG_FSL_ENETC_QOS
int enetc_qos_query_caps(struct net_device *ndev, void *type_data);
int enetc_setup_tc_taprio(struct net_device *ndev, void *type_data);
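
The enetc_skb_cb layout above is overlaid on the skb->cb scratch area, which is 48 bytes; a compile-time guard along these lines (a sketch, not part of this patch, using the kernel's static_assert/sizeof_field helpers) would make that size assumption explicit:

	/* Sketch only: the private control block must fit in skb->cb. */
	static_assert(sizeof(struct enetc_skb_cb) <= sizeof_field(struct sk_buff, cb));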
diff --git a/drivers/net/ethernet/freescale/enetc/enetc4_hw.h b/drivers/net/ethernet/freescale/enetc/enetc4_hw.h
index aa25b445d301..19bf0e89cdc2 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc4_hw.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc4_hw.h
@@ -171,6 +171,12 @@
/* Port MAC 0/1 Pause Quanta Threshold Register */
#define ENETC4_PM_PAUSE_THRESH(mac) (0x5064 + (mac) * 0x400)
+#define ENETC4_PM_SINGLE_STEP(mac) (0x50c0 + (mac) * 0x400)
+#define PM_SINGLE_STEP_CH BIT(6)
+#define PM_SINGLE_STEP_OFFSET GENMASK(15, 7)
+#define PM_SINGLE_STEP_OFFSET_SET(o) FIELD_PREP(PM_SINGLE_STEP_OFFSET, o)
+#define PM_SINGLE_STEP_EN BIT(31)
+
/* Port MAC 0 Interface Mode Control Register */
#define ENETC4_PM_IF_MODE(mac) (0x5300 + (mac) * 0x400)
#define PM_IF_MODE_IFMODE GENMASK(2, 0)
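
The PM_SINGLE_STEP fields above are composed into one register write when one-step timestamping is armed; a hedged sketch of how the driver might program it (variable names illustrative, enetc_port_wr() assumed from the existing ENETC register accessors):

	/* Sketch: enable one-step timestamping, point HW at the PTP
	 * correctionField offset, request UDP checksum fixup if needed.
	 */
	u32 val = PM_SINGLE_STEP_EN | PM_SINGLE_STEP_OFFSET_SET(offset);

	if (udp)
		val |= PM_SINGLE_STEP_CH;
	enetc_port_wr(&si->hw, ENETC4_PM_SINGLE_STEP(0), val);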
diff --git a/drivers/net/ethernet/freescale/enetc/enetc4_pf.c b/drivers/net/ethernet/freescale/enetc/enetc4_pf.c
index b3dc1afeefd1..2e07b9b746e1 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc4_pf.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc4_pf.c
@@ -569,6 +569,9 @@ static const struct net_device_ops enetc4_ndev_ops = {
.ndo_set_features = enetc4_pf_set_features,
.ndo_vlan_rx_add_vid = enetc_vlan_rx_add_vid,
.ndo_vlan_rx_kill_vid = enetc_vlan_rx_del_vid,
+ .ndo_eth_ioctl = enetc_ioctl,
+ .ndo_hwtstamp_get = enetc_hwtstamp_get,
+ .ndo_hwtstamp_set = enetc_hwtstamp_set,
};
static struct phylink_pcs *
@@ -1016,8 +1019,7 @@ static int enetc4_pf_probe(struct pci_dev *pdev,
err = devm_add_action_or_reset(dev, enetc4_pci_remove, pdev);
if (err)
- return dev_err_probe(dev, err,
- "Add enetc4_pci_remove() action failed\n");
+ return err;
/* si is the private data. */
si = pci_get_drvdata(pdev);
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c
index 961e76cd8489..6215e9c68fc5 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c
@@ -4,6 +4,9 @@
#include <linux/ethtool_netlink.h>
#include <linux/net_tstamp.h>
#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/ptp_clock_kernel.h>
+
#include "enetc.h"
static const u32 enetc_si_regs[] = {
@@ -877,23 +880,54 @@ static int enetc_set_coalesce(struct net_device *ndev,
return 0;
}
-static int enetc_get_ts_info(struct net_device *ndev,
- struct kernel_ethtool_ts_info *info)
+static int enetc4_get_phc_index_by_pdev(struct enetc_si *si)
{
- struct enetc_ndev_priv *priv = netdev_priv(ndev);
- int *phc_idx;
-
- phc_idx = symbol_get(enetc_phc_index);
- if (phc_idx) {
- info->phc_index = *phc_idx;
- symbol_put(enetc_phc_index);
+ struct pci_bus *bus = si->pdev->bus;
+ struct pci_dev *timer_pdev;
+ unsigned int devfn;
+ int phc_index;
+
+ switch (si->revision) {
+ case ENETC_REV_4_1:
+ devfn = PCI_DEVFN(24, 0);
+ break;
+ default:
+ return -1;
}
- if (!IS_ENABLED(CONFIG_FSL_ENETC_PTP_CLOCK)) {
- info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE;
+ timer_pdev = pci_get_slot(bus, devfn);
+ if (!timer_pdev)
+ return -1;
- return 0;
- }
+ phc_index = ptp_clock_index_by_dev(&timer_pdev->dev);
+ pci_dev_put(timer_pdev);
+
+ return phc_index;
+}
+
+static int enetc4_get_phc_index(struct enetc_si *si)
+{
+ struct device_node *np = si->pdev->dev.of_node;
+ struct device_node *timer_np;
+ int phc_index;
+
+ if (!np)
+ return enetc4_get_phc_index_by_pdev(si);
+
+ timer_np = of_parse_phandle(np, "ptp-timer", 0);
+ if (!timer_np)
+ return enetc4_get_phc_index_by_pdev(si);
+
+ phc_index = ptp_clock_index_by_of_node(timer_np);
+ of_node_put(timer_np);
+
+ return phc_index;
+}
+
+static void enetc_get_ts_generic_info(struct net_device *ndev,
+ struct kernel_ethtool_ts_info *info)
+{
+ struct enetc_ndev_priv *priv = netdev_priv(ndev);
info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE |
SOF_TIMESTAMPING_RX_HARDWARE |
@@ -908,6 +942,36 @@ static int enetc_get_ts_info(struct net_device *ndev,
info->rx_filters = (1 << HWTSTAMP_FILTER_NONE) |
(1 << HWTSTAMP_FILTER_ALL);
+}
+
+static int enetc_get_ts_info(struct net_device *ndev,
+ struct kernel_ethtool_ts_info *info)
+{
+ struct enetc_ndev_priv *priv = netdev_priv(ndev);
+ struct enetc_si *si = priv->si;
+ int *phc_idx;
+
+ if (!enetc_ptp_clock_is_enabled(si))
+ goto timestamp_tx_sw;
+
+ if (is_enetc_rev1(si)) {
+ phc_idx = symbol_get(enetc_phc_index);
+ if (phc_idx) {
+ info->phc_index = *phc_idx;
+ symbol_put(enetc_phc_index);
+ }
+ } else {
+ info->phc_index = enetc4_get_phc_index(si);
+ if (info->phc_index < 0)
+ goto timestamp_tx_sw;
+ }
+
+ enetc_get_ts_generic_info(ndev, info);
+
+ return 0;
+
+timestamp_tx_sw:
+ info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE;
return 0;
}
@@ -1296,6 +1360,7 @@ const struct ethtool_ops enetc4_pf_ethtool_ops = {
.get_rxfh = enetc_get_rxfh,
.set_rxfh = enetc_set_rxfh,
.get_rxfh_fields = enetc_get_rxfh_fields,
+ .get_ts_info = enetc_get_ts_info,
};
void enetc_set_ethtool_ops(struct net_device *ndev)
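
Userspace can confirm which PHC index the lookup above reports through the standard ethtool timestamping query; a minimal self-contained sketch (the interface name is an example):

	#include <stdio.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <sys/socket.h>
	#include <net/if.h>
	#include <linux/ethtool.h>
	#include <linux/sockios.h>

	int main(void)
	{
		struct ethtool_ts_info info = { .cmd = ETHTOOL_GET_TS_INFO };
		struct ifreq ifr = {};
		int fd = socket(AF_INET, SOCK_DGRAM, 0);

		if (fd < 0)
			return 1;
		strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1); /* example name */
		ifr.ifr_data = (char *)&info;
		if (ioctl(fd, SIOCETHTOOL, &ifr) == 0)
			printf("phc index: %d\n", info.phc_index);
		return 0;
	}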
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_hw.h b/drivers/net/ethernet/freescale/enetc/enetc_hw.h
index 73763e8f4879..377c96325814 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_hw.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc_hw.h
@@ -614,6 +614,7 @@ enum enetc_txbd_flags {
#define ENETC_TXBD_STATS_WIN BIT(7)
#define ENETC_TXBD_TXSTART_MASK GENMASK(24, 0)
#define ENETC_TXBD_FLAGS_OFFSET 24
+#define ENETC_TXBD_TSTAMP GENMASK(29, 0)
static inline __le32 enetc_txbd_set_tx_start(u64 tx_start, u8 flags)
{
diff --git a/drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c b/drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c
index 8f5021e59e0a..0e2b703c673a 100644
--- a/drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c
+++ b/drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c
@@ -260,6 +260,11 @@ struct page_pool *gve_rx_create_page_pool(struct gve_priv *priv,
.offset = xdp ? XDP_PACKET_HEADROOM : 0,
};
+ if (priv->header_split_enabled) {
+ pp.flags |= PP_FLAG_ALLOW_UNREADABLE_NETMEM;
+ pp.queue_idx = rx->q_num;
+ }
+
return page_pool_create(&pp);
}
diff --git a/drivers/net/ethernet/google/gve/gve_rx_dqo.c b/drivers/net/ethernet/google/gve/gve_rx_dqo.c
index 7380c2b7a2d8..55393b784317 100644
--- a/drivers/net/ethernet/google/gve/gve_rx_dqo.c
+++ b/drivers/net/ethernet/google/gve/gve_rx_dqo.c
@@ -718,6 +718,24 @@ static int gve_rx_xsk_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
return 0;
}
+static void gve_dma_sync(struct gve_priv *priv, struct gve_rx_ring *rx,
+ struct gve_rx_buf_state_dqo *buf_state, u16 buf_len)
+{
+ struct gve_rx_slot_page_info *page_info = &buf_state->page_info;
+
+ if (rx->dqo.page_pool) {
+ page_pool_dma_sync_netmem_for_cpu(rx->dqo.page_pool,
+ page_info->netmem,
+ page_info->page_offset,
+ buf_len);
+ } else {
+ dma_sync_single_range_for_cpu(&priv->pdev->dev, buf_state->addr,
+ page_info->page_offset +
+ page_info->pad,
+ buf_len, DMA_FROM_DEVICE);
+ }
+}
+
/* Returns 0 if descriptor is completed successfully.
* Returns -EINVAL if descriptor is invalid.
* Returns -ENOMEM if data cannot be copied to skb.
@@ -793,13 +811,18 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
rx->rx_hsplit_unsplit_pkt += unsplit;
rx->rx_hsplit_bytes += hdr_len;
u64_stats_update_end(&rx->statss);
+ } else if (!rx->ctx.skb_head && rx->dqo.page_pool &&
+ netmem_is_net_iov(buf_state->page_info.netmem)) {
+		/* When header split is disabled, the header lands in the
+		 * packet buffer. If that buffer is a net_iov, it cannot be
+		 * easily mapped into kernel space to access the header
+		 * required to process the packet.
+ */
+ goto error;
}
/* Sync the portion of dma buffer for CPU to read. */
- dma_sync_single_range_for_cpu(&priv->pdev->dev, buf_state->addr,
- buf_state->page_info.page_offset +
- buf_state->page_info.pad,
- buf_len, DMA_FROM_DEVICE);
+ gve_dma_sync(priv, rx, buf_state, buf_len);
/* Append to current skb if one exists. */
if (rx->ctx.skb_head) {
@@ -837,7 +860,9 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
u64_stats_update_end(&rx->statss);
}
- if (eop && buf_len <= priv->rx_copybreak) {
+ if (eop && buf_len <= priv->rx_copybreak &&
+ !(rx->dqo.page_pool &&
+ netmem_is_net_iov(buf_state->page_info.netmem))) {
rx->ctx.skb_head = gve_rx_copy(priv->dev, napi,
&buf_state->page_info, buf_len);
if (unlikely(!rx->ctx.skb_head))
diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_main.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_main.c
index 2e64dc1ab355..0b92a2e5e986 100644
--- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_main.c
+++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_main.c
@@ -417,7 +417,7 @@ static int hbg_pci_init(struct pci_dev *pdev)
priv->io_base = pcim_iomap_table(pdev)[0];
if (!priv->io_base)
- return dev_err_probe(dev, -ENOMEM, "failed to get io base\n");
+ return -ENOMEM;
pci_set_master(pdev);
return 0;
diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_mdio.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_mdio.c
index 8b7b476ed7fb..37791de47f6f 100644
--- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_mdio.c
+++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_mdio.c
@@ -278,8 +278,7 @@ int hbg_mdio_init(struct hbg_priv *priv)
mdio_bus = devm_mdiobus_alloc(dev);
if (!mdio_bus)
- return dev_err_probe(dev, -ENOMEM,
- "failed to alloc MDIO bus\n");
+ return -ENOMEM;
mdio_bus->parent = dev;
mdio_bus->priv = priv;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
index 0255c8acb744..4cce4f4ba6b0 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
@@ -843,7 +843,7 @@ static int hns3_dbg_bd_file_init(struct hnae3_handle *handle, u32 cmd)
entry_dir = hns3_dbg_dentry[hns3_dbg_cmd[cmd].dentry].dentry;
max_queue_num = hns3_get_max_available_channels(handle);
- data = devm_kzalloc(&handle->pdev->dev, max_queue_num * sizeof(*data),
+ data = devm_kcalloc(&handle->pdev->dev, max_queue_num, sizeof(*data),
GFP_KERNEL);
if (!data)
return -ENOMEM;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
index d5454e126c85..a752d0e3db3a 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
@@ -1927,6 +1927,31 @@ static int hns3_set_tx_spare_buf_size(struct net_device *netdev,
return ret;
}
+static int hns3_check_tx_copybreak(struct net_device *netdev, u32 copybreak)
+{
+ struct hns3_nic_priv *priv = netdev_priv(netdev);
+
+ if (copybreak < priv->min_tx_copybreak) {
+ netdev_err(netdev, "tx copybreak %u should be no less than %u!\n",
+ copybreak, priv->min_tx_copybreak);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int hns3_check_tx_spare_buf_size(struct net_device *netdev, u32 buf_size)
+{
+ struct hns3_nic_priv *priv = netdev_priv(netdev);
+
+ if (buf_size < priv->min_tx_spare_buf_size) {
+ netdev_err(netdev,
+ "tx spare buf size %u should be no less than %u!\n",
+ buf_size, priv->min_tx_spare_buf_size);
+ return -EINVAL;
+ }
+ return 0;
+}
+
static int hns3_set_tunable(struct net_device *netdev,
const struct ethtool_tunable *tuna,
const void *data)
@@ -1943,6 +1968,10 @@ static int hns3_set_tunable(struct net_device *netdev,
switch (tuna->id) {
case ETHTOOL_TX_COPYBREAK:
+ ret = hns3_check_tx_copybreak(netdev, *(u32 *)data);
+ if (ret)
+ return ret;
+
priv->tx_copybreak = *(u32 *)data;
for (i = 0; i < h->kinfo.num_tqps; i++)
@@ -1957,6 +1986,10 @@ static int hns3_set_tunable(struct net_device *netdev,
break;
case ETHTOOL_TX_COPYBREAK_BUF_SIZE:
+ ret = hns3_check_tx_spare_buf_size(netdev, *(u32 *)data);
+ if (ret)
+ return ret;
+
old_tx_spare_buf_size = h->kinfo.tx_spare_buf_size;
new_tx_spare_buf_size = *(u32 *)data;
netdev_info(netdev, "request to set tx spare buf size from %u to %u\n",
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index f209a05e2033..f5457ae0b64f 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -2182,8 +2182,8 @@ static bool hclge_drop_pfc_buf_till_fit(struct hclge_dev *hdev,
return hclge_is_rx_buf_ok(hdev, buf_alloc, rx_all);
}
-static int hclge_only_alloc_priv_buff(struct hclge_dev *hdev,
- struct hclge_pkt_buf_alloc *buf_alloc)
+static bool hclge_only_alloc_priv_buff(struct hclge_dev *hdev,
+ struct hclge_pkt_buf_alloc *buf_alloc)
{
#define COMPENSATE_BUFFER 0x3C00
#define COMPENSATE_HALF_MPS_NUM 5
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_devlink.c b/drivers/net/ethernet/huawei/hinic/hinic_devlink.c
index 03e42512a2d5..300bc267a259 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_devlink.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_devlink.c
@@ -443,8 +443,9 @@ int hinic_health_reporters_create(struct hinic_devlink_priv *priv)
struct devlink *devlink = priv_to_devlink(priv);
priv->hw_fault_reporter =
- devlink_health_reporter_create(devlink, &hinic_hw_fault_reporter_ops,
- 0, priv);
+ devlink_health_reporter_create(devlink,
+ &hinic_hw_fault_reporter_ops,
+ priv);
if (IS_ERR(priv->hw_fault_reporter)) {
dev_warn(&priv->hwdev->hwif->pdev->dev, "Failed to create hw fault reporter, err: %ld\n",
PTR_ERR(priv->hw_fault_reporter));
@@ -452,8 +453,9 @@ int hinic_health_reporters_create(struct hinic_devlink_priv *priv)
}
priv->fw_fault_reporter =
- devlink_health_reporter_create(devlink, &hinic_fw_fault_reporter_ops,
- 0, priv);
+ devlink_health_reporter_create(devlink,
+ &hinic_fw_fault_reporter_ops,
+ priv);
if (IS_ERR(priv->fw_fault_reporter)) {
dev_warn(&priv->hwdev->hwif->pdev->dev, "Failed to create fw fault reporter, err: %ld\n",
PTR_ERR(priv->fw_fault_reporter));
diff --git a/drivers/net/ethernet/huawei/hinic3/Makefile b/drivers/net/ethernet/huawei/hinic3/Makefile
index 509dfbfb0e96..2a0ed8e2c63e 100644
--- a/drivers/net/ethernet/huawei/hinic3/Makefile
+++ b/drivers/net/ethernet/huawei/hinic3/Makefile
@@ -3,7 +3,9 @@
obj-$(CONFIG_HINIC3) += hinic3.o
-hinic3-objs := hinic3_common.o \
+hinic3-objs := hinic3_cmdq.o \
+ hinic3_common.o \
+ hinic3_eqs.o \
hinic3_hw_cfg.o \
hinic3_hw_comm.o \
hinic3_hwdev.o \
diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_cmdq.c b/drivers/net/ethernet/huawei/hinic3/hinic3_cmdq.c
new file mode 100644
index 000000000000..ef539d1b69a3
--- /dev/null
+++ b/drivers/net/ethernet/huawei/hinic3/hinic3_cmdq.c
@@ -0,0 +1,915 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved.
+
+#include <linux/bitfield.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+
+#include "hinic3_cmdq.h"
+#include "hinic3_hwdev.h"
+#include "hinic3_hwif.h"
+#include "hinic3_mbox.h"
+
+#define CMDQ_BUF_SIZE 2048
+#define CMDQ_WQEBB_SIZE 64
+
+#define CMDQ_CMD_TIMEOUT 5000
+#define CMDQ_ENABLE_WAIT_TIMEOUT 300
+
+#define CMDQ_CTXT_CURR_WQE_PAGE_PFN_MASK GENMASK_ULL(51, 0)
+#define CMDQ_CTXT_EQ_ID_MASK GENMASK_ULL(60, 53)
+#define CMDQ_CTXT_CEQ_ARM_MASK BIT_ULL(61)
+#define CMDQ_CTXT_CEQ_EN_MASK BIT_ULL(62)
+#define CMDQ_CTXT_HW_BUSY_BIT_MASK BIT_ULL(63)
+
+#define CMDQ_CTXT_WQ_BLOCK_PFN_MASK GENMASK_ULL(51, 0)
+#define CMDQ_CTXT_CI_MASK GENMASK_ULL(63, 52)
+#define CMDQ_CTXT_SET(val, member) \
+ FIELD_PREP(CMDQ_CTXT_##member##_MASK, val)
+
+#define CMDQ_WQE_HDR_BUFDESC_LEN_MASK GENMASK(7, 0)
+#define CMDQ_WQE_HDR_COMPLETE_FMT_MASK BIT(15)
+#define CMDQ_WQE_HDR_DATA_FMT_MASK BIT(22)
+#define CMDQ_WQE_HDR_COMPLETE_REQ_MASK BIT(23)
+#define CMDQ_WQE_HDR_COMPLETE_SECT_LEN_MASK GENMASK(28, 27)
+#define CMDQ_WQE_HDR_CTRL_LEN_MASK GENMASK(30, 29)
+#define CMDQ_WQE_HDR_HW_BUSY_BIT_MASK BIT(31)
+#define CMDQ_WQE_HDR_SET(val, member) \
+ FIELD_PREP(CMDQ_WQE_HDR_##member##_MASK, val)
+#define CMDQ_WQE_HDR_GET(val, member) \
+ FIELD_GET(CMDQ_WQE_HDR_##member##_MASK, le32_to_cpu(val))
+
+#define CMDQ_CTRL_PI_MASK GENMASK(15, 0)
+#define CMDQ_CTRL_CMD_MASK GENMASK(23, 16)
+#define CMDQ_CTRL_MOD_MASK GENMASK(28, 24)
+#define CMDQ_CTRL_HW_BUSY_BIT_MASK BIT(31)
+#define CMDQ_CTRL_SET(val, member) \
+ FIELD_PREP(CMDQ_CTRL_##member##_MASK, val)
+#define CMDQ_CTRL_GET(val, member) \
+ FIELD_GET(CMDQ_CTRL_##member##_MASK, val)
+
+#define CMDQ_WQE_ERRCODE_VAL_MASK GENMASK(30, 0)
+#define CMDQ_WQE_ERRCODE_GET(val, member) \
+ FIELD_GET(CMDQ_WQE_ERRCODE_##member##_MASK, le32_to_cpu(val))
+
+#define CMDQ_DB_INFO_HI_PROD_IDX_MASK GENMASK(7, 0)
+#define CMDQ_DB_INFO_SET(val, member) \
+ FIELD_PREP(CMDQ_DB_INFO_##member##_MASK, val)
+
+#define CMDQ_DB_HEAD_QUEUE_TYPE_MASK BIT(23)
+#define CMDQ_DB_HEAD_CMDQ_TYPE_MASK GENMASK(26, 24)
+#define CMDQ_DB_HEAD_SET(val, member) \
+ FIELD_PREP(CMDQ_DB_HEAD_##member##_MASK, val)
+
+#define CMDQ_CEQE_TYPE_MASK GENMASK(2, 0)
+#define CMDQ_CEQE_GET(val, member) \
+ FIELD_GET(CMDQ_CEQE_##member##_MASK, le32_to_cpu(val))
+
+#define CMDQ_WQE_HEADER(wqe) ((struct cmdq_header *)(wqe))
+#define CMDQ_WQE_COMPLETED(ctrl_info) \
+ CMDQ_CTRL_GET(le32_to_cpu(ctrl_info), HW_BUSY_BIT)
+
+#define CMDQ_PFN(addr) ((addr) >> 12)
+
+/* cmdq work queue's chip logical address table is up to 512B */
+#define CMDQ_WQ_CLA_SIZE 512
+
+/* Completion codes: send, direct sync, force stop */
+#define CMDQ_SEND_CMPT_CODE 10
+#define CMDQ_DIRECT_SYNC_CMPT_CODE 11
+#define CMDQ_FORCE_STOP_CMPT_CODE 12
+
+enum cmdq_data_format {
+ CMDQ_DATA_SGE = 0,
+ CMDQ_DATA_DIRECT = 1,
+};
+
+enum cmdq_ctrl_sect_len {
+ CMDQ_CTRL_SECT_LEN = 1,
+ CMDQ_CTRL_DIRECT_SECT_LEN = 2,
+};
+
+enum cmdq_bufdesc_len {
+ CMDQ_BUFDESC_LCMD_LEN = 2,
+ CMDQ_BUFDESC_SCMD_LEN = 3,
+};
+
+enum cmdq_completion_format {
+ CMDQ_COMPLETE_DIRECT = 0,
+ CMDQ_COMPLETE_SGE = 1,
+};
+
+enum cmdq_cmd_type {
+ CMDQ_CMD_DIRECT_RESP,
+ CMDQ_CMD_SGE_RESP,
+};
+
+#define CMDQ_WQE_NUM_WQEBBS 1
+
+static struct cmdq_wqe *cmdq_read_wqe(struct hinic3_wq *wq, u16 *ci)
+{
+ if (hinic3_wq_get_used(wq) == 0)
+ return NULL;
+
+ *ci = wq->cons_idx & wq->idx_mask;
+
+ return get_q_element(&wq->qpages, wq->cons_idx, NULL);
+}
+
+struct hinic3_cmd_buf *hinic3_alloc_cmd_buf(struct hinic3_hwdev *hwdev)
+{
+ struct hinic3_cmd_buf *cmd_buf;
+ struct hinic3_cmdqs *cmdqs;
+
+ cmdqs = hwdev->cmdqs;
+
+ cmd_buf = kmalloc(sizeof(*cmd_buf), GFP_ATOMIC);
+ if (!cmd_buf)
+ return NULL;
+
+ cmd_buf->buf = dma_pool_alloc(cmdqs->cmd_buf_pool, GFP_ATOMIC,
+ &cmd_buf->dma_addr);
+ if (!cmd_buf->buf) {
+ dev_err(hwdev->dev, "Failed to allocate cmdq cmd buf from the pool\n");
+ goto err_free_cmd_buf;
+ }
+
+ cmd_buf->size = cpu_to_le16(CMDQ_BUF_SIZE);
+ refcount_set(&cmd_buf->ref_cnt, 1);
+
+ return cmd_buf;
+
+err_free_cmd_buf:
+ kfree(cmd_buf);
+
+ return NULL;
+}
+
+void hinic3_free_cmd_buf(struct hinic3_hwdev *hwdev,
+ struct hinic3_cmd_buf *cmd_buf)
+{
+ struct hinic3_cmdqs *cmdqs;
+
+ if (!refcount_dec_and_test(&cmd_buf->ref_cnt))
+ return;
+
+ cmdqs = hwdev->cmdqs;
+
+ dma_pool_free(cmdqs->cmd_buf_pool, cmd_buf->buf, cmd_buf->dma_addr);
+ kfree(cmd_buf);
+}
+
+static void cmdq_clear_cmd_buf(struct hinic3_cmdq_cmd_info *cmd_info,
+ struct hinic3_hwdev *hwdev)
+{
+ if (cmd_info->buf_in) {
+ hinic3_free_cmd_buf(hwdev, cmd_info->buf_in);
+ cmd_info->buf_in = NULL;
+ }
+}
+
+static void clear_wqe_complete_bit(struct hinic3_cmdq *cmdq,
+ struct cmdq_wqe *wqe, u16 ci)
+{
+ struct cmdq_header *hdr = CMDQ_WQE_HEADER(wqe);
+ __le32 header_info = hdr->header_info;
+ enum cmdq_data_format df;
+ struct cmdq_ctrl *ctrl;
+
+ df = CMDQ_WQE_HDR_GET(header_info, DATA_FMT);
+ if (df == CMDQ_DATA_SGE)
+ ctrl = &wqe->wqe_lcmd.ctrl;
+ else
+ ctrl = &wqe->wqe_scmd.ctrl;
+
+ /* clear HW busy bit */
+ ctrl->ctrl_info = 0;
+ cmdq->cmd_infos[ci].cmd_type = HINIC3_CMD_TYPE_NONE;
+	wmb(); /* ensure the wqe is cleared before updating ci */
+ hinic3_wq_put_wqebbs(&cmdq->wq, CMDQ_WQE_NUM_WQEBBS);
+}
+
+static void cmdq_update_cmd_status(struct hinic3_cmdq *cmdq, u16 prod_idx,
+ struct cmdq_wqe *wqe)
+{
+ struct hinic3_cmdq_cmd_info *cmd_info;
+ struct cmdq_wqe_lcmd *wqe_lcmd;
+ __le32 status_info;
+
+ wqe_lcmd = &wqe->wqe_lcmd;
+ cmd_info = &cmdq->cmd_infos[prod_idx];
+ if (cmd_info->errcode) {
+ status_info = wqe_lcmd->status.status_info;
+ *cmd_info->errcode = CMDQ_WQE_ERRCODE_GET(status_info, VAL);
+ }
+
+ if (cmd_info->direct_resp)
+ *cmd_info->direct_resp = wqe_lcmd->completion.resp.direct.val;
+}
+
+static void cmdq_sync_cmd_handler(struct hinic3_cmdq *cmdq,
+ struct cmdq_wqe *wqe, u16 ci)
+{
+ spin_lock(&cmdq->cmdq_lock);
+ cmdq_update_cmd_status(cmdq, ci, wqe);
+ if (cmdq->cmd_infos[ci].cmpt_code) {
+ *cmdq->cmd_infos[ci].cmpt_code = CMDQ_DIRECT_SYNC_CMPT_CODE;
+ cmdq->cmd_infos[ci].cmpt_code = NULL;
+ }
+
+ /* Ensure that completion code has been updated before updating done */
+ smp_wmb();
+ if (cmdq->cmd_infos[ci].done) {
+ complete(cmdq->cmd_infos[ci].done);
+ cmdq->cmd_infos[ci].done = NULL;
+ }
+ spin_unlock(&cmdq->cmdq_lock);
+
+ cmdq_clear_cmd_buf(&cmdq->cmd_infos[ci], cmdq->hwdev);
+ clear_wqe_complete_bit(cmdq, wqe, ci);
+}
+
+void hinic3_cmdq_ceq_handler(struct hinic3_hwdev *hwdev, __le32 ceqe_data)
+{
+ enum hinic3_cmdq_type cmdq_type = CMDQ_CEQE_GET(ceqe_data, TYPE);
+ struct hinic3_cmdqs *cmdqs = hwdev->cmdqs;
+ struct hinic3_cmdq_cmd_info *cmd_info;
+ struct cmdq_wqe_lcmd *wqe_lcmd;
+ struct hinic3_cmdq *cmdq;
+ struct cmdq_wqe *wqe;
+ __le32 ctrl_info;
+ u16 ci;
+
+ if (unlikely(cmdq_type >= ARRAY_SIZE(cmdqs->cmdq)))
+ return;
+
+ cmdq = &cmdqs->cmdq[cmdq_type];
+ while ((wqe = cmdq_read_wqe(&cmdq->wq, &ci)) != NULL) {
+ cmd_info = &cmdq->cmd_infos[ci];
+ switch (cmd_info->cmd_type) {
+ case HINIC3_CMD_TYPE_NONE:
+ return;
+ case HINIC3_CMD_TYPE_TIMEOUT:
+ dev_warn(hwdev->dev, "Cmdq timeout, q_id: %u, ci: %u\n",
+ cmdq_type, ci);
+ fallthrough;
+ case HINIC3_CMD_TYPE_FAKE_TIMEOUT:
+ cmdq_clear_cmd_buf(cmd_info, hwdev);
+ clear_wqe_complete_bit(cmdq, wqe, ci);
+ break;
+ default:
+			/* Only the arm bit uses an scmd wqe;
+			 * every other wqe is an lcmd one.
+ */
+ wqe_lcmd = &wqe->wqe_lcmd;
+ ctrl_info = wqe_lcmd->ctrl.ctrl_info;
+ if (!CMDQ_WQE_COMPLETED(ctrl_info))
+ return;
+
+ dma_rmb();
+ /* For FORCE_STOP cmd_type, we also need to wait for
+ * the firmware processing to complete to prevent the
+ * firmware from accessing the released cmd_buf
+ */
+ if (cmd_info->cmd_type == HINIC3_CMD_TYPE_FORCE_STOP) {
+ cmdq_clear_cmd_buf(cmd_info, hwdev);
+ clear_wqe_complete_bit(cmdq, wqe, ci);
+ } else {
+ cmdq_sync_cmd_handler(cmdq, wqe, ci);
+ }
+
+ break;
+ }
+ }
+}
+
+static int wait_cmdqs_enable(struct hinic3_cmdqs *cmdqs)
+{
+ unsigned long end;
+
+ end = jiffies + msecs_to_jiffies(CMDQ_ENABLE_WAIT_TIMEOUT);
+ do {
+ if (cmdqs->status & HINIC3_CMDQ_ENABLE)
+ return 0;
+ usleep_range(1000, 2000);
+ } while (time_before(jiffies, end) && !cmdqs->disable_flag);
+
+ cmdqs->disable_flag = 1;
+
+ return -EBUSY;
+}
+
+static void cmdq_set_completion(struct cmdq_completion *complete,
+ struct hinic3_cmd_buf *buf_out)
+{
+ struct hinic3_sge *sge = &complete->resp.sge;
+
+ hinic3_set_sge(sge, buf_out->dma_addr, cpu_to_le32(CMDQ_BUF_SIZE));
+}
+
+static struct cmdq_wqe *cmdq_get_wqe(struct hinic3_wq *wq, u16 *pi)
+{
+ if (!hinic3_wq_free_wqebbs(wq))
+ return NULL;
+
+ return hinic3_wq_get_one_wqebb(wq, pi);
+}
+
+static void cmdq_set_lcmd_bufdesc(struct cmdq_wqe_lcmd *wqe,
+ struct hinic3_cmd_buf *buf_in)
+{
+ hinic3_set_sge(&wqe->buf_desc.sge, buf_in->dma_addr,
+ (__force __le32)buf_in->size);
+}
+
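+/* Ring the cmdq doorbell: the low 8 bits of the producer index select the
+ * doorbell offset within the page, the upper bits travel in db_info.
+ */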
+static void cmdq_set_db(struct hinic3_cmdq *cmdq,
+ enum hinic3_cmdq_type cmdq_type, u16 prod_idx)
+{
+ u8 __iomem *db_base = cmdq->hwdev->cmdqs->cmdqs_db_base;
+ u16 db_ofs = (prod_idx & 0xFF) << 3;
+ struct cmdq_db db;
+
+ db.db_info = cpu_to_le32(CMDQ_DB_INFO_SET(prod_idx >> 8, HI_PROD_IDX));
+ db.db_head = cpu_to_le32(CMDQ_DB_HEAD_SET(1, QUEUE_TYPE) |
+ CMDQ_DB_HEAD_SET(cmdq_type, CMDQ_TYPE));
+ writeq(*(u64 *)&db, db_base + db_ofs);
+}
+
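+/* Copy a fully built shadow WQE into its ring slot: body first, then the
+ * header that carries the HW busy/ownership bit, so the device never
+ * observes a partially written WQE.
+ */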
+static void cmdq_wqe_fill(struct cmdq_wqe *hw_wqe,
+ const struct cmdq_wqe *shadow_wqe)
+{
+ const struct cmdq_header *src = (struct cmdq_header *)shadow_wqe;
+ struct cmdq_header *dst = (struct cmdq_header *)hw_wqe;
+ size_t len;
+
+ len = sizeof(struct cmdq_wqe) - sizeof(struct cmdq_header);
+ memcpy(dst + 1, src + 1, len);
+	/* Ensure the WQE body is written before the header that
+	 * carries the HW busy bit.
+	 */
+ wmb();
+ WRITE_ONCE(*dst, *src);
+}
+
+static void cmdq_prepare_wqe_ctrl(struct cmdq_wqe *wqe, u8 wrapped,
+ u8 mod, u8 cmd, u16 prod_idx,
+ enum cmdq_completion_format complete_format,
+ enum cmdq_data_format data_format,
+ enum cmdq_bufdesc_len buf_len)
+{
+ struct cmdq_header *hdr = CMDQ_WQE_HEADER(wqe);
+ enum cmdq_ctrl_sect_len ctrl_len;
+ struct cmdq_wqe_lcmd *wqe_lcmd;
+ struct cmdq_wqe_scmd *wqe_scmd;
+ struct cmdq_ctrl *ctrl;
+
+ if (data_format == CMDQ_DATA_SGE) {
+ wqe_lcmd = &wqe->wqe_lcmd;
+ wqe_lcmd->status.status_info = 0;
+ ctrl = &wqe_lcmd->ctrl;
+ ctrl_len = CMDQ_CTRL_SECT_LEN;
+ } else {
+ wqe_scmd = &wqe->wqe_scmd;
+ wqe_scmd->status.status_info = 0;
+ ctrl = &wqe_scmd->ctrl;
+ ctrl_len = CMDQ_CTRL_DIRECT_SECT_LEN;
+ }
+
+ ctrl->ctrl_info =
+ cpu_to_le32(CMDQ_CTRL_SET(prod_idx, PI) |
+ CMDQ_CTRL_SET(cmd, CMD) |
+ CMDQ_CTRL_SET(mod, MOD));
+
+ hdr->header_info =
+ cpu_to_le32(CMDQ_WQE_HDR_SET(buf_len, BUFDESC_LEN) |
+ CMDQ_WQE_HDR_SET(complete_format, COMPLETE_FMT) |
+ CMDQ_WQE_HDR_SET(data_format, DATA_FMT) |
+ CMDQ_WQE_HDR_SET(1, COMPLETE_REQ) |
+ CMDQ_WQE_HDR_SET(3, COMPLETE_SECT_LEN) |
+ CMDQ_WQE_HDR_SET(ctrl_len, CTRL_LEN) |
+ CMDQ_WQE_HDR_SET(wrapped, HW_BUSY_BIT));
+}
+
+static void cmdq_set_lcmd_wqe(struct cmdq_wqe *wqe,
+ enum cmdq_cmd_type cmd_type,
+ struct hinic3_cmd_buf *buf_in,
+ struct hinic3_cmd_buf *buf_out,
+ u8 wrapped, u8 mod, u8 cmd, u16 prod_idx)
+{
+ enum cmdq_completion_format complete_format = CMDQ_COMPLETE_DIRECT;
+ struct cmdq_wqe_lcmd *wqe_lcmd = &wqe->wqe_lcmd;
+
+ switch (cmd_type) {
+ case CMDQ_CMD_DIRECT_RESP:
+ wqe_lcmd->completion.resp.direct.val = 0;
+ break;
+ case CMDQ_CMD_SGE_RESP:
+ if (buf_out) {
+ complete_format = CMDQ_COMPLETE_SGE;
+ cmdq_set_completion(&wqe_lcmd->completion, buf_out);
+ }
+ break;
+ }
+
+ cmdq_prepare_wqe_ctrl(wqe, wrapped, mod, cmd, prod_idx, complete_format,
+ CMDQ_DATA_SGE, CMDQ_BUFDESC_LCMD_LEN);
+ cmdq_set_lcmd_bufdesc(wqe_lcmd, buf_in);
+}
+
+static int hinic3_cmdq_sync_timeout_check(struct hinic3_cmdq *cmdq,
+ struct cmdq_wqe *wqe, u16 pi)
+{
+ struct cmdq_wqe_lcmd *wqe_lcmd;
+ struct cmdq_ctrl *ctrl;
+ __le32 ctrl_info;
+
+ wqe_lcmd = &wqe->wqe_lcmd;
+ ctrl = &wqe_lcmd->ctrl;
+ ctrl_info = ctrl->ctrl_info;
+ if (!CMDQ_WQE_COMPLETED(ctrl_info)) {
+ dev_dbg(cmdq->hwdev->dev, "Cmdq sync command check busy bit not set\n");
+ return -EFAULT;
+ }
+ cmdq_update_cmd_status(cmdq, pi, wqe);
+
+ return 0;
+}
+
+static void clear_cmd_info(struct hinic3_cmdq_cmd_info *cmd_info,
+ const struct hinic3_cmdq_cmd_info *saved_cmd_info)
+{
+ if (cmd_info->errcode == saved_cmd_info->errcode)
+ cmd_info->errcode = NULL;
+
+ if (cmd_info->done == saved_cmd_info->done)
+ cmd_info->done = NULL;
+
+ if (cmd_info->direct_resp == saved_cmd_info->direct_resp)
+ cmd_info->direct_resp = NULL;
+}
+
+static int wait_cmdq_sync_cmd_completion(struct hinic3_cmdq *cmdq,
+ struct hinic3_cmdq_cmd_info *cmd_info,
+ struct hinic3_cmdq_cmd_info *saved_cmd_info,
+ u64 curr_msg_id, u16 curr_prod_idx,
+ struct cmdq_wqe *curr_wqe,
+ u32 timeout)
+{
+ ulong timeo = msecs_to_jiffies(timeout);
+ int err;
+
+ if (wait_for_completion_timeout(saved_cmd_info->done, timeo))
+ return 0;
+
+ spin_lock_bh(&cmdq->cmdq_lock);
+ if (cmd_info->cmpt_code == saved_cmd_info->cmpt_code)
+ cmd_info->cmpt_code = NULL;
+
+ if (*saved_cmd_info->cmpt_code == CMDQ_DIRECT_SYNC_CMPT_CODE) {
+ dev_dbg(cmdq->hwdev->dev, "Cmdq direct sync command has been completed\n");
+ spin_unlock_bh(&cmdq->cmdq_lock);
+ return 0;
+ }
+
+ if (curr_msg_id == cmd_info->cmdq_msg_id) {
+ err = hinic3_cmdq_sync_timeout_check(cmdq, curr_wqe,
+ curr_prod_idx);
+ if (err)
+ cmd_info->cmd_type = HINIC3_CMD_TYPE_TIMEOUT;
+ else
+ cmd_info->cmd_type = HINIC3_CMD_TYPE_FAKE_TIMEOUT;
+ } else {
+ err = -ETIMEDOUT;
+ dev_err(cmdq->hwdev->dev,
+ "Cmdq sync command current msg id mismatch cmd_info msg id\n");
+ }
+
+ clear_cmd_info(cmd_info, saved_cmd_info);
+ spin_unlock_bh(&cmdq->cmdq_lock);
+
+ return err;
+}
+
+static int cmdq_sync_cmd_direct_resp(struct hinic3_cmdq *cmdq, u8 mod, u8 cmd,
+ struct hinic3_cmd_buf *buf_in,
+ __le64 *out_param)
+{
+ struct hinic3_cmdq_cmd_info *cmd_info, saved_cmd_info;
+ int cmpt_code = CMDQ_SEND_CMPT_CODE;
+ struct cmdq_wqe *curr_wqe, wqe = {};
+ struct hinic3_wq *wq = &cmdq->wq;
+ u16 curr_prod_idx, next_prod_idx;
+ struct completion done;
+ u64 curr_msg_id;
+ int errcode;
+ u8 wrapped;
+ int err;
+
+ spin_lock_bh(&cmdq->cmdq_lock);
+ curr_wqe = cmdq_get_wqe(wq, &curr_prod_idx);
+ if (!curr_wqe) {
+ spin_unlock_bh(&cmdq->cmdq_lock);
+ return -EBUSY;
+ }
+
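+	/* 'wrapped' is written into the WQE header as the ownership bit and
+	 * toggles on every ring wrap, letting HW tell fresh WQEs from stale
+	 * ones left over from the previous lap.
+	 */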
+ wrapped = cmdq->wrapped;
+ next_prod_idx = curr_prod_idx + CMDQ_WQE_NUM_WQEBBS;
+ if (next_prod_idx >= wq->q_depth) {
+ cmdq->wrapped ^= 1;
+ next_prod_idx -= wq->q_depth;
+ }
+
+ cmd_info = &cmdq->cmd_infos[curr_prod_idx];
+ init_completion(&done);
+ refcount_inc(&buf_in->ref_cnt);
+ cmd_info->cmd_type = HINIC3_CMD_TYPE_DIRECT_RESP;
+ cmd_info->done = &done;
+ cmd_info->errcode = &errcode;
+ cmd_info->direct_resp = out_param;
+ cmd_info->cmpt_code = &cmpt_code;
+ cmd_info->buf_in = buf_in;
+ saved_cmd_info = *cmd_info;
+ cmdq_set_lcmd_wqe(&wqe, CMDQ_CMD_DIRECT_RESP, buf_in, NULL,
+ wrapped, mod, cmd, curr_prod_idx);
+
+ cmdq_wqe_fill(curr_wqe, &wqe);
+ (cmd_info->cmdq_msg_id)++;
+ curr_msg_id = cmd_info->cmdq_msg_id;
+ cmdq_set_db(cmdq, HINIC3_CMDQ_SYNC, next_prod_idx);
+ spin_unlock_bh(&cmdq->cmdq_lock);
+
+ err = wait_cmdq_sync_cmd_completion(cmdq, cmd_info, &saved_cmd_info,
+ curr_msg_id, curr_prod_idx,
+ curr_wqe, CMDQ_CMD_TIMEOUT);
+ if (err) {
+ dev_err(cmdq->hwdev->dev,
+ "Cmdq sync command timeout, mod: %u, cmd: %u, prod idx: 0x%x\n",
+ mod, cmd, curr_prod_idx);
+ err = -ETIMEDOUT;
+ }
+
+ if (cmpt_code == CMDQ_FORCE_STOP_CMPT_CODE) {
+ dev_dbg(cmdq->hwdev->dev,
+ "Force stop cmdq cmd, mod: %u, cmd: %u\n", mod, cmd);
+ err = -EAGAIN;
+ }
+
+ smp_rmb(); /* read error code after completion */
+
+ return err ? err : errcode;
+}
+
+int hinic3_cmdq_direct_resp(struct hinic3_hwdev *hwdev, u8 mod, u8 cmd,
+ struct hinic3_cmd_buf *buf_in, __le64 *out_param)
+{
+ struct hinic3_cmdqs *cmdqs;
+ int err;
+
+ cmdqs = hwdev->cmdqs;
+ err = wait_cmdqs_enable(cmdqs);
+ if (err) {
+ dev_err(hwdev->dev, "Cmdq is disabled\n");
+ return err;
+ }
+
+ err = cmdq_sync_cmd_direct_resp(&cmdqs->cmdq[HINIC3_CMDQ_SYNC],
+ mod, cmd, buf_in, out_param);
+
+ return err;
+}
+
+static void cmdq_init_queue_ctxt(struct hinic3_hwdev *hwdev, u8 cmdq_id,
+ struct comm_cmdq_ctxt_info *ctxt_info)
+{
+ const struct hinic3_cmdqs *cmdqs;
+ u64 cmdq_first_block_paddr, pfn;
+ const struct hinic3_wq *wq;
+
+ cmdqs = hwdev->cmdqs;
+ wq = &cmdqs->cmdq[cmdq_id].wq;
+ pfn = CMDQ_PFN(hinic3_wq_get_first_wqe_page_addr(wq));
+
+ ctxt_info->curr_wqe_page_pfn =
+ cpu_to_le64(CMDQ_CTXT_SET(1, HW_BUSY_BIT) |
+ CMDQ_CTXT_SET(1, CEQ_EN) |
+ CMDQ_CTXT_SET(1, CEQ_ARM) |
+ CMDQ_CTXT_SET(0, EQ_ID) |
+ CMDQ_CTXT_SET(pfn, CURR_WQE_PAGE_PFN));
+
+ if (!hinic3_wq_is_0_level_cla(wq)) {
+ cmdq_first_block_paddr = cmdqs->wq_block_paddr;
+ pfn = CMDQ_PFN(cmdq_first_block_paddr);
+ }
+
+ ctxt_info->wq_block_pfn = cpu_to_le64(CMDQ_CTXT_SET(wq->cons_idx, CI) |
+ CMDQ_CTXT_SET(pfn, WQ_BLOCK_PFN));
+}
+
+static int init_cmdq(struct hinic3_cmdq *cmdq, struct hinic3_hwdev *hwdev,
+ enum hinic3_cmdq_type q_type)
+{
+ int err;
+
+ cmdq->cmdq_type = q_type;
+ cmdq->wrapped = 1;
+ cmdq->hwdev = hwdev;
+
+ spin_lock_init(&cmdq->cmdq_lock);
+
+ cmdq->cmd_infos = kcalloc(cmdq->wq.q_depth, sizeof(*cmdq->cmd_infos),
+ GFP_KERNEL);
+ if (!cmdq->cmd_infos) {
+ err = -ENOMEM;
+ return err;
+ }
+
+ return 0;
+}
+
+static int hinic3_set_cmdq_ctxt(struct hinic3_hwdev *hwdev, u8 cmdq_id)
+{
+ struct comm_cmd_set_cmdq_ctxt cmdq_ctxt = {};
+ struct mgmt_msg_params msg_params = {};
+ int err;
+
+ cmdq_init_queue_ctxt(hwdev, cmdq_id, &cmdq_ctxt.ctxt);
+ cmdq_ctxt.func_id = hinic3_global_func_id(hwdev);
+ cmdq_ctxt.cmdq_id = cmdq_id;
+
+ mgmt_msg_params_init_default(&msg_params, &cmdq_ctxt,
+ sizeof(cmdq_ctxt));
+
+ err = hinic3_send_mbox_to_mgmt(hwdev, MGMT_MOD_COMM,
+ COMM_CMD_SET_CMDQ_CTXT, &msg_params);
+ if (err || cmdq_ctxt.head.status) {
+ dev_err(hwdev->dev, "Failed to set cmdq ctxt, err: %d, status: 0x%x\n",
+ err, cmdq_ctxt.head.status);
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+static int hinic3_set_cmdq_ctxts(struct hinic3_hwdev *hwdev)
+{
+ struct hinic3_cmdqs *cmdqs = hwdev->cmdqs;
+ u8 cmdq_type;
+ int err;
+
+ for (cmdq_type = 0; cmdq_type < cmdqs->cmdq_num; cmdq_type++) {
+ err = hinic3_set_cmdq_ctxt(hwdev, cmdq_type);
+ if (err)
+ return err;
+ }
+
+ cmdqs->status |= HINIC3_CMDQ_ENABLE;
+ cmdqs->disable_flag = 0;
+
+ return 0;
+}
+
+static int create_cmdq_wq(struct hinic3_hwdev *hwdev,
+ struct hinic3_cmdqs *cmdqs)
+{
+ u8 cmdq_type;
+ int err;
+
+ for (cmdq_type = 0; cmdq_type < cmdqs->cmdq_num; cmdq_type++) {
+ err = hinic3_wq_create(hwdev, &cmdqs->cmdq[cmdq_type].wq,
+ CMDQ_DEPTH, CMDQ_WQEBB_SIZE);
+ if (err) {
+ dev_err(hwdev->dev, "Failed to create cmdq wq\n");
+ goto err_destroy_wq;
+ }
+ }
+
+	/* With 1-level Chip Logical Address (CLA), all cmdq wq page
+	 * addresses must fit in a single wq block.
+	 */
+ if (!hinic3_wq_is_0_level_cla(&cmdqs->cmdq[HINIC3_CMDQ_SYNC].wq)) {
+ if (cmdqs->cmdq[HINIC3_CMDQ_SYNC].wq.qpages.num_pages >
+ CMDQ_WQ_CLA_SIZE / sizeof(u64)) {
+ err = -EINVAL;
+ dev_err(hwdev->dev,
+ "Cmdq number of wq pages exceeds limit: %lu\n",
+ CMDQ_WQ_CLA_SIZE / sizeof(u64));
+ goto err_destroy_wq;
+ }
+
+ cmdqs->wq_block_vaddr =
+ dma_alloc_coherent(hwdev->dev, HINIC3_MIN_PAGE_SIZE,
+ &cmdqs->wq_block_paddr, GFP_KERNEL);
+ if (!cmdqs->wq_block_vaddr) {
+ err = -ENOMEM;
+ goto err_destroy_wq;
+ }
+
+ for (cmdq_type = 0; cmdq_type < cmdqs->cmdq_num; cmdq_type++)
+ memcpy((u8 *)cmdqs->wq_block_vaddr +
+ CMDQ_WQ_CLA_SIZE * cmdq_type,
+ cmdqs->cmdq[cmdq_type].wq.wq_block_vaddr,
+ cmdqs->cmdq[cmdq_type].wq.qpages.num_pages *
+ sizeof(__be64));
+ }
+
+ return 0;
+
+err_destroy_wq:
+ while (cmdq_type > 0) {
+ cmdq_type--;
+ hinic3_wq_destroy(hwdev, &cmdqs->cmdq[cmdq_type].wq);
+ }
+
+ return err;
+}
+
+static void destroy_cmdq_wq(struct hinic3_hwdev *hwdev,
+ struct hinic3_cmdqs *cmdqs)
+{
+ u8 cmdq_type;
+
+ if (cmdqs->wq_block_vaddr)
+ dma_free_coherent(hwdev->dev, HINIC3_MIN_PAGE_SIZE,
+ cmdqs->wq_block_vaddr, cmdqs->wq_block_paddr);
+
+ for (cmdq_type = 0; cmdq_type < cmdqs->cmdq_num; cmdq_type++)
+ hinic3_wq_destroy(hwdev, &cmdqs->cmdq[cmdq_type].wq);
+}
+
+static int init_cmdqs(struct hinic3_hwdev *hwdev)
+{
+ struct hinic3_cmdqs *cmdqs;
+
+ cmdqs = kzalloc(sizeof(*cmdqs), GFP_KERNEL);
+ if (!cmdqs)
+ return -ENOMEM;
+
+ hwdev->cmdqs = cmdqs;
+ cmdqs->hwdev = hwdev;
+ cmdqs->cmdq_num = hwdev->max_cmdq;
+
+ cmdqs->cmd_buf_pool = dma_pool_create("hinic3_cmdq", hwdev->dev,
+ CMDQ_BUF_SIZE, CMDQ_BUF_SIZE, 0);
+ if (!cmdqs->cmd_buf_pool) {
+ dev_err(hwdev->dev, "Failed to create cmdq buffer pool\n");
+ kfree(cmdqs);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void cmdq_flush_sync_cmd(struct hinic3_cmdq_cmd_info *cmd_info)
+{
+ if (cmd_info->cmd_type != HINIC3_CMD_TYPE_DIRECT_RESP)
+ return;
+
+ cmd_info->cmd_type = HINIC3_CMD_TYPE_FORCE_STOP;
+
+ if (cmd_info->cmpt_code &&
+ *cmd_info->cmpt_code == CMDQ_SEND_CMPT_CODE)
+ *cmd_info->cmpt_code = CMDQ_FORCE_STOP_CMPT_CODE;
+
+ if (cmd_info->done) {
+ complete(cmd_info->done);
+ cmd_info->done = NULL;
+ cmd_info->cmpt_code = NULL;
+ cmd_info->direct_resp = NULL;
+ cmd_info->errcode = NULL;
+ }
+}
+
+static void hinic3_cmdq_flush_cmd(struct hinic3_cmdq *cmdq)
+{
+ struct hinic3_cmdq_cmd_info *cmd_info;
+ u16 ci;
+
+ spin_lock_bh(&cmdq->cmdq_lock);
+ while (cmdq_read_wqe(&cmdq->wq, &ci)) {
+ hinic3_wq_put_wqebbs(&cmdq->wq, CMDQ_WQE_NUM_WQEBBS);
+ cmd_info = &cmdq->cmd_infos[ci];
+ if (cmd_info->cmd_type == HINIC3_CMD_TYPE_DIRECT_RESP)
+ cmdq_flush_sync_cmd(cmd_info);
+ }
+ spin_unlock_bh(&cmdq->cmdq_lock);
+}
+
+void hinic3_cmdq_flush_sync_cmd(struct hinic3_hwdev *hwdev)
+{
+ struct hinic3_cmdq *cmdq;
+ u16 wqe_cnt, wqe_idx, i;
+ struct hinic3_wq *wq;
+
+ cmdq = &hwdev->cmdqs->cmdq[HINIC3_CMDQ_SYNC];
+ spin_lock_bh(&cmdq->cmdq_lock);
+ wq = &cmdq->wq;
+ wqe_cnt = hinic3_wq_get_used(wq);
+ for (i = 0; i < wqe_cnt; i++) {
+ wqe_idx = (wq->cons_idx + i) & wq->idx_mask;
+ cmdq_flush_sync_cmd(cmdq->cmd_infos + wqe_idx);
+ }
+ spin_unlock_bh(&cmdq->cmdq_lock);
+}
+
+static void hinic3_cmdq_reset_all_cmd_buf(struct hinic3_cmdq *cmdq)
+{
+ u16 i;
+
+ for (i = 0; i < cmdq->wq.q_depth; i++)
+ cmdq_clear_cmd_buf(&cmdq->cmd_infos[i], cmdq->hwdev);
+}
+
+int hinic3_reinit_cmdq_ctxts(struct hinic3_hwdev *hwdev)
+{
+ struct hinic3_cmdqs *cmdqs = hwdev->cmdqs;
+ u8 cmdq_type;
+
+ for (cmdq_type = 0; cmdq_type < cmdqs->cmdq_num; cmdq_type++) {
+ hinic3_cmdq_flush_cmd(&cmdqs->cmdq[cmdq_type]);
+ hinic3_cmdq_reset_all_cmd_buf(&cmdqs->cmdq[cmdq_type]);
+ cmdqs->cmdq[cmdq_type].wrapped = 1;
+ hinic3_wq_reset(&cmdqs->cmdq[cmdq_type].wq);
+ }
+
+ return hinic3_set_cmdq_ctxts(hwdev);
+}
+
+int hinic3_cmdqs_init(struct hinic3_hwdev *hwdev)
+{
+ struct hinic3_cmdqs *cmdqs;
+ void __iomem *db_base;
+ u8 cmdq_type;
+ int err;
+
+ err = init_cmdqs(hwdev);
+ if (err)
+ goto err_out;
+
+ cmdqs = hwdev->cmdqs;
+ err = create_cmdq_wq(hwdev, cmdqs);
+ if (err)
+ goto err_free_cmdqs;
+
+ err = hinic3_alloc_db_addr(hwdev, &db_base, NULL);
+ if (err) {
+ dev_err(hwdev->dev, "Failed to allocate doorbell address\n");
+ goto err_destroy_cmdq_wq;
+ }
+ cmdqs->cmdqs_db_base = db_base;
+
+ for (cmdq_type = 0; cmdq_type < cmdqs->cmdq_num; cmdq_type++) {
+ err = init_cmdq(&cmdqs->cmdq[cmdq_type], hwdev, cmdq_type);
+ if (err) {
+			dev_err(hwdev->dev,
+				"Failed to initialize cmdq type: %d\n",
+				cmdq_type);
+ goto err_free_cmd_infos;
+ }
+ }
+
+ err = hinic3_set_cmdq_ctxts(hwdev);
+ if (err)
+ goto err_free_cmd_infos;
+
+ return 0;
+
+err_free_cmd_infos:
+ while (cmdq_type > 0) {
+ cmdq_type--;
+ kfree(cmdqs->cmdq[cmdq_type].cmd_infos);
+ }
+
+ hinic3_free_db_addr(hwdev, cmdqs->cmdqs_db_base);
+
+err_destroy_cmdq_wq:
+ destroy_cmdq_wq(hwdev, cmdqs);
+
+err_free_cmdqs:
+ dma_pool_destroy(cmdqs->cmd_buf_pool);
+ kfree(cmdqs);
+
+err_out:
+ return err;
+}
+
+void hinic3_cmdqs_free(struct hinic3_hwdev *hwdev)
+{
+ struct hinic3_cmdqs *cmdqs = hwdev->cmdqs;
+ u8 cmdq_type;
+
+ cmdqs->status &= ~HINIC3_CMDQ_ENABLE;
+
+ for (cmdq_type = 0; cmdq_type < cmdqs->cmdq_num; cmdq_type++) {
+ hinic3_cmdq_flush_cmd(&cmdqs->cmdq[cmdq_type]);
+ hinic3_cmdq_reset_all_cmd_buf(&cmdqs->cmdq[cmdq_type]);
+ kfree(cmdqs->cmdq[cmdq_type].cmd_infos);
+ }
+
+ hinic3_free_db_addr(hwdev, cmdqs->cmdqs_db_base);
+ destroy_cmdq_wq(hwdev, cmdqs);
+ dma_pool_destroy(cmdqs->cmd_buf_pool);
+ kfree(cmdqs);
+}
+
+bool hinic3_cmdq_idle(struct hinic3_cmdq *cmdq)
+{
+ return hinic3_wq_get_used(&cmdq->wq) == 0;
+}
diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_cmdq.h b/drivers/net/ethernet/huawei/hinic3/hinic3_cmdq.h
new file mode 100644
index 000000000000..f99c386a2780
--- /dev/null
+++ b/drivers/net/ethernet/huawei/hinic3/hinic3_cmdq.h
@@ -0,0 +1,156 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. */
+
+#ifndef _HINIC3_CMDQ_H_
+#define _HINIC3_CMDQ_H_
+
+#include <linux/dmapool.h>
+
+#include "hinic3_hw_intf.h"
+#include "hinic3_wq.h"
+
+#define CMDQ_DEPTH 4096
+
+struct cmdq_db {
+ __le32 db_head;
+ __le32 db_info;
+};
+
+/* hw defined cmdq wqe header */
+struct cmdq_header {
+ __le32 header_info;
+ __le32 saved_data;
+};
+
+struct cmdq_lcmd_bufdesc {
+ struct hinic3_sge sge;
+ __le64 rsvd2;
+ __le64 rsvd3;
+};
+
+struct cmdq_status {
+ __le32 status_info;
+};
+
+struct cmdq_ctrl {
+ __le32 ctrl_info;
+};
+
+struct cmdq_direct_resp {
+ __le64 val;
+ __le64 rsvd;
+};
+
+struct cmdq_completion {
+ union {
+ struct hinic3_sge sge;
+ struct cmdq_direct_resp direct;
+ } resp;
+};
+
+struct cmdq_wqe_scmd {
+ struct cmdq_header header;
+ __le64 rsvd3;
+ struct cmdq_status status;
+ struct cmdq_ctrl ctrl;
+ struct cmdq_completion completion;
+ __le32 rsvd10[6];
+};
+
+struct cmdq_wqe_lcmd {
+ struct cmdq_header header;
+ struct cmdq_status status;
+ struct cmdq_ctrl ctrl;
+ struct cmdq_completion completion;
+ struct cmdq_lcmd_bufdesc buf_desc;
+};
+
+struct cmdq_wqe {
+ union {
+ struct cmdq_wqe_scmd wqe_scmd;
+ struct cmdq_wqe_lcmd wqe_lcmd;
+ };
+};
+
+static_assert(sizeof(struct cmdq_wqe) == 64);
+
+enum hinic3_cmdq_type {
+ HINIC3_CMDQ_SYNC = 0,
+ HINIC3_MAX_CMDQ_TYPES = 4
+};
+
+enum hinic3_cmdq_status {
+ HINIC3_CMDQ_ENABLE = BIT(0),
+};
+
+enum hinic3_cmdq_cmd_type {
+ HINIC3_CMD_TYPE_NONE,
+ HINIC3_CMD_TYPE_DIRECT_RESP,
+ HINIC3_CMD_TYPE_FAKE_TIMEOUT,
+ HINIC3_CMD_TYPE_TIMEOUT,
+ HINIC3_CMD_TYPE_FORCE_STOP,
+};
+
+struct hinic3_cmd_buf {
+ void *buf;
+ dma_addr_t dma_addr;
+ __le16 size;
+ refcount_t ref_cnt;
+};
+
+struct hinic3_cmdq_cmd_info {
+ enum hinic3_cmdq_cmd_type cmd_type;
+ struct completion *done;
+ int *errcode;
+ /* completion code */
+ int *cmpt_code;
+ __le64 *direct_resp;
+ u64 cmdq_msg_id;
+ struct hinic3_cmd_buf *buf_in;
+};
+
+struct hinic3_cmdq {
+ struct hinic3_wq wq;
+ enum hinic3_cmdq_type cmdq_type;
+ u8 wrapped;
+ /* synchronize command submission with completions via event queue */
+ spinlock_t cmdq_lock;
+ struct hinic3_cmdq_cmd_info *cmd_infos;
+ struct hinic3_hwdev *hwdev;
+};
+
+struct hinic3_cmdqs {
+ struct hinic3_hwdev *hwdev;
+ struct hinic3_cmdq cmdq[HINIC3_MAX_CMDQ_TYPES];
+ struct dma_pool *cmd_buf_pool;
+ /* doorbell area */
+ u8 __iomem *cmdqs_db_base;
+
+ /* When command queue uses multiple memory pages (1-level CLA), this
+ * block will hold aggregated indirection table for all command queues
+ * of cmdqs. Not used for small cmdq (0-level CLA).
+ */
+ dma_addr_t wq_block_paddr;
+ void *wq_block_vaddr;
+
+ u32 status;
+ u32 disable_flag;
+ u8 cmdq_num;
+};
+
+int hinic3_cmdqs_init(struct hinic3_hwdev *hwdev);
+void hinic3_cmdqs_free(struct hinic3_hwdev *hwdev);
+
+struct hinic3_cmd_buf *hinic3_alloc_cmd_buf(struct hinic3_hwdev *hwdev);
+void hinic3_free_cmd_buf(struct hinic3_hwdev *hwdev,
+ struct hinic3_cmd_buf *cmd_buf);
+void hinic3_cmdq_ceq_handler(struct hinic3_hwdev *hwdev, __le32 ceqe_data);
+
+int hinic3_cmdq_direct_resp(struct hinic3_hwdev *hwdev, u8 mod, u8 cmd,
+ struct hinic3_cmd_buf *buf_in, __le64 *out_param);
+
+void hinic3_cmdq_flush_sync_cmd(struct hinic3_hwdev *hwdev);
+int hinic3_reinit_cmdq_ctxts(struct hinic3_hwdev *hwdev);
+bool hinic3_cmdq_idle(struct hinic3_cmdq *cmdq);
+
+#endif
diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_common.c b/drivers/net/ethernet/huawei/hinic3/hinic3_common.c
index 0aa42068728c..fe4778d152cf 100644
--- a/drivers/net/ethernet/huawei/hinic3/hinic3_common.c
+++ b/drivers/net/ethernet/huawei/hinic3/hinic3_common.c
@@ -3,6 +3,7 @@
#include <linux/delay.h>
#include <linux/dma-mapping.h>
+#include <linux/iopoll.h>
#include "hinic3_common.h"
@@ -51,3 +52,25 @@ void hinic3_dma_free_coherent_align(struct device *dev,
dma_free_coherent(dev, mem_align->real_size,
mem_align->ori_vaddr, mem_align->ori_paddr);
}
+
+int hinic3_wait_for_timeout(void *priv_data, wait_cpl_handler handler,
+ u32 wait_total_ms, u32 wait_once_us)
+{
+ enum hinic3_wait_return ret;
+ int err;
+
+ err = read_poll_timeout(handler, ret, ret == HINIC3_WAIT_PROCESS_CPL,
+ wait_once_us, wait_total_ms * USEC_PER_MSEC,
+ false, priv_data);
+
+ return err;
+}
+
+/* Data provided to/by the cmdq is arranged in structs with little-endian
+ * fields, but every dword (32 bits) must be swapped since HW swaps it again
+ * when copying it from/to host memory.
+ */
+void hinic3_cmdq_buf_swab32(void *data, int len)
+{
+ swab32_array(data, len / sizeof(u32));
+}
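
A short sketch of how a caller uses the polling helper above; the handler signature comes from the wait_cpl_handler typedef, while the device type and flag are hypothetical:

	static enum hinic3_wait_return check_db_ready(void *priv_data)
	{
		struct my_dev *dev = priv_data;	/* hypothetical caller state */

		return dev->db_ready ? HINIC3_WAIT_PROCESS_CPL :
				       HINIC3_WAIT_PROCESS_WAITING;
	}

	/* Poll every 1000 us, give up after 300 ms. */
	err = hinic3_wait_for_timeout(dev, check_db_ready, 300, 1000);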
diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_common.h b/drivers/net/ethernet/huawei/hinic3/hinic3_common.h
index bb795dace04c..a8fabfae90fb 100644
--- a/drivers/net/ethernet/huawei/hinic3/hinic3_common.h
+++ b/drivers/net/ethernet/huawei/hinic3/hinic3_common.h
@@ -18,10 +18,37 @@ struct hinic3_dma_addr_align {
dma_addr_t align_paddr;
};
+enum hinic3_wait_return {
+ HINIC3_WAIT_PROCESS_CPL = 0,
+ HINIC3_WAIT_PROCESS_WAITING = 1,
+};
+
+struct hinic3_sge {
+ __le32 hi_addr;
+ __le32 lo_addr;
+ __le32 len;
+ __le32 rsvd;
+};
+
+static inline void hinic3_set_sge(struct hinic3_sge *sge, dma_addr_t addr,
+ __le32 len)
+{
+ sge->hi_addr = cpu_to_le32(upper_32_bits(addr));
+ sge->lo_addr = cpu_to_le32(lower_32_bits(addr));
+ sge->len = len;
+ sge->rsvd = 0;
+}
+
int hinic3_dma_zalloc_coherent_align(struct device *dev, u32 size, u32 align,
gfp_t flag,
struct hinic3_dma_addr_align *mem_align);
void hinic3_dma_free_coherent_align(struct device *dev,
struct hinic3_dma_addr_align *mem_align);
+typedef enum hinic3_wait_return (*wait_cpl_handler)(void *priv_data);
+int hinic3_wait_for_timeout(void *priv_data, wait_cpl_handler handler,
+ u32 wait_total_ms, u32 wait_once_us);
+
+void hinic3_cmdq_buf_swab32(void *data, int len);
+
#endif
diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_csr.h b/drivers/net/ethernet/huawei/hinic3/hinic3_csr.h
new file mode 100644
index 000000000000..e7417e8efa99
--- /dev/null
+++ b/drivers/net/ethernet/huawei/hinic3/hinic3_csr.h
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. */
+
+#ifndef _HINIC3_CSR_H_
+#define _HINIC3_CSR_H_
+
+#define HINIC3_CFG_REGS_FLAG 0x40000000
+#define HINIC3_REGS_FLAG_MASK 0x3FFFFFFF
+
+#define HINIC3_VF_CFG_REG_OFFSET 0x2000
+
+/* HW interface registers */
+#define HINIC3_CSR_FUNC_ATTR0_ADDR (HINIC3_CFG_REGS_FLAG + 0x0)
+#define HINIC3_CSR_FUNC_ATTR1_ADDR (HINIC3_CFG_REGS_FLAG + 0x4)
+#define HINIC3_CSR_FUNC_ATTR2_ADDR (HINIC3_CFG_REGS_FLAG + 0x8)
+#define HINIC3_CSR_FUNC_ATTR3_ADDR (HINIC3_CFG_REGS_FLAG + 0xC)
+#define HINIC3_CSR_FUNC_ATTR4_ADDR (HINIC3_CFG_REGS_FLAG + 0x10)
+#define HINIC3_CSR_FUNC_ATTR5_ADDR (HINIC3_CFG_REGS_FLAG + 0x14)
+#define HINIC3_CSR_FUNC_ATTR6_ADDR (HINIC3_CFG_REGS_FLAG + 0x18)
+
+#define HINIC3_FUNC_CSR_MAILBOX_DATA_OFF 0x80
+#define HINIC3_FUNC_CSR_MAILBOX_CONTROL_OFF (HINIC3_CFG_REGS_FLAG + 0x0100)
+#define HINIC3_FUNC_CSR_MAILBOX_INT_OFF (HINIC3_CFG_REGS_FLAG + 0x0104)
+#define HINIC3_FUNC_CSR_MAILBOX_RESULT_H_OFF (HINIC3_CFG_REGS_FLAG + 0x0108)
+#define HINIC3_FUNC_CSR_MAILBOX_RESULT_L_OFF (HINIC3_CFG_REGS_FLAG + 0x010C)
+
+#define HINIC3_CSR_DMA_ATTR_TBL_ADDR (HINIC3_CFG_REGS_FLAG + 0x380)
+#define HINIC3_CSR_DMA_ATTR_INDIR_IDX_ADDR (HINIC3_CFG_REGS_FLAG + 0x390)
+
+/* MSI-X registers */
+#define HINIC3_CSR_FUNC_MSI_CLR_WR_ADDR (HINIC3_CFG_REGS_FLAG + 0x58)
+
+#define HINIC3_MSI_CLR_INDIR_RESEND_TIMER_CLR_MASK BIT(0)
+#define HINIC3_MSI_CLR_INDIR_INT_MSK_SET_MASK BIT(1)
+#define HINIC3_MSI_CLR_INDIR_INT_MSK_CLR_MASK BIT(2)
+#define HINIC3_MSI_CLR_INDIR_AUTO_MSK_SET_MASK BIT(3)
+#define HINIC3_MSI_CLR_INDIR_AUTO_MSK_CLR_MASK BIT(4)
+#define HINIC3_MSI_CLR_INDIR_SIMPLE_INDIR_IDX_MASK GENMASK(31, 22)
+#define HINIC3_MSI_CLR_INDIR_SET(val, member) \
+ FIELD_PREP(HINIC3_MSI_CLR_INDIR_##member##_MASK, val)
+
+/* EQ registers */
+#define HINIC3_AEQ_INDIR_IDX_ADDR (HINIC3_CFG_REGS_FLAG + 0x210)
+#define HINIC3_CEQ_INDIR_IDX_ADDR (HINIC3_CFG_REGS_FLAG + 0x290)
+
+#define HINIC3_EQ_INDIR_IDX_ADDR(type) \
+ ((type == HINIC3_AEQ) ? HINIC3_AEQ_INDIR_IDX_ADDR : \
+ HINIC3_CEQ_INDIR_IDX_ADDR)
+
+#define HINIC3_AEQ_MTT_OFF_BASE_ADDR (HINIC3_CFG_REGS_FLAG + 0x240)
+#define HINIC3_CEQ_MTT_OFF_BASE_ADDR (HINIC3_CFG_REGS_FLAG + 0x2C0)
+
+#define HINIC3_CSR_EQ_PAGE_OFF_STRIDE 8
+
+#define HINIC3_AEQ_HI_PHYS_ADDR_REG(pg_num) \
+ (HINIC3_AEQ_MTT_OFF_BASE_ADDR + (pg_num) * \
+ HINIC3_CSR_EQ_PAGE_OFF_STRIDE)
+
+#define HINIC3_AEQ_LO_PHYS_ADDR_REG(pg_num) \
+ (HINIC3_AEQ_MTT_OFF_BASE_ADDR + (pg_num) * \
+ HINIC3_CSR_EQ_PAGE_OFF_STRIDE + 4)
+
+#define HINIC3_CEQ_HI_PHYS_ADDR_REG(pg_num) \
+ (HINIC3_CEQ_MTT_OFF_BASE_ADDR + (pg_num) * \
+ HINIC3_CSR_EQ_PAGE_OFF_STRIDE)
+
+#define HINIC3_CEQ_LO_PHYS_ADDR_REG(pg_num) \
+ (HINIC3_CEQ_MTT_OFF_BASE_ADDR + (pg_num) * \
+ HINIC3_CSR_EQ_PAGE_OFF_STRIDE + 4)
+
+#define HINIC3_CSR_AEQ_CTRL_0_ADDR (HINIC3_CFG_REGS_FLAG + 0x200)
+#define HINIC3_CSR_AEQ_CTRL_1_ADDR (HINIC3_CFG_REGS_FLAG + 0x204)
+#define HINIC3_CSR_AEQ_PROD_IDX_ADDR (HINIC3_CFG_REGS_FLAG + 0x20C)
+#define HINIC3_CSR_AEQ_CI_SIMPLE_INDIR_ADDR (HINIC3_CFG_REGS_FLAG + 0x50)
+
+#define HINIC3_CSR_CEQ_PROD_IDX_ADDR (HINIC3_CFG_REGS_FLAG + 0x28c)
+#define HINIC3_CSR_CEQ_CI_SIMPLE_INDIR_ADDR (HINIC3_CFG_REGS_FLAG + 0x54)
+
+#endif
diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_eqs.c b/drivers/net/ethernet/huawei/hinic3/hinic3_eqs.c
new file mode 100644
index 000000000000..01686472985b
--- /dev/null
+++ b/drivers/net/ethernet/huawei/hinic3/hinic3_eqs.c
@@ -0,0 +1,776 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved.
+
+#include <linux/delay.h>
+
+#include "hinic3_csr.h"
+#include "hinic3_eqs.h"
+#include "hinic3_hwdev.h"
+#include "hinic3_hwif.h"
+#include "hinic3_mbox.h"
+
+#define AEQ_CTRL_0_INTR_IDX_MASK GENMASK(9, 0)
+#define AEQ_CTRL_0_DMA_ATTR_MASK GENMASK(17, 12)
+#define AEQ_CTRL_0_PCI_INTF_IDX_MASK GENMASK(22, 20)
+#define AEQ_CTRL_0_INTR_MODE_MASK BIT(31)
+#define AEQ_CTRL_0_SET(val, member) \
+ FIELD_PREP(AEQ_CTRL_0_##member##_MASK, val)
+
+#define AEQ_CTRL_1_LEN_MASK GENMASK(20, 0)
+#define AEQ_CTRL_1_ELEM_SIZE_MASK GENMASK(25, 24)
+#define AEQ_CTRL_1_PAGE_SIZE_MASK GENMASK(31, 28)
+#define AEQ_CTRL_1_SET(val, member) \
+ FIELD_PREP(AEQ_CTRL_1_##member##_MASK, val)
+
+#define CEQ_CTRL_0_INTR_IDX_MASK GENMASK(9, 0)
+#define CEQ_CTRL_0_DMA_ATTR_MASK GENMASK(17, 12)
+#define CEQ_CTRL_0_LIMIT_KICK_MASK GENMASK(23, 20)
+#define CEQ_CTRL_0_PCI_INTF_IDX_MASK GENMASK(25, 24)
+#define CEQ_CTRL_0_PAGE_SIZE_MASK GENMASK(30, 27)
+#define CEQ_CTRL_0_INTR_MODE_MASK BIT(31)
+#define CEQ_CTRL_0_SET(val, member) \
+ FIELD_PREP(CEQ_CTRL_0_##member##_MASK, val)
+
+#define CEQ_CTRL_1_LEN_MASK GENMASK(19, 0)
+#define CEQ_CTRL_1_SET(val, member) \
+ FIELD_PREP(CEQ_CTRL_1_##member##_MASK, val)
+
+#define CEQE_TYPE_MASK GENMASK(25, 23)
+#define CEQE_TYPE(type) \
+ FIELD_GET(CEQE_TYPE_MASK, le32_to_cpu(type))
+
+#define CEQE_DATA_MASK GENMASK(25, 0)
+#define CEQE_DATA(data) ((data) & cpu_to_le32(CEQE_DATA_MASK))
+
+#define EQ_ELEM_DESC_TYPE_MASK GENMASK(6, 0)
+#define EQ_ELEM_DESC_SRC_MASK BIT(7)
+#define EQ_ELEM_DESC_SIZE_MASK GENMASK(15, 8)
+#define EQ_ELEM_DESC_WRAPPED_MASK BIT(31)
+#define EQ_ELEM_DESC_GET(val, member) \
+ FIELD_GET(EQ_ELEM_DESC_##member##_MASK, le32_to_cpu(val))
+
+#define EQ_CI_SIMPLE_INDIR_CI_MASK GENMASK(20, 0)
+#define EQ_CI_SIMPLE_INDIR_ARMED_MASK BIT(21)
+#define EQ_CI_SIMPLE_INDIR_AEQ_IDX_MASK GENMASK(31, 30)
+#define EQ_CI_SIMPLE_INDIR_CEQ_IDX_MASK GENMASK(31, 24)
+#define EQ_CI_SIMPLE_INDIR_SET(val, member) \
+ FIELD_PREP(EQ_CI_SIMPLE_INDIR_##member##_MASK, val)
+
+#define EQ_CI_SIMPLE_INDIR_REG_ADDR(eq) \
+ (((eq)->type == HINIC3_AEQ) ? \
+ HINIC3_CSR_AEQ_CI_SIMPLE_INDIR_ADDR : \
+ HINIC3_CSR_CEQ_CI_SIMPLE_INDIR_ADDR)
+
+#define EQ_PROD_IDX_REG_ADDR(eq) \
+ (((eq)->type == HINIC3_AEQ) ? \
+ HINIC3_CSR_AEQ_PROD_IDX_ADDR : HINIC3_CSR_CEQ_PROD_IDX_ADDR)
+
+#define EQ_HI_PHYS_ADDR_REG(type, pg_num) \
+ (((type) == HINIC3_AEQ) ? \
+ HINIC3_AEQ_HI_PHYS_ADDR_REG(pg_num) : \
+ HINIC3_CEQ_HI_PHYS_ADDR_REG(pg_num))
+
+#define EQ_LO_PHYS_ADDR_REG(type, pg_num) \
+ (((type) == HINIC3_AEQ) ? \
+ HINIC3_AEQ_LO_PHYS_ADDR_REG(pg_num) : \
+ HINIC3_CEQ_LO_PHYS_ADDR_REG(pg_num))
+
+#define EQ_MSIX_RESEND_TIMER_CLEAR 1
+
+#define HINIC3_EQ_MAX_PAGES(eq) \
+ ((eq)->type == HINIC3_AEQ ? \
+ HINIC3_AEQ_MAX_PAGES : HINIC3_CEQ_MAX_PAGES)
+
+#define HINIC3_TASK_PROCESS_EQE_LIMIT 1024
+#define HINIC3_EQ_UPDATE_CI_STEP 64
+#define HINIC3_EQS_WQ_NAME "hinic3_eqs"
+
+#define HINIC3_EQ_VALID_SHIFT 31
+#define HINIC3_EQ_WRAPPED(eq) \
+ ((eq)->wrapped << HINIC3_EQ_VALID_SHIFT)
+
+#define HINIC3_EQ_WRAPPED_SHIFT 20
+#define HINIC3_EQ_CONS_IDX(eq) \
+ ((eq)->cons_idx | ((eq)->wrapped << HINIC3_EQ_WRAPPED_SHIFT))
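+
+/* The consumer index reported to HW carries the wrapped (ownership) bit in
+ * bit 20, mirroring the WRAPPED bit HW sets in each EQ element descriptor.
+ */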
+
+static const struct hinic3_aeq_elem *get_curr_aeq_elem(const struct hinic3_eq *eq)
+{
+ return get_q_element(&eq->qpages, eq->cons_idx, NULL);
+}
+
+static const __be32 *get_curr_ceq_elem(const struct hinic3_eq *eq)
+{
+ return get_q_element(&eq->qpages, eq->cons_idx, NULL);
+}
+
+int hinic3_aeq_register_cb(struct hinic3_hwdev *hwdev,
+ enum hinic3_aeq_type event,
+ hinic3_aeq_event_cb hwe_cb)
+{
+ struct hinic3_aeqs *aeqs;
+
+ aeqs = hwdev->aeqs;
+ aeqs->aeq_cb[event] = hwe_cb;
+ spin_lock_init(&aeqs->aeq_lock);
+
+ return 0;
+}
+
+void hinic3_aeq_unregister_cb(struct hinic3_hwdev *hwdev,
+ enum hinic3_aeq_type event)
+{
+ struct hinic3_aeqs *aeqs;
+
+ aeqs = hwdev->aeqs;
+
+ spin_lock_bh(&aeqs->aeq_lock);
+ aeqs->aeq_cb[event] = NULL;
+ spin_unlock_bh(&aeqs->aeq_lock);
+}
+
+int hinic3_ceq_register_cb(struct hinic3_hwdev *hwdev,
+ enum hinic3_ceq_event event,
+ hinic3_ceq_event_cb callback)
+{
+ struct hinic3_ceqs *ceqs;
+
+ ceqs = hwdev->ceqs;
+ ceqs->ceq_cb[event] = callback;
+ spin_lock_init(&ceqs->ceq_lock);
+
+ return 0;
+}
+
+void hinic3_ceq_unregister_cb(struct hinic3_hwdev *hwdev,
+ enum hinic3_ceq_event event)
+{
+ struct hinic3_ceqs *ceqs;
+
+ ceqs = hwdev->ceqs;
+
+ spin_lock_bh(&ceqs->ceq_lock);
+ ceqs->ceq_cb[event] = NULL;
+ spin_unlock_bh(&ceqs->ceq_lock);
+}
+
+/* Set consumer index in the hw. */
+static void set_eq_cons_idx(struct hinic3_eq *eq, u32 arm_state)
+{
+ u32 addr = EQ_CI_SIMPLE_INDIR_REG_ADDR(eq);
+ u32 eq_wrap_ci, val;
+
+ eq_wrap_ci = HINIC3_EQ_CONS_IDX(eq);
+ val = EQ_CI_SIMPLE_INDIR_SET(arm_state, ARMED);
+ if (eq->type == HINIC3_AEQ) {
+ val = val |
+ EQ_CI_SIMPLE_INDIR_SET(eq_wrap_ci, CI) |
+ EQ_CI_SIMPLE_INDIR_SET(eq->q_id, AEQ_IDX);
+ } else {
+ val = val |
+ EQ_CI_SIMPLE_INDIR_SET(eq_wrap_ci, CI) |
+ EQ_CI_SIMPLE_INDIR_SET(eq->q_id, CEQ_IDX);
+ }
+
+ hinic3_hwif_write_reg(eq->hwdev->hwif, addr, val);
+}
+
+static struct hinic3_ceqs *ceq_to_ceqs(const struct hinic3_eq *eq)
+{
+ return container_of(eq, struct hinic3_ceqs, ceq[eq->q_id]);
+}
+
+static void ceq_event_handler(struct hinic3_ceqs *ceqs, __le32 ceqe)
+{
+ enum hinic3_ceq_event event = CEQE_TYPE(ceqe);
+ struct hinic3_hwdev *hwdev = ceqs->hwdev;
+ __le32 ceqe_data = CEQE_DATA(ceqe);
+
+ if (event >= HINIC3_MAX_CEQ_EVENTS) {
+ dev_warn(hwdev->dev, "Ceq unknown event:%d, ceqe data: 0x%x\n",
+ event, ceqe_data);
+ return;
+ }
+
+ spin_lock_bh(&ceqs->ceq_lock);
+ if (ceqs->ceq_cb[event])
+ ceqs->ceq_cb[event](hwdev, ceqe_data);
+
+ spin_unlock_bh(&ceqs->ceq_lock);
+}
+
+static struct hinic3_aeqs *aeq_to_aeqs(const struct hinic3_eq *eq)
+{
+ return container_of(eq, struct hinic3_aeqs, aeq[eq->q_id]);
+}
+
+static void aeq_event_handler(struct hinic3_aeqs *aeqs, __le32 aeqe,
+ const struct hinic3_aeq_elem *aeqe_pos)
+{
+ struct hinic3_hwdev *hwdev = aeqs->hwdev;
+ u8 data[HINIC3_AEQE_DATA_SIZE], size;
+ enum hinic3_aeq_type event;
+ hinic3_aeq_event_cb hwe_cb;
+
+ if (EQ_ELEM_DESC_GET(aeqe, SRC))
+ return;
+
+ event = EQ_ELEM_DESC_GET(aeqe, TYPE);
+ if (event >= HINIC3_MAX_AEQ_EVENTS) {
+ dev_warn(hwdev->dev, "Aeq unknown event:%d\n", event);
+ return;
+ }
+
+ memcpy(data, aeqe_pos->aeqe_data, HINIC3_AEQE_DATA_SIZE);
+ swab32_array((u32 *)data, HINIC3_AEQE_DATA_SIZE / sizeof(u32));
+ size = EQ_ELEM_DESC_GET(aeqe, SIZE);
+
+ spin_lock_bh(&aeqs->aeq_lock);
+ hwe_cb = aeqs->aeq_cb[event];
+ if (hwe_cb)
+ hwe_cb(aeqs->hwdev, data, size);
+ spin_unlock_bh(&aeqs->aeq_lock);
+}
+
+static int aeq_irq_handler(struct hinic3_eq *eq)
+{
+ const struct hinic3_aeq_elem *aeqe_pos;
+ struct hinic3_aeqs *aeqs;
+ u32 i, eqe_cnt = 0;
+ __le32 aeqe;
+
+ aeqs = aeq_to_aeqs(eq);
+ for (i = 0; i < HINIC3_TASK_PROCESS_EQE_LIMIT; i++) {
+ aeqe_pos = get_curr_aeq_elem(eq);
+ aeqe = (__force __le32)swab32((__force __u32)aeqe_pos->desc);
+		/* HW toggles the wrapped bit when it posts a new eq element */
+ if (EQ_ELEM_DESC_GET(aeqe, WRAPPED) == eq->wrapped)
+ return 0;
+
+ /* Prevent speculative reads from element */
+ dma_rmb();
+ aeq_event_handler(aeqs, aeqe, aeqe_pos);
+ eq->cons_idx++;
+ if (eq->cons_idx == eq->eq_len) {
+ eq->cons_idx = 0;
+ eq->wrapped = !eq->wrapped;
+ }
+
+ if (++eqe_cnt >= HINIC3_EQ_UPDATE_CI_STEP) {
+ eqe_cnt = 0;
+ set_eq_cons_idx(eq, HINIC3_EQ_NOT_ARMED);
+ }
+ }
+
+ return -EAGAIN;
+}
+
+static int ceq_irq_handler(struct hinic3_eq *eq)
+{
+ struct hinic3_ceqs *ceqs;
+ u32 eqe_cnt = 0;
+ __be32 ceqe_raw;
+ __le32 ceqe;
+ u32 i;
+
+ ceqs = ceq_to_ceqs(eq);
+ for (i = 0; i < HINIC3_TASK_PROCESS_EQE_LIMIT; i++) {
+ ceqe_raw = *get_curr_ceq_elem(eq);
+ ceqe = (__force __le32)swab32((__force __u32)ceqe_raw);
+
+ /* HW toggles the wrapped bit when it adds an eq element */
+ if (EQ_ELEM_DESC_GET(ceqe, WRAPPED) == eq->wrapped)
+ return 0;
+
+ ceq_event_handler(ceqs, ceqe);
+ eq->cons_idx++;
+ if (eq->cons_idx == eq->eq_len) {
+ eq->cons_idx = 0;
+ eq->wrapped = !eq->wrapped;
+ }
+
+ if (++eqe_cnt >= HINIC3_EQ_UPDATE_CI_STEP) {
+ eqe_cnt = 0;
+ set_eq_cons_idx(eq, HINIC3_EQ_NOT_ARMED);
+ }
+ }
+
+ return -EAGAIN;
+}
+
+static void reschedule_aeq_handler(struct hinic3_eq *eq)
+{
+ struct hinic3_aeqs *aeqs = aeq_to_aeqs(eq);
+
+ queue_work(aeqs->workq, &eq->aeq_work);
+}
+
+static int eq_irq_handler(struct hinic3_eq *eq)
+{
+ int err;
+
+ if (eq->type == HINIC3_AEQ)
+ err = aeq_irq_handler(eq);
+ else
+ err = ceq_irq_handler(eq);
+
+ set_eq_cons_idx(eq, err ? HINIC3_EQ_NOT_ARMED :
+ HINIC3_EQ_ARMED);
+
+ return err;
+}
+
+static void aeq_irq_work(struct work_struct *work)
+{
+ struct hinic3_eq *eq = container_of(work, struct hinic3_eq, aeq_work);
+ int err;
+
+ err = eq_irq_handler(eq);
+ if (err)
+ reschedule_aeq_handler(eq);
+}
+
+static irqreturn_t aeq_interrupt(int irq, void *data)
+{
+ struct workqueue_struct *workq;
+ struct hinic3_eq *aeq = data;
+ struct hinic3_hwdev *hwdev;
+ struct hinic3_aeqs *aeqs;
+
+ aeqs = aeq_to_aeqs(aeq);
+ hwdev = aeq->hwdev;
+
+ workq = aeqs->workq;
+
+ /* clear resend timer cnt register */
+ hinic3_msix_intr_clear_resend_bit(hwdev, aeq->msix_entry_idx,
+ EQ_MSIX_RESEND_TIMER_CLEAR);
+ queue_work(workq, &aeq->aeq_work);
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t ceq_interrupt(int irq, void *data)
+{
+ struct hinic3_eq *ceq = data;
+ int err;
+
+ /* clear resend timer counters */
+ hinic3_msix_intr_clear_resend_bit(ceq->hwdev, ceq->msix_entry_idx,
+ EQ_MSIX_RESEND_TIMER_CLEAR);
+ err = eq_irq_handler(ceq);
+ if (err)
+ return IRQ_NONE;
+
+ return IRQ_HANDLED;
+}
+
+static int hinic3_set_ceq_ctrl_reg(struct hinic3_hwdev *hwdev, u16 q_id,
+ u32 ctrl0, u32 ctrl1)
+{
+ struct comm_cmd_set_ceq_ctrl_reg ceq_ctrl = {};
+ struct mgmt_msg_params msg_params = {};
+ int err;
+
+ ceq_ctrl.func_id = hinic3_global_func_id(hwdev);
+ ceq_ctrl.q_id = q_id;
+ ceq_ctrl.ctrl0 = ctrl0;
+ ceq_ctrl.ctrl1 = ctrl1;
+
+ mgmt_msg_params_init_default(&msg_params, &ceq_ctrl, sizeof(ceq_ctrl));
+
+ err = hinic3_send_mbox_to_mgmt(hwdev, MGMT_MOD_COMM,
+ COMM_CMD_SET_CEQ_CTRL_REG, &msg_params);
+ if (err || ceq_ctrl.head.status) {
+ dev_err(hwdev->dev, "Failed to set ceq %u ctrl reg, err: %d status: 0x%x\n",
+ q_id, err, ceq_ctrl.head.status);
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+static int set_eq_ctrls(struct hinic3_eq *eq)
+{
+ struct hinic3_hwif *hwif = eq->hwdev->hwif;
+ struct hinic3_queue_pages *qpages;
+ u8 pci_intf_idx, elem_size;
+ u32 mask, ctrl0, ctrl1;
+ u32 page_size_val;
+ int err;
+
+ qpages = &eq->qpages;
+ page_size_val = ilog2(qpages->page_size / HINIC3_MIN_PAGE_SIZE);
+ pci_intf_idx = hwif->attr.pci_intf_idx;
+
+ if (eq->type == HINIC3_AEQ) {
+ /* set ctrl0 using read-modify-write */
+ mask = AEQ_CTRL_0_INTR_IDX_MASK |
+ AEQ_CTRL_0_DMA_ATTR_MASK |
+ AEQ_CTRL_0_PCI_INTF_IDX_MASK |
+ AEQ_CTRL_0_INTR_MODE_MASK;
+ ctrl0 = hinic3_hwif_read_reg(hwif, HINIC3_CSR_AEQ_CTRL_0_ADDR);
+ ctrl0 = (ctrl0 & ~mask) |
+ AEQ_CTRL_0_SET(eq->msix_entry_idx, INTR_IDX) |
+ AEQ_CTRL_0_SET(0, DMA_ATTR) |
+ AEQ_CTRL_0_SET(pci_intf_idx, PCI_INTF_IDX) |
+ AEQ_CTRL_0_SET(HINIC3_INTR_MODE_ARMED, INTR_MODE);
+ hinic3_hwif_write_reg(hwif, HINIC3_CSR_AEQ_CTRL_0_ADDR, ctrl0);
+
+ /* HW expects the log2 of the element size in 32-byte units. */
+ elem_size = qpages->elem_size_shift - 5;
+ ctrl1 = AEQ_CTRL_1_SET(eq->eq_len, LEN) |
+ AEQ_CTRL_1_SET(elem_size, ELEM_SIZE) |
+ AEQ_CTRL_1_SET(page_size_val, PAGE_SIZE);
+ hinic3_hwif_write_reg(hwif, HINIC3_CSR_AEQ_CTRL_1_ADDR, ctrl1);
+ } else {
+ ctrl0 = CEQ_CTRL_0_SET(eq->msix_entry_idx, INTR_IDX) |
+ CEQ_CTRL_0_SET(0, DMA_ATTR) |
+ CEQ_CTRL_0_SET(0, LIMIT_KICK) |
+ CEQ_CTRL_0_SET(pci_intf_idx, PCI_INTF_IDX) |
+ CEQ_CTRL_0_SET(page_size_val, PAGE_SIZE) |
+ CEQ_CTRL_0_SET(HINIC3_INTR_MODE_ARMED, INTR_MODE);
+
+ ctrl1 = CEQ_CTRL_1_SET(eq->eq_len, LEN);
+
+ /* set ceq ctrl reg through mgmt cpu */
+ err = hinic3_set_ceq_ctrl_reg(eq->hwdev, eq->q_id, ctrl0,
+ ctrl1);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static void ceq_elements_init(struct hinic3_eq *eq, u32 init_val)
+{
+ __be32 *ceqe;
+ u32 i;
+
+ for (i = 0; i < eq->eq_len; i++) {
+ ceqe = get_q_element(&eq->qpages, i, NULL);
+ *ceqe = cpu_to_be32(init_val);
+ }
+
+ wmb(); /* Ensure initialized ceq elements are written before HW use */
+}
+
+static void aeq_elements_init(struct hinic3_eq *eq, u32 init_val)
+{
+ struct hinic3_aeq_elem *aeqe;
+ u32 i;
+
+ for (i = 0; i < eq->eq_len; i++) {
+ aeqe = get_q_element(&eq->qpages, i, NULL);
+ aeqe->desc = cpu_to_be32(init_val);
+ }
+
+ wmb(); /* Ensure initialized aeq elements are written before HW use */
+}
+
+static void eq_elements_init(struct hinic3_eq *eq, u32 init_val)
+{
+ if (eq->type == HINIC3_AEQ)
+ aeq_elements_init(eq, init_val);
+ else
+ ceq_elements_init(eq, init_val);
+}
+
+static int alloc_eq_pages(struct hinic3_eq *eq)
+{
+ struct hinic3_hwif *hwif = eq->hwdev->hwif;
+ struct hinic3_queue_pages *qpages;
+ dma_addr_t page_paddr;
+ u32 reg, init_val;
+ u16 pg_idx;
+ int err;
+
+ qpages = &eq->qpages;
+ err = hinic3_queue_pages_alloc(eq->hwdev, qpages, HINIC3_MIN_PAGE_SIZE);
+ if (err)
+ return err;
+
+ for (pg_idx = 0; pg_idx < qpages->num_pages; pg_idx++) {
+ page_paddr = qpages->pages[pg_idx].align_paddr;
+ reg = EQ_HI_PHYS_ADDR_REG(eq->type, pg_idx);
+ hinic3_hwif_write_reg(hwif, reg, upper_32_bits(page_paddr));
+ reg = EQ_LO_PHYS_ADDR_REG(eq->type, pg_idx);
+ hinic3_hwif_write_reg(hwif, reg, lower_32_bits(page_paddr));
+ }
+
+ init_val = HINIC3_EQ_WRAPPED(eq);
+ eq_elements_init(eq, init_val);
+
+ return 0;
+}
+
+static void eq_calc_page_size_and_num(struct hinic3_eq *eq, u32 elem_size)
+{
+ u32 max_pages, min_page_size, page_size, total_size;
+
+ /* No need for complicated arithmetic: all values are powers of 2, so
+ * multiplication yields a power of 2 and division yields a power of 2
+ * with no remainder.
+ */
+ max_pages = HINIC3_EQ_MAX_PAGES(eq);
+ min_page_size = HINIC3_MIN_PAGE_SIZE;
+ total_size = eq->eq_len * elem_size;
+
+ if (total_size <= max_pages * min_page_size)
+ page_size = min_page_size;
+ else
+ page_size = total_size / max_pages;
+
+ hinic3_queue_pages_init(&eq->qpages, eq->eq_len, page_size, elem_size);
+}
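+
+/* Worked example, assuming HINIC3_MIN_PAGE_SIZE is 4K: a default AEQ has
+ * 0x10000 elements of 64B, so total_size is 4MB. That exceeds
+ * 4 pages * 4K, giving page_size = 4MB / 4 = 1MB over 4 pages. A default
+ * CEQ (0x10000 elements of 4B, 256K total, at most 8 pages) ends up with
+ * eight 32K pages.
+ */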
+
+static int request_eq_irq(struct hinic3_eq *eq)
+{
+ int err;
+
+ if (eq->type == HINIC3_AEQ) {
+ INIT_WORK(&eq->aeq_work, aeq_irq_work);
+ snprintf(eq->irq_name, sizeof(eq->irq_name),
+ "hinic3_aeq%u@pci:%s", eq->q_id,
+ pci_name(eq->hwdev->pdev));
+ err = request_irq(eq->irq_id, aeq_interrupt, 0,
+ eq->irq_name, eq);
+ } else {
+ snprintf(eq->irq_name, sizeof(eq->irq_name),
+ "hinic3_ceq%u@pci:%s", eq->q_id,
+ pci_name(eq->hwdev->pdev));
+ err = request_threaded_irq(eq->irq_id, NULL, ceq_interrupt,
+ IRQF_ONESHOT, eq->irq_name, eq);
+ }
+
+ return err;
+}
+
+static void reset_eq(struct hinic3_eq *eq)
+{
+ /* clear eq_len to force eqe drop in hardware */
+ if (eq->type == HINIC3_AEQ)
+ hinic3_hwif_write_reg(eq->hwdev->hwif,
+ HINIC3_CSR_AEQ_CTRL_1_ADDR, 0);
+ else
+ hinic3_set_ceq_ctrl_reg(eq->hwdev, eq->q_id, 0, 0);
+
+ hinic3_hwif_write_reg(eq->hwdev->hwif, EQ_PROD_IDX_REG_ADDR(eq), 0);
+}
+
+static int init_eq(struct hinic3_eq *eq, struct hinic3_hwdev *hwdev, u16 q_id,
+ u32 q_len, enum hinic3_eq_type type,
+ struct msix_entry *msix_entry)
+{
+ u32 elem_size;
+ int err;
+
+ eq->hwdev = hwdev;
+ eq->q_id = q_id;
+ eq->type = type;
+ eq->eq_len = q_len;
+
+ /* Indirect access should set q_id first */
+ hinic3_hwif_write_reg(hwdev->hwif, HINIC3_EQ_INDIR_IDX_ADDR(eq->type),
+ eq->q_id);
+
+ reset_eq(eq);
+
+ eq->cons_idx = 0;
+ eq->wrapped = 0;
+
+ elem_size = (type == HINIC3_AEQ) ? HINIC3_AEQE_SIZE : HINIC3_CEQE_SIZE;
+ eq_calc_page_size_and_num(eq, elem_size);
+
+ err = alloc_eq_pages(eq);
+ if (err) {
+ dev_err(hwdev->dev, "Failed to allocate pages for eq\n");
+ return err;
+ }
+
+ eq->msix_entry_idx = msix_entry->entry;
+ eq->irq_id = msix_entry->vector;
+
+ err = set_eq_ctrls(eq);
+ if (err) {
+ dev_err(hwdev->dev, "Failed to set ctrls for eq\n");
+ goto err_free_queue_pages;
+ }
+
+ set_eq_cons_idx(eq, HINIC3_EQ_ARMED);
+
+ err = request_eq_irq(eq);
+ if (err) {
+ dev_err(hwdev->dev,
+ "Failed to request irq for the eq, err: %d\n", err);
+ goto err_free_queue_pages;
+ }
+
+ hinic3_set_msix_state(hwdev, eq->msix_entry_idx, HINIC3_MSIX_DISABLE);
+
+ return 0;
+
+err_free_queue_pages:
+ hinic3_queue_pages_free(hwdev, &eq->qpages);
+
+ return err;
+}
+
+static void remove_eq(struct hinic3_eq *eq)
+{
+ hinic3_set_msix_state(eq->hwdev, eq->msix_entry_idx,
+ HINIC3_MSIX_DISABLE);
+ free_irq(eq->irq_id, eq);
+ /* Indirect access should set q_id first */
+ hinic3_hwif_write_reg(eq->hwdev->hwif,
+ HINIC3_EQ_INDIR_IDX_ADDR(eq->type),
+ eq->q_id);
+
+ if (eq->type == HINIC3_AEQ) {
+ disable_work_sync(&eq->aeq_work);
+ /* clear eq_len to stop hw from accessing host memory */
+ hinic3_hwif_write_reg(eq->hwdev->hwif,
+ HINIC3_CSR_AEQ_CTRL_1_ADDR, 0);
+ } else {
+ hinic3_set_ceq_ctrl_reg(eq->hwdev, eq->q_id, 0, 0);
+ }
+
+ /* update consumer index to avoid invalid interrupt */
+ eq->cons_idx = hinic3_hwif_read_reg(eq->hwdev->hwif,
+ EQ_PROD_IDX_REG_ADDR(eq));
+ set_eq_cons_idx(eq, HINIC3_EQ_NOT_ARMED);
+ hinic3_queue_pages_free(eq->hwdev, &eq->qpages);
+}
+
+int hinic3_aeqs_init(struct hinic3_hwdev *hwdev, u16 num_aeqs,
+ struct msix_entry *msix_entries)
+{
+ struct hinic3_aeqs *aeqs;
+ u16 q_id;
+ int err;
+
+ aeqs = kzalloc(sizeof(*aeqs), GFP_KERNEL);
+ if (!aeqs)
+ return -ENOMEM;
+
+ hwdev->aeqs = aeqs;
+ aeqs->hwdev = hwdev;
+ aeqs->num_aeqs = num_aeqs;
+ aeqs->workq = alloc_workqueue(HINIC3_EQS_WQ_NAME, WQ_MEM_RECLAIM,
+ HINIC3_MAX_AEQS);
+ if (!aeqs->workq) {
+ dev_err(hwdev->dev, "Failed to initialize aeq workqueue\n");
+ err = -ENOMEM;
+ goto err_free_aeqs;
+ }
+
+ for (q_id = 0; q_id < num_aeqs; q_id++) {
+ err = init_eq(&aeqs->aeq[q_id], hwdev, q_id,
+ HINIC3_DEFAULT_AEQ_LEN, HINIC3_AEQ,
+ &msix_entries[q_id]);
+ if (err) {
+ dev_err(hwdev->dev, "Failed to init aeq %u\n",
+ q_id);
+ goto err_remove_eqs;
+ }
+ }
+ for (q_id = 0; q_id < num_aeqs; q_id++)
+ hinic3_set_msix_state(hwdev, aeqs->aeq[q_id].msix_entry_idx,
+ HINIC3_MSIX_ENABLE);
+
+ return 0;
+
+err_remove_eqs:
+ while (q_id > 0) {
+ q_id--;
+ remove_eq(&aeqs->aeq[q_id]);
+ }
+
+ destroy_workqueue(aeqs->workq);
+
+err_free_aeqs:
+ kfree(aeqs);
+
+ return err;
+}
+
+void hinic3_aeqs_free(struct hinic3_hwdev *hwdev)
+{
+ struct hinic3_aeqs *aeqs = hwdev->aeqs;
+ enum hinic3_aeq_type aeq_event;
+ struct hinic3_eq *eq;
+ u16 q_id;
+
+ for (q_id = 0; q_id < aeqs->num_aeqs; q_id++) {
+ eq = aeqs->aeq + q_id;
+ remove_eq(eq);
+ hinic3_free_irq(hwdev, eq->irq_id);
+ }
+
+ for (aeq_event = 0; aeq_event < HINIC3_MAX_AEQ_EVENTS; aeq_event++)
+ hinic3_aeq_unregister_cb(hwdev, aeq_event);
+
+ destroy_workqueue(aeqs->workq);
+
+ kfree(aeqs);
+}
+
+int hinic3_ceqs_init(struct hinic3_hwdev *hwdev, u16 num_ceqs,
+ struct msix_entry *msix_entries)
+{
+ struct hinic3_ceqs *ceqs;
+ u16 q_id;
+ int err;
+
+ ceqs = kzalloc(sizeof(*ceqs), GFP_KERNEL);
+ if (!ceqs)
+ return -ENOMEM;
+
+ hwdev->ceqs = ceqs;
+ ceqs->hwdev = hwdev;
+ ceqs->num_ceqs = num_ceqs;
+ spin_lock_init(&ceqs->ceq_lock);
+
+ for (q_id = 0; q_id < num_ceqs; q_id++) {
+ err = init_eq(&ceqs->ceq[q_id], hwdev, q_id,
+ HINIC3_DEFAULT_CEQ_LEN, HINIC3_CEQ,
+ &msix_entries[q_id]);
+ if (err) {
+ dev_err(hwdev->dev, "Failed to init ceq %u\n",
+ q_id);
+ goto err_free_ceqs;
+ }
+ }
+ for (q_id = 0; q_id < num_ceqs; q_id++)
+ hinic3_set_msix_state(hwdev, ceqs->ceq[q_id].msix_entry_idx,
+ HINIC3_MSIX_ENABLE);
+
+ return 0;
+
+err_free_ceqs:
+ while (q_id > 0) {
+ q_id--;
+ remove_eq(&ceqs->ceq[q_id]);
+ }
+
+ kfree(ceqs);
+
+ return err;
+}
+
+void hinic3_ceqs_free(struct hinic3_hwdev *hwdev)
+{
+ struct hinic3_ceqs *ceqs = hwdev->ceqs;
+ enum hinic3_ceq_event ceq_event;
+ struct hinic3_eq *eq;
+ u16 q_id;
+
+ for (q_id = 0; q_id < ceqs->num_ceqs; q_id++) {
+ eq = ceqs->ceq + q_id;
+ remove_eq(eq);
+ hinic3_free_irq(hwdev, eq->irq_id);
+ }
+
+ for (ceq_event = 0; ceq_event < HINIC3_MAX_CEQ_EVENTS; ceq_event++)
+ hinic3_ceq_unregister_cb(hwdev, ceq_event);
+
+ kfree(ceqs);
+}
diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_eqs.h b/drivers/net/ethernet/huawei/hinic3/hinic3_eqs.h
new file mode 100644
index 000000000000..005a6e0745b3
--- /dev/null
+++ b/drivers/net/ethernet/huawei/hinic3/hinic3_eqs.h
@@ -0,0 +1,122 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. */
+
+#ifndef _HINIC3_EQS_H_
+#define _HINIC3_EQS_H_
+
+#include <linux/interrupt.h>
+
+#include "hinic3_hw_cfg.h"
+#include "hinic3_queue_common.h"
+
+#define HINIC3_MAX_AEQS 4
+#define HINIC3_MAX_CEQS 32
+
+#define HINIC3_AEQ_MAX_PAGES 4
+#define HINIC3_CEQ_MAX_PAGES 8
+
+#define HINIC3_AEQE_SIZE 64
+#define HINIC3_CEQE_SIZE 4
+
+#define HINIC3_AEQE_DESC_SIZE 4
+#define HINIC3_AEQE_DATA_SIZE (HINIC3_AEQE_SIZE - HINIC3_AEQE_DESC_SIZE)
+
+#define HINIC3_DEFAULT_AEQ_LEN 0x10000
+#define HINIC3_DEFAULT_CEQ_LEN 0x10000
+
+#define HINIC3_EQ_IRQ_NAME_LEN 64
+
+#define HINIC3_EQ_USLEEP_LOW_BOUND 900
+#define HINIC3_EQ_USLEEP_HIGH_BOUND 1000
+
+enum hinic3_eq_type {
+ HINIC3_AEQ = 0,
+ HINIC3_CEQ = 1,
+};
+
+enum hinic3_eq_intr_mode {
+ HINIC3_INTR_MODE_ARMED = 0,
+ HINIC3_INTR_MODE_ALWAYS = 1,
+};
+
+enum hinic3_eq_ci_arm_state {
+ HINIC3_EQ_NOT_ARMED = 0,
+ HINIC3_EQ_ARMED = 1,
+};
+
+struct hinic3_eq {
+ struct hinic3_hwdev *hwdev;
+ struct hinic3_queue_pages qpages;
+ u16 q_id;
+ enum hinic3_eq_type type;
+ u32 eq_len;
+ u32 cons_idx;
+ u8 wrapped;
+ u32 irq_id;
+ u16 msix_entry_idx;
+ char irq_name[HINIC3_EQ_IRQ_NAME_LEN];
+ struct work_struct aeq_work;
+};
+
+struct hinic3_aeq_elem {
+ u8 aeqe_data[HINIC3_AEQE_DATA_SIZE];
+ __be32 desc;
+};
+
+enum hinic3_aeq_type {
+ HINIC3_HW_INTER_INT = 0,
+ HINIC3_MBX_FROM_FUNC = 1,
+ HINIC3_MSG_FROM_FW = 2,
+ HINIC3_MAX_AEQ_EVENTS = 6,
+};
+
+typedef void (*hinic3_aeq_event_cb)(struct hinic3_hwdev *hwdev, u8 *data,
+ u8 size);
+
+struct hinic3_aeqs {
+ struct hinic3_hwdev *hwdev;
+ hinic3_aeq_event_cb aeq_cb[HINIC3_MAX_AEQ_EVENTS];
+ struct hinic3_eq aeq[HINIC3_MAX_AEQS];
+ u16 num_aeqs;
+ struct workqueue_struct *workq;
+ /* lock for aeq event flag */
+ spinlock_t aeq_lock;
+};
+
+enum hinic3_ceq_event {
+ HINIC3_CMDQ = 3,
+ HINIC3_MAX_CEQ_EVENTS = 6,
+};
+
+typedef void (*hinic3_ceq_event_cb)(struct hinic3_hwdev *hwdev,
+ __le32 ceqe_data);
+
+struct hinic3_ceqs {
+ struct hinic3_hwdev *hwdev;
+
+ hinic3_ceq_event_cb ceq_cb[HINIC3_MAX_CEQ_EVENTS];
+
+ struct hinic3_eq ceq[HINIC3_MAX_CEQS];
+ u16 num_ceqs;
+ /* lock for ceq event flag */
+ spinlock_t ceq_lock;
+};
+
+int hinic3_aeqs_init(struct hinic3_hwdev *hwdev, u16 num_aeqs,
+ struct msix_entry *msix_entries);
+void hinic3_aeqs_free(struct hinic3_hwdev *hwdev);
+int hinic3_aeq_register_cb(struct hinic3_hwdev *hwdev,
+ enum hinic3_aeq_type event,
+ hinic3_aeq_event_cb hwe_cb);
+void hinic3_aeq_unregister_cb(struct hinic3_hwdev *hwdev,
+ enum hinic3_aeq_type event);
+int hinic3_ceqs_init(struct hinic3_hwdev *hwdev, u16 num_ceqs,
+ struct msix_entry *msix_entries);
+void hinic3_ceqs_free(struct hinic3_hwdev *hwdev);
+int hinic3_ceq_register_cb(struct hinic3_hwdev *hwdev,
+ enum hinic3_ceq_event event,
+ hinic3_ceq_event_cb callback);
+void hinic3_ceq_unregister_cb(struct hinic3_hwdev *hwdev,
+ enum hinic3_ceq_event event);
+
+#endif
diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_hw_cfg.c b/drivers/net/ethernet/huawei/hinic3/hinic3_hw_cfg.c
index 87d9450c30ca..0599fc4f3fb0 100644
--- a/drivers/net/ethernet/huawei/hinic3/hinic3_hw_cfg.c
+++ b/drivers/net/ethernet/huawei/hinic3/hinic3_hw_cfg.c
@@ -8,6 +8,49 @@
#include "hinic3_hwif.h"
#include "hinic3_mbox.h"
+int hinic3_alloc_irqs(struct hinic3_hwdev *hwdev, u16 num,
+ struct msix_entry *alloc_arr, u16 *act_num)
+{
+ struct hinic3_irq_info *irq_info;
+ struct hinic3_irq *curr;
+ u16 i, found = 0;
+
+ irq_info = &hwdev->cfg_mgmt->irq_info;
+ mutex_lock(&irq_info->irq_mutex);
+ for (i = 0; i < irq_info->num_irq && found < num; i++) {
+ curr = irq_info->irq + i;
+ if (curr->allocated)
+ continue;
+ curr->allocated = true;
+ alloc_arr[found].vector = curr->irq_id;
+ alloc_arr[found].entry = curr->msix_entry_idx;
+ found++;
+ }
+ mutex_unlock(&irq_info->irq_mutex);
+
+ *act_num = found;
+
+ return found == 0 ? -ENOMEM : 0;
+}
+
+void hinic3_free_irq(struct hinic3_hwdev *hwdev, u32 irq_id)
+{
+ struct hinic3_irq_info *irq_info;
+ struct hinic3_irq *curr;
+ u16 i;
+
+ irq_info = &hwdev->cfg_mgmt->irq_info;
+ mutex_lock(&irq_info->irq_mutex);
+ for (i = 0; i < irq_info->num_irq; i++) {
+ curr = irq_info->irq + i;
+ if (curr->irq_id == irq_id) {
+ curr->allocated = false;
+ break;
+ }
+ }
+ mutex_unlock(&irq_info->irq_mutex);
+}
+
bool hinic3_support_nic(struct hinic3_hwdev *hwdev)
{
return hwdev->cfg_mgmt->cap.supp_svcs_bitmap &
diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_hw_comm.c b/drivers/net/ethernet/huawei/hinic3/hinic3_hw_comm.c
index 434696ce7dc2..7adcdd569c7b 100644
--- a/drivers/net/ethernet/huawei/hinic3/hinic3_hw_comm.c
+++ b/drivers/net/ethernet/huawei/hinic3/hinic3_hw_comm.c
@@ -8,6 +8,37 @@
#include "hinic3_hwif.h"
#include "hinic3_mbox.h"
+int hinic3_set_interrupt_cfg_direct(struct hinic3_hwdev *hwdev,
+ const struct hinic3_interrupt_info *info)
+{
+ struct comm_cmd_cfg_msix_ctrl_reg msix_cfg = {};
+ struct mgmt_msg_params msg_params = {};
+ int err;
+
+ msix_cfg.func_id = hinic3_global_func_id(hwdev);
+ msix_cfg.msix_index = info->msix_index;
+ msix_cfg.opcode = MGMT_MSG_CMD_OP_SET;
+
+ msix_cfg.lli_credit_cnt = info->lli_credit_limit;
+ msix_cfg.lli_timer_cnt = info->lli_timer_cfg;
+ msix_cfg.pending_cnt = info->pending_limit;
+ msix_cfg.coalesce_timer_cnt = info->coalesc_timer_cfg;
+ msix_cfg.resend_timer_cnt = info->resend_timer_cfg;
+
+ mgmt_msg_params_init_default(&msg_params, &msix_cfg, sizeof(msix_cfg));
+
+ err = hinic3_send_mbox_to_mgmt(hwdev, MGMT_MOD_COMM,
+ COMM_CMD_CFG_MSIX_CTRL_REG, &msg_params);
+ if (err || msix_cfg.head.status) {
+ dev_err(hwdev->dev,
+ "Failed to set interrupt config, err: %d, status: 0x%x\n",
+ err, msix_cfg.head.status);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
int hinic3_func_reset(struct hinic3_hwdev *hwdev, u16 func_id, u64 reset_flag)
{
struct comm_cmd_func_reset func_reset = {};
diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_hw_comm.h b/drivers/net/ethernet/huawei/hinic3/hinic3_hw_comm.h
index c33a1c77da9c..2270987b126f 100644
--- a/drivers/net/ethernet/huawei/hinic3/hinic3_hw_comm.h
+++ b/drivers/net/ethernet/huawei/hinic3/hinic3_hw_comm.h
@@ -8,6 +8,19 @@
struct hinic3_hwdev;
+struct hinic3_interrupt_info {
+ u32 lli_set;
+ u32 interrupt_coalesc_set;
+ u16 msix_index;
+ u8 lli_credit_limit;
+ u8 lli_timer_cfg;
+ u8 pending_limit;
+ u8 coalesc_timer_cfg;
+ u8 resend_timer_cfg;
+};
+
+int hinic3_set_interrupt_cfg_direct(struct hinic3_hwdev *hwdev,
+ const struct hinic3_interrupt_info *info);
int hinic3_func_reset(struct hinic3_hwdev *hwdev, u16 func_id, u64 reset_flag);
#endif
diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_hw_intf.h b/drivers/net/ethernet/huawei/hinic3/hinic3_hw_intf.h
index 22c84093efa2..379ba4cb042c 100644
--- a/drivers/net/ethernet/huawei/hinic3/hinic3_hw_intf.h
+++ b/drivers/net/ethernet/huawei/hinic3/hinic3_hw_intf.h
@@ -70,6 +70,20 @@ enum comm_cmd {
COMM_CMD_SET_DMA_ATTR = 25,
};
+struct comm_cmd_cfg_msix_ctrl_reg {
+ struct mgmt_msg_head head;
+ u16 func_id;
+ u8 opcode;
+ u8 rsvd1;
+ u16 msix_index;
+ u8 pending_cnt;
+ u8 coalesce_timer_cnt;
+ u8 resend_timer_cnt;
+ u8 lli_timer_cnt;
+ u8 lli_credit_cnt;
+ u8 rsvd2[5];
+};
+
enum comm_func_reset_bits {
COMM_FUNC_RESET_BIT_FLUSH = BIT(0),
COMM_FUNC_RESET_BIT_MQM = BIT(1),
@@ -100,6 +114,28 @@ struct comm_cmd_feature_nego {
u64 s_feature[COMM_MAX_FEATURE_QWORD];
};
+struct comm_cmd_set_ceq_ctrl_reg {
+ struct mgmt_msg_head head;
+ u16 func_id;
+ u16 q_id;
+ u32 ctrl0;
+ u32 ctrl1;
+ u32 rsvd1;
+};
+
+struct comm_cmdq_ctxt_info {
+ __le64 curr_wqe_page_pfn;
+ __le64 wq_block_pfn;
+};
+
+struct comm_cmd_set_cmdq_ctxt {
+ struct mgmt_msg_head head;
+ u16 func_id;
+ u8 cmdq_id;
+ u8 rsvd1[5];
+ struct comm_cmdq_ctxt_info ctxt;
+};
+
/* Services supported by HW. HW uses these values when delivering events.
* HW supports multiple services that are not yet supported by driver
* (e.g. RoCE).
diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_hwif.c b/drivers/net/ethernet/huawei/hinic3/hinic3_hwif.c
index 0865453bf0e7..d4af376b7f35 100644
--- a/drivers/net/ethernet/huawei/hinic3/hinic3_hwif.c
+++ b/drivers/net/ethernet/huawei/hinic3/hinic3_hwif.c
@@ -6,13 +6,159 @@
#include <linux/io.h>
#include "hinic3_common.h"
+#include "hinic3_csr.h"
#include "hinic3_hwdev.h"
#include "hinic3_hwif.h"
+/* config BAR4/5 4MB, DB & DWQE both 2MB */
+#define HINIC3_DB_DWQE_SIZE 0x00400000
+
+/* db/dwqe page size: 4K */
+#define HINIC3_DB_PAGE_SIZE 0x00001000
+#define HINIC3_DWQE_OFFSET 0x00000800
+#define HINIC3_DB_MAX_AREAS (HINIC3_DB_DWQE_SIZE / HINIC3_DB_PAGE_SIZE)
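+
+/* With these values the doorbell BAR provides 0x400000 / 0x1000 = 1024
+ * distinct 4K doorbell areas; hinic3_alloc_db_addr() maps index idx to
+ * db_base + idx * 4K, with the DWQE region starting HINIC3_DWQE_OFFSET
+ * bytes into the same page.
+ */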
+
+#define HINIC3_GET_REG_ADDR(reg) ((reg) & (HINIC3_REGS_FLAG_MASK))
+
+static void __iomem *hinic3_reg_addr(struct hinic3_hwif *hwif, u32 reg)
+{
+ return hwif->cfg_regs_base + HINIC3_GET_REG_ADDR(reg);
+}
+
+u32 hinic3_hwif_read_reg(struct hinic3_hwif *hwif, u32 reg)
+{
+ void __iomem *addr = hinic3_reg_addr(hwif, reg);
+
+ return ioread32be(addr);
+}
+
+void hinic3_hwif_write_reg(struct hinic3_hwif *hwif, u32 reg, u32 val)
+{
+ void __iomem *addr = hinic3_reg_addr(hwif, reg);
+
+ iowrite32be(val, addr);
+}
+
+static int get_db_idx(struct hinic3_hwif *hwif, u32 *idx)
+{
+ struct hinic3_db_area *db_area = &hwif->db_area;
+ u32 pg_idx;
+
+ spin_lock(&db_area->idx_lock);
+ pg_idx = find_first_zero_bit(db_area->db_bitmap_array,
+ db_area->db_max_areas);
+ if (pg_idx == db_area->db_max_areas) {
+ spin_unlock(&db_area->idx_lock);
+ return -ENOMEM;
+ }
+ set_bit(pg_idx, db_area->db_bitmap_array);
+ spin_unlock(&db_area->idx_lock);
+
+ *idx = pg_idx;
+
+ return 0;
+}
+
+static void free_db_idx(struct hinic3_hwif *hwif, u32 idx)
+{
+ struct hinic3_db_area *db_area = &hwif->db_area;
+
+ spin_lock(&db_area->idx_lock);
+ clear_bit(idx, db_area->db_bitmap_array);
+ spin_unlock(&db_area->idx_lock);
+}
+
+void hinic3_free_db_addr(struct hinic3_hwdev *hwdev, const u8 __iomem *db_base)
+{
+ struct hinic3_hwif *hwif;
+ uintptr_t distance;
+ u32 idx;
+
+ hwif = hwdev->hwif;
+ distance = db_base - hwif->db_base;
+ idx = distance / HINIC3_DB_PAGE_SIZE;
+
+ free_db_idx(hwif, idx);
+}
+
+int hinic3_alloc_db_addr(struct hinic3_hwdev *hwdev, void __iomem **db_base,
+ void __iomem **dwqe_base)
+{
+ struct hinic3_hwif *hwif;
+ u8 __iomem *addr;
+ u32 idx;
+ int err;
+
+ hwif = hwdev->hwif;
+
+ err = get_db_idx(hwif, &idx);
+ if (err)
+ return err;
+
+ addr = hwif->db_base + idx * HINIC3_DB_PAGE_SIZE;
+ *db_base = addr;
+
+ if (dwqe_base)
+ *dwqe_base = addr + HINIC3_DWQE_OFFSET;
+
+ return 0;
+}
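+
+/* Illustrative usage sketch (no such caller exists in this patch):
+ *
+ *	void __iomem *db, *dwqe;
+ *
+ *	err = hinic3_alloc_db_addr(hwdev, &db, &dwqe);
+ *	if (err)
+ *		return err;
+ *	// ... write doorbells to db, direct WQEs to dwqe ...
+ *	hinic3_free_db_addr(hwdev, db);
+ *
+ * hinic3_free_db_addr() recomputes the bitmap index from the pointer, so
+ * callers only need to keep the db_base pointer around.
+ */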
+
void hinic3_set_msix_state(struct hinic3_hwdev *hwdev, u16 msix_idx,
enum hinic3_msix_state flag)
{
- /* Completed by later submission due to LoC limit. */
+ struct hinic3_hwif *hwif;
+ u8 int_msk = 1;
+ u32 mask_bits;
+ u32 addr;
+
+ hwif = hwdev->hwif;
+
+ if (flag)
+ mask_bits = HINIC3_MSI_CLR_INDIR_SET(int_msk, INT_MSK_SET);
+ else
+ mask_bits = HINIC3_MSI_CLR_INDIR_SET(int_msk, INT_MSK_CLR);
+ mask_bits = mask_bits |
+ HINIC3_MSI_CLR_INDIR_SET(msix_idx, SIMPLE_INDIR_IDX);
+
+ addr = HINIC3_CSR_FUNC_MSI_CLR_WR_ADDR;
+ hinic3_hwif_write_reg(hwif, addr, mask_bits);
+}
+
+void hinic3_msix_intr_clear_resend_bit(struct hinic3_hwdev *hwdev, u16 msix_idx,
+ u8 clear_resend_en)
+{
+ struct hinic3_hwif *hwif;
+ u32 msix_ctrl, addr;
+
+ hwif = hwdev->hwif;
+
+ msix_ctrl = HINIC3_MSI_CLR_INDIR_SET(msix_idx, SIMPLE_INDIR_IDX) |
+ HINIC3_MSI_CLR_INDIR_SET(clear_resend_en, RESEND_TIMER_CLR);
+
+ addr = HINIC3_CSR_FUNC_MSI_CLR_WR_ADDR;
+ hinic3_hwif_write_reg(hwif, addr, msix_ctrl);
+}
+
+void hinic3_set_msix_auto_mask_state(struct hinic3_hwdev *hwdev, u16 msix_idx,
+ enum hinic3_msix_auto_mask flag)
+{
+ struct hinic3_hwif *hwif;
+ u32 mask_bits;
+ u32 addr;
+
+ hwif = hwdev->hwif;
+
+ if (flag)
+ mask_bits = HINIC3_MSI_CLR_INDIR_SET(1, AUTO_MSK_SET);
+ else
+ mask_bits = HINIC3_MSI_CLR_INDIR_SET(1, AUTO_MSK_CLR);
+
+ mask_bits = mask_bits |
+ HINIC3_MSI_CLR_INDIR_SET(msix_idx, SIMPLE_INDIR_IDX);
+
+ addr = HINIC3_CSR_FUNC_MSI_CLR_WR_ADDR;
+ hinic3_hwif_write_reg(hwif, addr, mask_bits);
}
u16 hinic3_global_func_id(struct hinic3_hwdev *hwdev)
diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_hwif.h b/drivers/net/ethernet/huawei/hinic3/hinic3_hwif.h
index 513c9680e6b6..29dd86eb458a 100644
--- a/drivers/net/ethernet/huawei/hinic3/hinic3_hwif.h
+++ b/drivers/net/ethernet/huawei/hinic3/hinic3_hwif.h
@@ -50,8 +50,24 @@ enum hinic3_msix_state {
HINIC3_MSIX_DISABLE,
};
+enum hinic3_msix_auto_mask {
+ HINIC3_CLR_MSIX_AUTO_MASK,
+ HINIC3_SET_MSIX_AUTO_MASK,
+};
+
+u32 hinic3_hwif_read_reg(struct hinic3_hwif *hwif, u32 reg);
+void hinic3_hwif_write_reg(struct hinic3_hwif *hwif, u32 reg, u32 val);
+
+int hinic3_alloc_db_addr(struct hinic3_hwdev *hwdev, void __iomem **db_base,
+ void __iomem **dwqe_base);
+void hinic3_free_db_addr(struct hinic3_hwdev *hwdev, const u8 __iomem *db_base);
+
void hinic3_set_msix_state(struct hinic3_hwdev *hwdev, u16 msix_idx,
enum hinic3_msix_state flag);
+void hinic3_msix_intr_clear_resend_bit(struct hinic3_hwdev *hwdev, u16 msix_idx,
+ u8 clear_resend_en);
+void hinic3_set_msix_auto_mask_state(struct hinic3_hwdev *hwdev, u16 msix_idx,
+ enum hinic3_msix_auto_mask flag);
u16 hinic3_global_func_id(struct hinic3_hwdev *hwdev);
diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_irq.c b/drivers/net/ethernet/huawei/hinic3/hinic3_irq.c
index 8b92eed25edf..33eb9080739d 100644
--- a/drivers/net/ethernet/huawei/hinic3/hinic3_irq.c
+++ b/drivers/net/ethernet/huawei/hinic3/hinic3_irq.c
@@ -38,7 +38,7 @@ static int hinic3_poll(struct napi_struct *napi, int budget)
return work_done;
}
-void qp_add_napi(struct hinic3_irq_cfg *irq_cfg)
+static void qp_add_napi(struct hinic3_irq_cfg *irq_cfg)
{
struct hinic3_nic_dev *nic_dev = netdev_priv(irq_cfg->netdev);
@@ -50,7 +50,7 @@ void qp_add_napi(struct hinic3_irq_cfg *irq_cfg)
napi_enable(&irq_cfg->napi);
}
-void qp_del_napi(struct hinic3_irq_cfg *irq_cfg)
+static void qp_del_napi(struct hinic3_irq_cfg *irq_cfg)
{
napi_disable(&irq_cfg->napi);
netif_queue_set_napi(irq_cfg->netdev, irq_cfg->irq_id,
@@ -60,3 +60,135 @@ void qp_del_napi(struct hinic3_irq_cfg *irq_cfg)
netif_stop_subqueue(irq_cfg->netdev, irq_cfg->irq_id);
netif_napi_del(&irq_cfg->napi);
}
+
+static irqreturn_t qp_irq(int irq, void *data)
+{
+ struct hinic3_irq_cfg *irq_cfg = data;
+ struct hinic3_nic_dev *nic_dev;
+
+ nic_dev = netdev_priv(irq_cfg->netdev);
+ hinic3_msix_intr_clear_resend_bit(nic_dev->hwdev,
+ irq_cfg->msix_entry_idx, 1);
+
+ napi_schedule(&irq_cfg->napi);
+
+ return IRQ_HANDLED;
+}
+
+static int hinic3_request_irq(struct hinic3_irq_cfg *irq_cfg, u16 q_id)
+{
+ struct hinic3_interrupt_info info = {};
+ struct hinic3_nic_dev *nic_dev;
+ struct net_device *netdev;
+ int err;
+
+ netdev = irq_cfg->netdev;
+ nic_dev = netdev_priv(netdev);
+ qp_add_napi(irq_cfg);
+
+ info.msix_index = irq_cfg->msix_entry_idx;
+ info.interrupt_coalesc_set = 1;
+ info.pending_limit = nic_dev->intr_coalesce[q_id].pending_limit;
+ info.coalesc_timer_cfg =
+ nic_dev->intr_coalesce[q_id].coalesce_timer_cfg;
+ info.resend_timer_cfg = nic_dev->intr_coalesce[q_id].resend_timer_cfg;
+ err = hinic3_set_interrupt_cfg_direct(nic_dev->hwdev, &info);
+ if (err) {
+ netdev_err(netdev, "Failed to set RX interrupt coalescing attribute.\n");
+ qp_del_napi(irq_cfg);
+ return err;
+ }
+
+ err = request_irq(irq_cfg->irq_id, qp_irq, 0, irq_cfg->irq_name,
+ irq_cfg);
+ if (err) {
+ qp_del_napi(irq_cfg);
+ return err;
+ }
+
+ irq_set_affinity_hint(irq_cfg->irq_id, &irq_cfg->affinity_mask);
+
+ return 0;
+}
+
+static void hinic3_release_irq(struct hinic3_irq_cfg *irq_cfg)
+{
+ irq_set_affinity_hint(irq_cfg->irq_id, NULL);
+ free_irq(irq_cfg->irq_id, irq_cfg);
+}
+
+int hinic3_qps_irq_init(struct net_device *netdev)
+{
+ struct hinic3_nic_dev *nic_dev = netdev_priv(netdev);
+ struct pci_dev *pdev = nic_dev->pdev;
+ struct hinic3_irq_cfg *irq_cfg;
+ struct msix_entry *msix_entry;
+ u32 local_cpu;
+ u16 q_id;
+ int err;
+
+ for (q_id = 0; q_id < nic_dev->q_params.num_qps; q_id++) {
+ msix_entry = &nic_dev->qps_msix_entries[q_id];
+ irq_cfg = &nic_dev->q_params.irq_cfg[q_id];
+
+ irq_cfg->irq_id = msix_entry->vector;
+ irq_cfg->msix_entry_idx = msix_entry->entry;
+ irq_cfg->netdev = netdev;
+ irq_cfg->txq = &nic_dev->txqs[q_id];
+ irq_cfg->rxq = &nic_dev->rxqs[q_id];
+ nic_dev->rxqs[q_id].irq_cfg = irq_cfg;
+
+ local_cpu = cpumask_local_spread(q_id, dev_to_node(&pdev->dev));
+ cpumask_set_cpu(local_cpu, &irq_cfg->affinity_mask);
+
+ snprintf(irq_cfg->irq_name, sizeof(irq_cfg->irq_name),
+ "%s_qp%u", netdev->name, q_id);
+
+ err = hinic3_request_irq(irq_cfg, q_id);
+ if (err) {
+ netdev_err(netdev, "Failed to request Rx irq\n");
+ goto err_release_irqs;
+ }
+
+ hinic3_set_msix_auto_mask_state(nic_dev->hwdev,
+ irq_cfg->msix_entry_idx,
+ HINIC3_SET_MSIX_AUTO_MASK);
+ hinic3_set_msix_state(nic_dev->hwdev, irq_cfg->msix_entry_idx,
+ HINIC3_MSIX_ENABLE);
+ }
+
+ return 0;
+
+err_release_irqs:
+ while (q_id > 0) {
+ q_id--;
+ irq_cfg = &nic_dev->q_params.irq_cfg[q_id];
+ qp_del_napi(irq_cfg);
+ hinic3_set_msix_state(nic_dev->hwdev, irq_cfg->msix_entry_idx,
+ HINIC3_MSIX_DISABLE);
+ hinic3_set_msix_auto_mask_state(nic_dev->hwdev,
+ irq_cfg->msix_entry_idx,
+ HINIC3_CLR_MSIX_AUTO_MASK);
+ hinic3_release_irq(irq_cfg);
+ }
+
+ return err;
+}
+
+void hinic3_qps_irq_uninit(struct net_device *netdev)
+{
+ struct hinic3_nic_dev *nic_dev = netdev_priv(netdev);
+ struct hinic3_irq_cfg *irq_cfg;
+ u16 q_id;
+
+ for (q_id = 0; q_id < nic_dev->q_params.num_qps; q_id++) {
+ irq_cfg = &nic_dev->q_params.irq_cfg[q_id];
+ qp_del_napi(irq_cfg);
+ hinic3_set_msix_state(nic_dev->hwdev, irq_cfg->msix_entry_idx,
+ HINIC3_MSIX_DISABLE);
+ hinic3_set_msix_auto_mask_state(nic_dev->hwdev,
+ irq_cfg->msix_entry_idx,
+ HINIC3_CLR_MSIX_AUTO_MASK);
+ hinic3_release_irq(irq_cfg);
+ }
+}
diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_main.c b/drivers/net/ethernet/huawei/hinic3/hinic3_main.c
index 497f2a36f35d..a0b04fb07c76 100644
--- a/drivers/net/ethernet/huawei/hinic3/hinic3_main.c
+++ b/drivers/net/ethernet/huawei/hinic3/hinic3_main.c
@@ -17,12 +17,53 @@
#define HINIC3_NIC_DRV_DESC "Intelligent Network Interface Card Driver"
-#define HINIC3_RX_BUF_LEN 2048
-#define HINIC3_LRO_REPLENISH_THLD 256
-#define HINIC3_NIC_DEV_WQ_NAME "hinic3_nic_dev_wq"
+#define HINIC3_RX_BUF_LEN 2048
+#define HINIC3_LRO_REPLENISH_THLD 256
+#define HINIC3_NIC_DEV_WQ_NAME "hinic3_nic_dev_wq"
-#define HINIC3_SQ_DEPTH 1024
-#define HINIC3_RQ_DEPTH 1024
+#define HINIC3_SQ_DEPTH 1024
+#define HINIC3_RQ_DEPTH 1024
+
+#define HINIC3_DEFAULT_TXRX_MSIX_PENDING_LIMIT 2
+#define HINIC3_DEFAULT_TXRX_MSIX_COALESC_TIMER_CFG 25
+#define HINIC3_DEFAULT_TXRX_MSIX_RESEND_TIMER_CFG 7
+
+static void init_intr_coal_param(struct net_device *netdev)
+{
+ struct hinic3_nic_dev *nic_dev = netdev_priv(netdev);
+ struct hinic3_intr_coal_info *info;
+ u16 i;
+
+ for (i = 0; i < nic_dev->max_qps; i++) {
+ info = &nic_dev->intr_coalesce[i];
+ info->pending_limit = HINIC3_DEFAULT_TXRX_MSIX_PENDING_LIMIT;
+ info->coalesce_timer_cfg = HINIC3_DEFAULT_TXRX_MSIX_COALESC_TIMER_CFG;
+ info->resend_timer_cfg = HINIC3_DEFAULT_TXRX_MSIX_RESEND_TIMER_CFG;
+ }
+}
+
+static int hinic3_init_intr_coalesce(struct net_device *netdev)
+{
+ struct hinic3_nic_dev *nic_dev = netdev_priv(netdev);
+
+ nic_dev->intr_coalesce = kcalloc(nic_dev->max_qps,
+ sizeof(*nic_dev->intr_coalesce),
+ GFP_KERNEL);
+
+ if (!nic_dev->intr_coalesce)
+ return -ENOMEM;
+
+ init_intr_coal_param(netdev);
+
+ return 0;
+}
+
+static void hinic3_free_intr_coalesce(struct net_device *netdev)
+{
+ struct hinic3_nic_dev *nic_dev = netdev_priv(netdev);
+
+ kfree(nic_dev->intr_coalesce);
+}
static int hinic3_alloc_txrxqs(struct net_device *netdev)
{
@@ -42,8 +83,17 @@ static int hinic3_alloc_txrxqs(struct net_device *netdev)
goto err_free_txqs;
}
+ err = hinic3_init_intr_coalesce(netdev);
+ if (err) {
+ dev_err(hwdev->dev, "Failed to init_intr_coalesce\n");
+ goto err_free_rxqs;
+ }
+
return 0;
+err_free_rxqs:
+ hinic3_free_rxqs(netdev);
+
err_free_txqs:
hinic3_free_txqs(netdev);
@@ -52,6 +102,7 @@ err_free_txqs:
static void hinic3_free_txrxqs(struct net_device *netdev)
{
+ hinic3_free_intr_coalesce(netdev);
hinic3_free_rxqs(netdev);
hinic3_free_txqs(netdev);
}
diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_mbox.c b/drivers/net/ethernet/huawei/hinic3/hinic3_mbox.c
index e74d1eb09730..cf67e26acece 100644
--- a/drivers/net/ethernet/huawei/hinic3/hinic3_mbox.c
+++ b/drivers/net/ethernet/huawei/hinic3/hinic3_mbox.c
@@ -4,13 +4,857 @@
#include <linux/dma-mapping.h>
#include "hinic3_common.h"
+#include "hinic3_csr.h"
#include "hinic3_hwdev.h"
#include "hinic3_hwif.h"
#include "hinic3_mbox.h"
+#define MBOX_INT_DST_AEQN_MASK GENMASK(11, 10)
+#define MBOX_INT_SRC_RESP_AEQN_MASK GENMASK(13, 12)
+#define MBOX_INT_STAT_DMA_MASK GENMASK(19, 14)
+/* TX size, expressed in 4-byte units */
+#define MBOX_INT_TX_SIZE_MASK GENMASK(24, 20)
+/* SO_RO == strong order, relaxed order */
+#define MBOX_INT_STAT_DMA_SO_RO_MASK GENMASK(26, 25)
+#define MBOX_INT_WB_EN_MASK BIT(28)
+#define MBOX_INT_SET(val, field) \
+ FIELD_PREP(MBOX_INT_##field##_MASK, val)
+
+#define MBOX_CTRL_TRIGGER_AEQE_MASK BIT(0)
+#define MBOX_CTRL_TX_STATUS_MASK BIT(1)
+#define MBOX_CTRL_DST_FUNC_MASK GENMASK(28, 16)
+#define MBOX_CTRL_SET(val, field) \
+ FIELD_PREP(MBOX_CTRL_##field##_MASK, val)
+
+#define MBOX_MSG_POLLING_TIMEOUT_MS 8000 /* send msg seg timeout */
+#define MBOX_COMP_POLLING_TIMEOUT_MS 40000 /* response timeout */
+
+#define MBOX_MAX_BUF_SZ 2048
+#define MBOX_HEADER_SZ 8
+
+/* MBOX size is 64B, 8B for mbox_header, 8B reserved */
+#define MBOX_SEG_LEN 48
+#define MBOX_SEG_LEN_ALIGN 4
+#define MBOX_WB_STATUS_LEN 16
+
+#define MBOX_SEQ_ID_START_VAL 0
+#define MBOX_SEQ_ID_MAX_VAL 42
+#define MBOX_LAST_SEG_MAX_LEN \
+ (MBOX_MAX_BUF_SZ - MBOX_SEQ_ID_MAX_VAL * MBOX_SEG_LEN)
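+
+/* A maximal message therefore spans seq ids 0..42: segments 0..41 carry
+ * 48B each (2016B) and segment 42 carries at most
+ * 2048 - 2016 = 32 bytes, i.e. MBOX_MAX_BUF_SZ in total.
+ */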
+
+/* mbox write back status is 16B, only first 4B is used */
+#define MBOX_WB_STATUS_ERRCODE_MASK 0xFFFF
+#define MBOX_WB_STATUS_MASK 0xFF
+#define MBOX_WB_ERROR_CODE_MASK 0xFF00
+#define MBOX_WB_STATUS_FINISHED_SUCCESS 0xFF
+#define MBOX_WB_STATUS_NOT_FINISHED 0x00
+
+#define MBOX_STATUS_FINISHED(wb) \
+ (FIELD_GET(MBOX_WB_STATUS_MASK, (wb)) != MBOX_WB_STATUS_NOT_FINISHED)
+#define MBOX_STATUS_SUCCESS(wb) \
+ (FIELD_GET(MBOX_WB_STATUS_MASK, (wb)) == \
+ MBOX_WB_STATUS_FINISHED_SUCCESS)
+#define MBOX_STATUS_ERRCODE(wb) \
+ ((wb) & MBOX_WB_ERROR_CODE_MASK)
+
+#define MBOX_DMA_MSG_QUEUE_DEPTH 32
+#define MBOX_AREA(hwif) \
+ ((hwif)->cfg_regs_base + HINIC3_FUNC_CSR_MAILBOX_DATA_OFF)
+
+#define MBOX_MQ_CI_OFFSET \
+ (HINIC3_CFG_REGS_FLAG + HINIC3_FUNC_CSR_MAILBOX_DATA_OFF + \
+ MBOX_HEADER_SZ + MBOX_SEG_LEN)
+
+#define MBOX_MQ_SYNC_CI_MASK GENMASK(7, 0)
+#define MBOX_MQ_ASYNC_CI_MASK GENMASK(15, 8)
+#define MBOX_MQ_CI_GET(val, field) \
+ FIELD_GET(MBOX_MQ_##field##_CI_MASK, val)
+
+#define MBOX_MGMT_FUNC_ID 0x1FFF
+#define MBOX_COMM_F_MBOX_SEGMENT BIT(3)
+
+static u8 *get_mbox_body_from_hdr(u8 *header)
+{
+ return header + MBOX_HEADER_SZ;
+}
+
+static struct hinic3_msg_desc *get_mbox_msg_desc(struct hinic3_mbox *mbox,
+ enum mbox_msg_direction_type dir,
+ u16 src_func_id)
+{
+ struct hinic3_msg_channel *msg_ch;
+
+ msg_ch = (src_func_id == MBOX_MGMT_FUNC_ID) ?
+ &mbox->mgmt_msg : mbox->func_msg;
+
+ return (dir == MBOX_MSG_SEND) ?
+ &msg_ch->recv_msg : &msg_ch->resp_msg;
+}
+
+static void resp_mbox_handler(struct hinic3_mbox *mbox,
+ const struct hinic3_msg_desc *msg_desc)
+{
+ spin_lock(&mbox->mbox_lock);
+ if (msg_desc->msg_info.msg_id == mbox->send_msg_id &&
+ mbox->event_flag == MBOX_EVENT_START)
+ mbox->event_flag = MBOX_EVENT_SUCCESS;
+ spin_unlock(&mbox->mbox_lock);
+}
+
+static bool mbox_segment_valid(struct hinic3_mbox *mbox,
+ struct hinic3_msg_desc *msg_desc,
+ __le64 mbox_header)
+{
+ u8 seq_id, seg_len, msg_id, mod;
+ __le16 src_func_idx, cmd;
+
+ seq_id = MBOX_MSG_HEADER_GET(mbox_header, SEQID);
+ seg_len = MBOX_MSG_HEADER_GET(mbox_header, SEG_LEN);
+ msg_id = MBOX_MSG_HEADER_GET(mbox_header, MSG_ID);
+ mod = MBOX_MSG_HEADER_GET(mbox_header, MODULE);
+ cmd = cpu_to_le16(MBOX_MSG_HEADER_GET(mbox_header, CMD));
+ src_func_idx = cpu_to_le16(MBOX_MSG_HEADER_GET(mbox_header,
+ SRC_GLB_FUNC_IDX));
+
+ if (seq_id > MBOX_SEQ_ID_MAX_VAL || seg_len > MBOX_SEG_LEN ||
+ (seq_id == MBOX_SEQ_ID_MAX_VAL && seg_len > MBOX_LAST_SEG_MAX_LEN))
+ goto err_seg;
+
+ if (seq_id == 0) {
+ msg_desc->seq_id = seq_id;
+ msg_desc->msg_info.msg_id = msg_id;
+ msg_desc->mod = mod;
+ msg_desc->cmd = cmd;
+ } else {
+ if (seq_id != msg_desc->seq_id + 1 ||
+ msg_id != msg_desc->msg_info.msg_id ||
+ mod != msg_desc->mod || cmd != msg_desc->cmd)
+ goto err_seg;
+
+ msg_desc->seq_id = seq_id;
+ }
+
+ return true;
+
+err_seg:
+ dev_err(mbox->hwdev->dev,
+ "Mailbox segment check failed, src func id: 0x%x, front seg info: seq id: 0x%x, msg id: 0x%x, mod: 0x%x, cmd: 0x%x\n",
+ le16_to_cpu(src_func_idx), msg_desc->seq_id,
+ msg_desc->msg_info.msg_id, msg_desc->mod,
+ le16_to_cpu(msg_desc->cmd));
+ dev_err(mbox->hwdev->dev,
+ "Current seg info: seg len: 0x%x, seq id: 0x%x, msg id: 0x%x, mod: 0x%x, cmd: 0x%x\n",
+ seg_len, seq_id, msg_id, mod, le16_to_cpu(cmd));
+
+ return false;
+}
+
+static void recv_mbox_handler(struct hinic3_mbox *mbox,
+ u8 *header, struct hinic3_msg_desc *msg_desc)
+{
+ __le64 mbox_header = *((__force __le64 *)header);
+ u8 *mbox_body = get_mbox_body_from_hdr(header);
+ u8 seq_id, seg_len;
+ int pos;
+
+ if (!mbox_segment_valid(mbox, msg_desc, mbox_header)) {
+ msg_desc->seq_id = MBOX_SEQ_ID_MAX_VAL;
+ return;
+ }
+
+ seq_id = MBOX_MSG_HEADER_GET(mbox_header, SEQID);
+ seg_len = MBOX_MSG_HEADER_GET(mbox_header, SEG_LEN);
+
+ pos = seq_id * MBOX_SEG_LEN;
+ memcpy(msg_desc->msg + pos, mbox_body, seg_len);
+
+ if (!MBOX_MSG_HEADER_GET(mbox_header, LAST))
+ return;
+
+ msg_desc->msg_len = cpu_to_le16(MBOX_MSG_HEADER_GET(mbox_header,
+ MSG_LEN));
+ msg_desc->msg_info.status = MBOX_MSG_HEADER_GET(mbox_header, STATUS);
+
+ if (MBOX_MSG_HEADER_GET(mbox_header, DIRECTION) == MBOX_MSG_RESP)
+ resp_mbox_handler(mbox, msg_desc);
+}
+
+void hinic3_mbox_func_aeqe_handler(struct hinic3_hwdev *hwdev, u8 *header,
+ u8 size)
+{
+ __le64 mbox_header = *((__force __le64 *)header);
+ enum mbox_msg_direction_type dir;
+ struct hinic3_msg_desc *msg_desc;
+ struct hinic3_mbox *mbox;
+ u16 src_func_id;
+
+ mbox = hwdev->mbox;
+ dir = MBOX_MSG_HEADER_GET(mbox_header, DIRECTION);
+ src_func_id = MBOX_MSG_HEADER_GET(mbox_header, SRC_GLB_FUNC_IDX);
+ msg_desc = get_mbox_msg_desc(mbox, dir, src_func_id);
+ recv_mbox_handler(mbox, header, msg_desc);
+}
+
+static int init_mbox_dma_queue(struct hinic3_hwdev *hwdev,
+ struct mbox_dma_queue *mq)
+{
+ u32 size;
+
+ mq->depth = MBOX_DMA_MSG_QUEUE_DEPTH;
+ mq->prod_idx = 0;
+ mq->cons_idx = 0;
+
+ size = mq->depth * MBOX_MAX_BUF_SZ;
+ mq->dma_buf_vaddr = dma_alloc_coherent(hwdev->dev, size,
+ &mq->dma_buf_paddr,
+ GFP_KERNEL);
+ if (!mq->dma_buf_vaddr)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void uninit_mbox_dma_queue(struct hinic3_hwdev *hwdev,
+ struct mbox_dma_queue *mq)
+{
+ dma_free_coherent(hwdev->dev, mq->depth * MBOX_MAX_BUF_SZ,
+ mq->dma_buf_vaddr, mq->dma_buf_paddr);
+}
+
+static int hinic3_init_mbox_dma_queue(struct hinic3_mbox *mbox)
+{
+ u32 val;
+ int err;
+
+ err = init_mbox_dma_queue(mbox->hwdev, &mbox->sync_msg_queue);
+ if (err)
+ return err;
+
+ err = init_mbox_dma_queue(mbox->hwdev, &mbox->async_msg_queue);
+ if (err) {
+ uninit_mbox_dma_queue(mbox->hwdev, &mbox->sync_msg_queue);
+ return err;
+ }
+
+ val = hinic3_hwif_read_reg(mbox->hwdev->hwif, MBOX_MQ_CI_OFFSET);
+ val &= ~MBOX_MQ_SYNC_CI_MASK;
+ val &= ~MBOX_MQ_ASYNC_CI_MASK;
+ hinic3_hwif_write_reg(mbox->hwdev->hwif, MBOX_MQ_CI_OFFSET, val);
+
+ return 0;
+}
+
+static void hinic3_uninit_mbox_dma_queue(struct hinic3_mbox *mbox)
+{
+ uninit_mbox_dma_queue(mbox->hwdev, &mbox->sync_msg_queue);
+ uninit_mbox_dma_queue(mbox->hwdev, &mbox->async_msg_queue);
+}
+
+static int alloc_mbox_msg_channel(struct hinic3_msg_channel *msg_ch)
+{
+ msg_ch->resp_msg.msg = kzalloc(MBOX_MAX_BUF_SZ, GFP_KERNEL);
+ if (!msg_ch->resp_msg.msg)
+ return -ENOMEM;
+
+ msg_ch->recv_msg.msg = kzalloc(MBOX_MAX_BUF_SZ, GFP_KERNEL);
+ if (!msg_ch->recv_msg.msg) {
+ kfree(msg_ch->resp_msg.msg);
+ return -ENOMEM;
+ }
+
+ msg_ch->resp_msg.seq_id = MBOX_SEQ_ID_MAX_VAL;
+ msg_ch->recv_msg.seq_id = MBOX_SEQ_ID_MAX_VAL;
+
+ return 0;
+}
+
+static void free_mbox_msg_channel(struct hinic3_msg_channel *msg_ch)
+{
+ kfree(msg_ch->recv_msg.msg);
+ kfree(msg_ch->resp_msg.msg);
+}
+
+static int init_mgmt_msg_channel(struct hinic3_mbox *mbox)
+{
+ int err;
+
+ err = alloc_mbox_msg_channel(&mbox->mgmt_msg);
+ if (err) {
+ dev_err(mbox->hwdev->dev, "Failed to alloc mgmt message channel\n");
+ return err;
+ }
+
+ err = hinic3_init_mbox_dma_queue(mbox);
+ if (err) {
+ dev_err(mbox->hwdev->dev, "Failed to init mbox dma queue\n");
+ free_mbox_msg_channel(&mbox->mgmt_msg);
+ return err;
+ }
+
+ return 0;
+}
+
+static void uninit_mgmt_msg_channel(struct hinic3_mbox *mbox)
+{
+ hinic3_uninit_mbox_dma_queue(mbox);
+ free_mbox_msg_channel(&mbox->mgmt_msg);
+}
+
+static int hinic3_init_func_mbox_msg_channel(struct hinic3_hwdev *hwdev)
+{
+ struct hinic3_mbox *mbox;
+ int err;
+
+ mbox = hwdev->mbox;
+ mbox->func_msg = kzalloc(sizeof(*mbox->func_msg), GFP_KERNEL);
+ if (!mbox->func_msg)
+ return -ENOMEM;
+
+ err = alloc_mbox_msg_channel(mbox->func_msg);
+ if (err)
+ goto err_free_func_msg;
+
+ return 0;
+
+err_free_func_msg:
+ kfree(mbox->func_msg);
+ mbox->func_msg = NULL;
+
+ return err;
+}
+
+static void hinic3_uninit_func_mbox_msg_channel(struct hinic3_hwdev *hwdev)
+{
+ struct hinic3_mbox *mbox = hwdev->mbox;
+
+ free_mbox_msg_channel(mbox->func_msg);
+ kfree(mbox->func_msg);
+ mbox->func_msg = NULL;
+}
+
+static void prepare_send_mbox(struct hinic3_mbox *mbox)
+{
+ struct hinic3_send_mbox *send_mbox = &mbox->send_mbox;
+
+ send_mbox->data = MBOX_AREA(mbox->hwdev->hwif);
+}
+
+static int alloc_mbox_wb_status(struct hinic3_mbox *mbox)
+{
+ struct hinic3_send_mbox *send_mbox = &mbox->send_mbox;
+ struct hinic3_hwdev *hwdev = mbox->hwdev;
+ u32 addr_h, addr_l;
+
+ send_mbox->wb_vaddr = dma_alloc_coherent(hwdev->dev,
+ MBOX_WB_STATUS_LEN,
+ &send_mbox->wb_paddr,
+ GFP_KERNEL);
+ if (!send_mbox->wb_vaddr)
+ return -ENOMEM;
+
+ addr_h = upper_32_bits(send_mbox->wb_paddr);
+ addr_l = lower_32_bits(send_mbox->wb_paddr);
+ hinic3_hwif_write_reg(hwdev->hwif, HINIC3_FUNC_CSR_MAILBOX_RESULT_H_OFF,
+ addr_h);
+ hinic3_hwif_write_reg(hwdev->hwif, HINIC3_FUNC_CSR_MAILBOX_RESULT_L_OFF,
+ addr_l);
+
+ return 0;
+}
+
+static void free_mbox_wb_status(struct hinic3_mbox *mbox)
+{
+ struct hinic3_send_mbox *send_mbox = &mbox->send_mbox;
+ struct hinic3_hwdev *hwdev = mbox->hwdev;
+
+ hinic3_hwif_write_reg(hwdev->hwif, HINIC3_FUNC_CSR_MAILBOX_RESULT_H_OFF,
+ 0);
+ hinic3_hwif_write_reg(hwdev->hwif, HINIC3_FUNC_CSR_MAILBOX_RESULT_L_OFF,
+ 0);
+
+ dma_free_coherent(hwdev->dev, MBOX_WB_STATUS_LEN,
+ send_mbox->wb_vaddr, send_mbox->wb_paddr);
+}
+
+static int hinic3_mbox_pre_init(struct hinic3_hwdev *hwdev,
+ struct hinic3_mbox *mbox)
+{
+ mbox->hwdev = hwdev;
+ mutex_init(&mbox->mbox_send_lock);
+ spin_lock_init(&mbox->mbox_lock);
+
+ mbox->workq = create_singlethread_workqueue(HINIC3_MBOX_WQ_NAME);
+ if (!mbox->workq) {
+ dev_err(hwdev->dev, "Failed to initialize MBOX workqueue\n");
+ return -ENOMEM;
+ }
+ hwdev->mbox = mbox;
+
+ return 0;
+}
+
+int hinic3_init_mbox(struct hinic3_hwdev *hwdev)
+{
+ struct hinic3_mbox *mbox;
+ int err;
+
+ mbox = kzalloc(sizeof(*mbox), GFP_KERNEL);
+ if (!mbox)
+ return -ENOMEM;
+
+ err = hinic3_mbox_pre_init(hwdev, mbox);
+ if (err)
+ goto err_free_mbox;
+
+ err = init_mgmt_msg_channel(mbox);
+ if (err)
+ goto err_destroy_workqueue;
+
+ err = hinic3_init_func_mbox_msg_channel(hwdev);
+ if (err)
+ goto err_uninit_mgmt_msg_ch;
+
+ err = alloc_mbox_wb_status(mbox);
+ if (err) {
+ dev_err(hwdev->dev, "Failed to alloc mbox write back status\n");
+ goto err_uninit_func_mbox_msg_ch;
+ }
+
+ prepare_send_mbox(mbox);
+
+ return 0;
+
+err_uninit_func_mbox_msg_ch:
+ hinic3_uninit_func_mbox_msg_channel(hwdev);
+
+err_uninit_mgmt_msg_ch:
+ uninit_mgmt_msg_channel(mbox);
+
+err_destroy_workqueue:
+ destroy_workqueue(mbox->workq);
+
+err_free_mbox:
+ kfree(mbox);
+
+ return err;
+}
+
+void hinic3_free_mbox(struct hinic3_hwdev *hwdev)
+{
+ struct hinic3_mbox *mbox = hwdev->mbox;
+
+ destroy_workqueue(mbox->workq);
+ free_mbox_wb_status(mbox);
+ hinic3_uninit_func_mbox_msg_channel(hwdev);
+ uninit_mgmt_msg_channel(mbox);
+ kfree(mbox);
+}
+
+#define MBOX_DMA_MSG_INIT_XOR_VAL 0x5a5a5a5a
+#define MBOX_XOR_DATA_ALIGN 4
+static u32 mbox_dma_msg_xor(u32 *data, u32 msg_len)
+{
+ u32 xor = MBOX_DMA_MSG_INIT_XOR_VAL;
+ u32 dw_len = msg_len / sizeof(u32);
+ u32 i;
+
+ for (i = 0; i < dw_len; i++)
+ xor ^= data[i];
+
+ return xor;
+}
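+
+/* The xor is a cheap integrity check over the DMA buffer: e.g. a
+ * two-dword message {a, b} yields 0x5a5a5a5a ^ a ^ b, which the firmware
+ * can recompute over the same 4B-aligned length after fetching the
+ * message from host memory.
+ */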
+
+#define MBOX_MQ_ID_MASK(mq, idx) ((idx) & ((mq)->depth - 1))
+
+static bool is_msg_queue_full(struct mbox_dma_queue *mq)
+{
+ return MBOX_MQ_ID_MASK(mq, (mq)->prod_idx + 1) ==
+ MBOX_MQ_ID_MASK(mq, (mq)->cons_idx);
+}
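+
+/* Standard power-of-2 ring: indexes are masked with depth - 1 (depth is
+ * MBOX_DMA_MSG_QUEUE_DEPTH == 32) and the queue reports full when
+ * pi + 1 == ci, deliberately keeping one slot unused so pi == ci can
+ * unambiguously mean empty.
+ */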
+
+static int mbox_prepare_dma_entry(struct hinic3_mbox *mbox,
+ struct mbox_dma_queue *mq,
+ struct mbox_dma_msg *dma_msg,
+ const void *msg, u32 msg_len)
+{
+ u64 dma_addr, offset;
+ void *dma_vaddr;
+
+ if (is_msg_queue_full(mq)) {
+ dev_err(mbox->hwdev->dev, "Mbox message queue is full, pi: %u, ci: %u\n",
+ MBOX_MQ_ID_MASK(mq, mq->prod_idx),
+ MBOX_MQ_ID_MASK(mq, mq->cons_idx));
+ return -EBUSY;
+ }
+
+ /* copy data to DMA buffer */
+ offset = mq->prod_idx * MBOX_MAX_BUF_SZ;
+ dma_vaddr = (u8 *)mq->dma_buf_vaddr + offset;
+ memcpy(dma_vaddr, msg, msg_len);
+ dma_addr = mq->dma_buf_paddr + offset;
+ dma_msg->dma_addr_high = cpu_to_le32(upper_32_bits(dma_addr));
+ dma_msg->dma_addr_low = cpu_to_le32(lower_32_bits(dma_addr));
+ dma_msg->msg_len = cpu_to_le32(msg_len);
+ /* The firmware reads the message in 4-byte units, so compute the
+ * xor over the 4-byte-aligned length.
+ */
+ dma_msg->xor = cpu_to_le32(mbox_dma_msg_xor(dma_vaddr,
+ ALIGN(msg_len, MBOX_XOR_DATA_ALIGN)));
+ mq->prod_idx++;
+ mq->prod_idx = MBOX_MQ_ID_MASK(mq, mq->prod_idx);
+
+ return 0;
+}
+
+static int mbox_prepare_dma_msg(struct hinic3_mbox *mbox,
+ enum mbox_msg_ack_type ack_type,
+ struct mbox_dma_msg *dma_msg, const void *msg,
+ u32 msg_len)
+{
+ struct mbox_dma_queue *mq;
+ u32 val;
+
+ val = hinic3_hwif_read_reg(mbox->hwdev->hwif, MBOX_MQ_CI_OFFSET);
+ if (ack_type == MBOX_MSG_ACK) {
+ mq = &mbox->sync_msg_queue;
+ mq->cons_idx = MBOX_MQ_CI_GET(val, SYNC);
+ } else {
+ mq = &mbox->async_msg_queue;
+ mq->cons_idx = MBOX_MQ_CI_GET(val, ASYNC);
+ }
+
+ return mbox_prepare_dma_entry(mbox, mq, dma_msg, msg, msg_len);
+}
+
+static void clear_mbox_status(struct hinic3_send_mbox *mbox)
+{
+ __be64 *wb_status = mbox->wb_vaddr;
+
+ *wb_status = 0;
+ /* ensure the write back status is cleared before sending a new segment */
+ wmb();
+}
+
+static void mbox_dword_write(const void *src, void __iomem *dst, u32 count)
+{
+ const __le32 *src32 = src;
+ u32 __iomem *dst32 = dst;
+ u32 i;
+
+ /* Data written to mbox is arranged in structs with little endian fields
+ * but when written to HW every dword (32bits) should be swapped since
+ * the HW will swap it again.
+ */
+ for (i = 0; i < count; i++)
+ __raw_writel(swab32((__force __u32)src32[i]), dst32 + i);
+}
+
+static void mbox_copy_header(struct hinic3_hwdev *hwdev,
+ struct hinic3_send_mbox *mbox, __le64 *header)
+{
+ mbox_dword_write(header, mbox->data, MBOX_HEADER_SZ / sizeof(__le32));
+}
+
+static void mbox_copy_send_data(struct hinic3_hwdev *hwdev,
+ struct hinic3_send_mbox *mbox, void *seg,
+ u32 seg_len)
+{
+ u32 __iomem *dst = (u32 __iomem *)(mbox->data + MBOX_HEADER_SZ);
+ u32 count, leftover, last_dword;
+ const __le32 *src = seg;
+
+ count = seg_len / sizeof(u32);
+ leftover = seg_len % sizeof(u32);
+ if (count > 0)
+ mbox_dword_write(src, dst, count);
+
+ if (leftover > 0) {
+ last_dword = 0;
+ memcpy(&last_dword, src + count, leftover);
+ mbox_dword_write(&last_dword, dst + count, 1);
+ }
+}
+
+static void write_mbox_msg_attr(struct hinic3_mbox *mbox,
+ u16 dst_func, u16 dst_aeqn, u32 seg_len)
+{
+ struct hinic3_hwif *hwif = mbox->hwdev->hwif;
+ u32 mbox_int, mbox_ctrl, tx_size;
+
+ tx_size = ALIGN(seg_len + MBOX_HEADER_SZ, MBOX_SEG_LEN_ALIGN) >> 2;
+
+ mbox_int = MBOX_INT_SET(dst_aeqn, DST_AEQN) |
+ MBOX_INT_SET(0, STAT_DMA) |
+ MBOX_INT_SET(tx_size, TX_SIZE) |
+ MBOX_INT_SET(0, STAT_DMA_SO_RO) |
+ MBOX_INT_SET(1, WB_EN);
+
+ mbox_ctrl = MBOX_CTRL_SET(1, TX_STATUS) |
+ MBOX_CTRL_SET(0, TRIGGER_AEQE) |
+ MBOX_CTRL_SET(dst_func, DST_FUNC);
+
+ hinic3_hwif_write_reg(hwif, HINIC3_FUNC_CSR_MAILBOX_INT_OFF, mbox_int);
+ hinic3_hwif_write_reg(hwif, HINIC3_FUNC_CSR_MAILBOX_CONTROL_OFF,
+ mbox_ctrl);
+}
+
+static u16 get_mbox_status(const struct hinic3_send_mbox *mbox)
+{
+ __be64 *wb_status = mbox->wb_vaddr;
+ u64 wb_val;
+
+ wb_val = be64_to_cpu(*wb_status);
+ /* make sure the status is read before it is checked */
+ rmb();
+
+ return wb_val & MBOX_WB_STATUS_ERRCODE_MASK;
+}
+
+static enum hinic3_wait_return check_mbox_wb_status(void *priv_data)
+{
+ struct hinic3_mbox *mbox = priv_data;
+ u16 wb_status;
+
+ wb_status = get_mbox_status(&mbox->send_mbox);
+
+ return MBOX_STATUS_FINISHED(wb_status) ?
+ HINIC3_WAIT_PROCESS_CPL : HINIC3_WAIT_PROCESS_WAITING;
+}
+
+static int send_mbox_seg(struct hinic3_mbox *mbox, __le64 header,
+ u16 dst_func, void *seg, u32 seg_len, void *msg_info)
+{
+ struct hinic3_send_mbox *send_mbox = &mbox->send_mbox;
+ struct hinic3_hwdev *hwdev = mbox->hwdev;
+ u8 num_aeqs = hwdev->hwif->attr.num_aeqs;
+ enum mbox_msg_direction_type dir;
+ u16 dst_aeqn, wb_status, errcode;
+ int err;
+
+ /* for a mbox to the mgmt cpu, hardware ignores the dst aeq id */
+ if (num_aeqs > MBOX_MSG_AEQ_FOR_MBOX) {
+ dir = MBOX_MSG_HEADER_GET(header, DIRECTION);
+ dst_aeqn = (dir == MBOX_MSG_SEND) ?
+ MBOX_MSG_AEQ_FOR_EVENT : MBOX_MSG_AEQ_FOR_MBOX;
+ } else {
+ dst_aeqn = 0;
+ }
+
+ clear_mbox_status(send_mbox);
+ mbox_copy_header(hwdev, send_mbox, &header);
+ mbox_copy_send_data(hwdev, send_mbox, seg, seg_len);
+ write_mbox_msg_attr(mbox, dst_func, dst_aeqn, seg_len);
+
+ err = hinic3_wait_for_timeout(mbox, check_mbox_wb_status,
+ MBOX_MSG_POLLING_TIMEOUT_MS,
+ USEC_PER_MSEC);
+ wb_status = get_mbox_status(send_mbox);
+ if (err) {
+ dev_err(hwdev->dev, "Send mailbox segment timeout, wb status: 0x%x\n",
+ wb_status);
+ return err;
+ }
+
+ if (!MBOX_STATUS_SUCCESS(wb_status)) {
+ dev_err(hwdev->dev,
+ "Send mailbox segment to function %u error, wb status: 0x%x\n",
+ dst_func, wb_status);
+ errcode = MBOX_STATUS_ERRCODE(wb_status);
+ return errcode ? errcode : -EFAULT;
+ }
+
+ return 0;
+}
+
+static int send_mbox_msg(struct hinic3_mbox *mbox, u8 mod, u16 cmd,
+ const void *msg, u32 msg_len, u16 dst_func,
+ enum mbox_msg_direction_type direction,
+ enum mbox_msg_ack_type ack_type,
+ struct mbox_msg_info *msg_info)
+{
+ enum mbox_msg_data_type data_type = MBOX_MSG_DATA_INLINE;
+ struct hinic3_hwdev *hwdev = mbox->hwdev;
+ struct mbox_dma_msg dma_msg;
+ u32 seg_len = MBOX_SEG_LEN;
+ __le64 header = 0;
+ u32 seq_id = 0;
+ u16 rsp_aeq_id;
+ u8 *msg_seg;
+ int err = 0;
+ u32 left;
+
+ if (hwdev->hwif->attr.num_aeqs > MBOX_MSG_AEQ_FOR_MBOX)
+ rsp_aeq_id = MBOX_MSG_AEQ_FOR_MBOX;
+ else
+ rsp_aeq_id = 0;
+
+ if (dst_func == MBOX_MGMT_FUNC_ID &&
+ !(hwdev->features[0] & MBOX_COMM_F_MBOX_SEGMENT)) {
+ err = mbox_prepare_dma_msg(mbox, ack_type, &dma_msg,
+ msg, msg_len);
+ if (err)
+ goto err_send;
+
+ msg = &dma_msg;
+ msg_len = sizeof(dma_msg);
+ data_type = MBOX_MSG_DATA_DMA;
+ }
+
+ msg_seg = (u8 *)msg;
+ left = msg_len;
+
+ header = cpu_to_le64(MBOX_MSG_HEADER_SET(msg_len, MSG_LEN) |
+ MBOX_MSG_HEADER_SET(mod, MODULE) |
+ MBOX_MSG_HEADER_SET(seg_len, SEG_LEN) |
+ MBOX_MSG_HEADER_SET(ack_type, NO_ACK) |
+ MBOX_MSG_HEADER_SET(data_type, DATA_TYPE) |
+ MBOX_MSG_HEADER_SET(MBOX_SEQ_ID_START_VAL, SEQID) |
+ MBOX_MSG_HEADER_SET(direction, DIRECTION) |
+ MBOX_MSG_HEADER_SET(cmd, CMD) |
+ MBOX_MSG_HEADER_SET(msg_info->msg_id, MSG_ID) |
+ MBOX_MSG_HEADER_SET(rsp_aeq_id, AEQ_ID) |
+ MBOX_MSG_HEADER_SET(MBOX_MSG_FROM_MBOX, SOURCE) |
+ MBOX_MSG_HEADER_SET(!!msg_info->status, STATUS));
+
+ while (!(MBOX_MSG_HEADER_GET(header, LAST))) {
+ if (left <= MBOX_SEG_LEN) {
+ header &= cpu_to_le64(~MBOX_MSG_HEADER_SEG_LEN_MASK);
+ header |=
+ cpu_to_le64(MBOX_MSG_HEADER_SET(left, SEG_LEN) |
+ MBOX_MSG_HEADER_SET(1, LAST));
+ seg_len = left;
+ }
+
+ err = send_mbox_seg(mbox, header, dst_func, msg_seg,
+ seg_len, msg_info);
+ if (err) {
+ dev_err(hwdev->dev, "Failed to send mbox seg, seq_id=0x%llx\n",
+ MBOX_MSG_HEADER_GET(header, SEQID));
+ goto err_send;
+ }
+
+ left -= MBOX_SEG_LEN;
+ msg_seg += MBOX_SEG_LEN;
+ seq_id++;
+ header &= cpu_to_le64(~MBOX_MSG_HEADER_SEG_LEN_MASK);
+ header |= cpu_to_le64(MBOX_MSG_HEADER_SET(seq_id, SEQID));
+ }
+
+err_send:
+ return err;
+}
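+
+/* Segmentation example: a 100B inline message goes out as three
+ * segments; seq ids 0 and 1 carry 48B each and seq id 2 carries the
+ * remaining 4B with the LAST bit set. Only the SEQID, SEG_LEN and LAST
+ * header fields are rewritten between segments.
+ */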
+
+static void set_mbox_to_func_event(struct hinic3_mbox *mbox,
+ enum mbox_event_state event_flag)
+{
+ spin_lock(&mbox->mbox_lock);
+ mbox->event_flag = event_flag;
+ spin_unlock(&mbox->mbox_lock);
+}
+
+static enum hinic3_wait_return check_mbox_msg_finish(void *priv_data)
+{
+ struct hinic3_mbox *mbox = priv_data;
+
+ return (mbox->event_flag == MBOX_EVENT_SUCCESS) ?
+ HINIC3_WAIT_PROCESS_CPL : HINIC3_WAIT_PROCESS_WAITING;
+}
+
+static int wait_mbox_msg_completion(struct hinic3_mbox *mbox,
+ u32 timeout)
+{
+ u32 wait_time;
+ int err;
+
+ wait_time = (timeout != 0) ? timeout : MBOX_COMP_POLLING_TIMEOUT_MS;
+ err = hinic3_wait_for_timeout(mbox, check_mbox_msg_finish,
+ wait_time, USEC_PER_MSEC);
+ if (err) {
+ set_mbox_to_func_event(mbox, MBOX_EVENT_TIMEOUT);
+ return err;
+ }
+ set_mbox_to_func_event(mbox, MBOX_EVENT_END);
+
+ return 0;
+}
+
int hinic3_send_mbox_to_mgmt(struct hinic3_hwdev *hwdev, u8 mod, u16 cmd,
const struct mgmt_msg_params *msg_params)
{
- /* Completed by later submission due to LoC limit. */
- return -EFAULT;
+ struct hinic3_mbox *mbox = hwdev->mbox;
+ struct mbox_msg_info msg_info = {};
+ struct hinic3_msg_desc *msg_desc;
+ u32 msg_len;
+ int err;
+
+ /* expect response message */
+ msg_desc = get_mbox_msg_desc(mbox, MBOX_MSG_RESP, MBOX_MGMT_FUNC_ID);
+ mutex_lock(&mbox->mbox_send_lock);
+ msg_info.msg_id = (mbox->send_msg_id + 1) & 0xF;
+ mbox->send_msg_id = msg_info.msg_id;
+ set_mbox_to_func_event(mbox, MBOX_EVENT_START);
+
+ err = send_mbox_msg(mbox, mod, cmd, msg_params->buf_in,
+ msg_params->in_size, MBOX_MGMT_FUNC_ID,
+ MBOX_MSG_SEND, MBOX_MSG_ACK, &msg_info);
+ if (err) {
+ dev_err(hwdev->dev, "Send mailbox mod %u, cmd %u failed, msg_id: %u, err: %d\n",
+ mod, cmd, msg_info.msg_id, err);
+ set_mbox_to_func_event(mbox, MBOX_EVENT_FAIL);
+ goto err_send;
+ }
+
+ if (wait_mbox_msg_completion(mbox, msg_params->timeout_ms)) {
+ dev_err(hwdev->dev,
+ "Send mbox msg timeout, msg_id: %u\n", msg_info.msg_id);
+ err = -ETIMEDOUT;
+ goto err_send;
+ }
+
+ if (mod != msg_desc->mod || cmd != le16_to_cpu(msg_desc->cmd)) {
+ dev_err(hwdev->dev,
+ "Invalid response mbox message, mod: 0x%x, cmd: 0x%x, expect mod: 0x%x, cmd: 0x%x\n",
+ msg_desc->mod, le16_to_cpu(msg_desc->cmd), mod, cmd);
+ err = -EFAULT;
+ goto err_send;
+ }
+
+ if (msg_desc->msg_info.status) {
+ err = msg_desc->msg_info.status;
+ goto err_send;
+ }
+
+ if (msg_params->buf_out) {
+ msg_len = le16_to_cpu(msg_desc->msg_len);
+ if (msg_len != msg_params->expected_out_size) {
+ dev_err(hwdev->dev,
+ "Invalid response mbox message length: %u for mod %u cmd %u, expected length: %u\n",
+ msg_len, mod, cmd,
+ msg_params->expected_out_size);
+ err = -EFAULT;
+ goto err_send;
+ }
+
+ memcpy(msg_params->buf_out, msg_desc->msg, msg_len);
+ }
+
+err_send:
+ mutex_unlock(&mbox->mbox_send_lock);
+
+ return err;
+}
+
+int hinic3_send_mbox_to_mgmt_no_ack(struct hinic3_hwdev *hwdev, u8 mod, u16 cmd,
+ const struct mgmt_msg_params *msg_params)
+{
+ struct hinic3_mbox *mbox = hwdev->mbox;
+ struct mbox_msg_info msg_info = {};
+ int err;
+
+ mutex_lock(&mbox->mbox_send_lock);
+ err = send_mbox_msg(mbox, mod, cmd, msg_params->buf_in,
+ msg_params->in_size, MBOX_MGMT_FUNC_ID,
+ MBOX_MSG_SEND, MBOX_MSG_NO_ACK, &msg_info);
+ if (err)
+ dev_err(hwdev->dev, "Send mailbox no ack failed\n");
+
+ mutex_unlock(&mbox->mbox_send_lock);
+
+ return err;
}
diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_mbox.h b/drivers/net/ethernet/huawei/hinic3/hinic3_mbox.h
index d7a6c37b7eff..e71629e95086 100644
--- a/drivers/net/ethernet/huawei/hinic3/hinic3_mbox.h
+++ b/drivers/net/ethernet/huawei/hinic3/hinic3_mbox.h
@@ -8,8 +8,134 @@
#include <linux/mutex.h>
struct hinic3_hwdev;
+struct mgmt_msg_params;
+
+#define MBOX_MSG_HEADER_SRC_GLB_FUNC_IDX_MASK GENMASK_ULL(12, 0)
+#define MBOX_MSG_HEADER_STATUS_MASK BIT_ULL(13)
+#define MBOX_MSG_HEADER_SOURCE_MASK BIT_ULL(15)
+#define MBOX_MSG_HEADER_AEQ_ID_MASK GENMASK_ULL(17, 16)
+#define MBOX_MSG_HEADER_MSG_ID_MASK GENMASK_ULL(21, 18)
+#define MBOX_MSG_HEADER_CMD_MASK GENMASK_ULL(31, 22)
+#define MBOX_MSG_HEADER_MSG_LEN_MASK GENMASK_ULL(42, 32)
+#define MBOX_MSG_HEADER_MODULE_MASK GENMASK_ULL(47, 43)
+#define MBOX_MSG_HEADER_SEG_LEN_MASK GENMASK_ULL(53, 48)
+#define MBOX_MSG_HEADER_NO_ACK_MASK BIT_ULL(54)
+#define MBOX_MSG_HEADER_DATA_TYPE_MASK BIT_ULL(55)
+#define MBOX_MSG_HEADER_SEQID_MASK GENMASK_ULL(61, 56)
+#define MBOX_MSG_HEADER_LAST_MASK BIT_ULL(62)
+#define MBOX_MSG_HEADER_DIRECTION_MASK BIT_ULL(63)
+
+#define MBOX_MSG_HEADER_SET(val, member) \
+ FIELD_PREP(MBOX_MSG_HEADER_##member##_MASK, val)
+#define MBOX_MSG_HEADER_GET(val, member) \
+ FIELD_GET(MBOX_MSG_HEADER_##member##_MASK, le64_to_cpu(val))
+
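An illustrative compose/decode of the 64-bit header using the SET/GET helpers above and the direction/ack enums declared just below; the field values are arbitrary:

    u8 msg_id = 3, seg_len = 12;
    __le64 hdr;

    hdr = cpu_to_le64(MBOX_MSG_HEADER_SET(msg_id, MSG_ID) |
                      MBOX_MSG_HEADER_SET(MBOX_MSG_SEND, DIRECTION) |
                      MBOX_MSG_HEADER_SET(MBOX_MSG_ACK, NO_ACK) |
                      MBOX_MSG_HEADER_SET(seg_len, SEG_LEN) |
                      MBOX_MSG_HEADER_SET(1, LAST));

    msg_id = MBOX_MSG_HEADER_GET(hdr, MSG_ID);   /* round-trips back to 3 */

SET produces a host-order u64 (FIELD_PREP), so the caller converts once with cpu_to_le64; GET performs the le64_to_cpu internally.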
+/* Identifies whether a segment belongs to a message or to a response. A VF is
+ * only expected to send messages and receive responses. The PF driver may
+ * receive messages and send responses.
+ */
+enum mbox_msg_direction_type {
+ MBOX_MSG_SEND = 0,
+ MBOX_MSG_RESP = 1,
+};
+
+/* Indicates if mbox message expects a response (ack) or not */
+enum mbox_msg_ack_type {
+ MBOX_MSG_ACK = 0,
+ MBOX_MSG_NO_ACK = 1,
+};
+
+enum mbox_msg_data_type {
+ MBOX_MSG_DATA_INLINE = 0,
+ MBOX_MSG_DATA_DMA = 1,
+};
+
+enum mbox_msg_src_type {
+ MBOX_MSG_FROM_MBOX = 1,
+};
+
+enum mbox_msg_aeq_type {
+ MBOX_MSG_AEQ_FOR_EVENT = 0,
+ MBOX_MSG_AEQ_FOR_MBOX = 1,
+};
+
+#define HINIC3_MBOX_WQ_NAME "hinic3_mbox"
+
+struct mbox_msg_info {
+ u8 msg_id;
+ u8 status;
+};
+
+struct hinic3_msg_desc {
+ u8 *msg;
+ __le16 msg_len;
+ u8 seq_id;
+ u8 mod;
+ __le16 cmd;
+ struct mbox_msg_info msg_info;
+};
+
+struct hinic3_msg_channel {
+ struct hinic3_msg_desc resp_msg;
+ struct hinic3_msg_desc recv_msg;
+};
+
+struct hinic3_send_mbox {
+ u8 __iomem *data;
+ void *wb_vaddr;
+ dma_addr_t wb_paddr;
+};
+
+enum mbox_event_state {
+ MBOX_EVENT_START = 0,
+ MBOX_EVENT_FAIL = 1,
+ MBOX_EVENT_SUCCESS = 2,
+ MBOX_EVENT_TIMEOUT = 3,
+ MBOX_EVENT_END = 4,
+};
+
+struct mbox_dma_msg {
+ __le32 xor;
+ __le32 dma_addr_high;
+ __le32 dma_addr_low;
+ __le32 msg_len;
+ __le64 rsvd;
+};
+
+struct mbox_dma_queue {
+ void *dma_buf_vaddr;
+ dma_addr_t dma_buf_paddr;
+ u16 depth;
+ u16 prod_idx;
+ u16 cons_idx;
+};
+
+struct hinic3_mbox {
+ struct hinic3_hwdev *hwdev;
+ /* lock for send mbox message and ack message */
+ struct mutex mbox_send_lock;
+ struct hinic3_send_mbox send_mbox;
+ struct mbox_dma_queue sync_msg_queue;
+ struct mbox_dma_queue async_msg_queue;
+ struct workqueue_struct *workq;
+ /* driver and MGMT CPU */
+ struct hinic3_msg_channel mgmt_msg;
+ /* VF to PF */
+ struct hinic3_msg_channel *func_msg;
+ u8 send_msg_id;
+ enum mbox_event_state event_flag;
+ /* lock for mbox event flag */
+ spinlock_t mbox_lock;
+};
+
+void hinic3_mbox_func_aeqe_handler(struct hinic3_hwdev *hwdev, u8 *header,
+ u8 size);
+int hinic3_init_mbox(struct hinic3_hwdev *hwdev);
+void hinic3_free_mbox(struct hinic3_hwdev *hwdev);
int hinic3_send_mbox_to_mgmt(struct hinic3_hwdev *hwdev, u8 mod, u16 cmd,
const struct mgmt_msg_params *msg_params);
+int hinic3_send_mbox_to_mgmt_no_ack(struct hinic3_hwdev *hwdev, u8 mod, u16 cmd,
+ const struct mgmt_msg_params *msg_params);
#endif
diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_nic_dev.h b/drivers/net/ethernet/huawei/hinic3/hinic3_nic_dev.h
index c994fc9b6ee0..9fad834f9e92 100644
--- a/drivers/net/ethernet/huawei/hinic3/hinic3_nic_dev.h
+++ b/drivers/net/ethernet/huawei/hinic3/hinic3_nic_dev.h
@@ -51,6 +51,12 @@ struct hinic3_dyna_txrxq_params {
struct hinic3_irq_cfg *irq_cfg;
};
+struct hinic3_intr_coal_info {
+ u8 pending_limit;
+ u8 coalesce_timer_cfg;
+ u8 resend_timer_cfg;
+};
+
struct hinic3_nic_dev {
struct pci_dev *pdev;
struct net_device *netdev;
@@ -70,13 +76,13 @@ struct hinic3_nic_dev {
u16 num_qp_irq;
struct msix_entry *qps_msix_entries;
+ struct hinic3_intr_coal_info *intr_coalesce;
+
bool link_status_up;
};
void hinic3_set_netdev_ops(struct net_device *netdev);
-
-/* Temporary prototypes. Functions become static in later submission. */
-void qp_add_napi(struct hinic3_irq_cfg *irq_cfg);
-void qp_del_napi(struct hinic3_irq_cfg *irq_cfg);
+int hinic3_qps_irq_init(struct net_device *netdev);
+void hinic3_qps_irq_uninit(struct net_device *netdev);
#endif
diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_tx.c b/drivers/net/ethernet/huawei/hinic3/hinic3_tx.c
index 3f7f73430be4..f1c745ee3087 100644
--- a/drivers/net/ethernet/huawei/hinic3/hinic3_tx.c
+++ b/drivers/net/ethernet/huawei/hinic3/hinic3_tx.c
@@ -55,9 +55,9 @@ void hinic3_free_txqs(struct net_device *netdev)
static void hinic3_set_buf_desc(struct hinic3_sq_bufdesc *buf_descs,
dma_addr_t addr, u32 len)
{
- buf_descs->hi_addr = upper_32_bits(addr);
- buf_descs->lo_addr = lower_32_bits(addr);
- buf_descs->len = len;
+ buf_descs->hi_addr = cpu_to_le32(upper_32_bits(addr));
+ buf_descs->lo_addr = cpu_to_le32(lower_32_bits(addr));
+ buf_descs->len = cpu_to_le32(len);
}
static int hinic3_tx_map_skb(struct net_device *netdev, struct sk_buff *skb,
diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_wq.c b/drivers/net/ethernet/huawei/hinic3/hinic3_wq.c
index 2ac7efcd1365..bc3ffdc25cf6 100644
--- a/drivers/net/ethernet/huawei/hinic3/hinic3_wq.c
+++ b/drivers/net/ethernet/huawei/hinic3/hinic3_wq.c
@@ -6,6 +6,110 @@
#include "hinic3_hwdev.h"
#include "hinic3_wq.h"
+#define WQ_MIN_DEPTH 64
+#define WQ_MAX_DEPTH 65536
+#define WQ_PAGE_ADDR_SIZE sizeof(u64)
+#define WQ_MAX_NUM_PAGES (HINIC3_MIN_PAGE_SIZE / WQ_PAGE_ADDR_SIZE)
+
+static int wq_init_wq_block(struct hinic3_hwdev *hwdev, struct hinic3_wq *wq)
+{
+ struct hinic3_queue_pages *qpages = &wq->qpages;
+ int i;
+
+ if (hinic3_wq_is_0_level_cla(wq)) {
+ wq->wq_block_paddr = qpages->pages[0].align_paddr;
+ wq->wq_block_vaddr = qpages->pages[0].align_vaddr;
+
+ return 0;
+ }
+
+ if (wq->qpages.num_pages > WQ_MAX_NUM_PAGES) {
+ dev_err(hwdev->dev, "wq num_pages exceed limit: %lu\n",
+ WQ_MAX_NUM_PAGES);
+ return -EFAULT;
+ }
+
+ wq->wq_block_vaddr = dma_alloc_coherent(hwdev->dev,
+ HINIC3_MIN_PAGE_SIZE,
+ &wq->wq_block_paddr,
+ GFP_KERNEL);
+ if (!wq->wq_block_vaddr)
+ return -ENOMEM;
+
+ for (i = 0; i < qpages->num_pages; i++)
+ wq->wq_block_vaddr[i] = cpu_to_be64(qpages->pages[i].align_paddr);
+
+ return 0;
+}
+
+static int wq_alloc_pages(struct hinic3_hwdev *hwdev, struct hinic3_wq *wq)
+{
+ int err;
+
+ err = hinic3_queue_pages_alloc(hwdev, &wq->qpages, 0);
+ if (err)
+ return err;
+
+ err = wq_init_wq_block(hwdev, wq);
+ if (err) {
+ hinic3_queue_pages_free(hwdev, &wq->qpages);
+ return err;
+ }
+
+ return 0;
+}
+
+static void wq_free_pages(struct hinic3_hwdev *hwdev, struct hinic3_wq *wq)
+{
+ if (!hinic3_wq_is_0_level_cla(wq))
+ dma_free_coherent(hwdev->dev,
+ HINIC3_MIN_PAGE_SIZE,
+ wq->wq_block_vaddr,
+ wq->wq_block_paddr);
+
+ hinic3_queue_pages_free(hwdev, &wq->qpages);
+}
+
+int hinic3_wq_create(struct hinic3_hwdev *hwdev, struct hinic3_wq *wq,
+ u32 q_depth, u16 wqebb_size)
+{
+ u32 wq_page_size;
+
+ if (q_depth < WQ_MIN_DEPTH || q_depth > WQ_MAX_DEPTH ||
+ !is_power_of_2(q_depth) || !is_power_of_2(wqebb_size)) {
+ dev_err(hwdev->dev, "Invalid WQ: q_depth %u, wqebb_size %u\n",
+ q_depth, wqebb_size);
+ return -EINVAL;
+ }
+
+ wq_page_size = ALIGN(hwdev->wq_page_size, HINIC3_MIN_PAGE_SIZE);
+
+ memset(wq, 0, sizeof(*wq));
+ wq->q_depth = q_depth;
+ wq->idx_mask = q_depth - 1;
+
+ hinic3_queue_pages_init(&wq->qpages, q_depth, wq_page_size, wqebb_size);
+
+ return wq_alloc_pages(hwdev, wq);
+}
+
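Hedged usage sketch for the create/destroy pair above, assuming a valid hwdev: depth and WQEBB size must both be powers of two, with depth in [64, 65536]. When the ring fits in one page, wq_init_wq_block() hands that page directly to hardware (the 0-level CLA case); otherwise it allocates an indirection page filled with big-endian page addresses.

    struct hinic3_wq wq;
    int err;

    err = hinic3_wq_create(hwdev, &wq, 4096, 32);  /* 4096 WQEBBs of 32 B */
    if (err)
            return err;
    /* ... post and consume WQEBBs ... */
    hinic3_wq_destroy(hwdev, &wq);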
+void hinic3_wq_destroy(struct hinic3_hwdev *hwdev, struct hinic3_wq *wq)
+{
+ wq_free_pages(hwdev, wq);
+}
+
+void hinic3_wq_reset(struct hinic3_wq *wq)
+{
+ struct hinic3_queue_pages *qpages = &wq->qpages;
+ u16 pg_idx;
+
+ wq->cons_idx = 0;
+ wq->prod_idx = 0;
+
+ for (pg_idx = 0; pg_idx < qpages->num_pages; pg_idx++)
+ memset(qpages->pages[pg_idx].align_vaddr, 0, qpages->page_size);
+}
+
void hinic3_wq_get_multi_wqebbs(struct hinic3_wq *wq,
u16 num_wqebbs, u16 *prod_idx,
struct hinic3_sq_bufdesc **first_part_wqebbs,
@@ -27,3 +131,8 @@ void hinic3_wq_get_multi_wqebbs(struct hinic3_wq *wq,
*second_part_wqebbs = get_q_element(&wq->qpages, idx, NULL);
}
}
+
+bool hinic3_wq_is_0_level_cla(const struct hinic3_wq *wq)
+{
+ return wq->qpages.num_pages == 1;
+}
diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_wq.h b/drivers/net/ethernet/huawei/hinic3/hinic3_wq.h
index ab37893efd7e..9b3f012bec80 100644
--- a/drivers/net/ethernet/huawei/hinic3/hinic3_wq.h
+++ b/drivers/net/ethernet/huawei/hinic3/hinic3_wq.h
@@ -10,10 +10,10 @@
struct hinic3_sq_bufdesc {
/* 31-bits Length, L2NIC only uses length[17:0] */
- u32 len;
- u32 rsvd;
- u32 hi_addr;
- u32 lo_addr;
+ __le32 len;
+ __le32 rsvd;
+ __le32 hi_addr;
+ __le32 lo_addr;
};
/* Work queue is used to submit elements (tx, rx, cmd) to hw.
@@ -59,6 +59,7 @@ static inline void *hinic3_wq_get_one_wqebb(struct hinic3_wq *wq, u16 *pi)
{
*pi = wq->prod_idx & wq->idx_mask;
wq->prod_idx++;
+
return get_q_element(&wq->qpages, *pi, NULL);
}
@@ -67,10 +68,20 @@ static inline void hinic3_wq_put_wqebbs(struct hinic3_wq *wq, u16 num_wqebbs)
wq->cons_idx += num_wqebbs;
}
+static inline u64 hinic3_wq_get_first_wqe_page_addr(const struct hinic3_wq *wq)
+{
+ return wq->qpages.pages[0].align_paddr;
+}
+
+int hinic3_wq_create(struct hinic3_hwdev *hwdev, struct hinic3_wq *wq,
+ u32 q_depth, u16 wqebb_size);
+void hinic3_wq_destroy(struct hinic3_hwdev *hwdev, struct hinic3_wq *wq);
+void hinic3_wq_reset(struct hinic3_wq *wq);
void hinic3_wq_get_multi_wqebbs(struct hinic3_wq *wq,
u16 num_wqebbs, u16 *prod_idx,
struct hinic3_sq_bufdesc **first_part_wqebbs,
struct hinic3_sq_bufdesc **second_part_wqebbs,
u16 *first_part_wqebbs_num);
+bool hinic3_wq_is_0_level_cla(const struct hinic3_wq *wq);
#endif
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index eec971567aac..3808148c1fc7 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -756,6 +756,17 @@ static void deactivate_rx_pools(struct ibmvnic_adapter *adapter)
adapter->rx_pool[i].active = 0;
}
+static void ibmvnic_set_safe_max_ind_descs(struct ibmvnic_adapter *adapter)
+{
+ if (adapter->cur_max_ind_descs > IBMVNIC_SAFE_IND_DESC) {
+ netdev_info(adapter->netdev,
+ "set max ind descs from %u to safe limit %u\n",
+ adapter->cur_max_ind_descs,
+ IBMVNIC_SAFE_IND_DESC);
+ adapter->cur_max_ind_descs = IBMVNIC_SAFE_IND_DESC;
+ }
+}
+
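The helper above implements a one-way clamp: the driver first assumes the platform maximum of IBMVNIC_MAX_IND_DESCS (128) indirect descriptors and falls back to IBMVNIC_SAFE_IND_DESC (16) the first time the hypervisor rejects a batch with H_PARAMETER. A sketch of the recovery flow, with argument names assumed from the surrounding call sites:

    rc = send_subcrq_indirect(adapter, handle, dma_addr, entries);
    if (rc == H_PARAMETER)
            /* platform rejected the large batch: clamp 128 -> 16 */
            ibmvnic_set_safe_max_ind_descs(adapter);
    /* either way this batch is dropped; TCP/IP retransmit recovers it */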
static void replenish_rx_pool(struct ibmvnic_adapter *adapter,
struct ibmvnic_rx_pool *pool)
{
@@ -843,7 +854,7 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter,
sub_crq->rx_add.len = cpu_to_be32(pool->buff_size << shift);
/* if send_subcrq_indirect queue is full, flush to VIOS */
- if (ind_bufp->index == IBMVNIC_MAX_IND_DESCS ||
+ if (ind_bufp->index == adapter->cur_max_ind_descs ||
i == count - 1) {
lpar_rc =
send_subcrq_indirect(adapter, handle,
@@ -862,6 +873,14 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter,
failure:
if (lpar_rc != H_PARAMETER && lpar_rc != H_CLOSED)
dev_err_ratelimited(dev, "rx: replenish packet buffer failed\n");
+
+ /* Detect platform limit H_PARAMETER */
+ if (lpar_rc == H_PARAMETER)
+ ibmvnic_set_safe_max_ind_descs(adapter);
+
+ /* For all error cases, temporarily drop only this batch.
+ * Rely on TCP/IP retransmissions to retry and recover.
+ */
for (i = ind_bufp->index - 1; i >= 0; --i) {
struct ibmvnic_rx_buff *rx_buff;
@@ -2381,16 +2400,28 @@ static int ibmvnic_tx_scrq_flush(struct ibmvnic_adapter *adapter,
rc = send_subcrq_direct(adapter, handle,
(u64 *)ind_bufp->indir_arr);
- if (rc)
+ if (rc) {
+ dev_err_ratelimited(&adapter->vdev->dev,
+ "tx_flush failed, rc=%u (%llu entries dma=%pad handle=%llx)\n",
+ rc, entries, &dma_addr, handle);
+ /* Detect platform limit H_PARAMETER */
+ if (rc == H_PARAMETER)
+ ibmvnic_set_safe_max_ind_descs(adapter);
+
+ /* For all error cases, temporarily drop only this batch.
+ * Rely on TCP/IP retransmissions to retry and recover.
+ */
ibmvnic_tx_scrq_clean_buffer(adapter, tx_scrq);
- else
+ } else {
ind_bufp->index = 0;
+ }
return rc;
}
static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
{
struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+ u32 cur_max_ind_descs = adapter->cur_max_ind_descs;
int queue_num = skb_get_queue_mapping(skb);
u8 *hdrs = (u8 *)&adapter->tx_rx_desc_req;
struct device *dev = &adapter->vdev->dev;
@@ -2590,7 +2621,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
tx_crq.v1.n_crq_elem = num_entries;
tx_buff->num_entries = num_entries;
/* flush buffer if current entry can not fit */
- if (num_entries + ind_bufp->index > IBMVNIC_MAX_IND_DESCS) {
+ if (num_entries + ind_bufp->index > cur_max_ind_descs) {
lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq, true);
if (lpar_rc != H_SUCCESS)
goto tx_flush_err;
@@ -2603,7 +2634,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
ind_bufp->index += num_entries;
if (__netdev_tx_sent_queue(txq, skb->len,
netdev_xmit_more() &&
- ind_bufp->index < IBMVNIC_MAX_IND_DESCS)) {
+ ind_bufp->index < cur_max_ind_descs)) {
lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq, true);
if (lpar_rc != H_SUCCESS)
goto tx_err;
@@ -4006,7 +4037,7 @@ static void release_sub_crq_queue(struct ibmvnic_adapter *adapter,
}
dma_free_coherent(dev,
- IBMVNIC_IND_ARR_SZ,
+ IBMVNIC_IND_MAX_ARR_SZ,
scrq->ind_buf.indir_arr,
scrq->ind_buf.indir_dma);
@@ -4063,7 +4094,7 @@ static struct ibmvnic_sub_crq_queue *init_sub_crq_queue(struct ibmvnic_adapter
scrq->ind_buf.indir_arr =
dma_alloc_coherent(dev,
- IBMVNIC_IND_ARR_SZ,
+ IBMVNIC_IND_MAX_ARR_SZ,
&scrq->ind_buf.indir_dma,
GFP_KERNEL);
@@ -6369,6 +6400,19 @@ static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter, bool reset)
rc = reset_sub_crq_queues(adapter);
}
} else {
+ if (adapter->reset_reason == VNIC_RESET_MOBILITY) {
+ /* After an LPM, reset the max number of indirect
+ * subcrq descriptors per H_SEND_SUB_CRQ_INDIRECT
+ * hcall to the default max (e.g POWER8 -> POWER10)
+ *
+ * If the new destination platform does not support
+ * the higher limit max (e.g. POWER10-> POWER8 LPM)
+ * H_PARAMETER will trigger automatic fallback to the
+ * safe minimum limit.
+ */
+ adapter->cur_max_ind_descs = IBMVNIC_MAX_IND_DESCS;
+ }
+
rc = init_sub_crqs(adapter);
}
@@ -6520,6 +6564,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
adapter->wait_for_reset = false;
adapter->last_reset_time = jiffies;
+ adapter->cur_max_ind_descs = IBMVNIC_MAX_IND_DESCS;
rc = register_netdev(netdev);
if (rc) {
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h
index 246ddce753f9..480dc587078f 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -29,8 +29,9 @@
#define IBMVNIC_BUFFS_PER_POOL 100
#define IBMVNIC_MAX_QUEUES 16
#define IBMVNIC_MAX_QUEUE_SZ 4096
-#define IBMVNIC_MAX_IND_DESCS 16
-#define IBMVNIC_IND_ARR_SZ (IBMVNIC_MAX_IND_DESCS * 32)
+#define IBMVNIC_MAX_IND_DESCS 128
+#define IBMVNIC_SAFE_IND_DESC 16
+#define IBMVNIC_IND_MAX_ARR_SZ (IBMVNIC_MAX_IND_DESCS * 32)
#define IBMVNIC_TSO_BUF_SZ 65536
#define IBMVNIC_TSO_BUFS 64
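The raised maximum keeps the indirect buffer at exactly one 4 KiB page, since each descriptor is 32 bytes. An illustrative sanity check (not part of the driver):

    static_assert(IBMVNIC_IND_MAX_ARR_SZ == 4096);  /* 128 * 32 */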
@@ -930,6 +931,7 @@ struct ibmvnic_adapter {
struct ibmvnic_control_ip_offload_buffer ip_offload_ctrl;
dma_addr_t ip_offload_ctrl_tok;
u32 msg_enable;
+ u32 cur_max_ind_descs;
/* Vital Product Data (VPD) */
struct ibmvnic_vpd *vpd;
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c b/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c
index 1954a04460d1..bf2029144c1d 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c
@@ -560,7 +560,7 @@ static int fm10k_set_ringparam(struct net_device *netdev,
/* allocate temporary buffer to store rings in */
i = max_t(int, interface->num_tx_queues, interface->num_rx_queues);
- temp_ring = vmalloc(array_size(i, sizeof(struct fm10k_ring)));
+ temp_ring = vmalloc_array(i, sizeof(struct fm10k_ring));
if (!temp_ring) {
err = -ENOMEM;
diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile
index d0f9c9492363..eac45d7c0cf1 100644
--- a/drivers/net/ethernet/intel/ice/Makefile
+++ b/drivers/net/ethernet/intel/ice/Makefile
@@ -47,9 +47,11 @@ ice-y := ice_main.o \
ice_adapter.o
ice-$(CONFIG_PCI_IOV) += \
ice_sriov.o \
- ice_virtchnl.o \
- ice_virtchnl_allowlist.o \
- ice_virtchnl_fdir.o \
+ virt/allowlist.o \
+ virt/fdir.o \
+ virt/queues.o \
+ virt/virtchnl.o \
+ virt/rss.o \
ice_vf_mbx.o \
ice_vf_vsi_vlan_ops.o \
ice_vf_lib.o
diff --git a/drivers/net/ethernet/intel/ice/devlink/health.c b/drivers/net/ethernet/intel/ice/devlink/health.c
index ab519c0f28bf..8e9a8a8178d4 100644
--- a/drivers/net/ethernet/intel/ice/devlink/health.c
+++ b/drivers/net/ethernet/intel/ice/devlink/health.c
@@ -450,9 +450,8 @@ ice_init_devlink_rep(struct ice_pf *pf,
{
struct devlink *devlink = priv_to_devlink(pf);
struct devlink_health_reporter *rep;
- const u64 graceful_period = 0;
- rep = devl_health_reporter_create(devlink, ops, graceful_period, pf);
+ rep = devl_health_reporter_create(devlink, ops, pf);
if (IS_ERR(rep)) {
struct device *dev = ice_pf_to_dev(pf);
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 8a8a01a4bb40..e952d67388bf 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -203,6 +203,7 @@ enum ice_feature {
ICE_F_GCS,
ICE_F_ROCE_LAG,
ICE_F_SRIOV_LAG,
+ ICE_F_SRIOV_AA_LAG,
ICE_F_MBX_LIMIT,
ICE_F_MAX
};
diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
index 3bd3ea3af888..caae1780fd37 100644
--- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
@@ -2060,6 +2060,10 @@ struct ice_aqc_cfg_txqs {
#define ICE_AQC_Q_CFG_SRC_PRT_M 0x7
#define ICE_AQC_Q_CFG_DST_PRT_S 3
#define ICE_AQC_Q_CFG_DST_PRT_M (0x7 << ICE_AQC_Q_CFG_DST_PRT_S)
+#define ICE_AQC_Q_CFG_MODE_M GENMASK(7, 6)
+#define ICE_AQC_Q_CFG_MODE_SAME_PF 0x0
+#define ICE_AQC_Q_CFG_MODE_GIVE_OWN 0x1
+#define ICE_AQC_Q_CFG_MODE_KEEP_OWN 0x2
u8 time_out;
#define ICE_AQC_Q_CFG_TIMEOUT_S 2
#define ICE_AQC_Q_CFG_TIMEOUT_M (0x1F << ICE_AQC_Q_CFG_TIMEOUT_S)
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index 003d60a4db21..808870539667 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -2418,12 +2418,15 @@ ice_parse_common_caps(struct ice_hw *hw, struct ice_hw_common_caps *caps,
caps->reset_restrict_support);
break;
case LIBIE_AQC_CAPS_FW_LAG_SUPPORT:
- caps->roce_lag = !!(number & LIBIE_AQC_BIT_ROCEV2_LAG);
+ caps->roce_lag = number & LIBIE_AQC_BIT_ROCEV2_LAG;
ice_debug(hw, ICE_DBG_INIT, "%s: roce_lag = %u\n",
prefix, caps->roce_lag);
- caps->sriov_lag = !!(number & LIBIE_AQC_BIT_SRIOV_LAG);
+ caps->sriov_lag = number & LIBIE_AQC_BIT_SRIOV_LAG;
ice_debug(hw, ICE_DBG_INIT, "%s: sriov_lag = %u\n",
prefix, caps->sriov_lag);
+ caps->sriov_aa_lag = number & LIBIE_AQC_BIT_SRIOV_AA_LAG;
+ ice_debug(hw, ICE_DBG_INIT, "%s: sriov_aa_lag = %u\n",
+ prefix, caps->sriov_aa_lag);
break;
case LIBIE_AQC_CAPS_TX_SCHED_TOPO_COMP_MODE:
caps->tx_sched_topo_comp_mode_en = (number == 1);
@@ -4712,24 +4715,24 @@ do_aq:
}
/**
- * ice_aq_cfg_lan_txq
+ * ice_aq_cfg_lan_txq - send AQ command 0x0C32 to FW
* @hw: pointer to the hardware structure
* @buf: buffer for command
* @buf_size: size of buffer in bytes
* @num_qs: number of queues being configured
* @oldport: origination lport
* @newport: destination lport
+ * @mode: cmd_type value selecting the move/configure behavior
* @cd: pointer to command details structure or NULL
*
* Move/Configure LAN Tx queue (0x0C32)
*
- * There is a better AQ command to use for moving nodes, so only coding
- * this one for configuring the node.
+ * Return: Zero on success, associated error code on failure.
*/
int
ice_aq_cfg_lan_txq(struct ice_hw *hw, struct ice_aqc_cfg_txqs_buf *buf,
u16 buf_size, u16 num_qs, u8 oldport, u8 newport,
- struct ice_sq_cd *cd)
+ u8 mode, struct ice_sq_cd *cd)
{
struct ice_aqc_cfg_txqs *cmd;
struct libie_aq_desc desc;
@@ -4742,10 +4745,12 @@ ice_aq_cfg_lan_txq(struct ice_hw *hw, struct ice_aqc_cfg_txqs_buf *buf,
if (!buf)
return -EINVAL;
- cmd->cmd_type = ICE_AQC_Q_CFG_TC_CHNG;
+ cmd->cmd_type = mode;
cmd->num_qs = num_qs;
cmd->port_num_chng = (oldport & ICE_AQC_Q_CFG_SRC_PRT_M);
cmd->port_num_chng |= FIELD_PREP(ICE_AQC_Q_CFG_DST_PRT_M, newport);
+ cmd->port_num_chng |= FIELD_PREP(ICE_AQC_Q_CFG_MODE_M,
+ ICE_AQC_Q_CFG_MODE_KEEP_OWN);
cmd->time_out = FIELD_PREP(ICE_AQC_Q_CFG_TIMEOUT_M, 5);
cmd->blocked_cgds = 0;
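A worked packing example for the port_num_chng byte assembled above (values arbitrary): bits 2..0 carry the source lport, bits 5..3 the destination lport, and bits 7..6 the ownership mode.

    u8 port_num_chng;

    port_num_chng  = 1 & ICE_AQC_Q_CFG_SRC_PRT_M;              /* src 1  -> 0x01 */
    port_num_chng |= FIELD_PREP(ICE_AQC_Q_CFG_DST_PRT_M, 2);   /* dst 2  -> 0x10 */
    port_num_chng |= FIELD_PREP(ICE_AQC_Q_CFG_MODE_M,
                                ICE_AQC_Q_CFG_MODE_KEEP_OWN);  /* mode 2 -> 0x80 */
    /* port_num_chng == 0x91 */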
diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h
index 60320cdf7804..dba15ad315a6 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.h
+++ b/drivers/net/ethernet/intel/ice/ice_common.h
@@ -270,7 +270,7 @@ ice_ena_vsi_txq(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 q_handle,
int
ice_aq_cfg_lan_txq(struct ice_hw *hw, struct ice_aqc_cfg_txqs_buf *buf,
u16 buf_size, u16 num_qs, u8 oldport, u8 newport,
- struct ice_sq_cd *cd);
+ u8 mode, struct ice_sq_cd *cd);
int ice_replay_vsi(struct ice_hw *hw, u16 vsi_handle);
void ice_replay_post(struct ice_hw *hw);
struct ice_q_ctx *
diff --git a/drivers/net/ethernet/intel/ice/ice_lag.c b/drivers/net/ethernet/intel/ice/ice_lag.c
index b1129da72139..80312e1dcf7f 100644
--- a/drivers/net/ethernet/intel/ice/ice_lag.c
+++ b/drivers/net/ethernet/intel/ice/ice_lag.c
@@ -10,12 +10,17 @@
#define ICE_LAG_RES_SHARED BIT(14)
#define ICE_LAG_RES_VALID BIT(15)
-#define LACP_TRAIN_PKT_LEN 16
-static const u8 lacp_train_pkt[LACP_TRAIN_PKT_LEN] = { 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0,
- 0x88, 0x09, 0, 0 };
+#define ICE_TRAIN_PKT_LEN 16
+static const u8 lacp_train_pkt[ICE_TRAIN_PKT_LEN] = { 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0,
+ 0x88, 0x09, 0, 0 };
+static const u8 act_act_train_pkt[ICE_TRAIN_PKT_LEN] = { 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0 };
#define ICE_RECIPE_LEN 64
+#define ICE_LAG_SRIOV_CP_RECIPE 10
+
static const u8 ice_dflt_vsi_rcp[ICE_RECIPE_LEN] = {
0x05, 0, 0, 0, 0x20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0x85, 0, 0x01, 0, 0, 0, 0xff, 0xff, 0x08, 0, 0, 0, 0, 0, 0, 0,
@@ -46,10 +51,10 @@ static void ice_lag_set_primary(struct ice_lag *lag)
}
/**
- * ice_lag_set_backup - set PF LAG state to Backup
+ * ice_lag_set_bkup - set PF LAG state to Backup
* @lag: LAG info struct
*/
-static void ice_lag_set_backup(struct ice_lag *lag)
+static void ice_lag_set_bkup(struct ice_lag *lag)
{
struct ice_pf *pf = lag->pf;
@@ -99,6 +104,28 @@ static bool netif_is_same_ice(struct ice_pf *pf, struct net_device *netdev)
}
/**
+ * ice_lag_config_eswitch - configure eswitch to work with LAG
+ * @lag: lag info struct
+ * @netdev: active network interface device struct
+ *
+ * Updates all port representors in eswitch to use @netdev for Tx.
+ *
+ * Configures the netdev to keep dst metadata (also used in representor Tx).
+ * This is required for an uplink without switchdev mode configured.
+ */
+static void ice_lag_config_eswitch(struct ice_lag *lag,
+ struct net_device *netdev)
+{
+ struct ice_repr *repr;
+ unsigned long id;
+
+ xa_for_each(&lag->pf->eswitch.reprs, id, repr)
+ repr->dst->u.port_info.lower_dev = netdev;
+
+ netif_keep_dst(netdev);
+}
+
+/**
* ice_netdev_to_lag - return pointer to associated lag struct from netdev
* @netdev: pointer to net_device struct to query
*/
@@ -210,13 +237,12 @@ ice_lag_cfg_fltr(struct ice_lag *lag, u32 act, u16 recipe_id, u16 *rule_idx,
u8 direction, bool add)
{
struct ice_sw_rule_lkup_rx_tx *s_rule;
+ struct ice_hw *hw = &lag->pf->hw;
u16 s_rule_sz, vsi_num;
- struct ice_hw *hw;
u8 *eth_hdr;
u32 opc;
int err;
- hw = &lag->pf->hw;
vsi_num = ice_get_hw_vsi_num(hw, 0);
s_rule_sz = ICE_SW_RULE_RX_TX_ETH_HDR_SIZE(s_rule);
@@ -314,26 +340,15 @@ ice_lag_cfg_drop_fltr(struct ice_lag *lag, bool add)
}
/**
- * ice_lag_cfg_pf_fltrs - set filters up for new active port
+ * ice_lag_cfg_pf_fltrs_act_bkup - set filters up for new active port
* @lag: local interfaces lag struct
- * @ptr: opaque data containing notifier event
+ * @bonding_info: netdev event bonding info
*/
static void
-ice_lag_cfg_pf_fltrs(struct ice_lag *lag, void *ptr)
+ice_lag_cfg_pf_fltrs_act_bkup(struct ice_lag *lag,
+ struct netdev_bonding_info *bonding_info)
{
- struct netdev_notifier_bonding_info *info;
- struct netdev_bonding_info *bonding_info;
- struct net_device *event_netdev;
- struct device *dev;
-
- event_netdev = netdev_notifier_info_to_dev(ptr);
- /* not for this netdev */
- if (event_netdev != lag->netdev)
- return;
-
- info = (struct netdev_notifier_bonding_info *)ptr;
- bonding_info = &info->bonding_info;
- dev = ice_pf_to_dev(lag->pf);
+ struct device *dev = ice_pf_to_dev(lag->pf);
/* interface not active - remove old default VSI rule */
if (bonding_info->slave.state && lag->pf_rx_rule_id) {
@@ -354,6 +369,105 @@ ice_lag_cfg_pf_fltrs(struct ice_lag *lag, void *ptr)
}
/**
+ * ice_lag_cfg_lp_fltr - configure lport filters
+ * @lag: local interface's lag struct
+ * @add: add or remove rule
+ * @cp: control packet only or general PF lport rule
+ */
+static void
+ice_lag_cfg_lp_fltr(struct ice_lag *lag, bool add, bool cp)
+{
+ struct ice_sw_rule_lkup_rx_tx *s_rule;
+ struct ice_vsi *vsi = lag->pf->vsi[0];
+ u16 buf_len, opc;
+
+ buf_len = ICE_SW_RULE_RX_TX_HDR_SIZE(s_rule, ICE_TRAIN_PKT_LEN);
+ s_rule = kzalloc(buf_len, GFP_KERNEL);
+ if (!s_rule) {
+ netdev_warn(lag->netdev, "-ENOMEM error configuring lport filter\n");
+ return;
+ }
+
+ if (add) {
+ if (cp) {
+ s_rule->recipe_id =
+ cpu_to_le16(ICE_LAG_SRIOV_CP_RECIPE);
+ memcpy(s_rule->hdr_data, lacp_train_pkt,
+ ICE_TRAIN_PKT_LEN);
+ } else {
+ s_rule->recipe_id = cpu_to_le16(lag->act_act_recipe);
+ memcpy(s_rule->hdr_data, act_act_train_pkt,
+ ICE_TRAIN_PKT_LEN);
+ }
+
+ s_rule->src = cpu_to_le16(vsi->port_info->lport);
+ s_rule->act = cpu_to_le32(ICE_FWD_TO_VSI |
+ ICE_SINGLE_ACT_LAN_ENABLE |
+ ICE_SINGLE_ACT_VALID_BIT |
+ FIELD_PREP(ICE_SINGLE_ACT_VSI_ID_M,
+ vsi->vsi_num));
+ s_rule->hdr_len = cpu_to_le16(ICE_TRAIN_PKT_LEN);
+ s_rule->hdr.type = cpu_to_le16(ICE_AQC_SW_RULES_T_LKUP_RX);
+ opc = ice_aqc_opc_add_sw_rules;
+ } else {
+ opc = ice_aqc_opc_remove_sw_rules;
+ if (cp)
+ s_rule->index = cpu_to_le16(lag->cp_rule_idx);
+ else
+ s_rule->index = cpu_to_le16(lag->act_act_rule_idx);
+ }
+ if (ice_aq_sw_rules(&lag->pf->hw, s_rule, buf_len, 1, opc, NULL)) {
+ netdev_warn(lag->netdev, "Error %s %s rule for aggregate\n",
+ add ? "ADDING" : "REMOVING",
+ cp ? "CONTROL PACKET" : "LPORT");
+ goto err_cp_free;
+ }
+
+ if (add) {
+ if (cp)
+ lag->cp_rule_idx = le16_to_cpu(s_rule->index);
+ else
+ lag->act_act_rule_idx = le16_to_cpu(s_rule->index);
+ } else {
+ if (cp)
+ lag->cp_rule_idx = 0;
+ else
+ lag->act_act_rule_idx = 0;
+ }
+
+err_cp_free:
+ kfree(s_rule);
+}
+
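For orientation, the three call sites this patch ends up with (the last one tears down whichever rule type the bond installed):

    ice_lag_cfg_lp_fltr(lag, true,  true);            /* LACP control-packet rule */
    ice_lag_cfg_lp_fltr(lag, true,  false);           /* A/A lport steering rule  */
    ice_lag_cfg_lp_fltr(lag, false, !lag->bond_aa);   /* removal on unlink        */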
+/**
+ * ice_lag_cfg_pf_fltrs - set filters up for PF traffic
+ * @lag: local interfaces lag struct
+ * @ptr: opaque data containing notifier event
+ */
+static void
+ice_lag_cfg_pf_fltrs(struct ice_lag *lag, void *ptr)
+{
+ struct netdev_notifier_bonding_info *info = ptr;
+ struct netdev_bonding_info *bonding_info;
+ struct net_device *event_netdev;
+
+ event_netdev = netdev_notifier_info_to_dev(ptr);
+ if (event_netdev != lag->netdev)
+ return;
+
+ bonding_info = &info->bonding_info;
+
+ if (lag->bond_aa) {
+ if (lag->need_fltr_cfg) {
+ ice_lag_cfg_lp_fltr(lag, true, false);
+ lag->need_fltr_cfg = false;
+ }
+ } else {
+ ice_lag_cfg_pf_fltrs_act_bkup(lag, bonding_info);
+ }
+}
+
+/**
* ice_display_lag_info - print LAG info
* @lag: LAG info struct
*/
@@ -402,12 +516,11 @@ static u16
ice_lag_qbuf_recfg(struct ice_hw *hw, struct ice_aqc_cfg_txqs_buf *qbuf,
u16 vsi_num, u16 numq, u8 tc)
{
+ struct ice_pf *pf = hw->back;
struct ice_q_ctx *q_ctx;
u16 qid, count = 0;
- struct ice_pf *pf;
int i;
- pf = hw->back;
for (i = 0; i < numq; i++) {
q_ctx = ice_get_lan_q_ctx(hw, vsi_num, tc, i);
if (!q_ctx) {
@@ -577,7 +690,7 @@ ice_lag_move_vf_node_tc(struct ice_lag *lag, u8 oldport, u8 newport,
}
if (ice_aq_cfg_lan_txq(&lag->pf->hw, qbuf, qbuf_size, valq, oldport,
- newport, NULL)) {
+ newport, ICE_AQC_Q_CFG_TC_CHNG, NULL)) {
dev_warn(dev, "Failure to configure queues for LAG failover\n");
goto qbuf_err;
}
@@ -713,10 +826,17 @@ void ice_lag_move_new_vf_nodes(struct ice_vf *vf)
if (lag->upper_netdev)
ice_lag_build_netdev_list(lag, &ndlist);
- if (ice_is_feature_supported(pf, ICE_F_SRIOV_LAG) &&
- lag->bonded && lag->primary && pri_port != act_port &&
- !list_empty(lag->netdev_head))
- ice_lag_move_single_vf_nodes(lag, pri_port, act_port, vsi->idx);
+ if (lag->bonded && lag->primary && !list_empty(lag->netdev_head)) {
+ if (lag->bond_aa &&
+ ice_is_feature_supported(pf, ICE_F_SRIOV_AA_LAG))
+ ice_lag_aa_failover(lag, ICE_LAGS_IDX, NULL);
+
+ if (!lag->bond_aa &&
+ ice_is_feature_supported(pf, ICE_F_SRIOV_LAG) &&
+ pri_port != act_port)
+ ice_lag_move_single_vf_nodes(lag, pri_port, act_port,
+ vsi->idx);
+ }
ice_lag_destroy_netdev_list(lag, &ndlist);
@@ -767,61 +887,6 @@ void ice_lag_move_vf_nodes_cfg(struct ice_lag *lag, u8 src_prt, u8 dst_prt)
ice_lag_destroy_netdev_list(lag, &ndlist);
}
-#define ICE_LAG_SRIOV_CP_RECIPE 10
-#define ICE_LAG_SRIOV_TRAIN_PKT_LEN 16
-
-/**
- * ice_lag_cfg_cp_fltr - configure filter for control packets
- * @lag: local interface's lag struct
- * @add: add or remove rule
- */
-static void
-ice_lag_cfg_cp_fltr(struct ice_lag *lag, bool add)
-{
- struct ice_sw_rule_lkup_rx_tx *s_rule = NULL;
- struct ice_vsi *vsi;
- u16 buf_len, opc;
-
- vsi = lag->pf->vsi[0];
-
- buf_len = ICE_SW_RULE_RX_TX_HDR_SIZE(s_rule,
- ICE_LAG_SRIOV_TRAIN_PKT_LEN);
- s_rule = kzalloc(buf_len, GFP_KERNEL);
- if (!s_rule) {
- netdev_warn(lag->netdev, "-ENOMEM error configuring CP filter\n");
- return;
- }
-
- if (add) {
- s_rule->hdr.type = cpu_to_le16(ICE_AQC_SW_RULES_T_LKUP_RX);
- s_rule->recipe_id = cpu_to_le16(ICE_LAG_SRIOV_CP_RECIPE);
- s_rule->src = cpu_to_le16(vsi->port_info->lport);
- s_rule->act = cpu_to_le32(ICE_FWD_TO_VSI |
- ICE_SINGLE_ACT_LAN_ENABLE |
- ICE_SINGLE_ACT_VALID_BIT |
- FIELD_PREP(ICE_SINGLE_ACT_VSI_ID_M, vsi->vsi_num));
- s_rule->hdr_len = cpu_to_le16(ICE_LAG_SRIOV_TRAIN_PKT_LEN);
- memcpy(s_rule->hdr_data, lacp_train_pkt, LACP_TRAIN_PKT_LEN);
- opc = ice_aqc_opc_add_sw_rules;
- } else {
- opc = ice_aqc_opc_remove_sw_rules;
- s_rule->index = cpu_to_le16(lag->cp_rule_idx);
- }
- if (ice_aq_sw_rules(&lag->pf->hw, s_rule, buf_len, 1, opc, NULL)) {
- netdev_warn(lag->netdev, "Error %s CP rule for fail-over\n",
- add ? "ADDING" : "REMOVING");
- goto cp_free;
- }
-
- if (add)
- lag->cp_rule_idx = le16_to_cpu(s_rule->index);
- else
- lag->cp_rule_idx = 0;
-
-cp_free:
- kfree(s_rule);
-}
-
/**
* ice_lag_prepare_vf_reset - helper to adjust vf lag for reset
* @lag: lag struct for interface that owns VF
@@ -835,11 +900,20 @@ u8 ice_lag_prepare_vf_reset(struct ice_lag *lag)
u8 pri_prt, act_prt;
if (lag && lag->bonded && lag->primary && lag->upper_netdev) {
- pri_prt = lag->pf->hw.port_info->lport;
- act_prt = lag->active_port;
- if (act_prt != pri_prt && act_prt != ICE_LAG_INVALID_PORT) {
- ice_lag_move_vf_nodes_cfg(lag, act_prt, pri_prt);
- return act_prt;
+ if (!lag->bond_aa) {
+ pri_prt = lag->pf->hw.port_info->lport;
+ act_prt = lag->active_port;
+ if (act_prt != pri_prt &&
+ act_prt != ICE_LAG_INVALID_PORT) {
+ ice_lag_move_vf_nodes_cfg(lag, act_prt, pri_prt);
+ return act_prt;
+ }
+ } else {
+ if (lag->port_bitmap & ICE_LAGS_M) {
+ lag->port_bitmap &= ~ICE_LAGS_M;
+ ice_lag_aa_failover(lag, ICE_LAGP_IDX, NULL);
+ lag->port_bitmap |= ICE_LAGS_M;
+ }
}
}
@@ -857,10 +931,15 @@ void ice_lag_complete_vf_reset(struct ice_lag *lag, u8 act_prt)
{
u8 pri_prt;
- if (lag && lag->bonded && lag->primary &&
- act_prt != ICE_LAG_INVALID_PORT) {
- pri_prt = lag->pf->hw.port_info->lport;
- ice_lag_move_vf_nodes_cfg(lag, pri_prt, act_prt);
+ if (lag && lag->bonded && lag->primary) {
+ if (!lag->bond_aa) {
+ pri_prt = lag->pf->hw.port_info->lport;
+ if (act_prt != ICE_LAG_INVALID_PORT)
+ ice_lag_move_vf_nodes_cfg(lag, pri_prt,
+ act_prt);
+ } else {
+ ice_lag_aa_failover(lag, ICE_LAGS_IDX, NULL);
+ }
}
}
@@ -873,13 +952,12 @@ void ice_lag_complete_vf_reset(struct ice_lag *lag, u8 act_prt)
*/
static void ice_lag_info_event(struct ice_lag *lag, void *ptr)
{
- struct netdev_notifier_bonding_info *info;
+ struct netdev_notifier_bonding_info *info = ptr;
struct netdev_bonding_info *bonding_info;
struct net_device *event_netdev;
const char *lag_netdev_name;
event_netdev = netdev_notifier_info_to_dev(ptr);
- info = ptr;
lag_netdev_name = netdev_name(lag->netdev);
bonding_info = &info->bonding_info;
@@ -897,7 +975,7 @@ static void ice_lag_info_event(struct ice_lag *lag, void *ptr)
}
if (bonding_info->slave.state)
- ice_lag_set_backup(lag);
+ ice_lag_set_bkup(lag);
else
ice_lag_set_primary(lag);
@@ -906,6 +984,295 @@ lag_out:
}
/**
+ * ice_lag_aa_qbuf_recfg - fill a single queue buffer for recfg cmd
+ * @hw: HW struct that contains the queue context
+ * @qbuf: pointer to single queue buffer
+ * @vsi_num: index of the VF VSI in PF space
+ * @qnum: queue index
+ *
+ * Return: Zero on success, error code on failure.
+ */
+static int
+ice_lag_aa_qbuf_recfg(struct ice_hw *hw, struct ice_aqc_cfg_txqs_buf *qbuf,
+ u16 vsi_num, int qnum)
+{
+ struct ice_pf *pf = hw->back;
+ struct ice_q_ctx *q_ctx;
+ u16 q_id;
+
+ q_ctx = ice_get_lan_q_ctx(hw, vsi_num, 0, qnum);
+ if (!q_ctx) {
+ dev_dbg(ice_hw_to_dev(hw), "LAG queue %d no Q context\n", qnum);
+ return -ENOENT;
+ }
+
+ if (q_ctx->q_teid == ICE_INVAL_TEID) {
+ dev_dbg(ice_hw_to_dev(hw), "LAG queue %d INVAL TEID\n", qnum);
+ return -EINVAL;
+ }
+
+ if (q_ctx->q_handle == ICE_INVAL_Q_HANDLE) {
+ dev_dbg(ice_hw_to_dev(hw), "LAG queue %d INVAL Q HANDLE\n", qnum);
+ return -EINVAL;
+ }
+
+ q_id = pf->vsi[vsi_num]->txq_map[q_ctx->q_handle];
+ qbuf->queue_info[0].q_handle = cpu_to_le16(q_id);
+ qbuf->queue_info[0].tc = 0;
+ qbuf->queue_info[0].q_teid = cpu_to_le32(q_ctx->q_teid);
+
+ return 0;
+}
+
+/**
+ * ice_lag_aa_move_vf_qs - Move some/all VF queues to destination
+ * @lag: primary interface's lag struct
+ * @dest: index of destination port
+ * @vsi_num: index of VF VSI in PF space
+ * @all: if true move all queues to destination
+ * @odd: VF wide q indicator for odd/even
+ * @e_pf: PF struct for the event interface
+ *
+ * the parameter "all" is to control whether we are splitting the queues
+ * between two interfaces or moving them all to the destination interface
+ */
+static void ice_lag_aa_move_vf_qs(struct ice_lag *lag, u8 dest, u16 vsi_num,
+ bool all, bool *odd, struct ice_pf *e_pf)
+{
+ DEFINE_RAW_FLEX(struct ice_aqc_cfg_txqs_buf, qbuf, queue_info, 1);
+ struct ice_hw *old_hw, *new_hw, *pri_hw, *sec_hw;
+ struct device *dev = ice_pf_to_dev(lag->pf);
+ struct ice_vsi_ctx *pv_ctx, *sv_ctx;
+ struct ice_lag_netdev_list ndlist;
+ u16 num_q, qbuf_size, sec_vsi_num;
+ u8 pri_lport, sec_lport;
+ u32 pvf_teid, svf_teid;
+ u16 vf_id;
+
+ vf_id = lag->pf->vsi[vsi_num]->vf->vf_id;
+ /* If sec_vf[] not defined, then no second interface to share with */
+ if (lag->sec_vf[vf_id])
+ sec_vsi_num = lag->sec_vf[vf_id]->idx;
+ else
+ return;
+
+ pri_lport = lag->bond_lport_pri;
+ sec_lport = lag->bond_lport_sec;
+
+ if (pri_lport == ICE_LAG_INVALID_PORT ||
+ sec_lport == ICE_LAG_INVALID_PORT)
+ return;
+
+ if (!e_pf)
+ ice_lag_build_netdev_list(lag, &ndlist);
+
+ pri_hw = &lag->pf->hw;
+ if (e_pf && lag->pf != e_pf)
+ sec_hw = &e_pf->hw;
+ else
+ sec_hw = ice_lag_find_hw_by_lport(lag, sec_lport);
+
+ if (!pri_hw || !sec_hw)
+ return;
+
+ if (dest == ICE_LAGP_IDX) {
+ struct ice_vsi *vsi;
+
+ vsi = ice_get_main_vsi(lag->pf);
+ if (!vsi)
+ return;
+
+ old_hw = sec_hw;
+ new_hw = pri_hw;
+ ice_lag_config_eswitch(lag, vsi->netdev);
+ } else {
+ struct ice_pf *sec_pf = sec_hw->back;
+ struct ice_vsi *vsi;
+
+ vsi = ice_get_main_vsi(sec_pf);
+ if (!vsi)
+ return;
+
+ old_hw = pri_hw;
+ new_hw = sec_hw;
+ ice_lag_config_eswitch(lag, vsi->netdev);
+ }
+
+ pv_ctx = ice_get_vsi_ctx(pri_hw, vsi_num);
+ if (!pv_ctx) {
+ dev_warn(dev, "Unable to locate primary VSI %d context for LAG failover\n",
+ vsi_num);
+ return;
+ }
+
+ sv_ctx = ice_get_vsi_ctx(sec_hw, sec_vsi_num);
+ if (!sv_ctx) {
+ dev_warn(dev, "Unable to locate secondary VSI %d context for LAG failover\n",
+ sec_vsi_num);
+ return;
+ }
+
+ num_q = pv_ctx->num_lan_q_entries[0];
+ qbuf_size = __struct_size(qbuf);
+
+ /* Suspend traffic for primary VSI VF */
+ pvf_teid = le32_to_cpu(pv_ctx->sched.vsi_node[0]->info.node_teid);
+ ice_sched_suspend_resume_elems(pri_hw, 1, &pvf_teid, true);
+
+ /* Suspend traffic for secondary VSI VF */
+ svf_teid = le32_to_cpu(sv_ctx->sched.vsi_node[0]->info.node_teid);
+ ice_sched_suspend_resume_elems(sec_hw, 1, &svf_teid, true);
+
+ for (int i = 0; i < num_q; i++) {
+ struct ice_sched_node *n_prt, *q_node, *parent;
+ struct ice_port_info *pi, *new_pi;
+ struct ice_vsi_ctx *src_ctx;
+ struct ice_sched_node *p;
+ struct ice_q_ctx *q_ctx;
+ u16 dst_vsi_num;
+
+ pi = old_hw->port_info;
+ new_pi = new_hw->port_info;
+
+ *odd = !(*odd);
+ if ((dest == ICE_LAGP_IDX && *odd && !all) ||
+ (dest == ICE_LAGS_IDX && !(*odd) && !all) ||
+ lag->q_home[vf_id][i] == dest)
+ continue;
+
+ if (dest == ICE_LAGP_IDX)
+ dst_vsi_num = vsi_num;
+ else
+ dst_vsi_num = sec_vsi_num;
+
+ n_prt = ice_sched_get_free_qparent(new_hw->port_info,
+ dst_vsi_num, 0,
+ ICE_SCHED_NODE_OWNER_LAN);
+ if (!n_prt)
+ continue;
+
+ q_ctx = ice_get_lan_q_ctx(pri_hw, vsi_num, 0, i);
+ if (!q_ctx)
+ continue;
+
+ if (dest == ICE_LAGP_IDX)
+ src_ctx = sv_ctx;
+ else
+ src_ctx = pv_ctx;
+
+ q_node = ice_sched_find_node_by_teid(src_ctx->sched.vsi_node[0],
+ q_ctx->q_teid);
+ if (!q_node)
+ continue;
+
+ qbuf->src_parent_teid = q_node->info.parent_teid;
+ qbuf->dst_parent_teid = n_prt->info.node_teid;
+
+ /* Move the node in the HW/FW */
+ if (ice_lag_aa_qbuf_recfg(pri_hw, qbuf, vsi_num, i))
+ continue;
+
+ if (dest == ICE_LAGP_IDX)
+ ice_aq_cfg_lan_txq(pri_hw, qbuf, qbuf_size, 1,
+ sec_lport, pri_lport,
+ ICE_AQC_Q_CFG_MOVE_TC_CHNG,
+ NULL);
+ else
+ ice_aq_cfg_lan_txq(pri_hw, qbuf, qbuf_size, 1,
+ pri_lport, sec_lport,
+ ICE_AQC_Q_CFG_MOVE_TC_CHNG,
+ NULL);
+
+ /* Move the node in the SW */
+ parent = q_node->parent;
+ if (!parent)
+ continue;
+
+ for (int n = 0; n < parent->num_children; n++) {
+ int j;
+
+ if (parent->children[n] != q_node)
+ continue;
+
+ for (j = n + 1; j < parent->num_children;
+ j++) {
+ parent->children[j - 1] =
+ parent->children[j];
+ }
+ /* j == num_children here; clear the now-stale tail slot */
+ parent->children[j - 1] = NULL;
+ parent->num_children--;
+ break;
+ }
+
+ p = pi->sib_head[0][q_node->tx_sched_layer];
+ while (p) {
+ if (p->sibling == q_node) {
+ p->sibling = q_node->sibling;
+ break;
+ }
+ p = p->sibling;
+ }
+
+ if (pi->sib_head[0][q_node->tx_sched_layer] == q_node)
+ pi->sib_head[0][q_node->tx_sched_layer] =
+ q_node->sibling;
+
+ q_node->parent = n_prt;
+ q_node->info.parent_teid = n_prt->info.node_teid;
+ q_node->sibling = NULL;
+ p = new_pi->sib_head[0][q_node->tx_sched_layer];
+ if (p) {
+ while (p) {
+ if (!p->sibling) {
+ p->sibling = q_node;
+ break;
+ }
+ p = p->sibling;
+ }
+ } else {
+ new_pi->sib_head[0][q_node->tx_sched_layer] =
+ q_node;
+ }
+
+ n_prt->children[n_prt->num_children++] = q_node;
+ lag->q_home[vf_id][i] = dest;
+ }
+
+ ice_sched_suspend_resume_elems(pri_hw, 1, &pvf_teid, false);
+ ice_sched_suspend_resume_elems(sec_hw, 1, &svf_teid, false);
+
+ if (!e_pf)
+ ice_lag_destroy_netdev_list(lag, &ndlist);
+}
+
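The software-side move above edits two structures by hand: the old parent's children[] array and the layer-wide singly linked sibling list. A minimal sketch of the sibling unlink, assuming the same node layout as ice_sched_node:

    static void unlink_sibling(struct ice_sched_node **head,
                               struct ice_sched_node *node)
    {
            struct ice_sched_node *p = *head;

            if (p == node) {                  /* node is the list head */
                    *head = node->sibling;
            } else {
                    while (p && p->sibling != node)
                            p = p->sibling;
                    if (p)
                            p->sibling = node->sibling;
            }
            node->sibling = NULL;
    }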
+/**
+ * ice_lag_aa_failover - move VF queues in A/A mode
+ * @lag: primary lag struct
+ * @dest: index of destination port
+ * @e_pf: PF struct for event port
+ */
+void ice_lag_aa_failover(struct ice_lag *lag, u8 dest, struct ice_pf *e_pf)
+{
+ bool odd = true, all = false;
+ int i;
+
+ /* Primary can be a target if down (cleanup), but secondary can't */
+ if (dest == ICE_LAGS_IDX && !(lag->port_bitmap & ICE_LAGS_M))
+ return;
+
+ /* Move all queues to a destination if only one port is active,
+ * or no ports are active and dest is primary.
+ */
+ if ((lag->port_bitmap ^ (ICE_LAGP_M | ICE_LAGS_M)) ||
+ (!lag->port_bitmap && dest == ICE_LAGP_IDX))
+ all = true;
+
+ ice_for_each_vsi(lag->pf, i)
+ if (lag->pf->vsi[i] && lag->pf->vsi[i]->type == ICE_VSI_VF)
+ ice_lag_aa_move_vf_qs(lag, dest, i, all, &odd, e_pf);
+}
+
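A condensed view of the per-queue decision inside ice_lag_aa_move_vf_qs(): parity toggles before the check, so with odd starting true the first queue evaluates as even; even queues belong on the primary and odd queues on the secondary unless "all" forces a single destination (queues already homed on dest are skipped either way). Sketch under those assumptions:

    static bool queue_moves_to(u8 dest, bool odd, bool all)
    {
            if (all)
                    return true;
            return dest == ICE_LAGP_IDX ? !odd : odd;
    }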
+/**
* ice_lag_reclaim_vf_tc - move scheduling nodes back to primary interface
* @lag: primary interface lag struct
* @src_hw: HW struct current node location
@@ -921,13 +1288,12 @@ ice_lag_reclaim_vf_tc(struct ice_lag *lag, struct ice_hw *src_hw, u16 vsi_num,
u16 numq, valq, num_moved, qbuf_size;
u16 buf_size = __struct_size(buf);
struct ice_aqc_cfg_txqs_buf *qbuf;
+ struct ice_hw *hw = &lag->pf->hw;
struct ice_sched_node *n_prt;
__le32 teid, parent_teid;
struct ice_vsi_ctx *ctx;
- struct ice_hw *hw;
u32 tmp_teid;
- hw = &lag->pf->hw;
ctx = ice_get_vsi_ctx(hw, vsi_num);
if (!ctx) {
dev_warn(dev, "Unable to locate VSI context for LAG reclaim\n");
@@ -968,7 +1334,7 @@ ice_lag_reclaim_vf_tc(struct ice_lag *lag, struct ice_hw *src_hw, u16 vsi_num,
if (ice_aq_cfg_lan_txq(hw, qbuf, qbuf_size, numq,
src_hw->port_info->lport, hw->port_info->lport,
- NULL)) {
+ ICE_AQC_Q_CFG_TC_CHNG, NULL)) {
dev_warn(dev, "Failure to configure queues for LAG failover\n");
goto reclaim_qerr;
}
@@ -1039,36 +1405,15 @@ static void ice_lag_link(struct ice_lag *lag)
lag->bonded = true;
lag->role = ICE_LAG_UNSET;
+ lag->need_fltr_cfg = true;
netdev_info(lag->netdev, "Shared SR-IOV resources in bond are active\n");
}
/**
- * ice_lag_config_eswitch - configure eswitch to work with LAG
- * @lag: lag info struct
- * @netdev: active network interface device struct
- *
- * Updates all port representors in eswitch to use @netdev for Tx.
- *
- * Configures the netdev to keep dst metadata (also used in representor Tx).
- * This is required for an uplink without switchdev mode configured.
- */
-static void ice_lag_config_eswitch(struct ice_lag *lag,
- struct net_device *netdev)
-{
- struct ice_repr *repr;
- unsigned long id;
-
- xa_for_each(&lag->pf->eswitch.reprs, id, repr)
- repr->dst->u.port_info.lower_dev = netdev;
-
- netif_keep_dst(netdev);
-}
-
-/**
- * ice_lag_unlink - handle unlink event
+ * ice_lag_act_bkup_unlink - handle unlink event for A/B bond
* @lag: LAG info struct
*/
-static void ice_lag_unlink(struct ice_lag *lag)
+static void ice_lag_act_bkup_unlink(struct ice_lag *lag)
{
u8 pri_port, act_port, loc_port;
struct ice_pf *pf = lag->pf;
@@ -1104,10 +1449,32 @@ static void ice_lag_unlink(struct ice_lag *lag)
}
}
}
+}
- lag->bonded = false;
- lag->role = ICE_LAG_NONE;
- lag->upper_netdev = NULL;
+/**
+ * ice_lag_aa_unlink - handle unlink event for Active-Active bond
+ * @lag: LAG info struct
+ */
+static void ice_lag_aa_unlink(struct ice_lag *lag)
+{
+ struct ice_lag *pri_lag;
+
+ if (lag->primary) {
+ pri_lag = lag;
+ lag->port_bitmap &= ~ICE_LAGP_M;
+ } else {
+ pri_lag = ice_lag_find_primary(lag);
+ if (pri_lag)
+ pri_lag->port_bitmap &= ~ICE_LAGS_M;
+ }
+
+ if (pri_lag) {
+ ice_lag_aa_failover(pri_lag, ICE_LAGP_IDX, lag->pf);
+ if (lag->primary)
+ pri_lag->bond_lport_pri = ICE_LAG_INVALID_PORT;
+ else
+ pri_lag->bond_lport_sec = ICE_LAG_INVALID_PORT;
+ }
}
/**
@@ -1123,10 +1490,20 @@ static void ice_lag_link_unlink(struct ice_lag *lag, void *ptr)
if (netdev != lag->netdev)
return;
- if (info->linking)
+ if (info->linking) {
ice_lag_link(lag);
- else
- ice_lag_unlink(lag);
+ } else {
+ if (lag->bond_aa)
+ ice_lag_aa_unlink(lag);
+ else
+ ice_lag_act_bkup_unlink(lag);
+
+ lag->bonded = false;
+ lag->role = ICE_LAG_NONE;
+ lag->upper_netdev = NULL;
+ lag->bond_aa = false;
+ lag->need_fltr_cfg = false;
+ }
}
/**
@@ -1224,11 +1601,8 @@ ice_lag_set_swid(u16 primary_swid, struct ice_lag *local_lag,
*/
static void ice_lag_primary_swid(struct ice_lag *lag, bool link)
{
- struct ice_hw *hw;
- u16 swid;
-
- hw = &lag->pf->hw;
- swid = hw->port_info->sw_id;
+ struct ice_hw *hw = &lag->pf->hw;
+ u16 swid = hw->port_info->sw_id;
if (ice_share_res(hw, ICE_AQC_RES_TYPE_SWID, link, swid))
dev_warn(ice_pf_to_dev(lag->pf), "Failure to set primary interface shared status\n");
@@ -1241,12 +1615,10 @@ static void ice_lag_primary_swid(struct ice_lag *lag, bool link)
*/
static void ice_lag_add_prune_list(struct ice_lag *lag, struct ice_pf *event_pf)
{
- u16 num_vsi, rule_buf_sz, vsi_list_id, event_vsi_num, prim_vsi_idx;
- struct ice_sw_rule_vsi_list *s_rule = NULL;
+ u16 rule_buf_sz, vsi_list_id, event_vsi_num, prim_vsi_idx, num_vsi = 1;
+ struct ice_sw_rule_vsi_list *s_rule;
struct device *dev;
- num_vsi = 1;
-
dev = ice_pf_to_dev(lag->pf);
event_vsi_num = event_pf->vsi[0]->vsi_num;
prim_vsi_idx = lag->pf->vsi[0]->idx;
@@ -1282,12 +1654,10 @@ static void ice_lag_add_prune_list(struct ice_lag *lag, struct ice_pf *event_pf)
*/
static void ice_lag_del_prune_list(struct ice_lag *lag, struct ice_pf *event_pf)
{
- u16 num_vsi, vsi_num, vsi_idx, rule_buf_sz, vsi_list_id;
- struct ice_sw_rule_vsi_list *s_rule = NULL;
+ u16 vsi_num, vsi_idx, rule_buf_sz, vsi_list_id, num_vsi = 1;
+ struct ice_sw_rule_vsi_list *s_rule;
struct device *dev;
- num_vsi = 1;
-
dev = ice_pf_to_dev(lag->pf);
vsi_num = event_pf->vsi[0]->vsi_num;
vsi_idx = lag->pf->vsi[0]->idx;
@@ -1335,6 +1705,11 @@ static void ice_lag_init_feature_support_flag(struct ice_pf *pf)
ice_set_feature_support(pf, ICE_F_SRIOV_LAG);
else
ice_clear_feature_support(pf, ICE_F_SRIOV_LAG);
+
+ if (caps->sriov_aa_lag && ice_pkg_has_lport_extract(&pf->hw))
+ ice_set_feature_support(pf, ICE_F_SRIOV_AA_LAG);
+ else
+ ice_clear_feature_support(pf, ICE_F_SRIOV_AA_LAG);
}
/**
@@ -1344,11 +1719,10 @@ static void ice_lag_init_feature_support_flag(struct ice_pf *pf)
*/
static void ice_lag_changeupper_event(struct ice_lag *lag, void *ptr)
{
- struct netdev_notifier_changeupper_info *info;
+ struct netdev_notifier_changeupper_info *info = ptr;
struct ice_lag *primary_lag;
struct net_device *netdev;
- info = ptr;
netdev = netdev_notifier_info_to_dev(ptr);
/* not for this netdev */
@@ -1369,6 +1743,9 @@ static void ice_lag_changeupper_event(struct ice_lag *lag, void *ptr)
/* Configure primary's SWID to be shared */
ice_lag_primary_swid(lag, true);
primary_lag = lag;
+ lag->bond_lport_pri = lag->pf->hw.port_info->lport;
+ lag->bond_lport_sec = ICE_LAG_INVALID_PORT;
+ lag->port_bitmap = 0;
} else {
u16 swid;
@@ -1378,16 +1755,29 @@ static void ice_lag_changeupper_event(struct ice_lag *lag, void *ptr)
swid = primary_lag->pf->hw.port_info->sw_id;
ice_lag_set_swid(swid, lag, true);
ice_lag_add_prune_list(primary_lag, lag->pf);
- ice_lag_cfg_drop_fltr(lag, true);
+ primary_lag->bond_lport_sec =
+ lag->pf->hw.port_info->lport;
}
/* add filter for primary control packets */
- ice_lag_cfg_cp_fltr(lag, true);
+ ice_lag_cfg_lp_fltr(lag, true, true);
} else {
if (!primary_lag && lag->primary)
primary_lag = lag;
+ if (primary_lag) {
+ for (int i = 0; i < ICE_MAX_SRIOV_VFS; i++) {
+ if (primary_lag->sec_vf[i]) {
+ ice_vsi_release(primary_lag->sec_vf[i]);
+ primary_lag->sec_vf[i] = NULL;
+ }
+ }
+ }
+
if (!lag->primary) {
ice_lag_set_swid(0, lag, false);
+ if (primary_lag)
+ primary_lag->bond_lport_sec =
+ ICE_LAG_INVALID_PORT;
} else {
if (primary_lag && lag->primary) {
ice_lag_primary_swid(lag, false);
@@ -1395,7 +1785,7 @@ static void ice_lag_changeupper_event(struct ice_lag *lag, void *ptr)
}
}
/* remove filter for control packets */
- ice_lag_cfg_cp_fltr(lag, false);
+ ice_lag_cfg_lp_fltr(lag, false, !lag->bond_aa);
}
}
@@ -1408,7 +1798,7 @@ static void ice_lag_changeupper_event(struct ice_lag *lag, void *ptr)
*/
static void ice_lag_monitor_link(struct ice_lag *lag, void *ptr)
{
- struct netdev_notifier_changeupper_info *info;
+ struct netdev_notifier_changeupper_info *info = ptr;
struct ice_hw *prim_hw, *active_hw;
struct net_device *event_netdev;
struct ice_pf *pf;
@@ -1421,19 +1811,34 @@ static void ice_lag_monitor_link(struct ice_lag *lag, void *ptr)
if (!netif_is_same_ice(lag->pf, event_netdev))
return;
+ if (info->upper_dev != lag->upper_netdev)
+ return;
+
+ if (info->linking)
+ return;
+
pf = lag->pf;
prim_hw = &pf->hw;
prim_port = prim_hw->port_info->lport;
- info = (struct netdev_notifier_changeupper_info *)ptr;
- if (info->upper_dev != lag->upper_netdev)
- return;
-
- if (!info->linking) {
- /* Since there are only two interfaces allowed in SRIOV+LAG, if
- * one port is leaving, then nodes need to be on primary
- * interface.
- */
+ /* Since there are only two interfaces allowed in SRIOV+LAG, if
+ * one port is leaving, then nodes need to be on primary
+ * interface.
+ */
+ if (lag->bond_aa) {
+ struct ice_netdev_priv *e_ndp;
+ struct ice_pf *e_pf;
+
+ e_ndp = netdev_priv(event_netdev);
+ e_pf = e_ndp->vsi->back;
+
+ if (lag->bond_lport_pri != ICE_LAG_INVALID_PORT &&
+ lag->port_bitmap & ICE_LAGS_M) {
+ lag->port_bitmap &= ~ICE_LAGS_M;
+ ice_lag_aa_failover(lag, ICE_LAGP_IDX, e_pf);
+ lag->bond_lport_sec = ICE_LAG_INVALID_PORT;
+ }
+ } else {
if (prim_port != lag->active_port &&
lag->active_port != ICE_LAG_INVALID_PORT) {
active_hw = ice_lag_find_hw_by_lport(lag,
@@ -1445,45 +1850,32 @@ static void ice_lag_monitor_link(struct ice_lag *lag, void *ptr)
}
/**
- * ice_lag_monitor_active - main PF keep track of which port is active
+ * ice_lag_monitor_act_bkup - keep track of which port is active in A/B LAG
* @lag: lag info struct
- * @ptr: opaque data containing notifier event
+ * @b_info: bonding info
+ * @event_netdev: net_device for target netdev
*
* This function is for the primary PF to monitor changes in which port is
* active and handle changes for SRIOV VF functionality
*/
-static void ice_lag_monitor_active(struct ice_lag *lag, void *ptr)
+static void ice_lag_monitor_act_bkup(struct ice_lag *lag,
+ struct netdev_bonding_info *b_info,
+ struct net_device *event_netdev)
{
- struct net_device *event_netdev, *event_upper;
- struct netdev_notifier_bonding_info *info;
- struct netdev_bonding_info *bonding_info;
struct ice_netdev_priv *event_np;
struct ice_pf *pf, *event_pf;
u8 prim_port, event_port;
- if (!lag->primary)
- return;
-
pf = lag->pf;
if (!pf)
return;
- event_netdev = netdev_notifier_info_to_dev(ptr);
- rcu_read_lock();
- event_upper = netdev_master_upper_dev_get_rcu(event_netdev);
- rcu_read_unlock();
- if (!netif_is_ice(event_netdev) || event_upper != lag->upper_netdev)
- return;
-
event_np = netdev_priv(event_netdev);
event_pf = event_np->vsi->back;
event_port = event_pf->hw.port_info->lport;
prim_port = pf->hw.port_info->lport;
- info = (struct netdev_notifier_bonding_info *)ptr;
- bonding_info = &info->bonding_info;
-
- if (!bonding_info->slave.state) {
+ if (!b_info->slave.state) {
/* if no port is currently active, then nodes and filters exist
* on primary port, check if we need to move them
*/
@@ -1520,6 +1912,128 @@ static void ice_lag_monitor_active(struct ice_lag *lag, void *ptr)
}
/**
+ * ice_lag_aa_clear_spoof - adjust the placeholder VSI spoofing for A/A LAG
+ * @vsi: placeholder VSI to adjust
+ */
+static void ice_lag_aa_clear_spoof(struct ice_vsi *vsi)
+{
+ ice_vsi_update_security(vsi, ice_vsi_ctx_clear_antispoof);
+}
+
+/**
+ * ice_lag_monitor_act_act - Keep track of active ports in A/A LAG
+ * @lag: lag struct for primary interface
+ * @b_info: bonding_info for event
+ * @event_netdev: net_device for target netdev
+ */
+static void ice_lag_monitor_act_act(struct ice_lag *lag,
+ struct netdev_bonding_info *b_info,
+ struct net_device *event_netdev)
+{
+ struct ice_netdev_priv *event_np;
+ u8 prim_port, event_port;
+ struct ice_pf *event_pf;
+
+ event_np = netdev_priv(event_netdev);
+ event_pf = event_np->vsi->back;
+ event_port = event_pf->hw.port_info->lport;
+ prim_port = lag->pf->hw.port_info->lport;
+
+ if (b_info->slave.link == BOND_LINK_UP) {
+ /* Port is coming up */
+ if (prim_port == event_port) {
+ /* Processing event for primary interface */
+ if (lag->bond_lport_pri == ICE_LAG_INVALID_PORT)
+ return;
+
+ if (!(lag->port_bitmap & ICE_LAGP_M)) {
+ /* Primary port was not marked up before, move
+ * some|all VF queues to it and mark as up
+ */
+ lag->port_bitmap |= ICE_LAGP_M;
+ ice_lag_aa_failover(lag, ICE_LAGP_IDX, event_pf);
+ }
+ } else {
+ if (lag->bond_lport_sec == ICE_LAG_INVALID_PORT)
+ return;
+
+ /* Create placeholder VSIs on secondary PF.
+ * The placeholder is necessary so that we have
+ * an element that represents the VF on the secondary
+ * interface's scheduling tree. This will be a tree
+ * root for scheduling nodes when they are moved to
+ * the secondary interface.
+ */
+ if (!lag->sec_vf[0]) {
+ struct ice_vsi_cfg_params params = {};
+ struct ice_vsi *nvsi;
+ struct ice_vf *vf;
+ unsigned int bkt;
+
+ params.type = ICE_VSI_VF;
+ params.port_info = event_pf->hw.port_info;
+ params.flags = ICE_VSI_FLAG_INIT;
+
+ ice_for_each_vf(lag->pf, bkt, vf) {
+ params.vf = vf;
+ nvsi = ice_vsi_setup(event_pf,
+ &params);
+ ice_lag_aa_clear_spoof(nvsi);
+ lag->sec_vf[vf->vf_id] = nvsi;
+ }
+ }
+
+ if (!(lag->port_bitmap & ICE_LAGS_M)) {
+ /* Secondary port was not marked up before,
+ * move some|all VF queues to it and mark as up
+ */
+ lag->port_bitmap |= ICE_LAGS_M;
+ ice_lag_aa_failover(lag, ICE_LAGS_IDX, event_pf);
+ }
+ }
+ } else {
+ /* Port is going down */
+ if (prim_port == event_port) {
+ lag->port_bitmap &= ~ICE_LAGP_M;
+ ice_lag_aa_failover(lag, ICE_LAGS_IDX, event_pf);
+ } else {
+ lag->port_bitmap &= ~ICE_LAGS_M;
+ ice_lag_aa_failover(lag, ICE_LAGP_IDX, event_pf);
+ }
+ }
+}
+
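Assumed port_bitmap transitions driven by the handler above, summarized for reference:

    event                    bitmap change      failover target
    primary link up          |= ICE_LAGP_M      ICE_LAGP_IDX
    secondary link up        |= ICE_LAGS_M      ICE_LAGS_IDX
    primary link down        &= ~ICE_LAGP_M     ICE_LAGS_IDX
    secondary link down      &= ~ICE_LAGS_M     ICE_LAGP_IDX

(The link-up cases also gate on a valid bond_lport_* and, for the secondary, on the placeholder VF VSIs having been created.)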
+/**
+ * ice_lag_monitor_info - Calls relevant A/A or A/B monitoring function
+ * @lag: lag info struct
+ * @ptr: opaque data containing notifier event
+ *
+ * This function is for the primary PF to monitor changes in which port is
+ * active and handle changes for SRIOV VF functionality
+ */
+static void ice_lag_monitor_info(struct ice_lag *lag, void *ptr)
+{
+ struct netdev_notifier_bonding_info *info = ptr;
+ struct net_device *event_netdev, *event_upper;
+ struct netdev_bonding_info *bonding_info;
+
+ if (!lag->primary)
+ return;
+
+ event_netdev = netdev_notifier_info_to_dev(ptr);
+ bonding_info = &info->bonding_info;
+ rcu_read_lock();
+ event_upper = netdev_master_upper_dev_get_rcu(event_netdev);
+ rcu_read_unlock();
+ if (!netif_is_ice(event_netdev) || event_upper != lag->upper_netdev)
+ return;
+
+ if (lag->bond_aa)
+ ice_lag_monitor_act_act(lag, bonding_info, event_netdev);
+ else
+ ice_lag_monitor_act_bkup(lag, bonding_info, event_netdev);
+}
+
+/**
* ice_lag_chk_comp - evaluate bonded interface for feature support
* @lag: lag info struct
* @ptr: opaque data for netdev event info
@@ -1527,13 +2041,21 @@ static void ice_lag_monitor_active(struct ice_lag *lag, void *ptr)
static bool
ice_lag_chk_comp(struct ice_lag *lag, void *ptr)
{
+ struct netdev_notifier_bonding_info *info = ptr;
struct net_device *event_netdev, *event_upper;
- struct netdev_notifier_bonding_info *info;
struct netdev_bonding_info *bonding_info;
struct list_head *tmp;
struct device *dev;
int count = 0;
+ /* All members need to know whether the bond is A/A or A/B */
+ bonding_info = &info->bonding_info;
+ lag->bond_mode = bonding_info->master.bond_mode;
+ lag->bond_aa = lag->bond_mode != BOND_MODE_ACTIVEBACKUP;
+
if (!lag->primary)
return true;
@@ -1554,13 +2076,9 @@ ice_lag_chk_comp(struct ice_lag *lag, void *ptr)
return false;
}
- info = (struct netdev_notifier_bonding_info *)ptr;
- bonding_info = &info->bonding_info;
- lag->bond_mode = bonding_info->master.bond_mode;
- if (lag->bond_mode != BOND_MODE_ACTIVEBACKUP) {
- dev_info(dev, "Bond Mode not ACTIVE-BACKUP - VF LAG disabled\n");
+ if (lag->bond_aa && !ice_is_feature_supported(lag->pf,
+ ICE_F_SRIOV_AA_LAG))
return false;
- }
list_for_each(tmp, lag->netdev_head) {
struct ice_dcbx_cfg *dcb_cfg, *peer_dcb_cfg;
@@ -1664,10 +2182,9 @@ ice_lag_unregister(struct ice_lag *lag, struct net_device *event_netdev)
static void
ice_lag_monitor_rdma(struct ice_lag *lag, void *ptr)
{
- struct netdev_notifier_changeupper_info *info;
+ struct netdev_notifier_changeupper_info *info = ptr;
struct net_device *netdev;
- info = ptr;
netdev = netdev_notifier_info_to_dev(ptr);
if (netdev != lag->netdev)
@@ -1715,12 +2232,30 @@ static void ice_lag_chk_disabled_bond(struct ice_lag *lag, void *ptr)
*/
static void ice_lag_disable_sriov_bond(struct ice_lag *lag)
{
- struct ice_netdev_priv *np;
- struct ice_pf *pf;
+ struct ice_netdev_priv *np = netdev_priv(lag->netdev);
+ struct ice_pf *pf = np->vsi->back;
- np = netdev_priv(lag->netdev);
- pf = np->vsi->back;
ice_clear_feature_support(pf, ICE_F_SRIOV_LAG);
+ ice_clear_feature_support(pf, ICE_F_SRIOV_AA_LAG);
+}
+
+/**
+ * ice_lag_preset_drop_fltr - preset drop filter for A/B bonds
+ * @lag: local lag struct
+ * @ptr: opaque data containing event
+ *
+ * Sets the initial drop filter for the secondary interface in an
+ * active-backup bond.
+ */
+static void ice_lag_preset_drop_fltr(struct ice_lag *lag, void *ptr)
+{
+ struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
+
+ if (netdev != lag->netdev || lag->primary || !lag->need_fltr_cfg)
+ return;
+
+ ice_lag_cfg_drop_fltr(lag, true);
+ lag->need_fltr_cfg = false;
}
/**
@@ -1761,10 +2296,12 @@ static void ice_lag_process_event(struct work_struct *work)
ice_lag_unregister(lag_work->lag, netdev);
goto lag_cleanup;
}
- ice_lag_monitor_active(lag_work->lag,
- &lag_work->info.bonding_info);
ice_lag_cfg_pf_fltrs(lag_work->lag,
&lag_work->info.bonding_info);
+ ice_lag_preset_drop_fltr(lag_work->lag,
+ &lag_work->info.bonding_info);
+ ice_lag_monitor_info(lag_work->lag,
+ &lag_work->info.bonding_info);
}
ice_lag_info_event(lag_work->lag, &lag_work->info.bonding_info);
break;
@@ -1837,9 +2374,8 @@ ice_lag_event_handler(struct notifier_block *notif_blk, unsigned long event,
lag_work->lag = lag;
lag_work->event = event;
if (event == NETDEV_CHANGEUPPER) {
- struct netdev_notifier_changeupper_info *info;
+ struct netdev_notifier_changeupper_info *info = ptr;
- info = ptr;
upper_netdev = info->upper_dev;
} else {
upper_netdev = netdev_master_upper_dev_get(netdev);
@@ -1889,10 +2425,8 @@ ice_lag_event_handler(struct notifier_block *notif_blk, unsigned long event,
*/
static int ice_register_lag_handler(struct ice_lag *lag)
{
+ struct notifier_block *notif_blk = &lag->notif_block;
struct device *dev = ice_pf_to_dev(lag->pf);
- struct notifier_block *notif_blk;
-
- notif_blk = &lag->notif_block;
if (!notif_blk->notifier_call) {
notif_blk->notifier_call = ice_lag_event_handler;
@@ -1912,10 +2446,9 @@ static int ice_register_lag_handler(struct ice_lag *lag)
*/
static void ice_unregister_lag_handler(struct ice_lag *lag)
{
+ struct notifier_block *notif_blk = &lag->notif_block;
struct device *dev = ice_pf_to_dev(lag->pf);
- struct notifier_block *notif_blk;
- notif_blk = &lag->notif_block;
if (notif_blk->notifier_call) {
unregister_netdevice_notifier(notif_blk);
dev_dbg(dev, "LAG event handler unregistered\n");
@@ -1977,13 +2510,12 @@ ice_lag_move_vf_nodes_tc_sync(struct ice_lag *lag, struct ice_hw *dest_hw,
u16 numq, valq, num_moved, qbuf_size;
u16 buf_size = __struct_size(buf);
struct ice_aqc_cfg_txqs_buf *qbuf;
+ struct ice_hw *hw = &lag->pf->hw;
struct ice_sched_node *n_prt;
__le32 teid, parent_teid;
struct ice_vsi_ctx *ctx;
- struct ice_hw *hw;
u32 tmp_teid;
- hw = &lag->pf->hw;
ctx = ice_get_vsi_ctx(hw, vsi_num);
if (!ctx) {
dev_warn(dev, "LAG rebuild failed after reset due to VSI Context failure\n");
@@ -2020,7 +2552,8 @@ ice_lag_move_vf_nodes_tc_sync(struct ice_lag *lag, struct ice_hw *dest_hw,
}
if (ice_aq_cfg_lan_txq(hw, qbuf, qbuf_size, numq, hw->port_info->lport,
- dest_hw->port_info->lport, NULL)) {
+ dest_hw->port_info->lport,
+ ICE_AQC_Q_CFG_TC_CHNG, NULL)) {
dev_warn(dev, "Failure to configure queues for LAG reset rebuild\n");
goto sync_qerr;
}
@@ -2116,9 +2649,13 @@ int ice_init_lag(struct ice_pf *pf)
lag->netdev = vsi->netdev;
lag->role = ICE_LAG_NONE;
lag->active_port = ICE_LAG_INVALID_PORT;
+ lag->port_bitmap = 0x0;
lag->bonded = false;
+ lag->bond_aa = false;
+ lag->need_fltr_cfg = false;
lag->upper_netdev = NULL;
lag->notif_block.notifier_call = NULL;
+ memset(lag->sec_vf, 0, sizeof(lag->sec_vf));
err = ice_register_lag_handler(lag);
if (err) {
@@ -2136,6 +2673,11 @@ int ice_init_lag(struct ice_pf *pf)
if (err)
goto free_rcp_res;
+ err = ice_create_lag_recipe(&pf->hw, &lag->act_act_recipe,
+ ice_lport_rcp, 1);
+ if (err)
+ goto free_lport_res;
+
/* associate recipes to profiles */
for (n = 0; n < ICE_PROFID_IPV6_GTPU_IPV6_TCP_INNER; n++) {
err = ice_aq_get_recipe_to_profile(&pf->hw, n,
@@ -2145,7 +2687,8 @@ int ice_init_lag(struct ice_pf *pf)
if (recipe_bits & BIT(ICE_SW_LKUP_DFLT)) {
recipe_bits |= BIT(lag->pf_recipe) |
- BIT(lag->lport_recipe);
+ BIT(lag->lport_recipe) |
+ BIT(lag->act_act_recipe);
ice_aq_map_recipe_to_profile(&pf->hw, n,
recipe_bits, NULL);
}
@@ -2156,9 +2699,13 @@ int ice_init_lag(struct ice_pf *pf)
dev_dbg(dev, "INIT LAG complete\n");
return 0;
+free_lport_res:
+ ice_free_hw_res(&pf->hw, ICE_AQC_RES_TYPE_RECIPE, 1,
+ &lag->lport_recipe);
+
free_rcp_res:
ice_free_hw_res(&pf->hw, ICE_AQC_RES_TYPE_RECIPE, 1,
- &pf->lag->pf_recipe);
+ &lag->pf_recipe);
lag_error:
kfree(lag);
pf->lag = NULL;
@@ -2174,9 +2721,7 @@ lag_error:
*/
void ice_deinit_lag(struct ice_pf *pf)
{
- struct ice_lag *lag;
-
- lag = pf->lag;
+ struct ice_lag *lag = pf->lag;
if (!lag)
return;
@@ -2245,11 +2790,15 @@ void ice_lag_rebuild(struct ice_pf *pf)
ice_lag_move_vf_nodes_sync(prim_lag, &pf->hw);
}
- ice_lag_cfg_cp_fltr(lag, true);
+ if (!lag->bond_aa) {
+ ice_lag_cfg_lp_fltr(lag, true, true);
+ if (lag->pf_rx_rule_id)
+ if (ice_lag_cfg_dflt_fltr(lag, true))
+ dev_err(ice_pf_to_dev(pf), "Error adding default VSI rule in rebuild\n");
+ } else {
+ ice_lag_cfg_lp_fltr(lag, true, false);
+ }
- if (lag->pf_rx_rule_id)
- if (ice_lag_cfg_dflt_fltr(lag, true))
- dev_err(ice_pf_to_dev(pf), "Error adding default VSI rule in rebuild\n");
ice_clear_rdma_cap(pf);
lag_rebuild_out:
diff --git a/drivers/net/ethernet/intel/ice/ice_lag.h b/drivers/net/ethernet/intel/ice/ice_lag.h
index 69347d9f986b..e2a0a782bdd7 100644
--- a/drivers/net/ethernet/intel/ice/ice_lag.h
+++ b/drivers/net/ethernet/intel/ice/ice_lag.h
@@ -14,7 +14,11 @@ enum ice_lag_role {
ICE_LAG_UNSET
};
-#define ICE_LAG_INVALID_PORT 0xFF
+#define ICE_LAG_INVALID_PORT 0xFF
+#define ICE_LAGP_IDX 0
+#define ICE_LAGS_IDX 1
+#define ICE_LAGP_M 0x1
+#define ICE_LAGS_M 0x2
#define ICE_LAG_RESET_RETRIES 5
#define ICE_SW_DEFAULT_PROFILE 0
@@ -41,12 +45,26 @@ struct ice_lag {
u8 active_port; /* lport value for the current active port */
u8 bonded:1; /* currently bonded */
u8 primary:1; /* this is primary */
+ u8 bond_aa:1; /* is this bond active-active */
+ u8 need_fltr_cfg:1; /* fltrs for A/A bond still need to be made */
+ u8 port_bitmap:2; /* bitmap of active ports */
+ u8 bond_lport_pri; /* lport value for primary PF */
+ u8 bond_lport_sec; /* lport value for secondary PF */
+
+ /* q_home keeps track of which interface the queue is currently on */
+ u8 q_home[ICE_MAX_SRIOV_VFS][ICE_MAX_RSS_QS_PER_VF];
+
+ /* placeholder VSIs to hang VF queues from on the secondary interface */
+ struct ice_vsi *sec_vf[ICE_MAX_SRIOV_VFS];
+
u16 pf_recipe;
u16 lport_recipe;
+ u16 act_act_recipe;
u16 pf_rx_rule_id;
u16 pf_tx_rule_id;
u16 cp_rule_idx;
u16 lport_rule_idx;
+ u16 act_act_rule_idx;
u8 role;
};
@@ -65,6 +83,7 @@ struct ice_lag_work {
};
void ice_lag_move_new_vf_nodes(struct ice_vf *vf);
+void ice_lag_aa_failover(struct ice_lag *lag, u8 dest, struct ice_pf *e_pf);
int ice_init_lag(struct ice_pf *pf);
void ice_deinit_lag(struct ice_pf *pf);
void ice_lag_rebuild(struct ice_pf *pf);
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 77781277aa8e..92b95d92d599 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -9125,7 +9125,7 @@ static int ice_create_q_channels(struct ice_vsi *vsi)
list_add_tail(&ch->list, &vsi->ch_list);
vsi->tc_map_vsi[i] = ch->ch_vsi;
dev_dbg(ice_pf_to_dev(pf),
- "successfully created channel: VSI %pK\n", ch->ch_vsi);
+ "successfully created channel: VSI %p\n", ch->ch_vsi);
}
return 0;
diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c
index 9ce4c4db400e..843e82fd3bf9 100644
--- a/drivers/net/ethernet/intel/ice/ice_sriov.c
+++ b/drivers/net/ethernet/intel/ice/ice_sriov.c
@@ -9,7 +9,7 @@
#include "ice_dcb_lib.h"
#include "ice_flow.h"
#include "ice_eswitch.h"
-#include "ice_virtchnl_allowlist.h"
+#include "virt/allowlist.h"
#include "ice_flex_pipe.h"
#include "ice_vf_vsi_vlan_ops.h"
#include "ice_vlan.h"
diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.h b/drivers/net/ethernet/intel/ice/ice_sriov.h
index d1a998a4bef6..6c4fad09a527 100644
--- a/drivers/net/ethernet/intel/ice/ice_sriov.h
+++ b/drivers/net/ethernet/intel/ice/ice_sriov.h
@@ -3,9 +3,9 @@
#ifndef _ICE_SRIOV_H_
#define _ICE_SRIOV_H_
-#include "ice_virtchnl_fdir.h"
+#include "virt/fdir.h"
#include "ice_vf_lib.h"
-#include "ice_virtchnl.h"
+#include "virt/virtchnl.h"
/* Static VF transaction/status register def */
#define VF_DEVICE_STATUS 0xAA
diff --git a/drivers/net/ethernet/intel/ice/ice_trace.h b/drivers/net/ethernet/intel/ice/ice_trace.h
index 07aab6e130cd..4f35ef8d6b29 100644
--- a/drivers/net/ethernet/intel/ice/ice_trace.h
+++ b/drivers/net/ethernet/intel/ice/ice_trace.h
@@ -130,7 +130,7 @@ DECLARE_EVENT_CLASS(ice_tx_template,
__entry->buf = buf;
__assign_str(devname);),
- TP_printk("netdev: %s ring: %pK desc: %pK buf %pK", __get_str(devname),
+ TP_printk("netdev: %s ring: %p desc: %p buf %p", __get_str(devname),
__entry->ring, __entry->desc, __entry->buf)
);
@@ -158,7 +158,7 @@ DECLARE_EVENT_CLASS(ice_rx_template,
__entry->desc = desc;
__assign_str(devname);),
- TP_printk("netdev: %s ring: %pK desc: %pK", __get_str(devname),
+ TP_printk("netdev: %s ring: %p desc: %p", __get_str(devname),
__entry->ring, __entry->desc)
);
DEFINE_EVENT(ice_rx_template, ice_clean_rx_irq,
@@ -182,7 +182,7 @@ DECLARE_EVENT_CLASS(ice_rx_indicate_template,
__entry->skb = skb;
__assign_str(devname);),
- TP_printk("netdev: %s ring: %pK desc: %pK skb %pK", __get_str(devname),
+ TP_printk("netdev: %s ring: %p desc: %p skb %p", __get_str(devname),
__entry->ring, __entry->desc, __entry->skb)
);
@@ -205,7 +205,7 @@ DECLARE_EVENT_CLASS(ice_xmit_template,
__entry->skb = skb;
__assign_str(devname);),
- TP_printk("netdev: %s skb: %pK ring: %pK", __get_str(devname),
+ TP_printk("netdev: %s skb: %p ring: %p", __get_str(devname),
__entry->skb, __entry->ring)
);
@@ -228,7 +228,7 @@ DECLARE_EVENT_CLASS(ice_tx_tstamp_template,
TP_fast_assign(__entry->skb = skb;
__entry->idx = idx;),
- TP_printk("skb %pK idx %d",
+ TP_printk("skb %p idx %d",
__entry->skb, __entry->idx)
);
#define DEFINE_TX_TSTAMP_OP_EVENT(name) \
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index 03c6c271865d..8d19efc1df72 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -293,8 +293,10 @@ struct ice_hw_common_caps {
u8 dcb;
u8 ieee_1588;
u8 rdma;
- u8 roce_lag;
- u8 sriov_lag;
+
+ bool roce_lag;
+ bool sriov_lag;
+ bool sriov_aa_lag;
bool nvm_update_pending_nvm;
bool nvm_update_pending_orom;
diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.c b/drivers/net/ethernet/intel/ice/ice_vf_lib.c
index 5ee74f3e82dc..de9e81ccee66 100644
--- a/drivers/net/ethernet/intel/ice/ice_vf_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.c
@@ -5,7 +5,7 @@
#include "ice.h"
#include "ice_lib.h"
#include "ice_fltr.h"
-#include "ice_virtchnl_allowlist.h"
+#include "virt/allowlist.h"
/* Public functions which may be accessed by all driver files */
diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.h b/drivers/net/ethernet/intel/ice/ice_vf_lib.h
index ffe1f9f830ea..b00708907176 100644
--- a/drivers/net/ethernet/intel/ice/ice_vf_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.h
@@ -13,7 +13,7 @@
#include <linux/avf/virtchnl.h>
#include "ice_type.h"
#include "ice_flow.h"
-#include "ice_virtchnl_fdir.h"
+#include "virt/fdir.h"
#include "ice_vsi_vlan_ops.h"
#define ICE_MAX_SRIOV_VFS 256
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c b/drivers/net/ethernet/intel/ice/virt/allowlist.c
index 4c2ec2337b38..a07efec19c45 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c
+++ b/drivers/net/ethernet/intel/ice/virt/allowlist.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (C) 2021, Intel Corporation. */
-#include "ice_virtchnl_allowlist.h"
+#include "allowlist.h"
/* Purpose of this file is to share functionality to allowlist or denylist
* opcodes used in PF <-> VF communication. Group of opcodes:
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.h b/drivers/net/ethernet/intel/ice/virt/allowlist.h
index d3ae86ded219..d3ae86ded219 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.h
+++ b/drivers/net/ethernet/intel/ice/virt/allowlist.h
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c b/drivers/net/ethernet/intel/ice/virt/fdir.c
index ae83c3914e29..ae83c3914e29 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c
+++ b/drivers/net/ethernet/intel/ice/virt/fdir.c
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.h b/drivers/net/ethernet/intel/ice/virt/fdir.h
index ac6dcab454b4..ac6dcab454b4 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.h
+++ b/drivers/net/ethernet/intel/ice/virt/fdir.h
diff --git a/drivers/net/ethernet/intel/ice/virt/queues.c b/drivers/net/ethernet/intel/ice/virt/queues.c
new file mode 100644
index 000000000000..40575cfe6dd4
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/virt/queues.c
@@ -0,0 +1,975 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2022, Intel Corporation. */
+
+#include "virtchnl.h"
+#include "queues.h"
+#include "ice_vf_lib_private.h"
+#include "ice.h"
+#include "ice_base.h"
+#include "ice_lib.h"
+
+/**
+ * ice_vc_get_max_frame_size - get max frame size allowed for VF
+ * @vf: VF used to determine max frame size
+ *
+ * Max frame size is determined based on the current port's max frame size and
+ * whether a port VLAN is configured on this VF. The VF is not aware whether
+ * it's in a port VLAN so the PF needs to account for this in max frame size
+ * checks and sending the max frame size to the VF.
+ */
+u16 ice_vc_get_max_frame_size(struct ice_vf *vf)
+{
+ struct ice_port_info *pi = ice_vf_get_port_info(vf);
+ u16 max_frame_size;
+
+ max_frame_size = pi->phy.link_info.max_frame_size;
+
+ if (ice_vf_is_port_vlan_ena(vf))
+ max_frame_size -= VLAN_HLEN;
+
+ return max_frame_size;
+}
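+
+/*
+ * Worked example (illustrative): with a port max_frame_size of 9728
+ * bytes and a port VLAN configured, the VF is told it may use
+ * 9728 - VLAN_HLEN = 9724 bytes, since the PF inserts the 4-byte VLAN
+ * tag on the VF's behalf and the VF cannot see it.
+ */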
+
+/**
+ * ice_vc_isvalid_q_id - check if the queue ID is valid
+ * @vsi: VSI to check queue ID against
+ * @qid: VSI relative queue ID
+ *
+ * Check that the queue ID is within the VSI's allocated queue range.
+ */
+static bool ice_vc_isvalid_q_id(struct ice_vsi *vsi, u16 qid)
+{
+ /* allocated Tx and Rx queues should be always equal for VF VSI */
+ return qid < vsi->alloc_txq;
+}
+
+/**
+ * ice_vc_isvalid_ring_len - check if the ring length is valid
+ * @ring_len: length of ring
+ *
+ * Check that the ring count is zero, or is a multiple of
+ * ICE_REQ_DESC_MULTIPLE within the [ICE_MIN_NUM_DESC, ICE_MAX_NUM_DESC]
+ * range.
+ */
+static bool ice_vc_isvalid_ring_len(u16 ring_len)
+{
+ return ring_len == 0 ||
+ (ring_len >= ICE_MIN_NUM_DESC &&
+ ring_len <= ICE_MAX_NUM_DESC &&
+ !(ring_len % ICE_REQ_DESC_MULTIPLE));
+}
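+
+/*
+ * Examples (illustrative, assuming the usual driver values of
+ * ICE_MIN_NUM_DESC = 64, ICE_MAX_NUM_DESC = 8160 and
+ * ICE_REQ_DESC_MULTIPLE = 32): ring_len values of 0, 64 and 512 are
+ * accepted; 100 (not a multiple of 32) and 9000 (above the maximum)
+ * are rejected.
+ */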
+
+/**
+ * ice_vf_cfg_qs_bw - Configure per queue bandwidth
+ * @vf: pointer to the VF info
+ * @num_queues: number of queues to be configured
+ *
+ * Configure per queue bandwidth.
+ *
+ * Return: 0 on success or negative error value.
+ */
+static int ice_vf_cfg_qs_bw(struct ice_vf *vf, u16 num_queues)
+{
+ struct ice_hw *hw = &vf->pf->hw;
+ struct ice_vsi *vsi;
+ int ret;
+ u16 i;
+
+ vsi = ice_get_vf_vsi(vf);
+ if (!vsi)
+ return -EINVAL;
+
+ for (i = 0; i < num_queues; i++) {
+ u32 p_rate, min_rate;
+ u8 tc;
+
+ p_rate = vf->qs_bw[i].peak;
+ min_rate = vf->qs_bw[i].committed;
+ tc = vf->qs_bw[i].tc;
+ if (p_rate)
+ ret = ice_cfg_q_bw_lmt(hw->port_info, vsi->idx, tc,
+ vf->qs_bw[i].queue_id,
+ ICE_MAX_BW, p_rate);
+ else
+ ret = ice_cfg_q_bw_dflt_lmt(hw->port_info, vsi->idx, tc,
+ vf->qs_bw[i].queue_id,
+ ICE_MAX_BW);
+ if (ret)
+ return ret;
+
+ if (min_rate)
+ ret = ice_cfg_q_bw_lmt(hw->port_info, vsi->idx, tc,
+ vf->qs_bw[i].queue_id,
+ ICE_MIN_BW, min_rate);
+ else
+ ret = ice_cfg_q_bw_dflt_lmt(hw->port_info, vsi->idx, tc,
+ vf->qs_bw[i].queue_id,
+ ICE_MIN_BW);
+
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_vf_cfg_q_quanta_profile - Configure quanta profile
+ * @vf: pointer to the VF info
+ * @quanta_prof_idx: pointer to the quanta profile index
+ * @quanta_size: quanta size to be set
+ *
+ * This function chooses an available quanta profile and configures the
+ * register. The quanta profiles are evenly divided among the device's
+ * functions, giving each PF (and its VFs) a contiguous range. The first
+ * profile in each PF's range is a reserved default; only the quanta size
+ * of the remaining, unused profiles can be modified.
+ *
+ * Return: 0 on success or negative error value.
+ */
+static int ice_vf_cfg_q_quanta_profile(struct ice_vf *vf, u16 quanta_size,
+ u16 *quanta_prof_idx)
+{
+ const u16 n_desc = calc_quanta_desc(quanta_size);
+ struct ice_hw *hw = &vf->pf->hw;
+ const u16 n_cmd = 2 * n_desc;
+ struct ice_pf *pf = vf->pf;
+ u16 per_pf, begin_id;
+ u8 n_used;
+ u32 reg;
+
+ begin_id = (GLCOMM_QUANTA_PROF_MAX_INDEX + 1) / hw->dev_caps.num_funcs *
+ hw->logical_pf_id;
+
+ if (quanta_size == ICE_DFLT_QUANTA) {
+ *quanta_prof_idx = begin_id;
+ } else {
+ per_pf = (GLCOMM_QUANTA_PROF_MAX_INDEX + 1) /
+ hw->dev_caps.num_funcs;
+ n_used = pf->num_quanta_prof_used;
+ if (n_used < per_pf) {
+ *quanta_prof_idx = begin_id + 1 + n_used;
+ pf->num_quanta_prof_used++;
+ } else {
+ return -EINVAL;
+ }
+ }
+
+ reg = FIELD_PREP(GLCOMM_QUANTA_PROF_QUANTA_SIZE_M, quanta_size) |
+ FIELD_PREP(GLCOMM_QUANTA_PROF_MAX_CMD_M, n_cmd) |
+ FIELD_PREP(GLCOMM_QUANTA_PROF_MAX_DESC_M, n_desc);
+ wr32(hw, GLCOMM_QUANTA_PROF(*quanta_prof_idx), reg);
+
+ return 0;
+}
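+
+/*
+ * Worked example (illustrative, assuming GLCOMM_QUANTA_PROF_MAX_INDEX
+ * is 15 and the device exposes 4 functions): per_pf = 16 / 4 = 4 and a
+ * PF with logical_pf_id 2 gets begin_id = 16 / 4 * 2 = 8. A request
+ * for ICE_DFLT_QUANTA returns reserved profile 8; the first
+ * non-default request allocates profile begin_id + 1 + 0 = 9, the next
+ * one 10, and so on.
+ */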
+
+/**
+ * ice_vc_validate_vqs_bitmaps - validate Rx/Tx queue bitmaps from VIRTCHNL
+ * @vqs: virtchnl_queue_select structure containing bitmaps to validate
+ *
+ * Return true on successful validation, else false
+ */
+static bool ice_vc_validate_vqs_bitmaps(struct virtchnl_queue_select *vqs)
+{
+ if ((!vqs->rx_queues && !vqs->tx_queues) ||
+ vqs->rx_queues >= BIT(ICE_MAX_RSS_QS_PER_VF) ||
+ vqs->tx_queues >= BIT(ICE_MAX_RSS_QS_PER_VF))
+ return false;
+
+ return true;
+}
+
+/**
+ * ice_vf_ena_txq_interrupt - enable Tx queue interrupt via QINT_TQCTL
+ * @vsi: VSI of the VF to configure
+ * @q_idx: VF queue index used to determine the queue in the PF's space
+ */
+void ice_vf_ena_txq_interrupt(struct ice_vsi *vsi, u32 q_idx)
+{
+ struct ice_hw *hw = &vsi->back->hw;
+ u32 pfq = vsi->txq_map[q_idx];
+ u32 reg;
+
+ reg = rd32(hw, QINT_TQCTL(pfq));
+
+ /* MSI-X index 0 in the VF's space is always for the OICR, which means
+ * this is most likely a poll mode VF driver, so don't enable an
+ * interrupt that was never configured via VIRTCHNL_OP_CONFIG_IRQ_MAP
+ */
+ if (!(reg & QINT_TQCTL_MSIX_INDX_M))
+ return;
+
+ wr32(hw, QINT_TQCTL(pfq), reg | QINT_TQCTL_CAUSE_ENA_M);
+}
+
+/**
+ * ice_vf_ena_rxq_interrupt - enable Rx queue interrupt via QINT_RQCTL
+ * @vsi: VSI of the VF to configure
+ * @q_idx: VF queue index used to determine the queue in the PF's space
+ */
+void ice_vf_ena_rxq_interrupt(struct ice_vsi *vsi, u32 q_idx)
+{
+ struct ice_hw *hw = &vsi->back->hw;
+ u32 pfq = vsi->rxq_map[q_idx];
+ u32 reg;
+
+ reg = rd32(hw, QINT_RQCTL(pfq));
+
+ /* MSI-X index 0 in the VF's space is always for the OICR, which means
+ * this is most likely a poll mode VF driver, so don't enable an
+ * interrupt that was never configured via VIRTCHNL_OP_CONFIG_IRQ_MAP
+ */
+ if (!(reg & QINT_RQCTL_MSIX_INDX_M))
+ return;
+
+ wr32(hw, QINT_RQCTL(pfq), reg | QINT_RQCTL_CAUSE_ENA_M);
+}
+
+/**
+ * ice_vc_ena_qs_msg - enable all or specific queue(s) for the VF
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * Called from the VF to enable all or specific queue(s).
+ */
+int ice_vc_ena_qs_msg(struct ice_vf *vf, u8 *msg)
+{
+ enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+ struct virtchnl_queue_select *vqs =
+ (struct virtchnl_queue_select *)msg;
+ struct ice_vsi *vsi;
+ unsigned long q_map;
+ u16 vf_q_id;
+
+ if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ if (!ice_vc_isvalid_vsi_id(vf, vqs->vsi_id)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ if (!ice_vc_validate_vqs_bitmaps(vqs)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ vsi = ice_get_vf_vsi(vf);
+ if (!vsi) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ /* Enable only Rx rings, Tx rings were enabled by the FW when the
+ * Tx queue group list was configured and the context bits were
+ * programmed using ice_vsi_cfg_txqs
+ */
+ q_map = vqs->rx_queues;
+ for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) {
+ if (!ice_vc_isvalid_q_id(vsi, vf_q_id)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ /* Skip queue if enabled */
+ if (test_bit(vf_q_id, vf->rxq_ena))
+ continue;
+
+ if (ice_vsi_ctrl_one_rx_ring(vsi, true, vf_q_id, true)) {
+ dev_err(ice_pf_to_dev(vsi->back), "Failed to enable Rx ring %d on VSI %d\n",
+ vf_q_id, vsi->vsi_num);
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ ice_vf_ena_rxq_interrupt(vsi, vf_q_id);
+ set_bit(vf_q_id, vf->rxq_ena);
+ }
+
+ q_map = vqs->tx_queues;
+ for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) {
+ if (!ice_vc_isvalid_q_id(vsi, vf_q_id)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ /* Skip queue if enabled */
+ if (test_bit(vf_q_id, vf->txq_ena))
+ continue;
+
+ ice_vf_ena_txq_interrupt(vsi, vf_q_id);
+ set_bit(vf_q_id, vf->txq_ena);
+ }
+
+ /* Set flag to indicate that queues are enabled */
+ if (v_ret == VIRTCHNL_STATUS_SUCCESS)
+ set_bit(ICE_VF_STATE_QS_ENA, vf->vf_states);
+
+error_param:
+ /* send the response to the VF */
+ return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ENABLE_QUEUES, v_ret,
+ NULL, 0);
+}
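+
+/*
+ * Example (illustrative): a VF sending vqs->rx_queues = 0x5 asks for
+ * Rx queues 0 and 2 to be enabled; each bit is validated against the
+ * VSI's allocated queue count and skipped if already set in
+ * vf->rxq_ena.
+ */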
+
+/**
+ * ice_vf_vsi_dis_single_txq - disable a single Tx queue
+ * @vf: VF to disable queue for
+ * @vsi: VSI for the VF
+ * @q_id: VF relative (0-based) queue ID
+ *
+ * Attempt to disable the Tx queue passed in. If the Tx queue was successfully
+ * disabled then clear q_id bit in the enabled queues bitmap and return
+ * success. Otherwise return error.
+ */
+int ice_vf_vsi_dis_single_txq(struct ice_vf *vf, struct ice_vsi *vsi, u16 q_id)
+{
+ struct ice_txq_meta txq_meta = { 0 };
+ struct ice_tx_ring *ring;
+ int err;
+
+ if (!test_bit(q_id, vf->txq_ena))
+ dev_dbg(ice_pf_to_dev(vsi->back), "Queue %u on VSI %u is not enabled, but stopping it anyway\n",
+ q_id, vsi->vsi_num);
+
+ ring = vsi->tx_rings[q_id];
+ if (!ring)
+ return -EINVAL;
+
+ ice_fill_txq_meta(vsi, ring, &txq_meta);
+
+ err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, vf->vf_id, ring, &txq_meta);
+ if (err) {
+ dev_err(ice_pf_to_dev(vsi->back), "Failed to stop Tx ring %d on VSI %d\n",
+ q_id, vsi->vsi_num);
+ return err;
+ }
+
+ /* Clear enabled queues flag */
+ clear_bit(q_id, vf->txq_ena);
+
+ return 0;
+}
+
+/**
+ * ice_vc_dis_qs_msg - disable all or specific queue(s) for the VF
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * Called from the VF to disable all or specific queue(s).
+ */
+int ice_vc_dis_qs_msg(struct ice_vf *vf, u8 *msg)
+{
+ enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+ struct virtchnl_queue_select *vqs =
+ (struct virtchnl_queue_select *)msg;
+ struct ice_vsi *vsi;
+ unsigned long q_map;
+ u16 vf_q_id;
+
+ if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states) &&
+ !test_bit(ICE_VF_STATE_QS_ENA, vf->vf_states)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ if (!ice_vc_isvalid_vsi_id(vf, vqs->vsi_id)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ if (!ice_vc_validate_vqs_bitmaps(vqs)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ vsi = ice_get_vf_vsi(vf);
+ if (!vsi) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ if (vqs->tx_queues) {
+ q_map = vqs->tx_queues;
+
+ for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) {
+ if (!ice_vc_isvalid_q_id(vsi, vf_q_id)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ if (ice_vf_vsi_dis_single_txq(vf, vsi, vf_q_id)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+ }
+ }
+
+ q_map = vqs->rx_queues;
+ /* speed up Rx queue disable by batching them if possible */
+ if (q_map &&
+ bitmap_equal(&q_map, vf->rxq_ena, ICE_MAX_RSS_QS_PER_VF)) {
+ if (ice_vsi_stop_all_rx_rings(vsi)) {
+ dev_err(ice_pf_to_dev(vsi->back), "Failed to stop all Rx rings on VSI %d\n",
+ vsi->vsi_num);
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ bitmap_zero(vf->rxq_ena, ICE_MAX_RSS_QS_PER_VF);
+ } else if (q_map) {
+ for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) {
+ if (!ice_vc_isvalid_q_id(vsi, vf_q_id)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ /* Skip queue if not enabled */
+ if (!test_bit(vf_q_id, vf->rxq_ena))
+ continue;
+
+ if (ice_vsi_ctrl_one_rx_ring(vsi, false, vf_q_id,
+ true)) {
+ dev_err(ice_pf_to_dev(vsi->back), "Failed to stop Rx ring %d on VSI %d\n",
+ vf_q_id, vsi->vsi_num);
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ /* Clear enabled queues flag */
+ clear_bit(vf_q_id, vf->rxq_ena);
+ }
+ }
+
+ /* Clear enabled queues flag */
+ if (v_ret == VIRTCHNL_STATUS_SUCCESS && ice_vf_has_no_qs_ena(vf))
+ clear_bit(ICE_VF_STATE_QS_ENA, vf->vf_states);
+
+error_param:
+ /* send the response to the VF */
+ return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_DISABLE_QUEUES, v_ret,
+ NULL, 0);
+}
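+
+/*
+ * Note on the fast path above (editor's sketch): if the VF requests
+ * disabling exactly the set of Rx queues currently enabled, e.g.
+ * vqs->rx_queues == vf->rxq_ena == 0xf, the driver stops all rings
+ * with a single ice_vsi_stop_all_rx_rings() call instead of four
+ * per-queue ice_vsi_ctrl_one_rx_ring() calls.
+ */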
+
+/**
+ * ice_cfg_interrupt - configure the IRQ-to-queue map for a vector
+ * @vf: pointer to the VF info
+ * @vsi: the VSI being configured
+ * @map: vector map for mapping vectors to queues
+ * @q_vector: structure for interrupt vector
+ */
+static enum virtchnl_status_code
+ice_cfg_interrupt(struct ice_vf *vf, struct ice_vsi *vsi,
+ struct virtchnl_vector_map *map,
+ struct ice_q_vector *q_vector)
+{
+ u16 vsi_q_id, vsi_q_id_idx;
+ unsigned long qmap;
+
+ q_vector->num_ring_rx = 0;
+ q_vector->num_ring_tx = 0;
+
+ qmap = map->rxq_map;
+ for_each_set_bit(vsi_q_id_idx, &qmap, ICE_MAX_RSS_QS_PER_VF) {
+ vsi_q_id = vsi_q_id_idx;
+
+ if (!ice_vc_isvalid_q_id(vsi, vsi_q_id))
+ return VIRTCHNL_STATUS_ERR_PARAM;
+
+ q_vector->num_ring_rx++;
+ q_vector->rx.itr_idx = map->rxitr_idx;
+ vsi->rx_rings[vsi_q_id]->q_vector = q_vector;
+ ice_cfg_rxq_interrupt(vsi, vsi_q_id,
+ q_vector->vf_reg_idx,
+ q_vector->rx.itr_idx);
+ }
+
+ qmap = map->txq_map;
+ for_each_set_bit(vsi_q_id_idx, &qmap, ICE_MAX_RSS_QS_PER_VF) {
+ vsi_q_id = vsi_q_id_idx;
+
+ if (!ice_vc_isvalid_q_id(vsi, vsi_q_id))
+ return VIRTCHNL_STATUS_ERR_PARAM;
+
+ q_vector->num_ring_tx++;
+ q_vector->tx.itr_idx = map->txitr_idx;
+ vsi->tx_rings[vsi_q_id]->q_vector = q_vector;
+ ice_cfg_txq_interrupt(vsi, vsi_q_id,
+ q_vector->vf_reg_idx,
+ q_vector->tx.itr_idx);
+ }
+
+ return VIRTCHNL_STATUS_SUCCESS;
+}
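+
+/*
+ * Example (illustrative): a vector map with map->rxq_map = 0x3 and
+ * map->txq_map = 0x1 attaches Rx queues 0 and 1 plus Tx queue 0 to
+ * this q_vector, programming each queue's interrupt cause with the
+ * vector's vf_reg_idx and the ITR index taken from rxitr_idx/txitr_idx.
+ */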
+
+/**
+ * ice_vc_cfg_irq_map_msg - configure the IRQ-to-queue map
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * Called from the VF to configure the IRQ to queue map.
+ */
+int ice_vc_cfg_irq_map_msg(struct ice_vf *vf, u8 *msg)
+{
+ enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+ u16 num_q_vectors_mapped, vsi_id, vector_id;
+ struct virtchnl_irq_map_info *irqmap_info;
+ struct virtchnl_vector_map *map;
+ struct ice_vsi *vsi;
+ int i;
+
+ irqmap_info = (struct virtchnl_irq_map_info *)msg;
+ num_q_vectors_mapped = irqmap_info->num_vectors;
+
+ /* Check to make sure number of VF vectors mapped is not greater than
+ * number of VF vectors originally allocated, and check that
+ * there is actually at least a single VF queue vector mapped
+ */
+ if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states) ||
+ vf->num_msix < num_q_vectors_mapped ||
+ !num_q_vectors_mapped) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ vsi = ice_get_vf_vsi(vf);
+ if (!vsi) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ for (i = 0; i < num_q_vectors_mapped; i++) {
+ struct ice_q_vector *q_vector;
+
+ map = &irqmap_info->vecmap[i];
+
+ vector_id = map->vector_id;
+ vsi_id = map->vsi_id;
+ /* vector_id is always 0-based for each VF, and can never be
+ * larger than or equal to the max allowed interrupts per VF
+ */
+ if (!(vector_id < vf->num_msix) ||
+ !ice_vc_isvalid_vsi_id(vf, vsi_id) ||
+ (!vector_id && (map->rxq_map || map->txq_map))) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ /* No need to map VF miscellaneous or rogue vector */
+ if (!vector_id)
+ continue;
+
+ /* Subtract the non-queue vector from the vector_id passed by the VF
+ * to get the VSI queue vector array index
+ */
+ q_vector = vsi->q_vectors[vector_id - ICE_NONQ_VECS_VF];
+ if (!q_vector) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ /* look out for an invalid queue index */
+ v_ret = ice_cfg_interrupt(vf, vsi, map, q_vector);
+ if (v_ret)
+ goto error_param;
+ }
+
+error_param:
+ /* send the response to the VF */
+ return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_IRQ_MAP, v_ret,
+ NULL, 0);
+}
+
+/**
+ * ice_vc_cfg_q_bw - Configure per queue bandwidth
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer which holds the command descriptor
+ *
+ * Configure VF queues bandwidth.
+ *
+ * Return: 0 on success or negative error value.
+ */
+int ice_vc_cfg_q_bw(struct ice_vf *vf, u8 *msg)
+{
+ enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+ struct virtchnl_queues_bw_cfg *qbw =
+ (struct virtchnl_queues_bw_cfg *)msg;
+ struct ice_vsi *vsi;
+ u16 i;
+
+ if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states) ||
+ !ice_vc_isvalid_vsi_id(vf, qbw->vsi_id)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto err;
+ }
+
+ vsi = ice_get_vf_vsi(vf);
+ if (!vsi) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto err;
+ }
+
+ if (qbw->num_queues > ICE_MAX_RSS_QS_PER_VF ||
+ qbw->num_queues > min_t(u16, vsi->alloc_txq, vsi->alloc_rxq)) {
+ dev_err(ice_pf_to_dev(vf->pf), "VF-%d trying to configure more than allocated number of queues: %d\n",
+ vf->vf_id, min_t(u16, vsi->alloc_txq, vsi->alloc_rxq));
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto err;
+ }
+
+ for (i = 0; i < qbw->num_queues; i++) {
+ if (qbw->cfg[i].shaper.peak != 0 && vf->max_tx_rate != 0 &&
+ qbw->cfg[i].shaper.peak > vf->max_tx_rate) {
+ dev_warn(ice_pf_to_dev(vf->pf), "The maximum queue %d rate limit configuration may not take effect because the maximum TX rate for VF-%d is %d\n",
+ qbw->cfg[i].queue_id, vf->vf_id,
+ vf->max_tx_rate);
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto err;
+ }
+ if (qbw->cfg[i].shaper.committed != 0 && vf->min_tx_rate != 0 &&
+ qbw->cfg[i].shaper.committed < vf->min_tx_rate) {
+ dev_warn(ice_pf_to_dev(vf->pf), "The minimum queue %d rate limit configuration may not take effect because the minimum TX rate for VF-%d is %d\n",
+ qbw->cfg[i].queue_id, vf->vf_id,
+ vf->min_tx_rate);
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto err;
+ }
+ if (qbw->cfg[i].queue_id > vf->num_vf_qs) {
+ dev_warn(ice_pf_to_dev(vf->pf), "VF-%d trying to configure invalid queue_id\n",
+ vf->vf_id);
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto err;
+ }
+ if (qbw->cfg[i].tc >= ICE_MAX_TRAFFIC_CLASS) {
+ dev_warn(ice_pf_to_dev(vf->pf), "VF-%d trying to configure a traffic class higher than allowed\n",
+ vf->vf_id);
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto err;
+ }
+ }
+
+ for (i = 0; i < qbw->num_queues; i++) {
+ vf->qs_bw[i].queue_id = qbw->cfg[i].queue_id;
+ vf->qs_bw[i].peak = qbw->cfg[i].shaper.peak;
+ vf->qs_bw[i].committed = qbw->cfg[i].shaper.committed;
+ vf->qs_bw[i].tc = qbw->cfg[i].tc;
+ }
+
+ if (ice_vf_cfg_qs_bw(vf, qbw->num_queues))
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+
+err:
+ /* send the response to the VF */
+ return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_QUEUE_BW,
+ v_ret, NULL, 0);
+}
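+
+/*
+ * Example (illustrative): if a VF is capped at vf->max_tx_rate and one
+ * of the qbw->cfg[] entries requests a shaper.peak above that cap, the
+ * whole request is rejected with VIRTCHNL_STATUS_ERR_PARAM; likewise
+ * for a shaper.committed below vf->min_tx_rate, an out-of-range
+ * queue_id, or a tc >= ICE_MAX_TRAFFIC_CLASS.
+ */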
+
+/**
+ * ice_vc_cfg_q_quanta - Configure per queue quanta
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer which holds the command descriptor
+ *
+ * Configure VF queues quanta.
+ *
+ * Return: 0 on success or negative error value.
+ */
+int ice_vc_cfg_q_quanta(struct ice_vf *vf, u8 *msg)
+{
+ u16 quanta_prof_id, quanta_size, start_qid, num_queues, end_qid, i;
+ enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+ struct virtchnl_quanta_cfg *qquanta =
+ (struct virtchnl_quanta_cfg *)msg;
+ struct ice_vsi *vsi;
+ int ret;
+
+ start_qid = qquanta->queue_select.start_queue_id;
+ num_queues = qquanta->queue_select.num_queues;
+
+ if (check_add_overflow(start_qid, num_queues, &end_qid)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto err;
+ }
+
+ if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto err;
+ }
+
+ vsi = ice_get_vf_vsi(vf);
+ if (!vsi) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto err;
+ }
+
+ if (end_qid > ICE_MAX_RSS_QS_PER_VF ||
+ end_qid > min_t(u16, vsi->alloc_txq, vsi->alloc_rxq)) {
+ dev_err(ice_pf_to_dev(vf->pf), "VF-%d trying to configure more than allocated number of queues: %d\n",
+ vf->vf_id, min_t(u16, vsi->alloc_txq, vsi->alloc_rxq));
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto err;
+ }
+
+ quanta_size = qquanta->quanta_size;
+ if (quanta_size > ICE_MAX_QUANTA_SIZE ||
+ quanta_size < ICE_MIN_QUANTA_SIZE) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto err;
+ }
+
+ if (quanta_size % 64) {
+ dev_err(ice_pf_to_dev(vf->pf), "quanta size should be the product of 64\n");
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto err;
+ }
+
+ ret = ice_vf_cfg_q_quanta_profile(vf, quanta_size,
+ &quanta_prof_id);
+ if (ret) {
+ v_ret = VIRTCHNL_STATUS_ERR_NOT_SUPPORTED;
+ goto err;
+ }
+
+ for (i = start_qid; i < end_qid; i++)
+ vsi->tx_rings[i]->quanta_prof_id = quanta_prof_id;
+
+err:
+ /* send the response to the VF */
+ return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_QUANTA,
+ v_ret, NULL, 0);
+}
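+
+/*
+ * Example (illustrative): a request with start_queue_id = 0,
+ * num_queues = 4 and quanta_size = 1024 (a multiple of 64, assumed to
+ * lie within the ICE_MIN_QUANTA_SIZE..ICE_MAX_QUANTA_SIZE bounds) maps
+ * an available profile and stamps its index on Tx rings 0..3;
+ * quanta_size = 1000 would be rejected as not a multiple of 64.
+ */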
+
+/**
+ * ice_vc_cfg_qs_msg - configure the VF's Rx/Tx queues
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * Called from the VF to configure the Rx/Tx queues.
+ */
+int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg)
+{
+ struct virtchnl_vsi_queue_config_info *qci =
+ (struct virtchnl_vsi_queue_config_info *)msg;
+ struct virtchnl_queue_pair_info *qpi;
+ struct ice_pf *pf = vf->pf;
+ struct ice_vsi *vsi;
+ int i = -1, q_idx;
+ bool ena_ts;
+ u8 act_prt;
+
+ mutex_lock(&pf->lag_mutex);
+ act_prt = ice_lag_prepare_vf_reset(pf->lag);
+
+ if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states))
+ goto error_param;
+
+ if (!ice_vc_isvalid_vsi_id(vf, qci->vsi_id))
+ goto error_param;
+
+ vsi = ice_get_vf_vsi(vf);
+ if (!vsi)
+ goto error_param;
+
+ if (qci->num_queue_pairs > ICE_MAX_RSS_QS_PER_VF ||
+ qci->num_queue_pairs > min_t(u16, vsi->alloc_txq, vsi->alloc_rxq)) {
+ dev_err(ice_pf_to_dev(pf), "VF-%d requesting more than supported number of queues: %d\n",
+ vf->vf_id, min_t(u16, vsi->alloc_txq, vsi->alloc_rxq));
+ goto error_param;
+ }
+
+ for (i = 0; i < qci->num_queue_pairs; i++) {
+ if (!qci->qpair[i].rxq.crc_disable)
+ continue;
+
+ if (!(vf->driver_caps & VIRTCHNL_VF_OFFLOAD_CRC) ||
+ vf->vlan_strip_ena)
+ goto error_param;
+ }
+
+ for (i = 0; i < qci->num_queue_pairs; i++) {
+ qpi = &qci->qpair[i];
+ if (qpi->txq.vsi_id != qci->vsi_id ||
+ qpi->rxq.vsi_id != qci->vsi_id ||
+ qpi->rxq.queue_id != qpi->txq.queue_id ||
+ qpi->txq.headwb_enabled ||
+ !ice_vc_isvalid_ring_len(qpi->txq.ring_len) ||
+ !ice_vc_isvalid_ring_len(qpi->rxq.ring_len) ||
+ !ice_vc_isvalid_q_id(vsi, qpi->txq.queue_id)) {
+ goto error_param;
+ }
+
+ q_idx = qpi->rxq.queue_id;
+
+ /* make sure the selected "q_idx" is in the valid range of
+ * queues for the selected "vsi"
+ */
+ if (q_idx >= vsi->alloc_txq || q_idx >= vsi->alloc_rxq) {
+ goto error_param;
+ }
+
+ /* copy Tx queue info from VF into VSI */
+ if (qpi->txq.ring_len > 0) {
+ vsi->tx_rings[q_idx]->dma = qpi->txq.dma_ring_addr;
+ vsi->tx_rings[q_idx]->count = qpi->txq.ring_len;
+
+ /* Disable any existing queue first */
+ if (ice_vf_vsi_dis_single_txq(vf, vsi, q_idx))
+ goto error_param;
+
+ /* Configure a queue with the requested settings */
+ if (ice_vsi_cfg_single_txq(vsi, vsi->tx_rings, q_idx)) {
+ dev_warn(ice_pf_to_dev(pf), "VF-%d failed to configure TX queue %d\n",
+ vf->vf_id, q_idx);
+ goto error_param;
+ }
+ }
+
+ /* copy Rx queue info from VF into VSI */
+ if (qpi->rxq.ring_len > 0) {
+ u16 max_frame_size = ice_vc_get_max_frame_size(vf);
+ struct ice_rx_ring *ring = vsi->rx_rings[q_idx];
+ u32 rxdid;
+
+ ring->dma = qpi->rxq.dma_ring_addr;
+ ring->count = qpi->rxq.ring_len;
+
+ if (qpi->rxq.crc_disable)
+ ring->flags |= ICE_RX_FLAGS_CRC_STRIP_DIS;
+ else
+ ring->flags &= ~ICE_RX_FLAGS_CRC_STRIP_DIS;
+
+ if (qpi->rxq.databuffer_size != 0 &&
+ (qpi->rxq.databuffer_size > ((16 * 1024) - 128) ||
+ qpi->rxq.databuffer_size < 1024))
+ goto error_param;
+ ring->rx_buf_len = qpi->rxq.databuffer_size;
+ if (qpi->rxq.max_pkt_size > max_frame_size ||
+ qpi->rxq.max_pkt_size < 64)
+ goto error_param;
+
+ ring->max_frame = qpi->rxq.max_pkt_size;
+ /* add space for the port VLAN since the VF driver is
+ * not expected to account for it in the MTU
+ * calculation
+ */
+ if (ice_vf_is_port_vlan_ena(vf))
+ ring->max_frame += VLAN_HLEN;
+
+ if (ice_vsi_cfg_single_rxq(vsi, q_idx)) {
+ dev_warn(ice_pf_to_dev(pf), "VF-%d failed to configure RX queue %d\n",
+ vf->vf_id, q_idx);
+ goto error_param;
+ }
+
+ /* If Rx flex desc is supported, select RXDID for Rx
+ * queues. Otherwise, use legacy 32byte descriptor
+ * format. Legacy 16byte descriptor is not supported.
+ * If this RXDID is selected, return error.
+ */
+ if (vf->driver_caps &
+ VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC) {
+ rxdid = qpi->rxq.rxdid;
+ if (!(BIT(rxdid) & pf->supported_rxdids))
+ goto error_param;
+ } else {
+ rxdid = ICE_RXDID_LEGACY_1;
+ }
+
+ ena_ts = ((vf->driver_caps &
+ VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC) &&
+ (vf->driver_caps & VIRTCHNL_VF_CAP_PTP) &&
+ (qpi->rxq.flags & VIRTCHNL_PTP_RX_TSTAMP));
+
+ ice_write_qrxflxp_cntxt(&vsi->back->hw,
+ vsi->rxq_map[q_idx], rxdid,
+ ICE_RXDID_PRIO, ena_ts);
+ }
+ }
+
+ ice_lag_complete_vf_reset(pf->lag, act_prt);
+ mutex_unlock(&pf->lag_mutex);
+
+ /* send the response to the VF */
+ return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_VSI_QUEUES,
+ VIRTCHNL_STATUS_SUCCESS, NULL, 0);
+error_param:
+ /* disable whatever we can */
+ for (; i >= 0; i--) {
+ if (ice_vsi_ctrl_one_rx_ring(vsi, false, i, true))
+ dev_err(ice_pf_to_dev(pf), "VF-%d could not disable RX queue %d\n",
+ vf->vf_id, i);
+ if (ice_vf_vsi_dis_single_txq(vf, vsi, i))
+ dev_err(ice_pf_to_dev(pf), "VF-%d could not disable TX queue %d\n",
+ vf->vf_id, i);
+ }
+
+ ice_lag_complete_vf_reset(pf->lag, act_prt);
+ mutex_unlock(&pf->lag_mutex);
+
+ ice_lag_move_new_vf_nodes(vf);
+
+ /* send the response to the VF */
+ return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_VSI_QUEUES,
+ VIRTCHNL_STATUS_ERR_PARAM, NULL, 0);
+}
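+
+/*
+ * Example of the Rx validation above (illustrative): a queue pair with
+ * rxq.databuffer_size = 2048 and rxq.max_pkt_size = 1518 is accepted,
+ * while databuffer_size = 512 (below the 1024-byte floor) or
+ * max_pkt_size = 32 (below the 64-byte floor) sends the VF down the
+ * error path, which tears down every queue configured so far.
+ */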
+
+/**
+ * ice_vc_request_qs_msg - handle a VF queue-count request
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * VFs get a default number of queues but can use this message to request a
+ * different number. If the request is successful, the PF will reset the VF
+ * and return 0. If unsuccessful, the PF will inform the VF of the number of
+ * available queue pairs via a virtchnl message response.
+ */
+int ice_vc_request_qs_msg(struct ice_vf *vf, u8 *msg)
+{
+ enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+ struct virtchnl_vf_res_request *vfres =
+ (struct virtchnl_vf_res_request *)msg;
+ u16 req_queues = vfres->num_queue_pairs;
+ struct ice_pf *pf = vf->pf;
+ u16 max_allowed_vf_queues;
+ u16 tx_rx_queue_left;
+ struct device *dev;
+ u16 cur_queues;
+
+ dev = ice_pf_to_dev(pf);
+ if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ cur_queues = vf->num_vf_qs;
+ tx_rx_queue_left = min_t(u16, ice_get_avail_txq_count(pf),
+ ice_get_avail_rxq_count(pf));
+ max_allowed_vf_queues = tx_rx_queue_left + cur_queues;
+ if (!req_queues) {
+ dev_err(dev, "VF %d tried to request 0 queues. Ignoring.\n",
+ vf->vf_id);
+ } else if (req_queues > ICE_MAX_RSS_QS_PER_VF) {
+ dev_err(dev, "VF %d tried to request more than %d queues.\n",
+ vf->vf_id, ICE_MAX_RSS_QS_PER_VF);
+ vfres->num_queue_pairs = ICE_MAX_RSS_QS_PER_VF;
+ } else if (req_queues > cur_queues &&
+ req_queues - cur_queues > tx_rx_queue_left) {
+ dev_warn(dev, "VF %d requested %u more queues, but only %u left.\n",
+ vf->vf_id, req_queues - cur_queues, tx_rx_queue_left);
+ vfres->num_queue_pairs = min_t(u16, max_allowed_vf_queues,
+ ICE_MAX_RSS_QS_PER_VF);
+ } else {
+ /* request is successful, then reset VF */
+ vf->num_req_qs = req_queues;
+ ice_reset_vf(vf, ICE_VF_RESET_NOTIFY);
+ dev_info(dev, "VF %d granted request of %u queues.\n",
+ vf->vf_id, req_queues);
+ return 0;
+ }
+
+error_param:
+ /* send the response to the VF */
+ return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_REQUEST_QUEUES,
+ v_ret, (u8 *)vfres, sizeof(*vfres));
+}
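+
+/*
+ * Worked example (illustrative, assuming ICE_MAX_RSS_QS_PER_VF = 16):
+ * a VF with cur_queues = 4 requests req_queues = 16 while only
+ * tx_rx_queue_left = 8 queues remain. Since 16 - 4 > 8, the request
+ * fails and the response advertises min(4 + 8, 16) = 12 available
+ * queue pairs; a follow-up request for 12 would succeed and trigger a
+ * VF reset.
+ */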
+
diff --git a/drivers/net/ethernet/intel/ice/virt/queues.h b/drivers/net/ethernet/intel/ice/virt/queues.h
new file mode 100644
index 000000000000..c4a792cecea1
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/virt/queues.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2022, Intel Corporation. */
+
+#ifndef _ICE_VIRT_QUEUES_H_
+#define _ICE_VIRT_QUEUES_H_
+
+#include <linux/types.h>
+
+struct ice_vf;
+
+u16 ice_vc_get_max_frame_size(struct ice_vf *vf);
+int ice_vc_ena_qs_msg(struct ice_vf *vf, u8 *msg);
+int ice_vc_dis_qs_msg(struct ice_vf *vf, u8 *msg);
+int ice_vc_cfg_irq_map_msg(struct ice_vf *vf, u8 *msg);
+int ice_vc_cfg_q_bw(struct ice_vf *vf, u8 *msg);
+int ice_vc_cfg_q_quanta(struct ice_vf *vf, u8 *msg);
+int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg);
+int ice_vc_request_qs_msg(struct ice_vf *vf, u8 *msg);
+
+#endif /* _ICE_VIRT_QUEUES_H_ */
diff --git a/drivers/net/ethernet/intel/ice/virt/rss.c b/drivers/net/ethernet/intel/ice/virt/rss.c
new file mode 100644
index 000000000000..cbdbb32d512b
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/virt/rss.c
@@ -0,0 +1,719 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2022, Intel Corporation. */
+
+#include "rss.h"
+#include "ice_vf_lib_private.h"
+#include "ice.h"
+
+#define FIELD_SELECTOR(proto_hdr_field) \
+ BIT((proto_hdr_field) & PROTO_HDR_FIELD_MASK)
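+
+/*
+ * Example (illustrative): virtchnl field enums encode the header type
+ * in their upper bits and the field index in the bits covered by
+ * PROTO_HDR_FIELD_MASK, so FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC)
+ * masks off the header part and yields a single-bit selector for the
+ * IPv4 source-address field, suitable for OR-ing into a field_selector
+ * bitmap.
+ */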
+
+struct ice_vc_hdr_match_type {
+ u32 vc_hdr; /* virtchnl headers (VIRTCHNL_PROTO_HDR_XXX) */
+ u32 ice_hdr; /* ice headers (ICE_FLOW_SEG_HDR_XXX) */
+};
+
+static const struct ice_vc_hdr_match_type ice_vc_hdr_list[] = {
+ {VIRTCHNL_PROTO_HDR_NONE, ICE_FLOW_SEG_HDR_NONE},
+ {VIRTCHNL_PROTO_HDR_ETH, ICE_FLOW_SEG_HDR_ETH},
+ {VIRTCHNL_PROTO_HDR_S_VLAN, ICE_FLOW_SEG_HDR_VLAN},
+ {VIRTCHNL_PROTO_HDR_C_VLAN, ICE_FLOW_SEG_HDR_VLAN},
+ {VIRTCHNL_PROTO_HDR_IPV4, ICE_FLOW_SEG_HDR_IPV4 |
+ ICE_FLOW_SEG_HDR_IPV_OTHER},
+ {VIRTCHNL_PROTO_HDR_IPV6, ICE_FLOW_SEG_HDR_IPV6 |
+ ICE_FLOW_SEG_HDR_IPV_OTHER},
+ {VIRTCHNL_PROTO_HDR_TCP, ICE_FLOW_SEG_HDR_TCP},
+ {VIRTCHNL_PROTO_HDR_UDP, ICE_FLOW_SEG_HDR_UDP},
+ {VIRTCHNL_PROTO_HDR_SCTP, ICE_FLOW_SEG_HDR_SCTP},
+ {VIRTCHNL_PROTO_HDR_PPPOE, ICE_FLOW_SEG_HDR_PPPOE},
+ {VIRTCHNL_PROTO_HDR_GTPU_IP, ICE_FLOW_SEG_HDR_GTPU_IP},
+ {VIRTCHNL_PROTO_HDR_GTPU_EH, ICE_FLOW_SEG_HDR_GTPU_EH},
+ {VIRTCHNL_PROTO_HDR_GTPU_EH_PDU_DWN,
+ ICE_FLOW_SEG_HDR_GTPU_DWN},
+ {VIRTCHNL_PROTO_HDR_GTPU_EH_PDU_UP,
+ ICE_FLOW_SEG_HDR_GTPU_UP},
+ {VIRTCHNL_PROTO_HDR_L2TPV3, ICE_FLOW_SEG_HDR_L2TPV3},
+ {VIRTCHNL_PROTO_HDR_ESP, ICE_FLOW_SEG_HDR_ESP},
+ {VIRTCHNL_PROTO_HDR_AH, ICE_FLOW_SEG_HDR_AH},
+ {VIRTCHNL_PROTO_HDR_PFCP, ICE_FLOW_SEG_HDR_PFCP_SESSION},
+};
+
+struct ice_vc_hash_field_match_type {
+ u32 vc_hdr; /* virtchnl headers
+ * (VIRTCHNL_PROTO_HDR_XXX)
+ */
+ u32 vc_hash_field; /* virtchnl hash fields selector
+ * FIELD_SELECTOR((VIRTCHNL_PROTO_HDR_ETH_XXX))
+ */
+ u64 ice_hash_field; /* ice hash fields
+ * (BIT_ULL(ICE_FLOW_FIELD_IDX_XXX))
+ */
+};
+
+static const struct
+ice_vc_hash_field_match_type ice_vc_hash_field_list[] = {
+ {VIRTCHNL_PROTO_HDR_ETH, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_ETH_SRC),
+ BIT_ULL(ICE_FLOW_FIELD_IDX_ETH_SA)},
+ {VIRTCHNL_PROTO_HDR_ETH, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_ETH_DST),
+ BIT_ULL(ICE_FLOW_FIELD_IDX_ETH_DA)},
+ {VIRTCHNL_PROTO_HDR_ETH, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_ETH_SRC) |
+ FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_ETH_DST),
+ ICE_FLOW_HASH_ETH},
+ {VIRTCHNL_PROTO_HDR_ETH,
+ FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_ETH_ETHERTYPE),
+ BIT_ULL(ICE_FLOW_FIELD_IDX_ETH_TYPE)},
+ {VIRTCHNL_PROTO_HDR_S_VLAN,
+ FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_S_VLAN_ID),
+ BIT_ULL(ICE_FLOW_FIELD_IDX_S_VLAN)},
+ {VIRTCHNL_PROTO_HDR_C_VLAN,
+ FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_C_VLAN_ID),
+ BIT_ULL(ICE_FLOW_FIELD_IDX_C_VLAN)},
+ {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC),
+ BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_SA)},
+ {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST),
+ BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_DA)},
+ {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC) |
+ FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST),
+ ICE_FLOW_HASH_IPV4},
+ {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC) |
+ FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT),
+ BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_SA) |
+ BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT)},
+ {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST) |
+ FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT),
+ BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_DA) |
+ BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT)},
+ {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC) |
+ FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST) |
+ FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT),
+ ICE_FLOW_HASH_IPV4 | BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT)},
+ {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT),
+ BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT)},
+ {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_SRC),
+ BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_SA)},
+ {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_DST),
+ BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_DA)},
+ {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_SRC) |
+ FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_DST),
+ ICE_FLOW_HASH_IPV6},
+ {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_SRC) |
+ FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PROT),
+ BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_SA) |
+ BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PROT)},
+ {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_DST) |
+ FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PROT),
+ BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_DA) |
+ BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PROT)},
+ {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_SRC) |
+ FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_DST) |
+ FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PROT),
+ ICE_FLOW_HASH_IPV6 | BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PROT)},
+ {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PROT),
+ BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PROT)},
+ {VIRTCHNL_PROTO_HDR_TCP,
+ FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_SRC_PORT),
+ BIT_ULL(ICE_FLOW_FIELD_IDX_TCP_SRC_PORT)},
+ {VIRTCHNL_PROTO_HDR_TCP,
+ FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_DST_PORT),
+ BIT_ULL(ICE_FLOW_FIELD_IDX_TCP_DST_PORT)},
+ {VIRTCHNL_PROTO_HDR_TCP,
+ FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_SRC_PORT) |
+ FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_DST_PORT),
+ ICE_FLOW_HASH_TCP_PORT},
+ {VIRTCHNL_PROTO_HDR_UDP,
+ FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_SRC_PORT),
+ BIT_ULL(ICE_FLOW_FIELD_IDX_UDP_SRC_PORT)},
+ {VIRTCHNL_PROTO_HDR_UDP,
+ FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_DST_PORT),
+ BIT_ULL(ICE_FLOW_FIELD_IDX_UDP_DST_PORT)},
+ {VIRTCHNL_PROTO_HDR_UDP,
+ FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_SRC_PORT) |
+ FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_DST_PORT),
+ ICE_FLOW_HASH_UDP_PORT},
+ {VIRTCHNL_PROTO_HDR_SCTP,
+ FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_SRC_PORT),
+ BIT_ULL(ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT)},
+ {VIRTCHNL_PROTO_HDR_SCTP,
+ FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_DST_PORT),
+ BIT_ULL(ICE_FLOW_FIELD_IDX_SCTP_DST_PORT)},
+ {VIRTCHNL_PROTO_HDR_SCTP,
+ FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_SRC_PORT) |
+ FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_DST_PORT),
+ ICE_FLOW_HASH_SCTP_PORT},
+ {VIRTCHNL_PROTO_HDR_PPPOE,
+ FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_PPPOE_SESS_ID),
+ BIT_ULL(ICE_FLOW_FIELD_IDX_PPPOE_SESS_ID)},
+ {VIRTCHNL_PROTO_HDR_GTPU_IP,
+ FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_GTPU_IP_TEID),
+ BIT_ULL(ICE_FLOW_FIELD_IDX_GTPU_IP_TEID)},
+ {VIRTCHNL_PROTO_HDR_L2TPV3,
+ FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_L2TPV3_SESS_ID),
+ BIT_ULL(ICE_FLOW_FIELD_IDX_L2TPV3_SESS_ID)},
+ {VIRTCHNL_PROTO_HDR_ESP, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_ESP_SPI),
+ BIT_ULL(ICE_FLOW_FIELD_IDX_ESP_SPI)},
+ {VIRTCHNL_PROTO_HDR_AH, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_AH_SPI),
+ BIT_ULL(ICE_FLOW_FIELD_IDX_AH_SPI)},
+ {VIRTCHNL_PROTO_HDR_PFCP, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_PFCP_SEID),
+ BIT_ULL(ICE_FLOW_FIELD_IDX_PFCP_SEID)},
+};
+
+/**
+ * ice_vc_validate_pattern - check if a protocol header pattern is supported
+ * @vf: pointer to the VF info
+ * @proto: virtchnl protocol headers
+ *
+ * Validate whether the requested protocol header pattern is supported.
+ *
+ * Return: true on success, false on error.
+ */
+bool
+ice_vc_validate_pattern(struct ice_vf *vf, struct virtchnl_proto_hdrs *proto)
+{
+ bool is_ipv4 = false;
+ bool is_ipv6 = false;
+ bool is_udp = false;
+ u16 ptype = -1;
+ int i = 0;
+
+ while (i < proto->count &&
+ proto->proto_hdr[i].type != VIRTCHNL_PROTO_HDR_NONE) {
+ switch (proto->proto_hdr[i].type) {
+ case VIRTCHNL_PROTO_HDR_ETH:
+ ptype = ICE_PTYPE_MAC_PAY;
+ break;
+ case VIRTCHNL_PROTO_HDR_IPV4:
+ ptype = ICE_PTYPE_IPV4_PAY;
+ is_ipv4 = true;
+ break;
+ case VIRTCHNL_PROTO_HDR_IPV6:
+ ptype = ICE_PTYPE_IPV6_PAY;
+ is_ipv6 = true;
+ break;
+ case VIRTCHNL_PROTO_HDR_UDP:
+ if (is_ipv4)
+ ptype = ICE_PTYPE_IPV4_UDP_PAY;
+ else if (is_ipv6)
+ ptype = ICE_PTYPE_IPV6_UDP_PAY;
+ is_udp = true;
+ break;
+ case VIRTCHNL_PROTO_HDR_TCP:
+ if (is_ipv4)
+ ptype = ICE_PTYPE_IPV4_TCP_PAY;
+ else if (is_ipv6)
+ ptype = ICE_PTYPE_IPV6_TCP_PAY;
+ break;
+ case VIRTCHNL_PROTO_HDR_SCTP:
+ if (is_ipv4)
+ ptype = ICE_PTYPE_IPV4_SCTP_PAY;
+ else if (is_ipv6)
+ ptype = ICE_PTYPE_IPV6_SCTP_PAY;
+ break;
+ case VIRTCHNL_PROTO_HDR_GTPU_IP:
+ case VIRTCHNL_PROTO_HDR_GTPU_EH:
+ if (is_ipv4)
+ ptype = ICE_MAC_IPV4_GTPU;
+ else if (is_ipv6)
+ ptype = ICE_MAC_IPV6_GTPU;
+ goto out;
+ case VIRTCHNL_PROTO_HDR_L2TPV3:
+ if (is_ipv4)
+ ptype = ICE_MAC_IPV4_L2TPV3;
+ else if (is_ipv6)
+ ptype = ICE_MAC_IPV6_L2TPV3;
+ goto out;
+ case VIRTCHNL_PROTO_HDR_ESP:
+ if (is_ipv4)
+ ptype = is_udp ? ICE_MAC_IPV4_NAT_T_ESP :
+ ICE_MAC_IPV4_ESP;
+ else if (is_ipv6)
+ ptype = is_udp ? ICE_MAC_IPV6_NAT_T_ESP :
+ ICE_MAC_IPV6_ESP;
+ goto out;
+ case VIRTCHNL_PROTO_HDR_AH:
+ if (is_ipv4)
+ ptype = ICE_MAC_IPV4_AH;
+ else if (is_ipv6)
+ ptype = ICE_MAC_IPV6_AH;
+ goto out;
+ case VIRTCHNL_PROTO_HDR_PFCP:
+ if (is_ipv4)
+ ptype = ICE_MAC_IPV4_PFCP_SESSION;
+ else if (is_ipv6)
+ ptype = ICE_MAC_IPV6_PFCP_SESSION;
+ goto out;
+ default:
+ break;
+ }
+ i++;
+ }
+
+out:
+ return ice_hw_ptype_ena(&vf->pf->hw, ptype);
+}
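+
+/*
+ * Example (illustrative): a proto_hdr list of ETH, IPV4, UDP walks the
+ * switch above to ptype = ICE_PTYPE_IPV4_UDP_PAY, and the pattern is
+ * accepted only if ice_hw_ptype_ena() reports that ptype as enabled.
+ */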
+
+/**
+ * ice_vc_parse_rss_cfg - parse hash fields and headers from a virtchnl RSS cfg
+ * @hw: pointer to the hardware
+ * @rss_cfg: pointer to the virtchnl RSS cfg
+ * @hash_cfg: pointer to the HW hash configuration
+ *
+ * This function parses the virtchnl RSS cfg into the intended hash fields and
+ * the intended headers for the RSS configuration.
+ *
+ * Return: true if all the protocol headers and hash fields in the RSS cfg
+ * could be parsed, else false.
+ */
+static bool ice_vc_parse_rss_cfg(struct ice_hw *hw,
+ struct virtchnl_rss_cfg *rss_cfg,
+ struct ice_rss_hash_cfg *hash_cfg)
+{
+ const struct ice_vc_hash_field_match_type *hf_list;
+ const struct ice_vc_hdr_match_type *hdr_list;
+ int i, hf_list_len, hdr_list_len;
+ u32 *addl_hdrs = &hash_cfg->addl_hdrs;
+ u64 *hash_flds = &hash_cfg->hash_flds;
+
+ /* set outer layer RSS as default */
+ hash_cfg->hdr_type = ICE_RSS_OUTER_HEADERS;
+
+ if (rss_cfg->rss_algorithm == VIRTCHNL_RSS_ALG_TOEPLITZ_SYMMETRIC)
+ hash_cfg->symm = true;
+ else
+ hash_cfg->symm = false;
+
+ hf_list = ice_vc_hash_field_list;
+ hf_list_len = ARRAY_SIZE(ice_vc_hash_field_list);
+ hdr_list = ice_vc_hdr_list;
+ hdr_list_len = ARRAY_SIZE(ice_vc_hdr_list);
+
+ for (i = 0; i < rss_cfg->proto_hdrs.count; i++) {
+ struct virtchnl_proto_hdr *proto_hdr =
+ &rss_cfg->proto_hdrs.proto_hdr[i];
+ bool hdr_found = false;
+ int j;
+
+ /* Find matched ice headers according to virtchnl headers. */
+ for (j = 0; j < hdr_list_len; j++) {
+ struct ice_vc_hdr_match_type hdr_map = hdr_list[j];
+
+ if (proto_hdr->type == hdr_map.vc_hdr) {
+ *addl_hdrs |= hdr_map.ice_hdr;
+ hdr_found = true;
+ }
+ }
+
+ if (!hdr_found)
+ return false;
+
+ /* Find matched ice hash fields according to
+ * virtchnl hash fields.
+ */
+ for (j = 0; j < hf_list_len; j++) {
+ struct ice_vc_hash_field_match_type hf_map = hf_list[j];
+
+ if (proto_hdr->type == hf_map.vc_hdr &&
+ proto_hdr->field_selector == hf_map.vc_hash_field) {
+ *hash_flds |= hf_map.ice_hash_field;
+ break;
+ }
+ }
+ }
+
+ return true;
+}
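+
+/*
+ * Example (illustrative): an rss_cfg carrying one IPV4 proto_hdr with
+ * field_selector = FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC) |
+ * FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST) is translated via the
+ * two match tables into addl_hdrs |= ICE_FLOW_SEG_HDR_IPV4 |
+ * ICE_FLOW_SEG_HDR_IPV_OTHER and hash_flds |= ICE_FLOW_HASH_IPV4.
+ */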
+
+/**
+ * ice_vf_adv_rss_offload_ena - determine if capabilities support advanced
+ * RSS offloads
+ * @caps: VF driver negotiated capabilities
+ *
+ * Return true if VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF capability is set,
+ * else return false
+ */
+static bool ice_vf_adv_rss_offload_ena(u32 caps)
+{
+ return !!(caps & VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF);
+}
+
+/**
+ * ice_vc_handle_rss_cfg - add or delete an RSS configuration
+ * @vf: pointer to the VF info
+ * @msg: pointer to the message buffer
+ * @add: add an RSS config if true, otherwise delete an RSS config
+ *
+ * This function adds or deletes an RSS configuration on behalf of the VF.
+ */
+int ice_vc_handle_rss_cfg(struct ice_vf *vf, u8 *msg, bool add)
+{
+ u32 v_opcode = add ? VIRTCHNL_OP_ADD_RSS_CFG : VIRTCHNL_OP_DEL_RSS_CFG;
+ struct virtchnl_rss_cfg *rss_cfg = (struct virtchnl_rss_cfg *)msg;
+ enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+ struct device *dev = ice_pf_to_dev(vf->pf);
+ struct ice_hw *hw = &vf->pf->hw;
+ struct ice_vsi *vsi;
+
+ if (!test_bit(ICE_FLAG_RSS_ENA, vf->pf->flags)) {
+ dev_dbg(dev, "VF %d attempting to configure RSS, but RSS is not supported by the PF\n",
+ vf->vf_id);
+ v_ret = VIRTCHNL_STATUS_ERR_NOT_SUPPORTED;
+ goto error_param;
+ }
+
+ if (!ice_vf_adv_rss_offload_ena(vf->driver_caps)) {
+ dev_dbg(dev, "VF %d attempting to configure RSS, but Advanced RSS offload is not supported\n",
+ vf->vf_id);
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ if (rss_cfg->proto_hdrs.count > VIRTCHNL_MAX_NUM_PROTO_HDRS ||
+ rss_cfg->rss_algorithm < VIRTCHNL_RSS_ALG_TOEPLITZ_ASYMMETRIC ||
+ rss_cfg->rss_algorithm > VIRTCHNL_RSS_ALG_XOR_SYMMETRIC) {
+ dev_dbg(dev, "VF %d attempting to configure RSS, but RSS configuration is not valid\n",
+ vf->vf_id);
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ vsi = ice_get_vf_vsi(vf);
+ if (!vsi) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ if (!ice_vc_validate_pattern(vf, &rss_cfg->proto_hdrs)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ if (rss_cfg->rss_algorithm == VIRTCHNL_RSS_ALG_R_ASYMMETRIC) {
+ struct ice_vsi_ctx *ctx;
+ u8 lut_type, hash_type;
+ int status;
+
+ lut_type = ICE_AQ_VSI_Q_OPT_RSS_LUT_VSI;
+ hash_type = add ? ICE_AQ_VSI_Q_OPT_RSS_HASH_XOR :
+ ICE_AQ_VSI_Q_OPT_RSS_HASH_TPLZ;
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx) {
+ v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY;
+ goto error_param;
+ }
+
+ ctx->info.q_opt_rss =
+ FIELD_PREP(ICE_AQ_VSI_Q_OPT_RSS_LUT_M, lut_type) |
+ FIELD_PREP(ICE_AQ_VSI_Q_OPT_RSS_HASH_M, hash_type);
+
+ /* Preserve existing queueing option setting */
+ ctx->info.q_opt_rss |= (vsi->info.q_opt_rss &
+ ICE_AQ_VSI_Q_OPT_RSS_GBL_LUT_M);
+ ctx->info.q_opt_tc = vsi->info.q_opt_tc;
+ ctx->info.q_opt_flags = vsi->info.q_opt_rss;
+
+ ctx->info.valid_sections =
+ cpu_to_le16(ICE_AQ_VSI_PROP_Q_OPT_VALID);
+
+ status = ice_update_vsi(hw, vsi->idx, ctx, NULL);
+ if (status) {
+ dev_err(dev, "update VSI for RSS failed, err %d aq_err %s\n",
+ status, libie_aq_str(hw->adminq.sq_last_status));
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ } else {
+ vsi->info.q_opt_rss = ctx->info.q_opt_rss;
+ }
+
+ kfree(ctx);
+ } else {
+ struct ice_rss_hash_cfg cfg;
+
+ /* Only check for the non-raw pattern case */
+ if (!ice_vc_validate_pattern(vf, &rss_cfg->proto_hdrs)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+ cfg.addl_hdrs = ICE_FLOW_SEG_HDR_NONE;
+ cfg.hash_flds = ICE_HASH_INVALID;
+ cfg.hdr_type = ICE_RSS_ANY_HEADERS;
+
+ if (!ice_vc_parse_rss_cfg(hw, rss_cfg, &cfg)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ if (add) {
+ if (ice_add_rss_cfg(hw, vsi, &cfg)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ dev_err(dev, "ice_add_rss_cfg failed for vsi = %d, v_ret = %d\n",
+ vsi->vsi_num, v_ret);
+ }
+ } else {
+ int status;
+
+ status = ice_rem_rss_cfg(hw, vsi->idx, &cfg);
+ /* We just ignore -ENOENT, because if two configurations
+ * share the same profile, removing one of them actually
+ * removes both, since the profile is deleted.
+ */
+ if (status && status != -ENOENT) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ dev_err(dev, "ice_rem_rss_cfg failed for VF ID:%d, error:%d\n",
+ vf->vf_id, status);
+ }
+ }
+ }
+
+error_param:
+ return ice_vc_send_msg_to_vf(vf, v_opcode, v_ret, NULL, 0);
+}
+
+/**
+ * ice_vc_config_rss_key - configure the VF's RSS key
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * Configure the VF's RSS key
+ */
+int ice_vc_config_rss_key(struct ice_vf *vf, u8 *msg)
+{
+ enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+ struct virtchnl_rss_key *vrk =
+ (struct virtchnl_rss_key *)msg;
+ struct ice_vsi *vsi;
+
+ if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ if (!ice_vc_isvalid_vsi_id(vf, vrk->vsi_id)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ if (vrk->key_len != ICE_VSIQF_HKEY_ARRAY_SIZE) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ if (!test_bit(ICE_FLAG_RSS_ENA, vf->pf->flags)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ vsi = ice_get_vf_vsi(vf);
+ if (!vsi) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ if (ice_set_rss_key(vsi, vrk->key))
+ v_ret = VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR;
+error_param:
+ return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_RSS_KEY, v_ret,
+ NULL, 0);
+}
+
+/**
+ * ice_vc_config_rss_lut
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * Configure the VF's RSS LUT
+ */
+int ice_vc_config_rss_lut(struct ice_vf *vf, u8 *msg)
+{
+ struct virtchnl_rss_lut *vrl = (struct virtchnl_rss_lut *)msg;
+ enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+ struct ice_vsi *vsi;
+
+ if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ if (!ice_vc_isvalid_vsi_id(vf, vrl->vsi_id)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ if (vrl->lut_entries != ICE_LUT_VSI_SIZE) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ if (!test_bit(ICE_FLAG_RSS_ENA, vf->pf->flags)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ vsi = ice_get_vf_vsi(vf);
+ if (!vsi) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ if (ice_set_rss_lut(vsi, vrl->lut, ICE_LUT_VSI_SIZE))
+ v_ret = VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR;
+error_param:
+ return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_RSS_LUT, v_ret,
+ NULL, 0);
+}
+
+/**
+ * ice_vc_config_rss_hfunc
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * Configure the VF's RSS Hash function
+ */
+int ice_vc_config_rss_hfunc(struct ice_vf *vf, u8 *msg)
+{
+ struct virtchnl_rss_hfunc *vrh = (struct virtchnl_rss_hfunc *)msg;
+ enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+ u8 hfunc = ICE_AQ_VSI_Q_OPT_RSS_HASH_TPLZ;
+ struct ice_vsi *vsi;
+
+ if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ if (!ice_vc_isvalid_vsi_id(vf, vrh->vsi_id)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ if (!test_bit(ICE_FLAG_RSS_ENA, vf->pf->flags)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ vsi = ice_get_vf_vsi(vf);
+ if (!vsi) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ if (vrh->rss_algorithm == VIRTCHNL_RSS_ALG_TOEPLITZ_SYMMETRIC)
+ hfunc = ICE_AQ_VSI_Q_OPT_RSS_HASH_SYM_TPLZ;
+
+ if (ice_set_rss_hfunc(vsi, hfunc))
+ v_ret = VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR;
+error_param:
+ return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_RSS_HFUNC, v_ret,
+ NULL, 0);
+}
+
+/**
+ * ice_vc_get_rss_hashcfg - return the RSS Hash configuration
+ * @vf: pointer to the VF info
+ */
+int ice_vc_get_rss_hashcfg(struct ice_vf *vf)
+{
+ enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+ struct virtchnl_rss_hashcfg *vrh = NULL;
+ int len = 0, ret;
+
+ if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto err;
+ }
+
+ if (!test_bit(ICE_FLAG_RSS_ENA, vf->pf->flags)) {
+ dev_err(ice_pf_to_dev(vf->pf), "RSS not supported by PF\n");
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto err;
+ }
+
+ len = sizeof(struct virtchnl_rss_hashcfg);
+ vrh = kzalloc(len, GFP_KERNEL);
+ if (!vrh) {
+ v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY;
+ len = 0;
+ goto err;
+ }
+
+ vrh->hashcfg = ICE_DEFAULT_RSS_HASHCFG;
+err:
+ /* send the response back to the VF */
+ ret = ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_GET_RSS_HASHCFG_CAPS, v_ret,
+ (u8 *)vrh, len);
+ kfree(vrh);
+ return ret;
+}
+
+/**
+ * ice_vc_set_rss_hashcfg - set RSS Hash configuration bits for the VF
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ */
+int ice_vc_set_rss_hashcfg(struct ice_vf *vf, u8 *msg)
+{
+ struct virtchnl_rss_hashcfg *vrh = (struct virtchnl_rss_hashcfg *)msg;
+ enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+ struct ice_pf *pf = vf->pf;
+ struct ice_vsi *vsi;
+ struct device *dev;
+ int status;
+
+ dev = ice_pf_to_dev(pf);
+
+ if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto err;
+ }
+
+ if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) {
+ dev_err(dev, "RSS not supported by PF\n");
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto err;
+ }
+
+ vsi = ice_get_vf_vsi(vf);
+ if (!vsi) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto err;
+ }
+
+ /* clear all previously programmed RSS configuration to allow VF drivers
+ * the ability to customize the RSS configuration and/or completely
+ * disable RSS
+ */
+ status = ice_rem_vsi_rss_cfg(&pf->hw, vsi->idx);
+ if (status && !vrh->hashcfg) {
+ /* only report failure to clear the current RSS configuration if
+ * that was clearly the VF's intention (i.e. vrh->hashcfg = 0)
+ */
+ v_ret = ice_err_to_virt_err(status);
+ goto err;
+ } else if (status) {
+ /* allow the VF to update the RSS configuration even on failure
+ * to clear the current RSS configuration in an attempt to keep
+ * RSS in a working state
+ */
+ dev_warn(dev, "Failed to clear the RSS configuration for VF %u\n",
+ vf->vf_id);
+ }
+
+ if (vrh->hashcfg) {
+ status = ice_add_avf_rss_cfg(&pf->hw, vsi, vrh->hashcfg);
+ v_ret = ice_err_to_virt_err(status);
+ }
+
+ /* save the requested VF configuration */
+ if (!v_ret)
+ vf->rss_hashcfg = vrh->hashcfg;
+
+ /* send the response to the VF */
+err:
+ return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_SET_RSS_HASHCFG, v_ret,
+ NULL, 0);
+}
+
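The clear-then-apply policy above is subtle: a failure to clear the old configuration is reported as the final status only when the VF sent an empty hashcfg and therefore clearly meant to disable RSS; otherwise the driver warns and still tries to program the new bits. A standalone sketch of that decision, with clear_rss()/add_rss() as invented stand-ins for ice_rem_vsi_rss_cfg() and ice_add_avf_rss_cfg():

```c
#include <stdint.h>
#include <stdio.h>

static int clear_rss(void) { return -5; }	/* pretend the clear failed */
static int add_rss(uint64_t cfg) { (void)cfg; return 0; }

static int set_hashcfg(uint64_t requested)
{
	int err = clear_rss();

	if (err && !requested)
		return err;	/* VF wanted RSS off: report the failure */
	if (err)
		fprintf(stderr, "warn: stale RSS config may remain\n");

	return requested ? add_rss(requested) : 0;
}

int main(void)
{
	printf("disable -> %d\n", set_hashcfg(0));
	printf("enable  -> %d\n", set_hashcfg(0x3full));
	return 0;
}
```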
diff --git a/drivers/net/ethernet/intel/ice/virt/rss.h b/drivers/net/ethernet/intel/ice/virt/rss.h
new file mode 100644
index 000000000000..784d4c43ce8b
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/virt/rss.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2022, Intel Corporation. */
+
+#ifndef _ICE_VIRT_RSS_H_
+#define _ICE_VIRT_RSS_H_
+
+#include <linux/types.h>
+
+struct ice_vf;
+
+int ice_vc_handle_rss_cfg(struct ice_vf *vf, u8 *msg, bool add);
+int ice_vc_config_rss_key(struct ice_vf *vf, u8 *msg);
+int ice_vc_config_rss_lut(struct ice_vf *vf, u8 *msg);
+int ice_vc_config_rss_hfunc(struct ice_vf *vf, u8 *msg);
+int ice_vc_get_rss_hashcfg(struct ice_vf *vf);
+int ice_vc_set_rss_hashcfg(struct ice_vf *vf, u8 *msg);
+
+#endif /* _ICE_VIRT_RSS_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/virt/virtchnl.c
index 257967273079..f3f921134379 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c
+++ b/drivers/net/ethernet/intel/ice/virt/virtchnl.c
@@ -1,170 +1,20 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (C) 2022, Intel Corporation. */
-#include "ice_virtchnl.h"
+#include "virtchnl.h"
+#include "queues.h"
+#include "rss.h"
#include "ice_vf_lib_private.h"
#include "ice.h"
#include "ice_base.h"
#include "ice_lib.h"
#include "ice_fltr.h"
-#include "ice_virtchnl_allowlist.h"
+#include "allowlist.h"
#include "ice_vf_vsi_vlan_ops.h"
#include "ice_vlan.h"
#include "ice_flex_pipe.h"
#include "ice_dcb_lib.h"
-#define FIELD_SELECTOR(proto_hdr_field) \
- BIT((proto_hdr_field) & PROTO_HDR_FIELD_MASK)
-
-struct ice_vc_hdr_match_type {
- u32 vc_hdr; /* virtchnl headers (VIRTCHNL_PROTO_HDR_XXX) */
- u32 ice_hdr; /* ice headers (ICE_FLOW_SEG_HDR_XXX) */
-};
-
-static const struct ice_vc_hdr_match_type ice_vc_hdr_list[] = {
- {VIRTCHNL_PROTO_HDR_NONE, ICE_FLOW_SEG_HDR_NONE},
- {VIRTCHNL_PROTO_HDR_ETH, ICE_FLOW_SEG_HDR_ETH},
- {VIRTCHNL_PROTO_HDR_S_VLAN, ICE_FLOW_SEG_HDR_VLAN},
- {VIRTCHNL_PROTO_HDR_C_VLAN, ICE_FLOW_SEG_HDR_VLAN},
- {VIRTCHNL_PROTO_HDR_IPV4, ICE_FLOW_SEG_HDR_IPV4 |
- ICE_FLOW_SEG_HDR_IPV_OTHER},
- {VIRTCHNL_PROTO_HDR_IPV6, ICE_FLOW_SEG_HDR_IPV6 |
- ICE_FLOW_SEG_HDR_IPV_OTHER},
- {VIRTCHNL_PROTO_HDR_TCP, ICE_FLOW_SEG_HDR_TCP},
- {VIRTCHNL_PROTO_HDR_UDP, ICE_FLOW_SEG_HDR_UDP},
- {VIRTCHNL_PROTO_HDR_SCTP, ICE_FLOW_SEG_HDR_SCTP},
- {VIRTCHNL_PROTO_HDR_PPPOE, ICE_FLOW_SEG_HDR_PPPOE},
- {VIRTCHNL_PROTO_HDR_GTPU_IP, ICE_FLOW_SEG_HDR_GTPU_IP},
- {VIRTCHNL_PROTO_HDR_GTPU_EH, ICE_FLOW_SEG_HDR_GTPU_EH},
- {VIRTCHNL_PROTO_HDR_GTPU_EH_PDU_DWN,
- ICE_FLOW_SEG_HDR_GTPU_DWN},
- {VIRTCHNL_PROTO_HDR_GTPU_EH_PDU_UP,
- ICE_FLOW_SEG_HDR_GTPU_UP},
- {VIRTCHNL_PROTO_HDR_L2TPV3, ICE_FLOW_SEG_HDR_L2TPV3},
- {VIRTCHNL_PROTO_HDR_ESP, ICE_FLOW_SEG_HDR_ESP},
- {VIRTCHNL_PROTO_HDR_AH, ICE_FLOW_SEG_HDR_AH},
- {VIRTCHNL_PROTO_HDR_PFCP, ICE_FLOW_SEG_HDR_PFCP_SESSION},
-};
-
-struct ice_vc_hash_field_match_type {
- u32 vc_hdr; /* virtchnl headers
- * (VIRTCHNL_PROTO_HDR_XXX)
- */
- u32 vc_hash_field; /* virtchnl hash fields selector
- * FIELD_SELECTOR((VIRTCHNL_PROTO_HDR_ETH_XXX))
- */
- u64 ice_hash_field; /* ice hash fields
- * (BIT_ULL(ICE_FLOW_FIELD_IDX_XXX))
- */
-};
-
-static const struct
-ice_vc_hash_field_match_type ice_vc_hash_field_list[] = {
- {VIRTCHNL_PROTO_HDR_ETH, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_ETH_SRC),
- BIT_ULL(ICE_FLOW_FIELD_IDX_ETH_SA)},
- {VIRTCHNL_PROTO_HDR_ETH, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_ETH_DST),
- BIT_ULL(ICE_FLOW_FIELD_IDX_ETH_DA)},
- {VIRTCHNL_PROTO_HDR_ETH, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_ETH_SRC) |
- FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_ETH_DST),
- ICE_FLOW_HASH_ETH},
- {VIRTCHNL_PROTO_HDR_ETH,
- FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_ETH_ETHERTYPE),
- BIT_ULL(ICE_FLOW_FIELD_IDX_ETH_TYPE)},
- {VIRTCHNL_PROTO_HDR_S_VLAN,
- FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_S_VLAN_ID),
- BIT_ULL(ICE_FLOW_FIELD_IDX_S_VLAN)},
- {VIRTCHNL_PROTO_HDR_C_VLAN,
- FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_C_VLAN_ID),
- BIT_ULL(ICE_FLOW_FIELD_IDX_C_VLAN)},
- {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC),
- BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_SA)},
- {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST),
- BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_DA)},
- {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC) |
- FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST),
- ICE_FLOW_HASH_IPV4},
- {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC) |
- FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT),
- BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_SA) |
- BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT)},
- {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST) |
- FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT),
- BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_DA) |
- BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT)},
- {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC) |
- FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST) |
- FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT),
- ICE_FLOW_HASH_IPV4 | BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT)},
- {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT),
- BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT)},
- {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_SRC),
- BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_SA)},
- {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_DST),
- BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_DA)},
- {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_SRC) |
- FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_DST),
- ICE_FLOW_HASH_IPV6},
- {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_SRC) |
- FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PROT),
- BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_SA) |
- BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PROT)},
- {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_DST) |
- FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PROT),
- BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_DA) |
- BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PROT)},
- {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_SRC) |
- FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_DST) |
- FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PROT),
- ICE_FLOW_HASH_IPV6 | BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PROT)},
- {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PROT),
- BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PROT)},
- {VIRTCHNL_PROTO_HDR_TCP,
- FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_SRC_PORT),
- BIT_ULL(ICE_FLOW_FIELD_IDX_TCP_SRC_PORT)},
- {VIRTCHNL_PROTO_HDR_TCP,
- FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_DST_PORT),
- BIT_ULL(ICE_FLOW_FIELD_IDX_TCP_DST_PORT)},
- {VIRTCHNL_PROTO_HDR_TCP,
- FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_SRC_PORT) |
- FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_DST_PORT),
- ICE_FLOW_HASH_TCP_PORT},
- {VIRTCHNL_PROTO_HDR_UDP,
- FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_SRC_PORT),
- BIT_ULL(ICE_FLOW_FIELD_IDX_UDP_SRC_PORT)},
- {VIRTCHNL_PROTO_HDR_UDP,
- FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_DST_PORT),
- BIT_ULL(ICE_FLOW_FIELD_IDX_UDP_DST_PORT)},
- {VIRTCHNL_PROTO_HDR_UDP,
- FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_SRC_PORT) |
- FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_DST_PORT),
- ICE_FLOW_HASH_UDP_PORT},
- {VIRTCHNL_PROTO_HDR_SCTP,
- FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_SRC_PORT),
- BIT_ULL(ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT)},
- {VIRTCHNL_PROTO_HDR_SCTP,
- FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_DST_PORT),
- BIT_ULL(ICE_FLOW_FIELD_IDX_SCTP_DST_PORT)},
- {VIRTCHNL_PROTO_HDR_SCTP,
- FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_SRC_PORT) |
- FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_DST_PORT),
- ICE_FLOW_HASH_SCTP_PORT},
- {VIRTCHNL_PROTO_HDR_PPPOE,
- FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_PPPOE_SESS_ID),
- BIT_ULL(ICE_FLOW_FIELD_IDX_PPPOE_SESS_ID)},
- {VIRTCHNL_PROTO_HDR_GTPU_IP,
- FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_GTPU_IP_TEID),
- BIT_ULL(ICE_FLOW_FIELD_IDX_GTPU_IP_TEID)},
- {VIRTCHNL_PROTO_HDR_L2TPV3,
- FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_L2TPV3_SESS_ID),
- BIT_ULL(ICE_FLOW_FIELD_IDX_L2TPV3_SESS_ID)},
- {VIRTCHNL_PROTO_HDR_ESP, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_ESP_SPI),
- BIT_ULL(ICE_FLOW_FIELD_IDX_ESP_SPI)},
- {VIRTCHNL_PROTO_HDR_AH, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_AH_SPI),
- BIT_ULL(ICE_FLOW_FIELD_IDX_AH_SPI)},
- {VIRTCHNL_PROTO_HDR_PFCP, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_PFCP_SEID),
- BIT_ULL(ICE_FLOW_FIELD_IDX_PFCP_SEID)},
-};
-
/**
* ice_vc_vf_broadcast - Broadcast a message to all VFs on PF
* @pf: pointer to the PF structure
@@ -338,28 +188,6 @@ static int ice_vc_get_ver_msg(struct ice_vf *vf, u8 *msg)
}
/**
- * ice_vc_get_max_frame_size - get max frame size allowed for VF
- * @vf: VF used to determine max frame size
- *
- * Max frame size is determined based on the current port's max frame size and
- * whether a port VLAN is configured on this VF. The VF is not aware whether
- * it's in a port VLAN so the PF needs to account for this in max frame size
- * checks and sending the max frame size to the VF.
- */
-static u16 ice_vc_get_max_frame_size(struct ice_vf *vf)
-{
- struct ice_port_info *pi = ice_vf_get_port_info(vf);
- u16 max_frame_size;
-
- max_frame_size = pi->phy.link_info.max_frame_size;
-
- if (ice_vf_is_port_vlan_ena(vf))
- max_frame_size -= VLAN_HLEN;
-
- return max_frame_size;
-}
-
-/**
* ice_vc_get_vlan_caps
* @hw: pointer to the hw
* @vf: pointer to the VF info
@@ -559,488 +387,6 @@ bool ice_vc_isvalid_vsi_id(struct ice_vf *vf, u16 vsi_id)
}
/**
- * ice_vc_isvalid_q_id
- * @vsi: VSI to check queue ID against
- * @qid: VSI relative queue ID
- *
- * check for the valid queue ID
- */
-static bool ice_vc_isvalid_q_id(struct ice_vsi *vsi, u16 qid)
-{
- /* allocated Tx and Rx queues should be always equal for VF VSI */
- return qid < vsi->alloc_txq;
-}
-
-/**
- * ice_vc_isvalid_ring_len
- * @ring_len: length of ring
- *
- * check for the valid ring count, should be multiple of ICE_REQ_DESC_MULTIPLE
- * or zero
- */
-static bool ice_vc_isvalid_ring_len(u16 ring_len)
-{
- return ring_len == 0 ||
- (ring_len >= ICE_MIN_NUM_DESC &&
- ring_len <= ICE_MAX_NUM_DESC &&
- !(ring_len % ICE_REQ_DESC_MULTIPLE));
-}
-
-/**
- * ice_vc_validate_pattern
- * @vf: pointer to the VF info
- * @proto: virtchnl protocol headers
- *
- * validate the pattern is supported or not.
- *
- * Return: true on success, false on error.
- */
-bool
-ice_vc_validate_pattern(struct ice_vf *vf, struct virtchnl_proto_hdrs *proto)
-{
- bool is_ipv4 = false;
- bool is_ipv6 = false;
- bool is_udp = false;
- u16 ptype = -1;
- int i = 0;
-
- while (i < proto->count &&
- proto->proto_hdr[i].type != VIRTCHNL_PROTO_HDR_NONE) {
- switch (proto->proto_hdr[i].type) {
- case VIRTCHNL_PROTO_HDR_ETH:
- ptype = ICE_PTYPE_MAC_PAY;
- break;
- case VIRTCHNL_PROTO_HDR_IPV4:
- ptype = ICE_PTYPE_IPV4_PAY;
- is_ipv4 = true;
- break;
- case VIRTCHNL_PROTO_HDR_IPV6:
- ptype = ICE_PTYPE_IPV6_PAY;
- is_ipv6 = true;
- break;
- case VIRTCHNL_PROTO_HDR_UDP:
- if (is_ipv4)
- ptype = ICE_PTYPE_IPV4_UDP_PAY;
- else if (is_ipv6)
- ptype = ICE_PTYPE_IPV6_UDP_PAY;
- is_udp = true;
- break;
- case VIRTCHNL_PROTO_HDR_TCP:
- if (is_ipv4)
- ptype = ICE_PTYPE_IPV4_TCP_PAY;
- else if (is_ipv6)
- ptype = ICE_PTYPE_IPV6_TCP_PAY;
- break;
- case VIRTCHNL_PROTO_HDR_SCTP:
- if (is_ipv4)
- ptype = ICE_PTYPE_IPV4_SCTP_PAY;
- else if (is_ipv6)
- ptype = ICE_PTYPE_IPV6_SCTP_PAY;
- break;
- case VIRTCHNL_PROTO_HDR_GTPU_IP:
- case VIRTCHNL_PROTO_HDR_GTPU_EH:
- if (is_ipv4)
- ptype = ICE_MAC_IPV4_GTPU;
- else if (is_ipv6)
- ptype = ICE_MAC_IPV6_GTPU;
- goto out;
- case VIRTCHNL_PROTO_HDR_L2TPV3:
- if (is_ipv4)
- ptype = ICE_MAC_IPV4_L2TPV3;
- else if (is_ipv6)
- ptype = ICE_MAC_IPV6_L2TPV3;
- goto out;
- case VIRTCHNL_PROTO_HDR_ESP:
- if (is_ipv4)
- ptype = is_udp ? ICE_MAC_IPV4_NAT_T_ESP :
- ICE_MAC_IPV4_ESP;
- else if (is_ipv6)
- ptype = is_udp ? ICE_MAC_IPV6_NAT_T_ESP :
- ICE_MAC_IPV6_ESP;
- goto out;
- case VIRTCHNL_PROTO_HDR_AH:
- if (is_ipv4)
- ptype = ICE_MAC_IPV4_AH;
- else if (is_ipv6)
- ptype = ICE_MAC_IPV6_AH;
- goto out;
- case VIRTCHNL_PROTO_HDR_PFCP:
- if (is_ipv4)
- ptype = ICE_MAC_IPV4_PFCP_SESSION;
- else if (is_ipv6)
- ptype = ICE_MAC_IPV6_PFCP_SESSION;
- goto out;
- default:
- break;
- }
- i++;
- }
-
-out:
- return ice_hw_ptype_ena(&vf->pf->hw, ptype);
-}
-
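The pattern validator above is essentially a tiny state machine: it walks the header list, remembers whether IPv4/IPv6 (and UDP, for NAT-T ESP) were seen, and resolves the sequence to a single packet type that is then checked against the ptypes enabled in hardware. A reduced, self-contained model, with invented header IDs and ptype numbers:

```c
#include <stdbool.h>
#include <stdio.h>

enum hdr { H_NONE, H_ETH, H_IPV4, H_IPV6, H_UDP, H_TCP };

static int resolve_ptype(const enum hdr *hdrs, int count)
{
	bool v4 = false, v6 = false;
	int ptype = -1;

	for (int i = 0; i < count && hdrs[i] != H_NONE; i++) {
		switch (hdrs[i]) {
		case H_ETH:  ptype = 1; break;
		case H_IPV4: ptype = 10; v4 = true; break;
		case H_IPV6: ptype = 20; v6 = true; break;
		case H_UDP:  ptype = v4 ? 11 : v6 ? 21 : ptype; break;
		case H_TCP:  ptype = v4 ? 12 : v6 ? 22 : ptype; break;
		default: break;
		}
	}
	return ptype;	/* caller checks this against enabled ptypes */
}

int main(void)
{
	enum hdr pattern[] = { H_ETH, H_IPV6, H_TCP };

	printf("resolved ptype: %d\n", resolve_ptype(pattern, 3));
	return 0;
}
```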
-/**
- * ice_vc_parse_rss_cfg - parses hash fields and headers from
- * a specific virtchnl RSS cfg
- * @hw: pointer to the hardware
- * @rss_cfg: pointer to the virtchnl RSS cfg
- * @hash_cfg: pointer to the HW hash configuration
- *
- * Return true if all the protocol header and hash fields in the RSS cfg could
- * be parsed, else return false
- *
- * This function parses the virtchnl RSS cfg to be the intended
- * hash fields and the intended header for RSS configuration
- */
-static bool ice_vc_parse_rss_cfg(struct ice_hw *hw,
- struct virtchnl_rss_cfg *rss_cfg,
- struct ice_rss_hash_cfg *hash_cfg)
-{
- const struct ice_vc_hash_field_match_type *hf_list;
- const struct ice_vc_hdr_match_type *hdr_list;
- int i, hf_list_len, hdr_list_len;
- u32 *addl_hdrs = &hash_cfg->addl_hdrs;
- u64 *hash_flds = &hash_cfg->hash_flds;
-
- /* set outer layer RSS as default */
- hash_cfg->hdr_type = ICE_RSS_OUTER_HEADERS;
-
- if (rss_cfg->rss_algorithm == VIRTCHNL_RSS_ALG_TOEPLITZ_SYMMETRIC)
- hash_cfg->symm = true;
- else
- hash_cfg->symm = false;
-
- hf_list = ice_vc_hash_field_list;
- hf_list_len = ARRAY_SIZE(ice_vc_hash_field_list);
- hdr_list = ice_vc_hdr_list;
- hdr_list_len = ARRAY_SIZE(ice_vc_hdr_list);
-
- for (i = 0; i < rss_cfg->proto_hdrs.count; i++) {
- struct virtchnl_proto_hdr *proto_hdr =
- &rss_cfg->proto_hdrs.proto_hdr[i];
- bool hdr_found = false;
- int j;
-
- /* Find matched ice headers according to virtchnl headers. */
- for (j = 0; j < hdr_list_len; j++) {
- struct ice_vc_hdr_match_type hdr_map = hdr_list[j];
-
- if (proto_hdr->type == hdr_map.vc_hdr) {
- *addl_hdrs |= hdr_map.ice_hdr;
- hdr_found = true;
- }
- }
-
- if (!hdr_found)
- return false;
-
- /* Find matched ice hash fields according to
- * virtchnl hash fields.
- */
- for (j = 0; j < hf_list_len; j++) {
- struct ice_vc_hash_field_match_type hf_map = hf_list[j];
-
- if (proto_hdr->type == hf_map.vc_hdr &&
- proto_hdr->field_selector == hf_map.vc_hash_field) {
- *hash_flds |= hf_map.ice_hash_field;
- break;
- }
- }
- }
-
- return true;
-}
-
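The parser above is purely table-driven: one table translates virtchnl header IDs into ice flow-header bits (a miss rejects the whole config), a second translates exact (header, field-selector) pairs into hash-field bits, and the results are OR-ed into the accumulated configuration. A toy version with two-entry tables, all values invented:

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct hdr_map   { uint32_t vc_hdr; uint32_t ice_hdr; };
struct field_map { uint32_t vc_hdr; uint32_t vc_sel; uint64_t ice_flds; };

static const struct hdr_map   hdr_list[] = { { 4, 0x08 }, { 6, 0x10 } };
static const struct field_map fld_list[] = { { 4, 0x3, 0x30 },
					     { 6, 0x3, 0xc0 } };

static bool parse_one(uint32_t vc_hdr, uint32_t vc_sel,
		      uint32_t *ice_hdrs, uint64_t *ice_flds)
{
	bool found = false;
	size_t i;

	for (i = 0; i < sizeof(hdr_list) / sizeof(hdr_list[0]); i++) {
		if (vc_hdr == hdr_list[i].vc_hdr) {
			*ice_hdrs |= hdr_list[i].ice_hdr;
			found = true;
		}
	}
	if (!found)
		return false;	/* unknown header: reject the config */

	for (i = 0; i < sizeof(fld_list) / sizeof(fld_list[0]); i++) {
		if (vc_hdr == fld_list[i].vc_hdr &&
		    vc_sel == fld_list[i].vc_sel) {
			*ice_flds |= fld_list[i].ice_flds;
			break;	/* selector match is exact; first hit wins */
		}
	}
	return true;
}

int main(void)
{
	uint32_t hdrs = 0;
	uint64_t flds = 0;

	printf("ok=%d hdrs=0x%x flds=0x%llx\n",
	       parse_one(4, 0x3, &hdrs, &flds), hdrs,
	       (unsigned long long)flds);
	return 0;
}
```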
-/**
- * ice_vf_adv_rss_offload_ena - determine if capabilities support advanced
- * RSS offloads
- * @caps: VF driver negotiated capabilities
- *
- * Return true if VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF capability is set,
- * else return false
- */
-static bool ice_vf_adv_rss_offload_ena(u32 caps)
-{
- return !!(caps & VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF);
-}
-
-/**
- * ice_vc_handle_rss_cfg
- * @vf: pointer to the VF info
- * @msg: pointer to the message buffer
- * @add: add a RSS config if true, otherwise delete a RSS config
- *
- * This function adds/deletes a RSS config
- */
-static int ice_vc_handle_rss_cfg(struct ice_vf *vf, u8 *msg, bool add)
-{
- u32 v_opcode = add ? VIRTCHNL_OP_ADD_RSS_CFG : VIRTCHNL_OP_DEL_RSS_CFG;
- struct virtchnl_rss_cfg *rss_cfg = (struct virtchnl_rss_cfg *)msg;
- enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
- struct device *dev = ice_pf_to_dev(vf->pf);
- struct ice_hw *hw = &vf->pf->hw;
- struct ice_vsi *vsi;
-
- if (!test_bit(ICE_FLAG_RSS_ENA, vf->pf->flags)) {
- dev_dbg(dev, "VF %d attempting to configure RSS, but RSS is not supported by the PF\n",
- vf->vf_id);
- v_ret = VIRTCHNL_STATUS_ERR_NOT_SUPPORTED;
- goto error_param;
- }
-
- if (!ice_vf_adv_rss_offload_ena(vf->driver_caps)) {
- dev_dbg(dev, "VF %d attempting to configure RSS, but Advanced RSS offload is not supported\n",
- vf->vf_id);
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- goto error_param;
- }
-
- if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- goto error_param;
- }
-
- if (rss_cfg->proto_hdrs.count > VIRTCHNL_MAX_NUM_PROTO_HDRS ||
- rss_cfg->rss_algorithm < VIRTCHNL_RSS_ALG_TOEPLITZ_ASYMMETRIC ||
- rss_cfg->rss_algorithm > VIRTCHNL_RSS_ALG_XOR_SYMMETRIC) {
- dev_dbg(dev, "VF %d attempting to configure RSS, but RSS configuration is not valid\n",
- vf->vf_id);
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- goto error_param;
- }
-
- vsi = ice_get_vf_vsi(vf);
- if (!vsi) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- goto error_param;
- }
-
- if (!ice_vc_validate_pattern(vf, &rss_cfg->proto_hdrs)) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- goto error_param;
- }
-
- if (rss_cfg->rss_algorithm == VIRTCHNL_RSS_ALG_R_ASYMMETRIC) {
- struct ice_vsi_ctx *ctx;
- u8 lut_type, hash_type;
- int status;
-
- lut_type = ICE_AQ_VSI_Q_OPT_RSS_LUT_VSI;
- hash_type = add ? ICE_AQ_VSI_Q_OPT_RSS_HASH_XOR :
- ICE_AQ_VSI_Q_OPT_RSS_HASH_TPLZ;
-
- ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
- if (!ctx) {
- v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY;
- goto error_param;
- }
-
- ctx->info.q_opt_rss =
- FIELD_PREP(ICE_AQ_VSI_Q_OPT_RSS_LUT_M, lut_type) |
- FIELD_PREP(ICE_AQ_VSI_Q_OPT_RSS_HASH_M, hash_type);
-
- /* Preserve existing queueing option setting */
- ctx->info.q_opt_rss |= (vsi->info.q_opt_rss &
- ICE_AQ_VSI_Q_OPT_RSS_GBL_LUT_M);
- ctx->info.q_opt_tc = vsi->info.q_opt_tc;
- ctx->info.q_opt_flags = vsi->info.q_opt_rss;
-
- ctx->info.valid_sections =
- cpu_to_le16(ICE_AQ_VSI_PROP_Q_OPT_VALID);
-
- status = ice_update_vsi(hw, vsi->idx, ctx, NULL);
- if (status) {
- dev_err(dev, "update VSI for RSS failed, err %d aq_err %s\n",
- status, libie_aq_str(hw->adminq.sq_last_status));
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- } else {
- vsi->info.q_opt_rss = ctx->info.q_opt_rss;
- }
-
- kfree(ctx);
- } else {
- struct ice_rss_hash_cfg cfg;
-
- /* Only check for none raw pattern case */
- if (!ice_vc_validate_pattern(vf, &rss_cfg->proto_hdrs)) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- goto error_param;
- }
- cfg.addl_hdrs = ICE_FLOW_SEG_HDR_NONE;
- cfg.hash_flds = ICE_HASH_INVALID;
- cfg.hdr_type = ICE_RSS_ANY_HEADERS;
-
- if (!ice_vc_parse_rss_cfg(hw, rss_cfg, &cfg)) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- goto error_param;
- }
-
- if (add) {
- if (ice_add_rss_cfg(hw, vsi, &cfg)) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- dev_err(dev, "ice_add_rss_cfg failed for vsi = %d, v_ret = %d\n",
- vsi->vsi_num, v_ret);
- }
- } else {
- int status;
-
- status = ice_rem_rss_cfg(hw, vsi->idx, &cfg);
- /* We just ignore -ENOENT, because if two configurations
- * share the same profile remove one of them actually
- * removes both, since the profile is deleted.
- */
- if (status && status != -ENOENT) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- dev_err(dev, "ice_rem_rss_cfg failed for VF ID:%d, error:%d\n",
- vf->vf_id, status);
- }
- }
- }
-
-error_param:
- return ice_vc_send_msg_to_vf(vf, v_opcode, v_ret, NULL, 0);
-}
-
-/**
- * ice_vc_config_rss_key
- * @vf: pointer to the VF info
- * @msg: pointer to the msg buffer
- *
- * Configure the VF's RSS key
- */
-static int ice_vc_config_rss_key(struct ice_vf *vf, u8 *msg)
-{
- enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
- struct virtchnl_rss_key *vrk =
- (struct virtchnl_rss_key *)msg;
- struct ice_vsi *vsi;
-
- if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- goto error_param;
- }
-
- if (!ice_vc_isvalid_vsi_id(vf, vrk->vsi_id)) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- goto error_param;
- }
-
- if (vrk->key_len != ICE_VSIQF_HKEY_ARRAY_SIZE) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- goto error_param;
- }
-
- if (!test_bit(ICE_FLAG_RSS_ENA, vf->pf->flags)) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- goto error_param;
- }
-
- vsi = ice_get_vf_vsi(vf);
- if (!vsi) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- goto error_param;
- }
-
- if (ice_set_rss_key(vsi, vrk->key))
- v_ret = VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR;
-error_param:
- return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_RSS_KEY, v_ret,
- NULL, 0);
-}
-
-/**
- * ice_vc_config_rss_lut
- * @vf: pointer to the VF info
- * @msg: pointer to the msg buffer
- *
- * Configure the VF's RSS LUT
- */
-static int ice_vc_config_rss_lut(struct ice_vf *vf, u8 *msg)
-{
- struct virtchnl_rss_lut *vrl = (struct virtchnl_rss_lut *)msg;
- enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
- struct ice_vsi *vsi;
-
- if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- goto error_param;
- }
-
- if (!ice_vc_isvalid_vsi_id(vf, vrl->vsi_id)) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- goto error_param;
- }
-
- if (vrl->lut_entries != ICE_LUT_VSI_SIZE) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- goto error_param;
- }
-
- if (!test_bit(ICE_FLAG_RSS_ENA, vf->pf->flags)) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- goto error_param;
- }
-
- vsi = ice_get_vf_vsi(vf);
- if (!vsi) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- goto error_param;
- }
-
- if (ice_set_rss_lut(vsi, vrl->lut, ICE_LUT_VSI_SIZE))
- v_ret = VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR;
-error_param:
- return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_RSS_LUT, v_ret,
- NULL, 0);
-}
-
-/**
- * ice_vc_config_rss_hfunc
- * @vf: pointer to the VF info
- * @msg: pointer to the msg buffer
- *
- * Configure the VF's RSS Hash function
- */
-static int ice_vc_config_rss_hfunc(struct ice_vf *vf, u8 *msg)
-{
- struct virtchnl_rss_hfunc *vrh = (struct virtchnl_rss_hfunc *)msg;
- enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
- u8 hfunc = ICE_AQ_VSI_Q_OPT_RSS_HASH_TPLZ;
- struct ice_vsi *vsi;
-
- if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- goto error_param;
- }
-
- if (!ice_vc_isvalid_vsi_id(vf, vrh->vsi_id)) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- goto error_param;
- }
-
- if (!test_bit(ICE_FLAG_RSS_ENA, vf->pf->flags)) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- goto error_param;
- }
-
- vsi = ice_get_vf_vsi(vf);
- if (!vsi) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- goto error_param;
- }
-
- if (vrh->rss_algorithm == VIRTCHNL_RSS_ALG_TOEPLITZ_SYMMETRIC)
- hfunc = ICE_AQ_VSI_Q_OPT_RSS_HASH_SYM_TPLZ;
-
- if (ice_set_rss_hfunc(vsi, hfunc))
- v_ret = VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR;
-error_param:
- return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_RSS_HFUNC, v_ret,
- NULL, 0);
-}
-
-/**
* ice_vc_get_qos_caps - Get current QoS caps from PF
* @vf: pointer to the VF info
*
@@ -1122,110 +468,6 @@ err:
}
/**
- * ice_vf_cfg_qs_bw - Configure per queue bandwidth
- * @vf: pointer to the VF info
- * @num_queues: number of queues to be configured
- *
- * Configure per queue bandwidth.
- *
- * Return: 0 on success or negative error value.
- */
-static int ice_vf_cfg_qs_bw(struct ice_vf *vf, u16 num_queues)
-{
- struct ice_hw *hw = &vf->pf->hw;
- struct ice_vsi *vsi;
- int ret;
- u16 i;
-
- vsi = ice_get_vf_vsi(vf);
- if (!vsi)
- return -EINVAL;
-
- for (i = 0; i < num_queues; i++) {
- u32 p_rate, min_rate;
- u8 tc;
-
- p_rate = vf->qs_bw[i].peak;
- min_rate = vf->qs_bw[i].committed;
- tc = vf->qs_bw[i].tc;
- if (p_rate)
- ret = ice_cfg_q_bw_lmt(hw->port_info, vsi->idx, tc,
- vf->qs_bw[i].queue_id,
- ICE_MAX_BW, p_rate);
- else
- ret = ice_cfg_q_bw_dflt_lmt(hw->port_info, vsi->idx, tc,
- vf->qs_bw[i].queue_id,
- ICE_MAX_BW);
- if (ret)
- return ret;
-
- if (min_rate)
- ret = ice_cfg_q_bw_lmt(hw->port_info, vsi->idx, tc,
- vf->qs_bw[i].queue_id,
- ICE_MIN_BW, min_rate);
- else
- ret = ice_cfg_q_bw_dflt_lmt(hw->port_info, vsi->idx, tc,
- vf->qs_bw[i].queue_id,
- ICE_MIN_BW);
-
- if (ret)
- return ret;
- }
-
- return 0;
-}
-
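One detail of the loop above deserves emphasis: a zero peak or committed rate means "restore the default limit", not "limit to zero". The branch structure, sketched with invented stub helpers in place of ice_cfg_q_bw_lmt() and ice_cfg_q_bw_dflt_lmt():

```c
#include <stdio.h>

static int cfg_limit(unsigned int q, const char *kind, unsigned int kbps)
{
	printf("q%u: %s limit %u Kbps\n", q, kind, kbps);
	return 0;
}

static int cfg_default(unsigned int q, const char *kind)
{
	printf("q%u: %s limit restored to default\n", q, kind);
	return 0;
}

static int cfg_queue_bw(unsigned int q, unsigned int peak, unsigned int min)
{
	int err = peak ? cfg_limit(q, "peak", peak) : cfg_default(q, "peak");

	if (err)
		return err;
	return min ? cfg_limit(q, "min", min) : cfg_default(q, "min");
}

int main(void)
{
	cfg_queue_bw(0, 100000, 0);	/* cap peak, default floor */
	cfg_queue_bw(1, 0, 0);		/* clear both limits */
	return 0;
}
```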
-/**
- * ice_vf_cfg_q_quanta_profile - Configure quanta profile
- * @vf: pointer to the VF info
- * @quanta_prof_idx: pointer to the quanta profile index
- * @quanta_size: quanta size to be set
- *
- * This function chooses available quanta profile and configures the register.
- * The quanta profile is evenly divided by the number of device ports, and then
- * available to the specific PF and VFs. The first profile for each PF is a
- * reserved default profile. Only quanta size of the rest unused profile can be
- * modified.
- *
- * Return: 0 on success or negative error value.
- */
-static int ice_vf_cfg_q_quanta_profile(struct ice_vf *vf, u16 quanta_size,
- u16 *quanta_prof_idx)
-{
- const u16 n_desc = calc_quanta_desc(quanta_size);
- struct ice_hw *hw = &vf->pf->hw;
- const u16 n_cmd = 2 * n_desc;
- struct ice_pf *pf = vf->pf;
- u16 per_pf, begin_id;
- u8 n_used;
- u32 reg;
-
- begin_id = (GLCOMM_QUANTA_PROF_MAX_INDEX + 1) / hw->dev_caps.num_funcs *
- hw->logical_pf_id;
-
- if (quanta_size == ICE_DFLT_QUANTA) {
- *quanta_prof_idx = begin_id;
- } else {
- per_pf = (GLCOMM_QUANTA_PROF_MAX_INDEX + 1) /
- hw->dev_caps.num_funcs;
- n_used = pf->num_quanta_prof_used;
- if (n_used < per_pf) {
- *quanta_prof_idx = begin_id + 1 + n_used;
- pf->num_quanta_prof_used++;
- } else {
- return -EINVAL;
- }
- }
-
- reg = FIELD_PREP(GLCOMM_QUANTA_PROF_QUANTA_SIZE_M, quanta_size) |
- FIELD_PREP(GLCOMM_QUANTA_PROF_MAX_CMD_M, n_cmd) |
- FIELD_PREP(GLCOMM_QUANTA_PROF_MAX_DESC_M, n_desc);
- wr32(hw, GLCOMM_QUANTA_PROF(*quanta_prof_idx), reg);
-
- return 0;
-}
-
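The index arithmetic above slices one global quanta-profile table evenly across the device's functions and reserves the first index of each slice as the unmodifiable default. A worked example, assuming a 16-entry table (i.e. GLCOMM_QUANTA_PROF_MAX_INDEX == 15) and 4 PFs:

```c
#include <stdio.h>

int main(void)
{
	const unsigned int table_size = 16, num_funcs = 4;
	const unsigned int per_pf = table_size / num_funcs;
	unsigned int pf;

	for (pf = 0; pf < num_funcs; pf++) {
		unsigned int begin_id = per_pf * pf;

		/* begin_id is the reserved default profile; the rest of
		 * the slice may carry custom quanta sizes.
		 */
		printf("PF%u: default=%u, custom=%u..%u\n",
		       pf, begin_id, begin_id + 1, begin_id + per_pf - 1);
	}
	return 0;
}
```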
-/**
* ice_vc_cfg_promiscuous_mode_msg
* @vf: pointer to the VF info
* @msg: pointer to the msg buffer
@@ -1407,757 +649,6 @@ error_param:
}
/**
- * ice_vc_validate_vqs_bitmaps - validate Rx/Tx queue bitmaps from VIRTCHNL
- * @vqs: virtchnl_queue_select structure containing bitmaps to validate
- *
- * Return true on successful validation, else false
- */
-static bool ice_vc_validate_vqs_bitmaps(struct virtchnl_queue_select *vqs)
-{
- if ((!vqs->rx_queues && !vqs->tx_queues) ||
- vqs->rx_queues >= BIT(ICE_MAX_RSS_QS_PER_VF) ||
- vqs->tx_queues >= BIT(ICE_MAX_RSS_QS_PER_VF))
- return false;
-
- return true;
-}
-
-/**
- * ice_vf_ena_txq_interrupt - enable Tx queue interrupt via QINT_TQCTL
- * @vsi: VSI of the VF to configure
- * @q_idx: VF queue index used to determine the queue in the PF's space
- */
-void ice_vf_ena_txq_interrupt(struct ice_vsi *vsi, u32 q_idx)
-{
- struct ice_hw *hw = &vsi->back->hw;
- u32 pfq = vsi->txq_map[q_idx];
- u32 reg;
-
- reg = rd32(hw, QINT_TQCTL(pfq));
-
- /* MSI-X index 0 in the VF's space is always for the OICR, which means
- * this is most likely a poll mode VF driver, so don't enable an
- * interrupt that was never configured via VIRTCHNL_OP_CONFIG_IRQ_MAP
- */
- if (!(reg & QINT_TQCTL_MSIX_INDX_M))
- return;
-
- wr32(hw, QINT_TQCTL(pfq), reg | QINT_TQCTL_CAUSE_ENA_M);
-}
-
-/**
- * ice_vf_ena_rxq_interrupt - enable Rx queue interrupt via QINT_RQCTL
- * @vsi: VSI of the VF to configure
- * @q_idx: VF queue index used to determine the queue in the PF's space
- */
-void ice_vf_ena_rxq_interrupt(struct ice_vsi *vsi, u32 q_idx)
-{
- struct ice_hw *hw = &vsi->back->hw;
- u32 pfq = vsi->rxq_map[q_idx];
- u32 reg;
-
- reg = rd32(hw, QINT_RQCTL(pfq));
-
- /* MSI-X index 0 in the VF's space is always for the OICR, which means
- * this is most likely a poll mode VF driver, so don't enable an
- * interrupt that was never configured via VIRTCHNL_OP_CONFIG_IRQ_MAP
- */
- if (!(reg & QINT_RQCTL_MSIX_INDX_M))
- return;
-
- wr32(hw, QINT_RQCTL(pfq), reg | QINT_RQCTL_CAUSE_ENA_M);
-}
-
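Both enable helpers share one gate: read the queue's interrupt control register and set the cause-enable bit only if a non-zero MSI-X index was ever programmed, since index 0 belongs to the OICR. A simulated read-modify-write; the register layout and the rd32()/wr32() stand-ins are faked, not real MMIO:

```c
#include <stdint.h>
#include <stdio.h>

#define MSIX_INDX_MASK	0x7ffu		/* hypothetical field */
#define CAUSE_ENA	0x40000000u	/* hypothetical bit */

static uint32_t fake_reg;
static uint32_t rd32(void) { return fake_reg; }
static void wr32(uint32_t v) { fake_reg = v; }

static void ena_q_interrupt(void)
{
	uint32_t reg = rd32();

	/* MSI-X index 0 means the VF never mapped this queue to a vector
	 * (likely a poll-mode driver), so leave the cause disabled.
	 */
	if (!(reg & MSIX_INDX_MASK))
		return;

	wr32(reg | CAUSE_ENA);
}

int main(void)
{
	fake_reg = 0;			/* unmapped queue */
	ena_q_interrupt();
	printf("unmapped: 0x%08x\n", fake_reg);

	fake_reg = 3;			/* vector 3 mapped */
	ena_q_interrupt();
	printf("mapped:   0x%08x\n", fake_reg);
	return 0;
}
```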
-/**
- * ice_vc_ena_qs_msg
- * @vf: pointer to the VF info
- * @msg: pointer to the msg buffer
- *
- * called from the VF to enable all or specific queue(s)
- */
-static int ice_vc_ena_qs_msg(struct ice_vf *vf, u8 *msg)
-{
- enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
- struct virtchnl_queue_select *vqs =
- (struct virtchnl_queue_select *)msg;
- struct ice_vsi *vsi;
- unsigned long q_map;
- u16 vf_q_id;
-
- if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- goto error_param;
- }
-
- if (!ice_vc_isvalid_vsi_id(vf, vqs->vsi_id)) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- goto error_param;
- }
-
- if (!ice_vc_validate_vqs_bitmaps(vqs)) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- goto error_param;
- }
-
- vsi = ice_get_vf_vsi(vf);
- if (!vsi) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- goto error_param;
- }
-
- /* Enable only Rx rings, Tx rings were enabled by the FW when the
- * Tx queue group list was configured and the context bits were
- * programmed using ice_vsi_cfg_txqs
- */
- q_map = vqs->rx_queues;
- for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) {
- if (!ice_vc_isvalid_q_id(vsi, vf_q_id)) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- goto error_param;
- }
-
- /* Skip queue if enabled */
- if (test_bit(vf_q_id, vf->rxq_ena))
- continue;
-
- if (ice_vsi_ctrl_one_rx_ring(vsi, true, vf_q_id, true)) {
- dev_err(ice_pf_to_dev(vsi->back), "Failed to enable Rx ring %d on VSI %d\n",
- vf_q_id, vsi->vsi_num);
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- goto error_param;
- }
-
- ice_vf_ena_rxq_interrupt(vsi, vf_q_id);
- set_bit(vf_q_id, vf->rxq_ena);
- }
-
- q_map = vqs->tx_queues;
- for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) {
- if (!ice_vc_isvalid_q_id(vsi, vf_q_id)) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- goto error_param;
- }
-
- /* Skip queue if enabled */
- if (test_bit(vf_q_id, vf->txq_ena))
- continue;
-
- ice_vf_ena_txq_interrupt(vsi, vf_q_id);
- set_bit(vf_q_id, vf->txq_ena);
- }
-
- /* Set flag to indicate that queues are enabled */
- if (v_ret == VIRTCHNL_STATUS_SUCCESS)
- set_bit(ICE_VF_STATE_QS_ENA, vf->vf_states);
-
-error_param:
- /* send the response to the VF */
- return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ENABLE_QUEUES, v_ret,
- NULL, 0);
-}
-
-/**
- * ice_vf_vsi_dis_single_txq - disable a single Tx queue
- * @vf: VF to disable queue for
- * @vsi: VSI for the VF
- * @q_id: VF relative (0-based) queue ID
- *
- * Attempt to disable the Tx queue passed in. If the Tx queue was successfully
- * disabled then clear q_id bit in the enabled queues bitmap and return
- * success. Otherwise return error.
- */
-int ice_vf_vsi_dis_single_txq(struct ice_vf *vf, struct ice_vsi *vsi, u16 q_id)
-{
- struct ice_txq_meta txq_meta = { 0 };
- struct ice_tx_ring *ring;
- int err;
-
- if (!test_bit(q_id, vf->txq_ena))
- dev_dbg(ice_pf_to_dev(vsi->back), "Queue %u on VSI %u is not enabled, but stopping it anyway\n",
- q_id, vsi->vsi_num);
-
- ring = vsi->tx_rings[q_id];
- if (!ring)
- return -EINVAL;
-
- ice_fill_txq_meta(vsi, ring, &txq_meta);
-
- err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, vf->vf_id, ring, &txq_meta);
- if (err) {
- dev_err(ice_pf_to_dev(vsi->back), "Failed to stop Tx ring %d on VSI %d\n",
- q_id, vsi->vsi_num);
- return err;
- }
-
- /* Clear enabled queues flag */
- clear_bit(q_id, vf->txq_ena);
-
- return 0;
-}
-
-/**
- * ice_vc_dis_qs_msg
- * @vf: pointer to the VF info
- * @msg: pointer to the msg buffer
- *
- * called from the VF to disable all or specific queue(s)
- */
-static int ice_vc_dis_qs_msg(struct ice_vf *vf, u8 *msg)
-{
- enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
- struct virtchnl_queue_select *vqs =
- (struct virtchnl_queue_select *)msg;
- struct ice_vsi *vsi;
- unsigned long q_map;
- u16 vf_q_id;
-
- if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states) &&
- !test_bit(ICE_VF_STATE_QS_ENA, vf->vf_states)) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- goto error_param;
- }
-
- if (!ice_vc_isvalid_vsi_id(vf, vqs->vsi_id)) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- goto error_param;
- }
-
- if (!ice_vc_validate_vqs_bitmaps(vqs)) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- goto error_param;
- }
-
- vsi = ice_get_vf_vsi(vf);
- if (!vsi) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- goto error_param;
- }
-
- if (vqs->tx_queues) {
- q_map = vqs->tx_queues;
-
- for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) {
- if (!ice_vc_isvalid_q_id(vsi, vf_q_id)) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- goto error_param;
- }
-
- if (ice_vf_vsi_dis_single_txq(vf, vsi, vf_q_id)) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- goto error_param;
- }
- }
- }
-
- q_map = vqs->rx_queues;
- /* speed up Rx queue disable by batching them if possible */
- if (q_map &&
- bitmap_equal(&q_map, vf->rxq_ena, ICE_MAX_RSS_QS_PER_VF)) {
- if (ice_vsi_stop_all_rx_rings(vsi)) {
- dev_err(ice_pf_to_dev(vsi->back), "Failed to stop all Rx rings on VSI %d\n",
- vsi->vsi_num);
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- goto error_param;
- }
-
- bitmap_zero(vf->rxq_ena, ICE_MAX_RSS_QS_PER_VF);
- } else if (q_map) {
- for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) {
- if (!ice_vc_isvalid_q_id(vsi, vf_q_id)) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- goto error_param;
- }
-
- /* Skip queue if not enabled */
- if (!test_bit(vf_q_id, vf->rxq_ena))
- continue;
-
- if (ice_vsi_ctrl_one_rx_ring(vsi, false, vf_q_id,
- true)) {
- dev_err(ice_pf_to_dev(vsi->back), "Failed to stop Rx ring %d on VSI %d\n",
- vf_q_id, vsi->vsi_num);
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- goto error_param;
- }
-
- /* Clear enabled queues flag */
- clear_bit(vf_q_id, vf->rxq_ena);
- }
- }
-
- /* Clear enabled queues flag */
- if (v_ret == VIRTCHNL_STATUS_SUCCESS && ice_vf_has_no_qs_ena(vf))
- clear_bit(ICE_VF_STATE_QS_ENA, vf->vf_states);
-
-error_param:
- /* send the response to the VF */
- return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_DISABLE_QUEUES, v_ret,
- NULL, 0);
-}
-
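The Rx half of the disable path above has a fast path: when the requested bitmap equals the enabled bitmap exactly, all rings are stopped with one batched call instead of one admin command per queue. A compact model using 64-bit masks in place of the kernel bitmaps:

```c
#include <stdint.h>
#include <stdio.h>

static void stop_all(void) { puts("batched stop of all Rx rings"); }
static void stop_one(unsigned int q) { printf("stop Rx ring %u\n", q); }

static void dis_rxqs(uint64_t req, uint64_t *ena)
{
	unsigned int q;

	if (req && req == *ena) {	/* exact match: batch it */
		stop_all();
		*ena = 0;
		return;
	}
	for (q = 0; q < 64; q++) {
		if (((req >> q) & 1) && ((*ena >> q) & 1)) {
			stop_one(q);
			*ena &= ~(1ull << q);
		}
	}
}

int main(void)
{
	uint64_t ena = 0xf;

	dis_rxqs(0xf, &ena);	/* batched */
	ena = 0xf;
	dis_rxqs(0x5, &ena);	/* per-queue */
	printf("still enabled: 0x%llx\n", (unsigned long long)ena);
	return 0;
}
```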
-/**
- * ice_cfg_interrupt
- * @vf: pointer to the VF info
- * @vsi: the VSI being configured
- * @map: vector map for mapping vectors to queues
- * @q_vector: structure for interrupt vector
- * configure the IRQ to queue map
- */
-static enum virtchnl_status_code
-ice_cfg_interrupt(struct ice_vf *vf, struct ice_vsi *vsi,
- struct virtchnl_vector_map *map,
- struct ice_q_vector *q_vector)
-{
- u16 vsi_q_id, vsi_q_id_idx;
- unsigned long qmap;
-
- q_vector->num_ring_rx = 0;
- q_vector->num_ring_tx = 0;
-
- qmap = map->rxq_map;
- for_each_set_bit(vsi_q_id_idx, &qmap, ICE_MAX_RSS_QS_PER_VF) {
- vsi_q_id = vsi_q_id_idx;
-
- if (!ice_vc_isvalid_q_id(vsi, vsi_q_id))
- return VIRTCHNL_STATUS_ERR_PARAM;
-
- q_vector->num_ring_rx++;
- q_vector->rx.itr_idx = map->rxitr_idx;
- vsi->rx_rings[vsi_q_id]->q_vector = q_vector;
- ice_cfg_rxq_interrupt(vsi, vsi_q_id,
- q_vector->vf_reg_idx,
- q_vector->rx.itr_idx);
- }
-
- qmap = map->txq_map;
- for_each_set_bit(vsi_q_id_idx, &qmap, ICE_MAX_RSS_QS_PER_VF) {
- vsi_q_id = vsi_q_id_idx;
-
- if (!ice_vc_isvalid_q_id(vsi, vsi_q_id))
- return VIRTCHNL_STATUS_ERR_PARAM;
-
- q_vector->num_ring_tx++;
- q_vector->tx.itr_idx = map->txitr_idx;
- vsi->tx_rings[vsi_q_id]->q_vector = q_vector;
- ice_cfg_txq_interrupt(vsi, vsi_q_id,
- q_vector->vf_reg_idx,
- q_vector->tx.itr_idx);
- }
-
- return VIRTCHNL_STATUS_SUCCESS;
-}
-
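Stripped of the ring and ITR bookkeeping, ice_cfg_interrupt() is a pair of bitmap walks: every set bit in the vector's Rx/Tx queue maps claims that queue for the vector and bumps its ring count. A simplified walk over plain 64-bit maps:

```c
#include <stdint.h>
#include <stdio.h>

static int map_vector(uint64_t rxq_map, uint64_t txq_map, unsigned int nqs)
{
	unsigned int q, rx = 0, tx = 0;

	for (q = 0; q < nqs; q++) {
		if ((rxq_map >> q) & 1)
			rx++;	/* real code also ties the ring to the vector */
		if ((txq_map >> q) & 1)
			tx++;
	}
	printf("vector serves %u Rx / %u Tx rings\n", rx, tx);
	return 0;
}

int main(void)
{
	return map_vector(0x3, 0x3, 16);
}
```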
-/**
- * ice_vc_cfg_irq_map_msg
- * @vf: pointer to the VF info
- * @msg: pointer to the msg buffer
- *
- * called from the VF to configure the IRQ to queue map
- */
-static int ice_vc_cfg_irq_map_msg(struct ice_vf *vf, u8 *msg)
-{
- enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
- u16 num_q_vectors_mapped, vsi_id, vector_id;
- struct virtchnl_irq_map_info *irqmap_info;
- struct virtchnl_vector_map *map;
- struct ice_vsi *vsi;
- int i;
-
- irqmap_info = (struct virtchnl_irq_map_info *)msg;
- num_q_vectors_mapped = irqmap_info->num_vectors;
-
- /* Check to make sure number of VF vectors mapped is not greater than
- * number of VF vectors originally allocated, and check that
- * there is actually at least a single VF queue vector mapped
- */
- if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states) ||
- vf->num_msix < num_q_vectors_mapped ||
- !num_q_vectors_mapped) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- goto error_param;
- }
-
- vsi = ice_get_vf_vsi(vf);
- if (!vsi) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- goto error_param;
- }
-
- for (i = 0; i < num_q_vectors_mapped; i++) {
- struct ice_q_vector *q_vector;
-
- map = &irqmap_info->vecmap[i];
-
- vector_id = map->vector_id;
- vsi_id = map->vsi_id;
- /* vector_id is always 0-based for each VF, and can never be
- * larger than or equal to the max allowed interrupts per VF
- */
- if (!(vector_id < vf->num_msix) ||
- !ice_vc_isvalid_vsi_id(vf, vsi_id) ||
- (!vector_id && (map->rxq_map || map->txq_map))) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- goto error_param;
- }
-
- /* No need to map VF miscellaneous or rogue vector */
- if (!vector_id)
- continue;
-
- /* Subtract non queue vector from vector_id passed by VF
- * to get actual number of VSI queue vector array index
- */
- q_vector = vsi->q_vectors[vector_id - ICE_NONQ_VECS_VF];
- if (!q_vector) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- goto error_param;
- }
-
- /* look out for an invalid queue index */
- v_ret = ice_cfg_interrupt(vf, vsi, map, q_vector);
- if (v_ret)
- goto error_param;
- }
-
-error_param:
- /* send the response to the VF */
- return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_IRQ_MAP, v_ret,
- NULL, 0);
-}
-
-/**
- * ice_vc_cfg_q_bw - Configure per queue bandwidth
- * @vf: pointer to the VF info
- * @msg: pointer to the msg buffer which holds the command descriptor
- *
- * Configure VF queues bandwidth.
- *
- * Return: 0 on success or negative error value.
- */
-static int ice_vc_cfg_q_bw(struct ice_vf *vf, u8 *msg)
-{
- enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
- struct virtchnl_queues_bw_cfg *qbw =
- (struct virtchnl_queues_bw_cfg *)msg;
- struct ice_vsi *vsi;
- u16 i;
-
- if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states) ||
- !ice_vc_isvalid_vsi_id(vf, qbw->vsi_id)) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- goto err;
- }
-
- vsi = ice_get_vf_vsi(vf);
- if (!vsi) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- goto err;
- }
-
- if (qbw->num_queues > ICE_MAX_RSS_QS_PER_VF ||
- qbw->num_queues > min_t(u16, vsi->alloc_txq, vsi->alloc_rxq)) {
- dev_err(ice_pf_to_dev(vf->pf), "VF-%d trying to configure more than allocated number of queues: %d\n",
- vf->vf_id, min_t(u16, vsi->alloc_txq, vsi->alloc_rxq));
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- goto err;
- }
-
- for (i = 0; i < qbw->num_queues; i++) {
- if (qbw->cfg[i].shaper.peak != 0 && vf->max_tx_rate != 0 &&
- qbw->cfg[i].shaper.peak > vf->max_tx_rate) {
- dev_warn(ice_pf_to_dev(vf->pf), "The maximum queue %d rate limit configuration may not take effect because the maximum TX rate for VF-%d is %d\n",
- qbw->cfg[i].queue_id, vf->vf_id,
- vf->max_tx_rate);
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- goto err;
- }
- if (qbw->cfg[i].shaper.committed != 0 && vf->min_tx_rate != 0 &&
- qbw->cfg[i].shaper.committed < vf->min_tx_rate) {
- dev_warn(ice_pf_to_dev(vf->pf), "The minimum queue %d rate limit configuration may not take effect because the minimum TX rate for VF-%d is %d\n",
- qbw->cfg[i].queue_id, vf->vf_id,
- vf->min_tx_rate);
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- goto err;
- }
- if (qbw->cfg[i].queue_id > vf->num_vf_qs) {
- dev_warn(ice_pf_to_dev(vf->pf), "VF-%d trying to configure invalid queue_id\n",
- vf->vf_id);
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- goto err;
- }
- if (qbw->cfg[i].tc >= ICE_MAX_TRAFFIC_CLASS) {
- dev_warn(ice_pf_to_dev(vf->pf), "VF-%d trying to configure a traffic class higher than allowed\n",
- vf->vf_id);
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- goto err;
- }
- }
-
- for (i = 0; i < qbw->num_queues; i++) {
- vf->qs_bw[i].queue_id = qbw->cfg[i].queue_id;
- vf->qs_bw[i].peak = qbw->cfg[i].shaper.peak;
- vf->qs_bw[i].committed = qbw->cfg[i].shaper.committed;
- vf->qs_bw[i].tc = qbw->cfg[i].tc;
- }
-
- if (ice_vf_cfg_qs_bw(vf, qbw->num_queues))
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
-
-err:
- /* send the response to the VF */
- return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_QUEUE_BW,
- v_ret, NULL, 0);
-}
-
-/**
- * ice_vc_cfg_q_quanta - Configure per queue quanta
- * @vf: pointer to the VF info
- * @msg: pointer to the msg buffer which holds the command descriptor
- *
- * Configure VF queues quanta.
- *
- * Return: 0 on success or negative error value.
- */
-static int ice_vc_cfg_q_quanta(struct ice_vf *vf, u8 *msg)
-{
- u16 quanta_prof_id, quanta_size, start_qid, num_queues, end_qid, i;
- enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
- struct virtchnl_quanta_cfg *qquanta =
- (struct virtchnl_quanta_cfg *)msg;
- struct ice_vsi *vsi;
- int ret;
-
- start_qid = qquanta->queue_select.start_queue_id;
- num_queues = qquanta->queue_select.num_queues;
-
- if (check_add_overflow(start_qid, num_queues, &end_qid)) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- goto err;
- }
-
- if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- goto err;
- }
-
- vsi = ice_get_vf_vsi(vf);
- if (!vsi) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- goto err;
- }
-
- if (end_qid > ICE_MAX_RSS_QS_PER_VF ||
- end_qid > min_t(u16, vsi->alloc_txq, vsi->alloc_rxq)) {
- dev_err(ice_pf_to_dev(vf->pf), "VF-%d trying to configure more than allocated number of queues: %d\n",
- vf->vf_id, min_t(u16, vsi->alloc_txq, vsi->alloc_rxq));
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- goto err;
- }
-
- quanta_size = qquanta->quanta_size;
- if (quanta_size > ICE_MAX_QUANTA_SIZE ||
- quanta_size < ICE_MIN_QUANTA_SIZE) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- goto err;
- }
-
- if (quanta_size % 64) {
- dev_err(ice_pf_to_dev(vf->pf), "quanta size should be the product of 64\n");
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- goto err;
- }
-
- ret = ice_vf_cfg_q_quanta_profile(vf, quanta_size,
- &quanta_prof_id);
- if (ret) {
- v_ret = VIRTCHNL_STATUS_ERR_NOT_SUPPORTED;
- goto err;
- }
-
- for (i = start_qid; i < end_qid; i++)
- vsi->tx_rings[i]->quanta_prof_id = quanta_prof_id;
-
-err:
- /* send the response to the VF */
- return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_QUANTA,
- v_ret, NULL, 0);
-}
-
-/**
- * ice_vc_cfg_qs_msg
- * @vf: pointer to the VF info
- * @msg: pointer to the msg buffer
- *
- * called from the VF to configure the Rx/Tx queues
- */
-static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg)
-{
- struct virtchnl_vsi_queue_config_info *qci =
- (struct virtchnl_vsi_queue_config_info *)msg;
- struct virtchnl_queue_pair_info *qpi;
- struct ice_pf *pf = vf->pf;
- struct ice_vsi *vsi;
- int i = -1, q_idx;
- bool ena_ts;
- u8 act_prt;
-
- mutex_lock(&pf->lag_mutex);
- act_prt = ice_lag_prepare_vf_reset(pf->lag);
-
- if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states))
- goto error_param;
-
- if (!ice_vc_isvalid_vsi_id(vf, qci->vsi_id))
- goto error_param;
-
- vsi = ice_get_vf_vsi(vf);
- if (!vsi)
- goto error_param;
-
- if (qci->num_queue_pairs > ICE_MAX_RSS_QS_PER_VF ||
- qci->num_queue_pairs > min_t(u16, vsi->alloc_txq, vsi->alloc_rxq)) {
- dev_err(ice_pf_to_dev(pf), "VF-%d requesting more than supported number of queues: %d\n",
- vf->vf_id, min_t(u16, vsi->alloc_txq, vsi->alloc_rxq));
- goto error_param;
- }
-
- for (i = 0; i < qci->num_queue_pairs; i++) {
- if (!qci->qpair[i].rxq.crc_disable)
- continue;
-
- if (!(vf->driver_caps & VIRTCHNL_VF_OFFLOAD_CRC) ||
- vf->vlan_strip_ena)
- goto error_param;
- }
-
- for (i = 0; i < qci->num_queue_pairs; i++) {
- qpi = &qci->qpair[i];
- if (qpi->txq.vsi_id != qci->vsi_id ||
- qpi->rxq.vsi_id != qci->vsi_id ||
- qpi->rxq.queue_id != qpi->txq.queue_id ||
- qpi->txq.headwb_enabled ||
- !ice_vc_isvalid_ring_len(qpi->txq.ring_len) ||
- !ice_vc_isvalid_ring_len(qpi->rxq.ring_len) ||
- !ice_vc_isvalid_q_id(vsi, qpi->txq.queue_id)) {
- goto error_param;
- }
-
- q_idx = qpi->rxq.queue_id;
-
- /* make sure selected "q_idx" is in valid range of queues
- * for selected "vsi"
- */
- if (q_idx >= vsi->alloc_txq || q_idx >= vsi->alloc_rxq) {
- goto error_param;
- }
-
- /* copy Tx queue info from VF into VSI */
- if (qpi->txq.ring_len > 0) {
- vsi->tx_rings[q_idx]->dma = qpi->txq.dma_ring_addr;
- vsi->tx_rings[q_idx]->count = qpi->txq.ring_len;
-
- /* Disable any existing queue first */
- if (ice_vf_vsi_dis_single_txq(vf, vsi, q_idx))
- goto error_param;
-
- /* Configure a queue with the requested settings */
- if (ice_vsi_cfg_single_txq(vsi, vsi->tx_rings, q_idx)) {
- dev_warn(ice_pf_to_dev(pf), "VF-%d failed to configure TX queue %d\n",
- vf->vf_id, q_idx);
- goto error_param;
- }
- }
-
- /* copy Rx queue info from VF into VSI */
- if (qpi->rxq.ring_len > 0) {
- u16 max_frame_size = ice_vc_get_max_frame_size(vf);
- struct ice_rx_ring *ring = vsi->rx_rings[q_idx];
- u32 rxdid;
-
- ring->dma = qpi->rxq.dma_ring_addr;
- ring->count = qpi->rxq.ring_len;
-
- if (qpi->rxq.crc_disable)
- ring->flags |= ICE_RX_FLAGS_CRC_STRIP_DIS;
- else
- ring->flags &= ~ICE_RX_FLAGS_CRC_STRIP_DIS;
-
- if (qpi->rxq.databuffer_size != 0 &&
- (qpi->rxq.databuffer_size > ((16 * 1024) - 128) ||
- qpi->rxq.databuffer_size < 1024))
- goto error_param;
- ring->rx_buf_len = qpi->rxq.databuffer_size;
- if (qpi->rxq.max_pkt_size > max_frame_size ||
- qpi->rxq.max_pkt_size < 64)
- goto error_param;
-
- ring->max_frame = qpi->rxq.max_pkt_size;
- /* add space for the port VLAN since the VF driver is
- * not expected to account for it in the MTU
- * calculation
- */
- if (ice_vf_is_port_vlan_ena(vf))
- ring->max_frame += VLAN_HLEN;
-
- if (ice_vsi_cfg_single_rxq(vsi, q_idx)) {
- dev_warn(ice_pf_to_dev(pf), "VF-%d failed to configure RX queue %d\n",
- vf->vf_id, q_idx);
- goto error_param;
- }
-
- /* If Rx flex desc is supported, select RXDID for Rx
- * queues. Otherwise, use legacy 32byte descriptor
- * format. Legacy 16byte descriptor is not supported.
- * If this RXDID is selected, return error.
- */
- if (vf->driver_caps &
- VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC) {
- rxdid = qpi->rxq.rxdid;
- if (!(BIT(rxdid) & pf->supported_rxdids))
- goto error_param;
- } else {
- rxdid = ICE_RXDID_LEGACY_1;
- }
-
- ena_ts = ((vf->driver_caps &
- VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC) &&
- (vf->driver_caps & VIRTCHNL_VF_CAP_PTP) &&
- (qpi->rxq.flags & VIRTCHNL_PTP_RX_TSTAMP));
-
- ice_write_qrxflxp_cntxt(&vsi->back->hw,
- vsi->rxq_map[q_idx], rxdid,
- ICE_RXDID_PRIO, ena_ts);
- }
- }
-
- ice_lag_complete_vf_reset(pf->lag, act_prt);
- mutex_unlock(&pf->lag_mutex);
-
- /* send the response to the VF */
- return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_VSI_QUEUES,
- VIRTCHNL_STATUS_SUCCESS, NULL, 0);
-error_param:
- /* disable whatever we can */
- for (; i >= 0; i--) {
- if (ice_vsi_ctrl_one_rx_ring(vsi, false, i, true))
- dev_err(ice_pf_to_dev(pf), "VF-%d could not disable RX queue %d\n",
- vf->vf_id, i);
- if (ice_vf_vsi_dis_single_txq(vf, vsi, i))
- dev_err(ice_pf_to_dev(pf), "VF-%d could not disable TX queue %d\n",
- vf->vf_id, i);
- }
-
- ice_lag_complete_vf_reset(pf->lag, act_prt);
- mutex_unlock(&pf->lag_mutex);
-
- ice_lag_move_new_vf_nodes(vf);
-
- /* send the response to the VF */
- return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_VSI_QUEUES,
- VIRTCHNL_STATUS_ERR_PARAM, NULL, 0);
-}
-
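The Rx branch of the queue-config handler above picks the Rx descriptor format in two steps: a flex RXDID requested by the VF is honoured only when VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC was negotiated and the DDP package advertises that ID; otherwise the legacy 32-byte format is forced. A hedged sketch of that selection (constants simplified):

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define RXDID_LEGACY_1 1u

static bool pick_rxdid(bool flex_negotiated, uint32_t requested,
		       uint64_t supported, uint32_t *rxdid)
{
	if (!flex_negotiated) {
		*rxdid = RXDID_LEGACY_1;	/* legacy 32-byte format */
		return true;
	}
	if (requested >= 64 || !((supported >> requested) & 1))
		return false;	/* reject: ID not in the DDP's bitmap */
	*rxdid = requested;
	return true;
}

int main(void)
{
	uint32_t id;

	if (pick_rxdid(true, 22, 1ull << 22, &id))
		printf("flex rxdid %u\n", id);
	if (pick_rxdid(false, 22, 0, &id))
		printf("fallback rxdid %u\n", id);
	return 0;
}
```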
-/**
* ice_can_vf_change_mac
* @vf: pointer to the VF info
*
@@ -2531,66 +1022,6 @@ static int ice_vc_del_mac_addr_msg(struct ice_vf *vf, u8 *msg)
}
/**
- * ice_vc_request_qs_msg
- * @vf: pointer to the VF info
- * @msg: pointer to the msg buffer
- *
- * VFs get a default number of queues but can use this message to request a
- * different number. If the request is successful, PF will reset the VF and
- * return 0. If unsuccessful, PF will send message informing VF of number of
- * available queue pairs via virtchnl message response to VF.
- */
-static int ice_vc_request_qs_msg(struct ice_vf *vf, u8 *msg)
-{
- enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
- struct virtchnl_vf_res_request *vfres =
- (struct virtchnl_vf_res_request *)msg;
- u16 req_queues = vfres->num_queue_pairs;
- struct ice_pf *pf = vf->pf;
- u16 max_allowed_vf_queues;
- u16 tx_rx_queue_left;
- struct device *dev;
- u16 cur_queues;
-
- dev = ice_pf_to_dev(pf);
- if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- goto error_param;
- }
-
- cur_queues = vf->num_vf_qs;
- tx_rx_queue_left = min_t(u16, ice_get_avail_txq_count(pf),
- ice_get_avail_rxq_count(pf));
- max_allowed_vf_queues = tx_rx_queue_left + cur_queues;
- if (!req_queues) {
- dev_err(dev, "VF %d tried to request 0 queues. Ignoring.\n",
- vf->vf_id);
- } else if (req_queues > ICE_MAX_RSS_QS_PER_VF) {
- dev_err(dev, "VF %d tried to request more than %d queues.\n",
- vf->vf_id, ICE_MAX_RSS_QS_PER_VF);
- vfres->num_queue_pairs = ICE_MAX_RSS_QS_PER_VF;
- } else if (req_queues > cur_queues &&
- req_queues - cur_queues > tx_rx_queue_left) {
- dev_warn(dev, "VF %d requested %u more queues, but only %u left.\n",
- vf->vf_id, req_queues - cur_queues, tx_rx_queue_left);
- vfres->num_queue_pairs = min_t(u16, max_allowed_vf_queues,
- ICE_MAX_RSS_QS_PER_VF);
- } else {
- /* request is successful, then reset VF */
- vf->num_req_qs = req_queues;
- ice_reset_vf(vf, ICE_VF_RESET_NOTIFY);
- dev_info(dev, "VF %d granted request of %u queues.\n",
- vf->vf_id, req_queues);
- return 0;
- }
-
-error_param:
- /* send the response to the VF */
- return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_REQUEST_QUEUES,
- v_ret, (u8 *)vfres, sizeof(*vfres));
-}
-
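The request handler above is a small clamping ladder: zero requests are ignored, requests above the per-VF cap report the cap back, requests exceeding the remaining pool report what could actually be granted, and only a satisfiable request resets the VF with the new count. Condensed into one standalone function, with invented limits:

```c
#include <stdio.h>

#define MAX_QS_PER_VF 16u

/* Returns the pair count reported to the VF; *granted set on success. */
static unsigned int request_qs(unsigned int req, unsigned int cur,
			       unsigned int avail, int *granted)
{
	unsigned int max_allowed = cur + avail;

	*granted = 0;
	if (!req)
		return cur;		/* ignore a request for 0 queues */
	if (req > MAX_QS_PER_VF)
		return MAX_QS_PER_VF;	/* advertise the hard cap */
	if (req > cur && req - cur > avail)
		return max_allowed < MAX_QS_PER_VF ? max_allowed
						   : MAX_QS_PER_VF;
	*granted = 1;			/* the PF would reset the VF here */
	return req;
}

int main(void)
{
	int granted;

	printf("req 32 -> %u (granted=%d)\n",
	       request_qs(32, 4, 8, &granted), granted);
	printf("req 12 -> %u (granted=%d)\n",
	       request_qs(12, 4, 2, &granted), granted);
	printf("req  8 -> %u (granted=%d)\n",
	       request_qs(8, 4, 8, &granted), granted);
	return 0;
}
```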
-/**
* ice_vf_vlan_offload_ena - determine if capabilities support VLAN offloads
* @caps: VF driver negotiated capabilities
*
@@ -2983,112 +1414,6 @@ error_param:
}
/**
- * ice_vc_get_rss_hashcfg - return the RSS Hash configuration
- * @vf: pointer to the VF info
- */
-static int ice_vc_get_rss_hashcfg(struct ice_vf *vf)
-{
- enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
- struct virtchnl_rss_hashcfg *vrh = NULL;
- int len = 0, ret;
-
- if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- goto err;
- }
-
- if (!test_bit(ICE_FLAG_RSS_ENA, vf->pf->flags)) {
- dev_err(ice_pf_to_dev(vf->pf), "RSS not supported by PF\n");
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- goto err;
- }
-
- len = sizeof(struct virtchnl_rss_hashcfg);
- vrh = kzalloc(len, GFP_KERNEL);
- if (!vrh) {
- v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY;
- len = 0;
- goto err;
- }
-
- vrh->hashcfg = ICE_DEFAULT_RSS_HASHCFG;
-err:
- /* send the response back to the VF */
- ret = ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_GET_RSS_HASHCFG_CAPS, v_ret,
- (u8 *)vrh, len);
- kfree(vrh);
- return ret;
-}
-
-/**
- * ice_vc_set_rss_hashcfg - set RSS Hash configuration bits for the VF
- * @vf: pointer to the VF info
- * @msg: pointer to the msg buffer
- */
-static int ice_vc_set_rss_hashcfg(struct ice_vf *vf, u8 *msg)
-{
- struct virtchnl_rss_hashcfg *vrh = (struct virtchnl_rss_hashcfg *)msg;
- enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
- struct ice_pf *pf = vf->pf;
- struct ice_vsi *vsi;
- struct device *dev;
- int status;
-
- dev = ice_pf_to_dev(pf);
-
- if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- goto err;
- }
-
- if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) {
- dev_err(dev, "RSS not supported by PF\n");
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- goto err;
- }
-
- vsi = ice_get_vf_vsi(vf);
- if (!vsi) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- goto err;
- }
-
- /* clear all previously programmed RSS configuration to allow VF drivers
- * the ability to customize the RSS configuration and/or completely
- * disable RSS
- */
- status = ice_rem_vsi_rss_cfg(&pf->hw, vsi->idx);
- if (status && !vrh->hashcfg) {
- /* only report failure to clear the current RSS configuration if
- * that was clearly the VF's intention (i.e. vrh->hashcfg = 0)
- */
- v_ret = ice_err_to_virt_err(status);
- goto err;
- } else if (status) {
- /* allow the VF to update the RSS configuration even on failure
- * to clear the current RSS confguration in an attempt to keep
- * RSS in a working state
- */
- dev_warn(dev, "Failed to clear the RSS configuration for VF %u\n",
- vf->vf_id);
- }
-
- if (vrh->hashcfg) {
- status = ice_add_avf_rss_cfg(&pf->hw, vsi, vrh->hashcfg);
- v_ret = ice_err_to_virt_err(status);
- }
-
- /* save the requested VF configuration */
- if (!v_ret)
- vf->rss_hashcfg = vrh->hashcfg;
-
- /* send the response to the VF */
-err:
- return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_SET_RSS_HASHCFG, v_ret,
- NULL, 0);
-}
-
-/**
* ice_vc_query_rxdid - query RXDID supported by DDP package
* @vf: pointer to VF info
*
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.h b/drivers/net/ethernet/intel/ice/virt/virtchnl.h
index 71bb456e2d71..71bb456e2d71 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl.h
+++ b/drivers/net/ethernet/intel/ice/virt/virtchnl.h
diff --git a/drivers/net/ethernet/intel/idpf/idpf.h b/drivers/net/ethernet/intel/idpf/idpf.h
index f4c0eaf9bde3..aafbb280c2e7 100644
--- a/drivers/net/ethernet/intel/idpf/idpf.h
+++ b/drivers/net/ethernet/intel/idpf/idpf.h
@@ -148,6 +148,7 @@ enum idpf_vport_state {
* @link_speed_mbps: Link speed in mbps
* @vport_idx: Relative vport index
* @max_tx_hdr_size: Max header length hardware can support
+ * @tx_max_bufs: Max buffers that can be transmitted with scatter-gather
* @state: See enum idpf_vport_state
* @netstats: Packet and byte stats
* @stats_lock: Lock to protect stats update
@@ -159,6 +160,7 @@ struct idpf_netdev_priv {
u32 link_speed_mbps;
u16 vport_idx;
u16 max_tx_hdr_size;
+ u16 tx_max_bufs;
enum idpf_vport_state state;
struct rtnl_link_stats64 netstats;
spinlock_t stats_lock;
diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c
index 513032cb5f08..e327950c93d8 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_lib.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c
@@ -776,6 +776,7 @@ static int idpf_cfg_netdev(struct idpf_vport *vport)
np->vport_idx = vport->idx;
np->vport_id = vport->vport_id;
np->max_tx_hdr_size = idpf_get_max_tx_hdr_size(adapter);
+ np->tx_max_bufs = idpf_get_max_tx_bufs(adapter);
spin_lock_init(&np->stats_lock);
@@ -2272,6 +2273,92 @@ static int idpf_change_mtu(struct net_device *netdev, int new_mtu)
}
/**
+ * idpf_chk_tso_segment - Check skb is not using too many buffers
+ * @skb: send buffer
+ * @max_bufs: maximum number of buffers
+ *
+ * For TSO we need to count the TSO header and segment payload separately. As
+ * such we need to check cases where we have max_bufs-1 fragments or more as we
+ * can potentially require max_bufs+1 DMA transactions, 1 for the TSO header, 1
+ * for the segment payload in the first descriptor, and another max_bufs-1 for
+ * the fragments.
+ *
+ * Returns true if the packet needs to be software segmented by the core stack.
+ */
+static bool idpf_chk_tso_segment(const struct sk_buff *skb,
+ unsigned int max_bufs)
+{
+ const struct skb_shared_info *shinfo = skb_shinfo(skb);
+ const skb_frag_t *frag, *stale;
+ int nr_frags, sum;
+
+ /* no need to check if number of frags is less than max_bufs - 1 */
+ nr_frags = shinfo->nr_frags;
+ if (nr_frags < (max_bufs - 1))
+ return false;
+
+ /* We need to walk through the list and validate that each group
+ * of max_bufs-2 fragments totals at least gso_size.
+ */
+ nr_frags -= max_bufs - 2;
+ frag = &shinfo->frags[0];
+
+ /* Initialize size to the negative value of gso_size minus 1. We use
+ * this as the worst case scenario in which the frag ahead of us only
+ * provides one byte which is why we are limited to max_bufs-2
+ * descriptors for a single transmit as the header and previous
+ * fragment are already consuming 2 descriptors.
+ */
+ sum = 1 - shinfo->gso_size;
+
+ /* Add size of frags 0 through 4 to create our initial sum */
+ sum += skb_frag_size(frag++);
+ sum += skb_frag_size(frag++);
+ sum += skb_frag_size(frag++);
+ sum += skb_frag_size(frag++);
+ sum += skb_frag_size(frag++);
+
+ /* Walk through fragments adding latest fragment, testing it, and
+ * then removing stale fragments from the sum.
+ */
+ for (stale = &shinfo->frags[0];; stale++) {
+ int stale_size = skb_frag_size(stale);
+
+ sum += skb_frag_size(frag++);
+
+ /* The stale fragment may present us with a smaller
+ * descriptor than the actual fragment size. To account
+ * for that we need to remove all the data on the front and
+ * figure out what the remainder would be in the last
+ * descriptor associated with the fragment.
+ */
+ if (stale_size > IDPF_TX_MAX_DESC_DATA) {
+ int align_pad = -(skb_frag_off(stale)) &
+ (IDPF_TX_MAX_READ_REQ_SIZE - 1);
+
+ sum -= align_pad;
+ stale_size -= align_pad;
+
+ do {
+ sum -= IDPF_TX_MAX_DESC_DATA_ALIGNED;
+ stale_size -= IDPF_TX_MAX_DESC_DATA_ALIGNED;
+ } while (stale_size > IDPF_TX_MAX_DESC_DATA);
+ }
+
+ /* if sum is negative we failed to make sufficient progress */
+ if (sum < 0)
+ return true;
+
+ if (!nr_frags--)
+ break;
+
+ sum -= stale_size;
+ }
+
+ return false;
+}
+
+/**
* idpf_features_check - Validate packet conforms to limits
* @skb: skb buffer
* @netdev: This port's netdev
@@ -2292,12 +2379,15 @@ static netdev_features_t idpf_features_check(struct sk_buff *skb,
if (skb->ip_summed != CHECKSUM_PARTIAL)
return features;
- /* We cannot support GSO if the MSS is going to be less than
- * 88 bytes. If it is then we need to drop support for GSO.
- */
- if (skb_is_gso(skb) &&
- (skb_shinfo(skb)->gso_size < IDPF_TX_TSO_MIN_MSS))
- features &= ~NETIF_F_GSO_MASK;
+ if (skb_is_gso(skb)) {
+ /* We cannot support GSO if the MSS is going to be less than
+ * 88 bytes. If it is then we need to drop support for GSO.
+ */
+ if (skb_shinfo(skb)->gso_size < IDPF_TX_TSO_MIN_MSS)
+ features &= ~NETIF_F_GSO_MASK;
+ else if (idpf_chk_tso_segment(skb, np->tx_max_bufs))
+ features &= ~NETIF_F_GSO_MASK;
+ }
/* Ensure MACLEN is <= 126 bytes (63 words) and not an odd size */
len = skb_network_offset(skb);
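The window invariant enforced by idpf_chk_tso_segment() is easier to see outside the driver: every run of max_bufs-2 consecutive fragments must carry at least gso_size bytes of payload, otherwise a single TSO segment (one descriptor for the header, one for the fragment carried over from the previous segment, plus max_bufs-2 fragment descriptors) could exceed the per-packet descriptor budget. A minimal userspace sketch of that invariant follows; it restates the kernel's O(n) rolling sum as a direct window sum and omits the IDPF_TX_MAX_DESC_DATA alignment handling for oversized fragments, so it is illustrative only.

#include <stdbool.h>
#include <stdio.h>

static bool needs_sw_segmentation(const int *frag_sizes, int nr_frags,
                                  int max_bufs, int gso_size)
{
    int window = max_bufs - 2;

    if (nr_frags < max_bufs - 1)    /* few enough frags to always fit */
        return false;

    for (int i = 0; i + window <= nr_frags; i++) {
        int sum = 0;

        for (int j = 0; j < window; j++)
            sum += frag_sizes[i + j];
        if (sum < gso_size)         /* a segment could overrun max_bufs */
            return true;
    }
    return false;
}

int main(void)
{
    /* six consecutive 1-byte frags stall progress for max_bufs = 8 */
    int frags[] = { 4096, 1, 1, 1, 1, 1, 1, 4096 };

    printf("%d\n", needs_sw_segmentation(frags, 8, 8, 1448));   /* 1 */
    return 0;
}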
diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
index eaad52a83b04..194f924d2bd6 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
@@ -11,8 +11,28 @@
#define idpf_tx_buf_next(buf) (*(u32 *)&(buf)->priv)
LIBETH_SQE_CHECK_PRIV(u32);
-static bool idpf_chk_linearize(struct sk_buff *skb, unsigned int max_bufs,
- unsigned int count);
+/**
+ * idpf_chk_linearize - Check if skb exceeds max descriptors per packet
+ * @skb: send buffer
+ * @max_bufs: maximum scatter gather buffers for single packet
+ * @count: number of buffers this packet needs
+ *
+ * Make sure we don't exceed maximum scatter gather buffers for a single
+ * packet.
+ * The TSO case has already been handled in idpf_features_check().
+ */
+static bool idpf_chk_linearize(const struct sk_buff *skb,
+ unsigned int max_bufs,
+ unsigned int count)
+{
+ if (likely(count <= max_bufs))
+ return false;
+
+ if (skb_is_gso(skb))
+ return false;
+
+ return true;
+}
/**
* idpf_tx_timeout - Respond to a Tx Hang
@@ -2397,111 +2417,6 @@ int idpf_tso(struct sk_buff *skb, struct idpf_tx_offload_params *off)
return 1;
}
-/**
- * __idpf_chk_linearize - Check skb is not using too many buffers
- * @skb: send buffer
- * @max_bufs: maximum number of buffers
- *
- * For TSO we need to count the TSO header and segment payload separately. As
- * such we need to check cases where we have max_bufs-1 fragments or more as we
- * can potentially require max_bufs+1 DMA transactions, 1 for the TSO header, 1
- * for the segment payload in the first descriptor, and another max_buf-1 for
- * the fragments.
- */
-static bool __idpf_chk_linearize(struct sk_buff *skb, unsigned int max_bufs)
-{
- const struct skb_shared_info *shinfo = skb_shinfo(skb);
- const skb_frag_t *frag, *stale;
- int nr_frags, sum;
-
- /* no need to check if number of frags is less than max_bufs - 1 */
- nr_frags = shinfo->nr_frags;
- if (nr_frags < (max_bufs - 1))
- return false;
-
- /* We need to walk through the list and validate that each group
- * of max_bufs-2 fragments totals at least gso_size.
- */
- nr_frags -= max_bufs - 2;
- frag = &shinfo->frags[0];
-
- /* Initialize size to the negative value of gso_size minus 1. We use
- * this as the worst case scenario in which the frag ahead of us only
- * provides one byte which is why we are limited to max_bufs-2
- * descriptors for a single transmit as the header and previous
- * fragment are already consuming 2 descriptors.
- */
- sum = 1 - shinfo->gso_size;
-
- /* Add size of frags 0 through 4 to create our initial sum */
- sum += skb_frag_size(frag++);
- sum += skb_frag_size(frag++);
- sum += skb_frag_size(frag++);
- sum += skb_frag_size(frag++);
- sum += skb_frag_size(frag++);
-
- /* Walk through fragments adding latest fragment, testing it, and
- * then removing stale fragments from the sum.
- */
- for (stale = &shinfo->frags[0];; stale++) {
- int stale_size = skb_frag_size(stale);
-
- sum += skb_frag_size(frag++);
-
- /* The stale fragment may present us with a smaller
- * descriptor than the actual fragment size. To account
- * for that we need to remove all the data on the front and
- * figure out what the remainder would be in the last
- * descriptor associated with the fragment.
- */
- if (stale_size > IDPF_TX_MAX_DESC_DATA) {
- int align_pad = -(skb_frag_off(stale)) &
- (IDPF_TX_MAX_READ_REQ_SIZE - 1);
-
- sum -= align_pad;
- stale_size -= align_pad;
-
- do {
- sum -= IDPF_TX_MAX_DESC_DATA_ALIGNED;
- stale_size -= IDPF_TX_MAX_DESC_DATA_ALIGNED;
- } while (stale_size > IDPF_TX_MAX_DESC_DATA);
- }
-
- /* if sum is negative we failed to make sufficient progress */
- if (sum < 0)
- return true;
-
- if (!nr_frags--)
- break;
-
- sum -= stale_size;
- }
-
- return false;
-}
-
-/**
- * idpf_chk_linearize - Check if skb exceeds max descriptors per packet
- * @skb: send buffer
- * @max_bufs: maximum scatter gather buffers for single packet
- * @count: number of buffers this packet needs
- *
- * Make sure we don't exceed maximum scatter gather buffers for a single
- * packet. We have to do some special checking around the boundary (max_bufs-1)
- * if TSO is on since we need count the TSO header and payload separately.
- * E.g.: a packet with 7 fragments can require 9 DMA transactions; 1 for TSO
- * header, 1 for segment payload, and then 7 for the fragments.
- */
-static bool idpf_chk_linearize(struct sk_buff *skb, unsigned int max_bufs,
- unsigned int count)
-{
- if (likely(count < max_bufs))
- return false;
- if (skb_is_gso(skb))
- return __idpf_chk_linearize(skb, max_bufs);
-
- return count > max_bufs;
-}
/**
* idpf_tx_splitq_get_ctx_desc - grab next desc and update buffer ring
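With TSO layouts vetted at idpf_features_check() time, the xmit-path idpf_chk_linearize() above reduces to a plain budget check for non-GSO packets. A userspace sketch of the resulting decision table (names are illustrative):

#include <stdbool.h>
#include <stdio.h>

static bool chk_linearize(bool is_gso, unsigned int count, unsigned int max_bufs)
{
    if (count <= max_bufs)
        return false;   /* fits within one packet's descriptor budget */
    if (is_gso)
        return false;   /* already vetted (or GSO-disabled) in features_check */
    return true;        /* non-GSO and over budget: linearize */
}

int main(void)
{
    printf("%d %d %d\n",
           chk_linearize(false, 4, 8),    /* fits        -> 0 */
           chk_linearize(true, 12, 8),    /* GSO, vetted -> 0 */
           chk_linearize(false, 12, 8));  /* linearize   -> 1 */
    return 0;
}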
diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c
index 92ef33459aec..51d5cb6599ed 100644
--- a/drivers/net/ethernet/intel/igb/igb_ethtool.c
+++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c
@@ -920,11 +920,11 @@ static int igb_set_ringparam(struct net_device *netdev,
}
if (adapter->num_tx_queues > adapter->num_rx_queues)
- temp_ring = vmalloc(array_size(sizeof(struct igb_ring),
- adapter->num_tx_queues));
+ temp_ring = vmalloc_array(adapter->num_tx_queues,
+ sizeof(struct igb_ring));
else
- temp_ring = vmalloc(array_size(sizeof(struct igb_ring),
- adapter->num_rx_queues));
+ temp_ring = vmalloc_array(adapter->num_rx_queues,
+ sizeof(struct igb_ring));
if (!temp_ring) {
err = -ENOMEM;
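The vmalloc(array_size(...)) and vmalloc_array() forms are equally overflow-safe; this hunk (and the matching igc/ixgbe/ixgbevf hunks below) just switches to the dedicated helper, mirroring kmalloc_array(). A userspace sketch of the checked multiply both rely on; note the kernel's array_size() saturates to SIZE_MAX so the oversized allocation fails, whereas this sketch refuses outright:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

static void *alloc_array(size_t n, size_t size)
{
    if (size && n > SIZE_MAX / size)    /* n * size would wrap */
        return NULL;
    return malloc(n * size);
}

struct ring { char pad[256]; };

int main(void)
{
    struct ring *ok = alloc_array(16, sizeof(struct ring));
    void *bad = alloc_array(SIZE_MAX, sizeof(struct ring));

    printf("ok=%p bad=%p\n", (void *)ok, bad);  /* bad == (nil) */
    free(ok);
    return 0;
}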
diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c
index ecb35b693ce5..f3e7218ba6f3 100644
--- a/drivers/net/ethernet/intel/igc/igc_ethtool.c
+++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c
@@ -627,11 +627,11 @@ igc_ethtool_set_ringparam(struct net_device *netdev,
}
if (adapter->num_tx_queues > adapter->num_rx_queues)
- temp_ring = vmalloc(array_size(sizeof(struct igc_ring),
- adapter->num_tx_queues));
+ temp_ring = vmalloc_array(adapter->num_tx_queues,
+ sizeof(struct igc_ring));
else
- temp_ring = vmalloc(array_size(sizeof(struct igc_ring),
- adapter->num_rx_queues));
+ temp_ring = vmalloc_array(adapter->num_rx_queues,
+ sizeof(struct igc_ring));
if (!temp_ring) {
err = -ENOMEM;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
index 1a2f1bdb91aa..2d660e9edb80 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
@@ -1278,7 +1278,7 @@ static int ixgbe_set_ringparam(struct net_device *netdev,
/* allocate temporary buffer to store rings in */
i = max_t(int, adapter->num_tx_queues + adapter->num_xdp_queues,
adapter->num_rx_queues);
- temp_ring = vmalloc(array_size(i, sizeof(struct ixgbe_ring)));
+ temp_ring = vmalloc_array(i, sizeof(struct ixgbe_ring));
if (!temp_ring) {
err = -ENOMEM;
diff --git a/drivers/net/ethernet/intel/ixgbevf/ethtool.c b/drivers/net/ethernet/intel/ixgbevf/ethtool.c
index 7ac53171b041..bebad564188e 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ethtool.c
@@ -276,9 +276,9 @@ static int ixgbevf_set_ringparam(struct net_device *netdev,
}
if (new_tx_count != adapter->tx_ring_count) {
- tx_ring = vmalloc(array_size(sizeof(*tx_ring),
- adapter->num_tx_queues +
- adapter->num_xdp_queues));
+ tx_ring = vmalloc_array(adapter->num_tx_queues +
+ adapter->num_xdp_queues,
+ sizeof(*tx_ring));
if (!tx_ring) {
err = -ENOMEM;
goto clear_reset;
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index 8ebb985d2573..35d1184458fd 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -6222,6 +6222,12 @@ static struct mvpp2_port *mvpp2_pcs_gmac_to_port(struct phylink_pcs *pcs)
return container_of(pcs, struct mvpp2_port, pcs_gmac);
}
+static unsigned int mvpp2_xjg_pcs_inband_caps(struct phylink_pcs *pcs,
+ phy_interface_t interface)
+{
+ return LINK_INBAND_DISABLE;
+}
+
static void mvpp2_xlg_pcs_get_state(struct phylink_pcs *pcs,
unsigned int neg_mode,
struct phylink_link_state *state)
@@ -6256,6 +6262,7 @@ static int mvpp2_xlg_pcs_config(struct phylink_pcs *pcs, unsigned int neg_mode,
}
static const struct phylink_pcs_ops mvpp2_phylink_xlg_pcs_ops = {
+ .pcs_inband_caps = mvpp2_xjg_pcs_inband_caps,
.pcs_get_state = mvpp2_xlg_pcs_get_state,
.pcs_config = mvpp2_xlg_pcs_config,
};
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h
index 950231e7ea71..92ccf343dfe0 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h
@@ -161,10 +161,6 @@ int cgx_get_link_info(void *cgxd, int lmac_id,
struct cgx_link_user_info *linfo);
int cgx_lmac_linkup_start(void *cgxd);
int cgx_get_fwdata_base(u64 *base);
-int cgx_lmac_get_pause_frm(void *cgxd, int lmac_id,
- u8 *tx_pause, u8 *rx_pause);
-int cgx_lmac_set_pause_frm(void *cgxd, int lmac_id,
- u8 tx_pause, u8 rx_pause);
void cgx_lmac_ptp_config(void *cgxd, int lmac_id, bool enable);
u8 cgx_lmac_get_p2x(int cgx_id, int lmac_id);
int cgx_set_fec(u64 fec, int cgx_id, int lmac_id);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
index c6bb3aaa8e0d..2d78e08f985f 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
@@ -1164,6 +1164,9 @@ cpt:
rvu_program_channels(rvu);
cgx_start_linkup(rvu);
+ rvu_block_bcast_xon(rvu, BLKADDR_NIX0);
+ rvu_block_bcast_xon(rvu, BLKADDR_NIX1);
+
err = rvu_mcs_init(rvu);
if (err) {
dev_err(rvu->dev, "%s: Failed to initialize mcs\n", __func__);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
index 18c7bb39dbc7..b58283341923 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
@@ -1031,6 +1031,7 @@ int rvu_nix_mcast_update_mcam_entry(struct rvu *rvu, u16 pcifunc,
void rvu_nix_flr_free_bpids(struct rvu *rvu, u16 pcifunc);
int rvu_alloc_cint_qint_mem(struct rvu *rvu, struct rvu_pfvf *pfvf,
int blkaddr, int nixlf);
+void rvu_block_bcast_xon(struct rvu *rvu, int blkaddr);
/* NPC APIs */
void rvu_npc_freemem(struct rvu *rvu);
int rvu_npc_get_pkind(struct rvu *rvu, u16 pf);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c
index 27c3a2daaaa9..3735372539bd 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c
@@ -505,7 +505,9 @@ static int rvu_nix_register_reporters(struct rvu_devlink *rvu_dl)
rvu_reporters->nix_event_ctx = nix_event_context;
rvu_reporters->rvu_hw_nix_intr_reporter =
- devlink_health_reporter_create(rvu_dl->dl, &rvu_hw_nix_intr_reporter_ops, 0, rvu);
+ devlink_health_reporter_create(rvu_dl->dl,
+ &rvu_hw_nix_intr_reporter_ops,
+ rvu);
if (IS_ERR(rvu_reporters->rvu_hw_nix_intr_reporter)) {
dev_warn(rvu->dev, "Failed to create hw_nix_intr reporter, err=%ld\n",
PTR_ERR(rvu_reporters->rvu_hw_nix_intr_reporter));
@@ -513,7 +515,9 @@ static int rvu_nix_register_reporters(struct rvu_devlink *rvu_dl)
}
rvu_reporters->rvu_hw_nix_gen_reporter =
- devlink_health_reporter_create(rvu_dl->dl, &rvu_hw_nix_gen_reporter_ops, 0, rvu);
+ devlink_health_reporter_create(rvu_dl->dl,
+ &rvu_hw_nix_gen_reporter_ops,
+ rvu);
if (IS_ERR(rvu_reporters->rvu_hw_nix_gen_reporter)) {
dev_warn(rvu->dev, "Failed to create hw_nix_gen reporter, err=%ld\n",
PTR_ERR(rvu_reporters->rvu_hw_nix_gen_reporter));
@@ -521,7 +525,9 @@ static int rvu_nix_register_reporters(struct rvu_devlink *rvu_dl)
}
rvu_reporters->rvu_hw_nix_err_reporter =
- devlink_health_reporter_create(rvu_dl->dl, &rvu_hw_nix_err_reporter_ops, 0, rvu);
+ devlink_health_reporter_create(rvu_dl->dl,
+ &rvu_hw_nix_err_reporter_ops,
+ rvu);
if (IS_ERR(rvu_reporters->rvu_hw_nix_err_reporter)) {
dev_warn(rvu->dev, "Failed to create hw_nix_err reporter, err=%ld\n",
PTR_ERR(rvu_reporters->rvu_hw_nix_err_reporter));
@@ -529,7 +535,9 @@ static int rvu_nix_register_reporters(struct rvu_devlink *rvu_dl)
}
rvu_reporters->rvu_hw_nix_ras_reporter =
- devlink_health_reporter_create(rvu_dl->dl, &rvu_hw_nix_ras_reporter_ops, 0, rvu);
+ devlink_health_reporter_create(rvu_dl->dl,
+ &rvu_hw_nix_ras_reporter_ops,
+ rvu);
if (IS_ERR(rvu_reporters->rvu_hw_nix_ras_reporter)) {
dev_warn(rvu->dev, "Failed to create hw_nix_ras reporter, err=%ld\n",
PTR_ERR(rvu_reporters->rvu_hw_nix_ras_reporter));
@@ -1051,7 +1059,9 @@ static int rvu_npa_register_reporters(struct rvu_devlink *rvu_dl)
rvu_reporters->npa_event_ctx = npa_event_context;
rvu_reporters->rvu_hw_npa_intr_reporter =
- devlink_health_reporter_create(rvu_dl->dl, &rvu_hw_npa_intr_reporter_ops, 0, rvu);
+ devlink_health_reporter_create(rvu_dl->dl,
+ &rvu_hw_npa_intr_reporter_ops,
+ rvu);
if (IS_ERR(rvu_reporters->rvu_hw_npa_intr_reporter)) {
dev_warn(rvu->dev, "Failed to create hw_npa_intr reporter, err=%ld\n",
PTR_ERR(rvu_reporters->rvu_hw_npa_intr_reporter));
@@ -1059,7 +1069,9 @@ static int rvu_npa_register_reporters(struct rvu_devlink *rvu_dl)
}
rvu_reporters->rvu_hw_npa_gen_reporter =
- devlink_health_reporter_create(rvu_dl->dl, &rvu_hw_npa_gen_reporter_ops, 0, rvu);
+ devlink_health_reporter_create(rvu_dl->dl,
+ &rvu_hw_npa_gen_reporter_ops,
+ rvu);
if (IS_ERR(rvu_reporters->rvu_hw_npa_gen_reporter)) {
dev_warn(rvu->dev, "Failed to create hw_npa_gen reporter, err=%ld\n",
PTR_ERR(rvu_reporters->rvu_hw_npa_gen_reporter));
@@ -1067,7 +1079,9 @@ static int rvu_npa_register_reporters(struct rvu_devlink *rvu_dl)
}
rvu_reporters->rvu_hw_npa_err_reporter =
- devlink_health_reporter_create(rvu_dl->dl, &rvu_hw_npa_err_reporter_ops, 0, rvu);
+ devlink_health_reporter_create(rvu_dl->dl,
+ &rvu_hw_npa_err_reporter_ops,
+ rvu);
if (IS_ERR(rvu_reporters->rvu_hw_npa_err_reporter)) {
dev_warn(rvu->dev, "Failed to create hw_npa_err reporter, err=%ld\n",
PTR_ERR(rvu_reporters->rvu_hw_npa_err_reporter));
@@ -1075,7 +1089,9 @@ static int rvu_npa_register_reporters(struct rvu_devlink *rvu_dl)
}
rvu_reporters->rvu_hw_npa_ras_reporter =
- devlink_health_reporter_create(rvu_dl->dl, &rvu_hw_npa_ras_reporter_ops, 0, rvu);
+ devlink_health_reporter_create(rvu_dl->dl,
+ &rvu_hw_npa_ras_reporter_ops,
+ rvu);
if (IS_ERR(rvu_reporters->rvu_hw_npa_ras_reporter)) {
dev_warn(rvu->dev, "Failed to create hw_npa_ras reporter, err=%ld\n",
PTR_ERR(rvu_reporters->rvu_hw_npa_ras_reporter));
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
index 60db1f616cc8..828316211b24 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
@@ -6616,3 +6616,19 @@ unlock_grp:
return ret;
}
+
+/* On CN10k and older silicon, the hardware may incorrectly
+ * assert XOFF on certain channels. Issue a write to NIX_AF_RX_CHANX_CFG
+ * to broadcast XON on those channels.
+ */
+void rvu_block_bcast_xon(struct rvu *rvu, int blkaddr)
+{
+ struct rvu_block *block = &rvu->hw->block[blkaddr];
+ u64 cfg;
+
+ if (!block->implemented || is_cn20k(rvu->pdev))
+ return;
+
+ cfg = rvu_read64(rvu, blkaddr, NIX_AF_RX_CHANX_CFG(0));
+ rvu_write64(rvu, blkaddr, NIX_AF_RX_CHANX_CFG(0), cfg);
+}
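The workaround depends on the write itself, not on a new value: reading NIX_AF_RX_CHANX_CFG(0) and writing the same value back makes the block rebroadcast XON. A userspace sketch of this read-then-rewrite "kick" pattern, with illustrative helpers standing in for rvu_read64()/rvu_write64():

#include <stdint.h>
#include <stdio.h>

static uint64_t fake_regs[16];
static int writes;

static uint64_t reg_read(int off)
{
    return fake_regs[off];
}

static void reg_write(int off, uint64_t val)
{
    fake_regs[off] = val;
    writes++;               /* hardware acts on the write itself */
}

static void bcast_xon(int chan_cfg_off)
{
    uint64_t cfg = reg_read(chan_cfg_off);

    reg_write(chan_cfg_off, cfg);   /* value unchanged, side effect only */
}

int main(void)
{
    bcast_xon(0);
    printf("writes=%d\n", writes);  /* 1 */
    return 0;
}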
diff --git a/drivers/net/ethernet/mediatek/mtk_wed.c b/drivers/net/ethernet/mediatek/mtk_wed.c
index 0a80d8f8cff7..3dbb113b792c 100644
--- a/drivers/net/ethernet/mediatek/mtk_wed.c
+++ b/drivers/net/ethernet/mediatek/mtk_wed.c
@@ -59,7 +59,9 @@ struct mtk_wed_flow_block_priv {
static const struct mtk_wed_soc_data mt7622_data = {
.regmap = {
.tx_bm_tkid = 0x088,
- .wpdma_rx_ring0 = 0x770,
+ .wpdma_rx_ring = {
+ 0x770,
+ },
.reset_idx_tx_mask = GENMASK(3, 0),
.reset_idx_rx_mask = GENMASK(17, 16),
},
@@ -70,7 +72,9 @@ static const struct mtk_wed_soc_data mt7622_data = {
static const struct mtk_wed_soc_data mt7986_data = {
.regmap = {
.tx_bm_tkid = 0x0c8,
- .wpdma_rx_ring0 = 0x770,
+ .wpdma_rx_ring = {
+ 0x770,
+ },
.reset_idx_tx_mask = GENMASK(1, 0),
.reset_idx_rx_mask = GENMASK(7, 6),
},
@@ -81,7 +85,10 @@ static const struct mtk_wed_soc_data mt7986_data = {
static const struct mtk_wed_soc_data mt7988_data = {
.regmap = {
.tx_bm_tkid = 0x0c8,
- .wpdma_rx_ring0 = 0x7d0,
+ .wpdma_rx_ring = {
+ 0x7d0,
+ 0x7d8,
+ },
.reset_idx_tx_mask = GENMASK(1, 0),
.reset_idx_rx_mask = GENMASK(7, 6),
},
@@ -621,8 +628,8 @@ mtk_wed_amsdu_init(struct mtk_wed_device *dev)
return ret;
}
- /* eagle E1 PCIE1 tx ring 22 flow control issue */
- if (dev->wlan.id == 0x7991)
+ /* Kite and Eagle E1 PCIE1 tx ring 22 flow control issue */
+ if (dev->wlan.id == 0x7991 || dev->wlan.id == 0x7992)
wed_clr(dev, MTK_WED_AMSDU_FIFO, MTK_WED_AMSDU_IS_PRIOR0_RING);
wed_set(dev, MTK_WED_CTRL, MTK_WED_CTRL_TX_AMSDU_EN);
@@ -1239,7 +1246,11 @@ mtk_wed_set_wpdma(struct mtk_wed_device *dev)
return;
wed_w32(dev, MTK_WED_WPDMA_RX_GLO_CFG, dev->wlan.wpdma_rx_glo);
- wed_w32(dev, dev->hw->soc->regmap.wpdma_rx_ring0, dev->wlan.wpdma_rx);
+ wed_w32(dev, dev->hw->soc->regmap.wpdma_rx_ring[0],
+ dev->wlan.wpdma_rx[0]);
+ if (mtk_wed_is_v3_or_greater(dev->hw))
+ wed_w32(dev, dev->hw->soc->regmap.wpdma_rx_ring[1],
+ dev->wlan.wpdma_rx[1]);
if (!dev->wlan.hw_rro)
return;
@@ -2323,6 +2334,16 @@ mtk_wed_start(struct mtk_wed_device *dev, u32 irq_mask)
if (!dev->rx_wdma[i].desc)
mtk_wed_wdma_rx_ring_setup(dev, i, 16, false);
+ if (dev->wlan.hw_rro) {
+ for (i = 0; i < MTK_WED_RX_PAGE_QUEUES; i++) {
+ u32 addr = MTK_WED_RRO_MSDU_PG_CTRL0(i) +
+ MTK_WED_RING_OFS_COUNT;
+
+ if (!wed_r32(dev, addr))
+ wed_w32(dev, addr, 1);
+ }
+ }
+
mtk_wed_hw_init(dev);
mtk_wed_configure_irq(dev, irq_mask);
diff --git a/drivers/net/ethernet/mediatek/mtk_wed.h b/drivers/net/ethernet/mediatek/mtk_wed.h
index c1f0479d7a71..b49aee9a8b65 100644
--- a/drivers/net/ethernet/mediatek/mtk_wed.h
+++ b/drivers/net/ethernet/mediatek/mtk_wed.h
@@ -17,7 +17,7 @@ struct mtk_wed_wo;
struct mtk_wed_soc_data {
struct {
u32 tx_bm_tkid;
- u32 wpdma_rx_ring0;
+ u32 wpdma_rx_ring[MTK_WED_RX_QUEUES];
u32 reset_idx_tx_mask;
u32 reset_idx_rx_mask;
} regmap;
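Growing wpdma_rx_ring0 into a wpdma_rx_ring[MTK_WED_RX_QUEUES] array lets mt7988, which has two RX rings, carry the second register offset in the same per-SoC table. A userspace sketch of the pattern; the real driver gates the second write on mtk_wed_is_v3_or_greater() rather than on a zero offset, which is used here only to keep the sketch self-contained:

#include <stdint.h>
#include <stdio.h>

#define RX_QUEUES 2

struct soc_regmap {
    uint32_t wpdma_rx_ring[RX_QUEUES];
};

static const struct soc_regmap mt7986 = { .wpdma_rx_ring = { 0x770 } };
static const struct soc_regmap mt7988 = { .wpdma_rx_ring = { 0x7d0, 0x7d8 } };

static void program_rings(const char *soc, const struct soc_regmap *map)
{
    for (int i = 0; i < RX_QUEUES; i++)
        if (map->wpdma_rx_ring[i])  /* zero offset: ring absent */
            printf("%s: ring %d at 0x%x\n", soc, i, map->wpdma_rx_ring[i]);
}

int main(void)
{
    program_rings("mt7986", &mt7986);
    program_rings("mt7988", &mt7988);
    return 0;
}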
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
index 6ec7d6e0181d..8ef2ac2060ba 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
@@ -8,7 +8,6 @@ config MLX5_CORE
depends on PCI
select AUXILIARY_BUS
select NET_DEVLINK
- depends on VXLAN || !VXLAN
depends on MLXFW || !MLXFW
depends on PTP_1588_CLOCK_OPTIONAL
depends on PCI_HYPERV_INTERFACE || !PCI_HYPERV_INTERFACE
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index a253c73db9e5..a65ab661375a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -69,7 +69,7 @@ mlx5_core-$(CONFIG_MLX5_TC_SAMPLE) += en/tc/sample.o
# Core extra
#
mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o eswitch_offloads_termtbl.o \
- ecpf.o rdma.o esw/legacy.o \
+ ecpf.o rdma.o esw/legacy.o esw/adj_vport.o \
esw/devlink_port.o esw/vporttbl.o esw/qos.o esw/ipsec.o
mlx5_core-$(CONFIG_MLX5_ESWITCH) += esw/acl/helper.o \
@@ -85,7 +85,9 @@ mlx5_core-$(CONFIG_MLX5_BRIDGE) += esw/bridge.o esw/bridge_mcast.o esw/bridge
mlx5_core-$(CONFIG_HWMON) += hwmon.o
mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o
-mlx5_core-$(CONFIG_VXLAN) += lib/vxlan.o
+ifneq ($(CONFIG_VXLAN),)
+ mlx5_core-y += lib/vxlan.o
+endif
mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o
mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += lib/hv.o lib/hv_vhca.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c
index 86253a89c24c..73f5b62b8c7f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c
@@ -1,6 +1,8 @@
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. */
+#include <linux/mlx5/vport.h>
+
#include "reporter_vnic.h"
#include "en_stats.h"
#include "devlink.h"
@@ -133,7 +135,7 @@ void mlx5_reporter_vnic_create(struct mlx5_core_dev *dev)
health->vnic_reporter =
devlink_health_reporter_create(devlink,
&mlx5_reporter_vnic_ops,
- 0, dev);
+ dev);
if (IS_ERR(health->vnic_reporter))
mlx5_core_warn(dev,
"Failed to create vnic reporter, err = %ld\n",
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
index 16c44d628eda..eb1cace5910c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
@@ -651,22 +651,26 @@ void mlx5e_reporter_icosq_resume_recovery(struct mlx5e_channel *c)
mutex_unlock(&c->icosq_recovery_lock);
}
+#define MLX5E_REPORTER_RX_GRACEFUL_PERIOD 500
+#define MLX5E_REPORTER_RX_BURST_PERIOD 500
+
static const struct devlink_health_reporter_ops mlx5_rx_reporter_ops = {
.name = "rx",
.recover = mlx5e_rx_reporter_recover,
.diagnose = mlx5e_rx_reporter_diagnose,
.dump = mlx5e_rx_reporter_dump,
+ .default_graceful_period = MLX5E_REPORTER_RX_GRACEFUL_PERIOD,
+ .default_burst_period = MLX5E_REPORTER_RX_BURST_PERIOD,
};
-#define MLX5E_REPORTER_RX_GRACEFUL_PERIOD 500
-
void mlx5e_reporter_rx_create(struct mlx5e_priv *priv)
{
+ struct devlink_port *port = priv->netdev->devlink_port;
struct devlink_health_reporter *reporter;
- reporter = devlink_port_health_reporter_create(priv->netdev->devlink_port,
+ reporter = devlink_port_health_reporter_create(port,
&mlx5_rx_reporter_ops,
- MLX5E_REPORTER_RX_GRACEFUL_PERIOD, priv);
+ priv);
if (IS_ERR(reporter)) {
netdev_warn(priv->netdev, "Failed to create rx reporter, err = %ld\n",
PTR_ERR(reporter));
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
index 85d5cb39b107..5a4fe8403a21 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
@@ -539,22 +539,26 @@ void mlx5e_reporter_tx_ptpsq_unhealthy(struct mlx5e_ptpsq *ptpsq)
mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx);
}
+#define MLX5E_REPORTER_TX_GRACEFUL_PERIOD 500
+#define MLX5E_REPORTER_TX_BURST_PERIOD 500
+
static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops = {
.name = "tx",
.recover = mlx5e_tx_reporter_recover,
.diagnose = mlx5e_tx_reporter_diagnose,
.dump = mlx5e_tx_reporter_dump,
+ .default_graceful_period = MLX5E_REPORTER_TX_GRACEFUL_PERIOD,
+ .default_burst_period = MLX5E_REPORTER_TX_BURST_PERIOD,
};
-#define MLX5_REPORTER_TX_GRACEFUL_PERIOD 500
-
void mlx5e_reporter_tx_create(struct mlx5e_priv *priv)
{
+ struct devlink_port *port = priv->netdev->devlink_port;
struct devlink_health_reporter *reporter;
- reporter = devlink_port_health_reporter_create(priv->netdev->devlink_port,
+ reporter = devlink_port_health_reporter_create(port,
&mlx5_tx_reporter_ops,
- MLX5_REPORTER_TX_GRACEFUL_PERIOD, priv);
+ priv);
if (IS_ERR(reporter)) {
netdev_warn(priv->netdev,
"Failed to create tx reporter, err = %ld\n",
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
index 2162d776fe35..a14f216048cd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
@@ -1,7 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/* Copyright (c) 2018 Mellanox Technologies. */
-#include <net/inet_ecn.h>
+#include <net/flow.h>
+#include <net/inet_dscp.h>
#include <net/vxlan.h>
#include <net/gre.h>
#include <net/geneve.h>
@@ -233,7 +234,7 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
int err;
/* add the IP fields */
- attr.fl.fl4.flowi4_tos = tun_key->tos & ~INET_ECN_MASK;
+ attr.fl.fl4.flowi4_dscp = inet_dsfield_to_dscp(tun_key->tos);
attr.fl.fl4.daddr = tun_key->u.ipv4.dst;
attr.fl.fl4.saddr = tun_key->u.ipv4.src;
attr.ttl = tun_key->ttl;
@@ -349,7 +350,7 @@ int mlx5e_tc_tun_update_header_ipv4(struct mlx5e_priv *priv,
int err;
/* add the IP fields */
- attr.fl.fl4.flowi4_tos = tun_key->tos & ~INET_ECN_MASK;
+ attr.fl.fl4.flowi4_dscp = inet_dsfield_to_dscp(tun_key->tos);
attr.fl.fl4.daddr = tun_key->u.ipv4.dst;
attr.fl.fl4.saddr = tun_key->u.ipv4.src;
attr.ttl = tun_key->ttl;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
index 65ccb33edafb..d7a11ff9bbdb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
@@ -498,9 +498,9 @@ static void resync_update_sn(struct mlx5e_rq *rq, struct sk_buff *skb)
depth += sizeof(struct iphdr);
th = (void *)iph + sizeof(struct iphdr);
- sk = inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
- iph->saddr, th->source, iph->daddr,
- th->dest, netdev->ifindex);
+ sk = inet_lookup_established(net, iph->saddr, th->source,
+ iph->daddr, th->dest,
+ netdev->ifindex);
#if IS_ENABLED(CONFIG_IPV6)
} else {
struct ipv6hdr *ipv6h = (struct ipv6hdr *)iph;
@@ -508,8 +508,7 @@ static void resync_update_sn(struct mlx5e_rq *rq, struct sk_buff *skb)
depth += sizeof(struct ipv6hdr);
th = (void *)ipv6h + sizeof(struct ipv6hdr);
- sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
- &ipv6h->saddr, th->source,
+ sk = __inet6_lookup_established(net, &ipv6h->saddr, th->source,
&ipv6h->daddr, ntohs(th->dest),
netdev->ifindex, 0);
#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index e680673ffb72..714cce595692 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -780,13 +780,6 @@ static void mlx5e_rq_shampo_hd_info_free(struct mlx5e_rq *rq)
bitmap_free(rq->mpwqe.shampo->bitmap);
}
-static bool mlx5_rq_needs_separate_hd_pool(struct mlx5e_rq *rq)
-{
- struct netdev_rx_queue *rxq = __netif_get_rx_queue(rq->netdev, rq->ix);
-
- return !!rxq->mp_params.mp_ops;
-}
-
static int mlx5_rq_shampo_alloc(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
struct mlx5e_rq_param *rqp,
@@ -825,7 +818,7 @@ static int mlx5_rq_shampo_alloc(struct mlx5_core_dev *mdev,
hd_pool_size = (rq->mpwqe.shampo->hd_per_wqe * wq_size) /
MLX5E_SHAMPO_WQ_HEADER_PER_PAGE;
- if (mlx5_rq_needs_separate_hd_pool(rq)) {
+ if (netif_rxq_has_unreadable_mp(rq->netdev, rq->ix)) {
/* Separate page pool for shampo headers */
struct page_pool_params pp_params = { };
@@ -5642,12 +5635,36 @@ static int mlx5e_queue_start(struct net_device *dev, void *newq,
return 0;
}
+static struct device *mlx5e_queue_get_dma_dev(struct net_device *dev,
+ int queue_index)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5e_channels *channels;
+ struct device *pdev = NULL;
+ struct mlx5e_channel *ch;
+
+ channels = &priv->channels;
+
+ mutex_lock(&priv->state_lock);
+
+ if (queue_index >= channels->num)
+ goto out;
+
+ ch = channels->c[queue_index];
+ pdev = ch->pdev;
+out:
+ mutex_unlock(&priv->state_lock);
+
+ return pdev;
+}
+
static const struct netdev_queue_mgmt_ops mlx5e_queue_mgmt_ops = {
.ndo_queue_mem_size = sizeof(struct mlx5_qmgmt_data),
.ndo_queue_mem_alloc = mlx5e_queue_mem_alloc,
.ndo_queue_mem_free = mlx5e_queue_mem_free,
.ndo_queue_start = mlx5e_queue_start,
.ndo_queue_stop = mlx5e_queue_stop,
+ .ndo_queue_get_dma_dev = mlx5e_queue_get_dma_dev,
};
static void mlx5e_build_nic_netdev(struct net_device *netdev)
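mlx5e_queue_get_dma_dev() lets the queue-management core ask which DMA device backs a given RX queue (e.g. so a memory provider can map buffers against the right device); the lookup is bounds-checked under the state lock because the channel count can change. A userspace sketch of the same shape, with a pthread mutex standing in for priv->state_lock:

#include <pthread.h>
#include <stddef.h>
#include <stdio.h>

struct device { const char *name; };

struct channels {
    int num;
    struct device *dev[8];
};

static pthread_mutex_t state_lock = PTHREAD_MUTEX_INITIALIZER;

static struct device *queue_get_dma_dev(struct channels *chs, int idx)
{
    struct device *dev = NULL;

    pthread_mutex_lock(&state_lock);
    if (idx >= 0 && idx < chs->num)     /* reject stale indices */
        dev = chs->dev[idx];
    pthread_mutex_unlock(&state_lock);
    return dev;
}

int main(void)
{
    static struct device d = { "mlx5_pci" };
    struct channels chs = { .num = 1, .dev = { &d } };

    printf("%s\n", queue_get_dma_dev(&chs, 0)->name);
    printf("%p\n", (void *)queue_get_dma_dev(&chs, 3));  /* (nil) */
    return 0;
}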
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 63a7a788fb0d..b231e7855bca 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -1447,7 +1447,7 @@ static void mlx5e_rep_vnic_reporter_create(struct mlx5e_priv *priv,
reporter = devl_port_health_reporter_create(dl_port,
&mlx5_rep_vnic_reporter_ops,
- 0, rpriv);
+ rpriv);
if (IS_ERR(reporter)) {
mlx5_core_err(priv->mdev,
"Failed to create representor vnic reporter, err = %ld\n",
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/adj_vport.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/adj_vport.c
new file mode 100644
index 000000000000..0091ba697bae
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/adj_vport.c
@@ -0,0 +1,209 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "fs_core.h"
+#include "eswitch.h"
+
+enum {
+ MLX5_ADJ_VPORT_DISCONNECT = 0x0,
+ MLX5_ADJ_VPORT_CONNECT = 0x1,
+};
+
+static int mlx5_esw_adj_vport_modify(struct mlx5_core_dev *dev,
+ u16 vport, bool connect)
+{
+ u32 in[MLX5_ST_SZ_DW(modify_vport_state_in)] = {};
+
+ MLX5_SET(modify_vport_state_in, in, opcode,
+ MLX5_CMD_OP_MODIFY_VPORT_STATE);
+ MLX5_SET(modify_vport_state_in, in, op_mod,
+ MLX5_VPORT_STATE_OP_MOD_ESW_VPORT);
+ MLX5_SET(modify_vport_state_in, in, other_vport, 1);
+ MLX5_SET(modify_vport_state_in, in, vport_number, vport);
+ MLX5_SET(modify_vport_state_in, in, ingress_connect_valid, 1);
+ MLX5_SET(modify_vport_state_in, in, egress_connect_valid, 1);
+ MLX5_SET(modify_vport_state_in, in, ingress_connect, connect);
+ MLX5_SET(modify_vport_state_in, in, egress_connect, connect);
+
+ return mlx5_cmd_exec_in(dev, modify_vport_state, in);
+}
+
+static void mlx5_esw_destroy_esw_vport(struct mlx5_core_dev *dev, u16 vport)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_esw_vport_in)] = {};
+
+ MLX5_SET(destroy_esw_vport_in, in, opcode,
+ MLX5_CMD_OPCODE_DESTROY_ESW_VPORT);
+ MLX5_SET(destroy_esw_vport_in, in, vport_num, vport);
+
+ mlx5_cmd_exec_in(dev, destroy_esw_vport, in);
+}
+
+static int mlx5_esw_create_esw_vport(struct mlx5_core_dev *dev, u16 vhca_id,
+ u16 *vport_num)
+{
+ u32 out[MLX5_ST_SZ_DW(create_esw_vport_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(create_esw_vport_in)] = {};
+ int err;
+
+ MLX5_SET(create_esw_vport_in, in, opcode,
+ MLX5_CMD_OPCODE_CREATE_ESW_VPORT);
+ MLX5_SET(create_esw_vport_in, in, managed_vhca_id, vhca_id);
+
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (!err)
+ *vport_num = MLX5_GET(create_esw_vport_out, out, vport_num);
+
+ return err;
+}
+
+static int mlx5_esw_adj_vport_create(struct mlx5_eswitch *esw, u16 vhca_id,
+ const void *rid_info_reg)
+{
+ struct mlx5_vport *vport;
+ u16 vport_num;
+ int err;
+
+ err = mlx5_esw_create_esw_vport(esw->dev, vhca_id, &vport_num);
+ if (err) {
+ esw_warn(esw->dev,
+ "Failed to create adjacent vport for vhca_id %d, err %d\n",
+ vhca_id, err);
+ return err;
+ }
+
+ esw_debug(esw->dev, "Created adjacent vport[%d] %d for vhca_id 0x%x\n",
+ esw->last_vport_idx, vport_num, vhca_id);
+
+ err = mlx5_esw_vport_alloc(esw, esw->last_vport_idx++, vport_num);
+ if (err)
+ goto destroy_esw_vport;
+
+ xa_set_mark(&esw->vports, vport_num, MLX5_ESW_VPT_VF);
+ vport = mlx5_eswitch_get_vport(esw, vport_num);
+ vport->adjacent = true;
+ vport->vhca_id = vhca_id;
+
+ vport->adj_info.parent_pci_devfn =
+ MLX5_GET(function_vhca_rid_info_reg, rid_info_reg,
+ parent_pci_device_function);
+ vport->adj_info.function_id =
+ MLX5_GET(function_vhca_rid_info_reg, rid_info_reg, function_id);
+
+ mlx5_fs_vport_egress_acl_ns_add(esw->dev->priv.steering, vport->index);
+ mlx5_fs_vport_ingress_acl_ns_add(esw->dev->priv.steering, vport->index);
+ err = mlx5_esw_offloads_rep_add(esw, vport);
+ if (err)
+ goto acl_ns_remove;
+
+ mlx5_esw_adj_vport_modify(esw->dev, vport_num, MLX5_ADJ_VPORT_CONNECT);
+ return 0;
+
+acl_ns_remove:
+ mlx5_fs_vport_ingress_acl_ns_remove(esw->dev->priv.steering,
+ vport->index);
+ mlx5_fs_vport_egress_acl_ns_remove(esw->dev->priv.steering,
+ vport->index);
+ mlx5_esw_vport_free(esw, vport);
+destroy_esw_vport:
+ mlx5_esw_destroy_esw_vport(esw->dev, vport_num);
+ return err;
+}
+
+static void mlx5_esw_adj_vport_destroy(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ u16 vport_num = vport->vport;
+
+ esw_debug(esw->dev, "Destroying adjacent vport %d for vhca_id 0x%x\n",
+ vport_num, vport->vhca_id);
+ mlx5_esw_adj_vport_modify(esw->dev, vport_num,
+ MLX5_ADJ_VPORT_DISCONNECT);
+ mlx5_esw_offloads_rep_remove(esw, vport);
+ mlx5_fs_vport_egress_acl_ns_remove(esw->dev->priv.steering,
+ vport->index);
+ mlx5_fs_vport_ingress_acl_ns_remove(esw->dev->priv.steering,
+ vport->index);
+ mlx5_esw_vport_free(esw, vport);
+ /* Reset the vport index back so new adj vports can use this index.
+ * Once the vport count can change incrementally, this will need revisiting.
+ */
+ esw->last_vport_idx--;
+ mlx5_esw_destroy_esw_vport(esw->dev, vport_num);
+}
+
+void mlx5_esw_adjacent_vhcas_cleanup(struct mlx5_eswitch *esw)
+{
+ struct mlx5_vport *vport;
+ unsigned long i;
+
+ if (!MLX5_CAP_GEN_2(esw->dev, delegated_vhca_max))
+ return;
+
+ mlx5_esw_for_each_vf_vport(esw, i, vport, U16_MAX) {
+ if (!vport->adjacent)
+ continue;
+ mlx5_esw_adj_vport_destroy(esw, vport);
+ }
+}
+
+void mlx5_esw_adjacent_vhcas_setup(struct mlx5_eswitch *esw)
+{
+ u32 delegated_vhca_max = MLX5_CAP_GEN_2(esw->dev, delegated_vhca_max);
+ u32 in[MLX5_ST_SZ_DW(query_delegated_vhca_in)] = {};
+ int outlen, err, i = 0;
+ u8 *out;
+ u32 count;
+
+ if (!delegated_vhca_max)
+ return;
+
+ outlen = MLX5_ST_SZ_BYTES(query_delegated_vhca_out) +
+ delegated_vhca_max *
+ MLX5_ST_SZ_BYTES(delegated_function_vhca_rid_info);
+
+ esw_debug(esw->dev, "delegated_vhca_max=%d\n", delegated_vhca_max);
+
+ out = kvzalloc(outlen, GFP_KERNEL);
+ if (!out)
+ return;
+
+ MLX5_SET(query_delegated_vhca_in, in, opcode,
+ MLX5_CMD_OPCODE_QUERY_DELEGATED_VHCA);
+
+ err = mlx5_cmd_exec(esw->dev, in, sizeof(in), out, outlen);
+ if (err) {
+ kvfree(out);
+ esw_warn(esw->dev, "Failed to query delegated vhca, err %d\n",
+ err);
+ return;
+ }
+
+ count = MLX5_GET(query_delegated_vhca_out, out, functions_count);
+ esw_debug(esw->dev, "Delegated vhca functions count %d\n", count);
+
+ for (i = 0; i < count; i++) {
+ const void *rid_info, *rid_info_reg;
+ u16 vhca_id;
+
+ rid_info = MLX5_ADDR_OF(query_delegated_vhca_out, out,
+ delegated_function_vhca_rid_info[i]);
+
+ rid_info_reg = MLX5_ADDR_OF(delegated_function_vhca_rid_info,
+ rid_info, function_vhca_rid_info);
+
+ vhca_id = MLX5_GET(function_vhca_rid_info_reg, rid_info_reg,
+ vhca_id);
+ esw_debug(esw->dev, "Delegating vhca_id 0x%x\n", vhca_id);
+
+ err = mlx5_esw_adj_vport_create(esw, vhca_id, rid_info_reg);
+ if (err) {
+ esw_warn(esw->dev,
+ "Failed to init adjacent vhca 0x%x, err %d\n",
+ vhca_id, err);
+ break;
+ }
+ }
+
+ kvfree(out);
+}
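mlx5_esw_adjacent_vhcas_setup() sizes its output buffer for the capability maximum (delegated_vhca_max) but walks only the functions_count records the firmware actually returned. A userspace sketch of that variable-length query/parse pattern, using a plain flexible-array struct in place of the MLX5_ADDR_OF()/MLX5_GET() accessors:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct rid_info {
    uint16_t vhca_id;
    uint16_t function_id;
};

struct query_out {
    uint32_t functions_count;
    struct rid_info entries[];      /* up to the queried maximum */
};

int main(void)
{
    unsigned int max_entries = 4;   /* stands in for delegated_vhca_max */
    size_t outlen = sizeof(struct query_out) +
                    max_entries * sizeof(struct rid_info);
    struct query_out *out = calloc(1, outlen);

    if (!out)
        return 1;

    /* pretend firmware filled two of the four possible records */
    out->functions_count = 2;
    out->entries[0] = (struct rid_info){ .vhca_id = 0x10, .function_id = 1 };
    out->entries[1] = (struct rid_info){ .vhca_id = 0x11, .function_id = 2 };

    for (uint32_t i = 0; i < out->functions_count; i++)
        printf("vhca 0x%x func %u\n",
               out->entries[i].vhca_id, out->entries[i].function_id);

    free(out);
    return 0;
}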
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
index c33accadae0f..cf88a106d80d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
@@ -27,6 +27,7 @@ static void mlx5_esw_offloads_pf_vf_devlink_port_attrs_set(struct mlx5_eswitch *
{
struct mlx5_core_dev *dev = esw->dev;
struct netdev_phys_item_id ppid = {};
+ struct mlx5_vport *vport;
u32 controller_num = 0;
bool external;
u16 pfnum;
@@ -42,10 +43,18 @@ static void mlx5_esw_offloads_pf_vf_devlink_port_attrs_set(struct mlx5_eswitch *
dl_port->attrs.switch_id.id_len = ppid.id_len;
devlink_port_attrs_pci_pf_set(dl_port, controller_num, pfnum, external);
} else if (mlx5_eswitch_is_vf_vport(esw, vport_num)) {
+ u16 func_id = vport_num - 1;
+
+ vport = mlx5_eswitch_get_vport(esw, vport_num);
memcpy(dl_port->attrs.switch_id.id, ppid.id, ppid.id_len);
dl_port->attrs.switch_id.id_len = ppid.id_len;
+ if (vport->adjacent) {
+ func_id = vport->adj_info.function_id;
+ pfnum = vport->adj_info.parent_pci_devfn;
+ }
+
devlink_port_attrs_pci_vf_set(dl_port, controller_num, pfnum,
- vport_num - 1, external);
+ func_id, external);
} else if (mlx5_core_is_ec_vf_vport(esw->dev, vport_num)) {
u16 base_vport = mlx5_core_ec_vf_vport_base(dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 4917d185d0c3..10eca910a2db 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -820,6 +820,7 @@ static int mlx5_esw_vport_caps_get(struct mlx5_eswitch *esw, struct mlx5_vport *
hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability);
vport->info.roce_enabled = MLX5_GET(cmd_hca_cap, hca_caps, roce);
+ vport->vhca_id = MLX5_GET(cmd_hca_cap, hca_caps, vhca_id);
if (!MLX5_CAP_GEN_MAX(esw->dev, hca_cap_2))
goto out_free;
@@ -839,6 +840,18 @@ out_free:
return err;
}
+bool mlx5_esw_vport_vhca_id(struct mlx5_eswitch *esw, u16 vportn, u16 *vhca_id)
+{
+ struct mlx5_vport *vport;
+
+ vport = mlx5_eswitch_get_vport(esw, vportn);
+ if (IS_ERR(vport) || MLX5_VPORT_INVAL_VHCA_ID(vport))
+ return false;
+
+ *vhca_id = vport->vhca_id;
+ return true;
+}
+
static int esw_vport_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
{
bool vst_mode_steering = esw_vst_mode_is_steering(esw);
@@ -929,7 +942,7 @@ int mlx5_esw_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
if (!mlx5_esw_is_manager_vport(esw, vport_num) &&
MLX5_CAP_GEN(esw->dev, vhca_resource_manager)) {
- ret = mlx5_esw_vport_vhca_id_set(esw, vport_num);
+ ret = mlx5_esw_vport_vhca_id_map(esw, vport);
if (ret)
goto err_vhca_mapping;
}
@@ -973,7 +986,7 @@ void mlx5_esw_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
if (!mlx5_esw_is_manager_vport(esw, vport_num) &&
MLX5_CAP_GEN(esw->dev, vhca_resource_manager))
- mlx5_esw_vport_vhca_id_clear(esw, vport_num);
+ mlx5_esw_vport_vhca_id_unmap(esw, vport);
if (vport->vport != MLX5_VPORT_PF &&
(vport->info.ipsec_crypto_enabled || vport->info.ipsec_packet_enabled))
@@ -1038,6 +1051,25 @@ const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev)
return ERR_PTR(err);
}
+static int mlx5_esw_host_functions_enabled_query(struct mlx5_eswitch *esw)
+{
+ const u32 *query_host_out;
+
+ if (!mlx5_core_is_ecpf_esw_manager(esw->dev))
+ return 0;
+
+ query_host_out = mlx5_esw_query_functions(esw->dev);
+ if (IS_ERR(query_host_out))
+ return PTR_ERR(query_host_out);
+
+ esw->esw_funcs.host_funcs_disabled =
+ MLX5_GET(query_esw_functions_out, query_host_out,
+ host_params_context.host_pf_not_exist);
+
+ kvfree(query_host_out);
+ return 0;
+}
+
static void mlx5_eswitch_event_handler_register(struct mlx5_eswitch *esw)
{
if (esw->mode == MLX5_ESWITCH_OFFLOADS && mlx5_eswitch_is_funcs_handler(esw->dev)) {
@@ -1185,7 +1217,8 @@ void mlx5_eswitch_unload_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs)
unsigned long i;
mlx5_esw_for_each_vf_vport(esw, i, vport, num_vfs) {
- if (!vport->enabled)
+ /* Adjacent VFs are unloaded separately */
+ if (!vport->enabled || vport->adjacent)
continue;
mlx5_eswitch_unload_pf_vf_vport(esw, vport->vport);
}
@@ -1204,6 +1237,42 @@ static void mlx5_eswitch_unload_ec_vf_vports(struct mlx5_eswitch *esw,
}
}
+static void mlx5_eswitch_unload_adj_vf_vports(struct mlx5_eswitch *esw)
+{
+ struct mlx5_vport *vport;
+ unsigned long i;
+
+ mlx5_esw_for_each_vf_vport(esw, i, vport, U16_MAX) {
+ if (!vport->enabled || !vport->adjacent)
+ continue;
+ mlx5_eswitch_unload_pf_vf_vport(esw, vport->vport);
+ }
+}
+
+static int
+mlx5_eswitch_load_adj_vf_vports(struct mlx5_eswitch *esw,
+ enum mlx5_eswitch_vport_event enabled_events)
+{
+ struct mlx5_vport *vport;
+ unsigned long i;
+ int err;
+
+ mlx5_esw_for_each_vf_vport(esw, i, vport, U16_MAX) {
+ if (!vport->adjacent)
+ continue;
+ err = mlx5_eswitch_load_pf_vf_vport(esw, vport->vport,
+ enabled_events);
+ if (err)
+ goto unload_adj_vf_vport;
+ }
+
+ return 0;
+
+unload_adj_vf_vport:
+ mlx5_eswitch_unload_adj_vf_vports(esw);
+ return err;
+}
+
int mlx5_eswitch_load_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs,
enum mlx5_eswitch_vport_event enabled_events)
{
@@ -1278,17 +1347,19 @@ mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw,
esw->mode == MLX5_ESWITCH_LEGACY;
/* Enable PF vport */
- if (pf_needed) {
+ if (pf_needed && mlx5_esw_host_functions_enabled(esw->dev)) {
ret = mlx5_eswitch_load_pf_vf_vport(esw, MLX5_VPORT_PF,
enabled_events);
if (ret)
return ret;
}
- /* Enable external host PF HCA */
- ret = host_pf_enable_hca(esw->dev);
- if (ret)
- goto pf_hca_err;
+ if (mlx5_esw_host_functions_enabled(esw->dev)) {
+ /* Enable external host PF HCA */
+ ret = host_pf_enable_hca(esw->dev);
+ if (ret)
+ goto pf_hca_err;
+ }
/* Enable ECPF vport */
if (mlx5_ecpf_vport_exists(esw->dev)) {
@@ -1311,8 +1382,16 @@ mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw,
enabled_events);
if (ret)
goto vf_err;
+
+ /* Enable adjacent VF vports */
+ ret = mlx5_eswitch_load_adj_vf_vports(esw, enabled_events);
+ if (ret)
+ goto unload_vf_vports;
+
return 0;
+unload_vf_vports:
+ mlx5_eswitch_unload_vf_vports(esw, esw->esw_funcs.num_vfs);
vf_err:
if (mlx5_core_ec_sriov_enabled(esw->dev))
mlx5_eswitch_unload_ec_vf_vports(esw, esw->esw_funcs.num_ec_vfs);
@@ -1320,9 +1399,10 @@ ec_vf_err:
if (mlx5_ecpf_vport_exists(esw->dev))
mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_ECPF);
ecpf_err:
- host_pf_disable_hca(esw->dev);
+ if (mlx5_esw_host_functions_enabled(esw->dev))
+ host_pf_disable_hca(esw->dev);
pf_hca_err:
- if (pf_needed)
+ if (pf_needed && mlx5_esw_host_functions_enabled(esw->dev))
mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_PF);
return ret;
}
@@ -1332,6 +1412,8 @@ pf_hca_err:
*/
void mlx5_eswitch_disable_pf_vf_vports(struct mlx5_eswitch *esw)
{
+ mlx5_eswitch_unload_adj_vf_vports(esw);
+
mlx5_eswitch_unload_vf_vports(esw, esw->esw_funcs.num_vfs);
if (mlx5_core_ec_sriov_enabled(esw->dev))
@@ -1342,10 +1424,12 @@ void mlx5_eswitch_disable_pf_vf_vports(struct mlx5_eswitch *esw)
mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_ECPF);
}
- host_pf_disable_hca(esw->dev);
+ if (mlx5_esw_host_functions_enabled(esw->dev))
+ host_pf_disable_hca(esw->dev);
- if (mlx5_core_is_ecpf_esw_manager(esw->dev) ||
- esw->mode == MLX5_ESWITCH_LEGACY)
+ if ((mlx5_core_is_ecpf_esw_manager(esw->dev) ||
+ esw->mode == MLX5_ESWITCH_LEGACY) &&
+ mlx5_esw_host_functions_enabled(esw->dev))
mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_PF);
}
@@ -1402,19 +1486,76 @@ static void mlx5_esw_mode_change_notify(struct mlx5_eswitch *esw, u16 mode)
blocking_notifier_call_chain(&esw->n_head, 0, &info);
}
+static int mlx5_esw_egress_acls_init(struct mlx5_core_dev *dev)
+{
+ struct mlx5_flow_steering *steering = dev->priv.steering;
+ int total_vports = mlx5_eswitch_get_total_vports(dev);
+ int err;
+ int i;
+
+ for (i = 0; i < total_vports; i++) {
+ err = mlx5_fs_vport_egress_acl_ns_add(steering, i);
+ if (err)
+ goto acl_ns_remove;
+ }
+ return 0;
+
+acl_ns_remove:
+ while (i--)
+ mlx5_fs_vport_egress_acl_ns_remove(steering, i);
+ return err;
+}
+
+static void mlx5_esw_egress_acls_cleanup(struct mlx5_core_dev *dev)
+{
+ struct mlx5_flow_steering *steering = dev->priv.steering;
+ int total_vports = mlx5_eswitch_get_total_vports(dev);
+ int i;
+
+ for (i = total_vports - 1; i >= 0; i--)
+ mlx5_fs_vport_egress_acl_ns_remove(steering, i);
+}
+
+static int mlx5_esw_ingress_acls_init(struct mlx5_core_dev *dev)
+{
+ struct mlx5_flow_steering *steering = dev->priv.steering;
+ int total_vports = mlx5_eswitch_get_total_vports(dev);
+ int err;
+ int i;
+
+ for (i = 0; i < total_vports; i++) {
+ err = mlx5_fs_vport_ingress_acl_ns_add(steering, i);
+ if (err)
+ goto acl_ns_remove;
+ }
+ return 0;
+
+acl_ns_remove:
+ while (i--)
+ mlx5_fs_vport_ingress_acl_ns_remove(steering, i);
+ return err;
+}
+
+static void mlx5_esw_ingress_acls_cleanup(struct mlx5_core_dev *dev)
+{
+ struct mlx5_flow_steering *steering = dev->priv.steering;
+ int total_vports = mlx5_eswitch_get_total_vports(dev);
+ int i;
+
+ for (i = total_vports - 1; i >= 0; i--)
+ mlx5_fs_vport_ingress_acl_ns_remove(steering, i);
+}
+
static int mlx5_esw_acls_ns_init(struct mlx5_eswitch *esw)
{
struct mlx5_core_dev *dev = esw->dev;
- int total_vports;
int err;
if (esw->flags & MLX5_ESWITCH_VPORT_ACL_NS_CREATED)
return 0;
- total_vports = mlx5_eswitch_get_total_vports(dev);
-
if (MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support)) {
- err = mlx5_fs_egress_acls_init(dev, total_vports);
+ err = mlx5_esw_egress_acls_init(dev);
if (err)
return err;
} else {
@@ -1422,7 +1563,7 @@ static int mlx5_esw_acls_ns_init(struct mlx5_eswitch *esw)
}
if (MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support)) {
- err = mlx5_fs_ingress_acls_init(dev, total_vports);
+ err = mlx5_esw_ingress_acls_init(dev);
if (err)
goto err;
} else {
@@ -1433,7 +1574,7 @@ static int mlx5_esw_acls_ns_init(struct mlx5_eswitch *esw)
err:
if (MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support))
- mlx5_fs_egress_acls_cleanup(dev);
+ mlx5_esw_egress_acls_cleanup(dev);
return err;
}
@@ -1443,9 +1584,9 @@ static void mlx5_esw_acls_ns_cleanup(struct mlx5_eswitch *esw)
esw->flags &= ~MLX5_ESWITCH_VPORT_ACL_NS_CREATED;
if (MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support))
- mlx5_fs_ingress_acls_cleanup(dev);
+ mlx5_esw_ingress_acls_cleanup(dev);
if (MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support))
- mlx5_fs_egress_acls_cleanup(dev);
+ mlx5_esw_egress_acls_cleanup(dev);
}
/**
@@ -1674,7 +1815,8 @@ int mlx5_esw_sf_max_hpf_functions(struct mlx5_core_dev *dev, u16 *max_sfs, u16 *
void *hca_caps;
int err;
- if (!mlx5_core_is_ecpf(dev)) {
+ if (!mlx5_core_is_ecpf(dev) ||
+ !mlx5_esw_host_functions_enabled(dev)) {
*max_sfs = 0;
return 0;
}
@@ -1696,8 +1838,7 @@ out_free:
return err;
}
-static int mlx5_esw_vport_alloc(struct mlx5_eswitch *esw,
- int index, u16 vport_num)
+int mlx5_esw_vport_alloc(struct mlx5_eswitch *esw, int index, u16 vport_num)
{
struct mlx5_vport *vport;
int err;
@@ -1710,6 +1851,7 @@ static int mlx5_esw_vport_alloc(struct mlx5_eswitch *esw,
vport->vport = vport_num;
vport->index = index;
vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO;
+ vport->vhca_id = MLX5_VHCA_ID_INVALID;
INIT_WORK(&vport->vport_change_handler, esw_vport_change_handler);
err = xa_insert(&esw->vports, vport_num, vport, GFP_KERNEL);
if (err)
@@ -1723,8 +1865,9 @@ insert_err:
return err;
}
-static void mlx5_esw_vport_free(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
+void mlx5_esw_vport_free(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
{
+ esw->total_vports--;
xa_erase(&esw->vports, vport->vport);
kfree(vport);
}
@@ -1750,21 +1893,23 @@ static int mlx5_esw_vports_init(struct mlx5_eswitch *esw)
xa_init(&esw->vports);
- err = mlx5_esw_vport_alloc(esw, idx, MLX5_VPORT_PF);
- if (err)
- goto err;
- if (esw->first_host_vport == MLX5_VPORT_PF)
- xa_set_mark(&esw->vports, idx, MLX5_ESW_VPT_HOST_FN);
- idx++;
-
- for (i = 0; i < mlx5_core_max_vfs(dev); i++) {
- err = mlx5_esw_vport_alloc(esw, idx, idx);
+ if (mlx5_esw_host_functions_enabled(dev)) {
+ err = mlx5_esw_vport_alloc(esw, idx, MLX5_VPORT_PF);
if (err)
goto err;
- xa_set_mark(&esw->vports, idx, MLX5_ESW_VPT_VF);
- xa_set_mark(&esw->vports, idx, MLX5_ESW_VPT_HOST_FN);
+ if (esw->first_host_vport == MLX5_VPORT_PF)
+ xa_set_mark(&esw->vports, idx, MLX5_ESW_VPT_HOST_FN);
idx++;
+ for (i = 0; i < mlx5_core_max_vfs(dev); i++) {
+ err = mlx5_esw_vport_alloc(esw, idx, idx);
+ if (err)
+ goto err;
+ xa_set_mark(&esw->vports, idx, MLX5_ESW_VPT_VF);
+ xa_set_mark(&esw->vports, idx, MLX5_ESW_VPT_HOST_FN);
+ idx++;
+ }
}
+
base_sf_num = mlx5_sf_start_function_id(dev);
for (i = 0; i < mlx5_sf_max_functions(dev); i++) {
err = mlx5_esw_vport_alloc(esw, idx, base_sf_num + i);
@@ -1806,6 +1951,9 @@ static int mlx5_esw_vports_init(struct mlx5_eswitch *esw)
err = mlx5_esw_vport_alloc(esw, idx, MLX5_VPORT_UPLINK);
if (err)
goto err;
+
+ /* Adjacent vports or other dynamically created vports will use this */
+ esw->last_vport_idx = ++idx;
return 0;
err:
@@ -1864,6 +2012,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
goto free_esw;
esw->dev = dev;
+ dev->priv.eswitch = esw;
esw->manager_vport = mlx5_eswitch_manager_vport(dev);
esw->first_host_vport = mlx5_eswitch_first_host_vport_num(dev);
@@ -1874,11 +2023,14 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
goto abort;
}
+ err = mlx5_esw_host_functions_enabled_query(esw);
+ if (err)
+ goto abort;
+
err = mlx5_esw_vports_init(esw);
if (err)
goto abort;
- dev->priv.eswitch = esw;
err = esw_offloads_init(esw);
if (err)
goto reps_err;
@@ -2410,3 +2562,11 @@ void mlx5_eswitch_unblock_ipsec(struct mlx5_core_dev *dev)
dev->num_ipsec_offloads--;
mutex_unlock(&esw->state_lock);
}
+
+bool mlx5_esw_host_functions_enabled(const struct mlx5_core_dev *dev)
+{
+ if (!dev->priv.eswitch)
+ return true;
+
+ return !dev->priv.eswitch->esw_funcs.host_funcs_disabled;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 45506ad56847..4fe285ce32aa 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -197,6 +197,11 @@ static inline struct mlx5_vport *mlx5_devlink_port_vport_get(struct devlink_port
return mlx5_devlink_port_get(dl_port)->vport;
}
+#define MLX5_VHCA_ID_INVALID (-1)
+
+#define MLX5_VPORT_INVAL_VHCA_ID(vport) \
+ ((vport)->vhca_id == MLX5_VHCA_ID_INVALID)
+
struct mlx5_vport {
struct mlx5_core_dev *dev;
struct hlist_head uc_list[MLX5_L2_ADDR_HASH_SIZE];
@@ -209,6 +214,13 @@ struct mlx5_vport {
struct vport_egress egress;
u32 default_metadata;
u32 metadata;
+ int vhca_id;
+
+ bool adjacent; /* delegated vhca from adjacent function */
+ struct {
+ u16 parent_pci_devfn; /* Adjacent parent PCI device function */
+ u16 function_id; /* Function ID of the delegated VPort */
+ } adj_info;
struct mlx5_vport_info info;
@@ -323,6 +335,7 @@ struct mlx5_host_work {
struct mlx5_esw_functions {
struct mlx5_nb nb;
+ bool host_funcs_disabled;
u16 num_vfs;
u16 num_ec_vfs;
};
@@ -377,6 +390,7 @@ struct mlx5_eswitch {
struct mlx5_esw_bridge_offloads *br_offloads;
struct mlx5_esw_offload offloads;
+ u32 last_vport_idx;
int mode;
u16 manager_vport;
u16 first_host_vport;
@@ -410,6 +424,8 @@ int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32
/* E-Switch API */
int mlx5_eswitch_init(struct mlx5_core_dev *dev);
void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw);
+int mlx5_esw_vport_alloc(struct mlx5_eswitch *esw, int index, u16 vport_num);
+void mlx5_esw_vport_free(struct mlx5_eswitch *esw, struct mlx5_vport *vport);
#define MLX5_ESWITCH_IGNORE_NUM_VFS (-1)
int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int num_vfs);
@@ -615,6 +631,9 @@ bool mlx5_esw_multipath_prereq(struct mlx5_core_dev *dev0,
const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev);
+void mlx5_esw_adjacent_vhcas_setup(struct mlx5_eswitch *esw);
+void mlx5_esw_adjacent_vhcas_cleanup(struct mlx5_eswitch *esw);
+
#define MLX5_DEBUG_ESWITCH_MASK BIT(3)
#define esw_info(__dev, format, ...) \
@@ -817,9 +836,17 @@ struct devlink_port *mlx5_esw_offloads_devlink_port(struct mlx5_eswitch *esw, u1
int mlx5_esw_sf_max_hpf_functions(struct mlx5_core_dev *dev, u16 *max_sfs, u16 *sf_base_id);
-int mlx5_esw_vport_vhca_id_set(struct mlx5_eswitch *esw, u16 vport_num);
-void mlx5_esw_vport_vhca_id_clear(struct mlx5_eswitch *esw, u16 vport_num);
+int mlx5_esw_vport_vhca_id_map(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport);
+void mlx5_esw_vport_vhca_id_unmap(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport);
int mlx5_eswitch_vhca_id_to_vport(struct mlx5_eswitch *esw, u16 vhca_id, u16 *vport_num);
+bool mlx5_esw_vport_vhca_id(struct mlx5_eswitch *esw, u16 vportn, u16 *vhca_id);
+
+void mlx5_esw_offloads_rep_remove(struct mlx5_eswitch *esw,
+ const struct mlx5_vport *vport);
+int mlx5_esw_offloads_rep_add(struct mlx5_eswitch *esw,
+ const struct mlx5_vport *vport);
/**
* struct mlx5_esw_event_info - Indicates eswitch mode changed/changing.
@@ -893,6 +920,7 @@ int mlx5_esw_ipsec_vf_packet_offload_set(struct mlx5_eswitch *esw, struct mlx5_v
bool enable);
int mlx5_esw_ipsec_vf_packet_offload_supported(struct mlx5_core_dev *dev,
u16 vport_num);
+bool mlx5_esw_host_functions_enabled(const struct mlx5_core_dev *dev);
#else /* CONFIG_MLX5_ESWITCH */
/* eswitch API stubs */
static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; }
@@ -960,6 +988,19 @@ static inline bool mlx5_eswitch_block_ipsec(struct mlx5_core_dev *dev)
}
static inline void mlx5_eswitch_unblock_ipsec(struct mlx5_core_dev *dev) {}
+
+static inline bool
+mlx5_esw_host_functions_enabled(const struct mlx5_core_dev *dev)
+{
+ return true;
+}
+
+static inline bool
+mlx5_esw_vport_vhca_id(struct mlx5_eswitch *esw, u16 vportn, u16 *vhca_id)
+{
+	return false;
+}
+
#endif /* CONFIG_MLX5_ESWITCH */
#endif /* __MLX5_ESWITCH_H__ */
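
The header widens the cached per-vport vhca_id to a signed int so that -1 can
serve as a "not yet queried" sentinel, which MLX5_VPORT_INVAL_VHCA_ID tests
for. A small self-contained sketch of that sentinel convention (simplified
types, not the mlx5 structs):

#include <assert.h>

#define VHCA_ID_INVALID (-1)

struct vport {
	int vhca_id;	/* 16-bit HW id widened so -1 can mean "unset" */
};

#define VPORT_INVAL_VHCA_ID(v) ((v)->vhca_id == VHCA_ID_INVALID)

int main(void)
{
	struct vport v = { .vhca_id = VHCA_ID_INVALID };

	assert(VPORT_INVAL_VHCA_ID(&v));	/* unset until queried from FW */
	v.vhca_id = 0x1234;			/* cached after the first query */
	assert(!VPORT_INVAL_VHCA_ID(&v));
	return 0;
}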
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index bee906661282..d57f86d297ab 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -1213,7 +1213,8 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
misc_parameters);
- if (mlx5_core_is_ecpf_esw_manager(peer_dev)) {
+ if (mlx5_core_is_ecpf_esw_manager(peer_dev) &&
+ mlx5_esw_host_functions_enabled(peer_dev)) {
peer_vport = mlx5_eswitch_get_vport(peer_esw, MLX5_VPORT_PF);
esw_set_peer_miss_rule_source_port(esw, peer_esw, spec,
MLX5_VPORT_PF);
@@ -1239,19 +1240,21 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
flows[peer_vport->index] = flow;
}
- mlx5_esw_for_each_vf_vport(peer_esw, i, peer_vport,
- mlx5_core_max_vfs(peer_dev)) {
- esw_set_peer_miss_rule_source_port(esw,
- peer_esw,
- spec, peer_vport->vport);
+ if (mlx5_esw_host_functions_enabled(esw->dev)) {
+ mlx5_esw_for_each_vf_vport(peer_esw, i, peer_vport,
+ mlx5_core_max_vfs(peer_dev)) {
+ esw_set_peer_miss_rule_source_port(esw, peer_esw,
+ spec,
+ peer_vport->vport);
- flow = mlx5_add_flow_rules(mlx5_eswitch_get_slow_fdb(esw),
- spec, &flow_act, &dest, 1);
- if (IS_ERR(flow)) {
- err = PTR_ERR(flow);
- goto add_vf_flow_err;
+ flow = mlx5_add_flow_rules(mlx5_eswitch_get_slow_fdb(esw),
+ spec, &flow_act, &dest, 1);
+ if (IS_ERR(flow)) {
+ err = PTR_ERR(flow);
+ goto add_vf_flow_err;
+ }
+ flows[peer_vport->index] = flow;
}
- flows[peer_vport->index] = flow;
}
if (mlx5_core_ec_sriov_enabled(peer_dev)) {
@@ -1301,7 +1304,9 @@ add_vf_flow_err:
mlx5_del_flow_rules(flows[peer_vport->index]);
}
add_ecpf_flow_err:
- if (mlx5_core_is_ecpf_esw_manager(peer_dev)) {
+
+ if (mlx5_core_is_ecpf_esw_manager(peer_dev) &&
+ mlx5_esw_host_functions_enabled(peer_dev)) {
peer_vport = mlx5_eswitch_get_vport(peer_esw, MLX5_VPORT_PF);
mlx5_del_flow_rules(flows[peer_vport->index]);
}
@@ -2373,7 +2378,20 @@ static int esw_offloads_start(struct mlx5_eswitch *esw,
return 0;
}
-static int mlx5_esw_offloads_rep_init(struct mlx5_eswitch *esw, const struct mlx5_vport *vport)
+void mlx5_esw_offloads_rep_remove(struct mlx5_eswitch *esw,
+ const struct mlx5_vport *vport)
+{
+ struct mlx5_eswitch_rep *rep = xa_load(&esw->offloads.vport_reps,
+ vport->vport);
+
+ if (!rep)
+ return;
+ xa_erase(&esw->offloads.vport_reps, vport->vport);
+ kfree(rep);
+}
+
+int mlx5_esw_offloads_rep_add(struct mlx5_eswitch *esw,
+ const struct mlx5_vport *vport)
{
struct mlx5_eswitch_rep *rep;
int rep_type;
@@ -2385,9 +2403,19 @@ static int mlx5_esw_offloads_rep_init(struct mlx5_eswitch *esw, const struct mlx
rep->vport = vport->vport;
rep->vport_index = vport->index;
- for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++)
- atomic_set(&rep->rep_data[rep_type].state, REP_UNREGISTERED);
-
+ for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) {
+ if (!esw->offloads.rep_ops[rep_type]) {
+ atomic_set(&rep->rep_data[rep_type].state,
+ REP_UNREGISTERED);
+ continue;
+ }
+		/* Dynamic/delegated vports add their representors after
+		 * mlx5_eswitch_register_vport_reps, so mark them as registered
+		 * here so they get loaded later along with the others.
+ */
+ rep->esw = esw;
+ atomic_set(&rep->rep_data[rep_type].state, REP_REGISTERED);
+ }
err = xa_insert(&esw->offloads.vport_reps, rep->vport, rep, GFP_KERNEL);
if (err)
goto insert_err;
@@ -2425,7 +2453,7 @@ static int esw_offloads_init_reps(struct mlx5_eswitch *esw)
xa_init(&esw->offloads.vport_reps);
mlx5_esw_for_each_vport(esw, i, vport) {
- err = mlx5_esw_offloads_rep_init(esw, vport);
+ err = mlx5_esw_offloads_rep_add(esw, vport);
if (err)
goto err;
}
@@ -3533,6 +3561,8 @@ int esw_offloads_enable(struct mlx5_eswitch *esw)
int err;
mutex_init(&esw->offloads.termtbl_mutex);
+ mlx5_esw_adjacent_vhcas_setup(esw);
+
err = mlx5_rdma_enable_roce(esw->dev);
if (err)
goto err_roce;
@@ -3597,6 +3627,7 @@ err_vport_metadata:
err_metadata:
mlx5_rdma_disable_roce(esw->dev);
err_roce:
+ mlx5_esw_adjacent_vhcas_cleanup(esw);
mutex_destroy(&esw->offloads.termtbl_mutex);
return err;
}
@@ -3630,6 +3661,7 @@ void esw_offloads_disable(struct mlx5_eswitch *esw)
mapping_destroy(esw->offloads.reg_c0_obj_pool);
esw_offloads_metadata_uninit(esw);
mlx5_rdma_disable_roce(esw->dev);
+ mlx5_esw_adjacent_vhcas_cleanup(esw);
mutex_destroy(&esw->offloads.termtbl_mutex);
}
@@ -4059,7 +4091,8 @@ mlx5_eswitch_vport_has_rep(const struct mlx5_eswitch *esw, u16 vport_num)
{
/* Currently, only ECPF based device has representor for host PF. */
if (vport_num == MLX5_VPORT_PF &&
- !mlx5_core_is_ecpf_esw_manager(esw->dev))
+ (!mlx5_core_is_ecpf_esw_manager(esw->dev) ||
+ !mlx5_esw_host_functions_enabled(esw->dev)))
return false;
if (vport_num == MLX5_VPORT_ECPF &&
@@ -4161,23 +4194,28 @@ u32 mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw,
}
EXPORT_SYMBOL(mlx5_eswitch_get_vport_metadata_for_match);
-int mlx5_esw_vport_vhca_id_set(struct mlx5_eswitch *esw, u16 vport_num)
+int mlx5_esw_vport_vhca_id_map(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
{
u16 *old_entry, *vhca_map_entry, vhca_id;
- int err;
- err = mlx5_vport_get_vhca_id(esw->dev, vport_num, &vhca_id);
- if (err) {
- esw_warn(esw->dev, "Getting vhca_id for vport failed (vport=%u,err=%d)\n",
- vport_num, err);
- return err;
+ if (WARN_ONCE(MLX5_VPORT_INVAL_VHCA_ID(vport),
+ "vport %d vhca_id is not set", vport->vport)) {
+ int err;
+
+ err = mlx5_vport_get_vhca_id(vport->dev, vport->vport,
+ &vhca_id);
+ if (err)
+ return err;
+ vport->vhca_id = vhca_id;
}
+ vhca_id = vport->vhca_id;
vhca_map_entry = kmalloc(sizeof(*vhca_map_entry), GFP_KERNEL);
if (!vhca_map_entry)
return -ENOMEM;
- *vhca_map_entry = vport_num;
+ *vhca_map_entry = vport->vport;
old_entry = xa_store(&esw->offloads.vhca_map, vhca_id, vhca_map_entry, GFP_KERNEL);
if (xa_is_err(old_entry)) {
kfree(vhca_map_entry);
@@ -4187,17 +4225,12 @@ int mlx5_esw_vport_vhca_id_set(struct mlx5_eswitch *esw, u16 vport_num)
return 0;
}
-void mlx5_esw_vport_vhca_id_clear(struct mlx5_eswitch *esw, u16 vport_num)
+void mlx5_esw_vport_vhca_id_unmap(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
{
- u16 *vhca_map_entry, vhca_id;
- int err;
-
- err = mlx5_vport_get_vhca_id(esw->dev, vport_num, &vhca_id);
- if (err)
- esw_warn(esw->dev, "Getting vhca_id for vport failed (vport=%hu,err=%d)\n",
- vport_num, err);
+ u16 *vhca_map_entry;
- vhca_map_entry = xa_erase(&esw->offloads.vhca_map, vhca_id);
+ vhca_map_entry = xa_erase(&esw->offloads.vhca_map, vport->vhca_id);
kfree(vhca_map_entry);
}
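
The map/unmap pair above stores a heap-allocated vport number keyed by vhca_id
and frees whatever the erase returns, so the map owns its entries. A compact
user-space sketch of that own-the-entry pattern, with a flat table standing in
for the xarray:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define MAP_SIZE 64
static uint16_t *vhca_map[MAP_SIZE];	/* stand-in for the xarray */

static int map_vhca(uint16_t vhca_id, uint16_t vport)
{
	uint16_t *entry = malloc(sizeof(*entry));

	if (!entry)
		return -1;
	*entry = vport;
	free(vhca_map[vhca_id % MAP_SIZE]);	/* replace like xa_store() */
	vhca_map[vhca_id % MAP_SIZE] = entry;
	return 0;
}

static void unmap_vhca(uint16_t vhca_id)
{
	uint16_t *entry = vhca_map[vhca_id % MAP_SIZE];

	vhca_map[vhca_id % MAP_SIZE] = NULL;	/* erase, then free the entry */
	free(entry);
}

int main(void)
{
	map_vhca(0x12, 7);
	printf("vhca 0x12 -> vport %u\n", *vhca_map[0x12 % MAP_SIZE]);
	unmap_vhca(0x12);
	return 0;
}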
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index cb165085a4c1..4308e89802f3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -2793,30 +2793,32 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
}
EXPORT_SYMBOL(mlx5_get_flow_namespace);
+struct mlx5_vport_acl_root_ns {
+ u16 vport_idx;
+ struct mlx5_flow_root_namespace *root_ns;
+};
+
struct mlx5_flow_namespace *
mlx5_get_flow_vport_namespace(struct mlx5_core_dev *dev,
enum mlx5_flow_namespace_type type, int vport_idx)
{
struct mlx5_flow_steering *steering = dev->priv.steering;
+ struct mlx5_vport_acl_root_ns *vport_ns;
if (!steering)
return NULL;
switch (type) {
case MLX5_FLOW_NAMESPACE_ESW_EGRESS:
- if (vport_idx >= steering->esw_egress_acl_vports)
- return NULL;
- if (steering->esw_egress_root_ns &&
- steering->esw_egress_root_ns[vport_idx])
- return &steering->esw_egress_root_ns[vport_idx]->ns;
+ vport_ns = xa_load(&steering->esw_egress_root_ns, vport_idx);
+ if (vport_ns)
+ return &vport_ns->root_ns->ns;
else
return NULL;
case MLX5_FLOW_NAMESPACE_ESW_INGRESS:
- if (vport_idx >= steering->esw_ingress_acl_vports)
- return NULL;
- if (steering->esw_ingress_root_ns &&
- steering->esw_ingress_root_ns[vport_idx])
- return &steering->esw_ingress_root_ns[vport_idx]->ns;
+ vport_ns = xa_load(&steering->esw_ingress_root_ns, vport_idx);
+ if (vport_ns)
+ return &vport_ns->root_ns->ns;
else
return NULL;
case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX:
@@ -3575,118 +3577,102 @@ out_err:
return err;
}
-static int init_egress_acl_root_ns(struct mlx5_flow_steering *steering, int vport)
+static void
+mlx5_fs_remove_vport_acl_root_ns(struct xarray *esw_acl_root_ns, u16 vport_idx)
{
- struct fs_prio *prio;
-
- steering->esw_egress_root_ns[vport] = create_root_ns(steering, FS_FT_ESW_EGRESS_ACL);
- if (!steering->esw_egress_root_ns[vport])
- return -ENOMEM;
+ struct mlx5_vport_acl_root_ns *vport_ns;
- /* create 1 prio*/
- prio = fs_create_prio(&steering->esw_egress_root_ns[vport]->ns, 0, 1);
- return PTR_ERR_OR_ZERO(prio);
+ vport_ns = xa_erase(esw_acl_root_ns, vport_idx);
+ if (vport_ns) {
+ cleanup_root_ns(vport_ns->root_ns);
+ kfree(vport_ns);
+ }
}
-static int init_ingress_acl_root_ns(struct mlx5_flow_steering *steering, int vport)
+static int
+mlx5_fs_add_vport_acl_root_ns(struct mlx5_flow_steering *steering,
+ struct xarray *esw_acl_root_ns,
+ enum fs_flow_table_type table_type,
+ u16 vport_idx)
{
+ struct mlx5_vport_acl_root_ns *vport_ns;
struct fs_prio *prio;
+ int err;
- steering->esw_ingress_root_ns[vport] = create_root_ns(steering, FS_FT_ESW_INGRESS_ACL);
- if (!steering->esw_ingress_root_ns[vport])
- return -ENOMEM;
+	/* Sanity check: only the intended xarrays may be passed in */
+ if (WARN_ON(esw_acl_root_ns != &steering->esw_egress_root_ns &&
+ esw_acl_root_ns != &steering->esw_ingress_root_ns))
+ return -EINVAL;
- /* create 1 prio*/
- prio = fs_create_prio(&steering->esw_ingress_root_ns[vport]->ns, 0, 1);
- return PTR_ERR_OR_ZERO(prio);
-}
+ if (table_type != FS_FT_ESW_EGRESS_ACL &&
+ table_type != FS_FT_ESW_INGRESS_ACL) {
+ mlx5_core_err(steering->dev,
+ "Invalid table type %d for egress/ingress ACLs\n",
+ table_type);
+ return -EINVAL;
+ }
-int mlx5_fs_egress_acls_init(struct mlx5_core_dev *dev, int total_vports)
-{
- struct mlx5_flow_steering *steering = dev->priv.steering;
- int err;
- int i;
+ if (xa_load(esw_acl_root_ns, vport_idx))
+ return -EEXIST;
- steering->esw_egress_root_ns =
- kcalloc(total_vports,
- sizeof(*steering->esw_egress_root_ns),
- GFP_KERNEL);
- if (!steering->esw_egress_root_ns)
+ vport_ns = kzalloc(sizeof(*vport_ns), GFP_KERNEL);
+ if (!vport_ns)
return -ENOMEM;
- for (i = 0; i < total_vports; i++) {
- err = init_egress_acl_root_ns(steering, i);
- if (err)
- goto cleanup_root_ns;
+ vport_ns->root_ns = create_root_ns(steering, table_type);
+ if (!vport_ns->root_ns) {
+ err = -ENOMEM;
+ goto kfree_vport_ns;
+ }
+
+	/* create 1 prio */
+ prio = fs_create_prio(&vport_ns->root_ns->ns, 0, 1);
+ if (IS_ERR(prio)) {
+ err = PTR_ERR(prio);
+ goto cleanup_root_ns;
}
- steering->esw_egress_acl_vports = total_vports;
+
+ vport_ns->vport_idx = vport_idx;
+ err = xa_insert(esw_acl_root_ns, vport_idx, vport_ns, GFP_KERNEL);
+ if (err)
+ goto cleanup_root_ns;
return 0;
cleanup_root_ns:
- for (i--; i >= 0; i--)
- cleanup_root_ns(steering->esw_egress_root_ns[i]);
- kfree(steering->esw_egress_root_ns);
- steering->esw_egress_root_ns = NULL;
+ cleanup_root_ns(vport_ns->root_ns);
+kfree_vport_ns:
+ kfree(vport_ns);
return err;
}
-void mlx5_fs_egress_acls_cleanup(struct mlx5_core_dev *dev)
+int mlx5_fs_vport_egress_acl_ns_add(struct mlx5_flow_steering *steering,
+ u16 vport_idx)
{
- struct mlx5_flow_steering *steering = dev->priv.steering;
- int i;
-
- if (!steering->esw_egress_root_ns)
- return;
-
- for (i = 0; i < steering->esw_egress_acl_vports; i++)
- cleanup_root_ns(steering->esw_egress_root_ns[i]);
-
- kfree(steering->esw_egress_root_ns);
- steering->esw_egress_root_ns = NULL;
+ return mlx5_fs_add_vport_acl_root_ns(steering,
+ &steering->esw_egress_root_ns,
+ FS_FT_ESW_EGRESS_ACL, vport_idx);
}
-int mlx5_fs_ingress_acls_init(struct mlx5_core_dev *dev, int total_vports)
+int mlx5_fs_vport_ingress_acl_ns_add(struct mlx5_flow_steering *steering,
+ u16 vport_idx)
{
- struct mlx5_flow_steering *steering = dev->priv.steering;
- int err;
- int i;
-
- steering->esw_ingress_root_ns =
- kcalloc(total_vports,
- sizeof(*steering->esw_ingress_root_ns),
- GFP_KERNEL);
- if (!steering->esw_ingress_root_ns)
- return -ENOMEM;
-
- for (i = 0; i < total_vports; i++) {
- err = init_ingress_acl_root_ns(steering, i);
- if (err)
- goto cleanup_root_ns;
- }
- steering->esw_ingress_acl_vports = total_vports;
- return 0;
-
-cleanup_root_ns:
- for (i--; i >= 0; i--)
- cleanup_root_ns(steering->esw_ingress_root_ns[i]);
- kfree(steering->esw_ingress_root_ns);
- steering->esw_ingress_root_ns = NULL;
- return err;
+ return mlx5_fs_add_vport_acl_root_ns(steering,
+ &steering->esw_ingress_root_ns,
+ FS_FT_ESW_INGRESS_ACL, vport_idx);
}
-void mlx5_fs_ingress_acls_cleanup(struct mlx5_core_dev *dev)
+void mlx5_fs_vport_egress_acl_ns_remove(struct mlx5_flow_steering *steering,
+ int vport_idx)
{
- struct mlx5_flow_steering *steering = dev->priv.steering;
- int i;
-
- if (!steering->esw_ingress_root_ns)
- return;
-
- for (i = 0; i < steering->esw_ingress_acl_vports; i++)
- cleanup_root_ns(steering->esw_ingress_root_ns[i]);
+ mlx5_fs_remove_vport_acl_root_ns(&steering->esw_egress_root_ns,
+ vport_idx);
+}
- kfree(steering->esw_ingress_root_ns);
- steering->esw_ingress_root_ns = NULL;
+void mlx5_fs_vport_ingress_acl_ns_remove(struct mlx5_flow_steering *steering,
+ int vport_idx)
+{
+ mlx5_fs_remove_vport_acl_root_ns(&steering->esw_ingress_root_ns,
+ vport_idx);
}
u32 mlx5_fs_get_capabilities(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type type)
@@ -3818,6 +3804,11 @@ void mlx5_fs_core_cleanup(struct mlx5_core_dev *dev)
{
struct mlx5_flow_steering *steering = dev->priv.steering;
+ WARN_ON(!xa_empty(&steering->esw_egress_root_ns));
+ WARN_ON(!xa_empty(&steering->esw_ingress_root_ns));
+ xa_destroy(&steering->esw_egress_root_ns);
+ xa_destroy(&steering->esw_ingress_root_ns);
+
cleanup_root_ns(steering->root_ns);
cleanup_fdb_root_ns(steering);
cleanup_root_ns(steering->port_sel_root_ns);
@@ -3908,6 +3899,8 @@ int mlx5_fs_core_init(struct mlx5_core_dev *dev)
goto err;
}
+ xa_init(&steering->esw_egress_root_ns);
+ xa_init(&steering->esw_ingress_root_ns);
return 0;
err:
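
The net effect of this rework is that per-vport ACL root namespaces are now
created and destroyed individually against an xarray rather than en masse
against fixed-size arrays, which suits sparse vport indices. A sketch of the
same add/remove lifecycle and error contract (-EEXIST on duplicate adds,
idempotent removes), with a plain pointer table standing in for the xarray:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

#define MAX_VPORTS 128

struct acl_ns { unsigned short vport_idx; };

static struct acl_ns *acl_table[MAX_VPORTS];	/* stand-in for the xarray */

static int acl_ns_add(unsigned short vport_idx)
{
	struct acl_ns *ns;

	if (acl_table[vport_idx])
		return -EEXIST;		/* mirrors xa_insert() on a live key */

	ns = calloc(1, sizeof(*ns));
	if (!ns)
		return -ENOMEM;
	ns->vport_idx = vport_idx;
	acl_table[vport_idx] = ns;
	return 0;
}

static void acl_ns_remove(unsigned short vport_idx)
{
	struct acl_ns *ns = acl_table[vport_idx];	/* xa_erase() analogue */

	acl_table[vport_idx] = NULL;
	free(ns);					/* free(NULL) is a no-op */
}

int main(void)
{
	printf("add 5: %d\n", acl_ns_add(5));	/* 0 */
	printf("add 5: %d\n", acl_ns_add(5));	/* -EEXIST */
	acl_ns_remove(5);
	acl_ns_remove(5);			/* idempotent */
	return 0;
}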
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index 500826229b0b..7877d9a2118d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -151,16 +151,14 @@ struct mlx5_flow_steering {
struct mlx5_flow_root_namespace *root_ns;
struct mlx5_flow_root_namespace *fdb_root_ns;
struct mlx5_flow_namespace **fdb_sub_ns;
- struct mlx5_flow_root_namespace **esw_egress_root_ns;
- struct mlx5_flow_root_namespace **esw_ingress_root_ns;
+ struct xarray esw_egress_root_ns;
+ struct xarray esw_ingress_root_ns;
struct mlx5_flow_root_namespace *sniffer_tx_root_ns;
struct mlx5_flow_root_namespace *sniffer_rx_root_ns;
struct mlx5_flow_root_namespace *rdma_rx_root_ns;
struct mlx5_flow_root_namespace *rdma_tx_root_ns;
struct mlx5_flow_root_namespace *egress_root_ns;
struct mlx5_flow_root_namespace *port_sel_root_ns;
- int esw_egress_acl_vports;
- int esw_ingress_acl_vports;
struct mlx5_flow_root_namespace **rdma_transport_rx_root_ns;
struct mlx5_flow_root_namespace **rdma_transport_tx_root_ns;
int rdma_transport_rx_vports;
@@ -378,10 +376,14 @@ void mlx5_fs_core_free(struct mlx5_core_dev *dev);
int mlx5_fs_core_init(struct mlx5_core_dev *dev);
void mlx5_fs_core_cleanup(struct mlx5_core_dev *dev);
-int mlx5_fs_egress_acls_init(struct mlx5_core_dev *dev, int total_vports);
-void mlx5_fs_egress_acls_cleanup(struct mlx5_core_dev *dev);
-int mlx5_fs_ingress_acls_init(struct mlx5_core_dev *dev, int total_vports);
-void mlx5_fs_ingress_acls_cleanup(struct mlx5_core_dev *dev);
+int mlx5_fs_vport_egress_acl_ns_add(struct mlx5_flow_steering *steering,
+ u16 vport_idx);
+int mlx5_fs_vport_ingress_acl_ns_add(struct mlx5_flow_steering *steering,
+ u16 vport_idx);
+void mlx5_fs_vport_egress_acl_ns_remove(struct mlx5_flow_steering *steering,
+ int vport_idx);
+void mlx5_fs_vport_ingress_acl_ns_remove(struct mlx5_flow_steering *steering,
+ int vport_idx);
u32 mlx5_fs_get_capabilities(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type type);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
index 57476487e31f..eeb4437975f2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
@@ -294,6 +294,12 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev)
return err;
}
+ if (MLX5_CAP_GEN(dev, psp)) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_PSP);
+ if (err)
+ return err;
+ }
+
return 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index cf7a1edd0530..b63c5a221eb9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -669,54 +669,61 @@ static void mlx5_fw_fatal_reporter_err_work(struct work_struct *work)
}
}
+#define MLX5_FW_REPORTER_ECPF_GRACEFUL_PERIOD 180000
+#define MLX5_FW_REPORTER_PF_GRACEFUL_PERIOD 60000
+#define MLX5_FW_REPORTER_VF_GRACEFUL_PERIOD 30000
+#define MLX5_FW_REPORTER_DEFAULT_GRACEFUL_PERIOD \
+ MLX5_FW_REPORTER_VF_GRACEFUL_PERIOD
+
+static
+const struct devlink_health_reporter_ops mlx5_fw_fatal_reporter_ecpf_ops = {
+ .name = "fw_fatal",
+ .recover = mlx5_fw_fatal_reporter_recover,
+ .dump = mlx5_fw_fatal_reporter_dump,
+ .default_graceful_period =
+ MLX5_FW_REPORTER_ECPF_GRACEFUL_PERIOD,
+};
+
static const struct devlink_health_reporter_ops mlx5_fw_fatal_reporter_pf_ops = {
.name = "fw_fatal",
.recover = mlx5_fw_fatal_reporter_recover,
.dump = mlx5_fw_fatal_reporter_dump,
+ .default_graceful_period = MLX5_FW_REPORTER_PF_GRACEFUL_PERIOD,
};
static const struct devlink_health_reporter_ops mlx5_fw_fatal_reporter_ops = {
.name = "fw_fatal",
.recover = mlx5_fw_fatal_reporter_recover,
+ .default_graceful_period =
+ MLX5_FW_REPORTER_DEFAULT_GRACEFUL_PERIOD,
};
-#define MLX5_FW_REPORTER_ECPF_GRACEFUL_PERIOD 180000
-#define MLX5_FW_REPORTER_PF_GRACEFUL_PERIOD 60000
-#define MLX5_FW_REPORTER_VF_GRACEFUL_PERIOD 30000
-#define MLX5_FW_REPORTER_DEFAULT_GRACEFUL_PERIOD MLX5_FW_REPORTER_VF_GRACEFUL_PERIOD
-
void mlx5_fw_reporters_create(struct mlx5_core_dev *dev)
{
const struct devlink_health_reporter_ops *fw_fatal_ops;
struct mlx5_core_health *health = &dev->priv.health;
const struct devlink_health_reporter_ops *fw_ops;
struct devlink *devlink = priv_to_devlink(dev);
- u64 grace_period;
- fw_fatal_ops = &mlx5_fw_fatal_reporter_pf_ops;
fw_ops = &mlx5_fw_reporter_pf_ops;
if (mlx5_core_is_ecpf(dev)) {
- grace_period = MLX5_FW_REPORTER_ECPF_GRACEFUL_PERIOD;
+ fw_fatal_ops = &mlx5_fw_fatal_reporter_ecpf_ops;
} else if (mlx5_core_is_pf(dev)) {
- grace_period = MLX5_FW_REPORTER_PF_GRACEFUL_PERIOD;
+ fw_fatal_ops = &mlx5_fw_fatal_reporter_pf_ops;
} else {
/* VF or SF */
- grace_period = MLX5_FW_REPORTER_DEFAULT_GRACEFUL_PERIOD;
fw_fatal_ops = &mlx5_fw_fatal_reporter_ops;
fw_ops = &mlx5_fw_reporter_ops;
}
- health->fw_reporter =
- devl_health_reporter_create(devlink, fw_ops, 0, dev);
+ health->fw_reporter = devl_health_reporter_create(devlink, fw_ops, dev);
if (IS_ERR(health->fw_reporter))
mlx5_core_warn(dev, "Failed to create fw reporter, err = %ld\n",
PTR_ERR(health->fw_reporter));
- health->fw_fatal_reporter =
- devl_health_reporter_create(devlink,
- fw_fatal_ops,
- grace_period,
- dev);
+ health->fw_fatal_reporter = devl_health_reporter_create(devlink,
+ fw_fatal_ops,
+ dev);
if (IS_ERR(health->fw_fatal_reporter))
mlx5_core_warn(dev, "Failed to create fw fatal reporter, err = %ld\n",
PTR_ERR(health->fw_fatal_reporter));
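
health.c folds the graceful period into the reporter ops so that
devl_health_reporter_create() no longer takes it as an argument; the right ops
struct is simply picked per device flavor. A condensed sketch of that
select-an-ops-table pattern (simplified types, not the devlink API):

#include <stdio.h>

struct reporter_ops {
	const char *name;
	unsigned long long default_graceful_period;	/* msec */
};

static const struct reporter_ops ecpf_ops = { "fw_fatal", 180000 };
static const struct reporter_ops pf_ops   = { "fw_fatal",  60000 };
static const struct reporter_ops vf_ops   = { "fw_fatal",  30000 };

enum dev_flavor { DEV_ECPF, DEV_PF, DEV_VF_OR_SF };

static const struct reporter_ops *pick_ops(enum dev_flavor f)
{
	switch (f) {
	case DEV_ECPF:	return &ecpf_ops;
	case DEV_PF:	return &pf_ops;
	default:	return &vf_ops;		/* VF or SF */
	}
}

int main(void)
{
	printf("PF grace period: %llums\n",
	       pick_ops(DEV_PF)->default_graceful_period);
	return 0;
}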
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 8517d4e5d5ef..0951c7cc1b5f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1798,6 +1798,7 @@ static const int types[] = {
MLX5_CAP_VDPA_EMULATION,
MLX5_CAP_IPSEC,
MLX5_CAP_PORT_SELECTION,
+ MLX5_CAP_PSP,
MLX5_CAP_MACSEC,
MLX5_CAP_ADV_VIRTUALIZATION,
MLX5_CAP_CRYPTO,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index 9d3504f5abfa..082259b56816 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -449,8 +449,6 @@ int mlx5_vport_set_other_func_cap(struct mlx5_core_dev *dev, const void *hca_cap
#define mlx5_vport_get_other_func_general_cap(dev, vport, out) \
mlx5_vport_get_other_func_cap(dev, vport, out, MLX5_CAP_GENERAL)
-int mlx5_vport_get_vhca_id(struct mlx5_core_dev *dev, u16 vport, u16 *vhca_id);
-
static inline u32 mlx5_sriov_get_vf_total_msix(struct pci_dev *pdev)
{
struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/diag/dev_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/diag/dev_tracepoint.h
index 0537de86f981..9b0f44253f33 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/diag/dev_tracepoint.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/diag/dev_tracepoint.h
@@ -28,7 +28,7 @@ DECLARE_EVENT_CLASS(mlx5_sf_dev_template,
__entry->hw_fn_id = sfdev->fn_id;
__entry->sfnum = sfdev->sfnum;
),
- TP_printk("(%s) sfdev=%pK aux_id=%d hw_id=0x%x sfnum=%u\n",
+ TP_printk("(%s) sfdev=%p aux_id=%d hw_id=0x%x sfnum=%u\n",
__get_str(devname), __entry->sfdev,
__entry->aux_id, __entry->hw_fn_id,
__entry->sfnum)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c
index 0bdcab2e5cf3..f22eaf506d28 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c
@@ -1200,34 +1200,20 @@ out:
int mlx5hws_cmd_query_gvmi(struct mlx5_core_dev *mdev, bool other_function,
u16 vport_number, u16 *gvmi)
{
- bool ec_vf_func = other_function ? mlx5_core_is_ec_vf_vport(mdev, vport_number) : false;
- u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {};
- int out_size;
- void *out;
int err;
- out_size = MLX5_ST_SZ_BYTES(query_hca_cap_out);
- out = kzalloc(out_size, GFP_KERNEL);
- if (!out)
- return -ENOMEM;
-
- MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
- MLX5_SET(query_hca_cap_in, in, other_function, other_function);
- MLX5_SET(query_hca_cap_in, in, function_id,
- mlx5_vport_to_func_id(mdev, vport_number, ec_vf_func));
- MLX5_SET(query_hca_cap_in, in, ec_vf_function, ec_vf_func);
- MLX5_SET(query_hca_cap_in, in, op_mod,
- MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1 | HCA_CAP_OPMOD_GET_CUR);
+ if (!other_function) {
+ /* self vhca_id */
+ *gvmi = MLX5_CAP_GEN(mdev, vhca_id);
+ return 0;
+ }
- err = mlx5_cmd_exec_inout(mdev, query_hca_cap, in, out);
+ err = mlx5_vport_get_vhca_id(mdev, vport_number, gvmi);
if (err) {
- kfree(out);
+ mlx5_core_err(mdev, "Failed to get vport vhca id for vport %d\n",
+ vport_number);
return err;
}
- *gvmi = MLX5_GET(query_hca_cap_out, out, capability.cmd_hca_cap.vhca_id);
-
- kfree(out);
-
return 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c
index c6436c3a7a83..c4bb6967f74d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c
@@ -1280,7 +1280,7 @@ hws_definer_conv_misc2(struct mlx5hws_definer_conv_data *cd,
struct mlx5hws_definer_fc *fc = cd->fc;
struct mlx5hws_definer_fc *curr_fc;
- if (HWS_IS_FLD_SET_SZ(match_param, misc_parameters_2.reserved_at_1a0, 0x8) ||
+ if (HWS_IS_FLD_SET_SZ(match_param, misc_parameters_2.psp_syndrome, 0x8) ||
HWS_IS_FLD_SET_SZ(match_param,
misc_parameters_2.ipsec_next_header, 0x8) ||
HWS_IS_FLD_SET_SZ(match_param, misc_parameters_2.reserved_at_1c0, 0x40) ||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_cmd.c
index baefb9a3fa05..1ebb2b15c080 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_cmd.c
@@ -2,6 +2,7 @@
/* Copyright (c) 2019 Mellanox Technologies. */
#include "dr_types.h"
+#include "eswitch.h"
int mlx5dr_cmd_query_esw_vport_context(struct mlx5_core_dev *mdev,
bool other_vport,
@@ -34,34 +35,21 @@ int mlx5dr_cmd_query_esw_vport_context(struct mlx5_core_dev *mdev,
int mlx5dr_cmd_query_gvmi(struct mlx5_core_dev *mdev, bool other_vport,
u16 vport_number, u16 *gvmi)
{
- bool ec_vf_func = other_vport ? mlx5_core_is_ec_vf_vport(mdev, vport_number) : false;
- u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {};
- int out_size;
- void *out;
int err;
- out_size = MLX5_ST_SZ_BYTES(query_hca_cap_out);
- out = kzalloc(out_size, GFP_KERNEL);
- if (!out)
- return -ENOMEM;
-
- MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
- MLX5_SET(query_hca_cap_in, in, other_function, other_vport);
- MLX5_SET(query_hca_cap_in, in, function_id, mlx5_vport_to_func_id(mdev, vport_number, ec_vf_func));
- MLX5_SET(query_hca_cap_in, in, ec_vf_function, ec_vf_func);
- MLX5_SET(query_hca_cap_in, in, op_mod,
- MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1 |
- HCA_CAP_OPMOD_GET_CUR);
+ if (!other_vport) {
+ /* self vhca_id */
+ *gvmi = MLX5_CAP_GEN(mdev, vhca_id);
+ return 0;
+ }
- err = mlx5_cmd_exec_inout(mdev, query_hca_cap, in, out);
+ err = mlx5_vport_get_vhca_id(mdev, vport_number, gvmi);
if (err) {
- kfree(out);
+ mlx5_core_err(mdev, "Failed to get vport vhca id for vport %d\n",
+ vport_number);
return err;
}
- *gvmi = MLX5_GET(query_hca_cap_out, out, capability.cmd_hca_cap.vhca_id);
-
- kfree(out);
return 0;
}
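
Both the HWS and SWS copies of query_gvmi() now short-circuit the "self" case
from the locally cached capability instead of issuing a full QUERY_HCA_CAP
command. A reduced sketch of that cache-first shape, where
query_other_vhca_id() is a hypothetical stand-in for the firmware call:

#include <stdint.h>
#include <stdio.h>

struct dev { uint16_t own_vhca_id; };

/* Hypothetical stand-in for querying another function's vhca_id. */
static int query_other_vhca_id(struct dev *d, uint16_t vport, uint16_t *id)
{
	(void)d;
	*id = 0x100 + vport;	/* pretend the FW answered */
	return 0;
}

static int query_gvmi(struct dev *d, int other, uint16_t vport, uint16_t *gvmi)
{
	if (!other) {
		*gvmi = d->own_vhca_id;	/* self: no command needed */
		return 0;
	}
	return query_other_vhca_id(d, vport, gvmi);
}

int main(void)
{
	struct dev d = { .own_vhca_id = 0x42 };
	uint16_t gvmi;

	query_gvmi(&d, 0, 0, &gvmi);
	printf("self gvmi: 0x%x\n", gvmi);
	query_gvmi(&d, 1, 3, &gvmi);
	printf("vport 3 gvmi: 0x%x\n", gvmi);
	return 0;
}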
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
index da5c24fc7b30..2ed2e530b07d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -36,6 +36,7 @@
#include <linux/mlx5/vport.h>
#include <linux/mlx5/eswitch.h>
#include "mlx5_core.h"
+#include "eswitch.h"
#include "sf/sf.h"
/* Mutex to hold while enabling or disabling RoCE */
@@ -1189,18 +1190,44 @@ u64 mlx5_query_nic_system_image_guid(struct mlx5_core_dev *mdev)
}
EXPORT_SYMBOL_GPL(mlx5_query_nic_system_image_guid);
+static bool mlx5_vport_use_vhca_id_as_func_id(struct mlx5_core_dev *dev,
+ u16 vport_num, u16 *vhca_id)
+{
+ if (!MLX5_CAP_GEN_2(dev, function_id_type_vhca_id))
+ return false;
+
+ return mlx5_esw_vport_vhca_id(dev->priv.eswitch, vport_num, vhca_id);
+}
+
int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 vport, void *out,
u16 opmod)
{
- bool ec_vf_func = mlx5_core_is_ec_vf_vport(dev, vport);
u8 in[MLX5_ST_SZ_BYTES(query_hca_cap_in)] = {};
+ u16 vhca_id = 0, function_id = 0;
+ bool ec_vf_func = false;
+
+	/* If this vport refers to a vport on the EC PF (embedded CPU),
+	 * let the FW know which domain we are querying, since vport numbers
+	 * and function_ids are not unique across the different PF domains
+	 * unless we use the vhca_id as the function_id below.
+ */
+ ec_vf_func = mlx5_core_is_ec_vf_vport(dev, vport);
+ function_id = mlx5_vport_to_func_id(dev, vport, ec_vf_func);
+
+ if (mlx5_vport_use_vhca_id_as_func_id(dev, vport, &vhca_id)) {
+ MLX5_SET(query_hca_cap_in, in, function_id_type, 1);
+ function_id = vhca_id;
+ ec_vf_func = false;
+ mlx5_core_dbg(dev, "%s using vhca_id as function_id for vport %d vhca_id 0x%x\n",
+ __func__, vport, vhca_id);
+ }
opmod = (opmod << 1) | (HCA_CAP_OPMOD_GET_MAX & 0x01);
MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
MLX5_SET(query_hca_cap_in, in, op_mod, opmod);
- MLX5_SET(query_hca_cap_in, in, function_id, mlx5_vport_to_func_id(dev, vport, ec_vf_func));
MLX5_SET(query_hca_cap_in, in, other_function, true);
MLX5_SET(query_hca_cap_in, in, ec_vf_function, ec_vf_func);
+ MLX5_SET(query_hca_cap_in, in, function_id, function_id);
return mlx5_cmd_exec_inout(dev, query_hca_cap, in, out);
}
EXPORT_SYMBOL_GPL(mlx5_vport_get_other_func_cap);
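
When the function_id_type_vhca_id capability is available, the command
addresses the target function by its globally unique vhca_id, which makes the
EC-VF domain bit unnecessary. A reduced sketch of that decision, modeling the
command fields as plain struct members instead of MLX5_SET() accessors:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct cmd_in {
	bool     function_id_type;	/* 1 = function_id carries a vhca_id */
	bool     ec_vf_function;
	uint16_t function_id;
};

static void fill_target(struct cmd_in *in, bool have_vhca_id_cap,
			uint16_t func_id, bool ec_vf, uint16_t vhca_id)
{
	if (have_vhca_id_cap) {
		/* vhca_id is globally unique, so domain bits are moot */
		in->function_id_type = true;
		in->function_id = vhca_id;
		in->ec_vf_function = false;
	} else {
		in->function_id_type = false;
		in->function_id = func_id;
		in->ec_vf_function = ec_vf;
	}
}

int main(void)
{
	struct cmd_in in = { 0 };

	fill_target(&in, true, 5, true, 0x1234);
	printf("id=0x%x type=%d ec_vf=%d\n",
	       in.function_id, in.function_id_type, in.ec_vf_function);
	return 0;
}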
@@ -1212,7 +1239,9 @@ int mlx5_vport_get_vhca_id(struct mlx5_core_dev *dev, u16 vport, u16 *vhca_id)
void *hca_caps;
int err;
- *vhca_id = 0;
+	/* Try to get the vhca_id from the eswitch first */
+ if (mlx5_esw_vport_vhca_id(dev->priv.eswitch, vport, vhca_id))
+ return 0;
query_ctx = kzalloc(query_out_sz, GFP_KERNEL);
if (!query_ctx)
@@ -1229,12 +1258,14 @@ out_free:
kfree(query_ctx);
return err;
}
+EXPORT_SYMBOL_GPL(mlx5_vport_get_vhca_id);
int mlx5_vport_set_other_func_cap(struct mlx5_core_dev *dev, const void *hca_cap,
u16 vport, u16 opmod)
{
- bool ec_vf_func = mlx5_core_is_ec_vf_vport(dev, vport);
int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
+ u16 vhca_id = 0, function_id = 0;
+ bool ec_vf_func = false;
void *set_hca_cap;
void *set_ctx;
int ret;
@@ -1243,14 +1274,29 @@ int mlx5_vport_set_other_func_cap(struct mlx5_core_dev *dev, const void *hca_cap
if (!set_ctx)
return -ENOMEM;
+	/* If this vport refers to a vport on the EC PF (embedded CPU),
+	 * let the FW know which domain we are targeting, since vport numbers
+	 * and function_ids are not unique across the different PF domains
+	 * unless we use the vhca_id as the function_id below.
+ */
+ ec_vf_func = mlx5_core_is_ec_vf_vport(dev, vport);
+ function_id = mlx5_vport_to_func_id(dev, vport, ec_vf_func);
+
+ if (mlx5_vport_use_vhca_id_as_func_id(dev, vport, &vhca_id)) {
+ MLX5_SET(set_hca_cap_in, set_ctx, function_id_type, 1);
+ function_id = vhca_id;
+ ec_vf_func = false;
+ mlx5_core_dbg(dev, "%s using vhca_id as function_id for vport %d vhca_id 0x%x\n",
+ __func__, vport, vhca_id);
+ }
+
MLX5_SET(set_hca_cap_in, set_ctx, opcode, MLX5_CMD_OP_SET_HCA_CAP);
MLX5_SET(set_hca_cap_in, set_ctx, op_mod, opmod << 1);
set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability);
memcpy(set_hca_cap, hca_cap, MLX5_ST_SZ_BYTES(cmd_hca_cap));
- MLX5_SET(set_hca_cap_in, set_ctx, function_id,
- mlx5_vport_to_func_id(dev, vport, ec_vf_func));
MLX5_SET(set_hca_cap_in, set_ctx, other_function, true);
MLX5_SET(set_hca_cap_in, set_ctx, ec_vf_function, ec_vf_func);
+ MLX5_SET(set_hca_cap_in, set_ctx, function_id, function_id);
ret = mlx5_cmd_exec_in(dev, set_hca_cap, set_ctx);
kfree(set_ctx);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c
index 2bb2b77351bd..980f3223f124 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.c
@@ -2043,7 +2043,7 @@ static int mlxsw_core_health_init(struct mlxsw_core *mlxsw_core)
return 0;
fw_fatal = devl_health_reporter_create(devlink, &mlxsw_core_health_fw_fatal_ops,
- 0, mlxsw_core);
+ mlxsw_core);
if (IS_ERR(fw_fatal)) {
dev_err(mlxsw_core->bus_info->dev, "Failed to create fw fatal reporter");
return PTR_ERR(fw_fatal);
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic.h b/drivers/net/ethernet/meta/fbnic/fbnic.h
index c376e06880c9..311c7dda911a 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic.h
+++ b/drivers/net/ethernet/meta/fbnic/fbnic.h
@@ -84,9 +84,6 @@ struct fbnic_dev {
/* Local copy of hardware statistics */
struct fbnic_hw_stats hw_stats;
- /* Lock protecting access to hw_stats */
- spinlock_t hw_stats_lock;
-
struct fbnic_fw_log fw_log;
};
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_csr.h b/drivers/net/ethernet/meta/fbnic/fbnic_csr.h
index a81db842aa53..e2fffe1597e9 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_csr.h
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_csr.h
@@ -790,6 +790,21 @@ enum {
#define FBNIC_CSR_END_PCS 0x10668 /* CSR section delimiter */
#define FBNIC_CSR_START_RSFEC 0x10800 /* CSR section delimiter */
+
+/* We have 4 RSFEC engines present in our part; however, we are only using 1.
+ * As such, only CCW(0) and NCCW(0) can ever be non-zero; the other
+ * registers can be ignored.
+ */
+#define FBNIC_RSFEC_CCW_LO(n) (0x10802 + 8 * (n)) /* 0x42008 + 32*n */
+#define FBNIC_RSFEC_CCW_HI(n) (0x10803 + 8 * (n)) /* 0x4200c + 32*n */
+#define FBNIC_RSFEC_NCCW_LO(n) (0x10804 + 8 * (n)) /* 0x42010 + 32*n */
+#define FBNIC_RSFEC_NCCW_HI(n) (0x10805 + 8 * (n)) /* 0x42014 + 32*n */
+
+#define FBNIC_PCS_MAX_LANES 4
+#define FBNIC_PCS_SYMBLERR_LO(n) \
+ (0x10880 + 2 * (n)) /* 0x42200 + 8*n */
+#define FBNIC_PCS_SYMBLERR_HI(n) \
+ (0x10881 + 2 * (n)) /* 0x42204 + 8*n */
#define FBNIC_CSR_END_RSFEC 0x108c8 /* CSR section delimiter */
/* MAC MAC registers (ASIC only) */
@@ -829,6 +844,10 @@ enum {
#define FBNIC_CSR_END_SIG 0x1184e /* CSR section delimiter */
#define FBNIC_CSR_START_MAC_STAT 0x11a00
+#define FBNIC_MAC_STAT_RX_XOFF_STB_L 0x11a00 /* 0x46800 */
+#define FBNIC_MAC_STAT_RX_XOFF_STB_H 0x11a01 /* 0x46804 */
+#define FBNIC_MAC_STAT_TX_XOFF_STB_L 0x11a04 /* 0x46810 */
+#define FBNIC_MAC_STAT_TX_XOFF_STB_H 0x11a05 /* 0x46814 */
#define FBNIC_MAC_STAT_RX_BYTE_COUNT_L 0x11a08 /* 0x46820 */
#define FBNIC_MAC_STAT_RX_BYTE_COUNT_H 0x11a09 /* 0x46824 */
#define FBNIC_MAC_STAT_RX_ALIGN_ERROR_L 0x11a0a /* 0x46828 */
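
The new XOFF counters follow the file's existing _L/_H convention: one 64-bit
statistic exposed as two consecutive 32-bit CSRs. A self-contained sketch of
the combining read, assuming (as is typical for such pairs) that reading the
low word latches the high word; rd32() here is a fake accessor over an
in-memory register file, not the driver's BAR read:

#include <stdint.h>
#include <stdio.h>

static uint32_t fake_csr[2] = { 0xdeadbeef, 0x1 };	/* LO, HI */

static uint32_t rd32(uint32_t reg) { return fake_csr[reg]; }

/* Low word first: on real HW that read typically latches the high word. */
static uint64_t stat_rd64(uint32_t reg_lo)
{
	uint64_t lo = rd32(reg_lo);
	uint64_t hi = rd32(reg_lo + 1);

	return (hi << 32) | lo;
}

int main(void)
{
	printf("0x%llx\n", (unsigned long long)stat_rd64(0));
	return 0;
}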
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c
index dc7ba8d5fc43..b4ff98ee2051 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c
@@ -2,6 +2,7 @@
/* Copyright (c) Meta Platforms, Inc. and affiliates. */
#include <linux/ethtool.h>
+#include <linux/ethtool_netlink.h>
#include <linux/netdevice.h>
#include <linux/pci.h>
#include <net/ipv6.h>
@@ -111,6 +112,20 @@ static const struct fbnic_stat fbnic_gstrings_hw_q_stats[] = {
FBNIC_HW_RXB_DEQUEUE_STATS_LEN * FBNIC_RXB_DEQUEUE_INDICES + \
FBNIC_HW_Q_STATS_LEN * FBNIC_MAX_QUEUES)
+#define FBNIC_QUEUE_STAT(name, stat) \
+ FBNIC_STAT_FIELDS(fbnic_ring, name, stat)
+
+static const struct fbnic_stat fbnic_gstrings_xdp_stats[] = {
+ FBNIC_QUEUE_STAT("xdp_tx_queue_%u_packets", stats.packets),
+ FBNIC_QUEUE_STAT("xdp_tx_queue_%u_bytes", stats.bytes),
+ FBNIC_QUEUE_STAT("xdp_tx_queue_%u_dropped", stats.dropped),
+};
+
+#define FBNIC_XDP_STATS_LEN ARRAY_SIZE(fbnic_gstrings_xdp_stats)
+
+#define FBNIC_STATS_LEN \
+ (FBNIC_HW_STATS_LEN + FBNIC_XDP_STATS_LEN * FBNIC_MAX_XDPQS)
+
static void
fbnic_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo)
{
@@ -160,6 +175,7 @@ static void fbnic_clone_swap_cfg(struct fbnic_net *orig,
swap(clone->num_rx_queues, orig->num_rx_queues);
swap(clone->num_tx_queues, orig->num_tx_queues);
swap(clone->num_napi, orig->num_napi);
+ swap(clone->hds_thresh, orig->hds_thresh);
}
static void fbnic_aggregate_vector_counters(struct fbnic_net *fbn,
@@ -277,15 +293,21 @@ fbnic_get_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring,
ring->rx_mini_pending = fbn->hpq_size;
ring->rx_jumbo_pending = fbn->ppq_size;
ring->tx_pending = fbn->txq_size;
+
+ kernel_ring->tcp_data_split = ETHTOOL_TCP_DATA_SPLIT_ENABLED;
+ kernel_ring->hds_thresh_max = FBNIC_HDS_THRESH_MAX;
+ kernel_ring->hds_thresh = fbn->hds_thresh;
}
static void fbnic_set_rings(struct fbnic_net *fbn,
- struct ethtool_ringparam *ring)
+ struct ethtool_ringparam *ring,
+ struct kernel_ethtool_ringparam *kernel_ring)
{
fbn->rcq_size = ring->rx_pending;
fbn->hpq_size = ring->rx_mini_pending;
fbn->ppq_size = ring->rx_jumbo_pending;
fbn->txq_size = ring->tx_pending;
+ fbn->hds_thresh = kernel_ring->hds_thresh;
}
static int
@@ -316,8 +338,24 @@ fbnic_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring,
return -EINVAL;
}
+ if (kernel_ring->tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_DISABLED) {
+ NL_SET_ERR_MSG_MOD(extack, "Cannot disable TCP data split");
+ return -EINVAL;
+ }
+
+	/* If an XDP program is attached, we should check for potential frame
+	 * splitting. If the new HDS threshold can cause splitting, only
+	 * allow it if the attached XDP program can handle frags.
+ */
+ if (fbnic_check_split_frames(fbn->xdp_prog, netdev->mtu,
+ kernel_ring->hds_thresh)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Use higher HDS threshold or multi-buf capable program");
+ return -EINVAL;
+ }
+
if (!netif_running(netdev)) {
- fbnic_set_rings(fbn, ring);
+ fbnic_set_rings(fbn, ring, kernel_ring);
return 0;
}
@@ -325,7 +363,7 @@ fbnic_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring,
if (!clone)
return -ENOMEM;
- fbnic_set_rings(clone, ring);
+ fbnic_set_rings(clone, ring, kernel_ring);
err = fbnic_alloc_napi_vectors(clone);
if (err)
@@ -398,6 +436,16 @@ static void fbnic_get_rxb_dequeue_strings(u8 **data, unsigned int idx)
ethtool_sprintf(data, stat->string, idx);
}
+static void fbnic_get_xdp_queue_strings(u8 **data, unsigned int idx)
+{
+ const struct fbnic_stat *stat;
+ int i;
+
+ stat = fbnic_gstrings_xdp_stats;
+ for (i = 0; i < FBNIC_XDP_STATS_LEN; i++, stat++)
+ ethtool_sprintf(data, stat->string, idx);
+}
+
static void fbnic_get_strings(struct net_device *dev, u32 sset, u8 *data)
{
const struct fbnic_stat *stat;
@@ -423,6 +471,9 @@ static void fbnic_get_strings(struct net_device *dev, u32 sset, u8 *data)
for (i = 0; i < FBNIC_HW_Q_STATS_LEN; i++, stat++)
ethtool_sprintf(&data, stat->string, idx);
}
+
+ for (i = 0; i < FBNIC_MAX_XDPQS; i++)
+ fbnic_get_xdp_queue_strings(&data, i);
break;
}
}
@@ -440,6 +491,24 @@ static void fbnic_report_hw_stats(const struct fbnic_stat *stat,
}
}
+static void fbnic_get_xdp_queue_stats(struct fbnic_ring *ring, u64 **data)
+{
+ const struct fbnic_stat *stat;
+ int i;
+
+ if (!ring) {
+ *data += FBNIC_XDP_STATS_LEN;
+ return;
+ }
+
+ stat = fbnic_gstrings_xdp_stats;
+ for (i = 0; i < FBNIC_XDP_STATS_LEN; i++, stat++, (*data)++) {
+ u8 *p = (u8 *)ring + stat->offset;
+
+ **data = *(u64 *)p;
+ }
+}
+
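
The XDP stats reuse the driver's table-of-(format string, byte offset) idiom:
one descriptor array drives both string generation and value extraction from a
ring structure. A sketch of that offsetof-driven pattern with a made-up ring
layout standing in for struct fbnic_ring:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct ring_stats { uint64_t packets, bytes, dropped; };
struct ring { struct ring_stats stats; };

struct stat_desc {
	const char *fmt;	/* printf-style, %u is the queue index */
	size_t      offset;	/* byte offset of the counter in struct ring */
};

static const struct stat_desc xdp_stats[] = {
	{ "xdp_tx_queue_%u_packets", offsetof(struct ring, stats.packets) },
	{ "xdp_tx_queue_%u_bytes",   offsetof(struct ring, stats.bytes)   },
	{ "xdp_tx_queue_%u_dropped", offsetof(struct ring, stats.dropped) },
};

int main(void)
{
	struct ring r = { .stats = { 10, 1500, 1 } };
	unsigned int q = 0;

	for (size_t i = 0; i < sizeof(xdp_stats) / sizeof(xdp_stats[0]); i++) {
		uint64_t v = *(uint64_t *)((uint8_t *)&r + xdp_stats[i].offset);

		printf(xdp_stats[i].fmt, q);
		printf(": %llu\n", (unsigned long long)v);
	}
	return 0;
}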
static void fbnic_get_ethtool_stats(struct net_device *dev,
struct ethtool_stats *stats, u64 *data)
{
@@ -449,7 +518,7 @@ static void fbnic_get_ethtool_stats(struct net_device *dev,
fbnic_get_hw_stats(fbn->fbd);
- spin_lock(&fbd->hw_stats_lock);
+ spin_lock(&fbd->hw_stats.lock);
fbnic_report_hw_stats(fbnic_gstrings_hw_stats, &fbd->hw_stats,
FBNIC_HW_FIXED_STATS_LEN, &data);
@@ -486,14 +555,17 @@ static void fbnic_get_ethtool_stats(struct net_device *dev,
fbnic_report_hw_stats(fbnic_gstrings_hw_q_stats, hw_q,
FBNIC_HW_Q_STATS_LEN, &data);
}
- spin_unlock(&fbd->hw_stats_lock);
+ spin_unlock(&fbd->hw_stats.lock);
+
+ for (i = 0; i < FBNIC_MAX_XDPQS; i++)
+ fbnic_get_xdp_queue_stats(fbn->tx[i + FBNIC_MAX_TXQS], &data);
}
static int fbnic_get_sset_count(struct net_device *dev, int sset)
{
switch (sset) {
case ETH_SS_STATS:
- return FBNIC_HW_STATS_LEN;
+ return FBNIC_STATS_LEN;
default:
return -EOPNOTSUPP;
}
@@ -1310,7 +1382,7 @@ fbnic_get_rss_hash_opts(struct net_device *netdev,
#define FBNIC_L2_HASH_OPTIONS \
(RXH_L2DA | RXH_DISCARD)
#define FBNIC_L3_HASH_OPTIONS \
- (FBNIC_L2_HASH_OPTIONS | RXH_IP_SRC | RXH_IP_DST)
+ (FBNIC_L2_HASH_OPTIONS | RXH_IP_SRC | RXH_IP_DST | RXH_IP6_FL)
#define FBNIC_L4_HASH_OPTIONS \
(FBNIC_L3_HASH_OPTIONS | RXH_L4_B_0_1 | RXH_L4_B_2_3)
@@ -1570,6 +1642,62 @@ static void fbnic_set_counter(u64 *stat, struct fbnic_stat_counter *counter)
}
static void
+fbnic_get_pause_stats(struct net_device *netdev,
+ struct ethtool_pause_stats *pause_stats)
+{
+ struct fbnic_net *fbn = netdev_priv(netdev);
+ struct fbnic_mac_stats *mac_stats;
+ struct fbnic_dev *fbd = fbn->fbd;
+
+ mac_stats = &fbd->hw_stats.mac;
+
+ fbd->mac->get_pause_stats(fbd, false, &mac_stats->pause);
+
+ pause_stats->tx_pause_frames = mac_stats->pause.tx_pause_frames.value;
+ pause_stats->rx_pause_frames = mac_stats->pause.rx_pause_frames.value;
+}
+
+static void
+fbnic_get_fec_stats(struct net_device *netdev,
+ struct ethtool_fec_stats *fec_stats)
+{
+ struct fbnic_net *fbn = netdev_priv(netdev);
+ struct fbnic_phy_stats *phy_stats;
+ struct fbnic_dev *fbd = fbn->fbd;
+
+ fbnic_get_hw_stats32(fbd);
+ phy_stats = &fbd->hw_stats.phy;
+
+ spin_lock(&fbd->hw_stats.lock);
+ fec_stats->corrected_blocks.total =
+ phy_stats->fec.corrected_blocks.value;
+ fec_stats->uncorrectable_blocks.total =
+ phy_stats->fec.uncorrectable_blocks.value;
+ spin_unlock(&fbd->hw_stats.lock);
+}
+
+static void
+fbnic_get_eth_phy_stats(struct net_device *netdev,
+ struct ethtool_eth_phy_stats *eth_phy_stats)
+{
+ struct fbnic_net *fbn = netdev_priv(netdev);
+ struct fbnic_phy_stats *phy_stats;
+ struct fbnic_dev *fbd = fbn->fbd;
+ u64 total = 0;
+ int i;
+
+ fbnic_get_hw_stats32(fbd);
+ phy_stats = &fbd->hw_stats.phy;
+
+ spin_lock(&fbd->hw_stats.lock);
+ for (i = 0; i < FBNIC_PCS_MAX_LANES; i++)
+ total += phy_stats->pcs.SymbolErrorDuringCarrier.lanes[i].value;
+
+ eth_phy_stats->SymbolErrorDuringCarrier = total;
+ spin_unlock(&fbd->hw_stats.lock);
+}
+
+static void
fbnic_get_eth_mac_stats(struct net_device *netdev,
struct ethtool_eth_mac_stats *eth_mac_stats)
{
@@ -1678,6 +1806,8 @@ fbnic_get_rmon_stats(struct net_device *netdev,
static const struct ethtool_ops fbnic_ethtool_ops = {
.supported_coalesce_params = ETHTOOL_COALESCE_USECS |
ETHTOOL_COALESCE_RX_MAX_FRAMES,
+ .supported_ring_params = ETHTOOL_RING_USE_TCP_DATA_SPLIT |
+ ETHTOOL_RING_USE_HDS_THRS,
.rxfh_max_num_contexts = FBNIC_RPC_RSS_TBL_COUNT,
.get_drvinfo = fbnic_get_drvinfo,
.get_regs_len = fbnic_get_regs_len,
@@ -1687,6 +1817,7 @@ static const struct ethtool_ops fbnic_ethtool_ops = {
.set_coalesce = fbnic_set_coalesce,
.get_ringparam = fbnic_get_ringparam,
.set_ringparam = fbnic_set_ringparam,
+ .get_pause_stats = fbnic_get_pause_stats,
.get_pauseparam = fbnic_phylink_get_pauseparam,
.set_pauseparam = fbnic_phylink_set_pauseparam,
.get_strings = fbnic_get_strings,
@@ -1708,7 +1839,9 @@ static const struct ethtool_ops fbnic_ethtool_ops = {
.get_ts_info = fbnic_get_ts_info,
.get_ts_stats = fbnic_get_ts_stats,
.get_link_ksettings = fbnic_phylink_ethtool_ksettings_get,
+ .get_fec_stats = fbnic_get_fec_stats,
.get_fecparam = fbnic_phylink_get_fecparam,
+ .get_eth_phy_stats = fbnic_get_eth_phy_stats,
.get_eth_mac_stats = fbnic_get_eth_mac_stats,
.get_eth_ctrl_stats = fbnic_get_eth_ctrl_stats,
.get_rmon_stats = fbnic_get_rmon_stats,
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c
index 0c55be7d2547..6e580654493c 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c
@@ -653,6 +653,9 @@ static int fbnic_fw_parse_cap_resp(void *opaque, struct fbnic_tlv_msg **results)
fbd->fw_cap.anti_rollback_version =
fta_get_uint(results, FBNIC_FW_CAP_RESP_ANTI_ROLLBACK_VERSION);
+ /* Always assume we need a BMC reinit */
+ fbd->fw_cap.need_bmc_tcam_reinit = true;
+
return 0;
}
@@ -1410,6 +1413,109 @@ void fbnic_mbx_flush_tx(struct fbnic_dev *fbd)
} while (time_is_after_jiffies(timeout));
}
+int fbnic_fw_xmit_rpc_macda_sync(struct fbnic_dev *fbd)
+{
+ struct fbnic_tlv_msg *mac_array;
+ int i, addr_count = 0, err;
+ struct fbnic_tlv_msg *msg;
+ u32 rx_flags = 0;
+
+ /* Nothing to do if there is no FW to sync with */
+ if (!fbd->mbx[FBNIC_IPC_MBX_TX_IDX].ready)
+ return 0;
+
+ msg = fbnic_tlv_msg_alloc(FBNIC_TLV_MSG_ID_RPC_MAC_SYNC_REQ);
+ if (!msg)
+ return -ENOMEM;
+
+ mac_array = fbnic_tlv_attr_nest_start(msg,
+ FBNIC_FW_RPC_MAC_SYNC_UC_ARRAY);
+ if (!mac_array)
+ goto free_message_nospc;
+
+ /* Populate the unicast MAC addrs and capture PROMISC/ALLMULTI flags */
+ for (addr_count = 0, i = FBNIC_RPC_TCAM_MACDA_PROMISC_IDX;
+ i >= fbd->mac_addr_boundary; i--) {
+ struct fbnic_mac_addr *mac_addr = &fbd->mac_addr[i];
+
+ if (mac_addr->state != FBNIC_TCAM_S_VALID)
+ continue;
+ if (test_bit(FBNIC_MAC_ADDR_T_ALLMULTI, mac_addr->act_tcam))
+ rx_flags |= FW_RPC_MAC_SYNC_RX_FLAGS_ALLMULTI;
+ if (test_bit(FBNIC_MAC_ADDR_T_PROMISC, mac_addr->act_tcam))
+ rx_flags |= FW_RPC_MAC_SYNC_RX_FLAGS_PROMISC;
+ if (!test_bit(FBNIC_MAC_ADDR_T_UNICAST, mac_addr->act_tcam))
+ continue;
+ if (addr_count == FW_RPC_MAC_SYNC_UC_ARRAY_SIZE) {
+ rx_flags |= FW_RPC_MAC_SYNC_RX_FLAGS_PROMISC;
+ continue;
+ }
+
+ err = fbnic_tlv_attr_put_value(mac_array,
+ FBNIC_FW_RPC_MAC_SYNC_MAC_ADDR,
+ mac_addr->value.addr8,
+ ETH_ALEN);
+ if (err)
+ goto free_message;
+ addr_count++;
+ }
+
+ /* Close array */
+ fbnic_tlv_attr_nest_stop(msg);
+
+ mac_array = fbnic_tlv_attr_nest_start(msg,
+ FBNIC_FW_RPC_MAC_SYNC_MC_ARRAY);
+ if (!mac_array)
+ goto free_message_nospc;
+
+ /* Repeat for multicast addrs, record BROADCAST/ALLMULTI flags */
+ for (addr_count = 0, i = FBNIC_RPC_TCAM_MACDA_BROADCAST_IDX;
+ i < fbd->mac_addr_boundary; i++) {
+ struct fbnic_mac_addr *mac_addr = &fbd->mac_addr[i];
+
+ if (mac_addr->state != FBNIC_TCAM_S_VALID)
+ continue;
+ if (test_bit(FBNIC_MAC_ADDR_T_BROADCAST, mac_addr->act_tcam))
+ rx_flags |= FW_RPC_MAC_SYNC_RX_FLAGS_BROADCAST;
+ if (test_bit(FBNIC_MAC_ADDR_T_ALLMULTI, mac_addr->act_tcam))
+ rx_flags |= FW_RPC_MAC_SYNC_RX_FLAGS_ALLMULTI;
+ if (!test_bit(FBNIC_MAC_ADDR_T_MULTICAST, mac_addr->act_tcam))
+ continue;
+ if (addr_count == FW_RPC_MAC_SYNC_MC_ARRAY_SIZE) {
+ rx_flags |= FW_RPC_MAC_SYNC_RX_FLAGS_ALLMULTI;
+ continue;
+ }
+
+ err = fbnic_tlv_attr_put_value(mac_array,
+ FBNIC_FW_RPC_MAC_SYNC_MAC_ADDR,
+ mac_addr->value.addr8,
+ ETH_ALEN);
+ if (err)
+ goto free_message;
+ addr_count++;
+ }
+
+ /* Close array */
+ fbnic_tlv_attr_nest_stop(msg);
+
+ /* Report flags at end of list */
+ err = fbnic_tlv_attr_put_int(msg, FBNIC_FW_RPC_MAC_SYNC_RX_FLAGS,
+ rx_flags);
+ if (err)
+ goto free_message;
+
+	/* Send the message off to FW notifying it of the current RPC config */
+ err = fbnic_mbx_map_tlv_msg(fbd, msg);
+ if (err)
+ goto free_message;
+ return 0;
+free_message_nospc:
+ err = -ENOSPC;
+free_message:
+ free_page((unsigned long)msg);
+ return err;
+}
+
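fbnic_fw_xmit_rpc_macda_sync() above follows the usual build-then-unwind shape:
allocate a message, append nested attributes, and free the message on any
failure through goto labels. A stripped-down sketch of that error-handling
skeleton, where the msg_* helpers are hypothetical stand-ins for the fbnic TLV
builders rather than real driver API:

#include <errno.h>
#include <stdlib.h>

struct msg { int dummy; };

static struct msg *msg_alloc(void) { return calloc(1, sizeof(struct msg)); }

static int msg_put(struct msg *m, int attr, int val)
{
	(void)m; (void)attr; (void)val;
	return 0;	/* pretend the attribute fit */
}

static int msg_send(struct msg *m)
{
	(void)m;
	return 0;	/* pretend the mailbox took ownership */
}

static int xmit_sync(void)
{
	struct msg *m;
	int err;

	m = msg_alloc();
	if (!m)
		return -ENOMEM;

	err = msg_put(m, 1, 42);
	if (err)
		goto free_message;	/* one label frees on every path */

	err = msg_send(m);
	if (err)
		goto free_message;
	return 0;	/* ownership passed on; the mailbox frees it later */

free_message:
	free(m);
	return err;
}

int main(void) { return xmit_sync(); }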
void fbnic_get_fw_ver_commit_str(struct fbnic_dev *fbd, char *fw_version,
const size_t str_sz)
{
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_fw.h b/drivers/net/ethernet/meta/fbnic/fbnic_fw.h
index fde331696fdd..ec67b80809b0 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_fw.h
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_fw.h
@@ -51,8 +51,10 @@ struct fbnic_fw_cap {
} stored;
u8 active_slot;
u8 bmc_mac_addr[4][ETH_ALEN];
- u8 bmc_present : 1;
- u8 all_multi : 1;
+ u8 bmc_present : 1;
+ u8 need_bmc_tcam_reinit : 1;
+ u8 need_bmc_macda_sync : 1;
+ u8 all_multi : 1;
u8 link_speed;
u8 link_fec;
u32 anti_rollback_version;
@@ -97,6 +99,7 @@ int fbnic_fw_xmit_tsene_read_msg(struct fbnic_dev *fbd,
struct fbnic_fw_completion *cmpl_data);
int fbnic_fw_xmit_send_logs(struct fbnic_dev *fbd, bool enable,
bool send_log_history);
+int fbnic_fw_xmit_rpc_macda_sync(struct fbnic_dev *fbd);
struct fbnic_fw_completion *fbnic_fw_alloc_cmpl(u32 msg_type);
void fbnic_fw_put_cmpl(struct fbnic_fw_completion *cmpl_data);
@@ -143,6 +146,7 @@ enum {
FBNIC_TLV_MSG_ID_LOG_SEND_LOGS_REQ = 0x43,
FBNIC_TLV_MSG_ID_LOG_MSG_REQ = 0x44,
FBNIC_TLV_MSG_ID_LOG_MSG_RESP = 0x45,
+ FBNIC_TLV_MSG_ID_RPC_MAC_SYNC_REQ = 0x46,
};
#define FBNIC_FW_CAP_RESP_VERSION_MAJOR CSR_GENMASK(31, 24)
@@ -235,4 +239,19 @@ enum {
FBNIC_FW_LOG_MSG_MAX
};
+enum {
+ FBNIC_FW_RPC_MAC_SYNC_RX_FLAGS = 0x0,
+ FBNIC_FW_RPC_MAC_SYNC_UC_ARRAY = 0x1,
+ FBNIC_FW_RPC_MAC_SYNC_MC_ARRAY = 0x2,
+ FBNIC_FW_RPC_MAC_SYNC_MAC_ADDR = 0x3,
+ FBNIC_FW_RPC_MAC_SYNC_MSG_MAX
+};
+
+#define FW_RPC_MAC_SYNC_RX_FLAGS_PROMISC 1
+#define FW_RPC_MAC_SYNC_RX_FLAGS_ALLMULTI 2
+#define FW_RPC_MAC_SYNC_RX_FLAGS_BROADCAST 4
+
+#define FW_RPC_MAC_SYNC_UC_ARRAY_SIZE 8
+#define FW_RPC_MAC_SYNC_MC_ARRAY_SIZE 8
+
#endif /* _FBNIC_FW_H_ */
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_hw_stats.c b/drivers/net/ethernet/meta/fbnic/fbnic_hw_stats.c
index 4223d8100e64..8b9b2076beec 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_hw_stats.c
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_hw_stats.c
@@ -1,6 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) Meta Platforms, Inc. and affiliates. */
+#include <linux/rtnetlink.h>
+
#include "fbnic.h"
static void fbnic_hw_stat_rst32(struct fbnic_dev *fbd, u32 reg,
@@ -421,9 +423,9 @@ static void fbnic_get_hw_rxq_stats32(struct fbnic_dev *fbd,
void fbnic_get_hw_q_stats(struct fbnic_dev *fbd,
struct fbnic_hw_q_stats *hw_q)
{
- spin_lock(&fbd->hw_stats_lock);
+ spin_lock(&fbd->hw_stats.lock);
fbnic_get_hw_rxq_stats32(fbd, hw_q);
- spin_unlock(&fbd->hw_stats_lock);
+ spin_unlock(&fbd->hw_stats.lock);
}
static void fbnic_reset_pcie_stats_asic(struct fbnic_dev *fbd,
@@ -510,20 +512,68 @@ static void fbnic_get_pcie_stats_asic64(struct fbnic_dev *fbd,
&pcie->ob_rd_no_np_cred);
}
+static void fbnic_reset_phy_stats(struct fbnic_dev *fbd,
+ struct fbnic_phy_stats *phy_stats)
+{
+ const struct fbnic_mac *mac = fbd->mac;
+
+ mac->get_fec_stats(fbd, true, &phy_stats->fec);
+ mac->get_pcs_stats(fbd, true, &phy_stats->pcs);
+}
+
+static void fbnic_get_phy_stats32(struct fbnic_dev *fbd,
+ struct fbnic_phy_stats *phy_stats)
+{
+ const struct fbnic_mac *mac = fbd->mac;
+
+ mac->get_fec_stats(fbd, false, &phy_stats->fec);
+ mac->get_pcs_stats(fbd, false, &phy_stats->pcs);
+}
+
+static void fbnic_reset_hw_mac_stats(struct fbnic_dev *fbd,
+ struct fbnic_mac_stats *mac_stats)
+{
+ const struct fbnic_mac *mac = fbd->mac;
+
+ mac->get_eth_mac_stats(fbd, true, &mac_stats->eth_mac);
+ mac->get_pause_stats(fbd, true, &mac_stats->pause);
+ mac->get_eth_ctrl_stats(fbd, true, &mac_stats->eth_ctrl);
+ mac->get_rmon_stats(fbd, true, &mac_stats->rmon);
+}
+
void fbnic_reset_hw_stats(struct fbnic_dev *fbd)
{
- spin_lock(&fbd->hw_stats_lock);
+ spin_lock(&fbd->hw_stats.lock);
+ fbnic_reset_phy_stats(fbd, &fbd->hw_stats.phy);
fbnic_reset_tmi_stats(fbd, &fbd->hw_stats.tmi);
fbnic_reset_tti_stats(fbd, &fbd->hw_stats.tti);
fbnic_reset_rpc_stats(fbd, &fbd->hw_stats.rpc);
fbnic_reset_rxb_stats(fbd, &fbd->hw_stats.rxb);
fbnic_reset_hw_rxq_stats(fbd, fbd->hw_stats.hw_q);
fbnic_reset_pcie_stats_asic(fbd, &fbd->hw_stats.pcie);
- spin_unlock(&fbd->hw_stats_lock);
+ spin_unlock(&fbd->hw_stats.lock);
+
+	/* Once registered, the only other access to MAC stats is via the
+	 * ethtool API, which is protected by the rtnl_lock. The call to
+	 * fbnic_reset_hw_stats() during PCI recovery is also protected
+	 * by the rtnl_lock; hence, we don't need the spinlock to access
+	 * the MAC stats.
+ */
+ if (fbd->netdev)
+ ASSERT_RTNL();
+ fbnic_reset_hw_mac_stats(fbd, &fbd->hw_stats.mac);
+}
+
+void fbnic_init_hw_stats(struct fbnic_dev *fbd)
+{
+ spin_lock_init(&fbd->hw_stats.lock);
+
+ fbnic_reset_hw_stats(fbd);
}
static void __fbnic_get_hw_stats32(struct fbnic_dev *fbd)
{
+ fbnic_get_phy_stats32(fbd, &fbd->hw_stats.phy);
fbnic_get_tmi_stats32(fbd, &fbd->hw_stats.tmi);
fbnic_get_tti_stats32(fbd, &fbd->hw_stats.tti);
fbnic_get_rpc_stats32(fbd, &fbd->hw_stats.rpc);
@@ -533,19 +583,19 @@ static void __fbnic_get_hw_stats32(struct fbnic_dev *fbd)
void fbnic_get_hw_stats32(struct fbnic_dev *fbd)
{
- spin_lock(&fbd->hw_stats_lock);
+ spin_lock(&fbd->hw_stats.lock);
__fbnic_get_hw_stats32(fbd);
- spin_unlock(&fbd->hw_stats_lock);
+ spin_unlock(&fbd->hw_stats.lock);
}
void fbnic_get_hw_stats(struct fbnic_dev *fbd)
{
- spin_lock(&fbd->hw_stats_lock);
+ spin_lock(&fbd->hw_stats.lock);
__fbnic_get_hw_stats32(fbd);
fbnic_get_tmi_stats(fbd, &fbd->hw_stats.tmi);
fbnic_get_tti_stats(fbd, &fbd->hw_stats.tti);
fbnic_get_rxb_stats(fbd, &fbd->hw_stats.rxb);
fbnic_get_pcie_stats_asic64(fbd, &fbd->hw_stats.pcie);
- spin_unlock(&fbd->hw_stats_lock);
+ spin_unlock(&fbd->hw_stats.lock);
}
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_hw_stats.h b/drivers/net/ethernet/meta/fbnic/fbnic_hw_stats.h
index 4fe239717497..aa3f429a9aed 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_hw_stats.h
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_hw_stats.h
@@ -5,6 +5,7 @@
#define _FBNIC_HW_STATS_H_
#include <linux/ethtool.h>
+#include <linux/spinlock.h>
#include "fbnic_csr.h"
@@ -22,6 +23,16 @@ struct fbnic_hw_stat {
struct fbnic_stat_counter bytes;
};
+struct fbnic_fec_stats {
+ struct fbnic_stat_counter corrected_blocks, uncorrectable_blocks;
+};
+
+struct fbnic_pcs_stats {
+ struct {
+ struct fbnic_stat_counter lanes[FBNIC_PCS_MAX_LANES];
+ } SymbolErrorDuringCarrier;
+};
+
/* Note: not updated by fbnic_get_hw_stats() */
struct fbnic_eth_ctrl_stats {
struct fbnic_stat_counter MACControlFramesTransmitted;
@@ -39,6 +50,12 @@ struct fbnic_rmon_stats {
struct fbnic_stat_counter hist_tx[ETHTOOL_RMON_HIST_MAX];
};
+/* Note: not updated by fbnic_get_hw_stats() */
+struct fbnic_pause_stats {
+ struct fbnic_stat_counter tx_pause_frames;
+ struct fbnic_stat_counter rx_pause_frames;
+};
+
struct fbnic_eth_mac_stats {
struct fbnic_stat_counter FramesTransmittedOK;
struct fbnic_stat_counter FramesReceivedOK;
@@ -55,8 +72,14 @@ struct fbnic_eth_mac_stats {
struct fbnic_stat_counter FrameTooLongErrors;
};
+struct fbnic_phy_stats {
+ struct fbnic_fec_stats fec;
+ struct fbnic_pcs_stats pcs;
+};
+
struct fbnic_mac_stats {
struct fbnic_eth_mac_stats eth_mac;
+ struct fbnic_pause_stats pause;
struct fbnic_eth_ctrl_stats eth_ctrl;
struct fbnic_rmon_stats rmon;
};
@@ -115,6 +138,7 @@ struct fbnic_pcie_stats {
};
struct fbnic_hw_stats {
+ struct fbnic_phy_stats phy;
struct fbnic_mac_stats mac;
struct fbnic_tmi_stats tmi;
struct fbnic_tti_stats tti;
@@ -122,11 +146,15 @@ struct fbnic_hw_stats {
struct fbnic_rxb_stats rxb;
struct fbnic_hw_q_stats hw_q[FBNIC_MAX_QUEUES];
struct fbnic_pcie_stats pcie;
+
+ /* Lock protecting the access to hw stats */
+ spinlock_t lock;
};
u64 fbnic_stat_rd64(struct fbnic_dev *fbd, u32 reg, u32 offset);
void fbnic_reset_hw_stats(struct fbnic_dev *fbd);
+void fbnic_init_hw_stats(struct fbnic_dev *fbd);
void fbnic_get_hw_q_stats(struct fbnic_dev *fbd,
struct fbnic_hw_q_stats *hw_q);
void fbnic_get_hw_stats32(struct fbnic_dev *fbd);
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_mac.c b/drivers/net/ethernet/meta/fbnic/fbnic_mac.c
index fd8d67f9048e..8f998d26b9a3 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_mac.c
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_mac.c
@@ -632,6 +632,50 @@ static void fbnic_mac_link_up_asic(struct fbnic_dev *fbd,
}
static void
+fbnic_pcs_rsfec_stat_rd32(struct fbnic_dev *fbd, u32 reg, bool reset,
+ struct fbnic_stat_counter *stat)
+{
+ u32 pcs_rsfec_stat;
+
+	/* The PCS/RSFEC counters are only 16b wide each and are exposed as
+	 * a LO/HI register pair. Read both halves and combine them into a
+	 * single usable 32b statistic, masking each read down to its 16
+	 * valid bits.
+ *
+ * Read ordering must be lower reg followed by upper reg.
+ */
+ pcs_rsfec_stat = rd32(fbd, reg) & 0xffff;
+ pcs_rsfec_stat |= rd32(fbd, reg + 1) << 16;
+
+	/* RSFEC registers clear themselves upon being read, so there is no
+ * need to store the old_reg_value.
+ */
+ if (!reset)
+ stat->value += pcs_rsfec_stat;
+}
+
+static void
+fbnic_mac_get_fec_stats(struct fbnic_dev *fbd, bool reset,
+ struct fbnic_fec_stats *s)
+{
+ fbnic_pcs_rsfec_stat_rd32(fbd, FBNIC_RSFEC_CCW_LO(0), reset,
+ &s->corrected_blocks);
+ fbnic_pcs_rsfec_stat_rd32(fbd, FBNIC_RSFEC_NCCW_LO(0), reset,
+ &s->uncorrectable_blocks);
+}
+
+static void
+fbnic_mac_get_pcs_stats(struct fbnic_dev *fbd, bool reset,
+ struct fbnic_pcs_stats *s)
+{
+ int i;
+
+ for (i = 0; i < FBNIC_PCS_MAX_LANES; i++)
+ fbnic_pcs_rsfec_stat_rd32(fbd, FBNIC_PCS_SYMBLERR_LO(i), reset,
+ &s->SymbolErrorDuringCarrier.lanes[i]);
+}
+
+static void
fbnic_mac_get_eth_mac_stats(struct fbnic_dev *fbd, bool reset,
struct fbnic_eth_mac_stats *mac_stats)
{
@@ -666,6 +710,16 @@ fbnic_mac_get_eth_mac_stats(struct fbnic_dev *fbd, bool reset,
}
static void
+fbnic_mac_get_pause_stats(struct fbnic_dev *fbd, bool reset,
+ struct fbnic_pause_stats *pause_stats)
+{
+ fbnic_mac_stat_rd64(fbd, reset, pause_stats->tx_pause_frames,
+ MAC_STAT_TX_XOFF_STB);
+ fbnic_mac_stat_rd64(fbd, reset, pause_stats->rx_pause_frames,
+ MAC_STAT_RX_XOFF_STB);
+}
+
+static void
fbnic_mac_get_eth_ctrl_stats(struct fbnic_dev *fbd, bool reset,
struct fbnic_eth_ctrl_stats *ctrl_stats)
{
@@ -809,7 +863,10 @@ static const struct fbnic_mac fbnic_mac_asic = {
.pcs_disable = fbnic_pcs_disable_asic,
.pcs_get_link = fbnic_pcs_get_link_asic,
.pcs_get_link_event = fbnic_pcs_get_link_event_asic,
+ .get_fec_stats = fbnic_mac_get_fec_stats,
+ .get_pcs_stats = fbnic_mac_get_pcs_stats,
.get_eth_mac_stats = fbnic_mac_get_eth_mac_stats,
+ .get_pause_stats = fbnic_mac_get_pause_stats,
.get_eth_ctrl_stats = fbnic_mac_get_eth_ctrl_stats,
.get_rmon_stats = fbnic_mac_get_rmon_stats,
.link_down = fbnic_mac_link_down_asic,
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_mac.h b/drivers/net/ethernet/meta/fbnic/fbnic_mac.h
index 86fa06da2b3e..ede5ff0dae22 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_mac.h
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_mac.h
@@ -79,8 +79,14 @@ struct fbnic_mac {
bool (*pcs_get_link)(struct fbnic_dev *fbd);
int (*pcs_get_link_event)(struct fbnic_dev *fbd);
+ void (*get_fec_stats)(struct fbnic_dev *fbd, bool reset,
+ struct fbnic_fec_stats *fec_stats);
+ void (*get_pcs_stats)(struct fbnic_dev *fbd, bool reset,
+ struct fbnic_pcs_stats *pcs_stats);
void (*get_eth_mac_stats)(struct fbnic_dev *fbd, bool reset,
struct fbnic_eth_mac_stats *mac_stats);
+ void (*get_pause_stats)(struct fbnic_dev *fbd, bool reset,
+ struct fbnic_pause_stats *pause_stats);
void (*get_eth_ctrl_stats)(struct fbnic_dev *fbd, bool reset,
struct fbnic_eth_ctrl_stats *ctrl_stats);
void (*get_rmon_stats)(struct fbnic_dev *fbd, bool reset,
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c
index 40581550da1a..dd35de301870 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c
@@ -183,11 +183,10 @@ static int fbnic_mc_unsync(struct net_device *netdev, const unsigned char *addr)
return ret;
}
-void __fbnic_set_rx_mode(struct net_device *netdev)
+void __fbnic_set_rx_mode(struct fbnic_dev *fbd)
{
- struct fbnic_net *fbn = netdev_priv(netdev);
bool uc_promisc = false, mc_promisc = false;
- struct fbnic_dev *fbd = fbn->fbd;
+ struct net_device *netdev = fbd->netdev;
struct fbnic_mac_addr *mac_addr;
int err;
@@ -224,49 +223,8 @@ void __fbnic_set_rx_mode(struct net_device *netdev)
uc_promisc |= !!(netdev->flags & IFF_PROMISC);
mc_promisc |= !!(netdev->flags & IFF_ALLMULTI) || uc_promisc;
- /* Populate last TCAM entry with promiscuous entry and 0/1 bit mask */
- mac_addr = &fbd->mac_addr[FBNIC_RPC_TCAM_MACDA_PROMISC_IDX];
- if (uc_promisc) {
- if (!is_zero_ether_addr(mac_addr->value.addr8) ||
- mac_addr->state != FBNIC_TCAM_S_VALID) {
- eth_zero_addr(mac_addr->value.addr8);
- eth_broadcast_addr(mac_addr->mask.addr8);
- clear_bit(FBNIC_MAC_ADDR_T_ALLMULTI,
- mac_addr->act_tcam);
- set_bit(FBNIC_MAC_ADDR_T_PROMISC,
- mac_addr->act_tcam);
- mac_addr->state = FBNIC_TCAM_S_ADD;
- }
- } else if (mc_promisc &&
- (!fbnic_bmc_present(fbd) || !fbd->fw_cap.all_multi)) {
- /* We have to add a special handler for multicast as the
- * BMC may have an all-multi rule already in place. As such
- * adding a rule ourselves won't do any good so we will have
- * to modify the rules for the ALL MULTI below if the BMC
- * already has the rule in place.
- */
- if (!is_multicast_ether_addr(mac_addr->value.addr8) ||
- mac_addr->state != FBNIC_TCAM_S_VALID) {
- eth_zero_addr(mac_addr->value.addr8);
- eth_broadcast_addr(mac_addr->mask.addr8);
- mac_addr->value.addr8[0] ^= 1;
- mac_addr->mask.addr8[0] ^= 1;
- set_bit(FBNIC_MAC_ADDR_T_ALLMULTI,
- mac_addr->act_tcam);
- clear_bit(FBNIC_MAC_ADDR_T_PROMISC,
- mac_addr->act_tcam);
- mac_addr->state = FBNIC_TCAM_S_ADD;
- }
- } else if (mac_addr->state == FBNIC_TCAM_S_VALID) {
- if (test_bit(FBNIC_MAC_ADDR_T_BMC, mac_addr->act_tcam)) {
- clear_bit(FBNIC_MAC_ADDR_T_ALLMULTI,
- mac_addr->act_tcam);
- clear_bit(FBNIC_MAC_ADDR_T_PROMISC,
- mac_addr->act_tcam);
- } else {
- mac_addr->state = FBNIC_TCAM_S_DELETE;
- }
- }
+ /* Update the promiscuous rules */
+ fbnic_promisc_sync(fbd, uc_promisc, mc_promisc);
/* Add rules for BMC all multicast if it is enabled */
fbnic_bmc_rpc_all_multi_config(fbd, mc_promisc);
@@ -282,9 +240,12 @@ void __fbnic_set_rx_mode(struct net_device *netdev)
static void fbnic_set_rx_mode(struct net_device *netdev)
{
+ struct fbnic_net *fbn = netdev_priv(netdev);
+ struct fbnic_dev *fbd = fbn->fbd;
+
/* No need to update the hardware if we are not running */
if (netif_running(netdev))
- __fbnic_set_rx_mode(netdev);
+ __fbnic_set_rx_mode(fbd);
}
static int fbnic_set_mac(struct net_device *netdev, void *p)
@@ -301,10 +262,9 @@ static int fbnic_set_mac(struct net_device *netdev, void *p)
return 0;
}
-void fbnic_clear_rx_mode(struct net_device *netdev)
+void fbnic_clear_rx_mode(struct fbnic_dev *fbd)
{
- struct fbnic_net *fbn = netdev_priv(netdev);
- struct fbnic_dev *fbd = fbn->fbd;
+ struct net_device *netdev = fbd->netdev;
int idx;
for (idx = ARRAY_SIZE(fbd->mac_addr); idx--;) {
@@ -411,11 +371,12 @@ static void fbnic_get_stats64(struct net_device *dev,
struct rtnl_link_stats64 *stats64)
{
u64 rx_bytes, rx_packets, rx_dropped = 0, rx_errors = 0;
+ u64 rx_over = 0, rx_missed = 0, rx_length = 0;
u64 tx_bytes, tx_packets, tx_dropped = 0;
struct fbnic_net *fbn = netdev_priv(dev);
struct fbnic_dev *fbd = fbn->fbd;
struct fbnic_queue_stats *stats;
- u64 rx_over = 0, rx_missed = 0;
unsigned int start, i;
fbnic_get_hw_stats(fbd);
@@ -427,12 +388,12 @@ static void fbnic_get_stats64(struct net_device *dev,
tx_dropped = stats->dropped;
/* Record drops from Tx HW Datapath */
- spin_lock(&fbd->hw_stats_lock);
+ spin_lock(&fbd->hw_stats.lock);
tx_dropped += fbd->hw_stats.tmi.drop.frames.value +
fbd->hw_stats.tti.cm_drop.frames.value +
fbd->hw_stats.tti.frame_drop.frames.value +
fbd->hw_stats.tti.tbi_drop.frames.value;
- spin_unlock(&fbd->hw_stats_lock);
+ spin_unlock(&fbd->hw_stats.lock);
stats64->tx_bytes = tx_bytes;
stats64->tx_packets = tx_packets;
@@ -463,7 +424,7 @@ static void fbnic_get_stats64(struct net_device *dev,
rx_packets = stats->packets;
rx_dropped = stats->dropped;
- spin_lock(&fbd->hw_stats_lock);
+ spin_lock(&fbd->hw_stats.lock);
/* Record drops for the host FIFOs.
* 4: network to Host, 6: BMC to Host
* Exclude the BMC and MC FIFOs as those stats may contain drops
@@ -483,7 +444,7 @@ static void fbnic_get_stats64(struct net_device *dev,
/* Report packets with errors */
rx_errors += fbd->hw_stats.hw_q[i].rde_pkt_err.value;
}
- spin_unlock(&fbd->hw_stats_lock);
+ spin_unlock(&fbd->hw_stats.lock);
stats64->rx_bytes = rx_bytes;
stats64->rx_packets = rx_packets;
@@ -493,6 +454,7 @@ static void fbnic_get_stats64(struct net_device *dev,
stats64->rx_missed_errors = rx_missed;
for (i = 0; i < fbn->num_rx_queues; i++) {
+ struct fbnic_ring *xdpr = fbn->tx[FBNIC_MAX_TXQS + i];
struct fbnic_ring *rxr = fbn->rx[i];
if (!rxr)
@@ -504,14 +466,66 @@ static void fbnic_get_stats64(struct net_device *dev,
rx_bytes = stats->bytes;
rx_packets = stats->packets;
rx_dropped = stats->dropped;
+ rx_length = stats->rx.length_errors;
} while (u64_stats_fetch_retry(&stats->syncp, start));
stats64->rx_bytes += rx_bytes;
stats64->rx_packets += rx_packets;
stats64->rx_dropped += rx_dropped;
+ stats64->rx_errors += rx_length;
+ stats64->rx_length_errors += rx_length;
+
+ if (!xdpr)
+ continue;
+
+ stats = &xdpr->stats;
+ do {
+ start = u64_stats_fetch_begin(&stats->syncp);
+ tx_bytes = stats->bytes;
+ tx_packets = stats->packets;
+ tx_dropped = stats->dropped;
+ } while (u64_stats_fetch_retry(&stats->syncp, start));
+
+ stats64->tx_bytes += tx_bytes;
+ stats64->tx_packets += tx_packets;
+ stats64->tx_dropped += tx_dropped;
}
}
+bool fbnic_check_split_frames(struct bpf_prog *prog, unsigned int mtu,
+ u32 hds_thresh)
+{
+ if (!prog)
+ return false;
+
+ if (prog->aux->xdp_has_frags)
+ return false;
+
+ return mtu + ETH_HLEN > hds_thresh;
+}
+
+static int fbnic_bpf(struct net_device *netdev, struct netdev_bpf *bpf)
+{
+ struct bpf_prog *prog = bpf->prog, *prev_prog;
+ struct fbnic_net *fbn = netdev_priv(netdev);
+
+ if (bpf->command != XDP_SETUP_PROG)
+ return -EINVAL;
+
+ if (fbnic_check_split_frames(prog, netdev->mtu,
+ fbn->hds_thresh)) {
+ NL_SET_ERR_MSG_MOD(bpf->extack,
+ "MTU too high, or HDS threshold is too low for single buffer XDP");
+ return -EOPNOTSUPP;
+ }
+
+ prev_prog = xchg(&fbn->xdp_prog, prog);
+ if (prev_prog)
+ bpf_prog_put(prev_prog);
+
+ return 0;
+}
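
A worked example of the single-buffer check enforced above, with
hypothetical MTU and threshold values (ETH_HLEN is the standard 14-byte
Ethernet header):

	#include <stdbool.h>
	#include <stdio.h>

	#define ETH_HLEN 14

	/* Mirrors fbnic_check_split_frames() for a loaded program
	 * without frag support.
	 */
	static bool reject_single_buf(unsigned int mtu, unsigned int hds_thresh)
	{
		return mtu + ETH_HLEN > hds_thresh;
	}

	int main(void)
	{
		/* 1500 + 14 = 1514 > 1024: fbnic_bpf() returns -EOPNOTSUPP */
		printf("thresh 1024: %s\n",
		       reject_single_buf(1500, 1024) ? "reject" : "ok");
		/* 1514 <= 1536: the program loads */
		printf("thresh 1536: %s\n",
		       reject_single_buf(1500, 1536) ? "reject" : "ok");
		return 0;
	}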
+
static const struct net_device_ops fbnic_netdev_ops = {
.ndo_open = fbnic_open,
.ndo_stop = fbnic_stop,
@@ -521,6 +535,7 @@ static const struct net_device_ops fbnic_netdev_ops = {
.ndo_set_mac_address = fbnic_set_mac,
.ndo_set_rx_mode = fbnic_set_rx_mode,
.ndo_get_stats64 = fbnic_get_stats64,
+ .ndo_bpf = fbnic_bpf,
.ndo_hwtstamp_get = fbnic_hwtstamp_get,
.ndo_hwtstamp_set = fbnic_hwtstamp_set,
};
@@ -557,12 +572,12 @@ static void fbnic_get_queue_stats_rx(struct net_device *dev, int idx,
fbnic_get_hw_q_stats(fbd, fbd->hw_stats.hw_q);
- spin_lock(&fbd->hw_stats_lock);
+ spin_lock(&fbd->hw_stats.lock);
rx->hw_drop_overruns = fbd->hw_stats.hw_q[idx].rde_pkt_cq_drop.value +
fbd->hw_stats.hw_q[idx].rde_pkt_bdq_drop.value;
rx->hw_drops = fbd->hw_stats.hw_q[idx].rde_pkt_err.value +
rx->hw_drop_overruns;
- spin_unlock(&fbd->hw_stats_lock);
+ spin_unlock(&fbd->hw_stats.lock);
}
static void fbnic_get_queue_stats_tx(struct net_device *dev, int idx,
@@ -572,6 +587,7 @@ static void fbnic_get_queue_stats_tx(struct net_device *dev, int idx,
struct fbnic_ring *txr = fbn->tx[idx];
struct fbnic_queue_stats *stats;
u64 stop, wake, csum, lso;
+ struct fbnic_ring *xdpr;
unsigned int start;
u64 bytes, packets;
@@ -595,6 +611,19 @@ static void fbnic_get_queue_stats_tx(struct net_device *dev, int idx,
tx->hw_gso_wire_packets = lso;
tx->stop = stop;
tx->wake = wake;
+
+ xdpr = fbn->tx[FBNIC_MAX_TXQS + idx];
+ if (xdpr) {
+ stats = &xdpr->stats;
+ do {
+ start = u64_stats_fetch_begin(&stats->syncp);
+ bytes = stats->bytes;
+ packets = stats->packets;
+ } while (u64_stats_fetch_retry(&stats->syncp, start));
+
+ tx->bytes += bytes;
+ tx->packets += packets;
+ }
}
static void fbnic_get_base_stats(struct net_device *dev,
@@ -682,6 +711,7 @@ struct net_device *fbnic_netdev_alloc(struct fbnic_dev *fbd)
netdev->netdev_ops = &fbnic_netdev_ops;
netdev->stat_ops = &fbnic_stat_ops;
+ netdev->queue_mgmt_ops = &fbnic_queue_mgmt_ops;
fbnic_set_ethtool_ops(netdev);
@@ -699,6 +729,10 @@ struct net_device *fbnic_netdev_alloc(struct fbnic_dev *fbd)
fbn->rx_usecs = FBNIC_RX_USECS_DEFAULT;
fbn->rx_max_frames = FBNIC_RX_FRAMES_DEFAULT;
+ /* Initialize the hds_thresh */
+ netdev->cfg->hds_thresh = FBNIC_HDS_THRESH_DEFAULT;
+ fbn->hds_thresh = FBNIC_HDS_THRESH_DEFAULT;
+
default_queues = netif_get_num_default_rss_queues();
if (default_queues > fbd->max_num_queues)
default_queues = fbd->max_num_queues;
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h
index 86576ae04262..e84e0527c3a9 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h
@@ -18,7 +18,9 @@
#define FBNIC_TUN_GSO_FEATURES NETIF_F_GSO_IPXIP6
struct fbnic_net {
- struct fbnic_ring *tx[FBNIC_MAX_TXQS];
+ struct bpf_prog *xdp_prog;
+
+ struct fbnic_ring *tx[FBNIC_MAX_TXQS + FBNIC_MAX_XDPQS];
struct fbnic_ring *rx[FBNIC_MAX_RXQS];
struct fbnic_napi_vector *napi[FBNIC_MAX_NAPI_VECTORS];
@@ -31,6 +33,8 @@ struct fbnic_net {
u32 ppq_size;
u32 rcq_size;
+ u32 hds_thresh;
+
u16 rx_usecs;
u16 tx_usecs;
@@ -90,8 +94,8 @@ void fbnic_time_init(struct fbnic_net *fbn);
int fbnic_time_start(struct fbnic_net *fbn);
void fbnic_time_stop(struct fbnic_net *fbn);
-void __fbnic_set_rx_mode(struct net_device *netdev);
-void fbnic_clear_rx_mode(struct net_device *netdev);
+void __fbnic_set_rx_mode(struct fbnic_dev *fbd);
+void fbnic_clear_rx_mode(struct fbnic_dev *fbd);
void fbnic_phylink_get_pauseparam(struct net_device *netdev,
struct ethtool_pauseparam *pause);
@@ -102,4 +106,7 @@ int fbnic_phylink_ethtool_ksettings_get(struct net_device *netdev,
int fbnic_phylink_get_fecparam(struct net_device *netdev,
struct ethtool_fecparam *fecparam);
int fbnic_phylink_init(struct net_device *netdev);
+
+bool fbnic_check_split_frames(struct bpf_prog *prog,
+ unsigned int mtu, u32 hds_threshold);
#endif /* _FBNIC_NETDEV_H_ */
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_pci.c b/drivers/net/ethernet/meta/fbnic/fbnic_pci.c
index 28e23e3ffca8..9fdc8f4f36cc 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_pci.c
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_pci.c
@@ -135,7 +135,7 @@ void fbnic_up(struct fbnic_net *fbn)
fbnic_rss_reinit_hw(fbn->fbd, fbn);
- __fbnic_set_rx_mode(fbn->netdev);
+ __fbnic_set_rx_mode(fbn->fbd);
/* Enable Tx/Rx processing */
fbnic_napi_enable(fbn);
@@ -152,7 +152,7 @@ void fbnic_down_noidle(struct fbnic_net *fbn)
fbnic_napi_disable(fbn);
netif_tx_disable(fbn->netdev);
- fbnic_clear_rx_mode(fbn->netdev);
+ fbnic_clear_rx_mode(fbn->fbd);
fbnic_clear_rules(fbn->fbd);
fbnic_rss_disable_hw(fbn->fbd);
fbnic_disable(fbn);
@@ -204,8 +204,13 @@ static void fbnic_service_task(struct work_struct *work)
fbnic_health_check(fbd);
- if (netif_carrier_ok(fbd->netdev))
+ fbnic_bmc_rpc_check(fbd);
+
+ if (netif_carrier_ok(fbd->netdev)) {
+ netdev_lock(fbd->netdev);
fbnic_napi_depletion_check(fbd->netdev);
+ netdev_unlock(fbd->netdev);
+ }
if (netif_running(fbd->netdev))
schedule_delayed_work(&fbd->service_task, HZ);
@@ -302,10 +307,9 @@ static int fbnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
fbnic_devlink_register(fbd);
fbnic_dbg_fbd_init(fbd);
- spin_lock_init(&fbd->hw_stats_lock);
/* Capture snapshot of hardware stats so netdev can calculate delta */
- fbnic_reset_hw_stats(fbd);
+ fbnic_init_hw_stats(fbd);
fbnic_hwmon_register(fbd);
@@ -390,12 +394,14 @@ static int fbnic_pm_suspend(struct device *dev)
goto null_uc_addr;
rtnl_lock();
+ netdev_lock(netdev);
netif_device_detach(netdev);
if (netif_running(netdev))
netdev->netdev_ops->ndo_stop(netdev);
+ netdev_unlock(netdev);
rtnl_unlock();
null_uc_addr:
@@ -460,10 +466,12 @@ static int __fbnic_pm_resume(struct device *dev)
fbnic_reset_queues(fbn, fbn->num_tx_queues, fbn->num_rx_queues);
rtnl_lock();
+ netdev_lock(netdev);
if (netif_running(netdev))
err = __fbnic_open(fbn);
+ netdev_unlock(netdev);
rtnl_unlock();
if (err)
goto err_free_mbx;
@@ -489,6 +497,10 @@ static void __fbnic_pm_attach(struct device *dev)
struct net_device *netdev = fbd->netdev;
struct fbnic_net *fbn;
+ rtnl_lock();
+ fbnic_reset_hw_stats(fbd);
+ rtnl_unlock();
+
if (fbnic_init_failure(fbd))
return;
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_rpc.c b/drivers/net/ethernet/meta/fbnic/fbnic_rpc.c
index 8ff07b5562e3..4284b3cb7fcc 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_rpc.c
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_rpc.c
@@ -6,6 +6,7 @@
#include <net/ipv6.h>
#include "fbnic.h"
+#include "fbnic_fw.h"
#include "fbnic_netdev.h"
#include "fbnic_rpc.h"
@@ -71,6 +72,8 @@ u16 fbnic_flow_hash_2_rss_en_mask(struct fbnic_net *fbn, int flow_type)
rss_en_mask |= FBNIC_FH_2_RSSEM_BIT(IP_DST, IP_DST, flow_hash);
rss_en_mask |= FBNIC_FH_2_RSSEM_BIT(L4_B_0_1, L4_SRC, flow_hash);
rss_en_mask |= FBNIC_FH_2_RSSEM_BIT(L4_B_2_3, L4_DST, flow_hash);
+ rss_en_mask |= FBNIC_FH_2_RSSEM_BIT(IP6_FL, OV6_FL_LBL, flow_hash);
+ rss_en_mask |= FBNIC_FH_2_RSSEM_BIT(IP6_FL, IV6_FL_LBL, flow_hash);
return rss_en_mask;
}
@@ -129,12 +132,9 @@ void fbnic_bmc_rpc_all_multi_config(struct fbnic_dev *fbd,
else
clear_bit(FBNIC_MAC_ADDR_T_ALLMULTI,
mac_addr->act_tcam);
- } else if (!test_bit(FBNIC_MAC_ADDR_T_BMC, mac_addr->act_tcam) &&
- !is_zero_ether_addr(mac_addr->mask.addr8) &&
- mac_addr->state == FBNIC_TCAM_S_VALID) {
- clear_bit(FBNIC_MAC_ADDR_T_ALLMULTI, mac_addr->act_tcam);
- clear_bit(FBNIC_MAC_ADDR_T_BMC, mac_addr->act_tcam);
- mac_addr->state = FBNIC_TCAM_S_DELETE;
+ } else {
+ __fbnic_xc_unsync(mac_addr, FBNIC_MAC_ADDR_T_BMC);
+ __fbnic_xc_unsync(mac_addr, FBNIC_MAC_ADDR_T_ALLMULTI);
}
/* We have to add a special handler for multicast as the
@@ -236,8 +236,25 @@ void fbnic_bmc_rpc_init(struct fbnic_dev *fbd)
act_tcam->mask.tcam[j] = 0xffff;
act_tcam->state = FBNIC_TCAM_S_UPDATE;
+}
- fbnic_bmc_rpc_all_multi_config(fbd, false);
+void fbnic_bmc_rpc_check(struct fbnic_dev *fbd)
+{
+ int err;
+
+ if (fbd->fw_cap.need_bmc_tcam_reinit) {
+ fbnic_bmc_rpc_init(fbd);
+ __fbnic_set_rx_mode(fbd);
+ fbd->fw_cap.need_bmc_tcam_reinit = false;
+ }
+
+ if (fbd->fw_cap.need_bmc_macda_sync) {
+ err = fbnic_fw_xmit_rpc_macda_sync(fbd);
+ if (err)
+ dev_warn(fbd->dev,
+ "Writing MACDA table to FW failed, err: %d\n", err);
+ fbd->fw_cap.need_bmc_macda_sync = false;
+ }
}
#define FBNIC_ACT1_INIT(_l4, _udp, _ip, _v6) \
@@ -452,6 +469,50 @@ int __fbnic_xc_unsync(struct fbnic_mac_addr *mac_addr, unsigned int tcam_idx)
return 0;
}
+void fbnic_promisc_sync(struct fbnic_dev *fbd,
+ bool uc_promisc, bool mc_promisc)
+{
+ struct fbnic_mac_addr *mac_addr;
+
+ /* Populate last TCAM entry with promiscuous entry and 0/1 bit mask */
+ mac_addr = &fbd->mac_addr[FBNIC_RPC_TCAM_MACDA_PROMISC_IDX];
+ if (uc_promisc) {
+ if (!is_zero_ether_addr(mac_addr->value.addr8) ||
+ mac_addr->state != FBNIC_TCAM_S_VALID) {
+ eth_zero_addr(mac_addr->value.addr8);
+ eth_broadcast_addr(mac_addr->mask.addr8);
+ clear_bit(FBNIC_MAC_ADDR_T_ALLMULTI,
+ mac_addr->act_tcam);
+ set_bit(FBNIC_MAC_ADDR_T_PROMISC,
+ mac_addr->act_tcam);
+ mac_addr->state = FBNIC_TCAM_S_ADD;
+ }
+ } else if (mc_promisc &&
+ (!fbnic_bmc_present(fbd) || !fbd->fw_cap.all_multi)) {
+ /* Multicast needs special handling, as the BMC may already
+ * have an all-multi rule in place. In that case adding a rule
+ * of our own won't do any good, so we instead modify the
+ * ALL MULTI rules below when the BMC rule exists.
+ */
+ if (!is_multicast_ether_addr(mac_addr->value.addr8) ||
+ mac_addr->state != FBNIC_TCAM_S_VALID) {
+ eth_zero_addr(mac_addr->value.addr8);
+ eth_broadcast_addr(mac_addr->mask.addr8);
+ mac_addr->value.addr8[0] ^= 1;
+ mac_addr->mask.addr8[0] ^= 1;
+ set_bit(FBNIC_MAC_ADDR_T_ALLMULTI,
+ mac_addr->act_tcam);
+ clear_bit(FBNIC_MAC_ADDR_T_PROMISC,
+ mac_addr->act_tcam);
+ mac_addr->state = FBNIC_TCAM_S_ADD;
+ }
+ } else if (mac_addr->state == FBNIC_TCAM_S_VALID) {
+ __fbnic_xc_unsync(mac_addr, FBNIC_MAC_ADDR_T_ALLMULTI);
+ __fbnic_xc_unsync(mac_addr, FBNIC_MAC_ADDR_T_PROMISC);
+ }
+}
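
The XOR pair in the multicast branch above produces value
01:00:00:00:00:00 and mask fe:ff:ff:ff:ff:ff, i.e. only the Ethernet
group bit takes part in the match (assuming set mask bits mark
don't-care positions in this TCAM). A short sketch of the byte math:

	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		uint8_t value[6], mask[6];

		memset(value, 0x00, sizeof(value));	/* eth_zero_addr() */
		memset(mask, 0xff, sizeof(mask));	/* eth_broadcast_addr() */
		value[0] ^= 1;	/* 0x01: the multicast/group bit */
		mask[0] ^= 1;	/* 0xfe: all other bits are don't-care */

		printf("value[0]=0x%02x mask[0]=0x%02x\n", value[0], mask[0]);
		return 0;
	}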
+
void fbnic_sift_macda(struct fbnic_dev *fbd)
{
int dest, src;
@@ -556,7 +617,7 @@ static void fbnic_write_macda_entry(struct fbnic_dev *fbd, unsigned int idx,
void fbnic_write_macda(struct fbnic_dev *fbd)
{
- int idx;
+ int idx, updates = 0;
for (idx = ARRAY_SIZE(fbd->mac_addr); idx--;) {
struct fbnic_mac_addr *mac_addr = &fbd->mac_addr[idx];
@@ -565,6 +626,9 @@ void fbnic_write_macda(struct fbnic_dev *fbd)
if (!(mac_addr->state & FBNIC_TCAM_S_UPDATE))
continue;
+ /* Record update count */
+ updates++;
+
/* Clear by writing 0s. */
if (mac_addr->state == FBNIC_TCAM_S_DELETE) {
/* Invalidate entry and clear addr state info */
@@ -578,6 +642,14 @@ void fbnic_write_macda(struct fbnic_dev *fbd)
mac_addr->state = FBNIC_TCAM_S_VALID;
}
+
+ /* If reinitializing the BMC TCAM we are doing an initial update */
+ if (fbd->fw_cap.need_bmc_tcam_reinit)
+ updates++;
+
+ /* If needed notify firmware of changes to MACDA TCAM */
+ if (updates != 0 && fbnic_bmc_present(fbd))
+ fbd->fw_cap.need_bmc_macda_sync = true;
}
static void fbnic_clear_act_tcam(struct fbnic_dev *fbd, unsigned int idx)
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_rpc.h b/drivers/net/ethernet/meta/fbnic/fbnic_rpc.h
index 6892414195c3..3d4925b2ac75 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_rpc.h
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_rpc.h
@@ -184,6 +184,7 @@ struct fbnic_net;
void fbnic_bmc_rpc_init(struct fbnic_dev *fbd);
void fbnic_bmc_rpc_all_multi_config(struct fbnic_dev *fbd, bool enable_host);
+void fbnic_bmc_rpc_check(struct fbnic_dev *fbd);
void fbnic_reset_indir_tbl(struct fbnic_net *fbn);
void fbnic_rss_key_fill(u32 *buffer);
@@ -201,6 +202,9 @@ struct fbnic_mac_addr *__fbnic_mc_sync(struct fbnic_dev *fbd,
void fbnic_sift_macda(struct fbnic_dev *fbd);
void fbnic_write_macda(struct fbnic_dev *fbd);
+void fbnic_promisc_sync(struct fbnic_dev *fbd,
+ bool uc_promisc, bool mc_promisc);
+
struct fbnic_ip_addr *__fbnic_ip4_sync(struct fbnic_dev *fbd,
struct fbnic_ip_addr *ip_addr,
const struct in_addr *addr,
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c
index f9543d03485f..493f7f4df013 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c
@@ -2,11 +2,14 @@
/* Copyright (c) Meta Platforms, Inc. and affiliates. */
#include <linux/bitfield.h>
+#include <linux/bpf.h>
+#include <linux/bpf_trace.h>
#include <linux/iopoll.h>
#include <linux/pci.h>
#include <net/netdev_queues.h>
#include <net/page_pool/helpers.h>
#include <net/tcp.h>
+#include <net/xdp.h>
#include "fbnic.h"
#include "fbnic_csr.h"
@@ -14,6 +17,13 @@
#include "fbnic_txrx.h"
enum {
+ FBNIC_XDP_PASS = 0,
+ FBNIC_XDP_CONSUME,
+ FBNIC_XDP_TX,
+ FBNIC_XDP_LEN_ERR,
+};
+
+enum {
FBNIC_XMIT_CB_TS = 0x01,
};
@@ -606,6 +616,54 @@ static void fbnic_clean_twq0(struct fbnic_napi_vector *nv, int napi_budget,
}
}
+static void fbnic_clean_twq1(struct fbnic_napi_vector *nv, bool pp_allow_direct,
+ struct fbnic_ring *ring, bool discard,
+ unsigned int hw_head)
+{
+ u64 total_bytes = 0, total_packets = 0;
+ unsigned int head = ring->head;
+
+ while (hw_head != head) {
+ struct page *page;
+ u64 twd;
+
+ if (unlikely(!(ring->desc[head] & FBNIC_TWD_TYPE(AL))))
+ goto next_desc;
+
+ twd = le64_to_cpu(ring->desc[head]);
+ page = ring->tx_buf[head];
+
+ /* TYPE_AL is 2, TYPE_LAST_AL is 3. So this trick gives
+ * us one increment per packet, with no branches.
+ */
+ total_packets += FIELD_GET(FBNIC_TWD_TYPE_MASK, twd) -
+ FBNIC_TWD_TYPE_AL;
+ total_bytes += FIELD_GET(FBNIC_TWD_LEN_MASK, twd);
+
+ page_pool_put_page(page->pp, page, -1, pp_allow_direct);
+next_desc:
+ head++;
+ head &= ring->size_mask;
+ }
+
+ if (!total_bytes)
+ return;
+
+ ring->head = head;
+
+ if (discard) {
+ u64_stats_update_begin(&ring->stats.syncp);
+ ring->stats.dropped += total_packets;
+ u64_stats_update_end(&ring->stats.syncp);
+ return;
+ }
+
+ u64_stats_update_begin(&ring->stats.syncp);
+ ring->stats.bytes += total_bytes;
+ ring->stats.packets += total_packets;
+ u64_stats_update_end(&ring->stats.syncp);
+}
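
A sketch of the branch-free packet count used in fbnic_clean_twq1()
above: TYPE_AL (2) contributes 0 and TYPE_LAST_AL (3) contributes 1, so
only the final descriptor of each packet bumps the total (the
descriptor stream below is hypothetical):

	#include <stdint.h>
	#include <stdio.h>

	#define TYPE_AL		2	/* continuation descriptor */
	#define TYPE_LAST_AL	3	/* final descriptor of a packet */

	int main(void)
	{
		/* Two packets: one spans three descriptors, one spans one */
		int types[] = { TYPE_AL, TYPE_AL, TYPE_LAST_AL, TYPE_LAST_AL };
		uint64_t packets = 0;
		unsigned int i;

		for (i = 0; i < sizeof(types) / sizeof(types[0]); i++)
			packets += types[i] - TYPE_AL;	/* +0 or +1, no branch */

		printf("packets: %llu\n", (unsigned long long)packets);
		return 0;
	}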
+
static void fbnic_clean_tsq(struct fbnic_napi_vector *nv,
struct fbnic_ring *ring,
u64 tcd, int *ts_head, int *head0)
@@ -657,44 +715,65 @@ static void fbnic_clean_tsq(struct fbnic_napi_vector *nv,
}
static void fbnic_page_pool_init(struct fbnic_ring *ring, unsigned int idx,
- struct page *page)
+ netmem_ref netmem)
{
struct fbnic_rx_buf *rx_buf = &ring->rx_buf[idx];
- page_pool_fragment_page(page, FBNIC_PAGECNT_BIAS_MAX);
+ page_pool_fragment_netmem(netmem, FBNIC_PAGECNT_BIAS_MAX);
rx_buf->pagecnt_bias = FBNIC_PAGECNT_BIAS_MAX;
- rx_buf->page = page;
+ rx_buf->netmem = netmem;
}
-static struct page *fbnic_page_pool_get(struct fbnic_ring *ring,
- unsigned int idx)
+static struct page *
+fbnic_page_pool_get_head(struct fbnic_q_triad *qt, unsigned int idx)
{
- struct fbnic_rx_buf *rx_buf = &ring->rx_buf[idx];
+ struct fbnic_rx_buf *rx_buf = &qt->sub0.rx_buf[idx];
rx_buf->pagecnt_bias--;
- return rx_buf->page;
+ /* sub0 is always fed system pages from its own page pool, so
+ * netmem_to_page() is safe here
+ */
+ return netmem_to_page(rx_buf->netmem);
+}
+
+static netmem_ref
+fbnic_page_pool_get_data(struct fbnic_q_triad *qt, unsigned int idx)
+{
+ struct fbnic_rx_buf *rx_buf = &qt->sub1.rx_buf[idx];
+
+ rx_buf->pagecnt_bias--;
+
+ return rx_buf->netmem;
}
static void fbnic_page_pool_drain(struct fbnic_ring *ring, unsigned int idx,
- struct fbnic_napi_vector *nv, int budget)
+ int budget)
{
struct fbnic_rx_buf *rx_buf = &ring->rx_buf[idx];
- struct page *page = rx_buf->page;
+ netmem_ref netmem = rx_buf->netmem;
- if (!page_pool_unref_page(page, rx_buf->pagecnt_bias))
- page_pool_put_unrefed_page(nv->page_pool, page, -1, !!budget);
+ if (!page_pool_unref_netmem(netmem, rx_buf->pagecnt_bias))
+ page_pool_put_unrefed_netmem(ring->page_pool, netmem, -1,
+ !!budget);
- rx_buf->page = NULL;
+ rx_buf->netmem = 0;
}
static void fbnic_clean_twq(struct fbnic_napi_vector *nv, int napi_budget,
- struct fbnic_q_triad *qt, s32 ts_head, s32 head0)
+ struct fbnic_q_triad *qt, s32 ts_head, s32 head0,
+ s32 head1)
{
if (head0 >= 0)
fbnic_clean_twq0(nv, napi_budget, &qt->sub0, false, head0);
else if (ts_head >= 0)
fbnic_clean_twq0(nv, napi_budget, &qt->sub0, false, ts_head);
+
+ if (head1 >= 0) {
+ qt->cmpl.deferred_head = -1;
+ if (napi_budget)
+ fbnic_clean_twq1(nv, true, &qt->sub1, false, head1);
+ else
+ qt->cmpl.deferred_head = head1;
+ }
}
static void
@@ -702,6 +781,7 @@ fbnic_clean_tcq(struct fbnic_napi_vector *nv, struct fbnic_q_triad *qt,
int napi_budget)
{
struct fbnic_ring *cmpl = &qt->cmpl;
+ s32 head1 = cmpl->deferred_head;
s32 head0 = -1, ts_head = -1;
__le64 *raw_tcd, done;
u32 head = cmpl->head;
@@ -719,7 +799,10 @@ fbnic_clean_tcq(struct fbnic_napi_vector *nv, struct fbnic_q_triad *qt,
switch (FIELD_GET(FBNIC_TCD_TYPE_MASK, tcd)) {
case FBNIC_TCD_TYPE_0:
- if (!(tcd & FBNIC_TCD_TWQ1))
+ if (tcd & FBNIC_TCD_TWQ1)
+ head1 = FIELD_GET(FBNIC_TCD_TYPE0_HEAD1_MASK,
+ tcd);
+ else
head0 = FIELD_GET(FBNIC_TCD_TYPE0_HEAD0_MASK,
tcd);
/* Currently all err status bits are related to
@@ -752,11 +835,11 @@ fbnic_clean_tcq(struct fbnic_napi_vector *nv, struct fbnic_q_triad *qt,
}
/* Unmap and free processed buffers */
- fbnic_clean_twq(nv, napi_budget, qt, ts_head, head0);
+ fbnic_clean_twq(nv, napi_budget, qt, ts_head, head0, head1);
}
-static void fbnic_clean_bdq(struct fbnic_napi_vector *nv, int napi_budget,
- struct fbnic_ring *ring, unsigned int hw_head)
+static void fbnic_clean_bdq(struct fbnic_ring *ring, unsigned int hw_head,
+ int napi_budget)
{
unsigned int head = ring->head;
@@ -764,7 +847,7 @@ static void fbnic_clean_bdq(struct fbnic_napi_vector *nv, int napi_budget,
return;
do {
- fbnic_page_pool_drain(ring, head, nv, napi_budget);
+ fbnic_page_pool_drain(ring, head, napi_budget);
head++;
head &= ring->size_mask;
@@ -773,10 +856,10 @@ static void fbnic_clean_bdq(struct fbnic_napi_vector *nv, int napi_budget,
ring->head = head;
}
-static void fbnic_bd_prep(struct fbnic_ring *bdq, u16 id, struct page *page)
+static void fbnic_bd_prep(struct fbnic_ring *bdq, u16 id, netmem_ref netmem)
{
__le64 *bdq_desc = &bdq->desc[id * FBNIC_BD_FRAG_COUNT];
- dma_addr_t dma = page_pool_get_dma_addr(page);
+ dma_addr_t dma = page_pool_get_dma_addr_netmem(netmem);
u64 bd, i = FBNIC_BD_FRAG_COUNT;
bd = (FBNIC_BD_PAGE_ADDR_MASK & dma) |
@@ -794,7 +877,7 @@ static void fbnic_bd_prep(struct fbnic_ring *bdq, u16 id, struct page *page)
} while (--i);
}
-static void fbnic_fill_bdq(struct fbnic_napi_vector *nv, struct fbnic_ring *bdq)
+static void fbnic_fill_bdq(struct fbnic_ring *bdq)
{
unsigned int count = fbnic_desc_unused(bdq);
unsigned int i = bdq->tail;
@@ -803,10 +886,10 @@ static void fbnic_fill_bdq(struct fbnic_napi_vector *nv, struct fbnic_ring *bdq)
return;
do {
- struct page *page;
+ netmem_ref netmem;
- page = page_pool_dev_alloc_pages(nv->page_pool);
- if (!page) {
+ netmem = page_pool_dev_alloc_netmems(bdq->page_pool);
+ if (!netmem) {
u64_stats_update_begin(&bdq->stats.syncp);
bdq->stats.rx.alloc_failed++;
u64_stats_update_end(&bdq->stats.syncp);
@@ -814,8 +897,8 @@ static void fbnic_fill_bdq(struct fbnic_napi_vector *nv, struct fbnic_ring *bdq)
break;
}
- fbnic_page_pool_init(bdq, i, page);
- fbnic_bd_prep(bdq, i, page);
+ fbnic_page_pool_init(bdq, i, netmem);
+ fbnic_bd_prep(bdq, i, netmem);
i++;
i &= bdq->size_mask;
@@ -862,7 +945,7 @@ static void fbnic_pkt_prepare(struct fbnic_napi_vector *nv, u64 rcd,
{
unsigned int hdr_pg_idx = FIELD_GET(FBNIC_RCD_AL_BUFF_PAGE_MASK, rcd);
unsigned int hdr_pg_off = FIELD_GET(FBNIC_RCD_AL_BUFF_OFF_MASK, rcd);
- struct page *page = fbnic_page_pool_get(&qt->sub0, hdr_pg_idx);
+ struct page *page = fbnic_page_pool_get_head(qt, hdr_pg_idx);
unsigned int len = FIELD_GET(FBNIC_RCD_AL_BUFF_LEN_MASK, rcd);
unsigned int frame_sz, hdr_pg_start, hdr_pg_end, headroom;
unsigned char *hdr_start;
@@ -877,7 +960,7 @@ static void fbnic_pkt_prepare(struct fbnic_napi_vector *nv, u64 rcd,
headroom = hdr_pg_off - hdr_pg_start + FBNIC_RX_PAD;
frame_sz = hdr_pg_end - hdr_pg_start;
- xdp_init_buff(&pkt->buff, frame_sz, NULL);
+ xdp_init_buff(&pkt->buff, frame_sz, &qt->xdp_rxq);
hdr_pg_start += (FBNIC_RCD_AL_BUFF_FRAG_MASK & rcd) *
FBNIC_BD_FRAG_SIZE;
@@ -888,13 +971,12 @@ static void fbnic_pkt_prepare(struct fbnic_napi_vector *nv, u64 rcd,
/* Build frame around buffer */
hdr_start = page_address(page) + hdr_pg_start;
-
+ net_prefetch(pkt->buff.data);
xdp_prepare_buff(&pkt->buff, hdr_start, headroom,
len - FBNIC_RX_PAD, true);
- pkt->data_truesize = 0;
- pkt->data_len = 0;
- pkt->nr_frags = 0;
+ pkt->hwtstamp = 0;
+ pkt->add_frag_failed = false;
}
static void fbnic_add_rx_frag(struct fbnic_napi_vector *nv, u64 rcd,
@@ -904,9 +986,9 @@ static void fbnic_add_rx_frag(struct fbnic_napi_vector *nv, u64 rcd,
unsigned int pg_idx = FIELD_GET(FBNIC_RCD_AL_BUFF_PAGE_MASK, rcd);
unsigned int pg_off = FIELD_GET(FBNIC_RCD_AL_BUFF_OFF_MASK, rcd);
unsigned int len = FIELD_GET(FBNIC_RCD_AL_BUFF_LEN_MASK, rcd);
- struct page *page = fbnic_page_pool_get(&qt->sub1, pg_idx);
- struct skb_shared_info *shinfo;
+ netmem_ref netmem = fbnic_page_pool_get_data(qt, pg_idx);
unsigned int truesize;
+ bool added;
truesize = FIELD_GET(FBNIC_RCD_AL_PAGE_FIN, rcd) ?
FBNIC_BD_FRAG_SIZE - pg_off : ALIGN(len, 128);
@@ -915,88 +997,171 @@ static void fbnic_add_rx_frag(struct fbnic_napi_vector *nv, u64 rcd,
FBNIC_BD_FRAG_SIZE;
/* Sync DMA buffer */
- dma_sync_single_range_for_cpu(nv->dev, page_pool_get_dma_addr(page),
- pg_off, truesize, DMA_BIDIRECTIONAL);
-
- /* Add page to xdp shared info */
- shinfo = xdp_get_shared_info_from_buff(&pkt->buff);
-
- /* We use gso_segs to store truesize */
- pkt->data_truesize += truesize;
-
- __skb_fill_page_desc_noacc(shinfo, pkt->nr_frags++, page, pg_off, len);
-
- /* Store data_len in gso_size */
- pkt->data_len += len;
+ page_pool_dma_sync_netmem_for_cpu(qt->sub1.page_pool, netmem,
+ pg_off, truesize);
+
+ added = xdp_buff_add_frag(&pkt->buff, netmem, pg_off, len, truesize);
+ if (unlikely(!added)) {
+ pkt->add_frag_failed = true;
+ netdev_err_once(nv->napi.dev,
+ "Failed to add fragment to xdp_buff\n");
+ }
}
-static void fbnic_put_pkt_buff(struct fbnic_napi_vector *nv,
+static void fbnic_put_pkt_buff(struct fbnic_q_triad *qt,
struct fbnic_pkt_buff *pkt, int budget)
{
- struct skb_shared_info *shinfo;
struct page *page;
- int nr_frags;
if (!pkt->buff.data_hard_start)
return;
- shinfo = xdp_get_shared_info_from_buff(&pkt->buff);
- nr_frags = pkt->nr_frags;
+ if (xdp_buff_has_frags(&pkt->buff)) {
+ struct skb_shared_info *shinfo;
+ netmem_ref netmem;
+ int nr_frags;
+
+ shinfo = xdp_get_shared_info_from_buff(&pkt->buff);
+ nr_frags = shinfo->nr_frags;
- while (nr_frags--) {
- page = skb_frag_page(&shinfo->frags[nr_frags]);
- page_pool_put_full_page(nv->page_pool, page, !!budget);
+ while (nr_frags--) {
+ netmem = skb_frag_netmem(&shinfo->frags[nr_frags]);
+ page_pool_put_full_netmem(qt->sub1.page_pool, netmem,
+ !!budget);
+ }
}
page = virt_to_page(pkt->buff.data_hard_start);
- page_pool_put_full_page(nv->page_pool, page, !!budget);
+ page_pool_put_full_page(qt->sub0.page_pool, page, !!budget);
}
static struct sk_buff *fbnic_build_skb(struct fbnic_napi_vector *nv,
struct fbnic_pkt_buff *pkt)
{
- unsigned int nr_frags = pkt->nr_frags;
- struct skb_shared_info *shinfo;
- unsigned int truesize;
struct sk_buff *skb;
- truesize = xdp_data_hard_end(&pkt->buff) + FBNIC_RX_TROOM -
- pkt->buff.data_hard_start;
-
- /* Build frame around buffer */
- skb = napi_build_skb(pkt->buff.data_hard_start, truesize);
- if (unlikely(!skb))
+ skb = xdp_build_skb_from_buff(&pkt->buff);
+ if (!skb)
return NULL;
- /* Push data pointer to start of data, put tail to end of data */
- skb_reserve(skb, pkt->buff.data - pkt->buff.data_hard_start);
- __skb_put(skb, pkt->buff.data_end - pkt->buff.data);
+ /* Add timestamp if present */
+ if (pkt->hwtstamp)
+ skb_hwtstamps(skb)->hwtstamp = pkt->hwtstamp;
- /* Add tracking for metadata at the start of the frame */
- skb_metadata_set(skb, pkt->buff.data - pkt->buff.data_meta);
+ return skb;
+}
- /* Add Rx frags */
- if (nr_frags) {
- /* Verify that shared info didn't move */
+static long fbnic_pkt_tx(struct fbnic_napi_vector *nv,
+ struct fbnic_pkt_buff *pkt)
+{
+ struct fbnic_ring *ring = &nv->qt[0].sub1;
+ int size, offset, nsegs = 1, data_len = 0;
+ unsigned int tail = ring->tail;
+ struct skb_shared_info *shinfo;
+ skb_frag_t *frag = NULL;
+ struct page *page;
+ dma_addr_t dma;
+ __le64 *twd;
+
+ if (unlikely(xdp_buff_has_frags(&pkt->buff))) {
shinfo = xdp_get_shared_info_from_buff(&pkt->buff);
- WARN_ON(skb_shinfo(skb) != shinfo);
+ nsegs += shinfo->nr_frags;
+ data_len = shinfo->xdp_frags_size;
+ frag = &shinfo->frags[0];
+ }
- skb->truesize += pkt->data_truesize;
- skb->data_len += pkt->data_len;
- shinfo->nr_frags = nr_frags;
- skb->len += pkt->data_len;
+ if (fbnic_desc_unused(ring) < nsegs) {
+ u64_stats_update_begin(&ring->stats.syncp);
+ ring->stats.dropped++;
+ u64_stats_update_end(&ring->stats.syncp);
+ return -FBNIC_XDP_CONSUME;
}
- skb_mark_for_recycle(skb);
+ page = virt_to_page(pkt->buff.data_hard_start);
+ offset = offset_in_page(pkt->buff.data);
+ dma = page_pool_get_dma_addr(page);
- /* Set MAC header specific fields */
- skb->protocol = eth_type_trans(skb, nv->napi.dev);
+ size = pkt->buff.data_end - pkt->buff.data;
- /* Add timestamp if present */
- if (pkt->hwtstamp)
- skb_hwtstamps(skb)->hwtstamp = pkt->hwtstamp;
+ while (nsegs--) {
+ dma_sync_single_range_for_device(nv->dev, dma, offset, size,
+ DMA_BIDIRECTIONAL);
+ dma += offset;
- return skb;
+ ring->tx_buf[tail] = page;
+
+ twd = &ring->desc[tail];
+ *twd = cpu_to_le64(FIELD_PREP(FBNIC_TWD_ADDR_MASK, dma) |
+ FIELD_PREP(FBNIC_TWD_LEN_MASK, size) |
+ FIELD_PREP(FBNIC_TWD_TYPE_MASK,
+ FBNIC_TWD_TYPE_AL));
+
+ tail++;
+ tail &= ring->size_mask;
+
+ if (!data_len)
+ break;
+
+ offset = skb_frag_off(frag);
+ page = skb_frag_page(frag);
+ dma = page_pool_get_dma_addr(page);
+
+ size = skb_frag_size(frag);
+ data_len -= size;
+ frag++;
+ }
+
+ *twd |= FBNIC_TWD_TYPE(LAST_AL);
+
+ ring->tail = tail;
+
+ return -FBNIC_XDP_TX;
+}
+
+static void fbnic_pkt_commit_tail(struct fbnic_napi_vector *nv,
+ unsigned int pkt_tail)
+{
+ struct fbnic_ring *ring = &nv->qt[0].sub1;
+
+ /* Force DMA writes to flush before writing to tail */
+ dma_wmb();
+
+ writel(pkt_tail, ring->doorbell);
+}
+
+static struct sk_buff *fbnic_run_xdp(struct fbnic_napi_vector *nv,
+ struct fbnic_pkt_buff *pkt)
+{
+ struct fbnic_net *fbn = netdev_priv(nv->napi.dev);
+ struct bpf_prog *xdp_prog;
+ int act;
+
+ xdp_prog = READ_ONCE(fbn->xdp_prog);
+ if (!xdp_prog)
+ goto xdp_pass;
+
+ /* Should never happen; config paths enforce HDS threshold >= MTU + ETH_HLEN */
+ if (xdp_buff_has_frags(&pkt->buff) && !xdp_prog->aux->xdp_has_frags)
+ return ERR_PTR(-FBNIC_XDP_LEN_ERR);
+
+ act = bpf_prog_run_xdp(xdp_prog, &pkt->buff);
+ switch (act) {
+ case XDP_PASS:
+xdp_pass:
+ return fbnic_build_skb(nv, pkt);
+ case XDP_TX:
+ return ERR_PTR(fbnic_pkt_tx(nv, pkt));
+ default:
+ bpf_warn_invalid_xdp_action(nv->napi.dev, xdp_prog, act);
+ fallthrough;
+ case XDP_ABORTED:
+ trace_xdp_exception(nv->napi.dev, xdp_prog, act);
+ fallthrough;
+ case XDP_DROP:
+ break;
+ }
+
+ return ERR_PTR(-FBNIC_XDP_CONSUME);
}
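
For reference, a minimal XDP program that passes the frags check above;
placing the program in libbpf's "xdp.frags" section loads it with frag
support, so it stays loadable even when the MTU exceeds the HDS
threshold (the program is illustrative, not part of this series):

	/* SPDX-License-Identifier: GPL-2.0 */
	#include <linux/bpf.h>
	#include <bpf/bpf_helpers.h>

	/* Declared frags-aware via the section name; libbpf then loads
	 * it with BPF_F_XDP_HAS_FRAGS, setting prog->aux->xdp_has_frags.
	 */
	SEC("xdp.frags")
	int xdp_pass_all(struct xdp_md *ctx)
	{
		return XDP_PASS;
	}

	char _license[] SEC("license") = "GPL";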
static enum pkt_hash_types fbnic_skb_hash_type(u64 rcd)
@@ -1050,10 +1215,10 @@ static int fbnic_clean_rcq(struct fbnic_napi_vector *nv,
struct fbnic_q_triad *qt, int budget)
{
unsigned int packets = 0, bytes = 0, dropped = 0, alloc_failed = 0;
- u64 csum_complete = 0, csum_none = 0;
+ u64 csum_complete = 0, csum_none = 0, length_errors = 0;
+ s32 head0 = -1, head1 = -1, pkt_tail = -1;
struct fbnic_ring *rcq = &qt->cmpl;
struct fbnic_pkt_buff *pkt;
- s32 head0 = -1, head1 = -1;
__le64 *raw_rcd, done;
u32 head = rcq->head;
@@ -1094,8 +1259,10 @@ static int fbnic_clean_rcq(struct fbnic_napi_vector *nv,
/* We currently ignore the action table index */
break;
case FBNIC_RCD_TYPE_META:
- if (likely(!fbnic_rcd_metadata_err(rcd)))
- skb = fbnic_build_skb(nv, pkt);
+ if (unlikely(pkt->add_frag_failed))
+ skb = NULL;
+ else if (likely(!fbnic_rcd_metadata_err(rcd)))
+ skb = fbnic_run_xdp(nv, pkt);
/* Populate skb and invalidate XDP */
if (!IS_ERR_OR_NULL(skb)) {
@@ -1107,15 +1274,20 @@ static int fbnic_clean_rcq(struct fbnic_napi_vector *nv,
bytes += skb->len;
napi_gro_receive(&nv->napi, skb);
+ } else if (skb == ERR_PTR(-FBNIC_XDP_TX)) {
+ pkt_tail = nv->qt[0].sub1.tail;
+ bytes += xdp_get_buff_len(&pkt->buff);
} else {
if (!skb) {
alloc_failed++;
dropped++;
+ } else if (skb == ERR_PTR(-FBNIC_XDP_LEN_ERR)) {
+ length_errors++;
} else {
dropped++;
}
- fbnic_put_pkt_buff(nv, pkt, 1);
+ fbnic_put_pkt_buff(qt, pkt, 1);
}
pkt->buff.data_hard_start = NULL;
@@ -1140,16 +1312,20 @@ static int fbnic_clean_rcq(struct fbnic_napi_vector *nv,
rcq->stats.rx.alloc_failed += alloc_failed;
rcq->stats.rx.csum_complete += csum_complete;
rcq->stats.rx.csum_none += csum_none;
+ rcq->stats.rx.length_errors += length_errors;
u64_stats_update_end(&rcq->stats.syncp);
+ if (pkt_tail >= 0)
+ fbnic_pkt_commit_tail(nv, pkt_tail);
+
/* Unmap and free processed buffers */
if (head0 >= 0)
- fbnic_clean_bdq(nv, budget, &qt->sub0, head0);
- fbnic_fill_bdq(nv, &qt->sub0);
+ fbnic_clean_bdq(&qt->sub0, head0, budget);
+ fbnic_fill_bdq(&qt->sub0);
if (head1 >= 0)
- fbnic_clean_bdq(nv, budget, &qt->sub1, head1);
- fbnic_fill_bdq(nv, &qt->sub1);
+ fbnic_clean_bdq(&qt->sub1, head1, budget);
+ fbnic_fill_bdq(&qt->sub1);
/* Record the current head/tail of the queue */
if (rcq->head != head) {
@@ -1220,8 +1396,9 @@ void fbnic_aggregate_ring_rx_counters(struct fbnic_net *fbn,
fbn->rx_stats.rx.alloc_failed += stats->rx.alloc_failed;
fbn->rx_stats.rx.csum_complete += stats->rx.csum_complete;
fbn->rx_stats.rx.csum_none += stats->rx.csum_none;
+ fbn->rx_stats.rx.length_errors += stats->rx.length_errors;
/* Remember to add new stats here */
- BUILD_BUG_ON(sizeof(fbn->rx_stats.rx) / 8 != 3);
+ BUILD_BUG_ON(sizeof(fbn->rx_stats.rx) / 8 != 4);
}
void fbnic_aggregate_ring_tx_counters(struct fbnic_net *fbn,
@@ -1243,6 +1420,22 @@ void fbnic_aggregate_ring_tx_counters(struct fbnic_net *fbn,
BUILD_BUG_ON(sizeof(fbn->tx_stats.twq) / 8 != 6);
}
+static void fbnic_aggregate_ring_xdp_counters(struct fbnic_net *fbn,
+ struct fbnic_ring *xdpr)
+{
+ struct fbnic_queue_stats *stats = &xdpr->stats;
+
+ if (!(xdpr->flags & FBNIC_RING_F_STATS))
+ return;
+
+ /* Capture stats from queues before disassociating them */
+ fbn->rx_stats.bytes += stats->bytes;
+ fbn->rx_stats.packets += stats->packets;
+ fbn->rx_stats.dropped += stats->dropped;
+ fbn->tx_stats.bytes += stats->bytes;
+ fbn->tx_stats.packets += stats->packets;
+}
+
static void fbnic_remove_tx_ring(struct fbnic_net *fbn,
struct fbnic_ring *txr)
{
@@ -1256,6 +1449,19 @@ static void fbnic_remove_tx_ring(struct fbnic_net *fbn,
fbn->tx[txr->q_idx] = NULL;
}
+static void fbnic_remove_xdp_ring(struct fbnic_net *fbn,
+ struct fbnic_ring *xdpr)
+{
+ if (!(xdpr->flags & FBNIC_RING_F_STATS))
+ return;
+
+ fbnic_aggregate_ring_xdp_counters(fbn, xdpr);
+
+ /* Remove pointer to the Tx ring */
+ WARN_ON(fbn->tx[xdpr->q_idx] && fbn->tx[xdpr->q_idx] != xdpr);
+ fbn->tx[xdpr->q_idx] = NULL;
+}
+
static void fbnic_remove_rx_ring(struct fbnic_net *fbn,
struct fbnic_ring *rxr)
{
@@ -1269,6 +1475,12 @@ static void fbnic_remove_rx_ring(struct fbnic_net *fbn,
fbn->rx[rxr->q_idx] = NULL;
}
+static void fbnic_free_qt_page_pools(struct fbnic_q_triad *qt)
+{
+ page_pool_destroy(qt->sub0.page_pool);
+ page_pool_destroy(qt->sub1.page_pool);
+}
+
static void fbnic_free_napi_vector(struct fbnic_net *fbn,
struct fbnic_napi_vector *nv)
{
@@ -1277,6 +1489,7 @@ static void fbnic_free_napi_vector(struct fbnic_net *fbn,
for (i = 0; i < nv->txt_count; i++) {
fbnic_remove_tx_ring(fbn, &nv->qt[i].sub0);
+ fbnic_remove_xdp_ring(fbn, &nv->qt[i].sub1);
fbnic_remove_tx_ring(fbn, &nv->qt[i].cmpl);
}
@@ -1287,8 +1500,7 @@ static void fbnic_free_napi_vector(struct fbnic_net *fbn,
}
fbnic_napi_free_irq(fbd, nv);
- page_pool_destroy(nv->page_pool);
- netif_napi_del(&nv->napi);
+ netif_napi_del_locked(&nv->napi);
fbn->napi[fbnic_napi_idx(nv)] = NULL;
kfree(nv);
}
@@ -1302,23 +1514,22 @@ void fbnic_free_napi_vectors(struct fbnic_net *fbn)
fbnic_free_napi_vector(fbn, fbn->napi[i]);
}
-#define FBNIC_PAGE_POOL_FLAGS \
- (PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV)
-
-static int fbnic_alloc_nv_page_pool(struct fbnic_net *fbn,
- struct fbnic_napi_vector *nv)
+static int
+fbnic_alloc_qt_page_pools(struct fbnic_net *fbn, struct fbnic_q_triad *qt,
+ unsigned int rxq_idx)
{
struct page_pool_params pp_params = {
.order = 0,
- .flags = FBNIC_PAGE_POOL_FLAGS,
- .pool_size = (fbn->hpq_size + fbn->ppq_size) * nv->rxt_count,
+ .flags = PP_FLAG_DMA_MAP |
+ PP_FLAG_DMA_SYNC_DEV,
+ .pool_size = fbn->hpq_size + fbn->ppq_size,
.nid = NUMA_NO_NODE,
- .dev = nv->dev,
+ .dev = fbn->netdev->dev.parent,
.dma_dir = DMA_BIDIRECTIONAL,
.offset = 0,
.max_len = PAGE_SIZE,
- .napi = &nv->napi,
.netdev = fbn->netdev,
+ .queue_idx = rxq_idx,
};
struct page_pool *pp;
@@ -1338,9 +1549,24 @@ static int fbnic_alloc_nv_page_pool(struct fbnic_net *fbn,
if (IS_ERR(pp))
return PTR_ERR(pp);
- nv->page_pool = pp;
+ qt->sub0.page_pool = pp;
+ if (netif_rxq_has_unreadable_mp(fbn->netdev, rxq_idx)) {
+ pp_params.flags |= PP_FLAG_ALLOW_UNREADABLE_NETMEM;
+ pp_params.dma_dir = DMA_FROM_DEVICE;
+
+ pp = page_pool_create(&pp_params);
+ if (IS_ERR(pp))
+ goto err_destroy_sub0;
+ } else {
+ page_pool_get(pp);
+ }
+ qt->sub1.page_pool = pp;
return 0;
+
+err_destroy_sub0:
+ page_pool_destroy(pp);
+ return PTR_ERR(pp);
}
static void fbnic_ring_init(struct fbnic_ring *ring, u32 __iomem *doorbell,
@@ -1350,6 +1576,7 @@ static void fbnic_ring_init(struct fbnic_ring *ring, u32 __iomem *doorbell,
ring->doorbell = doorbell;
ring->q_idx = q_idx;
ring->flags = flags;
+ ring->deferred_head = -1;
}
static int fbnic_alloc_napi_vector(struct fbnic_dev *fbd, struct fbnic_net *fbn,
@@ -1359,11 +1586,18 @@ static int fbnic_alloc_napi_vector(struct fbnic_dev *fbd, struct fbnic_net *fbn,
{
int txt_count = txq_count, rxt_count = rxq_count;
u32 __iomem *uc_addr = fbd->uc_addr0;
+ int xdp_count = 0, qt_count, err;
struct fbnic_napi_vector *nv;
struct fbnic_q_triad *qt;
- int qt_count, err;
u32 __iomem *db;
+ /* We need to reserve at least one Tx Queue Triad for an XDP ring */
+ if (rxq_count) {
+ xdp_count = 1;
+ if (!txt_count)
+ txt_count = 1;
+ }
+
qt_count = txt_count + rxq_count;
if (!qt_count)
return -EINVAL;
@@ -1387,37 +1621,32 @@ static int fbnic_alloc_napi_vector(struct fbnic_dev *fbd, struct fbnic_net *fbn,
/* Tie napi to netdev */
fbn->napi[fbnic_napi_idx(nv)] = nv;
- netif_napi_add(fbn->netdev, &nv->napi, fbnic_poll);
+ netif_napi_add_locked(fbn->netdev, &nv->napi, fbnic_poll);
/* Record IRQ to NAPI struct */
- netif_napi_set_irq(&nv->napi,
- pci_irq_vector(to_pci_dev(fbd->dev), nv->v_idx));
+ netif_napi_set_irq_locked(&nv->napi,
+ pci_irq_vector(to_pci_dev(fbd->dev),
+ nv->v_idx));
/* Tie nv back to PCIe dev */
nv->dev = fbd->dev;
- /* Allocate page pool */
- if (rxq_count) {
- err = fbnic_alloc_nv_page_pool(fbn, nv);
- if (err)
- goto napi_del;
- }
-
/* Request the IRQ for napi vector */
err = fbnic_napi_request_irq(fbd, nv);
if (err)
- goto pp_destroy;
+ goto napi_del;
/* Initialize queue triads */
qt = nv->qt;
while (txt_count) {
+ u8 flags = FBNIC_RING_F_CTX | FBNIC_RING_F_STATS;
+
/* Configure Tx queue */
db = &uc_addr[FBNIC_QUEUE(txq_idx) + FBNIC_QUEUE_TWQ0_TAIL];
/* Assign Tx queue to netdev if applicable */
if (txq_count > 0) {
- u8 flags = FBNIC_RING_F_CTX | FBNIC_RING_F_STATS;
fbnic_ring_init(&qt->sub0, db, txq_idx, flags);
fbn->tx[txq_idx] = &qt->sub0;
@@ -1427,6 +1656,28 @@ static int fbnic_alloc_napi_vector(struct fbnic_dev *fbd, struct fbnic_net *fbn,
FBNIC_RING_F_DISABLED);
}
+ /* Configure XDP queue */
+ db = &uc_addr[FBNIC_QUEUE(txq_idx) + FBNIC_QUEUE_TWQ1_TAIL];
+
+ /* Assign XDP queue to netdev if applicable
+ *
+ * The setup here differs slightly from a regular Tx queue:
+ * 1. We only need one XDP Tx queue per NAPI vector.
+ * 2. We associate it with the first Rx queue index.
+ * 3. The hardware side is associated based on the Tx queue.
+ * 4. The netdev queue index is offset by FBNIC_MAX_TXQS.
+ */
+ if (xdp_count > 0) {
+ unsigned int xdp_idx = FBNIC_MAX_TXQS + rxq_idx;
+
+ fbnic_ring_init(&qt->sub1, db, xdp_idx, flags);
+ fbn->tx[xdp_idx] = &qt->sub1;
+ xdp_count--;
+ } else {
+ fbnic_ring_init(&qt->sub1, db, 0,
+ FBNIC_RING_F_DISABLED);
+ }
+
/* Configure Tx completion queue */
db = &uc_addr[FBNIC_QUEUE(txq_idx) + FBNIC_QUEUE_TCQ_HEAD];
fbnic_ring_init(&qt->cmpl, db, 0, 0);
@@ -1463,10 +1714,8 @@ static int fbnic_alloc_napi_vector(struct fbnic_dev *fbd, struct fbnic_net *fbn,
return 0;
-pp_destroy:
- page_pool_destroy(nv->page_pool);
napi_del:
- netif_napi_del(&nv->napi);
+ netif_napi_del_locked(&nv->napi);
fbn->napi[fbnic_napi_idx(nv)] = NULL;
kfree(nv);
return err;
@@ -1680,6 +1929,12 @@ static void fbnic_free_qt_resources(struct fbnic_net *fbn,
fbnic_free_ring_resources(dev, &qt->cmpl);
fbnic_free_ring_resources(dev, &qt->sub1);
fbnic_free_ring_resources(dev, &qt->sub0);
+
+ if (xdp_rxq_info_is_reg(&qt->xdp_rxq)) {
+ xdp_rxq_info_unreg_mem_model(&qt->xdp_rxq);
+ xdp_rxq_info_unreg(&qt->xdp_rxq);
+ fbnic_free_qt_page_pools(qt);
+ }
}
static int fbnic_alloc_tx_qt_resources(struct fbnic_net *fbn,
@@ -1692,6 +1947,10 @@ static int fbnic_alloc_tx_qt_resources(struct fbnic_net *fbn,
if (err)
return err;
+ err = fbnic_alloc_tx_ring_resources(fbn, &qt->sub1);
+ if (err)
+ goto free_sub0;
+
err = fbnic_alloc_tx_ring_resources(fbn, &qt->cmpl);
if (err)
goto free_sub1;
@@ -1699,20 +1958,37 @@ static int fbnic_alloc_tx_qt_resources(struct fbnic_net *fbn,
return 0;
free_sub1:
+ fbnic_free_ring_resources(dev, &qt->sub1);
+free_sub0:
fbnic_free_ring_resources(dev, &qt->sub0);
return err;
}
static int fbnic_alloc_rx_qt_resources(struct fbnic_net *fbn,
+ struct fbnic_napi_vector *nv,
struct fbnic_q_triad *qt)
{
struct device *dev = fbn->netdev->dev.parent;
int err;
- err = fbnic_alloc_rx_ring_resources(fbn, &qt->sub0);
+ err = fbnic_alloc_qt_page_pools(fbn, qt, qt->cmpl.q_idx);
if (err)
return err;
+ err = xdp_rxq_info_reg(&qt->xdp_rxq, fbn->netdev, qt->sub0.q_idx,
+ nv->napi.napi_id);
+ if (err)
+ goto free_page_pools;
+
+ err = xdp_rxq_info_reg_mem_model(&qt->xdp_rxq, MEM_TYPE_PAGE_POOL,
+ qt->sub0.page_pool);
+ if (err)
+ goto unreg_rxq;
+
+ err = fbnic_alloc_rx_ring_resources(fbn, &qt->sub0);
+ if (err)
+ goto unreg_mm;
+
err = fbnic_alloc_rx_ring_resources(fbn, &qt->sub1);
if (err)
goto free_sub0;
@@ -1727,19 +2003,21 @@ free_sub1:
fbnic_free_ring_resources(dev, &qt->sub1);
free_sub0:
fbnic_free_ring_resources(dev, &qt->sub0);
+unreg_mm:
+ xdp_rxq_info_unreg_mem_model(&qt->xdp_rxq);
+unreg_rxq:
+ xdp_rxq_info_unreg(&qt->xdp_rxq);
+free_page_pools:
+ fbnic_free_qt_page_pools(qt);
return err;
}
static void fbnic_free_nv_resources(struct fbnic_net *fbn,
struct fbnic_napi_vector *nv)
{
- int i, j;
-
- /* Free Tx Resources */
- for (i = 0; i < nv->txt_count; i++)
- fbnic_free_qt_resources(fbn, &nv->qt[i]);
+ int i;
- for (j = 0; j < nv->rxt_count; j++, i++)
+ for (i = 0; i < nv->txt_count + nv->rxt_count; i++)
fbnic_free_qt_resources(fbn, &nv->qt[i]);
}
@@ -1752,19 +2030,19 @@ static int fbnic_alloc_nv_resources(struct fbnic_net *fbn,
for (i = 0; i < nv->txt_count; i++) {
err = fbnic_alloc_tx_qt_resources(fbn, &nv->qt[i]);
if (err)
- goto free_resources;
+ goto free_qt_resources;
}
/* Allocate Rx Resources */
for (j = 0; j < nv->rxt_count; j++, i++) {
- err = fbnic_alloc_rx_qt_resources(fbn, &nv->qt[i]);
+ err = fbnic_alloc_rx_qt_resources(fbn, nv, &nv->qt[i]);
if (err)
- goto free_resources;
+ goto free_qt_resources;
}
return 0;
-free_resources:
+free_qt_resources:
while (i--)
fbnic_free_qt_resources(fbn, &nv->qt[i]);
return err;
@@ -1871,6 +2149,15 @@ static void fbnic_disable_twq0(struct fbnic_ring *txr)
fbnic_ring_wr32(txr, FBNIC_QUEUE_TWQ0_CTL, twq_ctl);
}
+static void fbnic_disable_twq1(struct fbnic_ring *txr)
+{
+ u32 twq_ctl = fbnic_ring_rd32(txr, FBNIC_QUEUE_TWQ1_CTL);
+
+ twq_ctl &= ~FBNIC_QUEUE_TWQ_CTL_ENABLE;
+
+ fbnic_ring_wr32(txr, FBNIC_QUEUE_TWQ1_CTL, twq_ctl);
+}
+
static void fbnic_disable_tcq(struct fbnic_ring *txr)
{
fbnic_ring_wr32(txr, FBNIC_QUEUE_TCQ_CTL, 0);
@@ -1897,36 +2184,48 @@ void fbnic_napi_disable(struct fbnic_net *fbn)
int i;
for (i = 0; i < fbn->num_napi; i++) {
- napi_disable(&fbn->napi[i]->napi);
+ napi_disable_locked(&fbn->napi[i]->napi);
fbnic_nv_irq_disable(fbn->napi[i]);
}
}
-void fbnic_disable(struct fbnic_net *fbn)
+static void __fbnic_nv_disable(struct fbnic_napi_vector *nv)
{
- struct fbnic_dev *fbd = fbn->fbd;
- int i, j, t;
-
- for (i = 0; i < fbn->num_napi; i++) {
- struct fbnic_napi_vector *nv = fbn->napi[i];
+ int i, t;
- /* Disable Tx queue triads */
- for (t = 0; t < nv->txt_count; t++) {
- struct fbnic_q_triad *qt = &nv->qt[t];
+ /* Disable Tx queue triads */
+ for (t = 0; t < nv->txt_count; t++) {
+ struct fbnic_q_triad *qt = &nv->qt[t];
- fbnic_disable_twq0(&qt->sub0);
- fbnic_disable_tcq(&qt->cmpl);
- }
+ fbnic_disable_twq0(&qt->sub0);
+ fbnic_disable_twq1(&qt->sub1);
+ fbnic_disable_tcq(&qt->cmpl);
+ }
- /* Disable Rx queue triads */
- for (j = 0; j < nv->rxt_count; j++, t++) {
- struct fbnic_q_triad *qt = &nv->qt[t];
+ /* Disable Rx queue triads */
+ for (i = 0; i < nv->rxt_count; i++, t++) {
+ struct fbnic_q_triad *qt = &nv->qt[t];
- fbnic_disable_bdq(&qt->sub0, &qt->sub1);
- fbnic_disable_rcq(&qt->cmpl);
- }
+ fbnic_disable_bdq(&qt->sub0, &qt->sub1);
+ fbnic_disable_rcq(&qt->cmpl);
}
+}
+
+static void
+fbnic_nv_disable(struct fbnic_net *fbn, struct fbnic_napi_vector *nv)
+{
+ __fbnic_nv_disable(nv);
+ fbnic_wrfl(fbn->fbd);
+}
+
+void fbnic_disable(struct fbnic_net *fbn)
+{
+ struct fbnic_dev *fbd = fbn->fbd;
+ int i;
+
+ for (i = 0; i < fbn->num_napi; i++)
+ __fbnic_nv_disable(fbn->napi[i]);
fbnic_wrfl(fbd);
}
@@ -2015,73 +2314,119 @@ int fbnic_wait_all_queues_idle(struct fbnic_dev *fbd, bool may_fail)
return err;
}
-void fbnic_flush(struct fbnic_net *fbn)
+static int
+fbnic_wait_queue_idle(struct fbnic_net *fbn, bool rx, unsigned int idx)
{
- int i;
+ static const unsigned int tx_regs[] = {
+ FBNIC_QM_TWQ_IDLE(0), FBNIC_QM_TQS_IDLE(0),
+ FBNIC_QM_TDE_IDLE(0), FBNIC_QM_TCQ_IDLE(0),
+ }, rx_regs[] = {
+ FBNIC_QM_HPQ_IDLE(0), FBNIC_QM_PPQ_IDLE(0),
+ FBNIC_QM_RCQ_IDLE(0),
+ };
+ struct fbnic_dev *fbd = fbn->fbd;
+ unsigned int val, mask, off;
+ const unsigned int *regs;
+ unsigned int reg_cnt;
+ int i, err;
- for (i = 0; i < fbn->num_napi; i++) {
- struct fbnic_napi_vector *nv = fbn->napi[i];
- int j, t;
+ regs = rx ? rx_regs : tx_regs;
+ reg_cnt = rx ? ARRAY_SIZE(rx_regs) : ARRAY_SIZE(tx_regs);
- /* Flush any processed Tx Queue Triads and drop the rest */
- for (t = 0; t < nv->txt_count; t++) {
- struct fbnic_q_triad *qt = &nv->qt[t];
- struct netdev_queue *tx_queue;
+ off = idx / 32;
+ mask = BIT(idx % 32);
- /* Clean the work queues of unprocessed work */
- fbnic_clean_twq0(nv, 0, &qt->sub0, true, qt->sub0.tail);
+ for (i = 0; i < reg_cnt; i++) {
+ err = read_poll_timeout_atomic(fbnic_rd32, val, val & mask,
+ 2, 500000, false,
+ fbd, regs[i] + off);
+ if (err) {
+ netdev_err(fbd->netdev,
+ "wait for queue %s%d idle failed 0x%04x(%d): %08x (mask: %08x)\n",
+ rx ? "Rx" : "Tx", idx, regs[i] + off, i,
+ val, mask);
+ return err;
+ }
+ }
- /* Reset completion queue descriptor ring */
- memset(qt->cmpl.desc, 0, qt->cmpl.size);
+ return 0;
+}
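
The off/mask math above indexes per-queue idle bits packed 32 per
register; a quick standalone check with a hypothetical queue index:

	#include <stdio.h>

	#define BIT(n)	(1u << (n))

	int main(void)
	{
		unsigned int idx = 37;			/* hypothetical queue */
		unsigned int off = idx / 32;		/* register word: 1 */
		unsigned int mask = BIT(idx % 32);	/* bit 5 of that word */

		printf("word %u, mask 0x%08x\n", off, mask);
		return 0;
	}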
- /* Nothing else to do if Tx queue is disabled */
- if (qt->sub0.flags & FBNIC_RING_F_DISABLED)
- continue;
+static void fbnic_nv_flush(struct fbnic_napi_vector *nv)
+{
+ int j, t;
- /* Reset BQL associated with Tx queue */
- tx_queue = netdev_get_tx_queue(nv->napi.dev,
- qt->sub0.q_idx);
- netdev_tx_reset_queue(tx_queue);
- }
+ /* Flush any processed Tx Queue Triads and drop the rest */
+ for (t = 0; t < nv->txt_count; t++) {
+ struct fbnic_q_triad *qt = &nv->qt[t];
+ struct netdev_queue *tx_queue;
- /* Flush any processed Rx Queue Triads and drop the rest */
- for (j = 0; j < nv->rxt_count; j++, t++) {
- struct fbnic_q_triad *qt = &nv->qt[t];
+ /* Clean the work queues of unprocessed work */
+ fbnic_clean_twq0(nv, 0, &qt->sub0, true, qt->sub0.tail);
+ fbnic_clean_twq1(nv, false, &qt->sub1, true,
+ qt->sub1.tail);
- /* Clean the work queues of unprocessed work */
- fbnic_clean_bdq(nv, 0, &qt->sub0, qt->sub0.tail);
- fbnic_clean_bdq(nv, 0, &qt->sub1, qt->sub1.tail);
+ /* Reset completion queue descriptor ring */
+ memset(qt->cmpl.desc, 0, qt->cmpl.size);
- /* Reset completion queue descriptor ring */
- memset(qt->cmpl.desc, 0, qt->cmpl.size);
+ /* Nothing else to do if Tx queue is disabled */
+ if (qt->sub0.flags & FBNIC_RING_F_DISABLED)
+ continue;
- fbnic_put_pkt_buff(nv, qt->cmpl.pkt, 0);
- qt->cmpl.pkt->buff.data_hard_start = NULL;
- }
+ /* Reset BQL associated with Tx queue */
+ tx_queue = netdev_get_tx_queue(nv->napi.dev,
+ qt->sub0.q_idx);
+ netdev_tx_reset_queue(tx_queue);
+ }
+
+ /* Flush any processed Rx Queue Triads and drop the rest */
+ for (j = 0; j < nv->rxt_count; j++, t++) {
+ struct fbnic_q_triad *qt = &nv->qt[t];
+
+ /* Clean the work queues of unprocessed work */
+ fbnic_clean_bdq(&qt->sub0, qt->sub0.tail, 0);
+ fbnic_clean_bdq(&qt->sub1, qt->sub1.tail, 0);
+
+ /* Reset completion queue descriptor ring */
+ memset(qt->cmpl.desc, 0, qt->cmpl.size);
+
+ fbnic_put_pkt_buff(qt, qt->cmpl.pkt, 0);
+ memset(qt->cmpl.pkt, 0, sizeof(struct fbnic_pkt_buff));
}
}
-void fbnic_fill(struct fbnic_net *fbn)
+void fbnic_flush(struct fbnic_net *fbn)
{
int i;
- for (i = 0; i < fbn->num_napi; i++) {
- struct fbnic_napi_vector *nv = fbn->napi[i];
- int j, t;
+ for (i = 0; i < fbn->num_napi; i++)
+ fbnic_nv_flush(fbn->napi[i]);
+}
- /* Configure NAPI mapping and populate pages
- * in the BDQ rings to use for Rx
- */
- for (j = 0, t = nv->txt_count; j < nv->rxt_count; j++, t++) {
- struct fbnic_q_triad *qt = &nv->qt[t];
+static void fbnic_nv_fill(struct fbnic_napi_vector *nv)
+{
+ int j, t;
- /* Populate the header and payload BDQs */
- fbnic_fill_bdq(nv, &qt->sub0);
- fbnic_fill_bdq(nv, &qt->sub1);
- }
+ /* Configure NAPI mapping and populate pages
+ * in the BDQ rings to use for Rx
+ */
+ for (j = 0, t = nv->txt_count; j < nv->rxt_count; j++, t++) {
+ struct fbnic_q_triad *qt = &nv->qt[t];
+
+ /* Populate the header and payload BDQs */
+ fbnic_fill_bdq(&qt->sub0);
+ fbnic_fill_bdq(&qt->sub1);
}
}
+void fbnic_fill(struct fbnic_net *fbn)
+{
+ int i;
+
+ for (i = 0; i < fbn->num_napi; i++)
+ fbnic_nv_fill(fbn->napi[i]);
+}
+
static void fbnic_enable_twq0(struct fbnic_ring *twq)
{
u32 log_size = fls(twq->size_mask);
@@ -2104,6 +2449,28 @@ static void fbnic_enable_twq0(struct fbnic_ring *twq)
fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ0_CTL, FBNIC_QUEUE_TWQ_CTL_ENABLE);
}
+static void fbnic_enable_twq1(struct fbnic_ring *twq)
+{
+ u32 log_size = fls(twq->size_mask);
+
+ if (!twq->size_mask)
+ return;
+
+ /* Reset head/tail */
+ fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ1_CTL, FBNIC_QUEUE_TWQ_CTL_RESET);
+ twq->tail = 0;
+ twq->head = 0;
+
+ /* Store descriptor ring address and size */
+ fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ1_BAL, lower_32_bits(twq->dma));
+ fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ1_BAH, upper_32_bits(twq->dma));
+
+ /* Write lower 4 bits of log size as 64K ring size is 0 */
+ fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ1_SIZE, log_size & 0xf);
+
+ fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ1_CTL, FBNIC_QUEUE_TWQ_CTL_ENABLE);
+}
+
static void fbnic_enable_tcq(struct fbnic_napi_vector *nv,
struct fbnic_ring *tcq)
{
@@ -2232,13 +2599,22 @@ static void fbnic_enable_rcq(struct fbnic_napi_vector *nv,
{
struct fbnic_net *fbn = netdev_priv(nv->napi.dev);
u32 log_size = fls(rcq->size_mask);
- u32 rcq_ctl;
+ u32 hds_thresh = fbn->hds_thresh;
+ u32 rcq_ctl = 0;
fbnic_config_drop_mode_rcq(nv, rcq);
- rcq_ctl = FIELD_PREP(FBNIC_QUEUE_RDE_CTL1_PADLEN_MASK, FBNIC_RX_PAD) |
- FIELD_PREP(FBNIC_QUEUE_RDE_CTL1_MAX_HDR_MASK,
- FBNIC_RX_MAX_HDR) |
+ /* Enforce a lower bound on MAX_HEADER_BYTES so frames are reliably
+ * split at L4; a threshold below this could instead split frames
+ * at L2/L3 depending on the frame size.
+ */
+ if (fbn->hds_thresh < FBNIC_HDR_BYTES_MIN) {
+ rcq_ctl = FBNIC_QUEUE_RDE_CTL0_EN_HDR_SPLIT;
+ hds_thresh = FBNIC_HDR_BYTES_MIN;
+ }
+
+ rcq_ctl |= FIELD_PREP(FBNIC_QUEUE_RDE_CTL1_PADLEN_MASK, FBNIC_RX_PAD) |
+ FIELD_PREP(FBNIC_QUEUE_RDE_CTL1_MAX_HDR_MASK, hds_thresh) |
FIELD_PREP(FBNIC_QUEUE_RDE_CTL1_PAYLD_OFF_MASK,
FBNIC_RX_PAYLD_OFFSET) |
FIELD_PREP(FBNIC_QUEUE_RDE_CTL1_PAYLD_PG_CL_MASK,
@@ -2266,32 +2642,47 @@ static void fbnic_enable_rcq(struct fbnic_napi_vector *nv,
fbnic_ring_wr32(rcq, FBNIC_QUEUE_RCQ_CTL, FBNIC_QUEUE_RCQ_CTL_ENABLE);
}
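
The clamp above means a user-requested header-split threshold below FBNIC_HDR_BYTES_MIN (128, see fbnic_txrx.h further down) still enables header split but programs the hardware floor. A minimal standalone sketch of that arithmetic, assuming only that constant:

#include <stdio.h>

#define FBNIC_HDR_BYTES_MIN 128	/* from fbnic_txrx.h below */

/* Mirrors the clamp in fbnic_enable_rcq(): requests below the hardware
 * minimum still enable header split, but MAX_HDR is programmed with the
 * floor value.
 */
static unsigned int fbnic_effective_hds_thresh(unsigned int requested)
{
	return requested < FBNIC_HDR_BYTES_MIN ? FBNIC_HDR_BYTES_MIN : requested;
}

int main(void)
{
	printf("%u -> %u\n", 64u, fbnic_effective_hds_thresh(64));	/* 64 -> 128 */
	printf("%u -> %u\n", 512u, fbnic_effective_hds_thresh(512));	/* 512 -> 512 */
	return 0;
}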
-void fbnic_enable(struct fbnic_net *fbn)
+static void __fbnic_nv_enable(struct fbnic_napi_vector *nv)
{
- struct fbnic_dev *fbd = fbn->fbd;
- int i;
+ int j, t;
- for (i = 0; i < fbn->num_napi; i++) {
- struct fbnic_napi_vector *nv = fbn->napi[i];
- int j, t;
+ /* Setup Tx Queue Triads */
+ for (t = 0; t < nv->txt_count; t++) {
+ struct fbnic_q_triad *qt = &nv->qt[t];
- /* Setup Tx Queue Triads */
- for (t = 0; t < nv->txt_count; t++) {
- struct fbnic_q_triad *qt = &nv->qt[t];
+ fbnic_enable_twq0(&qt->sub0);
+ fbnic_enable_twq1(&qt->sub1);
+ fbnic_enable_tcq(nv, &qt->cmpl);
+ }
- fbnic_enable_twq0(&qt->sub0);
- fbnic_enable_tcq(nv, &qt->cmpl);
- }
+ /* Setup Rx Queue Triads */
+ for (j = 0; j < nv->rxt_count; j++, t++) {
+ struct fbnic_q_triad *qt = &nv->qt[t];
- /* Setup Rx Queue Triads */
- for (j = 0; j < nv->rxt_count; j++, t++) {
- struct fbnic_q_triad *qt = &nv->qt[t];
+ page_pool_enable_direct_recycling(qt->sub0.page_pool,
+ &nv->napi);
+ page_pool_enable_direct_recycling(qt->sub1.page_pool,
+ &nv->napi);
- fbnic_enable_bdq(&qt->sub0, &qt->sub1);
- fbnic_config_drop_mode_rcq(nv, &qt->cmpl);
- fbnic_enable_rcq(nv, &qt->cmpl);
- }
+ fbnic_enable_bdq(&qt->sub0, &qt->sub1);
+ fbnic_config_drop_mode_rcq(nv, &qt->cmpl);
+ fbnic_enable_rcq(nv, &qt->cmpl);
}
+}
+
+static void fbnic_nv_enable(struct fbnic_net *fbn, struct fbnic_napi_vector *nv)
+{
+ __fbnic_nv_enable(nv);
+ fbnic_wrfl(fbn->fbd);
+}
+
+void fbnic_enable(struct fbnic_net *fbn)
+{
+ struct fbnic_dev *fbd = fbn->fbd;
+ int i;
+
+ for (i = 0; i < fbn->num_napi; i++)
+ __fbnic_nv_enable(fbn->napi[i]);
fbnic_wrfl(fbd);
}
@@ -2310,7 +2701,7 @@ void fbnic_napi_enable(struct fbnic_net *fbn)
for (i = 0; i < fbn->num_napi; i++) {
struct fbnic_napi_vector *nv = fbn->napi[i];
- napi_enable(&nv->napi);
+ napi_enable_locked(&nv->napi);
fbnic_nv_irq_enable(nv);
@@ -2363,3 +2754,123 @@ void fbnic_napi_depletion_check(struct net_device *netdev)
fbnic_wrfl(fbd);
}
+
+static int fbnic_queue_mem_alloc(struct net_device *dev, void *qmem, int idx)
+{
+ struct fbnic_net *fbn = netdev_priv(dev);
+ const struct fbnic_q_triad *real;
+ struct fbnic_q_triad *qt = qmem;
+ struct fbnic_napi_vector *nv;
+
+ if (!netif_running(dev))
+ return fbnic_alloc_qt_page_pools(fbn, qt, idx);
+
+ real = container_of(fbn->rx[idx], struct fbnic_q_triad, cmpl);
+ nv = fbn->napi[idx % fbn->num_napi];
+
+ fbnic_ring_init(&qt->sub0, real->sub0.doorbell, real->sub0.q_idx,
+ real->sub0.flags);
+ fbnic_ring_init(&qt->sub1, real->sub1.doorbell, real->sub1.q_idx,
+ real->sub1.flags);
+ fbnic_ring_init(&qt->cmpl, real->cmpl.doorbell, real->cmpl.q_idx,
+ real->cmpl.flags);
+
+ return fbnic_alloc_rx_qt_resources(fbn, nv, qt);
+}
+
+static void fbnic_queue_mem_free(struct net_device *dev, void *qmem)
+{
+ struct fbnic_net *fbn = netdev_priv(dev);
+ struct fbnic_q_triad *qt = qmem;
+
+ if (!netif_running(dev))
+ fbnic_free_qt_page_pools(qt);
+ else
+ fbnic_free_qt_resources(fbn, qt);
+}
+
+static void __fbnic_nv_restart(struct fbnic_net *fbn,
+ struct fbnic_napi_vector *nv)
+{
+ struct fbnic_dev *fbd = fbn->fbd;
+ int i;
+
+ fbnic_nv_enable(fbn, nv);
+ fbnic_nv_fill(nv);
+
+ napi_enable_locked(&nv->napi);
+ fbnic_nv_irq_enable(nv);
+ fbnic_wr32(fbd, FBNIC_INTR_SET(nv->v_idx / 32), BIT(nv->v_idx % 32));
+ fbnic_wrfl(fbd);
+
+ for (i = 0; i < nv->txt_count; i++)
+ netif_wake_subqueue(fbn->netdev, nv->qt[i].sub0.q_idx);
+}
+
+static int fbnic_queue_start(struct net_device *dev, void *qmem, int idx)
+{
+ struct fbnic_net *fbn = netdev_priv(dev);
+ struct fbnic_napi_vector *nv;
+ struct fbnic_q_triad *real;
+
+ real = container_of(fbn->rx[idx], struct fbnic_q_triad, cmpl);
+ nv = fbn->napi[idx % fbn->num_napi];
+
+ fbnic_aggregate_ring_rx_counters(fbn, &real->sub0);
+ fbnic_aggregate_ring_rx_counters(fbn, &real->sub1);
+ fbnic_aggregate_ring_rx_counters(fbn, &real->cmpl);
+
+ memcpy(real, qmem, sizeof(*real));
+
+ __fbnic_nv_restart(fbn, nv);
+
+ return 0;
+}
+
+static int fbnic_queue_stop(struct net_device *dev, void *qmem, int idx)
+{
+ struct fbnic_net *fbn = netdev_priv(dev);
+ const struct fbnic_q_triad *real;
+ struct fbnic_napi_vector *nv;
+ int i, t;
+ int err;
+
+ real = container_of(fbn->rx[idx], struct fbnic_q_triad, cmpl);
+ nv = fbn->napi[idx % fbn->num_napi];
+
+ napi_disable_locked(&nv->napi);
+ fbnic_nv_irq_disable(nv);
+
+ for (i = 0; i < nv->txt_count; i++)
+ netif_stop_subqueue(dev, nv->qt[i].sub0.q_idx);
+ fbnic_nv_disable(fbn, nv);
+
+ for (t = 0; t < nv->txt_count + nv->rxt_count; t++) {
+ err = fbnic_wait_queue_idle(fbn, t >= nv->txt_count,
+ nv->qt[t].sub0.q_idx);
+ if (err)
+ goto err_restart;
+ }
+
+ fbnic_synchronize_irq(fbn->fbd, nv->v_idx);
+ fbnic_nv_flush(nv);
+
+ page_pool_disable_direct_recycling(real->sub0.page_pool);
+ page_pool_disable_direct_recycling(real->sub1.page_pool);
+
+ memcpy(qmem, real, sizeof(*real));
+
+ return 0;
+
+err_restart:
+ __fbnic_nv_restart(fbn, nv);
+ return err;
+}
+
+const struct netdev_queue_mgmt_ops fbnic_queue_mgmt_ops = {
+ .ndo_queue_mem_size = sizeof(struct fbnic_q_triad),
+ .ndo_queue_mem_alloc = fbnic_queue_mem_alloc,
+ .ndo_queue_mem_free = fbnic_queue_mem_free,
+ .ndo_queue_start = fbnic_queue_start,
+ .ndo_queue_stop = fbnic_queue_stop,
+};
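
For orientation, a hedged sketch of how the netdev core is expected to drive these ops during a per-queue restart; the exact flow lives in net/core/netdev_rx_queue.c and this paraphrase is not the literal implementation:

/* Paraphrased queue-restart flow (assumption, not the literal core code):
 *
 *	new_mem = kzalloc(ops->ndo_queue_mem_size, GFP_KERNEL);
 *	err = ops->ndo_queue_mem_alloc(dev, new_mem, idx);	// prepare
 *	err = ops->ndo_queue_stop(dev, old_mem, idx);		// quiesce
 *	err = ops->ndo_queue_start(dev, new_mem, idx);		// swap in
 *	ops->ndo_queue_mem_free(dev, old_mem);			// reclaim
 *
 * fbnic_queue_stop() above follows the usual teardown order: disable
 * NAPI and the IRQ, stop the Tx subqueues, wait for the rings to idle,
 * synchronize and flush, then snapshot the live triad into qmem so it
 * can be handed back to ndo_queue_start() on the restart path.
 */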
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h
index 34693596e5eb..31fac0ba0902 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h
@@ -35,6 +35,7 @@ struct fbnic_net;
#define FBNIC_MAX_TXQS 128u
#define FBNIC_MAX_RXQS 128u
+#define FBNIC_MAX_XDPQS 128u
/* These apply to TWQs, TCQ, RCQ */
#define FBNIC_QUEUE_SIZE_MIN 16u
@@ -50,10 +51,10 @@ struct fbnic_net;
#define FBNIC_RX_TROOM \
SKB_DATA_ALIGN(sizeof(struct skb_shared_info))
+#define FBNIC_RX_HROOM_PAD 128
#define FBNIC_RX_HROOM \
- (ALIGN(FBNIC_RX_TROOM + NET_SKB_PAD, 128) - FBNIC_RX_TROOM)
+ (ALIGN(FBNIC_RX_TROOM + FBNIC_RX_HROOM_PAD, 128) - FBNIC_RX_TROOM)
#define FBNIC_RX_PAD 0
-#define FBNIC_RX_MAX_HDR (1536 - FBNIC_RX_PAD)
#define FBNIC_RX_PAYLD_OFFSET 0
#define FBNIC_RX_PAYLD_PG_CL 0
@@ -61,12 +62,16 @@ struct fbnic_net;
#define FBNIC_RING_F_CTX BIT(1)
#define FBNIC_RING_F_STATS BIT(2) /* Ring's stats may be used */
+#define FBNIC_HDS_THRESH_MAX \
+ (4096 - FBNIC_RX_HROOM - FBNIC_RX_TROOM - FBNIC_RX_PAD)
+#define FBNIC_HDS_THRESH_DEFAULT \
+ (1536 - FBNIC_RX_PAD)
+#define FBNIC_HDR_BYTES_MIN 128
+
struct fbnic_pkt_buff {
struct xdp_buff buff;
ktime_t hwtstamp;
- u32 data_truesize;
- u16 data_len;
- u16 nr_frags;
+ bool add_frag_failed;
};
struct fbnic_queue_stats {
@@ -85,6 +90,7 @@ struct fbnic_queue_stats {
u64 alloc_failed;
u64 csum_complete;
u64 csum_none;
+ u64 length_errors;
} rx;
};
u64 dropped;
@@ -94,7 +100,7 @@ struct fbnic_queue_stats {
#define FBNIC_PAGECNT_BIAS_MAX PAGE_SIZE
struct fbnic_rx_buf {
- struct page *page;
+ netmem_ref netmem;
long pagecnt_bias;
};
@@ -115,6 +121,17 @@ struct fbnic_ring {
u32 head, tail; /* Head/Tail of ring */
+ union {
+ /* Rx BDQs only */
+ struct page_pool *page_pool;
+
+ /* deferred_head caches the head for TWQ1 when an attempt is
+ * made to clean TWQ1 with a zero napi_budget. It is not used
+ * for any other ring.
+ */
+ s32 deferred_head;
+ };
+
struct fbnic_queue_stats stats;
/* Slow path fields follow */
@@ -124,12 +141,12 @@ struct fbnic_ring {
struct fbnic_q_triad {
struct fbnic_ring sub0, sub1, cmpl;
+ struct xdp_rxq_info xdp_rxq;
};
struct fbnic_napi_vector {
struct napi_struct napi;
struct device *dev; /* Device for DMA unmapping */
- struct page_pool *page_pool;
struct fbnic_dev *fbd;
u16 v_idx;
@@ -139,6 +156,8 @@ struct fbnic_napi_vector {
struct fbnic_q_triad qt[];
};
+extern const struct netdev_queue_mgmt_ops fbnic_queue_mgmt_ops;
+
netdev_tx_t fbnic_xmit_frame(struct sk_buff *skb, struct net_device *dev);
netdev_features_t
fbnic_features_check(struct sk_buff *skb, struct net_device *dev,
diff --git a/drivers/net/ethernet/microchip/lan865x/lan865x.c b/drivers/net/ethernet/microchip/lan865x/lan865x.c
index 79b800d2b72c..b428ad6516c5 100644
--- a/drivers/net/ethernet/microchip/lan865x/lan865x.c
+++ b/drivers/net/ethernet/microchip/lan865x/lan865x.c
@@ -326,6 +326,7 @@ static const struct net_device_ops lan865x_netdev_ops = {
.ndo_start_xmit = lan865x_send_packet,
.ndo_set_rx_mode = lan865x_set_multicast_list,
.ndo_set_mac_address = lan865x_set_mac_address,
+ .ndo_eth_ioctl = phy_do_ioctl_running,
};
static int lan865x_probe(struct spi_device *spi)
diff --git a/drivers/net/ethernet/microsoft/mana/mana_bpf.c b/drivers/net/ethernet/microsoft/mana/mana_bpf.c
index d30721d4516f..7697c9b52ed3 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_bpf.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_bpf.c
@@ -174,6 +174,7 @@ static int mana_xdp_set(struct net_device *ndev, struct bpf_prog *prog,
struct mana_port_context *apc = netdev_priv(ndev);
struct bpf_prog *old_prog;
struct gdma_context *gc;
+ int err;
gc = apc->ac->gdma_dev->gdma_context;
@@ -195,11 +196,45 @@ static int mana_xdp_set(struct net_device *ndev, struct bpf_prog *prog,
*/
apc->bpf_prog = prog;
- if (old_prog)
- bpf_prog_put(old_prog);
+ if (apc->port_is_up) {
+ /* Re-create the rxqs after an XDP program is loaded or
+ * unloaded, e.g. to switch from full pages to smaller
+ * page fragments when the XDP program is unloaded, and
+ * vice versa.
+ */
+
+ /* Pre-allocate buffers to prevent failure in mana_attach */
+ err = mana_pre_alloc_rxbufs(apc, ndev->mtu, apc->num_queues);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "XDP: Insufficient memory for tx/rx re-config");
+ return err;
+ }
+
+ err = mana_detach(ndev, false);
+ if (err) {
+ netdev_err(ndev,
+ "mana_detach failed at xdp set: %d\n", err);
+ NL_SET_ERR_MSG_MOD(extack,
+ "XDP: Re-config failed at detach");
+ goto err_dealloc_rxbuffs;
+ }
+
+ err = mana_attach(ndev);
+ if (err) {
+ netdev_err(ndev,
+ "mana_attach failed at xdp set: %d\n", err);
+ NL_SET_ERR_MSG_MOD(extack,
+ "XDP: Re-config failed at attach");
+ goto err_dealloc_rxbuffs;
+ }
- if (apc->port_is_up)
mana_chn_setxdp(apc, prog);
+ mana_pre_dealloc_rxbufs(apc);
+ }
+
+ if (old_prog)
+ bpf_prog_put(old_prog);
if (prog)
ndev->max_mtu = MANA_XDP_MTU_MAX;
@@ -207,6 +242,11 @@ static int mana_xdp_set(struct net_device *ndev, struct bpf_prog *prog,
ndev->max_mtu = gc->adapter_mtu - ETH_HLEN;
return 0;
+
+err_dealloc_rxbuffs:
+ apc->bpf_prog = old_prog;
+ mana_pre_dealloc_rxbufs(apc);
+ return err;
}
int mana_bpf(struct net_device *ndev, struct netdev_bpf *bpf)
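
In short, the reconfiguration above follows a pre-allocate/detach/attach pattern so a mid-flight failure cannot leave the port without Rx buffers; a comment-only summary of the order, using only the helpers visible in this hunk:

/* Order of operations in mana_xdp_set() when the port is up:
 *
 *	mana_pre_alloc_rxbufs()   - reserve buffers so attach cannot fail
 *	mana_detach()             - tear down rxqs built for the old mode
 *	mana_attach()             - rebuild rxqs for the new buffer layout
 *	mana_chn_setxdp()         - install the program on the channels
 *	mana_pre_dealloc_rxbufs() - release the temporary reserve
 *
 * On failure the old program pointer is restored and the reserve is
 * released via the err_dealloc_rxbuffs label.
 */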
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index 550843e2164b..f4fc86f20213 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -57,6 +57,15 @@ static bool mana_en_need_log(struct mana_port_context *apc, int err)
return true;
}
+static void mana_put_rx_page(struct mana_rxq *rxq, struct page *page,
+ bool from_pool)
+{
+ if (from_pool)
+ page_pool_put_full_page(rxq->page_pool, page, false);
+ else
+ put_page(page);
+}
+
/* Microsoft Azure Network Adapter (MANA) functions */
static int mana_open(struct net_device *ndev)
@@ -630,21 +639,40 @@ static void *mana_get_rxbuf_pre(struct mana_rxq *rxq, dma_addr_t *da)
}
/* Get RX buffer's data size, alloc size, XDP headroom based on MTU */
-static void mana_get_rxbuf_cfg(int mtu, u32 *datasize, u32 *alloc_size,
- u32 *headroom)
+static void mana_get_rxbuf_cfg(struct mana_port_context *apc,
+ int mtu, u32 *datasize, u32 *alloc_size,
+ u32 *headroom, u32 *frag_count)
{
- if (mtu > MANA_XDP_MTU_MAX)
- *headroom = 0; /* no support for XDP */
- else
- *headroom = XDP_PACKET_HEADROOM;
+ u32 len, buf_size;
- *alloc_size = SKB_DATA_ALIGN(mtu + MANA_RXBUF_PAD + *headroom);
+ /* Calculate datasize first (consistent across all cases) */
+ *datasize = mtu + ETH_HLEN;
- /* Using page pool in this case, so alloc_size is PAGE_SIZE */
- if (*alloc_size < PAGE_SIZE)
- *alloc_size = PAGE_SIZE;
+ /* For XDP and jumbo frames, make sure only one packet fits per page */
+ if (mtu + MANA_RXBUF_PAD > PAGE_SIZE / 2 || mana_xdp_get(apc)) {
+ if (mana_xdp_get(apc)) {
+ *headroom = XDP_PACKET_HEADROOM;
+ *alloc_size = PAGE_SIZE;
+ } else {
+ *headroom = 0; /* no support for XDP */
+ *alloc_size = SKB_DATA_ALIGN(mtu + MANA_RXBUF_PAD +
+ *headroom);
+ }
- *datasize = mtu + ETH_HLEN;
+ *frag_count = 1;
+ return;
+ }
+
+ /* Standard MTU case - optimize for multiple packets per page */
+ *headroom = 0;
+
+ /* Calculate base buffer size needed */
+ len = SKB_DATA_ALIGN(mtu + MANA_RXBUF_PAD + *headroom);
+ buf_size = ALIGN(len, MANA_RX_FRAG_ALIGNMENT);
+
+ /* Calculate how many packets can fit in a page */
+ *frag_count = PAGE_SIZE / buf_size;
+ *alloc_size = buf_size;
}
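
A worked example of the fragment math above as a standalone sketch; PAGE_SIZE, SKB_DATA_ALIGN, MANA_RXBUF_PAD and MANA_RX_FRAG_ALIGNMENT are stand-in values here (the real constants come from the kernel headers and mana.h), so the numbers are illustrative only:

#include <stdio.h>

#define PAGE_SIZE 4096UL
#define ALIGN(x, a) (((x) + (a) - 1) & ~((unsigned long)(a) - 1))
#define SKB_DATA_ALIGN(x) ALIGN(x, 64)	/* assumes 64-byte cache lines */
#define MANA_RXBUF_PAD 334		/* assumption: shinfo + ETH_HLEN */
#define MANA_RX_FRAG_ALIGNMENT 64	/* assumption: illustrative value */

int main(void)
{
	unsigned long mtu = 1500;
	unsigned long len = SKB_DATA_ALIGN(mtu + MANA_RXBUF_PAD);
	unsigned long buf_size = ALIGN(len, MANA_RX_FRAG_ALIGNMENT);

	/* Two 1856-byte buffers share one 4 KiB page in this example */
	printf("buf_size=%lu frag_count=%lu\n", buf_size, PAGE_SIZE / buf_size);
	return 0;
}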
int mana_pre_alloc_rxbufs(struct mana_port_context *mpc, int new_mtu, int num_queues)
@@ -656,8 +684,9 @@ int mana_pre_alloc_rxbufs(struct mana_port_context *mpc, int new_mtu, int num_qu
void *va;
int i;
- mana_get_rxbuf_cfg(new_mtu, &mpc->rxbpre_datasize,
- &mpc->rxbpre_alloc_size, &mpc->rxbpre_headroom);
+ mana_get_rxbuf_cfg(mpc, new_mtu, &mpc->rxbpre_datasize,
+ &mpc->rxbpre_alloc_size, &mpc->rxbpre_headroom,
+ &mpc->rxbpre_frag_count);
dev = mpc->ac->gdma_dev->gdma_context->dev;
@@ -1842,8 +1871,11 @@ drop_xdp:
drop:
if (from_pool) {
- page_pool_recycle_direct(rxq->page_pool,
- virt_to_head_page(buf_va));
+ if (rxq->frag_count == 1)
+ page_pool_recycle_direct(rxq->page_pool,
+ virt_to_head_page(buf_va));
+ else
+ page_pool_free_va(rxq->page_pool, buf_va, true);
} else {
WARN_ON_ONCE(rxq->xdp_save_va);
/* Save for reuse */
@@ -1859,33 +1891,46 @@ static void *mana_get_rxfrag(struct mana_rxq *rxq, struct device *dev,
dma_addr_t *da, bool *from_pool)
{
struct page *page;
+ u32 offset;
void *va;
-
*from_pool = false;
- /* Reuse XDP dropped page if available */
- if (rxq->xdp_save_va) {
- va = rxq->xdp_save_va;
- rxq->xdp_save_va = NULL;
- } else {
- page = page_pool_dev_alloc_pages(rxq->page_pool);
- if (!page)
+ /* Don't use fragments for jumbo frames or XDP, where there is one
+ * fragment per page.
+ */
+ if (rxq->frag_count == 1) {
+ /* Reuse XDP dropped page if available */
+ if (rxq->xdp_save_va) {
+ va = rxq->xdp_save_va;
+ page = virt_to_head_page(va);
+ rxq->xdp_save_va = NULL;
+ } else {
+ page = page_pool_dev_alloc_pages(rxq->page_pool);
+ if (!page)
+ return NULL;
+
+ *from_pool = true;
+ va = page_to_virt(page);
+ }
+
+ *da = dma_map_single(dev, va + rxq->headroom, rxq->datasize,
+ DMA_FROM_DEVICE);
+ if (dma_mapping_error(dev, *da)) {
+ mana_put_rx_page(rxq, page, *from_pool);
return NULL;
+ }
- *from_pool = true;
- va = page_to_virt(page);
+ return va;
}
- *da = dma_map_single(dev, va + rxq->headroom, rxq->datasize,
- DMA_FROM_DEVICE);
- if (dma_mapping_error(dev, *da)) {
- if (*from_pool)
- page_pool_put_full_page(rxq->page_pool, page, false);
- else
- put_page(virt_to_head_page(va));
-
+ page = page_pool_dev_alloc_frag(rxq->page_pool, &offset,
+ rxq->alloc_size);
+ if (!page)
return NULL;
- }
+
+ va = page_to_virt(page) + offset;
+ *da = page_pool_get_dma_addr(page) + offset + rxq->headroom;
+ *from_pool = true;
return va;
}
@@ -1902,9 +1947,9 @@ static void mana_refill_rx_oob(struct device *dev, struct mana_rxq *rxq,
va = mana_get_rxfrag(rxq, dev, &da, &from_pool);
if (!va)
return;
-
- dma_unmap_single(dev, rxoob->sgl[0].address, rxq->datasize,
- DMA_FROM_DEVICE);
+ if (!rxoob->from_pool || rxq->frag_count == 1)
+ dma_unmap_single(dev, rxoob->sgl[0].address, rxq->datasize,
+ DMA_FROM_DEVICE);
*old_buf = rxoob->buf_va;
*old_fp = rxoob->from_pool;
@@ -2315,15 +2360,15 @@ static void mana_destroy_rxq(struct mana_port_context *apc,
if (!rx_oob->buf_va)
continue;
- dma_unmap_single(dev, rx_oob->sgl[0].address,
- rx_oob->sgl[0].size, DMA_FROM_DEVICE);
-
page = virt_to_head_page(rx_oob->buf_va);
- if (rx_oob->from_pool)
- page_pool_put_full_page(rxq->page_pool, page, false);
- else
- put_page(page);
+ if (rxq->frag_count == 1 || !rx_oob->from_pool) {
+ dma_unmap_single(dev, rx_oob->sgl[0].address,
+ rx_oob->sgl[0].size, DMA_FROM_DEVICE);
+ mana_put_rx_page(rxq, page, rx_oob->from_pool);
+ } else {
+ page_pool_free_va(rxq->page_pool, rx_oob->buf_va, true);
+ }
rx_oob->buf_va = NULL;
}
@@ -2429,11 +2474,22 @@ static int mana_create_page_pool(struct mana_rxq *rxq, struct gdma_context *gc)
struct page_pool_params pprm = {};
int ret;
- pprm.pool_size = mpc->rx_queue_size;
+ pprm.pool_size = mpc->rx_queue_size / rxq->frag_count + 1;
pprm.nid = gc->numa_node;
pprm.napi = &rxq->rx_cq.napi;
pprm.netdev = rxq->ndev;
pprm.order = get_order(rxq->alloc_size);
+ pprm.queue_idx = rxq->rxq_idx;
+ pprm.dev = gc->dev;
+
+ /* Let the page pool do the DMA mapping when page sharing with
+ * multiple fragments is enabled for Rx buffers.
+ */
+ if (rxq->frag_count > 1) {
+ pprm.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV;
+ pprm.max_len = PAGE_SIZE;
+ pprm.dma_dir = DMA_FROM_DEVICE;
+ }
rxq->page_pool = page_pool_create(&pprm);
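
A minimal sketch of a fragment-sharing page pool mirroring the parameters above; example_frag_pool() is a hypothetical helper, while the page_pool_params fields and flags are the in-tree API:

#include <linux/dma-mapping.h>
#include <net/page_pool/helpers.h>

/* Hypothetical helper: create a pool where several Rx buffers share one
 * page and the pool owns the DMA mapping, as configured above.
 */
static struct page_pool *example_frag_pool(struct device *dev,
					   unsigned int nbufs,
					   unsigned int frag_count)
{
	struct page_pool_params pprm = {
		.pool_size = nbufs / frag_count + 1,
		.flags	   = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV,
		.max_len   = PAGE_SIZE,
		.dma_dir   = DMA_FROM_DEVICE,
		.dev	   = dev,
	};

	return page_pool_create(&pprm);
}

Fragments are then carved out with page_pool_dev_alloc_frag() and their DMA address recovered with page_pool_get_dma_addr() plus the fragment offset, exactly as mana_get_rxfrag() does above.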
@@ -2472,9 +2528,8 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc,
rxq->rxq_idx = rxq_idx;
rxq->rxobj = INVALID_MANA_HANDLE;
- mana_get_rxbuf_cfg(ndev->mtu, &rxq->datasize, &rxq->alloc_size,
- &rxq->headroom);
-
+ mana_get_rxbuf_cfg(apc, ndev->mtu, &rxq->datasize, &rxq->alloc_size,
+ &rxq->headroom, &rxq->frag_count);
/* Create page pool for RX queue */
err = mana_create_page_pool(rxq, gc);
if (err) {
diff --git a/drivers/net/ethernet/netronome/nfp/crypto/tls.c b/drivers/net/ethernet/netronome/nfp/crypto/tls.c
index f80f1a6953fa..f252ecdcd2cd 100644
--- a/drivers/net/ethernet/netronome/nfp/crypto/tls.c
+++ b/drivers/net/ethernet/netronome/nfp/crypto/tls.c
@@ -495,14 +495,13 @@ int nfp_net_tls_rx_resync_req(struct net_device *netdev,
switch (ipv6h->version) {
case 4:
- sk = inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
- iph->saddr, th->source, iph->daddr,
- th->dest, netdev->ifindex);
+ sk = inet_lookup_established(net, iph->saddr, th->source,
+ iph->daddr, th->dest,
+ netdev->ifindex);
break;
#if IS_ENABLED(CONFIG_IPV6)
case 6:
- sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
- &ipv6h->saddr, th->source,
+ sk = __inet6_lookup_established(net, &ipv6h->saddr, th->source,
&ipv6h->daddr, ntohs(th->dest),
netdev->ifindex, 0);
break;
diff --git a/drivers/net/ethernet/netronome/nfp/flower/metadata.c b/drivers/net/ethernet/netronome/nfp/flower/metadata.c
index 80e4675582bf..dde60c4572fa 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/metadata.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/metadata.c
@@ -564,8 +564,8 @@ int nfp_flower_metadata_init(struct nfp_app *app, u64 host_ctx_count,
/* Init ring buffer and unallocated stats_ids. */
priv->stats_ids.free_list.buf =
- vmalloc(array_size(NFP_FL_STATS_ELEM_RS,
- priv->stats_ring_size));
+ vmalloc_array(priv->stats_ring_size,
+ NFP_FL_STATS_ELEM_RS);
if (!priv->stats_ids.free_list.buf)
goto err_free_last_used;
diff --git a/drivers/net/ethernet/netronome/nfp/nfd3/dp.c b/drivers/net/ethernet/netronome/nfp/nfd3/dp.c
index 08086eb76996..91a227929a5f 100644
--- a/drivers/net/ethernet/netronome/nfp/nfd3/dp.c
+++ b/drivers/net/ethernet/netronome/nfp/nfd3/dp.c
@@ -1169,14 +1169,10 @@ int nfp_nfd3_poll(struct napi_struct *napi, int budget)
if (r_vec->nfp_net->rx_coalesce_adapt_on && r_vec->rx_ring) {
struct dim_sample dim_sample = {};
- unsigned int start;
u64 pkts, bytes;
- do {
- start = u64_stats_fetch_begin(&r_vec->rx_sync);
- pkts = r_vec->rx_pkts;
- bytes = r_vec->rx_bytes;
- } while (u64_stats_fetch_retry(&r_vec->rx_sync, start));
+ pkts = r_vec->rx_pkts;
+ bytes = r_vec->rx_bytes;
dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample);
net_dim(&r_vec->rx_dim, &dim_sample);
@@ -1184,14 +1180,10 @@ int nfp_nfd3_poll(struct napi_struct *napi, int budget)
if (r_vec->nfp_net->tx_coalesce_adapt_on && r_vec->tx_ring) {
struct dim_sample dim_sample = {};
- unsigned int start;
u64 pkts, bytes;
- do {
- start = u64_stats_fetch_begin(&r_vec->tx_sync);
- pkts = r_vec->tx_pkts;
- bytes = r_vec->tx_bytes;
- } while (u64_stats_fetch_retry(&r_vec->tx_sync, start));
+ pkts = r_vec->tx_pkts;
+ bytes = r_vec->tx_bytes;
dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample);
net_dim(&r_vec->tx_dim, &dim_sample);
diff --git a/drivers/net/ethernet/netronome/nfp/nfdk/dp.c b/drivers/net/ethernet/netronome/nfp/nfdk/dp.c
index ab3cd06ed63e..ee0db3d5fd66 100644
--- a/drivers/net/ethernet/netronome/nfp/nfdk/dp.c
+++ b/drivers/net/ethernet/netronome/nfp/nfdk/dp.c
@@ -1279,14 +1279,10 @@ int nfp_nfdk_poll(struct napi_struct *napi, int budget)
if (r_vec->nfp_net->rx_coalesce_adapt_on && r_vec->rx_ring) {
struct dim_sample dim_sample = {};
- unsigned int start;
u64 pkts, bytes;
- do {
- start = u64_stats_fetch_begin(&r_vec->rx_sync);
- pkts = r_vec->rx_pkts;
- bytes = r_vec->rx_bytes;
- } while (u64_stats_fetch_retry(&r_vec->rx_sync, start));
+ pkts = r_vec->rx_pkts;
+ bytes = r_vec->rx_bytes;
dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample);
net_dim(&r_vec->rx_dim, &dim_sample);
@@ -1294,14 +1290,10 @@ int nfp_nfdk_poll(struct napi_struct *napi, int budget)
if (r_vec->nfp_net->tx_coalesce_adapt_on && r_vec->tx_ring) {
struct dim_sample dim_sample = {};
- unsigned int start;
u64 pkts, bytes;
- do {
- start = u64_stats_fetch_begin(&r_vec->tx_sync);
- pkts = r_vec->tx_pkts;
- bytes = r_vec->tx_bytes;
- } while (u64_stats_fetch_retry(&r_vec->tx_sync, start));
+ pkts = r_vec->tx_pkts;
+ bytes = r_vec->tx_bytes;
dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample);
net_dim(&r_vec->tx_dim, &dim_sample);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_devlink.c b/drivers/net/ethernet/qlogic/qed/qed_devlink.c
index 1adc7fbb3f2f..94c5689b5abd 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_devlink.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_devlink.c
@@ -87,20 +87,21 @@ qed_fw_fatal_reporter_recover(struct devlink_health_reporter *reporter,
return 0;
}
+#define QED_REPORTER_FW_GRACEFUL_PERIOD 0
+
static const struct devlink_health_reporter_ops qed_fw_fatal_reporter_ops = {
.name = "fw_fatal",
.recover = qed_fw_fatal_reporter_recover,
.dump = qed_fw_fatal_reporter_dump,
+ .default_graceful_period = QED_REPORTER_FW_GRACEFUL_PERIOD,
};
-#define QED_REPORTER_FW_GRACEFUL_PERIOD 0
-
void qed_fw_reporters_create(struct devlink *devlink)
{
struct qed_devlink *dl = devlink_priv(devlink);
- dl->fw_reporter = devlink_health_reporter_create(devlink, &qed_fw_fatal_reporter_ops,
- QED_REPORTER_FW_GRACEFUL_PERIOD, dl);
+ dl->fw_reporter = devlink_health_reporter_create(devlink,
+ &qed_fw_fatal_reporter_ops, dl);
if (IS_ERR(dl->fw_reporter)) {
DP_NOTICE(dl->cdev, "Failed to create fw reporter, err = %ld\n",
PTR_ERR(dl->fw_reporter));
diff --git a/drivers/net/ethernet/qualcomm/Kconfig b/drivers/net/ethernet/qualcomm/Kconfig
index a4434eb38950..ba7efb108637 100644
--- a/drivers/net/ethernet/qualcomm/Kconfig
+++ b/drivers/net/ethernet/qualcomm/Kconfig
@@ -60,6 +60,21 @@ config QCOM_EMAC
low power, Receive-Side Scaling (RSS), and IEEE 1588-2008
Precision Clock Synchronization Protocol.
+config QCOM_PPE
+ tristate "Qualcomm Technologies, Inc. PPE Ethernet support"
+ depends on COMMON_CLK && HAS_IOMEM && OF
+ depends on ARCH_QCOM || COMPILE_TEST
+ select REGMAP_MMIO
+ help
+ This driver supports the Qualcomm Technologies, Inc. packet
+ process engine (PPE) available on IPQ SoCs. The PPE includes
+ the Ethernet MACs, Ethernet DMA (EDMA) and a switch core that
+ supports L3 flow offload, L2 switching, RSS and tunnel
+ offload.
+
+ To compile this driver as a module, choose M here. The module
+ will be called qcom-ppe.
+
source "drivers/net/ethernet/qualcomm/rmnet/Kconfig"
endif # NET_VENDOR_QUALCOMM
diff --git a/drivers/net/ethernet/qualcomm/Makefile b/drivers/net/ethernet/qualcomm/Makefile
index 9250976dd884..166a59aea363 100644
--- a/drivers/net/ethernet/qualcomm/Makefile
+++ b/drivers/net/ethernet/qualcomm/Makefile
@@ -11,4 +11,5 @@ qcauart-objs := qca_uart.o
obj-y += emac/
+obj-$(CONFIG_QCOM_PPE) += ppe/
obj-$(CONFIG_RMNET) += rmnet/
diff --git a/drivers/net/ethernet/qualcomm/ppe/Makefile b/drivers/net/ethernet/qualcomm/ppe/Makefile
new file mode 100644
index 000000000000..9e60b2400c16
--- /dev/null
+++ b/drivers/net/ethernet/qualcomm/ppe/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Makefile for the PPE (Packet Process Engine) driver in IPQ SoCs
+#
+
+obj-$(CONFIG_QCOM_PPE) += qcom-ppe.o
+qcom-ppe-objs := ppe.o ppe_config.o ppe_debugfs.o
diff --git a/drivers/net/ethernet/qualcomm/ppe/ppe.c b/drivers/net/ethernet/qualcomm/ppe/ppe.c
new file mode 100644
index 000000000000..be747510d947
--- /dev/null
+++ b/drivers/net/ethernet/qualcomm/ppe/ppe.c
@@ -0,0 +1,239 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
+ */
+
+/* PPE platform device probe, DTSI parser and PPE clock initializations. */
+
+#include <linux/clk.h>
+#include <linux/interconnect.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/reset.h>
+
+#include "ppe.h"
+#include "ppe_config.h"
+#include "ppe_debugfs.h"
+
+#define PPE_PORT_MAX 8
+#define PPE_CLK_RATE 353000000
+
+/* Interconnect (ICC) paths for enabling the PPE device. Entries with
+ * avg_bw and peak_bw of 0 are updated at runtime based on the PPE
+ * clock rate.
+ */
+static const struct icc_bulk_data ppe_icc_data[] = {
+ {
+ .name = "ppe",
+ .avg_bw = 0,
+ .peak_bw = 0,
+ },
+ {
+ .name = "ppe_cfg",
+ .avg_bw = 0,
+ .peak_bw = 0,
+ },
+ {
+ .name = "qos_gen",
+ .avg_bw = 6000,
+ .peak_bw = 6000,
+ },
+ {
+ .name = "timeout_ref",
+ .avg_bw = 6000,
+ .peak_bw = 6000,
+ },
+ {
+ .name = "nssnoc_memnoc",
+ .avg_bw = 533333,
+ .peak_bw = 533333,
+ },
+ {
+ .name = "memnoc_nssnoc",
+ .avg_bw = 533333,
+ .peak_bw = 533333,
+ },
+ {
+ .name = "memnoc_nssnoc_1",
+ .avg_bw = 533333,
+ .peak_bw = 533333,
+ },
+};
+
+static const struct regmap_range ppe_readable_ranges[] = {
+ regmap_reg_range(0x0, 0x1ff), /* Global */
+ regmap_reg_range(0x400, 0x5ff), /* LPI CSR */
+ regmap_reg_range(0x1000, 0x11ff), /* GMAC0 */
+ regmap_reg_range(0x1200, 0x13ff), /* GMAC1 */
+ regmap_reg_range(0x1400, 0x15ff), /* GMAC2 */
+ regmap_reg_range(0x1600, 0x17ff), /* GMAC3 */
+ regmap_reg_range(0x1800, 0x19ff), /* GMAC4 */
+ regmap_reg_range(0x1a00, 0x1bff), /* GMAC5 */
+ regmap_reg_range(0xb000, 0xefff), /* PRX CSR */
+ regmap_reg_range(0xf000, 0x1efff), /* IPE */
+ regmap_reg_range(0x20000, 0x5ffff), /* PTX CSR */
+ regmap_reg_range(0x60000, 0x9ffff), /* IPE L2 CSR */
+ regmap_reg_range(0xb0000, 0xeffff), /* IPO CSR */
+ regmap_reg_range(0x100000, 0x17ffff), /* IPE PC */
+ regmap_reg_range(0x180000, 0x1bffff), /* PRE IPO CSR */
+ regmap_reg_range(0x1d0000, 0x1dffff), /* Tunnel parser */
+ regmap_reg_range(0x1e0000, 0x1effff), /* Ingress parse */
+ regmap_reg_range(0x200000, 0x2fffff), /* IPE L3 */
+ regmap_reg_range(0x300000, 0x3fffff), /* IPE tunnel */
+ regmap_reg_range(0x400000, 0x4fffff), /* Scheduler */
+ regmap_reg_range(0x500000, 0x503fff), /* XGMAC0 */
+ regmap_reg_range(0x504000, 0x507fff), /* XGMAC1 */
+ regmap_reg_range(0x508000, 0x50bfff), /* XGMAC2 */
+ regmap_reg_range(0x50c000, 0x50ffff), /* XGMAC3 */
+ regmap_reg_range(0x510000, 0x513fff), /* XGMAC4 */
+ regmap_reg_range(0x514000, 0x517fff), /* XGMAC5 */
+ regmap_reg_range(0x600000, 0x6fffff), /* BM */
+ regmap_reg_range(0x800000, 0x9fffff), /* QM */
+ regmap_reg_range(0xb00000, 0xbef800), /* EDMA */
+};
+
+static const struct regmap_access_table ppe_reg_table = {
+ .yes_ranges = ppe_readable_ranges,
+ .n_yes_ranges = ARRAY_SIZE(ppe_readable_ranges),
+};
+
+static const struct regmap_config regmap_config_ipq9574 = {
+ .reg_bits = 32,
+ .reg_stride = 4,
+ .val_bits = 32,
+ .rd_table = &ppe_reg_table,
+ .wr_table = &ppe_reg_table,
+ .max_register = 0xbef800,
+ .fast_io = true,
+};
+
+static int ppe_clock_init_and_reset(struct ppe_device *ppe_dev)
+{
+ unsigned long ppe_rate = ppe_dev->clk_rate;
+ struct device *dev = ppe_dev->dev;
+ struct reset_control *rstc;
+ struct clk_bulk_data *clks;
+ struct clk *clk;
+ int ret, i;
+
+ for (i = 0; i < ppe_dev->num_icc_paths; i++) {
+ ppe_dev->icc_paths[i].name = ppe_icc_data[i].name;
+ ppe_dev->icc_paths[i].avg_bw = ppe_icc_data[i].avg_bw ? :
+ Bps_to_icc(ppe_rate);
+
+ /* PPE does not have an explicit peak bandwidth requirement,
+ * so set the peak bandwidth to be equal to the average
+ * bandwidth.
+ */
+ ppe_dev->icc_paths[i].peak_bw = ppe_icc_data[i].peak_bw ? :
+ Bps_to_icc(ppe_rate);
+ }
+
+ ret = devm_of_icc_bulk_get(dev, ppe_dev->num_icc_paths,
+ ppe_dev->icc_paths);
+ if (ret)
+ return ret;
+
+ ret = icc_bulk_set_bw(ppe_dev->num_icc_paths, ppe_dev->icc_paths);
+ if (ret)
+ return ret;
+
+ /* The PPE clocks have a common parent clock. Setting the clock
+ * rate of "ppe" ensures the clock rate of all PPE clocks is
+ * configured to the same rate.
+ */
+ clk = devm_clk_get(dev, "ppe");
+ if (IS_ERR(clk))
+ return PTR_ERR(clk);
+
+ ret = clk_set_rate(clk, ppe_rate);
+ if (ret)
+ return ret;
+
+ ret = devm_clk_bulk_get_all_enabled(dev, &clks);
+ if (ret < 0)
+ return ret;
+
+ /* Reset the PPE. */
+ rstc = devm_reset_control_get_exclusive(dev, NULL);
+ if (IS_ERR(rstc))
+ return PTR_ERR(rstc);
+
+ ret = reset_control_assert(rstc);
+ if (ret)
+ return ret;
+
+ /* Holding the reset asserted for 10 ms is necessary to reset the PPE. */
+ usleep_range(10000, 11000);
+
+ return reset_control_deassert(rstc);
+}
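
A quick check of the bandwidth arithmetic above; Bps_to_icc() from <linux/interconnect.h> converts bytes per second to the kBps units used by the interconnect framework:

/* With clk_rate = PPE_CLK_RATE = 353000000 and Bps_to_icc(x) = (x) / 1000:
 *
 *	Bps_to_icc(353000000) == 353000 kBps
 *
 * so the zero-initialized "ppe" and "ppe_cfg" paths are programmed with
 * avg_bw == peak_bw == 353000, while the remaining paths keep their
 * static values from ppe_icc_data[].
 */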
+
+static int qcom_ppe_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct ppe_device *ppe_dev;
+ void __iomem *base;
+ int ret, num_icc;
+
+ num_icc = ARRAY_SIZE(ppe_icc_data);
+ ppe_dev = devm_kzalloc(dev, struct_size(ppe_dev, icc_paths, num_icc),
+ GFP_KERNEL);
+ if (!ppe_dev)
+ return -ENOMEM;
+
+ base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(base))
+ return dev_err_probe(dev, PTR_ERR(base), "PPE ioremap failed\n");
+
+ ppe_dev->regmap = devm_regmap_init_mmio(dev, base, &regmap_config_ipq9574);
+ if (IS_ERR(ppe_dev->regmap))
+ return dev_err_probe(dev, PTR_ERR(ppe_dev->regmap),
+ "PPE initialize regmap failed\n");
+ ppe_dev->dev = dev;
+ ppe_dev->clk_rate = PPE_CLK_RATE;
+ ppe_dev->num_ports = PPE_PORT_MAX;
+ ppe_dev->num_icc_paths = num_icc;
+
+ ret = ppe_clock_init_and_reset(ppe_dev);
+ if (ret)
+ return dev_err_probe(dev, ret, "PPE clock config failed\n");
+
+ ret = ppe_hw_config(ppe_dev);
+ if (ret)
+ return dev_err_probe(dev, ret, "PPE HW config failed\n");
+
+ ppe_debugfs_setup(ppe_dev);
+ platform_set_drvdata(pdev, ppe_dev);
+
+ return 0;
+}
+
+static void qcom_ppe_remove(struct platform_device *pdev)
+{
+ struct ppe_device *ppe_dev;
+
+ ppe_dev = platform_get_drvdata(pdev);
+ ppe_debugfs_teardown(ppe_dev);
+}
+
+static const struct of_device_id qcom_ppe_of_match[] = {
+ { .compatible = "qcom,ipq9574-ppe" },
+ {}
+};
+MODULE_DEVICE_TABLE(of, qcom_ppe_of_match);
+
+static struct platform_driver qcom_ppe_driver = {
+ .driver = {
+ .name = "qcom_ppe",
+ .of_match_table = qcom_ppe_of_match,
+ },
+ .probe = qcom_ppe_probe,
+ .remove = qcom_ppe_remove,
+};
+module_platform_driver(qcom_ppe_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Qualcomm Technologies, Inc. IPQ PPE driver");
diff --git a/drivers/net/ethernet/qualcomm/ppe/ppe.h b/drivers/net/ethernet/qualcomm/ppe/ppe.h
new file mode 100644
index 000000000000..27458f0bc206
--- /dev/null
+++ b/drivers/net/ethernet/qualcomm/ppe/ppe.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0-only
+ *
+ * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
+ */
+
+#ifndef __PPE_H__
+#define __PPE_H__
+
+#include <linux/compiler.h>
+#include <linux/interconnect.h>
+
+struct device;
+struct regmap;
+struct dentry;
+
+/**
+ * struct ppe_device - PPE device private data.
+ * @dev: PPE device structure.
+ * @regmap: PPE register map.
+ * @clk_rate: PPE clock rate.
+ * @num_ports: Number of PPE ports.
+ * @debugfs_root: Debugfs root entry.
+ * @num_icc_paths: Number of interconnect paths.
+ * @icc_paths: Interconnect path array.
+ *
+ * The PPE device is an instance of the PPE hardware, which is used to
+ * configure PPE packet processing modules such as BM (buffer management),
+ * QM (queue management) and the scheduler.
+ */
+struct ppe_device {
+ struct device *dev;
+ struct regmap *regmap;
+ unsigned long clk_rate;
+ unsigned int num_ports;
+ struct dentry *debugfs_root;
+ unsigned int num_icc_paths;
+ struct icc_bulk_data icc_paths[] __counted_by(num_icc_paths);
+};
+#endif
diff --git a/drivers/net/ethernet/qualcomm/ppe/ppe_config.c b/drivers/net/ethernet/qualcomm/ppe/ppe_config.c
new file mode 100644
index 000000000000..e9a0e22907a6
--- /dev/null
+++ b/drivers/net/ethernet/qualcomm/ppe/ppe_config.c
@@ -0,0 +1,2034 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
+ */
+
+/* PPE hardware initialization configuration, such as BM (buffer
+ * management), QM (queue management) and scheduler settings.
+ */
+
+#include <linux/bitfield.h>
+#include <linux/bitmap.h>
+#include <linux/bits.h>
+#include <linux/device.h>
+#include <linux/regmap.h>
+
+#include "ppe.h"
+#include "ppe_config.h"
+#include "ppe_regs.h"
+
+#define PPE_QUEUE_SCH_PRI_NUM 8
+
+/**
+ * struct ppe_bm_port_config - PPE BM port configuration.
+ * @port_id_start: The first BM port ID to configure.
+ * @port_id_end: The last BM port ID to configure.
+ * @pre_alloc: BM port dedicated buffer number.
+ * @in_fly_buf: Number of buffers for receiving packets after a pause frame is sent.
+ * @ceil: Ceil to generate the back pressure.
+ * @weight: Weight value.
+ * @resume_offset: Resume offset from the threshold value.
+ * @resume_ceil: Ceil to resume from the back pressure state.
+ * @dynamic: Dynamic threshold used or not.
+ *
+ * This configures the thresholds that affect the port
+ * flow control.
+ */
+struct ppe_bm_port_config {
+ unsigned int port_id_start;
+ unsigned int port_id_end;
+ unsigned int pre_alloc;
+ unsigned int in_fly_buf;
+ unsigned int ceil;
+ unsigned int weight;
+ unsigned int resume_offset;
+ unsigned int resume_ceil;
+ bool dynamic;
+};
+
+/**
+ * struct ppe_qm_queue_config - PPE queue config.
+ * @queue_start: PPE start of queue ID.
+ * @queue_end: PPE end of queue ID.
+ * @prealloc_buf: Queue dedicated buffer number.
+ * @ceil: Ceil at which packets start being dropped from the queue.
+ * @weight: Weight value.
+ * @resume_offset: Resume offset from the threshold.
+ * @dynamic: Threshold value is decided dynamically or statically.
+ *
+ * The queue configuration decides the threshold for dropping packets
+ * from the PPE hardware queue.
+ */
+struct ppe_qm_queue_config {
+ unsigned int queue_start;
+ unsigned int queue_end;
+ unsigned int prealloc_buf;
+ unsigned int ceil;
+ unsigned int weight;
+ unsigned int resume_offset;
+ bool dynamic;
+};
+
+/**
+ * enum ppe_scheduler_direction - PPE scheduler direction for packet.
+ * @PPE_SCH_INGRESS: Scheduler for packets on ingress.
+ * @PPE_SCH_EGRESS: Scheduler for packets on egress.
+ */
+enum ppe_scheduler_direction {
+ PPE_SCH_INGRESS = 0,
+ PPE_SCH_EGRESS = 1,
+};
+
+/**
+ * struct ppe_scheduler_bm_config - PPE arbitration for buffer config.
+ * @valid: Arbitration entry valid or not.
+ * @dir: Arbitration entry for egress or ingress.
+ * @port: Port ID to use arbitration entry.
+ * @backup_port_valid: Backup port valid or not.
+ * @backup_port: Backup port ID to use.
+ *
+ * Configure the scheduler settings for accessing and releasing the PPE buffers.
+ */
+struct ppe_scheduler_bm_config {
+ bool valid;
+ enum ppe_scheduler_direction dir;
+ unsigned int port;
+ bool backup_port_valid;
+ unsigned int backup_port;
+};
+
+/**
+ * struct ppe_scheduler_qm_config - PPE arbitration for scheduler config.
+ * @ensch_port_bmp: Port bit map for enqueue scheduler.
+ * @ensch_port: Port ID to enqueue scheduler.
+ * @desch_port: Port ID to dequeue scheduler.
+ * @desch_backup_port_valid: Dequeue for the backup port valid or not.
+ * @desch_backup_port: Backup port ID to dequeue scheduler.
+ *
+ * Configure the scheduler settings for enqueuing and dequeuing packets on
+ * the PPE port.
+ */
+struct ppe_scheduler_qm_config {
+ unsigned int ensch_port_bmp;
+ unsigned int ensch_port;
+ unsigned int desch_port;
+ bool desch_backup_port_valid;
+ unsigned int desch_backup_port;
+};
+
+/**
+ * struct ppe_scheduler_port_config - PPE port scheduler config.
+ * @port: Port ID to be scheduled.
+ * @flow_level: Scheduler flow level or not.
+ * @node_id: Node ID; for level 0, the queue ID is used.
+ * @loop_num: Loop number of scheduler config.
+ * @pri_max: Max priority configured.
+ * @flow_id: Strict priority ID.
+ * @drr_node_id: Node ID for scheduler.
+ *
+ * PPE port scheduler configuration which decides the priority in the
+ * packet scheduler for the egress port.
+ */
+struct ppe_scheduler_port_config {
+ unsigned int port;
+ bool flow_level;
+ unsigned int node_id;
+ unsigned int loop_num;
+ unsigned int pri_max;
+ unsigned int flow_id;
+ unsigned int drr_node_id;
+};
+
+/**
+ * struct ppe_port_schedule_resource - PPE port scheduler resource.
+ * @ucastq_start: Unicast queue start ID.
+ * @ucastq_end: Unicast queue end ID.
+ * @mcastq_start: Multicast queue start ID.
+ * @mcastq_end: Multicast queue end ID.
+ * @flow_id_start: Flow start ID.
+ * @flow_id_end: Flow end ID.
+ * @l0node_start: Scheduler node start ID for queue level.
+ * @l0node_end: Scheduler node end ID for queue level.
+ * @l1node_start: Scheduler node start ID for flow level.
+ * @l1node_end: Scheduler node end ID for flow level.
+ *
+ * PPE scheduler resource allocated among the PPE ports.
+ */
+struct ppe_port_schedule_resource {
+ unsigned int ucastq_start;
+ unsigned int ucastq_end;
+ unsigned int mcastq_start;
+ unsigned int mcastq_end;
+ unsigned int flow_id_start;
+ unsigned int flow_id_end;
+ unsigned int l0node_start;
+ unsigned int l0node_end;
+ unsigned int l1node_start;
+ unsigned int l1node_end;
+};
+
+/* There are a total of 2048 buffers available in the PPE, out of which
+ * some are reserved for specific purposes per PPE port. The remaining
+ * pool of 1550 buffers is assigned to the general 'group0', which is
+ * shared among all ports of the PPE.
+ */
+static const int ipq9574_ppe_bm_group_config = 1550;
+
+/* The buffer configurations per PPE port. The PPE supports 15 BM ports
+ * and 4 BM groups. BM ports 0-7 are for EDMA port 0, BM ports 8-13 are
+ * for PPE physical ports 1-6, and BM port 14 is for the EIP port.
+ */
+static const struct ppe_bm_port_config ipq9574_ppe_bm_port_config[] = {
+ {
+ /* Buffer configuration for the BM port ID 0 of EDMA. */
+ .port_id_start = 0,
+ .port_id_end = 0,
+ .pre_alloc = 0,
+ .in_fly_buf = 100,
+ .ceil = 1146,
+ .weight = 7,
+ .resume_offset = 8,
+ .resume_ceil = 0,
+ .dynamic = true,
+ },
+ {
+ /* Buffer configuration for the BM port ID 1-7 of EDMA. */
+ .port_id_start = 1,
+ .port_id_end = 7,
+ .pre_alloc = 0,
+ .in_fly_buf = 100,
+ .ceil = 250,
+ .weight = 4,
+ .resume_offset = 36,
+ .resume_ceil = 0,
+ .dynamic = true,
+ },
+ {
+ /* Buffer configuration for the BM port ID 8-13 of PPE ports. */
+ .port_id_start = 8,
+ .port_id_end = 13,
+ .pre_alloc = 0,
+ .in_fly_buf = 128,
+ .ceil = 250,
+ .weight = 4,
+ .resume_offset = 36,
+ .resume_ceil = 0,
+ .dynamic = true,
+ },
+ {
+ /* Buffer configuration for the BM port ID 14 of EIP. */
+ .port_id_start = 14,
+ .port_id_end = 14,
+ .pre_alloc = 0,
+ .in_fly_buf = 40,
+ .ceil = 250,
+ .weight = 4,
+ .resume_offset = 36,
+ .resume_ceil = 0,
+ .dynamic = true,
+ },
+};
+
+/* QM fetches packets from the PPE buffer management for transmission.
+ * The QM group configuration limits the total number of buffers
+ * enqueued by all PPE hardware queues.
+ * There are a total of 2048 buffers available, out of which some are
+ * dedicated to hardware exception handlers. The remaining buffers are
+ * assigned to the general 'group0', which is the group assigned to all
+ * queues by default.
+ */
+static const int ipq9574_ppe_qm_group_config = 2000;
+
+/* Default QM settings for unicast and multicast queues for IPQ9574. */
+static const struct ppe_qm_queue_config ipq9574_ppe_qm_queue_config[] = {
+ {
+ /* QM settings for unicast queues 0 to 255. */
+ .queue_start = 0,
+ .queue_end = 255,
+ .prealloc_buf = 0,
+ .ceil = 1200,
+ .weight = 7,
+ .resume_offset = 36,
+ .dynamic = true,
+ },
+ {
+ /* QM settings for multicast queues 256 to 299. */
+ .queue_start = 256,
+ .queue_end = 299,
+ .prealloc_buf = 0,
+ .ceil = 250,
+ .weight = 0,
+ .resume_offset = 36,
+ .dynamic = false,
+ },
+};
+
+/* The PPE scheduler configuration for BM includes multiple entries. Each
+ * entry indicates the primary port to be assigned buffers on ingress or
+ * to release buffers on egress. The backup port ID is used when the
+ * primary port is down.
+ */
+static const struct ppe_scheduler_bm_config ipq9574_ppe_sch_bm_config[] = {
+ {true, PPE_SCH_INGRESS, 0, false, 0},
+ {true, PPE_SCH_EGRESS, 0, false, 0},
+ {true, PPE_SCH_INGRESS, 5, false, 0},
+ {true, PPE_SCH_EGRESS, 5, false, 0},
+ {true, PPE_SCH_INGRESS, 6, false, 0},
+ {true, PPE_SCH_EGRESS, 6, false, 0},
+ {true, PPE_SCH_INGRESS, 1, false, 0},
+ {true, PPE_SCH_EGRESS, 1, false, 0},
+ {true, PPE_SCH_INGRESS, 0, false, 0},
+ {true, PPE_SCH_EGRESS, 0, false, 0},
+ {true, PPE_SCH_INGRESS, 5, false, 0},
+ {true, PPE_SCH_EGRESS, 5, false, 0},
+ {true, PPE_SCH_INGRESS, 6, false, 0},
+ {true, PPE_SCH_EGRESS, 6, false, 0},
+ {true, PPE_SCH_INGRESS, 7, false, 0},
+ {true, PPE_SCH_EGRESS, 7, false, 0},
+ {true, PPE_SCH_INGRESS, 0, false, 0},
+ {true, PPE_SCH_EGRESS, 0, false, 0},
+ {true, PPE_SCH_INGRESS, 1, false, 0},
+ {true, PPE_SCH_EGRESS, 1, false, 0},
+ {true, PPE_SCH_INGRESS, 5, false, 0},
+ {true, PPE_SCH_EGRESS, 5, false, 0},
+ {true, PPE_SCH_INGRESS, 6, false, 0},
+ {true, PPE_SCH_EGRESS, 6, false, 0},
+ {true, PPE_SCH_INGRESS, 2, false, 0},
+ {true, PPE_SCH_EGRESS, 2, false, 0},
+ {true, PPE_SCH_INGRESS, 0, false, 0},
+ {true, PPE_SCH_EGRESS, 0, false, 0},
+ {true, PPE_SCH_INGRESS, 5, false, 0},
+ {true, PPE_SCH_EGRESS, 5, false, 0},
+ {true, PPE_SCH_INGRESS, 6, false, 0},
+ {true, PPE_SCH_EGRESS, 6, false, 0},
+ {true, PPE_SCH_INGRESS, 1, false, 0},
+ {true, PPE_SCH_EGRESS, 1, false, 0},
+ {true, PPE_SCH_INGRESS, 3, false, 0},
+ {true, PPE_SCH_EGRESS, 3, false, 0},
+ {true, PPE_SCH_INGRESS, 0, false, 0},
+ {true, PPE_SCH_EGRESS, 0, false, 0},
+ {true, PPE_SCH_INGRESS, 5, false, 0},
+ {true, PPE_SCH_EGRESS, 5, false, 0},
+ {true, PPE_SCH_INGRESS, 6, false, 0},
+ {true, PPE_SCH_EGRESS, 6, false, 0},
+ {true, PPE_SCH_INGRESS, 7, false, 0},
+ {true, PPE_SCH_EGRESS, 7, false, 0},
+ {true, PPE_SCH_INGRESS, 0, false, 0},
+ {true, PPE_SCH_EGRESS, 0, false, 0},
+ {true, PPE_SCH_INGRESS, 1, false, 0},
+ {true, PPE_SCH_EGRESS, 1, false, 0},
+ {true, PPE_SCH_INGRESS, 5, false, 0},
+ {true, PPE_SCH_EGRESS, 5, false, 0},
+ {true, PPE_SCH_INGRESS, 6, false, 0},
+ {true, PPE_SCH_EGRESS, 6, false, 0},
+ {true, PPE_SCH_INGRESS, 4, false, 0},
+ {true, PPE_SCH_EGRESS, 4, false, 0},
+ {true, PPE_SCH_INGRESS, 0, false, 0},
+ {true, PPE_SCH_EGRESS, 0, false, 0},
+ {true, PPE_SCH_INGRESS, 5, false, 0},
+ {true, PPE_SCH_EGRESS, 5, false, 0},
+ {true, PPE_SCH_INGRESS, 6, false, 0},
+ {true, PPE_SCH_EGRESS, 6, false, 0},
+ {true, PPE_SCH_INGRESS, 1, false, 0},
+ {true, PPE_SCH_EGRESS, 1, false, 0},
+ {true, PPE_SCH_INGRESS, 0, false, 0},
+ {true, PPE_SCH_EGRESS, 0, false, 0},
+ {true, PPE_SCH_INGRESS, 5, false, 0},
+ {true, PPE_SCH_EGRESS, 5, false, 0},
+ {true, PPE_SCH_INGRESS, 6, false, 0},
+ {true, PPE_SCH_EGRESS, 6, false, 0},
+ {true, PPE_SCH_INGRESS, 2, false, 0},
+ {true, PPE_SCH_EGRESS, 2, false, 0},
+ {true, PPE_SCH_INGRESS, 0, false, 0},
+ {true, PPE_SCH_EGRESS, 0, false, 0},
+ {true, PPE_SCH_INGRESS, 7, false, 0},
+ {true, PPE_SCH_EGRESS, 7, false, 0},
+ {true, PPE_SCH_INGRESS, 5, false, 0},
+ {true, PPE_SCH_EGRESS, 5, false, 0},
+ {true, PPE_SCH_INGRESS, 6, false, 0},
+ {true, PPE_SCH_EGRESS, 6, false, 0},
+ {true, PPE_SCH_INGRESS, 1, false, 0},
+ {true, PPE_SCH_EGRESS, 1, false, 0},
+ {true, PPE_SCH_INGRESS, 0, false, 0},
+ {true, PPE_SCH_EGRESS, 0, false, 0},
+ {true, PPE_SCH_INGRESS, 5, false, 0},
+ {true, PPE_SCH_EGRESS, 5, false, 0},
+ {true, PPE_SCH_INGRESS, 6, false, 0},
+ {true, PPE_SCH_EGRESS, 6, false, 0},
+ {true, PPE_SCH_INGRESS, 3, false, 0},
+ {true, PPE_SCH_EGRESS, 3, false, 0},
+ {true, PPE_SCH_INGRESS, 1, false, 0},
+ {true, PPE_SCH_EGRESS, 1, false, 0},
+ {true, PPE_SCH_INGRESS, 0, false, 0},
+ {true, PPE_SCH_EGRESS, 0, false, 0},
+ {true, PPE_SCH_INGRESS, 5, false, 0},
+ {true, PPE_SCH_EGRESS, 5, false, 0},
+ {true, PPE_SCH_INGRESS, 6, false, 0},
+ {true, PPE_SCH_EGRESS, 6, false, 0},
+ {true, PPE_SCH_INGRESS, 4, false, 0},
+ {true, PPE_SCH_EGRESS, 4, false, 0},
+ {true, PPE_SCH_INGRESS, 7, false, 0},
+ {true, PPE_SCH_EGRESS, 7, false, 0},
+};
+
+/* The PPE scheduler configuration for QM includes multiple entries. Each
+ * entry contains the ports to be dispatched for enqueueing and dequeueing.
+ * The backup dequeue port is used when the primary dequeue port is down.
+ */
+static const struct ppe_scheduler_qm_config ipq9574_ppe_sch_qm_config[] = {
+ {0x98, 6, 0, true, 1},
+ {0x94, 5, 6, true, 3},
+ {0x86, 0, 5, true, 4},
+ {0x8C, 1, 6, true, 0},
+ {0x1C, 7, 5, true, 1},
+ {0x98, 2, 6, true, 0},
+ {0x1C, 5, 7, true, 1},
+ {0x34, 3, 6, true, 0},
+ {0x8C, 4, 5, true, 1},
+ {0x98, 2, 6, true, 0},
+ {0x8C, 5, 4, true, 1},
+ {0xA8, 0, 6, true, 2},
+ {0x98, 5, 1, true, 0},
+ {0x98, 6, 5, true, 2},
+ {0x89, 1, 6, true, 4},
+ {0xA4, 3, 0, true, 1},
+ {0x8C, 5, 6, true, 4},
+ {0xA8, 0, 2, true, 1},
+ {0x98, 6, 5, true, 0},
+ {0xC4, 4, 3, true, 1},
+ {0x94, 6, 5, true, 0},
+ {0x1C, 7, 6, true, 1},
+ {0x98, 2, 5, true, 0},
+ {0x1C, 6, 7, true, 1},
+ {0x1C, 5, 6, true, 0},
+ {0x94, 3, 5, true, 1},
+ {0x8C, 4, 6, true, 0},
+ {0x94, 1, 5, true, 3},
+ {0x94, 6, 1, true, 0},
+ {0xD0, 3, 5, true, 2},
+ {0x98, 6, 0, true, 1},
+ {0x94, 5, 6, true, 3},
+ {0x94, 1, 5, true, 0},
+ {0x98, 2, 6, true, 1},
+ {0x8C, 4, 5, true, 0},
+ {0x1C, 7, 6, true, 1},
+ {0x8C, 0, 5, true, 4},
+ {0x89, 1, 6, true, 2},
+ {0x98, 5, 0, true, 1},
+ {0x94, 6, 5, true, 3},
+ {0x92, 0, 6, true, 2},
+ {0x98, 1, 5, true, 0},
+ {0x98, 6, 2, true, 1},
+ {0xD0, 0, 5, true, 3},
+ {0x94, 6, 0, true, 1},
+ {0x8C, 5, 6, true, 4},
+ {0x8C, 1, 5, true, 0},
+ {0x1C, 6, 7, true, 1},
+ {0x1C, 5, 6, true, 0},
+ {0xB0, 2, 3, true, 1},
+ {0xC4, 4, 5, true, 0},
+ {0x8C, 6, 4, true, 1},
+ {0xA4, 3, 6, true, 0},
+ {0x1C, 5, 7, true, 1},
+ {0x4C, 0, 5, true, 4},
+ {0x8C, 6, 0, true, 1},
+ {0x34, 7, 6, true, 3},
+ {0x94, 5, 0, true, 1},
+ {0x98, 6, 5, true, 2},
+};
+
+static const struct ppe_scheduler_port_config ppe_port_sch_config[] = {
+ {
+ .port = 0,
+ .flow_level = true,
+ .node_id = 0,
+ .loop_num = 1,
+ .pri_max = 1,
+ .flow_id = 0,
+ .drr_node_id = 0,
+ },
+ {
+ .port = 0,
+ .flow_level = false,
+ .node_id = 0,
+ .loop_num = 8,
+ .pri_max = 8,
+ .flow_id = 0,
+ .drr_node_id = 0,
+ },
+ {
+ .port = 0,
+ .flow_level = false,
+ .node_id = 8,
+ .loop_num = 8,
+ .pri_max = 8,
+ .flow_id = 0,
+ .drr_node_id = 0,
+ },
+ {
+ .port = 0,
+ .flow_level = false,
+ .node_id = 16,
+ .loop_num = 8,
+ .pri_max = 8,
+ .flow_id = 0,
+ .drr_node_id = 0,
+ },
+ {
+ .port = 0,
+ .flow_level = false,
+ .node_id = 24,
+ .loop_num = 8,
+ .pri_max = 8,
+ .flow_id = 0,
+ .drr_node_id = 0,
+ },
+ {
+ .port = 0,
+ .flow_level = false,
+ .node_id = 32,
+ .loop_num = 8,
+ .pri_max = 8,
+ .flow_id = 0,
+ .drr_node_id = 0,
+ },
+ {
+ .port = 0,
+ .flow_level = false,
+ .node_id = 40,
+ .loop_num = 8,
+ .pri_max = 8,
+ .flow_id = 0,
+ .drr_node_id = 0,
+ },
+ {
+ .port = 0,
+ .flow_level = false,
+ .node_id = 48,
+ .loop_num = 8,
+ .pri_max = 8,
+ .flow_id = 0,
+ .drr_node_id = 0,
+ },
+ {
+ .port = 0,
+ .flow_level = false,
+ .node_id = 56,
+ .loop_num = 8,
+ .pri_max = 8,
+ .flow_id = 0,
+ .drr_node_id = 0,
+ },
+ {
+ .port = 0,
+ .flow_level = false,
+ .node_id = 256,
+ .loop_num = 8,
+ .pri_max = 8,
+ .flow_id = 0,
+ .drr_node_id = 0,
+ },
+ {
+ .port = 0,
+ .flow_level = false,
+ .node_id = 264,
+ .loop_num = 8,
+ .pri_max = 8,
+ .flow_id = 0,
+ .drr_node_id = 0,
+ },
+ {
+ .port = 1,
+ .flow_level = true,
+ .node_id = 36,
+ .loop_num = 2,
+ .pri_max = 0,
+ .flow_id = 1,
+ .drr_node_id = 8,
+ },
+ {
+ .port = 1,
+ .flow_level = false,
+ .node_id = 144,
+ .loop_num = 16,
+ .pri_max = 8,
+ .flow_id = 36,
+ .drr_node_id = 48,
+ },
+ {
+ .port = 1,
+ .flow_level = false,
+ .node_id = 272,
+ .loop_num = 4,
+ .pri_max = 4,
+ .flow_id = 36,
+ .drr_node_id = 48,
+ },
+ {
+ .port = 2,
+ .flow_level = true,
+ .node_id = 40,
+ .loop_num = 2,
+ .pri_max = 0,
+ .flow_id = 2,
+ .drr_node_id = 12,
+ },
+ {
+ .port = 2,
+ .flow_level = false,
+ .node_id = 160,
+ .loop_num = 16,
+ .pri_max = 8,
+ .flow_id = 40,
+ .drr_node_id = 64,
+ },
+ {
+ .port = 2,
+ .flow_level = false,
+ .node_id = 276,
+ .loop_num = 4,
+ .pri_max = 4,
+ .flow_id = 40,
+ .drr_node_id = 64,
+ },
+ {
+ .port = 3,
+ .flow_level = true,
+ .node_id = 44,
+ .loop_num = 2,
+ .pri_max = 0,
+ .flow_id = 3,
+ .drr_node_id = 16,
+ },
+ {
+ .port = 3,
+ .flow_level = false,
+ .node_id = 176,
+ .loop_num = 16,
+ .pri_max = 8,
+ .flow_id = 44,
+ .drr_node_id = 80,
+ },
+ {
+ .port = 3,
+ .flow_level = false,
+ .node_id = 280,
+ .loop_num = 4,
+ .pri_max = 4,
+ .flow_id = 44,
+ .drr_node_id = 80,
+ },
+ {
+ .port = 4,
+ .flow_level = true,
+ .node_id = 48,
+ .loop_num = 2,
+ .pri_max = 0,
+ .flow_id = 4,
+ .drr_node_id = 20,
+ },
+ {
+ .port = 4,
+ .flow_level = false,
+ .node_id = 192,
+ .loop_num = 16,
+ .pri_max = 8,
+ .flow_id = 48,
+ .drr_node_id = 96,
+ },
+ {
+ .port = 4,
+ .flow_level = false,
+ .node_id = 284,
+ .loop_num = 4,
+ .pri_max = 4,
+ .flow_id = 48,
+ .drr_node_id = 96,
+ },
+ {
+ .port = 5,
+ .flow_level = true,
+ .node_id = 52,
+ .loop_num = 2,
+ .pri_max = 0,
+ .flow_id = 5,
+ .drr_node_id = 24,
+ },
+ {
+ .port = 5,
+ .flow_level = false,
+ .node_id = 208,
+ .loop_num = 16,
+ .pri_max = 8,
+ .flow_id = 52,
+ .drr_node_id = 112,
+ },
+ {
+ .port = 5,
+ .flow_level = false,
+ .node_id = 288,
+ .loop_num = 4,
+ .pri_max = 4,
+ .flow_id = 52,
+ .drr_node_id = 112,
+ },
+ {
+ .port = 6,
+ .flow_level = true,
+ .node_id = 56,
+ .loop_num = 2,
+ .pri_max = 0,
+ .flow_id = 6,
+ .drr_node_id = 28,
+ },
+ {
+ .port = 6,
+ .flow_level = false,
+ .node_id = 224,
+ .loop_num = 16,
+ .pri_max = 8,
+ .flow_id = 56,
+ .drr_node_id = 128,
+ },
+ {
+ .port = 6,
+ .flow_level = false,
+ .node_id = 292,
+ .loop_num = 4,
+ .pri_max = 4,
+ .flow_id = 56,
+ .drr_node_id = 128,
+ },
+ {
+ .port = 7,
+ .flow_level = true,
+ .node_id = 60,
+ .loop_num = 2,
+ .pri_max = 0,
+ .flow_id = 7,
+ .drr_node_id = 32,
+ },
+ {
+ .port = 7,
+ .flow_level = false,
+ .node_id = 240,
+ .loop_num = 16,
+ .pri_max = 8,
+ .flow_id = 60,
+ .drr_node_id = 144,
+ },
+ {
+ .port = 7,
+ .flow_level = false,
+ .node_id = 296,
+ .loop_num = 4,
+ .pri_max = 4,
+ .flow_id = 60,
+ .drr_node_id = 144,
+ },
+};
+
+/* The scheduler resources are allocated among the PPE ports. The
+ * resources include the unicast and multicast queues, flow nodes and
+ * DRR nodes.
+ */
+static const struct ppe_port_schedule_resource ppe_scheduler_res[] = {
+ { .ucastq_start = 0,
+ .ucastq_end = 63,
+ .mcastq_start = 256,
+ .mcastq_end = 271,
+ .flow_id_start = 0,
+ .flow_id_end = 0,
+ .l0node_start = 0,
+ .l0node_end = 7,
+ .l1node_start = 0,
+ .l1node_end = 0,
+ },
+ { .ucastq_start = 144,
+ .ucastq_end = 159,
+ .mcastq_start = 272,
+ .mcastq_end = 275,
+ .flow_id_start = 36,
+ .flow_id_end = 39,
+ .l0node_start = 48,
+ .l0node_end = 63,
+ .l1node_start = 8,
+ .l1node_end = 11,
+ },
+ { .ucastq_start = 160,
+ .ucastq_end = 175,
+ .mcastq_start = 276,
+ .mcastq_end = 279,
+ .flow_id_start = 40,
+ .flow_id_end = 43,
+ .l0node_start = 64,
+ .l0node_end = 79,
+ .l1node_start = 12,
+ .l1node_end = 15,
+ },
+ { .ucastq_start = 176,
+ .ucastq_end = 191,
+ .mcastq_start = 280,
+ .mcastq_end = 283,
+ .flow_id_start = 44,
+ .flow_id_end = 47,
+ .l0node_start = 80,
+ .l0node_end = 95,
+ .l1node_start = 16,
+ .l1node_end = 19,
+ },
+ { .ucastq_start = 192,
+ .ucastq_end = 207,
+ .mcastq_start = 284,
+ .mcastq_end = 287,
+ .flow_id_start = 48,
+ .flow_id_end = 51,
+ .l0node_start = 96,
+ .l0node_end = 111,
+ .l1node_start = 20,
+ .l1node_end = 23,
+ },
+ { .ucastq_start = 208,
+ .ucastq_end = 223,
+ .mcastq_start = 288,
+ .mcastq_end = 291,
+ .flow_id_start = 52,
+ .flow_id_end = 55,
+ .l0node_start = 112,
+ .l0node_end = 127,
+ .l1node_start = 24,
+ .l1node_end = 27,
+ },
+ { .ucastq_start = 224,
+ .ucastq_end = 239,
+ .mcastq_start = 292,
+ .mcastq_end = 295,
+ .flow_id_start = 56,
+ .flow_id_end = 59,
+ .l0node_start = 128,
+ .l0node_end = 143,
+ .l1node_start = 28,
+ .l1node_end = 31,
+ },
+ { .ucastq_start = 240,
+ .ucastq_end = 255,
+ .mcastq_start = 296,
+ .mcastq_end = 299,
+ .flow_id_start = 60,
+ .flow_id_end = 63,
+ .l0node_start = 144,
+ .l0node_end = 159,
+ .l1node_start = 32,
+ .l1node_end = 35,
+ },
+ { .ucastq_start = 64,
+ .ucastq_end = 143,
+ .mcastq_start = 0,
+ .mcastq_end = 0,
+ .flow_id_start = 1,
+ .flow_id_end = 35,
+ .l0node_start = 8,
+ .l0node_end = 47,
+ .l1node_start = 1,
+ .l1node_end = 7,
+ },
+};
+
+/* Set the PPE queue level scheduler configuration. */
+static int ppe_scheduler_l0_queue_map_set(struct ppe_device *ppe_dev,
+ int node_id, int port,
+ struct ppe_scheduler_cfg scheduler_cfg)
+{
+ u32 val, reg;
+ int ret;
+
+ reg = PPE_L0_FLOW_MAP_TBL_ADDR + node_id * PPE_L0_FLOW_MAP_TBL_INC;
+ val = FIELD_PREP(PPE_L0_FLOW_MAP_TBL_FLOW_ID, scheduler_cfg.flow_id);
+ val |= FIELD_PREP(PPE_L0_FLOW_MAP_TBL_C_PRI, scheduler_cfg.pri);
+ val |= FIELD_PREP(PPE_L0_FLOW_MAP_TBL_E_PRI, scheduler_cfg.pri);
+ val |= FIELD_PREP(PPE_L0_FLOW_MAP_TBL_C_NODE_WT, scheduler_cfg.drr_node_wt);
+ val |= FIELD_PREP(PPE_L0_FLOW_MAP_TBL_E_NODE_WT, scheduler_cfg.drr_node_wt);
+
+ ret = regmap_write(ppe_dev->regmap, reg, val);
+ if (ret)
+ return ret;
+
+ reg = PPE_L0_C_FLOW_CFG_TBL_ADDR +
+ (scheduler_cfg.flow_id * PPE_QUEUE_SCH_PRI_NUM + scheduler_cfg.pri) *
+ PPE_L0_C_FLOW_CFG_TBL_INC;
+ val = FIELD_PREP(PPE_L0_C_FLOW_CFG_TBL_NODE_ID, scheduler_cfg.drr_node_id);
+ val |= FIELD_PREP(PPE_L0_C_FLOW_CFG_TBL_NODE_CREDIT_UNIT, scheduler_cfg.unit_is_packet);
+
+ ret = regmap_write(ppe_dev->regmap, reg, val);
+ if (ret)
+ return ret;
+
+ reg = PPE_L0_E_FLOW_CFG_TBL_ADDR +
+ (scheduler_cfg.flow_id * PPE_QUEUE_SCH_PRI_NUM + scheduler_cfg.pri) *
+ PPE_L0_E_FLOW_CFG_TBL_INC;
+ val = FIELD_PREP(PPE_L0_E_FLOW_CFG_TBL_NODE_ID, scheduler_cfg.drr_node_id);
+ val |= FIELD_PREP(PPE_L0_E_FLOW_CFG_TBL_NODE_CREDIT_UNIT, scheduler_cfg.unit_is_packet);
+
+ ret = regmap_write(ppe_dev->regmap, reg, val);
+ if (ret)
+ return ret;
+
+ reg = PPE_L0_FLOW_PORT_MAP_TBL_ADDR + node_id * PPE_L0_FLOW_PORT_MAP_TBL_INC;
+ val = FIELD_PREP(PPE_L0_FLOW_PORT_MAP_TBL_PORT_NUM, port);
+
+ ret = regmap_write(ppe_dev->regmap, reg, val);
+ if (ret)
+ return ret;
+
+ reg = PPE_L0_COMP_CFG_TBL_ADDR + node_id * PPE_L0_COMP_CFG_TBL_INC;
+ val = FIELD_PREP(PPE_L0_COMP_CFG_TBL_NODE_METER_LEN, scheduler_cfg.frame_mode);
+
+ return regmap_update_bits(ppe_dev->regmap, reg,
+ PPE_L0_COMP_CFG_TBL_NODE_METER_LEN,
+ val);
+}
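
The helpers above and below share one addressing pattern: a table base plus index times stride, with the per-(flow, priority) tables indexed as flow_id * 8 + pri. A comment-only sketch (the _ADDR/_INC constants live in ppe_regs.h, not shown here):

/* Table addressing pattern used by the scheduler helpers:
 *
 *	reg = PPE_<TBL>_ADDR + entry * PPE_<TBL>_INC;
 *
 * where, for the per-(flow, priority) C/E flow config tables,
 *
 *	entry = flow_id * PPE_QUEUE_SCH_PRI_NUM + pri;	// 8 priorities per flow
 */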
+
+/* Set the PPE flow level scheduler configuration. */
+static int ppe_scheduler_l1_queue_map_set(struct ppe_device *ppe_dev,
+ int node_id, int port,
+ struct ppe_scheduler_cfg scheduler_cfg)
+{
+ u32 val, reg;
+ int ret;
+
+ val = FIELD_PREP(PPE_L1_FLOW_MAP_TBL_FLOW_ID, scheduler_cfg.flow_id);
+ val |= FIELD_PREP(PPE_L1_FLOW_MAP_TBL_C_PRI, scheduler_cfg.pri);
+ val |= FIELD_PREP(PPE_L1_FLOW_MAP_TBL_E_PRI, scheduler_cfg.pri);
+ val |= FIELD_PREP(PPE_L1_FLOW_MAP_TBL_C_NODE_WT, scheduler_cfg.drr_node_wt);
+ val |= FIELD_PREP(PPE_L1_FLOW_MAP_TBL_E_NODE_WT, scheduler_cfg.drr_node_wt);
+ reg = PPE_L1_FLOW_MAP_TBL_ADDR + node_id * PPE_L1_FLOW_MAP_TBL_INC;
+
+ ret = regmap_write(ppe_dev->regmap, reg, val);
+ if (ret)
+ return ret;
+
+ val = FIELD_PREP(PPE_L1_C_FLOW_CFG_TBL_NODE_ID, scheduler_cfg.drr_node_id);
+ val |= FIELD_PREP(PPE_L1_C_FLOW_CFG_TBL_NODE_CREDIT_UNIT, scheduler_cfg.unit_is_packet);
+ reg = PPE_L1_C_FLOW_CFG_TBL_ADDR +
+ (scheduler_cfg.flow_id * PPE_QUEUE_SCH_PRI_NUM + scheduler_cfg.pri) *
+ PPE_L1_C_FLOW_CFG_TBL_INC;
+
+ ret = regmap_write(ppe_dev->regmap, reg, val);
+ if (ret)
+ return ret;
+
+ val = FIELD_PREP(PPE_L1_E_FLOW_CFG_TBL_NODE_ID, scheduler_cfg.drr_node_id);
+ val |= FIELD_PREP(PPE_L1_E_FLOW_CFG_TBL_NODE_CREDIT_UNIT, scheduler_cfg.unit_is_packet);
+ reg = PPE_L1_E_FLOW_CFG_TBL_ADDR +
+ (scheduler_cfg.flow_id * PPE_QUEUE_SCH_PRI_NUM + scheduler_cfg.pri) *
+ PPE_L1_E_FLOW_CFG_TBL_INC;
+
+ ret = regmap_write(ppe_dev->regmap, reg, val);
+ if (ret)
+ return ret;
+
+ val = FIELD_PREP(PPE_L1_FLOW_PORT_MAP_TBL_PORT_NUM, port);
+ reg = PPE_L1_FLOW_PORT_MAP_TBL_ADDR + node_id * PPE_L1_FLOW_PORT_MAP_TBL_INC;
+
+ ret = regmap_write(ppe_dev->regmap, reg, val);
+ if (ret)
+ return ret;
+
+ reg = PPE_L1_COMP_CFG_TBL_ADDR + node_id * PPE_L1_COMP_CFG_TBL_INC;
+ val = FIELD_PREP(PPE_L1_COMP_CFG_TBL_NODE_METER_LEN, scheduler_cfg.frame_mode);
+
+ return regmap_update_bits(ppe_dev->regmap, reg, PPE_L1_COMP_CFG_TBL_NODE_METER_LEN, val);
+}
+
+/**
+ * ppe_queue_scheduler_set - Configure scheduler for PPE hardware queue
+ * @ppe_dev: PPE device
+ * @node_id: PPE queue ID or flow ID
+ * @flow_level: Flow level scheduler or queue level scheduler
+ * @port: PPE port ID on which to set the scheduler configuration
+ * @scheduler_cfg: PPE scheduler configuration
+ *
+ * PPE scheduler configuration supports queue level and flow level on
+ * the PPE egress port.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int ppe_queue_scheduler_set(struct ppe_device *ppe_dev,
+ int node_id, bool flow_level, int port,
+ struct ppe_scheduler_cfg scheduler_cfg)
+{
+ if (flow_level)
+ return ppe_scheduler_l1_queue_map_set(ppe_dev, node_id,
+ port, scheduler_cfg);
+
+ return ppe_scheduler_l0_queue_map_set(ppe_dev, node_id,
+ port, scheduler_cfg);
+}
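+
+/* Usage sketch (illustrative only, all values hypothetical): configure a
+ * byte based DRR node at queue level for port 1.
+ *
+ *	struct ppe_scheduler_cfg cfg = {
+ *		.flow_id = 1,
+ *		.pri = 0,
+ *		.drr_node_id = 8,
+ *		.drr_node_wt = 1,
+ *		.unit_is_packet = false,
+ *		.frame_mode = PPE_SCH_WITH_FRAME_CRC,
+ *	};
+ *	int err = ppe_queue_scheduler_set(ppe_dev, 8, false, 1, cfg);
+ */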
+
+/**
+ * ppe_queue_ucast_base_set - Set PPE unicast queue base ID and profile ID
+ * @ppe_dev: PPE device
+ * @queue_dst: PPE queue destination configuration
+ * @queue_base: PPE queue base ID
+ * @profile_id: Profile ID
+ *
+ * The PPE unicast queue base ID and profile ID are configured based on the
+ * destination information, which can be the service code, the CPU code or
+ * the destination port.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int ppe_queue_ucast_base_set(struct ppe_device *ppe_dev,
+ struct ppe_queue_ucast_dest queue_dst,
+ int queue_base, int profile_id)
+{
+ int index, profile_size;
+ u32 val, reg;
+
+ profile_size = queue_dst.src_profile << 8;
+ if (queue_dst.service_code_en)
+ index = PPE_QUEUE_BASE_SERVICE_CODE + profile_size +
+ queue_dst.service_code;
+ else if (queue_dst.cpu_code_en)
+ index = PPE_QUEUE_BASE_CPU_CODE + profile_size +
+ queue_dst.cpu_code;
+ else
+ index = profile_size + queue_dst.dest_port;
+
+ val = FIELD_PREP(PPE_UCAST_QUEUE_MAP_TBL_PROFILE_ID, profile_id);
+ val |= FIELD_PREP(PPE_UCAST_QUEUE_MAP_TBL_QUEUE_ID, queue_base);
+ reg = PPE_UCAST_QUEUE_MAP_TBL_ADDR + index * PPE_UCAST_QUEUE_MAP_TBL_INC;
+
+ return regmap_write(ppe_dev->regmap, reg, val);
+}
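+
+/* Worked example for the table index selection above (values hypothetical):
+ * with src_profile = 1, service_code_en = true and service_code = 5, the
+ * entry used is PPE_QUEUE_BASE_SERVICE_CODE + (1 << 8) + 5, i.e.
+ * 2048 + 256 + 5.
+ */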
+
+/**
+ * ppe_queue_ucast_offset_pri_set - Set PPE unicast queue offset based on priority
+ * @ppe_dev: PPE device
+ * @profile_id: Profile ID
+ * @priority: PPE internal priority to be used to set queue offset
+ * @queue_offset: Queue offset used for calculating the destination queue ID
+ *
+ * The PPE unicast queue offset is configured based on the PPE
+ * internal priority.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int ppe_queue_ucast_offset_pri_set(struct ppe_device *ppe_dev,
+ int profile_id,
+ int priority,
+ int queue_offset)
+{
+ u32 val, reg;
+ int index;
+
+ index = (profile_id << 4) + priority;
+ val = FIELD_PREP(PPE_UCAST_PRIORITY_MAP_TBL_CLASS, queue_offset);
+ reg = PPE_UCAST_PRIORITY_MAP_TBL_ADDR + index * PPE_UCAST_PRIORITY_MAP_TBL_INC;
+
+ return regmap_write(ppe_dev->regmap, reg, val);
+}
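+
+/* Usage sketch (values hypothetical): map internal priority 3 of profile 0
+ * to queue offset 3, so that the destination queue is derived from the
+ * queue base configured by ppe_queue_ucast_base_set() plus this offset.
+ *
+ *	int err = ppe_queue_ucast_offset_pri_set(ppe_dev, 0, 3, 3);
+ */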
+
+/**
+ * ppe_queue_ucast_offset_hash_set - Set PPE unicast queue offset based on hash
+ * @ppe_dev: PPE device
+ * @profile_id: Profile ID
+ * @rss_hash: Packet hash value to be used to set queue offset
+ * @queue_offset: Queue offset used for calculating the destination queue ID
+ *
+ * The PPE unicast queue offset is configured based on the RSS hash value.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int ppe_queue_ucast_offset_hash_set(struct ppe_device *ppe_dev,
+ int profile_id,
+ int rss_hash,
+ int queue_offset)
+{
+ u32 val, reg;
+ int index;
+
+ index = (profile_id << 8) + rss_hash;
+ val = FIELD_PREP(PPE_UCAST_HASH_MAP_TBL_HASH, queue_offset);
+ reg = PPE_UCAST_HASH_MAP_TBL_ADDR + index * PPE_UCAST_HASH_MAP_TBL_INC;
+
+ return regmap_write(ppe_dev->regmap, reg, val);
+}
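+
+/* Usage sketch (values hypothetical): spread the 256 RSS hash values of
+ * profile 0 evenly across four queue offsets.
+ *
+ *	for (hash = 0; hash < PPE_QUEUE_HASH_NUM; hash++) {
+ *		err = ppe_queue_ucast_offset_hash_set(ppe_dev, 0, hash,
+ *						      hash % 4);
+ *		if (err)
+ *			return err;
+ *	}
+ */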
+
+/**
+ * ppe_port_resource_get - Get PPE resource per port
+ * @ppe_dev: PPE device
+ * @port: PPE port
+ * @type: Resource type
+ * @res_start: Resource start ID returned
+ * @res_end: Resource end ID returned
+ *
+ * PPE resource is assigned per PPE port, which is acquired for QoS scheduler.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int ppe_port_resource_get(struct ppe_device *ppe_dev, int port,
+ enum ppe_resource_type type,
+ int *res_start, int *res_end)
+{
+ struct ppe_port_schedule_resource res;
+
+ /* The reserved resource entry with the maximum PPE port ID is
+ * also allowed to be acquired.
+ */
+ if (port > ppe_dev->num_ports)
+ return -EINVAL;
+
+ res = ppe_scheduler_res[port];
+ switch (type) {
+ case PPE_RES_UCAST:
+ *res_start = res.ucastq_start;
+ *res_end = res.ucastq_end;
+ break;
+ case PPE_RES_MCAST:
+ *res_start = res.mcastq_start;
+ *res_end = res.mcastq_end;
+ break;
+ case PPE_RES_FLOW_ID:
+ *res_start = res.flow_id_start;
+ *res_end = res.flow_id_end;
+ break;
+ case PPE_RES_L0_NODE:
+ *res_start = res.l0node_start;
+ *res_end = res.l0node_end;
+ break;
+ case PPE_RES_L1_NODE:
+ *res_start = res.l1node_start;
+ *res_end = res.l1node_end;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
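+
+/* Usage sketch: query the unicast queue range owned by port 1 (the actual
+ * range depends on the ppe_scheduler_res table above).
+ *
+ *	int q_start, q_end;
+ *	int err = ppe_port_resource_get(ppe_dev, 1, PPE_RES_UCAST,
+ *					&q_start, &q_end);
+ */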
+
+/**
+ * ppe_sc_config_set - Set PPE service code configuration
+ * @ppe_dev: PPE device
+ * @sc: Service ID, 0-255 supported by PPE
+ * @cfg: Service code configuration
+ *
+ * The service code is used by the PPE during its packet processing stages
+ * to perform or bypass selected operations on the packet.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int ppe_sc_config_set(struct ppe_device *ppe_dev, int sc, struct ppe_sc_cfg cfg)
+{
+ u32 val, reg, servcode_val[2] = {};
+ unsigned long bitmap_value;
+ int ret;
+
+ val = FIELD_PREP(PPE_IN_L2_SERVICE_TBL_DST_PORT_ID_VALID, cfg.dest_port_valid);
+ val |= FIELD_PREP(PPE_IN_L2_SERVICE_TBL_DST_PORT_ID, cfg.dest_port);
+ val |= FIELD_PREP(PPE_IN_L2_SERVICE_TBL_DST_DIRECTION, cfg.is_src);
+
+ bitmap_value = bitmap_read(cfg.bitmaps.egress, 0, PPE_SC_BYPASS_EGRESS_SIZE);
+ val |= FIELD_PREP(PPE_IN_L2_SERVICE_TBL_DST_BYPASS_BITMAP, bitmap_value);
+ val |= FIELD_PREP(PPE_IN_L2_SERVICE_TBL_RX_CNT_EN,
+ test_bit(PPE_SC_BYPASS_COUNTER_RX, cfg.bitmaps.counter));
+ val |= FIELD_PREP(PPE_IN_L2_SERVICE_TBL_TX_CNT_EN,
+ test_bit(PPE_SC_BYPASS_COUNTER_TX, cfg.bitmaps.counter));
+ reg = PPE_IN_L2_SERVICE_TBL_ADDR + PPE_IN_L2_SERVICE_TBL_INC * sc;
+
+ ret = regmap_write(ppe_dev->regmap, reg, val);
+ if (ret)
+ return ret;
+
+ bitmap_value = bitmap_read(cfg.bitmaps.ingress, 0, PPE_SC_BYPASS_INGRESS_SIZE);
+ PPE_SERVICE_SET_BYPASS_BITMAP(servcode_val, bitmap_value);
+ PPE_SERVICE_SET_RX_CNT_EN(servcode_val,
+ test_bit(PPE_SC_BYPASS_COUNTER_RX_VLAN, cfg.bitmaps.counter));
+ reg = PPE_SERVICE_TBL_ADDR + PPE_SERVICE_TBL_INC * sc;
+
+ ret = regmap_bulk_write(ppe_dev->regmap, reg,
+ servcode_val, ARRAY_SIZE(servcode_val));
+ if (ret)
+ return ret;
+
+ reg = PPE_EG_SERVICE_TBL_ADDR + PPE_EG_SERVICE_TBL_INC * sc;
+ ret = regmap_bulk_read(ppe_dev->regmap, reg,
+ servcode_val, ARRAY_SIZE(servcode_val));
+ if (ret)
+ return ret;
+
+ PPE_EG_SERVICE_SET_NEXT_SERVCODE(servcode_val, cfg.next_service_code);
+ PPE_EG_SERVICE_SET_UPDATE_ACTION(servcode_val, cfg.eip_field_update_bitmap);
+ PPE_EG_SERVICE_SET_HW_SERVICE(servcode_val, cfg.eip_hw_service);
+ PPE_EG_SERVICE_SET_OFFSET_SEL(servcode_val, cfg.eip_offset_sel);
+ PPE_EG_SERVICE_SET_TX_CNT_EN(servcode_val,
+ test_bit(PPE_SC_BYPASS_COUNTER_TX_VLAN, cfg.bitmaps.counter));
+
+ ret = regmap_bulk_write(ppe_dev->regmap, reg,
+ servcode_val, ARRAY_SIZE(servcode_val));
+ if (ret)
+ return ret;
+
+ bitmap_value = bitmap_read(cfg.bitmaps.tunnel, 0, PPE_SC_BYPASS_TUNNEL_SIZE);
+ val = FIELD_PREP(PPE_TL_SERVICE_TBL_BYPASS_BITMAP, bitmap_value);
+ reg = PPE_TL_SERVICE_TBL_ADDR + PPE_TL_SERVICE_TBL_INC * sc;
+
+ return regmap_write(ppe_dev->regmap, reg, val);
+}
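+
+/* Usage sketch (values hypothetical): bypass the egress ACL post-routing
+ * check for service code 2 and enable its RX packet counter. Setting a bit
+ * in the bypass bitmaps marks that feature as bypassed, while the counter
+ * bits enable the respective counters.
+ *
+ *	struct ppe_sc_cfg cfg = {};
+ *
+ *	set_bit(PPE_SC_BYPASS_EGRESS_ACL_POST_ROUTING_CHECK,
+ *		cfg.bitmaps.egress);
+ *	set_bit(PPE_SC_BYPASS_COUNTER_RX, cfg.bitmaps.counter);
+ *	err = ppe_sc_config_set(ppe_dev, 2, cfg);
+ */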
+
+/**
+ * ppe_counter_enable_set - Set PPE port counter enabled
+ * @ppe_dev: PPE device
+ * @port: PPE port ID
+ *
+ * Enable the PPE counters on the given port for the unicast, multicast and
+ * VLAN packets received and transmitted by the PPE.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int ppe_counter_enable_set(struct ppe_device *ppe_dev, int port)
+{
+ u32 reg, mru_mtu_val[3];
+ int ret;
+
+ reg = PPE_MRU_MTU_CTRL_TBL_ADDR + PPE_MRU_MTU_CTRL_TBL_INC * port;
+ ret = regmap_bulk_read(ppe_dev->regmap, reg,
+ mru_mtu_val, ARRAY_SIZE(mru_mtu_val));
+ if (ret)
+ return ret;
+
+ PPE_MRU_MTU_CTRL_SET_RX_CNT_EN(mru_mtu_val, true);
+ PPE_MRU_MTU_CTRL_SET_TX_CNT_EN(mru_mtu_val, true);
+ ret = regmap_bulk_write(ppe_dev->regmap, reg,
+ mru_mtu_val, ARRAY_SIZE(mru_mtu_val));
+ if (ret)
+ return ret;
+
+ reg = PPE_MC_MTU_CTRL_TBL_ADDR + PPE_MC_MTU_CTRL_TBL_INC * port;
+ ret = regmap_set_bits(ppe_dev->regmap, reg, PPE_MC_MTU_CTRL_TBL_TX_CNT_EN);
+ if (ret)
+ return ret;
+
+ reg = PPE_PORT_EG_VLAN_TBL_ADDR + PPE_PORT_EG_VLAN_TBL_INC * port;
+
+ return regmap_set_bits(ppe_dev->regmap, reg, PPE_PORT_EG_VLAN_TBL_TX_COUNTING_EN);
+}
+
+static int ppe_rss_hash_ipv4_config(struct ppe_device *ppe_dev, int index,
+ struct ppe_rss_hash_cfg cfg)
+{
+ u32 reg, val;
+
+ switch (index) {
+ case 0:
+ val = cfg.hash_sip_mix[0];
+ break;
+ case 1:
+ val = cfg.hash_dip_mix[0];
+ break;
+ case 2:
+ val = cfg.hash_protocol_mix;
+ break;
+ case 3:
+ val = cfg.hash_dport_mix;
+ break;
+ case 4:
+ val = cfg.hash_sport_mix;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ reg = PPE_RSS_HASH_MIX_IPV4_ADDR + index * PPE_RSS_HASH_MIX_IPV4_INC;
+
+ return regmap_update_bits(ppe_dev->regmap, reg,
+ PPE_RSS_HASH_MIX_IPV4_VAL,
+ FIELD_PREP(PPE_RSS_HASH_MIX_IPV4_VAL, val));
+}
+
+static int ppe_rss_hash_ipv6_config(struct ppe_device *ppe_dev, int index,
+ struct ppe_rss_hash_cfg cfg)
+{
+ u32 reg, val;
+
+ switch (index) {
+ case 0 ... 3:
+ val = cfg.hash_sip_mix[index];
+ break;
+ case 4 ... 7:
+ val = cfg.hash_dip_mix[index - 4];
+ break;
+ case 8:
+ val = cfg.hash_protocol_mix;
+ break;
+ case 9:
+ val = cfg.hash_dport_mix;
+ break;
+ case 10:
+ val = cfg.hash_sport_mix;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ reg = PPE_RSS_HASH_MIX_ADDR + index * PPE_RSS_HASH_MIX_INC;
+
+ return regmap_update_bits(ppe_dev->regmap, reg,
+ PPE_RSS_HASH_MIX_VAL,
+ FIELD_PREP(PPE_RSS_HASH_MIX_VAL, val));
+}
+
+/**
+ * ppe_rss_hash_config_set - Configure the PPE hash settings for received packets
+ * @ppe_dev: PPE device.
+ * @mode: Packet type(s) (IPv4 and/or IPv6) to configure the RSS hash for.
+ * @cfg: RSS hash configuration.
+ *
+ * The PPE RSS hash settings are configured separately for IPv4 and IPv6
+ * packets.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int ppe_rss_hash_config_set(struct ppe_device *ppe_dev, int mode,
+ struct ppe_rss_hash_cfg cfg)
+{
+ u32 val, reg;
+ int i, ret;
+
+ if (mode & PPE_RSS_HASH_MODE_IPV4) {
+ val = FIELD_PREP(PPE_RSS_HASH_MASK_IPV4_HASH_MASK, cfg.hash_mask);
+ val |= FIELD_PREP(PPE_RSS_HASH_MASK_IPV4_FRAGMENT, cfg.hash_fragment_mode);
+ ret = regmap_write(ppe_dev->regmap, PPE_RSS_HASH_MASK_IPV4_ADDR, val);
+ if (ret)
+ return ret;
+
+ val = FIELD_PREP(PPE_RSS_HASH_SEED_IPV4_VAL, cfg.hash_seed);
+ ret = regmap_write(ppe_dev->regmap, PPE_RSS_HASH_SEED_IPV4_ADDR, val);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < PPE_RSS_HASH_MIX_IPV4_ENTRIES; i++) {
+ ret = ppe_rss_hash_ipv4_config(ppe_dev, i, cfg);
+ if (ret)
+ return ret;
+ }
+
+ for (i = 0; i < PPE_RSS_HASH_FIN_IPV4_ENTRIES; i++) {
+ val = FIELD_PREP(PPE_RSS_HASH_FIN_IPV4_INNER, cfg.hash_fin_inner[i]);
+ val |= FIELD_PREP(PPE_RSS_HASH_FIN_IPV4_OUTER, cfg.hash_fin_outer[i]);
+ reg = PPE_RSS_HASH_FIN_IPV4_ADDR + i * PPE_RSS_HASH_FIN_IPV4_INC;
+
+ ret = regmap_write(ppe_dev->regmap, reg, val);
+ if (ret)
+ return ret;
+ }
+ }
+
+ if (mode & PPE_RSS_HASH_MODE_IPV6) {
+ val = FIELD_PREP(PPE_RSS_HASH_MASK_HASH_MASK, cfg.hash_mask);
+ val |= FIELD_PREP(PPE_RSS_HASH_MASK_FRAGMENT, cfg.hash_fragment_mode);
+ ret = regmap_write(ppe_dev->regmap, PPE_RSS_HASH_MASK_ADDR, val);
+ if (ret)
+ return ret;
+
+ val = FIELD_PREP(PPE_RSS_HASH_SEED_VAL, cfg.hash_seed);
+ ret = regmap_write(ppe_dev->regmap, PPE_RSS_HASH_SEED_ADDR, val);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < PPE_RSS_HASH_MIX_ENTRIES; i++) {
+ ret = ppe_rss_hash_ipv6_config(ppe_dev, i, cfg);
+ if (ret)
+ return ret;
+ }
+
+ for (i = 0; i < PPE_RSS_HASH_FIN_ENTRIES; i++) {
+ val = FIELD_PREP(PPE_RSS_HASH_FIN_INNER, cfg.hash_fin_inner[i]);
+ val |= FIELD_PREP(PPE_RSS_HASH_FIN_OUTER, cfg.hash_fin_outer[i]);
+ reg = PPE_RSS_HASH_FIN_ADDR + i * PPE_RSS_HASH_FIN_INC;
+
+ ret = regmap_write(ppe_dev->regmap, reg, val);
+ if (ret)
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * ppe_ring_queue_map_set - Set the PPE queue to Ethernet DMA ring mapping
+ * @ppe_dev: PPE device
+ * @ring_id: Ethernet DMA ring ID
+ * @queue_map: Bitmap of the queue IDs mapped to the given Ethernet DMA ring
+ *
+ * Configure the mapping from a set of PPE queues to a given Ethernet DMA ring.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int ppe_ring_queue_map_set(struct ppe_device *ppe_dev, int ring_id, u32 *queue_map)
+{
+ u32 reg, queue_bitmap_val[PPE_RING_TO_QUEUE_BITMAP_WORD_CNT];
+
+ memcpy(queue_bitmap_val, queue_map, sizeof(queue_bitmap_val));
+ reg = PPE_RING_Q_MAP_TBL_ADDR + PPE_RING_Q_MAP_TBL_INC * ring_id;
+
+ return regmap_bulk_write(ppe_dev->regmap, reg,
+ queue_bitmap_val,
+ ARRAY_SIZE(queue_bitmap_val));
+}
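+
+/* Usage sketch (values hypothetical): map queues 0-15 to Ethernet DMA
+ * ring 0 by setting the corresponding bits of the 10-word queue bitmap,
+ * mirroring what ppe_queues_to_ring_init() below does for the CPU port.
+ *
+ *	u32 qmap[PPE_RING_TO_QUEUE_BITMAP_WORD_CNT] = {};
+ *	int q;
+ *
+ *	for (q = 0; q < 16; q++)
+ *		qmap[q / 32] |= BIT_MASK(q % 32);
+ *	err = ppe_ring_queue_map_set(ppe_dev, 0, qmap);
+ */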
+
+static int ppe_config_bm_threshold(struct ppe_device *ppe_dev, int bm_port_id,
+ const struct ppe_bm_port_config port_cfg)
+{
+ u32 reg, val, bm_fc_val[2];
+ int ret;
+
+ reg = PPE_BM_PORT_FC_CFG_TBL_ADDR + PPE_BM_PORT_FC_CFG_TBL_INC * bm_port_id;
+ ret = regmap_bulk_read(ppe_dev->regmap, reg,
+ bm_fc_val, ARRAY_SIZE(bm_fc_val));
+ if (ret)
+ return ret;
+
+ /* Configure BM flow control related threshold. */
+ PPE_BM_PORT_FC_SET_WEIGHT(bm_fc_val, port_cfg.weight);
+ PPE_BM_PORT_FC_SET_RESUME_OFFSET(bm_fc_val, port_cfg.resume_offset);
+ PPE_BM_PORT_FC_SET_RESUME_THRESHOLD(bm_fc_val, port_cfg.resume_ceil);
+ PPE_BM_PORT_FC_SET_DYNAMIC(bm_fc_val, port_cfg.dynamic);
+ PPE_BM_PORT_FC_SET_REACT_LIMIT(bm_fc_val, port_cfg.in_fly_buf);
+ PPE_BM_PORT_FC_SET_PRE_ALLOC(bm_fc_val, port_cfg.pre_alloc);
+
+ /* Configure low/high bits of the ceiling for the BM port. */
+ val = FIELD_GET(GENMASK(2, 0), port_cfg.ceil);
+ PPE_BM_PORT_FC_SET_CEILING_LOW(bm_fc_val, val);
+ val = FIELD_GET(GENMASK(10, 3), port_cfg.ceil);
+ PPE_BM_PORT_FC_SET_CEILING_HIGH(bm_fc_val, val);
+
+ ret = regmap_bulk_write(ppe_dev->regmap, reg,
+ bm_fc_val, ARRAY_SIZE(bm_fc_val));
+ if (ret)
+ return ret;
+
+ /* Assign the default group ID 0 to the BM port. */
+ val = FIELD_PREP(PPE_BM_PORT_GROUP_ID_SHARED_GROUP_ID, 0);
+ reg = PPE_BM_PORT_GROUP_ID_ADDR + PPE_BM_PORT_GROUP_ID_INC * bm_port_id;
+ ret = regmap_update_bits(ppe_dev->regmap, reg,
+ PPE_BM_PORT_GROUP_ID_SHARED_GROUP_ID,
+ val);
+ if (ret)
+ return ret;
+
+ /* Enable BM port flow control. */
+ reg = PPE_BM_PORT_FC_MODE_ADDR + PPE_BM_PORT_FC_MODE_INC * bm_port_id;
+
+ return regmap_set_bits(ppe_dev->regmap, reg, PPE_BM_PORT_FC_MODE_EN);
+}
+
+/* Configure the buffer threshold for the port flow control function. */
+static int ppe_config_bm(struct ppe_device *ppe_dev)
+{
+ const struct ppe_bm_port_config *port_cfg;
+ unsigned int i, bm_port_id, port_cfg_cnt;
+ u32 reg, val;
+ int ret;
+
+ /* Configure the allocated buffer number only for group 0.
+ * The buffer number of groups 1-3 is already cleared to 0
+ * by the PPE reset done during driver probe.
+ */
+ reg = PPE_BM_SHARED_GROUP_CFG_ADDR;
+ val = FIELD_PREP(PPE_BM_SHARED_GROUP_CFG_SHARED_LIMIT,
+ ipq9574_ppe_bm_group_config);
+ ret = regmap_update_bits(ppe_dev->regmap, reg,
+ PPE_BM_SHARED_GROUP_CFG_SHARED_LIMIT,
+ val);
+ if (ret)
+ goto bm_config_fail;
+
+ /* Configure buffer thresholds for the BM ports. */
+ port_cfg = ipq9574_ppe_bm_port_config;
+ port_cfg_cnt = ARRAY_SIZE(ipq9574_ppe_bm_port_config);
+ for (i = 0; i < port_cfg_cnt; i++) {
+ for (bm_port_id = port_cfg[i].port_id_start;
+ bm_port_id <= port_cfg[i].port_id_end; bm_port_id++) {
+ ret = ppe_config_bm_threshold(ppe_dev, bm_port_id,
+ port_cfg[i]);
+ if (ret)
+ goto bm_config_fail;
+ }
+ }
+
+ return 0;
+
+bm_config_fail:
+ dev_err(ppe_dev->dev, "PPE BM config error %d\n", ret);
+ return ret;
+}
+
+/* Configure the PPE hardware queue depth, which is decided by the queue
+ * threshold.
+ */
+static int ppe_config_qm(struct ppe_device *ppe_dev)
+{
+ const struct ppe_qm_queue_config *queue_cfg;
+ int ret, i, queue_id, queue_cfg_count;
+ u32 reg, multicast_queue_cfg[5];
+ u32 unicast_queue_cfg[4];
+ u32 group_cfg[3];
+
+ /* Assign the buffer number to group 0 by default. */
+ reg = PPE_AC_GRP_CFG_TBL_ADDR;
+ ret = regmap_bulk_read(ppe_dev->regmap, reg,
+ group_cfg, ARRAY_SIZE(group_cfg));
+ if (ret)
+ goto qm_config_fail;
+
+ PPE_AC_GRP_SET_BUF_LIMIT(group_cfg, ipq9574_ppe_qm_group_config);
+
+ ret = regmap_bulk_write(ppe_dev->regmap, reg,
+ group_cfg, ARRAY_SIZE(group_cfg));
+ if (ret)
+ goto qm_config_fail;
+
+ queue_cfg = ipq9574_ppe_qm_queue_config;
+ queue_cfg_count = ARRAY_SIZE(ipq9574_ppe_qm_queue_config);
+ for (i = 0; i < queue_cfg_count; i++) {
+ queue_id = queue_cfg[i].queue_start;
+
+ /* Configure threshold for dropping packets separately for
+ * unicast and multicast PPE queues.
+ */
+ while (queue_id <= queue_cfg[i].queue_end) {
+ if (queue_id < PPE_AC_UNICAST_QUEUE_CFG_TBL_ENTRIES) {
+ reg = PPE_AC_UNICAST_QUEUE_CFG_TBL_ADDR +
+ PPE_AC_UNICAST_QUEUE_CFG_TBL_INC * queue_id;
+
+ ret = regmap_bulk_read(ppe_dev->regmap, reg,
+ unicast_queue_cfg,
+ ARRAY_SIZE(unicast_queue_cfg));
+ if (ret)
+ goto qm_config_fail;
+
+ PPE_AC_UNICAST_QUEUE_SET_EN(unicast_queue_cfg, true);
+ PPE_AC_UNICAST_QUEUE_SET_GRP_ID(unicast_queue_cfg, 0);
+ PPE_AC_UNICAST_QUEUE_SET_PRE_LIMIT(unicast_queue_cfg,
+ queue_cfg[i].prealloc_buf);
+ PPE_AC_UNICAST_QUEUE_SET_DYNAMIC(unicast_queue_cfg,
+ queue_cfg[i].dynamic);
+ PPE_AC_UNICAST_QUEUE_SET_WEIGHT(unicast_queue_cfg,
+ queue_cfg[i].weight);
+ PPE_AC_UNICAST_QUEUE_SET_THRESHOLD(unicast_queue_cfg,
+ queue_cfg[i].ceil);
+ PPE_AC_UNICAST_QUEUE_SET_GRN_RESUME(unicast_queue_cfg,
+ queue_cfg[i].resume_offset);
+
+ ret = regmap_bulk_write(ppe_dev->regmap, reg,
+ unicast_queue_cfg,
+ ARRAY_SIZE(unicast_queue_cfg));
+ if (ret)
+ goto qm_config_fail;
+ } else {
+ reg = PPE_AC_MULTICAST_QUEUE_CFG_TBL_ADDR +
+ PPE_AC_MULTICAST_QUEUE_CFG_TBL_INC * queue_id;
+
+ ret = regmap_bulk_read(ppe_dev->regmap, reg,
+ multicast_queue_cfg,
+ ARRAY_SIZE(multicast_queue_cfg));
+ if (ret)
+ goto qm_config_fail;
+
+ PPE_AC_MULTICAST_QUEUE_SET_EN(multicast_queue_cfg, true);
+ PPE_AC_MULTICAST_QUEUE_SET_GRN_GRP_ID(multicast_queue_cfg, 0);
+ PPE_AC_MULTICAST_QUEUE_SET_GRN_PRE_LIMIT(multicast_queue_cfg,
+ queue_cfg[i].prealloc_buf);
+ PPE_AC_MULTICAST_QUEUE_SET_GRN_THRESHOLD(multicast_queue_cfg,
+ queue_cfg[i].ceil);
+ PPE_AC_MULTICAST_QUEUE_SET_GRN_RESUME(multicast_queue_cfg,
+ queue_cfg[i].resume_offset);
+
+ ret = regmap_bulk_write(ppe_dev->regmap, reg,
+ multicast_queue_cfg,
+ ARRAY_SIZE(multicast_queue_cfg));
+ if (ret)
+ goto qm_config_fail;
+ }
+
+ /* Enable enqueue. */
+ reg = PPE_ENQ_OPR_TBL_ADDR + PPE_ENQ_OPR_TBL_INC * queue_id;
+ ret = regmap_clear_bits(ppe_dev->regmap, reg,
+ PPE_ENQ_OPR_TBL_ENQ_DISABLE);
+ if (ret)
+ goto qm_config_fail;
+
+ /* Enable dequeue. */
+ reg = PPE_DEQ_OPR_TBL_ADDR + PPE_DEQ_OPR_TBL_INC * queue_id;
+ ret = regmap_clear_bits(ppe_dev->regmap, reg,
+ PPE_DEQ_OPR_TBL_DEQ_DISABLE);
+ if (ret)
+ goto qm_config_fail;
+
+ queue_id++;
+ }
+ }
+
+ /* Enable queue counter for all PPE hardware queues. */
+ ret = regmap_set_bits(ppe_dev->regmap, PPE_EG_BRIDGE_CONFIG_ADDR,
+ PPE_EG_BRIDGE_CONFIG_QUEUE_CNT_EN);
+ if (ret)
+ goto qm_config_fail;
+
+ return 0;
+
+qm_config_fail:
+ dev_err(ppe_dev->dev, "PPE QM config error %d\n", ret);
+ return ret;
+}
+
+static int ppe_node_scheduler_config(struct ppe_device *ppe_dev,
+ const struct ppe_scheduler_port_config config)
+{
+ struct ppe_scheduler_cfg sch_cfg;
+ int ret, i;
+
+ for (i = 0; i < config.loop_num; i++) {
+ if (!config.pri_max) {
+ /* Round robin scheduler without priority. */
+ sch_cfg.flow_id = config.flow_id;
+ sch_cfg.pri = 0;
+ sch_cfg.drr_node_id = config.drr_node_id;
+ } else {
+ sch_cfg.flow_id = config.flow_id + (i / config.pri_max);
+ sch_cfg.pri = i % config.pri_max;
+ sch_cfg.drr_node_id = config.drr_node_id + i;
+ }
+
+ /* Scheduler weight, must be greater than 0. */
+ sch_cfg.drr_node_wt = 1;
+ /* Byte based scheduling. */
+ sch_cfg.unit_is_packet = false;
+ /* Scheduled frame length counts the frame plus CRC. */
+ sch_cfg.frame_mode = PPE_SCH_WITH_FRAME_CRC;
+
+ ret = ppe_queue_scheduler_set(ppe_dev, config.node_id + i,
+ config.flow_level,
+ config.port,
+ sch_cfg);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+/* Initialize scheduler settings for PPE buffer utilization and packet
+ * dispatch on the PPE queues.
+ */
+static int ppe_config_scheduler(struct ppe_device *ppe_dev)
+{
+ const struct ppe_scheduler_port_config *port_cfg;
+ const struct ppe_scheduler_qm_config *qm_cfg;
+ const struct ppe_scheduler_bm_config *bm_cfg;
+ int ret, i, count;
+ u32 val, reg;
+
+ count = ARRAY_SIZE(ipq9574_ppe_sch_bm_config);
+ bm_cfg = ipq9574_ppe_sch_bm_config;
+
+ /* Configure the depth of BM scheduler entries. */
+ val = FIELD_PREP(PPE_BM_SCH_CTRL_SCH_DEPTH, count);
+ val |= FIELD_PREP(PPE_BM_SCH_CTRL_SCH_OFFSET, 0);
+ val |= FIELD_PREP(PPE_BM_SCH_CTRL_SCH_EN, 1);
+
+ ret = regmap_write(ppe_dev->regmap, PPE_BM_SCH_CTRL_ADDR, val);
+ if (ret)
+ goto sch_config_fail;
+
+ /* Configure each BM scheduler entry with the valid ingress port and
+ * egress port; the second port takes effect when the specified port
+ * is in the inactive state.
+ */
+ for (i = 0; i < count; i++) {
+ val = FIELD_PREP(PPE_BM_SCH_CFG_TBL_VALID, bm_cfg[i].valid);
+ val |= FIELD_PREP(PPE_BM_SCH_CFG_TBL_DIR, bm_cfg[i].dir);
+ val |= FIELD_PREP(PPE_BM_SCH_CFG_TBL_PORT_NUM, bm_cfg[i].port);
+ val |= FIELD_PREP(PPE_BM_SCH_CFG_TBL_SECOND_PORT_VALID,
+ bm_cfg[i].backup_port_valid);
+ val |= FIELD_PREP(PPE_BM_SCH_CFG_TBL_SECOND_PORT,
+ bm_cfg[i].backup_port);
+
+ reg = PPE_BM_SCH_CFG_TBL_ADDR + i * PPE_BM_SCH_CFG_TBL_INC;
+ ret = regmap_write(ppe_dev->regmap, reg, val);
+ if (ret)
+ goto sch_config_fail;
+ }
+
+ count = ARRAY_SIZE(ipq9574_ppe_sch_qm_config);
+ qm_cfg = ipq9574_ppe_sch_qm_config;
+
+ /* Configure the depth of QM scheduler entries. */
+ val = FIELD_PREP(PPE_PSCH_SCH_DEPTH_CFG_SCH_DEPTH, count);
+ ret = regmap_write(ppe_dev->regmap, PPE_PSCH_SCH_DEPTH_CFG_ADDR, val);
+ if (ret)
+ goto sch_config_fail;
+
+ /* Configure each QM scheduler entry with the enqueue port and dequeue
+ * port; the second port takes effect when the specified dequeue
+ * port is in the inactive state.
+ */
+ for (i = 0; i < count; i++) {
+ val = FIELD_PREP(PPE_PSCH_SCH_CFG_TBL_ENS_PORT_BITMAP,
+ qm_cfg[i].ensch_port_bmp);
+ val |= FIELD_PREP(PPE_PSCH_SCH_CFG_TBL_ENS_PORT,
+ qm_cfg[i].ensch_port);
+ val |= FIELD_PREP(PPE_PSCH_SCH_CFG_TBL_DES_PORT,
+ qm_cfg[i].desch_port);
+ val |= FIELD_PREP(PPE_PSCH_SCH_CFG_TBL_DES_SECOND_PORT_EN,
+ qm_cfg[i].desch_backup_port_valid);
+ val |= FIELD_PREP(PPE_PSCH_SCH_CFG_TBL_DES_SECOND_PORT,
+ qm_cfg[i].desch_backup_port);
+
+ reg = PPE_PSCH_SCH_CFG_TBL_ADDR + i * PPE_PSCH_SCH_CFG_TBL_INC;
+ ret = regmap_write(ppe_dev->regmap, reg, val);
+ if (ret)
+ goto sch_config_fail;
+ }
+
+ count = ARRAY_SIZE(ppe_port_sch_config);
+ port_cfg = ppe_port_sch_config;
+
+ /* Configure scheduler per PPE queue or flow. */
+ for (i = 0; i < count; i++) {
+ if (port_cfg[i].port >= ppe_dev->num_ports)
+ break;
+
+ ret = ppe_node_scheduler_config(ppe_dev, port_cfg[i]);
+ if (ret)
+ goto sch_config_fail;
+ }
+
+ return 0;
+
+sch_config_fail:
+ dev_err(ppe_dev->dev, "PPE scheduler arbitration config error %d\n", ret);
+ return ret;
+}
+
+/* Configure the PPE queue destination for each PPE port. */
+static int ppe_queue_dest_init(struct ppe_device *ppe_dev)
+{
+ int ret, port_id, index, q_base, q_offset, res_start, res_end, pri_max;
+ struct ppe_queue_ucast_dest queue_dst;
+
+ for (port_id = 0; port_id < ppe_dev->num_ports; port_id++) {
+ memset(&queue_dst, 0, sizeof(queue_dst));
+
+ ret = ppe_port_resource_get(ppe_dev, port_id, PPE_RES_UCAST,
+ &res_start, &res_end);
+ if (ret)
+ return ret;
+
+ q_base = res_start;
+ queue_dst.dest_port = port_id;
+
+ /* Configure the queue base ID, and a profile ID that is the
+ * same as the physical port ID.
+ */
+ ret = ppe_queue_ucast_base_set(ppe_dev, queue_dst,
+ q_base, port_id);
+ if (ret)
+ return ret;
+
+ /* Queue priority range supported by each PPE port. */
+ ret = ppe_port_resource_get(ppe_dev, port_id, PPE_RES_L0_NODE,
+ &res_start, &res_end);
+ if (ret)
+ return ret;
+
+ pri_max = res_end - res_start;
+
+ /* Redirect ARP reply packets to the CPU port with the maximum
+ * priority, which keeps ARP replies directed to the CPU (CPU
+ * code 101) on the highest priority queue of the EDMA.
+ */
+ if (port_id == 0) {
+ memset(&queue_dst, 0, sizeof(queue_dst));
+
+ queue_dst.cpu_code_en = true;
+ queue_dst.cpu_code = 101;
+ ret = ppe_queue_ucast_base_set(ppe_dev, queue_dst,
+ q_base + pri_max,
+ 0);
+ if (ret)
+ return ret;
+ }
+
+ /* Initialize the queue offset of internal priority. */
+ for (index = 0; index < PPE_QUEUE_INTER_PRI_NUM; index++) {
+ q_offset = index > pri_max ? pri_max : index;
+
+ ret = ppe_queue_ucast_offset_pri_set(ppe_dev, port_id,
+ index, q_offset);
+ if (ret)
+ return ret;
+ }
+
+ /* Initialize the queue offset of the RSS hash as 0 to avoid a
+ * random hardware value leading to an unexpected destination
+ * queue.
+ */
+ for (index = 0; index < PPE_QUEUE_HASH_NUM; index++) {
+ ret = ppe_queue_ucast_offset_hash_set(ppe_dev, port_id,
+ index, 0);
+ if (ret)
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+/* Initialize the service code 1 used by the CPU port. */
+static int ppe_servcode_init(struct ppe_device *ppe_dev)
+{
+ struct ppe_sc_cfg sc_cfg = {};
+
+ bitmap_zero(sc_cfg.bitmaps.counter, PPE_SC_BYPASS_COUNTER_SIZE);
+ bitmap_zero(sc_cfg.bitmaps.tunnel, PPE_SC_BYPASS_TUNNEL_SIZE);
+
+ bitmap_fill(sc_cfg.bitmaps.ingress, PPE_SC_BYPASS_INGRESS_SIZE);
+ clear_bit(PPE_SC_BYPASS_INGRESS_FAKE_MAC_HEADER, sc_cfg.bitmaps.ingress);
+ clear_bit(PPE_SC_BYPASS_INGRESS_SERVICE_CODE, sc_cfg.bitmaps.ingress);
+ clear_bit(PPE_SC_BYPASS_INGRESS_FAKE_L2_PROTO, sc_cfg.bitmaps.ingress);
+
+ bitmap_fill(sc_cfg.bitmaps.egress, PPE_SC_BYPASS_EGRESS_SIZE);
+ clear_bit(PPE_SC_BYPASS_EGRESS_ACL_POST_ROUTING_CHECK, sc_cfg.bitmaps.egress);
+
+ return ppe_sc_config_set(ppe_dev, PPE_EDMA_SC_BYPASS_ID, sc_cfg);
+}
+
+/* Initialize PPE port configurations. */
+static int ppe_port_config_init(struct ppe_device *ppe_dev)
+{
+ u32 reg, val, mru_mtu_val[3];
+ int i, ret;
+
+ /* MTU and MRU settings are not required for CPU port 0. */
+ for (i = 1; i < ppe_dev->num_ports; i++) {
+ /* Enable Ethernet port counter */
+ ret = ppe_counter_enable_set(ppe_dev, i);
+ if (ret)
+ return ret;
+
+ reg = PPE_MRU_MTU_CTRL_TBL_ADDR + PPE_MRU_MTU_CTRL_TBL_INC * i;
+ ret = regmap_bulk_read(ppe_dev->regmap, reg,
+ mru_mtu_val, ARRAY_SIZE(mru_mtu_val));
+ if (ret)
+ return ret;
+
+ /* Drop the packet when the packet size is more than the MTU
+ * and redirect the packet to the CPU port when the received
+ * packet size is more than the MRU of the physical interface.
+ */
+ PPE_MRU_MTU_CTRL_SET_MRU_CMD(mru_mtu_val, PPE_ACTION_REDIRECT_TO_CPU);
+ PPE_MRU_MTU_CTRL_SET_MTU_CMD(mru_mtu_val, PPE_ACTION_DROP);
+ ret = regmap_bulk_write(ppe_dev->regmap, reg,
+ mru_mtu_val, ARRAY_SIZE(mru_mtu_val));
+ if (ret)
+ return ret;
+
+ reg = PPE_MC_MTU_CTRL_TBL_ADDR + PPE_MC_MTU_CTRL_TBL_INC * i;
+ val = FIELD_PREP(PPE_MC_MTU_CTRL_TBL_MTU_CMD, PPE_ACTION_DROP);
+ ret = regmap_update_bits(ppe_dev->regmap, reg,
+ PPE_MC_MTU_CTRL_TBL_MTU_CMD,
+ val);
+ if (ret)
+ return ret;
+ }
+
+ /* Enable CPU port counters. */
+ return ppe_counter_enable_set(ppe_dev, 0);
+}
+
+/* Initialize the PPE RSS configuration for IPv4 and IPv6 packet receive.
+ * The RSS settings are used to calculate the RSS hash value generated for
+ * a received packet. This hash is then used to generate the queue offset
+ * used to determine the destination queue of the packet.
+ */
+static int ppe_rss_hash_init(struct ppe_device *ppe_dev)
+{
+ u16 fins[PPE_RSS_HASH_TUPLES] = { 0x205, 0x264, 0x227, 0x245, 0x201 };
+ u8 ips[PPE_RSS_HASH_IP_LENGTH] = { 0x13, 0xb, 0x13, 0xb };
+ struct ppe_rss_hash_cfg hash_cfg;
+ int i, ret;
+
+ hash_cfg.hash_seed = get_random_u32();
+ hash_cfg.hash_mask = 0xfff;
+
+ /* Use 5 tuple as RSS hash key for the first fragment of TCP, UDP
+ * and UDP-Lite packets.
+ */
+ hash_cfg.hash_fragment_mode = false;
+
+ /* The final common seed configs used to calculate the RSS hash value,
+ * which are applied to both IPv4 and IPv6 packets.
+ */
+ for (i = 0; i < ARRAY_SIZE(fins); i++) {
+ hash_cfg.hash_fin_inner[i] = fins[i] & 0x1f;
+ hash_cfg.hash_fin_outer[i] = fins[i] >> 5;
+ }
+
+ /* RSS seeds for IP protocol, L4 destination & source port and
+ * destination & source IP used to calculate the RSS hash value.
+ */
+ hash_cfg.hash_protocol_mix = 0x13;
+ hash_cfg.hash_dport_mix = 0xb;
+ hash_cfg.hash_sport_mix = 0x13;
+ hash_cfg.hash_dip_mix[0] = 0xb;
+ hash_cfg.hash_sip_mix[0] = 0x13;
+
+ /* Configure RSS seed configs for IPv4 packet. */
+ ret = ppe_rss_hash_config_set(ppe_dev, PPE_RSS_HASH_MODE_IPV4, hash_cfg);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < ARRAY_SIZE(ips); i++) {
+ hash_cfg.hash_sip_mix[i] = ips[i];
+ hash_cfg.hash_dip_mix[i] = ips[i];
+ }
+
+ /* Configure RSS seed configs for IPv6 packet. */
+ return ppe_rss_hash_config_set(ppe_dev, PPE_RSS_HASH_MODE_IPV6, hash_cfg);
+}
+
+/* Initialize the mapping of the PPE queues assigned to CPU port 0 to
+ * Ethernet DMA ring 0.
+ */
+static int ppe_queues_to_ring_init(struct ppe_device *ppe_dev)
+{
+ u32 queue_bmap[PPE_RING_TO_QUEUE_BITMAP_WORD_CNT] = {};
+ int ret, queue_id, queue_max;
+
+ ret = ppe_port_resource_get(ppe_dev, 0, PPE_RES_UCAST,
+ &queue_id, &queue_max);
+ if (ret)
+ return ret;
+
+ for (; queue_id <= queue_max; queue_id++)
+ queue_bmap[queue_id / 32] |= BIT_MASK(queue_id % 32);
+
+ return ppe_ring_queue_map_set(ppe_dev, 0, queue_bmap);
+}
+
+/* Initialize PPE bridge settings to only enable L2 frame receive and
+ * transmit between CPU port and PPE Ethernet ports.
+ */
+static int ppe_bridge_init(struct ppe_device *ppe_dev)
+{
+ u32 reg, mask, port_cfg[4], vsi_cfg[2];
+ int ret, i;
+
+ /* Configure the following settings for CPU port0:
+ * a.) Enable Bridge TX
+ * b.) Disable FDB new address learning
+ * c.) Disable station move address learning
+ */
+ mask = PPE_PORT_BRIDGE_TXMAC_EN;
+ mask |= PPE_PORT_BRIDGE_NEW_LRN_EN;
+ mask |= PPE_PORT_BRIDGE_STA_MOVE_LRN_EN;
+ ret = regmap_update_bits(ppe_dev->regmap,
+ PPE_PORT_BRIDGE_CTRL_ADDR,
+ mask,
+ PPE_PORT_BRIDGE_TXMAC_EN);
+ if (ret)
+ return ret;
+
+ for (i = 1; i < ppe_dev->num_ports; i++) {
+ /* Enable invalid VSI forwarding to CPU port0 for all the
+ * physical ports, in case no VSI is assigned to the physical
+ * port.
+ */
+ reg = PPE_L2_VP_PORT_TBL_ADDR + PPE_L2_VP_PORT_TBL_INC * i;
+ ret = regmap_bulk_read(ppe_dev->regmap, reg,
+ port_cfg, ARRAY_SIZE(port_cfg));
+ if (ret)
+ return ret;
+
+ PPE_L2_PORT_SET_INVALID_VSI_FWD_EN(port_cfg, true);
+ PPE_L2_PORT_SET_DST_INFO(port_cfg, 0);
+
+ ret = regmap_bulk_write(ppe_dev->regmap, reg,
+ port_cfg, ARRAY_SIZE(port_cfg));
+ if (ret)
+ return ret;
+ }
+
+ for (i = 0; i < PPE_VSI_TBL_ENTRIES; i++) {
+ /* Set the VSI forward membership to include only CPU port0.
+ * FDB learning and forwarding take place only after switchdev
+ * support is added later to create the VSI and join the
+ * physical ports to the VSI as members.
+ */
+ reg = PPE_VSI_TBL_ADDR + PPE_VSI_TBL_INC * i;
+ ret = regmap_bulk_read(ppe_dev->regmap, reg,
+ vsi_cfg, ARRAY_SIZE(vsi_cfg));
+ if (ret)
+ return ret;
+
+ PPE_VSI_SET_MEMBER_PORT_BITMAP(vsi_cfg, BIT(0));
+ PPE_VSI_SET_UUC_BITMAP(vsi_cfg, BIT(0));
+ PPE_VSI_SET_UMC_BITMAP(vsi_cfg, BIT(0));
+ PPE_VSI_SET_BC_BITMAP(vsi_cfg, BIT(0));
+ PPE_VSI_SET_NEW_ADDR_LRN_EN(vsi_cfg, true);
+ PPE_VSI_SET_NEW_ADDR_FWD_CMD(vsi_cfg, PPE_ACTION_FORWARD);
+ PPE_VSI_SET_STATION_MOVE_LRN_EN(vsi_cfg, true);
+ PPE_VSI_SET_STATION_MOVE_FWD_CMD(vsi_cfg, PPE_ACTION_FORWARD);
+
+ ret = regmap_bulk_write(ppe_dev->regmap, reg,
+ vsi_cfg, ARRAY_SIZE(vsi_cfg));
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+int ppe_hw_config(struct ppe_device *ppe_dev)
+{
+ int ret;
+
+ ret = ppe_config_bm(ppe_dev);
+ if (ret)
+ return ret;
+
+ ret = ppe_config_qm(ppe_dev);
+ if (ret)
+ return ret;
+
+ ret = ppe_config_scheduler(ppe_dev);
+ if (ret)
+ return ret;
+
+ ret = ppe_queue_dest_init(ppe_dev);
+ if (ret)
+ return ret;
+
+ ret = ppe_servcode_init(ppe_dev);
+ if (ret)
+ return ret;
+
+ ret = ppe_port_config_init(ppe_dev);
+ if (ret)
+ return ret;
+
+ ret = ppe_rss_hash_init(ppe_dev);
+ if (ret)
+ return ret;
+
+ ret = ppe_queues_to_ring_init(ppe_dev);
+ if (ret)
+ return ret;
+
+ return ppe_bridge_init(ppe_dev);
+}
diff --git a/drivers/net/ethernet/qualcomm/ppe/ppe_config.h b/drivers/net/ethernet/qualcomm/ppe/ppe_config.h
new file mode 100644
index 000000000000..4bb45ca40144
--- /dev/null
+++ b/drivers/net/ethernet/qualcomm/ppe/ppe_config.h
@@ -0,0 +1,317 @@
+/* SPDX-License-Identifier: GPL-2.0-only
+ *
+ * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
+ */
+
+#ifndef __PPE_CONFIG_H__
+#define __PPE_CONFIG_H__
+
+#include <linux/types.h>
+
+#include "ppe.h"
+
+/* There are different table index ranges for configuring queue base ID of
+ * the destination port, CPU code and service code.
+ */
+#define PPE_QUEUE_BASE_DEST_PORT 0
+#define PPE_QUEUE_BASE_CPU_CODE 1024
+#define PPE_QUEUE_BASE_SERVICE_CODE 2048
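+
+/* For example, with these bases the unicast queue map entry for CPU code
+ * c of source profile p sits at index PPE_QUEUE_BASE_CPU_CODE + (p << 8) + c,
+ * as computed by ppe_queue_ucast_base_set().
+ */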
+
+#define PPE_QUEUE_INTER_PRI_NUM 16
+#define PPE_QUEUE_HASH_NUM 256
+
+/* The service code is used by EDMA port to transmit packet to PPE. */
+#define PPE_EDMA_SC_BYPASS_ID 1
+
+/* The PPE RSS hash configured for IPv4 and IPv6 packet separately. */
+#define PPE_RSS_HASH_MODE_IPV4 BIT(0)
+#define PPE_RSS_HASH_MODE_IPV6 BIT(1)
+#define PPE_RSS_HASH_IP_LENGTH 4
+#define PPE_RSS_HASH_TUPLES 5
+
+/* The PPE supports 300 queues; each bit represents one queue. */
+#define PPE_RING_TO_QUEUE_BITMAP_WORD_CNT 10
+
+/**
+ * enum ppe_scheduler_frame_mode - PPE scheduler frame mode.
+ * @PPE_SCH_WITH_IPG_PREAMBLE_FRAME_CRC: The scheduled frame includes IPG,
+ * preamble, Ethernet packet and CRC.
+ * @PPE_SCH_WITH_FRAME_CRC: The scheduled frame includes Ethernet frame and CRC
+ * excluding IPG and preamble.
+ * @PPE_SCH_WITH_L3_PAYLOAD: The scheduled frame includes layer 3 packet data.
+ */
+enum ppe_scheduler_frame_mode {
+ PPE_SCH_WITH_IPG_PREAMBLE_FRAME_CRC = 0,
+ PPE_SCH_WITH_FRAME_CRC = 1,
+ PPE_SCH_WITH_L3_PAYLOAD = 2,
+};
+
+/**
+ * struct ppe_scheduler_cfg - PPE scheduler configuration.
+ * @flow_id: PPE flow ID.
+ * @pri: Scheduler priority.
+ * @drr_node_id: Node ID for scheduled traffic.
+ * @drr_node_wt: Weight for scheduled traffic.
+ * @unit_is_packet: Packet based or byte based unit for scheduled traffic.
+ * @frame_mode: Packet mode to be scheduled.
+ *
+ * PPE scheduler supports commit rate and exceed rate configurations.
+ */
+struct ppe_scheduler_cfg {
+ int flow_id;
+ int pri;
+ int drr_node_id;
+ int drr_node_wt;
+ bool unit_is_packet;
+ enum ppe_scheduler_frame_mode frame_mode;
+};
+
+/**
+ * enum ppe_resource_type - PPE resource type.
+ * @PPE_RES_UCAST: Unicast queue resource.
+ * @PPE_RES_MCAST: Multicast queue resource.
+ * @PPE_RES_L0_NODE: Level 0 for queue based node resource.
+ * @PPE_RES_L1_NODE: Level 1 for flow based node resource.
+ * @PPE_RES_FLOW_ID: Flow based node resource.
+ */
+enum ppe_resource_type {
+ PPE_RES_UCAST,
+ PPE_RES_MCAST,
+ PPE_RES_L0_NODE,
+ PPE_RES_L1_NODE,
+ PPE_RES_FLOW_ID,
+};
+
+/**
+ * struct ppe_queue_ucast_dest - PPE unicast queue destination.
+ * @src_profile: Source profile.
+ * @service_code_en: Enable service code to map the queue base ID.
+ * @service_code: Service code.
+ * @cpu_code_en: Enable CPU code to map the queue base ID.
+ * @cpu_code: CPU code.
+ * @dest_port: Destination port.
+ *
+ * PPE egress queue ID is decided by the service code if enabled, otherwise
+ * by the CPU code if enabled, or by destination port if both service code
+ * and CPU code are disabled.
+ */
+struct ppe_queue_ucast_dest {
+ int src_profile;
+ bool service_code_en;
+ int service_code;
+ bool cpu_code_en;
+ int cpu_code;
+ int dest_port;
+};
+
+/* Hardware bitmaps for bypassing features of the ingress packet. */
+enum ppe_sc_ingress_type {
+ PPE_SC_BYPASS_INGRESS_VLAN_TAG_FMT_CHECK = 0,
+ PPE_SC_BYPASS_INGRESS_VLAN_MEMBER_CHECK = 1,
+ PPE_SC_BYPASS_INGRESS_VLAN_TRANSLATE = 2,
+ PPE_SC_BYPASS_INGRESS_MY_MAC_CHECK = 3,
+ PPE_SC_BYPASS_INGRESS_DIP_LOOKUP = 4,
+ PPE_SC_BYPASS_INGRESS_FLOW_LOOKUP = 5,
+ PPE_SC_BYPASS_INGRESS_FLOW_ACTION = 6,
+ PPE_SC_BYPASS_INGRESS_ACL = 7,
+ PPE_SC_BYPASS_INGRESS_FAKE_MAC_HEADER = 8,
+ PPE_SC_BYPASS_INGRESS_SERVICE_CODE = 9,
+ PPE_SC_BYPASS_INGRESS_WRONG_PKT_FMT_L2 = 10,
+ PPE_SC_BYPASS_INGRESS_WRONG_PKT_FMT_L3_IPV4 = 11,
+ PPE_SC_BYPASS_INGRESS_WRONG_PKT_FMT_L3_IPV6 = 12,
+ PPE_SC_BYPASS_INGRESS_WRONG_PKT_FMT_L4 = 13,
+ PPE_SC_BYPASS_INGRESS_FLOW_SERVICE_CODE = 14,
+ PPE_SC_BYPASS_INGRESS_ACL_SERVICE_CODE = 15,
+ PPE_SC_BYPASS_INGRESS_FAKE_L2_PROTO = 16,
+ PPE_SC_BYPASS_INGRESS_PPPOE_TERMINATION = 17,
+ PPE_SC_BYPASS_INGRESS_DEFAULT_VLAN = 18,
+ PPE_SC_BYPASS_INGRESS_DEFAULT_PCP = 19,
+ PPE_SC_BYPASS_INGRESS_VSI_ASSIGN = 20,
+ /* Values 21-23 are not specified by hardware. */
+ PPE_SC_BYPASS_INGRESS_VLAN_ASSIGN_FAIL = 24,
+ PPE_SC_BYPASS_INGRESS_SOURCE_GUARD = 25,
+ PPE_SC_BYPASS_INGRESS_MRU_MTU_CHECK = 26,
+ PPE_SC_BYPASS_INGRESS_FLOW_SRC_CHECK = 27,
+ PPE_SC_BYPASS_INGRESS_FLOW_QOS = 28,
+ /* This must be last as it determines the size of the BITMAP. */
+ PPE_SC_BYPASS_INGRESS_SIZE,
+};
+
+/* Hardware bitmaps for bypassing features of the egress packet. */
+enum ppe_sc_egress_type {
+ PPE_SC_BYPASS_EGRESS_VLAN_MEMBER_CHECK = 0,
+ PPE_SC_BYPASS_EGRESS_VLAN_TRANSLATE = 1,
+ PPE_SC_BYPASS_EGRESS_VLAN_TAG_FMT_CTRL = 2,
+ PPE_SC_BYPASS_EGRESS_FDB_LEARN = 3,
+ PPE_SC_BYPASS_EGRESS_FDB_REFRESH = 4,
+ PPE_SC_BYPASS_EGRESS_L2_SOURCE_SECURITY = 5,
+ PPE_SC_BYPASS_EGRESS_MANAGEMENT_FWD = 6,
+ PPE_SC_BYPASS_EGRESS_BRIDGING_FWD = 7,
+ PPE_SC_BYPASS_EGRESS_IN_STP_FLTR = 8,
+ PPE_SC_BYPASS_EGRESS_EG_STP_FLTR = 9,
+ PPE_SC_BYPASS_EGRESS_SOURCE_FLTR = 10,
+ PPE_SC_BYPASS_EGRESS_POLICER = 11,
+ PPE_SC_BYPASS_EGRESS_L2_PKT_EDIT = 12,
+ PPE_SC_BYPASS_EGRESS_L3_PKT_EDIT = 13,
+ PPE_SC_BYPASS_EGRESS_ACL_POST_ROUTING_CHECK = 14,
+ PPE_SC_BYPASS_EGRESS_PORT_ISOLATION = 15,
+ PPE_SC_BYPASS_EGRESS_PRE_ACL_QOS = 16,
+ PPE_SC_BYPASS_EGRESS_POST_ACL_QOS = 17,
+ PPE_SC_BYPASS_EGRESS_DSCP_QOS = 18,
+ PPE_SC_BYPASS_EGRESS_PCP_QOS = 19,
+ PPE_SC_BYPASS_EGRESS_PREHEADER_QOS = 20,
+ PPE_SC_BYPASS_EGRESS_FAKE_MAC_DROP = 21,
+ PPE_SC_BYPASS_EGRESS_TUNL_CONTEXT = 22,
+ PPE_SC_BYPASS_EGRESS_FLOW_POLICER = 23,
+ /* This must be last as it determines the size of the BITMAP. */
+ PPE_SC_BYPASS_EGRESS_SIZE,
+};
+
+/* Hardware bitmaps for bypassing counter of packet. */
+enum ppe_sc_counter_type {
+ PPE_SC_BYPASS_COUNTER_RX_VLAN = 0,
+ PPE_SC_BYPASS_COUNTER_RX = 1,
+ PPE_SC_BYPASS_COUNTER_TX_VLAN = 2,
+ PPE_SC_BYPASS_COUNTER_TX = 3,
+ /* This must be last as it determines the size of the BITMAP. */
+ PPE_SC_BYPASS_COUNTER_SIZE,
+};
+
+/* Hardware bitmaps for bypassing features of tunnel packet. */
+enum ppe_sc_tunnel_type {
+ PPE_SC_BYPASS_TUNNEL_SERVICE_CODE = 0,
+ PPE_SC_BYPASS_TUNNEL_TUNNEL_HANDLE = 1,
+ PPE_SC_BYPASS_TUNNEL_L3_IF_CHECK = 2,
+ PPE_SC_BYPASS_TUNNEL_VLAN_CHECK = 3,
+ PPE_SC_BYPASS_TUNNEL_DMAC_CHECK = 4,
+ PPE_SC_BYPASS_TUNNEL_UDP_CSUM_0_CHECK = 5,
+ PPE_SC_BYPASS_TUNNEL_TBL_DE_ACCE_CHECK = 6,
+ PPE_SC_BYPASS_TUNNEL_PPPOE_MC_TERM_CHECK = 7,
+ PPE_SC_BYPASS_TUNNEL_TTL_EXCEED_CHECK = 8,
+ PPE_SC_BYPASS_TUNNEL_MAP_SRC_CHECK = 9,
+ PPE_SC_BYPASS_TUNNEL_MAP_DST_CHECK = 10,
+ PPE_SC_BYPASS_TUNNEL_LPM_DST_LOOKUP = 11,
+ PPE_SC_BYPASS_TUNNEL_LPM_LOOKUP = 12,
+ PPE_SC_BYPASS_TUNNEL_WRONG_PKT_FMT_L2 = 13,
+ PPE_SC_BYPASS_TUNNEL_WRONG_PKT_FMT_L3_IPV4 = 14,
+ PPE_SC_BYPASS_TUNNEL_WRONG_PKT_FMT_L3_IPV6 = 15,
+ PPE_SC_BYPASS_TUNNEL_WRONG_PKT_FMT_L4 = 16,
+ PPE_SC_BYPASS_TUNNEL_WRONG_PKT_FMT_TUNNEL = 17,
+ /* Values 18-19 are not specified by hardware. */
+ PPE_SC_BYPASS_TUNNEL_PRE_IPO = 20,
+ /* This must be last as it determines the size of the BITMAP. */
+ PPE_SC_BYPASS_TUNNEL_SIZE,
+};
+
+/**
+ * struct ppe_sc_bypass - PPE service bypass bitmaps
+ * @ingress: Bitmap of features that can be bypassed on the ingress packet.
+ * @egress: Bitmap of features that can be bypassed on the egress packet.
+ * @counter: Bitmap of features that can be bypassed on the counter type.
+ * @tunnel: Bitmap of features that can be bypassed on the tunnel packet.
+ */
+struct ppe_sc_bypass {
+ DECLARE_BITMAP(ingress, PPE_SC_BYPASS_INGRESS_SIZE);
+ DECLARE_BITMAP(egress, PPE_SC_BYPASS_EGRESS_SIZE);
+ DECLARE_BITMAP(counter, PPE_SC_BYPASS_COUNTER_SIZE);
+ DECLARE_BITMAP(tunnel, PPE_SC_BYPASS_TUNNEL_SIZE);
+};
+
+/**
+ * struct ppe_sc_cfg - PPE service code configuration.
+ * @dest_port_valid: Generate destination port or not.
+ * @dest_port: Destination port ID.
+ * @bitmaps: Bitmap of bypass features.
+ * @is_src: Destination port acts as source port, packet sent to CPU.
+ * @next_service_code: New service code generated.
+ * @eip_field_update_bitmap: Fields updated as actions taken for EIP.
+ * @eip_hw_service: Selected hardware functions for EIP.
+ * @eip_offset_sel: Packet offset selection for EIP, using either the
+ * packet's layer 3 offset or its layer 4 offset.
+ *
+ * Service code is generated during the packet passing through PPE.
+ */
+struct ppe_sc_cfg {
+ bool dest_port_valid;
+ int dest_port;
+ struct ppe_sc_bypass bitmaps;
+ bool is_src;
+ int next_service_code;
+ int eip_field_update_bitmap;
+ int eip_hw_service;
+ int eip_offset_sel;
+};
+
+/**
+ * enum ppe_action_type - PPE action of the received packet.
+ * @PPE_ACTION_FORWARD: Packet forwarded per L2/L3 process.
+ * @PPE_ACTION_DROP: Packet dropped by PPE.
+ * @PPE_ACTION_COPY_TO_CPU: Packet copied to CPU port per multicast queue.
+ * @PPE_ACTION_REDIRECT_TO_CPU: Packet redirected to CPU port per unicast queue.
+ */
+enum ppe_action_type {
+ PPE_ACTION_FORWARD = 0,
+ PPE_ACTION_DROP = 1,
+ PPE_ACTION_COPY_TO_CPU = 2,
+ PPE_ACTION_REDIRECT_TO_CPU = 3,
+};
+
+/**
+ * struct ppe_rss_hash_cfg - PPE RSS hash configuration.
+ * @hash_mask: Mask of the generated hash value.
+ * @hash_fragment_mode: Hash generation mode for the first fragment of TCP,
+ * UDP and UDP-Lite packets, to use either 3 tuple or 5 tuple for RSS hash
+ * key computation.
+ * @hash_seed: Seed to generate RSS hash.
+ * @hash_sip_mix: Source IP selection.
+ * @hash_dip_mix: Destination IP selection.
+ * @hash_protocol_mix: Protocol selection.
+ * @hash_sport_mix: Source L4 port selection.
+ * @hash_dport_mix: Destination L4 port selection.
+ * @hash_fin_inner: RSS hash value first selection.
+ * @hash_fin_outer: RSS hash value second selection.
+ *
+ * The PPE RSS hash value is generated for the packet based on the
+ * configured RSS hash settings.
+ */
+struct ppe_rss_hash_cfg {
+ u32 hash_mask;
+ bool hash_fragment_mode;
+ u32 hash_seed;
+ u8 hash_sip_mix[PPE_RSS_HASH_IP_LENGTH];
+ u8 hash_dip_mix[PPE_RSS_HASH_IP_LENGTH];
+ u8 hash_protocol_mix;
+ u8 hash_sport_mix;
+ u8 hash_dport_mix;
+ u8 hash_fin_inner[PPE_RSS_HASH_TUPLES];
+ u8 hash_fin_outer[PPE_RSS_HASH_TUPLES];
+};
+
+int ppe_hw_config(struct ppe_device *ppe_dev);
+int ppe_queue_scheduler_set(struct ppe_device *ppe_dev,
+ int node_id, bool flow_level, int port,
+ struct ppe_scheduler_cfg scheduler_cfg);
+int ppe_queue_ucast_base_set(struct ppe_device *ppe_dev,
+ struct ppe_queue_ucast_dest queue_dst,
+ int queue_base,
+ int profile_id);
+int ppe_queue_ucast_offset_pri_set(struct ppe_device *ppe_dev,
+ int profile_id,
+ int priority,
+ int queue_offset);
+int ppe_queue_ucast_offset_hash_set(struct ppe_device *ppe_dev,
+ int profile_id,
+ int rss_hash,
+ int queue_offset);
+int ppe_port_resource_get(struct ppe_device *ppe_dev, int port,
+ enum ppe_resource_type type,
+ int *res_start, int *res_end);
+int ppe_sc_config_set(struct ppe_device *ppe_dev, int sc,
+ struct ppe_sc_cfg cfg);
+int ppe_counter_enable_set(struct ppe_device *ppe_dev, int port);
+int ppe_rss_hash_config_set(struct ppe_device *ppe_dev, int mode,
+ struct ppe_rss_hash_cfg hash_cfg);
+int ppe_ring_queue_map_set(struct ppe_device *ppe_dev,
+ int ring_id,
+ u32 *queue_map);
+#endif
diff --git a/drivers/net/ethernet/qualcomm/ppe/ppe_debugfs.c b/drivers/net/ethernet/qualcomm/ppe/ppe_debugfs.c
new file mode 100644
index 000000000000..fd959a76ff43
--- /dev/null
+++ b/drivers/net/ethernet/qualcomm/ppe/ppe_debugfs.c
@@ -0,0 +1,847 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
+ */
+
+/* PPE debugfs routines for displaying PPE counters useful for debugging. */
+
+#include <linux/bitfield.h>
+#include <linux/debugfs.h>
+#include <linux/dev_printk.h>
+#include <linux/device.h>
+#include <linux/regmap.h>
+#include <linux/seq_file.h>
+
+#include "ppe.h"
+#include "ppe_config.h"
+#include "ppe_debugfs.h"
+#include "ppe_regs.h"
+
+#define PPE_PKT_CNT_TBL_SIZE 3
+#define PPE_DROP_PKT_CNT_TBL_SIZE 5
+
+#define PPE_W0_PKT_CNT GENMASK(31, 0)
+#define PPE_W2_DROP_PKT_CNT_LOW GENMASK(31, 8)
+#define PPE_W3_DROP_PKT_CNT_HIGH GENMASK(7, 0)
+
+#define PPE_GET_PKT_CNT(tbl_cnt) \
+ FIELD_GET(PPE_W0_PKT_CNT, *(tbl_cnt))
+#define PPE_GET_DROP_PKT_CNT_LOW(tbl_cnt) \
+ FIELD_GET(PPE_W2_DROP_PKT_CNT_LOW, *((tbl_cnt) + 0x2))
+#define PPE_GET_DROP_PKT_CNT_HIGH(tbl_cnt) \
+ FIELD_GET(PPE_W3_DROP_PKT_CNT_HIGH, *((tbl_cnt) + 0x3))
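+
+/* The 32-bit drop counter is split across two words of the counter table:
+ * the low 24 bits live in bits 31:8 of word 2 and the high 8 bits in bits
+ * 7:0 of word 3, so the full value is reassembled as
+ * (PPE_GET_DROP_PKT_CNT_HIGH() << 24) | PPE_GET_DROP_PKT_CNT_LOW().
+ */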
+
+/**
+ * enum ppe_cnt_size_type - PPE counter size type
+ * @PPE_PKT_CNT_SIZE_1WORD: Counter size with single register
+ * @PPE_PKT_CNT_SIZE_3WORD: Counter size with table of 3 words
+ * @PPE_PKT_CNT_SIZE_5WORD: Counter size with table of 5 words
+ *
+ * The PPE uses registers of different sizes to record the packet counters:
+ * either a single register, or a register table of 3 or 5 words. The
+ * counter with a 5-word table also records the drop counter. Some other
+ * counter types occupy less than 32 bits and are not covered by this
+ * enumeration type.
+ */
+enum ppe_cnt_size_type {
+ PPE_PKT_CNT_SIZE_1WORD,
+ PPE_PKT_CNT_SIZE_3WORD,
+ PPE_PKT_CNT_SIZE_5WORD,
+};
+
+/**
+ * enum ppe_cnt_type - PPE counter type.
+ * @PPE_CNT_BM: Packet counter processed by BM.
+ * @PPE_CNT_PARSE: Packet counter parsed on ingress.
+ * @PPE_CNT_PORT_RX: Packet counter on the ingress port.
+ * @PPE_CNT_VLAN_RX: VLAN packet counter received.
+ * @PPE_CNT_L2_FWD: Packet counter processed by L2 forwarding.
+ * @PPE_CNT_CPU_CODE: Packet counter marked with various CPU codes.
+ * @PPE_CNT_VLAN_TX: VLAN packet counter transmitted.
+ * @PPE_CNT_PORT_TX: Packet counter on the egress port.
+ * @PPE_CNT_QM: Packet counter processed by QM.
+ */
+enum ppe_cnt_type {
+ PPE_CNT_BM,
+ PPE_CNT_PARSE,
+ PPE_CNT_PORT_RX,
+ PPE_CNT_VLAN_RX,
+ PPE_CNT_L2_FWD,
+ PPE_CNT_CPU_CODE,
+ PPE_CNT_VLAN_TX,
+ PPE_CNT_PORT_TX,
+ PPE_CNT_QM,
+};
+
+/**
+ * struct ppe_debugfs_entry - PPE debugfs entry.
+ * @name: Debugfs file name.
+ * @counter_type: PPE packet counter type.
+ * @ppe: PPE device.
+ *
+ * The PPE debugfs entry is used to create the debugfs file and passed
+ * to debugfs_create_file() as private data.
+ */
+struct ppe_debugfs_entry {
+ const char *name;
+ enum ppe_cnt_type counter_type;
+ struct ppe_device *ppe;
+};
+
+static const struct ppe_debugfs_entry debugfs_files[] = {
+ {
+ .name = "bm",
+ .counter_type = PPE_CNT_BM,
+ },
+ {
+ .name = "parse",
+ .counter_type = PPE_CNT_PARSE,
+ },
+ {
+ .name = "port_rx",
+ .counter_type = PPE_CNT_PORT_RX,
+ },
+ {
+ .name = "vlan_rx",
+ .counter_type = PPE_CNT_VLAN_RX,
+ },
+ {
+ .name = "l2_forward",
+ .counter_type = PPE_CNT_L2_FWD,
+ },
+ {
+ .name = "cpu_code",
+ .counter_type = PPE_CNT_CPU_CODE,
+ },
+ {
+ .name = "vlan_tx",
+ .counter_type = PPE_CNT_VLAN_TX,
+ },
+ {
+ .name = "port_tx",
+ .counter_type = PPE_CNT_PORT_TX,
+ },
+ {
+ .name = "qm",
+ .counter_type = PPE_CNT_QM,
+ },
+};
+
+static int ppe_pkt_cnt_get(struct ppe_device *ppe_dev, u32 reg,
+ enum ppe_cnt_size_type cnt_type,
+ u32 *cnt, u32 *drop_cnt)
+{
+ u32 drop_pkt_cnt[PPE_DROP_PKT_CNT_TBL_SIZE];
+ u32 pkt_cnt[PPE_PKT_CNT_TBL_SIZE];
+ u32 value;
+ int ret;
+
+ switch (cnt_type) {
+ case PPE_PKT_CNT_SIZE_1WORD:
+ ret = regmap_read(ppe_dev->regmap, reg, &value);
+ if (ret)
+ return ret;
+
+ *cnt = value;
+ break;
+ case PPE_PKT_CNT_SIZE_3WORD:
+ ret = regmap_bulk_read(ppe_dev->regmap, reg,
+ pkt_cnt, ARRAY_SIZE(pkt_cnt));
+ if (ret)
+ return ret;
+
+ *cnt = PPE_GET_PKT_CNT(pkt_cnt);
+ break;
+ case PPE_PKT_CNT_SIZE_5WORD:
+ ret = regmap_bulk_read(ppe_dev->regmap, reg,
+ drop_pkt_cnt, ARRAY_SIZE(drop_pkt_cnt));
+ if (ret)
+ return ret;
+
+ *cnt = PPE_GET_PKT_CNT(drop_pkt_cnt);
+
+ /* Drop counter with low 24 bits. */
+ value = PPE_GET_DROP_PKT_CNT_LOW(drop_pkt_cnt);
+ *drop_cnt = FIELD_PREP(GENMASK(23, 0), value);
+
+ /* Drop counter with high 8 bits. */
+ value = PPE_GET_DROP_PKT_CNT_HIGH(drop_pkt_cnt);
+ *drop_cnt |= FIELD_PREP(GENMASK(31, 24), value);
+ break;
+ }
+
+ return 0;
+}
+
+static void ppe_tbl_pkt_cnt_clear(struct ppe_device *ppe_dev, u32 reg,
+ enum ppe_cnt_size_type cnt_type)
+{
+ u32 drop_pkt_cnt[PPE_DROP_PKT_CNT_TBL_SIZE] = {};
+ u32 pkt_cnt[PPE_PKT_CNT_TBL_SIZE] = {};
+
+ switch (cnt_type) {
+ case PPE_PKT_CNT_SIZE_1WORD:
+ regmap_write(ppe_dev->regmap, reg, 0);
+ break;
+ case PPE_PKT_CNT_SIZE_3WORD:
+ regmap_bulk_write(ppe_dev->regmap, reg,
+ pkt_cnt, ARRAY_SIZE(pkt_cnt));
+ break;
+ case PPE_PKT_CNT_SIZE_5WORD:
+ regmap_bulk_write(ppe_dev->regmap, reg,
+ drop_pkt_cnt, ARRAY_SIZE(drop_pkt_cnt));
+ break;
+ }
+}
+
+static int ppe_bm_counter_get(struct ppe_device *ppe_dev, struct seq_file *seq)
+{
+ u32 reg, val, pkt_cnt, pkt_cnt1;
+ int ret, i, tag;
+
+ seq_printf(seq, "%-24s", "BM SILENT_DROP:");
+ tag = 0;
+ for (i = 0; i < PPE_DROP_CNT_TBL_ENTRIES; i++) {
+ reg = PPE_DROP_CNT_TBL_ADDR + i * PPE_DROP_CNT_TBL_INC;
+ ret = ppe_pkt_cnt_get(ppe_dev, reg, PPE_PKT_CNT_SIZE_1WORD,
+ &pkt_cnt, NULL);
+ if (ret) {
+ dev_err(ppe_dev->dev, "CNT ERROR %d\n", ret);
+ return ret;
+ }
+
+ if (pkt_cnt > 0) {
+ if (!((++tag) % 4))
+ seq_printf(seq, "\n%-24s", "");
+
+ seq_printf(seq, "%10u(%s=%04d)", pkt_cnt, "port", i);
+ }
+ }
+
+ seq_putc(seq, '\n');
+
+ /* The number of packets dropped because the hardware buffers were
+ * only partially available for the packet.
+ */
+ seq_printf(seq, "%-24s", "BM OVERFLOW_DROP:");
+ tag = 0;
+ for (i = 0; i < PPE_DROP_STAT_TBL_ENTRIES; i++) {
+ reg = PPE_DROP_STAT_TBL_ADDR + PPE_DROP_STAT_TBL_INC * i;
+
+ ret = ppe_pkt_cnt_get(ppe_dev, reg, PPE_PKT_CNT_SIZE_3WORD,
+ &pkt_cnt, NULL);
+ if (ret) {
+ dev_err(ppe_dev->dev, "CNT ERROR %d\n", ret);
+ return ret;
+ }
+
+ if (pkt_cnt > 0) {
+ if (!((++tag) % 4))
+ seq_printf(seq, "\n%-24s", "");
+
+ seq_printf(seq, "%10u(%s=%04d)", pkt_cnt, "port", i);
+ }
+ }
+
+ seq_putc(seq, '\n');
+
+ /* The number of currently occupied buffers, which can't be flushed. */
+ seq_printf(seq, "%-24s", "BM USED/REACT:");
+ tag = 0;
+ for (i = 0; i < PPE_BM_USED_CNT_TBL_ENTRIES; i++) {
+ reg = PPE_BM_USED_CNT_TBL_ADDR + i * PPE_BM_USED_CNT_TBL_INC;
+ ret = regmap_read(ppe_dev->regmap, reg, &val);
+ if (ret) {
+ dev_err(ppe_dev->dev, "CNT ERROR %d\n", ret);
+ return ret;
+ }
+
+ /* The number of PPE buffers used for caching the received
+ * packets before the pause frame is sent.
+ */
+ pkt_cnt = FIELD_GET(PPE_BM_USED_CNT_VAL, val);
+
+ reg = PPE_BM_REACT_CNT_TBL_ADDR + i * PPE_BM_REACT_CNT_TBL_INC;
+ ret = regmap_read(ppe_dev->regmap, reg, &val);
+ if (ret) {
+ dev_err(ppe_dev->dev, "CNT ERROR %d\n", ret);
+ return ret;
+ }
+
+ /* The number of PPE buffers used for caching the received
+ * packets after the pause frame is sent out.
+ */
+ pkt_cnt1 = FIELD_GET(PPE_BM_REACT_CNT_VAL, val);
+
+ if (pkt_cnt > 0 || pkt_cnt1 > 0) {
+ if (!((++tag) % 4))
+ seq_printf(seq, "\n%-24s", "");
+
+ seq_printf(seq, "%10u/%u(%s=%04d)", pkt_cnt, pkt_cnt1,
+ "port", i);
+ }
+ }
+
+ seq_putc(seq, '\n');
+
+ return 0;
+}
+
+/* The number of packets processed by the ingress parser module of PPE. */
+static int ppe_parse_pkt_counter_get(struct ppe_device *ppe_dev,
+ struct seq_file *seq)
+{
+ u32 reg, cnt = 0, tunnel_cnt = 0;
+ int i, ret, tag = 0;
+
+ seq_printf(seq, "%-24s", "PARSE TPRX/IPRX:");
+ for (i = 0; i < PPE_IPR_PKT_CNT_TBL_ENTRIES; i++) {
+ reg = PPE_TPR_PKT_CNT_TBL_ADDR + i * PPE_TPR_PKT_CNT_TBL_INC;
+ ret = ppe_pkt_cnt_get(ppe_dev, reg, PPE_PKT_CNT_SIZE_1WORD,
+ &tunnel_cnt, NULL);
+ if (ret) {
+ dev_err(ppe_dev->dev, "CNT ERROR %d\n", ret);
+ return ret;
+ }
+
+ reg = PPE_IPR_PKT_CNT_TBL_ADDR + i * PPE_IPR_PKT_CNT_TBL_INC;
+ ret = ppe_pkt_cnt_get(ppe_dev, reg, PPE_PKT_CNT_SIZE_1WORD,
+ &cnt, NULL);
+ if (ret) {
+ dev_err(ppe_dev->dev, "CNT ERROR %d\n", ret);
+ return ret;
+ }
+
+ if (tunnel_cnt > 0 || cnt > 0) {
+ if (!((++tag) % 4))
+ seq_printf(seq, "\n%-24s", "");
+
+ seq_printf(seq, "%10u/%u(%s=%04d)", tunnel_cnt, cnt,
+ "port", i);
+ }
+ }
+
+ seq_putc(seq, '\n');
+
+ return 0;
+}
+
+/* The number of packets received or dropped on the ingress port. */
+static int ppe_port_rx_counter_get(struct ppe_device *ppe_dev,
+ struct seq_file *seq)
+{
+ u32 reg, pkt_cnt = 0, drop_cnt = 0;
+ int ret, i, tag;
+
+ seq_printf(seq, "%-24s", "PORT RX/RX_DROP:");
+ tag = 0;
+ for (i = 0; i < PPE_PHY_PORT_RX_CNT_TBL_ENTRIES; i++) {
+ reg = PPE_PHY_PORT_RX_CNT_TBL_ADDR + PPE_PHY_PORT_RX_CNT_TBL_INC * i;
+ ret = ppe_pkt_cnt_get(ppe_dev, reg, PPE_PKT_CNT_SIZE_5WORD,
+ &pkt_cnt, &drop_cnt);
+ if (ret) {
+ dev_err(ppe_dev->dev, "CNT ERROR %d\n", ret);
+ return ret;
+ }
+
+ if (pkt_cnt > 0) {
+ if (!((++tag) % 4))
+ seq_printf(seq, "\n%-24s", "");
+
+ seq_printf(seq, "%10u/%u(%s=%04d)", pkt_cnt, drop_cnt,
+ "port", i);
+ }
+ }
+
+ seq_putc(seq, '\n');
+
+ seq_printf(seq, "%-24s", "VPORT RX/RX_DROP:");
+ tag = 0;
+ for (i = 0; i < PPE_PORT_RX_CNT_TBL_ENTRIES; i++) {
+ reg = PPE_PORT_RX_CNT_TBL_ADDR + PPE_PORT_RX_CNT_TBL_INC * i;
+ ret = ppe_pkt_cnt_get(ppe_dev, reg, PPE_PKT_CNT_SIZE_5WORD,
+ &pkt_cnt, &drop_cnt);
+ if (ret) {
+ dev_err(ppe_dev->dev, "CNT ERROR %d\n", ret);
+ return ret;
+ }
+
+ if (pkt_cnt > 0) {
+ if (!((++tag) % 4))
+ seq_printf(seq, "\n%-24s", "");
+
+ seq_printf(seq, "%10u/%u(%s=%04d)", pkt_cnt, drop_cnt,
+ "port", i);
+ }
+ }
+
+ seq_putc(seq, '\n');
+
+ return 0;
+}
+
+/* The number of packets received or dropped by layer 2 processing. */
+static int ppe_l2_counter_get(struct ppe_device *ppe_dev,
+ struct seq_file *seq)
+{
+ u32 reg, pkt_cnt = 0, drop_cnt = 0;
+ int ret, i, tag = 0;
+
+ seq_printf(seq, "%-24s", "L2 RX/RX_DROP:");
+ for (i = 0; i < PPE_PRE_L2_CNT_TBL_ENTRIES; i++) {
+ reg = PPE_PRE_L2_CNT_TBL_ADDR + PPE_PRE_L2_CNT_TBL_INC * i;
+ ret = ppe_pkt_cnt_get(ppe_dev, reg, PPE_PKT_CNT_SIZE_5WORD,
+ &pkt_cnt, &drop_cnt);
+ if (ret) {
+ dev_err(ppe_dev->dev, "CNT ERROR %d\n", ret);
+ return ret;
+ }
+
+ if (pkt_cnt > 0) {
+ if (!((++tag) % 4))
+ seq_printf(seq, "\n%-24s", "");
+
+ seq_printf(seq, "%10u/%u(%s=%04d)", pkt_cnt, drop_cnt,
+ "vsi", i);
+ }
+ }
+
+ seq_putc(seq, '\n');
+
+ return 0;
+}
+
+/* The number of VLAN packets received by PPE. */
+static int ppe_vlan_rx_counter_get(struct ppe_device *ppe_dev,
+ struct seq_file *seq)
+{
+ u32 reg, pkt_cnt = 0;
+ int ret, i, tag = 0;
+
+ seq_printf(seq, "%-24s", "VLAN RX:");
+ for (i = 0; i < PPE_VLAN_CNT_TBL_ENTRIES; i++) {
+ reg = PPE_VLAN_CNT_TBL_ADDR + PPE_VLAN_CNT_TBL_INC * i;
+
+ ret = ppe_pkt_cnt_get(ppe_dev, reg, PPE_PKT_CNT_SIZE_3WORD,
+ &pkt_cnt, NULL);
+ if (ret) {
+ dev_err(ppe_dev->dev, "CNT ERROR %d\n", ret);
+ return ret;
+ }
+
+ if (pkt_cnt > 0) {
+ if (!((++tag) % 4))
+ seq_printf(seq, "\n%-24s", "");
+
+ seq_printf(seq, "%10u(%s=%04d)", pkt_cnt, "vsi", i);
+ }
+ }
+
+ seq_putc(seq, '\n');
+
+ return 0;
+}
+
+/* The number of packets handed to CPU by PPE. */
+static int ppe_cpu_code_counter_get(struct ppe_device *ppe_dev,
+ struct seq_file *seq)
+{
+ u32 reg, pkt_cnt = 0;
+ int ret, i;
+
+ seq_printf(seq, "%-24s", "CPU CODE:");
+ for (i = 0; i < PPE_DROP_CPU_CNT_TBL_ENTRIES; i++) {
+ reg = PPE_DROP_CPU_CNT_TBL_ADDR + PPE_DROP_CPU_CNT_TBL_INC * i;
+
+ ret = ppe_pkt_cnt_get(ppe_dev, reg, PPE_PKT_CNT_SIZE_3WORD,
+ &pkt_cnt, NULL);
+ if (ret) {
+ dev_err(ppe_dev->dev, "CNT ERROR %d\n", ret);
+ return ret;
+ }
+
+ if (!pkt_cnt)
+ continue;
+
+ /* The first 256 entries of the register table hold the 256
+ * CPU codes; they are followed by 128 drop codes for each of
+ * the PPE ports 0-7, giving 256 + 8 * 128 entries in total.
+ */
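+ /* Worked example (illustrative): entry i = 300 decodes to
+ * dropcode (300 - 256) / 8 = 5 on port (300 - 256) % 8 = 4.
+ */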
+ if (i < 256)
+ seq_printf(seq, "%10u(cpucode:%d)", pkt_cnt, i);
+ else
+ seq_printf(seq, "%10u(port=%04d),dropcode:%d", pkt_cnt,
+ (i - 256) % 8, (i - 256) / 8);
+ seq_putc(seq, '\n');
+ seq_printf(seq, "%-24s", "");
+ }
+
+ seq_putc(seq, '\n');
+
+ return 0;
+}
+
+/* The number of packets forwarded by VLAN in the egress direction. */
+static int ppe_vlan_tx_counter_get(struct ppe_device *ppe_dev,
+ struct seq_file *seq)
+{
+ u32 reg, pkt_cnt = 0;
+ int ret, i, tag = 0;
+
+ seq_printf(seq, "%-24s", "VLAN TX:");
+ for (i = 0; i < PPE_EG_VSI_COUNTER_TBL_ENTRIES; i++) {
+ reg = PPE_EG_VSI_COUNTER_TBL_ADDR + PPE_EG_VSI_COUNTER_TBL_INC * i;
+
+ ret = ppe_pkt_cnt_get(ppe_dev, reg, PPE_PKT_CNT_SIZE_3WORD,
+ &pkt_cnt, NULL);
+ if (ret) {
+ dev_err(ppe_dev->dev, "CNT ERROR %d\n", ret);
+ return ret;
+ }
+
+ if (pkt_cnt > 0) {
+ if (!((++tag) % 4))
+ seq_printf(seq, "\n%-24s", "");
+
+ seq_printf(seq, "%10u(%s=%04d)", pkt_cnt, "vsi", i);
+ }
+ }
+
+ seq_putc(seq, '\n');
+
+ return 0;
+}
+
+/* The number of packets transmitted or dropped on the egress port. */
+static int ppe_port_tx_counter_get(struct ppe_device *ppe_dev,
+ struct seq_file *seq)
+{
+ u32 reg, pkt_cnt = 0, drop_cnt = 0;
+ int ret, i, tag;
+
+ seq_printf(seq, "%-24s", "VPORT TX/TX_DROP:");
+ tag = 0;
+ for (i = 0; i < PPE_VPORT_TX_COUNTER_TBL_ENTRIES; i++) {
+ reg = PPE_VPORT_TX_COUNTER_TBL_ADDR + PPE_VPORT_TX_COUNTER_TBL_INC * i;
+ ret = ppe_pkt_cnt_get(ppe_dev, reg, PPE_PKT_CNT_SIZE_3WORD,
+ &pkt_cnt, NULL);
+ if (ret) {
+ dev_err(ppe_dev->dev, "CNT ERROR %d\n", ret);
+ return ret;
+ }
+
+ reg = PPE_VPORT_TX_DROP_CNT_TBL_ADDR + PPE_VPORT_TX_DROP_CNT_TBL_INC * i;
+ ret = ppe_pkt_cnt_get(ppe_dev, reg, PPE_PKT_CNT_SIZE_3WORD,
+ &drop_cnt, NULL);
+ if (ret) {
+ dev_err(ppe_dev->dev, "CNT ERROR %d\n", ret);
+ return ret;
+ }
+
+ if (pkt_cnt > 0 || drop_cnt > 0) {
+ if (!((++tag) % 4))
+ seq_printf(seq, "\n%-24s", "");
+
+ seq_printf(seq, "%10u/%u(%s=%04d)", pkt_cnt, drop_cnt,
+ "port", i);
+ }
+ }
+
+ seq_putc(seq, '\n');
+
+ seq_printf(seq, "%-24s", "PORT TX/TX_DROP:");
+ tag = 0;
+ for (i = 0; i < PPE_PORT_TX_COUNTER_TBL_ENTRIES; i++) {
+ reg = PPE_PORT_TX_COUNTER_TBL_ADDR + PPE_PORT_TX_COUNTER_TBL_INC * i;
+ ret = ppe_pkt_cnt_get(ppe_dev, reg, PPE_PKT_CNT_SIZE_3WORD,
+ &pkt_cnt, NULL);
+ if (ret) {
+ dev_err(ppe_dev->dev, "CNT ERROR %d\n", ret);
+ return ret;
+ }
+
+ reg = PPE_PORT_TX_DROP_CNT_TBL_ADDR + PPE_PORT_TX_DROP_CNT_TBL_INC * i;
+ ret = ppe_pkt_cnt_get(ppe_dev, reg, PPE_PKT_CNT_SIZE_3WORD,
+ &drop_cnt, NULL);
+ if (ret) {
+ dev_err(ppe_dev->dev, "CNT ERROR %d\n", ret);
+ return ret;
+ }
+
+ if (pkt_cnt > 0 || drop_cnt > 0) {
+ if (!((++tag) % 4))
+ seq_printf(seq, "\n%-24s", "");
+
+ seq_printf(seq, "%10u/%u(%s=%04d)", pkt_cnt, drop_cnt,
+ "port", i);
+ }
+ }
+
+ seq_putc(seq, '\n');
+
+ return 0;
+}
+
+/* The number of packets transmitted, pending or dropped on each PPE queue. */
+static int ppe_queue_counter_get(struct ppe_device *ppe_dev,
+ struct seq_file *seq)
+{
+ u32 reg, val, pkt_cnt = 0, pend_cnt = 0, drop_cnt = 0;
+ int ret, i, tag = 0;
+
+ seq_printf(seq, "%-24s", "QUEUE TX/PEND/DROP:");
+ for (i = 0; i < PPE_QUEUE_TX_COUNTER_TBL_ENTRIES; i++) {
+ reg = PPE_QUEUE_TX_COUNTER_TBL_ADDR + PPE_QUEUE_TX_COUNTER_TBL_INC * i;
+ ret = ppe_pkt_cnt_get(ppe_dev, reg, PPE_PKT_CNT_SIZE_3WORD,
+ &pkt_cnt, NULL);
+ if (ret) {
+ dev_err(ppe_dev->dev, "CNT ERROR %d\n", ret);
+ return ret;
+ }
+
+ if (i < PPE_AC_UNICAST_QUEUE_CFG_TBL_ENTRIES) {
+ reg = PPE_AC_UNICAST_QUEUE_CNT_TBL_ADDR +
+ PPE_AC_UNICAST_QUEUE_CNT_TBL_INC * i;
+ ret = regmap_read(ppe_dev->regmap, reg, &val);
+ if (ret) {
+ dev_err(ppe_dev->dev, "CNT ERROR %d\n", ret);
+ return ret;
+ }
+
+ pend_cnt = FIELD_GET(PPE_AC_UNICAST_QUEUE_CNT_TBL_PEND_CNT, val);
+
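+ /* Each unicast queue owns PPE_UNICAST_DROP_TYPES consecutive
+ * drop counters; read the forced-drop entry located at
+ * PPE_UNICAST_DROP_FORCE_OFFSET within queue i's group.
+ * Illustrative: for queue 2 this is entry 2 * 6 + 3 = 15.
+ */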
+ reg = PPE_UNICAST_DROP_CNT_TBL_ADDR +
+ PPE_AC_UNICAST_QUEUE_CNT_TBL_INC *
+ (i * PPE_UNICAST_DROP_TYPES + PPE_UNICAST_DROP_FORCE_OFFSET);
+
+ ret = ppe_pkt_cnt_get(ppe_dev, reg, PPE_PKT_CNT_SIZE_3WORD,
+ &drop_cnt, NULL);
+ if (ret) {
+ dev_err(ppe_dev->dev, "CNT ERROR %d\n", ret);
+ return ret;
+ }
+ } else {
+ int mq_offset = i - PPE_AC_UNICAST_QUEUE_CFG_TBL_ENTRIES;
+
+ reg = PPE_AC_MULTICAST_QUEUE_CNT_TBL_ADDR +
+ PPE_AC_MULTICAST_QUEUE_CNT_TBL_INC * mq_offset;
+ ret = regmap_read(ppe_dev->regmap, reg, &val);
+ if (ret) {
+ dev_err(ppe_dev->dev, "CNT ERROR %d\n", ret);
+ return ret;
+ }
+
+ pend_cnt = FIELD_GET(PPE_AC_MULTICAST_QUEUE_CNT_TBL_PEND_CNT, val);
+
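+ /* The first PPE_P0_MULTICAST_QUEUE_NUM multicast queues belong
+ * to CPU port 0; the remaining queues are grouped
+ * PPE_MULTICAST_QUEUE_NUM per physical port, with each port's
+ * group spaced PPE_MULTICAST_QUEUE_PORT_ADDR_INC apart.
+ */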
+ if (mq_offset < PPE_P0_MULTICAST_QUEUE_NUM) {
+ reg = PPE_CPU_PORT_MULTICAST_FORCE_DROP_CNT_TBL_ADDR(mq_offset);
+ } else {
+ mq_offset -= PPE_P0_MULTICAST_QUEUE_NUM;
+
+ reg = PPE_P1_MULTICAST_DROP_CNT_TBL_ADDR;
+ reg += (mq_offset / PPE_MULTICAST_QUEUE_NUM) *
+ PPE_MULTICAST_QUEUE_PORT_ADDR_INC;
+ reg += (mq_offset % PPE_MULTICAST_QUEUE_NUM) *
+ PPE_MULTICAST_DROP_CNT_TBL_INC *
+ PPE_MULTICAST_DROP_TYPES;
+ }
+
+ ret = ppe_pkt_cnt_get(ppe_dev, reg, PPE_PKT_CNT_SIZE_3WORD,
+ &drop_cnt, NULL);
+ if (ret) {
+ dev_err(ppe_dev->dev, "CNT ERROR %d\n", ret);
+ return ret;
+ }
+ }
+
+ if (pkt_cnt > 0 || pend_cnt > 0 || drop_cnt > 0) {
+ if (!((++tag) % 4))
+ seq_printf(seq, "\n%-24s", "");
+
+ seq_printf(seq, "%10u/%u/%u(%s=%04d)",
+ pkt_cnt, pend_cnt, drop_cnt, "queue", i);
+ }
+ }
+
+ seq_putc(seq, '\n');
+
+ return 0;
+}
+
+/* Display the various packet counters of the PPE. */
+static int ppe_packet_counter_show(struct seq_file *seq, void *v)
+{
+ struct ppe_debugfs_entry *entry = seq->private;
+ struct ppe_device *ppe_dev = entry->ppe;
+ int ret;
+
+ switch (entry->counter_type) {
+ case PPE_CNT_BM:
+ ret = ppe_bm_counter_get(ppe_dev, seq);
+ break;
+ case PPE_CNT_PARSE:
+ ret = ppe_parse_pkt_counter_get(ppe_dev, seq);
+ break;
+ case PPE_CNT_PORT_RX:
+ ret = ppe_port_rx_counter_get(ppe_dev, seq);
+ break;
+ case PPE_CNT_VLAN_RX:
+ ret = ppe_vlan_rx_counter_get(ppe_dev, seq);
+ break;
+ case PPE_CNT_L2_FWD:
+ ret = ppe_l2_counter_get(ppe_dev, seq);
+ break;
+ case PPE_CNT_CPU_CODE:
+ ret = ppe_cpu_code_counter_get(ppe_dev, seq);
+ break;
+ case PPE_CNT_VLAN_TX:
+ ret = ppe_vlan_tx_counter_get(ppe_dev, seq);
+ break;
+ case PPE_CNT_PORT_TX:
+ ret = ppe_port_tx_counter_get(ppe_dev, seq);
+ break;
+ case PPE_CNT_QM:
+ ret = ppe_queue_counter_get(ppe_dev, seq);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+/* Clear the various packet counters of the PPE; any write resets them. */
+static ssize_t ppe_packet_counter_write(struct file *file,
+ const char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct ppe_debugfs_entry *entry = file_inode(file)->i_private;
+ struct ppe_device *ppe_dev = entry->ppe;
+ u32 reg;
+ int i;
+
+ switch (entry->counter_type) {
+ case PPE_CNT_BM:
+ for (i = 0; i < PPE_DROP_CNT_TBL_ENTRIES; i++) {
+ reg = PPE_DROP_CNT_TBL_ADDR + i * PPE_DROP_CNT_TBL_INC;
+ ppe_tbl_pkt_cnt_clear(ppe_dev, reg, PPE_PKT_CNT_SIZE_1WORD);
+ }
+
+ for (i = 0; i < PPE_DROP_STAT_TBL_ENTRIES; i++) {
+ reg = PPE_DROP_STAT_TBL_ADDR + PPE_DROP_STAT_TBL_INC * i;
+ ppe_tbl_pkt_cnt_clear(ppe_dev, reg, PPE_PKT_CNT_SIZE_3WORD);
+ }
+
+ break;
+ case PPE_CNT_PARSE:
+ for (i = 0; i < PPE_IPR_PKT_CNT_TBL_ENTRIES; i++) {
+ reg = PPE_IPR_PKT_CNT_TBL_ADDR + i * PPE_IPR_PKT_CNT_TBL_INC;
+ ppe_tbl_pkt_cnt_clear(ppe_dev, reg, PPE_PKT_CNT_SIZE_1WORD);
+
+ reg = PPE_TPR_PKT_CNT_TBL_ADDR + i * PPE_TPR_PKT_CNT_TBL_INC;
+ ppe_tbl_pkt_cnt_clear(ppe_dev, reg, PPE_PKT_CNT_SIZE_1WORD);
+ }
+
+ break;
+ case PPE_CNT_PORT_RX:
+ for (i = 0; i < PPE_PORT_RX_CNT_TBL_ENTRIES; i++) {
+ reg = PPE_PORT_RX_CNT_TBL_ADDR + PPE_PORT_RX_CNT_TBL_INC * i;
+ ppe_tbl_pkt_cnt_clear(ppe_dev, reg, PPE_PKT_CNT_SIZE_5WORD);
+ }
+
+ for (i = 0; i < PPE_PHY_PORT_RX_CNT_TBL_ENTRIES; i++) {
+ reg = PPE_PHY_PORT_RX_CNT_TBL_ADDR + PPE_PHY_PORT_RX_CNT_TBL_INC * i;
+ ppe_tbl_pkt_cnt_clear(ppe_dev, reg, PPE_PKT_CNT_SIZE_5WORD);
+ }
+
+ break;
+ case PPE_CNT_VLAN_RX:
+ for (i = 0; i < PPE_VLAN_CNT_TBL_ENTRIES; i++) {
+ reg = PPE_VLAN_CNT_TBL_ADDR + PPE_VLAN_CNT_TBL_INC * i;
+ ppe_tbl_pkt_cnt_clear(ppe_dev, reg, PPE_PKT_CNT_SIZE_3WORD);
+ }
+
+ break;
+ case PPE_CNT_L2_FWD:
+ for (i = 0; i < PPE_PRE_L2_CNT_TBL_ENTRIES; i++) {
+ reg = PPE_PRE_L2_CNT_TBL_ADDR + PPE_PRE_L2_CNT_TBL_INC * i;
+ ppe_tbl_pkt_cnt_clear(ppe_dev, reg, PPE_PKT_CNT_SIZE_5WORD);
+ }
+
+ break;
+ case PPE_CNT_CPU_CODE:
+ for (i = 0; i < PPE_DROP_CPU_CNT_TBL_ENTRIES; i++) {
+ reg = PPE_DROP_CPU_CNT_TBL_ADDR + PPE_DROP_CPU_CNT_TBL_INC * i;
+ ppe_tbl_pkt_cnt_clear(ppe_dev, reg, PPE_PKT_CNT_SIZE_3WORD);
+ }
+
+ break;
+ case PPE_CNT_VLAN_TX:
+ for (i = 0; i < PPE_EG_VSI_COUNTER_TBL_ENTRIES; i++) {
+ reg = PPE_EG_VSI_COUNTER_TBL_ADDR + PPE_EG_VSI_COUNTER_TBL_INC * i;
+ ppe_tbl_pkt_cnt_clear(ppe_dev, reg, PPE_PKT_CNT_SIZE_3WORD);
+ }
+
+ break;
+ case PPE_CNT_PORT_TX:
+ for (i = 0; i < PPE_PORT_TX_COUNTER_TBL_ENTRIES; i++) {
+ reg = PPE_PORT_TX_DROP_CNT_TBL_ADDR + PPE_PORT_TX_DROP_CNT_TBL_INC * i;
+ ppe_tbl_pkt_cnt_clear(ppe_dev, reg, PPE_PKT_CNT_SIZE_3WORD);
+
+ reg = PPE_PORT_TX_COUNTER_TBL_ADDR + PPE_PORT_TX_COUNTER_TBL_INC * i;
+ ppe_tbl_pkt_cnt_clear(ppe_dev, reg, PPE_PKT_CNT_SIZE_3WORD);
+ }
+
+ for (i = 0; i < PPE_VPORT_TX_COUNTER_TBL_ENTRIES; i++) {
+ reg = PPE_VPORT_TX_COUNTER_TBL_ADDR + PPE_VPORT_TX_COUNTER_TBL_INC * i;
+ ppe_tbl_pkt_cnt_clear(ppe_dev, reg, PPE_PKT_CNT_SIZE_3WORD);
+
+ reg = PPE_VPORT_TX_DROP_CNT_TBL_ADDR + PPE_VPORT_TX_DROP_CNT_TBL_INC * i;
+ ppe_tbl_pkt_cnt_clear(ppe_dev, reg, PPE_PKT_CNT_SIZE_3WORD);
+ }
+
+ break;
+ case PPE_CNT_QM:
+ for (i = 0; i < PPE_QUEUE_TX_COUNTER_TBL_ENTRIES; i++) {
+ reg = PPE_QUEUE_TX_COUNTER_TBL_ADDR + PPE_QUEUE_TX_COUNTER_TBL_INC * i;
+ ppe_tbl_pkt_cnt_clear(ppe_dev, reg, PPE_PKT_CNT_SIZE_3WORD);
+ }
+
+ break;
+ default:
+ break;
+ }
+
+ return count;
+}
+DEFINE_SHOW_STORE_ATTRIBUTE(ppe_packet_counter);
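+
+/* Usage sketch (illustrative; the per-counter file names come from the
+ * debugfs_files[] table defined earlier in this file):
+ *
+ *   cat /sys/kernel/debug/ppe/<counter>       - dump the counter group
+ *   echo 1 > /sys/kernel/debug/ppe/<counter>  - clear it; any write
+ *                                               resets the counters
+ */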
+
+void ppe_debugfs_setup(struct ppe_device *ppe_dev)
+{
+ struct ppe_debugfs_entry *entry;
+ int i;
+
+ ppe_dev->debugfs_root = debugfs_create_dir("ppe", NULL);
+ if (IS_ERR(ppe_dev->debugfs_root))
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(debugfs_files); i++) {
+ entry = devm_kzalloc(ppe_dev->dev, sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ return;
+
+ entry->ppe = ppe_dev;
+ entry->counter_type = debugfs_files[i].counter_type;
+
+ debugfs_create_file(debugfs_files[i].name, 0444,
+ ppe_dev->debugfs_root, entry,
+ &ppe_packet_counter_fops);
+ }
+}
+
+void ppe_debugfs_teardown(struct ppe_device *ppe_dev)
+{
+ debugfs_remove_recursive(ppe_dev->debugfs_root);
+ ppe_dev->debugfs_root = NULL;
+}
diff --git a/drivers/net/ethernet/qualcomm/ppe/ppe_debugfs.h b/drivers/net/ethernet/qualcomm/ppe/ppe_debugfs.h
new file mode 100644
index 000000000000..81f49a709123
--- /dev/null
+++ b/drivers/net/ethernet/qualcomm/ppe/ppe_debugfs.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0-only
+ *
+ * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
+ */
+
+/* PPE debugfs counters setup. */
+
+#ifndef __PPE_DEBUGFS_H__
+#define __PPE_DEBUGFS_H__
+
+#include "ppe.h"
+
+void ppe_debugfs_setup(struct ppe_device *ppe_dev);
+void ppe_debugfs_teardown(struct ppe_device *ppe_dev);
+
+#endif
diff --git a/drivers/net/ethernet/qualcomm/ppe/ppe_regs.h b/drivers/net/ethernet/qualcomm/ppe/ppe_regs.h
new file mode 100644
index 000000000000..746dfbb5a682
--- /dev/null
+++ b/drivers/net/ethernet/qualcomm/ppe/ppe_regs.h
@@ -0,0 +1,591 @@
+/* SPDX-License-Identifier: GPL-2.0-only
+ *
+ * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
+ */
+
+/* PPE hardware register and table declarations. */
+#ifndef __PPE_REGS_H__
+#define __PPE_REGS_H__
+
+#include <linux/bitfield.h>
+
+/* PPE scheduler configurations for buffer manager block. */
+#define PPE_BM_SCH_CTRL_ADDR 0xb000
+#define PPE_BM_SCH_CTRL_INC 4
+#define PPE_BM_SCH_CTRL_SCH_DEPTH GENMASK(7, 0)
+#define PPE_BM_SCH_CTRL_SCH_OFFSET GENMASK(14, 8)
+#define PPE_BM_SCH_CTRL_SCH_EN BIT(31)
+
+/* PPE drop counters. */
+#define PPE_DROP_CNT_TBL_ADDR 0xb024
+#define PPE_DROP_CNT_TBL_ENTRIES 8
+#define PPE_DROP_CNT_TBL_INC 4
+
+/* BM port drop counters. */
+#define PPE_DROP_STAT_TBL_ADDR 0xe000
+#define PPE_DROP_STAT_TBL_ENTRIES 30
+#define PPE_DROP_STAT_TBL_INC 0x10
+
+/* Egress VLAN counters. */
+#define PPE_EG_VSI_COUNTER_TBL_ADDR 0x41000
+#define PPE_EG_VSI_COUNTER_TBL_ENTRIES 64
+#define PPE_EG_VSI_COUNTER_TBL_INC 0x10
+
+/* Port TX counters. */
+#define PPE_PORT_TX_COUNTER_TBL_ADDR 0x45000
+#define PPE_PORT_TX_COUNTER_TBL_ENTRIES 8
+#define PPE_PORT_TX_COUNTER_TBL_INC 0x10
+
+/* Virtual port TX counters. */
+#define PPE_VPORT_TX_COUNTER_TBL_ADDR 0x47000
+#define PPE_VPORT_TX_COUNTER_TBL_ENTRIES 256
+#define PPE_VPORT_TX_COUNTER_TBL_INC 0x10
+
+/* Queue counters. */
+#define PPE_QUEUE_TX_COUNTER_TBL_ADDR 0x4a000
+#define PPE_QUEUE_TX_COUNTER_TBL_ENTRIES 300
+#define PPE_QUEUE_TX_COUNTER_TBL_INC 0x10
+
+/* RSS settings control how the RSS hash value is calculated for packets
+ * received towards the ARM cores. The hash is then used to derive the
+ * queue offset that selects the queue delivering the packet to the ARM
+ * cores.
+ */
+#define PPE_RSS_HASH_MASK_ADDR 0xb4318
+#define PPE_RSS_HASH_MASK_HASH_MASK GENMASK(20, 0)
+#define PPE_RSS_HASH_MASK_FRAGMENT BIT(28)
+
+#define PPE_RSS_HASH_SEED_ADDR 0xb431c
+#define PPE_RSS_HASH_SEED_VAL GENMASK(31, 0)
+
+#define PPE_RSS_HASH_MIX_ADDR 0xb4320
+#define PPE_RSS_HASH_MIX_ENTRIES 11
+#define PPE_RSS_HASH_MIX_INC 4
+#define PPE_RSS_HASH_MIX_VAL GENMASK(4, 0)
+
+#define PPE_RSS_HASH_FIN_ADDR 0xb4350
+#define PPE_RSS_HASH_FIN_ENTRIES 5
+#define PPE_RSS_HASH_FIN_INC 4
+#define PPE_RSS_HASH_FIN_INNER GENMASK(4, 0)
+#define PPE_RSS_HASH_FIN_OUTER GENMASK(9, 5)
+
+#define PPE_RSS_HASH_MASK_IPV4_ADDR 0xb4380
+#define PPE_RSS_HASH_MASK_IPV4_HASH_MASK GENMASK(20, 0)
+#define PPE_RSS_HASH_MASK_IPV4_FRAGMENT BIT(28)
+
+#define PPE_RSS_HASH_SEED_IPV4_ADDR 0xb4384
+#define PPE_RSS_HASH_SEED_IPV4_VAL GENMASK(31, 0)
+
+#define PPE_RSS_HASH_MIX_IPV4_ADDR 0xb4390
+#define PPE_RSS_HASH_MIX_IPV4_ENTRIES 5
+#define PPE_RSS_HASH_MIX_IPV4_INC 4
+#define PPE_RSS_HASH_MIX_IPV4_VAL GENMASK(4, 0)
+
+#define PPE_RSS_HASH_FIN_IPV4_ADDR 0xb43b0
+#define PPE_RSS_HASH_FIN_IPV4_ENTRIES 5
+#define PPE_RSS_HASH_FIN_IPV4_INC 4
+#define PPE_RSS_HASH_FIN_IPV4_INNER GENMASK(4, 0)
+#define PPE_RSS_HASH_FIN_IPV4_OUTER GENMASK(9, 5)
+
+#define PPE_BM_SCH_CFG_TBL_ADDR 0xc000
+#define PPE_BM_SCH_CFG_TBL_ENTRIES 128
+#define PPE_BM_SCH_CFG_TBL_INC 0x10
+#define PPE_BM_SCH_CFG_TBL_PORT_NUM GENMASK(3, 0)
+#define PPE_BM_SCH_CFG_TBL_DIR BIT(4)
+#define PPE_BM_SCH_CFG_TBL_VALID BIT(5)
+#define PPE_BM_SCH_CFG_TBL_SECOND_PORT_VALID BIT(6)
+#define PPE_BM_SCH_CFG_TBL_SECOND_PORT GENMASK(11, 8)
+
+/* PPE service code configuration for the ingress direction functions,
+ * including bypass configuration for relevant PPE switch core functions
+ * such as flow entry lookup bypass.
+ */
+#define PPE_SERVICE_TBL_ADDR 0x15000
+#define PPE_SERVICE_TBL_ENTRIES 256
+#define PPE_SERVICE_TBL_INC 0x10
+#define PPE_SERVICE_W0_BYPASS_BITMAP GENMASK(31, 0)
+#define PPE_SERVICE_W1_RX_CNT_EN BIT(0)
+
+#define PPE_SERVICE_SET_BYPASS_BITMAP(tbl_cfg, value) \
+ FIELD_MODIFY(PPE_SERVICE_W0_BYPASS_BITMAP, tbl_cfg, value)
+#define PPE_SERVICE_SET_RX_CNT_EN(tbl_cfg, value) \
+ FIELD_MODIFY(PPE_SERVICE_W1_RX_CNT_EN, (tbl_cfg) + 0x1, value)
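+
+/* The W<n> field macros above operate on word <n> of a multi-word table
+ * entry buffer: the W0 fields modify tbl_cfg[0], while the W1 fields are
+ * applied at (tbl_cfg) + 0x1, i.e. tbl_cfg[1]. The same convention is
+ * used by the other table accessors in this file.
+ */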
+
+/* PPE port egress VLAN configurations. */
+#define PPE_PORT_EG_VLAN_TBL_ADDR 0x20020
+#define PPE_PORT_EG_VLAN_TBL_ENTRIES 8
+#define PPE_PORT_EG_VLAN_TBL_INC 4
+#define PPE_PORT_EG_VLAN_TBL_VLAN_TYPE BIT(0)
+#define PPE_PORT_EG_VLAN_TBL_CTAG_MODE GENMASK(2, 1)
+#define PPE_PORT_EG_VLAN_TBL_STAG_MODE GENMASK(4, 3)
+#define PPE_PORT_EG_VLAN_TBL_VSI_TAG_MODE_EN BIT(5)
+#define PPE_PORT_EG_VLAN_TBL_PCP_PROP_CMD BIT(6)
+#define PPE_PORT_EG_VLAN_TBL_DEI_PROP_CMD BIT(7)
+#define PPE_PORT_EG_VLAN_TBL_TX_COUNTING_EN BIT(8)
+
+/* PPE queue counters enable/disable control. */
+#define PPE_EG_BRIDGE_CONFIG_ADDR 0x20044
+#define PPE_EG_BRIDGE_CONFIG_QUEUE_CNT_EN BIT(2)
+
+/* PPE service code configuration on the egress direction. */
+#define PPE_EG_SERVICE_TBL_ADDR 0x43000
+#define PPE_EG_SERVICE_TBL_ENTRIES 256
+#define PPE_EG_SERVICE_TBL_INC 0x10
+#define PPE_EG_SERVICE_W0_UPDATE_ACTION GENMASK(31, 0)
+#define PPE_EG_SERVICE_W1_NEXT_SERVCODE GENMASK(7, 0)
+#define PPE_EG_SERVICE_W1_HW_SERVICE GENMASK(13, 8)
+#define PPE_EG_SERVICE_W1_OFFSET_SEL BIT(14)
+#define PPE_EG_SERVICE_W1_TX_CNT_EN BIT(15)
+
+#define PPE_EG_SERVICE_SET_UPDATE_ACTION(tbl_cfg, value) \
+ FIELD_MODIFY(PPE_EG_SERVICE_W0_UPDATE_ACTION, tbl_cfg, value)
+#define PPE_EG_SERVICE_SET_NEXT_SERVCODE(tbl_cfg, value) \
+ FIELD_MODIFY(PPE_EG_SERVICE_W1_NEXT_SERVCODE, (tbl_cfg) + 0x1, value)
+#define PPE_EG_SERVICE_SET_HW_SERVICE(tbl_cfg, value) \
+ FIELD_MODIFY(PPE_EG_SERVICE_W1_HW_SERVICE, (tbl_cfg) + 0x1, value)
+#define PPE_EG_SERVICE_SET_OFFSET_SEL(tbl_cfg, value) \
+ FIELD_MODIFY(PPE_EG_SERVICE_W1_OFFSET_SEL, (tbl_cfg) + 0x1, value)
+#define PPE_EG_SERVICE_SET_TX_CNT_EN(tbl_cfg, value) \
+ FIELD_MODIFY(PPE_EG_SERVICE_W1_TX_CNT_EN, (tbl_cfg) + 0x1, value)
+
+/* PPE port bridge configuration */
+#define PPE_PORT_BRIDGE_CTRL_ADDR 0x60300
+#define PPE_PORT_BRIDGE_CTRL_ENTRIES 8
+#define PPE_PORT_BRIDGE_CTRL_INC 4
+#define PPE_PORT_BRIDGE_NEW_LRN_EN BIT(0)
+#define PPE_PORT_BRIDGE_STA_MOVE_LRN_EN BIT(3)
+#define PPE_PORT_BRIDGE_TXMAC_EN BIT(16)
+
+/* PPE port control configurations for the traffic to the multicast queues. */
+#define PPE_MC_MTU_CTRL_TBL_ADDR 0x60a00
+#define PPE_MC_MTU_CTRL_TBL_ENTRIES 8
+#define PPE_MC_MTU_CTRL_TBL_INC 4
+#define PPE_MC_MTU_CTRL_TBL_MTU GENMASK(13, 0)
+#define PPE_MC_MTU_CTRL_TBL_MTU_CMD GENMASK(15, 14)
+#define PPE_MC_MTU_CTRL_TBL_TX_CNT_EN BIT(16)
+
+/* PPE VSI configurations */
+#define PPE_VSI_TBL_ADDR 0x63800
+#define PPE_VSI_TBL_ENTRIES 64
+#define PPE_VSI_TBL_INC 0x10
+#define PPE_VSI_W0_MEMBER_PORT_BITMAP GENMASK(7, 0)
+#define PPE_VSI_W0_UUC_BITMAP GENMASK(15, 8)
+#define PPE_VSI_W0_UMC_BITMAP GENMASK(23, 16)
+#define PPE_VSI_W0_BC_BITMAP GENMASK(31, 24)
+#define PPE_VSI_W1_NEW_ADDR_LRN_EN BIT(0)
+#define PPE_VSI_W1_NEW_ADDR_FWD_CMD GENMASK(2, 1)
+#define PPE_VSI_W1_STATION_MOVE_LRN_EN BIT(3)
+#define PPE_VSI_W1_STATION_MOVE_FWD_CMD GENMASK(5, 4)
+
+#define PPE_VSI_SET_MEMBER_PORT_BITMAP(tbl_cfg, value) \
+ FIELD_MODIFY(PPE_VSI_W0_MEMBER_PORT_BITMAP, tbl_cfg, value)
+#define PPE_VSI_SET_UUC_BITMAP(tbl_cfg, value) \
+ FIELD_MODIFY(PPE_VSI_W0_UUC_BITMAP, tbl_cfg, value)
+#define PPE_VSI_SET_UMC_BITMAP(tbl_cfg, value) \
+ FIELD_MODIFY(PPE_VSI_W0_UMC_BITMAP, tbl_cfg, value)
+#define PPE_VSI_SET_BC_BITMAP(tbl_cfg, value) \
+ FIELD_MODIFY(PPE_VSI_W0_BC_BITMAP, tbl_cfg, value)
+#define PPE_VSI_SET_NEW_ADDR_LRN_EN(tbl_cfg, value) \
+ FIELD_MODIFY(PPE_VSI_W1_NEW_ADDR_LRN_EN, (tbl_cfg) + 0x1, value)
+#define PPE_VSI_SET_NEW_ADDR_FWD_CMD(tbl_cfg, value) \
+ FIELD_MODIFY(PPE_VSI_W1_NEW_ADDR_FWD_CMD, (tbl_cfg) + 0x1, value)
+#define PPE_VSI_SET_STATION_MOVE_LRN_EN(tbl_cfg, value) \
+ FIELD_MODIFY(PPE_VSI_W1_STATION_MOVE_LRN_EN, (tbl_cfg) + 0x1, value)
+#define PPE_VSI_SET_STATION_MOVE_FWD_CMD(tbl_cfg, value) \
+ FIELD_MODIFY(PPE_VSI_W1_STATION_MOVE_FWD_CMD, (tbl_cfg) + 0x1, value)
+
+/* PPE port control configurations for the traffic to the unicast queues. */
+#define PPE_MRU_MTU_CTRL_TBL_ADDR 0x65000
+#define PPE_MRU_MTU_CTRL_TBL_ENTRIES 256
+#define PPE_MRU_MTU_CTRL_TBL_INC 0x10
+#define PPE_MRU_MTU_CTRL_W0_MRU GENMASK(13, 0)
+#define PPE_MRU_MTU_CTRL_W0_MRU_CMD GENMASK(15, 14)
+#define PPE_MRU_MTU_CTRL_W0_MTU GENMASK(29, 16)
+#define PPE_MRU_MTU_CTRL_W0_MTU_CMD GENMASK(31, 30)
+#define PPE_MRU_MTU_CTRL_W1_RX_CNT_EN BIT(0)
+#define PPE_MRU_MTU_CTRL_W1_TX_CNT_EN BIT(1)
+#define PPE_MRU_MTU_CTRL_W1_SRC_PROFILE GENMASK(3, 2)
+#define PPE_MRU_MTU_CTRL_W1_INNER_PREC_LOW BIT(31)
+#define PPE_MRU_MTU_CTRL_W2_INNER_PREC_HIGH GENMASK(1, 0)
+
+#define PPE_MRU_MTU_CTRL_SET_MRU(tbl_cfg, value) \
+ FIELD_MODIFY(PPE_MRU_MTU_CTRL_W0_MRU, tbl_cfg, value)
+#define PPE_MRU_MTU_CTRL_SET_MRU_CMD(tbl_cfg, value) \
+ FIELD_MODIFY(PPE_MRU_MTU_CTRL_W0_MRU_CMD, tbl_cfg, value)
+#define PPE_MRU_MTU_CTRL_SET_MTU(tbl_cfg, value) \
+ FIELD_MODIFY(PPE_MRU_MTU_CTRL_W0_MTU, tbl_cfg, value)
+#define PPE_MRU_MTU_CTRL_SET_MTU_CMD(tbl_cfg, value) \
+ FIELD_MODIFY(PPE_MRU_MTU_CTRL_W0_MTU_CMD, tbl_cfg, value)
+#define PPE_MRU_MTU_CTRL_SET_RX_CNT_EN(tbl_cfg, value) \
+ FIELD_MODIFY(PPE_MRU_MTU_CTRL_W1_RX_CNT_EN, (tbl_cfg) + 0x1, value)
+#define PPE_MRU_MTU_CTRL_SET_TX_CNT_EN(tbl_cfg, value) \
+ FIELD_MODIFY(PPE_MRU_MTU_CTRL_W1_TX_CNT_EN, (tbl_cfg) + 0x1, value)
+
+/* PPE service code configuration for destination port and counter. */
+#define PPE_IN_L2_SERVICE_TBL_ADDR 0x66000
+#define PPE_IN_L2_SERVICE_TBL_ENTRIES 256
+#define PPE_IN_L2_SERVICE_TBL_INC 0x10
+#define PPE_IN_L2_SERVICE_TBL_DST_PORT_ID_VALID BIT(0)
+#define PPE_IN_L2_SERVICE_TBL_DST_PORT_ID GENMASK(4, 1)
+#define PPE_IN_L2_SERVICE_TBL_DST_DIRECTION BIT(5)
+#define PPE_IN_L2_SERVICE_TBL_DST_BYPASS_BITMAP GENMASK(29, 6)
+#define PPE_IN_L2_SERVICE_TBL_RX_CNT_EN BIT(30)
+#define PPE_IN_L2_SERVICE_TBL_TX_CNT_EN BIT(31)
+
+/* L2 Port configurations */
+#define PPE_L2_VP_PORT_TBL_ADDR 0x98000
+#define PPE_L2_VP_PORT_TBL_ENTRIES 256
+#define PPE_L2_VP_PORT_TBL_INC 0x10
+#define PPE_L2_VP_PORT_W0_INVALID_VSI_FWD_EN BIT(0)
+#define PPE_L2_VP_PORT_W0_DST_INFO GENMASK(9, 2)
+
+#define PPE_L2_PORT_SET_INVALID_VSI_FWD_EN(tbl_cfg, value) \
+ FIELD_MODIFY(PPE_L2_VP_PORT_W0_INVALID_VSI_FWD_EN, tbl_cfg, value)
+#define PPE_L2_PORT_SET_DST_INFO(tbl_cfg, value) \
+ FIELD_MODIFY(PPE_L2_VP_PORT_W0_DST_INFO, tbl_cfg, value)
+
+/* Port RX and RX drop counters. */
+#define PPE_PORT_RX_CNT_TBL_ADDR 0x150000
+#define PPE_PORT_RX_CNT_TBL_ENTRIES 256
+#define PPE_PORT_RX_CNT_TBL_INC 0x20
+
+/* Physical port RX and RX drop counters. */
+#define PPE_PHY_PORT_RX_CNT_TBL_ADDR 0x156000
+#define PPE_PHY_PORT_RX_CNT_TBL_ENTRIES 8
+#define PPE_PHY_PORT_RX_CNT_TBL_INC 0x20
+
+/* Counters for packets destined to the CPU port. */
+#define PPE_DROP_CPU_CNT_TBL_ADDR 0x160000
+#define PPE_DROP_CPU_CNT_TBL_ENTRIES 1280
+#define PPE_DROP_CPU_CNT_TBL_INC 0x10
+
+/* VLAN counters. */
+#define PPE_VLAN_CNT_TBL_ADDR 0x178000
+#define PPE_VLAN_CNT_TBL_ENTRIES 64
+#define PPE_VLAN_CNT_TBL_INC 0x10
+
+/* PPE L2 counters. */
+#define PPE_PRE_L2_CNT_TBL_ADDR 0x17c000
+#define PPE_PRE_L2_CNT_TBL_ENTRIES 64
+#define PPE_PRE_L2_CNT_TBL_INC 0x20
+
+/* Port TX drop counters. */
+#define PPE_PORT_TX_DROP_CNT_TBL_ADDR 0x17d000
+#define PPE_PORT_TX_DROP_CNT_TBL_ENTRIES 8
+#define PPE_PORT_TX_DROP_CNT_TBL_INC 0x10
+
+/* Virtual port TX counters. */
+#define PPE_VPORT_TX_DROP_CNT_TBL_ADDR 0x17e000
+#define PPE_VPORT_TX_DROP_CNT_TBL_ENTRIES 256
+#define PPE_VPORT_TX_DROP_CNT_TBL_INC 0x10
+
+/* Counters for tunnel packets. */
+#define PPE_TPR_PKT_CNT_TBL_ADDR 0x1d0080
+#define PPE_TPR_PKT_CNT_TBL_ENTRIES 8
+#define PPE_TPR_PKT_CNT_TBL_INC 4
+
+/* Counters for all packets received. */
+#define PPE_IPR_PKT_CNT_TBL_ADDR 0x1e0080
+#define PPE_IPR_PKT_CNT_TBL_ENTRIES 8
+#define PPE_IPR_PKT_CNT_TBL_INC 4
+
+/* PPE service code configuration for the tunnel packet. */
+#define PPE_TL_SERVICE_TBL_ADDR 0x306000
+#define PPE_TL_SERVICE_TBL_ENTRIES 256
+#define PPE_TL_SERVICE_TBL_INC 4
+#define PPE_TL_SERVICE_TBL_BYPASS_BITMAP GENMASK(31, 0)
+
+/* Port scheduler global config. */
+#define PPE_PSCH_SCH_DEPTH_CFG_ADDR 0x400000
+#define PPE_PSCH_SCH_DEPTH_CFG_INC 4
+#define PPE_PSCH_SCH_DEPTH_CFG_SCH_DEPTH GENMASK(7, 0)
+
+/* PPE queue level scheduler configurations. */
+#define PPE_L0_FLOW_MAP_TBL_ADDR 0x402000
+#define PPE_L0_FLOW_MAP_TBL_ENTRIES 300
+#define PPE_L0_FLOW_MAP_TBL_INC 0x10
+#define PPE_L0_FLOW_MAP_TBL_FLOW_ID GENMASK(5, 0)
+#define PPE_L0_FLOW_MAP_TBL_C_PRI GENMASK(8, 6)
+#define PPE_L0_FLOW_MAP_TBL_E_PRI GENMASK(11, 9)
+#define PPE_L0_FLOW_MAP_TBL_C_NODE_WT GENMASK(21, 12)
+#define PPE_L0_FLOW_MAP_TBL_E_NODE_WT GENMASK(31, 22)
+
+#define PPE_L0_C_FLOW_CFG_TBL_ADDR 0x404000
+#define PPE_L0_C_FLOW_CFG_TBL_ENTRIES 512
+#define PPE_L0_C_FLOW_CFG_TBL_INC 0x10
+#define PPE_L0_C_FLOW_CFG_TBL_NODE_ID GENMASK(7, 0)
+#define PPE_L0_C_FLOW_CFG_TBL_NODE_CREDIT_UNIT BIT(8)
+
+#define PPE_L0_E_FLOW_CFG_TBL_ADDR 0x406000
+#define PPE_L0_E_FLOW_CFG_TBL_ENTRIES 512
+#define PPE_L0_E_FLOW_CFG_TBL_INC 0x10
+#define PPE_L0_E_FLOW_CFG_TBL_NODE_ID GENMASK(7, 0)
+#define PPE_L0_E_FLOW_CFG_TBL_NODE_CREDIT_UNIT BIT(8)
+
+#define PPE_L0_FLOW_PORT_MAP_TBL_ADDR 0x408000
+#define PPE_L0_FLOW_PORT_MAP_TBL_ENTRIES 300
+#define PPE_L0_FLOW_PORT_MAP_TBL_INC 0x10
+#define PPE_L0_FLOW_PORT_MAP_TBL_PORT_NUM GENMASK(3, 0)
+
+#define PPE_L0_COMP_CFG_TBL_ADDR 0x428000
+#define PPE_L0_COMP_CFG_TBL_ENTRIES 300
+#define PPE_L0_COMP_CFG_TBL_INC 0x10
+#define PPE_L0_COMP_CFG_TBL_SHAPER_METER_LEN GENMASK(1, 0)
+#define PPE_L0_COMP_CFG_TBL_NODE_METER_LEN GENMASK(3, 2)
+
+/* PPE queue to Ethernet DMA ring mapping table. */
+#define PPE_RING_Q_MAP_TBL_ADDR 0x42a000
+#define PPE_RING_Q_MAP_TBL_ENTRIES 24
+#define PPE_RING_Q_MAP_TBL_INC 0x40
+
+/* Table addresses for per-queue dequeue setting. */
+#define PPE_DEQ_OPR_TBL_ADDR 0x430000
+#define PPE_DEQ_OPR_TBL_ENTRIES 300
+#define PPE_DEQ_OPR_TBL_INC 0x10
+#define PPE_DEQ_OPR_TBL_DEQ_DISABLE BIT(0)
+
+/* PPE flow level scheduler configurations. */
+#define PPE_L1_FLOW_MAP_TBL_ADDR 0x440000
+#define PPE_L1_FLOW_MAP_TBL_ENTRIES 64
+#define PPE_L1_FLOW_MAP_TBL_INC 0x10
+#define PPE_L1_FLOW_MAP_TBL_FLOW_ID GENMASK(3, 0)
+#define PPE_L1_FLOW_MAP_TBL_C_PRI GENMASK(6, 4)
+#define PPE_L1_FLOW_MAP_TBL_E_PRI GENMASK(9, 7)
+#define PPE_L1_FLOW_MAP_TBL_C_NODE_WT GENMASK(19, 10)
+#define PPE_L1_FLOW_MAP_TBL_E_NODE_WT GENMASK(29, 20)
+
+#define PPE_L1_C_FLOW_CFG_TBL_ADDR 0x442000
+#define PPE_L1_C_FLOW_CFG_TBL_ENTRIES 64
+#define PPE_L1_C_FLOW_CFG_TBL_INC 0x10
+#define PPE_L1_C_FLOW_CFG_TBL_NODE_ID GENMASK(5, 0)
+#define PPE_L1_C_FLOW_CFG_TBL_NODE_CREDIT_UNIT BIT(6)
+
+#define PPE_L1_E_FLOW_CFG_TBL_ADDR 0x444000
+#define PPE_L1_E_FLOW_CFG_TBL_ENTRIES 64
+#define PPE_L1_E_FLOW_CFG_TBL_INC 0x10
+#define PPE_L1_E_FLOW_CFG_TBL_NODE_ID GENMASK(5, 0)
+#define PPE_L1_E_FLOW_CFG_TBL_NODE_CREDIT_UNIT BIT(6)
+
+#define PPE_L1_FLOW_PORT_MAP_TBL_ADDR 0x446000
+#define PPE_L1_FLOW_PORT_MAP_TBL_ENTRIES 64
+#define PPE_L1_FLOW_PORT_MAP_TBL_INC 0x10
+#define PPE_L1_FLOW_PORT_MAP_TBL_PORT_NUM GENMASK(3, 0)
+
+#define PPE_L1_COMP_CFG_TBL_ADDR 0x46a000
+#define PPE_L1_COMP_CFG_TBL_ENTRIES 64
+#define PPE_L1_COMP_CFG_TBL_INC 0x10
+#define PPE_L1_COMP_CFG_TBL_SHAPER_METER_LEN GENMASK(1, 0)
+#define PPE_L1_COMP_CFG_TBL_NODE_METER_LEN GENMASK(3, 2)
+
+/* PPE port scheduler configurations for egress. */
+#define PPE_PSCH_SCH_CFG_TBL_ADDR 0x47a000
+#define PPE_PSCH_SCH_CFG_TBL_ENTRIES 128
+#define PPE_PSCH_SCH_CFG_TBL_INC 0x10
+#define PPE_PSCH_SCH_CFG_TBL_DES_PORT GENMASK(3, 0)
+#define PPE_PSCH_SCH_CFG_TBL_ENS_PORT GENMASK(7, 4)
+#define PPE_PSCH_SCH_CFG_TBL_ENS_PORT_BITMAP GENMASK(15, 8)
+#define PPE_PSCH_SCH_CFG_TBL_DES_SECOND_PORT_EN BIT(16)
+#define PPE_PSCH_SCH_CFG_TBL_DES_SECOND_PORT GENMASK(20, 17)
+
+/* The PPE supports 15 BM ports and 4 BM groups. BM ports 0-7 serve
+ * EDMA port 0, BM ports 8-13 serve the PPE physical ports 1-6, and
+ * BM port 14 serves the EIP port.
+ */
+#define PPE_BM_PORT_FC_MODE_ADDR 0x600100
+#define PPE_BM_PORT_FC_MODE_ENTRIES 15
+#define PPE_BM_PORT_FC_MODE_INC 0x4
+#define PPE_BM_PORT_FC_MODE_EN BIT(0)
+
+#define PPE_BM_PORT_GROUP_ID_ADDR 0x600180
+#define PPE_BM_PORT_GROUP_ID_ENTRIES 15
+#define PPE_BM_PORT_GROUP_ID_INC 0x4
+#define PPE_BM_PORT_GROUP_ID_SHARED_GROUP_ID GENMASK(1, 0)
+
+/* Counters for the PPE buffers used to cache packets. */
+#define PPE_BM_USED_CNT_TBL_ADDR 0x6001c0
+#define PPE_BM_USED_CNT_TBL_ENTRIES 15
+#define PPE_BM_USED_CNT_TBL_INC 0x4
+#define PPE_BM_USED_CNT_VAL GENMASK(10, 0)
+
+/* Counters for the PPE buffers used for packets received after a pause frame is sent. */
+#define PPE_BM_REACT_CNT_TBL_ADDR 0x600240
+#define PPE_BM_REACT_CNT_TBL_ENTRIES 15
+#define PPE_BM_REACT_CNT_TBL_INC 0x4
+#define PPE_BM_REACT_CNT_VAL GENMASK(8, 0)
+
+#define PPE_BM_SHARED_GROUP_CFG_ADDR 0x600290
+#define PPE_BM_SHARED_GROUP_CFG_ENTRIES 4
+#define PPE_BM_SHARED_GROUP_CFG_INC 0x4
+#define PPE_BM_SHARED_GROUP_CFG_SHARED_LIMIT GENMASK(10, 0)
+
+#define PPE_BM_PORT_FC_CFG_TBL_ADDR 0x601000
+#define PPE_BM_PORT_FC_CFG_TBL_ENTRIES 15
+#define PPE_BM_PORT_FC_CFG_TBL_INC 0x10
+#define PPE_BM_PORT_FC_W0_REACT_LIMIT GENMASK(8, 0)
+#define PPE_BM_PORT_FC_W0_RESUME_THRESHOLD GENMASK(17, 9)
+#define PPE_BM_PORT_FC_W0_RESUME_OFFSET GENMASK(28, 18)
+#define PPE_BM_PORT_FC_W0_CEILING_LOW GENMASK(31, 29)
+#define PPE_BM_PORT_FC_W1_CEILING_HIGH GENMASK(7, 0)
+#define PPE_BM_PORT_FC_W1_WEIGHT GENMASK(10, 8)
+#define PPE_BM_PORT_FC_W1_DYNAMIC BIT(11)
+#define PPE_BM_PORT_FC_W1_PRE_ALLOC GENMASK(22, 12)
+
+#define PPE_BM_PORT_FC_SET_REACT_LIMIT(tbl_cfg, value) \
+ FIELD_MODIFY(PPE_BM_PORT_FC_W0_REACT_LIMIT, tbl_cfg, value)
+#define PPE_BM_PORT_FC_SET_RESUME_THRESHOLD(tbl_cfg, value) \
+ FIELD_MODIFY(PPE_BM_PORT_FC_W0_RESUME_THRESHOLD, tbl_cfg, value)
+#define PPE_BM_PORT_FC_SET_RESUME_OFFSET(tbl_cfg, value) \
+ FIELD_MODIFY(PPE_BM_PORT_FC_W0_RESUME_OFFSET, tbl_cfg, value)
+#define PPE_BM_PORT_FC_SET_CEILING_LOW(tbl_cfg, value) \
+ FIELD_MODIFY(PPE_BM_PORT_FC_W0_CEILING_LOW, tbl_cfg, value)
+#define PPE_BM_PORT_FC_SET_CEILING_HIGH(tbl_cfg, value) \
+ FIELD_MODIFY(PPE_BM_PORT_FC_W1_CEILING_HIGH, (tbl_cfg) + 0x1, value)
+#define PPE_BM_PORT_FC_SET_WEIGHT(tbl_cfg, value) \
+ FIELD_MODIFY(PPE_BM_PORT_FC_W1_WEIGHT, (tbl_cfg) + 0x1, value)
+#define PPE_BM_PORT_FC_SET_DYNAMIC(tbl_cfg, value) \
+ FIELD_MODIFY(PPE_BM_PORT_FC_W1_DYNAMIC, (tbl_cfg) + 0x1, value)
+#define PPE_BM_PORT_FC_SET_PRE_ALLOC(tbl_cfg, value) \
+ FIELD_MODIFY(PPE_BM_PORT_FC_W1_PRE_ALLOC, (tbl_cfg) + 0x1, value)
+
+/* The queue base configurations based on destination port,
+ * service code or CPU code.
+ */
+#define PPE_UCAST_QUEUE_MAP_TBL_ADDR 0x810000
+#define PPE_UCAST_QUEUE_MAP_TBL_ENTRIES 3072
+#define PPE_UCAST_QUEUE_MAP_TBL_INC 0x10
+#define PPE_UCAST_QUEUE_MAP_TBL_PROFILE_ID GENMASK(3, 0)
+#define PPE_UCAST_QUEUE_MAP_TBL_QUEUE_ID GENMASK(11, 4)
+
+/* The queue offset configurations based on RSS hash value. */
+#define PPE_UCAST_HASH_MAP_TBL_ADDR 0x830000
+#define PPE_UCAST_HASH_MAP_TBL_ENTRIES 4096
+#define PPE_UCAST_HASH_MAP_TBL_INC 0x10
+#define PPE_UCAST_HASH_MAP_TBL_HASH GENMASK(7, 0)
+
+/* The queue offset configurations based on PPE internal priority. */
+#define PPE_UCAST_PRIORITY_MAP_TBL_ADDR 0x842000
+#define PPE_UCAST_PRIORITY_MAP_TBL_ENTRIES 256
+#define PPE_UCAST_PRIORITY_MAP_TBL_INC 0x10
+#define PPE_UCAST_PRIORITY_MAP_TBL_CLASS GENMASK(3, 0)
+
+/* PPE unicast queue (0-255) configurations. */
+#define PPE_AC_UNICAST_QUEUE_CFG_TBL_ADDR 0x848000
+#define PPE_AC_UNICAST_QUEUE_CFG_TBL_ENTRIES 256
+#define PPE_AC_UNICAST_QUEUE_CFG_TBL_INC 0x10
+#define PPE_AC_UNICAST_QUEUE_CFG_W0_EN BIT(0)
+#define PPE_AC_UNICAST_QUEUE_CFG_W0_WRED_EN BIT(1)
+#define PPE_AC_UNICAST_QUEUE_CFG_W0_FC_EN BIT(2)
+#define PPE_AC_UNICAST_QUEUE_CFG_W0_CLR_AWARE BIT(3)
+#define PPE_AC_UNICAST_QUEUE_CFG_W0_GRP_ID GENMASK(5, 4)
+#define PPE_AC_UNICAST_QUEUE_CFG_W0_PRE_LIMIT GENMASK(16, 6)
+#define PPE_AC_UNICAST_QUEUE_CFG_W0_DYNAMIC BIT(17)
+#define PPE_AC_UNICAST_QUEUE_CFG_W0_WEIGHT GENMASK(20, 18)
+#define PPE_AC_UNICAST_QUEUE_CFG_W0_THRESHOLD GENMASK(31, 21)
+#define PPE_AC_UNICAST_QUEUE_CFG_W3_GRN_RESUME GENMASK(23, 13)
+
+#define PPE_AC_UNICAST_QUEUE_SET_EN(tbl_cfg, value) \
+ FIELD_MODIFY(PPE_AC_UNICAST_QUEUE_CFG_W0_EN, tbl_cfg, value)
+#define PPE_AC_UNICAST_QUEUE_SET_GRP_ID(tbl_cfg, value) \
+ FIELD_MODIFY(PPE_AC_UNICAST_QUEUE_CFG_W0_GRP_ID, tbl_cfg, value)
+#define PPE_AC_UNICAST_QUEUE_SET_PRE_LIMIT(tbl_cfg, value) \
+ FIELD_MODIFY(PPE_AC_UNICAST_QUEUE_CFG_W0_PRE_LIMIT, tbl_cfg, value)
+#define PPE_AC_UNICAST_QUEUE_SET_DYNAMIC(tbl_cfg, value) \
+ FIELD_MODIFY(PPE_AC_UNICAST_QUEUE_CFG_W0_DYNAMIC, tbl_cfg, value)
+#define PPE_AC_UNICAST_QUEUE_SET_WEIGHT(tbl_cfg, value) \
+ FIELD_MODIFY(PPE_AC_UNICAST_QUEUE_CFG_W0_WEIGHT, tbl_cfg, value)
+#define PPE_AC_UNICAST_QUEUE_SET_THRESHOLD(tbl_cfg, value) \
+ FIELD_MODIFY(PPE_AC_UNICAST_QUEUE_CFG_W0_THRESHOLD, tbl_cfg, value)
+#define PPE_AC_UNICAST_QUEUE_SET_GRN_RESUME(tbl_cfg, value) \
+ FIELD_MODIFY(PPE_AC_UNICAST_QUEUE_CFG_W3_GRN_RESUME, (tbl_cfg) + 0x3, value)
+
+/* PPE multicast queue (256-299) configurations. */
+#define PPE_AC_MULTICAST_QUEUE_CFG_TBL_ADDR 0x84a000
+#define PPE_AC_MULTICAST_QUEUE_CFG_TBL_ENTRIES 44
+#define PPE_AC_MULTICAST_QUEUE_CFG_TBL_INC 0x10
+#define PPE_AC_MULTICAST_QUEUE_CFG_W0_EN BIT(0)
+#define PPE_AC_MULTICAST_QUEUE_CFG_W0_FC_EN BIT(1)
+#define PPE_AC_MULTICAST_QUEUE_CFG_W0_CLR_AWARE BIT(2)
+#define PPE_AC_MULTICAST_QUEUE_CFG_W0_GRP_ID GENMASK(4, 3)
+#define PPE_AC_MULTICAST_QUEUE_CFG_W0_PRE_LIMIT GENMASK(15, 5)
+#define PPE_AC_MULTICAST_QUEUE_CFG_W0_THRESHOLD GENMASK(26, 16)
+#define PPE_AC_MULTICAST_QUEUE_CFG_W2_RESUME GENMASK(17, 7)
+
+#define PPE_AC_MULTICAST_QUEUE_SET_EN(tbl_cfg, value) \
+ FIELD_MODIFY(PPE_AC_MULTICAST_QUEUE_CFG_W0_EN, tbl_cfg, value)
+#define PPE_AC_MULTICAST_QUEUE_SET_GRN_GRP_ID(tbl_cfg, value) \
+ FIELD_MODIFY(PPE_AC_MULTICAST_QUEUE_CFG_W0_GRP_ID, tbl_cfg, value)
+#define PPE_AC_MULTICAST_QUEUE_SET_GRN_PRE_LIMIT(tbl_cfg, value) \
+ FIELD_MODIFY(PPE_AC_MULTICAST_QUEUE_CFG_W0_PRE_LIMIT, tbl_cfg, value)
+#define PPE_AC_MULTICAST_QUEUE_SET_GRN_THRESHOLD(tbl_cfg, value) \
+ FIELD_MODIFY(PPE_AC_MULTICAST_QUEUE_CFG_W0_THRESHOLD, tbl_cfg, value)
+#define PPE_AC_MULTICAST_QUEUE_SET_GRN_RESUME(tbl_cfg, value) \
+ FIELD_MODIFY(PPE_AC_MULTICAST_QUEUE_CFG_W2_RESUME, (tbl_cfg) + 0x2, value)
+
+/* PPE admission control group (0-3) configurations */
+#define PPE_AC_GRP_CFG_TBL_ADDR 0x84c000
+#define PPE_AC_GRP_CFG_TBL_ENTRIES 0x4
+#define PPE_AC_GRP_CFG_TBL_INC 0x10
+#define PPE_AC_GRP_W0_AC_EN BIT(0)
+#define PPE_AC_GRP_W0_AC_FC_EN BIT(1)
+#define PPE_AC_GRP_W0_CLR_AWARE BIT(2)
+#define PPE_AC_GRP_W0_THRESHOLD_LOW GENMASK(31, 25)
+#define PPE_AC_GRP_W1_THRESHOLD_HIGH GENMASK(3, 0)
+#define PPE_AC_GRP_W1_BUF_LIMIT GENMASK(14, 4)
+#define PPE_AC_GRP_W2_RESUME_GRN GENMASK(15, 5)
+#define PPE_AC_GRP_W2_PRE_ALLOC GENMASK(26, 16)
+
+#define PPE_AC_GRP_SET_BUF_LIMIT(tbl_cfg, value) \
+ FIELD_MODIFY(PPE_AC_GRP_W1_BUF_LIMIT, (tbl_cfg) + 0x1, value)
+
+/* Counters for packets handled by unicast queues (0-255). */
+#define PPE_AC_UNICAST_QUEUE_CNT_TBL_ADDR 0x84e000
+#define PPE_AC_UNICAST_QUEUE_CNT_TBL_ENTRIES 256
+#define PPE_AC_UNICAST_QUEUE_CNT_TBL_INC 0x10
+#define PPE_AC_UNICAST_QUEUE_CNT_TBL_PEND_CNT GENMASK(12, 0)
+
+/* Counters for packets handled by multicast queues (256-299). */
+#define PPE_AC_MULTICAST_QUEUE_CNT_TBL_ADDR 0x852000
+#define PPE_AC_MULTICAST_QUEUE_CNT_TBL_ENTRIES 44
+#define PPE_AC_MULTICAST_QUEUE_CNT_TBL_INC 0x10
+#define PPE_AC_MULTICAST_QUEUE_CNT_TBL_PEND_CNT GENMASK(12, 0)
+
+/* Table addresses for per-queue enqueue setting. */
+#define PPE_ENQ_OPR_TBL_ADDR 0x85c000
+#define PPE_ENQ_OPR_TBL_ENTRIES 300
+#define PPE_ENQ_OPR_TBL_INC 0x10
+#define PPE_ENQ_OPR_TBL_ENQ_DISABLE BIT(0)
+
+/* The unicast drop count includes possible WRED drops for the green,
+ * yellow and red categories.
+ */
+#define PPE_UNICAST_DROP_CNT_TBL_ADDR 0x9e0000
+#define PPE_UNICAST_DROP_CNT_TBL_ENTRIES 1536
+#define PPE_UNICAST_DROP_CNT_TBL_INC 0x10
+#define PPE_UNICAST_DROP_TYPES 6
+#define PPE_UNICAST_DROP_FORCE_OFFSET 3
+
+/* There are 16 multicast queues dedicated to CPU port 0. The multicast drop
+ * count includes the forced drops for green, yellow and red category packets.
+ */
+#define PPE_P0_MULTICAST_DROP_CNT_TBL_ADDR 0x9f0000
+#define PPE_P0_MULTICAST_DROP_CNT_TBL_ENTRIES 48
+#define PPE_P0_MULTICAST_DROP_CNT_TBL_INC 0x10
+#define PPE_P0_MULTICAST_QUEUE_NUM 16
+
+/* Each PPE physical port has four dedicated multicast queues; with three
+ * drop types per queue, this gives 12 drop counter entries per port. The
+ * multicast drop count includes the forced drops for green, yellow and
+ * red category packets.
+ */
+#define PPE_MULTICAST_QUEUE_PORT_ADDR_INC 0x1000
+#define PPE_MULTICAST_DROP_CNT_TBL_INC 0x10
+#define PPE_MULTICAST_DROP_TYPES 3
+#define PPE_MULTICAST_QUEUE_NUM 4
+#define PPE_MULTICAST_DROP_CNT_TBL_ENTRIES 12
+
+#define PPE_CPU_PORT_MULTICAST_FORCE_DROP_CNT_TBL_ADDR(mq_offset) \
+ (PPE_P0_MULTICAST_DROP_CNT_TBL_ADDR + \
+ (mq_offset) * PPE_P0_MULTICAST_DROP_CNT_TBL_INC * \
+ PPE_MULTICAST_DROP_TYPES)
+
+#define PPE_P1_MULTICAST_DROP_CNT_TBL_ADDR \
+ (PPE_P0_MULTICAST_DROP_CNT_TBL_ADDR + PPE_MULTICAST_QUEUE_PORT_ADDR_INC)
+#endif
diff --git a/drivers/net/ethernet/renesas/Makefile b/drivers/net/ethernet/renesas/Makefile
index f65fc76f8b4d..d63e0c61bb68 100644
--- a/drivers/net/ethernet/renesas/Makefile
+++ b/drivers/net/ethernet/renesas/Makefile
@@ -8,6 +8,7 @@ obj-$(CONFIG_SH_ETH) += sh_eth.o
ravb-objs := ravb_main.o ravb_ptp.o
obj-$(CONFIG_RAVB) += ravb.o
+rswitch-objs := rswitch_main.o rswitch_l2.o
obj-$(CONFIG_RENESAS_ETHER_SWITCH) += rswitch.o
obj-$(CONFIG_RENESAS_GEN4_PTP) += rcar_gen4_ptp.o
diff --git a/drivers/net/ethernet/renesas/rswitch.h b/drivers/net/ethernet/renesas/rswitch.h
index 532192cbca4b..a1d4a877e5bd 100644
--- a/drivers/net/ethernet/renesas/rswitch.h
+++ b/drivers/net/ethernet/renesas/rswitch.h
@@ -1,19 +1,25 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* Renesas Ethernet Switch device driver
*
- * Copyright (C) 2022 Renesas Electronics Corporation
+ * Copyright (C) 2022-2025 Renesas Electronics Corporation
*/
#ifndef __RSWITCH_H__
#define __RSWITCH_H__
#include <linux/platform_device.h>
+#include <linux/phy.h>
+
#include "rcar_gen4_ptp.h"
#define RSWITCH_MAX_NUM_QUEUES 128
#define RSWITCH_NUM_AGENTS 5
#define RSWITCH_NUM_PORTS 3
+
+#define rswitch_for_all_ports(_priv, _rdev) \
+ list_for_each_entry(_rdev, &_priv->port_list, list)
+
#define rswitch_for_each_enabled_port(priv, i) \
for (i = 0; i < RSWITCH_NUM_PORTS; i++) \
if (priv->rdev[i]->disabled) \
@@ -809,7 +815,8 @@ enum rswitch_gwca_mode {
#define FWPC0_IP4EA BIT(10)
#define FWPC0_IPDSA BIT(12)
#define FWPC0_IPHLA BIT(18)
-#define FWPC0_MACSDA BIT(20)
+#define FWPC0_MACDSA BIT(20)
+#define FWPC0_MACSSA BIT(23)
#define FWPC0_MACHLA BIT(26)
#define FWPC0_MACHMA BIT(27)
#define FWPC0_VLANSA BIT(28)
@@ -820,12 +827,30 @@ enum rswitch_gwca_mode {
#define FWPC2(i) (FWPC20 + (i) * 0x10)
#define FWCP2_LTWFW GENMASK(16 + (RSWITCH_NUM_AGENTS - 1), 16)
+#define FWCP2_LTWFW_MASK GENMASK(16 + (RSWITCH_NUM_AGENTS - 1), 16)
#define FWPBFC(i) (FWPBFC0 + (i) * 0x10)
#define FWPBFC_PBDV GENMASK(RSWITCH_NUM_AGENTS - 1, 0)
#define FWPBFCSDC(j, i) (FWPBFCSDC00 + (i) * 0x10 + (j) * 0x04)
+#define FWMACHEC_MACHMUE_MASK GENMASK(26, 16)
+
+#define FWMACTIM_MACTIOG BIT(0)
+#define FWMACTIM_MACTR BIT(1)
+
+#define FWMACAGUSPC_MACAGUSP GENMASK(9, 0)
+#define FWMACAGC_MACAGT GENMASK(15, 0)
+#define FWMACAGC_MACAGE BIT(16)
+#define FWMACAGC_MACAGSL BIT(17)
+#define FWMACAGC_MACAGPM BIT(18)
+#define FWMACAGC_MACDES BIT(24)
+#define FWMACAGC_MACAGOG BIT(28)
+#define FWMACAGC_MACDESOG BIT(29)
+
+#define RSW_AGEING_CLK_PER_US 0x140
+#define RSW_AGEING_TIME 300
+
/* TOP */
#define TPEMIMC7(queue) (TPEMIMC70 + (queue) * 4)
@@ -994,10 +1019,18 @@ struct rswitch_device {
DECLARE_BITMAP(ts_skb_used, TS_TAGS_PER_PORT);
bool disabled;
+ struct list_head list;
+
int port;
struct rswitch_etha *etha;
struct device_node *np_port;
struct phy *serdes;
+
+ struct net_device *brdev; /* master bridge device */
+ unsigned int learning_requested : 1;
+ unsigned int learning_offloaded : 1;
+ unsigned int forwarding_requested : 1;
+ unsigned int forwarding_offloaded : 1;
};
struct rswitch_mfwd_mac_table_entry {
@@ -1022,11 +1055,17 @@ struct rswitch_private {
struct rswitch_etha etha[RSWITCH_NUM_PORTS];
struct rswitch_mfwd mfwd;
+ struct list_head port_list;
+
spinlock_t lock; /* lock interrupt registers' control */
struct clk *clk;
bool etha_no_runtime_change;
bool gwca_halt;
+ struct net_device *offload_brdev;
};
+bool is_rdev(const struct net_device *ndev);
+void rswitch_modify(void __iomem *addr, enum rswitch_reg reg, u32 clear, u32 set);
+
#endif /* #ifndef __RSWITCH_H__ */
diff --git a/drivers/net/ethernet/renesas/rswitch_l2.c b/drivers/net/ethernet/renesas/rswitch_l2.c
new file mode 100644
index 000000000000..4a69ec77d69c
--- /dev/null
+++ b/drivers/net/ethernet/renesas/rswitch_l2.c
@@ -0,0 +1,316 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Renesas Ethernet Switch device driver
+ *
+ * Copyright (C) 2025 Renesas Electronics Corporation
+ */
+
+#include <linux/err.h>
+#include <linux/etherdevice.h>
+#include <linux/if_bridge.h>
+#include <linux/kernel.h>
+#include <net/switchdev.h>
+
+#include "rswitch.h"
+#include "rswitch_l2.h"
+
+static bool rdev_for_l2_offload(struct rswitch_device *rdev)
+{
+ return rdev->priv->offload_brdev &&
+ rdev->brdev == rdev->priv->offload_brdev &&
+ (test_bit(rdev->port, rdev->priv->opened_ports));
+}
+
+static void rswitch_change_l2_hw_offloading(struct rswitch_device *rdev,
+ bool start, bool learning)
+{
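+ /* Judging by the bit names, learning is governed by the
+ * source-address enable (FWPC0_MACSSA) together with the
+ * hash-table enables (FWPC0_MACHLA/FWPC0_MACHMA), while
+ * forwarding is governed by the destination-address enable
+ * (FWPC0_MACDSA).
+ */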
+ u32 bits = learning ? FWPC0_MACSSA | FWPC0_MACHLA | FWPC0_MACHMA : FWPC0_MACDSA;
+ u32 clear = start ? 0 : bits;
+ u32 set = start ? bits : 0;
+
+ if ((learning && rdev->learning_offloaded == start) ||
+ (!learning && rdev->forwarding_offloaded == start))
+ return;
+
+ rswitch_modify(rdev->priv->addr, FWPC0(rdev->port), clear, set);
+
+ if (learning)
+ rdev->learning_offloaded = start;
+ else
+ rdev->forwarding_offloaded = start;
+
+ netdev_info(rdev->ndev, "%s hw %s\n", start ? "starting" : "stopping",
+ learning ? "learning" : "forwarding");
+}
+
+static void rswitch_update_l2_hw_learning(struct rswitch_private *priv)
+{
+ struct rswitch_device *rdev;
+ bool learning_needed;
+
+ rswitch_for_all_ports(priv, rdev) {
+ if (rdev_for_l2_offload(rdev))
+ learning_needed = rdev->learning_requested;
+ else
+ learning_needed = false;
+
+ rswitch_change_l2_hw_offloading(rdev, learning_needed, true);
+ }
+}
+
+static void rswitch_update_l2_hw_forwarding(struct rswitch_private *priv)
+{
+ struct rswitch_device *rdev;
+ unsigned int fwd_mask;
+
+ /* calculate fwd_mask with zeroes in bits corresponding to ports that
+ * shall participate in hardware forwarding
+ */
+ fwd_mask = GENMASK(RSWITCH_NUM_AGENTS - 1, 0);
+
+ rswitch_for_all_ports(priv, rdev) {
+ if (rdev_for_l2_offload(rdev) && rdev->forwarding_requested)
+ fwd_mask &= ~BIT(rdev->port);
+ }
+
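+ /* Example (illustrative): with RSWITCH_NUM_AGENTS = 5 and ports 0
+ * and 2 in forwarding state, fwd_mask = 0b11111 & ~(BIT(0) | BIT(2))
+ * = 0b11010.
+ */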
+ rswitch_for_all_ports(priv, rdev) {
+ if ((rdev_for_l2_offload(rdev) && rdev->forwarding_requested) ||
+ rdev->forwarding_offloaded) {
+ /* Update allowed offload destinations even for ports
+ * with L2 offload enabled earlier.
+ *
+ * Do not allow L2 forwarding to self for hw port.
+ */
+ iowrite32(FIELD_PREP(FWCP2_LTWFW_MASK, fwd_mask | BIT(rdev->port)),
+ priv->addr + FWPC2(rdev->port));
+ }
+
+ if (rdev_for_l2_offload(rdev) &&
+ rdev->forwarding_requested &&
+ !rdev->forwarding_offloaded) {
+ rswitch_change_l2_hw_offloading(rdev, true, false);
+ } else if (rdev->forwarding_offloaded) {
+ rswitch_change_l2_hw_offloading(rdev, false, false);
+ }
+ }
+}
+
+void rswitch_update_l2_offload(struct rswitch_private *priv)
+{
+ rswitch_update_l2_hw_learning(priv);
+ rswitch_update_l2_hw_forwarding(priv);
+}
+
+static void rswitch_update_offload_brdev(struct rswitch_private *priv)
+{
+ struct net_device *offload_brdev = NULL;
+ struct rswitch_device *rdev, *rdev2;
+
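+ /* Offload the first bridge that has at least two rswitch ports as
+ * members: for each port, scan the ports listed before it for one
+ * that is enslaved to the same bridge.
+ */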
+ rswitch_for_all_ports(priv, rdev) {
+ if (!rdev->brdev)
+ continue;
+ rswitch_for_all_ports(priv, rdev2) {
+ if (rdev2 == rdev)
+ break;
+ if (rdev2->brdev == rdev->brdev) {
+ offload_brdev = rdev->brdev;
+ break;
+ }
+ }
+ if (offload_brdev)
+ break;
+ }
+
+ if (offload_brdev == priv->offload_brdev)
+ ; /* no change, nothing to report */
+ else if (offload_brdev && priv->offload_brdev)
+ dev_dbg(&priv->pdev->dev,
+ "changing l2 offload from %s to %s\n",
+ netdev_name(priv->offload_brdev),
+ netdev_name(offload_brdev));
+ else if (offload_brdev)
+ dev_dbg(&priv->pdev->dev, "starting l2 offload for %s\n",
+ netdev_name(offload_brdev));
+ else
+ dev_dbg(&priv->pdev->dev, "stopping l2 offload for %s\n",
+ netdev_name(priv->offload_brdev));
+
+ priv->offload_brdev = offload_brdev;
+
+ rswitch_update_l2_offload(priv);
+}
+
+static bool rswitch_port_check(const struct net_device *ndev)
+{
+ return is_rdev(ndev);
+}
+
+static void rswitch_port_update_brdev(struct net_device *ndev,
+ struct net_device *brdev)
+{
+ struct rswitch_device *rdev;
+
+ if (!is_rdev(ndev))
+ return;
+
+ rdev = netdev_priv(ndev);
+ rdev->brdev = brdev;
+ rswitch_update_offload_brdev(rdev->priv);
+}
+
+static int rswitch_port_update_stp_state(struct net_device *ndev, u8 stp_state)
+{
+ struct rswitch_device *rdev;
+
+ if (!is_rdev(ndev))
+ return -ENODEV;
+
+ rdev = netdev_priv(ndev);
+ rdev->learning_requested = (stp_state == BR_STATE_LEARNING ||
+ stp_state == BR_STATE_FORWARDING);
+ rdev->forwarding_requested = (stp_state == BR_STATE_FORWARDING);
+ rswitch_update_l2_offload(rdev->priv);
+
+ return 0;
+}
+
+static int rswitch_netdevice_event(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
+ struct netdev_notifier_changeupper_info *info;
+ struct net_device *brdev;
+
+ if (!rswitch_port_check(ndev))
+ return NOTIFY_DONE;
+ if (event != NETDEV_CHANGEUPPER)
+ return NOTIFY_DONE;
+
+ info = ptr;
+
+ if (netif_is_bridge_master(info->upper_dev)) {
+ brdev = info->linking ? info->upper_dev : NULL;
+ rswitch_port_update_brdev(ndev, brdev);
+ }
+
+ return NOTIFY_OK;
+}
+
+static int rswitch_update_ageing_time(struct net_device *ndev, clock_t time)
+{
+ struct rswitch_device *rdev = netdev_priv(ndev);
+ u32 reg_val;
+
+ if (!is_rdev(ndev))
+ return -ENODEV;
+
+ if (!FIELD_FIT(FWMACAGC_MACAGT, time))
+ return -EINVAL;
+
+ reg_val = FIELD_PREP(FWMACAGC_MACAGT, time);
+ reg_val |= FWMACAGC_MACAGE | FWMACAGC_MACAGSL;
+ iowrite32(reg_val, rdev->priv->addr + FWMACAGC);
+
+ return 0;
+}
+
+static int rswitch_port_attr_set(struct net_device *ndev, const void *ctx,
+ const struct switchdev_attr *attr,
+ struct netlink_ext_ack *extack)
+{
+ switch (attr->id) {
+ case SWITCHDEV_ATTR_ID_PORT_STP_STATE:
+ return rswitch_port_update_stp_state(ndev, attr->u.stp_state);
+ case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME:
+ return rswitch_update_ageing_time(ndev, attr->u.ageing_time);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int rswitch_switchdev_event(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ struct net_device *ndev = switchdev_notifier_info_to_dev(ptr);
+ int ret;
+
+ if (event == SWITCHDEV_PORT_ATTR_SET) {
+ ret = switchdev_handle_port_attr_set(ndev, ptr,
+ rswitch_port_check,
+ rswitch_port_attr_set);
+ return notifier_from_errno(ret);
+ }
+
+ if (!rswitch_port_check(ndev))
+ return NOTIFY_DONE;
+
+ return notifier_from_errno(-EOPNOTSUPP);
+}
+
+static int rswitch_switchdev_blocking_event(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ struct net_device *ndev = switchdev_notifier_info_to_dev(ptr);
+ int ret;
+
+ switch (event) {
+ case SWITCHDEV_PORT_OBJ_ADD:
+ case SWITCHDEV_PORT_OBJ_DEL:
+ ret = -EOPNOTSUPP;
+ break;
+ case SWITCHDEV_PORT_ATTR_SET:
+ ret = switchdev_handle_port_attr_set(ndev, ptr,
+ rswitch_port_check,
+ rswitch_port_attr_set);
+ break;
+ default:
+ if (!rswitch_port_check(ndev))
+ return NOTIFY_DONE;
+ ret = -EOPNOTSUPP;
+ }
+
+ return notifier_from_errno(ret);
+}
+
+static struct notifier_block rswitch_netdevice_nb = {
+ .notifier_call = rswitch_netdevice_event,
+};
+
+static struct notifier_block rswitch_switchdev_nb = {
+ .notifier_call = rswitch_switchdev_event,
+};
+
+static struct notifier_block rswitch_switchdev_blocking_nb = {
+ .notifier_call = rswitch_switchdev_blocking_event,
+};
+
+int rswitch_register_notifiers(void)
+{
+ int ret;
+
+ ret = register_netdevice_notifier(&rswitch_netdevice_nb);
+ if (ret)
+ goto register_netdevice_notifier_failed;
+
+ ret = register_switchdev_notifier(&rswitch_switchdev_nb);
+ if (ret)
+ goto register_switchdev_notifier_failed;
+
+ ret = register_switchdev_blocking_notifier(&rswitch_switchdev_blocking_nb);
+ if (ret)
+ goto register_switchdev_blocking_notifier_failed;
+
+ return 0;
+
+register_switchdev_blocking_notifier_failed:
+ unregister_switchdev_notifier(&rswitch_switchdev_nb);
+register_switchdev_notifier_failed:
+ unregister_netdevice_notifier(&rswitch_netdevice_nb);
+register_netdevice_notifier_failed:
+
+ return ret;
+}
+
+void rswitch_unregister_notifiers(void)
+{
+ unregister_switchdev_blocking_notifier(&rswitch_switchdev_blocking_nb);
+ unregister_switchdev_notifier(&rswitch_switchdev_nb);
+ unregister_netdevice_notifier(&rswitch_netdevice_nb);
+}
diff --git a/drivers/net/ethernet/renesas/rswitch_l2.h b/drivers/net/ethernet/renesas/rswitch_l2.h
new file mode 100644
index 000000000000..57050ede8f31
--- /dev/null
+++ b/drivers/net/ethernet/renesas/rswitch_l2.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Renesas Ethernet Switch device driver
+ *
+ * Copyright (C) 2025 Renesas Electronics Corporation
+ */
+
+#ifndef __RSWITCH_L2_H__
+#define __RSWITCH_L2_H__
+
+void rswitch_update_l2_offload(struct rswitch_private *priv);
+
+int rswitch_register_notifiers(void);
+void rswitch_unregister_notifiers(void);
+
+#endif /* #ifndef __RSWITCH_L2_H__ */
diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch_main.c
index aba772e14555..59dceb81607c 100644
--- a/drivers/net/ethernet/renesas/rswitch.c
+++ b/drivers/net/ethernet/renesas/rswitch_main.c
@@ -1,15 +1,18 @@
// SPDX-License-Identifier: GPL-2.0
/* Renesas Ethernet Switch device driver
*
- * Copyright (C) 2022 Renesas Electronics Corporation
+ * Copyright (C) 2022-2025 Renesas Electronics Corporation
*/
#include <linux/clk.h>
#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/etherdevice.h>
+#include <linux/ethtool.h>
+#include <linux/ip.h>
#include <linux/iopoll.h>
#include <linux/kernel.h>
+#include <linux/list.h>
#include <linux/module.h>
#include <linux/net_tstamp.h>
#include <linux/of.h>
@@ -25,6 +28,7 @@
#include <linux/sys_soc.h>
#include "rswitch.h"
+#include "rswitch_l2.h"
static int rswitch_reg_wait(void __iomem *addr, u32 offs, u32 mask, u32 expected)
{
@@ -34,7 +38,7 @@ static int rswitch_reg_wait(void __iomem *addr, u32 offs, u32 mask, u32 expected
1, RSWITCH_TIMEOUT_US);
}
-static void rswitch_modify(void __iomem *addr, enum rswitch_reg reg, u32 clear, u32 set)
+void rswitch_modify(void __iomem *addr, enum rswitch_reg reg, u32 clear, u32 set)
{
iowrite32((ioread32(addr + reg) & ~clear) | set, addr + reg);
}
@@ -109,10 +113,11 @@ static void rswitch_top_init(struct rswitch_private *priv)
}
/* Forwarding engine block (MFWD) */
-static void rswitch_fwd_init(struct rswitch_private *priv)
+static int rswitch_fwd_init(struct rswitch_private *priv)
{
u32 all_ports_mask = GENMASK(RSWITCH_NUM_AGENTS - 1, 0);
unsigned int i;
+ u32 reg_val;
/* Start with empty configuration */
for (i = 0; i < RSWITCH_NUM_AGENTS; i++) {
@@ -128,6 +133,14 @@ static void rswitch_fwd_init(struct rswitch_private *priv)
iowrite32(0, priv->addr + FWPBFC(i));
}
+ /* Configure MAC table aging */
+ rswitch_modify(priv->addr, FWMACAGUSPC, FWMACAGUSPC_MACAGUSP,
+ FIELD_PREP(FWMACAGUSPC_MACAGUSP, RSW_AGEING_CLK_PER_US));
+
+ reg_val = FIELD_PREP(FWMACAGC_MACAGT, RSW_AGEING_TIME);
+ reg_val |= FWMACAGC_MACAGE | FWMACAGC_MACAGSL;
+ iowrite32(reg_val, priv->addr + FWMACAGC);
+
/* For enabled ETHA ports, setup port based forwarding */
rswitch_for_each_enabled_port(priv, i) {
/* Port based forwarding from port i to GWCA port */
@@ -140,6 +153,16 @@ static void rswitch_fwd_init(struct rswitch_private *priv)
/* For GWCA port, allow direct descriptor forwarding */
rswitch_modify(priv->addr, FWPC1(priv->gwca.index), FWPC1_DDE, FWPC1_DDE);
+
+ /* Initialize hardware L2 forwarding table */
+
+ /* Allow entire table to be used for "unsecure" entries */
+ rswitch_modify(priv->addr, FWMACHEC, 0, FWMACHEC_MACHMUE_MASK);
+
+ /* Initialize MAC hash table */
+ iowrite32(FWMACTIM_MACTIOG, priv->addr + FWMACTIM);
+
+ return rswitch_reg_wait(priv->addr, FWMACTIM, FWMACTIM_MACTIOG, 0);
}
/* Gateway CPU agent block (GWCA) */
@@ -1602,6 +1625,9 @@ static int rswitch_open(struct net_device *ndev)
netif_start_queue(ndev);
+ if (rdev->brdev)
+ rswitch_update_l2_offload(rdev->priv);
+
return 0;
};
@@ -1624,6 +1650,9 @@ static int rswitch_stop(struct net_device *ndev)
napi_disable(&rdev->napi);
+ if (rdev->brdev)
+ rswitch_update_l2_offload(rdev->priv);
+
if (bitmap_empty(rdev->priv->opened_ports, RSWITCH_NUM_PORTS))
iowrite32(GWCA_TS_IRQ_BIT, rdev->priv->addr + GWTSDID);
@@ -1850,16 +1879,46 @@ static int rswitch_eth_ioctl(struct net_device *ndev, struct ifreq *req, int cmd
}
}
+static int rswitch_get_port_parent_id(struct net_device *ndev,
+ struct netdev_phys_item_id *ppid)
+{
+ struct rswitch_device *rdev = netdev_priv(ndev);
+ const char *name;
+
+ name = dev_name(&rdev->priv->pdev->dev);
+ ppid->id_len = min_t(size_t, strlen(name), sizeof(ppid->id));
+ memcpy(ppid->id, name, ppid->id_len);
+
+ return 0;
+}
+
+static int rswitch_get_phys_port_name(struct net_device *ndev,
+ char *name, size_t len)
+{
+ struct rswitch_device *rdev = netdev_priv(ndev);
+
+ snprintf(name, len, "tsn%d", rdev->port);
+
+ return 0;
+}
+
static const struct net_device_ops rswitch_netdev_ops = {
.ndo_open = rswitch_open,
.ndo_stop = rswitch_stop,
.ndo_start_xmit = rswitch_start_xmit,
.ndo_get_stats = rswitch_get_stats,
.ndo_eth_ioctl = rswitch_eth_ioctl,
+ .ndo_get_port_parent_id = rswitch_get_port_parent_id,
+ .ndo_get_phys_port_name = rswitch_get_phys_port_name,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_mac_address = eth_mac_addr,
};
+bool is_rdev(const struct net_device *ndev)
+{
+ return (ndev->netdev_ops == &rswitch_netdev_ops);
+}
+
static int rswitch_get_ts_info(struct net_device *ndev, struct kernel_ethtool_ts_info *info)
{
struct rswitch_device *rdev = netdev_priv(ndev);
@@ -1959,6 +2018,8 @@ static int rswitch_device_alloc(struct rswitch_private *priv, unsigned int index
if (err < 0)
goto out_txdmac;
+ list_add_tail(&rdev->list, &priv->port_list);
+
return 0;
out_txdmac:
@@ -1978,6 +2039,7 @@ static void rswitch_device_free(struct rswitch_private *priv, unsigned int index
struct rswitch_device *rdev = priv->rdev[index];
struct net_device *ndev = rdev->ndev;
+ list_del(&rdev->list);
rswitch_txdmac_free(ndev);
rswitch_rxdmac_free(ndev);
of_node_put(rdev->np_port);
@@ -2024,7 +2086,9 @@ static int rswitch_init(struct rswitch_private *priv)
}
}
- rswitch_fwd_init(priv);
+ err = rswitch_fwd_init(priv);
+ if (err < 0)
+ goto err_fwd_init;
err = rcar_gen4_ptp_register(priv->ptp_priv, RCAR_GEN4_PTP_REG_LAYOUT,
clk_get_rate(priv->clk));
@@ -2073,6 +2137,7 @@ err_gwca_ts_request_irq:
err_gwca_request_irq:
rcar_gen4_ptp_unregister(priv->ptp_priv);
+err_fwd_init:
err_ptp_register:
for (i = 0; i < RSWITCH_NUM_PORTS; i++)
rswitch_device_free(priv, i);
@@ -2107,6 +2172,7 @@ static int renesas_eth_sw_probe(struct platform_device *pdev)
priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
if (!priv)
return -ENOMEM;
+
spin_lock_init(&priv->lock);
priv->clk = devm_clk_get(&pdev->dev, NULL);
@@ -2144,6 +2210,8 @@ static int renesas_eth_sw_probe(struct platform_device *pdev)
if (!priv->gwca.queues)
return -ENOMEM;
+ INIT_LIST_HEAD(&priv->port_list);
+
pm_runtime_enable(&pdev->dev);
pm_runtime_get_sync(&pdev->dev);
@@ -2154,6 +2222,15 @@ static int renesas_eth_sw_probe(struct platform_device *pdev)
return ret;
}
+ if (list_empty(&priv->port_list))
+ dev_warn(&pdev->dev, "could not initialize any ports\n");
+
+ ret = rswitch_register_notifiers();
+ if (ret) {
+ dev_err(&pdev->dev, "could not register notifiers\n");
+ return ret;
+ }
+
device_set_wakeup_capable(&pdev->dev, 1);
return ret;
@@ -2187,6 +2264,7 @@ static void renesas_eth_sw_remove(struct platform_device *pdev)
{
struct rswitch_private *priv = platform_get_drvdata(pdev);
+ rswitch_unregister_notifiers();
rswitch_deinit(priv);
pm_runtime_put(&pdev->dev);
diff --git a/drivers/net/ethernet/sfc/efx_channels.c b/drivers/net/ethernet/sfc/efx_channels.c
index 06b4f52713ef..0f66324ed351 100644
--- a/drivers/net/ethernet/sfc/efx_channels.c
+++ b/drivers/net/ethernet/sfc/efx_channels.c
@@ -216,8 +216,8 @@ static int efx_allocate_msix_channels(struct efx_nic *efx,
if (efx_separate_tx_channels) {
efx->n_tx_channels =
- min(max(n_channels / 2, 1U),
- efx->max_tx_channels);
+ clamp(n_channels / 2, 1U,
+ efx->max_tx_channels);
efx->tx_channel_offset =
n_channels - efx->n_tx_channels;
efx->n_rx_channels =
diff --git a/drivers/net/ethernet/sfc/falcon/efx.c b/drivers/net/ethernet/sfc/falcon/efx.c
index b07f7e4e2877..d19fbf8732ff 100644
--- a/drivers/net/ethernet/sfc/falcon/efx.c
+++ b/drivers/net/ethernet/sfc/falcon/efx.c
@@ -1394,9 +1394,8 @@ static int ef4_probe_interrupts(struct ef4_nic *efx)
if (n_channels > extra_channels)
n_channels -= extra_channels;
if (ef4_separate_tx_channels) {
- efx->n_tx_channels = min(max(n_channels / 2,
- 1U),
- efx->max_tx_channels);
+ efx->n_tx_channels = clamp(n_channels / 2, 1U,
+ efx->max_tx_channels);
efx->n_rx_channels = max(n_channels -
efx->n_tx_channels,
1U);
diff --git a/drivers/net/ethernet/sfc/siena/efx_channels.c b/drivers/net/ethernet/sfc/siena/efx_channels.c
index d120b3c83ac0..703419866d18 100644
--- a/drivers/net/ethernet/sfc/siena/efx_channels.c
+++ b/drivers/net/ethernet/sfc/siena/efx_channels.c
@@ -217,8 +217,8 @@ static int efx_allocate_msix_channels(struct efx_nic *efx,
if (efx_siena_separate_tx_channels) {
efx->n_tx_channels =
- min(max(n_channels / 2, 1U),
- efx->max_tx_channels);
+ clamp(n_channels / 2, 1U,
+ efx->max_tx_channels);
efx->tx_channel_offset =
n_channels - efx->n_tx_channels;
efx->n_rx_channels =
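All three sfc hunks above make the same cleanup: the open-coded min(max(x, lo), hi) idiom becomes the kernel's clamp(x, lo, hi) from <linux/minmax.h>, which is equivalent but states the intent directly. A minimal userspace sketch of the equivalence (the macros below are simplified stand-ins for the kernel ones):

#include <stdio.h>

#define min(a, b) ((a) < (b) ? (a) : (b))
#define max(a, b) ((a) > (b) ? (a) : (b))
#define clamp(val, lo, hi) min(max(val, lo), hi)

int main(void)
{
	unsigned int n_channels = 5, max_tx_channels = 2;

	unsigned int before = min(max(n_channels / 2, 1U), max_tx_channels);
	unsigned int after  = clamp(n_channels / 2, 1U, max_tx_channels);

	printf("%u %u\n", before, after);	/* both print 2 */
	return 0;
}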
diff --git a/drivers/net/ethernet/sfc/tc_encap_actions.c b/drivers/net/ethernet/sfc/tc_encap_actions.c
index e872f926e438..eef06e48185d 100644
--- a/drivers/net/ethernet/sfc/tc_encap_actions.c
+++ b/drivers/net/ethernet/sfc/tc_encap_actions.c
@@ -11,6 +11,8 @@
#include "tc_encap_actions.h"
#include "tc.h"
#include "mae.h"
+#include <net/flow.h>
+#include <net/inet_dscp.h>
#include <net/vxlan.h>
#include <net/geneve.h>
#include <net/netevent.h>
@@ -99,7 +101,7 @@ static int efx_bind_neigh(struct efx_nic *efx,
case EFX_ENCAP_TYPE_GENEVE:
flow4.flowi4_proto = IPPROTO_UDP;
flow4.fl4_dport = encap->key.tp_dst;
- flow4.flowi4_tos = encap->key.tos;
+ flow4.flowi4_dscp = inet_dsfield_to_dscp(encap->key.tos);
flow4.daddr = encap->key.u.ipv4.dst;
flow4.saddr = encap->key.u.ipv4.src;
break;
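This hunk is part of the flowi4_tos to flowi4_dscp conversion: the raw DS field from the tunnel key is passed through inet_dsfield_to_dscp(), which keeps the six DSCP bits and drops the two ECN bits. A userspace sketch of that conversion (the kernel helper additionally wraps the result in the typed dscp_t):

#include <stdint.h>
#include <stdio.h>

static uint8_t dsfield_to_dscp(uint8_t dsfield)
{
	return dsfield & 0xfc;	/* keep DSCP, drop the two ECN bits */
}

int main(void)
{
	uint8_t tos = 0xb9;	/* DSCP 46 (EF) with ECN bits 01 */

	printf("0x%02x\n", dsfield_to_dscp(tos));	/* prints 0xb8 */
	return 0;
}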
diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index cbffccb3b9af..eaa1f2e1c5a5 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -602,7 +602,6 @@ struct mac_device_info {
unsigned int mcast_bits_log2;
unsigned int rx_csum;
unsigned int pcs;
- unsigned int pmt;
unsigned int ps;
unsigned int xlgmac;
unsigned int num_vlan;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c
index 889e2bb6f7f5..80200a6aa0cb 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c
@@ -49,7 +49,7 @@ struct imx_dwmac_ops {
u32 flags;
bool mac_rgmii_txclk_auto_adj;
- int (*fix_soc_reset)(void *priv, void __iomem *ioaddr);
+ int (*fix_soc_reset)(struct stmmac_priv *priv, void __iomem *ioaddr);
int (*set_intf_mode)(struct plat_stmmacenet_data *plat_dat);
void (*fix_mac_speed)(void *priv, int speed, unsigned int mode);
};
@@ -265,9 +265,9 @@ static void imx93_dwmac_fix_speed(void *priv, int speed, unsigned int mode)
writel(old_ctrl, dwmac->base_addr + MAC_CTRL_REG);
}
-static int imx_dwmac_mx93_reset(void *priv, void __iomem *ioaddr)
+static int imx_dwmac_mx93_reset(struct stmmac_priv *priv, void __iomem *ioaddr)
{
- struct plat_stmmacenet_data *plat_dat = priv;
+ struct plat_stmmacenet_data *plat_dat = priv->plat;
u32 value = readl(ioaddr + DMA_BUS_MODE);
/* DMA SW reset */
@@ -301,6 +301,7 @@ imx_dwmac_parse_dt(struct imx_priv_data *dwmac, struct device *dev)
dwmac->clk_mem = NULL;
if (of_machine_is_compatible("fsl,imx8dxl") ||
+ of_machine_is_compatible("fsl,imx91") ||
of_machine_is_compatible("fsl,imx93")) {
dwmac->clk_mem = devm_clk_get(dev, "mem");
if (IS_ERR(dwmac->clk_mem)) {
@@ -310,9 +311,10 @@ imx_dwmac_parse_dt(struct imx_priv_data *dwmac, struct device *dev)
}
if (of_machine_is_compatible("fsl,imx8mp") ||
+ of_machine_is_compatible("fsl,imx91") ||
of_machine_is_compatible("fsl,imx93")) {
/* Binding doc describes the property:
- * is required by i.MX8MP, i.MX93.
+ * is required by i.MX8MP, i.MX91, i.MX93.
* is optional for i.MX8DXL.
*/
dwmac->intf_regmap =
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
index ea33ae39be6b..3fac3945cbfa 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
@@ -1231,6 +1231,37 @@ static int stmmac_config_multi_msi(struct pci_dev *pdev,
return 0;
}
+static int intel_eth_pci_suspend(struct device *dev, void *bsp_priv)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ int ret;
+
+ ret = pci_save_state(pdev);
+ if (ret)
+ return ret;
+
+ pci_wake_from_d3(pdev, true);
+ pci_set_power_state(pdev, PCI_D3hot);
+ return 0;
+}
+
+static int intel_eth_pci_resume(struct device *dev, void *bsp_priv)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ int ret;
+
+ pci_restore_state(pdev);
+ pci_set_power_state(pdev, PCI_D0);
+
+ ret = pcim_enable_device(pdev);
+ if (ret)
+ return ret;
+
+ pci_set_master(pdev);
+
+ return 0;
+}
+
/**
* intel_eth_pci_probe
*
@@ -1292,6 +1323,9 @@ static int intel_eth_pci_probe(struct pci_dev *pdev,
pci_set_master(pdev);
plat->bsp_priv = intel_priv;
+ plat->suspend = intel_eth_pci_suspend;
+ plat->resume = intel_eth_pci_resume;
+
intel_priv->mdio_adhoc_addr = INTEL_MGBE_ADHOC_ADDR;
intel_priv->crossts_adj = 1;
@@ -1355,44 +1389,6 @@ static void intel_eth_pci_remove(struct pci_dev *pdev)
clk_unregister_fixed_rate(priv->plat->stmmac_clk);
}
-static int __maybe_unused intel_eth_pci_suspend(struct device *dev)
-{
- struct pci_dev *pdev = to_pci_dev(dev);
- int ret;
-
- ret = stmmac_suspend(dev);
- if (ret)
- return ret;
-
- ret = pci_save_state(pdev);
- if (ret)
- return ret;
-
- pci_wake_from_d3(pdev, true);
- pci_set_power_state(pdev, PCI_D3hot);
- return 0;
-}
-
-static int __maybe_unused intel_eth_pci_resume(struct device *dev)
-{
- struct pci_dev *pdev = to_pci_dev(dev);
- int ret;
-
- pci_restore_state(pdev);
- pci_set_power_state(pdev, PCI_D0);
-
- ret = pcim_enable_device(pdev);
- if (ret)
- return ret;
-
- pci_set_master(pdev);
-
- return stmmac_resume(dev);
-}
-
-static SIMPLE_DEV_PM_OPS(intel_eth_pm_ops, intel_eth_pci_suspend,
- intel_eth_pci_resume);
-
#define PCI_DEVICE_ID_INTEL_QUARK 0x0937
#define PCI_DEVICE_ID_INTEL_EHL_RGMII1G 0x4b30
#define PCI_DEVICE_ID_INTEL_EHL_SGMII1G 0x4b31
@@ -1442,7 +1438,7 @@ static struct pci_driver intel_eth_pci_driver = {
.probe = intel_eth_pci_probe,
.remove = intel_eth_pci_remove,
.driver = {
- .pm = &intel_eth_pm_ops,
+ .pm = &stmmac_simple_pm_ops,
},
};
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c
index e1591e6217d4..6fca0fca4892 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c
@@ -509,10 +509,15 @@ static int loongson_dwmac_acpi_config(struct pci_dev *pdev,
}
/* Loongson's DWMAC device may take nearly two seconds to complete DMA reset */
-static int loongson_dwmac_fix_reset(void *priv, void __iomem *ioaddr)
+static int loongson_dwmac_fix_reset(struct stmmac_priv *priv, void __iomem *ioaddr)
{
u32 value = readl(ioaddr + DMA_BUS_MODE);
+ if (value & DMA_BUS_MODE_SFT_RESET) {
+ netdev_err(priv->dev, "the PHY clock is missing\n");
+ return -EINVAL;
+ }
+
value |= DMA_BUS_MODE_SFT_RESET;
writel(value, ioaddr + DMA_BUS_MODE);
@@ -521,6 +526,37 @@ static int loongson_dwmac_fix_reset(void *priv, void __iomem *ioaddr)
10000, 2000000);
}
+static int loongson_dwmac_suspend(struct device *dev, void *bsp_priv)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ int ret;
+
+ ret = pci_save_state(pdev);
+ if (ret)
+ return ret;
+
+ pci_disable_device(pdev);
+ pci_wake_from_d3(pdev, true);
+ return 0;
+}
+
+static int loongson_dwmac_resume(struct device *dev, void *bsp_priv)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ int ret;
+
+ pci_restore_state(pdev);
+ pci_set_power_state(pdev, PCI_D0);
+
+ ret = pci_enable_device(pdev);
+ if (ret)
+ return ret;
+
+ pci_set_master(pdev);
+
+ return 0;
+}
+
static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
struct plat_stmmacenet_data *plat;
@@ -565,6 +601,8 @@ static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id
plat->bsp_priv = ld;
plat->setup = loongson_dwmac_setup;
plat->fix_soc_reset = loongson_dwmac_fix_reset;
+ plat->suspend = loongson_dwmac_suspend;
+ plat->resume = loongson_dwmac_resume;
ld->dev = &pdev->dev;
ld->loongson_id = readl(res.addr + GMAC_VERSION) & 0xff;
@@ -621,44 +659,6 @@ static void loongson_dwmac_remove(struct pci_dev *pdev)
pci_disable_device(pdev);
}
-static int __maybe_unused loongson_dwmac_suspend(struct device *dev)
-{
- struct pci_dev *pdev = to_pci_dev(dev);
- int ret;
-
- ret = stmmac_suspend(dev);
- if (ret)
- return ret;
-
- ret = pci_save_state(pdev);
- if (ret)
- return ret;
-
- pci_disable_device(pdev);
- pci_wake_from_d3(pdev, true);
- return 0;
-}
-
-static int __maybe_unused loongson_dwmac_resume(struct device *dev)
-{
- struct pci_dev *pdev = to_pci_dev(dev);
- int ret;
-
- pci_restore_state(pdev);
- pci_set_power_state(pdev, PCI_D0);
-
- ret = pci_enable_device(pdev);
- if (ret)
- return ret;
-
- pci_set_master(pdev);
-
- return stmmac_resume(dev);
-}
-
-static SIMPLE_DEV_PM_OPS(loongson_dwmac_pm_ops, loongson_dwmac_suspend,
- loongson_dwmac_resume);
-
static const struct pci_device_id loongson_dwmac_id_table[] = {
{ PCI_DEVICE_DATA(LOONGSON, GMAC1, &loongson_gmac_pci_info) },
{ PCI_DEVICE_DATA(LOONGSON, GMAC2, &loongson_gmac_pci_info) },
@@ -673,7 +673,7 @@ static struct pci_driver loongson_dwmac_driver = {
.probe = loongson_dwmac_probe,
.remove = loongson_dwmac_remove,
.driver = {
- .pm = &loongson_dwmac_pm_ops,
+ .pm = &stmmac_simple_pm_ops,
},
};
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c
index 39421d6a34e4..f1b36f0a401d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c
@@ -523,7 +523,7 @@ static int mediatek_dwmac_clk_init(struct mediatek_dwmac_plat_data *plat)
return ret;
}
-static int mediatek_dwmac_init(struct platform_device *pdev, void *priv)
+static int mediatek_dwmac_init(struct device *dev, void *priv)
{
struct mediatek_dwmac_plat_data *plat = priv;
const struct mediatek_dwmac_variant *variant = plat->variant;
@@ -532,7 +532,7 @@ static int mediatek_dwmac_init(struct platform_device *pdev, void *priv)
if (variant->dwmac_set_phy_interface) {
ret = variant->dwmac_set_phy_interface(plat);
if (ret) {
- dev_err(plat->dev, "failed to set phy interface, err = %d\n", ret);
+ dev_err(dev, "failed to set phy interface, err = %d\n", ret);
return ret;
}
}
@@ -540,7 +540,7 @@ static int mediatek_dwmac_init(struct platform_device *pdev, void *priv)
if (variant->dwmac_set_delay) {
ret = variant->dwmac_set_delay(plat);
if (ret) {
- dev_err(plat->dev, "failed to set delay value, err = %d\n", ret);
+ dev_err(dev, "failed to set delay value, err = %d\n", ret);
return ret;
}
}
@@ -589,7 +589,7 @@ static int mediatek_dwmac_common_data(struct platform_device *pdev,
plat->maxmtu = ETH_DATA_LEN;
plat->host_dma_width = priv_plat->variant->dma_bit_mask;
plat->bsp_priv = priv_plat;
- plat->init = mediatek_dwmac_init;
+ plat->resume = mediatek_dwmac_init;
plat->clks_config = mediatek_dwmac_clks_config;
plat->safety_feat_cfg = devm_kzalloc(&pdev->dev,
@@ -654,7 +654,7 @@ static int mediatek_dwmac_probe(struct platform_device *pdev)
return PTR_ERR(plat_dat);
mediatek_dwmac_common_data(pdev, plat_dat, priv_plat);
- mediatek_dwmac_init(pdev, priv_plat);
+ mediatek_dwmac_init(&pdev->dev, priv_plat);
ret = mediatek_dwmac_clks_config(priv_plat, true);
if (ret)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
index f6687c2f30f6..266c53379236 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
@@ -71,7 +71,6 @@ struct rk_priv_data {
phy_interface_t phy_iface;
int id;
struct regulator *regulator;
- bool suspended;
const struct rk_gmac_ops *ops;
bool clk_enabled;
@@ -557,9 +556,7 @@ static const struct rk_gmac_ops rk3308_ops = {
#define RK3328_GMAC_RMII_MODE GRF_BIT(9)
#define RK3328_GMAC_RMII_MODE_CLR GRF_CLR_BIT(9)
#define RK3328_GMAC_TXCLK_DLY_ENABLE GRF_BIT(0)
-#define RK3328_GMAC_TXCLK_DLY_DISABLE GRF_CLR_BIT(0)
#define RK3328_GMAC_RXCLK_DLY_ENABLE GRF_BIT(1)
-#define RK3328_GMAC_RXCLK_DLY_DISABLE GRF_CLR_BIT(0)
/* RK3328_GRF_MACPHY_CON1 */
#define RK3328_MACPHY_RMII_MODE GRF_BIT(9)
@@ -1413,12 +1410,15 @@ static int rk_gmac_clk_init(struct plat_stmmacenet_data *plat)
clk_set_rate(plat->stmmac_clk, 50000000);
}
- if (plat->phy_node && bsp_priv->integrated_phy) {
+ if (plat->phy_node) {
bsp_priv->clk_phy = of_clk_get(plat->phy_node, 0);
ret = PTR_ERR_OR_ZERO(bsp_priv->clk_phy);
- if (ret)
- return dev_err_probe(dev, ret, "Cannot get PHY clock\n");
- clk_set_rate(bsp_priv->clk_phy, 50000000);
+ /* If this is not the integrated PHY, clk_phy is optional */
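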
+ if (bsp_priv->integrated_phy) {
+ if (ret)
+ return dev_err_probe(dev, ret, "Cannot get PHY clock\n");
+ clk_set_rate(bsp_priv->clk_phy, 50000000);
+ }
}
return 0;
@@ -1706,6 +1706,28 @@ static int rk_set_clk_tx_rate(void *bsp_priv_, struct clk *clk_tx_i,
return -EINVAL;
}
+static int rk_gmac_suspend(struct device *dev, void *bsp_priv_)
+{
+ struct rk_priv_data *bsp_priv = bsp_priv_;
+
+ /* Keep the PHY up if we use Wake-on-LAN. */
+ if (!device_may_wakeup(dev))
+ rk_gmac_powerdown(bsp_priv);
+
+ return 0;
+}
+
+static int rk_gmac_resume(struct device *dev, void *bsp_priv_)
+{
+ struct rk_priv_data *bsp_priv = bsp_priv_;
+
+ /* The PHY was up for Wake-on-LAN. */
+ if (!device_may_wakeup(dev))
+ rk_gmac_powerup(bsp_priv);
+
+ return 0;
+}
+
static int rk_gmac_probe(struct platform_device *pdev)
{
struct plat_stmmacenet_data *plat_dat;
@@ -1738,6 +1760,8 @@ static int rk_gmac_probe(struct platform_device *pdev)
plat_dat->get_interfaces = rk_get_interfaces;
plat_dat->set_clk_tx_rate = rk_set_clk_tx_rate;
+ plat_dat->suspend = rk_gmac_suspend;
+ plat_dat->resume = rk_gmac_resume;
plat_dat->bsp_priv = rk_gmac_setup(pdev, plat_dat, data);
if (IS_ERR(plat_dat->bsp_priv))
@@ -1776,37 +1800,6 @@ static void rk_gmac_remove(struct platform_device *pdev)
clk_put(bsp_priv->clk_phy);
}
-#ifdef CONFIG_PM_SLEEP
-static int rk_gmac_suspend(struct device *dev)
-{
- struct rk_priv_data *bsp_priv = get_stmmac_bsp_priv(dev);
- int ret = stmmac_suspend(dev);
-
- /* Keep the PHY up if we use Wake-on-Lan. */
- if (!device_may_wakeup(dev)) {
- rk_gmac_powerdown(bsp_priv);
- bsp_priv->suspended = true;
- }
-
- return ret;
-}
-
-static int rk_gmac_resume(struct device *dev)
-{
- struct rk_priv_data *bsp_priv = get_stmmac_bsp_priv(dev);
-
- /* The PHY was up for Wake-on-Lan. */
- if (bsp_priv->suspended) {
- rk_gmac_powerup(bsp_priv);
- bsp_priv->suspended = false;
- }
-
- return stmmac_resume(dev);
-}
-#endif /* CONFIG_PM_SLEEP */
-
-static SIMPLE_DEV_PM_OPS(rk_gmac_pm_ops, rk_gmac_suspend, rk_gmac_resume);
-
static const struct of_device_id rk_gmac_dwmac_match[] = {
{ .compatible = "rockchip,px30-gmac", .data = &px30_ops },
{ .compatible = "rockchip,rk3128-gmac", .data = &rk3128_ops },
@@ -1832,7 +1825,7 @@ static struct platform_driver rk_gmac_dwmac_driver = {
.remove = rk_gmac_remove,
.driver = {
.name = "rk_gmac-dwmac",
- .pm = &rk_gmac_pm_ops,
+ .pm = &stmmac_simple_pm_ops,
.of_match_table = rk_gmac_dwmac_match,
},
};
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c
index 1eb16eec9c0d..77a04c4579c9 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c
@@ -498,6 +498,26 @@ static int stm32mp1_parse_data(struct stm32_dwmac *dwmac,
return err;
}
+static int stm32_dwmac_suspend(struct device *dev, void *bsp_priv)
+{
+ struct stm32_dwmac *dwmac = bsp_priv;
+
+ stm32_dwmac_clk_disable(dwmac);
+
+ return dwmac->ops->suspend ? dwmac->ops->suspend(dwmac) : 0;
+}
+
+static int stm32_dwmac_resume(struct device *dev, void *bsp_priv)
+{
+ struct stmmac_priv *priv = netdev_priv(dev_get_drvdata(dev));
+ struct stm32_dwmac *dwmac = bsp_priv;
+
+ if (dwmac->ops->resume)
+ dwmac->ops->resume(dwmac);
+
+ return stm32_dwmac_init(priv->plat);
+}
+
static int stm32_dwmac_probe(struct platform_device *pdev)
{
struct plat_stmmacenet_data *plat_dat;
@@ -535,6 +555,8 @@ static int stm32_dwmac_probe(struct platform_device *pdev)
plat_dat->flags |= STMMAC_FLAG_EN_TX_LPI_CLK_PHY_CAP;
plat_dat->bsp_priv = dwmac;
+ plat_dat->suspend = stm32_dwmac_suspend;
+ plat_dat->resume = stm32_dwmac_resume;
ret = stm32_dwmac_init(plat_dat);
if (ret)
@@ -600,50 +622,6 @@ static void stm32mp1_resume(struct stm32_dwmac *dwmac)
clk_disable_unprepare(dwmac->clk_ethstp);
}
-#ifdef CONFIG_PM_SLEEP
-static int stm32_dwmac_suspend(struct device *dev)
-{
- struct net_device *ndev = dev_get_drvdata(dev);
- struct stmmac_priv *priv = netdev_priv(ndev);
- struct stm32_dwmac *dwmac = priv->plat->bsp_priv;
-
- int ret;
-
- ret = stmmac_suspend(dev);
- if (ret)
- return ret;
-
- stm32_dwmac_clk_disable(dwmac);
-
- if (dwmac->ops->suspend)
- ret = dwmac->ops->suspend(dwmac);
-
- return ret;
-}
-
-static int stm32_dwmac_resume(struct device *dev)
-{
- struct net_device *ndev = dev_get_drvdata(dev);
- struct stmmac_priv *priv = netdev_priv(ndev);
- struct stm32_dwmac *dwmac = priv->plat->bsp_priv;
- int ret;
-
- if (dwmac->ops->resume)
- dwmac->ops->resume(dwmac);
-
- ret = stm32_dwmac_init(priv->plat);
- if (ret)
- return ret;
-
- ret = stmmac_resume(dev);
-
- return ret;
-}
-#endif /* CONFIG_PM_SLEEP */
-
-static SIMPLE_DEV_PM_OPS(stm32_dwmac_pm_ops,
- stm32_dwmac_suspend, stm32_dwmac_resume);
-
static struct stm32_ops stm32mcu_dwmac_data = {
.set_mode = stm32mcu_set_mode
};
@@ -691,7 +669,7 @@ static struct platform_driver stm32_dwmac_driver = {
.remove = stm32_dwmac_remove,
.driver = {
.name = "stm32-dwmac",
- .pm = &stm32_dwmac_pm_ops,
+ .pm = &stmmac_simple_pm_ops,
.of_match_table = stm32_dwmac_match,
},
};
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
index 2796dc426943..690f3650f84e 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
@@ -31,10 +31,6 @@
*/
/* struct emac_variant - Describe dwmac-sun8i hardware variant
- * @default_syscon_value: The default value of the EMAC register in syscon
- * This value is used for disabling properly EMAC
- * and used as a good starting value in case of the
- * boot process(uboot) leave some stuff.
* @syscon_field reg_field for the syscon's gmac register
* @soc_has_internal_phy: Does the MAC embed an internal PHY
* @support_mii: Does the MAC handle MII
@@ -48,7 +44,6 @@
* value of zero indicates this is not supported.
*/
struct emac_variant {
- u32 default_syscon_value;
const struct reg_field *syscon_field;
bool soc_has_internal_phy;
bool support_mii;
@@ -94,7 +89,6 @@ static const struct reg_field sun8i_ccu_reg_field = {
};
static const struct emac_variant emac_variant_h3 = {
- .default_syscon_value = 0x58000,
.syscon_field = &sun8i_syscon_reg_field,
.soc_has_internal_phy = true,
.support_mii = true,
@@ -105,14 +99,12 @@ static const struct emac_variant emac_variant_h3 = {
};
static const struct emac_variant emac_variant_v3s = {
- .default_syscon_value = 0x38000,
.syscon_field = &sun8i_syscon_reg_field,
.soc_has_internal_phy = true,
.support_mii = true
};
static const struct emac_variant emac_variant_a83t = {
- .default_syscon_value = 0,
.syscon_field = &sun8i_syscon_reg_field,
.soc_has_internal_phy = false,
.support_mii = true,
@@ -122,7 +114,6 @@ static const struct emac_variant emac_variant_a83t = {
};
static const struct emac_variant emac_variant_r40 = {
- .default_syscon_value = 0,
.syscon_field = &sun8i_ccu_reg_field,
.support_mii = true,
.support_rgmii = true,
@@ -130,7 +121,6 @@ static const struct emac_variant emac_variant_r40 = {
};
static const struct emac_variant emac_variant_a64 = {
- .default_syscon_value = 0,
.syscon_field = &sun8i_syscon_reg_field,
.soc_has_internal_phy = false,
.support_mii = true,
@@ -141,7 +131,6 @@ static const struct emac_variant emac_variant_a64 = {
};
static const struct emac_variant emac_variant_h6 = {
- .default_syscon_value = 0x50000,
.syscon_field = &sun8i_syscon_reg_field,
/* The "Internal PHY" of H6 is not on the die. It's on the
* co-packaged AC200 chip instead.
@@ -933,25 +922,11 @@ static int sun8i_dwmac_set_syscon(struct device *dev,
struct sunxi_priv_data *gmac = plat->bsp_priv;
struct device_node *node = dev->of_node;
int ret;
- u32 reg, val;
-
- ret = regmap_field_read(gmac->regmap_field, &val);
- if (ret) {
- dev_err(dev, "Fail to read from regmap field.\n");
- return ret;
- }
-
- reg = gmac->variant->default_syscon_value;
- if (reg != val)
- dev_warn(dev,
- "Current syscon value is not the default %x (expect %x)\n",
- val, reg);
+ u32 reg = 0, val;
if (gmac->variant->soc_has_internal_phy) {
if (of_property_read_bool(node, "allwinner,leds-active-low"))
reg |= H3_EPHY_LED_POL;
- else
- reg &= ~H3_EPHY_LED_POL;
/* Force EPHY xtal frequency to 24MHz. */
reg |= H3_EPHY_CLK_SEL;
@@ -965,11 +940,6 @@ static int sun8i_dwmac_set_syscon(struct device *dev,
* address. No need to mask it again.
*/
reg |= ret << H3_EPHY_ADDR_SHIFT;
- } else {
- /* For SoCs without internal PHY the PHY selection bit should be
- * set to 0 (external PHY).
- */
- reg &= ~H3_EPHY_SELECT;
}
if (!of_property_read_u32(node, "allwinner,tx-delay-ps", &val)) {
@@ -980,8 +950,6 @@ static int sun8i_dwmac_set_syscon(struct device *dev,
val /= 100;
dev_dbg(dev, "set tx-delay to %x\n", val);
if (val <= gmac->variant->tx_delay_max) {
- reg &= ~(gmac->variant->tx_delay_max <<
- SYSCON_ETXDC_SHIFT);
reg |= (val << SYSCON_ETXDC_SHIFT);
} else {
dev_err(dev, "Invalid TX clock delay: %d\n",
@@ -998,8 +966,6 @@ static int sun8i_dwmac_set_syscon(struct device *dev,
val /= 100;
dev_dbg(dev, "set rx-delay to %x\n", val);
if (val <= gmac->variant->rx_delay_max) {
- reg &= ~(gmac->variant->rx_delay_max <<
- SYSCON_ERXDC_SHIFT);
reg |= (val << SYSCON_ERXDC_SHIFT);
} else {
dev_err(dev, "Invalid RX clock delay: %d\n",
@@ -1008,11 +974,6 @@ static int sun8i_dwmac_set_syscon(struct device *dev,
}
}
- /* Clear interface mode bits */
- reg &= ~(SYSCON_ETCS_MASK | SYSCON_EPIT);
- if (gmac->variant->support_rmii)
- reg &= ~SYSCON_RMII_EN;
-
switch (plat->mac_interface) {
case PHY_INTERFACE_MODE_MII:
/* default */
@@ -1039,9 +1000,9 @@ static int sun8i_dwmac_set_syscon(struct device *dev,
static void sun8i_dwmac_unset_syscon(struct sunxi_priv_data *gmac)
{
- u32 reg = gmac->variant->default_syscon_value;
-
- regmap_field_write(gmac->regmap_field, reg);
+ if (gmac->variant->soc_has_internal_phy)
+ regmap_field_write(gmac->regmap_field,
+ (H3_EPHY_SHUTDOWN | H3_EPHY_SELECT));
}
static void sun8i_dwmac_exit(struct platform_device *pdev, void *priv)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
index f4694fd576f5..3dec1a264cf6 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
@@ -341,6 +341,7 @@ static inline u32 mtl_chanx_base_addr(const struct dwmac4_addrs *addrs,
#define MTL_OP_MODE_RFA_SHIFT 8
#define MTL_OP_MODE_EHFC BIT(7)
+#define MTL_OP_MODE_DIS_TCP_EF BIT(6)
#define MTL_OP_MODE_RTC_MASK GENMASK(1, 0)
#define MTL_OP_MODE_RTC_SHIFT 0
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
index a5fb31eb0192..aac68dc28dc1 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
@@ -110,16 +110,20 @@ static int dwmac4_wrback_get_rx_status(struct stmmac_extra_stats *x,
message_type = (rdes1 & ERDES4_MSG_TYPE_MASK) >> 8;
- if (rdes1 & RDES1_IP_HDR_ERROR)
+ if (rdes1 & RDES1_IP_HDR_ERROR) {
x->ip_hdr_err++;
+ ret |= csum_none;
+ }
if (rdes1 & RDES1_IP_CSUM_BYPASSED)
x->ip_csum_bypassed++;
if (rdes1 & RDES1_IPV4_HEADER)
x->ipv4_pkt_rcvd++;
if (rdes1 & RDES1_IPV6_HEADER)
x->ipv6_pkt_rcvd++;
- if (rdes1 & RDES1_IP_PAYLOAD_ERROR)
+ if (rdes1 & RDES1_IP_PAYLOAD_ERROR) {
x->ip_payload_err++;
+ ret |= csum_none;
+ }
if (message_type == RDES_EXT_NO_PTP)
x->no_ptp_rx_msg_type_ext++;
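Besides bumping the error counters, the hunk above now ORs csum_none into the returned RX status when the core flags an IP header or payload error; the companion hunk in stmmac_main.c further below folds that flag into the CHECKSUM_UNNECESSARY decision. A runnable model of the resulting decision (flag value and function name are stand-ins for the driver's):

#include <stdbool.h>
#include <stdio.h>

#define CSUM_NONE 0x1	/* stand-in for the driver's csum_none status bit */

static bool checksum_offload_valid(bool coe, bool ip_ethertype, int status)
{
	return coe && ip_ethertype && !(status & CSUM_NONE);
}

int main(void)
{
	/* header/payload error: the stack must verify the checksum itself */
	printf("%d\n", checksum_offload_valid(true, true, CSUM_NONE));	/* 0 */
	/* clean frame: the hardware result can be trusted */
	printf("%d\n", checksum_offload_valid(true, true, 0));		/* 1 */
	return 0;
}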
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
index 0cb84a0041a4..d87a8b595e6a 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
@@ -268,6 +268,8 @@ static void dwmac4_dma_rx_chan_op_mode(struct stmmac_priv *priv,
mtl_rx_op = readl(ioaddr + MTL_CHAN_RX_OP_MODE(dwmac4_addrs, channel));
+ mtl_rx_op |= MTL_OP_MODE_DIS_TCP_EF;
+
if (mode == SF_DMA_MODE) {
pr_debug("GMAC: enable RX store and forward mode\n");
mtl_rx_op |= MTL_OP_MODE_RSF;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c
index 4846bf49c576..467f1a05747e 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c
@@ -251,7 +251,7 @@ void dwmac_dma_flush_tx_fifo(void __iomem *ioaddr)
void stmmac_set_mac_addr(void __iomem *ioaddr, const u8 addr[6],
unsigned int high, unsigned int low)
{
- unsigned long data;
+ u32 data;
data = (addr[5] << 8) | addr[4];
/* For MAC Addr registers we have to set the Address Enable (AE)
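The type change above matters because the value is assembled from byte shifts and written with writel(): a u32 matches the 32-bit register width, whereas unsigned long is 64-bit on most targets. A userspace sketch of the packing stmmac_set_mac_addr() performs (the real function also sets the Address Enable bit in the high register, omitted here):

#include <stdint.h>
#include <stdio.h>

static void mac_addr_to_regs(const uint8_t addr[6], uint32_t *hi, uint32_t *lo)
{
	*hi = ((uint32_t)addr[5] << 8) | addr[4];
	*lo = ((uint32_t)addr[3] << 24) | ((uint32_t)addr[2] << 16) |
	      ((uint32_t)addr[1] << 8) | addr[0];
}

int main(void)
{
	const uint8_t mac[6] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 };
	uint32_t hi, lo;

	mac_addr_to_regs(mac, &hi, &lo);
	printf("high=0x%08x low=0x%08x\n", hi, lo);
	/* prints high=0x00005544 low=0x33221100 */
	return 0;
}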
diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.c b/drivers/net/ethernet/stmicro/stmmac/hwif.c
index 99635b37044a..3f7c765dcb79 100644
--- a/drivers/net/ethernet/stmicro/stmmac/hwif.c
+++ b/drivers/net/ethernet/stmicro/stmmac/hwif.c
@@ -100,7 +100,7 @@ int stmmac_reset(struct stmmac_priv *priv, void __iomem *ioaddr)
return -EINVAL;
if (plat && plat->fix_soc_reset)
- return plat->fix_soc_reset(plat, ioaddr);
+ return plat->fix_soc_reset(priv, ioaddr);
return stmmac_do_callback(priv, dma, reset, ioaddr);
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index cda09cf5dcca..78d6b3737a26 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -289,7 +289,6 @@ struct stmmac_priv {
u32 msg_enable;
int wolopts;
int wol_irq;
- bool wol_irq_disabled;
int clk_csr;
struct timer_list eee_ctrl_timer;
int lpi_irq;
@@ -374,6 +373,18 @@ enum stmmac_state {
STMMAC_SERVICE_SCHED,
};
+extern const struct dev_pm_ops stmmac_simple_pm_ops;
+
+static inline bool stmmac_wol_enabled_mac(struct stmmac_priv *priv)
+{
+ return priv->plat->pmt && device_may_wakeup(priv->device);
+}
+
+static inline bool stmmac_wol_enabled_phy(struct stmmac_priv *priv)
+{
+ return !priv->plat->pmt && device_may_wakeup(priv->device);
+}
+
int stmmac_mdio_unregister(struct net_device *ndev);
int stmmac_mdio_register(struct net_device *ndev);
int stmmac_mdio_reset(struct mii_bus *mii);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index 77758a7299b4..39fa1ec92f82 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -803,7 +803,6 @@ static void stmmac_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
static int stmmac_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
{
struct stmmac_priv *priv = netdev_priv(dev);
- u32 support = WAKE_MAGIC | WAKE_UCAST;
if (!device_can_wakeup(priv->device))
return -EOPNOTSUPP;
@@ -816,29 +815,7 @@ static int stmmac_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
return ret;
}
- /* By default almost all GMAC devices support the WoL via
- * magic frame but we can disable it if the HW capability
- * register shows no support for pmt_magic_frame. */
- if ((priv->hw_cap_support) && (!priv->dma_cap.pmt_magic_frame))
- wol->wolopts &= ~WAKE_MAGIC;
-
- if (wol->wolopts & ~support)
- return -EINVAL;
-
- if (wol->wolopts) {
- pr_info("stmmac: wakeup enable\n");
- device_set_wakeup_enable(priv->device, 1);
- /* Avoid unbalanced enable_irq_wake calls */
- if (priv->wol_irq_disabled)
- enable_irq_wake(priv->wol_irq);
- priv->wol_irq_disabled = false;
- } else {
- device_set_wakeup_enable(priv->device, 0);
- /* Avoid unbalanced disable_irq_wake calls */
- if (!priv->wol_irq_disabled)
- disable_irq_wake(priv->wol_irq);
- priv->wol_irq_disabled = true;
- }
+ device_set_wakeup_enable(priv->device, !!wol->wolopts);
mutex_lock(&priv->lock);
priv->wolopts = wol->wolopts;
@@ -852,9 +829,6 @@ static int stmmac_ethtool_op_get_eee(struct net_device *dev,
{
struct stmmac_priv *priv = netdev_priv(dev);
- if (!priv->dma_cap.eee)
- return -EOPNOTSUPP;
-
return phylink_ethtool_get_eee(priv->phylink, edata);
}
@@ -863,9 +837,6 @@ static int stmmac_ethtool_op_set_eee(struct net_device *dev,
{
struct stmmac_priv *priv = netdev_priv(dev);
- if (!priv->dma_cap.eee)
- return -EOPNOTSUPP;
-
return phylink_ethtool_set_eee(priv->phylink, edata);
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 7b16d1207b80..f0abd99fd137 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -29,6 +29,7 @@
#include <linux/dma-mapping.h>
#include <linux/slab.h>
#include <linux/pm_runtime.h>
+#include <linux/pm_wakeirq.h>
#include <linux/prefetch.h>
#include <linux/pinctrl/consumer.h>
#ifdef CONFIG_DEBUG_FS
@@ -148,33 +149,34 @@ static void stmmac_exit_fs(struct net_device *dev);
int stmmac_bus_clks_config(struct stmmac_priv *priv, bool enabled)
{
- int ret = 0;
+ struct plat_stmmacenet_data *plat_dat = priv->plat;
+ int ret;
if (enabled) {
- ret = clk_prepare_enable(priv->plat->stmmac_clk);
+ ret = clk_prepare_enable(plat_dat->stmmac_clk);
if (ret)
return ret;
- ret = clk_prepare_enable(priv->plat->pclk);
+ ret = clk_prepare_enable(plat_dat->pclk);
if (ret) {
- clk_disable_unprepare(priv->plat->stmmac_clk);
+ clk_disable_unprepare(plat_dat->stmmac_clk);
return ret;
}
- if (priv->plat->clks_config) {
- ret = priv->plat->clks_config(priv->plat->bsp_priv, enabled);
+ if (plat_dat->clks_config) {
+ ret = plat_dat->clks_config(plat_dat->bsp_priv, enabled);
if (ret) {
- clk_disable_unprepare(priv->plat->stmmac_clk);
- clk_disable_unprepare(priv->plat->pclk);
+ clk_disable_unprepare(plat_dat->stmmac_clk);
+ clk_disable_unprepare(plat_dat->pclk);
return ret;
}
}
} else {
- clk_disable_unprepare(priv->plat->stmmac_clk);
- clk_disable_unprepare(priv->plat->pclk);
- if (priv->plat->clks_config)
- priv->plat->clks_config(priv->plat->bsp_priv, enabled);
+ clk_disable_unprepare(plat_dat->stmmac_clk);
+ clk_disable_unprepare(plat_dat->pclk);
+ if (plat_dat->clks_config)
+ plat_dat->clks_config(plat_dat->bsp_priv, enabled);
}
- return ret;
+ return 0;
}
EXPORT_SYMBOL_GPL(stmmac_bus_clks_config);
@@ -3725,7 +3727,6 @@ static int stmmac_request_irq_multi_msi(struct net_device *dev)
/* Request the Wake IRQ in case of another line
* is used for WoL
*/
- priv->wol_irq_disabled = true;
if (priv->wol_irq > 0 && priv->wol_irq != dev->irq) {
int_name = priv->int_name_wol;
sprintf(int_name, "%s:%s", dev->name, "wol");
@@ -3886,7 +3887,6 @@ static int stmmac_request_irq_single(struct net_device *dev)
/* Request the Wake IRQ in case of another line
* is used for WoL
*/
- priv->wol_irq_disabled = true;
if (priv->wol_irq > 0 && priv->wol_irq != dev->irq) {
ret = request_irq(priv->wol_irq, stmmac_interrupt,
IRQF_SHARED, dev->name, dev);
@@ -4140,8 +4140,13 @@ static int stmmac_release(struct net_device *dev)
struct stmmac_priv *priv = netdev_priv(dev);
u32 chan;
+ /* If the PHY or MAC has WoL enabled, then the PHY will not be
+ * suspended when phylink_stop() is called below. Set the PHY
+ * to its slowest speed to save power.
+ */
if (device_may_wakeup(priv->device))
phylink_speed_down(priv->phylink, false);
+
/* Stop and disconnect the PHY */
phylink_stop(priv->phylink);
phylink_disconnect_phy(priv->phylink);
@@ -5735,7 +5740,8 @@ drain_data:
skb->protocol = eth_type_trans(skb, priv->dev);
- if (unlikely(!coe) || !stmmac_has_ip_ethertype(skb))
+ if (unlikely(!coe) || !stmmac_has_ip_ethertype(skb) ||
+ (status & csum_none))
skb_checksum_none_assert(skb);
else
skb->ip_summed = CHECKSUM_UNNECESSARY;
@@ -7242,7 +7248,6 @@ static int stmmac_hw_init(struct stmmac_priv *priv)
priv->plat->enh_desc = priv->dma_cap.enh_desc;
priv->plat->pmt = priv->dma_cap.pmt_remote_wake_up &&
!(priv->plat->flags & STMMAC_FLAG_USE_PHY_WOL);
- priv->hw->pmt = priv->plat->pmt;
if (priv->dma_cap.hash_tb_sz) {
priv->hw->multicast_filter_bins =
(BIT(priv->dma_cap.hash_tb_sz) << 5);
@@ -7280,6 +7285,7 @@ static int stmmac_hw_init(struct stmmac_priv *priv)
if (priv->plat->pmt) {
dev_info(priv->device, "Wake-Up On Lan supported\n");
device_set_wakeup_capable(priv->device, 1);
+ devm_pm_set_wake_irq(priv->device, priv->wol_irq);
}
if (priv->dma_cap.tsoen)
@@ -7860,7 +7866,7 @@ int stmmac_suspend(struct device *dev)
priv->plat->serdes_powerdown(ndev, priv->plat->bsp_priv);
/* Enable Power down mode by programming the PMT regs */
- if (device_may_wakeup(priv->device) && priv->plat->pmt) {
+ if (stmmac_wol_enabled_mac(priv)) {
stmmac_pmt(priv, priv->hw, priv->wolopts);
priv->irq_wake = 1;
} else {
@@ -7871,16 +7877,18 @@ int stmmac_suspend(struct device *dev)
mutex_unlock(&priv->lock);
rtnl_lock();
- if (device_may_wakeup(priv->device) && !priv->plat->pmt)
+ if (stmmac_wol_enabled_phy(priv))
phylink_speed_down(priv->phylink, false);
- phylink_suspend(priv->phylink,
- device_may_wakeup(priv->device) && priv->plat->pmt);
+ phylink_suspend(priv->phylink, stmmac_wol_enabled_mac(priv));
rtnl_unlock();
if (stmmac_fpe_supported(priv))
ethtool_mmsv_stop(&priv->fpe_cfg.mmsv);
+ if (priv->plat->suspend)
+ return priv->plat->suspend(dev, priv->plat->bsp_priv);
+
return 0;
}
EXPORT_SYMBOL_GPL(stmmac_suspend);
@@ -7933,6 +7941,12 @@ int stmmac_resume(struct device *dev)
struct stmmac_priv *priv = netdev_priv(ndev);
int ret;
+ if (priv->plat->resume) {
+ ret = priv->plat->resume(dev, priv->plat->bsp_priv);
+ if (ret)
+ return ret;
+ }
+
if (!netif_running(ndev))
return 0;
@@ -7942,7 +7956,7 @@ int stmmac_resume(struct device *dev)
* this bit because it can generate problems while resuming
* from another devices (e.g. serial console).
*/
- if (device_may_wakeup(priv->device) && priv->plat->pmt) {
+ if (stmmac_wol_enabled_mac(priv)) {
mutex_lock(&priv->lock);
stmmac_pmt(priv, priv->hw, 0);
mutex_unlock(&priv->lock);
@@ -7977,7 +7991,14 @@ int stmmac_resume(struct device *dev)
stmmac_free_tx_skbufs(priv);
stmmac_clear_descriptors(priv, &priv->dma_conf);
- stmmac_hw_setup(ndev, false);
+ ret = stmmac_hw_setup(ndev, false);
+ if (ret < 0) {
+ netdev_err(priv->dev, "%s: Hw setup failed\n", __func__);
+ mutex_unlock(&priv->lock);
+ rtnl_unlock();
+ return ret;
+ }
+
stmmac_init_coalesce(priv);
phylink_rx_clk_stop_block(priv->phylink);
stmmac_set_rx_mode(ndev);
@@ -7995,7 +8016,7 @@ int stmmac_resume(struct device *dev)
* workqueue thread, which will race with initialisation.
*/
phylink_resume(priv->phylink);
- if (device_may_wakeup(priv->device) && !priv->plat->pmt)
+ if (stmmac_wol_enabled_phy(priv))
phylink_speed_up(priv->phylink);
rtnl_unlock();
@@ -8006,6 +8027,10 @@ int stmmac_resume(struct device *dev)
}
EXPORT_SYMBOL_GPL(stmmac_resume);
+/* This is not the same as EXPORT_GPL_SIMPLE_DEV_PM_OPS() when CONFIG_PM=n */
+DEFINE_SIMPLE_DEV_PM_OPS(stmmac_simple_pm_ops, stmmac_suspend, stmmac_resume);
+EXPORT_SYMBOL_GPL(stmmac_simple_pm_ops);
+
#ifndef MODULE
static int __init stmmac_cmdline_opt(char *str)
{
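The stmmac_suspend()/stmmac_resume() hunks above establish the contract every converted glue driver now relies on: plat->suspend runs last in stmmac_suspend(), after the MAC has been quiesced, and plat->resume runs first in stmmac_resume(), before any register access. A runnable userspace model of that ordering (all names are stand-ins for the kernel structures):

#include <stdio.h>

typedef int (*pm_cb)(void);

struct plat_data {
	pm_cb suspend;
	pm_cb resume;
};

static int bus_suspend(void) { puts("bus: save state, arm wake, enter D3"); return 0; }
static int bus_resume(void)  { puts("bus: restore state, back to D0"); return 0; }

static int core_suspend(struct plat_data *p)
{
	puts("core: stop DMA, program PMT/WoL, suspend phylink");
	return p->suspend ? p->suspend() : 0;	/* bus work runs last */
}

static int core_resume(struct plat_data *p)
{
	if (p->resume) {
		int ret = p->resume();		/* bus work runs first */
		if (ret)
			return ret;
	}
	puts("core: hw setup, restore coalescing, resume phylink");
	return 0;
}

int main(void)
{
	struct plat_data p = { bus_suspend, bus_resume };

	core_suspend(&p);
	core_resume(&p);
	return 0;
}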
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
index 836f2848dfeb..0a302b711bc2 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
@@ -131,12 +131,9 @@ err_disable_clks:
static int stmmac_xgmac2_mdio_read_c22(struct mii_bus *bus, int phyaddr,
int phyreg)
{
- struct net_device *ndev = bus->priv;
- struct stmmac_priv *priv;
+ struct stmmac_priv *priv = netdev_priv(bus->priv);
u32 addr;
- priv = netdev_priv(ndev);
-
/* Until ver 2.20 XGMAC does not support C22 addr >= 4 */
if (priv->synopsys_id < DWXGMAC_CORE_2_20 &&
phyaddr > MII_XGMAC_MAX_C22ADDR)
@@ -150,12 +147,9 @@ static int stmmac_xgmac2_mdio_read_c22(struct mii_bus *bus, int phyaddr,
static int stmmac_xgmac2_mdio_read_c45(struct mii_bus *bus, int phyaddr,
int devad, int phyreg)
{
- struct net_device *ndev = bus->priv;
- struct stmmac_priv *priv;
+ struct stmmac_priv *priv = netdev_priv(bus->priv);
u32 addr;
- priv = netdev_priv(ndev);
-
stmmac_xgmac2_c45_format(priv, phyaddr, devad, phyreg, &addr);
return stmmac_xgmac2_mdio_read(priv, addr, MII_XGMAC_BUSY);
@@ -209,12 +203,9 @@ err_disable_clks:
static int stmmac_xgmac2_mdio_write_c22(struct mii_bus *bus, int phyaddr,
int phyreg, u16 phydata)
{
- struct net_device *ndev = bus->priv;
- struct stmmac_priv *priv;
+ struct stmmac_priv *priv = netdev_priv(bus->priv);
u32 addr;
- priv = netdev_priv(ndev);
-
/* Until ver 2.20 XGMAC does not support C22 addr >= 4 */
if (priv->synopsys_id < DWXGMAC_CORE_2_20 &&
phyaddr > MII_XGMAC_MAX_C22ADDR)
@@ -229,12 +220,9 @@ static int stmmac_xgmac2_mdio_write_c22(struct mii_bus *bus, int phyaddr,
static int stmmac_xgmac2_mdio_write_c45(struct mii_bus *bus, int phyaddr,
int devad, int phyreg, u16 phydata)
{
- struct net_device *ndev = bus->priv;
- struct stmmac_priv *priv;
+ struct stmmac_priv *priv = netdev_priv(bus->priv);
u32 addr;
- priv = netdev_priv(ndev);
-
stmmac_xgmac2_c45_format(priv, phyaddr, devad, phyreg, &addr);
return stmmac_xgmac2_mdio_write(priv, addr, MII_XGMAC_BUSY,
@@ -274,8 +262,7 @@ static int stmmac_mdio_read(struct stmmac_priv *priv, int data, u32 value)
*/
static int stmmac_mdio_read_c22(struct mii_bus *bus, int phyaddr, int phyreg)
{
- struct net_device *ndev = bus->priv;
- struct stmmac_priv *priv = netdev_priv(ndev);
+ struct stmmac_priv *priv = netdev_priv(bus->priv);
u32 value = MII_BUSY;
int data = 0;
@@ -312,25 +299,20 @@ static int stmmac_mdio_read_c22(struct mii_bus *bus, int phyaddr, int phyreg)
static int stmmac_mdio_read_c45(struct mii_bus *bus, int phyaddr, int devad,
int phyreg)
{
- struct net_device *ndev = bus->priv;
- struct stmmac_priv *priv = netdev_priv(ndev);
+ struct stmmac_priv *priv = netdev_priv(bus->priv);
u32 value = MII_BUSY;
int data = 0;
- data = pm_runtime_get_sync(priv->device);
- if (data < 0) {
- pm_runtime_put_noidle(priv->device);
+ data = pm_runtime_resume_and_get(priv->device);
+ if (data < 0)
return data;
- }
value |= (phyaddr << priv->hw->mii.addr_shift)
& priv->hw->mii.addr_mask;
- value |= (phyreg << priv->hw->mii.reg_shift) & priv->hw->mii.reg_mask;
value |= (priv->clk_csr << priv->hw->mii.clk_csr_shift)
& priv->hw->mii.clk_csr_mask;
value |= MII_GMAC4_READ;
value |= MII_GMAC4_C45E;
- value &= ~priv->hw->mii.reg_mask;
value |= (devad << priv->hw->mii.reg_shift) & priv->hw->mii.reg_mask;
data |= phyreg << MII_GMAC4_REG_ADDR_SHIFT;
@@ -373,8 +355,7 @@ static int stmmac_mdio_write(struct stmmac_priv *priv, int data, u32 value)
static int stmmac_mdio_write_c22(struct mii_bus *bus, int phyaddr, int phyreg,
u16 phydata)
{
- struct net_device *ndev = bus->priv;
- struct stmmac_priv *priv = netdev_priv(ndev);
+ struct stmmac_priv *priv = netdev_priv(bus->priv);
int ret, data = phydata;
u32 value = MII_BUSY;
@@ -412,27 +393,22 @@ static int stmmac_mdio_write_c22(struct mii_bus *bus, int phyaddr, int phyreg,
static int stmmac_mdio_write_c45(struct mii_bus *bus, int phyaddr,
int devad, int phyreg, u16 phydata)
{
- struct net_device *ndev = bus->priv;
- struct stmmac_priv *priv = netdev_priv(ndev);
+ struct stmmac_priv *priv = netdev_priv(bus->priv);
int ret, data = phydata;
u32 value = MII_BUSY;
- ret = pm_runtime_get_sync(priv->device);
- if (ret < 0) {
- pm_runtime_put_noidle(priv->device);
+ ret = pm_runtime_resume_and_get(priv->device);
+ if (ret < 0)
return ret;
- }
value |= (phyaddr << priv->hw->mii.addr_shift)
& priv->hw->mii.addr_mask;
- value |= (phyreg << priv->hw->mii.reg_shift) & priv->hw->mii.reg_mask;
value |= (priv->clk_csr << priv->hw->mii.clk_csr_shift)
& priv->hw->mii.clk_csr_mask;
value |= MII_GMAC4_WRITE;
value |= MII_GMAC4_C45E;
- value &= ~priv->hw->mii.reg_mask;
value |= (devad << priv->hw->mii.reg_shift) & priv->hw->mii.reg_mask;
data |= phyreg << MII_GMAC4_REG_ADDR_SHIFT;
@@ -452,8 +428,7 @@ static int stmmac_mdio_write_c45(struct mii_bus *bus, int phyaddr,
int stmmac_mdio_reset(struct mii_bus *bus)
{
#if IS_ENABLED(CONFIG_STMMAC_PLATFORM)
- struct net_device *ndev = bus->priv;
- struct stmmac_priv *priv = netdev_priv(ndev);
+ struct stmmac_priv *priv = netdev_priv(bus->priv);
unsigned int mii_address = priv->hw->mii.addr;
#ifdef CONFIG_OF
@@ -497,12 +472,11 @@ int stmmac_mdio_reset(struct mii_bus *bus)
int stmmac_pcs_setup(struct net_device *ndev)
{
+ struct stmmac_priv *priv = netdev_priv(ndev);
struct fwnode_handle *devnode, *pcsnode;
struct dw_xpcs *xpcs = NULL;
- struct stmmac_priv *priv;
int addr, ret;
- priv = netdev_priv(ndev);
devnode = priv->plat->port_node;
if (priv->plat->pcs_init) {
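Several hunks in this file also replace the pm_runtime_get_sync() plus pm_runtime_put_noidle() error dance with pm_runtime_resume_and_get(), which drops the usage count itself when the resume fails. A hedged kernel-context sketch of the resulting call pattern (mdio_op() is a hypothetical caller):

static int mdio_op(struct device *dev)
{
	int ret = pm_runtime_resume_and_get(dev);

	if (ret < 0)
		return ret;	/* usage count already dropped on failure */

	/* ... perform the MDIO register access ... */

	pm_runtime_put(dev);
	return 0;
}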
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
index 9c1b54b701f7..e6a7d0ddac2a 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
@@ -138,6 +138,37 @@ static const struct stmmac_pci_info snps_gmac5_pci_info = {
.setup = snps_gmac5_default_data,
};
+static int stmmac_pci_suspend(struct device *dev, void *bsp_priv)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ int ret;
+
+ ret = pci_save_state(pdev);
+ if (ret)
+ return ret;
+
+ pci_disable_device(pdev);
+ pci_wake_from_d3(pdev, true);
+ return 0;
+}
+
+static int stmmac_pci_resume(struct device *dev, void *bsp_priv)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ int ret;
+
+ pci_restore_state(pdev);
+ pci_set_power_state(pdev, PCI_D0);
+
+ ret = pci_enable_device(pdev);
+ if (ret)
+ return ret;
+
+ pci_set_master(pdev);
+
+ return 0;
+}
+
/**
* stmmac_pci_probe
*
@@ -217,6 +248,9 @@ static int stmmac_pci_probe(struct pci_dev *pdev,
plat->safety_feat_cfg->prtyen = 1;
plat->safety_feat_cfg->tmouten = 1;
+ plat->suspend = stmmac_pci_suspend;
+ plat->resume = stmmac_pci_resume;
+
return stmmac_dvr_probe(&pdev->dev, plat, &res);
}
@@ -231,43 +265,6 @@ static void stmmac_pci_remove(struct pci_dev *pdev)
stmmac_dvr_remove(&pdev->dev);
}
-static int __maybe_unused stmmac_pci_suspend(struct device *dev)
-{
- struct pci_dev *pdev = to_pci_dev(dev);
- int ret;
-
- ret = stmmac_suspend(dev);
- if (ret)
- return ret;
-
- ret = pci_save_state(pdev);
- if (ret)
- return ret;
-
- pci_disable_device(pdev);
- pci_wake_from_d3(pdev, true);
- return 0;
-}
-
-static int __maybe_unused stmmac_pci_resume(struct device *dev)
-{
- struct pci_dev *pdev = to_pci_dev(dev);
- int ret;
-
- pci_restore_state(pdev);
- pci_set_power_state(pdev, PCI_D0);
-
- ret = pci_enable_device(pdev);
- if (ret)
- return ret;
-
- pci_set_master(pdev);
-
- return stmmac_resume(dev);
-}
-
-static SIMPLE_DEV_PM_OPS(stmmac_pm_ops, stmmac_pci_suspend, stmmac_pci_resume);
-
/* synthetic ID, no official vendor */
#define PCI_VENDOR_ID_STMMAC 0x0700
@@ -289,7 +286,7 @@ static struct pci_driver stmmac_pci_driver = {
.probe = stmmac_pci_probe,
.remove = stmmac_pci_remove,
.driver = {
- .pm = &stmmac_pm_ops,
+ .pm = &stmmac_simple_pm_ops,
},
};
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index 030fcf1b5993..a3e077f225d1 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -811,6 +811,22 @@ static void stmmac_pltfr_exit(struct platform_device *pdev,
plat->exit(pdev, plat->bsp_priv);
}
+static int stmmac_plat_suspend(struct device *dev, void *bsp_priv)
+{
+ struct stmmac_priv *priv = netdev_priv(dev_get_drvdata(dev));
+
+ stmmac_pltfr_exit(to_platform_device(dev), priv->plat);
+
+ return 0;
+}
+
+static int stmmac_plat_resume(struct device *dev, void *bsp_priv)
+{
+ struct stmmac_priv *priv = netdev_priv(dev_get_drvdata(dev));
+
+ return stmmac_pltfr_init(to_platform_device(dev), priv->plat);
+}
+
/**
* stmmac_pltfr_probe
* @pdev: platform device pointer
@@ -825,6 +841,11 @@ int stmmac_pltfr_probe(struct platform_device *pdev,
{
int ret;
+ if (!plat->suspend && plat->exit)
+ plat->suspend = stmmac_plat_suspend;
+ if (!plat->resume && plat->init)
+ plat->resume = stmmac_plat_resume;
+
ret = stmmac_pltfr_init(pdev, plat);
if (ret)
return ret;
@@ -886,47 +907,6 @@ void stmmac_pltfr_remove(struct platform_device *pdev)
}
EXPORT_SYMBOL_GPL(stmmac_pltfr_remove);
-/**
- * stmmac_pltfr_suspend
- * @dev: device pointer
- * Description: this function is invoked when suspend the driver and it direcly
- * call the main suspend function and then, if required, on some platform, it
- * can call an exit helper.
- */
-static int __maybe_unused stmmac_pltfr_suspend(struct device *dev)
-{
- int ret;
- struct net_device *ndev = dev_get_drvdata(dev);
- struct stmmac_priv *priv = netdev_priv(ndev);
- struct platform_device *pdev = to_platform_device(dev);
-
- ret = stmmac_suspend(dev);
- stmmac_pltfr_exit(pdev, priv->plat);
-
- return ret;
-}
-
-/**
- * stmmac_pltfr_resume
- * @dev: device pointer
- * Description: this function is invoked when resume the driver before calling
- * the main resume function, on some platforms, it can call own init helper
- * if required.
- */
-static int __maybe_unused stmmac_pltfr_resume(struct device *dev)
-{
- struct net_device *ndev = dev_get_drvdata(dev);
- struct stmmac_priv *priv = netdev_priv(ndev);
- struct platform_device *pdev = to_platform_device(dev);
- int ret;
-
- ret = stmmac_pltfr_init(pdev, priv->plat);
- if (ret)
- return ret;
-
- return stmmac_resume(dev);
-}
-
static int __maybe_unused stmmac_runtime_suspend(struct device *dev)
{
struct net_device *ndev = dev_get_drvdata(dev);
@@ -954,7 +934,7 @@ static int __maybe_unused stmmac_pltfr_noirq_suspend(struct device *dev)
if (!netif_running(ndev))
return 0;
- if (!device_may_wakeup(priv->device) || !priv->plat->pmt) {
+ if (!stmmac_wol_enabled_mac(priv)) {
/* Disable clock in case of PWM is off */
clk_disable_unprepare(priv->plat->clk_ptp_ref);
@@ -975,7 +955,7 @@ static int __maybe_unused stmmac_pltfr_noirq_resume(struct device *dev)
if (!netif_running(ndev))
return 0;
- if (!device_may_wakeup(priv->device) || !priv->plat->pmt) {
+ if (!stmmac_wol_enabled_mac(priv)) {
/* enable the clk previously disabled */
ret = pm_runtime_force_resume(dev);
if (ret)
@@ -994,7 +974,7 @@ static int __maybe_unused stmmac_pltfr_noirq_resume(struct device *dev)
}
const struct dev_pm_ops stmmac_pltfr_pm_ops = {
- SET_SYSTEM_SLEEP_PM_OPS(stmmac_pltfr_suspend, stmmac_pltfr_resume)
+ SET_SYSTEM_SLEEP_PM_OPS(stmmac_suspend, stmmac_resume)
SET_RUNTIME_PM_OPS(stmmac_runtime_suspend, stmmac_runtime_resume, NULL)
SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(stmmac_pltfr_noirq_suspend, stmmac_pltfr_noirq_resume)
};
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
index 3767ba495e78..ecbff20771f4 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
@@ -10,6 +10,8 @@
#include "stmmac.h"
#include "stmmac_ptp.h"
+#define PTP_SAFE_TIME_OFFSET_NS 500000
+
/**
* stmmac_adjust_freq
*
@@ -171,7 +173,11 @@ static int stmmac_enable(struct ptp_clock_info *ptp,
u32 acr_value;
switch (rq->type) {
- case PTP_CLK_REQ_PEROUT:
+ case PTP_CLK_REQ_PEROUT: {
+ struct timespec64 curr_time;
+ u64 target_ns = 0;
+ u64 ns = 0;
+
/* Reject requests with unsupported flags */
if (rq->perout.flags)
return -EOPNOTSUPP;
@@ -180,6 +186,31 @@ static int stmmac_enable(struct ptp_clock_info *ptp,
cfg->start.tv_sec = rq->perout.start.sec;
cfg->start.tv_nsec = rq->perout.start.nsec;
+
+ /* A time set in the past won't trigger the start of flexible PPS
+ * generation on the GMAC5. For some reason it does on the GMAC4, but
+ * setting a time in the past should be handled anyway. Therefore, any
+ * value set in the past is treated as an offset from the current MAC
+ * system time.
+ * Be aware that an offset that is too small may not trigger flexible PPS
+ * generation if the time spent applying this configuration makes the
+ * target time already outdated. To address this, add a safe time offset.
+ */
+ if (!cfg->start.tv_sec && cfg->start.tv_nsec < PTP_SAFE_TIME_OFFSET_NS)
+ cfg->start.tv_nsec += PTP_SAFE_TIME_OFFSET_NS;
+
+ target_ns = cfg->start.tv_nsec + ((u64)cfg->start.tv_sec * NSEC_PER_SEC);
+
+ stmmac_get_systime(priv, priv->ptpaddr, &ns);
+ if (ns > TIME64_MAX - PTP_SAFE_TIME_OFFSET_NS)
+ return -EINVAL;
+
+ curr_time = ns_to_timespec64(ns);
+ if (target_ns < ns + PTP_SAFE_TIME_OFFSET_NS) {
+ cfg->start = timespec64_add_safe(cfg->start, curr_time);
+ if (cfg->start.tv_sec == TIME64_MAX)
+ return -EINVAL;
+ }
+
cfg->period.tv_sec = rq->perout.period.sec;
cfg->period.tv_nsec = rq->perout.period.nsec;
@@ -190,6 +221,7 @@ static int stmmac_enable(struct ptp_clock_info *ptp,
priv->systime_flags);
write_unlock_irqrestore(&priv->ptp_lock, flags);
break;
+ }
case PTP_CLK_REQ_EXTTS: {
u8 channel;
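The perout change is visible through the standard PTP character-device API: with this patch, a start time at or near zero is no longer silently ignored on GMAC5 but is reinterpreted as an offset from the current MAC time. A minimal userspace sketch (the /dev/ptp0 device path is an assumption):

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/ptp_clock.h>

int main(void)
{
	struct ptp_perout_request req;
	int fd = open("/dev/ptp0", O_RDWR);

	if (fd < 0) {
		perror("open");
		return 1;
	}

	memset(&req, 0, sizeof(req));
	req.index = 0;		/* output channel 0 */
	req.period.sec = 1;	/* 1 Hz periodic output */
	/* req.start left at zero: in the past, so the patched driver
	 * treats it as "now plus the safe offset"
	 */

	if (ioctl(fd, PTP_PEROUT_REQUEST, &req))
		perror("PTP_PEROUT_REQUEST");

	return 0;
}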
diff --git a/drivers/net/ethernet/wangxun/Kconfig b/drivers/net/ethernet/wangxun/Kconfig
index 424ec3212128..d138dea7d208 100644
--- a/drivers/net/ethernet/wangxun/Kconfig
+++ b/drivers/net/ethernet/wangxun/Kconfig
@@ -20,6 +20,7 @@ config LIBWX
tristate
depends on PTP_1588_CLOCK_OPTIONAL
select PAGE_POOL
+ select DIMLIB
help
Common library for Wangxun(R) Ethernet drivers.
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_ethtool.c b/drivers/net/ethernet/wangxun/libwx/wx_ethtool.c
index c12a4cb951f6..9572b9f28e59 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_ethtool.c
+++ b/drivers/net/ethernet/wangxun/libwx/wx_ethtool.c
@@ -303,6 +303,11 @@ int wx_get_coalesce(struct net_device *netdev,
else
ec->rx_coalesce_usecs = wx->rx_itr_setting >> 2;
+ if (wx->adaptive_itr) {
+ ec->use_adaptive_rx_coalesce = 1;
+ ec->use_adaptive_tx_coalesce = 1;
+ }
+
/* if in mixed tx/rx queues per vector mode, report only rx settings */
if (wx->q_vector[0]->tx.count && wx->q_vector[0]->rx.count)
return 0;
@@ -334,19 +339,28 @@ int wx_set_coalesce(struct net_device *netdev,
return -EOPNOTSUPP;
}
- if (ec->tx_max_coalesced_frames_irq)
- wx->tx_work_limit = ec->tx_max_coalesced_frames_irq;
+ if (ec->tx_max_coalesced_frames_irq > U16_MAX ||
+ !ec->tx_max_coalesced_frames_irq)
+ return -EINVAL;
+
+ wx->tx_work_limit = ec->tx_max_coalesced_frames_irq;
switch (wx->mac.type) {
case wx_mac_sp:
max_eitr = WX_SP_MAX_EITR;
+ rx_itr_param = WX_20K_ITR;
+ tx_itr_param = WX_12K_ITR;
break;
case wx_mac_aml:
case wx_mac_aml40:
max_eitr = WX_AML_MAX_EITR;
+ rx_itr_param = WX_20K_ITR;
+ tx_itr_param = WX_12K_ITR;
break;
default:
max_eitr = WX_EM_MAX_EITR;
+ rx_itr_param = WX_7K_ITR;
+ tx_itr_param = WX_7K_ITR;
break;
}
@@ -354,36 +368,37 @@ int wx_set_coalesce(struct net_device *netdev,
(ec->tx_coalesce_usecs > (max_eitr >> 2)))
return -EINVAL;
+ if (ec->use_adaptive_rx_coalesce) {
+ wx->adaptive_itr = true;
+ wx->rx_itr_setting = 1;
+ wx->tx_itr_setting = 1;
+ return 0;
+ }
+
if (ec->rx_coalesce_usecs > 1)
wx->rx_itr_setting = ec->rx_coalesce_usecs << 2;
else
wx->rx_itr_setting = ec->rx_coalesce_usecs;
- if (wx->rx_itr_setting == 1)
- rx_itr_param = WX_20K_ITR;
- else
- rx_itr_param = wx->rx_itr_setting;
-
if (ec->tx_coalesce_usecs > 1)
wx->tx_itr_setting = ec->tx_coalesce_usecs << 2;
else
wx->tx_itr_setting = ec->tx_coalesce_usecs;
- if (wx->tx_itr_setting == 1) {
- switch (wx->mac.type) {
- case wx_mac_sp:
- case wx_mac_aml:
- case wx_mac_aml40:
- tx_itr_param = WX_12K_ITR;
- break;
- default:
- tx_itr_param = WX_20K_ITR;
- break;
- }
- } else {
- tx_itr_param = wx->tx_itr_setting;
+ if (wx->adaptive_itr) {
+ wx->adaptive_itr = false;
+ wx->rx_itr_setting = rx_itr_param;
+ wx->tx_itr_setting = tx_itr_param;
+ } else if (wx->rx_itr_setting == 1 || wx->tx_itr_setting == 1) {
+ wx->adaptive_itr = true;
}
+ if (wx->rx_itr_setting != 1)
+ rx_itr_param = wx->rx_itr_setting;
+
+ if (wx->tx_itr_setting != 1)
+ tx_itr_param = wx->tx_itr_setting;
+
/* mixed Rx/Tx */
if (wx->q_vector[0]->tx.count && wx->q_vector[0]->rx.count)
wx->tx_itr_setting = wx->rx_itr_setting;
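The rework above makes an ITR setting of 1 the sentinel for adaptive moderation: ethtool's adaptive-rx/adaptive-tx flags flip wx->adaptive_itr, and an explicit usecs value supplied while adaptive mode is active falls back to the per-MAC defaults. A runnable model of that state machine, reduced to the RX side (values and names are stand-ins for the driver's):

#include <stdbool.h>
#include <stdio.h>

#define DEFAULT_RX_ITR 200	/* stand-in for WX_20K_ITR */

struct wx_model {
	bool adaptive_itr;
	unsigned int rx_itr_setting;
};

static void set_coalesce(struct wx_model *wx, bool adaptive_rx,
			 unsigned int rx_usecs)
{
	if (adaptive_rx) {
		wx->adaptive_itr = true;
		wx->rx_itr_setting = 1;	/* sentinel: DIM owns the rate */
		return;
	}

	wx->rx_itr_setting = rx_usecs > 1 ? rx_usecs << 2 : rx_usecs;

	if (wx->adaptive_itr) {
		/* explicit value switches adaptive off; use the default */
		wx->adaptive_itr = false;
		wx->rx_itr_setting = DEFAULT_RX_ITR;
	} else if (wx->rx_itr_setting == 1) {
		wx->adaptive_itr = true;	/* usecs == 1 re-enables DIM */
	}
}

int main(void)
{
	struct wx_model wx = { false, 0 };

	set_coalesce(&wx, true, 0);
	printf("adaptive=%d itr=%u\n", wx.adaptive_itr, wx.rx_itr_setting);
	set_coalesce(&wx, false, 100);
	printf("adaptive=%d itr=%u\n", wx.adaptive_itr, wx.rx_itr_setting);
	return 0;
}

From userspace this corresponds to "ethtool -C <ifname> adaptive-rx on adaptive-tx on" versus setting rx-usecs/tx-usecs explicitly.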
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_lib.c
index 723785ef87bb..5086db060c61 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_lib.c
+++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.c
@@ -16,6 +16,7 @@
#include "wx_lib.h"
#include "wx_ptp.h"
#include "wx_hw.h"
+#include "wx_vf_lib.h"
/* Lookup table mapping the HW PTYPE to the bit field for decoding */
static struct wx_dec_ptype wx_ptype_lookup[256] = {
@@ -832,6 +833,36 @@ static bool wx_clean_tx_irq(struct wx_q_vector *q_vector,
return !!budget;
}
+static void wx_update_rx_dim_sample(struct wx_q_vector *q_vector)
+{
+ struct dim_sample sample = {};
+
+ dim_update_sample(q_vector->total_events,
+ q_vector->rx.total_packets,
+ q_vector->rx.total_bytes,
+ &sample);
+
+ net_dim(&q_vector->rx.dim, &sample);
+}
+
+static void wx_update_tx_dim_sample(struct wx_q_vector *q_vector)
+{
+ struct dim_sample sample = {};
+
+ dim_update_sample(q_vector->total_events,
+ q_vector->tx.total_packets,
+ q_vector->tx.total_bytes,
+ &sample);
+
+ net_dim(&q_vector->tx.dim, &sample);
+}
+
+static void wx_update_dim_sample(struct wx_q_vector *q_vector)
+{
+ wx_update_rx_dim_sample(q_vector);
+ wx_update_tx_dim_sample(q_vector);
+}
+
/**
* wx_poll - NAPI polling RX/TX cleanup routine
* @napi: napi struct with our devices info in it
@@ -878,6 +909,8 @@ static int wx_poll(struct napi_struct *napi, int budget)
/* all work done, exit the polling mode */
if (likely(napi_complete_done(napi, work_done))) {
+ if (wx->adaptive_itr)
+ wx_update_dim_sample(q_vector);
if (netif_running(wx->netdev))
wx_intr_enable(wx, WX_INTR_Q(q_vector->v_idx));
}
@@ -1591,6 +1624,65 @@ netdev_tx_t wx_xmit_frame(struct sk_buff *skb,
}
EXPORT_SYMBOL(wx_xmit_frame);
+static void wx_set_itr(struct wx_q_vector *q_vector)
+{
+ struct wx *wx = q_vector->wx;
+ u32 new_itr;
+
+ if (!wx->adaptive_itr)
+ return;
+
+ /* use the smallest value of new ITR delay calculations */
+ new_itr = min(q_vector->rx.itr, q_vector->tx.itr);
+ new_itr <<= 2;
+
+ if (new_itr != q_vector->itr) {
+ /* save the algorithm value here */
+ q_vector->itr = new_itr;
+
+ if (wx->pdev->is_virtfn)
+ wx_write_eitr_vf(q_vector);
+ else
+ wx_write_eitr(q_vector);
+ }
+}
+
+static void wx_rx_dim_work(struct work_struct *work)
+{
+ struct dim *dim = container_of(work, struct dim, work);
+ struct dim_cq_moder rx_moder;
+ struct wx_ring_container *rx;
+ struct wx_q_vector *q_vector;
+
+ rx = container_of(dim, struct wx_ring_container, dim);
+
+ rx_moder = net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
+ rx->itr = rx_moder.usec;
+
+ q_vector = container_of(rx, struct wx_q_vector, rx);
+ wx_set_itr(q_vector);
+
+ dim->state = DIM_START_MEASURE;
+}
+
+static void wx_tx_dim_work(struct work_struct *work)
+{
+ struct dim *dim = container_of(work, struct dim, work);
+ struct dim_cq_moder tx_moder;
+ struct wx_ring_container *tx;
+ struct wx_q_vector *q_vector;
+
+ tx = container_of(dim, struct wx_ring_container, dim);
+
+ tx_moder = net_dim_get_tx_moderation(dim->mode, dim->profile_ix);
+ tx->itr = tx_moder.usec;
+
+ q_vector = container_of(tx, struct wx_q_vector, tx);
+ wx_set_itr(q_vector);
+
+ dim->state = DIM_START_MEASURE;
+}
+
void wx_napi_enable_all(struct wx *wx)
{
struct wx_q_vector *q_vector;
@@ -1598,6 +1690,11 @@ void wx_napi_enable_all(struct wx *wx)
for (q_idx = 0; q_idx < wx->num_q_vectors; q_idx++) {
q_vector = wx->q_vector[q_idx];
+
+ INIT_WORK(&q_vector->rx.dim.work, wx_rx_dim_work);
+ INIT_WORK(&q_vector->tx.dim.work, wx_tx_dim_work);
+ q_vector->rx.dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_CQE;
+ q_vector->tx.dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_CQE;
napi_enable(&q_vector->napi);
}
}
@@ -1611,6 +1708,8 @@ void wx_napi_disable_all(struct wx *wx)
for (q_idx = 0; q_idx < wx->num_q_vectors; q_idx++) {
q_vector = wx->q_vector[q_idx];
napi_disable(&q_vector->napi);
+ disable_work_sync(&q_vector->rx.dim.work);
+ disable_work_sync(&q_vector->tx.dim.work);
}
}
EXPORT_SYMBOL(wx_napi_disable_all);
@@ -2197,8 +2296,10 @@ irqreturn_t wx_msix_clean_rings(int __always_unused irq, void *data)
struct wx_q_vector *q_vector = data;
/* EIAM disabled interrupts (on this vector) for us */
- if (q_vector->rx.ring || q_vector->tx.ring)
+ if (q_vector->rx.ring || q_vector->tx.ring) {
napi_schedule_irqoff(&q_vector->napi);
+ q_vector->total_events++;
+ }
return IRQ_HANDLED;
}
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h
index 9d5d10f9e410..ec63e7ec8b24 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_type.h
+++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h
@@ -10,6 +10,7 @@
#include <linux/netdevice.h>
#include <linux/if_vlan.h>
#include <linux/phylink.h>
+#include <linux/dim.h>
#include <net/ip.h>
#define WX_NCSI_SUP 0x8000
@@ -1033,6 +1034,7 @@ struct wx_ring_container {
unsigned int total_packets; /* total packets processed this int */
u8 count; /* total number of rings in vector */
u8 itr; /* current ITR setting for ring */
+ struct dim dim; /* data for net_dim algorithm */
};
struct wx_ring {
struct wx_ring *next; /* pointer to next ring in q_vector */
@@ -1089,6 +1091,8 @@ struct wx_q_vector {
struct napi_struct napi;
struct rcu_head rcu; /* to avoid race with update stats on free */
+ u16 total_events; /* number of interrupts processed */
+
char name[IFNAMSIZ + 17];
/* for dynamic allocation of rings associated with this q_vector */
@@ -1268,6 +1272,7 @@ struct wx {
int num_rx_queues;
u16 rx_itr_setting;
u16 rx_work_limit;
+ bool adaptive_itr;
int num_q_vectors; /* current number of q_vectors for device */
int max_q_vectors; /* upper limit of q_vectors for device */
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_vf.h b/drivers/net/ethernet/wangxun/libwx/wx_vf.h
index fec1126703e3..3f16de0fa427 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_vf.h
+++ b/drivers/net/ethernet/wangxun/libwx/wx_vf.h
@@ -4,6 +4,7 @@
#ifndef _WX_VF_H_
#define _WX_VF_H_
+/* Control registers */
#define WX_VF_MAX_RING_NUMS 8
#define WX_VX_PF_BME 0x4B8
#define WX_VF_BME_ENABLE BIT(0)
@@ -12,16 +13,32 @@
#define WX_VXCTRL_RST BIT(0)
#define WX_VXMRQC 0x78
+#define WX_VXMRQC_PSR_L4HDR BIT(0)
+#define WX_VXMRQC_PSR_L3HDR BIT(1)
+#define WX_VXMRQC_PSR_L2HDR BIT(2)
+#define WX_VXMRQC_PSR_TUNHDR BIT(3)
+#define WX_VXMRQC_PSR_TUNMAC BIT(4)
+#define WX_VXMRQC_PSR_MASK GENMASK(5, 1)
+#define WX_VXMRQC_PSR(f) FIELD_PREP(GENMASK(5, 1), f)
+#define WX_VXMRQC_RSS_HASH(f) FIELD_PREP(GENMASK(15, 13), f)
+#define WX_VXMRQC_RSS_MASK GENMASK(31, 16)
+#define WX_VXMRQC_RSS(f) FIELD_PREP(GENMASK(31, 16), f)
+#define WX_VXMRQC_RSS_ALG_IPV4_TCP BIT(0)
+#define WX_VXMRQC_RSS_ALG_IPV4 BIT(1)
+#define WX_VXMRQC_RSS_ALG_IPV6 BIT(4)
+#define WX_VXMRQC_RSS_ALG_IPV6_TCP BIT(5)
+#define WX_VXMRQC_RSS_EN BIT(8)
+
+#define WX_VXRSSRK(i) (0x80 + ((i) * 4)) /* i=[0,9] */
+#define WX_VXRETA(i) (0xC0 + ((i) * 4)) /* i=[0,15] */
+
+/* Interrupt registers */
#define WX_VXICR 0x100
#define WX_VXIMS 0x108
#define WX_VXIMC 0x10C
#define WX_VF_IRQ_CLEAR_MASK 7
#define WX_VF_MAX_TX_QUEUES 4
#define WX_VF_MAX_RX_QUEUES 4
-#define WX_VXTXDCTL(r) (0x3010 + (0x40 * (r)))
-#define WX_VXRXDCTL(r) (0x1010 + (0x40 * (r)))
-#define WX_VXRXDCTL_ENABLE BIT(0)
-#define WX_VXTXDCTL_FLUSH BIT(26)
#define WX_VXITR(i) (0x200 + (4 * (i))) /* i=[0,1] */
#define WX_VXITR_MASK GENMASK(8, 0)
@@ -29,16 +46,6 @@
#define WX_VXIVAR_MISC 0x260
#define WX_VXIVAR(i) (0x240 + (4 * (i))) /* i=[0,3] */
-#define WX_VXRXDCTL_RSCMAX(f) FIELD_PREP(GENMASK(24, 23), f)
-#define WX_VXRXDCTL_BUFLEN(f) FIELD_PREP(GENMASK(6, 1), f)
-#define WX_VXRXDCTL_BUFSZ(f) FIELD_PREP(GENMASK(11, 8), f)
-#define WX_VXRXDCTL_HDRSZ(f) FIELD_PREP(GENMASK(15, 12), f)
-
-#define WX_VXRXDCTL_RSCMAX_MASK GENMASK(24, 23)
-#define WX_VXRXDCTL_BUFLEN_MASK GENMASK(6, 1)
-#define WX_VXRXDCTL_BUFSZ_MASK GENMASK(11, 8)
-#define WX_VXRXDCTL_HDRSZ_MASK GENMASK(15, 12)
-
#define wx_conf_size(v, mwidth, uwidth) ({ \
typeof(v) _v = (v); \
(_v == 2 << (mwidth) ? 0 : _v >> (uwidth)); \
@@ -59,44 +66,35 @@
#define WX_VXRDBAH(r) (0x1004 + (0x40 * (r)))
#define WX_VXRDT(r) (0x1008 + (0x40 * (r)))
#define WX_VXRDH(r) (0x100C + (0x40 * (r)))
-
+#define WX_VXRXDCTL(r) (0x1010 + (0x40 * (r)))
+#define WX_VXRXDCTL_ENABLE BIT(0)
+#define WX_VXRXDCTL_BUFLEN_MASK GENMASK(6, 1)
+#define WX_VXRXDCTL_BUFLEN(f) FIELD_PREP(GENMASK(6, 1), f)
+#define WX_VXRXDCTL_BUFSZ_MASK GENMASK(11, 8)
+#define WX_VXRXDCTL_BUFSZ(f) FIELD_PREP(GENMASK(11, 8), f)
+#define WX_VXRXDCTL_HDRSZ_MASK GENMASK(15, 12)
+#define WX_VXRXDCTL_HDRSZ(f) FIELD_PREP(GENMASK(15, 12), f)
+#define WX_VXRXDCTL_RSCMAX_MASK GENMASK(24, 23)
+#define WX_VXRXDCTL_RSCMAX(f) FIELD_PREP(GENMASK(24, 23), f)
#define WX_VXRXDCTL_RSCEN BIT(29)
#define WX_VXRXDCTL_DROP BIT(30)
#define WX_VXRXDCTL_VLAN BIT(31)
+/* Transmit path */
#define WX_VXTDBAL(r) (0x3000 + (0x40 * (r)))
#define WX_VXTDBAH(r) (0x3004 + (0x40 * (r)))
#define WX_VXTDT(r) (0x3008 + (0x40 * (r)))
#define WX_VXTDH(r) (0x300C + (0x40 * (r)))
-
+#define WX_VXTXDCTL(r) (0x3010 + (0x40 * (r)))
#define WX_VXTXDCTL_ENABLE BIT(0)
#define WX_VXTXDCTL_BUFLEN(f) FIELD_PREP(GENMASK(6, 1), f)
#define WX_VXTXDCTL_PTHRESH(f) FIELD_PREP(GENMASK(11, 8), f)
#define WX_VXTXDCTL_WTHRESH(f) FIELD_PREP(GENMASK(22, 16), f)
-
-#define WX_VXMRQC_PSR(f) FIELD_PREP(GENMASK(5, 1), f)
-#define WX_VXMRQC_PSR_MASK GENMASK(5, 1)
-#define WX_VXMRQC_PSR_L4HDR BIT(0)
-#define WX_VXMRQC_PSR_L3HDR BIT(1)
-#define WX_VXMRQC_PSR_L2HDR BIT(2)
-#define WX_VXMRQC_PSR_TUNHDR BIT(3)
-#define WX_VXMRQC_PSR_TUNMAC BIT(4)
-
-#define WX_VXRSSRK(i) (0x80 + ((i) * 4)) /* i=[0,9] */
-#define WX_VXRETA(i) (0xC0 + ((i) * 4)) /* i=[0,15] */
-
-#define WX_VXMRQC_RSS(f) FIELD_PREP(GENMASK(31, 16), f)
-#define WX_VXMRQC_RSS_MASK GENMASK(31, 16)
-#define WX_VXMRQC_RSS_ALG_IPV4_TCP BIT(0)
-#define WX_VXMRQC_RSS_ALG_IPV4 BIT(1)
-#define WX_VXMRQC_RSS_ALG_IPV6 BIT(4)
-#define WX_VXMRQC_RSS_ALG_IPV6_TCP BIT(5)
-#define WX_VXMRQC_RSS_EN BIT(8)
-#define WX_VXMRQC_RSS_HASH(f) FIELD_PREP(GENMASK(15, 13), f)
+#define WX_VXTXDCTL_FLUSH BIT(26)
#define WX_PFLINK_STATUS(g) FIELD_GET(BIT(0), g)
#define WX_PFLINK_SPEED(g) FIELD_GET(GENMASK(31, 1), g)
-#define WX_VXSTATUS_SPEED(g) FIELD_GET(GENMASK(4, 1), g)
+#define WX_VXSTATUS_SPEED(g) FIELD_GET(GENMASK(4, 1), g)
struct wx_link_reg_fields {
u32 mac_type;
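
The wx_vf.h hunk above is mostly mechanical: the VF register defines are regrouped by functional block (control, interrupt, receive, transmit), with each multi-bit field expressed as a GENMASK plus a FIELD_PREP wrapper. For reference, a hedged sketch of how such helpers compose a register value; the field encodings passed in are placeholders, not values taken from the driver.

#include <linux/bitfield.h>

/* Compose an RX descriptor control word from the helpers above. */
static u32 my_build_vxrxdctl(u32 buflen, u32 bufsz)
{
	return WX_VXRXDCTL_ENABLE |		/* bit 0 */
	       WX_VXRXDCTL_BUFLEN(buflen) |	/* bits 6:1 */
	       WX_VXRXDCTL_BUFSZ(bufsz);	/* bits 11:8 */
}
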
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_vf_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_vf_lib.c
index 3023ea2732ef..a87887b9f8ee 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_vf_lib.c
+++ b/drivers/net/ethernet/wangxun/libwx/wx_vf_lib.c
@@ -10,7 +10,7 @@
#include "wx_vf.h"
#include "wx_vf_lib.h"
-static void wx_write_eitr_vf(struct wx_q_vector *q_vector)
+void wx_write_eitr_vf(struct wx_q_vector *q_vector)
{
struct wx *wx = q_vector->wx;
int v_idx = q_vector->v_idx;
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_vf_lib.h b/drivers/net/ethernet/wangxun/libwx/wx_vf_lib.h
index 43ea126b79eb..a4bd23c92800 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_vf_lib.h
+++ b/drivers/net/ethernet/wangxun/libwx/wx_vf_lib.h
@@ -4,6 +4,7 @@
#ifndef _WX_VF_LIB_H_
#define _WX_VF_LIB_H_
+void wx_write_eitr_vf(struct wx_q_vector *q_vector);
void wx_configure_msix_vf(struct wx *wx);
int wx_write_uc_addr_list_vf(struct net_device *netdev);
void wx_setup_psrtype_vf(struct wx *wx);
diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c
index 7e2d9ec38a30..4363bab33496 100644
--- a/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c
+++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c
@@ -115,7 +115,8 @@ static int ngbe_set_channels(struct net_device *dev,
static const struct ethtool_ops ngbe_ethtool_ops = {
.supported_coalesce_params = ETHTOOL_COALESCE_USECS |
- ETHTOOL_COALESCE_TX_MAX_FRAMES_IRQ,
+ ETHTOOL_COALESCE_TX_MAX_FRAMES_IRQ |
+ ETHTOOL_COALESCE_USE_ADAPTIVE,
.get_drvinfo = wx_get_drvinfo,
.get_link = ethtool_op_get_link,
.get_link_ksettings = wx_get_link_ksettings,
diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
index e0fc897b0a58..58488e138beb 100644
--- a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
+++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
@@ -119,9 +119,9 @@ static int ngbe_sw_init(struct wx *wx)
num_online_cpus());
wx->rss_enabled = true;
- /* enable itr by default in dynamic mode */
- wx->rx_itr_setting = 1;
- wx->tx_itr_setting = 1;
+ wx->adaptive_itr = false;
+ wx->rx_itr_setting = WX_7K_ITR;
+ wx->tx_itr_setting = WX_7K_ITR;
/* set default ring sizes */
wx->tx_ring_count = NGBE_DEFAULT_TXD;
diff --git a/drivers/net/ethernet/wangxun/ngbevf/ngbevf_main.c b/drivers/net/ethernet/wangxun/ngbevf/ngbevf_main.c
index c1246ab5239c..5f9ddb5e5403 100644
--- a/drivers/net/ethernet/wangxun/ngbevf/ngbevf_main.c
+++ b/drivers/net/ethernet/wangxun/ngbevf/ngbevf_main.c
@@ -100,6 +100,7 @@ static int ngbevf_sw_init(struct wx *wx)
wx->mac.max_tx_queues = NGBEVF_MAX_TX_QUEUES;
wx->mac.max_rx_queues = NGBEVF_MAX_RX_QUEUES;
/* Enable dynamic interrupt throttling rates */
+ wx->adaptive_itr = true;
wx->rx_itr_setting = 1;
wx->tx_itr_setting = 1;
/* set default ring sizes */
diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_ethtool.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_ethtool.c
index a4753402660e..b496ec502fed 100644
--- a/drivers/net/ethernet/wangxun/txgbe/txgbe_ethtool.c
+++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_ethtool.c
@@ -538,7 +538,8 @@ static int txgbe_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
static const struct ethtool_ops txgbe_ethtool_ops = {
.supported_coalesce_params = ETHTOOL_COALESCE_USECS |
- ETHTOOL_COALESCE_TX_MAX_FRAMES_IRQ,
+ ETHTOOL_COALESCE_TX_MAX_FRAMES_IRQ |
+ ETHTOOL_COALESCE_USE_ADAPTIVE,
.get_drvinfo = wx_get_drvinfo,
.nway_reset = wx_nway_reset,
.get_link = ethtool_op_get_link,
diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c
index a5867f3c93fc..c4c4d70d8466 100644
--- a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c
+++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c
@@ -401,6 +401,7 @@ static int txgbe_sw_init(struct wx *wx)
set_bit(WX_FLAG_MULTI_64_FUNC, wx->flags);
/* enable itr by default in dynamic mode */
+ wx->adaptive_itr = true;
wx->rx_itr_setting = 1;
wx->tx_itr_setting = 1;
diff --git a/drivers/net/ethernet/wangxun/txgbevf/txgbevf_main.c b/drivers/net/ethernet/wangxun/txgbevf/txgbevf_main.c
index ebfce3cf753e..3755bb399f71 100644
--- a/drivers/net/ethernet/wangxun/txgbevf/txgbevf_main.c
+++ b/drivers/net/ethernet/wangxun/txgbevf/txgbevf_main.c
@@ -144,6 +144,7 @@ static int txgbevf_sw_init(struct wx *wx)
wx->mac.max_tx_queues = TXGBEVF_MAX_TX_QUEUES;
wx->mac.max_rx_queues = TXGBEVF_MAX_RX_QUEUES;
/* Enable dynamic interrupt throttling rates */
+ wx->adaptive_itr = true;
wx->rx_itr_setting = 1;
wx->tx_itr_setting = 1;
/* set default ring sizes */
diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c
index 4b668ebaa0f7..5cb59d72bc82 100644
--- a/drivers/net/gtp.c
+++ b/drivers/net/gtp.c
@@ -21,9 +21,10 @@
#include <linux/file.h>
#include <linux/gtp.h>
+#include <net/flow.h>
+#include <net/inet_dscp.h>
#include <net/net_namespace.h>
#include <net/protocol.h>
-#include <net/inet_dscp.h>
#include <net/inet_sock.h>
#include <net/ip.h>
#include <net/ipv6.h>
@@ -352,7 +353,7 @@ static struct rtable *ip4_route_output_gtp(struct flowi4 *fl4,
fl4->flowi4_oif = sk->sk_bound_dev_if;
fl4->daddr = daddr;
fl4->saddr = saddr;
- fl4->flowi4_tos = inet_dscp_to_dsfield(inet_sk_dscp(inet_sk(sk)));
+ fl4->flowi4_dscp = inet_sk_dscp(inet_sk(sk));
fl4->flowi4_scope = ip_sock_rt_scope(sk);
fl4->flowi4_proto = sk->sk_protocol;
@@ -2401,7 +2402,7 @@ static int gtp_genl_send_echo_req(struct sk_buff *skb, struct genl_info *info)
udp_tunnel_xmit_skb(rt, sk, skb_to_send,
fl4.saddr, fl4.daddr,
- fl4.flowi4_tos,
+ inet_dscp_to_dsfield(fl4.flowi4_dscp),
ip4_dst_hoplimit(&rt->dst),
0,
port, port,
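
The gtp.c hunks are part of the ongoing conversion from the raw flowi4_tos byte to the typed flowi4_dscp field: the flow key now carries a dscp_t, and inet_dscp_to_dsfield() is applied only at the boundary where a wire-format dsfield byte is required (the udp_tunnel_xmit_skb() call). A minimal sketch of the conversion, using only the <net/inet_dscp.h> and <net/ip.h> helpers visible in these hunks:

#include <net/inet_dscp.h>
#include <net/ip.h>

/* Typed DSCP in, raw dsfield byte out; ip4h_dscp() masks off the ECN
 * bits when extracting from an IPv4 header.
 */
static u8 my_dsfield_from_hdr(const struct iphdr *ip4h)
{
	dscp_t dscp = ip4h_dscp(ip4h);

	return inet_dscp_to_dsfield(dscp);
}
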
diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
index e3e65772c599..d7e3ddbcab6f 100644
--- a/drivers/net/ipvlan/ipvlan_core.c
+++ b/drivers/net/ipvlan/ipvlan_core.c
@@ -2,7 +2,7 @@
/* Copyright (c) 2014 Mahesh Bandewar <maheshb@google.com>
*/
-#include <net/inet_dscp.h>
+#include <net/flow.h>
#include <net/ip.h>
#include "ipvlan.h"
@@ -433,7 +433,7 @@ static noinline_for_stack int ipvlan_process_v4_outbound(struct sk_buff *skb)
ip4h = ip_hdr(skb);
fl4.daddr = ip4h->daddr;
fl4.saddr = ip4h->saddr;
- fl4.flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(ip4h));
+ fl4.flowi4_dscp = ip4h_dscp(ip4h);
rt = ip_route_output_flow(net, &fl4, NULL);
if (IS_ERR(rt))
diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index 01329fe7451a..0b21818e4925 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -1583,9 +1583,6 @@ static struct macsec_tx_sa *get_txsa_from_nl(struct net *net,
if (IS_ERR(dev))
return ERR_CAST(dev);
- if (*assoc_num >= MACSEC_NUM_AN)
- return ERR_PTR(-EINVAL);
-
secy = &macsec_priv(dev)->secy;
tx_sc = &secy->tx_sc;
@@ -1646,8 +1643,6 @@ static struct macsec_rx_sa *get_rxsa_from_nl(struct net *net,
return ERR_PTR(-EINVAL);
*assoc_num = nla_get_u8(tb_sa[MACSEC_SA_ATTR_AN]);
- if (*assoc_num >= MACSEC_NUM_AN)
- return ERR_PTR(-EINVAL);
rx_sc = get_rxsc_from_nl(net, attrs, tb_rxsc, devp, secyp);
if (IS_ERR(rx_sc))
@@ -1670,24 +1665,21 @@ static const struct nla_policy macsec_genl_policy[NUM_MACSEC_ATTR] = {
static const struct nla_policy macsec_genl_rxsc_policy[NUM_MACSEC_RXSC_ATTR] = {
[MACSEC_RXSC_ATTR_SCI] = { .type = NLA_U64 },
- [MACSEC_RXSC_ATTR_ACTIVE] = { .type = NLA_U8 },
+ [MACSEC_RXSC_ATTR_ACTIVE] = NLA_POLICY_MAX(NLA_U8, 1),
};
static const struct nla_policy macsec_genl_sa_policy[NUM_MACSEC_SA_ATTR] = {
- [MACSEC_SA_ATTR_AN] = { .type = NLA_U8 },
- [MACSEC_SA_ATTR_ACTIVE] = { .type = NLA_U8 },
- [MACSEC_SA_ATTR_PN] = NLA_POLICY_MIN_LEN(4),
- [MACSEC_SA_ATTR_KEYID] = { .type = NLA_BINARY,
- .len = MACSEC_KEYID_LEN, },
- [MACSEC_SA_ATTR_KEY] = { .type = NLA_BINARY,
- .len = MACSEC_MAX_KEY_LEN, },
+ [MACSEC_SA_ATTR_AN] = NLA_POLICY_MAX(NLA_U8, MACSEC_NUM_AN - 1),
+ [MACSEC_SA_ATTR_ACTIVE] = NLA_POLICY_MAX(NLA_U8, 1),
+ [MACSEC_SA_ATTR_PN] = NLA_POLICY_MIN(NLA_UINT, 1),
+ [MACSEC_SA_ATTR_KEYID] = NLA_POLICY_EXACT_LEN(MACSEC_KEYID_LEN),
+ [MACSEC_SA_ATTR_KEY] = NLA_POLICY_MAX_LEN(MACSEC_MAX_KEY_LEN),
[MACSEC_SA_ATTR_SSCI] = { .type = NLA_U32 },
- [MACSEC_SA_ATTR_SALT] = { .type = NLA_BINARY,
- .len = MACSEC_SALT_LEN, },
+ [MACSEC_SA_ATTR_SALT] = NLA_POLICY_EXACT_LEN(MACSEC_SALT_LEN),
};
static const struct nla_policy macsec_genl_offload_policy[NUM_MACSEC_OFFLOAD_ATTR] = {
- [MACSEC_OFFLOAD_ATTR_TYPE] = { .type = NLA_U8 },
+ [MACSEC_OFFLOAD_ATTR_TYPE] = NLA_POLICY_MAX(NLA_U8, MACSEC_OFFLOAD_MAX),
};
/* Offloads an operation to a device driver */
@@ -1739,21 +1731,6 @@ static bool validate_add_rxsa(struct nlattr **attrs)
!attrs[MACSEC_SA_ATTR_KEYID])
return false;
- if (nla_get_u8(attrs[MACSEC_SA_ATTR_AN]) >= MACSEC_NUM_AN)
- return false;
-
- if (attrs[MACSEC_SA_ATTR_PN] &&
- nla_get_u64(attrs[MACSEC_SA_ATTR_PN]) == 0)
- return false;
-
- if (attrs[MACSEC_SA_ATTR_ACTIVE]) {
- if (nla_get_u8(attrs[MACSEC_SA_ATTR_ACTIVE]) > 1)
- return false;
- }
-
- if (nla_len(attrs[MACSEC_SA_ATTR_KEYID]) != MACSEC_KEYID_LEN)
- return false;
-
return true;
}
@@ -1812,14 +1789,6 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info)
rtnl_unlock();
return -EINVAL;
}
-
- if (nla_len(tb_sa[MACSEC_SA_ATTR_SALT]) != MACSEC_SALT_LEN) {
- pr_notice("macsec: nl: add_rxsa: bad salt length: %d != %d\n",
- nla_len(tb_sa[MACSEC_SA_ATTR_SALT]),
- MACSEC_SALT_LEN);
- rtnl_unlock();
- return -EINVAL;
- }
}
rx_sa = rtnl_dereference(rx_sc->sa[assoc_num]);
@@ -1895,19 +1864,6 @@ cleanup:
return err;
}
-static bool validate_add_rxsc(struct nlattr **attrs)
-{
- if (!attrs[MACSEC_RXSC_ATTR_SCI])
- return false;
-
- if (attrs[MACSEC_RXSC_ATTR_ACTIVE]) {
- if (nla_get_u8(attrs[MACSEC_RXSC_ATTR_ACTIVE]) > 1)
- return false;
- }
-
- return true;
-}
-
static int macsec_add_rxsc(struct sk_buff *skb, struct genl_info *info)
{
struct net_device *dev;
@@ -1925,7 +1881,7 @@ static int macsec_add_rxsc(struct sk_buff *skb, struct genl_info *info)
if (parse_rxsc_config(attrs, tb_rxsc))
return -EINVAL;
- if (!validate_add_rxsc(tb_rxsc))
+ if (!tb_rxsc[MACSEC_RXSC_ATTR_SCI])
return -EINVAL;
rtnl_lock();
@@ -1984,20 +1940,6 @@ static bool validate_add_txsa(struct nlattr **attrs)
!attrs[MACSEC_SA_ATTR_KEYID])
return false;
- if (nla_get_u8(attrs[MACSEC_SA_ATTR_AN]) >= MACSEC_NUM_AN)
- return false;
-
- if (nla_get_u64(attrs[MACSEC_SA_ATTR_PN]) == 0)
- return false;
-
- if (attrs[MACSEC_SA_ATTR_ACTIVE]) {
- if (nla_get_u8(attrs[MACSEC_SA_ATTR_ACTIVE]) > 1)
- return false;
- }
-
- if (nla_len(attrs[MACSEC_SA_ATTR_KEYID]) != MACSEC_KEYID_LEN)
- return false;
-
return true;
}
@@ -2055,14 +1997,6 @@ static int macsec_add_txsa(struct sk_buff *skb, struct genl_info *info)
rtnl_unlock();
return -EINVAL;
}
-
- if (nla_len(tb_sa[MACSEC_SA_ATTR_SALT]) != MACSEC_SALT_LEN) {
- pr_notice("macsec: nl: add_txsa: bad salt length: %d != %d\n",
- nla_len(tb_sa[MACSEC_SA_ATTR_SALT]),
- MACSEC_SALT_LEN);
- rtnl_unlock();
- return -EINVAL;
- }
}
tx_sa = rtnl_dereference(tx_sc->sa[assoc_num]);
@@ -2339,17 +2273,6 @@ static bool validate_upd_sa(struct nlattr **attrs)
attrs[MACSEC_SA_ATTR_SALT])
return false;
- if (nla_get_u8(attrs[MACSEC_SA_ATTR_AN]) >= MACSEC_NUM_AN)
- return false;
-
- if (attrs[MACSEC_SA_ATTR_PN] && nla_get_u64(attrs[MACSEC_SA_ATTR_PN]) == 0)
- return false;
-
- if (attrs[MACSEC_SA_ATTR_ACTIVE]) {
- if (nla_get_u8(attrs[MACSEC_SA_ATTR_ACTIVE]) > 1)
- return false;
- }
-
return true;
}
@@ -2556,7 +2479,7 @@ static int macsec_upd_rxsc(struct sk_buff *skb, struct genl_info *info)
if (parse_rxsc_config(attrs, tb_rxsc))
return -EINVAL;
- if (!validate_add_rxsc(tb_rxsc))
+ if (!tb_rxsc[MACSEC_RXSC_ATTR_SCI])
return -EINVAL;
rtnl_lock();
@@ -3834,21 +3757,23 @@ static const struct device_type macsec_type = {
.name = "macsec",
};
+static int validate_cipher_suite(const struct nlattr *attr,
+ struct netlink_ext_ack *extack);
static const struct nla_policy macsec_rtnl_policy[IFLA_MACSEC_MAX + 1] = {
[IFLA_MACSEC_SCI] = { .type = NLA_U64 },
[IFLA_MACSEC_PORT] = { .type = NLA_U16 },
- [IFLA_MACSEC_ICV_LEN] = { .type = NLA_U8 },
- [IFLA_MACSEC_CIPHER_SUITE] = { .type = NLA_U64 },
+ [IFLA_MACSEC_ICV_LEN] = NLA_POLICY_RANGE(NLA_U8, MACSEC_MIN_ICV_LEN, MACSEC_STD_ICV_LEN),
+ [IFLA_MACSEC_CIPHER_SUITE] = NLA_POLICY_VALIDATE_FN(NLA_U64, validate_cipher_suite),
[IFLA_MACSEC_WINDOW] = { .type = NLA_U32 },
- [IFLA_MACSEC_ENCODING_SA] = { .type = NLA_U8 },
- [IFLA_MACSEC_ENCRYPT] = { .type = NLA_U8 },
- [IFLA_MACSEC_PROTECT] = { .type = NLA_U8 },
- [IFLA_MACSEC_INC_SCI] = { .type = NLA_U8 },
- [IFLA_MACSEC_ES] = { .type = NLA_U8 },
- [IFLA_MACSEC_SCB] = { .type = NLA_U8 },
- [IFLA_MACSEC_REPLAY_PROTECT] = { .type = NLA_U8 },
- [IFLA_MACSEC_VALIDATION] = { .type = NLA_U8 },
- [IFLA_MACSEC_OFFLOAD] = { .type = NLA_U8 },
+ [IFLA_MACSEC_ENCODING_SA] = NLA_POLICY_MAX(NLA_U8, MACSEC_NUM_AN - 1),
+ [IFLA_MACSEC_ENCRYPT] = NLA_POLICY_MAX(NLA_U8, 1),
+ [IFLA_MACSEC_PROTECT] = NLA_POLICY_MAX(NLA_U8, 1),
+ [IFLA_MACSEC_INC_SCI] = NLA_POLICY_MAX(NLA_U8, 1),
+ [IFLA_MACSEC_ES] = NLA_POLICY_MAX(NLA_U8, 1),
+ [IFLA_MACSEC_SCB] = NLA_POLICY_MAX(NLA_U8, 1),
+ [IFLA_MACSEC_REPLAY_PROTECT] = NLA_POLICY_MAX(NLA_U8, 1),
+ [IFLA_MACSEC_VALIDATION] = NLA_POLICY_MAX(NLA_U8, MACSEC_VALIDATE_MAX),
+ [IFLA_MACSEC_OFFLOAD] = NLA_POLICY_MAX(NLA_U8, MACSEC_OFFLOAD_MAX),
};
static void macsec_free_netdev(struct net_device *dev)
@@ -4302,20 +4227,30 @@ unregister:
return err;
}
+static int validate_cipher_suite(const struct nlattr *attr,
+ struct netlink_ext_ack *extack)
+{
+ switch (nla_get_u64(attr)) {
+ case MACSEC_CIPHER_ID_GCM_AES_128:
+ case MACSEC_CIPHER_ID_GCM_AES_256:
+ case MACSEC_CIPHER_ID_GCM_AES_XPN_128:
+ case MACSEC_CIPHER_ID_GCM_AES_XPN_256:
+ case MACSEC_DEFAULT_CIPHER_ID:
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
static int macsec_validate_attr(struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack)
{
- u64 csid = MACSEC_DEFAULT_CIPHER_ID;
u8 icv_len = MACSEC_DEFAULT_ICV_LEN;
- int flag;
bool es, scb, sci;
if (!data)
return 0;
- if (data[IFLA_MACSEC_CIPHER_SUITE])
- csid = nla_get_u64(data[IFLA_MACSEC_CIPHER_SUITE]);
-
if (data[IFLA_MACSEC_ICV_LEN]) {
icv_len = nla_get_u8(data[IFLA_MACSEC_ICV_LEN]);
if (icv_len != MACSEC_DEFAULT_ICV_LEN) {
@@ -4331,34 +4266,6 @@ static int macsec_validate_attr(struct nlattr *tb[], struct nlattr *data[],
}
}
- switch (csid) {
- case MACSEC_CIPHER_ID_GCM_AES_128:
- case MACSEC_CIPHER_ID_GCM_AES_256:
- case MACSEC_CIPHER_ID_GCM_AES_XPN_128:
- case MACSEC_CIPHER_ID_GCM_AES_XPN_256:
- case MACSEC_DEFAULT_CIPHER_ID:
- if (icv_len < MACSEC_MIN_ICV_LEN ||
- icv_len > MACSEC_STD_ICV_LEN)
- return -EINVAL;
- break;
- default:
- return -EINVAL;
- }
-
- if (data[IFLA_MACSEC_ENCODING_SA]) {
- if (nla_get_u8(data[IFLA_MACSEC_ENCODING_SA]) >= MACSEC_NUM_AN)
- return -EINVAL;
- }
-
- for (flag = IFLA_MACSEC_ENCODING_SA + 1;
- flag < IFLA_MACSEC_VALIDATION;
- flag++) {
- if (data[flag]) {
- if (nla_get_u8(data[flag]) > 1)
- return -EINVAL;
- }
- }
-
es = nla_get_u8_default(data[IFLA_MACSEC_ES], false);
sci = nla_get_u8_default(data[IFLA_MACSEC_INC_SCI], false);
scb = nla_get_u8_default(data[IFLA_MACSEC_SCB], false);
@@ -4366,10 +4273,6 @@ static int macsec_validate_attr(struct nlattr *tb[], struct nlattr *data[],
if ((sci && (scb || es)) || (scb && es))
return -EINVAL;
- if (data[IFLA_MACSEC_VALIDATION] &&
- nla_get_u8(data[IFLA_MACSEC_VALIDATION]) > MACSEC_VALIDATE_MAX)
- return -EINVAL;
-
if ((data[IFLA_MACSEC_REPLAY_PROTECT] &&
nla_get_u8(data[IFLA_MACSEC_REPLAY_PROTECT])) &&
!data[IFLA_MACSEC_WINDOW])
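
The macsec.c hunks move validation out of the open-coded validate_add_*() helpers and into the netlink policy tables, so out-of-range attributes are rejected generically (with extack) before any handler runs; only the multi-valued cipher-suite check needs a callback via NLA_POLICY_VALIDATE_FN(). A hedged sketch of both styles on a made-up attribute set; all MY_* names are placeholders.

#include <net/netlink.h>

enum {
	MY_ATTR_UNSPEC,
	MY_ATTR_AN,	/* association number, 0..3 */
	MY_ATTR_SUITE,	/* cipher suite id, enumerated values */
	__MY_ATTR_MAX,
};
#define MY_ATTR_MAX	(__MY_ATTR_MAX - 1)

static int my_validate_suite(const struct nlattr *attr,
			     struct netlink_ext_ack *extack)
{
	switch (nla_get_u64(attr)) {
	case 1:
	case 2:		/* placeholder suite ids */
		return 0;
	default:
		NL_SET_ERR_MSG_ATTR(extack, attr, "unsupported cipher suite");
		return -EINVAL;
	}
}

static const struct nla_policy my_policy[MY_ATTR_MAX + 1] = {
	[MY_ATTR_AN]	= NLA_POLICY_MAX(NLA_U8, 3),
	[MY_ATTR_SUITE]	= NLA_POLICY_VALIDATE_FN(NLA_U64, my_validate_suite),
};
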
diff --git a/drivers/net/mdio/mdio-bcm-unimac.c b/drivers/net/mdio/mdio-bcm-unimac.c
index 7baab230008a..37e35f282d9a 100644
--- a/drivers/net/mdio/mdio-bcm-unimac.c
+++ b/drivers/net/mdio/mdio-bcm-unimac.c
@@ -215,7 +215,9 @@ static int unimac_mdio_clk_set(struct unimac_mdio_priv *priv)
div = (rate / (2 * priv->clk_freq)) - 1;
if (div & ~MDIO_CLK_DIV_MASK) {
- pr_warn("Incorrect MDIO clock frequency, ignoring\n");
+ dev_warn(priv->mii_bus->parent,
+ "Ignoring MDIO clock frequency request: %d vs. rate: %ld\n",
+ priv->clk_freq, rate);
ret = 0;
goto out;
}
diff --git a/drivers/net/mdio/of_mdio.c b/drivers/net/mdio/of_mdio.c
index 98f667b121f7..d8ca63ed8719 100644
--- a/drivers/net/mdio/of_mdio.c
+++ b/drivers/net/mdio/of_mdio.c
@@ -473,6 +473,5 @@ void of_phy_deregister_fixed_link(struct device_node *np)
fixed_phy_unregister(phydev);
put_device(&phydev->mdio.dev); /* of_phy_find_device() */
- phy_device_free(phydev); /* fixed_phy_register() */
}
EXPORT_SYMBOL(of_phy_deregister_fixed_link);
diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c
index e3722de08ea9..194570443493 100644
--- a/drivers/net/netconsole.c
+++ b/drivers/net/netconsole.c
@@ -300,6 +300,33 @@ static void netconsole_print_banner(struct netpoll *np)
np_info(np, "remote ethernet address %pM\n", np->remote_mac);
}
+/* Parse the string and populate the `inet_addr` union. Return 0 if IPv4 is
+ * populated, 1 if IPv6 is populated, and -1 upon failure.
+ */
+static int netpoll_parse_ip_addr(const char *str, union inet_addr *addr)
+{
+ const char *end = NULL;
+ int len;
+
+ len = strlen(str);
+ if (!len)
+ return -1;
+
+ if (str[len - 1] == '\n')
+ len -= 1;
+
+ if (in4_pton(str, len, (void *)addr, -1, &end) > 0 &&
+ (!end || *end == 0 || *end == '\n'))
+ return 0;
+
+ if (IS_ENABLED(CONFIG_IPV6) &&
+ in6_pton(str, len, (void *)addr, -1, &end) > 0 &&
+ (!end || *end == 0 || *end == '\n'))
+ return 1;
+
+ return -1;
+}
+
#ifdef CONFIG_NETCONSOLE_DYNAMIC
/*
@@ -730,6 +757,7 @@ static ssize_t local_ip_store(struct config_item *item, const char *buf,
{
struct netconsole_target *nt = to_target(item);
ssize_t ret = -EINVAL;
+ int ipv6;
mutex_lock(&dynamic_netconsole_mutex);
if (nt->enabled) {
@@ -738,23 +766,10 @@ static ssize_t local_ip_store(struct config_item *item, const char *buf,
goto out_unlock;
}
- if (strnchr(buf, count, ':')) {
- const char *end;
-
- if (in6_pton(buf, count, nt->np.local_ip.in6.s6_addr, -1, &end) > 0) {
- if (*end && *end != '\n') {
- pr_err("invalid IPv6 address at: <%c>\n", *end);
- goto out_unlock;
- }
- nt->np.ipv6 = true;
- } else
- goto out_unlock;
- } else {
- if (!nt->np.ipv6)
- nt->np.local_ip.ip = in_aton(buf);
- else
- goto out_unlock;
- }
+ ipv6 = netpoll_parse_ip_addr(buf, &nt->np.local_ip);
+ if (ipv6 == -1)
+ goto out_unlock;
+ nt->np.ipv6 = !!ipv6;
ret = strnlen(buf, count);
out_unlock:
@@ -767,6 +782,7 @@ static ssize_t remote_ip_store(struct config_item *item, const char *buf,
{
struct netconsole_target *nt = to_target(item);
ssize_t ret = -EINVAL;
+ int ipv6;
mutex_lock(&dynamic_netconsole_mutex);
if (nt->enabled) {
@@ -775,23 +791,10 @@ static ssize_t remote_ip_store(struct config_item *item, const char *buf,
goto out_unlock;
}
- if (strnchr(buf, count, ':')) {
- const char *end;
-
- if (in6_pton(buf, count, nt->np.remote_ip.in6.s6_addr, -1, &end) > 0) {
- if (*end && *end != '\n') {
- pr_err("invalid IPv6 address at: <%c>\n", *end);
- goto out_unlock;
- }
- nt->np.ipv6 = true;
- } else
- goto out_unlock;
- } else {
- if (!nt->np.ipv6)
- nt->np.remote_ip.ip = in_aton(buf);
- else
- goto out_unlock;
- }
+ ipv6 = netpoll_parse_ip_addr(buf, &nt->np.remote_ip);
+ if (ipv6 == -1)
+ goto out_unlock;
+ nt->np.ipv6 = !!ipv6;
ret = strnlen(buf, count);
out_unlock:
@@ -1742,26 +1745,6 @@ static void write_msg(struct console *con, const char *msg, unsigned int len)
spin_unlock_irqrestore(&target_list_lock, flags);
}
-static int netpoll_parse_ip_addr(const char *str, union inet_addr *addr)
-{
- const char *end;
-
- if (!strchr(str, ':') &&
- in4_pton(str, -1, (void *)addr, -1, &end) > 0) {
- if (!*end)
- return 0;
- }
- if (in6_pton(str, -1, addr->in6.s6_addr, -1, &end) > 0) {
-#if IS_ENABLED(CONFIG_IPV6)
- if (!*end)
- return 1;
-#else
- return -1;
-#endif
- }
- return -1;
-}
-
static int netconsole_parser_cmdline(struct netpoll *np, char *opt)
{
bool ipversion_set = false;
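
With netpoll_parse_ip_addr() now defined once near the top of netconsole.c, the configfs stores and the cmdline parser share a single tri-state parser (0 for IPv4, 1 for IPv6, -1 on failure) that also tolerates the trailing newline of configfs writes. A condensed sketch of the caller pattern, assuming the struct netpoll fields used in the hunks; locking and the enabled-target check from the real stores are omitted.

#include <linux/netpoll.h>

/* Apply a user-supplied address string to a netpoll target; mirrors
 * the remote_ip_store() flow above.
 */
static int my_set_remote_ip(struct netpoll *np, const char *buf)
{
	int ipv6 = netpoll_parse_ip_addr(buf, &np->remote_ip);

	if (ipv6 == -1)
		return -EINVAL;

	np->ipv6 = !!ipv6;
	return 0;
}
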
diff --git a/drivers/net/netdevsim/health.c b/drivers/net/netdevsim/health.c
index 688f05316b5e..3bd0e7a489c3 100644
--- a/drivers/net/netdevsim/health.c
+++ b/drivers/net/netdevsim/health.c
@@ -183,14 +183,14 @@ int nsim_dev_health_init(struct nsim_dev *nsim_dev, struct devlink *devlink)
health->empty_reporter =
devl_health_reporter_create(devlink,
&nsim_dev_empty_reporter_ops,
- 0, health);
+ health);
if (IS_ERR(health->empty_reporter))
return PTR_ERR(health->empty_reporter);
health->dummy_reporter =
devl_health_reporter_create(devlink,
&nsim_dev_dummy_reporter_ops,
- 0, health);
+ health);
if (IS_ERR(health->dummy_reporter)) {
err = PTR_ERR(health->dummy_reporter);
goto err_empty_reporter_destroy;
diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index 28acc6392cfc..a7fb1d7cae94 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -465,7 +465,3 @@ config XILINX_GMII2RGMII
Ethernet physical media devices and the Gigabit Ethernet controller.
endif # PHYLIB
-
-config MICREL_KS8995MA
- tristate "Micrel KS8995MA 5-ports 10/100 managed Ethernet switch"
- depends on SPI
diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile
index b4795aaf9c1c..402a33d559de 100644
--- a/drivers/net/phy/Makefile
+++ b/drivers/net/phy/Makefile
@@ -72,7 +72,6 @@ obj-$(CONFIG_MAXLINEAR_GPHY) += mxl-gpy.o
obj-$(CONFIG_MAXLINEAR_86110_PHY) += mxl-86110.o
obj-y += mediatek/
obj-$(CONFIG_MESON_GXL_PHY) += meson-gxl.o
-obj-$(CONFIG_MICREL_KS8995MA) += spi_ks8995.o
obj-$(CONFIG_MICREL_PHY) += micrel.o
obj-$(CONFIG_MICROCHIP_PHY) += microchip.o
obj-$(CONFIG_MICROCHIP_PHY_RDS_PTP) += microchip_rds_ptp.o
diff --git a/drivers/net/phy/aquantia/aquantia.h b/drivers/net/phy/aquantia/aquantia.h
index 0c78bfabace5..492052cf1e6e 100644
--- a/drivers/net/phy/aquantia/aquantia.h
+++ b/drivers/net/phy/aquantia/aquantia.h
@@ -174,10 +174,38 @@ static const struct aqr107_hw_stat aqr107_hw_stats[] = {
#define AQR107_SGMII_STAT_SZ ARRAY_SIZE(aqr107_hw_stats)
+static const struct {
+ int speed;
+ u16 reg;
+} aqr_global_cfg_regs[] = {
+ { SPEED_10, VEND1_GLOBAL_CFG_10M, },
+ { SPEED_100, VEND1_GLOBAL_CFG_100M, },
+ { SPEED_1000, VEND1_GLOBAL_CFG_1G, },
+ { SPEED_2500, VEND1_GLOBAL_CFG_2_5G, },
+ { SPEED_5000, VEND1_GLOBAL_CFG_5G, },
+ { SPEED_10000, VEND1_GLOBAL_CFG_10G, },
+};
+
+#define AQR_NUM_GLOBAL_CFG ARRAY_SIZE(aqr_global_cfg_regs)
+
+enum aqr_rate_adaptation {
+ AQR_RATE_ADAPT_NONE,
+ AQR_RATE_ADAPT_USX,
+ AQR_RATE_ADAPT_PAUSE,
+};
+
+struct aqr_global_syscfg {
+ int speed;
+ phy_interface_t interface;
+ enum aqr_rate_adaptation rate_adapt;
+};
+
struct aqr107_priv {
u64 sgmii_stats[AQR107_SGMII_STAT_SZ];
unsigned long leds_active_low;
unsigned long leds_active_high;
+ bool wait_on_global_cfg;
+ struct aqr_global_syscfg global_cfg[AQR_NUM_GLOBAL_CFG];
};
#if IS_REACHABLE(CONFIG_HWMON)
diff --git a/drivers/net/phy/aquantia/aquantia_main.c b/drivers/net/phy/aquantia/aquantia_main.c
index 77a48635d7bf..8516690e34db 100644
--- a/drivers/net/phy/aquantia/aquantia_main.c
+++ b/drivers/net/phy/aquantia/aquantia_main.c
@@ -26,10 +26,12 @@
#define PHY_ID_AQR111 0x03a1b610
#define PHY_ID_AQR111B0 0x03a1b612
#define PHY_ID_AQR112 0x03a1b662
-#define PHY_ID_AQR412 0x03a1b712
+#define PHY_ID_AQR412 0x03a1b6f2
+#define PHY_ID_AQR412C 0x03a1b712
#define PHY_ID_AQR113 0x31c31c40
#define PHY_ID_AQR113C 0x31c31c12
#define PHY_ID_AQR114C 0x31c31c22
+#define PHY_ID_AQR115 0x31c31c63
#define PHY_ID_AQR115C 0x31c31c33
#define PHY_ID_AQR813 0x31c31cb2
@@ -465,7 +467,7 @@ static int aqr105_config_aneg(struct phy_device *phydev)
return genphy_c45_check_and_restart_aneg(phydev, changed);
}
-static int aqr105_read_rate(struct phy_device *phydev)
+static int aqr_gen1_read_rate(struct phy_device *phydev)
{
int val;
@@ -504,7 +506,7 @@ static int aqr105_read_rate(struct phy_device *phydev)
return 0;
}
-static int aqr105_read_status(struct phy_device *phydev)
+static int aqr_gen1_read_status(struct phy_device *phydev)
{
int ret;
int val;
@@ -562,124 +564,32 @@ static int aqr105_read_status(struct phy_device *phydev)
}
/* Read rate from vendor register */
- return aqr105_read_rate(phydev);
+ return aqr_gen1_read_rate(phydev);
}
-static int aqr107_read_rate(struct phy_device *phydev)
+static int aqr_gen2_read_status(struct phy_device *phydev)
{
- u32 config_reg;
- int val;
-
- val = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_TX_VEND_STATUS1);
- if (val < 0)
- return val;
-
- if (val & MDIO_AN_TX_VEND_STATUS1_FULL_DUPLEX)
- phydev->duplex = DUPLEX_FULL;
- else
- phydev->duplex = DUPLEX_HALF;
-
- switch (FIELD_GET(MDIO_AN_TX_VEND_STATUS1_RATE_MASK, val)) {
- case MDIO_AN_TX_VEND_STATUS1_10BASET:
- phydev->speed = SPEED_10;
- config_reg = VEND1_GLOBAL_CFG_10M;
- break;
- case MDIO_AN_TX_VEND_STATUS1_100BASETX:
- phydev->speed = SPEED_100;
- config_reg = VEND1_GLOBAL_CFG_100M;
- break;
- case MDIO_AN_TX_VEND_STATUS1_1000BASET:
- phydev->speed = SPEED_1000;
- config_reg = VEND1_GLOBAL_CFG_1G;
- break;
- case MDIO_AN_TX_VEND_STATUS1_2500BASET:
- phydev->speed = SPEED_2500;
- config_reg = VEND1_GLOBAL_CFG_2_5G;
- break;
- case MDIO_AN_TX_VEND_STATUS1_5000BASET:
- phydev->speed = SPEED_5000;
- config_reg = VEND1_GLOBAL_CFG_5G;
- break;
- case MDIO_AN_TX_VEND_STATUS1_10GBASET:
- phydev->speed = SPEED_10000;
- config_reg = VEND1_GLOBAL_CFG_10G;
- break;
- default:
- phydev->speed = SPEED_UNKNOWN;
- return 0;
- }
-
- val = phy_read_mmd(phydev, MDIO_MMD_VEND1, config_reg);
- if (val < 0)
- return val;
-
- if (FIELD_GET(VEND1_GLOBAL_CFG_RATE_ADAPT, val) ==
- VEND1_GLOBAL_CFG_RATE_ADAPT_PAUSE)
- phydev->rate_matching = RATE_MATCH_PAUSE;
- else
- phydev->rate_matching = RATE_MATCH_NONE;
-
- return 0;
-}
-
-static int aqr107_read_status(struct phy_device *phydev)
-{
- int val, ret;
+ struct aqr107_priv *priv = phydev->priv;
+ int i, ret;
- ret = aqr_read_status(phydev);
+ ret = aqr_gen1_read_status(phydev);
if (ret)
return ret;
- if (!phydev->link || phydev->autoneg == AUTONEG_DISABLE)
- return 0;
+ for (i = 0; i < AQR_NUM_GLOBAL_CFG; i++) {
+ struct aqr_global_syscfg *syscfg = &priv->global_cfg[i];
- /* The status register is not immediately correct on line side link up.
- * Poll periodically until it reflects the correct ON state.
- * Only return fail for read error, timeout defaults to OFF state.
- */
- ret = phy_read_mmd_poll_timeout(phydev, MDIO_MMD_PHYXS,
- MDIO_PHYXS_VEND_IF_STATUS, val,
- (FIELD_GET(MDIO_PHYXS_VEND_IF_STATUS_TYPE_MASK, val) !=
- MDIO_PHYXS_VEND_IF_STATUS_TYPE_OFF),
- AQR107_OP_IN_PROG_SLEEP,
- AQR107_OP_IN_PROG_TIMEOUT, false);
- if (ret && ret != -ETIMEDOUT)
- return ret;
+ if (syscfg->speed != phydev->speed)
+ continue;
- switch (FIELD_GET(MDIO_PHYXS_VEND_IF_STATUS_TYPE_MASK, val)) {
- case MDIO_PHYXS_VEND_IF_STATUS_TYPE_KR:
- phydev->interface = PHY_INTERFACE_MODE_10GKR;
- break;
- case MDIO_PHYXS_VEND_IF_STATUS_TYPE_KX:
- phydev->interface = PHY_INTERFACE_MODE_1000BASEKX;
- break;
- case MDIO_PHYXS_VEND_IF_STATUS_TYPE_XFI:
- phydev->interface = PHY_INTERFACE_MODE_10GBASER;
- break;
- case MDIO_PHYXS_VEND_IF_STATUS_TYPE_USXGMII:
- phydev->interface = PHY_INTERFACE_MODE_USXGMII;
- break;
- case MDIO_PHYXS_VEND_IF_STATUS_TYPE_XAUI:
- phydev->interface = PHY_INTERFACE_MODE_XAUI;
- break;
- case MDIO_PHYXS_VEND_IF_STATUS_TYPE_SGMII:
- phydev->interface = PHY_INTERFACE_MODE_SGMII;
- break;
- case MDIO_PHYXS_VEND_IF_STATUS_TYPE_RXAUI:
- phydev->interface = PHY_INTERFACE_MODE_RXAUI;
- break;
- case MDIO_PHYXS_VEND_IF_STATUS_TYPE_OCSGMII:
- phydev->interface = PHY_INTERFACE_MODE_2500BASEX;
- break;
- case MDIO_PHYXS_VEND_IF_STATUS_TYPE_OFF:
- default:
- phydev->link = false;
- phydev->interface = PHY_INTERFACE_MODE_NA;
+ if (syscfg->rate_adapt == AQR_RATE_ADAPT_PAUSE)
+ phydev->rate_matching = RATE_MATCH_PAUSE;
+ else
+ phydev->rate_matching = RATE_MATCH_NONE;
break;
}
- /* Read possibly downshifted rate from vendor register */
- return aqr107_read_rate(phydev);
+ return 0;
}
static int aqr107_get_downshift(struct phy_device *phydev, u8 *data)
@@ -810,7 +720,7 @@ static int aqr107_config_mdi(struct phy_device *phydev)
mdi_conf | PMAPMD_RSVD_VEND_PROV_MDI_FORCE);
}
-static int aqr107_config_init(struct phy_device *phydev)
+static int aqr_gen1_config_init(struct phy_device *phydev)
{
struct aqr107_priv *priv = phydev->priv;
u32 led_idx;
@@ -859,20 +769,137 @@ static int aqr107_config_init(struct phy_device *phydev)
return 0;
}
-static int aqcs109_config_init(struct phy_device *phydev)
+/* Walk the media-speed configuration registers to determine which
+ * host-side serdes modes may be used by the PHY depending on the
+ * negotiated media speed.
+ */
+static int aqr_gen2_read_global_syscfg(struct phy_device *phydev)
+{
+ struct aqr107_priv *priv = phydev->priv;
+ unsigned int serdes_mode, rate_adapt;
+ phy_interface_t interface;
+ int i, val;
+
+ for (i = 0; i < AQR_NUM_GLOBAL_CFG; i++) {
+ struct aqr_global_syscfg *syscfg = &priv->global_cfg[i];
+
+ syscfg->speed = aqr_global_cfg_regs[i].speed;
+
+ val = phy_read_mmd(phydev, MDIO_MMD_VEND1,
+ aqr_global_cfg_regs[i].reg);
+ if (val < 0)
+ return val;
+
+ serdes_mode = FIELD_GET(VEND1_GLOBAL_CFG_SERDES_MODE, val);
+ rate_adapt = FIELD_GET(VEND1_GLOBAL_CFG_RATE_ADAPT, val);
+
+ switch (serdes_mode) {
+ case VEND1_GLOBAL_CFG_SERDES_MODE_XFI:
+ if (rate_adapt == VEND1_GLOBAL_CFG_RATE_ADAPT_USX)
+ interface = PHY_INTERFACE_MODE_USXGMII;
+ else
+ interface = PHY_INTERFACE_MODE_10GBASER;
+ break;
+
+ case VEND1_GLOBAL_CFG_SERDES_MODE_XFI5G:
+ interface = PHY_INTERFACE_MODE_5GBASER;
+ break;
+
+ case VEND1_GLOBAL_CFG_SERDES_MODE_OCSGMII:
+ interface = PHY_INTERFACE_MODE_2500BASEX;
+ break;
+
+ case VEND1_GLOBAL_CFG_SERDES_MODE_SGMII:
+ interface = PHY_INTERFACE_MODE_SGMII;
+ break;
+
+ default:
+ phydev_warn(phydev, "unrecognized serdes mode %u\n",
+ serdes_mode);
+ interface = PHY_INTERFACE_MODE_NA;
+ break;
+ }
+
+ syscfg->interface = interface;
+
+ switch (rate_adapt) {
+ case VEND1_GLOBAL_CFG_RATE_ADAPT_NONE:
+ syscfg->rate_adapt = AQR_RATE_ADAPT_NONE;
+ break;
+ case VEND1_GLOBAL_CFG_RATE_ADAPT_USX:
+ syscfg->rate_adapt = AQR_RATE_ADAPT_USX;
+ break;
+ case VEND1_GLOBAL_CFG_RATE_ADAPT_PAUSE:
+ syscfg->rate_adapt = AQR_RATE_ADAPT_PAUSE;
+ break;
+ default:
+ phydev_warn(phydev, "unrecognized rate adapt mode %u\n",
+ rate_adapt);
+ break;
+ }
+ }
+
+ return 0;
+}
+
+static int aqr_gen2_fill_interface_modes(struct phy_device *phydev)
+{
+ unsigned long *possible = phydev->possible_interfaces;
+ struct aqr107_priv *priv = phydev->priv;
+ phy_interface_t interface;
+ int i, val, ret;
+
+ /* It's been observed on some models that - when coming out of suspend
+ * - the FW signals that the PHY is ready but the GLOBAL_CFG registers
+ * continue returning zeroes for some time. Let's poll the 100M
+ * register until it returns a real value as both 113c and 115c support
+ * this mode.
+ */
+ if (priv->wait_on_global_cfg) {
+ ret = phy_read_mmd_poll_timeout(phydev, MDIO_MMD_VEND1,
+ VEND1_GLOBAL_CFG_100M, val,
+ val != 0, 1000, 100000, false);
+ if (ret)
+ return ret;
+ }
+
+ ret = aqr_gen2_read_global_syscfg(phydev);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < AQR_NUM_GLOBAL_CFG; i++) {
+ interface = priv->global_cfg[i].interface;
+ if (interface != PHY_INTERFACE_MODE_NA)
+ __set_bit(interface, possible);
+ }
+
+ return 0;
+}
+
+static int aqr_gen2_config_init(struct phy_device *phydev)
{
int ret;
+ ret = aqr_gen1_config_init(phydev);
+ if (ret)
+ return ret;
+
+ return aqr_gen2_fill_interface_modes(phydev);
+}
+
+static int aqr_gen3_config_init(struct phy_device *phydev)
+{
+ return aqr_gen2_config_init(phydev);
+}
+
+static int aqcs109_config_init(struct phy_device *phydev)
+{
/* Check that the PHY interface type is compatible */
if (phydev->interface != PHY_INTERFACE_MODE_SGMII &&
phydev->interface != PHY_INTERFACE_MODE_2500BASEX)
return -ENODEV;
- ret = aqr_wait_reset_complete(phydev);
- if (!ret)
- aqr107_chip_info(phydev);
-
- return aqr107_set_downshift(phydev, MDIO_AN_VEND_PROV_DOWNSHIFT_DFLT);
+ return aqr_gen2_config_init(phydev);
}
static void aqr107_link_change_notify(struct phy_device *phydev)
@@ -920,7 +947,7 @@ static void aqr107_link_change_notify(struct phy_device *phydev)
phydev_info(phydev, "Aquantia 1000Base-T2 mode active\n");
}
-static int aqr107_wait_processor_intensive_op(struct phy_device *phydev)
+static int aqr_gen1_wait_processor_intensive_op(struct phy_device *phydev)
{
int val, err;
@@ -944,17 +971,16 @@ static int aqr107_wait_processor_intensive_op(struct phy_device *phydev)
return 0;
}
-static int aqr107_get_rate_matching(struct phy_device *phydev,
- phy_interface_t iface)
+static int aqr_gen2_get_rate_matching(struct phy_device *phydev,
+ phy_interface_t iface)
{
if (iface == PHY_INTERFACE_MODE_10GBASER ||
- iface == PHY_INTERFACE_MODE_2500BASEX ||
- iface == PHY_INTERFACE_MODE_NA)
+ iface == PHY_INTERFACE_MODE_2500BASEX)
return RATE_MATCH_PAUSE;
return RATE_MATCH_NONE;
}
-static int aqr107_suspend(struct phy_device *phydev)
+static int aqr_gen1_suspend(struct phy_device *phydev)
{
int err;
@@ -963,10 +989,10 @@ static int aqr107_suspend(struct phy_device *phydev)
if (err)
return err;
- return aqr107_wait_processor_intensive_op(phydev);
+ return aqr_gen1_wait_processor_intensive_op(phydev);
}
-static int aqr107_resume(struct phy_device *phydev)
+static int aqr_gen1_resume(struct phy_device *phydev)
{
int err;
@@ -975,89 +1001,7 @@ static int aqr107_resume(struct phy_device *phydev)
if (err)
return err;
- return aqr107_wait_processor_intensive_op(phydev);
-}
-
-static const u16 aqr_global_cfg_regs[] = {
- VEND1_GLOBAL_CFG_10M,
- VEND1_GLOBAL_CFG_100M,
- VEND1_GLOBAL_CFG_1G,
- VEND1_GLOBAL_CFG_2_5G,
- VEND1_GLOBAL_CFG_5G,
- VEND1_GLOBAL_CFG_10G
-};
-
-static int aqr107_fill_interface_modes(struct phy_device *phydev)
-{
- unsigned long *possible = phydev->possible_interfaces;
- unsigned int serdes_mode, rate_adapt;
- phy_interface_t interface;
- int i, val;
-
- /* Walk the media-speed configuration registers to determine which
- * host-side serdes modes may be used by the PHY depending on the
- * negotiated media speed.
- */
- for (i = 0; i < ARRAY_SIZE(aqr_global_cfg_regs); i++) {
- val = phy_read_mmd(phydev, MDIO_MMD_VEND1,
- aqr_global_cfg_regs[i]);
- if (val < 0)
- return val;
-
- serdes_mode = FIELD_GET(VEND1_GLOBAL_CFG_SERDES_MODE, val);
- rate_adapt = FIELD_GET(VEND1_GLOBAL_CFG_RATE_ADAPT, val);
-
- switch (serdes_mode) {
- case VEND1_GLOBAL_CFG_SERDES_MODE_XFI:
- if (rate_adapt == VEND1_GLOBAL_CFG_RATE_ADAPT_USX)
- interface = PHY_INTERFACE_MODE_USXGMII;
- else
- interface = PHY_INTERFACE_MODE_10GBASER;
- break;
-
- case VEND1_GLOBAL_CFG_SERDES_MODE_XFI5G:
- interface = PHY_INTERFACE_MODE_5GBASER;
- break;
-
- case VEND1_GLOBAL_CFG_SERDES_MODE_OCSGMII:
- interface = PHY_INTERFACE_MODE_2500BASEX;
- break;
-
- case VEND1_GLOBAL_CFG_SERDES_MODE_SGMII:
- interface = PHY_INTERFACE_MODE_SGMII;
- break;
-
- default:
- phydev_warn(phydev, "unrecognised serdes mode %u\n",
- serdes_mode);
- interface = PHY_INTERFACE_MODE_NA;
- break;
- }
-
- if (interface != PHY_INTERFACE_MODE_NA)
- __set_bit(interface, possible);
- }
-
- return 0;
-}
-
-static int aqr113c_fill_interface_modes(struct phy_device *phydev)
-{
- int val, ret;
-
- /* It's been observed on some models that - when coming out of suspend
- * - the FW signals that the PHY is ready but the GLOBAL_CFG registers
- * continue on returning zeroes for some time. Let's poll the 100M
- * register until it returns a real value as both 113c and 115c support
- * this mode.
- */
- ret = phy_read_mmd_poll_timeout(phydev, MDIO_MMD_VEND1,
- VEND1_GLOBAL_CFG_100M, val, val != 0,
- 1000, 100000, false);
- if (ret)
- return ret;
-
- return aqr107_fill_interface_modes(phydev);
+ return aqr_gen1_wait_processor_intensive_op(phydev);
}
static int aqr115c_get_features(struct phy_device *phydev)
@@ -1085,11 +1029,14 @@ static int aqr111_get_features(struct phy_device *phydev)
return 0;
}
-static int aqr113c_config_init(struct phy_device *phydev)
+static int aqr_gen4_config_init(struct phy_device *phydev)
{
+ struct aqr107_priv *priv = phydev->priv;
int ret;
- ret = aqr107_config_init(phydev);
+ priv->wait_on_global_cfg = true;
+
+ ret = aqr_gen3_config_init(phydev);
if (ret < 0)
return ret;
@@ -1098,11 +1045,7 @@ static int aqr113c_config_init(struct phy_device *phydev)
if (ret)
return ret;
- ret = aqr107_wait_processor_intensive_op(phydev);
- if (ret)
- return ret;
-
- return aqr113c_fill_interface_modes(phydev);
+ return aqr_gen1_wait_processor_intensive_op(phydev);
}
static int aqr107_probe(struct phy_device *phydev)
@@ -1144,13 +1087,13 @@ static struct phy_driver aqr_driver[] = {
.name = "Aquantia AQR105",
.get_features = aqr105_get_features,
.probe = aqr107_probe,
- .config_init = aqr107_config_init,
+ .config_init = aqr_gen1_config_init,
.config_aneg = aqr105_config_aneg,
.config_intr = aqr_config_intr,
.handle_interrupt = aqr_handle_interrupt,
- .read_status = aqr105_read_status,
- .suspend = aqr107_suspend,
- .resume = aqr107_resume,
+ .read_status = aqr_gen1_read_status,
+ .suspend = aqr_gen1_suspend,
+ .resume = aqr_gen1_resume,
},
{
PHY_ID_MATCH_MODEL(PHY_ID_AQR106),
@@ -1164,16 +1107,16 @@ static struct phy_driver aqr_driver[] = {
PHY_ID_MATCH_MODEL(PHY_ID_AQR107),
.name = "Aquantia AQR107",
.probe = aqr107_probe,
- .get_rate_matching = aqr107_get_rate_matching,
- .config_init = aqr107_config_init,
+ .get_rate_matching = aqr_gen2_get_rate_matching,
+ .config_init = aqr_gen2_config_init,
.config_aneg = aqr_config_aneg,
.config_intr = aqr_config_intr,
.handle_interrupt = aqr_handle_interrupt,
- .read_status = aqr107_read_status,
+ .read_status = aqr_gen2_read_status,
.get_tunable = aqr107_get_tunable,
.set_tunable = aqr107_set_tunable,
- .suspend = aqr107_suspend,
- .resume = aqr107_resume,
+ .suspend = aqr_gen1_suspend,
+ .resume = aqr_gen1_resume,
.get_sset_count = aqr107_get_sset_count,
.get_strings = aqr107_get_strings,
.get_stats = aqr107_get_stats,
@@ -1188,16 +1131,16 @@ static struct phy_driver aqr_driver[] = {
PHY_ID_MATCH_MODEL(PHY_ID_AQCS109),
.name = "Aquantia AQCS109",
.probe = aqr107_probe,
- .get_rate_matching = aqr107_get_rate_matching,
+ .get_rate_matching = aqr_gen2_get_rate_matching,
.config_init = aqcs109_config_init,
.config_aneg = aqr_config_aneg,
.config_intr = aqr_config_intr,
.handle_interrupt = aqr_handle_interrupt,
- .read_status = aqr107_read_status,
+ .read_status = aqr_gen2_read_status,
.get_tunable = aqr107_get_tunable,
.set_tunable = aqr107_set_tunable,
- .suspend = aqr107_suspend,
- .resume = aqr107_resume,
+ .suspend = aqr_gen1_suspend,
+ .resume = aqr_gen1_resume,
.get_sset_count = aqr107_get_sset_count,
.get_strings = aqr107_get_strings,
.get_stats = aqr107_get_stats,
@@ -1213,16 +1156,16 @@ static struct phy_driver aqr_driver[] = {
PHY_ID_MATCH_MODEL(PHY_ID_AQR111),
.name = "Aquantia AQR111",
.probe = aqr107_probe,
- .get_rate_matching = aqr107_get_rate_matching,
- .config_init = aqr107_config_init,
+ .get_rate_matching = aqr_gen2_get_rate_matching,
+ .config_init = aqr_gen3_config_init,
.config_aneg = aqr_config_aneg,
.config_intr = aqr_config_intr,
.handle_interrupt = aqr_handle_interrupt,
- .read_status = aqr107_read_status,
+ .read_status = aqr_gen2_read_status,
.get_tunable = aqr107_get_tunable,
.set_tunable = aqr107_set_tunable,
- .suspend = aqr107_suspend,
- .resume = aqr107_resume,
+ .suspend = aqr_gen1_suspend,
+ .resume = aqr_gen1_resume,
.get_sset_count = aqr107_get_sset_count,
.get_strings = aqr107_get_strings,
.get_stats = aqr107_get_stats,
@@ -1238,16 +1181,16 @@ static struct phy_driver aqr_driver[] = {
PHY_ID_MATCH_MODEL(PHY_ID_AQR111B0),
.name = "Aquantia AQR111B0",
.probe = aqr107_probe,
- .get_rate_matching = aqr107_get_rate_matching,
- .config_init = aqr107_config_init,
+ .get_rate_matching = aqr_gen2_get_rate_matching,
+ .config_init = aqr_gen3_config_init,
.config_aneg = aqr_config_aneg,
.config_intr = aqr_config_intr,
.handle_interrupt = aqr_handle_interrupt,
- .read_status = aqr107_read_status,
+ .read_status = aqr_gen2_read_status,
.get_tunable = aqr107_get_tunable,
.set_tunable = aqr107_set_tunable,
- .suspend = aqr107_suspend,
- .resume = aqr107_resume,
+ .suspend = aqr_gen1_suspend,
+ .resume = aqr_gen1_resume,
.get_sset_count = aqr107_get_sset_count,
.get_strings = aqr107_get_strings,
.get_stats = aqr107_get_stats,
@@ -1271,15 +1214,16 @@ static struct phy_driver aqr_driver[] = {
PHY_ID_MATCH_MODEL(PHY_ID_AQR112),
.name = "Aquantia AQR112",
.probe = aqr107_probe,
+ .config_init = aqr_gen3_config_init,
.config_aneg = aqr_config_aneg,
.config_intr = aqr_config_intr,
.handle_interrupt = aqr_handle_interrupt,
.get_tunable = aqr107_get_tunable,
.set_tunable = aqr107_set_tunable,
- .suspend = aqr107_suspend,
- .resume = aqr107_resume,
- .read_status = aqr107_read_status,
- .get_rate_matching = aqr107_get_rate_matching,
+ .suspend = aqr_gen1_suspend,
+ .resume = aqr_gen1_resume,
+ .read_status = aqr_gen2_read_status,
+ .get_rate_matching = aqr_gen2_get_rate_matching,
.get_sset_count = aqr107_get_sset_count,
.get_strings = aqr107_get_strings,
.get_stats = aqr107_get_stats,
@@ -1294,15 +1238,35 @@ static struct phy_driver aqr_driver[] = {
PHY_ID_MATCH_MODEL(PHY_ID_AQR412),
.name = "Aquantia AQR412",
.probe = aqr107_probe,
+ .config_init = aqr_gen3_config_init,
.config_aneg = aqr_config_aneg,
.config_intr = aqr_config_intr,
.handle_interrupt = aqr_handle_interrupt,
.get_tunable = aqr107_get_tunable,
.set_tunable = aqr107_set_tunable,
- .suspend = aqr107_suspend,
- .resume = aqr107_resume,
- .read_status = aqr107_read_status,
- .get_rate_matching = aqr107_get_rate_matching,
+ .suspend = aqr_gen1_suspend,
+ .resume = aqr_gen1_resume,
+ .read_status = aqr_gen2_read_status,
+ .get_rate_matching = aqr_gen2_get_rate_matching,
+ .get_sset_count = aqr107_get_sset_count,
+ .get_strings = aqr107_get_strings,
+ .get_stats = aqr107_get_stats,
+ .link_change_notify = aqr107_link_change_notify,
+},
+{
+ PHY_ID_MATCH_MODEL(PHY_ID_AQR412C),
+ .name = "Aquantia AQR412C",
+ .probe = aqr107_probe,
+ .config_init = aqr_gen3_config_init,
+ .config_aneg = aqr_config_aneg,
+ .config_intr = aqr_config_intr,
+ .handle_interrupt = aqr_handle_interrupt,
+ .get_tunable = aqr107_get_tunable,
+ .set_tunable = aqr107_set_tunable,
+ .suspend = aqr_gen1_suspend,
+ .resume = aqr_gen1_resume,
+ .read_status = aqr_gen2_read_status,
+ .get_rate_matching = aqr_gen2_get_rate_matching,
.get_sset_count = aqr107_get_sset_count,
.get_strings = aqr107_get_strings,
.get_stats = aqr107_get_stats,
@@ -1312,16 +1276,16 @@ static struct phy_driver aqr_driver[] = {
PHY_ID_MATCH_MODEL(PHY_ID_AQR113),
.name = "Aquantia AQR113",
.probe = aqr107_probe,
- .get_rate_matching = aqr107_get_rate_matching,
- .config_init = aqr113c_config_init,
+ .get_rate_matching = aqr_gen2_get_rate_matching,
+ .config_init = aqr_gen4_config_init,
.config_aneg = aqr_config_aneg,
.config_intr = aqr_config_intr,
.handle_interrupt = aqr_handle_interrupt,
- .read_status = aqr107_read_status,
+ .read_status = aqr_gen2_read_status,
.get_tunable = aqr107_get_tunable,
.set_tunable = aqr107_set_tunable,
- .suspend = aqr107_suspend,
- .resume = aqr107_resume,
+ .suspend = aqr_gen1_suspend,
+ .resume = aqr_gen1_resume,
.get_sset_count = aqr107_get_sset_count,
.get_strings = aqr107_get_strings,
.get_stats = aqr107_get_stats,
@@ -1336,16 +1300,16 @@ static struct phy_driver aqr_driver[] = {
PHY_ID_MATCH_MODEL(PHY_ID_AQR113C),
.name = "Aquantia AQR113C",
.probe = aqr107_probe,
- .get_rate_matching = aqr107_get_rate_matching,
- .config_init = aqr113c_config_init,
+ .get_rate_matching = aqr_gen2_get_rate_matching,
+ .config_init = aqr_gen4_config_init,
.config_aneg = aqr_config_aneg,
.config_intr = aqr_config_intr,
.handle_interrupt = aqr_handle_interrupt,
- .read_status = aqr107_read_status,
+ .read_status = aqr_gen2_read_status,
.get_tunable = aqr107_get_tunable,
.set_tunable = aqr107_set_tunable,
- .suspend = aqr107_suspend,
- .resume = aqr107_resume,
+ .suspend = aqr_gen1_suspend,
+ .resume = aqr_gen1_resume,
.get_sset_count = aqr107_get_sset_count,
.get_strings = aqr107_get_strings,
.get_stats = aqr107_get_stats,
@@ -1360,16 +1324,16 @@ static struct phy_driver aqr_driver[] = {
PHY_ID_MATCH_MODEL(PHY_ID_AQR114C),
.name = "Aquantia AQR114C",
.probe = aqr107_probe,
- .get_rate_matching = aqr107_get_rate_matching,
- .config_init = aqr107_config_init,
+ .get_rate_matching = aqr_gen2_get_rate_matching,
+ .config_init = aqr_gen4_config_init,
.config_aneg = aqr_config_aneg,
.config_intr = aqr_config_intr,
.handle_interrupt = aqr_handle_interrupt,
- .read_status = aqr107_read_status,
+ .read_status = aqr_gen2_read_status,
.get_tunable = aqr107_get_tunable,
.set_tunable = aqr107_set_tunable,
- .suspend = aqr107_suspend,
- .resume = aqr107_resume,
+ .suspend = aqr_gen1_suspend,
+ .resume = aqr_gen1_resume,
.get_sset_count = aqr107_get_sset_count,
.get_strings = aqr107_get_strings,
.get_stats = aqr107_get_stats,
@@ -1382,19 +1346,44 @@ static struct phy_driver aqr_driver[] = {
.led_polarity_set = aqr_phy_led_polarity_set,
},
{
+ PHY_ID_MATCH_MODEL(PHY_ID_AQR115),
+ .name = "Aquantia AQR115",
+ .probe = aqr107_probe,
+ .get_rate_matching = aqr_gen2_get_rate_matching,
+ .config_init = aqr_gen4_config_init,
+ .config_aneg = aqr_config_aneg,
+ .config_intr = aqr_config_intr,
+ .handle_interrupt = aqr_handle_interrupt,
+ .read_status = aqr_gen2_read_status,
+ .get_tunable = aqr107_get_tunable,
+ .set_tunable = aqr107_set_tunable,
+ .suspend = aqr_gen1_suspend,
+ .resume = aqr_gen1_resume,
+ .get_sset_count = aqr107_get_sset_count,
+ .get_strings = aqr107_get_strings,
+ .get_stats = aqr107_get_stats,
+ .get_features = aqr115c_get_features,
+ .link_change_notify = aqr107_link_change_notify,
+ .led_brightness_set = aqr_phy_led_brightness_set,
+ .led_hw_is_supported = aqr_phy_led_hw_is_supported,
+ .led_hw_control_set = aqr_phy_led_hw_control_set,
+ .led_hw_control_get = aqr_phy_led_hw_control_get,
+ .led_polarity_set = aqr_phy_led_polarity_set,
+},
+{
PHY_ID_MATCH_MODEL(PHY_ID_AQR115C),
.name = "Aquantia AQR115C",
.probe = aqr107_probe,
- .get_rate_matching = aqr107_get_rate_matching,
- .config_init = aqr113c_config_init,
+ .get_rate_matching = aqr_gen2_get_rate_matching,
+ .config_init = aqr_gen4_config_init,
.config_aneg = aqr_config_aneg,
.config_intr = aqr_config_intr,
.handle_interrupt = aqr_handle_interrupt,
- .read_status = aqr107_read_status,
+ .read_status = aqr_gen2_read_status,
.get_tunable = aqr107_get_tunable,
.set_tunable = aqr107_set_tunable,
- .suspend = aqr107_suspend,
- .resume = aqr107_resume,
+ .suspend = aqr_gen1_suspend,
+ .resume = aqr_gen1_resume,
.get_sset_count = aqr107_get_sset_count,
.get_strings = aqr107_get_strings,
.get_stats = aqr107_get_stats,
@@ -1410,16 +1399,16 @@ static struct phy_driver aqr_driver[] = {
PHY_ID_MATCH_MODEL(PHY_ID_AQR813),
.name = "Aquantia AQR813",
.probe = aqr107_probe,
- .get_rate_matching = aqr107_get_rate_matching,
- .config_init = aqr107_config_init,
+ .get_rate_matching = aqr_gen2_get_rate_matching,
+ .config_init = aqr_gen4_config_init,
.config_aneg = aqr_config_aneg,
.config_intr = aqr_config_intr,
.handle_interrupt = aqr_handle_interrupt,
- .read_status = aqr107_read_status,
+ .read_status = aqr_gen2_read_status,
.get_tunable = aqr107_get_tunable,
.set_tunable = aqr107_set_tunable,
- .suspend = aqr107_suspend,
- .resume = aqr107_resume,
+ .suspend = aqr_gen1_suspend,
+ .resume = aqr_gen1_resume,
.get_sset_count = aqr107_get_sset_count,
.get_strings = aqr107_get_strings,
.get_stats = aqr107_get_stats,
@@ -1446,9 +1435,11 @@ static const struct mdio_device_id __maybe_unused aqr_tbl[] = {
{ PHY_ID_MATCH_MODEL(PHY_ID_AQR111B0) },
{ PHY_ID_MATCH_MODEL(PHY_ID_AQR112) },
{ PHY_ID_MATCH_MODEL(PHY_ID_AQR412) },
+ { PHY_ID_MATCH_MODEL(PHY_ID_AQR412C) },
{ PHY_ID_MATCH_MODEL(PHY_ID_AQR113) },
{ PHY_ID_MATCH_MODEL(PHY_ID_AQR113C) },
{ PHY_ID_MATCH_MODEL(PHY_ID_AQR114C) },
+ { PHY_ID_MATCH_MODEL(PHY_ID_AQR115) },
{ PHY_ID_MATCH_MODEL(PHY_ID_AQR115C) },
{ PHY_ID_MATCH_MODEL(PHY_ID_AQR813) },
{ }
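
The net effect of the aquantia_main.c refactor: the helpers are renamed into gen1..gen4 capability tiers, and the per-speed VEND1_GLOBAL_CFG_* registers are read once at config_init into priv->global_cfg[] (aqr_gen2_read_global_syscfg()) instead of being re-read in the hot read_status path. A small sketch of the resulting lookup, assuming only the structures added to aquantia.h above:

/* Match the cached per-speed system configuration against the
 * negotiated media speed, as aqr_gen2_read_status() does above.
 */
static enum aqr_rate_adaptation
my_rate_adapt_for_speed(const struct aqr107_priv *priv, int speed)
{
	int i;

	for (i = 0; i < AQR_NUM_GLOBAL_CFG; i++)
		if (priv->global_cfg[i].speed == speed)
			return priv->global_cfg[i].rate_adapt;

	return AQR_RATE_ADAPT_NONE;
}
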
diff --git a/drivers/net/phy/as21xxx.c b/drivers/net/phy/as21xxx.c
index 92697f43087d..005277360656 100644
--- a/drivers/net/phy/as21xxx.c
+++ b/drivers/net/phy/as21xxx.c
@@ -884,11 +884,12 @@ static int as21xxx_match_phy_device(struct phy_device *phydev,
u32 phy_id;
int ret;
- /* Skip PHY that are not AS21xxx or already have firmware loaded */
- if (phydev->c45_ids.device_ids[MDIO_MMD_PCS] != PHY_ID_AS21XXX)
+ /* Skip PHYs that are not AS21xxx */
+ if (!phy_id_compare_vendor(phydev->c45_ids.device_ids[MDIO_MMD_PCS],
+ PHY_VENDOR_AEONSEMI))
return genphy_match_phy_device(phydev, phydrv);
- /* Read PHY ID to handle firmware just loaded */
+ /* Read PHY ID to handle just-loaded firmware or a HW reset */
ret = phy_read_mmd(phydev, MDIO_MMD_PCS, MII_PHYSID1);
if (ret < 0)
return ret;
diff --git a/drivers/net/phy/fixed_phy.c b/drivers/net/phy/fixed_phy.c
index 033656d574b8..7f4e1a155a0f 100644
--- a/drivers/net/phy/fixed_phy.c
+++ b/drivers/net/phy/fixed_phy.c
@@ -10,7 +10,6 @@
#include <linux/kernel.h>
#include <linux/module.h>
-#include <linux/device/faux.h>
#include <linux/list.h>
#include <linux/mii.h>
#include <linux/phy.h>
@@ -40,7 +39,6 @@ struct fixed_phy {
struct gpio_desc *link_gpiod;
};
-static struct faux_device *fdev;
static struct fixed_mdio_bus platform_fmb = {
.phys = LIST_HEAD_INIT(platform_fmb.phys),
};
@@ -77,8 +75,6 @@ static int fixed_mdio_read(struct mii_bus *bus, int phy_addr, int reg_num)
list_for_each_entry(fp, &fmb->phys, node) {
if (fp->addr == phy_addr) {
- struct fixed_phy_status state;
-
fp->status.link = !fp->no_carrier;
/* Issue callback if user registered it. */
@@ -88,9 +84,8 @@ static int fixed_mdio_read(struct mii_bus *bus, int phy_addr, int reg_num)
/* Check the GPIO for change in status */
fixed_phy_update(fp);
- state = fp->status;
- return swphy_read_reg(reg_num, &state);
+ return swphy_read_reg(reg_num, &fp->status);
}
}
@@ -160,9 +155,9 @@ static int fixed_phy_add_gpiod(unsigned int irq, int phy_addr,
return 0;
}
-int fixed_phy_add(int phy_addr, const struct fixed_phy_status *status)
+void fixed_phy_add(const struct fixed_phy_status *status)
{
- return fixed_phy_add_gpiod(PHY_POLL, phy_addr, status, NULL);
+ fixed_phy_add_gpiod(PHY_POLL, 0, status, NULL);
}
EXPORT_SYMBOL_GPL(fixed_phy_add);
@@ -309,6 +304,7 @@ void fixed_phy_unregister(struct phy_device *phy)
phy_device_remove(phy);
of_node_put(phy->mdio.dev.of_node);
fixed_phy_del(phy->mdio.addr);
+ phy_device_free(phy);
}
EXPORT_SYMBOL_GPL(fixed_phy_unregister);
@@ -317,20 +313,13 @@ static int __init fixed_mdio_bus_init(void)
struct fixed_mdio_bus *fmb = &platform_fmb;
int ret;
- fdev = faux_device_create("Fixed MDIO bus", NULL, NULL);
- if (!fdev)
- return -ENODEV;
-
fmb->mii_bus = mdiobus_alloc();
- if (fmb->mii_bus == NULL) {
- ret = -ENOMEM;
- goto err_mdiobus_reg;
- }
+ if (!fmb->mii_bus)
+ return -ENOMEM;
snprintf(fmb->mii_bus->id, MII_BUS_ID_SIZE, "fixed-0");
fmb->mii_bus->name = "Fixed MDIO Bus";
fmb->mii_bus->priv = fmb;
- fmb->mii_bus->parent = &fdev->dev;
fmb->mii_bus->read = &fixed_mdio_read;
fmb->mii_bus->write = &fixed_mdio_write;
fmb->mii_bus->phy_mask = ~0;
@@ -343,8 +332,6 @@ static int __init fixed_mdio_bus_init(void)
err_mdiobus_alloc:
mdiobus_free(fmb->mii_bus);
-err_mdiobus_reg:
- faux_device_destroy(fdev);
return ret;
}
module_init(fixed_mdio_bus_init);
@@ -356,7 +343,6 @@ static void __exit fixed_mdio_bus_exit(void)
mdiobus_unregister(fmb->mii_bus);
mdiobus_free(fmb->mii_bus);
- faux_device_destroy(fdev);
list_for_each_entry_safe(fp, tmp, &fmb->phys, node) {
list_del(&fp->node);
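
fixed_phy.c drops the faux parent device, returns fp->status directly from the MDIO read path, and shrinks fixed_phy_add() to a void helper that always registers at address 0; fixed_phy_unregister() now frees the phy_device itself, which is why the of_mdio.c hunk earlier removes its extra phy_device_free() call. A hedged sketch of the updated registration call, with an illustrative link status:

#include <linux/phy.h>
#include <linux/phy_fixed.h>

/* Register a link-up gigabit fixed PHY using the new void signature;
 * the status values are illustrative.
 */
static void my_add_fixed_phy(void)
{
	static const struct fixed_phy_status status = {
		.link = 1,
		.speed = SPEED_1000,
		.duplex = DUPLEX_FULL,
	};

	fixed_phy_add(&status);
}
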
diff --git a/drivers/net/phy/mediatek/mtk-2p5ge.c b/drivers/net/phy/mediatek/mtk-2p5ge.c
index e147eab523ef..de8a41a1841d 100644
--- a/drivers/net/phy/mediatek/mtk-2p5ge.c
+++ b/drivers/net/phy/mediatek/mtk-2p5ge.c
@@ -249,8 +249,80 @@ static int mt798x_2p5ge_phy_get_rate_matching(struct phy_device *phydev,
return RATE_MATCH_PAUSE;
}
+static const unsigned long supported_triggers =
+ BIT(TRIGGER_NETDEV_FULL_DUPLEX) |
+ BIT(TRIGGER_NETDEV_LINK) |
+ BIT(TRIGGER_NETDEV_LINK_10) |
+ BIT(TRIGGER_NETDEV_LINK_100) |
+ BIT(TRIGGER_NETDEV_LINK_1000) |
+ BIT(TRIGGER_NETDEV_LINK_2500) |
+ BIT(TRIGGER_NETDEV_RX) |
+ BIT(TRIGGER_NETDEV_TX);
+
+static int mt798x_2p5ge_phy_led_blink_set(struct phy_device *phydev, u8 index,
+ unsigned long *delay_on,
+ unsigned long *delay_off)
+{
+ bool blinking = false;
+ int err = 0;
+
+ err = mtk_phy_led_num_dly_cfg(index, delay_on, delay_off, &blinking);
+ if (err < 0)
+ return err;
+
+ err = mtk_phy_hw_led_blink_set(phydev, index, blinking);
+ if (err)
+ return err;
+
+ if (blinking)
+ mtk_phy_hw_led_on_set(phydev, index, MTK_2P5GPHY_LED_ON_MASK,
+ false);
+
+ return 0;
+}
+
+static int mt798x_2p5ge_phy_led_brightness_set(struct phy_device *phydev,
+ u8 index,
+ enum led_brightness value)
+{
+ int err;
+
+ err = mtk_phy_hw_led_blink_set(phydev, index, false);
+ if (err)
+ return err;
+
+ return mtk_phy_hw_led_on_set(phydev, index, MTK_2P5GPHY_LED_ON_MASK,
+ (value != LED_OFF));
+}
+
+static int mt798x_2p5ge_phy_led_hw_is_supported(struct phy_device *phydev,
+ u8 index, unsigned long rules)
+{
+ return mtk_phy_led_hw_is_supported(phydev, index, rules,
+ supported_triggers);
+}
+
+static int mt798x_2p5ge_phy_led_hw_control_get(struct phy_device *phydev,
+ u8 index, unsigned long *rules)
+{
+ return mtk_phy_led_hw_ctrl_get(phydev, index, rules,
+ MTK_2P5GPHY_LED_ON_SET,
+ MTK_2P5GPHY_LED_RX_BLINK_SET,
+ MTK_2P5GPHY_LED_TX_BLINK_SET);
+}
+
+static int mt798x_2p5ge_phy_led_hw_control_set(struct phy_device *phydev,
+ u8 index, unsigned long rules)
+{
+ return mtk_phy_led_hw_ctrl_set(phydev, index, rules,
+ MTK_2P5GPHY_LED_ON_SET,
+ MTK_2P5GPHY_LED_RX_BLINK_SET,
+ MTK_2P5GPHY_LED_TX_BLINK_SET);
+}
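These ops plug into the LED core's netdev-trigger offload, and the _hw_is_supported() contract is simply "reject any rule outside the advertised set". A sketch of what the shared mtk_phy_led_hw_is_supported() helper is assumed to reduce to (the real helper lives in the common mediatek PHY library):

        if (rules & ~supported_triggers)
                return -EOPNOTSUPP;     /* LED core falls back to software blinking */
        return 0;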
+
static int mt798x_2p5ge_phy_probe(struct phy_device *phydev)
{
+ struct mtk_socphy_priv *priv;
struct pinctrl *pinctrl;
int ret;
@@ -273,19 +345,34 @@ static int mt798x_2p5ge_phy_probe(struct phy_device *phydev)
if (ret < 0)
return ret;
- /* Setup LED */
+ /* Setup LED. By default, LED0 is on/off when the link is up/down. As for
+ * LED1, it blinks when tx/rx transmission takes place.
+ */
phy_set_bits_mmd(phydev, MDIO_MMD_VEND2, MTK_PHY_LED0_ON_CTRL,
- MTK_PHY_LED_ON_POLARITY | MTK_PHY_LED_ON_LINK10 |
- MTK_PHY_LED_ON_LINK100 | MTK_PHY_LED_ON_LINK1000 |
- MTK_PHY_LED_ON_LINK2500);
- phy_set_bits_mmd(phydev, MDIO_MMD_VEND2, MTK_PHY_LED1_ON_CTRL,
- MTK_PHY_LED_ON_FDX | MTK_PHY_LED_ON_HDX);
+ MTK_PHY_LED_ON_POLARITY | MTK_2P5GPHY_LED_ON_SET);
+ phy_clear_bits_mmd(phydev, MDIO_MMD_VEND2, MTK_PHY_LED0_BLINK_CTRL,
+ MTK_2P5GPHY_LED_TX_BLINK_SET |
+ MTK_2P5GPHY_LED_RX_BLINK_SET);
+ phy_clear_bits_mmd(phydev, MDIO_MMD_VEND2, MTK_PHY_LED1_ON_CTRL,
+ MTK_PHY_LED_ON_FDX | MTK_PHY_LED_ON_HDX |
+ MTK_2P5GPHY_LED_ON_SET);
+ phy_set_bits_mmd(phydev, MDIO_MMD_VEND2, MTK_PHY_LED1_BLINK_CTRL,
+ MTK_2P5GPHY_LED_TX_BLINK_SET |
+ MTK_2P5GPHY_LED_RX_BLINK_SET);
/* Switch pinctrl after setting polarity to avoid bogus blinking */
pinctrl = devm_pinctrl_get_select(&phydev->mdio.dev, "i2p5gbe-led");
if (IS_ERR(pinctrl))
dev_err(&phydev->mdio.dev, "Fail to set LED pins!\n");
+ priv = devm_kzalloc(&phydev->mdio.dev, sizeof(struct mtk_socphy_priv),
+ GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+ phydev->priv = priv;
+
+ mtk_phy_leds_state_init(phydev);
+
return 0;
}
@@ -303,6 +390,11 @@ static struct phy_driver mtk_2p5gephy_driver[] = {
.resume = genphy_resume,
.read_page = mtk_phy_read_page,
.write_page = mtk_phy_write_page,
+ .led_blink_set = mt798x_2p5ge_phy_led_blink_set,
+ .led_brightness_set = mt798x_2p5ge_phy_led_brightness_set,
+ .led_hw_is_supported = mt798x_2p5ge_phy_led_hw_is_supported,
+ .led_hw_control_get = mt798x_2p5ge_phy_led_hw_control_get,
+ .led_hw_control_set = mt798x_2p5ge_phy_led_hw_control_set,
},
};
diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 605b0315b4cb..e403cbbcead5 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -362,6 +362,8 @@
/* Delay used to get the second part from the LTC */
#define LAN8841_GET_SEC_LTC_DELAY (500 * NSEC_PER_MSEC)
+#define LAN8842_REV_8832 0x8832
+
struct kszphy_hw_stat {
const char *string;
u8 reg;
@@ -448,6 +450,19 @@ struct kszphy_priv {
struct kszphy_phy_stats phy_stats;
};
+struct lan8842_phy_stats {
+ u64 rx_packets;
+ u64 rx_errors;
+ u64 tx_packets;
+ u64 tx_errors;
+};
+
+struct lan8842_priv {
+ struct lan8842_phy_stats phy_stats;
+ struct kszphy_ptp_priv ptp_priv;
+ u16 rev;
+};
+
static const struct kszphy_type lan8814_type = {
.led_mode_reg = ~LAN8814_LED_CTRL_1,
.cable_diag_reg = LAN8814_CABLE_DIAG,
@@ -2790,6 +2805,52 @@ static int ksz886x_cable_test_get_status(struct phy_device *phydev,
return ret;
}
+/**
+ * LAN8814_PAGE_AFE_PMA - Selects Extended Page 1.
+ *
+ * This page appears to control the Analog Front-End (AFE) and Physical
+ * Medium Attachment (PMA) layers. It is used to access registers like
+ * LAN8814_PD_CONTROLS and LAN8814_LINK_QUALITY.
+ */
+#define LAN8814_PAGE_AFE_PMA 1
+
+/**
+ * LAN8814_PAGE_PCS_DIGITAL - Selects Extended Page 2.
+ *
+ * This page seems dedicated to the Physical Coding Sublayer (PCS) and other
+ * digital logic. It is used for MDI-X alignment (LAN8814_ALIGN_SWAP) and EEE
+ * state (LAN8814_EEE_STATE) in the LAN8814, and is repurposed for statistics
+ * and self-test counters in the LAN8842.
+ */
+#define LAN8814_PAGE_PCS_DIGITAL 2
+
+/**
+ * LAN8814_PAGE_COMMON_REGS - Selects Extended Page 4.
+ *
+ * This page contains device-common registers that affect the entire chip.
+ * It includes controls for chip-level resets, strap status, GPIO,
+ * QSGMII, the shared 1588 PTP block, and the PVT monitor.
+ */
+#define LAN8814_PAGE_COMMON_REGS 4
+
+/**
+ * LAN8814_PAGE_PORT_REGS - Selects Extended Page 5.
+ *
+ * This page contains port-specific registers that must be accessed
+ * on a per-port basis. It includes controls for port LEDs, QSGMII PCS,
+ * rate adaptation FIFOs, and the per-port 1588 TSU block.
+ */
+#define LAN8814_PAGE_PORT_REGS 5
+
+/**
+ * LAN8814_PAGE_SYSTEM_CTRL - Selects Extended Page 31.
+ *
+ * This page appears to hold fundamental system or global controls. In the
+ * driver, it is used by the related LAN8804 to access the
+ * LAN8814_CLOCK_MANAGEMENT register.
+ */
+#define LAN8814_PAGE_SYSTEM_CTRL 31
+
#define LAN_EXT_PAGE_ACCESS_CONTROL 0x16
#define LAN_EXT_PAGE_ACCESS_ADDRESS_DATA 0x17
#define LAN_EXT_PAGE_ACCESS_CTRL_EP_FUNC 0x4000
@@ -2840,6 +2901,27 @@ static int lanphy_write_page_reg(struct phy_device *phydev, int page, u16 addr,
return val;
}
+static int lanphy_modify_page_reg(struct phy_device *phydev, int page, u16 addr,
+ u16 mask, u16 set)
+{
+ int ret;
+
+ phy_lock_mdio_bus(phydev);
+ __phy_write(phydev, LAN_EXT_PAGE_ACCESS_CONTROL, page);
+ __phy_write(phydev, LAN_EXT_PAGE_ACCESS_ADDRESS_DATA, addr);
+ __phy_write(phydev, LAN_EXT_PAGE_ACCESS_CONTROL,
+ (page | LAN_EXT_PAGE_ACCESS_CTRL_EP_FUNC));
+ ret = __phy_modify_changed(phydev, LAN_EXT_PAGE_ACCESS_ADDRESS_DATA,
+ mask, set);
+ phy_unlock_mdio_bus(phydev);
+
+ if (ret < 0)
+ phydev_err(phydev, "__phy_modify_changed() failed: %pe\n",
+ ERR_PTR(ret));
+
+ return ret;
+}
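lanphy_modify_page_reg() keeps the page selection and the read-modify-write inside one phy_lock_mdio_bus() critical section, so a concurrent access cannot land on the wrong page. Usage sketch, with register names taken from later in this patch:

        /* Set the one-step sync timestamp-insert bit without disturbing
         * the rest of PTP_TX_MOD.
         */
        lanphy_modify_page_reg(phydev, LAN8814_PAGE_PORT_REGS, PTP_TX_MOD,
                               PTP_TX_MOD_TX_PTP_SYNC_TS_INSERT_,
                               PTP_TX_MOD_TX_PTP_SYNC_TS_INSERT_);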
+
static int lan8814_config_ts_intr(struct phy_device *phydev, bool enable)
{
u16 val = 0;
@@ -2850,35 +2932,46 @@ static int lan8814_config_ts_intr(struct phy_device *phydev, bool enable)
PTP_TSU_INT_EN_PTP_RX_TS_EN_ |
PTP_TSU_INT_EN_PTP_RX_TS_OVRFL_EN_;
- return lanphy_write_page_reg(phydev, 5, PTP_TSU_INT_EN, val);
+ return lanphy_write_page_reg(phydev, LAN8814_PAGE_PORT_REGS,
+ PTP_TSU_INT_EN, val);
}
static void lan8814_ptp_rx_ts_get(struct phy_device *phydev,
u32 *seconds, u32 *nano_seconds, u16 *seq_id)
{
- *seconds = lanphy_read_page_reg(phydev, 5, PTP_RX_INGRESS_SEC_HI);
+ *seconds = lanphy_read_page_reg(phydev, LAN8814_PAGE_PORT_REGS,
+ PTP_RX_INGRESS_SEC_HI);
*seconds = (*seconds << 16) |
- lanphy_read_page_reg(phydev, 5, PTP_RX_INGRESS_SEC_LO);
+ lanphy_read_page_reg(phydev, LAN8814_PAGE_PORT_REGS,
+ PTP_RX_INGRESS_SEC_LO);
- *nano_seconds = lanphy_read_page_reg(phydev, 5, PTP_RX_INGRESS_NS_HI);
+ *nano_seconds = lanphy_read_page_reg(phydev, LAN8814_PAGE_PORT_REGS,
+ PTP_RX_INGRESS_NS_HI);
*nano_seconds = ((*nano_seconds & 0x3fff) << 16) |
- lanphy_read_page_reg(phydev, 5, PTP_RX_INGRESS_NS_LO);
+ lanphy_read_page_reg(phydev, LAN8814_PAGE_PORT_REGS,
+ PTP_RX_INGRESS_NS_LO);
- *seq_id = lanphy_read_page_reg(phydev, 5, PTP_RX_MSG_HEADER2);
+ *seq_id = lanphy_read_page_reg(phydev, LAN8814_PAGE_PORT_REGS,
+ PTP_RX_MSG_HEADER2);
}
static void lan8814_ptp_tx_ts_get(struct phy_device *phydev,
u32 *seconds, u32 *nano_seconds, u16 *seq_id)
{
- *seconds = lanphy_read_page_reg(phydev, 5, PTP_TX_EGRESS_SEC_HI);
+ *seconds = lanphy_read_page_reg(phydev, LAN8814_PAGE_PORT_REGS,
+ PTP_TX_EGRESS_SEC_HI);
*seconds = *seconds << 16 |
- lanphy_read_page_reg(phydev, 5, PTP_TX_EGRESS_SEC_LO);
+ lanphy_read_page_reg(phydev, LAN8814_PAGE_PORT_REGS,
+ PTP_TX_EGRESS_SEC_LO);
- *nano_seconds = lanphy_read_page_reg(phydev, 5, PTP_TX_EGRESS_NS_HI);
+ *nano_seconds = lanphy_read_page_reg(phydev, LAN8814_PAGE_PORT_REGS,
+ PTP_TX_EGRESS_NS_HI);
*nano_seconds = ((*nano_seconds & 0x3fff) << 16) |
- lanphy_read_page_reg(phydev, 5, PTP_TX_EGRESS_NS_LO);
+ lanphy_read_page_reg(phydev, LAN8814_PAGE_PORT_REGS,
+ PTP_TX_EGRESS_NS_LO);
- *seq_id = lanphy_read_page_reg(phydev, 5, PTP_TX_MSG_HEADER2);
+ *seq_id = lanphy_read_page_reg(phydev, LAN8814_PAGE_PORT_REGS,
+ PTP_TX_MSG_HEADER2);
}
static int lan8814_ts_info(struct mii_timestamper *mii_ts, struct kernel_ethtool_ts_info *info)
@@ -2912,11 +3005,11 @@ static void lan8814_flush_fifo(struct phy_device *phydev, bool egress)
int i;
for (i = 0; i < FIFO_SIZE; ++i)
- lanphy_read_page_reg(phydev, 5,
+ lanphy_read_page_reg(phydev, LAN8814_PAGE_PORT_REGS,
egress ? PTP_TX_MSG_HEADER2 : PTP_RX_MSG_HEADER2);
/* Read to clear overflow status bit */
- lanphy_read_page_reg(phydev, 5, PTP_TSU_INT_STS);
+ lanphy_read_page_reg(phydev, LAN8814_PAGE_PORT_REGS, PTP_TSU_INT_STS);
}
static int lan8814_hwtstamp(struct mii_timestamper *mii_ts,
@@ -2928,7 +3021,6 @@ static int lan8814_hwtstamp(struct mii_timestamper *mii_ts,
struct lan8814_ptp_rx_ts *rx_ts, *tmp;
int txcfg = 0, rxcfg = 0;
int pkt_ts_enable;
- int tx_mod;
ptp_priv->hwts_tx_type = config->tx_type;
ptp_priv->rx_filter = config->rx_filter;
@@ -2967,21 +3059,28 @@ static int lan8814_hwtstamp(struct mii_timestamper *mii_ts,
rxcfg |= PTP_RX_PARSE_CONFIG_IPV4_EN_ | PTP_RX_PARSE_CONFIG_IPV6_EN_;
txcfg |= PTP_TX_PARSE_CONFIG_IPV4_EN_ | PTP_TX_PARSE_CONFIG_IPV6_EN_;
}
- lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_RX_PARSE_CONFIG, rxcfg);
- lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_TX_PARSE_CONFIG, txcfg);
+ lanphy_write_page_reg(ptp_priv->phydev, LAN8814_PAGE_PORT_REGS,
+ PTP_RX_PARSE_CONFIG, rxcfg);
+ lanphy_write_page_reg(ptp_priv->phydev, LAN8814_PAGE_PORT_REGS,
+ PTP_TX_PARSE_CONFIG, txcfg);
pkt_ts_enable = PTP_TIMESTAMP_EN_SYNC_ | PTP_TIMESTAMP_EN_DREQ_ |
PTP_TIMESTAMP_EN_PDREQ_ | PTP_TIMESTAMP_EN_PDRES_;
- lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_RX_TIMESTAMP_EN, pkt_ts_enable);
- lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_TX_TIMESTAMP_EN, pkt_ts_enable);
+ lanphy_write_page_reg(ptp_priv->phydev, LAN8814_PAGE_PORT_REGS,
+ PTP_RX_TIMESTAMP_EN, pkt_ts_enable);
+ lanphy_write_page_reg(ptp_priv->phydev, LAN8814_PAGE_PORT_REGS,
+ PTP_TX_TIMESTAMP_EN, pkt_ts_enable);
- tx_mod = lanphy_read_page_reg(ptp_priv->phydev, 5, PTP_TX_MOD);
if (ptp_priv->hwts_tx_type == HWTSTAMP_TX_ONESTEP_SYNC) {
- lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_TX_MOD,
- tx_mod | PTP_TX_MOD_TX_PTP_SYNC_TS_INSERT_);
+ lanphy_modify_page_reg(ptp_priv->phydev, LAN8814_PAGE_PORT_REGS,
+ PTP_TX_MOD,
+ PTP_TX_MOD_TX_PTP_SYNC_TS_INSERT_,
+ PTP_TX_MOD_TX_PTP_SYNC_TS_INSERT_);
} else if (ptp_priv->hwts_tx_type == HWTSTAMP_TX_ON) {
- lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_TX_MOD,
- tx_mod & ~PTP_TX_MOD_TX_PTP_SYNC_TS_INSERT_);
+ lanphy_modify_page_reg(ptp_priv->phydev, LAN8814_PAGE_PORT_REGS,
+ PTP_TX_MOD,
+ PTP_TX_MOD_TX_PTP_SYNC_TS_INSERT_,
+ 0);
}
if (config->rx_filter != HWTSTAMP_FILTER_NONE)
@@ -3103,29 +3202,41 @@ static bool lan8814_rxtstamp(struct mii_timestamper *mii_ts, struct sk_buff *skb
static void lan8814_ptp_clock_set(struct phy_device *phydev,
time64_t sec, u32 nsec)
{
- lanphy_write_page_reg(phydev, 4, PTP_CLOCK_SET_SEC_LO, lower_16_bits(sec));
- lanphy_write_page_reg(phydev, 4, PTP_CLOCK_SET_SEC_MID, upper_16_bits(sec));
- lanphy_write_page_reg(phydev, 4, PTP_CLOCK_SET_SEC_HI, upper_32_bits(sec));
- lanphy_write_page_reg(phydev, 4, PTP_CLOCK_SET_NS_LO, lower_16_bits(nsec));
- lanphy_write_page_reg(phydev, 4, PTP_CLOCK_SET_NS_HI, upper_16_bits(nsec));
+ lanphy_write_page_reg(phydev, LAN8814_PAGE_COMMON_REGS,
+ PTP_CLOCK_SET_SEC_LO, lower_16_bits(sec));
+ lanphy_write_page_reg(phydev, LAN8814_PAGE_COMMON_REGS,
+ PTP_CLOCK_SET_SEC_MID, upper_16_bits(sec));
+ lanphy_write_page_reg(phydev, LAN8814_PAGE_COMMON_REGS,
+ PTP_CLOCK_SET_SEC_HI, upper_32_bits(sec));
+ lanphy_write_page_reg(phydev, LAN8814_PAGE_COMMON_REGS,
+ PTP_CLOCK_SET_NS_LO, lower_16_bits(nsec));
+ lanphy_write_page_reg(phydev, LAN8814_PAGE_COMMON_REGS,
+ PTP_CLOCK_SET_NS_HI, upper_16_bits(nsec));
- lanphy_write_page_reg(phydev, 4, PTP_CMD_CTL, PTP_CMD_CTL_PTP_CLOCK_LOAD_);
+ lanphy_write_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, PTP_CMD_CTL,
+ PTP_CMD_CTL_PTP_CLOCK_LOAD_);
}
static void lan8814_ptp_clock_get(struct phy_device *phydev,
time64_t *sec, u32 *nsec)
{
- lanphy_write_page_reg(phydev, 4, PTP_CMD_CTL, PTP_CMD_CTL_PTP_CLOCK_READ_);
+ lanphy_write_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, PTP_CMD_CTL,
+ PTP_CMD_CTL_PTP_CLOCK_READ_);
- *sec = lanphy_read_page_reg(phydev, 4, PTP_CLOCK_READ_SEC_HI);
+ *sec = lanphy_read_page_reg(phydev, LAN8814_PAGE_COMMON_REGS,
+ PTP_CLOCK_READ_SEC_HI);
*sec <<= 16;
- *sec |= lanphy_read_page_reg(phydev, 4, PTP_CLOCK_READ_SEC_MID);
+ *sec |= lanphy_read_page_reg(phydev, LAN8814_PAGE_COMMON_REGS,
+ PTP_CLOCK_READ_SEC_MID);
*sec <<= 16;
- *sec |= lanphy_read_page_reg(phydev, 4, PTP_CLOCK_READ_SEC_LO);
+ *sec |= lanphy_read_page_reg(phydev, LAN8814_PAGE_COMMON_REGS,
+ PTP_CLOCK_READ_SEC_LO);
- *nsec = lanphy_read_page_reg(phydev, 4, PTP_CLOCK_READ_NS_HI);
+ *nsec = lanphy_read_page_reg(phydev, LAN8814_PAGE_COMMON_REGS,
+ PTP_CLOCK_READ_NS_HI);
*nsec <<= 16;
- *nsec |= lanphy_read_page_reg(phydev, 4, PTP_CLOCK_READ_NS_LO);
+ *nsec |= lanphy_read_page_reg(phydev, LAN8814_PAGE_COMMON_REGS,
+ PTP_CLOCK_READ_NS_LO);
}
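The PTP_CMD_CTL_PTP_CLOCK_READ_ write latches a snapshot of the free-running LTC so that the subsequent 16-bit reads stay coherent; assembled, the value read back is:

        /* sec  = SEC_HI << 32 | SEC_MID << 16 | SEC_LO   (48-bit seconds)
         * nsec = NS_HI  << 16 | NS_LO                    (nanoseconds)
         */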
static int lan8814_ptpci_gettime64(struct ptp_clock_info *ptpci,
@@ -3164,14 +3275,18 @@ static void lan8814_ptp_set_target(struct phy_device *phydev, int event,
s64 start_sec, u32 start_nsec)
{
/* Set the start time */
- lanphy_write_page_reg(phydev, 4, LAN8814_PTP_CLOCK_TARGET_SEC_LO(event),
+ lanphy_write_page_reg(phydev, LAN8814_PAGE_COMMON_REGS,
+ LAN8814_PTP_CLOCK_TARGET_SEC_LO(event),
lower_16_bits(start_sec));
- lanphy_write_page_reg(phydev, 4, LAN8814_PTP_CLOCK_TARGET_SEC_HI(event),
+ lanphy_write_page_reg(phydev, LAN8814_PAGE_COMMON_REGS,
+ LAN8814_PTP_CLOCK_TARGET_SEC_HI(event),
upper_16_bits(start_sec));
- lanphy_write_page_reg(phydev, 4, LAN8814_PTP_CLOCK_TARGET_NS_LO(event),
+ lanphy_write_page_reg(phydev, LAN8814_PAGE_COMMON_REGS,
+ LAN8814_PTP_CLOCK_TARGET_NS_LO(event),
lower_16_bits(start_nsec));
- lanphy_write_page_reg(phydev, 4, LAN8814_PTP_CLOCK_TARGET_NS_HI(event),
+ lanphy_write_page_reg(phydev, LAN8814_PAGE_COMMON_REGS,
+ LAN8814_PTP_CLOCK_TARGET_NS_HI(event),
upper_16_bits(start_nsec) & 0x3fff);
}
@@ -3269,9 +3384,11 @@ static void lan8814_ptp_clock_step(struct phy_device *phydev,
adjustment_value_lo = adjustment_value & 0xffff;
adjustment_value_hi = (adjustment_value >> 16) & 0x3fff;
- lanphy_write_page_reg(phydev, 4, PTP_LTC_STEP_ADJ_LO,
+ lanphy_write_page_reg(phydev, LAN8814_PAGE_COMMON_REGS,
+ PTP_LTC_STEP_ADJ_LO,
adjustment_value_lo);
- lanphy_write_page_reg(phydev, 4, PTP_LTC_STEP_ADJ_HI,
+ lanphy_write_page_reg(phydev, LAN8814_PAGE_COMMON_REGS,
+ PTP_LTC_STEP_ADJ_HI,
PTP_LTC_STEP_ADJ_DIR_ |
adjustment_value_hi);
seconds -= ((s32)adjustment_value);
@@ -3289,9 +3406,11 @@ static void lan8814_ptp_clock_step(struct phy_device *phydev,
adjustment_value_lo = adjustment_value & 0xffff;
adjustment_value_hi = (adjustment_value >> 16) & 0x3fff;
- lanphy_write_page_reg(phydev, 4, PTP_LTC_STEP_ADJ_LO,
+ lanphy_write_page_reg(phydev, LAN8814_PAGE_COMMON_REGS,
+ PTP_LTC_STEP_ADJ_LO,
adjustment_value_lo);
- lanphy_write_page_reg(phydev, 4, PTP_LTC_STEP_ADJ_HI,
+ lanphy_write_page_reg(phydev, LAN8814_PAGE_COMMON_REGS,
+ PTP_LTC_STEP_ADJ_HI,
adjustment_value_hi);
seconds += ((s32)adjustment_value);
@@ -3299,8 +3418,8 @@ static void lan8814_ptp_clock_step(struct phy_device *phydev,
set_seconds += adjustment_value;
lan8814_ptp_update_target(phydev, set_seconds);
}
- lanphy_write_page_reg(phydev, 4, PTP_CMD_CTL,
- PTP_CMD_CTL_PTP_LTC_STEP_SEC_);
+ lanphy_write_page_reg(phydev, LAN8814_PAGE_COMMON_REGS,
+ PTP_CMD_CTL, PTP_CMD_CTL_PTP_LTC_STEP_SEC_);
}
if (nano_seconds) {
u16 nano_seconds_lo;
@@ -3309,12 +3428,14 @@ static void lan8814_ptp_clock_step(struct phy_device *phydev,
nano_seconds_lo = nano_seconds & 0xffff;
nano_seconds_hi = (nano_seconds >> 16) & 0x3fff;
- lanphy_write_page_reg(phydev, 4, PTP_LTC_STEP_ADJ_LO,
+ lanphy_write_page_reg(phydev, LAN8814_PAGE_COMMON_REGS,
+ PTP_LTC_STEP_ADJ_LO,
nano_seconds_lo);
- lanphy_write_page_reg(phydev, 4, PTP_LTC_STEP_ADJ_HI,
+ lanphy_write_page_reg(phydev, LAN8814_PAGE_COMMON_REGS,
+ PTP_LTC_STEP_ADJ_HI,
PTP_LTC_STEP_ADJ_DIR_ |
nano_seconds_hi);
- lanphy_write_page_reg(phydev, 4, PTP_CMD_CTL,
+ lanphy_write_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, PTP_CMD_CTL,
PTP_CMD_CTL_PTP_LTC_STEP_NSEC_);
}
}
@@ -3356,8 +3477,10 @@ static int lan8814_ptpci_adjfine(struct ptp_clock_info *ptpci, long scaled_ppm)
kszphy_rate_adj_hi |= PTP_CLOCK_RATE_ADJ_DIR_;
mutex_lock(&shared->shared_lock);
- lanphy_write_page_reg(phydev, 4, PTP_CLOCK_RATE_ADJ_HI, kszphy_rate_adj_hi);
- lanphy_write_page_reg(phydev, 4, PTP_CLOCK_RATE_ADJ_LO, kszphy_rate_adj_lo);
+ lanphy_write_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, PTP_CLOCK_RATE_ADJ_HI,
+ kszphy_rate_adj_hi);
+ lanphy_write_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, PTP_CLOCK_RATE_ADJ_LO,
+ kszphy_rate_adj_lo);
mutex_unlock(&shared->shared_lock);
return 0;
@@ -3366,17 +3489,17 @@ static int lan8814_ptpci_adjfine(struct ptp_clock_info *ptpci, long scaled_ppm)
static void lan8814_ptp_set_reload(struct phy_device *phydev, int event,
s64 period_sec, u32 period_nsec)
{
- lanphy_write_page_reg(phydev, 4,
+ lanphy_write_page_reg(phydev, LAN8814_PAGE_COMMON_REGS,
LAN8814_PTP_CLOCK_TARGET_RELOAD_SEC_LO(event),
lower_16_bits(period_sec));
- lanphy_write_page_reg(phydev, 4,
+ lanphy_write_page_reg(phydev, LAN8814_PAGE_COMMON_REGS,
LAN8814_PTP_CLOCK_TARGET_RELOAD_SEC_HI(event),
upper_16_bits(period_sec));
- lanphy_write_page_reg(phydev, 4,
+ lanphy_write_page_reg(phydev, LAN8814_PAGE_COMMON_REGS,
LAN8814_PTP_CLOCK_TARGET_RELOAD_NS_LO(event),
lower_16_bits(period_nsec));
- lanphy_write_page_reg(phydev, 4,
+ lanphy_write_page_reg(phydev, LAN8814_PAGE_COMMON_REGS,
LAN8814_PTP_CLOCK_TARGET_RELOAD_NS_HI(event),
upper_16_bits(period_nsec) & 0x3fff);
}
@@ -3384,73 +3507,72 @@ static void lan8814_ptp_set_reload(struct phy_device *phydev, int event,
static void lan8814_ptp_enable_event(struct phy_device *phydev, int event,
int pulse_width)
{
- u16 val;
-
- val = lanphy_read_page_reg(phydev, 4, LAN8814_PTP_GENERAL_CONFIG);
- /* Set the pulse width of the event */
- val &= ~(LAN8814_PTP_GENERAL_CONFIG_LTC_EVENT_MASK(event));
- /* Make sure that the target clock will be incremented each time when
+ /* Set the pulse width of the event,
+ * make sure that the target clock will be incremented each time when
* local time reaches or passes it,
+ * and set the polarity high
*/
- val |= LAN8814_PTP_GENERAL_CONFIG_LTC_EVENT_SET(event, pulse_width);
- val &= ~(LAN8814_PTP_GENERAL_CONFIG_RELOAD_ADD_X(event));
- /* Set the polarity high */
- val |= LAN8814_PTP_GENERAL_CONFIG_POLARITY_X(event);
- lanphy_write_page_reg(phydev, 4, LAN8814_PTP_GENERAL_CONFIG, val);
+ lanphy_modify_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, LAN8814_PTP_GENERAL_CONFIG,
+ LAN8814_PTP_GENERAL_CONFIG_LTC_EVENT_MASK(event) |
+ LAN8814_PTP_GENERAL_CONFIG_LTC_EVENT_SET(event, pulse_width) |
+ LAN8814_PTP_GENERAL_CONFIG_RELOAD_ADD_X(event) |
+ LAN8814_PTP_GENERAL_CONFIG_POLARITY_X(event),
+ LAN8814_PTP_GENERAL_CONFIG_LTC_EVENT_SET(event, pulse_width) |
+ LAN8814_PTP_GENERAL_CONFIG_POLARITY_X(event));
}
static void lan8814_ptp_disable_event(struct phy_device *phydev, int event)
{
- u16 val;
-
/* Set target to too far in the future, effectively disabling it */
lan8814_ptp_set_target(phydev, event, 0xFFFFFFFF, 0);
/* And then reload once it reaches the target */
- val = lanphy_read_page_reg(phydev, 4, LAN8814_PTP_GENERAL_CONFIG);
- val |= LAN8814_PTP_GENERAL_CONFIG_RELOAD_ADD_X(event);
- lanphy_write_page_reg(phydev, 4, LAN8814_PTP_GENERAL_CONFIG, val);
+ lanphy_modify_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, LAN8814_PTP_GENERAL_CONFIG,
+ LAN8814_PTP_GENERAL_CONFIG_RELOAD_ADD_X(event),
+ LAN8814_PTP_GENERAL_CONFIG_RELOAD_ADD_X(event));
}
static void lan8814_ptp_perout_off(struct phy_device *phydev, int pin)
{
- u16 val;
-
/* Disable gpio alternate function,
* 1: select as gpio,
* 0: select alt func
*/
- val = lanphy_read_page_reg(phydev, 4, LAN8814_GPIO_EN_ADDR(pin));
- val |= LAN8814_GPIO_EN_BIT(pin);
- lanphy_write_page_reg(phydev, 4, LAN8814_GPIO_EN_ADDR(pin), val);
+ lanphy_modify_page_reg(phydev, LAN8814_PAGE_COMMON_REGS,
+ LAN8814_GPIO_EN_ADDR(pin),
+ LAN8814_GPIO_EN_BIT(pin),
+ LAN8814_GPIO_EN_BIT(pin));
- val = lanphy_read_page_reg(phydev, 4, LAN8814_GPIO_DIR_ADDR(pin));
- val &= ~LAN8814_GPIO_DIR_BIT(pin);
- lanphy_write_page_reg(phydev, 4, LAN8814_GPIO_DIR_ADDR(pin), val);
+ lanphy_modify_page_reg(phydev, LAN8814_PAGE_COMMON_REGS,
+ LAN8814_GPIO_DIR_ADDR(pin),
+ LAN8814_GPIO_DIR_BIT(pin),
+ 0);
- val = lanphy_read_page_reg(phydev, 4, LAN8814_GPIO_BUF_ADDR(pin));
- val &= ~LAN8814_GPIO_BUF_BIT(pin);
- lanphy_write_page_reg(phydev, 4, LAN8814_GPIO_BUF_ADDR(pin), val);
+ lanphy_modify_page_reg(phydev, LAN8814_PAGE_COMMON_REGS,
+ LAN8814_GPIO_BUF_ADDR(pin),
+ LAN8814_GPIO_BUF_BIT(pin),
+ 0);
}
static void lan8814_ptp_perout_on(struct phy_device *phydev, int pin)
{
- int val;
-
/* Set as gpio output */
- val = lanphy_read_page_reg(phydev, 4, LAN8814_GPIO_DIR_ADDR(pin));
- val |= LAN8814_GPIO_DIR_BIT(pin);
- lanphy_write_page_reg(phydev, 4, LAN8814_GPIO_DIR_ADDR(pin), val);
+ lanphy_modify_page_reg(phydev, LAN8814_PAGE_COMMON_REGS,
+ LAN8814_GPIO_DIR_ADDR(pin),
+ LAN8814_GPIO_DIR_BIT(pin),
+ LAN8814_GPIO_DIR_BIT(pin));
/* Enable gpio 0:for alternate function, 1:gpio */
- val = lanphy_read_page_reg(phydev, 4, LAN8814_GPIO_EN_ADDR(pin));
- val &= ~LAN8814_GPIO_EN_BIT(pin);
- lanphy_write_page_reg(phydev, 4, LAN8814_GPIO_EN_ADDR(pin), val);
+ lanphy_modify_page_reg(phydev, LAN8814_PAGE_COMMON_REGS,
+ LAN8814_GPIO_EN_ADDR(pin),
+ LAN8814_GPIO_EN_BIT(pin),
+ 0);
/* Set buffer type to push pull */
- val = lanphy_read_page_reg(phydev, 4, LAN8814_GPIO_BUF_ADDR(pin));
- val |= LAN8814_GPIO_BUF_BIT(pin);
- lanphy_write_page_reg(phydev, 4, LAN8814_GPIO_BUF_ADDR(pin), val);
+ lanphy_modify_page_reg(phydev, LAN8814_PAGE_COMMON_REGS,
+ LAN8814_GPIO_BUF_ADDR(pin),
+ LAN8814_GPIO_BUF_BIT(pin),
+ LAN8814_GPIO_BUF_BIT(pin));
}
static int lan8814_ptp_perout(struct ptp_clock_info *ptpci,
@@ -3565,61 +3687,64 @@ static int lan8814_ptp_perout(struct ptp_clock_info *ptpci,
static void lan8814_ptp_extts_on(struct phy_device *phydev, int pin, u32 flags)
{
- u16 tmp;
-
/* Set as gpio input */
- tmp = lanphy_read_page_reg(phydev, 4, LAN8814_GPIO_DIR_ADDR(pin));
- tmp &= ~LAN8814_GPIO_DIR_BIT(pin);
- lanphy_write_page_reg(phydev, 4, LAN8814_GPIO_DIR_ADDR(pin), tmp);
+ lanphy_modify_page_reg(phydev, LAN8814_PAGE_COMMON_REGS,
+ LAN8814_GPIO_DIR_ADDR(pin),
+ LAN8814_GPIO_DIR_BIT(pin),
+ 0);
/* Map the pin to ltc pin 0 of the capture map registers */
- tmp = lanphy_read_page_reg(phydev, 4, PTP_GPIO_CAP_MAP_LO);
- tmp |= pin;
- lanphy_write_page_reg(phydev, 4, PTP_GPIO_CAP_MAP_LO, tmp);
+ lanphy_modify_page_reg(phydev, LAN8814_PAGE_COMMON_REGS,
+ PTP_GPIO_CAP_MAP_LO, pin, pin);
/* Enable capture on the edges of the ltc pin */
- tmp = lanphy_read_page_reg(phydev, 4, PTP_GPIO_CAP_EN);
if (flags & PTP_RISING_EDGE)
- tmp |= PTP_GPIO_CAP_EN_GPIO_RE_CAPTURE_ENABLE(0);
+ lanphy_modify_page_reg(phydev, LAN8814_PAGE_COMMON_REGS,
+ PTP_GPIO_CAP_EN,
+ PTP_GPIO_CAP_EN_GPIO_RE_CAPTURE_ENABLE(0),
+ PTP_GPIO_CAP_EN_GPIO_RE_CAPTURE_ENABLE(0));
if (flags & PTP_FALLING_EDGE)
- tmp |= PTP_GPIO_CAP_EN_GPIO_FE_CAPTURE_ENABLE(0);
- lanphy_write_page_reg(phydev, 4, PTP_GPIO_CAP_EN, tmp);
+ lanphy_modify_page_reg(phydev, LAN8814_PAGE_COMMON_REGS,
+ PTP_GPIO_CAP_EN,
+ PTP_GPIO_CAP_EN_GPIO_FE_CAPTURE_ENABLE(0),
+ PTP_GPIO_CAP_EN_GPIO_FE_CAPTURE_ENABLE(0));
/* Enable interrupt top interrupt */
- tmp = lanphy_read_page_reg(phydev, 4, PTP_COMMON_INT_ENA);
- tmp |= PTP_COMMON_INT_ENA_GPIO_CAP_EN;
- lanphy_write_page_reg(phydev, 4, PTP_COMMON_INT_ENA, tmp);
+ lanphy_modify_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, PTP_COMMON_INT_ENA,
+ PTP_COMMON_INT_ENA_GPIO_CAP_EN,
+ PTP_COMMON_INT_ENA_GPIO_CAP_EN);
}
static void lan8814_ptp_extts_off(struct phy_device *phydev, int pin)
{
- u16 tmp;
-
/* Set as gpio out */
- tmp = lanphy_read_page_reg(phydev, 4, LAN8814_GPIO_DIR_ADDR(pin));
- tmp |= LAN8814_GPIO_DIR_BIT(pin);
- lanphy_write_page_reg(phydev, 4, LAN8814_GPIO_DIR_ADDR(pin), tmp);
+ lanphy_modify_page_reg(phydev, LAN8814_PAGE_COMMON_REGS,
+ LAN8814_GPIO_DIR_ADDR(pin),
+ LAN8814_GPIO_DIR_BIT(pin),
+ LAN8814_GPIO_DIR_BIT(pin));
/* Enable alternate, 0:for alternate function, 1:gpio */
- tmp = lanphy_read_page_reg(phydev, 4, LAN8814_GPIO_EN_ADDR(pin));
- tmp &= ~LAN8814_GPIO_EN_BIT(pin);
- lanphy_write_page_reg(phydev, 4, LAN8814_GPIO_EN_ADDR(pin), tmp);
+ lanphy_modify_page_reg(phydev, LAN8814_PAGE_COMMON_REGS,
+ LAN8814_GPIO_EN_ADDR(pin),
+ LAN8814_GPIO_EN_BIT(pin),
+ 0);
/* Clear the mapping of pin to registers 0 of the capture registers */
- tmp = lanphy_read_page_reg(phydev, 4, PTP_GPIO_CAP_MAP_LO);
- tmp &= ~GENMASK(3, 0);
- lanphy_write_page_reg(phydev, 4, PTP_GPIO_CAP_MAP_LO, tmp);
+ lanphy_modify_page_reg(phydev, LAN8814_PAGE_COMMON_REGS,
+ PTP_GPIO_CAP_MAP_LO,
+ GENMASK(3, 0),
+ 0);
/* Disable capture on both of the edges */
- tmp = lanphy_read_page_reg(phydev, 4, PTP_GPIO_CAP_EN);
- tmp &= ~PTP_GPIO_CAP_EN_GPIO_RE_CAPTURE_ENABLE(pin);
- tmp &= ~PTP_GPIO_CAP_EN_GPIO_FE_CAPTURE_ENABLE(pin);
- lanphy_write_page_reg(phydev, 4, PTP_GPIO_CAP_EN, tmp);
+ lanphy_modify_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, PTP_GPIO_CAP_EN,
+ PTP_GPIO_CAP_EN_GPIO_RE_CAPTURE_ENABLE(pin) |
+ PTP_GPIO_CAP_EN_GPIO_FE_CAPTURE_ENABLE(pin),
+ 0);
/* Disable interrupt top interrupt */
- tmp = lanphy_read_page_reg(phydev, 4, PTP_COMMON_INT_ENA);
- tmp &= ~PTP_COMMON_INT_ENA_GPIO_CAP_EN;
- lanphy_write_page_reg(phydev, 4, PTP_COMMON_INT_ENA, tmp);
+ lanphy_modify_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, PTP_COMMON_INT_ENA,
+ PTP_COMMON_INT_ENA_GPIO_CAP_EN,
+ 0);
}
static int lan8814_ptp_extts(struct ptp_clock_info *ptpci,
@@ -3749,7 +3874,8 @@ static void lan8814_get_tx_ts(struct kszphy_ptp_priv *ptp_priv)
/* If other timestamps are available in the FIFO,
* process them.
*/
- reg = lanphy_read_page_reg(phydev, 5, PTP_CAP_INFO);
+ reg = lanphy_read_page_reg(phydev, LAN8814_PAGE_PORT_REGS,
+ PTP_CAP_INFO);
} while (PTP_CAP_INFO_TX_TS_CNT_GET_(reg) > 0);
}
@@ -3822,7 +3948,8 @@ static void lan8814_get_rx_ts(struct kszphy_ptp_priv *ptp_priv)
/* If other timestamps are available in the FIFO,
* process them.
*/
- reg = lanphy_read_page_reg(phydev, 5, PTP_CAP_INFO);
+ reg = lanphy_read_page_reg(phydev, LAN8814_PAGE_PORT_REGS,
+ PTP_CAP_INFO);
} while (PTP_CAP_INFO_RX_TS_CNT_GET_(reg) > 0);
}
@@ -3859,31 +3986,40 @@ static int lan8814_gpio_process_cap(struct lan8814_shared_priv *shared)
/* This is 0 because whatever the input pin was, it was mapped to
* ltc gpio pin 0
*/
- tmp = lanphy_read_page_reg(phydev, 4, PTP_GPIO_SEL);
- tmp |= PTP_GPIO_SEL_GPIO_SEL(0);
- lanphy_write_page_reg(phydev, 4, PTP_GPIO_SEL, tmp);
+ lanphy_modify_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, PTP_GPIO_SEL,
+ PTP_GPIO_SEL_GPIO_SEL(0),
+ PTP_GPIO_SEL_GPIO_SEL(0));
- tmp = lanphy_read_page_reg(phydev, 4, PTP_GPIO_CAP_STS);
+ tmp = lanphy_read_page_reg(phydev, LAN8814_PAGE_COMMON_REGS,
+ PTP_GPIO_CAP_STS);
if (!(tmp & PTP_GPIO_CAP_STS_PTP_GPIO_RE_STS(0)) &&
!(tmp & PTP_GPIO_CAP_STS_PTP_GPIO_FE_STS(0)))
return -1;
if (tmp & BIT(0)) {
- sec = lanphy_read_page_reg(phydev, 4, PTP_GPIO_RE_LTC_SEC_HI_CAP);
+ sec = lanphy_read_page_reg(phydev, LAN8814_PAGE_COMMON_REGS,
+ PTP_GPIO_RE_LTC_SEC_HI_CAP);
sec <<= 16;
- sec |= lanphy_read_page_reg(phydev, 4, PTP_GPIO_RE_LTC_SEC_LO_CAP);
+ sec |= lanphy_read_page_reg(phydev, LAN8814_PAGE_COMMON_REGS,
+ PTP_GPIO_RE_LTC_SEC_LO_CAP);
- nsec = lanphy_read_page_reg(phydev, 4, PTP_GPIO_RE_LTC_NS_HI_CAP) & 0x3fff;
+ nsec = lanphy_read_page_reg(phydev, LAN8814_PAGE_COMMON_REGS,
+ PTP_GPIO_RE_LTC_NS_HI_CAP) & 0x3fff;
nsec <<= 16;
- nsec |= lanphy_read_page_reg(phydev, 4, PTP_GPIO_RE_LTC_NS_LO_CAP);
+ nsec |= lanphy_read_page_reg(phydev, LAN8814_PAGE_COMMON_REGS,
+ PTP_GPIO_RE_LTC_NS_LO_CAP);
} else {
- sec = lanphy_read_page_reg(phydev, 4, PTP_GPIO_FE_LTC_SEC_HI_CAP);
+ sec = lanphy_read_page_reg(phydev, LAN8814_PAGE_COMMON_REGS,
+ PTP_GPIO_FE_LTC_SEC_HI_CAP);
sec <<= 16;
- sec |= lanphy_read_page_reg(phydev, 4, PTP_GPIO_FE_LTC_SEC_LO_CAP);
+ sec |= lanphy_read_page_reg(phydev, LAN8814_PAGE_COMMON_REGS,
+ PTP_GPIO_FE_LTC_SEC_LO_CAP);
- nsec = lanphy_read_page_reg(phydev, 4, PTP_GPIO_FE_LTC_NS_HI_CAP) & 0x3fff;
+ nsec = lanphy_read_page_reg(phydev, LAN8814_PAGE_COMMON_REGS,
+ PTP_GPIO_FE_LTC_NS_HI_CAP) & 0x3fff;
nsec <<= 16;
- nsec |= lanphy_read_page_reg(phydev, 4, PTP_GPIO_RE_LTC_NS_LO_CAP);
+ nsec |= lanphy_read_page_reg(phydev, LAN8814_PAGE_COMMON_REGS,
+ PTP_GPIO_FE_LTC_NS_LO_CAP);
}
ptp_event.index = 0;
@@ -3908,19 +4044,17 @@ static int lan8814_handle_gpio_interrupt(struct phy_device *phydev, u16 status)
static int lan8804_config_init(struct phy_device *phydev)
{
- int val;
-
/* MDI-X setting for swap A,B transmit */
- val = lanphy_read_page_reg(phydev, 2, LAN8804_ALIGN_SWAP);
- val &= ~LAN8804_ALIGN_TX_A_B_SWAP_MASK;
- val |= LAN8804_ALIGN_TX_A_B_SWAP;
- lanphy_write_page_reg(phydev, 2, LAN8804_ALIGN_SWAP, val);
+ lanphy_modify_page_reg(phydev, LAN8814_PAGE_PCS_DIGITAL, LAN8804_ALIGN_SWAP,
+ LAN8804_ALIGN_TX_A_B_SWAP_MASK,
+ LAN8804_ALIGN_TX_A_B_SWAP);
/* Make sure that the PHY will not stop generating the clock when the
* link partner goes down
*/
- lanphy_write_page_reg(phydev, 31, LAN8814_CLOCK_MANAGEMENT, 0x27e);
- lanphy_read_page_reg(phydev, 1, LAN8814_LINK_QUALITY);
+ lanphy_write_page_reg(phydev, LAN8814_PAGE_SYSTEM_CTRL,
+ LAN8814_CLOCK_MANAGEMENT, 0x27e);
+ lanphy_read_page_reg(phydev, LAN8814_PAGE_AFE_PMA, LAN8814_LINK_QUALITY);
return 0;
}
@@ -4002,7 +4136,8 @@ static irqreturn_t lan8814_handle_interrupt(struct phy_device *phydev)
}
while (true) {
- irq_status = lanphy_read_page_reg(phydev, 5, PTP_TSU_INT_STS);
+ irq_status = lanphy_read_page_reg(phydev, LAN8814_PAGE_PORT_REGS,
+ PTP_TSU_INT_STS);
if (!irq_status)
break;
@@ -4030,7 +4165,7 @@ static int lan8814_config_intr(struct phy_device *phydev)
{
int err;
- lanphy_write_page_reg(phydev, 4, LAN8814_INTR_CTRL_REG,
+ lanphy_write_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, LAN8814_INTR_CTRL_REG,
LAN8814_INTR_CTRL_REG_POLARITY |
LAN8814_INTR_CTRL_REG_INTR_ENABLE);
@@ -4056,35 +4191,41 @@ static void lan8814_ptp_init(struct phy_device *phydev)
{
struct kszphy_priv *priv = phydev->priv;
struct kszphy_ptp_priv *ptp_priv = &priv->ptp_priv;
- u32 temp;
if (!IS_ENABLED(CONFIG_PTP_1588_CLOCK) ||
!IS_ENABLED(CONFIG_NETWORK_PHY_TIMESTAMPING))
return;
- lanphy_write_page_reg(phydev, 5, TSU_HARD_RESET, TSU_HARD_RESET_);
+ lanphy_write_page_reg(phydev, LAN8814_PAGE_PORT_REGS,
+ TSU_HARD_RESET, TSU_HARD_RESET_);
- temp = lanphy_read_page_reg(phydev, 5, PTP_TX_MOD);
- temp |= PTP_TX_MOD_BAD_UDPV4_CHKSUM_FORCE_FCS_DIS_;
- lanphy_write_page_reg(phydev, 5, PTP_TX_MOD, temp);
+ lanphy_modify_page_reg(phydev, LAN8814_PAGE_PORT_REGS, PTP_TX_MOD,
+ PTP_TX_MOD_BAD_UDPV4_CHKSUM_FORCE_FCS_DIS_,
+ PTP_TX_MOD_BAD_UDPV4_CHKSUM_FORCE_FCS_DIS_);
- temp = lanphy_read_page_reg(phydev, 5, PTP_RX_MOD);
- temp |= PTP_RX_MOD_BAD_UDPV4_CHKSUM_FORCE_FCS_DIS_;
- lanphy_write_page_reg(phydev, 5, PTP_RX_MOD, temp);
+ lanphy_modify_page_reg(phydev, LAN8814_PAGE_PORT_REGS, PTP_RX_MOD,
+ PTP_RX_MOD_BAD_UDPV4_CHKSUM_FORCE_FCS_DIS_,
+ PTP_RX_MOD_BAD_UDPV4_CHKSUM_FORCE_FCS_DIS_);
- lanphy_write_page_reg(phydev, 5, PTP_RX_PARSE_CONFIG, 0);
- lanphy_write_page_reg(phydev, 5, PTP_TX_PARSE_CONFIG, 0);
+ lanphy_write_page_reg(phydev, LAN8814_PAGE_PORT_REGS,
+ PTP_RX_PARSE_CONFIG, 0);
+ lanphy_write_page_reg(phydev, LAN8814_PAGE_PORT_REGS,
+ PTP_TX_PARSE_CONFIG, 0);
/* Removing default registers configs related to L2 and IP */
- lanphy_write_page_reg(phydev, 5, PTP_TX_PARSE_L2_ADDR_EN, 0);
- lanphy_write_page_reg(phydev, 5, PTP_RX_PARSE_L2_ADDR_EN, 0);
- lanphy_write_page_reg(phydev, 5, PTP_TX_PARSE_IP_ADDR_EN, 0);
- lanphy_write_page_reg(phydev, 5, PTP_RX_PARSE_IP_ADDR_EN, 0);
+ lanphy_write_page_reg(phydev, LAN8814_PAGE_PORT_REGS,
+ PTP_TX_PARSE_L2_ADDR_EN, 0);
+ lanphy_write_page_reg(phydev, LAN8814_PAGE_PORT_REGS,
+ PTP_RX_PARSE_L2_ADDR_EN, 0);
+ lanphy_write_page_reg(phydev, LAN8814_PAGE_PORT_REGS,
+ PTP_TX_PARSE_IP_ADDR_EN, 0);
+ lanphy_write_page_reg(phydev, LAN8814_PAGE_PORT_REGS,
+ PTP_RX_PARSE_IP_ADDR_EN, 0);
/* Disable checking for minorVersionPTP field */
- lanphy_write_page_reg(phydev, 5, PTP_RX_VERSION,
+ lanphy_write_page_reg(phydev, LAN8814_PAGE_PORT_REGS, PTP_RX_VERSION,
PTP_MAX_VERSION(0xff) | PTP_MIN_VERSION(0x0));
- lanphy_write_page_reg(phydev, 5, PTP_TX_VERSION,
+ lanphy_write_page_reg(phydev, LAN8814_PAGE_PORT_REGS, PTP_TX_VERSION,
PTP_MAX_VERSION(0xff) | PTP_MIN_VERSION(0x0));
skb_queue_head_init(&ptp_priv->tx_queue);
@@ -4105,7 +4246,8 @@ static void lan8814_ptp_init(struct phy_device *phydev)
phydev->default_timestamp = true;
}
-static int lan8814_ptp_probe_once(struct phy_device *phydev)
+static int __lan8814_ptp_probe_once(struct phy_device *phydev, char *pin_name,
+ int gpios)
{
struct lan8814_shared_priv *shared = phy_package_get_priv(phydev);
@@ -4113,18 +4255,18 @@ static int lan8814_ptp_probe_once(struct phy_device *phydev)
mutex_init(&shared->shared_lock);
shared->pin_config = devm_kmalloc_array(&phydev->mdio.dev,
- LAN8814_PTP_GPIO_NUM,
+ gpios,
sizeof(*shared->pin_config),
GFP_KERNEL);
if (!shared->pin_config)
return -ENOMEM;
- for (int i = 0; i < LAN8814_PTP_GPIO_NUM; i++) {
+ for (int i = 0; i < gpios; i++) {
struct ptp_pin_desc *ptp_pin = &shared->pin_config[i];
memset(ptp_pin, 0, sizeof(*ptp_pin));
snprintf(ptp_pin->name,
- sizeof(ptp_pin->name), "lan8814_ptp_pin_%02d", i);
+ sizeof(ptp_pin->name), "%s_%02d", pin_name, i);
ptp_pin->index = i;
ptp_pin->func = PTP_PF_NONE;
}
@@ -4134,7 +4276,7 @@ static int lan8814_ptp_probe_once(struct phy_device *phydev)
shared->ptp_clock_info.max_adj = 31249999;
shared->ptp_clock_info.n_alarm = 0;
shared->ptp_clock_info.n_ext_ts = LAN8814_PTP_EXTTS_NUM;
- shared->ptp_clock_info.n_pins = LAN8814_PTP_GPIO_NUM;
+ shared->ptp_clock_info.n_pins = gpios;
shared->ptp_clock_info.pps = 0;
shared->ptp_clock_info.supported_extts_flags = PTP_RISING_EDGE |
PTP_FALLING_EDGE |
@@ -4169,50 +4311,60 @@ static int lan8814_ptp_probe_once(struct phy_device *phydev)
/* EP.4 is shared between all the PHYs in the package and can be
* accessed by any of them
*/
- lanphy_write_page_reg(phydev, 4, LTC_HARD_RESET, LTC_HARD_RESET_);
- lanphy_write_page_reg(phydev, 4, PTP_OPERATING_MODE,
+ lanphy_write_page_reg(phydev, LAN8814_PAGE_COMMON_REGS,
+ LTC_HARD_RESET, LTC_HARD_RESET_);
+ lanphy_write_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, PTP_OPERATING_MODE,
PTP_OPERATING_MODE_STANDALONE_);
/* Enable ptp to run LTC clock for ptp and gpio 1PPS operation */
- lanphy_write_page_reg(phydev, 4, PTP_CMD_CTL, PTP_CMD_CTL_PTP_ENABLE_);
+ lanphy_write_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, PTP_CMD_CTL,
+ PTP_CMD_CTL_PTP_ENABLE_);
return 0;
}
+static int lan8814_ptp_probe_once(struct phy_device *phydev)
+{
+ return __lan8814_ptp_probe_once(phydev, "lan8814_ptp_pin",
+ LAN8814_PTP_GPIO_NUM);
+}
+
static void lan8814_setup_led(struct phy_device *phydev, int val)
{
int temp;
- temp = lanphy_read_page_reg(phydev, 5, LAN8814_LED_CTRL_1);
+ temp = lanphy_read_page_reg(phydev, LAN8814_PAGE_PORT_REGS,
+ LAN8814_LED_CTRL_1);
if (val)
temp |= LAN8814_LED_CTRL_1_KSZ9031_LED_MODE_;
else
temp &= ~LAN8814_LED_CTRL_1_KSZ9031_LED_MODE_;
- lanphy_write_page_reg(phydev, 5, LAN8814_LED_CTRL_1, temp);
+ lanphy_write_page_reg(phydev, LAN8814_PAGE_PORT_REGS,
+ LAN8814_LED_CTRL_1, temp);
}
static int lan8814_config_init(struct phy_device *phydev)
{
struct kszphy_priv *lan8814 = phydev->priv;
- int val;
/* Reset the PHY */
- val = lanphy_read_page_reg(phydev, 4, LAN8814_QSGMII_SOFT_RESET);
- val |= LAN8814_QSGMII_SOFT_RESET_BIT;
- lanphy_write_page_reg(phydev, 4, LAN8814_QSGMII_SOFT_RESET, val);
+ lanphy_modify_page_reg(phydev, LAN8814_PAGE_COMMON_REGS,
+ LAN8814_QSGMII_SOFT_RESET,
+ LAN8814_QSGMII_SOFT_RESET_BIT,
+ LAN8814_QSGMII_SOFT_RESET_BIT);
/* Disable ANEG with QSGMII PCS Host side */
- val = lanphy_read_page_reg(phydev, 5, LAN8814_QSGMII_PCS1G_ANEG_CONFIG);
- val &= ~LAN8814_QSGMII_PCS1G_ANEG_CONFIG_ANEG_ENA;
- lanphy_write_page_reg(phydev, 5, LAN8814_QSGMII_PCS1G_ANEG_CONFIG, val);
+ lanphy_modify_page_reg(phydev, LAN8814_PAGE_COMMON_REGS,
+ LAN8814_QSGMII_PCS1G_ANEG_CONFIG,
+ LAN8814_QSGMII_PCS1G_ANEG_CONFIG_ANEG_ENA,
+ 0);
/* MDI-X setting for swap A,B transmit */
- val = lanphy_read_page_reg(phydev, 2, LAN8814_ALIGN_SWAP);
- val &= ~LAN8814_ALIGN_TX_A_B_SWAP_MASK;
- val |= LAN8814_ALIGN_TX_A_B_SWAP;
- lanphy_write_page_reg(phydev, 2, LAN8814_ALIGN_SWAP, val);
+ lanphy_modify_page_reg(phydev, LAN8814_PAGE_PCS_DIGITAL, LAN8814_ALIGN_SWAP,
+ LAN8814_ALIGN_TX_A_B_SWAP_MASK,
+ LAN8814_ALIGN_TX_A_B_SWAP);
if (lan8814->led_mode >= 0)
lan8814_setup_led(phydev, lan8814->led_mode);
@@ -4243,29 +4395,24 @@ static int lan8814_release_coma_mode(struct phy_device *phydev)
static void lan8814_clear_2psp_bit(struct phy_device *phydev)
{
- u16 val;
-
/* It was noticed that when traffic was passing through the PHY and the
* cable was removed, the LED stayed on even though there was no
* link
*/
- val = lanphy_read_page_reg(phydev, 2, LAN8814_EEE_STATE);
- val &= ~LAN8814_EEE_STATE_MASK2P5P;
- lanphy_write_page_reg(phydev, 2, LAN8814_EEE_STATE, val);
+ lanphy_modify_page_reg(phydev, LAN8814_PAGE_PCS_DIGITAL, LAN8814_EEE_STATE,
+ LAN8814_EEE_STATE_MASK2P5P,
+ 0);
}
static void lan8814_update_meas_time(struct phy_device *phydev)
{
- u16 val;
-
/* By setting the measure time to a value of 0xb this will allow cables
* longer than 100m to be used. This configuration can be used
* regardless of the mode of operation of the PHY
*/
- val = lanphy_read_page_reg(phydev, 1, LAN8814_PD_CONTROLS);
- val &= ~LAN8814_PD_CONTROLS_PD_MEAS_TIME_MASK;
- val |= LAN8814_PD_CONTROLS_PD_MEAS_TIME_VAL;
- lanphy_write_page_reg(phydev, 1, LAN8814_PD_CONTROLS, val);
+ lanphy_modify_page_reg(phydev, LAN8814_PAGE_AFE_PMA, LAN8814_PD_CONTROLS,
+ LAN8814_PD_CONTROLS_PD_MEAS_TIME_MASK,
+ LAN8814_PD_CONTROLS_PD_MEAS_TIME_VAL);
}
static int lan8814_probe(struct phy_device *phydev)
@@ -4288,7 +4435,7 @@ static int lan8814_probe(struct phy_device *phydev)
/* Strap-in value for PHY address, below register read gives starting
* phy address value
*/
- addr = lanphy_read_page_reg(phydev, 4, 0) & 0x1F;
+ addr = lanphy_read_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, 0) & 0x1F;
devm_phy_package_join(&phydev->mdio.dev, phydev,
addr, sizeof(struct lan8814_shared_priv));
@@ -5643,10 +5790,286 @@ static int ksz9131_resume(struct phy_device *phydev)
return kszphy_resume(phydev);
}
+#define LAN8842_PTP_GPIO_NUM 16
+
+static int lan8842_ptp_probe_once(struct phy_device *phydev)
+{
+ return __lan8814_ptp_probe_once(phydev, "lan8842_ptp_pin",
+ LAN8842_PTP_GPIO_NUM);
+}
+
+#define LAN8842_STRAP_REG 0 /* 0x0 */
+#define LAN8842_STRAP_REG_PHYADDR_MASK GENMASK(4, 0)
+#define LAN8842_SKU_REG 11 /* 0x0b */
+#define LAN8842_SELF_TEST 14 /* 0x0e */
+#define LAN8842_SELF_TEST_RX_CNT_ENA BIT(8)
+#define LAN8842_SELF_TEST_TX_CNT_ENA BIT(4)
+
+static int lan8842_probe(struct phy_device *phydev)
+{
+ struct lan8842_priv *priv;
+ int addr;
+ int ret;
+
+ priv = devm_kzalloc(&phydev->mdio.dev, sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ phydev->priv = priv;
+
+ /* Similar to the lan8814, this PHY has a pin which needs to be pulled
+ * down to allow any traffic to pass through it. Therefore use the same
+ * function as the lan8814
+ */
+ ret = lan8814_release_coma_mode(phydev);
+ if (ret)
+ return ret;
+
+ /* Enable to count the RX and TX packets */
+ ret = lanphy_write_page_reg(phydev, LAN8814_PAGE_PCS_DIGITAL,
+ LAN8842_SELF_TEST,
+ LAN8842_SELF_TEST_RX_CNT_ENA |
+ LAN8842_SELF_TEST_TX_CNT_ENA);
+ if (ret < 0)
+ return ret;
+
+ /* The lan8832 revision doesn't support PTP, therefore don't add
+ * any PTP clocks
+ */
+ ret = lanphy_read_page_reg(phydev, LAN8814_PAGE_COMMON_REGS,
+ LAN8842_SKU_REG);
+ if (ret < 0)
+ return ret;
+
+ priv->rev = ret;
+ if (priv->rev == LAN8842_REV_8832)
+ return 0;
+
+ /* As the lan8814 and lan8842 have the same IP for the PTP block and the
+ * only difference is the number of GPIOs, make sure that the lan8842
+ * also initializes the shared data pointer, as it is used in all the
+ * lan8814 PTP functions. The lan8842 doesn't have multiple PHYs in the
+ * same package.
+ */
+ addr = lanphy_read_page_reg(phydev, LAN8814_PAGE_COMMON_REGS,
+ LAN8842_STRAP_REG);
+ if (addr < 0)
+ return addr;
+ addr &= LAN8842_STRAP_REG_PHYADDR_MASK;
+
+ ret = devm_phy_package_join(&phydev->mdio.dev, phydev, addr,
+ sizeof(struct lan8814_shared_priv));
+ if (ret)
+ return ret;
+
+ if (phy_package_init_once(phydev)) {
+ ret = lan8842_ptp_probe_once(phydev);
+ if (ret)
+ return ret;
+ }
+
+ lan8814_ptp_init(phydev);
+
+ return 0;
+}
+
+static int lan8842_config_init(struct phy_device *phydev)
+{
+ int ret;
+
+ /* Reset the PHY */
+ ret = lanphy_modify_page_reg(phydev, LAN8814_PAGE_COMMON_REGS,
+ LAN8814_QSGMII_SOFT_RESET,
+ LAN8814_QSGMII_SOFT_RESET_BIT,
+ LAN8814_QSGMII_SOFT_RESET_BIT);
+ if (ret < 0)
+ return ret;
+
+ /* To allow the PHY to control the LEDs, the GPIOs of the PHY should be
+ * set to function mode and not GPIO mode. Apparently the default value
+ * is GPIO and not function, even though the datasheet says that it is
+ * function. Therefore set this value.
+ */
+ return lanphy_write_page_reg(phydev, LAN8814_PAGE_COMMON_REGS,
+ LAN8814_GPIO_EN2, 0);
+}
+
+#define LAN8842_INTR_CTRL_REG 52 /* 0x34 */
+
+static int lan8842_config_intr(struct phy_device *phydev)
+{
+ int err;
+
+ lanphy_write_page_reg(phydev, LAN8814_PAGE_COMMON_REGS,
+ LAN8842_INTR_CTRL_REG,
+ LAN8814_INTR_CTRL_REG_INTR_ENABLE);
+
+ /* enable / disable interrupts */
+ if (phydev->interrupts == PHY_INTERRUPT_ENABLED) {
+ err = lan8814_ack_interrupt(phydev);
+ if (err)
+ return err;
+
+ err = phy_write(phydev, LAN8814_INTC, LAN8814_INT_LINK);
+ } else {
+ err = phy_write(phydev, LAN8814_INTC, 0);
+ if (err)
+ return err;
+
+ err = lan8814_ack_interrupt(phydev);
+ }
+
+ return err;
+}
+
+static unsigned int lan8842_inband_caps(struct phy_device *phydev,
+ phy_interface_t interface)
+{
+ /* Inband configuration can be enabled or disabled using the
+ * PCS1G_ANEG_CONFIG register.
+ */
+ return LINK_INBAND_DISABLE | LINK_INBAND_ENABLE;
+}
+
+static int lan8842_config_inband(struct phy_device *phydev, unsigned int modes)
+{
+ bool enable;
+
+ if (modes == LINK_INBAND_DISABLE)
+ enable = false;
+ else
+ enable = true;
+
+ /* Disable or enable in-band autoneg with PCS Host side
+ * It has the same address as lan8814
+ */
+ return lanphy_modify_page_reg(phydev, LAN8814_PAGE_PORT_REGS,
+ LAN8814_QSGMII_PCS1G_ANEG_CONFIG,
+ LAN8814_QSGMII_PCS1G_ANEG_CONFIG_ANEG_ENA,
+ enable ? LAN8814_QSGMII_PCS1G_ANEG_CONFIG_ANEG_ENA : 0);
+}
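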
+
+static void lan8842_handle_ptp_interrupt(struct phy_device *phydev, u16 status)
+{
+ struct kszphy_ptp_priv *ptp_priv;
+ struct lan8842_priv *priv;
+
+ priv = phydev->priv;
+ ptp_priv = &priv->ptp_priv;
+
+ if (status & PTP_TSU_INT_STS_PTP_TX_TS_EN_)
+ lan8814_get_tx_ts(ptp_priv);
+
+ if (status & PTP_TSU_INT_STS_PTP_RX_TS_EN_)
+ lan8814_get_rx_ts(ptp_priv);
+
+ if (status & PTP_TSU_INT_STS_PTP_TX_TS_OVRFL_INT_) {
+ lan8814_flush_fifo(phydev, true);
+ skb_queue_purge(&ptp_priv->tx_queue);
+ }
+
+ if (status & PTP_TSU_INT_STS_PTP_RX_TS_OVRFL_INT_) {
+ lan8814_flush_fifo(phydev, false);
+ skb_queue_purge(&ptp_priv->rx_queue);
+ }
+}
+
+static irqreturn_t lan8842_handle_interrupt(struct phy_device *phydev)
+{
+ struct lan8842_priv *priv = phydev->priv;
+ int ret = IRQ_NONE;
+ int irq_status;
+
+ irq_status = phy_read(phydev, LAN8814_INTS);
+ if (irq_status < 0) {
+ phy_error(phydev);
+ return IRQ_NONE;
+ }
+
+ if (irq_status & LAN8814_INT_LINK) {
+ phy_trigger_machine(phydev);
+ ret = IRQ_HANDLED;
+ }
+
+ /* The lan8832 revision doesn't support PTP, therefore there is no
+ * need to check the PTP and GPIO interrupts
+ */
+ if (priv->rev == LAN8842_REV_8832)
+ goto out;
+
+ while (true) {
+ irq_status = lanphy_read_page_reg(phydev, LAN8814_PAGE_PORT_REGS,
+ PTP_TSU_INT_STS);
+ if (!irq_status)
+ break;
+
+ lan8842_handle_ptp_interrupt(phydev, irq_status);
+ ret = IRQ_HANDLED;
+ }
+
+ if (!lan8814_handle_gpio_interrupt(phydev, irq_status))
+ ret = IRQ_HANDLED;
+
+out:
+ return ret;
+}
+
+static u64 lan8842_get_stat(struct phy_device *phydev, int count, int *regs)
+{
+ u64 ret = 0;
+ int val;
+
+ for (int j = 0; j < count; ++j) {
+ val = lanphy_read_page_reg(phydev, LAN8814_PAGE_PCS_DIGITAL,
+ regs[j]);
+ if (val < 0)
+ return U64_MAX;
+
+ ret <<= 16;
+ ret += val;
+ }
+ return ret;
+}
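lan8842_get_stat() folds a most-significant-first list of 16-bit registers into one counter; for a three-register statistic such as rx_packets below this is equivalent to:

        /* stat = ((u64)hi << 32) | ((u64)mid << 16) | lo;
         * any failed read poisons the whole statistic to U64_MAX.
         */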
+
+static int lan8842_update_stats(struct phy_device *phydev)
+{
+ struct lan8842_priv *priv = phydev->priv;
+ int rx_packets_regs[] = {88, 61, 60};
+ int rx_errors_regs[] = {63, 62};
+ int tx_packets_regs[] = {89, 85, 84};
+ int tx_errors_regs[] = {87, 86};
+
+ priv->phy_stats.rx_packets = lan8842_get_stat(phydev,
+ ARRAY_SIZE(rx_packets_regs),
+ rx_packets_regs);
+ priv->phy_stats.rx_errors = lan8842_get_stat(phydev,
+ ARRAY_SIZE(rx_errors_regs),
+ rx_errors_regs);
+ priv->phy_stats.tx_packets = lan8842_get_stat(phydev,
+ ARRAY_SIZE(tx_packets_regs),
+ tx_packets_regs);
+ priv->phy_stats.tx_errors = lan8842_get_stat(phydev,
+ ARRAY_SIZE(tx_errors_regs),
+ tx_errors_regs);
+
+ return 0;
+}
+
+static void lan8842_get_phy_stats(struct phy_device *phydev,
+ struct ethtool_eth_phy_stats *eth_stats,
+ struct ethtool_phy_stats *stats)
+{
+ struct lan8842_priv *priv = phydev->priv;
+
+ stats->rx_packets = priv->phy_stats.rx_packets;
+ stats->rx_errors = priv->phy_stats.rx_errors;
+ stats->tx_packets = priv->phy_stats.tx_packets;
+ stats->tx_errors = priv->phy_stats.tx_errors;
+}
+
static struct phy_driver ksphy_driver[] = {
{
- .phy_id = PHY_ID_KS8737,
- .phy_id_mask = MICREL_PHY_ID_MASK,
+ PHY_ID_MATCH_MODEL(PHY_ID_KS8737),
.name = "Micrel KS8737",
/* PHY_BASIC_FEATURES */
.driver_data = &ks8737_type,
@@ -5687,8 +6110,7 @@ static struct phy_driver ksphy_driver[] = {
.suspend = kszphy_suspend,
.resume = kszphy_resume,
}, {
- .phy_id = PHY_ID_KSZ8041,
- .phy_id_mask = MICREL_PHY_ID_MASK,
+ PHY_ID_MATCH_MODEL(PHY_ID_KSZ8041),
.name = "Micrel KSZ8041",
/* PHY_BASIC_FEATURES */
.driver_data = &ksz8041_type,
@@ -5703,8 +6125,7 @@ static struct phy_driver ksphy_driver[] = {
.suspend = ksz8041_suspend,
.resume = ksz8041_resume,
}, {
- .phy_id = PHY_ID_KSZ8041RNLI,
- .phy_id_mask = MICREL_PHY_ID_MASK,
+ PHY_ID_MATCH_MODEL(PHY_ID_KSZ8041RNLI),
.name = "Micrel KSZ8041RNLI",
/* PHY_BASIC_FEATURES */
.driver_data = &ksz8041_type,
@@ -5747,9 +6168,8 @@ static struct phy_driver ksphy_driver[] = {
.suspend = kszphy_suspend,
.resume = kszphy_resume,
}, {
- .phy_id = PHY_ID_KSZ8081,
+ PHY_ID_MATCH_MODEL(PHY_ID_KSZ8081),
.name = "Micrel KSZ8081 or KSZ8091",
- .phy_id_mask = MICREL_PHY_ID_MASK,
.flags = PHY_POLL_CABLE_TEST,
/* PHY_BASIC_FEATURES */
.driver_data = &ksz8081_type,
@@ -5768,9 +6188,8 @@ static struct phy_driver ksphy_driver[] = {
.cable_test_start = ksz886x_cable_test_start,
.cable_test_get_status = ksz886x_cable_test_get_status,
}, {
- .phy_id = PHY_ID_KSZ8061,
+ PHY_ID_MATCH_MODEL(PHY_ID_KSZ8061),
.name = "Micrel KSZ8061",
- .phy_id_mask = MICREL_PHY_ID_MASK,
/* PHY_BASIC_FEATURES */
.probe = kszphy_probe,
.config_init = ksz8061_config_init,
@@ -5798,8 +6217,7 @@ static struct phy_driver ksphy_driver[] = {
.read_mmd = genphy_read_mmd_unsupported,
.write_mmd = genphy_write_mmd_unsupported,
}, {
- .phy_id = PHY_ID_KSZ9031,
- .phy_id_mask = MICREL_PHY_ID_MASK,
+ PHY_ID_MATCH_MODEL(PHY_ID_KSZ9031),
.name = "Micrel KSZ9031 Gigabit PHY",
.flags = PHY_POLL_CABLE_TEST,
.driver_data = &ksz9021_type,
@@ -5819,8 +6237,7 @@ static struct phy_driver ksphy_driver[] = {
.cable_test_get_status = ksz9x31_cable_test_get_status,
.set_loopback = ksz9031_set_loopback,
}, {
- .phy_id = PHY_ID_LAN8814,
- .phy_id_mask = MICREL_PHY_ID_MASK,
+ PHY_ID_MATCH_MODEL(PHY_ID_LAN8814),
.name = "Microchip INDY Gigabit Quad PHY",
.flags = PHY_POLL_CABLE_TEST,
.config_init = lan8814_config_init,
@@ -5838,8 +6255,7 @@ static struct phy_driver ksphy_driver[] = {
.cable_test_start = lan8814_cable_test_start,
.cable_test_get_status = ksz886x_cable_test_get_status,
}, {
- .phy_id = PHY_ID_LAN8804,
- .phy_id_mask = MICREL_PHY_ID_MASK,
+ PHY_ID_MATCH_MODEL(PHY_ID_LAN8804),
.name = "Microchip LAN966X Gigabit PHY",
.config_init = lan8804_config_init,
.driver_data = &ksz9021_type,
@@ -5854,8 +6270,7 @@ static struct phy_driver ksphy_driver[] = {
.config_intr = lan8804_config_intr,
.handle_interrupt = lan8804_handle_interrupt,
}, {
- .phy_id = PHY_ID_LAN8841,
- .phy_id_mask = MICREL_PHY_ID_MASK,
+ PHY_ID_MATCH_MODEL(PHY_ID_LAN8841),
.name = "Microchip LAN8841 Gigabit PHY",
.flags = PHY_POLL_CABLE_TEST,
.driver_data = &lan8841_type,
@@ -5872,8 +6287,22 @@ static struct phy_driver ksphy_driver[] = {
.cable_test_start = lan8814_cable_test_start,
.cable_test_get_status = ksz886x_cable_test_get_status,
}, {
- .phy_id = PHY_ID_KSZ9131,
- .phy_id_mask = MICREL_PHY_ID_MASK,
+ PHY_ID_MATCH_MODEL(PHY_ID_LAN8842),
+ .name = "Microchip LAN8842 Gigabit PHY",
+ .flags = PHY_POLL_CABLE_TEST,
+ .driver_data = &lan8814_type,
+ .probe = lan8842_probe,
+ .config_init = lan8842_config_init,
+ .config_intr = lan8842_config_intr,
+ .inband_caps = lan8842_inband_caps,
+ .config_inband = lan8842_config_inband,
+ .handle_interrupt = lan8842_handle_interrupt,
+ .get_phy_stats = lan8842_get_phy_stats,
+ .update_stats = lan8842_update_stats,
+ .cable_test_start = lan8814_cable_test_start,
+ .cable_test_get_status = ksz886x_cable_test_get_status,
+}, {
+ PHY_ID_MATCH_MODEL(PHY_ID_KSZ9131),
.name = "Microchip KSZ9131 Gigabit PHY",
/* PHY_GBIT_FEATURES */
.flags = PHY_POLL_CABLE_TEST,
@@ -5894,8 +6323,7 @@ static struct phy_driver ksphy_driver[] = {
.cable_test_get_status = ksz9x31_cable_test_get_status,
.get_features = ksz9477_get_features,
}, {
- .phy_id = PHY_ID_KSZ8873MLL,
- .phy_id_mask = MICREL_PHY_ID_MASK,
+ PHY_ID_MATCH_MODEL(PHY_ID_KSZ8873MLL),
.name = "Micrel KSZ8873MLL Switch",
/* PHY_BASIC_FEATURES */
.config_init = kszphy_config_init,
@@ -5904,8 +6332,7 @@ static struct phy_driver ksphy_driver[] = {
.suspend = genphy_suspend,
.resume = genphy_resume,
}, {
- .phy_id = PHY_ID_KSZ886X,
- .phy_id_mask = MICREL_PHY_ID_MASK,
+ PHY_ID_MATCH_MODEL(PHY_ID_KSZ886X),
.name = "Micrel KSZ8851 Ethernet MAC or KSZ886X Switch",
.driver_data = &ksz886x_type,
/* PHY_BASIC_FEATURES */
@@ -5925,8 +6352,7 @@ static struct phy_driver ksphy_driver[] = {
.suspend = genphy_suspend,
.resume = genphy_resume,
}, {
- .phy_id = PHY_ID_KSZ9477,
- .phy_id_mask = MICREL_PHY_ID_MASK,
+ PHY_ID_MATCH_MODEL(PHY_ID_KSZ9477),
.name = "Microchip KSZ9477",
.probe = kszphy_probe,
/* PHY_GBIT_FEATURES */
@@ -5953,22 +6379,24 @@ MODULE_LICENSE("GPL");
static const struct mdio_device_id __maybe_unused micrel_tbl[] = {
{ PHY_ID_KSZ9021, 0x000ffffe },
- { PHY_ID_KSZ9031, MICREL_PHY_ID_MASK },
- { PHY_ID_KSZ9131, MICREL_PHY_ID_MASK },
+ { PHY_ID_MATCH_MODEL(PHY_ID_KSZ9031) },
+ { PHY_ID_MATCH_MODEL(PHY_ID_KSZ9131) },
{ PHY_ID_KSZ8001, 0x00fffffc },
- { PHY_ID_KS8737, MICREL_PHY_ID_MASK },
+ { PHY_ID_MATCH_MODEL(PHY_ID_KS8737) },
{ PHY_ID_KSZ8021, 0x00ffffff },
{ PHY_ID_KSZ8031, 0x00ffffff },
- { PHY_ID_KSZ8041, MICREL_PHY_ID_MASK },
- { PHY_ID_KSZ8051, MICREL_PHY_ID_MASK },
- { PHY_ID_KSZ8061, MICREL_PHY_ID_MASK },
- { PHY_ID_KSZ8081, MICREL_PHY_ID_MASK },
- { PHY_ID_KSZ8873MLL, MICREL_PHY_ID_MASK },
- { PHY_ID_KSZ886X, MICREL_PHY_ID_MASK },
- { PHY_ID_KSZ9477, MICREL_PHY_ID_MASK },
- { PHY_ID_LAN8814, MICREL_PHY_ID_MASK },
- { PHY_ID_LAN8804, MICREL_PHY_ID_MASK },
- { PHY_ID_LAN8841, MICREL_PHY_ID_MASK },
+ { PHY_ID_MATCH_MODEL(PHY_ID_KSZ8041) },
+ { PHY_ID_MATCH_MODEL(PHY_ID_KSZ8041RNLI) },
+ { PHY_ID_MATCH_MODEL(PHY_ID_KSZ8051) },
+ { PHY_ID_MATCH_MODEL(PHY_ID_KSZ8061) },
+ { PHY_ID_MATCH_MODEL(PHY_ID_KSZ8081) },
+ { PHY_ID_MATCH_MODEL(PHY_ID_KSZ8873MLL) },
+ { PHY_ID_MATCH_MODEL(PHY_ID_KSZ886X) },
+ { PHY_ID_MATCH_MODEL(PHY_ID_KSZ9477) },
+ { PHY_ID_MATCH_MODEL(PHY_ID_LAN8814) },
+ { PHY_ID_MATCH_MODEL(PHY_ID_LAN8804) },
+ { PHY_ID_MATCH_MODEL(PHY_ID_LAN8841) },
+ { PHY_ID_MATCH_MODEL(PHY_ID_LAN8842) },
{ }
};
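For reference, PHY_ID_MATCH_MODEL in include/linux/phy.h is understood to expand to an ID plus a model-level mask, which is why the explicit MICREL_PHY_ID_MASK entries above could be dropped (paraphrased; verify against the tree you build with):

        #define PHY_ID_MATCH_MODEL(id)  .phy_id = (id), .phy_id_mask = GENMASK(31, 4)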
diff --git a/drivers/net/phy/motorcomm.c b/drivers/net/phy/motorcomm.c
index 0e91f5d1a4fd..a3593e663059 100644
--- a/drivers/net/phy/motorcomm.c
+++ b/drivers/net/phy/motorcomm.c
@@ -213,6 +213,20 @@
#define YT8521_RC1R_RGMII_2_100_NS 14
#define YT8521_RC1R_RGMII_2_250_NS 15
+/* LED CONFIG */
+#define YT8521_MAX_LEDS 3
+#define YT8521_LED0_CFG_REG 0xA00C
+#define YT8521_LED1_CFG_REG 0xA00D
+#define YT8521_LED2_CFG_REG 0xA00E
+#define YT8521_LED_ACT_BLK_IND BIT(13)
+#define YT8521_LED_FDX_ON_EN BIT(12)
+#define YT8521_LED_HDX_ON_EN BIT(11)
+#define YT8521_LED_TXACT_BLK_EN BIT(10)
+#define YT8521_LED_RXACT_BLK_EN BIT(9)
+#define YT8521_LED_1000_ON_EN BIT(6)
+#define YT8521_LED_100_ON_EN BIT(5)
+#define YT8521_LED_10_ON_EN BIT(4)
+
#define YTPHY_MISC_CONFIG_REG 0xA006
#define YTPHY_MCR_FIBER_SPEED_MASK BIT(0)
#define YTPHY_MCR_FIBER_1000BX (0x1 << 0)
@@ -1681,6 +1695,106 @@ err_restore_page:
return phy_restore_page(phydev, old_page, ret);
}
+static const unsigned long supported_trgs = (BIT(TRIGGER_NETDEV_FULL_DUPLEX) |
+ BIT(TRIGGER_NETDEV_HALF_DUPLEX) |
+ BIT(TRIGGER_NETDEV_LINK) |
+ BIT(TRIGGER_NETDEV_LINK_10) |
+ BIT(TRIGGER_NETDEV_LINK_100) |
+ BIT(TRIGGER_NETDEV_LINK_1000) |
+ BIT(TRIGGER_NETDEV_RX) |
+ BIT(TRIGGER_NETDEV_TX));
+
+static int yt8521_led_hw_is_supported(struct phy_device *phydev, u8 index,
+ unsigned long rules)
+{
+ if (index >= YT8521_MAX_LEDS)
+ return -EINVAL;
+
+ /* All combinations of the supported triggers are allowed */
+ if (rules & ~supported_trgs)
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+static int yt8521_led_hw_control_set(struct phy_device *phydev, u8 index,
+ unsigned long rules)
+{
+ u16 val = 0;
+
+ if (index >= YT8521_MAX_LEDS)
+ return -EINVAL;
+
+ if (test_bit(TRIGGER_NETDEV_LINK, &rules)) {
+ val |= YT8521_LED_10_ON_EN;
+ val |= YT8521_LED_100_ON_EN;
+ val |= YT8521_LED_1000_ON_EN;
+ }
+
+ if (test_bit(TRIGGER_NETDEV_LINK_10, &rules))
+ val |= YT8521_LED_10_ON_EN;
+
+ if (test_bit(TRIGGER_NETDEV_LINK_100, &rules))
+ val |= YT8521_LED_100_ON_EN;
+
+ if (test_bit(TRIGGER_NETDEV_LINK_1000, &rules))
+ val |= YT8521_LED_1000_ON_EN;
+
+ if (test_bit(TRIGGER_NETDEV_FULL_DUPLEX, &rules))
+ val |= YT8521_LED_FDX_ON_EN;
+
+ if (test_bit(TRIGGER_NETDEV_HALF_DUPLEX, &rules))
+ val |= YT8521_LED_HDX_ON_EN;
+
+ if (test_bit(TRIGGER_NETDEV_TX, &rules) ||
+ test_bit(TRIGGER_NETDEV_RX, &rules))
+ val |= YT8521_LED_ACT_BLK_IND;
+
+ if (test_bit(TRIGGER_NETDEV_TX, &rules))
+ val |= YT8521_LED_TXACT_BLK_EN;
+
+ if (test_bit(TRIGGER_NETDEV_RX, &rules))
+ val |= YT8521_LED_RXACT_BLK_EN;
+
+ return ytphy_write_ext(phydev, YT8521_LED0_CFG_REG + index, val);
+}
+
+static int yt8521_led_hw_control_get(struct phy_device *phydev, u8 index,
+ unsigned long *rules)
+{
+ int val;
+
+ if (index >= YT8521_MAX_LEDS)
+ return -EINVAL;
+
+ val = ytphy_read_ext(phydev, YT8521_LED0_CFG_REG + index);
+ if (val < 0)
+ return val;
+
+ if (val & YT8521_LED_TXACT_BLK_EN || val & YT8521_LED_ACT_BLK_IND)
+ __set_bit(TRIGGER_NETDEV_TX, rules);
+
+ if (val & YT8521_LED_RXACT_BLK_EN || val & YT8521_LED_ACT_BLK_IND)
+ __set_bit(TRIGGER_NETDEV_RX, rules);
+
+ if (val & YT8521_LED_FDX_ON_EN)
+ __set_bit(TRIGGER_NETDEV_FULL_DUPLEX, rules);
+
+ if (val & YT8521_LED_HDX_ON_EN)
+ __set_bit(TRIGGER_NETDEV_HALF_DUPLEX, rules);
+
+ if (val & YT8521_LED_1000_ON_EN)
+ __set_bit(TRIGGER_NETDEV_LINK_1000, rules);
+
+ if (val & YT8521_LED_100_ON_EN)
+ __set_bit(TRIGGER_NETDEV_LINK_100, rules);
+
+ if (val & YT8521_LED_10_ON_EN)
+ __set_bit(TRIGGER_NETDEV_LINK_10, rules);
+
+ return 0;
+}
+
static int yt8531_config_init(struct phy_device *phydev)
{
struct device_node *node = phydev->mdio.dev.of_node;
@@ -2920,6 +3034,9 @@ static struct phy_driver motorcomm_phy_drvs[] = {
.soft_reset = yt8521_soft_reset,
.suspend = yt8521_suspend,
.resume = yt8521_resume,
+ .led_hw_is_supported = yt8521_led_hw_is_supported,
+ .led_hw_control_set = yt8521_led_hw_control_set,
+ .led_hw_control_get = yt8521_led_hw_control_get,
},
{
PHY_ID_MATCH_EXACT(PHY_ID_YT8531),
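A minimal usage sketch (not part of the patch): how a composite netdev-trigger rule set collapses into one extended-register write, using the bit names this patch defines. This is what yt8521_led_hw_control_set() computes for TRIGGER_NETDEV_LINK plus TX/RX activity on LED 1:

	/* LED1: solid on for any link speed, blink on TX/RX activity */
	u16 val = YT8521_LED_10_ON_EN | YT8521_LED_100_ON_EN |
		  YT8521_LED_1000_ON_EN |
		  YT8521_LED_ACT_BLK_IND |
		  YT8521_LED_TXACT_BLK_EN | YT8521_LED_RXACT_BLK_EN;

	ytphy_write_ext(phydev, YT8521_LED0_CFG_REG + 1, val);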
diff --git a/drivers/net/phy/mscc/mscc.h b/drivers/net/phy/mscc/mscc.h
index 2bfe314ef881..2d8eca54c40a 100644
--- a/drivers/net/phy/mscc/mscc.h
+++ b/drivers/net/phy/mscc/mscc.h
@@ -196,6 +196,9 @@ enum rgmii_clock_delay {
#define MSCC_PHY_EXTENDED_INT_MS_EGR BIT(9)
/* Extended Page 3 Registers */
+#define MSCC_PHY_SERDES_PCS_CTRL 16
+#define MSCC_PHY_SERDES_ANEG BIT(7)
+
#define MSCC_PHY_SERDES_TX_VALID_CNT 21
#define MSCC_PHY_SERDES_TX_CRC_ERR_CNT 22
#define MSCC_PHY_SERDES_RX_VALID_CNT 28
diff --git a/drivers/net/phy/mscc/mscc_main.c b/drivers/net/phy/mscc/mscc_main.c
index 24c75903f535..ef0ef1570d39 100644
--- a/drivers/net/phy/mscc/mscc_main.c
+++ b/drivers/net/phy/mscc/mscc_main.c
@@ -2202,6 +2202,28 @@ static int vsc85xx_read_status(struct phy_device *phydev)
return genphy_read_status(phydev);
}
+static unsigned int vsc85xx_inband_caps(struct phy_device *phydev,
+ phy_interface_t interface)
+{
+ if (interface != PHY_INTERFACE_MODE_SGMII &&
+ interface != PHY_INTERFACE_MODE_QSGMII)
+ return 0;
+
+ return LINK_INBAND_DISABLE | LINK_INBAND_ENABLE;
+}
+
+static int vsc85xx_config_inband(struct phy_device *phydev, unsigned int modes)
+{
+ u16 reg_val = 0;
+
+ if (modes == LINK_INBAND_ENABLE)
+ reg_val = MSCC_PHY_SERDES_ANEG;
+
+ return phy_modify_paged(phydev, MSCC_PHY_PAGE_EXTENDED_3,
+ MSCC_PHY_SERDES_PCS_CTRL, MSCC_PHY_SERDES_ANEG,
+ reg_val);
+}
+
static int vsc8514_probe(struct phy_device *phydev)
{
struct vsc8531_private *vsc8531;
@@ -2414,6 +2436,8 @@ static struct phy_driver vsc85xx_driver[] = {
.get_sset_count = &vsc85xx_get_sset_count,
.get_strings = &vsc85xx_get_strings,
.get_stats = &vsc85xx_get_stats,
+ .inband_caps = vsc85xx_inband_caps,
+ .config_inband = vsc85xx_config_inband,
},
{
.phy_id = PHY_ID_VSC8514,
@@ -2437,6 +2461,8 @@ static struct phy_driver vsc85xx_driver[] = {
.get_sset_count = &vsc85xx_get_sset_count,
.get_strings = &vsc85xx_get_strings,
.get_stats = &vsc85xx_get_stats,
+ .inband_caps = vsc85xx_inband_caps,
+ .config_inband = vsc85xx_config_inband,
},
{
.phy_id = PHY_ID_VSC8530,
@@ -2557,6 +2583,8 @@ static struct phy_driver vsc85xx_driver[] = {
.get_sset_count = &vsc85xx_get_sset_count,
.get_strings = &vsc85xx_get_strings,
.get_stats = &vsc85xx_get_stats,
+ .inband_caps = vsc85xx_inband_caps,
+ .config_inband = vsc85xx_config_inband,
},
{
.phy_id = PHY_ID_VSC856X,
@@ -2579,6 +2607,8 @@ static struct phy_driver vsc85xx_driver[] = {
.get_sset_count = &vsc85xx_get_sset_count,
.get_strings = &vsc85xx_get_strings,
.get_stats = &vsc85xx_get_stats,
+ .inband_caps = vsc85xx_inband_caps,
+ .config_inband = vsc85xx_config_inband,
},
{
.phy_id = PHY_ID_VSC8572,
@@ -2605,6 +2635,8 @@ static struct phy_driver vsc85xx_driver[] = {
.get_sset_count = &vsc85xx_get_sset_count,
.get_strings = &vsc85xx_get_strings,
.get_stats = &vsc85xx_get_stats,
+ .inband_caps = vsc85xx_inband_caps,
+ .config_inband = vsc85xx_config_inband,
},
{
.phy_id = PHY_ID_VSC8574,
@@ -2631,6 +2663,8 @@ static struct phy_driver vsc85xx_driver[] = {
.get_sset_count = &vsc85xx_get_sset_count,
.get_strings = &vsc85xx_get_strings,
.get_stats = &vsc85xx_get_stats,
+ .inband_caps = vsc85xx_inband_caps,
+ .config_inband = vsc85xx_config_inband,
},
{
.phy_id = PHY_ID_VSC8575,
@@ -2655,6 +2689,8 @@ static struct phy_driver vsc85xx_driver[] = {
.get_sset_count = &vsc85xx_get_sset_count,
.get_strings = &vsc85xx_get_strings,
.get_stats = &vsc85xx_get_stats,
+ .inband_caps = vsc85xx_inband_caps,
+ .config_inband = vsc85xx_config_inband,
},
{
.phy_id = PHY_ID_VSC8582,
@@ -2679,6 +2715,8 @@ static struct phy_driver vsc85xx_driver[] = {
.get_sset_count = &vsc85xx_get_sset_count,
.get_strings = &vsc85xx_get_strings,
.get_stats = &vsc85xx_get_stats,
+ .inband_caps = vsc85xx_inband_caps,
+ .config_inband = vsc85xx_config_inband,
},
{
.phy_id = PHY_ID_VSC8584,
@@ -2704,6 +2742,8 @@ static struct phy_driver vsc85xx_driver[] = {
.get_strings = &vsc85xx_get_strings,
.get_stats = &vsc85xx_get_stats,
.link_change_notify = &vsc85xx_link_change_notify,
+ .inband_caps = vsc85xx_inband_caps,
+ .config_inband = vsc85xx_config_inband,
}
};
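For context, a hedged sketch of the consumer side. Assuming phylib's phy_inband_caps()/phy_config_inband() helpers dispatch to the .inband_caps/.config_inband hooks added above, phylink (or a MAC driver) can query and program in-band autoneg along these lines:

	/* Sketch only: turn off SGMII in-band AN if the PHY supports that */
	if (phy_inband_caps(phydev, PHY_INTERFACE_MODE_SGMII) &
	    LINK_INBAND_DISABLE) {
		int err = phy_config_inband(phydev, LINK_INBAND_DISABLE);

		if (err)
			phydev_err(phydev, "cannot disable in-band AN\n");
	}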
diff --git a/drivers/net/phy/mxl-86110.c b/drivers/net/phy/mxl-86110.c
index ff2a3a22bd5b..e5d137a37a1d 100644
--- a/drivers/net/phy/mxl-86110.c
+++ b/drivers/net/phy/mxl-86110.c
@@ -15,6 +15,7 @@
/* PHY ID */
#define PHY_ID_MXL86110 0xc1335580
+#define PHY_ID_MXL86111 0xc1335588
/* required to access extended registers */
#define MXL86110_EXTD_REG_ADDR_OFFSET 0x1E
@@ -22,7 +23,15 @@
#define PHY_IRQ_ENABLE_REG 0x12
#define PHY_IRQ_ENABLE_REG_WOL BIT(6)
-/* SyncE Configuration Register - COM_EXT SYNCE_CFG */
+/* MXL86111 uses different register pages for EXTD access */
+/* SerDes/PHY Control Access Register - COM_EXT_SMI_SDS_PHY */
+#define MXL86111_EXT_SMI_SDS_PHY_REG 0xA000
+#define MXL86111_EXT_SMI_SDS_PHYSPACE_MASK BIT(1)
+#define MXL86111_EXT_SMI_SDS_PHYFIBER_SPACE (0x1 << 1)
+#define MXL86111_EXT_SMI_SDS_PHYUTP_SPACE (0x0 << 1)
+#define MXL86111_EXT_SMI_SDS_PHY_AUTO 0xff
+
+/* SyncE Configuration Register - COM_EXT_SYNCE_CFG */
#define MXL86110_EXT_SYNCE_CFG_REG 0xA012
#define MXL86110_EXT_SYNCE_CFG_CLK_FRE_SEL BIT(4)
#define MXL86110_EXT_SYNCE_CFG_EN_SYNC_E_DURING_LNKDN BIT(5)
@@ -71,6 +80,11 @@
#define MXL86110_MAX_LEDS 3
/* LED registers and defines */
+#define MXL86110_COM_EXT_LED_GEN_CFG 0xA00B
+# define MXL86110_COM_EXT_LED_GEN_CFG_LFM(x) ((BIT(0) | BIT(1)) << (3 * (x)))
+# define MXL86110_COM_EXT_LED_GEN_CFG_LFME(x) (BIT(0) << (3 * (x)))
+# define MXL86110_COM_EXT_LED_GEN_CFG_LFE(x) (BIT(2) << (3 * (x)))
+
#define MXL86110_LED0_CFG_REG 0xA00C
#define MXL86110_LED1_CFG_REG 0xA00D
#define MXL86110_LED2_CFG_REG 0xA00E
@@ -110,9 +124,67 @@
/* Chip Configuration Register - COM_EXT_CHIP_CFG */
#define MXL86110_EXT_CHIP_CFG_REG 0xA001
+#define MXL86111_EXT_CHIP_CFG_MODE_SEL_MASK GENMASK(2, 0)
+#define MXL86111_EXT_CHIP_CFG_MODE_UTP_TO_RGMII 0
+#define MXL86111_EXT_CHIP_CFG_MODE_FIBER_TO_RGMII 1
+#define MXL86111_EXT_CHIP_CFG_MODE_UTP_FIBER_TO_RGMII 2
+#define MXL86111_EXT_CHIP_CFG_MODE_UTP_TO_SGMII 3
+#define MXL86111_EXT_CHIP_CFG_MODE_SGPHY_TO_RGMAC 4
+#define MXL86111_EXT_CHIP_CFG_MODE_SGMAC_TO_RGPHY 5
+#define MXL86111_EXT_CHIP_CFG_MODE_UTP_TO_FIBER_AUTO 6
+#define MXL86111_EXT_CHIP_CFG_MODE_UTP_TO_FIBER_FORCE 7
+
+#define MXL86111_EXT_CHIP_CFG_CLDO_MASK GENMASK(5, 4)
+#define MXL86111_EXT_CHIP_CFG_CLDO_3V3 0
+#define MXL86111_EXT_CHIP_CFG_CLDO_2V5 1
+#define MXL86111_EXT_CHIP_CFG_CLDO_1V8_2 2
+#define MXL86111_EXT_CHIP_CFG_CLDO_1V8_3 3
+#define MXL86111_EXT_CHIP_CFG_CLDO_SHIFT 4
+#define MXL86111_EXT_CHIP_CFG_ELDO BIT(6)
#define MXL86110_EXT_CHIP_CFG_RXDLY_ENABLE BIT(8)
#define MXL86110_EXT_CHIP_CFG_SW_RST_N_MODE BIT(15)
+/* Specific Status Register - PHY_STAT */
+#define MXL86111_PHY_STAT_REG 0x11
+#define MXL86111_PHY_STAT_SPEED_MASK GENMASK(15, 14)
+#define MXL86111_PHY_STAT_SPEED_OFFSET 14
+#define MXL86111_PHY_STAT_SPEED_10M 0x0
+#define MXL86111_PHY_STAT_SPEED_100M 0x1
+#define MXL86111_PHY_STAT_SPEED_1000M 0x2
+#define MXL86111_PHY_STAT_DPX_OFFSET 13
+#define MXL86111_PHY_STAT_DPX BIT(13)
+#define MXL86111_PHY_STAT_LSRT BIT(10)
+
+/* 3 PHY reg page modes; auto mode combines UTP and fiber modes */
+#define MXL86111_MODE_FIBER 0x1
+#define MXL86111_MODE_UTP 0x2
+#define MXL86111_MODE_AUTO 0x3
+
+/* FIBER Auto-Negotiation link partner ability - SDS_AN_LPA */
+#define MXL86111_SDS_AN_LPA_PAUSE (0x3 << 7)
+#define MXL86111_SDS_AN_LPA_ASYM_PAUSE (0x2 << 7)
+
+/* Miscellaneous Control Register - COM_EXT_MISC_CFG */
+#define MXL86111_EXT_MISC_CONFIG_REG 0xa006
+#define MXL86111_EXT_MISC_CONFIG_FIB_SPEED_SEL BIT(0)
+#define MXL86111_EXT_MISC_CONFIG_FIB_SPEED_SEL_1000BX (0x1 << 0)
+#define MXL86111_EXT_MISC_CONFIG_FIB_SPEED_SEL_100BX (0x0 << 0)
+
+/* Phy fiber Link timer cfg2 Register - EXT_SDS_LINK_TIMER_CFG2 */
+#define MXL86111_EXT_SDS_LINK_TIMER_CFG2_REG 0xA5
+#define MXL86111_EXT_SDS_LINK_TIMER_CFG2_EN_AUTOSEN BIT(15)
+
+/* default values of PHY registers, required for dual media mode */
+#define MII_BMSR_DEFAULT_VAL 0x7949
+#define MII_ESTATUS_DEFAULT_VAL 0x2000
+
+/* Timeout in ms for PHY SW reset check in STD_CTRL/SDS_CTRL */
+#define BMCR_RESET_TIMEOUT 500
+
+/* PL P1 requires optimized RGMII timing for 1.8V RGMII voltage */
+#define MXL86111_PL_P1 0x500
+
/**
* __mxl86110_write_extended_reg() - write to a PHY's extended register
* @phydev: pointer to the PHY device structure
@@ -236,6 +308,29 @@ static int mxl86110_read_extended_reg(struct phy_device *phydev, u16 regnum)
}
/**
+ * mxl86110_modify_extended_reg() - modify bits of a PHY's extended register
+ * @phydev: pointer to the PHY device structure
+ * @regnum: register number to write
+ * @mask: bit mask of bits to clear
+ * @set: bit mask of bits to set
+ *
+ * Note: register value = (old register value & ~mask) | set.
+ *
+ * Return: 0 or negative error code
+ */
+static int mxl86110_modify_extended_reg(struct phy_device *phydev,
+ u16 regnum, u16 mask, u16 set)
+{
+ int ret;
+
+ phy_lock_mdio_bus(phydev);
+ ret = __mxl86110_modify_extended_reg(phydev, regnum, mask, set);
+ phy_unlock_mdio_bus(phydev);
+
+ return ret;
+}
+
+/**
* mxl86110_get_wol() - report if wake-on-lan is enabled
* @phydev: pointer to the phy_device
* @wol: a pointer to a &struct ethtool_wolinfo
@@ -394,6 +489,7 @@ static int mxl86110_led_hw_control_set(struct phy_device *phydev, u8 index,
unsigned long rules)
{
u16 val = 0;
+ int ret;
if (index >= MXL86110_MAX_LEDS)
return -EINVAL;
@@ -423,8 +519,43 @@ static int mxl86110_led_hw_control_set(struct phy_device *phydev, u8 index,
rules & BIT(TRIGGER_NETDEV_RX))
val |= MXL86110_LEDX_CFG_BLINK;
- return mxl86110_write_extended_reg(phydev,
+ ret = mxl86110_write_extended_reg(phydev,
MXL86110_LED0_CFG_REG + index, val);
+ if (ret)
+ return ret;
+
+ /* clear manual control bit */
+ ret = mxl86110_modify_extended_reg(phydev,
+ MXL86110_COM_EXT_LED_GEN_CFG,
+ MXL86110_COM_EXT_LED_GEN_CFG_LFE(index),
+ 0);
+
+ return ret;
+}
+
+static int mxl86110_led_brightness_set(struct phy_device *phydev,
+ u8 index, enum led_brightness value)
+{
+ u16 mask, set;
+ int ret;
+
+ if (index >= MXL86110_MAX_LEDS)
+ return -EINVAL;
+
+ /* force manual control */
+ set = MXL86110_COM_EXT_LED_GEN_CFG_LFE(index);
+ /* clear previous force mode */
+ mask = MXL86110_COM_EXT_LED_GEN_CFG_LFM(index);
+
+ /* force LED to be permanently on */
+ if (value != LED_OFF)
+ set |= MXL86110_COM_EXT_LED_GEN_CFG_LFME(index);
+
+ ret = mxl86110_modify_extended_reg(phydev,
+ MXL86110_COM_EXT_LED_GEN_CFG,
+ mask, set);
+
+ return ret;
}
/**
@@ -521,22 +652,15 @@ static int mxl86110_enable_led_activity_blink(struct phy_device *phydev)
}
/**
- * mxl86110_config_init() - initialize the PHY
+ * mxl86110_config_rgmii_delay() - configure RGMII delays
* @phydev: pointer to the phy_device
*
* Return: 0 or negative errno code
*/
-static int mxl86110_config_init(struct phy_device *phydev)
+static int mxl86110_config_rgmii_delay(struct phy_device *phydev)
{
- u16 val = 0;
int ret;
-
- phy_lock_mdio_bus(phydev);
-
- /* configure syncE / clk output */
- ret = mxl86110_synce_clk_cfg(phydev);
- if (ret < 0)
- goto out;
+ u16 val;
switch (phydev->interface) {
case PHY_INTERFACE_MODE_RGMII:
@@ -578,6 +702,31 @@ static int mxl86110_config_init(struct phy_device *phydev)
if (ret < 0)
goto out;
+out:
+ return ret;
+}
+
+/**
+ * mxl86110_config_init() - initialize the MXL86110 PHY
+ * @phydev: pointer to the phy_device
+ *
+ * Return: 0 or negative errno code
+ */
+static int mxl86110_config_init(struct phy_device *phydev)
+{
+ int ret;
+
+ phy_lock_mdio_bus(phydev);
+
+ /* configure syncE / clk output */
+ ret = mxl86110_synce_clk_cfg(phydev);
+ if (ret < 0)
+ goto out;
+
+ ret = mxl86110_config_rgmii_delay(phydev);
+ if (ret < 0)
+ goto out;
+
ret = mxl86110_enable_led_activity_blink(phydev);
if (ret < 0)
goto out;
@@ -589,6 +738,201 @@ out:
return ret;
}
+/**
+ * mxl86111_probe() - validate bootstrap chip config and set UTP page
+ * @phydev: pointer to the phy_device
+ *
+ * Return: 0 or negative errno code
+ */
+static int mxl86111_probe(struct phy_device *phydev)
+{
+ int chip_config;
+ u16 reg_page;
+ int ret;
+
+ chip_config = mxl86110_read_extended_reg(phydev, MXL86110_EXT_CHIP_CFG_REG);
+ if (chip_config < 0)
+ return chip_config;
+
+ switch (chip_config & MXL86111_EXT_CHIP_CFG_MODE_SEL_MASK) {
+ case MXL86111_EXT_CHIP_CFG_MODE_UTP_TO_SGMII:
+ case MXL86111_EXT_CHIP_CFG_MODE_UTP_TO_RGMII:
+ phydev->port = PORT_TP;
+ reg_page = MXL86111_EXT_SMI_SDS_PHYUTP_SPACE;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ ret = mxl86110_write_extended_reg(phydev,
+ MXL86111_EXT_SMI_SDS_PHY_REG,
+ reg_page);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+/**
+ * mxl86111_config_init() - initialize the MXL86111 PHY
+ * @phydev: pointer to the phy_device
+ *
+ * Return: 0 or negative errno code
+ */
+static int mxl86111_config_init(struct phy_device *phydev)
+{
+ int ret;
+
+ phy_lock_mdio_bus(phydev);
+
+ /* configure syncE / clk output */
+ ret = mxl86110_synce_clk_cfg(phydev);
+ if (ret < 0)
+ goto out;
+
+ switch (phydev->interface) {
+ case PHY_INTERFACE_MODE_100BASEX:
+ ret = __mxl86110_modify_extended_reg(phydev,
+ MXL86111_EXT_MISC_CONFIG_REG,
+ MXL86111_EXT_MISC_CONFIG_FIB_SPEED_SEL,
+ MXL86111_EXT_MISC_CONFIG_FIB_SPEED_SEL_100BX);
+ if (ret < 0)
+ goto out;
+ break;
+ case PHY_INTERFACE_MODE_1000BASEX:
+ case PHY_INTERFACE_MODE_SGMII:
+ ret = __mxl86110_modify_extended_reg(phydev,
+ MXL86111_EXT_MISC_CONFIG_REG,
+ MXL86111_EXT_MISC_CONFIG_FIB_SPEED_SEL,
+ MXL86111_EXT_MISC_CONFIG_FIB_SPEED_SEL_1000BX);
+ if (ret < 0)
+ goto out;
+ break;
+ default:
+ /* RGMII modes */
+ ret = mxl86110_config_rgmii_delay(phydev);
+ if (ret < 0)
+ goto out;
+ ret = __mxl86110_modify_extended_reg(phydev, MXL86110_EXT_RGMII_CFG1_REG,
+ MXL86110_EXT_RGMII_CFG1_FULL_MASK, 0);
+ if (ret < 0)
+ goto out;
+
+ /* PL P1 requires optimized RGMII timing for 1.8V RGMII voltage */
+ ret = __mxl86110_read_extended_reg(phydev, 0xf);
+ if (ret < 0)
+ goto out;
+
+ if (ret == MXL86111_PL_P1) {
+ ret = __mxl86110_read_extended_reg(phydev, MXL86110_EXT_CHIP_CFG_REG);
+ if (ret < 0)
+ goto out;
+
+ /* check if LDO is in 1.8V mode */
+ switch (FIELD_GET(MXL86111_EXT_CHIP_CFG_CLDO_MASK, ret)) {
+ case MXL86111_EXT_CHIP_CFG_CLDO_1V8_3:
+ case MXL86111_EXT_CHIP_CFG_CLDO_1V8_2:
+ ret = __mxl86110_write_extended_reg(phydev, 0xa010, 0xabff);
+ if (ret < 0)
+ goto out;
+ break;
+ default:
+ break;
+ }
+ }
+ break;
+ }
+
+ ret = mxl86110_enable_led_activity_blink(phydev);
+ if (ret < 0)
+ goto out;
+
+ ret = mxl86110_broadcast_cfg(phydev);
+out:
+ phy_unlock_mdio_bus(phydev);
+
+ return ret;
+}
+
+/**
+ * mxl86111_read_page() - read reg page
+ * @phydev: pointer to the phy_device
+ *
+ * Return: current reg space of mxl86111 or negative errno code
+ */
+static int mxl86111_read_page(struct phy_device *phydev)
+{
+ int page;
+
+ page = __mxl86110_read_extended_reg(phydev, MXL86111_EXT_SMI_SDS_PHY_REG);
+ if (page < 0)
+ return page;
+
+ return page & MXL86111_EXT_SMI_SDS_PHYSPACE_MASK;
+}
+
+/**
+ * mxl86111_write_page() - Set reg page
+ * @phydev: pointer to the phy_device
+ * @page: The reg page to set
+ *
+ * Return: 0 or negative errno code
+ */
+static int mxl86111_write_page(struct phy_device *phydev, int page)
+{
+ return __mxl86110_modify_extended_reg(phydev, MXL86111_EXT_SMI_SDS_PHY_REG,
+ MXL86111_EXT_SMI_SDS_PHYSPACE_MASK, page);
+}
+
+static int mxl86111_config_inband(struct phy_device *phydev, unsigned int modes)
+{
+ int ret;
+
+ ret = phy_modify_paged(phydev, MXL86111_EXT_SMI_SDS_PHYFIBER_SPACE,
+ MII_BMCR, BMCR_ANENABLE,
+ (modes == LINK_INBAND_DISABLE) ? 0 : BMCR_ANENABLE);
+ if (ret < 0)
+ return ret;
+
+ phy_lock_mdio_bus(phydev);
+
+ ret = __mxl86110_modify_extended_reg(phydev, MXL86111_EXT_SDS_LINK_TIMER_CFG2_REG,
+ MXL86111_EXT_SDS_LINK_TIMER_CFG2_EN_AUTOSEN,
+ (modes == LINK_INBAND_DISABLE) ? 0 :
+ MXL86111_EXT_SDS_LINK_TIMER_CFG2_EN_AUTOSEN);
+ if (ret < 0)
+ goto out;
+
+ ret = __mxl86110_modify_extended_reg(phydev, MXL86110_EXT_CHIP_CFG_REG,
+ MXL86110_EXT_CHIP_CFG_SW_RST_N_MODE, 0);
+ if (ret < 0)
+ goto out;
+
+ /* For fiber forced mode, power down/up to re-aneg */
+ if (modes != LINK_INBAND_DISABLE) {
+ __phy_modify(phydev, MII_BMCR, 0, BMCR_PDOWN);
+ usleep_range(1000, 1050);
+ __phy_modify(phydev, MII_BMCR, BMCR_PDOWN, 0);
+ }
+
+out:
+ phy_unlock_mdio_bus(phydev);
+
+ return ret;
+}
+
+static unsigned int mxl86111_inband_caps(struct phy_device *phydev,
+ phy_interface_t interface)
+{
+ switch (interface) {
+ case PHY_INTERFACE_MODE_100BASEX:
+ case PHY_INTERFACE_MODE_1000BASEX:
+ case PHY_INTERFACE_MODE_SGMII:
+ return LINK_INBAND_DISABLE | LINK_INBAND_ENABLE;
+ default:
+ return 0;
+ }
+}
+
static struct phy_driver mxl_phy_drvs[] = {
{
PHY_ID_MATCH_EXACT(PHY_ID_MXL86110),
@@ -596,9 +940,26 @@ static struct phy_driver mxl_phy_drvs[] = {
.config_init = mxl86110_config_init,
.get_wol = mxl86110_get_wol,
.set_wol = mxl86110_set_wol,
+ .led_brightness_set = mxl86110_led_brightness_set,
+ .led_hw_is_supported = mxl86110_led_hw_is_supported,
+ .led_hw_control_get = mxl86110_led_hw_control_get,
+ .led_hw_control_set = mxl86110_led_hw_control_set,
+ },
+ {
+ PHY_ID_MATCH_EXACT(PHY_ID_MXL86111),
+ .name = "MXL86111 Gigabit Ethernet",
+ .probe = mxl86111_probe,
+ .config_init = mxl86111_config_init,
+ .get_wol = mxl86110_get_wol,
+ .set_wol = mxl86110_set_wol,
+ .inband_caps = mxl86111_inband_caps,
+ .config_inband = mxl86111_config_inband,
+ .read_page = mxl86111_read_page,
+ .write_page = mxl86111_write_page,
+ .led_brightness_set = mxl86110_led_brightness_set,
.led_hw_is_supported = mxl86110_led_hw_is_supported,
- .led_hw_control_get = mxl86110_led_hw_control_get,
- .led_hw_control_set = mxl86110_led_hw_control_set,
+ .led_hw_control_get = mxl86110_led_hw_control_get,
+ .led_hw_control_set = mxl86110_led_hw_control_set,
},
};
@@ -606,11 +967,12 @@ module_phy_driver(mxl_phy_drvs);
static const struct mdio_device_id __maybe_unused mxl_tbl[] = {
{ PHY_ID_MATCH_EXACT(PHY_ID_MXL86110) },
+ { PHY_ID_MATCH_EXACT(PHY_ID_MXL86111) },
{ }
};
MODULE_DEVICE_TABLE(mdio, mxl_tbl);
-MODULE_DESCRIPTION("MaxLinear MXL86110 PHY driver");
+MODULE_DESCRIPTION("MaxLinear MXL86110/MXL86111 PHY driver");
MODULE_AUTHOR("Stefano Radaelli");
MODULE_LICENSE("GPL");
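With .read_page/.write_page wired up, the generic paged accessors can reach either MXL86111 register space without touching MXL86111_EXT_SMI_SDS_PHY_REG directly. A sketch, assuming (as the dual-media code suggests) that the PHY_STAT register is visible in both spaces:

	int utp_stat = phy_read_paged(phydev, MXL86111_EXT_SMI_SDS_PHYUTP_SPACE,
				      MXL86111_PHY_STAT_REG);
	int fib_stat = phy_read_paged(phydev, MXL86111_EXT_SMI_SDS_PHYFIBER_SPACE,
				      MXL86111_PHY_STAT_REG);

phy_read_paged() saves the current page, switches via mxl86111_write_page() and restores the original page afterwards.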
diff --git a/drivers/net/phy/realtek/realtek_main.c b/drivers/net/phy/realtek/realtek_main.c
index dd0d675149ad..82d8e1335215 100644
--- a/drivers/net/phy/realtek/realtek_main.c
+++ b/drivers/net/phy/realtek/realtek_main.c
@@ -10,6 +10,7 @@
#include <linux/bitops.h>
#include <linux/of.h>
#include <linux/phy.h>
+#include <linux/pm_wakeirq.h>
#include <linux/netdevice.h>
#include <linux/module.h>
#include <linux/delay.h>
@@ -31,6 +32,7 @@
#define RTL821x_INER 0x12
#define RTL8211B_INER_INIT 0x6400
#define RTL8211E_INER_LINK_STATUS BIT(10)
+#define RTL8211F_INER_PME BIT(7)
#define RTL8211F_INER_LINK_STATUS BIT(4)
#define RTL821x_INSR 0x13
@@ -96,17 +98,13 @@
#define RTL8211F_RXCR 0x15
#define RTL8211F_RX_DELAY BIT(3)
-/* RTL8211F WOL interrupt configuration */
-#define RTL8211F_INTBCR_PAGE 0xd40
-#define RTL8211F_INTBCR 0x16
-#define RTL8211F_INTBCR_INTB_PMEB BIT(5)
-
/* RTL8211F WOL settings */
-#define RTL8211F_WOL_SETTINGS_PAGE 0xd8a
+#define RTL8211F_WOL_PAGE 0xd8a
#define RTL8211F_WOL_SETTINGS_EVENTS 16
#define RTL8211F_WOL_EVENT_MAGIC BIT(12)
-#define RTL8211F_WOL_SETTINGS_STATUS 17
-#define RTL8211F_WOL_STATUS_RESET (BIT(15) | 0x1fff)
+#define RTL8211F_WOL_RST_RMSQ 17
+#define RTL8211F_WOL_RG_RSTB BIT(15)
+#define RTL8211F_WOL_RMSQ 0x1fff
/* RTL8211F Unique physical and multicast address (WOL) */
#define RTL8211F_PHYSICAL_ADDR_PAGE 0xd8c
@@ -172,7 +170,8 @@ struct rtl821x_priv {
u16 phycr2;
bool has_phycr2;
struct clk *clk;
- u32 saved_wolopts;
+ /* rtl8211f */
+ u16 iner;
};
static int rtl821x_read_page(struct phy_device *phydev)
@@ -255,6 +254,34 @@ static int rtl821x_probe(struct phy_device *phydev)
return 0;
}
+static int rtl8211f_probe(struct phy_device *phydev)
+{
+ struct device *dev = &phydev->mdio.dev;
+ int ret;
+
+ ret = rtl821x_probe(phydev);
+ if (ret < 0)
+ return ret;
+
+ /* Disable all PME events */
+ ret = phy_write_paged(phydev, RTL8211F_WOL_PAGE,
+ RTL8211F_WOL_SETTINGS_EVENTS, 0);
+ if (ret < 0)
+ return ret;
+
+ /* Mark this PHY as wakeup capable and register the interrupt as a
+ * wakeup IRQ if the PHY is marked as a wakeup source in firmware,
+ * and the interrupt is valid.
+ */
+ if (device_property_read_bool(dev, "wakeup-source") &&
+ phy_interrupt_is_valid(phydev)) {
+ device_set_wakeup_capable(dev, true);
+ devm_pm_set_wake_irq(dev, phydev->irq);
+ }
+
+ return ret;
+}
+
static int rtl8201_ack_interrupt(struct phy_device *phydev)
{
int err;
@@ -352,6 +379,7 @@ static int rtl8211e_config_intr(struct phy_device *phydev)
static int rtl8211f_config_intr(struct phy_device *phydev)
{
+ struct rtl821x_priv *priv = phydev->priv;
u16 val;
int err;
@@ -362,8 +390,10 @@ static int rtl8211f_config_intr(struct phy_device *phydev)
val = RTL8211F_INER_LINK_STATUS;
err = phy_write_paged(phydev, 0xa42, RTL821x_INER, val);
+ if (err == 0)
+ priv->iner = val;
} else {
- val = 0;
+ priv->iner = val = 0;
err = phy_write_paged(phydev, 0xa42, RTL821x_INER, val);
if (err)
return err;
@@ -426,21 +456,34 @@ static irqreturn_t rtl8211f_handle_interrupt(struct phy_device *phydev)
return IRQ_NONE;
}
- if (!(irq_status & RTL8211F_INER_LINK_STATUS))
- return IRQ_NONE;
+ if (irq_status & RTL8211F_INER_LINK_STATUS) {
+ phy_trigger_machine(phydev);
+ return IRQ_HANDLED;
+ }
- phy_trigger_machine(phydev);
+ if (irq_status & RTL8211F_INER_PME) {
+ pm_wakeup_event(&phydev->mdio.dev, 0);
+ return IRQ_HANDLED;
+ }
- return IRQ_HANDLED;
+ return IRQ_NONE;
}
static void rtl8211f_get_wol(struct phy_device *dev, struct ethtool_wolinfo *wol)
{
int wol_events;
+ /* If the PHY is not capable of waking the system, then WoL cannot
+ * be supported.
+ */
+ if (!device_can_wakeup(&dev->mdio.dev)) {
+ wol->supported = 0;
+ return;
+ }
+
wol->supported = WAKE_MAGIC;
- wol_events = phy_read_paged(dev, RTL8211F_WOL_SETTINGS_PAGE, RTL8211F_WOL_SETTINGS_EVENTS);
+ wol_events = phy_read_paged(dev, RTL8211F_WOL_PAGE, RTL8211F_WOL_SETTINGS_EVENTS);
if (wol_events < 0)
return;
@@ -453,6 +496,9 @@ static int rtl8211f_set_wol(struct phy_device *dev, struct ethtool_wolinfo *wol)
const u8 *mac_addr = dev->attached_dev->dev_addr;
int oldpage;
+ if (!device_can_wakeup(&dev->mdio.dev))
+ return -EOPNOTSUPP;
+
oldpage = phy_save_page(dev);
if (oldpage < 0)
goto err;
@@ -464,25 +510,23 @@ static int rtl8211f_set_wol(struct phy_device *dev, struct ethtool_wolinfo *wol)
__phy_write(dev, RTL8211F_PHYSICAL_ADDR_WORD1, mac_addr[3] << 8 | (mac_addr[2]));
__phy_write(dev, RTL8211F_PHYSICAL_ADDR_WORD2, mac_addr[5] << 8 | (mac_addr[4]));
- /* Enable magic packet matching and reset WOL status */
- rtl821x_write_page(dev, RTL8211F_WOL_SETTINGS_PAGE);
+ /* Enable magic packet matching */
+ rtl821x_write_page(dev, RTL8211F_WOL_PAGE);
__phy_write(dev, RTL8211F_WOL_SETTINGS_EVENTS, RTL8211F_WOL_EVENT_MAGIC);
- __phy_write(dev, RTL8211F_WOL_SETTINGS_STATUS, RTL8211F_WOL_STATUS_RESET);
-
- /* Enable the WOL interrupt */
- rtl821x_write_page(dev, RTL8211F_INTBCR_PAGE);
- __phy_set_bits(dev, RTL8211F_INTBCR, RTL8211F_INTBCR_INTB_PMEB);
+ /* Set the maximum packet size, and assert WoL reset */
+ __phy_write(dev, RTL8211F_WOL_RST_RMSQ, RTL8211F_WOL_RMSQ);
} else {
- /* Disable the WOL interrupt */
- rtl821x_write_page(dev, RTL8211F_INTBCR_PAGE);
- __phy_clear_bits(dev, RTL8211F_INTBCR, RTL8211F_INTBCR_INTB_PMEB);
-
- /* Disable magic packet matching and reset WOL status */
- rtl821x_write_page(dev, RTL8211F_WOL_SETTINGS_PAGE);
+ /* Disable magic packet matching */
+ rtl821x_write_page(dev, RTL8211F_WOL_PAGE);
__phy_write(dev, RTL8211F_WOL_SETTINGS_EVENTS, 0);
- __phy_write(dev, RTL8211F_WOL_SETTINGS_STATUS, RTL8211F_WOL_STATUS_RESET);
+
+ /* Place WoL in reset */
+ __phy_clear_bits(dev, RTL8211F_WOL_RST_RMSQ,
+ RTL8211F_WOL_RG_RSTB);
}
+ device_set_wakeup_enable(&dev->mdio.dev, !!(wol->wolopts & WAKE_MAGIC));
+
err:
return phy_restore_page(dev, oldpage, 0);
}
@@ -628,6 +672,52 @@ static int rtl821x_suspend(struct phy_device *phydev)
return ret;
}
+static int rtl8211f_suspend(struct phy_device *phydev)
+{
+ u16 wol_rst;
+ int ret;
+
+ ret = rtl821x_suspend(phydev);
+ if (ret < 0)
+ return ret;
+
+ /* If a PME event is enabled, then configure the interrupt for
+ * PME events only, disabling link interrupt. We avoid switching
+ * to PMEB mode as we don't have a status bit for that.
+ */
+ if (device_may_wakeup(&phydev->mdio.dev)) {
+ ret = phy_write_paged(phydev, 0xa42, RTL821x_INER,
+ RTL8211F_INER_PME);
+ if (ret < 0)
+ goto err;
+
+ /* Read the INSR to clear any pending interrupt */
+ phy_read_paged(phydev, RTL8211F_INSR_PAGE, RTL8211F_INSR);
+
+ /* Reset the WoL to ensure that an event is picked up.
+ * Unless we do this, even if we receive another packet,
+ * we may not have a PME interrupt raised.
+ */
+ ret = phy_read_paged(phydev, RTL8211F_WOL_PAGE,
+ RTL8211F_WOL_RST_RMSQ);
+ if (ret < 0)
+ goto err;
+
+ wol_rst = ret & ~RTL8211F_WOL_RG_RSTB;
+ ret = phy_write_paged(phydev, RTL8211F_WOL_PAGE,
+ RTL8211F_WOL_RST_RMSQ, wol_rst);
+ if (ret < 0)
+ goto err;
+
+ wol_rst |= RTL8211F_WOL_RG_RSTB;
+ ret = phy_write_paged(phydev, RTL8211F_WOL_PAGE,
+ RTL8211F_WOL_RST_RMSQ, wol_rst);
+ }
+
+err:
+ return ret;
+}
+
static int rtl821x_resume(struct phy_device *phydev)
{
struct rtl821x_priv *priv = phydev->priv;
@@ -645,10 +735,29 @@ static int rtl821x_resume(struct phy_device *phydev)
return 0;
}
+static int rtl8211f_resume(struct phy_device *phydev)
+{
+ struct rtl821x_priv *priv = phydev->priv;
+ int ret;
+
+ ret = rtl821x_resume(phydev);
+ if (ret < 0)
+ return ret;
+
+ /* If the device was programmed for a PME event, restore the interrupt
+ * enable so phylib can receive link state interrupts.
+ */
+ if (device_may_wakeup(&phydev->mdio.dev))
+ ret = phy_write_paged(phydev, 0xa42, RTL821x_INER, priv->iner);
+
+ return ret;
+}
+
static int rtl8211x_led_hw_is_supported(struct phy_device *phydev, u8 index,
unsigned long rules)
{
- const unsigned long mask = BIT(TRIGGER_NETDEV_LINK_10) |
+ const unsigned long mask = BIT(TRIGGER_NETDEV_LINK) |
+ BIT(TRIGGER_NETDEV_LINK_10) |
BIT(TRIGGER_NETDEV_LINK_100) |
BIT(TRIGGER_NETDEV_LINK_1000) |
BIT(TRIGGER_NETDEV_RX) |
@@ -706,6 +815,12 @@ static int rtl8211f_led_hw_control_get(struct phy_device *phydev, u8 index,
if (val & RTL8211F_LEDCR_LINK_1000)
__set_bit(TRIGGER_NETDEV_LINK_1000, rules);
+ if ((val & RTL8211F_LEDCR_LINK_10) &&
+ (val & RTL8211F_LEDCR_LINK_100) &&
+ (val & RTL8211F_LEDCR_LINK_1000)) {
+ __set_bit(TRIGGER_NETDEV_LINK, rules);
+ }
+
if (val & RTL8211F_LEDCR_ACT_TXRX) {
__set_bit(TRIGGER_NETDEV_RX, rules);
__set_bit(TRIGGER_NETDEV_TX, rules);
@@ -723,14 +838,20 @@ static int rtl8211f_led_hw_control_set(struct phy_device *phydev, u8 index,
if (index >= RTL8211x_LED_COUNT)
return -EINVAL;
- if (test_bit(TRIGGER_NETDEV_LINK_10, &rules))
+ if (test_bit(TRIGGER_NETDEV_LINK, &rules) ||
+ test_bit(TRIGGER_NETDEV_LINK_10, &rules)) {
reg |= RTL8211F_LEDCR_LINK_10;
+ }
- if (test_bit(TRIGGER_NETDEV_LINK_100, &rules))
+ if (test_bit(TRIGGER_NETDEV_LINK, &rules) ||
+ test_bit(TRIGGER_NETDEV_LINK_100, &rules)) {
reg |= RTL8211F_LEDCR_LINK_100;
+ }
- if (test_bit(TRIGGER_NETDEV_LINK_1000, &rules))
+ if (test_bit(TRIGGER_NETDEV_LINK, &rules) ||
+ test_bit(TRIGGER_NETDEV_LINK_1000, &rules)) {
reg |= RTL8211F_LEDCR_LINK_1000;
+ }
if (test_bit(TRIGGER_NETDEV_RX, &rules) ||
test_bit(TRIGGER_NETDEV_TX, &rules)) {
@@ -778,6 +899,12 @@ static int rtl8211e_led_hw_control_get(struct phy_device *phydev, u8 index,
if (cr2 & RTL8211E_LEDCR2_LINK_1000)
__set_bit(TRIGGER_NETDEV_LINK_1000, rules);
+ if ((cr2 & RTL8211E_LEDCR2_LINK_10) &&
+ (cr2 & RTL8211E_LEDCR2_LINK_100) &&
+ (cr2 & RTL8211E_LEDCR2_LINK_1000)) {
+ __set_bit(TRIGGER_NETDEV_LINK, rules);
+ }
+
return ret;
}
@@ -805,14 +932,20 @@ static int rtl8211e_led_hw_control_set(struct phy_device *phydev, u8 index,
if (ret < 0)
return ret;
- if (test_bit(TRIGGER_NETDEV_LINK_10, &rules))
+ if (test_bit(TRIGGER_NETDEV_LINK, &rules) ||
+ test_bit(TRIGGER_NETDEV_LINK_10, &rules)) {
cr2 |= RTL8211E_LEDCR2_LINK_10;
+ }
- if (test_bit(TRIGGER_NETDEV_LINK_100, &rules))
+ if (test_bit(TRIGGER_NETDEV_LINK, &rules) ||
+ test_bit(TRIGGER_NETDEV_LINK_100, &rules)) {
cr2 |= RTL8211E_LEDCR2_LINK_100;
+ }
- if (test_bit(TRIGGER_NETDEV_LINK_1000, &rules))
+ if (test_bit(TRIGGER_NETDEV_LINK, &rules) ||
+ test_bit(TRIGGER_NETDEV_LINK_1000, &rules)) {
cr2 |= RTL8211E_LEDCR2_LINK_1000;
+ }
cr2 <<= RTL8211E_LEDCR2_SHIFT * index;
ret = rtl821x_modify_ext_page(phydev, RTL8211E_LEDCR_EXT_PAGE,
@@ -1038,7 +1171,7 @@ static int rtl822x_probe(struct phy_device *phydev)
return 0;
}
-static int rtl822xb_config_init(struct phy_device *phydev)
+static int rtl822x_set_serdes_option_mode(struct phy_device *phydev, bool gen1)
{
bool has_2500, has_sgmii;
u16 mode;
@@ -1073,15 +1206,18 @@ static int rtl822xb_config_init(struct phy_device *phydev)
/* the following sequence with magic numbers sets up the SerDes
* option mode
*/
- ret = phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x75f3, 0);
- if (ret < 0)
- return ret;
+
+ if (!gen1) {
+ ret = phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x75f3, 0);
+ if (ret < 0)
+ return ret;
+ }
ret = phy_modify_mmd_changed(phydev, MDIO_MMD_VEND1,
RTL822X_VND1_SERDES_OPTION,
RTL822X_VND1_SERDES_OPTION_MODE_MASK,
mode);
- if (ret < 0)
+ if (gen1 || ret < 0)
return ret;
ret = phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x6a04, 0x0503);
@@ -1095,6 +1231,16 @@ static int rtl822xb_config_init(struct phy_device *phydev)
return phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x6f11, 0x8020);
}
+static int rtl822x_config_init(struct phy_device *phydev)
+{
+ return rtl822x_set_serdes_option_mode(phydev, true);
+}
+
+static int rtl822xb_config_init(struct phy_device *phydev)
+{
+ return rtl822x_set_serdes_option_mode(phydev, false);
+}
+
static int rtl822xb_get_rate_matching(struct phy_device *phydev,
phy_interface_t iface)
{
@@ -1280,6 +1426,21 @@ static int rtl822x_c45_read_status(struct phy_device *phydev)
return 0;
}
+static int rtl822x_c45_soft_reset(struct phy_device *phydev)
+{
+ int ret, val;
+
+ ret = phy_modify_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_CTRL1,
+ MDIO_CTRL1_RESET, MDIO_CTRL1_RESET);
+ if (ret < 0)
+ return ret;
+
+ return phy_read_mmd_poll_timeout(phydev, MDIO_MMD_PMAPMD,
+ MDIO_CTRL1, val,
+ !(val & MDIO_CTRL1_RESET),
+ 5000, 100000, true);
+}
+
static int rtl822xb_c45_read_status(struct phy_device *phydev)
{
int ret;
@@ -1612,15 +1773,15 @@ static struct phy_driver realtek_drvs[] = {
}, {
PHY_ID_MATCH_EXACT(0x001cc916),
.name = "RTL8211F Gigabit Ethernet",
- .probe = rtl821x_probe,
+ .probe = rtl8211f_probe,
.config_init = &rtl8211f_config_init,
.read_status = rtlgen_read_status,
.config_intr = &rtl8211f_config_intr,
.handle_interrupt = rtl8211f_handle_interrupt,
.set_wol = rtl8211f_set_wol,
.get_wol = rtl8211f_get_wol,
- .suspend = rtl821x_suspend,
- .resume = rtl821x_resume,
+ .suspend = rtl8211f_suspend,
+ .resume = rtl8211f_resume,
.read_page = rtl821x_read_page,
.write_page = rtl821x_write_page,
.flags = PHY_ALWAYS_CALL_SUSPEND,
@@ -1675,13 +1836,13 @@ static struct phy_driver realtek_drvs[] = {
}, {
PHY_ID_MATCH_EXACT(0x001cc838),
.name = "RTL8226-CG 2.5Gbps PHY",
- .get_features = rtl822x_get_features,
- .config_aneg = rtl822x_config_aneg,
- .read_status = rtl822x_read_status,
- .suspend = genphy_suspend,
- .resume = rtlgen_resume,
- .read_page = rtl821x_read_page,
- .write_page = rtl821x_write_page,
+ .soft_reset = rtl822x_c45_soft_reset,
+ .get_features = rtl822x_c45_get_features,
+ .config_aneg = rtl822x_c45_config_aneg,
+ .config_init = rtl822x_config_init,
+ .read_status = rtl822xb_c45_read_status,
+ .suspend = genphy_c45_pma_suspend,
+ .resume = rtlgen_c45_resume,
}, {
PHY_ID_MATCH_EXACT(0x001cc848),
.name = "RTL8226B-CG_RTL8221B-CG 2.5Gbps PHY",
diff --git a/drivers/net/ppp/Kconfig b/drivers/net/ppp/Kconfig
index 8c9ed1889d1a..a1806b4b84be 100644
--- a/drivers/net/ppp/Kconfig
+++ b/drivers/net/ppp/Kconfig
@@ -85,9 +85,8 @@ config PPP_FILTER
config PPP_MPPE
tristate "PPP MPPE compression (encryption)"
depends on PPP
- select CRYPTO
- select CRYPTO_SHA1
select CRYPTO_LIB_ARC4
+ select CRYPTO_LIB_SHA1
help
Support for the MPPE Encryption protocol, as employed by the
Microsoft Point-to-Point Tunneling Protocol.
diff --git a/drivers/net/ppp/bsd_comp.c b/drivers/net/ppp/bsd_comp.c
index 55954594e157..f385b759d5cf 100644
--- a/drivers/net/ppp/bsd_comp.c
+++ b/drivers/net/ppp/bsd_comp.c
@@ -406,7 +406,7 @@ static void *bsd_alloc (unsigned char *options, int opt_len, int decomp)
* Allocate space for the dictionary. This may be more than one page in
* length.
*/
- db->dict = vmalloc(array_size(hsize, sizeof(struct bsd_dict)));
+ db->dict = vmalloc_array(hsize, sizeof(struct bsd_dict));
if (!db->dict)
{
bsd_free (db);
@@ -425,7 +425,7 @@ static void *bsd_alloc (unsigned char *options, int opt_len, int decomp)
*/
else
{
- db->lens = vmalloc(array_size(sizeof(db->lens[0]), (maxmaxcode + 1)));
+ db->lens = vmalloc_array(maxmaxcode + 1, sizeof(db->lens[0]));
if (!db->lens)
{
bsd_free (db);
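The vmalloc_array() conversion is about overflow safety rather than behaviour: the helper fails cleanly on multiplication overflow instead of allocating a truncated buffer. An equivalent open-coded form, for comparison:

	size_t bytes;

	if (check_mul_overflow(hsize, sizeof(struct bsd_dict), &bytes))
		return NULL;	/* vmalloc_array() likewise returns NULL */
	db->dict = vmalloc(bytes);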
diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index 702a7f7183ce..f9f0f16c41d1 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -179,11 +179,11 @@ struct channel {
struct ppp_channel *chan; /* public channel data structure */
struct rw_semaphore chan_sem; /* protects `chan' during chan ioctl */
spinlock_t downl; /* protects `chan', file.xq dequeue */
- struct ppp *ppp; /* ppp unit we're connected to */
+ struct ppp __rcu *ppp; /* ppp unit we're connected to */
struct net *chan_net; /* the net channel belongs to */
netns_tracker ns_tracker;
struct list_head clist; /* link in list of channels per unit */
- rwlock_t upl; /* protects `ppp' and 'bridge' */
+ spinlock_t upl; /* protects `ppp' and 'bridge' */
struct channel __rcu *bridge; /* "bridged" ppp channel */
#ifdef CONFIG_PPP_MULTILINK
u8 avail; /* flag used in multilink stuff */
@@ -645,34 +645,34 @@ static struct bpf_prog *compat_ppp_get_filter(struct sock_fprog32 __user *p)
*/
static int ppp_bridge_channels(struct channel *pch, struct channel *pchb)
{
- write_lock_bh(&pch->upl);
- if (pch->ppp ||
+ spin_lock(&pch->upl);
+ if (rcu_dereference_protected(pch->ppp, lockdep_is_held(&pch->upl)) ||
rcu_dereference_protected(pch->bridge, lockdep_is_held(&pch->upl))) {
- write_unlock_bh(&pch->upl);
+ spin_unlock(&pch->upl);
return -EALREADY;
}
refcount_inc(&pchb->file.refcnt);
rcu_assign_pointer(pch->bridge, pchb);
- write_unlock_bh(&pch->upl);
+ spin_unlock(&pch->upl);
- write_lock_bh(&pchb->upl);
- if (pchb->ppp ||
+ spin_lock(&pchb->upl);
+ if (rcu_dereference_protected(pchb->ppp, lockdep_is_held(&pchb->upl)) ||
rcu_dereference_protected(pchb->bridge, lockdep_is_held(&pchb->upl))) {
- write_unlock_bh(&pchb->upl);
+ spin_unlock(&pchb->upl);
goto err_unset;
}
refcount_inc(&pch->file.refcnt);
rcu_assign_pointer(pchb->bridge, pch);
- write_unlock_bh(&pchb->upl);
+ spin_unlock(&pchb->upl);
return 0;
err_unset:
- write_lock_bh(&pch->upl);
+ spin_lock(&pch->upl);
/* Re-read pch->bridge with upl held in case it was modified concurrently */
pchb = rcu_dereference_protected(pch->bridge, lockdep_is_held(&pch->upl));
RCU_INIT_POINTER(pch->bridge, NULL);
- write_unlock_bh(&pch->upl);
+ spin_unlock(&pch->upl);
synchronize_rcu();
if (pchb)
@@ -686,25 +686,25 @@ static int ppp_unbridge_channels(struct channel *pch)
{
struct channel *pchb, *pchbb;
- write_lock_bh(&pch->upl);
+ spin_lock(&pch->upl);
pchb = rcu_dereference_protected(pch->bridge, lockdep_is_held(&pch->upl));
if (!pchb) {
- write_unlock_bh(&pch->upl);
+ spin_unlock(&pch->upl);
return -EINVAL;
}
RCU_INIT_POINTER(pch->bridge, NULL);
- write_unlock_bh(&pch->upl);
+ spin_unlock(&pch->upl);
/* Only modify pchb if phcb->bridge points back to pch.
* If not, it implies that there has been a race unbridging (and possibly
* even rebridging) pchb. We should leave pchb alone to avoid either a
* refcount underflow, or breaking another established bridge instance.
*/
- write_lock_bh(&pchb->upl);
+ spin_lock(&pchb->upl);
pchbb = rcu_dereference_protected(pchb->bridge, lockdep_is_held(&pchb->upl));
if (pchbb == pch)
RCU_INIT_POINTER(pchb->bridge, NULL);
- write_unlock_bh(&pchb->upl);
+ spin_unlock(&pchb->upl);
synchronize_rcu();
@@ -2158,10 +2158,9 @@ static int ppp_mp_explode(struct ppp *ppp, struct sk_buff *skb)
#endif /* CONFIG_PPP_MULTILINK */
/* Try to send data out on a channel */
-static void __ppp_channel_push(struct channel *pch)
+static void __ppp_channel_push(struct channel *pch, struct ppp *ppp)
{
struct sk_buff *skb;
- struct ppp *ppp;
spin_lock(&pch->downl);
if (pch->chan) {
@@ -2180,7 +2179,6 @@ static void __ppp_channel_push(struct channel *pch)
spin_unlock(&pch->downl);
/* see if there is anything from the attached unit to be sent */
if (skb_queue_empty(&pch->file.xq)) {
- ppp = pch->ppp;
if (ppp)
__ppp_xmit_process(ppp, NULL);
}
@@ -2189,19 +2187,21 @@ static void __ppp_channel_push(struct channel *pch)
static void ppp_channel_push(struct channel *pch)
{
struct ppp_xmit_recursion *xmit_recursion;
+ struct ppp *ppp;
- read_lock_bh(&pch->upl);
- if (pch->ppp) {
- xmit_recursion = this_cpu_ptr(pch->ppp->xmit_recursion);
- local_lock_nested_bh(&pch->ppp->xmit_recursion->bh_lock);
+ rcu_read_lock_bh();
+ ppp = rcu_dereference_bh(pch->ppp);
+ if (ppp) {
+ xmit_recursion = this_cpu_ptr(ppp->xmit_recursion);
+ local_lock_nested_bh(&ppp->xmit_recursion->bh_lock);
xmit_recursion->owner = current;
- __ppp_channel_push(pch);
+ __ppp_channel_push(pch, ppp);
xmit_recursion->owner = NULL;
- local_unlock_nested_bh(&pch->ppp->xmit_recursion->bh_lock);
+ local_unlock_nested_bh(&ppp->xmit_recursion->bh_lock);
} else {
- __ppp_channel_push(pch);
+ __ppp_channel_push(pch, NULL);
}
- read_unlock_bh(&pch->upl);
+ rcu_read_unlock_bh();
}
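The pch->ppp conversion follows the standard RCU publish/read pattern: writers serialize on pch->upl and publish with rcu_assign_pointer(); hot-path readers run lockless in BH context. Condensed from the hunks above:

	/* writer side (process context, ppp_connect_channel()) */
	spin_lock(&pch->upl);
	rcu_assign_pointer(pch->ppp, ppp);
	spin_unlock(&pch->upl);

	/* reader side (BH context, ppp_channel_push()) */
	rcu_read_lock_bh();
	ppp = rcu_dereference_bh(pch->ppp);
	if (ppp)
		__ppp_xmit_process(ppp, NULL);
	rcu_read_unlock_bh();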
/*
@@ -2303,6 +2303,7 @@ void
ppp_input(struct ppp_channel *chan, struct sk_buff *skb)
{
struct channel *pch = chan->ppp;
+ struct ppp *ppp;
int proto;
if (!pch) {
@@ -2314,18 +2315,19 @@ ppp_input(struct ppp_channel *chan, struct sk_buff *skb)
if (ppp_channel_bridge_input(pch, skb))
return;
- read_lock_bh(&pch->upl);
+ rcu_read_lock_bh();
+ ppp = rcu_dereference_bh(pch->ppp);
if (!ppp_decompress_proto(skb)) {
kfree_skb(skb);
- if (pch->ppp) {
- ++pch->ppp->dev->stats.rx_length_errors;
- ppp_receive_error(pch->ppp);
+ if (ppp) {
+ ++ppp->dev->stats.rx_length_errors;
+ ppp_receive_error(ppp);
}
goto done;
}
proto = PPP_PROTO(skb);
- if (!pch->ppp || proto >= 0xc000 || proto == PPP_CCPFRAG) {
+ if (!ppp || proto >= 0xc000 || proto == PPP_CCPFRAG) {
/* put it on the channel queue */
skb_queue_tail(&pch->file.rq, skb);
/* drop old frames if queue too long */
@@ -2334,11 +2336,11 @@ ppp_input(struct ppp_channel *chan, struct sk_buff *skb)
kfree_skb(skb);
wake_up_interruptible(&pch->file.rwait);
} else {
- ppp_do_recv(pch->ppp, skb, pch);
+ ppp_do_recv(ppp, skb, pch);
}
done:
- read_unlock_bh(&pch->upl);
+ rcu_read_unlock_bh();
}
/* Put a 0-length skb in the receive queue as an error indication */
@@ -2347,20 +2349,22 @@ ppp_input_error(struct ppp_channel *chan, int code)
{
struct channel *pch = chan->ppp;
struct sk_buff *skb;
+ struct ppp *ppp;
if (!pch)
return;
- read_lock_bh(&pch->upl);
- if (pch->ppp) {
+ rcu_read_lock_bh();
+ ppp = rcu_dereference_bh(pch->ppp);
+ if (ppp) {
skb = alloc_skb(0, GFP_ATOMIC);
if (skb) {
skb->len = 0; /* probably unnecessary */
skb->cb[0] = code;
- ppp_do_recv(pch->ppp, skb, pch);
+ ppp_do_recv(ppp, skb, pch);
}
}
- read_unlock_bh(&pch->upl);
+ rcu_read_unlock_bh();
}
/*
@@ -2908,7 +2912,6 @@ int ppp_register_net_channel(struct net *net, struct ppp_channel *chan)
pn = ppp_pernet(net);
- pch->ppp = NULL;
pch->chan = chan;
pch->chan_net = get_net_track(net, &pch->ns_tracker, GFP_KERNEL);
chan->ppp = pch;
@@ -2919,7 +2922,7 @@ int ppp_register_net_channel(struct net *net, struct ppp_channel *chan)
#endif /* CONFIG_PPP_MULTILINK */
init_rwsem(&pch->chan_sem);
spin_lock_init(&pch->downl);
- rwlock_init(&pch->upl);
+ spin_lock_init(&pch->upl);
spin_lock_bh(&pn->all_channels_lock);
pch->file.index = ++pn->last_channel_index;
@@ -2948,13 +2951,15 @@ int ppp_channel_index(struct ppp_channel *chan)
int ppp_unit_number(struct ppp_channel *chan)
{
struct channel *pch = chan->ppp;
+ struct ppp *ppp;
int unit = -1;
if (pch) {
- read_lock_bh(&pch->upl);
- if (pch->ppp)
- unit = pch->ppp->file.index;
- read_unlock_bh(&pch->upl);
+ rcu_read_lock();
+ ppp = rcu_dereference(pch->ppp);
+ if (ppp)
+ unit = ppp->file.index;
+ rcu_read_unlock();
}
return unit;
}
@@ -2966,12 +2971,14 @@ char *ppp_dev_name(struct ppp_channel *chan)
{
struct channel *pch = chan->ppp;
char *name = NULL;
+ struct ppp *ppp;
if (pch) {
- read_lock_bh(&pch->upl);
- if (pch->ppp && pch->ppp->dev)
- name = pch->ppp->dev->name;
- read_unlock_bh(&pch->upl);
+ rcu_read_lock();
+ ppp = rcu_dereference(pch->ppp);
+ if (ppp && ppp->dev)
+ name = ppp->dev->name;
+ rcu_read_unlock();
}
return name;
}
@@ -3494,9 +3501,9 @@ ppp_connect_channel(struct channel *pch, int unit)
ppp = ppp_find_unit(pn, unit);
if (!ppp)
goto out;
- write_lock_bh(&pch->upl);
+ spin_lock(&pch->upl);
ret = -EINVAL;
- if (pch->ppp ||
+ if (rcu_dereference_protected(pch->ppp, lockdep_is_held(&pch->upl)) ||
rcu_dereference_protected(pch->bridge, lockdep_is_held(&pch->upl)))
goto outl;
@@ -3521,13 +3528,13 @@ ppp_connect_channel(struct channel *pch, int unit)
ppp->dev->hard_header_len = hdrlen;
list_add_tail_rcu(&pch->clist, &ppp->channels);
++ppp->n_channels;
- pch->ppp = ppp;
+ rcu_assign_pointer(pch->ppp, ppp);
refcount_inc(&ppp->file.refcnt);
ppp_unlock(ppp);
ret = 0;
outl:
- write_unlock_bh(&pch->upl);
+ spin_unlock(&pch->upl);
out:
mutex_unlock(&pn->all_ppp_mutex);
return ret;
@@ -3542,10 +3549,9 @@ ppp_disconnect_channel(struct channel *pch)
struct ppp *ppp;
int err = -EINVAL;
- write_lock_bh(&pch->upl);
- ppp = pch->ppp;
- pch->ppp = NULL;
- write_unlock_bh(&pch->upl);
+ spin_lock(&pch->upl);
+ ppp = rcu_replace_pointer(pch->ppp, NULL, lockdep_is_held(&pch->upl));
+ spin_unlock(&pch->upl);
if (ppp) {
/* remove it from the ppp unit's list */
ppp_lock(ppp);
diff --git a/drivers/net/ppp/ppp_mppe.c b/drivers/net/ppp/ppp_mppe.c
index bcc1eaedf58f..630cbf71c147 100644
--- a/drivers/net/ppp/ppp_mppe.c
+++ b/drivers/net/ppp/ppp_mppe.c
@@ -43,7 +43,7 @@
*/
#include <crypto/arc4.h>
-#include <crypto/hash.h>
+#include <crypto/sha1.h>
#include <linux/err.h>
#include <linux/fips.h>
#include <linux/module.h>
@@ -55,7 +55,6 @@
#include <linux/mm.h>
#include <linux/ppp_defs.h>
#include <linux/ppp-comp.h>
-#include <linux/scatterlist.h>
#include <linux/unaligned.h>
#include "ppp_mppe.h"
@@ -67,31 +66,15 @@ MODULE_ALIAS("ppp-compress-" __stringify(CI_MPPE));
MODULE_VERSION("1.0.2");
#define SHA1_PAD_SIZE 40
-
-/*
- * kernel crypto API needs its arguments to be in kmalloc'd memory, not in the module
- * static data area. That means sha_pad needs to be kmalloc'd.
- */
-
-struct sha_pad {
- unsigned char sha_pad1[SHA1_PAD_SIZE];
- unsigned char sha_pad2[SHA1_PAD_SIZE];
-};
-static struct sha_pad *sha_pad;
-
-static inline void sha_pad_init(struct sha_pad *shapad)
-{
- memset(shapad->sha_pad1, 0x00, sizeof(shapad->sha_pad1));
- memset(shapad->sha_pad2, 0xF2, sizeof(shapad->sha_pad2));
-}
+static const u8 sha_pad1[SHA1_PAD_SIZE] = { 0 };
+static const u8 sha_pad2[SHA1_PAD_SIZE] = { [0 ... SHA1_PAD_SIZE - 1] = 0xF2 };
/*
* State for an MPPE (de)compressor.
*/
struct ppp_mppe_state {
struct arc4_ctx arc4;
- struct shash_desc *sha1;
- unsigned char *sha1_digest;
+ unsigned char sha1_digest[SHA1_DIGEST_SIZE];
unsigned char master_key[MPPE_MAX_KEY_LEN];
unsigned char session_key[MPPE_MAX_KEY_LEN];
unsigned keylen; /* key length in bytes */
@@ -130,16 +113,14 @@ struct ppp_mppe_state {
*/
static void get_new_key_from_sha(struct ppp_mppe_state * state)
{
- crypto_shash_init(state->sha1);
- crypto_shash_update(state->sha1, state->master_key,
- state->keylen);
- crypto_shash_update(state->sha1, sha_pad->sha_pad1,
- sizeof(sha_pad->sha_pad1));
- crypto_shash_update(state->sha1, state->session_key,
- state->keylen);
- crypto_shash_update(state->sha1, sha_pad->sha_pad2,
- sizeof(sha_pad->sha_pad2));
- crypto_shash_final(state->sha1, state->sha1_digest);
+ struct sha1_ctx ctx;
+
+ sha1_init(&ctx);
+ sha1_update(&ctx, state->master_key, state->keylen);
+ sha1_update(&ctx, sha_pad1, sizeof(sha_pad1));
+ sha1_update(&ctx, state->session_key, state->keylen);
+ sha1_update(&ctx, sha_pad2, sizeof(sha_pad2));
+ sha1_final(&ctx, state->sha1_digest);
}
/*
@@ -171,39 +152,15 @@ static void mppe_rekey(struct ppp_mppe_state * state, int initial_key)
static void *mppe_alloc(unsigned char *options, int optlen)
{
struct ppp_mppe_state *state;
- struct crypto_shash *shash;
- unsigned int digestsize;
if (optlen != CILEN_MPPE + sizeof(state->master_key) ||
options[0] != CI_MPPE || options[1] != CILEN_MPPE ||
fips_enabled)
- goto out;
+ return NULL;
state = kzalloc(sizeof(*state), GFP_KERNEL);
if (state == NULL)
- goto out;
-
-
- shash = crypto_alloc_shash("sha1", 0, 0);
- if (IS_ERR(shash))
- goto out_free;
-
- state->sha1 = kmalloc(sizeof(*state->sha1) +
- crypto_shash_descsize(shash),
- GFP_KERNEL);
- if (!state->sha1) {
- crypto_free_shash(shash);
- goto out_free;
- }
- state->sha1->tfm = shash;
-
- digestsize = crypto_shash_digestsize(shash);
- if (digestsize < MPPE_MAX_KEY_LEN)
- goto out_free;
-
- state->sha1_digest = kmalloc(digestsize, GFP_KERNEL);
- if (!state->sha1_digest)
- goto out_free;
+ return NULL;
/* Save keys. */
memcpy(state->master_key, &options[CILEN_MPPE],
@@ -217,16 +174,6 @@ static void *mppe_alloc(unsigned char *options, int optlen)
*/
return (void *)state;
-
-out_free:
- kfree(state->sha1_digest);
- if (state->sha1) {
- crypto_free_shash(state->sha1->tfm);
- kfree_sensitive(state->sha1);
- }
- kfree(state);
-out:
- return NULL;
}
/*
@@ -235,12 +182,8 @@ out:
static void mppe_free(void *arg)
{
struct ppp_mppe_state *state = (struct ppp_mppe_state *) arg;
- if (state) {
- kfree(state->sha1_digest);
- crypto_free_shash(state->sha1->tfm);
- kfree_sensitive(state->sha1);
- kfree_sensitive(state);
- }
+
+ kfree_sensitive(state);
}
/*
@@ -649,31 +592,17 @@ static struct compressor ppp_mppe = {
.comp_extra = MPPE_PAD,
};
-/*
- * ppp_mppe_init()
- *
- * Prior to allowing load, try to load the arc4 and sha1 crypto
- * libraries. The actual use will be allocated later, but
- * this way the module will fail to insmod if they aren't available.
- */
-
static int __init ppp_mppe_init(void)
{
int answer;
- if (fips_enabled || !crypto_has_ahash("sha1", 0, CRYPTO_ALG_ASYNC))
- return -ENODEV;
- sha_pad = kmalloc(sizeof(struct sha_pad), GFP_KERNEL);
- if (!sha_pad)
- return -ENOMEM;
- sha_pad_init(sha_pad);
+ if (fips_enabled)
+ return -ENODEV;
answer = ppp_register_compressor(&ppp_mppe);
if (answer == 0)
printk(KERN_INFO "PPP MPPE Compression module registered\n");
- else
- kfree(sha_pad);
return answer;
}
@@ -681,7 +610,6 @@ static int __init ppp_mppe_init(void)
static void __exit ppp_mppe_cleanup(void)
{
ppp_unregister_compressor(&ppp_mppe);
- kfree(sha_pad);
}
module_init(ppp_mppe_init);
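The CRYPTO_LIB_SHA1 switch removes every allocation and error path from the hashing: struct sha1_ctx lives on the stack and none of the calls can fail. Assuming the library also exports the one-shot sha1() helper, a contiguous input could skip init/update/final entirely; MPPE keeps the incremental form only because its key/pad/key/pad input is not contiguous:

	u8 digest[SHA1_DIGEST_SIZE];

	sha1(buf, buflen, digest);	/* one-shot form, contiguous input */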
diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
index 410effa42ade..4ac6afce267b 100644
--- a/drivers/net/ppp/pppoe.c
+++ b/drivers/net/ppp/pppoe.c
@@ -100,8 +100,8 @@ struct pppoe_net {
* as well, moreover in case of SMP less locking
* controversy here
*/
- struct pppox_sock *hash_table[PPPOE_HASH_SIZE];
- rwlock_t hash_lock;
+ struct pppox_sock __rcu *hash_table[PPPOE_HASH_SIZE];
+ spinlock_t hash_lock;
};
/*
@@ -162,13 +162,13 @@ static struct pppox_sock *__get_item(struct pppoe_net *pn, __be16 sid,
int hash = hash_item(sid, addr);
struct pppox_sock *ret;
- ret = pn->hash_table[hash];
+ ret = rcu_dereference(pn->hash_table[hash]);
while (ret) {
if (cmp_addr(&ret->pppoe_pa, sid, addr) &&
ret->pppoe_ifindex == ifindex)
return ret;
- ret = ret->next;
+ ret = rcu_dereference(ret->next);
}
return NULL;
@@ -177,19 +177,20 @@ static struct pppox_sock *__get_item(struct pppoe_net *pn, __be16 sid,
static int __set_item(struct pppoe_net *pn, struct pppox_sock *po)
{
int hash = hash_item(po->pppoe_pa.sid, po->pppoe_pa.remote);
- struct pppox_sock *ret;
+ struct pppox_sock *ret, *first;
- ret = pn->hash_table[hash];
+ first = rcu_dereference_protected(pn->hash_table[hash], lockdep_is_held(&pn->hash_lock));
+ ret = first;
while (ret) {
if (cmp_2_addr(&ret->pppoe_pa, &po->pppoe_pa) &&
ret->pppoe_ifindex == po->pppoe_ifindex)
return -EALREADY;
- ret = ret->next;
+ ret = rcu_dereference_protected(ret->next, lockdep_is_held(&pn->hash_lock));
}
- po->next = pn->hash_table[hash];
- pn->hash_table[hash] = po;
+ RCU_INIT_POINTER(po->next, first);
+ rcu_assign_pointer(pn->hash_table[hash], po);
return 0;
}
@@ -198,20 +199,24 @@ static void __delete_item(struct pppoe_net *pn, __be16 sid,
char *addr, int ifindex)
{
int hash = hash_item(sid, addr);
- struct pppox_sock *ret, **src;
+ struct pppox_sock *ret, __rcu **src;
- ret = pn->hash_table[hash];
+ ret = rcu_dereference_protected(pn->hash_table[hash], lockdep_is_held(&pn->hash_lock));
src = &pn->hash_table[hash];
while (ret) {
if (cmp_addr(&ret->pppoe_pa, sid, addr) &&
ret->pppoe_ifindex == ifindex) {
- *src = ret->next;
+ struct pppox_sock *next;
+
+ next = rcu_dereference_protected(ret->next,
+ lockdep_is_held(&pn->hash_lock));
+ rcu_assign_pointer(*src, next);
break;
}
src = &ret->next;
- ret = ret->next;
+ ret = rcu_dereference_protected(ret->next, lockdep_is_held(&pn->hash_lock));
}
}
@@ -225,17 +230,15 @@ static inline struct pppox_sock *get_item(struct pppoe_net *pn, __be16 sid,
{
struct pppox_sock *po;
- read_lock_bh(&pn->hash_lock);
po = __get_item(pn, sid, addr, ifindex);
- if (po)
- sock_hold(sk_pppox(po));
- read_unlock_bh(&pn->hash_lock);
+ if (po && !refcount_inc_not_zero(&sk_pppox(po)->sk_refcnt))
+ po = NULL;
return po;
}
-static inline struct pppox_sock *get_item_by_addr(struct net *net,
- struct sockaddr_pppox *sp)
+static inline struct pppox_sock *__get_item_by_addr(struct net *net,
+ struct sockaddr_pppox *sp)
{
struct net_device *dev;
struct pppoe_net *pn;
@@ -243,24 +246,22 @@ static inline struct pppox_sock *get_item_by_addr(struct net *net,
int ifindex;
- rcu_read_lock();
dev = dev_get_by_name_rcu(net, sp->sa_addr.pppoe.dev);
if (dev) {
ifindex = dev->ifindex;
pn = pppoe_pernet(net);
- pppox_sock = get_item(pn, sp->sa_addr.pppoe.sid,
- sp->sa_addr.pppoe.remote, ifindex);
+ pppox_sock = __get_item(pn, sp->sa_addr.pppoe.sid,
+ sp->sa_addr.pppoe.remote, ifindex);
}
- rcu_read_unlock();
return pppox_sock;
}
static inline void delete_item(struct pppoe_net *pn, __be16 sid,
char *addr, int ifindex)
{
- write_lock_bh(&pn->hash_lock);
+ spin_lock(&pn->hash_lock);
__delete_item(pn, sid, addr, ifindex);
- write_unlock_bh(&pn->hash_lock);
+ spin_unlock(&pn->hash_lock);
}
/***************************************************************************
@@ -276,14 +277,16 @@ static void pppoe_flush_dev(struct net_device *dev)
int i;
pn = pppoe_pernet(dev_net(dev));
- write_lock_bh(&pn->hash_lock);
+ spin_lock(&pn->hash_lock);
for (i = 0; i < PPPOE_HASH_SIZE; i++) {
- struct pppox_sock *po = pn->hash_table[i];
+ struct pppox_sock *po = rcu_dereference_protected(pn->hash_table[i],
+ lockdep_is_held(&pn->hash_lock));
struct sock *sk;
while (po) {
while (po && po->pppoe_dev != dev) {
- po = po->next;
+ po = rcu_dereference_protected(po->next,
+ lockdep_is_held(&pn->hash_lock));
}
if (!po)
@@ -300,7 +303,7 @@ static void pppoe_flush_dev(struct net_device *dev)
*/
sock_hold(sk);
- write_unlock_bh(&pn->hash_lock);
+ spin_unlock(&pn->hash_lock);
lock_sock(sk);
if (po->pppoe_dev == dev &&
@@ -320,11 +323,12 @@ static void pppoe_flush_dev(struct net_device *dev)
*/
BUG_ON(pppoe_pernet(dev_net(dev)) == NULL);
- write_lock_bh(&pn->hash_lock);
- po = pn->hash_table[i];
+ spin_lock(&pn->hash_lock);
+ po = rcu_dereference_protected(pn->hash_table[i],
+ lockdep_is_held(&pn->hash_lock));
}
}
- write_unlock_bh(&pn->hash_lock);
+ spin_unlock(&pn->hash_lock);
}
static int pppoe_device_event(struct notifier_block *this,
@@ -375,18 +379,16 @@ static int pppoe_rcv_core(struct sock *sk, struct sk_buff *skb)
if (sk->sk_state & PPPOX_BOUND) {
ppp_input(&po->chan, skb);
} else if (sk->sk_state & PPPOX_RELAY) {
- relay_po = get_item_by_addr(sock_net(sk),
- &po->pppoe_relay);
+ relay_po = __get_item_by_addr(sock_net(sk),
+ &po->pppoe_relay);
if (relay_po == NULL)
goto abort_kfree;
if ((sk_pppox(relay_po)->sk_state & PPPOX_CONNECTED) == 0)
- goto abort_put;
+ goto abort_kfree;
if (!__pppoe_xmit(sk_pppox(relay_po), skb))
- goto abort_put;
-
- sock_put(sk_pppox(relay_po));
+ goto abort_kfree;
} else {
if (sock_queue_rcv_skb(sk, skb))
goto abort_kfree;
@@ -394,9 +396,6 @@ static int pppoe_rcv_core(struct sock *sk, struct sk_buff *skb)
return NET_RX_SUCCESS;
-abort_put:
- sock_put(sk_pppox(relay_po));
-
abort_kfree:
kfree_skb(skb);
return NET_RX_DROP;
@@ -441,14 +440,11 @@ static int pppoe_rcv(struct sk_buff *skb, struct net_device *dev,
ph = pppoe_hdr(skb);
pn = pppoe_pernet(dev_net(dev));
- /* Note that get_item does a sock_hold(), so sk_pppox(po)
- * is known to be safe.
- */
- po = get_item(pn, ph->sid, eth_hdr(skb)->h_source, dev->ifindex);
+ po = __get_item(pn, ph->sid, eth_hdr(skb)->h_source, dev->ifindex);
if (!po)
goto drop;
- return sk_receive_skb(sk_pppox(po), skb, 0);
+ return __sk_receive_skb(sk_pppox(po), skb, 0, 1, false);
drop:
kfree_skb(skb);
@@ -528,6 +524,11 @@ static struct proto pppoe_sk_proto __read_mostly = {
.obj_size = sizeof(struct pppox_sock),
};
+static void pppoe_destruct(struct sock *sk)
+{
+ skb_queue_purge(&sk->sk_receive_queue);
+}
+
/***********************************************************************
*
* Initialize a new struct sock.
@@ -542,11 +543,13 @@ static int pppoe_create(struct net *net, struct socket *sock, int kern)
return -ENOMEM;
sock_init_data(sock, sk);
+ sock_set_flag(sk, SOCK_RCU_FREE);
sock->state = SS_UNCONNECTED;
sock->ops = &pppoe_ops;
sk->sk_backlog_rcv = pppoe_rcv_core;
+ sk->sk_destruct = pppoe_destruct;
sk->sk_state = PPPOX_NONE;
sk->sk_type = SOCK_STREAM;
sk->sk_family = PF_PPPOX;
@@ -599,7 +602,6 @@ static int pppoe_release(struct socket *sock)
sock_orphan(sk);
sock->sk = NULL;
- skb_queue_purge(&sk->sk_receive_queue);
release_sock(sk);
sock_put(sk);
@@ -681,9 +683,9 @@ static int pppoe_connect(struct socket *sock, struct sockaddr *uservaddr,
&sp->sa_addr.pppoe,
sizeof(struct pppoe_addr));
- write_lock_bh(&pn->hash_lock);
+ spin_lock(&pn->hash_lock);
error = __set_item(pn, po);
- write_unlock_bh(&pn->hash_lock);
+ spin_unlock(&pn->hash_lock);
if (error < 0)
goto err_put;
@@ -808,11 +810,12 @@ static int pppoe_ioctl(struct socket *sock, unsigned int cmd,
/* Check that the socket referenced by the address
actually exists. */
- relay_po = get_item_by_addr(sock_net(sk), &po->pppoe_relay);
+ rcu_read_lock();
+ relay_po = __get_item_by_addr(sock_net(sk), &po->pppoe_relay);
+ rcu_read_unlock();
if (!relay_po)
break;
- sock_put(sk_pppox(relay_po));
sk->sk_state |= PPPOX_RELAY;
err = 0;
break;
@@ -1052,11 +1055,11 @@ static inline struct pppox_sock *pppoe_get_idx(struct pppoe_net *pn, loff_t pos)
int i;
for (i = 0; i < PPPOE_HASH_SIZE; i++) {
- po = pn->hash_table[i];
+ po = rcu_dereference(pn->hash_table[i]);
while (po) {
if (!pos--)
goto out;
- po = po->next;
+ po = rcu_dereference(po->next);
}
}
@@ -1065,19 +1068,19 @@ out:
}
static void *pppoe_seq_start(struct seq_file *seq, loff_t *pos)
- __acquires(pn->hash_lock)
+ __acquires(RCU)
{
struct pppoe_net *pn = pppoe_pernet(seq_file_net(seq));
loff_t l = *pos;
- read_lock_bh(&pn->hash_lock);
+ rcu_read_lock();
return l ? pppoe_get_idx(pn, --l) : SEQ_START_TOKEN;
}
static void *pppoe_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
struct pppoe_net *pn = pppoe_pernet(seq_file_net(seq));
- struct pppox_sock *po;
+ struct pppox_sock *po, *next;
++*pos;
if (v == SEQ_START_TOKEN) {
@@ -1085,14 +1088,15 @@ static void *pppoe_seq_next(struct seq_file *seq, void *v, loff_t *pos)
goto out;
}
po = v;
- if (po->next)
- po = po->next;
+ next = rcu_dereference(po->next);
+ if (next)
+ po = next;
else {
int hash = hash_item(po->pppoe_pa.sid, po->pppoe_pa.remote);
po = NULL;
while (++hash < PPPOE_HASH_SIZE) {
- po = pn->hash_table[hash];
+ po = rcu_dereference(pn->hash_table[hash]);
if (po)
break;
}
@@ -1103,10 +1107,9 @@ out:
}
static void pppoe_seq_stop(struct seq_file *seq, void *v)
- __releases(pn->hash_lock)
+ __releases(RCU)
{
- struct pppoe_net *pn = pppoe_pernet(seq_file_net(seq));
- read_unlock_bh(&pn->hash_lock);
+ rcu_read_unlock();
}
static const struct seq_operations pppoe_seq_ops = {
@@ -1149,7 +1152,7 @@ static __net_init int pppoe_init_net(struct net *net)
struct pppoe_net *pn = pppoe_pernet(net);
struct proc_dir_entry *pde;
- rwlock_init(&pn->hash_lock);
+ spin_lock_init(&pn->hash_lock);
pde = proc_create_net("pppoe", 0444, net->proc_net,
&pppoe_seq_ops, sizeof(struct seq_net_private));
diff --git a/drivers/net/pse-pd/Kconfig b/drivers/net/pse-pd/Kconfig
index 7fab916a7f46..7ef29657ee5d 100644
--- a/drivers/net/pse-pd/Kconfig
+++ b/drivers/net/pse-pd/Kconfig
@@ -32,6 +32,17 @@ config PSE_PD692X0
To compile this driver as a module, choose M here: the
module will be called pd692x0.
+config PSE_SI3474
+ tristate "Si3474 PSE controller"
+ depends on I2C
+ help
+ This module provides support for the Si3474 regulator-based
+ Ethernet Power Sourcing Equipment.
+ Only 4-pair PSE configurations are supported.
+
+ To compile this driver as a module, choose M here: the
+ module will be called si3474.
+
config PSE_TPS23881
tristate "TPS23881 PSE controller"
depends on I2C
diff --git a/drivers/net/pse-pd/Makefile b/drivers/net/pse-pd/Makefile
index 9d2898b36737..cc78f7ea7f5f 100644
--- a/drivers/net/pse-pd/Makefile
+++ b/drivers/net/pse-pd/Makefile
@@ -5,4 +5,5 @@ obj-$(CONFIG_PSE_CONTROLLER) += pse_core.o
obj-$(CONFIG_PSE_REGULATOR) += pse_regulator.o
obj-$(CONFIG_PSE_PD692X0) += pd692x0.o
+obj-$(CONFIG_PSE_SI3474) += si3474.o
obj-$(CONFIG_PSE_TPS23881) += tps23881.o
diff --git a/drivers/net/pse-pd/si3474.c b/drivers/net/pse-pd/si3474.c
new file mode 100644
index 000000000000..aa07ffbce54d
--- /dev/null
+++ b/drivers/net/pse-pd/si3474.c
@@ -0,0 +1,578 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Driver for the Skyworks Si3474 PoE PSE Controller
+ *
+ * Chip Architecture & Terminology:
+ *
+ * The Si3474 is a single-chip PoE PSE controller managing 8 physical power
+ * delivery channels. Internally, it's structured into two logical "Quads".
+ *
+ * Quad 0: Manages physical channels ('ports' in datasheet) 0, 1, 2, 3
+ * Quad 1: Manages physical channels ('ports' in datasheet) 4, 5, 6, 7
+ *
+ * Each Quad is accessed via a separate I2C address. The base address range is
+ * set by hardware pins A1-A4, and the specific address selects Quad 0 (usually
+ * the lower/even address) or Quad 1 (usually the higher/odd address).
+ * See datasheet Table 2.2 for the address mapping.
+ *
+ * While the Quads manage channel-specific operations, the Si3474 package has
+ * several resources shared across the entire chip:
+ * - Single RESETb input pin.
+ * - Single INTb output pin (signals interrupts from *either* Quad).
+ * - Single OSS input pin (Emergency Shutdown).
+ * - Global I2C Address (0x7F) used for firmware updates.
+ * - Global status monitoring (Temperature, VDD/VPWR Undervoltage Lockout).
+ *
+ * Driver Architecture:
+ *
+ * To handle the mix of per-Quad access and shared resources correctly, this
+ * driver treats the entire Si3474 package as one logical device. The driver
+ * instance associated with the primary I2C address (Quad 0) takes ownership.
+ * It discovers and manages the I2C client for the secondary address (Quad 1).
+ * This primary instance handles shared resources like IRQ management and
+ * registers a single PSE controller device representing all logical PIs.
+ * Internal functions route I2C commands to the appropriate Quad's i2c_client
+ * based on the target channel or PI.
+ *
+ * Terminology Mapping:
+ *
+ * - "PI" (Power Interface): Refers to the logical PSE port as defined by
+ * IEEE 802.3 (typically corresponds to an RJ45 connector). This is the
+ * `id` (0-7) used in the pse_controller_ops.
+ * - "Channel": Refers to one of the 8 physical power control paths within
+ * the Si3474 chip itself (hardware channels 0-7). This terminology is
+ * used internally within the driver to avoid confusion with 'ports'.
+ * - "Quad": One of the two internal 4-channel management units within the
+ * Si3474, each accessed via its own I2C address.
+ *
+ * Relationship:
+ * - A 2-Pair PoE PI uses 1 Channel.
+ * - A 4-Pair PoE PI uses 2 Channels.
+ *
+ * ASCII Schematic:
+ *
+ * +-----------------------------------------------------+
+ * | Si3474 Chip |
+ * | |
+ * | +---------------------+ +---------------------+ |
+ * | | Quad 0 | | Quad 1 | |
+ * | | Channels 0, 1, 2, 3 | | Channels 4, 5, 6, 7 | |
+ * | +----------^----------+ +-------^-------------+ |
+ * | I2C Addr 0 | | I2C Addr 1 |
+ * | +------------------------+ |
+ * | (Primary Driver Instance) (Managed by Primary) |
+ * | |
+ * | Shared Resources (affect whole chip): |
+ * | - Single INTb Output -> Handled by Primary |
+ * | - Single RESETb Input |
+ * | - Single OSS Input -> Handled by Primary |
+ * | - Global I2C Addr (0x7F) for Firmware Update |
+ * | - Global Status (Temp, VDD/VPWR UVLO) |
+ * +-----------------------------------------------------+
+ * | | | | | | | |
+ * Ch0 Ch1 Ch2 Ch3 Ch4 Ch5 Ch6 Ch7 (Physical Channels)
+ *
+ * Example Mapping (Logical PI to Physical Channel(s)):
+ * * 2-Pair Mode (8 PIs):
+ * PI 0 -> Ch 0
+ * PI 1 -> Ch 1
+ * ...
+ * PI 7 -> Ch 7
+ * * 4-Pair Mode (4 PIs):
+ * PI 0 -> Ch 0 + Ch 1 (Managed via Quad 0 Addr)
+ * PI 1 -> Ch 2 + Ch 3 (Managed via Quad 0 Addr)
+ * PI 2 -> Ch 4 + Ch 5 (Managed via Quad 1 Addr)
+ * PI 3 -> Ch 6 + Ch 7 (Managed via Quad 1 Addr)
+ * (Note: Actual mapping depends on Device Tree and PORT_REMAP config)
+ */
+
+#include <linux/i2c.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/pse-pd/pse.h>
+
+#define SI3474_MAX_CHANS 8
+
+#define MANUFACTURER_ID 0x08
+#define IC_ID 0x05
+#define SI3474_DEVICE_ID (MANUFACTURER_ID << 3 | IC_ID)
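+/* With MANUFACTURER_ID = 0x08 and IC_ID = 0x05, SI3474_DEVICE_ID
+ * evaluates to (0x08 << 3) | 0x05 = 0x45, the value expected when
+ * reading VENDOR_IC_ID_REG.
+ */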
+
+/* Misc registers */
+#define VENDOR_IC_ID_REG 0x1B
+#define TEMPERATURE_REG 0x2C
+#define FIRMWARE_REVISION_REG 0x41
+#define CHIP_REVISION_REG 0x43
+
+/* Main status registers */
+#define POWER_STATUS_REG 0x10
+#define PORT_MODE_REG 0x12
+#define DETECT_CLASS_ENABLE_REG 0x14
+
+/* PORTn Current */
+#define PORT1_CURRENT_LSB_REG 0x30
+
+/* PORTn Current [mA], returned in [nA] */
+/* 1000 * ((PORTn_CURRENT_MSB << 8) + PORTn_CURRENT_LSB) / 16384 */
+#define SI3474_NA_STEP (1000 * 1000 * 1000 / 16384)
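+/* Worked example: a raw reading of 16384 (0x4000) corresponds to
+ * 16384 * SI3474_NA_STEP ~= 1,000,000,000 nA, i.e. 1 A, which
+ * si3474_pi_get_chan_current() below scales down to ~1,000,000 uA.
+ */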
+
+/* VPWR Voltage */
+#define VPWR_LSB_REG 0x2E
+#define VPWR_MSB_REG 0x2F
+
+/* PORTn Voltage */
+#define PORT1_VOLTAGE_LSB_REG 0x32
+
+/* VPWR Voltage [V], returned in [uV] */
+/* 60 * ((VPWR_MSB << 8) + VPWR_LSB) / 16384 */
+#define SI3474_UV_STEP (1000 * 1000 * 60 / 16384)
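+/* Worked example: a raw reading of 16384 gives
+ * 16384 * SI3474_UV_STEP ~= 60,000,000 uV, i.e. 60 V.
+ */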
+
+/* Helper macros */
+#define CHAN_IDX(chan) ((chan) % 4)
+#define CHAN_BIT(chan) BIT(CHAN_IDX(chan))
+#define CHAN_UPPER_BIT(chan) BIT(CHAN_IDX(chan) + 4)
+
+#define CHAN_MASK(chan) (0x03U << (2 * CHAN_IDX(chan)))
+#define CHAN_REG(base, chan) ((base) + (CHAN_IDX(chan) * 4))
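+/* For illustration, physical channel 6 (managed by Quad 1) yields:
+ * CHAN_IDX(6) = 2, CHAN_BIT(6) = BIT(2), CHAN_UPPER_BIT(6) = BIT(6),
+ * CHAN_MASK(6) = 0x30, CHAN_REG(PORT1_CURRENT_LSB_REG, 6) = 0x38.
+ */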
+
+struct si3474_pi_desc {
+ u8 chan[2];
+ bool is_4p;
+};
+
+struct si3474_priv {
+ struct i2c_client *client[2];
+ struct pse_controller_dev pcdev;
+ struct device_node *np;
+ struct si3474_pi_desc pi[SI3474_MAX_CHANS];
+};
+
+static struct si3474_priv *to_si3474_priv(struct pse_controller_dev *pcdev)
+{
+ return container_of(pcdev, struct si3474_priv, pcdev);
+}
+
+static void si3474_get_channels(struct si3474_priv *priv, int id,
+ u8 *chan0, u8 *chan1)
+{
+ *chan0 = priv->pi[id].chan[0];
+ *chan1 = priv->pi[id].chan[1];
+}
+
+static struct i2c_client *si3474_get_chan_client(struct si3474_priv *priv,
+ u8 chan)
+{
+ return (chan < 4) ? priv->client[0] : priv->client[1];
+}
+
+static int si3474_pi_get_admin_state(struct pse_controller_dev *pcdev, int id,
+ struct pse_admin_state *admin_state)
+{
+ struct si3474_priv *priv = to_si3474_priv(pcdev);
+ struct i2c_client *client;
+ bool is_enabled;
+ u8 chan0, chan1;
+ s32 ret;
+
+ si3474_get_channels(priv, id, &chan0, &chan1);
+ client = si3474_get_chan_client(priv, chan0);
+
+ ret = i2c_smbus_read_byte_data(client, PORT_MODE_REG);
+ if (ret < 0) {
+ admin_state->c33_admin_state =
+ ETHTOOL_C33_PSE_ADMIN_STATE_UNKNOWN;
+ return ret;
+ }
+
+ is_enabled = ret & (CHAN_MASK(chan0) | CHAN_MASK(chan1));
+
+ if (is_enabled)
+ admin_state->c33_admin_state =
+ ETHTOOL_C33_PSE_ADMIN_STATE_ENABLED;
+ else
+ admin_state->c33_admin_state =
+ ETHTOOL_C33_PSE_ADMIN_STATE_DISABLED;
+
+ return 0;
+}
+
+static int si3474_pi_get_pw_status(struct pse_controller_dev *pcdev, int id,
+ struct pse_pw_status *pw_status)
+{
+ struct si3474_priv *priv = to_si3474_priv(pcdev);
+ struct i2c_client *client;
+ bool delivering;
+ u8 chan0, chan1;
+ s32 ret;
+
+ si3474_get_channels(priv, id, &chan0, &chan1);
+ client = si3474_get_chan_client(priv, chan0);
+
+ ret = i2c_smbus_read_byte_data(client, POWER_STATUS_REG);
+ if (ret < 0) {
+ pw_status->c33_pw_status = ETHTOOL_C33_PSE_PW_D_STATUS_UNKNOWN;
+ return ret;
+ }
+
+ delivering = ret & (CHAN_UPPER_BIT(chan0) | CHAN_UPPER_BIT(chan1));
+
+ if (delivering)
+ pw_status->c33_pw_status =
+ ETHTOOL_C33_PSE_PW_D_STATUS_DELIVERING;
+ else
+ pw_status->c33_pw_status = ETHTOOL_C33_PSE_PW_D_STATUS_DISABLED;
+
+ return 0;
+}
+
+static int si3474_get_of_channels(struct si3474_priv *priv)
+{
+ struct pse_pi *pi;
+ u32 chan_id;
+ u8 pi_no;
+ s32 ret;
+
+ for (pi_no = 0; pi_no < SI3474_MAX_CHANS; pi_no++) {
+ pi = &priv->pcdev.pi[pi_no];
+ bool pairset_found = false;
+ u8 pairset_no;
+
+ for (pairset_no = 0; pairset_no < 2; pairset_no++) {
+ if (!pi->pairset[pairset_no].np)
+ continue;
+
+ pairset_found = true;
+
+ ret = of_property_read_u32(pi->pairset[pairset_no].np,
+ "reg", &chan_id);
+ if (ret) {
+ dev_err(&priv->client[0]->dev,
+ "Failed to read channel reg property\n");
+ return ret;
+ }
+ if (chan_id >= SI3474_MAX_CHANS) {
+ dev_err(&priv->client[0]->dev,
+ "Incorrect channel number: %d\n", chan_id);
+ return -EINVAL;
+ }
+
+ priv->pi[pi_no].chan[pairset_no] = chan_id;
+ /* Mark as 4-pair if second pairset is present */
+ priv->pi[pi_no].is_4p = (pairset_no == 1);
+ }
+
+ if (pairset_found && !priv->pi[pi_no].is_4p) {
+ dev_err(&priv->client[0]->dev,
+ "Second pairset is missing for PI %pOF, only 4p configs are supported\n",
+ pi->np);
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+static int si3474_setup_pi_matrix(struct pse_controller_dev *pcdev)
+{
+ struct si3474_priv *priv = to_si3474_priv(pcdev);
+ s32 ret;
+
+ ret = si3474_get_of_channels(priv);
+ if (ret < 0)
+ dev_warn(&priv->client[0]->dev,
+ "Unable to parse DT PSE power interface matrix\n");
+
+ return ret;
+}
+
+static int si3474_pi_enable(struct pse_controller_dev *pcdev, int id)
+{
+ struct si3474_priv *priv = to_si3474_priv(pcdev);
+ struct i2c_client *client;
+ u8 chan0, chan1;
+ s32 ret;
+ u8 val;
+
+ si3474_get_channels(priv, id, &chan0, &chan1);
+ client = si3474_get_chan_client(priv, chan0);
+
+ /* Release PI from shutdown */
+ ret = i2c_smbus_read_byte_data(client, PORT_MODE_REG);
+ if (ret < 0)
+ return ret;
+
+ val = (u8)ret;
+ val |= CHAN_MASK(chan0);
+ val |= CHAN_MASK(chan1);
+
+ ret = i2c_smbus_write_byte_data(client, PORT_MODE_REG, val);
+ if (ret)
+ return ret;
+
+ /* DETECT_CLASS_ENABLE must be set when using AUTO mode,
+ * otherwise the PI does not power up (datasheet section 2.10.2)
+ */
+ val = CHAN_BIT(chan0) | CHAN_UPPER_BIT(chan0) |
+ CHAN_BIT(chan1) | CHAN_UPPER_BIT(chan1);
+
+ ret = i2c_smbus_write_byte_data(client, DETECT_CLASS_ENABLE_REG, val);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static int si3474_pi_disable(struct pse_controller_dev *pcdev, int id)
+{
+ struct si3474_priv *priv = to_si3474_priv(pcdev);
+ struct i2c_client *client;
+ u8 chan0, chan1;
+ s32 ret;
+ u8 val;
+
+ si3474_get_channels(priv, id, &chan0, &chan1);
+ client = si3474_get_chan_client(priv, chan0);
+
+ /* Set PI in shutdown mode */
+ ret = i2c_smbus_read_byte_data(client, PORT_MODE_REG);
+ if (ret < 0)
+ return ret;
+
+ val = (u8)ret;
+ val &= ~CHAN_MASK(chan0);
+ val &= ~CHAN_MASK(chan1);
+
+ ret = i2c_smbus_write_byte_data(client, PORT_MODE_REG, val);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static int si3474_pi_get_chan_current(struct si3474_priv *priv, u8 chan)
+{
+ struct i2c_client *client;
+ u64 tmp_64;
+ s32 ret;
+ u8 reg;
+
+ client = si3474_get_chan_client(priv, chan);
+
+ /* Registers 0x30 to 0x3d */
+ reg = CHAN_REG(PORT1_CURRENT_LSB_REG, chan);
+
+ ret = i2c_smbus_read_word_data(client, reg);
+ if (ret < 0)
+ return ret;
+
+ tmp_64 = ret * SI3474_NA_STEP;
+
+ /* uA = nA / 1000 */
+ tmp_64 = DIV_ROUND_CLOSEST_ULL(tmp_64, 1000);
+ return (int)tmp_64;
+}
+
+static int si3474_pi_get_chan_voltage(struct si3474_priv *priv, u8 chan)
+{
+ struct i2c_client *client;
+ s32 ret;
+ u32 val;
+ u8 reg;
+
+ client = si3474_get_chan_client(priv, chan);
+
+ /* Registers 0x32 to 0x3f */
+ reg = CHAN_REG(PORT1_VOLTAGE_LSB_REG, chan);
+
+ ret = i2c_smbus_read_word_data(client, reg);
+ if (ret < 0)
+ return ret;
+
+ val = ret * SI3474_UV_STEP;
+
+ return (int)val;
+}
+
+static int si3474_pi_get_voltage(struct pse_controller_dev *pcdev, int id)
+{
+ struct si3474_priv *priv = to_si3474_priv(pcdev);
+ struct i2c_client *client;
+ u8 chan0, chan1;
+ s32 ret;
+
+ si3474_get_channels(priv, id, &chan0, &chan1);
+ client = si3474_get_chan_client(priv, chan0);
+
+ /* Check which channels are enabled */
+ ret = i2c_smbus_read_byte_data(client, POWER_STATUS_REG);
+ if (ret < 0)
+ return ret;
+
+ /* Take voltage from the first enabled channel */
+ if (ret & CHAN_BIT(chan0))
+ ret = si3474_pi_get_chan_voltage(priv, chan0);
+ else if (ret & CHAN_BIT(chan1))
+ ret = si3474_pi_get_chan_voltage(priv, chan1);
+ else
+ /* There 'should' be no voltage in this case */
+ return 0;
+
+ return ret;
+}
+
+static int si3474_pi_get_actual_pw(struct pse_controller_dev *pcdev, int id)
+{
+ struct si3474_priv *priv = to_si3474_priv(pcdev);
+ u8 chan0, chan1;
+ u32 uV, uA;
+ u64 tmp_64;
+ s32 ret;
+
+ ret = si3474_pi_get_voltage(&priv->pcdev, id);
+
+ /* Do not read currents if voltage is 0 */
+ if (ret <= 0)
+ return ret;
+ uV = ret;
+
+ si3474_get_channels(priv, id, &chan0, &chan1);
+
+ ret = si3474_pi_get_chan_current(priv, chan0);
+ if (ret < 0)
+ return ret;
+ uA = ret;
+
+ ret = si3474_pi_get_chan_current(priv, chan1);
+ if (ret < 0)
+ return ret;
+ uA += ret;
+
+ tmp_64 = uV;
+ tmp_64 *= uA;
+ /* mW = uV * uA / 1000000000 */
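+ /* e.g. 54,000,000 uV * 100,000 uA / 1e9 = 5400 mW (54 V at 0.1 A) */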
+ return DIV_ROUND_CLOSEST_ULL(tmp_64, 1000000000);
+}
+
+static const struct pse_controller_ops si3474_ops = {
+ .setup_pi_matrix = si3474_setup_pi_matrix,
+ .pi_enable = si3474_pi_enable,
+ .pi_disable = si3474_pi_disable,
+ .pi_get_actual_pw = si3474_pi_get_actual_pw,
+ .pi_get_voltage = si3474_pi_get_voltage,
+ .pi_get_admin_state = si3474_pi_get_admin_state,
+ .pi_get_pw_status = si3474_pi_get_pw_status,
+};
+
+static void si3474_ancillary_i2c_remove(void *data)
+{
+ struct i2c_client *client = data;
+
+ i2c_unregister_device(client);
+}
+
+static int si3474_i2c_probe(struct i2c_client *client)
+{
+ struct device *dev = &client->dev;
+ struct si3474_priv *priv;
+ u8 fw_version;
+ s32 ret;
+
+ if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) {
+ dev_err(dev, "i2c check functionality failed\n");
+ return -ENXIO;
+ }
+
+ priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ ret = i2c_smbus_read_byte_data(client, VENDOR_IC_ID_REG);
+ if (ret < 0)
+ return ret;
+
+ if (ret != SI3474_DEVICE_ID) {
+ dev_err(dev, "Wrong device ID: 0x%x\n", ret);
+ return -ENXIO;
+ }
+
+ ret = i2c_smbus_read_byte_data(client, FIRMWARE_REVISION_REG);
+ if (ret < 0)
+ return ret;
+ fw_version = ret;
+
+ ret = i2c_smbus_read_byte_data(client, CHIP_REVISION_REG);
+ if (ret < 0)
+ return ret;
+
+ dev_dbg(dev, "Chip revision: 0x%x, firmware version: 0x%x\n",
+ ret, fw_version);
+
+ priv->client[0] = client;
+ i2c_set_clientdata(client, priv);
+
+ priv->client[1] = i2c_new_ancillary_device(priv->client[0], "secondary",
+ priv->client[0]->addr + 1);
+ if (IS_ERR(priv->client[1]))
+ return PTR_ERR(priv->client[1]);
+
+ ret = devm_add_action_or_reset(dev, si3474_ancillary_i2c_remove, priv->client[1]);
+ if (ret < 0) {
+ dev_err(&priv->client[1]->dev, "Cannot register remove callback\n");
+ return ret;
+ }
+
+ ret = i2c_smbus_read_byte_data(priv->client[1], VENDOR_IC_ID_REG);
+ if (ret < 0) {
+ dev_err(&priv->client[1]->dev, "Cannot access secondary PSE controller\n");
+ return ret;
+ }
+
+ if (ret != SI3474_DEVICE_ID) {
+ dev_err(&priv->client[1]->dev,
+ "Wrong device ID for secondary PSE controller: 0x%x\n", ret);
+ return -ENXIO;
+ }
+
+ priv->np = dev->of_node;
+ priv->pcdev.owner = THIS_MODULE;
+ priv->pcdev.ops = &si3474_ops;
+ priv->pcdev.dev = dev;
+ priv->pcdev.types = ETHTOOL_PSE_C33;
+ priv->pcdev.nr_lines = SI3474_MAX_CHANS;
+
+ ret = devm_pse_controller_register(dev, &priv->pcdev);
+ if (ret) {
+ dev_err(dev, "Failed to register PSE controller: 0x%x\n", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+static const struct i2c_device_id si3474_id[] = {
+ { "si3474" },
+ {}
+};
+MODULE_DEVICE_TABLE(i2c, si3474_id);
+
+static const struct of_device_id si3474_of_match[] = {
+ {
+ .compatible = "skyworks,si3474",
+ },
+ {},
+};
+MODULE_DEVICE_TABLE(of, si3474_of_match);
+
+static struct i2c_driver si3474_driver = {
+ .probe = si3474_i2c_probe,
+ .id_table = si3474_id,
+ .driver = {
+ .name = "si3474",
+ .of_match_table = si3474_of_match,
+ },
+};
+module_i2c_driver(si3474_driver);
+
+MODULE_AUTHOR("Piotr Kubik <piotr.kubik@adtran.com>");
+MODULE_DESCRIPTION("Skyworks Si3474 PoE PSE Controller driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index cc6c50180663..86a9e927d0ff 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -2823,13 +2823,13 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
if (netif_running(tun->dev))
netif_tx_wake_all_queues(tun->dev);
- strcpy(ifr->ifr_name, tun->dev->name);
+ strscpy(ifr->ifr_name, tun->dev->name);
return 0;
}
static void tun_get_iff(struct tun_struct *tun, struct ifreq *ifr)
{
- strcpy(ifr->ifr_name, tun->dev->name);
+ strscpy(ifr->ifr_name, tun->dev->name);
ifr->ifr_flags = tun_flags(tun);
diff --git a/drivers/net/usb/Kconfig b/drivers/net/usb/Kconfig
index 0a678e31cfaa..856e648d804e 100644
--- a/drivers/net/usb/Kconfig
+++ b/drivers/net/usb/Kconfig
@@ -116,6 +116,7 @@ config USB_LAN78XX
select PHYLINK
select MICROCHIP_PHY
select CRC32
+ imply NET_SELFTESTS
help
This option adds support for Microchip LAN78XX based USB 2
& USB 3 10/100/1000 Ethernet adapters.
diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index 1ff25f57329a..b56e2459ee3c 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -20,6 +20,7 @@
#include <linux/mdio.h>
#include <linux/phy.h>
#include <net/ip6_checksum.h>
+#include <net/selftests.h>
#include <net/vxlan.h>
#include <linux/interrupt.h>
#include <linux/irqdomain.h>
@@ -1702,12 +1703,16 @@ static void lan78xx_get_strings(struct net_device *netdev, u32 stringset,
{
if (stringset == ETH_SS_STATS)
memcpy(data, lan78xx_gstrings, sizeof(lan78xx_gstrings));
+ else if (stringset == ETH_SS_TEST)
+ net_selftest_get_strings(data);
}
static int lan78xx_get_sset_count(struct net_device *netdev, int sset)
{
if (sset == ETH_SS_STATS)
return ARRAY_SIZE(lan78xx_gstrings);
+ else if (sset == ETH_SS_TEST)
+ return net_selftest_get_count();
else
return -EOPNOTSUPP;
}
@@ -1894,6 +1899,7 @@ static const struct ethtool_ops lan78xx_ethtool_ops = {
.set_eeprom = lan78xx_ethtool_set_eeprom,
.get_ethtool_stats = lan78xx_get_stats,
.get_sset_count = lan78xx_get_sset_count,
+ .self_test = net_selftest,
.get_strings = lan78xx_get_strings,
.get_wol = lan78xx_get_wol,
.set_wol = lan78xx_set_wol,
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 3ccd649913b5..571847a7f86d 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -26,6 +26,7 @@
#include <linux/inetdevice.h>
#include <net/arp.h>
+#include <net/flow.h>
#include <net/ip.h>
#include <net/ip_fib.h>
#include <net/ip6_fib.h>
@@ -38,7 +39,6 @@
#include <net/sch_generic.h>
#include <net/netns/generic.h>
#include <net/netfilter/nf_conntrack.h>
-#include <net/inet_dscp.h>
#define DRV_NAME "vrf"
#define DRV_VERSION "1.1"
@@ -505,7 +505,7 @@ static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb,
/* needed to match OIF rule */
fl4.flowi4_l3mdev = vrf_dev->ifindex;
fl4.flowi4_iif = LOOPBACK_IFINDEX;
- fl4.flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(ip4h));
+ fl4.flowi4_dscp = ip4h_dscp(ip4h);
fl4.flowi4_flags = FLOWI_FLAG_ANYSRC;
fl4.flowi4_proto = ip4h->protocol;
fl4.daddr = ip4h->daddr;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c b/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c
index 4a82f8e4c118..36488aa6cc20 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c
@@ -664,8 +664,8 @@ int mt7915_mmio_wed_init(struct mt7915_dev *dev, void *pdev_ptr,
MT_RXQ_WED_RING_BASE;
wed->wlan.wpdma_rx_glo = pci_resource_start(pci_dev, 0) +
MT_WPDMA_GLO_CFG;
- wed->wlan.wpdma_rx = pci_resource_start(pci_dev, 0) +
- MT_RXQ_WED_DATA_RING_BASE;
+ wed->wlan.wpdma_rx[0] = pci_resource_start(pci_dev, 0) +
+ MT_RXQ_WED_DATA_RING_BASE;
} else {
struct platform_device *plat_dev = pdev_ptr;
struct resource *res;
@@ -687,7 +687,7 @@ int mt7915_mmio_wed_init(struct mt7915_dev *dev, void *pdev_ptr,
wed->wlan.wpdma_tx = res->start + MT_TXQ_WED_RING_BASE;
wed->wlan.wpdma_txfree = res->start + MT_RXQ_WED_RING_BASE;
wed->wlan.wpdma_rx_glo = res->start + MT_WPDMA_GLO_CFG;
- wed->wlan.wpdma_rx = res->start + MT_RXQ_WED_DATA_RING_BASE;
+ wed->wlan.wpdma_rx[0] = res->start + MT_RXQ_WED_DATA_RING_BASE;
}
wed->wlan.nbuf = MT7915_HW_TOKEN_SIZE;
wed->wlan.tx_tbit[0] = is_mt7915(&dev->mt76) ? 4 : 30;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mmio.c b/drivers/net/wireless/mediatek/mt76/mt7996/mmio.c
index 30b40f4a91be..fb2428a9b877 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7996/mmio.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7996/mmio.c
@@ -503,9 +503,9 @@ int mt7996_mmio_wed_init(struct mt7996_dev *dev, void *pdev_ptr,
}
wed->wlan.wpdma_rx_glo = wed->wlan.phy_base + hif1_ofs + MT_WFDMA0_GLO_CFG;
- wed->wlan.wpdma_rx = wed->wlan.phy_base + hif1_ofs +
- MT_RXQ_RING_BASE(MT7996_RXQ_BAND0) +
- MT7996_RXQ_BAND0 * MT_RING_SIZE;
+ wed->wlan.wpdma_rx[0] = wed->wlan.phy_base + hif1_ofs +
+ MT_RXQ_RING_BASE(MT7996_RXQ_BAND0) +
+ MT7996_RXQ_BAND0 * MT_RING_SIZE;
wed->wlan.id = MT7996_DEVICE_ID_2;
wed->wlan.tx_tbit[0] = ffs(MT_INT_TX_DONE_BAND2) - 1;
@@ -518,9 +518,9 @@ int mt7996_mmio_wed_init(struct mt7996_dev *dev, void *pdev_ptr,
wed->wlan.wpdma_rx_glo = wed->wlan.phy_base + MT_WFDMA0_GLO_CFG;
- wed->wlan.wpdma_rx = wed->wlan.phy_base +
- MT_RXQ_RING_BASE(MT7996_RXQ_BAND0) +
- MT7996_RXQ_BAND0 * MT_RING_SIZE;
+ wed->wlan.wpdma_rx[0] = wed->wlan.phy_base +
+ MT_RXQ_RING_BASE(MT7996_RXQ_BAND0) +
+ MT7996_RXQ_BAND0 * MT_RING_SIZE;
wed->wlan.wpdma_rx_rro[0] = wed->wlan.phy_base +
MT_RXQ_RING_BASE(MT7996_RXQ_RRO_BAND0) +
diff --git a/drivers/net/wwan/iosm/iosm_ipc_pcie.c b/drivers/net/wwan/iosm/iosm_ipc_pcie.c
index a066977af0be..08ff0d6ccfab 100644
--- a/drivers/net/wwan/iosm/iosm_ipc_pcie.c
+++ b/drivers/net/wwan/iosm/iosm_ipc_pcie.c
@@ -69,7 +69,7 @@ static int ipc_pcie_resources_request(struct iosm_pcie *ipc_pcie)
{
struct pci_dev *pci = ipc_pcie->pci;
u32 cap = 0;
- u32 ret;
+ int ret;
/* Reserved PCI I/O and memory resources.
* Mark all PCI regions associated with PCI device pci as
diff --git a/drivers/nfc/pn533/pn533.c b/drivers/nfc/pn533/pn533.c
index 14661249c690..2b043a9f9533 100644
--- a/drivers/nfc/pn533/pn533.c
+++ b/drivers/nfc/pn533/pn533.c
@@ -1412,11 +1412,9 @@ static int pn533_autopoll_complete(struct pn533 *dev, void *arg,
if (dev->poll_mod_count != 0)
return rc;
goto stop_poll;
- } else if (rc < 0) {
- nfc_err(dev->dev,
- "Error %d when running autopoll\n", rc);
- goto stop_poll;
}
+ nfc_err(dev->dev, "Error %d when running autopoll\n", rc);
+ goto stop_poll;
}
nbtg = resp->data[0];
@@ -1505,11 +1503,9 @@ static int pn533_poll_complete(struct pn533 *dev, void *arg,
if (dev->poll_mod_count != 0)
return rc;
goto stop_poll;
- } else if (rc < 0) {
- nfc_err(dev->dev,
- "Error %d when running poll\n", rc);
- goto stop_poll;
}
+ nfc_err(dev->dev, "Error %d when running poll\n", rc);
+ goto stop_poll;
}
cur_mod = dev->poll_mod_active[dev->poll_mod_curr];
diff --git a/drivers/nfc/s3fwrn5/Kconfig b/drivers/nfc/s3fwrn5/Kconfig
index 8a6b1a79de25..96386b73fa2b 100644
--- a/drivers/nfc/s3fwrn5/Kconfig
+++ b/drivers/nfc/s3fwrn5/Kconfig
@@ -1,8 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
config NFC_S3FWRN5
tristate
- select CRYPTO
- select CRYPTO_HASH
+ select CRYPTO_LIB_SHA1
help
Core driver for Samsung S3FWRN5 NFC chip. Contains core utilities
of chip. It's intended to be used by PHYs to avoid duplicating lots
diff --git a/drivers/nfc/s3fwrn5/firmware.c b/drivers/nfc/s3fwrn5/firmware.c
index 781cdbcac104..64d61b2a715a 100644
--- a/drivers/nfc/s3fwrn5/firmware.c
+++ b/drivers/nfc/s3fwrn5/firmware.c
@@ -8,7 +8,6 @@
#include <linux/completion.h>
#include <linux/firmware.h>
-#include <crypto/hash.h>
#include <crypto/sha1.h>
#include "s3fwrn5.h"
@@ -411,27 +410,13 @@ int s3fwrn5_fw_download(struct s3fwrn5_fw_info *fw_info)
struct device *dev = &fw_info->ndev->nfc_dev->dev;
struct s3fwrn5_fw_image *fw = &fw_info->fw;
u8 hash_data[SHA1_DIGEST_SIZE];
- struct crypto_shash *tfm;
u32 image_size, off;
int ret;
image_size = fw_info->sector_size * fw->image_sectors;
/* Compute SHA of firmware data */
-
- tfm = crypto_alloc_shash("sha1", 0, 0);
- if (IS_ERR(tfm)) {
- dev_err(dev, "Cannot allocate shash (code=%pe)\n", tfm);
- return PTR_ERR(tfm);
- }
-
- ret = crypto_shash_tfm_digest(tfm, fw->image, image_size, hash_data);
-
- crypto_free_shash(tfm);
- if (ret) {
- dev_err(dev, "Cannot compute hash (code=%d)\n", ret);
- return ret;
- }
+ sha1(fw->image, image_size, hash_data);
/* Firmware update process */
diff --git a/drivers/ptp/Kconfig b/drivers/ptp/Kconfig
index 204278eb215e..9256bf2e8ad4 100644
--- a/drivers/ptp/Kconfig
+++ b/drivers/ptp/Kconfig
@@ -252,4 +252,15 @@ config PTP_S390
driver provides the raw clock value without the delta to
userspace. That way userspace programs like chrony could steer
the kernel clock.
+
+config PTP_NETC_V4_TIMER
+ tristate "NXP NETC V4 Timer PTP Driver"
+ depends on PTP_1588_CLOCK
+ depends on PCI_MSI
+ help
+ This driver adds support for using the NXP NETC V4 Timer as a PTP
+ clock. The clock is used by the ENETC V4 or NETC V4 switch for PTP
+ time synchronization. It also supports periodic output signals
+ (e.g. PPS) and external trigger timestamping.
+
endmenu
diff --git a/drivers/ptp/Makefile b/drivers/ptp/Makefile
index 25f846fe48c9..8985d723d29c 100644
--- a/drivers/ptp/Makefile
+++ b/drivers/ptp/Makefile
@@ -23,3 +23,4 @@ obj-$(CONFIG_PTP_1588_CLOCK_VMW) += ptp_vmw.o
obj-$(CONFIG_PTP_1588_CLOCK_OCP) += ptp_ocp.o
obj-$(CONFIG_PTP_DFL_TOD) += ptp_dfl_tod.o
obj-$(CONFIG_PTP_S390) += ptp_s390.o
+obj-$(CONFIG_PTP_NETC_V4_TIMER) += ptp_netc.o
diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c
index 1cc06b7cb17e..5739a57958c7 100644
--- a/drivers/ptp/ptp_clock.c
+++ b/drivers/ptp/ptp_clock.c
@@ -11,6 +11,7 @@
#include <linux/module.h>
#include <linux/posix-clock.h>
#include <linux/pps_kernel.h>
+#include <linux/property.h>
#include <linux/slab.h>
#include <linux/syscalls.h>
#include <linux/uaccess.h>
@@ -100,6 +101,9 @@ static int ptp_clock_settime(struct posix_clock *pc, const struct timespec64 *tp
return -EBUSY;
}
+ if (!timespec64_valid_settod(tp))
+ return -EINVAL;
+
return ptp->info->settime64(ptp->info, tp);
}
@@ -130,7 +134,7 @@ static int ptp_clock_adjtime(struct posix_clock *pc, struct __kernel_timex *tx)
ops = ptp->info;
if (tx->modes & ADJ_SETOFFSET) {
- struct timespec64 ts;
+ struct timespec64 ts, ts2;
ktime_t kt;
s64 delta;
@@ -143,6 +147,14 @@ static int ptp_clock_adjtime(struct posix_clock *pc, struct __kernel_timex *tx)
if ((unsigned long) ts.tv_nsec >= NSEC_PER_SEC)
return -EINVAL;
+ /* Make sure the offset is valid */
+ err = ptp_clock_gettime(pc, &ts2);
+ if (err)
+ return err;
+ ts2 = timespec64_add(ts2, ts);
+ if (!timespec64_valid_settod(&ts2))
+ return -EINVAL;
+
kt = timespec64_to_ktime(ts);
delta = ktime_to_ns(kt);
err = ops->adjtime(ops, delta);
@@ -477,6 +489,58 @@ int ptp_clock_index(struct ptp_clock *ptp)
}
EXPORT_SYMBOL(ptp_clock_index);
+static int ptp_clock_of_node_match(struct device *dev, const void *data)
+{
+ const struct device_node *parent_np = data;
+
+ return (dev->parent && dev_of_node(dev->parent) == parent_np);
+}
+
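+/* Look up the PHC index of the clock whose parent device was created
+ * from @np. Returns -1 when no matching PTP clock is registered.
+ * A usage sketch, assuming a hypothetical "ptp-timer" phandle in the
+ * consumer's DT node:
+ *
+ *	np = of_parse_phandle(dev->of_node, "ptp-timer", 0);
+ *	phc_index = ptp_clock_index_by_of_node(np);
+ *	of_node_put(np);
+ */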
+int ptp_clock_index_by_of_node(struct device_node *np)
+{
+ struct ptp_clock *ptp;
+ struct device *dev;
+ int phc_index;
+
+ dev = class_find_device(&ptp_class, NULL, np,
+ ptp_clock_of_node_match);
+ if (!dev)
+ return -1;
+
+ ptp = dev_get_drvdata(dev);
+ phc_index = ptp_clock_index(ptp);
+ put_device(dev);
+
+ return phc_index;
+}
+EXPORT_SYMBOL_GPL(ptp_clock_index_by_of_node);
+
+static int ptp_clock_dev_match(struct device *dev, const void *data)
+{
+ const struct device *parent = data;
+
+ return dev->parent == parent;
+}
+
+int ptp_clock_index_by_dev(struct device *parent)
+{
+ struct ptp_clock *ptp;
+ struct device *dev;
+ int phc_index;
+
+ dev = class_find_device(&ptp_class, NULL, parent,
+ ptp_clock_dev_match);
+ if (!dev)
+ return -1;
+
+ ptp = dev_get_drvdata(dev);
+ phc_index = ptp_clock_index(ptp);
+ put_device(dev);
+
+ return phc_index;
+}
+EXPORT_SYMBOL_GPL(ptp_clock_index_by_dev);
+
int ptp_find_pin(struct ptp_clock *ptp,
enum ptp_pin_function func, unsigned int chan)
{
diff --git a/drivers/ptp/ptp_clockmatrix.c b/drivers/ptp/ptp_clockmatrix.c
index b8d4df8c6da2..59cd6bbb33f3 100644
--- a/drivers/ptp/ptp_clockmatrix.c
+++ b/drivers/ptp/ptp_clockmatrix.c
@@ -1161,7 +1161,7 @@ static int set_pll_output_mask(struct idtcm *idtcm, u16 addr, u8 val)
SET_U16_MSB(idtcm->channel[3].output_mask, val);
break;
default:
- err = -EFAULT; /* Bad address */;
+ err = -EFAULT; /* Bad address */
break;
}
diff --git a/drivers/ptp/ptp_netc.c b/drivers/ptp/ptp_netc.c
new file mode 100644
index 000000000000..8c5fea1f43fa
--- /dev/null
+++ b/drivers/ptp/ptp_netc.c
@@ -0,0 +1,1017 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
+/*
+ * NXP NETC V4 Timer driver
+ * Copyright 2025 NXP
+ */
+
+#include <linux/bitfield.h>
+#include <linux/clk.h>
+#include <linux/fsl/netc_global.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/pci.h>
+#include <linux/ptp_clock_kernel.h>
+
+#define NETC_TMR_PCI_VENDOR_NXP 0x1131
+
+#define NETC_TMR_CTRL 0x0080
+#define TMR_CTRL_CK_SEL GENMASK(1, 0)
+#define TMR_CTRL_TE BIT(2)
+#define TMR_ETEP(i) BIT(8 + (i))
+#define TMR_COMP_MODE BIT(15)
+#define TMR_CTRL_TCLK_PERIOD GENMASK(25, 16)
+#define TMR_CTRL_FS BIT(28)
+
+#define NETC_TMR_TEVENT 0x0084
+#define TMR_TEVENT_PPEN(i) BIT(7 - (i))
+#define TMR_TEVENT_PPEN_ALL GENMASK(7, 5)
+#define TMR_TEVENT_ALMEN(i) BIT(16 + (i))
+#define TMR_TEVENT_ETS_THREN(i) BIT(20 + (i))
+#define TMR_TEVENT_ETSEN(i) BIT(24 + (i))
+#define TMR_TEVENT_ETS_OVEN(i) BIT(28 + (i))
+#define TMR_TEVENT_ETS(i) (TMR_TEVENT_ETS_THREN(i) | \
+ TMR_TEVENT_ETSEN(i) | \
+ TMR_TEVENT_ETS_OVEN(i))
+
+#define NETC_TMR_TEMASK 0x0088
+#define NETC_TMR_STAT 0x0094
+#define TMR_STAT_ETS_VLD(i) BIT(24 + (i))
+
+#define NETC_TMR_CNT_L 0x0098
+#define NETC_TMR_CNT_H 0x009c
+#define NETC_TMR_ADD 0x00a0
+#define NETC_TMR_PRSC 0x00a8
+#define NETC_TMR_ECTRL 0x00ac
+#define NETC_TMR_OFF_L 0x00b0
+#define NETC_TMR_OFF_H 0x00b4
+
+/* i = 0, 1; i indicates the index of TMR_ALARM */
+#define NETC_TMR_ALARM_L(i) (0x00b8 + (i) * 8)
+#define NETC_TMR_ALARM_H(i) (0x00bc + (i) * 8)
+
+/* i = 0, 1, 2; i indicates the index of TMR_FIPER */
+#define NETC_TMR_FIPER(i) (0x00d0 + (i) * 4)
+
+#define NETC_TMR_FIPER_CTRL 0x00dc
+#define FIPER_CTRL_DIS(i) (BIT(7) << (i) * 8)
+#define FIPER_CTRL_PG(i) (BIT(6) << (i) * 8)
+#define FIPER_CTRL_FS_ALARM(i) (BIT(5) << (i) * 8)
+#define FIPER_CTRL_PW(i) (GENMASK(4, 0) << (i) * 8)
+#define FIPER_CTRL_SET_PW(i, v) (((v) & GENMASK(4, 0)) << 8 * (i))
+
+/* i = 0, 1; i indicates the index of TMR_ETTS */
+#define NETC_TMR_ETTS_L(i) (0x00e0 + (i) * 8)
+#define NETC_TMR_ETTS_H(i) (0x00e4 + (i) * 8)
+#define NETC_TMR_CUR_TIME_L 0x00f0
+#define NETC_TMR_CUR_TIME_H 0x00f4
+
+#define NETC_TMR_REGS_BAR 0
+#define NETC_GLOBAL_OFFSET 0x10000
+#define NETC_GLOBAL_IPBRR0 0xbf8
+#define IPBRR0_IP_REV GENMASK(15, 0)
+#define NETC_REV_4_1 0x0401
+
+#define NETC_TMR_FIPER_NUM 3
+#define NETC_TMR_INVALID_CHANNEL NETC_TMR_FIPER_NUM
+#define NETC_TMR_DEFAULT_PRSC 2
+#define NETC_TMR_DEFAULT_ALARM GENMASK_ULL(63, 0)
+#define NETC_TMR_DEFAULT_FIPER GENMASK(31, 0)
+#define NETC_TMR_FIPER_MAX_PW GENMASK(4, 0)
+#define NETC_TMR_ALARM_NUM 2
+#define NETC_TMR_DEFAULT_ETTF_THR 7
+
+/* 1588 timer reference clock source select */
+#define NETC_TMR_CCM_TIMER1 0 /* enet_timer1_clk_root, from CCM */
+#define NETC_TMR_SYSTEM_CLK 1 /* enet_clk_root/2, from CCM */
+#define NETC_TMR_EXT_OSC 2 /* tmr_1588_clk, from IO pins */
+
+#define NETC_TMR_SYSCLK_333M 333333333U
+
+enum netc_pp_type {
+ NETC_PP_PPS = 1,
+ NETC_PP_PEROUT,
+};
+
+struct netc_pp {
+ enum netc_pp_type type;
+ bool enabled;
+ int alarm_id;
+ u32 period; /* pulse period, ns */
+ u64 stime; /* start time, ns */
+};
+
+struct netc_timer {
+ void __iomem *base;
+ struct pci_dev *pdev;
+ spinlock_t lock; /* Prevent concurrent access to registers */
+
+ struct ptp_clock *clock;
+ struct ptp_clock_info caps;
+ u32 clk_select;
+ u32 clk_freq;
+ u32 oclk_prsc;
+ /* High 32-bit is integer part, low 32-bit is fractional part */
+ u64 period;
+
+ int irq;
+ char irq_name[24];
+ int revision;
+ u32 tmr_emask;
+ u8 pps_channel;
+ u8 fs_alarm_num;
+ u8 fs_alarm_bitmap;
+ struct netc_pp pp[NETC_TMR_FIPER_NUM]; /* periodic pulse */
+};
+
+#define netc_timer_rd(p, o) netc_read((p)->base + (o))
+#define netc_timer_wr(p, o, v) netc_write((p)->base + (o), v)
+#define ptp_to_netc_timer(ptp) container_of((ptp), struct netc_timer, caps)
+
+static const char *const timer_clk_src[] = {
+ "ccm",
+ "ext"
+};
+
+static void netc_timer_cnt_write(struct netc_timer *priv, u64 ns)
+{
+ u32 tmr_cnt_h = upper_32_bits(ns);
+ u32 tmr_cnt_l = lower_32_bits(ns);
+
+ /* A write to the TMR_CNT_L register copies the written value
+ * into the shadow TMR_CNT_L register, and a write to TMR_CNT_H
+ * does the same for the shadow TMR_CNT_H register. The contents
+ * of the shadow registers are copied into the TMR_CNT_L and
+ * TMR_CNT_H registers following a write into the TMR_CNT_H
+ * register, so the user must write to TMR_CNT_L first. The
+ * other H/L register pairs behave the same way.
+ */
+ netc_timer_wr(priv, NETC_TMR_CNT_L, tmr_cnt_l);
+ netc_timer_wr(priv, NETC_TMR_CNT_H, tmr_cnt_h);
+}
+
+static u64 netc_timer_offset_read(struct netc_timer *priv)
+{
+ u32 tmr_off_l, tmr_off_h;
+ u64 offset;
+
+ tmr_off_l = netc_timer_rd(priv, NETC_TMR_OFF_L);
+ tmr_off_h = netc_timer_rd(priv, NETC_TMR_OFF_H);
+ offset = (((u64)tmr_off_h) << 32) | tmr_off_l;
+
+ return offset;
+}
+
+static void netc_timer_offset_write(struct netc_timer *priv, u64 offset)
+{
+ u32 tmr_off_h = upper_32_bits(offset);
+ u32 tmr_off_l = lower_32_bits(offset);
+
+ netc_timer_wr(priv, NETC_TMR_OFF_L, tmr_off_l);
+ netc_timer_wr(priv, NETC_TMR_OFF_H, tmr_off_h);
+}
+
+static u64 netc_timer_cur_time_read(struct netc_timer *priv)
+{
+ u32 time_h, time_l;
+ u64 ns;
+
+ /* The user should read NETC_TMR_CUR_TIME_L first to
+ * get the correct current time.
+ */
+ time_l = netc_timer_rd(priv, NETC_TMR_CUR_TIME_L);
+ time_h = netc_timer_rd(priv, NETC_TMR_CUR_TIME_H);
+ ns = (u64)time_h << 32 | time_l;
+
+ return ns;
+}
+
+static void netc_timer_alarm_write(struct netc_timer *priv,
+ u64 alarm, int index)
+{
+ u32 alarm_h = upper_32_bits(alarm);
+ u32 alarm_l = lower_32_bits(alarm);
+
+ netc_timer_wr(priv, NETC_TMR_ALARM_L(index), alarm_l);
+ netc_timer_wr(priv, NETC_TMR_ALARM_H(index), alarm_h);
+}
+
+static u32 netc_timer_get_integral_period(struct netc_timer *priv)
+{
+ u32 tmr_ctrl, integral_period;
+
+ tmr_ctrl = netc_timer_rd(priv, NETC_TMR_CTRL);
+ integral_period = FIELD_GET(TMR_CTRL_TCLK_PERIOD, tmr_ctrl);
+
+ return integral_period;
+}
+
+static u32 netc_timer_calculate_fiper_pw(struct netc_timer *priv,
+ u32 fiper)
+{
+ u64 divisor, pulse_width;
+
+ /* Set the FIPER pulse width to half FIPER interval by default.
+ * pulse_width = (fiper / 2) / TMR_GCLK_period,
+ * TMR_GCLK_period = NSEC_PER_SEC / TMR_GCLK_freq,
+ * TMR_GCLK_freq = (clk_freq / oclk_prsc) Hz,
+ * so pulse_width = fiper * clk_freq / (2 * NSEC_PER_SEC * oclk_prsc).
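+ * Worked example: fiper ~= 1e9 ns (1 s), clk_freq = 333333333 Hz
+ * and oclk_prsc = 2 give pulse_width ~= 83333333, far above
+ * NETC_TMR_FIPER_MAX_PW (31), so it is clamped below.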
+ */
+ divisor = mul_u32_u32(2 * NSEC_PER_SEC, priv->oclk_prsc);
+ pulse_width = div64_u64(mul_u32_u32(fiper, priv->clk_freq), divisor);
+
+ /* The FIPER_PW field only has 5 bits, need to update oclk_prsc */
+ if (pulse_width > NETC_TMR_FIPER_MAX_PW)
+ pulse_width = NETC_TMR_FIPER_MAX_PW;
+
+ return pulse_width;
+}
+
+static void netc_timer_set_pps_alarm(struct netc_timer *priv, int channel,
+ u32 integral_period)
+{
+ struct netc_pp *pp = &priv->pp[channel];
+ u64 alarm;
+
+ /* Get the alarm value */
+ alarm = netc_timer_cur_time_read(priv) + NSEC_PER_MSEC;
+ alarm = roundup_u64(alarm, NSEC_PER_SEC);
+ alarm = roundup_u64(alarm, integral_period);
+
+ netc_timer_alarm_write(priv, alarm, pp->alarm_id);
+}
+
+static void netc_timer_set_perout_alarm(struct netc_timer *priv, int channel,
+ u32 integral_period)
+{
+ u64 cur_time = netc_timer_cur_time_read(priv);
+ struct netc_pp *pp = &priv->pp[channel];
+ u64 alarm, delta, min_time;
+ u32 period = pp->period;
+ u64 stime = pp->stime;
+
+ min_time = cur_time + NSEC_PER_MSEC + period;
+ if (stime < min_time) {
+ delta = min_time - stime;
+ stime += roundup_u64(delta, period);
+ }
+
+ alarm = roundup_u64(stime - period, integral_period);
+ netc_timer_alarm_write(priv, alarm, pp->alarm_id);
+}
+
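+/* Allocate the lowest free ALARM index. Returns priv->fs_alarm_num
+ * when all ALARMs are in use; callers hold priv->lock, which
+ * protects fs_alarm_bitmap.
+ */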
+static int netc_timer_get_alarm_id(struct netc_timer *priv)
+{
+ int i;
+
+ for (i = 0; i < priv->fs_alarm_num; i++) {
+ if (!(priv->fs_alarm_bitmap & BIT(i))) {
+ priv->fs_alarm_bitmap |= BIT(i);
+ break;
+ }
+ }
+
+ return i;
+}
+
+static u64 netc_timer_get_gclk_period(struct netc_timer *priv)
+{
+ /* TMR_GCLK_freq = (clk_freq / oclk_prsc) Hz.
+ * TMR_GCLK_period = NSEC_PER_SEC / TMR_GCLK_freq.
+ * TMR_GCLK_period = (NSEC_PER_SEC * oclk_prsc) / clk_freq
+ */
+
+ return div_u64(mul_u32_u32(NSEC_PER_SEC, priv->oclk_prsc),
+ priv->clk_freq);
+}
+
+static void netc_timer_enable_periodic_pulse(struct netc_timer *priv,
+ u8 channel)
+{
+ u32 fiper_pw, fiper, fiper_ctrl, integral_period;
+ struct netc_pp *pp = &priv->pp[channel];
+ int alarm_id = pp->alarm_id;
+
+ integral_period = netc_timer_get_integral_period(priv);
+ /* Set to desired FIPER interval in ns - TCLK_PERIOD */
+ fiper = pp->period - integral_period;
+ fiper_pw = netc_timer_calculate_fiper_pw(priv, fiper);
+
+ fiper_ctrl = netc_timer_rd(priv, NETC_TMR_FIPER_CTRL);
+ fiper_ctrl &= ~(FIPER_CTRL_DIS(channel) | FIPER_CTRL_PW(channel) |
+ FIPER_CTRL_FS_ALARM(channel));
+ fiper_ctrl |= FIPER_CTRL_SET_PW(channel, fiper_pw);
+ fiper_ctrl |= alarm_id ? FIPER_CTRL_FS_ALARM(channel) : 0;
+
+ priv->tmr_emask |= TMR_TEVENT_PPEN(channel) |
+ TMR_TEVENT_ALMEN(alarm_id);
+
+ if (pp->type == NETC_PP_PPS)
+ netc_timer_set_pps_alarm(priv, channel, integral_period);
+ else
+ netc_timer_set_perout_alarm(priv, channel, integral_period);
+
+ netc_timer_wr(priv, NETC_TMR_TEMASK, priv->tmr_emask);
+ netc_timer_wr(priv, NETC_TMR_FIPER(channel), fiper);
+ netc_timer_wr(priv, NETC_TMR_FIPER_CTRL, fiper_ctrl);
+}
+
+static void netc_timer_disable_periodic_pulse(struct netc_timer *priv,
+ u8 channel)
+{
+ struct netc_pp *pp = &priv->pp[channel];
+ int alarm_id = pp->alarm_id;
+ u32 fiper_ctrl;
+
+ if (!pp->enabled)
+ return;
+
+ priv->tmr_emask &= ~(TMR_TEVENT_PPEN(channel) |
+ TMR_TEVENT_ALMEN(alarm_id));
+
+ fiper_ctrl = netc_timer_rd(priv, NETC_TMR_FIPER_CTRL);
+ fiper_ctrl |= FIPER_CTRL_DIS(channel);
+
+ netc_timer_alarm_write(priv, NETC_TMR_DEFAULT_ALARM, alarm_id);
+ netc_timer_wr(priv, NETC_TMR_TEMASK, priv->tmr_emask);
+ netc_timer_wr(priv, NETC_TMR_FIPER(channel), NETC_TMR_DEFAULT_FIPER);
+ netc_timer_wr(priv, NETC_TMR_FIPER_CTRL, fiper_ctrl);
+}
+
+static u8 netc_timer_select_pps_channel(struct netc_timer *priv)
+{
+ int i;
+
+ for (i = 0; i < NETC_TMR_FIPER_NUM; i++) {
+ if (!priv->pp[i].enabled)
+ return i;
+ }
+
+ return NETC_TMR_INVALID_CHANNEL;
+}
+
+/* Note that users should not use this API to output a PPS signal on
+ * external pins, because PTP_CLK_REQ_PPS triggers an internal PPS event
+ * for input into the kernel PPS subsystem. See:
+ * https://lore.kernel.org/r/20201117213826.18235-1-a.fatoum@pengutronix.de
+ */
+static int netc_timer_enable_pps(struct netc_timer *priv,
+ struct ptp_clock_request *rq, int on)
+{
+ struct device *dev = &priv->pdev->dev;
+ unsigned long flags;
+ struct netc_pp *pp;
+ int err = 0;
+
+ spin_lock_irqsave(&priv->lock, flags);
+
+ if (on) {
+ int alarm_id;
+ u8 channel;
+
+ if (priv->pps_channel < NETC_TMR_FIPER_NUM) {
+ channel = priv->pps_channel;
+ } else {
+ channel = netc_timer_select_pps_channel(priv);
+ if (channel == NETC_TMR_INVALID_CHANNEL) {
+ dev_err(dev, "No available FIPERs\n");
+ err = -EBUSY;
+ goto unlock_spinlock;
+ }
+ }
+
+ pp = &priv->pp[channel];
+ if (pp->enabled)
+ goto unlock_spinlock;
+
+ alarm_id = netc_timer_get_alarm_id(priv);
+ if (alarm_id == priv->fs_alarm_num) {
+ dev_err(dev, "No available ALARMs\n");
+ err = -EBUSY;
+ goto unlock_spinlock;
+ }
+
+ pp->enabled = true;
+ pp->type = NETC_PP_PPS;
+ pp->alarm_id = alarm_id;
+ pp->period = NSEC_PER_SEC;
+ priv->pps_channel = channel;
+
+ netc_timer_enable_periodic_pulse(priv, channel);
+ } else {
+ /* pps_channel is invalid if PPS is not enabled, so no
+ * processing is needed.
+ */
+ if (priv->pps_channel >= NETC_TMR_FIPER_NUM)
+ goto unlock_spinlock;
+
+ netc_timer_disable_periodic_pulse(priv, priv->pps_channel);
+ pp = &priv->pp[priv->pps_channel];
+ priv->fs_alarm_bitmap &= ~BIT(pp->alarm_id);
+ memset(pp, 0, sizeof(*pp));
+ priv->pps_channel = NETC_TMR_INVALID_CHANNEL;
+ }
+
+unlock_spinlock:
+ spin_unlock_irqrestore(&priv->lock, flags);
+
+ return err;
+}
+
+static int netc_timer_enable_perout(struct netc_timer *priv,
+ struct ptp_clock_request *rq, int on)
+{
+ struct device *dev = &priv->pdev->dev;
+ u32 channel = rq->perout.index;
+ unsigned long flags;
+ struct netc_pp *pp;
+ int err = 0;
+
+ spin_lock_irqsave(&priv->lock, flags);
+
+ pp = &priv->pp[channel];
+ if (pp->type == NETC_PP_PPS) {
+ dev_err(dev, "FIPER%u is being used for PPS\n", channel);
+ err = -EBUSY;
+ goto unlock_spinlock;
+ }
+
+ if (on) {
+ u64 period_ns, gclk_period, max_period, min_period;
+ struct timespec64 period, stime;
+ u32 integral_period;
+ int alarm_id;
+
+ period.tv_sec = rq->perout.period.sec;
+ period.tv_nsec = rq->perout.period.nsec;
+ period_ns = timespec64_to_ns(&period);
+
+ integral_period = netc_timer_get_integral_period(priv);
+ max_period = (u64)NETC_TMR_DEFAULT_FIPER + integral_period;
+ gclk_period = netc_timer_get_gclk_period(priv);
+ min_period = gclk_period * 4 + integral_period;
+ if (period_ns > max_period || period_ns < min_period) {
+ dev_err(dev, "The period range is %llu ~ %llu\n",
+ min_period, max_period);
+ err = -EINVAL;
+ goto unlock_spinlock;
+ }
+
+ if (pp->enabled) {
+ alarm_id = pp->alarm_id;
+ } else {
+ alarm_id = netc_timer_get_alarm_id(priv);
+ if (alarm_id == priv->fs_alarm_num) {
+ dev_err(dev, "No available ALARMs\n");
+ err = -EBUSY;
+ goto unlock_spinlock;
+ }
+
+ pp->type = NETC_PP_PEROUT;
+ pp->enabled = true;
+ pp->alarm_id = alarm_id;
+ }
+
+ stime.tv_sec = rq->perout.start.sec;
+ stime.tv_nsec = rq->perout.start.nsec;
+ pp->stime = timespec64_to_ns(&stime);
+ pp->period = period_ns;
+
+ netc_timer_enable_periodic_pulse(priv, channel);
+ } else {
+ netc_timer_disable_periodic_pulse(priv, channel);
+ priv->fs_alarm_bitmap &= ~BIT(pp->alarm_id);
+ memset(pp, 0, sizeof(*pp));
+ }
+
+unlock_spinlock:
+ spin_unlock_irqrestore(&priv->lock, flags);
+
+ return err;
+}
+
+static void netc_timer_handle_etts_event(struct netc_timer *priv, int index,
+ bool update_event)
+{
+ struct ptp_clock_event event;
+ u32 etts_l = 0, etts_h = 0;
+
+ while (netc_timer_rd(priv, NETC_TMR_STAT) & TMR_STAT_ETS_VLD(index)) {
+ etts_l = netc_timer_rd(priv, NETC_TMR_ETTS_L(index));
+ etts_h = netc_timer_rd(priv, NETC_TMR_ETTS_H(index));
+ }
+
+ /* Invalid time stamp */
+ if (!etts_l && !etts_h)
+ return;
+
+ if (update_event) {
+ event.type = PTP_CLOCK_EXTTS;
+ event.index = index;
+ event.timestamp = (u64)etts_h << 32;
+ event.timestamp |= etts_l;
+ ptp_clock_event(priv->clock, &event);
+ }
+}
+
+static int netc_timer_enable_extts(struct netc_timer *priv,
+ struct ptp_clock_request *rq, int on)
+{
+ int index = rq->extts.index;
+ unsigned long flags;
+ u32 tmr_ctrl;
+
+ /* Reject requests to enable time stamping on both edges */
+ if ((rq->extts.flags & PTP_EXTTS_EDGES) == PTP_EXTTS_EDGES)
+ return -EOPNOTSUPP;
+
+ spin_lock_irqsave(&priv->lock, flags);
+
+ netc_timer_handle_etts_event(priv, rq->extts.index, false);
+ if (on) {
+ tmr_ctrl = netc_timer_rd(priv, NETC_TMR_CTRL);
+ if (rq->extts.flags & PTP_FALLING_EDGE)
+ tmr_ctrl |= TMR_ETEP(index);
+ else
+ tmr_ctrl &= ~TMR_ETEP(index);
+
+ netc_timer_wr(priv, NETC_TMR_CTRL, tmr_ctrl);
+ priv->tmr_emask |= TMR_TEVENT_ETS(index);
+ } else {
+ priv->tmr_emask &= ~TMR_TEVENT_ETS(index);
+ }
+
+ netc_timer_wr(priv, NETC_TMR_TEMASK, priv->tmr_emask);
+
+ spin_unlock_irqrestore(&priv->lock, flags);
+
+ return 0;
+}
+
+static void netc_timer_disable_fiper(struct netc_timer *priv)
+{
+ u32 fiper_ctrl = netc_timer_rd(priv, NETC_TMR_FIPER_CTRL);
+ int i;
+
+ for (i = 0; i < NETC_TMR_FIPER_NUM; i++) {
+ if (!priv->pp[i].enabled)
+ continue;
+
+ fiper_ctrl |= FIPER_CTRL_DIS(i);
+ netc_timer_wr(priv, NETC_TMR_FIPER(i), NETC_TMR_DEFAULT_FIPER);
+ }
+
+ netc_timer_wr(priv, NETC_TMR_FIPER_CTRL, fiper_ctrl);
+}
+
+static void netc_timer_enable_fiper(struct netc_timer *priv)
+{
+ u32 integral_period = netc_timer_get_integral_period(priv);
+ u32 fiper_ctrl = netc_timer_rd(priv, NETC_TMR_FIPER_CTRL);
+ int i;
+
+ for (i = 0; i < NETC_TMR_FIPER_NUM; i++) {
+ struct netc_pp *pp = &priv->pp[i];
+ u32 fiper;
+
+ if (!pp->enabled)
+ continue;
+
+ fiper_ctrl &= ~FIPER_CTRL_DIS(i);
+
+ if (pp->type == NETC_PP_PPS)
+ netc_timer_set_pps_alarm(priv, i, integral_period);
+ else if (pp->type == NETC_PP_PEROUT)
+ netc_timer_set_perout_alarm(priv, i, integral_period);
+
+ fiper = pp->period - integral_period;
+ netc_timer_wr(priv, NETC_TMR_FIPER(i), fiper);
+ }
+
+ netc_timer_wr(priv, NETC_TMR_FIPER_CTRL, fiper_ctrl);
+}
+
+static int netc_timer_enable(struct ptp_clock_info *ptp,
+ struct ptp_clock_request *rq, int on)
+{
+ struct netc_timer *priv = ptp_to_netc_timer(ptp);
+
+ switch (rq->type) {
+ case PTP_CLK_REQ_PPS:
+ return netc_timer_enable_pps(priv, rq, on);
+ case PTP_CLK_REQ_PEROUT:
+ return netc_timer_enable_perout(priv, rq, on);
+ case PTP_CLK_REQ_EXTTS:
+ return netc_timer_enable_extts(priv, rq, on);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static void netc_timer_adjust_period(struct netc_timer *priv, u64 period)
+{
+ u32 fractional_period = lower_32_bits(period);
+ u32 integral_period = upper_32_bits(period);
+ u32 tmr_ctrl, old_tmr_ctrl;
+ unsigned long flags;
+
+ spin_lock_irqsave(&priv->lock, flags);
+
+ old_tmr_ctrl = netc_timer_rd(priv, NETC_TMR_CTRL);
+ tmr_ctrl = u32_replace_bits(old_tmr_ctrl, integral_period,
+ TMR_CTRL_TCLK_PERIOD);
+ if (tmr_ctrl != old_tmr_ctrl) {
+ netc_timer_disable_fiper(priv);
+ netc_timer_wr(priv, NETC_TMR_CTRL, tmr_ctrl);
+ netc_timer_enable_fiper(priv);
+ }
+
+ netc_timer_wr(priv, NETC_TMR_ADD, fractional_period);
+
+ spin_unlock_irqrestore(&priv->lock, flags);
+}
+
+static int netc_timer_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
+{
+ struct netc_timer *priv = ptp_to_netc_timer(ptp);
+ u64 new_period;
+
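+ /* scaled_ppm carries 16 fractional bits (65536 == 1 ppm), so e.g.
+ * scaled_ppm = 65536 stretches the 32.32 fixed-point period by
+ * one part per million.
+ */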
+ new_period = adjust_by_scaled_ppm(priv->period, scaled_ppm);
+ netc_timer_adjust_period(priv, new_period);
+
+ return 0;
+}
+
+static int netc_timer_adjtime(struct ptp_clock_info *ptp, s64 delta)
+{
+ struct netc_timer *priv = ptp_to_netc_timer(ptp);
+ unsigned long flags;
+ s64 tmr_off;
+
+ spin_lock_irqsave(&priv->lock, flags);
+
+ netc_timer_disable_fiper(priv);
+
+ /* Adjust TMROFF instead of TMR_CNT because the timer
+ * counter keeps increasing while TMR_CNT is being read
+ * and written back, which would introduce latency.
+ */
+ tmr_off = netc_timer_offset_read(priv);
+ tmr_off += delta;
+ netc_timer_offset_write(priv, tmr_off);
+
+ netc_timer_enable_fiper(priv);
+
+ spin_unlock_irqrestore(&priv->lock, flags);
+
+ return 0;
+}
+
+static int netc_timer_gettimex64(struct ptp_clock_info *ptp,
+ struct timespec64 *ts,
+ struct ptp_system_timestamp *sts)
+{
+ struct netc_timer *priv = ptp_to_netc_timer(ptp);
+ unsigned long flags;
+ u64 ns;
+
+ spin_lock_irqsave(&priv->lock, flags);
+
+ ptp_read_system_prets(sts);
+ ns = netc_timer_cur_time_read(priv);
+ ptp_read_system_postts(sts);
+
+ spin_unlock_irqrestore(&priv->lock, flags);
+
+ *ts = ns_to_timespec64(ns);
+
+ return 0;
+}
+
+static int netc_timer_settime64(struct ptp_clock_info *ptp,
+ const struct timespec64 *ts)
+{
+ struct netc_timer *priv = ptp_to_netc_timer(ptp);
+ u64 ns = timespec64_to_ns(ts);
+ unsigned long flags;
+
+ spin_lock_irqsave(&priv->lock, flags);
+
+ netc_timer_disable_fiper(priv);
+ netc_timer_offset_write(priv, 0);
+ netc_timer_cnt_write(priv, ns);
+ netc_timer_enable_fiper(priv);
+
+ spin_unlock_irqrestore(&priv->lock, flags);
+
+ return 0;
+}
+
+static const struct ptp_clock_info netc_timer_ptp_caps = {
+ .owner = THIS_MODULE,
+ .name = "NETC Timer PTP clock",
+ .max_adj = 500000000,
+ .n_pins = 0,
+ .n_alarm = 2,
+ .pps = 1,
+ .n_per_out = 3,
+ .n_ext_ts = 2,
+ .supported_extts_flags = PTP_RISING_EDGE | PTP_FALLING_EDGE |
+ PTP_STRICT_FLAGS,
+ .adjfine = netc_timer_adjfine,
+ .adjtime = netc_timer_adjtime,
+ .gettimex64 = netc_timer_gettimex64,
+ .settime64 = netc_timer_settime64,
+ .enable = netc_timer_enable,
+};
+
+static void netc_timer_init(struct netc_timer *priv)
+{
+ u32 fractional_period = lower_32_bits(priv->period);
+ u32 integral_period = upper_32_bits(priv->period);
+ u32 tmr_ctrl, fiper_ctrl;
+ struct timespec64 now;
+ u64 ns;
+ int i;
+
+ /* Software must enable the timer first, and the selected clock
+ * must be active; otherwise, the registers in the timer clock
+ * domain are not accessible.
+ */
+ tmr_ctrl = FIELD_PREP(TMR_CTRL_CK_SEL, priv->clk_select) |
+ TMR_CTRL_TE | TMR_CTRL_FS;
+ netc_timer_wr(priv, NETC_TMR_CTRL, tmr_ctrl);
+ netc_timer_wr(priv, NETC_TMR_PRSC, priv->oclk_prsc);
+
+ /* Disable FIPER by default */
+ fiper_ctrl = netc_timer_rd(priv, NETC_TMR_FIPER_CTRL);
+ for (i = 0; i < NETC_TMR_FIPER_NUM; i++) {
+ fiper_ctrl |= FIPER_CTRL_DIS(i);
+ fiper_ctrl &= ~FIPER_CTRL_PG(i);
+ }
+ netc_timer_wr(priv, NETC_TMR_FIPER_CTRL, fiper_ctrl);
+ netc_timer_wr(priv, NETC_TMR_ECTRL, NETC_TMR_DEFAULT_ETTF_THR);
+
+ ktime_get_real_ts64(&now);
+ ns = timespec64_to_ns(&now);
+ netc_timer_cnt_write(priv, ns);
+
+ /* Allow atomic writes to TCLK_PERIOD and TMR_ADD; an update to
+ * TCLK_PERIOD does not take effect until TMR_ADD is written.
+ */
+ tmr_ctrl |= FIELD_PREP(TMR_CTRL_TCLK_PERIOD, integral_period) |
+ TMR_COMP_MODE;
+ netc_timer_wr(priv, NETC_TMR_CTRL, tmr_ctrl);
+ netc_timer_wr(priv, NETC_TMR_ADD, fractional_period);
+}
+
+static int netc_timer_pci_probe(struct pci_dev *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct netc_timer *priv;
+ int err;
+
+ priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ pcie_flr(pdev);
+ err = pci_enable_device_mem(pdev);
+ if (err)
+ return dev_err_probe(dev, err, "Failed to enable device\n");
+
+ dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
+ err = pci_request_mem_regions(pdev, KBUILD_MODNAME);
+ if (err) {
+ dev_err(dev, "pci_request_mem_regions() failed, err:%pe\n",
+ ERR_PTR(err));
+ goto disable_dev;
+ }
+
+ pci_set_master(pdev);
+
+ priv->pdev = pdev;
+ priv->base = pci_ioremap_bar(pdev, NETC_TMR_REGS_BAR);
+ if (!priv->base) {
+ err = -ENOMEM;
+ goto release_mem_regions;
+ }
+
+ pci_set_drvdata(pdev, priv);
+
+ return 0;
+
+release_mem_regions:
+ pci_release_mem_regions(pdev);
+disable_dev:
+ pci_disable_device(pdev);
+
+ return err;
+}
+
+static void netc_timer_pci_remove(struct pci_dev *pdev)
+{
+ struct netc_timer *priv = pci_get_drvdata(pdev);
+
+ iounmap(priv->base);
+ pci_release_mem_regions(pdev);
+ pci_disable_device(pdev);
+}
+
+static int netc_timer_get_reference_clk_source(struct netc_timer *priv)
+{
+ struct device *dev = &priv->pdev->dev;
+ struct clk *clk;
+ int i;
+
+ /* Select NETC system clock as the reference clock by default */
+ priv->clk_select = NETC_TMR_SYSTEM_CLK;
+ priv->clk_freq = NETC_TMR_SYSCLK_333M;
+
+ /* Update the clock source of the reference clock if the clock
+ * is specified in DT node.
+ */
+ for (i = 0; i < ARRAY_SIZE(timer_clk_src); i++) {
+ clk = devm_clk_get_optional_enabled(dev, timer_clk_src[i]);
+ if (IS_ERR(clk))
+ return dev_err_probe(dev, PTR_ERR(clk),
+ "Failed to enable clock\n");
+
+ if (clk) {
+ priv->clk_freq = clk_get_rate(clk);
+ priv->clk_select = i ? NETC_TMR_EXT_OSC :
+ NETC_TMR_CCM_TIMER1;
+ break;
+ }
+ }
+
+ /* The period is a 64-bit number: the high 32 bits are the integer
+ * part and the low 32 bits are the fractional part of the period.
+ * To get the desired 32.32 fixed-point format, multiply the
+ * numerator of the fraction by 2^32.
+ */
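+ /* e.g. clk_freq = 333333333 Hz -> period ~= 3.000000003 ns,
+ * stored as 0x3 in the high 32 bits and ~0xC in the low 32 bits.
+ */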
+ priv->period = div_u64((u64)NSEC_PER_SEC << 32, priv->clk_freq);
+
+ return 0;
+}
+
+static int netc_timer_parse_dt(struct netc_timer *priv)
+{
+ return netc_timer_get_reference_clk_source(priv);
+}
+
+static irqreturn_t netc_timer_isr(int irq, void *data)
+{
+ struct netc_timer *priv = data;
+ struct ptp_clock_event event;
+ u32 tmr_event;
+
+ spin_lock(&priv->lock);
+
+ tmr_event = netc_timer_rd(priv, NETC_TMR_TEVENT);
+ tmr_event &= priv->tmr_emask;
+ /* Clear interrupts status */
+ netc_timer_wr(priv, NETC_TMR_TEVENT, tmr_event);
+
+ if (tmr_event & TMR_TEVENT_ALMEN(0))
+ netc_timer_alarm_write(priv, NETC_TMR_DEFAULT_ALARM, 0);
+
+ if (tmr_event & TMR_TEVENT_ALMEN(1))
+ netc_timer_alarm_write(priv, NETC_TMR_DEFAULT_ALARM, 1);
+
+ if (tmr_event & TMR_TEVENT_PPEN_ALL) {
+ event.type = PTP_CLOCK_PPS;
+ ptp_clock_event(priv->clock, &event);
+ }
+
+ if (tmr_event & TMR_TEVENT_ETS(0))
+ netc_timer_handle_etts_event(priv, 0, true);
+
+ if (tmr_event & TMR_TEVENT_ETS(1))
+ netc_timer_handle_etts_event(priv, 1, true);
+
+ spin_unlock(&priv->lock);
+
+ return IRQ_HANDLED;
+}
+
+static int netc_timer_init_msix_irq(struct netc_timer *priv)
+{
+ struct pci_dev *pdev = priv->pdev;
+ int err, n;
+
+ n = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSIX);
+ if (n != 1) {
+ err = (n < 0) ? n : -EPERM;
+ dev_err(&pdev->dev, "pci_alloc_irq_vectors() failed\n");
+ return err;
+ }
+
+ priv->irq = pci_irq_vector(pdev, 0);
+ err = request_irq(priv->irq, netc_timer_isr, 0, priv->irq_name, priv);
+ if (err) {
+ dev_err(&pdev->dev, "request_irq() failed\n");
+ pci_free_irq_vectors(pdev);
+
+ return err;
+ }
+
+ return 0;
+}
+
+static void netc_timer_free_msix_irq(struct netc_timer *priv)
+{
+ struct pci_dev *pdev = priv->pdev;
+
+ disable_irq(priv->irq);
+ free_irq(priv->irq, priv);
+ pci_free_irq_vectors(pdev);
+}
+
+static int netc_timer_get_global_ip_rev(struct netc_timer *priv)
+{
+ u32 val;
+
+ val = netc_timer_rd(priv, NETC_GLOBAL_OFFSET + NETC_GLOBAL_IPBRR0);
+
+ return val & IPBRR0_IP_REV;
+}
+
+static int netc_timer_probe(struct pci_dev *pdev,
+ const struct pci_device_id *id)
+{
+ struct device *dev = &pdev->dev;
+ struct netc_timer *priv;
+ int err;
+
+ err = netc_timer_pci_probe(pdev);
+ if (err)
+ return err;
+
+ priv = pci_get_drvdata(pdev);
+ priv->revision = netc_timer_get_global_ip_rev(priv);
+ if (priv->revision == NETC_REV_4_1)
+ priv->fs_alarm_num = 1;
+ else
+ priv->fs_alarm_num = NETC_TMR_ALARM_NUM;
+
+ err = netc_timer_parse_dt(priv);
+ if (err)
+ goto timer_pci_remove;
+
+ priv->caps = netc_timer_ptp_caps;
+ priv->oclk_prsc = NETC_TMR_DEFAULT_PRSC;
+ priv->pps_channel = NETC_TMR_INVALID_CHANNEL;
+ spin_lock_init(&priv->lock);
+ snprintf(priv->irq_name, sizeof(priv->irq_name), "ptp-netc %s",
+ pci_name(pdev));
+
+ err = netc_timer_init_msix_irq(priv);
+ if (err)
+ goto timer_pci_remove;
+
+ netc_timer_init(priv);
+ priv->clock = ptp_clock_register(&priv->caps, dev);
+ if (IS_ERR(priv->clock)) {
+ err = PTR_ERR(priv->clock);
+ goto free_msix_irq;
+ }
+
+ return 0;
+
+free_msix_irq:
+ netc_timer_free_msix_irq(priv);
+timer_pci_remove:
+ netc_timer_pci_remove(pdev);
+
+ return err;
+}
+
+static void netc_timer_remove(struct pci_dev *pdev)
+{
+ struct netc_timer *priv = pci_get_drvdata(pdev);
+
+ netc_timer_wr(priv, NETC_TMR_TEMASK, 0);
+ netc_timer_wr(priv, NETC_TMR_CTRL, 0);
+ ptp_clock_unregister(priv->clock);
+ netc_timer_free_msix_irq(priv);
+ netc_timer_pci_remove(pdev);
+}
+
+static const struct pci_device_id netc_timer_id_table[] = {
+ { PCI_DEVICE(NETC_TMR_PCI_VENDOR_NXP, 0xee02) },
+ { }
+};
+MODULE_DEVICE_TABLE(pci, netc_timer_id_table);
+
+static struct pci_driver netc_timer_driver = {
+ .name = KBUILD_MODNAME,
+ .id_table = netc_timer_id_table,
+ .probe = netc_timer_probe,
+ .remove = netc_timer_remove,
+};
+module_pci_driver(netc_timer_driver);
+
+MODULE_DESCRIPTION("NXP NETC Timer PTP Driver");
+MODULE_LICENSE("Dual BSD/GPL");
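
A quick numeric check of the period computed above with div_u64((u64)NSEC_PER_SEC << 32, clk_freq): the result is a fixed-point value carrying whole nanoseconds per tick in the upper 32 bits and fractional nanoseconds in the lower 32. A minimal standalone sketch of the same arithmetic, assuming a hypothetical 200 MHz reference clock (the frequency is illustrative, not taken from the driver):

#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC 1000000000ULL

int main(void)
{
	uint64_t clk_freq = 200000000ULL;	/* assumed 200 MHz clock */
	/* (10^9 << 32) still fits in 64 bits, so the shift cannot overflow */
	uint64_t period = (NSEC_PER_SEC << 32) / clk_freq;

	printf("period = 0x%016llx (%.6f ns per tick)\n",
	       (unsigned long long)period,
	       (double)period / 4294967296.0);
	return 0;
}

For 200 MHz this prints exactly 5 ns per tick; frequencies that do not divide 10^9 evenly keep their remainder in the fractional half, which is what lets the timer accumulate sub-nanosecond steps without drift.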
diff --git a/drivers/staging/octeon/ethernet-tx.c b/drivers/staging/octeon/ethernet-tx.c
index 261f8dbdc382..0ba240e634a1 100644
--- a/drivers/staging/octeon/ethernet-tx.c
+++ b/drivers/staging/octeon/ethernet-tx.c
@@ -346,8 +346,7 @@ netdev_tx_t cvm_oct_xmit(struct sk_buff *skb, struct net_device *dev)
* The skbuff will be reused without ever being freed. We must
* cleanup a bunch of core things.
*/
- dst_release(skb_dst(skb));
- skb_dst_set(skb, NULL);
+ skb_dst_drop(skb);
skb_ext_reset(skb);
nf_reset_ct(skb);
skb_reset_redirect(skb);
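
The replacement helper folds the open-coded release-and-clear pair into one call and, unlike a bare dst_release(), also copes with dst entries held without a reference (SKB_DST_NOREF). A conceptual sketch of what skb_dst_drop() does, not the kernel's exact implementation:

static inline void sketch_skb_dst_drop(struct sk_buff *skb)
{
	if (skb->_skb_refdst) {
		/* only refcounted entries need dst_release() */
		if (!(skb->_skb_refdst & SKB_DST_NOREF))
			dst_release(skb_dst(skb));
		skb->_skb_refdst = 0UL;
	}
}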
diff --git a/include/linux/bnxt/hsi.h b/include/linux/bnxt/hsi.h
index 549231703bce..8c5dac3b3ef3 100644
--- a/include/linux/bnxt/hsi.h
+++ b/include/linux/bnxt/hsi.h
@@ -276,6 +276,10 @@ struct cmd_nums {
#define HWRM_REG_POWER_QUERY 0xe1UL
#define HWRM_CORE_FREQUENCY_QUERY 0xe2UL
#define HWRM_REG_POWER_HISTOGRAM 0xe3UL
+ #define HWRM_MONITOR_PAX_HISTOGRAM_START 0xe4UL
+ #define HWRM_MONITOR_PAX_HISTOGRAM_COLLECT 0xe5UL
+ #define HWRM_STAT_QUERY_ROCE_STATS 0xe6UL
+ #define HWRM_STAT_QUERY_ROCE_STATS_EXT 0xe7UL
#define HWRM_WOL_FILTER_ALLOC 0xf0UL
#define HWRM_WOL_FILTER_FREE 0xf1UL
#define HWRM_WOL_FILTER_QCFG 0xf2UL
@@ -407,9 +411,8 @@ struct cmd_nums {
#define HWRM_FUNC_LAG_UPDATE 0x1b1UL
#define HWRM_FUNC_LAG_FREE 0x1b2UL
#define HWRM_FUNC_LAG_QCFG 0x1b3UL
- #define HWRM_FUNC_TIMEDTX_PACING_RATE_ADD 0x1c2UL
- #define HWRM_FUNC_TIMEDTX_PACING_RATE_DELETE 0x1c3UL
- #define HWRM_FUNC_TIMEDTX_PACING_RATE_QUERY 0x1c4UL
+ #define HWRM_FUNC_TTX_PACING_RATE_PROF_QUERY 0x1c3UL
+ #define HWRM_FUNC_TTX_PACING_RATE_QUERY 0x1c4UL
#define HWRM_SELFTEST_QLIST 0x200UL
#define HWRM_SELFTEST_EXEC 0x201UL
#define HWRM_SELFTEST_IRQ 0x202UL
@@ -441,6 +444,7 @@ struct cmd_nums {
#define HWRM_MFG_WRITE_CERT_NVM 0x21cUL
#define HWRM_PORT_POE_CFG 0x230UL
#define HWRM_PORT_POE_QCFG 0x231UL
+ #define HWRM_PORT_PHY_FDRSTAT 0x232UL
#define HWRM_UDCC_QCAPS 0x258UL
#define HWRM_UDCC_CFG 0x259UL
#define HWRM_UDCC_QCFG 0x25aUL
@@ -453,6 +457,8 @@ struct cmd_nums {
#define HWRM_QUEUE_PFCWD_TIMEOUT_QCAPS 0x261UL
#define HWRM_QUEUE_PFCWD_TIMEOUT_CFG 0x262UL
#define HWRM_QUEUE_PFCWD_TIMEOUT_QCFG 0x263UL
+ #define HWRM_QUEUE_ADPTV_QOS_RX_QCFG 0x264UL
+ #define HWRM_QUEUE_ADPTV_QOS_TX_QCFG 0x265UL
#define HWRM_TF 0x2bcUL
#define HWRM_TF_VERSION_GET 0x2bdUL
#define HWRM_TF_SESSION_OPEN 0x2c6UL
@@ -551,6 +557,8 @@ struct cmd_nums {
#define HWRM_DBG_COREDUMP_CAPTURE 0xff2cUL
#define HWRM_DBG_PTRACE 0xff2dUL
#define HWRM_DBG_SIM_CABLE_STATE 0xff2eUL
+ #define HWRM_DBG_TOKEN_QUERY_AUTH_IDS 0xff2fUL
+ #define HWRM_DBG_TOKEN_CFG 0xff30UL
#define HWRM_NVM_GET_VPD_FIELD_INFO 0xffeaUL
#define HWRM_NVM_SET_VPD_FIELD_INFO 0xffebUL
#define HWRM_NVM_DEFRAG 0xffecUL
@@ -632,8 +640,8 @@ struct hwrm_err_output {
#define HWRM_VERSION_MAJOR 1
#define HWRM_VERSION_MINOR 10
#define HWRM_VERSION_UPDATE 3
-#define HWRM_VERSION_RSVD 97
-#define HWRM_VERSION_STR "1.10.3.97"
+#define HWRM_VERSION_RSVD 133
+#define HWRM_VERSION_STR "1.10.3.133"
/* hwrm_ver_get_input (size:192b/24B) */
struct hwrm_ver_get_input {
@@ -688,6 +696,7 @@ struct hwrm_ver_get_output {
#define VER_GET_RESP_DEV_CAPS_CFG_CFA_TRUFLOW_SUPPORTED 0x4000UL
#define VER_GET_RESP_DEV_CAPS_CFG_SECURE_BOOT_CAPABLE 0x8000UL
#define VER_GET_RESP_DEV_CAPS_CFG_SECURE_SOC_CAPABLE 0x10000UL
+ #define VER_GET_RESP_DEV_CAPS_CFG_DEBUG_TOKEN_SUPPORTED 0x20000UL
u8 roce_fw_maj_8b;
u8 roce_fw_min_8b;
u8 roce_fw_bld_8b;
@@ -872,7 +881,8 @@ struct hwrm_async_event_cmpl {
#define ASYNC_EVENT_CMPL_EVENT_ID_REPRESENTOR_PAIR_CHANGE 0x4eUL
#define ASYNC_EVENT_CMPL_EVENT_ID_VF_STAT_CHANGE 0x4fUL
#define ASYNC_EVENT_CMPL_EVENT_ID_HOST_COREDUMP 0x50UL
- #define ASYNC_EVENT_CMPL_EVENT_ID_MAX_RGTR_EVENT_ID 0x51UL
+ #define ASYNC_EVENT_CMPL_EVENT_ID_ADPTV_QOS 0x51UL
+ #define ASYNC_EVENT_CMPL_EVENT_ID_MAX_RGTR_EVENT_ID 0x52UL
#define ASYNC_EVENT_CMPL_EVENT_ID_FW_TRACE_MSG 0xfeUL
#define ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR 0xffUL
#define ASYNC_EVENT_CMPL_EVENT_ID_LAST ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR
@@ -1344,7 +1354,8 @@ struct hwrm_async_event_cmpl_dbg_buf_producer {
#define ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_EVENT_DATA1_TYPE_CA2_TRACE 0x9UL
#define ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_EVENT_DATA1_TYPE_RIGP1_TRACE 0xaUL
#define ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_EVENT_DATA1_TYPE_AFM_KONG_HWRM_TRACE 0xbUL
- #define ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_EVENT_DATA1_TYPE_LAST ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_EVENT_DATA1_TYPE_AFM_KONG_HWRM_TRACE
+ #define ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_EVENT_DATA1_TYPE_ERR_QPC_TRACE 0xcUL
+ #define ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_EVENT_DATA1_TYPE_LAST ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_EVENT_DATA1_TYPE_ERR_QPC_TRACE
};
/* hwrm_async_event_cmpl_hwrm_error (size:128b/16B) */
@@ -1401,7 +1412,11 @@ struct hwrm_async_event_cmpl_error_report_base {
#define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_DOORBELL_DROP_THRESHOLD 0x4UL
#define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_THERMAL_THRESHOLD 0x5UL
#define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_DUAL_DATA_RATE_NOT_SUPPORTED 0x6UL
- #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_DUAL_DATA_RATE_NOT_SUPPORTED
+ #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_DUP_UDCC_SES 0x7UL
+ #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_DB_DROP 0x8UL
+ #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_MD_TEMP 0x9UL
+ #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_VNIC_ERR 0xaUL
+ #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_VNIC_ERR
};
/* hwrm_async_event_cmpl_error_report_pause_storm (size:128b/16B) */
@@ -1914,6 +1929,12 @@ struct hwrm_func_qcaps_output {
#define FUNC_QCAPS_RESP_FLAGS_EXT3_RX_RATE_PROFILE_SEL_SUPPORTED 0x8UL
#define FUNC_QCAPS_RESP_FLAGS_EXT3_BIDI_OPT_SUPPORTED 0x10UL
#define FUNC_QCAPS_RESP_FLAGS_EXT3_MIRROR_ON_ROCE_SUPPORTED 0x20UL
+ #define FUNC_QCAPS_RESP_FLAGS_EXT3_ROCE_VF_DYN_ALLOC_SUPPORT 0x40UL
+ #define FUNC_QCAPS_RESP_FLAGS_EXT3_CHANGE_UDP_SRCPORT_SUPPORT 0x80UL
+ #define FUNC_QCAPS_RESP_FLAGS_EXT3_PCIE_COMPLIANCE_SUPPORTED 0x100UL
+ #define FUNC_QCAPS_RESP_FLAGS_EXT3_MULTI_L2_DB_SUPPORTED 0x200UL
+ #define FUNC_QCAPS_RESP_FLAGS_EXT3_PCIE_SECURE_ATS_SUPPORTED 0x400UL
+ #define FUNC_QCAPS_RESP_FLAGS_EXT3_MBUF_STATS_SUPPORTED 0x800UL
__le16 max_roce_vfs;
__le16 max_crypto_rx_flow_filters;
u8 unused_3[3];
@@ -1931,7 +1952,7 @@ struct hwrm_func_qcfg_input {
u8 unused_0[6];
};
-/* hwrm_func_qcfg_output (size:1344b/168B) */
+/* hwrm_func_qcfg_output (size:1408b/176B) */
struct hwrm_func_qcfg_output {
__le16 error_code;
__le16 req_type;
@@ -2124,7 +2145,43 @@ struct hwrm_func_qcfg_output {
#define FUNC_QCFG_RESP_XID_PARTITION_CFG_TX_CK 0x1UL
#define FUNC_QCFG_RESP_XID_PARTITION_CFG_RX_CK 0x2UL
__le16 mirror_vnic_id;
- u8 unused_7[7];
+ u8 max_link_width;
+ #define FUNC_QCFG_RESP_MAX_LINK_WIDTH_UNKNOWN 0x0UL
+ #define FUNC_QCFG_RESP_MAX_LINK_WIDTH_X1 0x1UL
+ #define FUNC_QCFG_RESP_MAX_LINK_WIDTH_X2 0x2UL
+ #define FUNC_QCFG_RESP_MAX_LINK_WIDTH_X4 0x4UL
+ #define FUNC_QCFG_RESP_MAX_LINK_WIDTH_X8 0x8UL
+ #define FUNC_QCFG_RESP_MAX_LINK_WIDTH_X16 0x10UL
+ #define FUNC_QCFG_RESP_MAX_LINK_WIDTH_LAST FUNC_QCFG_RESP_MAX_LINK_WIDTH_X16
+ u8 max_link_speed;
+ #define FUNC_QCFG_RESP_MAX_LINK_SPEED_UNKNOWN 0x0UL
+ #define FUNC_QCFG_RESP_MAX_LINK_SPEED_G1 0x1UL
+ #define FUNC_QCFG_RESP_MAX_LINK_SPEED_G2 0x2UL
+ #define FUNC_QCFG_RESP_MAX_LINK_SPEED_G3 0x3UL
+ #define FUNC_QCFG_RESP_MAX_LINK_SPEED_G4 0x4UL
+ #define FUNC_QCFG_RESP_MAX_LINK_SPEED_G5 0x5UL
+ #define FUNC_QCFG_RESP_MAX_LINK_SPEED_LAST FUNC_QCFG_RESP_MAX_LINK_SPEED_G5
+ u8 negotiated_link_width;
+ #define FUNC_QCFG_RESP_NEGOTIATED_LINK_WIDTH_UNKNOWN 0x0UL
+ #define FUNC_QCFG_RESP_NEGOTIATED_LINK_WIDTH_X1 0x1UL
+ #define FUNC_QCFG_RESP_NEGOTIATED_LINK_WIDTH_X2 0x2UL
+ #define FUNC_QCFG_RESP_NEGOTIATED_LINK_WIDTH_X4 0x4UL
+ #define FUNC_QCFG_RESP_NEGOTIATED_LINK_WIDTH_X8 0x8UL
+ #define FUNC_QCFG_RESP_NEGOTIATED_LINK_WIDTH_X16 0x10UL
+ #define FUNC_QCFG_RESP_NEGOTIATED_LINK_WIDTH_LAST FUNC_QCFG_RESP_NEGOTIATED_LINK_WIDTH_X16
+ u8 negotiated_link_speed;
+ #define FUNC_QCFG_RESP_NEGOTIATED_LINK_SPEED_UNKNOWN 0x0UL
+ #define FUNC_QCFG_RESP_NEGOTIATED_LINK_SPEED_G1 0x1UL
+ #define FUNC_QCFG_RESP_NEGOTIATED_LINK_SPEED_G2 0x2UL
+ #define FUNC_QCFG_RESP_NEGOTIATED_LINK_SPEED_G3 0x3UL
+ #define FUNC_QCFG_RESP_NEGOTIATED_LINK_SPEED_G4 0x4UL
+ #define FUNC_QCFG_RESP_NEGOTIATED_LINK_SPEED_G5 0x5UL
+ #define FUNC_QCFG_RESP_NEGOTIATED_LINK_SPEED_LAST FUNC_QCFG_RESP_NEGOTIATED_LINK_SPEED_G5
+ u8 unused_7[2];
+ u8 pcie_compliance;
+ u8 unused_8;
+ __le16 l2_db_multi_page_size_kb;
+ u8 unused_9[5];
u8 valid;
};
@@ -2322,6 +2379,7 @@ struct hwrm_func_cfg_input {
#define FUNC_CFG_REQ_ENABLES2_ROCE_MAX_GID_PER_VF 0x200UL
#define FUNC_CFG_REQ_ENABLES2_XID_PARTITION_CFG 0x400UL
#define FUNC_CFG_REQ_ENABLES2_PHYSICAL_SLOT_NUMBER 0x800UL
+ #define FUNC_CFG_REQ_ENABLES2_PCIE_COMPLIANCE 0x1000UL
u8 port_kdnet_mode;
#define FUNC_CFG_REQ_PORT_KDNET_MODE_DISABLED 0x0UL
#define FUNC_CFG_REQ_PORT_KDNET_MODE_ENABLED 0x1UL
@@ -2353,7 +2411,8 @@ struct hwrm_func_cfg_input {
__le16 xid_partition_cfg;
#define FUNC_CFG_REQ_XID_PARTITION_CFG_TX_CK 0x1UL
#define FUNC_CFG_REQ_XID_PARTITION_CFG_RX_CK 0x2UL
- __le16 unused_2;
+ u8 pcie_compliance;
+ u8 unused_2;
};
/* hwrm_func_cfg_output (size:128b/16B) */
@@ -2370,11 +2429,41 @@ struct hwrm_func_cfg_output {
struct hwrm_func_cfg_cmd_err {
u8 code;
#define FUNC_CFG_CMD_ERR_CODE_UNKNOWN 0x0UL
- #define FUNC_CFG_CMD_ERR_CODE_PARTITION_MIN_BW_RANGE 0x1UL
- #define FUNC_CFG_CMD_ERR_CODE_PARTITION_MIN_MORE_THAN_MAX 0x2UL
- #define FUNC_CFG_CMD_ERR_CODE_PARTITION_MIN_BW_UNSUPPORTED 0x3UL
- #define FUNC_CFG_CMD_ERR_CODE_PARTITION_BW_PERCENT 0x4UL
- #define FUNC_CFG_CMD_ERR_CODE_LAST FUNC_CFG_CMD_ERR_CODE_PARTITION_BW_PERCENT
+ #define FUNC_CFG_CMD_ERR_CODE_PARTITION_BW_OUT_OF_RANGE 0x1UL
+ #define FUNC_CFG_CMD_ERR_CODE_NPAR_PARTITION_DOWN_FAILED 0x2UL
+ #define FUNC_CFG_CMD_ERR_CODE_TPID_SET_DFLT_VLAN_NOT_SET 0x3UL
+ #define FUNC_CFG_CMD_ERR_CODE_RES_ARRAY_ALLOC_FAILED 0x4UL
+ #define FUNC_CFG_CMD_ERR_CODE_TX_RING_ASSET_TEST_FAILED 0x5UL
+ #define FUNC_CFG_CMD_ERR_CODE_TX_RING_RES_UPDATE_FAILED 0x6UL
+ #define FUNC_CFG_CMD_ERR_CODE_APPLY_MAX_BW_FAILED 0x7UL
+ #define FUNC_CFG_CMD_ERR_CODE_ENABLE_EVB_FAILED 0x8UL
+ #define FUNC_CFG_CMD_ERR_CODE_RSS_CTXT_ASSET_TEST_FAILED 0x9UL
+ #define FUNC_CFG_CMD_ERR_CODE_RSS_CTXT_RES_UPDATE_FAILED 0xaUL
+ #define FUNC_CFG_CMD_ERR_CODE_CMPL_RING_ASSET_TEST_FAILED 0xbUL
+ #define FUNC_CFG_CMD_ERR_CODE_CMPL_RING_RES_UPDATE_FAILED 0xcUL
+ #define FUNC_CFG_CMD_ERR_CODE_NQ_ASSET_TEST_FAILED 0xdUL
+ #define FUNC_CFG_CMD_ERR_CODE_NQ_RES_UPDATE_FAILED 0xeUL
+ #define FUNC_CFG_CMD_ERR_CODE_RX_RING_ASSET_TEST_FAILED 0xfUL
+ #define FUNC_CFG_CMD_ERR_CODE_RX_RING_RES_UPDATE_FAILED 0x10UL
+ #define FUNC_CFG_CMD_ERR_CODE_VNIC_ASSET_TEST_FAILED 0x11UL
+ #define FUNC_CFG_CMD_ERR_CODE_VNIC_RES_UPDATE_FAILED 0x12UL
+ #define FUNC_CFG_CMD_ERR_CODE_FAILED_TO_START_STATS_THREAD 0x13UL
+ #define FUNC_CFG_CMD_ERR_CODE_RDMA_SRIOV_DISABLED 0x14UL
+ #define FUNC_CFG_CMD_ERR_CODE_TX_KTLS_DISABLED 0x15UL
+ #define FUNC_CFG_CMD_ERR_CODE_TX_KTLS_ASSET_TEST_FAILED 0x16UL
+ #define FUNC_CFG_CMD_ERR_CODE_TX_KTLS_RES_UPDATE_FAILED 0x17UL
+ #define FUNC_CFG_CMD_ERR_CODE_RX_KTLS_DISABLED 0x18UL
+ #define FUNC_CFG_CMD_ERR_CODE_RX_KTLS_ASSET_TEST_FAILED 0x19UL
+ #define FUNC_CFG_CMD_ERR_CODE_RX_KTLS_RES_UPDATE_FAILED 0x1aUL
+ #define FUNC_CFG_CMD_ERR_CODE_TX_QUIC_DISABLED 0x1bUL
+ #define FUNC_CFG_CMD_ERR_CODE_TX_QUIC_ASSET_TEST_FAILED 0x1cUL
+ #define FUNC_CFG_CMD_ERR_CODE_TX_QUIC_RES_UPDATE_FAILED 0x1dUL
+ #define FUNC_CFG_CMD_ERR_CODE_RX_QUIC_DISABLED 0x1eUL
+ #define FUNC_CFG_CMD_ERR_CODE_RX_QUIC_ASSET_TEST_FAILED 0x1fUL
+ #define FUNC_CFG_CMD_ERR_CODE_RX_QUIC_RES_UPDATE_FAILED 0x20UL
+ #define FUNC_CFG_CMD_ERR_CODE_INVALID_KDNET_MODE 0x21UL
+ #define FUNC_CFG_CMD_ERR_CODE_SCHQ_CFG_FAIL 0x22UL
+ #define FUNC_CFG_CMD_ERR_CODE_LAST FUNC_CFG_CMD_ERR_CODE_SCHQ_CFG_FAIL
u8 unused_0[7];
};
@@ -3780,6 +3869,7 @@ struct hwrm_func_backing_store_cfg_v2_input {
#define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_CA2_TRACE 0x28UL
#define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_RIGP1_TRACE 0x29UL
#define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_AFM_KONG_HWRM_TRACE 0x2aUL
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_ERR_QPC_TRACE 0x2bUL
#define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_INVALID 0xffffUL
#define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_LAST FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_INVALID
__le16 instance;
@@ -3865,6 +3955,7 @@ struct hwrm_func_backing_store_qcfg_v2_input {
#define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_CA2_TRACE 0x28UL
#define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_RIGP1_TRACE 0x29UL
#define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_AFM_KONG_HWRM_TRACE 0x2aUL
+ #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_ERR_QPC_TRACE 0x2bUL
#define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_INVALID 0xffffUL
#define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_LAST FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_INVALID
__le16 instance;
@@ -3904,6 +3995,7 @@ struct hwrm_func_backing_store_qcfg_v2_output {
#define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_CA1_TRACE 0x27UL
#define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_CA2_TRACE 0x28UL
#define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_RIGP1_TRACE 0x29UL
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_ERR_QPC_TRACE 0x2aUL
#define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_INVALID 0xffffUL
#define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_LAST FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_INVALID
__le16 instance;
@@ -4027,6 +4119,7 @@ struct hwrm_func_backing_store_qcaps_v2_input {
#define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_CA2_TRACE 0x28UL
#define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_RIGP1_TRACE 0x29UL
#define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_AFM_KONG_HWRM_TRACE 0x2aUL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_ERR_QPC_TRACE 0x2bUL
#define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_INVALID 0xffffUL
#define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_LAST FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_INVALID
u8 rsvd[6];
@@ -4070,6 +4163,7 @@ struct hwrm_func_backing_store_qcaps_v2_output {
#define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_CA2_TRACE 0x28UL
#define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_RIGP1_TRACE 0x29UL
#define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_AFM_KONG_HWRM_TRACE 0x2aUL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_ERR_QPC_TRACE 0x2bUL
#define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_INVALID 0xffffUL
#define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_LAST FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_INVALID
__le16 entry_size;
@@ -4216,6 +4310,10 @@ struct hwrm_port_phy_cfg_input {
#define PORT_PHY_CFG_REQ_FLAGS_FEC_RS272_1XN_DISABLE 0x100000UL
#define PORT_PHY_CFG_REQ_FLAGS_FEC_RS272_IEEE_ENABLE 0x200000UL
#define PORT_PHY_CFG_REQ_FLAGS_FEC_RS272_IEEE_DISABLE 0x400000UL
+ #define PORT_PHY_CFG_REQ_FLAGS_LINK_TRAINING_ENABLE 0x800000UL
+ #define PORT_PHY_CFG_REQ_FLAGS_LINK_TRAINING_DISABLE 0x1000000UL
+ #define PORT_PHY_CFG_REQ_FLAGS_PRECODING_ENABLE 0x2000000UL
+ #define PORT_PHY_CFG_REQ_FLAGS_PRECODING_DISABLE 0x4000000UL
__le32 enables;
#define PORT_PHY_CFG_REQ_ENABLES_AUTO_MODE 0x1UL
#define PORT_PHY_CFG_REQ_ENABLES_AUTO_DUPLEX 0x2UL
@@ -4703,6 +4801,8 @@ struct hwrm_port_phy_qcfg_output {
#define PORT_PHY_QCFG_RESP_OPTION_FLAGS_MEDIA_AUTO_DETECT 0x1UL
#define PORT_PHY_QCFG_RESP_OPTION_FLAGS_SIGNAL_MODE_KNOWN 0x2UL
#define PORT_PHY_QCFG_RESP_OPTION_FLAGS_SPEEDS2_SUPPORTED 0x4UL
+ #define PORT_PHY_QCFG_RESP_OPTION_FLAGS_LINK_TRAINING 0x8UL
+ #define PORT_PHY_QCFG_RESP_OPTION_FLAGS_PRECODING 0x10UL
char phy_vendor_name[16];
char phy_vendor_partnumber[16];
__le16 support_pam4_speeds;
@@ -4725,6 +4825,10 @@ struct hwrm_port_phy_qcfg_output {
u8 link_down_reason;
#define PORT_PHY_QCFG_RESP_LINK_DOWN_REASON_RF 0x1UL
#define PORT_PHY_QCFG_RESP_LINK_DOWN_REASON_OTP_SPEED_VIOLATION 0x2UL
+ #define PORT_PHY_QCFG_RESP_LINK_DOWN_REASON_CABLE_REMOVED 0x4UL
+ #define PORT_PHY_QCFG_RESP_LINK_DOWN_REASON_MODULE_FAULT 0x8UL
+ #define PORT_PHY_QCFG_RESP_LINK_DOWN_REASON_BMC_REQUEST 0x10UL
+ #define PORT_PHY_QCFG_RESP_LINK_DOWN_REASON_TX_LASER_DISABLED 0x20UL
__le16 support_speeds2;
#define PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS2_1GB 0x1UL
#define PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS2_10GB 0x2UL
@@ -5882,9 +5986,10 @@ struct hwrm_port_led_qcaps_output {
#define PORT_LED_QCAPS_RESP_LED0_STATE_CAPS_BLINK_SUPPORTED 0x8UL
#define PORT_LED_QCAPS_RESP_LED0_STATE_CAPS_BLINK_ALT_SUPPORTED 0x10UL
__le16 led0_color_caps;
- #define PORT_LED_QCAPS_RESP_LED0_COLOR_CAPS_RSVD 0x1UL
- #define PORT_LED_QCAPS_RESP_LED0_COLOR_CAPS_AMBER_SUPPORTED 0x2UL
- #define PORT_LED_QCAPS_RESP_LED0_COLOR_CAPS_GREEN_SUPPORTED 0x4UL
+ #define PORT_LED_QCAPS_RESP_LED0_COLOR_CAPS_RSVD 0x1UL
+ #define PORT_LED_QCAPS_RESP_LED0_COLOR_CAPS_AMBER_SUPPORTED 0x2UL
+ #define PORT_LED_QCAPS_RESP_LED0_COLOR_CAPS_GREEN_SUPPORTED 0x4UL
+ #define PORT_LED_QCAPS_RESP_LED0_COLOR_CAPS_GRNAMB_SUPPORTED 0x8UL
u8 led1_id;
u8 led1_type;
#define PORT_LED_QCAPS_RESP_LED1_TYPE_SPEED 0x0UL
@@ -5900,9 +6005,10 @@ struct hwrm_port_led_qcaps_output {
#define PORT_LED_QCAPS_RESP_LED1_STATE_CAPS_BLINK_SUPPORTED 0x8UL
#define PORT_LED_QCAPS_RESP_LED1_STATE_CAPS_BLINK_ALT_SUPPORTED 0x10UL
__le16 led1_color_caps;
- #define PORT_LED_QCAPS_RESP_LED1_COLOR_CAPS_RSVD 0x1UL
- #define PORT_LED_QCAPS_RESP_LED1_COLOR_CAPS_AMBER_SUPPORTED 0x2UL
- #define PORT_LED_QCAPS_RESP_LED1_COLOR_CAPS_GREEN_SUPPORTED 0x4UL
+ #define PORT_LED_QCAPS_RESP_LED1_COLOR_CAPS_RSVD 0x1UL
+ #define PORT_LED_QCAPS_RESP_LED1_COLOR_CAPS_AMBER_SUPPORTED 0x2UL
+ #define PORT_LED_QCAPS_RESP_LED1_COLOR_CAPS_GREEN_SUPPORTED 0x4UL
+ #define PORT_LED_QCAPS_RESP_LED1_COLOR_CAPS_GRNAMB_SUPPORTED 0x8UL
u8 led2_id;
u8 led2_type;
#define PORT_LED_QCAPS_RESP_LED2_TYPE_SPEED 0x0UL
@@ -5918,9 +6024,10 @@ struct hwrm_port_led_qcaps_output {
#define PORT_LED_QCAPS_RESP_LED2_STATE_CAPS_BLINK_SUPPORTED 0x8UL
#define PORT_LED_QCAPS_RESP_LED2_STATE_CAPS_BLINK_ALT_SUPPORTED 0x10UL
__le16 led2_color_caps;
- #define PORT_LED_QCAPS_RESP_LED2_COLOR_CAPS_RSVD 0x1UL
- #define PORT_LED_QCAPS_RESP_LED2_COLOR_CAPS_AMBER_SUPPORTED 0x2UL
- #define PORT_LED_QCAPS_RESP_LED2_COLOR_CAPS_GREEN_SUPPORTED 0x4UL
+ #define PORT_LED_QCAPS_RESP_LED2_COLOR_CAPS_RSVD 0x1UL
+ #define PORT_LED_QCAPS_RESP_LED2_COLOR_CAPS_AMBER_SUPPORTED 0x2UL
+ #define PORT_LED_QCAPS_RESP_LED2_COLOR_CAPS_GREEN_SUPPORTED 0x4UL
+ #define PORT_LED_QCAPS_RESP_LED2_COLOR_CAPS_GRNAMB_SUPPORTED 0x8UL
u8 led3_id;
u8 led3_type;
#define PORT_LED_QCAPS_RESP_LED3_TYPE_SPEED 0x0UL
@@ -5936,9 +6043,10 @@ struct hwrm_port_led_qcaps_output {
#define PORT_LED_QCAPS_RESP_LED3_STATE_CAPS_BLINK_SUPPORTED 0x8UL
#define PORT_LED_QCAPS_RESP_LED3_STATE_CAPS_BLINK_ALT_SUPPORTED 0x10UL
__le16 led3_color_caps;
- #define PORT_LED_QCAPS_RESP_LED3_COLOR_CAPS_RSVD 0x1UL
- #define PORT_LED_QCAPS_RESP_LED3_COLOR_CAPS_AMBER_SUPPORTED 0x2UL
- #define PORT_LED_QCAPS_RESP_LED3_COLOR_CAPS_GREEN_SUPPORTED 0x4UL
+ #define PORT_LED_QCAPS_RESP_LED3_COLOR_CAPS_RSVD 0x1UL
+ #define PORT_LED_QCAPS_RESP_LED3_COLOR_CAPS_AMBER_SUPPORTED 0x2UL
+ #define PORT_LED_QCAPS_RESP_LED3_COLOR_CAPS_GREEN_SUPPORTED 0x4UL
+ #define PORT_LED_QCAPS_RESP_LED3_COLOR_CAPS_GRNAMB_SUPPORTED 0x8UL
u8 unused_4[3];
u8 valid;
};
@@ -7036,9 +7144,22 @@ struct hwrm_vnic_rss_cfg_output {
/* hwrm_vnic_rss_cfg_cmd_err (size:64b/8B) */
struct hwrm_vnic_rss_cfg_cmd_err {
u8 code;
- #define VNIC_RSS_CFG_CMD_ERR_CODE_UNKNOWN 0x0UL
- #define VNIC_RSS_CFG_CMD_ERR_CODE_INTERFACE_NOT_READY 0x1UL
- #define VNIC_RSS_CFG_CMD_ERR_CODE_LAST VNIC_RSS_CFG_CMD_ERR_CODE_INTERFACE_NOT_READY
+ #define VNIC_RSS_CFG_CMD_ERR_CODE_UNKNOWN 0x0UL
+ #define VNIC_RSS_CFG_CMD_ERR_CODE_INTERFACE_NOT_READY 0x1UL
+ #define VNIC_RSS_CFG_CMD_ERR_CODE_UNABLE_TO_GET_RSS_CFG 0x2UL
+ #define VNIC_RSS_CFG_CMD_ERR_CODE_HASH_TYPE_UNSUPPORTED 0x3UL
+ #define VNIC_RSS_CFG_CMD_ERR_CODE_HASH_TYPE_ERR 0x4UL
+ #define VNIC_RSS_CFG_CMD_ERR_CODE_HASH_MODE_FAIL 0x5UL
+ #define VNIC_RSS_CFG_CMD_ERR_CODE_RING_GRP_TABLE_ALLOC_ERR 0x6UL
+ #define VNIC_RSS_CFG_CMD_ERR_CODE_HASH_KEY_ALLOC_ERR 0x7UL
+ #define VNIC_RSS_CFG_CMD_ERR_CODE_DMA_FAILED 0x8UL
+ #define VNIC_RSS_CFG_CMD_ERR_CODE_RX_RING_ALLOC_ERR 0x9UL
+ #define VNIC_RSS_CFG_CMD_ERR_CODE_CMPL_RING_ALLOC_ERR 0xaUL
+ #define VNIC_RSS_CFG_CMD_ERR_CODE_HW_SET_RSS_FAILED 0xbUL
+ #define VNIC_RSS_CFG_CMD_ERR_CODE_CTX_INVALID 0xcUL
+ #define VNIC_RSS_CFG_CMD_ERR_CODE_VNIC_INVALID 0xdUL
+ #define VNIC_RSS_CFG_CMD_ERR_CODE_VNIC_RING_TABLE_PAIR_INVALID 0xeUL
+ #define VNIC_RSS_CFG_CMD_ERR_CODE_LAST VNIC_RSS_CFG_CMD_ERR_CODE_VNIC_RING_TABLE_PAIR_INVALID
u8 unused_0[7];
};
@@ -7177,7 +7298,7 @@ struct hwrm_vnic_rss_cos_lb_ctx_free_output {
u8 valid;
};
-/* hwrm_ring_alloc_input (size:704b/88B) */
+/* hwrm_ring_alloc_input (size:768b/96B) */
struct hwrm_ring_alloc_input {
__le16 req_type;
__le16 cmpl_ring;
@@ -7195,6 +7316,7 @@ struct hwrm_ring_alloc_input {
#define RING_ALLOC_REQ_ENABLES_MPC_CHNLS_TYPE 0x400UL
#define RING_ALLOC_REQ_ENABLES_STEERING_TAG_VALID 0x800UL
#define RING_ALLOC_REQ_ENABLES_RX_RATE_PROFILE_VALID 0x1000UL
+ #define RING_ALLOC_REQ_ENABLES_DPI_VALID 0x2000UL
u8 ring_type;
#define RING_ALLOC_REQ_RING_TYPE_L2_CMPL 0x0UL
#define RING_ALLOC_REQ_RING_TYPE_TX 0x1UL
@@ -7287,6 +7409,8 @@ struct hwrm_ring_alloc_input {
#define RING_ALLOC_REQ_RX_RATE_PROFILE_SEL_LAST RING_ALLOC_REQ_RX_RATE_PROFILE_SEL_POLL_MODE
u8 unused_4;
__le64 cq_handle;
+ __le16 dpi;
+ __le16 unused_5[3];
};
/* hwrm_ring_alloc_output (size:128b/16B) */
@@ -7776,7 +7900,10 @@ struct hwrm_cfa_l2_set_rx_mask_cmd_err {
u8 code;
#define CFA_L2_SET_RX_MASK_CMD_ERR_CODE_UNKNOWN 0x0UL
#define CFA_L2_SET_RX_MASK_CMD_ERR_CODE_NTUPLE_FILTER_CONFLICT_ERR 0x1UL
- #define CFA_L2_SET_RX_MASK_CMD_ERR_CODE_LAST CFA_L2_SET_RX_MASK_CMD_ERR_CODE_NTUPLE_FILTER_CONFLICT_ERR
+ #define CFA_L2_SET_RX_MASK_CMD_ERR_CODE_MAX_VLAN_TAGS 0x2UL
+ #define CFA_L2_SET_RX_MASK_CMD_ERR_CODE_INVALID_VNIC_ID 0x3UL
+ #define CFA_L2_SET_RX_MASK_CMD_ERR_CODE_INVALID_ACTION 0x4UL
+ #define CFA_L2_SET_RX_MASK_CMD_ERR_CODE_LAST CFA_L2_SET_RX_MASK_CMD_ERR_CODE_INVALID_ACTION
u8 unused_0[7];
};
@@ -8109,9 +8236,38 @@ struct hwrm_cfa_ntuple_filter_alloc_output {
/* hwrm_cfa_ntuple_filter_alloc_cmd_err (size:64b/8B) */
struct hwrm_cfa_ntuple_filter_alloc_cmd_err {
u8 code;
- #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_UNKNOWN 0x0UL
- #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_RX_MASK_VLAN_CONFLICT_ERR 0x1UL
- #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_LAST CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_RX_MASK_VLAN_CONFLICT_ERR
+ #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_UNKNOWN 0x0UL
+ #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_ZERO_MAC 0x65UL
+ #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_BC_MC_MAC 0x66UL
+ #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_INVALID_VNIC 0x67UL
+ #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_INVALID_PF_FID 0x68UL
+ #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_INVALID_L2_CTXT_ID 0x69UL
+ #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_NULL_L2_CTXT_CFG 0x6aUL
+ #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_NULL_L2_DATA_FLD 0x6bUL
+ #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_INVALID_CFA_LAYOUT 0x6cUL
+ #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_L2_CTXT_ALLOC_FAIL 0x6dUL
+ #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_ROCE_FLOW_ERR 0x6eUL
+ #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_INVALID_OWNER_FID 0x6fUL
+ #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_ZERO_REF_CNT 0x70UL
+ #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_INVALID_FLOW_TYPE 0x71UL
+ #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_INVALID_IVLAN 0x72UL
+ #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_MAX_VLAN_ID 0x73UL
+ #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_INVALID_TNL_REQ 0x74UL
+ #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_L2_ADDR 0x75UL
+ #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_L2_IVLAN 0x76UL
+ #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_L3_ADDR 0x77UL
+ #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_L3_ADDR_TYPE 0x78UL
+ #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_T_L3_ADDR_TYPE 0x79UL
+ #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_DST_VNIC_ID 0x7aUL
+ #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_VNI 0x7bUL
+ #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_INVALID_DST_ID 0x7cUL
+ #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_FAIL_ROCE_L2_FLOW 0x7dUL
+ #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_INVALID_NPAR_VLAN 0x7eUL
+ #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_ATSP_ADD 0x7fUL
+ #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_DFLT_VLAN_FAIL 0x80UL
+ #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_INVALID_L3_TYPE 0x81UL
+ #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_VAL_FAIL_TNL_FLOW 0x82UL
+ #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_LAST CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_VAL_FAIL_TNL_FLOW
u8 unused_0[7];
};
@@ -9181,7 +9337,7 @@ struct pcie_ctx_hw_stats {
__le64 pcie_recovery_histogram;
};
-/* pcie_ctx_hw_stats_v2 (size:4096b/512B) */
+/* pcie_ctx_hw_stats_v2 (size:4544b/568B) */
struct pcie_ctx_hw_stats_v2 {
__le64 pcie_pl_signal_integrity;
__le64 pcie_dl_signal_integrity;
@@ -9212,6 +9368,9 @@ struct pcie_ctx_hw_stats_v2 {
__le64 pcie_other_packet_count;
__le64 pcie_blocked_packet_count;
__le64 pcie_cmpl_packet_count;
+ __le32 pcie_rd_latency_histogram[12];
+ __le32 pcie_rd_latency_all_normal_count;
+ __le32 unused_2;
};
/* hwrm_stat_generic_qstats_input (size:256b/32B) */
@@ -9406,7 +9565,8 @@ struct hwrm_struct_hdr {
#define STRUCT_HDR_STRUCT_ID_MSIX_PER_VF 0xc8UL
#define STRUCT_HDR_STRUCT_ID_UDCC_RTT_BUCKET_COUNT 0x12cUL
#define STRUCT_HDR_STRUCT_ID_UDCC_RTT_BUCKET_BOUND 0x12dUL
- #define STRUCT_HDR_STRUCT_ID_LAST STRUCT_HDR_STRUCT_ID_UDCC_RTT_BUCKET_BOUND
+ #define STRUCT_HDR_STRUCT_ID_DBG_TOKEN_CLAIMS 0x190UL
+ #define STRUCT_HDR_STRUCT_ID_LAST STRUCT_HDR_STRUCT_ID_DBG_TOKEN_CLAIMS
__le16 len;
u8 version;
#define STRUCT_HDR_VERSION_0 0x0UL
@@ -9459,11 +9619,13 @@ struct hwrm_fw_set_structured_data_output {
/* hwrm_fw_set_structured_data_cmd_err (size:64b/8B) */
struct hwrm_fw_set_structured_data_cmd_err {
u8 code;
- #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_UNKNOWN 0x0UL
- #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_BAD_HDR_CNT 0x1UL
- #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_BAD_FMT 0x2UL
- #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_BAD_ID 0x3UL
- #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_LAST FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_BAD_ID
+ #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_UNKNOWN 0x0UL
+ #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_BAD_HDR_CNT 0x1UL
+ #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_BAD_FMT 0x2UL
+ #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_BAD_ID 0x3UL
+ #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_ALREADY_ADDED 0x4UL
+ #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_INST_IN_PROG 0x5UL
+ #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_LAST FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_INST_IN_PROG
u8 unused_0[7];
};
@@ -9487,7 +9649,9 @@ struct hwrm_fw_get_structured_data_input {
#define FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_NON_TPMR_PEER 0x201UL
#define FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_NON_TPMR_OPERATIONAL 0x202UL
#define FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_HOST_OPERATIONAL 0x300UL
- #define FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_LAST FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_HOST_OPERATIONAL
+ #define FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_CLAIMS_SUPPORTED 0x320UL
+ #define FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_CLAIMS_ACTIVE 0x321UL
+ #define FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_LAST FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_CLAIMS_ACTIVE
u8 count;
u8 unused_0;
};
@@ -10172,7 +10336,8 @@ struct hwrm_dbg_log_buffer_flush_input {
#define DBG_LOG_BUFFER_FLUSH_REQ_TYPE_CA2_TRACE 0x9UL
#define DBG_LOG_BUFFER_FLUSH_REQ_TYPE_RIGP1_TRACE 0xaUL
#define DBG_LOG_BUFFER_FLUSH_REQ_TYPE_AFM_KONG_HWRM_TRACE 0xbUL
- #define DBG_LOG_BUFFER_FLUSH_REQ_TYPE_LAST DBG_LOG_BUFFER_FLUSH_REQ_TYPE_AFM_KONG_HWRM_TRACE
+ #define DBG_LOG_BUFFER_FLUSH_REQ_TYPE_ERR_QPC_TRACE 0xcUL
+ #define DBG_LOG_BUFFER_FLUSH_REQ_TYPE_LAST DBG_LOG_BUFFER_FLUSH_REQ_TYPE_ERR_QPC_TRACE
u8 unused_1[2];
__le32 flags;
#define DBG_LOG_BUFFER_FLUSH_REQ_FLAGS_FLUSH_ALL_BUFFERS 0x1UL
@@ -10295,10 +10460,15 @@ struct hwrm_nvm_write_output {
/* hwrm_nvm_write_cmd_err (size:64b/8B) */
struct hwrm_nvm_write_cmd_err {
u8 code;
- #define NVM_WRITE_CMD_ERR_CODE_UNKNOWN 0x0UL
- #define NVM_WRITE_CMD_ERR_CODE_FRAG_ERR 0x1UL
- #define NVM_WRITE_CMD_ERR_CODE_NO_SPACE 0x2UL
- #define NVM_WRITE_CMD_ERR_CODE_LAST NVM_WRITE_CMD_ERR_CODE_NO_SPACE
+ #define NVM_WRITE_CMD_ERR_CODE_UNKNOWN 0x0UL
+ #define NVM_WRITE_CMD_ERR_CODE_FRAG_ERR 0x1UL
+ #define NVM_WRITE_CMD_ERR_CODE_NO_SPACE 0x2UL
+ #define NVM_WRITE_CMD_ERR_CODE_WRITE_FAILED 0x3UL
+ #define NVM_WRITE_CMD_ERR_CODE_REQD_ERASE_FAILED 0x4UL
+ #define NVM_WRITE_CMD_ERR_CODE_VERIFY_FAILED 0x5UL
+ #define NVM_WRITE_CMD_ERR_CODE_INVALID_HEADER 0x6UL
+ #define NVM_WRITE_CMD_ERR_CODE_UPDATE_DIGEST_FAILED 0x7UL
+ #define NVM_WRITE_CMD_ERR_CODE_LAST NVM_WRITE_CMD_ERR_CODE_UPDATE_DIGEST_FAILED
u8 unused_0[7];
};
@@ -10438,7 +10608,11 @@ struct hwrm_nvm_get_dev_info_output {
__le16 srt2_fw_minor;
__le16 srt2_fw_build;
__le16 srt2_fw_patch;
- u8 unused_0[7];
+ u8 security_soc_fw_major;
+ u8 security_soc_fw_minor;
+ u8 security_soc_fw_build;
+ u8 security_soc_fw_patch;
+ u8 unused_0[3];
u8 valid;
};
@@ -10568,7 +10742,9 @@ struct hwrm_nvm_install_update_cmd_err {
#define NVM_INSTALL_UPDATE_CMD_ERR_CODE_NO_SPACE 0x2UL
#define NVM_INSTALL_UPDATE_CMD_ERR_CODE_ANTI_ROLLBACK 0x3UL
#define NVM_INSTALL_UPDATE_CMD_ERR_CODE_NO_VOLTREG_SUPPORT 0x4UL
- #define NVM_INSTALL_UPDATE_CMD_ERR_CODE_LAST NVM_INSTALL_UPDATE_CMD_ERR_CODE_NO_VOLTREG_SUPPORT
+ #define NVM_INSTALL_UPDATE_CMD_ERR_CODE_DEFRAG_FAILED 0x5UL
+ #define NVM_INSTALL_UPDATE_CMD_ERR_CODE_UNKNOWN_DIR_ERR 0x6UL
+ #define NVM_INSTALL_UPDATE_CMD_ERR_CODE_LAST NVM_INSTALL_UPDATE_CMD_ERR_CODE_UNKNOWN_DIR_ERR
u8 unused_0[7];
};
@@ -10591,7 +10767,8 @@ struct hwrm_nvm_get_variable_input {
__le16 index_2;
__le16 index_3;
u8 flags;
- #define NVM_GET_VARIABLE_REQ_FLAGS_FACTORY_DFLT 0x1UL
+ #define NVM_GET_VARIABLE_REQ_FLAGS_FACTORY_DFLT 0x1UL
+ #define NVM_GET_VARIABLE_REQ_FLAGS_VALIDATE_OPT_VALUE 0x2UL
u8 unused_0;
};
@@ -10606,18 +10783,25 @@ struct hwrm_nvm_get_variable_output {
#define NVM_GET_VARIABLE_RESP_OPTION_NUM_RSVD_0 0x0UL
#define NVM_GET_VARIABLE_RESP_OPTION_NUM_RSVD_FFFF 0xffffUL
#define NVM_GET_VARIABLE_RESP_OPTION_NUM_LAST NVM_GET_VARIABLE_RESP_OPTION_NUM_RSVD_FFFF
- u8 unused_0[3];
+ u8 flags;
+ #define NVM_GET_VARIABLE_RESP_FLAGS_VALIDATE_OPT_VALUE 0x1UL
+ u8 unused_0[2];
u8 valid;
};
/* hwrm_nvm_get_variable_cmd_err (size:64b/8B) */
struct hwrm_nvm_get_variable_cmd_err {
u8 code;
- #define NVM_GET_VARIABLE_CMD_ERR_CODE_UNKNOWN 0x0UL
- #define NVM_GET_VARIABLE_CMD_ERR_CODE_VAR_NOT_EXIST 0x1UL
- #define NVM_GET_VARIABLE_CMD_ERR_CODE_CORRUPT_VAR 0x2UL
- #define NVM_GET_VARIABLE_CMD_ERR_CODE_LEN_TOO_SHORT 0x3UL
- #define NVM_GET_VARIABLE_CMD_ERR_CODE_LAST NVM_GET_VARIABLE_CMD_ERR_CODE_LEN_TOO_SHORT
+ #define NVM_GET_VARIABLE_CMD_ERR_CODE_UNKNOWN 0x0UL
+ #define NVM_GET_VARIABLE_CMD_ERR_CODE_VAR_NOT_EXIST 0x1UL
+ #define NVM_GET_VARIABLE_CMD_ERR_CODE_CORRUPT_VAR 0x2UL
+ #define NVM_GET_VARIABLE_CMD_ERR_CODE_LEN_TOO_SHORT 0x3UL
+ #define NVM_GET_VARIABLE_CMD_ERR_CODE_INDEX_INVALID 0x4UL
+ #define NVM_GET_VARIABLE_CMD_ERR_CODE_ACCESS_DENIED 0x5UL
+ #define NVM_GET_VARIABLE_CMD_ERR_CODE_CB_FAILED 0x6UL
+ #define NVM_GET_VARIABLE_CMD_ERR_CODE_INVALID_DATA_LEN 0x7UL
+ #define NVM_GET_VARIABLE_CMD_ERR_CODE_NO_MEM 0x8UL
+ #define NVM_GET_VARIABLE_CMD_ERR_CODE_LAST NVM_GET_VARIABLE_CMD_ERR_CODE_NO_MEM
u8 unused_0[7];
};
@@ -10667,10 +10851,17 @@ struct hwrm_nvm_set_variable_output {
/* hwrm_nvm_set_variable_cmd_err (size:64b/8B) */
struct hwrm_nvm_set_variable_cmd_err {
u8 code;
- #define NVM_SET_VARIABLE_CMD_ERR_CODE_UNKNOWN 0x0UL
- #define NVM_SET_VARIABLE_CMD_ERR_CODE_VAR_NOT_EXIST 0x1UL
- #define NVM_SET_VARIABLE_CMD_ERR_CODE_CORRUPT_VAR 0x2UL
- #define NVM_SET_VARIABLE_CMD_ERR_CODE_LAST NVM_SET_VARIABLE_CMD_ERR_CODE_CORRUPT_VAR
+ #define NVM_SET_VARIABLE_CMD_ERR_CODE_UNKNOWN 0x0UL
+ #define NVM_SET_VARIABLE_CMD_ERR_CODE_VAR_NOT_EXIST 0x1UL
+ #define NVM_SET_VARIABLE_CMD_ERR_CODE_CORRUPT_VAR 0x2UL
+ #define NVM_SET_VARIABLE_CMD_ERR_CODE_LEN_TOO_SHORT 0x3UL
+ #define NVM_SET_VARIABLE_CMD_ERR_CODE_ACTION_NOT_SUPPORTED 0x4UL
+ #define NVM_SET_VARIABLE_CMD_ERR_CODE_INDEX_INVALID 0x5UL
+ #define NVM_SET_VARIABLE_CMD_ERR_CODE_ACCESS_DENIED 0x6UL
+ #define NVM_SET_VARIABLE_CMD_ERR_CODE_CB_FAILED 0x7UL
+ #define NVM_SET_VARIABLE_CMD_ERR_CODE_INVALID_DATA_LEN 0x8UL
+ #define NVM_SET_VARIABLE_CMD_ERR_CODE_NO_MEM 0x9UL
+ #define NVM_SET_VARIABLE_CMD_ERR_CODE_LAST NVM_SET_VARIABLE_CMD_ERR_CODE_NO_MEM
u8 unused_0[7];
};
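
Drivers normally gate new behaviour on these capability bits once HWRM_FUNC_QCAPS completes. A hedged sketch, assuming the response has already been copied back from firmware; the helper name is invented for illustration:

static bool bnxt_fw_supports_multi_l2_db(struct hwrm_func_qcaps_output *resp)
{
	/* flags_ext3 carries the EXT3 capability bits added above */
	return le32_to_cpu(resp->flags_ext3) &
	       FUNC_QCAPS_RESP_FLAGS_EXT3_MULTI_L2_DB_SUPPORTED;
}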
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index cc700925b802..ec527b476dba 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -767,12 +767,15 @@ enum bpf_type_flag {
*/
MEM_WRITE = BIT(18 + BPF_BASE_TYPE_BITS),
+ /* DYNPTR points to skb_metadata_end()-skb_metadata_len() */
+ DYNPTR_TYPE_SKB_META = BIT(19 + BPF_BASE_TYPE_BITS),
+
__BPF_TYPE_FLAG_MAX,
__BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1,
};
#define DYNPTR_TYPE_FLAG_MASK (DYNPTR_TYPE_LOCAL | DYNPTR_TYPE_RINGBUF | DYNPTR_TYPE_SKB \
- | DYNPTR_TYPE_XDP)
+ | DYNPTR_TYPE_XDP | DYNPTR_TYPE_SKB_META)
/* Max number of base types. */
#define BPF_BASE_TYPE_LIMIT (1UL << BPF_BASE_TYPE_BITS)
@@ -1358,6 +1361,8 @@ enum bpf_dynptr_type {
BPF_DYNPTR_TYPE_SKB,
/* Underlying data is a xdp_buff */
BPF_DYNPTR_TYPE_XDP,
+ /* Points to skb_metadata_end()-skb_metadata_len() */
+ BPF_DYNPTR_TYPE_SKB_META,
};
int bpf_dynptr_check_size(u32 size);
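
When the verifier sees a dynptr argument it maps the type flag onto the enum above. A hedged sketch of that mapping with the new SKB_META case, written from these definitions rather than copied from verifier.c:

static enum bpf_dynptr_type sketch_dynptr_get_type(enum bpf_type_flag flag)
{
	switch (flag & DYNPTR_TYPE_FLAG_MASK) {
	case DYNPTR_TYPE_LOCAL:
		return BPF_DYNPTR_TYPE_LOCAL;
	case DYNPTR_TYPE_RINGBUF:
		return BPF_DYNPTR_TYPE_RINGBUF;
	case DYNPTR_TYPE_SKB:
		return BPF_DYNPTR_TYPE_SKB;
	case DYNPTR_TYPE_XDP:
		return BPF_DYNPTR_TYPE_XDP;
	case DYNPTR_TYPE_SKB_META:	/* new: skb metadata area */
		return BPF_DYNPTR_TYPE_SKB_META;
	default:
		return BPF_DYNPTR_TYPE_INVALID;
	}
}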
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 1e7fd3ee759e..9ed21b65e2e9 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -1784,6 +1784,7 @@ int __bpf_xdp_store_bytes(struct xdp_buff *xdp, u32 offset, void *buf, u32 len);
void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len);
void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off,
void *buf, unsigned long len, bool flush);
+void *bpf_skb_meta_pointer(struct sk_buff *skb, u32 offset);
#else /* CONFIG_NET */
static inline int __bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset,
void *to, u32 len)
@@ -1818,6 +1819,11 @@ static inline void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off, voi
unsigned long len, bool flush)
{
}
+
+static inline void *bpf_skb_meta_pointer(struct sk_buff *skb, u32 offset)
+{
+ return NULL;
+}
#endif /* CONFIG_NET */
#endif /* __LINUX_FILTER_H__ */
diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h
index ff3beda1312c..db45d6f1c4f4 100644
--- a/include/linux/if_pppox.h
+++ b/include/linux/if_pppox.h
@@ -43,7 +43,7 @@ struct pppox_sock {
/* struct sock must be the first member of pppox_sock */
struct sock sk;
struct ppp_channel chan;
- struct pppox_sock *next; /* for hash table */
+ struct pppox_sock __rcu *next; /* for hash table */
union {
struct pppoe_opt pppoe;
struct pptp_opt pptp;
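
With next marked __rcu, sparse now enforces that lookups dereference it through the RCU accessors. A hedged sketch of a reader-side walk over one hash chain; match() is a placeholder for the caller's key comparison:

static struct pppox_sock *sketch_lookup(struct pppox_sock __rcu **head)
{
	struct pppox_sock *po;

	/* readers run under rcu_read_lock(); writers still serialize
	 * updates with a lock and publish via rcu_assign_pointer()
	 */
	for (po = rcu_dereference(*head); po;
	     po = rcu_dereference(po->next))
		if (match(po))
			return po;

	return NULL;
}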
diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index a9033696b0aa..704fd415c2b4 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h
@@ -24,9 +24,6 @@ struct inet_diag_handler {
bool net_admin,
struct sk_buff *skb);
- size_t (*idiag_get_aux_size)(struct sock *sk,
- bool net_admin);
-
int (*destroy)(struct sk_buff *in_skb,
const struct inet_diag_req_v2 *req);
@@ -41,6 +38,11 @@ struct inet_diag_dump_data {
#define inet_diag_nla_bpf_stgs req_nlas[INET_DIAG_REQ_SK_BPF_STORAGES]
struct bpf_sk_storage_diag *bpf_stg_diag;
+ bool mark_needed; /* INET_DIAG_BC_MARK_COND present. */
+#ifdef CONFIG_SOCK_CGROUP_DATA
+ bool cgroup_needed; /* INET_DIAG_BC_CGROUP_COND present. */
+#endif
+ bool userlocks_needed; /* INET_DIAG_BC_AUTO present. */
};
struct inet_connection_sock;
@@ -48,18 +50,8 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
struct sk_buff *skb, struct netlink_callback *cb,
const struct inet_diag_req_v2 *req,
u16 nlmsg_flags, bool net_admin);
-void inet_diag_dump_icsk(struct inet_hashinfo *h, struct sk_buff *skb,
- struct netlink_callback *cb,
- const struct inet_diag_req_v2 *r);
-int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
- struct netlink_callback *cb,
- const struct inet_diag_req_v2 *req);
-
-struct sock *inet_diag_find_one_icsk(struct net *net,
- struct inet_hashinfo *hashinfo,
- const struct inet_diag_req_v2 *req);
-int inet_diag_bc_sk(const struct nlattr *_bc, struct sock *sk);
+int inet_diag_bc_sk(const struct inet_diag_dump_data *cb_data, struct sock *sk);
void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk);
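
With the bytecode conditions pre-parsed into inet_diag_dump_data, the per-socket filter check takes the cached dump state instead of the raw nlattr. A hedged sketch of the call-site shape inside a dump loop, assuming cb_data and sk are already in scope:

/* the pre-parsed *_needed flags let the filter skip sockets cheaply
 * without re-walking the bytecode attribute for every candidate
 */
if (!inet_diag_bc_sk(cb_data, sk))
	continue;	/* filtered out of this dump */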
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index bc6ec2959173..261d02efb615 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -295,7 +295,7 @@ struct raw6_sock {
__u32 offset; /* checksum offset */
struct icmp6_filter filter;
__u32 ip6mr_table;
-
+ struct socket_drop_counters drop_counters;
struct ipv6_pinfo inet6;
};
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 785173aa0739..fb27e3d2fdac 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -1596,14 +1596,16 @@ static inline void mem_cgroup_flush_foreign(struct bdi_writeback *wb)
#endif /* CONFIG_CGROUP_WRITEBACK */
struct sock;
-bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages,
- gfp_t gfp_mask);
-void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages);
#ifdef CONFIG_MEMCG
extern struct static_key_false memcg_sockets_enabled_key;
#define mem_cgroup_sockets_enabled static_branch_unlikely(&memcg_sockets_enabled_key)
+
void mem_cgroup_sk_alloc(struct sock *sk);
void mem_cgroup_sk_free(struct sock *sk);
+void mem_cgroup_sk_inherit(const struct sock *sk, struct sock *newsk);
+bool mem_cgroup_sk_charge(const struct sock *sk, unsigned int nr_pages,
+ gfp_t gfp_mask);
+void mem_cgroup_sk_uncharge(const struct sock *sk, unsigned int nr_pages);
#if BITS_PER_LONG < 64
static inline void mem_cgroup_set_socket_pressure(struct mem_cgroup *memcg)
@@ -1640,32 +1642,37 @@ static inline u64 mem_cgroup_get_socket_pressure(struct mem_cgroup *memcg)
}
#endif
-static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
-{
-#ifdef CONFIG_MEMCG_V1
- if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
- return !!memcg->tcpmem_pressure;
-#endif /* CONFIG_MEMCG_V1 */
- do {
- if (time_before64(get_jiffies_64(), mem_cgroup_get_socket_pressure(memcg)))
- return true;
- } while ((memcg = parent_mem_cgroup(memcg)));
- return false;
-}
-
int alloc_shrinker_info(struct mem_cgroup *memcg);
void free_shrinker_info(struct mem_cgroup *memcg);
void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id);
void reparent_shrinker_deferred(struct mem_cgroup *memcg);
#else
#define mem_cgroup_sockets_enabled 0
-static inline void mem_cgroup_sk_alloc(struct sock *sk) { };
-static inline void mem_cgroup_sk_free(struct sock *sk) { };
-static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
+
+static inline void mem_cgroup_sk_alloc(struct sock *sk)
+{
+}
+
+static inline void mem_cgroup_sk_free(struct sock *sk)
+{
+}
+
+static inline void mem_cgroup_sk_inherit(const struct sock *sk, struct sock *newsk)
+{
+}
+
+static inline bool mem_cgroup_sk_charge(const struct sock *sk,
+ unsigned int nr_pages,
+ gfp_t gfp_mask)
{
return false;
}
+static inline void mem_cgroup_sk_uncharge(const struct sock *sk,
+ unsigned int nr_pages)
+{
+}
+
static inline void set_shrinker_bit(struct mem_cgroup *memcg,
int nid, int shrinker_id)
{
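
The sk-based wrappers keep the memcg dereference and the static-branch check behind one interface. A hedged sketch of a charge/uncharge pair with the new API (page accounting simplified; not a real call site):

static int sketch_charge_sk_pages(struct sock *sk, unsigned int nr_pages)
{
	if (mem_cgroup_sockets_enabled &&
	    !mem_cgroup_sk_charge(sk, nr_pages, GFP_NOWAIT))
		return -ENOMEM;	/* memcg refused the charge */

	/* ... use the memory ... */

	if (mem_cgroup_sockets_enabled)
		mem_cgroup_sk_uncharge(sk, nr_pages);

	return 0;
}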
diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h
index 9af01bdd86d2..ca691641788b 100644
--- a/include/linux/micrel_phy.h
+++ b/include/linux/micrel_phy.h
@@ -32,6 +32,7 @@
#define PHY_ID_LAN8814 0x00221660
#define PHY_ID_LAN8804 0x00221670
#define PHY_ID_LAN8841 0x00221650
+#define PHY_ID_LAN8842 0x002216C0
#define PHY_ID_KSZ886X 0x00221430
#define PHY_ID_KSZ8863 0x00221435
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 9d2467f982ad..72a83666e67f 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -1248,6 +1248,7 @@ enum mlx5_cap_type {
MLX5_CAP_IPSEC,
MLX5_CAP_CRYPTO = 0x1a,
MLX5_CAP_SHAMPO = 0x1d,
+ MLX5_CAP_PSP = 0x1e,
MLX5_CAP_MACSEC = 0x1f,
MLX5_CAP_GENERAL_2 = 0x20,
MLX5_CAP_PORT_SELECTION = 0x25,
@@ -1487,6 +1488,9 @@ enum mlx5_qcam_feature_groups {
#define MLX5_CAP_SHAMPO(mdev, cap) \
MLX5_GET(shampo_cap, mdev->caps.hca[MLX5_CAP_SHAMPO]->cur, cap)
+#define MLX5_CAP_PSP(mdev, cap)\
+ MLX5_GET(psp_cap, (mdev)->caps.hca[MLX5_CAP_PSP]->cur, cap)
+
enum {
MLX5_CMD_STAT_OK = 0x0,
MLX5_CMD_STAT_INT_ERR = 0x1,
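
The new accessor follows the existing MLX5_CAP_*() pattern, so feature tests read the same way as for other capability pages. A hedged probe-time sketch; the field names come from the psp_cap layout added in mlx5_ifc.h below, and the priv flag is illustrative:

/* check PSP support before enabling crypto offload */
if (MLX5_CAP_GEN(mdev, psp) &&
    MLX5_CAP_PSP(mdev, psp_crypto_offload) &&
    MLX5_CAP_PSP(mdev, psp_crypto_esp_aes_gcm_128_encrypt))
	priv->psp_tx_supported = true;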
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 8360d9011d4f..e9f14a0c7f4f 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -189,6 +189,9 @@ enum {
MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS = 0x727,
MLX5_CMD_OP_RELEASE_XRQ_ERROR = 0x729,
MLX5_CMD_OP_MODIFY_XRQ = 0x72a,
+ MLX5_CMD_OPCODE_QUERY_DELEGATED_VHCA = 0x732,
+ MLX5_CMD_OPCODE_CREATE_ESW_VPORT = 0x733,
+ MLX5_CMD_OPCODE_DESTROY_ESW_VPORT = 0x734,
MLX5_CMD_OP_QUERY_ESW_FUNCTIONS = 0x740,
MLX5_CMD_OP_QUERY_VPORT_STATE = 0x750,
MLX5_CMD_OP_MODIFY_VPORT_STATE = 0x751,
@@ -311,6 +314,8 @@ enum {
MLX5_CMD_OP_CREATE_UMEM = 0xa08,
MLX5_CMD_OP_DESTROY_UMEM = 0xa0a,
MLX5_CMD_OP_SYNC_STEERING = 0xb00,
+ MLX5_CMD_OP_PSP_GEN_SPI = 0xb10,
+ MLX5_CMD_OP_PSP_ROTATE_KEY = 0xb11,
MLX5_CMD_OP_QUERY_VHCA_STATE = 0xb0d,
MLX5_CMD_OP_MODIFY_VHCA_STATE = 0xb0e,
MLX5_CMD_OP_SYNC_CRYPTO = 0xb12,
@@ -486,12 +491,14 @@ struct mlx5_ifc_flow_table_prop_layout_bits {
u8 execute_aso[0x1];
u8 reserved_at_47[0x19];
- u8 reserved_at_60[0x2];
+ u8 reformat_l2_to_l3_psp_tunnel[0x1];
+ u8 reformat_l3_psp_tunnel_to_l2[0x1];
u8 reformat_insert[0x1];
u8 reformat_remove[0x1];
u8 macsec_encrypt[0x1];
u8 macsec_decrypt[0x1];
- u8 reserved_at_66[0x2];
+ u8 psp_encrypt[0x1];
+ u8 psp_decrypt[0x1];
u8 reformat_add_macsec[0x1];
u8 reformat_remove_macsec[0x1];
u8 reparse[0x1];
@@ -700,7 +707,7 @@ struct mlx5_ifc_fte_match_set_misc2_bits {
u8 metadata_reg_a[0x20];
- u8 reserved_at_1a0[0x8];
+ u8 psp_syndrome[0x8];
u8 macsec_syndrome[0x8];
u8 ipsec_syndrome[0x8];
u8 ipsec_next_header[0x8];
@@ -1508,6 +1515,21 @@ struct mlx5_ifc_macsec_cap_bits {
u8 reserved_at_40[0x7c0];
};
+struct mlx5_ifc_psp_cap_bits {
+ u8 reserved_at_0[0x1];
+ u8 psp_crypto_offload[0x1];
+ u8 reserved_at_2[0x1];
+ u8 psp_crypto_esp_aes_gcm_256_encrypt[0x1];
+ u8 psp_crypto_esp_aes_gcm_128_encrypt[0x1];
+ u8 psp_crypto_esp_aes_gcm_256_decrypt[0x1];
+ u8 psp_crypto_esp_aes_gcm_128_decrypt[0x1];
+ u8 reserved_at_7[0x4];
+ u8 log_max_num_of_psp_spi[0x5];
+ u8 reserved_at_10[0x10];
+
+ u8 reserved_at_20[0x7e0];
+};
+
enum {
MLX5_WQ_TYPE_LINKED_LIST = 0x0,
MLX5_WQ_TYPE_CYCLIC = 0x1,
@@ -1873,7 +1895,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 reserved_at_2a0[0x7];
u8 mkey_pcie_tph[0x1];
- u8 reserved_at_2a8[0x3];
+ u8 reserved_at_2a8[0x2];
+
+ u8 psp[0x1];
u8 shampo[0x1];
u8 reserved_at_2ac[0x4];
u8 max_wqe_sz_rq[0x10];
@@ -2207,7 +2231,19 @@ struct mlx5_ifc_cmd_hca_cap_2_bits {
u8 reserved_at_440[0x8];
u8 max_num_eqs_24b[0x18];
- u8 reserved_at_460[0x3a0];
+
+ u8 reserved_at_460[0x160];
+
+ u8 query_adjacent_functions_id[0x1];
+ u8 ingress_egress_esw_vport_connect[0x1];
+ u8 function_id_type_vhca_id[0x1];
+ u8 reserved_at_5c3[0xd];
+ u8 delegate_vhca_management_profiles[0x10];
+
+ u8 delegated_vhca_max[0x10];
+ u8 delegate_vhca_max[0x10];
+
+ u8 reserved_at_600[0x200];
};
enum mlx5_ifc_flow_destination_type {
@@ -3788,6 +3824,7 @@ union mlx5_ifc_hca_cap_union_bits {
struct mlx5_ifc_macsec_cap_bits macsec_cap;
struct mlx5_ifc_crypto_cap_bits crypto_cap;
struct mlx5_ifc_ipsec_cap_bits ipsec_cap;
+ struct mlx5_ifc_psp_cap_bits psp_cap;
u8 reserved_at_0[0x8000];
};
@@ -3817,6 +3854,7 @@ enum {
enum {
MLX5_FLOW_CONTEXT_ENCRYPT_DECRYPT_TYPE_IPSEC = 0x0,
MLX5_FLOW_CONTEXT_ENCRYPT_DECRYPT_TYPE_MACSEC = 0x1,
+ MLX5_FLOW_CONTEXT_ENCRYPT_DECRYPT_TYPE_PSP = 0x2,
};
struct mlx5_ifc_vlan_bits {
@@ -5159,7 +5197,9 @@ struct mlx5_ifc_set_hca_cap_in_bits {
u8 other_function[0x1];
u8 ec_vf_function[0x1];
- u8 reserved_at_42[0xe];
+ u8 reserved_at_42[0x1];
+ u8 function_id_type[0x1];
+ u8 reserved_at_44[0xc];
u8 function_id[0x10];
u8 reserved_at_60[0x20];
@@ -6357,7 +6397,9 @@ struct mlx5_ifc_query_hca_cap_in_bits {
u8 other_function[0x1];
u8 ec_vf_function[0x1];
- u8 reserved_at_42[0xe];
+ u8 reserved_at_42[0x1];
+ u8 function_id_type[0x1];
+ u8 reserved_at_44[0xc];
u8 function_id[0x10];
u8 reserved_at_60[0x20];
@@ -6983,6 +7025,28 @@ struct mlx5_ifc_query_esw_vport_context_in_bits {
u8 reserved_at_60[0x20];
};
+struct mlx5_ifc_destroy_esw_vport_out_bits {
+ u8 status[0x8];
+ u8 reserved_at_8[0x18];
+
+ u8 syndrome[0x20];
+
+ u8 reserved_at_40[0x20];
+};
+
+struct mlx5_ifc_destroy_esw_vport_in_bits {
+ u8 opcode[0x10];
+ u8 uid[0x10];
+
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+
+ u8 reserved_at_40[0x10];
+ u8 vport_num[0x10];
+
+ u8 reserved_at_60[0x20];
+};
+
struct mlx5_ifc_modify_esw_vport_context_out_bits {
u8 status[0x8];
u8 reserved_at_8[0x18];
@@ -7118,6 +7182,8 @@ enum mlx5_reformat_ctx_type {
MLX5_REFORMAT_TYPE_DEL_ESP_TRANSPORT_OVER_UDP = 0xa,
MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV6 = 0xb,
MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_UDPV6 = 0xc,
+ MLX5_REFORMAT_TYPE_ADD_PSP_TUNNEL = 0xd,
+ MLX5_REFORMAT_TYPE_DEL_PSP_TUNNEL = 0xe,
MLX5_REFORMAT_TYPE_INSERT_HDR = 0xf,
MLX5_REFORMAT_TYPE_REMOVE_HDR = 0x10,
MLX5_REFORMAT_TYPE_ADD_MACSEC = 0x11,
@@ -7244,6 +7310,7 @@ enum {
MLX5_ACTION_IN_FIELD_IPSEC_SYNDROME = 0x5D,
MLX5_ACTION_IN_FIELD_OUT_EMD_47_32 = 0x6F,
MLX5_ACTION_IN_FIELD_OUT_EMD_31_0 = 0x70,
+ MLX5_ACTION_IN_FIELD_PSP_SYNDROME = 0x71,
};
struct mlx5_ifc_alloc_modify_header_context_out_bits {
@@ -7484,6 +7551,85 @@ struct mlx5_ifc_query_adapter_in_bits {
u8 reserved_at_40[0x40];
};
+struct mlx5_ifc_function_vhca_rid_info_reg_bits {
+ u8 host_number[0x8];
+ u8 host_pci_device_function[0x8];
+ u8 host_pci_bus[0x8];
+ u8 reserved_at_18[0x3];
+ u8 pci_bus_assigned[0x1];
+ u8 function_type[0x4];
+
+ u8 parent_pci_device_function[0x8];
+ u8 parent_pci_bus[0x8];
+ u8 vhca_id[0x10];
+
+ u8 reserved_at_40[0x10];
+ u8 function_id[0x10];
+
+ u8 reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_delegated_function_vhca_rid_info_bits {
+ struct mlx5_ifc_function_vhca_rid_info_reg_bits function_vhca_rid_info;
+
+ u8 reserved_at_80[0x18];
+ u8 manage_profile[0x8];
+
+ u8 reserved_at_a0[0x60];
+};
+
+struct mlx5_ifc_query_delegated_vhca_out_bits {
+ u8 status[0x8];
+ u8 reserved_at_8[0x18];
+
+ u8 syndrome[0x20];
+
+ u8 reserved_at_40[0x20];
+
+ u8 reserved_at_60[0x10];
+ u8 functions_count[0x10];
+
+ u8 reserved_at_80[0x80];
+
+ struct mlx5_ifc_delegated_function_vhca_rid_info_bits
+ delegated_function_vhca_rid_info[];
+};
+
+struct mlx5_ifc_query_delegated_vhca_in_bits {
+ u8 opcode[0x10];
+ u8 uid[0x10];
+
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+
+ u8 reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_create_esw_vport_out_bits {
+ u8 status[0x8];
+ u8 reserved_at_8[0x18];
+
+ u8 syndrome[0x20];
+
+ u8 reserved_at_40[0x20];
+
+ u8 reserved_at_60[0x10];
+ u8 vport_num[0x10];
+};
+
+struct mlx5_ifc_create_esw_vport_in_bits {
+ u8 opcode[0x10];
+ u8 reserved_at_10[0x10];
+
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+
+ u8 reserved_at_40[0x10];
+ u8 managed_vhca_id[0x10];
+
+ u8 reserved_at_60[0x20];
+};
+
struct mlx5_ifc_qp_2rst_out_bits {
u8 status[0x8];
u8 reserved_at_8[0x18];
@@ -7611,7 +7757,12 @@ struct mlx5_ifc_modify_vport_state_in_bits {
u8 reserved_at_41[0xf];
u8 vport_number[0x10];
- u8 reserved_at_60[0x18];
+ u8 reserved_at_60[0x10];
+ u8 ingress_connect[0x1];
+ u8 egress_connect[0x1];
+ u8 ingress_connect_valid[0x1];
+ u8 egress_connect_valid[0x1];
+ u8 reserved_at_74[0x4];
u8 admin_state[0x4];
u8 reserved_at_7c[0x4];
};
@@ -12954,6 +13105,7 @@ enum {
MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_PURPOSE_TLS = 0x1,
MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_PURPOSE_IPSEC = 0x2,
MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_PURPOSE_MACSEC = 0x4,
+ MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_PURPOSE_PSP = 0x6,
};
struct mlx5_ifc_tls_static_params_bits {
@@ -13371,4 +13523,64 @@ enum mlx5e_pcie_cong_event_mod_field {
MLX5_PCIE_CONG_EVENT_MOD_THRESH = BIT(2),
};
+struct mlx5_ifc_psp_rotate_key_in_bits {
+ u8 opcode[0x10];
+ u8 uid[0x10];
+
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+
+ u8 reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_psp_rotate_key_out_bits {
+ u8 status[0x8];
+ u8 reserved_at_8[0x18];
+
+ u8 syndrome[0x20];
+
+ u8 reserved_at_40[0x40];
+};
+
+enum mlx5_psp_gen_spi_in_key_size {
+ MLX5_PSP_GEN_SPI_IN_KEY_SIZE_128 = 0x0,
+ MLX5_PSP_GEN_SPI_IN_KEY_SIZE_256 = 0x1,
+};
+
+struct mlx5_ifc_key_spi_bits {
+ u8 spi[0x20];
+
+ u8 reserved_at_20[0x60];
+
+ u8 key[8][0x20];
+};
+
+struct mlx5_ifc_psp_gen_spi_in_bits {
+ u8 opcode[0x10];
+ u8 uid[0x10];
+
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+
+ u8 reserved_at_40[0x20];
+
+ u8 key_size[0x2];
+ u8 reserved_at_62[0xe];
+ u8 num_of_spi[0x10];
+};
+
+struct mlx5_ifc_psp_gen_spi_out_bits {
+ u8 status[0x8];
+ u8 reserved_at_8[0x18];
+
+ u8 syndrome[0x20];
+
+ u8 reserved_at_40[0x10];
+ u8 num_of_spi[0x10];
+
+ u8 reserved_at_60[0x20];
+
+ struct mlx5_ifc_key_spi_bits key_spi[];
+};
+
#endif /* MLX5_IFC_H */
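
Commands built from these layouts go through the usual MLX5_SET()/MLX5_GET() accessors over zeroed dword buffers. A hedged sketch of a PSP_GEN_SPI invocation for a single 128-bit key; mdev, out and out_sz are assumed to be set up by the caller, and sizing out for the trailing key_spi[] array is elided:

u32 in[MLX5_ST_SZ_DW(psp_gen_spi_in)] = {};
u32 spi;
int err;

MLX5_SET(psp_gen_spi_in, in, opcode, MLX5_CMD_OP_PSP_GEN_SPI);
MLX5_SET(psp_gen_spi_in, in, key_size, MLX5_PSP_GEN_SPI_IN_KEY_SIZE_128);
MLX5_SET(psp_gen_spi_in, in, num_of_spi, 1);

err = mlx5_cmd_exec(mdev, in, sizeof(in), out, out_sz);
if (!err)
	spi = MLX5_GET(key_spi, MLX5_ADDR_OF(psp_gen_spi_out, out, key_spi),
		       spi);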
diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h
index c36cc6d82926..c87b9507cfa1 100644
--- a/include/linux/mlx5/vport.h
+++ b/include/linux/mlx5/vport.h
@@ -135,4 +135,6 @@ int mlx5_nic_vport_unaffiliate_multiport(struct mlx5_core_dev *port_mdev);
u64 mlx5_query_nic_system_image_guid(struct mlx5_core_dev *mdev);
int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 vport, void *out,
u16 opmod);
+int mlx5_vport_get_vhca_id(struct mlx5_core_dev *dev, u16 vport, u16 *vhca_id);
+
#endif /* __MLX5_VPORT_H__ */
diff --git a/include/linux/net/intel/libie/adminq.h b/include/linux/net/intel/libie/adminq.h
index 012b5d499c1a..ba62f703df43 100644
--- a/include/linux/net/intel/libie/adminq.h
+++ b/include/linux/net/intel/libie/adminq.h
@@ -192,8 +192,9 @@ LIBIE_CHECK_STRUCT_LEN(16, libie_aqc_list_caps);
#define LIBIE_AQC_CAPS_TX_SCHED_TOPO_COMP_MODE 0x0085
#define LIBIE_AQC_CAPS_NAC_TOPOLOGY 0x0087
#define LIBIE_AQC_CAPS_FW_LAG_SUPPORT 0x0092
-#define LIBIE_AQC_BIT_ROCEV2_LAG 0x01
-#define LIBIE_AQC_BIT_SRIOV_LAG 0x02
+#define LIBIE_AQC_BIT_ROCEV2_LAG BIT(0)
+#define LIBIE_AQC_BIT_SRIOV_LAG BIT(1)
+#define LIBIE_AQC_BIT_SRIOV_AA_LAG BIT(2)
#define LIBIE_AQC_CAPS_FLEX10 0x00F1
#define LIBIE_AQC_CAPS_CEM 0x00F2
diff --git a/include/linux/phy.h b/include/linux/phy.h
index bb45787d8684..04553419adc3 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1273,9 +1273,13 @@ struct phy_driver {
#define to_phy_driver(d) container_of_const(to_mdio_common_driver(d), \
struct phy_driver, mdiodrv)
-#define PHY_ID_MATCH_EXACT(id) .phy_id = (id), .phy_id_mask = GENMASK(31, 0)
-#define PHY_ID_MATCH_MODEL(id) .phy_id = (id), .phy_id_mask = GENMASK(31, 4)
-#define PHY_ID_MATCH_VENDOR(id) .phy_id = (id), .phy_id_mask = GENMASK(31, 10)
+#define PHY_ID_MATCH_EXACT_MASK	GENMASK(31, 0)
+#define PHY_ID_MATCH_MODEL_MASK GENMASK(31, 4)
+#define PHY_ID_MATCH_VENDOR_MASK GENMASK(31, 10)
+
+#define PHY_ID_MATCH_EXACT(id) .phy_id = (id), .phy_id_mask = PHY_ID_MATCH_EXACT_MASK
+#define PHY_ID_MATCH_MODEL(id) .phy_id = (id), .phy_id_mask = PHY_ID_MATCH_MODEL_MASK
+#define PHY_ID_MATCH_VENDOR(id) .phy_id = (id), .phy_id_mask = PHY_ID_MATCH_VENDOR_MASK
/**
* phy_id_compare - compare @id1 with @id2 taking account of @mask
@@ -1292,6 +1296,19 @@ static inline bool phy_id_compare(u32 id1, u32 id2, u32 mask)
}
/**
+ * phy_id_compare_vendor - compare @id with @vendor_mask
+ * @id: PHY ID
+ * @vendor_mask: PHY Vendor mask
+ *
+ * Return: true if the bits from @id match @vendor_mask using the
+ * generic PHY Vendor mask.
+ */
+static inline bool phy_id_compare_vendor(u32 id, u32 vendor_mask)
+{
+ return phy_id_compare(id, vendor_mask, PHY_ID_MATCH_VENDOR_MASK);
+}
+
+/**
* phydev_id_compare - compare @id with the PHY's Clause 22 ID
* @phydev: the PHY device
* @id: the PHY ID to be matched
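
phy_id_compare_vendor() lets a driver match any model from one vendor without spelling out the mask. A hedged usage sketch, borrowing the LAN8842 ID added in micrel_phy.h above purely as an example value:

/* true for any PHY whose vendor bits (31:10) match the LAN8842's */
if (phy_id_compare_vendor(phydev->phy_id, PHY_ID_LAN8842))
	dev_dbg(&phydev->mdio.dev, "same vendor as LAN8842\n");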
diff --git a/include/linux/phy_fixed.h b/include/linux/phy_fixed.h
index 5399b9e41e35..6227a1bdefec 100644
--- a/include/linux/phy_fixed.h
+++ b/include/linux/phy_fixed.h
@@ -17,7 +17,7 @@ struct net_device;
#if IS_ENABLED(CONFIG_FIXED_PHY)
extern int fixed_phy_change_carrier(struct net_device *dev, bool new_carrier);
-int fixed_phy_add(int phy_id, const struct fixed_phy_status *status);
+void fixed_phy_add(const struct fixed_phy_status *status);
struct phy_device *fixed_phy_register(const struct fixed_phy_status *status,
struct device_node *np);
@@ -26,11 +26,7 @@ extern int fixed_phy_set_link_update(struct phy_device *phydev,
int (*link_update)(struct net_device *,
struct fixed_phy_status *));
#else
-static inline int fixed_phy_add(int phy_id,
- const struct fixed_phy_status *status)
-{
- return -ENODEV;
-}
+static inline void fixed_phy_add(const struct fixed_phy_status *status) {}
static inline struct phy_device *
fixed_phy_register(const struct fixed_phy_status *status,
struct device_node *np)
diff --git a/include/linux/phylink.h b/include/linux/phylink.h
index 30659b615fca..9af0411761d7 100644
--- a/include/linux/phylink.h
+++ b/include/linux/phylink.h
@@ -320,9 +320,8 @@ int mac_prepare(struct phylink_config *config, unsigned int mode,
* If in 802.3z mode, the link speed is fixed, dependent on the
* @state->interface. Duplex and pause modes are negotiated via
* the in-band configuration word. Advertised pause modes are set
- * according to the @state->an_enabled and @state->advertising
- * flags. Beware of MACs which only support full duplex at gigabit
- * and higher speeds.
+ * according to @state->advertising. Beware of MACs which only
+ * support full duplex at gigabit and higher speeds.
*
* If in Cisco SGMII mode, the link speed and duplex mode are passed
* in the serial bitstream 16-bit configuration word, and the MAC
@@ -331,7 +330,7 @@ int mac_prepare(struct phylink_config *config, unsigned int mode,
* responsible for reading the configuration word and configuring
* itself accordingly.
*
- * Valid state members: interface, an_enabled, pause, advertising.
+ * Valid state members: interface, pause, advertising.
*
* Implementations are expected to update the MAC to reflect the
* requested settings - i.o.w., if nothing has changed between two
diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h
index 3d089bd4d5e9..7dd7951b23d5 100644
--- a/include/linux/ptp_clock_kernel.h
+++ b/include/linux/ptp_clock_kernel.h
@@ -361,6 +361,24 @@ extern void ptp_clock_event(struct ptp_clock *ptp,
extern int ptp_clock_index(struct ptp_clock *ptp);
/**
+ * ptp_clock_index_by_of_node() - obtain the device index of
+ * a PTP clock based on the PTP device of_node
+ *
+ * @np: The device of_node pointer of the PTP device.
+ * Return: The PHC index on success or -1 on failure.
+ */
+int ptp_clock_index_by_of_node(struct device_node *np);
+
+/**
+ * ptp_clock_index_by_dev() - obtain the device index of
+ * a PTP clock based on the PTP device.
+ *
+ * @parent: The parent device (PTP device) pointer of the PTP clock.
+ * Return: The PHC index on success or -1 on failure.
+ */
+int ptp_clock_index_by_dev(struct device *parent);
+
+/**
* ptp_find_pin() - obtain the pin index of a given auxiliary function
*
* The caller must hold ptp_clock::pincfg_mux. Drivers do not have
@@ -425,6 +443,10 @@ static inline void ptp_clock_event(struct ptp_clock *ptp,
{ }
static inline int ptp_clock_index(struct ptp_clock *ptp)
{ return -1; }
+static inline int ptp_clock_index_by_of_node(struct device_node *np)
+{ return -1; }
+static inline int ptp_clock_index_by_dev(struct device *parent)
+{ return -1; }
static inline int ptp_find_pin(struct ptp_clock *ptp,
enum ptp_pin_function func, unsigned int chan)
{ return -1; }
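
The two lookups let a MAC driver report its PHC index without holding a struct ptp_clock pointer, useful when the timer lives in a separate device such as the NETC timer PCI function added earlier in this series. A hedged ethtool sketch; the sketch_ names and the priv->ptp_dev field are invented for illustration:

static int sketch_get_ts_info(struct net_device *ndev,
			      struct kernel_ethtool_ts_info *info)
{
	struct sketch_priv *priv = netdev_priv(ndev);

	/* resolves to -1 when no PTP clock is registered on that device */
	info->phc_index = ptp_clock_index_by_dev(priv->ptp_dev);
	info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE |
				SOF_TIMESTAMPING_RX_HARDWARE |
				SOF_TIMESTAMPING_RAW_HARDWARE;

	return 0;
}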
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index fa633657e4c0..62e7addccdf6 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1159,6 +1159,45 @@ static inline struct dst_entry *skb_dst(const struct sk_buff *skb)
return (struct dst_entry *)(skb->_skb_refdst & SKB_DST_PTRMASK);
}
+static inline void skb_dst_check_unset(struct sk_buff *skb)
+{
+ DEBUG_NET_WARN_ON_ONCE((skb->_skb_refdst & SKB_DST_PTRMASK) &&
+ !(skb->_skb_refdst & SKB_DST_NOREF));
+}
+
+/**
+ * skb_dstref_steal() - return current dst_entry value and clear it
+ * @skb: buffer
+ *
+ * Resets skb dst_entry without adjusting its reference count. Useful in
+ * cases where dst_entry needs to be temporarily reset and restored.
+ * Note that the returned value cannot be used directly because it
+ * might contain SKB_DST_NOREF bit.
+ *
+ * When in doubt, prefer skb_dst_drop() over skb_dstref_steal() to correctly
+ * handle dst_entry reference counting.
+ *
+ * Returns: original skb dst_entry.
+ */
+static inline unsigned long skb_dstref_steal(struct sk_buff *skb)
+{
+ unsigned long refdst = skb->_skb_refdst;
+
+ skb->_skb_refdst = 0;
+ return refdst;
+}
+
+/**
+ * skb_dstref_restore() - restore skb dst_entry removed via skb_dstref_steal()
+ * @skb: buffer
+ * @refdst: dst entry from a call to skb_dstref_steal()
+ */
+static inline void skb_dstref_restore(struct sk_buff *skb, unsigned long refdst)
+{
+ skb_dst_check_unset(skb);
+ skb->_skb_refdst = refdst;
+}
+
/**
* skb_dst_set - sets skb dst
* @skb: buffer
@@ -1169,6 +1208,7 @@ static inline struct dst_entry *skb_dst(const struct sk_buff *skb)
*/
static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst)
{
+ skb_dst_check_unset(skb);
skb->slow_gro |= !!dst;
skb->_skb_refdst = (unsigned long)dst;
}
@@ -1185,6 +1225,7 @@ static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst)
*/
static inline void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst)
{
+ skb_dst_check_unset(skb);
WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
skb->slow_gro |= !!dst;
skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF;
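
The steal/restore pair is intended for paths that must hand the skb to
code which would otherwise clobber its dst, then put the original entry
back without touching the refcount. A minimal sketch; helper_process()
is a hypothetical consumer:

	unsigned long refdst;

	refdst = skb_dstref_steal(skb);		/* clear, refcount untouched */
	helper_process(skb);			/* hypothetical */
	skb_dstref_restore(skb, refdst);	/* re-arm the original dst */
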
diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 0b9095a281b8..49847888c287 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -315,7 +315,7 @@ static inline bool sk_psock_test_state(const struct sk_psock *psock,
static inline void sock_drop(struct sock *sk, struct sk_buff *skb)
{
- sk_drops_add(sk, skb);
+ sk_drops_skbadd(sk, skb);
kfree_skb(skb);
}
diff --git a/include/linux/soc/airoha/airoha_offload.h b/include/linux/soc/airoha/airoha_offload.h
new file mode 100644
index 000000000000..1dc5b4e35ef9
--- /dev/null
+++ b/include/linux/soc/airoha/airoha_offload.h
@@ -0,0 +1,315 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2025 AIROHA Inc
+ * Author: Lorenzo Bianconi <lorenzo@kernel.org>
+ */
+#ifndef AIROHA_OFFLOAD_H
+#define AIROHA_OFFLOAD_H
+
+#include <linux/spinlock.h>
+#include <linux/workqueue.h>
+
+enum {
+ PPE_CPU_REASON_HIT_UNBIND_RATE_REACHED = 0x0f,
+};
+
+struct airoha_ppe_dev {
+ struct {
+ int (*setup_tc_block_cb)(struct airoha_ppe_dev *dev,
+ void *type_data);
+ void (*check_skb)(struct airoha_ppe_dev *dev,
+ struct sk_buff *skb, u16 hash,
+ bool rx_wlan);
+ } ops;
+
+ void *priv;
+};
+
+#if (IS_BUILTIN(CONFIG_NET_AIROHA) || IS_MODULE(CONFIG_NET_AIROHA))
+struct airoha_ppe_dev *airoha_ppe_get_dev(struct device *dev);
+void airoha_ppe_put_dev(struct airoha_ppe_dev *dev);
+
+static inline int airoha_ppe_dev_setup_tc_block_cb(struct airoha_ppe_dev *dev,
+ void *type_data)
+{
+ return dev->ops.setup_tc_block_cb(dev, type_data);
+}
+
+static inline void airoha_ppe_dev_check_skb(struct airoha_ppe_dev *dev,
+ struct sk_buff *skb,
+ u16 hash, bool rx_wlan)
+{
+ dev->ops.check_skb(dev, skb, hash, rx_wlan);
+}
+#else
+static inline struct airoha_ppe_dev *airoha_ppe_get_dev(struct device *dev)
+{
+ return NULL;
+}
+
+static inline void airoha_ppe_put_dev(struct airoha_ppe_dev *dev)
+{
+}
+
+static inline int airoha_ppe_dev_setup_tc_block_cb(struct airoha_ppe_dev *dev,
+ void *type_data)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline void airoha_ppe_dev_check_skb(struct airoha_ppe_dev *dev,
+ struct sk_buff *skb, u16 hash,
+ bool rx_wlan)
+{
+}
+#endif
+
+#define NPU_NUM_CORES 8
+#define NPU_NUM_IRQ 6
+#define NPU_RX0_DESC_NUM 512
+#define NPU_RX1_DESC_NUM 512
+
+/* CTRL */
+#define NPU_RX_DMA_DESC_LAST_MASK BIT(29)
+#define NPU_RX_DMA_DESC_LEN_MASK GENMASK(28, 15)
+#define NPU_RX_DMA_DESC_CUR_LEN_MASK GENMASK(14, 1)
+#define NPU_RX_DMA_DESC_DONE_MASK BIT(0)
+/* INFO */
+#define NPU_RX_DMA_PKT_COUNT_MASK GENMASK(31, 28)
+#define NPU_RX_DMA_PKT_ID_MASK GENMASK(28, 26)
+#define NPU_RX_DMA_SRC_PORT_MASK GENMASK(25, 21)
+#define NPU_RX_DMA_CRSN_MASK GENMASK(20, 16)
+#define NPU_RX_DMA_FOE_ID_MASK GENMASK(15, 0)
+/* DATA */
+#define NPU_RX_DMA_SID_MASK GENMASK(31, 16)
+#define NPU_RX_DMA_FRAG_TYPE_MASK GENMASK(15, 14)
+#define NPU_RX_DMA_PRIORITY_MASK GENMASK(13, 10)
+#define NPU_RX_DMA_RADIO_ID_MASK GENMASK(9, 6)
+#define NPU_RX_DMA_VAP_ID_MASK GENMASK(5, 2)
+#define NPU_RX_DMA_FRAME_TYPE_MASK GENMASK(1, 0)
+
+struct airoha_npu_rx_dma_desc {
+ u32 ctrl;
+ u32 info;
+ u32 data;
+ u32 addr;
+ u64 rsv;
+} __packed;
+
+/* CTRL */
+#define NPU_TX_DMA_DESC_SCHED_MASK BIT(31)
+#define NPU_TX_DMA_DESC_LEN_MASK GENMASK(30, 18)
+#define NPU_TX_DMA_DESC_VEND_LEN_MASK GENMASK(17, 1)
+#define NPU_TX_DMA_DESC_DONE_MASK BIT(0)
+
+#define NPU_TXWI_LEN 192
+
+struct airoha_npu_tx_dma_desc {
+ u32 ctrl;
+ u32 addr;
+ u64 rsv;
+ u8 txwi[NPU_TXWI_LEN];
+} __packed;
+
+enum airoha_npu_wlan_set_cmd {
+ WLAN_FUNC_SET_WAIT_PCIE_ADDR,
+ WLAN_FUNC_SET_WAIT_DESC,
+ WLAN_FUNC_SET_WAIT_NPU_INIT_DONE,
+ WLAN_FUNC_SET_WAIT_TRAN_TO_CPU,
+ WLAN_FUNC_SET_WAIT_BA_WIN_SIZE,
+ WLAN_FUNC_SET_WAIT_DRIVER_MODEL,
+ WLAN_FUNC_SET_WAIT_DEL_STA,
+ WLAN_FUNC_SET_WAIT_DRAM_BA_NODE_ADDR,
+ WLAN_FUNC_SET_WAIT_PKT_BUF_ADDR,
+ WLAN_FUNC_SET_WAIT_IS_TEST_NOBA,
+ WLAN_FUNC_SET_WAIT_FLUSHONE_TIMEOUT,
+ WLAN_FUNC_SET_WAIT_FLUSHALL_TIMEOUT,
+ WLAN_FUNC_SET_WAIT_IS_FORCE_TO_CPU,
+ WLAN_FUNC_SET_WAIT_PCIE_STATE,
+ WLAN_FUNC_SET_WAIT_PCIE_PORT_TYPE,
+ WLAN_FUNC_SET_WAIT_ERROR_RETRY_TIMES,
+ WLAN_FUNC_SET_WAIT_BAR_INFO,
+ WLAN_FUNC_SET_WAIT_FAST_FLAG,
+ WLAN_FUNC_SET_WAIT_NPU_BAND0_ONCPU,
+ WLAN_FUNC_SET_WAIT_TX_RING_PCIE_ADDR,
+ WLAN_FUNC_SET_WAIT_TX_DESC_HW_BASE,
+ WLAN_FUNC_SET_WAIT_TX_BUF_SPACE_HW_BASE,
+ WLAN_FUNC_SET_WAIT_RX_RING_FOR_TXDONE_HW_BASE,
+ WLAN_FUNC_SET_WAIT_TX_PKT_BUF_ADDR,
+ WLAN_FUNC_SET_WAIT_INODE_TXRX_REG_ADDR,
+ WLAN_FUNC_SET_WAIT_INODE_DEBUG_FLAG,
+ WLAN_FUNC_SET_WAIT_INODE_HW_CFG_INFO,
+ WLAN_FUNC_SET_WAIT_INODE_STOP_ACTION,
+ WLAN_FUNC_SET_WAIT_INODE_PCIE_SWAP,
+ WLAN_FUNC_SET_WAIT_RATELIMIT_CTRL,
+ WLAN_FUNC_SET_WAIT_HWNAT_INIT,
+ WLAN_FUNC_SET_WAIT_ARHT_CHIP_INFO,
+ WLAN_FUNC_SET_WAIT_TX_BUF_CHECK_ADDR,
+ WLAN_FUNC_SET_WAIT_TOKEN_ID_SIZE,
+};
+
+enum airoha_npu_wlan_get_cmd {
+ WLAN_FUNC_GET_WAIT_NPU_INFO,
+ WLAN_FUNC_GET_WAIT_LAST_RATE,
+ WLAN_FUNC_GET_WAIT_COUNTER,
+ WLAN_FUNC_GET_WAIT_DBG_COUNTER,
+ WLAN_FUNC_GET_WAIT_RXDESC_BASE,
+ WLAN_FUNC_GET_WAIT_WCID_DBG_COUNTER,
+ WLAN_FUNC_GET_WAIT_DMA_ADDR,
+ WLAN_FUNC_GET_WAIT_RING_SIZE,
+ WLAN_FUNC_GET_WAIT_NPU_SUPPORT_MAP,
+ WLAN_FUNC_GET_WAIT_MDC_LOCK_ADDRESS,
+ WLAN_FUNC_GET_WAIT_NPU_VERSION,
+};
+
+struct airoha_npu {
+#if (IS_BUILTIN(CONFIG_NET_AIROHA_NPU) || IS_MODULE(CONFIG_NET_AIROHA_NPU))
+ struct device *dev;
+ struct regmap *regmap;
+
+ struct airoha_npu_core {
+ struct airoha_npu *npu;
+ /* protect concurrent npu memory accesses */
+ spinlock_t lock;
+ struct work_struct wdt_work;
+ } cores[NPU_NUM_CORES];
+
+ int irqs[NPU_NUM_IRQ];
+
+ struct airoha_foe_stats __iomem *stats;
+
+ struct {
+ int (*ppe_init)(struct airoha_npu *npu);
+ int (*ppe_deinit)(struct airoha_npu *npu);
+ int (*ppe_flush_sram_entries)(struct airoha_npu *npu,
+ dma_addr_t foe_addr,
+ int sram_num_entries);
+ int (*ppe_foe_commit_entry)(struct airoha_npu *npu,
+ dma_addr_t foe_addr,
+ u32 entry_size, u32 hash,
+ bool ppe2);
+ int (*wlan_init_reserved_memory)(struct airoha_npu *npu);
+ int (*wlan_send_msg)(struct airoha_npu *npu, int ifindex,
+ enum airoha_npu_wlan_set_cmd func_id,
+ void *data, int data_len, gfp_t gfp);
+ int (*wlan_get_msg)(struct airoha_npu *npu, int ifindex,
+ enum airoha_npu_wlan_get_cmd func_id,
+ void *data, int data_len, gfp_t gfp);
+ u32 (*wlan_get_queue_addr)(struct airoha_npu *npu, int qid,
+ bool xmit);
+ void (*wlan_set_irq_status)(struct airoha_npu *npu, u32 val);
+ u32 (*wlan_get_irq_status)(struct airoha_npu *npu, int q);
+ void (*wlan_enable_irq)(struct airoha_npu *npu, int q);
+ void (*wlan_disable_irq)(struct airoha_npu *npu, int q);
+ } ops;
+#endif
+};
+
+#if (IS_BUILTIN(CONFIG_NET_AIROHA_NPU) || IS_MODULE(CONFIG_NET_AIROHA_NPU))
+struct airoha_npu *airoha_npu_get(struct device *dev, dma_addr_t *stats_addr);
+void airoha_npu_put(struct airoha_npu *npu);
+
+static inline int airoha_npu_wlan_init_reserved_memory(struct airoha_npu *npu)
+{
+ return npu->ops.wlan_init_reserved_memory(npu);
+}
+
+static inline int airoha_npu_wlan_send_msg(struct airoha_npu *npu,
+ int ifindex,
+ enum airoha_npu_wlan_set_cmd cmd,
+ void *data, int data_len, gfp_t gfp)
+{
+ return npu->ops.wlan_send_msg(npu, ifindex, cmd, data, data_len, gfp);
+}
+
+static inline int airoha_npu_wlan_get_msg(struct airoha_npu *npu, int ifindex,
+ enum airoha_npu_wlan_get_cmd cmd,
+ void *data, int data_len, gfp_t gfp)
+{
+ return npu->ops.wlan_get_msg(npu, ifindex, cmd, data, data_len, gfp);
+}
+
+static inline u32 airoha_npu_wlan_get_queue_addr(struct airoha_npu *npu,
+ int qid, bool xmit)
+{
+ return npu->ops.wlan_get_queue_addr(npu, qid, xmit);
+}
+
+static inline void airoha_npu_wlan_set_irq_status(struct airoha_npu *npu,
+ u32 val)
+{
+ npu->ops.wlan_set_irq_status(npu, val);
+}
+
+static inline u32 airoha_npu_wlan_get_irq_status(struct airoha_npu *npu, int q)
+{
+ return npu->ops.wlan_get_irq_status(npu, q);
+}
+
+static inline void airoha_npu_wlan_enable_irq(struct airoha_npu *npu, int q)
+{
+ npu->ops.wlan_enable_irq(npu, q);
+}
+
+static inline void airoha_npu_wlan_disable_irq(struct airoha_npu *npu, int q)
+{
+ npu->ops.wlan_disable_irq(npu, q);
+}
+#else
+static inline struct airoha_npu *airoha_npu_get(struct device *dev,
+ dma_addr_t *foe_stats_addr)
+{
+ return NULL;
+}
+
+static inline void airoha_npu_put(struct airoha_npu *npu)
+{
+}
+
+static inline int airoha_npu_wlan_init_reserved_memory(struct airoha_npu *npu)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int airoha_npu_wlan_send_msg(struct airoha_npu *npu,
+ int ifindex,
+ enum airoha_npu_wlan_set_cmd cmd,
+ void *data, int data_len, gfp_t gfp)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int airoha_npu_wlan_get_msg(struct airoha_npu *npu, int ifindex,
+ enum airoha_npu_wlan_get_cmd cmd,
+ void *data, int data_len, gfp_t gfp)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline u32 airoha_npu_wlan_get_queue_addr(struct airoha_npu *npu,
+ int qid, bool xmit)
+{
+ return 0;
+}
+
+static inline void airoha_npu_wlan_set_irq_status(struct airoha_npu *npu,
+ u32 val)
+{
+}
+
+static inline u32 airoha_npu_wlan_get_irq_status(struct airoha_npu *npu,
+ int q)
+{
+ return 0;
+}
+
+static inline void airoha_npu_wlan_enable_irq(struct airoha_npu *npu, int q)
+{
+}
+
+static inline void airoha_npu_wlan_disable_irq(struct airoha_npu *npu, int q)
+{
+}
+#endif
+
+#endif /* AIROHA_OFFLOAD_H */
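
A consumer (e.g. a wireless driver) is expected to take one reference on
the NPU and go through the ops wrappers afterwards. A hedged sketch;
foo_attach_npu() and the command payload are illustrative, and the real
airoha_npu_get() may also return an ERR_PTR on failure:

static int foo_attach_npu(struct device *dev)
{
	dma_addr_t stats_addr;
	struct airoha_npu *npu;
	u32 done = 1;	/* illustrative payload */
	int err;

	npu = airoha_npu_get(dev, &stats_addr);
	if (IS_ERR_OR_NULL(npu))	/* stub returns NULL when support is off */
		return -EOPNOTSUPP;

	err = airoha_npu_wlan_send_msg(npu, 0, WLAN_FUNC_SET_WAIT_NPU_INIT_DONE,
				       &done, sizeof(done), GFP_KERNEL);
	if (err)
		airoha_npu_put(npu);
	return err;
}
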
diff --git a/include/linux/soc/mediatek/mtk_wed.h b/include/linux/soc/mediatek/mtk_wed.h
index d8949a4ed0dc..c4ff6bab176d 100644
--- a/include/linux/soc/mediatek/mtk_wed.h
+++ b/include/linux/soc/mediatek/mtk_wed.h
@@ -147,7 +147,7 @@ struct mtk_wed_device {
u32 wpdma_tx;
u32 wpdma_txfree;
u32 wpdma_rx_glo;
- u32 wpdma_rx;
+ u32 wpdma_rx[MTK_WED_RX_QUEUES];
u32 wpdma_rx_rro[MTK_WED_RX_QUEUES];
u32 wpdma_rx_pg;
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index 26ddf95d23f9..e284f04964bf 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -238,7 +238,7 @@ struct plat_stmmacenet_data {
int (*set_clk_tx_rate)(void *priv, struct clk *clk_tx_i,
phy_interface_t interface, int speed);
void (*fix_mac_speed)(void *priv, int speed, unsigned int mode);
- int (*fix_soc_reset)(void *priv, void __iomem *ioaddr);
+ int (*fix_soc_reset)(struct stmmac_priv *priv, void __iomem *ioaddr);
int (*serdes_powerup)(struct net_device *ndev, void *priv);
void (*serdes_powerdown)(struct net_device *ndev, void *priv);
int (*mac_finish)(struct net_device *ndev,
@@ -248,6 +248,8 @@ struct plat_stmmacenet_data {
void (*ptp_clk_freq_config)(struct stmmac_priv *priv);
int (*init)(struct platform_device *pdev, void *priv);
void (*exit)(struct platform_device *pdev, void *priv);
+ int (*suspend)(struct device *dev, void *priv);
+ int (*resume)(struct device *dev, void *priv);
struct mac_device_info *(*setup)(void *priv);
int (*clks_config)(void *priv, bool enabled);
int (*crosststamp)(ktime_t *device, struct system_counterval_t *system,
diff --git a/include/linux/udp.h b/include/linux/udp.h
index 4e1a672af4c5..981506be1e15 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -108,6 +108,7 @@ struct udp_sock {
* the last UDP socket cacheline.
*/
struct hlist_node tunnel_list;
+ struct socket_drop_counters drop_counters;
};
#define udp_test_bit(nr, sk) \
diff --git a/include/net/act_api.h b/include/net/act_api.h
index 2894cfff2da3..91a24b5e0b93 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -33,7 +33,10 @@ struct tc_action {
struct tcf_t tcfa_tm;
struct gnet_stats_basic_sync tcfa_bstats;
struct gnet_stats_basic_sync tcfa_bstats_hw;
- struct gnet_stats_queue tcfa_qstats;
+
+ atomic_t tcfa_drops;
+ atomic_t tcfa_overlimits;
+
struct net_rate_estimator __rcu *tcfa_rate_est;
spinlock_t tcfa_lock;
struct gnet_stats_basic_sync __percpu *cpu_bstats;
@@ -53,7 +56,6 @@ struct tc_action {
#define tcf_action common.tcfa_action
#define tcf_tm common.tcfa_tm
#define tcf_bstats common.tcfa_bstats
-#define tcf_qstats common.tcfa_qstats
#define tcf_rate_est common.tcfa_rate_est
#define tcf_lock common.tcfa_lock
@@ -241,9 +243,7 @@ static inline void tcf_action_inc_drop_qstats(struct tc_action *a)
qstats_drop_inc(this_cpu_ptr(a->cpu_qstats));
return;
}
- spin_lock(&a->tcfa_lock);
- qstats_drop_inc(&a->tcfa_qstats);
- spin_unlock(&a->tcfa_lock);
+ atomic_inc(&a->tcfa_drops);
}
static inline void tcf_action_inc_overlimit_qstats(struct tc_action *a)
@@ -252,9 +252,7 @@ static inline void tcf_action_inc_overlimit_qstats(struct tc_action *a)
qstats_overlimit_inc(this_cpu_ptr(a->cpu_qstats));
return;
}
- spin_lock(&a->tcfa_lock);
- qstats_overlimit_inc(&a->tcfa_qstats);
- spin_unlock(&a->tcfa_lock);
+ atomic_inc(&a->tcfa_overlimits);
}
void tcf_action_update_stats(struct tc_action *a, u64 bytes, u64 packets,
diff --git a/include/net/bonding.h b/include/net/bonding.h
index e06f0d63b2c1..37335f62f579 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -126,7 +126,6 @@ struct bond_params {
int arp_interval;
int arp_validate;
int arp_all_targets;
- int use_carrier;
int fail_over_mac;
int updelay;
int downdelay;
diff --git a/include/net/devlink.h b/include/net/devlink.h
index b32c9ceeb81d..5f44e702c25c 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -746,6 +746,10 @@ enum devlink_health_reporter_state {
* if priv_ctx is NULL, run a full dump
* @diagnose: callback to diagnose the current status
* @test: callback to trigger a test event
+ * @default_graceful_period: default min time (in msec)
+ * between recovery attempts
+ * @default_burst_period: default time (in msec) for
+ * error recoveries before starting the grace period
*/
struct devlink_health_reporter_ops {
@@ -760,6 +764,8 @@ struct devlink_health_reporter_ops {
struct netlink_ext_ack *extack);
int (*test)(struct devlink_health_reporter *reporter,
struct netlink_ext_ack *extack);
+ u64 default_graceful_period;
+ u64 default_burst_period;
};
/**
@@ -1743,7 +1749,7 @@ void devlink_port_type_ib_set(struct devlink_port *devlink_port,
struct ib_device *ibdev);
void devlink_port_type_clear(struct devlink_port *devlink_port);
void devlink_port_attrs_set(struct devlink_port *devlink_port,
- struct devlink_port_attrs *devlink_port_attrs);
+ const struct devlink_port_attrs *attrs);
void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port, u32 controller,
u16 pf, bool external);
void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, u32 controller,
@@ -1928,22 +1934,22 @@ void devlink_fmsg_binary_pair_put(struct devlink_fmsg *fmsg, const char *name,
struct devlink_health_reporter *
devl_port_health_reporter_create(struct devlink_port *port,
const struct devlink_health_reporter_ops *ops,
- u64 graceful_period, void *priv);
+ void *priv);
struct devlink_health_reporter *
devlink_port_health_reporter_create(struct devlink_port *port,
const struct devlink_health_reporter_ops *ops,
- u64 graceful_period, void *priv);
+ void *priv);
struct devlink_health_reporter *
devl_health_reporter_create(struct devlink *devlink,
const struct devlink_health_reporter_ops *ops,
- u64 graceful_period, void *priv);
+ void *priv);
struct devlink_health_reporter *
devlink_health_reporter_create(struct devlink *devlink,
const struct devlink_health_reporter_ops *ops,
- u64 graceful_period, void *priv);
+ void *priv);
void
devl_health_reporter_destroy(struct devlink_health_reporter *reporter);
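
With the periods moved into the ops, a driver declares its recovery
policy once and the create calls lose the graceful_period argument. A
sketch, where the reporter name and callbacks are hypothetical:

static const struct devlink_health_reporter_ops foo_fw_reporter_ops = {
	.name			 = "fw",
	.recover		 = foo_fw_recover,	/* hypothetical */
	.dump			 = foo_fw_dump,		/* hypothetical */
	.default_graceful_period = 500,	/* msec between recoveries */
	.default_burst_period	 = 100,	/* msec of tolerated error burst */
};

reporter = devl_health_reporter_create(devlink, &foo_fw_reporter_ops, priv);
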
diff --git a/include/net/dst.h b/include/net/dst.h
index bab01363bb97..f8aa1239b4db 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -24,7 +24,10 @@
struct sk_buff;
struct dst_entry {
- struct net_device *dev;
+ union {
+ struct net_device *dev;
+ struct net_device __rcu *dev_rcu;
+ };
struct dst_ops *ops;
unsigned long _metrics;
unsigned long expires;
@@ -570,9 +573,12 @@ static inline struct net_device *dst_dev(const struct dst_entry *dst)
static inline struct net_device *dst_dev_rcu(const struct dst_entry *dst)
{
- /* In the future, use rcu_dereference(dst->dev) */
- WARN_ON_ONCE(!rcu_read_lock_held());
- return READ_ONCE(dst->dev);
+ return rcu_dereference(dst->dev_rcu);
+}
+
+static inline struct net *dst_dev_net_rcu(const struct dst_entry *dst)
+{
+ return dev_net_rcu(dst_dev_rcu(dst));
}
static inline struct net_device *skb_dst_dev(const struct sk_buff *skb)
@@ -592,7 +598,7 @@ static inline struct net *skb_dst_dev_net(const struct sk_buff *skb)
static inline struct net *skb_dst_dev_net_rcu(const struct sk_buff *skb)
{
- return dev_net_rcu(skb_dst_dev(skb));
+ return dev_net_rcu(skb_dst_dev_rcu(skb));
}
struct dst_entry *dst_blackhole_check(struct dst_entry *dst, u32 cookie);
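
Callers that held the RCU read lock and open-coded
dev_net_rcu(dst_dev(dst)) can switch to the combined helper; the union
gives the same pointer an __rcu-annotated alias so sparse can check the
dereference. A minimal sketch:

	rcu_read_lock();
	net = dst_dev_net_rcu(dst);	/* rcu_dereference() underneath */
	/* ... use net only while the read-side lock is held ... */
	rcu_read_unlock();
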
diff --git a/include/net/flow.h b/include/net/flow.h
index a1839c278d87..ae9481c40063 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -12,6 +12,7 @@
#include <linux/atomic.h>
#include <linux/container_of.h>
#include <linux/uidgid.h>
+#include <net/inet_dscp.h>
struct flow_keys;
@@ -32,7 +33,7 @@ struct flowi_common {
int flowic_iif;
int flowic_l3mdev;
__u32 flowic_mark;
- __u8 flowic_tos;
+ dscp_t flowic_dscp;
__u8 flowic_scope;
__u8 flowic_proto;
__u8 flowic_flags;
@@ -70,7 +71,7 @@ struct flowi4 {
#define flowi4_iif __fl_common.flowic_iif
#define flowi4_l3mdev __fl_common.flowic_l3mdev
#define flowi4_mark __fl_common.flowic_mark
-#define flowi4_tos __fl_common.flowic_tos
+#define flowi4_dscp __fl_common.flowic_dscp
#define flowi4_scope __fl_common.flowic_scope
#define flowi4_proto __fl_common.flowic_proto
#define flowi4_flags __fl_common.flowic_flags
@@ -103,7 +104,7 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif,
fl4->flowi4_iif = LOOPBACK_IFINDEX;
fl4->flowi4_l3mdev = 0;
fl4->flowi4_mark = mark;
- fl4->flowi4_tos = tos;
+ fl4->flowi4_dscp = inet_dsfield_to_dscp(tos);
fl4->flowi4_scope = scope;
fl4->flowi4_proto = proto;
fl4->flowi4_flags = flags;
@@ -141,7 +142,7 @@ struct flowi6 {
#define flowi6_uid __fl_common.flowic_uid
struct in6_addr daddr;
struct in6_addr saddr;
- /* Note: flowi6_tos is encoded in flowlabel, too. */
+ /* Note: flowi6_dscp is encoded in flowlabel, too. */
__be32 flowlabel;
union flowi_uli uli;
#define fl6_sport uli.ports.sport
@@ -163,7 +164,7 @@ struct flowi {
#define flowi_iif u.__fl_common.flowic_iif
#define flowi_l3mdev u.__fl_common.flowic_l3mdev
#define flowi_mark u.__fl_common.flowic_mark
-#define flowi_tos u.__fl_common.flowic_tos
+#define flowi_dscp u.__fl_common.flowic_dscp
#define flowi_scope u.__fl_common.flowic_scope
#define flowi_proto u.__fl_common.flowic_proto
#define flowi_flags u.__fl_common.flowic_flags
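
Call sites that wrote a raw TOS byte now have to convert through
inet_dsfield_to_dscp(), so the __bitwise dscp_t type catches stray ECN
bits at build time. For example (oif and tos are assumed locals):

	struct flowi4 fl4 = {
		.flowi4_oif	= oif,
		/* was: .flowi4_tos = tos */
		.flowi4_dscp	= inet_dsfield_to_dscp(tos),
	};
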
diff --git a/include/net/genetlink.h b/include/net/genetlink.h
index a03d56765832..7b84f2cef8b1 100644
--- a/include/net/genetlink.h
+++ b/include/net/genetlink.h
@@ -62,7 +62,7 @@ struct genl_info;
* @small_ops: the small-struct operations supported by this family
* @n_small_ops: number of small-struct operations supported by this family
* @split_ops: the split do/dump form of operation definition
- * @n_split_ops: number of entries in @split_ops, not that with split do/dump
+ * @n_split_ops: number of entries in @split_ops, note that with split do/dump
* ops the number of entries is not the same as number of commands
* @sock_priv_size: the size of per-socket private memory
* @sock_priv_init: the per-socket private memory initializer
diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h
index ab3929a2a956..1f985d2012ce 100644
--- a/include/net/inet6_hashtables.h
+++ b/include/net/inet6_hashtables.h
@@ -41,7 +41,6 @@ static inline unsigned int __inet6_ehashfn(const u32 lhash,
* The sockhash lock must be held as a reader here.
*/
struct sock *__inet6_lookup_established(const struct net *net,
- struct inet_hashinfo *hashinfo,
const struct in6_addr *saddr,
const __be16 sport,
const struct in6_addr *daddr,
@@ -65,7 +64,6 @@ struct sock *inet6_lookup_reuseport(const struct net *net, struct sock *sk,
inet6_ehashfn_t *ehashfn);
struct sock *inet6_lookup_listener(const struct net *net,
- struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
const struct in6_addr *saddr,
const __be16 sport,
@@ -83,7 +81,6 @@ struct sock *inet6_lookup_run_sk_lookup(const struct net *net,
inet6_ehashfn_t *ehashfn);
static inline struct sock *__inet6_lookup(const struct net *net,
- struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
const struct in6_addr *saddr,
const __be16 sport,
@@ -92,14 +89,14 @@ static inline struct sock *__inet6_lookup(const struct net *net,
const int dif, const int sdif,
bool *refcounted)
{
- struct sock *sk = __inet6_lookup_established(net, hashinfo, saddr,
- sport, daddr, hnum,
+ struct sock *sk = __inet6_lookup_established(net, saddr, sport,
+ daddr, hnum,
dif, sdif);
*refcounted = true;
if (sk)
return sk;
*refcounted = false;
- return inet6_lookup_listener(net, hashinfo, skb, doff, saddr, sport,
+ return inet6_lookup_listener(net, skb, doff, saddr, sport,
daddr, hnum, dif, sdif);
}
@@ -143,8 +140,7 @@ struct sock *inet6_steal_sock(struct net *net, struct sk_buff *skb, int doff,
return reuse_sk;
}
-static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo,
- struct sk_buff *skb, int doff,
+static inline struct sock *__inet6_lookup_skb(struct sk_buff *skb, int doff,
const __be16 sport,
const __be16 dport,
int iif, int sdif,
@@ -161,14 +157,12 @@ static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo,
if (sk)
return sk;
- return __inet6_lookup(net, hashinfo, skb,
- doff, &ip6h->saddr, sport,
+ return __inet6_lookup(net, skb, doff, &ip6h->saddr, sport,
&ip6h->daddr, ntohs(dport),
iif, sdif, refcounted);
}
-struct sock *inet6_lookup(const struct net *net, struct inet_hashinfo *hashinfo,
- struct sk_buff *skb, int doff,
+struct sock *inet6_lookup(const struct net *net, struct sk_buff *skb, int doff,
const struct in6_addr *saddr, const __be16 sport,
const struct in6_addr *daddr, const __be16 dport,
const int dif);
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 1735db332aab..0737d8e178dd 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -299,14 +299,8 @@ reqsk_timeout(struct request_sock *req, unsigned long max_timeout)
return (unsigned long)min_t(u64, timeout, max_timeout);
}
-static inline void inet_csk_prepare_for_destroy_sock(struct sock *sk)
-{
- /* The below has to be done to allow calling inet_csk_destroy_sock */
- sock_set_flag(sk, SOCK_DEAD);
- this_cpu_inc(*sk->sk_prot->orphan_count);
-}
-
void inet_csk_destroy_sock(struct sock *sk);
+void inet_csk_prepare_for_destroy_sock(struct sock *sk);
void inet_csk_prepare_forced_close(struct sock *sk);
/*
diff --git a/include/net/inet_dscp.h b/include/net/inet_dscp.h
index 72f250dffada..1aa9f04ed1ab 100644
--- a/include/net/inet_dscp.h
+++ b/include/net/inet_dscp.h
@@ -39,6 +39,12 @@ typedef u8 __bitwise dscp_t;
#define INET_DSCP_MASK 0xfc
+/* A few places in the IPv4 code need to ignore the three high order bits of
+ * DSCP because of backward compatibility (as these bits used to represent the
+ * IPv4 Precedence in RFC 791's TOS field and were ignored).
+ */
+#define INET_DSCP_LEGACY_TOS_MASK ((__force dscp_t)0x1c)
+
static inline dscp_t inet_dsfield_to_dscp(__u8 dsfield)
{
return (__force dscp_t)(dsfield & INET_DSCP_MASK);
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 19dbd9081d5a..a3b32241c2f2 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -294,7 +294,6 @@ int inet_hash(struct sock *sk);
void inet_unhash(struct sock *sk);
struct sock *__inet_lookup_listener(const struct net *net,
- struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
const __be32 saddr, const __be16 sport,
const __be32 daddr,
@@ -302,12 +301,12 @@ struct sock *__inet_lookup_listener(const struct net *net,
const int dif, const int sdif);
static inline struct sock *inet_lookup_listener(struct net *net,
- struct inet_hashinfo *hashinfo,
- struct sk_buff *skb, int doff,
- __be32 saddr, __be16 sport,
- __be32 daddr, __be16 dport, int dif, int sdif)
+ struct sk_buff *skb, int doff,
+ __be32 saddr, __be16 sport,
+ __be32 daddr, __be16 dport,
+ int dif, int sdif)
{
- return __inet_lookup_listener(net, hashinfo, skb, doff, saddr, sport,
+ return __inet_lookup_listener(net, skb, doff, saddr, sport,
daddr, ntohs(dport), dif, sdif);
}
@@ -358,7 +357,6 @@ static inline bool inet_match(const struct net *net, const struct sock *sk,
* not check it for lookups anymore, thanks Alexey. -DaveM
*/
struct sock *__inet_lookup_established(const struct net *net,
- struct inet_hashinfo *hashinfo,
const __be32 saddr, const __be16 sport,
const __be32 daddr, const u16 hnum,
const int dif, const int sdif);
@@ -384,18 +382,16 @@ struct sock *inet_lookup_run_sk_lookup(const struct net *net,
__be32 daddr, u16 hnum, const int dif,
inet_ehashfn_t *ehashfn);
-static inline struct sock *
- inet_lookup_established(struct net *net, struct inet_hashinfo *hashinfo,
- const __be32 saddr, const __be16 sport,
- const __be32 daddr, const __be16 dport,
- const int dif)
+static inline struct sock *inet_lookup_established(struct net *net,
+ const __be32 saddr, const __be16 sport,
+ const __be32 daddr, const __be16 dport,
+ const int dif)
{
- return __inet_lookup_established(net, hashinfo, saddr, sport, daddr,
+ return __inet_lookup_established(net, saddr, sport, daddr,
ntohs(dport), dif, 0);
}
static inline struct sock *__inet_lookup(struct net *net,
- struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
const __be32 saddr, const __be16 sport,
const __be32 daddr, const __be16 dport,
@@ -405,18 +401,17 @@ static inline struct sock *__inet_lookup(struct net *net,
u16 hnum = ntohs(dport);
struct sock *sk;
- sk = __inet_lookup_established(net, hashinfo, saddr, sport,
+ sk = __inet_lookup_established(net, saddr, sport,
daddr, hnum, dif, sdif);
*refcounted = true;
if (sk)
return sk;
*refcounted = false;
- return __inet_lookup_listener(net, hashinfo, skb, doff, saddr,
+ return __inet_lookup_listener(net, skb, doff, saddr,
sport, daddr, hnum, dif, sdif);
}
static inline struct sock *inet_lookup(struct net *net,
- struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
const __be32 saddr, const __be16 sport,
const __be32 daddr, const __be16 dport,
@@ -425,7 +420,7 @@ static inline struct sock *inet_lookup(struct net *net,
struct sock *sk;
bool refcounted;
- sk = __inet_lookup(net, hashinfo, skb, doff, saddr, sport, daddr,
+ sk = __inet_lookup(net, skb, doff, saddr, sport, daddr,
dport, dif, 0, &refcounted);
if (sk && !refcounted && !refcount_inc_not_zero(&sk->sk_refcnt))
@@ -473,8 +468,7 @@ struct sock *inet_steal_sock(struct net *net, struct sk_buff *skb, int doff,
return reuse_sk;
}
-static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo,
- struct sk_buff *skb,
+static inline struct sock *__inet_lookup_skb(struct sk_buff *skb,
int doff,
const __be16 sport,
const __be16 dport,
@@ -492,8 +486,7 @@ static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo,
if (sk)
return sk;
- return __inet_lookup(net, hashinfo, skb,
- doff, iph->saddr, sport,
+ return __inet_lookup(net, skb, doff, iph->saddr, sport,
iph->daddr, dport, inet_iif(skb), sdif,
refcounted);
}
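
Since the lookup tables are now resolved internally, call sites simply
drop the hashinfo argument. A before/after sketch of a typical TCP
caller (the hashinfo expression is illustrative):

	/* before */
	sk = __inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
				       saddr, sport, daddr, hnum, dif, sdif);
	/* after */
	sk = __inet_lookup_established(net, saddr, sport, daddr, hnum,
				       dif, sdif);
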
diff --git a/include/net/ip.h b/include/net/ip.h
index befcba575129..6dbd2bf8fa9c 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -467,12 +467,14 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst,
bool forwarding)
{
const struct rtable *rt = dst_rtable(dst);
+ const struct net_device *dev;
unsigned int mtu, res;
struct net *net;
rcu_read_lock();
- net = dev_net_rcu(dst_dev(dst));
+ dev = dst_dev_rcu(dst);
+ net = dev_net_rcu(dev);
if (READ_ONCE(net->ipv4.sysctl_ip_fwd_use_pmtu) ||
ip_mtu_locked(dst) ||
!forwarding) {
@@ -486,7 +488,7 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst,
if (mtu)
goto out;
- mtu = READ_ONCE(dst_dev(dst)->mtu);
+ mtu = READ_ONCE(dev->mtu);
if (unlikely(ip_mtu_locked(dst))) {
if (rt->rt_uses_gateway && mtu > 576)
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 9255f21818ee..59f48ca3abdf 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -337,7 +337,7 @@ static inline unsigned int ip6_dst_mtu_maybe_forward(const struct dst_entry *dst
mtu = IPV6_MIN_MTU;
rcu_read_lock();
- idev = __in6_dev_get(dst_dev(dst));
+ idev = __in6_dev_get(dst_dev_rcu(dst));
if (idev)
mtu = READ_ONCE(idev->cnf.mtu6);
rcu_read_unlock();
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 48bb3cf41469..b4495c38e0a0 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -440,7 +440,7 @@ static inline bool fib4_rules_early_flow_dissect(struct net *net,
static inline bool fib_dscp_masked_match(dscp_t dscp, const struct flowi4 *fl4)
{
- return dscp == inet_dsfield_to_dscp(RT_TOS(fl4->flowi4_tos));
+ return dscp == (fl4->flowi4_dscp & INET_DSCP_LEGACY_TOS_MASK);
}
/* Exported by fib_frontend.c */
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 8cf1380f3656..4314a97702ea 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -11,7 +11,9 @@
#include <linux/bitops.h>
#include <net/dsfield.h>
+#include <net/flow.h>
#include <net/gro_cells.h>
+#include <net/inet_dscp.h>
#include <net/inet_ecn.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
@@ -362,7 +364,7 @@ static inline void ip_tunnel_init_flow(struct flowi4 *fl4,
fl4->daddr = daddr;
fl4->saddr = saddr;
- fl4->flowi4_tos = tos;
+ fl4->flowi4_dscp = inet_dsfield_to_dscp(tos);
fl4->flowi4_proto = proto;
fl4->fl4_gre_key = key;
fl4->flowi4_mark = mark;
diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index e1030a7d2daa..0921485565c0 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -65,6 +65,8 @@ enum TRI_STATE {
#define MANA_STATS_RX_COUNT 5
#define MANA_STATS_TX_COUNT 11
+#define MANA_RX_FRAG_ALIGNMENT 64
+
struct mana_stats_rx {
u64 packets;
u64 bytes;
@@ -328,6 +330,7 @@ struct mana_rxq {
u32 datasize;
u32 alloc_size;
u32 headroom;
+ u32 frag_count;
mana_handle_t rxobj;
@@ -510,6 +513,7 @@ struct mana_port_context {
u32 rxbpre_datasize;
u32 rxbpre_alloc_size;
u32 rxbpre_headroom;
+ u32 rxbpre_frag_count;
struct bpf_prog *bpf_prog;
diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h
index 6e835972abd1..cd00e0406cf4 100644
--- a/include/net/netdev_queues.h
+++ b/include/net/netdev_queues.h
@@ -127,6 +127,9 @@ void netdev_stat_queue_sum(struct net_device *netdev,
* @ndo_queue_stop: Stop the RX queue at the specified index. The stopped
* queue's memory is written at the specified address.
*
+ * @ndo_queue_get_dma_dev: Get dma device for zero-copy operations to be used
+ * for this queue. Return NULL on error.
+ *
* Note that @ndo_queue_mem_alloc and @ndo_queue_mem_free may be called while
* the interface is closed. @ndo_queue_start and @ndo_queue_stop will only
* be called for an interface which is open.
@@ -144,8 +147,12 @@ struct netdev_queue_mgmt_ops {
int (*ndo_queue_stop)(struct net_device *dev,
void *per_queue_mem,
int idx);
+ struct device * (*ndo_queue_get_dma_dev)(struct net_device *dev,
+ int idx);
};
+bool netif_rxq_has_unreadable_mp(struct net_device *dev, int idx);
+
/**
* DOC: Lockless queue stopping / waking helpers.
*
@@ -321,4 +328,6 @@ static inline void netif_subqueue_sent(const struct net_device *dev,
get_desc, start_thrs); \
})
+struct device *netdev_queue_get_dma_dev(struct net_device *dev, int idx);
+
#endif
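
An implementation can use this to report per-queue DMA devices, e.g.
when queues are split across PCI functions. A hedged sketch; foo_priv
and its members are hypothetical:

static struct device *foo_queue_get_dma_dev(struct net_device *dev, int idx)
{
	struct foo_priv *priv = netdev_priv(dev);	/* hypothetical */

	/* queues at or above the split live on a second function */
	return idx < priv->split ? priv->dma_dev0 : priv->dma_dev1;
}

static const struct netdev_queue_mgmt_ops foo_queue_mgmt_ops = {
	.ndo_queue_get_dma_dev	= foo_queue_get_dma_dev,
};
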
diff --git a/include/net/netfilter/ipv4/nf_reject.h b/include/net/netfilter/ipv4/nf_reject.h
index c653fcb88354..09de2f2686b5 100644
--- a/include/net/netfilter/ipv4/nf_reject.h
+++ b/include/net/netfilter/ipv4/nf_reject.h
@@ -10,14 +10,6 @@
void nf_send_unreach(struct sk_buff *skb_in, int code, int hook);
void nf_send_reset(struct net *net, struct sock *, struct sk_buff *oldskb,
int hook);
-const struct tcphdr *nf_reject_ip_tcphdr_get(struct sk_buff *oldskb,
- struct tcphdr *_oth, int hook);
-struct iphdr *nf_reject_iphdr_put(struct sk_buff *nskb,
- const struct sk_buff *oldskb,
- __u8 protocol, int ttl);
-void nf_reject_ip_tcphdr_put(struct sk_buff *nskb, const struct sk_buff *oldskb,
- const struct tcphdr *oth);
-
struct sk_buff *nf_reject_skb_v4_unreach(struct net *net,
struct sk_buff *oldskb,
const struct net_device *dev,
diff --git a/include/net/netfilter/ipv6/nf_reject.h b/include/net/netfilter/ipv6/nf_reject.h
index d729344ba644..94ec0b9f2838 100644
--- a/include/net/netfilter/ipv6/nf_reject.h
+++ b/include/net/netfilter/ipv6/nf_reject.h
@@ -9,16 +9,6 @@ void nf_send_unreach6(struct net *net, struct sk_buff *skb_in, unsigned char cod
unsigned int hooknum);
void nf_send_reset6(struct net *net, struct sock *sk, struct sk_buff *oldskb,
int hook);
-const struct tcphdr *nf_reject_ip6_tcphdr_get(struct sk_buff *oldskb,
- struct tcphdr *otcph,
- unsigned int *otcplen, int hook);
-struct ipv6hdr *nf_reject_ip6hdr_put(struct sk_buff *nskb,
- const struct sk_buff *oldskb,
- __u8 protocol, int hoplimit);
-void nf_reject_ip6_tcphdr_put(struct sk_buff *nskb,
- const struct sk_buff *oldskb,
- const struct tcphdr *oth, unsigned int otcplen);
-
struct sk_buff *nf_reject_skb_v6_tcp_reset(struct net *net,
struct sk_buff *oldskb,
const struct net_device *dev,
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 891e43a01bdc..e2128663b160 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -556,6 +556,7 @@ struct nft_set_elem_expr {
* @size: maximum set size
* @field_len: length of each field in concatenation, bytes
* @field_count: number of concatenated fields in element
+ * @in_update_walk: true during ->walk() in transaction phase
* @use: number of rules references to this set
* @nelems: number of elements
* @ndeact: number of deactivated elements queued for removal
@@ -590,6 +591,7 @@ struct nft_set {
u32 size;
u8 field_len[NFT_REG32_COUNT];
u8 field_count;
+ bool in_update_walk;
u32 use;
atomic_t nelems;
u32 ndeact;
diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h
index 6c2f483d9828..7644cfe9267d 100644
--- a/include/net/netfilter/nf_tables_core.h
+++ b/include/net/netfilter/nf_tables_core.h
@@ -73,7 +73,7 @@ struct nft_ct {
struct nft_payload {
enum nft_payload_bases base:8;
- u8 offset;
+ u16 offset;
u8 len;
u8 dreg;
};
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 6373e3f17da8..54a7d187f62a 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -251,6 +251,7 @@ struct netns_ipv4 {
int sysctl_igmp_qrv;
struct ping_group_range ping_group_range;
+ u16 ping_port_rover;
atomic_t dev_addr_genid;
diff --git a/include/net/netns/sctp.h b/include/net/netns/sctp.h
index d25cd7a9c5ff..c0f97f36389e 100644
--- a/include/net/netns/sctp.h
+++ b/include/net/netns/sctp.h
@@ -75,8 +75,8 @@ struct netns_sctp {
/* Whether Cookie Preservative is enabled(1) or not(0) */
int cookie_preserve_enable;
- /* The namespace default hmac alg */
- char *sctp_hmac_alg;
+ /* Whether cookie authentication is enabled(1) or not(0) */
+ int cookie_auth_enable;
/* Valid.Cookie.Life - 60 seconds */
unsigned int valid_cookie_life;
diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h
index e180bdf2f82b..664d5058e66e 100644
--- a/include/net/nfc/nci_core.h
+++ b/include/net/nfc/nci_core.h
@@ -52,7 +52,7 @@ enum nci_state {
#define NCI_RF_DISC_SELECT_TIMEOUT 5000
#define NCI_RF_DEACTIVATE_TIMEOUT 30000
#define NCI_CMD_TIMEOUT 5000
-#define NCI_DATA_TIMEOUT 700
+#define NCI_DATA_TIMEOUT 3000
struct nci_dev;
diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h
index db180626be06..3247026e096a 100644
--- a/include/net/page_pool/helpers.h
+++ b/include/net/page_pool/helpers.h
@@ -489,6 +489,11 @@ page_pool_dma_sync_netmem_for_cpu(const struct page_pool *pool,
offset, dma_sync_size);
}
+static inline void page_pool_get(struct page_pool *pool)
+{
+ refcount_inc(&pool->user_cnt);
+}
+
static inline bool page_pool_put(struct page_pool *pool)
{
return refcount_dec_and_test(&pool->user_cnt);
@@ -500,6 +505,18 @@ static inline void page_pool_nid_changed(struct page_pool *pool, int new_nid)
page_pool_update_nid(pool, new_nid);
}
+/**
+ * page_pool_is_unreadable() - will allocated buffers be unreadable for the CPU
+ * @pool: queried page pool
+ *
+ * Check if page pool will return buffers which are unreadable to the CPU /
+ * kernel. This will only be the case if user space bound a memory provider (mp)
+ * which returns unreadable memory to the queue served by the page pool.
+ * If %PP_FLAG_ALLOW_UNREADABLE_NETMEM was set but there is no mp bound
+ * this helper will return false. See also netif_rxq_has_unreadable_mp().
+ *
+ * Return: true if memory allocated by the page pool may be unreadable
+ */
static inline bool page_pool_is_unreadable(struct page_pool *pool)
{
return !!pool->mp_ops;
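
A driver RX path can use this to skip CPU access when buffers may live
in device memory. A minimal sketch (pool and netmem are assumed to be
in scope):

	/* only touch payload when the pool hands out host memory */
	if (!page_pool_is_unreadable(pool))
		prefetch(netmem_address(netmem));
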
diff --git a/include/net/ping.h b/include/net/ping.h
index bc7779262e60..9634b8800814 100644
--- a/include/net/ping.h
+++ b/include/net/ping.h
@@ -54,7 +54,6 @@ struct pingfakehdr {
};
int ping_get_port(struct sock *sk, unsigned short ident);
-int ping_hash(struct sock *sk);
void ping_unhash(struct sock *sk);
int ping_init_sock(struct sock *sk);
diff --git a/include/net/proto_memory.h b/include/net/proto_memory.h
index a6ab2f4f5e28..8e91a8fa31b5 100644
--- a/include/net/proto_memory.h
+++ b/include/net/proto_memory.h
@@ -31,8 +31,8 @@ static inline bool sk_under_memory_pressure(const struct sock *sk)
if (!sk->sk_prot->memory_pressure)
return false;
- if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
- mem_cgroup_under_socket_pressure(sk->sk_memcg))
+ if (mem_cgroup_sk_enabled(sk) &&
+ mem_cgroup_sk_under_memory_pressure(sk))
return true;
return !!READ_ONCE(*sk->sk_prot->memory_pressure);
diff --git a/include/net/raw.h b/include/net/raw.h
index 32a61481a253..d52709139060 100644
--- a/include/net/raw.h
+++ b/include/net/raw.h
@@ -81,6 +81,7 @@ struct raw_sock {
struct inet_sock inet;
struct icmp_filter filter;
u32 ipmr_table;
+ struct socket_drop_counters drop_counters;
};
#define raw_sk(ptr) container_of_const(ptr, struct raw_sock, inet.sk)
diff --git a/include/net/route.h b/include/net/route.h
index 7ea840daa775..f90106f383c5 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -189,7 +189,7 @@ static inline struct rtable *ip_route_output(struct net *net, __be32 daddr,
{
struct flowi4 fl4 = {
.flowi4_oif = oif,
- .flowi4_tos = inet_dscp_to_dsfield(dscp),
+ .flowi4_dscp = dscp,
.flowi4_scope = scope,
.daddr = daddr,
.saddr = saddr,
@@ -390,7 +390,7 @@ static inline int ip4_dst_hoplimit(const struct dst_entry *dst)
const struct net *net;
rcu_read_lock();
- net = dev_net_rcu(dst_dev(dst));
+ net = dst_dev_net_rcu(dst);
hoplimit = READ_ONCE(net->ipv4.sysctl_ip_default_ttl);
rcu_read_unlock();
}
diff --git a/include/net/rps.h b/include/net/rps.h
index d8ab3a08bcc4..f1794cd2e7fb 100644
--- a/include/net/rps.h
+++ b/include/net/rps.h
@@ -25,13 +25,16 @@ struct rps_map {
/*
* The rps_dev_flow structure contains the mapping of a flow to a CPU, the
- * tail pointer for that CPU's input queue at the time of last enqueue, and
- * a hardware filter index.
+ * tail pointer for that CPU's input queue at the time of last enqueue, a
+ * hardware filter index, and the hash of the flow if aRFS is enabled.
*/
struct rps_dev_flow {
u16 cpu;
u16 filter;
unsigned int last_qtail;
+#ifdef CONFIG_RFS_ACCEL
+ u32 hash;
+#endif
};
#define RPS_NO_FILTER 0xffff
@@ -82,11 +85,8 @@ static inline void rps_record_sock_flow(struct rps_sock_flow_table *table,
WRITE_ONCE(table->ents[index], val);
}
-#endif /* CONFIG_RPS */
-
-static inline void sock_rps_record_flow_hash(__u32 hash)
+static inline void _sock_rps_record_flow_hash(__u32 hash)
{
-#ifdef CONFIG_RPS
struct rps_sock_flow_table *sock_flow_table;
if (!hash)
@@ -96,42 +96,33 @@ static inline void sock_rps_record_flow_hash(__u32 hash)
if (sock_flow_table)
rps_record_sock_flow(sock_flow_table, hash);
rcu_read_unlock();
-#endif
}
-static inline void sock_rps_record_flow(const struct sock *sk)
+static inline void _sock_rps_record_flow(const struct sock *sk)
{
-#ifdef CONFIG_RPS
- if (static_branch_unlikely(&rfs_needed)) {
- /* Reading sk->sk_rxhash might incur an expensive cache line
- * miss.
- *
- * TCP_ESTABLISHED does cover almost all states where RFS
- * might be useful, and is cheaper [1] than testing :
- * IPv4: inet_sk(sk)->inet_daddr
- * IPv6: ipv6_addr_any(&sk->sk_v6_daddr)
- * OR an additional socket flag
- * [1] : sk_state and sk_prot are in the same cache line.
+ /* Reading sk->sk_rxhash might incur an expensive cache line
+ * miss.
+ *
+ * TCP_ESTABLISHED does cover almost all states where RFS
+ * might be useful, and is cheaper [1] than testing :
+ * IPv4: inet_sk(sk)->inet_daddr
+ * IPv6: ipv6_addr_any(&sk->sk_v6_daddr)
+ * OR an additional socket flag
+ * [1] : sk_state and sk_prot are in the same cache line.
+ */
+ if (sk->sk_state == TCP_ESTABLISHED) {
+ /* This READ_ONCE() is paired with the WRITE_ONCE()
+ * from sock_rps_save_rxhash() and sock_rps_reset_rxhash().
*/
- if (sk->sk_state == TCP_ESTABLISHED) {
- /* This READ_ONCE() is paired with the WRITE_ONCE()
- * from sock_rps_save_rxhash() and sock_rps_reset_rxhash().
- */
- sock_rps_record_flow_hash(READ_ONCE(sk->sk_rxhash));
- }
+ _sock_rps_record_flow_hash(READ_ONCE(sk->sk_rxhash));
}
-#endif
}
-static inline void sock_rps_delete_flow(const struct sock *sk)
+static inline void _sock_rps_delete_flow(const struct sock *sk)
{
-#ifdef CONFIG_RPS
struct rps_sock_flow_table *table;
u32 hash, index;
- if (!static_branch_unlikely(&rfs_needed))
- return;
-
hash = READ_ONCE(sk->sk_rxhash);
if (!hash)
return;
@@ -144,6 +135,45 @@ static inline void sock_rps_delete_flow(const struct sock *sk)
WRITE_ONCE(table->ents[index], RPS_NO_CPU);
}
rcu_read_unlock();
+}
+#endif /* CONFIG_RPS */
+
+static inline bool rfs_is_needed(void)
+{
+#ifdef CONFIG_RPS
+ return static_branch_unlikely(&rfs_needed);
+#else
+ return false;
+#endif
+}
+
+static inline void sock_rps_record_flow_hash(__u32 hash)
+{
+#ifdef CONFIG_RPS
+ if (!rfs_is_needed())
+ return;
+
+ _sock_rps_record_flow_hash(hash);
+#endif
+}
+
+static inline void sock_rps_record_flow(const struct sock *sk)
+{
+#ifdef CONFIG_RPS
+ if (!rfs_is_needed())
+ return;
+
+ _sock_rps_record_flow(sk);
+#endif
+}
+
+static inline void sock_rps_delete_flow(const struct sock *sk)
+{
+#ifdef CONFIG_RPS
+ if (!rfs_is_needed())
+ return;
+
+ _sock_rps_delete_flow(sk);
#endif
}
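
Besides keeping the public wrappers cheap, the split lets hot paths
under CONFIG_RPS test the static branch once and then call the
underscored helpers in a loop. A sketch of that pattern (the socket
array is hypothetical):

#ifdef CONFIG_RPS
	if (rfs_is_needed()) {
		for (i = 0; i < n; i++)
			_sock_rps_record_flow(sks[i]);
	}
#endif
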
diff --git a/include/net/sctp/auth.h b/include/net/sctp/auth.h
index d4b3b2dcd15b..3d5879e08e78 100644
--- a/include/net/sctp/auth.h
+++ b/include/net/sctp/auth.h
@@ -22,16 +22,11 @@ struct sctp_endpoint;
struct sctp_association;
struct sctp_authkey;
struct sctp_hmacalgo;
-struct crypto_shash;
-/*
- * Define a generic struct that will hold all the info
- * necessary for an HMAC transform
- */
+/* Defines an HMAC algorithm supported by SCTP chunk authentication */
struct sctp_hmac {
- __u16 hmac_id; /* one of the above ids */
- char *hmac_name; /* name for loading */
- __u16 hmac_len; /* length of the signature */
+ __u16 hmac_id; /* one of SCTP_AUTH_HMAC_ID_* */
+ __u16 hmac_len; /* length of the HMAC value in bytes */
};
/* This is a generic structure that contains authentication bytes used
@@ -78,9 +73,9 @@ int sctp_auth_asoc_copy_shkeys(const struct sctp_endpoint *ep,
struct sctp_association *asoc,
gfp_t gfp);
int sctp_auth_init_hmacs(struct sctp_endpoint *ep, gfp_t gfp);
-void sctp_auth_destroy_hmacs(struct crypto_shash *auth_hmacs[]);
-struct sctp_hmac *sctp_auth_get_hmac(__u16 hmac_id);
-struct sctp_hmac *sctp_auth_asoc_get_hmac(const struct sctp_association *asoc);
+const struct sctp_hmac *sctp_auth_get_hmac(__u16 hmac_id);
+const struct sctp_hmac *
+sctp_auth_asoc_get_hmac(const struct sctp_association *asoc);
void sctp_auth_asoc_set_default_hmac(struct sctp_association *asoc,
struct sctp_hmac_algo_param *hmacs);
int sctp_auth_asoc_verify_hmac_id(const struct sctp_association *asoc,
diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h
index 5859e0a16a58..ae3376ba0b99 100644
--- a/include/net/sctp/constants.h
+++ b/include/net/sctp/constants.h
@@ -296,9 +296,8 @@ enum { SCTP_MAX_GABS = 16 };
*/
#define SCTP_DEFAULT_MINSEGMENT 512 /* MTU size ... if no mtu disc */
-#define SCTP_SECRET_SIZE 32 /* Number of octets in a 256 bits. */
-
-#define SCTP_SIGNATURE_SIZE 20 /* size of a SLA-1 signature */
+#define SCTP_COOKIE_KEY_SIZE 32 /* size of cookie HMAC key */
+#define SCTP_COOKIE_MAC_SIZE 32 /* size of HMAC field in cookies */
#define SCTP_COOKIE_MULTIPLE 32 /* Pad out our cookie to make our hash
* functions simpler to write.
@@ -417,16 +416,12 @@ enum {
SCTP_AUTH_HMAC_ID_RESERVED_0,
SCTP_AUTH_HMAC_ID_SHA1,
SCTP_AUTH_HMAC_ID_RESERVED_2,
-#if defined (CONFIG_CRYPTO_SHA256) || defined (CONFIG_CRYPTO_SHA256_MODULE)
SCTP_AUTH_HMAC_ID_SHA256,
-#endif
__SCTP_AUTH_HMAC_MAX
};
#define SCTP_AUTH_HMAC_ID_MAX __SCTP_AUTH_HMAC_MAX - 1
#define SCTP_AUTH_NUM_HMACS __SCTP_AUTH_HMAC_MAX
-#define SCTP_SHA1_SIG_SIZE 20
-#define SCTP_SHA256_SIG_SIZE 32
/* SCTP-AUTH, Section 3.2
* The chunk types for INIT, INIT-ACK, SHUTDOWN-COMPLETE and AUTH chunks
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 8a540ad9b509..2ae390219efd 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -32,6 +32,7 @@
#ifndef __sctp_structs_h__
#define __sctp_structs_h__
+#include <crypto/sha2.h>
#include <linux/ktime.h>
#include <linux/generic-radix-tree.h>
#include <linux/rhashtable-types.h>
@@ -68,7 +69,6 @@ struct sctp_outq;
struct sctp_bind_addr;
struct sctp_ulpq;
struct sctp_ep_common;
-struct crypto_shash;
struct sctp_stream;
@@ -155,10 +155,6 @@ struct sctp_sock {
/* PF_ family specific functions. */
struct sctp_pf *pf;
- /* Access to HMAC transform. */
- struct crypto_shash *hmac;
- char *sctp_hmac_alg;
-
/* What is our base endpointer? */
struct sctp_endpoint *ep;
@@ -227,7 +223,8 @@ struct sctp_sock {
frag_interleave:1,
recvrcvinfo:1,
recvnxtinfo:1,
- data_ready_signalled:1;
+ data_ready_signalled:1,
+ cookie_auth_enable:1;
atomic_t pd_mode;
@@ -335,7 +332,7 @@ struct sctp_cookie {
/* The format of our cookie that we send to our peer. */
struct sctp_signed_cookie {
- __u8 signature[SCTP_SECRET_SIZE];
+ __u8 mac[SCTP_COOKIE_MAC_SIZE];
__u32 __pad; /* force sctp_cookie alignment to 64 bits */
struct sctp_cookie c;
} __packed;
@@ -1307,33 +1304,15 @@ struct sctp_endpoint {
/* This is really a list of struct sctp_association entries. */
struct list_head asocs;
- /* Secret Key: A secret key used by this endpoint to compute
- * the MAC. This SHOULD be a cryptographic quality
- * random number with a sufficient length.
- * Discussion in [RFC1750] can be helpful in
- * selection of the key.
- */
- __u8 secret_key[SCTP_SECRET_SIZE];
-
- /* digest: This is a digest of the sctp cookie. This field is
- * only used on the receive path when we try to validate
- * that the cookie has not been tampered with. We put
- * this here so we pre-allocate this once and can re-use
- * on every receive.
- */
- __u8 *digest;
-
+ /* Cookie authentication key used by this endpoint */
+ struct hmac_sha256_key cookie_auth_key;
+
/* sendbuf acct. policy. */
__u32 sndbuf_policy;
/* rcvbuf acct. policy. */
__u32 rcvbuf_policy;
- /* SCTP AUTH: array of the HMACs that will be allocated
- * we need this per association so that we don't serialize
- */
- struct crypto_shash **auth_hmacs;
-
/* SCTP-AUTH: hmacs for the endpoint encoded into parameter */
struct sctp_hmac_algo_param *auth_hmacs_list;
diff --git a/include/net/seg6_hmac.h b/include/net/seg6_hmac.h
index 24f733b3e3fe..e9f41725933e 100644
--- a/include/net/seg6_hmac.h
+++ b/include/net/seg6_hmac.h
@@ -9,6 +9,8 @@
#ifndef _NET_SEG6_HMAC_H
#define _NET_SEG6_HMAC_H
+#include <crypto/sha1.h>
+#include <crypto/sha2.h>
#include <net/flow.h>
#include <net/ip6_fib.h>
#include <net/sock.h>
@@ -19,7 +21,6 @@
#include <linux/seg6_hmac.h>
#include <linux/rhashtable-types.h>
-#define SEG6_HMAC_MAX_DIGESTSIZE 160
#define SEG6_HMAC_RING_SIZE 256
struct seg6_hmac_info {
@@ -27,16 +28,15 @@ struct seg6_hmac_info {
struct rcu_head rcu;
u32 hmackeyid;
+ /* The raw key, kept only so it can be returned to userspace */
char secret[SEG6_HMAC_SECRET_LEN];
u8 slen;
u8 alg_id;
-};
-
-struct seg6_hmac_algo {
- u8 alg_id;
- char name[64];
- struct crypto_shash * __percpu *tfms;
- struct shash_desc * __percpu *shashs;
+ /* The prepared key, which the calculations actually use */
+ union {
+ struct hmac_sha1_key sha1;
+ struct hmac_sha256_key sha256;
+ } key;
};
extern int seg6_hmac_compute(struct seg6_hmac_info *hinfo,
@@ -50,13 +50,9 @@ extern int seg6_push_hmac(struct net *net, struct in6_addr *saddr,
struct ipv6_sr_hdr *srh);
extern bool seg6_hmac_validate_skb(struct sk_buff *skb);
#ifdef CONFIG_IPV6_SEG6_HMAC
-extern int seg6_hmac_init(void);
-extern void seg6_hmac_exit(void);
extern int seg6_hmac_net_init(struct net *net);
extern void seg6_hmac_net_exit(struct net *net);
#else
-static inline int seg6_hmac_init(void) { return 0; }
-static inline void seg6_hmac_exit(void) {}
static inline int seg6_hmac_net_init(struct net *net) { return 0; }
static inline void seg6_hmac_net_exit(struct net *net) {}
#endif
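
The union holds keys prepared with the library HMAC API, so the per-CPU
shash machinery can go away. A sketch of the assumed pattern (function
names per the lib/crypto SHA-2 API; data, len and out are illustrative):

	/* once, when the key is installed */
	hmac_sha256_preparekey(&hinfo->key.sha256, hinfo->secret, hinfo->slen);

	/* per packet */
	hmac_sha256(&hinfo->key.sha256, data, len, out);
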
diff --git a/include/net/sock.h b/include/net/sock.h
index fb13322a11fc..896bec2d2176 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -102,6 +102,11 @@ struct net;
typedef __u32 __bitwise __portpair;
typedef __u64 __bitwise __addrpair;
+struct socket_drop_counters {
+ atomic_t drops0 ____cacheline_aligned_in_smp;
+ atomic_t drops1 ____cacheline_aligned_in_smp;
+};
+
/**
* struct sock_common - minimal network layer representation of sockets
* @skc_daddr: Foreign IPv4 addr
@@ -282,6 +287,7 @@ struct sk_filter;
* @sk_err_soft: errors that don't cause failure but are the cause of a
* persistent failure not just 'timed out'
* @sk_drops: raw/udp drops counter
+ * @sk_drop_counters: optional pointer to socket_drop_counters
* @sk_ack_backlog: current listen backlog
* @sk_max_ack_backlog: listen backlog set in listen()
* @sk_uid: user id of owner
@@ -444,10 +450,13 @@ struct sock {
__cacheline_group_begin(sock_read_rxtx);
int sk_err;
struct socket *sk_socket;
+#ifdef CONFIG_MEMCG
struct mem_cgroup *sk_memcg;
+#endif
#ifdef CONFIG_XFRM
struct xfrm_policy __rcu *sk_policy[2];
#endif
+ struct socket_drop_counters *sk_drop_counters;
__cacheline_group_end(sock_read_rxtx);
__cacheline_group_begin(sock_write_rxtx);
@@ -1346,8 +1355,6 @@ struct proto {
unsigned int useroffset; /* Usercopy region offset */
unsigned int usersize; /* Usercopy region size */
- unsigned int __percpu *orphan_count;
-
struct request_sock_ops *rsk_prot;
struct timewait_sock_ops *twsk_prot;
@@ -2603,6 +2610,50 @@ static inline gfp_t gfp_memcg_charge(void)
return in_softirq() ? GFP_ATOMIC : GFP_KERNEL;
}
+#ifdef CONFIG_MEMCG
+static inline struct mem_cgroup *mem_cgroup_from_sk(const struct sock *sk)
+{
+ return sk->sk_memcg;
+}
+
+static inline bool mem_cgroup_sk_enabled(const struct sock *sk)
+{
+ return mem_cgroup_sockets_enabled && mem_cgroup_from_sk(sk);
+}
+
+static inline bool mem_cgroup_sk_under_memory_pressure(const struct sock *sk)
+{
+ struct mem_cgroup *memcg = mem_cgroup_from_sk(sk);
+
+#ifdef CONFIG_MEMCG_V1
+ if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
+ return !!memcg->tcpmem_pressure;
+#endif /* CONFIG_MEMCG_V1 */
+
+ do {
+ if (time_before64(get_jiffies_64(), mem_cgroup_get_socket_pressure(memcg)))
+ return true;
+ } while ((memcg = parent_mem_cgroup(memcg)));
+
+ return false;
+}
+#else
+static inline struct mem_cgroup *mem_cgroup_from_sk(const struct sock *sk)
+{
+ return NULL;
+}
+
+static inline bool mem_cgroup_sk_enabled(const struct sock *sk)
+{
+ return false;
+}
+
+static inline bool mem_cgroup_sk_under_memory_pressure(const struct sock *sk)
+{
+ return false;
+}
+#endif
+
static inline long sock_rcvtimeo(const struct sock *sk, bool noblock)
{
return noblock ? 0 : READ_ONCE(sk->sk_rcvtimeo);
@@ -2645,18 +2696,61 @@ struct sock_skb_cb {
#define sock_skb_cb_check_size(size) \
BUILD_BUG_ON((size) > SOCK_SKB_CB_OFFSET)
+static inline void sk_drops_add(struct sock *sk, int segs)
+{
+ struct socket_drop_counters *sdc = sk->sk_drop_counters;
+
+ if (sdc) {
+ int n = numa_node_id() % 2;
+
+ if (n)
+ atomic_add(segs, &sdc->drops1);
+ else
+ atomic_add(segs, &sdc->drops0);
+ } else {
+ atomic_add(segs, &sk->sk_drops);
+ }
+}
+
+static inline void sk_drops_inc(struct sock *sk)
+{
+ sk_drops_add(sk, 1);
+}
+
+static inline int sk_drops_read(const struct sock *sk)
+{
+ const struct socket_drop_counters *sdc = sk->sk_drop_counters;
+
+ if (sdc) {
+ DEBUG_NET_WARN_ON_ONCE(atomic_read(&sk->sk_drops));
+ return atomic_read(&sdc->drops0) + atomic_read(&sdc->drops1);
+ }
+ return atomic_read(&sk->sk_drops);
+}
+
+static inline void sk_drops_reset(struct sock *sk)
+{
+ struct socket_drop_counters *sdc = sk->sk_drop_counters;
+
+ if (sdc) {
+ atomic_set(&sdc->drops0, 0);
+ atomic_set(&sdc->drops1, 0);
+ }
+ atomic_set(&sk->sk_drops, 0);
+}
+
static inline void
sock_skb_set_dropcount(const struct sock *sk, struct sk_buff *skb)
{
SOCK_SKB_CB(skb)->dropcount = sock_flag(sk, SOCK_RXQ_OVFL) ?
- atomic_read(&sk->sk_drops) : 0;
+ sk_drops_read(sk) : 0;
}
-static inline void sk_drops_add(struct sock *sk, const struct sk_buff *skb)
+static inline void sk_drops_skbadd(struct sock *sk, const struct sk_buff *skb)
{
int segs = max_t(u16, 1, skb_shinfo(skb)->gso_segs);
- atomic_add(segs, &sk->sk_drops);
+ sk_drops_add(sk, segs);
}
static inline ktime_t sock_read_timestamp(struct sock *sk)
@@ -2933,8 +3027,8 @@ void sk_get_meminfo(const struct sock *sk, u32 *meminfo);
*/
#define _SK_MEM_PACKETS 256
#define _SK_MEM_OVERHEAD SKB_TRUESIZE(256)
-#define SK_WMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
-#define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
+#define SK_WMEM_DEFAULT (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
+#define SK_RMEM_DEFAULT (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
extern __u32 sysctl_wmem_max;
extern __u32 sysctl_rmem_max;
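
The two halves sit in separate cache lines and are chosen by NUMA node
parity, so sockets that opt in via sk_drop_counters (UDP, raw) split
writer contention while readers still see one number. Callers keep
using the helpers:

	sk_drops_inc(sk);		/* hot path: picks drops0 or drops1 */
	drops = sk_drops_read(sk);	/* slow path: sums both halves */
	sk_drops_reset(sk);
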
diff --git a/include/net/tc_act/tc_skbmod.h b/include/net/tc_act/tc_skbmod.h
index 7c240d2fed4e..626704cd6241 100644
--- a/include/net/tc_act/tc_skbmod.h
+++ b/include/net/tc_act/tc_skbmod.h
@@ -12,6 +12,7 @@
struct tcf_skbmod_params {
struct rcu_head rcu;
u64 flags; /*up to 64 types of operations; extend if needed */
+ int action;
u8 eth_dst[ETH_ALEN];
u16 eth_type;
u8 eth_src[ETH_ALEN];
diff --git a/include/net/tc_act/tc_tunnel_key.h b/include/net/tc_act/tc_tunnel_key.h
index 879fe8cff581..0f1925f97520 100644
--- a/include/net/tc_act/tc_tunnel_key.h
+++ b/include/net/tc_act/tc_tunnel_key.h
@@ -14,6 +14,7 @@
struct tcf_tunnel_key_params {
struct rcu_head rcu;
int tcft_action;
+ int action;
struct metadata_dst *tcft_enc_metadata;
};
diff --git a/include/net/tc_act/tc_vlan.h b/include/net/tc_act/tc_vlan.h
index 3f5e9242b5e8..beadee41669a 100644
--- a/include/net/tc_act/tc_vlan.h
+++ b/include/net/tc_act/tc_vlan.h
@@ -10,6 +10,7 @@
#include <linux/tc_act/tc_vlan.h>
struct tcf_vlan_params {
+ int action;
int tcfv_action;
unsigned char tcfv_push_dst[ETH_ALEN];
unsigned char tcfv_push_src[ETH_ALEN];
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 526a26e7a150..0fb7923b8367 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -54,6 +54,16 @@ extern struct inet_hashinfo tcp_hashinfo;
DECLARE_PER_CPU(unsigned int, tcp_orphan_count);
int tcp_orphan_count_sum(void);
+static inline void tcp_orphan_count_inc(void)
+{
+ this_cpu_inc(tcp_orphan_count);
+}
+
+static inline void tcp_orphan_count_dec(void)
+{
+ this_cpu_dec(tcp_orphan_count);
+}
+
DECLARE_PER_CPU(u32, tcp_tw_isn);
void tcp_time_wait(struct sock *sk, int state, int timeo);
@@ -275,8 +285,8 @@ extern unsigned long tcp_memory_pressure;
/* optimized version of sk_under_memory_pressure() for TCP sockets */
static inline bool tcp_under_memory_pressure(const struct sock *sk)
{
- if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
- mem_cgroup_under_socket_pressure(sk->sk_memcg))
+ if (mem_cgroup_sk_enabled(sk) &&
+ mem_cgroup_sk_under_memory_pressure(sk))
return true;
return READ_ONCE(tcp_memory_pressure);
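/*
 * Assumed shape of the new wrappers used above (they are defined
 * elsewhere in this series, not in this hunk):
 *
 *	static inline bool mem_cgroup_sk_enabled(const struct sock *sk)
 *	{
 *		return mem_cgroup_sockets_enabled && sk->sk_memcg;
 *	}
 *
 * Keeping all sk->sk_memcg dereferences behind such helpers is what lets
 * the field itself be wrapped in CONFIG_MEMCG (see the sock_struct_check()
 * hunk in net/core/sock.c below).
 */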
@@ -2612,7 +2622,7 @@ static inline void tcp_segs_in(struct tcp_sock *tp, const struct sk_buff *skb)
*/
static inline void tcp_listendrop(const struct sock *sk)
{
- atomic_inc(&((struct sock *)sk)->sk_drops);
+ sk_drops_inc((struct sock *)sk);
__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENDROPS);
}
diff --git a/include/net/timewait_sock.h b/include/net/timewait_sock.h
index 62b3e9f2aed4..0a85ac64a66d 100644
--- a/include/net/timewait_sock.h
+++ b/include/net/timewait_sock.h
@@ -15,13 +15,6 @@ struct timewait_sock_ops {
struct kmem_cache *twsk_slab;
char *twsk_slab_name;
unsigned int twsk_obj_size;
- void (*twsk_destructor)(struct sock *sk);
};
-static inline void twsk_destructor(struct sock *sk)
-{
- if (sk->sk_prot->twsk_prot->twsk_destructor != NULL)
- sk->sk_prot->twsk_prot->twsk_destructor(sk);
-}
-
#endif /* _TIMEWAIT_SOCK_H */
diff --git a/include/net/udp.h b/include/net/udp.h
index e2af3bda90c9..93b159f30e88 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -288,6 +288,7 @@ static inline void udp_lib_init_sock(struct sock *sk)
{
struct udp_sock *up = udp_sk(sk);
+ sk->sk_drop_counters = &up->drop_counters;
skb_queue_head_init(&up->reader_queue);
INIT_HLIST_NODE(&up->tunnel_list);
up->forward_threshold = sk->sk_rcvbuf >> 2;
@@ -627,7 +628,7 @@ static inline struct sk_buff *udp_rcv_segment(struct sock *sk,
return segs;
drop:
- atomic_add(drop_count, &sk->sk_drops);
+ sk_drops_add(sk, drop_count);
SNMP_ADD_STATS(__UDPX_MIB(sk, ipv4), UDP_MIB_INERRORS, drop_count);
kfree_skb(skb);
return NULL;
diff --git a/include/trace/events/fib.h b/include/trace/events/fib.h
index 20b914250ce9..feb28b359eff 100644
--- a/include/trace/events/fib.h
+++ b/include/trace/events/fib.h
@@ -7,6 +7,8 @@
#include <linux/skbuff.h>
#include <linux/netdevice.h>
+#include <net/flow.h>
+#include <net/inet_dscp.h>
#include <net/ip_fib.h>
#include <linux/tracepoint.h>
@@ -44,7 +46,7 @@ TRACE_EVENT(fib_table_lookup,
__entry->err = err;
__entry->oif = flp->flowi4_oif;
__entry->iif = flp->flowi4_iif;
- __entry->tos = flp->flowi4_tos;
+ __entry->tos = inet_dscp_to_dsfield(flp->flowi4_dscp);
__entry->scope = flp->flowi4_scope;
__entry->flags = flp->flowi4_flags;
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 9fcb25a0f447..bcad11a787a5 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -636,6 +636,8 @@ enum devlink_attr {
DEVLINK_ATTR_RATE_TC_BWS, /* nested */
+ DEVLINK_ATTR_HEALTH_REPORTER_BURST_PERIOD, /* u64 */
+
/* Add new attributes above here, update the spec in
* Documentation/netlink/specs/devlink.yaml and re-generate
* net/devlink/netlink_gen.c.
diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 9e9afdd1238a..8bd5ea5469d9 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -2380,6 +2380,7 @@ enum {
#define RXH_L4_B_0_1 (1 << 6) /* src port in case of TCP/UDP/SCTP */
#define RXH_L4_B_2_3 (1 << 7) /* dst port in case of TCP/UDP/SCTP */
#define RXH_GTP_TEID (1 << 8) /* teid in case of GTP */
+#define RXH_IP6_FL (1 << 9) /* IPv6 flow label */
#define RXH_DISCARD (1 << 31)
#define RX_CLS_FLOW_DISC 0xffffffffffffffffULL
diff --git a/include/uapi/linux/stddef.h b/include/uapi/linux/stddef.h
index b87df1b485c2..9a28f7d9a334 100644
--- a/include/uapi/linux/stddef.h
+++ b/include/uapi/linux/stddef.h
@@ -2,7 +2,9 @@
#ifndef _UAPI_LINUX_STDDEF_H
#define _UAPI_LINUX_STDDEF_H
+#ifdef __KERNEL__
#include <linux/compiler_types.h>
+#endif
#ifndef __always_inline
#define __always_inline inline
diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index e5ff49f3425e..319eddfd30e0 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -12,6 +12,7 @@
#include <net/page_pool/helpers.h>
#include <net/page_pool/memory_provider.h>
#include <net/netlink.h>
+#include <net/netdev_queues.h>
#include <net/netdev_rx_queue.h>
#include <net/tcp.h>
#include <net/rps.h>
@@ -599,7 +600,7 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
goto err;
}
- ifq->dev = ifq->netdev->dev.parent;
+ ifq->dev = netdev_queue_get_dma_dev(ifq->netdev, ifq->if_rxq);
if (!ifq->dev) {
ret = -EOPNOTSUPP;
goto err;
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 6b4877e85a68..cdffd74ddbe6 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -1780,6 +1780,9 @@ static int __bpf_dynptr_read(void *dst, u32 len, const struct bpf_dynptr_kern *s
return __bpf_skb_load_bytes(src->data, src->offset + offset, dst, len);
case BPF_DYNPTR_TYPE_XDP:
return __bpf_xdp_load_bytes(src->data, src->offset + offset, dst, len);
+ case BPF_DYNPTR_TYPE_SKB_META:
+ memmove(dst, bpf_skb_meta_pointer(src->data, src->offset + offset), len);
+ return 0;
default:
WARN_ONCE(true, "bpf_dynptr_read: unknown dynptr type %d\n", type);
return -EFAULT;
@@ -1836,6 +1839,11 @@ int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u32 offset, void *src,
if (flags)
return -EINVAL;
return __bpf_xdp_store_bytes(dst->data, dst->offset + offset, src, len);
+ case BPF_DYNPTR_TYPE_SKB_META:
+ if (flags)
+ return -EINVAL;
+ memmove(bpf_skb_meta_pointer(dst->data, dst->offset + offset), src, len);
+ return 0;
default:
WARN_ONCE(true, "bpf_dynptr_write: unknown dynptr type %d\n", type);
return -EFAULT;
@@ -1882,6 +1890,7 @@ BPF_CALL_3(bpf_dynptr_data, const struct bpf_dynptr_kern *, ptr, u32, offset, u3
return (unsigned long)(ptr->data + ptr->offset + offset);
case BPF_DYNPTR_TYPE_SKB:
case BPF_DYNPTR_TYPE_XDP:
+ case BPF_DYNPTR_TYPE_SKB_META:
/* skb and xdp dynptrs should use bpf_dynptr_slice / bpf_dynptr_slice_rdwr */
return 0;
default:
@@ -2710,6 +2719,8 @@ __bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr *p, u32 offset,
bpf_xdp_copy_buf(ptr->data, ptr->offset + offset, buffer__opt, len, false);
return buffer__opt;
}
+ case BPF_DYNPTR_TYPE_SKB_META:
+ return bpf_skb_meta_pointer(ptr->data, ptr->offset + offset);
default:
WARN_ONCE(true, "unknown dynptr type %d\n", type);
return NULL;
diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c
index 38050f4ee400..e4983c1303e7 100644
--- a/kernel/bpf/log.c
+++ b/kernel/bpf/log.c
@@ -498,6 +498,8 @@ const char *dynptr_type_str(enum bpf_dynptr_type type)
return "skb";
case BPF_DYNPTR_TYPE_XDP:
return "xdp";
+ case BPF_DYNPTR_TYPE_SKB_META:
+ return "skb_meta";
case BPF_DYNPTR_TYPE_INVALID:
return "<invalid>";
default:
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index c4f69a9e9af6..5964bed40ffb 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -674,6 +674,8 @@ static enum bpf_dynptr_type arg_to_dynptr_type(enum bpf_arg_type arg_type)
return BPF_DYNPTR_TYPE_SKB;
case DYNPTR_TYPE_XDP:
return BPF_DYNPTR_TYPE_XDP;
+ case DYNPTR_TYPE_SKB_META:
+ return BPF_DYNPTR_TYPE_SKB_META;
default:
return BPF_DYNPTR_TYPE_INVALID;
}
@@ -690,6 +692,8 @@ static enum bpf_type_flag get_dynptr_type_flag(enum bpf_dynptr_type type)
return DYNPTR_TYPE_SKB;
case BPF_DYNPTR_TYPE_XDP:
return DYNPTR_TYPE_XDP;
+ case BPF_DYNPTR_TYPE_SKB_META:
+ return DYNPTR_TYPE_SKB_META;
default:
return 0;
}
@@ -2274,7 +2278,8 @@ static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
static bool reg_is_dynptr_slice_pkt(const struct bpf_reg_state *reg)
{
return base_type(reg->type) == PTR_TO_MEM &&
- (reg->type & DYNPTR_TYPE_SKB || reg->type & DYNPTR_TYPE_XDP);
+ (reg->type &
+ (DYNPTR_TYPE_SKB | DYNPTR_TYPE_XDP | DYNPTR_TYPE_SKB_META));
}
/* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */
@@ -11641,7 +11646,8 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
if (dynptr_type == BPF_DYNPTR_TYPE_INVALID)
return -EFAULT;
- if (dynptr_type == BPF_DYNPTR_TYPE_SKB)
+ if (dynptr_type == BPF_DYNPTR_TYPE_SKB ||
+ dynptr_type == BPF_DYNPTR_TYPE_SKB_META)
/* this will trigger clear_all_pkt_pointers(), which will
* invalidate all dynptr slices associated with the skb
*/
@@ -12228,6 +12234,7 @@ enum special_kfunc_type {
KF_bpf_rbtree_right,
KF_bpf_dynptr_from_skb,
KF_bpf_dynptr_from_xdp,
+ KF_bpf_dynptr_from_skb_meta,
KF_bpf_dynptr_slice,
KF_bpf_dynptr_slice_rdwr,
KF_bpf_dynptr_clone,
@@ -12277,9 +12284,11 @@ BTF_ID(func, bpf_rbtree_right)
#ifdef CONFIG_NET
BTF_ID(func, bpf_dynptr_from_skb)
BTF_ID(func, bpf_dynptr_from_xdp)
+BTF_ID(func, bpf_dynptr_from_skb_meta)
#else
BTF_ID_UNUSED
BTF_ID_UNUSED
+BTF_ID_UNUSED
#endif
BTF_ID(func, bpf_dynptr_slice)
BTF_ID(func, bpf_dynptr_slice_rdwr)
@@ -13253,6 +13262,8 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
dynptr_arg_type |= DYNPTR_TYPE_SKB;
} else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_xdp]) {
dynptr_arg_type |= DYNPTR_TYPE_XDP;
+ } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_skb_meta]) {
+ dynptr_arg_type |= DYNPTR_TYPE_SKB_META;
} else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_clone] &&
(dynptr_arg_type & MEM_UNINIT)) {
enum bpf_dynptr_type parent_type = meta->initialized_dynptr.type;
diff --git a/kernel/time/time.c b/kernel/time/time.c
index 1b69caa87480..0ba8e3c50d62 100644
--- a/kernel/time/time.c
+++ b/kernel/time/time.c
@@ -858,6 +858,7 @@ struct timespec64 timespec64_add_safe(const struct timespec64 lhs,
return res;
}
+EXPORT_SYMBOL_GPL(timespec64_add_safe);
/**
* get_timespec64 - get user's time value into kernel space
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 8dd7fbed5a94..df3e9205c9e6 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5020,22 +5020,42 @@ out:
void mem_cgroup_sk_free(struct sock *sk)
{
- if (sk->sk_memcg)
- css_put(&sk->sk_memcg->css);
+ struct mem_cgroup *memcg = mem_cgroup_from_sk(sk);
+
+ if (memcg)
+ css_put(&memcg->css);
+}
+
+void mem_cgroup_sk_inherit(const struct sock *sk, struct sock *newsk)
+{
+ struct mem_cgroup *memcg;
+
+ if (sk->sk_memcg == newsk->sk_memcg)
+ return;
+
+ mem_cgroup_sk_free(newsk);
+
+ memcg = mem_cgroup_from_sk(sk);
+ if (memcg)
+ css_get(&memcg->css);
+
+ newsk->sk_memcg = sk->sk_memcg;
}
/**
- * mem_cgroup_charge_skmem - charge socket memory
- * @memcg: memcg to charge
+ * mem_cgroup_sk_charge - charge socket memory
+ * @sk: socket in memcg to charge
* @nr_pages: number of pages to charge
* @gfp_mask: reclaim mode
*
 * Charges @nr_pages to the memcg associated with @sk. Returns %true if the
 * charge fit within the memcg's configured limit, %false if it doesn't.
*/
-bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages,
- gfp_t gfp_mask)
+bool mem_cgroup_sk_charge(const struct sock *sk, unsigned int nr_pages,
+ gfp_t gfp_mask)
{
+ struct mem_cgroup *memcg = mem_cgroup_from_sk(sk);
+
if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
return memcg1_charge_skmem(memcg, nr_pages, gfp_mask);
@@ -5048,12 +5068,14 @@ bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages,
}
/**
- * mem_cgroup_uncharge_skmem - uncharge socket memory
- * @memcg: memcg to uncharge
+ * mem_cgroup_sk_uncharge - uncharge socket memory
+ * @sk: socket in memcg to uncharge
* @nr_pages: number of pages to uncharge
*/
-void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
+void mem_cgroup_sk_uncharge(const struct sock *sk, unsigned int nr_pages)
{
+ struct mem_cgroup *memcg = mem_cgroup_from_sk(sk);
+
if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) {
memcg1_uncharge_skmem(memcg, nr_pages);
return;
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 29097e984b4f..870bdf2e082c 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -148,7 +148,8 @@ void br_forward(const struct net_bridge_port *to,
goto out;
/* redirect to backup link if the destination port is down */
- if (rcu_access_pointer(to->backup_port) && !netif_carrier_ok(to->dev)) {
+ if (rcu_access_pointer(to->backup_port) &&
+ (!netif_carrier_ok(to->dev) || !netif_running(to->dev))) {
struct net_bridge_port *backup_port;
backup_port = rcu_dereference(to->backup_port);
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 8ce145938b02..22d12e545966 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -4049,8 +4049,7 @@ int br_multicast_rcv(struct net_bridge_mcast **brmctx,
}
static void br_multicast_query_expired(struct net_bridge_mcast *brmctx,
- struct bridge_mcast_own_query *query,
- struct bridge_mcast_querier *querier)
+ struct bridge_mcast_own_query *query)
{
spin_lock(&brmctx->br->multicast_lock);
if (br_multicast_ctx_vlan_disabled(brmctx))
@@ -4069,8 +4068,7 @@ static void br_ip4_multicast_query_expired(struct timer_list *t)
struct net_bridge_mcast *brmctx = timer_container_of(brmctx, t,
ip4_own_query.timer);
- br_multicast_query_expired(brmctx, &brmctx->ip4_own_query,
- &brmctx->ip4_querier);
+ br_multicast_query_expired(brmctx, &brmctx->ip4_own_query);
}
#if IS_ENABLED(CONFIG_IPV6)
@@ -4079,8 +4077,7 @@ static void br_ip6_multicast_query_expired(struct timer_list *t)
struct net_bridge_mcast *brmctx = timer_container_of(brmctx, t,
ip6_own_query.timer);
- br_multicast_query_expired(brmctx, &brmctx->ip6_own_query,
- &brmctx->ip6_querier);
+ br_multicast_query_expired(brmctx, &brmctx->ip6_own_query);
}
#endif
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 3e67d4aff419..5697e3949a36 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -920,8 +920,8 @@ static int translate_table(struct net *net, const char *name,
* if an error occurs
*/
newinfo->chainstack =
- vmalloc(array_size(nr_cpu_ids,
- sizeof(*(newinfo->chainstack))));
+ vmalloc_array(nr_cpu_ids,
+ sizeof(*(newinfo->chainstack)));
if (!newinfo->chainstack)
return -ENOMEM;
for_each_possible_cpu(i) {
@@ -938,7 +938,7 @@ static int translate_table(struct net *net, const char *name,
}
}
- cl_s = vmalloc(array_size(udc_cnt, sizeof(*cl_s)));
+ cl_s = vmalloc_array(udc_cnt, sizeof(*cl_s));
if (!cl_s)
return -ENOMEM;
i = 0; /* the i'th udc */
@@ -1018,8 +1018,8 @@ static int do_replace_finish(struct net *net, struct ebt_replace *repl,
* the check on the size is done later, when we have the lock
*/
if (repl->num_counters) {
- unsigned long size = repl->num_counters * sizeof(*counterstmp);
- counterstmp = vmalloc(size);
+ counterstmp = vmalloc_array(repl->num_counters,
+ sizeof(*counterstmp));
if (!counterstmp)
return -ENOMEM;
}
@@ -1386,7 +1386,7 @@ static int do_update_counters(struct net *net, const char *name,
if (num_counters == 0)
return -EINVAL;
- tmp = vmalloc(array_size(num_counters, sizeof(*tmp)));
+ tmp = vmalloc_array(num_counters, sizeof(*tmp));
if (!tmp)
return -ENOMEM;
@@ -1526,7 +1526,7 @@ static int copy_counters_to_user(struct ebt_table *t,
if (num_counters != nentries)
return -EINVAL;
- counterstmp = vmalloc(array_size(nentries, sizeof(*counterstmp)));
+ counterstmp = vmalloc_array(nentries, sizeof(*counterstmp));
if (!counterstmp)
return -ENOMEM;
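/*
 * The conversions above are behavior-preserving: vmalloc_array(n, size)
 * performs the same overflow-checked multiplication that
 * vmalloc(array_size(n, size)) did and returns NULL on overflow, e.g.
 *
 *	tmp = vmalloc_array(num_counters, sizeof(*tmp));
 *	if (!tmp)
 *		return -ENOMEM;
 *
 * but without open-coding array_size() at every call site.
 */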
diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c
index 06b604cf9d58..2aa1e7d46eb2 100644
--- a/net/caif/cfctrl.c
+++ b/net/caif/cfctrl.c
@@ -257,9 +257,7 @@ int cfctrl_linkup_request(struct cflayer *layer,
cfpkt_add_body(pkt, &tmp16, 2);
tmp16 = cpu_to_le16(param->u.utility.fifosize_bufs);
cfpkt_add_body(pkt, &tmp16, 2);
- memset(utility_name, 0, sizeof(utility_name));
- strscpy(utility_name, param->u.utility.name,
- UTILITY_NAME_LENGTH);
+ strscpy_pad(utility_name, param->u.utility.name);
cfpkt_add_body(pkt, utility_name, UTILITY_NAME_LENGTH);
tmp8 = param->u.utility.paramlen;
cfpkt_add_body(pkt, &tmp8, 1);
diff --git a/net/core/Makefile b/net/core/Makefile
index b2a76ce33932..9ef2099c5426 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -20,6 +20,7 @@ obj-$(CONFIG_NETDEV_ADDR_LIST_TEST) += dev_addr_lists_test.o
obj-y += net-sysfs.o
obj-y += hotdata.o
obj-y += netdev_rx_queue.o
+obj-y += netdev_queues.o
obj-$(CONFIG_PAGE_POOL) += page_pool.o page_pool_user.o
obj-$(CONFIG_PROC_FS) += net-procfs.o
obj-$(CONFIG_NET_PKTGEN) += pktgen.o
diff --git a/net/core/datagram.c b/net/core/datagram.c
index f474b9b120f9..cb4b9ef2e4e3 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -345,7 +345,7 @@ int __sk_queue_drop_skb(struct sock *sk, struct sk_buff_head *sk_queue,
spin_unlock_bh(&sk_queue->lock);
}
- atomic_inc(&sk->sk_drops);
+ sk_drops_inc(sk);
return err;
}
EXPORT_SYMBOL(__sk_queue_drop_skb);
diff --git a/net/core/dev.c b/net/core/dev.c
index 93a25d87b86b..1d1650d9ecff 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4849,9 +4849,40 @@ static u32 rfs_slot(u32 hash, const struct rps_dev_flow_table *flow_table)
return hash_32(hash, flow_table->log);
}
+#ifdef CONFIG_RFS_ACCEL
+/**
+ * rps_flow_is_active - check whether a flow was recently active.
+ * @rflow: the flow to check for recent activity.
+ * @flow_table: per-queue flow table that @rflow belongs to.
+ * @cpu: CPU saved in @rflow.
+ *
+ * If the CPU has processed many packets since the flow's last activity
+ * (more than 10 times the table size), the flow is considered stale.
+ *
+ * Return: %true if the flow was recently active.
+ */
+static bool rps_flow_is_active(struct rps_dev_flow *rflow,
+ struct rps_dev_flow_table *flow_table,
+ unsigned int cpu)
+{
+ unsigned int flow_last_active;
+ unsigned int sd_input_head;
+
+ if (cpu >= nr_cpu_ids)
+ return false;
+
+ sd_input_head = READ_ONCE(per_cpu(softnet_data, cpu).input_queue_head);
+ flow_last_active = READ_ONCE(rflow->last_qtail);
+
+ return (int)(sd_input_head - flow_last_active) <
+ (int)(10 << flow_table->log);
+}
+#endif
+
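/*
 * Worked example of the staleness check (illustrative numbers): with
 * log = 8 the table has 256 slots, so a flow counts as active only while
 * the CPU's input_queue_head has advanced by fewer than 10 << 8 = 2560
 * packets since last_qtail was recorded; beyond that, set_rps_cpu() may
 * reuse the slot for another flow.
 */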
static struct rps_dev_flow *
set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
- struct rps_dev_flow *rflow, u16 next_cpu)
+ struct rps_dev_flow *rflow, u16 next_cpu, u32 hash,
+ u32 flow_id)
{
if (next_cpu < nr_cpu_ids) {
u32 head;
@@ -4859,8 +4890,9 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
struct netdev_rx_queue *rxqueue;
struct rps_dev_flow_table *flow_table;
struct rps_dev_flow *old_rflow;
+ struct rps_dev_flow *tmp_rflow;
+ unsigned int tmp_cpu;
u16 rxq_index;
- u32 flow_id;
int rc;
/* Should we steer this flow to a different hardware queue? */
@@ -4875,14 +4907,29 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
flow_table = rcu_dereference(rxqueue->rps_flow_table);
if (!flow_table)
goto out;
- flow_id = rfs_slot(skb_get_hash(skb), flow_table);
+
+ tmp_rflow = &flow_table->flows[flow_id];
+ tmp_cpu = READ_ONCE(tmp_rflow->cpu);
+
+ if (READ_ONCE(tmp_rflow->filter) != RPS_NO_FILTER) {
+ if (rps_flow_is_active(tmp_rflow, flow_table,
+ tmp_cpu)) {
+ if (hash != READ_ONCE(tmp_rflow->hash) ||
+ next_cpu == tmp_cpu)
+ goto out;
+ }
+ }
+
rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb,
rxq_index, flow_id);
if (rc < 0)
goto out;
+
old_rflow = rflow;
- rflow = &flow_table->flows[flow_id];
+ rflow = tmp_rflow;
WRITE_ONCE(rflow->filter, rc);
+ WRITE_ONCE(rflow->hash, hash);
+
if (old_rflow->filter == rc)
WRITE_ONCE(old_rflow->filter, RPS_NO_FILTER);
out:
@@ -4908,6 +4955,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
struct rps_dev_flow_table *flow_table;
struct rps_map *map;
int cpu = -1;
+ u32 flow_id;
u32 tcpu;
u32 hash;
@@ -4954,7 +5002,8 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
/* OK, now we know there is a match,
* we can look at the local (per receive queue) flow table
*/
- rflow = &flow_table->flows[rfs_slot(hash, flow_table)];
+ flow_id = rfs_slot(hash, flow_table);
+ rflow = &flow_table->flows[flow_id];
tcpu = rflow->cpu;
/*
@@ -4973,7 +5022,8 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
((int)(READ_ONCE(per_cpu(softnet_data, tcpu).input_queue_head) -
rflow->last_qtail)) >= 0)) {
tcpu = next_cpu;
- rflow = set_rps_cpu(dev, skb, rflow, next_cpu);
+ rflow = set_rps_cpu(dev, skb, rflow, next_cpu, hash,
+ flow_id);
}
if (tcpu < nr_cpu_ids && cpu_online(tcpu)) {
@@ -5017,17 +5067,16 @@ bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
struct rps_dev_flow_table *flow_table;
struct rps_dev_flow *rflow;
bool expire = true;
- unsigned int cpu;
rcu_read_lock();
flow_table = rcu_dereference(rxqueue->rps_flow_table);
if (flow_table && flow_id < (1UL << flow_table->log)) {
+ unsigned int cpu;
+
rflow = &flow_table->flows[flow_id];
cpu = READ_ONCE(rflow->cpu);
- if (READ_ONCE(rflow->filter) == filter_id && cpu < nr_cpu_ids &&
- ((int)(READ_ONCE(per_cpu(softnet_data, cpu).input_queue_head) -
- READ_ONCE(rflow->last_qtail)) <
- (int)(10 << flow_table->log)))
+ if (READ_ONCE(rflow->filter) == filter_id &&
+ rps_flow_is_active(rflow, flow_table, cpu))
expire = false;
}
rcu_read_unlock();
diff --git a/net/core/devmem.c b/net/core/devmem.c
index 24c591ab38ae..d9de31a6cc7f 100644
--- a/net/core/devmem.c
+++ b/net/core/devmem.c
@@ -176,6 +176,7 @@ err_close_rxq:
struct net_devmem_dmabuf_binding *
net_devmem_bind_dmabuf(struct net_device *dev,
+ struct device *dma_dev,
enum dma_data_direction direction,
unsigned int dmabuf_fd, struct netdev_nl_sock *priv,
struct netlink_ext_ack *extack)
@@ -188,6 +189,11 @@ net_devmem_bind_dmabuf(struct net_device *dev,
unsigned long virtual;
int err;
+ if (!dma_dev) {
+ NL_SET_ERR_MSG(extack, "Device doesn't support DMA");
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
dmabuf = dma_buf_get(dmabuf_fd);
if (IS_ERR(dmabuf))
return ERR_CAST(dmabuf);
@@ -209,7 +215,7 @@ net_devmem_bind_dmabuf(struct net_device *dev,
binding->dmabuf = dmabuf;
binding->direction = direction;
- binding->attachment = dma_buf_attach(binding->dmabuf, dev->dev.parent);
+ binding->attachment = dma_buf_attach(binding->dmabuf, dma_dev);
if (IS_ERR(binding->attachment)) {
err = PTR_ERR(binding->attachment);
NL_SET_ERR_MSG(extack, "Failed to bind dmabuf to device");
diff --git a/net/core/devmem.h b/net/core/devmem.h
index 41cd6e1c9141..101150d761af 100644
--- a/net/core/devmem.h
+++ b/net/core/devmem.h
@@ -85,6 +85,7 @@ struct dmabuf_genpool_chunk_owner {
void __net_devmem_dmabuf_binding_free(struct work_struct *wq);
struct net_devmem_dmabuf_binding *
net_devmem_bind_dmabuf(struct net_device *dev,
+ struct device *dma_dev,
enum dma_data_direction direction,
unsigned int dmabuf_fd, struct netdev_nl_sock *priv,
struct netlink_ext_ack *extack);
@@ -170,6 +171,7 @@ static inline void net_devmem_put_net_iov(struct net_iov *niov)
static inline struct net_devmem_dmabuf_binding *
net_devmem_bind_dmabuf(struct net_device *dev,
+ struct device *dma_dev,
enum dma_data_direction direction,
unsigned int dmabuf_fd,
struct netdev_nl_sock *priv,
diff --git a/net/core/dst.c b/net/core/dst.c
index e2de8b68c41d..e9d35f49c9e7 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -150,7 +150,7 @@ void dst_dev_put(struct dst_entry *dst)
dst->ops->ifdown(dst, dev);
WRITE_ONCE(dst->input, dst_discard);
WRITE_ONCE(dst->output, dst_discard_out);
- WRITE_ONCE(dst->dev, blackhole_netdev);
+ rcu_assign_pointer(dst->dev_rcu, blackhole_netdev);
netdev_ref_replace(dev, blackhole_netdev, &dst->dev_tracker,
GFP_ATOMIC);
}
diff --git a/net/core/filter.c b/net/core/filter.c
index da391e2b0788..b005363f482c 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2373,7 +2373,7 @@ static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev,
struct flowi4 fl4 = {
.flowi4_flags = FLOWI_FLAG_ANYSRC,
.flowi4_mark = skb->mark,
- .flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(ip4h)),
+ .flowi4_dscp = ip4h_dscp(ip4h),
.flowi4_oif = dev->ifindex,
.flowi4_proto = ip4h->protocol,
.daddr = ip4h->daddr,
@@ -6020,7 +6020,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
fl4.flowi4_iif = params->ifindex;
fl4.flowi4_oif = 0;
}
- fl4.flowi4_tos = params->tos & INET_DSCP_MASK;
+ fl4.flowi4_dscp = inet_dsfield_to_dscp(params->tos);
fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
fl4.flowi4_flags = 0;
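/*
 * Note on the flowi4_tos -> flowi4_dscp conversions in this file: dscp_t
 * holds the whole DS field with the two ECN bits cleared, so
 * inet_dsfield_to_dscp(tos) only masks with INET_DSCP_MASK (0xfc) and
 * inet_dscp_to_dsfield() returns the stored value unchanged; no shifting
 * is involved.
 */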
@@ -6767,7 +6767,6 @@ static const struct bpf_func_proto bpf_lwt_seg6_adjust_srh_proto = {
static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
int dif, int sdif, u8 family, u8 proto)
{
- struct inet_hashinfo *hinfo = net->ipv4.tcp_death_row.hashinfo;
bool refcounted = false;
struct sock *sk = NULL;
@@ -6776,7 +6775,7 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
__be32 dst4 = tuple->ipv4.daddr;
if (proto == IPPROTO_TCP)
- sk = __inet_lookup(net, hinfo, NULL, 0,
+ sk = __inet_lookup(net, NULL, 0,
src4, tuple->ipv4.sport,
dst4, tuple->ipv4.dport,
dif, sdif, &refcounted);
@@ -6790,7 +6789,7 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
struct in6_addr *dst6 = (struct in6_addr *)&tuple->ipv6.daddr;
if (proto == IPPROTO_TCP)
- sk = __inet6_lookup(net, hinfo, NULL, 0,
+ sk = __inet6_lookup(net, NULL, 0,
src6, tuple->ipv6.sport,
dst6, ntohs(tuple->ipv6.dport),
dif, sdif, &refcounted);
@@ -11990,6 +11989,16 @@ bpf_sk_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return func;
}
+/**
+ * bpf_skb_meta_pointer() - Gets a mutable pointer within the skb metadata area.
+ * @skb: socket buffer carrying the metadata
+ * @offset: offset into the metadata area, must be <= skb_metadata_len()
+ */
+void *bpf_skb_meta_pointer(struct sk_buff *skb, u32 offset)
+{
+ return skb_metadata_end(skb) - skb_metadata_len(skb) + offset;
+}
+
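/*
 * Layout assumed by bpf_skb_meta_pointer() above: the metadata area of
 * len = skb_metadata_len(skb) bytes sits immediately in front of
 * skb_metadata_end(skb), which is the start of the MAC header, so
 *
 *	pointer = skb_metadata_end(skb) - len + offset,  0 <= offset <= len
 */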
__bpf_kfunc_start_defs();
__bpf_kfunc int bpf_dynptr_from_skb(struct __sk_buff *s, u64 flags,
struct bpf_dynptr *ptr__uninit)
@@ -12007,6 +12016,42 @@ __bpf_kfunc int bpf_dynptr_from_skb(struct __sk_buff *s, u64 flags,
return 0;
}
+/**
+ * bpf_dynptr_from_skb_meta() - Initialize a dynptr to the skb metadata area.
+ * @skb_: socket buffer carrying the metadata
+ * @flags: future use, must be zero
+ * @ptr__uninit: dynptr to initialize
+ *
+ * Set up a dynptr for access to the metadata area allocated earlier from
+ * the XDP context with bpf_xdp_adjust_meta(). Serves as an alternative to
+ * &__sk_buff->data_meta.
+ *
+ * If the passed @skb_ is a clone which shares its data with the original,
+ * the dynptr will be read-only. This limitation may be lifted in the future.
+ *
+ * Return:
+ * * %0 - dynptr ready to use
+ * * %-EINVAL - invalid flags, dynptr set to null
+ */
+__bpf_kfunc int bpf_dynptr_from_skb_meta(struct __sk_buff *skb_, u64 flags,
+ struct bpf_dynptr *ptr__uninit)
+{
+ struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)ptr__uninit;
+ struct sk_buff *skb = (struct sk_buff *)skb_;
+
+ if (flags) {
+ bpf_dynptr_set_null(ptr);
+ return -EINVAL;
+ }
+
+ bpf_dynptr_init(ptr, skb, BPF_DYNPTR_TYPE_SKB_META, 0, skb_metadata_len(skb));
+
+ if (skb_cloned(skb))
+ bpf_dynptr_set_rdonly(ptr);
+
+ return 0;
+}
+
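/*
 * Minimal BPF-side usage sketch (not part of this patch; assumes the
 * usual vmlinux.h/libbpf setup and a __ksym declaration of the kfunc):
 * copy the first four bytes of XDP-provided metadata into skb->mark.
 */
extern int bpf_dynptr_from_skb_meta(struct __sk_buff *skb, __u64 flags,
				    struct bpf_dynptr *ptr) __ksym;

SEC("tc")
int rd_meta(struct __sk_buff *skb)
{
	struct bpf_dynptr meta;
	__u32 mark;

	if (bpf_dynptr_from_skb_meta(skb, 0, &meta))
		return TC_ACT_OK;
	if (!bpf_dynptr_read(&mark, sizeof(mark), &meta, 0, 0))
		skb->mark = mark;
	return TC_ACT_OK;
}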
__bpf_kfunc int bpf_dynptr_from_xdp(struct xdp_md *x, u64 flags,
struct bpf_dynptr *ptr__uninit)
{
@@ -12181,6 +12226,10 @@ BTF_KFUNCS_START(bpf_kfunc_check_set_skb)
BTF_ID_FLAGS(func, bpf_dynptr_from_skb, KF_TRUSTED_ARGS)
BTF_KFUNCS_END(bpf_kfunc_check_set_skb)
+BTF_KFUNCS_START(bpf_kfunc_check_set_skb_meta)
+BTF_ID_FLAGS(func, bpf_dynptr_from_skb_meta, KF_TRUSTED_ARGS)
+BTF_KFUNCS_END(bpf_kfunc_check_set_skb_meta)
+
BTF_KFUNCS_START(bpf_kfunc_check_set_xdp)
BTF_ID_FLAGS(func, bpf_dynptr_from_xdp)
BTF_KFUNCS_END(bpf_kfunc_check_set_xdp)
@@ -12202,6 +12251,11 @@ static const struct btf_kfunc_id_set bpf_kfunc_set_skb = {
.set = &bpf_kfunc_check_set_skb,
};
+static const struct btf_kfunc_id_set bpf_kfunc_set_skb_meta = {
+ .owner = THIS_MODULE,
+ .set = &bpf_kfunc_check_set_skb_meta,
+};
+
static const struct btf_kfunc_id_set bpf_kfunc_set_xdp = {
.owner = THIS_MODULE,
.set = &bpf_kfunc_check_set_xdp,
@@ -12237,6 +12291,8 @@ static int __init bpf_kfunc_init(void)
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_SEG6LOCAL, &bpf_kfunc_set_skb);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_NETFILTER, &bpf_kfunc_set_skb);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_kfunc_set_skb);
+ ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_kfunc_set_skb_meta);
+ ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_ACT, &bpf_kfunc_set_skb_meta);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &bpf_kfunc_set_xdp);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
&bpf_kfunc_set_sock_addr);
diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
index ae74634310a3..9f40be0c3e71 100644
--- a/net/core/lwt_bpf.c
+++ b/net/core/lwt_bpf.c
@@ -8,12 +8,12 @@
#include <linux/skbuff.h>
#include <linux/types.h>
#include <linux/bpf.h>
+#include <net/flow.h>
#include <net/lwtunnel.h>
#include <net/gre.h>
#include <net/ip.h>
#include <net/ip6_route.h>
#include <net/ipv6_stubs.h>
-#include <net/inet_dscp.h>
struct bpf_lwt_prog {
struct bpf_prog *prog;
@@ -209,7 +209,7 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
fl4.flowi4_oif = oif;
fl4.flowi4_mark = skb->mark;
fl4.flowi4_uid = sock_net_uid(net, sk);
- fl4.flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(iph));
+ fl4.flowi4_dscp = ip4h_dscp(iph);
fl4.flowi4_flags = FLOWI_FLAG_ANYSRC;
fl4.flowi4_proto = iph->protocol;
fl4.daddr = iph->daddr;
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index c28cd6665444..5ea9f64adce3 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1120,8 +1120,10 @@ static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
return -ENOMEM;
table->log = ilog2(mask) + 1;
- for (count = 0; count <= mask; count++)
+ for (count = 0; count <= mask; count++) {
table->flows[count].cpu = RPS_NO_CPU;
+ table->flows[count].filter = RPS_NO_FILTER;
+ }
} else {
table = NULL;
}
diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c
index 6314eb7bdf69..470fabbeacd9 100644
--- a/net/core/netdev-genl.c
+++ b/net/core/netdev-genl.c
@@ -869,16 +869,79 @@ int netdev_nl_qstats_get_dumpit(struct sk_buff *skb,
return err;
}
-int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info)
+static int netdev_nl_read_rxq_bitmap(struct genl_info *info,
+ u32 rxq_bitmap_len,
+ unsigned long *rxq_bitmap)
{
+ const int maxtype = ARRAY_SIZE(netdev_queue_id_nl_policy) - 1;
struct nlattr *tb[ARRAY_SIZE(netdev_queue_id_nl_policy)];
+ struct nlattr *attr;
+ int rem, err = 0;
+ u32 rxq_idx;
+
+ nla_for_each_attr_type(attr, NETDEV_A_DMABUF_QUEUES,
+ genlmsg_data(info->genlhdr),
+ genlmsg_len(info->genlhdr), rem) {
+ err = nla_parse_nested(tb, maxtype, attr,
+ netdev_queue_id_nl_policy, info->extack);
+ if (err < 0)
+ return err;
+
+ if (NL_REQ_ATTR_CHECK(info->extack, attr, tb, NETDEV_A_QUEUE_ID) ||
+ NL_REQ_ATTR_CHECK(info->extack, attr, tb, NETDEV_A_QUEUE_TYPE))
+ return -EINVAL;
+
+ if (nla_get_u32(tb[NETDEV_A_QUEUE_TYPE]) != NETDEV_QUEUE_TYPE_RX) {
+ NL_SET_BAD_ATTR(info->extack, tb[NETDEV_A_QUEUE_TYPE]);
+ return -EINVAL;
+ }
+
+ rxq_idx = nla_get_u32(tb[NETDEV_A_QUEUE_ID]);
+ if (rxq_idx >= rxq_bitmap_len) {
+ NL_SET_BAD_ATTR(info->extack, tb[NETDEV_A_QUEUE_ID]);
+ return -EINVAL;
+ }
+
+ bitmap_set(rxq_bitmap, rxq_idx, 1);
+ }
+
+ return 0;
+}
+
+static struct device *
+netdev_nl_get_dma_dev(struct net_device *netdev, unsigned long *rxq_bitmap,
+ struct netlink_ext_ack *extack)
+{
+ struct device *dma_dev = NULL;
+ u32 rxq_idx, prev_rxq_idx;
+
+ for_each_set_bit(rxq_idx, rxq_bitmap, netdev->real_num_rx_queues) {
+ struct device *rxq_dma_dev;
+
+ rxq_dma_dev = netdev_queue_get_dma_dev(netdev, rxq_idx);
+ if (dma_dev && rxq_dma_dev != dma_dev) {
+ NL_SET_ERR_MSG_FMT(extack, "DMA device mismatch between queue %u and %u (multi-PF device?)",
+ rxq_idx, prev_rxq_idx);
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ dma_dev = rxq_dma_dev;
+ prev_rxq_idx = rxq_idx;
+ }
+
+ return dma_dev;
+}
+
+int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info)
+{
struct net_devmem_dmabuf_binding *binding;
u32 ifindex, dmabuf_fd, rxq_idx;
struct netdev_nl_sock *priv;
struct net_device *netdev;
+ unsigned long *rxq_bitmap;
+ struct device *dma_dev;
struct sk_buff *rsp;
- struct nlattr *attr;
- int rem, err = 0;
+ int err = 0;
void *hdr;
if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_DEV_IFINDEX) ||
@@ -921,36 +984,31 @@ int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info)
goto err_unlock;
}
- binding = net_devmem_bind_dmabuf(netdev, DMA_FROM_DEVICE, dmabuf_fd,
- priv, info->extack);
- if (IS_ERR(binding)) {
- err = PTR_ERR(binding);
+ rxq_bitmap = bitmap_zalloc(netdev->real_num_rx_queues, GFP_KERNEL);
+ if (!rxq_bitmap) {
+ err = -ENOMEM;
goto err_unlock;
}
- nla_for_each_attr_type(attr, NETDEV_A_DMABUF_QUEUES,
- genlmsg_data(info->genlhdr),
- genlmsg_len(info->genlhdr), rem) {
- err = nla_parse_nested(
- tb, ARRAY_SIZE(netdev_queue_id_nl_policy) - 1, attr,
- netdev_queue_id_nl_policy, info->extack);
- if (err < 0)
- goto err_unbind;
-
- if (NL_REQ_ATTR_CHECK(info->extack, attr, tb, NETDEV_A_QUEUE_ID) ||
- NL_REQ_ATTR_CHECK(info->extack, attr, tb, NETDEV_A_QUEUE_TYPE)) {
- err = -EINVAL;
- goto err_unbind;
- }
+ err = netdev_nl_read_rxq_bitmap(info, netdev->real_num_rx_queues,
+ rxq_bitmap);
+ if (err)
+ goto err_rxq_bitmap;
- if (nla_get_u32(tb[NETDEV_A_QUEUE_TYPE]) != NETDEV_QUEUE_TYPE_RX) {
- NL_SET_BAD_ATTR(info->extack, tb[NETDEV_A_QUEUE_TYPE]);
- err = -EINVAL;
- goto err_unbind;
- }
+ dma_dev = netdev_nl_get_dma_dev(netdev, rxq_bitmap, info->extack);
+ if (IS_ERR(dma_dev)) {
+ err = PTR_ERR(dma_dev);
+ goto err_rxq_bitmap;
+ }
- rxq_idx = nla_get_u32(tb[NETDEV_A_QUEUE_ID]);
+ binding = net_devmem_bind_dmabuf(netdev, dma_dev, DMA_FROM_DEVICE,
+ dmabuf_fd, priv, info->extack);
+ if (IS_ERR(binding)) {
+ err = PTR_ERR(binding);
+ goto err_rxq_bitmap;
+ }
+ for_each_set_bit(rxq_idx, rxq_bitmap, netdev->real_num_rx_queues) {
err = net_devmem_bind_dmabuf_to_queue(netdev, rxq_idx, binding,
info->extack);
if (err)
@@ -964,6 +1022,8 @@ int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info)
if (err)
goto err_unbind;
+ bitmap_free(rxq_bitmap);
+
netdev_unlock(netdev);
mutex_unlock(&priv->lock);
@@ -972,6 +1032,8 @@ int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info)
err_unbind:
net_devmem_unbind_dmabuf(binding);
+err_rxq_bitmap:
+ bitmap_free(rxq_bitmap);
err_unlock:
netdev_unlock(netdev);
err_unlock_sock:
@@ -986,6 +1048,7 @@ int netdev_nl_bind_tx_doit(struct sk_buff *skb, struct genl_info *info)
struct net_devmem_dmabuf_binding *binding;
struct netdev_nl_sock *priv;
struct net_device *netdev;
+ struct device *dma_dev;
u32 ifindex, dmabuf_fd;
struct sk_buff *rsp;
int err = 0;
@@ -1032,8 +1095,9 @@ int netdev_nl_bind_tx_doit(struct sk_buff *skb, struct genl_info *info)
goto err_unlock_netdev;
}
- binding = net_devmem_bind_dmabuf(netdev, DMA_TO_DEVICE, dmabuf_fd, priv,
- info->extack);
+ dma_dev = netdev_queue_get_dma_dev(netdev, 0);
+ binding = net_devmem_bind_dmabuf(netdev, dma_dev, DMA_TO_DEVICE,
+ dmabuf_fd, priv, info->extack);
if (IS_ERR(binding)) {
err = PTR_ERR(binding);
goto err_unlock_netdev;
diff --git a/net/core/netdev_queues.c b/net/core/netdev_queues.c
new file mode 100644
index 000000000000..251f27a8307f
--- /dev/null
+++ b/net/core/netdev_queues.c
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <net/netdev_queues.h>
+
+/**
+ * netdev_queue_get_dma_dev() - get DMA device for zero-copy operations
+ * @dev: net_device
+ * @idx: queue index
+ *
+ * Get the DMA device to use for zero-copy operations on the given queue.
+ * If no such device is available, or it is not DMA-capable, NULL is
+ * returned.
+ *
+ * Return: DMA device or NULL on error
+ */
+struct device *netdev_queue_get_dma_dev(struct net_device *dev, int idx)
+{
+ const struct netdev_queue_mgmt_ops *queue_ops = dev->queue_mgmt_ops;
+ struct device *dma_dev;
+
+ if (queue_ops && queue_ops->ndo_queue_get_dma_dev)
+ dma_dev = queue_ops->ndo_queue_get_dma_dev(dev, idx);
+ else
+ dma_dev = dev->dev.parent;
+
+ return dma_dev && dma_dev->dma_mask ? dma_dev : NULL;
+}
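/*
 * Callers in this series (net/core/devmem.c, net/core/netdev-genl.c,
 * io_uring/zcrx.c) use this instead of dereferencing dev->dev.parent
 * directly, e.g.
 *
 *	dma_dev = netdev_queue_get_dma_dev(netdev, rxq_idx);
 *	if (!dma_dev)
 *		return -EOPNOTSUPP;	// queue cannot do DMA
 *
 * so multi-PF devices can report a per-queue DMA device through
 * ndo_queue_get_dma_dev() while everyone else falls back to the parent.
 */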
diff --git a/net/core/netdev_rx_queue.c b/net/core/netdev_rx_queue.c
index 3bf1151d8061..c7d9341b7630 100644
--- a/net/core/netdev_rx_queue.c
+++ b/net/core/netdev_rx_queue.c
@@ -9,6 +9,15 @@
#include "page_pool_priv.h"
+/* See also page_pool_is_unreadable() */
+bool netif_rxq_has_unreadable_mp(struct net_device *dev, int idx)
+{
+ struct netdev_rx_queue *rxq = __netif_get_rx_queue(dev, idx);
+
+ return !!rxq->mp_params.mp_ops;
+}
+EXPORT_SYMBOL(netif_rxq_has_unreadable_mp);
+
int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq_idx)
{
struct netdev_rx_queue *rxq = __netif_get_rx_queue(dev, rxq_idx);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 0ebe5461d4d9..d41b03fd1f63 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -114,6 +114,7 @@
#include <linux/sys.h>
#include <linux/types.h>
+#include <linux/minmax.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/kernel.h>
@@ -2841,8 +2842,7 @@ static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb,
}
i = 0;
- frag_len = (datalen/frags) < PAGE_SIZE ?
- (datalen/frags) : PAGE_SIZE;
+ frag_len = min_t(int, datalen / frags, PAGE_SIZE);
while (datalen > 0) {
if (unlikely(!pkt_dev->page)) {
int node = numa_node_id();
@@ -2859,8 +2859,7 @@ static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb,
if (i == (frags - 1))
skb_frag_fill_page_desc(&skb_shinfo(skb)->frags[i],
pkt_dev->page, 0,
- (datalen < PAGE_SIZE ?
- datalen : PAGE_SIZE));
+ min(datalen, PAGE_SIZE));
else
skb_frag_fill_page_desc(&skb_shinfo(skb)->frags[i],
pkt_dev->page, 0, frag_len);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index ee0274417948..23b776cd9879 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3112,7 +3112,9 @@ static bool __splice_segment(struct page *page, unsigned int poff,
poff += flen;
plen -= flen;
*len -= flen;
- } while (*len && plen);
+ if (!*len)
+ return true;
+ } while (plen);
return false;
}
diff --git a/net/core/sock.c b/net/core/sock.c
index 158bddd23134..02f31f21b4af 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -281,12 +281,12 @@ static struct lock_class_key af_elock_keys[AF_MAX];
static struct lock_class_key af_kern_callback_keys[AF_MAX];
/* Run time adjustable parameters. */
-__u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
+__u32 sysctl_wmem_max __read_mostly = 4 << 20;
EXPORT_SYMBOL(sysctl_wmem_max);
-__u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
+__u32 sysctl_rmem_max __read_mostly = 4 << 20;
EXPORT_SYMBOL(sysctl_rmem_max);
-__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
-__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
+__u32 sysctl_wmem_default __read_mostly = SK_WMEM_DEFAULT;
+__u32 sysctl_rmem_default __read_mostly = SK_RMEM_DEFAULT;
DEFINE_STATIC_KEY_FALSE(memalloc_socks_key);
EXPORT_SYMBOL_GPL(memalloc_socks_key);
@@ -491,13 +491,13 @@ int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
struct sk_buff_head *list = &sk->sk_receive_queue;
if (atomic_read(&sk->sk_rmem_alloc) >= READ_ONCE(sk->sk_rcvbuf)) {
- atomic_inc(&sk->sk_drops);
+ sk_drops_inc(sk);
trace_sock_rcvqueue_full(sk, skb);
return -ENOMEM;
}
if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
- atomic_inc(&sk->sk_drops);
+ sk_drops_inc(sk);
return -ENOBUFS;
}
@@ -562,7 +562,7 @@ int __sk_receive_skb(struct sock *sk, struct sk_buff *skb,
skb->dev = NULL;
if (sk_rcvqueues_full(sk, READ_ONCE(sk->sk_rcvbuf))) {
- atomic_inc(&sk->sk_drops);
+ sk_drops_inc(sk);
reason = SKB_DROP_REASON_SOCKET_RCVBUFF;
goto discard_and_relse;
}
@@ -585,7 +585,7 @@ int __sk_receive_skb(struct sock *sk, struct sk_buff *skb,
reason = SKB_DROP_REASON_PFMEMALLOC;
if (err == -ENOBUFS)
reason = SKB_DROP_REASON_SOCKET_BACKLOG;
- atomic_inc(&sk->sk_drops);
+ sk_drops_inc(sk);
goto discard_and_relse;
}
@@ -1032,7 +1032,7 @@ static int sock_reserve_memory(struct sock *sk, int bytes)
bool charged;
int pages;
- if (!mem_cgroup_sockets_enabled || !sk->sk_memcg || !sk_has_account(sk))
+ if (!mem_cgroup_sk_enabled(sk) || !sk_has_account(sk))
return -EOPNOTSUPP;
if (!bytes)
@@ -1041,8 +1041,8 @@ static int sock_reserve_memory(struct sock *sk, int bytes)
pages = sk_mem_pages(bytes);
/* pre-charge to memcg */
- charged = mem_cgroup_charge_skmem(sk->sk_memcg, pages,
- GFP_KERNEL | __GFP_RETRY_MAYFAIL);
+ charged = mem_cgroup_sk_charge(sk, pages,
+ GFP_KERNEL | __GFP_RETRY_MAYFAIL);
if (!charged)
return -ENOMEM;
@@ -1054,7 +1054,7 @@ static int sock_reserve_memory(struct sock *sk, int bytes)
*/
if (allocated > sk_prot_mem_limits(sk, 1)) {
sk_memory_allocated_sub(sk, pages);
- mem_cgroup_uncharge_skmem(sk->sk_memcg, pages);
+ mem_cgroup_sk_uncharge(sk, pages);
return -ENOMEM;
}
sk_forward_alloc_add(sk, pages << PAGE_SHIFT);
@@ -2505,15 +2505,18 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
newsk->sk_wmem_queued = 0;
newsk->sk_forward_alloc = 0;
newsk->sk_reserved_mem = 0;
- atomic_set(&newsk->sk_drops, 0);
+ DEBUG_NET_WARN_ON_ONCE(newsk->sk_drop_counters);
+ sk_drops_reset(newsk);
newsk->sk_send_head = NULL;
newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
atomic_set(&newsk->sk_zckey, 0);
sock_reset_flag(newsk, SOCK_DONE);
+#ifdef CONFIG_MEMCG
/* sk->sk_memcg will be populated at accept() time */
newsk->sk_memcg = NULL;
+#endif
cgroup_sk_clone(&newsk->sk_cgrp_data);
@@ -2584,7 +2587,7 @@ free:
}
EXPORT_SYMBOL_GPL(sk_clone_lock);
-static u32 sk_dst_gso_max_size(struct sock *sk, struct dst_entry *dst)
+static u32 sk_dst_gso_max_size(struct sock *sk, const struct net_device *dev)
{
bool is_ipv6 = false;
u32 max_size;
@@ -2594,8 +2597,8 @@ static u32 sk_dst_gso_max_size(struct sock *sk, struct dst_entry *dst)
!ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr));
#endif
/* pairs with the WRITE_ONCE() in netif_set_gso(_ipv4)_max_size() */
- max_size = is_ipv6 ? READ_ONCE(dst_dev(dst)->gso_max_size) :
- READ_ONCE(dst_dev(dst)->gso_ipv4_max_size);
+ max_size = is_ipv6 ? READ_ONCE(dev->gso_max_size) :
+ READ_ONCE(dev->gso_ipv4_max_size);
if (max_size > GSO_LEGACY_MAX_SIZE && !sk_is_tcp(sk))
max_size = GSO_LEGACY_MAX_SIZE;
@@ -2604,9 +2607,12 @@ static u32 sk_dst_gso_max_size(struct sock *sk, struct dst_entry *dst)
void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
{
+ const struct net_device *dev;
u32 max_segs = 1;
- sk->sk_route_caps = dst_dev(dst)->features;
+ rcu_read_lock();
+ dev = dst_dev_rcu(dst);
+ sk->sk_route_caps = dev->features;
if (sk_is_tcp(sk)) {
struct inet_connection_sock *icsk = inet_csk(sk);
@@ -2622,13 +2628,14 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
} else {
sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
- sk->sk_gso_max_size = sk_dst_gso_max_size(sk, dst);
+ sk->sk_gso_max_size = sk_dst_gso_max_size(sk, dev);
/* pairs with the WRITE_ONCE() in netif_set_gso_max_segs() */
- max_segs = max_t(u32, READ_ONCE(dst_dev(dst)->gso_max_segs), 1);
+ max_segs = max_t(u32, READ_ONCE(dev->gso_max_segs), 1);
}
}
sk->sk_gso_max_segs = max_segs;
sk_dst_set(sk, dst);
+ rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(sk_setup_caps);
@@ -3241,16 +3248,16 @@ EXPORT_SYMBOL(sk_wait_data);
*/
int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
{
- struct mem_cgroup *memcg = mem_cgroup_sockets_enabled ? sk->sk_memcg : NULL;
+ bool memcg_enabled = false, charged = false;
struct proto *prot = sk->sk_prot;
- bool charged = true;
long allocated;
sk_memory_allocated_add(sk, amt);
allocated = sk_memory_allocated(sk);
- if (memcg) {
- charged = mem_cgroup_charge_skmem(memcg, amt, gfp_memcg_charge());
+ if (mem_cgroup_sk_enabled(sk)) {
+ memcg_enabled = true;
+ charged = mem_cgroup_sk_charge(sk, amt, gfp_memcg_charge());
if (!charged)
goto suppress_allocation;
}
@@ -3324,21 +3331,19 @@ suppress_allocation:
*/
if (sk->sk_wmem_queued + size >= sk->sk_sndbuf) {
/* Force charge with __GFP_NOFAIL */
- if (memcg && !charged) {
- mem_cgroup_charge_skmem(memcg, amt,
- gfp_memcg_charge() | __GFP_NOFAIL);
- }
+ if (memcg_enabled && !charged)
+ mem_cgroup_sk_charge(sk, amt,
+ gfp_memcg_charge() | __GFP_NOFAIL);
return 1;
}
}
- if (kind == SK_MEM_SEND || (kind == SK_MEM_RECV && charged))
- trace_sock_exceed_buf_limit(sk, prot, allocated, kind);
+ trace_sock_exceed_buf_limit(sk, prot, allocated, kind);
sk_memory_allocated_sub(sk, amt);
- if (memcg && charged)
- mem_cgroup_uncharge_skmem(memcg, amt);
+ if (charged)
+ mem_cgroup_sk_uncharge(sk, amt);
return 0;
}
@@ -3376,8 +3381,8 @@ void __sk_mem_reduce_allocated(struct sock *sk, int amount)
{
sk_memory_allocated_sub(sk, amount);
- if (mem_cgroup_sockets_enabled && sk->sk_memcg)
- mem_cgroup_uncharge_skmem(sk->sk_memcg, amount);
+ if (mem_cgroup_sk_enabled(sk))
+ mem_cgroup_sk_uncharge(sk, amount);
if (sk_under_global_memory_pressure(sk) &&
(sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
@@ -3691,7 +3696,7 @@ void sock_init_data_uid(struct socket *sock, struct sock *sk, kuid_t uid)
*/
smp_wmb();
refcount_set(&sk->sk_refcnt, 1);
- atomic_set(&sk->sk_drops, 0);
+ sk_drops_reset(sk);
}
EXPORT_SYMBOL(sock_init_data_uid);
@@ -3951,7 +3956,7 @@ void sk_get_meminfo(const struct sock *sk, u32 *mem)
mem[SK_MEMINFO_WMEM_QUEUED] = READ_ONCE(sk->sk_wmem_queued);
mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
mem[SK_MEMINFO_BACKLOG] = READ_ONCE(sk->sk_backlog.len);
- mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops);
+ mem[SK_MEMINFO_DROPS] = sk_drops_read(sk);
}
#ifdef CONFIG_PROC_FS
@@ -4432,7 +4437,10 @@ static int __init sock_struct_check(void)
CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rxtx, sk_err);
CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rxtx, sk_socket);
+#ifdef CONFIG_MEMCG
CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rxtx, sk_memcg);
+#endif
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rxtx, sk_drop_counters);
CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rxtx, sk_lock);
CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rxtx, sk_reserved_mem);
diff --git a/net/devlink/health.c b/net/devlink/health.c
index b3ce8ecbb7fb..136a67c36a20 100644
--- a/net/devlink/health.c
+++ b/net/devlink/health.c
@@ -60,6 +60,7 @@ struct devlink_health_reporter {
struct devlink_port *devlink_port;
struct devlink_fmsg *dump_fmsg;
u64 graceful_period;
+ u64 burst_period;
bool auto_recover;
bool auto_dump;
u8 health_state;
@@ -108,11 +109,14 @@ devlink_port_health_reporter_find_by_name(struct devlink_port *devlink_port,
static struct devlink_health_reporter *
__devlink_health_reporter_create(struct devlink *devlink,
const struct devlink_health_reporter_ops *ops,
- u64 graceful_period, void *priv)
+ void *priv)
{
struct devlink_health_reporter *reporter;
- if (WARN_ON(graceful_period && !ops->recover))
+ if (WARN_ON(ops->default_graceful_period && !ops->recover))
+ return ERR_PTR(-EINVAL);
+
+ if (WARN_ON(ops->default_burst_period && !ops->default_graceful_period))
return ERR_PTR(-EINVAL);
reporter = kzalloc(sizeof(*reporter), GFP_KERNEL);
@@ -122,7 +126,8 @@ __devlink_health_reporter_create(struct devlink *devlink,
reporter->priv = priv;
reporter->ops = ops;
reporter->devlink = devlink;
- reporter->graceful_period = graceful_period;
+ reporter->graceful_period = ops->default_graceful_period;
+ reporter->burst_period = ops->default_burst_period;
reporter->auto_recover = !!ops->recover;
reporter->auto_dump = !!ops->dump;
return reporter;
@@ -134,13 +139,12 @@ __devlink_health_reporter_create(struct devlink *devlink,
*
* @port: devlink_port to which health reports will relate
* @ops: devlink health reporter ops
- * @graceful_period: min time (in msec) between recovery attempts
* @priv: driver priv pointer
*/
struct devlink_health_reporter *
devl_port_health_reporter_create(struct devlink_port *port,
const struct devlink_health_reporter_ops *ops,
- u64 graceful_period, void *priv)
+ void *priv)
{
struct devlink_health_reporter *reporter;
@@ -150,8 +154,7 @@ devl_port_health_reporter_create(struct devlink_port *port,
ops->name))
return ERR_PTR(-EEXIST);
- reporter = __devlink_health_reporter_create(port->devlink, ops,
- graceful_period, priv);
+ reporter = __devlink_health_reporter_create(port->devlink, ops, priv);
if (IS_ERR(reporter))
return reporter;
@@ -164,14 +167,13 @@ EXPORT_SYMBOL_GPL(devl_port_health_reporter_create);
struct devlink_health_reporter *
devlink_port_health_reporter_create(struct devlink_port *port,
const struct devlink_health_reporter_ops *ops,
- u64 graceful_period, void *priv)
+ void *priv)
{
struct devlink_health_reporter *reporter;
struct devlink *devlink = port->devlink;
devl_lock(devlink);
- reporter = devl_port_health_reporter_create(port, ops,
- graceful_period, priv);
+ reporter = devl_port_health_reporter_create(port, ops, priv);
devl_unlock(devlink);
return reporter;
}
@@ -182,13 +184,12 @@ EXPORT_SYMBOL_GPL(devlink_port_health_reporter_create);
*
* @devlink: devlink instance which the health reports will relate
* @ops: devlink health reporter ops
- * @graceful_period: min time (in msec) between recovery attempts
* @priv: driver priv pointer
*/
struct devlink_health_reporter *
devl_health_reporter_create(struct devlink *devlink,
const struct devlink_health_reporter_ops *ops,
- u64 graceful_period, void *priv)
+ void *priv)
{
struct devlink_health_reporter *reporter;
@@ -197,8 +198,7 @@ devl_health_reporter_create(struct devlink *devlink,
if (devlink_health_reporter_find_by_name(devlink, ops->name))
return ERR_PTR(-EEXIST);
- reporter = __devlink_health_reporter_create(devlink, ops,
- graceful_period, priv);
+ reporter = __devlink_health_reporter_create(devlink, ops, priv);
if (IS_ERR(reporter))
return reporter;
@@ -210,13 +210,12 @@ EXPORT_SYMBOL_GPL(devl_health_reporter_create);
struct devlink_health_reporter *
devlink_health_reporter_create(struct devlink *devlink,
const struct devlink_health_reporter_ops *ops,
- u64 graceful_period, void *priv)
+ void *priv)
{
struct devlink_health_reporter *reporter;
devl_lock(devlink);
- reporter = devl_health_reporter_create(devlink, ops,
- graceful_period, priv);
+ reporter = devl_health_reporter_create(devlink, ops, priv);
devl_unlock(devlink);
return reporter;
}
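/*
 * Driver-side sketch of the reworked API (the foo_* names are made up):
 * the graceful period is now declared in the ops, together with the new
 * burst period, instead of being passed at create time.
 */
static const struct devlink_health_reporter_ops foo_tx_reporter_ops = {
	.name = "tx",
	.recover = foo_tx_reporter_recover,
	.default_graceful_period = 500,	/* msec between auto-recoveries */
	.default_burst_period = 100,	/* msec of tolerated recovery burst */
};

static int foo_health_init(struct devlink *devlink, void *priv)
{
	struct devlink_health_reporter *reporter;

	reporter = devl_health_reporter_create(devlink, &foo_tx_reporter_ops,
					       priv);
	return PTR_ERR_OR_ZERO(reporter);
}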
@@ -298,6 +297,10 @@ devlink_nl_health_reporter_fill(struct sk_buff *msg,
reporter->graceful_period))
goto reporter_nest_cancel;
if (reporter->ops->recover &&
+ devlink_nl_put_u64(msg, DEVLINK_ATTR_HEALTH_REPORTER_BURST_PERIOD,
+ reporter->burst_period))
+ goto reporter_nest_cancel;
+ if (reporter->ops->recover &&
nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER,
reporter->auto_recover))
goto reporter_nest_cancel;
@@ -462,16 +465,33 @@ int devlink_nl_health_reporter_set_doit(struct sk_buff *skb,
if (!reporter->ops->recover &&
(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] ||
- info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER]))
+ info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER] ||
+ info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_BURST_PERIOD]))
return -EOPNOTSUPP;
if (!reporter->ops->dump &&
info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP])
return -EOPNOTSUPP;
- if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD])
+ if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD]) {
reporter->graceful_period =
nla_get_u64(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD]);
+ if (!reporter->graceful_period)
+ reporter->burst_period = 0;
+ }
+
+ if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_BURST_PERIOD]) {
+ u64 burst_period =
+ nla_get_u64(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_BURST_PERIOD]);
+
+ if (!reporter->graceful_period && burst_period) {
+ NL_SET_ERR_MSG_MOD(info->extack,
+ "Cannot set burst period without a grace period.");
+ return -EINVAL;
+ }
+
+ reporter->burst_period = burst_period;
+ }
if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER])
reporter->auto_recover =
@@ -514,11 +534,25 @@ static void devlink_recover_notify(struct devlink_health_reporter *reporter,
devlink_nl_notify_send_desc(devlink, msg, &desc);
}
+static bool
+devlink_health_reporter_in_burst(struct devlink_health_reporter *reporter)
+{
+ unsigned long burst_threshold = reporter->last_recovery_ts +
+ msecs_to_jiffies(reporter->burst_period);
+
+ return time_is_after_jiffies(burst_threshold);
+}
+
void
devlink_health_reporter_recovery_done(struct devlink_health_reporter *reporter)
{
reporter->recovery_count++;
- reporter->last_recovery_ts = jiffies;
+ if (!devlink_health_reporter_in_burst(reporter))
+ /* When burst period is set, last_recovery_ts marks the first
+ * recovery within the burst period, not necessarily the last
+ * one.
+ */
+ reporter->last_recovery_ts = jiffies;
}
EXPORT_SYMBOL_GPL(devlink_health_reporter_recovery_done);
@@ -592,12 +626,37 @@ dump_err:
return err;
}
+static bool
+devlink_health_recover_abort(struct devlink_health_reporter *reporter,
+ enum devlink_health_reporter_state prev_state)
+{
+ unsigned long recover_ts_threshold;
+
+ if (!reporter->auto_recover)
+ return false;
+
+ /* abort if the previous error wasn't recovered */
+ if (prev_state != DEVLINK_HEALTH_REPORTER_STATE_HEALTHY)
+ return true;
+
+ if (devlink_health_reporter_in_burst(reporter))
+ return false;
+
+ recover_ts_threshold = reporter->last_recovery_ts +
+ msecs_to_jiffies(reporter->burst_period) +
+ msecs_to_jiffies(reporter->graceful_period);
+ if (reporter->last_recovery_ts && reporter->recovery_count &&
+ time_is_after_jiffies(recover_ts_threshold))
+ return true;
+
+ return false;
+}
+
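/*
 * Worked timeline (illustrative values): with graceful_period = 5000 ms
 * and burst_period = 1000 ms, errors reported within 1000 ms of
 * last_recovery_ts fall inside the burst window and are recovered
 * without aborting; once the window closes, a new error is aborted
 * unless 1000 + 5000 ms have passed since last_recovery_ts.
 */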
int devlink_health_report(struct devlink_health_reporter *reporter,
const char *msg, void *priv_ctx)
{
enum devlink_health_reporter_state prev_health_state;
struct devlink *devlink = reporter->devlink;
- unsigned long recover_ts_threshold;
int ret;
/* write a log message of the current error */
@@ -608,13 +667,7 @@ int devlink_health_report(struct devlink_health_reporter *reporter,
reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_ERROR;
devlink_recover_notify(reporter, DEVLINK_CMD_HEALTH_REPORTER_RECOVER);
- /* abort if the previous error wasn't recovered */
- recover_ts_threshold = reporter->last_recovery_ts +
- msecs_to_jiffies(reporter->graceful_period);
- if (reporter->auto_recover &&
- (prev_health_state != DEVLINK_HEALTH_REPORTER_STATE_HEALTHY ||
- (reporter->last_recovery_ts && reporter->recovery_count &&
- time_is_after_jiffies(recover_ts_threshold)))) {
+ if (devlink_health_recover_abort(reporter, prev_health_state)) {
trace_devlink_health_recover_aborted(devlink,
reporter->ops->name,
reporter->health_state,
diff --git a/net/devlink/netlink_gen.c b/net/devlink/netlink_gen.c
index d97c326a9045..9fd00977d59e 100644
--- a/net/devlink/netlink_gen.c
+++ b/net/devlink/netlink_gen.c
@@ -389,7 +389,7 @@ static const struct nla_policy devlink_health_reporter_get_dump_nl_policy[DEVLIN
};
/* DEVLINK_CMD_HEALTH_REPORTER_SET - do */
-static const struct nla_policy devlink_health_reporter_set_nl_policy[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP + 1] = {
+static const struct nla_policy devlink_health_reporter_set_nl_policy[DEVLINK_ATTR_HEALTH_REPORTER_BURST_PERIOD + 1] = {
[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
[DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, },
@@ -397,6 +397,7 @@ static const struct nla_policy devlink_health_reporter_set_nl_policy[DEVLINK_ATT
[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] = { .type = NLA_U64, },
[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER] = { .type = NLA_U8, },
[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP] = { .type = NLA_U8, },
+ [DEVLINK_ATTR_HEALTH_REPORTER_BURST_PERIOD] = { .type = NLA_U64, },
};
/* DEVLINK_CMD_HEALTH_REPORTER_RECOVER - do */
@@ -1032,7 +1033,7 @@ const struct genl_split_ops devlink_nl_ops[74] = {
.doit = devlink_nl_health_reporter_set_doit,
.post_doit = devlink_nl_post_doit,
.policy = devlink_health_reporter_set_nl_policy,
- .maxattr = DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP,
+ .maxattr = DEVLINK_ATTR_HEALTH_REPORTER_BURST_PERIOD,
.flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
},
{
diff --git a/net/devlink/port.c b/net/devlink/port.c
index cb8d4df61619..93d8a25bb920 100644
--- a/net/devlink/port.c
+++ b/net/devlink/port.c
@@ -1333,8 +1333,8 @@ int devlink_port_netdevice_event(struct notifier_block *nb,
return NOTIFY_OK;
}
-static int __devlink_port_attrs_set(struct devlink_port *devlink_port,
- enum devlink_port_flavour flavour)
+static void __devlink_port_attrs_set(struct devlink_port *devlink_port,
+ enum devlink_port_flavour flavour)
{
struct devlink_port_attrs *attrs = &devlink_port->attrs;
@@ -1347,7 +1347,6 @@ static int __devlink_port_attrs_set(struct devlink_port *devlink_port,
} else {
devlink_port->switch_port = false;
}
- return 0;
}
/**
@@ -1357,17 +1356,13 @@ static int __devlink_port_attrs_set(struct devlink_port *devlink_port,
* @attrs: devlink port attrs
*/
void devlink_port_attrs_set(struct devlink_port *devlink_port,
- struct devlink_port_attrs *attrs)
+ const struct devlink_port_attrs *attrs)
{
- int ret;
-
ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port);
+ WARN_ON(attrs->splittable && attrs->split);
devlink_port->attrs = *attrs;
- ret = __devlink_port_attrs_set(devlink_port, attrs->flavour);
- if (ret)
- return;
- WARN_ON(attrs->splittable && attrs->split);
+ __devlink_port_attrs_set(devlink_port, attrs->flavour);
}
EXPORT_SYMBOL_GPL(devlink_port_attrs_set);
@@ -1383,14 +1378,10 @@ void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port, u32 controller,
u16 pf, bool external)
{
struct devlink_port_attrs *attrs = &devlink_port->attrs;
- int ret;
ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port);
- ret = __devlink_port_attrs_set(devlink_port,
- DEVLINK_PORT_FLAVOUR_PCI_PF);
- if (ret)
- return;
+ __devlink_port_attrs_set(devlink_port, DEVLINK_PORT_FLAVOUR_PCI_PF);
attrs->pci_pf.controller = controller;
attrs->pci_pf.pf = pf;
attrs->pci_pf.external = external;
@@ -1411,14 +1402,10 @@ void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, u32 controller,
u16 pf, u16 vf, bool external)
{
struct devlink_port_attrs *attrs = &devlink_port->attrs;
- int ret;
ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port);
- ret = __devlink_port_attrs_set(devlink_port,
- DEVLINK_PORT_FLAVOUR_PCI_VF);
- if (ret)
- return;
+ __devlink_port_attrs_set(devlink_port, DEVLINK_PORT_FLAVOUR_PCI_VF);
attrs->pci_vf.controller = controller;
attrs->pci_vf.pf = pf;
attrs->pci_vf.vf = vf;
@@ -1439,14 +1426,10 @@ void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, u32 controller,
u16 pf, u32 sf, bool external)
{
struct devlink_port_attrs *attrs = &devlink_port->attrs;
- int ret;
ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port);
- ret = __devlink_port_attrs_set(devlink_port,
- DEVLINK_PORT_FLAVOUR_PCI_SF);
- if (ret)
- return;
+ __devlink_port_attrs_set(devlink_port, DEVLINK_PORT_FLAVOUR_PCI_SF);
attrs->pci_sf.controller = controller;
attrs->pci_sf.pf = pf;
attrs->pci_sf.sf = sf;
diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index 43a7854e784e..0b2a4d0573b3 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -1014,6 +1014,28 @@ static bool flow_type_hashable(u32 flow_type)
return false;
}
+static bool flow_type_v6(u32 flow_type)
+{
+ switch (flow_type) {
+ case TCP_V6_FLOW:
+ case UDP_V6_FLOW:
+ case SCTP_V6_FLOW:
+ case AH_ESP_V6_FLOW:
+ case AH_V6_FLOW:
+ case ESP_V6_FLOW:
+ case IPV6_FLOW:
+ case GTPU_V6_FLOW:
+ case GTPC_V6_FLOW:
+ case GTPC_TEID_V6_FLOW:
+ case GTPU_EH_V6_FLOW:
+ case GTPU_UL_V6_FLOW:
+ case GTPU_DL_V6_FLOW:
+ return true;
+ }
+
+ return false;
+}
+
/* When adding a new type, update the assert and, if it's hashable, add it to
* the flow_type_hashable switch case.
*/
@@ -1077,6 +1099,9 @@ ethtool_set_rxfh_fields(struct net_device *dev, u32 cmd, void __user *useraddr)
if (rc)
return rc;
+ if (info.data & RXH_IP6_FL && !flow_type_v6(info.flow_type))
+ return -EINVAL;
+
if (info.flow_type & FLOW_RSS && info.rss_context &&
!ops->rxfh_per_ctx_fields)
return -EINVAL;
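
[Editorial sketch] The new ioctl check rejects RXH_IP6_FL (hashing on the IPv6 flow label) for any flow type that is not IPv6. The same gate in a compact form, with local stand-ins for the ethtool constants (values are placeholders, not the uapi ones):

#include <stdbool.h>
#include <stdint.h>

enum { TCP_V4, UDP_V4, TCP_V6, UDP_V6, IPV6 };	/* illustrative subset */
#define RXH_IP6_FL (1u << 15)			/* placeholder bit */

static bool flow_type_v6(int flow_type)
{
	switch (flow_type) {
	case TCP_V6:
	case UDP_V6:
	case IPV6:
		return true;
	}
	return false;
}

/* Mirrors the ethtool_set_rxfh_fields() gate: the flow-label bit is
 * only meaningful on IPv6 flow types. */
static int check_fields(int flow_type, uint32_t data)
{
	if ((data & RXH_IP6_FL) && !flow_type_v6(flow_type))
		return -1;	/* -EINVAL in the kernel */
	return 0;
}
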
diff --git a/net/ethtool/rss.c b/net/ethtool/rss.c
index 992e98abe9dd..202d95e8bf3e 100644
--- a/net/ethtool/rss.c
+++ b/net/ethtool/rss.c
@@ -536,35 +536,36 @@ void ethtool_rss_notify(struct net_device *dev, u32 type, u32 rss_context)
#define RFH_MASK (RXH_L2DA | RXH_VLAN | RXH_IP_SRC | RXH_IP_DST | \
RXH_L3_PROTO | RXH_L4_B_0_1 | RXH_L4_B_2_3 | \
RXH_GTP_TEID | RXH_DISCARD)
+#define RFH_MASKv6 (RFH_MASK | RXH_IP6_FL)
static const struct nla_policy ethnl_rss_flows_policy[] = {
[ETHTOOL_A_FLOW_ETHER] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
[ETHTOOL_A_FLOW_IP4] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
- [ETHTOOL_A_FLOW_IP6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
+ [ETHTOOL_A_FLOW_IP6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASKv6),
[ETHTOOL_A_FLOW_TCP4] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
[ETHTOOL_A_FLOW_UDP4] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
[ETHTOOL_A_FLOW_SCTP4] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
[ETHTOOL_A_FLOW_AH_ESP4] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
- [ETHTOOL_A_FLOW_TCP6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
- [ETHTOOL_A_FLOW_UDP6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
- [ETHTOOL_A_FLOW_SCTP6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
- [ETHTOOL_A_FLOW_AH_ESP6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
+ [ETHTOOL_A_FLOW_TCP6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASKv6),
+ [ETHTOOL_A_FLOW_UDP6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASKv6),
+ [ETHTOOL_A_FLOW_SCTP6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASKv6),
+ [ETHTOOL_A_FLOW_AH_ESP6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASKv6),
[ETHTOOL_A_FLOW_AH4] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
[ETHTOOL_A_FLOW_ESP4] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
- [ETHTOOL_A_FLOW_AH6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
- [ETHTOOL_A_FLOW_ESP6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
+ [ETHTOOL_A_FLOW_AH6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASKv6),
+ [ETHTOOL_A_FLOW_ESP6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASKv6),
[ETHTOOL_A_FLOW_GTPU4] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
- [ETHTOOL_A_FLOW_GTPU6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
+ [ETHTOOL_A_FLOW_GTPU6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASKv6),
[ETHTOOL_A_FLOW_GTPC4] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
- [ETHTOOL_A_FLOW_GTPC6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
+ [ETHTOOL_A_FLOW_GTPC6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASKv6),
[ETHTOOL_A_FLOW_GTPC_TEID4] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
- [ETHTOOL_A_FLOW_GTPC_TEID6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
+ [ETHTOOL_A_FLOW_GTPC_TEID6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASKv6),
[ETHTOOL_A_FLOW_GTPU_EH4] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
- [ETHTOOL_A_FLOW_GTPU_EH6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
+ [ETHTOOL_A_FLOW_GTPU_EH6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASKv6),
[ETHTOOL_A_FLOW_GTPU_UL4] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
- [ETHTOOL_A_FLOW_GTPU_UL6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
+ [ETHTOOL_A_FLOW_GTPU_UL6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASKv6),
[ETHTOOL_A_FLOW_GTPU_DL4] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
- [ETHTOOL_A_FLOW_GTPU_DL6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
+ [ETHTOOL_A_FLOW_GTPU_DL6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASKv6),
};
const struct nla_policy ethnl_rss_set_policy[ETHTOOL_A_RSS_FLOW_HASH + 1] = {
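
[Editorial sketch] RFH_MASKv6 simply widens the accepted bit set for the IPv6 rows; NLA_POLICY_MASK() makes the parser reject any attribute carrying bits outside the mask. The idea in miniature (mask values are placeholders):

#include <stdint.h>

#define RFH_MASK   0x01ffu		/* placeholder base mask */
#define RXH_IP6_FL 0x8000u		/* placeholder flow-label bit */
#define RFH_MASKv6 (RFH_MASK | RXH_IP6_FL)

/* A masked-uint check in the spirit of NLA_POLICY_MASK(): any set bit
 * outside the allowed mask fails validation. */
static int policy_mask_ok(uint32_t value, uint32_t mask)
{
	return (value & ~mask) == 0;
}

/* policy_mask_ok(RXH_IP6_FL, RFH_MASK)   -> 0 (rejected, IPv4 row)
 * policy_mask_ok(RXH_IP6_FL, RFH_MASKv6) -> 1 (accepted, IPv6 row) */
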
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 5cfc1c939673..833f2cf97178 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -170,7 +170,7 @@ struct neigh_table arp_tbl = {
[NEIGH_VAR_DELAY_PROBE_TIME] = 5 * HZ,
[NEIGH_VAR_INTERVAL_PROBE_TIME_MS] = 5 * HZ,
[NEIGH_VAR_GC_STALETIME] = 60 * HZ,
- [NEIGH_VAR_QUEUE_LEN_BYTES] = SK_WMEM_MAX,
+ [NEIGH_VAR_QUEUE_LEN_BYTES] = SK_WMEM_DEFAULT,
[NEIGH_VAR_PROXY_QLEN] = 64,
[NEIGH_VAR_ANYCAST_DELAY] = 1 * HZ,
[NEIGH_VAR_PROXY_DELAY] = (8 * HZ) / 10,
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index f14a41ee4aa1..2c922afadb8f 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -132,8 +132,8 @@ static struct sock *esp_find_tcp_sk(struct xfrm_state *x)
dport = encap->encap_dport;
spin_unlock_bh(&x->lock);
- sk = inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, x->id.daddr.a4,
- dport, x->props.saddr.a4, sport, 0);
+ sk = inet_lookup_established(net, x->id.daddr.a4, dport,
+ x->props.saddr.a4, sport, 0);
if (!sk)
return ERR_PTR(-ENOENT);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 6e1b94796f67..1dab44e13d3b 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -32,6 +32,7 @@
#include <linux/list.h>
#include <linux/slab.h>
+#include <net/flow.h>
#include <net/inet_dscp.h>
#include <net/ip.h>
#include <net/protocol.h>
@@ -293,7 +294,7 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb)
.flowi4_iif = LOOPBACK_IFINDEX,
.flowi4_l3mdev = l3mdev_master_ifindex_rcu(dev),
.daddr = ip_hdr(skb)->saddr,
- .flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(ip_hdr(skb))),
+ .flowi4_dscp = ip4h_dscp(ip_hdr(skb)),
.flowi4_scope = scope,
.flowi4_mark = vmark ? skb->mark : 0,
};
@@ -358,7 +359,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
fl4.flowi4_iif = oif ? : LOOPBACK_IFINDEX;
fl4.daddr = src;
fl4.saddr = dst;
- fl4.flowi4_tos = inet_dscp_to_dsfield(dscp);
+ fl4.flowi4_dscp = dscp;
fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
fl4.flowi4_tun_key.tun_id = 0;
fl4.flowi4_flags = 0;
@@ -1372,7 +1373,7 @@ static void nl_fib_lookup(struct net *net, struct fib_result_nl *frn)
struct flowi4 fl4 = {
.flowi4_mark = frn->fl_mark,
.daddr = frn->fl_addr,
- .flowi4_tos = frn->fl_tos & INET_DSCP_MASK,
+ .flowi4_dscp = inet_dsfield_to_dscp(frn->fl_tos),
.flowi4_scope = frn->fl_scope,
};
struct fib_table *tb;
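
[Editorial sketch] The recurring flowi4_tos -> flowi4_dscp substitution through this series drops the per-call-site inet_dscp_to_dsfield()/inet_dsfield_to_dscp() conversions. The underlying bit layout, sketched (the kernel wraps this in a dedicated dscp_t type for type safety):

#include <stdint.h>

#define INET_ECN_MASK	0x03u	/* low 2 bits of the dsfield: ECN */
#define INET_DSCP_MASK	0xfcu	/* high 6 bits of the dsfield: DSCP */

typedef uint8_t dscp_t;		/* stand-in for the kernel typedef */

static inline dscp_t dsfield_to_dscp(uint8_t dsfield)
{
	return dsfield & INET_DSCP_MASK;	/* strip the ECN bits */
}

static inline uint8_t dscp_to_dsfield(dscp_t dscp)
{
	/* DSCP already occupies the dsfield bit positions. */
	return dscp;
}
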
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index fa58d6620ed6..51f0193092f0 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -23,6 +23,7 @@
#include <linux/list.h>
#include <linux/rcupdate.h>
#include <linux/export.h>
+#include <net/flow.h>
#include <net/inet_dscp.h>
#include <net/ip.h>
#include <net/route.h>
@@ -193,8 +194,7 @@ INDIRECT_CALLABLE_SCOPE int fib4_rule_match(struct fib_rule *rule,
* to mask the upper three DSCP bits prior to matching to maintain
* legacy behavior.
*/
- if (r->dscp_full &&
- (r->dscp ^ inet_dsfield_to_dscp(fl4->flowi4_tos)) & r->dscp_mask)
+ if (r->dscp_full && (r->dscp ^ fl4->flowi4_dscp) & r->dscp_mask)
return 0;
else if (!r->dscp_full && r->dscp &&
!fib_dscp_masked_match(r->dscp, fl4))
diff --git a/net/ipv4/fou_nl.c b/net/ipv4/fou_nl.c
index 3d9614609b2d..506260b4a4dc 100644
--- a/net/ipv4/fou_nl.c
+++ b/net/ipv4/fou_nl.c
@@ -18,9 +18,9 @@ const struct nla_policy fou_nl_policy[FOU_ATTR_IFINDEX + 1] = {
[FOU_ATTR_TYPE] = { .type = NLA_U8, },
[FOU_ATTR_REMCSUM_NOPARTIAL] = { .type = NLA_FLAG, },
[FOU_ATTR_LOCAL_V4] = { .type = NLA_U32, },
- [FOU_ATTR_LOCAL_V6] = { .len = 16, },
+ [FOU_ATTR_LOCAL_V6] = NLA_POLICY_EXACT_LEN(16),
[FOU_ATTR_PEER_V4] = { .type = NLA_U32, },
- [FOU_ATTR_PEER_V6] = { .len = 16, },
+ [FOU_ATTR_PEER_V6] = NLA_POLICY_EXACT_LEN(16),
[FOU_ATTR_PEER_PORT] = { .type = NLA_BE16, },
[FOU_ATTR_IFINDEX] = { .type = NLA_S32, },
};
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index c48c572f024d..863bf5023f2a 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -72,6 +72,7 @@
#include <linux/string.h>
#include <linux/netfilter_ipv4.h>
#include <linux/slab.h>
+#include <net/flow.h>
#include <net/snmp.h>
#include <net/ip.h>
#include <net/route.h>
@@ -318,17 +319,17 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
return true;
/* No rate limit on loopback */
- dev = dst_dev(dst);
+ rcu_read_lock();
+ dev = dst_dev_rcu(dst);
if (dev && (dev->flags & IFF_LOOPBACK))
goto out;
- rcu_read_lock();
peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr,
l3mdev_master_ifindex_rcu(dev));
rc = inet_peer_xrlim_allow(peer,
READ_ONCE(net->ipv4.sysctl_icmp_ratelimit));
- rcu_read_unlock();
out:
+ rcu_read_unlock();
if (!rc)
__ICMP_INC_STATS(net, ICMP_MIB_RATELIMITHOST);
else
@@ -444,7 +445,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
fl4.saddr = saddr;
fl4.flowi4_mark = mark;
fl4.flowi4_uid = sock_net_uid(net, NULL);
- fl4.flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(ip_hdr(skb)));
+ fl4.flowi4_dscp = ip4h_dscp(ip_hdr(skb));
fl4.flowi4_proto = IPPROTO_ICMP;
fl4.flowi4_oif = l3mdev_master_ifindex(skb->dev);
security_skb_classify_flow(skb, flowi4_to_flowi_common(&fl4));
@@ -495,7 +496,7 @@ static struct rtable *icmp_route_lookup(struct net *net, struct flowi4 *fl4,
fl4->saddr = saddr;
fl4->flowi4_mark = mark;
fl4->flowi4_uid = sock_net_uid(net, NULL);
- fl4->flowi4_tos = inet_dscp_to_dsfield(dscp);
+ fl4->flowi4_dscp = dscp;
fl4->flowi4_proto = IPPROTO_ICMP;
fl4->fl4_icmp_type = type;
fl4->fl4_icmp_code = code;
@@ -544,14 +545,15 @@ static struct rtable *icmp_route_lookup(struct net *net, struct flowi4 *fl4,
goto relookup_failed;
}
/* Ugh! */
- orefdst = skb_in->_skb_refdst; /* save old refdst */
- skb_dst_set(skb_in, NULL);
+ orefdst = skb_dstref_steal(skb_in);
err = ip_route_input(skb_in, fl4_dec.daddr, fl4_dec.saddr,
dscp, rt2->dst.dev) ? -EINVAL : 0;
dst_release(&rt2->dst);
rt2 = skb_rtable(skb_in);
- skb_in->_skb_refdst = orefdst; /* restore old refdst */
+ /* steal dst entry from skb_in, don't drop refcnt */
+ skb_dstref_steal(skb_in);
+ skb_dstref_restore(skb_in, orefdst);
}
if (err)
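
[Editorial sketch] skb_dstref_steal()/skb_dstref_restore() replace the open-coded save of skb->_skb_refdst. The pattern is: take the reference-carrying word out of the skb, run a lookup that may install a new dst, then put the original back. A generic sketch of that ownership hand-off on a toy refcounted object (not the skb API):

#include <stddef.h>

struct dst { int refcnt; };

struct skb { struct dst *dst; /* owns one reference when non-NULL */ };

/* Detach the dst and its reference from the skb; caller now owns it. */
static struct dst *dstref_steal(struct skb *skb)
{
	struct dst *old = skb->dst;

	skb->dst = NULL;
	return old;
}

/* Give a previously stolen reference back to the skb. */
static void dstref_restore(struct skb *skb, struct dst *old)
{
	skb->dst = old;
}

/* Usage, loosely mirroring icmp_route_lookup():
 *   old = dstref_steal(skb);   // save the current route + ref
 *   route_input(skb, ...);     // may install a different dst
 *   dstref_steal(skb);         // detach the new dst word, keep its ref
 *   dstref_restore(skb, old);  // reinstate the original route
 */
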
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 1e2df51427fe..142ff8d86fc2 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -706,9 +706,9 @@ struct sock *inet_csk_accept(struct sock *sk, struct proto_accept_arg *arg)
spin_unlock_bh(&queue->fastopenq.lock);
}
-out:
release_sock(sk);
- if (newsk && mem_cgroup_sockets_enabled) {
+
+ if (mem_cgroup_sockets_enabled) {
gfp_t gfp = GFP_KERNEL | __GFP_NOFAIL;
int amt = 0;
@@ -718,7 +718,7 @@ out:
lock_sock(newsk);
mem_cgroup_sk_alloc(newsk);
- if (newsk->sk_memcg) {
+ if (mem_cgroup_from_sk(newsk)) {
/* The socket has not been accepted yet, no need
* to look at newsk->sk_wmem_queued.
*/
@@ -727,23 +727,22 @@ out:
}
if (amt)
- mem_cgroup_charge_skmem(newsk->sk_memcg, amt, gfp);
+ mem_cgroup_sk_charge(newsk, amt, gfp);
kmem_cache_charge(newsk, gfp);
release_sock(newsk);
}
+
if (req)
reqsk_put(req);
- if (newsk)
- inet_init_csk_locks(newsk);
-
+ inet_init_csk_locks(newsk);
return newsk;
+
out_err:
- newsk = NULL;
- req = NULL;
+ release_sock(sk);
arg->err = error;
- goto out;
+ return NULL;
}
EXPORT_SYMBOL(inet_csk_accept);
@@ -1297,12 +1296,19 @@ void inet_csk_destroy_sock(struct sock *sk)
xfrm_sk_free_policy(sk);
- this_cpu_dec(*sk->sk_prot->orphan_count);
+ tcp_orphan_count_dec();
sock_put(sk);
}
EXPORT_SYMBOL(inet_csk_destroy_sock);
+void inet_csk_prepare_for_destroy_sock(struct sock *sk)
+{
+ /* The below has to be done to allow calling inet_csk_destroy_sock */
+ sock_set_flag(sk, SOCK_DEAD);
+ tcp_orphan_count_inc();
+}
+
/* This function allows to force a closure of a socket after the call to
* tcp_create_openreq_child().
*/
@@ -1370,7 +1376,7 @@ static void inet_child_forget(struct sock *sk, struct request_sock *req,
sock_orphan(child);
- this_cpu_inc(*sk->sk_prot->orphan_count);
+ tcp_orphan_count_inc();
if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->tfo_listener) {
BUG_ON(rcu_access_pointer(tcp_sk(child)->fastopen_rsk) != req);
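
[Editorial sketch] tcp_orphan_count_inc()/dec() wrap what used to be open-coded this_cpu_inc()/dec() on sk->sk_prot->orphan_count. Per-CPU counters trade exact reads for contention-free updates; a userspace approximation with C11 relaxed atomics (sched_getcpu() stands in for the kernel's implicit CPU locality):

#define _GNU_SOURCE
#include <sched.h>
#include <stdatomic.h>

#define NR_CPUS 64

static _Atomic long orphan_count[NR_CPUS];

static void orphan_inc(void)
{
	atomic_fetch_add_explicit(&orphan_count[sched_getcpu() % NR_CPUS],
				  1, memory_order_relaxed);
}

static void orphan_dec(void)
{
	atomic_fetch_sub_explicit(&orphan_count[sched_getcpu() % NR_CPUS],
				  1, memory_order_relaxed);
}

/* Readers sum all slots; the total may be transiently inexact, which
 * is acceptable for an orphan-socket heuristic. */
static long orphan_total(void)
{
	long sum = 0;

	for (int i = 0; i < NR_CPUS; i++)
		sum += atomic_load_explicit(&orphan_count[i],
					    memory_order_relaxed);
	return sum;
}
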
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 2fa53b16fe77..f0b6c5a411a2 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -20,9 +20,6 @@
#include <net/ipv6.h>
#include <net/inet_common.h>
#include <net/inet_connection_sock.h>
-#include <net/inet_hashtables.h>
-#include <net/inet_timewait_sock.h>
-#include <net/inet6_hashtables.h>
#include <net/bpf_sk_storage.h>
#include <net/netlink.h>
@@ -74,54 +71,29 @@ static void inet_diag_unlock_handler(const struct inet_diag_handler *handler)
void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk)
{
- r->idiag_family = sk->sk_family;
+ r->idiag_family = READ_ONCE(sk->sk_family);
- r->id.idiag_sport = htons(sk->sk_num);
- r->id.idiag_dport = sk->sk_dport;
- r->id.idiag_if = sk->sk_bound_dev_if;
+ r->id.idiag_sport = htons(READ_ONCE(sk->sk_num));
+ r->id.idiag_dport = READ_ONCE(sk->sk_dport);
+ r->id.idiag_if = READ_ONCE(sk->sk_bound_dev_if);
sock_diag_save_cookie(sk, r->id.idiag_cookie);
#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family == AF_INET6) {
- *(struct in6_addr *)r->id.idiag_src = sk->sk_v6_rcv_saddr;
- *(struct in6_addr *)r->id.idiag_dst = sk->sk_v6_daddr;
+ if (r->idiag_family == AF_INET6) {
+ data_race(*(struct in6_addr *)r->id.idiag_src = sk->sk_v6_rcv_saddr);
+ data_race(*(struct in6_addr *)r->id.idiag_dst = sk->sk_v6_daddr);
} else
#endif
{
memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src));
memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst));
- r->id.idiag_src[0] = sk->sk_rcv_saddr;
- r->id.idiag_dst[0] = sk->sk_daddr;
+ r->id.idiag_src[0] = READ_ONCE(sk->sk_rcv_saddr);
+ r->id.idiag_dst[0] = READ_ONCE(sk->sk_daddr);
}
}
EXPORT_SYMBOL_GPL(inet_diag_msg_common_fill);
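
[Editorial sketch] inet_diag_msg_common_fill() now samples socket fields with READ_ONCE()/data_race() because diag dumps run locklessly against sockets that may change underneath them. In C11 terms READ_ONCE() is close to a relaxed atomic load; a sketch:

#include <stdatomic.h>
#include <stdint.h>

/* A field written by one thread and sampled by a diag-style reader.
 * A relaxed load yields a tear-free value but imposes no ordering --
 * roughly the contract READ_ONCE() provides for word-sized fields. */
static _Atomic uint16_t sk_family;

static uint16_t read_family_once(void)
{
	return atomic_load_explicit(&sk_family, memory_order_relaxed);
}

/* The 128-bit IPv6 address copies cannot be loaded atomically, hence
 * the data_race() annotation above: that copy may tear, and it is an
 * accepted, documented race rather than a bug. */
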
-static size_t inet_sk_attr_size(struct sock *sk,
- const struct inet_diag_req_v2 *req,
- bool net_admin)
-{
- const struct inet_diag_handler *handler;
- size_t aux = 0;
-
- rcu_read_lock();
- handler = rcu_dereference(inet_diag_table[req->sdiag_protocol]);
- DEBUG_NET_WARN_ON_ONCE(!handler);
- if (handler && handler->idiag_get_aux_size)
- aux = handler->idiag_get_aux_size(sk, net_admin);
- rcu_read_unlock();
-
- return nla_total_size(sizeof(struct tcp_info))
- + nla_total_size(sizeof(struct inet_diag_msg))
- + inet_diag_msg_attrs_size()
- + nla_total_size(sizeof(struct inet_diag_meminfo))
- + nla_total_size(SK_MEMINFO_VARS * sizeof(u32))
- + nla_total_size(TCP_CA_NAME_MAX)
- + nla_total_size(sizeof(struct tcpvegas_info))
- + aux
- + 64;
-}
-
int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
struct inet_diag_msg *r, int ext,
struct user_namespace *user_ns,
@@ -313,17 +285,17 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
icsk_pending == ICSK_TIME_REO_TIMEOUT ||
icsk_pending == ICSK_TIME_LOSS_PROBE) {
r->idiag_timer = 1;
- r->idiag_retrans = icsk->icsk_retransmits;
+ r->idiag_retrans = READ_ONCE(icsk->icsk_retransmits);
r->idiag_expires =
jiffies_delta_to_msecs(icsk_timeout(icsk) - jiffies);
} else if (icsk_pending == ICSK_TIME_PROBE0) {
r->idiag_timer = 4;
- r->idiag_retrans = icsk->icsk_probes_out;
+ r->idiag_retrans = READ_ONCE(icsk->icsk_probes_out);
r->idiag_expires =
jiffies_delta_to_msecs(icsk_timeout(icsk) - jiffies);
} else if (timer_pending(&sk->sk_timer)) {
r->idiag_timer = 2;
- r->idiag_retrans = icsk->icsk_probes_out;
+ r->idiag_retrans = READ_ONCE(icsk->icsk_probes_out);
r->idiag_expires =
jiffies_delta_to_msecs(sk->sk_timer.expires - jiffies);
}
@@ -422,183 +394,6 @@ errout:
}
EXPORT_SYMBOL_GPL(inet_sk_diag_fill);
-static int inet_twsk_diag_fill(struct sock *sk,
- struct sk_buff *skb,
- struct netlink_callback *cb,
- u16 nlmsg_flags, bool net_admin)
-{
- struct inet_timewait_sock *tw = inet_twsk(sk);
- struct inet_diag_msg *r;
- struct nlmsghdr *nlh;
- long tmo;
-
- nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, cb->nlh->nlmsg_type,
- sizeof(*r), nlmsg_flags);
- if (!nlh)
- return -EMSGSIZE;
-
- r = nlmsg_data(nlh);
- BUG_ON(tw->tw_state != TCP_TIME_WAIT);
-
- inet_diag_msg_common_fill(r, sk);
- r->idiag_retrans = 0;
-
- r->idiag_state = READ_ONCE(tw->tw_substate);
- r->idiag_timer = 3;
- tmo = tw->tw_timer.expires - jiffies;
- r->idiag_expires = jiffies_delta_to_msecs(tmo);
- r->idiag_rqueue = 0;
- r->idiag_wqueue = 0;
- r->idiag_uid = 0;
- r->idiag_inode = 0;
-
- if (net_admin && nla_put_u32(skb, INET_DIAG_MARK,
- tw->tw_mark)) {
- nlmsg_cancel(skb, nlh);
- return -EMSGSIZE;
- }
-
- nlmsg_end(skb, nlh);
- return 0;
-}
-
-static int inet_req_diag_fill(struct sock *sk, struct sk_buff *skb,
- struct netlink_callback *cb,
- u16 nlmsg_flags, bool net_admin)
-{
- struct request_sock *reqsk = inet_reqsk(sk);
- struct inet_diag_msg *r;
- struct nlmsghdr *nlh;
- long tmo;
-
- nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
- cb->nlh->nlmsg_type, sizeof(*r), nlmsg_flags);
- if (!nlh)
- return -EMSGSIZE;
-
- r = nlmsg_data(nlh);
- inet_diag_msg_common_fill(r, sk);
- r->idiag_state = TCP_SYN_RECV;
- r->idiag_timer = 1;
- r->idiag_retrans = reqsk->num_retrans;
-
- BUILD_BUG_ON(offsetof(struct inet_request_sock, ir_cookie) !=
- offsetof(struct sock, sk_cookie));
-
- tmo = inet_reqsk(sk)->rsk_timer.expires - jiffies;
- r->idiag_expires = jiffies_delta_to_msecs(tmo);
- r->idiag_rqueue = 0;
- r->idiag_wqueue = 0;
- r->idiag_uid = 0;
- r->idiag_inode = 0;
-
- if (net_admin && nla_put_u32(skb, INET_DIAG_MARK,
- inet_rsk(reqsk)->ir_mark)) {
- nlmsg_cancel(skb, nlh);
- return -EMSGSIZE;
- }
-
- nlmsg_end(skb, nlh);
- return 0;
-}
-
-static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
- struct netlink_callback *cb,
- const struct inet_diag_req_v2 *r,
- u16 nlmsg_flags, bool net_admin)
-{
- if (sk->sk_state == TCP_TIME_WAIT)
- return inet_twsk_diag_fill(sk, skb, cb, nlmsg_flags, net_admin);
-
- if (sk->sk_state == TCP_NEW_SYN_RECV)
- return inet_req_diag_fill(sk, skb, cb, nlmsg_flags, net_admin);
-
- return inet_sk_diag_fill(sk, inet_csk(sk), skb, cb, r, nlmsg_flags,
- net_admin);
-}
-
-struct sock *inet_diag_find_one_icsk(struct net *net,
- struct inet_hashinfo *hashinfo,
- const struct inet_diag_req_v2 *req)
-{
- struct sock *sk;
-
- rcu_read_lock();
- if (req->sdiag_family == AF_INET)
- sk = inet_lookup(net, hashinfo, NULL, 0, req->id.idiag_dst[0],
- req->id.idiag_dport, req->id.idiag_src[0],
- req->id.idiag_sport, req->id.idiag_if);
-#if IS_ENABLED(CONFIG_IPV6)
- else if (req->sdiag_family == AF_INET6) {
- if (ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_dst) &&
- ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_src))
- sk = inet_lookup(net, hashinfo, NULL, 0, req->id.idiag_dst[3],
- req->id.idiag_dport, req->id.idiag_src[3],
- req->id.idiag_sport, req->id.idiag_if);
- else
- sk = inet6_lookup(net, hashinfo, NULL, 0,
- (struct in6_addr *)req->id.idiag_dst,
- req->id.idiag_dport,
- (struct in6_addr *)req->id.idiag_src,
- req->id.idiag_sport,
- req->id.idiag_if);
- }
-#endif
- else {
- rcu_read_unlock();
- return ERR_PTR(-EINVAL);
- }
- rcu_read_unlock();
- if (!sk)
- return ERR_PTR(-ENOENT);
-
- if (sock_diag_check_cookie(sk, req->id.idiag_cookie)) {
- sock_gen_put(sk);
- return ERR_PTR(-ENOENT);
- }
-
- return sk;
-}
-EXPORT_SYMBOL_GPL(inet_diag_find_one_icsk);
-
-int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
- struct netlink_callback *cb,
- const struct inet_diag_req_v2 *req)
-{
- struct sk_buff *in_skb = cb->skb;
- bool net_admin = netlink_net_capable(in_skb, CAP_NET_ADMIN);
- struct net *net = sock_net(in_skb->sk);
- struct sk_buff *rep;
- struct sock *sk;
- int err;
-
- sk = inet_diag_find_one_icsk(net, hashinfo, req);
- if (IS_ERR(sk))
- return PTR_ERR(sk);
-
- rep = nlmsg_new(inet_sk_attr_size(sk, req, net_admin), GFP_KERNEL);
- if (!rep) {
- err = -ENOMEM;
- goto out;
- }
-
- err = sk_diag_fill(sk, rep, cb, req, 0, net_admin);
- if (err < 0) {
- WARN_ON(err == -EMSGSIZE);
- nlmsg_free(rep);
- goto out;
- }
- err = nlmsg_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid);
-
-out:
- if (sk)
- sock_gen_put(sk);
-
- return err;
-}
-EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk);
-
static int inet_diag_cmd_exact(int cmd, struct sk_buff *in_skb,
const struct nlmsghdr *nlh,
int hdrlen,
@@ -785,7 +580,7 @@ static void entry_fill_addrs(struct inet_diag_entry *entry,
const struct sock *sk)
{
#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family == AF_INET6) {
+ if (entry->family == AF_INET6) {
entry->saddr = sk->sk_v6_rcv_saddr.s6_addr32;
entry->daddr = sk->sk_v6_daddr.s6_addr32;
} else
@@ -796,31 +591,36 @@ static void entry_fill_addrs(struct inet_diag_entry *entry,
}
}
-int inet_diag_bc_sk(const struct nlattr *bc, struct sock *sk)
+int inet_diag_bc_sk(const struct inet_diag_dump_data *cb_data, struct sock *sk)
{
- struct inet_sock *inet = inet_sk(sk);
+ const struct nlattr *bc = cb_data->inet_diag_nla_bc;
+ const struct inet_sock *inet = inet_sk(sk);
struct inet_diag_entry entry;
if (!bc)
return 1;
- entry.family = sk->sk_family;
+ entry.family = READ_ONCE(sk->sk_family);
entry_fill_addrs(&entry, sk);
- entry.sport = inet->inet_num;
- entry.dport = ntohs(inet->inet_dport);
- entry.ifindex = sk->sk_bound_dev_if;
- entry.userlocks = sk_fullsock(sk) ? sk->sk_userlocks : 0;
- if (sk_fullsock(sk))
- entry.mark = READ_ONCE(sk->sk_mark);
- else if (sk->sk_state == TCP_NEW_SYN_RECV)
- entry.mark = inet_rsk(inet_reqsk(sk))->ir_mark;
- else if (sk->sk_state == TCP_TIME_WAIT)
- entry.mark = inet_twsk(sk)->tw_mark;
- else
- entry.mark = 0;
+ entry.sport = READ_ONCE(inet->inet_num);
+ entry.dport = ntohs(READ_ONCE(inet->inet_dport));
+ entry.ifindex = READ_ONCE(sk->sk_bound_dev_if);
+ if (cb_data->userlocks_needed)
+ entry.userlocks = sk_fullsock(sk) ? READ_ONCE(sk->sk_userlocks) : 0;
+ if (cb_data->mark_needed) {
+ if (sk_fullsock(sk))
+ entry.mark = READ_ONCE(sk->sk_mark);
+ else if (sk->sk_state == TCP_NEW_SYN_RECV)
+ entry.mark = inet_rsk(inet_reqsk(sk))->ir_mark;
+ else if (sk->sk_state == TCP_TIME_WAIT)
+ entry.mark = inet_twsk(sk)->tw_mark;
+ else
+ entry.mark = 0;
+ }
#ifdef CONFIG_SOCK_CGROUP_DATA
- entry.cgroup_id = sk_fullsock(sk) ?
- cgroup_id(sock_cgroup_ptr(&sk->sk_cgrp_data)) : 0;
+ if (cb_data->cgroup_needed)
+ entry.cgroup_id = sk_fullsock(sk) ?
+ cgroup_id(sock_cgroup_ptr(&sk->sk_cgrp_data)) : 0;
#endif
return inet_diag_bc_run(bc, &entry);
@@ -920,16 +720,21 @@ static bool valid_cgroupcond(const struct inet_diag_bc_op *op, int len,
}
#endif
-static int inet_diag_bc_audit(const struct nlattr *attr,
+static int inet_diag_bc_audit(struct inet_diag_dump_data *cb_data,
const struct sk_buff *skb)
{
- bool net_admin = netlink_net_capable(skb, CAP_NET_ADMIN);
+ const struct nlattr *attr = cb_data->inet_diag_nla_bc;
const void *bytecode, *bc;
int bytecode_len, len;
+ bool net_admin;
+
+ if (!attr)
+ return 0;
- if (!attr || nla_len(attr) < sizeof(struct inet_diag_bc_op))
+ if (nla_len(attr) < sizeof(struct inet_diag_bc_op))
return -EINVAL;
+ net_admin = netlink_net_capable(skb, CAP_NET_ADMIN);
bytecode = bc = nla_data(attr);
len = bytecode_len = nla_len(attr);
@@ -961,14 +766,18 @@ static int inet_diag_bc_audit(const struct nlattr *attr,
return -EPERM;
if (!valid_markcond(bc, len, &min_len))
return -EINVAL;
+ cb_data->mark_needed = true;
break;
#ifdef CONFIG_SOCK_CGROUP_DATA
case INET_DIAG_BC_CGROUP_COND:
if (!valid_cgroupcond(bc, len, &min_len))
return -EINVAL;
+ cb_data->cgroup_needed = true;
break;
#endif
case INET_DIAG_BC_AUTO:
+ cb_data->userlocks_needed = true;
+ fallthrough;
case INET_DIAG_BC_JMP:
case INET_DIAG_BC_NOP:
break;
@@ -992,280 +801,6 @@ static int inet_diag_bc_audit(const struct nlattr *attr,
return len == 0 ? 0 : -EINVAL;
}
-static void twsk_build_assert(void)
-{
- BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_family) !=
- offsetof(struct sock, sk_family));
-
- BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_num) !=
- offsetof(struct inet_sock, inet_num));
-
- BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_dport) !=
- offsetof(struct inet_sock, inet_dport));
-
- BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_rcv_saddr) !=
- offsetof(struct inet_sock, inet_rcv_saddr));
-
- BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_daddr) !=
- offsetof(struct inet_sock, inet_daddr));
-
-#if IS_ENABLED(CONFIG_IPV6)
- BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_v6_rcv_saddr) !=
- offsetof(struct sock, sk_v6_rcv_saddr));
-
- BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_v6_daddr) !=
- offsetof(struct sock, sk_v6_daddr));
-#endif
-}
-
-void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb,
- struct netlink_callback *cb,
- const struct inet_diag_req_v2 *r)
-{
- bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN);
- struct inet_diag_dump_data *cb_data = cb->data;
- struct net *net = sock_net(skb->sk);
- u32 idiag_states = r->idiag_states;
- int i, num, s_i, s_num;
- struct nlattr *bc;
- struct sock *sk;
-
- bc = cb_data->inet_diag_nla_bc;
- if (idiag_states & TCPF_SYN_RECV)
- idiag_states |= TCPF_NEW_SYN_RECV;
- s_i = cb->args[1];
- s_num = num = cb->args[2];
-
- if (cb->args[0] == 0) {
- if (!(idiag_states & TCPF_LISTEN) || r->id.idiag_dport)
- goto skip_listen_ht;
-
- for (i = s_i; i <= hashinfo->lhash2_mask; i++) {
- struct inet_listen_hashbucket *ilb;
- struct hlist_nulls_node *node;
-
- num = 0;
- ilb = &hashinfo->lhash2[i];
-
- if (hlist_nulls_empty(&ilb->nulls_head)) {
- s_num = 0;
- continue;
- }
- spin_lock(&ilb->lock);
- sk_nulls_for_each(sk, node, &ilb->nulls_head) {
- struct inet_sock *inet = inet_sk(sk);
-
- if (!net_eq(sock_net(sk), net))
- continue;
-
- if (num < s_num) {
- num++;
- continue;
- }
-
- if (r->sdiag_family != AF_UNSPEC &&
- sk->sk_family != r->sdiag_family)
- goto next_listen;
-
- if (r->id.idiag_sport != inet->inet_sport &&
- r->id.idiag_sport)
- goto next_listen;
-
- if (!inet_diag_bc_sk(bc, sk))
- goto next_listen;
-
- if (inet_sk_diag_fill(sk, inet_csk(sk), skb,
- cb, r, NLM_F_MULTI,
- net_admin) < 0) {
- spin_unlock(&ilb->lock);
- goto done;
- }
-
-next_listen:
- ++num;
- }
- spin_unlock(&ilb->lock);
-
- s_num = 0;
- }
-skip_listen_ht:
- cb->args[0] = 1;
- s_i = num = s_num = 0;
- }
-
-/* Process a maximum of SKARR_SZ sockets at a time when walking hash buckets
- * with bh disabled.
- */
-#define SKARR_SZ 16
-
- /* Dump bound but inactive (not listening, connecting, etc.) sockets */
- if (cb->args[0] == 1) {
- if (!(idiag_states & TCPF_BOUND_INACTIVE))
- goto skip_bind_ht;
-
- for (i = s_i; i < hashinfo->bhash_size; i++) {
- struct inet_bind_hashbucket *ibb;
- struct inet_bind2_bucket *tb2;
- struct sock *sk_arr[SKARR_SZ];
- int num_arr[SKARR_SZ];
- int idx, accum, res;
-
-resume_bind_walk:
- num = 0;
- accum = 0;
- ibb = &hashinfo->bhash2[i];
-
- if (hlist_empty(&ibb->chain)) {
- s_num = 0;
- continue;
- }
- spin_lock_bh(&ibb->lock);
- inet_bind_bucket_for_each(tb2, &ibb->chain) {
- if (!net_eq(ib2_net(tb2), net))
- continue;
-
- sk_for_each_bound(sk, &tb2->owners) {
- struct inet_sock *inet = inet_sk(sk);
-
- if (num < s_num)
- goto next_bind;
-
- if (sk->sk_state != TCP_CLOSE ||
- !inet->inet_num)
- goto next_bind;
-
- if (r->sdiag_family != AF_UNSPEC &&
- r->sdiag_family != sk->sk_family)
- goto next_bind;
-
- if (!inet_diag_bc_sk(bc, sk))
- goto next_bind;
-
- sock_hold(sk);
- num_arr[accum] = num;
- sk_arr[accum] = sk;
- if (++accum == SKARR_SZ)
- goto pause_bind_walk;
-next_bind:
- num++;
- }
- }
-pause_bind_walk:
- spin_unlock_bh(&ibb->lock);
-
- res = 0;
- for (idx = 0; idx < accum; idx++) {
- if (res >= 0) {
- res = inet_sk_diag_fill(sk_arr[idx],
- NULL, skb, cb,
- r, NLM_F_MULTI,
- net_admin);
- if (res < 0)
- num = num_arr[idx];
- }
- sock_put(sk_arr[idx]);
- }
- if (res < 0)
- goto done;
-
- cond_resched();
-
- if (accum == SKARR_SZ) {
- s_num = num + 1;
- goto resume_bind_walk;
- }
-
- s_num = 0;
- }
-skip_bind_ht:
- cb->args[0] = 2;
- s_i = num = s_num = 0;
- }
-
- if (!(idiag_states & ~TCPF_LISTEN))
- goto out;
-
- for (i = s_i; i <= hashinfo->ehash_mask; i++) {
- struct inet_ehash_bucket *head = &hashinfo->ehash[i];
- spinlock_t *lock = inet_ehash_lockp(hashinfo, i);
- struct hlist_nulls_node *node;
- struct sock *sk_arr[SKARR_SZ];
- int num_arr[SKARR_SZ];
- int idx, accum, res;
-
- if (hlist_nulls_empty(&head->chain))
- continue;
-
- if (i > s_i)
- s_num = 0;
-
-next_chunk:
- num = 0;
- accum = 0;
- spin_lock_bh(lock);
- sk_nulls_for_each(sk, node, &head->chain) {
- int state;
-
- if (!net_eq(sock_net(sk), net))
- continue;
- if (num < s_num)
- goto next_normal;
- state = (sk->sk_state == TCP_TIME_WAIT) ?
- READ_ONCE(inet_twsk(sk)->tw_substate) : sk->sk_state;
- if (!(idiag_states & (1 << state)))
- goto next_normal;
- if (r->sdiag_family != AF_UNSPEC &&
- sk->sk_family != r->sdiag_family)
- goto next_normal;
- if (r->id.idiag_sport != htons(sk->sk_num) &&
- r->id.idiag_sport)
- goto next_normal;
- if (r->id.idiag_dport != sk->sk_dport &&
- r->id.idiag_dport)
- goto next_normal;
- twsk_build_assert();
-
- if (!inet_diag_bc_sk(bc, sk))
- goto next_normal;
-
- if (!refcount_inc_not_zero(&sk->sk_refcnt))
- goto next_normal;
-
- num_arr[accum] = num;
- sk_arr[accum] = sk;
- if (++accum == SKARR_SZ)
- break;
-next_normal:
- ++num;
- }
- spin_unlock_bh(lock);
- res = 0;
- for (idx = 0; idx < accum; idx++) {
- if (res >= 0) {
- res = sk_diag_fill(sk_arr[idx], skb, cb, r,
- NLM_F_MULTI, net_admin);
- if (res < 0)
- num = num_arr[idx];
- }
- sock_gen_put(sk_arr[idx]);
- }
- if (res < 0)
- break;
- cond_resched();
- if (accum == SKARR_SZ) {
- s_num = num + 1;
- goto next_chunk;
- }
- }
-
-done:
- cb->args[1] = i;
- cb->args[2] = num;
-out:
- ;
-}
-EXPORT_SYMBOL_GPL(inet_diag_dump_icsk);
-
static int __inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
const struct inet_diag_req_v2 *r)
{
@@ -1319,13 +854,10 @@ static int __inet_diag_dump_start(struct netlink_callback *cb, int hdrlen)
kfree(cb_data);
return err;
}
- nla = cb_data->inet_diag_nla_bc;
- if (nla) {
- err = inet_diag_bc_audit(nla, skb);
- if (err) {
- kfree(cb_data);
- return err;
- }
+ err = inet_diag_bc_audit(cb_data, skb);
+ if (err) {
+ kfree(cb_data);
+ return err;
}
nla = cb_data->inet_diag_nla_bpf_stgs;
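
[Editorial sketch] The audit pass now records which conditions the filter bytecode actually references (mark_needed, cgroup_needed, userlocks_needed), so inet_diag_bc_sk() only fills the entry fields some filter will read. The shape of that audit-then-lazy-fill optimization, with toy opcodes:

#include <stdbool.h>
#include <stdint.h>

struct dump_flags {
	bool mark_needed;
	bool cgroup_needed;
};

struct entry {
	uint32_t mark;
	uint64_t cgroup_id;
};

/* Audit step: walk the filter once, remember what it references. */
static void audit(const int *ops, int n, struct dump_flags *f)
{
	for (int i = 0; i < n; i++) {
		if (ops[i] == 1 /* MARK_COND */)
			f->mark_needed = true;
		else if (ops[i] == 2 /* CGROUP_COND */)
			f->cgroup_needed = true;
	}
}

/* Per-socket step: only pay for the fields a filter can look at
 * (cgroup_id() in particular is costly in-kernel). */
static void fill(struct entry *e, const struct dump_flags *f,
		 uint32_t mark, uint64_t cgroup_id)
{
	if (f->mark_needed)
		e->mark = mark;
	if (f->cgroup_needed)
		e->cgroup_id = cgroup_id;
}
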
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index ceeeec9b7290..ef4ccfd46ff6 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -425,19 +425,18 @@ struct sock *inet_lookup_run_sk_lookup(const struct net *net,
}
struct sock *__inet_lookup_listener(const struct net *net,
- struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
const __be32 saddr, __be16 sport,
const __be32 daddr, const unsigned short hnum,
const int dif, const int sdif)
{
struct inet_listen_hashbucket *ilb2;
+ struct inet_hashinfo *hashinfo;
struct sock *result = NULL;
unsigned int hash2;
/* Lookup redirect from BPF */
- if (static_branch_unlikely(&bpf_sk_lookup_enabled) &&
- hashinfo == net->ipv4.tcp_death_row.hashinfo) {
+ if (static_branch_unlikely(&bpf_sk_lookup_enabled)) {
result = inet_lookup_run_sk_lookup(net, IPPROTO_TCP, skb, doff,
saddr, sport, daddr, hnum, dif,
inet_ehashfn);
@@ -445,6 +444,7 @@ struct sock *__inet_lookup_listener(const struct net *net,
goto done;
}
+ hashinfo = net->ipv4.tcp_death_row.hashinfo;
hash2 = ipv4_portaddr_hash(net, daddr, hnum);
ilb2 = inet_lhash2_bucket(hashinfo, hash2);
@@ -490,21 +490,22 @@ void sock_edemux(struct sk_buff *skb)
EXPORT_SYMBOL(sock_edemux);
struct sock *__inet_lookup_established(const struct net *net,
- struct inet_hashinfo *hashinfo,
- const __be32 saddr, const __be16 sport,
- const __be32 daddr, const u16 hnum,
- const int dif, const int sdif)
+ const __be32 saddr, const __be16 sport,
+ const __be32 daddr, const u16 hnum,
+ const int dif, const int sdif)
{
- INET_ADDR_COOKIE(acookie, saddr, daddr);
const __portpair ports = INET_COMBINED_PORTS(sport, hnum);
- struct sock *sk;
+ INET_ADDR_COOKIE(acookie, saddr, daddr);
const struct hlist_nulls_node *node;
- /* Optimize here for direct hit, only listening connections can
- * have wildcards anyways.
- */
- unsigned int hash = inet_ehashfn(net, daddr, hnum, saddr, sport);
- unsigned int slot = hash & hashinfo->ehash_mask;
- struct inet_ehash_bucket *head = &hashinfo->ehash[slot];
+ struct inet_ehash_bucket *head;
+ struct inet_hashinfo *hashinfo;
+ unsigned int hash, slot;
+ struct sock *sk;
+
+ hashinfo = net->ipv4.tcp_death_row.hashinfo;
+ hash = inet_ehashfn(net, daddr, hnum, saddr, sport);
+ slot = hash & hashinfo->ehash_mask;
+ head = &hashinfo->ehash[slot];
begin:
sk_nulls_for_each_rcu(sk, node, &head->chain) {
@@ -579,8 +580,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
if (likely(inet_match(net, sk2, acookie, ports, dif, sdif))) {
if (sk2->sk_state == TCP_TIME_WAIT) {
tw = inet_twsk(sk2);
- if (sk->sk_protocol == IPPROTO_TCP &&
- tcp_twsk_unique(sk, sk2, twp))
+ if (tcp_twsk_unique(sk, sk2, twp))
break;
}
goto not_unique;
@@ -707,7 +707,7 @@ bool inet_ehash_nolisten(struct sock *sk, struct sock *osk, bool *found_dup_sk)
if (ok) {
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
} else {
- this_cpu_inc(*sk->sk_prot->orphan_count);
+ tcp_orphan_count_inc();
inet_sk_set_state(sk, TCP_CLOSE);
sock_set_flag(sk, SOCK_DEAD);
inet_csk_destroy_sock(sk);
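
[Editorial sketch] __inet_lookup_established() no longer takes a hashinfo argument; with a single TCP hash table per netns it derives the table from net, then indexes the bucket. The bucket math, assuming a power-of-two table size as the ehash uses:

#include <stdint.h>

struct bucket { void *chain; /* chain head */ };

struct hashinfo {
	struct bucket *ehash;
	uint32_t ehash_mask;	/* table size - 1; size is a power of two */
};

static struct bucket *ehash_bucket(struct hashinfo *h, uint32_t hash)
{
	/* Masking is the power-of-two equivalent of hash % size. */
	return &h->ehash[hash & h->ehash_mask];
}
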
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 875ff923a8ed..5b5426b8ee92 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -15,7 +15,7 @@
#include <net/inet_hashtables.h>
#include <net/inet_timewait_sock.h>
#include <net/ip.h>
-
+#include <net/tcp.h>
/**
* inet_twsk_bind_unhash - unhash a timewait socket from bind hash
@@ -74,7 +74,8 @@ static void inet_twsk_kill(struct inet_timewait_sock *tw)
void inet_twsk_free(struct inet_timewait_sock *tw)
{
struct module *owner = tw->tw_prot->owner;
- twsk_destructor((struct sock *)tw);
+
+ tcp_twsk_destructor((struct sock *)tw);
kmem_cache_free(tw->tw_prot->twsk_prot->twsk_slab, tw);
module_put(owner);
}
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index b2584cce90ae..f7012479713b 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -476,14 +476,16 @@ out_fail:
/* Process an incoming IP datagram fragment. */
int ip_defrag(struct net *net, struct sk_buff *skb, u32 user)
{
- struct net_device *dev = skb->dev ? : skb_dst_dev(skb);
- int vif = l3mdev_master_ifindex_rcu(dev);
+ struct net_device *dev;
struct ipq *qp;
+ int vif;
__IP_INC_STATS(net, IPSTATS_MIB_REASMREQDS);
/* Lookup (or create) queue header */
rcu_read_lock();
+ dev = skb->dev ? : skb_dst_dev_rcu(skb);
+ vif = l3mdev_master_ifindex_rcu(dev);
qp = ip_find(net, ip_hdr(skb), user, vif);
if (qp) {
int ret, refs = 0;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index f5b9004d6938..761a53c6a89a 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -28,6 +28,7 @@
#include <linux/etherdevice.h>
#include <linux/if_ether.h>
+#include <net/flow.h>
#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
@@ -44,7 +45,6 @@
#include <net/gre.h>
#include <net/dst_metadata.h>
#include <net/erspan.h>
-#include <net/inet_dscp.h>
/*
Problems & solutions
@@ -930,7 +930,7 @@ static int ipgre_open(struct net_device *dev)
if (ipv4_is_multicast(t->parms.iph.daddr)) {
struct flowi4 fl4 = {
.flowi4_oif = t->parms.link,
- .flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(&t->parms.iph)),
+ .flowi4_dscp = ip4h_dscp(&t->parms.iph),
.flowi4_scope = RT_SCOPE_UNIVERSE,
.flowi4_proto = IPPROTO_GRE,
.saddr = t->parms.iph.saddr,
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index fc323994b1fa..a09aca2c8567 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -587,9 +587,13 @@ static void ip_sublist_rcv_finish(struct list_head *head)
}
static struct sk_buff *ip_extract_route_hint(const struct net *net,
- struct sk_buff *skb, int rt_type)
+ struct sk_buff *skb)
{
- if (fib4_has_custom_rules(net) || rt_type == RTN_BROADCAST ||
+ const struct iphdr *iph = ip_hdr(skb);
+
+ if (fib4_has_custom_rules(net) ||
+ ipv4_is_lbcast(iph->daddr) ||
+ ipv4_is_zeronet(iph->daddr) ||
IPCB(skb)->flags & IPSKB_MULTIPATH)
return NULL;
@@ -618,8 +622,7 @@ static void ip_list_rcv_finish(struct net *net, struct list_head *head)
dst = skb_dst(skb);
if (curr_dst != dst) {
- hint = ip_extract_route_hint(net, skb,
- dst_rtable(dst)->rt_type);
+ hint = ip_extract_route_hint(net, skb);
/* dispatch old sublist */
if (!list_empty(&sublist))
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index e3321932bec0..be8815ce3ac2 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -615,14 +615,13 @@ int ip_options_rcv_srr(struct sk_buff *skb, struct net_device *dev)
}
memcpy(&nexthop, &optptr[srrptr-1], 4);
- orefdst = skb->_skb_refdst;
- skb_dst_set(skb, NULL);
+ orefdst = skb_dstref_steal(skb);
err = ip_route_input(skb, nexthop, iph->saddr, ip4h_dscp(iph),
dev) ? -EINVAL : 0;
rt2 = skb_rtable(skb);
if (err || (rt2->rt_type != RTN_UNICAST && rt2->rt_type != RTN_LOCAL)) {
skb_dst_drop(skb);
- skb->_skb_refdst = orefdst;
+ skb_dstref_restore(skb, orefdst);
return -EINVAL;
}
refdst_drop(orefdst);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 84e7f8a2f50f..2b96651d719b 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -63,6 +63,7 @@
#include <linux/stat.h>
#include <linux/init.h>
+#include <net/flow.h>
#include <net/snmp.h>
#include <net/ip.h>
#include <net/protocol.h>
@@ -485,7 +486,7 @@ int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
inet_sk_init_flowi4(inet, fl4);
/* sctp_v4_xmit() uses its own DSCP value */
- fl4->flowi4_tos = tos & INET_DSCP_MASK;
+ fl4->flowi4_dscp = inet_dsfield_to_dscp(tos);
/* If this fails, retransmit mechanism of transport layer will
* keep trying until route appears or the connection times
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index e86a8a862c41..ca9eaee4c2ef 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -42,6 +42,7 @@
#include <linux/init.h>
#include <linux/if_ether.h>
#include <linux/slab.h>
+#include <net/flow.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
@@ -1904,7 +1905,7 @@ static int ipmr_prepare_xmit(struct net *net, struct mr_table *mrt,
return -1;
}
- encap += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
+ encap += LL_RESERVED_SPACE(dst_dev_rcu(&rt->dst)) + rt->dst.header_len;
if (skb_cow(skb, encap)) {
ip_rt_put(rt);
@@ -1957,7 +1958,7 @@ static void ipmr_queue_fwd_xmit(struct net *net, struct mr_table *mrt,
* result in receiving multiple packets.
*/
NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD,
- net, NULL, skb, skb->dev, rt->dst.dev,
+ net, NULL, skb, skb->dev, dst_dev_rcu(&rt->dst),
ipmr_forward_finish);
return;
@@ -2120,7 +2121,7 @@ static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb)
struct flowi4 fl4 = {
.daddr = iph->daddr,
.saddr = iph->saddr,
- .flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(iph)),
+ .flowi4_dscp = ip4h_dscp(iph),
.flowi4_oif = (rt_is_output_route(rt) ?
skb->dev->ifindex : 0),
.flowi4_iif = (rt_is_output_route(rt) ?
@@ -2301,7 +2302,7 @@ int ip_mr_output(struct net *net, struct sock *sk, struct sk_buff *skb)
guard(rcu)();
- dev = rt->dst.dev;
+ dev = dst_dev_rcu(&rt->dst);
if (IPCB(skb)->flags & IPSKB_FORWARDED)
goto mc_output;
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 0565f001120d..ce310eb779e0 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -11,10 +11,10 @@
#include <linux/skbuff.h>
#include <linux/gfp.h>
#include <linux/export.h>
+#include <net/flow.h>
#include <net/route.h>
#include <net/xfrm.h>
#include <net/ip.h>
-#include <net/inet_dscp.h>
#include <net/netfilter/nf_queue.h>
/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
@@ -44,7 +44,7 @@ int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, un
*/
fl4.daddr = iph->daddr;
fl4.saddr = saddr;
- fl4.flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(iph));
+ fl4.flowi4_dscp = ip4h_dscp(iph);
fl4.flowi4_oif = sk ? sk->sk_bound_dev_if : 0;
fl4.flowi4_l3mdev = l3mdev_master_ifindex(dev);
fl4.flowi4_mark = skb->mark;
@@ -65,7 +65,10 @@ int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, un
if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
xfrm_decode_session(net, skb, flowi4_to_flowi(&fl4), AF_INET) == 0) {
struct dst_entry *dst = skb_dst(skb);
- skb_dst_set(skb, NULL);
+ /* ignore return value from skb_dstref_steal, xfrm_lookup takes
+ * care of dropping the refcnt if needed.
+ */
+ skb_dstref_steal(skb);
dst = xfrm_lookup(net, dst, flowi4_to_flowi(&fl4), sk, 0);
if (IS_ERR(dst))
return PTR_ERR(dst);
diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c
index a27782d7653e..6d9bf5106868 100644
--- a/net/ipv4/netfilter/ipt_rpfilter.c
+++ b/net/ipv4/netfilter/ipt_rpfilter.c
@@ -8,8 +8,8 @@
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
-#include <net/inet_dscp.h>
#include <linux/ip.h>
+#include <net/flow.h>
#include <net/ip.h>
#include <net/ip_fib.h>
#include <net/route.h>
@@ -76,7 +76,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
flow.daddr = iph->saddr;
flow.saddr = rpfilter_get_saddr(iph->daddr);
flow.flowi4_mark = info->flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0;
- flow.flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(iph));
+ flow.flowi4_dscp = ip4h_dscp(iph);
flow.flowi4_scope = RT_SCOPE_UNIVERSE;
flow.flowi4_l3mdev = l3mdev_master_ifindex_rcu(xt_in(par));
flow.flowi4_uid = sock_net_uid(xt_net(par), NULL);
diff --git a/net/ipv4/netfilter/nf_dup_ipv4.c b/net/ipv4/netfilter/nf_dup_ipv4.c
index ed08fb78cfa8..9a773502f10a 100644
--- a/net/ipv4/netfilter/nf_dup_ipv4.c
+++ b/net/ipv4/netfilter/nf_dup_ipv4.c
@@ -12,10 +12,10 @@
#include <linux/skbuff.h>
#include <linux/netfilter.h>
#include <net/checksum.h>
+#include <net/flow.h>
#include <net/icmp.h>
#include <net/ip.h>
#include <net/route.h>
-#include <net/inet_dscp.h>
#include <net/netfilter/ipv4/nf_dup_ipv4.h>
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
#include <net/netfilter/nf_conntrack.h>
@@ -33,7 +33,7 @@ static bool nf_dup_ipv4_route(struct net *net, struct sk_buff *skb,
fl4.flowi4_oif = oif;
fl4.daddr = gw->s_addr;
- fl4.flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(iph));
+ fl4.flowi4_dscp = ip4h_dscp(iph);
fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
fl4.flowi4_flags = FLOWI_FLAG_KNOWN_NH;
rt = ip_route_output_key(net, &fl4);
diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
index 0d3cb2ba6fc8..05631abe3f0d 100644
--- a/net/ipv4/netfilter/nf_reject_ipv4.c
+++ b/net/ipv4/netfilter/nf_reject_ipv4.c
@@ -12,6 +12,15 @@
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_bridge.h>
+static struct iphdr *nf_reject_iphdr_put(struct sk_buff *nskb,
+ const struct sk_buff *oldskb,
+ __u8 protocol, int ttl);
+static void nf_reject_ip_tcphdr_put(struct sk_buff *nskb, const struct sk_buff *oldskb,
+ const struct tcphdr *oth);
+static const struct tcphdr *
+nf_reject_ip_tcphdr_get(struct sk_buff *oldskb,
+ struct tcphdr *_oth, int hook);
+
static int nf_reject_iphdr_validate(struct sk_buff *skb)
{
struct iphdr *iph;
@@ -136,8 +145,9 @@ struct sk_buff *nf_reject_skb_v4_unreach(struct net *net,
}
EXPORT_SYMBOL_GPL(nf_reject_skb_v4_unreach);
-const struct tcphdr *nf_reject_ip_tcphdr_get(struct sk_buff *oldskb,
- struct tcphdr *_oth, int hook)
+static const struct tcphdr *
+nf_reject_ip_tcphdr_get(struct sk_buff *oldskb,
+ struct tcphdr *_oth, int hook)
{
const struct tcphdr *oth;
@@ -163,11 +173,10 @@ const struct tcphdr *nf_reject_ip_tcphdr_get(struct sk_buff *oldskb,
return oth;
}
-EXPORT_SYMBOL_GPL(nf_reject_ip_tcphdr_get);
-struct iphdr *nf_reject_iphdr_put(struct sk_buff *nskb,
- const struct sk_buff *oldskb,
- __u8 protocol, int ttl)
+static struct iphdr *nf_reject_iphdr_put(struct sk_buff *nskb,
+ const struct sk_buff *oldskb,
+ __u8 protocol, int ttl)
{
struct iphdr *niph, *oiph = ip_hdr(oldskb);
@@ -188,10 +197,9 @@ struct iphdr *nf_reject_iphdr_put(struct sk_buff *nskb,
return niph;
}
-EXPORT_SYMBOL_GPL(nf_reject_iphdr_put);
-void nf_reject_ip_tcphdr_put(struct sk_buff *nskb, const struct sk_buff *oldskb,
- const struct tcphdr *oth)
+static void nf_reject_ip_tcphdr_put(struct sk_buff *nskb, const struct sk_buff *oldskb,
+ const struct tcphdr *oth)
{
struct iphdr *niph = ip_hdr(nskb);
struct tcphdr *tcph;
@@ -218,7 +226,6 @@ void nf_reject_ip_tcphdr_put(struct sk_buff *nskb, const struct sk_buff *oldskb,
nskb->csum_start = (unsigned char *)tcph - nskb->head;
nskb->csum_offset = offsetof(struct tcphdr, check);
}
-EXPORT_SYMBOL_GPL(nf_reject_ip_tcphdr_put);
static int nf_reject_fill_skb_dst(struct sk_buff *skb_in)
{
diff --git a/net/ipv4/netfilter/nf_socket_ipv4.c b/net/ipv4/netfilter/nf_socket_ipv4.c
index a1350fc25838..5080fa5fbf6a 100644
--- a/net/ipv4/netfilter/nf_socket_ipv4.c
+++ b/net/ipv4/netfilter/nf_socket_ipv4.c
@@ -71,8 +71,7 @@ nf_socket_get_sock_v4(struct net *net, struct sk_buff *skb, const int doff,
{
switch (protocol) {
case IPPROTO_TCP:
- return inet_lookup(net, net->ipv4.tcp_death_row.hashinfo,
- skb, doff, saddr, sport, daddr, dport,
+ return inet_lookup(net, skb, doff, saddr, sport, daddr, dport,
in->ifindex);
case IPPROTO_UDP:
return udp4_lib_lookup(net, saddr, sport, daddr, dport,
diff --git a/net/ipv4/netfilter/nf_tproxy_ipv4.c b/net/ipv4/netfilter/nf_tproxy_ipv4.c
index 73e66a088e25..041c3f37f237 100644
--- a/net/ipv4/netfilter/nf_tproxy_ipv4.c
+++ b/net/ipv4/netfilter/nf_tproxy_ipv4.c
@@ -81,7 +81,6 @@ nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb,
const struct net_device *in,
const enum nf_tproxy_lookup_t lookup_type)
{
- struct inet_hashinfo *hinfo = net->ipv4.tcp_death_row.hashinfo;
struct sock *sk;
switch (protocol) {
@@ -95,7 +94,7 @@ nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb,
switch (lookup_type) {
case NF_TPROXY_LOOKUP_LISTENER:
- sk = inet_lookup_listener(net, hinfo, skb,
+ sk = inet_lookup_listener(net, skb,
ip_hdrlen(skb) + __tcp_hdrlen(hp),
saddr, sport, daddr, dport,
in->ifindex, 0);
@@ -109,7 +108,7 @@ nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb,
*/
break;
case NF_TPROXY_LOOKUP_ESTABLISHED:
- sk = inet_lookup_established(net, hinfo, saddr, sport,
+ sk = inet_lookup_established(net, saddr, sport,
daddr, dport, in->ifindex);
break;
default:
diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c
index 7e7c49535e3f..82af6cd76d13 100644
--- a/net/ipv4/netfilter/nft_fib_ipv4.c
+++ b/net/ipv4/netfilter/nft_fib_ipv4.c
@@ -10,7 +10,7 @@
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nft_fib.h>
-#include <net/inet_dscp.h>
+#include <net/flow.h>
#include <net/ip.h>
#include <net/ip_fib.h>
#include <net/route.h>
@@ -114,7 +114,7 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
if (priv->flags & NFTA_FIB_F_MARK)
fl4.flowi4_mark = pkt->skb->mark;
- fl4.flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(iph));
+ fl4.flowi4_dscp = ip4h_dscp(iph);
if (priv->flags & NFTA_FIB_F_DADDR) {
fl4.daddr = iph->daddr;
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 29118c43ebf5..0a20625f5ffb 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -2087,6 +2087,12 @@ static void remove_nexthop_from_groups(struct net *net, struct nexthop *nh,
{
struct nh_grp_entry *nhge, *tmp;
+ /* If there is nothing to do, let's avoid the costly call to
+ * synchronize_net()
+ */
+ if (list_empty(&nh->grp_list))
+ return;
+
list_for_each_entry_safe(nhge, tmp, &nh->grp_list, nh_list)
remove_nh_grp_entry(net, nhge, nlinfo);
@@ -3511,12 +3517,42 @@ static int rtm_dump_walk_nexthops(struct sk_buff *skb,
int err;
s_idx = ctx->idx;
- for (node = rb_first(root); node; node = rb_next(node)) {
+
+ /* If this is not the first invocation, ctx->idx will contain the id of
+ * the last nexthop we processed. Instead of starting from the very
+ * first element of the red/black tree again and linearly skipping the
+ * (potentially large) set of nodes with an id smaller than s_idx, walk
+ * the tree and find the left-most node whose id is >= s_idx. This
+ * provides an efficient O(log n) starting point for the dump
+ * continuation.
+ */
+ if (s_idx != 0) {
+ struct rb_node *tmp = root->rb_node;
+
+ node = NULL;
+ while (tmp) {
+ struct nexthop *nh;
+
+ nh = rb_entry(tmp, struct nexthop, rb_node);
+ if (nh->id < s_idx) {
+ tmp = tmp->rb_right;
+ } else {
+ /* Track current candidate and keep looking on
+ * the left side to find the left-most
+ * (smallest id) that is still >= s_idx.
+ */
+ node = tmp;
+ tmp = tmp->rb_left;
+ }
+ }
+ } else {
+ node = rb_first(root);
+ }
+
+ for (; node; node = rb_next(node)) {
struct nexthop *nh;
nh = rb_entry(node, struct nexthop, rb_node);
- if (nh->id < s_idx)
- continue;
ctx->idx = nh->id;
err = nh_cb(skb, cb, nh, data);
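
[Editorial sketch] The dump-resume change replaces a linear skip over already-dumped ids with a binary descent: find the left-most node whose id is >= s_idx. On any ordered binary tree that is the classic lower-bound walk; sketched on a plain BST rather than the kernel rbtree:

#include <stddef.h>
#include <stdint.h>

struct node {
	uint32_t id;
	struct node *left, *right;
};

/* Left-most node with id >= key, or NULL -- O(height) instead of the
 * O(n) linear skip the old dump loop performed. */
static struct node *lower_bound(struct node *root, uint32_t key)
{
	struct node *best = NULL;

	while (root) {
		if (root->id < key) {
			root = root->right;
		} else {
			best = root;	/* candidate; try to go lower */
			root = root->left;
		}
	}
	return best;
}
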
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 031df4c19fcc..5321c5801c64 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -56,9 +56,7 @@ struct ping_table {
static struct ping_table ping_table;
struct pingv6_ops pingv6_ops;
-EXPORT_SYMBOL_GPL(pingv6_ops);
-
-static u16 ping_port_rover;
+EXPORT_IPV6_MOD_GPL(pingv6_ops);
static inline u32 ping_hashfn(const struct net *net, u32 num, u32 mask)
{
@@ -67,7 +65,6 @@ static inline u32 ping_hashfn(const struct net *net, u32 num, u32 mask)
pr_debug("hash(%u) = %u\n", num, res);
return res;
}
-EXPORT_SYMBOL_GPL(ping_hash);
static inline struct hlist_head *ping_hashslot(struct ping_table *table,
struct net *net, unsigned int num)
@@ -77,6 +74,7 @@ static inline struct hlist_head *ping_hashslot(struct ping_table *table,
int ping_get_port(struct sock *sk, unsigned short ident)
{
+ struct net *net = sock_net(sk);
struct inet_sock *isk, *isk2;
struct hlist_head *hlist;
struct sock *sk2 = NULL;
@@ -84,15 +82,16 @@ int ping_get_port(struct sock *sk, unsigned short ident)
isk = inet_sk(sk);
spin_lock(&ping_table.lock);
if (ident == 0) {
+ u16 result = net->ipv4.ping_port_rover + 1;
u32 i;
- u16 result = ping_port_rover + 1;
for (i = 0; i < (1L << 16); i++, result++) {
if (!result)
- result++; /* avoid zero */
- hlist = ping_hashslot(&ping_table, sock_net(sk),
- result);
+ continue; /* avoid zero */
+ hlist = ping_hashslot(&ping_table, net, result);
sk_for_each(sk2, hlist) {
+ if (!net_eq(sock_net(sk2), net))
+ continue;
isk2 = inet_sk(sk2);
if (isk2->inet_num == result)
@@ -100,7 +99,7 @@ int ping_get_port(struct sock *sk, unsigned short ident)
}
/* found */
- ping_port_rover = ident = result;
+ net->ipv4.ping_port_rover = ident = result;
break;
next_port:
;
@@ -108,8 +107,10 @@ next_port:
if (i >= (1L << 16))
goto fail;
} else {
- hlist = ping_hashslot(&ping_table, sock_net(sk), ident);
+ hlist = ping_hashslot(&ping_table, net, ident);
sk_for_each(sk2, hlist) {
+ if (!net_eq(sock_net(sk2), net))
+ continue;
isk2 = inet_sk(sk2);
/* BUG? Why is this reuse and not reuseaddr? ping.c
@@ -129,7 +130,7 @@ next_port:
pr_debug("was not hashed\n");
sk_add_node_rcu(sk, hlist);
sock_set_flag(sk, SOCK_RCU_FREE);
- sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
+ sock_prot_inuse_add(net, sk->sk_prot, 1);
}
spin_unlock(&ping_table.lock);
return 0;
@@ -138,15 +139,7 @@ fail:
spin_unlock(&ping_table.lock);
return -EADDRINUSE;
}
-EXPORT_SYMBOL_GPL(ping_get_port);
-
-int ping_hash(struct sock *sk)
-{
- pr_debug("ping_hash(sk->port=%u)\n", inet_sk(sk)->inet_num);
- BUG(); /* "Please do not press this button again." */
-
- return 0;
-}
+EXPORT_IPV6_MOD_GPL(ping_get_port);
void ping_unhash(struct sock *sk)
{
@@ -161,7 +154,7 @@ void ping_unhash(struct sock *sk)
}
spin_unlock(&ping_table.lock);
}
-EXPORT_SYMBOL_GPL(ping_unhash);
+EXPORT_IPV6_MOD_GPL(ping_unhash);
/* Called under rcu_read_lock() */
static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident)
@@ -188,6 +181,8 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident)
}
sk_for_each_rcu(sk, hslot) {
+ if (!net_eq(sock_net(sk), net))
+ continue;
isk = inet_sk(sk);
pr_debug("iterate\n");
@@ -279,7 +274,7 @@ out_release_group:
put_group_info(group_info);
return ret;
}
-EXPORT_SYMBOL_GPL(ping_init_sock);
+EXPORT_IPV6_MOD_GPL(ping_init_sock);
void ping_close(struct sock *sk, long timeout)
{
@@ -289,7 +284,7 @@ void ping_close(struct sock *sk, long timeout)
sk_common_release(sk);
}
-EXPORT_SYMBOL_GPL(ping_close);
+EXPORT_IPV6_MOD_GPL(ping_close);
static int ping_pre_connect(struct sock *sk, struct sockaddr *uaddr,
int addr_len)
@@ -467,7 +462,7 @@ out:
pr_debug("ping_v4_bind -> %d\n", err);
return err;
}
-EXPORT_SYMBOL_GPL(ping_bind);
+EXPORT_IPV6_MOD_GPL(ping_bind);
/*
* Is this a supported type of ICMP message?
@@ -600,7 +595,7 @@ void ping_err(struct sk_buff *skb, int offset, u32 info)
out:
return;
}
-EXPORT_SYMBOL_GPL(ping_err);
+EXPORT_IPV6_MOD_GPL(ping_err);
/*
* Copy and checksum an ICMP Echo packet from user space into a buffer
@@ -630,7 +625,7 @@ int ping_getfrag(void *from, char *to,
return 0;
}
-EXPORT_SYMBOL_GPL(ping_getfrag);
+EXPORT_IPV6_MOD_GPL(ping_getfrag);
static int ping_v4_push_pending_frames(struct sock *sk, struct pingfakehdr *pfh,
struct flowi4 *fl4)
@@ -691,7 +686,7 @@ int ping_common_sendmsg(int family, struct msghdr *msg, size_t len,
return 0;
}
-EXPORT_SYMBOL_GPL(ping_common_sendmsg);
+EXPORT_IPV6_MOD_GPL(ping_common_sendmsg);
static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
@@ -936,7 +931,7 @@ out:
pr_debug("ping_recvmsg -> %d\n", err);
return err;
}
-EXPORT_SYMBOL_GPL(ping_recvmsg);
+EXPORT_IPV6_MOD_GPL(ping_recvmsg);
static enum skb_drop_reason __ping_queue_rcv_skb(struct sock *sk,
struct sk_buff *skb)
@@ -957,7 +952,7 @@ int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
return __ping_queue_rcv_skb(sk, skb) ? -1 : 0;
}
-EXPORT_SYMBOL_GPL(ping_queue_rcv_skb);
+EXPORT_IPV6_MOD_GPL(ping_queue_rcv_skb);
/*
@@ -985,7 +980,7 @@ enum skb_drop_reason ping_rcv(struct sk_buff *skb)
kfree_skb_reason(skb, SKB_DROP_REASON_NO_SOCKET);
return SKB_DROP_REASON_NO_SOCKET;
}
-EXPORT_SYMBOL_GPL(ping_rcv);
+EXPORT_IPV6_MOD_GPL(ping_rcv);
struct proto ping_prot = {
.name = "PING",
@@ -1002,13 +997,12 @@ struct proto ping_prot = {
.bind = ping_bind,
.backlog_rcv = ping_queue_rcv_skb,
.release_cb = ip4_datagram_release_cb,
- .hash = ping_hash,
.unhash = ping_unhash,
.get_port = ping_get_port,
.put_port = ping_unhash,
.obj_size = sizeof(struct inet_sock),
};
-EXPORT_SYMBOL(ping_prot);
+EXPORT_IPV6_MOD(ping_prot);
#ifdef CONFIG_PROC_FS
@@ -1073,7 +1067,7 @@ void *ping_seq_start(struct seq_file *seq, loff_t *pos, sa_family_t family)
return *pos ? ping_get_idx(seq, *pos-1) : SEQ_START_TOKEN;
}
-EXPORT_SYMBOL_GPL(ping_seq_start);
+EXPORT_IPV6_MOD_GPL(ping_seq_start);
static void *ping_v4_seq_start(struct seq_file *seq, loff_t *pos)
{
@@ -1092,14 +1086,14 @@ void *ping_seq_next(struct seq_file *seq, void *v, loff_t *pos)
++*pos;
return sk;
}
-EXPORT_SYMBOL_GPL(ping_seq_next);
+EXPORT_IPV6_MOD_GPL(ping_seq_next);
void ping_seq_stop(struct seq_file *seq, void *v)
__releases(ping_table.lock)
{
spin_unlock(&ping_table.lock);
}
-EXPORT_SYMBOL_GPL(ping_seq_stop);
+EXPORT_IPV6_MOD_GPL(ping_seq_stop);
static void ping_v4_format_sock(struct sock *sp, struct seq_file *f,
int bucket)
@@ -1119,7 +1113,7 @@ static void ping_v4_format_sock(struct sock *sp, struct seq_file *f,
from_kuid_munged(seq_user_ns(f), sk_uid(sp)),
0, sock_i_ino(sp),
refcount_read(&sp->sk_refcnt), sp,
- atomic_read(&sp->sk_drops));
+ sk_drops_read(sp));
}
static int ping_v4_seq_show(struct seq_file *seq, void *v)
@@ -1150,6 +1144,8 @@ static int __net_init ping_v4_proc_init_net(struct net *net)
if (!proc_create_net("icmp", 0444, net->proc_net, &ping_v4_seq_ops,
sizeof(struct ping_iter_state)))
return -ENOMEM;
+
+ net->ipv4.ping_port_rover = get_random_u16();
return 0;
}
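
The ping.c hunks above swap EXPORT_SYMBOL_GPL() for EXPORT_IPV6_MOD_GPL(), so these helpers are exported only when their sole external user, the ipv6 module, is actually built as a module. A minimal sketch of what such a macro is assumed to expand to (the authoritative definition lives in include/net/ipv6.h):

/* Sketch, assuming the export keys off CONFIG_IPV6=m. */
#if IS_MODULE(CONFIG_IPV6)
#define EXPORT_IPV6_MOD(X)      EXPORT_SYMBOL(X)
#define EXPORT_IPV6_MOD_GPL(X)  EXPORT_SYMBOL_GPL(X)
#else
#define EXPORT_IPV6_MOD(X)
#define EXPORT_IPV6_MOD_GPL(X)
#endif
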
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 1d2c89d63cc7..d54ebb7df966 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -178,7 +178,7 @@ static int raw_v4_input(struct net *net, struct sk_buff *skb,
if (atomic_read(&sk->sk_rmem_alloc) >=
READ_ONCE(sk->sk_rcvbuf)) {
- atomic_inc(&sk->sk_drops);
+ sk_drops_inc(sk);
continue;
}
@@ -311,7 +311,7 @@ static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb)
int raw_rcv(struct sock *sk, struct sk_buff *skb)
{
if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) {
- atomic_inc(&sk->sk_drops);
+ sk_drops_inc(sk);
sk_skb_reason_drop(sk, skb, SKB_DROP_REASON_XFRM_POLICY);
return NET_RX_DROP;
}
@@ -793,6 +793,7 @@ static int raw_sk_init(struct sock *sk)
{
struct raw_sock *rp = raw_sk(sk);
+ sk->sk_drop_counters = &rp->drop_counters;
if (inet_sk(sk)->inet_num == IPPROTO_ICMP)
memset(&rp->filter, 0, sizeof(rp->filter));
return 0;
@@ -1045,7 +1046,7 @@ static void raw_sock_seq_show(struct seq_file *seq, struct sock *sp, int i)
0, 0L, 0,
from_kuid_munged(seq_user_ns(seq), sk_uid(sp)),
0, sock_i_ino(sp),
- refcount_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops));
+ refcount_read(&sp->sk_refcnt), sp, sk_drops_read(sp));
}
static int raw_seq_show(struct seq_file *seq, void *v)
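
raw.c (like udp.c and the datagram code elsewhere in this series) stops poking sk->sk_drops with atomic_inc()/atomic_read() and goes through sk_drops_inc()/sk_drops_read(); raw_sk_init() additionally points sk->sk_drop_counters at a per-socket counter block. A hedged sketch of the assumed accessor shape — the real helpers in include/net/sock.h may spread the count across the dedicated counters when sk_drop_counters is set:

/* Sketch only: the plain fallback path, no dedicated counters. */
static inline void sk_drops_inc(struct sock *sk)
{
        atomic_inc(&sk->sk_drops);      /* or bump sk->sk_drop_counters */
}

static inline int sk_drops_read(const struct sock *sk)
{
        return atomic_read(&sk->sk_drops);
}
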
diff --git a/net/ipv4/raw_diag.c b/net/ipv4/raw_diag.c
index cc793bd8de25..943e5998e0ad 100644
--- a/net/ipv4/raw_diag.c
+++ b/net/ipv4/raw_diag.c
@@ -126,9 +126,9 @@ static int raw_diag_dump_one(struct netlink_callback *cb,
static int sk_diag_dump(struct sock *sk, struct sk_buff *skb,
struct netlink_callback *cb,
const struct inet_diag_req_v2 *r,
- struct nlattr *bc, bool net_admin)
+ bool net_admin)
{
- if (!inet_diag_bc_sk(bc, sk))
+ if (!inet_diag_bc_sk(cb->data, sk))
return 0;
return inet_sk_diag_fill(sk, NULL, skb, cb, r, NLM_F_MULTI, net_admin);
@@ -140,17 +140,13 @@ static void raw_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN);
struct raw_hashinfo *hashinfo = raw_get_hashinfo(r);
struct net *net = sock_net(skb->sk);
- struct inet_diag_dump_data *cb_data;
int num, s_num, slot, s_slot;
struct hlist_head *hlist;
struct sock *sk = NULL;
- struct nlattr *bc;
if (IS_ERR(hashinfo))
return;
- cb_data = cb->data;
- bc = cb_data->inet_diag_nla_bc;
s_slot = cb->args[0];
num = s_num = cb->args[1];
@@ -174,7 +170,7 @@ static void raw_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
if (r->id.idiag_dport != inet->inet_dport &&
r->id.idiag_dport)
goto next;
- if (sk_diag_dump(sk, skb, cb, r, bc, net_admin) < 0)
+ if (sk_diag_dump(sk, skb, cb, r, net_admin) < 0)
goto out_unlock;
next:
num++;
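
The diag dumpers no longer dig the bytecode attribute out of cb->data themselves; inet_diag_bc_sk() is assumed to take the whole dump state and fetch the bytecode internally, roughly:

/* Assumed shape of the updated filter entry point; the entry
 * construction and bytecode interpreter are elided.
 */
bool inet_diag_bc_sk(const struct inet_diag_dump_data *cb_data,
                     struct sock *sk)
{
        const struct nlattr *bc = cb_data->inet_diag_nla_bc;

        if (!bc)
                return true;
        /* ... fill an inet_diag_entry from sk and run the filter ... */
}
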
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index baa43e5966b1..50309f2ab132 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -84,6 +84,7 @@
#include <linux/jhash.h>
#include <net/dst.h>
#include <net/dst_metadata.h>
+#include <net/flow.h>
#include <net/inet_dscp.h>
#include <net/net_namespace.h>
#include <net/ip.h>
@@ -413,11 +414,11 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
const void *daddr)
{
const struct rtable *rt = container_of(dst, struct rtable, dst);
- struct net_device *dev = dst_dev(dst);
+ struct net_device *dev;
struct neighbour *n;
rcu_read_lock();
-
+ dev = dst_dev_rcu(dst);
if (likely(rt->rt_gw_family == AF_INET)) {
n = ip_neigh_gw4(dev, rt->rt_gw4);
} else if (rt->rt_gw_family == AF_INET6) {
@@ -1026,7 +1027,7 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
return;
rcu_read_lock();
- net = dev_net_rcu(dst_dev(dst));
+ net = dst_dev_net_rcu(dst);
if (mtu < net->ipv4.ip_rt_min_pmtu) {
lock = true;
mtu = min(old_mtu, net->ipv4.ip_rt_min_pmtu);
@@ -1291,7 +1292,7 @@ void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt)
struct flowi4 fl4 = {
.daddr = iph->daddr,
.saddr = iph->saddr,
- .flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(iph)),
+ .flowi4_dscp = ip4h_dscp(iph),
.flowi4_oif = rt->dst.dev->ifindex,
.flowi4_iif = skb->dev->ifindex,
.flowi4_mark = skb->mark,
@@ -1326,7 +1327,7 @@ static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
struct net *net;
rcu_read_lock();
- net = dev_net_rcu(dst_dev(dst));
+ net = dst_dev_net_rcu(dst);
advmss = max_t(unsigned int, ipv4_mtu(dst) - header_size,
net->ipv4.ip_rt_min_advmss);
rcu_read_unlock();
@@ -2210,7 +2211,7 @@ ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr,
goto martian_source;
}
- if (rt->rt_type != RTN_LOCAL)
+ if (!(rt->rt_flags & RTCF_LOCAL))
goto skip_validate_source;
reason = fib_validate_source_reason(skb, saddr, daddr, dscp, 0, dev,
@@ -2331,7 +2332,7 @@ ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
fl4.flowi4_oif = 0;
fl4.flowi4_iif = dev->ifindex;
fl4.flowi4_mark = skb->mark;
- fl4.flowi4_tos = inet_dscp_to_dsfield(dscp);
+ fl4.flowi4_dscp = dscp;
fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
fl4.flowi4_flags = 0;
fl4.daddr = daddr;
@@ -2694,7 +2695,6 @@ struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
struct rtable *rth;
fl4->flowi4_iif = LOOPBACK_IFINDEX;
- fl4->flowi4_tos &= INET_DSCP_MASK;
rcu_read_lock();
rth = ip_route_output_key_hash_rcu(net, fl4, &res, skb);
@@ -3337,7 +3337,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
fl4.daddr = dst;
fl4.saddr = src;
- fl4.flowi4_tos = inet_dscp_to_dsfield(dscp);
+ fl4.flowi4_dscp = dscp;
fl4.flowi4_oif = nla_get_u32_default(tb[RTA_OIF], 0);
fl4.flowi4_mark = mark;
fl4.flowi4_uid = uid;
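
route.c now stores a typed dscp_t in the new flowi4_dscp field instead of round-tripping through flowi4_tos as a raw dsfield byte. For reference, the conversion helpers these call sites used to need are trivial masked casts (from <net/inet_dscp.h>):

/* dscp_t carries the six DSCP bits already in dsfield position. */
static inline __u8 inet_dscp_to_dsfield(dscp_t dscp)
{
        return (__force __u8)dscp;
}

static inline dscp_t inet_dsfield_to_dscp(__u8 dsfield)
{
        return (__force dscp_t)(dsfield & INET_DSCP_MASK);
}
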
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 71a956fbfc55..40b774b4f587 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3195,7 +3195,7 @@ adjudge_to_death:
/* remove backlog if any, without releasing ownership. */
__release_sock(sk);
- this_cpu_inc(tcp_orphan_count);
+ tcp_orphan_count_inc();
/* Have we already been destroyed by a softirq or backlog? */
if (state != TCP_CLOSE && sk->sk_state == TCP_CLOSE)
@@ -3376,7 +3376,7 @@ int tcp_disconnect(struct sock *sk, int flags)
WRITE_ONCE(tp->write_seq, seq);
icsk->icsk_backoff = 0;
- icsk->icsk_probes_out = 0;
+ WRITE_ONCE(icsk->icsk_probes_out, 0);
icsk->icsk_probes_tstamp = 0;
icsk->icsk_rto = TCP_TIMEOUT_INIT;
WRITE_ONCE(icsk->icsk_rto_min, TCP_RTO_MIN);
@@ -3760,7 +3760,7 @@ int tcp_sock_set_maxseg(struct sock *sk, int val)
if (val && (val < TCP_MIN_MSS || val > MAX_TCP_WINDOW))
return -EINVAL;
- tcp_sk(sk)->rx_opt.user_mss = val;
+ WRITE_ONCE(tcp_sk(sk)->rx_opt.user_mss, val);
return 0;
}
@@ -3890,15 +3890,13 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
WRITE_ONCE(inet_csk(sk)->icsk_delack_max, delack_max);
return 0;
}
+ case TCP_MAXSEG:
+ return tcp_sock_set_maxseg(sk, val);
}
sockopt_lock_sock(sk);
switch (optname) {
- case TCP_MAXSEG:
- err = tcp_sock_set_maxseg(sk, val);
- break;
-
case TCP_NODELAY:
__tcp_sock_set_nodelay(sk, val);
break;
@@ -4348,7 +4346,8 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk,
nla_put_u32(stats, TCP_NLA_REORDERING, tp->reordering);
nla_put_u32(stats, TCP_NLA_MIN_RTT, tcp_min_rtt(tp));
- nla_put_u8(stats, TCP_NLA_RECUR_RETRANS, inet_csk(sk)->icsk_retransmits);
+ nla_put_u8(stats, TCP_NLA_RECUR_RETRANS,
+ READ_ONCE(inet_csk(sk)->icsk_retransmits));
nla_put_u8(stats, TCP_NLA_DELIVERY_RATE_APP_LMT, !!tp->rate_app_limited);
nla_put_u32(stats, TCP_NLA_SND_SSTHRESH, tp->snd_ssthresh);
nla_put_u32(stats, TCP_NLA_DELIVERED, tp->delivered);
@@ -4383,6 +4382,7 @@ int do_tcp_getsockopt(struct sock *sk, int level,
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
struct net *net = sock_net(sk);
+ int user_mss;
int val, len;
if (copy_from_sockptr(&len, optlen, sizeof(int)))
@@ -4396,9 +4396,10 @@ int do_tcp_getsockopt(struct sock *sk, int level,
switch (optname) {
case TCP_MAXSEG:
val = tp->mss_cache;
- if (tp->rx_opt.user_mss &&
+ user_mss = READ_ONCE(tp->rx_opt.user_mss);
+ if (user_mss &&
((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
- val = tp->rx_opt.user_mss;
+ val = user_mss;
if (tp->repair)
val = tp->rx_opt.mss_clamp;
break;
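
Once user_mss is written with WRITE_ONCE() in tcp_sock_set_maxseg(), TCP_MAXSEG can move into the lockless section of do_tcp_setsockopt() that runs before sockopt_lock_sock(); every racing reader pairs up with READ_ONCE(), as the getsockopt hunk above and the tcp_input.c/tcp_output.c hunks below show. The idiom in short:

/* Writer -- no socket lock needed for a single aligned word: */
WRITE_ONCE(tcp_sk(sk)->rx_opt.user_mss, val);

/* Reader that may race with the writer: */
user_mss = READ_ONCE(tp->rx_opt.user_mss);
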
diff --git a/net/ipv4/tcp_cdg.c b/net/ipv4/tcp_cdg.c
index ba4d98e510e0..fbad6c35dee9 100644
--- a/net/ipv4/tcp_cdg.c
+++ b/net/ipv4/tcp_cdg.c
@@ -379,7 +379,7 @@ static void tcp_cdg_init(struct sock *sk)
/* We silently fall back to window = 1 if allocation fails. */
if (window > 1)
ca->gradients = kcalloc(window, sizeof(ca->gradients[0]),
- GFP_NOWAIT | __GFP_NOWARN);
+ GFP_NOWAIT);
ca->rtt_seq = tp->snd_nxt;
ca->shadow_wnd = tcp_snd_cwnd(tp);
}
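
Dropping __GFP_NOWARN from the tcp_cdg allocation is a no-op cleanup: GFP_NOWAIT has implied __GFP_NOWARN for a while now, i.e. in current include/linux/gfp_types.h:

#define GFP_NOWAIT      (__GFP_KSWAPD_RECLAIM | __GFP_NOWARN)
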
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index 45e174b8cd22..d83efd91f461 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -12,6 +12,9 @@
#include <linux/tcp.h>
+#include <net/inet_hashtables.h>
+#include <net/inet6_hashtables.h>
+#include <net/inet_timewait_sock.h>
#include <net/netlink.h>
#include <net/tcp.h>
@@ -174,27 +177,465 @@ static size_t tcp_diag_get_aux_size(struct sock *sk, bool net_admin)
size += ulp_ops->get_info_size(sk, net_admin);
}
}
- return size;
+
+ return size
+ + nla_total_size(sizeof(struct tcp_info))
+ + nla_total_size(sizeof(struct inet_diag_msg))
+ + inet_diag_msg_attrs_size()
+ + nla_total_size(sizeof(struct inet_diag_meminfo))
+ + nla_total_size(SK_MEMINFO_VARS * sizeof(u32))
+ + nla_total_size(TCP_CA_NAME_MAX)
+ + nla_total_size(sizeof(struct tcpvegas_info))
+ + 64;
+}
+
+static int tcp_twsk_diag_fill(struct sock *sk,
+ struct sk_buff *skb,
+ struct netlink_callback *cb,
+ u16 nlmsg_flags, bool net_admin)
+{
+ struct inet_timewait_sock *tw = inet_twsk(sk);
+ struct inet_diag_msg *r;
+ struct nlmsghdr *nlh;
+ long tmo;
+
+ nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, cb->nlh->nlmsg_type,
+ sizeof(*r), nlmsg_flags);
+ if (!nlh)
+ return -EMSGSIZE;
+
+ r = nlmsg_data(nlh);
+ DEBUG_NET_WARN_ON_ONCE(tw->tw_state != TCP_TIME_WAIT);
+
+ inet_diag_msg_common_fill(r, sk);
+ r->idiag_retrans = 0;
+
+ r->idiag_state = READ_ONCE(tw->tw_substate);
+ r->idiag_timer = 3;
+ tmo = tw->tw_timer.expires - jiffies;
+ r->idiag_expires = jiffies_delta_to_msecs(tmo);
+ r->idiag_rqueue = 0;
+ r->idiag_wqueue = 0;
+ r->idiag_uid = 0;
+ r->idiag_inode = 0;
+
+ if (net_admin && nla_put_u32(skb, INET_DIAG_MARK,
+ tw->tw_mark)) {
+ nlmsg_cancel(skb, nlh);
+ return -EMSGSIZE;
+ }
+
+ nlmsg_end(skb, nlh);
+ return 0;
+}
+
+static int tcp_req_diag_fill(struct sock *sk, struct sk_buff *skb,
+ struct netlink_callback *cb,
+ u16 nlmsg_flags, bool net_admin)
+{
+ struct request_sock *reqsk = inet_reqsk(sk);
+ struct inet_diag_msg *r;
+ struct nlmsghdr *nlh;
+ long tmo;
+
+ nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+ cb->nlh->nlmsg_type, sizeof(*r), nlmsg_flags);
+ if (!nlh)
+ return -EMSGSIZE;
+
+ r = nlmsg_data(nlh);
+ inet_diag_msg_common_fill(r, sk);
+ r->idiag_state = TCP_SYN_RECV;
+ r->idiag_timer = 1;
+ r->idiag_retrans = READ_ONCE(reqsk->num_retrans);
+
+ BUILD_BUG_ON(offsetof(struct inet_request_sock, ir_cookie) !=
+ offsetof(struct sock, sk_cookie));
+
+ tmo = READ_ONCE(inet_reqsk(sk)->rsk_timer.expires) - jiffies;
+ r->idiag_expires = jiffies_delta_to_msecs(tmo);
+ r->idiag_rqueue = 0;
+ r->idiag_wqueue = 0;
+ r->idiag_uid = 0;
+ r->idiag_inode = 0;
+
+ if (net_admin && nla_put_u32(skb, INET_DIAG_MARK,
+ inet_rsk(reqsk)->ir_mark)) {
+ nlmsg_cancel(skb, nlh);
+ return -EMSGSIZE;
+ }
+
+ nlmsg_end(skb, nlh);
+ return 0;
+}
+
+static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
+ struct netlink_callback *cb,
+ const struct inet_diag_req_v2 *r,
+ u16 nlmsg_flags, bool net_admin)
+{
+ if (sk->sk_state == TCP_TIME_WAIT)
+ return tcp_twsk_diag_fill(sk, skb, cb, nlmsg_flags, net_admin);
+
+ if (sk->sk_state == TCP_NEW_SYN_RECV)
+ return tcp_req_diag_fill(sk, skb, cb, nlmsg_flags, net_admin);
+
+ return inet_sk_diag_fill(sk, inet_csk(sk), skb, cb, r, nlmsg_flags,
+ net_admin);
+}
+
+static void twsk_build_assert(void)
+{
+ BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_family) !=
+ offsetof(struct sock, sk_family));
+
+ BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_num) !=
+ offsetof(struct inet_sock, inet_num));
+
+ BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_dport) !=
+ offsetof(struct inet_sock, inet_dport));
+
+ BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_rcv_saddr) !=
+ offsetof(struct inet_sock, inet_rcv_saddr));
+
+ BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_daddr) !=
+ offsetof(struct inet_sock, inet_daddr));
+
+#if IS_ENABLED(CONFIG_IPV6)
+ BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_v6_rcv_saddr) !=
+ offsetof(struct sock, sk_v6_rcv_saddr));
+
+ BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_v6_daddr) !=
+ offsetof(struct sock, sk_v6_daddr));
+#endif
}
static void tcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
const struct inet_diag_req_v2 *r)
{
- struct inet_hashinfo *hinfo;
+ bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN);
+ struct inet_diag_dump_data *cb_data = cb->data;
+ struct net *net = sock_net(skb->sk);
+ u32 idiag_states = r->idiag_states;
+ struct inet_hashinfo *hashinfo;
+ int i, num, s_i, s_num;
+ struct sock *sk;
- hinfo = sock_net(cb->skb->sk)->ipv4.tcp_death_row.hashinfo;
+ hashinfo = net->ipv4.tcp_death_row.hashinfo;
+ if (idiag_states & TCPF_SYN_RECV)
+ idiag_states |= TCPF_NEW_SYN_RECV;
+ s_i = cb->args[1];
+ s_num = num = cb->args[2];
+
+ if (cb->args[0] == 0) {
+ if (!(idiag_states & TCPF_LISTEN) || r->id.idiag_dport)
+ goto skip_listen_ht;
+
+ for (i = s_i; i <= hashinfo->lhash2_mask; i++) {
+ struct inet_listen_hashbucket *ilb;
+ struct hlist_nulls_node *node;
+
+ num = 0;
+ ilb = &hashinfo->lhash2[i];
+
+ if (hlist_nulls_empty(&ilb->nulls_head)) {
+ s_num = 0;
+ continue;
+ }
+ spin_lock(&ilb->lock);
+ sk_nulls_for_each(sk, node, &ilb->nulls_head) {
+ struct inet_sock *inet = inet_sk(sk);
+
+ if (!net_eq(sock_net(sk), net))
+ continue;
+
+ if (num < s_num) {
+ num++;
+ continue;
+ }
+
+ if (r->sdiag_family != AF_UNSPEC &&
+ sk->sk_family != r->sdiag_family)
+ goto next_listen;
+
+ if (r->id.idiag_sport != inet->inet_sport &&
+ r->id.idiag_sport)
+ goto next_listen;
+
+ if (!inet_diag_bc_sk(cb_data, sk))
+ goto next_listen;
+
+ if (inet_sk_diag_fill(sk, inet_csk(sk), skb,
+ cb, r, NLM_F_MULTI,
+ net_admin) < 0) {
+ spin_unlock(&ilb->lock);
+ goto done;
+ }
+
+next_listen:
+ ++num;
+ }
+ spin_unlock(&ilb->lock);
+
+ s_num = 0;
+ }
+skip_listen_ht:
+ cb->args[0] = 1;
+ s_i = num = s_num = 0;
+ }
+
+/* Process a maximum of SKARR_SZ sockets at a time when walking hash buckets
+ * with bh disabled.
+ */
+#define SKARR_SZ 16
+
+ /* Dump bound but inactive (not listening, connecting, etc.) sockets */
+ if (cb->args[0] == 1) {
+ if (!(idiag_states & TCPF_BOUND_INACTIVE))
+ goto skip_bind_ht;
+
+ for (i = s_i; i < hashinfo->bhash_size; i++) {
+ struct inet_bind_hashbucket *ibb;
+ struct inet_bind2_bucket *tb2;
+ struct sock *sk_arr[SKARR_SZ];
+ int num_arr[SKARR_SZ];
+ int idx, accum, res;
+
+resume_bind_walk:
+ num = 0;
+ accum = 0;
+ ibb = &hashinfo->bhash2[i];
+
+ if (hlist_empty(&ibb->chain)) {
+ s_num = 0;
+ continue;
+ }
+ spin_lock_bh(&ibb->lock);
+ inet_bind_bucket_for_each(tb2, &ibb->chain) {
+ if (!net_eq(ib2_net(tb2), net))
+ continue;
+
+ sk_for_each_bound(sk, &tb2->owners) {
+ struct inet_sock *inet = inet_sk(sk);
+
+ if (num < s_num)
+ goto next_bind;
+
+ if (sk->sk_state != TCP_CLOSE ||
+ !inet->inet_num)
+ goto next_bind;
+
+ if (r->sdiag_family != AF_UNSPEC &&
+ r->sdiag_family != sk->sk_family)
+ goto next_bind;
+
+ if (!inet_diag_bc_sk(cb_data, sk))
+ goto next_bind;
+
+ sock_hold(sk);
+ num_arr[accum] = num;
+ sk_arr[accum] = sk;
+ if (++accum == SKARR_SZ)
+ goto pause_bind_walk;
+next_bind:
+ num++;
+ }
+ }
+pause_bind_walk:
+ spin_unlock_bh(&ibb->lock);
+
+ res = 0;
+ for (idx = 0; idx < accum; idx++) {
+ if (res >= 0) {
+ res = inet_sk_diag_fill(sk_arr[idx],
+ NULL, skb, cb,
+ r, NLM_F_MULTI,
+ net_admin);
+ if (res < 0)
+ num = num_arr[idx];
+ }
+ sock_put(sk_arr[idx]);
+ }
+ if (res < 0)
+ goto done;
+
+ cond_resched();
+
+ if (accum == SKARR_SZ) {
+ s_num = num + 1;
+ goto resume_bind_walk;
+ }
+
+ s_num = 0;
+ }
+skip_bind_ht:
+ cb->args[0] = 2;
+ s_i = num = s_num = 0;
+ }
- inet_diag_dump_icsk(hinfo, skb, cb, r);
+ if (!(idiag_states & ~TCPF_LISTEN))
+ goto out;
+
+ for (i = s_i; i <= hashinfo->ehash_mask; i++) {
+ struct inet_ehash_bucket *head = &hashinfo->ehash[i];
+ spinlock_t *lock = inet_ehash_lockp(hashinfo, i);
+ struct hlist_nulls_node *node;
+ struct sock *sk_arr[SKARR_SZ];
+ int num_arr[SKARR_SZ];
+ int idx, accum, res;
+
+ if (hlist_nulls_empty(&head->chain))
+ continue;
+
+ if (i > s_i)
+ s_num = 0;
+
+next_chunk:
+ num = 0;
+ accum = 0;
+ spin_lock_bh(lock);
+ sk_nulls_for_each(sk, node, &head->chain) {
+ int state;
+
+ if (!net_eq(sock_net(sk), net))
+ continue;
+ if (num < s_num)
+ goto next_normal;
+ state = (sk->sk_state == TCP_TIME_WAIT) ?
+ READ_ONCE(inet_twsk(sk)->tw_substate) : sk->sk_state;
+ if (!(idiag_states & (1 << state)))
+ goto next_normal;
+ if (r->sdiag_family != AF_UNSPEC &&
+ sk->sk_family != r->sdiag_family)
+ goto next_normal;
+ if (r->id.idiag_sport != htons(sk->sk_num) &&
+ r->id.idiag_sport)
+ goto next_normal;
+ if (r->id.idiag_dport != sk->sk_dport &&
+ r->id.idiag_dport)
+ goto next_normal;
+ twsk_build_assert();
+
+ if (!inet_diag_bc_sk(cb_data, sk))
+ goto next_normal;
+
+ if (!refcount_inc_not_zero(&sk->sk_refcnt))
+ goto next_normal;
+
+ num_arr[accum] = num;
+ sk_arr[accum] = sk;
+ if (++accum == SKARR_SZ)
+ break;
+next_normal:
+ ++num;
+ }
+ spin_unlock_bh(lock);
+
+ res = 0;
+ for (idx = 0; idx < accum; idx++) {
+ if (res >= 0) {
+ res = sk_diag_fill(sk_arr[idx], skb, cb, r,
+ NLM_F_MULTI, net_admin);
+ if (res < 0)
+ num = num_arr[idx];
+ }
+ sock_gen_put(sk_arr[idx]);
+ }
+ if (res < 0)
+ break;
+
+ cond_resched();
+
+ if (accum == SKARR_SZ) {
+ s_num = num + 1;
+ goto next_chunk;
+ }
+ }
+
+done:
+ cb->args[1] = i;
+ cb->args[2] = num;
+out:
+ ;
+}
+
+static struct sock *tcp_diag_find_one_icsk(struct net *net,
+ const struct inet_diag_req_v2 *req)
+{
+ struct sock *sk;
+
+ rcu_read_lock();
+ if (req->sdiag_family == AF_INET) {
+ sk = inet_lookup(net, NULL, 0, req->id.idiag_dst[0],
+ req->id.idiag_dport, req->id.idiag_src[0],
+ req->id.idiag_sport, req->id.idiag_if);
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if (req->sdiag_family == AF_INET6) {
+ if (ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_dst) &&
+ ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_src))
+ sk = inet_lookup(net, NULL, 0, req->id.idiag_dst[3],
+ req->id.idiag_dport, req->id.idiag_src[3],
+ req->id.idiag_sport, req->id.idiag_if);
+ else
+ sk = inet6_lookup(net, NULL, 0,
+ (struct in6_addr *)req->id.idiag_dst,
+ req->id.idiag_dport,
+ (struct in6_addr *)req->id.idiag_src,
+ req->id.idiag_sport,
+ req->id.idiag_if);
+#endif
+ } else {
+ rcu_read_unlock();
+ return ERR_PTR(-EINVAL);
+ }
+ rcu_read_unlock();
+ if (!sk)
+ return ERR_PTR(-ENOENT);
+
+ if (sock_diag_check_cookie(sk, req->id.idiag_cookie)) {
+ sock_gen_put(sk);
+ return ERR_PTR(-ENOENT);
+ }
+
+ return sk;
}
static int tcp_diag_dump_one(struct netlink_callback *cb,
const struct inet_diag_req_v2 *req)
{
- struct inet_hashinfo *hinfo;
+ struct sk_buff *in_skb = cb->skb;
+ struct sk_buff *rep;
+ struct sock *sk;
+ struct net *net;
+ bool net_admin;
+ int err;
- hinfo = sock_net(cb->skb->sk)->ipv4.tcp_death_row.hashinfo;
+ net = sock_net(in_skb->sk);
+ sk = tcp_diag_find_one_icsk(net, req);
+ if (IS_ERR(sk))
+ return PTR_ERR(sk);
- return inet_diag_dump_one_icsk(hinfo, cb, req);
+ net_admin = netlink_net_capable(in_skb, CAP_NET_ADMIN);
+ rep = nlmsg_new(tcp_diag_get_aux_size(sk, net_admin), GFP_KERNEL);
+ if (!rep) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ err = sk_diag_fill(sk, rep, cb, req, 0, net_admin);
+ if (err < 0) {
+ WARN_ON(err == -EMSGSIZE);
+ nlmsg_free(rep);
+ goto out;
+ }
+ err = nlmsg_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid);
+
+out:
+ if (sk)
+ sock_gen_put(sk);
+
+ return err;
}
#ifdef CONFIG_INET_DIAG_DESTROY
@@ -202,13 +643,10 @@ static int tcp_diag_destroy(struct sk_buff *in_skb,
const struct inet_diag_req_v2 *req)
{
struct net *net = sock_net(in_skb->sk);
- struct inet_hashinfo *hinfo;
struct sock *sk;
int err;
- hinfo = net->ipv4.tcp_death_row.hashinfo;
- sk = inet_diag_find_one_icsk(net, hinfo, req);
-
+ sk = tcp_diag_find_one_icsk(net, req);
if (IS_ERR(sk))
return PTR_ERR(sk);
@@ -226,7 +664,6 @@ static const struct inet_diag_handler tcp_diag_handler = {
.dump_one = tcp_diag_dump_one,
.idiag_get_info = tcp_diag_get_info,
.idiag_get_aux = tcp_diag_get_aux,
- .idiag_get_aux_size = tcp_diag_get_aux_size,
.idiag_type = IPPROTO_TCP,
.idiag_info_size = sizeof(struct tcp_info),
#ifdef CONFIG_INET_DIAG_DESTROY
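
The private tcp_diag_dump() keeps the batching scheme of the generic inet_diag_dump_icsk() it replaces: established-hash buckets are walked with BH disabled, so at most SKARR_SZ sockets are collected per lock hold and the (possibly allocating) netlink fill runs after the unlock. Stripped to a skeleton — matches() is a stand-in for the filter checks in the hunk above:

spin_lock_bh(lock);
sk_nulls_for_each(sk, node, &head->chain) {
        if (!matches(sk) || !refcount_inc_not_zero(&sk->sk_refcnt))
                continue;
        sk_arr[accum] = sk;
        if (++accum == SKARR_SZ)
                break;                  /* resume the chunk after unlock */
}
spin_unlock_bh(lock);

for (idx = 0; idx < accum; idx++) {
        /* may allocate, hence done outside the BH-disabled section */
        sk_diag_fill(sk_arr[idx], skb, cb, r, NLM_F_MULTI, net_admin);
        sock_gen_put(sk_arr[idx]);
}
cond_resched();
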
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index f1884f0c9e52..7d945a527daf 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -576,11 +576,12 @@ void tcp_fastopen_active_disable_ofo_check(struct sock *sk)
}
} else if (tp->syn_fastopen_ch &&
atomic_read(&sock_net(sk)->ipv4.tfo_active_disable_times)) {
- dst = sk_dst_get(sk);
- dev = dst ? dst_dev(dst) : NULL;
+ rcu_read_lock();
+ dst = __sk_dst_get(sk);
+ dev = dst ? dst_dev_rcu(dst) : NULL;
if (!(dev && (dev->flags & IFF_LOOPBACK)))
atomic_set(&sock_net(sk)->ipv4.tfo_active_disable_times, 0);
- dst_release(dst);
+ rcu_read_unlock();
}
}
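
tcp_fastopen_active_disable_ofo_check() moves from sk_dst_get(), which takes a reference that must be returned with dst_release(), to the cheaper RCU pattern used throughout this series: the dst and its device are only touched inside a single RCU read-side section.

rcu_read_lock();
dst = __sk_dst_get(sk);                 /* no refcount taken */
dev = dst ? dst_dev_rcu(dst) : NULL;
/* ... only cheap flag reads on dev in here ... */
rcu_read_unlock();                      /* replaces dst_release(dst) */
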
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 71b76e98371a..f1be65af1a77 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2569,7 +2569,7 @@ static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo)
if (frto_undo)
NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPSPURIOUSRTOS);
- inet_csk(sk)->icsk_retransmits = 0;
+ WRITE_ONCE(inet_csk(sk)->icsk_retransmits, 0);
if (tcp_is_non_sack_preventing_reopen(sk))
return true;
if (frto_undo || tcp_is_sack(tp)) {
@@ -3851,7 +3851,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
if (after(ack, prior_snd_una)) {
flag |= FLAG_SND_UNA_ADVANCED;
- icsk->icsk_retransmits = 0;
+ WRITE_ONCE(icsk->icsk_retransmits, 0);
#if IS_ENABLED(CONFIG_TLS_DEVICE)
if (static_branch_unlikely(&clean_acked_data_enabled.key))
@@ -3913,7 +3913,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
* log. Something worked...
*/
WRITE_ONCE(sk->sk_err_soft, 0);
- icsk->icsk_probes_out = 0;
+ WRITE_ONCE(icsk->icsk_probes_out, 0);
tp->rcv_tstamp = tcp_jiffies32;
if (!prior_packets)
goto no_queue;
@@ -4830,7 +4830,7 @@ static bool tcp_ooo_try_coalesce(struct sock *sk,
noinline_for_tracing static void
tcp_drop_reason(struct sock *sk, struct sk_buff *skb, enum skb_drop_reason reason)
{
- sk_drops_add(sk, skb);
+ sk_drops_skbadd(sk, skb);
sk_skb_reason_drop(sk, skb, reason);
}
@@ -6297,7 +6297,7 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
u16 mss = tp->rx_opt.mss_clamp, try_exp = 0;
bool syn_drop = false;
- if (mss == tp->rx_opt.user_mss) {
+ if (mss == READ_ONCE(tp->rx_opt.user_mss)) {
struct tcp_options_received opt;
/* Get original SYNACK MSS value if user MSS sets mss_clamp */
@@ -6636,7 +6636,7 @@ static void tcp_rcv_synrecv_state_fastopen(struct sock *sk)
tcp_try_undo_recovery(sk);
tcp_update_rto_time(tp);
- inet_csk(sk)->icsk_retransmits = 0;
+ WRITE_ONCE(inet_csk(sk)->icsk_retransmits, 0);
/* In tcp_fastopen_synack_timer() on the first SYNACK RTO we set
* retrans_stamp but don't enter CA_Loss, so in case that happened we
* need to zero retrans_stamp here to prevent spurious
@@ -7117,7 +7117,7 @@ u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops,
return 0;
}
- mss = tcp_parse_mss_option(th, tp->rx_opt.user_mss);
+ mss = tcp_parse_mss_option(th, READ_ONCE(tp->rx_opt.user_mss));
if (!mss)
mss = af_ops->mss_clamp;
@@ -7131,7 +7131,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
{
struct tcp_fastopen_cookie foc = { .len = -1 };
struct tcp_options_received tmp_opt;
- struct tcp_sock *tp = tcp_sk(sk);
+ const struct tcp_sock *tp = tcp_sk(sk);
struct net *net = sock_net(sk);
struct sock *fastopen_sk = NULL;
struct request_sock *req;
@@ -7182,7 +7182,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
tcp_clear_options(&tmp_opt);
tmp_opt.mss_clamp = af_ops->mss_clamp;
- tmp_opt.user_mss = tp->rx_opt.user_mss;
+ tmp_opt.user_mss = READ_ONCE(tp->rx_opt.user_mss);
tcp_parse_options(sock_net(sk), skb, &tmp_opt, 0,
want_cookie ? NULL : &foc);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 84d3d556ed80..1e58a8a9ff7a 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -506,8 +506,7 @@ int tcp_v4_err(struct sk_buff *skb, u32 info)
struct sock *sk;
int err;
- sk = __inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
- iph->daddr, th->dest, iph->saddr,
+ sk = __inet_lookup_established(net, iph->daddr, th->dest, iph->saddr,
ntohs(th->source), inet_iif(skb), 0);
if (!sk) {
__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
@@ -823,8 +822,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb,
* Incoming packet is checked with md5 hash with finding key,
* no RST generated if md5 hash doesn't match.
*/
- sk1 = __inet_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
- NULL, 0, ip_hdr(skb)->saddr,
+ sk1 = __inet_lookup_listener(net, NULL, 0, ip_hdr(skb)->saddr,
th->source, ip_hdr(skb)->daddr,
ntohs(th->source), dif, sdif);
/* don't send rst if it can't find key */
@@ -1992,8 +1990,7 @@ int tcp_v4_early_demux(struct sk_buff *skb)
if (th->doff < sizeof(struct tcphdr) / 4)
return 0;
- sk = __inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
- iph->saddr, th->source,
+ sk = __inet_lookup_established(net, iph->saddr, th->source,
iph->daddr, ntohs(th->dest),
skb->skb_iif, inet_sdif(skb));
if (sk) {
@@ -2236,8 +2233,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
th = (const struct tcphdr *)skb->data;
iph = ip_hdr(skb);
lookup:
- sk = __inet_lookup_skb(net->ipv4.tcp_death_row.hashinfo,
- skb, __tcp_hdrlen(th), th->source,
+ sk = __inet_lookup_skb(skb, __tcp_hdrlen(th), th->source,
th->dest, sdif, &refcounted);
if (!sk)
goto no_tcp_socket;
@@ -2258,7 +2254,7 @@ lookup:
&iph->saddr, &iph->daddr,
AF_INET, dif, sdif);
if (unlikely(drop_reason)) {
- sk_drops_add(sk, skb);
+ sk_drops_skbadd(sk, skb);
reqsk_put(req);
goto discard_it;
}
@@ -2403,7 +2399,7 @@ discard_it:
return 0;
discard_and_relse:
- sk_drops_add(sk, skb);
+ sk_drops_skbadd(sk, skb);
if (refcounted)
sock_put(sk);
goto discard_it;
@@ -2426,9 +2422,7 @@ do_time_wait:
&drop_reason);
switch (tw_status) {
case TCP_TW_SYN: {
- struct sock *sk2 = inet_lookup_listener(net,
- net->ipv4.tcp_death_row.hashinfo,
- skb, __tcp_hdrlen(th),
+ struct sock *sk2 = inet_lookup_listener(net, skb, __tcp_hdrlen(th),
iph->saddr, th->source,
iph->daddr, th->dest,
inet_iif(skb),
@@ -2459,7 +2453,6 @@ do_time_wait:
static struct timewait_sock_ops tcp_timewait_sock_ops = {
.twsk_obj_size = sizeof(struct tcp_timewait_sock),
- .twsk_destructor= tcp_twsk_destructor,
};
void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
@@ -2958,9 +2951,9 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
rx_queue,
timer_active,
jiffies_delta_to_clock_t(timer_expires - jiffies),
- icsk->icsk_retransmits,
+ READ_ONCE(icsk->icsk_retransmits),
from_kuid_munged(seq_user_ns(f), sk_uid(sk)),
- icsk->icsk_probes_out,
+ READ_ONCE(icsk->icsk_probes_out),
sock_i_ino(sk),
refcount_read(&sk->sk_refcnt), sk,
jiffies_to_clock_t(icsk->icsk_rto),
@@ -3524,7 +3517,6 @@ struct proto tcp_prot = {
.leave_memory_pressure = tcp_leave_memory_pressure,
.stream_memory_free = tcp_stream_memory_free,
.sockets_allocated = &tcp_sockets_allocated,
- .orphan_count = &tcp_orphan_count,
.memory_allocated = &net_aligned_data.tcp_memory_allocated,
.per_cpu_fw_alloc = &tcp_memory_per_cpu_fw_alloc,
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 03c068ea27b6..10e86f1008e9 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -170,7 +170,7 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst,
struct net *net;
spin_lock_bh(&tcp_metrics_lock);
- net = dev_net_rcu(dst_dev(dst));
+ net = dst_dev_net_rcu(dst);
/* While waiting for the spin-lock the cache might have been populated
* with this entry and so we have to check again.
@@ -273,7 +273,7 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req,
return NULL;
}
- net = dev_net_rcu(dst_dev(dst));
+ net = dst_dev_net_rcu(dst);
hash ^= net_hash_mix(net);
hash = hash_32(hash, tcp_metrics_hash_log);
@@ -318,7 +318,7 @@ static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk,
else
return NULL;
- net = dev_net_rcu(dst_dev(dst));
+ net = dst_dev_net_rcu(dst);
hash ^= net_hash_mix(net);
hash = hash_32(hash, tcp_metrics_hash_log);
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 2994c9222c9c..d1c9e4088646 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -401,7 +401,6 @@ void tcp_twsk_destructor(struct sock *sk)
#endif
tcp_ao_destroy_sock(sk, true);
}
-EXPORT_IPV6_MOD_GPL(tcp_twsk_destructor);
void tcp_twsk_purge(struct list_head *net_exit_list)
{
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index be5c2294610e..e6612bd84d09 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -434,8 +434,7 @@ static void tcp4_check_fraglist_gro(struct list_head *head, struct sk_buff *skb,
inet_get_iif_sdif(skb, &iif, &sdif);
iph = skb_gro_network_header(skb);
net = dev_net_rcu(skb->dev);
- sk = __inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
- iph->saddr, th->source,
+ sk = __inet_lookup_established(net, iph->saddr, th->source,
iph->daddr, ntohs(th->dest),
iif, sdif);
NAPI_GRO_CB(skb)->is_flist = !sk;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index caf11920a878..e180364b8dda 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -3578,9 +3578,8 @@ void sk_forced_mem_schedule(struct sock *sk, int size)
sk_forward_alloc_add(sk, amt << PAGE_SHIFT);
sk_memory_allocated_add(sk, amt);
- if (mem_cgroup_sockets_enabled && sk->sk_memcg)
- mem_cgroup_charge_skmem(sk->sk_memcg, amt,
- gfp_memcg_charge() | __GFP_NOFAIL);
+ if (mem_cgroup_sk_enabled(sk))
+ mem_cgroup_sk_charge(sk, amt, gfp_memcg_charge() | __GFP_NOFAIL);
}
/* Send a FIN. The caller locks the socket for us.
@@ -3891,6 +3890,7 @@ static void tcp_connect_init(struct sock *sk)
const struct dst_entry *dst = __sk_dst_get(sk);
struct tcp_sock *tp = tcp_sk(sk);
__u8 rcv_wscale;
+ u16 user_mss;
u32 rcv_wnd;
/* We'll fix this up when we get a response from the other end.
@@ -3903,8 +3903,9 @@ static void tcp_connect_init(struct sock *sk)
tcp_ao_connect_init(sk);
/* If user gave his TCP_MAXSEG, record it to clamp */
- if (tp->rx_opt.user_mss)
- tp->rx_opt.mss_clamp = tp->rx_opt.user_mss;
+ user_mss = READ_ONCE(tp->rx_opt.user_mss);
+ if (user_mss)
+ tp->rx_opt.mss_clamp = user_mss;
tp->max_window = 0;
tcp_mtup_init(sk);
tcp_sync_mss(sk, dst_mtu(dst));
@@ -3955,7 +3956,7 @@ static void tcp_connect_init(struct sock *sk)
WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
inet_csk(sk)->icsk_rto = tcp_timeout_init(sk);
- inet_csk(sk)->icsk_retransmits = 0;
+ WRITE_ONCE(inet_csk(sk)->icsk_retransmits, 0);
tcp_clear_retrans(tp);
}
@@ -4393,13 +4394,13 @@ void tcp_send_probe0(struct sock *sk)
if (tp->packets_out || tcp_write_queue_empty(sk)) {
/* Cancel probe timer, if it is not required. */
- icsk->icsk_probes_out = 0;
+ WRITE_ONCE(icsk->icsk_probes_out, 0);
icsk->icsk_backoff = 0;
icsk->icsk_probes_tstamp = 0;
return;
}
- icsk->icsk_probes_out++;
+ WRITE_ONCE(icsk->icsk_probes_out, icsk->icsk_probes_out + 1);
if (err <= 0) {
if (icsk->icsk_backoff < READ_ONCE(net->ipv4.sysctl_tcp_retries2))
icsk->icsk_backoff++;
@@ -4437,7 +4438,7 @@ int tcp_rtx_synack(const struct sock *sk, struct request_sock *req)
tcp_sk_rw(sk)->total_retrans++;
}
trace_tcp_retransmit_synack(sk, req);
- req->num_retrans++;
+ WRITE_ONCE(req->num_retrans, req->num_retrans + 1);
}
return res;
}
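
sk_forced_mem_schedule() switches to memcg wrappers rather than open-coding the static-key test and the sk->sk_memcg dereference. A hedged sketch of what the helpers are assumed to look like:

/* Sketch; the real definitions live near include/net/sock.h. */
static inline bool mem_cgroup_sk_enabled(const struct sock *sk)
{
        return mem_cgroup_sockets_enabled && sk->sk_memcg;
}

static inline bool mem_cgroup_sk_charge(const struct sock *sk,
                                        unsigned int nr_pages, gfp_t gfp)
{
        return mem_cgroup_charge_skmem(sk->sk_memcg, nr_pages, gfp);
}
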
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index a207877270fb..2dd73a4e8e51 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -392,7 +392,7 @@ static void tcp_probe_timer(struct sock *sk)
int max_probes;
if (tp->packets_out || !skb) {
- icsk->icsk_probes_out = 0;
+ WRITE_ONCE(icsk->icsk_probes_out, 0);
icsk->icsk_probes_tstamp = 0;
return;
}
@@ -444,7 +444,7 @@ static void tcp_update_rto_stats(struct sock *sk)
tp->total_rto_recoveries++;
tp->rto_stamp = tcp_time_stamp_ms(tp);
}
- icsk->icsk_retransmits++;
+ WRITE_ONCE(icsk->icsk_retransmits, icsk->icsk_retransmits + 1);
tp->total_rto++;
}
@@ -839,7 +839,7 @@ static void tcp_keepalive_timer(struct timer_list *t)
goto out;
}
if (tcp_write_wakeup(sk, LINUX_MIB_TCPKEEPALIVE) <= 0) {
- icsk->icsk_probes_out++;
+ WRITE_ONCE(icsk->icsk_probes_out, icsk->icsk_probes_out + 1);
elapsed = keepalive_intvl_when(tp);
} else {
/* If keepalive was lost due to local congestion,
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index cc3ce0f762ec..732bdad43626 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1787,7 +1787,7 @@ uncharge_drop:
atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
drop:
- atomic_inc(&sk->sk_drops);
+ sk_drops_inc(sk);
busylock_release(busy);
return err;
}
@@ -1852,7 +1852,7 @@ static struct sk_buff *__first_packet_length(struct sock *sk,
IS_UDPLITE(sk));
__UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS,
IS_UDPLITE(sk));
- atomic_inc(&sk->sk_drops);
+ sk_drops_inc(sk);
__skb_unlink(skb, rcvq);
*total += skb->truesize;
kfree_skb_reason(skb, SKB_DROP_REASON_UDP_CSUM);
@@ -2008,7 +2008,7 @@ try_again:
__UDP_INC_STATS(net, UDP_MIB_CSUMERRORS, is_udplite);
__UDP_INC_STATS(net, UDP_MIB_INERRORS, is_udplite);
- atomic_inc(&sk->sk_drops);
+ sk_drops_inc(sk);
kfree_skb_reason(skb, SKB_DROP_REASON_UDP_CSUM);
goto try_again;
}
@@ -2078,7 +2078,7 @@ try_again:
if (unlikely(err)) {
if (!peeking) {
- atomic_inc(&sk->sk_drops);
+ sk_drops_inc(sk);
UDP_INC_STATS(sock_net(sk),
UDP_MIB_INERRORS, is_udplite);
}
@@ -2449,7 +2449,7 @@ csum_error:
__UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
drop:
__UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
- atomic_inc(&sk->sk_drops);
+ sk_drops_inc(sk);
sk_skb_reason_drop(sk, skb, drop_reason);
return -1;
}
@@ -2534,7 +2534,7 @@ start_lookup:
nskb = skb_clone(skb, GFP_ATOMIC);
if (unlikely(!nskb)) {
- atomic_inc(&sk->sk_drops);
+ sk_drops_inc(sk);
__UDP_INC_STATS(net, UDP_MIB_RCVBUFERRORS,
IS_UDPLITE(sk));
__UDP_INC_STATS(net, UDP_MIB_INERRORS,
@@ -3386,7 +3386,7 @@ static void udp4_format_sock(struct sock *sp, struct seq_file *f,
from_kuid_munged(seq_user_ns(f), sk_uid(sp)),
0, sock_i_ino(sp),
refcount_read(&sp->sk_refcnt), sp,
- atomic_read(&sp->sk_drops));
+ sk_drops_read(sp));
}
int udp4_seq_show(struct seq_file *seq, void *v)
diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c
index 38cb3a28e4ed..6e491c720c90 100644
--- a/net/ipv4/udp_diag.c
+++ b/net/ipv4/udp_diag.c
@@ -16,9 +16,9 @@
static int sk_diag_dump(struct sock *sk, struct sk_buff *skb,
struct netlink_callback *cb,
const struct inet_diag_req_v2 *req,
- struct nlattr *bc, bool net_admin)
+ bool net_admin)
{
- if (!inet_diag_bc_sk(bc, sk))
+ if (!inet_diag_bc_sk(cb->data, sk))
return 0;
return inet_sk_diag_fill(sk, NULL, skb, cb, req, NLM_F_MULTI,
@@ -92,12 +92,8 @@ static void udp_dump(struct udp_table *table, struct sk_buff *skb,
{
bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN);
struct net *net = sock_net(skb->sk);
- struct inet_diag_dump_data *cb_data;
int num, s_num, slot, s_slot;
- struct nlattr *bc;
- cb_data = cb->data;
- bc = cb_data->inet_diag_nla_bc;
s_slot = cb->args[0];
num = s_num = cb->args[1];
@@ -130,7 +126,7 @@ static void udp_dump(struct udp_table *table, struct sk_buff *skb,
r->id.idiag_dport)
goto next;
- if (sk_diag_dump(sk, skb, cb, r, bc, net_admin) < 0) {
+ if (sk_diag_dump(sk, skb, cb, r, net_admin) < 0) {
spin_unlock_bh(&hslot->lock);
goto done;
}
diff --git a/net/ipv4/udp_tunnel_core.c b/net/ipv4/udp_tunnel_core.c
index fce945f23069..54386e06a813 100644
--- a/net/ipv4/udp_tunnel_core.c
+++ b/net/ipv4/udp_tunnel_core.c
@@ -4,6 +4,7 @@
#include <linux/socket.h>
#include <linux/kernel.h>
#include <net/dst_metadata.h>
+#include <net/flow.h>
#include <net/udp.h>
#include <net/udp_tunnel.h>
#include <net/inet_dscp.h>
@@ -253,7 +254,7 @@ struct rtable *udp_tunnel_dst_lookup(struct sk_buff *skb,
fl4.saddr = key->u.ipv4.src;
fl4.fl4_dport = dport;
fl4.fl4_sport = sport;
- fl4.flowi4_tos = tos & INET_DSCP_MASK;
+ fl4.flowi4_dscp = inet_dsfield_to_dscp(tos);
fl4.flowi4_flags = key->flow_flags;
rt = ip_route_output_key(net, &fl4);
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 7fb6205619e7..58faf1ddd2b1 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -14,7 +14,7 @@
#include <linux/inetdevice.h>
#include <net/dst.h>
#include <net/xfrm.h>
-#include <net/inet_dscp.h>
+#include <net/flow.h>
#include <net/ip.h>
#include <net/l3mdev.h>
@@ -25,7 +25,7 @@ static struct dst_entry *__xfrm4_dst_lookup(struct flowi4 *fl4,
memset(fl4, 0, sizeof(*fl4));
fl4->daddr = params->daddr->a4;
- fl4->flowi4_tos = inet_dscp_to_dsfield(params->dscp);
+ fl4->flowi4_dscp = params->dscp;
fl4->flowi4_l3mdev = l3mdev_master_ifindex_by_index(params->net,
params->oif);
fl4->flowi4_mark = params->mark;
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 1c9c686d9522..b8f9a8c0302e 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -304,10 +304,9 @@ config IPV6_SEG6_LWTUNNEL
config IPV6_SEG6_HMAC
bool "IPv6: Segment Routing HMAC support"
depends on IPV6
- select CRYPTO
- select CRYPTO_HMAC
- select CRYPTO_SHA1
- select CRYPTO_SHA256
+ select CRYPTO_LIB_SHA1
+ select CRYPTO_LIB_SHA256
+ select CRYPTO_LIB_UTILS
help
Support for HMAC signature generation and verification
of SR-enabled packets.
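
The Kconfig change moves IPV6_SEG6_HMAC from the crypto_shash API to the crypto library interface, which needs no transform allocation and has no runtime failure path. For the SHA-256 case the one-shot library call is simply (HMAC keying elided here; seg6_hmac.c holds the real usage):

#include <crypto/sha2.h>

u8 digest[SHA256_DIGEST_SIZE];

sha256(data, len, digest);      /* no crypto_alloc_shash(), no error path */
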
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index f17a5dd4789f..40e9c336f6c5 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -7238,7 +7238,9 @@ static const struct ctl_table addrconf_sysctl[] = {
.data = &ipv6_devconf.rpl_seg_enabled,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
{
.procname = "ioam6_enabled",
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index f8a8e46286b8..52599584422b 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -104,7 +104,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
rcu_read_lock();
rt = rt6_lookup(net, addr, NULL, 0, NULL, 0);
if (rt) {
- dev = dst_dev(&rt->dst);
+ dev = dst_dev_rcu(&rt->dst);
netdev_hold(dev, &dev_tracker, GFP_ATOMIC);
ip6_rt_put(rt);
} else if (ishost) {
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 972bf0426d59..33ebe93d80e3 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -1068,5 +1068,5 @@ void __ip6_dgram_sock_seq_show(struct seq_file *seq, struct sock *sp,
0,
sock_i_ino(sp),
refcount_read(&sp->sk_refcnt), sp,
- atomic_read(&sp->sk_drops));
+ sk_drops_read(sp));
}
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 72adfc107b55..e75da98f5283 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -149,8 +149,8 @@ static struct sock *esp6_find_tcp_sk(struct xfrm_state *x)
dport = encap->encap_dport;
spin_unlock_bh(&x->lock);
- sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, &x->id.daddr.in6,
- dport, &x->props.saddr.in6, ntohs(sport), 0, 0);
+ sk = __inet6_lookup_established(net, &x->id.daddr.in6, dport,
+ &x->props.saddr.in6, ntohs(sport), 0, 0);
if (!sk)
return ERR_PTR(-ENOENT);
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 44550957fd4e..95cdd4cacb00 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -209,7 +209,8 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
* this lookup should be more aggressive (not longer than timeout).
*/
dst = ip6_route_output(net, sk, fl6);
- dev = dst_dev(dst);
+ rcu_read_lock();
+ dev = dst_dev_rcu(dst);
if (dst->error) {
IP6_INC_STATS(net, ip6_dst_idev(dst),
IPSTATS_MIB_OUTNOROUTES);
@@ -224,11 +225,10 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
if (rt->rt6i_dst.plen < 128)
tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
- rcu_read_lock();
peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr);
res = inet_peer_xrlim_allow(peer, tmo);
- rcu_read_unlock();
}
+ rcu_read_unlock();
if (!res)
__ICMP6_INC_STATS(net, ip6_dst_idev(dst),
ICMP6_MIB_RATELIMITHOST);
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 76ee521189eb..a3a9ea49fee2 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -47,24 +47,23 @@ EXPORT_SYMBOL_GPL(inet6_ehashfn);
* The sockhash lock must be held as a reader here.
*/
struct sock *__inet6_lookup_established(const struct net *net,
- struct inet_hashinfo *hashinfo,
- const struct in6_addr *saddr,
- const __be16 sport,
- const struct in6_addr *daddr,
- const u16 hnum,
- const int dif, const int sdif)
+ const struct in6_addr *saddr,
+ const __be16 sport,
+ const struct in6_addr *daddr,
+ const u16 hnum,
+ const int dif, const int sdif)
{
- struct sock *sk;
- const struct hlist_nulls_node *node;
const __portpair ports = INET_COMBINED_PORTS(sport, hnum);
- /* Optimize here for direct hit, only listening connections can
- * have wildcards anyways.
- */
- unsigned int hash = inet6_ehashfn(net, daddr, hnum, saddr, sport);
- unsigned int slot = hash & hashinfo->ehash_mask;
- struct inet_ehash_bucket *head = &hashinfo->ehash[slot];
-
+ const struct hlist_nulls_node *node;
+ struct inet_ehash_bucket *head;
+ struct inet_hashinfo *hashinfo;
+ unsigned int hash, slot;
+ struct sock *sk;
+ hashinfo = net->ipv4.tcp_death_row.hashinfo;
+ hash = inet6_ehashfn(net, daddr, hnum, saddr, sport);
+ slot = hash & hashinfo->ehash_mask;
+ head = &hashinfo->ehash[slot];
begin:
sk_nulls_for_each_rcu(sk, node, &head->chain) {
if (sk->sk_hash != hash)
@@ -200,19 +199,20 @@ struct sock *inet6_lookup_run_sk_lookup(const struct net *net,
EXPORT_SYMBOL_GPL(inet6_lookup_run_sk_lookup);
struct sock *inet6_lookup_listener(const struct net *net,
- struct inet_hashinfo *hashinfo,
- struct sk_buff *skb, int doff,
- const struct in6_addr *saddr,
- const __be16 sport, const struct in6_addr *daddr,
- const unsigned short hnum, const int dif, const int sdif)
+ struct sk_buff *skb, int doff,
+ const struct in6_addr *saddr,
+ const __be16 sport,
+ const struct in6_addr *daddr,
+ const unsigned short hnum,
+ const int dif, const int sdif)
{
struct inet_listen_hashbucket *ilb2;
+ struct inet_hashinfo *hashinfo;
struct sock *result = NULL;
unsigned int hash2;
/* Lookup redirect from BPF */
- if (static_branch_unlikely(&bpf_sk_lookup_enabled) &&
- hashinfo == net->ipv4.tcp_death_row.hashinfo) {
+ if (static_branch_unlikely(&bpf_sk_lookup_enabled)) {
result = inet6_lookup_run_sk_lookup(net, IPPROTO_TCP, skb, doff,
saddr, sport, daddr, hnum, dif,
inet6_ehashfn);
@@ -220,6 +220,7 @@ struct sock *inet6_lookup_listener(const struct net *net,
goto done;
}
+ hashinfo = net->ipv4.tcp_death_row.hashinfo;
hash2 = ipv6_portaddr_hash(net, daddr, hnum);
ilb2 = inet_lhash2_bucket(hashinfo, hash2);
@@ -244,7 +245,6 @@ done:
EXPORT_SYMBOL_GPL(inet6_lookup_listener);
struct sock *inet6_lookup(const struct net *net,
- struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
const struct in6_addr *saddr, const __be16 sport,
const struct in6_addr *daddr, const __be16 dport,
@@ -253,7 +253,7 @@ struct sock *inet6_lookup(const struct net *net,
struct sock *sk;
bool refcounted;
- sk = __inet6_lookup(net, hashinfo, skb, doff, saddr, sport, daddr,
+ sk = __inet6_lookup(net, skb, doff, saddr, sport, daddr,
ntohs(dport), dif, 0, &refcounted);
if (sk && !refcounted && !refcount_inc_not_zero(&sk->sk_refcnt))
sk = NULL;
@@ -305,8 +305,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
dif, sdif))) {
if (sk2->sk_state == TCP_TIME_WAIT) {
tw = inet_twsk(sk2);
- if (sk->sk_protocol == IPPROTO_TCP &&
- tcp_twsk_unique(sk, sk2, twp))
+ if (tcp_twsk_unique(sk, sk2, twp))
break;
}
goto not_unique;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 74d49dd6124d..c82a75510c0e 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -329,9 +329,9 @@ static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net,
if (parms->name[0]) {
if (!dev_valid_name(parms->name))
return NULL;
- strscpy(name, parms->name, IFNAMSIZ);
+ strscpy(name, parms->name);
} else {
- strcpy(name, "ip6gre%d");
+ strscpy(name, "ip6gre%d");
}
dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN,
ip6gre_tunnel_setup);
@@ -1469,7 +1469,7 @@ static int ip6gre_tunnel_init_common(struct net_device *dev)
tunnel = netdev_priv(dev);
tunnel->dev = dev;
- strcpy(tunnel->parms.name, dev->name);
+ strscpy(tunnel->parms.name, dev->name);
ret = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
if (ret)
@@ -1529,7 +1529,7 @@ static void ip6gre_fb_tunnel_init(struct net_device *dev)
tunnel->dev = dev;
tunnel->net = dev_net(dev);
- strcpy(tunnel->parms.name, dev->name);
+ strscpy(tunnel->parms.name, dev->name);
tunnel->hlen = sizeof(struct ipv6hdr) + 4;
}
@@ -1842,7 +1842,7 @@ static int ip6erspan_tap_init(struct net_device *dev)
tunnel = netdev_priv(dev);
tunnel->dev = dev;
- strcpy(tunnel->parms.name, dev->name);
+ strscpy(tunnel->parms.name, dev->name);
ret = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
if (ret)
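
The ip6_gre.c conversions rely on the two-argument form of strscpy(), which infers the destination size at compile time when the destination is a fixed-size array — so each former strcpy() gains bounds checking without spelling out sizeof(). The two calls below are equivalent when parms.name is a char array:

strscpy(tunnel->parms.name, dev->name);
strscpy(tunnel->parms.name, dev->name, sizeof(tunnel->parms.name));
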
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 1e1410237b6e..9d64c13bab5e 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -60,7 +60,7 @@
static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
- struct net_device *dev = dst_dev(dst);
+ struct net_device *dev = dst_dev_rcu(dst);
struct inet6_dev *idev = ip6_dst_idev(dst);
unsigned int hh_len = LL_RESERVED_SPACE(dev);
const struct in6_addr *daddr, *nexthop;
@@ -70,15 +70,12 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
/* Be paranoid, rather than too clever. */
if (unlikely(hh_len > skb_headroom(skb)) && dev->header_ops) {
- /* Make sure idev stays alive */
- rcu_read_lock();
+ /* idev stays alive because we hold rcu_read_lock(). */
skb = skb_expand_head(skb, hh_len);
if (!skb) {
IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
- rcu_read_unlock();
return -ENOMEM;
}
- rcu_read_unlock();
}
hdr = ipv6_hdr(skb);
@@ -123,7 +120,6 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
- rcu_read_lock();
nexthop = rt6_nexthop(dst_rt6_info(dst), daddr);
neigh = __ipv6_neigh_lookup_noref(dev, nexthop);
@@ -131,7 +127,6 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
if (unlikely(!neigh))
neigh = __neigh_create(&nd_tbl, nexthop, dev, false);
if (IS_ERR(neigh)) {
- rcu_read_unlock();
IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTNOROUTES);
kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_CREATEFAIL);
return -EINVAL;
@@ -139,7 +134,6 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
}
sock_confirm_neigh(skb, neigh);
ret = neigh_output(neigh, skb, false);
- rcu_read_unlock();
return ret;
}
@@ -233,22 +227,29 @@ static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *s
int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
- struct net_device *dev = dst_dev(dst), *indev = skb->dev;
- struct inet6_dev *idev = ip6_dst_idev(dst);
+ struct net_device *dev, *indev = skb->dev;
+ struct inet6_dev *idev;
+ int ret;
skb->protocol = htons(ETH_P_IPV6);
+ rcu_read_lock();
+ dev = dst_dev_rcu(dst);
+ idev = ip6_dst_idev(dst);
skb->dev = dev;
if (unlikely(!idev || READ_ONCE(idev->cnf.disable_ipv6))) {
IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
+ rcu_read_unlock();
kfree_skb_reason(skb, SKB_DROP_REASON_IPV6DISABLED);
return 0;
}
- return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
- net, sk, skb, indev, dev,
- ip6_finish_output,
- !(IP6CB(skb)->flags & IP6SKB_REROUTED));
+ ret = NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
+ net, sk, skb, indev, dev,
+ ip6_finish_output,
+ !(IP6CB(skb)->flags & IP6SKB_REROUTED));
+ rcu_read_unlock();
+ return ret;
}
EXPORT_SYMBOL(ip6_output);
@@ -268,35 +269,36 @@ bool ip6_autoflowlabel(struct net *net, const struct sock *sk)
int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
__u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority)
{
- struct net *net = sock_net(sk);
const struct ipv6_pinfo *np = inet6_sk(sk);
struct in6_addr *first_hop = &fl6->daddr;
struct dst_entry *dst = skb_dst(skb);
- struct net_device *dev = dst_dev(dst);
struct inet6_dev *idev = ip6_dst_idev(dst);
struct hop_jumbo_hdr *hop_jumbo;
int hoplen = sizeof(*hop_jumbo);
+ struct net *net = sock_net(sk);
unsigned int head_room;
+ struct net_device *dev;
struct ipv6hdr *hdr;
u8 proto = fl6->flowi6_proto;
int seg_len = skb->len;
- int hlimit = -1;
+ int ret, hlimit = -1;
u32 mtu;
+ rcu_read_lock();
+
+ dev = dst_dev_rcu(dst);
head_room = sizeof(struct ipv6hdr) + hoplen + LL_RESERVED_SPACE(dev);
if (opt)
head_room += opt->opt_nflen + opt->opt_flen;
if (unlikely(head_room > skb_headroom(skb))) {
- /* Make sure idev stays alive */
- rcu_read_lock();
+ /* idev stays alive while we hold rcu_read_lock(). */
skb = skb_expand_head(skb, head_room);
if (!skb) {
IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
- rcu_read_unlock();
- return -ENOBUFS;
+ ret = -ENOBUFS;
+ goto unlock;
}
- rcu_read_unlock();
}
if (opt) {
@@ -358,17 +360,21 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
* skb to its handler for processing
*/
skb = l3mdev_ip6_out((struct sock *)sk, skb);
- if (unlikely(!skb))
- return 0;
+ if (unlikely(!skb)) {
+ ret = 0;
+ goto unlock;
+ }
/* hooks should never assume socket lock is held.
* we promote our socket to non const
*/
- return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
- net, (struct sock *)sk, skb, NULL, dev,
- dst_output);
+ ret = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
+ net, (struct sock *)sk, skb, NULL, dev,
+ dst_output);
+ goto unlock;
}
+ ret = -EMSGSIZE;
skb->dev = dev;
/* ipv6_local_error() does not require socket lock,
* we promote our socket to non const
@@ -377,7 +383,9 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
IP6_INC_STATS(net, idev, IPSTATS_MIB_FRAGFAILS);
kfree_skb(skb);
- return -EMSGSIZE;
+unlock:
+ rcu_read_unlock();
+ return ret;
}
EXPORT_SYMBOL(ip6_xmit);
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 36ca27496b3c..016b572e7d6f 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -169,6 +169,29 @@ static int unsolicited_report_interval(struct inet6_dev *idev)
return iv > 0 ? iv : 1;
}
+static struct net_device *ip6_mc_find_dev(struct net *net,
+ const struct in6_addr *group,
+ int ifindex)
+{
+ struct net_device *dev = NULL;
+ struct rt6_info *rt;
+
+ if (ifindex == 0) {
+ rcu_read_lock();
+ rt = rt6_lookup(net, group, NULL, 0, NULL, 0);
+ if (rt) {
+ dev = dst_dev_rcu(&rt->dst);
+ dev_hold(dev);
+ ip6_rt_put(rt);
+ }
+ rcu_read_unlock();
+ } else {
+ dev = dev_get_by_index(net, ifindex);
+ }
+
+ return dev;
+}
+
/*
* socket join on multicast group
*/
@@ -191,28 +214,13 @@ static int __ipv6_sock_mc_join(struct sock *sk, int ifindex,
}
mc_lst = sock_kmalloc(sk, sizeof(struct ipv6_mc_socklist), GFP_KERNEL);
-
if (!mc_lst)
return -ENOMEM;
mc_lst->next = NULL;
mc_lst->addr = *addr;
- if (ifindex == 0) {
- struct rt6_info *rt;
-
- rcu_read_lock();
- rt = rt6_lookup(net, addr, NULL, 0, NULL, 0);
- if (rt) {
- dev = dst_dev(&rt->dst);
- dev_hold(dev);
- ip6_rt_put(rt);
- }
- rcu_read_unlock();
- } else {
- dev = dev_get_by_index(net, ifindex);
- }
-
+ dev = ip6_mc_find_dev(net, addr, ifindex);
if (!dev) {
sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
return -ENODEV;
@@ -302,27 +310,14 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
}
EXPORT_SYMBOL(ipv6_sock_mc_drop);
-static struct inet6_dev *ip6_mc_find_dev(struct net *net,
- const struct in6_addr *group,
- int ifindex)
+static struct inet6_dev *ip6_mc_find_idev(struct net *net,
+ const struct in6_addr *group,
+ int ifindex)
{
- struct net_device *dev = NULL;
+ struct net_device *dev;
struct inet6_dev *idev;
- if (ifindex == 0) {
- struct rt6_info *rt;
-
- rcu_read_lock();
- rt = rt6_lookup(net, group, NULL, 0, NULL, 0);
- if (rt) {
- dev = dst_dev(&rt->dst);
- dev_hold(dev);
- ip6_rt_put(rt);
- }
- rcu_read_unlock();
- } else {
- dev = dev_get_by_index(net, ifindex);
- }
+ dev = ip6_mc_find_dev(net, group, ifindex);
if (!dev)
return NULL;
@@ -374,7 +369,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
if (!ipv6_addr_is_multicast(group))
return -EINVAL;
- idev = ip6_mc_find_dev(net, group, pgsr->gsr_interface);
+ idev = ip6_mc_find_idev(net, group, pgsr->gsr_interface);
if (!idev)
return -ENODEV;
@@ -509,7 +504,7 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf,
gsf->gf_fmode != MCAST_EXCLUDE)
return -EINVAL;
- idev = ip6_mc_find_dev(net, group, gsf->gf_interface);
+ idev = ip6_mc_find_idev(net, group, gsf->gf_interface);
if (!idev)
return -ENODEV;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 7d5abb3158ec..f427e41e9c49 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -130,7 +130,7 @@ struct neigh_table nd_tbl = {
[NEIGH_VAR_DELAY_PROBE_TIME] = 5 * HZ,
[NEIGH_VAR_INTERVAL_PROBE_TIME_MS] = 5 * HZ,
[NEIGH_VAR_GC_STALETIME] = 60 * HZ,
- [NEIGH_VAR_QUEUE_LEN_BYTES] = SK_WMEM_MAX,
+ [NEIGH_VAR_QUEUE_LEN_BYTES] = SK_WMEM_DEFAULT,
[NEIGH_VAR_PROXY_QLEN] = 64,
[NEIGH_VAR_ANYCAST_DELAY] = 1 * HZ,
[NEIGH_VAR_PROXY_DELAY] = (8 * HZ) / 10,
@@ -505,7 +505,7 @@ void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr,
ip6_nd_hdr(skb, saddr, daddr, READ_ONCE(inet6_sk(sk)->hop_limit), skb->len);
- dev = dst_dev(dst);
+ dev = dst_dev_rcu(dst);
idev = __in6_dev_get(dev);
IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS);
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 45f9105f9ac1..46540a5a4331 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -63,7 +63,10 @@ int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff
#ifdef CONFIG_XFRM
if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
xfrm_decode_session(net, skb, flowi6_to_flowi(&fl6), AF_INET6) == 0) {
- skb_dst_set(skb, NULL);
+ /* ignore return value from skb_dstref_steal, xfrm_lookup takes
+ * care of dropping the refcnt if needed.
+ */
+ skb_dstref_steal(skb);
dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
if (IS_ERR(dst))
return PTR_ERR(dst);
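
ip6_route_me_harder() replaces the bare skb_dst_set(skb, NULL) with skb_dstref_steal(), which detaches the dst without dropping the reference — correct here because xfrm_lookup() consumes that reference either way. Assumed shape of the helper:

/* Sketch: detach the entry and hand it back, refcount untouched. */
static inline struct dst_entry *skb_dstref_steal(struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);

        skb->_skb_refdst = 0UL;
        return dst;
}
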
diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c
index cb2d38e80de9..6b022449f867 100644
--- a/net/ipv6/netfilter/nf_reject_ipv6.c
+++ b/net/ipv6/netfilter/nf_reject_ipv6.c
@@ -12,6 +12,19 @@
#include <linux/netfilter_ipv6.h>
#include <linux/netfilter_bridge.h>
+static struct ipv6hdr *
+nf_reject_ip6hdr_put(struct sk_buff *nskb,
+ const struct sk_buff *oldskb,
+ __u8 protocol, int hoplimit);
+static void
+nf_reject_ip6_tcphdr_put(struct sk_buff *nskb,
+ const struct sk_buff *oldskb,
+ const struct tcphdr *oth, unsigned int otcplen);
+static const struct tcphdr *
+nf_reject_ip6_tcphdr_get(struct sk_buff *oldskb,
+ struct tcphdr *otcph,
+ unsigned int *otcplen, int hook);
+
static bool nf_reject_v6_csum_ok(struct sk_buff *skb, int hook)
{
const struct ipv6hdr *ip6h = ipv6_hdr(skb);
@@ -146,9 +159,10 @@ struct sk_buff *nf_reject_skb_v6_unreach(struct net *net,
}
EXPORT_SYMBOL_GPL(nf_reject_skb_v6_unreach);
-const struct tcphdr *nf_reject_ip6_tcphdr_get(struct sk_buff *oldskb,
- struct tcphdr *otcph,
- unsigned int *otcplen, int hook)
+static const struct tcphdr *
+nf_reject_ip6_tcphdr_get(struct sk_buff *oldskb,
+ struct tcphdr *otcph,
+ unsigned int *otcplen, int hook)
{
const struct ipv6hdr *oip6h = ipv6_hdr(oldskb);
u8 proto;
@@ -192,11 +206,11 @@ const struct tcphdr *nf_reject_ip6_tcphdr_get(struct sk_buff *oldskb,
return otcph;
}
-EXPORT_SYMBOL_GPL(nf_reject_ip6_tcphdr_get);
-struct ipv6hdr *nf_reject_ip6hdr_put(struct sk_buff *nskb,
- const struct sk_buff *oldskb,
- __u8 protocol, int hoplimit)
+static struct ipv6hdr *
+nf_reject_ip6hdr_put(struct sk_buff *nskb,
+ const struct sk_buff *oldskb,
+ __u8 protocol, int hoplimit)
{
struct ipv6hdr *ip6h;
const struct ipv6hdr *oip6h = ipv6_hdr(oldskb);
@@ -216,11 +230,11 @@ struct ipv6hdr *nf_reject_ip6hdr_put(struct sk_buff *nskb,
return ip6h;
}
-EXPORT_SYMBOL_GPL(nf_reject_ip6hdr_put);
-void nf_reject_ip6_tcphdr_put(struct sk_buff *nskb,
- const struct sk_buff *oldskb,
- const struct tcphdr *oth, unsigned int otcplen)
+static void
+nf_reject_ip6_tcphdr_put(struct sk_buff *nskb,
+ const struct sk_buff *oldskb,
+ const struct tcphdr *oth, unsigned int otcplen)
{
struct tcphdr *tcph;
@@ -248,7 +262,6 @@ void nf_reject_ip6_tcphdr_put(struct sk_buff *nskb,
csum_partial(tcph,
sizeof(struct tcphdr), 0));
}
-EXPORT_SYMBOL_GPL(nf_reject_ip6_tcphdr_put);
static int nf_reject6_fill_skb_dst(struct sk_buff *skb_in)
{
diff --git a/net/ipv6/netfilter/nf_socket_ipv6.c b/net/ipv6/netfilter/nf_socket_ipv6.c
index 9ea5ef56cb27..ced8bd44828e 100644
--- a/net/ipv6/netfilter/nf_socket_ipv6.c
+++ b/net/ipv6/netfilter/nf_socket_ipv6.c
@@ -83,8 +83,7 @@ nf_socket_get_sock_v6(struct net *net, struct sk_buff *skb, int doff,
{
switch (protocol) {
case IPPROTO_TCP:
- return inet6_lookup(net, net->ipv4.tcp_death_row.hashinfo,
- skb, doff, saddr, sport, daddr, dport,
+ return inet6_lookup(net, skb, doff, saddr, sport, daddr, dport,
in->ifindex);
case IPPROTO_UDP:
return udp6_lib_lookup(net, saddr, sport, daddr, dport,
diff --git a/net/ipv6/netfilter/nf_tproxy_ipv6.c b/net/ipv6/netfilter/nf_tproxy_ipv6.c
index 52f828bb5a83..b2f59ed9d7cc 100644
--- a/net/ipv6/netfilter/nf_tproxy_ipv6.c
+++ b/net/ipv6/netfilter/nf_tproxy_ipv6.c
@@ -80,7 +80,6 @@ nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff,
const struct net_device *in,
const enum nf_tproxy_lookup_t lookup_type)
{
- struct inet_hashinfo *hinfo = net->ipv4.tcp_death_row.hashinfo;
struct sock *sk;
switch (protocol) {
@@ -94,7 +93,7 @@ nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff,
switch (lookup_type) {
case NF_TPROXY_LOOKUP_LISTENER:
- sk = inet6_lookup_listener(net, hinfo, skb,
+ sk = inet6_lookup_listener(net, skb,
thoff + __tcp_hdrlen(hp),
saddr, sport,
daddr, ntohs(dport),
@@ -109,7 +108,7 @@ nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff,
*/
break;
case NF_TPROXY_LOOKUP_ESTABLISHED:
- sk = __inet6_lookup_established(net, hinfo, saddr, sport, daddr,
+ sk = __inet6_lookup_established(net, saddr, sport, daddr,
ntohs(dport), in->ifindex, 0);
break;
default:
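
The tproxy and socket-match call sites above reflect an API change: the IPv6 socket-lookup helpers now resolve the TCP hash table internally instead of taking net->ipv4.tcp_death_row.hashinfo as an argument. The new call shape, condensed from the hunks above (a recap, not new code):

	/* listener lookup: the hashinfo argument is gone */
	sk = inet6_lookup_listener(net, skb, thoff + __tcp_hdrlen(hp),
				   saddr, sport, daddr, ntohs(dport),
				   in->ifindex);

	/* established lookup: likewise derived from @net internally */
	sk = __inet6_lookup_established(net, saddr, sport, daddr,
					ntohs(dport), in->ifindex, 0);
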
diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
index d21fe27fe21e..1c9b283a4132 100644
--- a/net/ipv6/output_core.c
+++ b/net/ipv6/output_core.c
@@ -104,18 +104,20 @@ EXPORT_SYMBOL(ip6_find_1stfragopt);
int ip6_dst_hoplimit(struct dst_entry *dst)
{
int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
+
+ rcu_read_lock();
if (hoplimit == 0) {
- struct net_device *dev = dst_dev(dst);
+ struct net_device *dev = dst_dev_rcu(dst);
struct inet6_dev *idev;
- rcu_read_lock();
idev = __in6_dev_get(dev);
if (idev)
hoplimit = READ_ONCE(idev->cnf.hop_limit);
else
hoplimit = READ_ONCE(dev_net(dev)->ipv6.devconf_all->hop_limit);
- rcu_read_unlock();
}
+ rcu_read_unlock();
+
return hoplimit;
}
EXPORT_SYMBOL(ip6_dst_hoplimit);
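
The hunk above widens the RCU read-side section so that dst_dev_rcu() itself runs under rcu_read_lock(). A sketch of the accessor this pattern presumes (assumed shape; the real helper lives in include/net/dst.h):

	/* Assumed shape: the returned device is only guaranteed to stay
	 * alive while the RCU read-side section is held, which is why
	 * the lock now spans the whole hoplimit computation.
	 */
	static inline struct net_device *dst_dev_rcu(const struct dst_entry *dst)
	{
		return rcu_dereference(dst->dev);
	}
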
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index 82b0492923d4..d7a2cdaa2631 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -208,7 +208,6 @@ struct proto pingv6_prot = {
.recvmsg = ping_recvmsg,
.bind = ping_bind,
.backlog_rcv = ping_queue_rcv_skb,
- .hash = ping_hash,
.unhash = ping_unhash,
.get_port = ping_get_port,
.put_port = ping_unhash,
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 4c3f8245c40f..4ae07a67b4d4 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -163,7 +163,7 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
if (atomic_read(&sk->sk_rmem_alloc) >=
READ_ONCE(sk->sk_rcvbuf)) {
- atomic_inc(&sk->sk_drops);
+ sk_drops_inc(sk);
continue;
}
@@ -361,7 +361,7 @@ static inline int rawv6_rcv_skb(struct sock *sk, struct sk_buff *skb)
if ((raw6_sk(sk)->checksum || rcu_access_pointer(sk->sk_filter)) &&
skb_checksum_complete(skb)) {
- atomic_inc(&sk->sk_drops);
+ sk_drops_inc(sk);
sk_skb_reason_drop(sk, skb, SKB_DROP_REASON_SKB_CSUM);
return NET_RX_DROP;
}
@@ -389,7 +389,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
struct raw6_sock *rp = raw6_sk(sk);
if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
- atomic_inc(&sk->sk_drops);
+ sk_drops_inc(sk);
sk_skb_reason_drop(sk, skb, SKB_DROP_REASON_XFRM_POLICY);
return NET_RX_DROP;
}
@@ -414,7 +414,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
if (inet_test_bit(HDRINCL, sk)) {
if (skb_checksum_complete(skb)) {
- atomic_inc(&sk->sk_drops);
+ sk_drops_inc(sk);
sk_skb_reason_drop(sk, skb, SKB_DROP_REASON_SKB_CSUM);
return NET_RX_DROP;
}
@@ -1175,6 +1175,7 @@ static int rawv6_init_sk(struct sock *sk)
{
struct raw6_sock *rp = raw6_sk(sk);
+ sk->sk_drop_counters = &rp->drop_counters;
switch (inet_sk(sk)->inet_num) {
case IPPROTO_ICMPV6:
rp->checksum = 1;
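
Throughout this series, open-coded atomic_inc(&sk->sk_drops) becomes sk_drops_inc(), and rawv6_init_sk() points the socket at a protocol-owned drop-counter group. A hedged sketch of what such a wrapper can look like (the real helper is in include/net/sock.h; the exact counter layout is an assumption):

	static inline void sk_drops_inc(struct sock *sk)
	{
		/* Assumption: sockets that set sk->sk_drop_counters account
		 * drops in that shared group; others keep the classic
		 * per-socket atomic counter.
		 */
		atomic_inc(&sk->sk_drops);
	}
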
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 3299cfa12e21..3371f16b7a3e 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2943,7 +2943,7 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
if (res.f6i->nh) {
struct fib6_nh_match_arg arg = {
- .dev = dst_dev(dst),
+ .dev = dst_dev_rcu(dst),
.gw = &rt6->rt6i_gateway,
};
@@ -3238,7 +3238,6 @@ EXPORT_SYMBOL_GPL(ip6_sk_redirect);
static unsigned int ip6_default_advmss(const struct dst_entry *dst)
{
- struct net_device *dev = dst_dev(dst);
unsigned int mtu = dst_mtu(dst);
struct net *net;
@@ -3246,7 +3245,7 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst)
rcu_read_lock();
- net = dev_net_rcu(dev);
+ net = dst_dev_net_rcu(dst);
if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
@@ -4301,7 +4300,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
if (res.f6i->nh) {
struct fib6_nh_match_arg arg = {
- .dev = dst_dev(dst),
+ .dev = dst_dev_rcu(dst),
.gw = &rt->rt6i_gateway,
};
diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c
index 180da19c148c..a5c4c629b788 100644
--- a/net/ipv6/seg6.c
+++ b/net/ipv6/seg6.c
@@ -522,16 +522,10 @@ int __init seg6_init(void)
if (err)
goto out_unregister_iptun;
- err = seg6_hmac_init();
- if (err)
- goto out_unregister_seg6;
-
pr_info("Segment Routing with IPv6\n");
out:
return err;
-out_unregister_seg6:
- seg6_local_exit();
out_unregister_iptun:
seg6_iptunnel_exit();
out_unregister_genl:
@@ -543,7 +537,6 @@ out_unregister_pernet:
void seg6_exit(void)
{
- seg6_hmac_exit();
seg6_local_exit();
seg6_iptunnel_exit();
genl_unregister_family(&seg6_genl_family);
diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c
index fd58426f222b..ee6bac0160ac 100644
--- a/net/ipv6/seg6_hmac.c
+++ b/net/ipv6/seg6_hmac.c
@@ -16,7 +16,6 @@
#include <linux/in6.h>
#include <linux/icmpv6.h>
#include <linux/mroute6.h>
-#include <linux/slab.h>
#include <linux/rhashtable.h>
#include <linux/netfilter.h>
@@ -34,7 +33,8 @@
#include <net/addrconf.h>
#include <net/xfrm.h>
-#include <crypto/hash.h>
+#include <crypto/sha1.h>
+#include <crypto/sha2.h>
#include <crypto/utils.h>
#include <net/seg6.h>
#include <net/genetlink.h>
@@ -78,17 +78,6 @@ static const struct rhashtable_params rht_params = {
.obj_cmpfn = seg6_hmac_cmpfn,
};
-static struct seg6_hmac_algo hmac_algos[] = {
- {
- .alg_id = SEG6_HMAC_ALGO_SHA1,
- .name = "hmac(sha1)",
- },
- {
- .alg_id = SEG6_HMAC_ALGO_SHA256,
- .name = "hmac(sha256)",
- },
-};
-
static struct sr6_tlv_hmac *seg6_get_tlv_hmac(struct ipv6_sr_hdr *srh)
{
struct sr6_tlv_hmac *tlv;
@@ -108,75 +97,13 @@ static struct sr6_tlv_hmac *seg6_get_tlv_hmac(struct ipv6_sr_hdr *srh)
return tlv;
}
-static struct seg6_hmac_algo *__hmac_get_algo(u8 alg_id)
-{
- struct seg6_hmac_algo *algo;
- int i, alg_count;
-
- alg_count = ARRAY_SIZE(hmac_algos);
- for (i = 0; i < alg_count; i++) {
- algo = &hmac_algos[i];
- if (algo->alg_id == alg_id)
- return algo;
- }
-
- return NULL;
-}
-
-static int __do_hmac(struct seg6_hmac_info *hinfo, const char *text, u8 psize,
- u8 *output, int outlen)
-{
- struct seg6_hmac_algo *algo;
- struct crypto_shash *tfm;
- struct shash_desc *shash;
- int ret, dgsize;
-
- algo = __hmac_get_algo(hinfo->alg_id);
- if (!algo)
- return -ENOENT;
-
- tfm = *this_cpu_ptr(algo->tfms);
-
- dgsize = crypto_shash_digestsize(tfm);
- if (dgsize > outlen) {
- pr_debug("sr-ipv6: __do_hmac: digest size too big (%d / %d)\n",
- dgsize, outlen);
- return -ENOMEM;
- }
-
- ret = crypto_shash_setkey(tfm, hinfo->secret, hinfo->slen);
- if (ret < 0) {
- pr_debug("sr-ipv6: crypto_shash_setkey failed: err %d\n", ret);
- goto failed;
- }
-
- shash = *this_cpu_ptr(algo->shashs);
- shash->tfm = tfm;
-
- ret = crypto_shash_digest(shash, text, psize, output);
- if (ret < 0) {
- pr_debug("sr-ipv6: crypto_shash_digest failed: err %d\n", ret);
- goto failed;
- }
-
- return dgsize;
-
-failed:
- return ret;
-}
-
int seg6_hmac_compute(struct seg6_hmac_info *hinfo, struct ipv6_sr_hdr *hdr,
struct in6_addr *saddr, u8 *output)
{
__be32 hmackeyid = cpu_to_be32(hinfo->hmackeyid);
- u8 tmp_out[SEG6_HMAC_MAX_DIGESTSIZE];
- int plen, i, dgsize, wrsize;
+ int plen, i, ret = 0;
char *ring, *off;
- /* a 160-byte buffer for digest output allows to store highest known
- * hash function (RadioGatun) with up to 1216 bits
- */
-
/* saddr(16) + first_seg(1) + flags(1) + keyid(4) + seglist(16n) */
plen = 16 + 1 + 1 + 4 + (hdr->first_segment + 1) * 16;
@@ -219,22 +146,25 @@ int seg6_hmac_compute(struct seg6_hmac_info *hinfo, struct ipv6_sr_hdr *hdr,
off += 16;
}
- dgsize = __do_hmac(hinfo, ring, plen, tmp_out,
- SEG6_HMAC_MAX_DIGESTSIZE);
+ switch (hinfo->alg_id) {
+ case SEG6_HMAC_ALGO_SHA1:
+ hmac_sha1(&hinfo->key.sha1, ring, plen, output);
+ static_assert(SEG6_HMAC_FIELD_LEN > SHA1_DIGEST_SIZE);
+ memset(&output[SHA1_DIGEST_SIZE], 0,
+ SEG6_HMAC_FIELD_LEN - SHA1_DIGEST_SIZE);
+ break;
+ case SEG6_HMAC_ALGO_SHA256:
+ hmac_sha256(&hinfo->key.sha256, ring, plen, output);
+ static_assert(SEG6_HMAC_FIELD_LEN == SHA256_DIGEST_SIZE);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ ret = -EINVAL;
+ break;
+ }
local_unlock_nested_bh(&hmac_storage.bh_lock);
local_bh_enable();
-
- if (dgsize < 0)
- return dgsize;
-
- wrsize = SEG6_HMAC_FIELD_LEN;
- if (wrsize > dgsize)
- wrsize = dgsize;
-
- memset(output, 0, SEG6_HMAC_FIELD_LEN);
- memcpy(output, tmp_out, wrsize);
-
- return 0;
+ return ret;
}
EXPORT_SYMBOL(seg6_hmac_compute);
@@ -305,8 +235,18 @@ int seg6_hmac_info_add(struct net *net, u32 key, struct seg6_hmac_info *hinfo)
struct seg6_pernet_data *sdata = seg6_pernet(net);
int err;
- if (!__hmac_get_algo(hinfo->alg_id))
+ switch (hinfo->alg_id) {
+ case SEG6_HMAC_ALGO_SHA1:
+ hmac_sha1_preparekey(&hinfo->key.sha1,
+ hinfo->secret, hinfo->slen);
+ break;
+ case SEG6_HMAC_ALGO_SHA256:
+ hmac_sha256_preparekey(&hinfo->key.sha256,
+ hinfo->secret, hinfo->slen);
+ break;
+ default:
return -EINVAL;
+ }
err = rhashtable_lookup_insert_fast(&sdata->hmac_infos, &hinfo->node,
rht_params);
@@ -363,65 +303,6 @@ out:
}
EXPORT_SYMBOL(seg6_push_hmac);
-static int seg6_hmac_init_algo(void)
-{
- struct seg6_hmac_algo *algo;
- struct crypto_shash *tfm;
- struct shash_desc *shash;
- int i, alg_count, cpu;
- int ret = -ENOMEM;
-
- alg_count = ARRAY_SIZE(hmac_algos);
-
- for (i = 0; i < alg_count; i++) {
- struct crypto_shash **p_tfm;
- int shsize;
-
- algo = &hmac_algos[i];
- algo->tfms = alloc_percpu(struct crypto_shash *);
- if (!algo->tfms)
- goto error_out;
-
- for_each_possible_cpu(cpu) {
- tfm = crypto_alloc_shash(algo->name, 0, 0);
- if (IS_ERR(tfm)) {
- ret = PTR_ERR(tfm);
- goto error_out;
- }
- p_tfm = per_cpu_ptr(algo->tfms, cpu);
- *p_tfm = tfm;
- }
-
- p_tfm = raw_cpu_ptr(algo->tfms);
- tfm = *p_tfm;
-
- shsize = sizeof(*shash) + crypto_shash_descsize(tfm);
-
- algo->shashs = alloc_percpu(struct shash_desc *);
- if (!algo->shashs)
- goto error_out;
-
- for_each_possible_cpu(cpu) {
- shash = kzalloc_node(shsize, GFP_KERNEL,
- cpu_to_node(cpu));
- if (!shash)
- goto error_out;
- *per_cpu_ptr(algo->shashs, cpu) = shash;
- }
- }
-
- return 0;
-
-error_out:
- seg6_hmac_exit();
- return ret;
-}
-
-int __init seg6_hmac_init(void)
-{
- return seg6_hmac_init_algo();
-}
-
int __net_init seg6_hmac_net_init(struct net *net)
{
struct seg6_pernet_data *sdata = seg6_pernet(net);
@@ -429,36 +310,6 @@ int __net_init seg6_hmac_net_init(struct net *net)
return rhashtable_init(&sdata->hmac_infos, &rht_params);
}
-void seg6_hmac_exit(void)
-{
- struct seg6_hmac_algo *algo = NULL;
- struct crypto_shash *tfm;
- struct shash_desc *shash;
- int i, alg_count, cpu;
-
- alg_count = ARRAY_SIZE(hmac_algos);
- for (i = 0; i < alg_count; i++) {
- algo = &hmac_algos[i];
-
- if (algo->shashs) {
- for_each_possible_cpu(cpu) {
- shash = *per_cpu_ptr(algo->shashs, cpu);
- kfree(shash);
- }
- free_percpu(algo->shashs);
- }
-
- if (algo->tfms) {
- for_each_possible_cpu(cpu) {
- tfm = *per_cpu_ptr(algo->tfms, cpu);
- crypto_free_shash(tfm);
- }
- free_percpu(algo->tfms);
- }
- }
-}
-EXPORT_SYMBOL(seg6_hmac_exit);
-
void __net_exit seg6_hmac_net_exit(struct net *net)
{
struct seg6_pernet_data *sdata = seg6_pernet(net);
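
With the per-CPU crypto_shash machinery gone, seg6 needs no HMAC init/exit hooks at all (hence the seg6.c hunk above): keys are expanded once at configuration time and digests are computed synchronously. A minimal sketch of the new calling pattern, assuming the <crypto/sha2.h> library API used in the diff (secret/data buffers are illustrative):

	#include <crypto/sha2.h>

	struct hmac_sha256_key key;	/* stored in seg6_hmac_info::key */
	u8 out[SHA256_DIGEST_SIZE];

	/* done once, in seg6_hmac_info_add() */
	hmac_sha256_preparekey(&key, secret, secret_len);

	/* per packet: one shot, no allocations, no tfm handles */
	hmac_sha256(&key, data, data_len, out);
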
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 12496ba1b7d4..cf37ad9686e6 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -848,6 +848,49 @@ static inline __be32 try_6rd(struct ip_tunnel *tunnel,
return dst;
}
+static bool ipip6_tunnel_dst_find(struct sk_buff *skb, __be32 *dst,
+ bool is_isatap)
+{
+ const struct ipv6hdr *iph6 = ipv6_hdr(skb);
+ struct neighbour *neigh = NULL;
+ const struct in6_addr *addr6;
+ bool found = false;
+ int addr_type;
+
+ if (skb_dst(skb))
+ neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr);
+
+ if (!neigh) {
+ net_dbg_ratelimited("nexthop == NULL\n");
+ return false;
+ }
+
+ addr6 = (const struct in6_addr *)&neigh->primary_key;
+ addr_type = ipv6_addr_type(addr6);
+
+ if (is_isatap) {
+ if ((addr_type & IPV6_ADDR_UNICAST) &&
+ ipv6_addr_is_isatap(addr6)) {
+ *dst = addr6->s6_addr32[3];
+ found = true;
+ }
+ } else {
+ if (addr_type == IPV6_ADDR_ANY) {
+ addr6 = &ipv6_hdr(skb)->daddr;
+ addr_type = ipv6_addr_type(addr6);
+ }
+
+ if ((addr_type & IPV6_ADDR_COMPATv4) != 0) {
+ *dst = addr6->s6_addr32[3];
+ found = true;
+ }
+ }
+
+ neigh_release(neigh);
+
+ return found;
+}
+
/*
* This function assumes it is being called from dev_queue_xmit()
* and that skb is filled properly by that function.
@@ -867,8 +910,6 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
__be32 dst = tiph->daddr;
struct flowi4 fl4;
int mtu;
- const struct in6_addr *addr6;
- int addr_type;
u8 ttl;
u8 protocol = IPPROTO_IPV6;
int t_hlen = tunnel->hlen + sizeof(struct iphdr);
@@ -877,64 +918,15 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
tos = ipv6_get_dsfield(iph6);
/* ISATAP (RFC4214) - must come before 6to4 */
- if (dev->priv_flags & IFF_ISATAP) {
- struct neighbour *neigh = NULL;
- bool do_tx_error = false;
-
- if (skb_dst(skb))
- neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr);
-
- if (!neigh) {
- net_dbg_ratelimited("nexthop == NULL\n");
- goto tx_error;
- }
-
- addr6 = (const struct in6_addr *)&neigh->primary_key;
- addr_type = ipv6_addr_type(addr6);
-
- if ((addr_type & IPV6_ADDR_UNICAST) &&
- ipv6_addr_is_isatap(addr6))
- dst = addr6->s6_addr32[3];
- else
- do_tx_error = true;
-
- neigh_release(neigh);
- if (do_tx_error)
- goto tx_error;
- }
+ if ((dev->priv_flags & IFF_ISATAP) &&
+ !ipip6_tunnel_dst_find(skb, &dst, true))
+ goto tx_error;
if (!dst)
dst = try_6rd(tunnel, &iph6->daddr);
- if (!dst) {
- struct neighbour *neigh = NULL;
- bool do_tx_error = false;
-
- if (skb_dst(skb))
- neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr);
-
- if (!neigh) {
- net_dbg_ratelimited("nexthop == NULL\n");
- goto tx_error;
- }
-
- addr6 = (const struct in6_addr *)&neigh->primary_key;
- addr_type = ipv6_addr_type(addr6);
-
- if (addr_type == IPV6_ADDR_ANY) {
- addr6 = &ipv6_hdr(skb)->daddr;
- addr_type = ipv6_addr_type(addr6);
- }
-
- if ((addr_type & IPV6_ADDR_COMPATv4) != 0)
- dst = addr6->s6_addr32[3];
- else
- do_tx_error = true;
-
- neigh_release(neigh);
- if (do_tx_error)
- goto tx_error;
- }
+ if (!dst && !ipip6_tunnel_dst_find(skb, &dst, false))
+ goto tx_error;
flowi4_init_output(&fl4, tunnel->parms.link, tunnel->fwmark,
tos & INET_DSCP_MASK, RT_SCOPE_UNIVERSE,
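
The two near-identical neighbour-lookup blocks in ipip6_tunnel_xmit() are folded into ipip6_tunnel_dst_find(), parameterised on ISATAP handling. The transmit path then reduces to the sequence below (a condensed recap of the code above):

	/* ISATAP (RFC 4214) must be resolved before the 6rd/6to4 fallbacks */
	if ((dev->priv_flags & IFF_ISATAP) &&
	    !ipip6_tunnel_dst_find(skb, &dst, true))
		goto tx_error;

	if (!dst)
		dst = try_6rd(tunnel, &iph6->daddr);

	/* last resort: derive an IPv4 destination from the neighbour entry */
	if (!dst && !ipip6_tunnel_dst_find(skb, &dst, false))
		goto tx_error;
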
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index e885629312a4..0562e939b2e3 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -388,8 +388,7 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
bool fatal;
int err;
- sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
- &hdr->daddr, th->dest,
+ sk = __inet6_lookup_established(net, &hdr->daddr, th->dest,
&hdr->saddr, ntohs(th->source),
skb->dev->ifindex, inet6_sdif(skb));
@@ -1073,8 +1072,7 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb,
* Incoming packet is checked with md5 hash with finding key,
* no RST generated if md5 hash doesn't match.
*/
- sk1 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
- NULL, 0, &ipv6h->saddr, th->source,
+ sk1 = inet6_lookup_listener(net, NULL, 0, &ipv6h->saddr, th->source,
&ipv6h->daddr, ntohs(th->source),
dif, sdif);
if (!sk1)
@@ -1787,7 +1785,7 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
hdr = ipv6_hdr(skb);
lookup:
- sk = __inet6_lookup_skb(net->ipv4.tcp_death_row.hashinfo, skb, __tcp_hdrlen(th),
+ sk = __inet6_lookup_skb(skb, __tcp_hdrlen(th),
th->source, th->dest, inet6_iif(skb), sdif,
&refcounted);
if (!sk)
@@ -1809,7 +1807,7 @@ lookup:
&hdr->saddr, &hdr->daddr,
AF_INET6, dif, sdif);
if (drop_reason) {
- sk_drops_add(sk, skb);
+ sk_drops_skbadd(sk, skb);
reqsk_put(req);
goto discard_it;
}
@@ -1948,7 +1946,7 @@ discard_it:
return 0;
discard_and_relse:
- sk_drops_add(sk, skb);
+ sk_drops_skbadd(sk, skb);
if (refcounted)
sock_put(sk);
goto discard_it;
@@ -1974,8 +1972,7 @@ do_time_wait:
{
struct sock *sk2;
- sk2 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
- skb, __tcp_hdrlen(th),
+ sk2 = inet6_lookup_listener(net, skb, __tcp_hdrlen(th),
&ipv6_hdr(skb)->saddr, th->source,
&ipv6_hdr(skb)->daddr,
ntohs(th->dest),
@@ -2027,8 +2024,7 @@ void tcp_v6_early_demux(struct sk_buff *skb)
return;
/* Note : We use inet6_iif() here, not tcp_v6_iif() */
- sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
- &hdr->saddr, th->source,
+ sk = __inet6_lookup_established(net, &hdr->saddr, th->source,
&hdr->daddr, ntohs(th->dest),
inet6_iif(skb), inet6_sdif(skb));
if (sk) {
@@ -2048,7 +2044,6 @@ void tcp_v6_early_demux(struct sk_buff *skb)
static struct timewait_sock_ops tcp6_timewait_sock_ops = {
.twsk_obj_size = sizeof(struct tcp6_timewait_sock),
- .twsk_destructor = tcp_twsk_destructor,
};
INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
@@ -2228,9 +2223,9 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
rx_queue,
timer_active,
jiffies_delta_to_clock_t(timer_expires - jiffies),
- icsk->icsk_retransmits,
+ READ_ONCE(icsk->icsk_retransmits),
from_kuid_munged(seq_user_ns(seq), sk_uid(sp)),
- icsk->icsk_probes_out,
+ READ_ONCE(icsk->icsk_probes_out),
sock_i_ino(sp),
refcount_read(&sp->sk_refcnt), sp,
jiffies_to_clock_t(icsk->icsk_rto),
@@ -2356,7 +2351,6 @@ struct proto tcpv6_prot = {
.per_cpu_fw_alloc = &tcp_memory_per_cpu_fw_alloc,
.memory_pressure = &tcp_memory_pressure,
- .orphan_count = &tcp_orphan_count,
.sysctl_mem = sysctl_tcp_mem,
.sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem),
.sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem),
diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c
index a8a04f441e78..effeba58630b 100644
--- a/net/ipv6/tcpv6_offload.c
+++ b/net/ipv6/tcpv6_offload.c
@@ -36,8 +36,7 @@ static void tcp6_check_fraglist_gro(struct list_head *head, struct sk_buff *skb,
inet6_get_iif_sdif(skb, &iif, &sdif);
hdr = skb_gro_network_header(skb);
net = dev_net_rcu(skb->dev);
- sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
- &hdr->saddr, th->source,
+ sk = __inet6_lookup_established(net, &hdr->saddr, th->source,
&hdr->daddr, ntohs(th->dest),
iif, sdif);
NAPI_GRO_CB(skb)->is_flist = !sk;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 6a68f77da44b..a35ee6d693a8 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -524,7 +524,7 @@ try_again:
}
if (unlikely(err)) {
if (!peeking) {
- atomic_inc(&sk->sk_drops);
+ sk_drops_inc(sk);
SNMP_INC_STATS(mib, UDP_MIB_INERRORS);
}
kfree_skb(skb);
@@ -908,7 +908,7 @@ csum_error:
__UDP6_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
drop:
__UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
- atomic_inc(&sk->sk_drops);
+ sk_drops_inc(sk);
sk_skb_reason_drop(sk, skb, drop_reason);
return -1;
}
@@ -1013,7 +1013,7 @@ start_lookup:
}
nskb = skb_clone(skb, GFP_ATOMIC);
if (unlikely(!nskb)) {
- atomic_inc(&sk->sk_drops);
+ sk_drops_inc(sk);
__UDP6_INC_STATS(net, UDP_MIB_RCVBUFERRORS,
IS_UDPLITE(sk));
__UDP6_INC_STATS(net, UDP_MIB_INERRORS,
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index cc2b3c44bc05..6c717a7ef292 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -1187,7 +1187,7 @@ static void iucv_process_message(struct sock *sk, struct sk_buff *skb,
IUCV_SKB_CB(skb)->offset = 0;
if (sk_filter(sk, skb)) {
- atomic_inc(&sk->sk_drops); /* skb rejected by filter */
+ sk_drops_inc(sk); /* skb rejected by filter */
kfree_skb(skb);
return;
}
@@ -2011,7 +2011,7 @@ static int afiucv_hs_callback_rx(struct sock *sk, struct sk_buff *skb)
skb_reset_network_header(skb);
IUCV_SKB_CB(skb)->offset = 0;
if (sk_filter(sk, skb)) {
- atomic_inc(&sk->sk_drops); /* skb rejected by filter */
+ sk_drops_inc(sk); /* skb rejected by filter */
kfree_skb(skb);
return NET_RX_SUCCESS;
}
diff --git a/net/mptcp/crypto.c b/net/mptcp/crypto.c
index b08ba959ac4f..31948e18d97d 100644
--- a/net/mptcp/crypto.c
+++ b/net/mptcp/crypto.c
@@ -22,7 +22,6 @@
#include <linux/kernel.h>
#include <crypto/sha2.h>
-#include <linux/unaligned.h>
#include "protocol.h"
@@ -43,39 +42,9 @@ void mptcp_crypto_key_sha(u64 key, u32 *token, u64 *idsn)
void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac)
{
- u8 input[SHA256_BLOCK_SIZE + SHA256_DIGEST_SIZE];
- u8 key1be[8];
- u8 key2be[8];
- int i;
+ __be64 key[2] = { cpu_to_be64(key1), cpu_to_be64(key2) };
- if (WARN_ON_ONCE(len > SHA256_DIGEST_SIZE))
- len = SHA256_DIGEST_SIZE;
-
- put_unaligned_be64(key1, key1be);
- put_unaligned_be64(key2, key2be);
-
- /* Generate key xored with ipad */
- memset(input, 0x36, SHA256_BLOCK_SIZE);
- for (i = 0; i < 8; i++)
- input[i] ^= key1be[i];
- for (i = 0; i < 8; i++)
- input[i + 8] ^= key2be[i];
-
- memcpy(&input[SHA256_BLOCK_SIZE], msg, len);
-
- /* emit sha256(K1 || msg) on the second input block, so we can
- * reuse 'input' for the last hashing
- */
- sha256(input, SHA256_BLOCK_SIZE + len, &input[SHA256_BLOCK_SIZE]);
-
- /* Prepare second part of hmac */
- memset(input, 0x5C, SHA256_BLOCK_SIZE);
- for (i = 0; i < 8; i++)
- input[i] ^= key1be[i];
- for (i = 0; i < 8; i++)
- input[i + 8] ^= key2be[i];
-
- sha256(input, SHA256_BLOCK_SIZE + SHA256_DIGEST_SIZE, hmac);
+ hmac_sha256_usingrawkey((const u8 *)key, sizeof(key), msg, len, hmac);
}
#if IS_MODULE(CONFIG_MPTCP_KUNIT_TEST)
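
The removed code built HMAC-SHA256 by hand: key XOR ipad, inner hash, key XOR opad, outer hash. Because the 16-byte key is shorter than SHA256_BLOCK_SIZE, RFC 2104 zero-pads it, which is exactly what the old memset(0x36)/memset(0x5C) blocks encoded and what the library call now does internally:

	__be64 key[2] = { cpu_to_be64(key1), cpu_to_be64(key2) };
	u8 hmac[SHA256_DIGEST_SIZE];

	/* one call replaces both padded hash rounds */
	hmac_sha256_usingrawkey((const u8 *)key, sizeof(key), msg, len, hmac);
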
diff --git a/net/mptcp/mptcp_diag.c b/net/mptcp/mptcp_diag.c
index 0566dd793810..ac974299de71 100644
--- a/net/mptcp/mptcp_diag.c
+++ b/net/mptcp/mptcp_diag.c
@@ -15,9 +15,9 @@
static int sk_diag_dump(struct sock *sk, struct sk_buff *skb,
struct netlink_callback *cb,
const struct inet_diag_req_v2 *req,
- struct nlattr *bc, bool net_admin)
+ bool net_admin)
{
- if (!inet_diag_bc_sk(bc, sk))
+ if (!inet_diag_bc_sk(cb->data, sk))
return 0;
return inet_sk_diag_fill(sk, inet_csk(sk), skb, cb, req, NLM_F_MULTI,
@@ -76,9 +76,7 @@ static void mptcp_diag_dump_listeners(struct sk_buff *skb, struct netlink_callba
const struct inet_diag_req_v2 *r,
bool net_admin)
{
- struct inet_diag_dump_data *cb_data = cb->data;
struct mptcp_diag_ctx *diag_ctx = (void *)cb->ctx;
- struct nlattr *bc = cb_data->inet_diag_nla_bc;
struct net *net = sock_net(skb->sk);
struct inet_hashinfo *hinfo;
int i;
@@ -121,7 +119,7 @@ static void mptcp_diag_dump_listeners(struct sk_buff *skb, struct netlink_callba
if (!refcount_inc_not_zero(&sk->sk_refcnt))
goto next_listen;
- ret = sk_diag_dump(sk, skb, cb, r, bc, net_admin);
+ ret = sk_diag_dump(sk, skb, cb, r, net_admin);
sock_put(sk);
@@ -154,15 +152,10 @@ static void mptcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN);
struct mptcp_diag_ctx *diag_ctx = (void *)cb->ctx;
struct net *net = sock_net(skb->sk);
- struct inet_diag_dump_data *cb_data;
struct mptcp_sock *msk;
- struct nlattr *bc;
BUILD_BUG_ON(sizeof(cb->ctx) < sizeof(*diag_ctx));
- cb_data = cb->data;
- bc = cb_data->inet_diag_nla_bc;
-
while ((msk = mptcp_token_iter_next(net, &diag_ctx->s_slot,
&diag_ctx->s_num)) != NULL) {
struct inet_sock *inet = (struct inet_sock *)msk;
@@ -181,7 +174,7 @@ static void mptcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
r->id.idiag_dport)
goto next;
- ret = sk_diag_dump(sk, skb, cb, r, bc, net_admin);
+ ret = sk_diag_dump(sk, skb, cb, r, net_admin);
next:
sock_put(sk);
if (ret < 0) {
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index e6fd97b21e9e..7e9eb0ab21c3 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -12,6 +12,7 @@
#include <linux/sched/signal.h>
#include <linux/atomic.h>
#include <net/aligned_data.h>
+#include <net/rps.h>
#include <net/sock.h>
#include <net/inet_common.h>
#include <net/inet_hashtables.h>
@@ -137,7 +138,7 @@ struct sock *__mptcp_nmpc_sk(struct mptcp_sock *msk)
static void mptcp_drop(struct sock *sk, struct sk_buff *skb)
{
- sk_drops_add(sk, skb);
+ sk_drops_skbadd(sk, skb);
__kfree_skb(skb);
}
@@ -1740,6 +1741,20 @@ static u32 mptcp_send_limit(const struct sock *sk)
return limit - not_sent;
}
+static void mptcp_rps_record_subflows(const struct mptcp_sock *msk)
+{
+ struct mptcp_subflow_context *subflow;
+
+ if (!rfs_is_needed())
+ return;
+
+ mptcp_for_each_subflow(msk, subflow) {
+ struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+
+ sock_rps_record_flow(ssk);
+ }
+}
+
static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
struct mptcp_sock *msk = mptcp_sk(sk);
@@ -1753,6 +1768,8 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
lock_sock(sk);
+ mptcp_rps_record_subflows(msk);
+
if (unlikely(inet_test_bit(DEFER_CONNECT, sk) ||
msg->msg_flags & MSG_FASTOPEN)) {
int copied_syn = 0;
@@ -2131,6 +2148,8 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
goto out_err;
}
+ mptcp_rps_record_subflows(msk);
+
timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
len = min_t(size_t, len, INT_MAX);
@@ -2587,7 +2606,8 @@ static void __mptcp_retrans(struct sock *sk)
if (mptcp_data_fin_enabled(msk)) {
struct inet_connection_sock *icsk = inet_csk(sk);
- icsk->icsk_retransmits++;
+ WRITE_ONCE(icsk->icsk_retransmits,
+ icsk->icsk_retransmits + 1);
mptcp_set_datafin_timeout(sk);
mptcp_send_ack(msk);
@@ -3920,6 +3940,8 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
mptcp_sock_graft(ssk, newsock);
}
+ mptcp_rps_record_subflows(msk);
+
/* Do late cleanup for the first subflow as necessary. Also
* deal with bad peers not doing a complete shutdown.
*/
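
mptcp_rps_record_subflows() makes RFS steer interrupts for each TCP subflow toward the CPU the MPTCP application runs on; it is called from sendmsg, recvmsg and accept, i.e. everywhere the flow's CPU affinity is observable. A sketch of the guard it relies on (assumed shape, mirroring the existing rfs_needed static key; see <net/rps.h>):

	static inline bool rfs_is_needed(void)
	{
	#ifdef CONFIG_RPS
		/* recording flows is pointless until RFS is configured */
		return static_branch_unlikely(&rfs_needed);
	#else
		return false;
	#endif
	}
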
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index b15d7fab5c4b..a1787a1344ac 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -788,9 +788,7 @@ static inline bool mptcp_epollin_ready(const struct sock *sk)
* as it can always coalesce them
*/
return (data_avail >= sk->sk_rcvlowat) ||
- (mem_cgroup_sockets_enabled && sk->sk_memcg &&
- mem_cgroup_under_socket_pressure(sk->sk_memcg)) ||
- READ_ONCE(tcp_memory_pressure);
+ tcp_under_memory_pressure(sk);
}
int mptcp_set_rcvlowat(struct sock *sk, int val);
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 3f1b62a9fe88..c8a7e4b59db1 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -1717,19 +1717,14 @@ static void mptcp_attach_cgroup(struct sock *parent, struct sock *child)
/* only the additional subflows created by kworkers have to be modified */
if (cgroup_id(sock_cgroup_ptr(parent_skcd)) !=
cgroup_id(sock_cgroup_ptr(child_skcd))) {
-#ifdef CONFIG_MEMCG
- struct mem_cgroup *memcg = parent->sk_memcg;
-
- mem_cgroup_sk_free(child);
- if (memcg && css_tryget(&memcg->css))
- child->sk_memcg = memcg;
-#endif /* CONFIG_MEMCG */
-
cgroup_sk_free(child_skcd);
*child_skcd = *parent_skcd;
cgroup_sk_clone(child_skcd);
}
#endif /* CONFIG_SOCK_CGROUP_DATA */
+
+ if (mem_cgroup_sockets_enabled)
+ mem_cgroup_sk_inherit(parent, child);
}
static void mptcp_subflow_ops_override(struct sock *ssk)
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 50fd6809380f..3a04665adf99 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -60,7 +60,7 @@ MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("List and change connection tracking table");
struct ctnetlink_list_dump_ctx {
- struct nf_conn *last;
+ unsigned long last_id;
unsigned int cpu;
bool done;
};
@@ -1733,16 +1733,6 @@ static int ctnetlink_get_conntrack(struct sk_buff *skb,
return nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid);
}
-static int ctnetlink_done_list(struct netlink_callback *cb)
-{
- struct ctnetlink_list_dump_ctx *ctx = (void *)cb->ctx;
-
- if (ctx->last)
- nf_ct_put(ctx->last);
-
- return 0;
-}
-
#ifdef CONFIG_NF_CONNTRACK_EVENTS
static int ctnetlink_dump_one_entry(struct sk_buff *skb,
struct netlink_callback *cb,
@@ -1757,11 +1747,11 @@ static int ctnetlink_dump_one_entry(struct sk_buff *skb,
if (l3proto && nf_ct_l3num(ct) != l3proto)
return 0;
- if (ctx->last) {
- if (ct != ctx->last)
+ if (ctx->last_id) {
+ if (ctnetlink_get_id(ct) != ctx->last_id)
return 0;
- ctx->last = NULL;
+ ctx->last_id = 0;
}
/* We can't dump extension info for the unconfirmed
@@ -1775,12 +1765,8 @@ static int ctnetlink_dump_one_entry(struct sk_buff *skb,
cb->nlh->nlmsg_seq,
NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
ct, dying, 0);
- if (res < 0) {
- if (!refcount_inc_not_zero(&ct->ct_general.use))
- return 0;
-
- ctx->last = ct;
- }
+ if (res < 0)
+ ctx->last_id = ctnetlink_get_id(ct);
return res;
}
@@ -1796,10 +1782,10 @@ static int
ctnetlink_dump_dying(struct sk_buff *skb, struct netlink_callback *cb)
{
struct ctnetlink_list_dump_ctx *ctx = (void *)cb->ctx;
- struct nf_conn *last = ctx->last;
#ifdef CONFIG_NF_CONNTRACK_EVENTS
const struct net *net = sock_net(skb->sk);
struct nf_conntrack_net_ecache *ecache_net;
+ unsigned long last_id = ctx->last_id;
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_node *n;
#endif
@@ -1807,7 +1793,7 @@ ctnetlink_dump_dying(struct sk_buff *skb, struct netlink_callback *cb)
if (ctx->done)
return 0;
- ctx->last = NULL;
+ ctx->last_id = 0;
#ifdef CONFIG_NF_CONNTRACK_EVENTS
ecache_net = nf_conn_pernet_ecache(net);
@@ -1818,24 +1804,21 @@ ctnetlink_dump_dying(struct sk_buff *skb, struct netlink_callback *cb)
int res;
ct = nf_ct_tuplehash_to_ctrack(h);
- if (last && last != ct)
+ if (last_id && last_id != ctnetlink_get_id(ct))
continue;
res = ctnetlink_dump_one_entry(skb, cb, ct, true);
if (res < 0) {
spin_unlock_bh(&ecache_net->dying_lock);
- nf_ct_put(last);
return skb->len;
}
- nf_ct_put(last);
- last = NULL;
+ last_id = 0;
}
spin_unlock_bh(&ecache_net->dying_lock);
#endif
ctx->done = true;
- nf_ct_put(last);
return skb->len;
}
@@ -1847,7 +1830,6 @@ static int ctnetlink_get_ct_dying(struct sk_buff *skb,
if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
.dump = ctnetlink_dump_dying,
- .done = ctnetlink_done_list,
};
return netlink_dump_start(info->sk, skb, info->nlh, &c);
}
@@ -1862,7 +1844,6 @@ static int ctnetlink_get_ct_unconfirmed(struct sk_buff *skb,
if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
.dump = ctnetlink_dump_unconfirmed,
- .done = ctnetlink_done_list,
};
return netlink_dump_start(info->sk, skb, info->nlh, &c);
}
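
Resuming a dump by conntrack id rather than by a ref-held pointer removes the need for the .done callback deleted above: nothing stays pinned across netlink round trips anymore. The resume logic reduces to (condensed from the hunks above):

	/* on resume: skip until the entry reported last time */
	if (ctx->last_id) {
		if (ctnetlink_get_id(ct) != ctx->last_id)
			return 0;
		ctx->last_id = 0;
	}

	/* on skb overflow: record the id, no refcount is taken */
	if (res < 0)
		ctx->last_id = ctnetlink_get_id(ct);
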
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index c1082de09656..68e273d8821a 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -151,12 +151,12 @@ static void nft_ctx_init(struct nft_ctx *ctx,
bitmap_zero(ctx->reg_inited, NFT_REG32_NUM);
}
-static struct nft_trans *nft_trans_alloc_gfp(const struct nft_ctx *ctx,
- int msg_type, u32 size, gfp_t gfp)
+static struct nft_trans *nft_trans_alloc(const struct nft_ctx *ctx,
+ int msg_type, u32 size)
{
struct nft_trans *trans;
- trans = kzalloc(size, gfp);
+ trans = kzalloc(size, GFP_KERNEL);
if (trans == NULL)
return NULL;
@@ -172,12 +172,6 @@ static struct nft_trans *nft_trans_alloc_gfp(const struct nft_ctx *ctx,
return trans;
}
-static struct nft_trans *nft_trans_alloc(const struct nft_ctx *ctx,
- int msg_type, u32 size)
-{
- return nft_trans_alloc_gfp(ctx, msg_type, size, GFP_KERNEL);
-}
-
static struct nft_trans_binding *nft_trans_get_binding(struct nft_trans *trans)
{
switch (trans->msg_type) {
@@ -442,8 +436,7 @@ static bool nft_trans_collapse_set_elem_allowed(const struct nft_trans_elem *a,
static bool nft_trans_collapse_set_elem(struct nftables_pernet *nft_net,
struct nft_trans_elem *tail,
- struct nft_trans_elem *trans,
- gfp_t gfp)
+ struct nft_trans_elem *trans)
{
unsigned int nelems, old_nelems = tail->nelems;
struct nft_trans_elem *new_trans;
@@ -466,9 +459,11 @@ static bool nft_trans_collapse_set_elem(struct nftables_pernet *nft_net,
/* krealloc might free tail which invalidates list pointers */
list_del_init(&tail->nft_trans.list);
- new_trans = krealloc(tail, struct_size(tail, elems, nelems), gfp);
+ new_trans = krealloc(tail, struct_size(tail, elems, nelems),
+ GFP_KERNEL);
if (!new_trans) {
- list_add_tail(&tail->nft_trans.list, &nft_net->commit_list);
+ list_add_tail(&tail->nft_trans.list,
+ &nft_net->commit_list);
return false;
}
@@ -484,7 +479,7 @@ static bool nft_trans_collapse_set_elem(struct nftables_pernet *nft_net,
}
static bool nft_trans_try_collapse(struct nftables_pernet *nft_net,
- struct nft_trans *trans, gfp_t gfp)
+ struct nft_trans *trans)
{
struct nft_trans *tail;
@@ -501,7 +496,7 @@ static bool nft_trans_try_collapse(struct nftables_pernet *nft_net,
case NFT_MSG_DELSETELEM:
return nft_trans_collapse_set_elem(nft_net,
nft_trans_container_elem(tail),
- nft_trans_container_elem(trans), gfp);
+ nft_trans_container_elem(trans));
}
return false;
@@ -537,17 +532,14 @@ static void nft_trans_commit_list_add_tail(struct net *net, struct nft_trans *tr
}
}
-static void nft_trans_commit_list_add_elem(struct net *net, struct nft_trans *trans,
- gfp_t gfp)
+static void nft_trans_commit_list_add_elem(struct net *net, struct nft_trans *trans)
{
struct nftables_pernet *nft_net = nft_pernet(net);
WARN_ON_ONCE(trans->msg_type != NFT_MSG_NEWSETELEM &&
trans->msg_type != NFT_MSG_DELSETELEM);
- might_alloc(gfp);
-
- if (nft_trans_try_collapse(nft_net, trans, gfp)) {
+ if (nft_trans_try_collapse(nft_net, trans)) {
kfree(trans);
return;
}
@@ -7570,7 +7562,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
}
ue->priv = elem_priv;
- nft_trans_commit_list_add_elem(ctx->net, trans, GFP_KERNEL);
+ nft_trans_commit_list_add_elem(ctx->net, trans);
goto err_elem_free;
}
}
@@ -7594,7 +7586,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
}
nft_trans_container_elem(trans)->elems[0].priv = elem.priv;
- nft_trans_commit_list_add_elem(ctx->net, trans, GFP_KERNEL);
+ nft_trans_commit_list_add_elem(ctx->net, trans);
return 0;
err_set_full:
@@ -7860,7 +7852,7 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
nft_setelem_data_deactivate(ctx->net, set, elem.priv);
nft_trans_container_elem(trans)->elems[0].priv = elem.priv;
- nft_trans_commit_list_add_elem(ctx->net, trans, GFP_KERNEL);
+ nft_trans_commit_list_add_elem(ctx->net, trans);
return 0;
fail_ops:
@@ -7885,9 +7877,8 @@ static int nft_setelem_flush(const struct nft_ctx *ctx,
if (!nft_set_elem_active(ext, iter->genmask))
return 0;
- trans = nft_trans_alloc_gfp(ctx, NFT_MSG_DELSETELEM,
- struct_size_t(struct nft_trans_elem, elems, 1),
- GFP_ATOMIC);
+ trans = nft_trans_alloc(ctx, NFT_MSG_DELSETELEM,
+ struct_size_t(struct nft_trans_elem, elems, 1));
if (!trans)
return -ENOMEM;
@@ -7898,7 +7889,7 @@ static int nft_setelem_flush(const struct nft_ctx *ctx,
nft_trans_elem_set(trans) = set;
nft_trans_container_elem(trans)->nelems = 1;
nft_trans_container_elem(trans)->elems[0].priv = elem_priv;
- nft_trans_commit_list_add_elem(ctx->net, trans, GFP_ATOMIC);
+ nft_trans_commit_list_add_elem(ctx->net, trans);
return 0;
}
@@ -7915,7 +7906,7 @@ static int __nft_set_catchall_flush(const struct nft_ctx *ctx,
nft_setelem_data_deactivate(ctx->net, set, elem_priv);
nft_trans_container_elem(trans)->elems[0].priv = elem_priv;
- nft_trans_commit_list_add_elem(ctx->net, trans, GFP_KERNEL);
+ nft_trans_commit_list_add_elem(ctx->net, trans);
return 0;
}
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index 225ff293cd50..14dd1c0698c3 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -9,7 +9,7 @@
#include <linux/netfilter/nf_conntrack_common.h>
#include <linux/netfilter/nf_tables.h>
#include <net/ip.h>
-#include <net/inet_dscp.h>
+#include <net/flow.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables_core.h>
#include <net/netfilter/nf_conntrack_core.h>
@@ -236,7 +236,7 @@ static int nft_flow_route(const struct nft_pktinfo *pkt,
fl.u.ip4.saddr = ct->tuplehash[!dir].tuple.src.u3.ip;
fl.u.ip4.flowi4_oif = nft_in(pkt)->ifindex;
fl.u.ip4.flowi4_iif = this_dst->dev->ifindex;
- fl.u.ip4.flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(ip_hdr(pkt->skb)));
+ fl.u.ip4.flowi4_dscp = ip4h_dscp(ip_hdr(pkt->skb));
fl.u.ip4.flowi4_mark = pkt->skb->mark;
fl.u.ip4.flowi4_flags = FLOWI_FLAG_ANYSRC;
break;
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index 7dfc5343dae4..b0214418f75a 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -40,7 +40,7 @@ static bool nft_payload_rebuild_vlan_hdr(const struct sk_buff *skb, int mac_off,
/* add vlan header into the user buffer if the tag was removed by offloads */
static bool
-nft_payload_copy_vlan(u32 *d, const struct sk_buff *skb, u8 offset, u8 len)
+nft_payload_copy_vlan(u32 *d, const struct sk_buff *skb, u16 offset, u8 len)
{
int mac_off = skb_mac_header(skb) - skb->data;
u8 *vlanh, *dst_u8 = (u8 *) d;
@@ -212,7 +212,7 @@ static const struct nla_policy nft_payload_policy[NFTA_PAYLOAD_MAX + 1] = {
[NFTA_PAYLOAD_SREG] = { .type = NLA_U32 },
[NFTA_PAYLOAD_DREG] = { .type = NLA_U32 },
[NFTA_PAYLOAD_BASE] = { .type = NLA_U32 },
- [NFTA_PAYLOAD_OFFSET] = NLA_POLICY_MAX(NLA_BE32, 255),
+ [NFTA_PAYLOAD_OFFSET] = { .type = NLA_BE32 },
[NFTA_PAYLOAD_LEN] = NLA_POLICY_MAX(NLA_BE32, 255),
[NFTA_PAYLOAD_CSUM_TYPE] = { .type = NLA_U32 },
[NFTA_PAYLOAD_CSUM_OFFSET] = NLA_POLICY_MAX(NLA_BE32, 255),
@@ -684,7 +684,7 @@ static const struct nft_expr_ops nft_payload_inner_ops = {
static inline void nft_csum_replace(__sum16 *sum, __wsum fsum, __wsum tsum)
{
- *sum = csum_fold(csum_add(csum_sub(~csum_unfold(*sum), fsum), tsum));
+ csum_replace4(sum, (__force __be32)fsum, (__force __be32)tsum);
if (*sum == 0)
*sum = CSUM_MANGLED_0;
}
@@ -797,7 +797,7 @@ static int nft_payload_csum_inet(struct sk_buff *skb, const u32 *src,
struct nft_payload_set {
enum nft_payload_bases base:8;
- u8 offset;
+ u16 offset;
u8 len;
u8 sreg;
u8 csum_type;
@@ -812,7 +812,7 @@ struct nft_payload_vlan_hdr {
};
static bool
-nft_payload_set_vlan(const u32 *src, struct sk_buff *skb, u8 offset, u8 len,
+nft_payload_set_vlan(const u32 *src, struct sk_buff *skb, u16 offset, u8 len,
int *vlan_hlen)
{
struct nft_payload_vlan_hdr *vlanh;
@@ -940,14 +940,18 @@ static int nft_payload_set_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[])
{
+ u32 csum_offset, offset, csum_type = NFT_PAYLOAD_CSUM_NONE;
struct nft_payload_set *priv = nft_expr_priv(expr);
- u32 csum_offset, csum_type = NFT_PAYLOAD_CSUM_NONE;
int err;
priv->base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE]));
- priv->offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET]));
priv->len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN]));
+ err = nft_parse_u32_check(tb[NFTA_PAYLOAD_OFFSET], U16_MAX, &offset);
+ if (err < 0)
+ return err;
+ priv->offset = offset;
+
if (tb[NFTA_PAYLOAD_CSUM_TYPE])
csum_type = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_TYPE]));
if (tb[NFTA_PAYLOAD_CSUM_OFFSET]) {
@@ -1069,7 +1073,7 @@ nft_payload_select_ops(const struct nft_ctx *ctx,
if (tb[NFTA_PAYLOAD_DREG] == NULL)
return ERR_PTR(-EINVAL);
- err = nft_parse_u32_check(tb[NFTA_PAYLOAD_OFFSET], U8_MAX, &offset);
+ err = nft_parse_u32_check(tb[NFTA_PAYLOAD_OFFSET], U16_MAX, &offset);
if (err < 0)
return ERR_PTR(err);
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 266d0c637225..ba01ce75d6de 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -30,6 +30,7 @@ struct nft_rhash {
struct nft_rhash_elem {
struct nft_elem_priv priv;
struct rhash_head node;
+ struct llist_node walk_node;
u32 wq_gc_seq;
struct nft_set_ext ext;
};
@@ -144,6 +145,7 @@ nft_rhash_update(struct nft_set *set, const u32 *key,
goto err1;
he = nft_elem_priv_cast(elem_priv);
+ init_llist_node(&he->walk_node);
prev = rhashtable_lookup_get_insert_key(&priv->ht, &arg, &he->node,
nft_rhash_params);
if (IS_ERR(prev))
@@ -180,6 +182,7 @@ static int nft_rhash_insert(const struct net *net, const struct nft_set *set,
};
struct nft_rhash_elem *prev;
+ init_llist_node(&he->walk_node);
prev = rhashtable_lookup_get_insert_key(&priv->ht, &arg, &he->node,
nft_rhash_params);
if (IS_ERR(prev))
@@ -261,12 +264,12 @@ static bool nft_rhash_delete(const struct nft_set *set,
return true;
}
-static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set,
- struct nft_set_iter *iter)
+static void nft_rhash_walk_ro(const struct nft_ctx *ctx, struct nft_set *set,
+ struct nft_set_iter *iter)
{
struct nft_rhash *priv = nft_set_priv(set);
- struct nft_rhash_elem *he;
struct rhashtable_iter hti;
+ struct nft_rhash_elem *he;
rhashtable_walk_enter(&priv->ht, &hti);
rhashtable_walk_start(&hti);
@@ -295,6 +298,97 @@ cont:
rhashtable_walk_exit(&hti);
}
+static void nft_rhash_walk_update(const struct nft_ctx *ctx,
+ struct nft_set *set,
+ struct nft_set_iter *iter)
+{
+ struct nft_rhash *priv = nft_set_priv(set);
+ struct nft_rhash_elem *he, *tmp;
+ struct llist_node *first_node;
+ struct rhashtable_iter hti;
+ LLIST_HEAD(walk_list);
+
+ lockdep_assert_held(&nft_pernet(ctx->net)->commit_mutex);
+
+ if (set->in_update_walk) {
+ /* This can happen with bogus rulesets during ruleset validation
+ * when a verdict map causes a jump back to the same map.
+ *
+ * Without this extra check the walk_next loop below will see
+ * elems on the caller's walk_list and skip (not validate) them.
+ */
+ iter->err = -EMLINK;
+ return;
+ }
+
+ /* walk happens under RCU.
+ *
+ * We create a snapshot list so ->iter callback can sleep.
+ * commit_mutex is held, elements can ...
+ * .. be added in parallel from dataplane (dynset)
+ * .. be marked as dead in parallel from dataplane (dynset).
+ * .. be queued for removal in parallel (gc timeout).
+ * .. not be freed: transaction mutex is held.
+ */
+ rhashtable_walk_enter(&priv->ht, &hti);
+ rhashtable_walk_start(&hti);
+
+ while ((he = rhashtable_walk_next(&hti))) {
+ if (IS_ERR(he)) {
+ if (PTR_ERR(he) != -EAGAIN) {
+ iter->err = PTR_ERR(he);
+ break;
+ }
+
+ continue;
+ }
+
+ /* rhashtable resized during walk, skip */
+ if (llist_on_list(&he->walk_node))
+ continue;
+
+ llist_add(&he->walk_node, &walk_list);
+ }
+ rhashtable_walk_stop(&hti);
+ rhashtable_walk_exit(&hti);
+
+ first_node = __llist_del_all(&walk_list);
+ set->in_update_walk = true;
+ llist_for_each_entry_safe(he, tmp, first_node, walk_node) {
+ if (iter->err == 0) {
+ iter->err = iter->fn(ctx, set, iter, &he->priv);
+ if (iter->err == 0)
+ iter->count++;
+ }
+
+ /* all entries must be cleared again, else next ->walk iteration
+ * will skip entries.
+ */
+ init_llist_node(&he->walk_node);
+ }
+ set->in_update_walk = false;
+}
+
+static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set,
+ struct nft_set_iter *iter)
+{
+ switch (iter->type) {
+ case NFT_ITER_UPDATE:
+ /* only relevant for netlink dumps which use READ type */
+ WARN_ON_ONCE(iter->skip != 0);
+
+ nft_rhash_walk_update(ctx, set, iter);
+ break;
+ case NFT_ITER_READ:
+ nft_rhash_walk_ro(ctx, set, iter);
+ break;
+ default:
+ iter->err = -EINVAL;
+ WARN_ON_ONCE(1);
+ break;
+ }
+}
+
static bool nft_rhash_expr_needs_gc_run(const struct nft_set *set,
struct nft_set_ext *ext)
{
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index 9a10251228fd..4b64c3bd8e70 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -397,7 +397,7 @@ int pipapo_refill(unsigned long *map, unsigned int len, unsigned int rules,
}
/**
- * pipapo_get() - Get matching element reference given key data
+ * pipapo_get_slow() - Get matching element reference given key data
* @m: storage containing the set elements
* @data: Key data to be matched against existing elements
* @genmask: If set, check that element is active in given genmask
@@ -414,12 +414,12 @@ int pipapo_refill(unsigned long *map, unsigned int len, unsigned int rules,
*
* Return: pointer to &struct nft_pipapo_elem on match, NULL otherwise.
*/
-static struct nft_pipapo_elem *pipapo_get(const struct nft_pipapo_match *m,
- const u8 *data, u8 genmask,
- u64 tstamp)
+static struct nft_pipapo_elem *pipapo_get_slow(const struct nft_pipapo_match *m,
+ const u8 *data, u8 genmask,
+ u64 tstamp)
{
+ unsigned long *res_map, *fill_map, *map;
struct nft_pipapo_scratch *scratch;
- unsigned long *res_map, *fill_map;
const struct nft_pipapo_field *f;
bool map_index;
int i;
@@ -429,11 +429,13 @@ static struct nft_pipapo_elem *pipapo_get(const struct nft_pipapo_match *m,
scratch = *raw_cpu_ptr(m->scratch);
if (unlikely(!scratch))
goto out;
+ __local_lock_nested_bh(&scratch->bh_lock);
map_index = scratch->map_index;
- res_map = scratch->map + (map_index ? m->bsize_max : 0);
- fill_map = scratch->map + (map_index ? 0 : m->bsize_max);
+ map = NFT_PIPAPO_LT_ALIGN(&scratch->__map[0]);
+ res_map = map + (map_index ? m->bsize_max : 0);
+ fill_map = map + (map_index ? 0 : m->bsize_max);
pipapo_resmap_init(m, res_map);
@@ -464,6 +466,7 @@ next_match:
last);
if (b < 0) {
scratch->map_index = map_index;
+ __local_unlock_nested_bh(&scratch->bh_lock);
local_bh_enable();
return NULL;
@@ -483,6 +486,7 @@ next_match:
* *next* bitmap (not initial) for the next packet.
*/
scratch->map_index = map_index;
+ __local_unlock_nested_bh(&scratch->bh_lock);
local_bh_enable();
return e;
}
@@ -497,12 +501,47 @@ next_match:
data += NFT_PIPAPO_GROUPS_PADDING(f);
}
+ __local_unlock_nested_bh(&scratch->bh_lock);
out:
local_bh_enable();
return NULL;
}
/**
+ * pipapo_get() - Get matching element reference given key data
+ * @m: Storage containing the set elements
+ * @data: Key data to be matched against existing elements
+ * @genmask: If set, check that element is active in given genmask
+ * @tstamp: Timestamp to check for expired elements
+ *
+ * This is a dispatcher function that calls either the generic C
+ * implementation or, if available, the AVX2 one.
+ * This helper is only called from the control plane, with either RCU
+ * read lock or transaction mutex held.
+ *
+ * Return: pointer to &struct nft_pipapo_elem on match, NULL otherwise.
+ */
+static struct nft_pipapo_elem *pipapo_get(const struct nft_pipapo_match *m,
+ const u8 *data, u8 genmask,
+ u64 tstamp)
+{
+ struct nft_pipapo_elem *e;
+
+ local_bh_disable();
+
+#if defined(CONFIG_X86_64) && !defined(CONFIG_UML)
+ if (boot_cpu_has(X86_FEATURE_AVX2) && irq_fpu_usable()) {
+ e = pipapo_get_avx2(m, data, genmask, tstamp);
+ local_bh_enable();
+ return e;
+ }
+#endif
+ e = pipapo_get_slow(m, data, genmask, tstamp);
+ local_bh_enable();
+ return e;
+}
+
+/**
* nft_pipapo_lookup() - Dataplane frontend for main lookup function
* @net: Network namespace
* @set: nftables API set representation
@@ -523,7 +562,7 @@ nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
const struct nft_pipapo_elem *e;
m = rcu_dereference(priv->match);
- e = pipapo_get(m, (const u8 *)key, genmask, get_jiffies_64());
+ e = pipapo_get_slow(m, (const u8 *)key, genmask, get_jiffies_64());
return e ? &e->ext : NULL;
}
@@ -1136,22 +1175,17 @@ static void pipapo_map(struct nft_pipapo_match *m,
}
/**
- * pipapo_free_scratch() - Free per-CPU map at original (not aligned) address
+ * pipapo_free_scratch() - Free per-CPU map at original address
* @m: Matching data
* @cpu: CPU number
*/
static void pipapo_free_scratch(const struct nft_pipapo_match *m, unsigned int cpu)
{
struct nft_pipapo_scratch *s;
- void *mem;
s = *per_cpu_ptr(m->scratch, cpu);
- if (!s)
- return;
- mem = s;
- mem -= s->align_off;
- kvfree(mem);
+ kvfree(s);
}
/**
@@ -1168,11 +1202,8 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone,
for_each_possible_cpu(i) {
struct nft_pipapo_scratch *scratch;
-#ifdef NFT_PIPAPO_ALIGN
- void *scratch_aligned;
- u32 align_off;
-#endif
- scratch = kvzalloc_node(struct_size(scratch, map, bsize_max * 2) +
+
+ scratch = kvzalloc_node(struct_size(scratch, __map, bsize_max * 2) +
NFT_PIPAPO_ALIGN_HEADROOM,
GFP_KERNEL_ACCOUNT, cpu_to_node(i));
if (!scratch) {
@@ -1187,23 +1218,7 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone,
}
pipapo_free_scratch(clone, i);
-
-#ifdef NFT_PIPAPO_ALIGN
- /* Align &scratch->map (not the struct itself): the extra
- * %NFT_PIPAPO_ALIGN_HEADROOM bytes passed to kzalloc_node()
- * above guarantee we can waste up to those bytes in order
- * to align the map field regardless of its offset within
- * the struct.
- */
- BUILD_BUG_ON(offsetof(struct nft_pipapo_scratch, map) > NFT_PIPAPO_ALIGN_HEADROOM);
-
- scratch_aligned = NFT_PIPAPO_LT_ALIGN(&scratch->map);
- scratch_aligned -= offsetof(struct nft_pipapo_scratch, map);
- align_off = scratch_aligned - (void *)scratch;
-
- scratch = scratch_aligned;
- scratch->align_off = align_off;
-#endif
+ local_lock_init(&scratch->bh_lock);
*per_cpu_ptr(clone->scratch, i) = scratch;
}
diff --git a/net/netfilter/nft_set_pipapo.h b/net/netfilter/nft_set_pipapo.h
index 4a2ff85ce1c4..eaab422aa56a 100644
--- a/net/netfilter/nft_set_pipapo.h
+++ b/net/netfilter/nft_set_pipapo.h
@@ -124,14 +124,14 @@ struct nft_pipapo_field {
/**
* struct nft_pipapo_scratch - percpu data used for lookup and matching
+ * @bh_lock: PREEMPT_RT local spinlock
* @map_index: Current working bitmap index, toggled between field matches
- * @align_off: Offset to get the originally allocated address
- * @map: store partial matching results during lookup
+ * @__map: store partial matching results during lookup
*/
struct nft_pipapo_scratch {
+ local_lock_t bh_lock;
u8 map_index;
- u32 align_off;
- unsigned long map[];
+ unsigned long __map[];
};
/**
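
Aligning the scratch map at the point of use (NFT_PIPAPO_LT_ALIGN on __map) makes the align_off bookkeeping and the free-at-original-address dance unnecessary, and the new local_lock_t serialises scratch reuse on PREEMPT_RT. The access pattern now shared by the C and AVX2 lookups is:

	struct nft_pipapo_scratch *scratch = *raw_cpu_ptr(m->scratch);
	unsigned long *map, *res_map, *fill_map;

	__local_lock_nested_bh(&scratch->bh_lock);
	map = NFT_PIPAPO_LT_ALIGN(&scratch->__map[0]);	/* align at use time */
	res_map  = map + (scratch->map_index ? m->bsize_max : 0);
	fill_map = map + (scratch->map_index ? 0 : m->bsize_max);
	/* ... field-by-field matching ... */
	__local_unlock_nested_bh(&scratch->bh_lock);
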
diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c
index 2f090e253caf..7559306d0aed 100644
--- a/net/netfilter/nft_set_pipapo_avx2.c
+++ b/net/netfilter/nft_set_pipapo_avx2.c
@@ -1099,7 +1099,7 @@ bool nft_pipapo_avx2_estimate(const struct nft_set_desc *desc, u32 features,
desc->field_count < NFT_PIPAPO_MIN_FIELDS)
return false;
- if (!boot_cpu_has(X86_FEATURE_AVX2) || !boot_cpu_has(X86_FEATURE_AVX))
+ if (!boot_cpu_has(X86_FEATURE_AVX2))
return false;
est->size = pipapo_estimate_size(desc);
@@ -1133,66 +1133,50 @@ static inline void pipapo_resmap_init_avx2(const struct nft_pipapo_match *m, uns
}
/**
- * nft_pipapo_avx2_lookup() - Lookup function for AVX2 implementation
- * @net: Network namespace
- * @set: nftables API set representation
- * @key: nftables API element representation containing key data
+ * pipapo_get_avx2() - Lookup function for AVX2 implementation
+ * @m: Storage containing the set elements
+ * @data: Key data to be matched against existing elements
+ * @genmask: If set, check that element is active in given genmask
+ * @tstamp: Timestamp to check for expired elements
*
* For more details, see DOC: Theory of Operation in nft_set_pipapo.c.
*
* This implementation exploits the repetitive characteristic of the algorithm
* to provide a fast, vectorised version using the AVX2 SIMD instruction set.
*
- * Return: true on match, false otherwise.
+ * The caller must check that the FPU is usable.
+ * This function must be called with BH disabled.
+ *
+ * Return: pointer to &struct nft_pipapo_elem on match, NULL otherwise.
*/
-const struct nft_set_ext *
-nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
- const u32 *key)
+struct nft_pipapo_elem *pipapo_get_avx2(const struct nft_pipapo_match *m,
+ const u8 *data, u8 genmask,
+ u64 tstamp)
{
- struct nft_pipapo *priv = nft_set_priv(set);
- const struct nft_set_ext *ext = NULL;
struct nft_pipapo_scratch *scratch;
- u8 genmask = nft_genmask_cur(net);
- const struct nft_pipapo_match *m;
const struct nft_pipapo_field *f;
- const u8 *rp = (const u8 *)key;
- unsigned long *res, *fill;
+ unsigned long *res, *fill, *map;
bool map_index;
int i;
- local_bh_disable();
-
- if (unlikely(!irq_fpu_usable())) {
- ext = nft_pipapo_lookup(net, set, key);
+ scratch = *raw_cpu_ptr(m->scratch);
+ if (unlikely(!scratch))
+ return NULL;
- local_bh_enable();
- return ext;
- }
+ __local_lock_nested_bh(&scratch->bh_lock);
+ map_index = scratch->map_index;
+ map = NFT_PIPAPO_LT_ALIGN(&scratch->__map[0]);
+ res = map + (map_index ? m->bsize_max : 0);
+ fill = map + (map_index ? 0 : m->bsize_max);
- m = rcu_dereference(priv->match);
+ pipapo_resmap_init_avx2(m, res);
- /* This also protects access to all data related to scratch maps.
- *
- * Note that we don't need a valid MXCSR state for any of the
+ /* Note that we don't need a valid MXCSR state for any of the
* operations we use here, so pass 0 as mask and spare a LDMXCSR
* instruction.
*/
kernel_fpu_begin_mask(0);
- scratch = *raw_cpu_ptr(m->scratch);
- if (unlikely(!scratch)) {
- kernel_fpu_end();
- local_bh_enable();
- return NULL;
- }
-
- map_index = scratch->map_index;
-
- res = scratch->map + (map_index ? m->bsize_max : 0);
- fill = scratch->map + (map_index ? 0 : m->bsize_max);
-
- pipapo_resmap_init_avx2(m, res);
-
nft_pipapo_avx2_prepare();
next_match:
@@ -1202,7 +1186,7 @@ next_match:
#define NFT_SET_PIPAPO_AVX2_LOOKUP(b, n) \
(ret = nft_pipapo_avx2_lookup_##b##b_##n(res, fill, f, \
- ret, rp, \
+ ret, data, \
first, last))
if (likely(f->bb == 8)) {
@@ -1218,7 +1202,7 @@ next_match:
NFT_SET_PIPAPO_AVX2_LOOKUP(8, 16);
} else {
ret = nft_pipapo_avx2_lookup_slow(m, res, fill, f,
- ret, rp,
+ ret, data,
first, last);
}
} else {
@@ -1234,7 +1218,7 @@ next_match:
NFT_SET_PIPAPO_AVX2_LOOKUP(4, 32);
} else {
ret = nft_pipapo_avx2_lookup_slow(m, res, fill, f,
- ret, rp,
+ ret, data,
first, last);
}
}
@@ -1242,29 +1226,75 @@ next_match:
#undef NFT_SET_PIPAPO_AVX2_LOOKUP
- if (ret < 0)
- goto out;
+ if (ret < 0) {
+ scratch->map_index = map_index;
+ kernel_fpu_end();
+ __local_unlock_nested_bh(&scratch->bh_lock);
+ return NULL;
+ }
if (last) {
- const struct nft_set_ext *e = &f->mt[ret].e->ext;
+ struct nft_pipapo_elem *e;
- if (unlikely(nft_set_elem_expired(e) ||
- !nft_set_elem_active(e, genmask)))
+ e = f->mt[ret].e;
+ if (unlikely(__nft_set_elem_expired(&e->ext, tstamp) ||
+ !nft_set_elem_active(&e->ext, genmask)))
goto next_match;
- ext = e;
- goto out;
+ scratch->map_index = map_index;
+ kernel_fpu_end();
+ __local_unlock_nested_bh(&scratch->bh_lock);
+ return e;
}
+ map_index = !map_index;
swap(res, fill);
- rp += NFT_PIPAPO_GROUPS_PADDED_SIZE(f);
+ data += NFT_PIPAPO_GROUPS_PADDED_SIZE(f);
}
-out:
- if (i % 2)
- scratch->map_index = !map_index;
kernel_fpu_end();
+ __local_unlock_nested_bh(&scratch->bh_lock);
+ return NULL;
+}
+
+/**
+ * nft_pipapo_avx2_lookup() - Dataplane frontend for AVX2 implementation
+ * @net: Network namespace
+ * @set: nftables API set representation
+ * @key: nftables API element representation containing key data
+ *
+ * This function is called from the data path. It will search for
+ * an element matching the given key in the current active copy using
+ * the AVX2 routines if the FPU is usable or fall back to the generic
+ * implementation of the algorithm otherwise.
+ *
+ * Return: nftables API extension pointer or NULL if no match.
+ */
+const struct nft_set_ext *
+nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key)
+{
+ struct nft_pipapo *priv = nft_set_priv(set);
+ u8 genmask = nft_genmask_cur(net);
+ const struct nft_pipapo_match *m;
+ const u8 *rp = (const u8 *)key;
+ const struct nft_pipapo_elem *e;
+
+ local_bh_disable();
+
+ if (unlikely(!irq_fpu_usable())) {
+ const struct nft_set_ext *ext;
+
+ ext = nft_pipapo_lookup(net, set, key);
+
+ local_bh_enable();
+ return ext;
+ }
+
+ m = rcu_dereference(priv->match);
+
+ e = pipapo_get_avx2(m, rp, genmask, get_jiffies_64());
local_bh_enable();
- return ext;
+ return e ? &e->ext : NULL;
}
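
The hunks above split the AVX2 set lookup in two: pipapo_get_avx2() does the vectorised search and assumes the caller has made the FPU safe to use, while nft_pipapo_avx2_lookup() checks irq_fpu_usable() and falls back to the scalar nft_pipapo_lookup() when SIMD registers cannot be touched. A standalone C sketch of that frontend/backend shape, with illustrative names; the kernel_fpu_begin_mask(0)/kernel_fpu_end() bracketing is reduced to a comment:

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdio.h>

    struct match { const char *name; };

    /* Backend: may clobber SIMD state; caller arranged for that. */
    static const struct match *lookup_simd(const unsigned char *key, size_t len)
    {
        /* ... vectorised search would run here ... */
        (void)key; (void)len;
        return NULL;
    }

    /* Scalar fallback, always safe to call. */
    static const struct match *lookup_scalar(const unsigned char *key, size_t len)
    {
        (void)key; (void)len;
        return NULL;
    }

    /* Stand-in for irq_fpu_usable(): may we touch FPU/SIMD registers here? */
    static bool fpu_usable(void) { return true; }

    /* Frontend mirrors nft_pipapo_avx2_lookup(): pick a backend, unwrap result. */
    static const struct match *lookup(const unsigned char *key, size_t len)
    {
        if (!fpu_usable())
            return lookup_scalar(key, len);
        /* kernel: kernel_fpu_begin_mask(0) ... kernel_fpu_end() around this */
        return lookup_simd(key, len);
    }

    int main(void)
    {
        const unsigned char key[4] = { 1, 2, 3, 4 };

        printf("match: %p\n", (void *)lookup(key, sizeof(key)));
        return 0;
    }

Keeping the fallback in the thin frontend means the vectorised backend never has to re-check FPU state mid-walk.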
diff --git a/net/netfilter/nft_set_pipapo_avx2.h b/net/netfilter/nft_set_pipapo_avx2.h
index dbb6aaca8a7a..c2999b63da3f 100644
--- a/net/netfilter/nft_set_pipapo_avx2.h
+++ b/net/netfilter/nft_set_pipapo_avx2.h
@@ -5,8 +5,12 @@
#include <asm/fpu/xstate.h>
#define NFT_PIPAPO_ALIGN (XSAVE_YMM_SIZE / BITS_PER_BYTE)
+struct nft_pipapo_match;
bool nft_pipapo_avx2_estimate(const struct nft_set_desc *desc, u32 features,
struct nft_set_estimate *est);
+struct nft_pipapo_elem *pipapo_get_avx2(const struct nft_pipapo_match *m,
+ const u8 *data, u8 genmask,
+ u64 tstamp);
#endif /* defined(CONFIG_X86_64) && !defined(CONFIG_UML) */
#endif /* _NFT_SET_PIPAPO_AVX2_H */
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 938a257c069e..b311b66df3e9 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -584,15 +584,14 @@ nft_rbtree_deactivate(const struct net *net, const struct nft_set *set,
return NULL;
}
-static void nft_rbtree_walk(const struct nft_ctx *ctx,
- struct nft_set *set,
- struct nft_set_iter *iter)
+static void nft_rbtree_do_walk(const struct nft_ctx *ctx,
+ struct nft_set *set,
+ struct nft_set_iter *iter)
{
struct nft_rbtree *priv = nft_set_priv(set);
struct nft_rbtree_elem *rbe;
struct rb_node *node;
- read_lock_bh(&priv->lock);
for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
rbe = rb_entry(node, struct nft_rbtree_elem, node);
@@ -600,14 +599,34 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
goto cont;
iter->err = iter->fn(ctx, set, iter, &rbe->priv);
- if (iter->err < 0) {
- read_unlock_bh(&priv->lock);
+ if (iter->err < 0)
return;
- }
cont:
iter->count++;
}
- read_unlock_bh(&priv->lock);
+}
+
+static void nft_rbtree_walk(const struct nft_ctx *ctx,
+ struct nft_set *set,
+ struct nft_set_iter *iter)
+{
+ struct nft_rbtree *priv = nft_set_priv(set);
+
+ switch (iter->type) {
+ case NFT_ITER_UPDATE:
+ lockdep_assert_held(&nft_pernet(ctx->net)->commit_mutex);
+ nft_rbtree_do_walk(ctx, set, iter);
+ break;
+ case NFT_ITER_READ:
+ read_lock_bh(&priv->lock);
+ nft_rbtree_do_walk(ctx, set, iter);
+ read_unlock_bh(&priv->lock);
+ break;
+ default:
+ iter->err = -EINVAL;
+ WARN_ON_ONCE(1);
+ break;
+ }
}
static void nft_rbtree_gc_remove(struct net *net, struct nft_set *set,
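
nft_rbtree_walk() now dispatches on the iterator type: NFT_ITER_UPDATE walks run under the transaction's commit mutex (checked with lockdep_assert_held()), while NFT_ITER_READ walks still take the read lock. A userspace model of the same dispatch, with pthread primitives standing in for the kernel locks:

    #include <pthread.h>
    #include <stdio.h>

    enum iter_type { ITER_READ, ITER_UPDATE };

    static pthread_rwlock_t tree_lock = PTHREAD_RWLOCK_INITIALIZER;
    static pthread_mutex_t commit_mutex = PTHREAD_MUTEX_INITIALIZER;

    static void do_walk(void)
    {
        /* iterate the tree; no locking decisions made in here */
        puts("walking");
    }

    /* Mirrors nft_rbtree_walk(): locking policy chosen by walk type. */
    static int walk(enum iter_type type)
    {
        switch (type) {
        case ITER_UPDATE:
            /* kernel: lockdep_assert_held(&commit_mutex); caller holds it */
            do_walk();
            return 0;
        case ITER_READ:
            pthread_rwlock_rdlock(&tree_lock);
            do_walk();
            pthread_rwlock_unlock(&tree_lock);
            return 0;
        default:
            return -1; /* kernel: WARN_ON_ONCE(1) and iter->err = -EINVAL */
        }
    }

    int main(void)
    {
        pthread_mutex_lock(&commit_mutex);
        walk(ITER_UPDATE);
        pthread_mutex_unlock(&commit_mutex);
        return walk(ITER_READ);
    }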
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index e2f7080dd5d7..2b46c0cd752a 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -356,7 +356,7 @@ static void netlink_overrun(struct sock *sk)
sk_error_report(sk);
}
}
- atomic_inc(&sk->sk_drops);
+ sk_drops_inc(sk);
}
static void netlink_rcv_wake(struct sock *sk)
@@ -2711,7 +2711,7 @@ static int netlink_native_seq_show(struct seq_file *seq, void *v)
sk_wmem_alloc_get(s),
READ_ONCE(nlk->cb_running),
refcount_read(&s->sk_refcnt),
- atomic_read(&s->sk_drops),
+ sk_drops_read(s),
sock_i_ino(s)
);
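
This and several later hunks (af_packet, phonet, tipc, sctp diag) replace open-coded atomic_inc(&sk->sk_drops) and atomic_read() with sk_drops_inc()/sk_drops_read() accessors, hiding the counter's representation behind one interface. A minimal sketch of the accessor pattern using C11 atomics; the kernel's actual helpers may be implemented differently:

    #include <stdatomic.h>
    #include <stdio.h>

    struct sock {
        atomic_int sk_drops; /* representation hidden behind the helpers */
    };

    static inline void sk_drops_inc(struct sock *sk)
    {
        atomic_fetch_add_explicit(&sk->sk_drops, 1, memory_order_relaxed);
    }

    static inline int sk_drops_read(const struct sock *sk)
    {
        return atomic_load_explicit(&sk->sk_drops, memory_order_relaxed);
    }

    int main(void)
    {
        struct sock sk = { .sk_drops = 0 };

        sk_drops_inc(&sk);
        printf("drops=%d\n", sk_drops_read(&sk)); /* prints drops=1 */
        return 0;
    }

Once every caller goes through the helpers, the counter can later move to a per-CPU or otherwise sharded layout without touching any call site.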
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index b80bd3a90773..66366982f604 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -129,15 +129,13 @@ void ovs_flow_stats_get(const struct sw_flow *flow,
struct ovs_flow_stats *ovs_stats,
unsigned long *used, __be16 *tcp_flags)
{
- int cpu;
+ unsigned int cpu;
*used = 0;
*tcp_flags = 0;
memset(ovs_stats, 0, sizeof(*ovs_stats));
- /* We open code this to make sure cpu 0 is always considered */
- for (cpu = 0; cpu < nr_cpu_ids;
- cpu = cpumask_next(cpu, flow->cpu_used_mask)) {
+ for_each_cpu(cpu, flow->cpu_used_mask) {
struct sw_flow_stats *stats = rcu_dereference_ovsl(flow->stats[cpu]);
if (stats) {
@@ -158,11 +156,9 @@ void ovs_flow_stats_get(const struct sw_flow *flow,
/* Called with ovs_mutex. */
void ovs_flow_stats_clear(struct sw_flow *flow)
{
- int cpu;
+ unsigned int cpu;
- /* We open code this to make sure cpu 0 is always considered */
- for (cpu = 0; cpu < nr_cpu_ids;
- cpu = cpumask_next(cpu, flow->cpu_used_mask)) {
+ for_each_cpu(cpu, flow->cpu_used_mask) {
struct sw_flow_stats *stats = ovsl_dereference(flow->stats[cpu]);
if (stats) {
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index d108ae0bd0ee..ffc72a741a50 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -107,16 +107,15 @@ int ovs_flow_tbl_count(const struct flow_table *table)
static void flow_free(struct sw_flow *flow)
{
- int cpu;
+ unsigned int cpu;
if (ovs_identifier_is_key(&flow->id))
kfree(flow->id.unmasked_key);
if (flow->sf_acts)
ovs_nla_free_flow_actions((struct sw_flow_actions __force *)
flow->sf_acts);
- /* We open code this to make sure cpu 0 is always considered */
- for (cpu = 0; cpu < nr_cpu_ids;
- cpu = cpumask_next(cpu, flow->cpu_used_mask)) {
+
+ for_each_cpu(cpu, flow->cpu_used_mask) {
if (flow->stats[cpu])
kmem_cache_free(flow_stats_cache,
(struct sw_flow_stats __force *)flow->stats[cpu]);
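
All three Open vSwitch loops drop the open-coded "start at 0, then cpumask_next()" walk in favour of for_each_cpu(), which is equivalent here because CPU 0 is always set in flow->cpu_used_mask. A toy model of the iteration over a plain bitmask; the real for_each_cpu() operates on struct cpumask:

    #include <stdio.h>

    #define NR_CPUS 8

    /* Toy for_each_cpu(): visit the set bits of a word-sized mask. */
    #define for_each_cpu(cpu, mask) \
        for ((cpu) = 0; (cpu) < NR_CPUS; (cpu)++) \
            if ((mask) & (1u << (cpu)))

    int main(void)
    {
        unsigned int cpu;
        unsigned int cpu_used_mask = 0x1 | 0x4 | 0x20; /* CPUs 0, 2, 5 */
        unsigned long stats[NR_CPUS] = { 10, 0, 7, 0, 0, 3, 0, 0 };
        unsigned long total = 0;

        for_each_cpu(cpu, cpu_used_mask)
            total += stats[cpu]; /* only CPUs present in the mask are visited */

        printf("total=%lu\n", total); /* 20 */
        return 0;
    }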
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index a7017d7f0927..9d42c4bd6e39 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2265,7 +2265,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
drop_n_acct:
atomic_inc(&po->tp_drops);
- atomic_inc(&sk->sk_drops);
+ sk_drops_inc(sk);
drop_reason = SKB_DROP_REASON_PACKET_SOCK_ERROR;
drop_n_restore:
diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c
index a27efa4faa4e..238a9638d2b0 100644
--- a/net/phonet/af_phonet.c
+++ b/net/phonet/af_phonet.c
@@ -22,7 +22,7 @@
#include <net/phonet/pn_dev.h>
/* Transport protocol registration */
-static const struct phonet_protocol *proto_tab[PHONET_NPROTO] __read_mostly;
+static const struct phonet_protocol __rcu *proto_tab[PHONET_NPROTO] __read_mostly;
static const struct phonet_protocol *phonet_proto_get(unsigned int protocol)
{
@@ -482,7 +482,7 @@ void phonet_proto_unregister(unsigned int protocol,
const struct phonet_protocol *pp)
{
mutex_lock(&proto_tab_lock);
- BUG_ON(proto_tab[protocol] != pp);
+ BUG_ON(rcu_access_pointer(proto_tab[protocol]) != pp);
RCU_INIT_POINTER(proto_tab[protocol], NULL);
mutex_unlock(&proto_tab_lock);
synchronize_rcu();
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index 62527e1ebb88..4db564d9d522 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -376,7 +376,7 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb)
case PNS_PEP_CTRL_REQ:
if (skb_queue_len(&pn->ctrlreq_queue) >= PNPIPE_CTRLREQ_MAX) {
- atomic_inc(&sk->sk_drops);
+ sk_drops_inc(sk);
break;
}
__skb_pull(skb, 4);
@@ -397,7 +397,7 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb)
}
if (pn->rx_credits == 0) {
- atomic_inc(&sk->sk_drops);
+ sk_drops_inc(sk);
err = -ENOBUFS;
break;
}
@@ -567,7 +567,7 @@ static int pipe_handler_do_rcv(struct sock *sk, struct sk_buff *skb)
}
if (pn->rx_credits == 0) {
- atomic_inc(&sk->sk_drops);
+ sk_drops_inc(sk);
err = NET_RX_DROP;
break;
}
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index ea4d5e6533db..db2d552e9b32 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -587,7 +587,7 @@ static int pn_sock_seq_show(struct seq_file *seq, void *v)
from_kuid_munged(seq_user_ns(seq), sk_uid(sk)),
sock_i_ino(sk),
refcount_read(&sk->sk_refcnt), sk,
- atomic_read(&sk->sk_drops));
+ sk_drops_read(sk));
}
seq_pad(seq, '\n');
return 0;
@@ -602,7 +602,7 @@ const struct seq_operations pn_sock_seq_ops = {
#endif
static struct {
- struct sock *sk[256];
+ struct sock __rcu *sk[256];
} pnres;
/*
@@ -654,7 +654,7 @@ int pn_sock_unbind_res(struct sock *sk, u8 res)
return -EPERM;
mutex_lock(&resource_mutex);
- if (pnres.sk[res] == sk) {
+ if (rcu_access_pointer(pnres.sk[res]) == sk) {
RCU_INIT_POINTER(pnres.sk[res], NULL);
ret = 0;
}
@@ -673,7 +673,7 @@ void pn_sock_unbind_all_res(struct sock *sk)
mutex_lock(&resource_mutex);
for (res = 0; res < 256; res++) {
- if (pnres.sk[res] == sk) {
+ if (rcu_access_pointer(pnres.sk[res]) == sk) {
RCU_INIT_POINTER(pnres.sk[res], NULL);
match++;
}
@@ -688,7 +688,7 @@ void pn_sock_unbind_all_res(struct sock *sk)
}
#ifdef CONFIG_PROC_FS
-static struct sock **pn_res_get_idx(struct seq_file *seq, loff_t pos)
+static struct sock __rcu **pn_res_get_idx(struct seq_file *seq, loff_t pos)
{
struct net *net = seq_file_net(seq);
unsigned int i;
@@ -697,7 +697,7 @@ static struct sock **pn_res_get_idx(struct seq_file *seq, loff_t pos)
return NULL;
for (i = 0; i < 256; i++) {
- if (pnres.sk[i] == NULL)
+ if (rcu_access_pointer(pnres.sk[i]) == NULL)
continue;
if (!pos)
return pnres.sk + i;
@@ -706,7 +706,7 @@ static struct sock **pn_res_get_idx(struct seq_file *seq, loff_t pos)
return NULL;
}
-static struct sock **pn_res_get_next(struct seq_file *seq, struct sock **sk)
+static struct sock __rcu **pn_res_get_next(struct seq_file *seq, struct sock __rcu **sk)
{
struct net *net = seq_file_net(seq);
unsigned int i;
@@ -728,7 +728,7 @@ static void *pn_res_seq_start(struct seq_file *seq, loff_t *pos)
static void *pn_res_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
- struct sock **sk;
+ struct sock __rcu **sk;
if (v == SEQ_START_TOKEN)
sk = pn_res_get_idx(seq, 0);
@@ -747,11 +747,12 @@ static void pn_res_seq_stop(struct seq_file *seq, void *v)
static int pn_res_seq_show(struct seq_file *seq, void *v)
{
seq_setwidth(seq, 63);
- if (v == SEQ_START_TOKEN)
+ if (v == SEQ_START_TOKEN) {
seq_puts(seq, "rs uid inode");
- else {
- struct sock **psk = v;
- struct sock *sk = *psk;
+ } else {
+ struct sock __rcu **psk = v;
+ struct sock *sk = rcu_dereference_protected(*psk,
+ lockdep_is_held(&resource_mutex));
seq_printf(seq, "%02X %5u %lu",
(int) (psk - pnres.sk),
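
The phonet changes add __rcu annotations to proto_tab[] and pnres.sk[] and switch equality checks to rcu_access_pointer(), which fetches the pointer value without implying a dereference; since these checks run under resource_mutex or proto_tab_lock, no RCU read-side section is needed. A userspace model with the annotations erased to no-ops (the kernel versions carry sparse checking and READ_ONCE semantics):

    #include <pthread.h>
    #include <stddef.h>
    #include <stdio.h>

    #define __rcu /* kernel: sparse address-space annotation, erased here */

    /* Models of the accessors: value fetch only, no dereference implied. */
    #define rcu_access_pointer(p)  (p)       /* kernel: READ_ONCE + sparse cast */
    #define RCU_INIT_POINTER(p, v) ((p) = (v))

    struct sock { int id; };

    static struct sock __rcu *pnres_sk[4];
    static pthread_mutex_t resource_mutex = PTHREAD_MUTEX_INITIALIZER;

    /* Compare-and-clear under the mutex; no RCU read section is needed
     * because the pointer is never dereferenced here. */
    static int unbind(struct sock *sk, unsigned int res)
    {
        int ret = -1;

        pthread_mutex_lock(&resource_mutex);
        if (rcu_access_pointer(pnres_sk[res]) == sk) {
            RCU_INIT_POINTER(pnres_sk[res], NULL);
            ret = 0;
        }
        pthread_mutex_unlock(&resource_mutex);
        return ret;
    }

    int main(void)
    {
        struct sock sk = { .id = 1 };

        RCU_INIT_POINTER(pnres_sk[2], &sk);
        printf("unbind=%d\n", unbind(&sk, 2)); /* 0 */
        return 0;
    }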
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index 086a13170e09..4a7217fbeab6 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -242,7 +242,7 @@ static __poll_t rds_poll(struct file *file, struct socket *sock,
if (rs->rs_snd_bytes < rds_sk_sndbuf(rs))
mask |= (EPOLLOUT | EPOLLWRNORM);
if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
- mask |= POLLERR;
+ mask |= EPOLLERR;
read_unlock_irqrestore(&rs->rs_recv_lock, flags);
/* clear state any time we wake a seen-congested socket */
diff --git a/net/rds/connection.c b/net/rds/connection.c
index d62f486ab29f..68bc88cce84e 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -57,16 +57,17 @@ static struct hlist_head *rds_conn_bucket(const struct in6_addr *laddr,
static u32 rds6_hash_secret __read_mostly;
static u32 rds_hash_secret __read_mostly;
- u32 lhash, fhash, hash;
+ __be32 lhash, fhash;
+ u32 hash;
net_get_random_once(&rds_hash_secret, sizeof(rds_hash_secret));
net_get_random_once(&rds6_hash_secret, sizeof(rds6_hash_secret));
- lhash = (__force u32)laddr->s6_addr32[3];
+ lhash = laddr->s6_addr32[3];
#if IS_ENABLED(CONFIG_IPV6)
- fhash = __ipv6_addr_jhash(faddr, rds6_hash_secret);
+ fhash = (__force __be32)__ipv6_addr_jhash(faddr, rds6_hash_secret);
#else
- fhash = (__force u32)faddr->s6_addr32[3];
+ fhash = faddr->s6_addr32[3];
#endif
hash = __inet_ehashfn(lhash, 0, fhash, 0, rds_hash_secret);
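
rds_conn_bucket() now keeps the addresses typed as __be32 all the way into __inet_ehashfn(), so sparse can track byte order and the __force casts are confined to the spots where a raw hash output is reinterpreted. A small sketch of that typing discipline, with userspace stand-ins for the sparse annotations; the multiply-xor mix below is a placeholder, not __inet_ehashfn():

    #include <arpa/inet.h>
    #include <stdint.h>
    #include <stdio.h>

    #define __force          /* kernel: sparse cast-through annotation */
    typedef uint32_t __be32; /* kernel: big-endian-tagged 32-bit type */

    /* Hash over network-byte-order inputs; any fixed byte order works
     * as long as it is used consistently on this host. */
    static uint32_t ehashfn(__be32 laddr, __be32 faddr, uint32_t secret)
    {
        return ((__force uint32_t)laddr * 2654435761u) ^
               ((__force uint32_t)faddr * 2246822519u) ^ secret;
    }

    int main(void)
    {
        __be32 laddr = htonl(0xc0a80001); /* 192.168.0.1, stays __be32 */
        __be32 faddr = htonl(0x0a000001); /* 10.0.0.1 */

        printf("bucket=%u\n", ehashfn(laddr, faddr, 0x12345678u) & 127);
        return 0;
    }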
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index e53b7f266bd7..4248dfa816eb 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -1034,7 +1034,7 @@ void rds_ib_recv_cqe_handler(struct rds_ib_connection *ic,
rds_ib_stats_inc(s_ib_rx_ring_empty);
if (rds_ib_ring_low(&ic->i_recv_ring)) {
- rds_ib_recv_refill(conn, 0, GFP_NOWAIT | __GFP_NOWARN);
+ rds_ib_recv_refill(conn, 0, GFP_NOWAIT);
rds_ib_stats_inc(s_ib_rx_refill_from_cq);
}
}
diff --git a/net/rds/message.c b/net/rds/message.c
index 7af59d2443e5..199a899a43e9 100644
--- a/net/rds/message.c
+++ b/net/rds/message.c
@@ -44,8 +44,8 @@ static unsigned int rds_exthdr_size[__RDS_EXTHDR_MAX] = {
[RDS_EXTHDR_VERSION] = sizeof(struct rds_ext_header_version),
[RDS_EXTHDR_RDMA] = sizeof(struct rds_ext_header_rdma),
[RDS_EXTHDR_RDMA_DEST] = sizeof(struct rds_ext_header_rdma_dest),
-[RDS_EXTHDR_NPATHS] = sizeof(u16),
-[RDS_EXTHDR_GEN_NUM] = sizeof(u32),
+[RDS_EXTHDR_NPATHS] = sizeof(__be16),
+[RDS_EXTHDR_GEN_NUM] = sizeof(__be32),
};
void rds_message_addref(struct rds_message *rm)
diff --git a/net/rds/rds.h b/net/rds/rds.h
index dc360252c515..5b1c072e2e7f 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -93,7 +93,7 @@ enum {
/* Max number of multipaths per RDS connection. Must be a power of 2 */
#define RDS_MPATH_WORKERS 8
-#define RDS_MPATH_HASH(rs, n) (jhash_1word((rs)->rs_bound_port, \
+#define RDS_MPATH_HASH(rs, n) (jhash_1word(ntohs((rs)->rs_bound_port), \
(rs)->rs_hash_initval) & ((n) - 1))
#define IS_CANONICAL(laddr, faddr) (htonl(laddr) < htonl(faddr))
diff --git a/net/rds/recv.c b/net/rds/recv.c
index 5627f80013f8..66205d6924bf 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -202,8 +202,8 @@ static void rds_recv_hs_exthdrs(struct rds_header *hdr,
unsigned int pos = 0, type, len;
union {
struct rds_ext_header_version version;
- u16 rds_npaths;
- u32 rds_gen_num;
+ __be16 rds_npaths;
+ __be32 rds_gen_num;
} buffer;
u32 new_peer_gen_num = 0;
diff --git a/net/rds/send.c b/net/rds/send.c
index 42d991bc8543..0b3d0ef2f008 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -1454,8 +1454,8 @@ rds_send_probe(struct rds_conn_path *cp, __be16 sport,
if (RDS_HS_PROBE(be16_to_cpu(sport), be16_to_cpu(dport)) &&
cp->cp_conn->c_trans->t_mp_capable) {
- u16 npaths = cpu_to_be16(RDS_MPATH_WORKERS);
- u32 my_gen_num = cpu_to_be32(cp->cp_conn->c_my_gen_num);
+ __be16 npaths = cpu_to_be16(RDS_MPATH_WORKERS);
+ __be32 my_gen_num = cpu_to_be32(cp->cp_conn->c_my_gen_num);
rds_message_add_extension(&rm->m_inc.i_hdr,
RDS_EXTHDR_NPATHS, &npaths,
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 9e468e463467..ff6be5cfe2b0 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -1585,7 +1585,7 @@ void tcf_action_update_stats(struct tc_action *a, u64 bytes, u64 packets,
}
_bstats_update(&a->tcfa_bstats, bytes, packets);
- a->tcfa_qstats.drops += drops;
+ atomic_add(drops, &a->tcfa_drops);
if (hw)
_bstats_update(&a->tcfa_bstats_hw, bytes, packets);
}
@@ -1594,8 +1594,9 @@ EXPORT_SYMBOL(tcf_action_update_stats);
int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *p,
int compat_mode)
{
- int err = 0;
+ struct gnet_stats_queue qstats = {0};
struct gnet_dump d;
+ int err = 0;
if (p == NULL)
goto errout;
@@ -1619,14 +1620,17 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *p,
if (err < 0)
goto errout;
+ qstats.drops = atomic_read(&p->tcfa_drops);
+ qstats.overlimits = atomic_read(&p->tcfa_overlimits);
+
if (gnet_stats_copy_basic(&d, p->cpu_bstats,
&p->tcfa_bstats, false) < 0 ||
gnet_stats_copy_basic_hw(&d, p->cpu_bstats_hw,
&p->tcfa_bstats_hw, false) < 0 ||
gnet_stats_copy_rate_est(&d, &p->tcfa_rate_est) < 0 ||
gnet_stats_copy_queue(&d, p->cpu_qstats,
- &p->tcfa_qstats,
- p->tcfa_qstats.qlen) < 0)
+ &qstats,
+ qstats.qlen) < 0)
goto errout;
if (gnet_stats_finish_copy(&d) < 0)
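
tcf_action_update_stats() stops writing tcfa_qstats.drops directly; drops and overlimits now live in atomics and are snapshotted into an on-stack gnet_stats_queue only when a dump is requested. A standalone model of that snapshot-at-dump pattern (structure names are illustrative):

    #include <stdatomic.h>
    #include <stdio.h>

    struct action {
        atomic_long drops;       /* bumped from fast paths without a lock */
        atomic_long overlimits;
    };

    struct queue_stats { long drops; long overlimits; };

    static void action_update_stats(struct action *a, long drops)
    {
        atomic_fetch_add_explicit(&a->drops, drops, memory_order_relaxed);
    }

    /* Dump side: copy the atomics into a plain snapshot, serialise that. */
    static struct queue_stats action_snapshot(const struct action *a)
    {
        struct queue_stats q = {
            .drops = atomic_load_explicit(&a->drops, memory_order_relaxed),
            .overlimits = atomic_load_explicit(&a->overlimits,
                                               memory_order_relaxed),
        };
        return q;
    }

    int main(void)
    {
        struct action a = { 0 };
        struct queue_stats q;

        action_update_stats(&a, 3);
        q = action_snapshot(&a);
        printf("drops=%ld overlimits=%ld\n", q.drops, q.overlimits);
        return 0;
    }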
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index f3abe0545989..8e69a919b4fe 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -72,7 +72,6 @@ static int reset_policy(struct tc_action *a, const struct nlattr *defdata,
d = to_defact(a);
spin_lock_bh(&d->tcf_lock);
goto_ch = tcf_action_set_ctrlact(a, p->action, goto_ch);
- memset(d->tcfd_defdata, 0, SIMP_MAX_DATA);
nla_strscpy(d->tcfd_defdata, defdata, SIMP_MAX_DATA);
spin_unlock_bh(&d->tcf_lock);
if (goto_ch)
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index dc0229693461..a9e0c1326e2a 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -27,19 +27,18 @@ TC_INDIRECT_SCOPE int tcf_skbmod_act(struct sk_buff *skb,
struct tcf_result *res)
{
struct tcf_skbmod *d = to_skbmod(a);
- int action, max_edit_len, err;
struct tcf_skbmod_params *p;
+ int max_edit_len, err;
u64 flags;
tcf_lastuse_update(&d->tcf_tm);
bstats_update(this_cpu_ptr(d->common.cpu_bstats), skb);
- action = READ_ONCE(d->tcf_action);
- if (unlikely(action == TC_ACT_SHOT))
+ p = rcu_dereference_bh(d->skbmod_p);
+ if (unlikely(p->action == TC_ACT_SHOT))
goto drop;
max_edit_len = skb_mac_header_len(skb);
- p = rcu_dereference_bh(d->skbmod_p);
flags = p->flags;
/* tcf_skbmod_init() guarantees "flags" to be one of the following:
@@ -85,7 +84,7 @@ TC_INDIRECT_SCOPE int tcf_skbmod_act(struct sk_buff *skb,
INET_ECN_set_ce(skb);
out:
- return action;
+ return p->action;
drop:
qstats_overlimit_inc(this_cpu_ptr(d->common.cpu_qstats));
@@ -193,7 +192,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
}
p->flags = lflags;
-
+ p->action = parm->action;
if (ovr)
spin_lock_bh(&d->tcf_lock);
/* Protected by tcf_lock if overwriting existing action. */
@@ -248,10 +247,9 @@ static int tcf_skbmod_dump(struct sk_buff *skb, struct tc_action *a,
opt.index = d->tcf_index;
opt.refcnt = refcount_read(&d->tcf_refcnt) - ref;
opt.bindcnt = atomic_read(&d->tcf_bindcnt) - bind;
- spin_lock_bh(&d->tcf_lock);
- opt.action = d->tcf_action;
- p = rcu_dereference_protected(d->skbmod_p,
- lockdep_is_held(&d->tcf_lock));
+ rcu_read_lock();
+ p = rcu_dereference(d->skbmod_p);
+ opt.action = p->action;
opt.flags = p->flags;
if (nla_put(skb, TCA_SKBMOD_PARMS, sizeof(opt), &opt))
goto nla_put_failure;
@@ -269,10 +267,10 @@ static int tcf_skbmod_dump(struct sk_buff *skb, struct tc_action *a,
if (nla_put_64bit(skb, TCA_SKBMOD_TM, sizeof(t), &t, TCA_SKBMOD_PAD))
goto nla_put_failure;
- spin_unlock_bh(&d->tcf_lock);
+ rcu_read_unlock();
return skb->len;
nla_put_failure:
- spin_unlock_bh(&d->tcf_lock);
+ rcu_read_unlock();
nlmsg_trim(skb, b);
return -1;
}
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 2cef4b08befb..876b30c5709e 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -29,13 +29,11 @@ TC_INDIRECT_SCOPE int tunnel_key_act(struct sk_buff *skb,
{
struct tcf_tunnel_key *t = to_tunnel_key(a);
struct tcf_tunnel_key_params *params;
- int action;
params = rcu_dereference_bh(t->params);
tcf_lastuse_update(&t->tcf_tm);
tcf_action_update_bstats(&t->common, skb);
- action = READ_ONCE(t->tcf_action);
switch (params->tcft_action) {
case TCA_TUNNEL_KEY_ACT_RELEASE:
@@ -51,7 +49,7 @@ TC_INDIRECT_SCOPE int tunnel_key_act(struct sk_buff *skb,
break;
}
- return action;
+ return params->action;
}
static const struct nla_policy
@@ -532,6 +530,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
params_new->tcft_action = parm->t_action;
params_new->tcft_enc_metadata = metadata;
+ params_new->action = parm->action;
spin_lock_bh(&t->tcf_lock);
goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
params_new = rcu_replace_pointer(t->params, params_new,
@@ -726,10 +725,9 @@ static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a,
};
struct tcf_t tm;
- spin_lock_bh(&t->tcf_lock);
- params = rcu_dereference_protected(t->params,
- lockdep_is_held(&t->tcf_lock));
- opt.action = t->tcf_action;
+ rcu_read_lock();
+ params = rcu_dereference(t->params);
+ opt.action = params->action;
opt.t_action = params->tcft_action;
if (nla_put(skb, TCA_TUNNEL_KEY_PARMS, sizeof(opt), &opt))
@@ -766,12 +764,12 @@ static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a,
if (nla_put_64bit(skb, TCA_TUNNEL_KEY_TM, sizeof(tm),
&tm, TCA_TUNNEL_KEY_PAD))
goto nla_put_failure;
- spin_unlock_bh(&t->tcf_lock);
+ rcu_read_unlock();
return skb->len;
nla_put_failure:
- spin_unlock_bh(&t->tcf_lock);
+ rcu_read_unlock();
nlmsg_trim(skb, b);
return -1;
}
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 383bf18b6862..a74621797d69 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -25,7 +25,6 @@ TC_INDIRECT_SCOPE int tcf_vlan_act(struct sk_buff *skb,
{
struct tcf_vlan *v = to_vlan(a);
struct tcf_vlan_params *p;
- int action;
int err;
u16 tci;
@@ -38,8 +37,6 @@ TC_INDIRECT_SCOPE int tcf_vlan_act(struct sk_buff *skb,
if (skb_at_tc_ingress(skb))
skb_push_rcsum(skb, skb->mac_len);
- action = READ_ONCE(v->tcf_action);
-
p = rcu_dereference_bh(v->vlan_p);
switch (p->tcfv_action) {
@@ -97,7 +94,7 @@ out:
skb_pull_rcsum(skb, skb->mac_len);
skb_reset_mac_len(skb);
- return action;
+ return p->action;
drop:
tcf_action_inc_drop_qstats(&v->common);
@@ -255,6 +252,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
ETH_ALEN);
}
+ p->action = parm->action;
spin_lock_bh(&v->tcf_lock);
goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
p = rcu_replace_pointer(v->vlan_p, p, lockdep_is_held(&v->tcf_lock));
@@ -297,9 +295,9 @@ static int tcf_vlan_dump(struct sk_buff *skb, struct tc_action *a,
};
struct tcf_t t;
- spin_lock_bh(&v->tcf_lock);
- opt.action = v->tcf_action;
- p = rcu_dereference_protected(v->vlan_p, lockdep_is_held(&v->tcf_lock));
+ rcu_read_lock();
+ p = rcu_dereference(v->vlan_p);
+ opt.action = p->action;
opt.v_action = p->tcfv_action;
if (nla_put(skb, TCA_VLAN_PARMS, sizeof(opt), &opt))
goto nla_put_failure;
@@ -325,12 +323,12 @@ static int tcf_vlan_dump(struct sk_buff *skb, struct tc_action *a,
tcf_tm_dump(&t, &v->tcf_tm);
if (nla_put_64bit(skb, TCA_VLAN_TM, sizeof(t), &t, TCA_VLAN_PAD))
goto nla_put_failure;
- spin_unlock_bh(&v->tcf_lock);
+ rcu_read_unlock();
return skb->len;
nla_put_failure:
- spin_unlock_bh(&v->tcf_lock);
+ rcu_read_unlock();
nlmsg_trim(skb, b);
return -1;
}
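
skbmod, tunnel_key, and vlan all move the action verdict into the RCU-managed parameter struct, so the datapath reads a consistent {verdict, parameters} pair from one rcu_dereference_bh() and the dump path can swap the per-action spinlock for rcu_read_lock(). A minimal single-threaded model of the pattern, with comments marking where the kernel RCU calls sit (names illustrative):

    #include <stdio.h>
    #include <stdlib.h>

    struct params {
        int action; /* verdict now lives with the other parameters */
        int flags;
    };

    struct tcf_act {
        struct params *p; /* kernel: struct params __rcu *p */
    };

    /* Datapath: one dereference yields a consistent {action, flags} pair. */
    static int act(struct tcf_act *a)
    {
        struct params *p = a->p; /* kernel: rcu_dereference_bh(a->p) */

        /* ... edit the packet according to p->flags ... */
        return p->action;
    }

    /* Control path replaces the whole struct; readers see old or new,
     * never a mix of the two. */
    static void act_replace(struct tcf_act *a, int action, int flags)
    {
        struct params *np = malloc(sizeof(*np));

        np->action = action;
        np->flags = flags;
        a->p = np; /* kernel: rcu_replace_pointer() + kfree_rcu(old) */
    }

    int main(void)
    {
        struct tcf_act a = { .p = NULL };

        act_replace(&a, 1, 0);
        printf("verdict=%d\n", act(&a));
        free(a.p);
        return 0;
    }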
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index d7c767b861a4..1e058b46d3e1 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -431,7 +431,7 @@ struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) &&
- !memcmp(&rtab->data, nla_data(tab), 1024)) {
+ !memcmp(&rtab->data, nla_data(tab), TC_RTAB_SIZE)) {
rtab->refcnt++;
return rtab;
}
@@ -441,7 +441,7 @@ struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
if (rtab) {
rtab->rate = *r;
rtab->refcnt = 1;
- memcpy(rtab->data, nla_data(tab), 1024);
+ memcpy(rtab->data, nla_data(tab), TC_RTAB_SIZE);
if (r->linklayer == TC_LINKLAYER_UNAWARE)
r->linklayer = __detect_linklayer(r, rtab->data);
rtab->next = qdisc_rtab_list;
diff --git a/net/sctp/Kconfig b/net/sctp/Kconfig
index 24d5a35ce894..e947646a380c 100644
--- a/net/sctp/Kconfig
+++ b/net/sctp/Kconfig
@@ -7,9 +7,9 @@ menuconfig IP_SCTP
tristate "The SCTP Protocol"
depends on INET
depends on IPV6 || IPV6=n
- select CRYPTO
- select CRYPTO_HMAC
- select CRYPTO_SHA1
+ select CRYPTO_LIB_SHA1
+ select CRYPTO_LIB_SHA256
+ select CRYPTO_LIB_UTILS
select NET_CRC32C
select NET_UDP_TUNNEL
help
@@ -49,46 +49,25 @@ config SCTP_DBG_OBJCNT
'cat /proc/net/sctp/sctp_dbg_objcnt'
If unsure, say N
+
choice
- prompt "Default SCTP cookie HMAC encoding"
- default SCTP_DEFAULT_COOKIE_HMAC_MD5
+ prompt "Default SCTP cookie authentication method"
+ default SCTP_DEFAULT_COOKIE_HMAC_SHA256
help
- This option sets the default sctp cookie hmac algorithm
- when in doubt select 'md5'
+ This option sets the default SCTP cookie authentication method, for
+ when a method hasn't been explicitly selected via the
+ net.sctp.cookie_hmac_alg sysctl.
-config SCTP_DEFAULT_COOKIE_HMAC_MD5
- bool "Enable optional MD5 hmac cookie generation"
- help
- Enable optional MD5 hmac based SCTP cookie generation
- select SCTP_COOKIE_HMAC_MD5
+ If unsure, choose the default (HMAC-SHA256).
-config SCTP_DEFAULT_COOKIE_HMAC_SHA1
- bool "Enable optional SHA1 hmac cookie generation"
- help
- Enable optional SHA1 hmac based SCTP cookie generation
- select SCTP_COOKIE_HMAC_SHA1
+config SCTP_DEFAULT_COOKIE_HMAC_SHA256
+ bool "HMAC-SHA256"
config SCTP_DEFAULT_COOKIE_HMAC_NONE
- bool "Use no hmac alg in SCTP cookie generation"
- help
- Use no hmac algorithm in SCTP cookie generation
+ bool "None"
endchoice
-config SCTP_COOKIE_HMAC_MD5
- bool "Enable optional MD5 hmac cookie generation"
- help
- Enable optional MD5 hmac based SCTP cookie generation
- select CRYPTO_HMAC if SCTP_COOKIE_HMAC_MD5
- select CRYPTO_MD5 if SCTP_COOKIE_HMAC_MD5
-
-config SCTP_COOKIE_HMAC_SHA1
- bool "Enable optional SHA1 hmac cookie generation"
- help
- Enable optional SHA1 hmac based SCTP cookie generation
- select CRYPTO_HMAC if SCTP_COOKIE_HMAC_SHA1
- select CRYPTO_SHA1 if SCTP_COOKIE_HMAC_SHA1
-
config INET_SCTP_DIAG
depends on INET_DIAG
def_tristate INET_DIAG
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index c58fffc86a0c..82aad477590e 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -12,36 +12,37 @@
* Vlad Yasevich <vladislav.yasevich@hp.com>
*/
-#include <crypto/hash.h>
+#include <crypto/sha1.h>
+#include <crypto/sha2.h>
#include <linux/slab.h>
#include <linux/types.h>
-#include <linux/scatterlist.h>
#include <net/sctp/sctp.h>
#include <net/sctp/auth.h>
-static struct sctp_hmac sctp_hmac_list[SCTP_AUTH_NUM_HMACS] = {
+static const struct sctp_hmac sctp_hmac_list[SCTP_AUTH_NUM_HMACS] = {
{
/* id 0 is reserved. as all 0 */
.hmac_id = SCTP_AUTH_HMAC_ID_RESERVED_0,
},
{
.hmac_id = SCTP_AUTH_HMAC_ID_SHA1,
- .hmac_name = "hmac(sha1)",
- .hmac_len = SCTP_SHA1_SIG_SIZE,
+ .hmac_len = SHA1_DIGEST_SIZE,
},
{
/* id 2 is reserved as well */
.hmac_id = SCTP_AUTH_HMAC_ID_RESERVED_2,
},
-#if IS_ENABLED(CONFIG_CRYPTO_SHA256)
{
.hmac_id = SCTP_AUTH_HMAC_ID_SHA256,
- .hmac_name = "hmac(sha256)",
- .hmac_len = SCTP_SHA256_SIG_SIZE,
+ .hmac_len = SHA256_DIGEST_SIZE,
}
-#endif
};
+static bool sctp_hmac_supported(__u16 hmac_id)
+{
+ return hmac_id < ARRAY_SIZE(sctp_hmac_list) &&
+ sctp_hmac_list[hmac_id].hmac_len != 0;
+}
void sctp_auth_key_put(struct sctp_auth_bytes *key)
{
@@ -444,76 +445,7 @@ struct sctp_shared_key *sctp_auth_get_shkey(
return NULL;
}
-/*
- * Initialize all the possible digest transforms that we can use. Right
- * now, the supported digests are SHA1 and SHA256. We do this here once
- * because of the restrictiong that transforms may only be allocated in
- * user context. This forces us to pre-allocated all possible transforms
- * at the endpoint init time.
- */
-int sctp_auth_init_hmacs(struct sctp_endpoint *ep, gfp_t gfp)
-{
- struct crypto_shash *tfm = NULL;
- __u16 id;
-
- /* If the transforms are already allocated, we are done */
- if (ep->auth_hmacs)
- return 0;
-
- /* Allocated the array of pointers to transorms */
- ep->auth_hmacs = kcalloc(SCTP_AUTH_NUM_HMACS,
- sizeof(struct crypto_shash *),
- gfp);
- if (!ep->auth_hmacs)
- return -ENOMEM;
-
- for (id = 0; id < SCTP_AUTH_NUM_HMACS; id++) {
-
- /* See is we support the id. Supported IDs have name and
- * length fields set, so that we can allocated and use
- * them. We can safely just check for name, for without the
- * name, we can't allocate the TFM.
- */
- if (!sctp_hmac_list[id].hmac_name)
- continue;
-
- /* If this TFM has been allocated, we are all set */
- if (ep->auth_hmacs[id])
- continue;
-
- /* Allocate the ID */
- tfm = crypto_alloc_shash(sctp_hmac_list[id].hmac_name, 0, 0);
- if (IS_ERR(tfm))
- goto out_err;
-
- ep->auth_hmacs[id] = tfm;
- }
-
- return 0;
-
-out_err:
- /* Clean up any successful allocations */
- sctp_auth_destroy_hmacs(ep->auth_hmacs);
- ep->auth_hmacs = NULL;
- return -ENOMEM;
-}
-
-/* Destroy the hmac tfm array */
-void sctp_auth_destroy_hmacs(struct crypto_shash *auth_hmacs[])
-{
- int i;
-
- if (!auth_hmacs)
- return;
-
- for (i = 0; i < SCTP_AUTH_NUM_HMACS; i++) {
- crypto_free_shash(auth_hmacs[i]);
- }
- kfree(auth_hmacs);
-}
-
-
-struct sctp_hmac *sctp_auth_get_hmac(__u16 hmac_id)
+const struct sctp_hmac *sctp_auth_get_hmac(__u16 hmac_id)
{
return &sctp_hmac_list[hmac_id];
}
@@ -521,7 +453,8 @@ struct sctp_hmac *sctp_auth_get_hmac(__u16 hmac_id)
/* Get an hmac description information that we can use to build
* the AUTH chunk
*/
-struct sctp_hmac *sctp_auth_asoc_get_hmac(const struct sctp_association *asoc)
+const struct sctp_hmac *
+sctp_auth_asoc_get_hmac(const struct sctp_association *asoc)
{
struct sctp_hmac_algo_param *hmacs;
__u16 n_elt;
@@ -543,26 +476,10 @@ struct sctp_hmac *sctp_auth_asoc_get_hmac(const struct sctp_association *asoc)
sizeof(struct sctp_paramhdr)) >> 1;
for (i = 0; i < n_elt; i++) {
id = ntohs(hmacs->hmac_ids[i]);
-
- /* Check the id is in the supported range. And
- * see if we support the id. Supported IDs have name and
- * length fields set, so that we can allocate and use
- * them. We can safely just check for name, for without the
- * name, we can't allocate the TFM.
- */
- if (id > SCTP_AUTH_HMAC_ID_MAX ||
- !sctp_hmac_list[id].hmac_name) {
- id = 0;
- continue;
- }
-
- break;
+ if (sctp_hmac_supported(id))
+ return &sctp_hmac_list[id];
}
-
- if (id == 0)
- return NULL;
-
- return &sctp_hmac_list[id];
+ return NULL;
}
static int __sctp_auth_find_hmacid(__be16 *hmacs, int n_elts, __be16 hmac_id)
@@ -606,7 +523,6 @@ int sctp_auth_asoc_verify_hmac_id(const struct sctp_association *asoc,
void sctp_auth_asoc_set_default_hmac(struct sctp_association *asoc,
struct sctp_hmac_algo_param *hmacs)
{
- struct sctp_endpoint *ep;
__u16 id;
int i;
int n_params;
@@ -617,16 +533,9 @@ void sctp_auth_asoc_set_default_hmac(struct sctp_association *asoc,
n_params = (ntohs(hmacs->param_hdr.length) -
sizeof(struct sctp_paramhdr)) >> 1;
- ep = asoc->ep;
for (i = 0; i < n_params; i++) {
id = ntohs(hmacs->hmac_ids[i]);
-
- /* Check the id is in the supported range */
- if (id > SCTP_AUTH_HMAC_ID_MAX)
- continue;
-
- /* If this TFM has been allocated, use this id */
- if (ep->auth_hmacs[id]) {
+ if (sctp_hmac_supported(id)) {
asoc->default_hmac_id = id;
break;
}
@@ -709,10 +618,9 @@ void sctp_auth_calculate_hmac(const struct sctp_association *asoc,
struct sctp_shared_key *ep_key, gfp_t gfp)
{
struct sctp_auth_bytes *asoc_key;
- struct crypto_shash *tfm;
__u16 key_id, hmac_id;
- unsigned char *end;
int free_key = 0;
+ size_t data_len;
__u8 *digest;
/* Extract the info we need:
@@ -733,19 +641,17 @@ void sctp_auth_calculate_hmac(const struct sctp_association *asoc,
free_key = 1;
}
- /* set up scatter list */
- end = skb_tail_pointer(skb);
-
- tfm = asoc->ep->auth_hmacs[hmac_id];
-
+ data_len = skb_tail_pointer(skb) - (unsigned char *)auth;
digest = (u8 *)(&auth->auth_hdr + 1);
- if (crypto_shash_setkey(tfm, &asoc_key->data[0], asoc_key->len))
- goto free;
-
- crypto_shash_tfm_digest(tfm, (u8 *)auth, end - (unsigned char *)auth,
- digest);
+ if (hmac_id == SCTP_AUTH_HMAC_ID_SHA1) {
+ hmac_sha1_usingrawkey(asoc_key->data, asoc_key->len,
+ (const u8 *)auth, data_len, digest);
+ } else {
+ WARN_ON_ONCE(hmac_id != SCTP_AUTH_HMAC_ID_SHA256);
+ hmac_sha256_usingrawkey(asoc_key->data, asoc_key->len,
+ (const u8 *)auth, data_len, digest);
+ }
-free:
if (free_key)
sctp_auth_key_put(asoc_key);
}
@@ -788,14 +694,11 @@ int sctp_auth_ep_set_hmacs(struct sctp_endpoint *ep,
for (i = 0; i < hmacs->shmac_num_idents; i++) {
id = hmacs->shmac_idents[i];
- if (id > SCTP_AUTH_HMAC_ID_MAX)
+ if (!sctp_hmac_supported(id))
return -EOPNOTSUPP;
if (SCTP_AUTH_HMAC_ID_SHA1 == id)
has_sha1 = 1;
-
- if (!sctp_hmac_list[id].hmac_name)
- return -EOPNOTSUPP;
}
if (!has_sha1)
@@ -1021,8 +924,6 @@ int sctp_auth_deact_key_id(struct sctp_endpoint *ep,
int sctp_auth_init(struct sctp_endpoint *ep, gfp_t gfp)
{
- int err = -ENOMEM;
-
/* Allocate space for HMACS and CHUNKS authentication
* variables. There are arrays that we encode directly
* into parameters to make the rest of the operations easier.
@@ -1060,13 +961,6 @@ int sctp_auth_init(struct sctp_endpoint *ep, gfp_t gfp)
ep->auth_chunk_list = auth_chunks;
}
- /* Allocate and initialize transorms arrays for supported
- * HMACs.
- */
- err = sctp_auth_init_hmacs(ep, gfp);
- if (err)
- goto nomem;
-
return 0;
nomem:
@@ -1075,7 +969,7 @@ nomem:
kfree(ep->auth_chunk_list);
ep->auth_hmacs_list = NULL;
ep->auth_chunk_list = NULL;
- return err;
+ return -ENOMEM;
}
void sctp_auth_free(struct sctp_endpoint *ep)
@@ -1084,6 +978,4 @@ void sctp_auth_free(struct sctp_endpoint *ep)
kfree(ep->auth_chunk_list);
ep->auth_hmacs_list = NULL;
ep->auth_chunk_list = NULL;
- sctp_auth_destroy_hmacs(ep->auth_hmacs);
- ep->auth_hmacs = NULL;
}
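
With the crypto_shash transforms gone, the supported SCTP HMAC algorithms reduce to a const descriptor table, and sctp_hmac_supported() becomes a bounds check plus a non-zero digest length. A sketch of that table-lookup pattern; the IDs and lengths follow the diff, the helper shape is illustrative:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define SHA1_DIGEST_SIZE   20
    #define SHA256_DIGEST_SIZE 32

    struct hmac_desc {
        uint16_t id;
        uint16_t len; /* zero length marks a reserved/unsupported slot */
    };

    static const struct hmac_desc hmac_list[] = {
        { .id = 0, .len = 0 },                  /* reserved */
        { .id = 1, .len = SHA1_DIGEST_SIZE },   /* SCTP_AUTH_HMAC_ID_SHA1 */
        { .id = 2, .len = 0 },                  /* reserved */
        { .id = 3, .len = SHA256_DIGEST_SIZE }, /* SCTP_AUTH_HMAC_ID_SHA256 */
    };

    #define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

    static bool hmac_supported(uint16_t id)
    {
        return id < ARRAY_SIZE(hmac_list) && hmac_list[id].len != 0;
    }

    int main(void)
    {
        for (uint16_t id = 0; id < 6; id++)
            printf("id %u supported: %d\n", id, hmac_supported(id));
        return 0;
    }

A static table also removes the old failure mode where an endpoint could be created without its transform array populated.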
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index fd4f8243cc35..c655b571ca01 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -184,7 +184,8 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
* DATA.
*/
if (sctp_auth_send_cid(SCTP_CID_DATA, asoc)) {
- struct sctp_hmac *hmac_desc = sctp_auth_asoc_get_hmac(asoc);
+ const struct sctp_hmac *hmac_desc =
+ sctp_auth_asoc_get_hmac(asoc);
if (hmac_desc)
max_data -= SCTP_PAD4(sizeof(struct sctp_auth_chunk) +
diff --git a/net/sctp/diag.c b/net/sctp/diag.c
index 23359e522273..996c2018f0e6 100644
--- a/net/sctp/diag.c
+++ b/net/sctp/diag.c
@@ -173,7 +173,7 @@ static int inet_sctp_diag_fill(struct sock *sk, struct sctp_association *asoc,
mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued;
mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
mem[SK_MEMINFO_BACKLOG] = READ_ONCE(sk->sk_backlog.len);
- mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops);
+ mem[SK_MEMINFO_DROPS] = sk_drops_read(sk);
if (nla_put(skb, INET_DIAG_SKMEMINFO, sizeof(mem), &mem) < 0)
goto errout;
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 7e77b450697c..31e989dfe846 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -35,6 +35,15 @@
/* Forward declarations for internal helpers. */
static void sctp_endpoint_bh_rcv(struct work_struct *work);
+static void gen_cookie_auth_key(struct hmac_sha256_key *key)
+{
+ u8 raw_key[SCTP_COOKIE_KEY_SIZE];
+
+ get_random_bytes(raw_key, sizeof(raw_key));
+ hmac_sha256_preparekey(key, raw_key, sizeof(raw_key));
+ memzero_explicit(raw_key, sizeof(raw_key));
+}
+
/*
* Initialize the base fields of the endpoint structure.
*/
@@ -45,10 +54,6 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
struct net *net = sock_net(sk);
struct sctp_shared_key *null_key;
- ep->digest = kzalloc(SCTP_SIGNATURE_SIZE, gfp);
- if (!ep->digest)
- return NULL;
-
ep->asconf_enable = net->sctp.addip_enable;
ep->auth_enable = net->sctp.auth_enable;
if (ep->auth_enable) {
@@ -90,8 +95,8 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
/* Get the receive buffer policy for this endpoint */
ep->rcvbuf_policy = net->sctp.rcvbuf_policy;
- /* Initialize the secret key used with cookie. */
- get_random_bytes(ep->secret_key, sizeof(ep->secret_key));
+ /* Generate the cookie authentication key. */
+ gen_cookie_auth_key(&ep->cookie_auth_key);
/* SCTP-AUTH extensions*/
INIT_LIST_HEAD(&ep->endpoint_shared_keys);
@@ -118,7 +123,6 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
nomem_shkey:
sctp_auth_free(ep);
nomem:
- kfree(ep->digest);
return NULL;
}
@@ -205,9 +209,6 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep)
return;
}
- /* Free the digest buffer */
- kfree(ep->digest);
-
/* SCTP-AUTH: Free up AUTH releated data such as shared keys
* chunks and hmacs arrays that were allocated
*/
@@ -218,7 +219,7 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep)
sctp_inq_free(&ep->base.inqueue);
sctp_bind_addr_free(&ep->base.bind_addr);
- memset(ep->secret_key, 0, sizeof(ep->secret_key));
+ memzero_explicit(&ep->cookie_auth_key, sizeof(ep->cookie_auth_key));
sk = ep->base.sk;
/* Remove and free the port */
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index a5ccada55f2b..9dbc24af749b 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -34,6 +34,7 @@
#include <linux/memblock.h>
#include <linux/highmem.h>
#include <linux/slab.h>
+#include <net/flow.h>
#include <net/net_namespace.h>
#include <net/protocol.h>
#include <net/ip.h>
@@ -437,7 +438,7 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
fl4->fl4_dport = daddr->v4.sin_port;
fl4->flowi4_proto = IPPROTO_SCTP;
if (asoc) {
- fl4->flowi4_tos = inet_dscp_to_dsfield(dscp);
+ fl4->flowi4_dscp = dscp;
fl4->flowi4_scope = ip_sock_rt_scope(asoc->base.sk);
fl4->flowi4_oif = asoc->base.sk->sk_bound_dev_if;
fl4->fl4_sport = htons(asoc->base.bind_addr.port);
@@ -1334,14 +1335,9 @@ static int __net_init sctp_defaults_init(struct net *net)
/* Whether Cookie Preservative is enabled(1) or not(0) */
net->sctp.cookie_preserve_enable = 1;
- /* Default sctp sockets to use md5 as their hmac alg */
-#if defined (CONFIG_SCTP_DEFAULT_COOKIE_HMAC_MD5)
- net->sctp.sctp_hmac_alg = "md5";
-#elif defined (CONFIG_SCTP_DEFAULT_COOKIE_HMAC_SHA1)
- net->sctp.sctp_hmac_alg = "sha1";
-#else
- net->sctp.sctp_hmac_alg = NULL;
-#endif
+ /* Whether cookie authentication is enabled(1) or not(0) */
+ net->sctp.cookie_auth_enable =
+ !IS_ENABLED(CONFIG_SCTP_DEFAULT_COOKIE_HMAC_NONE);
/* Max.Burst - 4 */
net->sctp.max_burst = SCTP_DEFAULT_MAX_BURST;
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 3ead591c72fd..2c0017d058d4 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -30,7 +30,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <crypto/hash.h>
+#include <crypto/utils.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/ip.h>
@@ -1319,7 +1319,7 @@ struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc,
__u16 key_id)
{
struct sctp_authhdr auth_hdr;
- struct sctp_hmac *hmac_desc;
+ const struct sctp_hmac *hmac_desc;
struct sctp_chunk *retval;
/* Get the first hmac that the peer told us to use */
@@ -1674,8 +1674,10 @@ static struct sctp_cookie_param *sctp_pack_cookie(
* out on the network.
*/
retval = kzalloc(*cookie_len, GFP_ATOMIC);
- if (!retval)
- goto nodata;
+ if (!retval) {
+ *cookie_len = 0;
+ return NULL;
+ }
cookie = (struct sctp_signed_cookie *) retval->body;
@@ -1706,26 +1708,14 @@ static struct sctp_cookie_param *sctp_pack_cookie(
memcpy((__u8 *)(cookie + 1) +
ntohs(init_chunk->chunk_hdr->length), raw_addrs, addrs_len);
- if (sctp_sk(ep->base.sk)->hmac) {
- struct crypto_shash *tfm = sctp_sk(ep->base.sk)->hmac;
- int err;
-
- /* Sign the message. */
- err = crypto_shash_setkey(tfm, ep->secret_key,
- sizeof(ep->secret_key)) ?:
- crypto_shash_tfm_digest(tfm, (u8 *)&cookie->c, bodysize,
- cookie->signature);
- if (err)
- goto free_cookie;
+ /* Sign the cookie, if cookie authentication is enabled. */
+ if (sctp_sk(ep->base.sk)->cookie_auth_enable) {
+ static_assert(sizeof(cookie->mac) == SHA256_DIGEST_SIZE);
+ hmac_sha256(&ep->cookie_auth_key, (const u8 *)&cookie->c,
+ bodysize, cookie->mac);
}
return retval;
-
-free_cookie:
- kfree(retval);
-nodata:
- *cookie_len = 0;
- return NULL;
}
/* Unpack the cookie from COOKIE ECHO chunk, recreating the association. */
@@ -1740,7 +1730,6 @@ struct sctp_association *sctp_unpack_cookie(
struct sctp_signed_cookie *cookie;
struct sk_buff *skb = chunk->skb;
struct sctp_cookie *bear_cookie;
- __u8 *digest = ep->digest;
enum sctp_scope scope;
unsigned int len;
ktime_t kt;
@@ -1770,30 +1759,19 @@ struct sctp_association *sctp_unpack_cookie(
cookie = chunk->subh.cookie_hdr;
bear_cookie = &cookie->c;
- if (!sctp_sk(ep->base.sk)->hmac)
- goto no_hmac;
+ /* Verify the cookie's MAC, if cookie authentication is enabled. */
+ if (sctp_sk(ep->base.sk)->cookie_auth_enable) {
+ u8 mac[SHA256_DIGEST_SIZE];
- /* Check the signature. */
- {
- struct crypto_shash *tfm = sctp_sk(ep->base.sk)->hmac;
- int err;
-
- err = crypto_shash_setkey(tfm, ep->secret_key,
- sizeof(ep->secret_key)) ?:
- crypto_shash_tfm_digest(tfm, (u8 *)bear_cookie, bodysize,
- digest);
- if (err) {
- *error = -SCTP_IERROR_NOMEM;
+ hmac_sha256(&ep->cookie_auth_key, (const u8 *)bear_cookie,
+ bodysize, mac);
+ static_assert(sizeof(cookie->mac) == sizeof(mac));
+ if (crypto_memneq(mac, cookie->mac, sizeof(mac))) {
+ *error = -SCTP_IERROR_BAD_SIG;
goto fail;
}
}
- if (memcmp(digest, cookie->signature, SCTP_SIGNATURE_SIZE)) {
- *error = -SCTP_IERROR_BAD_SIG;
- goto fail;
- }
-
-no_hmac:
/* IG Section 2.35.2:
* 3) Compare the port numbers and the verification tag contained
* within the COOKIE ECHO chunk to the actual port numbers and the
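
Cookie signing is now a one-shot hmac_sha256() call over the cookie body, and verification compares MACs with crypto_memneq() so the check runs in constant time. A standalone model, with a toy keyed mix standing in for HMAC-SHA256; only the constant-time compare is modelled faithfully, never use the toy MAC for real authentication:

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #define MAC_SIZE 32

    /* Toy keyed MAC standing in for hmac_sha256(); illustration only. */
    static void toy_mac(const uint8_t *key, size_t klen,
                        const uint8_t *msg, size_t mlen, uint8_t out[MAC_SIZE])
    {
        memset(out, 0, MAC_SIZE);
        for (size_t i = 0; i < klen; i++)
            out[i % MAC_SIZE] ^= key[i];
        for (size_t i = 0; i < mlen; i++)
            out[i % MAC_SIZE] ^= (uint8_t)(msg[i] + i);
    }

    /* Model of crypto_memneq(): running time independent of the data. */
    static int ct_memneq(const uint8_t *a, const uint8_t *b, size_t n)
    {
        uint8_t diff = 0;

        for (size_t i = 0; i < n; i++)
            diff |= a[i] ^ b[i];
        return diff != 0;
    }

    int main(void)
    {
        const uint8_t key[] = "cookie-auth-key";
        const uint8_t cookie[] = "association state blob";
        uint8_t mac[MAC_SIZE], check[MAC_SIZE];

        toy_mac(key, sizeof(key), cookie, sizeof(cookie), mac);   /* sign */
        toy_mac(key, sizeof(key), cookie, sizeof(cookie), check); /* verify */
        printf("bad sig: %d\n", ct_memneq(mac, check, MAC_SIZE)); /* 0 = ok */
        return 0;
    }

An early-exit memcmp() leaks how many leading bytes matched through timing, which is why the verify path switches to crypto_memneq().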
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index a0524ba8d787..4cb8f393434d 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -30,6 +30,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <crypto/utils.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/ip.h>
@@ -4361,7 +4362,7 @@ static enum sctp_ierror sctp_sf_authenticate(
struct sctp_shared_key *sh_key = NULL;
struct sctp_authhdr *auth_hdr;
__u8 *save_digest, *digest;
- struct sctp_hmac *hmac;
+ const struct sctp_hmac *hmac;
unsigned int sig_len;
__u16 key_id;
@@ -4416,7 +4417,7 @@ static enum sctp_ierror sctp_sf_authenticate(
sh_key, GFP_ATOMIC);
/* Discard the packet if the digests do not match */
- if (memcmp(save_digest, digest, sig_len)) {
+ if (crypto_memneq(save_digest, digest, sig_len)) {
kfree(save_digest);
return SCTP_IERROR_BAD_SIG;
}
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 4921416434f9..ed8293a34240 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -37,7 +37,6 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <crypto/hash.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/wait.h>
@@ -4987,7 +4986,7 @@ static int sctp_init_sock(struct sock *sk)
sp->default_rcv_context = 0;
sp->max_burst = net->sctp.max_burst;
- sp->sctp_hmac_alg = net->sctp.sctp_hmac_alg;
+ sp->cookie_auth_enable = net->sctp.cookie_auth_enable;
/* Initialize default setup parameters. These parameters
* can be modified with the SCTP_INITMSG socket option or
@@ -5079,8 +5078,6 @@ static int sctp_init_sock(struct sock *sk)
if (!sp->ep)
return -ENOMEM;
- sp->hmac = NULL;
-
sk->sk_destruct = sctp_destruct_sock;
SCTP_DBG_OBJCNT_INC(sock);
@@ -5117,18 +5114,8 @@ static void sctp_destroy_sock(struct sock *sk)
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
}
-/* Triggered when there are no references on the socket anymore */
-static void sctp_destruct_common(struct sock *sk)
-{
- struct sctp_sock *sp = sctp_sk(sk);
-
- /* Free up the HMAC transform. */
- crypto_free_shash(sp->hmac);
-}
-
static void sctp_destruct_sock(struct sock *sk)
{
- sctp_destruct_common(sk);
inet_sock_destruct(sk);
}
@@ -8530,22 +8517,8 @@ static int sctp_listen_start(struct sock *sk, int backlog)
{
struct sctp_sock *sp = sctp_sk(sk);
struct sctp_endpoint *ep = sp->ep;
- struct crypto_shash *tfm = NULL;
- char alg[32];
int err;
- /* Allocate HMAC for generating cookie. */
- if (!sp->hmac && sp->sctp_hmac_alg) {
- sprintf(alg, "hmac(%s)", sp->sctp_hmac_alg);
- tfm = crypto_alloc_shash(alg, 0, 0);
- if (IS_ERR(tfm)) {
- net_info_ratelimited("failed to load transform for %s: %ld\n",
- sp->sctp_hmac_alg, PTR_ERR(tfm));
- return -ENOSYS;
- }
- sctp_sk(sk)->hmac = tfm;
- }
-
/*
* If a bind() or sctp_bindx() is not called prior to a listen()
* call that allows new associations to be accepted, the system
@@ -9561,7 +9534,6 @@ static int sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
* copy.
*/
newsp->ep = newep;
- newsp->hmac = NULL;
/* Hook this new socket in to the bind_hash list. */
head = &sctp_port_hashtable[sctp_phashfn(sock_net(oldsk),
@@ -9581,16 +9553,6 @@ static int sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
if (err)
return err;
- /* New ep's auth_hmacs should be set if old ep's is set, in case
- * that net->sctp.auth_enable has been changed to 0 by users and
- * new ep's auth_hmacs couldn't be set in sctp_endpoint_init().
- */
- if (oldsp->ep->auth_hmacs) {
- err = sctp_auth_init_hmacs(newsp->ep, GFP_KERNEL);
- if (err)
- return err;
- }
-
sctp_auto_asconf_init(newsp);
/* Move any messages in the old socket's receive queue that are for the
@@ -9723,7 +9685,6 @@ struct proto sctp_prot = {
static void sctp_v6_destruct_sock(struct sock *sk)
{
- sctp_destruct_common(sk);
inet6_sock_destruct(sk);
}
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index ee3eac338a9d..15e7db9a3ab2 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -174,7 +174,7 @@ static struct ctl_table sctp_net_table[] = {
},
{
.procname = "cookie_hmac_alg",
- .data = &init_net.sctp.sctp_hmac_alg,
+ .data = &init_net.sctp.cookie_auth_enable,
.maxlen = 8,
.mode = 0644,
.proc_handler = proc_sctp_do_hmac_alg,
@@ -388,10 +388,8 @@ static int proc_sctp_do_hmac_alg(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct net *net = container_of(ctl->data, struct net,
- sctp.sctp_hmac_alg);
+ sctp.cookie_auth_enable);
struct ctl_table tbl;
- bool changed = false;
- char *none = "none";
char tmp[8] = {0};
int ret;
@@ -399,35 +397,26 @@ static int proc_sctp_do_hmac_alg(const struct ctl_table *ctl, int write,
if (write) {
tbl.data = tmp;
- tbl.maxlen = sizeof(tmp);
- } else {
- tbl.data = net->sctp.sctp_hmac_alg ? : none;
- tbl.maxlen = strlen(tbl.data);
- }
-
- ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
- if (write && ret == 0) {
-#ifdef CONFIG_CRYPTO_MD5
- if (!strncmp(tmp, "md5", 3)) {
- net->sctp.sctp_hmac_alg = "md5";
- changed = true;
+ tbl.maxlen = sizeof(tmp) - 1;
+ ret = proc_dostring(&tbl, 1, buffer, lenp, ppos);
+ if (ret)
+ return ret;
+ if (!strcmp(tmp, "sha256")) {
+ net->sctp.cookie_auth_enable = 1;
+ return 0;
}
-#endif
-#ifdef CONFIG_CRYPTO_SHA1
- if (!strncmp(tmp, "sha1", 4)) {
- net->sctp.sctp_hmac_alg = "sha1";
- changed = true;
+ if (!strcmp(tmp, "none")) {
+ net->sctp.cookie_auth_enable = 0;
+ return 0;
}
-#endif
- if (!strncmp(tmp, "none", 4)) {
- net->sctp.sctp_hmac_alg = NULL;
- changed = true;
- }
- if (!changed)
- ret = -EINVAL;
+ return -EINVAL;
}
-
- return ret;
+ if (net->sctp.cookie_auth_enable)
+ tbl.data = (char *)"sha256";
+ else
+ tbl.data = (char *)"none";
+ tbl.maxlen = strlen(tbl.data);
+ return proc_dostring(&tbl, 0, buffer, lenp, ppos);
}
static int proc_sctp_do_rto_min(const struct ctl_table *ctl, int write,
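
proc_sctp_do_hmac_alg() keeps the cookie_hmac_alg name for compatibility but now maps the strings "sha256" and "none" onto the boolean cookie_auth_enable; "md5" and "sha1" are no longer accepted. A sketch of that string-to-flag mapping with the proc plumbing elided:

    #include <stdio.h>
    #include <string.h>

    static int cookie_auth_enable = 1;

    /* Write side: accept only the two known strings, as in the handler. */
    static int hmac_alg_write(const char *val)
    {
        if (!strcmp(val, "sha256")) { cookie_auth_enable = 1; return 0; }
        if (!strcmp(val, "none"))   { cookie_auth_enable = 0; return 0; }
        return -1; /* kernel: -EINVAL */
    }

    /* Read side: render the flag back as a string. */
    static const char *hmac_alg_read(void)
    {
        return cookie_auth_enable ? "sha256" : "none";
    }

    int main(void)
    {
        printf("%s\n", hmac_alg_read());       /* sha256 */
        hmac_alg_write("none");
        printf("%s\n", hmac_alg_read());       /* none */
        printf("%d\n", hmac_alg_write("md5")); /* -1: no longer accepted */
        return 0;
    }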
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index a42ef3f77b96..0052f02756eb 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -974,13 +974,17 @@ static int smc_ib_add_dev(struct ib_device *ibdev)
smcibdev->pnetid[i]))
smc_pnetid_by_table_ib(smcibdev, i + 1);
smc_copy_netdev_ifindex(smcibdev, i);
- pr_warn_ratelimited("smc: ib device %s port %d has pnetid "
- "%.16s%s\n",
- smcibdev->ibdev->name, i + 1,
- smcibdev->pnetid[i],
- smcibdev->pnetid_by_user[i] ?
- " (user defined)" :
- "");
+ if (smc_pnet_is_pnetid_set(smcibdev->pnetid[i]))
+ pr_warn_ratelimited("smc: ib device %s port %d has pnetid %.16s%s\n",
+ smcibdev->ibdev->name, i + 1,
+ smcibdev->pnetid[i],
+ smcibdev->pnetid_by_user[i] ?
+ " (user defined)" :
+ "");
+ else
+ pr_warn_ratelimited("smc: ib device %s port %d has no pnetid\n",
+ smcibdev->ibdev->name, i + 1);
+
}
schedule_work(&smcibdev->port_event_work);
return 0;
diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c
index 84f98e18c7db..a58ffb7a0610 100644
--- a/net/smc/smc_ism.c
+++ b/net/smc/smc_ism.c
@@ -518,10 +518,15 @@ static void smcd_register_dev(struct ism_dev *ism)
}
mutex_unlock(&smcd_dev_list.mutex);
- pr_warn_ratelimited("smc: adding smcd device %s with pnetid %.16s%s\n",
- dev_name(&ism->dev), smcd->pnetid,
- smcd->pnetid_by_user ? " (user defined)" : "");
-
+ if (smc_pnet_is_pnetid_set(smcd->pnetid))
+ pr_warn_ratelimited("smc: adding smcd device %s with pnetid %.16s%s\n",
+ dev_name(&ism->dev), smcd->pnetid,
+ smcd->pnetid_by_user ?
+ " (user defined)" :
+ "");
+ else
+ pr_warn_ratelimited("smc: adding smcd device %s without pnetid\n",
+ dev_name(&ism->dev));
return;
}
diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c
index 76ad29e31d60..b90337f86e83 100644
--- a/net/smc/smc_pnet.c
+++ b/net/smc/smc_pnet.c
@@ -450,7 +450,7 @@ static int smc_pnet_add_ib(struct smc_pnettable *pnettable, char *ib_name,
return -ENOMEM;
new_pe->type = SMC_PNET_IB;
memcpy(new_pe->pnet_name, pnet_name, SMC_MAX_PNETID_LEN);
- strncpy(new_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX);
+ strscpy(new_pe->ib_name, ib_name);
new_pe->ib_port = ib_port;
new_ibdev = true;
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index e028bf658499..1574a83384f8 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -2366,7 +2366,7 @@ static void tipc_sk_filter_rcv(struct sock *sk, struct sk_buff *skb,
else if (sk_rmem_alloc_get(sk) + skb->truesize >= limit) {
trace_tipc_sk_dump(sk, skb, TIPC_DUMP_ALL,
"err_overload2!");
- atomic_inc(&sk->sk_drops);
+ sk_drops_inc(sk);
err = TIPC_ERR_OVERLOAD;
}
@@ -2458,7 +2458,7 @@ static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk,
trace_tipc_sk_dump(sk, skb, TIPC_DUMP_ALL, "err_overload!");
/* Overload => reject message back to sender */
onode = tipc_own_addr(sock_net(sk));
- atomic_inc(&sk->sk_drops);
+ sk_drops_inc(sk);
if (tipc_msg_reverse(onode, &skb, TIPC_ERR_OVERLOAD)) {
trace_tipc_sk_rej_msg(sk, skb, TIPC_DUMP_ALL,
"@sk_enqueue!");
@@ -3657,7 +3657,7 @@ int tipc_sk_fill_sock_diag(struct sk_buff *skb, struct netlink_callback *cb,
nla_put_u32(skb, TIPC_NLA_SOCK_STAT_SENDQ,
skb_queue_len(&sk->sk_write_queue)) ||
nla_put_u32(skb, TIPC_NLA_SOCK_STAT_DROP,
- atomic_read(&sk->sk_drops)))
+ sk_drops_read(sk)))
goto stat_msg_cancel;
if (tsk->cong_link_cnt &&
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index bebb355f3ffe..0538948d5fd9 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1029,12 +1029,7 @@ static int vsock_getname(struct socket *sock,
vm_addr = &vsk->local_addr;
}
- /* sys_getsockname() and sys_getpeername() pass us a
- * MAX_SOCK_ADDR-sized buffer and don't set addr_len. Unfortunately
- * that macro is defined in socket.c instead of .h, so we hardcode its
- * value here.
- */
- BUILD_BUG_ON(sizeof(*vm_addr) > 128);
+ BUILD_BUG_ON(sizeof(*vm_addr) > sizeof(struct sockaddr_storage));
memcpy(addr, vm_addr, sizeof(*vm_addr));
err = sizeof(*vm_addr);
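
The vsock change swaps the hardcoded 128 for sizeof(struct sockaddr_storage), keeping the compile-time guarantee that the address fits the buffer the syscall layer supplies. The same check in standalone form, using C11 _Static_assert in place of BUILD_BUG_ON() and a hypothetical stand-in for struct sockaddr_vm:

    #include <stdio.h>
    #include <sys/socket.h>

    struct sockaddr_vm_model { /* illustrative stand-in for sockaddr_vm */
        unsigned short svm_family;
        unsigned short svm_reserved1;
        unsigned int   svm_port;
        unsigned int   svm_cid;
        unsigned char  svm_zero[sizeof(struct sockaddr) - 12];
    };

    int main(void)
    {
        /* kernel: BUILD_BUG_ON(sizeof(*vm_addr) >
         *                      sizeof(struct sockaddr_storage)) */
        _Static_assert(sizeof(struct sockaddr_vm_model) <=
                       sizeof(struct sockaddr_storage),
                       "address must fit the generic sockaddr buffer");
        printf("fits: %zu <= %zu\n", sizeof(struct sockaddr_vm_model),
               sizeof(struct sockaddr_storage));
        return 0;
    }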
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index c5035a9bc3bb..62486f866975 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2594,7 +2594,7 @@ xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, const struct flowi *fl,
static dscp_t xfrm_get_dscp(const struct flowi *fl, int family)
{
if (family == AF_INET)
- return inet_dsfield_to_dscp(fl->u.ip4.flowi4_tos);
+ return fl->u.ip4.flowi4_dscp;
return 0;
}
@@ -3462,7 +3462,7 @@ decode_session4(const struct xfrm_flow_keys *flkeys, struct flowi *fl, bool reve
}
fl4->flowi4_proto = flkeys->basic.ip_proto;
- fl4->flowi4_tos = flkeys->ip.tos & ~INET_ECN_MASK;
+ fl4->flowi4_dscp = inet_dsfield_to_dscp(flkeys->ip.tos);
}
#if IS_ENABLED(CONFIG_IPV6)
@@ -3594,7 +3594,7 @@ static bool xfrm_icmp_flow_decode(struct sk_buff *skb, unsigned short family,
fl1->flowi_oif = fl->flowi_oif;
fl1->flowi_mark = fl->flowi_mark;
- fl1->flowi_tos = fl->flowi_tos;
+ fl1->flowi_dscp = fl->flowi_dscp;
nf_nat_decode_session(newskb, fl1, family);
ret = false;
@@ -3881,12 +3881,18 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
}
skb_dst_force(skb);
- if (!skb_dst(skb)) {
+ dst = skb_dst(skb);
+ if (!dst) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMFWDHDRERROR);
return 0;
}
- dst = xfrm_lookup(net, skb_dst(skb), &fl, NULL, XFRM_LOOKUP_QUEUE);
+ /* ignore return value from skb_dstref_steal, xfrm_lookup takes
+ * care of dropping the refcnt if needed.
+ */
+ skb_dstref_steal(skb);
+
+ dst = xfrm_lookup(net, dst, &fl, NULL, XFRM_LOOKUP_QUEUE);
if (IS_ERR(dst)) {
res = 0;
dst = NULL;
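
__xfrm_route_forward() now caches skb_dst(skb), detaches the reference from the skb with skb_dstref_steal(), and passes it to xfrm_lookup(), which consumes the reference on failure; that makes the ownership hand-off explicit and avoids releasing the same dst twice. A toy refcount model of the hand-off (helper names follow the diff, bodies are illustrative):

    #include <stdio.h>
    #include <stdlib.h>

    struct dst { int refcnt; };
    struct skb { struct dst *dst; };

    static void dst_release(struct dst *d)
    {
        if (d && --d->refcnt == 0)
            free(d);
    }

    /* Detach the dst from the skb without touching the refcount: the
     * caller now owns the reference (models skb_dstref_steal()). */
    static struct dst *skb_dstref_steal(struct skb *skb)
    {
        struct dst *d = skb->dst;

        skb->dst = NULL;
        return d;
    }

    /* Consumes the caller's reference on failure, like xfrm_lookup(). */
    static struct dst *xfrm_lookup(struct dst *d, int fail)
    {
        if (fail) {
            dst_release(d);
            return NULL;
        }
        return d;
    }

    int main(void)
    {
        struct dst *d = calloc(1, sizeof(*d));
        struct skb skb = { .dst = d };

        d->refcnt = 1;
        d = xfrm_lookup(skb_dstref_steal(&skb), 0); /* success: we own d */
        printf("dst=%p skb->dst=%p\n", (void *)d, (void *)skb.dst);
        dst_release(d);
        return 0;
    }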
diff --git a/rust/kernel/net/phy.rs b/rust/kernel/net/phy.rs
index 7de5cc7a0eee..c895582cd624 100644
--- a/rust/kernel/net/phy.rs
+++ b/rust/kernel/net/phy.rs
@@ -196,11 +196,8 @@ impl Device {
// SAFETY: `phydev` is pointing to a valid object by the type invariant of `Self`.
// So it's just an FFI call.
let ret = unsafe { bindings::phy_read_paged(phydev, page.into(), regnum.into()) };
- if ret < 0 {
- Err(Error::from_errno(ret))
- } else {
- Ok(ret as u16)
- }
+
+ to_result(ret).map(|()| ret as u16)
}
/// Resolves the advertisements into PHY settings.
diff --git a/scripts/headers_install.sh b/scripts/headers_install.sh
index 6bbccb43f7e7..4c20c62c4faf 100755
--- a/scripts/headers_install.sh
+++ b/scripts/headers_install.sh
@@ -32,7 +32,7 @@ fi
sed -E -e '
s/([[:space:](])(__user|__force|__iomem)[[:space:]]/\1/g
s/__attribute_const__([[:space:]]|$)/\1/g
- s@^#include <linux/compiler(|_types).h>@@
+ s@^#include <linux/compiler.h>@@
s/(^|[^a-zA-Z0-9])__packed([^a-zA-Z0-9_]|$)/\1__attribute__((packed))\2/g
s/(^|[[:space:](])(inline|asm|volatile)([[:space:](]|$)/\1__\2__\3/g
s@#(ifndef|define|endif[[:space:]]*/[*])[[:space:]]*_UAPI@#\1 @
diff --git a/tools/net/ynl/pyynl/lib/__init__.py b/tools/net/ynl/pyynl/lib/__init__.py
index 71518b9842ee..5f266ebe4526 100644
--- a/tools/net/ynl/pyynl/lib/__init__.py
+++ b/tools/net/ynl/pyynl/lib/__init__.py
@@ -4,6 +4,8 @@ from .nlspec import SpecAttr, SpecAttrSet, SpecEnumEntry, SpecEnumSet, \
SpecFamily, SpecOperation, SpecSubMessage, SpecSubMessageFormat
from .ynl import YnlFamily, Netlink, NlError
+from .doc_generator import YnlDocGenerator
+
__all__ = ["SpecAttr", "SpecAttrSet", "SpecEnumEntry", "SpecEnumSet",
"SpecFamily", "SpecOperation", "SpecSubMessage", "SpecSubMessageFormat",
"YnlFamily", "Netlink", "NlError"]
diff --git a/tools/net/ynl/pyynl/lib/doc_generator.py b/tools/net/ynl/pyynl/lib/doc_generator.py
new file mode 100644
index 000000000000..403abf1a2eda
--- /dev/null
+++ b/tools/net/ynl/pyynl/lib/doc_generator.py
@@ -0,0 +1,398 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# -*- coding: utf-8; mode: python -*-
+
+"""
+ Class to auto generate the documentation for Netlink specifications.
+
+ :copyright: Copyright (C) 2023 Breno Leitao <leitao@debian.org>
+ :license: GPL Version 2, June 1991 see linux/COPYING for details.
+
+ This class performs extensive parsing of the Linux kernel's netlink YAML
+ spec files, in an effort to avoid needing to heavily mark up the original
+ YAML file.
+
+ This code is split into two classes:
+ 1) RST formatters: Used to convert a string to RST output
+ 2) YAML Netlink (YNL) doc generator: Generates docs from the YAML data
+"""
+
+from typing import Any, Dict, List
+import yaml
+
+LINE_STR = '__lineno__'
+
+class NumberedSafeLoader(yaml.SafeLoader): # pylint: disable=R0901
+ """Override the SafeLoader class to add line number to parsed data"""
+
+ def construct_mapping(self, node, *args, **kwargs):
+ mapping = super().construct_mapping(node, *args, **kwargs)
+ mapping[LINE_STR] = node.start_mark.line
+
+ return mapping
+
+class RstFormatters:
+ """RST Formatters"""
+
+ SPACE_PER_LEVEL = 4
+
+ @staticmethod
+ def headroom(level: int) -> str:
+ """Return space to format"""
+ return " " * (level * RstFormatters.SPACE_PER_LEVEL)
+
+ @staticmethod
+ def bold(text: str) -> str:
+ """Format bold text"""
+ return f"**{text}**"
+
+ @staticmethod
+ def inline(text: str) -> str:
+ """Format inline text"""
+ return f"``{text}``"
+
+ @staticmethod
+ def sanitize(text: str) -> str:
+ """Remove newlines and multiple spaces"""
+ # This is useful for some fields that are spread across multiple lines
+ return str(text).replace("\n", " ").strip()
+
+ def rst_fields(self, key: str, value: str, level: int = 0) -> str:
+ """Return a RST formatted field"""
+ return self.headroom(level) + f":{key}: {value}"
+
+ def rst_definition(self, key: str, value: Any, level: int = 0) -> str:
+ """Format a single rst definition"""
+ return self.headroom(level) + key + "\n" + self.headroom(level + 1) + str(value)
+
+ def rst_paragraph(self, paragraph: str, level: int = 0) -> str:
+ """Return a formatted paragraph"""
+ return self.headroom(level) + paragraph
+
+ def rst_bullet(self, item: str, level: int = 0) -> str:
+ """Return a formatted a bullet"""
+ return self.headroom(level) + f"- {item}"
+
+ @staticmethod
+ def rst_subsection(title: str) -> str:
+ """Add a sub-section to the document"""
+ return f"{title}\n" + "-" * len(title)
+
+ @staticmethod
+ def rst_subsubsection(title: str) -> str:
+ """Add a sub-sub-section to the document"""
+ return f"{title}\n" + "~" * len(title)
+
+ @staticmethod
+ def rst_section(namespace: str, prefix: str, title: str) -> str:
+ """Add a section to the document"""
+ return f".. _{namespace}-{prefix}-{title}:\n\n{title}\n" + "=" * len(title)
+
+ @staticmethod
+ def rst_subtitle(title: str) -> str:
+ """Add a subtitle to the document"""
+ return "\n" + "-" * len(title) + f"\n{title}\n" + "-" * len(title) + "\n\n"
+
+ @staticmethod
+ def rst_title(title: str) -> str:
+ """Add a title to the document"""
+ return "=" * len(title) + f"\n{title}\n" + "=" * len(title) + "\n\n"
+
+ def rst_list_inline(self, list_: List[str], level: int = 0) -> str:
+ """Format a list using inlines"""
+ return self.headroom(level) + "[" + ", ".join(self.inline(i) for i in list_) + "]"
+
+ @staticmethod
+ def rst_ref(namespace: str, prefix: str, name: str) -> str:
+ """Add a hyperlink to the document"""
+ mappings = {'enum': 'definition',
+ 'fixed-header': 'definition',
+ 'nested-attributes': 'attribute-set',
+ 'struct': 'definition'}
+ if prefix in mappings:
+ prefix = mappings[prefix]
+ return f":ref:`{namespace}-{prefix}-{name}`"
+
+ def rst_header(self) -> str:
+ """The headers for all the auto generated RST files"""
+ lines = []
+
+ lines.append(self.rst_paragraph(".. SPDX-License-Identifier: GPL-2.0"))
+ lines.append(self.rst_paragraph(".. NOTE: This document was auto-generated.\n\n"))
+
+ return "\n".join(lines)
+
+ @staticmethod
+ def rst_toctree(maxdepth: int = 2) -> str:
+ """Generate a toctree RST primitive"""
+ lines = []
+
+ lines.append(".. toctree::")
+ lines.append(f" :maxdepth: {maxdepth}\n\n")
+
+ return "\n".join(lines)
+
+ @staticmethod
+ def rst_label(title: str) -> str:
+ """Return a formatted label"""
+ return f".. _{title}:\n\n"
+
+ @staticmethod
+ def rst_lineno(lineno: int) -> str:
+ """Return a lineno comment"""
+ return f".. LINENO {lineno}\n"
+
+class YnlDocGenerator:
+ """YAML Netlink specs Parser"""
+
+ fmt = RstFormatters()
+
+ def parse_mcast_group(self, mcast_group: List[Dict[str, Any]]) -> str:
+ """Parse 'multicast' group list and return a formatted string"""
+ lines = []
+ for group in mcast_group:
+ lines.append(self.fmt.rst_bullet(group["name"]))
+
+ return "\n".join(lines)
+
+ def parse_do(self, do_dict: Dict[str, Any], level: int = 0) -> str:
+ """Parse 'do' section and return a formatted string"""
+ lines = []
+ if LINE_STR in do_dict:
+ lines.append(self.fmt.rst_lineno(do_dict[LINE_STR]))
+
+ for key in do_dict.keys():
+ if key == LINE_STR:
+ continue
+ lines.append(self.fmt.rst_paragraph(self.fmt.bold(key), level + 1))
+ if key in ['request', 'reply']:
+ lines.append(self.parse_do_attributes(do_dict[key], level + 1) + "\n")
+ else:
+ lines.append(self.fmt.headroom(level + 2) + do_dict[key] + "\n")
+
+ return "\n".join(lines)
+
+ def parse_do_attributes(self, attrs: Dict[str, Any], level: int = 0) -> str:
+ """Parse 'attributes' section"""
+ if "attributes" not in attrs:
+ return ""
+ lines = [self.fmt.rst_fields("attributes",
+ self.fmt.rst_list_inline(attrs["attributes"]),
+ level + 1)]
+
+ return "\n".join(lines)
+
+ def parse_operations(self, operations: List[Dict[str, Any]], namespace: str) -> str:
+ """Parse operations block"""
+ preprocessed = ["name", "doc", "title", "do", "dump", "flags"]
+ linkable = ["fixed-header", "attribute-set"]
+ lines = []
+
+ for operation in operations:
+ if LINE_STR in operation:
+ lines.append(self.fmt.rst_lineno(operation[LINE_STR]))
+
+ lines.append(self.fmt.rst_section(namespace, 'operation',
+ operation["name"]))
+ lines.append(self.fmt.rst_paragraph(operation["doc"]) + "\n")
+
+ for key in operation.keys():
+ if key == LINE_STR:
+ continue
+
+ if key in preprocessed:
+ # Skip the special fields
+ continue
+ value = operation[key]
+ if key in linkable:
+ value = self.fmt.rst_ref(namespace, key, value)
+ lines.append(self.fmt.rst_fields(key, value, 0))
+ if 'flags' in operation:
+ lines.append(self.fmt.rst_fields('flags',
+ self.fmt.rst_list_inline(operation['flags'])))
+
+ if "do" in operation:
+ lines.append(self.fmt.rst_paragraph(":do:", 0))
+ lines.append(self.parse_do(operation["do"], 0))
+ if "dump" in operation:
+ lines.append(self.fmt.rst_paragraph(":dump:", 0))
+ lines.append(self.parse_do(operation["dump"], 0))
+
+ # New line after fields
+ lines.append("\n")
+
+ return "\n".join(lines)
+
+ def parse_entries(self, entries: List[Dict[str, Any]], level: int) -> str:
+ """Parse a list of entries"""
+ ignored = ["pad"]
+ lines = []
+ for entry in entries:
+ if isinstance(entry, dict):
+ # each entry can be a dict, a list or a plain scalar
+ field_name = entry.get("name", "")
+ if field_name in ignored:
+ continue
+ type_ = entry.get("type")
+ if type_:
+ field_name += f" ({self.fmt.inline(type_)})"
+ lines.append(
+ self.fmt.rst_fields(field_name,
+ self.fmt.sanitize(entry.get("doc", "")),
+ level)
+ )
+ elif isinstance(entry, list):
+ lines.append(self.fmt.rst_list_inline(entry, level))
+ else:
+ lines.append(self.fmt.rst_bullet(self.fmt.inline(self.fmt.sanitize(entry)),
+ level))
+
+ lines.append("\n")
+ return "\n".join(lines)
+
+ def parse_definitions(self, defs: Dict[str, Any], namespace: str) -> str:
+ """Parse definitions section"""
+ preprocessed = ["name", "entries", "members"]
+ ignored = ["render-max"] # This is not printed
+ lines = []
+
+ for definition in defs:
+ if LINE_STR in definition:
+ lines.append(self.fmt.rst_lineno(definition[LINE_STR]))
+
+ lines.append(self.fmt.rst_section(namespace, 'definition', definition["name"]))
+ for k in definition.keys():
+ if k == LINE_STR:
+ continue
+ if k in preprocessed + ignored:
+ continue
+ lines.append(self.fmt.rst_fields(k, self.fmt.sanitize(definition[k]), 0))
+
+ # Field list needs to finish with a new line
+ lines.append("\n")
+ if "entries" in definition:
+ lines.append(self.fmt.rst_paragraph(":entries:", 0))
+ lines.append(self.parse_entries(definition["entries"], 1))
+ if "members" in definition:
+ lines.append(self.fmt.rst_paragraph(":members:", 0))
+ lines.append(self.parse_entries(definition["members"], 1))
+
+ return "\n".join(lines)
+
+ def parse_attr_sets(self, entries: List[Dict[str, Any]], namespace: str) -> str:
+ """Parse attribute from attribute-set"""
+ preprocessed = ["name", "type"]
+ linkable = ["enum", "nested-attributes", "struct", "sub-message"]
+ ignored = ["checks"]
+ lines = []
+
+ for entry in entries:
+ lines.append(self.fmt.rst_section(namespace, 'attribute-set',
+ entry["name"]))
+ for attr in entry["attributes"]:
+ if LINE_STR in attr:
+ lines.append(self.fmt.rst_lineno(attr[LINE_STR]))
+
+ type_ = attr.get("type")
+ attr_line = attr["name"]
+ if type_:
+ # Add the attribute type on the same line
+ attr_line += f" ({self.fmt.inline(type_)})"
+
+ lines.append(self.fmt.rst_subsubsection(attr_line))
+
+ for k in attr.keys():
+ if k == LINE_STR:
+ continue
+ if k in preprocessed + ignored:
+ continue
+ if k in linkable:
+ value = self.fmt.rst_ref(namespace, k, attr[k])
+ else:
+ value = self.fmt.sanitize(attr[k])
+ lines.append(self.fmt.rst_fields(k, value, 0))
+ lines.append("\n")
+
+ return "\n".join(lines)
+
+ def parse_sub_messages(self, entries: List[Dict[str, Any]], namespace: str) -> str:
+ """Parse sub-message definitions"""
+ lines = []
+
+ for entry in entries:
+ lines.append(self.fmt.rst_section(namespace, 'sub-message',
+ entry["name"]))
+ for fmt in entry["formats"]:
+ value = fmt["value"]
+
+ lines.append(self.fmt.rst_bullet(self.fmt.bold(value)))
+ for attr in ['fixed-header', 'attribute-set']:
+ if attr in fmt:
+ lines.append(self.fmt.rst_fields(attr,
+ self.fmt.rst_ref(namespace,
+ attr,
+ fmt[attr]),
+ 1))
+ lines.append("\n")
+
+ return "\n".join(lines)
+
+ def parse_yaml(self, obj: Dict[str, Any]) -> str:
+ """Format the whole YAML into a RST string"""
+ lines = []
+
+ # Main header
+ lineno = obj.get('__lineno__', 0)
+ lines.append(self.fmt.rst_lineno(lineno))
+
+ family = obj['name']
+
+ lines.append(self.fmt.rst_header())
+ lines.append(self.fmt.rst_label("netlink-" + family))
+
+ title = f"Family ``{family}`` netlink specification"
+ lines.append(self.fmt.rst_title(title))
+ lines.append(self.fmt.rst_paragraph(".. contents:: :depth: 3\n"))
+
+ if "doc" in obj:
+ lines.append(self.fmt.rst_subtitle("Summary"))
+ lines.append(self.fmt.rst_paragraph(obj["doc"], 0))
+
+ # Operations
+ if "operations" in obj:
+ lines.append(self.fmt.rst_subtitle("Operations"))
+ lines.append(self.parse_operations(obj["operations"]["list"],
+ family))
+
+ # Multicast groups
+ if "mcast-groups" in obj:
+ lines.append(self.fmt.rst_subtitle("Multicast groups"))
+ lines.append(self.parse_mcast_group(obj["mcast-groups"]["list"]))
+
+ # Definitions
+ if "definitions" in obj:
+ lines.append(self.fmt.rst_subtitle("Definitions"))
+ lines.append(self.parse_definitions(obj["definitions"], family))
+
+ # Attributes set
+ if "attribute-sets" in obj:
+ lines.append(self.fmt.rst_subtitle("Attribute sets"))
+ lines.append(self.parse_attr_sets(obj["attribute-sets"], family))
+
+ # Sub-messages
+ if "sub-messages" in obj:
+ lines.append(self.fmt.rst_subtitle("Sub-messages"))
+ lines.append(self.parse_sub_messages(obj["sub-messages"], family))
+
+ return "\n".join(lines)
+
+ # Main functions
+ # ==============
+
+ def parse_yaml_file(self, filename: str) -> str:
+ """Transform the YAML specified by filename into an RST-formatted string"""
+ with open(filename, "r", encoding="utf-8") as spec_file:
+ numbered_yaml = yaml.load(spec_file, Loader=NumberedSafeLoader)
+ content = self.parse_yaml(numbered_yaml)
+
+ return content
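
For reference, the new lib/doc_generator.py module is usable on its own. A
minimal sketch of the intended flow (hypothetical spec file name; assumes
pyynl/ is on sys.path so the lib package resolves, as the updated
ynl_gen_rst.py below arranges):

    import yaml
    from lib.doc_generator import LINE_STR, NumberedSafeLoader, YnlDocGenerator

    # NumberedSafeLoader tags every YAML mapping with the line it starts on,
    # stored under the '__lineno__' (LINE_STR) key.
    with open("some-family.yaml", encoding="utf-8") as f:   # hypothetical file
        spec = yaml.load(f, Loader=NumberedSafeLoader)

    # parse_yaml() turns those markers into ".. LINENO <n>" comments in the
    # generated RST, so rendered output can be traced back to the YAML source.
    rst = YnlDocGenerator().parse_yaml(spec)
    print(rst.splitlines()[0])   # e.g. ".. LINENO 0"
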
diff --git a/tools/net/ynl/pyynl/ynl_gen_c.py b/tools/net/ynl/pyynl/ynl_gen_c.py
index eb295756c3bf..fb7e03805a11 100755
--- a/tools/net/ynl/pyynl/ynl_gen_c.py
+++ b/tools/net/ynl/pyynl/ynl_gen_c.py
@@ -556,7 +556,7 @@ class TypeBinary(Type):
elif 'exact-len' in self.checks:
mem = 'NLA_POLICY_EXACT_LEN(' + self.get_limit_str('exact-len') + ')'
elif 'min-len' in self.checks:
- mem = '{ .len = ' + self.get_limit_str('min-len') + ', }'
+ mem = 'NLA_POLICY_MIN_LEN(' + self.get_limit_str('min-len') + ')'
elif 'max-len' in self.checks:
mem = 'NLA_POLICY_MAX_LEN(' + self.get_limit_str('max-len') + ')'
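
The min-len fix above deserves a note: for NLA_BINARY attributes an open-coded
'{ .len = N }' policy entry is validated as a maximum length by the kernel, so
spelling min-len that way inverted the intended check. A condensed Python
sketch of the selection logic as it stands after this hunk (a plain dict
stands in for TypeBinary's checks; remaining branches are omitted):

    def binary_policy_member(checks: dict) -> str:
        # Mirrors TypeBinary's policy-member selection after this change.
        if 'exact-len' in checks:
            return f"NLA_POLICY_EXACT_LEN({checks['exact-len']})"
        if 'min-len' in checks:
            # Previously '{ .len = N, }', which the kernel validates as a
            # maximum for NLA_BINARY -- the opposite of the intent.
            return f"NLA_POLICY_MIN_LEN({checks['min-len']})"
        if 'max-len' in checks:
            return f"NLA_POLICY_MAX_LEN({checks['max-len']})"
        raise NotImplementedError("other checks omitted from this sketch")
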
diff --git a/tools/net/ynl/pyynl/ynl_gen_rst.py b/tools/net/ynl/pyynl/ynl_gen_rst.py
index 0cb6348e28d3..90ae19aac89d 100755
--- a/tools/net/ynl/pyynl/ynl_gen_rst.py
+++ b/tools/net/ynl/pyynl/ynl_gen_rst.py
@@ -10,353 +10,17 @@
This script performs extensive parsing to the Linux kernel's netlink YAML
spec files, in an effort to avoid needing to heavily mark up the original
- YAML file.
-
- This code is split in three big parts:
- 1) RST formatters: Use to convert a string to a RST output
- 2) Parser helpers: Functions to parse the YAML data structure
- 3) Main function and small helpers
+ YAML file. It uses the library code from pyynl/lib.
"""
-from typing import Any, Dict, List
import os.path
+import pathlib
import sys
import argparse
import logging
-import yaml
-
-
-SPACE_PER_LEVEL = 4
-
-
-# RST Formatters
-# ==============
-def headroom(level: int) -> str:
- """Return space to format"""
- return " " * (level * SPACE_PER_LEVEL)
-
-
-def bold(text: str) -> str:
- """Format bold text"""
- return f"**{text}**"
-
-
-def inline(text: str) -> str:
- """Format inline text"""
- return f"``{text}``"
-
-
-def sanitize(text: str) -> str:
- """Remove newlines and multiple spaces"""
- # This is useful for some fields that are spread across multiple lines
- return str(text).replace("\n", " ").strip()
-
-
-def rst_fields(key: str, value: str, level: int = 0) -> str:
- """Return a RST formatted field"""
- return headroom(level) + f":{key}: {value}"
-
-
-def rst_definition(key: str, value: Any, level: int = 0) -> str:
- """Format a single rst definition"""
- return headroom(level) + key + "\n" + headroom(level + 1) + str(value)
-
-
-def rst_paragraph(paragraph: str, level: int = 0) -> str:
- """Return a formatted paragraph"""
- return headroom(level) + paragraph
-
-
-def rst_bullet(item: str, level: int = 0) -> str:
- """Return a formatted a bullet"""
- return headroom(level) + f"- {item}"
-
-
-def rst_subsection(title: str) -> str:
- """Add a sub-section to the document"""
- return f"{title}\n" + "-" * len(title)
-
-
-def rst_subsubsection(title: str) -> str:
- """Add a sub-sub-section to the document"""
- return f"{title}\n" + "~" * len(title)
-
-
-def rst_section(namespace: str, prefix: str, title: str) -> str:
- """Add a section to the document"""
- return f".. _{namespace}-{prefix}-{title}:\n\n{title}\n" + "=" * len(title)
-
-
-def rst_subtitle(title: str) -> str:
- """Add a subtitle to the document"""
- return "\n" + "-" * len(title) + f"\n{title}\n" + "-" * len(title) + "\n\n"
-
-
-def rst_title(title: str) -> str:
- """Add a title to the document"""
- return "=" * len(title) + f"\n{title}\n" + "=" * len(title) + "\n\n"
-
-
-def rst_list_inline(list_: List[str], level: int = 0) -> str:
- """Format a list using inlines"""
- return headroom(level) + "[" + ", ".join(inline(i) for i in list_) + "]"
-
-
-def rst_ref(namespace: str, prefix: str, name: str) -> str:
- """Add a hyperlink to the document"""
- mappings = {'enum': 'definition',
- 'fixed-header': 'definition',
- 'nested-attributes': 'attribute-set',
- 'struct': 'definition'}
- if prefix in mappings:
- prefix = mappings[prefix]
- return f":ref:`{namespace}-{prefix}-{name}`"
-
-
-def rst_header() -> str:
- """The headers for all the auto generated RST files"""
- lines = []
-
- lines.append(rst_paragraph(".. SPDX-License-Identifier: GPL-2.0"))
- lines.append(rst_paragraph(".. NOTE: This document was auto-generated.\n\n"))
-
- return "\n".join(lines)
-
-
-def rst_toctree(maxdepth: int = 2) -> str:
- """Generate a toctree RST primitive"""
- lines = []
-
- lines.append(".. toctree::")
- lines.append(f" :maxdepth: {maxdepth}\n\n")
-
- return "\n".join(lines)
-
-
-def rst_label(title: str) -> str:
- """Return a formatted label"""
- return f".. _{title}:\n\n"
-
-
-# Parsers
-# =======
-
-
-def parse_mcast_group(mcast_group: List[Dict[str, Any]]) -> str:
- """Parse 'multicast' group list and return a formatted string"""
- lines = []
- for group in mcast_group:
- lines.append(rst_bullet(group["name"]))
-
- return "\n".join(lines)
-
-
-def parse_do(do_dict: Dict[str, Any], level: int = 0) -> str:
- """Parse 'do' section and return a formatted string"""
- lines = []
- for key in do_dict.keys():
- lines.append(rst_paragraph(bold(key), level + 1))
- if key in ['request', 'reply']:
- lines.append(parse_do_attributes(do_dict[key], level + 1) + "\n")
- else:
- lines.append(headroom(level + 2) + do_dict[key] + "\n")
-
- return "\n".join(lines)
-
-
-def parse_do_attributes(attrs: Dict[str, Any], level: int = 0) -> str:
- """Parse 'attributes' section"""
- if "attributes" not in attrs:
- return ""
- lines = [rst_fields("attributes", rst_list_inline(attrs["attributes"]), level + 1)]
-
- return "\n".join(lines)
-
-
-def parse_operations(operations: List[Dict[str, Any]], namespace: str) -> str:
- """Parse operations block"""
- preprocessed = ["name", "doc", "title", "do", "dump", "flags"]
- linkable = ["fixed-header", "attribute-set"]
- lines = []
-
- for operation in operations:
- lines.append(rst_section(namespace, 'operation', operation["name"]))
- lines.append(rst_paragraph(operation["doc"]) + "\n")
-
- for key in operation.keys():
- if key in preprocessed:
- # Skip the special fields
- continue
- value = operation[key]
- if key in linkable:
- value = rst_ref(namespace, key, value)
- lines.append(rst_fields(key, value, 0))
- if 'flags' in operation:
- lines.append(rst_fields('flags', rst_list_inline(operation['flags'])))
-
- if "do" in operation:
- lines.append(rst_paragraph(":do:", 0))
- lines.append(parse_do(operation["do"], 0))
- if "dump" in operation:
- lines.append(rst_paragraph(":dump:", 0))
- lines.append(parse_do(operation["dump"], 0))
-
- # New line after fields
- lines.append("\n")
-
- return "\n".join(lines)
-
-
-def parse_entries(entries: List[Dict[str, Any]], level: int) -> str:
- """Parse a list of entries"""
- ignored = ["pad"]
- lines = []
- for entry in entries:
- if isinstance(entry, dict):
- # entries could be a list or a dictionary
- field_name = entry.get("name", "")
- if field_name in ignored:
- continue
- type_ = entry.get("type")
- if type_:
- field_name += f" ({inline(type_)})"
- lines.append(
- rst_fields(field_name, sanitize(entry.get("doc", "")), level)
- )
- elif isinstance(entry, list):
- lines.append(rst_list_inline(entry, level))
- else:
- lines.append(rst_bullet(inline(sanitize(entry)), level))
-
- lines.append("\n")
- return "\n".join(lines)
-
-
-def parse_definitions(defs: Dict[str, Any], namespace: str) -> str:
- """Parse definitions section"""
- preprocessed = ["name", "entries", "members"]
- ignored = ["render-max"] # This is not printed
- lines = []
-
- for definition in defs:
- lines.append(rst_section(namespace, 'definition', definition["name"]))
- for k in definition.keys():
- if k in preprocessed + ignored:
- continue
- lines.append(rst_fields(k, sanitize(definition[k]), 0))
-
- # Field list needs to finish with a new line
- lines.append("\n")
- if "entries" in definition:
- lines.append(rst_paragraph(":entries:", 0))
- lines.append(parse_entries(definition["entries"], 1))
- if "members" in definition:
- lines.append(rst_paragraph(":members:", 0))
- lines.append(parse_entries(definition["members"], 1))
-
- return "\n".join(lines)
-
-
-def parse_attr_sets(entries: List[Dict[str, Any]], namespace: str) -> str:
- """Parse attribute from attribute-set"""
- preprocessed = ["name", "type"]
- linkable = ["enum", "nested-attributes", "struct", "sub-message"]
- ignored = ["checks"]
- lines = []
-
- for entry in entries:
- lines.append(rst_section(namespace, 'attribute-set', entry["name"]))
- for attr in entry["attributes"]:
- type_ = attr.get("type")
- attr_line = attr["name"]
- if type_:
- # Add the attribute type in the same line
- attr_line += f" ({inline(type_)})"
-
- lines.append(rst_subsubsection(attr_line))
-
- for k in attr.keys():
- if k in preprocessed + ignored:
- continue
- if k in linkable:
- value = rst_ref(namespace, k, attr[k])
- else:
- value = sanitize(attr[k])
- lines.append(rst_fields(k, value, 0))
- lines.append("\n")
-
- return "\n".join(lines)
-
-
-def parse_sub_messages(entries: List[Dict[str, Any]], namespace: str) -> str:
- """Parse sub-message definitions"""
- lines = []
-
- for entry in entries:
- lines.append(rst_section(namespace, 'sub-message', entry["name"]))
- for fmt in entry["formats"]:
- value = fmt["value"]
-
- lines.append(rst_bullet(bold(value)))
- for attr in ['fixed-header', 'attribute-set']:
- if attr in fmt:
- lines.append(rst_fields(attr,
- rst_ref(namespace, attr, fmt[attr]),
- 1))
- lines.append("\n")
-
- return "\n".join(lines)
-
-
-def parse_yaml(obj: Dict[str, Any]) -> str:
- """Format the whole YAML into a RST string"""
- lines = []
-
- # Main header
-
- lines.append(rst_header())
-
- family = obj['name']
-
- title = f"Family ``{family}`` netlink specification"
- lines.append(rst_title(title))
- lines.append(rst_paragraph(".. contents:: :depth: 3\n"))
-
- if "doc" in obj:
- lines.append(rst_subtitle("Summary"))
- lines.append(rst_paragraph(obj["doc"], 0))
-
- # Operations
- if "operations" in obj:
- lines.append(rst_subtitle("Operations"))
- lines.append(parse_operations(obj["operations"]["list"], family))
-
- # Multicast groups
- if "mcast-groups" in obj:
- lines.append(rst_subtitle("Multicast groups"))
- lines.append(parse_mcast_group(obj["mcast-groups"]["list"]))
-
- # Definitions
- if "definitions" in obj:
- lines.append(rst_subtitle("Definitions"))
- lines.append(parse_definitions(obj["definitions"], family))
-
- # Attributes set
- if "attribute-sets" in obj:
- lines.append(rst_subtitle("Attribute sets"))
- lines.append(parse_attr_sets(obj["attribute-sets"], family))
-
- # Sub-messages
- if "sub-messages" in obj:
- lines.append(rst_subtitle("Sub-messages"))
- lines.append(parse_sub_messages(obj["sub-messages"], family))
-
- return "\n".join(lines)
-
-
-# Main functions
-# ==============
+sys.path.append(pathlib.Path(__file__).resolve().parent.as_posix())
+from lib import YnlDocGenerator # pylint: disable=C0413
def parse_arguments() -> argparse.Namespace:
"""Parse arguments from user"""
@@ -367,9 +31,6 @@ def parse_arguments() -> argparse.Namespace:
# Index and input are mutually exclusive
group = parser.add_mutually_exclusive_group()
- group.add_argument(
- "-x", "--index", action="store_true", help="Generate the index page"
- )
group.add_argument("-i", "--input", help="YAML file name")
args = parser.parse_args()
@@ -391,15 +52,6 @@ def parse_arguments() -> argparse.Namespace:
return args
-def parse_yaml_file(filename: str) -> str:
- """Transform the YAML specified by filename into an RST-formatted string"""
- with open(filename, "r", encoding="utf-8") as spec_file:
- yaml_data = yaml.safe_load(spec_file)
- content = parse_yaml(yaml_data)
-
- return content
-
-
def write_to_rstfile(content: str, filename: str) -> None:
"""Write the generated content into an RST file"""
logging.debug("Saving RST file to %s", filename)
@@ -408,35 +60,17 @@ def write_to_rstfile(content: str, filename: str) -> None:
rst_file.write(content)
-def generate_main_index_rst(output: str) -> None:
- """Generate the `networking_spec/index` content and write to the file"""
- lines = []
-
- lines.append(rst_header())
- lines.append(rst_label("specs"))
- lines.append(rst_title("Netlink Family Specifications"))
- lines.append(rst_toctree(1))
-
- index_dir = os.path.dirname(output)
- logging.debug("Looking for .rst files in %s", index_dir)
- for filename in sorted(os.listdir(index_dir)):
- if not filename.endswith(".rst") or filename == "index.rst":
- continue
- lines.append(f" {filename.replace('.rst', '')}\n")
-
- logging.debug("Writing an index file at %s", output)
- write_to_rstfile("".join(lines), output)
-
-
def main() -> None:
"""Main function that reads the YAML files and generates the RST files"""
args = parse_arguments()
+ parser = YnlDocGenerator()
+
if args.input:
logging.debug("Parsing %s", args.input)
try:
- content = parse_yaml_file(os.path.join(args.input))
+ content = parser.parse_yaml_file(os.path.join(args.input))
except Exception as exception:
logging.warning("Failed to parse %s.", args.input)
logging.warning(exception)
@@ -444,10 +78,6 @@ def main() -> None:
write_to_rstfile(content, args.output)
- if args.index:
- # Generate the index RST file
- generate_main_index_rst(args.output)
-
if __name__ == "__main__":
main()
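
After this refactor ynl_gen_rst.py is a thin CLI wrapper around the library.
Its remaining control flow is equivalent to this condensed sketch (argument
plumbing, logging and error handling trimmed):

    args = parse_arguments()
    parser = YnlDocGenerator()
    if args.input:
        content = parser.parse_yaml_file(args.input)
        write_to_rstfile(content, args.output)
    # The -x/--index mode is gone; index generation is presumably handled
    # by the Sphinx integration added elsewhere in this series.
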
diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h
index 9386dfe8b884..794d44d19c88 100644
--- a/tools/testing/selftests/bpf/bpf_kfuncs.h
+++ b/tools/testing/selftests/bpf/bpf_kfuncs.h
@@ -19,6 +19,9 @@ extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags,
extern int bpf_dynptr_from_xdp(struct xdp_md *xdp, __u64 flags,
struct bpf_dynptr *ptr__uninit) __ksym __weak;
+extern int bpf_dynptr_from_skb_meta(struct __sk_buff *skb, __u64 flags,
+ struct bpf_dynptr *ptr__uninit) __ksym __weak;
+
/* Description
* Obtain a read-only pointer to the dynptr's data
* Returns
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 8916ab814a3e..70b28c1e653e 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -61,6 +61,7 @@ CONFIG_MPLS_IPTUNNEL=y
CONFIG_MPLS_ROUTING=y
CONFIG_MPTCP=y
CONFIG_NET_ACT_GACT=y
+CONFIG_NET_ACT_MIRRED=y
CONFIG_NET_ACT_SKBMOD=y
CONFIG_NET_CLS=y
CONFIG_NET_CLS_ACT=y
diff --git a/tools/testing/selftests/bpf/prog_tests/dynptr.c b/tools/testing/selftests/bpf/prog_tests/dynptr.c
index 9b2d9ceda210..b9f86cb91e81 100644
--- a/tools/testing/selftests/bpf/prog_tests/dynptr.c
+++ b/tools/testing/selftests/bpf/prog_tests/dynptr.c
@@ -32,6 +32,8 @@ static struct {
{"test_ringbuf", SETUP_SYSCALL_SLEEP},
{"test_skb_readonly", SETUP_SKB_PROG},
{"test_dynptr_skb_data", SETUP_SKB_PROG},
+ {"test_dynptr_skb_meta_data", SETUP_SKB_PROG},
+ {"test_dynptr_skb_meta_flags", SETUP_SKB_PROG},
{"test_adjust", SETUP_SYSCALL_SLEEP},
{"test_adjust_err", SETUP_SYSCALL_SLEEP},
{"test_zero_size_dynptr", SETUP_SYSCALL_SLEEP},
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c
index b9d9f0a502ce..46e0730174ed 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c
@@ -9,6 +9,7 @@
#define TX_NETNS "xdp_context_tx"
#define RX_NETNS "xdp_context_rx"
#define TAP_NAME "tap0"
+#define DUMMY_NAME "dum0"
#define TAP_NETNS "xdp_context_tuntap"
#define TEST_PAYLOAD_LEN 32
@@ -156,15 +157,30 @@ err:
return -1;
}
-static void assert_test_result(struct test_xdp_meta *skel)
+static int write_test_packet(int tap_fd)
+{
+ __u8 packet[sizeof(struct ethhdr) + TEST_PAYLOAD_LEN];
+ int n;
+
+ /* The ethernet header doesn't need to be valid for this test */
+ memset(packet, 0, sizeof(struct ethhdr));
+ memcpy(packet + sizeof(struct ethhdr), test_payload, TEST_PAYLOAD_LEN);
+
+ n = write(tap_fd, packet, sizeof(packet));
+ if (!ASSERT_EQ(n, sizeof(packet), "write packet"))
+ return -1;
+
+ return 0;
+}
+
+static void assert_test_result(const struct bpf_map *result_map)
{
int err;
__u32 map_key = 0;
__u8 map_value[TEST_PAYLOAD_LEN];
- err = bpf_map__lookup_elem(skel->maps.test_result, &map_key,
- sizeof(map_key), &map_value,
- TEST_PAYLOAD_LEN, BPF_ANY);
+ err = bpf_map__lookup_elem(result_map, &map_key, sizeof(map_key),
+ &map_value, TEST_PAYLOAD_LEN, BPF_ANY);
if (!ASSERT_OK(err, "lookup test_result"))
return;
@@ -172,6 +188,18 @@ static void assert_test_result(struct test_xdp_meta *skel)
"test_result map contains test payload");
}
+static bool clear_test_result(struct bpf_map *result_map)
+{
+ const __u8 v[sizeof(test_payload)] = {};
+ const __u32 k = 0;
+ int err;
+
+ err = bpf_map__update_elem(result_map, &k, sizeof(k), v, sizeof(v), BPF_ANY);
+ ASSERT_OK(err, "update test_result");
+
+ return err == 0;
+}
+
void test_xdp_context_veth(void)
{
LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS);
@@ -248,7 +276,7 @@ void test_xdp_context_veth(void)
if (!ASSERT_OK(ret, "send_test_packet"))
goto close;
- assert_test_result(skel);
+ assert_test_result(skel->maps.test_result);
close:
close_netns(nstoken);
@@ -257,17 +285,21 @@ close:
netns_free(tx_ns);
}
-void test_xdp_context_tuntap(void)
+static void test_tuntap(struct bpf_program *xdp_prog,
+ struct bpf_program *tc_prio_1_prog,
+ struct bpf_program *tc_prio_2_prog,
+ struct bpf_map *result_map)
{
LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS);
LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1);
struct netns_obj *ns = NULL;
- struct test_xdp_meta *skel = NULL;
- __u8 packet[sizeof(struct ethhdr) + TEST_PAYLOAD_LEN];
int tap_fd = -1;
int tap_ifindex;
int ret;
+ if (!clear_test_result(result_map))
+ return;
+
ns = netns_new(TAP_NETNS, true);
if (!ASSERT_OK_PTR(ns, "create and open ns"))
return;
@@ -278,10 +310,6 @@ void test_xdp_context_tuntap(void)
SYS(close, "ip link set dev " TAP_NAME " up");
- skel = test_xdp_meta__open_and_load();
- if (!ASSERT_OK_PTR(skel, "open and load skeleton"))
- goto close;
-
tap_ifindex = if_nametoindex(TAP_NAME);
if (!ASSERT_GE(tap_ifindex, 0, "if_nametoindex"))
goto close;
@@ -291,33 +319,175 @@ void test_xdp_context_tuntap(void)
if (!ASSERT_OK(ret, "bpf_tc_hook_create"))
goto close;
- tc_opts.prog_fd = bpf_program__fd(skel->progs.ing_cls);
+ tc_opts.prog_fd = bpf_program__fd(tc_prio_1_prog);
ret = bpf_tc_attach(&tc_hook, &tc_opts);
if (!ASSERT_OK(ret, "bpf_tc_attach"))
goto close;
- ret = bpf_xdp_attach(tap_ifindex, bpf_program__fd(skel->progs.ing_xdp),
+ if (tc_prio_2_prog) {
+ LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 2,
+ .prog_fd = bpf_program__fd(tc_prio_2_prog));
+
+ ret = bpf_tc_attach(&tc_hook, &tc_opts);
+ if (!ASSERT_OK(ret, "bpf_tc_attach"))
+ goto close;
+ }
+
+ ret = bpf_xdp_attach(tap_ifindex, bpf_program__fd(xdp_prog),
0, NULL);
if (!ASSERT_GE(ret, 0, "bpf_xdp_attach"))
goto close;
- /* The ethernet header is not relevant for this test and doesn't need to
- * be meaningful.
- */
- struct ethhdr eth = { 0 };
+ ret = write_test_packet(tap_fd);
+ if (!ASSERT_OK(ret, "write_test_packet"))
+ goto close;
- memcpy(packet, &eth, sizeof(eth));
- memcpy(packet + sizeof(eth), test_payload, TEST_PAYLOAD_LEN);
+ assert_test_result(result_map);
+
+close:
+ if (tap_fd >= 0)
+ close(tap_fd);
+ netns_free(ns);
+}
- ret = write(tap_fd, packet, sizeof(packet));
- if (!ASSERT_EQ(ret, sizeof(packet), "write packet"))
+/* Write a packet to a tap dev and copy it to ingress of a dummy dev */
+static void test_tuntap_mirred(struct bpf_program *xdp_prog,
+ struct bpf_program *tc_prog,
+ bool *test_pass)
+{
+ LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS);
+ LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1);
+ struct netns_obj *ns = NULL;
+ int dummy_ifindex;
+ int tap_fd = -1;
+ int tap_ifindex;
+ int ret;
+
+ *test_pass = false;
+
+ ns = netns_new(TAP_NETNS, true);
+ if (!ASSERT_OK_PTR(ns, "netns_new"))
+ return;
+
+ /* Setup dummy interface */
+ SYS(close, "ip link add name " DUMMY_NAME " type dummy");
+ SYS(close, "ip link set dev " DUMMY_NAME " up");
+
+ dummy_ifindex = if_nametoindex(DUMMY_NAME);
+ if (!ASSERT_GE(dummy_ifindex, 0, "if_nametoindex"))
+ goto close;
+
+ tc_hook.ifindex = dummy_ifindex;
+ ret = bpf_tc_hook_create(&tc_hook);
+ if (!ASSERT_OK(ret, "bpf_tc_hook_create"))
+ goto close;
+
+ tc_opts.prog_fd = bpf_program__fd(tc_prog);
+ ret = bpf_tc_attach(&tc_hook, &tc_opts);
+ if (!ASSERT_OK(ret, "bpf_tc_attach"))
+ goto close;
+
+ /* Setup TAP interface */
+ tap_fd = open_tuntap(TAP_NAME, true);
+ if (!ASSERT_GE(tap_fd, 0, "open_tuntap"))
+ goto close;
+
+ SYS(close, "ip link set dev " TAP_NAME " up");
+
+ tap_ifindex = if_nametoindex(TAP_NAME);
+ if (!ASSERT_GE(tap_ifindex, 0, "if_nametoindex"))
+ goto close;
+
+ ret = bpf_xdp_attach(tap_ifindex, bpf_program__fd(xdp_prog), 0, NULL);
+ if (!ASSERT_GE(ret, 0, "bpf_xdp_attach"))
goto close;
- assert_test_result(skel);
+ /* Copy all packets received from TAP to dummy ingress */
+ SYS(close, "tc qdisc add dev " TAP_NAME " clsact");
+ SYS(close, "tc filter add dev " TAP_NAME " ingress "
+ "protocol all matchall "
+ "action mirred ingress mirror dev " DUMMY_NAME);
+
+ /* Receive a packet on TAP */
+ ret = write_test_packet(tap_fd);
+ if (!ASSERT_OK(ret, "write_test_packet"))
+ goto close;
+
+ ASSERT_TRUE(*test_pass, "test_pass");
close:
if (tap_fd >= 0)
close(tap_fd);
- test_xdp_meta__destroy(skel);
netns_free(ns);
}
+
+void test_xdp_context_tuntap(void)
+{
+ struct test_xdp_meta *skel = NULL;
+
+ skel = test_xdp_meta__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open and load skeleton"))
+ return;
+
+ if (test__start_subtest("data_meta"))
+ test_tuntap(skel->progs.ing_xdp,
+ skel->progs.ing_cls,
+ NULL, /* tc prio 2 */
+ skel->maps.test_result);
+ if (test__start_subtest("dynptr_read"))
+ test_tuntap(skel->progs.ing_xdp,
+ skel->progs.ing_cls_dynptr_read,
+ NULL, /* tc prio 2 */
+ skel->maps.test_result);
+ if (test__start_subtest("dynptr_slice"))
+ test_tuntap(skel->progs.ing_xdp,
+ skel->progs.ing_cls_dynptr_slice,
+ NULL, /* tc prio 2 */
+ skel->maps.test_result);
+ if (test__start_subtest("dynptr_write"))
+ test_tuntap(skel->progs.ing_xdp_zalloc_meta,
+ skel->progs.ing_cls_dynptr_write,
+ skel->progs.ing_cls_dynptr_read,
+ skel->maps.test_result);
+ if (test__start_subtest("dynptr_slice_rdwr"))
+ test_tuntap(skel->progs.ing_xdp_zalloc_meta,
+ skel->progs.ing_cls_dynptr_slice_rdwr,
+ skel->progs.ing_cls_dynptr_slice,
+ skel->maps.test_result);
+ if (test__start_subtest("dynptr_offset"))
+ test_tuntap(skel->progs.ing_xdp_zalloc_meta,
+ skel->progs.ing_cls_dynptr_offset_wr,
+ skel->progs.ing_cls_dynptr_offset_rd,
+ skel->maps.test_result);
+ if (test__start_subtest("dynptr_offset_oob"))
+ test_tuntap(skel->progs.ing_xdp,
+ skel->progs.ing_cls_dynptr_offset_oob,
+ skel->progs.ing_cls,
+ skel->maps.test_result);
+ if (test__start_subtest("clone_data_meta_empty_on_data_write"))
+ test_tuntap_mirred(skel->progs.ing_xdp,
+ skel->progs.clone_data_meta_empty_on_data_write,
+ &skel->bss->test_pass);
+ if (test__start_subtest("clone_data_meta_empty_on_meta_write"))
+ test_tuntap_mirred(skel->progs.ing_xdp,
+ skel->progs.clone_data_meta_empty_on_meta_write,
+ &skel->bss->test_pass);
+ if (test__start_subtest("clone_dynptr_empty_on_data_slice_write"))
+ test_tuntap_mirred(skel->progs.ing_xdp,
+ skel->progs.clone_dynptr_empty_on_data_slice_write,
+ &skel->bss->test_pass);
+ if (test__start_subtest("clone_dynptr_empty_on_meta_slice_write"))
+ test_tuntap_mirred(skel->progs.ing_xdp,
+ skel->progs.clone_dynptr_empty_on_meta_slice_write,
+ &skel->bss->test_pass);
+ if (test__start_subtest("clone_dynptr_rdonly_before_data_dynptr_write"))
+ test_tuntap_mirred(skel->progs.ing_xdp,
+ skel->progs.clone_dynptr_rdonly_before_data_dynptr_write,
+ &skel->bss->test_pass);
+ if (test__start_subtest("clone_dynptr_rdonly_before_meta_dynptr_write"))
+ test_tuntap_mirred(skel->progs.ing_xdp,
+ skel->progs.clone_dynptr_rdonly_before_meta_dynptr_write,
+ &skel->bss->test_pass);
+
+ test_xdp_meta__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c
index ffbd4b116d17..23b2aa2604de 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c
@@ -64,7 +64,8 @@ int dump_udp4(struct bpf_iter__udp *ctx)
0, 0L, 0, ctx->uid, 0,
sock_i_ino(&inet->sk),
inet->sk.sk_refcnt.refs.counter, udp_sk,
- inet->sk.sk_drops.counter);
+ udp_sk->drop_counters.drops0.counter +
+ udp_sk->drop_counters.drops1.counter);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c b/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c
index 47ff7754f4fd..c48b05aa2a4b 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c
@@ -72,7 +72,7 @@ int dump_udp6(struct bpf_iter__udp *ctx)
0, 0L, 0, ctx->uid, 0,
sock_i_ino(&inet->sk),
inet->sk.sk_refcnt.refs.counter, udp_sk,
- inet->sk.sk_drops.counter);
-
+ udp_sk->drop_counters.drops0.counter +
+ udp_sk->drop_counters.drops1.counter);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c
index bd8f15229f5c..dda6a8dada82 100644
--- a/tools/testing/selftests/bpf/progs/dynptr_fail.c
+++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c
@@ -269,6 +269,26 @@ int data_slice_out_of_bounds_skb(struct __sk_buff *skb)
return SK_PASS;
}
+/* A metadata slice can't be accessed out of bounds */
+SEC("?tc")
+__failure __msg("value is outside of the allowed memory range")
+int data_slice_out_of_bounds_skb_meta(struct __sk_buff *skb)
+{
+ struct bpf_dynptr meta;
+ __u8 *md;
+
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+
+ md = bpf_dynptr_slice_rdwr(&meta, 0, NULL, sizeof(*md));
+ if (!md)
+ return SK_DROP;
+
+ /* this should fail */
+ *(md + 1) = 42;
+
+ return SK_PASS;
+}
+
SEC("?raw_tp")
__failure __msg("value is outside of the allowed memory range")
int data_slice_out_of_bounds_map_value(void *ctx)
@@ -1089,6 +1109,26 @@ int skb_invalid_slice_write(struct __sk_buff *skb)
return SK_PASS;
}
+/* Slices returned by bpf_dynptr_slice() are read-only and cannot be written to */
+SEC("?tc")
+__failure __msg("R{{[0-9]+}} cannot write into rdonly_mem")
+int skb_meta_invalid_slice_write(struct __sk_buff *skb)
+{
+ struct bpf_dynptr meta;
+ __u8 *md;
+
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+
+ md = bpf_dynptr_slice(&meta, 0, NULL, sizeof(*md));
+ if (!md)
+ return SK_DROP;
+
+ /* this should fail */
+ *md = 42;
+
+ return SK_PASS;
+}
+
/* The read-only data slice is invalidated whenever a helper changes packet data */
SEC("?tc")
__failure __msg("invalid mem access 'scalar'")
@@ -1192,6 +1232,188 @@ int skb_invalid_data_slice4(struct __sk_buff *skb)
return SK_PASS;
}
+/* Read-only skb data slice is invalidated on write to skb metadata */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int ro_skb_slice_invalid_after_metadata_write(struct __sk_buff *skb)
+{
+ struct bpf_dynptr data, meta;
+ __u8 *d;
+
+ bpf_dynptr_from_skb(skb, 0, &data);
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+
+ d = bpf_dynptr_slice(&data, 0, NULL, sizeof(*d));
+ if (!d)
+ return SK_DROP;
+
+ bpf_dynptr_write(&meta, 0, "x", 1, 0);
+
+ /* this should fail */
+ val = *d;
+
+ return SK_PASS;
+}
+
+/* Read-write skb data slice is invalidated on write to skb metadata */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int rw_skb_slice_invalid_after_metadata_write(struct __sk_buff *skb)
+{
+ struct bpf_dynptr data, meta;
+ __u8 *d;
+
+ bpf_dynptr_from_skb(skb, 0, &data);
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+
+ d = bpf_dynptr_slice_rdwr(&data, 0, NULL, sizeof(*d));
+ if (!d)
+ return SK_DROP;
+
+ bpf_dynptr_write(&meta, 0, "x", 1, 0);
+
+ /* this should fail */
+ *d = 42;
+
+ return SK_PASS;
+}
+
+/* Read-only skb metadata slice is invalidated on write to skb data */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int ro_skb_meta_slice_invalid_after_payload_write(struct __sk_buff *skb)
+{
+ struct bpf_dynptr data, meta;
+ __u8 *md;
+
+ bpf_dynptr_from_skb(skb, 0, &data);
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+
+ md = bpf_dynptr_slice(&meta, 0, NULL, sizeof(*md));
+ if (!md)
+ return SK_DROP;
+
+ bpf_dynptr_write(&data, 0, "x", 1, 0);
+
+ /* this should fail */
+ val = *md;
+
+ return SK_PASS;
+}
+
+/* Read-write skb metadata slice is invalidated on write to skb data slice */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int rw_skb_meta_slice_invalid_after_payload_write(struct __sk_buff *skb)
+{
+ struct bpf_dynptr data, meta;
+ __u8 *md;
+
+ bpf_dynptr_from_skb(skb, 0, &data);
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+
+ md = bpf_dynptr_slice_rdwr(&meta, 0, NULL, sizeof(*md));
+ if (!md)
+ return SK_DROP;
+
+ bpf_dynptr_write(&data, 0, "x", 1, 0);
+
+ /* this should fail */
+ *md = 42;
+
+ return SK_PASS;
+}
+
+/* Read-only skb metadata slice is invalidated whenever a helper changes packet data */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int ro_skb_meta_slice_invalid_after_payload_helper(struct __sk_buff *skb)
+{
+ struct bpf_dynptr meta;
+ __u8 *md;
+
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+
+ md = bpf_dynptr_slice(&meta, 0, NULL, sizeof(*md));
+ if (!md)
+ return SK_DROP;
+
+ if (bpf_skb_pull_data(skb, skb->len))
+ return SK_DROP;
+
+ /* this should fail */
+ val = *md;
+
+ return SK_PASS;
+}
+
+/* Read-write skb metadata slice is invalidated whenever a helper changes packet data */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int rw_skb_meta_slice_invalid_after_payload_helper(struct __sk_buff *skb)
+{
+ struct bpf_dynptr meta;
+ __u8 *md;
+
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+
+ md = bpf_dynptr_slice_rdwr(&meta, 0, NULL, sizeof(*md));
+ if (!md)
+ return SK_DROP;
+
+ if (bpf_skb_pull_data(skb, skb->len))
+ return SK_DROP;
+
+ /* this should fail */
+ *md = 42;
+
+ return SK_PASS;
+}
+
+/* Read-only skb metadata slice is invalidated on write to skb metadata */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int ro_skb_meta_slice_invalid_after_metadata_write(struct __sk_buff *skb)
+{
+ struct bpf_dynptr meta;
+ __u8 *md;
+
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+
+ md = bpf_dynptr_slice(&meta, 0, NULL, sizeof(*md));
+ if (!md)
+ return SK_DROP;
+
+ bpf_dynptr_write(&meta, 0, "x", 1, 0);
+
+ /* this should fail */
+ val = *md;
+
+ return SK_PASS;
+}
+
+/* Read-write skb metadata slice is invalidated on write to skb metadata */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int rw_skb_meta_slice_invalid_after_metadata_write(struct __sk_buff *skb)
+{
+ struct bpf_dynptr meta;
+ __u8 *md;
+
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+
+ md = bpf_dynptr_slice_rdwr(&meta, 0, NULL, sizeof(*md));
+ if (!md)
+ return SK_DROP;
+
+ bpf_dynptr_write(&meta, 0, "x", 1, 0);
+
+ /* this should fail */
+ *md = 42;
+
+ return SK_PASS;
+}
+
/* The read-only data slice is invalidated whenever a helper changes packet data */
SEC("?xdp")
__failure __msg("invalid mem access 'scalar'")
@@ -1255,6 +1477,19 @@ int skb_invalid_ctx(void *ctx)
return 0;
}
+/* Only supported prog types can create skb_meta-type dynptrs */
+SEC("?raw_tp")
+__failure __msg("calling kernel function bpf_dynptr_from_skb_meta is not allowed")
+int skb_meta_invalid_ctx(void *ctx)
+{
+ struct bpf_dynptr meta;
+
+ /* this should fail */
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+
+ return 0;
+}
+
SEC("fentry/skb_tx_error")
__failure __msg("must be referenced or trusted")
int BPF_PROG(skb_invalid_ctx_fentry, void *skb)
@@ -1665,6 +1900,29 @@ int clone_skb_packet_data(struct __sk_buff *skb)
return 0;
}
+/* A skb clone's metadata slice becomes invalid anytime packet data changes */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int clone_skb_packet_meta(struct __sk_buff *skb)
+{
+ struct bpf_dynptr clone, meta;
+ __u8 *md;
+
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+ bpf_dynptr_clone(&meta, &clone);
+ md = bpf_dynptr_slice_rdwr(&clone, 0, NULL, sizeof(*md));
+ if (!md)
+ return SK_DROP;
+
+ if (bpf_skb_pull_data(skb, skb->len))
+ return SK_DROP;
+
+ /* this should fail */
+ *md = 42;
+
+ return 0;
+}
+
/* A xdp clone's data slices should be invalid anytime packet data changes */
SEC("?xdp")
__failure __msg("invalid mem access 'scalar'")
diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c
index 8315273cb900..127dea342e5a 100644
--- a/tools/testing/selftests/bpf/progs/dynptr_success.c
+++ b/tools/testing/selftests/bpf/progs/dynptr_success.c
@@ -211,6 +211,61 @@ int test_dynptr_skb_data(struct __sk_buff *skb)
return 1;
}
+SEC("?tc")
+int test_dynptr_skb_meta_data(struct __sk_buff *skb)
+{
+ struct bpf_dynptr meta;
+ __u8 *md;
+ int ret;
+
+ err = 1;
+ ret = bpf_dynptr_from_skb_meta(skb, 0, &meta);
+ if (ret)
+ return 1;
+
+ /* This should return NULL. Must use bpf_dynptr_slice API */
+ err = 2;
+ md = bpf_dynptr_data(&meta, 0, sizeof(*md));
+ if (md)
+ return 1;
+
+ err = 0;
+ return 1;
+}
+
+/* Check that skb metadata dynptr ops don't accept any flags. */
+SEC("?tc")
+int test_dynptr_skb_meta_flags(struct __sk_buff *skb)
+{
+ const __u64 INVALID_FLAGS = ~0ULL;
+ struct bpf_dynptr meta;
+ __u8 buf;
+ int ret;
+
+ err = 1;
+ ret = bpf_dynptr_from_skb_meta(skb, INVALID_FLAGS, &meta);
+ if (ret != -EINVAL)
+ return 1;
+
+ err = 2;
+ ret = bpf_dynptr_from_skb_meta(skb, 0, &meta);
+ if (ret)
+ return 1;
+
+ err = 3;
+ ret = bpf_dynptr_read(&buf, 0, &meta, 0, INVALID_FLAGS);
+ if (ret != -EINVAL)
+ return 1;
+
+ err = 4;
+ ret = bpf_dynptr_write(&meta, 0, &buf, 0, INVALID_FLAGS);
+ if (ret != -EINVAL)
+ return 1;
+
+ err = 0;
+ return 1;
+}
+
SEC("tp/syscalls/sys_enter_nanosleep")
int test_adjust(void *ctx)
{
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_meta.c b/tools/testing/selftests/bpf/progs/test_xdp_meta.c
index fcf6ca14f2ea..d79cb74b571e 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_meta.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_meta.c
@@ -1,8 +1,11 @@
+#include <stdbool.h>
#include <linux/bpf.h>
+#include <linux/errno.h>
#include <linux/if_ether.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>
+#include "bpf_kfuncs.h"
#define META_SIZE 32
@@ -23,6 +26,8 @@ struct {
__uint(value_size, META_SIZE);
} test_result SEC(".maps");
+bool test_pass;
+
SEC("tc")
int ing_cls(struct __sk_buff *ctx)
{
@@ -40,6 +45,231 @@ int ing_cls(struct __sk_buff *ctx)
return TC_ACT_SHOT;
}
+/* Read from metadata using bpf_dynptr_read helper */
+SEC("tc")
+int ing_cls_dynptr_read(struct __sk_buff *ctx)
+{
+ struct bpf_dynptr meta;
+ const __u32 zero = 0;
+ __u8 *dst;
+
+ dst = bpf_map_lookup_elem(&test_result, &zero);
+ if (!dst)
+ return TC_ACT_SHOT;
+
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ bpf_dynptr_read(dst, META_SIZE, &meta, 0, 0);
+
+ return TC_ACT_SHOT;
+}
+
+/* Write to metadata using bpf_dynptr_write helper */
+SEC("tc")
+int ing_cls_dynptr_write(struct __sk_buff *ctx)
+{
+ struct bpf_dynptr data, meta;
+ __u8 *src;
+
+ bpf_dynptr_from_skb(ctx, 0, &data);
+ src = bpf_dynptr_slice(&data, sizeof(struct ethhdr), NULL, META_SIZE);
+ if (!src)
+ return TC_ACT_SHOT;
+
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ bpf_dynptr_write(&meta, 0, src, META_SIZE, 0);
+
+ return TC_ACT_UNSPEC; /* pass */
+}
+
+/* Read from metadata using read-only dynptr slice */
+SEC("tc")
+int ing_cls_dynptr_slice(struct __sk_buff *ctx)
+{
+ struct bpf_dynptr meta;
+ const __u32 zero = 0;
+ __u8 *dst, *src;
+
+ dst = bpf_map_lookup_elem(&test_result, &zero);
+ if (!dst)
+ return TC_ACT_SHOT;
+
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ src = bpf_dynptr_slice(&meta, 0, NULL, META_SIZE);
+ if (!src)
+ return TC_ACT_SHOT;
+
+ __builtin_memcpy(dst, src, META_SIZE);
+
+ return TC_ACT_SHOT;
+}
+
+/* Write to metadata using writeable dynptr slice */
+SEC("tc")
+int ing_cls_dynptr_slice_rdwr(struct __sk_buff *ctx)
+{
+ struct bpf_dynptr data, meta;
+ __u8 *src, *dst;
+
+ bpf_dynptr_from_skb(ctx, 0, &data);
+ src = bpf_dynptr_slice(&data, sizeof(struct ethhdr), NULL, META_SIZE);
+ if (!src)
+ return TC_ACT_SHOT;
+
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ dst = bpf_dynptr_slice_rdwr(&meta, 0, NULL, META_SIZE);
+ if (!dst)
+ return TC_ACT_SHOT;
+
+ __builtin_memcpy(dst, src, META_SIZE);
+
+ return TC_ACT_UNSPEC; /* pass */
+}
+
+/* Read skb metadata in chunks from various offsets in different ways. */
+SEC("tc")
+int ing_cls_dynptr_offset_rd(struct __sk_buff *ctx)
+{
+ struct bpf_dynptr meta;
+ const __u32 chunk_len = META_SIZE / 4;
+ const __u32 zero = 0;
+ __u8 *dst, *src;
+
+ dst = bpf_map_lookup_elem(&test_result, &zero);
+ if (!dst)
+ return TC_ACT_SHOT;
+
+ /* 1. Regular read */
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ bpf_dynptr_read(dst, chunk_len, &meta, 0, 0);
+ dst += chunk_len;
+
+ /* 2. Read from an offset-adjusted dynptr */
+ bpf_dynptr_adjust(&meta, chunk_len, bpf_dynptr_size(&meta));
+ bpf_dynptr_read(dst, chunk_len, &meta, 0, 0);
+ dst += chunk_len;
+
+ /* 3. Read at an offset */
+ bpf_dynptr_read(dst, chunk_len, &meta, chunk_len, 0);
+ dst += chunk_len;
+
+ /* 4. Read from a slice starting at an offset */
+ src = bpf_dynptr_slice(&meta, 2 * chunk_len, NULL, chunk_len);
+ if (!src)
+ return TC_ACT_SHOT;
+ __builtin_memcpy(dst, src, chunk_len);
+
+ return TC_ACT_SHOT;
+}
+
+/* Write skb metadata in chunks at various offsets in different ways. */
+SEC("tc")
+int ing_cls_dynptr_offset_wr(struct __sk_buff *ctx)
+{
+ const __u32 chunk_len = META_SIZE / 4;
+ __u8 payload[META_SIZE];
+ struct bpf_dynptr meta;
+ __u8 *dst, *src;
+
+ bpf_skb_load_bytes(ctx, sizeof(struct ethhdr), payload, sizeof(payload));
+ src = payload;
+
+ /* 1. Regular write */
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ bpf_dynptr_write(&meta, 0, src, chunk_len, 0);
+ src += chunk_len;
+
+ /* 2. Write to an offset-adjusted dynptr */
+ bpf_dynptr_adjust(&meta, chunk_len, bpf_dynptr_size(&meta));
+ bpf_dynptr_write(&meta, 0, src, chunk_len, 0);
+ src += chunk_len;
+
+ /* 3. Write at an offset */
+ bpf_dynptr_write(&meta, chunk_len, src, chunk_len, 0);
+ src += chunk_len;
+
+ /* 4. Write to a slice starting at an offset */
+ dst = bpf_dynptr_slice_rdwr(&meta, 2 * chunk_len, NULL, chunk_len);
+ if (!dst)
+ return TC_ACT_SHOT;
+ __builtin_memcpy(dst, src, chunk_len);
+
+ return TC_ACT_UNSPEC; /* pass */
+}
+
+/* Pass an OOB offset to dynptr read, write, adjust, slice. */
+SEC("tc")
+int ing_cls_dynptr_offset_oob(struct __sk_buff *ctx)
+{
+ struct bpf_dynptr meta;
+ __u8 md, *p;
+ int err;
+
+ err = bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ if (err)
+ goto fail;
+
+ /* read offset OOB */
+ err = bpf_dynptr_read(&md, sizeof(md), &meta, META_SIZE, 0);
+ if (err != -E2BIG)
+ goto fail;
+
+ /* write offset OOB */
+ err = bpf_dynptr_write(&meta, META_SIZE, &md, sizeof(md), 0);
+ if (err != -E2BIG)
+ goto fail;
+
+ /* adjust end offset OOB */
+ err = bpf_dynptr_adjust(&meta, 0, META_SIZE + 1);
+ if (err != -ERANGE)
+ goto fail;
+
+ /* adjust start offset OOB */
+ err = bpf_dynptr_adjust(&meta, META_SIZE + 1, META_SIZE + 1);
+ if (err != -ERANGE)
+ goto fail;
+
+ /* slice offset OOB */
+ p = bpf_dynptr_slice(&meta, META_SIZE, NULL, sizeof(*p));
+ if (p)
+ goto fail;
+
+ /* slice rdwr offset OOB */
+ p = bpf_dynptr_slice_rdwr(&meta, META_SIZE, NULL, sizeof(*p));
+ if (p)
+ goto fail;
+
+ return TC_ACT_UNSPEC;
+fail:
+ return TC_ACT_SHOT;
+}
+
+/* Reserve and clear space for metadata but don't populate it */
+SEC("xdp")
+int ing_xdp_zalloc_meta(struct xdp_md *ctx)
+{
+ struct ethhdr *eth = ctx_ptr(ctx, data);
+ __u8 *meta;
+ int ret;
+
+ /* Drop any non-test packets */
+ if (eth + 1 > ctx_ptr(ctx, data_end))
+ return XDP_DROP;
+ if (eth->h_proto != 0)
+ return XDP_DROP;
+
+ ret = bpf_xdp_adjust_meta(ctx, -META_SIZE);
+ if (ret < 0)
+ return XDP_DROP;
+
+ meta = ctx_ptr(ctx, data_meta);
+ if (meta + META_SIZE > ctx_ptr(ctx, data))
+ return XDP_DROP;
+
+ __builtin_memset(meta, 0, META_SIZE);
+
+ return XDP_PASS;
+}
+
SEC("xdp")
int ing_xdp(struct xdp_md *ctx)
{
@@ -73,4 +303,193 @@ int ing_xdp(struct xdp_md *ctx)
return XDP_PASS;
}
+/*
+ * Check that skb->data_meta..skb->data is empty if prog writes to packet
+ * _payload_ using packet pointers. Applies only to cloned skbs.
+ */
+SEC("tc")
+int clone_data_meta_empty_on_data_write(struct __sk_buff *ctx)
+{
+ struct ethhdr *eth = ctx_ptr(ctx, data);
+
+ if (eth + 1 > ctx_ptr(ctx, data_end))
+ goto out;
+ /* Ignore non-test packets */
+ if (eth->h_proto != 0)
+ goto out;
+
+ /* Expect no metadata */
+ if (ctx->data_meta != ctx->data)
+ goto out;
+
+ /* Packet write to trigger unclone in prologue */
+ eth->h_proto = 42;
+
+ test_pass = true;
+out:
+ return TC_ACT_SHOT;
+}
+
+/*
+ * Check that skb->data_meta..skb->data is empty if prog writes to packet
+ * _metadata_ using packet pointers. Applies only to cloned skbs.
+ */
+SEC("tc")
+int clone_data_meta_empty_on_meta_write(struct __sk_buff *ctx)
+{
+ struct ethhdr *eth = ctx_ptr(ctx, data);
+ __u8 *md = ctx_ptr(ctx, data_meta);
+
+ if (eth + 1 > ctx_ptr(ctx, data_end))
+ goto out;
+ /* Ignore non-test packets */
+ if (eth->h_proto != 0)
+ goto out;
+
+ if (md + 1 > ctx_ptr(ctx, data)) {
+ /* Expect no metadata */
+ test_pass = true;
+ } else {
+ /* Metadata write to trigger unclone in prologue */
+ *md = 42;
+ }
+out:
+ return TC_ACT_SHOT;
+}
+
+/*
+ * Check that skb_meta dynptr is writable but empty if prog writes to packet
+ * _payload_ using a dynptr slice. Applies only to cloned skbs.
+ */
+SEC("tc")
+int clone_dynptr_empty_on_data_slice_write(struct __sk_buff *ctx)
+{
+ struct bpf_dynptr data, meta;
+ struct ethhdr *eth;
+
+ bpf_dynptr_from_skb(ctx, 0, &data);
+ eth = bpf_dynptr_slice_rdwr(&data, 0, NULL, sizeof(*eth));
+ if (!eth)
+ goto out;
+ /* Ignore non-test packets */
+ if (eth->h_proto != 0)
+ goto out;
+
+ /* Expect no metadata */
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ if (bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) > 0)
+ goto out;
+
+ /* Packet write to trigger unclone in prologue */
+ eth->h_proto = 42;
+
+ test_pass = true;
+out:
+ return TC_ACT_SHOT;
+}
+
+/*
+ * Check that skb_meta dynptr is writable but empty if prog writes to packet
+ * _metadata_ using a dynptr slice. Applies only to cloned skbs.
+ */
+SEC("tc")
+int clone_dynptr_empty_on_meta_slice_write(struct __sk_buff *ctx)
+{
+ struct bpf_dynptr data, meta;
+ const struct ethhdr *eth;
+ __u8 *md;
+
+ bpf_dynptr_from_skb(ctx, 0, &data);
+ eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth));
+ if (!eth)
+ goto out;
+ /* Ignore non-test packets */
+ if (eth->h_proto != 0)
+ goto out;
+
+ /* Expect no metadata */
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ if (bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) > 0)
+ goto out;
+
+ /* Metadata write to trigger unclone in prologue */
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ md = bpf_dynptr_slice_rdwr(&meta, 0, NULL, sizeof(*md));
+ if (md)
+ *md = 42;
+
+ test_pass = true;
+out:
+ return TC_ACT_SHOT;
+}
+
+/*
+ * Check that skb_meta dynptr is read-only before prog writes to packet payload
+ * using dynptr_write helper. Applies only to cloned skbs.
+ */
+SEC("tc")
+int clone_dynptr_rdonly_before_data_dynptr_write(struct __sk_buff *ctx)
+{
+ struct bpf_dynptr data, meta;
+ const struct ethhdr *eth;
+
+ bpf_dynptr_from_skb(ctx, 0, &data);
+ eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth));
+ if (!eth)
+ goto out;
+ /* Ignore non-test packets */
+ if (eth->h_proto != 0)
+ goto out;
+
+ /* Expect read-only metadata before unclone */
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ if (!bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) != META_SIZE)
+ goto out;
+
+ /* Helper write to payload will unclone the packet */
+ bpf_dynptr_write(&data, offsetof(struct ethhdr, h_proto), "x", 1, 0);
+
+ /* Expect no metadata after unclone */
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ if (bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) != 0)
+ goto out;
+
+ test_pass = true;
+out:
+ return TC_ACT_SHOT;
+}
+
+/*
+ * Check that skb_meta dynptr is read-only if prog writes to packet
+ * metadata using dynptr_write helper. Applies only to cloned skbs.
+ */
+SEC("tc")
+int clone_dynptr_rdonly_before_meta_dynptr_write(struct __sk_buff *ctx)
+{
+ struct bpf_dynptr data, meta;
+ const struct ethhdr *eth;
+
+ bpf_dynptr_from_skb(ctx, 0, &data);
+ eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth));
+ if (!eth)
+ goto out;
+ /* Ignore non-test packets */
+ if (eth->h_proto != 0)
+ goto out;
+
+ /* Expect read-only metadata */
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ if (!bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) != META_SIZE)
+ goto out;
+
+ /* Metadata write. Expect failure. */
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ if (bpf_dynptr_write(&meta, 0, "x", 1, 0) != -EINVAL)
+ goto out;
+
+ test_pass = true;
+out:
+ return TC_ACT_SHOT;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/drivers/net/hds.py b/tools/testing/selftests/drivers/net/hds.py
index 7c90a040ce45..a2011474e625 100755
--- a/tools/testing/selftests/drivers/net/hds.py
+++ b/tools/testing/selftests/drivers/net/hds.py
@@ -3,6 +3,7 @@
import errno
import os
+from typing import Union
from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_raises, KsftSkipEx
from lib.py import CmdExitFailure, EthtoolFamily, NlError
from lib.py import NetDrvEnv
@@ -58,7 +59,39 @@ def get_hds_thresh(cfg, netnl) -> None:
if 'hds-thresh' not in rings:
raise KsftSkipEx('hds-thresh not supported by device')
+
+def _hds_reset(cfg, netnl, rings) -> None:
+ cur = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+
+ arg = {'header': {'dev-index': cfg.ifindex}}
+ if cur.get('tcp-data-split') != rings.get('tcp-data-split'):
+ # Try to reset to "unknown" first, we don't know if the setting
+ # was the default or user chose it. Default seems more likely.
+ arg['tcp-data-split'] = "unknown"
+ netnl.rings_set(arg)
+ cur = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+ if cur['tcp-data-split'] == rings['tcp-data-split']:
+ del arg['tcp-data-split']
+ else:
+ # Try the explicit setting
+ arg['tcp-data-split'] = rings['tcp-data-split']
+ if cur.get('hds-thresh') != rings.get('hds-thresh'):
+ arg['hds-thresh'] = rings['hds-thresh']
+ if len(arg) > 1:
+ netnl.rings_set(arg)
+
+
+def _defer_reset_hds(cfg, netnl) -> Union[dict, None]:
+ try:
+ rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+ if 'hds-thresh' in rings or 'tcp-data-split' in rings:
+ defer(_hds_reset, cfg, netnl, rings)
+ except NlError:
+ pass
+
+
def set_hds_enable(cfg, netnl) -> None:
+ _defer_reset_hds(cfg, netnl)
try:
netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'tcp-data-split': 'enabled'})
except NlError as e:
@@ -76,6 +109,7 @@ def set_hds_enable(cfg, netnl) -> None:
ksft_eq('enabled', rings['tcp-data-split'])
def set_hds_disable(cfg, netnl) -> None:
+ _defer_reset_hds(cfg, netnl)
try:
netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'tcp-data-split': 'disabled'})
except NlError as e:
@@ -93,6 +127,7 @@ def set_hds_disable(cfg, netnl) -> None:
ksft_eq('disabled', rings['tcp-data-split'])
def set_hds_thresh_zero(cfg, netnl) -> None:
+ _defer_reset_hds(cfg, netnl)
try:
netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'hds-thresh': 0})
except NlError as e:
@@ -110,6 +145,7 @@ def set_hds_thresh_zero(cfg, netnl) -> None:
ksft_eq(0, rings['hds-thresh'])
def set_hds_thresh_random(cfg, netnl) -> None:
+ _defer_reset_hds(cfg, netnl)
try:
rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
except NlError as e:
@@ -140,6 +176,7 @@ def set_hds_thresh_random(cfg, netnl) -> None:
ksft_eq(hds_thresh, rings['hds-thresh'])
def set_hds_thresh_max(cfg, netnl) -> None:
+ _defer_reset_hds(cfg, netnl)
try:
rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
except NlError as e:
@@ -157,6 +194,7 @@ def set_hds_thresh_max(cfg, netnl) -> None:
ksft_eq(rings['hds-thresh'], rings['hds-thresh-max'])
def set_hds_thresh_gt(cfg, netnl) -> None:
+ _defer_reset_hds(cfg, netnl)
try:
rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
except NlError as e:
@@ -178,6 +216,7 @@ def set_xdp(cfg, netnl) -> None:
"""
mode = _get_hds_mode(cfg, netnl)
if mode == 'enabled':
+ _defer_reset_hds(cfg, netnl)
netnl.rings_set({'header': {'dev-index': cfg.ifindex},
'tcp-data-split': 'unknown'})
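
A note on the pattern introduced above: every test now snapshots the ring
settings up front and registers a deferred restore, so a failed assertion can
no longer leak tcp-data-split or hds-thresh changes into later tests.
Lifecycle sketch (assumes defer() from the ksft library queues the callback to
run at test teardown):

    # Snapshot current settings and arrange for them to be restored.
    rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})   # snapshot
    defer(_hds_reset, cfg, netnl, rings)                              # restore at exit

    # ... test body mutates tcp-data-split / hds-thresh ...

    # On restore, _hds_reset() first tries tcp-data-split "unknown" (the
    # likely driver default); only if that does not land on the snapshotted
    # value does it set the snapshot explicitly, then re-applies hds-thresh.
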
diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile
index fdc97355588c..5159fd34cb33 100644
--- a/tools/testing/selftests/drivers/net/hw/Makefile
+++ b/tools/testing/selftests/drivers/net/hw/Makefile
@@ -18,6 +18,7 @@ TEST_PROGS = \
pp_alloc_fail.py \
rss_api.py \
rss_ctx.py \
+ rss_flow_label.py \
rss_input_xfrm.py \
tso.py \
xsk_reconfig.py \
diff --git a/tools/testing/selftests/drivers/net/hw/config b/tools/testing/selftests/drivers/net/hw/config
index 88ae719e6f8f..e8a06aa1471c 100644
--- a/tools/testing/selftests/drivers/net/hw/config
+++ b/tools/testing/selftests/drivers/net/hw/config
@@ -1,5 +1,7 @@
+CONFIG_IO_URING=y
CONFIG_IPV6=y
CONFIG_IPV6_GRE=y
CONFIG_NET_IPGRE=y
CONFIG_NET_IPGRE_DEMUX=y
+CONFIG_UDMABUF=y
CONFIG_VXLAN=y
diff --git a/tools/testing/selftests/drivers/net/hw/devmem.py b/tools/testing/selftests/drivers/net/hw/devmem.py
index baa2f24240ba..45c2d49d55b6 100755
--- a/tools/testing/selftests/drivers/net/hw/devmem.py
+++ b/tools/testing/selftests/drivers/net/hw/devmem.py
@@ -24,7 +24,7 @@ def check_rx(cfg) -> None:
require_devmem(cfg)
port = rand_port()
- socat = f"socat -u - TCP{cfg.addr_ipver}:{cfg.addr}:{port},bind={cfg.remote_addr}:{port}"
+ socat = f"socat -u - TCP{cfg.addr_ipver}:{cfg.baddr}:{port},bind={cfg.remote_baddr}:{port}"
listen_cmd = f"{cfg.bin_local} -l -f {cfg.ifname} -s {cfg.addr} -p {port} -c {cfg.remote_addr} -v 7"
with bkg(listen_cmd, exit_wait=True) as ncdevmem:
@@ -42,9 +42,9 @@ def check_tx(cfg) -> None:
port = rand_port()
listen_cmd = f"socat -U - TCP{cfg.addr_ipver}-LISTEN:{port}"
- with bkg(listen_cmd) as socat:
- wait_port_listen(port)
- cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_remote} -f {cfg.ifname} -s {cfg.addr} -p {port}", host=cfg.remote, shell=True)
+ with bkg(listen_cmd, host=cfg.remote, exit_wait=True) as socat:
+ wait_port_listen(port, host=cfg.remote)
+ cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_local} -f {cfg.ifname} -s {cfg.remote_addr} -p {port}", shell=True)
ksft_eq(socat.stdout.strip(), "hello\nworld")
@@ -56,9 +56,9 @@ def check_tx_chunks(cfg) -> None:
port = rand_port()
listen_cmd = f"socat -U - TCP{cfg.addr_ipver}-LISTEN:{port}"
- with bkg(listen_cmd, exit_wait=True) as socat:
- wait_port_listen(port)
- cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_remote} -f {cfg.ifname} -s {cfg.addr} -p {port} -z 3", host=cfg.remote, shell=True)
+ with bkg(listen_cmd, host=cfg.remote, exit_wait=True) as socat:
+ wait_port_listen(port, host=cfg.remote)
+ cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_local} -f {cfg.ifname} -s {cfg.remote_addr} -p {port} -z 3", shell=True)
ksft_eq(socat.stdout.strip(), "hello\nworld")
diff --git a/tools/testing/selftests/drivers/net/hw/ncdevmem.c b/tools/testing/selftests/drivers/net/hw/ncdevmem.c
index 72f828021f83..8dc9511d046f 100644
--- a/tools/testing/selftests/drivers/net/hw/ncdevmem.c
+++ b/tools/testing/selftests/drivers/net/hw/ncdevmem.c
@@ -39,6 +39,7 @@
#define __EXPORTED_HEADERS__
#include <linux/uio.h>
+#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
@@ -97,6 +98,10 @@ static unsigned int dmabuf_id;
static uint32_t tx_dmabuf_id;
static int waittime_ms = 500;
+/* IDs of the ntuple flow rules installed by this process; removed by reset_flow_steering() */
+#define MAX_FLOWS 8
+static int ntuple_ids[MAX_FLOWS] = { -1, -1, -1, -1, -1, -1, -1, -1, };
+
struct memory_buffer {
int fd;
size_t size;
@@ -115,6 +120,21 @@ struct memory_provider {
size_t off, int n);
};
+static void pr_err(const char *fmt, ...)
+{
+ va_list args;
+
+ fprintf(stderr, "%s: ", TEST_PREFIX);
+
+ va_start(args, fmt);
+ vfprintf(stderr, fmt, args);
+ va_end(args);
+
+ if (errno != 0)
+ fprintf(stderr, ": %s", strerror(errno));
+ fprintf(stderr, "\n");
+}
+
static struct memory_buffer *udmabuf_alloc(size_t size)
{
struct udmabuf_create create;
@@ -123,27 +143,33 @@ static struct memory_buffer *udmabuf_alloc(size_t size)
ctx = malloc(sizeof(*ctx));
if (!ctx)
- error(1, ENOMEM, "malloc failed");
+ return NULL;
ctx->size = size;
ctx->devfd = open("/dev/udmabuf", O_RDWR);
- if (ctx->devfd < 0)
- error(1, errno,
- "%s: [skip,no-udmabuf: Unable to access DMA buffer device file]\n",
- TEST_PREFIX);
+ if (ctx->devfd < 0) {
+ pr_err("[skip,no-udmabuf: Unable to access DMA buffer device file]");
+ goto err_free_ctx;
+ }
ctx->memfd = memfd_create("udmabuf-test", MFD_ALLOW_SEALING);
- if (ctx->memfd < 0)
- error(1, errno, "%s: [skip,no-memfd]\n", TEST_PREFIX);
+ if (ctx->memfd < 0) {
+ pr_err("[skip,no-memfd]");
+ goto err_close_dev;
+ }
ret = fcntl(ctx->memfd, F_ADD_SEALS, F_SEAL_SHRINK);
- if (ret < 0)
- error(1, errno, "%s: [skip,fcntl-add-seals]\n", TEST_PREFIX);
+ if (ret < 0) {
+ pr_err("[skip,fcntl-add-seals]");
+ goto err_close_memfd;
+ }
ret = ftruncate(ctx->memfd, size);
- if (ret == -1)
- error(1, errno, "%s: [FAIL,memfd-truncate]\n", TEST_PREFIX);
+ if (ret == -1) {
+ pr_err("[FAIL,memfd-truncate]");
+ goto err_close_memfd;
+ }
memset(&create, 0, sizeof(create));
@@ -151,15 +177,29 @@ static struct memory_buffer *udmabuf_alloc(size_t size)
create.offset = 0;
create.size = size;
ctx->fd = ioctl(ctx->devfd, UDMABUF_CREATE, &create);
- if (ctx->fd < 0)
- error(1, errno, "%s: [FAIL, create udmabuf]\n", TEST_PREFIX);
+ if (ctx->fd < 0) {
+ pr_err("[FAIL, create udmabuf]");
+ goto err_close_fd;
+ }
ctx->buf_mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
ctx->fd, 0);
- if (ctx->buf_mem == MAP_FAILED)
- error(1, errno, "%s: [FAIL, map udmabuf]\n", TEST_PREFIX);
+ if (ctx->buf_mem == MAP_FAILED) {
+ pr_err("[FAIL, map udmabuf]");
+ goto err_close_fd;
+ }
return ctx;
+
+err_close_fd:
+ close(ctx->fd);
+err_close_memfd:
+ close(ctx->memfd);
+err_close_dev:
+ close(ctx->devfd);
+err_free_ctx:
+ free(ctx);
+ return NULL;
}
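
The hunk above is representative of the whole conversion: error(1, ...) aborts become goto-based unwinding, so udmabuf_alloc() can fail cleanly and callers get a chance to restore NIC state before exiting. A minimal sketch of the idiom with hypothetical resource names (not taken from the patch):

#include <stdlib.h>

/* Acquire resources in order; release them in reverse, one label per
 * resource that was successfully acquired before the failure.
 */
static int do_work(void)
{
	char *a, *b;
	int err = -1;

	a = malloc(64);
	if (!a)
		return -1;

	b = malloc(64);
	if (!b)
		goto err_free_a;

	/* ... use a and b ... */
	err = 0;

	free(b);
err_free_a:
	free(a);
	return err;
}

int main(void)
{
	return do_work();
}
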
static void udmabuf_free(struct memory_buffer *ctx)
@@ -217,7 +257,7 @@ static void print_nonzero_bytes(void *ptr, size_t size)
putchar(p[i]);
}
-void validate_buffer(void *line, size_t size)
+int validate_buffer(void *line, size_t size)
{
static unsigned char seed = 1;
unsigned char *ptr = line;
@@ -232,8 +272,10 @@ void validate_buffer(void *line, size_t size)
"Failed validation: expected=%u, actual=%u, index=%lu\n",
expected, ptr[i], i);
errors++;
- if (errors > 20)
- error(1, 0, "validation failed.");
+ if (errors > 20) {
+ pr_err("validation failed");
+ return -1;
+ }
}
seed++;
if (seed == do_validation)
@@ -241,6 +283,86 @@ void validate_buffer(void *line, size_t size)
}
fprintf(stdout, "Validated buffer\n");
+ return 0;
+}
+
+static int
+__run_command(char *out, size_t outlen, const char *cmd, va_list args)
+{
+ char command[256];
+ FILE *fp;
+
+ vsnprintf(command, sizeof(command), cmd, args);
+
+ fprintf(stderr, "Running: %s\n", command);
+ fp = popen(command, "r");
+ if (!fp)
+ return -1;
+ if (out) {
+ size_t len;
+
+ if (!fgets(out, outlen, fp)) {
+ pclose(fp);
+ return -1;
+ }
+
+ /* Remove trailing newline if present */
+ len = strlen(out);
+ if (len && out[len - 1] == '\n')
+ out[len - 1] = '\0';
+ }
+ return pclose(fp);
+}
+
+static int run_command(const char *cmd, ...)
+{
+ va_list args;
+ int ret;
+
+ va_start(args, cmd);
+ ret = __run_command(NULL, 0, cmd, args);
+ va_end(args);
+
+ return ret;
+}
+
+static int ethtool_add_flow(const char *format, ...)
+{
+ char local_output[256], cmd[256];
+ const char *id_start;
+ int flow_idx, ret;
+ char *endptr;
+ long flow_id;
+ va_list args;
+
+ for (flow_idx = 0; flow_idx < MAX_FLOWS; flow_idx++)
+ if (ntuple_ids[flow_idx] == -1)
+ break;
+ if (flow_idx == MAX_FLOWS) {
+ fprintf(stderr, "Error: too many flows\n");
+ return -1;
+ }
+
+ snprintf(cmd, sizeof(cmd), "ethtool -N %s %s", ifname, format);
+
+ va_start(args, format);
+ ret = __run_command(local_output, sizeof(local_output), cmd, args);
+ va_end(args);
+
+ if (ret != 0)
+ return ret;
+
+ /* Extract the ID from the output */
+ id_start = strstr(local_output, "Added rule with ID ");
+ if (!id_start)
+ return -1;
+ id_start += strlen("Added rule with ID ");
+
+ flow_id = strtol(id_start, &endptr, 10);
+ if (endptr == id_start || flow_id < 0 || flow_id > INT_MAX)
+ return -1;
+
+ fprintf(stderr, "Added flow rule with ID %ld\n", flow_id);
+ ntuple_ids[flow_idx] = flow_id;
+ return flow_id;
}
static int rxq_num(int ifindex)
@@ -270,29 +392,17 @@ static int rxq_num(int ifindex)
return num;
}
-#define run_command(cmd, ...) \
- ({ \
- char command[256]; \
- memset(command, 0, sizeof(command)); \
- snprintf(command, sizeof(command), cmd, ##__VA_ARGS__); \
- fprintf(stderr, "Running: %s\n", command); \
- system(command); \
- })
-
-static int reset_flow_steering(void)
+static void reset_flow_steering(void)
{
- /* Depending on the NIC, toggling ntuple off and on might not
- * be allowed. Additionally, attempting to delete existing filters
- * will fail if no filters are present. Therefore, do not enforce
- * the exit status.
- */
-
- run_command("sudo ethtool -K %s ntuple off >&2", ifname);
- run_command("sudo ethtool -K %s ntuple on >&2", ifname);
- run_command(
- "sudo ethtool -n %s | grep 'Filter:' | awk '{print $2}' | xargs -n1 ethtool -N %s delete >&2",
- ifname, ifname);
- return 0;
+ int i;
+
+ for (i = 0; i < MAX_FLOWS; i++) {
+ if (ntuple_ids[i] == -1)
+ continue;
+ run_command("ethtool -N %s delete %d",
+ ifname, ntuple_ids[i]);
+ ntuple_ids[i] = -1;
+ }
}
static const char *tcp_data_split_str(int val)
@@ -309,7 +419,81 @@ static const char *tcp_data_split_str(int val)
}
}
-static int configure_headersplit(bool on)
+static struct ethtool_rings_get_rsp *get_ring_config(void)
+{
+ struct ethtool_rings_get_req *get_req;
+ struct ethtool_rings_get_rsp *get_rsp;
+ struct ynl_error yerr;
+ struct ynl_sock *ys;
+
+ ys = ynl_sock_create(&ynl_ethtool_family, &yerr);
+ if (!ys) {
+ fprintf(stderr, "YNL: %s\n", yerr.msg);
+ return NULL;
+ }
+
+ get_req = ethtool_rings_get_req_alloc();
+ ethtool_rings_get_req_set_header_dev_index(get_req, ifindex);
+ get_rsp = ethtool_rings_get(ys, get_req);
+ ethtool_rings_get_req_free(get_req);
+
+ ynl_sock_destroy(ys);
+
+ return get_rsp;
+}
+
+static void restore_ring_config(const struct ethtool_rings_get_rsp *config)
+{
+ struct ethtool_rings_get_req *get_req;
+ struct ethtool_rings_get_rsp *get_rsp;
+ struct ethtool_rings_set_req *req;
+ struct ynl_error yerr;
+ struct ynl_sock *ys;
+ int ret;
+
+ if (!config)
+ return;
+
+ ys = ynl_sock_create(&ynl_ethtool_family, &yerr);
+ if (!ys) {
+ fprintf(stderr, "YNL: %s\n", yerr.msg);
+ return;
+ }
+
+ req = ethtool_rings_set_req_alloc();
+ ethtool_rings_set_req_set_header_dev_index(req, ifindex);
+ ethtool_rings_set_req_set_tcp_data_split(req,
+ ETHTOOL_TCP_DATA_SPLIT_UNKNOWN);
+ if (config->_present.hds_thresh)
+ ethtool_rings_set_req_set_hds_thresh(req, config->hds_thresh);
+
+ ret = ethtool_rings_set(ys, req);
+ if (ret < 0)
+ fprintf(stderr, "YNL restoring HDS cfg: %s\n", ys->err.msg);
+
+ get_req = ethtool_rings_get_req_alloc();
+ ethtool_rings_get_req_set_header_dev_index(get_req, ifindex);
+ get_rsp = ethtool_rings_get(ys, get_req);
+ ethtool_rings_get_req_free(get_req);
+
+ /* use explicit value if UNKNOWN didn't give us the previous */
+ if (get_rsp->tcp_data_split != config->tcp_data_split) {
+ ethtool_rings_set_req_set_tcp_data_split(req,
+ config->tcp_data_split);
+ ret = ethtool_rings_set(ys, req);
+ if (ret < 0)
+ fprintf(stderr, "YNL restoring expl HDS cfg: %s\n",
+ ys->err.msg);
+ }
+
+ ethtool_rings_get_rsp_free(get_rsp);
+ ethtool_rings_set_req_free(req);
+
+ ynl_sock_destroy(ys);
+}
+
+static int
+configure_headersplit(const struct ethtool_rings_get_rsp *old, bool on)
{
struct ethtool_rings_get_req *get_req;
struct ethtool_rings_get_rsp *get_rsp;
@@ -326,8 +510,15 @@ static int configure_headersplit(bool on)
req = ethtool_rings_set_req_alloc();
ethtool_rings_set_req_set_header_dev_index(req, ifindex);
- /* 0 - off, 1 - auto, 2 - on */
- ethtool_rings_set_req_set_tcp_data_split(req, on ? 2 : 0);
+ if (on) {
+ ethtool_rings_set_req_set_tcp_data_split(req,
+ ETHTOOL_TCP_DATA_SPLIT_ENABLED);
+ if (old->_present.hds_thresh)
+ ethtool_rings_set_req_set_hds_thresh(req, 0);
+ } else {
+ ethtool_rings_set_req_set_tcp_data_split(req,
+ ETHTOOL_TCP_DATA_SPLIT_UNKNOWN);
+ }
ret = ethtool_rings_set(ys, req);
if (ret < 0)
fprintf(stderr, "YNL failed: %s\n", ys->err.msg);
@@ -351,12 +542,103 @@ static int configure_headersplit(bool on)
static int configure_rss(void)
{
- return run_command("sudo ethtool -X %s equal %d >&2", ifname, start_queue);
+ return run_command("ethtool -X %s equal %d >&2", ifname, start_queue);
+}
+
+static void reset_rss(void)
+{
+ run_command("ethtool -X %s default >&2", ifname, start_queue);
}
-static int configure_channels(unsigned int rx, unsigned int tx)
+static int check_changing_channels(unsigned int rx, unsigned int tx)
{
- return run_command("sudo ethtool -L %s rx %u tx %u", ifname, rx, tx);
+ struct ethtool_channels_get_req *gchan;
+ struct ethtool_channels_set_req *schan;
+ struct ethtool_channels_get_rsp *chan;
+ struct ynl_error yerr;
+ struct ynl_sock *ys;
+ int ret;
+
+ fprintf(stderr, "setting channel count rx:%u tx:%u\n", rx, tx);
+
+ ys = ynl_sock_create(&ynl_ethtool_family, &yerr);
+ if (!ys) {
+ fprintf(stderr, "YNL: %s\n", yerr.msg);
+ return -1;
+ }
+
+ gchan = ethtool_channels_get_req_alloc();
+ if (!gchan) {
+ ret = -1;
+ goto exit_close_sock;
+ }
+
+ ethtool_channels_get_req_set_header_dev_index(gchan, ifindex);
+ chan = ethtool_channels_get(ys, gchan);
+ ethtool_channels_get_req_free(gchan);
+ if (!chan) {
+ fprintf(stderr, "YNL get channels: %s\n", ys->err.msg);
+ ret = -1;
+ goto exit_close_sock;
+ }
+
+ schan = ethtool_channels_set_req_alloc();
+ if (!schan) {
+ ret = -1;
+ goto exit_free_chan;
+ }
+
+ ethtool_channels_set_req_set_header_dev_index(schan, ifindex);
+
+ if (chan->_present.combined_count) {
+ if (chan->_present.rx_count || chan->_present.tx_count) {
+ ethtool_channels_set_req_set_rx_count(schan, 0);
+ ethtool_channels_set_req_set_tx_count(schan, 0);
+ }
+
+ if (rx == tx) {
+ ethtool_channels_set_req_set_combined_count(schan, rx);
+ } else if (rx > tx) {
+ ethtool_channels_set_req_set_combined_count(schan, tx);
+ ethtool_channels_set_req_set_rx_count(schan, rx - tx);
+ } else {
+ ethtool_channels_set_req_set_combined_count(schan, rx);
+ ethtool_channels_set_req_set_tx_count(schan, tx - rx);
+ }
+
+ } else if (chan->_present.rx_count) {
+ ethtool_channels_set_req_set_rx_count(schan, rx);
+ ethtool_channels_set_req_set_tx_count(schan, tx);
+ } else {
+ fprintf(stderr, "Error: device has neither combined nor rx channels\n");
+ ret = -1;
+ goto exit_free_schan;
+ }
+
+ ret = ethtool_channels_set(ys, schan);
+ if (ret) {
+ fprintf(stderr, "YNL set channels: %s\n", ys->err.msg);
+ } else {
+ /* We were expecting a failure, go back to previous settings */
+ ethtool_channels_set_req_set_combined_count(schan,
+ chan->combined_count);
+ ethtool_channels_set_req_set_rx_count(schan, chan->rx_count);
+ ethtool_channels_set_req_set_tx_count(schan, chan->tx_count);
+
+ ret = ethtool_channels_set(ys, schan);
+ if (ret)
+ fprintf(stderr, "YNL un-setting channels: %s\n",
+ ys->err.msg);
+ }
+
+exit_free_schan:
+ ethtool_channels_set_req_free(schan);
+exit_free_chan:
+ ethtool_channels_get_rsp_free(chan);
+exit_close_sock:
+ ynl_sock_destroy(ys);
+
+ return ret;
}
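
The rx/tx-to-combined arithmetic in check_changing_channels() is easier to follow with concrete numbers; the standalone sketch below (illustrative only) reproduces the mapping: rx=3, tx=1 becomes combined=1 plus two extra rx channels.

#include <stdio.h>

static void map_channels(unsigned int rx, unsigned int tx)
{
	unsigned int combined = rx < tx ? rx : tx;

	/* mirrors the three branches above: equal counts collapse to
	 * combined; the surplus direction gets a dedicated count
	 */
	printf("rx=%u tx=%u -> combined=%u rx_extra=%u tx_extra=%u\n",
	       rx, tx, combined, rx - combined, tx - combined);
}

int main(void)
{
	map_channels(3, 1);	/* combined=1 rx_extra=2 tx_extra=0 */
	map_channels(2, 2);	/* combined=2 rx_extra=0 tx_extra=0 */
	return 0;
}
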
static int configure_flow_steering(struct sockaddr_in6 *server_sin)
@@ -364,6 +646,7 @@ static int configure_flow_steering(struct sockaddr_in6 *server_sin)
const char *type = "tcp6";
const char *server_addr;
char buf[40];
+ int flow_id;
inet_ntop(AF_INET6, &server_sin->sin6_addr, buf, sizeof(buf));
server_addr = buf;
@@ -374,23 +657,22 @@ static int configure_flow_steering(struct sockaddr_in6 *server_sin)
}
/* Try configure 5-tuple */
- if (run_command("sudo ethtool -N %s flow-type %s %s %s dst-ip %s %s %s dst-port %s queue %d >&2",
- ifname,
- type,
- client_ip ? "src-ip" : "",
- client_ip ?: "",
- server_addr,
- client_ip ? "src-port" : "",
- client_ip ? port : "",
- port, start_queue))
+ flow_id = ethtool_add_flow("flow-type %s %s %s dst-ip %s %s %s dst-port %s queue %d",
+ type,
+ client_ip ? "src-ip" : "",
+ client_ip ?: "",
+ server_addr,
+ client_ip ? "src-port" : "",
+ client_ip ? port : "",
+ port, start_queue);
+ if (flow_id < 0) {
/* If that fails, try configure 3-tuple */
- if (run_command("sudo ethtool -N %s flow-type %s dst-ip %s dst-port %s queue %d >&2",
- ifname,
- type,
- server_addr,
- port, start_queue))
+ flow_id = ethtool_add_flow("flow-type %s dst-ip %s dst-port %s queue %d",
+ type, server_addr, port, start_queue);
+ if (flow_id < 0)
/* If that fails, return error */
return -1;
+ }
return 0;
}
@@ -405,6 +687,7 @@ static int bind_rx_queue(unsigned int ifindex, unsigned int dmabuf_fd,
*ys = ynl_sock_create(&ynl_netdev_family, &yerr);
if (!*ys) {
+ netdev_queue_id_free(queues);
fprintf(stderr, "YNL: %s\n", yerr.msg);
return -1;
}
@@ -483,18 +766,24 @@ err_close:
return -1;
}
-static void enable_reuseaddr(int fd)
+static int enable_reuseaddr(int fd)
{
int opt = 1;
int ret;
ret = setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt));
- if (ret)
- error(1, errno, "%s: [FAIL, SO_REUSEPORT]\n", TEST_PREFIX);
+ if (ret) {
+ pr_err("SO_REUSEPORT failed");
+ return -1;
+ }
ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt));
- if (ret)
- error(1, errno, "%s: [FAIL, SO_REUSEADDR]\n", TEST_PREFIX);
+ if (ret) {
+ pr_err("SO_REUSEADDR failed");
+ return -1;
+ }
+
+ return 0;
}
static int parse_address(const char *str, int port, struct sockaddr_in6 *sin6)
@@ -537,6 +826,7 @@ static struct netdev_queue_id *create_queues(void)
static int do_server(struct memory_buffer *mem)
{
+ struct ethtool_rings_get_rsp *ring_config;
char ctrl_data[sizeof(int) * 20000];
size_t non_page_aligned_frags = 0;
struct sockaddr_in6 client_addr;
@@ -548,54 +838,74 @@ static int do_server(struct memory_buffer *mem)
char *tmp_mem = NULL;
struct ynl_sock *ys;
char iobuf[819200];
+ int ret, err = -1;
char buffer[256];
int socket_fd;
int client_fd;
- int ret;
ret = parse_address(server_ip, atoi(port), &server_sin);
- if (ret < 0)
- error(1, 0, "parse server address");
+ if (ret < 0) {
+ pr_err("parse server address");
+ return -1;
+ }
- if (reset_flow_steering())
- error(1, 0, "Failed to reset flow steering\n");
+ ring_config = get_ring_config();
+ if (!ring_config) {
+ pr_err("Failed to get current ring configuration");
+ return -1;
+ }
- if (configure_headersplit(1))
- error(1, 0, "Failed to enable TCP header split\n");
+ if (configure_headersplit(ring_config, 1)) {
+ pr_err("Failed to enable TCP header split");
+ goto err_free_ring_config;
+ }
/* Configure RSS to divert all traffic from our devmem queues */
- if (configure_rss())
- error(1, 0, "Failed to configure rss\n");
+ if (configure_rss()) {
+ pr_err("Failed to configure rss");
+ goto err_reset_headersplit;
+ }
/* Flow steer our devmem flows to start_queue */
- if (configure_flow_steering(&server_sin))
- error(1, 0, "Failed to configure flow steering\n");
+ if (configure_flow_steering(&server_sin)) {
+ pr_err("Failed to configure flow steering");
+ goto err_reset_rss;
+ }
sleep(1);
- if (bind_rx_queue(ifindex, mem->fd, create_queues(), num_queues, &ys))
- error(1, 0, "Failed to bind\n");
+ if (bind_rx_queue(ifindex, mem->fd, create_queues(), num_queues, &ys)) {
+ pr_err("Failed to bind");
+ goto err_reset_flow_steering;
+ }
tmp_mem = malloc(mem->size);
if (!tmp_mem)
- error(1, ENOMEM, "malloc failed");
+ goto err_unbind;
socket_fd = socket(AF_INET6, SOCK_STREAM, 0);
- if (socket_fd < 0)
- error(1, errno, "%s: [FAIL, create socket]\n", TEST_PREFIX);
+ if (socket_fd < 0) {
+ pr_err("Failed to create socket");
+ goto err_free_tmp;
+ }
- enable_reuseaddr(socket_fd);
+ if (enable_reuseaddr(socket_fd))
+ goto err_close_socket;
fprintf(stderr, "binding to address %s:%d\n", server_ip,
ntohs(server_sin.sin6_port));
ret = bind(socket_fd, &server_sin, sizeof(server_sin));
- if (ret)
- error(1, errno, "%s: [FAIL, bind]\n", TEST_PREFIX);
+ if (ret) {
+ pr_err("Failed to bind");
+ goto err_close_socket;
+ }
ret = listen(socket_fd, 1);
- if (ret)
- error(1, errno, "%s: [FAIL, listen]\n", TEST_PREFIX);
+ if (ret) {
+ pr_err("Failed to listen");
+ goto err_close_socket;
+ }
client_addr_len = sizeof(client_addr);
@@ -604,6 +914,10 @@ static int do_server(struct memory_buffer *mem)
fprintf(stderr, "Waiting or connection on %s:%d\n", buffer,
ntohs(server_sin.sin6_port));
client_fd = accept(socket_fd, &client_addr, &client_addr_len);
+ if (client_fd < 0) {
+ pr_err("Failed to accept");
+ goto err_close_socket;
+ }
inet_ntop(AF_INET6, &client_addr.sin6_addr, buffer,
sizeof(buffer));
@@ -634,7 +948,8 @@ static int do_server(struct memory_buffer *mem)
continue;
}
if (ret == 0) {
- fprintf(stderr, "client exited\n");
+ errno = 0;
+ pr_err("client exited");
goto cleanup;
}
@@ -672,9 +987,10 @@ static int do_server(struct memory_buffer *mem)
dmabuf_cmsg->frag_size, dmabuf_cmsg->frag_token,
total_received, dmabuf_cmsg->dmabuf_id);
- if (dmabuf_cmsg->dmabuf_id != dmabuf_id)
- error(1, 0,
- "received on wrong dmabuf_id: flow steering error\n");
+ if (dmabuf_cmsg->dmabuf_id != dmabuf_id) {
+ pr_err("received on wrong dmabuf_id: flow steering error");
+ goto err_close_client;
+ }
if (dmabuf_cmsg->frag_size % getpagesize())
non_page_aligned_frags++;
@@ -685,22 +1001,27 @@ static int do_server(struct memory_buffer *mem)
dmabuf_cmsg->frag_offset,
dmabuf_cmsg->frag_size);
- if (do_validation)
- validate_buffer(tmp_mem,
- dmabuf_cmsg->frag_size);
- else
+ if (do_validation) {
+ if (validate_buffer(tmp_mem,
+ dmabuf_cmsg->frag_size))
+ goto err_close_client;
+ } else {
print_nonzero_bytes(tmp_mem,
dmabuf_cmsg->frag_size);
+ }
ret = setsockopt(client_fd, SOL_SOCKET,
SO_DEVMEM_DONTNEED, &token,
sizeof(token));
- if (ret != 1)
- error(1, 0,
- "SO_DEVMEM_DONTNEED not enough tokens");
+ if (ret != 1) {
+ pr_err("SO_DEVMEM_DONTNEED not enough tokens");
+ goto err_close_client;
+ }
+ }
+ if (!is_devmem) {
+ pr_err("flow steering error");
+ goto err_close_client;
}
- if (!is_devmem)
- error(1, 0, "flow steering error\n");
fprintf(stderr, "total_received=%lu\n", total_received);
}
@@ -711,54 +1032,121 @@ static int do_server(struct memory_buffer *mem)
page_aligned_frags, non_page_aligned_frags);
cleanup:
+ err = 0;
- free(tmp_mem);
+err_close_client:
close(client_fd);
+err_close_socket:
close(socket_fd);
+err_free_tmp:
+ free(tmp_mem);
+err_unbind:
ynl_sock_destroy(ys);
-
- return 0;
+err_reset_flow_steering:
+ reset_flow_steering();
+err_reset_rss:
+ reset_rss();
+err_reset_headersplit:
+ restore_ring_config(ring_config);
+err_free_ring_config:
+ ethtool_rings_get_rsp_free(ring_config);
+ return err;
}
-void run_devmem_tests(void)
+int run_devmem_tests(void)
{
+ struct ethtool_rings_get_rsp *ring_config;
+ struct netdev_queue_id *queues;
struct memory_buffer *mem;
struct ynl_sock *ys;
+ int err = -1;
mem = provider->alloc(getpagesize() * NUM_PAGES);
+ if (!mem) {
+ pr_err("Failed to allocate memory buffer");
+ return -1;
+ }
+
+ ring_config = get_ring_config();
+ if (!ring_config) {
+ pr_err("Failed to get current ring configuration");
+ goto err_free_mem;
+ }
/* Configure RSS to divert all traffic from our devmem queues */
- if (configure_rss())
- error(1, 0, "rss error\n");
+ if (configure_rss()) {
+ pr_err("rss error");
+ goto err_free_ring_config;
+ }
- if (configure_headersplit(1))
- error(1, 0, "Failed to configure header split\n");
+ if (configure_headersplit(ring_config, 1)) {
+ pr_err("Failed to configure header split");
+ goto err_reset_rss;
+ }
- if (!bind_rx_queue(ifindex, mem->fd,
- calloc(num_queues, sizeof(struct netdev_queue_id)),
- num_queues, &ys))
- error(1, 0, "Binding empty queues array should have failed\n");
+ queues = netdev_queue_id_alloc(num_queues);
+ if (!queues) {
+ pr_err("Failed to allocate empty queues array");
+ goto err_reset_headersplit;
+ }
- if (configure_headersplit(0))
- error(1, 0, "Failed to configure header split\n");
+ if (!bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) {
+ pr_err("Binding empty queues array should have failed");
+ goto err_unbind;
+ }
- if (!bind_rx_queue(ifindex, mem->fd, create_queues(), num_queues, &ys))
- error(1, 0, "Configure dmabuf with header split off should have failed\n");
+ if (configure_headersplit(ring_config, 0)) {
+ pr_err("Failed to configure header split");
+ goto err_reset_headersplit;
+ }
- if (configure_headersplit(1))
- error(1, 0, "Failed to configure header split\n");
+ queues = create_queues();
+ if (!queues) {
+ pr_err("Failed to create queues");
+ goto err_reset_headersplit;
+ }
- if (bind_rx_queue(ifindex, mem->fd, create_queues(), num_queues, &ys))
- error(1, 0, "Failed to bind\n");
+ if (!bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) {
+ pr_err("Configure dmabuf with header split off should have failed");
+ goto err_unbind;
+ }
+
+ if (configure_headersplit(ring_config, 1)) {
+ pr_err("Failed to configure header split");
+ goto err_reset_headersplit;
+ }
+
+ queues = create_queues();
+ if (!queues) {
+ pr_err("Failed to create queues");
+ goto err_reset_headersplit;
+ }
+
+ if (bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) {
+ pr_err("Failed to bind");
+ goto err_reset_headersplit;
+ }
/* Deactivating a bound queue should not be legal */
- if (!configure_channels(num_queues, num_queues - 1))
- error(1, 0, "Deactivating a bound queue should be illegal.\n");
+ if (!check_changing_channels(num_queues, num_queues)) {
+ pr_err("Deactivating a bound queue should be illegal");
+ goto err_unbind;
+ }
- /* Closing the netlink socket does an implicit unbind */
- ynl_sock_destroy(ys);
+ err = 0;
+ goto err_unbind;
+err_unbind:
+ ynl_sock_destroy(ys);
+err_reset_headersplit:
+ restore_ring_config(ring_config);
+err_reset_rss:
+ reset_rss();
+err_free_ring_config:
+ ethtool_rings_get_rsp_free(ring_config);
+err_free_mem:
provider->free(mem);
+ return err;
}
static uint64_t gettimeofday_ms(void)
@@ -778,13 +1166,15 @@ static int do_poll(int fd)
pfd.fd = fd;
ret = poll(&pfd, 1, waittime_ms);
- if (ret == -1)
- error(1, errno, "poll");
+ if (ret == -1) {
+ pr_err("poll");
+ return -1;
+ }
return ret && (pfd.revents & POLLERR);
}
-static void wait_compl(int fd)
+static int wait_compl(int fd)
{
int64_t tstop = gettimeofday_ms() + waittime_ms;
char control[CMSG_SPACE(100)] = {};
@@ -798,18 +1188,23 @@ static void wait_compl(int fd)
msg.msg_controllen = sizeof(control);
while (gettimeofday_ms() < tstop) {
- if (!do_poll(fd))
+ ret = do_poll(fd);
+ if (ret < 0)
+ return ret;
+ if (!ret)
continue;
ret = recvmsg(fd, &msg, MSG_ERRQUEUE);
if (ret < 0) {
if (errno == EAGAIN)
continue;
- error(1, errno, "recvmsg(MSG_ERRQUEUE)");
- return;
+ pr_err("recvmsg(MSG_ERRQUEUE)");
+ return -1;
+ }
+ if (msg.msg_flags & MSG_CTRUNC) {
+ pr_err("MSG_CTRUNC");
+ return -1;
}
- if (msg.msg_flags & MSG_CTRUNC)
- error(1, 0, "MSG_CTRUNC\n");
for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
if (cm->cmsg_level != SOL_IP &&
@@ -823,20 +1218,25 @@ static void wait_compl(int fd)
continue;
serr = (void *)CMSG_DATA(cm);
- if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY)
- error(1, 0, "wrong origin %u", serr->ee_origin);
- if (serr->ee_errno != 0)
- error(1, 0, "wrong errno %d", serr->ee_errno);
+ if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY) {
+ pr_err("wrong origin %u", serr->ee_origin);
+ return -1;
+ }
+ if (serr->ee_errno != 0) {
+ pr_err("wrong errno %d", serr->ee_errno);
+ return -1;
+ }
hi = serr->ee_data;
lo = serr->ee_info;
fprintf(stderr, "tx complete [%d,%d]\n", lo, hi);
- return;
+ return 0;
}
}
- error(1, 0, "did not receive tx completion");
+ pr_err("did not receive tx completion");
+ return -1;
}
static int do_client(struct memory_buffer *mem)
@@ -850,50 +1250,69 @@ static int do_client(struct memory_buffer *mem)
ssize_t line_size = 0;
struct cmsghdr *cmsg;
char *line = NULL;
+ int ret, err = -1;
size_t len = 0;
int socket_fd;
__u32 ddmabuf;
int opt = 1;
- int ret;
ret = parse_address(server_ip, atoi(port), &server_sin);
- if (ret < 0)
- error(1, 0, "parse server address");
+ if (ret < 0) {
+ pr_err("parse server address");
+ return -1;
+ }
+
+ if (client_ip) {
+ ret = parse_address(client_ip, atoi(port), &client_sin);
+ if (ret < 0) {
+ pr_err("parse client address");
+ return ret;
+ }
+ }
socket_fd = socket(AF_INET6, SOCK_STREAM, 0);
- if (socket_fd < 0)
- error(1, socket_fd, "create socket");
+ if (socket_fd < 0) {
+ pr_err("create socket");
+ return -1;
+ }
- enable_reuseaddr(socket_fd);
+ if (enable_reuseaddr(socket_fd))
+ goto err_close_socket;
ret = setsockopt(socket_fd, SOL_SOCKET, SO_BINDTODEVICE, ifname,
strlen(ifname) + 1);
- if (ret)
- error(1, errno, "bindtodevice");
+ if (ret) {
+ pr_err("bindtodevice");
+ goto err_close_socket;
+ }
- if (bind_tx_queue(ifindex, mem->fd, &ys))
- error(1, 0, "Failed to bind\n");
+ if (bind_tx_queue(ifindex, mem->fd, &ys)) {
+ pr_err("Failed to bind");
+ goto err_close_socket;
+ }
if (client_ip) {
- ret = parse_address(client_ip, atoi(port), &client_sin);
- if (ret < 0)
- error(1, 0, "parse client address");
-
ret = bind(socket_fd, &client_sin, sizeof(client_sin));
- if (ret)
- error(1, errno, "bind");
+ if (ret) {
+ pr_err("bind");
+ goto err_unbind;
+ }
}
ret = setsockopt(socket_fd, SOL_SOCKET, SO_ZEROCOPY, &opt, sizeof(opt));
- if (ret)
- error(1, errno, "set sock opt");
+ if (ret) {
+ pr_err("set sock opt");
+ goto err_unbind;
+ }
fprintf(stderr, "Connect to %s %d (via %s)\n", server_ip,
ntohs(server_sin.sin6_port), ifname);
ret = connect(socket_fd, &server_sin, sizeof(server_sin));
- if (ret)
- error(1, errno, "connect");
+ if (ret) {
+ pr_err("connect");
+ goto err_unbind;
+ }
while (1) {
free(line);
@@ -906,10 +1325,11 @@ static int do_client(struct memory_buffer *mem)
if (max_chunk) {
msg.msg_iovlen =
(line_size + max_chunk - 1) / max_chunk;
- if (msg.msg_iovlen > MAX_IOV)
- error(1, 0,
- "can't partition %zd bytes into maximum of %d chunks",
- line_size, MAX_IOV);
+ if (msg.msg_iovlen > MAX_IOV) {
+ pr_err("can't partition %zd bytes into maximum of %d chunks",
+ line_size, MAX_IOV);
+ goto err_free_line;
+ }
for (int i = 0; i < msg.msg_iovlen; i++) {
iov[i].iov_base = (void *)(i * max_chunk);
@@ -940,34 +1360,40 @@ static int do_client(struct memory_buffer *mem)
*((__u32 *)CMSG_DATA(cmsg)) = ddmabuf;
ret = sendmsg(socket_fd, &msg, MSG_ZEROCOPY);
- if (ret < 0)
- error(1, errno, "Failed sendmsg");
+ if (ret < 0) {
+ pr_err("Failed sendmsg");
+ goto err_free_line;
+ }
fprintf(stderr, "sendmsg_ret=%d\n", ret);
- if (ret != line_size)
- error(1, errno, "Did not send all bytes %d vs %zd", ret,
- line_size);
+ if (ret != line_size) {
+ pr_err("Did not send all bytes %d vs %zd", ret, line_size);
+ goto err_free_line;
+ }
- wait_compl(socket_fd);
+ if (wait_compl(socket_fd))
+ goto err_free_line;
}
fprintf(stderr, "%s: tx ok\n", TEST_PREFIX);
+ err = 0;
+
+err_free_line:
free(line);
+err_unbind:
+ ynl_sock_destroy(ys);
+err_close_socket:
close(socket_fd);
-
- if (ys)
- ynl_sock_destroy(ys);
-
- return 0;
+ return err;
}
int main(int argc, char *argv[])
{
struct memory_buffer *mem;
int is_server = 0, opt;
- int ret;
+ int ret, err = 1;
while ((opt = getopt(argc, argv, "ls:c:p:v:q:t:f:z:")) != -1) {
switch (opt) {
@@ -1004,8 +1430,10 @@ int main(int argc, char *argv[])
}
}
- if (!ifname)
- error(1, 0, "Missing -f argument\n");
+ if (!ifname) {
+ pr_err("Missing -f argument");
+ return 1;
+ }
ifindex = if_nametoindex(ifname);
@@ -1014,33 +1442,41 @@ int main(int argc, char *argv[])
if (!server_ip && !client_ip) {
if (start_queue < 0 && num_queues < 0) {
num_queues = rxq_num(ifindex);
- if (num_queues < 0)
- error(1, 0, "couldn't detect number of queues\n");
- if (num_queues < 2)
- error(1, 0,
- "number of device queues is too low\n");
+ if (num_queues < 0) {
+ pr_err("couldn't detect number of queues");
+ return 1;
+ }
+ if (num_queues < 2) {
+ pr_err("number of device queues is too low");
+ return 1;
+ }
/* make sure can bind to multiple queues */
start_queue = num_queues / 2;
num_queues /= 2;
}
- if (start_queue < 0 || num_queues < 0)
- error(1, 0, "Both -t and -q are required\n");
+ if (start_queue < 0 || num_queues < 0) {
+ pr_err("Both -t and -q are required");
+ return 1;
+ }
- run_devmem_tests();
- return 0;
+ return run_devmem_tests();
}
if (start_queue < 0 && num_queues < 0) {
num_queues = rxq_num(ifindex);
- if (num_queues < 2)
- error(1, 0, "number of device queues is too low\n");
+ if (num_queues < 2) {
+ pr_err("number of device queues is too low");
+ return 1;
+ }
num_queues = 1;
start_queue = rxq_num(ifindex) - num_queues;
- if (start_queue < 0)
- error(1, 0, "couldn't detect number of queues\n");
+ if (start_queue < 0) {
+ pr_err("couldn't detect number of queues");
+ return 1;
+ }
fprintf(stderr, "using queues %d..%d\n", start_queue, start_queue + num_queues);
}
@@ -1048,21 +1484,39 @@ int main(int argc, char *argv[])
for (; optind < argc; optind++)
fprintf(stderr, "extra arguments: %s\n", argv[optind]);
- if (start_queue < 0)
- error(1, 0, "Missing -t argument\n");
+ if (start_queue < 0) {
+ pr_err("Missing -t argument");
+ return 1;
+ }
- if (num_queues < 0)
- error(1, 0, "Missing -q argument\n");
+ if (num_queues < 0) {
+ pr_err("Missing -q argument");
+ return 1;
+ }
- if (!server_ip)
- error(1, 0, "Missing -s argument\n");
+ if (!server_ip) {
+ pr_err("Missing -s argument");
+ return 1;
+ }
- if (!port)
- error(1, 0, "Missing -p argument\n");
+ if (!port) {
+ pr_err("Missing -p argument");
+ return 1;
+ }
mem = provider->alloc(getpagesize() * NUM_PAGES);
+ if (!mem) {
+ pr_err("Failed to allocate memory buffer");
+ return 1;
+ }
+
ret = is_server ? do_server(mem) : do_client(mem);
- provider->free(mem);
+ if (ret)
+ goto err_free_mem;
- return ret;
+ err = 0;
+
+err_free_mem:
+ provider->free(mem);
+ return err;
}
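
For context on the wait_compl() rework above: MSG_ZEROCOPY completions arrive on the socket error queue as sock_extended_err records whose ee_info/ee_data carry the completed notification range. A standalone sketch of reading one completion (illustrative; the SOL_IP/SOL_IPV6 cmsg level checks that ncdevmem.c performs are elided here):

#include <errno.h>
#include <stdio.h>
#include <sys/socket.h>
#include <linux/errqueue.h>

static int read_one_completion(int fd)
{
	char control[CMSG_SPACE(sizeof(struct sock_extended_err))] = {};
	struct sock_extended_err *serr;
	struct msghdr msg = {};
	struct cmsghdr *cm;

	msg.msg_control = control;
	msg.msg_controllen = sizeof(control);

	if (recvmsg(fd, &msg, MSG_ERRQUEUE) < 0)
		return errno == EAGAIN ? 0 : -1;

	for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
		serr = (void *)CMSG_DATA(cm);
		if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY)
			continue;
		/* ee_info..ee_data is the range of completed sends */
		fprintf(stderr, "tx complete [%u,%u]\n",
			serr->ee_info, serr->ee_data);
		return 0;
	}
	return -1;
}
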
diff --git a/tools/testing/selftests/drivers/net/hw/rss_ctx.py b/tools/testing/selftests/drivers/net/hw/rss_ctx.py
index 7bb552f8b182..ed7e405682f0 100755
--- a/tools/testing/selftests/drivers/net/hw/rss_ctx.py
+++ b/tools/testing/selftests/drivers/net/hw/rss_ctx.py
@@ -118,7 +118,7 @@ def test_rss_key_indir(cfg):
qcnt = len(_get_rx_cnts(cfg))
if qcnt < 3:
- KsftSkipEx("Device has fewer than 3 queues (or doesn't support queue stats)")
+ raise KsftSkipEx("Device has fewer than 3 queues (or doesn't support queue stats)")
data = get_rss(cfg)
want_keys = ['rss-hash-key', 'rss-hash-function', 'rss-indirection-table']
@@ -178,8 +178,13 @@ def test_rss_key_indir(cfg):
cnts = _get_rx_cnts(cfg)
GenerateTraffic(cfg).wait_pkts_and_stop(20000)
cnts = _get_rx_cnts(cfg, prev=cnts)
- # First two queues get less traffic than all the rest
- ksft_lt(sum(cnts[:2]), sum(cnts[2:]), "traffic distributed: " + str(cnts))
+ if qcnt > 4:
+ # First two queues get less traffic than all the rest
+ ksft_lt(sum(cnts[:2]), sum(cnts[2:]),
+ "traffic distributed: " + str(cnts))
+ else:
+ # When queue count is low make sure third queue got significant pkts
+ ksft_ge(cnts[2], 3500, "traffic distributed: " + str(cnts))
def test_rss_queue_reconfigure(cfg, main_ctx=True):
@@ -335,19 +340,20 @@ def test_hitless_key_update(cfg):
data = get_rss(cfg)
key_len = len(data['rss-hash-key'])
- key = _rss_key_rand(key_len)
+ ethnl = EthtoolFamily()
+ key = random.randbytes(key_len)
tgen = GenerateTraffic(cfg)
try:
errors0, carrier0 = get_drop_err_sum(cfg)
t0 = datetime.datetime.now()
- ethtool(f"-X {cfg.ifname} hkey " + _rss_key_str(key))
+ ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, "hkey": key})
t1 = datetime.datetime.now()
errors1, carrier1 = get_drop_err_sum(cfg)
finally:
tgen.wait_pkts_and_stop(5000)
- ksft_lt((t1 - t0).total_seconds(), 0.2)
+ ksft_lt((t1 - t0).total_seconds(), 0.15)
ksft_eq(errors1 - errors0, 0)
ksft_eq(carrier1 - carrier0, 0)
diff --git a/tools/testing/selftests/drivers/net/hw/rss_flow_label.py b/tools/testing/selftests/drivers/net/hw/rss_flow_label.py
new file mode 100755
index 000000000000..6fa95fe27c47
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/rss_flow_label.py
@@ -0,0 +1,167 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Tests for RSS hashing on IPv6 Flow Label.
+"""
+
+import glob
+import os
+import socket
+from lib.py import CmdExitFailure
+from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ge, ksft_in, \
+ ksft_not_in, ksft_raises, KsftSkipEx
+from lib.py import bkg, cmd, defer, fd_read_timeout, rand_port
+from lib.py import NetDrvEpEnv
+
+
+def _check_system(cfg):
+ if not hasattr(socket, "SO_INCOMING_CPU"):
+ raise KsftSkipEx("socket.SO_INCOMING_CPU was added in Python 3.11")
+
+ qcnt = len(glob.glob(f"/sys/class/net/{cfg.ifname}/queues/rx-*"))
+ if qcnt < 2:
+ raise KsftSkipEx(f"Local has only {qcnt} queues")
+
+ for f in [f"/sys/class/net/{cfg.ifname}/queues/rx-0/rps_flow_cnt",
+ f"/sys/class/net/{cfg.ifname}/queues/rx-0/rps_cpus"]:
+ try:
+ with open(f, 'r') as fp:
+ setting = fp.read().strip()
+ # CPU mask will be zeros and commas
+ if setting.replace("0", "").replace(",", ""):
+ raise KsftSkipEx(f"RPS/RFS is configured: {f}: {setting}")
+ except FileNotFoundError:
+ pass
+
+ # 1 is the default; if someone changed it we probably shouldn't mess with it
+ af = cmd("cat /proc/sys/net/ipv6/auto_flowlabels", host=cfg.remote).stdout
+ if af.strip() != "1":
+ raise KsftSkipEx("Remote does not have auto_flowlabels enabled")
+
+
+def _ethtool_get_cfg(cfg, fl_type):
+ descr = cmd(f"ethtool -n {cfg.ifname} rx-flow-hash {fl_type}").stdout
+
+ converter = {
+ "IP SA": "s",
+ "IP DA": "d",
+ "L3 proto": "t",
+ "L4 bytes 0 & 1 [TCP/UDP src port]": "f",
+ "L4 bytes 2 & 3 [TCP/UDP dst port]": "n",
+ "IPv6 Flow Label": "l",
+ }
+
+ ret = ""
+ for line in descr.split("\n")[1:-2]:
+ # if this raises we probably need to add more keys to converter above
+ ret += converter[line]
+ return ret
+
+
+def _traffic(cfg, one_sock, one_cpu):
+ local_port = rand_port(socket.SOCK_DGRAM)
+ remote_port = rand_port(socket.SOCK_DGRAM)
+
+ sock = socket.socket(socket.AF_INET6, socket.SOCK_DGRAM)
+ sock.bind(("", local_port))
+ sock.connect((cfg.remote_addr_v["6"], 0))
+ if one_sock:
+ send = f"exec 5<>/dev/udp/{cfg.addr_v['6']}/{local_port}; " \
+ "for i in `seq 20`; do echo a >&5; sleep 0.02; done; exec 5>&-"
+ else:
+ send = "for i in `seq 20`; do echo a | socat -t0.02 - UDP6:" \
+ f"[{cfg.addr_v['6']}]:{local_port},sourceport={remote_port}; done"
+
+ cpus = set()
+ with bkg(send, shell=True, host=cfg.remote, exit_wait=True):
+ for _ in range(20):
+ fd_read_timeout(sock.fileno(), 1)
+ cpu = sock.getsockopt(socket.SOL_SOCKET, socket.SO_INCOMING_CPU)
+ cpus.add(cpu)
+
+ if one_cpu:
+ ksft_eq(len(cpus), 1,
+ f"{one_sock=} - expected one CPU, got traffic on: {cpus=}")
+ else:
+ ksft_ge(len(cpus), 2,
+ f"{one_sock=} - expected many CPUs, got traffic on: {cpus=}")
+
+
+def test_rss_flow_label(cfg):
+ """
+ Test hashing on IPv6 flow label. Send traffic over a single socket
+ and over multiple sockets. Depend on the remote having auto-label
+ enabled so that it randomizes the label per socket.
+ """
+
+ cfg.require_ipver("6")
+ cfg.require_cmd("socat", remote=True)
+ _check_system(cfg)
+
+ # Enable flow label hashing for UDP6
+ initial = _ethtool_get_cfg(cfg, "udp6")
+ no_lbl = initial.replace("l", "")
+ if "l" not in initial:
+ try:
+ cmd(f"ethtool -N {cfg.ifname} rx-flow-hash udp6 l{no_lbl}")
+ except CmdExitFailure as exc:
+ raise KsftSkipEx("Device doesn't support Flow Label for UDP6") from exc
+
+ defer(cmd, f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {initial}")
+
+ _traffic(cfg, one_sock=True, one_cpu=True)
+ _traffic(cfg, one_sock=False, one_cpu=False)
+
+ # Disable it, we should see no hashing (reset was already defer()ed)
+ cmd(f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {no_lbl}")
+
+ _traffic(cfg, one_sock=False, one_cpu=True)
+
+
+def _check_v4_flow_types(cfg):
+ for fl_type in ["tcp4", "udp4", "ah4", "esp4", "sctp4"]:
+ try:
+ cur = cmd(f"ethtool -n {cfg.ifname} rx-flow-hash {fl_type}").stdout
+ ksft_not_in("Flow Label", cur,
+ comment=f"{fl_type=} has Flow Label:" + cur)
+ except CmdExitFailure:
+ # Probably does not support this flow type
+ pass
+
+
+def test_rss_flow_label_6only(cfg):
+ """
+ Test interactions with IPv4 flow types. It should not be possible to set
+ IPv6 Flow Label hashing for an IPv4 flow type. The Flow Label should also
+ not appear in the IPv4 "current config".
+ """
+
+ with ksft_raises(CmdExitFailure) as cm:
+ cmd(f"ethtool -N {cfg.ifname} rx-flow-hash tcp4 sdfnl")
+ ksft_in("Invalid argument", cm.exception.cmd.stderr)
+
+ _check_v4_flow_types(cfg)
+
+ # Try to enable Flow Labels and check again, in case it leaks thru
+ initial = _ethtool_get_cfg(cfg, "udp6")
+ changed = initial.replace("l", "") if "l" in initial else initial + "l"
+
+ cmd(f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {changed}")
+ restore = defer(cmd, f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {initial}")
+
+ _check_v4_flow_types(cfg)
+ restore.exec()
+ _check_v4_flow_types(cfg)
+
+
+def main() -> None:
+ with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
+ ksft_run([test_rss_flow_label,
+ test_rss_flow_label_6only],
+ args=(cfg, ))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
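
The test leans on the remote's net.ipv6.auto_flowlabels sysctl; for reference, the same behaviour can be requested per socket with IPV6_AUTOFLOWLABEL. A minimal sketch, not part of the selftest; the fallback define assumes the uapi value from linux/in6.h:

#include <stdio.h>
#include <netinet/in.h>
#include <sys/socket.h>

#ifndef IPV6_AUTOFLOWLABEL
#define IPV6_AUTOFLOWLABEL 70	/* assumed uapi value, linux/in6.h */
#endif

int main(void)
{
	int fd = socket(AF_INET6, SOCK_DGRAM, 0);
	int on = 1;

	/* ask the kernel to pick a hash-based flow label for this socket,
	 * regardless of the net.ipv6.auto_flowlabels default
	 */
	if (fd < 0 ||
	    setsockopt(fd, IPPROTO_IPV6, IPV6_AUTOFLOWLABEL, &on, sizeof(on))) {
		perror("IPV6_AUTOFLOWLABEL");
		return 1;
	}
	return 0;
}
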
diff --git a/tools/testing/selftests/drivers/net/hw/tso.py b/tools/testing/selftests/drivers/net/hw/tso.py
index c13dd5efa27a..0998e68ebaf0 100755
--- a/tools/testing/selftests/drivers/net/hw/tso.py
+++ b/tools/testing/selftests/drivers/net/hw/tso.py
@@ -60,16 +60,17 @@ def run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso):
sock_wait_drain(sock)
qstat_new = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
- # No math behind the 10 here, but try to catch cases where
- # TCP falls back to non-LSO.
- ksft_lt(tcp_sock_get_retrans(sock), 10)
- sock.close()
-
# Check that at least 90% of the data was sent as LSO packets.
# System noise may cause false negatives. Also header overheads
# will add up to 5% of extra packets... The check is best effort.
total_lso_wire = len(buf) * 0.90 // cfg.dev["mtu"]
total_lso_super = len(buf) * 0.90 // cfg.dev["tso_max_size"]
+
+ # Make sure we have an order of magnitude more LSO packets than
+ # retransmits, in case TCP retransmitted all the LSO packets.
+ ksft_lt(tcp_sock_get_retrans(sock), total_lso_wire / 4)
+ sock.close()
+
if should_lso:
if cfg.have_stat_super_count:
ksft_ge(qstat_new['tx-hw-gso-packets'] -
diff --git a/tools/testing/selftests/drivers/net/lib/py/__init__.py b/tools/testing/selftests/drivers/net/lib/py/__init__.py
index 8711c67ad658..a07b56a75c8a 100644
--- a/tools/testing/selftests/drivers/net/lib/py/__init__.py
+++ b/tools/testing/selftests/drivers/net/lib/py/__init__.py
@@ -15,7 +15,7 @@ try:
NlError, RtnlFamily, DevlinkFamily
from net.lib.py import CmdExitFailure
from net.lib.py import bkg, cmd, bpftool, bpftrace, defer, ethtool, \
- fd_read_timeout, ip, rand_port, tool, wait_port_listen
+ fd_read_timeout, ip, rand_port, tool, wait_port_listen, wait_file
from net.lib.py import fd_read_timeout
from net.lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx
from net.lib.py import ksft_disruptive, ksft_exit, ksft_pr, ksft_run, \
diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py
index 1b8bd648048f..c1f3b608c6d8 100644
--- a/tools/testing/selftests/drivers/net/lib/py/env.py
+++ b/tools/testing/selftests/drivers/net/lib/py/env.py
@@ -4,7 +4,7 @@ import os
import time
from pathlib import Path
from lib.py import KsftSkipEx, KsftXfailEx
-from lib.py import ksft_setup
+from lib.py import ksft_setup, wait_file
from lib.py import cmd, ethtool, ip, CmdExitFailure
from lib.py import NetNS, NetdevSimDev
from .remote import Remote
@@ -25,6 +25,9 @@ class NetDrvEnvBase:
self.env = self._load_env_file()
+ # Following attrs must be set by inheriting classes
+ self.dev = None
+
def _load_env_file(self):
env = os.environ.copy()
@@ -48,6 +51,22 @@ class NetDrvEnvBase:
env[pair[0]] = pair[1]
return ksft_setup(env)
+ def __del__(self):
+ pass
+
+ def __enter__(self):
+ ip(f"link set dev {self.dev['ifname']} up")
+ wait_file(f"/sys/class/net/{self.dev['ifname']}/carrier",
+ lambda x: x.strip() == "1")
+
+ return self
+
+ def __exit__(self, ex_type, ex_value, ex_tb):
+ """
+ __exit__ gets called at the end of a "with" block.
+ """
+ self.__del__()
+
class NetDrvEnv(NetDrvEnvBase):
"""
@@ -72,17 +91,6 @@ class NetDrvEnv(NetDrvEnvBase):
self.ifname = self.dev['ifname']
self.ifindex = self.dev['ifindex']
- def __enter__(self):
- ip(f"link set dev {self.dev['ifname']} up")
-
- return self
-
- def __exit__(self, ex_type, ex_value, ex_tb):
- """
- __exit__ gets called at the end of a "with" block.
- """
- self.__del__()
-
def __del__(self):
if self._ns:
self._ns.remove()
@@ -219,15 +227,6 @@ class NetDrvEpEnv(NetDrvEnvBase):
raise Exception("Can't resolve remote interface name, multiple interfaces match")
return v6[0]["ifname"] if v6 else v4[0]["ifname"]
- def __enter__(self):
- return self
-
- def __exit__(self, ex_type, ex_value, ex_tb):
- """
- __exit__ gets called at the end of a "with" block.
- """
- self.__del__()
-
def __del__(self):
if self._ns:
self._ns.remove()
diff --git a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh
index b6071e80ebbb..8e1085e89647 100644
--- a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh
+++ b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh
@@ -148,12 +148,20 @@ function create_dynamic_target() {
# Generate the command line argument for netconsole following:
# netconsole=[+][src-port]@[src-ip]/[<dev>],[tgt-port]@<tgt-ip>/[tgt-macaddr]
function create_cmdline_str() {
+ local BINDMODE=${1:-"ifname"}
+ if [ "${BINDMODE}" == "ifname" ]
+ then
+ SRCDEV=${SRCIF}
+ else
+ SRCDEV=$(mac_get "${SRCIF}")
+ fi
+
DSTMAC=$(ip netns exec "${NAMESPACE}" \
ip link show "${DSTIF}" | awk '/ether/ {print $2}')
SRCPORT="1514"
TGTPORT="6666"
- echo "netconsole=\"+${SRCPORT}@${SRCIP}/${SRCIF},${TGTPORT}@${DSTIP}/${DSTMAC}\""
+ echo "netconsole=\"+${SRCPORT}@${SRCIP}/${SRCDEV},${TGTPORT}@${DSTIP}/${DSTMAC}\""
}
# Do not append the release to the header of the message
diff --git a/tools/testing/selftests/drivers/net/napi_threaded.py b/tools/testing/selftests/drivers/net/napi_threaded.py
index 9699a100a87d..f4be72b2145a 100755
--- a/tools/testing/selftests/drivers/net/napi_threaded.py
+++ b/tools/testing/selftests/drivers/net/napi_threaded.py
@@ -24,7 +24,8 @@ def _assert_napi_threaded_disabled(nl, napi_id) -> None:
def _set_threaded_state(cfg, threaded) -> None:
- cmd(f"echo {threaded} > /sys/class/net/{cfg.ifname}/threaded")
+ with open(f"/sys/class/net/{cfg.ifname}/threaded", "wb") as fp:
+ fp.write(str(threaded).encode('utf-8'))
def _setup_deferred_cleanup(cfg) -> None:
@@ -38,6 +39,34 @@ def _setup_deferred_cleanup(cfg) -> None:
return combined
+def napi_init(cfg, nl) -> None:
+ """
+ Test that threaded state (in the persistent NAPI config) gets updated
+ even when NAPI with given ID is not allocated at the time.
+ """
+
+ qcnt = _setup_deferred_cleanup(cfg)
+
+ _set_threaded_state(cfg, 1)
+ cmd(f"ethtool -L {cfg.ifname} combined 1")
+ _set_threaded_state(cfg, 0)
+ cmd(f"ethtool -L {cfg.ifname} combined {qcnt}")
+
+ napis = nl.napi_get({'ifindex': cfg.ifindex}, dump=True)
+ for napi in napis:
+ ksft_eq(napi['threaded'], 'disabled')
+ ksft_eq(napi.get('pid'), None)
+
+ cmd(f"ethtool -L {cfg.ifname} combined 1")
+ _set_threaded_state(cfg, 1)
+ cmd(f"ethtool -L {cfg.ifname} combined {qcnt}")
+
+ napis = nl.napi_get({'ifindex': cfg.ifindex}, dump=True)
+ for napi in napis:
+ ksft_eq(napi['threaded'], 'enabled')
+ ksft_ne(napi.get('pid'), None)
+
+
def enable_dev_threaded_disable_napi_threaded(cfg, nl) -> None:
"""
Test that when napi threaded is enabled at device level and
@@ -103,7 +132,8 @@ def main() -> None:
""" Ksft boiler plate main """
with NetDrvEnv(__file__, queue_count=2) as cfg:
- ksft_run([change_num_queues,
+ ksft_run([napi_init,
+ change_num_queues,
enable_dev_threaded_disable_napi_threaded],
args=(cfg, NetdevFamily()))
ksft_exit()
diff --git a/tools/testing/selftests/drivers/net/netcons_cmdline.sh b/tools/testing/selftests/drivers/net/netcons_cmdline.sh
index ad2fb8b1c463..d1d23dc67f99 100755
--- a/tools/testing/selftests/drivers/net/netcons_cmdline.sh
+++ b/tools/testing/selftests/drivers/net/netcons_cmdline.sh
@@ -19,9 +19,6 @@ check_netconsole_module
modprobe netdevsim 2> /dev/null || true
rmmod netconsole 2> /dev/null || true
-# The content of kmsg will be save to the following file
-OUTPUT_FILE="/tmp/${TARGET}"
-
# Check for basic system dependency and exit if not found
# check_for_dependencies
# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5)
@@ -30,23 +27,39 @@ echo "6 5" > /proc/sys/kernel/printk
trap do_cleanup EXIT
# Create one namespace and two interfaces
set_network
-# Create the command line for netconsole, with the configuration from the
-# function above
-CMDLINE="$(create_cmdline_str)"
-
-# Load the module, with the cmdline set
-modprobe netconsole "${CMDLINE}"
-
-# Listed for netconsole port inside the namespace and destination interface
-listen_port_and_save_to "${OUTPUT_FILE}" &
-# Wait for socat to start and listen to the port.
-wait_local_port_listen "${NAMESPACE}" "${PORT}" udp
-# Send the message
-echo "${MSG}: ${TARGET}" > /dev/kmsg
-# Wait until socat saves the file to disk
-busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}"
-# Make sure the message was received in the dst part
-# and exit
-validate_msg "${OUTPUT_FILE}"
+
+# Run the test twice, with different cmdline parameters
+for BINDMODE in "ifname" "mac"
+do
+ echo "Running with bind mode: ${BINDMODE}" >&2
+ # Create the command line for netconsole, with the configuration from
+ # the function above
+ CMDLINE=$(create_cmdline_str "${BINDMODE}")
+
+ # The content of kmsg will be saved to the following file
+ OUTPUT_FILE="/tmp/${TARGET}-${BINDMODE}"
+
+ # Load the module, with the cmdline set
+ modprobe netconsole "${CMDLINE}"
+
+ # Listen on the netconsole port inside the namespace and destination
+ # interface
+ listen_port_and_save_to "${OUTPUT_FILE}" &
+ # Wait for socat to start and listen to the port.
+ wait_local_port_listen "${NAMESPACE}" "${PORT}" udp
+ # Send the message
+ echo "${MSG}: ${TARGET}" > /dev/kmsg
+ # Wait until socat saves the file to disk
+ busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}"
+ # Make sure the message was received in the dst part
+ # and exit
+ validate_msg "${OUTPUT_FILE}"
+
+ # kill socat in case it is still running
+ pkill_socat
+ # Unload the module
+ rmmod netconsole
+ echo "${BINDMODE} : Test passed" >&2
+done
exit "${ksft_pass}"
diff --git a/tools/testing/selftests/drivers/net/xdp.py b/tools/testing/selftests/drivers/net/xdp.py
index 1dd8bf3bf6c9..08fea4230759 100755
--- a/tools/testing/selftests/drivers/net/xdp.py
+++ b/tools/testing/selftests/drivers/net/xdp.py
@@ -112,10 +112,10 @@ def _load_xdp_prog(cfg, bpf_info):
defer(ip, f"link set dev {cfg.remote_ifname} mtu 1500", host=cfg.remote)
cmd(
- f"ip link set dev {cfg.ifname} mtu {bpf_info.mtu} xdp obj {abs_path} sec {bpf_info.xdp_sec}",
+ f"ip link set dev {cfg.ifname} mtu {bpf_info.mtu} xdpdrv obj {abs_path} sec {bpf_info.xdp_sec}",
shell=True
)
- defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdp off")
+ defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdpdrv off")
xdp_info = ip(f"-d link show dev {cfg.ifname}", json=True)[0]
prog_info["id"] = xdp_info["xdp"]["prog"]["id"]
@@ -290,34 +290,78 @@ def test_xdp_native_drop_mb(cfg):
_test_drop(cfg, bpf_info, 8000)
-def test_xdp_native_tx_mb(cfg):
+def _test_xdp_native_tx(cfg, bpf_info, payload_lens):
"""
- Tests the XDP_TX action for a multi-buff case.
+ Tests the XDP_TX action.
Args:
cfg: Configuration object containing network settings.
+ bpf_info: BPFProgInfo object containing the BPF program metadata.
+ payload_lens: Array of packet lengths to send.
"""
cfg.require_cmd("socat", remote=True)
-
- bpf_info = BPFProgInfo("xdp_prog_frags", "xdp_native.bpf.o", "xdp.frags", 9000)
prog_info = _load_xdp_prog(cfg, bpf_info)
port = rand_port()
_set_xdp_map("map_xdp_setup", TestConfig.MODE.value, XDPAction.TX.value)
_set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port)
- test_string = ''.join(random.choice(string.ascii_lowercase) for _ in range(8000))
- rx_udp = f"socat -{cfg.addr_ipver} -T 2 -u UDP-RECV:{port},reuseport STDOUT"
- tx_udp = f"echo {test_string} | socat -t 2 -u STDIN UDP:{cfg.baddr}:{port}"
+ expected_pkts = 0
+ for payload_len in payload_lens:
+ test_string = "".join(
+ random.choice(string.ascii_lowercase) for _ in range(payload_len)
+ )
+
+ rx_udp = f"socat -{cfg.addr_ipver} -T 2 " + \
+ f"-u UDP-RECV:{port},reuseport STDOUT"
+
+ # Writing zero bytes to stdin gets ignored by socat,
+ # but with the shut-null flag socat generates a zero sized packet
+ # when the socket is closed.
+ tx_cmd_suffix = ",shut-null" if payload_len == 0 else ""
+ tx_udp = f"echo -n {test_string} | socat -t 2 " + \
+ f"-u STDIN UDP:{cfg.baddr}:{port}{tx_cmd_suffix}"
+
+ with bkg(rx_udp, host=cfg.remote, exit_wait=True) as rnc:
+ wait_port_listen(port, proto="udp", host=cfg.remote)
+ cmd(tx_udp, host=cfg.remote, shell=True)
+
+ ksft_eq(rnc.stdout.strip(), test_string, "UDP packet exchange failed")
+
+ expected_pkts += 1
+ stats = _get_stats(prog_info["maps"]["map_xdp_stats"])
+ ksft_eq(stats[XDPStats.RX.value], expected_pkts, "RX stats mismatch")
+ ksft_eq(stats[XDPStats.TX.value], expected_pkts, "TX stats mismatch")
+
+
+def test_xdp_native_tx_sb(cfg):
+ """
+ Tests the XDP_TX action for a single-buff case.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ """
+ bpf_info = BPFProgInfo("xdp_prog", "xdp_native.bpf.o", "xdp", 1500)
+
+ # Ensure there's enough room for an ETH / IP / UDP header
+ pkt_hdr_len = 42 if cfg.addr_ipver == "4" else 62
- with bkg(rx_udp, host=cfg.remote, exit_wait=True) as rnc:
- wait_port_listen(port, proto="udp", host=cfg.remote)
- cmd(tx_udp, host=cfg.remote, shell=True)
+ _test_xdp_native_tx(cfg, bpf_info, [0, 1500 // 2, 1500 - pkt_hdr_len])
- stats = _get_stats(prog_info['maps']['map_xdp_stats'])
- ksft_eq(rnc.stdout.strip(), test_string, "UDP packet exchange failed")
- ksft_eq(stats[XDPStats.TX.value], 1, "TX stats mismatch")
+def test_xdp_native_tx_mb(cfg):
+ """
+ Tests the XDP_TX action for a multi-buff case.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ """
+ bpf_info = BPFProgInfo("xdp_prog_frags", "xdp_native.bpf.o",
+ "xdp.frags", 9000)
+ # The first packet ensures we exercise the fragmented code path.
+ # And the subsequent 0-sized packet ensures the driver
+ # reinitializes xdp_buff correctly.
+ _test_xdp_native_tx(cfg, bpf_info, [8000, 0])
def _validate_res(res, offset_lst, pkt_sz_lst):
@@ -644,6 +688,7 @@ def main():
test_xdp_native_pass_mb,
test_xdp_native_drop_sb,
test_xdp_native_drop_mb,
+ test_xdp_native_tx_sb,
test_xdp_native_tx_mb,
test_xdp_native_adjst_tail_grow_data,
test_xdp_native_adjst_tail_shrnk_data,
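
On the ",shut-null" comment in the hunk above: a zero-length UDP datagram is a legal packet, and in plain C it is just a 0-byte sendto(). Illustrative snippet with a placeholder destination:

#include <stdio.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>

int main(void)
{
	struct sockaddr_in6 dst = {
		.sin6_family = AF_INET6,
		.sin6_port = htons(9),	/* placeholder: discard port */
	};
	int fd = socket(AF_INET6, SOCK_DGRAM, 0);

	inet_pton(AF_INET6, "::1", &dst.sin6_addr);
	/* a NULL buffer with length 0 produces an empty datagram */
	if (fd < 0 || sendto(fd, NULL, 0, 0,
			     (struct sockaddr *)&dst, sizeof(dst)) < 0) {
		perror("sendto");
		return 1;
	}
	return 0;
}
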
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index c7e03e1d6f63..9926a14fd279 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -118,6 +118,7 @@ TEST_GEN_FILES += tfo
TEST_PROGS += tfo_passive.sh
TEST_PROGS += broadcast_pmtu.sh
TEST_PROGS += ipv6_force_forwarding.sh
+TEST_PROGS += route_hint.sh
# YNL files, must be before "include ..lib.mk"
YNL_GEN_FILES := busy_poller netlink-dumps
diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile
index a4b61c6d0290..0a20c98bbcfd 100644
--- a/tools/testing/selftests/net/af_unix/Makefile
+++ b/tools/testing/selftests/net/af_unix/Makefile
@@ -1,4 +1,4 @@
-CFLAGS += $(KHDR_INCLUDES)
+CFLAGS += $(KHDR_INCLUDES) -Wall -Wflex-array-member-not-at-end
TEST_GEN_PROGS := diag_uid msg_oob scm_inq scm_pidfd scm_rights unix_connect
include ../../lib.mk
diff --git a/tools/testing/selftests/net/af_unix/scm_inq.c b/tools/testing/selftests/net/af_unix/scm_inq.c
index 9d22561e7b8f..fc467714387e 100644
--- a/tools/testing/selftests/net/af_unix/scm_inq.c
+++ b/tools/testing/selftests/net/af_unix/scm_inq.c
@@ -11,11 +11,6 @@
#define NR_CHUNKS 100
#define MSG_LEN 256
-struct scm_inq {
- struct cmsghdr cmsghdr;
- int inq;
-};
-
FIXTURE(scm_inq)
{
int fd[2];
@@ -70,35 +65,38 @@ static void send_chunks(struct __test_metadata *_metadata,
static void recv_chunks(struct __test_metadata *_metadata,
FIXTURE_DATA(scm_inq) *self)
{
+ char cmsg_buf[CMSG_SPACE(sizeof(int))];
struct msghdr msg = {};
struct iovec iov = {};
- struct scm_inq cmsg;
+ struct cmsghdr *cmsg;
char buf[MSG_LEN];
int i, ret;
int inq;
msg.msg_iov = &iov;
msg.msg_iovlen = 1;
- msg.msg_control = &cmsg;
- msg.msg_controllen = CMSG_SPACE(sizeof(cmsg.inq));
+ msg.msg_control = cmsg_buf;
+ msg.msg_controllen = sizeof(cmsg_buf);
iov.iov_base = buf;
iov.iov_len = sizeof(buf);
for (i = 0; i < NR_CHUNKS; i++) {
memset(buf, 0, sizeof(buf));
- memset(&cmsg, 0, sizeof(cmsg));
+ memset(cmsg_buf, 0, sizeof(cmsg_buf));
ret = recvmsg(self->fd[1], &msg, 0);
ASSERT_EQ(MSG_LEN, ret);
- ASSERT_NE(NULL, CMSG_FIRSTHDR(&msg));
- ASSERT_EQ(CMSG_LEN(sizeof(cmsg.inq)), cmsg.cmsghdr.cmsg_len);
- ASSERT_EQ(SOL_SOCKET, cmsg.cmsghdr.cmsg_level);
- ASSERT_EQ(SCM_INQ, cmsg.cmsghdr.cmsg_type);
+
+ cmsg = CMSG_FIRSTHDR(&msg);
+ ASSERT_NE(NULL, cmsg);
+ ASSERT_EQ(CMSG_LEN(sizeof(int)), cmsg->cmsg_len);
+ ASSERT_EQ(SOL_SOCKET, cmsg->cmsg_level);
+ ASSERT_EQ(SCM_INQ, cmsg->cmsg_type);
ret = ioctl(self->fd[1], SIOCINQ, &inq);
ASSERT_EQ(0, ret);
- ASSERT_EQ(cmsg.inq, inq);
+ ASSERT_EQ(*(int *)CMSG_DATA(cmsg), inq);
}
}
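
The rework drops the struct-overlay cmsg and instead sizes a plain byte
buffer with CMSG_SPACE() and walks it with CMSG_FIRSTHDR()/CMSG_DATA(),
which is the portable pattern the CMSG macros are designed for. Python's
socket module encodes the same pattern; a minimal sketch, using SCM_RIGHTS
because the stdlib does not export SCM_INQ:

    import array
    import socket

    # Minimal sketch: receive one int-sized ancillary message through a
    # CMSG_SPACE()-sized buffer, the analogue of cmsg_buf[] above.
    def recv_one_fd(sock):
        ancbuf = socket.CMSG_SPACE(array.array("i").itemsize)
        data, ancdata, flags, _ = sock.recvmsg(256, ancbuf)
        assert not (flags & socket.MSG_CTRUNC), "control message truncated"
        level, ctype, payload = ancdata[0]
        assert (level, ctype) == (socket.SOL_SOCKET, socket.SCM_RIGHTS)
        fds = array.array("i")
        fds.frombytes(payload[:fds.itemsize])
        return fds[0]
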
diff --git a/tools/testing/selftests/net/af_unix/scm_pidfd.c b/tools/testing/selftests/net/af_unix/scm_pidfd.c
index 37e034874034..ef2921988e5f 100644
--- a/tools/testing/selftests/net/af_unix/scm_pidfd.c
+++ b/tools/testing/selftests/net/af_unix/scm_pidfd.c
@@ -137,7 +137,6 @@ struct cmsg_data {
static int parse_cmsg(struct msghdr *msg, struct cmsg_data *res)
{
struct cmsghdr *cmsg;
- int data = 0;
if (msg->msg_flags & (MSG_TRUNC | MSG_CTRUNC)) {
log_err("recvmsg: truncated");
@@ -243,7 +242,6 @@ static int cmsg_check_dead(int fd, int expected_pid)
int data = 0;
char control[CMSG_SPACE(sizeof(struct ucred)) +
CMSG_SPACE(sizeof(int))] = { 0 };
- pid_t client_pid;
struct pidfd_info info = {
.mask = PIDFD_INFO_EXIT,
};
diff --git a/tools/testing/selftests/net/af_unix/scm_rights.c b/tools/testing/selftests/net/af_unix/scm_rights.c
index 8b015f16c03d..914f99d153ce 100644
--- a/tools/testing/selftests/net/af_unix/scm_rights.c
+++ b/tools/testing/selftests/net/af_unix/scm_rights.c
@@ -271,20 +271,11 @@ void __send_fd(struct __test_metadata *_metadata,
{
#define MSG "x"
#define MSGLEN 1
- struct {
- struct cmsghdr cmsghdr;
- int fd[2];
- } cmsg = {
- .cmsghdr = {
- .cmsg_len = CMSG_LEN(sizeof(cmsg.fd)),
- .cmsg_level = SOL_SOCKET,
- .cmsg_type = SCM_RIGHTS,
- },
- .fd = {
- self->fd[inflight * 2],
- self->fd[inflight * 2],
- },
+ int fds[2] = {
+ self->fd[inflight * 2],
+ self->fd[inflight * 2],
};
+ char cmsg_buf[CMSG_SPACE(sizeof(fds))];
struct iovec iov = {
.iov_base = MSG,
.iov_len = MSGLEN,
@@ -294,11 +285,18 @@ void __send_fd(struct __test_metadata *_metadata,
.msg_namelen = 0,
.msg_iov = &iov,
.msg_iovlen = 1,
- .msg_control = &cmsg,
- .msg_controllen = CMSG_SPACE(sizeof(cmsg.fd)),
+ .msg_control = cmsg_buf,
+ .msg_controllen = sizeof(cmsg_buf),
};
+ struct cmsghdr *cmsg;
int ret;
+ cmsg = CMSG_FIRSTHDR(&msg);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(fds));
+ memcpy(CMSG_DATA(cmsg), fds, sizeof(fds));
+
ret = sendmsg(self->fd[receiver * 2 + 1], &msg, variant->flags);
if (variant->disabled) {
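
On the send side the conversion is the mirror image: fill a CMSG_SPACE()-sized
buffer through CMSG_FIRSTHDR()/CMSG_DATA() instead of initializing an overlay
struct. In Python the kernel-facing layout is produced by sendmsg() itself; a
hedged sketch of passing the same fd twice, as __send_fd() does:

    import array
    import socket

    # Sketch: sendmsg() builds the SCM_RIGHTS control message, so no manual
    # CMSG_LEN()/CMSG_DATA() arithmetic is needed on this side.
    def send_fd_twice(sock, fd):
        fds = array.array("i", [fd, fd])
        sock.sendmsg([b"x"], [(socket.SOL_SOCKET, socket.SCM_RIGHTS, fds)])
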
diff --git a/tools/testing/selftests/net/bpf_offload.py b/tools/testing/selftests/net/bpf_offload.py
index b2c271b79240..c856d266c8f3 100755
--- a/tools/testing/selftests/net/bpf_offload.py
+++ b/tools/testing/selftests/net/bpf_offload.py
@@ -184,8 +184,8 @@ def bpftool_prog_list(expected=None, ns="", exclude_orphaned=True):
progs = [ p for p in progs if not p['orphaned'] ]
if expected is not None:
if len(progs) != expected:
- fail(True, "%d BPF programs loaded, expected %d" %
- (len(progs), expected))
+ fail(True, "%d BPF programs loaded, expected %d\nLoaded Progs:\n%s" %
+ (len(progs), expected, pp.pformat(progs)))
return progs
def bpftool_map_list(expected=None, ns=""):
diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c
index a825e628aee7..ded9b925865e 100644
--- a/tools/testing/selftests/net/cmsg_sender.c
+++ b/tools/testing/selftests/net/cmsg_sender.c
@@ -491,7 +491,8 @@ int main(int argc, char *argv[])
if (err) {
fprintf(stderr, "Can't resolve address [%s]:%s\n",
opt.host, opt.service);
- return ERN_SOCK_CREATE;
+ err = ERN_SOCK_CREATE;
+ goto err_free_buff;
}
if (ai->ai_family == AF_INET6 && opt.sock.proto == IPPROTO_ICMP)
@@ -500,8 +501,8 @@ int main(int argc, char *argv[])
fd = socket(ai->ai_family, opt.sock.type, opt.sock.proto);
if (fd < 0) {
fprintf(stderr, "Can't open socket: %s\n", strerror(errno));
- freeaddrinfo(ai);
- return ERN_RESOLVE;
+ err = ERN_RESOLVE;
+ goto err_free_info;
}
if (opt.sock.proto == IPPROTO_ICMP) {
@@ -574,6 +575,9 @@ int main(int argc, char *argv[])
err_out:
close(fd);
+err_free_info:
freeaddrinfo(ai);
+err_free_buff:
+ free(buf);
return err;
}
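
The fix converts both early returns into a single unwind ladder so that buf
and ai are always released, in reverse order of acquisition, no matter which
step fails. The Python halves of these selftests usually express the same
discipline with context managers; a sketch under that assumption:

    import socket
    from contextlib import ExitStack

    # Sketch: ExitStack releases resources in reverse registration order on
    # every exit path, the guarantee the goto ladder restores in C.
    with ExitStack() as stack:
        sock = stack.enter_context(
            socket.socket(socket.AF_INET, socket.SOCK_DGRAM))
        log = stack.enter_context(open("/tmp/cmsg_sender.log", "w"))
        log.write("resolved\n")  # an exception here still closes log, then sock
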
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index c24417d0047b..d548611e2698 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -26,6 +26,7 @@ CONFIG_IFB=y
CONFIG_INET_DIAG=y
CONFIG_INET_ESP=y
CONFIG_INET_ESP_OFFLOAD=y
+CONFIG_CRYPTO_SHA1=y
CONFIG_NET_FOU=y
CONFIG_NET_FOU_IP_TUNNELS=y
CONFIG_NETFILTER=y
diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile
index d7bb2e80e88c..0a0d4c2a85f7 100644
--- a/tools/testing/selftests/net/forwarding/Makefile
+++ b/tools/testing/selftests/net/forwarding/Makefile
@@ -1,6 +1,8 @@
# SPDX-License-Identifier: GPL-2.0+ OR MIT
-TEST_PROGS = bridge_fdb_learning_limit.sh \
+TEST_PROGS = \
+ bridge_activity_notify.sh \
+ bridge_fdb_learning_limit.sh \
bridge_igmp.sh \
bridge_locked_port.sh \
bridge_mdb.sh \
diff --git a/tools/testing/selftests/net/forwarding/bridge_activity_notify.sh b/tools/testing/selftests/net/forwarding/bridge_activity_notify.sh
new file mode 100755
index 000000000000..a20ef4bd310b
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/bridge_activity_notify.sh
@@ -0,0 +1,173 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +-----------------------+ +------------------------+
+# | H1 (vrf) | | H2 (vrf) |
+# | 192.0.2.1/28 | | 192.0.2.2/28 |
+# | + $h1 | | + $h2 |
+# +----|------------------+ +----|-------------------+
+# | |
+# +----|--------------------------------------------------|-------------------+
+# | SW | | |
+# | +--|--------------------------------------------------|-----------------+ |
+# | | + $swp1 BR1 (802.1d) + $swp2 | |
+# | | | |
+# | +-----------------------------------------------------------------------+ |
+# +---------------------------------------------------------------------------+
+
+ALL_TESTS="
+ new_inactive_test
+ existing_active_test
+ norefresh_test
+"
+
+NUM_NETIFS=4
+source lib.sh
+
+h1_create()
+{
+ simple_if_init "$h1" 192.0.2.1/28
+ defer simple_if_fini "$h1" 192.0.2.1/28
+}
+
+h2_create()
+{
+ simple_if_init "$h2" 192.0.2.2/28
+ defer simple_if_fini "$h2" 192.0.2.2/28
+}
+
+switch_create()
+{
+ ip_link_add br1 type bridge vlan_filtering 0 mcast_snooping 0 \
+ ageing_time "$LOW_AGEING_TIME"
+ ip_link_set_up br1
+
+ ip_link_set_master "$swp1" br1
+ ip_link_set_up "$swp1"
+
+ ip_link_set_master "$swp2" br1
+ ip_link_set_up "$swp2"
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ vrf_prepare
+ defer vrf_cleanup
+
+ h1_create
+ h2_create
+ switch_create
+}
+
+fdb_active_wait()
+{
+ local mac=$1; shift
+
+ bridge -d fdb get "$mac" br br1 | grep -q -v "inactive"
+}
+
+fdb_inactive_wait()
+{
+ local mac=$1; shift
+
+ bridge -d fdb get "$mac" br br1 | grep -q "inactive"
+}
+
+new_inactive_test()
+{
+ local mac="00:11:22:33:44:55"
+
+ # Add a new FDB entry as static and inactive and check that it
+ # becomes active upon traffic.
+ RET=0
+
+ bridge fdb add "$mac" dev "$swp1" master static activity_notify inactive
+ bridge -d fdb get "$mac" br br1 | grep -q "inactive"
+ check_err $? "FDB entry not present as \"inactive\" when should"
+
+ $MZ "$h1" -c 1 -p 64 -a "$mac" -b bcast -t ip -q
+
+ busywait "$BUSYWAIT_TIMEOUT" fdb_active_wait "$mac"
+ check_err $? "FDB entry present as \"inactive\" when should not"
+
+ log_test "Transition from inactive to active"
+
+ bridge fdb del "$mac" dev "$swp1" master
+}
+
+existing_active_test()
+{
+ local mac="00:11:22:33:44:55"
+ local ageing_time
+
+ # Enable activity notifications on an existing dynamic FDB entry and
+ # check that it becomes inactive after the ageing time passed.
+ RET=0
+
+ bridge fdb add "$mac" dev "$swp1" master dynamic
+ bridge fdb replace "$mac" dev "$swp1" master static activity_notify norefresh
+
+ bridge -d fdb get "$mac" br br1 | grep -q "activity_notify"
+ check_err $? "FDB entry not present as \"activity_notify\" when should"
+
+ bridge -d fdb get "$mac" br br1 | grep -q "inactive"
+ check_fail $? "FDB entry present as \"inactive\" when should not"
+
+ ageing_time=$(bridge_ageing_time_get br1)
+ slowwait $((ageing_time * 2)) fdb_inactive_wait "$mac"
+ check_err $? "FDB entry not present as \"inactive\" when should"
+
+ log_test "Transition from active to inactive"
+
+ bridge fdb del "$mac" dev "$swp1" master
+}
+
+norefresh_test()
+{
+ local mac="00:11:22:33:44:55"
+ local updated_time
+
+ # Check that the "updated" time is reset when replacing an FDB entry
+ # without the "norefresh" keyword and that it is not reset when
+ # replacing with the "norefresh" keyword.
+ RET=0
+
+ bridge fdb add "$mac" dev "$swp1" master static
+ sleep 1
+
+ bridge fdb replace "$mac" dev "$swp1" master static activity_notify
+ updated_time=$(bridge -d -s -j fdb get "$mac" br br1 | jq '.[]["updated"]')
+ if [[ $updated_time -ne 0 ]]; then
+ check_err 1 "\"updated\" time was not reset when should"
+ fi
+
+ sleep 1
+ bridge fdb replace "$mac" dev "$swp1" master static norefresh
+ updated_time=$(bridge -d -s -j fdb get "$mac" br br1 | jq '.[]["updated"]')
+ if [[ $updated_time -eq 0 ]]; then
+ check_err 1 "\"updated\" time was reset when should not"
+ fi
+
+ log_test "Resetting of \"updated\" time"
+
+ bridge fdb del "$mac" dev "$swp1" master
+}
+
+if ! bridge fdb help 2>&1 | grep -q "activity_notify"; then
+ echo "SKIP: iproute2 too old, missing bridge FDB activity notification control"
+ exit "$ksft_skip"
+fi
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit "$EXIT_STATUS"
diff --git a/tools/testing/selftests/net/lib/py/utils.py b/tools/testing/selftests/net/lib/py/utils.py
index f395c90fb0f1..cb40ecef9456 100644
--- a/tools/testing/selftests/net/lib/py/utils.py
+++ b/tools/testing/selftests/net/lib/py/utils.py
@@ -1,9 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
-import errno
import json as _json
import os
-import random
import re
import select
import socket
@@ -21,17 +19,19 @@ def fd_read_timeout(fd, timeout):
rlist, _, _ = select.select([fd], [], [], timeout)
if rlist:
return os.read(fd, 1024)
- else:
- raise TimeoutError("Timeout waiting for fd read")
+ raise TimeoutError("Timeout waiting for fd read")
class cmd:
"""
Execute a command on local or remote host.
+ @shell defaults to false; the class will try to split @comm into a list
+ if it is a string containing spaces.
+
Use bkg() instead to run a command in the background.
"""
- def __init__(self, comm, shell=True, fail=True, ns=None, background=False,
+ def __init__(self, comm, shell=None, fail=True, ns=None, background=False,
host=None, timeout=5, ksft_wait=None):
if ns:
comm = f'ip netns exec {ns} ' + comm
@@ -45,6 +45,10 @@ class cmd:
if host:
self.proc = host.cmd(comm)
else:
+ # If the user doesn't explicitly request a shell, try to avoid it.
+ if shell is None and isinstance(comm, str) and ' ' in comm:
+ comm = comm.split()
+
# ksft_wait lets us wait for the background process to fully start:
# we pass an FD to the child process and wait for it to write back.
# Similarly, term_fd tells the child it's time to exit.
@@ -111,12 +115,13 @@ class bkg(cmd):
with bkg("my_binary", ksft_wait=5):
"""
- def __init__(self, comm, shell=True, fail=None, ns=None, host=None,
+ def __init__(self, comm, shell=None, fail=None, ns=None, host=None,
exit_wait=False, ksft_wait=None):
super().__init__(comm, background=True,
shell=shell, fail=fail, ns=ns, host=host,
ksft_wait=ksft_wait)
self.terminate = not exit_wait and not ksft_wait
+ self._exit_wait = exit_wait
self.check_fail = fail
if shell and self.terminate:
@@ -127,7 +132,9 @@ class bkg(cmd):
return self
def __exit__(self, ex_type, ex_value, ex_tb):
- return self.process(terminate=self.terminate, fail=self.check_fail)
+ # Force termination on exception
+ terminate = self.terminate or (self._exit_wait and ex_type)
+ return self.process(terminate=terminate, fail=self.check_fail)
global_defer_queue = []
@@ -135,8 +142,6 @@ global_defer_queue = []
class defer:
def __init__(self, func, *args, **kwargs):
- global global_defer_queue
-
if not callable(func):
raise Exception("defer created with un-callable object, did you call the function instead of passing its name?")
@@ -224,11 +229,11 @@ def bpftrace(expr, json=None, ns=None, host=None, timeout=None):
return cmd_obj
-def rand_port(type=socket.SOCK_STREAM):
+def rand_port(stype=socket.SOCK_STREAM):
"""
Get a random unprivileged port.
"""
- with socket.socket(socket.AF_INET6, type) as s:
+ with socket.socket(socket.AF_INET6, stype) as s:
s.bind(("", 0))
return s.getsockname()[1]
@@ -249,3 +254,21 @@ def wait_port_listen(port, proto="tcp", ns=None, host=None, sleep=0.005, deadlin
if time.monotonic() > end:
raise Exception("Waiting for port listen timed out")
time.sleep(sleep)
+
+
+def wait_file(fname, test_fn, sleep=0.005, deadline=5, encoding='utf-8'):
+ """
+ Wait for file contents on the local system to satisfy a condition.
+ test_fn() should take one argument (file contents) and return whether
+ condition is met.
+ """
+ end = time.monotonic() + deadline
+
+ with open(fname, "r", encoding=encoding) as fp:
+ while True:
+ if test_fn(fp.read()):
+ break
+ fp.seek(0)
+ if time.monotonic() > end:
+ raise TimeoutError("Wait for file contents failed", fname)
+ time.sleep(sleep)
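
wait_file() gives tests a deadline-bounded way to watch a local file instead
of open-coding sleep loops; for example (the path and predicate here are
illustrative only, not from the patch):

    # Wait up to the default 5s deadline for a sysfs attribute to read "1".
    wait_file("/sys/class/net/eth0/carrier", lambda c: c.strip() == "1")
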
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 82cae37d9c20..2f046167a0b6 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -74,6 +74,17 @@ unset join_create_err
unset join_bind_err
unset join_connect_err
+unset fb_ns1
+unset fb_ns2
+unset fb_infinite_map_tx
+unset fb_dss_corruption
+unset fb_simult_conn
+unset fb_mpc_passive
+unset fb_mpc_active
+unset fb_mpc_data
+unset fb_md5_sig
+unset fb_dss
+
# generated using "nfbpf_compile '(ip && (ip[54] & 0xf0) == 0x30) ||
# (ip6 && (ip6[74] & 0xf0) == 0x30)'"
CBPF_MPTCP_SUBOPTION_ADD_ADDR="14,
@@ -1399,6 +1410,115 @@ chk_join_tx_nr()
print_results "join Tx" ${rc}
}
+chk_fallback_nr()
+{
+ local infinite_map_tx=${fb_infinite_map_tx:-0}
+ local dss_corruption=${fb_dss_corruption:-0}
+ local simult_conn=${fb_simult_conn:-0}
+ local mpc_passive=${fb_mpc_passive:-0}
+ local mpc_active=${fb_mpc_active:-0}
+ local mpc_data=${fb_mpc_data:-0}
+ local md5_sig=${fb_md5_sig:-0}
+ local dss=${fb_dss:-0}
+ local rc=${KSFT_PASS}
+ local ns=$1
+ local count
+
+ count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtInfiniteMapTx")
+ if [ -z "$count" ]; then
+ rc=${KSFT_SKIP}
+ elif [ "$count" != "$infinite_map_tx" ]; then
+ rc=${KSFT_FAIL}
+ print_check "$ns infinite map tx fallback"
+ fail_test "got $count infinite map tx fallback[s] in $ns expected $infinite_map_tx"
+ fi
+
+ count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtDSSCorruptionFallback")
+ if [ -z "$count" ]; then
+ rc=${KSFT_SKIP}
+ elif [ "$count" != "$dss_corruption" ]; then
+ rc=${KSFT_FAIL}
+ print_check "$ns dss corruption fallback"
+ fail_test "got $count dss corruption fallback[s] in $ns expected $dss_corruption"
+ fi
+
+ count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtSimultConnectFallback")
+ if [ -z "$count" ]; then
+ rc=${KSFT_SKIP}
+ elif [ "$count" != "$simult_conn" ]; then
+ rc=${KSFT_FAIL}
+ print_check "$ns simult conn fallback"
+ fail_test "got $count simult conn fallback[s] in $ns expected $simult_conn"
+ fi
+
+ count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtMPCapableFallbackACK")
+ if [ -z "$count" ]; then
+ rc=${KSFT_SKIP}
+ elif [ "$count" != "$mpc_passive" ]; then
+ rc=${KSFT_FAIL}
+ print_check "$ns mpc passive fallback"
+ fail_test "got $count mpc passive fallback[s] in $ns expected $mpc_passive"
+ fi
+
+ count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtMPCapableFallbackSYNACK")
+ if [ -z "$count" ]; then
+ rc=${KSFT_SKIP}
+ elif [ "$count" != "$mpc_active" ]; then
+ rc=${KSFT_FAIL}
+ print_check "$ns mpc active fallback"
+ fail_test "got $count mpc active fallback[s] in $ns expected $mpc_active"
+ fi
+
+ count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtMPCapableDataFallback")
+ if [ -z "$count" ]; then
+ rc=${KSFT_SKIP}
+ elif [ "$count" != "$mpc_data" ]; then
+ rc=${KSFT_FAIL}
+ print_check "$ns mpc data fallback"
+ fail_test "got $count mpc data fallback[s] in $ns expected $mpc_data"
+ fi
+
+ count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtMD5SigFallback")
+ if [ -z "$count" ]; then
+ rc=${KSFT_SKIP}
+ elif [ "$count" != "$md5_sig" ]; then
+ rc=${KSFT_FAIL}
+ print_check "$ns MD5 Sig fallback"
+ fail_test "got $count MD5 Sig fallback[s] in $ns expected $md5_sig"
+ fi
+
+ count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtDssFallback")
+ if [ -z "$count" ]; then
+ rc=${KSFT_SKIP}
+ elif [ "$count" != "$dss" ]; then
+ rc=${KSFT_FAIL}
+ print_check "$ns dss fallback"
+ fail_test "got $count dss fallback[s] in $ns expected $dss"
+ fi
+
+ return $rc
+}
+
+chk_fallback_nr_all()
+{
+ local netns=("ns1" "ns2")
+ local fb_ns=("fb_ns1" "fb_ns2")
+ local rc=${KSFT_PASS}
+
+ for i in 0 1; do
+ if [ -n "${!fb_ns[i]}" ]; then
+ eval "${!fb_ns[i]}" \
+ chk_fallback_nr ${netns[i]} || rc=${?}
+ else
+ chk_fallback_nr ${netns[i]} || rc=${?}
+ fi
+ done
+
+ if [ "${rc}" != "${KSFT_PASS}" ]; then
+ print_results "fallback" ${rc}
+ fi
+}
+
chk_join_nr()
{
local syn_nr=$1
@@ -1484,6 +1604,8 @@ chk_join_nr()
join_syn_tx="${join_syn_tx:-${syn_nr}}" \
chk_join_tx_nr
+ chk_fallback_nr_all
+
if $validate_checksum; then
chk_csum_nr $csum_ns1 $csum_ns2
chk_fail_nr $fail_nr $fail_nr
@@ -3337,6 +3459,7 @@ fail_tests()
join_csum_ns1=+1 join_csum_ns2=+0 \
join_fail_nr=1 join_rst_nr=0 join_infi_nr=1 \
join_corrupted_pkts="$(pedit_action_pkts)" \
+ fb_ns1="fb_dss=1" fb_ns2="fb_infinite_map_tx=1" \
chk_join_nr 0 0 0
chk_fail_nr 1 -1 invert
fi
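
chk_fallback_nr() compares eight MPTcpExt fallback MIBs against per-test
expectations that default to zero, and chk_fallback_nr_all() applies the
per-namespace overrides carried in fb_ns1/fb_ns2. The table-driven shape is
easier to see in Python; a sketch (two of the real MIB names shown, and
get_counter() shells out the same way mptcp_lib_get_counter does):

    import subprocess

    # Sketch: compare fallback MIBs against expectations defaulting to 0.
    FALLBACK_MIBS = ("MPTcpExtInfiniteMapTx", "MPTcpExtDssFallback")

    def get_counter(ns, name):
        out = subprocess.run(
            ["ip", "netns", "exec", ns, "nstat", "-asz", name],
            capture_output=True, text=True).stdout
        return next((int(l.split()[1]) for l in out.splitlines()
                     if l.startswith(name)), 0)

    def chk_fallback(ns, overrides):
        expected = {m: overrides.get(m, 0) for m in FALLBACK_MIBS}
        return [f"{m}: got {get_counter(ns, m)}, expected {want}"
                for m, want in expected.items()
                if get_counter(ns, m) != want]
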
diff --git a/tools/testing/selftests/net/netfilter/config b/tools/testing/selftests/net/netfilter/config
index 79d5b33966ba..305e46b819cb 100644
--- a/tools/testing/selftests/net/netfilter/config
+++ b/tools/testing/selftests/net/netfilter/config
@@ -13,6 +13,7 @@ CONFIG_BRIDGE_VLAN_FILTERING=y
CONFIG_CGROUP_BPF=y
CONFIG_DUMMY=m
CONFIG_INET_ESP=m
+CONFIG_CRYPTO_SHA1=m
CONFIG_IP_NF_MATCH_RPFILTER=m
CONFIG_IP6_NF_MATCH_RPFILTER=m
CONFIG_IP_NF_IPTABLES=m
diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
index 8a0396bfaf99..b521e0dea506 100644
--- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
+++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
@@ -1877,7 +1877,7 @@ class OvsPacket(GenericNetlinkSocket):
elif msg["cmd"] == OvsPacket.OVS_PACKET_CMD_EXECUTE:
up.execute(msg)
else:
- print("Unkonwn cmd: %d" % msg["cmd"])
+ print("Unknown cmd: %d" % msg["cmd"])
except NetlinkError as ne:
raise ne
diff --git a/tools/testing/selftests/net/packetdrill/ksft_runner.sh b/tools/testing/selftests/net/packetdrill/ksft_runner.sh
index a7e790af38ff..0ae6eeeb1a8e 100755
--- a/tools/testing/selftests/net/packetdrill/ksft_runner.sh
+++ b/tools/testing/selftests/net/packetdrill/ksft_runner.sh
@@ -3,21 +3,22 @@
source "$(dirname $(realpath $0))/../../kselftest/ktap_helpers.sh"
-readonly ipv4_args=('--ip_version=ipv4 '
- '--local_ip=192.168.0.1 '
- '--gateway_ip=192.168.0.1 '
- '--netmask_ip=255.255.0.0 '
- '--remote_ip=192.0.2.1 '
- '-D CMSG_LEVEL_IP=SOL_IP '
- '-D CMSG_TYPE_RECVERR=IP_RECVERR ')
-
-readonly ipv6_args=('--ip_version=ipv6 '
- '--mtu=1520 '
- '--local_ip=fd3d:0a0b:17d6::1 '
- '--gateway_ip=fd3d:0a0b:17d6:8888::1 '
- '--remote_ip=fd3d:fa7b:d17d::1 '
- '-D CMSG_LEVEL_IP=SOL_IPV6 '
- '-D CMSG_TYPE_RECVERR=IPV6_RECVERR ')
+declare -A ip_args=(
+ [ipv4]="--ip_version=ipv4
+ --local_ip=192.168.0.1
+ --gateway_ip=192.168.0.1
+ --netmask_ip=255.255.0.0
+ --remote_ip=192.0.2.1
+ -D CMSG_LEVEL_IP=SOL_IP
+ -D CMSG_TYPE_RECVERR=IP_RECVERR"
+ [ipv6]="--ip_version=ipv6
+ --mtu=1520
+ --local_ip=fd3d:0a0b:17d6::1
+ --gateway_ip=fd3d:0a0b:17d6:8888::1
+ --remote_ip=fd3d:fa7b:d17d::1
+ -D CMSG_LEVEL_IP=SOL_IPV6
+ -D CMSG_TYPE_RECVERR=IPV6_RECVERR"
+)
if [ $# -ne 1 ]; then
ktap_exit_fail_msg "usage: $0 <script>"
@@ -38,12 +39,20 @@ if [[ -n "${KSFT_MACHINE_SLOW}" ]]; then
failfunc=ktap_test_xfail
fi
+ip_versions=$(grep -E '^--ip_version=' $script | cut -d '=' -f 2)
+if [[ -z $ip_versions ]]; then
+ ip_versions="ipv4 ipv6"
+elif [[ ! "$ip_versions" =~ ^ipv[46]$ ]]; then
+ ktap_exit_fail_msg "Too many or unsupported --ip_version: $ip_versions"
+ exit "$KSFT_FAIL"
+fi
+
ktap_print_header
ktap_set_plan 2
-unshare -n packetdrill ${ipv4_args[@]} ${optargs[@]} $script > /dev/null \
- && ktap_test_pass "ipv4" || $failfunc "ipv4"
-unshare -n packetdrill ${ipv6_args[@]} ${optargs[@]} $script > /dev/null \
- && ktap_test_pass "ipv6" || $failfunc "ipv6"
+for ip_version in $ip_versions; do
+ unshare -n packetdrill ${ip_args[$ip_version]} ${optargs[@]} $script > /dev/null \
+ && ktap_test_pass $ip_version || $failfunc $ip_version
+done
ktap_finished
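
The runner now derives which address families to exercise from the script
itself: no --ip_version line means both, exactly one pinned value means just
that family, and anything else is an error. The rule, restated as a Python
sketch for clarity (not part of the patch):

    import re
    import sys

    # Sketch of the new family-selection rule in ksft_runner.sh.
    def detect_ip_versions(script_text):
        pinned = re.findall(r"^--ip_version=(\S+)", script_text, flags=re.M)
        if not pinned:
            return ["ipv4", "ipv6"]
        if pinned == ["ipv4"] or pinned == ["ipv6"]:
            return pinned
        sys.exit(f"Too many or unsupported --ip_version: {pinned}")
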
diff --git a/tools/testing/selftests/net/psock_tpacket.c b/tools/testing/selftests/net/psock_tpacket.c
index 221270cee3ea..2938045c5cf9 100644
--- a/tools/testing/selftests/net/psock_tpacket.c
+++ b/tools/testing/selftests/net/psock_tpacket.c
@@ -22,6 +22,7 @@
* - TPACKET_V3: RX_RING
*/
+#undef NDEBUG
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
@@ -33,7 +34,6 @@
#include <ctype.h>
#include <fcntl.h>
#include <unistd.h>
-#include <bits/wordsize.h>
#include <net/ethernet.h>
#include <netinet/ip.h>
#include <arpa/inet.h>
@@ -785,7 +785,7 @@ static int test_kernel_bit_width(void)
static int test_user_bit_width(void)
{
- return __WORDSIZE;
+ return sizeof(long) * 8;
}
static const char *tpacket_str[] = {
diff --git a/tools/testing/selftests/net/route_hint.sh b/tools/testing/selftests/net/route_hint.sh
new file mode 100755
index 000000000000..2db01ece0cc1
--- /dev/null
+++ b/tools/testing/selftests/net/route_hint.sh
@@ -0,0 +1,79 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test ensures directed broadcast routes use the dst hint mechanism.
+
+source lib.sh
+
+CLIENT_IP4="192.168.0.1"
+SERVER_IP4="192.168.0.2"
+BROADCAST_ADDRESS="192.168.0.255"
+
+setup() {
+ setup_ns CLIENT_NS SERVER_NS
+
+ ip -net "${SERVER_NS}" link add link1 type veth peer name link0 netns "${CLIENT_NS}"
+
+ ip -net "${CLIENT_NS}" link set link0 up
+ ip -net "${CLIENT_NS}" addr add "${CLIENT_IP4}/24" dev link0
+
+ ip -net "${SERVER_NS}" link set link1 up
+ ip -net "${SERVER_NS}" addr add "${SERVER_IP4}/24" dev link1
+
+ ip netns exec "${CLIENT_NS}" ethtool -K link0 tcp-segmentation-offload off
+ ip netns exec "${SERVER_NS}" sh -c "echo 500000000 > /sys/class/net/link1/gro_flush_timeout"
+ ip netns exec "${SERVER_NS}" sh -c "echo 1 > /sys/class/net/link1/napi_defer_hard_irqs"
+ ip netns exec "${SERVER_NS}" ethtool -K link1 generic-receive-offload on
+}
+
+cleanup() {
+ ip -net "${SERVER_NS}" link del link1
+ cleanup_ns "${CLIENT_NS}" "${SERVER_NS}"
+}
+
+directed_bcast_hint_test()
+{
+ local rc=0
+
+ echo "Testing for directed broadcast route hint"
+
+ orig_in_brd=$(ip netns exec "${SERVER_NS}" lnstat -j -i1 -c1 | jq '.in_brd')
+ ip netns exec "${CLIENT_NS}" mausezahn link0 -a own -b bcast -A "${CLIENT_IP4}" \
+ -B "${BROADCAST_ADDRESS}" -c1 -t tcp "sp=1-100,dp=1234,s=1,a=0" -p 5 -q
+ sleep 1
+ new_in_brd=$(ip netns exec "${SERVER_NS}" lnstat -j -i1 -c1 | jq '.in_brd')
+
+ res=$(echo "${new_in_brd} - ${orig_in_brd}" | bc)
+
+ if [ "${res}" -lt 100 ]; then
+ echo "[ OK ]"
+ rc="${ksft_pass}"
+ else
+ echo "[FAIL] expected in_brd to be under 100, got ${res}"
+ rc="${ksft_fail}"
+ fi
+
+ return "${rc}"
+}
+
+if [ ! -x "$(command -v mausezahn)" ]; then
+ echo "SKIP: Could not run test without mausezahn tool"
+ exit "${ksft_skip}"
+fi
+
+if [ ! -x "$(command -v jq)" ]; then
+ echo "SKIP: Could not run test without jq tool"
+ exit "${ksft_skip}"
+fi
+
+if [ ! -x "$(command -v bc)" ]; then
+ echo "SKIP: Could not run test without bc tool"
+ exit "${ksft_skip}"
+fi
+
+trap cleanup EXIT
+
+setup
+
+directed_bcast_hint_test
+exit $?
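
The pass criterion is a counter delta: 100 broadcast SYNs are injected and
in_brd is sampled before and after, so if the dst hint is used, far fewer
than 100 full input-route lookups (and in_brd increments) should occur. The
snapshot/stimulate/snapshot pattern, as a Python sketch where
send_broadcast_burst() stands in for the mausezahn invocation:

    import json
    import subprocess

    # Sketch: sample in_brd around the stimulus and assert on the delta.
    def in_brd(ns):
        out = subprocess.run(
            ["ip", "netns", "exec", ns, "lnstat", "-j", "-i1", "-c1"],
            capture_output=True, text=True).stdout
        return int(json.loads(out)["in_brd"])

    before = in_brd("server_ns")
    send_broadcast_burst()  # hypothetical: the mausezahn command above
    assert in_brd("server_ns") - before < 100, "dst hint not used"
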
diff --git a/tools/testing/selftests/net/rps_default_mask.sh b/tools/testing/selftests/net/rps_default_mask.sh
index 4287a8529890..b200019b3c80 100755
--- a/tools/testing/selftests/net/rps_default_mask.sh
+++ b/tools/testing/selftests/net/rps_default_mask.sh
@@ -54,16 +54,16 @@ cleanup
echo 1 > /proc/sys/net/core/rps_default_mask
setup
-chk_rps "changing rps_default_mask dont affect existing devices" "" lo $INITIAL_RPS_DEFAULT_MASK
+chk_rps "changing rps_default_mask doesn't affect existing devices" "" lo $INITIAL_RPS_DEFAULT_MASK
echo 3 > /proc/sys/net/core/rps_default_mask
-chk_rps "changing rps_default_mask dont affect existing netns" $NETNS lo 0
+chk_rps "changing rps_default_mask doesn't affect existing netns" $NETNS lo 0
ip link add name $VETH type veth peer netns $NETNS name $VETH
ip link set dev $VETH up
ip -n $NETNS link set dev $VETH up
-chk_rps "changing rps_default_mask affect newly created devices" "" $VETH 3
-chk_rps "changing rps_default_mask don't affect newly child netns[II]" $NETNS $VETH 0
+chk_rps "changing rps_default_mask affects newly created devices" "" $VETH 3
+chk_rps "changing rps_default_mask doesn't affect newly child netns[II]" $NETNS $VETH 0
ip link del dev $VETH
ip netns del $NETNS
@@ -72,8 +72,8 @@ chk_rps "rps_default_mask is 0 by default in child netns" "$NETNS" lo 0
ip netns exec $NETNS sysctl -qw net.core.rps_default_mask=1
ip link add name $VETH type veth peer netns $NETNS name $VETH
-chk_rps "changing rps_default_mask in child ns don't affect the main one" "" lo $INITIAL_RPS_DEFAULT_MASK
+chk_rps "changing rps_default_mask in child ns doesn't affect the main one" "" lo $INITIAL_RPS_DEFAULT_MASK
chk_rps "changing rps_default_mask in child ns affects new childns devices" $NETNS $VETH 1
-chk_rps "changing rps_default_mask in child ns don't affect existing devices" $NETNS lo 0
+chk_rps "changing rps_default_mask in child ns doesn't affect existing devices" $NETNS lo 0
exit $ret
diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
index d6c00efeb664..9da47a845be6 100755
--- a/tools/testing/selftests/net/rtnetlink.sh
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -313,6 +313,8 @@ kci_test_addrlft()
slowwait 5 check_addr_not_exist "$devdummy" "10.23.11."
if [ $? -eq 1 ]; then
+ # help troubleshoot the reason for the failure
+ run_cmd ip addr show dev "$devdummy"
check_err 1
end_test "FAIL: preferred_lft addresses remaining"
return
@@ -323,6 +325,11 @@ kci_test_addrlft()
kci_test_promote_secondaries()
{
+ run_cmd ifconfig "$devdummy"
+ if [ $ret -ne 0 ]; then
+ end_test "SKIP: ifconfig not installed"
+ return $ksft_skip
+ fi
promote=$(sysctl -n net.ipv4.conf.$devdummy.promote_secondaries)
sysctl -q net.ipv4.conf.$devdummy.promote_secondaries=1
@@ -1201,6 +1208,12 @@ do_test_address_proto()
local ret=0
local err
+ run_cmd_grep 'proto' ip address help
+ if [ $? -ne 0 ]; then
+ end_test "SKIP: addr proto ${what}: iproute2 too old"
+ return $ksft_skip
+ fi
+
ip address add dev "$devdummy" "$addr3"
check_err $?
proto=$(address_get_proto "$addr3")
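
Both hunks add the probe-then-skip convention: detect a missing tool, or an
iproute2 without `proto` support, up front and return the kselftest skip code
instead of reporting a false failure. In Python terms (a sketch; 4 is the
standard kselftest skip exit code, and iproute2 prints usage text to stderr):

    import shutil
    import subprocess
    import sys

    KSFT_SKIP = 4  # standard kselftest skip exit code

    # Sketch of the probe-then-skip convention added above.
    if shutil.which("ifconfig") is None:
        print("SKIP: ifconfig not installed")
        sys.exit(KSFT_SKIP)

    helptext = subprocess.run(["ip", "address", "help"],
                              capture_output=True, text=True).stderr
    if "proto" not in helptext:
        print("SKIP: addr proto: iproute2 too old")
        sys.exit(KSFT_SKIP)
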
diff --git a/tools/testing/selftests/net/socket.c b/tools/testing/selftests/net/socket.c
index db1aeb8c5d1e..be1080003c61 100644
--- a/tools/testing/selftests/net/socket.c
+++ b/tools/testing/selftests/net/socket.c
@@ -39,6 +39,7 @@ static int run_tests(void)
{
char err_string1[ERR_STRING_SZ];
char err_string2[ERR_STRING_SZ];
+ const char *msg1, *msg2;
int i, err;
err = 0;
@@ -56,13 +57,13 @@ static int run_tests(void)
errno == -s->expect)
continue;
- strerror_r(-s->expect, err_string1, ERR_STRING_SZ);
- strerror_r(errno, err_string2, ERR_STRING_SZ);
+ msg1 = strerror_r(-s->expect, err_string1, ERR_STRING_SZ);
+ msg2 = strerror_r(errno, err_string2, ERR_STRING_SZ);
fprintf(stderr, "socket(%d, %d, %d) expected "
"err (%s) got (%s)\n",
s->domain, s->type, s->protocol,
- err_string1, err_string2);
+ msg1, msg2);
err = -1;
break;
@@ -70,12 +71,12 @@ static int run_tests(void)
close(fd);
if (s->expect < 0) {
- strerror_r(errno, err_string1, ERR_STRING_SZ);
+ msg1 = strerror_r(errno, err_string1, ERR_STRING_SZ);
fprintf(stderr, "socket(%d, %d, %d) expected "
"success got err (%s)\n",
s->domain, s->type, s->protocol,
- err_string1);
+ msg1);
err = -1;
break;
diff --git a/tools/testing/selftests/net/test_bridge_backup_port.sh b/tools/testing/selftests/net/test_bridge_backup_port.sh
index 1b3f89e2b86e..2a7224fe74f2 100755
--- a/tools/testing/selftests/net/test_bridge_backup_port.sh
+++ b/tools/testing/selftests/net/test_bridge_backup_port.sh
@@ -315,6 +315,29 @@ backup_port()
tc_check_packets $sw1 "dev vx0 egress" 101 1
log_test $? 0 "No forwarding out of vx0"
+ # Check that packets are forwarded out of vx0 when swp1 is
+ # administratively down and out of swp1 when it is administratively up
+ # again.
+ run_cmd "ip -n $sw1 link set dev swp1 down"
+ busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled
+ log_test $? 0 "swp1 administratively down"
+
+ run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets $sw1 "dev swp1 egress" 101 3
+ log_test $? 0 "No forwarding out of swp1"
+ tc_check_packets $sw1 "dev vx0 egress" 101 2
+ log_test $? 0 "Forwarding out of vx0"
+
+ run_cmd "ip -n $sw1 link set dev swp1 up"
+ busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 forwarding
+ log_test $? 0 "swp1 administratively up"
+
+ run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets $sw1 "dev swp1 egress" 101 4
+ log_test $? 0 "Forwarding out of swp1"
+ tc_check_packets $sw1 "dev vx0 egress" 101 2
+ log_test $? 0 "No forwarding out of vx0"
+
# Remove vx0 as the backup port of swp1 and check that packets are no
# longer forwarded out of vx0 when swp1 does not have a carrier.
run_cmd "bridge -n $sw1 link set dev swp1 nobackup_port"
@@ -322,9 +345,9 @@ backup_port()
log_test $? 1 "vx0 not configured as backup port of swp1"
run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
- tc_check_packets $sw1 "dev swp1 egress" 101 4
+ tc_check_packets $sw1 "dev swp1 egress" 101 5
log_test $? 0 "Forwarding out of swp1"
- tc_check_packets $sw1 "dev vx0 egress" 101 1
+ tc_check_packets $sw1 "dev vx0 egress" 101 2
log_test $? 0 "No forwarding out of vx0"
run_cmd "ip -n $sw1 link set dev swp1 carrier off"
@@ -332,9 +355,9 @@ backup_port()
log_test $? 0 "swp1 carrier off"
run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
- tc_check_packets $sw1 "dev swp1 egress" 101 4
+ tc_check_packets $sw1 "dev swp1 egress" 101 5
log_test $? 0 "No forwarding out of swp1"
- tc_check_packets $sw1 "dev vx0 egress" 101 1
+ tc_check_packets $sw1 "dev vx0 egress" 101 2
log_test $? 0 "No forwarding out of vx0"
}
diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index 0f5640d8dc7f..cd67b0ae75a7 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -439,6 +439,8 @@ TEST_F(tls, sendfile)
EXPECT_GE(filefd, 0);
fstat(filefd, &st);
EXPECT_GE(sendfile(self->fd, filefd, 0, st.st_size), 0);
+
+ close(filefd);
}
TEST_F(tls, send_then_sendfile)
@@ -460,6 +462,9 @@ TEST_F(tls, send_then_sendfile)
EXPECT_GE(sendfile(self->fd, filefd, 0, st.st_size), 0);
EXPECT_EQ(recv(self->cfd, buf, st.st_size, MSG_WAITALL), st.st_size);
+
+ free(buf);
+ close(filefd);
}
static void chunked_sendfile(struct __test_metadata *_metadata,
diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c
index 7b861a8e997a..d843643ced6b 100644
--- a/tools/testing/vsock/util.c
+++ b/tools/testing/vsock/util.c
@@ -756,7 +756,6 @@ void setsockopt_ull_check(int fd, int level, int optname,
fail:
fprintf(stderr, "%s val %llu\n", errmsg, val);
exit(EXIT_FAILURE);
-;
}
/* Set "int" socket option and check that it's indeed set */