568 files changed, 27247 insertions, 8422 deletions
diff --git a/Documentation/Makefile b/Documentation/Makefile index b98477df5ddf..820f07e0afe6 100644 --- a/Documentation/Makefile +++ b/Documentation/Makefile @@ -104,22 +104,6 @@ quiet_cmd_sphinx = SPHINX $@ --> file://$(abspath $(BUILDDIR)/$3/$4) cp $(if $(patsubst /%,,$(DOCS_CSS)),$(abspath $(srctree)/$(DOCS_CSS)),$(DOCS_CSS)) $(BUILDDIR)/$3/_static/; \ fi -YNL_INDEX:=$(srctree)/Documentation/networking/netlink_spec/index.rst -YNL_RST_DIR:=$(srctree)/Documentation/networking/netlink_spec -YNL_YAML_DIR:=$(srctree)/Documentation/netlink/specs -YNL_TOOL:=$(srctree)/tools/net/ynl/pyynl/ynl_gen_rst.py - -YNL_RST_FILES_TMP := $(patsubst %.yaml,%.rst,$(wildcard $(YNL_YAML_DIR)/*.yaml)) -YNL_RST_FILES := $(patsubst $(YNL_YAML_DIR)%,$(YNL_RST_DIR)%, $(YNL_RST_FILES_TMP)) - -$(YNL_INDEX): $(YNL_RST_FILES) - $(Q)$(YNL_TOOL) -o $@ -x - -$(YNL_RST_DIR)/%.rst: $(YNL_YAML_DIR)/%.yaml $(YNL_TOOL) - $(Q)$(YNL_TOOL) -i $< -o $@ - -htmldocs texinfodocs latexdocs epubdocs xmldocs: $(YNL_INDEX) - htmldocs: @$(srctree)/scripts/sphinx-pre-install --version-check @+$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,html,$(var),,$(var))) @@ -186,7 +170,6 @@ refcheckdocs: $(Q)cd $(srctree);scripts/documentation-file-ref-check cleandocs: - $(Q)rm -f $(YNL_INDEX) $(YNL_RST_FILES) $(Q)rm -rf $(BUILDDIR) $(Q)$(MAKE) BUILDDIR=$(abspath $(BUILDDIR)) $(build)=Documentation/userspace-api/media clean diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst index 7b0c4291c686..2ef50828aff1 100644 --- a/Documentation/admin-guide/sysctl/net.rst +++ b/Documentation/admin-guide/sysctl/net.rst @@ -222,6 +222,8 @@ rmem_max The maximum receive socket buffer size in bytes. +Default: 4194304 + rps_default_mask ---------------- @@ -247,6 +249,8 @@ wmem_max The maximum send socket buffer size in bytes. +Default: 4194304 + message_burst and message_cost ------------------------------ diff --git a/Documentation/conf.py b/Documentation/conf.py index 700516238d3f..f9828f3862f9 100644 --- a/Documentation/conf.py +++ b/Documentation/conf.py @@ -42,6 +42,15 @@ exclude_patterns = [] dyn_include_patterns = [] dyn_exclude_patterns = ["output"] +# Currently, only netlink/specs has a parser for yaml. +# Prefer using include patterns if available, as it is faster +if has_include_patterns: + dyn_include_patterns.append("netlink/specs/*.yaml") +else: + dyn_exclude_patterns.append("netlink/*.yaml") + dyn_exclude_patterns.append("devicetree/bindings/**.yaml") + dyn_exclude_patterns.append("core-api/kho/bindings/**.yaml") + # Properly handle include/exclude patterns # ---------------------------------------- @@ -102,12 +111,12 @@ extensions = [ "kernel_include", "kfigure", "maintainers_include", + "parser_yaml", "rstFlatTable", "sphinx.ext.autosectionlabel", "sphinx.ext.ifconfig", "translations", ] - # Since Sphinx version 3, the C function parser is more pedantic with regards # to type checking. Due to that, having macros at c:function cause problems. # Those needed to be escaped by using c_id_attributes[] array @@ -204,10 +213,11 @@ else: # Add any paths that contain templates here, relative to this directory. templates_path = ["sphinx/templates"] -# The suffix(es) of source filenames. -# You can specify multiple suffix as a list of string: -# source_suffix = ['.rst', '.md'] -source_suffix = '.rst' +# The suffixes of source filenames that will be automatically parsed +source_suffix = { + ".rst": "restructuredtext", + ".yaml": "yaml", +} # The encoding of source files. 
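The include/exclude pattern selection in the conf.py hunk above is easy to sanity-check outside Sphinx; a minimal sketch, using fnmatch as an approximation of Sphinx's glob matching and illustrative file paths::

    import fnmatch

    # Sketch: which YAML sources the include pattern above would select.
    # Paths are illustrative; fnmatch only approximates Sphinx's globbing.
    candidates = [
        "netlink/specs/devlink.yaml",
        "netlink/netlink-raw.yaml",
        "devicetree/bindings/net/ethernet-controller.yaml",
    ]
    for path in candidates:
        parsed = fnmatch.fnmatch(path, "netlink/specs/*.yaml")
        print(f"{path}: {'parsed' if parsed else 'ignored'}")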
# source_encoding = 'utf-8-sig' diff --git a/Documentation/devicetree/bindings/net/airoha,en7581-npu.yaml b/Documentation/devicetree/bindings/net/airoha,en7581-npu.yaml index 76dd97c3fb40..c7644e6586d3 100644 --- a/Documentation/devicetree/bindings/net/airoha,en7581-npu.yaml +++ b/Documentation/devicetree/bindings/net/airoha,en7581-npu.yaml @@ -41,9 +41,21 @@ properties: - description: wlan irq line5 memory-region: - maxItems: 1 - description: - Memory used to store NPU firmware binary. + oneOf: + - items: + - description: NPU firmware binary region + - items: + - description: NPU firmware binary region + - description: NPU wlan offload RX buffers region + - description: NPU wlan offload TX buffers region + - description: NPU wlan offload TX packet identifiers region + + memory-region-names: + items: + - const: firmware + - const: pkt + - const: tx-pkt + - const: tx-bufid required: - compatible @@ -79,6 +91,8 @@ examples: <GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 122 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 123 IRQ_TYPE_LEVEL_HIGH>; - memory-region = <&npu_binary>; + memory-region = <&npu_firmware>, <&npu_pkt>, <&npu_txpkt>, + <&npu_txbufid>; + memory-region-names = "firmware", "pkt", "tx-pkt", "tx-bufid"; }; }; diff --git a/Documentation/devicetree/bindings/net/allwinner,sun4i-a10-emac.yaml b/Documentation/devicetree/bindings/net/allwinner,sun4i-a10-emac.yaml index eb26623dab51..d4d8f3a7918e 100644 --- a/Documentation/devicetree/bindings/net/allwinner,sun4i-a10-emac.yaml +++ b/Documentation/devicetree/bindings/net/allwinner,sun4i-a10-emac.yaml @@ -33,6 +33,15 @@ properties: - items: - description: phandle to SRAM - description: register value for device + dmas: + items: + - description: RX DMA Channel + - description: TX DMA Channel + + dma-names: + items: + - const: rx + - const: tx required: - compatible diff --git a/Documentation/devicetree/bindings/net/altr,socfpga-stmmac.yaml b/Documentation/devicetree/bindings/net/altr,socfpga-stmmac.yaml index 3a22d35db778..fc445ad5a1f1 100644 --- a/Documentation/devicetree/bindings/net/altr,socfpga-stmmac.yaml +++ b/Documentation/devicetree/bindings/net/altr,socfpga-stmmac.yaml @@ -62,6 +62,13 @@ properties: - const: stmmaceth - const: ptp_ref + interrupts: + maxItems: 1 + + interrupt-names: + items: + - const: macirq + iommus: minItems: 1 maxItems: 2 diff --git a/Documentation/devicetree/bindings/net/ethernet-controller.yaml b/Documentation/devicetree/bindings/net/ethernet-controller.yaml index 66b1cfbbfe22..2c924d296a8f 100644 --- a/Documentation/devicetree/bindings/net/ethernet-controller.yaml +++ b/Documentation/devicetree/bindings/net/ethernet-controller.yaml @@ -108,6 +108,11 @@ properties: $ref: "#/properties/phy-handle" deprecated: true + ptp-timer: + $ref: /schemas/types.yaml#/definitions/phandle + description: + Specifies a reference to a node representing an IEEE 1588 PTP device. + rx-fifo-depth: $ref: /schemas/types.yaml#/definitions/uint32 description: diff --git a/Documentation/devicetree/bindings/net/fsl,fman-dtsec.yaml b/Documentation/devicetree/bindings/net/fsl,fman-dtsec.yaml index 60aaf30d68ed..ef1e30a48c91 100644 --- a/Documentation/devicetree/bindings/net/fsl,fman-dtsec.yaml +++ b/Documentation/devicetree/bindings/net/fsl,fman-dtsec.yaml @@ -81,10 +81,6 @@ properties: An array of two references: the first is the FMan RX port and the second is the TX port used by this MAC. 
- ptp-timer: - $ref: /schemas/types.yaml#/definitions/phandle - description: A reference to the IEEE1588 timer - phys: description: A reference to the SerDes lane(s) maxItems: 1 diff --git a/Documentation/devicetree/bindings/net/litex,liteeth.yaml b/Documentation/devicetree/bindings/net/litex,liteeth.yaml index ebf4e360f8dd..200b198b0d9b 100644 --- a/Documentation/devicetree/bindings/net/litex,liteeth.yaml +++ b/Documentation/devicetree/bindings/net/litex,liteeth.yaml @@ -86,14 +86,12 @@ examples: phy-handle = <ð_phy>; mdio { - #address-cells = <1>; - #size-cells = <0>; + #address-cells = <1>; + #size-cells = <0>; - eth_phy: ethernet-phy@0 { - reg = <0>; - }; + eth_phy: ethernet-phy@0 { + reg = <0>; + }; }; }; ... - -# vim: set ts=2 sw=2 sts=2 tw=80 et cc=80 ft=yaml : diff --git a/Documentation/devicetree/bindings/net/microchip,sparx5-switch.yaml b/Documentation/devicetree/bindings/net/microchip,sparx5-switch.yaml index a73fc5036905..082982c59a55 100644 --- a/Documentation/devicetree/bindings/net/microchip,sparx5-switch.yaml +++ b/Documentation/devicetree/bindings/net/microchip,sparx5-switch.yaml @@ -245,4 +245,3 @@ examples: }; ... -# vim: set ts=2 sw=2 sts=2 tw=80 et cc=80 ft=yaml : diff --git a/Documentation/devicetree/bindings/net/nfc/ti,trf7970a.yaml b/Documentation/devicetree/bindings/net/nfc/ti,trf7970a.yaml index 5f49bd9ac5e6..7e96a625f0cf 100644 --- a/Documentation/devicetree/bindings/net/nfc/ti,trf7970a.yaml +++ b/Documentation/devicetree/bindings/net/nfc/ti,trf7970a.yaml @@ -56,10 +56,10 @@ properties: Regulator for supply voltage to VIN pin ti,rx-gain-reduction-db: - $ref: /schemas/types.yaml#/definitions/uint32 description: | Specify an RX gain reduction to reduce antenna sensitivity with 5dB per - increment, with a maximum of 15dB. Supported values: [0, 5, 10, 15]. + increment, with a maximum of 15dB. + enum: [ 0, 5, 10, 15] required: - compatible diff --git a/Documentation/devicetree/bindings/net/pse-pd/skyworks,si3474.yaml b/Documentation/devicetree/bindings/net/pse-pd/skyworks,si3474.yaml new file mode 100644 index 000000000000..edd36a43a387 --- /dev/null +++ b/Documentation/devicetree/bindings/net/pse-pd/skyworks,si3474.yaml @@ -0,0 +1,144 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/pse-pd/skyworks,si3474.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Skyworks Si3474 Power Sourcing Equipment controller + +maintainers: + - Piotr Kubik <piotr.kubik@adtran.com> + +allOf: + - $ref: pse-controller.yaml# + +properties: + compatible: + enum: + - skyworks,si3474 + + reg: + maxItems: 2 + + reg-names: + items: + - const: main + - const: secondary + + channels: + description: The Si3474 is a single-chip PoE PSE controller managing + 8 physical power delivery channels. Internally, it's structured + into two logical "Quads". + Quad 0 Manages physical channels ('ports' in datasheet) 0, 1, 2, 3 + Quad 1 Manages physical channels ('ports' in datasheet) 4, 5, 6, 7. 
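The channel-to-quad split described there is plain integer division by four; a one-line restatement (hypothetical helper, not part of the binding)::

    # Sketch: Si3474 physical channel ('port') to logical quad mapping.
    def si3474_quad(channel: int) -> int:
        # Channels 0-3 live on Quad 0, channels 4-7 on Quad 1.
        return channel // 4

    print([si3474_quad(ch) for ch in range(8)])  # [0, 0, 0, 0, 1, 1, 1, 1]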
+ + type: object + additionalProperties: false + + properties: + "#address-cells": + const: 1 + + "#size-cells": + const: 0 + + patternProperties: + '^channel@[0-7]$': + type: object + additionalProperties: false + + properties: + reg: + maxItems: 1 + + required: + - reg + + required: + - "#address-cells" + - "#size-cells" + +required: + - compatible + - reg + - pse-pis + +unevaluatedProperties: false + +examples: + - | + i2c { + #address-cells = <1>; + #size-cells = <0>; + + ethernet-pse@26 { + compatible = "skyworks,si3474"; + reg-names = "main", "secondary"; + reg = <0x26>, <0x27>; + + channels { + #address-cells = <1>; + #size-cells = <0>; + phys0_0: channel@0 { + reg = <0>; + }; + phys0_1: channel@1 { + reg = <1>; + }; + phys0_2: channel@2 { + reg = <2>; + }; + phys0_3: channel@3 { + reg = <3>; + }; + phys0_4: channel@4 { + reg = <4>; + }; + phys0_5: channel@5 { + reg = <5>; + }; + phys0_6: channel@6 { + reg = <6>; + }; + phys0_7: channel@7 { + reg = <7>; + }; + }; + pse-pis { + #address-cells = <1>; + #size-cells = <0>; + pse_pi0: pse-pi@0 { + reg = <0>; + #pse-cells = <0>; + pairset-names = "alternative-a", "alternative-b"; + pairsets = <&phys0_0>, <&phys0_1>; + polarity-supported = "MDI-X", "S"; + vpwr-supply = <®_pse>; + }; + pse_pi1: pse-pi@1 { + reg = <1>; + #pse-cells = <0>; + pairset-names = "alternative-a", "alternative-b"; + pairsets = <&phys0_2>, <&phys0_3>; + polarity-supported = "MDI-X", "S"; + vpwr-supply = <®_pse>; + }; + pse_pi2: pse-pi@2 { + reg = <2>; + #pse-cells = <0>; + pairset-names = "alternative-a", "alternative-b"; + pairsets = <&phys0_4>, <&phys0_5>; + polarity-supported = "MDI-X", "S"; + vpwr-supply = <®_pse>; + }; + pse_pi3: pse-pi@3 { + reg = <3>; + #pse-cells = <0>; + pairset-names = "alternative-a", "alternative-b"; + pairsets = <&phys0_6>, <&phys0_7>; + polarity-supported = "MDI-X", "S"; + vpwr-supply = <®_pse>; + }; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/net/qcom,ipq9574-ppe.yaml b/Documentation/devicetree/bindings/net/qcom,ipq9574-ppe.yaml new file mode 100644 index 000000000000..753f370b7605 --- /dev/null +++ b/Documentation/devicetree/bindings/net/qcom,ipq9574-ppe.yaml @@ -0,0 +1,533 @@ +# SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/qcom,ipq9574-ppe.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm IPQ packet process engine (PPE) + +maintainers: + - Luo Jie <quic_luoj@quicinc.com> + - Lei Wei <quic_leiwei@quicinc.com> + - Suruchi Agarwal <quic_suruchia@quicinc.com> + - Pavithra R <quic_pavir@quicinc.com> + +description: | + The Ethernet functionality in the PPE (Packet Process Engine) is comprised + of three components, the switch core, port wrapper and Ethernet DMA. + + The Switch core in the IPQ9574 PPE has maximum of 6 front panel ports and + two FIFO interfaces. One of the two FIFO interfaces is used for Ethernet + port to host CPU communication using Ethernet DMA. The other is used + communicating to the EIP engine which is used for IPsec offload. On the + IPQ9574, the PPE includes 6 GMAC/XGMACs that can be connected with external + Ethernet PHY. Switch core also includes BM (Buffer Management), QM (Queue + Management) and SCH (Scheduler) modules for supporting the packet processing. + + The port wrapper provides connections from the 6 GMAC/XGMACS to UNIPHY (PCS) + supporting various modes such as SGMII/QSGMII/PSGMII/USXGMII/10G-BASER. There + are 3 UNIPHY (PCS) instances supported on the IPQ9574. 
+ + Ethernet DMA is used to transmit and receive packets between the six Ethernet + ports and ARM host CPU. + + The follow diagram shows the PPE hardware block along with its connectivity + to the external hardware blocks such clock hardware blocks (CMNPLL, GCC, + NSS clock controller) and Ethernet PCS/PHY blocks. For depicting the PHY + connectivity, one 4x1 Gbps PHY (QCA8075) and two 10 GBps PHYs are used as an + example. + + +---------+ + | 48 MHZ | + +----+----+ + |(clock) + v + +----+----+ + +------| CMN PLL | + | +----+----+ + | |(clock) + | v + | +----+----+ +----+----+ (clock) +----+----+ + | +---| NSSCC | | GCC |--------->| MDIO | + | | +----+----+ +----+----+ +----+----+ + | | |(clock & reset) |(clock) + | | v v + | | +----+---------------------+--+----------+----------+---------+ + | | | +-----+ |EDMA FIFO | | EIP FIFO| + | | | | SCH | +----------+ +---------+ + | | | +-----+ | | | + | | | +------+ +------+ +-------------------+ | + | | | | BM | | QM | IPQ9574-PPE | L2/L3 Process | | + | | | +------+ +------+ +-------------------+ | + | | | | | + | | | +-------+ +-------+ +-------+ +-------+ +-------+ +-------+ | + | | | | MAC0 | | MAC1 | | MAC2 | | MAC3 | | XGMAC4| |XGMAC5 | | + | | | +---+---+ +---+---+ +---+---+ +---+---+ +---+---+ +---+---+ | + | | | | | | | | | | + | | +-----+---------+---------+---------+---------+---------+-----+ + | | | | | | | | + | | +---+---------+---------+---------+---+ +---+---+ +---+---+ + +--+---->| PCS0 | | PCS1 | | PCS2 | + |(clock) +---+---------+---------+---------+---+ +---+---+ +---+---+ + | | | | | | | + | +---+---------+---------+---------+---+ +---+---+ +---+---+ + +------->| QCA8075 PHY | | PHY4 | | PHY5 | + (clock) +-------------------------------------+ +-------+ +-------+ + +properties: + compatible: + enum: + - qcom,ipq9574-ppe + + reg: + maxItems: 1 + + clocks: + items: + - description: PPE core clock + - description: PPE APB (Advanced Peripheral Bus) clock + - description: PPE IPE (Ingress Process Engine) clock + - description: PPE BM, QM and scheduler clock + + clock-names: + items: + - const: ppe + - const: apb + - const: ipe + - const: btq + + resets: + maxItems: 1 + description: PPE reset, which is necessary before configuring PPE hardware + + interrupts: + maxItems: 1 + description: PPE switch miscellaneous interrupt + + interconnects: + items: + - description: Bus interconnect path leading to PPE switch core function + - description: Bus interconnect path leading to PPE register access + - description: Bus interconnect path leading to QoS generation + - description: Bus interconnect path leading to timeout reference + - description: Bus interconnect path leading to NSS NOC from memory NOC + - description: Bus interconnect path leading to memory NOC from NSS NOC + - description: Bus interconnect path leading to enhanced memory NOC from NSS NOC + + interconnect-names: + items: + - const: ppe + - const: ppe_cfg + - const: qos_gen + - const: timeout_ref + - const: nssnoc_memnoc + - const: memnoc_nssnoc + - const: memnoc_nssnoc_1 + + ethernet-dma: + type: object + additionalProperties: false + description: + EDMA (Ethernet DMA) is used to transmit packets between PPE and ARM + host CPU. There are 32 TX descriptor rings, 32 TX completion rings, + 24 RX descriptor rings and 8 RX fill rings supported. 
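The 65 interrupts required for the ethernet-dma node are simply the sum of the ring counts just described (32 TX completion + 8 RX fill + 24 RX descriptor + 1 miscellaneous); a quick sketch that enumerates the names and verifies them against the patterns this binding uses::

    import re

    # Sketch: enumerate the 65 EDMA interrupt names (32 txcmpl + 8 rxfill +
    # 24 rxdesc + misc) and check them against the binding's name patterns.
    names = ([f"txcmpl_{i}" for i in range(32)] +
             [f"rxfill_{i}" for i in range(8)] +
             [f"rxdesc_{i}" for i in range(24)] +
             ["misc"])
    patterns = [r"^txcmpl_([1-2]?[0-9]|3[01])$",
                r"^rxfill_[0-7]$",
                r"^rxdesc_(1?[0-9]|2[0-3])$",
                r"^misc$"]
    assert len(names) == 65
    assert all(any(re.match(p, n) for p in patterns) for n in names)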
+ + properties: + clocks: + items: + - description: EDMA system clock + - description: EDMA APB (Advanced Peripheral Bus) clock + + clock-names: + items: + - const: sys + - const: apb + + resets: + maxItems: 1 + description: EDMA reset + + interrupts: + minItems: 65 + maxItems: 65 + + interrupt-names: + minItems: 65 + maxItems: 65 + items: + oneOf: + - pattern: '^txcmpl_([1-2]?[0-9]|3[01])$' + - pattern: '^rxfill_[0-7]$' + - pattern: '^rxdesc_(1?[0-9]|2[0-3])$' + - const: misc + description: + Interrupts "txcmpl_[0-31]" are the Ethernet DMA TX completion ring interrupts. + Interrupts "rxfill_[0-7]" are the Ethernet DMA RX fill ring interrupts. + Interrupts "rxdesc_[0-23]" are the Ethernet DMA RX Descriptor ring interrupts. + Interrupt "misc" is the Ethernet DMA miscellaneous error interrupt. + + required: + - clocks + - clock-names + - resets + - interrupts + - interrupt-names + + ethernet-ports: + patternProperties: + "^ethernet-port@[1-6]+$": + type: object + unevaluatedProperties: false + $ref: ethernet-switch-port.yaml# + + properties: + reg: + minimum: 1 + maximum: 6 + description: PPE Ethernet port ID + + clocks: + items: + - description: Port MAC clock + - description: Port RX clock + - description: Port TX clock + + clock-names: + items: + - const: mac + - const: rx + - const: tx + + resets: + items: + - description: Port MAC reset + - description: Port RX reset + - description: Port TX reset + + reset-names: + items: + - const: mac + - const: rx + - const: tx + + required: + - reg + - clocks + - clock-names + - resets + - reset-names + +required: + - compatible + - reg + - clocks + - clock-names + - resets + - interconnects + - interconnect-names + - ethernet-dma + +allOf: + - $ref: ethernet-switch.yaml + +unevaluatedProperties: false + +examples: + - | + #include <dt-bindings/clock/qcom,ipq9574-gcc.h> + #include <dt-bindings/clock/qcom,ipq9574-nsscc.h> + #include <dt-bindings/interconnect/qcom,ipq9574.h> + #include <dt-bindings/interrupt-controller/arm-gic.h> + #include <dt-bindings/reset/qcom,ipq9574-nsscc.h> + + ethernet-switch@3a000000 { + compatible = "qcom,ipq9574-ppe"; + reg = <0x3a000000 0xbef800>; + clocks = <&nsscc NSS_CC_PPE_SWITCH_CLK>, + <&nsscc NSS_CC_PPE_SWITCH_CFG_CLK>, + <&nsscc NSS_CC_PPE_SWITCH_IPE_CLK>, + <&nsscc NSS_CC_PPE_SWITCH_BTQ_CLK>; + clock-names = "ppe", + "apb", + "ipe", + "btq"; + resets = <&nsscc PPE_FULL_RESET>; + interrupts = <GIC_SPI 498 IRQ_TYPE_LEVEL_HIGH>; + interconnects = <&nsscc MASTER_NSSNOC_PPE &nsscc SLAVE_NSSNOC_PPE>, + <&nsscc MASTER_NSSNOC_PPE_CFG &nsscc SLAVE_NSSNOC_PPE_CFG>, + <&gcc MASTER_NSSNOC_QOSGEN_REF &gcc SLAVE_NSSNOC_QOSGEN_REF>, + <&gcc MASTER_NSSNOC_TIMEOUT_REF &gcc SLAVE_NSSNOC_TIMEOUT_REF>, + <&gcc MASTER_MEM_NOC_NSSNOC &gcc SLAVE_MEM_NOC_NSSNOC>, + <&gcc MASTER_NSSNOC_MEMNOC &gcc SLAVE_NSSNOC_MEMNOC>, + <&gcc MASTER_NSSNOC_MEM_NOC_1 &gcc SLAVE_NSSNOC_MEM_NOC_1>; + interconnect-names = "ppe", + "ppe_cfg", + "qos_gen", + "timeout_ref", + "nssnoc_memnoc", + "memnoc_nssnoc", + "memnoc_nssnoc_1"; + + ethernet-dma { + clocks = <&nsscc NSS_CC_PPE_EDMA_CLK>, + <&nsscc NSS_CC_PPE_EDMA_CFG_CLK>; + clock-names = "sys", + "apb"; + resets = <&nsscc EDMA_HW_RESET>; + interrupts = <GIC_SPI 363 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 364 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 365 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 366 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 367 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 368 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 369 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 370 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 371 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 372 
IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 373 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 374 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 375 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 376 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 377 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 378 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 379 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 380 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 381 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 382 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 383 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 384 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 509 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 508 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 507 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 506 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 505 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 504 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 503 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 502 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 501 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 500 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 355 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 356 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 357 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 358 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 359 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 360 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 361 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 362 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 331 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 332 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 333 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 334 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 335 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 336 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 337 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 338 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 339 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 340 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 341 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 342 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 343 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 344 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 345 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 346 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 347 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 348 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 349 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 350 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 351 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 352 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 353 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 354 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 499 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "txcmpl_0", + "txcmpl_1", + "txcmpl_2", + "txcmpl_3", + "txcmpl_4", + "txcmpl_5", + "txcmpl_6", + "txcmpl_7", + "txcmpl_8", + "txcmpl_9", + "txcmpl_10", + "txcmpl_11", + "txcmpl_12", + "txcmpl_13", + "txcmpl_14", + "txcmpl_15", + "txcmpl_16", + "txcmpl_17", + "txcmpl_18", + "txcmpl_19", + "txcmpl_20", + "txcmpl_21", + "txcmpl_22", + "txcmpl_23", + "txcmpl_24", + "txcmpl_25", + "txcmpl_26", + "txcmpl_27", + "txcmpl_28", + "txcmpl_29", + "txcmpl_30", + "txcmpl_31", + "rxfill_0", + "rxfill_1", + "rxfill_2", + "rxfill_3", + "rxfill_4", + "rxfill_5", + "rxfill_6", + "rxfill_7", + "rxdesc_0", + "rxdesc_1", + "rxdesc_2", + "rxdesc_3", + "rxdesc_4", + "rxdesc_5", + "rxdesc_6", + "rxdesc_7", + "rxdesc_8", + "rxdesc_9", + "rxdesc_10", + "rxdesc_11", + "rxdesc_12", + "rxdesc_13", + "rxdesc_14", + "rxdesc_15", + "rxdesc_16", + "rxdesc_17", + "rxdesc_18", + "rxdesc_19", + "rxdesc_20", + "rxdesc_21", + "rxdesc_22", + "rxdesc_23", + "misc"; + }; + + ethernet-ports { + #address-cells = <1>; + #size-cells = <0>; + + ethernet-port@1 { + reg = <1>; + phy-mode = "qsgmii"; + managed = "in-band-status"; + phy-handle = <&phy0>; + pcs-handle = <&pcs0_ch0>; + clocks = <&nsscc NSS_CC_PORT1_MAC_CLK>, + <&nsscc NSS_CC_PORT1_RX_CLK>, + <&nsscc NSS_CC_PORT1_TX_CLK>; + clock-names = "mac", + "rx", + "tx"; + resets = <&nsscc PORT1_MAC_ARES>, + <&nsscc PORT1_RX_ARES>, + <&nsscc PORT1_TX_ARES>; + reset-names = "mac", + "rx", + "tx"; + }; + + ethernet-port@2 { + reg = <2>; + phy-mode = 
"qsgmii"; + managed = "in-band-status"; + phy-handle = <&phy1>; + pcs-handle = <&pcs0_ch1>; + clocks = <&nsscc NSS_CC_PORT2_MAC_CLK>, + <&nsscc NSS_CC_PORT2_RX_CLK>, + <&nsscc NSS_CC_PORT2_TX_CLK>; + clock-names = "mac", + "rx", + "tx"; + resets = <&nsscc PORT2_MAC_ARES>, + <&nsscc PORT2_RX_ARES>, + <&nsscc PORT2_TX_ARES>; + reset-names = "mac", + "rx", + "tx"; + }; + + ethernet-port@3 { + reg = <3>; + phy-mode = "qsgmii"; + managed = "in-band-status"; + phy-handle = <&phy2>; + pcs-handle = <&pcs0_ch2>; + clocks = <&nsscc NSS_CC_PORT3_MAC_CLK>, + <&nsscc NSS_CC_PORT3_RX_CLK>, + <&nsscc NSS_CC_PORT3_TX_CLK>; + clock-names = "mac", + "rx", + "tx"; + resets = <&nsscc PORT3_MAC_ARES>, + <&nsscc PORT3_RX_ARES>, + <&nsscc PORT3_TX_ARES>; + reset-names = "mac", + "rx", + "tx"; + }; + + ethernet-port@4 { + reg = <4>; + phy-mode = "qsgmii"; + managed = "in-band-status"; + phy-handle = <&phy3>; + pcs-handle = <&pcs0_ch3>; + clocks = <&nsscc NSS_CC_PORT4_MAC_CLK>, + <&nsscc NSS_CC_PORT4_RX_CLK>, + <&nsscc NSS_CC_PORT4_TX_CLK>; + clock-names = "mac", + "rx", + "tx"; + resets = <&nsscc PORT4_MAC_ARES>, + <&nsscc PORT4_RX_ARES>, + <&nsscc PORT4_TX_ARES>; + reset-names = "mac", + "rx", + "tx"; + }; + + ethernet-port@5 { + reg = <5>; + phy-mode = "usxgmii"; + managed = "in-band-status"; + phy-handle = <&phy4>; + pcs-handle = <&pcs1_ch0>; + clocks = <&nsscc NSS_CC_PORT5_MAC_CLK>, + <&nsscc NSS_CC_PORT5_RX_CLK>, + <&nsscc NSS_CC_PORT5_TX_CLK>; + clock-names = "mac", + "rx", + "tx"; + resets = <&nsscc PORT5_MAC_ARES>, + <&nsscc PORT5_RX_ARES>, + <&nsscc PORT5_TX_ARES>; + reset-names = "mac", + "rx", + "tx"; + }; + + ethernet-port@6 { + reg = <6>; + phy-mode = "usxgmii"; + managed = "in-band-status"; + phy-handle = <&phy5>; + pcs-handle = <&pcs2_ch0>; + clocks = <&nsscc NSS_CC_PORT6_MAC_CLK>, + <&nsscc NSS_CC_PORT6_RX_CLK>, + <&nsscc NSS_CC_PORT6_TX_CLK>; + clock-names = "mac", + "rx", + "tx"; + resets = <&nsscc PORT6_MAC_ARES>, + <&nsscc PORT6_RX_ARES>, + <&nsscc PORT6_TX_ARES>; + reset-names = "mac", + "rx", + "tx"; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/net/realtek,rtl82xx.yaml b/Documentation/devicetree/bindings/net/realtek,rtl82xx.yaml index d248a08a2136..2b5697bd7c5d 100644 --- a/Documentation/devicetree/bindings/net/realtek,rtl82xx.yaml +++ b/Documentation/devicetree/bindings/net/realtek,rtl82xx.yaml @@ -45,12 +45,16 @@ properties: description: Disable CLKOUT clock, CLKOUT clock default is enabled after hardware reset. - realtek,aldps-enable: type: boolean description: Enable ALDPS mode, ALDPS mode default is disabled after hardware reset. + wakeup-source: + type: boolean + description: + Enable Wake-on-LAN support for the RTL8211F PHY. 
+ unevaluatedProperties: false allOf: diff --git a/Documentation/devicetree/bindings/net/renesas,rzn1-gmac.yaml b/Documentation/devicetree/bindings/net/renesas,rzn1-gmac.yaml index d9a8d586e260..16dd7a2631ab 100644 --- a/Documentation/devicetree/bindings/net/renesas,rzn1-gmac.yaml +++ b/Documentation/devicetree/bindings/net/renesas,rzn1-gmac.yaml @@ -30,6 +30,15 @@ properties: - const: renesas,rzn1-gmac - const: snps,dwmac + interrupts: + maxItems: 3 + + interrupt-names: + items: + - const: macirq + - const: eth_wake_irq + - const: eth_lpi + pcs-handle: description: phandle pointing to a PCS sub-node compatible with diff --git a/Documentation/devicetree/bindings/ptp/nxp,ptp-netc.yaml b/Documentation/devicetree/bindings/ptp/nxp,ptp-netc.yaml new file mode 100644 index 000000000000..042de9d5a92b --- /dev/null +++ b/Documentation/devicetree/bindings/ptp/nxp,ptp-netc.yaml @@ -0,0 +1,63 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/ptp/nxp,ptp-netc.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: NXP NETC V4 Timer PTP clock + +description: + NETC V4 Timer provides current time with nanosecond resolution, precise + periodic pulse, pulse on timeout (alarm), and time capture on external + pulse support. And it supports time synchronization as required for + IEEE 1588 and IEEE 802.1AS-2020. + +maintainers: + - Wei Fang <wei.fang@nxp.com> + - Clark Wang <xiaoning.wang@nxp.com> + +properties: + compatible: + enum: + - pci1131,ee02 + + reg: + maxItems: 1 + + clocks: + maxItems: 1 + description: + The reference clock of NETC Timer, can be selected between 3 different + clock sources using an integrated hardware mux TMR_CTRL[CK_SEL]. + The "ccm" means the reference clock comes from CCM of SoC. + The "ext" means the reference clock comes from external IO pins. + If not present, indicates that the system clock of NETC IP is selected + as the reference clock. + + clock-names: + enum: + - ccm + - ext + +required: + - compatible + - reg + +allOf: + - $ref: /schemas/pci/pci-device.yaml + +unevaluatedProperties: false + +examples: + - | + pcie { + #address-cells = <3>; + #size-cells = <2>; + + ptp-timer@18,0 { + compatible = "pci1131,ee02"; + reg = <0x00c000 0 0 0 0>; + clocks = <&scmi_clk 18>; + clock-names = "ccm"; + }; + }; diff --git a/Documentation/netlink/specs/devlink.yaml b/Documentation/netlink/specs/devlink.yaml index bb87111d5e16..3db59c965869 100644 --- a/Documentation/netlink/specs/devlink.yaml +++ b/Documentation/netlink/specs/devlink.yaml @@ -853,6 +853,10 @@ attribute-sets: type: nest multi-attr: true nested-attributes: dl-rate-tc-bws + - + name: health-reporter-burst-period + type: u64 + doc: Time (in msec) for recoveries before starting the grace period. 
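The new burst-period attribute rides on the existing health-reporter-set operation; a hedged sketch using the in-tree YNL Python library, where the bus, device and reporter names are placeholders and the import assumes running from tools/net/ynl/pyynl::

    from lib import YnlFamily  # in-tree YNL library (tools/net/ynl/pyynl)

    # Sketch: set the new burst period on a health reporter. "pci",
    # "0000:01:00.0" and "fw" below are placeholder names, not real devices.
    ynl = YnlFamily("Documentation/netlink/specs/devlink.yaml")
    ynl.do("health-reporter-set", {
        "bus-name": "pci",
        "dev-name": "0000:01:00.0",
        "health-reporter-name": "fw",
        "health-reporter-burst-period": 2000,  # msec
    })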
- name: dl-dev-stats subset-of: devlink @@ -1216,6 +1220,8 @@ attribute-sets: name: health-reporter-dump-ts-ns - name: health-reporter-auto-dump + - + name: health-reporter-burst-period - name: dl-attr-stats @@ -1961,6 +1967,7 @@ operations: - health-reporter-graceful-period - health-reporter-auto-recover - health-reporter-auto-dump + - health-reporter-burst-period - name: health-reporter-recover diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml index 1bc1bd7d33c2..7a7594713f1f 100644 --- a/Documentation/netlink/specs/ethtool.yaml +++ b/Documentation/netlink/specs/ethtool.yaml @@ -205,6 +205,9 @@ definitions: - name: gtp-teid - + name: ip6-fl + doc: IPv6 Flow Label + - name: discard value: 31 diff --git a/Documentation/netlink/specs/fou.yaml b/Documentation/netlink/specs/fou.yaml index 57735726262e..8e7974ec453f 100644 --- a/Documentation/netlink/specs/fou.yaml +++ b/Documentation/netlink/specs/fou.yaml @@ -52,7 +52,7 @@ attribute-sets: name: local-v6 type: binary checks: - min-len: 16 + exact-len: 16 - name: peer-v4 type: u32 @@ -60,7 +60,7 @@ attribute-sets: name: peer-v6 type: binary checks: - min-len: 16 + exact-len: 16 - name: peer-port type: u16 diff --git a/Documentation/netlink/specs/index.rst b/Documentation/netlink/specs/index.rst new file mode 100644 index 000000000000..7f7cf4a096f2 --- /dev/null +++ b/Documentation/netlink/specs/index.rst @@ -0,0 +1,13 @@ +.. SPDX-License-Identifier: GPL-2.0 + +.. _specs: + +============================= +Netlink Family Specifications +============================= + +.. toctree:: + :maxdepth: 1 + :glob: + + * diff --git a/Documentation/networking/bonding.rst b/Documentation/networking/bonding.rst index f8f5766703d4..a2b42ae719d2 100644 --- a/Documentation/networking/bonding.rst +++ b/Documentation/networking/bonding.rst @@ -582,10 +582,8 @@ miimon This determines how often the link state of each slave is inspected for link failures. A value of zero disables MII link monitoring. A value of 100 is a good starting point. - The use_carrier option, below, affects how the link state is - determined. See the High Availability section for additional - information. The default value is 100 if arp_interval is not - set. + + The default value is 100 if arp_interval is not set. min_links @@ -896,25 +894,14 @@ updelay use_carrier - Specifies whether or not miimon should use MII or ETHTOOL - ioctls vs. netif_carrier_ok() to determine the link - status. The MII or ETHTOOL ioctls are less efficient and - utilize a deprecated calling sequence within the kernel. The - netif_carrier_ok() relies on the device driver to maintain its - state with netif_carrier_on/off; at this writing, most, but - not all, device drivers support this facility. - - If bonding insists that the link is up when it should not be, - it may be that your network device driver does not support - netif_carrier_on/off. The default state for netif_carrier is - "carrier on," so if a driver does not support netif_carrier, - it will appear as if the link is always up. In this case, - setting use_carrier to 0 will cause bonding to revert to the - MII / ETHTOOL ioctl method to determine the link state. - - A value of 1 enables the use of netif_carrier_ok(), a value of - 0 will use the deprecated MII / ETHTOOL ioctls. The default - value is 1. + Obsolete option that previously selected between MII / + ETHTOOL ioctls and netif_carrier_ok() to determine link + state. + + All link state checks are now done with netif_carrier_ok(). 
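A quick way to confirm what the option now reports on a running system is bonding's standard sysfs interface; a small sketch, assuming an existing bond0::

    from pathlib import Path

    # Sketch: read the (now fixed) use_carrier value and the miimon interval
    # for an existing bond. "bond0" is an assumed interface name.
    bonding = Path("/sys/class/net/bond0/bonding")
    print("use_carrier:", (bonding / "use_carrier").read_text().strip())
    print("miimon:", (bonding / "miimon").read_text().strip())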
+ + For backwards compatibility, this option's value may be inspected + or set. The only valid setting is 1. xmit_hash_policy @@ -2036,22 +2023,8 @@ depending upon the device driver to maintain its carrier state, by querying the device's MII registers, or by making an ethtool query to the device. -If the use_carrier module parameter is 1 (the default value), -then the MII monitor will rely on the driver for carrier state -information (via the netif_carrier subsystem). As explained in the -use_carrier parameter information, above, if the MII monitor fails to -detect carrier loss on the device (e.g., when the cable is physically -disconnected), it may be that the driver does not support -netif_carrier. - -If use_carrier is 0, then the MII monitor will first query the -device's (via ioctl) MII registers and check the link state. If that -request fails (not just that it returns carrier down), then the MII -monitor will make an ethtool ETHTOOL_GLINK request to attempt to obtain -the same information. If both methods fail (i.e., the driver either -does not support or had some error in processing both the MII register -and ethtool requests), then the MII monitor will assume the link is -up. +The MII monitor relies on the driver for carrier state information (via +the netif_carrier subsystem). 8. Potential Sources of Trouble =============================== @@ -2135,34 +2108,6 @@ This will load tg3 and e1000 modules before loading the bonding one. Full documentation on this can be found in the modprobe.d and modprobe manual pages. -8.3. Painfully Slow Or No Failed Link Detection By Miimon ---------------------------------------------------------- - -By default, bonding enables the use_carrier option, which -instructs bonding to trust the driver to maintain carrier state. - -As discussed in the options section, above, some drivers do -not support the netif_carrier_on/_off link state tracking system. -With use_carrier enabled, bonding will always see these links as up, -regardless of their actual state. - -Additionally, other drivers do support netif_carrier, but do -not maintain it in real time, e.g., only polling the link state at -some fixed interval. In this case, miimon will detect failures, but -only after some long period of time has expired. If it appears that -miimon is very slow in detecting link failures, try specifying -use_carrier=0 to see if that improves the failure detection time. If -it does, then it may be that the driver checks the carrier state at a -fixed interval, but does not cache the MII register values (so the -use_carrier=0 method of querying the registers directly works). If -use_carrier=0 does not improve the failover, then the driver may cache -the registers, or the problem may be elsewhere. - -Also, remember that miimon only checks for the device's -carrier state. It has no way to determine the state of devices on or -beyond other ports of a switch, or if a switch is refusing to pass -traffic while still maintaining carrier on. - 9. 
SNMP agents =============== diff --git a/Documentation/networking/device_drivers/ethernet/index.rst b/Documentation/networking/device_drivers/ethernet/index.rst index 40ac552641a3..0b0a3eef6aae 100644 --- a/Documentation/networking/device_drivers/ethernet/index.rst +++ b/Documentation/networking/device_drivers/ethernet/index.rst @@ -50,6 +50,7 @@ Contents: neterion/s2io netronome/nfp pensando/ionic + qualcomm/ppe/ppe smsc/smc9 stmicro/stmmac ti/cpsw diff --git a/Documentation/networking/device_drivers/ethernet/meta/fbnic.rst b/Documentation/networking/device_drivers/ethernet/meta/fbnic.rst index afb8353daefd..fb6559fa4be4 100644 --- a/Documentation/networking/device_drivers/ethernet/meta/fbnic.rst +++ b/Documentation/networking/device_drivers/ethernet/meta/fbnic.rst @@ -160,3 +160,14 @@ behavior and potential performance bottlenecks. credit exhaustion - ``pcie_ob_rd_no_np_cred``: Read requests dropped due to non-posted credit exhaustion + +XDP Length Error: +~~~~~~~~~~~~~~~~~ + +For XDP programs without frags support, fbnic tries to make sure that MTU fits +into a single buffer. If an oversized frame is received and gets fragmented, +it is dropped and the following netlink counters are updated + + - ``rx-length``: number of frames dropped due to lack of fragmentation + support in the attached XDP program + - ``rx-errors``: total number of packets with errors received on the interface diff --git a/Documentation/networking/device_drivers/ethernet/qualcomm/ppe/ppe.rst b/Documentation/networking/device_drivers/ethernet/qualcomm/ppe/ppe.rst new file mode 100644 index 000000000000..4ab299a28969 --- /dev/null +++ b/Documentation/networking/device_drivers/ethernet/qualcomm/ppe/ppe.rst @@ -0,0 +1,194 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=============================================== +PPE Ethernet Driver for Qualcomm IPQ SoC Family +=============================================== + +Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. + +Author: Lei Wei <quic_leiwei@quicinc.com> + + +Contents +======== + +- `PPE Overview`_ +- `PPE Driver Overview`_ +- `PPE Driver Supported SoCs`_ +- `Enabling the Driver`_ +- `Debugging`_ + + +PPE Overview +============ + +IPQ (Qualcomm Internet Processor) SoC (System-on-Chip) series is Qualcomm's series of +networking SoC for Wi-Fi access points. The PPE (Packet Process Engine) is the Ethernet +packet process engine in the IPQ SoC. + +Below is a simplified hardware diagram of IPQ9574 SoC which includes the PPE engine and +other blocks which are in the SoC but outside the PPE engine. These blocks work together +to enable the Ethernet for the IPQ SoC:: + + +------+ +------+ +------+ +------+ +------+ +------+ start +-------+ + |netdev| |netdev| |netdev| |netdev| |netdev| |netdev|<------|PHYLINK| + +------+ +------+ +------+ +------+ +------+ +------+ stop +-+-+-+-+ + | | | ^ + +-------+ +-------------------------+--------+----------------------+ | | | + | GCC | | | EDMA | | | | | + +---+---+ | PPE +---+----+ | | | | + | clk | | | | | | + +-------->| +-----------------------+------+-----+---------------+ | | | | + | | Switch Core |Port0 | |Port7(EIP FIFO)| | | | | + | | +---+--+ +------+--------+ | | | | + | | | | | | | | | + +-------+ | | +------+---------------+----+ | | | | | + |CMN PLL| | | +---+ +---+ +----+ | +--------+ | | | | | | + +---+---+ | | |BM | |QM | |SCH | | | L2/L3 | ....... 
| | | | | | + | | | | +---+ +---+ +----+ | +--------+ | | | | | | + | | | | +------+--------------------+ | | | | | + | | | | | | | | | | + | v | | +-----+-+-----+-+-----+-+-+---+--+-----+-+-----+ | | | | | + | +------+ | | |Port1| |Port2| |Port3| |Port4| |Port5| |Port6| | | | | | + | |NSSCC | | | +-----+ +-----+ +-----+ +-----+ +-----+ +-----+ | | mac| | | + | +-+-+--+ | | |MAC0 | |MAC1 | |MAC2 | |MAC3 | |MAC4 | |MAC5 | | |<---+ | | + | ^ | |clk | | +-----+-+-----+-+-----+-+-----+--+-----+-+-----+ | | ops | | + | | | +------>| +----|------|-------|-------|---------|--------|-----+ | | | + | | | +---------------------------------------------------------+ | | + | | | | | | | | | | | + | | | MII clk | QSGMII USXGMII USXGMII | | + | | +--------------->| | | | | | | | + | | +-------------------------+ +---------+ +---------+ | | + | |125/312.5MHz clk| (PCS0) | | (PCS1) | | (PCS2) | pcs ops | | + | +----------------+ UNIPHY0 | | UNIPHY1 | | UNIPHY2 |<--------+ | + +----------------->| | | | | | | + | 31.25MHz ref clk +-------------------------+ +---------+ +---------+ | + | | | | | | | | + | +-----------------------------------------------------+ | + |25/50MHz ref clk| +-------------------------+ +------+ +------+ | link | + +--------------->| | QUAD PHY | | PHY4 | | PHY5 | |---------+ + | +-------------------------+ +------+ +------+ | change + | | + | MDIO bus | + +-----------------------------------------------------+ + +The CMN (Common) PLL, NSSCC (Networking Sub System Clock Controller) and GCC (Global +Clock Controller) blocks are in the SoC and act as clock providers. + +The UNIPHY block is in the SoC and provides the PCS (Physical Coding Sublayer) and +XPCS (10-Gigabit Physical Coding Sublayer) functions to support different interface +modes between the PPE MAC and the external PHY. + +This documentation focuses on the descriptions of PPE engine and the PPE driver. + +The Ethernet functionality in the PPE (Packet Process Engine) is comprised of three +components: the switch core, port wrapper and Ethernet DMA. + +The Switch core in the IPQ9574 PPE has maximum of 6 front panel ports and two FIFO +interfaces. One of the two FIFO interfaces is used for Ethernet port to host CPU +communication using Ethernet DMA. The other one is used to communicate to the EIP +engine which is used for IPsec offload. On the IPQ9574, the PPE includes 6 GMAC/XGMACs +that can be connected with external Ethernet PHY. Switch core also includes BM (Buffer +Management), QM (Queue Management) and SCH (Scheduler) modules for supporting the +packet processing. + +The port wrapper provides connections from the 6 GMAC/XGMACS to UNIPHY (PCS) supporting +various modes such as SGMII/QSGMII/PSGMII/USXGMII/10G-BASER. There are 3 UNIPHY (PCS) +instances supported on the IPQ9574. + +Ethernet DMA is used to transmit and receive packets between the Ethernet subsystem +and ARM host CPU. + +The following lists the main blocks in the PPE engine which will be driven by this +PPE driver: + +- BM + BM is the hardware buffer manager for the PPE switch ports. +- QM + Queue Manager for managing the egress hardware queues of the PPE switch ports. +- SCH + The scheduler which manages the hardware traffic scheduling for the PPE switch ports. +- L2 + The L2 block performs the packet bridging in the switch core. The bridge domain is + represented by the VSI (Virtual Switch Instance) domain in PPE. FDB learning can be + enabled based on the VSI domain and bridge forwarding occurs within the VSI domain. 
+- MAC + The PPE in the IPQ9574 supports up to six MACs (MAC0 to MAC5) which are corresponding + to six switch ports (port1 to port6). The MAC block is connected with external PHY + through the UNIPHY PCS block. Each MAC block includes the GMAC and XGMAC blocks and + the switch port can select to use GMAC or XMAC through a MUX selection according to + the external PHY's capability. +- EDMA (Ethernet DMA) + The Ethernet DMA is used to transmit and receive Ethernet packets between the PPE + ports and the ARM cores. + +The received packet on a PPE MAC port can be forwarded to another PPE MAC port. It can +be also forwarded to internal switch port0 so that the packet can be delivered to the +ARM cores using the Ethernet DMA (EDMA) engine. The Ethernet DMA driver will deliver the +packet to the corresponding 'netdevice' interface. + +The software instantiations of the PPE MAC (netdevice), PCS and external PHYs interact +with the Linux PHYLINK framework to manage the connectivity between the PPE ports and +the connected PHYs, and the port link states. This is also illustrated in above diagram. + + +PPE Driver Overview +=================== +PPE driver is Ethernet driver for the Qualcomm IPQ SoC. It is a single platform driver +which includes the PPE part and Ethernet DMA part. The PPE part initializes and drives the +various blocks in PPE switch core such as BM/QM/L2 blocks and the PPE MACs. The EDMA part +drives the Ethernet DMA for packet transfer between PPE ports and ARM cores, and enables +the netdevice driver for the PPE ports. + +The PPE driver files in drivers/net/ethernet/qualcomm/ppe/ are listed as below: + +- Makefile +- ppe.c +- ppe.h +- ppe_config.c +- ppe_config.h +- ppe_debugfs.c +- ppe_debugfs.h +- ppe_regs.h + +The ppe.c file contains the main PPE platform driver and undertakes the initialization of +PPE switch core blocks such as QM, BM and L2. The configuration APIs for these hardware +blocks are provided in the ppe_config.c file. + +The ppe.h defines the PPE device data structure which will be used by PPE driver functions. + +The ppe_debugfs.c enables the PPE statistics counters such as PPE port Rx and Tx counters, +CPU code counters and queue counters. + + +PPE Driver Supported SoCs +========================= + +The PPE driver supports the following IPQ SoC: + +- IPQ9574 + + +Enabling the Driver +=================== + +The driver is located in the menu structure at:: + + -> Device Drivers + -> Network device support (NETDEVICES [=y]) + -> Ethernet driver support + -> Qualcomm devices + -> Qualcomm Technologies, Inc. PPE Ethernet support + +If the driver is built as a module, the module will be called qcom-ppe. + +The PPE driver functionally depends on the CMN PLL and NSSCC clock controller drivers. +Please make sure the dependent modules are installed before installing the PPE driver +module. + + +Debugging +========= + +The PPE hardware counters can be accessed using debugfs interface from the +``/sys/kernel/debug/ppe/`` directory. diff --git a/Documentation/networking/devlink/devlink-health.rst b/Documentation/networking/devlink/devlink-health.rst index e0b8cfed610a..4d10536377ab 100644 --- a/Documentation/networking/devlink/devlink-health.rst +++ b/Documentation/networking/devlink/devlink-health.rst @@ -50,7 +50,7 @@ Once an error is reported, devlink health will perform the following actions: * Auto recovery attempt is being done. Depends on: - Auto-recovery configuration - - Grace period vs. time passed since last recover + - Grace period (and burst period) vs. 
time passed since last recover Devlink formatted message ========================= diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst index ac90b82f3ce9..b7a4969e9bc9 100644 --- a/Documentation/networking/index.rst +++ b/Documentation/networking/index.rst @@ -57,7 +57,7 @@ Contents: filter generic-hdlc generic_netlink - netlink_spec/index + ../netlink/specs/index gen_stats gtp ila diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index 9756d16e3df1..9f5891c9b07b 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -209,7 +209,7 @@ neigh/default/unres_qlen_bytes - INTEGER Setting negative value is meaningless and will return error. - Default: SK_WMEM_MAX, (same as net.core.wmem_default). + Default: SK_WMEM_DEFAULT, (same as net.core.wmem_default). Exact value depends on architecture and kernel options, but should be enough to allow queuing 256 packets @@ -805,8 +805,8 @@ tcp_rmem - vector of 3 INTEGERs: min, default, max This value results in initial window of 65535. max: maximal size of receive buffer allowed for automatically - selected receiver buffers for TCP socket. This value does not override - net.core.rmem_max. Calling setsockopt() with SO_RCVBUF disables + selected receiver buffers for TCP socket. + Calling setsockopt() with SO_RCVBUF disables automatic tuning of that socket's receive buffer size, in which case this value is ignored. Default: between 131072 and 32MB, depending on RAM size. @@ -3508,16 +3508,10 @@ cookie_hmac_alg - STRING a listening sctp socket to a connecting client in the INIT-ACK chunk. Valid values are: - * md5 - * sha1 + * sha256 * none - Ability to assign md5 or sha1 as the selected alg is predicated on the - configuration of those algorithms at build time (CONFIG_CRYPTO_MD5 and - CONFIG_CRYPTO_SHA1). - - Default: Dependent on configuration. MD5 if available, else SHA1 if - available, else none. + Default: sha256 rcvbuf_policy - INTEGER Determines if the receive buffer is attributed to the socket or to diff --git a/Documentation/networking/netlink_spec/.gitignore b/Documentation/networking/netlink_spec/.gitignore deleted file mode 100644 index 30d85567b592..000000000000 --- a/Documentation/networking/netlink_spec/.gitignore +++ /dev/null @@ -1 +0,0 @@ -*.rst diff --git a/Documentation/networking/netlink_spec/readme.txt b/Documentation/networking/netlink_spec/readme.txt deleted file mode 100644 index 030b44aca4e6..000000000000 --- a/Documentation/networking/netlink_spec/readme.txt +++ /dev/null @@ -1,4 +0,0 @@ -SPDX-License-Identifier: GPL-2.0 - -This file is populated during the build of the documentation (htmldocs) by the -tools/net/ynl/pyynl/ynl_gen_rst.py script. diff --git a/Documentation/process/maintainer-netdev.rst b/Documentation/process/maintainer-netdev.rst index e1755610b4bc..989192421cc9 100644 --- a/Documentation/process/maintainer-netdev.rst +++ b/Documentation/process/maintainer-netdev.rst @@ -407,7 +407,7 @@ Clean-up patches Netdev discourages patches which perform simple clean-ups, which are not in the context of other work. 
For example: -* Addressing ``checkpatch.pl`` warnings +* Addressing ``checkpatch.pl``, and other trivial coding style warnings * Addressing :ref:`Local variable ordering<rcs>` issues * Conversions to device-managed APIs (``devm_`` helpers) diff --git a/Documentation/sphinx/parser_yaml.py b/Documentation/sphinx/parser_yaml.py new file mode 100755 index 000000000000..634d84a202fc --- /dev/null +++ b/Documentation/sphinx/parser_yaml.py @@ -0,0 +1,123 @@ +# SPDX-License-Identifier: GPL-2.0 +# Copyright 2025 Mauro Carvalho Chehab <mchehab+huawei@kernel.org> + +""" +Sphinx extension for processing YAML files +""" + +import os +import re +import sys + +from pprint import pformat + +from docutils import statemachine +from docutils.parsers.rst import Parser as RSTParser +from docutils.parsers.rst import states +from docutils.statemachine import ViewList + +from sphinx.util import logging +from sphinx.parsers import Parser + +srctree = os.path.abspath(os.environ["srctree"]) +sys.path.insert(0, os.path.join(srctree, "tools/net/ynl/pyynl/lib")) + +from doc_generator import YnlDocGenerator # pylint: disable=C0413 + +logger = logging.getLogger(__name__) + +class YamlParser(Parser): + """ + Kernel parser for YAML files. + + This is a simple sphinx.Parser to handle yaml files inside the + Kernel tree that will be part of the built documentation. + + The actual parser function is not contained here: the code was + written in a way that parsing yaml for different subsystems + can be done from a single dispatcher. + + All it takes to have parse YAML patches is to have an import line: + + from some_parser_code import NewYamlGenerator + + To this module. Then add an instance of the parser with: + + new_parser = NewYamlGenerator() + + and add a logic inside parse() to handle it based on the path, + like this: + + if "/foo" in fname: + msg = self.new_parser.parse_yaml_file(fname) + """ + + supported = ('yaml', ) + + netlink_parser = YnlDocGenerator() + + re_lineno = re.compile(r"\.\. LINENO ([0-9]+)$") + + tab_width = 8 + + def rst_parse(self, inputstring, document, msg): + """ + Receives a ReST content that was previously converted by the + YAML parser, adding it to the document tree. + """ + + self.setup_parse(inputstring, document) + + result = ViewList() + + self.statemachine = states.RSTStateMachine(state_classes=states.state_classes, + initial_state='Body', + debug=document.reporter.debug_flag) + + try: + # Parse message with RSTParser + lineoffset = 0; + + lines = statemachine.string2lines(msg, self.tab_width, + convert_whitespace=True) + + for line in lines: + match = self.re_lineno.match(line) + if match: + lineoffset = int(match.group(1)) + continue + + result.append(line, document.current_source, lineoffset) + + self.statemachine.run(result, document) + + except Exception as e: + document.reporter.error("YAML parsing error: %s" % pformat(e)) + + self.finish_parse() + + # Overrides docutils.parsers.Parser. 
See sphinx.parsers.RSTParser + def parse(self, inputstring, document): + """Check if a YAML is meant to be parsed.""" + + fname = document.current_source + + # Handle netlink yaml specs + if "/netlink/specs/" in fname: + msg = self.netlink_parser.parse_yaml_file(fname) + self.rst_parse(inputstring, document, msg) + + # All other yaml files are ignored + +def setup(app): + """Setup function for the Sphinx extension.""" + + # Add YAML parser + app.add_source_parser(YamlParser) + app.add_source_suffix('.yaml', 'yaml') + + return { + 'version': '1.0', + 'parallel_read_safe': True, + 'parallel_write_safe': True, + } diff --git a/Documentation/userspace-api/netlink/index.rst b/Documentation/userspace-api/netlink/index.rst index c1b6765cc963..83ae25066591 100644 --- a/Documentation/userspace-api/netlink/index.rst +++ b/Documentation/userspace-api/netlink/index.rst @@ -18,4 +18,4 @@ Netlink documentation for users. See also: - :ref:`Documentation/core-api/netlink.rst <kernel_netlink>` - - :ref:`Documentation/networking/netlink_spec/index.rst <specs>` + - :ref:`Documentation/netlink/specs/index.rst <specs>` diff --git a/Documentation/userspace-api/netlink/netlink-raw.rst b/Documentation/userspace-api/netlink/netlink-raw.rst index 31fc91020eb3..aae296c170c5 100644 --- a/Documentation/userspace-api/netlink/netlink-raw.rst +++ b/Documentation/userspace-api/netlink/netlink-raw.rst @@ -62,8 +62,8 @@ Sub-messages ------------ Several raw netlink families such as -:doc:`rt-link<../../networking/netlink_spec/rt-link>` and -:doc:`tc<../../networking/netlink_spec/tc>` use attribute nesting as an +:ref:`rt-link<netlink-rt-link>` and +:ref:`tc<netlink-tc>` use attribute nesting as an abstraction to carry module specific information. Conceptually it looks as follows:: @@ -162,7 +162,7 @@ then this is an error. Nested struct definitions ------------------------- -Many raw netlink families such as :doc:`tc<../../networking/netlink_spec/tc>` +Many raw netlink families such as :ref:`tc<netlink-tc>` make use of nested struct definitions. The ``netlink-raw`` schema makes it possible to embed a struct within a struct definition using the ``struct`` property. For example, the following struct definition embeds the diff --git a/Documentation/userspace-api/netlink/specs.rst b/Documentation/userspace-api/netlink/specs.rst index 1b50d97d8d7c..debb4bfca5c4 100644 --- a/Documentation/userspace-api/netlink/specs.rst +++ b/Documentation/userspace-api/netlink/specs.rst @@ -15,7 +15,7 @@ kernel headers directly. 
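The ``.. LINENO`` bookkeeping in rst_parse() above is the one subtle part of the new parser; a standalone sketch of the same loop, with a made-up input string::

    import re

    # Sketch: ".. LINENO <n>" markers emitted by the YNL doc generator are
    # stripped while remembering the source line they point at (mirrors
    # YamlParser.rst_parse above). The msg content here is made up.
    re_lineno = re.compile(r"\.\. LINENO ([0-9]+)$")
    msg = ".. LINENO 10\nSpec title\n==========\n.. LINENO 42\nAttribute doc\n"

    lineoffset = 0
    for line in msg.splitlines():
        match = re_lineno.match(line)
        if match:
            lineoffset = int(match.group(1))
            continue
        print(lineoffset, line)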
Internally kernel uses the YAML specs to generate: - the C uAPI header - - documentation of the protocol as a ReST file - see :ref:`Documentation/networking/netlink_spec/index.rst <specs>` + - documentation of the protocol as a ReST file - see :ref:`Documentation/netlink/specs/index.rst <specs>` - policy tables for input attribute validation - operation tables diff --git a/MAINTAINERS b/MAINTAINERS index 2df02e4374ed..b81595e9ea95 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7308,6 +7308,7 @@ F: scripts/get_abi.py F: scripts/kernel-doc* F: scripts/lib/abi/* F: scripts/lib/kdoc/* +F: tools/net/ynl/pyynl/lib/doc_generator.py F: scripts/sphinx-pre-install X: Documentation/ABI/ X: Documentation/admin-guide/media/ @@ -13812,8 +13813,7 @@ M: Hauke Mehrtens <hauke@hauke-m.de> L: netdev@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/net/dsa/lantiq,gswip.yaml -F: drivers/net/dsa/lantiq_gswip.c -F: drivers/net/dsa/lantiq_pce.h +F: drivers/net/dsa/lantiq/* F: drivers/net/ethernet/lantiq_xrx200.c F: net/dsa/tag_gswip.c @@ -18293,6 +18293,15 @@ F: Documentation/devicetree/bindings/clock/*imx* F: drivers/clk/imx/ F: include/dt-bindings/clock/*imx* +NXP NETC TIMER PTP CLOCK DRIVER +M: Wei Fang <wei.fang@nxp.com> +M: Clark Wang <xiaoning.wang@nxp.com> +L: imx@lists.linux.dev +L: netdev@vger.kernel.org +S: Maintained +F: Documentation/devicetree/bindings/ptp/nxp,ptp-netc.yaml +F: drivers/ptp/ptp_netc.c + NXP PF8100/PF8121A/PF8200 PMIC REGULATOR DEVICE DRIVER M: Jagan Teki <jagan@amarulasolutions.com> S: Maintained @@ -20866,6 +20875,14 @@ S: Maintained F: Documentation/devicetree/bindings/power/supply/qcom,pmi8998-charger.yaml F: drivers/power/supply/qcom_smbx.c +QUALCOMM PPE DRIVER +M: Luo Jie <quic_luoj@quicinc.com> +L: netdev@vger.kernel.org +S: Supported +F: Documentation/devicetree/bindings/net/qcom,ipq9574-ppe.yaml +F: Documentation/networking/device_drivers/ethernet/qualcomm/ppe/ppe.rst +F: drivers/net/ethernet/qualcomm/ppe/ + QUALCOMM QSEECOM DRIVER M: Maximilian Luz <luzmaximilian@gmail.com> L: linux-arm-msm@vger.kernel.org diff --git a/arch/m68k/coldfire/m5272.c b/arch/m68k/coldfire/m5272.c index 5b70dfdab368..918e2a3236c5 100644 --- a/arch/m68k/coldfire/m5272.c +++ b/arch/m68k/coldfire/m5272.c @@ -108,7 +108,7 @@ void __init config_BSP(char *commandp, int size) * an ethernet switch. In this case we need to use the fixed phy type, * and we need to declare it early in boot. 
*/ -static struct fixed_phy_status nettel_fixed_phy_status __initdata = { +static const struct fixed_phy_status nettel_fixed_phy_status __initconst = { .link = 1, .speed = 100, .duplex = 0, @@ -119,7 +119,7 @@ static struct fixed_phy_status nettel_fixed_phy_status __initdata = { static int __init init_BSP(void) { m5272_uarts_init(); - fixed_phy_add(0, &nettel_fixed_phy_status); + fixed_phy_add(&nettel_fixed_phy_status); clkdev_add_table(m5272_clk_lookup, ARRAY_SIZE(m5272_clk_lookup)); return 0; } diff --git a/arch/mips/bcm47xx/setup.c b/arch/mips/bcm47xx/setup.c index de426a474b5b..a93a4266dc1e 100644 --- a/arch/mips/bcm47xx/setup.c +++ b/arch/mips/bcm47xx/setup.c @@ -256,7 +256,7 @@ static int __init bcm47xx_cpu_fixes(void) } arch_initcall(bcm47xx_cpu_fixes); -static struct fixed_phy_status bcm47xx_fixed_phy_status __initdata = { +static const struct fixed_phy_status bcm47xx_fixed_phy_status __initconst = { .link = 1, .speed = SPEED_100, .duplex = DUPLEX_FULL, @@ -282,7 +282,7 @@ static int __init bcm47xx_register_bus_complete(void) bcm47xx_leds_register(); bcm47xx_workarounds(); - fixed_phy_add(0, &bcm47xx_fixed_phy_status); + fixed_phy_add(&bcm47xx_fixed_phy_status); return 0; } device_initcall(bcm47xx_register_bus_complete); diff --git a/drivers/infiniband/hw/mlx5/std_types.c b/drivers/infiniband/hw/mlx5/std_types.c index bdb568411091..2fcf553044e1 100644 --- a/drivers/infiniband/hw/mlx5/std_types.c +++ b/drivers/infiniband/hw/mlx5/std_types.c @@ -83,33 +83,14 @@ static int fill_vport_icm_addr(struct mlx5_core_dev *mdev, u16 vport, static int fill_vport_vhca_id(struct mlx5_core_dev *mdev, u16 vport, struct mlx5_ib_uapi_query_port *info) { - size_t out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); - u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {}; - void *out; - int err; - - out = kzalloc(out_sz, GFP_KERNEL); - if (!out) - return -ENOMEM; + int err = mlx5_vport_get_vhca_id(mdev, vport, &info->vport_vhca_id); - MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP); - MLX5_SET(query_hca_cap_in, in, other_function, true); - MLX5_SET(query_hca_cap_in, in, function_id, vport); - MLX5_SET(query_hca_cap_in, in, op_mod, - MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE | - HCA_CAP_OPMOD_GET_CUR); - - err = mlx5_cmd_exec(mdev, in, sizeof(in), out, out_sz); if (err) - goto out; - - info->vport_vhca_id = MLX5_GET(query_hca_cap_out, out, - capability.cmd_hca_cap.vhca_id); + return err; info->flags |= MLX5_IB_UAPI_QUERY_PORT_VPORT_VHCA_ID; -out: - kfree(out); - return err; + + return 0; } static int fill_multiport_info(struct mlx5_ib_dev *dev, u32 port_num, diff --git a/drivers/net/Space.c b/drivers/net/Space.c index dc50797a2ed0..c01e2c2f7d6c 100644 --- a/drivers/net/Space.c +++ b/drivers/net/Space.c @@ -67,8 +67,7 @@ static int netdev_boot_setup_add(char *name, struct ifmap *map) s = dev_boot_setup; for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) { if (s[i].name[0] == '\0' || s[i].name[0] == ' ') { - memset(s[i].name, 0, sizeof(s[i].name)); - strscpy(s[i].name, name, IFNAMSIZ); + strscpy_pad(s[i].name, name); memcpy(&s[i].map, map, sizeof(s[i].map)); break; } diff --git a/drivers/net/amt.c b/drivers/net/amt.c index ed86537b2f61..902c817a0dea 100644 --- a/drivers/net/amt.c +++ b/drivers/net/amt.c @@ -11,6 +11,7 @@ #include <linux/net.h> #include <linux/igmp.h> #include <linux/workqueue.h> +#include <net/flow.h> #include <net/pkt_sched.h> #include <net/net_namespace.h> #include <net/ip.h> @@ -28,6 +29,7 @@ #include <net/addrconf.h> #include <net/ip6_route.h> #include <net/inet_common.h> +#include 
<net/inet_dscp.h> #include <net/ip6_checksum.h> static struct workqueue_struct *amt_wq; @@ -1018,7 +1020,7 @@ static bool amt_send_membership_update(struct amt_dev *amt, fl4.flowi4_oif = amt->stream_dev->ifindex; fl4.daddr = amt->remote_ip; fl4.saddr = amt->local_ip; - fl4.flowi4_tos = AMT_TOS; + fl4.flowi4_dscp = inet_dsfield_to_dscp(AMT_TOS); fl4.flowi4_proto = IPPROTO_UDP; rt = ip_route_output_key(amt->net, &fl4); if (IS_ERR(rt)) { @@ -1133,7 +1135,7 @@ static bool amt_send_membership_query(struct amt_dev *amt, fl4.flowi4_oif = amt->stream_dev->ifindex; fl4.daddr = tunnel->ip4; fl4.saddr = amt->local_ip; - fl4.flowi4_tos = AMT_TOS; + fl4.flowi4_dscp = inet_dsfield_to_dscp(AMT_TOS); fl4.flowi4_proto = IPPROTO_UDP; rt = ip_route_output_key(amt->net, &fl4); if (IS_ERR(rt)) { diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 257333c88710..f25c2d2c9181 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -142,8 +142,7 @@ module_param(downdelay, int, 0); MODULE_PARM_DESC(downdelay, "Delay before considering link down, " "in milliseconds"); module_param(use_carrier, int, 0); -MODULE_PARM_DESC(use_carrier, "Use netif_carrier_ok (vs MII ioctls) in miimon; " - "0 for off, 1 for on (default)"); +MODULE_PARM_DESC(use_carrier, "option obsolete, use_carrier cannot be disabled"); module_param(mode, charp, 0); MODULE_PARM_DESC(mode, "Mode of operation; 0 for balance-rr, " "1 for active-backup, 2 for balance-xor, " @@ -830,77 +829,6 @@ const char *bond_slave_link_status(s8 link) } } -/* if <dev> supports MII link status reporting, check its link status. - * - * We either do MII/ETHTOOL ioctls, or check netif_carrier_ok(), - * depending upon the setting of the use_carrier parameter. - * - * Return either BMSR_LSTATUS, meaning that the link is up (or we - * can't tell and just pretend it is), or 0, meaning that the link is - * down. - * - * If reporting is non-zero, instead of faking link up, return -1 if - * both ETHTOOL and MII ioctls fail (meaning the device does not - * support them). If use_carrier is set, return whatever it says. - * It'd be nice if there was a good way to tell if a driver supports - * netif_carrier, but there really isn't. - */ -static int bond_check_dev_link(struct bonding *bond, - struct net_device *slave_dev, int reporting) -{ - const struct net_device_ops *slave_ops = slave_dev->netdev_ops; - struct mii_ioctl_data *mii; - struct ifreq ifr; - int ret; - - if (!reporting && !netif_running(slave_dev)) - return 0; - - if (bond->params.use_carrier) - return netif_carrier_ok(slave_dev) ? BMSR_LSTATUS : 0; - - /* Try to get link status using Ethtool first. */ - if (slave_dev->ethtool_ops->get_link) { - netdev_lock_ops(slave_dev); - ret = slave_dev->ethtool_ops->get_link(slave_dev); - netdev_unlock_ops(slave_dev); - - return ret ? BMSR_LSTATUS : 0; - } - - /* Ethtool can't be used, fallback to MII ioctls. */ - if (slave_ops->ndo_eth_ioctl) { - /* TODO: set pointer to correct ioctl on a per team member - * bases to make this more efficient. that is, once - * we determine the correct ioctl, we will always - * call it and not the others for that team - * member. - */ - - /* We cannot assume that SIOCGMIIPHY will also read a - * register; not all network drivers (e.g., e100) - * support that. 
- */ - - /* Yes, the mii is overlaid on the ifreq.ifr_ifru */ - strscpy_pad(ifr.ifr_name, slave_dev->name, IFNAMSIZ); - mii = if_mii(&ifr); - - if (dev_eth_ioctl(slave_dev, &ifr, SIOCGMIIPHY) == 0) { - mii->reg_num = MII_BMSR; - if (dev_eth_ioctl(slave_dev, &ifr, SIOCGMIIREG) == 0) - return mii->val_out & BMSR_LSTATUS; - } - } - - /* If reporting, report that either there's no ndo_eth_ioctl, - * or both SIOCGMIIREG and get_link failed (meaning that we - * cannot report link status). If not reporting, pretend - * we're ok. - */ - return reporting ? -1 : BMSR_LSTATUS; -} - /*----------------------------- Multicast list ------------------------------*/ /* Push the promiscuity flag down to appropriate slaves */ @@ -1966,7 +1894,6 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, const struct net_device_ops *slave_ops = slave_dev->netdev_ops; struct slave *new_slave = NULL, *prev_slave; struct sockaddr_storage ss; - int link_reporting; int res = 0, i; if (slave_dev->flags & IFF_MASTER && @@ -1976,12 +1903,6 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, return -EPERM; } - if (!bond->params.use_carrier && - slave_dev->ethtool_ops->get_link == NULL && - slave_ops->ndo_eth_ioctl == NULL) { - slave_warn(bond_dev, slave_dev, "no link monitoring support\n"); - } - /* already in-use? */ if (netdev_is_rx_handler_busy(slave_dev)) { SLAVE_NL_ERR(bond_dev, slave_dev, extack, @@ -2195,29 +2116,10 @@ skip_mac_set: new_slave->last_tx = new_slave->last_rx; - if (bond->params.miimon && !bond->params.use_carrier) { - link_reporting = bond_check_dev_link(bond, slave_dev, 1); - - if ((link_reporting == -1) && !bond->params.arp_interval) { - /* miimon is set but a bonded network driver - * does not support ETHTOOL/MII and - * arp_interval is not set. Note: if - * use_carrier is enabled, we will never go - * here (because netif_carrier is always - * supported); thus, we don't need to change - * the messages for netif_carrier. - */ - slave_warn(bond_dev, slave_dev, "MII and ETHTOOL support not available for slave, and arp_interval/arp_ip_target module parameters not specified, thus bonding will not detect link failures! 
see bonding.txt for details\n"); - } else if (link_reporting == -1) { - /* unable get link status using mii/ethtool */ - slave_warn(bond_dev, slave_dev, "can't get link status from slave; the network driver associated with this interface does not support MII or ETHTOOL link status reporting, thus miimon has no effect on this interface\n"); - } - } - /* check for initial state */ new_slave->link = BOND_LINK_NOCHANGE; if (bond->params.miimon) { - if (bond_check_dev_link(bond, slave_dev, 0) == BMSR_LSTATUS) { + if (netif_carrier_ok(slave_dev)) { if (bond->params.updelay) { bond_set_slave_link_state(new_slave, BOND_LINK_BACK, @@ -2759,7 +2661,7 @@ static int bond_miimon_inspect(struct bonding *bond) bond_for_each_slave_rcu(bond, slave, iter) { bond_propose_link_state(slave, BOND_LINK_NOCHANGE); - link_state = bond_check_dev_link(bond, slave->dev, 0); + link_state = netif_carrier_ok(slave->dev); switch (slave->link) { case BOND_LINK_UP: @@ -6257,10 +6159,10 @@ static int __init bond_check_params(struct bond_params *params) downdelay = 0; } - if ((use_carrier != 0) && (use_carrier != 1)) { - pr_warn("Warning: use_carrier module parameter (%d), not of valid value (0/1), so it was set to 1\n", - use_carrier); - use_carrier = 1; + if (use_carrier != 1) { + pr_err("Error: invalid use_carrier parameter (%d)\n", + use_carrier); + return -EINVAL; } if (num_peer_notif < 0 || num_peer_notif > 255) { @@ -6507,7 +6409,6 @@ static int __init bond_check_params(struct bond_params *params) params->updelay = updelay; params->downdelay = downdelay; params->peer_notif_delay = 0; - params->use_carrier = use_carrier; params->lacp_active = 1; params->lacp_fast = lacp_fast; params->primary[0] = 0; diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c index 57fff2421f1b..e573b34a1bbc 100644 --- a/drivers/net/bonding/bond_netlink.c +++ b/drivers/net/bonding/bond_netlink.c @@ -259,13 +259,11 @@ static int bond_changelink(struct net_device *bond_dev, struct nlattr *tb[], return err; } if (data[IFLA_BOND_USE_CARRIER]) { - int use_carrier = nla_get_u8(data[IFLA_BOND_USE_CARRIER]); - - bond_opt_initval(&newval, use_carrier); - err = __bond_opt_set(bond, BOND_OPT_USE_CARRIER, &newval, - data[IFLA_BOND_USE_CARRIER], extack); - if (err) - return err; + if (nla_get_u8(data[IFLA_BOND_USE_CARRIER]) != 1) { + NL_SET_ERR_MSG_ATTR(extack, data[IFLA_BOND_USE_CARRIER], + "option obsolete, use_carrier cannot be disabled"); + return -EINVAL; + } } if (data[IFLA_BOND_ARP_INTERVAL]) { int arp_interval = nla_get_u32(data[IFLA_BOND_ARP_INTERVAL]); @@ -688,7 +686,7 @@ static int bond_fill_info(struct sk_buff *skb, bond->params.peer_notif_delay * bond->params.miimon)) goto nla_put_failure; - if (nla_put_u8(skb, IFLA_BOND_USE_CARRIER, bond->params.use_carrier)) + if (nla_put_u8(skb, IFLA_BOND_USE_CARRIER, 1)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BOND_ARP_INTERVAL, bond->params.arp_interval)) diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index 3b6f815c55ff..c0a5eb8766b5 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -187,7 +187,6 @@ static const struct bond_opt_value bond_primary_reselect_tbl[] = { }; static const struct bond_opt_value bond_use_carrier_tbl[] = { - { "off", 0, 0}, { "on", 1, BOND_VALFLAG_DEFAULT}, { NULL, -1, 0} }; @@ -419,7 +418,7 @@ static const struct bond_option bond_opts[BOND_OPT_LAST] = { [BOND_OPT_USE_CARRIER] = { .id = BOND_OPT_USE_CARRIER, .name = "use_carrier", - .desc = "Use netif_carrier_ok 
(vs MII ioctls) in miimon", + .desc = "option obsolete, use_carrier cannot be disabled", .values = bond_use_carrier_tbl, .set = bond_option_use_carrier_set }, @@ -1091,10 +1090,6 @@ static int bond_option_peer_notif_delay_set(struct bonding *bond, static int bond_option_use_carrier_set(struct bonding *bond, const struct bond_opt_value *newval) { - netdev_dbg(bond->dev, "Setting use_carrier to %llu\n", - newval->value); - bond->params.use_carrier = newval->value; - return 0; } diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c index 1e13bb170515..9a75ad3181ab 100644 --- a/drivers/net/bonding/bond_sysfs.c +++ b/drivers/net/bonding/bond_sysfs.c @@ -467,14 +467,12 @@ static ssize_t bonding_show_primary_reselect(struct device *d, static DEVICE_ATTR(primary_reselect, 0644, bonding_show_primary_reselect, bonding_sysfs_store_option); -/* Show the use_carrier flag. */ +/* use_carrier is obsolete, but print value for compatibility */ static ssize_t bonding_show_carrier(struct device *d, struct device_attribute *attr, char *buf) { - struct bonding *bond = to_bond(d); - - return sysfs_emit(buf, "%d\n", bond->params.use_carrier); + return sysfs_emit(buf, "1\n"); } static DEVICE_ATTR(use_carrier, 0644, bonding_show_carrier, bonding_sysfs_store_option); diff --git a/drivers/net/dsa/Kconfig b/drivers/net/dsa/Kconfig index ec759f8cb0e2..4d9af691b989 100644 --- a/drivers/net/dsa/Kconfig +++ b/drivers/net/dsa/Kconfig @@ -26,13 +26,7 @@ config NET_DSA_LOOP source "drivers/net/dsa/hirschmann/Kconfig" -config NET_DSA_LANTIQ_GSWIP - tristate "Lantiq / Intel GSWIP" - depends on HAS_IOMEM - select NET_DSA_TAG_GSWIP - help - This enables support for the Lantiq / Intel GSWIP 2.1 found in - the xrx200 / VR9 SoC. +source "drivers/net/dsa/lantiq/Kconfig" config NET_DSA_MT7530 tristate "MediaTek MT7530 and MT7531 Ethernet switch support" @@ -99,6 +93,14 @@ config NET_DSA_RZN1_A5PSW This driver supports the A5PSW switch, which is embedded in Renesas RZ/N1 SoC. +config NET_DSA_KS8995 + tristate "Micrel KS8995 family 5-ports 10/100 Ethernet switches" + depends on SPI + select NET_DSA_TAG_NONE + help + This driver supports the Micrel KS8995 family of 10/100 Mbit ethernet + switches, managed over SPI. 
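
[Editorial aside on the use_carrier retirement in the bonding hunks above: the knob stays visible for uAPI compatibility, so reads always report 1 and writes of any other value fail with -EINVAL rather than being silently ignored. Below is a minimal userspace-style sketch of that compatibility-shim pattern; the function names are illustrative only, not kernel API.]

#include <errno.h>
#include <stdio.h>

/* Obsolete knob kept for compatibility: it reads back as its only
 * remaining legal value, and any attempt to change it is an error.
 */
enum { USE_CARRIER_FIXED = 1 };

static int use_carrier_get(void)
{
	return USE_CARRIER_FIXED;	/* mirrors bonding_show_carrier() */
}

static int use_carrier_set(int val)
{
	if (val != USE_CARRIER_FIXED)
		return -EINVAL;		/* mirrors bond_changelink() */
	return 0;			/* accepted, but a no-op */
}

int main(void)
{
	printf("use_carrier=%d\n", use_carrier_get());
	printf("set to 0 -> %d (expected -EINVAL)\n", use_carrier_set(0));
	return 0;
}
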
+ config NET_DSA_SMSC_LAN9303 tristate select NET_DSA_TAG_LAN9303 diff --git a/drivers/net/dsa/Makefile b/drivers/net/dsa/Makefile index cb9a97340e58..c0a534fe6eaf 100644 --- a/drivers/net/dsa/Makefile +++ b/drivers/net/dsa/Makefile @@ -5,7 +5,7 @@ obj-$(CONFIG_NET_DSA_LOOP) += dsa_loop.o ifdef CONFIG_NET_DSA_LOOP obj-$(CONFIG_FIXED_PHY) += dsa_loop_bdinfo.o endif -obj-$(CONFIG_NET_DSA_LANTIQ_GSWIP) += lantiq_gswip.o +obj-$(CONFIG_NET_DSA_KS8995) += ks8995.o obj-$(CONFIG_NET_DSA_MT7530) += mt7530.o obj-$(CONFIG_NET_DSA_MT7530_MDIO) += mt7530-mdio.o obj-$(CONFIG_NET_DSA_MT7530_MMIO) += mt7530-mmio.o @@ -19,6 +19,7 @@ obj-$(CONFIG_NET_DSA_VITESSE_VSC73XX_PLATFORM) += vitesse-vsc73xx-platform.o obj-$(CONFIG_NET_DSA_VITESSE_VSC73XX_SPI) += vitesse-vsc73xx-spi.o obj-y += b53/ obj-y += hirschmann/ +obj-y += lantiq/ obj-y += microchip/ obj-y += mv88e6xxx/ obj-y += ocelot/ diff --git a/drivers/net/dsa/b53/b53_mmap.c b/drivers/net/dsa/b53/b53_mmap.c index f06c3e0cc42a..f4a59d8fbdd6 100644 --- a/drivers/net/dsa/b53/b53_mmap.c +++ b/drivers/net/dsa/b53/b53_mmap.c @@ -29,8 +29,13 @@ #include "b53_priv.h" #define BCM63XX_EPHY_REG 0x3C +#define BCM63268_GPHY_REG 0x54 + +#define GPHY_CTRL_LOW_PWR BIT(3) +#define GPHY_CTRL_IDDQ_BIAS BIT(0) struct b53_phy_info { + u32 gphy_port_mask; u32 ephy_enable_mask; u32 ephy_port_mask; u32 ephy_bias_bit; @@ -65,6 +70,7 @@ static const struct b53_phy_info bcm6368_ephy_info = { static const u32 bcm63268_ephy_offsets[] = {4, 9, 14}; static const struct b53_phy_info bcm63268_ephy_info = { + .gphy_port_mask = BIT(3), .ephy_enable_mask = GENMASK(4, 0), .ephy_port_mask = GENMASK((ARRAY_SIZE(bcm63268_ephy_offsets) - 1), 0), .ephy_bias_bit = 24, @@ -290,13 +296,30 @@ static int bcm63xx_ephy_set(struct b53_device *dev, int port, bool enable) return regmap_update_bits(gpio_ctrl, BCM63XX_EPHY_REG, mask, val); } +static int bcm63268_gphy_set(struct b53_device *dev, bool enable) +{ + struct b53_mmap_priv *priv = dev->priv; + struct regmap *gpio_ctrl = priv->gpio_ctrl; + u32 mask = GPHY_CTRL_IDDQ_BIAS | GPHY_CTRL_LOW_PWR; + u32 val = 0; + + if (!enable) + val = mask; + + return regmap_update_bits(gpio_ctrl, BCM63268_GPHY_REG, mask, val); +} + static void b53_mmap_phy_enable(struct b53_device *dev, int port) { struct b53_mmap_priv *priv = dev->priv; int ret = 0; - if (priv->phy_info && (BIT(port) & priv->phy_info->ephy_port_mask)) - ret = bcm63xx_ephy_set(dev, port, true); + if (priv->phy_info) { + if (BIT(port) & priv->phy_info->ephy_port_mask) + ret = bcm63xx_ephy_set(dev, port, true); + else if (BIT(port) & priv->phy_info->gphy_port_mask) + ret = bcm63268_gphy_set(dev, true); + } if (!ret) priv->phys_enabled |= BIT(port); @@ -307,8 +330,12 @@ static void b53_mmap_phy_disable(struct b53_device *dev, int port) struct b53_mmap_priv *priv = dev->priv; int ret = 0; - if (priv->phy_info && (BIT(port) & priv->phy_info->ephy_port_mask)) - ret = bcm63xx_ephy_set(dev, port, false); + if (priv->phy_info) { + if (BIT(port) & priv->phy_info->ephy_port_mask) + ret = bcm63xx_ephy_set(dev, port, false); + else if (BIT(port) & priv->phy_info->gphy_port_mask) + ret = bcm63268_gphy_set(dev, false); + } if (!ret) priv->phys_enabled &= ~BIT(port); diff --git a/drivers/net/dsa/dsa_loop.c b/drivers/net/dsa/dsa_loop.c index d8a35f25a4c8..8112515d545e 100644 --- a/drivers/net/dsa/dsa_loop.c +++ b/drivers/net/dsa/dsa_loop.c @@ -386,13 +386,10 @@ static struct mdio_driver dsa_loop_drv = { static void dsa_loop_phydevs_unregister(void) { - unsigned int i; - - for (i = 0; i < NUM_FIXED_PHYS; i++) - if 
(!IS_ERR(phydevs[i])) { + for (int i = 0; i < NUM_FIXED_PHYS; i++) { + if (!IS_ERR(phydevs[i])) fixed_phy_unregister(phydevs[i]); - phy_device_free(phydevs[i]); - } + } } static int __init dsa_loop_init(void) @@ -402,7 +399,8 @@ static int __init dsa_loop_init(void) .speed = SPEED_100, .duplex = DUPLEX_FULL, }; - unsigned int i, ret; + unsigned int i; + int ret; for (i = 0; i < NUM_FIXED_PHYS; i++) phydevs[i] = fixed_phy_register(&status, NULL); diff --git a/drivers/net/phy/spi_ks8995.c b/drivers/net/dsa/ks8995.c index d135b061d810..5c4c83e00477 100644 --- a/drivers/net/phy/spi_ks8995.c +++ b/drivers/net/dsa/ks8995.c @@ -3,6 +3,7 @@ * SPI driver for Micrel/Kendin KS8995M and KSZ8864RMN ethernet switches * * Copyright (C) 2008 Gabor Juhos <juhosg at openwrt.org> + * Copyright (C) 2025 Linus Walleij <linus.walleij@linaro.org> * * This file was based on: drivers/spi/at25.c * Copyright (C) 2006 David Brownell @@ -10,6 +11,9 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/bits.h> +#include <linux/if_bridge.h> +#include <linux/if_vlan.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/module.h> @@ -17,8 +21,8 @@ #include <linux/device.h> #include <linux/gpio/consumer.h> #include <linux/of.h> - #include <linux/spi/spi.h> +#include <net/dsa.h> #define DRV_VERSION "0.1.1" #define DRV_DESC "Micrel KS8995 Ethernet switch SPI driver" @@ -29,18 +33,59 @@ #define KS8995_REG_ID1 0x01 /* Chip ID1 */ #define KS8995_REG_GC0 0x02 /* Global Control 0 */ + +#define KS8995_GC0_P5_PHY BIT(3) /* Port 5 PHY enabled */ + #define KS8995_REG_GC1 0x03 /* Global Control 1 */ #define KS8995_REG_GC2 0x04 /* Global Control 2 */ + +#define KS8995_GC2_HUGE BIT(2) /* Huge packet support */ +#define KS8995_GC2_LEGAL BIT(1) /* Legal size override */ + #define KS8995_REG_GC3 0x05 /* Global Control 3 */ #define KS8995_REG_GC4 0x06 /* Global Control 4 */ + +#define KS8995_GC4_10BT BIT(4) /* Force switch to 10Mbit */ +#define KS8995_GC4_MII_FLOW BIT(5) /* MII full-duplex flow control enable */ +#define KS8995_GC4_MII_HD BIT(6) /* MII half-duplex mode enable */ + #define KS8995_REG_GC5 0x07 /* Global Control 5 */ #define KS8995_REG_GC6 0x08 /* Global Control 6 */ #define KS8995_REG_GC7 0x09 /* Global Control 7 */ #define KS8995_REG_GC8 0x0a /* Global Control 8 */ #define KS8995_REG_GC9 0x0b /* Global Control 9 */ -#define KS8995_REG_PC(p, r) ((0x10 * p) + r) /* Port Control */ -#define KS8995_REG_PS(p, r) ((0x10 * p) + r + 0xe) /* Port Status */ +#define KS8995_GC9_SPECIAL BIT(0) /* Special tagging mode (DSA) */ + +/* In DSA the ports 1-4 are numbered 0-3 and the CPU port is port 4 */ +#define KS8995_REG_PC(p, r) (0x10 + (0x10 * (p)) + (r)) /* Port Control */ +#define KS8995_REG_PS(p, r) (0x1e + (0x10 * (p)) + (r)) /* Port Status */ + +#define KS8995_REG_PC0 0x00 /* Port Control 0 */ +#define KS8995_REG_PC1 0x01 /* Port Control 1 */ +#define KS8995_REG_PC2 0x02 /* Port Control 2 */ +#define KS8995_REG_PC3 0x03 /* Port Control 3 */ +#define KS8995_REG_PC4 0x04 /* Port Control 4 */ +#define KS8995_REG_PC5 0x05 /* Port Control 5 */ +#define KS8995_REG_PC6 0x06 /* Port Control 6 */ +#define KS8995_REG_PC7 0x07 /* Port Control 7 */ +#define KS8995_REG_PC8 0x08 /* Port Control 8 */ +#define KS8995_REG_PC9 0x09 /* Port Control 9 */ +#define KS8995_REG_PC10 0x0a /* Port Control 10 */ +#define KS8995_REG_PC11 0x0b /* Port Control 11 */ +#define KS8995_REG_PC12 0x0c /* Port Control 12 */ +#define KS8995_REG_PC13 0x0d /* Port Control 13 */ + +#define KS8995_PC0_TAG_INS BIT(2) /* Enable tag insertion on port 
*/ +#define KS8995_PC0_TAG_REM BIT(1) /* Enable tag removal on port */ +#define KS8995_PC0_PRIO_EN BIT(0) /* Enable priority handling */ + +#define KS8995_PC2_TXEN BIT(2) /* Enable TX on port */ +#define KS8995_PC2_RXEN BIT(1) /* Enable RX on port */ +#define KS8995_PC2_LEARN_DIS BIT(0) /* Disable learning on port */ + +#define KS8995_PC13_TXDIS BIT(6) /* Disable transmitter */ +#define KS8995_PC13_PWDN BIT(3) /* Power down */ #define KS8995_REG_TPC0 0x60 /* TOS Priority Control 0 */ #define KS8995_REG_TPC1 0x61 /* TOS Priority Control 1 */ @@ -91,6 +136,8 @@ #define KS8995_CMD_WRITE 0x02U #define KS8995_CMD_READ 0x03U +#define KS8995_CPU_PORT 4 +#define KS8995_NUM_PORTS 5 /* 5 ports including the CPU port */ #define KS8995_RESET_DELAY 10 /* usec */ enum ks8995_chip_variant { @@ -138,11 +185,14 @@ static const struct ks8995_chip_params ks8995_chip[] = { struct ks8995_switch { struct spi_device *spi; + struct device *dev; + struct dsa_switch *ds; struct mutex lock; struct gpio_desc *reset_gpio; struct bin_attribute regs_attr; const struct ks8995_chip_params *chip; int revision_id; + unsigned int max_mtu[KS8995_NUM_PORTS]; }; static const struct spi_device_id ks8995_id[] = { @@ -288,30 +338,6 @@ static int ks8995_reset(struct ks8995_switch *ks) return ks8995_start(ks); } -static ssize_t ks8995_registers_read(struct file *filp, struct kobject *kobj, - const struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) -{ - struct device *dev; - struct ks8995_switch *ks8995; - - dev = kobj_to_dev(kobj); - ks8995 = dev_get_drvdata(dev); - - return ks8995_read(ks8995, buf, off, count); -} - -static ssize_t ks8995_registers_write(struct file *filp, struct kobject *kobj, - const struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) -{ - struct device *dev; - struct ks8995_switch *ks8995; - - dev = kobj_to_dev(kobj); - ks8995 = dev_get_drvdata(dev); - - return ks8995_write(ks8995, buf, off, count); -} - /* ks8995_get_revision - get chip revision * @ks: pointer to switch instance * @@ -395,14 +421,325 @@ err_out: return err; } -static const struct bin_attribute ks8995_registers_attr = { - .attr = { - .name = "registers", - .mode = 0600, - }, - .size = KS8995_REGS_SIZE, - .read = ks8995_registers_read, - .write = ks8995_registers_write, +static int ks8995_check_config(struct ks8995_switch *ks) +{ + int ret; + u8 val; + + ret = ks8995_read_reg(ks, KS8995_REG_GC0, &val); + if (ret) { + dev_err(ks->dev, "failed to read KS8995_REG_GC0\n"); + return ret; + } + + dev_dbg(ks->dev, "port 5 PHY %senabled\n", + (val & KS8995_GC0_P5_PHY) ? "" : "not "); + + val |= KS8995_GC0_P5_PHY; + ret = ks8995_write_reg(ks, KS8995_REG_GC0, val); + if (ret) + dev_err(ks->dev, "failed to set KS8995_REG_GC0\n"); + + dev_dbg(ks->dev, "set KS8995_REG_GC0 to 0x%02x\n", val); + + return 0; +} + +static void +ks8995_mac_config(struct phylink_config *config, unsigned int mode, + const struct phylink_link_state *state) +{ +} + +static void +ks8995_mac_link_up(struct phylink_config *config, struct phy_device *phydev, + unsigned int mode, phy_interface_t interface, + int speed, int duplex, bool tx_pause, bool rx_pause) +{ + struct dsa_port *dp = dsa_phylink_to_port(config); + struct ks8995_switch *ks = dp->ds->priv; + int port = dp->index; + int ret; + u8 val; + + /* Allow forcing the mode on the fixed CPU port, no autonegotiation. + * We assume autonegotiation works on the PHY-facing ports. 
+ */
+ if (port != KS8995_CPU_PORT)
+ return;
+
+ dev_dbg(ks->dev, "MAC link up on CPU port (%d)\n", port);
+
+ ret = ks8995_read_reg(ks, KS8995_REG_GC4, &val);
+ if (ret) {
+ dev_err(ks->dev, "failed to read KS8995_REG_GC4\n");
+ return;
+ }
+
+ /* Conjure port config */
+ switch (speed) {
+ case SPEED_10:
+ dev_dbg(ks->dev, "set switch MII to 10Mbit mode\n");
+ val |= KS8995_GC4_10BT;
+ break;
+ case SPEED_100:
+ default:
+ dev_dbg(ks->dev, "set switch MII to 100Mbit mode\n");
+ val &= ~KS8995_GC4_10BT;
+ break;
+ }
+
+ if (duplex == DUPLEX_HALF) {
+ dev_dbg(ks->dev, "set switch MII to half duplex\n");
+ val |= KS8995_GC4_MII_HD;
+ } else {
+ dev_dbg(ks->dev, "set switch MII to full duplex\n");
+ val &= ~KS8995_GC4_MII_HD;
+ }
+
+ dev_dbg(ks->dev, "set KS8995_REG_GC4 to %02x\n", val);
+
+ /* Enable the CPU port */
+ ret = ks8995_write_reg(ks, KS8995_REG_GC4, val);
+ if (ret)
+ dev_err(ks->dev, "failed to set KS8995_REG_GC4\n");
+}
+
+static void
+ks8995_mac_link_down(struct phylink_config *config, unsigned int mode,
+ phy_interface_t interface)
+{
+ struct dsa_port *dp = dsa_phylink_to_port(config);
+ struct ks8995_switch *ks = dp->ds->priv;
+ int port = dp->index;
+
+ if (port != KS8995_CPU_PORT)
+ return;
+
+ dev_dbg(ks->dev, "MAC link down on CPU port (%d)\n", port);
+
+ /* Disable the CPU port */
+}
+
+static const struct phylink_mac_ops ks8995_phylink_mac_ops = {
+ .mac_config = ks8995_mac_config,
+ .mac_link_up = ks8995_mac_link_up,
+ .mac_link_down = ks8995_mac_link_down,
+};
+
+static enum
+dsa_tag_protocol ks8995_get_tag_protocol(struct dsa_switch *ds,
+ int port,
+ enum dsa_tag_protocol mp)
+{
+ /* This switch actually uses the 6 byte KS8995 protocol */
+ return DSA_TAG_PROTO_NONE;
+}
+
+static int ks8995_setup(struct dsa_switch *ds)
+{
+ return 0;
+}
+
+static int ks8995_port_enable(struct dsa_switch *ds, int port,
+ struct phy_device *phy)
+{
+ struct ks8995_switch *ks = ds->priv;
+
+ dev_dbg(ks->dev, "enable port %d\n", port);
+
+ return 0;
+}
+
+static void ks8995_port_disable(struct dsa_switch *ds, int port)
+{
+ struct ks8995_switch *ks = ds->priv;
+
+ dev_dbg(ks->dev, "disable port %d\n", port);
+}
+
+static int ks8995_port_pre_bridge_flags(struct dsa_switch *ds, int port,
+ struct switchdev_brport_flags flags,
+ struct netlink_ext_ack *extack)
+{
+ /* We support enabling/disabling learning */
+ if (flags.mask & ~(BR_LEARNING))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int ks8995_port_bridge_flags(struct dsa_switch *ds, int port,
+ struct switchdev_brport_flags flags,
+ struct netlink_ext_ack *extack)
+{
+ struct ks8995_switch *ks = ds->priv;
+ int ret;
+ u8 val;
+
+ if (flags.mask & BR_LEARNING) {
+ ret = ks8995_read_reg(ks, KS8995_REG_PC(port, KS8995_REG_PC2), &val);
+ if (ret) {
+ dev_err(ks->dev, "failed to read KS8995_REG_PC2 on port %d\n", port);
+ return ret;
+ }
+
+ if (flags.val & BR_LEARNING)
+ val &= ~KS8995_PC2_LEARN_DIS;
+ else
+ val |= KS8995_PC2_LEARN_DIS;
+
+ ret = ks8995_write_reg(ks, KS8995_REG_PC(port, KS8995_REG_PC2), val);
+ if (ret) {
+ dev_err(ks->dev, "failed to write KS8995_REG_PC2 on port %d\n", port);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static void ks8995_port_stp_state_set(struct dsa_switch *ds, int port, u8 state)
+{
+ struct ks8995_switch *ks = ds->priv;
+ int ret;
+ u8 val;
+
+ ret = ks8995_read_reg(ks, KS8995_REG_PC(port, KS8995_REG_PC2), &val);
+ if (ret) {
+ dev_err(ks->dev, "failed to read KS8995_REG_PC2 on port %d\n", port);
+ return;
+ }
+
+ /* Set the bits for the different STP states in accordance with
+ * the datasheet,
pages 36-37 "Spanning tree support".
+ */
+ switch (state) {
+ case BR_STATE_DISABLED:
+ case BR_STATE_BLOCKING:
+ case BR_STATE_LISTENING:
+ val &= ~KS8995_PC2_TXEN;
+ val &= ~KS8995_PC2_RXEN;
+ val |= KS8995_PC2_LEARN_DIS;
+ break;
+ case BR_STATE_LEARNING:
+ val &= ~KS8995_PC2_TXEN;
+ val &= ~KS8995_PC2_RXEN;
+ val &= ~KS8995_PC2_LEARN_DIS;
+ break;
+ case BR_STATE_FORWARDING:
+ val |= KS8995_PC2_TXEN;
+ val |= KS8995_PC2_RXEN;
+ val &= ~KS8995_PC2_LEARN_DIS;
+ break;
+ default:
+ dev_err(ks->dev, "unknown bridge state requested\n");
+ return;
+ }
+
+ ret = ks8995_write_reg(ks, KS8995_REG_PC(port, KS8995_REG_PC2), val);
+ if (ret) {
+ dev_err(ks->dev, "failed to write KS8995_REG_PC2 on port %d\n", port);
+ return;
+ }
+
+ dev_dbg(ks->dev, "set KS8995_REG_PC2 for port %d to %02x\n", port, val);
+}
+
+static void ks8995_phylink_get_caps(struct dsa_switch *dsa, int port,
+ struct phylink_config *config)
+{
+ unsigned long *interfaces = config->supported_interfaces;
+
+ if (port == KS8995_CPU_PORT)
+ __set_bit(PHY_INTERFACE_MODE_MII, interfaces);
+
+ if (port <= 3) {
+ /* Internal PHYs */
+ __set_bit(PHY_INTERFACE_MODE_INTERNAL, interfaces);
+ /* phylib default */
+ __set_bit(PHY_INTERFACE_MODE_MII, interfaces);
+ }
+
+ config->mac_capabilities = MAC_SYM_PAUSE | MAC_10 | MAC_100;
+}
+
+/* Huge packet support up to 1916 byte packets "inclusive"
+ * which means that tags are included. If the bit is not set
+ * it is 1536 bytes "inclusive". We present the length without
+ * tags or ethernet headers. The setting affects all ports.
+ */
+static int ks8995_change_mtu(struct dsa_switch *ds, int port, int new_mtu)
+{
+ struct ks8995_switch *ks = ds->priv;
+ unsigned int max_mtu;
+ int ret;
+ u8 val;
+ int i;
+
+ ks->max_mtu[port] = new_mtu;
+
+ /* Cap the MTU for the entire switch at the largest value
+ * configured on any one port: the biggest per-port MTU
+ * determines the limit for the whole switch.
+ */
+ max_mtu = ETH_DATA_LEN;
+ for (i = 0; i < KS8995_NUM_PORTS; i++) {
+ if (ks->max_mtu[i] > max_mtu)
+ max_mtu = ks->max_mtu[i];
+ }
+
+ /* Translate to layer 2 size.
+ * Add ethernet and (possible) VLAN headers, and checksum to the size.
+ * For ETH_DATA_LEN (1500 bytes) this will add up to 1522 bytes.
+ */ + max_mtu += VLAN_ETH_HLEN; + max_mtu += ETH_FCS_LEN; + + ret = ks8995_read_reg(ks, KS8995_REG_GC2, &val); + if (ret) { + dev_err(ks->dev, "failed to read KS8995_REG_GC2\n"); + return ret; + } + + if (max_mtu <= 1522) { + val &= ~KS8995_GC2_HUGE; + val &= ~KS8995_GC2_LEGAL; + } else if (max_mtu > 1522 && max_mtu <= 1536) { + /* This accepts packets up to 1536 bytes */ + val &= ~KS8995_GC2_HUGE; + val |= KS8995_GC2_LEGAL; + } else { + /* This accepts packets up to 1916 bytes */ + val |= KS8995_GC2_HUGE; + val |= KS8995_GC2_LEGAL; + } + + dev_dbg(ks->dev, "new max MTU %d bytes (inclusive)\n", max_mtu); + + ret = ks8995_write_reg(ks, KS8995_REG_GC2, val); + if (ret) + dev_err(ks->dev, "failed to set KS8995_REG_GC2\n"); + + return ret; +} + +static int ks8995_get_max_mtu(struct dsa_switch *ds, int port) +{ + return 1916 - ETH_HLEN - ETH_FCS_LEN; +} + +static const struct dsa_switch_ops ks8995_ds_ops = { + .get_tag_protocol = ks8995_get_tag_protocol, + .setup = ks8995_setup, + .port_pre_bridge_flags = ks8995_port_pre_bridge_flags, + .port_bridge_flags = ks8995_port_bridge_flags, + .port_enable = ks8995_port_enable, + .port_disable = ks8995_port_disable, + .port_stp_state_set = ks8995_port_stp_state_set, + .port_change_mtu = ks8995_change_mtu, + .port_max_mtu = ks8995_get_max_mtu, + .phylink_get_caps = ks8995_phylink_get_caps, }; /* ------------------------------------------------------------------------ */ @@ -423,6 +760,7 @@ static int ks8995_probe(struct spi_device *spi) mutex_init(&ks->lock); ks->spi = spi; + ks->dev = &spi->dev; ks->chip = &ks8995_chip[variant]; ks->reset_gpio = devm_gpiod_get_optional(&spi->dev, "reset", @@ -438,9 +776,15 @@ static int ks8995_probe(struct spi_device *spi) if (err) return err; - /* de-assert switch reset */ - /* FIXME: this likely requires a delay */ - gpiod_set_value_cansleep(ks->reset_gpio, 0); + if (ks->reset_gpio) { + /* + * If a reset line was obtained, wait for 100us after + * de-asserting RESET before accessing any registers, see + * the KS8995MA datasheet, page 44. 
+ */ + gpiod_set_value_cansleep(ks->reset_gpio, 0); + udelay(100); + } spi_set_drvdata(spi, ks); @@ -456,24 +800,32 @@ static int ks8995_probe(struct spi_device *spi) if (err) return err; - memcpy(&ks->regs_attr, &ks8995_registers_attr, sizeof(ks->regs_attr)); - ks->regs_attr.size = ks->chip->regs_size; - err = ks8995_reset(ks); if (err) return err; - sysfs_attr_init(&ks->regs_attr.attr); - err = sysfs_create_bin_file(&spi->dev.kobj, &ks->regs_attr); - if (err) { - dev_err(&spi->dev, "unable to create sysfs file, err=%d\n", - err); - return err; - } - dev_info(&spi->dev, "%s device found, Chip ID:%x, Revision:%x\n", ks->chip->name, ks->chip->chip_id, ks->revision_id); + err = ks8995_check_config(ks); + if (err) + return err; + + ks->ds = devm_kzalloc(&spi->dev, sizeof(*ks->ds), GFP_KERNEL); + if (!ks->ds) + return -ENOMEM; + + ks->ds->dev = &spi->dev; + ks->ds->num_ports = KS8995_NUM_PORTS; + ks->ds->ops = &ks8995_ds_ops; + ks->ds->phylink_mac_ops = &ks8995_phylink_mac_ops; + ks->ds->priv = ks; + + err = dsa_register_switch(ks->ds); + if (err) + return dev_err_probe(&spi->dev, err, + "unable to register DSA switch\n"); + return 0; } @@ -481,8 +833,7 @@ static void ks8995_remove(struct spi_device *spi) { struct ks8995_switch *ks = spi_get_drvdata(spi); - sysfs_remove_bin_file(&spi->dev.kobj, &ks->regs_attr); - + dsa_unregister_switch(ks->ds); /* assert reset */ gpiod_set_value_cansleep(ks->reset_gpio, 1); } diff --git a/drivers/net/dsa/lantiq/Kconfig b/drivers/net/dsa/lantiq/Kconfig new file mode 100644 index 000000000000..1cb053c823f7 --- /dev/null +++ b/drivers/net/dsa/lantiq/Kconfig @@ -0,0 +1,7 @@ +config NET_DSA_LANTIQ_GSWIP + tristate "Lantiq / Intel GSWIP" + depends on HAS_IOMEM + select NET_DSA_TAG_GSWIP + help + This enables support for the Lantiq / Intel GSWIP 2.1 found in + the xrx200 / VR9 SoC. diff --git a/drivers/net/dsa/lantiq/Makefile b/drivers/net/dsa/lantiq/Makefile new file mode 100644 index 000000000000..849f85ebebd6 --- /dev/null +++ b/drivers/net/dsa/lantiq/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_NET_DSA_LANTIQ_GSWIP) += lantiq_gswip.o diff --git a/drivers/net/dsa/lantiq_gswip.c b/drivers/net/dsa/lantiq/lantiq_gswip.c index 6eb3140d4044..1e991d7bca0b 100644 --- a/drivers/net/dsa/lantiq_gswip.c +++ b/drivers/net/dsa/lantiq/lantiq_gswip.c @@ -25,7 +25,9 @@ * between all LAN ports by default. 
*/ -#include <linux/clk.h> +#include "lantiq_gswip.h" +#include "lantiq_pce.h" + #include <linux/delay.h> #include <linux/etherdevice.h> #include <linux/firmware.h> @@ -39,258 +41,13 @@ #include <linux/of_platform.h> #include <linux/phy.h> #include <linux/phylink.h> -#include <linux/platform_device.h> -#include <linux/regmap.h> -#include <linux/reset.h> -#include <net/dsa.h> #include <dt-bindings/mips/lantiq_rcu_gphy.h> -#include "lantiq_pce.h" - -/* GSWIP MDIO Registers */ -#define GSWIP_MDIO_GLOB 0x00 -#define GSWIP_MDIO_GLOB_ENABLE BIT(15) -#define GSWIP_MDIO_CTRL 0x08 -#define GSWIP_MDIO_CTRL_BUSY BIT(12) -#define GSWIP_MDIO_CTRL_RD BIT(11) -#define GSWIP_MDIO_CTRL_WR BIT(10) -#define GSWIP_MDIO_CTRL_PHYAD_MASK 0x1f -#define GSWIP_MDIO_CTRL_PHYAD_SHIFT 5 -#define GSWIP_MDIO_CTRL_REGAD_MASK 0x1f -#define GSWIP_MDIO_READ 0x09 -#define GSWIP_MDIO_WRITE 0x0A -#define GSWIP_MDIO_MDC_CFG0 0x0B -#define GSWIP_MDIO_MDC_CFG1 0x0C -#define GSWIP_MDIO_PHYp(p) (0x15 - (p)) -#define GSWIP_MDIO_PHY_LINK_MASK 0x6000 -#define GSWIP_MDIO_PHY_LINK_AUTO 0x0000 -#define GSWIP_MDIO_PHY_LINK_DOWN 0x4000 -#define GSWIP_MDIO_PHY_LINK_UP 0x2000 -#define GSWIP_MDIO_PHY_SPEED_MASK 0x1800 -#define GSWIP_MDIO_PHY_SPEED_AUTO 0x1800 -#define GSWIP_MDIO_PHY_SPEED_M10 0x0000 -#define GSWIP_MDIO_PHY_SPEED_M100 0x0800 -#define GSWIP_MDIO_PHY_SPEED_G1 0x1000 -#define GSWIP_MDIO_PHY_FDUP_MASK 0x0600 -#define GSWIP_MDIO_PHY_FDUP_AUTO 0x0000 -#define GSWIP_MDIO_PHY_FDUP_EN 0x0200 -#define GSWIP_MDIO_PHY_FDUP_DIS 0x0600 -#define GSWIP_MDIO_PHY_FCONTX_MASK 0x0180 -#define GSWIP_MDIO_PHY_FCONTX_AUTO 0x0000 -#define GSWIP_MDIO_PHY_FCONTX_EN 0x0100 -#define GSWIP_MDIO_PHY_FCONTX_DIS 0x0180 -#define GSWIP_MDIO_PHY_FCONRX_MASK 0x0060 -#define GSWIP_MDIO_PHY_FCONRX_AUTO 0x0000 -#define GSWIP_MDIO_PHY_FCONRX_EN 0x0020 -#define GSWIP_MDIO_PHY_FCONRX_DIS 0x0060 -#define GSWIP_MDIO_PHY_ADDR_MASK 0x001f -#define GSWIP_MDIO_PHY_MASK (GSWIP_MDIO_PHY_ADDR_MASK | \ - GSWIP_MDIO_PHY_FCONRX_MASK | \ - GSWIP_MDIO_PHY_FCONTX_MASK | \ - GSWIP_MDIO_PHY_LINK_MASK | \ - GSWIP_MDIO_PHY_SPEED_MASK | \ - GSWIP_MDIO_PHY_FDUP_MASK) - -/* GSWIP MII Registers */ -#define GSWIP_MII_CFGp(p) (0x2 * (p)) -#define GSWIP_MII_CFG_RESET BIT(15) -#define GSWIP_MII_CFG_EN BIT(14) -#define GSWIP_MII_CFG_ISOLATE BIT(13) -#define GSWIP_MII_CFG_LDCLKDIS BIT(12) -#define GSWIP_MII_CFG_RGMII_IBS BIT(8) -#define GSWIP_MII_CFG_RMII_CLK BIT(7) -#define GSWIP_MII_CFG_MODE_MIIP 0x0 -#define GSWIP_MII_CFG_MODE_MIIM 0x1 -#define GSWIP_MII_CFG_MODE_RMIIP 0x2 -#define GSWIP_MII_CFG_MODE_RMIIM 0x3 -#define GSWIP_MII_CFG_MODE_RGMII 0x4 -#define GSWIP_MII_CFG_MODE_GMII 0x9 -#define GSWIP_MII_CFG_MODE_MASK 0xf -#define GSWIP_MII_CFG_RATE_M2P5 0x00 -#define GSWIP_MII_CFG_RATE_M25 0x10 -#define GSWIP_MII_CFG_RATE_M125 0x20 -#define GSWIP_MII_CFG_RATE_M50 0x30 -#define GSWIP_MII_CFG_RATE_AUTO 0x40 -#define GSWIP_MII_CFG_RATE_MASK 0x70 -#define GSWIP_MII_PCDU0 0x01 -#define GSWIP_MII_PCDU1 0x03 -#define GSWIP_MII_PCDU5 0x05 -#define GSWIP_MII_PCDU_TXDLY_MASK GENMASK(2, 0) -#define GSWIP_MII_PCDU_RXDLY_MASK GENMASK(9, 7) - -/* GSWIP Core Registers */ -#define GSWIP_SWRES 0x000 -#define GSWIP_SWRES_R1 BIT(1) /* GSWIP Software reset */ -#define GSWIP_SWRES_R0 BIT(0) /* GSWIP Hardware reset */ -#define GSWIP_VERSION 0x013 -#define GSWIP_VERSION_REV_SHIFT 0 -#define GSWIP_VERSION_REV_MASK GENMASK(7, 0) -#define GSWIP_VERSION_MOD_SHIFT 8 -#define GSWIP_VERSION_MOD_MASK GENMASK(15, 8) -#define GSWIP_VERSION_2_0 0x100 -#define GSWIP_VERSION_2_1 0x021 -#define GSWIP_VERSION_2_2 0x122 -#define 
GSWIP_VERSION_2_2_ETC 0x022 - -#define GSWIP_BM_RAM_VAL(x) (0x043 - (x)) -#define GSWIP_BM_RAM_ADDR 0x044 -#define GSWIP_BM_RAM_CTRL 0x045 -#define GSWIP_BM_RAM_CTRL_BAS BIT(15) -#define GSWIP_BM_RAM_CTRL_OPMOD BIT(5) -#define GSWIP_BM_RAM_CTRL_ADDR_MASK GENMASK(4, 0) -#define GSWIP_BM_QUEUE_GCTRL 0x04A -#define GSWIP_BM_QUEUE_GCTRL_GL_MOD BIT(10) -/* buffer management Port Configuration Register */ -#define GSWIP_BM_PCFGp(p) (0x080 + ((p) * 2)) -#define GSWIP_BM_PCFG_CNTEN BIT(0) /* RMON Counter Enable */ -#define GSWIP_BM_PCFG_IGCNT BIT(1) /* Ingres Special Tag RMON count */ -/* buffer management Port Control Register */ -#define GSWIP_BM_RMON_CTRLp(p) (0x81 + ((p) * 2)) -#define GSWIP_BM_CTRL_RMON_RAM1_RES BIT(0) /* Software Reset for RMON RAM 1 */ -#define GSWIP_BM_CTRL_RMON_RAM2_RES BIT(1) /* Software Reset for RMON RAM 2 */ - -/* PCE */ -#define GSWIP_PCE_TBL_KEY(x) (0x447 - (x)) -#define GSWIP_PCE_TBL_MASK 0x448 -#define GSWIP_PCE_TBL_VAL(x) (0x44D - (x)) -#define GSWIP_PCE_TBL_ADDR 0x44E -#define GSWIP_PCE_TBL_CTRL 0x44F -#define GSWIP_PCE_TBL_CTRL_BAS BIT(15) -#define GSWIP_PCE_TBL_CTRL_TYPE BIT(13) -#define GSWIP_PCE_TBL_CTRL_VLD BIT(12) -#define GSWIP_PCE_TBL_CTRL_KEYFORM BIT(11) -#define GSWIP_PCE_TBL_CTRL_GMAP_MASK GENMASK(10, 7) -#define GSWIP_PCE_TBL_CTRL_OPMOD_MASK GENMASK(6, 5) -#define GSWIP_PCE_TBL_CTRL_OPMOD_ADRD 0x00 -#define GSWIP_PCE_TBL_CTRL_OPMOD_ADWR 0x20 -#define GSWIP_PCE_TBL_CTRL_OPMOD_KSRD 0x40 -#define GSWIP_PCE_TBL_CTRL_OPMOD_KSWR 0x60 -#define GSWIP_PCE_TBL_CTRL_ADDR_MASK GENMASK(4, 0) -#define GSWIP_PCE_PMAP1 0x453 /* Monitoring port map */ -#define GSWIP_PCE_PMAP2 0x454 /* Default Multicast port map */ -#define GSWIP_PCE_PMAP3 0x455 /* Default Unknown Unicast port map */ -#define GSWIP_PCE_GCTRL_0 0x456 -#define GSWIP_PCE_GCTRL_0_MTFL BIT(0) /* MAC Table Flushing */ -#define GSWIP_PCE_GCTRL_0_MC_VALID BIT(3) -#define GSWIP_PCE_GCTRL_0_VLAN BIT(14) /* VLAN aware Switching */ -#define GSWIP_PCE_GCTRL_1 0x457 -#define GSWIP_PCE_GCTRL_1_MAC_GLOCK BIT(2) /* MAC Address table lock */ -#define GSWIP_PCE_GCTRL_1_MAC_GLOCK_MOD BIT(3) /* Mac address table lock forwarding mode */ -#define GSWIP_PCE_PCTRL_0p(p) (0x480 + ((p) * 0xA)) -#define GSWIP_PCE_PCTRL_0_TVM BIT(5) /* Transparent VLAN mode */ -#define GSWIP_PCE_PCTRL_0_VREP BIT(6) /* VLAN Replace Mode */ -#define GSWIP_PCE_PCTRL_0_INGRESS BIT(11) /* Accept special tag in ingress */ -#define GSWIP_PCE_PCTRL_0_PSTATE_LISTEN 0x0 -#define GSWIP_PCE_PCTRL_0_PSTATE_RX 0x1 -#define GSWIP_PCE_PCTRL_0_PSTATE_TX 0x2 -#define GSWIP_PCE_PCTRL_0_PSTATE_LEARNING 0x3 -#define GSWIP_PCE_PCTRL_0_PSTATE_FORWARDING 0x7 -#define GSWIP_PCE_PCTRL_0_PSTATE_MASK GENMASK(2, 0) -#define GSWIP_PCE_VCTRL(p) (0x485 + ((p) * 0xA)) -#define GSWIP_PCE_VCTRL_UVR BIT(0) /* Unknown VLAN Rule */ -#define GSWIP_PCE_VCTRL_VIMR BIT(3) /* VLAN Ingress Member violation rule */ -#define GSWIP_PCE_VCTRL_VEMR BIT(4) /* VLAN Egress Member violation rule */ -#define GSWIP_PCE_VCTRL_VSR BIT(5) /* VLAN Security */ -#define GSWIP_PCE_VCTRL_VID0 BIT(6) /* Priority Tagged Rule */ -#define GSWIP_PCE_DEFPVID(p) (0x486 + ((p) * 0xA)) - -#define GSWIP_MAC_FLEN 0x8C5 -#define GSWIP_MAC_CTRL_0p(p) (0x903 + ((p) * 0xC)) -#define GSWIP_MAC_CTRL_0_PADEN BIT(8) -#define GSWIP_MAC_CTRL_0_FCS_EN BIT(7) -#define GSWIP_MAC_CTRL_0_FCON_MASK 0x0070 -#define GSWIP_MAC_CTRL_0_FCON_AUTO 0x0000 -#define GSWIP_MAC_CTRL_0_FCON_RX 0x0010 -#define GSWIP_MAC_CTRL_0_FCON_TX 0x0020 -#define GSWIP_MAC_CTRL_0_FCON_RXTX 0x0030 -#define GSWIP_MAC_CTRL_0_FCON_NONE 0x0040 -#define 
GSWIP_MAC_CTRL_0_FDUP_MASK 0x000C -#define GSWIP_MAC_CTRL_0_FDUP_AUTO 0x0000 -#define GSWIP_MAC_CTRL_0_FDUP_EN 0x0004 -#define GSWIP_MAC_CTRL_0_FDUP_DIS 0x000C -#define GSWIP_MAC_CTRL_0_GMII_MASK 0x0003 -#define GSWIP_MAC_CTRL_0_GMII_AUTO 0x0000 -#define GSWIP_MAC_CTRL_0_GMII_MII 0x0001 -#define GSWIP_MAC_CTRL_0_GMII_RGMII 0x0002 -#define GSWIP_MAC_CTRL_2p(p) (0x905 + ((p) * 0xC)) -#define GSWIP_MAC_CTRL_2_LCHKL BIT(2) /* Frame Length Check Long Enable */ -#define GSWIP_MAC_CTRL_2_MLEN BIT(3) /* Maximum Untagged Frame Lnegth */ - -/* Ethernet Switch Fetch DMA Port Control Register */ -#define GSWIP_FDMA_PCTRLp(p) (0xA80 + ((p) * 0x6)) -#define GSWIP_FDMA_PCTRL_EN BIT(0) /* FDMA Port Enable */ -#define GSWIP_FDMA_PCTRL_STEN BIT(1) /* Special Tag Insertion Enable */ -#define GSWIP_FDMA_PCTRL_VLANMOD_MASK GENMASK(4, 3) /* VLAN Modification Control */ -#define GSWIP_FDMA_PCTRL_VLANMOD_SHIFT 3 /* VLAN Modification Control */ -#define GSWIP_FDMA_PCTRL_VLANMOD_DIS (0x0 << GSWIP_FDMA_PCTRL_VLANMOD_SHIFT) -#define GSWIP_FDMA_PCTRL_VLANMOD_PRIO (0x1 << GSWIP_FDMA_PCTRL_VLANMOD_SHIFT) -#define GSWIP_FDMA_PCTRL_VLANMOD_ID (0x2 << GSWIP_FDMA_PCTRL_VLANMOD_SHIFT) -#define GSWIP_FDMA_PCTRL_VLANMOD_BOTH (0x3 << GSWIP_FDMA_PCTRL_VLANMOD_SHIFT) - -/* Ethernet Switch Store DMA Port Control Register */ -#define GSWIP_SDMA_PCTRLp(p) (0xBC0 + ((p) * 0x6)) -#define GSWIP_SDMA_PCTRL_EN BIT(0) /* SDMA Port Enable */ -#define GSWIP_SDMA_PCTRL_FCEN BIT(1) /* Flow Control Enable */ -#define GSWIP_SDMA_PCTRL_PAUFWD BIT(3) /* Pause Frame Forwarding */ - -#define GSWIP_TABLE_ACTIVE_VLAN 0x01 -#define GSWIP_TABLE_VLAN_MAPPING 0x02 -#define GSWIP_TABLE_MAC_BRIDGE 0x0b -#define GSWIP_TABLE_MAC_BRIDGE_KEY3_FID GENMASK(5, 0) /* Filtering identifier */ -#define GSWIP_TABLE_MAC_BRIDGE_VAL0_PORT GENMASK(7, 4) /* Port on learned entries */ -#define GSWIP_TABLE_MAC_BRIDGE_VAL1_STATIC BIT(0) /* Static, non-aging entry */ - -#define XRX200_GPHY_FW_ALIGN (16 * 1024) - -/* Maximum packet size supported by the switch. In theory this should be 10240, - * but long packets currently cause lock-ups with an MTU of over 2526. Medium - * packets are sometimes dropped (e.g. TCP over 2477, UDP over 2516-2519, ICMP - * over 2526), hence an MTU value of 2400 seems safe. This issue only affects - * packet reception. This is probably caused by the PPA engine, which is on the - * RX part of the device. Packet transmission works properly up to 10240. 
- */ -#define GSWIP_MAX_PACKET_LENGTH 2400 - -struct gswip_hw_info { - int max_ports; - int cpu_port; - const struct dsa_switch_ops *ops; -}; - struct xway_gphy_match_data { char *fe_firmware_name; char *ge_firmware_name; }; -struct gswip_gphy_fw { - struct clk *clk_gate; - struct reset_control *reset; - u32 fw_addr_offset; - char *fw_name; -}; - -struct gswip_vlan { - struct net_device *bridge; - u16 vid; - u8 fid; -}; - -struct gswip_priv { - __iomem void *gswip; - __iomem void *mdio; - __iomem void *mii; - const struct gswip_hw_info *hw_info; - const struct xway_gphy_match_data *gphy_fw_name_cfg; - struct dsa_switch *ds; - struct device *dev; - struct regmap *rcu_regmap; - struct gswip_vlan vlans[64]; - int num_gphy_fw; - struct gswip_gphy_fw *gphy_fw; - u32 port_vlan_filter; - struct mutex pce_table_lock; -}; - struct gswip_pce_table_entry { u16 index; // PCE_TBL_ADDR.ADDR = pData->table_index u16 table; // PCE_TBL_CTRL.ADDR = pData->table @@ -426,15 +183,29 @@ static void gswip_mii_mask(struct gswip_priv *priv, u32 clear, u32 set, static void gswip_mii_mask_cfg(struct gswip_priv *priv, u32 clear, u32 set, int port) { - /* There's no MII_CFG register for the CPU port */ - if (!dsa_is_cpu_port(priv->ds, port)) - gswip_mii_mask(priv, clear, set, GSWIP_MII_CFGp(port)); + int reg_port; + + /* MII_CFG register only exists for MII ports */ + if (!(priv->hw_info->mii_ports & BIT(port))) + return; + + reg_port = port + priv->hw_info->mii_port_reg_offset; + + gswip_mii_mask(priv, clear, set, GSWIP_MII_CFGp(reg_port)); } static void gswip_mii_mask_pcdu(struct gswip_priv *priv, u32 clear, u32 set, int port) { - switch (port) { + int reg_port; + + /* MII_PCDU register only exists for MII ports */ + if (!(priv->hw_info->mii_ports & BIT(port))) + return; + + reg_port = port + priv->hw_info->mii_port_reg_offset; + + switch (reg_port) { case 0: gswip_mii_mask(priv, clear, set, GSWIP_MII_PCDU0); break; @@ -515,6 +286,9 @@ static int gswip_mdio(struct gswip_priv *priv) int err = 0; mdio_np = of_get_compatible_child(switch_np, "lantiq,xrx200-mdio"); + if (!mdio_np) + mdio_np = of_get_child_by_name(switch_np, "mdio"); + if (!of_device_is_available(mdio_np)) goto out_put_node; @@ -654,7 +428,6 @@ static int gswip_add_single_port_br(struct gswip_priv *priv, int port, bool add) { struct gswip_pce_table_entry vlan_active = {0,}; struct gswip_pce_table_entry vlan_mapping = {0,}; - unsigned int cpu_port = priv->hw_info->cpu_port; int err; vlan_active.index = port + 1; @@ -674,7 +447,7 @@ static int gswip_add_single_port_br(struct gswip_priv *priv, int port, bool add) vlan_mapping.index = port + 1; vlan_mapping.table = GSWIP_TABLE_VLAN_MAPPING; vlan_mapping.val[0] = 0 /* vid */; - vlan_mapping.val[1] = BIT(port) | BIT(cpu_port); + vlan_mapping.val[1] = BIT(port) | dsa_cpu_ports(priv->ds); vlan_mapping.val[2] = 0; err = gswip_pce_table_entry_write(priv, &vlan_mapping); if (err) { @@ -738,15 +511,15 @@ static int gswip_pce_load_microcode(struct gswip_priv *priv) GSWIP_PCE_TBL_CTRL_OPMOD_ADWR, GSWIP_PCE_TBL_CTRL); gswip_switch_w(priv, 0, GSWIP_PCE_TBL_MASK); - for (i = 0; i < ARRAY_SIZE(gswip_pce_microcode); i++) { + for (i = 0; i < priv->hw_info->pce_microcode_size; i++) { gswip_switch_w(priv, i, GSWIP_PCE_TBL_ADDR); - gswip_switch_w(priv, gswip_pce_microcode[i].val_0, + gswip_switch_w(priv, (*priv->hw_info->pce_microcode)[i].val_0, GSWIP_PCE_TBL_VAL(0)); - gswip_switch_w(priv, gswip_pce_microcode[i].val_1, + gswip_switch_w(priv, (*priv->hw_info->pce_microcode)[i].val_1, GSWIP_PCE_TBL_VAL(1)); - 
gswip_switch_w(priv, gswip_pce_microcode[i].val_2, + gswip_switch_w(priv, (*priv->hw_info->pce_microcode)[i].val_2, GSWIP_PCE_TBL_VAL(2)); - gswip_switch_w(priv, gswip_pce_microcode[i].val_3, + gswip_switch_w(priv, (*priv->hw_info->pce_microcode)[i].val_3, GSWIP_PCE_TBL_VAL(3)); /* start the table access: */ @@ -804,10 +577,10 @@ static int gswip_port_vlan_filtering(struct dsa_switch *ds, int port, static int gswip_setup(struct dsa_switch *ds) { + unsigned int cpu_ports = dsa_cpu_ports(ds); struct gswip_priv *priv = ds->priv; - unsigned int cpu_port = priv->hw_info->cpu_port; - int i; - int err; + struct dsa_port *cpu_dp; + int err, i; gswip_switch_w(priv, GSWIP_SWRES_R0, GSWIP_SWRES); usleep_range(5000, 10000); @@ -829,9 +602,9 @@ static int gswip_setup(struct dsa_switch *ds) } /* Default unknown Broadcast/Multicast/Unicast port maps */ - gswip_switch_w(priv, BIT(cpu_port), GSWIP_PCE_PMAP1); - gswip_switch_w(priv, BIT(cpu_port), GSWIP_PCE_PMAP2); - gswip_switch_w(priv, BIT(cpu_port), GSWIP_PCE_PMAP3); + gswip_switch_w(priv, cpu_ports, GSWIP_PCE_PMAP1); + gswip_switch_w(priv, cpu_ports, GSWIP_PCE_PMAP2); + gswip_switch_w(priv, cpu_ports, GSWIP_PCE_PMAP3); /* Deactivate MDIO PHY auto polling. Some PHYs as the AR8030 have an * interoperability problem with this auto polling mechanism because @@ -854,19 +627,28 @@ static int gswip_setup(struct dsa_switch *ds) /* Configure the MDIO Clock 2.5 MHz */ gswip_mdio_mask(priv, 0xff, 0x09, GSWIP_MDIO_MDC_CFG1); + /* bring up the mdio bus */ + err = gswip_mdio(priv); + if (err) { + dev_err(priv->dev, "mdio bus setup failed\n"); + return err; + } + /* Disable the xMII interface and clear it's isolation bit */ for (i = 0; i < priv->hw_info->max_ports; i++) gswip_mii_mask_cfg(priv, GSWIP_MII_CFG_EN | GSWIP_MII_CFG_ISOLATE, 0, i); - /* enable special tag insertion on cpu port */ - gswip_switch_mask(priv, 0, GSWIP_FDMA_PCTRL_STEN, - GSWIP_FDMA_PCTRLp(cpu_port)); + dsa_switch_for_each_cpu_port(cpu_dp, ds) { + /* enable special tag insertion on cpu port */ + gswip_switch_mask(priv, 0, GSWIP_FDMA_PCTRL_STEN, + GSWIP_FDMA_PCTRLp(cpu_dp->index)); - /* accept special tag in ingress direction */ - gswip_switch_mask(priv, 0, GSWIP_PCE_PCTRL_0_INGRESS, - GSWIP_PCE_PCTRL_0p(cpu_port)); + /* accept special tag in ingress direction */ + gswip_switch_mask(priv, 0, GSWIP_PCE_PCTRL_0_INGRESS, + GSWIP_PCE_PCTRL_0p(cpu_dp->index)); + } gswip_switch_mask(priv, 0, GSWIP_BM_QUEUE_GCTRL_GL_MOD, GSWIP_BM_QUEUE_GCTRL); @@ -895,7 +677,9 @@ static enum dsa_tag_protocol gswip_get_tag_protocol(struct dsa_switch *ds, int port, enum dsa_tag_protocol mp) { - return DSA_TAG_PROTO_GSWIP; + struct gswip_priv *priv = ds->priv; + + return priv->hw_info->tag_protocol; } static int gswip_vlan_active_create(struct gswip_priv *priv, @@ -962,7 +746,6 @@ static int gswip_vlan_add_unaware(struct gswip_priv *priv, { struct gswip_pce_table_entry vlan_mapping = {0,}; unsigned int max_ports = priv->hw_info->max_ports; - unsigned int cpu_port = priv->hw_info->cpu_port; bool active_vlan_created = false; int idx = -1; int i; @@ -1002,7 +785,7 @@ static int gswip_vlan_add_unaware(struct gswip_priv *priv, } /* Update the VLAN mapping entry and write it to the switch */ - vlan_mapping.val[1] |= BIT(cpu_port); + vlan_mapping.val[1] |= dsa_cpu_ports(priv->ds); vlan_mapping.val[1] |= BIT(port); err = gswip_pce_table_entry_write(priv, &vlan_mapping); if (err) { @@ -1024,7 +807,7 @@ static int gswip_vlan_add_aware(struct gswip_priv *priv, { struct gswip_pce_table_entry vlan_mapping = {0,}; unsigned int max_ports 
= priv->hw_info->max_ports; - unsigned int cpu_port = priv->hw_info->cpu_port; + unsigned int cpu_ports = dsa_cpu_ports(priv->ds); bool active_vlan_created = false; int idx = -1; int fid = -1; @@ -1071,8 +854,8 @@ static int gswip_vlan_add_aware(struct gswip_priv *priv, vlan_mapping.val[0] = vid; /* Update the VLAN mapping entry and write it to the switch */ - vlan_mapping.val[1] |= BIT(cpu_port); - vlan_mapping.val[2] |= BIT(cpu_port); + vlan_mapping.val[1] |= cpu_ports; + vlan_mapping.val[2] |= cpu_ports; vlan_mapping.val[1] |= BIT(port); if (untagged) vlan_mapping.val[2] &= ~BIT(port); @@ -1099,7 +882,6 @@ static int gswip_vlan_remove(struct gswip_priv *priv, { struct gswip_pce_table_entry vlan_mapping = {0,}; unsigned int max_ports = priv->hw_info->max_ports; - unsigned int cpu_port = priv->hw_info->cpu_port; int idx = -1; int i; int err; @@ -1135,7 +917,7 @@ static int gswip_vlan_remove(struct gswip_priv *priv, } /* In case all ports are removed from the bridge, remove the VLAN */ - if ((vlan_mapping.val[1] & ~BIT(cpu_port)) == 0) { + if (!(vlan_mapping.val[1] & ~dsa_cpu_ports(priv->ds))) { err = gswip_vlan_active_remove(priv, idx); if (err) { dev_err(priv->dev, "failed to write active VLAN: %d\n", @@ -1554,6 +1336,14 @@ static void gswip_xrx300_phylink_get_caps(struct dsa_switch *ds, int port, MAC_10 | MAC_100 | MAC_1000; } +static void gswip_phylink_get_caps(struct dsa_switch *ds, int port, + struct phylink_config *config) +{ + struct gswip_priv *priv = ds->priv; + + priv->hw_info->phylink_get_caps(ds, port, config); +} + static void gswip_port_set_link(struct gswip_priv *priv, int port, bool link) { u32 mdio_phy; @@ -1672,6 +1462,10 @@ static void gswip_phylink_mac_config(struct phylink_config *config, miicfg |= GSWIP_MII_CFG_LDCLKDIS; switch (state->interface) { + case PHY_INTERFACE_MODE_SGMII: + case PHY_INTERFACE_MODE_1000BASEX: + case PHY_INTERFACE_MODE_2500BASEX: + return; case PHY_INTERFACE_MODE_MII: case PHY_INTERFACE_MODE_INTERNAL: miicfg |= GSWIP_MII_CFG_MODE_MIIM; @@ -1820,36 +1614,26 @@ static int gswip_get_sset_count(struct dsa_switch *ds, int port, int sset) return ARRAY_SIZE(gswip_rmon_cnt); } -static const struct phylink_mac_ops gswip_phylink_mac_ops = { - .mac_config = gswip_phylink_mac_config, - .mac_link_down = gswip_phylink_mac_link_down, - .mac_link_up = gswip_phylink_mac_link_up, -}; +static struct phylink_pcs *gswip_phylink_mac_select_pcs(struct phylink_config *config, + phy_interface_t interface) +{ + struct dsa_port *dp = dsa_phylink_to_port(config); + struct gswip_priv *priv = dp->ds->priv; -static const struct dsa_switch_ops gswip_xrx200_switch_ops = { - .get_tag_protocol = gswip_get_tag_protocol, - .setup = gswip_setup, - .port_enable = gswip_port_enable, - .port_disable = gswip_port_disable, - .port_bridge_join = gswip_port_bridge_join, - .port_bridge_leave = gswip_port_bridge_leave, - .port_fast_age = gswip_port_fast_age, - .port_vlan_filtering = gswip_port_vlan_filtering, - .port_vlan_add = gswip_port_vlan_add, - .port_vlan_del = gswip_port_vlan_del, - .port_stp_state_set = gswip_port_stp_state_set, - .port_fdb_add = gswip_port_fdb_add, - .port_fdb_del = gswip_port_fdb_del, - .port_fdb_dump = gswip_port_fdb_dump, - .port_change_mtu = gswip_port_change_mtu, - .port_max_mtu = gswip_port_max_mtu, - .phylink_get_caps = gswip_xrx200_phylink_get_caps, - .get_strings = gswip_get_strings, - .get_ethtool_stats = gswip_get_ethtool_stats, - .get_sset_count = gswip_get_sset_count, + if (priv->hw_info->mac_select_pcs) + return priv->hw_info->mac_select_pcs(config, 
interface); + + return NULL; +} + +static const struct phylink_mac_ops gswip_phylink_mac_ops = { + .mac_config = gswip_phylink_mac_config, + .mac_link_down = gswip_phylink_mac_link_down, + .mac_link_up = gswip_phylink_mac_link_up, + .mac_select_pcs = gswip_phylink_mac_select_pcs, }; -static const struct dsa_switch_ops gswip_xrx300_switch_ops = { +static const struct dsa_switch_ops gswip_switch_ops = { .get_tag_protocol = gswip_get_tag_protocol, .setup = gswip_setup, .port_enable = gswip_port_enable, @@ -1866,7 +1650,7 @@ static const struct dsa_switch_ops gswip_xrx300_switch_ops = { .port_fdb_dump = gswip_port_fdb_dump, .port_change_mtu = gswip_port_change_mtu, .port_max_mtu = gswip_port_max_mtu, - .phylink_get_caps = gswip_xrx300_phylink_get_caps, + .phylink_get_caps = gswip_phylink_get_caps, .get_strings = gswip_get_strings, .get_ethtool_stats = gswip_get_ethtool_stats, .get_sset_count = gswip_get_sset_count, @@ -1935,8 +1719,7 @@ static int gswip_gphy_fw_load(struct gswip_priv *priv, struct gswip_gphy_fw *gph memcpy(fw_addr, fw->data, fw->size); } else { release_firmware(fw); - return dev_err_probe(dev, -ENOMEM, - "failed to alloc firmware memory\n"); + return -ENOMEM; } release_firmware(fw); @@ -2093,6 +1876,30 @@ remove_gphy: return err; } +static int gswip_validate_cpu_port(struct dsa_switch *ds) +{ + struct gswip_priv *priv = ds->priv; + struct dsa_port *cpu_dp; + int cpu_port = -1; + + dsa_switch_for_each_cpu_port(cpu_dp, ds) { + if (cpu_port != -1) + return dev_err_probe(ds->dev, -EINVAL, + "only a single CPU port is supported\n"); + + cpu_port = cpu_dp->index; + } + + if (cpu_port == -1) + return dev_err_probe(ds->dev, -EINVAL, "no CPU port defined\n"); + + if (BIT(cpu_port) & ~priv->hw_info->allowed_cpu_ports) + return dev_err_probe(ds->dev, -EINVAL, + "unsupported CPU port defined\n"); + + return 0; +} + static int gswip_probe(struct platform_device *pdev) { struct device_node *np, *gphy_fw_np; @@ -2129,12 +1936,22 @@ static int gswip_probe(struct platform_device *pdev) priv->ds->dev = dev; priv->ds->num_ports = priv->hw_info->max_ports; priv->ds->priv = priv; - priv->ds->ops = priv->hw_info->ops; + priv->ds->ops = &gswip_switch_ops; priv->ds->phylink_mac_ops = &gswip_phylink_mac_ops; priv->dev = dev; mutex_init(&priv->pce_table_lock); version = gswip_switch_r(priv, GSWIP_VERSION); + /* The hardware has the 'major/minor' version bytes in the wrong order + * preventing numerical comparisons. Construct a 16-bit unsigned integer + * having the REV field as most significant byte and the MOD field as + * least significant byte. This is effectively swapping the two bytes of + * the version variable, but other than using swab16 it doesn't affect + * the source variable. 
+ */ + priv->version = GSWIP_VERSION_REV(version) << 8 | + GSWIP_VERSION_MOD(version); + np = dev->of_node; switch (version) { case GSWIP_VERSION_2_0: @@ -2163,30 +1980,20 @@ static int gswip_probe(struct platform_device *pdev) "gphy fw probe failed\n"); } - /* bring up the mdio bus */ - err = gswip_mdio(priv); - if (err) { - dev_err_probe(dev, err, "mdio probe failed\n"); - goto gphy_fw_remove; - } - err = dsa_register_switch(priv->ds); if (err) { dev_err_probe(dev, err, "dsa switch registration failed\n"); goto gphy_fw_remove; } - if (!dsa_is_cpu_port(priv->ds, priv->hw_info->cpu_port)) { - err = dev_err_probe(dev, -EINVAL, - "wrong CPU port defined, HW only supports port: %i\n", - priv->hw_info->cpu_port); + + err = gswip_validate_cpu_port(priv->ds); + if (err) goto disable_switch; - } platform_set_drvdata(pdev, priv); dev_info(dev, "probed GSWIP version %lx mod %lx\n", - (version & GSWIP_VERSION_REV_MASK) >> GSWIP_VERSION_REV_SHIFT, - (version & GSWIP_VERSION_MOD_MASK) >> GSWIP_VERSION_MOD_SHIFT); + GSWIP_VERSION_REV(version), GSWIP_VERSION_MOD(version)); return 0; disable_switch: @@ -2229,14 +2036,24 @@ static void gswip_shutdown(struct platform_device *pdev) static const struct gswip_hw_info gswip_xrx200 = { .max_ports = 7, - .cpu_port = 6, - .ops = &gswip_xrx200_switch_ops, + .allowed_cpu_ports = BIT(6), + .mii_ports = BIT(0) | BIT(1) | BIT(5), + .mii_port_reg_offset = 0, + .phylink_get_caps = gswip_xrx200_phylink_get_caps, + .pce_microcode = &gswip_pce_microcode, + .pce_microcode_size = ARRAY_SIZE(gswip_pce_microcode), + .tag_protocol = DSA_TAG_PROTO_GSWIP, }; static const struct gswip_hw_info gswip_xrx300 = { .max_ports = 7, - .cpu_port = 6, - .ops = &gswip_xrx300_switch_ops, + .allowed_cpu_ports = BIT(6), + .mii_ports = BIT(0) | BIT(5), + .mii_port_reg_offset = 0, + .phylink_get_caps = gswip_xrx300_phylink_get_caps, + .pce_microcode = &gswip_pce_microcode, + .pce_microcode_size = ARRAY_SIZE(gswip_pce_microcode), + .tag_protocol = DSA_TAG_PROTO_GSWIP, }; static const struct of_device_id gswip_of_match[] = { diff --git a/drivers/net/dsa/lantiq/lantiq_gswip.h b/drivers/net/dsa/lantiq/lantiq_gswip.h new file mode 100644 index 000000000000..2df9c8e8cfd0 --- /dev/null +++ b/drivers/net/dsa/lantiq/lantiq_gswip.h @@ -0,0 +1,276 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef __LANTIQ_GSWIP_H +#define __LANTIQ_GSWIP_H + +#include <linux/clk.h> +#include <linux/mutex.h> +#include <linux/phylink.h> +#include <linux/platform_device.h> +#include <linux/regmap.h> +#include <linux/reset.h> +#include <linux/swab.h> +#include <net/dsa.h> + +/* GSWIP MDIO Registers */ +#define GSWIP_MDIO_GLOB 0x00 +#define GSWIP_MDIO_GLOB_ENABLE BIT(15) +#define GSWIP_MDIO_CTRL 0x08 +#define GSWIP_MDIO_CTRL_BUSY BIT(12) +#define GSWIP_MDIO_CTRL_RD BIT(11) +#define GSWIP_MDIO_CTRL_WR BIT(10) +#define GSWIP_MDIO_CTRL_PHYAD_MASK 0x1f +#define GSWIP_MDIO_CTRL_PHYAD_SHIFT 5 +#define GSWIP_MDIO_CTRL_REGAD_MASK 0x1f +#define GSWIP_MDIO_READ 0x09 +#define GSWIP_MDIO_WRITE 0x0A +#define GSWIP_MDIO_MDC_CFG0 0x0B +#define GSWIP_MDIO_MDC_CFG1 0x0C +#define GSWIP_MDIO_PHYp(p) (0x15 - (p)) +#define GSWIP_MDIO_PHY_LINK_MASK 0x6000 +#define GSWIP_MDIO_PHY_LINK_AUTO 0x0000 +#define GSWIP_MDIO_PHY_LINK_DOWN 0x4000 +#define GSWIP_MDIO_PHY_LINK_UP 0x2000 +#define GSWIP_MDIO_PHY_SPEED_MASK 0x1800 +#define GSWIP_MDIO_PHY_SPEED_AUTO 0x1800 +#define GSWIP_MDIO_PHY_SPEED_M10 0x0000 +#define GSWIP_MDIO_PHY_SPEED_M100 0x0800 +#define GSWIP_MDIO_PHY_SPEED_G1 0x1000 +#define GSWIP_MDIO_PHY_FDUP_MASK 0x0600 +#define GSWIP_MDIO_PHY_FDUP_AUTO 
0x0000 +#define GSWIP_MDIO_PHY_FDUP_EN 0x0200 +#define GSWIP_MDIO_PHY_FDUP_DIS 0x0600 +#define GSWIP_MDIO_PHY_FCONTX_MASK 0x0180 +#define GSWIP_MDIO_PHY_FCONTX_AUTO 0x0000 +#define GSWIP_MDIO_PHY_FCONTX_EN 0x0100 +#define GSWIP_MDIO_PHY_FCONTX_DIS 0x0180 +#define GSWIP_MDIO_PHY_FCONRX_MASK 0x0060 +#define GSWIP_MDIO_PHY_FCONRX_AUTO 0x0000 +#define GSWIP_MDIO_PHY_FCONRX_EN 0x0020 +#define GSWIP_MDIO_PHY_FCONRX_DIS 0x0060 +#define GSWIP_MDIO_PHY_ADDR_MASK 0x001f +#define GSWIP_MDIO_PHY_MASK (GSWIP_MDIO_PHY_ADDR_MASK | \ + GSWIP_MDIO_PHY_FCONRX_MASK | \ + GSWIP_MDIO_PHY_FCONTX_MASK | \ + GSWIP_MDIO_PHY_LINK_MASK | \ + GSWIP_MDIO_PHY_SPEED_MASK | \ + GSWIP_MDIO_PHY_FDUP_MASK) + +/* GSWIP MII Registers */ +#define GSWIP_MII_CFGp(p) (0x2 * (p)) +#define GSWIP_MII_CFG_RESET BIT(15) +#define GSWIP_MII_CFG_EN BIT(14) +#define GSWIP_MII_CFG_ISOLATE BIT(13) +#define GSWIP_MII_CFG_LDCLKDIS BIT(12) +#define GSWIP_MII_CFG_RGMII_IBS BIT(8) +#define GSWIP_MII_CFG_RMII_CLK BIT(7) +#define GSWIP_MII_CFG_MODE_MIIP 0x0 +#define GSWIP_MII_CFG_MODE_MIIM 0x1 +#define GSWIP_MII_CFG_MODE_RMIIP 0x2 +#define GSWIP_MII_CFG_MODE_RMIIM 0x3 +#define GSWIP_MII_CFG_MODE_RGMII 0x4 +#define GSWIP_MII_CFG_MODE_GMII 0x9 +#define GSWIP_MII_CFG_MODE_MASK 0xf +#define GSWIP_MII_CFG_RATE_M2P5 0x00 +#define GSWIP_MII_CFG_RATE_M25 0x10 +#define GSWIP_MII_CFG_RATE_M125 0x20 +#define GSWIP_MII_CFG_RATE_M50 0x30 +#define GSWIP_MII_CFG_RATE_AUTO 0x40 +#define GSWIP_MII_CFG_RATE_MASK 0x70 +#define GSWIP_MII_PCDU0 0x01 +#define GSWIP_MII_PCDU1 0x03 +#define GSWIP_MII_PCDU5 0x05 +#define GSWIP_MII_PCDU_TXDLY_MASK GENMASK(2, 0) +#define GSWIP_MII_PCDU_RXDLY_MASK GENMASK(9, 7) + +/* GSWIP Core Registers */ +#define GSWIP_SWRES 0x000 +#define GSWIP_SWRES_R1 BIT(1) /* GSWIP Software reset */ +#define GSWIP_SWRES_R0 BIT(0) /* GSWIP Hardware reset */ +#define GSWIP_VERSION 0x013 +#define GSWIP_VERSION_REV_MASK GENMASK(7, 0) +#define GSWIP_VERSION_MOD_MASK GENMASK(15, 8) +#define GSWIP_VERSION_REV(v) FIELD_GET(GSWIP_VERSION_REV_MASK, v) +#define GSWIP_VERSION_MOD(v) FIELD_GET(GSWIP_VERSION_MOD_MASK, v) +#define GSWIP_VERSION_2_0 0x100 +#define GSWIP_VERSION_2_1 0x021 +#define GSWIP_VERSION_2_2 0x122 +#define GSWIP_VERSION_2_2_ETC 0x022 +/* The hardware has the 'major/minor' version bytes in the wrong order + * preventing numerical comparisons. Swap the bytes of the 16-bit value + * to end up with REV being the most significant byte and MOD being the + * least significant byte, which then allows comparing it with the + * value stored in struct gswip_priv. 
+ */
+#define GSWIP_VERSION_GE(priv, ver)	((priv)->version >= swab16(ver))
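For illustration, a worked comparison (a sketch, not part of the patch): the raw GSWIP_VERSION register values order the releases incorrectly, since v2.1 reads 0x021 while v2.0 reads 0x100. Swapping the bytes puts the REV field into the most significant byte and restores the correct ordering, which is what GSWIP_VERSION_GE relies on:

	/* Sketch only; the helper name is hypothetical. Raw vs swapped values:
	 *   v2.0: raw 0x100 -> swab16 0x0001
	 *   v2.1: raw 0x021 -> swab16 0x2100
	 *   v2.2: raw 0x122 -> swab16 0x2201
	 * Raw ordering is wrong (0x021 < 0x100); swapped ordering is correct.
	 */
	static bool gswip_at_least_2_1(struct gswip_priv *priv)
	{
		/* on a v2.1 switch, priv->version == 0x2100 >= swab16(0x021) == 0x2100 */
		return GSWIP_VERSION_GE(priv, GSWIP_VERSION_2_1);
	}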
+
+#define GSWIP_BM_RAM_VAL(x)		(0x043 - (x))
+#define GSWIP_BM_RAM_ADDR		0x044
+#define GSWIP_BM_RAM_CTRL		0x045
+#define GSWIP_BM_RAM_CTRL_BAS		BIT(15)
+#define GSWIP_BM_RAM_CTRL_OPMOD		BIT(5)
+#define GSWIP_BM_RAM_CTRL_ADDR_MASK	GENMASK(4, 0)
+#define GSWIP_BM_QUEUE_GCTRL		0x04A
+#define GSWIP_BM_QUEUE_GCTRL_GL_MOD	BIT(10)
+/* buffer management Port Configuration Register */
+#define GSWIP_BM_PCFGp(p)		(0x080 + ((p) * 2))
+#define GSWIP_BM_PCFG_CNTEN		BIT(0)	/* RMON Counter Enable */
+#define GSWIP_BM_PCFG_IGCNT		BIT(1)	/* Ingress Special Tag RMON count */
+/* buffer management Port Control Register */
+#define GSWIP_BM_RMON_CTRLp(p)		(0x81 + ((p) * 2))
+#define GSWIP_BM_CTRL_RMON_RAM1_RES	BIT(0)	/* Software Reset for RMON RAM 1 */
+#define GSWIP_BM_CTRL_RMON_RAM2_RES	BIT(1)	/* Software Reset for RMON RAM 2 */
+
+/* PCE */
+#define GSWIP_PCE_TBL_KEY(x)		(0x447 - (x))
+#define GSWIP_PCE_TBL_MASK		0x448
+#define GSWIP_PCE_TBL_VAL(x)		(0x44D - (x))
+#define GSWIP_PCE_TBL_ADDR		0x44E
+#define GSWIP_PCE_TBL_CTRL		0x44F
+#define GSWIP_PCE_TBL_CTRL_BAS		BIT(15)
+#define GSWIP_PCE_TBL_CTRL_TYPE		BIT(13)
+#define GSWIP_PCE_TBL_CTRL_VLD		BIT(12)
+#define GSWIP_PCE_TBL_CTRL_KEYFORM	BIT(11)
+#define GSWIP_PCE_TBL_CTRL_GMAP_MASK	GENMASK(10, 7)
+#define GSWIP_PCE_TBL_CTRL_OPMOD_MASK	GENMASK(6, 5)
+#define GSWIP_PCE_TBL_CTRL_OPMOD_ADRD	0x00
+#define GSWIP_PCE_TBL_CTRL_OPMOD_ADWR	0x20
+#define GSWIP_PCE_TBL_CTRL_OPMOD_KSRD	0x40
+#define GSWIP_PCE_TBL_CTRL_OPMOD_KSWR	0x60
+#define GSWIP_PCE_TBL_CTRL_ADDR_MASK	GENMASK(4, 0)
+#define GSWIP_PCE_PMAP1			0x453	/* Monitoring port map */
+#define GSWIP_PCE_PMAP2			0x454	/* Default Multicast port map */
+#define GSWIP_PCE_PMAP3			0x455	/* Default Unknown Unicast port map */
+#define GSWIP_PCE_GCTRL_0		0x456
+#define GSWIP_PCE_GCTRL_0_MTFL		BIT(0)	/* MAC Table Flushing */
+#define GSWIP_PCE_GCTRL_0_MC_VALID	BIT(3)
+#define GSWIP_PCE_GCTRL_0_VLAN		BIT(14)	/* VLAN aware Switching */
+#define GSWIP_PCE_GCTRL_1		0x457
+#define GSWIP_PCE_GCTRL_1_MAC_GLOCK	BIT(2)	/* MAC Address table lock */
+#define GSWIP_PCE_GCTRL_1_MAC_GLOCK_MOD	BIT(3)	/* MAC address table lock forwarding mode */
+#define GSWIP_PCE_PCTRL_0p(p)		(0x480 + ((p) * 0xA))
+#define GSWIP_PCE_PCTRL_0_TVM		BIT(5)	/* Transparent VLAN mode */
+#define GSWIP_PCE_PCTRL_0_VREP		BIT(6)	/* VLAN Replace Mode */
+#define GSWIP_PCE_PCTRL_0_INGRESS	BIT(11)	/* Accept special tag in ingress */
+#define GSWIP_PCE_PCTRL_0_PSTATE_LISTEN		0x0
+#define GSWIP_PCE_PCTRL_0_PSTATE_RX		0x1
+#define GSWIP_PCE_PCTRL_0_PSTATE_TX		0x2
+#define GSWIP_PCE_PCTRL_0_PSTATE_LEARNING	0x3
+#define GSWIP_PCE_PCTRL_0_PSTATE_FORWARDING	0x7
+#define GSWIP_PCE_PCTRL_0_PSTATE_MASK	GENMASK(2, 0)
+#define GSWIP_PCE_VCTRL(p)		(0x485 + ((p) * 0xA))
+#define GSWIP_PCE_VCTRL_UVR		BIT(0)	/* Unknown VLAN Rule */
+#define GSWIP_PCE_VCTRL_VIMR		BIT(3)	/* VLAN Ingress Member violation rule */
+#define GSWIP_PCE_VCTRL_VEMR		BIT(4)	/* VLAN Egress Member violation rule */
+#define GSWIP_PCE_VCTRL_VSR		BIT(5)	/* VLAN Security */
+#define GSWIP_PCE_VCTRL_VID0		BIT(6)	/* Priority Tagged Rule */
+#define GSWIP_PCE_DEFPVID(p)		(0x486 + ((p) * 0xA))
+
+#define GSWIP_MAC_FLEN			0x8C5
+#define GSWIP_MAC_CTRL_0p(p)		(0x903 + ((p) * 0xC))
+#define GSWIP_MAC_CTRL_0_PADEN		BIT(8)
+#define GSWIP_MAC_CTRL_0_FCS_EN		BIT(7)
+#define GSWIP_MAC_CTRL_0_FCON_MASK	0x0070
+#define GSWIP_MAC_CTRL_0_FCON_AUTO	0x0000
+#define GSWIP_MAC_CTRL_0_FCON_RX	0x0010
+#define GSWIP_MAC_CTRL_0_FCON_TX	0x0020
+#define GSWIP_MAC_CTRL_0_FCON_RXTX	0x0030
+#define GSWIP_MAC_CTRL_0_FCON_NONE	0x0040
+#define GSWIP_MAC_CTRL_0_FDUP_MASK	0x000C
+#define GSWIP_MAC_CTRL_0_FDUP_AUTO	0x0000
+#define GSWIP_MAC_CTRL_0_FDUP_EN	0x0004
+#define GSWIP_MAC_CTRL_0_FDUP_DIS	0x000C
+#define GSWIP_MAC_CTRL_0_GMII_MASK	0x0003
+#define GSWIP_MAC_CTRL_0_GMII_AUTO	0x0000
+#define GSWIP_MAC_CTRL_0_GMII_MII	0x0001
+#define GSWIP_MAC_CTRL_0_GMII_RGMII	0x0002
+#define GSWIP_MAC_CTRL_2p(p)		(0x905 + ((p) * 0xC))
+#define GSWIP_MAC_CTRL_2_LCHKL		BIT(2)	/* Frame Length Check Long Enable */
+#define GSWIP_MAC_CTRL_2_MLEN		BIT(3)	/* Maximum Untagged Frame Length */
+
+/* Ethernet Switch Fetch DMA Port Control Register */
+#define GSWIP_FDMA_PCTRLp(p)		(0xA80 + ((p) * 0x6))
+#define GSWIP_FDMA_PCTRL_EN		BIT(0)	/* FDMA Port Enable */
+#define GSWIP_FDMA_PCTRL_STEN		BIT(1)	/* Special Tag Insertion Enable */
+#define GSWIP_FDMA_PCTRL_VLANMOD_MASK	GENMASK(4, 3)	/* VLAN Modification Control */
+#define GSWIP_FDMA_PCTRL_VLANMOD_SHIFT	3	/* VLAN Modification Control */
+#define GSWIP_FDMA_PCTRL_VLANMOD_DIS	(0x0 << GSWIP_FDMA_PCTRL_VLANMOD_SHIFT)
+#define GSWIP_FDMA_PCTRL_VLANMOD_PRIO	(0x1 << GSWIP_FDMA_PCTRL_VLANMOD_SHIFT)
+#define GSWIP_FDMA_PCTRL_VLANMOD_ID	(0x2 << GSWIP_FDMA_PCTRL_VLANMOD_SHIFT)
+#define GSWIP_FDMA_PCTRL_VLANMOD_BOTH	(0x3 << GSWIP_FDMA_PCTRL_VLANMOD_SHIFT)
+
+/* Ethernet Switch Store DMA Port Control Register */
+#define GSWIP_SDMA_PCTRLp(p)		(0xBC0 + ((p) * 0x6))
+#define GSWIP_SDMA_PCTRL_EN		BIT(0)	/* SDMA Port Enable */
+#define GSWIP_SDMA_PCTRL_FCEN		BIT(1)	/* Flow Control Enable */
+#define GSWIP_SDMA_PCTRL_PAUFWD		BIT(3)	/* Pause Frame Forwarding */
+
+#define GSWIP_TABLE_ACTIVE_VLAN		0x01
+#define GSWIP_TABLE_VLAN_MAPPING	0x02
+#define GSWIP_TABLE_MAC_BRIDGE		0x0b
+#define GSWIP_TABLE_MAC_BRIDGE_KEY3_FID		GENMASK(5, 0)	/* Filtering identifier */
+#define GSWIP_TABLE_MAC_BRIDGE_VAL0_PORT	GENMASK(7, 4)	/* Port on learned entries */
+#define GSWIP_TABLE_MAC_BRIDGE_VAL1_STATIC	BIT(0)	/* Static, non-aging entry */
+
+#define XRX200_GPHY_FW_ALIGN	(16 * 1024)
+
+/* Maximum packet size supported by the switch. In theory this should be 10240,
+ * but long packets currently cause lock-ups with an MTU of over 2526. Medium
+ * packets are sometimes dropped (e.g. TCP over 2477, UDP over 2516-2519, ICMP
+ * over 2526), hence an MTU value of 2400 seems safe. This issue only affects
+ * packet reception. This is probably caused by the PPA engine, which is on the
+ * RX part of the device. Packet transmission works properly up to 10240.
+ */ +#define GSWIP_MAX_PACKET_LENGTH 2400 + +struct gswip_pce_microcode { + u16 val_3; + u16 val_2; + u16 val_1; + u16 val_0; +}; + +struct gswip_hw_info { + int max_ports; + unsigned int allowed_cpu_ports; + unsigned int mii_ports; + int mii_port_reg_offset; + const struct gswip_pce_microcode (*pce_microcode)[]; + size_t pce_microcode_size; + enum dsa_tag_protocol tag_protocol; + void (*phylink_get_caps)(struct dsa_switch *ds, int port, + struct phylink_config *config); + struct phylink_pcs *(*mac_select_pcs)(struct phylink_config *config, + phy_interface_t interface); +}; + +struct gswip_gphy_fw { + struct clk *clk_gate; + struct reset_control *reset; + u32 fw_addr_offset; + char *fw_name; +}; + +struct gswip_vlan { + struct net_device *bridge; + u16 vid; + u8 fid; +}; + +struct gswip_priv { + __iomem void *gswip; + __iomem void *mdio; + __iomem void *mii; + const struct gswip_hw_info *hw_info; + const struct xway_gphy_match_data *gphy_fw_name_cfg; + struct dsa_switch *ds; + struct device *dev; + struct regmap *rcu_regmap; + struct gswip_vlan vlans[64]; + int num_gphy_fw; + struct gswip_gphy_fw *gphy_fw; + u32 port_vlan_filter; + struct mutex pce_table_lock; + u16 version; +}; + +#endif /* __LANTIQ_GSWIP_H */ diff --git a/drivers/net/dsa/lantiq_pce.h b/drivers/net/dsa/lantiq/lantiq_pce.h index e2be31f3672a..659f9a0638d9 100644 --- a/drivers/net/dsa/lantiq_pce.h +++ b/drivers/net/dsa/lantiq/lantiq_pce.h @@ -7,6 +7,8 @@ * Copyright (C) 2017 - 2018 Hauke Mehrtens <hauke@hauke-m.de> */ +#include "lantiq_gswip.h" + enum { OUT_MAC0 = 0, OUT_MAC1, @@ -74,13 +76,6 @@ enum { FLAG_NO, /*13*/ }; -struct gswip_pce_microcode { - u16 val_3; - u16 val_2; - u16 val_1; - u16 val_0; -}; - #define MC_ENTRY(val, msk, ns, out, len, type, flags, ipv4_len) \ { val, msk, ((ns) << 10 | (out) << 4 | (len) >> 1),\ ((len) & 1) << 15 | (type) << 13 | (flags) << 9 | (ipv4_len) << 8 } diff --git a/drivers/net/dsa/realtek/realtek.h b/drivers/net/dsa/realtek/realtek.h index a1b2e0b529d5..c03485a80d93 100644 --- a/drivers/net/dsa/realtek/realtek.h +++ b/drivers/net/dsa/realtek/realtek.h @@ -19,9 +19,6 @@ struct phylink_mac_ops; struct realtek_ops; -struct dentry; -struct inode; -struct file; struct rtl8366_mib_counter { unsigned int base; diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index e6b802e3d844..81ea01a652b9 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -698,7 +698,8 @@ static int airoha_qdma_rx_process(struct airoha_queue *q, int budget) reason = FIELD_GET(AIROHA_RXD4_PPE_CPU_REASON, msg1); if (reason == PPE_CPU_REASON_HIT_UNBIND_RATE_REACHED) - airoha_ppe_check_skb(eth->ppe, q->skb, hash); + airoha_ppe_check_skb(ð->ppe->dev, q->skb, hash, + false); done++; napi_gro_receive(&q->napi, q->skb); @@ -2599,13 +2600,15 @@ static int airoha_dev_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv) { struct net_device *dev = cb_priv; + struct airoha_gdm_port *port = netdev_priv(dev); + struct airoha_eth *eth = port->qdma->eth; if (!tc_can_offload(dev)) return -EOPNOTSUPP; switch (type) { case TC_SETUP_CLSFLOWER: - return airoha_ppe_setup_tc_block_cb(dev, type_data); + return airoha_ppe_setup_tc_block_cb(ð->ppe->dev, type_data); case TC_SETUP_CLSMATCHALL: return airoha_dev_tc_matchall(dev, type_data); default: diff --git a/drivers/net/ethernet/airoha/airoha_eth.h b/drivers/net/ethernet/airoha/airoha_eth.h index a970b789cf23..77fd13d466dc 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.h 
+++ b/drivers/net/ethernet/airoha/airoha_eth.h @@ -13,6 +13,7 @@ #include <linux/kernel.h> #include <linux/netdevice.h> #include <linux/reset.h> +#include <linux/soc/airoha/airoha_offload.h> #include <net/dsa.h> #define AIROHA_MAX_NUM_GDM_PORTS 4 @@ -229,10 +230,6 @@ struct airoha_hw_stats { }; enum { - PPE_CPU_REASON_HIT_UNBIND_RATE_REACHED = 0x0f, -}; - -enum { AIROHA_FOE_STATE_INVALID, AIROHA_FOE_STATE_UNBIND, AIROHA_FOE_STATE_BIND, @@ -252,6 +249,10 @@ enum { #define AIROHA_FOE_MAC_SMAC_ID GENMASK(20, 16) #define AIROHA_FOE_MAC_PPPOE_ID GENMASK(15, 0) +#define AIROHA_FOE_MAC_WDMA_QOS GENMASK(15, 12) +#define AIROHA_FOE_MAC_WDMA_BAND BIT(11) +#define AIROHA_FOE_MAC_WDMA_WCID GENMASK(10, 0) + struct airoha_foe_mac_info_common { u16 vlan1; u16 etype; @@ -481,6 +482,13 @@ struct airoha_flow_table_entry { unsigned long cookie; }; +struct airoha_wdma_info { + u8 idx; + u8 queue; + u16 wcid; + u8 bss; +}; + /* RX queue to IRQ mapping: BIT(q) in IRQ(n) */ #define RX_IRQ0_BANK_PIN_MASK 0x839f #define RX_IRQ1_BANK_PIN_MASK 0x7fe00000 @@ -535,6 +543,7 @@ struct airoha_gdm_port { #define AIROHA_RXD4_FOE_ENTRY GENMASK(15, 0) struct airoha_ppe { + struct airoha_ppe_dev dev; struct airoha_eth *eth; void *foe; @@ -609,9 +618,9 @@ static inline bool airhoa_is_lan_gdm_port(struct airoha_gdm_port *port) bool airoha_is_valid_gdm_port(struct airoha_eth *eth, struct airoha_gdm_port *port); -void airoha_ppe_check_skb(struct airoha_ppe *ppe, struct sk_buff *skb, - u16 hash); -int airoha_ppe_setup_tc_block_cb(struct net_device *dev, void *type_data); +void airoha_ppe_check_skb(struct airoha_ppe_dev *dev, struct sk_buff *skb, + u16 hash, bool rx_wlan); +int airoha_ppe_setup_tc_block_cb(struct airoha_ppe_dev *dev, void *type_data); int airoha_ppe_init(struct airoha_eth *eth); void airoha_ppe_deinit(struct airoha_eth *eth); void airoha_ppe_init_upd_mem(struct airoha_gdm_port *port); diff --git a/drivers/net/ethernet/airoha/airoha_npu.c b/drivers/net/ethernet/airoha/airoha_npu.c index a802f95df99d..e1d131d6115c 100644 --- a/drivers/net/ethernet/airoha/airoha_npu.c +++ b/drivers/net/ethernet/airoha/airoha_npu.c @@ -13,7 +13,6 @@ #include <linux/regmap.h> #include "airoha_eth.h" -#include "airoha_npu.h" #define NPU_EN7581_FIRMWARE_DATA "airoha/en7581_npu_data.bin" #define NPU_EN7581_FIRMWARE_RV32 "airoha/en7581_npu_rv32.bin" @@ -42,6 +41,22 @@ #define REG_CR_MBQ8_CTRL(_n) (NPU_MBOX_BASE_ADDR + 0x0b0 + ((_n) << 2)) #define REG_CR_NPU_MIB(_n) (NPU_MBOX_BASE_ADDR + 0x140 + ((_n) << 2)) +#define NPU_WLAN_BASE_ADDR 0x30d000 + +#define REG_IRQ_STATUS (NPU_WLAN_BASE_ADDR + 0x030) +#define REG_IRQ_RXDONE(_n) (NPU_WLAN_BASE_ADDR + ((_n) << 2) + 0x034) +#define NPU_IRQ_RX_MASK(_n) ((_n) == 1 ? 
BIT(17) : BIT(16)) + +#define REG_TX_BASE(_n) (NPU_WLAN_BASE_ADDR + ((_n) << 4) + 0x080) +#define REG_TX_DSCP_NUM(_n) (NPU_WLAN_BASE_ADDR + ((_n) << 4) + 0x084) +#define REG_TX_CPU_IDX(_n) (NPU_WLAN_BASE_ADDR + ((_n) << 4) + 0x088) +#define REG_TX_DMA_IDX(_n) (NPU_WLAN_BASE_ADDR + ((_n) << 4) + 0x08c) + +#define REG_RX_BASE(_n) (NPU_WLAN_BASE_ADDR + ((_n) << 4) + 0x180) +#define REG_RX_DSCP_NUM(_n) (NPU_WLAN_BASE_ADDR + ((_n) << 4) + 0x184) +#define REG_RX_CPU_IDX(_n) (NPU_WLAN_BASE_ADDR + ((_n) << 4) + 0x188) +#define REG_RX_DMA_IDX(_n) (NPU_WLAN_BASE_ADDR + ((_n) << 4) + 0x18c) + #define NPU_TIMER_BASE_ADDR 0x310100 #define REG_WDT_TIMER_CTRL(_n) (NPU_TIMER_BASE_ADDR + ((_n) * 0x100)) #define WDT_EN_MASK BIT(25) @@ -124,6 +139,13 @@ struct ppe_mbox_data { }; }; +struct wlan_mbox_data { + u32 ifindex:4; + u32 func_type:4; + u32 func_id; + DECLARE_FLEX_ARRAY(u8, d); +}; + static int airoha_npu_send_msg(struct airoha_npu *npu, int func_id, void *p, int size) { @@ -390,6 +412,136 @@ out: return err; } +static int airoha_npu_wlan_msg_send(struct airoha_npu *npu, int ifindex, + enum airoha_npu_wlan_set_cmd func_id, + void *data, int data_len, gfp_t gfp) +{ + struct wlan_mbox_data *wlan_data; + int err, len; + + len = sizeof(*wlan_data) + data_len; + wlan_data = kzalloc(len, gfp); + if (!wlan_data) + return -ENOMEM; + + wlan_data->ifindex = ifindex; + wlan_data->func_type = NPU_OP_SET; + wlan_data->func_id = func_id; + memcpy(wlan_data->d, data, data_len); + + err = airoha_npu_send_msg(npu, NPU_FUNC_WIFI, wlan_data, len); + kfree(wlan_data); + + return err; +} + +static int airoha_npu_wlan_msg_get(struct airoha_npu *npu, int ifindex, + enum airoha_npu_wlan_get_cmd func_id, + void *data, int data_len, gfp_t gfp) +{ + struct wlan_mbox_data *wlan_data; + int err, len; + + len = sizeof(*wlan_data) + data_len; + wlan_data = kzalloc(len, gfp); + if (!wlan_data) + return -ENOMEM; + + wlan_data->ifindex = ifindex; + wlan_data->func_type = NPU_OP_GET; + wlan_data->func_id = func_id; + + err = airoha_npu_send_msg(npu, NPU_FUNC_WIFI, wlan_data, len); + if (!err) + memcpy(data, wlan_data->d, data_len); + kfree(wlan_data); + + return err; +} + +static int +airoha_npu_wlan_set_reserved_memory(struct airoha_npu *npu, + int ifindex, const char *name, + enum airoha_npu_wlan_set_cmd func_id) +{ + struct device *dev = npu->dev; + struct resource res; + int err; + u32 val; + + err = of_reserved_mem_region_to_resource_byname(dev->of_node, name, + &res); + if (err) + return err; + + val = res.start; + return airoha_npu_wlan_msg_send(npu, ifindex, func_id, &val, + sizeof(val), GFP_KERNEL); +} + +static int airoha_npu_wlan_init_memory(struct airoha_npu *npu) +{ + enum airoha_npu_wlan_set_cmd cmd = WLAN_FUNC_SET_WAIT_NPU_BAND0_ONCPU; + u32 val = 0; + int err; + + err = airoha_npu_wlan_msg_send(npu, 1, cmd, &val, sizeof(val), + GFP_KERNEL); + if (err) + return err; + + cmd = WLAN_FUNC_SET_WAIT_TX_BUF_CHECK_ADDR; + err = airoha_npu_wlan_set_reserved_memory(npu, 0, "tx-bufid", cmd); + if (err) + return err; + + cmd = WLAN_FUNC_SET_WAIT_PKT_BUF_ADDR; + err = airoha_npu_wlan_set_reserved_memory(npu, 0, "pkt", cmd); + if (err) + return err; + + cmd = WLAN_FUNC_SET_WAIT_TX_PKT_BUF_ADDR; + err = airoha_npu_wlan_set_reserved_memory(npu, 0, "tx-pkt", cmd); + if (err) + return err; + + cmd = WLAN_FUNC_SET_WAIT_IS_FORCE_TO_CPU; + return airoha_npu_wlan_msg_send(npu, 0, cmd, &val, sizeof(val), + GFP_KERNEL); +} + +static u32 airoha_npu_wlan_queue_addr_get(struct airoha_npu *npu, int qid, + bool xmit) +{ + if (xmit) + return 
REG_TX_BASE(qid + 2); + + return REG_RX_BASE(qid); +} + +static void airoha_npu_wlan_irq_status_set(struct airoha_npu *npu, u32 val) +{ + regmap_write(npu->regmap, REG_IRQ_STATUS, val); +} + +static u32 airoha_npu_wlan_irq_status_get(struct airoha_npu *npu, int q) +{ + u32 val; + + regmap_read(npu->regmap, REG_IRQ_STATUS, &val); + return val; +} + +static void airoha_npu_wlan_irq_enable(struct airoha_npu *npu, int q) +{ + regmap_set_bits(npu->regmap, REG_IRQ_RXDONE(q), NPU_IRQ_RX_MASK(q)); +} + +static void airoha_npu_wlan_irq_disable(struct airoha_npu *npu, int q) +{ + regmap_clear_bits(npu->regmap, REG_IRQ_RXDONE(q), NPU_IRQ_RX_MASK(q)); +} + struct airoha_npu *airoha_npu_get(struct device *dev, dma_addr_t *stats_addr) { struct platform_device *pdev; @@ -493,6 +645,14 @@ static int airoha_npu_probe(struct platform_device *pdev) npu->ops.ppe_deinit = airoha_npu_ppe_deinit; npu->ops.ppe_flush_sram_entries = airoha_npu_ppe_flush_sram_entries; npu->ops.ppe_foe_commit_entry = airoha_npu_foe_commit_entry; + npu->ops.wlan_init_reserved_memory = airoha_npu_wlan_init_memory; + npu->ops.wlan_send_msg = airoha_npu_wlan_msg_send; + npu->ops.wlan_get_msg = airoha_npu_wlan_msg_get; + npu->ops.wlan_get_queue_addr = airoha_npu_wlan_queue_addr_get; + npu->ops.wlan_set_irq_status = airoha_npu_wlan_irq_status_set; + npu->ops.wlan_get_irq_status = airoha_npu_wlan_irq_status_get; + npu->ops.wlan_enable_irq = airoha_npu_wlan_irq_enable; + npu->ops.wlan_disable_irq = airoha_npu_wlan_irq_disable; npu->regmap = devm_regmap_init_mmio(dev, base, ®map_config); if (IS_ERR(npu->regmap)) @@ -529,6 +689,15 @@ static int airoha_npu_probe(struct platform_device *pdev) INIT_WORK(&core->wdt_work, airoha_npu_wdt_work); } + /* wlan IRQ lines */ + for (i = 0; i < ARRAY_SIZE(npu->irqs); i++) { + irq = platform_get_irq(pdev, i + ARRAY_SIZE(npu->cores) + 1); + if (irq < 0) + return irq; + + npu->irqs[i] = irq; + } + err = dma_set_coherent_mask(dev, DMA_BIT_MASK(32)); if (err) return err; @@ -550,8 +719,7 @@ static int airoha_npu_probe(struct platform_device *pdev) usleep_range(1000, 2000); /* enable NPU cores */ - /* do not start core3 since it is used for WiFi offloading */ - regmap_write(npu->regmap, REG_CR_BOOT_CONFIG, 0xf7); + regmap_write(npu->regmap, REG_CR_BOOT_CONFIG, 0xff); regmap_write(npu->regmap, REG_CR_BOOT_TRIGGER, 0x1); msleep(100); diff --git a/drivers/net/ethernet/airoha/airoha_npu.h b/drivers/net/ethernet/airoha/airoha_npu.h deleted file mode 100644 index 98ec3be74ce4..000000000000 --- a/drivers/net/ethernet/airoha/airoha_npu.h +++ /dev/null @@ -1,36 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (c) 2025 AIROHA Inc - * Author: Lorenzo Bianconi <lorenzo@kernel.org> - */ - -#define NPU_NUM_CORES 8 - -struct airoha_npu { - struct device *dev; - struct regmap *regmap; - - struct airoha_npu_core { - struct airoha_npu *npu; - /* protect concurrent npu memory accesses */ - spinlock_t lock; - struct work_struct wdt_work; - } cores[NPU_NUM_CORES]; - - struct airoha_foe_stats __iomem *stats; - - struct { - int (*ppe_init)(struct airoha_npu *npu); - int (*ppe_deinit)(struct airoha_npu *npu); - int (*ppe_flush_sram_entries)(struct airoha_npu *npu, - dma_addr_t foe_addr, - int sram_num_entries); - int (*ppe_foe_commit_entry)(struct airoha_npu *npu, - dma_addr_t foe_addr, - u32 entry_size, u32 hash, - bool ppe2); - } ops; -}; - -struct airoha_npu *airoha_npu_get(struct device *dev, dma_addr_t *stats_addr); -void airoha_npu_put(struct airoha_npu *npu); diff --git 
a/drivers/net/ethernet/airoha/airoha_ppe.c b/drivers/net/ethernet/airoha/airoha_ppe.c index 88694b08afa1..78473527ff50 100644 --- a/drivers/net/ethernet/airoha/airoha_ppe.c +++ b/drivers/net/ethernet/airoha/airoha_ppe.c @@ -6,11 +6,12 @@ #include <linux/ip.h> #include <linux/ipv6.h> +#include <linux/of_platform.h> +#include <linux/platform_device.h> #include <linux/rhashtable.h> #include <net/ipv6.h> #include <net/pkt_cls.h> -#include "airoha_npu.h" #include "airoha_regs.h" #include "airoha_eth.h" @@ -190,6 +191,31 @@ static int airoha_ppe_flow_mangle_ipv4(const struct flow_action_entry *act, return 0; } +static int airoha_ppe_get_wdma_info(struct net_device *dev, const u8 *addr, + struct airoha_wdma_info *info) +{ + struct net_device_path_stack stack; + struct net_device_path *path; + int err; + + if (!dev) + return -ENODEV; + + err = dev_fill_forward_path(dev, addr, &stack); + if (err) + return err; + + path = &stack.path[stack.num_paths - 1]; + if (path->type != DEV_PATH_MTK_WDMA) + return -1; + + info->idx = path->mtk_wdma.wdma_idx; + info->bss = path->mtk_wdma.bss; + info->wcid = path->mtk_wdma.wcid; + + return 0; +} + static int airoha_get_dsa_port(struct net_device **dev) { #if IS_ENABLED(CONFIG_NET_DSA) @@ -220,9 +246,9 @@ static int airoha_ppe_foe_entry_prepare(struct airoha_eth *eth, struct airoha_flow_data *data, int l4proto) { - int dsa_port = airoha_get_dsa_port(&dev); + u32 qdata = FIELD_PREP(AIROHA_FOE_SHAPER_ID, 0x7f), ports_pad, val; + int wlan_etype = -EINVAL, dsa_port = airoha_get_dsa_port(&dev); struct airoha_foe_mac_info_common *l2; - u32 qdata, ports_pad, val; u8 smac_id = 0xf; memset(hwe, 0, sizeof(*hwe)); @@ -236,31 +262,47 @@ static int airoha_ppe_foe_entry_prepare(struct airoha_eth *eth, AIROHA_FOE_IB1_BIND_TTL; hwe->ib1 = val; - val = FIELD_PREP(AIROHA_FOE_IB2_PORT_AG, 0x1f) | - AIROHA_FOE_IB2_PSE_QOS; - if (dsa_port >= 0) - val |= FIELD_PREP(AIROHA_FOE_IB2_NBQ, dsa_port); - + val = FIELD_PREP(AIROHA_FOE_IB2_PORT_AG, 0x1f); if (dev) { - struct airoha_gdm_port *port = netdev_priv(dev); - u8 pse_port; - - if (!airoha_is_valid_gdm_port(eth, port)) - return -EINVAL; - - if (dsa_port >= 0) - pse_port = port->id == 4 ? FE_PSE_PORT_GDM4 : port->id; - else - pse_port = 2; /* uplink relies on GDM2 loopback */ - val |= FIELD_PREP(AIROHA_FOE_IB2_PSE_PORT, pse_port); - - /* For downlink traffic consume SRAM memory for hw forwarding - * descriptors queue. - */ - if (airhoa_is_lan_gdm_port(port)) - val |= AIROHA_FOE_IB2_FAST_PATH; - - smac_id = port->id; + struct airoha_wdma_info info = {}; + + if (!airoha_ppe_get_wdma_info(dev, data->eth.h_dest, &info)) { + val |= FIELD_PREP(AIROHA_FOE_IB2_NBQ, info.idx) | + FIELD_PREP(AIROHA_FOE_IB2_PSE_PORT, + FE_PSE_PORT_CDM4); + qdata |= FIELD_PREP(AIROHA_FOE_ACTDP, info.bss); + wlan_etype = FIELD_PREP(AIROHA_FOE_MAC_WDMA_BAND, + info.idx) | + FIELD_PREP(AIROHA_FOE_MAC_WDMA_WCID, + info.wcid); + } else { + struct airoha_gdm_port *port = netdev_priv(dev); + u8 pse_port; + + if (!airoha_is_valid_gdm_port(eth, port)) + return -EINVAL; + + if (dsa_port >= 0) + pse_port = port->id == 4 ? FE_PSE_PORT_GDM4 + : port->id; + else + pse_port = 2; /* uplink relies on GDM2 + * loopback + */ + + val |= FIELD_PREP(AIROHA_FOE_IB2_PSE_PORT, pse_port) | + AIROHA_FOE_IB2_PSE_QOS; + /* For downlink traffic consume SRAM memory for hw + * forwarding descriptors queue. 
+ */ + if (airhoa_is_lan_gdm_port(port)) + val |= AIROHA_FOE_IB2_FAST_PATH; + if (dsa_port >= 0) + val |= FIELD_PREP(AIROHA_FOE_IB2_NBQ, + dsa_port); + + smac_id = port->id; + } } if (is_multicast_ether_addr(data->eth.h_dest)) @@ -272,7 +314,6 @@ static int airoha_ppe_foe_entry_prepare(struct airoha_eth *eth, if (type == PPE_PKT_TYPE_IPV6_ROUTE_3T) hwe->ipv6.ports = ports_pad; - qdata = FIELD_PREP(AIROHA_FOE_SHAPER_ID, 0x7f); if (type == PPE_PKT_TYPE_BRIDGE) { airoha_ppe_foe_set_bridge_addrs(&hwe->bridge, &data->eth); hwe->bridge.data = qdata; @@ -313,7 +354,9 @@ static int airoha_ppe_foe_entry_prepare(struct airoha_eth *eth, l2->vlan2 = data->vlan.hdr[1].id; } - if (dsa_port >= 0) { + if (wlan_etype >= 0) { + l2->etype = wlan_etype; + } else if (dsa_port >= 0) { l2->etype = BIT(dsa_port); l2->etype |= !data->vlan.num ? BIT(15) : 0; } else if (data->pppoe.num) { @@ -490,6 +533,10 @@ static void airoha_ppe_foe_flow_stats_update(struct airoha_ppe *ppe, meter = &hwe->ipv4.l2.meter; } + pse_port = FIELD_GET(AIROHA_FOE_IB2_PSE_PORT, *ib2); + if (pse_port == FE_PSE_PORT_CDM4) + return; + airoha_ppe_foe_flow_stat_entry_reset(ppe, npu, index); val = FIELD_GET(AIROHA_FOE_CHANNEL | AIROHA_FOE_QID, *data); @@ -500,7 +547,6 @@ static void airoha_ppe_foe_flow_stats_update(struct airoha_ppe *ppe, AIROHA_FOE_IB2_PSE_QOS | AIROHA_FOE_IB2_FAST_PATH); *meter |= FIELD_PREP(AIROHA_FOE_TUNNEL_MTU, val); - pse_port = FIELD_GET(AIROHA_FOE_IB2_PSE_PORT, *ib2); nbq = pse_port == 1 ? 6 : 5; *ib2 &= ~(AIROHA_FOE_IB2_NBQ | AIROHA_FOE_IB2_PSE_PORT | AIROHA_FOE_IB2_PSE_QOS); @@ -570,7 +616,7 @@ static bool airoha_ppe_foe_compare_entry(struct airoha_flow_table_entry *e, static int airoha_ppe_foe_commit_entry(struct airoha_ppe *ppe, struct airoha_foe_entry *e, - u32 hash) + u32 hash, bool rx_wlan) { struct airoha_foe_entry *hwe = ppe->foe + hash * sizeof(*hwe); u32 ts = airoha_ppe_get_timestamp(ppe); @@ -593,7 +639,8 @@ static int airoha_ppe_foe_commit_entry(struct airoha_ppe *ppe, goto unlock; } - airoha_ppe_foe_flow_stats_update(ppe, npu, hwe, hash); + if (!rx_wlan) + airoha_ppe_foe_flow_stats_update(ppe, npu, hwe, hash); if (hash < PPE_SRAM_NUM_ENTRIES) { dma_addr_t addr = ppe->foe_dma + hash * sizeof(*hwe); @@ -619,7 +666,7 @@ static void airoha_ppe_foe_remove_flow(struct airoha_ppe *ppe, e->data.ib1 &= ~AIROHA_FOE_IB1_BIND_STATE; e->data.ib1 |= FIELD_PREP(AIROHA_FOE_IB1_BIND_STATE, AIROHA_FOE_STATE_INVALID); - airoha_ppe_foe_commit_entry(ppe, &e->data, e->hash); + airoha_ppe_foe_commit_entry(ppe, &e->data, e->hash, false); e->hash = 0xffff; } if (e->type == FLOW_TYPE_L2_SUBFLOW) { @@ -658,7 +705,7 @@ static void airoha_ppe_foe_flow_remove_entry(struct airoha_ppe *ppe, static int airoha_ppe_foe_commit_subflow_entry(struct airoha_ppe *ppe, struct airoha_flow_table_entry *e, - u32 hash) + u32 hash, bool rx_wlan) { u32 mask = AIROHA_FOE_IB1_BIND_PACKET_TYPE | AIROHA_FOE_IB1_BIND_UDP; struct airoha_foe_entry *hwe_p, hwe; @@ -699,14 +746,14 @@ airoha_ppe_foe_commit_subflow_entry(struct airoha_ppe *ppe, } hwe.bridge.data = e->data.bridge.data; - airoha_ppe_foe_commit_entry(ppe, &hwe, hash); + airoha_ppe_foe_commit_entry(ppe, &hwe, hash, rx_wlan); return 0; } static void airoha_ppe_foe_insert_entry(struct airoha_ppe *ppe, struct sk_buff *skb, - u32 hash) + u32 hash, bool rx_wlan) { struct airoha_flow_table_entry *e; struct airoha_foe_bridge br = {}; @@ -739,7 +786,7 @@ static void airoha_ppe_foe_insert_entry(struct airoha_ppe *ppe, if (!airoha_ppe_foe_compare_entry(e, hwe)) continue; - airoha_ppe_foe_commit_entry(ppe, 
&e->data, hash); + airoha_ppe_foe_commit_entry(ppe, &e->data, hash, rx_wlan); commit_done = true; e->hash = hash; } @@ -751,7 +798,7 @@ static void airoha_ppe_foe_insert_entry(struct airoha_ppe *ppe, e = rhashtable_lookup_fast(&ppe->l2_flows, &br, airoha_l2_flow_table_params); if (e) - airoha_ppe_foe_commit_subflow_entry(ppe, e, hash); + airoha_ppe_foe_commit_subflow_entry(ppe, e, hash, rx_wlan); unlock: spin_unlock_bh(&ppe_lock); } @@ -890,11 +937,10 @@ static int airoha_ppe_entry_idle_time(struct airoha_ppe *ppe, return airoha_ppe_get_entry_idle_time(ppe, e->data.ib1); } -static int airoha_ppe_flow_offload_replace(struct airoha_gdm_port *port, +static int airoha_ppe_flow_offload_replace(struct airoha_eth *eth, struct flow_cls_offload *f) { struct flow_rule *rule = flow_cls_offload_flow_rule(f); - struct airoha_eth *eth = port->qdma->eth; struct airoha_flow_table_entry *e; struct airoha_flow_data data = {}; struct net_device *odev = NULL; @@ -1091,10 +1137,9 @@ free_entry: return err; } -static int airoha_ppe_flow_offload_destroy(struct airoha_gdm_port *port, +static int airoha_ppe_flow_offload_destroy(struct airoha_eth *eth, struct flow_cls_offload *f) { - struct airoha_eth *eth = port->qdma->eth; struct airoha_flow_table_entry *e; e = rhashtable_lookup(ð->flow_table, &f->cookie, @@ -1137,10 +1182,9 @@ void airoha_ppe_foe_entry_get_stats(struct airoha_ppe *ppe, u32 hash, rcu_read_unlock(); } -static int airoha_ppe_flow_offload_stats(struct airoha_gdm_port *port, +static int airoha_ppe_flow_offload_stats(struct airoha_eth *eth, struct flow_cls_offload *f) { - struct airoha_eth *eth = port->qdma->eth; struct airoha_flow_table_entry *e; u32 idle; @@ -1164,16 +1208,16 @@ static int airoha_ppe_flow_offload_stats(struct airoha_gdm_port *port, return 0; } -static int airoha_ppe_flow_offload_cmd(struct airoha_gdm_port *port, +static int airoha_ppe_flow_offload_cmd(struct airoha_eth *eth, struct flow_cls_offload *f) { switch (f->command) { case FLOW_CLS_REPLACE: - return airoha_ppe_flow_offload_replace(port, f); + return airoha_ppe_flow_offload_replace(eth, f); case FLOW_CLS_DESTROY: - return airoha_ppe_flow_offload_destroy(port, f); + return airoha_ppe_flow_offload_destroy(eth, f); case FLOW_CLS_STATS: - return airoha_ppe_flow_offload_stats(port, f); + return airoha_ppe_flow_offload_stats(eth, f); default: break; } @@ -1240,11 +1284,10 @@ error_npu_put: return err; } -int airoha_ppe_setup_tc_block_cb(struct net_device *dev, void *type_data) +int airoha_ppe_setup_tc_block_cb(struct airoha_ppe_dev *dev, void *type_data) { - struct airoha_gdm_port *port = netdev_priv(dev); - struct flow_cls_offload *cls = type_data; - struct airoha_eth *eth = port->qdma->eth; + struct airoha_ppe *ppe = dev->priv; + struct airoha_eth *eth = ppe->eth; int err = 0; mutex_lock(&flow_offload_mutex); @@ -1252,16 +1295,17 @@ int airoha_ppe_setup_tc_block_cb(struct net_device *dev, void *type_data) if (!eth->npu) err = airoha_ppe_offload_setup(eth); if (!err) - err = airoha_ppe_flow_offload_cmd(port, cls); + err = airoha_ppe_flow_offload_cmd(eth, type_data); mutex_unlock(&flow_offload_mutex); return err; } -void airoha_ppe_check_skb(struct airoha_ppe *ppe, struct sk_buff *skb, - u16 hash) +void airoha_ppe_check_skb(struct airoha_ppe_dev *dev, struct sk_buff *skb, + u16 hash, bool rx_wlan) { + struct airoha_ppe *ppe = dev->priv; u16 now, diff; if (hash > PPE_HASH_MASK) @@ -1273,7 +1317,7 @@ void airoha_ppe_check_skb(struct airoha_ppe *ppe, struct sk_buff *skb, return; ppe->foe_check_time[hash] = now; - 
airoha_ppe_foe_insert_entry(ppe, skb, hash); + airoha_ppe_foe_insert_entry(ppe, skb, hash, rx_wlan); } void airoha_ppe_init_upd_mem(struct airoha_gdm_port *port) @@ -1297,6 +1341,61 @@ void airoha_ppe_init_upd_mem(struct airoha_gdm_port *port) PPE_UPDMEM_WR_MASK | PPE_UPDMEM_REQ_MASK); } +struct airoha_ppe_dev *airoha_ppe_get_dev(struct device *dev) +{ + struct platform_device *pdev; + struct device_node *np; + struct airoha_eth *eth; + + np = of_parse_phandle(dev->of_node, "airoha,eth", 0); + if (!np) + return ERR_PTR(-ENODEV); + + pdev = of_find_device_by_node(np); + if (!pdev) { + dev_err(dev, "cannot find device node %s\n", np->name); + of_node_put(np); + return ERR_PTR(-ENODEV); + } + of_node_put(np); + + if (!try_module_get(THIS_MODULE)) { + dev_err(dev, "failed to get the device driver module\n"); + goto error_pdev_put; + } + + eth = platform_get_drvdata(pdev); + if (!eth) + goto error_module_put; + + if (!device_link_add(dev, &pdev->dev, DL_FLAG_AUTOREMOVE_SUPPLIER)) { + dev_err(&pdev->dev, + "failed to create device link to consumer %s\n", + dev_name(dev)); + goto error_module_put; + } + + return ð->ppe->dev; + +error_module_put: + module_put(THIS_MODULE); +error_pdev_put: + platform_device_put(pdev); + + return ERR_PTR(-ENODEV); +} +EXPORT_SYMBOL_GPL(airoha_ppe_get_dev); + +void airoha_ppe_put_dev(struct airoha_ppe_dev *dev) +{ + struct airoha_ppe *ppe = dev->priv; + struct airoha_eth *eth = ppe->eth; + + module_put(THIS_MODULE); + put_device(eth->dev); +} +EXPORT_SYMBOL_GPL(airoha_ppe_put_dev); + int airoha_ppe_init(struct airoha_eth *eth) { struct airoha_ppe *ppe; @@ -1306,6 +1405,10 @@ int airoha_ppe_init(struct airoha_eth *eth) if (!ppe) return -ENOMEM; + ppe->dev.ops.setup_tc_block_cb = airoha_ppe_setup_tc_block_cb; + ppe->dev.ops.check_skb = airoha_ppe_check_skb; + ppe->dev.priv = ppe; + foe_size = PPE_NUM_ENTRIES * sizeof(struct airoha_foe_entry); ppe->foe = dmam_alloc_coherent(eth->dev, foe_size, &ppe->foe_dma, GFP_KERNEL); diff --git a/drivers/net/ethernet/amd/pds_core/main.c b/drivers/net/ethernet/amd/pds_core/main.c index 9b81e1c260c2..c7a2eff57632 100644 --- a/drivers/net/ethernet/amd/pds_core/main.c +++ b/drivers/net/ethernet/amd/pds_core/main.c @@ -280,7 +280,7 @@ static int pdsc_init_pf(struct pdsc *pdsc) goto err_out_del_dev; } - hr = devl_health_reporter_create(dl, &pdsc_fw_reporter_ops, 0, pdsc); + hr = devl_health_reporter_create(dl, &pdsc_fw_reporter_ops, pdsc); if (IS_ERR(hr)) { devl_unlock(dl); dev_warn(pdsc->dev, "Failed to create fw reporter: %pe\n", hr); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c index be0d2c7d08dc..b6e1b67a2d0e 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c @@ -329,6 +329,7 @@ static int xgbe_get_coalesce(struct net_device *netdev, ec->rx_coalesce_usecs = pdata->rx_usecs; ec->rx_max_coalesced_frames = pdata->rx_frames; + ec->tx_coalesce_usecs = pdata->tx_usecs; ec->tx_max_coalesced_frames = pdata->tx_frames; return 0; @@ -342,7 +343,8 @@ static int xgbe_set_coalesce(struct net_device *netdev, struct xgbe_prv_data *pdata = netdev_priv(netdev); struct xgbe_hw_if *hw_if = &pdata->hw_if; unsigned int rx_frames, rx_riwt, rx_usecs; - unsigned int tx_frames; + unsigned int tx_frames, tx_usecs; + unsigned int jiffy_us = jiffies_to_usecs(1); rx_riwt = hw_if->usec_to_riwt(pdata, ec->rx_coalesce_usecs); rx_usecs = ec->rx_coalesce_usecs; @@ -364,20 +366,42 @@ static int xgbe_set_coalesce(struct net_device *netdev, return 
-EINVAL;
 	}
 
+	tx_usecs = ec->tx_coalesce_usecs;
 	tx_frames = ec->tx_max_coalesced_frames;
 
 	/* Check the bounds of values for Tx */
+	if (!tx_usecs) {
+		NL_SET_ERR_MSG_FMT_MOD(extack,
+				       "tx-usecs must not be 0");
+		return -EINVAL;
+	}
+	if (tx_usecs > XGMAC_MAX_COAL_TX_TICK) {
+		NL_SET_ERR_MSG_FMT_MOD(extack, "tx-usecs is limited to %d usec",
+				       XGMAC_MAX_COAL_TX_TICK);
+		return -EINVAL;
+	}
 	if (tx_frames > pdata->tx_desc_count) {
 		netdev_err(netdev, "tx-frames is limited to %d frames\n",
 			   pdata->tx_desc_count);
 		return -EINVAL;
 	}
 
+	/* Round tx-usecs down to the nearest multiple of the jiffy granularity */
+	if (tx_usecs % jiffy_us) {
+		tx_usecs = rounddown(tx_usecs, jiffy_us);
+		if (!tx_usecs)
+			tx_usecs = jiffy_us;
+		NL_SET_ERR_MSG_FMT_MOD(extack,
+				       "tx-usecs rounded to %u usec due to jiffy granularity (%u usec)",
+				       tx_usecs, jiffy_us);
+	}
+
 	pdata->rx_riwt = rx_riwt;
 	pdata->rx_usecs = rx_usecs;
 	pdata->rx_frames = rx_frames;
 	hw_if->config_rx_coalesce(pdata);
 
+	pdata->tx_usecs = tx_usecs;
 	pdata->tx_frames = tx_frames;
 	hw_if->config_tx_coalesce(pdata);
 
@@ -440,7 +464,7 @@ static int xgbe_set_rxfh(struct net_device *netdev,
 {
 	struct xgbe_prv_data *pdata = netdev_priv(netdev);
 	struct xgbe_hw_if *hw_if = &pdata->hw_if;
-	unsigned int ret;
+	int ret;
 
 	if (rxfh->hfunc != ETH_RSS_HASH_NO_CHANGE &&
 	    rxfh->hfunc != ETH_RSS_HASH_TOP) {
@@ -709,7 +733,7 @@ out:
 }
 
 static const struct ethtool_ops xgbe_ethtool_ops = {
-	.supported_coalesce_params = ETHTOOL_COALESCE_RX_USECS |
+	.supported_coalesce_params = ETHTOOL_COALESCE_USECS |
				     ETHTOOL_COALESCE_MAX_FRAMES,
 	.get_drvinfo = xgbe_get_drvinfo,
 	.get_msglevel = xgbe_get_msglevel,
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c b/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c
index d40011e8ddf2..65eb7b577b65 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c
@@ -70,7 +70,7 @@ static int xgbe_i2c_set_enable(struct xgbe_prv_data *pdata, bool enable)
 
 static int xgbe_i2c_disable(struct xgbe_prv_data *pdata)
 {
-	unsigned int ret;
+	int ret;
 
 	ret = xgbe_i2c_set_enable(pdata, false);
 	if (ret) {
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
index 23c39e92e783..a56efc1bee33 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
@@ -2902,7 +2902,7 @@ static void xgbe_phy_sfp_setup(struct xgbe_prv_data *pdata)
 static int xgbe_phy_int_mdio_reset(struct xgbe_prv_data *pdata)
 {
 	struct xgbe_phy_data *phy_data = pdata->phy_data;
-	unsigned int ret;
+	int ret;
 
 	ret = pdata->hw_if.set_gpio(pdata, phy_data->mdio_reset_gpio);
 	if (ret)
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h
index d7e03e292ec4..0fa80a238ac5 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe.h
+++ b/drivers/net/ethernet/amd/xgbe/xgbe.h
@@ -168,6 +168,7 @@
 /* Default coalescing parameters */
 #define XGMAC_INIT_DMA_TX_USECS		1000
 #define XGMAC_INIT_DMA_TX_FRAMES	25
+#define XGMAC_MAX_COAL_TX_TICK		100000
 
 #define XGMAC_MAX_DMA_RIWT		0xff
 #define XGMAC_INIT_DMA_RX_USECS		30
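For illustration, the effect of the new tx-usecs rounding in xgbe_set_coalesce (a sketch, not part of the patch; the helper name is hypothetical and HZ=250 is assumed, so that jiffies_to_usecs(1) == 4000):

	static unsigned int xgbe_round_tx_usecs(unsigned int tx_usecs)
	{
		unsigned int jiffy_us = jiffies_to_usecs(1);	/* 4000 us at HZ=250 */

		if (tx_usecs % jiffy_us) {
			tx_usecs = rounddown(tx_usecs, jiffy_us);	/* 10000 -> 8000 */
			if (!tx_usecs)
				tx_usecs = jiffy_us;	/* e.g. a 1500 us request becomes 4000 us */
		}

		return tx_usecs;
	}

Requests of 0 or above XGMAC_MAX_COAL_TX_TICK (100000 usec) are rejected with -EINVAL before this rounding is applied.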
"Broadcom BCM5760X Virtual Function" }, + [NETXTREME_E_P7_VF_HV] = { "Broadcom BCM5760X Virtual Function for Hyper-V" }, }; static const struct pci_device_id bnxt_pci_tbl[] = { @@ -217,6 +218,7 @@ static const struct pci_device_id bnxt_pci_tbl[] = { { PCI_VDEVICE(BROADCOM, 0x1808), .driver_data = NETXTREME_E_P5_VF_HV }, { PCI_VDEVICE(BROADCOM, 0x1809), .driver_data = NETXTREME_E_P5_VF_HV }, { PCI_VDEVICE(BROADCOM, 0x1819), .driver_data = NETXTREME_E_P7_VF }, + { PCI_VDEVICE(BROADCOM, 0x181b), .driver_data = NETXTREME_E_P7_VF_HV }, { PCI_VDEVICE(BROADCOM, 0xd800), .driver_data = NETXTREME_S_VF }, #endif { 0 } @@ -315,7 +317,8 @@ static bool bnxt_vf_pciid(enum board_idx idx) return (idx == NETXTREME_C_VF || idx == NETXTREME_E_VF || idx == NETXTREME_S_VF || idx == NETXTREME_C_VF_HV || idx == NETXTREME_E_VF_HV || idx == NETXTREME_E_P5_VF || - idx == NETXTREME_E_P5_VF_HV || idx == NETXTREME_E_P7_VF); + idx == NETXTREME_E_P5_VF_HV || idx == NETXTREME_E_P7_VF || + idx == NETXTREME_E_P7_VF_HV); } #define DB_CP_REARM_FLAGS (DB_KEY_CP | DB_IDX_VALID) @@ -3797,8 +3800,7 @@ static void bnxt_free_rx_rings(struct bnxt *bp) xdp_rxq_info_unreg(&rxr->xdp_rxq); page_pool_destroy(rxr->page_pool); - if (bnxt_separate_head_pool(rxr)) - page_pool_destroy(rxr->head_pool); + page_pool_destroy(rxr->head_pool); rxr->page_pool = rxr->head_pool = NULL; kfree(rxr->rx_agg_bmap); @@ -3845,6 +3847,8 @@ static int bnxt_alloc_rx_page_pool(struct bnxt *bp, pool = page_pool_create(&pp); if (IS_ERR(pool)) goto err_destroy_pp; + } else { + page_pool_get(pool); } rxr->head_pool = pool; @@ -6969,6 +6973,8 @@ static int bnxt_hwrm_vnic_qcaps(struct bnxt *bp) bp->rss_cap |= BNXT_RSS_CAP_ESP_V4_RSS_CAP; if (flags & VNIC_QCAPS_RESP_FLAGS_RSS_IPSEC_ESP_SPI_IPV6_CAP) bp->rss_cap |= BNXT_RSS_CAP_ESP_V6_RSS_CAP; + if (flags & VNIC_QCAPS_RESP_FLAGS_RSS_IPV6_FLOW_LABEL_CAP) + bp->rss_cap |= BNXT_RSS_CAP_IPV6_FLOW_LABEL_RSS_CAP; if (flags & VNIC_QCAPS_RESP_FLAGS_RE_FLUSH_CAP) bp->fw_cap |= BNXT_FW_CAP_VNIC_RE_FLUSH; } @@ -15922,8 +15928,7 @@ err_rxq_info_unreg: xdp_rxq_info_unreg(&clone->xdp_rxq); err_page_pool_destroy: page_pool_destroy(clone->page_pool); - if (bnxt_separate_head_pool(clone)) - page_pool_destroy(clone->head_pool); + page_pool_destroy(clone->head_pool); clone->page_pool = NULL; clone->head_pool = NULL; return rc; @@ -15941,8 +15946,7 @@ static void bnxt_queue_mem_free(struct net_device *dev, void *qmem) xdp_rxq_info_unreg(&rxr->xdp_rxq); page_pool_destroy(rxr->page_pool); - if (bnxt_separate_head_pool(rxr)) - page_pool_destroy(rxr->head_pool); + page_pool_destroy(rxr->head_pool); rxr->page_pool = NULL; rxr->head_pool = NULL; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index fda0d3cc6227..1bb2a5de88cd 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -2130,6 +2130,7 @@ enum board_idx { NETXTREME_E_P5_VF, NETXTREME_E_P5_VF_HV, NETXTREME_E_P7_VF, + NETXTREME_E_P7_VF_HV, }; #define BNXT_TRACE_BUF_MAGIC_BYTE ((u8)0xbc) @@ -2407,6 +2408,7 @@ struct bnxt { #define BNXT_RSS_CAP_ESP_V4_RSS_CAP BIT(6) #define BNXT_RSS_CAP_ESP_V6_RSS_CAP BIT(7) #define BNXT_RSS_CAP_MULTI_RSS_CTX BIT(8) +#define BNXT_RSS_CAP_IPV6_FLOW_LABEL_RSS_CAP BIT(9) u8 rss_hash_key[HW_HASH_KEY_SIZE]; u8 rss_hash_key_valid:1; @@ -2542,6 +2544,7 @@ struct bnxt { u16 fw_rx_stats_ext_size; u16 fw_tx_stats_ext_size; u16 hw_ring_stats_size; + u16 pcie_stat_len; u8 pri2cos_idx[8]; u8 pri2cos_valid; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c 
b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index 4c4581b0342e..43fb75806cd6 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -220,7 +220,7 @@ __bnxt_dl_reporter_create(struct bnxt *bp, { struct devlink_health_reporter *reporter; - reporter = devlink_health_reporter_create(bp->dl, ops, 0, bp); + reporter = devlink_health_reporter_create(bp->dl, ops, bp); if (IS_ERR(reporter)) { netdev_warn(bp->dev, "Failed to create %s health reporter, rc = %ld\n", ops->name, PTR_ERR(reporter)); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 1b37612b1c01..2830a2b17a27 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -1584,6 +1584,8 @@ static u64 get_ethtool_ipv6_rss(struct bnxt *bp) { if (bp->rss_hash_cfg & VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6) return RXH_IP_SRC | RXH_IP_DST; + if (bp->rss_hash_cfg & VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6_FLOW_LABEL) + return RXH_IP_SRC | RXH_IP_DST | RXH_IP6_FL; return 0; } @@ -1662,13 +1664,18 @@ static int bnxt_set_rxfh_fields(struct net_device *dev, if (cmd->data == RXH_4TUPLE) tuple = 4; - else if (cmd->data == RXH_2TUPLE) + else if (cmd->data == RXH_2TUPLE || + cmd->data == (RXH_2TUPLE | RXH_IP6_FL)) tuple = 2; else if (!cmd->data) tuple = 0; else return -EINVAL; + if (cmd->data & RXH_IP6_FL && + !(bp->rss_cap & BNXT_RSS_CAP_IPV6_FLOW_LABEL_RSS_CAP)) + return -EINVAL; + if (cmd->flow_type == TCP_V4_FLOW) { rss_hash_cfg &= ~VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV4; if (tuple == 4) @@ -1732,10 +1739,15 @@ static int bnxt_set_rxfh_fields(struct net_device *dev, case AH_V6_FLOW: case ESP_V6_FLOW: case IPV6_FLOW: - if (tuple == 2) + rss_hash_cfg &= ~(VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6 | + VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6_FLOW_LABEL); + if (!tuple) + break; + if (cmd->data & RXH_IP6_FL) + rss_hash_cfg |= + VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6_FLOW_LABEL; + else if (tuple == 2) rss_hash_cfg |= VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6; - else if (!tuple) - rss_hash_cfg &= ~VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6; break; } @@ -2049,38 +2061,52 @@ static void bnxt_get_drvinfo(struct net_device *dev, static int bnxt_get_regs_len(struct net_device *dev) { struct bnxt *bp = netdev_priv(dev); - int reg_len; if (!BNXT_PF(bp)) return -EOPNOTSUPP; - reg_len = BNXT_PXP_REG_LEN; + return BNXT_PXP_REG_LEN + bp->pcie_stat_len; +} - if (bp->fw_cap & BNXT_FW_CAP_PCIE_STATS_SUPPORTED) - reg_len += sizeof(struct pcie_ctx_hw_stats); +static void * +__bnxt_hwrm_pcie_qstats(struct bnxt *bp, struct hwrm_pcie_qstats_input *req) +{ + struct pcie_ctx_hw_stats_v2 *hw_pcie_stats; + dma_addr_t hw_pcie_stats_addr; + int rc; - return reg_len; + hw_pcie_stats = hwrm_req_dma_slice(bp, req, sizeof(*hw_pcie_stats), + &hw_pcie_stats_addr); + if (!hw_pcie_stats) + return NULL; + + req->pcie_stat_size = cpu_to_le16(sizeof(*hw_pcie_stats)); + req->pcie_stat_host_addr = cpu_to_le64(hw_pcie_stats_addr); + rc = hwrm_req_send(bp, req); + + return rc ? 
NULL : hw_pcie_stats; } #define BNXT_PCIE_32B_ENTRY(start, end) \ - { offsetof(struct pcie_ctx_hw_stats, start), \ - offsetof(struct pcie_ctx_hw_stats, end) } + { offsetof(struct pcie_ctx_hw_stats_v2, start),\ + offsetof(struct pcie_ctx_hw_stats_v2, end) } static const struct { u16 start; u16 end; } bnxt_pcie_32b_entries[] = { BNXT_PCIE_32B_ENTRY(pcie_ltssm_histogram[0], pcie_ltssm_histogram[3]), + BNXT_PCIE_32B_ENTRY(pcie_tl_credit_nph_histogram[0], unused_1), + BNXT_PCIE_32B_ENTRY(pcie_rd_latency_histogram[0], unused_2), }; static void bnxt_get_regs(struct net_device *dev, struct ethtool_regs *regs, void *_p) { - struct pcie_ctx_hw_stats *hw_pcie_stats; + struct hwrm_pcie_qstats_output *resp; struct hwrm_pcie_qstats_input *req; struct bnxt *bp = netdev_priv(dev); - dma_addr_t hw_pcie_stats_addr; - int rc; + u8 *src; regs->version = 0; if (!(bp->fw_dbg_cap & DBG_QCAPS_RESP_FLAGS_REG_ACCESS_RESTRICTED)) @@ -2092,24 +2118,21 @@ static void bnxt_get_regs(struct net_device *dev, struct ethtool_regs *regs, if (hwrm_req_init(bp, req, HWRM_PCIE_QSTATS)) return; - hw_pcie_stats = hwrm_req_dma_slice(bp, req, sizeof(*hw_pcie_stats), - &hw_pcie_stats_addr); - if (!hw_pcie_stats) { - hwrm_req_drop(bp, req); - return; - } - - regs->version = 1; - hwrm_req_hold(bp, req); /* hold on to slice */ - req->pcie_stat_size = cpu_to_le16(sizeof(*hw_pcie_stats)); - req->pcie_stat_host_addr = cpu_to_le64(hw_pcie_stats_addr); - rc = hwrm_req_send(bp, req); - if (!rc) { + resp = hwrm_req_hold(bp, req); + src = __bnxt_hwrm_pcie_qstats(bp, req); + if (src) { u8 *dst = (u8 *)(_p + BNXT_PXP_REG_LEN); - u8 *src = (u8 *)hw_pcie_stats; - int i, j; + int i, j, len; + + len = min(bp->pcie_stat_len, le16_to_cpu(resp->pcie_stat_size)); + if (len <= sizeof(struct pcie_ctx_hw_stats)) + regs->version = 1; + else if (len < sizeof(struct pcie_ctx_hw_stats_v2)) + regs->version = 2; + else + regs->version = 3; - for (i = 0, j = 0; i < sizeof(*hw_pcie_stats); ) { + for (i = 0, j = 0; i < len; ) { if (i >= bnxt_pcie_32b_entries[j].start && i <= bnxt_pcie_32b_entries[j].end) { u32 *dst32 = (u32 *)(dst + i); @@ -5254,6 +5277,26 @@ static int bnxt_get_ts_info(struct net_device *dev, return 0; } +static void bnxt_hwrm_pcie_qstats(struct bnxt *bp) +{ + struct hwrm_pcie_qstats_output *resp; + struct hwrm_pcie_qstats_input *req; + + bp->pcie_stat_len = 0; + if (!(bp->fw_cap & BNXT_FW_CAP_PCIE_STATS_SUPPORTED)) + return; + + if (hwrm_req_init(bp, req, HWRM_PCIE_QSTATS)) + return; + + resp = hwrm_req_hold(bp, req); + if (__bnxt_hwrm_pcie_qstats(bp, req)) + bp->pcie_stat_len = min_t(u16, + le16_to_cpu(resp->pcie_stat_size), + sizeof(struct pcie_ctx_hw_stats_v2)); + hwrm_req_drop(bp, req); +} + void bnxt_ethtool_init(struct bnxt *bp) { struct hwrm_selftest_qlist_output *resp; @@ -5262,6 +5305,7 @@ void bnxt_ethtool_init(struct bnxt *bp) struct net_device *dev = bp->dev; int i, rc; + bnxt_hwrm_pcie_qstats(bp); if (!(bp->fw_cap & BNXT_FW_CAP_PKG_VER)) bnxt_get_pkgver(dev); diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index c9a5c8beb2fa..904954610611 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -184,6 +184,13 @@ #define GEM_DCFG8 0x029C /* Design Config 8 */ #define GEM_DCFG10 0x02A4 /* Design Config 10 */ #define GEM_DCFG12 0x02AC /* Design Config 12 */ +#define GEM_ENST_START_TIME_Q0 0x0800 /* ENST Q0 start time */ +#define GEM_ENST_START_TIME_Q1 0x0804 /* ENST Q1 start time */ +#define GEM_ENST_ON_TIME_Q0 0x0820 /* ENST Q0 on time */ +#define 
GEM_ENST_ON_TIME_Q1	0x0824 /* ENST Q1 on time */
+#define GEM_ENST_OFF_TIME_Q0	0x0840 /* ENST Q0 off time */
+#define GEM_ENST_OFF_TIME_Q1	0x0844 /* ENST Q1 off time */
+#define GEM_ENST_CONTROL	0x0880 /* ENST control register */
 
 #define GEM_USX_CONTROL		0x0A80 /* High speed PCS control register */
 #define GEM_USX_STATUS		0x0A88 /* High speed PCS status register */
@@ -221,6 +228,13 @@
 #define GEM_IDR(hw_q)		(0x0620 + ((hw_q) << 2))
 #define GEM_IMR(hw_q)		(0x0640 + ((hw_q) << 2))
 
+#define GEM_ENST_START_TIME(hw_q)	(0x0800 + ((hw_q) << 2))
+#define GEM_ENST_ON_TIME(hw_q)		(0x0820 + ((hw_q) << 2))
+#define GEM_ENST_OFF_TIME(hw_q)		(0x0840 + ((hw_q) << 2))
+
+/* Bitfields in ENST_CONTROL */
+#define GEM_ENST_DISABLE_QUEUE_OFFSET	16
+
 /* Bitfields in NCR */
 #define MACB_LB_OFFSET		0 /* reserved */
 #define MACB_LB_SIZE		1
@@ -554,6 +568,23 @@
 #define GEM_HIGH_SPEED_OFFSET	26
 #define GEM_HIGH_SPEED_SIZE	1
 
+/* Bitfields in ENST_START_TIME_Qx. */
+#define GEM_START_TIME_SEC_OFFSET	30
+#define GEM_START_TIME_SEC_SIZE		2
+#define GEM_START_TIME_NSEC_OFFSET	0
+#define GEM_START_TIME_NSEC_SIZE	30
+
+/* Bitfields in ENST_ON_TIME_Qx. */
+#define GEM_ON_TIME_OFFSET	0
+#define GEM_ON_TIME_SIZE	17
+
+/* Bitfields in ENST_OFF_TIME_Qx. */
+#define GEM_OFF_TIME_OFFSET	0
+#define GEM_OFF_TIME_SIZE	17
+
+/* Granularity of the hardware ENST timing registers */
+#define ENST_TIME_GRANULARITY_NS	8
+
 /* Bitfields in USX_CONTROL. */
 #define GEM_USX_CTRL_SPEED_OFFSET	14
 #define GEM_USX_CTRL_SPEED_SIZE		3
@@ -739,6 +770,7 @@
 #define MACB_CAPS_MIIONRGMII		0x00000200
 #define MACB_CAPS_NEED_TSUCLK		0x00000400
 #define MACB_CAPS_QUEUE_DISABLE		0x00000800
+#define MACB_CAPS_QBV			0x00001000
 #define MACB_CAPS_PCS			0x01000000
 #define MACB_CAPS_HIGH_SPEED		0x02000000
 #define MACB_CAPS_CLK_HW_CHG		0x04000000
@@ -1219,6 +1251,11 @@ struct macb_queue {
 	unsigned int		RBQP;
 	unsigned int		RBQPH;
 
+	/* ENST register offsets for this queue */
+	unsigned int		ENST_START_TIME;
+	unsigned int		ENST_ON_TIME;
+	unsigned int		ENST_OFF_TIME;
+
 	/* Lock to protect tx_head and tx_tail */
 	spinlock_t		tx_ptr_lock;
 	unsigned int		tx_head, tx_tail;
@@ -1397,6 +1434,19 @@ static inline bool gem_has_ptp(struct macb *bp)
 	return IS_ENABLED(CONFIG_MACB_USE_HWSTAMP) && (bp->caps & MACB_CAPS_GEM_HAS_PTP);
 }
 
+/* ENST helper functions */
+static inline u64 enst_ns_to_hw_units(size_t ns, u32 speed_mbps)
+{
+	return DIV_ROUND_UP((ns) * (speed_mbps),
+			    (ENST_TIME_GRANULARITY_NS * 1000));
+}
+
+static inline u64 enst_max_hw_interval(u32 speed_mbps)
+{
+	return DIV_ROUND_UP(GENMASK(GEM_ON_TIME_SIZE - 1, 0) *
+			    ENST_TIME_GRANULARITY_NS * 1000, (speed_mbps));
+}
+
 /**
  * struct macb_platform_data - platform data for MACB Ethernet used for PCI registration
  * @pclk: platform clock
@@ -1407,4 +1457,21 @@ struct macb_platform_data {
 	struct clk *hclk;
 };
 
+/**
+ * struct macb_queue_enst_config - Configuration for Enhanced Scheduled Traffic
+ * @start_time_mask: Bitmask representing the start time for the queue
+ * @on_time_bytes: "on" time nsec expressed in bytes
+ * @off_time_bytes: "off" time nsec expressed in bytes
+ * @queue_id: Identifier for the queue
+ *
+ * This structure holds the configuration parameters for an ENST queue,
+ * used to control time-based transmission scheduling in the MACB driver.
+ */
+struct macb_queue_enst_config {
+	u32	start_time_mask;
+	u32	on_time_bytes;
+	u32	off_time_bytes;
+	u8	queue_id;
+};
+
 #endif /* _MACB_H */
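For illustration, the unit conversion performed by the ENST helpers above (a worked sketch, not part of the patch): one hardware unit equals 8000 / speed_mbps nanoseconds, i.e. exactly one byte time on the wire, which is why struct macb_queue_enst_config stores the on/off times as *_bytes. Assuming a 1 Gb/s link:

	u64 on_units = enst_ns_to_hw_units(100 * NSEC_PER_USEC, 1000);
	/* DIV_ROUND_UP(100000 * 1000, 8 * 1000) == 12500 byte times */

	u64 max_ns = enst_max_hw_interval(1000);
	/* GEM_ON_TIME_SIZE is 17 bits, so GENMASK(16, 0) == 131071 units:
	 * DIV_ROUND_UP(131071 * 8 * 1000, 1000) == 1048568 ns, about 1.05 ms;
	 * at 100 Mb/s the limit grows tenfold to about 10.5 ms.
	 */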
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index c769b7dbd3ba..a6fbab388c19 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -36,6 +36,7 @@
 #include <linux/reset.h>
 #include <linux/firmware/xlnx-zynqmp.h>
 #include <linux/inetdevice.h>
+#include <net/pkt_sched.h>
 #include "macb.h"
 
 /* This structure is only used for MACB on SiFive FU540 devices */
@@ -4088,6 +4089,223 @@ static void macb_restore_features(struct macb *bp)
 	macb_set_rxflow_feature(bp, features);
 }
 
+static int macb_taprio_setup_replace(struct net_device *ndev,
+				     struct tc_taprio_qopt_offload *conf)
+{
+	u64 total_on_time = 0, start_time_sec = 0, start_time = conf->base_time;
+	u32 configured_queues = 0, speed = 0, start_time_nsec;
+	struct macb_queue_enst_config *enst_queue;
+	struct tc_taprio_sched_entry *entry;
+	struct macb *bp = netdev_priv(ndev);
+	struct ethtool_link_ksettings kset;
+	struct macb_queue *queue;
+	size_t i;
+	int err;
+
+	if (conf->num_entries > bp->num_queues) {
+		netdev_err(ndev, "Too many TAPRIO entries: %zu > %d queues\n",
+			   conf->num_entries, bp->num_queues);
+		return -EINVAL;
+	}
+
+	if (conf->base_time < 0) {
+		netdev_err(ndev, "Invalid base_time: must be 0 or positive, got %lld\n",
+			   conf->base_time);
+		return -ERANGE;
+	}
+
+	/* Get the current link speed */
+	err = phylink_ethtool_ksettings_get(bp->phylink, &kset);
+	if (unlikely(err)) {
+		netdev_err(ndev, "Failed to get link settings: %d\n", err);
+		return err;
+	}
+
+	speed = kset.base.speed;
+	if (unlikely(speed <= 0)) {
+		netdev_err(ndev, "Invalid speed: %d\n", speed);
+		return -EINVAL;
+	}
+
+	enst_queue = kcalloc(conf->num_entries, sizeof(*enst_queue), GFP_KERNEL);
+	if (unlikely(!enst_queue))
+		return -ENOMEM;
+
+	/* Pre-validate all entries before making any hardware changes */
+	for (i = 0; i < conf->num_entries; i++) {
+		entry = &conf->entries[i];
+
+		if (entry->command != TC_TAPRIO_CMD_SET_GATES) {
+			netdev_err(ndev, "Entry %zu: unsupported command %d\n",
+				   i, entry->command);
+			err = -EOPNOTSUPP;
+			goto cleanup;
+		}
+
+		/* Validate gate_mask: must be nonzero, single queue, and within range */
+		if (!is_power_of_2(entry->gate_mask)) {
+			netdev_err(ndev, "Entry %zu: gate_mask 0x%x is not a power of 2 (only one queue per entry allowed)\n",
+				   i, entry->gate_mask);
+			err = -EINVAL;
+			goto cleanup;
+		}
+
+		/* gate_mask must not select queues outside the valid queue_mask */
+		if (entry->gate_mask & ~bp->queue_mask) {
+			netdev_err(ndev, "Entry %zu: gate_mask 0x%x exceeds queue range (max_queues=%d)\n",
+				   i, entry->gate_mask, bp->num_queues);
+			err = -EINVAL;
+			goto cleanup;
+		}
+
+		/* Check for start time limits */
+		start_time_sec = start_time;
+		start_time_nsec = do_div(start_time_sec, NSEC_PER_SEC);
+		if (start_time_sec > GENMASK(GEM_START_TIME_SEC_SIZE - 1, 0)) {
+			netdev_err(ndev, "Entry %zu: Start time %llu s exceeds hardware limit\n",
+				   i, start_time_sec);
+			err = -ERANGE;
+			goto cleanup;
+		}
+
+		/* Check for on time limit */
+		if (entry->interval > enst_max_hw_interval(speed)) {
+			netdev_err(ndev, "Entry %zu: interval %u ns exceeds hardware limit %llu ns\n",
+				   i, entry->interval, enst_max_hw_interval(speed));
+			err = -ERANGE;
+			goto cleanup;
+		}
+
+		/* Check for off time limit */
+		if ((conf->cycle_time - entry->interval) > enst_max_hw_interval(speed)) {
+			netdev_err(ndev, "Entry %zu:
off_time %llu ns exceeds hardware limit %llu ns\n", + i, conf->cycle_time - entry->interval, + enst_max_hw_interval(speed)); + err = -ERANGE; + goto cleanup; + } + + enst_queue[i].queue_id = order_base_2(entry->gate_mask); + enst_queue[i].start_time_mask = + (start_time_sec << GEM_START_TIME_SEC_OFFSET) | + start_time_nsec; + enst_queue[i].on_time_bytes = + enst_ns_to_hw_units(entry->interval, speed); + enst_queue[i].off_time_bytes = + enst_ns_to_hw_units(conf->cycle_time - entry->interval, speed); + + configured_queues |= entry->gate_mask; + total_on_time += entry->interval; + start_time += entry->interval; + } + + /* Check total interval doesn't exceed cycle time */ + if (total_on_time > conf->cycle_time) { + netdev_err(ndev, "Total ON %llu ns exceeds cycle time %llu ns\n", + total_on_time, conf->cycle_time); + err = -EINVAL; + goto cleanup; + } + + netdev_dbg(ndev, "TAPRIO setup: %zu entries, base_time=%lld ns, cycle_time=%llu ns\n", + conf->num_entries, conf->base_time, conf->cycle_time); + + /* All validations passed - proceed with hardware configuration */ + scoped_guard(spinlock_irqsave, &bp->lock) { + /* Disable ENST queues if running before configuring */ + gem_writel(bp, ENST_CONTROL, + bp->queue_mask << GEM_ENST_DISABLE_QUEUE_OFFSET); + + for (i = 0; i < conf->num_entries; i++) { + queue = &bp->queues[enst_queue[i].queue_id]; + /* Configure queue timing registers */ + queue_writel(queue, ENST_START_TIME, + enst_queue[i].start_time_mask); + queue_writel(queue, ENST_ON_TIME, + enst_queue[i].on_time_bytes); + queue_writel(queue, ENST_OFF_TIME, + enst_queue[i].off_time_bytes); + } + + /* Enable ENST for all configured queues in one write */ + gem_writel(bp, ENST_CONTROL, configured_queues); + } + + netdev_info(ndev, "TAPRIO configuration completed successfully: %zu entries, %d queues configured\n", + conf->num_entries, hweight32(configured_queues)); + +cleanup: + kfree(enst_queue); + return err; +} + +static void macb_taprio_destroy(struct net_device *ndev) +{ + struct macb *bp = netdev_priv(ndev); + struct macb_queue *queue; + u32 enst_disable_mask; + unsigned int q; + + netdev_reset_tc(ndev); + enst_disable_mask = bp->queue_mask << GEM_ENST_DISABLE_QUEUE_OFFSET; + + scoped_guard(spinlock_irqsave, &bp->lock) { + /* Single disable command for all queues */ + gem_writel(bp, ENST_CONTROL, enst_disable_mask); + + /* Clear all queue ENST registers in batch */ + for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) { + queue_writel(queue, ENST_START_TIME, 0); + queue_writel(queue, ENST_ON_TIME, 0); + queue_writel(queue, ENST_OFF_TIME, 0); + } + } + netdev_info(ndev, "TAPRIO destroy: All gates disabled\n"); +} + +static int macb_setup_taprio(struct net_device *ndev, + struct tc_taprio_qopt_offload *taprio) +{ + struct macb *bp = netdev_priv(ndev); + int err = 0; + + if (unlikely(!(ndev->hw_features & NETIF_F_HW_TC))) + return -EOPNOTSUPP; + + /* Check if Device is in runtime suspend */ + if (unlikely(pm_runtime_suspended(&bp->pdev->dev))) { + netdev_err(ndev, "Device is in runtime suspend\n"); + return -EOPNOTSUPP; + } + + switch (taprio->cmd) { + case TAPRIO_CMD_REPLACE: + err = macb_taprio_setup_replace(ndev, taprio); + break; + case TAPRIO_CMD_DESTROY: + macb_taprio_destroy(ndev); + break; + default: + err = -EOPNOTSUPP; + } + + return err; +} + +static int macb_setup_tc(struct net_device *dev, enum tc_setup_type type, + void *type_data) +{ + if (!dev || !type_data) + return -EINVAL; + + switch (type) { + case TC_SETUP_QDISC_TAPRIO: + return macb_setup_taprio(dev, 
type_data); + default: + return -EOPNOTSUPP; + } +} + static const struct net_device_ops macb_netdev_ops = { .ndo_open = macb_open, .ndo_stop = macb_close, @@ -4105,6 +4323,7 @@ static const struct net_device_ops macb_netdev_ops = { .ndo_features_check = macb_features_check, .ndo_hwtstamp_set = macb_hwtstamp_set, .ndo_hwtstamp_get = macb_hwtstamp_get, + .ndo_setup_tc = macb_setup_tc, }; /* Configure peripheral capabilities according to device tree @@ -4331,6 +4550,10 @@ static int macb_init(struct platform_device *pdev) #endif } + queue->ENST_START_TIME = GEM_ENST_START_TIME(hw_q); + queue->ENST_ON_TIME = GEM_ENST_ON_TIME(hw_q); + queue->ENST_OFF_TIME = GEM_ENST_OFF_TIME(hw_q); + /* get irq: here we use the linux queue index, not the hardware * queue index. the queue irq definitions in the device tree * must remove the optional gaps that could exist in the @@ -4383,6 +4606,10 @@ static int macb_init(struct platform_device *pdev) dev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_RXCSUM; if (bp->caps & MACB_CAPS_SG_DISABLED) dev->hw_features &= ~NETIF_F_SG; + /* Enable HW_TC if hardware supports QBV */ + if (bp->caps & MACB_CAPS_QBV) + dev->hw_features |= NETIF_F_HW_TC; + dev->features = dev->hw_features; /* Check RX Flow Filters support. @@ -4826,36 +5053,45 @@ static unsigned long fu540_macb_tx_recalc_rate(struct clk_hw *hw, return mgmt->rate; } -static long fu540_macb_tx_round_rate(struct clk_hw *hw, unsigned long rate, - unsigned long *parent_rate) -{ - if (WARN_ON(rate < 2500000)) - return 2500000; - else if (rate == 2500000) - return 2500000; - else if (WARN_ON(rate < 13750000)) - return 2500000; - else if (WARN_ON(rate < 25000000)) - return 25000000; - else if (rate == 25000000) - return 25000000; - else if (WARN_ON(rate < 75000000)) - return 25000000; - else if (WARN_ON(rate < 125000000)) - return 125000000; - else if (rate == 125000000) - return 125000000; - - WARN_ON(rate > 125000000); +static int fu540_macb_tx_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) +{ + if (WARN_ON(req->rate < 2500000)) + req->rate = 2500000; + else if (req->rate == 2500000) + req->rate = 2500000; + else if (WARN_ON(req->rate < 13750000)) + req->rate = 2500000; + else if (WARN_ON(req->rate < 25000000)) + req->rate = 25000000; + else if (req->rate == 25000000) + req->rate = 25000000; + else if (WARN_ON(req->rate < 75000000)) + req->rate = 25000000; + else if (WARN_ON(req->rate < 125000000)) + req->rate = 125000000; + else if (req->rate == 125000000) + req->rate = 125000000; + else if (WARN_ON(req->rate > 125000000)) + req->rate = 125000000; + else + req->rate = 125000000; - return 125000000; + return 0; } static int fu540_macb_tx_set_rate(struct clk_hw *hw, unsigned long rate, unsigned long parent_rate) { - rate = fu540_macb_tx_round_rate(hw, rate, &parent_rate); - if (rate != 125000000) + struct clk_rate_request req; + int ret; + + clk_hw_init_rate_request(hw, &req, rate); + ret = fu540_macb_tx_determine_rate(hw, &req); + if (ret != 0) + return ret; + + if (req.rate != 125000000) iowrite32(1, mgmt->reg); else iowrite32(0, mgmt->reg); @@ -4866,7 +5102,7 @@ static int fu540_macb_tx_set_rate(struct clk_hw *hw, unsigned long rate, static const struct clk_ops fu540_c000_ops = { .recalc_rate = fu540_macb_tx_recalc_rate, - .round_rate = fu540_macb_tx_round_rate, + .determine_rate = fu540_macb_tx_determine_rate, .set_rate = fu540_macb_tx_set_rate, }; @@ -5127,8 +5363,9 @@ static const struct macb_config sama7g5_emac_config = { static const struct macb_config versal_config = { .caps = 
MACB_CAPS_GIGABIT_MODE_AVAILABLE | MACB_CAPS_JUMBO | - MACB_CAPS_GEM_HAS_PTP | MACB_CAPS_BD_RD_PREFETCH | MACB_CAPS_NEED_TSUCLK | - MACB_CAPS_QUEUE_DISABLE, + MACB_CAPS_GEM_HAS_PTP | MACB_CAPS_BD_RD_PREFETCH | + MACB_CAPS_NEED_TSUCLK | MACB_CAPS_QUEUE_DISABLE | + MACB_CAPS_QBV, .dma_burst_length = 16, .clk_init = macb_clk_init, .init = init_reset_optional, diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c index 6f6525983130..4ee970f3bad6 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c @@ -171,7 +171,7 @@ static void chtls_purge_receive_queue(struct sock *sk) struct sk_buff *skb; while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) { - skb_dst_set(skb, (void *)NULL); + skb_dstref_steal(skb); kfree_skb(skb); } } @@ -194,7 +194,7 @@ static void chtls_purge_recv_queue(struct sock *sk) struct sk_buff *skb; while ((skb = __skb_dequeue(&tlsk->sk_recv_queue)) != NULL) { - skb_dst_set(skb, NULL); + skb_dstref_steal(skb); kfree_skb(skb); } } @@ -505,7 +505,7 @@ static void reset_listen_child(struct sock *child) chtls_send_reset(child, CPL_ABORT_SEND_RST, skb); sock_orphan(child); - INC_ORPHAN_COUNT(child); + tcp_orphan_count_inc(); if (child->sk_state == TCP_CLOSE) inet_csk_destroy_sock(child); } @@ -870,7 +870,7 @@ static void do_abort_syn_rcv(struct sock *child, struct sock *parent) * created only after 3 way handshake is done. */ sock_orphan(child); - INC_ORPHAN_COUNT(child); + tcp_orphan_count_inc(); chtls_release_resources(child); chtls_conn_done(child); } else { @@ -951,6 +951,7 @@ static unsigned int chtls_select_mss(const struct chtls_sock *csk, struct tcp_sock *tp; unsigned int mss; struct sock *sk; + u16 user_mss; mss = ntohs(req->tcpopt.mss); sk = csk->sk; @@ -969,8 +970,9 @@ static unsigned int chtls_select_mss(const struct chtls_sock *csk, tcpoptsz += round_up(TCPOLEN_TIMESTAMP, 4); tp->advmss = dst_metric_advmss(dst); - if (USER_MSS(tp) && tp->advmss > USER_MSS(tp)) - tp->advmss = USER_MSS(tp); + user_mss = USER_MSS(tp); + if (user_mss && tp->advmss > user_mss) + tp->advmss = user_mss; if (tp->advmss > pmtu - iphdrsz) tp->advmss = pmtu - iphdrsz; if (mss && tp->advmss > mss) @@ -1734,7 +1736,7 @@ static int chtls_rx_data(struct chtls_dev *cdev, struct sk_buff *skb) pr_err("can't find conn. for hwtid %u.\n", hwtid); return -EINVAL; } - skb_dst_set(skb, NULL); + skb_dstref_steal(skb); process_cpl_msg(chtls_recv_data, sk, skb); return 0; } @@ -1786,7 +1788,7 @@ static int chtls_rx_pdu(struct chtls_dev *cdev, struct sk_buff *skb) pr_err("can't find conn. for hwtid %u.\n", hwtid); return -EINVAL; } - skb_dst_set(skb, NULL); + skb_dstref_steal(skb); process_cpl_msg(chtls_recv_pdu, sk, skb); return 0; } @@ -1855,7 +1857,7 @@ static int chtls_rx_cmp(struct chtls_dev *cdev, struct sk_buff *skb) pr_err("can't find conn. 
for hwtid %u.\n", hwtid); return -EINVAL; } - skb_dst_set(skb, NULL); + skb_dstref_steal(skb); process_cpl_msg(chtls_rx_hdr, sk, skb); return 0; diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.h b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.h index f61ca657601c..29ceff5a5fcb 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.h +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.h @@ -90,12 +90,11 @@ struct deferred_skb_cb { #define SND_WSCALE(tp) ((tp)->rx_opt.snd_wscale) #define RCV_WSCALE(tp) ((tp)->rx_opt.rcv_wscale) -#define USER_MSS(tp) ((tp)->rx_opt.user_mss) +#define USER_MSS(tp) (READ_ONCE((tp)->rx_opt.user_mss)) #define TS_RECENT_STAMP(tp) ((tp)->rx_opt.ts_recent_stamp) #define WSCALE_OK(tp) ((tp)->rx_opt.wscale_ok) #define TSTAMP_OK(tp) ((tp)->rx_opt.tstamp_ok) #define SACK_OK(tp) ((tp)->rx_opt.sack_ok) -#define INC_ORPHAN_COUNT(sk) this_cpu_inc(*(sk)->sk_prot->orphan_count) /* TLS SKB */ #define skb_ulp_tls_inline(skb) (ULP_SKB_CB(skb)->ulp.tls.ofld) @@ -171,14 +170,14 @@ static inline void chtls_set_req_addr(struct request_sock *oreq, static inline void chtls_free_skb(struct sock *sk, struct sk_buff *skb) { - skb_dst_set(skb, NULL); + skb_dstref_steal(skb); __skb_unlink(skb, &sk->sk_receive_queue); __kfree_skb(skb); } static inline void chtls_kfree_skb(struct sock *sk, struct sk_buff *skb) { - skb_dst_set(skb, NULL); + skb_dstref_steal(skb); __skb_unlink(skb, &sk->sk_receive_queue); kfree_skb(skb); } diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c index 465fa8077964..4036db466e18 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c @@ -1434,7 +1434,7 @@ static int chtls_pt_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, continue; found_ok_skb: if (!skb->len) { - skb_dst_set(skb, NULL); + skb_dstref_steal(skb); __skb_unlink(skb, &sk->sk_receive_queue); kfree_skb(skb); diff --git a/drivers/net/ethernet/freescale/enetc/Kconfig b/drivers/net/ethernet/freescale/enetc/Kconfig index 54b0f0a5a6bb..117038104b69 100644 --- a/drivers/net/ethernet/freescale/enetc/Kconfig +++ b/drivers/net/ethernet/freescale/enetc/Kconfig @@ -28,6 +28,7 @@ config NXP_NTMP config FSL_ENETC tristate "ENETC PF driver" + depends on PTP_1588_CLOCK_OPTIONAL depends on PCI_MSI select FSL_ENETC_CORE select FSL_ENETC_IERB @@ -45,6 +46,7 @@ config FSL_ENETC config NXP_ENETC4 tristate "ENETC4 PF driver" + depends on PTP_1588_CLOCK_OPTIONAL depends on PCI_MSI select FSL_ENETC_CORE select FSL_ENETC_MDIO @@ -62,6 +64,7 @@ config NXP_ENETC4 config FSL_ENETC_VF tristate "ENETC VF driver" + depends on PTP_1588_CLOCK_OPTIONAL depends on PCI_MSI select FSL_ENETC_CORE select FSL_ENETC_MDIO diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index e4287725832e..aae462a0cf5a 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -221,22 +221,111 @@ static void enetc_unwind_tx_frame(struct enetc_bdr *tx_ring, int count, int i) } } +static void enetc_set_one_step_ts(struct enetc_si *si, bool udp, int offset) +{ + u32 val = ENETC_PM0_SINGLE_STEP_EN; + + val |= ENETC_SET_SINGLE_STEP_OFFSET(offset); + if (udp) + val |= ENETC_PM0_SINGLE_STEP_CH; + + /* The "Correction" field of a packet is updated based on the + * current time and the timestamp provided + */ + 
enetc_port_mac_wr(si, ENETC_PM0_SINGLE_STEP, val); +} + +static void enetc4_set_one_step_ts(struct enetc_si *si, bool udp, int offset) +{ + u32 val = PM_SINGLE_STEP_EN; + + val |= PM_SINGLE_STEP_OFFSET_SET(offset); + if (udp) + val |= PM_SINGLE_STEP_CH; + + enetc_port_mac_wr(si, ENETC4_PM_SINGLE_STEP(0), val); +} + +static u32 enetc_update_ptp_sync_msg(struct enetc_ndev_priv *priv, + struct sk_buff *skb, bool csum_offload) +{ + struct enetc_skb_cb *enetc_cb = ENETC_SKB_CB(skb); + u16 tstamp_off = enetc_cb->origin_tstamp_off; + u16 corr_off = enetc_cb->correction_off; + struct enetc_si *si = priv->si; + struct enetc_hw *hw = &si->hw; + __be32 new_sec_l, new_nsec; + __be16 new_sec_h; + u32 lo, hi, nsec; + u8 *data; + u64 sec; + + lo = enetc_rd_hot(hw, ENETC_SICTR0); + hi = enetc_rd_hot(hw, ENETC_SICTR1); + sec = (u64)hi << 32 | lo; + nsec = do_div(sec, 1000000000); + + /* Update originTimestamp field of Sync packet + * - 48 bits seconds field + * - 32 bits nanoseconds field + * + * In addition, if csum_offload is false, the UDP checksum needs + * to be updated by software after updating the originTimestamp field, + * otherwise the hardware will calculate the wrong checksum when + * updating the correction field and writing it back to the packet. + */ + + data = skb_mac_header(skb); + new_sec_h = htons((sec >> 32) & 0xffff); + new_sec_l = htonl(sec & 0xffffffff); + new_nsec = htonl(nsec); + if (enetc_cb->udp && !csum_offload) { + struct udphdr *uh = udp_hdr(skb); + __be32 old_sec_l, old_nsec; + __be16 old_sec_h; + + old_sec_h = *(__be16 *)(data + tstamp_off); + inet_proto_csum_replace2(&uh->check, skb, old_sec_h, + new_sec_h, false); + + old_sec_l = *(__be32 *)(data + tstamp_off + 2); + inet_proto_csum_replace4(&uh->check, skb, old_sec_l, + new_sec_l, false); + + old_nsec = *(__be32 *)(data + tstamp_off + 6); + inet_proto_csum_replace4(&uh->check, skb, old_nsec, + new_nsec, false); + } + + *(__be16 *)(data + tstamp_off) = new_sec_h; + *(__be32 *)(data + tstamp_off + 2) = new_sec_l; + *(__be32 *)(data + tstamp_off + 6) = new_nsec; + + /* Configure single-step register */ + if (is_enetc_rev1(si)) + enetc_set_one_step_ts(si, enetc_cb->udp, corr_off); + else + enetc4_set_one_step_ts(si, enetc_cb->udp, corr_off); + + return lo & ENETC_TXBD_TSTAMP; +} + static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb) { bool do_vlan, do_onestep_tstamp = false, do_twostep_tstamp = false; struct enetc_ndev_priv *priv = netdev_priv(tx_ring->ndev); - struct enetc_hw *hw = &priv->si->hw; + struct enetc_skb_cb *enetc_cb = ENETC_SKB_CB(skb); struct enetc_tx_swbd *tx_swbd; int len = skb_headlen(skb); union enetc_tx_bd temp_bd; - u8 msgtype, twostep, udp; + bool csum_offload = false; union enetc_tx_bd *txbd; - u16 offset1, offset2; int i, count = 0; skb_frag_t *frag; unsigned int f; dma_addr_t dma; u8 flags = 0; + u32 tstamp; enetc_clear_tx_bd(&temp_bd); if (skb->ip_summed == CHECKSUM_PARTIAL) { @@ -256,11 +345,19 @@ static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb) temp_bd.l4_aux = FIELD_PREP(ENETC_TX_BD_L4T, ENETC_TXBD_L4T_UDP); flags |= ENETC_TXBD_FLAGS_CSUM_LSO | ENETC_TXBD_FLAGS_L4CS; + csum_offload = true; } else if (skb_checksum_help(skb)) { return 0; } } + if (enetc_cb->flag & ENETC_F_TX_ONESTEP_SYNC_TSTAMP) { + do_onestep_tstamp = true; + tstamp = enetc_update_ptp_sync_msg(priv, skb, csum_offload); + } else if (enetc_cb->flag & ENETC_F_TX_TSTAMP) { + do_twostep_tstamp = true; + } + i = tx_ring->next_to_use; txbd = ENETC_TXBD(*tx_ring, i); prefetchw(txbd); @@ -280,17 
+377,6 @@ static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb) count++; do_vlan = skb_vlan_tag_present(skb); - if (skb->cb[0] & ENETC_F_TX_ONESTEP_SYNC_TSTAMP) { - if (enetc_ptp_parse(skb, &udp, &msgtype, &twostep, &offset1, - &offset2) || - msgtype != PTP_MSGTYPE_SYNC || twostep) - WARN_ONCE(1, "Bad packet for one-step timestamping\n"); - else - do_onestep_tstamp = true; - } else if (skb->cb[0] & ENETC_F_TX_TSTAMP) { - do_twostep_tstamp = true; - } - tx_swbd->do_twostep_tstamp = do_twostep_tstamp; tx_swbd->qbv_en = !!(priv->active_offloads & ENETC_F_QBV); tx_swbd->check_wb = tx_swbd->do_twostep_tstamp || tx_swbd->qbv_en; @@ -333,65 +419,9 @@ static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb) } if (do_onestep_tstamp) { - __be32 new_sec_l, new_nsec; - u32 lo, hi, nsec, val; - __be16 new_sec_h; - u8 *data; - u64 sec; - - lo = enetc_rd_hot(hw, ENETC_SICTR0); - hi = enetc_rd_hot(hw, ENETC_SICTR1); - sec = (u64)hi << 32 | lo; - nsec = do_div(sec, 1000000000); - /* Configure extension BD */ - temp_bd.ext.tstamp = cpu_to_le32(lo & 0x3fffffff); + temp_bd.ext.tstamp = cpu_to_le32(tstamp); e_flags |= ENETC_TXBD_E_FLAGS_ONE_STEP_PTP; - - /* Update originTimestamp field of Sync packet - * - 48 bits seconds field - * - 32 bits nanseconds field - * - * In addition, the UDP checksum needs to be updated - * by software after updating originTimestamp field, - * otherwise the hardware will calculate the wrong - * checksum when updating the correction field and - * update it to the packet. - */ - data = skb_mac_header(skb); - new_sec_h = htons((sec >> 32) & 0xffff); - new_sec_l = htonl(sec & 0xffffffff); - new_nsec = htonl(nsec); - if (udp) { - struct udphdr *uh = udp_hdr(skb); - __be32 old_sec_l, old_nsec; - __be16 old_sec_h; - - old_sec_h = *(__be16 *)(data + offset2); - inet_proto_csum_replace2(&uh->check, skb, old_sec_h, - new_sec_h, false); - - old_sec_l = *(__be32 *)(data + offset2 + 2); - inet_proto_csum_replace4(&uh->check, skb, old_sec_l, - new_sec_l, false); - - old_nsec = *(__be32 *)(data + offset2 + 6); - inet_proto_csum_replace4(&uh->check, skb, old_nsec, - new_nsec, false); - } - - *(__be16 *)(data + offset2) = new_sec_h; - *(__be32 *)(data + offset2 + 2) = new_sec_l; - *(__be32 *)(data + offset2 + 6) = new_nsec; - - /* Configure single-step register */ - val = ENETC_PM0_SINGLE_STEP_EN; - val |= ENETC_SET_SINGLE_STEP_OFFSET(offset1); - if (udp) - val |= ENETC_PM0_SINGLE_STEP_CH; - - enetc_port_mac_wr(priv->si, ENETC_PM0_SINGLE_STEP, - val); } else if (do_twostep_tstamp) { skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; e_flags |= ENETC_TXBD_E_FLAGS_TWO_STEP_PTP; @@ -938,12 +968,13 @@ err_chained_bd: static netdev_tx_t enetc_start_xmit(struct sk_buff *skb, struct net_device *ndev) { + struct enetc_skb_cb *enetc_cb = ENETC_SKB_CB(skb); struct enetc_ndev_priv *priv = netdev_priv(ndev); struct enetc_bdr *tx_ring; int count; /* Queue one-step Sync packet if already locked */ - if (skb->cb[0] & ENETC_F_TX_ONESTEP_SYNC_TSTAMP) { + if (enetc_cb->flag & ENETC_F_TX_ONESTEP_SYNC_TSTAMP) { if (test_and_set_bit_lock(ENETC_TX_ONESTEP_TSTAMP_IN_PROGRESS, &priv->flags)) { skb_queue_tail(&priv->tx_skbs, skb); @@ -1005,24 +1036,29 @@ drop_packet_err: netdev_tx_t enetc_xmit(struct sk_buff *skb, struct net_device *ndev) { + struct enetc_skb_cb *enetc_cb = ENETC_SKB_CB(skb); struct enetc_ndev_priv *priv = netdev_priv(ndev); u8 udp, msgtype, twostep; u16 offset1, offset2; - /* Mark tx timestamp type on skb->cb[0] if requires */ + /* Mark tx timestamp type on 
enetc_cb->flag if required */ if ((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && - (priv->active_offloads & ENETC_F_TX_TSTAMP_MASK)) { - skb->cb[0] = priv->active_offloads & ENETC_F_TX_TSTAMP_MASK; - } else { - skb->cb[0] = 0; - } + (priv->active_offloads & ENETC_F_TX_TSTAMP_MASK)) + enetc_cb->flag = priv->active_offloads & ENETC_F_TX_TSTAMP_MASK; + else + enetc_cb->flag = 0; /* Fall back to two-step timestamp if not one-step Sync packet */ - if (skb->cb[0] & ENETC_F_TX_ONESTEP_SYNC_TSTAMP) { + if (enetc_cb->flag & ENETC_F_TX_ONESTEP_SYNC_TSTAMP) { if (enetc_ptp_parse(skb, &udp, &msgtype, &twostep, &offset1, &offset2) || - msgtype != PTP_MSGTYPE_SYNC || twostep != 0) - skb->cb[0] = ENETC_F_TX_TSTAMP; + msgtype != PTP_MSGTYPE_SYNC || twostep != 0) { + enetc_cb->flag = ENETC_F_TX_TSTAMP; + } else { + enetc_cb->udp = !!udp; + enetc_cb->correction_off = offset1; + enetc_cb->origin_tstamp_off = offset2; + } } return enetc_start_xmit(skb, ndev); @@ -1214,7 +1250,9 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget) if (xdp_frame) { xdp_return_frame(xdp_frame); } else if (skb) { - if (unlikely(skb->cb[0] & ENETC_F_TX_ONESTEP_SYNC_TSTAMP)) { + struct enetc_skb_cb *enetc_cb = ENETC_SKB_CB(skb); + + if (unlikely(enetc_cb->flag & ENETC_F_TX_ONESTEP_SYNC_TSTAMP)) { /* Start work to release lock for next one-step * timestamping packet. And send one skb in * tx_skbs queue if has. @@ -1397,8 +1435,7 @@ static void enetc_get_offloads(struct enetc_bdr *rx_ring, __vlan_hwaccel_put_tag(skb, tpid, le16_to_cpu(rxbd->r.vlan_opt)); } - if (IS_ENABLED(CONFIG_FSL_ENETC_PTP_CLOCK) && - (priv->active_offloads & ENETC_F_RX_TSTAMP)) + if (priv->active_offloads & ENETC_F_RX_TSTAMP) enetc_get_rx_tstamp(rx_ring->ndev, rxbd, skb); } @@ -3301,7 +3338,7 @@ int enetc_hwtstamp_set(struct net_device *ndev, struct enetc_ndev_priv *priv = netdev_priv(ndev); int err, new_offloads = priv->active_offloads; - if (!IS_ENABLED(CONFIG_FSL_ENETC_PTP_CLOCK)) + if (!enetc_ptp_clock_is_enabled(priv->si)) return -EOPNOTSUPP; switch (config->tx_type) { @@ -3351,7 +3388,7 @@ int enetc_hwtstamp_get(struct net_device *ndev, { struct enetc_ndev_priv *priv = netdev_priv(ndev); - if (!IS_ENABLED(CONFIG_FSL_ENETC_PTP_CLOCK)) + if (!enetc_ptp_clock_is_enabled(priv->si)) return -EOPNOTSUPP; if (priv->active_offloads & ENETC_F_TX_ONESTEP_SYNC_TSTAMP) diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h index 62e8ee4d2f04..815afdc2ec23 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.h +++ b/drivers/net/ethernet/freescale/enetc/enetc.h @@ -54,6 +54,15 @@ struct enetc_tx_swbd { u8 qbv_en:1; }; +struct enetc_skb_cb { + u8 flag; + bool udp; + u16 correction_off; + u16 origin_tstamp_off; +}; + +#define ENETC_SKB_CB(skb) ((struct enetc_skb_cb *)((skb)->cb)) + struct enetc_lso_t { bool ipv6; bool tcp; @@ -217,7 +226,7 @@ static inline union enetc_rx_bd *enetc_rxbd(struct enetc_bdr *rx_ring, int i) { int hw_idx = i; - if (IS_ENABLED(CONFIG_FSL_ENETC_PTP_CLOCK) && rx_ring->ext_en) + if (rx_ring->ext_en) hw_idx = 2 * i; return &(((union enetc_rx_bd *)rx_ring->bd_base)[hw_idx]); @@ -231,7 +240,7 @@ static inline void enetc_rxbd_next(struct enetc_bdr *rx_ring, new_rxbd++; - if (IS_ENABLED(CONFIG_FSL_ENETC_PTP_CLOCK) && rx_ring->ext_en) + if (rx_ring->ext_en) new_rxbd++; if (unlikely(++new_index == rx_ring->bd_count)) { @@ -589,6 +598,14 @@ static inline void enetc_cbd_free_data_mem(struct enetc_si *si, int size, void enetc_reset_ptcmsdur(struct enetc_hw *hw); void 
enetc_set_ptcmsdur(struct enetc_hw *hw, u32 *queue_max_sdu); +static inline bool enetc_ptp_clock_is_enabled(struct enetc_si *si) +{ + if (is_enetc_rev1(si)) + return IS_ENABLED(CONFIG_FSL_ENETC_PTP_CLOCK); + + return IS_ENABLED(CONFIG_PTP_NETC_V4_TIMER); +} + #ifdef CONFIG_FSL_ENETC_QOS int enetc_qos_query_caps(struct net_device *ndev, void *type_data); int enetc_setup_tc_taprio(struct net_device *ndev, void *type_data); diff --git a/drivers/net/ethernet/freescale/enetc/enetc4_hw.h b/drivers/net/ethernet/freescale/enetc/enetc4_hw.h index aa25b445d301..19bf0e89cdc2 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc4_hw.h +++ b/drivers/net/ethernet/freescale/enetc/enetc4_hw.h @@ -171,6 +171,12 @@ /* Port MAC 0/1 Pause Quanta Threshold Register */ #define ENETC4_PM_PAUSE_THRESH(mac) (0x5064 + (mac) * 0x400) +#define ENETC4_PM_SINGLE_STEP(mac) (0x50c0 + (mac) * 0x400) +#define PM_SINGLE_STEP_CH BIT(6) +#define PM_SINGLE_STEP_OFFSET GENMASK(15, 7) +#define PM_SINGLE_STEP_OFFSET_SET(o) FIELD_PREP(PM_SINGLE_STEP_OFFSET, o) +#define PM_SINGLE_STEP_EN BIT(31) + /* Port MAC 0 Interface Mode Control Register */ #define ENETC4_PM_IF_MODE(mac) (0x5300 + (mac) * 0x400) #define PM_IF_MODE_IFMODE GENMASK(2, 0) diff --git a/drivers/net/ethernet/freescale/enetc/enetc4_pf.c b/drivers/net/ethernet/freescale/enetc/enetc4_pf.c index b3dc1afeefd1..2e07b9b746e1 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc4_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc4_pf.c @@ -569,6 +569,9 @@ static const struct net_device_ops enetc4_ndev_ops = { .ndo_set_features = enetc4_pf_set_features, .ndo_vlan_rx_add_vid = enetc_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = enetc_vlan_rx_del_vid, + .ndo_eth_ioctl = enetc_ioctl, + .ndo_hwtstamp_get = enetc_hwtstamp_get, + .ndo_hwtstamp_set = enetc_hwtstamp_set, }; static struct phylink_pcs * @@ -1016,8 +1019,7 @@ static int enetc4_pf_probe(struct pci_dev *pdev, err = devm_add_action_or_reset(dev, enetc4_pci_remove, pdev); if (err) - return dev_err_probe(dev, err, - "Add enetc4_pci_remove() action failed\n"); + return err; /* si is the private data. 
*/ si = pci_get_drvdata(pdev); diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c index 961e76cd8489..6215e9c68fc5 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c @@ -4,6 +4,9 @@ #include <linux/ethtool_netlink.h> #include <linux/net_tstamp.h> #include <linux/module.h> +#include <linux/of.h> +#include <linux/ptp_clock_kernel.h> + #include "enetc.h" static const u32 enetc_si_regs[] = { @@ -877,23 +880,54 @@ static int enetc_set_coalesce(struct net_device *ndev, return 0; } -static int enetc_get_ts_info(struct net_device *ndev, - struct kernel_ethtool_ts_info *info) +static int enetc4_get_phc_index_by_pdev(struct enetc_si *si) { - struct enetc_ndev_priv *priv = netdev_priv(ndev); - int *phc_idx; - - phc_idx = symbol_get(enetc_phc_index); - if (phc_idx) { - info->phc_index = *phc_idx; - symbol_put(enetc_phc_index); + struct pci_bus *bus = si->pdev->bus; + struct pci_dev *timer_pdev; + unsigned int devfn; + int phc_index; + + switch (si->revision) { + case ENETC_REV_4_1: + devfn = PCI_DEVFN(24, 0); + break; + default: + return -1; } - if (!IS_ENABLED(CONFIG_FSL_ENETC_PTP_CLOCK)) { - info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE; + timer_pdev = pci_get_slot(bus, devfn); + if (!timer_pdev) + return -1; - return 0; - } + phc_index = ptp_clock_index_by_dev(&timer_pdev->dev); + pci_dev_put(timer_pdev); + + return phc_index; +} + +static int enetc4_get_phc_index(struct enetc_si *si) +{ + struct device_node *np = si->pdev->dev.of_node; + struct device_node *timer_np; + int phc_index; + + if (!np) + return enetc4_get_phc_index_by_pdev(si); + + timer_np = of_parse_phandle(np, "ptp-timer", 0); + if (!timer_np) + return enetc4_get_phc_index_by_pdev(si); + + phc_index = ptp_clock_index_by_of_node(timer_np); + of_node_put(timer_np); + + return phc_index; +} + +static void enetc_get_ts_generic_info(struct net_device *ndev, + struct kernel_ethtool_ts_info *info) +{ + struct enetc_ndev_priv *priv = netdev_priv(ndev); info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE | SOF_TIMESTAMPING_RX_HARDWARE | @@ -908,6 +942,36 @@ static int enetc_get_ts_info(struct net_device *ndev, info->rx_filters = (1 << HWTSTAMP_FILTER_NONE) | (1 << HWTSTAMP_FILTER_ALL); +} + +static int enetc_get_ts_info(struct net_device *ndev, + struct kernel_ethtool_ts_info *info) +{ + struct enetc_ndev_priv *priv = netdev_priv(ndev); + struct enetc_si *si = priv->si; + int *phc_idx; + + if (!enetc_ptp_clock_is_enabled(si)) + goto timestamp_tx_sw; + + if (is_enetc_rev1(si)) { + phc_idx = symbol_get(enetc_phc_index); + if (phc_idx) { + info->phc_index = *phc_idx; + symbol_put(enetc_phc_index); + } + } else { + info->phc_index = enetc4_get_phc_index(si); + if (info->phc_index < 0) + goto timestamp_tx_sw; + } + + enetc_get_ts_generic_info(ndev, info); + + return 0; + +timestamp_tx_sw: + info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE; return 0; } @@ -1296,6 +1360,7 @@ const struct ethtool_ops enetc4_pf_ethtool_ops = { .get_rxfh = enetc_get_rxfh, .set_rxfh = enetc_set_rxfh, .get_rxfh_fields = enetc_get_rxfh_fields, + .get_ts_info = enetc_get_ts_info, }; void enetc_set_ethtool_ops(struct net_device *ndev) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_hw.h b/drivers/net/ethernet/freescale/enetc/enetc_hw.h index 73763e8f4879..377c96325814 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_hw.h +++ b/drivers/net/ethernet/freescale/enetc/enetc_hw.h @@ -614,6 +614,7 @@ enum 
enetc_txbd_flags { #define ENETC_TXBD_STATS_WIN BIT(7) #define ENETC_TXBD_TXSTART_MASK GENMASK(24, 0) #define ENETC_TXBD_FLAGS_OFFSET 24 +#define ENETC_TXBD_TSTAMP GENMASK(29, 0) static inline __le32 enetc_txbd_set_tx_start(u64 tx_start, u8 flags) { diff --git a/drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c b/drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c index 8f5021e59e0a..0e2b703c673a 100644 --- a/drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c @@ -260,6 +260,11 @@ struct page_pool *gve_rx_create_page_pool(struct gve_priv *priv, .offset = xdp ? XDP_PACKET_HEADROOM : 0, }; + if (priv->header_split_enabled) { + pp.flags |= PP_FLAG_ALLOW_UNREADABLE_NETMEM; + pp.queue_idx = rx->q_num; + } + return page_pool_create(&pp); } diff --git a/drivers/net/ethernet/google/gve/gve_rx_dqo.c b/drivers/net/ethernet/google/gve/gve_rx_dqo.c index 7380c2b7a2d8..55393b784317 100644 --- a/drivers/net/ethernet/google/gve/gve_rx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_rx_dqo.c @@ -718,6 +718,24 @@ static int gve_rx_xsk_dqo(struct napi_struct *napi, struct gve_rx_ring *rx, return 0; } +static void gve_dma_sync(struct gve_priv *priv, struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state, u16 buf_len) +{ + struct gve_rx_slot_page_info *page_info = &buf_state->page_info; + + if (rx->dqo.page_pool) { + page_pool_dma_sync_netmem_for_cpu(rx->dqo.page_pool, + page_info->netmem, + page_info->page_offset, + buf_len); + } else { + dma_sync_single_range_for_cpu(&priv->pdev->dev, buf_state->addr, + page_info->page_offset + + page_info->pad, + buf_len, DMA_FROM_DEVICE); + } +} + /* Returns 0 if descriptor is completed successfully. * Returns -EINVAL if descriptor is invalid. * Returns -ENOMEM if data cannot be copied to skb. @@ -793,13 +811,18 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx, rx->rx_hsplit_unsplit_pkt += unsplit; rx->rx_hsplit_bytes += hdr_len; u64_stats_update_end(&rx->statss); + } else if (!rx->ctx.skb_head && rx->dqo.page_pool && + netmem_is_net_iov(buf_state->page_info.netmem)) { + /* When header split is disabled, the header goes into the + * packet buffer. If the packet buffer is a net_iov, it cannot + * easily be mapped into kernel space to access the header + * required to process the packet. + */ + goto error; } /* Sync the portion of dma buffer for CPU to read. */ - dma_sync_single_range_for_cpu(&priv->pdev->dev, buf_state->addr, - buf_state->page_info.page_offset + - buf_state->page_info.pad, - buf_len, DMA_FROM_DEVICE); + gve_dma_sync(priv, rx, buf_state, buf_len); /* Append to current skb if one exists. 
*/ if (rx->ctx.skb_head) { @@ -837,7 +860,9 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx, u64_stats_update_end(&rx->statss); } - if (eop && buf_len <= priv->rx_copybreak) { + if (eop && buf_len <= priv->rx_copybreak && + !(rx->dqo.page_pool && + netmem_is_net_iov(buf_state->page_info.netmem))) { rx->ctx.skb_head = gve_rx_copy(priv->dev, napi, &buf_state->page_info, buf_len); if (unlikely(!rx->ctx.skb_head)) diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_main.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_main.c index 2e64dc1ab355..0b92a2e5e986 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_main.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_main.c @@ -417,7 +417,7 @@ static int hbg_pci_init(struct pci_dev *pdev) priv->io_base = pcim_iomap_table(pdev)[0]; if (!priv->io_base) - return dev_err_probe(dev, -ENOMEM, "failed to get io base\n"); + return -ENOMEM; pci_set_master(pdev); return 0; diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_mdio.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_mdio.c index 8b7b476ed7fb..37791de47f6f 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_mdio.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_mdio.c @@ -278,8 +278,7 @@ int hbg_mdio_init(struct hbg_priv *priv) mdio_bus = devm_mdiobus_alloc(dev); if (!mdio_bus) - return dev_err_probe(dev, -ENOMEM, - "failed to alloc MDIO bus\n"); + return -ENOMEM; mdio_bus->parent = dev; mdio_bus->priv = priv; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c index 0255c8acb744..4cce4f4ba6b0 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c @@ -843,7 +843,7 @@ static int hns3_dbg_bd_file_init(struct hnae3_handle *handle, u32 cmd) entry_dir = hns3_dbg_dentry[hns3_dbg_cmd[cmd].dentry].dentry; max_queue_num = hns3_get_max_available_channels(handle); - data = devm_kzalloc(&handle->pdev->dev, max_queue_num * sizeof(*data), + data = devm_kcalloc(&handle->pdev->dev, max_queue_num, sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c index d5454e126c85..a752d0e3db3a 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c @@ -1927,6 +1927,31 @@ static int hns3_set_tx_spare_buf_size(struct net_device *netdev, return ret; } +static int hns3_check_tx_copybreak(struct net_device *netdev, u32 copybreak) +{ + struct hns3_nic_priv *priv = netdev_priv(netdev); + + if (copybreak < priv->min_tx_copybreak) { + netdev_err(netdev, "tx copybreak %u should be no less than %u!\n", + copybreak, priv->min_tx_copybreak); + return -EINVAL; + } + return 0; +} + +static int hns3_check_tx_spare_buf_size(struct net_device *netdev, u32 buf_size) +{ + struct hns3_nic_priv *priv = netdev_priv(netdev); + + if (buf_size < priv->min_tx_spare_buf_size) { + netdev_err(netdev, + "tx spare buf size %u should be no less than %u!\n", + buf_size, priv->min_tx_spare_buf_size); + return -EINVAL; + } + return 0; +} + static int hns3_set_tunable(struct net_device *netdev, const struct ethtool_tunable *tuna, const void *data) @@ -1943,6 +1968,10 @@ static int hns3_set_tunable(struct net_device *netdev, switch (tuna->id) { case ETHTOOL_TX_COPYBREAK: + ret = hns3_check_tx_copybreak(netdev, *(u32 *)data); + if (ret) + return ret; + priv->tx_copybreak = *(u32 *)data; for (i = 0; i < 
h->kinfo.num_tqps; i++) @@ -1957,6 +1986,10 @@ static int hns3_set_tunable(struct net_device *netdev, break; case ETHTOOL_TX_COPYBREAK_BUF_SIZE: + ret = hns3_check_tx_spare_buf_size(netdev, *(u32 *)data); + if (ret) + return ret; + old_tx_spare_buf_size = h->kinfo.tx_spare_buf_size; new_tx_spare_buf_size = *(u32 *)data; netdev_info(netdev, "request to set tx spare buf size from %u to %u\n", diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index f209a05e2033..f5457ae0b64f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -2182,8 +2182,8 @@ static bool hclge_drop_pfc_buf_till_fit(struct hclge_dev *hdev, return hclge_is_rx_buf_ok(hdev, buf_alloc, rx_all); } -static int hclge_only_alloc_priv_buff(struct hclge_dev *hdev, - struct hclge_pkt_buf_alloc *buf_alloc) +static bool hclge_only_alloc_priv_buff(struct hclge_dev *hdev, + struct hclge_pkt_buf_alloc *buf_alloc) { #define COMPENSATE_BUFFER 0x3C00 #define COMPENSATE_HALF_MPS_NUM 5 diff --git a/drivers/net/ethernet/huawei/hinic/hinic_devlink.c b/drivers/net/ethernet/huawei/hinic/hinic_devlink.c index 03e42512a2d5..300bc267a259 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_devlink.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_devlink.c @@ -443,8 +443,9 @@ int hinic_health_reporters_create(struct hinic_devlink_priv *priv) struct devlink *devlink = priv_to_devlink(priv); priv->hw_fault_reporter = - devlink_health_reporter_create(devlink, &hinic_hw_fault_reporter_ops, - 0, priv); + devlink_health_reporter_create(devlink, + &hinic_hw_fault_reporter_ops, + priv); if (IS_ERR(priv->hw_fault_reporter)) { dev_warn(&priv->hwdev->hwif->pdev->dev, "Failed to create hw fault reporter, err: %ld\n", PTR_ERR(priv->hw_fault_reporter)); @@ -452,8 +453,9 @@ int hinic_health_reporters_create(struct hinic_devlink_priv *priv) } priv->fw_fault_reporter = - devlink_health_reporter_create(devlink, &hinic_fw_fault_reporter_ops, - 0, priv); + devlink_health_reporter_create(devlink, + &hinic_fw_fault_reporter_ops, + priv); if (IS_ERR(priv->fw_fault_reporter)) { dev_warn(&priv->hwdev->hwif->pdev->dev, "Failed to create fw fault reporter, err: %ld\n", PTR_ERR(priv->fw_fault_reporter)); diff --git a/drivers/net/ethernet/huawei/hinic3/Makefile b/drivers/net/ethernet/huawei/hinic3/Makefile index 509dfbfb0e96..2a0ed8e2c63e 100644 --- a/drivers/net/ethernet/huawei/hinic3/Makefile +++ b/drivers/net/ethernet/huawei/hinic3/Makefile @@ -3,7 +3,9 @@ obj-$(CONFIG_HINIC3) += hinic3.o -hinic3-objs := hinic3_common.o \ +hinic3-objs := hinic3_cmdq.o \ + hinic3_common.o \ + hinic3_eqs.o \ hinic3_hw_cfg.o \ hinic3_hw_comm.o \ hinic3_hwdev.o \ diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_cmdq.c b/drivers/net/ethernet/huawei/hinic3/hinic3_cmdq.c new file mode 100644 index 000000000000..ef539d1b69a3 --- /dev/null +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_cmdq.c @@ -0,0 +1,915 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. 
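+
+/* Editor's sketch, not part of the submitted patch: the synchronous call
+ * pattern this file is built around, using only helpers defined below
+ * (hinic3_alloc_cmd_buf(), hinic3_cmdq_direct_resp(),
+ * hinic3_free_cmd_buf()); req, req_len, mod and cmd stand in for a
+ * caller's command:
+ *
+ *	struct hinic3_cmd_buf *buf = hinic3_alloc_cmd_buf(hwdev);
+ *	__le64 out = 0;
+ *	int err;
+ *
+ *	if (!buf)
+ *		return -ENOMEM;
+ *	memcpy(buf->buf, req, req_len);
+ *	buf->size = cpu_to_le16(req_len);
+ *	err = hinic3_cmdq_direct_resp(hwdev, mod, cmd, buf, &out);
+ *	hinic3_free_cmd_buf(hwdev, buf);
+ *
+ * Posting is made safe by cmdq_wqe_fill(): the WQE body is copied first,
+ * a wmb() orders the copy, and only then is the header word written, so
+ * the device never parses a half-initialized entry.
+ */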
+ +#include <linux/bitfield.h> +#include <linux/delay.h> +#include <linux/dma-mapping.h> + +#include "hinic3_cmdq.h" +#include "hinic3_hwdev.h" +#include "hinic3_hwif.h" +#include "hinic3_mbox.h" + +#define CMDQ_BUF_SIZE 2048 +#define CMDQ_WQEBB_SIZE 64 + +#define CMDQ_CMD_TIMEOUT 5000 +#define CMDQ_ENABLE_WAIT_TIMEOUT 300 + +#define CMDQ_CTXT_CURR_WQE_PAGE_PFN_MASK GENMASK_ULL(51, 0) +#define CMDQ_CTXT_EQ_ID_MASK GENMASK_ULL(60, 53) +#define CMDQ_CTXT_CEQ_ARM_MASK BIT_ULL(61) +#define CMDQ_CTXT_CEQ_EN_MASK BIT_ULL(62) +#define CMDQ_CTXT_HW_BUSY_BIT_MASK BIT_ULL(63) + +#define CMDQ_CTXT_WQ_BLOCK_PFN_MASK GENMASK_ULL(51, 0) +#define CMDQ_CTXT_CI_MASK GENMASK_ULL(63, 52) +#define CMDQ_CTXT_SET(val, member) \ + FIELD_PREP(CMDQ_CTXT_##member##_MASK, val) + +#define CMDQ_WQE_HDR_BUFDESC_LEN_MASK GENMASK(7, 0) +#define CMDQ_WQE_HDR_COMPLETE_FMT_MASK BIT(15) +#define CMDQ_WQE_HDR_DATA_FMT_MASK BIT(22) +#define CMDQ_WQE_HDR_COMPLETE_REQ_MASK BIT(23) +#define CMDQ_WQE_HDR_COMPLETE_SECT_LEN_MASK GENMASK(28, 27) +#define CMDQ_WQE_HDR_CTRL_LEN_MASK GENMASK(30, 29) +#define CMDQ_WQE_HDR_HW_BUSY_BIT_MASK BIT(31) +#define CMDQ_WQE_HDR_SET(val, member) \ + FIELD_PREP(CMDQ_WQE_HDR_##member##_MASK, val) +#define CMDQ_WQE_HDR_GET(val, member) \ + FIELD_GET(CMDQ_WQE_HDR_##member##_MASK, le32_to_cpu(val)) + +#define CMDQ_CTRL_PI_MASK GENMASK(15, 0) +#define CMDQ_CTRL_CMD_MASK GENMASK(23, 16) +#define CMDQ_CTRL_MOD_MASK GENMASK(28, 24) +#define CMDQ_CTRL_HW_BUSY_BIT_MASK BIT(31) +#define CMDQ_CTRL_SET(val, member) \ + FIELD_PREP(CMDQ_CTRL_##member##_MASK, val) +#define CMDQ_CTRL_GET(val, member) \ + FIELD_GET(CMDQ_CTRL_##member##_MASK, val) + +#define CMDQ_WQE_ERRCODE_VAL_MASK GENMASK(30, 0) +#define CMDQ_WQE_ERRCODE_GET(val, member) \ + FIELD_GET(CMDQ_WQE_ERRCODE_##member##_MASK, le32_to_cpu(val)) + +#define CMDQ_DB_INFO_HI_PROD_IDX_MASK GENMASK(7, 0) +#define CMDQ_DB_INFO_SET(val, member) \ + FIELD_PREP(CMDQ_DB_INFO_##member##_MASK, val) + +#define CMDQ_DB_HEAD_QUEUE_TYPE_MASK BIT(23) +#define CMDQ_DB_HEAD_CMDQ_TYPE_MASK GENMASK(26, 24) +#define CMDQ_DB_HEAD_SET(val, member) \ + FIELD_PREP(CMDQ_DB_HEAD_##member##_MASK, val) + +#define CMDQ_CEQE_TYPE_MASK GENMASK(2, 0) +#define CMDQ_CEQE_GET(val, member) \ + FIELD_GET(CMDQ_CEQE_##member##_MASK, le32_to_cpu(val)) + +#define CMDQ_WQE_HEADER(wqe) ((struct cmdq_header *)(wqe)) +#define CMDQ_WQE_COMPLETED(ctrl_info) \ + CMDQ_CTRL_GET(le32_to_cpu(ctrl_info), HW_BUSY_BIT) + +#define CMDQ_PFN(addr) ((addr) >> 12) + +/* cmdq work queue's chip logical address table is up to 512B */ +#define CMDQ_WQ_CLA_SIZE 512 + +/* Completion codes: send, direct sync, force stop */ +#define CMDQ_SEND_CMPT_CODE 10 +#define CMDQ_DIRECT_SYNC_CMPT_CODE 11 +#define CMDQ_FORCE_STOP_CMPT_CODE 12 + +enum cmdq_data_format { + CMDQ_DATA_SGE = 0, + CMDQ_DATA_DIRECT = 1, +}; + +enum cmdq_ctrl_sect_len { + CMDQ_CTRL_SECT_LEN = 1, + CMDQ_CTRL_DIRECT_SECT_LEN = 2, +}; + +enum cmdq_bufdesc_len { + CMDQ_BUFDESC_LCMD_LEN = 2, + CMDQ_BUFDESC_SCMD_LEN = 3, +}; + +enum cmdq_completion_format { + CMDQ_COMPLETE_DIRECT = 0, + CMDQ_COMPLETE_SGE = 1, +}; + +enum cmdq_cmd_type { + CMDQ_CMD_DIRECT_RESP, + CMDQ_CMD_SGE_RESP, +}; + +#define CMDQ_WQE_NUM_WQEBBS 1 + +static struct cmdq_wqe *cmdq_read_wqe(struct hinic3_wq *wq, u16 *ci) +{ + if (hinic3_wq_get_used(wq) == 0) + return NULL; + + *ci = wq->cons_idx & wq->idx_mask; + + return get_q_element(&wq->qpages, wq->cons_idx, NULL); +} + +struct hinic3_cmd_buf *hinic3_alloc_cmd_buf(struct hinic3_hwdev *hwdev) +{ + struct hinic3_cmd_buf *cmd_buf; + struct 
hinic3_cmdqs *cmdqs; + + cmdqs = hwdev->cmdqs; + + cmd_buf = kmalloc(sizeof(*cmd_buf), GFP_ATOMIC); + if (!cmd_buf) + return NULL; + + cmd_buf->buf = dma_pool_alloc(cmdqs->cmd_buf_pool, GFP_ATOMIC, + &cmd_buf->dma_addr); + if (!cmd_buf->buf) { + dev_err(hwdev->dev, "Failed to allocate cmdq cmd buf from the pool\n"); + goto err_free_cmd_buf; + } + + cmd_buf->size = cpu_to_le16(CMDQ_BUF_SIZE); + refcount_set(&cmd_buf->ref_cnt, 1); + + return cmd_buf; + +err_free_cmd_buf: + kfree(cmd_buf); + + return NULL; +} + +void hinic3_free_cmd_buf(struct hinic3_hwdev *hwdev, + struct hinic3_cmd_buf *cmd_buf) +{ + struct hinic3_cmdqs *cmdqs; + + if (!refcount_dec_and_test(&cmd_buf->ref_cnt)) + return; + + cmdqs = hwdev->cmdqs; + + dma_pool_free(cmdqs->cmd_buf_pool, cmd_buf->buf, cmd_buf->dma_addr); + kfree(cmd_buf); +} + +static void cmdq_clear_cmd_buf(struct hinic3_cmdq_cmd_info *cmd_info, + struct hinic3_hwdev *hwdev) +{ + if (cmd_info->buf_in) { + hinic3_free_cmd_buf(hwdev, cmd_info->buf_in); + cmd_info->buf_in = NULL; + } +} + +static void clear_wqe_complete_bit(struct hinic3_cmdq *cmdq, + struct cmdq_wqe *wqe, u16 ci) +{ + struct cmdq_header *hdr = CMDQ_WQE_HEADER(wqe); + __le32 header_info = hdr->header_info; + enum cmdq_data_format df; + struct cmdq_ctrl *ctrl; + + df = CMDQ_WQE_HDR_GET(header_info, DATA_FMT); + if (df == CMDQ_DATA_SGE) + ctrl = &wqe->wqe_lcmd.ctrl; + else + ctrl = &wqe->wqe_scmd.ctrl; + + /* clear HW busy bit */ + ctrl->ctrl_info = 0; + cmdq->cmd_infos[ci].cmd_type = HINIC3_CMD_TYPE_NONE; + wmb(); /* verify wqe is clear before updating ci */ + hinic3_wq_put_wqebbs(&cmdq->wq, CMDQ_WQE_NUM_WQEBBS); +} + +static void cmdq_update_cmd_status(struct hinic3_cmdq *cmdq, u16 prod_idx, + struct cmdq_wqe *wqe) +{ + struct hinic3_cmdq_cmd_info *cmd_info; + struct cmdq_wqe_lcmd *wqe_lcmd; + __le32 status_info; + + wqe_lcmd = &wqe->wqe_lcmd; + cmd_info = &cmdq->cmd_infos[prod_idx]; + if (cmd_info->errcode) { + status_info = wqe_lcmd->status.status_info; + *cmd_info->errcode = CMDQ_WQE_ERRCODE_GET(status_info, VAL); + } + + if (cmd_info->direct_resp) + *cmd_info->direct_resp = wqe_lcmd->completion.resp.direct.val; +} + +static void cmdq_sync_cmd_handler(struct hinic3_cmdq *cmdq, + struct cmdq_wqe *wqe, u16 ci) +{ + spin_lock(&cmdq->cmdq_lock); + cmdq_update_cmd_status(cmdq, ci, wqe); + if (cmdq->cmd_infos[ci].cmpt_code) { + *cmdq->cmd_infos[ci].cmpt_code = CMDQ_DIRECT_SYNC_CMPT_CODE; + cmdq->cmd_infos[ci].cmpt_code = NULL; + } + + /* Ensure that completion code has been updated before updating done */ + smp_wmb(); + if (cmdq->cmd_infos[ci].done) { + complete(cmdq->cmd_infos[ci].done); + cmdq->cmd_infos[ci].done = NULL; + } + spin_unlock(&cmdq->cmdq_lock); + + cmdq_clear_cmd_buf(&cmdq->cmd_infos[ci], cmdq->hwdev); + clear_wqe_complete_bit(cmdq, wqe, ci); +} + +void hinic3_cmdq_ceq_handler(struct hinic3_hwdev *hwdev, __le32 ceqe_data) +{ + enum hinic3_cmdq_type cmdq_type = CMDQ_CEQE_GET(ceqe_data, TYPE); + struct hinic3_cmdqs *cmdqs = hwdev->cmdqs; + struct hinic3_cmdq_cmd_info *cmd_info; + struct cmdq_wqe_lcmd *wqe_lcmd; + struct hinic3_cmdq *cmdq; + struct cmdq_wqe *wqe; + __le32 ctrl_info; + u16 ci; + + if (unlikely(cmdq_type >= ARRAY_SIZE(cmdqs->cmdq))) + return; + + cmdq = &cmdqs->cmdq[cmdq_type]; + while ((wqe = cmdq_read_wqe(&cmdq->wq, &ci)) != NULL) { + cmd_info = &cmdq->cmd_infos[ci]; + switch (cmd_info->cmd_type) { + case HINIC3_CMD_TYPE_NONE: + return; + case HINIC3_CMD_TYPE_TIMEOUT: + dev_warn(hwdev->dev, "Cmdq timeout, q_id: %u, ci: %u\n", + cmdq_type, ci); + fallthrough; + case 
HINIC3_CMD_TYPE_FAKE_TIMEOUT: + cmdq_clear_cmd_buf(cmd_info, hwdev); + clear_wqe_complete_bit(cmdq, wqe, ci); + break; + default: + /* only the arm bit uses an scmd wqe; + * all other wqes are lcmd + */ + wqe_lcmd = &wqe->wqe_lcmd; + ctrl_info = wqe_lcmd->ctrl.ctrl_info; + if (!CMDQ_WQE_COMPLETED(ctrl_info)) + return; + + dma_rmb(); + /* For FORCE_STOP cmd_type, we also need to wait for + * the firmware processing to complete to prevent the + * firmware from accessing the released cmd_buf + */ + if (cmd_info->cmd_type == HINIC3_CMD_TYPE_FORCE_STOP) { + cmdq_clear_cmd_buf(cmd_info, hwdev); + clear_wqe_complete_bit(cmdq, wqe, ci); + } else { + cmdq_sync_cmd_handler(cmdq, wqe, ci); + } + + break; + } + } +} + +static int wait_cmdqs_enable(struct hinic3_cmdqs *cmdqs) +{ + unsigned long end; + + end = jiffies + msecs_to_jiffies(CMDQ_ENABLE_WAIT_TIMEOUT); + do { + if (cmdqs->status & HINIC3_CMDQ_ENABLE) + return 0; + usleep_range(1000, 2000); + } while (time_before(jiffies, end) && !cmdqs->disable_flag); + + cmdqs->disable_flag = 1; + + return -EBUSY; +} + +static void cmdq_set_completion(struct cmdq_completion *complete, + struct hinic3_cmd_buf *buf_out) +{ + struct hinic3_sge *sge = &complete->resp.sge; + + hinic3_set_sge(sge, buf_out->dma_addr, cpu_to_le32(CMDQ_BUF_SIZE)); +} + +static struct cmdq_wqe *cmdq_get_wqe(struct hinic3_wq *wq, u16 *pi) +{ + if (!hinic3_wq_free_wqebbs(wq)) + return NULL; + + return hinic3_wq_get_one_wqebb(wq, pi); +} + +static void cmdq_set_lcmd_bufdesc(struct cmdq_wqe_lcmd *wqe, + struct hinic3_cmd_buf *buf_in) +{ + hinic3_set_sge(&wqe->buf_desc.sge, buf_in->dma_addr, + (__force __le32)buf_in->size); +} + +static void cmdq_set_db(struct hinic3_cmdq *cmdq, + enum hinic3_cmdq_type cmdq_type, u16 prod_idx) +{ + u8 __iomem *db_base = cmdq->hwdev->cmdqs->cmdqs_db_base; + u16 db_ofs = (prod_idx & 0xFF) << 3; + struct cmdq_db db; + + db.db_info = cpu_to_le32(CMDQ_DB_INFO_SET(prod_idx >> 8, HI_PROD_IDX)); + db.db_head = cpu_to_le32(CMDQ_DB_HEAD_SET(1, QUEUE_TYPE) | + CMDQ_DB_HEAD_SET(cmdq_type, CMDQ_TYPE)); + writeq(*(u64 *)&db, db_base + db_ofs); +} + +static void cmdq_wqe_fill(struct cmdq_wqe *hw_wqe, + const struct cmdq_wqe *shadow_wqe) +{ + const struct cmdq_header *src = (struct cmdq_header *)shadow_wqe; + struct cmdq_header *dst = (struct cmdq_header *)hw_wqe; + size_t len; + + len = sizeof(struct cmdq_wqe) - sizeof(struct cmdq_header); + memcpy(dst + 1, src + 1, len); + /* Ensure the WQE body is written before updating the header */ + wmb(); + WRITE_ONCE(*dst, *src); +} + +static void cmdq_prepare_wqe_ctrl(struct cmdq_wqe *wqe, u8 wrapped, + u8 mod, u8 cmd, u16 prod_idx, + enum cmdq_completion_format complete_format, + enum cmdq_data_format data_format, + enum cmdq_bufdesc_len buf_len) +{ + struct cmdq_header *hdr = CMDQ_WQE_HEADER(wqe); + enum cmdq_ctrl_sect_len ctrl_len; + struct cmdq_wqe_lcmd *wqe_lcmd; + struct cmdq_wqe_scmd *wqe_scmd; + struct cmdq_ctrl *ctrl; + + if (data_format == CMDQ_DATA_SGE) { + wqe_lcmd = &wqe->wqe_lcmd; + wqe_lcmd->status.status_info = 0; + ctrl = &wqe_lcmd->ctrl; + ctrl_len = CMDQ_CTRL_SECT_LEN; + } else { + wqe_scmd = &wqe->wqe_scmd; + wqe_scmd->status.status_info = 0; + ctrl = &wqe_scmd->ctrl; + ctrl_len = CMDQ_CTRL_DIRECT_SECT_LEN; + } + + ctrl->ctrl_info = + cpu_to_le32(CMDQ_CTRL_SET(prod_idx, PI) | + CMDQ_CTRL_SET(cmd, CMD) | + CMDQ_CTRL_SET(mod, MOD)); + + hdr->header_info = + cpu_to_le32(CMDQ_WQE_HDR_SET(buf_len, BUFDESC_LEN) | + CMDQ_WQE_HDR_SET(complete_format, COMPLETE_FMT) | + CMDQ_WQE_HDR_SET(data_format, DATA_FMT) | + CMDQ_WQE_HDR_SET(1, 
COMPLETE_REQ) | + CMDQ_WQE_HDR_SET(3, COMPLETE_SECT_LEN) | + CMDQ_WQE_HDR_SET(ctrl_len, CTRL_LEN) | + CMDQ_WQE_HDR_SET(wrapped, HW_BUSY_BIT)); +} + +static void cmdq_set_lcmd_wqe(struct cmdq_wqe *wqe, + enum cmdq_cmd_type cmd_type, + struct hinic3_cmd_buf *buf_in, + struct hinic3_cmd_buf *buf_out, + u8 wrapped, u8 mod, u8 cmd, u16 prod_idx) +{ + enum cmdq_completion_format complete_format = CMDQ_COMPLETE_DIRECT; + struct cmdq_wqe_lcmd *wqe_lcmd = &wqe->wqe_lcmd; + + switch (cmd_type) { + case CMDQ_CMD_DIRECT_RESP: + wqe_lcmd->completion.resp.direct.val = 0; + break; + case CMDQ_CMD_SGE_RESP: + if (buf_out) { + complete_format = CMDQ_COMPLETE_SGE; + cmdq_set_completion(&wqe_lcmd->completion, buf_out); + } + break; + } + + cmdq_prepare_wqe_ctrl(wqe, wrapped, mod, cmd, prod_idx, complete_format, + CMDQ_DATA_SGE, CMDQ_BUFDESC_LCMD_LEN); + cmdq_set_lcmd_bufdesc(wqe_lcmd, buf_in); +} + +static int hinic3_cmdq_sync_timeout_check(struct hinic3_cmdq *cmdq, + struct cmdq_wqe *wqe, u16 pi) +{ + struct cmdq_wqe_lcmd *wqe_lcmd; + struct cmdq_ctrl *ctrl; + __le32 ctrl_info; + + wqe_lcmd = &wqe->wqe_lcmd; + ctrl = &wqe_lcmd->ctrl; + ctrl_info = ctrl->ctrl_info; + if (!CMDQ_WQE_COMPLETED(ctrl_info)) { + dev_dbg(cmdq->hwdev->dev, "Cmdq sync command check busy bit not set\n"); + return -EFAULT; + } + cmdq_update_cmd_status(cmdq, pi, wqe); + + return 0; +} + +static void clear_cmd_info(struct hinic3_cmdq_cmd_info *cmd_info, + const struct hinic3_cmdq_cmd_info *saved_cmd_info) +{ + if (cmd_info->errcode == saved_cmd_info->errcode) + cmd_info->errcode = NULL; + + if (cmd_info->done == saved_cmd_info->done) + cmd_info->done = NULL; + + if (cmd_info->direct_resp == saved_cmd_info->direct_resp) + cmd_info->direct_resp = NULL; +} + +static int wait_cmdq_sync_cmd_completion(struct hinic3_cmdq *cmdq, + struct hinic3_cmdq_cmd_info *cmd_info, + struct hinic3_cmdq_cmd_info *saved_cmd_info, + u64 curr_msg_id, u16 curr_prod_idx, + struct cmdq_wqe *curr_wqe, + u32 timeout) +{ + ulong timeo = msecs_to_jiffies(timeout); + int err; + + if (wait_for_completion_timeout(saved_cmd_info->done, timeo)) + return 0; + + spin_lock_bh(&cmdq->cmdq_lock); + if (cmd_info->cmpt_code == saved_cmd_info->cmpt_code) + cmd_info->cmpt_code = NULL; + + if (*saved_cmd_info->cmpt_code == CMDQ_DIRECT_SYNC_CMPT_CODE) { + dev_dbg(cmdq->hwdev->dev, "Cmdq direct sync command has been completed\n"); + spin_unlock_bh(&cmdq->cmdq_lock); + return 0; + } + + if (curr_msg_id == cmd_info->cmdq_msg_id) { + err = hinic3_cmdq_sync_timeout_check(cmdq, curr_wqe, + curr_prod_idx); + if (err) + cmd_info->cmd_type = HINIC3_CMD_TYPE_TIMEOUT; + else + cmd_info->cmd_type = HINIC3_CMD_TYPE_FAKE_TIMEOUT; + } else { + err = -ETIMEDOUT; + dev_err(cmdq->hwdev->dev, + "Cmdq sync command current msg id mismatch cmd_info msg id\n"); + } + + clear_cmd_info(cmd_info, saved_cmd_info); + spin_unlock_bh(&cmdq->cmdq_lock); + + return err; +} + +static int cmdq_sync_cmd_direct_resp(struct hinic3_cmdq *cmdq, u8 mod, u8 cmd, + struct hinic3_cmd_buf *buf_in, + __le64 *out_param) +{ + struct hinic3_cmdq_cmd_info *cmd_info, saved_cmd_info; + int cmpt_code = CMDQ_SEND_CMPT_CODE; + struct cmdq_wqe *curr_wqe, wqe = {}; + struct hinic3_wq *wq = &cmdq->wq; + u16 curr_prod_idx, next_prod_idx; + struct completion done; + u64 curr_msg_id; + int errcode; + u8 wrapped; + int err; + + spin_lock_bh(&cmdq->cmdq_lock); + curr_wqe = cmdq_get_wqe(wq, &curr_prod_idx); + if (!curr_wqe) { + spin_unlock_bh(&cmdq->cmdq_lock); + return -EBUSY; + } + + wrapped = cmdq->wrapped; + next_prod_idx = curr_prod_idx + 
CMDQ_WQE_NUM_WQEBBS; + if (next_prod_idx >= wq->q_depth) { + cmdq->wrapped ^= 1; + next_prod_idx -= wq->q_depth; + } + + cmd_info = &cmdq->cmd_infos[curr_prod_idx]; + init_completion(&done); + refcount_inc(&buf_in->ref_cnt); + cmd_info->cmd_type = HINIC3_CMD_TYPE_DIRECT_RESP; + cmd_info->done = &done; + cmd_info->errcode = &errcode; + cmd_info->direct_resp = out_param; + cmd_info->cmpt_code = &cmpt_code; + cmd_info->buf_in = buf_in; + saved_cmd_info = *cmd_info; + cmdq_set_lcmd_wqe(&wqe, CMDQ_CMD_DIRECT_RESP, buf_in, NULL, + wrapped, mod, cmd, curr_prod_idx); + + cmdq_wqe_fill(curr_wqe, &wqe); + (cmd_info->cmdq_msg_id)++; + curr_msg_id = cmd_info->cmdq_msg_id; + cmdq_set_db(cmdq, HINIC3_CMDQ_SYNC, next_prod_idx); + spin_unlock_bh(&cmdq->cmdq_lock); + + err = wait_cmdq_sync_cmd_completion(cmdq, cmd_info, &saved_cmd_info, + curr_msg_id, curr_prod_idx, + curr_wqe, CMDQ_CMD_TIMEOUT); + if (err) { + dev_err(cmdq->hwdev->dev, + "Cmdq sync command timeout, mod: %u, cmd: %u, prod idx: 0x%x\n", + mod, cmd, curr_prod_idx); + err = -ETIMEDOUT; + } + + if (cmpt_code == CMDQ_FORCE_STOP_CMPT_CODE) { + dev_dbg(cmdq->hwdev->dev, + "Force stop cmdq cmd, mod: %u, cmd: %u\n", mod, cmd); + err = -EAGAIN; + } + + smp_rmb(); /* read error code after completion */ + + return err ? err : errcode; +} + +int hinic3_cmdq_direct_resp(struct hinic3_hwdev *hwdev, u8 mod, u8 cmd, + struct hinic3_cmd_buf *buf_in, __le64 *out_param) +{ + struct hinic3_cmdqs *cmdqs; + int err; + + cmdqs = hwdev->cmdqs; + err = wait_cmdqs_enable(cmdqs); + if (err) { + dev_err(hwdev->dev, "Cmdq is disabled\n"); + return err; + } + + err = cmdq_sync_cmd_direct_resp(&cmdqs->cmdq[HINIC3_CMDQ_SYNC], + mod, cmd, buf_in, out_param); + + return err; +} + +static void cmdq_init_queue_ctxt(struct hinic3_hwdev *hwdev, u8 cmdq_id, + struct comm_cmdq_ctxt_info *ctxt_info) +{ + const struct hinic3_cmdqs *cmdqs; + u64 cmdq_first_block_paddr, pfn; + const struct hinic3_wq *wq; + + cmdqs = hwdev->cmdqs; + wq = &cmdqs->cmdq[cmdq_id].wq; + pfn = CMDQ_PFN(hinic3_wq_get_first_wqe_page_addr(wq)); + + ctxt_info->curr_wqe_page_pfn = + cpu_to_le64(CMDQ_CTXT_SET(1, HW_BUSY_BIT) | + CMDQ_CTXT_SET(1, CEQ_EN) | + CMDQ_CTXT_SET(1, CEQ_ARM) | + CMDQ_CTXT_SET(0, EQ_ID) | + CMDQ_CTXT_SET(pfn, CURR_WQE_PAGE_PFN)); + + if (!hinic3_wq_is_0_level_cla(wq)) { + cmdq_first_block_paddr = cmdqs->wq_block_paddr; + pfn = CMDQ_PFN(cmdq_first_block_paddr); + } + + ctxt_info->wq_block_pfn = cpu_to_le64(CMDQ_CTXT_SET(wq->cons_idx, CI) | + CMDQ_CTXT_SET(pfn, WQ_BLOCK_PFN)); +} + +static int init_cmdq(struct hinic3_cmdq *cmdq, struct hinic3_hwdev *hwdev, + enum hinic3_cmdq_type q_type) +{ + int err; + + cmdq->cmdq_type = q_type; + cmdq->wrapped = 1; + cmdq->hwdev = hwdev; + + spin_lock_init(&cmdq->cmdq_lock); + + cmdq->cmd_infos = kcalloc(cmdq->wq.q_depth, sizeof(*cmdq->cmd_infos), + GFP_KERNEL); + if (!cmdq->cmd_infos) { + err = -ENOMEM; + return err; + } + + return 0; +} + +static int hinic3_set_cmdq_ctxt(struct hinic3_hwdev *hwdev, u8 cmdq_id) +{ + struct comm_cmd_set_cmdq_ctxt cmdq_ctxt = {}; + struct mgmt_msg_params msg_params = {}; + int err; + + cmdq_init_queue_ctxt(hwdev, cmdq_id, &cmdq_ctxt.ctxt); + cmdq_ctxt.func_id = hinic3_global_func_id(hwdev); + cmdq_ctxt.cmdq_id = cmdq_id; + + mgmt_msg_params_init_default(&msg_params, &cmdq_ctxt, + sizeof(cmdq_ctxt)); + + err = hinic3_send_mbox_to_mgmt(hwdev, MGMT_MOD_COMM, + COMM_CMD_SET_CMDQ_CTXT, &msg_params); + if (err || cmdq_ctxt.head.status) { + dev_err(hwdev->dev, "Failed to set cmdq ctxt, err: %d, status: 0x%x\n", + err, 
cmdq_ctxt.head.status); + return -EFAULT; + } + + return 0; +} + +static int hinic3_set_cmdq_ctxts(struct hinic3_hwdev *hwdev) +{ + struct hinic3_cmdqs *cmdqs = hwdev->cmdqs; + u8 cmdq_type; + int err; + + for (cmdq_type = 0; cmdq_type < cmdqs->cmdq_num; cmdq_type++) { + err = hinic3_set_cmdq_ctxt(hwdev, cmdq_type); + if (err) + return err; + } + + cmdqs->status |= HINIC3_CMDQ_ENABLE; + cmdqs->disable_flag = 0; + + return 0; +} + +static int create_cmdq_wq(struct hinic3_hwdev *hwdev, + struct hinic3_cmdqs *cmdqs) +{ + u8 cmdq_type; + int err; + + for (cmdq_type = 0; cmdq_type < cmdqs->cmdq_num; cmdq_type++) { + err = hinic3_wq_create(hwdev, &cmdqs->cmdq[cmdq_type].wq, + CMDQ_DEPTH, CMDQ_WQEBB_SIZE); + if (err) { + dev_err(hwdev->dev, "Failed to create cmdq wq\n"); + goto err_destroy_wq; + } + } + + /* With 1-level Chip Logical Address (CLA), all cmdq wq page + * addresses must be placed in one wq block. + */ + if (!hinic3_wq_is_0_level_cla(&cmdqs->cmdq[HINIC3_CMDQ_SYNC].wq)) { + if (cmdqs->cmdq[HINIC3_CMDQ_SYNC].wq.qpages.num_pages > + CMDQ_WQ_CLA_SIZE / sizeof(u64)) { + err = -EINVAL; + dev_err(hwdev->dev, + "Cmdq number of wq pages exceeds limit: %lu\n", + CMDQ_WQ_CLA_SIZE / sizeof(u64)); + goto err_destroy_wq; + } + + cmdqs->wq_block_vaddr = + dma_alloc_coherent(hwdev->dev, HINIC3_MIN_PAGE_SIZE, + &cmdqs->wq_block_paddr, GFP_KERNEL); + if (!cmdqs->wq_block_vaddr) { + err = -ENOMEM; + goto err_destroy_wq; + } + + for (cmdq_type = 0; cmdq_type < cmdqs->cmdq_num; cmdq_type++) + memcpy((u8 *)cmdqs->wq_block_vaddr + + CMDQ_WQ_CLA_SIZE * cmdq_type, + cmdqs->cmdq[cmdq_type].wq.wq_block_vaddr, + cmdqs->cmdq[cmdq_type].wq.qpages.num_pages * + sizeof(__be64)); + } + + return 0; + +err_destroy_wq: + while (cmdq_type > 0) { + cmdq_type--; + hinic3_wq_destroy(hwdev, &cmdqs->cmdq[cmdq_type].wq); + } + + return err; +} + +static void destroy_cmdq_wq(struct hinic3_hwdev *hwdev, + struct hinic3_cmdqs *cmdqs) +{ + u8 cmdq_type; + + if (cmdqs->wq_block_vaddr) + dma_free_coherent(hwdev->dev, HINIC3_MIN_PAGE_SIZE, + cmdqs->wq_block_vaddr, cmdqs->wq_block_paddr); + + for (cmdq_type = 0; cmdq_type < cmdqs->cmdq_num; cmdq_type++) + hinic3_wq_destroy(hwdev, &cmdqs->cmdq[cmdq_type].wq); +} + +static int init_cmdqs(struct hinic3_hwdev *hwdev) +{ + struct hinic3_cmdqs *cmdqs; + + cmdqs = kzalloc(sizeof(*cmdqs), GFP_KERNEL); + if (!cmdqs) + return -ENOMEM; + + hwdev->cmdqs = cmdqs; + cmdqs->hwdev = hwdev; + cmdqs->cmdq_num = hwdev->max_cmdq; + + cmdqs->cmd_buf_pool = dma_pool_create("hinic3_cmdq", hwdev->dev, + CMDQ_BUF_SIZE, CMDQ_BUF_SIZE, 0); + if (!cmdqs->cmd_buf_pool) { + dev_err(hwdev->dev, "Failed to create cmdq buffer pool\n"); + kfree(cmdqs); + return -ENOMEM; + } + + return 0; +} + +static void cmdq_flush_sync_cmd(struct hinic3_cmdq_cmd_info *cmd_info) +{ + if (cmd_info->cmd_type != HINIC3_CMD_TYPE_DIRECT_RESP) + return; + + cmd_info->cmd_type = HINIC3_CMD_TYPE_FORCE_STOP; + + if (cmd_info->cmpt_code && + *cmd_info->cmpt_code == CMDQ_SEND_CMPT_CODE) + *cmd_info->cmpt_code = CMDQ_FORCE_STOP_CMPT_CODE; + + if (cmd_info->done) { + complete(cmd_info->done); + cmd_info->done = NULL; + cmd_info->cmpt_code = NULL; + cmd_info->direct_resp = NULL; + cmd_info->errcode = NULL; + } +} + +static void hinic3_cmdq_flush_cmd(struct hinic3_cmdq *cmdq) +{ + struct hinic3_cmdq_cmd_info *cmd_info; + u16 ci; + + spin_lock_bh(&cmdq->cmdq_lock); + while (cmdq_read_wqe(&cmdq->wq, &ci)) { + hinic3_wq_put_wqebbs(&cmdq->wq, CMDQ_WQE_NUM_WQEBBS); + cmd_info = &cmdq->cmd_infos[ci]; + if (cmd_info->cmd_type == HINIC3_CMD_TYPE_DIRECT_RESP) +
cmdq_flush_sync_cmd(cmd_info); + } + spin_unlock_bh(&cmdq->cmdq_lock); +} + +void hinic3_cmdq_flush_sync_cmd(struct hinic3_hwdev *hwdev) +{ + struct hinic3_cmdq *cmdq; + u16 wqe_cnt, wqe_idx, i; + struct hinic3_wq *wq; + + cmdq = &hwdev->cmdqs->cmdq[HINIC3_CMDQ_SYNC]; + spin_lock_bh(&cmdq->cmdq_lock); + wq = &cmdq->wq; + wqe_cnt = hinic3_wq_get_used(wq); + for (i = 0; i < wqe_cnt; i++) { + wqe_idx = (wq->cons_idx + i) & wq->idx_mask; + cmdq_flush_sync_cmd(cmdq->cmd_infos + wqe_idx); + } + spin_unlock_bh(&cmdq->cmdq_lock); +} + +static void hinic3_cmdq_reset_all_cmd_buf(struct hinic3_cmdq *cmdq) +{ + u16 i; + + for (i = 0; i < cmdq->wq.q_depth; i++) + cmdq_clear_cmd_buf(&cmdq->cmd_infos[i], cmdq->hwdev); +} + +int hinic3_reinit_cmdq_ctxts(struct hinic3_hwdev *hwdev) +{ + struct hinic3_cmdqs *cmdqs = hwdev->cmdqs; + u8 cmdq_type; + + for (cmdq_type = 0; cmdq_type < cmdqs->cmdq_num; cmdq_type++) { + hinic3_cmdq_flush_cmd(&cmdqs->cmdq[cmdq_type]); + hinic3_cmdq_reset_all_cmd_buf(&cmdqs->cmdq[cmdq_type]); + cmdqs->cmdq[cmdq_type].wrapped = 1; + hinic3_wq_reset(&cmdqs->cmdq[cmdq_type].wq); + } + + return hinic3_set_cmdq_ctxts(hwdev); +} + +int hinic3_cmdqs_init(struct hinic3_hwdev *hwdev) +{ + struct hinic3_cmdqs *cmdqs; + void __iomem *db_base; + u8 cmdq_type; + int err; + + err = init_cmdqs(hwdev); + if (err) + goto err_out; + + cmdqs = hwdev->cmdqs; + err = create_cmdq_wq(hwdev, cmdqs); + if (err) + goto err_free_cmdqs; + + err = hinic3_alloc_db_addr(hwdev, &db_base, NULL); + if (err) { + dev_err(hwdev->dev, "Failed to allocate doorbell address\n"); + goto err_destroy_cmdq_wq; + } + cmdqs->cmdqs_db_base = db_base; + + for (cmdq_type = 0; cmdq_type < cmdqs->cmdq_num; cmdq_type++) { + err = init_cmdq(&cmdqs->cmdq[cmdq_type], hwdev, cmdq_type); + if (err) { + dev_err(hwdev->dev, + "Failed to initialize cmdq type : %d\n", + cmdq_type); + goto err_free_cmd_infos; + } + } + + err = hinic3_set_cmdq_ctxts(hwdev); + if (err) + goto err_free_cmd_infos; + + return 0; + +err_free_cmd_infos: + while (cmdq_type > 0) { + cmdq_type--; + kfree(cmdqs->cmdq[cmdq_type].cmd_infos); + } + + hinic3_free_db_addr(hwdev, cmdqs->cmdqs_db_base); + +err_destroy_cmdq_wq: + destroy_cmdq_wq(hwdev, cmdqs); + +err_free_cmdqs: + dma_pool_destroy(cmdqs->cmd_buf_pool); + kfree(cmdqs); + +err_out: + return err; +} + +void hinic3_cmdqs_free(struct hinic3_hwdev *hwdev) +{ + struct hinic3_cmdqs *cmdqs = hwdev->cmdqs; + u8 cmdq_type; + + cmdqs->status &= ~HINIC3_CMDQ_ENABLE; + + for (cmdq_type = 0; cmdq_type < cmdqs->cmdq_num; cmdq_type++) { + hinic3_cmdq_flush_cmd(&cmdqs->cmdq[cmdq_type]); + hinic3_cmdq_reset_all_cmd_buf(&cmdqs->cmdq[cmdq_type]); + kfree(cmdqs->cmdq[cmdq_type].cmd_infos); + } + + hinic3_free_db_addr(hwdev, cmdqs->cmdqs_db_base); + destroy_cmdq_wq(hwdev, cmdqs); + dma_pool_destroy(cmdqs->cmd_buf_pool); + kfree(cmdqs); +} + +bool hinic3_cmdq_idle(struct hinic3_cmdq *cmdq) +{ + return hinic3_wq_get_used(&cmdq->wq) == 0; +} diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_cmdq.h b/drivers/net/ethernet/huawei/hinic3/hinic3_cmdq.h new file mode 100644 index 000000000000..f99c386a2780 --- /dev/null +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_cmdq.h @@ -0,0 +1,156 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. 
*/ + +#ifndef _HINIC3_CMDQ_H_ +#define _HINIC3_CMDQ_H_ + +#include <linux/dmapool.h> + +#include "hinic3_hw_intf.h" +#include "hinic3_wq.h" + +#define CMDQ_DEPTH 4096 + +struct cmdq_db { + __le32 db_head; + __le32 db_info; +}; + +/* hw defined cmdq wqe header */ +struct cmdq_header { + __le32 header_info; + __le32 saved_data; +}; + +struct cmdq_lcmd_bufdesc { + struct hinic3_sge sge; + __le64 rsvd2; + __le64 rsvd3; +}; + +struct cmdq_status { + __le32 status_info; +}; + +struct cmdq_ctrl { + __le32 ctrl_info; +}; + +struct cmdq_direct_resp { + __le64 val; + __le64 rsvd; +}; + +struct cmdq_completion { + union { + struct hinic3_sge sge; + struct cmdq_direct_resp direct; + } resp; +}; + +struct cmdq_wqe_scmd { + struct cmdq_header header; + __le64 rsvd3; + struct cmdq_status status; + struct cmdq_ctrl ctrl; + struct cmdq_completion completion; + __le32 rsvd10[6]; +}; + +struct cmdq_wqe_lcmd { + struct cmdq_header header; + struct cmdq_status status; + struct cmdq_ctrl ctrl; + struct cmdq_completion completion; + struct cmdq_lcmd_bufdesc buf_desc; +}; + +struct cmdq_wqe { + union { + struct cmdq_wqe_scmd wqe_scmd; + struct cmdq_wqe_lcmd wqe_lcmd; + }; +}; + +static_assert(sizeof(struct cmdq_wqe) == 64); + +enum hinic3_cmdq_type { + HINIC3_CMDQ_SYNC = 0, + HINIC3_MAX_CMDQ_TYPES = 4 +}; + +enum hinic3_cmdq_status { + HINIC3_CMDQ_ENABLE = BIT(0), +}; + +enum hinic3_cmdq_cmd_type { + HINIC3_CMD_TYPE_NONE, + HINIC3_CMD_TYPE_DIRECT_RESP, + HINIC3_CMD_TYPE_FAKE_TIMEOUT, + HINIC3_CMD_TYPE_TIMEOUT, + HINIC3_CMD_TYPE_FORCE_STOP, +}; + +struct hinic3_cmd_buf { + void *buf; + dma_addr_t dma_addr; + __le16 size; + refcount_t ref_cnt; +}; + +struct hinic3_cmdq_cmd_info { + enum hinic3_cmdq_cmd_type cmd_type; + struct completion *done; + int *errcode; + /* completion code */ + int *cmpt_code; + __le64 *direct_resp; + u64 cmdq_msg_id; + struct hinic3_cmd_buf *buf_in; +}; + +struct hinic3_cmdq { + struct hinic3_wq wq; + enum hinic3_cmdq_type cmdq_type; + u8 wrapped; + /* synchronize command submission with completions via event queue */ + spinlock_t cmdq_lock; + struct hinic3_cmdq_cmd_info *cmd_infos; + struct hinic3_hwdev *hwdev; +}; + +struct hinic3_cmdqs { + struct hinic3_hwdev *hwdev; + struct hinic3_cmdq cmdq[HINIC3_MAX_CMDQ_TYPES]; + struct dma_pool *cmd_buf_pool; + /* doorbell area */ + u8 __iomem *cmdqs_db_base; + + /* When command queue uses multiple memory pages (1-level CLA), this + * block will hold aggregated indirection table for all command queues + * of cmdqs. Not used for small cmdq (0-level CLA). 
+ */ + dma_addr_t wq_block_paddr; + void *wq_block_vaddr; + + u32 status; + u32 disable_flag; + u8 cmdq_num; +}; + +int hinic3_cmdqs_init(struct hinic3_hwdev *hwdev); +void hinic3_cmdqs_free(struct hinic3_hwdev *hwdev); + +struct hinic3_cmd_buf *hinic3_alloc_cmd_buf(struct hinic3_hwdev *hwdev); +void hinic3_free_cmd_buf(struct hinic3_hwdev *hwdev, + struct hinic3_cmd_buf *cmd_buf); +void hinic3_cmdq_ceq_handler(struct hinic3_hwdev *hwdev, __le32 ceqe_data); + +int hinic3_cmdq_direct_resp(struct hinic3_hwdev *hwdev, u8 mod, u8 cmd, + struct hinic3_cmd_buf *buf_in, __le64 *out_param); + +void hinic3_cmdq_flush_sync_cmd(struct hinic3_hwdev *hwdev); +int hinic3_reinit_cmdq_ctxts(struct hinic3_hwdev *hwdev); +bool hinic3_cmdq_idle(struct hinic3_cmdq *cmdq); + +#endif diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_common.c b/drivers/net/ethernet/huawei/hinic3/hinic3_common.c index 0aa42068728c..fe4778d152cf 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_common.c +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_common.c @@ -3,6 +3,7 @@ #include <linux/delay.h> #include <linux/dma-mapping.h> +#include <linux/iopoll.h> #include "hinic3_common.h" @@ -51,3 +52,25 @@ void hinic3_dma_free_coherent_align(struct device *dev, dma_free_coherent(dev, mem_align->real_size, mem_align->ori_vaddr, mem_align->ori_paddr); } + +int hinic3_wait_for_timeout(void *priv_data, wait_cpl_handler handler, + u32 wait_total_ms, u32 wait_once_us) +{ + enum hinic3_wait_return ret; + int err; + + err = read_poll_timeout(handler, ret, ret == HINIC3_WAIT_PROCESS_CPL, + wait_once_us, wait_total_ms * USEC_PER_MSEC, + false, priv_data); + + return err; +} + +/* Data provided to/by cmdq is arranged in structs with little endian fields but + * every dword (32bits) should be swapped since HW swaps it again when it + * copies it from/to host memory. + */ +void hinic3_cmdq_buf_swab32(void *data, int len) +{ + swab32_array(data, len / sizeof(u32)); +} diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_common.h b/drivers/net/ethernet/huawei/hinic3/hinic3_common.h index bb795dace04c..a8fabfae90fb 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_common.h +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_common.h @@ -18,10 +18,37 @@ struct hinic3_dma_addr_align { dma_addr_t align_paddr; }; +enum hinic3_wait_return { + HINIC3_WAIT_PROCESS_CPL = 0, + HINIC3_WAIT_PROCESS_WAITING = 1, +}; + +struct hinic3_sge { + __le32 hi_addr; + __le32 lo_addr; + __le32 len; + __le32 rsvd; +}; + +static inline void hinic3_set_sge(struct hinic3_sge *sge, dma_addr_t addr, + __le32 len) +{ + sge->hi_addr = cpu_to_le32(upper_32_bits(addr)); + sge->lo_addr = cpu_to_le32(lower_32_bits(addr)); + sge->len = len; + sge->rsvd = 0; +} + int hinic3_dma_zalloc_coherent_align(struct device *dev, u32 size, u32 align, gfp_t flag, struct hinic3_dma_addr_align *mem_align); void hinic3_dma_free_coherent_align(struct device *dev, struct hinic3_dma_addr_align *mem_align); +typedef enum hinic3_wait_return (*wait_cpl_handler)(void *priv_data); +int hinic3_wait_for_timeout(void *priv_data, wait_cpl_handler handler, + u32 wait_total_ms, u32 wait_once_us); + +void hinic3_cmdq_buf_swab32(void *data, int len); + #endif diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_csr.h b/drivers/net/ethernet/huawei/hinic3/hinic3_csr.h new file mode 100644 index 000000000000..e7417e8efa99 --- /dev/null +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_csr.h @@ -0,0 +1,79 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) Huawei Technologies Co., Ltd. 2025. 
All rights reserved. */ + +#ifndef _HINIC3_CSR_H_ +#define _HINIC3_CSR_H_ + +#define HINIC3_CFG_REGS_FLAG 0x40000000 +#define HINIC3_REGS_FLAG_MASK 0x3FFFFFFF + +#define HINIC3_VF_CFG_REG_OFFSET 0x2000 + +/* HW interface registers */ +#define HINIC3_CSR_FUNC_ATTR0_ADDR (HINIC3_CFG_REGS_FLAG + 0x0) +#define HINIC3_CSR_FUNC_ATTR1_ADDR (HINIC3_CFG_REGS_FLAG + 0x4) +#define HINIC3_CSR_FUNC_ATTR2_ADDR (HINIC3_CFG_REGS_FLAG + 0x8) +#define HINIC3_CSR_FUNC_ATTR3_ADDR (HINIC3_CFG_REGS_FLAG + 0xC) +#define HINIC3_CSR_FUNC_ATTR4_ADDR (HINIC3_CFG_REGS_FLAG + 0x10) +#define HINIC3_CSR_FUNC_ATTR5_ADDR (HINIC3_CFG_REGS_FLAG + 0x14) +#define HINIC3_CSR_FUNC_ATTR6_ADDR (HINIC3_CFG_REGS_FLAG + 0x18) + +#define HINIC3_FUNC_CSR_MAILBOX_DATA_OFF 0x80 +#define HINIC3_FUNC_CSR_MAILBOX_CONTROL_OFF (HINIC3_CFG_REGS_FLAG + 0x0100) +#define HINIC3_FUNC_CSR_MAILBOX_INT_OFF (HINIC3_CFG_REGS_FLAG + 0x0104) +#define HINIC3_FUNC_CSR_MAILBOX_RESULT_H_OFF (HINIC3_CFG_REGS_FLAG + 0x0108) +#define HINIC3_FUNC_CSR_MAILBOX_RESULT_L_OFF (HINIC3_CFG_REGS_FLAG + 0x010C) + +#define HINIC3_CSR_DMA_ATTR_TBL_ADDR (HINIC3_CFG_REGS_FLAG + 0x380) +#define HINIC3_CSR_DMA_ATTR_INDIR_IDX_ADDR (HINIC3_CFG_REGS_FLAG + 0x390) + +/* MSI-X registers */ +#define HINIC3_CSR_FUNC_MSI_CLR_WR_ADDR (HINIC3_CFG_REGS_FLAG + 0x58) + +#define HINIC3_MSI_CLR_INDIR_RESEND_TIMER_CLR_MASK BIT(0) +#define HINIC3_MSI_CLR_INDIR_INT_MSK_SET_MASK BIT(1) +#define HINIC3_MSI_CLR_INDIR_INT_MSK_CLR_MASK BIT(2) +#define HINIC3_MSI_CLR_INDIR_AUTO_MSK_SET_MASK BIT(3) +#define HINIC3_MSI_CLR_INDIR_AUTO_MSK_CLR_MASK BIT(4) +#define HINIC3_MSI_CLR_INDIR_SIMPLE_INDIR_IDX_MASK GENMASK(31, 22) +#define HINIC3_MSI_CLR_INDIR_SET(val, member) \ + FIELD_PREP(HINIC3_MSI_CLR_INDIR_##member##_MASK, val) + +/* EQ registers */ +#define HINIC3_AEQ_INDIR_IDX_ADDR (HINIC3_CFG_REGS_FLAG + 0x210) +#define HINIC3_CEQ_INDIR_IDX_ADDR (HINIC3_CFG_REGS_FLAG + 0x290) + +#define HINIC3_EQ_INDIR_IDX_ADDR(type) \ + ((type == HINIC3_AEQ) ? HINIC3_AEQ_INDIR_IDX_ADDR : \ + HINIC3_CEQ_INDIR_IDX_ADDR) + +#define HINIC3_AEQ_MTT_OFF_BASE_ADDR (HINIC3_CFG_REGS_FLAG + 0x240) +#define HINIC3_CEQ_MTT_OFF_BASE_ADDR (HINIC3_CFG_REGS_FLAG + 0x2C0) + +#define HINIC3_CSR_EQ_PAGE_OFF_STRIDE 8 + +#define HINIC3_AEQ_HI_PHYS_ADDR_REG(pg_num) \ + (HINIC3_AEQ_MTT_OFF_BASE_ADDR + (pg_num) * \ + HINIC3_CSR_EQ_PAGE_OFF_STRIDE) + +#define HINIC3_AEQ_LO_PHYS_ADDR_REG(pg_num) \ + (HINIC3_AEQ_MTT_OFF_BASE_ADDR + (pg_num) * \ + HINIC3_CSR_EQ_PAGE_OFF_STRIDE + 4) + +#define HINIC3_CEQ_HI_PHYS_ADDR_REG(pg_num) \ + (HINIC3_CEQ_MTT_OFF_BASE_ADDR + (pg_num) * \ + HINIC3_CSR_EQ_PAGE_OFF_STRIDE) + +#define HINIC3_CEQ_LO_PHYS_ADDR_REG(pg_num) \ + (HINIC3_CEQ_MTT_OFF_BASE_ADDR + (pg_num) * \ + HINIC3_CSR_EQ_PAGE_OFF_STRIDE + 4) + +#define HINIC3_CSR_AEQ_CTRL_0_ADDR (HINIC3_CFG_REGS_FLAG + 0x200) +#define HINIC3_CSR_AEQ_CTRL_1_ADDR (HINIC3_CFG_REGS_FLAG + 0x204) +#define HINIC3_CSR_AEQ_PROD_IDX_ADDR (HINIC3_CFG_REGS_FLAG + 0x20C) +#define HINIC3_CSR_AEQ_CI_SIMPLE_INDIR_ADDR (HINIC3_CFG_REGS_FLAG + 0x50) + +#define HINIC3_CSR_CEQ_PROD_IDX_ADDR (HINIC3_CFG_REGS_FLAG + 0x28c) +#define HINIC3_CSR_CEQ_CI_SIMPLE_INDIR_ADDR (HINIC3_CFG_REGS_FLAG + 0x54) + +#endif diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_eqs.c b/drivers/net/ethernet/huawei/hinic3/hinic3_eqs.c new file mode 100644 index 000000000000..01686472985b --- /dev/null +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_eqs.c @@ -0,0 +1,776 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. 
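+// AEQs carry asynchronous management events (mailbox segments, messages from FW); CEQs carry completion events such as cmdq completions.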
+ +#include <linux/delay.h> + +#include "hinic3_csr.h" +#include "hinic3_eqs.h" +#include "hinic3_hwdev.h" +#include "hinic3_hwif.h" +#include "hinic3_mbox.h" + +#define AEQ_CTRL_0_INTR_IDX_MASK GENMASK(9, 0) +#define AEQ_CTRL_0_DMA_ATTR_MASK GENMASK(17, 12) +#define AEQ_CTRL_0_PCI_INTF_IDX_MASK GENMASK(22, 20) +#define AEQ_CTRL_0_INTR_MODE_MASK BIT(31) +#define AEQ_CTRL_0_SET(val, member) \ + FIELD_PREP(AEQ_CTRL_0_##member##_MASK, val) + +#define AEQ_CTRL_1_LEN_MASK GENMASK(20, 0) +#define AEQ_CTRL_1_ELEM_SIZE_MASK GENMASK(25, 24) +#define AEQ_CTRL_1_PAGE_SIZE_MASK GENMASK(31, 28) +#define AEQ_CTRL_1_SET(val, member) \ + FIELD_PREP(AEQ_CTRL_1_##member##_MASK, val) + +#define CEQ_CTRL_0_INTR_IDX_MASK GENMASK(9, 0) +#define CEQ_CTRL_0_DMA_ATTR_MASK GENMASK(17, 12) +#define CEQ_CTRL_0_LIMIT_KICK_MASK GENMASK(23, 20) +#define CEQ_CTRL_0_PCI_INTF_IDX_MASK GENMASK(25, 24) +#define CEQ_CTRL_0_PAGE_SIZE_MASK GENMASK(30, 27) +#define CEQ_CTRL_0_INTR_MODE_MASK BIT(31) +#define CEQ_CTRL_0_SET(val, member) \ + FIELD_PREP(CEQ_CTRL_0_##member##_MASK, val) + +#define CEQ_CTRL_1_LEN_MASK GENMASK(19, 0) +#define CEQ_CTRL_1_SET(val, member) \ + FIELD_PREP(CEQ_CTRL_1_##member##_MASK, val) + +#define CEQE_TYPE_MASK GENMASK(25, 23) +#define CEQE_TYPE(type) \ + FIELD_GET(CEQE_TYPE_MASK, le32_to_cpu(type)) + +#define CEQE_DATA_MASK GENMASK(25, 0) +#define CEQE_DATA(data) ((data) & cpu_to_le32(CEQE_DATA_MASK)) + +#define EQ_ELEM_DESC_TYPE_MASK GENMASK(6, 0) +#define EQ_ELEM_DESC_SRC_MASK BIT(7) +#define EQ_ELEM_DESC_SIZE_MASK GENMASK(15, 8) +#define EQ_ELEM_DESC_WRAPPED_MASK BIT(31) +#define EQ_ELEM_DESC_GET(val, member) \ + FIELD_GET(EQ_ELEM_DESC_##member##_MASK, le32_to_cpu(val)) + +#define EQ_CI_SIMPLE_INDIR_CI_MASK GENMASK(20, 0) +#define EQ_CI_SIMPLE_INDIR_ARMED_MASK BIT(21) +#define EQ_CI_SIMPLE_INDIR_AEQ_IDX_MASK GENMASK(31, 30) +#define EQ_CI_SIMPLE_INDIR_CEQ_IDX_MASK GENMASK(31, 24) +#define EQ_CI_SIMPLE_INDIR_SET(val, member) \ + FIELD_PREP(EQ_CI_SIMPLE_INDIR_##member##_MASK, val) + +#define EQ_CI_SIMPLE_INDIR_REG_ADDR(eq) \ + (((eq)->type == HINIC3_AEQ) ? \ + HINIC3_CSR_AEQ_CI_SIMPLE_INDIR_ADDR : \ + HINIC3_CSR_CEQ_CI_SIMPLE_INDIR_ADDR) + +#define EQ_PROD_IDX_REG_ADDR(eq) \ + (((eq)->type == HINIC3_AEQ) ? \ + HINIC3_CSR_AEQ_PROD_IDX_ADDR : HINIC3_CSR_CEQ_PROD_IDX_ADDR) + +#define EQ_HI_PHYS_ADDR_REG(type, pg_num) \ + (((type) == HINIC3_AEQ) ? \ + HINIC3_AEQ_HI_PHYS_ADDR_REG(pg_num) : \ + HINIC3_CEQ_HI_PHYS_ADDR_REG(pg_num)) + +#define EQ_LO_PHYS_ADDR_REG(type, pg_num) \ + (((type) == HINIC3_AEQ) ? \ + HINIC3_AEQ_LO_PHYS_ADDR_REG(pg_num) : \ + HINIC3_CEQ_LO_PHYS_ADDR_REG(pg_num)) + +#define EQ_MSIX_RESEND_TIMER_CLEAR 1 + +#define HINIC3_EQ_MAX_PAGES(eq) \ + ((eq)->type == HINIC3_AEQ ? 
\ + HINIC3_AEQ_MAX_PAGES : HINIC3_CEQ_MAX_PAGES) + +#define HINIC3_TASK_PROCESS_EQE_LIMIT 1024 +#define HINIC3_EQ_UPDATE_CI_STEP 64 +#define HINIC3_EQS_WQ_NAME "hinic3_eqs" + +#define HINIC3_EQ_VALID_SHIFT 31 +#define HINIC3_EQ_WRAPPED(eq) \ + ((eq)->wrapped << HINIC3_EQ_VALID_SHIFT) + +#define HINIC3_EQ_WRAPPED_SHIFT 20 +#define HINIC3_EQ_CONS_IDX(eq) \ + ((eq)->cons_idx | ((eq)->wrapped << HINIC3_EQ_WRAPPED_SHIFT)) + +static const struct hinic3_aeq_elem *get_curr_aeq_elem(const struct hinic3_eq *eq) +{ + return get_q_element(&eq->qpages, eq->cons_idx, NULL); +} + +static const __be32 *get_curr_ceq_elem(const struct hinic3_eq *eq) +{ + return get_q_element(&eq->qpages, eq->cons_idx, NULL); +} + +int hinic3_aeq_register_cb(struct hinic3_hwdev *hwdev, + enum hinic3_aeq_type event, + hinic3_aeq_event_cb hwe_cb) +{ + struct hinic3_aeqs *aeqs; + + aeqs = hwdev->aeqs; + aeqs->aeq_cb[event] = hwe_cb; + spin_lock_init(&aeqs->aeq_lock); + + return 0; +} + +void hinic3_aeq_unregister_cb(struct hinic3_hwdev *hwdev, + enum hinic3_aeq_type event) +{ + struct hinic3_aeqs *aeqs; + + aeqs = hwdev->aeqs; + + spin_lock_bh(&aeqs->aeq_lock); + aeqs->aeq_cb[event] = NULL; + spin_unlock_bh(&aeqs->aeq_lock); +} + +int hinic3_ceq_register_cb(struct hinic3_hwdev *hwdev, + enum hinic3_ceq_event event, + hinic3_ceq_event_cb callback) +{ + struct hinic3_ceqs *ceqs; + + ceqs = hwdev->ceqs; + ceqs->ceq_cb[event] = callback; + spin_lock_init(&ceqs->ceq_lock); + + return 0; +} + +void hinic3_ceq_unregister_cb(struct hinic3_hwdev *hwdev, + enum hinic3_ceq_event event) +{ + struct hinic3_ceqs *ceqs; + + ceqs = hwdev->ceqs; + + spin_lock_bh(&ceqs->ceq_lock); + ceqs->ceq_cb[event] = NULL; + spin_unlock_bh(&ceqs->ceq_lock); +} + +/* Set consumer index in the hw. */ +static void set_eq_cons_idx(struct hinic3_eq *eq, u32 arm_state) +{ + u32 addr = EQ_CI_SIMPLE_INDIR_REG_ADDR(eq); + u32 eq_wrap_ci, val; + + eq_wrap_ci = HINIC3_EQ_CONS_IDX(eq); + val = EQ_CI_SIMPLE_INDIR_SET(arm_state, ARMED); + if (eq->type == HINIC3_AEQ) { + val = val | + EQ_CI_SIMPLE_INDIR_SET(eq_wrap_ci, CI) | + EQ_CI_SIMPLE_INDIR_SET(eq->q_id, AEQ_IDX); + } else { + val = val | + EQ_CI_SIMPLE_INDIR_SET(eq_wrap_ci, CI) | + EQ_CI_SIMPLE_INDIR_SET(eq->q_id, CEQ_IDX); + } + + hinic3_hwif_write_reg(eq->hwdev->hwif, addr, val); +} + +static struct hinic3_ceqs *ceq_to_ceqs(const struct hinic3_eq *eq) +{ + return container_of(eq, struct hinic3_ceqs, ceq[eq->q_id]); +} + +static void ceq_event_handler(struct hinic3_ceqs *ceqs, __le32 ceqe) +{ + enum hinic3_ceq_event event = CEQE_TYPE(ceqe); + struct hinic3_hwdev *hwdev = ceqs->hwdev; + __le32 ceqe_data = CEQE_DATA(ceqe); + + if (event >= HINIC3_MAX_CEQ_EVENTS) { + dev_warn(hwdev->dev, "Ceq unknown event:%d, ceqe data: 0x%x\n", + event, ceqe_data); + return; + } + + spin_lock_bh(&ceqs->ceq_lock); + if (ceqs->ceq_cb[event]) + ceqs->ceq_cb[event](hwdev, ceqe_data); + + spin_unlock_bh(&ceqs->ceq_lock); +} + +static struct hinic3_aeqs *aeq_to_aeqs(const struct hinic3_eq *eq) +{ + return container_of(eq, struct hinic3_aeqs, aeq[eq->q_id]); +} + +static void aeq_event_handler(struct hinic3_aeqs *aeqs, __le32 aeqe, + const struct hinic3_aeq_elem *aeqe_pos) +{ + struct hinic3_hwdev *hwdev = aeqs->hwdev; + u8 data[HINIC3_AEQE_DATA_SIZE], size; + enum hinic3_aeq_type event; + hinic3_aeq_event_cb hwe_cb; + + if (EQ_ELEM_DESC_GET(aeqe, SRC)) + return; + + event = EQ_ELEM_DESC_GET(aeqe, TYPE); + if (event >= HINIC3_MAX_AEQ_EVENTS) { + dev_warn(hwdev->dev, "Aeq unknown event:%d\n", event); + return; + } + + memcpy(data, 
aeqe_pos->aeqe_data, HINIC3_AEQE_DATA_SIZE); + swab32_array((u32 *)data, HINIC3_AEQE_DATA_SIZE / sizeof(u32)); + size = EQ_ELEM_DESC_GET(aeqe, SIZE); + + spin_lock_bh(&aeqs->aeq_lock); + hwe_cb = aeqs->aeq_cb[event]; + if (hwe_cb) + hwe_cb(aeqs->hwdev, data, size); + spin_unlock_bh(&aeqs->aeq_lock); +} + +static int aeq_irq_handler(struct hinic3_eq *eq) +{ + const struct hinic3_aeq_elem *aeqe_pos; + struct hinic3_aeqs *aeqs; + u32 i, eqe_cnt = 0; + __le32 aeqe; + + aeqs = aeq_to_aeqs(eq); + for (i = 0; i < HINIC3_TASK_PROCESS_EQE_LIMIT; i++) { + aeqe_pos = get_curr_aeq_elem(eq); + aeqe = (__force __le32)swab32((__force __u32)aeqe_pos->desc); + /* HW updates wrapped bit, when it adds eq element event */ + if (EQ_ELEM_DESC_GET(aeqe, WRAPPED) == eq->wrapped) + return 0; + + /* Prevent speculative reads from element */ + dma_rmb(); + aeq_event_handler(aeqs, aeqe, aeqe_pos); + eq->cons_idx++; + if (eq->cons_idx == eq->eq_len) { + eq->cons_idx = 0; + eq->wrapped = !eq->wrapped; + } + + if (++eqe_cnt >= HINIC3_EQ_UPDATE_CI_STEP) { + eqe_cnt = 0; + set_eq_cons_idx(eq, HINIC3_EQ_NOT_ARMED); + } + } + + return -EAGAIN; +} + +static int ceq_irq_handler(struct hinic3_eq *eq) +{ + struct hinic3_ceqs *ceqs; + u32 eqe_cnt = 0; + __be32 ceqe_raw; + __le32 ceqe; + u32 i; + + ceqs = ceq_to_ceqs(eq); + for (i = 0; i < HINIC3_TASK_PROCESS_EQE_LIMIT; i++) { + ceqe_raw = *get_curr_ceq_elem(eq); + ceqe = (__force __le32)swab32((__force __u32)ceqe_raw); + + /* HW updates wrapped bit, when it adds eq element event */ + if (EQ_ELEM_DESC_GET(ceqe, WRAPPED) == eq->wrapped) + return 0; + + ceq_event_handler(ceqs, ceqe); + eq->cons_idx++; + if (eq->cons_idx == eq->eq_len) { + eq->cons_idx = 0; + eq->wrapped = !eq->wrapped; + } + + if (++eqe_cnt >= HINIC3_EQ_UPDATE_CI_STEP) { + eqe_cnt = 0; + set_eq_cons_idx(eq, HINIC3_EQ_NOT_ARMED); + } + } + + return -EAGAIN; +} + +static void reschedule_aeq_handler(struct hinic3_eq *eq) +{ + struct hinic3_aeqs *aeqs = aeq_to_aeqs(eq); + + queue_work(aeqs->workq, &eq->aeq_work); +} + +static int eq_irq_handler(struct hinic3_eq *eq) +{ + int err; + + if (eq->type == HINIC3_AEQ) + err = aeq_irq_handler(eq); + else + err = ceq_irq_handler(eq); + + set_eq_cons_idx(eq, err ? 
HINIC3_EQ_NOT_ARMED : + HINIC3_EQ_ARMED); + + return err; +} + +static void aeq_irq_work(struct work_struct *work) +{ + struct hinic3_eq *eq = container_of(work, struct hinic3_eq, aeq_work); + int err; + + err = eq_irq_handler(eq); + if (err) + reschedule_aeq_handler(eq); +} + +static irqreturn_t aeq_interrupt(int irq, void *data) +{ + struct workqueue_struct *workq; + struct hinic3_eq *aeq = data; + struct hinic3_hwdev *hwdev; + struct hinic3_aeqs *aeqs; + + aeqs = aeq_to_aeqs(aeq); + hwdev = aeq->hwdev; + + /* clear resend timer cnt register */ + workq = aeqs->workq; + hinic3_msix_intr_clear_resend_bit(hwdev, aeq->msix_entry_idx, + EQ_MSIX_RESEND_TIMER_CLEAR); + queue_work(workq, &aeq->aeq_work); + + return IRQ_HANDLED; +} + +static irqreturn_t ceq_interrupt(int irq, void *data) +{ + struct hinic3_eq *ceq = data; + int err; + + /* clear resend timer counters */ + hinic3_msix_intr_clear_resend_bit(ceq->hwdev, ceq->msix_entry_idx, + EQ_MSIX_RESEND_TIMER_CLEAR); + err = eq_irq_handler(ceq); + if (err) + return IRQ_NONE; + + return IRQ_HANDLED; +} + +static int hinic3_set_ceq_ctrl_reg(struct hinic3_hwdev *hwdev, u16 q_id, + u32 ctrl0, u32 ctrl1) +{ + struct comm_cmd_set_ceq_ctrl_reg ceq_ctrl = {}; + struct mgmt_msg_params msg_params = {}; + int err; + + ceq_ctrl.func_id = hinic3_global_func_id(hwdev); + ceq_ctrl.q_id = q_id; + ceq_ctrl.ctrl0 = ctrl0; + ceq_ctrl.ctrl1 = ctrl1; + + mgmt_msg_params_init_default(&msg_params, &ceq_ctrl, sizeof(ceq_ctrl)); + + err = hinic3_send_mbox_to_mgmt(hwdev, MGMT_MOD_COMM, + COMM_CMD_SET_CEQ_CTRL_REG, &msg_params); + if (err || ceq_ctrl.head.status) { + dev_err(hwdev->dev, "Failed to set ceq %u ctrl reg, err: %d status: 0x%x\n", + q_id, err, ceq_ctrl.head.status); + return -EFAULT; + } + + return 0; +} + +static int set_eq_ctrls(struct hinic3_eq *eq) +{ + struct hinic3_hwif *hwif = eq->hwdev->hwif; + struct hinic3_queue_pages *qpages; + u8 pci_intf_idx, elem_size; + u32 mask, ctrl0, ctrl1; + u32 page_size_val; + int err; + + qpages = &eq->qpages; + page_size_val = ilog2(qpages->page_size / HINIC3_MIN_PAGE_SIZE); + pci_intf_idx = hwif->attr.pci_intf_idx; + + if (eq->type == HINIC3_AEQ) { + /* set ctrl0 using read-modify-write */ + mask = AEQ_CTRL_0_INTR_IDX_MASK | + AEQ_CTRL_0_DMA_ATTR_MASK | + AEQ_CTRL_0_PCI_INTF_IDX_MASK | + AEQ_CTRL_0_INTR_MODE_MASK; + ctrl0 = hinic3_hwif_read_reg(hwif, HINIC3_CSR_AEQ_CTRL_0_ADDR); + ctrl0 = (ctrl0 & ~mask) | + AEQ_CTRL_0_SET(eq->msix_entry_idx, INTR_IDX) | + AEQ_CTRL_0_SET(0, DMA_ATTR) | + AEQ_CTRL_0_SET(pci_intf_idx, PCI_INTF_IDX) | + AEQ_CTRL_0_SET(HINIC3_INTR_MODE_ARMED, INTR_MODE); + hinic3_hwif_write_reg(hwif, HINIC3_CSR_AEQ_CTRL_0_ADDR, ctrl0); + + /* HW expects log2(number of 32 byte units). 
 */ + elem_size = qpages->elem_size_shift - 5; + ctrl1 = AEQ_CTRL_1_SET(eq->eq_len, LEN) | + AEQ_CTRL_1_SET(elem_size, ELEM_SIZE) | + AEQ_CTRL_1_SET(page_size_val, PAGE_SIZE); + hinic3_hwif_write_reg(hwif, HINIC3_CSR_AEQ_CTRL_1_ADDR, ctrl1); + } else { + ctrl0 = CEQ_CTRL_0_SET(eq->msix_entry_idx, INTR_IDX) | + CEQ_CTRL_0_SET(0, DMA_ATTR) | + CEQ_CTRL_0_SET(0, LIMIT_KICK) | + CEQ_CTRL_0_SET(pci_intf_idx, PCI_INTF_IDX) | + CEQ_CTRL_0_SET(page_size_val, PAGE_SIZE) | + CEQ_CTRL_0_SET(HINIC3_INTR_MODE_ARMED, INTR_MODE); + + ctrl1 = CEQ_CTRL_1_SET(eq->eq_len, LEN); + + /* set ceq ctrl reg through mgmt cpu */ + err = hinic3_set_ceq_ctrl_reg(eq->hwdev, eq->q_id, ctrl0, + ctrl1); + if (err) + return err; + } + + return 0; +} + +static void ceq_elements_init(struct hinic3_eq *eq, u32 init_val) +{ + __be32 *ceqe; + u32 i; + + for (i = 0; i < eq->eq_len; i++) { + ceqe = get_q_element(&eq->qpages, i, NULL); + *ceqe = cpu_to_be32(init_val); + } + + wmb(); /* Ensure cleared ceq elements are visible before HW use */ +} + +static void aeq_elements_init(struct hinic3_eq *eq, u32 init_val) +{ + struct hinic3_aeq_elem *aeqe; + u32 i; + + for (i = 0; i < eq->eq_len; i++) { + aeqe = get_q_element(&eq->qpages, i, NULL); + aeqe->desc = cpu_to_be32(init_val); + } + + wmb(); /* Ensure cleared aeq elements are visible before HW use */ +} + +static void eq_elements_init(struct hinic3_eq *eq, u32 init_val) +{ + if (eq->type == HINIC3_AEQ) + aeq_elements_init(eq, init_val); + else + ceq_elements_init(eq, init_val); +} + +static int alloc_eq_pages(struct hinic3_eq *eq) +{ + struct hinic3_hwif *hwif = eq->hwdev->hwif; + struct hinic3_queue_pages *qpages; + dma_addr_t page_paddr; + u32 reg, init_val; + u16 pg_idx; + int err; + + qpages = &eq->qpages; + err = hinic3_queue_pages_alloc(eq->hwdev, qpages, HINIC3_MIN_PAGE_SIZE); + if (err) + return err; + + for (pg_idx = 0; pg_idx < qpages->num_pages; pg_idx++) { + page_paddr = qpages->pages[pg_idx].align_paddr; + reg = EQ_HI_PHYS_ADDR_REG(eq->type, pg_idx); + hinic3_hwif_write_reg(hwif, reg, upper_32_bits(page_paddr)); + reg = EQ_LO_PHYS_ADDR_REG(eq->type, pg_idx); + hinic3_hwif_write_reg(hwif, reg, lower_32_bits(page_paddr)); + } + + init_val = HINIC3_EQ_WRAPPED(eq); + eq_elements_init(eq, init_val); + + return 0; +} + +static void eq_calc_page_size_and_num(struct hinic3_eq *eq, u32 elem_size) +{ + u32 max_pages, min_page_size, page_size, total_size; + + /* No need for complicated arithmetic. All values are powers of 2, so + * multiplications yield powers of 2 and divisions are exact, with no + * remainder.
+ */ + max_pages = HINIC3_EQ_MAX_PAGES(eq); + min_page_size = HINIC3_MIN_PAGE_SIZE; + total_size = eq->eq_len * elem_size; + + if (total_size <= max_pages * min_page_size) + page_size = min_page_size; + else + page_size = total_size / max_pages; + + hinic3_queue_pages_init(&eq->qpages, eq->eq_len, page_size, elem_size); +} + +static int request_eq_irq(struct hinic3_eq *eq) +{ + int err; + + if (eq->type == HINIC3_AEQ) { + INIT_WORK(&eq->aeq_work, aeq_irq_work); + snprintf(eq->irq_name, sizeof(eq->irq_name), + "hinic3_aeq%u@pci:%s", eq->q_id, + pci_name(eq->hwdev->pdev)); + err = request_irq(eq->irq_id, aeq_interrupt, 0, + eq->irq_name, eq); + } else { + snprintf(eq->irq_name, sizeof(eq->irq_name), + "hinic3_ceq%u@pci:%s", eq->q_id, + pci_name(eq->hwdev->pdev)); + err = request_threaded_irq(eq->irq_id, NULL, ceq_interrupt, + IRQF_ONESHOT, eq->irq_name, eq); + } + + return err; +} + +static void reset_eq(struct hinic3_eq *eq) +{ + /* clear eq_len to force eqe drop in hardware */ + if (eq->type == HINIC3_AEQ) + hinic3_hwif_write_reg(eq->hwdev->hwif, + HINIC3_CSR_AEQ_CTRL_1_ADDR, 0); + else + hinic3_set_ceq_ctrl_reg(eq->hwdev, eq->q_id, 0, 0); + + hinic3_hwif_write_reg(eq->hwdev->hwif, EQ_PROD_IDX_REG_ADDR(eq), 0); +} + +static int init_eq(struct hinic3_eq *eq, struct hinic3_hwdev *hwdev, u16 q_id, + u32 q_len, enum hinic3_eq_type type, + struct msix_entry *msix_entry) +{ + u32 elem_size; + int err; + + eq->hwdev = hwdev; + eq->q_id = q_id; + eq->type = type; + eq->eq_len = q_len; + + /* Indirect access should set q_id first */ + hinic3_hwif_write_reg(hwdev->hwif, HINIC3_EQ_INDIR_IDX_ADDR(eq->type), + eq->q_id); + + reset_eq(eq); + + eq->cons_idx = 0; + eq->wrapped = 0; + + elem_size = (type == HINIC3_AEQ) ? HINIC3_AEQE_SIZE : HINIC3_CEQE_SIZE; + eq_calc_page_size_and_num(eq, elem_size); + + err = alloc_eq_pages(eq); + if (err) { + dev_err(hwdev->dev, "Failed to allocate pages for eq\n"); + return err; + } + + eq->msix_entry_idx = msix_entry->entry; + eq->irq_id = msix_entry->vector; + + err = set_eq_ctrls(eq); + if (err) { + dev_err(hwdev->dev, "Failed to set ctrls for eq\n"); + goto err_free_queue_pages; + } + + set_eq_cons_idx(eq, HINIC3_EQ_ARMED); + + err = request_eq_irq(eq); + if (err) { + dev_err(hwdev->dev, + "Failed to request irq for the eq, err: %d\n", err); + goto err_free_queue_pages; + } + + hinic3_set_msix_state(hwdev, eq->msix_entry_idx, HINIC3_MSIX_DISABLE); + + return 0; + +err_free_queue_pages: + hinic3_queue_pages_free(hwdev, &eq->qpages); + + return err; +} + +static void remove_eq(struct hinic3_eq *eq) +{ + hinic3_set_msix_state(eq->hwdev, eq->msix_entry_idx, + HINIC3_MSIX_DISABLE); + free_irq(eq->irq_id, eq); + /* Indirect access should set q_id first */ + hinic3_hwif_write_reg(eq->hwdev->hwif, + HINIC3_EQ_INDIR_IDX_ADDR(eq->type), + eq->q_id); + + if (eq->type == HINIC3_AEQ) { + disable_work_sync(&eq->aeq_work); + /* clear eq_len to avoid hw access host memory */ + hinic3_hwif_write_reg(eq->hwdev->hwif, + HINIC3_CSR_AEQ_CTRL_1_ADDR, 0); + } else { + hinic3_set_ceq_ctrl_reg(eq->hwdev, eq->q_id, 0, 0); + } + + /* update consumer index to avoid invalid interrupt */ + eq->cons_idx = hinic3_hwif_read_reg(eq->hwdev->hwif, + EQ_PROD_IDX_REG_ADDR(eq)); + set_eq_cons_idx(eq, HINIC3_EQ_NOT_ARMED); + hinic3_queue_pages_free(eq->hwdev, &eq->qpages); +} + +int hinic3_aeqs_init(struct hinic3_hwdev *hwdev, u16 num_aeqs, + struct msix_entry *msix_entries) +{ + struct hinic3_aeqs *aeqs; + u16 q_id; + int err; + + aeqs = kzalloc(sizeof(*aeqs), GFP_KERNEL); + if (!aeqs) + return -ENOMEM; 
+ + hwdev->aeqs = aeqs; + aeqs->hwdev = hwdev; + aeqs->num_aeqs = num_aeqs; + aeqs->workq = alloc_workqueue(HINIC3_EQS_WQ_NAME, WQ_MEM_RECLAIM, + HINIC3_MAX_AEQS); + if (!aeqs->workq) { + dev_err(hwdev->dev, "Failed to initialize aeq workqueue\n"); + err = -ENOMEM; + goto err_free_aeqs; + } + + for (q_id = 0; q_id < num_aeqs; q_id++) { + err = init_eq(&aeqs->aeq[q_id], hwdev, q_id, + HINIC3_DEFAULT_AEQ_LEN, HINIC3_AEQ, + &msix_entries[q_id]); + if (err) { + dev_err(hwdev->dev, "Failed to init aeq %u\n", + q_id); + goto err_remove_eqs; + } + } + for (q_id = 0; q_id < num_aeqs; q_id++) + hinic3_set_msix_state(hwdev, aeqs->aeq[q_id].msix_entry_idx, + HINIC3_MSIX_ENABLE); + + return 0; + +err_remove_eqs: + while (q_id > 0) { + q_id--; + remove_eq(&aeqs->aeq[q_id]); + } + + destroy_workqueue(aeqs->workq); + +err_free_aeqs: + kfree(aeqs); + + return err; +} + +void hinic3_aeqs_free(struct hinic3_hwdev *hwdev) +{ + struct hinic3_aeqs *aeqs = hwdev->aeqs; + enum hinic3_aeq_type aeq_event; + struct hinic3_eq *eq; + u16 q_id; + + for (q_id = 0; q_id < aeqs->num_aeqs; q_id++) { + eq = aeqs->aeq + q_id; + remove_eq(eq); + hinic3_free_irq(hwdev, eq->irq_id); + } + + for (aeq_event = 0; aeq_event < HINIC3_MAX_AEQ_EVENTS; aeq_event++) + hinic3_aeq_unregister_cb(hwdev, aeq_event); + + destroy_workqueue(aeqs->workq); + + kfree(aeqs); +} + +int hinic3_ceqs_init(struct hinic3_hwdev *hwdev, u16 num_ceqs, + struct msix_entry *msix_entries) +{ + struct hinic3_ceqs *ceqs; + u16 q_id; + int err; + + ceqs = kzalloc(sizeof(*ceqs), GFP_KERNEL); + if (!ceqs) + return -ENOMEM; + + hwdev->ceqs = ceqs; + ceqs->hwdev = hwdev; + ceqs->num_ceqs = num_ceqs; + + for (q_id = 0; q_id < num_ceqs; q_id++) { + err = init_eq(&ceqs->ceq[q_id], hwdev, q_id, + HINIC3_DEFAULT_CEQ_LEN, HINIC3_CEQ, + &msix_entries[q_id]); + if (err) { + dev_err(hwdev->dev, "Failed to init ceq %u\n", + q_id); + goto err_free_ceqs; + } + } + for (q_id = 0; q_id < num_ceqs; q_id++) + hinic3_set_msix_state(hwdev, ceqs->ceq[q_id].msix_entry_idx, + HINIC3_MSIX_ENABLE); + + return 0; + +err_free_ceqs: + while (q_id > 0) { + q_id--; + remove_eq(&ceqs->ceq[q_id]); + } + + kfree(ceqs); + + return err; +} + +void hinic3_ceqs_free(struct hinic3_hwdev *hwdev) +{ + struct hinic3_ceqs *ceqs = hwdev->ceqs; + enum hinic3_ceq_event ceq_event; + struct hinic3_eq *eq; + u16 q_id; + + for (q_id = 0; q_id < ceqs->num_ceqs; q_id++) { + eq = ceqs->ceq + q_id; + remove_eq(eq); + hinic3_free_irq(hwdev, eq->irq_id); + } + + for (ceq_event = 0; ceq_event < HINIC3_MAX_CEQ_EVENTS; ceq_event++) + hinic3_ceq_unregister_cb(hwdev, ceq_event); + + kfree(ceqs); +} diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_eqs.h b/drivers/net/ethernet/huawei/hinic3/hinic3_eqs.h new file mode 100644 index 000000000000..005a6e0745b3 --- /dev/null +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_eqs.h @@ -0,0 +1,122 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. 
*/ + +#ifndef _HINIC3_EQS_H_ +#define _HINIC3_EQS_H_ + +#include <linux/interrupt.h> + +#include "hinic3_hw_cfg.h" +#include "hinic3_queue_common.h" + +#define HINIC3_MAX_AEQS 4 +#define HINIC3_MAX_CEQS 32 + +#define HINIC3_AEQ_MAX_PAGES 4 +#define HINIC3_CEQ_MAX_PAGES 8 + +#define HINIC3_AEQE_SIZE 64 +#define HINIC3_CEQE_SIZE 4 + +#define HINIC3_AEQE_DESC_SIZE 4 +#define HINIC3_AEQE_DATA_SIZE (HINIC3_AEQE_SIZE - HINIC3_AEQE_DESC_SIZE) + +#define HINIC3_DEFAULT_AEQ_LEN 0x10000 +#define HINIC3_DEFAULT_CEQ_LEN 0x10000 + +#define HINIC3_EQ_IRQ_NAME_LEN 64 + +#define HINIC3_EQ_USLEEP_LOW_BOUND 900 +#define HINIC3_EQ_USLEEP_HIGH_BOUND 1000 + +enum hinic3_eq_type { + HINIC3_AEQ = 0, + HINIC3_CEQ = 1, +}; + +enum hinic3_eq_intr_mode { + HINIC3_INTR_MODE_ARMED = 0, + HINIC3_INTR_MODE_ALWAYS = 1, +}; + +enum hinic3_eq_ci_arm_state { + HINIC3_EQ_NOT_ARMED = 0, + HINIC3_EQ_ARMED = 1, +}; + +struct hinic3_eq { + struct hinic3_hwdev *hwdev; + struct hinic3_queue_pages qpages; + u16 q_id; + enum hinic3_eq_type type; + u32 eq_len; + u32 cons_idx; + u8 wrapped; + u32 irq_id; + u16 msix_entry_idx; + char irq_name[HINIC3_EQ_IRQ_NAME_LEN]; + struct work_struct aeq_work; +}; + +struct hinic3_aeq_elem { + u8 aeqe_data[HINIC3_AEQE_DATA_SIZE]; + __be32 desc; +}; + +enum hinic3_aeq_type { + HINIC3_HW_INTER_INT = 0, + HINIC3_MBX_FROM_FUNC = 1, + HINIC3_MSG_FROM_FW = 2, + HINIC3_MAX_AEQ_EVENTS = 6, +}; + +typedef void (*hinic3_aeq_event_cb)(struct hinic3_hwdev *hwdev, u8 *data, + u8 size); + +struct hinic3_aeqs { + struct hinic3_hwdev *hwdev; + hinic3_aeq_event_cb aeq_cb[HINIC3_MAX_AEQ_EVENTS]; + struct hinic3_eq aeq[HINIC3_MAX_AEQS]; + u16 num_aeqs; + struct workqueue_struct *workq; + /* lock for aeq event flag */ + spinlock_t aeq_lock; +}; + +enum hinic3_ceq_event { + HINIC3_CMDQ = 3, + HINIC3_MAX_CEQ_EVENTS = 6, +}; + +typedef void (*hinic3_ceq_event_cb)(struct hinic3_hwdev *hwdev, + __le32 ceqe_data); + +struct hinic3_ceqs { + struct hinic3_hwdev *hwdev; + + hinic3_ceq_event_cb ceq_cb[HINIC3_MAX_CEQ_EVENTS]; + + struct hinic3_eq ceq[HINIC3_MAX_CEQS]; + u16 num_ceqs; + /* lock for ceq event flag */ + spinlock_t ceq_lock; +}; + +int hinic3_aeqs_init(struct hinic3_hwdev *hwdev, u16 num_aeqs, + struct msix_entry *msix_entries); +void hinic3_aeqs_free(struct hinic3_hwdev *hwdev); +int hinic3_aeq_register_cb(struct hinic3_hwdev *hwdev, + enum hinic3_aeq_type event, + hinic3_aeq_event_cb hwe_cb); +void hinic3_aeq_unregister_cb(struct hinic3_hwdev *hwdev, + enum hinic3_aeq_type event); +int hinic3_ceqs_init(struct hinic3_hwdev *hwdev, u16 num_ceqs, + struct msix_entry *msix_entries); +void hinic3_ceqs_free(struct hinic3_hwdev *hwdev); +int hinic3_ceq_register_cb(struct hinic3_hwdev *hwdev, + enum hinic3_ceq_event event, + hinic3_ceq_event_cb callback); +void hinic3_ceq_unregister_cb(struct hinic3_hwdev *hwdev, + enum hinic3_ceq_event event); + +#endif diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_hw_cfg.c b/drivers/net/ethernet/huawei/hinic3/hinic3_hw_cfg.c index 87d9450c30ca..0599fc4f3fb0 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_hw_cfg.c +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_hw_cfg.c @@ -8,6 +8,49 @@ #include "hinic3_hwif.h" #include "hinic3_mbox.h" +int hinic3_alloc_irqs(struct hinic3_hwdev *hwdev, u16 num, + struct msix_entry *alloc_arr, u16 *act_num) +{ + struct hinic3_irq_info *irq_info; + struct hinic3_irq *curr; + u16 i, found = 0; + + irq_info = &hwdev->cfg_mgmt->irq_info; + mutex_lock(&irq_info->irq_mutex); + for (i = 0; i < irq_info->num_irq && found < num; i++) { + curr = 
irq_info->irq + i; + if (curr->allocated) + continue; + curr->allocated = true; + alloc_arr[found].vector = curr->irq_id; + alloc_arr[found].entry = curr->msix_entry_idx; + found++; + } + mutex_unlock(&irq_info->irq_mutex); + + *act_num = found; + + return found == 0 ? -ENOMEM : 0; +} + +void hinic3_free_irq(struct hinic3_hwdev *hwdev, u32 irq_id) +{ + struct hinic3_irq_info *irq_info; + struct hinic3_irq *curr; + u16 i; + + irq_info = &hwdev->cfg_mgmt->irq_info; + mutex_lock(&irq_info->irq_mutex); + for (i = 0; i < irq_info->num_irq; i++) { + curr = irq_info->irq + i; + if (curr->irq_id == irq_id) { + curr->allocated = false; + break; + } + } + mutex_unlock(&irq_info->irq_mutex); +} + bool hinic3_support_nic(struct hinic3_hwdev *hwdev) { return hwdev->cfg_mgmt->cap.supp_svcs_bitmap & diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_hw_comm.c b/drivers/net/ethernet/huawei/hinic3/hinic3_hw_comm.c index 434696ce7dc2..7adcdd569c7b 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_hw_comm.c +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_hw_comm.c @@ -8,6 +8,37 @@ #include "hinic3_hwif.h" #include "hinic3_mbox.h" +int hinic3_set_interrupt_cfg_direct(struct hinic3_hwdev *hwdev, + const struct hinic3_interrupt_info *info) +{ + struct comm_cmd_cfg_msix_ctrl_reg msix_cfg = {}; + struct mgmt_msg_params msg_params = {}; + int err; + + msix_cfg.func_id = hinic3_global_func_id(hwdev); + msix_cfg.msix_index = info->msix_index; + msix_cfg.opcode = MGMT_MSG_CMD_OP_SET; + + msix_cfg.lli_credit_cnt = info->lli_credit_limit; + msix_cfg.lli_timer_cnt = info->lli_timer_cfg; + msix_cfg.pending_cnt = info->pending_limit; + msix_cfg.coalesce_timer_cnt = info->coalesc_timer_cfg; + msix_cfg.resend_timer_cnt = info->resend_timer_cfg; + + mgmt_msg_params_init_default(&msg_params, &msix_cfg, sizeof(msix_cfg)); + + err = hinic3_send_mbox_to_mgmt(hwdev, MGMT_MOD_COMM, + COMM_CMD_CFG_MSIX_CTRL_REG, &msg_params); + if (err || msix_cfg.head.status) { + dev_err(hwdev->dev, + "Failed to set interrupt config, err: %d, status: 0x%x\n", + err, msix_cfg.head.status); + return -EINVAL; + } + + return 0; +} + int hinic3_func_reset(struct hinic3_hwdev *hwdev, u16 func_id, u64 reset_flag) { struct comm_cmd_func_reset func_reset = {}; diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_hw_comm.h b/drivers/net/ethernet/huawei/hinic3/hinic3_hw_comm.h index c33a1c77da9c..2270987b126f 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_hw_comm.h +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_hw_comm.h @@ -8,6 +8,19 @@ struct hinic3_hwdev; +struct hinic3_interrupt_info { + u32 lli_set; + u32 interrupt_coalesc_set; + u16 msix_index; + u8 lli_credit_limit; + u8 lli_timer_cfg; + u8 pending_limit; + u8 coalesc_timer_cfg; + u8 resend_timer_cfg; +}; + +int hinic3_set_interrupt_cfg_direct(struct hinic3_hwdev *hwdev, + const struct hinic3_interrupt_info *info); int hinic3_func_reset(struct hinic3_hwdev *hwdev, u16 func_id, u64 reset_flag); #endif diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_hw_intf.h b/drivers/net/ethernet/huawei/hinic3/hinic3_hw_intf.h index 22c84093efa2..379ba4cb042c 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_hw_intf.h +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_hw_intf.h @@ -70,6 +70,20 @@ enum comm_cmd { COMM_CMD_SET_DMA_ATTR = 25, }; +struct comm_cmd_cfg_msix_ctrl_reg { + struct mgmt_msg_head head; + u16 func_id; + u8 opcode; + u8 rsvd1; + u16 msix_index; + u8 pending_cnt; + u8 coalesce_timer_cnt; + u8 resend_timer_cnt; + u8 lli_timer_cnt; + u8 lli_credit_cnt; + u8 rsvd2[5]; +}; + 
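(A minimal caller sketch, not part of the patch: the coalescing fields of this message are filled from struct hinic3_interrupt_info by hinic3_set_interrupt_cfg_direct() above. It assumes a valid hwdev and an msix_entry_idx obtained from hinic3_alloc_irqs(); the values mirror the HINIC3_DEFAULT_TXRX_MSIX_* defaults introduced later in hinic3_main.c.)

	struct hinic3_interrupt_info info = {};
	int err;

	info.msix_index = msix_entry_idx;	/* entry from hinic3_alloc_irqs() */
	info.interrupt_coalesc_set = 1;	/* apply the coalescing fields below */
	info.pending_limit = 2;	/* HINIC3_DEFAULT_TXRX_MSIX_PENDING_LIMIT */
	info.coalesc_timer_cfg = 25;	/* HINIC3_DEFAULT_TXRX_MSIX_COALESC_TIMER_CFG */
	info.resend_timer_cfg = 7;	/* HINIC3_DEFAULT_TXRX_MSIX_RESEND_TIMER_CFG */
	err = hinic3_set_interrupt_cfg_direct(hwdev, &info);
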
enum comm_func_reset_bits { COMM_FUNC_RESET_BIT_FLUSH = BIT(0), COMM_FUNC_RESET_BIT_MQM = BIT(1), @@ -100,6 +114,28 @@ struct comm_cmd_feature_nego { u64 s_feature[COMM_MAX_FEATURE_QWORD]; }; +struct comm_cmd_set_ceq_ctrl_reg { + struct mgmt_msg_head head; + u16 func_id; + u16 q_id; + u32 ctrl0; + u32 ctrl1; + u32 rsvd1; +}; + +struct comm_cmdq_ctxt_info { + __le64 curr_wqe_page_pfn; + __le64 wq_block_pfn; +}; + +struct comm_cmd_set_cmdq_ctxt { + struct mgmt_msg_head head; + u16 func_id; + u8 cmdq_id; + u8 rsvd1[5]; + struct comm_cmdq_ctxt_info ctxt; +}; + /* Services supported by HW. HW uses these values when delivering events. * HW supports multiple services that are not yet supported by driver * (e.g. RoCE). diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_hwif.c b/drivers/net/ethernet/huawei/hinic3/hinic3_hwif.c index 0865453bf0e7..d4af376b7f35 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_hwif.c +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_hwif.c @@ -6,13 +6,159 @@ #include <linux/io.h> #include "hinic3_common.h" +#include "hinic3_csr.h" #include "hinic3_hwdev.h" #include "hinic3_hwif.h" +/* config BAR4/5 4MB, DB & DWQE both 2MB */ +#define HINIC3_DB_DWQE_SIZE 0x00400000 + +/* db/dwqe page size: 4K */ +#define HINIC3_DB_PAGE_SIZE 0x00001000 +#define HINIC3_DWQE_OFFSET 0x00000800 +#define HINIC3_DB_MAX_AREAS (HINIC3_DB_DWQE_SIZE / HINIC3_DB_PAGE_SIZE) + +#define HINIC3_GET_REG_ADDR(reg) ((reg) & (HINIC3_REGS_FLAG_MASK)) + +static void __iomem *hinic3_reg_addr(struct hinic3_hwif *hwif, u32 reg) +{ + return hwif->cfg_regs_base + HINIC3_GET_REG_ADDR(reg); +} + +u32 hinic3_hwif_read_reg(struct hinic3_hwif *hwif, u32 reg) +{ + void __iomem *addr = hinic3_reg_addr(hwif, reg); + + return ioread32be(addr); +} + +void hinic3_hwif_write_reg(struct hinic3_hwif *hwif, u32 reg, u32 val) +{ + void __iomem *addr = hinic3_reg_addr(hwif, reg); + + iowrite32be(val, addr); +} + +static int get_db_idx(struct hinic3_hwif *hwif, u32 *idx) +{ + struct hinic3_db_area *db_area = &hwif->db_area; + u32 pg_idx; + + spin_lock(&db_area->idx_lock); + pg_idx = find_first_zero_bit(db_area->db_bitmap_array, + db_area->db_max_areas); + if (pg_idx == db_area->db_max_areas) { + spin_unlock(&db_area->idx_lock); + return -ENOMEM; + } + set_bit(pg_idx, db_area->db_bitmap_array); + spin_unlock(&db_area->idx_lock); + + *idx = pg_idx; + + return 0; +} + +static void free_db_idx(struct hinic3_hwif *hwif, u32 idx) +{ + struct hinic3_db_area *db_area = &hwif->db_area; + + spin_lock(&db_area->idx_lock); + clear_bit(idx, db_area->db_bitmap_array); + spin_unlock(&db_area->idx_lock); +} + +void hinic3_free_db_addr(struct hinic3_hwdev *hwdev, const u8 __iomem *db_base) +{ + struct hinic3_hwif *hwif; + uintptr_t distance; + u32 idx; + + hwif = hwdev->hwif; + distance = db_base - hwif->db_base; + idx = distance / HINIC3_DB_PAGE_SIZE; + + free_db_idx(hwif, idx); +} + +int hinic3_alloc_db_addr(struct hinic3_hwdev *hwdev, void __iomem **db_base, + void __iomem **dwqe_base) +{ + struct hinic3_hwif *hwif; + u8 __iomem *addr; + u32 idx; + int err; + + hwif = hwdev->hwif; + + err = get_db_idx(hwif, &idx); + if (err) + return err; + + addr = hwif->db_base + idx * HINIC3_DB_PAGE_SIZE; + *db_base = addr; + + if (dwqe_base) + *dwqe_base = addr + HINIC3_DWQE_OFFSET; + + return 0; +} + void hinic3_set_msix_state(struct hinic3_hwdev *hwdev, u16 msix_idx, enum hinic3_msix_state flag) { - /* Completed by later submission due to LoC limit. 
*/ + struct hinic3_hwif *hwif; + u8 int_msk = 1; + u32 mask_bits; + u32 addr; + + hwif = hwdev->hwif; + + if (flag) + mask_bits = HINIC3_MSI_CLR_INDIR_SET(int_msk, INT_MSK_SET); + else + mask_bits = HINIC3_MSI_CLR_INDIR_SET(int_msk, INT_MSK_CLR); + mask_bits = mask_bits | + HINIC3_MSI_CLR_INDIR_SET(msix_idx, SIMPLE_INDIR_IDX); + + addr = HINIC3_CSR_FUNC_MSI_CLR_WR_ADDR; + hinic3_hwif_write_reg(hwif, addr, mask_bits); +} + +void hinic3_msix_intr_clear_resend_bit(struct hinic3_hwdev *hwdev, u16 msix_idx, + u8 clear_resend_en) +{ + struct hinic3_hwif *hwif; + u32 msix_ctrl, addr; + + hwif = hwdev->hwif; + + msix_ctrl = HINIC3_MSI_CLR_INDIR_SET(msix_idx, SIMPLE_INDIR_IDX) | + HINIC3_MSI_CLR_INDIR_SET(clear_resend_en, RESEND_TIMER_CLR); + + addr = HINIC3_CSR_FUNC_MSI_CLR_WR_ADDR; + hinic3_hwif_write_reg(hwif, addr, msix_ctrl); +} + +void hinic3_set_msix_auto_mask_state(struct hinic3_hwdev *hwdev, u16 msix_idx, + enum hinic3_msix_auto_mask flag) +{ + struct hinic3_hwif *hwif; + u32 mask_bits; + u32 addr; + + hwif = hwdev->hwif; + + if (flag) + mask_bits = HINIC3_MSI_CLR_INDIR_SET(1, AUTO_MSK_SET); + else + mask_bits = HINIC3_MSI_CLR_INDIR_SET(1, AUTO_MSK_CLR); + + mask_bits = mask_bits | + HINIC3_MSI_CLR_INDIR_SET(msix_idx, SIMPLE_INDIR_IDX); + + addr = HINIC3_CSR_FUNC_MSI_CLR_WR_ADDR; + hinic3_hwif_write_reg(hwif, addr, mask_bits); } u16 hinic3_global_func_id(struct hinic3_hwdev *hwdev) diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_hwif.h b/drivers/net/ethernet/huawei/hinic3/hinic3_hwif.h index 513c9680e6b6..29dd86eb458a 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_hwif.h +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_hwif.h @@ -50,8 +50,24 @@ enum hinic3_msix_state { HINIC3_MSIX_DISABLE, }; +enum hinic3_msix_auto_mask { + HINIC3_CLR_MSIX_AUTO_MASK, + HINIC3_SET_MSIX_AUTO_MASK, +}; + +u32 hinic3_hwif_read_reg(struct hinic3_hwif *hwif, u32 reg); +void hinic3_hwif_write_reg(struct hinic3_hwif *hwif, u32 reg, u32 val); + +int hinic3_alloc_db_addr(struct hinic3_hwdev *hwdev, void __iomem **db_base, + void __iomem **dwqe_base); +void hinic3_free_db_addr(struct hinic3_hwdev *hwdev, const u8 __iomem *db_base); + void hinic3_set_msix_state(struct hinic3_hwdev *hwdev, u16 msix_idx, enum hinic3_msix_state flag); +void hinic3_msix_intr_clear_resend_bit(struct hinic3_hwdev *hwdev, u16 msix_idx, + u8 clear_resend_en); +void hinic3_set_msix_auto_mask_state(struct hinic3_hwdev *hwdev, u16 msix_idx, + enum hinic3_msix_auto_mask flag); u16 hinic3_global_func_id(struct hinic3_hwdev *hwdev); diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_irq.c b/drivers/net/ethernet/huawei/hinic3/hinic3_irq.c index 8b92eed25edf..33eb9080739d 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_irq.c +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_irq.c @@ -38,7 +38,7 @@ static int hinic3_poll(struct napi_struct *napi, int budget) return work_done; } -void qp_add_napi(struct hinic3_irq_cfg *irq_cfg) +static void qp_add_napi(struct hinic3_irq_cfg *irq_cfg) { struct hinic3_nic_dev *nic_dev = netdev_priv(irq_cfg->netdev); @@ -50,7 +50,7 @@ void qp_add_napi(struct hinic3_irq_cfg *irq_cfg) napi_enable(&irq_cfg->napi); } -void qp_del_napi(struct hinic3_irq_cfg *irq_cfg) +static void qp_del_napi(struct hinic3_irq_cfg *irq_cfg) { napi_disable(&irq_cfg->napi); netif_queue_set_napi(irq_cfg->netdev, irq_cfg->irq_id, @@ -60,3 +60,135 @@ void qp_del_napi(struct hinic3_irq_cfg *irq_cfg) netif_stop_subqueue(irq_cfg->netdev, irq_cfg->irq_id); netif_napi_del(&irq_cfg->napi); } + +static irqreturn_t qp_irq(int irq, 
void *data) +{ + struct hinic3_irq_cfg *irq_cfg = data; + struct hinic3_nic_dev *nic_dev; + + nic_dev = netdev_priv(irq_cfg->netdev); + hinic3_msix_intr_clear_resend_bit(nic_dev->hwdev, + irq_cfg->msix_entry_idx, 1); + + napi_schedule(&irq_cfg->napi); + + return IRQ_HANDLED; +} + +static int hinic3_request_irq(struct hinic3_irq_cfg *irq_cfg, u16 q_id) +{ + struct hinic3_interrupt_info info = {}; + struct hinic3_nic_dev *nic_dev; + struct net_device *netdev; + int err; + + netdev = irq_cfg->netdev; + nic_dev = netdev_priv(netdev); + qp_add_napi(irq_cfg); + + info.msix_index = irq_cfg->msix_entry_idx; + info.interrupt_coalesc_set = 1; + info.pending_limit = nic_dev->intr_coalesce[q_id].pending_limit; + info.coalesc_timer_cfg = + nic_dev->intr_coalesce[q_id].coalesce_timer_cfg; + info.resend_timer_cfg = nic_dev->intr_coalesce[q_id].resend_timer_cfg; + err = hinic3_set_interrupt_cfg_direct(nic_dev->hwdev, &info); + if (err) { + netdev_err(netdev, "Failed to set RX interrupt coalescing attribute.\n"); + qp_del_napi(irq_cfg); + return err; + } + + err = request_irq(irq_cfg->irq_id, qp_irq, 0, irq_cfg->irq_name, + irq_cfg); + if (err) { + qp_del_napi(irq_cfg); + return err; + } + + irq_set_affinity_hint(irq_cfg->irq_id, &irq_cfg->affinity_mask); + + return 0; +} + +static void hinic3_release_irq(struct hinic3_irq_cfg *irq_cfg) +{ + irq_set_affinity_hint(irq_cfg->irq_id, NULL); + free_irq(irq_cfg->irq_id, irq_cfg); +} + +int hinic3_qps_irq_init(struct net_device *netdev) +{ + struct hinic3_nic_dev *nic_dev = netdev_priv(netdev); + struct pci_dev *pdev = nic_dev->pdev; + struct hinic3_irq_cfg *irq_cfg; + struct msix_entry *msix_entry; + u32 local_cpu; + u16 q_id; + int err; + + for (q_id = 0; q_id < nic_dev->q_params.num_qps; q_id++) { + msix_entry = &nic_dev->qps_msix_entries[q_id]; + irq_cfg = &nic_dev->q_params.irq_cfg[q_id]; + + irq_cfg->irq_id = msix_entry->vector; + irq_cfg->msix_entry_idx = msix_entry->entry; + irq_cfg->netdev = netdev; + irq_cfg->txq = &nic_dev->txqs[q_id]; + irq_cfg->rxq = &nic_dev->rxqs[q_id]; + nic_dev->rxqs[q_id].irq_cfg = irq_cfg; + + local_cpu = cpumask_local_spread(q_id, dev_to_node(&pdev->dev)); + cpumask_set_cpu(local_cpu, &irq_cfg->affinity_mask); + + snprintf(irq_cfg->irq_name, sizeof(irq_cfg->irq_name), + "%s_qp%u", netdev->name, q_id); + + err = hinic3_request_irq(irq_cfg, q_id); + if (err) { + netdev_err(netdev, "Failed to request Rx irq\n"); + goto err_release_irqs; + } + + hinic3_set_msix_auto_mask_state(nic_dev->hwdev, + irq_cfg->msix_entry_idx, + HINIC3_SET_MSIX_AUTO_MASK); + hinic3_set_msix_state(nic_dev->hwdev, irq_cfg->msix_entry_idx, + HINIC3_MSIX_ENABLE); + } + + return 0; + +err_release_irqs: + while (q_id > 0) { + q_id--; + irq_cfg = &nic_dev->q_params.irq_cfg[q_id]; + qp_del_napi(irq_cfg); + hinic3_set_msix_state(nic_dev->hwdev, irq_cfg->msix_entry_idx, + HINIC3_MSIX_DISABLE); + hinic3_set_msix_auto_mask_state(nic_dev->hwdev, + irq_cfg->msix_entry_idx, + HINIC3_CLR_MSIX_AUTO_MASK); + hinic3_release_irq(irq_cfg); + } + + return err; +} + +void hinic3_qps_irq_uninit(struct net_device *netdev) +{ + struct hinic3_nic_dev *nic_dev = netdev_priv(netdev); + struct hinic3_irq_cfg *irq_cfg; + u16 q_id; + + for (q_id = 0; q_id < nic_dev->q_params.num_qps; q_id++) { + irq_cfg = &nic_dev->q_params.irq_cfg[q_id]; + qp_del_napi(irq_cfg); + hinic3_set_msix_state(nic_dev->hwdev, irq_cfg->msix_entry_idx, + HINIC3_MSIX_DISABLE); + hinic3_set_msix_auto_mask_state(nic_dev->hwdev, + irq_cfg->msix_entry_idx, + HINIC3_CLR_MSIX_AUTO_MASK); + hinic3_release_irq(irq_cfg); + 
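/* teardown mirrors the error path of hinic3_qps_irq_init(): NAPI is deleted and the MSI-X entry masked before the IRQ is freed */ +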
} +} diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_main.c b/drivers/net/ethernet/huawei/hinic3/hinic3_main.c index 497f2a36f35d..a0b04fb07c76 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_main.c +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_main.c @@ -17,12 +17,53 @@ #define HINIC3_NIC_DRV_DESC "Intelligent Network Interface Card Driver" -#define HINIC3_RX_BUF_LEN 2048 -#define HINIC3_LRO_REPLENISH_THLD 256 -#define HINIC3_NIC_DEV_WQ_NAME "hinic3_nic_dev_wq" +#define HINIC3_RX_BUF_LEN 2048 +#define HINIC3_LRO_REPLENISH_THLD 256 +#define HINIC3_NIC_DEV_WQ_NAME "hinic3_nic_dev_wq" -#define HINIC3_SQ_DEPTH 1024 -#define HINIC3_RQ_DEPTH 1024 +#define HINIC3_SQ_DEPTH 1024 +#define HINIC3_RQ_DEPTH 1024 + +#define HINIC3_DEFAULT_TXRX_MSIX_PENDING_LIMIT 2 +#define HINIC3_DEFAULT_TXRX_MSIX_COALESC_TIMER_CFG 25 +#define HINIC3_DEFAULT_TXRX_MSIX_RESEND_TIMER_CFG 7 + +static void init_intr_coal_param(struct net_device *netdev) +{ + struct hinic3_nic_dev *nic_dev = netdev_priv(netdev); + struct hinic3_intr_coal_info *info; + u16 i; + + for (i = 0; i < nic_dev->max_qps; i++) { + info = &nic_dev->intr_coalesce[i]; + info->pending_limit = HINIC3_DEFAULT_TXRX_MSIX_PENDING_LIMIT; + info->coalesce_timer_cfg = HINIC3_DEFAULT_TXRX_MSIX_COALESC_TIMER_CFG; + info->resend_timer_cfg = HINIC3_DEFAULT_TXRX_MSIX_RESEND_TIMER_CFG; + } +} + +static int hinic3_init_intr_coalesce(struct net_device *netdev) +{ + struct hinic3_nic_dev *nic_dev = netdev_priv(netdev); + + nic_dev->intr_coalesce = kcalloc(nic_dev->max_qps, + sizeof(*nic_dev->intr_coalesce), + GFP_KERNEL); + + if (!nic_dev->intr_coalesce) + return -ENOMEM; + + init_intr_coal_param(netdev); + + return 0; +} + +static void hinic3_free_intr_coalesce(struct net_device *netdev) +{ + struct hinic3_nic_dev *nic_dev = netdev_priv(netdev); + + kfree(nic_dev->intr_coalesce); +} static int hinic3_alloc_txrxqs(struct net_device *netdev) { @@ -42,8 +83,17 @@ static int hinic3_alloc_txrxqs(struct net_device *netdev) goto err_free_txqs; } + err = hinic3_init_intr_coalesce(netdev); + if (err) { + dev_err(hwdev->dev, "Failed to init_intr_coalesce\n"); + goto err_free_rxqs; + } + return 0; +err_free_rxqs: + hinic3_free_rxqs(netdev); + err_free_txqs: hinic3_free_txqs(netdev); @@ -52,6 +102,7 @@ err_free_txqs: static void hinic3_free_txrxqs(struct net_device *netdev) { + hinic3_free_intr_coalesce(netdev); hinic3_free_rxqs(netdev); hinic3_free_txqs(netdev); } diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_mbox.c b/drivers/net/ethernet/huawei/hinic3/hinic3_mbox.c index e74d1eb09730..cf67e26acece 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_mbox.c +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_mbox.c @@ -4,13 +4,857 @@ #include <linux/dma-mapping.h> #include "hinic3_common.h" +#include "hinic3_csr.h" #include "hinic3_hwdev.h" #include "hinic3_hwif.h" #include "hinic3_mbox.h" +#define MBOX_INT_DST_AEQN_MASK GENMASK(11, 10) +#define MBOX_INT_SRC_RESP_AEQN_MASK GENMASK(13, 12) +#define MBOX_INT_STAT_DMA_MASK GENMASK(19, 14) +/* TX size, expressed in 4 bytes units */ +#define MBOX_INT_TX_SIZE_MASK GENMASK(24, 20) +/* SO_RO == strong order, relaxed order */ +#define MBOX_INT_STAT_DMA_SO_RO_MASK GENMASK(26, 25) +#define MBOX_INT_WB_EN_MASK BIT(28) +#define MBOX_INT_SET(val, field) \ + FIELD_PREP(MBOX_INT_##field##_MASK, val) + +#define MBOX_CTRL_TRIGGER_AEQE_MASK BIT(0) +#define MBOX_CTRL_TX_STATUS_MASK BIT(1) +#define MBOX_CTRL_DST_FUNC_MASK GENMASK(28, 16) +#define MBOX_CTRL_SET(val, field) \ + FIELD_PREP(MBOX_CTRL_##field##_MASK, val) + +#define 
MBOX_MSG_POLLING_TIMEOUT_MS 8000 /* send msg seg timeout */
+#define MBOX_COMP_POLLING_TIMEOUT_MS 40000 /* response */
+
+#define MBOX_MAX_BUF_SZ 2048
+#define MBOX_HEADER_SZ 8
+
+/* MBOX size is 64B, 8B for mbox_header, 8B reserved */
+#define MBOX_SEG_LEN 48
+#define MBOX_SEG_LEN_ALIGN 4
+#define MBOX_WB_STATUS_LEN 16
+
+#define MBOX_SEQ_ID_START_VAL 0
+#define MBOX_SEQ_ID_MAX_VAL 42
+#define MBOX_LAST_SEG_MAX_LEN \
+	(MBOX_MAX_BUF_SZ - MBOX_SEQ_ID_MAX_VAL * MBOX_SEG_LEN)
+
+/* mbox write back status is 16B, only first 4B is used */
+#define MBOX_WB_STATUS_ERRCODE_MASK 0xFFFF
+#define MBOX_WB_STATUS_MASK 0xFF
+#define MBOX_WB_ERROR_CODE_MASK 0xFF00
+#define MBOX_WB_STATUS_FINISHED_SUCCESS 0xFF
+#define MBOX_WB_STATUS_NOT_FINISHED 0x00
+
+#define MBOX_STATUS_FINISHED(wb) \
+	((FIELD_PREP(MBOX_WB_STATUS_MASK, (wb))) != MBOX_WB_STATUS_NOT_FINISHED)
+#define MBOX_STATUS_SUCCESS(wb) \
+	((FIELD_PREP(MBOX_WB_STATUS_MASK, (wb))) == \
+	 MBOX_WB_STATUS_FINISHED_SUCCESS)
+#define MBOX_STATUS_ERRCODE(wb) \
+	((wb) & MBOX_WB_ERROR_CODE_MASK)
+
+#define MBOX_DMA_MSG_QUEUE_DEPTH 32
+#define MBOX_AREA(hwif) \
+	((hwif)->cfg_regs_base + HINIC3_FUNC_CSR_MAILBOX_DATA_OFF)
+
+#define MBOX_MQ_CI_OFFSET \
+	(HINIC3_CFG_REGS_FLAG + HINIC3_FUNC_CSR_MAILBOX_DATA_OFF + \
+	 MBOX_HEADER_SZ + MBOX_SEG_LEN)
+
+#define MBOX_MQ_SYNC_CI_MASK GENMASK(7, 0)
+#define MBOX_MQ_ASYNC_CI_MASK GENMASK(15, 8)
+#define MBOX_MQ_CI_GET(val, field) \
+	FIELD_GET(MBOX_MQ_##field##_CI_MASK, val)
+
+#define MBOX_MGMT_FUNC_ID 0x1FFF
+#define MBOX_COMM_F_MBOX_SEGMENT BIT(3)
+
+static u8 *get_mbox_body_from_hdr(u8 *header)
+{
+	return header + MBOX_HEADER_SZ;
+}
+
+static struct hinic3_msg_desc *get_mbox_msg_desc(struct hinic3_mbox *mbox,
+						 enum mbox_msg_direction_type dir,
+						 u16 src_func_id)
+{
+	struct hinic3_msg_channel *msg_ch;
+
+	msg_ch = (src_func_id == MBOX_MGMT_FUNC_ID) ?
+		 &mbox->mgmt_msg : mbox->func_msg;
+
+	return (dir == MBOX_MSG_SEND) ?
+		&msg_ch->recv_msg : &msg_ch->resp_msg;
+}
+
+static void resp_mbox_handler(struct hinic3_mbox *mbox,
+			      const struct hinic3_msg_desc *msg_desc)
+{
+	spin_lock(&mbox->mbox_lock);
+	if (msg_desc->msg_info.msg_id == mbox->send_msg_id &&
+	    mbox->event_flag == MBOX_EVENT_START)
+		mbox->event_flag = MBOX_EVENT_SUCCESS;
+	spin_unlock(&mbox->mbox_lock);
+}
+
+static bool mbox_segment_valid(struct hinic3_mbox *mbox,
+			       struct hinic3_msg_desc *msg_desc,
+			       __le64 mbox_header)
+{
+	u8 seq_id, seg_len, msg_id, mod;
+	__le16 src_func_idx, cmd;
+
+	seq_id = MBOX_MSG_HEADER_GET(mbox_header, SEQID);
+	seg_len = MBOX_MSG_HEADER_GET(mbox_header, SEG_LEN);
+	msg_id = MBOX_MSG_HEADER_GET(mbox_header, MSG_ID);
+	mod = MBOX_MSG_HEADER_GET(mbox_header, MODULE);
+	cmd = cpu_to_le16(MBOX_MSG_HEADER_GET(mbox_header, CMD));
+	src_func_idx = cpu_to_le16(MBOX_MSG_HEADER_GET(mbox_header,
+						       SRC_GLB_FUNC_IDX));
+
+	if (seq_id > MBOX_SEQ_ID_MAX_VAL || seg_len > MBOX_SEG_LEN ||
+	    (seq_id == MBOX_SEQ_ID_MAX_VAL && seg_len > MBOX_LAST_SEG_MAX_LEN))
+		goto err_seg;
+
+	if (seq_id == 0) {
+		msg_desc->seq_id = seq_id;
+		msg_desc->msg_info.msg_id = msg_id;
+		msg_desc->mod = mod;
+		msg_desc->cmd = cmd;
+	} else {
+		if (seq_id != msg_desc->seq_id + 1 ||
+		    msg_id != msg_desc->msg_info.msg_id ||
+		    mod != msg_desc->mod || cmd != msg_desc->cmd)
+			goto err_seg;
+
+		msg_desc->seq_id = seq_id;
+	}
+
+	return true;
+
+err_seg:
+	dev_err(mbox->hwdev->dev,
+		"Mailbox segment check failed, src func id: 0x%x, front seg info: seq id: 0x%x, msg id: 0x%x, mod: 0x%x, cmd: 0x%x\n",
+		src_func_idx, msg_desc->seq_id, msg_desc->msg_info.msg_id,
+		msg_desc->mod, msg_desc->cmd);
+	dev_err(mbox->hwdev->dev,
+		"Current seg info: seg len: 0x%x, seq id: 0x%x, msg id: 0x%x, mod: 0x%x, cmd: 0x%x\n",
+		seg_len, seq_id, msg_id, mod, cmd);
+
+	return false;
+}
+
+static void recv_mbox_handler(struct hinic3_mbox *mbox,
+			      u8 *header, struct hinic3_msg_desc *msg_desc)
+{
+	__le64 mbox_header = *((__force __le64 *)header);
+	u8 *mbox_body = get_mbox_body_from_hdr(header);
+	u8 seq_id, seg_len;
+	int pos;
+
+	if (!mbox_segment_valid(mbox, msg_desc, mbox_header)) {
+		msg_desc->seq_id = MBOX_SEQ_ID_MAX_VAL;
+		return;
+	}
+
+	seq_id = MBOX_MSG_HEADER_GET(mbox_header, SEQID);
+	seg_len = MBOX_MSG_HEADER_GET(mbox_header, SEG_LEN);
+
+	pos = seq_id * MBOX_SEG_LEN;
+	memcpy(msg_desc->msg + pos, mbox_body, seg_len);
+
+	if (!MBOX_MSG_HEADER_GET(mbox_header, LAST))
+		return;
+
+	msg_desc->msg_len = cpu_to_le16(MBOX_MSG_HEADER_GET(mbox_header,
+							    MSG_LEN));
+	msg_desc->msg_info.status = MBOX_MSG_HEADER_GET(mbox_header, STATUS);
+
+	if (MBOX_MSG_HEADER_GET(mbox_header, DIRECTION) == MBOX_MSG_RESP)
+		resp_mbox_handler(mbox, msg_desc);
+}
+
+void hinic3_mbox_func_aeqe_handler(struct hinic3_hwdev *hwdev, u8 *header,
+				   u8 size)
+{
+	__le64 mbox_header = *((__force __le64 *)header);
+	enum mbox_msg_direction_type dir;
+	struct hinic3_msg_desc *msg_desc;
+	struct hinic3_mbox *mbox;
+	u16 src_func_id;
+
+	mbox = hwdev->mbox;
+	dir = MBOX_MSG_HEADER_GET(mbox_header, DIRECTION);
+	src_func_id = MBOX_MSG_HEADER_GET(mbox_header, SRC_GLB_FUNC_IDX);
+	msg_desc = get_mbox_msg_desc(mbox, dir, src_func_id);
+	recv_mbox_handler(mbox, header, msg_desc);
+}
+
+static int init_mbox_dma_queue(struct hinic3_hwdev *hwdev,
+			       struct mbox_dma_queue *mq)
+{
+	u32 size;
+
+	mq->depth = MBOX_DMA_MSG_QUEUE_DEPTH;
+	mq->prod_idx = 0;
+	mq->cons_idx = 0;
+
+	size = mq->depth * MBOX_MAX_BUF_SZ;
+	mq->dma_buf_vaddr = dma_alloc_coherent(hwdev->dev, size,
+					       &mq->dma_buf_paddr,
+					       GFP_KERNEL);
+	if
(!mq->dma_buf_vaddr) + return -ENOMEM; + + return 0; +} + +static void uninit_mbox_dma_queue(struct hinic3_hwdev *hwdev, + struct mbox_dma_queue *mq) +{ + dma_free_coherent(hwdev->dev, mq->depth * MBOX_MAX_BUF_SZ, + mq->dma_buf_vaddr, mq->dma_buf_paddr); +} + +static int hinic3_init_mbox_dma_queue(struct hinic3_mbox *mbox) +{ + u32 val; + int err; + + err = init_mbox_dma_queue(mbox->hwdev, &mbox->sync_msg_queue); + if (err) + return err; + + err = init_mbox_dma_queue(mbox->hwdev, &mbox->async_msg_queue); + if (err) { + uninit_mbox_dma_queue(mbox->hwdev, &mbox->sync_msg_queue); + return err; + } + + val = hinic3_hwif_read_reg(mbox->hwdev->hwif, MBOX_MQ_CI_OFFSET); + val &= ~MBOX_MQ_SYNC_CI_MASK; + val &= ~MBOX_MQ_ASYNC_CI_MASK; + hinic3_hwif_write_reg(mbox->hwdev->hwif, MBOX_MQ_CI_OFFSET, val); + + return 0; +} + +static void hinic3_uninit_mbox_dma_queue(struct hinic3_mbox *mbox) +{ + uninit_mbox_dma_queue(mbox->hwdev, &mbox->sync_msg_queue); + uninit_mbox_dma_queue(mbox->hwdev, &mbox->async_msg_queue); +} + +static int alloc_mbox_msg_channel(struct hinic3_msg_channel *msg_ch) +{ + msg_ch->resp_msg.msg = kzalloc(MBOX_MAX_BUF_SZ, GFP_KERNEL); + if (!msg_ch->resp_msg.msg) + return -ENOMEM; + + msg_ch->recv_msg.msg = kzalloc(MBOX_MAX_BUF_SZ, GFP_KERNEL); + if (!msg_ch->recv_msg.msg) { + kfree(msg_ch->resp_msg.msg); + return -ENOMEM; + } + + msg_ch->resp_msg.seq_id = MBOX_SEQ_ID_MAX_VAL; + msg_ch->recv_msg.seq_id = MBOX_SEQ_ID_MAX_VAL; + + return 0; +} + +static void free_mbox_msg_channel(struct hinic3_msg_channel *msg_ch) +{ + kfree(msg_ch->recv_msg.msg); + kfree(msg_ch->resp_msg.msg); +} + +static int init_mgmt_msg_channel(struct hinic3_mbox *mbox) +{ + int err; + + err = alloc_mbox_msg_channel(&mbox->mgmt_msg); + if (err) { + dev_err(mbox->hwdev->dev, "Failed to alloc mgmt message channel\n"); + return err; + } + + err = hinic3_init_mbox_dma_queue(mbox); + if (err) { + dev_err(mbox->hwdev->dev, "Failed to init mbox dma queue\n"); + free_mbox_msg_channel(&mbox->mgmt_msg); + return err; + } + + return 0; +} + +static void uninit_mgmt_msg_channel(struct hinic3_mbox *mbox) +{ + hinic3_uninit_mbox_dma_queue(mbox); + free_mbox_msg_channel(&mbox->mgmt_msg); +} + +static int hinic3_init_func_mbox_msg_channel(struct hinic3_hwdev *hwdev) +{ + struct hinic3_mbox *mbox; + int err; + + mbox = hwdev->mbox; + mbox->func_msg = kzalloc(sizeof(*mbox->func_msg), GFP_KERNEL); + if (!mbox->func_msg) + return -ENOMEM; + + err = alloc_mbox_msg_channel(mbox->func_msg); + if (err) + goto err_free_func_msg; + + return 0; + +err_free_func_msg: + kfree(mbox->func_msg); + mbox->func_msg = NULL; + + return err; +} + +static void hinic3_uninit_func_mbox_msg_channel(struct hinic3_hwdev *hwdev) +{ + struct hinic3_mbox *mbox = hwdev->mbox; + + free_mbox_msg_channel(mbox->func_msg); + kfree(mbox->func_msg); + mbox->func_msg = NULL; +} + +static void prepare_send_mbox(struct hinic3_mbox *mbox) +{ + struct hinic3_send_mbox *send_mbox = &mbox->send_mbox; + + send_mbox->data = MBOX_AREA(mbox->hwdev->hwif); +} + +static int alloc_mbox_wb_status(struct hinic3_mbox *mbox) +{ + struct hinic3_send_mbox *send_mbox = &mbox->send_mbox; + struct hinic3_hwdev *hwdev = mbox->hwdev; + u32 addr_h, addr_l; + + send_mbox->wb_vaddr = dma_alloc_coherent(hwdev->dev, + MBOX_WB_STATUS_LEN, + &send_mbox->wb_paddr, + GFP_KERNEL); + if (!send_mbox->wb_vaddr) + return -ENOMEM; + + addr_h = upper_32_bits(send_mbox->wb_paddr); + addr_l = lower_32_bits(send_mbox->wb_paddr); + hinic3_hwif_write_reg(hwdev->hwif, HINIC3_FUNC_CSR_MAILBOX_RESULT_H_OFF, + 
addr_h); + hinic3_hwif_write_reg(hwdev->hwif, HINIC3_FUNC_CSR_MAILBOX_RESULT_L_OFF, + addr_l); + + return 0; +} + +static void free_mbox_wb_status(struct hinic3_mbox *mbox) +{ + struct hinic3_send_mbox *send_mbox = &mbox->send_mbox; + struct hinic3_hwdev *hwdev = mbox->hwdev; + + hinic3_hwif_write_reg(hwdev->hwif, HINIC3_FUNC_CSR_MAILBOX_RESULT_H_OFF, + 0); + hinic3_hwif_write_reg(hwdev->hwif, HINIC3_FUNC_CSR_MAILBOX_RESULT_L_OFF, + 0); + + dma_free_coherent(hwdev->dev, MBOX_WB_STATUS_LEN, + send_mbox->wb_vaddr, send_mbox->wb_paddr); +} + +static int hinic3_mbox_pre_init(struct hinic3_hwdev *hwdev, + struct hinic3_mbox *mbox) +{ + mbox->hwdev = hwdev; + mutex_init(&mbox->mbox_send_lock); + spin_lock_init(&mbox->mbox_lock); + + mbox->workq = create_singlethread_workqueue(HINIC3_MBOX_WQ_NAME); + if (!mbox->workq) { + dev_err(hwdev->dev, "Failed to initialize MBOX workqueue\n"); + return -ENOMEM; + } + hwdev->mbox = mbox; + + return 0; +} + +int hinic3_init_mbox(struct hinic3_hwdev *hwdev) +{ + struct hinic3_mbox *mbox; + int err; + + mbox = kzalloc(sizeof(*mbox), GFP_KERNEL); + if (!mbox) + return -ENOMEM; + + err = hinic3_mbox_pre_init(hwdev, mbox); + if (err) + goto err_free_mbox; + + err = init_mgmt_msg_channel(mbox); + if (err) + goto err_destroy_workqueue; + + err = hinic3_init_func_mbox_msg_channel(hwdev); + if (err) + goto err_uninit_mgmt_msg_ch; + + err = alloc_mbox_wb_status(mbox); + if (err) { + dev_err(hwdev->dev, "Failed to alloc mbox write back status\n"); + goto err_uninit_func_mbox_msg_ch; + } + + prepare_send_mbox(mbox); + + return 0; + +err_uninit_func_mbox_msg_ch: + hinic3_uninit_func_mbox_msg_channel(hwdev); + +err_uninit_mgmt_msg_ch: + uninit_mgmt_msg_channel(mbox); + +err_destroy_workqueue: + destroy_workqueue(mbox->workq); + +err_free_mbox: + kfree(mbox); + + return err; +} + +void hinic3_free_mbox(struct hinic3_hwdev *hwdev) +{ + struct hinic3_mbox *mbox = hwdev->mbox; + + destroy_workqueue(mbox->workq); + free_mbox_wb_status(mbox); + hinic3_uninit_func_mbox_msg_channel(hwdev); + uninit_mgmt_msg_channel(mbox); + kfree(mbox); +} + +#define MBOX_DMA_MSG_INIT_XOR_VAL 0x5a5a5a5a +#define MBOX_XOR_DATA_ALIGN 4 +static u32 mbox_dma_msg_xor(u32 *data, u32 msg_len) +{ + u32 xor = MBOX_DMA_MSG_INIT_XOR_VAL; + u32 dw_len = msg_len / sizeof(u32); + u32 i; + + for (i = 0; i < dw_len; i++) + xor ^= data[i]; + + return xor; +} + +#define MBOX_MQ_ID_MASK(mq, idx) ((idx) & ((mq)->depth - 1)) + +static bool is_msg_queue_full(struct mbox_dma_queue *mq) +{ + return MBOX_MQ_ID_MASK(mq, (mq)->prod_idx + 1) == + MBOX_MQ_ID_MASK(mq, (mq)->cons_idx); +} + +static int mbox_prepare_dma_entry(struct hinic3_mbox *mbox, + struct mbox_dma_queue *mq, + struct mbox_dma_msg *dma_msg, + const void *msg, u32 msg_len) +{ + u64 dma_addr, offset; + void *dma_vaddr; + + if (is_msg_queue_full(mq)) { + dev_err(mbox->hwdev->dev, "Mbox sync message queue is busy, pi: %u, ci: %u\n", + mq->prod_idx, MBOX_MQ_ID_MASK(mq, mq->cons_idx)); + return -EBUSY; + } + + /* copy data to DMA buffer */ + offset = mq->prod_idx * MBOX_MAX_BUF_SZ; + dma_vaddr = (u8 *)mq->dma_buf_vaddr + offset; + memcpy(dma_vaddr, msg, msg_len); + dma_addr = mq->dma_buf_paddr + offset; + dma_msg->dma_addr_high = cpu_to_le32(upper_32_bits(dma_addr)); + dma_msg->dma_addr_low = cpu_to_le32(lower_32_bits(dma_addr)); + dma_msg->msg_len = cpu_to_le32(msg_len); + /* The firmware obtains message based on 4B alignment. 
*/ + dma_msg->xor = cpu_to_le32(mbox_dma_msg_xor(dma_vaddr, + ALIGN(msg_len, MBOX_XOR_DATA_ALIGN))); + mq->prod_idx++; + mq->prod_idx = MBOX_MQ_ID_MASK(mq, mq->prod_idx); + + return 0; +} + +static int mbox_prepare_dma_msg(struct hinic3_mbox *mbox, + enum mbox_msg_ack_type ack_type, + struct mbox_dma_msg *dma_msg, const void *msg, + u32 msg_len) +{ + struct mbox_dma_queue *mq; + u32 val; + + val = hinic3_hwif_read_reg(mbox->hwdev->hwif, MBOX_MQ_CI_OFFSET); + if (ack_type == MBOX_MSG_ACK) { + mq = &mbox->sync_msg_queue; + mq->cons_idx = MBOX_MQ_CI_GET(val, SYNC); + } else { + mq = &mbox->async_msg_queue; + mq->cons_idx = MBOX_MQ_CI_GET(val, ASYNC); + } + + return mbox_prepare_dma_entry(mbox, mq, dma_msg, msg, msg_len); +} + +static void clear_mbox_status(struct hinic3_send_mbox *mbox) +{ + __be64 *wb_status = mbox->wb_vaddr; + + *wb_status = 0; + /* clear mailbox write back status */ + wmb(); +} + +static void mbox_dword_write(const void *src, void __iomem *dst, u32 count) +{ + const __le32 *src32 = src; + u32 __iomem *dst32 = dst; + u32 i; + + /* Data written to mbox is arranged in structs with little endian fields + * but when written to HW every dword (32bits) should be swapped since + * the HW will swap it again. + */ + for (i = 0; i < count; i++) + __raw_writel(swab32((__force __u32)src32[i]), dst32 + i); +} + +static void mbox_copy_header(struct hinic3_hwdev *hwdev, + struct hinic3_send_mbox *mbox, __le64 *header) +{ + mbox_dword_write(header, mbox->data, MBOX_HEADER_SZ / sizeof(__le32)); +} + +static void mbox_copy_send_data(struct hinic3_hwdev *hwdev, + struct hinic3_send_mbox *mbox, void *seg, + u32 seg_len) +{ + u32 __iomem *dst = (u32 __iomem *)(mbox->data + MBOX_HEADER_SZ); + u32 count, leftover, last_dword; + const __le32 *src = seg; + + count = seg_len / sizeof(u32); + leftover = seg_len % sizeof(u32); + if (count > 0) + mbox_dword_write(src, dst, count); + + if (leftover > 0) { + last_dword = 0; + memcpy(&last_dword, src + count, leftover); + mbox_dword_write(&last_dword, dst + count, 1); + } +} + +static void write_mbox_msg_attr(struct hinic3_mbox *mbox, + u16 dst_func, u16 dst_aeqn, u32 seg_len) +{ + struct hinic3_hwif *hwif = mbox->hwdev->hwif; + u32 mbox_int, mbox_ctrl, tx_size; + + tx_size = ALIGN(seg_len + MBOX_HEADER_SZ, MBOX_SEG_LEN_ALIGN) >> 2; + + mbox_int = MBOX_INT_SET(dst_aeqn, DST_AEQN) | + MBOX_INT_SET(0, STAT_DMA) | + MBOX_INT_SET(tx_size, TX_SIZE) | + MBOX_INT_SET(0, STAT_DMA_SO_RO) | + MBOX_INT_SET(1, WB_EN); + + mbox_ctrl = MBOX_CTRL_SET(1, TX_STATUS) | + MBOX_CTRL_SET(0, TRIGGER_AEQE) | + MBOX_CTRL_SET(dst_func, DST_FUNC); + + hinic3_hwif_write_reg(hwif, HINIC3_FUNC_CSR_MAILBOX_INT_OFF, mbox_int); + hinic3_hwif_write_reg(hwif, HINIC3_FUNC_CSR_MAILBOX_CONTROL_OFF, + mbox_ctrl); +} + +static u16 get_mbox_status(const struct hinic3_send_mbox *mbox) +{ + __be64 *wb_status = mbox->wb_vaddr; + u64 wb_val; + + wb_val = be64_to_cpu(*wb_status); + /* verify reading before check */ + rmb(); + + return wb_val & MBOX_WB_STATUS_ERRCODE_MASK; +} + +static enum hinic3_wait_return check_mbox_wb_status(void *priv_data) +{ + struct hinic3_mbox *mbox = priv_data; + u16 wb_status; + + wb_status = get_mbox_status(&mbox->send_mbox); + + return MBOX_STATUS_FINISHED(wb_status) ? 
+ HINIC3_WAIT_PROCESS_CPL : HINIC3_WAIT_PROCESS_WAITING; +} + +static int send_mbox_seg(struct hinic3_mbox *mbox, __le64 header, + u16 dst_func, void *seg, u32 seg_len, void *msg_info) +{ + struct hinic3_send_mbox *send_mbox = &mbox->send_mbox; + struct hinic3_hwdev *hwdev = mbox->hwdev; + u8 num_aeqs = hwdev->hwif->attr.num_aeqs; + enum mbox_msg_direction_type dir; + u16 dst_aeqn, wb_status, errcode; + int err; + + /* mbox to mgmt cpu, hardware doesn't care about dst aeq id */ + if (num_aeqs > MBOX_MSG_AEQ_FOR_MBOX) { + dir = MBOX_MSG_HEADER_GET(header, DIRECTION); + dst_aeqn = (dir == MBOX_MSG_SEND) ? + MBOX_MSG_AEQ_FOR_EVENT : MBOX_MSG_AEQ_FOR_MBOX; + } else { + dst_aeqn = 0; + } + + clear_mbox_status(send_mbox); + mbox_copy_header(hwdev, send_mbox, &header); + mbox_copy_send_data(hwdev, send_mbox, seg, seg_len); + write_mbox_msg_attr(mbox, dst_func, dst_aeqn, seg_len); + + err = hinic3_wait_for_timeout(mbox, check_mbox_wb_status, + MBOX_MSG_POLLING_TIMEOUT_MS, + USEC_PER_MSEC); + wb_status = get_mbox_status(send_mbox); + if (err) { + dev_err(hwdev->dev, "Send mailbox segment timeout, wb status: 0x%x\n", + wb_status); + return err; + } + + if (!MBOX_STATUS_SUCCESS(wb_status)) { + dev_err(hwdev->dev, + "Send mailbox segment to function %u error, wb status: 0x%x\n", + dst_func, wb_status); + errcode = MBOX_STATUS_ERRCODE(wb_status); + return errcode ? errcode : -EFAULT; + } + + return 0; +} + +static int send_mbox_msg(struct hinic3_mbox *mbox, u8 mod, u16 cmd, + const void *msg, u32 msg_len, u16 dst_func, + enum mbox_msg_direction_type direction, + enum mbox_msg_ack_type ack_type, + struct mbox_msg_info *msg_info) +{ + enum mbox_msg_data_type data_type = MBOX_MSG_DATA_INLINE; + struct hinic3_hwdev *hwdev = mbox->hwdev; + struct mbox_dma_msg dma_msg; + u32 seg_len = MBOX_SEG_LEN; + __le64 header = 0; + u32 seq_id = 0; + u16 rsp_aeq_id; + u8 *msg_seg; + int err = 0; + u32 left; + + if (hwdev->hwif->attr.num_aeqs > MBOX_MSG_AEQ_FOR_MBOX) + rsp_aeq_id = MBOX_MSG_AEQ_FOR_MBOX; + else + rsp_aeq_id = 0; + + if (dst_func == MBOX_MGMT_FUNC_ID && + !(hwdev->features[0] & MBOX_COMM_F_MBOX_SEGMENT)) { + err = mbox_prepare_dma_msg(mbox, ack_type, &dma_msg, + msg, msg_len); + if (err) + goto err_send; + + msg = &dma_msg; + msg_len = sizeof(dma_msg); + data_type = MBOX_MSG_DATA_DMA; + } + + msg_seg = (u8 *)msg; + left = msg_len; + + header = cpu_to_le64(MBOX_MSG_HEADER_SET(msg_len, MSG_LEN) | + MBOX_MSG_HEADER_SET(mod, MODULE) | + MBOX_MSG_HEADER_SET(seg_len, SEG_LEN) | + MBOX_MSG_HEADER_SET(ack_type, NO_ACK) | + MBOX_MSG_HEADER_SET(data_type, DATA_TYPE) | + MBOX_MSG_HEADER_SET(MBOX_SEQ_ID_START_VAL, SEQID) | + MBOX_MSG_HEADER_SET(direction, DIRECTION) | + MBOX_MSG_HEADER_SET(cmd, CMD) | + MBOX_MSG_HEADER_SET(msg_info->msg_id, MSG_ID) | + MBOX_MSG_HEADER_SET(rsp_aeq_id, AEQ_ID) | + MBOX_MSG_HEADER_SET(MBOX_MSG_FROM_MBOX, SOURCE) | + MBOX_MSG_HEADER_SET(!!msg_info->status, STATUS)); + + while (!(MBOX_MSG_HEADER_GET(header, LAST))) { + if (left <= MBOX_SEG_LEN) { + header &= cpu_to_le64(~MBOX_MSG_HEADER_SEG_LEN_MASK); + header |= + cpu_to_le64(MBOX_MSG_HEADER_SET(left, SEG_LEN) | + MBOX_MSG_HEADER_SET(1, LAST)); + seg_len = left; + } + + err = send_mbox_seg(mbox, header, dst_func, msg_seg, + seg_len, msg_info); + if (err) { + dev_err(hwdev->dev, "Failed to send mbox seg, seq_id=0x%llx\n", + MBOX_MSG_HEADER_GET(header, SEQID)); + goto err_send; + } + + left -= MBOX_SEG_LEN; + msg_seg += MBOX_SEG_LEN; + seq_id++; + header &= cpu_to_le64(~MBOX_MSG_HEADER_SEG_LEN_MASK); + header |= 
cpu_to_le64(MBOX_MSG_HEADER_SET(seq_id, SEQID)); + } + +err_send: + return err; +} + +static void set_mbox_to_func_event(struct hinic3_mbox *mbox, + enum mbox_event_state event_flag) +{ + spin_lock(&mbox->mbox_lock); + mbox->event_flag = event_flag; + spin_unlock(&mbox->mbox_lock); +} + +static enum hinic3_wait_return check_mbox_msg_finish(void *priv_data) +{ + struct hinic3_mbox *mbox = priv_data; + + return (mbox->event_flag == MBOX_EVENT_SUCCESS) ? + HINIC3_WAIT_PROCESS_CPL : HINIC3_WAIT_PROCESS_WAITING; +} + +static int wait_mbox_msg_completion(struct hinic3_mbox *mbox, + u32 timeout) +{ + u32 wait_time; + int err; + + wait_time = (timeout != 0) ? timeout : MBOX_COMP_POLLING_TIMEOUT_MS; + err = hinic3_wait_for_timeout(mbox, check_mbox_msg_finish, + wait_time, USEC_PER_MSEC); + if (err) { + set_mbox_to_func_event(mbox, MBOX_EVENT_TIMEOUT); + return err; + } + set_mbox_to_func_event(mbox, MBOX_EVENT_END); + + return 0; +} + int hinic3_send_mbox_to_mgmt(struct hinic3_hwdev *hwdev, u8 mod, u16 cmd, const struct mgmt_msg_params *msg_params) { - /* Completed by later submission due to LoC limit. */ - return -EFAULT; + struct hinic3_mbox *mbox = hwdev->mbox; + struct mbox_msg_info msg_info = {}; + struct hinic3_msg_desc *msg_desc; + u32 msg_len; + int err; + + /* expect response message */ + msg_desc = get_mbox_msg_desc(mbox, MBOX_MSG_RESP, MBOX_MGMT_FUNC_ID); + mutex_lock(&mbox->mbox_send_lock); + msg_info.msg_id = (mbox->send_msg_id + 1) & 0xF; + mbox->send_msg_id = msg_info.msg_id; + set_mbox_to_func_event(mbox, MBOX_EVENT_START); + + err = send_mbox_msg(mbox, mod, cmd, msg_params->buf_in, + msg_params->in_size, MBOX_MGMT_FUNC_ID, + MBOX_MSG_SEND, MBOX_MSG_ACK, &msg_info); + if (err) { + dev_err(hwdev->dev, "Send mailbox mod %u, cmd %u failed, msg_id: %u, err: %d\n", + mod, cmd, msg_info.msg_id, err); + set_mbox_to_func_event(mbox, MBOX_EVENT_FAIL); + goto err_send; + } + + if (wait_mbox_msg_completion(mbox, msg_params->timeout_ms)) { + dev_err(hwdev->dev, + "Send mbox msg timeout, msg_id: %u\n", msg_info.msg_id); + err = -ETIMEDOUT; + goto err_send; + } + + if (mod != msg_desc->mod || cmd != le16_to_cpu(msg_desc->cmd)) { + dev_err(hwdev->dev, + "Invalid response mbox message, mod: 0x%x, cmd: 0x%x, expect mod: 0x%x, cmd: 0x%x\n", + msg_desc->mod, msg_desc->cmd, mod, cmd); + err = -EFAULT; + goto err_send; + } + + if (msg_desc->msg_info.status) { + err = msg_desc->msg_info.status; + goto err_send; + } + + if (msg_params->buf_out) { + msg_len = le16_to_cpu(msg_desc->msg_len); + if (msg_len != msg_params->expected_out_size) { + dev_err(hwdev->dev, + "Invalid response mbox message length: %u for mod %d cmd %u, expected length: %u\n", + msg_desc->msg_len, mod, cmd, + msg_params->expected_out_size); + err = -EFAULT; + goto err_send; + } + + memcpy(msg_params->buf_out, msg_desc->msg, msg_len); + } + +err_send: + mutex_unlock(&mbox->mbox_send_lock); + + return err; +} + +int hinic3_send_mbox_to_mgmt_no_ack(struct hinic3_hwdev *hwdev, u8 mod, u16 cmd, + const struct mgmt_msg_params *msg_params) +{ + struct hinic3_mbox *mbox = hwdev->mbox; + struct mbox_msg_info msg_info = {}; + int err; + + mutex_lock(&mbox->mbox_send_lock); + err = send_mbox_msg(mbox, mod, cmd, msg_params->buf_in, + msg_params->in_size, MBOX_MGMT_FUNC_ID, + MBOX_MSG_SEND, MBOX_MSG_NO_ACK, &msg_info); + if (err) + dev_err(hwdev->dev, "Send mailbox no ack failed\n"); + + mutex_unlock(&mbox->mbox_send_lock); + + return err; } diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_mbox.h 
b/drivers/net/ethernet/huawei/hinic3/hinic3_mbox.h index d7a6c37b7eff..e71629e95086 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_mbox.h +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_mbox.h @@ -8,8 +8,134 @@ #include <linux/mutex.h> struct hinic3_hwdev; +struct mgmt_msg_params; + +#define MBOX_MSG_HEADER_SRC_GLB_FUNC_IDX_MASK GENMASK_ULL(12, 0) +#define MBOX_MSG_HEADER_STATUS_MASK BIT_ULL(13) +#define MBOX_MSG_HEADER_SOURCE_MASK BIT_ULL(15) +#define MBOX_MSG_HEADER_AEQ_ID_MASK GENMASK_ULL(17, 16) +#define MBOX_MSG_HEADER_MSG_ID_MASK GENMASK_ULL(21, 18) +#define MBOX_MSG_HEADER_CMD_MASK GENMASK_ULL(31, 22) +#define MBOX_MSG_HEADER_MSG_LEN_MASK GENMASK_ULL(42, 32) +#define MBOX_MSG_HEADER_MODULE_MASK GENMASK_ULL(47, 43) +#define MBOX_MSG_HEADER_SEG_LEN_MASK GENMASK_ULL(53, 48) +#define MBOX_MSG_HEADER_NO_ACK_MASK BIT_ULL(54) +#define MBOX_MSG_HEADER_DATA_TYPE_MASK BIT_ULL(55) +#define MBOX_MSG_HEADER_SEQID_MASK GENMASK_ULL(61, 56) +#define MBOX_MSG_HEADER_LAST_MASK BIT_ULL(62) +#define MBOX_MSG_HEADER_DIRECTION_MASK BIT_ULL(63) + +#define MBOX_MSG_HEADER_SET(val, member) \ + FIELD_PREP(MBOX_MSG_HEADER_##member##_MASK, val) +#define MBOX_MSG_HEADER_GET(val, member) \ + FIELD_GET(MBOX_MSG_HEADER_##member##_MASK, le64_to_cpu(val)) + +/* identifies if a segment belongs to a message or to a response. A VF is only + * expected to send messages and receive responses. PF driver could receive + * messages and send responses. + */ +enum mbox_msg_direction_type { + MBOX_MSG_SEND = 0, + MBOX_MSG_RESP = 1, +}; + +/* Indicates if mbox message expects a response (ack) or not */ +enum mbox_msg_ack_type { + MBOX_MSG_ACK = 0, + MBOX_MSG_NO_ACK = 1, +}; + +enum mbox_msg_data_type { + MBOX_MSG_DATA_INLINE = 0, + MBOX_MSG_DATA_DMA = 1, +}; + +enum mbox_msg_src_type { + MBOX_MSG_FROM_MBOX = 1, +}; + +enum mbox_msg_aeq_type { + MBOX_MSG_AEQ_FOR_EVENT = 0, + MBOX_MSG_AEQ_FOR_MBOX = 1, +}; + +#define HINIC3_MBOX_WQ_NAME "hinic3_mbox" + +struct mbox_msg_info { + u8 msg_id; + u8 status; +}; + +struct hinic3_msg_desc { + u8 *msg; + __le16 msg_len; + u8 seq_id; + u8 mod; + __le16 cmd; + struct mbox_msg_info msg_info; +}; + +struct hinic3_msg_channel { + struct hinic3_msg_desc resp_msg; + struct hinic3_msg_desc recv_msg; +}; + +struct hinic3_send_mbox { + u8 __iomem *data; + void *wb_vaddr; + dma_addr_t wb_paddr; +}; + +enum mbox_event_state { + MBOX_EVENT_START = 0, + MBOX_EVENT_FAIL = 1, + MBOX_EVENT_SUCCESS = 2, + MBOX_EVENT_TIMEOUT = 3, + MBOX_EVENT_END = 4, +}; + +struct mbox_dma_msg { + __le32 xor; + __le32 dma_addr_high; + __le32 dma_addr_low; + __le32 msg_len; + __le64 rsvd; +}; + +struct mbox_dma_queue { + void *dma_buf_vaddr; + dma_addr_t dma_buf_paddr; + u16 depth; + u16 prod_idx; + u16 cons_idx; +}; + +struct hinic3_mbox { + struct hinic3_hwdev *hwdev; + /* lock for send mbox message and ack message */ + struct mutex mbox_send_lock; + struct hinic3_send_mbox send_mbox; + struct mbox_dma_queue sync_msg_queue; + struct mbox_dma_queue async_msg_queue; + struct workqueue_struct *workq; + /* driver and MGMT CPU */ + struct hinic3_msg_channel mgmt_msg; + /* VF to PF */ + struct hinic3_msg_channel *func_msg; + u8 send_msg_id; + enum mbox_event_state event_flag; + /* lock for mbox event flag */ + spinlock_t mbox_lock; +}; + +void hinic3_mbox_func_aeqe_handler(struct hinic3_hwdev *hwdev, u8 *header, + u8 size); +int hinic3_init_mbox(struct hinic3_hwdev *hwdev); +void hinic3_free_mbox(struct hinic3_hwdev *hwdev); int hinic3_send_mbox_to_mgmt(struct hinic3_hwdev *hwdev, u8 mod, u16 cmd, const struct 
mgmt_msg_params *msg_params); +int hinic3_send_mbox_to_mgmt_no_ack(struct hinic3_hwdev *hwdev, u8 mod, u16 cmd, + const struct mgmt_msg_params *msg_params); #endif diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_nic_dev.h b/drivers/net/ethernet/huawei/hinic3/hinic3_nic_dev.h index c994fc9b6ee0..9fad834f9e92 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_nic_dev.h +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_nic_dev.h @@ -51,6 +51,12 @@ struct hinic3_dyna_txrxq_params { struct hinic3_irq_cfg *irq_cfg; }; +struct hinic3_intr_coal_info { + u8 pending_limit; + u8 coalesce_timer_cfg; + u8 resend_timer_cfg; +}; + struct hinic3_nic_dev { struct pci_dev *pdev; struct net_device *netdev; @@ -70,13 +76,13 @@ struct hinic3_nic_dev { u16 num_qp_irq; struct msix_entry *qps_msix_entries; + struct hinic3_intr_coal_info *intr_coalesce; + bool link_status_up; }; void hinic3_set_netdev_ops(struct net_device *netdev); - -/* Temporary prototypes. Functions become static in later submission. */ -void qp_add_napi(struct hinic3_irq_cfg *irq_cfg); -void qp_del_napi(struct hinic3_irq_cfg *irq_cfg); +int hinic3_qps_irq_init(struct net_device *netdev); +void hinic3_qps_irq_uninit(struct net_device *netdev); #endif diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_tx.c b/drivers/net/ethernet/huawei/hinic3/hinic3_tx.c index 3f7f73430be4..f1c745ee3087 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_tx.c +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_tx.c @@ -55,9 +55,9 @@ void hinic3_free_txqs(struct net_device *netdev) static void hinic3_set_buf_desc(struct hinic3_sq_bufdesc *buf_descs, dma_addr_t addr, u32 len) { - buf_descs->hi_addr = upper_32_bits(addr); - buf_descs->lo_addr = lower_32_bits(addr); - buf_descs->len = len; + buf_descs->hi_addr = cpu_to_le32(upper_32_bits(addr)); + buf_descs->lo_addr = cpu_to_le32(lower_32_bits(addr)); + buf_descs->len = cpu_to_le32(len); } static int hinic3_tx_map_skb(struct net_device *netdev, struct sk_buff *skb, diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_wq.c b/drivers/net/ethernet/huawei/hinic3/hinic3_wq.c index 2ac7efcd1365..bc3ffdc25cf6 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_wq.c +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_wq.c @@ -6,6 +6,110 @@ #include "hinic3_hwdev.h" #include "hinic3_wq.h" +#define WQ_MIN_DEPTH 64 +#define WQ_MAX_DEPTH 65536 +#define WQ_PAGE_ADDR_SIZE sizeof(u64) +#define WQ_MAX_NUM_PAGES (HINIC3_MIN_PAGE_SIZE / WQ_PAGE_ADDR_SIZE) + +static int wq_init_wq_block(struct hinic3_hwdev *hwdev, struct hinic3_wq *wq) +{ + struct hinic3_queue_pages *qpages = &wq->qpages; + int i; + + if (hinic3_wq_is_0_level_cla(wq)) { + wq->wq_block_paddr = qpages->pages[0].align_paddr; + wq->wq_block_vaddr = qpages->pages[0].align_vaddr; + + return 0; + } + + if (wq->qpages.num_pages > WQ_MAX_NUM_PAGES) { + dev_err(hwdev->dev, "wq num_pages exceed limit: %lu\n", + WQ_MAX_NUM_PAGES); + return -EFAULT; + } + + wq->wq_block_vaddr = dma_alloc_coherent(hwdev->dev, + HINIC3_MIN_PAGE_SIZE, + &wq->wq_block_paddr, + GFP_KERNEL); + if (!wq->wq_block_vaddr) + return -ENOMEM; + + for (i = 0; i < qpages->num_pages; i++) + wq->wq_block_vaddr[i] = cpu_to_be64(qpages->pages[i].align_paddr); + + return 0; +} + +static int wq_alloc_pages(struct hinic3_hwdev *hwdev, struct hinic3_wq *wq) +{ + int err; + + err = hinic3_queue_pages_alloc(hwdev, &wq->qpages, 0); + if (err) + return err; + + err = wq_init_wq_block(hwdev, wq); + if (err) { + hinic3_queue_pages_free(hwdev, &wq->qpages); + return err; + } + + return 0; +} + +static void 
wq_free_pages(struct hinic3_hwdev *hwdev, struct hinic3_wq *wq) +{ + if (!hinic3_wq_is_0_level_cla(wq)) + dma_free_coherent(hwdev->dev, + HINIC3_MIN_PAGE_SIZE, + wq->wq_block_vaddr, + wq->wq_block_paddr); + + hinic3_queue_pages_free(hwdev, &wq->qpages); +} + +int hinic3_wq_create(struct hinic3_hwdev *hwdev, struct hinic3_wq *wq, + u32 q_depth, u16 wqebb_size) +{ + u32 wq_page_size; + + if (q_depth < WQ_MIN_DEPTH || q_depth > WQ_MAX_DEPTH || + !is_power_of_2(q_depth) || !is_power_of_2(wqebb_size)) { + dev_err(hwdev->dev, "Invalid WQ: q_depth %u, wqebb_size %u\n", + q_depth, wqebb_size); + return -EINVAL; + } + + wq_page_size = ALIGN(hwdev->wq_page_size, HINIC3_MIN_PAGE_SIZE); + + memset(wq, 0, sizeof(*wq)); + wq->q_depth = q_depth; + wq->idx_mask = q_depth - 1; + + hinic3_queue_pages_init(&wq->qpages, q_depth, wq_page_size, wqebb_size); + + return wq_alloc_pages(hwdev, wq); +} + +void hinic3_wq_destroy(struct hinic3_hwdev *hwdev, struct hinic3_wq *wq) +{ + wq_free_pages(hwdev, wq); +} + +void hinic3_wq_reset(struct hinic3_wq *wq) +{ + struct hinic3_queue_pages *qpages = &wq->qpages; + u16 pg_idx; + + wq->cons_idx = 0; + wq->prod_idx = 0; + + for (pg_idx = 0; pg_idx < qpages->num_pages; pg_idx++) + memset(qpages->pages[pg_idx].align_vaddr, 0, qpages->page_size); +} + void hinic3_wq_get_multi_wqebbs(struct hinic3_wq *wq, u16 num_wqebbs, u16 *prod_idx, struct hinic3_sq_bufdesc **first_part_wqebbs, @@ -27,3 +131,8 @@ void hinic3_wq_get_multi_wqebbs(struct hinic3_wq *wq, *second_part_wqebbs = get_q_element(&wq->qpages, idx, NULL); } } + +bool hinic3_wq_is_0_level_cla(const struct hinic3_wq *wq) +{ + return wq->qpages.num_pages == 1; +} diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_wq.h b/drivers/net/ethernet/huawei/hinic3/hinic3_wq.h index ab37893efd7e..9b3f012bec80 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_wq.h +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_wq.h @@ -10,10 +10,10 @@ struct hinic3_sq_bufdesc { /* 31-bits Length, L2NIC only uses length[17:0] */ - u32 len; - u32 rsvd; - u32 hi_addr; - u32 lo_addr; + __le32 len; + __le32 rsvd; + __le32 hi_addr; + __le32 lo_addr; }; /* Work queue is used to submit elements (tx, rx, cmd) to hw. 
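Worth pausing on the ring math used throughout hinic3_wq.c: because hinic3_wq_create() above rejects any q_depth that is not a power of two, idx_mask = q_depth - 1 lets the free-running producer/consumer counters be mapped to ring slots with a single AND, as hinic3_wq_get_one_wqebb() in the next hunk does. A minimal user-space sketch of that indexing (the depth of 8 is an arbitrary illustrative choice):

#include <stdio.h>

int main(void)
{
	unsigned int q_depth = 8;            /* power of two, as enforced */
	unsigned int idx_mask = q_depth - 1; /* plays the role of wq->idx_mask */
	unsigned int prod_idx = 0;           /* free-running counter, never reset */

	for (int i = 0; i < 10; i++) {
		unsigned int slot = prod_idx & idx_mask; /* wraps at q_depth */

		printf("wqebb %2d -> ring slot %u\n", i, slot);
		prod_idx++;
	}
	return 0;
}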
@@ -59,6 +59,7 @@ static inline void *hinic3_wq_get_one_wqebb(struct hinic3_wq *wq, u16 *pi)
 {
 	*pi = wq->prod_idx & wq->idx_mask;
 	wq->prod_idx++;
+
 	return get_q_element(&wq->qpages, *pi, NULL);
 }
 
@@ -67,10 +68,20 @@ static inline void hinic3_wq_put_wqebbs(struct hinic3_wq *wq, u16 num_wqebbs)
 	wq->cons_idx += num_wqebbs;
 }
 
+static inline u64 hinic3_wq_get_first_wqe_page_addr(const struct hinic3_wq *wq)
+{
+	return wq->qpages.pages[0].align_paddr;
+}
+
+int hinic3_wq_create(struct hinic3_hwdev *hwdev, struct hinic3_wq *wq,
+		     u32 q_depth, u16 wqebb_size);
+void hinic3_wq_destroy(struct hinic3_hwdev *hwdev, struct hinic3_wq *wq);
+void hinic3_wq_reset(struct hinic3_wq *wq);
 void hinic3_wq_get_multi_wqebbs(struct hinic3_wq *wq,
 				u16 num_wqebbs, u16 *prod_idx,
 				struct hinic3_sq_bufdesc **first_part_wqebbs,
 				struct hinic3_sq_bufdesc **second_part_wqebbs,
 				u16 *first_part_wqebbs_num);
+bool hinic3_wq_is_0_level_cla(const struct hinic3_wq *wq);
 
 #endif
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index eec971567aac..3808148c1fc7 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -756,6 +756,17 @@ static void deactivate_rx_pools(struct ibmvnic_adapter *adapter)
 		adapter->rx_pool[i].active = 0;
 }
 
+static void ibmvnic_set_safe_max_ind_descs(struct ibmvnic_adapter *adapter)
+{
+	if (adapter->cur_max_ind_descs > IBMVNIC_SAFE_IND_DESC) {
+		netdev_info(adapter->netdev,
+			    "set max ind descs from %u to safe limit %u\n",
+			    adapter->cur_max_ind_descs,
+			    IBMVNIC_SAFE_IND_DESC);
+		adapter->cur_max_ind_descs = IBMVNIC_SAFE_IND_DESC;
+	}
+}
+
 static void replenish_rx_pool(struct ibmvnic_adapter *adapter,
 			      struct ibmvnic_rx_pool *pool)
 {
@@ -843,7 +854,7 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter,
 		sub_crq->rx_add.len = cpu_to_be32(pool->buff_size << shift);
 
 		/* if send_subcrq_indirect queue is full, flush to VIOS */
-		if (ind_bufp->index == IBMVNIC_MAX_IND_DESCS ||
+		if (ind_bufp->index == adapter->cur_max_ind_descs ||
 		    i == count - 1) {
 			lpar_rc =
 				send_subcrq_indirect(adapter, handle,
@@ -862,6 +873,14 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter,
 failure:
 	if (lpar_rc != H_PARAMETER && lpar_rc != H_CLOSED)
 		dev_err_ratelimited(dev, "rx: replenish packet buffer failed\n");
+
+	/* Detect platform limit H_PARAMETER */
+	if (lpar_rc == H_PARAMETER)
+		ibmvnic_set_safe_max_ind_descs(adapter);
+
+	/* For all error cases, temporarily drop only this batch.
+	 * Rely on TCP/IP retransmissions to retry and recover.
+	 */
 	for (i = ind_bufp->index - 1; i >= 0; --i) {
 		struct ibmvnic_rx_buff *rx_buff;
 
@@ -2381,16 +2400,28 @@ static int ibmvnic_tx_scrq_flush(struct ibmvnic_adapter *adapter,
 		rc = send_subcrq_direct(adapter, handle,
 					(u64 *)ind_bufp->indir_arr);
 
-	if (rc)
+	if (rc) {
+		dev_err_ratelimited(&adapter->vdev->dev,
+				    "tx_flush failed, rc=%u (%llu entries dma=%pad handle=%llx)\n",
+				    rc, entries, &dma_addr, handle);
+		/* Detect platform limit H_PARAMETER */
+		if (rc == H_PARAMETER)
+			ibmvnic_set_safe_max_ind_descs(adapter);
+
+		/* For all error cases, temporarily drop only this batch.
+		 * Rely on TCP/IP retransmissions to retry and recover.
+		 */
 		ibmvnic_tx_scrq_clean_buffer(adapter, tx_scrq);
-	else
+	} else {
 		ind_bufp->index = 0;
+	}
 	return rc;
 }
 
 static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 {
 	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+	u32 cur_max_ind_descs = adapter->cur_max_ind_descs;
 	int queue_num = skb_get_queue_mapping(skb);
 	u8 *hdrs = (u8 *)&adapter->tx_rx_desc_req;
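Context for the ibmvnic changes in these hunks: descriptors are batched into an indirect buffer and flushed to the hypervisor once the batch reaches cur_max_ind_descs, and the first H_PARAMETER rejection drops the limit from 128 to the 16-entry safe value. A rough user-space sketch of that fallback, with the hcall and the rejecting hypervisor mocked; the H_PARAMETER value here is only a stand-in for the hvcall.h constant:

#include <stdio.h>

#define MAX_IND_DESCS 128 /* IBMVNIC_MAX_IND_DESCS */
#define SAFE_IND_DESC  16 /* IBMVNIC_SAFE_IND_DESC */
#define H_PARAMETER    -4 /* stand-in for the hvcall.h constant */

/* Mock hypervisor that, like an older platform, rejects large batches */
static long fake_h_send_indirect(unsigned int n_descs)
{
	return n_descs > SAFE_IND_DESC ? H_PARAMETER : 0;
}

int main(void)
{
	unsigned int cur_max = MAX_IND_DESCS;
	long rc = fake_h_send_indirect(cur_max);

	if (rc == H_PARAMETER) {
		/* mirrors ibmvnic_set_safe_max_ind_descs(); the rejected
		 * batch itself is dropped and left to TCP retransmission
		 */
		printf("batch of %u rejected, capping at %u\n",
		       cur_max, SAFE_IND_DESC);
		cur_max = SAFE_IND_DESC;
	}

	rc = fake_h_send_indirect(cur_max);
	printf("next batch of %u descriptors: rc=%ld\n", cur_max, rc);
	return 0;
}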
 	struct device *dev = &adapter->vdev->dev;
@@ -2590,7 +2621,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 	tx_crq.v1.n_crq_elem = num_entries;
 	tx_buff->num_entries = num_entries;
 	/* flush buffer if current entry can not fit */
-	if (num_entries + ind_bufp->index > IBMVNIC_MAX_IND_DESCS) {
+	if (num_entries + ind_bufp->index > cur_max_ind_descs) {
 		lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq, true);
 		if (lpar_rc != H_SUCCESS)
 			goto tx_flush_err;
@@ -2603,7 +2634,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 	ind_bufp->index += num_entries;
 	if (__netdev_tx_sent_queue(txq, skb->len,
 				   netdev_xmit_more() &&
-				   ind_bufp->index < IBMVNIC_MAX_IND_DESCS)) {
+				   ind_bufp->index < cur_max_ind_descs)) {
 		lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq, true);
 		if (lpar_rc != H_SUCCESS)
 			goto tx_err;
@@ -4006,7 +4037,7 @@ static void release_sub_crq_queue(struct ibmvnic_adapter *adapter,
 	}
 
 	dma_free_coherent(dev,
-			  IBMVNIC_IND_ARR_SZ,
+			  IBMVNIC_IND_MAX_ARR_SZ,
 			  scrq->ind_buf.indir_arr,
 			  scrq->ind_buf.indir_dma);
 
@@ -4063,7 +4094,7 @@ static struct ibmvnic_sub_crq_queue *init_sub_crq_queue(struct ibmvnic_adapter
 
 	scrq->ind_buf.indir_arr =
 		dma_alloc_coherent(dev,
-				   IBMVNIC_IND_ARR_SZ,
+				   IBMVNIC_IND_MAX_ARR_SZ,
 				   &scrq->ind_buf.indir_dma,
 				   GFP_KERNEL);
 
@@ -6369,6 +6400,19 @@ static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter, bool reset)
 			rc = reset_sub_crq_queues(adapter);
 		}
 	} else {
+		if (adapter->reset_reason == VNIC_RESET_MOBILITY) {
+			/* After an LPM, reset the max number of indirect
+			 * subcrq descriptors per H_SEND_SUB_CRQ_INDIRECT
+			 * hcall to the default max (e.g. POWER8 -> POWER10).
+			 *
+			 * If the new destination platform does not support
+			 * the higher limit (e.g. POWER10 -> POWER8 LPM),
+			 * H_PARAMETER will trigger automatic fallback to the
+			 * safe minimum limit.
+ */ + adapter->cur_max_ind_descs = IBMVNIC_MAX_IND_DESCS; + } + rc = init_sub_crqs(adapter); } @@ -6520,6 +6564,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) adapter->wait_for_reset = false; adapter->last_reset_time = jiffies; + adapter->cur_max_ind_descs = IBMVNIC_MAX_IND_DESCS; rc = register_netdev(netdev); if (rc) { diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h index 246ddce753f9..480dc587078f 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.h +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -29,8 +29,9 @@ #define IBMVNIC_BUFFS_PER_POOL 100 #define IBMVNIC_MAX_QUEUES 16 #define IBMVNIC_MAX_QUEUE_SZ 4096 -#define IBMVNIC_MAX_IND_DESCS 16 -#define IBMVNIC_IND_ARR_SZ (IBMVNIC_MAX_IND_DESCS * 32) +#define IBMVNIC_MAX_IND_DESCS 128 +#define IBMVNIC_SAFE_IND_DESC 16 +#define IBMVNIC_IND_MAX_ARR_SZ (IBMVNIC_MAX_IND_DESCS * 32) #define IBMVNIC_TSO_BUF_SZ 65536 #define IBMVNIC_TSO_BUFS 64 @@ -930,6 +931,7 @@ struct ibmvnic_adapter { struct ibmvnic_control_ip_offload_buffer ip_offload_ctrl; dma_addr_t ip_offload_ctrl_tok; u32 msg_enable; + u32 cur_max_ind_descs; /* Vital Product Data (VPD) */ struct ibmvnic_vpd *vpd; diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c b/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c index 1954a04460d1..bf2029144c1d 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c @@ -560,7 +560,7 @@ static int fm10k_set_ringparam(struct net_device *netdev, /* allocate temporary buffer to store rings in */ i = max_t(int, interface->num_tx_queues, interface->num_rx_queues); - temp_ring = vmalloc(array_size(i, sizeof(struct fm10k_ring))); + temp_ring = vmalloc_array(i, sizeof(struct fm10k_ring)); if (!temp_ring) { err = -ENOMEM; diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile index d0f9c9492363..eac45d7c0cf1 100644 --- a/drivers/net/ethernet/intel/ice/Makefile +++ b/drivers/net/ethernet/intel/ice/Makefile @@ -47,9 +47,11 @@ ice-y := ice_main.o \ ice_adapter.o ice-$(CONFIG_PCI_IOV) += \ ice_sriov.o \ - ice_virtchnl.o \ - ice_virtchnl_allowlist.o \ - ice_virtchnl_fdir.o \ + virt/allowlist.o \ + virt/fdir.o \ + virt/queues.o \ + virt/virtchnl.o \ + virt/rss.o \ ice_vf_mbx.o \ ice_vf_vsi_vlan_ops.o \ ice_vf_lib.o diff --git a/drivers/net/ethernet/intel/ice/devlink/health.c b/drivers/net/ethernet/intel/ice/devlink/health.c index ab519c0f28bf..8e9a8a8178d4 100644 --- a/drivers/net/ethernet/intel/ice/devlink/health.c +++ b/drivers/net/ethernet/intel/ice/devlink/health.c @@ -450,9 +450,8 @@ ice_init_devlink_rep(struct ice_pf *pf, { struct devlink *devlink = priv_to_devlink(pf); struct devlink_health_reporter *rep; - const u64 graceful_period = 0; - rep = devl_health_reporter_create(devlink, ops, graceful_period, pf); + rep = devl_health_reporter_create(devlink, ops, pf); if (IS_ERR(rep)) { struct device *dev = ice_pf_to_dev(pf); diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 8a8a01a4bb40..e952d67388bf 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -203,6 +203,7 @@ enum ice_feature { ICE_F_GCS, ICE_F_ROCE_LAG, ICE_F_SRIOV_LAG, + ICE_F_SRIOV_AA_LAG, ICE_F_MBX_LIMIT, ICE_F_MAX }; diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index 3bd3ea3af888..caae1780fd37 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ 
b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -2060,6 +2060,10 @@ struct ice_aqc_cfg_txqs { #define ICE_AQC_Q_CFG_SRC_PRT_M 0x7 #define ICE_AQC_Q_CFG_DST_PRT_S 3 #define ICE_AQC_Q_CFG_DST_PRT_M (0x7 << ICE_AQC_Q_CFG_DST_PRT_S) +#define ICE_AQC_Q_CFG_MODE_M GENMASK(7, 6) +#define ICE_AQC_Q_CFG_MODE_SAME_PF 0x0 +#define ICE_AQC_Q_CFG_MODE_GIVE_OWN 0x1 +#define ICE_AQC_Q_CFG_MODE_KEEP_OWN 0x2 u8 time_out; #define ICE_AQC_Q_CFG_TIMEOUT_S 2 #define ICE_AQC_Q_CFG_TIMEOUT_M (0x1F << ICE_AQC_Q_CFG_TIMEOUT_S) diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 003d60a4db21..808870539667 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -2418,12 +2418,15 @@ ice_parse_common_caps(struct ice_hw *hw, struct ice_hw_common_caps *caps, caps->reset_restrict_support); break; case LIBIE_AQC_CAPS_FW_LAG_SUPPORT: - caps->roce_lag = !!(number & LIBIE_AQC_BIT_ROCEV2_LAG); + caps->roce_lag = number & LIBIE_AQC_BIT_ROCEV2_LAG; ice_debug(hw, ICE_DBG_INIT, "%s: roce_lag = %u\n", prefix, caps->roce_lag); - caps->sriov_lag = !!(number & LIBIE_AQC_BIT_SRIOV_LAG); + caps->sriov_lag = number & LIBIE_AQC_BIT_SRIOV_LAG; ice_debug(hw, ICE_DBG_INIT, "%s: sriov_lag = %u\n", prefix, caps->sriov_lag); + caps->sriov_aa_lag = number & LIBIE_AQC_BIT_SRIOV_AA_LAG; + ice_debug(hw, ICE_DBG_INIT, "%s: sriov_aa_lag = %u\n", + prefix, caps->sriov_aa_lag); break; case LIBIE_AQC_CAPS_TX_SCHED_TOPO_COMP_MODE: caps->tx_sched_topo_comp_mode_en = (number == 1); @@ -4712,24 +4715,24 @@ do_aq: } /** - * ice_aq_cfg_lan_txq + * ice_aq_cfg_lan_txq - send AQ command 0x0C32 to FW * @hw: pointer to the hardware structure * @buf: buffer for command * @buf_size: size of buffer in bytes * @num_qs: number of queues being configured * @oldport: origination lport * @newport: destination lport + * @mode: cmd_type for move to use * @cd: pointer to command details structure or NULL * * Move/Configure LAN Tx queue (0x0C32) * - * There is a better AQ command to use for moving nodes, so only coding - * this one for configuring the node. + * Return: Zero on success, associated error code on failure. 
*/ int ice_aq_cfg_lan_txq(struct ice_hw *hw, struct ice_aqc_cfg_txqs_buf *buf, u16 buf_size, u16 num_qs, u8 oldport, u8 newport, - struct ice_sq_cd *cd) + u8 mode, struct ice_sq_cd *cd) { struct ice_aqc_cfg_txqs *cmd; struct libie_aq_desc desc; @@ -4742,10 +4745,12 @@ ice_aq_cfg_lan_txq(struct ice_hw *hw, struct ice_aqc_cfg_txqs_buf *buf, if (!buf) return -EINVAL; - cmd->cmd_type = ICE_AQC_Q_CFG_TC_CHNG; + cmd->cmd_type = mode; cmd->num_qs = num_qs; cmd->port_num_chng = (oldport & ICE_AQC_Q_CFG_SRC_PRT_M); cmd->port_num_chng |= FIELD_PREP(ICE_AQC_Q_CFG_DST_PRT_M, newport); + cmd->port_num_chng |= FIELD_PREP(ICE_AQC_Q_CFG_MODE_M, + ICE_AQC_Q_CFG_MODE_KEEP_OWN); cmd->time_out = FIELD_PREP(ICE_AQC_Q_CFG_TIMEOUT_M, 5); cmd->blocked_cgds = 0; diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h index 60320cdf7804..dba15ad315a6 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.h +++ b/drivers/net/ethernet/intel/ice/ice_common.h @@ -270,7 +270,7 @@ ice_ena_vsi_txq(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 q_handle, int ice_aq_cfg_lan_txq(struct ice_hw *hw, struct ice_aqc_cfg_txqs_buf *buf, u16 buf_size, u16 num_qs, u8 oldport, u8 newport, - struct ice_sq_cd *cd); + u8 mode, struct ice_sq_cd *cd); int ice_replay_vsi(struct ice_hw *hw, u16 vsi_handle); void ice_replay_post(struct ice_hw *hw); struct ice_q_ctx * diff --git a/drivers/net/ethernet/intel/ice/ice_lag.c b/drivers/net/ethernet/intel/ice/ice_lag.c index b1129da72139..80312e1dcf7f 100644 --- a/drivers/net/ethernet/intel/ice/ice_lag.c +++ b/drivers/net/ethernet/intel/ice/ice_lag.c @@ -10,12 +10,17 @@ #define ICE_LAG_RES_SHARED BIT(14) #define ICE_LAG_RES_VALID BIT(15) -#define LACP_TRAIN_PKT_LEN 16 -static const u8 lacp_train_pkt[LACP_TRAIN_PKT_LEN] = { 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0x88, 0x09, 0, 0 }; +#define ICE_TRAIN_PKT_LEN 16 +static const u8 lacp_train_pkt[ICE_TRAIN_PKT_LEN] = { 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0x88, 0x09, 0, 0 }; +static const u8 act_act_train_pkt[ICE_TRAIN_PKT_LEN] = { 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0 }; #define ICE_RECIPE_LEN 64 +#define ICE_LAG_SRIOV_CP_RECIPE 10 + static const u8 ice_dflt_vsi_rcp[ICE_RECIPE_LEN] = { 0x05, 0, 0, 0, 0x20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x85, 0, 0x01, 0, 0, 0, 0xff, 0xff, 0x08, 0, 0, 0, 0, 0, 0, 0, @@ -46,10 +51,10 @@ static void ice_lag_set_primary(struct ice_lag *lag) } /** - * ice_lag_set_backup - set PF LAG state to Backup + * ice_lag_set_bkup - set PF LAG state to Backup * @lag: LAG info struct */ -static void ice_lag_set_backup(struct ice_lag *lag) +static void ice_lag_set_bkup(struct ice_lag *lag) { struct ice_pf *pf = lag->pf; @@ -99,6 +104,28 @@ static bool netif_is_same_ice(struct ice_pf *pf, struct net_device *netdev) } /** + * ice_lag_config_eswitch - configure eswitch to work with LAG + * @lag: lag info struct + * @netdev: active network interface device struct + * + * Updates all port representors in eswitch to use @netdev for Tx. + * + * Configures the netdev to keep dst metadata (also used in representor Tx). + * This is required for an uplink without switchdev mode configured. 
+ */ +static void ice_lag_config_eswitch(struct ice_lag *lag, + struct net_device *netdev) +{ + struct ice_repr *repr; + unsigned long id; + + xa_for_each(&lag->pf->eswitch.reprs, id, repr) + repr->dst->u.port_info.lower_dev = netdev; + + netif_keep_dst(netdev); +} + +/** * ice_netdev_to_lag - return pointer to associated lag struct from netdev * @netdev: pointer to net_device struct to query */ @@ -210,13 +237,12 @@ ice_lag_cfg_fltr(struct ice_lag *lag, u32 act, u16 recipe_id, u16 *rule_idx, u8 direction, bool add) { struct ice_sw_rule_lkup_rx_tx *s_rule; + struct ice_hw *hw = &lag->pf->hw; u16 s_rule_sz, vsi_num; - struct ice_hw *hw; u8 *eth_hdr; u32 opc; int err; - hw = &lag->pf->hw; vsi_num = ice_get_hw_vsi_num(hw, 0); s_rule_sz = ICE_SW_RULE_RX_TX_ETH_HDR_SIZE(s_rule); @@ -314,26 +340,15 @@ ice_lag_cfg_drop_fltr(struct ice_lag *lag, bool add) } /** - * ice_lag_cfg_pf_fltrs - set filters up for new active port + * ice_lag_cfg_pf_fltrs_act_bkup - set filters up for new active port * @lag: local interfaces lag struct - * @ptr: opaque data containing notifier event + * @bonding_info: netdev event bonding info */ static void -ice_lag_cfg_pf_fltrs(struct ice_lag *lag, void *ptr) +ice_lag_cfg_pf_fltrs_act_bkup(struct ice_lag *lag, + struct netdev_bonding_info *bonding_info) { - struct netdev_notifier_bonding_info *info; - struct netdev_bonding_info *bonding_info; - struct net_device *event_netdev; - struct device *dev; - - event_netdev = netdev_notifier_info_to_dev(ptr); - /* not for this netdev */ - if (event_netdev != lag->netdev) - return; - - info = (struct netdev_notifier_bonding_info *)ptr; - bonding_info = &info->bonding_info; - dev = ice_pf_to_dev(lag->pf); + struct device *dev = ice_pf_to_dev(lag->pf); /* interface not active - remove old default VSI rule */ if (bonding_info->slave.state && lag->pf_rx_rule_id) { @@ -354,6 +369,105 @@ ice_lag_cfg_pf_fltrs(struct ice_lag *lag, void *ptr) } /** + * ice_lag_cfg_lp_fltr - configure lport filters + * @lag: local interface's lag struct + * @add: add or remove rule + * @cp: control packet only or general PF lport rule + */ +static void +ice_lag_cfg_lp_fltr(struct ice_lag *lag, bool add, bool cp) +{ + struct ice_sw_rule_lkup_rx_tx *s_rule; + struct ice_vsi *vsi = lag->pf->vsi[0]; + u16 buf_len, opc; + + buf_len = ICE_SW_RULE_RX_TX_HDR_SIZE(s_rule, ICE_TRAIN_PKT_LEN); + s_rule = kzalloc(buf_len, GFP_KERNEL); + if (!s_rule) { + netdev_warn(lag->netdev, "-ENOMEM error configuring CP filter\n"); + return; + } + + if (add) { + if (cp) { + s_rule->recipe_id = + cpu_to_le16(ICE_LAG_SRIOV_CP_RECIPE); + memcpy(s_rule->hdr_data, lacp_train_pkt, + ICE_TRAIN_PKT_LEN); + } else { + s_rule->recipe_id = cpu_to_le16(lag->act_act_recipe); + memcpy(s_rule->hdr_data, act_act_train_pkt, + ICE_TRAIN_PKT_LEN); + } + + s_rule->src = cpu_to_le16(vsi->port_info->lport); + s_rule->act = cpu_to_le32(ICE_FWD_TO_VSI | + ICE_SINGLE_ACT_LAN_ENABLE | + ICE_SINGLE_ACT_VALID_BIT | + FIELD_PREP(ICE_SINGLE_ACT_VSI_ID_M, + vsi->vsi_num)); + s_rule->hdr_len = cpu_to_le16(ICE_TRAIN_PKT_LEN); + s_rule->hdr.type = cpu_to_le16(ICE_AQC_SW_RULES_T_LKUP_RX); + opc = ice_aqc_opc_add_sw_rules; + } else { + opc = ice_aqc_opc_remove_sw_rules; + if (cp) + s_rule->index = cpu_to_le16(lag->cp_rule_idx); + else + s_rule->index = cpu_to_le16(lag->act_act_rule_idx); + } + if (ice_aq_sw_rules(&lag->pf->hw, s_rule, buf_len, 1, opc, NULL)) { + netdev_warn(lag->netdev, "Error %s %s rule for aggregate\n", + add ? "ADDING" : "REMOVING", + cp ? 
"CONTROL PACKET" : "LPORT"); + goto err_cp_free; + } + + if (add) { + if (cp) + lag->cp_rule_idx = le16_to_cpu(s_rule->index); + else + lag->act_act_rule_idx = le16_to_cpu(s_rule->index); + } else { + if (cp) + lag->cp_rule_idx = 0; + else + lag->act_act_rule_idx = 0; + } + +err_cp_free: + kfree(s_rule); +} + +/** + * ice_lag_cfg_pf_fltrs - set filters up for PF traffic + * @lag: local interfaces lag struct + * @ptr: opaque data containing notifier event + */ +static void +ice_lag_cfg_pf_fltrs(struct ice_lag *lag, void *ptr) +{ + struct netdev_notifier_bonding_info *info = ptr; + struct netdev_bonding_info *bonding_info; + struct net_device *event_netdev; + + event_netdev = netdev_notifier_info_to_dev(ptr); + if (event_netdev != lag->netdev) + return; + + bonding_info = &info->bonding_info; + + if (lag->bond_aa) { + if (lag->need_fltr_cfg) { + ice_lag_cfg_lp_fltr(lag, true, false); + lag->need_fltr_cfg = false; + } + } else { + ice_lag_cfg_pf_fltrs_act_bkup(lag, bonding_info); + } +} + +/** * ice_display_lag_info - print LAG info * @lag: LAG info struct */ @@ -402,12 +516,11 @@ static u16 ice_lag_qbuf_recfg(struct ice_hw *hw, struct ice_aqc_cfg_txqs_buf *qbuf, u16 vsi_num, u16 numq, u8 tc) { + struct ice_pf *pf = hw->back; struct ice_q_ctx *q_ctx; u16 qid, count = 0; - struct ice_pf *pf; int i; - pf = hw->back; for (i = 0; i < numq; i++) { q_ctx = ice_get_lan_q_ctx(hw, vsi_num, tc, i); if (!q_ctx) { @@ -577,7 +690,7 @@ ice_lag_move_vf_node_tc(struct ice_lag *lag, u8 oldport, u8 newport, } if (ice_aq_cfg_lan_txq(&lag->pf->hw, qbuf, qbuf_size, valq, oldport, - newport, NULL)) { + newport, ICE_AQC_Q_CFG_TC_CHNG, NULL)) { dev_warn(dev, "Failure to configure queues for LAG failover\n"); goto qbuf_err; } @@ -713,10 +826,17 @@ void ice_lag_move_new_vf_nodes(struct ice_vf *vf) if (lag->upper_netdev) ice_lag_build_netdev_list(lag, &ndlist); - if (ice_is_feature_supported(pf, ICE_F_SRIOV_LAG) && - lag->bonded && lag->primary && pri_port != act_port && - !list_empty(lag->netdev_head)) - ice_lag_move_single_vf_nodes(lag, pri_port, act_port, vsi->idx); + if (lag->bonded && lag->primary && !list_empty(lag->netdev_head)) { + if (lag->bond_aa && + ice_is_feature_supported(pf, ICE_F_SRIOV_AA_LAG)) + ice_lag_aa_failover(lag, ICE_LAGS_IDX, NULL); + + if (!lag->bond_aa && + ice_is_feature_supported(pf, ICE_F_SRIOV_LAG) && + pri_port != act_port) + ice_lag_move_single_vf_nodes(lag, pri_port, act_port, + vsi->idx); + } ice_lag_destroy_netdev_list(lag, &ndlist); @@ -767,61 +887,6 @@ void ice_lag_move_vf_nodes_cfg(struct ice_lag *lag, u8 src_prt, u8 dst_prt) ice_lag_destroy_netdev_list(lag, &ndlist); } -#define ICE_LAG_SRIOV_CP_RECIPE 10 -#define ICE_LAG_SRIOV_TRAIN_PKT_LEN 16 - -/** - * ice_lag_cfg_cp_fltr - configure filter for control packets - * @lag: local interface's lag struct - * @add: add or remove rule - */ -static void -ice_lag_cfg_cp_fltr(struct ice_lag *lag, bool add) -{ - struct ice_sw_rule_lkup_rx_tx *s_rule = NULL; - struct ice_vsi *vsi; - u16 buf_len, opc; - - vsi = lag->pf->vsi[0]; - - buf_len = ICE_SW_RULE_RX_TX_HDR_SIZE(s_rule, - ICE_LAG_SRIOV_TRAIN_PKT_LEN); - s_rule = kzalloc(buf_len, GFP_KERNEL); - if (!s_rule) { - netdev_warn(lag->netdev, "-ENOMEM error configuring CP filter\n"); - return; - } - - if (add) { - s_rule->hdr.type = cpu_to_le16(ICE_AQC_SW_RULES_T_LKUP_RX); - s_rule->recipe_id = cpu_to_le16(ICE_LAG_SRIOV_CP_RECIPE); - s_rule->src = cpu_to_le16(vsi->port_info->lport); - s_rule->act = cpu_to_le32(ICE_FWD_TO_VSI | - ICE_SINGLE_ACT_LAN_ENABLE | - ICE_SINGLE_ACT_VALID_BIT | - 
FIELD_PREP(ICE_SINGLE_ACT_VSI_ID_M, vsi->vsi_num)); - s_rule->hdr_len = cpu_to_le16(ICE_LAG_SRIOV_TRAIN_PKT_LEN); - memcpy(s_rule->hdr_data, lacp_train_pkt, LACP_TRAIN_PKT_LEN); - opc = ice_aqc_opc_add_sw_rules; - } else { - opc = ice_aqc_opc_remove_sw_rules; - s_rule->index = cpu_to_le16(lag->cp_rule_idx); - } - if (ice_aq_sw_rules(&lag->pf->hw, s_rule, buf_len, 1, opc, NULL)) { - netdev_warn(lag->netdev, "Error %s CP rule for fail-over\n", - add ? "ADDING" : "REMOVING"); - goto cp_free; - } - - if (add) - lag->cp_rule_idx = le16_to_cpu(s_rule->index); - else - lag->cp_rule_idx = 0; - -cp_free: - kfree(s_rule); -} - /** * ice_lag_prepare_vf_reset - helper to adjust vf lag for reset * @lag: lag struct for interface that owns VF @@ -835,11 +900,20 @@ u8 ice_lag_prepare_vf_reset(struct ice_lag *lag) u8 pri_prt, act_prt; if (lag && lag->bonded && lag->primary && lag->upper_netdev) { - pri_prt = lag->pf->hw.port_info->lport; - act_prt = lag->active_port; - if (act_prt != pri_prt && act_prt != ICE_LAG_INVALID_PORT) { - ice_lag_move_vf_nodes_cfg(lag, act_prt, pri_prt); - return act_prt; + if (!lag->bond_aa) { + pri_prt = lag->pf->hw.port_info->lport; + act_prt = lag->active_port; + if (act_prt != pri_prt && + act_prt != ICE_LAG_INVALID_PORT) { + ice_lag_move_vf_nodes_cfg(lag, act_prt, pri_prt); + return act_prt; + } + } else { + if (lag->port_bitmap & ICE_LAGS_M) { + lag->port_bitmap &= ~ICE_LAGS_M; + ice_lag_aa_failover(lag, ICE_LAGP_IDX, NULL); + lag->port_bitmap |= ICE_LAGS_M; + } } } @@ -857,10 +931,15 @@ void ice_lag_complete_vf_reset(struct ice_lag *lag, u8 act_prt) { u8 pri_prt; - if (lag && lag->bonded && lag->primary && - act_prt != ICE_LAG_INVALID_PORT) { - pri_prt = lag->pf->hw.port_info->lport; - ice_lag_move_vf_nodes_cfg(lag, pri_prt, act_prt); + if (lag && lag->bonded && lag->primary) { + if (!lag->bond_aa) { + pri_prt = lag->pf->hw.port_info->lport; + if (act_prt != ICE_LAG_INVALID_PORT) + ice_lag_move_vf_nodes_cfg(lag, pri_prt, + act_prt); + } else { + ice_lag_aa_failover(lag, ICE_LAGS_IDX, NULL); + } } } @@ -873,13 +952,12 @@ void ice_lag_complete_vf_reset(struct ice_lag *lag, u8 act_prt) */ static void ice_lag_info_event(struct ice_lag *lag, void *ptr) { - struct netdev_notifier_bonding_info *info; + struct netdev_notifier_bonding_info *info = ptr; struct netdev_bonding_info *bonding_info; struct net_device *event_netdev; const char *lag_netdev_name; event_netdev = netdev_notifier_info_to_dev(ptr); - info = ptr; lag_netdev_name = netdev_name(lag->netdev); bonding_info = &info->bonding_info; @@ -897,7 +975,7 @@ static void ice_lag_info_event(struct ice_lag *lag, void *ptr) } if (bonding_info->slave.state) - ice_lag_set_backup(lag); + ice_lag_set_bkup(lag); else ice_lag_set_primary(lag); @@ -906,6 +984,295 @@ lag_out: } /** + * ice_lag_aa_qbuf_recfg - fill a single queue buffer for recfg cmd + * @hw: HW struct that contains the queue context + * @qbuf: pointer to single queue buffer + * @vsi_num: index of the VF VSI in PF space + * @qnum: queue index + * + * Return: Zero on success, error code on failure. 
+ */ +static int +ice_lag_aa_qbuf_recfg(struct ice_hw *hw, struct ice_aqc_cfg_txqs_buf *qbuf, + u16 vsi_num, int qnum) +{ + struct ice_pf *pf = hw->back; + struct ice_q_ctx *q_ctx; + u16 q_id; + + q_ctx = ice_get_lan_q_ctx(hw, vsi_num, 0, qnum); + if (!q_ctx) { + dev_dbg(ice_hw_to_dev(hw), "LAG queue %d no Q context\n", qnum); + return -ENOENT; + } + + if (q_ctx->q_teid == ICE_INVAL_TEID) { + dev_dbg(ice_hw_to_dev(hw), "LAG queue %d INVAL TEID\n", qnum); + return -EINVAL; + } + + if (q_ctx->q_handle == ICE_INVAL_Q_HANDLE) { + dev_dbg(ice_hw_to_dev(hw), "LAG queue %d INVAL Q HANDLE\n", qnum); + return -EINVAL; + } + + q_id = pf->vsi[vsi_num]->txq_map[q_ctx->q_handle]; + qbuf->queue_info[0].q_handle = cpu_to_le16(q_id); + qbuf->queue_info[0].tc = 0; + qbuf->queue_info[0].q_teid = cpu_to_le32(q_ctx->q_teid); + + return 0; +} + +/** + * ice_lag_aa_move_vf_qs - Move some/all VF queues to destination + * @lag: primary interface's lag struct + * @dest: index of destination port + * @vsi_num: index of VF VSI in PF space + * @all: if true move all queues to destination + * @odd: VF wide q indicator for odd/even + * @e_pf: PF struct for the event interface + * + * the parameter "all" is to control whether we are splitting the queues + * between two interfaces or moving them all to the destination interface + */ +static void ice_lag_aa_move_vf_qs(struct ice_lag *lag, u8 dest, u16 vsi_num, + bool all, bool *odd, struct ice_pf *e_pf) +{ + DEFINE_RAW_FLEX(struct ice_aqc_cfg_txqs_buf, qbuf, queue_info, 1); + struct ice_hw *old_hw, *new_hw, *pri_hw, *sec_hw; + struct device *dev = ice_pf_to_dev(lag->pf); + struct ice_vsi_ctx *pv_ctx, *sv_ctx; + struct ice_lag_netdev_list ndlist; + u16 num_q, qbuf_size, sec_vsi_num; + u8 pri_lport, sec_lport; + u32 pvf_teid, svf_teid; + u16 vf_id; + + vf_id = lag->pf->vsi[vsi_num]->vf->vf_id; + /* If sec_vf[] not defined, then no second interface to share with */ + if (lag->sec_vf[vf_id]) + sec_vsi_num = lag->sec_vf[vf_id]->idx; + else + return; + + pri_lport = lag->bond_lport_pri; + sec_lport = lag->bond_lport_sec; + + if (pri_lport == ICE_LAG_INVALID_PORT || + sec_lport == ICE_LAG_INVALID_PORT) + return; + + if (!e_pf) + ice_lag_build_netdev_list(lag, &ndlist); + + pri_hw = &lag->pf->hw; + if (e_pf && lag->pf != e_pf) + sec_hw = &e_pf->hw; + else + sec_hw = ice_lag_find_hw_by_lport(lag, sec_lport); + + if (!pri_hw || !sec_hw) + return; + + if (dest == ICE_LAGP_IDX) { + struct ice_vsi *vsi; + + vsi = ice_get_main_vsi(lag->pf); + if (!vsi) + return; + + old_hw = sec_hw; + new_hw = pri_hw; + ice_lag_config_eswitch(lag, vsi->netdev); + } else { + struct ice_pf *sec_pf = sec_hw->back; + struct ice_vsi *vsi; + + vsi = ice_get_main_vsi(sec_pf); + if (!vsi) + return; + + old_hw = pri_hw; + new_hw = sec_hw; + ice_lag_config_eswitch(lag, vsi->netdev); + } + + pv_ctx = ice_get_vsi_ctx(pri_hw, vsi_num); + if (!pv_ctx) { + dev_warn(dev, "Unable to locate primary VSI %d context for LAG failover\n", + vsi_num); + return; + } + + sv_ctx = ice_get_vsi_ctx(sec_hw, sec_vsi_num); + if (!sv_ctx) { + dev_warn(dev, "Unable to locate secondary VSI %d context for LAG failover\n", + vsi_num); + return; + } + + num_q = pv_ctx->num_lan_q_entries[0]; + qbuf_size = __struct_size(qbuf); + + /* Suspend traffic for primary VSI VF */ + pvf_teid = le32_to_cpu(pv_ctx->sched.vsi_node[0]->info.node_teid); + ice_sched_suspend_resume_elems(pri_hw, 1, &pvf_teid, true); + + /* Suspend traffic for secondary VSI VF */ + svf_teid = le32_to_cpu(sv_ctx->sched.vsi_node[0]->info.node_teid); + 
ice_sched_suspend_resume_elems(sec_hw, 1, &svf_teid, true); + + for (int i = 0; i < num_q; i++) { + struct ice_sched_node *n_prt, *q_node, *parent; + struct ice_port_info *pi, *new_pi; + struct ice_vsi_ctx *src_ctx; + struct ice_sched_node *p; + struct ice_q_ctx *q_ctx; + u16 dst_vsi_num; + + pi = old_hw->port_info; + new_pi = new_hw->port_info; + + *odd = !(*odd); + if ((dest == ICE_LAGP_IDX && *odd && !all) || + (dest == ICE_LAGS_IDX && !(*odd) && !all) || + lag->q_home[vf_id][i] == dest) + continue; + + if (dest == ICE_LAGP_IDX) + dst_vsi_num = vsi_num; + else + dst_vsi_num = sec_vsi_num; + + n_prt = ice_sched_get_free_qparent(new_hw->port_info, + dst_vsi_num, 0, + ICE_SCHED_NODE_OWNER_LAN); + if (!n_prt) + continue; + + q_ctx = ice_get_lan_q_ctx(pri_hw, vsi_num, 0, i); + if (!q_ctx) + continue; + + if (dest == ICE_LAGP_IDX) + src_ctx = sv_ctx; + else + src_ctx = pv_ctx; + + q_node = ice_sched_find_node_by_teid(src_ctx->sched.vsi_node[0], + q_ctx->q_teid); + if (!q_node) + continue; + + qbuf->src_parent_teid = q_node->info.parent_teid; + qbuf->dst_parent_teid = n_prt->info.node_teid; + + /* Move the node in the HW/FW */ + if (ice_lag_aa_qbuf_recfg(pri_hw, qbuf, vsi_num, i)) + continue; + + if (dest == ICE_LAGP_IDX) + ice_aq_cfg_lan_txq(pri_hw, qbuf, qbuf_size, 1, + sec_lport, pri_lport, + ICE_AQC_Q_CFG_MOVE_TC_CHNG, + NULL); + else + ice_aq_cfg_lan_txq(pri_hw, qbuf, qbuf_size, 1, + pri_lport, sec_lport, + ICE_AQC_Q_CFG_MOVE_TC_CHNG, + NULL); + + /* Move the node in the SW */ + parent = q_node->parent; + if (!parent) + continue; + + for (int n = 0; n < parent->num_children; n++) { + int j; + + if (parent->children[n] != q_node) + continue; + + for (j = n + 1; j < parent->num_children; + j++) { + parent->children[j - 1] = + parent->children[j]; + } + parent->children[j] = NULL; + parent->num_children--; + break; + } + + p = pi->sib_head[0][q_node->tx_sched_layer]; + while (p) { + if (p->sibling == q_node) { + p->sibling = q_node->sibling; + break; + } + p = p->sibling; + } + + if (pi->sib_head[0][q_node->tx_sched_layer] == q_node) + pi->sib_head[0][q_node->tx_sched_layer] = + q_node->sibling; + + q_node->parent = n_prt; + q_node->info.parent_teid = n_prt->info.node_teid; + q_node->sibling = NULL; + p = new_pi->sib_head[0][q_node->tx_sched_layer]; + if (p) { + while (p) { + if (!p->sibling) { + p->sibling = q_node; + break; + } + p = p->sibling; + } + } else { + new_pi->sib_head[0][q_node->tx_sched_layer] = + q_node; + } + + n_prt->children[n_prt->num_children++] = q_node; + lag->q_home[vf_id][i] = dest; + } + + ice_sched_suspend_resume_elems(pri_hw, 1, &pvf_teid, false); + ice_sched_suspend_resume_elems(sec_hw, 1, &svf_teid, false); + + if (!e_pf) + ice_lag_destroy_netdev_list(lag, &ndlist); +} + +/** + * ice_lag_aa_failover - move VF queues in A/A mode + * @lag: primary lag struct + * @dest: index of destination port + * @e_pf: PF struct for event port + */ +void ice_lag_aa_failover(struct ice_lag *lag, u8 dest, struct ice_pf *e_pf) +{ + bool odd = true, all = false; + int i; + + /* Primary can be a target if down (cleanup), but secondary can't */ + if (dest == ICE_LAGS_IDX && !(lag->port_bitmap & ICE_LAGS_M)) + return; + + /* Move all queues to a destination if only one port is active, + * or no ports are active and dest is primary. 
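+	 *
+	 * With both ports up (port_bitmap == 0x3) "all" stays false and
+	 * the queues are split between the ports on the odd/even toggle.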
+ */ + if ((lag->port_bitmap ^ (ICE_LAGP_M | ICE_LAGS_M)) || + (!lag->port_bitmap && dest == ICE_LAGP_IDX)) + all = true; + + ice_for_each_vsi(lag->pf, i) + if (lag->pf->vsi[i] && lag->pf->vsi[i]->type == ICE_VSI_VF) + ice_lag_aa_move_vf_qs(lag, dest, i, all, &odd, e_pf); +} + +/** * ice_lag_reclaim_vf_tc - move scheduling nodes back to primary interface * @lag: primary interface lag struct * @src_hw: HW struct current node location @@ -921,13 +1288,12 @@ ice_lag_reclaim_vf_tc(struct ice_lag *lag, struct ice_hw *src_hw, u16 vsi_num, u16 numq, valq, num_moved, qbuf_size; u16 buf_size = __struct_size(buf); struct ice_aqc_cfg_txqs_buf *qbuf; + struct ice_hw *hw = &lag->pf->hw; struct ice_sched_node *n_prt; __le32 teid, parent_teid; struct ice_vsi_ctx *ctx; - struct ice_hw *hw; u32 tmp_teid; - hw = &lag->pf->hw; ctx = ice_get_vsi_ctx(hw, vsi_num); if (!ctx) { dev_warn(dev, "Unable to locate VSI context for LAG reclaim\n"); @@ -968,7 +1334,7 @@ ice_lag_reclaim_vf_tc(struct ice_lag *lag, struct ice_hw *src_hw, u16 vsi_num, if (ice_aq_cfg_lan_txq(hw, qbuf, qbuf_size, numq, src_hw->port_info->lport, hw->port_info->lport, - NULL)) { + ICE_AQC_Q_CFG_TC_CHNG, NULL)) { dev_warn(dev, "Failure to configure queues for LAG failover\n"); goto reclaim_qerr; } @@ -1039,36 +1405,15 @@ static void ice_lag_link(struct ice_lag *lag) lag->bonded = true; lag->role = ICE_LAG_UNSET; + lag->need_fltr_cfg = true; netdev_info(lag->netdev, "Shared SR-IOV resources in bond are active\n"); } /** - * ice_lag_config_eswitch - configure eswitch to work with LAG - * @lag: lag info struct - * @netdev: active network interface device struct - * - * Updates all port representors in eswitch to use @netdev for Tx. - * - * Configures the netdev to keep dst metadata (also used in representor Tx). - * This is required for an uplink without switchdev mode configured. 
- */ -static void ice_lag_config_eswitch(struct ice_lag *lag, - struct net_device *netdev) -{ - struct ice_repr *repr; - unsigned long id; - - xa_for_each(&lag->pf->eswitch.reprs, id, repr) - repr->dst->u.port_info.lower_dev = netdev; - - netif_keep_dst(netdev); -} - -/** - * ice_lag_unlink - handle unlink event + * ice_lag_act_bkup_unlink - handle unlink event for A/B bond * @lag: LAG info struct */ -static void ice_lag_unlink(struct ice_lag *lag) +static void ice_lag_act_bkup_unlink(struct ice_lag *lag) { u8 pri_port, act_port, loc_port; struct ice_pf *pf = lag->pf; @@ -1104,10 +1449,32 @@ static void ice_lag_unlink(struct ice_lag *lag) } } } +} - lag->bonded = false; - lag->role = ICE_LAG_NONE; - lag->upper_netdev = NULL; +/** + * ice_lag_aa_unlink - handle unlink event for Active-Active bond + * @lag: LAG info struct + */ +static void ice_lag_aa_unlink(struct ice_lag *lag) +{ + struct ice_lag *pri_lag; + + if (lag->primary) { + pri_lag = lag; + lag->port_bitmap &= ~ICE_LAGP_M; + } else { + pri_lag = ice_lag_find_primary(lag); + if (pri_lag) + pri_lag->port_bitmap &= ICE_LAGS_M; + } + + if (pri_lag) { + ice_lag_aa_failover(pri_lag, ICE_LAGP_IDX, lag->pf); + if (lag->primary) + pri_lag->bond_lport_pri = ICE_LAG_INVALID_PORT; + else + pri_lag->bond_lport_sec = ICE_LAG_INVALID_PORT; + } } /** @@ -1123,10 +1490,20 @@ static void ice_lag_link_unlink(struct ice_lag *lag, void *ptr) if (netdev != lag->netdev) return; - if (info->linking) + if (info->linking) { ice_lag_link(lag); - else - ice_lag_unlink(lag); + } else { + if (lag->bond_aa) + ice_lag_aa_unlink(lag); + else + ice_lag_act_bkup_unlink(lag); + + lag->bonded = false; + lag->role = ICE_LAG_NONE; + lag->upper_netdev = NULL; + lag->bond_aa = false; + lag->need_fltr_cfg = false; + } } /** @@ -1224,11 +1601,8 @@ ice_lag_set_swid(u16 primary_swid, struct ice_lag *local_lag, */ static void ice_lag_primary_swid(struct ice_lag *lag, bool link) { - struct ice_hw *hw; - u16 swid; - - hw = &lag->pf->hw; - swid = hw->port_info->sw_id; + struct ice_hw *hw = &lag->pf->hw; + u16 swid = hw->port_info->sw_id; if (ice_share_res(hw, ICE_AQC_RES_TYPE_SWID, link, swid)) dev_warn(ice_pf_to_dev(lag->pf), "Failure to set primary interface shared status\n"); @@ -1241,12 +1615,10 @@ static void ice_lag_primary_swid(struct ice_lag *lag, bool link) */ static void ice_lag_add_prune_list(struct ice_lag *lag, struct ice_pf *event_pf) { - u16 num_vsi, rule_buf_sz, vsi_list_id, event_vsi_num, prim_vsi_idx; - struct ice_sw_rule_vsi_list *s_rule = NULL; + u16 rule_buf_sz, vsi_list_id, event_vsi_num, prim_vsi_idx, num_vsi = 1; + struct ice_sw_rule_vsi_list *s_rule; struct device *dev; - num_vsi = 1; - dev = ice_pf_to_dev(lag->pf); event_vsi_num = event_pf->vsi[0]->vsi_num; prim_vsi_idx = lag->pf->vsi[0]->idx; @@ -1282,12 +1654,10 @@ static void ice_lag_add_prune_list(struct ice_lag *lag, struct ice_pf *event_pf) */ static void ice_lag_del_prune_list(struct ice_lag *lag, struct ice_pf *event_pf) { - u16 num_vsi, vsi_num, vsi_idx, rule_buf_sz, vsi_list_id; - struct ice_sw_rule_vsi_list *s_rule = NULL; + u16 vsi_num, vsi_idx, rule_buf_sz, vsi_list_id, num_vsi = 1; + struct ice_sw_rule_vsi_list *s_rule; struct device *dev; - num_vsi = 1; - dev = ice_pf_to_dev(lag->pf); vsi_num = event_pf->vsi[0]->vsi_num; vsi_idx = lag->pf->vsi[0]->idx; @@ -1335,6 +1705,11 @@ static void ice_lag_init_feature_support_flag(struct ice_pf *pf) ice_set_feature_support(pf, ICE_F_SRIOV_LAG); else ice_clear_feature_support(pf, ICE_F_SRIOV_LAG); + + if (caps->sriov_aa_lag && 
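+	    /* the HW capability alone is not enough; the DDP package must
+	     * also be able to extract the logical port for the A/A recipe
+	     */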
ice_pkg_has_lport_extract(&pf->hw)) + ice_set_feature_support(pf, ICE_F_SRIOV_AA_LAG); + else + ice_clear_feature_support(pf, ICE_F_SRIOV_AA_LAG); } /** @@ -1344,11 +1719,10 @@ static void ice_lag_init_feature_support_flag(struct ice_pf *pf) */ static void ice_lag_changeupper_event(struct ice_lag *lag, void *ptr) { - struct netdev_notifier_changeupper_info *info; + struct netdev_notifier_changeupper_info *info = ptr; struct ice_lag *primary_lag; struct net_device *netdev; - info = ptr; netdev = netdev_notifier_info_to_dev(ptr); /* not for this netdev */ @@ -1369,6 +1743,9 @@ static void ice_lag_changeupper_event(struct ice_lag *lag, void *ptr) /* Configure primary's SWID to be shared */ ice_lag_primary_swid(lag, true); primary_lag = lag; + lag->bond_lport_pri = lag->pf->hw.port_info->lport; + lag->bond_lport_sec = ICE_LAG_INVALID_PORT; + lag->port_bitmap = 0; } else { u16 swid; @@ -1378,16 +1755,29 @@ static void ice_lag_changeupper_event(struct ice_lag *lag, void *ptr) swid = primary_lag->pf->hw.port_info->sw_id; ice_lag_set_swid(swid, lag, true); ice_lag_add_prune_list(primary_lag, lag->pf); - ice_lag_cfg_drop_fltr(lag, true); + primary_lag->bond_lport_sec = + lag->pf->hw.port_info->lport; } /* add filter for primary control packets */ - ice_lag_cfg_cp_fltr(lag, true); + ice_lag_cfg_lp_fltr(lag, true, true); } else { if (!primary_lag && lag->primary) primary_lag = lag; + if (primary_lag) { + for (int i = 0; i < ICE_MAX_SRIOV_VFS; i++) { + if (primary_lag->sec_vf[i]) { + ice_vsi_release(primary_lag->sec_vf[i]); + primary_lag->sec_vf[i] = NULL; + } + } + } + if (!lag->primary) { ice_lag_set_swid(0, lag, false); + if (primary_lag) + primary_lag->bond_lport_sec = + ICE_LAG_INVALID_PORT; } else { if (primary_lag && lag->primary) { ice_lag_primary_swid(lag, false); @@ -1395,7 +1785,7 @@ static void ice_lag_changeupper_event(struct ice_lag *lag, void *ptr) } } /* remove filter for control packets */ - ice_lag_cfg_cp_fltr(lag, false); + ice_lag_cfg_lp_fltr(lag, false, !lag->bond_aa); } } @@ -1408,7 +1798,7 @@ static void ice_lag_changeupper_event(struct ice_lag *lag, void *ptr) */ static void ice_lag_monitor_link(struct ice_lag *lag, void *ptr) { - struct netdev_notifier_changeupper_info *info; + struct netdev_notifier_changeupper_info *info = ptr; struct ice_hw *prim_hw, *active_hw; struct net_device *event_netdev; struct ice_pf *pf; @@ -1421,19 +1811,34 @@ static void ice_lag_monitor_link(struct ice_lag *lag, void *ptr) if (!netif_is_same_ice(lag->pf, event_netdev)) return; + if (info->upper_dev != lag->upper_netdev) + return; + + if (info->linking) + return; + pf = lag->pf; prim_hw = &pf->hw; prim_port = prim_hw->port_info->lport; - info = (struct netdev_notifier_changeupper_info *)ptr; - if (info->upper_dev != lag->upper_netdev) - return; - - if (!info->linking) { - /* Since there are only two interfaces allowed in SRIOV+LAG, if - * one port is leaving, then nodes need to be on primary - * interface. - */ + /* Since there are only two interfaces allowed in SRIOV+LAG, if + * one port is leaving, then nodes need to be on primary + * interface. 
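+	 * For an A/B bond that means reclaiming nodes from the active
+	 * port's HW; for an A/A bond the secondary's queues fail over to
+	 * the primary and the secondary lport is invalidated.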
+ */
+	if (lag->bond_aa) {
+		struct ice_netdev_priv *e_ndp;
+		struct ice_pf *e_pf;
+
+		e_ndp = netdev_priv(event_netdev);
+		e_pf = e_ndp->vsi->back;
+
+		if (lag->bond_lport_pri != ICE_LAG_INVALID_PORT &&
+		    lag->port_bitmap & ICE_LAGS_M) {
+			lag->port_bitmap &= ~ICE_LAGS_M;
+			ice_lag_aa_failover(lag, ICE_LAGP_IDX, e_pf);
+			lag->bond_lport_sec = ICE_LAG_INVALID_PORT;
+		}
+	} else {
 		if (prim_port != lag->active_port &&
 		    lag->active_port != ICE_LAG_INVALID_PORT) {
 			active_hw = ice_lag_find_hw_by_lport(lag,
@@ -1445,45 +1850,32 @@ static void ice_lag_monitor_link(struct ice_lag *lag, void *ptr)
 }
 
 /**
- * ice_lag_monitor_active - main PF keep track of which port is active
+ * ice_lag_monitor_act_bkup - keep track of which port is active in A/B LAG
  * @lag: lag info struct
- * @ptr: opaque data containing notifier event
+ * @b_info: bonding info
+ * @event_netdev: net_device for target netdev
  *
  * This function is for the primary PF to monitor changes in which port is
  * active and handle changes for SRIOV VF functionality
  */
-static void ice_lag_monitor_active(struct ice_lag *lag, void *ptr)
+static void ice_lag_monitor_act_bkup(struct ice_lag *lag,
+				     struct netdev_bonding_info *b_info,
+				     struct net_device *event_netdev)
 {
-	struct net_device *event_netdev, *event_upper;
-	struct netdev_notifier_bonding_info *info;
-	struct netdev_bonding_info *bonding_info;
 	struct ice_netdev_priv *event_np;
 	struct ice_pf *pf, *event_pf;
 	u8 prim_port, event_port;
 
-	if (!lag->primary)
-		return;
-
 	pf = lag->pf;
 	if (!pf)
 		return;
 
-	event_netdev = netdev_notifier_info_to_dev(ptr);
-	rcu_read_lock();
-	event_upper = netdev_master_upper_dev_get_rcu(event_netdev);
-	rcu_read_unlock();
-	if (!netif_is_ice(event_netdev) || event_upper != lag->upper_netdev)
-		return;
-
 	event_np = netdev_priv(event_netdev);
 	event_pf = event_np->vsi->back;
 	event_port = event_pf->hw.port_info->lport;
 	prim_port = pf->hw.port_info->lport;
 
-	info = (struct netdev_notifier_bonding_info *)ptr;
-	bonding_info = &info->bonding_info;
-
-	if (!bonding_info->slave.state) {
+	if (!b_info->slave.state) {
 		/* if no port is currently active, then nodes and filters exist
 		 * on primary port, check if we need to move them
 		 */
@@ -1520,6 +1912,128 @@
 }
 
 /**
+ * ice_lag_aa_clear_spoof - adjust the placeholder VSI spoofing for A/A LAG
+ * @vsi: placeholder VSI to adjust
+ */
+static void ice_lag_aa_clear_spoof(struct ice_vsi *vsi)
+{
+	ice_vsi_update_security(vsi, ice_vsi_ctx_clear_antispoof);
+}
+
+/**
+ * ice_lag_monitor_act_act - Keep track of active ports in A/A LAG
+ * @lag: lag struct for primary interface
+ * @b_info: bonding_info for event
+ * @event_netdev: net_device for target netdev
+ */
+static void ice_lag_monitor_act_act(struct ice_lag *lag,
+				    struct netdev_bonding_info *b_info,
+				    struct net_device *event_netdev)
+{
+	struct ice_netdev_priv *event_np;
+	u8 prim_port, event_port;
+	struct ice_pf *event_pf;
+
+	event_np = netdev_priv(event_netdev);
+	event_pf = event_np->vsi->back;
+	event_port = event_pf->hw.port_info->lport;
+	prim_port = lag->pf->hw.port_info->lport;
+
+	if (b_info->slave.link == BOND_LINK_UP) {
+		/* Port is coming up */
+		if (prim_port == event_port) {
+			/* Processing event for primary interface */
+			if (lag->bond_lport_pri == ICE_LAG_INVALID_PORT)
+				return;
+
+			if (!(lag->port_bitmap & ICE_LAGP_M)) {
+				/* Primary port was not marked up before, move
+				 * some|all VF queues to it and mark as up
+				 */
+				lag->port_bitmap |= ICE_LAGP_M;
+				ice_lag_aa_failover(lag, ICE_LAGP_IDX,
event_pf); + } + } else { + if (lag->bond_lport_sec == ICE_LAG_INVALID_PORT) + return; + + /* Create placeholder VSIs on secondary PF. + * The placeholder is necessary so that we have + * an element that represents the VF on the secondary + * interface's scheduling tree. This will be a tree + * root for scheduling nodes when they are moved to + * the secondary interface. + */ + if (!lag->sec_vf[0]) { + struct ice_vsi_cfg_params params = {}; + struct ice_vsi *nvsi; + struct ice_vf *vf; + unsigned int bkt; + + params.type = ICE_VSI_VF; + params.port_info = event_pf->hw.port_info; + params.flags = ICE_VSI_FLAG_INIT; + + ice_for_each_vf(lag->pf, bkt, vf) { + params.vf = vf; + nvsi = ice_vsi_setup(event_pf, + ¶ms); + ice_lag_aa_clear_spoof(nvsi); + lag->sec_vf[vf->vf_id] = nvsi; + } + } + + if (!(lag->port_bitmap & ICE_LAGS_M)) { + /* Secondary port was not marked up before, + * move some|all VF queues to it and mark as up + */ + lag->port_bitmap |= ICE_LAGS_M; + ice_lag_aa_failover(lag, ICE_LAGS_IDX, event_pf); + } + } + } else { + /* Port is going down */ + if (prim_port == event_port) { + lag->port_bitmap &= ~ICE_LAGP_M; + ice_lag_aa_failover(lag, ICE_LAGS_IDX, event_pf); + } else { + lag->port_bitmap &= ~ICE_LAGS_M; + ice_lag_aa_failover(lag, ICE_LAGP_IDX, event_pf); + } + } +} + +/** + * ice_lag_monitor_info - Calls relevant A/A or A/B monitoring function + * @lag: lag info struct + * @ptr: opaque data containing notifier event + * + * This function is for the primary PF to monitor changes in which port is + * active and handle changes for SRIOV VF functionality + */ +static void ice_lag_monitor_info(struct ice_lag *lag, void *ptr) +{ + struct netdev_notifier_bonding_info *info = ptr; + struct net_device *event_netdev, *event_upper; + struct netdev_bonding_info *bonding_info; + + if (!lag->primary) + return; + + event_netdev = netdev_notifier_info_to_dev(ptr); + bonding_info = &info->bonding_info; + rcu_read_lock(); + event_upper = netdev_master_upper_dev_get_rcu(event_netdev); + rcu_read_unlock(); + if (!netif_is_ice(event_netdev) || event_upper != lag->upper_netdev) + return; + + if (lag->bond_aa) + ice_lag_monitor_act_act(lag, bonding_info, event_netdev); + else + ice_lag_monitor_act_bkup(lag, bonding_info, event_netdev); +} +/** * ice_lag_chk_comp - evaluate bonded interface for feature support * @lag: lag info struct * @ptr: opaque data for netdev event info @@ -1527,13 +2041,21 @@ static void ice_lag_monitor_active(struct ice_lag *lag, void *ptr) static bool ice_lag_chk_comp(struct ice_lag *lag, void *ptr) { + struct netdev_notifier_bonding_info *info = ptr; struct net_device *event_netdev, *event_upper; - struct netdev_notifier_bonding_info *info; struct netdev_bonding_info *bonding_info; struct list_head *tmp; struct device *dev; int count = 0; + /* All members need to know if bond A/A or A/B */ + bonding_info = &info->bonding_info; + lag->bond_mode = bonding_info->master.bond_mode; + if (lag->bond_mode != BOND_MODE_ACTIVEBACKUP) + lag->bond_aa = true; + else + lag->bond_aa = false; + if (!lag->primary) return true; @@ -1554,13 +2076,9 @@ ice_lag_chk_comp(struct ice_lag *lag, void *ptr) return false; } - info = (struct netdev_notifier_bonding_info *)ptr; - bonding_info = &info->bonding_info; - lag->bond_mode = bonding_info->master.bond_mode; - if (lag->bond_mode != BOND_MODE_ACTIVEBACKUP) { - dev_info(dev, "Bond Mode not ACTIVE-BACKUP - VF LAG disabled\n"); + if (lag->bond_aa && !ice_is_feature_supported(lag->pf, + ICE_F_SRIOV_AA_LAG)) return false; - } list_for_each(tmp, 
lag->netdev_head) { struct ice_dcbx_cfg *dcb_cfg, *peer_dcb_cfg; @@ -1664,10 +2182,9 @@ ice_lag_unregister(struct ice_lag *lag, struct net_device *event_netdev) static void ice_lag_monitor_rdma(struct ice_lag *lag, void *ptr) { - struct netdev_notifier_changeupper_info *info; + struct netdev_notifier_changeupper_info *info = ptr; struct net_device *netdev; - info = ptr; netdev = netdev_notifier_info_to_dev(ptr); if (netdev != lag->netdev) @@ -1715,12 +2232,30 @@ static void ice_lag_chk_disabled_bond(struct ice_lag *lag, void *ptr) */ static void ice_lag_disable_sriov_bond(struct ice_lag *lag) { - struct ice_netdev_priv *np; - struct ice_pf *pf; + struct ice_netdev_priv *np = netdev_priv(lag->netdev); + struct ice_pf *pf = np->vsi->back; - np = netdev_priv(lag->netdev); - pf = np->vsi->back; ice_clear_feature_support(pf, ICE_F_SRIOV_LAG); + ice_clear_feature_support(pf, ICE_F_SRIOV_AA_LAG); +} + +/** + * ice_lag_preset_drop_fltr - preset drop filter for A/B bonds + * @lag: local lag struct + * @ptr: opaque data containing event + * + * Sets the initial drop filter for secondary interface in an + * active-backup bond + */ +static void ice_lag_preset_drop_fltr(struct ice_lag *lag, void *ptr) +{ + struct net_device *netdev = netdev_notifier_info_to_dev(ptr); + + if (netdev != lag->netdev || lag->primary || !lag->need_fltr_cfg) + return; + + ice_lag_cfg_drop_fltr(lag, true); + lag->need_fltr_cfg = false; } /** @@ -1761,10 +2296,12 @@ static void ice_lag_process_event(struct work_struct *work) ice_lag_unregister(lag_work->lag, netdev); goto lag_cleanup; } - ice_lag_monitor_active(lag_work->lag, - &lag_work->info.bonding_info); ice_lag_cfg_pf_fltrs(lag_work->lag, &lag_work->info.bonding_info); + ice_lag_preset_drop_fltr(lag_work->lag, + &lag_work->info.bonding_info); + ice_lag_monitor_info(lag_work->lag, + &lag_work->info.bonding_info); } ice_lag_info_event(lag_work->lag, &lag_work->info.bonding_info); break; @@ -1837,9 +2374,8 @@ ice_lag_event_handler(struct notifier_block *notif_blk, unsigned long event, lag_work->lag = lag; lag_work->event = event; if (event == NETDEV_CHANGEUPPER) { - struct netdev_notifier_changeupper_info *info; + struct netdev_notifier_changeupper_info *info = ptr; - info = ptr; upper_netdev = info->upper_dev; } else { upper_netdev = netdev_master_upper_dev_get(netdev); @@ -1889,10 +2425,8 @@ ice_lag_event_handler(struct notifier_block *notif_blk, unsigned long event, */ static int ice_register_lag_handler(struct ice_lag *lag) { + struct notifier_block *notif_blk = &lag->notif_block; struct device *dev = ice_pf_to_dev(lag->pf); - struct notifier_block *notif_blk; - - notif_blk = &lag->notif_block; if (!notif_blk->notifier_call) { notif_blk->notifier_call = ice_lag_event_handler; @@ -1912,10 +2446,9 @@ static int ice_register_lag_handler(struct ice_lag *lag) */ static void ice_unregister_lag_handler(struct ice_lag *lag) { + struct notifier_block *notif_blk = &lag->notif_block; struct device *dev = ice_pf_to_dev(lag->pf); - struct notifier_block *notif_blk; - notif_blk = &lag->notif_block; if (notif_blk->notifier_call) { unregister_netdevice_notifier(notif_blk); dev_dbg(dev, "LAG event handler unregistered\n"); @@ -1977,13 +2510,12 @@ ice_lag_move_vf_nodes_tc_sync(struct ice_lag *lag, struct ice_hw *dest_hw, u16 numq, valq, num_moved, qbuf_size; u16 buf_size = __struct_size(buf); struct ice_aqc_cfg_txqs_buf *qbuf; + struct ice_hw *hw = &lag->pf->hw; struct ice_sched_node *n_prt; __le32 teid, parent_teid; struct ice_vsi_ctx *ctx; - struct ice_hw *hw; u32 tmp_teid; - hw = 
&lag->pf->hw;
 	ctx = ice_get_vsi_ctx(hw, vsi_num);
 	if (!ctx) {
 		dev_warn(dev, "LAG rebuild failed after reset due to VSI Context failure\n");
@@ -2020,7 +2552,8 @@ ice_lag_move_vf_nodes_tc_sync(struct ice_lag *lag, struct ice_hw *dest_hw,
 	}
 
 	if (ice_aq_cfg_lan_txq(hw, qbuf, qbuf_size, numq, hw->port_info->lport,
-			       dest_hw->port_info->lport, NULL)) {
+			       dest_hw->port_info->lport,
+			       ICE_AQC_Q_CFG_TC_CHNG, NULL)) {
 		dev_warn(dev, "Failure to configure queues for LAG reset rebuild\n");
 		goto sync_qerr;
 	}
@@ -2116,9 +2649,13 @@ int ice_init_lag(struct ice_pf *pf)
 	lag->netdev = vsi->netdev;
 	lag->role = ICE_LAG_NONE;
 	lag->active_port = ICE_LAG_INVALID_PORT;
+	lag->port_bitmap = 0x0;
 	lag->bonded = false;
+	lag->bond_aa = false;
+	lag->need_fltr_cfg = false;
 	lag->upper_netdev = NULL;
 	lag->notif_block.notifier_call = NULL;
+	memset(lag->sec_vf, 0, sizeof(lag->sec_vf));
 
 	err = ice_register_lag_handler(lag);
 	if (err) {
@@ -2136,6 +2673,11 @@ int ice_init_lag(struct ice_pf *pf)
 	if (err)
 		goto free_rcp_res;
 
+	err = ice_create_lag_recipe(&pf->hw, &lag->act_act_recipe,
+				    ice_lport_rcp, 1);
+	if (err)
+		goto free_lport_res;
+
 	/* associate recipes to profiles */
 	for (n = 0; n < ICE_PROFID_IPV6_GTPU_IPV6_TCP_INNER; n++) {
 		err = ice_aq_get_recipe_to_profile(&pf->hw, n,
@@ -2145,7 +2687,8 @@ int ice_init_lag(struct ice_pf *pf)
 		if (recipe_bits & BIT(ICE_SW_LKUP_DFLT)) {
 			recipe_bits |= BIT(lag->pf_recipe) |
-				       BIT(lag->lport_recipe);
+				       BIT(lag->lport_recipe) |
+				       BIT(lag->act_act_recipe);
 			ice_aq_map_recipe_to_profile(&pf->hw, n,
 						     recipe_bits, NULL);
 		}
@@ -2156,9 +2699,13 @@ int ice_init_lag(struct ice_pf *pf)
 	dev_dbg(dev, "INIT LAG complete\n");
 	return 0;
 
+free_lport_res:
+	ice_free_hw_res(&pf->hw, ICE_AQC_RES_TYPE_RECIPE, 1,
+			&lag->lport_recipe);
+
 free_rcp_res:
 	ice_free_hw_res(&pf->hw, ICE_AQC_RES_TYPE_RECIPE, 1,
-			&pf->lag->pf_recipe);
+			&lag->pf_recipe);
 lag_error:
 	kfree(lag);
 	pf->lag = NULL;
@@ -2174,9 +2721,7 @@ lag_error:
 */
 void ice_deinit_lag(struct ice_pf *pf)
 {
-	struct ice_lag *lag;
-
-	lag = pf->lag;
+	struct ice_lag *lag = pf->lag;
 
 	if (!lag)
 		return;
@@ -2245,11 +2790,15 @@ void ice_lag_rebuild(struct ice_pf *pf)
 		ice_lag_move_vf_nodes_sync(prim_lag, &pf->hw);
 	}
 
-	ice_lag_cfg_cp_fltr(lag, true);
+	if (!lag->bond_aa) {
+		ice_lag_cfg_lp_fltr(lag, true, true);
+		if (lag->pf_rx_rule_id)
+			if (ice_lag_cfg_dflt_fltr(lag, true))
+				dev_err(ice_pf_to_dev(pf), "Error adding default VSI rule in rebuild\n");
+	} else {
+		ice_lag_cfg_lp_fltr(lag, true, false);
+	}
 
-	if (lag->pf_rx_rule_id)
-		if (ice_lag_cfg_dflt_fltr(lag, true))
-			dev_err(ice_pf_to_dev(pf), "Error adding default VSI rule in rebuild\n");
 	ice_clear_rdma_cap(pf);
 lag_rebuild_out:
diff --git a/drivers/net/ethernet/intel/ice/ice_lag.h b/drivers/net/ethernet/intel/ice/ice_lag.h
index 69347d9f986b..e2a0a782bdd7 100644
--- a/drivers/net/ethernet/intel/ice/ice_lag.h
+++ b/drivers/net/ethernet/intel/ice/ice_lag.h
@@ -14,7 +14,11 @@ enum ice_lag_role {
 	ICE_LAG_UNSET
 };
 
-#define ICE_LAG_INVALID_PORT 0xFF
+#define ICE_LAG_INVALID_PORT	0xFF
+#define ICE_LAGP_IDX		0
+#define ICE_LAGS_IDX		1
+#define ICE_LAGP_M		0x1
+#define ICE_LAGS_M		0x2
 
 #define ICE_LAG_RESET_RETRIES		5
 #define ICE_SW_DEFAULT_PROFILE		0
@@ -41,12 +45,26 @@ struct ice_lag {
 	u8 active_port; /* lport value for the current active port */
 	u8 bonded:1; /* currently bonded */
 	u8 primary:1; /* this is primary */
+	u8 bond_aa:1; /* is this bond active-active */
+	u8 need_fltr_cfg:1; /* fltrs for A/A bond still need to be made */
+	u8 port_bitmap:2; /* bitmap of active ports */
+	u8 bond_lport_pri; /* lport values for
primary PF */ + u8 bond_lport_sec; /* lport values for secondary PF */ + + /* q_home keeps track of which interface the q is currently on */ + u8 q_home[ICE_MAX_SRIOV_VFS][ICE_MAX_RSS_QS_PER_VF]; + + /* placeholder VSI for hanging VF queues from on secondary interface */ + struct ice_vsi *sec_vf[ICE_MAX_SRIOV_VFS]; + u16 pf_recipe; u16 lport_recipe; + u16 act_act_recipe; u16 pf_rx_rule_id; u16 pf_tx_rule_id; u16 cp_rule_idx; u16 lport_rule_idx; + u16 act_act_rule_idx; u8 role; }; @@ -65,6 +83,7 @@ struct ice_lag_work { }; void ice_lag_move_new_vf_nodes(struct ice_vf *vf); +void ice_lag_aa_failover(struct ice_lag *lag, u8 dest, struct ice_pf *e_pf); int ice_init_lag(struct ice_pf *pf); void ice_deinit_lag(struct ice_pf *pf); void ice_lag_rebuild(struct ice_pf *pf); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 77781277aa8e..92b95d92d599 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -9125,7 +9125,7 @@ static int ice_create_q_channels(struct ice_vsi *vsi) list_add_tail(&ch->list, &vsi->ch_list); vsi->tc_map_vsi[i] = ch->ch_vsi; dev_dbg(ice_pf_to_dev(pf), - "successfully created channel: VSI %pK\n", ch->ch_vsi); + "successfully created channel: VSI %p\n", ch->ch_vsi); } return 0; diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c index 9ce4c4db400e..843e82fd3bf9 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.c +++ b/drivers/net/ethernet/intel/ice/ice_sriov.c @@ -9,7 +9,7 @@ #include "ice_dcb_lib.h" #include "ice_flow.h" #include "ice_eswitch.h" -#include "ice_virtchnl_allowlist.h" +#include "virt/allowlist.h" #include "ice_flex_pipe.h" #include "ice_vf_vsi_vlan_ops.h" #include "ice_vlan.h" diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.h b/drivers/net/ethernet/intel/ice/ice_sriov.h index d1a998a4bef6..6c4fad09a527 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.h +++ b/drivers/net/ethernet/intel/ice/ice_sriov.h @@ -3,9 +3,9 @@ #ifndef _ICE_SRIOV_H_ #define _ICE_SRIOV_H_ -#include "ice_virtchnl_fdir.h" +#include "virt/fdir.h" #include "ice_vf_lib.h" -#include "ice_virtchnl.h" +#include "virt/virtchnl.h" /* Static VF transaction/status register def */ #define VF_DEVICE_STATUS 0xAA diff --git a/drivers/net/ethernet/intel/ice/ice_trace.h b/drivers/net/ethernet/intel/ice/ice_trace.h index 07aab6e130cd..4f35ef8d6b29 100644 --- a/drivers/net/ethernet/intel/ice/ice_trace.h +++ b/drivers/net/ethernet/intel/ice/ice_trace.h @@ -130,7 +130,7 @@ DECLARE_EVENT_CLASS(ice_tx_template, __entry->buf = buf; __assign_str(devname);), - TP_printk("netdev: %s ring: %pK desc: %pK buf %pK", __get_str(devname), + TP_printk("netdev: %s ring: %p desc: %p buf %p", __get_str(devname), __entry->ring, __entry->desc, __entry->buf) ); @@ -158,7 +158,7 @@ DECLARE_EVENT_CLASS(ice_rx_template, __entry->desc = desc; __assign_str(devname);), - TP_printk("netdev: %s ring: %pK desc: %pK", __get_str(devname), + TP_printk("netdev: %s ring: %p desc: %p", __get_str(devname), __entry->ring, __entry->desc) ); DEFINE_EVENT(ice_rx_template, ice_clean_rx_irq, @@ -182,7 +182,7 @@ DECLARE_EVENT_CLASS(ice_rx_indicate_template, __entry->skb = skb; __assign_str(devname);), - TP_printk("netdev: %s ring: %pK desc: %pK skb %pK", __get_str(devname), + TP_printk("netdev: %s ring: %p desc: %p skb %p", __get_str(devname), __entry->ring, __entry->desc, __entry->skb) ); @@ -205,7 +205,7 @@ DECLARE_EVENT_CLASS(ice_xmit_template, __entry->skb = skb; 
__assign_str(devname);), - TP_printk("netdev: %s skb: %pK ring: %pK", __get_str(devname), + TP_printk("netdev: %s skb: %p ring: %p", __get_str(devname), __entry->skb, __entry->ring) ); @@ -228,7 +228,7 @@ DECLARE_EVENT_CLASS(ice_tx_tstamp_template, TP_fast_assign(__entry->skb = skb; __entry->idx = idx;), - TP_printk("skb %pK idx %d", + TP_printk("skb %p idx %d", __entry->skb, __entry->idx) ); #define DEFINE_TX_TSTAMP_OP_EVENT(name) \ diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index 03c6c271865d..8d19efc1df72 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -293,8 +293,10 @@ struct ice_hw_common_caps { u8 dcb; u8 ieee_1588; u8 rdma; - u8 roce_lag; - u8 sriov_lag; + + bool roce_lag; + bool sriov_lag; + bool sriov_aa_lag; bool nvm_update_pending_nvm; bool nvm_update_pending_orom; diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.c b/drivers/net/ethernet/intel/ice/ice_vf_lib.c index 5ee74f3e82dc..de9e81ccee66 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.c @@ -5,7 +5,7 @@ #include "ice.h" #include "ice_lib.h" #include "ice_fltr.h" -#include "ice_virtchnl_allowlist.h" +#include "virt/allowlist.h" /* Public functions which may be accessed by all driver files */ diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.h b/drivers/net/ethernet/intel/ice/ice_vf_lib.h index ffe1f9f830ea..b00708907176 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.h @@ -13,7 +13,7 @@ #include <linux/avf/virtchnl.h> #include "ice_type.h" #include "ice_flow.h" -#include "ice_virtchnl_fdir.h" +#include "virt/fdir.h" #include "ice_vsi_vlan_ops.h" #define ICE_MAX_SRIOV_VFS 256 diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c b/drivers/net/ethernet/intel/ice/virt/allowlist.c index 4c2ec2337b38..a07efec19c45 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c +++ b/drivers/net/ethernet/intel/ice/virt/allowlist.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (C) 2021, Intel Corporation. */ -#include "ice_virtchnl_allowlist.h" +#include "allowlist.h" /* Purpose of this file is to share functionality to allowlist or denylist * opcodes used in PF <-> VF communication. Group of opcodes: diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.h b/drivers/net/ethernet/intel/ice/virt/allowlist.h index d3ae86ded219..d3ae86ded219 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.h +++ b/drivers/net/ethernet/intel/ice/virt/allowlist.h diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c b/drivers/net/ethernet/intel/ice/virt/fdir.c index ae83c3914e29..ae83c3914e29 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c +++ b/drivers/net/ethernet/intel/ice/virt/fdir.c diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.h b/drivers/net/ethernet/intel/ice/virt/fdir.h index ac6dcab454b4..ac6dcab454b4 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.h +++ b/drivers/net/ethernet/intel/ice/virt/fdir.h diff --git a/drivers/net/ethernet/intel/ice/virt/queues.c b/drivers/net/ethernet/intel/ice/virt/queues.c new file mode 100644 index 000000000000..40575cfe6dd4 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/virt/queues.c @@ -0,0 +1,975 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2022, Intel Corporation. 
*/ + +#include "virtchnl.h" +#include "queues.h" +#include "ice_vf_lib_private.h" +#include "ice.h" +#include "ice_base.h" +#include "ice_lib.h" + +/** + * ice_vc_get_max_frame_size - get max frame size allowed for VF + * @vf: VF used to determine max frame size + * + * Max frame size is determined based on the current port's max frame size and + * whether a port VLAN is configured on this VF. The VF is not aware whether + * it's in a port VLAN so the PF needs to account for this in max frame size + * checks and sending the max frame size to the VF. + */ +u16 ice_vc_get_max_frame_size(struct ice_vf *vf) +{ + struct ice_port_info *pi = ice_vf_get_port_info(vf); + u16 max_frame_size; + + max_frame_size = pi->phy.link_info.max_frame_size; + + if (ice_vf_is_port_vlan_ena(vf)) + max_frame_size -= VLAN_HLEN; + + return max_frame_size; +} + +/** + * ice_vc_isvalid_q_id + * @vsi: VSI to check queue ID against + * @qid: VSI relative queue ID + * + * check for the valid queue ID + */ +static bool ice_vc_isvalid_q_id(struct ice_vsi *vsi, u16 qid) +{ + /* allocated Tx and Rx queues should be always equal for VF VSI */ + return qid < vsi->alloc_txq; +} + +/** + * ice_vc_isvalid_ring_len + * @ring_len: length of ring + * + * check for the valid ring count, should be multiple of ICE_REQ_DESC_MULTIPLE + * or zero + */ +static bool ice_vc_isvalid_ring_len(u16 ring_len) +{ + return ring_len == 0 || + (ring_len >= ICE_MIN_NUM_DESC && + ring_len <= ICE_MAX_NUM_DESC && + !(ring_len % ICE_REQ_DESC_MULTIPLE)); +} + +/** + * ice_vf_cfg_qs_bw - Configure per queue bandwidth + * @vf: pointer to the VF info + * @num_queues: number of queues to be configured + * + * Configure per queue bandwidth. + * + * Return: 0 on success or negative error value. + */ +static int ice_vf_cfg_qs_bw(struct ice_vf *vf, u16 num_queues) +{ + struct ice_hw *hw = &vf->pf->hw; + struct ice_vsi *vsi; + int ret; + u16 i; + + vsi = ice_get_vf_vsi(vf); + if (!vsi) + return -EINVAL; + + for (i = 0; i < num_queues; i++) { + u32 p_rate, min_rate; + u8 tc; + + p_rate = vf->qs_bw[i].peak; + min_rate = vf->qs_bw[i].committed; + tc = vf->qs_bw[i].tc; + if (p_rate) + ret = ice_cfg_q_bw_lmt(hw->port_info, vsi->idx, tc, + vf->qs_bw[i].queue_id, + ICE_MAX_BW, p_rate); + else + ret = ice_cfg_q_bw_dflt_lmt(hw->port_info, vsi->idx, tc, + vf->qs_bw[i].queue_id, + ICE_MAX_BW); + if (ret) + return ret; + + if (min_rate) + ret = ice_cfg_q_bw_lmt(hw->port_info, vsi->idx, tc, + vf->qs_bw[i].queue_id, + ICE_MIN_BW, min_rate); + else + ret = ice_cfg_q_bw_dflt_lmt(hw->port_info, vsi->idx, tc, + vf->qs_bw[i].queue_id, + ICE_MIN_BW); + + if (ret) + return ret; + } + + return 0; +} + +/** + * ice_vf_cfg_q_quanta_profile - Configure quanta profile + * @vf: pointer to the VF info + * @quanta_prof_idx: pointer to the quanta profile index + * @quanta_size: quanta size to be set + * + * This function chooses available quanta profile and configures the register. + * The quanta profile is evenly divided by the number of device ports, and then + * available to the specific PF and VFs. The first profile for each PF is a + * reserved default profile. Only quanta size of the rest unused profile can be + * modified. + * + * Return: 0 on success or negative error value. 
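+ *
+ * For example, assuming 16 device-wide profiles and 8 functions, each PF
+ * owns two profiles: begin_id holds the reserved default, and only
+ * begin_id + 1 can be reprogrammed by this function.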
+ */ +static int ice_vf_cfg_q_quanta_profile(struct ice_vf *vf, u16 quanta_size, + u16 *quanta_prof_idx) +{ + const u16 n_desc = calc_quanta_desc(quanta_size); + struct ice_hw *hw = &vf->pf->hw; + const u16 n_cmd = 2 * n_desc; + struct ice_pf *pf = vf->pf; + u16 per_pf, begin_id; + u8 n_used; + u32 reg; + + begin_id = (GLCOMM_QUANTA_PROF_MAX_INDEX + 1) / hw->dev_caps.num_funcs * + hw->logical_pf_id; + + if (quanta_size == ICE_DFLT_QUANTA) { + *quanta_prof_idx = begin_id; + } else { + per_pf = (GLCOMM_QUANTA_PROF_MAX_INDEX + 1) / + hw->dev_caps.num_funcs; + n_used = pf->num_quanta_prof_used; + if (n_used < per_pf) { + *quanta_prof_idx = begin_id + 1 + n_used; + pf->num_quanta_prof_used++; + } else { + return -EINVAL; + } + } + + reg = FIELD_PREP(GLCOMM_QUANTA_PROF_QUANTA_SIZE_M, quanta_size) | + FIELD_PREP(GLCOMM_QUANTA_PROF_MAX_CMD_M, n_cmd) | + FIELD_PREP(GLCOMM_QUANTA_PROF_MAX_DESC_M, n_desc); + wr32(hw, GLCOMM_QUANTA_PROF(*quanta_prof_idx), reg); + + return 0; +} + +/** + * ice_vc_validate_vqs_bitmaps - validate Rx/Tx queue bitmaps from VIRTCHNL + * @vqs: virtchnl_queue_select structure containing bitmaps to validate + * + * Return true on successful validation, else false + */ +static bool ice_vc_validate_vqs_bitmaps(struct virtchnl_queue_select *vqs) +{ + if ((!vqs->rx_queues && !vqs->tx_queues) || + vqs->rx_queues >= BIT(ICE_MAX_RSS_QS_PER_VF) || + vqs->tx_queues >= BIT(ICE_MAX_RSS_QS_PER_VF)) + return false; + + return true; +} + +/** + * ice_vf_ena_txq_interrupt - enable Tx queue interrupt via QINT_TQCTL + * @vsi: VSI of the VF to configure + * @q_idx: VF queue index used to determine the queue in the PF's space + */ +void ice_vf_ena_txq_interrupt(struct ice_vsi *vsi, u32 q_idx) +{ + struct ice_hw *hw = &vsi->back->hw; + u32 pfq = vsi->txq_map[q_idx]; + u32 reg; + + reg = rd32(hw, QINT_TQCTL(pfq)); + + /* MSI-X index 0 in the VF's space is always for the OICR, which means + * this is most likely a poll mode VF driver, so don't enable an + * interrupt that was never configured via VIRTCHNL_OP_CONFIG_IRQ_MAP + */ + if (!(reg & QINT_TQCTL_MSIX_INDX_M)) + return; + + wr32(hw, QINT_TQCTL(pfq), reg | QINT_TQCTL_CAUSE_ENA_M); +} + +/** + * ice_vf_ena_rxq_interrupt - enable Tx queue interrupt via QINT_RQCTL + * @vsi: VSI of the VF to configure + * @q_idx: VF queue index used to determine the queue in the PF's space + */ +void ice_vf_ena_rxq_interrupt(struct ice_vsi *vsi, u32 q_idx) +{ + struct ice_hw *hw = &vsi->back->hw; + u32 pfq = vsi->rxq_map[q_idx]; + u32 reg; + + reg = rd32(hw, QINT_RQCTL(pfq)); + + /* MSI-X index 0 in the VF's space is always for the OICR, which means + * this is most likely a poll mode VF driver, so don't enable an + * interrupt that was never configured via VIRTCHNL_OP_CONFIG_IRQ_MAP + */ + if (!(reg & QINT_RQCTL_MSIX_INDX_M)) + return; + + wr32(hw, QINT_RQCTL(pfq), reg | QINT_RQCTL_CAUSE_ENA_M); +} + +/** + * ice_vc_ena_qs_msg + * @vf: pointer to the VF info + * @msg: pointer to the msg buffer + * + * called from the VF to enable all or specific queue(s) + */ +int ice_vc_ena_qs_msg(struct ice_vf *vf, u8 *msg) +{ + enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; + struct virtchnl_queue_select *vqs = + (struct virtchnl_queue_select *)msg; + struct ice_vsi *vsi; + unsigned long q_map; + u16 vf_q_id; + + if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + if (!ice_vc_isvalid_vsi_id(vf, vqs->vsi_id)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + if 
(!ice_vc_validate_vqs_bitmaps(vqs)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + vsi = ice_get_vf_vsi(vf); + if (!vsi) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + /* Enable only Rx rings, Tx rings were enabled by the FW when the + * Tx queue group list was configured and the context bits were + * programmed using ice_vsi_cfg_txqs + */ + q_map = vqs->rx_queues; + for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) { + if (!ice_vc_isvalid_q_id(vsi, vf_q_id)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + /* Skip queue if enabled */ + if (test_bit(vf_q_id, vf->rxq_ena)) + continue; + + if (ice_vsi_ctrl_one_rx_ring(vsi, true, vf_q_id, true)) { + dev_err(ice_pf_to_dev(vsi->back), "Failed to enable Rx ring %d on VSI %d\n", + vf_q_id, vsi->vsi_num); + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + ice_vf_ena_rxq_interrupt(vsi, vf_q_id); + set_bit(vf_q_id, vf->rxq_ena); + } + + q_map = vqs->tx_queues; + for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) { + if (!ice_vc_isvalid_q_id(vsi, vf_q_id)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + /* Skip queue if enabled */ + if (test_bit(vf_q_id, vf->txq_ena)) + continue; + + ice_vf_ena_txq_interrupt(vsi, vf_q_id); + set_bit(vf_q_id, vf->txq_ena); + } + + /* Set flag to indicate that queues are enabled */ + if (v_ret == VIRTCHNL_STATUS_SUCCESS) + set_bit(ICE_VF_STATE_QS_ENA, vf->vf_states); + +error_param: + /* send the response to the VF */ + return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ENABLE_QUEUES, v_ret, + NULL, 0); +} + +/** + * ice_vf_vsi_dis_single_txq - disable a single Tx queue + * @vf: VF to disable queue for + * @vsi: VSI for the VF + * @q_id: VF relative (0-based) queue ID + * + * Attempt to disable the Tx queue passed in. If the Tx queue was successfully + * disabled then clear q_id bit in the enabled queues bitmap and return + * success. Otherwise return error. 
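+ *
+ * A queue that is already disabled is tolerated: only a debug message is
+ * logged and the stop is still attempted.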
+ */ +int ice_vf_vsi_dis_single_txq(struct ice_vf *vf, struct ice_vsi *vsi, u16 q_id) +{ + struct ice_txq_meta txq_meta = { 0 }; + struct ice_tx_ring *ring; + int err; + + if (!test_bit(q_id, vf->txq_ena)) + dev_dbg(ice_pf_to_dev(vsi->back), "Queue %u on VSI %u is not enabled, but stopping it anyway\n", + q_id, vsi->vsi_num); + + ring = vsi->tx_rings[q_id]; + if (!ring) + return -EINVAL; + + ice_fill_txq_meta(vsi, ring, &txq_meta); + + err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, vf->vf_id, ring, &txq_meta); + if (err) { + dev_err(ice_pf_to_dev(vsi->back), "Failed to stop Tx ring %d on VSI %d\n", + q_id, vsi->vsi_num); + return err; + } + + /* Clear enabled queues flag */ + clear_bit(q_id, vf->txq_ena); + + return 0; +} + +/** + * ice_vc_dis_qs_msg + * @vf: pointer to the VF info + * @msg: pointer to the msg buffer + * + * called from the VF to disable all or specific queue(s) + */ +int ice_vc_dis_qs_msg(struct ice_vf *vf, u8 *msg) +{ + enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; + struct virtchnl_queue_select *vqs = + (struct virtchnl_queue_select *)msg; + struct ice_vsi *vsi; + unsigned long q_map; + u16 vf_q_id; + + if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states) && + !test_bit(ICE_VF_STATE_QS_ENA, vf->vf_states)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + if (!ice_vc_isvalid_vsi_id(vf, vqs->vsi_id)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + if (!ice_vc_validate_vqs_bitmaps(vqs)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + vsi = ice_get_vf_vsi(vf); + if (!vsi) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + if (vqs->tx_queues) { + q_map = vqs->tx_queues; + + for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) { + if (!ice_vc_isvalid_q_id(vsi, vf_q_id)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + if (ice_vf_vsi_dis_single_txq(vf, vsi, vf_q_id)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + } + } + + q_map = vqs->rx_queues; + /* speed up Rx queue disable by batching them if possible */ + if (q_map && + bitmap_equal(&q_map, vf->rxq_ena, ICE_MAX_RSS_QS_PER_VF)) { + if (ice_vsi_stop_all_rx_rings(vsi)) { + dev_err(ice_pf_to_dev(vsi->back), "Failed to stop all Rx rings on VSI %d\n", + vsi->vsi_num); + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + bitmap_zero(vf->rxq_ena, ICE_MAX_RSS_QS_PER_VF); + } else if (q_map) { + for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) { + if (!ice_vc_isvalid_q_id(vsi, vf_q_id)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + /* Skip queue if not enabled */ + if (!test_bit(vf_q_id, vf->rxq_ena)) + continue; + + if (ice_vsi_ctrl_one_rx_ring(vsi, false, vf_q_id, + true)) { + dev_err(ice_pf_to_dev(vsi->back), "Failed to stop Rx ring %d on VSI %d\n", + vf_q_id, vsi->vsi_num); + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + /* Clear enabled queues flag */ + clear_bit(vf_q_id, vf->rxq_ena); + } + } + + /* Clear enabled queues flag */ + if (v_ret == VIRTCHNL_STATUS_SUCCESS && ice_vf_has_no_qs_ena(vf)) + clear_bit(ICE_VF_STATE_QS_ENA, vf->vf_states); + +error_param: + /* send the response to the VF */ + return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_DISABLE_QUEUES, v_ret, + NULL, 0); +} + +/** + * ice_cfg_interrupt + * @vf: pointer to the VF info + * @vsi: the VSI being configured + * @map: vector map for mapping vectors to queues + * @q_vector: structure for interrupt vector + * configure the IRQ to queue map + */ +static enum virtchnl_status_code 
+ice_cfg_interrupt(struct ice_vf *vf, struct ice_vsi *vsi, + struct virtchnl_vector_map *map, + struct ice_q_vector *q_vector) +{ + u16 vsi_q_id, vsi_q_id_idx; + unsigned long qmap; + + q_vector->num_ring_rx = 0; + q_vector->num_ring_tx = 0; + + qmap = map->rxq_map; + for_each_set_bit(vsi_q_id_idx, &qmap, ICE_MAX_RSS_QS_PER_VF) { + vsi_q_id = vsi_q_id_idx; + + if (!ice_vc_isvalid_q_id(vsi, vsi_q_id)) + return VIRTCHNL_STATUS_ERR_PARAM; + + q_vector->num_ring_rx++; + q_vector->rx.itr_idx = map->rxitr_idx; + vsi->rx_rings[vsi_q_id]->q_vector = q_vector; + ice_cfg_rxq_interrupt(vsi, vsi_q_id, + q_vector->vf_reg_idx, + q_vector->rx.itr_idx); + } + + qmap = map->txq_map; + for_each_set_bit(vsi_q_id_idx, &qmap, ICE_MAX_RSS_QS_PER_VF) { + vsi_q_id = vsi_q_id_idx; + + if (!ice_vc_isvalid_q_id(vsi, vsi_q_id)) + return VIRTCHNL_STATUS_ERR_PARAM; + + q_vector->num_ring_tx++; + q_vector->tx.itr_idx = map->txitr_idx; + vsi->tx_rings[vsi_q_id]->q_vector = q_vector; + ice_cfg_txq_interrupt(vsi, vsi_q_id, + q_vector->vf_reg_idx, + q_vector->tx.itr_idx); + } + + return VIRTCHNL_STATUS_SUCCESS; +} + +/** + * ice_vc_cfg_irq_map_msg + * @vf: pointer to the VF info + * @msg: pointer to the msg buffer + * + * called from the VF to configure the IRQ to queue map + */ +int ice_vc_cfg_irq_map_msg(struct ice_vf *vf, u8 *msg) +{ + enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; + u16 num_q_vectors_mapped, vsi_id, vector_id; + struct virtchnl_irq_map_info *irqmap_info; + struct virtchnl_vector_map *map; + struct ice_vsi *vsi; + int i; + + irqmap_info = (struct virtchnl_irq_map_info *)msg; + num_q_vectors_mapped = irqmap_info->num_vectors; + + /* Check to make sure number of VF vectors mapped is not greater than + * number of VF vectors originally allocated, and check that + * there is actually at least a single VF queue vector mapped + */ + if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states) || + vf->num_msix < num_q_vectors_mapped || + !num_q_vectors_mapped) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + vsi = ice_get_vf_vsi(vf); + if (!vsi) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + for (i = 0; i < num_q_vectors_mapped; i++) { + struct ice_q_vector *q_vector; + + map = &irqmap_info->vecmap[i]; + + vector_id = map->vector_id; + vsi_id = map->vsi_id; + /* vector_id is always 0-based for each VF, and can never be + * larger than or equal to the max allowed interrupts per VF + */ + if (!(vector_id < vf->num_msix) || + !ice_vc_isvalid_vsi_id(vf, vsi_id) || + (!vector_id && (map->rxq_map || map->txq_map))) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + /* No need to map VF miscellaneous or rogue vector */ + if (!vector_id) + continue; + + /* Subtract non queue vector from vector_id passed by VF + * to get actual number of VSI queue vector array index + */ + q_vector = vsi->q_vectors[vector_id - ICE_NONQ_VECS_VF]; + if (!q_vector) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + /* lookout for the invalid queue index */ + v_ret = ice_cfg_interrupt(vf, vsi, map, q_vector); + if (v_ret) + goto error_param; + } + +error_param: + /* send the response to the VF */ + return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_IRQ_MAP, v_ret, + NULL, 0); +} + +/** + * ice_vc_cfg_q_bw - Configure per queue bandwidth + * @vf: pointer to the VF info + * @msg: pointer to the msg buffer which holds the command descriptor + * + * Configure VF queues bandwidth. + * + * Return: 0 on success or negative error value. 
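+ *
+ * Every entry is validated against the VF's queue count, traffic class
+ * range and min/max TX rates before any of the requested values are
+ * stored, so one bad entry rejects the whole request.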
+ */ +int ice_vc_cfg_q_bw(struct ice_vf *vf, u8 *msg) +{ + enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; + struct virtchnl_queues_bw_cfg *qbw = + (struct virtchnl_queues_bw_cfg *)msg; + struct ice_vsi *vsi; + u16 i; + + if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states) || + !ice_vc_isvalid_vsi_id(vf, qbw->vsi_id)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + + vsi = ice_get_vf_vsi(vf); + if (!vsi) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + + if (qbw->num_queues > ICE_MAX_RSS_QS_PER_VF || + qbw->num_queues > min_t(u16, vsi->alloc_txq, vsi->alloc_rxq)) { + dev_err(ice_pf_to_dev(vf->pf), "VF-%d trying to configure more than allocated number of queues: %d\n", + vf->vf_id, min_t(u16, vsi->alloc_txq, vsi->alloc_rxq)); + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + + for (i = 0; i < qbw->num_queues; i++) { + if (qbw->cfg[i].shaper.peak != 0 && vf->max_tx_rate != 0 && + qbw->cfg[i].shaper.peak > vf->max_tx_rate) { + dev_warn(ice_pf_to_dev(vf->pf), "The maximum queue %d rate limit configuration may not take effect because the maximum TX rate for VF-%d is %d\n", + qbw->cfg[i].queue_id, vf->vf_id, + vf->max_tx_rate); + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + if (qbw->cfg[i].shaper.committed != 0 && vf->min_tx_rate != 0 && + qbw->cfg[i].shaper.committed < vf->min_tx_rate) { + dev_warn(ice_pf_to_dev(vf->pf), "The minimum queue %d rate limit configuration may not take effect because the minimum TX rate for VF-%d is %d\n", + qbw->cfg[i].queue_id, vf->vf_id, + vf->min_tx_rate); + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + if (qbw->cfg[i].queue_id > vf->num_vf_qs) { + dev_warn(ice_pf_to_dev(vf->pf), "VF-%d trying to configure invalid queue_id\n", + vf->vf_id); + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + if (qbw->cfg[i].tc >= ICE_MAX_TRAFFIC_CLASS) { + dev_warn(ice_pf_to_dev(vf->pf), "VF-%d trying to configure a traffic class higher than allowed\n", + vf->vf_id); + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + } + + for (i = 0; i < qbw->num_queues; i++) { + vf->qs_bw[i].queue_id = qbw->cfg[i].queue_id; + vf->qs_bw[i].peak = qbw->cfg[i].shaper.peak; + vf->qs_bw[i].committed = qbw->cfg[i].shaper.committed; + vf->qs_bw[i].tc = qbw->cfg[i].tc; + } + + if (ice_vf_cfg_qs_bw(vf, qbw->num_queues)) + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + +err: + /* send the response to the VF */ + return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_QUEUE_BW, + v_ret, NULL, 0); +} + +/** + * ice_vc_cfg_q_quanta - Configure per queue quanta + * @vf: pointer to the VF info + * @msg: pointer to the msg buffer which holds the command descriptor + * + * Configure VF queues quanta. + * + * Return: 0 on success or negative error value. 
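+ *
+ * The requested [start_queue_id, start_queue_id + num_queues) range is
+ * overflow-checked and must fit within the VF's allocated queues, and
+ * quanta_size must be a multiple of 64 between ICE_MIN_QUANTA_SIZE and
+ * ICE_MAX_QUANTA_SIZE.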
+ */
+int ice_vc_cfg_q_quanta(struct ice_vf *vf, u8 *msg)
+{
+	u16 quanta_prof_id, quanta_size, start_qid, num_queues, end_qid, i;
+	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+	struct virtchnl_quanta_cfg *qquanta =
+		(struct virtchnl_quanta_cfg *)msg;
+	struct ice_vsi *vsi;
+	int ret;
+
+	start_qid = qquanta->queue_select.start_queue_id;
+	num_queues = qquanta->queue_select.num_queues;
+
+	if (check_add_overflow(start_qid, num_queues, &end_qid)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto err;
+	}
+
+	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto err;
+	}
+
+	vsi = ice_get_vf_vsi(vf);
+	if (!vsi) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto err;
+	}
+
+	if (end_qid > ICE_MAX_RSS_QS_PER_VF ||
+	    end_qid > min_t(u16, vsi->alloc_txq, vsi->alloc_rxq)) {
+		dev_err(ice_pf_to_dev(vf->pf), "VF-%d trying to configure more than allocated number of queues: %d\n",
+			vf->vf_id, min_t(u16, vsi->alloc_txq, vsi->alloc_rxq));
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto err;
+	}
+
+	quanta_size = qquanta->quanta_size;
+	if (quanta_size > ICE_MAX_QUANTA_SIZE ||
+	    quanta_size < ICE_MIN_QUANTA_SIZE) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto err;
+	}
+
+	if (quanta_size % 64) {
+		dev_err(ice_pf_to_dev(vf->pf), "quanta size should be a multiple of 64\n");
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto err;
+	}
+
+	ret = ice_vf_cfg_q_quanta_profile(vf, quanta_size,
+					  &quanta_prof_id);
+	if (ret) {
+		v_ret = VIRTCHNL_STATUS_ERR_NOT_SUPPORTED;
+		goto err;
+	}
+
+	for (i = start_qid; i < end_qid; i++)
+		vsi->tx_rings[i]->quanta_prof_id = quanta_prof_id;
+
+err:
+	/* send the response to the VF */
+	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_QUANTA,
+				     v_ret, NULL, 0);
+}
+
+/**
+ * ice_vc_cfg_qs_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * called from the VF to configure the Rx/Tx queues
+ */
+int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg)
+{
+	struct virtchnl_vsi_queue_config_info *qci =
+		(struct virtchnl_vsi_queue_config_info *)msg;
+	struct virtchnl_queue_pair_info *qpi;
+	struct ice_pf *pf = vf->pf;
+	struct ice_vsi *vsi;
+	int i = -1, q_idx;
+	bool ena_ts;
+	u8 act_prt;
+
+	mutex_lock(&pf->lag_mutex);
+	act_prt = ice_lag_prepare_vf_reset(pf->lag);
+
+	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states))
+		goto error_param;
+
+	if (!ice_vc_isvalid_vsi_id(vf, qci->vsi_id))
+		goto error_param;
+
+	vsi = ice_get_vf_vsi(vf);
+	if (!vsi)
+		goto error_param;
+
+	if (qci->num_queue_pairs > ICE_MAX_RSS_QS_PER_VF ||
+	    qci->num_queue_pairs > min_t(u16, vsi->alloc_txq, vsi->alloc_rxq)) {
+		dev_err(ice_pf_to_dev(pf), "VF-%d requesting more than supported number of queues: %d\n",
+			vf->vf_id, min_t(u16, vsi->alloc_txq, vsi->alloc_rxq));
+		goto error_param;
+	}
+
+	for (i = 0; i < qci->num_queue_pairs; i++) {
+		if (!qci->qpair[i].rxq.crc_disable)
+			continue;
+
+		if (!(vf->driver_caps & VIRTCHNL_VF_OFFLOAD_CRC) ||
+		    vf->vlan_strip_ena)
+			goto error_param;
+	}
+
+	for (i = 0; i < qci->num_queue_pairs; i++) {
+		qpi = &qci->qpair[i];
+		if (qpi->txq.vsi_id != qci->vsi_id ||
+		    qpi->rxq.vsi_id != qci->vsi_id ||
+		    qpi->rxq.queue_id != qpi->txq.queue_id ||
+		    qpi->txq.headwb_enabled ||
+		    !ice_vc_isvalid_ring_len(qpi->txq.ring_len) ||
+		    !ice_vc_isvalid_ring_len(qpi->rxq.ring_len) ||
+		    !ice_vc_isvalid_q_id(vsi, qpi->txq.queue_id)) {
+			goto error_param;
+		}
+
+		q_idx = qpi->rxq.queue_id;
+
+		/* make sure selected "q_idx" is in valid range of queues
+		 * for selected "vsi"
+		 */
+		if (q_idx >= vsi->alloc_txq || q_idx >=
vsi->alloc_rxq) { + goto error_param; + } + + /* copy Tx queue info from VF into VSI */ + if (qpi->txq.ring_len > 0) { + vsi->tx_rings[q_idx]->dma = qpi->txq.dma_ring_addr; + vsi->tx_rings[q_idx]->count = qpi->txq.ring_len; + + /* Disable any existing queue first */ + if (ice_vf_vsi_dis_single_txq(vf, vsi, q_idx)) + goto error_param; + + /* Configure a queue with the requested settings */ + if (ice_vsi_cfg_single_txq(vsi, vsi->tx_rings, q_idx)) { + dev_warn(ice_pf_to_dev(pf), "VF-%d failed to configure TX queue %d\n", + vf->vf_id, q_idx); + goto error_param; + } + } + + /* copy Rx queue info from VF into VSI */ + if (qpi->rxq.ring_len > 0) { + u16 max_frame_size = ice_vc_get_max_frame_size(vf); + struct ice_rx_ring *ring = vsi->rx_rings[q_idx]; + u32 rxdid; + + ring->dma = qpi->rxq.dma_ring_addr; + ring->count = qpi->rxq.ring_len; + + if (qpi->rxq.crc_disable) + ring->flags |= ICE_RX_FLAGS_CRC_STRIP_DIS; + else + ring->flags &= ~ICE_RX_FLAGS_CRC_STRIP_DIS; + + if (qpi->rxq.databuffer_size != 0 && + (qpi->rxq.databuffer_size > ((16 * 1024) - 128) || + qpi->rxq.databuffer_size < 1024)) + goto error_param; + ring->rx_buf_len = qpi->rxq.databuffer_size; + if (qpi->rxq.max_pkt_size > max_frame_size || + qpi->rxq.max_pkt_size < 64) + goto error_param; + + ring->max_frame = qpi->rxq.max_pkt_size; + /* add space for the port VLAN since the VF driver is + * not expected to account for it in the MTU + * calculation + */ + if (ice_vf_is_port_vlan_ena(vf)) + ring->max_frame += VLAN_HLEN; + + if (ice_vsi_cfg_single_rxq(vsi, q_idx)) { + dev_warn(ice_pf_to_dev(pf), "VF-%d failed to configure RX queue %d\n", + vf->vf_id, q_idx); + goto error_param; + } + + /* If Rx flex desc is supported, select RXDID for Rx + * queues. Otherwise, use legacy 32byte descriptor + * format. Legacy 16byte descriptor is not supported. + * If this RXDID is selected, return error. + */ + if (vf->driver_caps & + VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC) { + rxdid = qpi->rxq.rxdid; + if (!(BIT(rxdid) & pf->supported_rxdids)) + goto error_param; + } else { + rxdid = ICE_RXDID_LEGACY_1; + } + + ena_ts = ((vf->driver_caps & + VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC) && + (vf->driver_caps & VIRTCHNL_VF_CAP_PTP) && + (qpi->rxq.flags & VIRTCHNL_PTP_RX_TSTAMP)); + + ice_write_qrxflxp_cntxt(&vsi->back->hw, + vsi->rxq_map[q_idx], rxdid, + ICE_RXDID_PRIO, ena_ts); + } + } + + ice_lag_complete_vf_reset(pf->lag, act_prt); + mutex_unlock(&pf->lag_mutex); + + /* send the response to the VF */ + return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_VSI_QUEUES, + VIRTCHNL_STATUS_SUCCESS, NULL, 0); +error_param: + /* disable whatever we can */ + for (; i >= 0; i--) { + if (ice_vsi_ctrl_one_rx_ring(vsi, false, i, true)) + dev_err(ice_pf_to_dev(pf), "VF-%d could not disable RX queue %d\n", + vf->vf_id, i); + if (ice_vf_vsi_dis_single_txq(vf, vsi, i)) + dev_err(ice_pf_to_dev(pf), "VF-%d could not disable TX queue %d\n", + vf->vf_id, i); + } + + ice_lag_complete_vf_reset(pf->lag, act_prt); + mutex_unlock(&pf->lag_mutex); + + ice_lag_move_new_vf_nodes(vf); + + /* send the response to the VF */ + return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_VSI_QUEUES, + VIRTCHNL_STATUS_ERR_PARAM, NULL, 0); +} + +/** + * ice_vc_request_qs_msg + * @vf: pointer to the VF info + * @msg: pointer to the msg buffer + * + * VFs get a default number of queues but can use this message to request a + * different number. If the request is successful, PF will reset the VF and + * return 0. 
If unsuccessful, PF will send message informing VF of number of + * available queue pairs via virtchnl message response to VF. + */ +int ice_vc_request_qs_msg(struct ice_vf *vf, u8 *msg) +{ + enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; + struct virtchnl_vf_res_request *vfres = + (struct virtchnl_vf_res_request *)msg; + u16 req_queues = vfres->num_queue_pairs; + struct ice_pf *pf = vf->pf; + u16 max_allowed_vf_queues; + u16 tx_rx_queue_left; + struct device *dev; + u16 cur_queues; + + dev = ice_pf_to_dev(pf); + if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + cur_queues = vf->num_vf_qs; + tx_rx_queue_left = min_t(u16, ice_get_avail_txq_count(pf), + ice_get_avail_rxq_count(pf)); + max_allowed_vf_queues = tx_rx_queue_left + cur_queues; + if (!req_queues) { + dev_err(dev, "VF %d tried to request 0 queues. Ignoring.\n", + vf->vf_id); + } else if (req_queues > ICE_MAX_RSS_QS_PER_VF) { + dev_err(dev, "VF %d tried to request more than %d queues.\n", + vf->vf_id, ICE_MAX_RSS_QS_PER_VF); + vfres->num_queue_pairs = ICE_MAX_RSS_QS_PER_VF; + } else if (req_queues > cur_queues && + req_queues - cur_queues > tx_rx_queue_left) { + dev_warn(dev, "VF %d requested %u more queues, but only %u left.\n", + vf->vf_id, req_queues - cur_queues, tx_rx_queue_left); + vfres->num_queue_pairs = min_t(u16, max_allowed_vf_queues, + ICE_MAX_RSS_QS_PER_VF); + } else { + /* request is successful, then reset VF */ + vf->num_req_qs = req_queues; + ice_reset_vf(vf, ICE_VF_RESET_NOTIFY); + dev_info(dev, "VF %d granted request of %u queues.\n", + vf->vf_id, req_queues); + return 0; + } + +error_param: + /* send the response to the VF */ + return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_REQUEST_QUEUES, + v_ret, (u8 *)vfres, sizeof(*vfres)); +} + diff --git a/drivers/net/ethernet/intel/ice/virt/queues.h b/drivers/net/ethernet/intel/ice/virt/queues.h new file mode 100644 index 000000000000..c4a792cecea1 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/virt/queues.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2022, Intel Corporation. */ + +#ifndef _ICE_VIRT_QUEUES_H_ +#define _ICE_VIRT_QUEUES_H_ + +#include <linux/types.h> + +struct ice_vf; + +u16 ice_vc_get_max_frame_size(struct ice_vf *vf); +int ice_vc_ena_qs_msg(struct ice_vf *vf, u8 *msg); +int ice_vc_dis_qs_msg(struct ice_vf *vf, u8 *msg); +int ice_vc_cfg_irq_map_msg(struct ice_vf *vf, u8 *msg); +int ice_vc_cfg_q_bw(struct ice_vf *vf, u8 *msg); +int ice_vc_cfg_q_quanta(struct ice_vf *vf, u8 *msg); +int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg); +int ice_vc_request_qs_msg(struct ice_vf *vf, u8 *msg); + +#endif /* _ICE_VIRT_QUEUES_H_ */ diff --git a/drivers/net/ethernet/intel/ice/virt/rss.c b/drivers/net/ethernet/intel/ice/virt/rss.c new file mode 100644 index 000000000000..cbdbb32d512b --- /dev/null +++ b/drivers/net/ethernet/intel/ice/virt/rss.c @@ -0,0 +1,719 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2022, Intel Corporation. 
*/ + +#include "rss.h" +#include "ice_vf_lib_private.h" +#include "ice.h" + +#define FIELD_SELECTOR(proto_hdr_field) \ + BIT((proto_hdr_field) & PROTO_HDR_FIELD_MASK) + +struct ice_vc_hdr_match_type { + u32 vc_hdr; /* virtchnl headers (VIRTCHNL_PROTO_HDR_XXX) */ + u32 ice_hdr; /* ice headers (ICE_FLOW_SEG_HDR_XXX) */ +}; + +static const struct ice_vc_hdr_match_type ice_vc_hdr_list[] = { + {VIRTCHNL_PROTO_HDR_NONE, ICE_FLOW_SEG_HDR_NONE}, + {VIRTCHNL_PROTO_HDR_ETH, ICE_FLOW_SEG_HDR_ETH}, + {VIRTCHNL_PROTO_HDR_S_VLAN, ICE_FLOW_SEG_HDR_VLAN}, + {VIRTCHNL_PROTO_HDR_C_VLAN, ICE_FLOW_SEG_HDR_VLAN}, + {VIRTCHNL_PROTO_HDR_IPV4, ICE_FLOW_SEG_HDR_IPV4 | + ICE_FLOW_SEG_HDR_IPV_OTHER}, + {VIRTCHNL_PROTO_HDR_IPV6, ICE_FLOW_SEG_HDR_IPV6 | + ICE_FLOW_SEG_HDR_IPV_OTHER}, + {VIRTCHNL_PROTO_HDR_TCP, ICE_FLOW_SEG_HDR_TCP}, + {VIRTCHNL_PROTO_HDR_UDP, ICE_FLOW_SEG_HDR_UDP}, + {VIRTCHNL_PROTO_HDR_SCTP, ICE_FLOW_SEG_HDR_SCTP}, + {VIRTCHNL_PROTO_HDR_PPPOE, ICE_FLOW_SEG_HDR_PPPOE}, + {VIRTCHNL_PROTO_HDR_GTPU_IP, ICE_FLOW_SEG_HDR_GTPU_IP}, + {VIRTCHNL_PROTO_HDR_GTPU_EH, ICE_FLOW_SEG_HDR_GTPU_EH}, + {VIRTCHNL_PROTO_HDR_GTPU_EH_PDU_DWN, + ICE_FLOW_SEG_HDR_GTPU_DWN}, + {VIRTCHNL_PROTO_HDR_GTPU_EH_PDU_UP, + ICE_FLOW_SEG_HDR_GTPU_UP}, + {VIRTCHNL_PROTO_HDR_L2TPV3, ICE_FLOW_SEG_HDR_L2TPV3}, + {VIRTCHNL_PROTO_HDR_ESP, ICE_FLOW_SEG_HDR_ESP}, + {VIRTCHNL_PROTO_HDR_AH, ICE_FLOW_SEG_HDR_AH}, + {VIRTCHNL_PROTO_HDR_PFCP, ICE_FLOW_SEG_HDR_PFCP_SESSION}, +}; + +struct ice_vc_hash_field_match_type { + u32 vc_hdr; /* virtchnl headers + * (VIRTCHNL_PROTO_HDR_XXX) + */ + u32 vc_hash_field; /* virtchnl hash fields selector + * FIELD_SELECTOR((VIRTCHNL_PROTO_HDR_ETH_XXX)) + */ + u64 ice_hash_field; /* ice hash fields + * (BIT_ULL(ICE_FLOW_FIELD_IDX_XXX)) + */ +}; + +static const struct +ice_vc_hash_field_match_type ice_vc_hash_field_list[] = { + {VIRTCHNL_PROTO_HDR_ETH, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_ETH_SRC), + BIT_ULL(ICE_FLOW_FIELD_IDX_ETH_SA)}, + {VIRTCHNL_PROTO_HDR_ETH, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_ETH_DST), + BIT_ULL(ICE_FLOW_FIELD_IDX_ETH_DA)}, + {VIRTCHNL_PROTO_HDR_ETH, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_ETH_SRC) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_ETH_DST), + ICE_FLOW_HASH_ETH}, + {VIRTCHNL_PROTO_HDR_ETH, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_ETH_ETHERTYPE), + BIT_ULL(ICE_FLOW_FIELD_IDX_ETH_TYPE)}, + {VIRTCHNL_PROTO_HDR_S_VLAN, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_S_VLAN_ID), + BIT_ULL(ICE_FLOW_FIELD_IDX_S_VLAN)}, + {VIRTCHNL_PROTO_HDR_C_VLAN, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_C_VLAN_ID), + BIT_ULL(ICE_FLOW_FIELD_IDX_C_VLAN)}, + {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_SA)}, + {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_DA)}, + {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST), + ICE_FLOW_HASH_IPV4}, + {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_SA) | + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT)}, + {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_DA) | + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT)}, + {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT), + ICE_FLOW_HASH_IPV4 | BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT)}, 
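+	/* The VF's field_selector is matched against vc_hash_field by exact +	 * equality in ice_vc_parse_rss_cfg(), so every supported selector +	 * combination must be listed as its own entry here. +	 */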
+ {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT)}, + {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_SRC), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_SA)}, + {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_DST), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_DA)}, + {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_SRC) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_DST), + ICE_FLOW_HASH_IPV6}, + {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_SRC) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PROT), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_SA) | + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PROT)}, + {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_DST) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PROT), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_DA) | + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PROT)}, + {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_SRC) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_DST) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PROT), + ICE_FLOW_HASH_IPV6 | BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PROT)}, + {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PROT), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PROT)}, + {VIRTCHNL_PROTO_HDR_TCP, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_SRC_PORT), + BIT_ULL(ICE_FLOW_FIELD_IDX_TCP_SRC_PORT)}, + {VIRTCHNL_PROTO_HDR_TCP, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_DST_PORT), + BIT_ULL(ICE_FLOW_FIELD_IDX_TCP_DST_PORT)}, + {VIRTCHNL_PROTO_HDR_TCP, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_SRC_PORT) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_DST_PORT), + ICE_FLOW_HASH_TCP_PORT}, + {VIRTCHNL_PROTO_HDR_UDP, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_SRC_PORT), + BIT_ULL(ICE_FLOW_FIELD_IDX_UDP_SRC_PORT)}, + {VIRTCHNL_PROTO_HDR_UDP, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_DST_PORT), + BIT_ULL(ICE_FLOW_FIELD_IDX_UDP_DST_PORT)}, + {VIRTCHNL_PROTO_HDR_UDP, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_SRC_PORT) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_DST_PORT), + ICE_FLOW_HASH_UDP_PORT}, + {VIRTCHNL_PROTO_HDR_SCTP, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_SRC_PORT), + BIT_ULL(ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT)}, + {VIRTCHNL_PROTO_HDR_SCTP, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_DST_PORT), + BIT_ULL(ICE_FLOW_FIELD_IDX_SCTP_DST_PORT)}, + {VIRTCHNL_PROTO_HDR_SCTP, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_SRC_PORT) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_DST_PORT), + ICE_FLOW_HASH_SCTP_PORT}, + {VIRTCHNL_PROTO_HDR_PPPOE, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_PPPOE_SESS_ID), + BIT_ULL(ICE_FLOW_FIELD_IDX_PPPOE_SESS_ID)}, + {VIRTCHNL_PROTO_HDR_GTPU_IP, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_GTPU_IP_TEID), + BIT_ULL(ICE_FLOW_FIELD_IDX_GTPU_IP_TEID)}, + {VIRTCHNL_PROTO_HDR_L2TPV3, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_L2TPV3_SESS_ID), + BIT_ULL(ICE_FLOW_FIELD_IDX_L2TPV3_SESS_ID)}, + {VIRTCHNL_PROTO_HDR_ESP, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_ESP_SPI), + BIT_ULL(ICE_FLOW_FIELD_IDX_ESP_SPI)}, + {VIRTCHNL_PROTO_HDR_AH, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_AH_SPI), + BIT_ULL(ICE_FLOW_FIELD_IDX_AH_SPI)}, + {VIRTCHNL_PROTO_HDR_PFCP, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_PFCP_SEID), + BIT_ULL(ICE_FLOW_FIELD_IDX_PFCP_SEID)}, +}; + +/** + * ice_vc_validate_pattern + * @vf: pointer to the VF info + * @proto: virtchnl protocol headers + * + * validate the pattern is supported or not. + * + * Return: true on success, false on error. 
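+ * + * For example, a header list of ETH, IPV4, UDP resolves to + * ICE_PTYPE_IPV4_UDP_PAY, which is then checked against the packet + * types enabled in hardware by ice_hw_ptype_ena().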
+ */ +bool +ice_vc_validate_pattern(struct ice_vf *vf, struct virtchnl_proto_hdrs *proto) +{ + bool is_ipv4 = false; + bool is_ipv6 = false; + bool is_udp = false; + u16 ptype = -1; + int i = 0; + + while (i < proto->count && + proto->proto_hdr[i].type != VIRTCHNL_PROTO_HDR_NONE) { + switch (proto->proto_hdr[i].type) { + case VIRTCHNL_PROTO_HDR_ETH: + ptype = ICE_PTYPE_MAC_PAY; + break; + case VIRTCHNL_PROTO_HDR_IPV4: + ptype = ICE_PTYPE_IPV4_PAY; + is_ipv4 = true; + break; + case VIRTCHNL_PROTO_HDR_IPV6: + ptype = ICE_PTYPE_IPV6_PAY; + is_ipv6 = true; + break; + case VIRTCHNL_PROTO_HDR_UDP: + if (is_ipv4) + ptype = ICE_PTYPE_IPV4_UDP_PAY; + else if (is_ipv6) + ptype = ICE_PTYPE_IPV6_UDP_PAY; + is_udp = true; + break; + case VIRTCHNL_PROTO_HDR_TCP: + if (is_ipv4) + ptype = ICE_PTYPE_IPV4_TCP_PAY; + else if (is_ipv6) + ptype = ICE_PTYPE_IPV6_TCP_PAY; + break; + case VIRTCHNL_PROTO_HDR_SCTP: + if (is_ipv4) + ptype = ICE_PTYPE_IPV4_SCTP_PAY; + else if (is_ipv6) + ptype = ICE_PTYPE_IPV6_SCTP_PAY; + break; + case VIRTCHNL_PROTO_HDR_GTPU_IP: + case VIRTCHNL_PROTO_HDR_GTPU_EH: + if (is_ipv4) + ptype = ICE_MAC_IPV4_GTPU; + else if (is_ipv6) + ptype = ICE_MAC_IPV6_GTPU; + goto out; + case VIRTCHNL_PROTO_HDR_L2TPV3: + if (is_ipv4) + ptype = ICE_MAC_IPV4_L2TPV3; + else if (is_ipv6) + ptype = ICE_MAC_IPV6_L2TPV3; + goto out; + case VIRTCHNL_PROTO_HDR_ESP: + if (is_ipv4) + ptype = is_udp ? ICE_MAC_IPV4_NAT_T_ESP : + ICE_MAC_IPV4_ESP; + else if (is_ipv6) + ptype = is_udp ? ICE_MAC_IPV6_NAT_T_ESP : + ICE_MAC_IPV6_ESP; + goto out; + case VIRTCHNL_PROTO_HDR_AH: + if (is_ipv4) + ptype = ICE_MAC_IPV4_AH; + else if (is_ipv6) + ptype = ICE_MAC_IPV6_AH; + goto out; + case VIRTCHNL_PROTO_HDR_PFCP: + if (is_ipv4) + ptype = ICE_MAC_IPV4_PFCP_SESSION; + else if (is_ipv6) + ptype = ICE_MAC_IPV6_PFCP_SESSION; + goto out; + default: + break; + } + i++; + } + +out: + return ice_hw_ptype_ena(&vf->pf->hw, ptype); +} + +/** + * ice_vc_parse_rss_cfg - parses hash fields and headers from + * a specific virtchnl RSS cfg + * @hw: pointer to the hardware + * @rss_cfg: pointer to the virtchnl RSS cfg + * @hash_cfg: pointer to the HW hash configuration + * + * Return true if all the protocol header and hash fields in the RSS cfg could + * be parsed, else return false + * + * This function parses the virtchnl RSS cfg to be the intended + * hash fields and the intended header for RSS configuration + */ +static bool ice_vc_parse_rss_cfg(struct ice_hw *hw, + struct virtchnl_rss_cfg *rss_cfg, + struct ice_rss_hash_cfg *hash_cfg) +{ + const struct ice_vc_hash_field_match_type *hf_list; + const struct ice_vc_hdr_match_type *hdr_list; + int i, hf_list_len, hdr_list_len; + u32 *addl_hdrs = &hash_cfg->addl_hdrs; + u64 *hash_flds = &hash_cfg->hash_flds; + + /* set outer layer RSS as default */ + hash_cfg->hdr_type = ICE_RSS_OUTER_HEADERS; + + if (rss_cfg->rss_algorithm == VIRTCHNL_RSS_ALG_TOEPLITZ_SYMMETRIC) + hash_cfg->symm = true; + else + hash_cfg->symm = false; + + hf_list = ice_vc_hash_field_list; + hf_list_len = ARRAY_SIZE(ice_vc_hash_field_list); + hdr_list = ice_vc_hdr_list; + hdr_list_len = ARRAY_SIZE(ice_vc_hdr_list); + + for (i = 0; i < rss_cfg->proto_hdrs.count; i++) { + struct virtchnl_proto_hdr *proto_hdr = + &rss_cfg->proto_hdrs.proto_hdr[i]; + bool hdr_found = false; + int j; + + /* Find matched ice headers according to virtchnl headers. 
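+		 * A single virtchnl header can map to more than one ice +		 * segment-header bit; IPV4, for instance, also sets +		 * ICE_FLOW_SEG_HDR_IPV_OTHER.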
*/ + for (j = 0; j < hdr_list_len; j++) { + struct ice_vc_hdr_match_type hdr_map = hdr_list[j]; + + if (proto_hdr->type == hdr_map.vc_hdr) { + *addl_hdrs |= hdr_map.ice_hdr; + hdr_found = true; + } + } + + if (!hdr_found) + return false; + + /* Find matched ice hash fields according to + * virtchnl hash fields. + */ + for (j = 0; j < hf_list_len; j++) { + struct ice_vc_hash_field_match_type hf_map = hf_list[j]; + + if (proto_hdr->type == hf_map.vc_hdr && + proto_hdr->field_selector == hf_map.vc_hash_field) { + *hash_flds |= hf_map.ice_hash_field; + break; + } + } + } + + return true; +} + +/** + * ice_vf_adv_rss_offload_ena - determine if capabilities support advanced + * RSS offloads + * @caps: VF driver negotiated capabilities + * + * Return true if VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF capability is set, + * else return false + */ +static bool ice_vf_adv_rss_offload_ena(u32 caps) +{ + return !!(caps & VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF); +} + +/** + * ice_vc_handle_rss_cfg + * @vf: pointer to the VF info + * @msg: pointer to the message buffer + * @add: add a RSS config if true, otherwise delete a RSS config + * + * This function adds/deletes a RSS config + */ +int ice_vc_handle_rss_cfg(struct ice_vf *vf, u8 *msg, bool add) +{ + u32 v_opcode = add ? VIRTCHNL_OP_ADD_RSS_CFG : VIRTCHNL_OP_DEL_RSS_CFG; + struct virtchnl_rss_cfg *rss_cfg = (struct virtchnl_rss_cfg *)msg; + enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; + struct device *dev = ice_pf_to_dev(vf->pf); + struct ice_hw *hw = &vf->pf->hw; + struct ice_vsi *vsi; + + if (!test_bit(ICE_FLAG_RSS_ENA, vf->pf->flags)) { + dev_dbg(dev, "VF %d attempting to configure RSS, but RSS is not supported by the PF\n", + vf->vf_id); + v_ret = VIRTCHNL_STATUS_ERR_NOT_SUPPORTED; + goto error_param; + } + + if (!ice_vf_adv_rss_offload_ena(vf->driver_caps)) { + dev_dbg(dev, "VF %d attempting to configure RSS, but Advanced RSS offload is not supported\n", + vf->vf_id); + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + if (rss_cfg->proto_hdrs.count > VIRTCHNL_MAX_NUM_PROTO_HDRS || + rss_cfg->rss_algorithm < VIRTCHNL_RSS_ALG_TOEPLITZ_ASYMMETRIC || + rss_cfg->rss_algorithm > VIRTCHNL_RSS_ALG_XOR_SYMMETRIC) { + dev_dbg(dev, "VF %d attempting to configure RSS, but RSS configuration is not valid\n", + vf->vf_id); + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + vsi = ice_get_vf_vsi(vf); + if (!vsi) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + if (!ice_vc_validate_pattern(vf, &rss_cfg->proto_hdrs)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + if (rss_cfg->rss_algorithm == VIRTCHNL_RSS_ALG_R_ASYMMETRIC) { + struct ice_vsi_ctx *ctx; + u8 lut_type, hash_type; + int status; + + lut_type = ICE_AQ_VSI_Q_OPT_RSS_LUT_VSI; + hash_type = add ? 
ICE_AQ_VSI_Q_OPT_RSS_HASH_XOR : + ICE_AQ_VSI_Q_OPT_RSS_HASH_TPLZ; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) { + v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY; + goto error_param; + } + + ctx->info.q_opt_rss = + FIELD_PREP(ICE_AQ_VSI_Q_OPT_RSS_LUT_M, lut_type) | + FIELD_PREP(ICE_AQ_VSI_Q_OPT_RSS_HASH_M, hash_type); + + /* Preserve existing queueing option setting */ + ctx->info.q_opt_rss |= (vsi->info.q_opt_rss & + ICE_AQ_VSI_Q_OPT_RSS_GBL_LUT_M); + ctx->info.q_opt_tc = vsi->info.q_opt_tc; + ctx->info.q_opt_flags = vsi->info.q_opt_rss; + + ctx->info.valid_sections = + cpu_to_le16(ICE_AQ_VSI_PROP_Q_OPT_VALID); + + status = ice_update_vsi(hw, vsi->idx, ctx, NULL); + if (status) { + dev_err(dev, "update VSI for RSS failed, err %d aq_err %s\n", + status, libie_aq_str(hw->adminq.sq_last_status)); + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + } else { + vsi->info.q_opt_rss = ctx->info.q_opt_rss; + } + + kfree(ctx); + } else { + struct ice_rss_hash_cfg cfg; + + /* Only check for the non-raw pattern case */ + if (!ice_vc_validate_pattern(vf, &rss_cfg->proto_hdrs)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + cfg.addl_hdrs = ICE_FLOW_SEG_HDR_NONE; + cfg.hash_flds = ICE_HASH_INVALID; + cfg.hdr_type = ICE_RSS_ANY_HEADERS; + + if (!ice_vc_parse_rss_cfg(hw, rss_cfg, &cfg)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + if (add) { + if (ice_add_rss_cfg(hw, vsi, &cfg)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + dev_err(dev, "ice_add_rss_cfg failed for vsi = %d, v_ret = %d\n", + vsi->vsi_num, v_ret); + } + } else { + int status; + + status = ice_rem_rss_cfg(hw, vsi->idx, &cfg); + /* We just ignore -ENOENT, because if two configurations + * share the same profile, removing one of them actually + * removes both, since the profile is deleted.
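			 * Any other failure is reported back to the VF as + * VIRTCHNL_STATUS_ERR_PARAM.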
+ */ + if (status && status != -ENOENT) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + dev_err(dev, "ice_rem_rss_cfg failed for VF ID:%d, error:%d\n", + vf->vf_id, status); + } + } + } + +error_param: + return ice_vc_send_msg_to_vf(vf, v_opcode, v_ret, NULL, 0); +} + +/** + * ice_vc_config_rss_key + * @vf: pointer to the VF info + * @msg: pointer to the msg buffer + * + * Configure the VF's RSS key + */ +int ice_vc_config_rss_key(struct ice_vf *vf, u8 *msg) +{ + enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; + struct virtchnl_rss_key *vrk = + (struct virtchnl_rss_key *)msg; + struct ice_vsi *vsi; + + if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + if (!ice_vc_isvalid_vsi_id(vf, vrk->vsi_id)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + if (vrk->key_len != ICE_VSIQF_HKEY_ARRAY_SIZE) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + if (!test_bit(ICE_FLAG_RSS_ENA, vf->pf->flags)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + vsi = ice_get_vf_vsi(vf); + if (!vsi) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + if (ice_set_rss_key(vsi, vrk->key)) + v_ret = VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR; +error_param: + return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_RSS_KEY, v_ret, + NULL, 0); +} + +/** + * ice_vc_config_rss_lut + * @vf: pointer to the VF info + * @msg: pointer to the msg buffer + * + * Configure the VF's RSS LUT + */ +int ice_vc_config_rss_lut(struct ice_vf *vf, u8 *msg) +{ + struct virtchnl_rss_lut *vrl = (struct virtchnl_rss_lut *)msg; + enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; + struct ice_vsi *vsi; + + if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + if (!ice_vc_isvalid_vsi_id(vf, vrl->vsi_id)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + if (vrl->lut_entries != ICE_LUT_VSI_SIZE) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + if (!test_bit(ICE_FLAG_RSS_ENA, vf->pf->flags)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + vsi = ice_get_vf_vsi(vf); + if (!vsi) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + if (ice_set_rss_lut(vsi, vrl->lut, ICE_LUT_VSI_SIZE)) + v_ret = VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR; +error_param: + return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_RSS_LUT, v_ret, + NULL, 0); +} + +/** + * ice_vc_config_rss_hfunc + * @vf: pointer to the VF info + * @msg: pointer to the msg buffer + * + * Configure the VF's RSS Hash function + */ +int ice_vc_config_rss_hfunc(struct ice_vf *vf, u8 *msg) +{ + struct virtchnl_rss_hfunc *vrh = (struct virtchnl_rss_hfunc *)msg; + enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; + u8 hfunc = ICE_AQ_VSI_Q_OPT_RSS_HASH_TPLZ; + struct ice_vsi *vsi; + + if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + if (!ice_vc_isvalid_vsi_id(vf, vrh->vsi_id)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + if (!test_bit(ICE_FLAG_RSS_ENA, vf->pf->flags)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + vsi = ice_get_vf_vsi(vf); + if (!vsi) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + if (vrh->rss_algorithm == VIRTCHNL_RSS_ALG_TOEPLITZ_SYMMETRIC) + hfunc = ICE_AQ_VSI_Q_OPT_RSS_HASH_SYM_TPLZ; + + if (ice_set_rss_hfunc(vsi, hfunc)) + v_ret = VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR; +error_param: + 
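+	/* send the response to the VF */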
return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_RSS_HFUNC, v_ret, + NULL, 0); +} + +/** + * ice_vc_get_rss_hashcfg - return the RSS Hash configuration + * @vf: pointer to the VF info + */ +int ice_vc_get_rss_hashcfg(struct ice_vf *vf) +{ + enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; + struct virtchnl_rss_hashcfg *vrh = NULL; + int len = 0, ret; + + if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + + if (!test_bit(ICE_FLAG_RSS_ENA, vf->pf->flags)) { + dev_err(ice_pf_to_dev(vf->pf), "RSS not supported by PF\n"); + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + + len = sizeof(struct virtchnl_rss_hashcfg); + vrh = kzalloc(len, GFP_KERNEL); + if (!vrh) { + v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY; + len = 0; + goto err; + } + + vrh->hashcfg = ICE_DEFAULT_RSS_HASHCFG; +err: + /* send the response back to the VF */ + ret = ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_GET_RSS_HASHCFG_CAPS, v_ret, + (u8 *)vrh, len); + kfree(vrh); + return ret; +} + +/** + * ice_vc_set_rss_hashcfg - set RSS Hash configuration bits for the VF + * @vf: pointer to the VF info + * @msg: pointer to the msg buffer + */ +int ice_vc_set_rss_hashcfg(struct ice_vf *vf, u8 *msg) +{ + struct virtchnl_rss_hashcfg *vrh = (struct virtchnl_rss_hashcfg *)msg; + enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; + struct ice_pf *pf = vf->pf; + struct ice_vsi *vsi; + struct device *dev; + int status; + + dev = ice_pf_to_dev(pf); + + if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + + if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) { + dev_err(dev, "RSS not supported by PF\n"); + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + + vsi = ice_get_vf_vsi(vf); + if (!vsi) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + + /* clear all previously programmed RSS configuration to allow VF drivers + * to customize the RSS configuration and/or completely + * disable RSS + */ + status = ice_rem_vsi_rss_cfg(&pf->hw, vsi->idx); + if (status && !vrh->hashcfg) { + /* only report failure to clear the current RSS configuration if + * that was clearly the VF's intention (i.e. vrh->hashcfg = 0) + */ + v_ret = ice_err_to_virt_err(status); + goto err; + } else if (status) { + /* allow the VF to update the RSS configuration even on failure + * to clear the current RSS configuration in an attempt to keep + * RSS in a working state + */ + dev_warn(dev, "Failed to clear the RSS configuration for VF %u\n", + vf->vf_id); + } + + if (vrh->hashcfg) { + status = ice_add_avf_rss_cfg(&pf->hw, vsi, vrh->hashcfg); + v_ret = ice_err_to_virt_err(status); + } + + /* save the requested VF configuration */ + if (!v_ret) + vf->rss_hashcfg = vrh->hashcfg; + +err: + /* send the response to the VF */ + return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_SET_RSS_HASHCFG, v_ret, + NULL, 0); +} + diff --git a/drivers/net/ethernet/intel/ice/virt/rss.h b/drivers/net/ethernet/intel/ice/virt/rss.h new file mode 100644 index 000000000000..784d4c43ce8b --- /dev/null +++ b/drivers/net/ethernet/intel/ice/virt/rss.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2022, Intel Corporation.
*/ + +#ifndef _ICE_VIRT_RSS_H_ +#define _ICE_VIRT_RSS_H_ + +#include <linux/types.h> + +struct ice_vf; + +int ice_vc_handle_rss_cfg(struct ice_vf *vf, u8 *msg, bool add); +int ice_vc_config_rss_key(struct ice_vf *vf, u8 *msg); +int ice_vc_config_rss_lut(struct ice_vf *vf, u8 *msg); +int ice_vc_config_rss_hfunc(struct ice_vf *vf, u8 *msg); +int ice_vc_get_rss_hashcfg(struct ice_vf *vf); +int ice_vc_set_rss_hashcfg(struct ice_vf *vf, u8 *msg); + +#endif /* _ICE_VIRT_RSS_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/virt/virtchnl.c index 257967273079..f3f921134379 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c +++ b/drivers/net/ethernet/intel/ice/virt/virtchnl.c @@ -1,170 +1,20 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (C) 2022, Intel Corporation. */ -#include "ice_virtchnl.h" +#include "virtchnl.h" +#include "queues.h" +#include "rss.h" #include "ice_vf_lib_private.h" #include "ice.h" #include "ice_base.h" #include "ice_lib.h" #include "ice_fltr.h" -#include "ice_virtchnl_allowlist.h" +#include "allowlist.h" #include "ice_vf_vsi_vlan_ops.h" #include "ice_vlan.h" #include "ice_flex_pipe.h" #include "ice_dcb_lib.h" -#define FIELD_SELECTOR(proto_hdr_field) \ - BIT((proto_hdr_field) & PROTO_HDR_FIELD_MASK) - -struct ice_vc_hdr_match_type { - u32 vc_hdr; /* virtchnl headers (VIRTCHNL_PROTO_HDR_XXX) */ - u32 ice_hdr; /* ice headers (ICE_FLOW_SEG_HDR_XXX) */ -}; - -static const struct ice_vc_hdr_match_type ice_vc_hdr_list[] = { - {VIRTCHNL_PROTO_HDR_NONE, ICE_FLOW_SEG_HDR_NONE}, - {VIRTCHNL_PROTO_HDR_ETH, ICE_FLOW_SEG_HDR_ETH}, - {VIRTCHNL_PROTO_HDR_S_VLAN, ICE_FLOW_SEG_HDR_VLAN}, - {VIRTCHNL_PROTO_HDR_C_VLAN, ICE_FLOW_SEG_HDR_VLAN}, - {VIRTCHNL_PROTO_HDR_IPV4, ICE_FLOW_SEG_HDR_IPV4 | - ICE_FLOW_SEG_HDR_IPV_OTHER}, - {VIRTCHNL_PROTO_HDR_IPV6, ICE_FLOW_SEG_HDR_IPV6 | - ICE_FLOW_SEG_HDR_IPV_OTHER}, - {VIRTCHNL_PROTO_HDR_TCP, ICE_FLOW_SEG_HDR_TCP}, - {VIRTCHNL_PROTO_HDR_UDP, ICE_FLOW_SEG_HDR_UDP}, - {VIRTCHNL_PROTO_HDR_SCTP, ICE_FLOW_SEG_HDR_SCTP}, - {VIRTCHNL_PROTO_HDR_PPPOE, ICE_FLOW_SEG_HDR_PPPOE}, - {VIRTCHNL_PROTO_HDR_GTPU_IP, ICE_FLOW_SEG_HDR_GTPU_IP}, - {VIRTCHNL_PROTO_HDR_GTPU_EH, ICE_FLOW_SEG_HDR_GTPU_EH}, - {VIRTCHNL_PROTO_HDR_GTPU_EH_PDU_DWN, - ICE_FLOW_SEG_HDR_GTPU_DWN}, - {VIRTCHNL_PROTO_HDR_GTPU_EH_PDU_UP, - ICE_FLOW_SEG_HDR_GTPU_UP}, - {VIRTCHNL_PROTO_HDR_L2TPV3, ICE_FLOW_SEG_HDR_L2TPV3}, - {VIRTCHNL_PROTO_HDR_ESP, ICE_FLOW_SEG_HDR_ESP}, - {VIRTCHNL_PROTO_HDR_AH, ICE_FLOW_SEG_HDR_AH}, - {VIRTCHNL_PROTO_HDR_PFCP, ICE_FLOW_SEG_HDR_PFCP_SESSION}, -}; - -struct ice_vc_hash_field_match_type { - u32 vc_hdr; /* virtchnl headers - * (VIRTCHNL_PROTO_HDR_XXX) - */ - u32 vc_hash_field; /* virtchnl hash fields selector - * FIELD_SELECTOR((VIRTCHNL_PROTO_HDR_ETH_XXX)) - */ - u64 ice_hash_field; /* ice hash fields - * (BIT_ULL(ICE_FLOW_FIELD_IDX_XXX)) - */ -}; - -static const struct -ice_vc_hash_field_match_type ice_vc_hash_field_list[] = { - {VIRTCHNL_PROTO_HDR_ETH, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_ETH_SRC), - BIT_ULL(ICE_FLOW_FIELD_IDX_ETH_SA)}, - {VIRTCHNL_PROTO_HDR_ETH, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_ETH_DST), - BIT_ULL(ICE_FLOW_FIELD_IDX_ETH_DA)}, - {VIRTCHNL_PROTO_HDR_ETH, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_ETH_SRC) | - FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_ETH_DST), - ICE_FLOW_HASH_ETH}, - {VIRTCHNL_PROTO_HDR_ETH, - FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_ETH_ETHERTYPE), - BIT_ULL(ICE_FLOW_FIELD_IDX_ETH_TYPE)}, - {VIRTCHNL_PROTO_HDR_S_VLAN, - FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_S_VLAN_ID), - BIT_ULL(ICE_FLOW_FIELD_IDX_S_VLAN)}, - 
{VIRTCHNL_PROTO_HDR_C_VLAN, - FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_C_VLAN_ID), - BIT_ULL(ICE_FLOW_FIELD_IDX_C_VLAN)}, - {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC), - BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_SA)}, - {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST), - BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_DA)}, - {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC) | - FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST), - ICE_FLOW_HASH_IPV4}, - {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC) | - FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT), - BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_SA) | - BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT)}, - {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST) | - FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT), - BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_DA) | - BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT)}, - {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC) | - FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST) | - FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT), - ICE_FLOW_HASH_IPV4 | BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT)}, - {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT), - BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT)}, - {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_SRC), - BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_SA)}, - {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_DST), - BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_DA)}, - {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_SRC) | - FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_DST), - ICE_FLOW_HASH_IPV6}, - {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_SRC) | - FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PROT), - BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_SA) | - BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PROT)}, - {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_DST) | - FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PROT), - BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_DA) | - BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PROT)}, - {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_SRC) | - FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_DST) | - FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PROT), - ICE_FLOW_HASH_IPV6 | BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PROT)}, - {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PROT), - BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PROT)}, - {VIRTCHNL_PROTO_HDR_TCP, - FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_SRC_PORT), - BIT_ULL(ICE_FLOW_FIELD_IDX_TCP_SRC_PORT)}, - {VIRTCHNL_PROTO_HDR_TCP, - FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_DST_PORT), - BIT_ULL(ICE_FLOW_FIELD_IDX_TCP_DST_PORT)}, - {VIRTCHNL_PROTO_HDR_TCP, - FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_SRC_PORT) | - FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_DST_PORT), - ICE_FLOW_HASH_TCP_PORT}, - {VIRTCHNL_PROTO_HDR_UDP, - FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_SRC_PORT), - BIT_ULL(ICE_FLOW_FIELD_IDX_UDP_SRC_PORT)}, - {VIRTCHNL_PROTO_HDR_UDP, - FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_DST_PORT), - BIT_ULL(ICE_FLOW_FIELD_IDX_UDP_DST_PORT)}, - {VIRTCHNL_PROTO_HDR_UDP, - FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_SRC_PORT) | - FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_DST_PORT), - ICE_FLOW_HASH_UDP_PORT}, - {VIRTCHNL_PROTO_HDR_SCTP, - FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_SRC_PORT), - BIT_ULL(ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT)}, - {VIRTCHNL_PROTO_HDR_SCTP, - FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_DST_PORT), - BIT_ULL(ICE_FLOW_FIELD_IDX_SCTP_DST_PORT)}, - {VIRTCHNL_PROTO_HDR_SCTP, - FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_SRC_PORT) | - 
FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_DST_PORT), - ICE_FLOW_HASH_SCTP_PORT}, - {VIRTCHNL_PROTO_HDR_PPPOE, - FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_PPPOE_SESS_ID), - BIT_ULL(ICE_FLOW_FIELD_IDX_PPPOE_SESS_ID)}, - {VIRTCHNL_PROTO_HDR_GTPU_IP, - FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_GTPU_IP_TEID), - BIT_ULL(ICE_FLOW_FIELD_IDX_GTPU_IP_TEID)}, - {VIRTCHNL_PROTO_HDR_L2TPV3, - FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_L2TPV3_SESS_ID), - BIT_ULL(ICE_FLOW_FIELD_IDX_L2TPV3_SESS_ID)}, - {VIRTCHNL_PROTO_HDR_ESP, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_ESP_SPI), - BIT_ULL(ICE_FLOW_FIELD_IDX_ESP_SPI)}, - {VIRTCHNL_PROTO_HDR_AH, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_AH_SPI), - BIT_ULL(ICE_FLOW_FIELD_IDX_AH_SPI)}, - {VIRTCHNL_PROTO_HDR_PFCP, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_PFCP_SEID), - BIT_ULL(ICE_FLOW_FIELD_IDX_PFCP_SEID)}, -}; - /** * ice_vc_vf_broadcast - Broadcast a message to all VFs on PF * @pf: pointer to the PF structure @@ -338,28 +188,6 @@ static int ice_vc_get_ver_msg(struct ice_vf *vf, u8 *msg) } /** - * ice_vc_get_max_frame_size - get max frame size allowed for VF - * @vf: VF used to determine max frame size - * - * Max frame size is determined based on the current port's max frame size and - * whether a port VLAN is configured on this VF. The VF is not aware whether - * it's in a port VLAN so the PF needs to account for this in max frame size - * checks and sending the max frame size to the VF. - */ -static u16 ice_vc_get_max_frame_size(struct ice_vf *vf) -{ - struct ice_port_info *pi = ice_vf_get_port_info(vf); - u16 max_frame_size; - - max_frame_size = pi->phy.link_info.max_frame_size; - - if (ice_vf_is_port_vlan_ena(vf)) - max_frame_size -= VLAN_HLEN; - - return max_frame_size; -} - -/** * ice_vc_get_vlan_caps * @hw: pointer to the hw * @vf: pointer to the VF info @@ -559,488 +387,6 @@ bool ice_vc_isvalid_vsi_id(struct ice_vf *vf, u16 vsi_id) } /** - * ice_vc_isvalid_q_id - * @vsi: VSI to check queue ID against - * @qid: VSI relative queue ID - * - * check for the valid queue ID - */ -static bool ice_vc_isvalid_q_id(struct ice_vsi *vsi, u16 qid) -{ - /* allocated Tx and Rx queues should be always equal for VF VSI */ - return qid < vsi->alloc_txq; -} - -/** - * ice_vc_isvalid_ring_len - * @ring_len: length of ring - * - * check for the valid ring count, should be multiple of ICE_REQ_DESC_MULTIPLE - * or zero - */ -static bool ice_vc_isvalid_ring_len(u16 ring_len) -{ - return ring_len == 0 || - (ring_len >= ICE_MIN_NUM_DESC && - ring_len <= ICE_MAX_NUM_DESC && - !(ring_len % ICE_REQ_DESC_MULTIPLE)); -} - -/** - * ice_vc_validate_pattern - * @vf: pointer to the VF info - * @proto: virtchnl protocol headers - * - * validate the pattern is supported or not. - * - * Return: true on success, false on error. 
- */ -bool -ice_vc_validate_pattern(struct ice_vf *vf, struct virtchnl_proto_hdrs *proto) -{ - bool is_ipv4 = false; - bool is_ipv6 = false; - bool is_udp = false; - u16 ptype = -1; - int i = 0; - - while (i < proto->count && - proto->proto_hdr[i].type != VIRTCHNL_PROTO_HDR_NONE) { - switch (proto->proto_hdr[i].type) { - case VIRTCHNL_PROTO_HDR_ETH: - ptype = ICE_PTYPE_MAC_PAY; - break; - case VIRTCHNL_PROTO_HDR_IPV4: - ptype = ICE_PTYPE_IPV4_PAY; - is_ipv4 = true; - break; - case VIRTCHNL_PROTO_HDR_IPV6: - ptype = ICE_PTYPE_IPV6_PAY; - is_ipv6 = true; - break; - case VIRTCHNL_PROTO_HDR_UDP: - if (is_ipv4) - ptype = ICE_PTYPE_IPV4_UDP_PAY; - else if (is_ipv6) - ptype = ICE_PTYPE_IPV6_UDP_PAY; - is_udp = true; - break; - case VIRTCHNL_PROTO_HDR_TCP: - if (is_ipv4) - ptype = ICE_PTYPE_IPV4_TCP_PAY; - else if (is_ipv6) - ptype = ICE_PTYPE_IPV6_TCP_PAY; - break; - case VIRTCHNL_PROTO_HDR_SCTP: - if (is_ipv4) - ptype = ICE_PTYPE_IPV4_SCTP_PAY; - else if (is_ipv6) - ptype = ICE_PTYPE_IPV6_SCTP_PAY; - break; - case VIRTCHNL_PROTO_HDR_GTPU_IP: - case VIRTCHNL_PROTO_HDR_GTPU_EH: - if (is_ipv4) - ptype = ICE_MAC_IPV4_GTPU; - else if (is_ipv6) - ptype = ICE_MAC_IPV6_GTPU; - goto out; - case VIRTCHNL_PROTO_HDR_L2TPV3: - if (is_ipv4) - ptype = ICE_MAC_IPV4_L2TPV3; - else if (is_ipv6) - ptype = ICE_MAC_IPV6_L2TPV3; - goto out; - case VIRTCHNL_PROTO_HDR_ESP: - if (is_ipv4) - ptype = is_udp ? ICE_MAC_IPV4_NAT_T_ESP : - ICE_MAC_IPV4_ESP; - else if (is_ipv6) - ptype = is_udp ? ICE_MAC_IPV6_NAT_T_ESP : - ICE_MAC_IPV6_ESP; - goto out; - case VIRTCHNL_PROTO_HDR_AH: - if (is_ipv4) - ptype = ICE_MAC_IPV4_AH; - else if (is_ipv6) - ptype = ICE_MAC_IPV6_AH; - goto out; - case VIRTCHNL_PROTO_HDR_PFCP: - if (is_ipv4) - ptype = ICE_MAC_IPV4_PFCP_SESSION; - else if (is_ipv6) - ptype = ICE_MAC_IPV6_PFCP_SESSION; - goto out; - default: - break; - } - i++; - } - -out: - return ice_hw_ptype_ena(&vf->pf->hw, ptype); -} - -/** - * ice_vc_parse_rss_cfg - parses hash fields and headers from - * a specific virtchnl RSS cfg - * @hw: pointer to the hardware - * @rss_cfg: pointer to the virtchnl RSS cfg - * @hash_cfg: pointer to the HW hash configuration - * - * Return true if all the protocol header and hash fields in the RSS cfg could - * be parsed, else return false - * - * This function parses the virtchnl RSS cfg to be the intended - * hash fields and the intended header for RSS configuration - */ -static bool ice_vc_parse_rss_cfg(struct ice_hw *hw, - struct virtchnl_rss_cfg *rss_cfg, - struct ice_rss_hash_cfg *hash_cfg) -{ - const struct ice_vc_hash_field_match_type *hf_list; - const struct ice_vc_hdr_match_type *hdr_list; - int i, hf_list_len, hdr_list_len; - u32 *addl_hdrs = &hash_cfg->addl_hdrs; - u64 *hash_flds = &hash_cfg->hash_flds; - - /* set outer layer RSS as default */ - hash_cfg->hdr_type = ICE_RSS_OUTER_HEADERS; - - if (rss_cfg->rss_algorithm == VIRTCHNL_RSS_ALG_TOEPLITZ_SYMMETRIC) - hash_cfg->symm = true; - else - hash_cfg->symm = false; - - hf_list = ice_vc_hash_field_list; - hf_list_len = ARRAY_SIZE(ice_vc_hash_field_list); - hdr_list = ice_vc_hdr_list; - hdr_list_len = ARRAY_SIZE(ice_vc_hdr_list); - - for (i = 0; i < rss_cfg->proto_hdrs.count; i++) { - struct virtchnl_proto_hdr *proto_hdr = - &rss_cfg->proto_hdrs.proto_hdr[i]; - bool hdr_found = false; - int j; - - /* Find matched ice headers according to virtchnl headers. 
*/ - for (j = 0; j < hdr_list_len; j++) { - struct ice_vc_hdr_match_type hdr_map = hdr_list[j]; - - if (proto_hdr->type == hdr_map.vc_hdr) { - *addl_hdrs |= hdr_map.ice_hdr; - hdr_found = true; - } - } - - if (!hdr_found) - return false; - - /* Find matched ice hash fields according to - * virtchnl hash fields. - */ - for (j = 0; j < hf_list_len; j++) { - struct ice_vc_hash_field_match_type hf_map = hf_list[j]; - - if (proto_hdr->type == hf_map.vc_hdr && - proto_hdr->field_selector == hf_map.vc_hash_field) { - *hash_flds |= hf_map.ice_hash_field; - break; - } - } - } - - return true; -} - -/** - * ice_vf_adv_rss_offload_ena - determine if capabilities support advanced - * RSS offloads - * @caps: VF driver negotiated capabilities - * - * Return true if VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF capability is set, - * else return false - */ -static bool ice_vf_adv_rss_offload_ena(u32 caps) -{ - return !!(caps & VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF); -} - -/** - * ice_vc_handle_rss_cfg - * @vf: pointer to the VF info - * @msg: pointer to the message buffer - * @add: add a RSS config if true, otherwise delete a RSS config - * - * This function adds/deletes a RSS config - */ -static int ice_vc_handle_rss_cfg(struct ice_vf *vf, u8 *msg, bool add) -{ - u32 v_opcode = add ? VIRTCHNL_OP_ADD_RSS_CFG : VIRTCHNL_OP_DEL_RSS_CFG; - struct virtchnl_rss_cfg *rss_cfg = (struct virtchnl_rss_cfg *)msg; - enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; - struct device *dev = ice_pf_to_dev(vf->pf); - struct ice_hw *hw = &vf->pf->hw; - struct ice_vsi *vsi; - - if (!test_bit(ICE_FLAG_RSS_ENA, vf->pf->flags)) { - dev_dbg(dev, "VF %d attempting to configure RSS, but RSS is not supported by the PF\n", - vf->vf_id); - v_ret = VIRTCHNL_STATUS_ERR_NOT_SUPPORTED; - goto error_param; - } - - if (!ice_vf_adv_rss_offload_ena(vf->driver_caps)) { - dev_dbg(dev, "VF %d attempting to configure RSS, but Advanced RSS offload is not supported\n", - vf->vf_id); - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - if (rss_cfg->proto_hdrs.count > VIRTCHNL_MAX_NUM_PROTO_HDRS || - rss_cfg->rss_algorithm < VIRTCHNL_RSS_ALG_TOEPLITZ_ASYMMETRIC || - rss_cfg->rss_algorithm > VIRTCHNL_RSS_ALG_XOR_SYMMETRIC) { - dev_dbg(dev, "VF %d attempting to configure RSS, but RSS configuration is not valid\n", - vf->vf_id); - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - vsi = ice_get_vf_vsi(vf); - if (!vsi) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - if (!ice_vc_validate_pattern(vf, &rss_cfg->proto_hdrs)) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - if (rss_cfg->rss_algorithm == VIRTCHNL_RSS_ALG_R_ASYMMETRIC) { - struct ice_vsi_ctx *ctx; - u8 lut_type, hash_type; - int status; - - lut_type = ICE_AQ_VSI_Q_OPT_RSS_LUT_VSI; - hash_type = add ? 
ICE_AQ_VSI_Q_OPT_RSS_HASH_XOR : - ICE_AQ_VSI_Q_OPT_RSS_HASH_TPLZ; - - ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); - if (!ctx) { - v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY; - goto error_param; - } - - ctx->info.q_opt_rss = - FIELD_PREP(ICE_AQ_VSI_Q_OPT_RSS_LUT_M, lut_type) | - FIELD_PREP(ICE_AQ_VSI_Q_OPT_RSS_HASH_M, hash_type); - - /* Preserve existing queueing option setting */ - ctx->info.q_opt_rss |= (vsi->info.q_opt_rss & - ICE_AQ_VSI_Q_OPT_RSS_GBL_LUT_M); - ctx->info.q_opt_tc = vsi->info.q_opt_tc; - ctx->info.q_opt_flags = vsi->info.q_opt_rss; - - ctx->info.valid_sections = - cpu_to_le16(ICE_AQ_VSI_PROP_Q_OPT_VALID); - - status = ice_update_vsi(hw, vsi->idx, ctx, NULL); - if (status) { - dev_err(dev, "update VSI for RSS failed, err %d aq_err %s\n", - status, libie_aq_str(hw->adminq.sq_last_status)); - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - } else { - vsi->info.q_opt_rss = ctx->info.q_opt_rss; - } - - kfree(ctx); - } else { - struct ice_rss_hash_cfg cfg; - - /* Only check for none raw pattern case */ - if (!ice_vc_validate_pattern(vf, &rss_cfg->proto_hdrs)) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - cfg.addl_hdrs = ICE_FLOW_SEG_HDR_NONE; - cfg.hash_flds = ICE_HASH_INVALID; - cfg.hdr_type = ICE_RSS_ANY_HEADERS; - - if (!ice_vc_parse_rss_cfg(hw, rss_cfg, &cfg)) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - if (add) { - if (ice_add_rss_cfg(hw, vsi, &cfg)) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - dev_err(dev, "ice_add_rss_cfg failed for vsi = %d, v_ret = %d\n", - vsi->vsi_num, v_ret); - } - } else { - int status; - - status = ice_rem_rss_cfg(hw, vsi->idx, &cfg); - /* We just ignore -ENOENT, because if two configurations - * share the same profile remove one of them actually - * removes both, since the profile is deleted. 
- */ - if (status && status != -ENOENT) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - dev_err(dev, "ice_rem_rss_cfg failed for VF ID:%d, error:%d\n", - vf->vf_id, status); - } - } - } - -error_param: - return ice_vc_send_msg_to_vf(vf, v_opcode, v_ret, NULL, 0); -} - -/** - * ice_vc_config_rss_key - * @vf: pointer to the VF info - * @msg: pointer to the msg buffer - * - * Configure the VF's RSS key - */ -static int ice_vc_config_rss_key(struct ice_vf *vf, u8 *msg) -{ - enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; - struct virtchnl_rss_key *vrk = - (struct virtchnl_rss_key *)msg; - struct ice_vsi *vsi; - - if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - if (!ice_vc_isvalid_vsi_id(vf, vrk->vsi_id)) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - if (vrk->key_len != ICE_VSIQF_HKEY_ARRAY_SIZE) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - if (!test_bit(ICE_FLAG_RSS_ENA, vf->pf->flags)) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - vsi = ice_get_vf_vsi(vf); - if (!vsi) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - if (ice_set_rss_key(vsi, vrk->key)) - v_ret = VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR; -error_param: - return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_RSS_KEY, v_ret, - NULL, 0); -} - -/** - * ice_vc_config_rss_lut - * @vf: pointer to the VF info - * @msg: pointer to the msg buffer - * - * Configure the VF's RSS LUT - */ -static int ice_vc_config_rss_lut(struct ice_vf *vf, u8 *msg) -{ - struct virtchnl_rss_lut *vrl = (struct virtchnl_rss_lut *)msg; - enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; - struct ice_vsi *vsi; - - if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - if (!ice_vc_isvalid_vsi_id(vf, vrl->vsi_id)) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - if (vrl->lut_entries != ICE_LUT_VSI_SIZE) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - if (!test_bit(ICE_FLAG_RSS_ENA, vf->pf->flags)) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - vsi = ice_get_vf_vsi(vf); - if (!vsi) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - if (ice_set_rss_lut(vsi, vrl->lut, ICE_LUT_VSI_SIZE)) - v_ret = VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR; -error_param: - return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_RSS_LUT, v_ret, - NULL, 0); -} - -/** - * ice_vc_config_rss_hfunc - * @vf: pointer to the VF info - * @msg: pointer to the msg buffer - * - * Configure the VF's RSS Hash function - */ -static int ice_vc_config_rss_hfunc(struct ice_vf *vf, u8 *msg) -{ - struct virtchnl_rss_hfunc *vrh = (struct virtchnl_rss_hfunc *)msg; - enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; - u8 hfunc = ICE_AQ_VSI_Q_OPT_RSS_HASH_TPLZ; - struct ice_vsi *vsi; - - if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - if (!ice_vc_isvalid_vsi_id(vf, vrh->vsi_id)) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - if (!test_bit(ICE_FLAG_RSS_ENA, vf->pf->flags)) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - vsi = ice_get_vf_vsi(vf); - if (!vsi) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - if (vrh->rss_algorithm == VIRTCHNL_RSS_ALG_TOEPLITZ_SYMMETRIC) - hfunc = ICE_AQ_VSI_Q_OPT_RSS_HASH_SYM_TPLZ; - - if (ice_set_rss_hfunc(vsi, hfunc)) - v_ret = 
VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR; -error_param: - return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_RSS_HFUNC, v_ret, - NULL, 0); -} - -/** * ice_vc_get_qos_caps - Get current QoS caps from PF * @vf: pointer to the VF info * @@ -1122,110 +468,6 @@ err: } /** - * ice_vf_cfg_qs_bw - Configure per queue bandwidth - * @vf: pointer to the VF info - * @num_queues: number of queues to be configured - * - * Configure per queue bandwidth. - * - * Return: 0 on success or negative error value. - */ -static int ice_vf_cfg_qs_bw(struct ice_vf *vf, u16 num_queues) -{ - struct ice_hw *hw = &vf->pf->hw; - struct ice_vsi *vsi; - int ret; - u16 i; - - vsi = ice_get_vf_vsi(vf); - if (!vsi) - return -EINVAL; - - for (i = 0; i < num_queues; i++) { - u32 p_rate, min_rate; - u8 tc; - - p_rate = vf->qs_bw[i].peak; - min_rate = vf->qs_bw[i].committed; - tc = vf->qs_bw[i].tc; - if (p_rate) - ret = ice_cfg_q_bw_lmt(hw->port_info, vsi->idx, tc, - vf->qs_bw[i].queue_id, - ICE_MAX_BW, p_rate); - else - ret = ice_cfg_q_bw_dflt_lmt(hw->port_info, vsi->idx, tc, - vf->qs_bw[i].queue_id, - ICE_MAX_BW); - if (ret) - return ret; - - if (min_rate) - ret = ice_cfg_q_bw_lmt(hw->port_info, vsi->idx, tc, - vf->qs_bw[i].queue_id, - ICE_MIN_BW, min_rate); - else - ret = ice_cfg_q_bw_dflt_lmt(hw->port_info, vsi->idx, tc, - vf->qs_bw[i].queue_id, - ICE_MIN_BW); - - if (ret) - return ret; - } - - return 0; -} - -/** - * ice_vf_cfg_q_quanta_profile - Configure quanta profile - * @vf: pointer to the VF info - * @quanta_prof_idx: pointer to the quanta profile index - * @quanta_size: quanta size to be set - * - * This function chooses available quanta profile and configures the register. - * The quanta profile is evenly divided by the number of device ports, and then - * available to the specific PF and VFs. The first profile for each PF is a - * reserved default profile. Only quanta size of the rest unused profile can be - * modified. - * - * Return: 0 on success or negative error value. 
- */ -static int ice_vf_cfg_q_quanta_profile(struct ice_vf *vf, u16 quanta_size, - u16 *quanta_prof_idx) -{ - const u16 n_desc = calc_quanta_desc(quanta_size); - struct ice_hw *hw = &vf->pf->hw; - const u16 n_cmd = 2 * n_desc; - struct ice_pf *pf = vf->pf; - u16 per_pf, begin_id; - u8 n_used; - u32 reg; - - begin_id = (GLCOMM_QUANTA_PROF_MAX_INDEX + 1) / hw->dev_caps.num_funcs * - hw->logical_pf_id; - - if (quanta_size == ICE_DFLT_QUANTA) { - *quanta_prof_idx = begin_id; - } else { - per_pf = (GLCOMM_QUANTA_PROF_MAX_INDEX + 1) / - hw->dev_caps.num_funcs; - n_used = pf->num_quanta_prof_used; - if (n_used < per_pf) { - *quanta_prof_idx = begin_id + 1 + n_used; - pf->num_quanta_prof_used++; - } else { - return -EINVAL; - } - } - - reg = FIELD_PREP(GLCOMM_QUANTA_PROF_QUANTA_SIZE_M, quanta_size) | - FIELD_PREP(GLCOMM_QUANTA_PROF_MAX_CMD_M, n_cmd) | - FIELD_PREP(GLCOMM_QUANTA_PROF_MAX_DESC_M, n_desc); - wr32(hw, GLCOMM_QUANTA_PROF(*quanta_prof_idx), reg); - - return 0; -} - -/** * ice_vc_cfg_promiscuous_mode_msg * @vf: pointer to the VF info * @msg: pointer to the msg buffer @@ -1407,757 +649,6 @@ error_param: } /** - * ice_vc_validate_vqs_bitmaps - validate Rx/Tx queue bitmaps from VIRTCHNL - * @vqs: virtchnl_queue_select structure containing bitmaps to validate - * - * Return true on successful validation, else false - */ -static bool ice_vc_validate_vqs_bitmaps(struct virtchnl_queue_select *vqs) -{ - if ((!vqs->rx_queues && !vqs->tx_queues) || - vqs->rx_queues >= BIT(ICE_MAX_RSS_QS_PER_VF) || - vqs->tx_queues >= BIT(ICE_MAX_RSS_QS_PER_VF)) - return false; - - return true; -} - -/** - * ice_vf_ena_txq_interrupt - enable Tx queue interrupt via QINT_TQCTL - * @vsi: VSI of the VF to configure - * @q_idx: VF queue index used to determine the queue in the PF's space - */ -void ice_vf_ena_txq_interrupt(struct ice_vsi *vsi, u32 q_idx) -{ - struct ice_hw *hw = &vsi->back->hw; - u32 pfq = vsi->txq_map[q_idx]; - u32 reg; - - reg = rd32(hw, QINT_TQCTL(pfq)); - - /* MSI-X index 0 in the VF's space is always for the OICR, which means - * this is most likely a poll mode VF driver, so don't enable an - * interrupt that was never configured via VIRTCHNL_OP_CONFIG_IRQ_MAP - */ - if (!(reg & QINT_TQCTL_MSIX_INDX_M)) - return; - - wr32(hw, QINT_TQCTL(pfq), reg | QINT_TQCTL_CAUSE_ENA_M); -} - -/** - * ice_vf_ena_rxq_interrupt - enable Tx queue interrupt via QINT_RQCTL - * @vsi: VSI of the VF to configure - * @q_idx: VF queue index used to determine the queue in the PF's space - */ -void ice_vf_ena_rxq_interrupt(struct ice_vsi *vsi, u32 q_idx) -{ - struct ice_hw *hw = &vsi->back->hw; - u32 pfq = vsi->rxq_map[q_idx]; - u32 reg; - - reg = rd32(hw, QINT_RQCTL(pfq)); - - /* MSI-X index 0 in the VF's space is always for the OICR, which means - * this is most likely a poll mode VF driver, so don't enable an - * interrupt that was never configured via VIRTCHNL_OP_CONFIG_IRQ_MAP - */ - if (!(reg & QINT_RQCTL_MSIX_INDX_M)) - return; - - wr32(hw, QINT_RQCTL(pfq), reg | QINT_RQCTL_CAUSE_ENA_M); -} - -/** - * ice_vc_ena_qs_msg - * @vf: pointer to the VF info - * @msg: pointer to the msg buffer - * - * called from the VF to enable all or specific queue(s) - */ -static int ice_vc_ena_qs_msg(struct ice_vf *vf, u8 *msg) -{ - enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; - struct virtchnl_queue_select *vqs = - (struct virtchnl_queue_select *)msg; - struct ice_vsi *vsi; - unsigned long q_map; - u16 vf_q_id; - - if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; 
- } - - if (!ice_vc_isvalid_vsi_id(vf, vqs->vsi_id)) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - if (!ice_vc_validate_vqs_bitmaps(vqs)) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - vsi = ice_get_vf_vsi(vf); - if (!vsi) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - /* Enable only Rx rings, Tx rings were enabled by the FW when the - * Tx queue group list was configured and the context bits were - * programmed using ice_vsi_cfg_txqs - */ - q_map = vqs->rx_queues; - for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) { - if (!ice_vc_isvalid_q_id(vsi, vf_q_id)) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - /* Skip queue if enabled */ - if (test_bit(vf_q_id, vf->rxq_ena)) - continue; - - if (ice_vsi_ctrl_one_rx_ring(vsi, true, vf_q_id, true)) { - dev_err(ice_pf_to_dev(vsi->back), "Failed to enable Rx ring %d on VSI %d\n", - vf_q_id, vsi->vsi_num); - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - ice_vf_ena_rxq_interrupt(vsi, vf_q_id); - set_bit(vf_q_id, vf->rxq_ena); - } - - q_map = vqs->tx_queues; - for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) { - if (!ice_vc_isvalid_q_id(vsi, vf_q_id)) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - /* Skip queue if enabled */ - if (test_bit(vf_q_id, vf->txq_ena)) - continue; - - ice_vf_ena_txq_interrupt(vsi, vf_q_id); - set_bit(vf_q_id, vf->txq_ena); - } - - /* Set flag to indicate that queues are enabled */ - if (v_ret == VIRTCHNL_STATUS_SUCCESS) - set_bit(ICE_VF_STATE_QS_ENA, vf->vf_states); - -error_param: - /* send the response to the VF */ - return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ENABLE_QUEUES, v_ret, - NULL, 0); -} - -/** - * ice_vf_vsi_dis_single_txq - disable a single Tx queue - * @vf: VF to disable queue for - * @vsi: VSI for the VF - * @q_id: VF relative (0-based) queue ID - * - * Attempt to disable the Tx queue passed in. If the Tx queue was successfully - * disabled then clear q_id bit in the enabled queues bitmap and return - * success. Otherwise return error. 
- */ -int ice_vf_vsi_dis_single_txq(struct ice_vf *vf, struct ice_vsi *vsi, u16 q_id) -{ - struct ice_txq_meta txq_meta = { 0 }; - struct ice_tx_ring *ring; - int err; - - if (!test_bit(q_id, vf->txq_ena)) - dev_dbg(ice_pf_to_dev(vsi->back), "Queue %u on VSI %u is not enabled, but stopping it anyway\n", - q_id, vsi->vsi_num); - - ring = vsi->tx_rings[q_id]; - if (!ring) - return -EINVAL; - - ice_fill_txq_meta(vsi, ring, &txq_meta); - - err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, vf->vf_id, ring, &txq_meta); - if (err) { - dev_err(ice_pf_to_dev(vsi->back), "Failed to stop Tx ring %d on VSI %d\n", - q_id, vsi->vsi_num); - return err; - } - - /* Clear enabled queues flag */ - clear_bit(q_id, vf->txq_ena); - - return 0; -} - -/** - * ice_vc_dis_qs_msg - * @vf: pointer to the VF info - * @msg: pointer to the msg buffer - * - * called from the VF to disable all or specific queue(s) - */ -static int ice_vc_dis_qs_msg(struct ice_vf *vf, u8 *msg) -{ - enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; - struct virtchnl_queue_select *vqs = - (struct virtchnl_queue_select *)msg; - struct ice_vsi *vsi; - unsigned long q_map; - u16 vf_q_id; - - if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states) && - !test_bit(ICE_VF_STATE_QS_ENA, vf->vf_states)) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - if (!ice_vc_isvalid_vsi_id(vf, vqs->vsi_id)) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - if (!ice_vc_validate_vqs_bitmaps(vqs)) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - vsi = ice_get_vf_vsi(vf); - if (!vsi) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - if (vqs->tx_queues) { - q_map = vqs->tx_queues; - - for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) { - if (!ice_vc_isvalid_q_id(vsi, vf_q_id)) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - if (ice_vf_vsi_dis_single_txq(vf, vsi, vf_q_id)) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - } - } - - q_map = vqs->rx_queues; - /* speed up Rx queue disable by batching them if possible */ - if (q_map && - bitmap_equal(&q_map, vf->rxq_ena, ICE_MAX_RSS_QS_PER_VF)) { - if (ice_vsi_stop_all_rx_rings(vsi)) { - dev_err(ice_pf_to_dev(vsi->back), "Failed to stop all Rx rings on VSI %d\n", - vsi->vsi_num); - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - bitmap_zero(vf->rxq_ena, ICE_MAX_RSS_QS_PER_VF); - } else if (q_map) { - for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) { - if (!ice_vc_isvalid_q_id(vsi, vf_q_id)) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - /* Skip queue if not enabled */ - if (!test_bit(vf_q_id, vf->rxq_ena)) - continue; - - if (ice_vsi_ctrl_one_rx_ring(vsi, false, vf_q_id, - true)) { - dev_err(ice_pf_to_dev(vsi->back), "Failed to stop Rx ring %d on VSI %d\n", - vf_q_id, vsi->vsi_num); - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - /* Clear enabled queues flag */ - clear_bit(vf_q_id, vf->rxq_ena); - } - } - - /* Clear enabled queues flag */ - if (v_ret == VIRTCHNL_STATUS_SUCCESS && ice_vf_has_no_qs_ena(vf)) - clear_bit(ICE_VF_STATE_QS_ENA, vf->vf_states); - -error_param: - /* send the response to the VF */ - return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_DISABLE_QUEUES, v_ret, - NULL, 0); -} - -/** - * ice_cfg_interrupt - * @vf: pointer to the VF info - * @vsi: the VSI being configured - * @map: vector map for mapping vectors to queues - * @q_vector: structure for interrupt vector - * configure the IRQ to queue map - */ -static enum virtchnl_status_code 
-ice_cfg_interrupt(struct ice_vf *vf, struct ice_vsi *vsi, - struct virtchnl_vector_map *map, - struct ice_q_vector *q_vector) -{ - u16 vsi_q_id, vsi_q_id_idx; - unsigned long qmap; - - q_vector->num_ring_rx = 0; - q_vector->num_ring_tx = 0; - - qmap = map->rxq_map; - for_each_set_bit(vsi_q_id_idx, &qmap, ICE_MAX_RSS_QS_PER_VF) { - vsi_q_id = vsi_q_id_idx; - - if (!ice_vc_isvalid_q_id(vsi, vsi_q_id)) - return VIRTCHNL_STATUS_ERR_PARAM; - - q_vector->num_ring_rx++; - q_vector->rx.itr_idx = map->rxitr_idx; - vsi->rx_rings[vsi_q_id]->q_vector = q_vector; - ice_cfg_rxq_interrupt(vsi, vsi_q_id, - q_vector->vf_reg_idx, - q_vector->rx.itr_idx); - } - - qmap = map->txq_map; - for_each_set_bit(vsi_q_id_idx, &qmap, ICE_MAX_RSS_QS_PER_VF) { - vsi_q_id = vsi_q_id_idx; - - if (!ice_vc_isvalid_q_id(vsi, vsi_q_id)) - return VIRTCHNL_STATUS_ERR_PARAM; - - q_vector->num_ring_tx++; - q_vector->tx.itr_idx = map->txitr_idx; - vsi->tx_rings[vsi_q_id]->q_vector = q_vector; - ice_cfg_txq_interrupt(vsi, vsi_q_id, - q_vector->vf_reg_idx, - q_vector->tx.itr_idx); - } - - return VIRTCHNL_STATUS_SUCCESS; -} - -/** - * ice_vc_cfg_irq_map_msg - * @vf: pointer to the VF info - * @msg: pointer to the msg buffer - * - * called from the VF to configure the IRQ to queue map - */ -static int ice_vc_cfg_irq_map_msg(struct ice_vf *vf, u8 *msg) -{ - enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; - u16 num_q_vectors_mapped, vsi_id, vector_id; - struct virtchnl_irq_map_info *irqmap_info; - struct virtchnl_vector_map *map; - struct ice_vsi *vsi; - int i; - - irqmap_info = (struct virtchnl_irq_map_info *)msg; - num_q_vectors_mapped = irqmap_info->num_vectors; - - /* Check to make sure number of VF vectors mapped is not greater than - * number of VF vectors originally allocated, and check that - * there is actually at least a single VF queue vector mapped - */ - if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states) || - vf->num_msix < num_q_vectors_mapped || - !num_q_vectors_mapped) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - vsi = ice_get_vf_vsi(vf); - if (!vsi) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - for (i = 0; i < num_q_vectors_mapped; i++) { - struct ice_q_vector *q_vector; - - map = &irqmap_info->vecmap[i]; - - vector_id = map->vector_id; - vsi_id = map->vsi_id; - /* vector_id is always 0-based for each VF, and can never be - * larger than or equal to the max allowed interrupts per VF - */ - if (!(vector_id < vf->num_msix) || - !ice_vc_isvalid_vsi_id(vf, vsi_id) || - (!vector_id && (map->rxq_map || map->txq_map))) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - /* No need to map VF miscellaneous or rogue vector */ - if (!vector_id) - continue; - - /* Subtract non queue vector from vector_id passed by VF - * to get actual number of VSI queue vector array index - */ - q_vector = vsi->q_vectors[vector_id - ICE_NONQ_VECS_VF]; - if (!q_vector) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - /* lookout for the invalid queue index */ - v_ret = ice_cfg_interrupt(vf, vsi, map, q_vector); - if (v_ret) - goto error_param; - } - -error_param: - /* send the response to the VF */ - return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_IRQ_MAP, v_ret, - NULL, 0); -} - -/** - * ice_vc_cfg_q_bw - Configure per queue bandwidth - * @vf: pointer to the VF info - * @msg: pointer to the msg buffer which holds the command descriptor - * - * Configure VF queues bandwidth. - * - * Return: 0 on success or negative error value. 
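A note on the vector_id arithmetic in the IRQ-map handler above: vector 0 is the VF's miscellaneous/mailbox vector and must not carry queues, so queue vectors index vsi->q_vectors[] at vector_id - ICE_NONQ_VECS_VF. A small sketch of that translation, assuming ICE_NONQ_VECS_VF is 1 as the subtraction above implies; the function name and return conventions are illustrative only.

#include <stdbool.h>
#include <stdio.h>

#define NONQ_VECS_VF	1	/* assumed value of ICE_NONQ_VECS_VF */

/* Return the q_vectors[] index for a VF-relative vector id, -1 if the
 * mapping is invalid (id out of range, or queues attached to vector 0),
 * or -2 if the vector is the mailbox vector and is simply skipped.
 */
static int vf_vector_to_qv_idx(unsigned int vector_id, unsigned int num_msix,
			       bool has_queues)
{
	if (vector_id >= num_msix)
		return -1;
	if (vector_id == 0)
		return has_queues ? -1 : -2;
	return (int)(vector_id - NONQ_VECS_VF);
}

int main(void)
{
	printf("%d\n", vf_vector_to_qv_idx(0, 4, false));	/* -2, skipped */
	printf("%d\n", vf_vector_to_qv_idx(1, 4, true));	/* 0 */
	printf("%d\n", vf_vector_to_qv_idx(4, 4, true));	/* -1, invalid */
	return 0;
}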
- */ -static int ice_vc_cfg_q_bw(struct ice_vf *vf, u8 *msg) -{ - enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; - struct virtchnl_queues_bw_cfg *qbw = - (struct virtchnl_queues_bw_cfg *)msg; - struct ice_vsi *vsi; - u16 i; - - if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states) || - !ice_vc_isvalid_vsi_id(vf, qbw->vsi_id)) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto err; - } - - vsi = ice_get_vf_vsi(vf); - if (!vsi) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto err; - } - - if (qbw->num_queues > ICE_MAX_RSS_QS_PER_VF || - qbw->num_queues > min_t(u16, vsi->alloc_txq, vsi->alloc_rxq)) { - dev_err(ice_pf_to_dev(vf->pf), "VF-%d trying to configure more than allocated number of queues: %d\n", - vf->vf_id, min_t(u16, vsi->alloc_txq, vsi->alloc_rxq)); - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto err; - } - - for (i = 0; i < qbw->num_queues; i++) { - if (qbw->cfg[i].shaper.peak != 0 && vf->max_tx_rate != 0 && - qbw->cfg[i].shaper.peak > vf->max_tx_rate) { - dev_warn(ice_pf_to_dev(vf->pf), "The maximum queue %d rate limit configuration may not take effect because the maximum TX rate for VF-%d is %d\n", - qbw->cfg[i].queue_id, vf->vf_id, - vf->max_tx_rate); - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto err; - } - if (qbw->cfg[i].shaper.committed != 0 && vf->min_tx_rate != 0 && - qbw->cfg[i].shaper.committed < vf->min_tx_rate) { - dev_warn(ice_pf_to_dev(vf->pf), "The minimum queue %d rate limit configuration may not take effect because the minimum TX rate for VF-%d is %d\n", - qbw->cfg[i].queue_id, vf->vf_id, - vf->min_tx_rate); - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto err; - } - if (qbw->cfg[i].queue_id > vf->num_vf_qs) { - dev_warn(ice_pf_to_dev(vf->pf), "VF-%d trying to configure invalid queue_id\n", - vf->vf_id); - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto err; - } - if (qbw->cfg[i].tc >= ICE_MAX_TRAFFIC_CLASS) { - dev_warn(ice_pf_to_dev(vf->pf), "VF-%d trying to configure a traffic class higher than allowed\n", - vf->vf_id); - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto err; - } - } - - for (i = 0; i < qbw->num_queues; i++) { - vf->qs_bw[i].queue_id = qbw->cfg[i].queue_id; - vf->qs_bw[i].peak = qbw->cfg[i].shaper.peak; - vf->qs_bw[i].committed = qbw->cfg[i].shaper.committed; - vf->qs_bw[i].tc = qbw->cfg[i].tc; - } - - if (ice_vf_cfg_qs_bw(vf, qbw->num_queues)) - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - -err: - /* send the response to the VF */ - return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_QUEUE_BW, - v_ret, NULL, 0); -} - -/** - * ice_vc_cfg_q_quanta - Configure per queue quanta - * @vf: pointer to the VF info - * @msg: pointer to the msg buffer which holds the command descriptor - * - * Configure VF queues quanta. - * - * Return: 0 on success or negative error value. 
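The per-queue shaper validation above reduces to two interval tests against the VF-level rate limits: a requested peak must not exceed the VF's max TX rate, and a requested committed rate must not undercut its min TX rate, with zero meaning "not requested" on either side. A compact, runnable model of just that check; the struct and rate units are illustrative.

#include <stdbool.h>
#include <stdio.h>

struct shaper_cfg {
	unsigned int peak;	/* 0 means "not requested" */
	unsigned int committed;	/* 0 means "not requested" */
};

static bool shaper_cfg_valid(const struct shaper_cfg *cfg,
			     unsigned int vf_max_rate, unsigned int vf_min_rate)
{
	if (cfg->peak && vf_max_rate && cfg->peak > vf_max_rate)
		return false;	/* queue peak above the VF cap */
	if (cfg->committed && vf_min_rate && cfg->committed < vf_min_rate)
		return false;	/* queue floor below the VF floor */
	return true;
}

int main(void)
{
	struct shaper_cfg ok = { .peak = 500, .committed = 100 };
	struct shaper_cfg bad = { .peak = 2000, .committed = 100 };

	printf("%d %d\n", shaper_cfg_valid(&ok, 1000, 50),
	       shaper_cfg_valid(&bad, 1000, 50));	/* prints: 1 0 */
	return 0;
}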
- */ -static int ice_vc_cfg_q_quanta(struct ice_vf *vf, u8 *msg) -{ - u16 quanta_prof_id, quanta_size, start_qid, num_queues, end_qid, i; - enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; - struct virtchnl_quanta_cfg *qquanta = - (struct virtchnl_quanta_cfg *)msg; - struct ice_vsi *vsi; - int ret; - - start_qid = qquanta->queue_select.start_queue_id; - num_queues = qquanta->queue_select.num_queues; - - if (check_add_overflow(start_qid, num_queues, &end_qid)) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto err; - } - - if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto err; - } - - vsi = ice_get_vf_vsi(vf); - if (!vsi) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto err; - } - - if (end_qid > ICE_MAX_RSS_QS_PER_VF || - end_qid > min_t(u16, vsi->alloc_txq, vsi->alloc_rxq)) { - dev_err(ice_pf_to_dev(vf->pf), "VF-%d trying to configure more than allocated number of queues: %d\n", - vf->vf_id, min_t(u16, vsi->alloc_txq, vsi->alloc_rxq)); - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto err; - } - - quanta_size = qquanta->quanta_size; - if (quanta_size > ICE_MAX_QUANTA_SIZE || - quanta_size < ICE_MIN_QUANTA_SIZE) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto err; - } - - if (quanta_size % 64) { - dev_err(ice_pf_to_dev(vf->pf), "quanta size should be the product of 64\n"); - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto err; - } - - ret = ice_vf_cfg_q_quanta_profile(vf, quanta_size, - &quanta_prof_id); - if (ret) { - v_ret = VIRTCHNL_STATUS_ERR_NOT_SUPPORTED; - goto err; - } - - for (i = start_qid; i < end_qid; i++) - vsi->tx_rings[i]->quanta_prof_id = quanta_prof_id; - -err: - /* send the response to the VF */ - return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_QUANTA, - v_ret, NULL, 0); -} - -/** - * ice_vc_cfg_qs_msg - * @vf: pointer to the VF info - * @msg: pointer to the msg buffer - * - * called from the VF to configure the Rx/Tx queues - */ -static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg) -{ - struct virtchnl_vsi_queue_config_info *qci = - (struct virtchnl_vsi_queue_config_info *)msg; - struct virtchnl_queue_pair_info *qpi; - struct ice_pf *pf = vf->pf; - struct ice_vsi *vsi; - int i = -1, q_idx; - bool ena_ts; - u8 act_prt; - - mutex_lock(&pf->lag_mutex); - act_prt = ice_lag_prepare_vf_reset(pf->lag); - - if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) - goto error_param; - - if (!ice_vc_isvalid_vsi_id(vf, qci->vsi_id)) - goto error_param; - - vsi = ice_get_vf_vsi(vf); - if (!vsi) - goto error_param; - - if (qci->num_queue_pairs > ICE_MAX_RSS_QS_PER_VF || - qci->num_queue_pairs > min_t(u16, vsi->alloc_txq, vsi->alloc_rxq)) { - dev_err(ice_pf_to_dev(pf), "VF-%d requesting more than supported number of queues: %d\n", - vf->vf_id, min_t(u16, vsi->alloc_txq, vsi->alloc_rxq)); - goto error_param; - } - - for (i = 0; i < qci->num_queue_pairs; i++) { - if (!qci->qpair[i].rxq.crc_disable) - continue; - - if (!(vf->driver_caps & VIRTCHNL_VF_OFFLOAD_CRC) || - vf->vlan_strip_ena) - goto error_param; - } - - for (i = 0; i < qci->num_queue_pairs; i++) { - qpi = &qci->qpair[i]; - if (qpi->txq.vsi_id != qci->vsi_id || - qpi->rxq.vsi_id != qci->vsi_id || - qpi->rxq.queue_id != qpi->txq.queue_id || - qpi->txq.headwb_enabled || - !ice_vc_isvalid_ring_len(qpi->txq.ring_len) || - !ice_vc_isvalid_ring_len(qpi->rxq.ring_len) || - !ice_vc_isvalid_q_id(vsi, qpi->txq.queue_id)) { - goto error_param; - } - - q_idx = qpi->rxq.queue_id; - - /* make sure selected "q_idx" is in valid range of queues - * for selected "vsi" - */ - if (q_idx >= vsi->alloc_txq || 
q_idx >= vsi->alloc_rxq) { - goto error_param; - } - - /* copy Tx queue info from VF into VSI */ - if (qpi->txq.ring_len > 0) { - vsi->tx_rings[q_idx]->dma = qpi->txq.dma_ring_addr; - vsi->tx_rings[q_idx]->count = qpi->txq.ring_len; - - /* Disable any existing queue first */ - if (ice_vf_vsi_dis_single_txq(vf, vsi, q_idx)) - goto error_param; - - /* Configure a queue with the requested settings */ - if (ice_vsi_cfg_single_txq(vsi, vsi->tx_rings, q_idx)) { - dev_warn(ice_pf_to_dev(pf), "VF-%d failed to configure TX queue %d\n", - vf->vf_id, q_idx); - goto error_param; - } - } - - /* copy Rx queue info from VF into VSI */ - if (qpi->rxq.ring_len > 0) { - u16 max_frame_size = ice_vc_get_max_frame_size(vf); - struct ice_rx_ring *ring = vsi->rx_rings[q_idx]; - u32 rxdid; - - ring->dma = qpi->rxq.dma_ring_addr; - ring->count = qpi->rxq.ring_len; - - if (qpi->rxq.crc_disable) - ring->flags |= ICE_RX_FLAGS_CRC_STRIP_DIS; - else - ring->flags &= ~ICE_RX_FLAGS_CRC_STRIP_DIS; - - if (qpi->rxq.databuffer_size != 0 && - (qpi->rxq.databuffer_size > ((16 * 1024) - 128) || - qpi->rxq.databuffer_size < 1024)) - goto error_param; - ring->rx_buf_len = qpi->rxq.databuffer_size; - if (qpi->rxq.max_pkt_size > max_frame_size || - qpi->rxq.max_pkt_size < 64) - goto error_param; - - ring->max_frame = qpi->rxq.max_pkt_size; - /* add space for the port VLAN since the VF driver is - * not expected to account for it in the MTU - * calculation - */ - if (ice_vf_is_port_vlan_ena(vf)) - ring->max_frame += VLAN_HLEN; - - if (ice_vsi_cfg_single_rxq(vsi, q_idx)) { - dev_warn(ice_pf_to_dev(pf), "VF-%d failed to configure RX queue %d\n", - vf->vf_id, q_idx); - goto error_param; - } - - /* If Rx flex desc is supported, select RXDID for Rx - * queues. Otherwise, use legacy 32byte descriptor - * format. Legacy 16byte descriptor is not supported. - * If this RXDID is selected, return error. - */ - if (vf->driver_caps & - VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC) { - rxdid = qpi->rxq.rxdid; - if (!(BIT(rxdid) & pf->supported_rxdids)) - goto error_param; - } else { - rxdid = ICE_RXDID_LEGACY_1; - } - - ena_ts = ((vf->driver_caps & - VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC) && - (vf->driver_caps & VIRTCHNL_VF_CAP_PTP) && - (qpi->rxq.flags & VIRTCHNL_PTP_RX_TSTAMP)); - - ice_write_qrxflxp_cntxt(&vsi->back->hw, - vsi->rxq_map[q_idx], rxdid, - ICE_RXDID_PRIO, ena_ts); - } - } - - ice_lag_complete_vf_reset(pf->lag, act_prt); - mutex_unlock(&pf->lag_mutex); - - /* send the response to the VF */ - return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_VSI_QUEUES, - VIRTCHNL_STATUS_SUCCESS, NULL, 0); -error_param: - /* disable whatever we can */ - for (; i >= 0; i--) { - if (ice_vsi_ctrl_one_rx_ring(vsi, false, i, true)) - dev_err(ice_pf_to_dev(pf), "VF-%d could not disable RX queue %d\n", - vf->vf_id, i); - if (ice_vf_vsi_dis_single_txq(vf, vsi, i)) - dev_err(ice_pf_to_dev(pf), "VF-%d could not disable TX queue %d\n", - vf->vf_id, i); - } - - ice_lag_complete_vf_reset(pf->lag, act_prt); - mutex_unlock(&pf->lag_mutex); - - ice_lag_move_new_vf_nodes(vf); - - /* send the response to the VF */ - return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_VSI_QUEUES, - VIRTCHNL_STATUS_ERR_PARAM, NULL, 0); -} - -/** * ice_can_vf_change_mac * @vf: pointer to the VF info * @@ -2531,66 +1022,6 @@ static int ice_vc_del_mac_addr_msg(struct ice_vf *vf, u8 *msg) } /** - * ice_vc_request_qs_msg - * @vf: pointer to the VF info - * @msg: pointer to the msg buffer - * - * VFs get a default number of queues but can use this message to request a - * different number. 
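The Rx half of the queue-pair loop above enforces three independent bounds: the data buffer size, when given, must lie in [1024, 16K-128]; the max packet size must lie in [64, negotiated frame size]; and a flex descriptor ID is accepted only if its bit is set in the PF's supported mask. A self-contained sketch of those checks; the function and parameter names are made up, the limits are copied from the code above.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool rxq_params_valid(uint32_t databuf, uint32_t max_pkt,
			     uint32_t max_frame, uint32_t rxdid,
			     uint64_t supported_rxdids)
{
	if (databuf != 0 && (databuf > 16 * 1024 - 128 || databuf < 1024))
		return false;
	if (max_pkt > max_frame || max_pkt < 64)
		return false;
	return (supported_rxdids >> rxdid) & 1;	/* BIT(rxdid) test */
}

int main(void)
{
	/* assume descriptor IDs 1 and 2 are supported (mask 0x6) */
	printf("%d\n", rxq_params_valid(2048, 1500, 9728, 2, 0x6)); /* 1 */
	printf("%d\n", rxq_params_valid(512, 1500, 9728, 2, 0x6));  /* 0 */
	return 0;
}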
If the request is successful, PF will reset the VF and - * return 0. If unsuccessful, PF will send message informing VF of number of - * available queue pairs via virtchnl message response to VF. - */ -static int ice_vc_request_qs_msg(struct ice_vf *vf, u8 *msg) -{ - enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; - struct virtchnl_vf_res_request *vfres = - (struct virtchnl_vf_res_request *)msg; - u16 req_queues = vfres->num_queue_pairs; - struct ice_pf *pf = vf->pf; - u16 max_allowed_vf_queues; - u16 tx_rx_queue_left; - struct device *dev; - u16 cur_queues; - - dev = ice_pf_to_dev(pf); - if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - cur_queues = vf->num_vf_qs; - tx_rx_queue_left = min_t(u16, ice_get_avail_txq_count(pf), - ice_get_avail_rxq_count(pf)); - max_allowed_vf_queues = tx_rx_queue_left + cur_queues; - if (!req_queues) { - dev_err(dev, "VF %d tried to request 0 queues. Ignoring.\n", - vf->vf_id); - } else if (req_queues > ICE_MAX_RSS_QS_PER_VF) { - dev_err(dev, "VF %d tried to request more than %d queues.\n", - vf->vf_id, ICE_MAX_RSS_QS_PER_VF); - vfres->num_queue_pairs = ICE_MAX_RSS_QS_PER_VF; - } else if (req_queues > cur_queues && - req_queues - cur_queues > tx_rx_queue_left) { - dev_warn(dev, "VF %d requested %u more queues, but only %u left.\n", - vf->vf_id, req_queues - cur_queues, tx_rx_queue_left); - vfres->num_queue_pairs = min_t(u16, max_allowed_vf_queues, - ICE_MAX_RSS_QS_PER_VF); - } else { - /* request is successful, then reset VF */ - vf->num_req_qs = req_queues; - ice_reset_vf(vf, ICE_VF_RESET_NOTIFY); - dev_info(dev, "VF %d granted request of %u queues.\n", - vf->vf_id, req_queues); - return 0; - } - -error_param: - /* send the response to the VF */ - return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_REQUEST_QUEUES, - v_ret, (u8 *)vfres, sizeof(*vfres)); -} - -/** * ice_vf_vlan_offload_ena - determine if capabilities support VLAN offloads * @caps: VF driver negotiated capabilities * @@ -2983,112 +1414,6 @@ error_param: } /** - * ice_vc_get_rss_hashcfg - return the RSS Hash configuration - * @vf: pointer to the VF info - */ -static int ice_vc_get_rss_hashcfg(struct ice_vf *vf) -{ - enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; - struct virtchnl_rss_hashcfg *vrh = NULL; - int len = 0, ret; - - if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto err; - } - - if (!test_bit(ICE_FLAG_RSS_ENA, vf->pf->flags)) { - dev_err(ice_pf_to_dev(vf->pf), "RSS not supported by PF\n"); - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto err; - } - - len = sizeof(struct virtchnl_rss_hashcfg); - vrh = kzalloc(len, GFP_KERNEL); - if (!vrh) { - v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY; - len = 0; - goto err; - } - - vrh->hashcfg = ICE_DEFAULT_RSS_HASHCFG; -err: - /* send the response back to the VF */ - ret = ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_GET_RSS_HASHCFG_CAPS, v_ret, - (u8 *)vrh, len); - kfree(vrh); - return ret; -} - -/** - * ice_vc_set_rss_hashcfg - set RSS Hash configuration bits for the VF - * @vf: pointer to the VF info - * @msg: pointer to the msg buffer - */ -static int ice_vc_set_rss_hashcfg(struct ice_vf *vf, u8 *msg) -{ - struct virtchnl_rss_hashcfg *vrh = (struct virtchnl_rss_hashcfg *)msg; - enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; - struct ice_pf *pf = vf->pf; - struct ice_vsi *vsi; - struct device *dev; - int status; - - dev = ice_pf_to_dev(pf); - - if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { - v_ret = 
VIRTCHNL_STATUS_ERR_PARAM; - goto err; - } - - if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) { - dev_err(dev, "RSS not supported by PF\n"); - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto err; - } - - vsi = ice_get_vf_vsi(vf); - if (!vsi) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto err; - } - - /* clear all previously programmed RSS configuration to allow VF drivers - * the ability to customize the RSS configuration and/or completely - * disable RSS - */ - status = ice_rem_vsi_rss_cfg(&pf->hw, vsi->idx); - if (status && !vrh->hashcfg) { - /* only report failure to clear the current RSS configuration if - * that was clearly the VF's intention (i.e. vrh->hashcfg = 0) - */ - v_ret = ice_err_to_virt_err(status); - goto err; - } else if (status) { - /* allow the VF to update the RSS configuration even on failure - * to clear the current RSS confguration in an attempt to keep - * RSS in a working state - */ - dev_warn(dev, "Failed to clear the RSS configuration for VF %u\n", - vf->vf_id); - } - - if (vrh->hashcfg) { - status = ice_add_avf_rss_cfg(&pf->hw, vsi, vrh->hashcfg); - v_ret = ice_err_to_virt_err(status); - } - - /* save the requested VF configuration */ - if (!v_ret) - vf->rss_hashcfg = vrh->hashcfg; - - /* send the response to the VF */ -err: - return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_SET_RSS_HASHCFG, v_ret, - NULL, 0); -} - -/** * ice_vc_query_rxdid - query RXDID supported by DDP package * @vf: pointer to VF info * diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.h b/drivers/net/ethernet/intel/ice/virt/virtchnl.h index 71bb456e2d71..71bb456e2d71 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.h +++ b/drivers/net/ethernet/intel/ice/virt/virtchnl.h diff --git a/drivers/net/ethernet/intel/idpf/idpf.h b/drivers/net/ethernet/intel/idpf/idpf.h index f4c0eaf9bde3..aafbb280c2e7 100644 --- a/drivers/net/ethernet/intel/idpf/idpf.h +++ b/drivers/net/ethernet/intel/idpf/idpf.h @@ -148,6 +148,7 @@ enum idpf_vport_state { * @link_speed_mbps: Link speed in mbps * @vport_idx: Relative vport index * @max_tx_hdr_size: Max header length hardware can support + * @tx_max_bufs: Max buffers that can be transmitted with scatter-gather * @state: See enum idpf_vport_state * @netstats: Packet and byte stats * @stats_lock: Lock to protect stats update @@ -159,6 +160,7 @@ struct idpf_netdev_priv { u32 link_speed_mbps; u16 vport_idx; u16 max_tx_hdr_size; + u16 tx_max_bufs; enum idpf_vport_state state; struct rtnl_link_stats64 netstats; spinlock_t stats_lock; diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c index 513032cb5f08..e327950c93d8 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c @@ -776,6 +776,7 @@ static int idpf_cfg_netdev(struct idpf_vport *vport) np->vport_idx = vport->idx; np->vport_id = vport->vport_id; np->max_tx_hdr_size = idpf_get_max_tx_hdr_size(adapter); + np->tx_max_bufs = idpf_get_max_tx_bufs(adapter); spin_lock_init(&np->stats_lock); @@ -2272,6 +2273,92 @@ static int idpf_change_mtu(struct net_device *netdev, int new_mtu) } /** + * idpf_chk_tso_segment - Check skb is not using too many buffers + * @skb: send buffer + * @max_bufs: maximum number of buffers + * + * For TSO we need to count the TSO header and segment payload separately. 
As + * such we need to check cases where we have max_bufs-1 fragments or more as we + * can potentially require max_bufs+1 DMA transactions, 1 for the TSO header, 1 + * for the segment payload in the first descriptor, and another max_buf-1 for + * the fragments. + * + * Returns true if the packet needs to be software segmented by core stack. + */ +static bool idpf_chk_tso_segment(const struct sk_buff *skb, + unsigned int max_bufs) +{ + const struct skb_shared_info *shinfo = skb_shinfo(skb); + const skb_frag_t *frag, *stale; + int nr_frags, sum; + + /* no need to check if number of frags is less than max_bufs - 1 */ + nr_frags = shinfo->nr_frags; + if (nr_frags < (max_bufs - 1)) + return false; + + /* We need to walk through the list and validate that each group + * of max_bufs-2 fragments totals at least gso_size. + */ + nr_frags -= max_bufs - 2; + frag = &shinfo->frags[0]; + + /* Initialize size to the negative value of gso_size minus 1. We use + * this as the worst case scenario in which the frag ahead of us only + * provides one byte which is why we are limited to max_bufs-2 + * descriptors for a single transmit as the header and previous + * fragment are already consuming 2 descriptors. + */ + sum = 1 - shinfo->gso_size; + + /* Add size of frags 0 through 4 to create our initial sum */ + sum += skb_frag_size(frag++); + sum += skb_frag_size(frag++); + sum += skb_frag_size(frag++); + sum += skb_frag_size(frag++); + sum += skb_frag_size(frag++); + + /* Walk through fragments adding latest fragment, testing it, and + * then removing stale fragments from the sum. + */ + for (stale = &shinfo->frags[0];; stale++) { + int stale_size = skb_frag_size(stale); + + sum += skb_frag_size(frag++); + + /* The stale fragment may present us with a smaller + * descriptor than the actual fragment size. To account + * for that we need to remove all the data on the front and + * figure out what the remainder would be in the last + * descriptor associated with the fragment. + */ + if (stale_size > IDPF_TX_MAX_DESC_DATA) { + int align_pad = -(skb_frag_off(stale)) & + (IDPF_TX_MAX_READ_REQ_SIZE - 1); + + sum -= align_pad; + stale_size -= align_pad; + + do { + sum -= IDPF_TX_MAX_DESC_DATA_ALIGNED; + stale_size -= IDPF_TX_MAX_DESC_DATA_ALIGNED; + } while (stale_size > IDPF_TX_MAX_DESC_DATA); + } + + /* if sum is negative we failed to make sufficient progress */ + if (sum < 0) + return true; + + if (!nr_frags--) + break; + + sum -= stale_size; + } + + return false; +} + +/** * idpf_features_check - Validate packet conforms to limits * @skb: skb buffer * @netdev: This port's netdev @@ -2292,12 +2379,15 @@ static netdev_features_t idpf_features_check(struct sk_buff *skb, if (skb->ip_summed != CHECKSUM_PARTIAL) return features; - /* We cannot support GSO if the MSS is going to be less than - * 88 bytes. If it is then we need to drop support for GSO. - */ - if (skb_is_gso(skb) && - (skb_shinfo(skb)->gso_size < IDPF_TX_TSO_MIN_MSS)) - features &= ~NETIF_F_GSO_MASK; + if (skb_is_gso(skb)) { + /* We cannot support GSO if the MSS is going to be less than + * 88 bytes. If it is then we need to drop support for GSO. 
+ */ + if (skb_shinfo(skb)->gso_size < IDPF_TX_TSO_MIN_MSS) + features &= ~NETIF_F_GSO_MASK; + else if (idpf_chk_tso_segment(skb, np->tx_max_bufs)) + features &= ~NETIF_F_GSO_MASK; + } /* Ensure MACLEN is <= 126 bytes (63 words) and not an odd size */ len = skb_network_offset(skb); diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index eaad52a83b04..194f924d2bd6 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -11,8 +11,28 @@ #define idpf_tx_buf_next(buf) (*(u32 *)&(buf)->priv) LIBETH_SQE_CHECK_PRIV(u32); -static bool idpf_chk_linearize(struct sk_buff *skb, unsigned int max_bufs, - unsigned int count); +/** + * idpf_chk_linearize - Check if skb exceeds max descriptors per packet + * @skb: send buffer + * @max_bufs: maximum scatter gather buffers for single packet + * @count: number of buffers this packet needs + * + * Make sure we don't exceed maximum scatter gather buffers for a single + * packet. + * The TSO case is handled earlier, in idpf_features_check(). + */ +static bool idpf_chk_linearize(const struct sk_buff *skb, + unsigned int max_bufs, + unsigned int count) +{ + if (likely(count <= max_bufs)) + return false; + + if (skb_is_gso(skb)) + return false; + + return true; +} /** * idpf_tx_timeout - Respond to a Tx Hang @@ -2397,111 +2417,6 @@ int idpf_tso(struct sk_buff *skb, struct idpf_tx_offload_params *off) return 1; } -/** - * __idpf_chk_linearize - Check skb is not using too many buffers - * @skb: send buffer - * @max_bufs: maximum number of buffers - * - * For TSO we need to count the TSO header and segment payload separately. As - * such we need to check cases where we have max_bufs-1 fragments or more as we - * can potentially require max_bufs+1 DMA transactions, 1 for the TSO header, 1 - * for the segment payload in the first descriptor, and another max_buf-1 for - * the fragments. - */ -static bool __idpf_chk_linearize(struct sk_buff *skb, unsigned int max_bufs) -{ - const struct skb_shared_info *shinfo = skb_shinfo(skb); - const skb_frag_t *frag, *stale; - int nr_frags, sum; - - /* no need to check if number of frags is less than max_bufs - 1 */ - nr_frags = shinfo->nr_frags; - if (nr_frags < (max_bufs - 1)) - return false; - - /* We need to walk through the list and validate that each group - * of max_bufs-2 fragments totals at least gso_size. - */ - nr_frags -= max_bufs - 2; - frag = &shinfo->frags[0]; - - /* Initialize size to the negative value of gso_size minus 1. We use - * this as the worst case scenario in which the frag ahead of us only - * provides one byte which is why we are limited to max_bufs-2 - * descriptors for a single transmit as the header and previous - * fragment are already consuming 2 descriptors. - */ - sum = 1 - shinfo->gso_size; - - /* Add size of frags 0 through 4 to create our initial sum */ - sum += skb_frag_size(frag++); - sum += skb_frag_size(frag++); - sum += skb_frag_size(frag++); - sum += skb_frag_size(frag++); - sum += skb_frag_size(frag++); - - /* Walk through fragments adding latest fragment, testing it, and - * then removing stale fragments from the sum. - */ - for (stale = &shinfo->frags[0];; stale++) { - int stale_size = skb_frag_size(stale); - - sum += skb_frag_size(frag++); - - /* The stale fragment may present us with a smaller - * descriptor than the actual fragment size.
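The removed helper above (re-added as idpf_chk_tso_segment() in idpf_lib.c earlier in this diff) is a sliding-window test: once there are max_bufs-1 or more fragments, every window of max_bufs-2 consecutive fragments must contribute at least gso_size bytes, or a single segment could need more descriptors than one transmit allows. A runnable user-space model of the core walk, deliberately omitting the IDPF_TX_MAX_DESC_DATA alignment adjustment for oversized fragments:

#include <stdbool.h>
#include <stdio.h>

/* Returns true (segment in software) if any window of (max_bufs - 2)
 * consecutive fragments sums to less than gso_size.
 */
static bool chk_tso_segment(const int *frag_sz, int nr_frags,
			    int max_bufs, int gso_size)
{
	int win = max_bufs - 2;	/* fragments per segment window */
	int sum = 1 - gso_size;	/* worst case: prior frag gave 1 byte */
	int next, stale;

	if (nr_frags < max_bufs - 1)
		return false;	/* always fits, nothing to check */

	for (next = 0; next < win - 1; next++)
		sum += frag_sz[next];	/* prime the window */

	for (stale = 0; next < nr_frags; stale++) {
		sum += frag_sz[next++];	/* newest fragment closes a window */
		if (sum < 0)
			return true;	/* window smaller than gso_size */
		sum -= frag_sz[stale];	/* slide past the oldest fragment */
	}
	return false;
}

int main(void)
{
	int big[8]  = { 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096 };
	int tiny[8] = { 64, 64, 64, 64, 64, 64, 64, 64 };

	printf("%d\n", chk_tso_segment(big, 8, 8, 1448));	/* 0 */
	printf("%d\n", chk_tso_segment(tiny, 8, 8, 1448));	/* 1 */
	return 0;
}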
To account - * for that we need to remove all the data on the front and - * figure out what the remainder would be in the last - * descriptor associated with the fragment. - */ - if (stale_size > IDPF_TX_MAX_DESC_DATA) { - int align_pad = -(skb_frag_off(stale)) & - (IDPF_TX_MAX_READ_REQ_SIZE - 1); - - sum -= align_pad; - stale_size -= align_pad; - - do { - sum -= IDPF_TX_MAX_DESC_DATA_ALIGNED; - stale_size -= IDPF_TX_MAX_DESC_DATA_ALIGNED; - } while (stale_size > IDPF_TX_MAX_DESC_DATA); - } - - /* if sum is negative we failed to make sufficient progress */ - if (sum < 0) - return true; - - if (!nr_frags--) - break; - - sum -= stale_size; - } - - return false; -} - -/** - * idpf_chk_linearize - Check if skb exceeds max descriptors per packet - * @skb: send buffer - * @max_bufs: maximum scatter gather buffers for single packet - * @count: number of buffers this packet needs - * - * Make sure we don't exceed maximum scatter gather buffers for a single - * packet. We have to do some special checking around the boundary (max_bufs-1) - * if TSO is on since we need count the TSO header and payload separately. - * E.g.: a packet with 7 fragments can require 9 DMA transactions; 1 for TSO - * header, 1 for segment payload, and then 7 for the fragments. - */ -static bool idpf_chk_linearize(struct sk_buff *skb, unsigned int max_bufs, - unsigned int count) -{ - if (likely(count < max_bufs)) - return false; - if (skb_is_gso(skb)) - return __idpf_chk_linearize(skb, max_bufs); - - return count > max_bufs; -} /** * idpf_tx_splitq_get_ctx_desc - grab next desc and update buffer ring diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index 92ef33459aec..51d5cb6599ed 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -920,11 +920,11 @@ static int igb_set_ringparam(struct net_device *netdev, } if (adapter->num_tx_queues > adapter->num_rx_queues) - temp_ring = vmalloc(array_size(sizeof(struct igb_ring), - adapter->num_tx_queues)); + temp_ring = vmalloc_array(adapter->num_tx_queues, + sizeof(struct igb_ring)); else - temp_ring = vmalloc(array_size(sizeof(struct igb_ring), - adapter->num_rx_queues)); + temp_ring = vmalloc_array(adapter->num_rx_queues, + sizeof(struct igb_ring)); if (!temp_ring) { err = -ENOMEM; diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c index ecb35b693ce5..f3e7218ba6f3 100644 --- a/drivers/net/ethernet/intel/igc/igc_ethtool.c +++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c @@ -627,11 +627,11 @@ igc_ethtool_set_ringparam(struct net_device *netdev, } if (adapter->num_tx_queues > adapter->num_rx_queues) - temp_ring = vmalloc(array_size(sizeof(struct igc_ring), - adapter->num_tx_queues)); + temp_ring = vmalloc_array(adapter->num_tx_queues, + sizeof(struct igc_ring)); else - temp_ring = vmalloc(array_size(sizeof(struct igc_ring), - adapter->num_rx_queues)); + temp_ring = vmalloc_array(adapter->num_rx_queues, + sizeof(struct igc_ring)); if (!temp_ring) { err = -ENOMEM; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index 1a2f1bdb91aa..2d660e9edb80 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -1278,7 +1278,7 @@ static int ixgbe_set_ringparam(struct net_device *netdev, /* allocate temporary buffer to store rings in */ i = max_t(int, adapter->num_tx_queues + adapter->num_xdp_queues, 
adapter->num_rx_queues); - temp_ring = vmalloc(array_size(i, sizeof(struct ixgbe_ring))); + temp_ring = vmalloc_array(i, sizeof(struct ixgbe_ring)); if (!temp_ring) { err = -ENOMEM; diff --git a/drivers/net/ethernet/intel/ixgbevf/ethtool.c b/drivers/net/ethernet/intel/ixgbevf/ethtool.c index 7ac53171b041..bebad564188e 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ethtool.c +++ b/drivers/net/ethernet/intel/ixgbevf/ethtool.c @@ -276,9 +276,9 @@ static int ixgbevf_set_ringparam(struct net_device *netdev, } if (new_tx_count != adapter->tx_ring_count) { - tx_ring = vmalloc(array_size(sizeof(*tx_ring), - adapter->num_tx_queues + - adapter->num_xdp_queues)); + tx_ring = vmalloc_array(adapter->num_tx_queues + + adapter->num_xdp_queues, + sizeof(*tx_ring)); if (!tx_ring) { err = -ENOMEM; goto clear_reset; diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 8ebb985d2573..35d1184458fd 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -6222,6 +6222,12 @@ static struct mvpp2_port *mvpp2_pcs_gmac_to_port(struct phylink_pcs *pcs) return container_of(pcs, struct mvpp2_port, pcs_gmac); } +static unsigned int mvpp2_xjg_pcs_inband_caps(struct phylink_pcs *pcs, + phy_interface_t interface) +{ + return LINK_INBAND_DISABLE; +} + static void mvpp2_xlg_pcs_get_state(struct phylink_pcs *pcs, unsigned int neg_mode, struct phylink_link_state *state) @@ -6256,6 +6262,7 @@ static int mvpp2_xlg_pcs_config(struct phylink_pcs *pcs, unsigned int neg_mode, } static const struct phylink_pcs_ops mvpp2_phylink_xlg_pcs_ops = { + .pcs_inband_caps = mvpp2_xjg_pcs_inband_caps, .pcs_get_state = mvpp2_xlg_pcs_get_state, .pcs_config = mvpp2_xlg_pcs_config, }; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h index 950231e7ea71..92ccf343dfe0 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h @@ -161,10 +161,6 @@ int cgx_get_link_info(void *cgxd, int lmac_id, struct cgx_link_user_info *linfo); int cgx_lmac_linkup_start(void *cgxd); int cgx_get_fwdata_base(u64 *base); -int cgx_lmac_get_pause_frm(void *cgxd, int lmac_id, - u8 *tx_pause, u8 *rx_pause); -int cgx_lmac_set_pause_frm(void *cgxd, int lmac_id, - u8 tx_pause, u8 rx_pause); void cgx_lmac_ptp_config(void *cgxd, int lmac_id, bool enable); u8 cgx_lmac_get_p2x(int cgx_id, int lmac_id); int cgx_set_fec(u64 fec, int cgx_id, int lmac_id); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index c6bb3aaa8e0d..2d78e08f985f 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -1164,6 +1164,9 @@ cpt: rvu_program_channels(rvu); cgx_start_linkup(rvu); + rvu_block_bcast_xon(rvu, BLKADDR_NIX0); + rvu_block_bcast_xon(rvu, BLKADDR_NIX1); + err = rvu_mcs_init(rvu); if (err) { dev_err(rvu->dev, "%s: Failed to initialize mcs\n", __func__); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index 18c7bb39dbc7..b58283341923 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -1031,6 +1031,7 @@ int rvu_nix_mcast_update_mcam_entry(struct rvu *rvu, u16 pcifunc, void rvu_nix_flr_free_bpids(struct rvu *rvu, u16 pcifunc); int rvu_alloc_cint_qint_mem(struct rvu *rvu, struct rvu_pfvf *pfvf, int blkaddr, int 
nixlf); +void rvu_block_bcast_xon(struct rvu *rvu, int blkaddr); /* NPC APIs */ void rvu_npc_freemem(struct rvu *rvu); int rvu_npc_get_pkind(struct rvu *rvu, u16 pf); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c index 27c3a2daaaa9..3735372539bd 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c @@ -505,7 +505,9 @@ static int rvu_nix_register_reporters(struct rvu_devlink *rvu_dl) rvu_reporters->nix_event_ctx = nix_event_context; rvu_reporters->rvu_hw_nix_intr_reporter = - devlink_health_reporter_create(rvu_dl->dl, &rvu_hw_nix_intr_reporter_ops, 0, rvu); + devlink_health_reporter_create(rvu_dl->dl, + &rvu_hw_nix_intr_reporter_ops, + rvu); if (IS_ERR(rvu_reporters->rvu_hw_nix_intr_reporter)) { dev_warn(rvu->dev, "Failed to create hw_nix_intr reporter, err=%ld\n", PTR_ERR(rvu_reporters->rvu_hw_nix_intr_reporter)); @@ -513,7 +515,9 @@ static int rvu_nix_register_reporters(struct rvu_devlink *rvu_dl) } rvu_reporters->rvu_hw_nix_gen_reporter = - devlink_health_reporter_create(rvu_dl->dl, &rvu_hw_nix_gen_reporter_ops, 0, rvu); + devlink_health_reporter_create(rvu_dl->dl, + &rvu_hw_nix_gen_reporter_ops, + rvu); if (IS_ERR(rvu_reporters->rvu_hw_nix_gen_reporter)) { dev_warn(rvu->dev, "Failed to create hw_nix_gen reporter, err=%ld\n", PTR_ERR(rvu_reporters->rvu_hw_nix_gen_reporter)); @@ -521,7 +525,9 @@ static int rvu_nix_register_reporters(struct rvu_devlink *rvu_dl) } rvu_reporters->rvu_hw_nix_err_reporter = - devlink_health_reporter_create(rvu_dl->dl, &rvu_hw_nix_err_reporter_ops, 0, rvu); + devlink_health_reporter_create(rvu_dl->dl, + &rvu_hw_nix_err_reporter_ops, + rvu); if (IS_ERR(rvu_reporters->rvu_hw_nix_err_reporter)) { dev_warn(rvu->dev, "Failed to create hw_nix_err reporter, err=%ld\n", PTR_ERR(rvu_reporters->rvu_hw_nix_err_reporter)); @@ -529,7 +535,9 @@ static int rvu_nix_register_reporters(struct rvu_devlink *rvu_dl) } rvu_reporters->rvu_hw_nix_ras_reporter = - devlink_health_reporter_create(rvu_dl->dl, &rvu_hw_nix_ras_reporter_ops, 0, rvu); + devlink_health_reporter_create(rvu_dl->dl, + &rvu_hw_nix_ras_reporter_ops, + rvu); if (IS_ERR(rvu_reporters->rvu_hw_nix_ras_reporter)) { dev_warn(rvu->dev, "Failed to create hw_nix_ras reporter, err=%ld\n", PTR_ERR(rvu_reporters->rvu_hw_nix_ras_reporter)); @@ -1051,7 +1059,9 @@ static int rvu_npa_register_reporters(struct rvu_devlink *rvu_dl) rvu_reporters->npa_event_ctx = npa_event_context; rvu_reporters->rvu_hw_npa_intr_reporter = - devlink_health_reporter_create(rvu_dl->dl, &rvu_hw_npa_intr_reporter_ops, 0, rvu); + devlink_health_reporter_create(rvu_dl->dl, + &rvu_hw_npa_intr_reporter_ops, + rvu); if (IS_ERR(rvu_reporters->rvu_hw_npa_intr_reporter)) { dev_warn(rvu->dev, "Failed to create hw_npa_intr reporter, err=%ld\n", PTR_ERR(rvu_reporters->rvu_hw_npa_intr_reporter)); @@ -1059,7 +1069,9 @@ static int rvu_npa_register_reporters(struct rvu_devlink *rvu_dl) } rvu_reporters->rvu_hw_npa_gen_reporter = - devlink_health_reporter_create(rvu_dl->dl, &rvu_hw_npa_gen_reporter_ops, 0, rvu); + devlink_health_reporter_create(rvu_dl->dl, + &rvu_hw_npa_gen_reporter_ops, + rvu); if (IS_ERR(rvu_reporters->rvu_hw_npa_gen_reporter)) { dev_warn(rvu->dev, "Failed to create hw_npa_gen reporter, err=%ld\n", PTR_ERR(rvu_reporters->rvu_hw_npa_gen_reporter)); @@ -1067,7 +1079,9 @@ static int rvu_npa_register_reporters(struct rvu_devlink *rvu_dl) } rvu_reporters->rvu_hw_npa_err_reporter = - 
devlink_health_reporter_create(rvu_dl->dl, &rvu_hw_npa_err_reporter_ops, 0, rvu); + devlink_health_reporter_create(rvu_dl->dl, + &rvu_hw_npa_err_reporter_ops, + rvu); if (IS_ERR(rvu_reporters->rvu_hw_npa_err_reporter)) { dev_warn(rvu->dev, "Failed to create hw_npa_err reporter, err=%ld\n", PTR_ERR(rvu_reporters->rvu_hw_npa_err_reporter)); @@ -1075,7 +1089,9 @@ static int rvu_npa_register_reporters(struct rvu_devlink *rvu_dl) } rvu_reporters->rvu_hw_npa_ras_reporter = - devlink_health_reporter_create(rvu_dl->dl, &rvu_hw_npa_ras_reporter_ops, 0, rvu); + devlink_health_reporter_create(rvu_dl->dl, + &rvu_hw_npa_ras_reporter_ops, + rvu); if (IS_ERR(rvu_reporters->rvu_hw_npa_ras_reporter)) { dev_warn(rvu->dev, "Failed to create hw_npa_ras reporter, err=%ld\n", PTR_ERR(rvu_reporters->rvu_hw_npa_ras_reporter)); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index 60db1f616cc8..828316211b24 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -6616,3 +6616,19 @@ unlock_grp: return ret; } + +/* On CN10k and older series of silicons, hardware may incorrectly + * assert XOFF on certain channels. Issue a write on NIX_AF_RX_CHANX_CFG + * to broadcast XON on those channels. + */ +void rvu_block_bcast_xon(struct rvu *rvu, int blkaddr) +{ + struct rvu_block *block = &rvu->hw->block[blkaddr]; + u64 cfg; + + if (!block->implemented || is_cn20k(rvu->pdev)) + return; + + cfg = rvu_read64(rvu, blkaddr, NIX_AF_RX_CHANX_CFG(0)); + rvu_write64(rvu, blkaddr, NIX_AF_RX_CHANX_CFG(0), cfg); +} diff --git a/drivers/net/ethernet/mediatek/mtk_wed.c b/drivers/net/ethernet/mediatek/mtk_wed.c index 0a80d8f8cff7..3dbb113b792c 100644 --- a/drivers/net/ethernet/mediatek/mtk_wed.c +++ b/drivers/net/ethernet/mediatek/mtk_wed.c @@ -59,7 +59,9 @@ struct mtk_wed_flow_block_priv { static const struct mtk_wed_soc_data mt7622_data = { .regmap = { .tx_bm_tkid = 0x088, - .wpdma_rx_ring0 = 0x770, + .wpdma_rx_ring = { + 0x770, + }, .reset_idx_tx_mask = GENMASK(3, 0), .reset_idx_rx_mask = GENMASK(17, 16), }, @@ -70,7 +72,9 @@ static const struct mtk_wed_soc_data mt7622_data = { static const struct mtk_wed_soc_data mt7986_data = { .regmap = { .tx_bm_tkid = 0x0c8, - .wpdma_rx_ring0 = 0x770, + .wpdma_rx_ring = { + 0x770, + }, .reset_idx_tx_mask = GENMASK(1, 0), .reset_idx_rx_mask = GENMASK(7, 6), }, @@ -81,7 +85,10 @@ static const struct mtk_wed_soc_data mt7986_data = { static const struct mtk_wed_soc_data mt7988_data = { .regmap = { .tx_bm_tkid = 0x0c8, - .wpdma_rx_ring0 = 0x7d0, + .wpdma_rx_ring = { + 0x7d0, + 0x7d8, + }, .reset_idx_tx_mask = GENMASK(1, 0), .reset_idx_rx_mask = GENMASK(7, 6), }, @@ -621,8 +628,8 @@ mtk_wed_amsdu_init(struct mtk_wed_device *dev) return ret; } - /* eagle E1 PCIE1 tx ring 22 flow control issue */ - if (dev->wlan.id == 0x7991) + /* Kite and Eagle E1 PCIE1 tx ring 22 flow control issue */ + if (dev->wlan.id == 0x7991 || dev->wlan.id == 0x7992) wed_clr(dev, MTK_WED_AMSDU_FIFO, MTK_WED_AMSDU_IS_PRIOR0_RING); wed_set(dev, MTK_WED_CTRL, MTK_WED_CTRL_TX_AMSDU_EN); @@ -1239,7 +1246,11 @@ mtk_wed_set_wpdma(struct mtk_wed_device *dev) return; wed_w32(dev, MTK_WED_WPDMA_RX_GLO_CFG, dev->wlan.wpdma_rx_glo); - wed_w32(dev, dev->hw->soc->regmap.wpdma_rx_ring0, dev->wlan.wpdma_rx); + wed_w32(dev, dev->hw->soc->regmap.wpdma_rx_ring[0], + dev->wlan.wpdma_rx[0]); + if (mtk_wed_is_v3_or_greater(dev->hw)) + wed_w32(dev, dev->hw->soc->regmap.wpdma_rx_ring[1], + dev->wlan.wpdma_rx[1]); if
(!dev->wlan.hw_rro) return; @@ -2323,6 +2334,16 @@ mtk_wed_start(struct mtk_wed_device *dev, u32 irq_mask) if (!dev->rx_wdma[i].desc) mtk_wed_wdma_rx_ring_setup(dev, i, 16, false); + if (dev->wlan.hw_rro) { + for (i = 0; i < MTK_WED_RX_PAGE_QUEUES; i++) { + u32 addr = MTK_WED_RRO_MSDU_PG_CTRL0(i) + + MTK_WED_RING_OFS_COUNT; + + if (!wed_r32(dev, addr)) + wed_w32(dev, addr, 1); + } + } + mtk_wed_hw_init(dev); mtk_wed_configure_irq(dev, irq_mask); diff --git a/drivers/net/ethernet/mediatek/mtk_wed.h b/drivers/net/ethernet/mediatek/mtk_wed.h index c1f0479d7a71..b49aee9a8b65 100644 --- a/drivers/net/ethernet/mediatek/mtk_wed.h +++ b/drivers/net/ethernet/mediatek/mtk_wed.h @@ -17,7 +17,7 @@ struct mtk_wed_wo; struct mtk_wed_soc_data { struct { u32 tx_bm_tkid; - u32 wpdma_rx_ring0; + u32 wpdma_rx_ring[MTK_WED_RX_QUEUES]; u32 reset_idx_tx_mask; u32 reset_idx_rx_mask; } regmap; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index 6ec7d6e0181d..8ef2ac2060ba 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -8,7 +8,6 @@ config MLX5_CORE depends on PCI select AUXILIARY_BUS select NET_DEVLINK - depends on VXLAN || !VXLAN depends on MLXFW || !MLXFW depends on PTP_1588_CLOCK_OPTIONAL depends on PCI_HYPERV_INTERFACE || !PCI_HYPERV_INTERFACE diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index a253c73db9e5..a65ab661375a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -69,7 +69,7 @@ mlx5_core-$(CONFIG_MLX5_TC_SAMPLE) += en/tc/sample.o # Core extra # mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o eswitch_offloads_termtbl.o \ - ecpf.o rdma.o esw/legacy.o \ + ecpf.o rdma.o esw/legacy.o esw/adj_vport.o \ esw/devlink_port.o esw/vporttbl.o esw/qos.o esw/ipsec.o mlx5_core-$(CONFIG_MLX5_ESWITCH) += esw/acl/helper.o \ @@ -85,7 +85,9 @@ mlx5_core-$(CONFIG_MLX5_BRIDGE) += esw/bridge.o esw/bridge_mcast.o esw/bridge mlx5_core-$(CONFIG_HWMON) += hwmon.o mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o -mlx5_core-$(CONFIG_VXLAN) += lib/vxlan.o +ifneq ($(CONFIG_VXLAN),) + mlx5_core-y += lib/vxlan.o +endif mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += lib/hv.o lib/hv_vhca.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c index 86253a89c24c..73f5b62b8c7f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. 
*/ +#include <linux/mlx5/vport.h> + #include "reporter_vnic.h" #include "en_stats.h" #include "devlink.h" @@ -133,7 +135,7 @@ void mlx5_reporter_vnic_create(struct mlx5_core_dev *dev) health->vnic_reporter = devlink_health_reporter_create(devlink, &mlx5_reporter_vnic_ops, - 0, dev); + dev); if (IS_ERR(health->vnic_reporter)) mlx5_core_warn(dev, "Failed to create vnic reporter, err = %ld\n", diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c index 16c44d628eda..eb1cace5910c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c @@ -651,22 +651,26 @@ void mlx5e_reporter_icosq_resume_recovery(struct mlx5e_channel *c) mutex_unlock(&c->icosq_recovery_lock); } +#define MLX5E_REPORTER_RX_GRACEFUL_PERIOD 500 +#define MLX5E_REPORTER_RX_BURST_PERIOD 500 + static const struct devlink_health_reporter_ops mlx5_rx_reporter_ops = { .name = "rx", .recover = mlx5e_rx_reporter_recover, .diagnose = mlx5e_rx_reporter_diagnose, .dump = mlx5e_rx_reporter_dump, + .default_graceful_period = MLX5E_REPORTER_RX_GRACEFUL_PERIOD, + .default_burst_period = MLX5E_REPORTER_RX_BURST_PERIOD, }; -#define MLX5E_REPORTER_RX_GRACEFUL_PERIOD 500 - void mlx5e_reporter_rx_create(struct mlx5e_priv *priv) { + struct devlink_port *port = priv->netdev->devlink_port; struct devlink_health_reporter *reporter; - reporter = devlink_port_health_reporter_create(priv->netdev->devlink_port, + reporter = devlink_port_health_reporter_create(port, &mlx5_rx_reporter_ops, - MLX5E_REPORTER_RX_GRACEFUL_PERIOD, priv); + priv); if (IS_ERR(reporter)) { netdev_warn(priv->netdev, "Failed to create rx reporter, err = %ld\n", PTR_ERR(reporter)); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index 85d5cb39b107..5a4fe8403a21 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -539,22 +539,26 @@ void mlx5e_reporter_tx_ptpsq_unhealthy(struct mlx5e_ptpsq *ptpsq) mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx); } +#define MLX5E_REPORTER_TX_GRACEFUL_PERIOD 500 +#define MLX5E_REPORTER_TX_BURST_PERIOD 500 + static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops = { .name = "tx", .recover = mlx5e_tx_reporter_recover, .diagnose = mlx5e_tx_reporter_diagnose, .dump = mlx5e_tx_reporter_dump, + .default_graceful_period = MLX5E_REPORTER_TX_GRACEFUL_PERIOD, + .default_burst_period = MLX5E_REPORTER_TX_BURST_PERIOD, }; -#define MLX5_REPORTER_TX_GRACEFUL_PERIOD 500 - void mlx5e_reporter_tx_create(struct mlx5e_priv *priv) { + struct devlink_port *port = priv->netdev->devlink_port; struct devlink_health_reporter *reporter; - reporter = devlink_port_health_reporter_create(priv->netdev->devlink_port, + reporter = devlink_port_health_reporter_create(port, &mlx5_tx_reporter_ops, - MLX5_REPORTER_TX_GRACEFUL_PERIOD, priv); + priv); if (IS_ERR(reporter)) { netdev_warn(priv->netdev, "Failed to create tx reporter, err = %ld\n", diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index 2162d776fe35..a14f216048cd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -1,7 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* Copyright (c) 2018 Mellanox Technologies. 
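The reporter hunks above and in the Marvell files earlier reflect one API change: devlink_health_reporter_create() loses its graceful-period argument, and the defaults move into the ops table as .default_graceful_period (and .default_burst_period). A sketch of the resulting registration shape; this is kernel-context code rather than a standalone program, and the foo names are hypothetical.

#include <linux/devlink.h>
#include <linux/err.h>

/* Hypothetical driver hook; real reporters also supply .diagnose/.dump. */
static int foo_reporter_recover(struct devlink_health_reporter *reporter,
				void *priv_ctx, struct netlink_ext_ack *extack)
{
	/* reset the unhealthy object here */
	return 0;
}

static const struct devlink_health_reporter_ops foo_reporter_ops = {
	.name = "foo",
	.recover = foo_reporter_recover,
	/* formerly the graceful_period argument of the create call */
	.default_graceful_period = 500,
};

static int foo_reporter_register(struct devlink *devlink, void *priv)
{
	struct devlink_health_reporter *r;

	r = devlink_health_reporter_create(devlink, &foo_reporter_ops, priv);
	return PTR_ERR_OR_ZERO(r);
}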
*/ -#include <net/inet_ecn.h> +#include <net/flow.h> +#include <net/inet_dscp.h> #include <net/vxlan.h> #include <net/gre.h> #include <net/geneve.h> @@ -233,7 +234,7 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, int err; /* add the IP fields */ - attr.fl.fl4.flowi4_tos = tun_key->tos & ~INET_ECN_MASK; + attr.fl.fl4.flowi4_dscp = inet_dsfield_to_dscp(tun_key->tos); attr.fl.fl4.daddr = tun_key->u.ipv4.dst; attr.fl.fl4.saddr = tun_key->u.ipv4.src; attr.ttl = tun_key->ttl; @@ -349,7 +350,7 @@ int mlx5e_tc_tun_update_header_ipv4(struct mlx5e_priv *priv, int err; /* add the IP fields */ - attr.fl.fl4.flowi4_tos = tun_key->tos & ~INET_ECN_MASK; + attr.fl.fl4.flowi4_dscp = inet_dsfield_to_dscp(tun_key->tos); attr.fl.fl4.daddr = tun_key->u.ipv4.dst; attr.fl.fl4.saddr = tun_key->u.ipv4.src; attr.ttl = tun_key->ttl; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c index 65ccb33edafb..d7a11ff9bbdb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c @@ -498,9 +498,9 @@ static void resync_update_sn(struct mlx5e_rq *rq, struct sk_buff *skb) depth += sizeof(struct iphdr); th = (void *)iph + sizeof(struct iphdr); - sk = inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, - iph->saddr, th->source, iph->daddr, - th->dest, netdev->ifindex); + sk = inet_lookup_established(net, iph->saddr, th->source, + iph->daddr, th->dest, + netdev->ifindex); #if IS_ENABLED(CONFIG_IPV6) } else { struct ipv6hdr *ipv6h = (struct ipv6hdr *)iph; @@ -508,8 +508,7 @@ static void resync_update_sn(struct mlx5e_rq *rq, struct sk_buff *skb) depth += sizeof(struct ipv6hdr); th = (void *)ipv6h + sizeof(struct ipv6hdr); - sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, - &ipv6h->saddr, th->source, + sk = __inet6_lookup_established(net, &ipv6h->saddr, th->source, &ipv6h->daddr, ntohs(th->dest), netdev->ifindex, 0); #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index e680673ffb72..714cce595692 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -780,13 +780,6 @@ static void mlx5e_rq_shampo_hd_info_free(struct mlx5e_rq *rq) bitmap_free(rq->mpwqe.shampo->bitmap); } -static bool mlx5_rq_needs_separate_hd_pool(struct mlx5e_rq *rq) -{ - struct netdev_rx_queue *rxq = __netif_get_rx_queue(rq->netdev, rq->ix); - - return !!rxq->mp_params.mp_ops; -} - static int mlx5_rq_shampo_alloc(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_rq_param *rqp, @@ -825,7 +818,7 @@ static int mlx5_rq_shampo_alloc(struct mlx5_core_dev *mdev, hd_pool_size = (rq->mpwqe.shampo->hd_per_wqe * wq_size) / MLX5E_SHAMPO_WQ_HEADER_PER_PAGE; - if (mlx5_rq_needs_separate_hd_pool(rq)) { + if (netif_rxq_has_unreadable_mp(rq->netdev, rq->ix)) { /* Separate page pool for shampo headers */ struct page_pool_params pp_params = { }; @@ -5642,12 +5635,36 @@ static int mlx5e_queue_start(struct net_device *dev, void *newq, return 0; } +static struct device *mlx5e_queue_get_dma_dev(struct net_device *dev, + int queue_index) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_channels *channels; + struct device *pdev = NULL; + struct mlx5e_channel *ch; + + channels = &priv->channels; + + mutex_lock(&priv->state_lock); + + if (queue_index >= channels->num) + goto out; + + ch = 
channels->c[queue_index]; + pdev = ch->pdev; +out: + mutex_unlock(&priv->state_lock); + + return pdev; +} + static const struct netdev_queue_mgmt_ops mlx5e_queue_mgmt_ops = { .ndo_queue_mem_size = sizeof(struct mlx5_qmgmt_data), .ndo_queue_mem_alloc = mlx5e_queue_mem_alloc, .ndo_queue_mem_free = mlx5e_queue_mem_free, .ndo_queue_start = mlx5e_queue_start, .ndo_queue_stop = mlx5e_queue_stop, + .ndo_queue_get_dma_dev = mlx5e_queue_get_dma_dev, }; static void mlx5e_build_nic_netdev(struct net_device *netdev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 63a7a788fb0d..b231e7855bca 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -1447,7 +1447,7 @@ static void mlx5e_rep_vnic_reporter_create(struct mlx5e_priv *priv, reporter = devl_port_health_reporter_create(dl_port, &mlx5_rep_vnic_reporter_ops, - 0, rpriv); + rpriv); if (IS_ERR(reporter)) { mlx5_core_err(priv->mdev, "Failed to create representor vnic reporter, err = %ld\n", diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/adj_vport.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/adj_vport.c new file mode 100644 index 000000000000..0091ba697bae --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/adj_vport.c @@ -0,0 +1,209 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include "fs_core.h" +#include "eswitch.h" + +enum { + MLX5_ADJ_VPORT_DISCONNECT = 0x0, + MLX5_ADJ_VPORT_CONNECT = 0x1, +}; + +static int mlx5_esw_adj_vport_modify(struct mlx5_core_dev *dev, + u16 vport, bool connect) +{ + u32 in[MLX5_ST_SZ_DW(modify_vport_state_in)] = {}; + + MLX5_SET(modify_vport_state_in, in, opcode, + MLX5_CMD_OP_MODIFY_VPORT_STATE); + MLX5_SET(modify_vport_state_in, in, op_mod, + MLX5_VPORT_STATE_OP_MOD_ESW_VPORT); + MLX5_SET(modify_vport_state_in, in, other_vport, 1); + MLX5_SET(modify_vport_state_in, in, vport_number, vport); + MLX5_SET(modify_vport_state_in, in, ingress_connect_valid, 1); + MLX5_SET(modify_vport_state_in, in, egress_connect_valid, 1); + MLX5_SET(modify_vport_state_in, in, ingress_connect, connect); + MLX5_SET(modify_vport_state_in, in, egress_connect, connect); + + return mlx5_cmd_exec_in(dev, modify_vport_state, in); +} + +static void mlx5_esw_destroy_esw_vport(struct mlx5_core_dev *dev, u16 vport) +{ + u32 in[MLX5_ST_SZ_DW(destroy_esw_vport_in)] = {}; + + MLX5_SET(destroy_esw_vport_in, in, opcode, + MLX5_CMD_OPCODE_DESTROY_ESW_VPORT); + MLX5_SET(destroy_esw_vport_in, in, vport_num, vport); + + mlx5_cmd_exec_in(dev, destroy_esw_vport, in); +} + +static int mlx5_esw_create_esw_vport(struct mlx5_core_dev *dev, u16 vhca_id, + u16 *vport_num) +{ + u32 out[MLX5_ST_SZ_DW(create_esw_vport_out)] = {}; + u32 in[MLX5_ST_SZ_DW(create_esw_vport_in)] = {}; + int err; + + MLX5_SET(create_esw_vport_in, in, opcode, + MLX5_CMD_OPCODE_CREATE_ESW_VPORT); + MLX5_SET(create_esw_vport_in, in, managed_vhca_id, vhca_id); + + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (!err) + *vport_num = MLX5_GET(create_esw_vport_out, out, vport_num); + + return err; +} + +static int mlx5_esw_adj_vport_create(struct mlx5_eswitch *esw, u16 vhca_id, + const void *rid_info_reg) +{ + struct mlx5_vport *vport; + u16 vport_num; + int err; + + err = mlx5_esw_create_esw_vport(esw->dev, vhca_id, &vport_num); + if (err) { + esw_warn(esw->dev, + "Failed to create adjacent vport for vhca_id %d, err %d\n", + vhca_id, 
err); + return err; + } + + esw_debug(esw->dev, "Created adjacent vport[%d] %d for vhca_id 0x%x\n", + esw->last_vport_idx, vport_num, vhca_id); + + err = mlx5_esw_vport_alloc(esw, esw->last_vport_idx++, vport_num); + if (err) + goto destroy_esw_vport; + + xa_set_mark(&esw->vports, vport_num, MLX5_ESW_VPT_VF); + vport = mlx5_eswitch_get_vport(esw, vport_num); + vport->adjacent = true; + vport->vhca_id = vhca_id; + + vport->adj_info.parent_pci_devfn = + MLX5_GET(function_vhca_rid_info_reg, rid_info_reg, + parent_pci_device_function); + vport->adj_info.function_id = + MLX5_GET(function_vhca_rid_info_reg, rid_info_reg, function_id); + + mlx5_fs_vport_egress_acl_ns_add(esw->dev->priv.steering, vport->index); + mlx5_fs_vport_ingress_acl_ns_add(esw->dev->priv.steering, vport->index); + err = mlx5_esw_offloads_rep_add(esw, vport); + if (err) + goto acl_ns_remove; + + mlx5_esw_adj_vport_modify(esw->dev, vport_num, MLX5_ADJ_VPORT_CONNECT); + return 0; + +acl_ns_remove: + mlx5_fs_vport_ingress_acl_ns_remove(esw->dev->priv.steering, + vport->index); + mlx5_fs_vport_egress_acl_ns_remove(esw->dev->priv.steering, + vport->index); + mlx5_esw_vport_free(esw, vport); +destroy_esw_vport: + mlx5_esw_destroy_esw_vport(esw->dev, vport_num); + return err; +} + +static void mlx5_esw_adj_vport_destroy(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + u16 vport_num = vport->vport; + + esw_debug(esw->dev, "Destroying adjacent vport %d for vhca_id 0x%x\n", + vport_num, vport->vhca_id); + mlx5_esw_adj_vport_modify(esw->dev, vport_num, + MLX5_ADJ_VPORT_DISCONNECT); + mlx5_esw_offloads_rep_remove(esw, vport); + mlx5_fs_vport_egress_acl_ns_remove(esw->dev->priv.steering, + vport->index); + mlx5_fs_vport_ingress_acl_ns_remove(esw->dev->priv.steering, + vport->index); + mlx5_esw_vport_free(esw, vport); + /* Reset the vport index back so new adj vports can use this index. + * When vport count can incrementally change, this needs to be modified. 
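The create path above uses the standard kernel unwind idiom: each acquired resource gets a cleanup label, and a failure jumps to the label that releases everything acquired so far, in reverse order. A runnable toy illustration of that shape, with invented resource names mirroring the vport/ACL/representor sequence:

#include <stdio.h>

static int acquire(const char *what, int ok)
{
	printf("acquire %s: %s\n", what, ok ? "ok" : "fail");
	return ok ? 0 : -1;
}

static void release(const char *what)
{
	printf("release %s\n", what);
}

static int create(int fail_at)
{
	int err;

	err = acquire("vport", fail_at != 1);
	if (err)
		return err;

	err = acquire("acl namespaces", fail_at != 2);
	if (err)
		goto destroy_vport;

	err = acquire("representor", fail_at != 3);
	if (err)
		goto remove_acl_ns;

	return 0;

remove_acl_ns:
	release("acl namespaces");
destroy_vport:
	release("vport");
	return err;
}

int main(void)
{
	create(3);	/* representor fails: ACLs then vport are unwound */
	return 0;
}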
+ */ + esw->last_vport_idx--; + mlx5_esw_destroy_esw_vport(esw->dev, vport_num); +} + +void mlx5_esw_adjacent_vhcas_cleanup(struct mlx5_eswitch *esw) +{ + struct mlx5_vport *vport; + unsigned long i; + + if (!MLX5_CAP_GEN_2(esw->dev, delegated_vhca_max)) + return; + + mlx5_esw_for_each_vf_vport(esw, i, vport, U16_MAX) { + if (!vport->adjacent) + continue; + mlx5_esw_adj_vport_destroy(esw, vport); + } +} + +void mlx5_esw_adjacent_vhcas_setup(struct mlx5_eswitch *esw) +{ + u32 delegated_vhca_max = MLX5_CAP_GEN_2(esw->dev, delegated_vhca_max); + u32 in[MLX5_ST_SZ_DW(query_delegated_vhca_in)] = {}; + int outlen, err, i = 0; + u8 *out; + u32 count; + + if (!delegated_vhca_max) + return; + + outlen = MLX5_ST_SZ_BYTES(query_delegated_vhca_out) + + delegated_vhca_max * + MLX5_ST_SZ_BYTES(delegated_function_vhca_rid_info); + + esw_debug(esw->dev, "delegated_vhca_max=%d\n", delegated_vhca_max); + + out = kvzalloc(outlen, GFP_KERNEL); + if (!out) + return; + + MLX5_SET(query_delegated_vhca_in, in, opcode, + MLX5_CMD_OPCODE_QUERY_DELEGATED_VHCA); + + err = mlx5_cmd_exec(esw->dev, in, sizeof(in), out, outlen); + if (err) { + kvfree(out); + esw_warn(esw->dev, "Failed to query delegated vhca, err %d\n", + err); + return; + } + + count = MLX5_GET(query_delegated_vhca_out, out, functions_count); + esw_debug(esw->dev, "Delegated vhca functions count %d\n", count); + + for (i = 0; i < count; i++) { + const void *rid_info, *rid_info_reg; + u16 vhca_id; + + rid_info = MLX5_ADDR_OF(query_delegated_vhca_out, out, + delegated_function_vhca_rid_info[i]); + + rid_info_reg = MLX5_ADDR_OF(delegated_function_vhca_rid_info, + rid_info, function_vhca_rid_info); + + vhca_id = MLX5_GET(function_vhca_rid_info_reg, rid_info_reg, + vhca_id); + esw_debug(esw->dev, "Delegating vhca_id 0x%x\n", vhca_id); + + err = mlx5_esw_adj_vport_create(esw, vhca_id, rid_info_reg); + if (err) { + esw_warn(esw->dev, + "Failed to init adjacent vhca 0x%x, err %d\n", + vhca_id, err); + break; + } + } + + kvfree(out); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c index c33accadae0f..cf88a106d80d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c @@ -27,6 +27,7 @@ static void mlx5_esw_offloads_pf_vf_devlink_port_attrs_set(struct mlx5_eswitch * { struct mlx5_core_dev *dev = esw->dev; struct netdev_phys_item_id ppid = {}; + struct mlx5_vport *vport; u32 controller_num = 0; bool external; u16 pfnum; @@ -42,10 +43,18 @@ static void mlx5_esw_offloads_pf_vf_devlink_port_attrs_set(struct mlx5_eswitch * dl_port->attrs.switch_id.id_len = ppid.id_len; devlink_port_attrs_pci_pf_set(dl_port, controller_num, pfnum, external); } else if (mlx5_eswitch_is_vf_vport(esw, vport_num)) { + u16 func_id = vport_num - 1; + + vport = mlx5_eswitch_get_vport(esw, vport_num); memcpy(dl_port->attrs.switch_id.id, ppid.id, ppid.id_len); dl_port->attrs.switch_id.id_len = ppid.id_len; + if (vport->adjacent) { + func_id = vport->adj_info.function_id; + pfnum = vport->adj_info.parent_pci_devfn; + } + devlink_port_attrs_pci_vf_set(dl_port, controller_num, pfnum, - vport_num - 1, external); + func_id, external); } else if (mlx5_core_is_ec_vf_vport(esw->dev, vport_num)) { u16 base_vport = mlx5_core_ec_vf_vport_base(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 4917d185d0c3..10eca910a2db 100644 --- 
a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -820,6 +820,7 @@ static int mlx5_esw_vport_caps_get(struct mlx5_eswitch *esw, struct mlx5_vport * hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability); vport->info.roce_enabled = MLX5_GET(cmd_hca_cap, hca_caps, roce); + vport->vhca_id = MLX5_GET(cmd_hca_cap, hca_caps, vhca_id); if (!MLX5_CAP_GEN_MAX(esw->dev, hca_cap_2)) goto out_free; @@ -839,6 +840,18 @@ out_free: return err; } +bool mlx5_esw_vport_vhca_id(struct mlx5_eswitch *esw, u16 vportn, u16 *vhca_id) +{ + struct mlx5_vport *vport; + + vport = mlx5_eswitch_get_vport(esw, vportn); + if (IS_ERR(vport) || MLX5_VPORT_INVAL_VHCA_ID(vport)) + return false; + + *vhca_id = vport->vhca_id; + return true; +} + static int esw_vport_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { bool vst_mode_steering = esw_vst_mode_is_steering(esw); @@ -929,7 +942,7 @@ int mlx5_esw_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport, if (!mlx5_esw_is_manager_vport(esw, vport_num) && MLX5_CAP_GEN(esw->dev, vhca_resource_manager)) { - ret = mlx5_esw_vport_vhca_id_set(esw, vport_num); + ret = mlx5_esw_vport_vhca_id_map(esw, vport); if (ret) goto err_vhca_mapping; } @@ -973,7 +986,7 @@ void mlx5_esw_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vport) if (!mlx5_esw_is_manager_vport(esw, vport_num) && MLX5_CAP_GEN(esw->dev, vhca_resource_manager)) - mlx5_esw_vport_vhca_id_clear(esw, vport_num); + mlx5_esw_vport_vhca_id_unmap(esw, vport); if (vport->vport != MLX5_VPORT_PF && (vport->info.ipsec_crypto_enabled || vport->info.ipsec_packet_enabled)) @@ -1038,6 +1051,25 @@ const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev) return ERR_PTR(err); } +static int mlx5_esw_host_functions_enabled_query(struct mlx5_eswitch *esw) +{ + const u32 *query_host_out; + + if (!mlx5_core_is_ecpf_esw_manager(esw->dev)) + return 0; + + query_host_out = mlx5_esw_query_functions(esw->dev); + if (IS_ERR(query_host_out)) + return PTR_ERR(query_host_out); + + esw->esw_funcs.host_funcs_disabled = + MLX5_GET(query_esw_functions_out, query_host_out, + host_params_context.host_pf_not_exist); + + kvfree(query_host_out); + return 0; +} + static void mlx5_eswitch_event_handler_register(struct mlx5_eswitch *esw) { if (esw->mode == MLX5_ESWITCH_OFFLOADS && mlx5_eswitch_is_funcs_handler(esw->dev)) { @@ -1185,7 +1217,8 @@ void mlx5_eswitch_unload_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs) unsigned long i; mlx5_esw_for_each_vf_vport(esw, i, vport, num_vfs) { - if (!vport->enabled) + /* Adjacent VFs are unloaded separately */ + if (!vport->enabled || vport->adjacent) continue; mlx5_eswitch_unload_pf_vf_vport(esw, vport->vport); } @@ -1204,6 +1237,42 @@ static void mlx5_eswitch_unload_ec_vf_vports(struct mlx5_eswitch *esw, } } +static void mlx5_eswitch_unload_adj_vf_vports(struct mlx5_eswitch *esw) +{ + struct mlx5_vport *vport; + unsigned long i; + + mlx5_esw_for_each_vf_vport(esw, i, vport, U16_MAX) { + if (!vport->enabled || !vport->adjacent) + continue; + mlx5_eswitch_unload_pf_vf_vport(esw, vport->vport); + } +} + +static int +mlx5_eswitch_load_adj_vf_vports(struct mlx5_eswitch *esw, + enum mlx5_eswitch_vport_event enabled_events) +{ + struct mlx5_vport *vport; + unsigned long i; + int err; + + mlx5_esw_for_each_vf_vport(esw, i, vport, U16_MAX) { + if (!vport->adjacent) + continue; + err = mlx5_eswitch_load_pf_vf_vport(esw, vport->vport, + enabled_events); + if (err) + goto unload_adj_vf_vport; + } + + return 0; + 
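+/* Error unwind: unload any adjacent VF vports that were loaded above */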
+unload_adj_vf_vport: + mlx5_eswitch_unload_adj_vf_vports(esw); + return err; +} + int mlx5_eswitch_load_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs, enum mlx5_eswitch_vport_event enabled_events) { @@ -1278,17 +1347,19 @@ mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw, esw->mode == MLX5_ESWITCH_LEGACY; /* Enable PF vport */ - if (pf_needed) { + if (pf_needed && mlx5_esw_host_functions_enabled(esw->dev)) { ret = mlx5_eswitch_load_pf_vf_vport(esw, MLX5_VPORT_PF, enabled_events); if (ret) return ret; } - /* Enable external host PF HCA */ - ret = host_pf_enable_hca(esw->dev); - if (ret) - goto pf_hca_err; + if (mlx5_esw_host_functions_enabled(esw->dev)) { + /* Enable external host PF HCA */ + ret = host_pf_enable_hca(esw->dev); + if (ret) + goto pf_hca_err; + } /* Enable ECPF vport */ if (mlx5_ecpf_vport_exists(esw->dev)) { @@ -1311,8 +1382,16 @@ mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw, enabled_events); if (ret) goto vf_err; + + /* Enable adjacent VF vports */ + ret = mlx5_eswitch_load_adj_vf_vports(esw, enabled_events); + if (ret) + goto unload_vf_vports; + return 0; +unload_vf_vports: + mlx5_eswitch_unload_vf_vports(esw, esw->esw_funcs.num_vfs); vf_err: if (mlx5_core_ec_sriov_enabled(esw->dev)) mlx5_eswitch_unload_ec_vf_vports(esw, esw->esw_funcs.num_ec_vfs); @@ -1320,9 +1399,10 @@ ec_vf_err: if (mlx5_ecpf_vport_exists(esw->dev)) mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_ECPF); ecpf_err: - host_pf_disable_hca(esw->dev); + if (mlx5_esw_host_functions_enabled(esw->dev)) + host_pf_disable_hca(esw->dev); pf_hca_err: - if (pf_needed) + if (pf_needed && mlx5_esw_host_functions_enabled(esw->dev)) mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_PF); return ret; } @@ -1332,6 +1412,8 @@ pf_hca_err: */ void mlx5_eswitch_disable_pf_vf_vports(struct mlx5_eswitch *esw) { + mlx5_eswitch_unload_adj_vf_vports(esw); + mlx5_eswitch_unload_vf_vports(esw, esw->esw_funcs.num_vfs); if (mlx5_core_ec_sriov_enabled(esw->dev)) @@ -1342,10 +1424,12 @@ void mlx5_eswitch_disable_pf_vf_vports(struct mlx5_eswitch *esw) mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_ECPF); } - host_pf_disable_hca(esw->dev); + if (mlx5_esw_host_functions_enabled(esw->dev)) + host_pf_disable_hca(esw->dev); - if (mlx5_core_is_ecpf_esw_manager(esw->dev) || - esw->mode == MLX5_ESWITCH_LEGACY) + if ((mlx5_core_is_ecpf_esw_manager(esw->dev) || + esw->mode == MLX5_ESWITCH_LEGACY) && + mlx5_esw_host_functions_enabled(esw->dev)) mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_PF); } @@ -1402,19 +1486,76 @@ static void mlx5_esw_mode_change_notify(struct mlx5_eswitch *esw, u16 mode) blocking_notifier_call_chain(&esw->n_head, 0, &info); } +static int mlx5_esw_egress_acls_init(struct mlx5_core_dev *dev) +{ + struct mlx5_flow_steering *steering = dev->priv.steering; + int total_vports = mlx5_eswitch_get_total_vports(dev); + int err; + int i; + + for (i = 0; i < total_vports; i++) { + err = mlx5_fs_vport_egress_acl_ns_add(steering, i); + if (err) + goto acl_ns_remove; + } + return 0; + +acl_ns_remove: + while (i--) + mlx5_fs_vport_egress_acl_ns_remove(steering, i); + return err; +} + +static void mlx5_esw_egress_acls_cleanup(struct mlx5_core_dev *dev) +{ + struct mlx5_flow_steering *steering = dev->priv.steering; + int total_vports = mlx5_eswitch_get_total_vports(dev); + int i; + + for (i = total_vports - 1; i >= 0; i--) + mlx5_fs_vport_egress_acl_ns_remove(steering, i); +} + +static int mlx5_esw_ingress_acls_init(struct mlx5_core_dev *dev) +{ + struct mlx5_flow_steering *steering = dev->priv.steering; + int 
total_vports = mlx5_eswitch_get_total_vports(dev); + int err; + int i; + + for (i = 0; i < total_vports; i++) { + err = mlx5_fs_vport_ingress_acl_ns_add(steering, i); + if (err) + goto acl_ns_remove; + } + return 0; + +acl_ns_remove: + while (i--) + mlx5_fs_vport_ingress_acl_ns_remove(steering, i); + return err; +} + +static void mlx5_esw_ingress_acls_cleanup(struct mlx5_core_dev *dev) +{ + struct mlx5_flow_steering *steering = dev->priv.steering; + int total_vports = mlx5_eswitch_get_total_vports(dev); + int i; + + for (i = total_vports - 1; i >= 0; i--) + mlx5_fs_vport_ingress_acl_ns_remove(steering, i); +} + static int mlx5_esw_acls_ns_init(struct mlx5_eswitch *esw) { struct mlx5_core_dev *dev = esw->dev; - int total_vports; int err; if (esw->flags & MLX5_ESWITCH_VPORT_ACL_NS_CREATED) return 0; - total_vports = mlx5_eswitch_get_total_vports(dev); - if (MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support)) { - err = mlx5_fs_egress_acls_init(dev, total_vports); + err = mlx5_esw_egress_acls_init(dev); if (err) return err; } else { @@ -1422,7 +1563,7 @@ static int mlx5_esw_acls_ns_init(struct mlx5_eswitch *esw) } if (MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support)) { - err = mlx5_fs_ingress_acls_init(dev, total_vports); + err = mlx5_esw_ingress_acls_init(dev); if (err) goto err; } else { @@ -1433,7 +1574,7 @@ static int mlx5_esw_acls_ns_init(struct mlx5_eswitch *esw) err: if (MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support)) - mlx5_fs_egress_acls_cleanup(dev); + mlx5_esw_egress_acls_cleanup(dev); return err; } @@ -1443,9 +1584,9 @@ static void mlx5_esw_acls_ns_cleanup(struct mlx5_eswitch *esw) esw->flags &= ~MLX5_ESWITCH_VPORT_ACL_NS_CREATED; if (MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support)) - mlx5_fs_ingress_acls_cleanup(dev); + mlx5_esw_ingress_acls_cleanup(dev); if (MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support)) - mlx5_fs_egress_acls_cleanup(dev); + mlx5_esw_egress_acls_cleanup(dev); } /** @@ -1674,7 +1815,8 @@ int mlx5_esw_sf_max_hpf_functions(struct mlx5_core_dev *dev, u16 *max_sfs, u16 * void *hca_caps; int err; - if (!mlx5_core_is_ecpf(dev)) { + if (!mlx5_core_is_ecpf(dev) || + !mlx5_esw_host_functions_enabled(dev)) { *max_sfs = 0; return 0; } @@ -1696,8 +1838,7 @@ out_free: return err; } -static int mlx5_esw_vport_alloc(struct mlx5_eswitch *esw, - int index, u16 vport_num) +int mlx5_esw_vport_alloc(struct mlx5_eswitch *esw, int index, u16 vport_num) { struct mlx5_vport *vport; int err; @@ -1710,6 +1851,7 @@ static int mlx5_esw_vport_alloc(struct mlx5_eswitch *esw, vport->vport = vport_num; vport->index = index; vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO; + vport->vhca_id = MLX5_VHCA_ID_INVALID; INIT_WORK(&vport->vport_change_handler, esw_vport_change_handler); err = xa_insert(&esw->vports, vport_num, vport, GFP_KERNEL); if (err) @@ -1723,8 +1865,9 @@ insert_err: return err; } -static void mlx5_esw_vport_free(struct mlx5_eswitch *esw, struct mlx5_vport *vport) +void mlx5_esw_vport_free(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { + esw->total_vports--; xa_erase(&esw->vports, vport->vport); kfree(vport); } @@ -1750,21 +1893,23 @@ static int mlx5_esw_vports_init(struct mlx5_eswitch *esw) xa_init(&esw->vports); - err = mlx5_esw_vport_alloc(esw, idx, MLX5_VPORT_PF); - if (err) - goto err; - if (esw->first_host_vport == MLX5_VPORT_PF) - xa_set_mark(&esw->vports, idx, MLX5_ESW_VPT_HOST_FN); - idx++; - - for (i = 0; i < mlx5_core_max_vfs(dev); i++) { - err = mlx5_esw_vport_alloc(esw, idx, idx); + if (mlx5_esw_host_functions_enabled(dev)) { + err = mlx5_esw_vport_alloc(esw, idx, MLX5_VPORT_PF); if (err) 
goto err; - xa_set_mark(&esw->vports, idx, MLX5_ESW_VPT_VF); - xa_set_mark(&esw->vports, idx, MLX5_ESW_VPT_HOST_FN); + if (esw->first_host_vport == MLX5_VPORT_PF) + xa_set_mark(&esw->vports, idx, MLX5_ESW_VPT_HOST_FN); idx++; + for (i = 0; i < mlx5_core_max_vfs(dev); i++) { + err = mlx5_esw_vport_alloc(esw, idx, idx); + if (err) + goto err; + xa_set_mark(&esw->vports, idx, MLX5_ESW_VPT_VF); + xa_set_mark(&esw->vports, idx, MLX5_ESW_VPT_HOST_FN); + idx++; + } } + base_sf_num = mlx5_sf_start_function_id(dev); for (i = 0; i < mlx5_sf_max_functions(dev); i++) { err = mlx5_esw_vport_alloc(esw, idx, base_sf_num + i); @@ -1806,6 +1951,9 @@ static int mlx5_esw_vports_init(struct mlx5_eswitch *esw) err = mlx5_esw_vport_alloc(esw, idx, MLX5_VPORT_UPLINK); if (err) goto err; + + /* Adjacent vports or other dynamically created vports will use this */ + esw->last_vport_idx = ++idx; return 0; err: @@ -1864,6 +2012,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) goto free_esw; esw->dev = dev; + dev->priv.eswitch = esw; esw->manager_vport = mlx5_eswitch_manager_vport(dev); esw->first_host_vport = mlx5_eswitch_first_host_vport_num(dev); @@ -1874,11 +2023,14 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) goto abort; } + err = mlx5_esw_host_functions_enabled_query(esw); + if (err) + goto abort; + err = mlx5_esw_vports_init(esw); if (err) goto abort; - dev->priv.eswitch = esw; err = esw_offloads_init(esw); if (err) goto reps_err; @@ -2410,3 +2562,11 @@ void mlx5_eswitch_unblock_ipsec(struct mlx5_core_dev *dev) dev->num_ipsec_offloads--; mutex_unlock(&esw->state_lock); } + +bool mlx5_esw_host_functions_enabled(const struct mlx5_core_dev *dev) +{ + if (!dev->priv.eswitch) + return true; + + return !dev->priv.eswitch->esw_funcs.host_funcs_disabled; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 45506ad56847..4fe285ce32aa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -197,6 +197,11 @@ static inline struct mlx5_vport *mlx5_devlink_port_vport_get(struct devlink_port return mlx5_devlink_port_get(dl_port)->vport; } +#define MLX5_VHCA_ID_INVALID (-1) + +#define MLX5_VPORT_INVAL_VHCA_ID(vport) \ + ((vport)->vhca_id == MLX5_VHCA_ID_INVALID) + struct mlx5_vport { struct mlx5_core_dev *dev; struct hlist_head uc_list[MLX5_L2_ADDR_HASH_SIZE]; @@ -209,6 +214,13 @@ struct mlx5_vport { struct vport_egress egress; u32 default_metadata; u32 metadata; + int vhca_id; + + bool adjacent; /* delegated vhca from adjacent function */ + struct { + u16 parent_pci_devfn; /* Adjacent parent PCI device function */ + u16 function_id; /* Function ID of the delegated VPort */ + } adj_info; struct mlx5_vport_info info; @@ -323,6 +335,7 @@ struct mlx5_host_work { struct mlx5_esw_functions { struct mlx5_nb nb; + bool host_funcs_disabled; u16 num_vfs; u16 num_ec_vfs; }; @@ -377,6 +390,7 @@ struct mlx5_eswitch { struct mlx5_esw_bridge_offloads *br_offloads; struct mlx5_esw_offload offloads; + u32 last_vport_idx; int mode; u16 manager_vport; u16 first_host_vport; @@ -410,6 +424,8 @@ int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 /* E-Switch API */ int mlx5_eswitch_init(struct mlx5_core_dev *dev); void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw); +int mlx5_esw_vport_alloc(struct mlx5_eswitch *esw, int index, u16 vport_num); +void mlx5_esw_vport_free(struct mlx5_eswitch *esw, struct mlx5_vport *vport); #define MLX5_ESWITCH_IGNORE_NUM_VFS (-1) int 
mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int num_vfs); @@ -615,6 +631,9 @@ bool mlx5_esw_multipath_prereq(struct mlx5_core_dev *dev0, const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev); +void mlx5_esw_adjacent_vhcas_setup(struct mlx5_eswitch *esw); +void mlx5_esw_adjacent_vhcas_cleanup(struct mlx5_eswitch *esw); + #define MLX5_DEBUG_ESWITCH_MASK BIT(3) #define esw_info(__dev, format, ...) \ @@ -817,9 +836,17 @@ struct devlink_port *mlx5_esw_offloads_devlink_port(struct mlx5_eswitch *esw, u1 int mlx5_esw_sf_max_hpf_functions(struct mlx5_core_dev *dev, u16 *max_sfs, u16 *sf_base_id); -int mlx5_esw_vport_vhca_id_set(struct mlx5_eswitch *esw, u16 vport_num); -void mlx5_esw_vport_vhca_id_clear(struct mlx5_eswitch *esw, u16 vport_num); +int mlx5_esw_vport_vhca_id_map(struct mlx5_eswitch *esw, + struct mlx5_vport *vport); +void mlx5_esw_vport_vhca_id_unmap(struct mlx5_eswitch *esw, + struct mlx5_vport *vport); int mlx5_eswitch_vhca_id_to_vport(struct mlx5_eswitch *esw, u16 vhca_id, u16 *vport_num); +bool mlx5_esw_vport_vhca_id(struct mlx5_eswitch *esw, u16 vportn, u16 *vhca_id); + +void mlx5_esw_offloads_rep_remove(struct mlx5_eswitch *esw, + const struct mlx5_vport *vport); +int mlx5_esw_offloads_rep_add(struct mlx5_eswitch *esw, + const struct mlx5_vport *vport); /** * struct mlx5_esw_event_info - Indicates eswitch mode changed/changing. @@ -893,6 +920,7 @@ int mlx5_esw_ipsec_vf_packet_offload_set(struct mlx5_eswitch *esw, struct mlx5_v bool enable); int mlx5_esw_ipsec_vf_packet_offload_supported(struct mlx5_core_dev *dev, u16 vport_num); +bool mlx5_esw_host_functions_enabled(const struct mlx5_core_dev *dev); #else /* CONFIG_MLX5_ESWITCH */ /* eswitch API stubs */ static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; } @@ -960,6 +988,19 @@ static inline bool mlx5_eswitch_block_ipsec(struct mlx5_core_dev *dev) } static inline void mlx5_eswitch_unblock_ipsec(struct mlx5_core_dev *dev) {} + +static inline bool +mlx5_esw_host_functions_enabled(const struct mlx5_core_dev *dev) +{ + return true; +} + +static inline bool +mlx5_esw_vport_vhca_id(struct mlx5_eswitch *esw, u16 vportn, u16 *vhca_id) +{ + return false; +} + #endif /* CONFIG_MLX5_ESWITCH */ #endif /* __MLX5_ESWITCH_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index bee906661282..d57f86d297ab 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1213,7 +1213,8 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); - if (mlx5_core_is_ecpf_esw_manager(peer_dev)) { + if (mlx5_core_is_ecpf_esw_manager(peer_dev) && + mlx5_esw_host_functions_enabled(peer_dev)) { peer_vport = mlx5_eswitch_get_vport(peer_esw, MLX5_VPORT_PF); esw_set_peer_miss_rule_source_port(esw, peer_esw, spec, MLX5_VPORT_PF); @@ -1239,19 +1240,21 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, flows[peer_vport->index] = flow; } - mlx5_esw_for_each_vf_vport(peer_esw, i, peer_vport, - mlx5_core_max_vfs(peer_dev)) { - esw_set_peer_miss_rule_source_port(esw, - peer_esw, - spec, peer_vport->vport); + if (mlx5_esw_host_functions_enabled(esw->dev)) { + mlx5_esw_for_each_vf_vport(peer_esw, i, peer_vport, + mlx5_core_max_vfs(peer_dev)) { + esw_set_peer_miss_rule_source_port(esw, peer_esw, + spec, + peer_vport->vport); - flow = 
mlx5_add_flow_rules(mlx5_eswitch_get_slow_fdb(esw), - spec, &flow_act, &dest, 1); - if (IS_ERR(flow)) { - err = PTR_ERR(flow); - goto add_vf_flow_err; + flow = mlx5_add_flow_rules(mlx5_eswitch_get_slow_fdb(esw), + spec, &flow_act, &dest, 1); + if (IS_ERR(flow)) { + err = PTR_ERR(flow); + goto add_vf_flow_err; + } + flows[peer_vport->index] = flow; } - flows[peer_vport->index] = flow; } if (mlx5_core_ec_sriov_enabled(peer_dev)) { @@ -1301,7 +1304,9 @@ add_vf_flow_err: mlx5_del_flow_rules(flows[peer_vport->index]); } add_ecpf_flow_err: - if (mlx5_core_is_ecpf_esw_manager(peer_dev)) { + + if (mlx5_core_is_ecpf_esw_manager(peer_dev) && + mlx5_esw_host_functions_enabled(peer_dev)) { peer_vport = mlx5_eswitch_get_vport(peer_esw, MLX5_VPORT_PF); mlx5_del_flow_rules(flows[peer_vport->index]); } @@ -2373,7 +2378,20 @@ static int esw_offloads_start(struct mlx5_eswitch *esw, return 0; } -static int mlx5_esw_offloads_rep_init(struct mlx5_eswitch *esw, const struct mlx5_vport *vport) +void mlx5_esw_offloads_rep_remove(struct mlx5_eswitch *esw, + const struct mlx5_vport *vport) +{ + struct mlx5_eswitch_rep *rep = xa_load(&esw->offloads.vport_reps, + vport->vport); + + if (!rep) + return; + xa_erase(&esw->offloads.vport_reps, vport->vport); + kfree(rep); +} + +int mlx5_esw_offloads_rep_add(struct mlx5_eswitch *esw, + const struct mlx5_vport *vport) { struct mlx5_eswitch_rep *rep; int rep_type; @@ -2385,9 +2403,19 @@ static int mlx5_esw_offloads_rep_init(struct mlx5_eswitch *esw, const struct mlx rep->vport = vport->vport; rep->vport_index = vport->index; - for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) - atomic_set(&rep->rep_data[rep_type].state, REP_UNREGISTERED); - + for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) { + if (!esw->offloads.rep_ops[rep_type]) { + atomic_set(&rep->rep_data[rep_type].state, + REP_UNREGISTERED); + continue; + } + /* Dynamic/delegated vports add their representors after + * mlx5_eswitch_register_vport_reps, so mark them as registered + * for them to be loaded later with the others. + */ + rep->esw = esw; + atomic_set(&rep->rep_data[rep_type].state, REP_REGISTERED); + } err = xa_insert(&esw->offloads.vport_reps, rep->vport, rep, GFP_KERNEL); if (err) goto insert_err; @@ -2425,7 +2453,7 @@ static int esw_offloads_init_reps(struct mlx5_eswitch *esw) xa_init(&esw->offloads.vport_reps); mlx5_esw_for_each_vport(esw, i, vport) { - err = mlx5_esw_offloads_rep_init(esw, vport); + err = mlx5_esw_offloads_rep_add(esw, vport); if (err) goto err; } @@ -3533,6 +3561,8 @@ int esw_offloads_enable(struct mlx5_eswitch *esw) int err; mutex_init(&esw->offloads.termtbl_mutex); + mlx5_esw_adjacent_vhcas_setup(esw); + err = mlx5_rdma_enable_roce(esw->dev); if (err) goto err_roce; @@ -3597,6 +3627,7 @@ err_vport_metadata: err_metadata: mlx5_rdma_disable_roce(esw->dev); err_roce: + mlx5_esw_adjacent_vhcas_cleanup(esw); mutex_destroy(&esw->offloads.termtbl_mutex); return err; } @@ -3630,6 +3661,7 @@ void esw_offloads_disable(struct mlx5_eswitch *esw) mapping_destroy(esw->offloads.reg_c0_obj_pool); esw_offloads_metadata_uninit(esw); mlx5_rdma_disable_roce(esw->dev); + mlx5_esw_adjacent_vhcas_cleanup(esw); mutex_destroy(&esw->offloads.termtbl_mutex); } @@ -4059,7 +4091,8 @@ mlx5_eswitch_vport_has_rep(const struct mlx5_eswitch *esw, u16 vport_num) { /* Currently, only ECPF based device has representor for host PF. 
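A host PF representor additionally requires host functions to be enabled.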
*/ if (vport_num == MLX5_VPORT_PF && - !mlx5_core_is_ecpf_esw_manager(esw->dev)) + (!mlx5_core_is_ecpf_esw_manager(esw->dev) || + !mlx5_esw_host_functions_enabled(esw->dev))) return false; if (vport_num == MLX5_VPORT_ECPF && @@ -4161,23 +4194,28 @@ u32 mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw, } EXPORT_SYMBOL(mlx5_eswitch_get_vport_metadata_for_match); -int mlx5_esw_vport_vhca_id_set(struct mlx5_eswitch *esw, u16 vport_num) +int mlx5_esw_vport_vhca_id_map(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) { u16 *old_entry, *vhca_map_entry, vhca_id; - int err; - err = mlx5_vport_get_vhca_id(esw->dev, vport_num, &vhca_id); - if (err) { - esw_warn(esw->dev, "Getting vhca_id for vport failed (vport=%u,err=%d)\n", - vport_num, err); - return err; + if (WARN_ONCE(MLX5_VPORT_INVAL_VHCA_ID(vport), + "vport %d vhca_id is not set", vport->vport)) { + int err; + + err = mlx5_vport_get_vhca_id(vport->dev, vport->vport, + &vhca_id); + if (err) + return err; + vport->vhca_id = vhca_id; } + vhca_id = vport->vhca_id; vhca_map_entry = kmalloc(sizeof(*vhca_map_entry), GFP_KERNEL); if (!vhca_map_entry) return -ENOMEM; - *vhca_map_entry = vport_num; + *vhca_map_entry = vport->vport; old_entry = xa_store(&esw->offloads.vhca_map, vhca_id, vhca_map_entry, GFP_KERNEL); if (xa_is_err(old_entry)) { kfree(vhca_map_entry); @@ -4187,17 +4225,12 @@ int mlx5_esw_vport_vhca_id_set(struct mlx5_eswitch *esw, u16 vport_num) return 0; } -void mlx5_esw_vport_vhca_id_clear(struct mlx5_eswitch *esw, u16 vport_num) +void mlx5_esw_vport_vhca_id_unmap(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) { - u16 *vhca_map_entry, vhca_id; - int err; - - err = mlx5_vport_get_vhca_id(esw->dev, vport_num, &vhca_id); - if (err) - esw_warn(esw->dev, "Getting vhca_id for vport failed (vport=%hu,err=%d)\n", - vport_num, err); + u16 *vhca_map_entry; - vhca_map_entry = xa_erase(&esw->offloads.vhca_map, vhca_id); + vhca_map_entry = xa_erase(&esw->offloads.vhca_map, vport->vhca_id); kfree(vhca_map_entry); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index cb165085a4c1..4308e89802f3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -2793,30 +2793,32 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev, } EXPORT_SYMBOL(mlx5_get_flow_namespace); +struct mlx5_vport_acl_root_ns { + u16 vport_idx; + struct mlx5_flow_root_namespace *root_ns; +}; + struct mlx5_flow_namespace * mlx5_get_flow_vport_namespace(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type type, int vport_idx) { struct mlx5_flow_steering *steering = dev->priv.steering; + struct mlx5_vport_acl_root_ns *vport_ns; if (!steering) return NULL; switch (type) { case MLX5_FLOW_NAMESPACE_ESW_EGRESS: - if (vport_idx >= steering->esw_egress_acl_vports) - return NULL; - if (steering->esw_egress_root_ns && - steering->esw_egress_root_ns[vport_idx]) - return &steering->esw_egress_root_ns[vport_idx]->ns; + vport_ns = xa_load(&steering->esw_egress_root_ns, vport_idx); + if (vport_ns) + return &vport_ns->root_ns->ns; else return NULL; case MLX5_FLOW_NAMESPACE_ESW_INGRESS: - if (vport_idx >= steering->esw_ingress_acl_vports) - return NULL; - if (steering->esw_ingress_root_ns && - steering->esw_ingress_root_ns[vport_idx]) - return &steering->esw_ingress_root_ns[vport_idx]->ns; + vport_ns = xa_load(&steering->esw_ingress_root_ns, vport_idx); + if (vport_ns) + return &vport_ns->root_ns->ns; else 
return NULL; case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX: @@ -3575,118 +3577,102 @@ out_err: return err; } -static int init_egress_acl_root_ns(struct mlx5_flow_steering *steering, int vport) +static void +mlx5_fs_remove_vport_acl_root_ns(struct xarray *esw_acl_root_ns, u16 vport_idx) { - struct fs_prio *prio; - - steering->esw_egress_root_ns[vport] = create_root_ns(steering, FS_FT_ESW_EGRESS_ACL); - if (!steering->esw_egress_root_ns[vport]) - return -ENOMEM; + struct mlx5_vport_acl_root_ns *vport_ns; - /* create 1 prio*/ - prio = fs_create_prio(&steering->esw_egress_root_ns[vport]->ns, 0, 1); - return PTR_ERR_OR_ZERO(prio); + vport_ns = xa_erase(esw_acl_root_ns, vport_idx); + if (vport_ns) { + cleanup_root_ns(vport_ns->root_ns); + kfree(vport_ns); + } } -static int init_ingress_acl_root_ns(struct mlx5_flow_steering *steering, int vport) +static int +mlx5_fs_add_vport_acl_root_ns(struct mlx5_flow_steering *steering, + struct xarray *esw_acl_root_ns, + enum fs_flow_table_type table_type, + u16 vport_idx) { + struct mlx5_vport_acl_root_ns *vport_ns; struct fs_prio *prio; + int err; - steering->esw_ingress_root_ns[vport] = create_root_ns(steering, FS_FT_ESW_INGRESS_ACL); - if (!steering->esw_ingress_root_ns[vport]) - return -ENOMEM; + /* sanity check, intended xarrays are used */ + if (WARN_ON(esw_acl_root_ns != &steering->esw_egress_root_ns && + esw_acl_root_ns != &steering->esw_ingress_root_ns)) + return -EINVAL; - /* create 1 prio*/ - prio = fs_create_prio(&steering->esw_ingress_root_ns[vport]->ns, 0, 1); - return PTR_ERR_OR_ZERO(prio); -} + if (table_type != FS_FT_ESW_EGRESS_ACL && + table_type != FS_FT_ESW_INGRESS_ACL) { + mlx5_core_err(steering->dev, + "Invalid table type %d for egress/ingress ACLs\n", + table_type); + return -EINVAL; + } -int mlx5_fs_egress_acls_init(struct mlx5_core_dev *dev, int total_vports) -{ - struct mlx5_flow_steering *steering = dev->priv.steering; - int err; - int i; + if (xa_load(esw_acl_root_ns, vport_idx)) + return -EEXIST; - steering->esw_egress_root_ns = - kcalloc(total_vports, - sizeof(*steering->esw_egress_root_ns), - GFP_KERNEL); - if (!steering->esw_egress_root_ns) + vport_ns = kzalloc(sizeof(*vport_ns), GFP_KERNEL); + if (!vport_ns) return -ENOMEM; - for (i = 0; i < total_vports; i++) { - err = init_egress_acl_root_ns(steering, i); - if (err) - goto cleanup_root_ns; + vport_ns->root_ns = create_root_ns(steering, table_type); + if (!vport_ns->root_ns) { + err = -ENOMEM; + goto kfree_vport_ns; + } + + /* create 1 prio*/ + prio = fs_create_prio(&vport_ns->root_ns->ns, 0, 1); + if (IS_ERR(prio)) { + err = PTR_ERR(prio); + goto cleanup_root_ns; } - steering->esw_egress_acl_vports = total_vports; + + vport_ns->vport_idx = vport_idx; + err = xa_insert(esw_acl_root_ns, vport_idx, vport_ns, GFP_KERNEL); + if (err) + goto cleanup_root_ns; return 0; cleanup_root_ns: - for (i--; i >= 0; i--) - cleanup_root_ns(steering->esw_egress_root_ns[i]); - kfree(steering->esw_egress_root_ns); - steering->esw_egress_root_ns = NULL; + cleanup_root_ns(vport_ns->root_ns); +kfree_vport_ns: + kfree(vport_ns); return err; } -void mlx5_fs_egress_acls_cleanup(struct mlx5_core_dev *dev) +int mlx5_fs_vport_egress_acl_ns_add(struct mlx5_flow_steering *steering, + u16 vport_idx) { - struct mlx5_flow_steering *steering = dev->priv.steering; - int i; - - if (!steering->esw_egress_root_ns) - return; - - for (i = 0; i < steering->esw_egress_acl_vports; i++) - cleanup_root_ns(steering->esw_egress_root_ns[i]); - - kfree(steering->esw_egress_root_ns); - steering->esw_egress_root_ns = NULL; + 
return mlx5_fs_add_vport_acl_root_ns(steering, + &steering->esw_egress_root_ns, + FS_FT_ESW_EGRESS_ACL, vport_idx); } -int mlx5_fs_ingress_acls_init(struct mlx5_core_dev *dev, int total_vports) +int mlx5_fs_vport_ingress_acl_ns_add(struct mlx5_flow_steering *steering, + u16 vport_idx) { - struct mlx5_flow_steering *steering = dev->priv.steering; - int err; - int i; - - steering->esw_ingress_root_ns = - kcalloc(total_vports, - sizeof(*steering->esw_ingress_root_ns), - GFP_KERNEL); - if (!steering->esw_ingress_root_ns) - return -ENOMEM; - - for (i = 0; i < total_vports; i++) { - err = init_ingress_acl_root_ns(steering, i); - if (err) - goto cleanup_root_ns; - } - steering->esw_ingress_acl_vports = total_vports; - return 0; - -cleanup_root_ns: - for (i--; i >= 0; i--) - cleanup_root_ns(steering->esw_ingress_root_ns[i]); - kfree(steering->esw_ingress_root_ns); - steering->esw_ingress_root_ns = NULL; - return err; + return mlx5_fs_add_vport_acl_root_ns(steering, + &steering->esw_ingress_root_ns, + FS_FT_ESW_INGRESS_ACL, vport_idx); } -void mlx5_fs_ingress_acls_cleanup(struct mlx5_core_dev *dev) +void mlx5_fs_vport_egress_acl_ns_remove(struct mlx5_flow_steering *steering, + int vport_idx) { - struct mlx5_flow_steering *steering = dev->priv.steering; - int i; - - if (!steering->esw_ingress_root_ns) - return; - - for (i = 0; i < steering->esw_ingress_acl_vports; i++) - cleanup_root_ns(steering->esw_ingress_root_ns[i]); + mlx5_fs_remove_vport_acl_root_ns(&steering->esw_egress_root_ns, + vport_idx); +} - kfree(steering->esw_ingress_root_ns); - steering->esw_ingress_root_ns = NULL; +void mlx5_fs_vport_ingress_acl_ns_remove(struct mlx5_flow_steering *steering, + int vport_idx) +{ + mlx5_fs_remove_vport_acl_root_ns(&steering->esw_ingress_root_ns, + vport_idx); } u32 mlx5_fs_get_capabilities(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type type) @@ -3818,6 +3804,11 @@ void mlx5_fs_core_cleanup(struct mlx5_core_dev *dev) { struct mlx5_flow_steering *steering = dev->priv.steering; + WARN_ON(!xa_empty(&steering->esw_egress_root_ns)); + WARN_ON(!xa_empty(&steering->esw_ingress_root_ns)); + xa_destroy(&steering->esw_egress_root_ns); + xa_destroy(&steering->esw_ingress_root_ns); + cleanup_root_ns(steering->root_ns); cleanup_fdb_root_ns(steering); cleanup_root_ns(steering->port_sel_root_ns); @@ -3908,6 +3899,8 @@ int mlx5_fs_core_init(struct mlx5_core_dev *dev) goto err; } + xa_init(&steering->esw_egress_root_ns); + xa_init(&steering->esw_ingress_root_ns); return 0; err: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 500826229b0b..7877d9a2118d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -151,16 +151,14 @@ struct mlx5_flow_steering { struct mlx5_flow_root_namespace *root_ns; struct mlx5_flow_root_namespace *fdb_root_ns; struct mlx5_flow_namespace **fdb_sub_ns; - struct mlx5_flow_root_namespace **esw_egress_root_ns; - struct mlx5_flow_root_namespace **esw_ingress_root_ns; + struct xarray esw_egress_root_ns; + struct xarray esw_ingress_root_ns; struct mlx5_flow_root_namespace *sniffer_tx_root_ns; struct mlx5_flow_root_namespace *sniffer_rx_root_ns; struct mlx5_flow_root_namespace *rdma_rx_root_ns; struct mlx5_flow_root_namespace *rdma_tx_root_ns; struct mlx5_flow_root_namespace *egress_root_ns; struct mlx5_flow_root_namespace *port_sel_root_ns; - int esw_egress_acl_vports; - int esw_ingress_acl_vports; struct mlx5_flow_root_namespace 
**rdma_transport_rx_root_ns; struct mlx5_flow_root_namespace **rdma_transport_tx_root_ns; int rdma_transport_rx_vports; @@ -378,10 +376,14 @@ void mlx5_fs_core_free(struct mlx5_core_dev *dev); int mlx5_fs_core_init(struct mlx5_core_dev *dev); void mlx5_fs_core_cleanup(struct mlx5_core_dev *dev); -int mlx5_fs_egress_acls_init(struct mlx5_core_dev *dev, int total_vports); -void mlx5_fs_egress_acls_cleanup(struct mlx5_core_dev *dev); -int mlx5_fs_ingress_acls_init(struct mlx5_core_dev *dev, int total_vports); -void mlx5_fs_ingress_acls_cleanup(struct mlx5_core_dev *dev); +int mlx5_fs_vport_egress_acl_ns_add(struct mlx5_flow_steering *steering, + u16 vport_idx); +int mlx5_fs_vport_ingress_acl_ns_add(struct mlx5_flow_steering *steering, + u16 vport_idx); +void mlx5_fs_vport_egress_acl_ns_remove(struct mlx5_flow_steering *steering, + int vport_idx); +void mlx5_fs_vport_ingress_acl_ns_remove(struct mlx5_flow_steering *steering, + int vport_idx); u32 mlx5_fs_get_capabilities(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type type); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index 57476487e31f..eeb4437975f2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -294,6 +294,12 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev) return err; } + if (MLX5_CAP_GEN(dev, psp)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_PSP); + if (err) + return err; + } + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index cf7a1edd0530..b63c5a221eb9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -669,54 +669,61 @@ static void mlx5_fw_fatal_reporter_err_work(struct work_struct *work) } } +#define MLX5_FW_REPORTER_ECPF_GRACEFUL_PERIOD 180000 +#define MLX5_FW_REPORTER_PF_GRACEFUL_PERIOD 60000 +#define MLX5_FW_REPORTER_VF_GRACEFUL_PERIOD 30000 +#define MLX5_FW_REPORTER_DEFAULT_GRACEFUL_PERIOD \ + MLX5_FW_REPORTER_VF_GRACEFUL_PERIOD + +static +const struct devlink_health_reporter_ops mlx5_fw_fatal_reporter_ecpf_ops = { + .name = "fw_fatal", + .recover = mlx5_fw_fatal_reporter_recover, + .dump = mlx5_fw_fatal_reporter_dump, + .default_graceful_period = + MLX5_FW_REPORTER_ECPF_GRACEFUL_PERIOD, +}; + static const struct devlink_health_reporter_ops mlx5_fw_fatal_reporter_pf_ops = { .name = "fw_fatal", .recover = mlx5_fw_fatal_reporter_recover, .dump = mlx5_fw_fatal_reporter_dump, + .default_graceful_period = MLX5_FW_REPORTER_PF_GRACEFUL_PERIOD, }; static const struct devlink_health_reporter_ops mlx5_fw_fatal_reporter_ops = { .name = "fw_fatal", .recover = mlx5_fw_fatal_reporter_recover, + .default_graceful_period = + MLX5_FW_REPORTER_DEFAULT_GRACEFUL_PERIOD, }; -#define MLX5_FW_REPORTER_ECPF_GRACEFUL_PERIOD 180000 -#define MLX5_FW_REPORTER_PF_GRACEFUL_PERIOD 60000 -#define MLX5_FW_REPORTER_VF_GRACEFUL_PERIOD 30000 -#define MLX5_FW_REPORTER_DEFAULT_GRACEFUL_PERIOD MLX5_FW_REPORTER_VF_GRACEFUL_PERIOD - void mlx5_fw_reporters_create(struct mlx5_core_dev *dev) { const struct devlink_health_reporter_ops *fw_fatal_ops; struct mlx5_core_health *health = &dev->priv.health; const struct devlink_health_reporter_ops *fw_ops; struct devlink *devlink = priv_to_devlink(dev); - u64 grace_period; - fw_fatal_ops = &mlx5_fw_fatal_reporter_pf_ops; fw_ops = &mlx5_fw_reporter_pf_ops; if (mlx5_core_is_ecpf(dev)) { - grace_period = MLX5_FW_REPORTER_ECPF_GRACEFUL_PERIOD; + 
fw_fatal_ops = &mlx5_fw_fatal_reporter_ecpf_ops; } else if (mlx5_core_is_pf(dev)) { - grace_period = MLX5_FW_REPORTER_PF_GRACEFUL_PERIOD; + fw_fatal_ops = &mlx5_fw_fatal_reporter_pf_ops; } else { /* VF or SF */ - grace_period = MLX5_FW_REPORTER_DEFAULT_GRACEFUL_PERIOD; fw_fatal_ops = &mlx5_fw_fatal_reporter_ops; fw_ops = &mlx5_fw_reporter_ops; } - health->fw_reporter = - devl_health_reporter_create(devlink, fw_ops, 0, dev); + health->fw_reporter = devl_health_reporter_create(devlink, fw_ops, dev); if (IS_ERR(health->fw_reporter)) mlx5_core_warn(dev, "Failed to create fw reporter, err = %ld\n", PTR_ERR(health->fw_reporter)); - health->fw_fatal_reporter = - devl_health_reporter_create(devlink, - fw_fatal_ops, - grace_period, - dev); + health->fw_fatal_reporter = devl_health_reporter_create(devlink, + fw_fatal_ops, + dev); if (IS_ERR(health->fw_fatal_reporter)) mlx5_core_warn(dev, "Failed to create fw fatal reporter, err = %ld\n", PTR_ERR(health->fw_fatal_reporter)); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 8517d4e5d5ef..0951c7cc1b5f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1798,6 +1798,7 @@ static const int types[] = { MLX5_CAP_VDPA_EMULATION, MLX5_CAP_IPSEC, MLX5_CAP_PORT_SELECTION, + MLX5_CAP_PSP, MLX5_CAP_MACSEC, MLX5_CAP_ADV_VIRTUALIZATION, MLX5_CAP_CRYPTO, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 9d3504f5abfa..082259b56816 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -449,8 +449,6 @@ int mlx5_vport_set_other_func_cap(struct mlx5_core_dev *dev, const void *hca_cap #define mlx5_vport_get_other_func_general_cap(dev, vport, out) \ mlx5_vport_get_other_func_cap(dev, vport, out, MLX5_CAP_GENERAL) -int mlx5_vport_get_vhca_id(struct mlx5_core_dev *dev, u16 vport, u16 *vhca_id); - static inline u32 mlx5_sriov_get_vf_total_msix(struct pci_dev *pdev) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/diag/dev_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/diag/dev_tracepoint.h index 0537de86f981..9b0f44253f33 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/diag/dev_tracepoint.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/diag/dev_tracepoint.h @@ -28,7 +28,7 @@ DECLARE_EVENT_CLASS(mlx5_sf_dev_template, __entry->hw_fn_id = sfdev->fn_id; __entry->sfnum = sfdev->sfnum; ), - TP_printk("(%s) sfdev=%pK aux_id=%d hw_id=0x%x sfnum=%u\n", + TP_printk("(%s) sfdev=%p aux_id=%d hw_id=0x%x sfnum=%u\n", __get_str(devname), __entry->sfdev, __entry->aux_id, __entry->hw_fn_id, __entry->sfnum) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c index 0bdcab2e5cf3..f22eaf506d28 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c @@ -1200,34 +1200,20 @@ out: int mlx5hws_cmd_query_gvmi(struct mlx5_core_dev *mdev, bool other_function, u16 vport_number, u16 *gvmi) { - bool ec_vf_func = other_function ? 
mlx5_core_is_ec_vf_vport(mdev, vport_number) : false; - u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {}; - int out_size; - void *out; int err; - out_size = MLX5_ST_SZ_BYTES(query_hca_cap_out); - out = kzalloc(out_size, GFP_KERNEL); - if (!out) - return -ENOMEM; - - MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP); - MLX5_SET(query_hca_cap_in, in, other_function, other_function); - MLX5_SET(query_hca_cap_in, in, function_id, - mlx5_vport_to_func_id(mdev, vport_number, ec_vf_func)); - MLX5_SET(query_hca_cap_in, in, ec_vf_function, ec_vf_func); - MLX5_SET(query_hca_cap_in, in, op_mod, - MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1 | HCA_CAP_OPMOD_GET_CUR); + if (!other_function) { + /* self vhca_id */ + *gvmi = MLX5_CAP_GEN(mdev, vhca_id); + return 0; + } - err = mlx5_cmd_exec_inout(mdev, query_hca_cap, in, out); + err = mlx5_vport_get_vhca_id(mdev, vport_number, gvmi); if (err) { - kfree(out); + mlx5_core_err(mdev, "Failed to get vport vhca id for vport %d\n", + vport_number); return err; } - *gvmi = MLX5_GET(query_hca_cap_out, out, capability.cmd_hca_cap.vhca_id); - - kfree(out); - return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c index c6436c3a7a83..c4bb6967f74d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c @@ -1280,7 +1280,7 @@ hws_definer_conv_misc2(struct mlx5hws_definer_conv_data *cd, struct mlx5hws_definer_fc *fc = cd->fc; struct mlx5hws_definer_fc *curr_fc; - if (HWS_IS_FLD_SET_SZ(match_param, misc_parameters_2.reserved_at_1a0, 0x8) || + if (HWS_IS_FLD_SET_SZ(match_param, misc_parameters_2.psp_syndrome, 0x8) || HWS_IS_FLD_SET_SZ(match_param, misc_parameters_2.ipsec_next_header, 0x8) || HWS_IS_FLD_SET_SZ(match_param, misc_parameters_2.reserved_at_1c0, 0x40) || diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_cmd.c index baefb9a3fa05..1ebb2b15c080 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_cmd.c @@ -2,6 +2,7 @@ /* Copyright (c) 2019 Mellanox Technologies. */ #include "dr_types.h" +#include "eswitch.h" int mlx5dr_cmd_query_esw_vport_context(struct mlx5_core_dev *mdev, bool other_vport, @@ -34,34 +35,21 @@ int mlx5dr_cmd_query_esw_vport_context(struct mlx5_core_dev *mdev, int mlx5dr_cmd_query_gvmi(struct mlx5_core_dev *mdev, bool other_vport, u16 vport_number, u16 *gvmi) { - bool ec_vf_func = other_vport ? 
mlx5_core_is_ec_vf_vport(mdev, vport_number) : false; - u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {}; - int out_size; - void *out; int err; - out_size = MLX5_ST_SZ_BYTES(query_hca_cap_out); - out = kzalloc(out_size, GFP_KERNEL); - if (!out) - return -ENOMEM; - - MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP); - MLX5_SET(query_hca_cap_in, in, other_function, other_vport); - MLX5_SET(query_hca_cap_in, in, function_id, mlx5_vport_to_func_id(mdev, vport_number, ec_vf_func)); - MLX5_SET(query_hca_cap_in, in, ec_vf_function, ec_vf_func); - MLX5_SET(query_hca_cap_in, in, op_mod, - MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1 | - HCA_CAP_OPMOD_GET_CUR); + if (!other_vport) { + /* self vhca_id */ + *gvmi = MLX5_CAP_GEN(mdev, vhca_id); + return 0; + } - err = mlx5_cmd_exec_inout(mdev, query_hca_cap, in, out); + err = mlx5_vport_get_vhca_id(mdev, vport_number, gvmi); if (err) { - kfree(out); + mlx5_core_err(mdev, "Failed to get vport vhca id for vport %d\n", + vport_number); return err; } - *gvmi = MLX5_GET(query_hca_cap_out, out, capability.cmd_hca_cap.vhca_id); - - kfree(out); return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index da5c24fc7b30..2ed2e530b07d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -36,6 +36,7 @@ #include <linux/mlx5/vport.h> #include <linux/mlx5/eswitch.h> #include "mlx5_core.h" +#include "eswitch.h" #include "sf/sf.h" /* Mutex to hold while enabling or disabling RoCE */ @@ -1189,18 +1190,44 @@ u64 mlx5_query_nic_system_image_guid(struct mlx5_core_dev *mdev) } EXPORT_SYMBOL_GPL(mlx5_query_nic_system_image_guid); +static bool mlx5_vport_use_vhca_id_as_func_id(struct mlx5_core_dev *dev, + u16 vport_num, u16 *vhca_id) +{ + if (!MLX5_CAP_GEN_2(dev, function_id_type_vhca_id)) + return false; + + return mlx5_esw_vport_vhca_id(dev->priv.eswitch, vport_num, vhca_id); +} + int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 vport, void *out, u16 opmod) { - bool ec_vf_func = mlx5_core_is_ec_vf_vport(dev, vport); u8 in[MLX5_ST_SZ_BYTES(query_hca_cap_in)] = {}; + u16 vhca_id = 0, function_id = 0; + bool ec_vf_func = false; + + /* if this vport is referring to a vport on the ec PF (embedded cpu ) + * let the FW know which domain we are querying since vport numbers or + * function_ids are not unique across the different PF domains, + * unless we use vhca_id as the function_id below. 
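+ * The vhca_id is unique across all PF domains, so when it is used as the + * function_id the ambiguity goes away and ec_vf_function is left cleared.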
+ */ + ec_vf_func = mlx5_core_is_ec_vf_vport(dev, vport); + function_id = mlx5_vport_to_func_id(dev, vport, ec_vf_func); + + if (mlx5_vport_use_vhca_id_as_func_id(dev, vport, &vhca_id)) { + MLX5_SET(query_hca_cap_in, in, function_id_type, 1); + function_id = vhca_id; + ec_vf_func = false; + mlx5_core_dbg(dev, "%s using vhca_id as function_id for vport %d vhca_id 0x%x\n", + __func__, vport, vhca_id); + } opmod = (opmod << 1) | (HCA_CAP_OPMOD_GET_MAX & 0x01); MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP); MLX5_SET(query_hca_cap_in, in, op_mod, opmod); - MLX5_SET(query_hca_cap_in, in, function_id, mlx5_vport_to_func_id(dev, vport, ec_vf_func)); MLX5_SET(query_hca_cap_in, in, other_function, true); MLX5_SET(query_hca_cap_in, in, ec_vf_function, ec_vf_func); + MLX5_SET(query_hca_cap_in, in, function_id, function_id); return mlx5_cmd_exec_inout(dev, query_hca_cap, in, out); } EXPORT_SYMBOL_GPL(mlx5_vport_get_other_func_cap); @@ -1212,7 +1239,9 @@ int mlx5_vport_get_vhca_id(struct mlx5_core_dev *dev, u16 vport, u16 *vhca_id) void *hca_caps; int err; - *vhca_id = 0; + /* try to get the vhca_id via the eswitch first */ + if (mlx5_esw_vport_vhca_id(dev->priv.eswitch, vport, vhca_id)) + return 0; query_ctx = kzalloc(query_out_sz, GFP_KERNEL); if (!query_ctx) @@ -1229,12 +1258,14 @@ out_free: kfree(query_ctx); return err; } +EXPORT_SYMBOL_GPL(mlx5_vport_get_vhca_id); int mlx5_vport_set_other_func_cap(struct mlx5_core_dev *dev, const void *hca_cap, u16 vport, u16 opmod) { - bool ec_vf_func = mlx5_core_is_ec_vf_vport(dev, vport); int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in); + u16 vhca_id = 0, function_id = 0; + bool ec_vf_func = false; void *set_hca_cap; void *set_ctx; int ret; @@ -1243,14 +1274,29 @@ int mlx5_vport_set_other_func_cap(struct mlx5_core_dev *dev, const void *hca_cap if (!set_ctx) return -ENOMEM; + + /* if this vport is referring to a vport on the ec PF (embedded cpu ) + * let the FW know which domain we are addressing since vport numbers or + * function_ids are not unique across the different PF domains, + * unless we use vhca_id as the function_id below. 
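+ * As in the query path above, a valid vhca_id removes the ambiguity and + * ec_vf_function is left cleared.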
+ */ + ec_vf_func = mlx5_core_is_ec_vf_vport(dev, vport); + function_id = mlx5_vport_to_func_id(dev, vport, ec_vf_func); + + if (mlx5_vport_use_vhca_id_as_func_id(dev, vport, &vhca_id)) { + MLX5_SET(set_hca_cap_in, set_ctx, function_id_type, 1); + function_id = vhca_id; + ec_vf_func = false; + mlx5_core_dbg(dev, "%s using vhca_id as function_id for vport %d vhca_id 0x%x\n", + __func__, vport, vhca_id); + } + MLX5_SET(set_hca_cap_in, set_ctx, opcode, MLX5_CMD_OP_SET_HCA_CAP); MLX5_SET(set_hca_cap_in, set_ctx, op_mod, opmod << 1); set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability); memcpy(set_hca_cap, hca_cap, MLX5_ST_SZ_BYTES(cmd_hca_cap)); - MLX5_SET(set_hca_cap_in, set_ctx, function_id, - mlx5_vport_to_func_id(dev, vport, ec_vf_func)); MLX5_SET(set_hca_cap_in, set_ctx, other_function, true); MLX5_SET(set_hca_cap_in, set_ctx, ec_vf_function, ec_vf_func); + MLX5_SET(set_hca_cap_in, set_ctx, function_id, function_id); ret = mlx5_cmd_exec_in(dev, set_hca_cap, set_ctx); kfree(set_ctx); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 2bb2b77351bd..980f3223f124 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -2043,7 +2043,7 @@ static int mlxsw_core_health_init(struct mlxsw_core *mlxsw_core) return 0; fw_fatal = devl_health_reporter_create(devlink, &mlxsw_core_health_fw_fatal_ops, - 0, mlxsw_core); + mlxsw_core); if (IS_ERR(fw_fatal)) { dev_err(mlxsw_core->bus_info->dev, "Failed to create fw fatal reporter"); return PTR_ERR(fw_fatal); } diff --git a/drivers/net/ethernet/meta/fbnic/fbnic.h b/drivers/net/ethernet/meta/fbnic/fbnic.h index c376e06880c9..311c7dda911a 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic.h @@ -84,9 +84,6 @@ struct fbnic_dev { /* Local copy of hardware statistics */ struct fbnic_hw_stats hw_stats; - /* Lock protecting access to hw_stats */ - spinlock_t hw_stats_lock; - struct fbnic_fw_log fw_log; }; diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_csr.h b/drivers/net/ethernet/meta/fbnic/fbnic_csr.h index a81db842aa53..e2fffe1597e9 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_csr.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_csr.h @@ -790,6 +790,21 @@ enum { #define FBNIC_CSR_END_PCS 0x10668 /* CSR section delimiter */ #define FBNIC_CSR_START_RSFEC 0x10800 /* CSR section delimiter */ + +/* We have 4 RSFEC engines present in our part, however we are only using 1. + * As such only CCW(0) and NCCW(0) will ever be non-zero and the other + * registers can be ignored. 
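+ * Each engine's counter block is spaced 8 CSR words (32 bytes) apart, + * hence the 8 * (n) stride in the macros below.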
+ */ +#define FBNIC_RSFEC_CCW_LO(n) (0x10802 + 8 * (n)) /* 0x42008 + 32*n */ +#define FBNIC_RSFEC_CCW_HI(n) (0x10803 + 8 * (n)) /* 0x4200c + 32*n */ +#define FBNIC_RSFEC_NCCW_LO(n) (0x10804 + 8 * (n)) /* 0x42010 + 32*n */ +#define FBNIC_RSFEC_NCCW_HI(n) (0x10805 + 8 * (n)) /* 0x42014 + 32*n */ + +#define FBNIC_PCS_MAX_LANES 4 +#define FBNIC_PCS_SYMBLERR_LO(n) \ + (0x10880 + 2 * (n)) /* 0x42200 + 8*n */ +#define FBNIC_PCS_SYMBLERR_HI(n) \ + (0x10881 + 2 * (n)) /* 0x42204 + 8*n */ #define FBNIC_CSR_END_RSFEC 0x108c8 /* CSR section delimiter */ /* MAC MAC registers (ASIC only) */ @@ -829,6 +844,10 @@ enum { #define FBNIC_CSR_END_SIG 0x1184e /* CSR section delimiter */ #define FBNIC_CSR_START_MAC_STAT 0x11a00 +#define FBNIC_MAC_STAT_RX_XOFF_STB_L 0x11a00 /* 0x46800 */ +#define FBNIC_MAC_STAT_RX_XOFF_STB_H 0x11a01 /* 0x46804 */ +#define FBNIC_MAC_STAT_TX_XOFF_STB_L 0x11a04 /* 0x46810 */ +#define FBNIC_MAC_STAT_TX_XOFF_STB_H 0x11a05 /* 0x46814 */ #define FBNIC_MAC_STAT_RX_BYTE_COUNT_L 0x11a08 /* 0x46820 */ #define FBNIC_MAC_STAT_RX_BYTE_COUNT_H 0x11a09 /* 0x46824 */ #define FBNIC_MAC_STAT_RX_ALIGN_ERROR_L 0x11a0a /* 0x46828 */ diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c index dc7ba8d5fc43..b4ff98ee2051 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c @@ -2,6 +2,7 @@ /* Copyright (c) Meta Platforms, Inc. and affiliates. */ #include <linux/ethtool.h> +#include <linux/ethtool_netlink.h> #include <linux/netdevice.h> #include <linux/pci.h> #include <net/ipv6.h> @@ -111,6 +112,20 @@ static const struct fbnic_stat fbnic_gstrings_hw_q_stats[] = { FBNIC_HW_RXB_DEQUEUE_STATS_LEN * FBNIC_RXB_DEQUEUE_INDICES + \ FBNIC_HW_Q_STATS_LEN * FBNIC_MAX_QUEUES) +#define FBNIC_QUEUE_STAT(name, stat) \ + FBNIC_STAT_FIELDS(fbnic_ring, name, stat) + +static const struct fbnic_stat fbnic_gstrings_xdp_stats[] = { + FBNIC_QUEUE_STAT("xdp_tx_queue_%u_packets", stats.packets), + FBNIC_QUEUE_STAT("xdp_tx_queue_%u_bytes", stats.bytes), + FBNIC_QUEUE_STAT("xdp_tx_queue_%u_dropped", stats.dropped), +}; + +#define FBNIC_XDP_STATS_LEN ARRAY_SIZE(fbnic_gstrings_xdp_stats) + +#define FBNIC_STATS_LEN \ + (FBNIC_HW_STATS_LEN + FBNIC_XDP_STATS_LEN * FBNIC_MAX_XDPQS) + static void fbnic_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo) { @@ -160,6 +175,7 @@ static void fbnic_clone_swap_cfg(struct fbnic_net *orig, swap(clone->num_rx_queues, orig->num_rx_queues); swap(clone->num_tx_queues, orig->num_tx_queues); swap(clone->num_napi, orig->num_napi); + swap(clone->hds_thresh, orig->hds_thresh); } static void fbnic_aggregate_vector_counters(struct fbnic_net *fbn, @@ -277,15 +293,21 @@ fbnic_get_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring, ring->rx_mini_pending = fbn->hpq_size; ring->rx_jumbo_pending = fbn->ppq_size; ring->tx_pending = fbn->txq_size; + + kernel_ring->tcp_data_split = ETHTOOL_TCP_DATA_SPLIT_ENABLED; + kernel_ring->hds_thresh_max = FBNIC_HDS_THRESH_MAX; + kernel_ring->hds_thresh = fbn->hds_thresh; } static void fbnic_set_rings(struct fbnic_net *fbn, - struct ethtool_ringparam *ring) + struct ethtool_ringparam *ring, + struct kernel_ethtool_ringparam *kernel_ring) { fbn->rcq_size = ring->rx_pending; fbn->hpq_size = ring->rx_mini_pending; fbn->ppq_size = ring->rx_jumbo_pending; fbn->txq_size = ring->tx_pending; + fbn->hds_thresh = kernel_ring->hds_thresh; } static int @@ -316,8 +338,24 @@ fbnic_set_ringparam(struct net_device *netdev, struct 
ethtool_ringparam *ring, return -EINVAL; } + if (kernel_ring->tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_DISABLED) { + NL_SET_ERR_MSG_MOD(extack, "Cannot disable TCP data split"); + return -EINVAL; + } + + /* If an XDP program is attached, we should check for potential frame + * splitting. If the new HDS threshold can cause splitting, we should + * only allow it if the attached XDP program can handle frags. + */ + if (fbnic_check_split_frames(fbn->xdp_prog, netdev->mtu, + kernel_ring->hds_thresh)) { + NL_SET_ERR_MSG_MOD(extack, + "Use higher HDS threshold or multi-buf capable program"); + return -EINVAL; + } + if (!netif_running(netdev)) { - fbnic_set_rings(fbn, ring); + fbnic_set_rings(fbn, ring, kernel_ring); return 0; } @@ -325,7 +363,7 @@ fbnic_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring, if (!clone) return -ENOMEM; - fbnic_set_rings(clone, ring); + fbnic_set_rings(clone, ring, kernel_ring); err = fbnic_alloc_napi_vectors(clone); if (err) @@ -398,6 +436,16 @@ static void fbnic_get_rxb_dequeue_strings(u8 **data, unsigned int idx) ethtool_sprintf(data, stat->string, idx); } +static void fbnic_get_xdp_queue_strings(u8 **data, unsigned int idx) +{ + const struct fbnic_stat *stat; + int i; + + stat = fbnic_gstrings_xdp_stats; + for (i = 0; i < FBNIC_XDP_STATS_LEN; i++, stat++) + ethtool_sprintf(data, stat->string, idx); +} + static void fbnic_get_strings(struct net_device *dev, u32 sset, u8 *data) { const struct fbnic_stat *stat; @@ -423,6 +471,9 @@ static void fbnic_get_strings(struct net_device *dev, u32 sset, u8 *data) for (i = 0; i < FBNIC_HW_Q_STATS_LEN; i++, stat++) ethtool_sprintf(&data, stat->string, idx); } + + for (i = 0; i < FBNIC_MAX_XDPQS; i++) + fbnic_get_xdp_queue_strings(&data, i); break; } } @@ -440,6 +491,24 @@ static void fbnic_report_hw_stats(const struct fbnic_stat *stat, } } +static void fbnic_get_xdp_queue_stats(struct fbnic_ring *ring, u64 **data) +{ + const struct fbnic_stat *stat; + int i; + + if (!ring) { + *data += FBNIC_XDP_STATS_LEN; + return; + } + + stat = fbnic_gstrings_xdp_stats; + for (i = 0; i < FBNIC_XDP_STATS_LEN; i++, stat++, (*data)++) { + u8 *p = (u8 *)ring + stat->offset; + + **data = *(u64 *)p; + } +} + static void fbnic_get_ethtool_stats(struct net_device *dev, struct ethtool_stats *stats, u64 *data) { @@ -449,7 +518,7 @@ static void fbnic_get_ethtool_stats(struct net_device *dev, fbnic_get_hw_stats(fbn->fbd); - spin_lock(&fbd->hw_stats_lock); + spin_lock(&fbd->hw_stats.lock); fbnic_report_hw_stats(fbnic_gstrings_hw_stats, &fbd->hw_stats, FBNIC_HW_FIXED_STATS_LEN, &data); @@ -486,14 +555,17 @@ static void fbnic_get_ethtool_stats(struct net_device *dev, fbnic_report_hw_stats(fbnic_gstrings_hw_q_stats, hw_q, FBNIC_HW_Q_STATS_LEN, &data); } - spin_unlock(&fbd->hw_stats_lock); + spin_unlock(&fbd->hw_stats.lock); + + for (i = 0; i < FBNIC_MAX_XDPQS; i++) + fbnic_get_xdp_queue_stats(fbn->tx[i + FBNIC_MAX_TXQS], &data); } static int fbnic_get_sset_count(struct net_device *dev, int sset) { switch (sset) { case ETH_SS_STATS: - return FBNIC_HW_STATS_LEN; + return FBNIC_STATS_LEN; default: return -EOPNOTSUPP; } @@ -1310,7 +1382,7 @@ fbnic_get_rss_hash_opts(struct net_device *netdev, #define FBNIC_L2_HASH_OPTIONS \ (RXH_L2DA | RXH_DISCARD) #define FBNIC_L3_HASH_OPTIONS \ - (FBNIC_L2_HASH_OPTIONS | RXH_IP_SRC | RXH_IP_DST) + (FBNIC_L2_HASH_OPTIONS | RXH_IP_SRC | RXH_IP_DST | RXH_IP6_FL) #define FBNIC_L4_HASH_OPTIONS \ (FBNIC_L3_HASH_OPTIONS | RXH_L4_B_0_1 | RXH_L4_B_2_3) @@ -1570,6 +1642,62 @@ static void fbnic_set_counter(u64 *stat, 
struct fbnic_stat_counter *counter) } static void +fbnic_get_pause_stats(struct net_device *netdev, + struct ethtool_pause_stats *pause_stats) +{ + struct fbnic_net *fbn = netdev_priv(netdev); + struct fbnic_mac_stats *mac_stats; + struct fbnic_dev *fbd = fbn->fbd; + + mac_stats = &fbd->hw_stats.mac; + + fbd->mac->get_pause_stats(fbd, false, &mac_stats->pause); + + pause_stats->tx_pause_frames = mac_stats->pause.tx_pause_frames.value; + pause_stats->rx_pause_frames = mac_stats->pause.rx_pause_frames.value; +} + +static void +fbnic_get_fec_stats(struct net_device *netdev, + struct ethtool_fec_stats *fec_stats) +{ + struct fbnic_net *fbn = netdev_priv(netdev); + struct fbnic_phy_stats *phy_stats; + struct fbnic_dev *fbd = fbn->fbd; + + fbnic_get_hw_stats32(fbd); + phy_stats = &fbd->hw_stats.phy; + + spin_lock(&fbd->hw_stats.lock); + fec_stats->corrected_blocks.total = + phy_stats->fec.corrected_blocks.value; + fec_stats->uncorrectable_blocks.total = + phy_stats->fec.uncorrectable_blocks.value; + spin_unlock(&fbd->hw_stats.lock); +} + +static void +fbnic_get_eth_phy_stats(struct net_device *netdev, + struct ethtool_eth_phy_stats *eth_phy_stats) +{ + struct fbnic_net *fbn = netdev_priv(netdev); + struct fbnic_phy_stats *phy_stats; + struct fbnic_dev *fbd = fbn->fbd; + u64 total = 0; + int i; + + fbnic_get_hw_stats32(fbd); + phy_stats = &fbd->hw_stats.phy; + + spin_lock(&fbd->hw_stats.lock); + for (i = 0; i < FBNIC_PCS_MAX_LANES; i++) + total += phy_stats->pcs.SymbolErrorDuringCarrier.lanes[i].value; + + eth_phy_stats->SymbolErrorDuringCarrier = total; + spin_unlock(&fbd->hw_stats.lock); +} + +static void fbnic_get_eth_mac_stats(struct net_device *netdev, struct ethtool_eth_mac_stats *eth_mac_stats) { @@ -1678,6 +1806,8 @@ fbnic_get_rmon_stats(struct net_device *netdev, static const struct ethtool_ops fbnic_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_RX_MAX_FRAMES, + .supported_ring_params = ETHTOOL_RING_USE_TCP_DATA_SPLIT | + ETHTOOL_RING_USE_HDS_THRS, .rxfh_max_num_contexts = FBNIC_RPC_RSS_TBL_COUNT, .get_drvinfo = fbnic_get_drvinfo, .get_regs_len = fbnic_get_regs_len, @@ -1687,6 +1817,7 @@ static const struct ethtool_ops fbnic_ethtool_ops = { .set_coalesce = fbnic_set_coalesce, .get_ringparam = fbnic_get_ringparam, .set_ringparam = fbnic_set_ringparam, + .get_pause_stats = fbnic_get_pause_stats, .get_pauseparam = fbnic_phylink_get_pauseparam, .set_pauseparam = fbnic_phylink_set_pauseparam, .get_strings = fbnic_get_strings, @@ -1708,7 +1839,9 @@ static const struct ethtool_ops fbnic_ethtool_ops = { .get_ts_info = fbnic_get_ts_info, .get_ts_stats = fbnic_get_ts_stats, .get_link_ksettings = fbnic_phylink_ethtool_ksettings_get, + .get_fec_stats = fbnic_get_fec_stats, .get_fecparam = fbnic_phylink_get_fecparam, + .get_eth_phy_stats = fbnic_get_eth_phy_stats, .get_eth_mac_stats = fbnic_get_eth_mac_stats, .get_eth_ctrl_stats = fbnic_get_eth_ctrl_stats, .get_rmon_stats = fbnic_get_rmon_stats, diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c index 0c55be7d2547..6e580654493c 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c @@ -653,6 +653,9 @@ static int fbnic_fw_parse_cap_resp(void *opaque, struct fbnic_tlv_msg **results) fbd->fw_cap.anti_rollback_version = fta_get_uint(results, FBNIC_FW_CAP_RESP_ANTI_ROLLBACK_VERSION); + /* Always assume we need a BMC reinit */ + fbd->fw_cap.need_bmc_tcam_reinit = true; + return 0; } @@ -1410,6 +1413,109 @@ void 
fbnic_mbx_flush_tx(struct fbnic_dev *fbd)
 	} while (time_is_after_jiffies(timeout));
 }
 
+int fbnic_fw_xmit_rpc_macda_sync(struct fbnic_dev *fbd)
+{
+	struct fbnic_tlv_msg *mac_array;
+	int i, addr_count = 0, err;
+	struct fbnic_tlv_msg *msg;
+	u32 rx_flags = 0;
+
+	/* Nothing to do if there is no FW to sync with */
+	if (!fbd->mbx[FBNIC_IPC_MBX_TX_IDX].ready)
+		return 0;
+
+	msg = fbnic_tlv_msg_alloc(FBNIC_TLV_MSG_ID_RPC_MAC_SYNC_REQ);
+	if (!msg)
+		return -ENOMEM;
+
+	mac_array = fbnic_tlv_attr_nest_start(msg,
+					      FBNIC_FW_RPC_MAC_SYNC_UC_ARRAY);
+	if (!mac_array)
+		goto free_message_nospc;
+
+	/* Populate the unicast MAC addrs and capture PROMISC/ALLMULTI flags */
+	for (addr_count = 0, i = FBNIC_RPC_TCAM_MACDA_PROMISC_IDX;
+	     i >= fbd->mac_addr_boundary; i--) {
+		struct fbnic_mac_addr *mac_addr = &fbd->mac_addr[i];
+
+		if (mac_addr->state != FBNIC_TCAM_S_VALID)
+			continue;
+		if (test_bit(FBNIC_MAC_ADDR_T_ALLMULTI, mac_addr->act_tcam))
+			rx_flags |= FW_RPC_MAC_SYNC_RX_FLAGS_ALLMULTI;
+		if (test_bit(FBNIC_MAC_ADDR_T_PROMISC, mac_addr->act_tcam))
+			rx_flags |= FW_RPC_MAC_SYNC_RX_FLAGS_PROMISC;
+		if (!test_bit(FBNIC_MAC_ADDR_T_UNICAST, mac_addr->act_tcam))
+			continue;
+		if (addr_count == FW_RPC_MAC_SYNC_UC_ARRAY_SIZE) {
+			rx_flags |= FW_RPC_MAC_SYNC_RX_FLAGS_PROMISC;
+			continue;
+		}
+
+		err = fbnic_tlv_attr_put_value(mac_array,
+					       FBNIC_FW_RPC_MAC_SYNC_MAC_ADDR,
+					       mac_addr->value.addr8,
+					       ETH_ALEN);
+		if (err)
+			goto free_message;
+		addr_count++;
+	}
+
+	/* Close array */
+	fbnic_tlv_attr_nest_stop(msg);
+
+	mac_array = fbnic_tlv_attr_nest_start(msg,
+					      FBNIC_FW_RPC_MAC_SYNC_MC_ARRAY);
+	if (!mac_array)
+		goto free_message_nospc;
+
+	/* Repeat for multicast addrs, record BROADCAST/ALLMULTI flags */
+	for (addr_count = 0, i = FBNIC_RPC_TCAM_MACDA_BROADCAST_IDX;
+	     i < fbd->mac_addr_boundary; i++) {
+		struct fbnic_mac_addr *mac_addr = &fbd->mac_addr[i];
+
+		if (mac_addr->state != FBNIC_TCAM_S_VALID)
+			continue;
+		if (test_bit(FBNIC_MAC_ADDR_T_BROADCAST, mac_addr->act_tcam))
+			rx_flags |= FW_RPC_MAC_SYNC_RX_FLAGS_BROADCAST;
+		if (test_bit(FBNIC_MAC_ADDR_T_ALLMULTI, mac_addr->act_tcam))
+			rx_flags |= FW_RPC_MAC_SYNC_RX_FLAGS_ALLMULTI;
+		if (!test_bit(FBNIC_MAC_ADDR_T_MULTICAST, mac_addr->act_tcam))
+			continue;
+		if (addr_count == FW_RPC_MAC_SYNC_MC_ARRAY_SIZE) {
+			rx_flags |= FW_RPC_MAC_SYNC_RX_FLAGS_ALLMULTI;
+			continue;
+		}
+
+		err = fbnic_tlv_attr_put_value(mac_array,
+					       FBNIC_FW_RPC_MAC_SYNC_MAC_ADDR,
+					       mac_addr->value.addr8,
+					       ETH_ALEN);
+		if (err)
+			goto free_message;
+		addr_count++;
+	}
+
+	/* Close array */
+	fbnic_tlv_attr_nest_stop(msg);
+
+	/* Report flags at end of list */
+	err = fbnic_tlv_attr_put_int(msg, FBNIC_FW_RPC_MAC_SYNC_RX_FLAGS,
+				     rx_flags);
+	if (err)
+		goto free_message;
+
+	/* Send message off to FW notifying it of current RPC config */
+	err = fbnic_mbx_map_tlv_msg(fbd, msg);
+	if (err)
+		goto free_message;
+	return 0;
+free_message_nospc:
+	err = -ENOSPC;
+free_message:
+	free_page((unsigned long)msg);
+	return err;
+}
+
 void fbnic_get_fw_ver_commit_str(struct fbnic_dev *fbd, char *fw_version,
 				 const size_t str_sz)
 {
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_fw.h b/drivers/net/ethernet/meta/fbnic/fbnic_fw.h
index fde331696fdd..ec67b80809b0 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_fw.h
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_fw.h
@@ -51,8 +51,10 @@ struct fbnic_fw_cap {
 	} stored;
 	u8 active_slot;
 	u8 bmc_mac_addr[4][ETH_ALEN];
-	u8 bmc_present : 1;
-	u8 all_multi : 1;
+	u8 bmc_present : 1;
+	u8 need_bmc_tcam_reinit : 1;
+	u8 need_bmc_macda_sync : 1;
+	u8 
all_multi : 1; u8 link_speed; u8 link_fec; u32 anti_rollback_version; @@ -97,6 +99,7 @@ int fbnic_fw_xmit_tsene_read_msg(struct fbnic_dev *fbd, struct fbnic_fw_completion *cmpl_data); int fbnic_fw_xmit_send_logs(struct fbnic_dev *fbd, bool enable, bool send_log_history); +int fbnic_fw_xmit_rpc_macda_sync(struct fbnic_dev *fbd); struct fbnic_fw_completion *fbnic_fw_alloc_cmpl(u32 msg_type); void fbnic_fw_put_cmpl(struct fbnic_fw_completion *cmpl_data); @@ -143,6 +146,7 @@ enum { FBNIC_TLV_MSG_ID_LOG_SEND_LOGS_REQ = 0x43, FBNIC_TLV_MSG_ID_LOG_MSG_REQ = 0x44, FBNIC_TLV_MSG_ID_LOG_MSG_RESP = 0x45, + FBNIC_TLV_MSG_ID_RPC_MAC_SYNC_REQ = 0x46, }; #define FBNIC_FW_CAP_RESP_VERSION_MAJOR CSR_GENMASK(31, 24) @@ -235,4 +239,19 @@ enum { FBNIC_FW_LOG_MSG_MAX }; +enum { + FBNIC_FW_RPC_MAC_SYNC_RX_FLAGS = 0x0, + FBNIC_FW_RPC_MAC_SYNC_UC_ARRAY = 0x1, + FBNIC_FW_RPC_MAC_SYNC_MC_ARRAY = 0x2, + FBNIC_FW_RPC_MAC_SYNC_MAC_ADDR = 0x3, + FBNIC_FW_RPC_MAC_SYNC_MSG_MAX +}; + +#define FW_RPC_MAC_SYNC_RX_FLAGS_PROMISC 1 +#define FW_RPC_MAC_SYNC_RX_FLAGS_ALLMULTI 2 +#define FW_RPC_MAC_SYNC_RX_FLAGS_BROADCAST 4 + +#define FW_RPC_MAC_SYNC_UC_ARRAY_SIZE 8 +#define FW_RPC_MAC_SYNC_MC_ARRAY_SIZE 8 + #endif /* _FBNIC_FW_H_ */ diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_hw_stats.c b/drivers/net/ethernet/meta/fbnic/fbnic_hw_stats.c index 4223d8100e64..8b9b2076beec 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_hw_stats.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_hw_stats.c @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) Meta Platforms, Inc. and affiliates. */ +#include <linux/rtnetlink.h> + #include "fbnic.h" static void fbnic_hw_stat_rst32(struct fbnic_dev *fbd, u32 reg, @@ -421,9 +423,9 @@ static void fbnic_get_hw_rxq_stats32(struct fbnic_dev *fbd, void fbnic_get_hw_q_stats(struct fbnic_dev *fbd, struct fbnic_hw_q_stats *hw_q) { - spin_lock(&fbd->hw_stats_lock); + spin_lock(&fbd->hw_stats.lock); fbnic_get_hw_rxq_stats32(fbd, hw_q); - spin_unlock(&fbd->hw_stats_lock); + spin_unlock(&fbd->hw_stats.lock); } static void fbnic_reset_pcie_stats_asic(struct fbnic_dev *fbd, @@ -510,20 +512,68 @@ static void fbnic_get_pcie_stats_asic64(struct fbnic_dev *fbd, &pcie->ob_rd_no_np_cred); } +static void fbnic_reset_phy_stats(struct fbnic_dev *fbd, + struct fbnic_phy_stats *phy_stats) +{ + const struct fbnic_mac *mac = fbd->mac; + + mac->get_fec_stats(fbd, true, &phy_stats->fec); + mac->get_pcs_stats(fbd, true, &phy_stats->pcs); +} + +static void fbnic_get_phy_stats32(struct fbnic_dev *fbd, + struct fbnic_phy_stats *phy_stats) +{ + const struct fbnic_mac *mac = fbd->mac; + + mac->get_fec_stats(fbd, false, &phy_stats->fec); + mac->get_pcs_stats(fbd, false, &phy_stats->pcs); +} + +static void fbnic_reset_hw_mac_stats(struct fbnic_dev *fbd, + struct fbnic_mac_stats *mac_stats) +{ + const struct fbnic_mac *mac = fbd->mac; + + mac->get_eth_mac_stats(fbd, true, &mac_stats->eth_mac); + mac->get_pause_stats(fbd, true, &mac_stats->pause); + mac->get_eth_ctrl_stats(fbd, true, &mac_stats->eth_ctrl); + mac->get_rmon_stats(fbd, true, &mac_stats->rmon); +} + void fbnic_reset_hw_stats(struct fbnic_dev *fbd) { - spin_lock(&fbd->hw_stats_lock); + spin_lock(&fbd->hw_stats.lock); + fbnic_reset_phy_stats(fbd, &fbd->hw_stats.phy); fbnic_reset_tmi_stats(fbd, &fbd->hw_stats.tmi); fbnic_reset_tti_stats(fbd, &fbd->hw_stats.tti); fbnic_reset_rpc_stats(fbd, &fbd->hw_stats.rpc); fbnic_reset_rxb_stats(fbd, &fbd->hw_stats.rxb); fbnic_reset_hw_rxq_stats(fbd, fbd->hw_stats.hw_q); fbnic_reset_pcie_stats_asic(fbd, &fbd->hw_stats.pcie); 
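/* Illustrative aside, not part of the patch: the overflow rule used by
 * fbnic_fw_xmit_rpc_macda_sync() above, reduced to a standalone sketch.
 * All example_* names are invented for the example; only the shape of the
 * logic mirrors the driver: copy up to a fixed number of addresses into
 * the array, and once it is full stop copying and fall back to a
 * promiscuous filter so no address is silently dropped.
 */
#include <stdint.h>
#include <string.h>

#define EXAMPLE_UC_ARRAY_SIZE	8	/* like FW_RPC_MAC_SYNC_UC_ARRAY_SIZE */
#define EXAMPLE_FLAG_PROMISC	0x1	/* like FW_RPC_MAC_SYNC_RX_FLAGS_PROMISC */

struct example_macda_sync {
	uint8_t addrs[EXAMPLE_UC_ARRAY_SIZE][6];
	unsigned int addr_count;
	uint32_t rx_flags;
};

static void example_add_uc_addr(struct example_macda_sync *s,
				const uint8_t mac[6])
{
	/* Array exhausted: request promiscuous Rx instead of dropping */
	if (s->addr_count == EXAMPLE_UC_ARRAY_SIZE) {
		s->rx_flags |= EXAMPLE_FLAG_PROMISC;
		return;
	}
	memcpy(s->addrs[s->addr_count++], mac, 6);
}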
- spin_unlock(&fbd->hw_stats_lock); + spin_unlock(&fbd->hw_stats.lock); + + /* Once registered, the only other access to MAC stats is via the + * ethtool API which is protected by the rtnl_lock. The call to + * fbnic_reset_hw_stats() during PCI recovery is also protected + * by the rtnl_lock hence, we don't need the spinlock to access + * the MAC stats. + */ + if (fbd->netdev) + ASSERT_RTNL(); + fbnic_reset_hw_mac_stats(fbd, &fbd->hw_stats.mac); +} + +void fbnic_init_hw_stats(struct fbnic_dev *fbd) +{ + spin_lock_init(&fbd->hw_stats.lock); + + fbnic_reset_hw_stats(fbd); } static void __fbnic_get_hw_stats32(struct fbnic_dev *fbd) { + fbnic_get_phy_stats32(fbd, &fbd->hw_stats.phy); fbnic_get_tmi_stats32(fbd, &fbd->hw_stats.tmi); fbnic_get_tti_stats32(fbd, &fbd->hw_stats.tti); fbnic_get_rpc_stats32(fbd, &fbd->hw_stats.rpc); @@ -533,19 +583,19 @@ static void __fbnic_get_hw_stats32(struct fbnic_dev *fbd) void fbnic_get_hw_stats32(struct fbnic_dev *fbd) { - spin_lock(&fbd->hw_stats_lock); + spin_lock(&fbd->hw_stats.lock); __fbnic_get_hw_stats32(fbd); - spin_unlock(&fbd->hw_stats_lock); + spin_unlock(&fbd->hw_stats.lock); } void fbnic_get_hw_stats(struct fbnic_dev *fbd) { - spin_lock(&fbd->hw_stats_lock); + spin_lock(&fbd->hw_stats.lock); __fbnic_get_hw_stats32(fbd); fbnic_get_tmi_stats(fbd, &fbd->hw_stats.tmi); fbnic_get_tti_stats(fbd, &fbd->hw_stats.tti); fbnic_get_rxb_stats(fbd, &fbd->hw_stats.rxb); fbnic_get_pcie_stats_asic64(fbd, &fbd->hw_stats.pcie); - spin_unlock(&fbd->hw_stats_lock); + spin_unlock(&fbd->hw_stats.lock); } diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_hw_stats.h b/drivers/net/ethernet/meta/fbnic/fbnic_hw_stats.h index 4fe239717497..aa3f429a9aed 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_hw_stats.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_hw_stats.h @@ -5,6 +5,7 @@ #define _FBNIC_HW_STATS_H_ #include <linux/ethtool.h> +#include <linux/spinlock.h> #include "fbnic_csr.h" @@ -22,6 +23,16 @@ struct fbnic_hw_stat { struct fbnic_stat_counter bytes; }; +struct fbnic_fec_stats { + struct fbnic_stat_counter corrected_blocks, uncorrectable_blocks; +}; + +struct fbnic_pcs_stats { + struct { + struct fbnic_stat_counter lanes[FBNIC_PCS_MAX_LANES]; + } SymbolErrorDuringCarrier; +}; + /* Note: not updated by fbnic_get_hw_stats() */ struct fbnic_eth_ctrl_stats { struct fbnic_stat_counter MACControlFramesTransmitted; @@ -39,6 +50,12 @@ struct fbnic_rmon_stats { struct fbnic_stat_counter hist_tx[ETHTOOL_RMON_HIST_MAX]; }; +/* Note: not updated by fbnic_get_hw_stats() */ +struct fbnic_pause_stats { + struct fbnic_stat_counter tx_pause_frames; + struct fbnic_stat_counter rx_pause_frames; +}; + struct fbnic_eth_mac_stats { struct fbnic_stat_counter FramesTransmittedOK; struct fbnic_stat_counter FramesReceivedOK; @@ -55,8 +72,14 @@ struct fbnic_eth_mac_stats { struct fbnic_stat_counter FrameTooLongErrors; }; +struct fbnic_phy_stats { + struct fbnic_fec_stats fec; + struct fbnic_pcs_stats pcs; +}; + struct fbnic_mac_stats { struct fbnic_eth_mac_stats eth_mac; + struct fbnic_pause_stats pause; struct fbnic_eth_ctrl_stats eth_ctrl; struct fbnic_rmon_stats rmon; }; @@ -115,6 +138,7 @@ struct fbnic_pcie_stats { }; struct fbnic_hw_stats { + struct fbnic_phy_stats phy; struct fbnic_mac_stats mac; struct fbnic_tmi_stats tmi; struct fbnic_tti_stats tti; @@ -122,11 +146,15 @@ struct fbnic_hw_stats { struct fbnic_rxb_stats rxb; struct fbnic_hw_q_stats hw_q[FBNIC_MAX_QUEUES]; struct fbnic_pcie_stats pcie; + + /* Lock protecting the access to hw stats */ + spinlock_t lock; }; u64 
fbnic_stat_rd64(struct fbnic_dev *fbd, u32 reg, u32 offset);
 
 void fbnic_reset_hw_stats(struct fbnic_dev *fbd);
+void fbnic_init_hw_stats(struct fbnic_dev *fbd);
 void fbnic_get_hw_q_stats(struct fbnic_dev *fbd,
 			  struct fbnic_hw_q_stats *hw_q);
 void fbnic_get_hw_stats32(struct fbnic_dev *fbd);
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_mac.c b/drivers/net/ethernet/meta/fbnic/fbnic_mac.c
index fd8d67f9048e..8f998d26b9a3 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_mac.c
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_mac.c
@@ -632,6 +632,50 @@ static void fbnic_mac_link_up_asic(struct fbnic_dev *fbd,
 }
 
 static void
+fbnic_pcs_rsfec_stat_rd32(struct fbnic_dev *fbd, u32 reg, bool reset,
+			  struct fbnic_stat_counter *stat)
+{
+	u32 pcs_rsfec_stat;
+
+	/* The PCS/RSFEC registers are only 16b wide each. So what we will
+	 * have after the 64b read is 0x0000xxxx0000xxxx. To make it usable
+	 * as a full stat we will shift the upper bits into the lower set of
+	 * 0s and then mask off the math at 32b.
+	 *
+	 * Read ordering must be lower reg followed by upper reg.
+	 */
+	pcs_rsfec_stat = rd32(fbd, reg) & 0xffff;
+	pcs_rsfec_stat |= rd32(fbd, reg + 1) << 16;
+
+	/* RSFEC registers clear themselves upon being read so there is no
+	 * need to store the old_reg_value.
+	 */
+	if (!reset)
+		stat->value += pcs_rsfec_stat;
+}
+
+static void
+fbnic_mac_get_fec_stats(struct fbnic_dev *fbd, bool reset,
+			struct fbnic_fec_stats *s)
+{
+	fbnic_pcs_rsfec_stat_rd32(fbd, FBNIC_RSFEC_CCW_LO(0), reset,
+				  &s->corrected_blocks);
+	fbnic_pcs_rsfec_stat_rd32(fbd, FBNIC_RSFEC_NCCW_LO(0), reset,
+				  &s->uncorrectable_blocks);
+}
+
+static void
+fbnic_mac_get_pcs_stats(struct fbnic_dev *fbd, bool reset,
+			struct fbnic_pcs_stats *s)
+{
+	int i;
+
+	for (i = 0; i < FBNIC_PCS_MAX_LANES; i++)
+		fbnic_pcs_rsfec_stat_rd32(fbd, FBNIC_PCS_SYMBLERR_LO(i), reset,
+					  &s->SymbolErrorDuringCarrier.lanes[i]);
+}
+
+static void
 fbnic_mac_get_eth_mac_stats(struct fbnic_dev *fbd, bool reset,
 			    struct fbnic_eth_mac_stats *mac_stats)
 {
@@ -666,6 +710,16 @@ fbnic_mac_get_eth_mac_stats(struct fbnic_dev *fbd, bool reset,
 }
 
 static void
+fbnic_mac_get_pause_stats(struct fbnic_dev *fbd, bool reset,
+			  struct fbnic_pause_stats *pause_stats)
+{
+	fbnic_mac_stat_rd64(fbd, reset, pause_stats->tx_pause_frames,
+			    MAC_STAT_TX_XOFF_STB);
+	fbnic_mac_stat_rd64(fbd, reset, pause_stats->rx_pause_frames,
+			    MAC_STAT_RX_XOFF_STB);
+}
+
+static void
 fbnic_mac_get_eth_ctrl_stats(struct fbnic_dev *fbd, bool reset,
 			     struct fbnic_eth_ctrl_stats *ctrl_stats)
 {
@@ -809,7 +863,10 @@ static const struct fbnic_mac fbnic_mac_asic = {
 	.pcs_disable = fbnic_pcs_disable_asic,
 	.pcs_get_link = fbnic_pcs_get_link_asic,
 	.pcs_get_link_event = fbnic_pcs_get_link_event_asic,
+	.get_fec_stats = fbnic_mac_get_fec_stats,
+	.get_pcs_stats = fbnic_mac_get_pcs_stats,
 	.get_eth_mac_stats = fbnic_mac_get_eth_mac_stats,
+	.get_pause_stats = fbnic_mac_get_pause_stats,
 	.get_eth_ctrl_stats = fbnic_mac_get_eth_ctrl_stats,
 	.get_rmon_stats = fbnic_mac_get_rmon_stats,
 	.link_down = fbnic_mac_link_down_asic,
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_mac.h b/drivers/net/ethernet/meta/fbnic/fbnic_mac.h
index 86fa06da2b3e..ede5ff0dae22 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_mac.h
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_mac.h
@@ -79,8 +79,14 @@ struct fbnic_mac {
 	bool (*pcs_get_link)(struct fbnic_dev *fbd);
 	int (*pcs_get_link_event)(struct fbnic_dev *fbd);
 
+	void (*get_fec_stats)(struct fbnic_dev *fbd, bool reset,
+			      struct fbnic_fec_stats *fec_stats);
+	void (*get_pcs_stats)(struct 
fbnic_dev *fbd, bool reset, + struct fbnic_pcs_stats *pcs_stats); void (*get_eth_mac_stats)(struct fbnic_dev *fbd, bool reset, struct fbnic_eth_mac_stats *mac_stats); + void (*get_pause_stats)(struct fbnic_dev *fbd, bool reset, + struct fbnic_pause_stats *pause_stats); void (*get_eth_ctrl_stats)(struct fbnic_dev *fbd, bool reset, struct fbnic_eth_ctrl_stats *ctrl_stats); void (*get_rmon_stats)(struct fbnic_dev *fbd, bool reset, diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c index 40581550da1a..dd35de301870 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c @@ -183,11 +183,10 @@ static int fbnic_mc_unsync(struct net_device *netdev, const unsigned char *addr) return ret; } -void __fbnic_set_rx_mode(struct net_device *netdev) +void __fbnic_set_rx_mode(struct fbnic_dev *fbd) { - struct fbnic_net *fbn = netdev_priv(netdev); bool uc_promisc = false, mc_promisc = false; - struct fbnic_dev *fbd = fbn->fbd; + struct net_device *netdev = fbd->netdev; struct fbnic_mac_addr *mac_addr; int err; @@ -224,49 +223,8 @@ void __fbnic_set_rx_mode(struct net_device *netdev) uc_promisc |= !!(netdev->flags & IFF_PROMISC); mc_promisc |= !!(netdev->flags & IFF_ALLMULTI) || uc_promisc; - /* Populate last TCAM entry with promiscuous entry and 0/1 bit mask */ - mac_addr = &fbd->mac_addr[FBNIC_RPC_TCAM_MACDA_PROMISC_IDX]; - if (uc_promisc) { - if (!is_zero_ether_addr(mac_addr->value.addr8) || - mac_addr->state != FBNIC_TCAM_S_VALID) { - eth_zero_addr(mac_addr->value.addr8); - eth_broadcast_addr(mac_addr->mask.addr8); - clear_bit(FBNIC_MAC_ADDR_T_ALLMULTI, - mac_addr->act_tcam); - set_bit(FBNIC_MAC_ADDR_T_PROMISC, - mac_addr->act_tcam); - mac_addr->state = FBNIC_TCAM_S_ADD; - } - } else if (mc_promisc && - (!fbnic_bmc_present(fbd) || !fbd->fw_cap.all_multi)) { - /* We have to add a special handler for multicast as the - * BMC may have an all-multi rule already in place. As such - * adding a rule ourselves won't do any good so we will have - * to modify the rules for the ALL MULTI below if the BMC - * already has the rule in place. 
- */ - if (!is_multicast_ether_addr(mac_addr->value.addr8) || - mac_addr->state != FBNIC_TCAM_S_VALID) { - eth_zero_addr(mac_addr->value.addr8); - eth_broadcast_addr(mac_addr->mask.addr8); - mac_addr->value.addr8[0] ^= 1; - mac_addr->mask.addr8[0] ^= 1; - set_bit(FBNIC_MAC_ADDR_T_ALLMULTI, - mac_addr->act_tcam); - clear_bit(FBNIC_MAC_ADDR_T_PROMISC, - mac_addr->act_tcam); - mac_addr->state = FBNIC_TCAM_S_ADD; - } - } else if (mac_addr->state == FBNIC_TCAM_S_VALID) { - if (test_bit(FBNIC_MAC_ADDR_T_BMC, mac_addr->act_tcam)) { - clear_bit(FBNIC_MAC_ADDR_T_ALLMULTI, - mac_addr->act_tcam); - clear_bit(FBNIC_MAC_ADDR_T_PROMISC, - mac_addr->act_tcam); - } else { - mac_addr->state = FBNIC_TCAM_S_DELETE; - } - } + /* Update the promiscuous rules */ + fbnic_promisc_sync(fbd, uc_promisc, mc_promisc); /* Add rules for BMC all multicast if it is enabled */ fbnic_bmc_rpc_all_multi_config(fbd, mc_promisc); @@ -282,9 +240,12 @@ void __fbnic_set_rx_mode(struct net_device *netdev) static void fbnic_set_rx_mode(struct net_device *netdev) { + struct fbnic_net *fbn = netdev_priv(netdev); + struct fbnic_dev *fbd = fbn->fbd; + /* No need to update the hardware if we are not running */ if (netif_running(netdev)) - __fbnic_set_rx_mode(netdev); + __fbnic_set_rx_mode(fbd); } static int fbnic_set_mac(struct net_device *netdev, void *p) @@ -301,10 +262,9 @@ static int fbnic_set_mac(struct net_device *netdev, void *p) return 0; } -void fbnic_clear_rx_mode(struct net_device *netdev) +void fbnic_clear_rx_mode(struct fbnic_dev *fbd) { - struct fbnic_net *fbn = netdev_priv(netdev); - struct fbnic_dev *fbd = fbn->fbd; + struct net_device *netdev = fbd->netdev; int idx; for (idx = ARRAY_SIZE(fbd->mac_addr); idx--;) { @@ -411,11 +371,12 @@ static void fbnic_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats64) { u64 rx_bytes, rx_packets, rx_dropped = 0, rx_errors = 0; + u64 rx_over = 0, rx_missed = 0, rx_length = 0; u64 tx_bytes, tx_packets, tx_dropped = 0; struct fbnic_net *fbn = netdev_priv(dev); struct fbnic_dev *fbd = fbn->fbd; struct fbnic_queue_stats *stats; - u64 rx_over = 0, rx_missed = 0; + unsigned int start, i; fbnic_get_hw_stats(fbd); @@ -427,12 +388,12 @@ static void fbnic_get_stats64(struct net_device *dev, tx_dropped = stats->dropped; /* Record drops from Tx HW Datapath */ - spin_lock(&fbd->hw_stats_lock); + spin_lock(&fbd->hw_stats.lock); tx_dropped += fbd->hw_stats.tmi.drop.frames.value + fbd->hw_stats.tti.cm_drop.frames.value + fbd->hw_stats.tti.frame_drop.frames.value + fbd->hw_stats.tti.tbi_drop.frames.value; - spin_unlock(&fbd->hw_stats_lock); + spin_unlock(&fbd->hw_stats.lock); stats64->tx_bytes = tx_bytes; stats64->tx_packets = tx_packets; @@ -463,7 +424,7 @@ static void fbnic_get_stats64(struct net_device *dev, rx_packets = stats->packets; rx_dropped = stats->dropped; - spin_lock(&fbd->hw_stats_lock); + spin_lock(&fbd->hw_stats.lock); /* Record drops for the host FIFOs. 
* 4: network to Host, 6: BMC to Host * Exclude the BMC and MC FIFOs as those stats may contain drops @@ -483,7 +444,7 @@ static void fbnic_get_stats64(struct net_device *dev, /* Report packets with errors */ rx_errors += fbd->hw_stats.hw_q[i].rde_pkt_err.value; } - spin_unlock(&fbd->hw_stats_lock); + spin_unlock(&fbd->hw_stats.lock); stats64->rx_bytes = rx_bytes; stats64->rx_packets = rx_packets; @@ -493,6 +454,7 @@ static void fbnic_get_stats64(struct net_device *dev, stats64->rx_missed_errors = rx_missed; for (i = 0; i < fbn->num_rx_queues; i++) { + struct fbnic_ring *xdpr = fbn->tx[FBNIC_MAX_TXQS + i]; struct fbnic_ring *rxr = fbn->rx[i]; if (!rxr) @@ -504,14 +466,66 @@ static void fbnic_get_stats64(struct net_device *dev, rx_bytes = stats->bytes; rx_packets = stats->packets; rx_dropped = stats->dropped; + rx_length = stats->rx.length_errors; } while (u64_stats_fetch_retry(&stats->syncp, start)); stats64->rx_bytes += rx_bytes; stats64->rx_packets += rx_packets; stats64->rx_dropped += rx_dropped; + stats64->rx_errors += rx_length; + stats64->rx_length_errors += rx_length; + + if (!xdpr) + continue; + + stats = &xdpr->stats; + do { + start = u64_stats_fetch_begin(&stats->syncp); + tx_bytes = stats->bytes; + tx_packets = stats->packets; + tx_dropped = stats->dropped; + } while (u64_stats_fetch_retry(&stats->syncp, start)); + + stats64->tx_bytes += tx_bytes; + stats64->tx_packets += tx_packets; + stats64->tx_dropped += tx_dropped; } } +bool fbnic_check_split_frames(struct bpf_prog *prog, unsigned int mtu, + u32 hds_thresh) +{ + if (!prog) + return false; + + if (prog->aux->xdp_has_frags) + return false; + + return mtu + ETH_HLEN > hds_thresh; +} + +static int fbnic_bpf(struct net_device *netdev, struct netdev_bpf *bpf) +{ + struct bpf_prog *prog = bpf->prog, *prev_prog; + struct fbnic_net *fbn = netdev_priv(netdev); + + if (bpf->command != XDP_SETUP_PROG) + return -EINVAL; + + if (fbnic_check_split_frames(prog, netdev->mtu, + fbn->hds_thresh)) { + NL_SET_ERR_MSG_MOD(bpf->extack, + "MTU too high, or HDS threshold is too low for single buffer XDP"); + return -EOPNOTSUPP; + } + + prev_prog = xchg(&fbn->xdp_prog, prog); + if (prev_prog) + bpf_prog_put(prev_prog); + + return 0; +} + static const struct net_device_ops fbnic_netdev_ops = { .ndo_open = fbnic_open, .ndo_stop = fbnic_stop, @@ -521,6 +535,7 @@ static const struct net_device_ops fbnic_netdev_ops = { .ndo_set_mac_address = fbnic_set_mac, .ndo_set_rx_mode = fbnic_set_rx_mode, .ndo_get_stats64 = fbnic_get_stats64, + .ndo_bpf = fbnic_bpf, .ndo_hwtstamp_get = fbnic_hwtstamp_get, .ndo_hwtstamp_set = fbnic_hwtstamp_set, }; @@ -557,12 +572,12 @@ static void fbnic_get_queue_stats_rx(struct net_device *dev, int idx, fbnic_get_hw_q_stats(fbd, fbd->hw_stats.hw_q); - spin_lock(&fbd->hw_stats_lock); + spin_lock(&fbd->hw_stats.lock); rx->hw_drop_overruns = fbd->hw_stats.hw_q[idx].rde_pkt_cq_drop.value + fbd->hw_stats.hw_q[idx].rde_pkt_bdq_drop.value; rx->hw_drops = fbd->hw_stats.hw_q[idx].rde_pkt_err.value + rx->hw_drop_overruns; - spin_unlock(&fbd->hw_stats_lock); + spin_unlock(&fbd->hw_stats.lock); } static void fbnic_get_queue_stats_tx(struct net_device *dev, int idx, @@ -572,6 +587,7 @@ static void fbnic_get_queue_stats_tx(struct net_device *dev, int idx, struct fbnic_ring *txr = fbn->tx[idx]; struct fbnic_queue_stats *stats; u64 stop, wake, csum, lso; + struct fbnic_ring *xdpr; unsigned int start; u64 bytes, packets; @@ -595,6 +611,19 @@ static void fbnic_get_queue_stats_tx(struct net_device *dev, int idx, tx->hw_gso_wire_packets = lso; tx->stop 
= stop; tx->wake = wake; + + xdpr = fbn->tx[FBNIC_MAX_TXQS + idx]; + if (xdpr) { + stats = &xdpr->stats; + do { + start = u64_stats_fetch_begin(&stats->syncp); + bytes = stats->bytes; + packets = stats->packets; + } while (u64_stats_fetch_retry(&stats->syncp, start)); + + tx->bytes += bytes; + tx->packets += packets; + } } static void fbnic_get_base_stats(struct net_device *dev, @@ -682,6 +711,7 @@ struct net_device *fbnic_netdev_alloc(struct fbnic_dev *fbd) netdev->netdev_ops = &fbnic_netdev_ops; netdev->stat_ops = &fbnic_stat_ops; + netdev->queue_mgmt_ops = &fbnic_queue_mgmt_ops; fbnic_set_ethtool_ops(netdev); @@ -699,6 +729,10 @@ struct net_device *fbnic_netdev_alloc(struct fbnic_dev *fbd) fbn->rx_usecs = FBNIC_RX_USECS_DEFAULT; fbn->rx_max_frames = FBNIC_RX_FRAMES_DEFAULT; + /* Initialize the hds_thresh */ + netdev->cfg->hds_thresh = FBNIC_HDS_THRESH_DEFAULT; + fbn->hds_thresh = FBNIC_HDS_THRESH_DEFAULT; + default_queues = netif_get_num_default_rss_queues(); if (default_queues > fbd->max_num_queues) default_queues = fbd->max_num_queues; diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h index 86576ae04262..e84e0527c3a9 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h @@ -18,7 +18,9 @@ #define FBNIC_TUN_GSO_FEATURES NETIF_F_GSO_IPXIP6 struct fbnic_net { - struct fbnic_ring *tx[FBNIC_MAX_TXQS]; + struct bpf_prog *xdp_prog; + + struct fbnic_ring *tx[FBNIC_MAX_TXQS + FBNIC_MAX_XDPQS]; struct fbnic_ring *rx[FBNIC_MAX_RXQS]; struct fbnic_napi_vector *napi[FBNIC_MAX_NAPI_VECTORS]; @@ -31,6 +33,8 @@ struct fbnic_net { u32 ppq_size; u32 rcq_size; + u32 hds_thresh; + u16 rx_usecs; u16 tx_usecs; @@ -90,8 +94,8 @@ void fbnic_time_init(struct fbnic_net *fbn); int fbnic_time_start(struct fbnic_net *fbn); void fbnic_time_stop(struct fbnic_net *fbn); -void __fbnic_set_rx_mode(struct net_device *netdev); -void fbnic_clear_rx_mode(struct net_device *netdev); +void __fbnic_set_rx_mode(struct fbnic_dev *fbd); +void fbnic_clear_rx_mode(struct fbnic_dev *fbd); void fbnic_phylink_get_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause); @@ -102,4 +106,7 @@ int fbnic_phylink_ethtool_ksettings_get(struct net_device *netdev, int fbnic_phylink_get_fecparam(struct net_device *netdev, struct ethtool_fecparam *fecparam); int fbnic_phylink_init(struct net_device *netdev); + +bool fbnic_check_split_frames(struct bpf_prog *prog, + unsigned int mtu, u32 hds_threshold); #endif /* _FBNIC_NETDEV_H_ */ diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_pci.c b/drivers/net/ethernet/meta/fbnic/fbnic_pci.c index 28e23e3ffca8..9fdc8f4f36cc 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_pci.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_pci.c @@ -135,7 +135,7 @@ void fbnic_up(struct fbnic_net *fbn) fbnic_rss_reinit_hw(fbn->fbd, fbn); - __fbnic_set_rx_mode(fbn->netdev); + __fbnic_set_rx_mode(fbn->fbd); /* Enable Tx/Rx processing */ fbnic_napi_enable(fbn); @@ -152,7 +152,7 @@ void fbnic_down_noidle(struct fbnic_net *fbn) fbnic_napi_disable(fbn); netif_tx_disable(fbn->netdev); - fbnic_clear_rx_mode(fbn->netdev); + fbnic_clear_rx_mode(fbn->fbd); fbnic_clear_rules(fbn->fbd); fbnic_rss_disable_hw(fbn->fbd); fbnic_disable(fbn); @@ -204,8 +204,13 @@ static void fbnic_service_task(struct work_struct *work) fbnic_health_check(fbd); - if (netif_carrier_ok(fbd->netdev)) + fbnic_bmc_rpc_check(fbd); + + if (netif_carrier_ok(fbd->netdev)) { + netdev_lock(fbd->netdev); 
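/* Illustrative aside, not part of the patch: the retry loop used with
 * u64_stats_fetch_begin()/u64_stats_fetch_retry() in the statistics
 * readers above, approximated in portable C11 so the pattern is visible
 * outside the kernel. The reader snapshots the counters and retries
 * whenever the writer's sequence count was odd (update in progress) or
 * changed underneath it. All example_* names are invented for the sketch.
 */
#include <stdatomic.h>
#include <stdint.h>

struct example_queue_stats {
	atomic_uint seq;	/* even: stable, odd: update in progress */
	uint64_t bytes;
	uint64_t packets;
};

static void example_stats_add(struct example_queue_stats *s,
			      uint64_t bytes, uint64_t packets)
{
	atomic_fetch_add_explicit(&s->seq, 1, memory_order_acq_rel);
	s->bytes += bytes;
	s->packets += packets;
	atomic_fetch_add_explicit(&s->seq, 1, memory_order_acq_rel);
}

static void example_stats_read(struct example_queue_stats *s,
			       uint64_t *bytes, uint64_t *packets)
{
	unsigned int start;

	do {
		start = atomic_load_explicit(&s->seq, memory_order_acquire);
		*bytes = s->bytes;
		*packets = s->packets;
	} while ((start & 1) ||
		 start != atomic_load_explicit(&s->seq, memory_order_acquire));
}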
fbnic_napi_depletion_check(fbd->netdev); + netdev_unlock(fbd->netdev); + } if (netif_running(fbd->netdev)) schedule_delayed_work(&fbd->service_task, HZ); @@ -302,10 +307,9 @@ static int fbnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) fbnic_devlink_register(fbd); fbnic_dbg_fbd_init(fbd); - spin_lock_init(&fbd->hw_stats_lock); /* Capture snapshot of hardware stats so netdev can calculate delta */ - fbnic_reset_hw_stats(fbd); + fbnic_init_hw_stats(fbd); fbnic_hwmon_register(fbd); @@ -390,12 +394,14 @@ static int fbnic_pm_suspend(struct device *dev) goto null_uc_addr; rtnl_lock(); + netdev_lock(netdev); netif_device_detach(netdev); if (netif_running(netdev)) netdev->netdev_ops->ndo_stop(netdev); + netdev_unlock(netdev); rtnl_unlock(); null_uc_addr: @@ -460,10 +466,12 @@ static int __fbnic_pm_resume(struct device *dev) fbnic_reset_queues(fbn, fbn->num_tx_queues, fbn->num_rx_queues); rtnl_lock(); + netdev_lock(netdev); if (netif_running(netdev)) err = __fbnic_open(fbn); + netdev_unlock(netdev); rtnl_unlock(); if (err) goto err_free_mbx; @@ -489,6 +497,10 @@ static void __fbnic_pm_attach(struct device *dev) struct net_device *netdev = fbd->netdev; struct fbnic_net *fbn; + rtnl_lock(); + fbnic_reset_hw_stats(fbd); + rtnl_unlock(); + if (fbnic_init_failure(fbd)) return; diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_rpc.c b/drivers/net/ethernet/meta/fbnic/fbnic_rpc.c index 8ff07b5562e3..4284b3cb7fcc 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_rpc.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_rpc.c @@ -6,6 +6,7 @@ #include <net/ipv6.h> #include "fbnic.h" +#include "fbnic_fw.h" #include "fbnic_netdev.h" #include "fbnic_rpc.h" @@ -71,6 +72,8 @@ u16 fbnic_flow_hash_2_rss_en_mask(struct fbnic_net *fbn, int flow_type) rss_en_mask |= FBNIC_FH_2_RSSEM_BIT(IP_DST, IP_DST, flow_hash); rss_en_mask |= FBNIC_FH_2_RSSEM_BIT(L4_B_0_1, L4_SRC, flow_hash); rss_en_mask |= FBNIC_FH_2_RSSEM_BIT(L4_B_2_3, L4_DST, flow_hash); + rss_en_mask |= FBNIC_FH_2_RSSEM_BIT(IP6_FL, OV6_FL_LBL, flow_hash); + rss_en_mask |= FBNIC_FH_2_RSSEM_BIT(IP6_FL, IV6_FL_LBL, flow_hash); return rss_en_mask; } @@ -129,12 +132,9 @@ void fbnic_bmc_rpc_all_multi_config(struct fbnic_dev *fbd, else clear_bit(FBNIC_MAC_ADDR_T_ALLMULTI, mac_addr->act_tcam); - } else if (!test_bit(FBNIC_MAC_ADDR_T_BMC, mac_addr->act_tcam) && - !is_zero_ether_addr(mac_addr->mask.addr8) && - mac_addr->state == FBNIC_TCAM_S_VALID) { - clear_bit(FBNIC_MAC_ADDR_T_ALLMULTI, mac_addr->act_tcam); - clear_bit(FBNIC_MAC_ADDR_T_BMC, mac_addr->act_tcam); - mac_addr->state = FBNIC_TCAM_S_DELETE; + } else { + __fbnic_xc_unsync(mac_addr, FBNIC_MAC_ADDR_T_BMC); + __fbnic_xc_unsync(mac_addr, FBNIC_MAC_ADDR_T_ALLMULTI); } /* We have to add a special handler for multicast as the @@ -236,8 +236,25 @@ void fbnic_bmc_rpc_init(struct fbnic_dev *fbd) act_tcam->mask.tcam[j] = 0xffff; act_tcam->state = FBNIC_TCAM_S_UPDATE; +} - fbnic_bmc_rpc_all_multi_config(fbd, false); +void fbnic_bmc_rpc_check(struct fbnic_dev *fbd) +{ + int err; + + if (fbd->fw_cap.need_bmc_tcam_reinit) { + fbnic_bmc_rpc_init(fbd); + __fbnic_set_rx_mode(fbd); + fbd->fw_cap.need_bmc_tcam_reinit = false; + } + + if (fbd->fw_cap.need_bmc_macda_sync) { + err = fbnic_fw_xmit_rpc_macda_sync(fbd); + if (err) + dev_warn(fbd->dev, + "Writing MACDA table to FW failed, err: %d\n", err); + fbd->fw_cap.need_bmc_macda_sync = false; + } } #define FBNIC_ACT1_INIT(_l4, _udp, _ip, _v6) \ @@ -452,6 +469,50 @@ int __fbnic_xc_unsync(struct fbnic_mac_addr *mac_addr, unsigned int tcam_idx) return 0; } +void 
fbnic_promisc_sync(struct fbnic_dev *fbd, + bool uc_promisc, bool mc_promisc) +{ + struct fbnic_mac_addr *mac_addr; + + /* Populate last TCAM entry with promiscuous entry and 0/1 bit mask */ + mac_addr = &fbd->mac_addr[FBNIC_RPC_TCAM_MACDA_PROMISC_IDX]; + if (uc_promisc) { + if (!is_zero_ether_addr(mac_addr->value.addr8) || + mac_addr->state != FBNIC_TCAM_S_VALID) { + eth_zero_addr(mac_addr->value.addr8); + eth_broadcast_addr(mac_addr->mask.addr8); + clear_bit(FBNIC_MAC_ADDR_T_ALLMULTI, + mac_addr->act_tcam); + set_bit(FBNIC_MAC_ADDR_T_PROMISC, + mac_addr->act_tcam); + mac_addr->state = FBNIC_TCAM_S_ADD; + } + } else if (mc_promisc && + (!fbnic_bmc_present(fbd) || !fbd->fw_cap.all_multi)) { + /* We have to add a special handler for multicast as the + * BMC may have an all-multi rule already in place. As such + * adding a rule ourselves won't do any good so we will have + * to modify the rules for the ALL MULTI below if the BMC + * already has the rule in place. + */ + if (!is_multicast_ether_addr(mac_addr->value.addr8) || + mac_addr->state != FBNIC_TCAM_S_VALID) { + eth_zero_addr(mac_addr->value.addr8); + eth_broadcast_addr(mac_addr->mask.addr8); + mac_addr->value.addr8[0] ^= 1; + mac_addr->mask.addr8[0] ^= 1; + set_bit(FBNIC_MAC_ADDR_T_ALLMULTI, + mac_addr->act_tcam); + clear_bit(FBNIC_MAC_ADDR_T_PROMISC, + mac_addr->act_tcam); + mac_addr->state = FBNIC_TCAM_S_ADD; + } + } else if (mac_addr->state == FBNIC_TCAM_S_VALID) { + __fbnic_xc_unsync(mac_addr, FBNIC_MAC_ADDR_T_ALLMULTI); + __fbnic_xc_unsync(mac_addr, FBNIC_MAC_ADDR_T_PROMISC); + } +} + void fbnic_sift_macda(struct fbnic_dev *fbd) { int dest, src; @@ -556,7 +617,7 @@ static void fbnic_write_macda_entry(struct fbnic_dev *fbd, unsigned int idx, void fbnic_write_macda(struct fbnic_dev *fbd) { - int idx; + int idx, updates = 0; for (idx = ARRAY_SIZE(fbd->mac_addr); idx--;) { struct fbnic_mac_addr *mac_addr = &fbd->mac_addr[idx]; @@ -565,6 +626,9 @@ void fbnic_write_macda(struct fbnic_dev *fbd) if (!(mac_addr->state & FBNIC_TCAM_S_UPDATE)) continue; + /* Record update count */ + updates++; + /* Clear by writing 0s. 
*/ if (mac_addr->state == FBNIC_TCAM_S_DELETE) { /* Invalidate entry and clear addr state info */ @@ -578,6 +642,14 @@ void fbnic_write_macda(struct fbnic_dev *fbd) mac_addr->state = FBNIC_TCAM_S_VALID; } + + /* If reinitializing the BMC TCAM we are doing an initial update */ + if (fbd->fw_cap.need_bmc_tcam_reinit) + updates++; + + /* If needed notify firmware of changes to MACDA TCAM */ + if (updates != 0 && fbnic_bmc_present(fbd)) + fbd->fw_cap.need_bmc_macda_sync = true; } static void fbnic_clear_act_tcam(struct fbnic_dev *fbd, unsigned int idx) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_rpc.h b/drivers/net/ethernet/meta/fbnic/fbnic_rpc.h index 6892414195c3..3d4925b2ac75 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_rpc.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_rpc.h @@ -184,6 +184,7 @@ struct fbnic_net; void fbnic_bmc_rpc_init(struct fbnic_dev *fbd); void fbnic_bmc_rpc_all_multi_config(struct fbnic_dev *fbd, bool enable_host); +void fbnic_bmc_rpc_check(struct fbnic_dev *fbd); void fbnic_reset_indir_tbl(struct fbnic_net *fbn); void fbnic_rss_key_fill(u32 *buffer); @@ -201,6 +202,9 @@ struct fbnic_mac_addr *__fbnic_mc_sync(struct fbnic_dev *fbd, void fbnic_sift_macda(struct fbnic_dev *fbd); void fbnic_write_macda(struct fbnic_dev *fbd); +void fbnic_promisc_sync(struct fbnic_dev *fbd, + bool uc_promisc, bool mc_promisc); + struct fbnic_ip_addr *__fbnic_ip4_sync(struct fbnic_dev *fbd, struct fbnic_ip_addr *ip_addr, const struct in_addr *addr, diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c index f9543d03485f..493f7f4df013 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c @@ -2,11 +2,14 @@ /* Copyright (c) Meta Platforms, Inc. and affiliates. */ #include <linux/bitfield.h> +#include <linux/bpf.h> +#include <linux/bpf_trace.h> #include <linux/iopoll.h> #include <linux/pci.h> #include <net/netdev_queues.h> #include <net/page_pool/helpers.h> #include <net/tcp.h> +#include <net/xdp.h> #include "fbnic.h" #include "fbnic_csr.h" @@ -14,6 +17,13 @@ #include "fbnic_txrx.h" enum { + FBNIC_XDP_PASS = 0, + FBNIC_XDP_CONSUME, + FBNIC_XDP_TX, + FBNIC_XDP_LEN_ERR, +}; + +enum { FBNIC_XMIT_CB_TS = 0x01, }; @@ -606,6 +616,54 @@ static void fbnic_clean_twq0(struct fbnic_napi_vector *nv, int napi_budget, } } +static void fbnic_clean_twq1(struct fbnic_napi_vector *nv, bool pp_allow_direct, + struct fbnic_ring *ring, bool discard, + unsigned int hw_head) +{ + u64 total_bytes = 0, total_packets = 0; + unsigned int head = ring->head; + + while (hw_head != head) { + struct page *page; + u64 twd; + + if (unlikely(!(ring->desc[head] & FBNIC_TWD_TYPE(AL)))) + goto next_desc; + + twd = le64_to_cpu(ring->desc[head]); + page = ring->tx_buf[head]; + + /* TYPE_AL is 2, TYPE_LAST_AL is 3. So this trick gives + * us one increment per packet, with no branches. 
+ */ + total_packets += FIELD_GET(FBNIC_TWD_TYPE_MASK, twd) - + FBNIC_TWD_TYPE_AL; + total_bytes += FIELD_GET(FBNIC_TWD_LEN_MASK, twd); + + page_pool_put_page(page->pp, page, -1, pp_allow_direct); +next_desc: + head++; + head &= ring->size_mask; + } + + if (!total_bytes) + return; + + ring->head = head; + + if (discard) { + u64_stats_update_begin(&ring->stats.syncp); + ring->stats.dropped += total_packets; + u64_stats_update_end(&ring->stats.syncp); + return; + } + + u64_stats_update_begin(&ring->stats.syncp); + ring->stats.bytes += total_bytes; + ring->stats.packets += total_packets; + u64_stats_update_end(&ring->stats.syncp); +} + static void fbnic_clean_tsq(struct fbnic_napi_vector *nv, struct fbnic_ring *ring, u64 tcd, int *ts_head, int *head0) @@ -657,44 +715,65 @@ static void fbnic_clean_tsq(struct fbnic_napi_vector *nv, } static void fbnic_page_pool_init(struct fbnic_ring *ring, unsigned int idx, - struct page *page) + netmem_ref netmem) { struct fbnic_rx_buf *rx_buf = &ring->rx_buf[idx]; - page_pool_fragment_page(page, FBNIC_PAGECNT_BIAS_MAX); + page_pool_fragment_netmem(netmem, FBNIC_PAGECNT_BIAS_MAX); rx_buf->pagecnt_bias = FBNIC_PAGECNT_BIAS_MAX; - rx_buf->page = page; + rx_buf->netmem = netmem; } -static struct page *fbnic_page_pool_get(struct fbnic_ring *ring, - unsigned int idx) +static struct page * +fbnic_page_pool_get_head(struct fbnic_q_triad *qt, unsigned int idx) { - struct fbnic_rx_buf *rx_buf = &ring->rx_buf[idx]; + struct fbnic_rx_buf *rx_buf = &qt->sub0.rx_buf[idx]; rx_buf->pagecnt_bias--; - return rx_buf->page; + /* sub0 is always fed system pages, from the NAPI-level page_pool */ + return netmem_to_page(rx_buf->netmem); +} + +static netmem_ref +fbnic_page_pool_get_data(struct fbnic_q_triad *qt, unsigned int idx) +{ + struct fbnic_rx_buf *rx_buf = &qt->sub1.rx_buf[idx]; + + rx_buf->pagecnt_bias--; + + return rx_buf->netmem; } static void fbnic_page_pool_drain(struct fbnic_ring *ring, unsigned int idx, - struct fbnic_napi_vector *nv, int budget) + int budget) { struct fbnic_rx_buf *rx_buf = &ring->rx_buf[idx]; - struct page *page = rx_buf->page; + netmem_ref netmem = rx_buf->netmem; - if (!page_pool_unref_page(page, rx_buf->pagecnt_bias)) - page_pool_put_unrefed_page(nv->page_pool, page, -1, !!budget); + if (!page_pool_unref_netmem(netmem, rx_buf->pagecnt_bias)) + page_pool_put_unrefed_netmem(ring->page_pool, netmem, -1, + !!budget); - rx_buf->page = NULL; + rx_buf->netmem = 0; } static void fbnic_clean_twq(struct fbnic_napi_vector *nv, int napi_budget, - struct fbnic_q_triad *qt, s32 ts_head, s32 head0) + struct fbnic_q_triad *qt, s32 ts_head, s32 head0, + s32 head1) { if (head0 >= 0) fbnic_clean_twq0(nv, napi_budget, &qt->sub0, false, head0); else if (ts_head >= 0) fbnic_clean_twq0(nv, napi_budget, &qt->sub0, false, ts_head); + + if (head1 >= 0) { + qt->cmpl.deferred_head = -1; + if (napi_budget) + fbnic_clean_twq1(nv, true, &qt->sub1, false, head1); + else + qt->cmpl.deferred_head = head1; + } } static void @@ -702,6 +781,7 @@ fbnic_clean_tcq(struct fbnic_napi_vector *nv, struct fbnic_q_triad *qt, int napi_budget) { struct fbnic_ring *cmpl = &qt->cmpl; + s32 head1 = cmpl->deferred_head; s32 head0 = -1, ts_head = -1; __le64 *raw_tcd, done; u32 head = cmpl->head; @@ -719,7 +799,10 @@ fbnic_clean_tcq(struct fbnic_napi_vector *nv, struct fbnic_q_triad *qt, switch (FIELD_GET(FBNIC_TCD_TYPE_MASK, tcd)) { case FBNIC_TCD_TYPE_0: - if (!(tcd & FBNIC_TCD_TWQ1)) + if (tcd & FBNIC_TCD_TWQ1) + head1 = FIELD_GET(FBNIC_TCD_TYPE0_HEAD1_MASK, + tcd); + else head0 = 
FIELD_GET(FBNIC_TCD_TYPE0_HEAD0_MASK, tcd); /* Currently all err status bits are related to @@ -752,11 +835,11 @@ fbnic_clean_tcq(struct fbnic_napi_vector *nv, struct fbnic_q_triad *qt, } /* Unmap and free processed buffers */ - fbnic_clean_twq(nv, napi_budget, qt, ts_head, head0); + fbnic_clean_twq(nv, napi_budget, qt, ts_head, head0, head1); } -static void fbnic_clean_bdq(struct fbnic_napi_vector *nv, int napi_budget, - struct fbnic_ring *ring, unsigned int hw_head) +static void fbnic_clean_bdq(struct fbnic_ring *ring, unsigned int hw_head, + int napi_budget) { unsigned int head = ring->head; @@ -764,7 +847,7 @@ static void fbnic_clean_bdq(struct fbnic_napi_vector *nv, int napi_budget, return; do { - fbnic_page_pool_drain(ring, head, nv, napi_budget); + fbnic_page_pool_drain(ring, head, napi_budget); head++; head &= ring->size_mask; @@ -773,10 +856,10 @@ static void fbnic_clean_bdq(struct fbnic_napi_vector *nv, int napi_budget, ring->head = head; } -static void fbnic_bd_prep(struct fbnic_ring *bdq, u16 id, struct page *page) +static void fbnic_bd_prep(struct fbnic_ring *bdq, u16 id, netmem_ref netmem) { __le64 *bdq_desc = &bdq->desc[id * FBNIC_BD_FRAG_COUNT]; - dma_addr_t dma = page_pool_get_dma_addr(page); + dma_addr_t dma = page_pool_get_dma_addr_netmem(netmem); u64 bd, i = FBNIC_BD_FRAG_COUNT; bd = (FBNIC_BD_PAGE_ADDR_MASK & dma) | @@ -794,7 +877,7 @@ static void fbnic_bd_prep(struct fbnic_ring *bdq, u16 id, struct page *page) } while (--i); } -static void fbnic_fill_bdq(struct fbnic_napi_vector *nv, struct fbnic_ring *bdq) +static void fbnic_fill_bdq(struct fbnic_ring *bdq) { unsigned int count = fbnic_desc_unused(bdq); unsigned int i = bdq->tail; @@ -803,10 +886,10 @@ static void fbnic_fill_bdq(struct fbnic_napi_vector *nv, struct fbnic_ring *bdq) return; do { - struct page *page; + netmem_ref netmem; - page = page_pool_dev_alloc_pages(nv->page_pool); - if (!page) { + netmem = page_pool_dev_alloc_netmems(bdq->page_pool); + if (!netmem) { u64_stats_update_begin(&bdq->stats.syncp); bdq->stats.rx.alloc_failed++; u64_stats_update_end(&bdq->stats.syncp); @@ -814,8 +897,8 @@ static void fbnic_fill_bdq(struct fbnic_napi_vector *nv, struct fbnic_ring *bdq) break; } - fbnic_page_pool_init(bdq, i, page); - fbnic_bd_prep(bdq, i, page); + fbnic_page_pool_init(bdq, i, netmem); + fbnic_bd_prep(bdq, i, netmem); i++; i &= bdq->size_mask; @@ -862,7 +945,7 @@ static void fbnic_pkt_prepare(struct fbnic_napi_vector *nv, u64 rcd, { unsigned int hdr_pg_idx = FIELD_GET(FBNIC_RCD_AL_BUFF_PAGE_MASK, rcd); unsigned int hdr_pg_off = FIELD_GET(FBNIC_RCD_AL_BUFF_OFF_MASK, rcd); - struct page *page = fbnic_page_pool_get(&qt->sub0, hdr_pg_idx); + struct page *page = fbnic_page_pool_get_head(qt, hdr_pg_idx); unsigned int len = FIELD_GET(FBNIC_RCD_AL_BUFF_LEN_MASK, rcd); unsigned int frame_sz, hdr_pg_start, hdr_pg_end, headroom; unsigned char *hdr_start; @@ -877,7 +960,7 @@ static void fbnic_pkt_prepare(struct fbnic_napi_vector *nv, u64 rcd, headroom = hdr_pg_off - hdr_pg_start + FBNIC_RX_PAD; frame_sz = hdr_pg_end - hdr_pg_start; - xdp_init_buff(&pkt->buff, frame_sz, NULL); + xdp_init_buff(&pkt->buff, frame_sz, &qt->xdp_rxq); hdr_pg_start += (FBNIC_RCD_AL_BUFF_FRAG_MASK & rcd) * FBNIC_BD_FRAG_SIZE; @@ -888,13 +971,12 @@ static void fbnic_pkt_prepare(struct fbnic_napi_vector *nv, u64 rcd, /* Build frame around buffer */ hdr_start = page_address(page) + hdr_pg_start; - + net_prefetch(pkt->buff.data); xdp_prepare_buff(&pkt->buff, hdr_start, headroom, len - FBNIC_RX_PAD, true); - pkt->data_truesize = 0; - 
pkt->data_len = 0; - pkt->nr_frags = 0; + pkt->hwtstamp = 0; + pkt->add_frag_failed = false; } static void fbnic_add_rx_frag(struct fbnic_napi_vector *nv, u64 rcd, @@ -904,9 +986,9 @@ static void fbnic_add_rx_frag(struct fbnic_napi_vector *nv, u64 rcd, unsigned int pg_idx = FIELD_GET(FBNIC_RCD_AL_BUFF_PAGE_MASK, rcd); unsigned int pg_off = FIELD_GET(FBNIC_RCD_AL_BUFF_OFF_MASK, rcd); unsigned int len = FIELD_GET(FBNIC_RCD_AL_BUFF_LEN_MASK, rcd); - struct page *page = fbnic_page_pool_get(&qt->sub1, pg_idx); - struct skb_shared_info *shinfo; + netmem_ref netmem = fbnic_page_pool_get_data(qt, pg_idx); unsigned int truesize; + bool added; truesize = FIELD_GET(FBNIC_RCD_AL_PAGE_FIN, rcd) ? FBNIC_BD_FRAG_SIZE - pg_off : ALIGN(len, 128); @@ -915,88 +997,171 @@ static void fbnic_add_rx_frag(struct fbnic_napi_vector *nv, u64 rcd, FBNIC_BD_FRAG_SIZE; /* Sync DMA buffer */ - dma_sync_single_range_for_cpu(nv->dev, page_pool_get_dma_addr(page), - pg_off, truesize, DMA_BIDIRECTIONAL); - - /* Add page to xdp shared info */ - shinfo = xdp_get_shared_info_from_buff(&pkt->buff); - - /* We use gso_segs to store truesize */ - pkt->data_truesize += truesize; - - __skb_fill_page_desc_noacc(shinfo, pkt->nr_frags++, page, pg_off, len); - - /* Store data_len in gso_size */ - pkt->data_len += len; + page_pool_dma_sync_netmem_for_cpu(qt->sub1.page_pool, netmem, + pg_off, truesize); + + added = xdp_buff_add_frag(&pkt->buff, netmem, pg_off, len, truesize); + if (unlikely(!added)) { + pkt->add_frag_failed = true; + netdev_err_once(nv->napi.dev, + "Failed to add fragment to xdp_buff\n"); + } } -static void fbnic_put_pkt_buff(struct fbnic_napi_vector *nv, +static void fbnic_put_pkt_buff(struct fbnic_q_triad *qt, struct fbnic_pkt_buff *pkt, int budget) { - struct skb_shared_info *shinfo; struct page *page; - int nr_frags; if (!pkt->buff.data_hard_start) return; - shinfo = xdp_get_shared_info_from_buff(&pkt->buff); - nr_frags = pkt->nr_frags; + if (xdp_buff_has_frags(&pkt->buff)) { + struct skb_shared_info *shinfo; + netmem_ref netmem; + int nr_frags; + + shinfo = xdp_get_shared_info_from_buff(&pkt->buff); + nr_frags = shinfo->nr_frags; - while (nr_frags--) { - page = skb_frag_page(&shinfo->frags[nr_frags]); - page_pool_put_full_page(nv->page_pool, page, !!budget); + while (nr_frags--) { + netmem = skb_frag_netmem(&shinfo->frags[nr_frags]); + page_pool_put_full_netmem(qt->sub1.page_pool, netmem, + !!budget); + } } page = virt_to_page(pkt->buff.data_hard_start); - page_pool_put_full_page(nv->page_pool, page, !!budget); + page_pool_put_full_page(qt->sub0.page_pool, page, !!budget); } static struct sk_buff *fbnic_build_skb(struct fbnic_napi_vector *nv, struct fbnic_pkt_buff *pkt) { - unsigned int nr_frags = pkt->nr_frags; - struct skb_shared_info *shinfo; - unsigned int truesize; struct sk_buff *skb; - truesize = xdp_data_hard_end(&pkt->buff) + FBNIC_RX_TROOM - - pkt->buff.data_hard_start; - - /* Build frame around buffer */ - skb = napi_build_skb(pkt->buff.data_hard_start, truesize); - if (unlikely(!skb)) + skb = xdp_build_skb_from_buff(&pkt->buff); + if (!skb) return NULL; - /* Push data pointer to start of data, put tail to end of data */ - skb_reserve(skb, pkt->buff.data - pkt->buff.data_hard_start); - __skb_put(skb, pkt->buff.data_end - pkt->buff.data); + /* Add timestamp if present */ + if (pkt->hwtstamp) + skb_hwtstamps(skb)->hwtstamp = pkt->hwtstamp; - /* Add tracking for metadata at the start of the frame */ - skb_metadata_set(skb, pkt->buff.data - pkt->buff.data_meta); + return skb; +} - /* Add Rx frags */ - if 
(nr_frags) { - /* Verify that shared info didn't move */ +static long fbnic_pkt_tx(struct fbnic_napi_vector *nv, + struct fbnic_pkt_buff *pkt) +{ + struct fbnic_ring *ring = &nv->qt[0].sub1; + int size, offset, nsegs = 1, data_len = 0; + unsigned int tail = ring->tail; + struct skb_shared_info *shinfo; + skb_frag_t *frag = NULL; + struct page *page; + dma_addr_t dma; + __le64 *twd; + + if (unlikely(xdp_buff_has_frags(&pkt->buff))) { shinfo = xdp_get_shared_info_from_buff(&pkt->buff); - WARN_ON(skb_shinfo(skb) != shinfo); + nsegs += shinfo->nr_frags; + data_len = shinfo->xdp_frags_size; + frag = &shinfo->frags[0]; + } - skb->truesize += pkt->data_truesize; - skb->data_len += pkt->data_len; - shinfo->nr_frags = nr_frags; - skb->len += pkt->data_len; + if (fbnic_desc_unused(ring) < nsegs) { + u64_stats_update_begin(&ring->stats.syncp); + ring->stats.dropped++; + u64_stats_update_end(&ring->stats.syncp); + return -FBNIC_XDP_CONSUME; } - skb_mark_for_recycle(skb); + page = virt_to_page(pkt->buff.data_hard_start); + offset = offset_in_page(pkt->buff.data); + dma = page_pool_get_dma_addr(page); - /* Set MAC header specific fields */ - skb->protocol = eth_type_trans(skb, nv->napi.dev); + size = pkt->buff.data_end - pkt->buff.data; - /* Add timestamp if present */ - if (pkt->hwtstamp) - skb_hwtstamps(skb)->hwtstamp = pkt->hwtstamp; + while (nsegs--) { + dma_sync_single_range_for_device(nv->dev, dma, offset, size, + DMA_BIDIRECTIONAL); + dma += offset; - return skb; + ring->tx_buf[tail] = page; + + twd = &ring->desc[tail]; + *twd = cpu_to_le64(FIELD_PREP(FBNIC_TWD_ADDR_MASK, dma) | + FIELD_PREP(FBNIC_TWD_LEN_MASK, size) | + FIELD_PREP(FBNIC_TWD_TYPE_MASK, + FBNIC_TWD_TYPE_AL)); + + tail++; + tail &= ring->size_mask; + + if (!data_len) + break; + + offset = skb_frag_off(frag); + page = skb_frag_page(frag); + dma = page_pool_get_dma_addr(page); + + size = skb_frag_size(frag); + data_len -= size; + frag++; + } + + *twd |= FBNIC_TWD_TYPE(LAST_AL); + + ring->tail = tail; + + return -FBNIC_XDP_TX; +} + +static void fbnic_pkt_commit_tail(struct fbnic_napi_vector *nv, + unsigned int pkt_tail) +{ + struct fbnic_ring *ring = &nv->qt[0].sub1; + + /* Force DMA writes to flush before writing to tail */ + dma_wmb(); + + writel(pkt_tail, ring->doorbell); +} + +static struct sk_buff *fbnic_run_xdp(struct fbnic_napi_vector *nv, + struct fbnic_pkt_buff *pkt) +{ + struct fbnic_net *fbn = netdev_priv(nv->napi.dev); + struct bpf_prog *xdp_prog; + int act; + + xdp_prog = READ_ONCE(fbn->xdp_prog); + if (!xdp_prog) + goto xdp_pass; + + /* Should never happen, config paths enforce HDS threshold > MTU */ + if (xdp_buff_has_frags(&pkt->buff) && !xdp_prog->aux->xdp_has_frags) + return ERR_PTR(-FBNIC_XDP_LEN_ERR); + + act = bpf_prog_run_xdp(xdp_prog, &pkt->buff); + switch (act) { + case XDP_PASS: +xdp_pass: + return fbnic_build_skb(nv, pkt); + case XDP_TX: + return ERR_PTR(fbnic_pkt_tx(nv, pkt)); + default: + bpf_warn_invalid_xdp_action(nv->napi.dev, xdp_prog, act); + fallthrough; + case XDP_ABORTED: + trace_xdp_exception(nv->napi.dev, xdp_prog, act); + fallthrough; + case XDP_DROP: + break; + } + + return ERR_PTR(-FBNIC_XDP_CONSUME); } static enum pkt_hash_types fbnic_skb_hash_type(u64 rcd) @@ -1050,10 +1215,10 @@ static int fbnic_clean_rcq(struct fbnic_napi_vector *nv, struct fbnic_q_triad *qt, int budget) { unsigned int packets = 0, bytes = 0, dropped = 0, alloc_failed = 0; - u64 csum_complete = 0, csum_none = 0; + u64 csum_complete = 0, csum_none = 0, length_errors = 0; + s32 head0 = -1, head1 = -1, pkt_tail = -1; struct 
fbnic_ring *rcq = &qt->cmpl;
 	struct fbnic_pkt_buff *pkt;
-	s32 head0 = -1, head1 = -1;
 	__le64 *raw_rcd, done;
 	u32 head = rcq->head;
@@ -1094,8 +1259,10 @@ static int fbnic_clean_rcq(struct fbnic_napi_vector *nv,
 			/* We currently ignore the action table index */
 			break;
 		case FBNIC_RCD_TYPE_META:
-			if (likely(!fbnic_rcd_metadata_err(rcd)))
-				skb = fbnic_build_skb(nv, pkt);
+			if (unlikely(pkt->add_frag_failed))
+				skb = NULL;
+			else if (likely(!fbnic_rcd_metadata_err(rcd)))
+				skb = fbnic_run_xdp(nv, pkt);
 
 			/* Populate skb and invalidate XDP */
 			if (!IS_ERR_OR_NULL(skb)) {
@@ -1107,15 +1274,20 @@ static int fbnic_clean_rcq(struct fbnic_napi_vector *nv,
 				bytes += skb->len;
 
 				napi_gro_receive(&nv->napi, skb);
+			} else if (skb == ERR_PTR(-FBNIC_XDP_TX)) {
+				pkt_tail = nv->qt[0].sub1.tail;
+				bytes += xdp_get_buff_len(&pkt->buff);
 			} else {
 				if (!skb) {
 					alloc_failed++;
 					dropped++;
+				} else if (skb == ERR_PTR(-FBNIC_XDP_LEN_ERR)) {
+					length_errors++;
 				} else {
 					dropped++;
 				}
 
-				fbnic_put_pkt_buff(nv, pkt, 1);
+				fbnic_put_pkt_buff(qt, pkt, 1);
 			}
 
 			pkt->buff.data_hard_start = NULL;
@@ -1140,16 +1312,20 @@ static int fbnic_clean_rcq(struct fbnic_napi_vector *nv,
 	rcq->stats.rx.alloc_failed += alloc_failed;
 	rcq->stats.rx.csum_complete += csum_complete;
 	rcq->stats.rx.csum_none += csum_none;
+	rcq->stats.rx.length_errors += length_errors;
 	u64_stats_update_end(&rcq->stats.syncp);
 
+	if (pkt_tail >= 0)
+		fbnic_pkt_commit_tail(nv, pkt_tail);
+
 	/* Unmap and free processed buffers */
 	if (head0 >= 0)
-		fbnic_clean_bdq(nv, budget, &qt->sub0, head0);
-	fbnic_fill_bdq(nv, &qt->sub0);
+		fbnic_clean_bdq(&qt->sub0, head0, budget);
+	fbnic_fill_bdq(&qt->sub0);
 
 	if (head1 >= 0)
-		fbnic_clean_bdq(nv, budget, &qt->sub1, head1);
-	fbnic_fill_bdq(nv, &qt->sub1);
+		fbnic_clean_bdq(&qt->sub1, head1, budget);
+	fbnic_fill_bdq(&qt->sub1);
 
 	/* Record the current head/tail of the queue */
 	if (rcq->head != head) {
@@ -1220,8 +1396,9 @@ void fbnic_aggregate_ring_rx_counters(struct fbnic_net *fbn,
 	fbn->rx_stats.rx.alloc_failed += stats->rx.alloc_failed;
 	fbn->rx_stats.rx.csum_complete += stats->rx.csum_complete;
 	fbn->rx_stats.rx.csum_none += stats->rx.csum_none;
+	fbn->rx_stats.rx.length_errors += stats->rx.length_errors;
 	/* Remember to add new stats here */
-	BUILD_BUG_ON(sizeof(fbn->rx_stats.rx) / 8 != 3);
+	BUILD_BUG_ON(sizeof(fbn->rx_stats.rx) / 8 != 4);
 }
 
 void fbnic_aggregate_ring_tx_counters(struct fbnic_net *fbn,
@@ -1243,6 +1420,22 @@ void fbnic_aggregate_ring_tx_counters(struct fbnic_net *fbn,
 	BUILD_BUG_ON(sizeof(fbn->tx_stats.twq) / 8 != 6);
 }
 
+static void fbnic_aggregate_ring_xdp_counters(struct fbnic_net *fbn,
+					      struct fbnic_ring *xdpr)
+{
+	struct fbnic_queue_stats *stats = &xdpr->stats;
+
+	if (!(xdpr->flags & FBNIC_RING_F_STATS))
+		return;
+
+	/* Capture stats from queues before disassociating them */
+	fbn->rx_stats.bytes += stats->bytes;
+	fbn->rx_stats.packets += stats->packets;
+	fbn->rx_stats.dropped += stats->dropped;
+	fbn->tx_stats.bytes += stats->bytes;
+	fbn->tx_stats.packets += stats->packets;
+}
+
 static void fbnic_remove_tx_ring(struct fbnic_net *fbn,
 				 struct fbnic_ring *txr)
 {
@@ -1256,6 +1449,19 @@ static void fbnic_remove_tx_ring(struct fbnic_net *fbn,
 	fbn->tx[txr->q_idx] = NULL;
 }
 
+static void fbnic_remove_xdp_ring(struct fbnic_net *fbn,
+				  struct fbnic_ring *xdpr)
+{
+	if (!(xdpr->flags & FBNIC_RING_F_STATS))
+		return;
+
+	fbnic_aggregate_ring_xdp_counters(fbn, xdpr);
+
+	/* Remove pointer to the Tx ring */
+	WARN_ON(fbn->tx[xdpr->q_idx] && fbn->tx[xdpr->q_idx] != xdpr);
+	fbn->tx[xdpr->q_idx] = NULL;
+}
+
 static void 
fbnic_remove_rx_ring(struct fbnic_net *fbn, struct fbnic_ring *rxr) { @@ -1269,6 +1475,12 @@ static void fbnic_remove_rx_ring(struct fbnic_net *fbn, fbn->rx[rxr->q_idx] = NULL; } +static void fbnic_free_qt_page_pools(struct fbnic_q_triad *qt) +{ + page_pool_destroy(qt->sub0.page_pool); + page_pool_destroy(qt->sub1.page_pool); +} + static void fbnic_free_napi_vector(struct fbnic_net *fbn, struct fbnic_napi_vector *nv) { @@ -1277,6 +1489,7 @@ static void fbnic_free_napi_vector(struct fbnic_net *fbn, for (i = 0; i < nv->txt_count; i++) { fbnic_remove_tx_ring(fbn, &nv->qt[i].sub0); + fbnic_remove_xdp_ring(fbn, &nv->qt[i].sub1); fbnic_remove_tx_ring(fbn, &nv->qt[i].cmpl); } @@ -1287,8 +1500,7 @@ static void fbnic_free_napi_vector(struct fbnic_net *fbn, } fbnic_napi_free_irq(fbd, nv); - page_pool_destroy(nv->page_pool); - netif_napi_del(&nv->napi); + netif_napi_del_locked(&nv->napi); fbn->napi[fbnic_napi_idx(nv)] = NULL; kfree(nv); } @@ -1302,23 +1514,22 @@ void fbnic_free_napi_vectors(struct fbnic_net *fbn) fbnic_free_napi_vector(fbn, fbn->napi[i]); } -#define FBNIC_PAGE_POOL_FLAGS \ - (PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV) - -static int fbnic_alloc_nv_page_pool(struct fbnic_net *fbn, - struct fbnic_napi_vector *nv) +static int +fbnic_alloc_qt_page_pools(struct fbnic_net *fbn, struct fbnic_q_triad *qt, + unsigned int rxq_idx) { struct page_pool_params pp_params = { .order = 0, - .flags = FBNIC_PAGE_POOL_FLAGS, - .pool_size = (fbn->hpq_size + fbn->ppq_size) * nv->rxt_count, + .flags = PP_FLAG_DMA_MAP | + PP_FLAG_DMA_SYNC_DEV, + .pool_size = fbn->hpq_size + fbn->ppq_size, .nid = NUMA_NO_NODE, - .dev = nv->dev, + .dev = fbn->netdev->dev.parent, .dma_dir = DMA_BIDIRECTIONAL, .offset = 0, .max_len = PAGE_SIZE, - .napi = &nv->napi, .netdev = fbn->netdev, + .queue_idx = rxq_idx, }; struct page_pool *pp; @@ -1338,9 +1549,24 @@ static int fbnic_alloc_nv_page_pool(struct fbnic_net *fbn, if (IS_ERR(pp)) return PTR_ERR(pp); - nv->page_pool = pp; + qt->sub0.page_pool = pp; + if (netif_rxq_has_unreadable_mp(fbn->netdev, rxq_idx)) { + pp_params.flags |= PP_FLAG_ALLOW_UNREADABLE_NETMEM; + pp_params.dma_dir = DMA_FROM_DEVICE; + + pp = page_pool_create(&pp_params); + if (IS_ERR(pp)) + goto err_destroy_sub0; + } else { + page_pool_get(pp); + } + qt->sub1.page_pool = pp; return 0; + +err_destroy_sub0: + page_pool_destroy(pp); + return PTR_ERR(pp); } static void fbnic_ring_init(struct fbnic_ring *ring, u32 __iomem *doorbell, @@ -1350,6 +1576,7 @@ static void fbnic_ring_init(struct fbnic_ring *ring, u32 __iomem *doorbell, ring->doorbell = doorbell; ring->q_idx = q_idx; ring->flags = flags; + ring->deferred_head = -1; } static int fbnic_alloc_napi_vector(struct fbnic_dev *fbd, struct fbnic_net *fbn, @@ -1359,11 +1586,18 @@ static int fbnic_alloc_napi_vector(struct fbnic_dev *fbd, struct fbnic_net *fbn, { int txt_count = txq_count, rxt_count = rxq_count; u32 __iomem *uc_addr = fbd->uc_addr0; + int xdp_count = 0, qt_count, err; struct fbnic_napi_vector *nv; struct fbnic_q_triad *qt; - int qt_count, err; u32 __iomem *db; + /* We need to reserve at least one Tx Queue Triad for an XDP ring */ + if (rxq_count) { + xdp_count = 1; + if (!txt_count) + txt_count = 1; + } + qt_count = txt_count + rxq_count; if (!qt_count) return -EINVAL; @@ -1387,37 +1621,32 @@ static int fbnic_alloc_napi_vector(struct fbnic_dev *fbd, struct fbnic_net *fbn, /* Tie napi to netdev */ fbn->napi[fbnic_napi_idx(nv)] = nv; - netif_napi_add(fbn->netdev, &nv->napi, fbnic_poll); + netif_napi_add_locked(fbn->netdev, &nv->napi, fbnic_poll); /* Record 
IRQ to NAPI struct */ - netif_napi_set_irq(&nv->napi, - pci_irq_vector(to_pci_dev(fbd->dev), nv->v_idx)); + netif_napi_set_irq_locked(&nv->napi, + pci_irq_vector(to_pci_dev(fbd->dev), + nv->v_idx)); /* Tie nv back to PCIe dev */ nv->dev = fbd->dev; - /* Allocate page pool */ - if (rxq_count) { - err = fbnic_alloc_nv_page_pool(fbn, nv); - if (err) - goto napi_del; - } - /* Request the IRQ for napi vector */ err = fbnic_napi_request_irq(fbd, nv); if (err) - goto pp_destroy; + goto napi_del; /* Initialize queue triads */ qt = nv->qt; while (txt_count) { + u8 flags = FBNIC_RING_F_CTX | FBNIC_RING_F_STATS; + /* Configure Tx queue */ db = &uc_addr[FBNIC_QUEUE(txq_idx) + FBNIC_QUEUE_TWQ0_TAIL]; /* Assign Tx queue to netdev if applicable */ if (txq_count > 0) { - u8 flags = FBNIC_RING_F_CTX | FBNIC_RING_F_STATS; fbnic_ring_init(&qt->sub0, db, txq_idx, flags); fbn->tx[txq_idx] = &qt->sub0; @@ -1427,6 +1656,28 @@ static int fbnic_alloc_napi_vector(struct fbnic_dev *fbd, struct fbnic_net *fbn, FBNIC_RING_F_DISABLED); } + /* Configure XDP queue */ + db = &uc_addr[FBNIC_QUEUE(txq_idx) + FBNIC_QUEUE_TWQ1_TAIL]; + + /* Assign XDP queue to netdev if applicable + * + * The setup for this is in itself a bit different. + * 1. We only need one XDP Tx queue per NAPI vector. + * 2. We associate it to the first Rx queue index. + * 3. The hardware side is associated based on the Tx Queue. + * 4. The netdev queue is offset by FBNIC_MAX_TXQs. + */ + if (xdp_count > 0) { + unsigned int xdp_idx = FBNIC_MAX_TXQS + rxq_idx; + + fbnic_ring_init(&qt->sub1, db, xdp_idx, flags); + fbn->tx[xdp_idx] = &qt->sub1; + xdp_count--; + } else { + fbnic_ring_init(&qt->sub1, db, 0, + FBNIC_RING_F_DISABLED); + } + /* Configure Tx completion queue */ db = &uc_addr[FBNIC_QUEUE(txq_idx) + FBNIC_QUEUE_TCQ_HEAD]; fbnic_ring_init(&qt->cmpl, db, 0, 0); @@ -1463,10 +1714,8 @@ static int fbnic_alloc_napi_vector(struct fbnic_dev *fbd, struct fbnic_net *fbn, return 0; -pp_destroy: - page_pool_destroy(nv->page_pool); napi_del: - netif_napi_del(&nv->napi); + netif_napi_del_locked(&nv->napi); fbn->napi[fbnic_napi_idx(nv)] = NULL; kfree(nv); return err; @@ -1680,6 +1929,12 @@ static void fbnic_free_qt_resources(struct fbnic_net *fbn, fbnic_free_ring_resources(dev, &qt->cmpl); fbnic_free_ring_resources(dev, &qt->sub1); fbnic_free_ring_resources(dev, &qt->sub0); + + if (xdp_rxq_info_is_reg(&qt->xdp_rxq)) { + xdp_rxq_info_unreg_mem_model(&qt->xdp_rxq); + xdp_rxq_info_unreg(&qt->xdp_rxq); + fbnic_free_qt_page_pools(qt); + } } static int fbnic_alloc_tx_qt_resources(struct fbnic_net *fbn, @@ -1692,6 +1947,10 @@ static int fbnic_alloc_tx_qt_resources(struct fbnic_net *fbn, if (err) return err; + err = fbnic_alloc_tx_ring_resources(fbn, &qt->sub1); + if (err) + goto free_sub0; + err = fbnic_alloc_tx_ring_resources(fbn, &qt->cmpl); if (err) goto free_sub1; @@ -1699,20 +1958,37 @@ static int fbnic_alloc_tx_qt_resources(struct fbnic_net *fbn, return 0; free_sub1: + fbnic_free_ring_resources(dev, &qt->sub1); +free_sub0: fbnic_free_ring_resources(dev, &qt->sub0); return err; } static int fbnic_alloc_rx_qt_resources(struct fbnic_net *fbn, + struct fbnic_napi_vector *nv, struct fbnic_q_triad *qt) { struct device *dev = fbn->netdev->dev.parent; int err; - err = fbnic_alloc_rx_ring_resources(fbn, &qt->sub0); + err = fbnic_alloc_qt_page_pools(fbn, qt, qt->cmpl.q_idx); if (err) return err; + err = xdp_rxq_info_reg(&qt->xdp_rxq, fbn->netdev, qt->sub0.q_idx, + nv->napi.napi_id); + if (err) + goto free_page_pools; + + err = xdp_rxq_info_reg_mem_model(&qt->xdp_rxq, 
MEM_TYPE_PAGE_POOL, + qt->sub0.page_pool); + if (err) + goto unreg_rxq; + + err = fbnic_alloc_rx_ring_resources(fbn, &qt->sub0); + if (err) + goto unreg_mm; + err = fbnic_alloc_rx_ring_resources(fbn, &qt->sub1); if (err) goto free_sub0; @@ -1727,19 +2003,21 @@ free_sub1: fbnic_free_ring_resources(dev, &qt->sub1); free_sub0: fbnic_free_ring_resources(dev, &qt->sub0); +unreg_mm: + xdp_rxq_info_unreg_mem_model(&qt->xdp_rxq); +unreg_rxq: + xdp_rxq_info_unreg(&qt->xdp_rxq); +free_page_pools: + fbnic_free_qt_page_pools(qt); return err; } static void fbnic_free_nv_resources(struct fbnic_net *fbn, struct fbnic_napi_vector *nv) { - int i, j; - - /* Free Tx Resources */ - for (i = 0; i < nv->txt_count; i++) - fbnic_free_qt_resources(fbn, &nv->qt[i]); + int i; - for (j = 0; j < nv->rxt_count; j++, i++) + for (i = 0; i < nv->txt_count + nv->rxt_count; i++) fbnic_free_qt_resources(fbn, &nv->qt[i]); } @@ -1752,19 +2030,19 @@ static int fbnic_alloc_nv_resources(struct fbnic_net *fbn, for (i = 0; i < nv->txt_count; i++) { err = fbnic_alloc_tx_qt_resources(fbn, &nv->qt[i]); if (err) - goto free_resources; + goto free_qt_resources; } /* Allocate Rx Resources */ for (j = 0; j < nv->rxt_count; j++, i++) { - err = fbnic_alloc_rx_qt_resources(fbn, &nv->qt[i]); + err = fbnic_alloc_rx_qt_resources(fbn, nv, &nv->qt[i]); if (err) - goto free_resources; + goto free_qt_resources; } return 0; -free_resources: +free_qt_resources: while (i--) fbnic_free_qt_resources(fbn, &nv->qt[i]); return err; @@ -1871,6 +2149,15 @@ static void fbnic_disable_twq0(struct fbnic_ring *txr) fbnic_ring_wr32(txr, FBNIC_QUEUE_TWQ0_CTL, twq_ctl); } +static void fbnic_disable_twq1(struct fbnic_ring *txr) +{ + u32 twq_ctl = fbnic_ring_rd32(txr, FBNIC_QUEUE_TWQ1_CTL); + + twq_ctl &= ~FBNIC_QUEUE_TWQ_CTL_ENABLE; + + fbnic_ring_wr32(txr, FBNIC_QUEUE_TWQ1_CTL, twq_ctl); +} + static void fbnic_disable_tcq(struct fbnic_ring *txr) { fbnic_ring_wr32(txr, FBNIC_QUEUE_TCQ_CTL, 0); @@ -1897,36 +2184,48 @@ void fbnic_napi_disable(struct fbnic_net *fbn) int i; for (i = 0; i < fbn->num_napi; i++) { - napi_disable(&fbn->napi[i]->napi); + napi_disable_locked(&fbn->napi[i]->napi); fbnic_nv_irq_disable(fbn->napi[i]); } } -void fbnic_disable(struct fbnic_net *fbn) +static void __fbnic_nv_disable(struct fbnic_napi_vector *nv) { - struct fbnic_dev *fbd = fbn->fbd; - int i, j, t; - - for (i = 0; i < fbn->num_napi; i++) { - struct fbnic_napi_vector *nv = fbn->napi[i]; + int i, t; - /* Disable Tx queue triads */ - for (t = 0; t < nv->txt_count; t++) { - struct fbnic_q_triad *qt = &nv->qt[t]; + /* Disable Tx queue triads */ + for (t = 0; t < nv->txt_count; t++) { + struct fbnic_q_triad *qt = &nv->qt[t]; - fbnic_disable_twq0(&qt->sub0); - fbnic_disable_tcq(&qt->cmpl); - } + fbnic_disable_twq0(&qt->sub0); + fbnic_disable_twq1(&qt->sub1); + fbnic_disable_tcq(&qt->cmpl); + } - /* Disable Rx queue triads */ - for (j = 0; j < nv->rxt_count; j++, t++) { - struct fbnic_q_triad *qt = &nv->qt[t]; + /* Disable Rx queue triads */ + for (i = 0; i < nv->rxt_count; i++, t++) { + struct fbnic_q_triad *qt = &nv->qt[t]; - fbnic_disable_bdq(&qt->sub0, &qt->sub1); - fbnic_disable_rcq(&qt->cmpl); - } + fbnic_disable_bdq(&qt->sub0, &qt->sub1); + fbnic_disable_rcq(&qt->cmpl); } +} + +static void +fbnic_nv_disable(struct fbnic_net *fbn, struct fbnic_napi_vector *nv) +{ + __fbnic_nv_disable(nv); + fbnic_wrfl(fbn->fbd); +} + +void fbnic_disable(struct fbnic_net *fbn) +{ + struct fbnic_dev *fbd = fbn->fbd; + int i; + + for (i = 0; i < fbn->num_napi; i++) + __fbnic_nv_disable(fbn->napi[i]); 
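For illustration, the XDP queue indexing set up in the hunk above (netdev Tx queue ID = FBNIC_MAX_TXQS + rxq_idx) means the XDP Tx rings share the fbn->tx[] table with the regular Tx rings. A minimal sketch of the resulting lookup, where fbnic_xdp_ring() is a hypothetical helper and not part of the patch:

        static struct fbnic_ring *fbnic_xdp_ring(struct fbnic_net *fbn,
                                                 unsigned int rxq_idx)
        {
                /* XDP Tx rings are stored past the regular Tx rings in
                 * the same table, one per Rx queue index.
                 */
                return fbn->tx[FBNIC_MAX_TXQS + rxq_idx];
        }
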
fbnic_wrfl(fbd); } @@ -2015,73 +2314,119 @@ int fbnic_wait_all_queues_idle(struct fbnic_dev *fbd, bool may_fail) return err; } -void fbnic_flush(struct fbnic_net *fbn) +static int +fbnic_wait_queue_idle(struct fbnic_net *fbn, bool rx, unsigned int idx) { - int i; + static const unsigned int tx_regs[] = { + FBNIC_QM_TWQ_IDLE(0), FBNIC_QM_TQS_IDLE(0), + FBNIC_QM_TDE_IDLE(0), FBNIC_QM_TCQ_IDLE(0), + }, rx_regs[] = { + FBNIC_QM_HPQ_IDLE(0), FBNIC_QM_PPQ_IDLE(0), + FBNIC_QM_RCQ_IDLE(0), + }; + struct fbnic_dev *fbd = fbn->fbd; + unsigned int val, mask, off; + const unsigned int *regs; + unsigned int reg_cnt; + int i, err; - for (i = 0; i < fbn->num_napi; i++) { - struct fbnic_napi_vector *nv = fbn->napi[i]; - int j, t; + regs = rx ? rx_regs : tx_regs; + reg_cnt = rx ? ARRAY_SIZE(rx_regs) : ARRAY_SIZE(tx_regs); - /* Flush any processed Tx Queue Triads and drop the rest */ - for (t = 0; t < nv->txt_count; t++) { - struct fbnic_q_triad *qt = &nv->qt[t]; - struct netdev_queue *tx_queue; + off = idx / 32; + mask = BIT(idx % 32); - /* Clean the work queues of unprocessed work */ - fbnic_clean_twq0(nv, 0, &qt->sub0, true, qt->sub0.tail); + for (i = 0; i < reg_cnt; i++) { + err = read_poll_timeout_atomic(fbnic_rd32, val, val & mask, + 2, 500000, false, + fbd, regs[i] + off); + if (err) { + netdev_err(fbd->netdev, + "wait for queue %s%d idle failed 0x%04x(%d): %08x (mask: %08x)\n", + rx ? "Rx" : "Tx", idx, regs[i] + off, i, + val, mask); + return err; + } + } - /* Reset completion queue descriptor ring */ - memset(qt->cmpl.desc, 0, qt->cmpl.size); + return 0; +} - /* Nothing else to do if Tx queue is disabled */ - if (qt->sub0.flags & FBNIC_RING_F_DISABLED) - continue; +static void fbnic_nv_flush(struct fbnic_napi_vector *nv) +{ + int j, t; - /* Reset BQL associated with Tx queue */ - tx_queue = netdev_get_tx_queue(nv->napi.dev, - qt->sub0.q_idx); - netdev_tx_reset_queue(tx_queue); - } + /* Flush any processed Tx Queue Triads and drop the rest */ + for (t = 0; t < nv->txt_count; t++) { + struct fbnic_q_triad *qt = &nv->qt[t]; + struct netdev_queue *tx_queue; - /* Flush any processed Rx Queue Triads and drop the rest */ - for (j = 0; j < nv->rxt_count; j++, t++) { - struct fbnic_q_triad *qt = &nv->qt[t]; + /* Clean the work queues of unprocessed work */ + fbnic_clean_twq0(nv, 0, &qt->sub0, true, qt->sub0.tail); + fbnic_clean_twq1(nv, false, &qt->sub1, true, + qt->sub1.tail); - /* Clean the work queues of unprocessed work */ - fbnic_clean_bdq(nv, 0, &qt->sub0, qt->sub0.tail); - fbnic_clean_bdq(nv, 0, &qt->sub1, qt->sub1.tail); + /* Reset completion queue descriptor ring */ + memset(qt->cmpl.desc, 0, qt->cmpl.size); - /* Reset completion queue descriptor ring */ - memset(qt->cmpl.desc, 0, qt->cmpl.size); + /* Nothing else to do if Tx queue is disabled */ + if (qt->sub0.flags & FBNIC_RING_F_DISABLED) + continue; - fbnic_put_pkt_buff(nv, qt->cmpl.pkt, 0); - qt->cmpl.pkt->buff.data_hard_start = NULL; - } + /* Reset BQL associated with Tx queue */ + tx_queue = netdev_get_tx_queue(nv->napi.dev, + qt->sub0.q_idx); + netdev_tx_reset_queue(tx_queue); + } + + /* Flush any processed Rx Queue Triads and drop the rest */ + for (j = 0; j < nv->rxt_count; j++, t++) { + struct fbnic_q_triad *qt = &nv->qt[t]; + + /* Clean the work queues of unprocessed work */ + fbnic_clean_bdq(&qt->sub0, qt->sub0.tail, 0); + fbnic_clean_bdq(&qt->sub1, qt->sub1.tail, 0); + + /* Reset completion queue descriptor ring */ + memset(qt->cmpl.desc, 0, qt->cmpl.size); + + fbnic_put_pkt_buff(qt, qt->cmpl.pkt, 0); + memset(qt->cmpl.pkt, 0, 
sizeof(struct fbnic_pkt_buff)); } } -void fbnic_fill(struct fbnic_net *fbn) +void fbnic_flush(struct fbnic_net *fbn) { int i; - for (i = 0; i < fbn->num_napi; i++) { - struct fbnic_napi_vector *nv = fbn->napi[i]; - int j, t; + for (i = 0; i < fbn->num_napi; i++) + fbnic_nv_flush(fbn->napi[i]); +} - /* Configure NAPI mapping and populate pages - * in the BDQ rings to use for Rx - */ - for (j = 0, t = nv->txt_count; j < nv->rxt_count; j++, t++) { - struct fbnic_q_triad *qt = &nv->qt[t]; +static void fbnic_nv_fill(struct fbnic_napi_vector *nv) +{ + int j, t; - /* Populate the header and payload BDQs */ - fbnic_fill_bdq(nv, &qt->sub0); - fbnic_fill_bdq(nv, &qt->sub1); - } + /* Configure NAPI mapping and populate pages + * in the BDQ rings to use for Rx + */ + for (j = 0, t = nv->txt_count; j < nv->rxt_count; j++, t++) { + struct fbnic_q_triad *qt = &nv->qt[t]; + + /* Populate the header and payload BDQs */ + fbnic_fill_bdq(&qt->sub0); + fbnic_fill_bdq(&qt->sub1); } } +void fbnic_fill(struct fbnic_net *fbn) +{ + int i; + + for (i = 0; i < fbn->num_napi; i++) + fbnic_nv_fill(fbn->napi[i]); +} + static void fbnic_enable_twq0(struct fbnic_ring *twq) { u32 log_size = fls(twq->size_mask); @@ -2104,6 +2449,28 @@ static void fbnic_enable_twq0(struct fbnic_ring *twq) fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ0_CTL, FBNIC_QUEUE_TWQ_CTL_ENABLE); } +static void fbnic_enable_twq1(struct fbnic_ring *twq) +{ + u32 log_size = fls(twq->size_mask); + + if (!twq->size_mask) + return; + + /* Reset head/tail */ + fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ1_CTL, FBNIC_QUEUE_TWQ_CTL_RESET); + twq->tail = 0; + twq->head = 0; + + /* Store descriptor ring address and size */ + fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ1_BAL, lower_32_bits(twq->dma)); + fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ1_BAH, upper_32_bits(twq->dma)); + + /* Write lower 4 bits of log size as 64K ring size is 0 */ + fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ1_SIZE, log_size & 0xf); + + fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ1_CTL, FBNIC_QUEUE_TWQ_CTL_ENABLE); +} + static void fbnic_enable_tcq(struct fbnic_napi_vector *nv, struct fbnic_ring *tcq) { @@ -2232,13 +2599,22 @@ static void fbnic_enable_rcq(struct fbnic_napi_vector *nv, { struct fbnic_net *fbn = netdev_priv(nv->napi.dev); u32 log_size = fls(rcq->size_mask); - u32 rcq_ctl; + u32 hds_thresh = fbn->hds_thresh; + u32 rcq_ctl = 0; fbnic_config_drop_mode_rcq(nv, rcq); - rcq_ctl = FIELD_PREP(FBNIC_QUEUE_RDE_CTL1_PADLEN_MASK, FBNIC_RX_PAD) | - FIELD_PREP(FBNIC_QUEUE_RDE_CTL1_MAX_HDR_MASK, - FBNIC_RX_MAX_HDR) | + /* Force lower bound on MAX_HEADER_BYTES. Below this, all frames should + * be split at L4. It would also result in the frames being split at + * L2/L3 depending on the frame size. 
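In effect the receive completion queue is always programmed with at least FBNIC_HDR_BYTES_MIN (128, per fbnic_txrx.h later in this series) as the header-split boundary. A one-line sketch of the effective threshold the clamp below produces, assuming only the constants shown in this series:

        static u32 fbnic_effective_hds_thresh(u32 requested)
        {
                /* Requests below the hardware minimum still enable
                 * header split, but program the 128-byte floor.
                 */
                return max_t(u32, requested, FBNIC_HDR_BYTES_MIN);
        }
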
+ */ + if (fbn->hds_thresh < FBNIC_HDR_BYTES_MIN) { + rcq_ctl = FBNIC_QUEUE_RDE_CTL0_EN_HDR_SPLIT; + hds_thresh = FBNIC_HDR_BYTES_MIN; + } + + rcq_ctl |= FIELD_PREP(FBNIC_QUEUE_RDE_CTL1_PADLEN_MASK, FBNIC_RX_PAD) | + FIELD_PREP(FBNIC_QUEUE_RDE_CTL1_MAX_HDR_MASK, hds_thresh) | FIELD_PREP(FBNIC_QUEUE_RDE_CTL1_PAYLD_OFF_MASK, FBNIC_RX_PAYLD_OFFSET) | FIELD_PREP(FBNIC_QUEUE_RDE_CTL1_PAYLD_PG_CL_MASK, @@ -2266,32 +2642,47 @@ static void fbnic_enable_rcq(struct fbnic_napi_vector *nv, fbnic_ring_wr32(rcq, FBNIC_QUEUE_RCQ_CTL, FBNIC_QUEUE_RCQ_CTL_ENABLE); } -void fbnic_enable(struct fbnic_net *fbn) +static void __fbnic_nv_enable(struct fbnic_napi_vector *nv) { - struct fbnic_dev *fbd = fbn->fbd; - int i; + int j, t; - for (i = 0; i < fbn->num_napi; i++) { - struct fbnic_napi_vector *nv = fbn->napi[i]; - int j, t; + /* Setup Tx Queue Triads */ + for (t = 0; t < nv->txt_count; t++) { + struct fbnic_q_triad *qt = &nv->qt[t]; - /* Setup Tx Queue Triads */ - for (t = 0; t < nv->txt_count; t++) { - struct fbnic_q_triad *qt = &nv->qt[t]; + fbnic_enable_twq0(&qt->sub0); + fbnic_enable_twq1(&qt->sub1); + fbnic_enable_tcq(nv, &qt->cmpl); + } - fbnic_enable_twq0(&qt->sub0); - fbnic_enable_tcq(nv, &qt->cmpl); - } + /* Setup Rx Queue Triads */ + for (j = 0; j < nv->rxt_count; j++, t++) { + struct fbnic_q_triad *qt = &nv->qt[t]; - /* Setup Rx Queue Triads */ - for (j = 0; j < nv->rxt_count; j++, t++) { - struct fbnic_q_triad *qt = &nv->qt[t]; + page_pool_enable_direct_recycling(qt->sub0.page_pool, + &nv->napi); + page_pool_enable_direct_recycling(qt->sub1.page_pool, + &nv->napi); - fbnic_enable_bdq(&qt->sub0, &qt->sub1); - fbnic_config_drop_mode_rcq(nv, &qt->cmpl); - fbnic_enable_rcq(nv, &qt->cmpl); - } + fbnic_enable_bdq(&qt->sub0, &qt->sub1); + fbnic_config_drop_mode_rcq(nv, &qt->cmpl); + fbnic_enable_rcq(nv, &qt->cmpl); } +} + +static void fbnic_nv_enable(struct fbnic_net *fbn, struct fbnic_napi_vector *nv) +{ + __fbnic_nv_enable(nv); + fbnic_wrfl(fbn->fbd); +} + +void fbnic_enable(struct fbnic_net *fbn) +{ + struct fbnic_dev *fbd = fbn->fbd; + int i; + + for (i = 0; i < fbn->num_napi; i++) + __fbnic_nv_enable(fbn->napi[i]); fbnic_wrfl(fbd); } @@ -2310,7 +2701,7 @@ void fbnic_napi_enable(struct fbnic_net *fbn) for (i = 0; i < fbn->num_napi; i++) { struct fbnic_napi_vector *nv = fbn->napi[i]; - napi_enable(&nv->napi); + napi_enable_locked(&nv->napi); fbnic_nv_irq_enable(nv); @@ -2363,3 +2754,123 @@ void fbnic_napi_depletion_check(struct net_device *netdev) fbnic_wrfl(fbd); } + +static int fbnic_queue_mem_alloc(struct net_device *dev, void *qmem, int idx) +{ + struct fbnic_net *fbn = netdev_priv(dev); + const struct fbnic_q_triad *real; + struct fbnic_q_triad *qt = qmem; + struct fbnic_napi_vector *nv; + + if (!netif_running(dev)) + return fbnic_alloc_qt_page_pools(fbn, qt, idx); + + real = container_of(fbn->rx[idx], struct fbnic_q_triad, cmpl); + nv = fbn->napi[idx % fbn->num_napi]; + + fbnic_ring_init(&qt->sub0, real->sub0.doorbell, real->sub0.q_idx, + real->sub0.flags); + fbnic_ring_init(&qt->sub1, real->sub1.doorbell, real->sub1.q_idx, + real->sub1.flags); + fbnic_ring_init(&qt->cmpl, real->cmpl.doorbell, real->cmpl.q_idx, + real->cmpl.flags); + + return fbnic_alloc_rx_qt_resources(fbn, nv, qt); +} + +static void fbnic_queue_mem_free(struct net_device *dev, void *qmem) +{ + struct fbnic_net *fbn = netdev_priv(dev); + struct fbnic_q_triad *qt = qmem; + + if (!netif_running(dev)) + fbnic_free_qt_page_pools(qt); + else + fbnic_free_qt_resources(fbn, qt); +} + +static void __fbnic_nv_restart(struct fbnic_net 
*fbn, + struct fbnic_napi_vector *nv) +{ + struct fbnic_dev *fbd = fbn->fbd; + int i; + + fbnic_nv_enable(fbn, nv); + fbnic_nv_fill(nv); + + napi_enable_locked(&nv->napi); + fbnic_nv_irq_enable(nv); + fbnic_wr32(fbd, FBNIC_INTR_SET(nv->v_idx / 32), BIT(nv->v_idx % 32)); + fbnic_wrfl(fbd); + + for (i = 0; i < nv->txt_count; i++) + netif_wake_subqueue(fbn->netdev, nv->qt[i].sub0.q_idx); +} + +static int fbnic_queue_start(struct net_device *dev, void *qmem, int idx) +{ + struct fbnic_net *fbn = netdev_priv(dev); + struct fbnic_napi_vector *nv; + struct fbnic_q_triad *real; + + real = container_of(fbn->rx[idx], struct fbnic_q_triad, cmpl); + nv = fbn->napi[idx % fbn->num_napi]; + + fbnic_aggregate_ring_rx_counters(fbn, &real->sub0); + fbnic_aggregate_ring_rx_counters(fbn, &real->sub1); + fbnic_aggregate_ring_rx_counters(fbn, &real->cmpl); + + memcpy(real, qmem, sizeof(*real)); + + __fbnic_nv_restart(fbn, nv); + + return 0; +} + +static int fbnic_queue_stop(struct net_device *dev, void *qmem, int idx) +{ + struct fbnic_net *fbn = netdev_priv(dev); + const struct fbnic_q_triad *real; + struct fbnic_napi_vector *nv; + int i, t; + int err; + + real = container_of(fbn->rx[idx], struct fbnic_q_triad, cmpl); + nv = fbn->napi[idx % fbn->num_napi]; + + napi_disable_locked(&nv->napi); + fbnic_nv_irq_disable(nv); + + for (i = 0; i < nv->txt_count; i++) + netif_stop_subqueue(dev, nv->qt[i].sub0.q_idx); + fbnic_nv_disable(fbn, nv); + + for (t = 0; t < nv->txt_count + nv->rxt_count; t++) { + err = fbnic_wait_queue_idle(fbn, t >= nv->txt_count, + nv->qt[t].sub0.q_idx); + if (err) + goto err_restart; + } + + fbnic_synchronize_irq(fbn->fbd, nv->v_idx); + fbnic_nv_flush(nv); + + page_pool_disable_direct_recycling(real->sub0.page_pool); + page_pool_disable_direct_recycling(real->sub1.page_pool); + + memcpy(qmem, real, sizeof(*real)); + + return 0; + +err_restart: + __fbnic_nv_restart(fbn, nv); + return err; +} + +const struct netdev_queue_mgmt_ops fbnic_queue_mgmt_ops = { + .ndo_queue_mem_size = sizeof(struct fbnic_q_triad), + .ndo_queue_mem_alloc = fbnic_queue_mem_alloc, + .ndo_queue_mem_free = fbnic_queue_mem_free, + .ndo_queue_start = fbnic_queue_start, + .ndo_queue_stop = fbnic_queue_stop, +}; diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h index 34693596e5eb..31fac0ba0902 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h @@ -35,6 +35,7 @@ struct fbnic_net; #define FBNIC_MAX_TXQS 128u #define FBNIC_MAX_RXQS 128u +#define FBNIC_MAX_XDPQS 128u /* These apply to TWQs, TCQ, RCQ */ #define FBNIC_QUEUE_SIZE_MIN 16u @@ -50,10 +51,10 @@ struct fbnic_net; #define FBNIC_RX_TROOM \ SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +#define FBNIC_RX_HROOM_PAD 128 #define FBNIC_RX_HROOM \ - (ALIGN(FBNIC_RX_TROOM + NET_SKB_PAD, 128) - FBNIC_RX_TROOM) + (ALIGN(FBNIC_RX_TROOM + FBNIC_RX_HROOM_PAD, 128) - FBNIC_RX_TROOM) #define FBNIC_RX_PAD 0 -#define FBNIC_RX_MAX_HDR (1536 - FBNIC_RX_PAD) #define FBNIC_RX_PAYLD_OFFSET 0 #define FBNIC_RX_PAYLD_PG_CL 0 @@ -61,12 +62,16 @@ struct fbnic_net; #define FBNIC_RING_F_CTX BIT(1) #define FBNIC_RING_F_STATS BIT(2) /* Ring's stats may be used */ +#define FBNIC_HDS_THRESH_MAX \ + (4096 - FBNIC_RX_HROOM - FBNIC_RX_TROOM - FBNIC_RX_PAD) +#define FBNIC_HDS_THRESH_DEFAULT \ + (1536 - FBNIC_RX_PAD) +#define FBNIC_HDR_BYTES_MIN 128 + struct fbnic_pkt_buff { struct xdp_buff buff; ktime_t hwtstamp; - u32 data_truesize; - u16 data_len; - u16 nr_frags; + bool add_frag_failed; }; struct 
fbnic_queue_stats { @@ -85,6 +90,7 @@ struct fbnic_queue_stats { u64 alloc_failed; u64 csum_complete; u64 csum_none; + u64 length_errors; } rx; }; u64 dropped; @@ -94,7 +100,7 @@ struct fbnic_queue_stats { #define FBNIC_PAGECNT_BIAS_MAX PAGE_SIZE struct fbnic_rx_buf { - struct page *page; + netmem_ref netmem; long pagecnt_bias; }; @@ -115,6 +121,17 @@ struct fbnic_ring { u32 head, tail; /* Head/Tail of ring */ + union { + /* Rx BDQs only */ + struct page_pool *page_pool; + + /* Deferred_head is used to cache the head for TWQ1 if + * an attempt is made to clean TWQ1 with zero napi_budget. + * We do not use it for any other ring. + */ + s32 deferred_head; + }; + struct fbnic_queue_stats stats; /* Slow path fields follow */ @@ -124,12 +141,12 @@ struct fbnic_ring { struct fbnic_q_triad { struct fbnic_ring sub0, sub1, cmpl; + struct xdp_rxq_info xdp_rxq; }; struct fbnic_napi_vector { struct napi_struct napi; struct device *dev; /* Device for DMA unmapping */ - struct page_pool *page_pool; struct fbnic_dev *fbd; u16 v_idx; @@ -139,6 +156,8 @@ struct fbnic_napi_vector { struct fbnic_q_triad qt[]; }; +extern const struct netdev_queue_mgmt_ops fbnic_queue_mgmt_ops; + netdev_tx_t fbnic_xmit_frame(struct sk_buff *skb, struct net_device *dev); netdev_features_t fbnic_features_check(struct sk_buff *skb, struct net_device *dev, diff --git a/drivers/net/ethernet/microchip/lan865x/lan865x.c b/drivers/net/ethernet/microchip/lan865x/lan865x.c index 79b800d2b72c..b428ad6516c5 100644 --- a/drivers/net/ethernet/microchip/lan865x/lan865x.c +++ b/drivers/net/ethernet/microchip/lan865x/lan865x.c @@ -326,6 +326,7 @@ static const struct net_device_ops lan865x_netdev_ops = { .ndo_start_xmit = lan865x_send_packet, .ndo_set_rx_mode = lan865x_set_multicast_list, .ndo_set_mac_address = lan865x_set_mac_address, + .ndo_eth_ioctl = phy_do_ioctl_running, }; static int lan865x_probe(struct spi_device *spi) diff --git a/drivers/net/ethernet/microsoft/mana/mana_bpf.c b/drivers/net/ethernet/microsoft/mana/mana_bpf.c index d30721d4516f..7697c9b52ed3 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_bpf.c +++ b/drivers/net/ethernet/microsoft/mana/mana_bpf.c @@ -174,6 +174,7 @@ static int mana_xdp_set(struct net_device *ndev, struct bpf_prog *prog, struct mana_port_context *apc = netdev_priv(ndev); struct bpf_prog *old_prog; struct gdma_context *gc; + int err; gc = apc->ac->gdma_dev->gdma_context; @@ -195,11 +196,45 @@ static int mana_xdp_set(struct net_device *ndev, struct bpf_prog *prog, */ apc->bpf_prog = prog; - if (old_prog) - bpf_prog_put(old_prog); + if (apc->port_is_up) { + /* Re-create rxq's after xdp prog was loaded or unloaded. + * Ex: re-create rxq's to switch from full pages to smaller + * size page fragments when xdp prog is unloaded and + * vice-versa.
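The reconfiguration ordering in the surrounding mana_xdp_set() hunk is easiest to see condensed. A sketch using the same mana_* helpers, with the mana_chn_setxdp() call and the bpf_prog pointer restore on error omitted:

        err = mana_pre_alloc_rxbufs(apc, ndev->mtu, apc->num_queues);
        if (err)
                return err;                     /* old queues untouched */

        err = mana_detach(ndev, false);
        if (!err)
                err = mana_attach(ndev);

        mana_pre_dealloc_rxbufs(apc);           /* runs on success and error */
        return err;
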
+ */ + + /* Pre-allocate buffers to prevent failure in mana_attach */ + err = mana_pre_alloc_rxbufs(apc, ndev->mtu, apc->num_queues); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "XDP: Insufficient memory for tx/rx re-config"); + return err; + } + + err = mana_detach(ndev, false); + if (err) { + netdev_err(ndev, + "mana_detach failed at xdp set: %d\n", err); + NL_SET_ERR_MSG_MOD(extack, + "XDP: Re-config failed at detach"); + goto err_dealloc_rxbuffs; + } + + err = mana_attach(ndev); + if (err) { + netdev_err(ndev, + "mana_attach failed at xdp set: %d\n", err); + NL_SET_ERR_MSG_MOD(extack, + "XDP: Re-config failed at attach"); + goto err_dealloc_rxbuffs; + } - if (apc->port_is_up) mana_chn_setxdp(apc, prog); + mana_pre_dealloc_rxbufs(apc); + } + + if (old_prog) + bpf_prog_put(old_prog); if (prog) ndev->max_mtu = MANA_XDP_MTU_MAX; @@ -207,6 +242,11 @@ static int mana_xdp_set(struct net_device *ndev, struct bpf_prog *prog, ndev->max_mtu = gc->adapter_mtu - ETH_HLEN; return 0; + +err_dealloc_rxbuffs: + apc->bpf_prog = old_prog; + mana_pre_dealloc_rxbufs(apc); + return err; } int mana_bpf(struct net_device *ndev, struct netdev_bpf *bpf) diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index 550843e2164b..f4fc86f20213 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -57,6 +57,15 @@ static bool mana_en_need_log(struct mana_port_context *apc, int err) return true; } +static void mana_put_rx_page(struct mana_rxq *rxq, struct page *page, + bool from_pool) +{ + if (from_pool) + page_pool_put_full_page(rxq->page_pool, page, false); + else + put_page(page); +} + /* Microsoft Azure Network Adapter (MANA) functions */ static int mana_open(struct net_device *ndev) @@ -630,21 +639,40 @@ static void *mana_get_rxbuf_pre(struct mana_rxq *rxq, dma_addr_t *da) } /* Get RX buffer's data size, alloc size, XDP headroom based on MTU */ -static void mana_get_rxbuf_cfg(int mtu, u32 *datasize, u32 *alloc_size, - u32 *headroom) +static void mana_get_rxbuf_cfg(struct mana_port_context *apc, + int mtu, u32 *datasize, u32 *alloc_size, + u32 *headroom, u32 *frag_count) { - if (mtu > MANA_XDP_MTU_MAX) - *headroom = 0; /* no support for XDP */ - else - *headroom = XDP_PACKET_HEADROOM; + u32 len, buf_size; - *alloc_size = SKB_DATA_ALIGN(mtu + MANA_RXBUF_PAD + *headroom); + /* Calculate datasize first (consistent across all cases) */ + *datasize = mtu + ETH_HLEN; - /* Using page pool in this case, so alloc_size is PAGE_SIZE */ - if (*alloc_size < PAGE_SIZE) - *alloc_size = PAGE_SIZE; + /* For xdp and jumbo frames make sure only one packet fits per page */ + if (mtu + MANA_RXBUF_PAD > PAGE_SIZE / 2 || mana_xdp_get(apc)) { + if (mana_xdp_get(apc)) { + *headroom = XDP_PACKET_HEADROOM; + *alloc_size = PAGE_SIZE; + } else { + *headroom = 0; /* no support for XDP */ + *alloc_size = SKB_DATA_ALIGN(mtu + MANA_RXBUF_PAD + + *headroom); + } - *datasize = mtu + ETH_HLEN; + *frag_count = 1; + return; + } + + /* Standard MTU case - optimize for multiple packets per page */ + *headroom = 0; + + /* Calculate base buffer size needed */ + len = SKB_DATA_ALIGN(mtu + MANA_RXBUF_PAD + *headroom); + buf_size = ALIGN(len, MANA_RX_FRAG_ALIGNMENT); + + /* Calculate how many packets can fit in a page */ + *frag_count = PAGE_SIZE / buf_size; + *alloc_size = buf_size; } int mana_pre_alloc_rxbufs(struct mana_port_context *mpc, int new_mtu, int num_queues) @@ -656,8 +684,9 @@ int mana_pre_alloc_rxbufs(struct mana_port_context *mpc, 
int new_mtu, int num_qu void *va; int i; - mana_get_rxbuf_cfg(new_mtu, &mpc->rxbpre_datasize, - &mpc->rxbpre_alloc_size, &mpc->rxbpre_headroom); + mana_get_rxbuf_cfg(mpc, new_mtu, &mpc->rxbpre_datasize, + &mpc->rxbpre_alloc_size, &mpc->rxbpre_headroom, + &mpc->rxbpre_frag_count); dev = mpc->ac->gdma_dev->gdma_context->dev; @@ -1842,8 +1871,11 @@ drop_xdp: drop: if (from_pool) { - page_pool_recycle_direct(rxq->page_pool, - virt_to_head_page(buf_va)); + if (rxq->frag_count == 1) + page_pool_recycle_direct(rxq->page_pool, + virt_to_head_page(buf_va)); + else + page_pool_free_va(rxq->page_pool, buf_va, true); } else { WARN_ON_ONCE(rxq->xdp_save_va); /* Save for reuse */ @@ -1859,33 +1891,46 @@ static void *mana_get_rxfrag(struct mana_rxq *rxq, struct device *dev, dma_addr_t *da, bool *from_pool) { struct page *page; + u32 offset; void *va; - *from_pool = false; - /* Reuse XDP dropped page if available */ - if (rxq->xdp_save_va) { - va = rxq->xdp_save_va; - rxq->xdp_save_va = NULL; - } else { - page = page_pool_dev_alloc_pages(rxq->page_pool); - if (!page) + /* Don't use fragments for jumbo frames or XDP where it's 1 fragment + * per page. + */ + if (rxq->frag_count == 1) { + /* Reuse XDP dropped page if available */ + if (rxq->xdp_save_va) { + va = rxq->xdp_save_va; + page = virt_to_head_page(va); + rxq->xdp_save_va = NULL; + } else { + page = page_pool_dev_alloc_pages(rxq->page_pool); + if (!page) + return NULL; + + *from_pool = true; + va = page_to_virt(page); + } + + *da = dma_map_single(dev, va + rxq->headroom, rxq->datasize, + DMA_FROM_DEVICE); + if (dma_mapping_error(dev, *da)) { + mana_put_rx_page(rxq, page, *from_pool); return NULL; + } - *from_pool = true; - va = page_to_virt(page); + return va; } - *da = dma_map_single(dev, va + rxq->headroom, rxq->datasize, - DMA_FROM_DEVICE); - if (dma_mapping_error(dev, *da)) { - if (*from_pool) - page_pool_put_full_page(rxq->page_pool, page, false); - else - put_page(virt_to_head_page(va)); - + page = page_pool_dev_alloc_frag(rxq->page_pool, &offset, + rxq->alloc_size); + if (!page) return NULL; - } + + va = page_to_virt(page) + offset; + *da = page_pool_get_dma_addr(page) + offset + rxq->headroom; + *from_pool = true; return va; } @@ -1902,9 +1947,9 @@ static void mana_refill_rx_oob(struct device *dev, struct mana_rxq *rxq, va = mana_get_rxfrag(rxq, dev, &da, &from_pool); if (!va) return; - - dma_unmap_single(dev, rxoob->sgl[0].address, rxq->datasize, - DMA_FROM_DEVICE); + if (!rxoob->from_pool || rxq->frag_count == 1) + dma_unmap_single(dev, rxoob->sgl[0].address, rxq->datasize, + DMA_FROM_DEVICE); *old_buf = rxoob->buf_va; *old_fp = rxoob->from_pool; @@ -2315,15 +2360,15 @@ static void mana_destroy_rxq(struct mana_port_context *apc, if (!rx_oob->buf_va) continue; - dma_unmap_single(dev, rx_oob->sgl[0].address, - rx_oob->sgl[0].size, DMA_FROM_DEVICE); - page = virt_to_head_page(rx_oob->buf_va); - if (rx_oob->from_pool) - page_pool_put_full_page(rxq->page_pool, page, false); - else - put_page(page); + if (rxq->frag_count == 1 || !rx_oob->from_pool) { + dma_unmap_single(dev, rx_oob->sgl[0].address, + rx_oob->sgl[0].size, DMA_FROM_DEVICE); + mana_put_rx_page(rxq, page, rx_oob->from_pool); + } else { + page_pool_free_va(rxq->page_pool, rx_oob->buf_va, true); + } rx_oob->buf_va = NULL; } @@ -2429,11 +2474,22 @@ static int mana_create_page_pool(struct mana_rxq *rxq, struct gdma_context *gc) struct page_pool_params pprm = {}; int ret; - pprm.pool_size = mpc->rx_queue_size; + pprm.pool_size = mpc->rx_queue_size / rxq->frag_count + 1; pprm.nid = 
gc->numa_node; pprm.napi = &rxq->rx_cq.napi; pprm.netdev = rxq->ndev; pprm.order = get_order(rxq->alloc_size); + pprm.queue_idx = rxq->rxq_idx; + pprm.dev = gc->dev; + + /* Let the page pool do the dma map when page sharing with multiple + * fragments enabled for rx buffers. + */ + if (rxq->frag_count > 1) { + pprm.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV; + pprm.max_len = PAGE_SIZE; + pprm.dma_dir = DMA_FROM_DEVICE; + } rxq->page_pool = page_pool_create(&pprm); @@ -2472,9 +2528,8 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc, rxq->rxq_idx = rxq_idx; rxq->rxobj = INVALID_MANA_HANDLE; - mana_get_rxbuf_cfg(ndev->mtu, &rxq->datasize, &rxq->alloc_size, - &rxq->headroom); - + mana_get_rxbuf_cfg(apc, ndev->mtu, &rxq->datasize, &rxq->alloc_size, + &rxq->headroom, &rxq->frag_count); /* Create page pool for RX queue */ err = mana_create_page_pool(rxq, gc); if (err) { diff --git a/drivers/net/ethernet/netronome/nfp/crypto/tls.c b/drivers/net/ethernet/netronome/nfp/crypto/tls.c index f80f1a6953fa..f252ecdcd2cd 100644 --- a/drivers/net/ethernet/netronome/nfp/crypto/tls.c +++ b/drivers/net/ethernet/netronome/nfp/crypto/tls.c @@ -495,14 +495,13 @@ int nfp_net_tls_rx_resync_req(struct net_device *netdev, switch (ipv6h->version) { case 4: - sk = inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, - iph->saddr, th->source, iph->daddr, - th->dest, netdev->ifindex); + sk = inet_lookup_established(net, iph->saddr, th->source, + iph->daddr, th->dest, + netdev->ifindex); break; #if IS_ENABLED(CONFIG_IPV6) case 6: - sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, - &ipv6h->saddr, th->source, + sk = __inet6_lookup_established(net, &ipv6h->saddr, th->source, &ipv6h->daddr, ntohs(th->dest), netdev->ifindex, 0); break; diff --git a/drivers/net/ethernet/netronome/nfp/flower/metadata.c b/drivers/net/ethernet/netronome/nfp/flower/metadata.c index 80e4675582bf..dde60c4572fa 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/metadata.c +++ b/drivers/net/ethernet/netronome/nfp/flower/metadata.c @@ -564,8 +564,8 @@ int nfp_flower_metadata_init(struct nfp_app *app, u64 host_ctx_count, /* Init ring buffer and unallocated stats_ids. 
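For reference, the standard-MTU branch of mana_get_rxbuf_cfg() shown earlier reduces to the following sketch; MANA_RXBUF_PAD and MANA_RX_FRAG_ALIGNMENT are the driver's existing constants, and headroom is zero in this branch:

        static u32 mana_frags_per_page(u32 mtu)
        {
                u32 len = SKB_DATA_ALIGN(mtu + MANA_RXBUF_PAD);
                u32 buf_size = ALIGN(len, MANA_RX_FRAG_ALIGNMENT);

                /* frag_count: how many receive buffers share one page */
                return PAGE_SIZE / buf_size;
        }
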
*/ priv->stats_ids.free_list.buf = - vmalloc(array_size(NFP_FL_STATS_ELEM_RS, - priv->stats_ring_size)); + vmalloc_array(priv->stats_ring_size, + NFP_FL_STATS_ELEM_RS); if (!priv->stats_ids.free_list.buf) goto err_free_last_used; diff --git a/drivers/net/ethernet/netronome/nfp/nfd3/dp.c b/drivers/net/ethernet/netronome/nfp/nfd3/dp.c index 08086eb76996..91a227929a5f 100644 --- a/drivers/net/ethernet/netronome/nfp/nfd3/dp.c +++ b/drivers/net/ethernet/netronome/nfp/nfd3/dp.c @@ -1169,14 +1169,10 @@ int nfp_nfd3_poll(struct napi_struct *napi, int budget) if (r_vec->nfp_net->rx_coalesce_adapt_on && r_vec->rx_ring) { struct dim_sample dim_sample = {}; - unsigned int start; u64 pkts, bytes; - do { - start = u64_stats_fetch_begin(&r_vec->rx_sync); - pkts = r_vec->rx_pkts; - bytes = r_vec->rx_bytes; - } while (u64_stats_fetch_retry(&r_vec->rx_sync, start)); + pkts = r_vec->rx_pkts; + bytes = r_vec->rx_bytes; dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample); net_dim(&r_vec->rx_dim, &dim_sample); @@ -1184,14 +1180,10 @@ int nfp_nfd3_poll(struct napi_struct *napi, int budget) if (r_vec->nfp_net->tx_coalesce_adapt_on && r_vec->tx_ring) { struct dim_sample dim_sample = {}; - unsigned int start; u64 pkts, bytes; - do { - start = u64_stats_fetch_begin(&r_vec->tx_sync); - pkts = r_vec->tx_pkts; - bytes = r_vec->tx_bytes; - } while (u64_stats_fetch_retry(&r_vec->tx_sync, start)); + pkts = r_vec->tx_pkts; + bytes = r_vec->tx_bytes; dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample); net_dim(&r_vec->tx_dim, &dim_sample); diff --git a/drivers/net/ethernet/netronome/nfp/nfdk/dp.c b/drivers/net/ethernet/netronome/nfp/nfdk/dp.c index ab3cd06ed63e..ee0db3d5fd66 100644 --- a/drivers/net/ethernet/netronome/nfp/nfdk/dp.c +++ b/drivers/net/ethernet/netronome/nfp/nfdk/dp.c @@ -1279,14 +1279,10 @@ int nfp_nfdk_poll(struct napi_struct *napi, int budget) if (r_vec->nfp_net->rx_coalesce_adapt_on && r_vec->rx_ring) { struct dim_sample dim_sample = {}; - unsigned int start; u64 pkts, bytes; - do { - start = u64_stats_fetch_begin(&r_vec->rx_sync); - pkts = r_vec->rx_pkts; - bytes = r_vec->rx_bytes; - } while (u64_stats_fetch_retry(&r_vec->rx_sync, start)); + pkts = r_vec->rx_pkts; + bytes = r_vec->rx_bytes; dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample); net_dim(&r_vec->rx_dim, &dim_sample); @@ -1294,14 +1290,10 @@ int nfp_nfdk_poll(struct napi_struct *napi, int budget) if (r_vec->nfp_net->tx_coalesce_adapt_on && r_vec->tx_ring) { struct dim_sample dim_sample = {}; - unsigned int start; u64 pkts, bytes; - do { - start = u64_stats_fetch_begin(&r_vec->tx_sync); - pkts = r_vec->tx_pkts; - bytes = r_vec->tx_bytes; - } while (u64_stats_fetch_retry(&r_vec->tx_sync, start)); + pkts = r_vec->tx_pkts; + bytes = r_vec->tx_bytes; dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample); net_dim(&r_vec->tx_dim, &dim_sample); diff --git a/drivers/net/ethernet/qlogic/qed/qed_devlink.c b/drivers/net/ethernet/qlogic/qed/qed_devlink.c index 1adc7fbb3f2f..94c5689b5abd 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_devlink.c +++ b/drivers/net/ethernet/qlogic/qed/qed_devlink.c @@ -87,20 +87,21 @@ qed_fw_fatal_reporter_recover(struct devlink_health_reporter *reporter, return 0; } +#define QED_REPORTER_FW_GRACEFUL_PERIOD 0 + static const struct devlink_health_reporter_ops qed_fw_fatal_reporter_ops = { .name = "fw_fatal", .recover = qed_fw_fatal_reporter_recover, .dump = qed_fw_fatal_reporter_dump, + .default_graceful_period = QED_REPORTER_FW_GRACEFUL_PERIOD, }; -#define QED_REPORTER_FW_GRACEFUL_PERIOD 
0 - void qed_fw_reporters_create(struct devlink *devlink) { struct qed_devlink *dl = devlink_priv(devlink); - dl->fw_reporter = devlink_health_reporter_create(devlink, &qed_fw_fatal_reporter_ops, - QED_REPORTER_FW_GRACEFUL_PERIOD, dl); + dl->fw_reporter = devlink_health_reporter_create(devlink, + &qed_fw_fatal_reporter_ops, dl); if (IS_ERR(dl->fw_reporter)) { DP_NOTICE(dl->cdev, "Failed to create fw reporter, err = %ld\n", PTR_ERR(dl->fw_reporter)); diff --git a/drivers/net/ethernet/qualcomm/Kconfig b/drivers/net/ethernet/qualcomm/Kconfig index a4434eb38950..ba7efb108637 100644 --- a/drivers/net/ethernet/qualcomm/Kconfig +++ b/drivers/net/ethernet/qualcomm/Kconfig @@ -60,6 +60,21 @@ config QCOM_EMAC low power, Receive-Side Scaling (RSS), and IEEE 1588-2008 Precision Clock Synchronization Protocol. +config QCOM_PPE + tristate "Qualcomm Technologies, Inc. PPE Ethernet support" + depends on COMMON_CLK && HAS_IOMEM && OF + depends on ARCH_QCOM || COMPILE_TEST + select REGMAP_MMIO + help + This driver supports the Qualcomm Technologies, Inc. packet + process engine (PPE) available with IPQ SoC. The PPE includes + the Ethernet MACs, Ethernet DMA (EDMA) and switch core that + supports L3 flow offload, L2 switch function, RSS and tunnel + offload. + + To compile this driver as a module, choose M here. The module + will be called qcom-ppe. + source "drivers/net/ethernet/qualcomm/rmnet/Kconfig" endif # NET_VENDOR_QUALCOMM diff --git a/drivers/net/ethernet/qualcomm/Makefile b/drivers/net/ethernet/qualcomm/Makefile index 9250976dd884..166a59aea363 100644 --- a/drivers/net/ethernet/qualcomm/Makefile +++ b/drivers/net/ethernet/qualcomm/Makefile @@ -11,4 +11,5 @@ qcauart-objs := qca_uart.o obj-y += emac/ +obj-$(CONFIG_QCOM_PPE) += ppe/ obj-$(CONFIG_RMNET) += rmnet/ diff --git a/drivers/net/ethernet/qualcomm/ppe/Makefile b/drivers/net/ethernet/qualcomm/ppe/Makefile new file mode 100644 index 000000000000..9e60b2400c16 --- /dev/null +++ b/drivers/net/ethernet/qualcomm/ppe/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Makefile for the device driver of PPE (Packet Process Engine) in IPQ SoC +# + +obj-$(CONFIG_QCOM_PPE) += qcom-ppe.o +qcom-ppe-objs := ppe.o ppe_config.o ppe_debugfs.o diff --git a/drivers/net/ethernet/qualcomm/ppe/ppe.c b/drivers/net/ethernet/qualcomm/ppe/ppe.c new file mode 100644 index 000000000000..be747510d947 --- /dev/null +++ b/drivers/net/ethernet/qualcomm/ppe/ppe.c @@ -0,0 +1,239 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. + */ + +/* PPE platform device probe, DTSI parser and PPE clock initializations. */ + +#include <linux/clk.h> +#include <linux/interconnect.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/regmap.h> +#include <linux/reset.h> + +#include "ppe.h" +#include "ppe_config.h" +#include "ppe_debugfs.h" + +#define PPE_PORT_MAX 8 +#define PPE_CLK_RATE 353000000 + +/* ICC clocks for enabling PPE device. The avg_bw and peak_bw with value 0 + * will be updated by the clock rate of PPE. 
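The qed hunk above reflects a devlink API change: the graceful period now lives in the reporter ops instead of being passed to devlink_health_reporter_create(). A minimal sketch, with my_recover standing in for a driver callback:

        static const struct devlink_health_reporter_ops my_ops = {
                .name = "fw_fatal",
                .recover = my_recover,          /* hypothetical callback */
                .default_graceful_period = 0,
        };

        /* The graceful period argument is gone from the create call: */
        reporter = devlink_health_reporter_create(devlink, &my_ops, priv);
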
+ */ +static const struct icc_bulk_data ppe_icc_data[] = { + { + .name = "ppe", + .avg_bw = 0, + .peak_bw = 0, + }, + { + .name = "ppe_cfg", + .avg_bw = 0, + .peak_bw = 0, + }, + { + .name = "qos_gen", + .avg_bw = 6000, + .peak_bw = 6000, + }, + { + .name = "timeout_ref", + .avg_bw = 6000, + .peak_bw = 6000, + }, + { + .name = "nssnoc_memnoc", + .avg_bw = 533333, + .peak_bw = 533333, + }, + { + .name = "memnoc_nssnoc", + .avg_bw = 533333, + .peak_bw = 533333, + }, + { + .name = "memnoc_nssnoc_1", + .avg_bw = 533333, + .peak_bw = 533333, + }, +}; + +static const struct regmap_range ppe_readable_ranges[] = { + regmap_reg_range(0x0, 0x1ff), /* Global */ + regmap_reg_range(0x400, 0x5ff), /* LPI CSR */ + regmap_reg_range(0x1000, 0x11ff), /* GMAC0 */ + regmap_reg_range(0x1200, 0x13ff), /* GMAC1 */ + regmap_reg_range(0x1400, 0x15ff), /* GMAC2 */ + regmap_reg_range(0x1600, 0x17ff), /* GMAC3 */ + regmap_reg_range(0x1800, 0x19ff), /* GMAC4 */ + regmap_reg_range(0x1a00, 0x1bff), /* GMAC5 */ + regmap_reg_range(0xb000, 0xefff), /* PRX CSR */ + regmap_reg_range(0xf000, 0x1efff), /* IPE */ + regmap_reg_range(0x20000, 0x5ffff), /* PTX CSR */ + regmap_reg_range(0x60000, 0x9ffff), /* IPE L2 CSR */ + regmap_reg_range(0xb0000, 0xeffff), /* IPO CSR */ + regmap_reg_range(0x100000, 0x17ffff), /* IPE PC */ + regmap_reg_range(0x180000, 0x1bffff), /* PRE IPO CSR */ + regmap_reg_range(0x1d0000, 0x1dffff), /* Tunnel parser */ + regmap_reg_range(0x1e0000, 0x1effff), /* Ingress parse */ + regmap_reg_range(0x200000, 0x2fffff), /* IPE L3 */ + regmap_reg_range(0x300000, 0x3fffff), /* IPE tunnel */ + regmap_reg_range(0x400000, 0x4fffff), /* Scheduler */ + regmap_reg_range(0x500000, 0x503fff), /* XGMAC0 */ + regmap_reg_range(0x504000, 0x507fff), /* XGMAC1 */ + regmap_reg_range(0x508000, 0x50bfff), /* XGMAC2 */ + regmap_reg_range(0x50c000, 0x50ffff), /* XGMAC3 */ + regmap_reg_range(0x510000, 0x513fff), /* XGMAC4 */ + regmap_reg_range(0x514000, 0x517fff), /* XGMAC5 */ + regmap_reg_range(0x600000, 0x6fffff), /* BM */ + regmap_reg_range(0x800000, 0x9fffff), /* QM */ + regmap_reg_range(0xb00000, 0xbef800), /* EDMA */ +}; + +static const struct regmap_access_table ppe_reg_table = { + .yes_ranges = ppe_readable_ranges, + .n_yes_ranges = ARRAY_SIZE(ppe_readable_ranges), +}; + +static const struct regmap_config regmap_config_ipq9574 = { + .reg_bits = 32, + .reg_stride = 4, + .val_bits = 32, + .rd_table = &ppe_reg_table, + .wr_table = &ppe_reg_table, + .max_register = 0xbef800, + .fast_io = true, +}; + +static int ppe_clock_init_and_reset(struct ppe_device *ppe_dev) +{ + unsigned long ppe_rate = ppe_dev->clk_rate; + struct device *dev = ppe_dev->dev; + struct reset_control *rstc; + struct clk_bulk_data *clks; + struct clk *clk; + int ret, i; + + for (i = 0; i < ppe_dev->num_icc_paths; i++) { + ppe_dev->icc_paths[i].name = ppe_icc_data[i].name; + ppe_dev->icc_paths[i].avg_bw = ppe_icc_data[i].avg_bw ? : + Bps_to_icc(ppe_rate); + + /* PPE does not have an explicit peak bandwidth requirement, + * so set the peak bandwidth to be equal to the average + * bandwidth. + */ + ppe_dev->icc_paths[i].peak_bw = ppe_icc_data[i].peak_bw ? : + Bps_to_icc(ppe_rate); + } + + ret = devm_of_icc_bulk_get(dev, ppe_dev->num_icc_paths, + ppe_dev->icc_paths); + if (ret) + return ret; + + ret = icc_bulk_set_bw(ppe_dev->num_icc_paths, ppe_dev->icc_paths); + if (ret) + return ret; + + /* The PPE clocks have a common parent clock. Setting the clock + * rate of "ppe" ensures the clock rate of all PPE clocks is + * configured to the same rate. 
+ */ + clk = devm_clk_get(dev, "ppe"); + if (IS_ERR(clk)) + return PTR_ERR(clk); + + ret = clk_set_rate(clk, ppe_rate); + if (ret) + return ret; + + ret = devm_clk_bulk_get_all_enabled(dev, &clks); + if (ret < 0) + return ret; + + /* Reset the PPE. */ + rstc = devm_reset_control_get_exclusive(dev, NULL); + if (IS_ERR(rstc)) + return PTR_ERR(rstc); + + ret = reset_control_assert(rstc); + if (ret) + return ret; + + /* A delay of 10 ms after assert is necessary for resetting the PPE. */ + usleep_range(10000, 11000); + + return reset_control_deassert(rstc); +} + +static int qcom_ppe_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct ppe_device *ppe_dev; + void __iomem *base; + int ret, num_icc; + + num_icc = ARRAY_SIZE(ppe_icc_data); + ppe_dev = devm_kzalloc(dev, struct_size(ppe_dev, icc_paths, num_icc), + GFP_KERNEL); + if (!ppe_dev) + return -ENOMEM; + + base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(base)) + return dev_err_probe(dev, PTR_ERR(base), "PPE ioremap failed\n"); + + ppe_dev->regmap = devm_regmap_init_mmio(dev, base, &regmap_config_ipq9574); + if (IS_ERR(ppe_dev->regmap)) + return dev_err_probe(dev, PTR_ERR(ppe_dev->regmap), + "PPE initialize regmap failed\n"); + ppe_dev->dev = dev; + ppe_dev->clk_rate = PPE_CLK_RATE; + ppe_dev->num_ports = PPE_PORT_MAX; + ppe_dev->num_icc_paths = num_icc; + + ret = ppe_clock_init_and_reset(ppe_dev); + if (ret) + return dev_err_probe(dev, ret, "PPE clock config failed\n"); + + ret = ppe_hw_config(ppe_dev); + if (ret) + return dev_err_probe(dev, ret, "PPE HW config failed\n"); + + ppe_debugfs_setup(ppe_dev); + platform_set_drvdata(pdev, ppe_dev); + + return 0; +} + +static void qcom_ppe_remove(struct platform_device *pdev) +{ + struct ppe_device *ppe_dev; + + ppe_dev = platform_get_drvdata(pdev); + ppe_debugfs_teardown(ppe_dev); +} + +static const struct of_device_id qcom_ppe_of_match[] = { + { .compatible = "qcom,ipq9574-ppe" }, + {} +}; +MODULE_DEVICE_TABLE(of, qcom_ppe_of_match); + +static struct platform_driver qcom_ppe_driver = { + .driver = { + .name = "qcom_ppe", + .of_match_table = qcom_ppe_of_match, + }, + .probe = qcom_ppe_probe, + .remove = qcom_ppe_remove, +}; +module_platform_driver(qcom_ppe_driver); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Qualcomm Technologies, Inc. IPQ PPE driver"); diff --git a/drivers/net/ethernet/qualcomm/ppe/ppe.h b/drivers/net/ethernet/qualcomm/ppe/ppe.h new file mode 100644 index 000000000000..27458f0bc206 --- /dev/null +++ b/drivers/net/ethernet/qualcomm/ppe/ppe.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0-only + * + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. + */ + +#ifndef __PPE_H__ +#define __PPE_H__ + +#include <linux/compiler.h> +#include <linux/interconnect.h> + +struct device; +struct regmap; +struct dentry; + +/** + * struct ppe_device - PPE device private data. + * @dev: PPE device structure. + * @regmap: PPE register map. + * @clk_rate: PPE clock rate. + * @num_ports: Number of PPE ports. + * @debugfs_root: Debugfs root entry. + * @num_icc_paths: Number of interconnect paths. + * @icc_paths: Interconnect path array. + * + * PPE device is the instance of PPE hardware, which is used to + * configure PPE packet process modules such as BM (buffer management), + * QM (queue management), and scheduler. + */
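The trailing icc_paths[] array in the struct that follows is annotated __counted_by(num_icc_paths), so the allocation and the counter assignment belong together. A sketch of the pattern qcom_ppe_probe() uses above:

        /* struct_size() computes sizeof(*ppe_dev) plus num_icc array
         * elements with overflow checking; num_icc_paths must be kept
         * in sync with the allocation for __counted_by() bounds
         * information to be accurate.
         */
        ppe_dev = devm_kzalloc(dev, struct_size(ppe_dev, icc_paths, num_icc),
                               GFP_KERNEL);
        if (!ppe_dev)
                return -ENOMEM;
        ppe_dev->num_icc_paths = num_icc;
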
+struct ppe_device { + struct device *dev; + struct regmap *regmap; + unsigned long clk_rate; + unsigned int num_ports; + struct dentry *debugfs_root; + unsigned int num_icc_paths; + struct icc_bulk_data icc_paths[] __counted_by(num_icc_paths); +}; +#endif diff --git a/drivers/net/ethernet/qualcomm/ppe/ppe_config.c b/drivers/net/ethernet/qualcomm/ppe/ppe_config.c new file mode 100644 index 000000000000..e9a0e22907a6 --- /dev/null +++ b/drivers/net/ethernet/qualcomm/ppe/ppe_config.c @@ -0,0 +1,2034 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. + */ + +/* PPE HW initialization configs such as BM(buffer management), + * QM(queue management) and scheduler configs. + */ + +#include <linux/bitfield.h> +#include <linux/bitmap.h> +#include <linux/bits.h> +#include <linux/device.h> +#include <linux/regmap.h> + +#include "ppe.h" +#include "ppe_config.h" +#include "ppe_regs.h" + +#define PPE_QUEUE_SCH_PRI_NUM 8 + +/** + * struct ppe_bm_port_config - PPE BM port configuration. + * @port_id_start: The first BM port ID to configure. + * @port_id_end: The last BM port ID to configure. + * @pre_alloc: BM port dedicated buffer number. + * @in_fly_buf: Buffer number for receiving the packet after a pause frame is sent. + * @ceil: Ceil to generate the back pressure. + * @weight: Weight value. + * @resume_offset: Resume offset from the threshold value. + * @resume_ceil: Ceil to resume from the back pressure state. + * @dynamic: Dynamic threshold used or not. + * + * This is for configuring the threshold that impacts the port + * flow control. + */ +struct ppe_bm_port_config { + unsigned int port_id_start; + unsigned int port_id_end; + unsigned int pre_alloc; + unsigned int in_fly_buf; + unsigned int ceil; + unsigned int weight; + unsigned int resume_offset; + unsigned int resume_ceil; + bool dynamic; +}; + +/** + * struct ppe_qm_queue_config - PPE queue config. + * @queue_start: PPE start of queue ID. + * @queue_end: PPE end of queue ID. + * @prealloc_buf: Queue dedicated buffer number. + * @ceil: Ceil to start dropping packets from the queue. + * @weight: Weight value. + * @resume_offset: Resume offset from the threshold. + * @dynamic: Threshold value is decided dynamically or statically. + * + * Queue configuration decides the threshold to drop packets from the + * PPE hardware queue. + */ +struct ppe_qm_queue_config { + unsigned int queue_start; + unsigned int queue_end; + unsigned int prealloc_buf; + unsigned int ceil; + unsigned int weight; + unsigned int resume_offset; + bool dynamic; +}; + +/** + * enum ppe_scheduler_direction - PPE scheduler direction for packet. + * @PPE_SCH_INGRESS: Scheduler for the packet on ingress. + * @PPE_SCH_EGRESS: Scheduler for the packet on egress. + */ +enum ppe_scheduler_direction { + PPE_SCH_INGRESS = 0, + PPE_SCH_EGRESS = 1, +}; + +/** + * struct ppe_scheduler_bm_config - PPE arbitration for buffer config. + * @valid: Arbitration entry valid or not. + * @dir: Arbitration entry for egress or ingress. + * @port: Port ID to use arbitration entry. + * @backup_port_valid: Backup port valid or not. + * @backup_port: Backup port ID to use. + * + * Configure the scheduler settings for accessing and releasing the PPE buffers. + */ +struct ppe_scheduler_bm_config { + bool valid; + enum ppe_scheduler_direction dir; + unsigned int port; + bool backup_port_valid; + unsigned int backup_port; +}; + +/** + * struct ppe_scheduler_qm_config - PPE arbitration for scheduler config. + * @ensch_port_bmp: Port bit map for enqueue scheduler. + * @ensch_port: Port ID to enqueue scheduler. + * @desch_port: Port ID to dequeue scheduler. + * @desch_backup_port_valid: Dequeue for the backup port valid or not. + * @desch_backup_port: Backup port ID to dequeue scheduler. + * + * Configure the scheduler settings for enqueuing and dequeuing packets on + * the PPE port. + */ +struct ppe_scheduler_qm_config { + unsigned int ensch_port_bmp; + unsigned int ensch_port; + unsigned int desch_port; + bool desch_backup_port_valid; + unsigned int desch_backup_port; +}; + +/** + * struct ppe_scheduler_port_config - PPE port scheduler config. + * @port: Port ID to be scheduled. + * @flow_level: Scheduler flow level or not. + * @node_id: Node ID; for level 0, the queue ID is used. + * @loop_num: Loop number of scheduler config. + * @pri_max: Max priority configured. + * @flow_id: Strict priority ID. + * @drr_node_id: Node ID for scheduler. + * + * PPE port scheduler configuration which decides the priority in the + * packet scheduler for the egress port. + */ +struct ppe_scheduler_port_config { + unsigned int port; + bool flow_level; + unsigned int node_id; + unsigned int loop_num; + unsigned int pri_max; + unsigned int flow_id; + unsigned int drr_node_id; +}; + +/** + * struct ppe_port_schedule_resource - PPE port scheduler resource. + * @ucastq_start: Unicast queue start ID. + * @ucastq_end: Unicast queue end ID. + * @mcastq_start: Multicast queue start ID. + * @mcastq_end: Multicast queue end ID. + * @flow_id_start: Flow start ID. + * @flow_id_end: Flow end ID. + * @l0node_start: Scheduler node start ID for queue level. + * @l0node_end: Scheduler node end ID for queue level. + * @l1node_start: Scheduler node start ID for flow level. + * @l1node_end: Scheduler node end ID for flow level. + * + * PPE scheduler resource allocated among the PPE ports. + */ +struct ppe_port_schedule_resource { + unsigned int ucastq_start; + unsigned int ucastq_end; + unsigned int mcastq_start; + unsigned int mcastq_end; + unsigned int flow_id_start; + unsigned int flow_id_end; + unsigned int l0node_start; + unsigned int l0node_end; + unsigned int l1node_start; + unsigned int l1node_end; +}; + +/* There are a total of 2048 buffers available in the PPE, out of + * which some buffers are reserved for specific purposes per PPE + * port. The rest of the pool of 1550 buffers is assigned to the + * general 'group0', which is shared among all ports of the PPE. + */ +static const int ipq9574_ppe_bm_group_config = 1550; + +/* The buffer configurations per PPE port. There are 15 BM ports and + * 4 BM groups supported by PPE. BM port (0-7) is for EDMA port 0, + * BM port (8-13) is for PPE physical port 1-6 and BM port 14 is for + * EIP port. + */ +static const struct ppe_bm_port_config ipq9574_ppe_bm_port_config[] = { + { + /* Buffer configuration for the BM port ID 0 of EDMA. */ + .port_id_start = 0, + .port_id_end = 0, + .pre_alloc = 0, + .in_fly_buf = 100, + .ceil = 1146, + .weight = 7, + .resume_offset = 8, + .resume_ceil = 0, + .dynamic = true, + }, + { + /* Buffer configuration for the BM port ID 1-7 of EDMA. */ + .port_id_start = 1, + .port_id_end = 7, + .pre_alloc = 0, + .in_fly_buf = 100, + .ceil = 250, + .weight = 4, + .resume_offset = 36, + .resume_ceil = 0, + .dynamic = true, + }, + { + /* Buffer configuration for the BM port ID 8-13 of PPE ports. */ + .port_id_start = 8, + .port_id_end = 13, + .pre_alloc = 0, + .in_fly_buf = 128, + .ceil = 250, + .weight = 4, + .resume_offset = 36, + .resume_ceil = 0, + .dynamic = true, + }, + { + /* Buffer configuration for the BM port ID 14 of EIP. */ + .port_id_start = 14, + .port_id_end = 14, + .pre_alloc = 0, + .in_fly_buf = 40, + .ceil = 250, + .weight = 4, + .resume_offset = 36, + .resume_ceil = 0, + .dynamic = true, + }, +}; + +/* QM fetches the packet from PPE buffer management for transmitting the + * packet out. The QM group configuration limits the total number of buffers + * enqueued by all PPE hardware queues. + * There are a total of 2048 buffers available, out of which some buffers are + * dedicated to hardware exception handlers. The remaining buffers are + * assigned to the general 'group0', which is the group assigned to all + * queues by default. + */ +static const int ipq9574_ppe_qm_group_config = 2000; + +/* Default QM settings for unicast and multicast queues for IPQ9574. */ +static const struct ppe_qm_queue_config ipq9574_ppe_qm_queue_config[] = { + { + /* QM settings for unicast queues 0 to 255. */ + .queue_start = 0, + .queue_end = 255, + .prealloc_buf = 0, + .ceil = 1200, + .weight = 7, + .resume_offset = 36, + .dynamic = true, + }, + { + /* QM settings for multicast queues 256 to 299. */ + .queue_start = 256, + .queue_end = 299, + .prealloc_buf = 0, + .ceil = 250, + .weight = 0, + .resume_offset = 36, + .dynamic = false, + }, +}; + +/* PPE scheduler configuration for BM includes multiple entries. Each entry + * indicates the primary port to be assigned the buffers for the ingress or + * to release the buffers for the egress. Backup port ID will be used when + * the primary port ID is down. + */ +static const struct ppe_scheduler_bm_config ipq9574_ppe_sch_bm_config[] = { + {true, PPE_SCH_INGRESS, 0, false, 0}, + {true, PPE_SCH_EGRESS, 0, false, 0}, + {true, PPE_SCH_INGRESS, 5, false, 0}, + {true, PPE_SCH_EGRESS, 5, false, 0}, + {true, PPE_SCH_INGRESS, 6, false, 0}, + {true, PPE_SCH_EGRESS, 6, false, 0}, + {true, PPE_SCH_INGRESS, 1, false, 0}, + {true, PPE_SCH_EGRESS, 1, false, 0}, + {true, PPE_SCH_INGRESS, 0, false, 0}, + {true, PPE_SCH_EGRESS, 0, false, 0}, + {true, PPE_SCH_INGRESS, 5, false, 0}, + {true, PPE_SCH_EGRESS, 5, false, 0}, + {true, PPE_SCH_INGRESS, 6, false, 0}, + {true, PPE_SCH_EGRESS, 6, false, 0}, + {true, PPE_SCH_INGRESS, 7, false, 0}, + {true, PPE_SCH_EGRESS, 7, false, 0}, + {true, PPE_SCH_INGRESS, 0, false, 0}, + {true, PPE_SCH_EGRESS, 0, false, 0}, + {true, PPE_SCH_INGRESS, 1, false, 0}, + {true, PPE_SCH_EGRESS, 1, false, 0}, + {true, PPE_SCH_INGRESS, 5, false, 0}, + {true, PPE_SCH_EGRESS, 5, false, 0}, + {true, PPE_SCH_INGRESS, 6, false, 0}, + {true, PPE_SCH_EGRESS, 6, false, 0}, + {true, PPE_SCH_INGRESS, 2, false, 0}, + {true, PPE_SCH_EGRESS, 2, false, 0}, + {true, PPE_SCH_INGRESS, 0, false, 0}, + {true, PPE_SCH_EGRESS, 0, false, 0}, + {true, PPE_SCH_INGRESS, 5, false, 0}, + {true, PPE_SCH_EGRESS, 5, false, 0}, + {true, PPE_SCH_INGRESS, 6, false, 0}, + {true, PPE_SCH_EGRESS, 6, false, 0}, + {true, PPE_SCH_INGRESS, 1, false, 0}, + {true, PPE_SCH_EGRESS, 1, false, 0}, + {true, PPE_SCH_INGRESS, 3, false, 0}, + {true, PPE_SCH_EGRESS, 3, false, 0}, + {true, PPE_SCH_INGRESS, 0, false, 0}, + {true, PPE_SCH_EGRESS, 0, false, 0}, + {true, PPE_SCH_INGRESS, 5, false, 0}, + {true, PPE_SCH_EGRESS, 5, false, 0}, + {true, PPE_SCH_INGRESS, 6, false, 0}, + {true, PPE_SCH_EGRESS, 6, false, 0}, + {true, PPE_SCH_INGRESS, 7, false, 0}, + {true, PPE_SCH_EGRESS, 7, false, 0}, +
{true, PPE_SCH_INGRESS, 0, false, 0}, + {true, PPE_SCH_EGRESS, 0, false, 0}, + {true, PPE_SCH_INGRESS, 1, false, 0}, + {true, PPE_SCH_EGRESS, 1, false, 0}, + {true, PPE_SCH_INGRESS, 5, false, 0}, + {true, PPE_SCH_EGRESS, 5, false, 0}, + {true, PPE_SCH_INGRESS, 6, false, 0}, + {true, PPE_SCH_EGRESS, 6, false, 0}, + {true, PPE_SCH_INGRESS, 4, false, 0}, + {true, PPE_SCH_EGRESS, 4, false, 0}, + {true, PPE_SCH_INGRESS, 0, false, 0}, + {true, PPE_SCH_EGRESS, 0, false, 0}, + {true, PPE_SCH_INGRESS, 5, false, 0}, + {true, PPE_SCH_EGRESS, 5, false, 0}, + {true, PPE_SCH_INGRESS, 6, false, 0}, + {true, PPE_SCH_EGRESS, 6, false, 0}, + {true, PPE_SCH_INGRESS, 1, false, 0}, + {true, PPE_SCH_EGRESS, 1, false, 0}, + {true, PPE_SCH_INGRESS, 0, false, 0}, + {true, PPE_SCH_EGRESS, 0, false, 0}, + {true, PPE_SCH_INGRESS, 5, false, 0}, + {true, PPE_SCH_EGRESS, 5, false, 0}, + {true, PPE_SCH_INGRESS, 6, false, 0}, + {true, PPE_SCH_EGRESS, 6, false, 0}, + {true, PPE_SCH_INGRESS, 2, false, 0}, + {true, PPE_SCH_EGRESS, 2, false, 0}, + {true, PPE_SCH_INGRESS, 0, false, 0}, + {true, PPE_SCH_EGRESS, 0, false, 0}, + {true, PPE_SCH_INGRESS, 7, false, 0}, + {true, PPE_SCH_EGRESS, 7, false, 0}, + {true, PPE_SCH_INGRESS, 5, false, 0}, + {true, PPE_SCH_EGRESS, 5, false, 0}, + {true, PPE_SCH_INGRESS, 6, false, 0}, + {true, PPE_SCH_EGRESS, 6, false, 0}, + {true, PPE_SCH_INGRESS, 1, false, 0}, + {true, PPE_SCH_EGRESS, 1, false, 0}, + {true, PPE_SCH_INGRESS, 0, false, 0}, + {true, PPE_SCH_EGRESS, 0, false, 0}, + {true, PPE_SCH_INGRESS, 5, false, 0}, + {true, PPE_SCH_EGRESS, 5, false, 0}, + {true, PPE_SCH_INGRESS, 6, false, 0}, + {true, PPE_SCH_EGRESS, 6, false, 0}, + {true, PPE_SCH_INGRESS, 3, false, 0}, + {true, PPE_SCH_EGRESS, 3, false, 0}, + {true, PPE_SCH_INGRESS, 1, false, 0}, + {true, PPE_SCH_EGRESS, 1, false, 0}, + {true, PPE_SCH_INGRESS, 0, false, 0}, + {true, PPE_SCH_EGRESS, 0, false, 0}, + {true, PPE_SCH_INGRESS, 5, false, 0}, + {true, PPE_SCH_EGRESS, 5, false, 0}, + {true, PPE_SCH_INGRESS, 6, false, 0}, + {true, PPE_SCH_EGRESS, 6, false, 0}, + {true, PPE_SCH_INGRESS, 4, false, 0}, + {true, PPE_SCH_EGRESS, 4, false, 0}, + {true, PPE_SCH_INGRESS, 7, false, 0}, + {true, PPE_SCH_EGRESS, 7, false, 0}, +}; + +/* PPE scheduler configuration for QM includes multiple entries. Each entry + * contains ports to be dispatched for enqueueing and dequeueing. The backup + * port for dequeueing is supported to be used when the primary port for + * dequeueing is down. 
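Each ipq9574_ppe_sch_qm_config entry that follows carries the enqueue-eligible ports as a bit map. Reading the first entry, {0x98, 6, 0, true, 1}, per the kernel-doc above: 0x98 (ports 3, 4 and 7) is the enqueue port bit map, 6 the enqueue port, 0 the dequeue port, and 1 the valid dequeue backup port. A sketch of walking such a bit map, using PPE_PORT_MAX from ppe.c above:

        unsigned long ensch_port_bmp = 0x98;
        unsigned int port;

        /* Ports 3, 4 and 7 take part in enqueue arbitration here */
        for_each_set_bit(port, &ensch_port_bmp, PPE_PORT_MAX)
                pr_info("enqueue arbitration includes port %u\n", port);
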
+ */ +static const struct ppe_scheduler_qm_config ipq9574_ppe_sch_qm_config[] = { + {0x98, 6, 0, true, 1}, + {0x94, 5, 6, true, 3}, + {0x86, 0, 5, true, 4}, + {0x8C, 1, 6, true, 0}, + {0x1C, 7, 5, true, 1}, + {0x98, 2, 6, true, 0}, + {0x1C, 5, 7, true, 1}, + {0x34, 3, 6, true, 0}, + {0x8C, 4, 5, true, 1}, + {0x98, 2, 6, true, 0}, + {0x8C, 5, 4, true, 1}, + {0xA8, 0, 6, true, 2}, + {0x98, 5, 1, true, 0}, + {0x98, 6, 5, true, 2}, + {0x89, 1, 6, true, 4}, + {0xA4, 3, 0, true, 1}, + {0x8C, 5, 6, true, 4}, + {0xA8, 0, 2, true, 1}, + {0x98, 6, 5, true, 0}, + {0xC4, 4, 3, true, 1}, + {0x94, 6, 5, true, 0}, + {0x1C, 7, 6, true, 1}, + {0x98, 2, 5, true, 0}, + {0x1C, 6, 7, true, 1}, + {0x1C, 5, 6, true, 0}, + {0x94, 3, 5, true, 1}, + {0x8C, 4, 6, true, 0}, + {0x94, 1, 5, true, 3}, + {0x94, 6, 1, true, 0}, + {0xD0, 3, 5, true, 2}, + {0x98, 6, 0, true, 1}, + {0x94, 5, 6, true, 3}, + {0x94, 1, 5, true, 0}, + {0x98, 2, 6, true, 1}, + {0x8C, 4, 5, true, 0}, + {0x1C, 7, 6, true, 1}, + {0x8C, 0, 5, true, 4}, + {0x89, 1, 6, true, 2}, + {0x98, 5, 0, true, 1}, + {0x94, 6, 5, true, 3}, + {0x92, 0, 6, true, 2}, + {0x98, 1, 5, true, 0}, + {0x98, 6, 2, true, 1}, + {0xD0, 0, 5, true, 3}, + {0x94, 6, 0, true, 1}, + {0x8C, 5, 6, true, 4}, + {0x8C, 1, 5, true, 0}, + {0x1C, 6, 7, true, 1}, + {0x1C, 5, 6, true, 0}, + {0xB0, 2, 3, true, 1}, + {0xC4, 4, 5, true, 0}, + {0x8C, 6, 4, true, 1}, + {0xA4, 3, 6, true, 0}, + {0x1C, 5, 7, true, 1}, + {0x4C, 0, 5, true, 4}, + {0x8C, 6, 0, true, 1}, + {0x34, 7, 6, true, 3}, + {0x94, 5, 0, true, 1}, + {0x98, 6, 5, true, 2}, +}; + +static const struct ppe_scheduler_port_config ppe_port_sch_config[] = { + { + .port = 0, + .flow_level = true, + .node_id = 0, + .loop_num = 1, + .pri_max = 1, + .flow_id = 0, + .drr_node_id = 0, + }, + { + .port = 0, + .flow_level = false, + .node_id = 0, + .loop_num = 8, + .pri_max = 8, + .flow_id = 0, + .drr_node_id = 0, + }, + { + .port = 0, + .flow_level = false, + .node_id = 8, + .loop_num = 8, + .pri_max = 8, + .flow_id = 0, + .drr_node_id = 0, + }, + { + .port = 0, + .flow_level = false, + .node_id = 16, + .loop_num = 8, + .pri_max = 8, + .flow_id = 0, + .drr_node_id = 0, + }, + { + .port = 0, + .flow_level = false, + .node_id = 24, + .loop_num = 8, + .pri_max = 8, + .flow_id = 0, + .drr_node_id = 0, + }, + { + .port = 0, + .flow_level = false, + .node_id = 32, + .loop_num = 8, + .pri_max = 8, + .flow_id = 0, + .drr_node_id = 0, + }, + { + .port = 0, + .flow_level = false, + .node_id = 40, + .loop_num = 8, + .pri_max = 8, + .flow_id = 0, + .drr_node_id = 0, + }, + { + .port = 0, + .flow_level = false, + .node_id = 48, + .loop_num = 8, + .pri_max = 8, + .flow_id = 0, + .drr_node_id = 0, + }, + { + .port = 0, + .flow_level = false, + .node_id = 56, + .loop_num = 8, + .pri_max = 8, + .flow_id = 0, + .drr_node_id = 0, + }, + { + .port = 0, + .flow_level = false, + .node_id = 256, + .loop_num = 8, + .pri_max = 8, + .flow_id = 0, + .drr_node_id = 0, + }, + { + .port = 0, + .flow_level = false, + .node_id = 264, + .loop_num = 8, + .pri_max = 8, + .flow_id = 0, + .drr_node_id = 0, + }, + { + .port = 1, + .flow_level = true, + .node_id = 36, + .loop_num = 2, + .pri_max = 0, + .flow_id = 1, + .drr_node_id = 8, + }, + { + .port = 1, + .flow_level = false, + .node_id = 144, + .loop_num = 16, + .pri_max = 8, + .flow_id = 36, + .drr_node_id = 48, + }, + { + .port = 1, + .flow_level = false, + .node_id = 272, + .loop_num = 4, + .pri_max = 4, + .flow_id = 36, + .drr_node_id = 48, + }, + { + .port = 2, + .flow_level = true, + .node_id = 40, + .loop_num = 2, + 
.pri_max = 0,
+		.flow_id = 2,
+		.drr_node_id = 12,
+	},
+	{
+		.port = 2,
+		.flow_level = false,
+		.node_id = 160,
+		.loop_num = 16,
+		.pri_max = 8,
+		.flow_id = 40,
+		.drr_node_id = 64,
+	},
+	{
+		.port = 2,
+		.flow_level = false,
+		.node_id = 276,
+		.loop_num = 4,
+		.pri_max = 4,
+		.flow_id = 40,
+		.drr_node_id = 64,
+	},
+	{
+		.port = 3,
+		.flow_level = true,
+		.node_id = 44,
+		.loop_num = 2,
+		.pri_max = 0,
+		.flow_id = 3,
+		.drr_node_id = 16,
+	},
+	{
+		.port = 3,
+		.flow_level = false,
+		.node_id = 176,
+		.loop_num = 16,
+		.pri_max = 8,
+		.flow_id = 44,
+		.drr_node_id = 80,
+	},
+	{
+		.port = 3,
+		.flow_level = false,
+		.node_id = 280,
+		.loop_num = 4,
+		.pri_max = 4,
+		.flow_id = 44,
+		.drr_node_id = 80,
+	},
+	{
+		.port = 4,
+		.flow_level = true,
+		.node_id = 48,
+		.loop_num = 2,
+		.pri_max = 0,
+		.flow_id = 4,
+		.drr_node_id = 20,
+	},
+	{
+		.port = 4,
+		.flow_level = false,
+		.node_id = 192,
+		.loop_num = 16,
+		.pri_max = 8,
+		.flow_id = 48,
+		.drr_node_id = 96,
+	},
+	{
+		.port = 4,
+		.flow_level = false,
+		.node_id = 284,
+		.loop_num = 4,
+		.pri_max = 4,
+		.flow_id = 48,
+		.drr_node_id = 96,
+	},
+	{
+		.port = 5,
+		.flow_level = true,
+		.node_id = 52,
+		.loop_num = 2,
+		.pri_max = 0,
+		.flow_id = 5,
+		.drr_node_id = 24,
+	},
+	{
+		.port = 5,
+		.flow_level = false,
+		.node_id = 208,
+		.loop_num = 16,
+		.pri_max = 8,
+		.flow_id = 52,
+		.drr_node_id = 112,
+	},
+	{
+		.port = 5,
+		.flow_level = false,
+		.node_id = 288,
+		.loop_num = 4,
+		.pri_max = 4,
+		.flow_id = 52,
+		.drr_node_id = 112,
+	},
+	{
+		.port = 6,
+		.flow_level = true,
+		.node_id = 56,
+		.loop_num = 2,
+		.pri_max = 0,
+		.flow_id = 6,
+		.drr_node_id = 28,
+	},
+	{
+		.port = 6,
+		.flow_level = false,
+		.node_id = 224,
+		.loop_num = 16,
+		.pri_max = 8,
+		.flow_id = 56,
+		.drr_node_id = 128,
+	},
+	{
+		.port = 6,
+		.flow_level = false,
+		.node_id = 292,
+		.loop_num = 4,
+		.pri_max = 4,
+		.flow_id = 56,
+		.drr_node_id = 128,
+	},
+	{
+		.port = 7,
+		.flow_level = true,
+		.node_id = 60,
+		.loop_num = 2,
+		.pri_max = 0,
+		.flow_id = 7,
+		.drr_node_id = 32,
+	},
+	{
+		.port = 7,
+		.flow_level = false,
+		.node_id = 240,
+		.loop_num = 16,
+		.pri_max = 8,
+		.flow_id = 60,
+		.drr_node_id = 144,
+	},
+	{
+		.port = 7,
+		.flow_level = false,
+		.node_id = 296,
+		.loop_num = 4,
+		.pri_max = 4,
+		.flow_id = 60,
+		.drr_node_id = 144,
+	},
+};
+
+/* The scheduler resource is applied to each PPE port. The resource
+ * includes the unicast and multicast queues, flow nodes and DRR nodes.
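+ *
+ * For example, the second entry of ppe_scheduler_res[] below assigns
+ * port 1 the unicast queues 144-159, the multicast queues 272-275, the
+ * flow IDs 36-39, the level 0 nodes 48-63 and the level 1 nodes 8-11.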
+ */ +static const struct ppe_port_schedule_resource ppe_scheduler_res[] = { + { .ucastq_start = 0, + .ucastq_end = 63, + .mcastq_start = 256, + .mcastq_end = 271, + .flow_id_start = 0, + .flow_id_end = 0, + .l0node_start = 0, + .l0node_end = 7, + .l1node_start = 0, + .l1node_end = 0, + }, + { .ucastq_start = 144, + .ucastq_end = 159, + .mcastq_start = 272, + .mcastq_end = 275, + .flow_id_start = 36, + .flow_id_end = 39, + .l0node_start = 48, + .l0node_end = 63, + .l1node_start = 8, + .l1node_end = 11, + }, + { .ucastq_start = 160, + .ucastq_end = 175, + .mcastq_start = 276, + .mcastq_end = 279, + .flow_id_start = 40, + .flow_id_end = 43, + .l0node_start = 64, + .l0node_end = 79, + .l1node_start = 12, + .l1node_end = 15, + }, + { .ucastq_start = 176, + .ucastq_end = 191, + .mcastq_start = 280, + .mcastq_end = 283, + .flow_id_start = 44, + .flow_id_end = 47, + .l0node_start = 80, + .l0node_end = 95, + .l1node_start = 16, + .l1node_end = 19, + }, + { .ucastq_start = 192, + .ucastq_end = 207, + .mcastq_start = 284, + .mcastq_end = 287, + .flow_id_start = 48, + .flow_id_end = 51, + .l0node_start = 96, + .l0node_end = 111, + .l1node_start = 20, + .l1node_end = 23, + }, + { .ucastq_start = 208, + .ucastq_end = 223, + .mcastq_start = 288, + .mcastq_end = 291, + .flow_id_start = 52, + .flow_id_end = 55, + .l0node_start = 112, + .l0node_end = 127, + .l1node_start = 24, + .l1node_end = 27, + }, + { .ucastq_start = 224, + .ucastq_end = 239, + .mcastq_start = 292, + .mcastq_end = 295, + .flow_id_start = 56, + .flow_id_end = 59, + .l0node_start = 128, + .l0node_end = 143, + .l1node_start = 28, + .l1node_end = 31, + }, + { .ucastq_start = 240, + .ucastq_end = 255, + .mcastq_start = 296, + .mcastq_end = 299, + .flow_id_start = 60, + .flow_id_end = 63, + .l0node_start = 144, + .l0node_end = 159, + .l1node_start = 32, + .l1node_end = 35, + }, + { .ucastq_start = 64, + .ucastq_end = 143, + .mcastq_start = 0, + .mcastq_end = 0, + .flow_id_start = 1, + .flow_id_end = 35, + .l0node_start = 8, + .l0node_end = 47, + .l1node_start = 1, + .l1node_end = 7, + }, +}; + +/* Set the PPE queue level scheduler configuration. 
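+ * A level 0 (queue level) node is mapped to its level 1 flow node with
+ * the configured commit/exceed priorities and DRR weights, bound to an
+ * egress port, and its meter length is selected by the frame mode.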
*/ +static int ppe_scheduler_l0_queue_map_set(struct ppe_device *ppe_dev, + int node_id, int port, + struct ppe_scheduler_cfg scheduler_cfg) +{ + u32 val, reg; + int ret; + + reg = PPE_L0_FLOW_MAP_TBL_ADDR + node_id * PPE_L0_FLOW_MAP_TBL_INC; + val = FIELD_PREP(PPE_L0_FLOW_MAP_TBL_FLOW_ID, scheduler_cfg.flow_id); + val |= FIELD_PREP(PPE_L0_FLOW_MAP_TBL_C_PRI, scheduler_cfg.pri); + val |= FIELD_PREP(PPE_L0_FLOW_MAP_TBL_E_PRI, scheduler_cfg.pri); + val |= FIELD_PREP(PPE_L0_FLOW_MAP_TBL_C_NODE_WT, scheduler_cfg.drr_node_wt); + val |= FIELD_PREP(PPE_L0_FLOW_MAP_TBL_E_NODE_WT, scheduler_cfg.drr_node_wt); + + ret = regmap_write(ppe_dev->regmap, reg, val); + if (ret) + return ret; + + reg = PPE_L0_C_FLOW_CFG_TBL_ADDR + + (scheduler_cfg.flow_id * PPE_QUEUE_SCH_PRI_NUM + scheduler_cfg.pri) * + PPE_L0_C_FLOW_CFG_TBL_INC; + val = FIELD_PREP(PPE_L0_C_FLOW_CFG_TBL_NODE_ID, scheduler_cfg.drr_node_id); + val |= FIELD_PREP(PPE_L0_C_FLOW_CFG_TBL_NODE_CREDIT_UNIT, scheduler_cfg.unit_is_packet); + + ret = regmap_write(ppe_dev->regmap, reg, val); + if (ret) + return ret; + + reg = PPE_L0_E_FLOW_CFG_TBL_ADDR + + (scheduler_cfg.flow_id * PPE_QUEUE_SCH_PRI_NUM + scheduler_cfg.pri) * + PPE_L0_E_FLOW_CFG_TBL_INC; + val = FIELD_PREP(PPE_L0_E_FLOW_CFG_TBL_NODE_ID, scheduler_cfg.drr_node_id); + val |= FIELD_PREP(PPE_L0_E_FLOW_CFG_TBL_NODE_CREDIT_UNIT, scheduler_cfg.unit_is_packet); + + ret = regmap_write(ppe_dev->regmap, reg, val); + if (ret) + return ret; + + reg = PPE_L0_FLOW_PORT_MAP_TBL_ADDR + node_id * PPE_L0_FLOW_PORT_MAP_TBL_INC; + val = FIELD_PREP(PPE_L0_FLOW_PORT_MAP_TBL_PORT_NUM, port); + + ret = regmap_write(ppe_dev->regmap, reg, val); + if (ret) + return ret; + + reg = PPE_L0_COMP_CFG_TBL_ADDR + node_id * PPE_L0_COMP_CFG_TBL_INC; + val = FIELD_PREP(PPE_L0_COMP_CFG_TBL_NODE_METER_LEN, scheduler_cfg.frame_mode); + + return regmap_update_bits(ppe_dev->regmap, reg, + PPE_L0_COMP_CFG_TBL_NODE_METER_LEN, + val); +} + +/* Set the PPE flow level scheduler configuration. 
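+ * A level 1 (flow level) node aggregates a set of level 0 queue nodes
+ * into one flow; it is programmed through the analogous L1 tables, one
+ * level closer to the egress port.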
*/
+static int ppe_scheduler_l1_queue_map_set(struct ppe_device *ppe_dev,
+					   int node_id, int port,
+					   struct ppe_scheduler_cfg scheduler_cfg)
+{
+	u32 val, reg;
+	int ret;
+
+	val = FIELD_PREP(PPE_L1_FLOW_MAP_TBL_FLOW_ID, scheduler_cfg.flow_id);
+	val |= FIELD_PREP(PPE_L1_FLOW_MAP_TBL_C_PRI, scheduler_cfg.pri);
+	val |= FIELD_PREP(PPE_L1_FLOW_MAP_TBL_E_PRI, scheduler_cfg.pri);
+	val |= FIELD_PREP(PPE_L1_FLOW_MAP_TBL_C_NODE_WT, scheduler_cfg.drr_node_wt);
+	val |= FIELD_PREP(PPE_L1_FLOW_MAP_TBL_E_NODE_WT, scheduler_cfg.drr_node_wt);
+	reg = PPE_L1_FLOW_MAP_TBL_ADDR + node_id * PPE_L1_FLOW_MAP_TBL_INC;
+
+	ret = regmap_write(ppe_dev->regmap, reg, val);
+	if (ret)
+		return ret;
+
+	val = FIELD_PREP(PPE_L1_C_FLOW_CFG_TBL_NODE_ID, scheduler_cfg.drr_node_id);
+	val |= FIELD_PREP(PPE_L1_C_FLOW_CFG_TBL_NODE_CREDIT_UNIT, scheduler_cfg.unit_is_packet);
+	reg = PPE_L1_C_FLOW_CFG_TBL_ADDR +
+	      (scheduler_cfg.flow_id * PPE_QUEUE_SCH_PRI_NUM + scheduler_cfg.pri) *
+	      PPE_L1_C_FLOW_CFG_TBL_INC;
+
+	ret = regmap_write(ppe_dev->regmap, reg, val);
+	if (ret)
+		return ret;
+
+	val = FIELD_PREP(PPE_L1_E_FLOW_CFG_TBL_NODE_ID, scheduler_cfg.drr_node_id);
+	val |= FIELD_PREP(PPE_L1_E_FLOW_CFG_TBL_NODE_CREDIT_UNIT, scheduler_cfg.unit_is_packet);
+	reg = PPE_L1_E_FLOW_CFG_TBL_ADDR +
+	      (scheduler_cfg.flow_id * PPE_QUEUE_SCH_PRI_NUM + scheduler_cfg.pri) *
+	      PPE_L1_E_FLOW_CFG_TBL_INC;
+
+	ret = regmap_write(ppe_dev->regmap, reg, val);
+	if (ret)
+		return ret;
+
+	val = FIELD_PREP(PPE_L1_FLOW_PORT_MAP_TBL_PORT_NUM, port);
+	reg = PPE_L1_FLOW_PORT_MAP_TBL_ADDR + node_id * PPE_L1_FLOW_PORT_MAP_TBL_INC;
+
+	ret = regmap_write(ppe_dev->regmap, reg, val);
+	if (ret)
+		return ret;
+
+	reg = PPE_L1_COMP_CFG_TBL_ADDR + node_id * PPE_L1_COMP_CFG_TBL_INC;
+	val = FIELD_PREP(PPE_L1_COMP_CFG_TBL_NODE_METER_LEN, scheduler_cfg.frame_mode);
+
+	return regmap_update_bits(ppe_dev->regmap, reg, PPE_L1_COMP_CFG_TBL_NODE_METER_LEN, val);
+}
+
+/**
+ * ppe_queue_scheduler_set - Configure scheduler for PPE hardware queue
+ * @ppe_dev: PPE device
+ * @node_id: PPE queue ID or flow ID
+ * @flow_level: Flow level scheduler or queue level scheduler
+ * @port: PPE port ID for which the scheduler is configured
+ * @scheduler_cfg: PPE scheduler configuration
+ *
+ * PPE scheduler configuration supports queue level and flow level on
+ * the PPE egress port.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int ppe_queue_scheduler_set(struct ppe_device *ppe_dev,
+			    int node_id, bool flow_level, int port,
+			    struct ppe_scheduler_cfg scheduler_cfg)
+{
+	if (flow_level)
+		return ppe_scheduler_l1_queue_map_set(ppe_dev, node_id,
+						      port, scheduler_cfg);
+
+	return ppe_scheduler_l0_queue_map_set(ppe_dev, node_id,
+					      port, scheduler_cfg);
+}
+
+/**
+ * ppe_queue_ucast_base_set - Set PPE unicast queue base ID and profile ID
+ * @ppe_dev: PPE device
+ * @queue_dst: PPE queue destination configuration
+ * @queue_base: PPE queue base ID
+ * @profile_id: Profile ID
+ *
+ * The PPE unicast queue base ID and profile ID are configured based on the
+ * destination information, which can be the service code, the CPU code or
+ * the destination port.
+ *
+ * Return: 0 on success, negative error code on failure.
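+ *
+ * A minimal usage sketch (mirroring ppe_queue_dest_init() below; the
+ * values are those programmed for physical port 1):
+ *
+ *	struct ppe_queue_ucast_dest dst = {};
+ *
+ *	dst.dest_port = 1;
+ *	ret = ppe_queue_ucast_base_set(ppe_dev, dst, 144, 1);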
+ */ +int ppe_queue_ucast_base_set(struct ppe_device *ppe_dev, + struct ppe_queue_ucast_dest queue_dst, + int queue_base, int profile_id) +{ + int index, profile_size; + u32 val, reg; + + profile_size = queue_dst.src_profile << 8; + if (queue_dst.service_code_en) + index = PPE_QUEUE_BASE_SERVICE_CODE + profile_size + + queue_dst.service_code; + else if (queue_dst.cpu_code_en) + index = PPE_QUEUE_BASE_CPU_CODE + profile_size + + queue_dst.cpu_code; + else + index = profile_size + queue_dst.dest_port; + + val = FIELD_PREP(PPE_UCAST_QUEUE_MAP_TBL_PROFILE_ID, profile_id); + val |= FIELD_PREP(PPE_UCAST_QUEUE_MAP_TBL_QUEUE_ID, queue_base); + reg = PPE_UCAST_QUEUE_MAP_TBL_ADDR + index * PPE_UCAST_QUEUE_MAP_TBL_INC; + + return regmap_write(ppe_dev->regmap, reg, val); +} + +/** + * ppe_queue_ucast_offset_pri_set - Set PPE unicast queue offset based on priority + * @ppe_dev: PPE device + * @profile_id: Profile ID + * @priority: PPE internal priority to be used to set queue offset + * @queue_offset: Queue offset used for calculating the destination queue ID + * + * The PPE unicast queue offset is configured based on the PPE + * internal priority. + * + * Return: 0 on success, negative error code on failure. + */ +int ppe_queue_ucast_offset_pri_set(struct ppe_device *ppe_dev, + int profile_id, + int priority, + int queue_offset) +{ + u32 val, reg; + int index; + + index = (profile_id << 4) + priority; + val = FIELD_PREP(PPE_UCAST_PRIORITY_MAP_TBL_CLASS, queue_offset); + reg = PPE_UCAST_PRIORITY_MAP_TBL_ADDR + index * PPE_UCAST_PRIORITY_MAP_TBL_INC; + + return regmap_write(ppe_dev->regmap, reg, val); +} + +/** + * ppe_queue_ucast_offset_hash_set - Set PPE unicast queue offset based on hash + * @ppe_dev: PPE device + * @profile_id: Profile ID + * @rss_hash: Packet hash value to be used to set queue offset + * @queue_offset: Queue offset used for calculating the destination queue ID + * + * The PPE unicast queue offset is configured based on the RSS hash value. + * + * Return: 0 on success, negative error code on failure. + */ +int ppe_queue_ucast_offset_hash_set(struct ppe_device *ppe_dev, + int profile_id, + int rss_hash, + int queue_offset) +{ + u32 val, reg; + int index; + + index = (profile_id << 8) + rss_hash; + val = FIELD_PREP(PPE_UCAST_HASH_MAP_TBL_HASH, queue_offset); + reg = PPE_UCAST_HASH_MAP_TBL_ADDR + index * PPE_UCAST_HASH_MAP_TBL_INC; + + return regmap_write(ppe_dev->regmap, reg, val); +} + +/** + * ppe_port_resource_get - Get PPE resource per port + * @ppe_dev: PPE device + * @port: PPE port + * @type: Resource type + * @res_start: Resource start ID returned + * @res_end: Resource end ID returned + * + * PPE resource is assigned per PPE port, which is acquired for QoS scheduler. + * + * Return: 0 on success, negative error code on failure. + */ +int ppe_port_resource_get(struct ppe_device *ppe_dev, int port, + enum ppe_resource_type type, + int *res_start, int *res_end) +{ + struct ppe_port_schedule_resource res; + + /* The reserved resource with the maximum port ID of PPE is + * also allowed to be acquired. 
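+	 * That is, a caller may pass port == ppe_dev->num_ports to acquire
+	 * the reserved resource range.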
+ */ + if (port > ppe_dev->num_ports) + return -EINVAL; + + res = ppe_scheduler_res[port]; + switch (type) { + case PPE_RES_UCAST: + *res_start = res.ucastq_start; + *res_end = res.ucastq_end; + break; + case PPE_RES_MCAST: + *res_start = res.mcastq_start; + *res_end = res.mcastq_end; + break; + case PPE_RES_FLOW_ID: + *res_start = res.flow_id_start; + *res_end = res.flow_id_end; + break; + case PPE_RES_L0_NODE: + *res_start = res.l0node_start; + *res_end = res.l0node_end; + break; + case PPE_RES_L1_NODE: + *res_start = res.l1node_start; + *res_end = res.l1node_end; + break; + default: + return -EINVAL; + } + + return 0; +} + +/** + * ppe_sc_config_set - Set PPE service code configuration + * @ppe_dev: PPE device + * @sc: Service ID, 0-255 supported by PPE + * @cfg: Service code configuration + * + * PPE service code is used by the PPE during its packet processing stages, + * to perform or bypass certain selected packet operations on the packet. + * + * Return: 0 on success, negative error code on failure. + */ +int ppe_sc_config_set(struct ppe_device *ppe_dev, int sc, struct ppe_sc_cfg cfg) +{ + u32 val, reg, servcode_val[2] = {}; + unsigned long bitmap_value; + int ret; + + val = FIELD_PREP(PPE_IN_L2_SERVICE_TBL_DST_PORT_ID_VALID, cfg.dest_port_valid); + val |= FIELD_PREP(PPE_IN_L2_SERVICE_TBL_DST_PORT_ID, cfg.dest_port); + val |= FIELD_PREP(PPE_IN_L2_SERVICE_TBL_DST_DIRECTION, cfg.is_src); + + bitmap_value = bitmap_read(cfg.bitmaps.egress, 0, PPE_SC_BYPASS_EGRESS_SIZE); + val |= FIELD_PREP(PPE_IN_L2_SERVICE_TBL_DST_BYPASS_BITMAP, bitmap_value); + val |= FIELD_PREP(PPE_IN_L2_SERVICE_TBL_RX_CNT_EN, + test_bit(PPE_SC_BYPASS_COUNTER_RX, cfg.bitmaps.counter)); + val |= FIELD_PREP(PPE_IN_L2_SERVICE_TBL_TX_CNT_EN, + test_bit(PPE_SC_BYPASS_COUNTER_TX, cfg.bitmaps.counter)); + reg = PPE_IN_L2_SERVICE_TBL_ADDR + PPE_IN_L2_SERVICE_TBL_INC * sc; + + ret = regmap_write(ppe_dev->regmap, reg, val); + if (ret) + return ret; + + bitmap_value = bitmap_read(cfg.bitmaps.ingress, 0, PPE_SC_BYPASS_INGRESS_SIZE); + PPE_SERVICE_SET_BYPASS_BITMAP(servcode_val, bitmap_value); + PPE_SERVICE_SET_RX_CNT_EN(servcode_val, + test_bit(PPE_SC_BYPASS_COUNTER_RX_VLAN, cfg.bitmaps.counter)); + reg = PPE_SERVICE_TBL_ADDR + PPE_SERVICE_TBL_INC * sc; + + ret = regmap_bulk_write(ppe_dev->regmap, reg, + servcode_val, ARRAY_SIZE(servcode_val)); + if (ret) + return ret; + + reg = PPE_EG_SERVICE_TBL_ADDR + PPE_EG_SERVICE_TBL_INC * sc; + ret = regmap_bulk_read(ppe_dev->regmap, reg, + servcode_val, ARRAY_SIZE(servcode_val)); + if (ret) + return ret; + + PPE_EG_SERVICE_SET_NEXT_SERVCODE(servcode_val, cfg.next_service_code); + PPE_EG_SERVICE_SET_UPDATE_ACTION(servcode_val, cfg.eip_field_update_bitmap); + PPE_EG_SERVICE_SET_HW_SERVICE(servcode_val, cfg.eip_hw_service); + PPE_EG_SERVICE_SET_OFFSET_SEL(servcode_val, cfg.eip_offset_sel); + PPE_EG_SERVICE_SET_TX_CNT_EN(servcode_val, + test_bit(PPE_SC_BYPASS_COUNTER_TX_VLAN, cfg.bitmaps.counter)); + + ret = regmap_bulk_write(ppe_dev->regmap, reg, + servcode_val, ARRAY_SIZE(servcode_val)); + if (ret) + return ret; + + bitmap_value = bitmap_read(cfg.bitmaps.tunnel, 0, PPE_SC_BYPASS_TUNNEL_SIZE); + val = FIELD_PREP(PPE_TL_SERVICE_TBL_BYPASS_BITMAP, bitmap_value); + reg = PPE_TL_SERVICE_TBL_ADDR + PPE_TL_SERVICE_TBL_INC * sc; + + return regmap_write(ppe_dev->regmap, reg, val); +} + +/** + * ppe_counter_enable_set - Set PPE port counter enabled + * @ppe_dev: PPE device + * @port: PPE port ID + * + * Enable PPE counters on the given port for the unicast packet, multicast + * packet and VLAN 
packet received and transmitted by PPE. + * + * Return: 0 on success, negative error code on failure. + */ +int ppe_counter_enable_set(struct ppe_device *ppe_dev, int port) +{ + u32 reg, mru_mtu_val[3]; + int ret; + + reg = PPE_MRU_MTU_CTRL_TBL_ADDR + PPE_MRU_MTU_CTRL_TBL_INC * port; + ret = regmap_bulk_read(ppe_dev->regmap, reg, + mru_mtu_val, ARRAY_SIZE(mru_mtu_val)); + if (ret) + return ret; + + PPE_MRU_MTU_CTRL_SET_RX_CNT_EN(mru_mtu_val, true); + PPE_MRU_MTU_CTRL_SET_TX_CNT_EN(mru_mtu_val, true); + ret = regmap_bulk_write(ppe_dev->regmap, reg, + mru_mtu_val, ARRAY_SIZE(mru_mtu_val)); + if (ret) + return ret; + + reg = PPE_MC_MTU_CTRL_TBL_ADDR + PPE_MC_MTU_CTRL_TBL_INC * port; + ret = regmap_set_bits(ppe_dev->regmap, reg, PPE_MC_MTU_CTRL_TBL_TX_CNT_EN); + if (ret) + return ret; + + reg = PPE_PORT_EG_VLAN_TBL_ADDR + PPE_PORT_EG_VLAN_TBL_INC * port; + + return regmap_set_bits(ppe_dev->regmap, reg, PPE_PORT_EG_VLAN_TBL_TX_COUNTING_EN); +} + +static int ppe_rss_hash_ipv4_config(struct ppe_device *ppe_dev, int index, + struct ppe_rss_hash_cfg cfg) +{ + u32 reg, val; + + switch (index) { + case 0: + val = cfg.hash_sip_mix[0]; + break; + case 1: + val = cfg.hash_dip_mix[0]; + break; + case 2: + val = cfg.hash_protocol_mix; + break; + case 3: + val = cfg.hash_dport_mix; + break; + case 4: + val = cfg.hash_sport_mix; + break; + default: + return -EINVAL; + } + + reg = PPE_RSS_HASH_MIX_IPV4_ADDR + index * PPE_RSS_HASH_MIX_IPV4_INC; + + return regmap_update_bits(ppe_dev->regmap, reg, + PPE_RSS_HASH_MIX_IPV4_VAL, + FIELD_PREP(PPE_RSS_HASH_MIX_IPV4_VAL, val)); +} + +static int ppe_rss_hash_ipv6_config(struct ppe_device *ppe_dev, int index, + struct ppe_rss_hash_cfg cfg) +{ + u32 reg, val; + + switch (index) { + case 0 ... 3: + val = cfg.hash_sip_mix[index]; + break; + case 4 ... 7: + val = cfg.hash_dip_mix[index - 4]; + break; + case 8: + val = cfg.hash_protocol_mix; + break; + case 9: + val = cfg.hash_dport_mix; + break; + case 10: + val = cfg.hash_sport_mix; + break; + default: + return -EINVAL; + } + + reg = PPE_RSS_HASH_MIX_ADDR + index * PPE_RSS_HASH_MIX_INC; + + return regmap_update_bits(ppe_dev->regmap, reg, + PPE_RSS_HASH_MIX_VAL, + FIELD_PREP(PPE_RSS_HASH_MIX_VAL, val)); +} + +/** + * ppe_rss_hash_config_set - Configure the PPE hash settings for the packet received. + * @ppe_dev: PPE device. + * @mode: Configure RSS hash for the packet type IPv4 and IPv6. + * @cfg: RSS hash configuration. + * + * PPE RSS hash settings are configured for the packet type IPv4 and IPv6. + * + * Return: 0 on success, negative error code on failure. 
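+ *
+ * A minimal usage sketch (see ppe_rss_hash_init() in this file for the
+ * full sequence; the seed value here is illustrative):
+ *
+ *	cfg.hash_seed = get_random_u32();
+ *	ret = ppe_rss_hash_config_set(ppe_dev, PPE_RSS_HASH_MODE_IPV4, cfg);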
+ */ +int ppe_rss_hash_config_set(struct ppe_device *ppe_dev, int mode, + struct ppe_rss_hash_cfg cfg) +{ + u32 val, reg; + int i, ret; + + if (mode & PPE_RSS_HASH_MODE_IPV4) { + val = FIELD_PREP(PPE_RSS_HASH_MASK_IPV4_HASH_MASK, cfg.hash_mask); + val |= FIELD_PREP(PPE_RSS_HASH_MASK_IPV4_FRAGMENT, cfg.hash_fragment_mode); + ret = regmap_write(ppe_dev->regmap, PPE_RSS_HASH_MASK_IPV4_ADDR, val); + if (ret) + return ret; + + val = FIELD_PREP(PPE_RSS_HASH_SEED_IPV4_VAL, cfg.hash_seed); + ret = regmap_write(ppe_dev->regmap, PPE_RSS_HASH_SEED_IPV4_ADDR, val); + if (ret) + return ret; + + for (i = 0; i < PPE_RSS_HASH_MIX_IPV4_ENTRIES; i++) { + ret = ppe_rss_hash_ipv4_config(ppe_dev, i, cfg); + if (ret) + return ret; + } + + for (i = 0; i < PPE_RSS_HASH_FIN_IPV4_ENTRIES; i++) { + val = FIELD_PREP(PPE_RSS_HASH_FIN_IPV4_INNER, cfg.hash_fin_inner[i]); + val |= FIELD_PREP(PPE_RSS_HASH_FIN_IPV4_OUTER, cfg.hash_fin_outer[i]); + reg = PPE_RSS_HASH_FIN_IPV4_ADDR + i * PPE_RSS_HASH_FIN_IPV4_INC; + + ret = regmap_write(ppe_dev->regmap, reg, val); + if (ret) + return ret; + } + } + + if (mode & PPE_RSS_HASH_MODE_IPV6) { + val = FIELD_PREP(PPE_RSS_HASH_MASK_HASH_MASK, cfg.hash_mask); + val |= FIELD_PREP(PPE_RSS_HASH_MASK_FRAGMENT, cfg.hash_fragment_mode); + ret = regmap_write(ppe_dev->regmap, PPE_RSS_HASH_MASK_ADDR, val); + if (ret) + return ret; + + val = FIELD_PREP(PPE_RSS_HASH_SEED_VAL, cfg.hash_seed); + ret = regmap_write(ppe_dev->regmap, PPE_RSS_HASH_SEED_ADDR, val); + if (ret) + return ret; + + for (i = 0; i < PPE_RSS_HASH_MIX_ENTRIES; i++) { + ret = ppe_rss_hash_ipv6_config(ppe_dev, i, cfg); + if (ret) + return ret; + } + + for (i = 0; i < PPE_RSS_HASH_FIN_ENTRIES; i++) { + val = FIELD_PREP(PPE_RSS_HASH_FIN_INNER, cfg.hash_fin_inner[i]); + val |= FIELD_PREP(PPE_RSS_HASH_FIN_OUTER, cfg.hash_fin_outer[i]); + reg = PPE_RSS_HASH_FIN_ADDR + i * PPE_RSS_HASH_FIN_INC; + + ret = regmap_write(ppe_dev->regmap, reg, val); + if (ret) + return ret; + } + } + + return 0; +} + +/** + * ppe_ring_queue_map_set - Set the PPE queue to Ethernet DMA ring mapping + * @ppe_dev: PPE device + * @ring_id: Ethernet DMA ring ID + * @queue_map: Bit map of queue IDs to given Ethernet DMA ring + * + * Configure the mapping from a set of PPE queues to a given Ethernet DMA ring. + * + * Return: 0 on success, negative error code on failure. + */ +int ppe_ring_queue_map_set(struct ppe_device *ppe_dev, int ring_id, u32 *queue_map) +{ + u32 reg, queue_bitmap_val[PPE_RING_TO_QUEUE_BITMAP_WORD_CNT]; + + memcpy(queue_bitmap_val, queue_map, sizeof(queue_bitmap_val)); + reg = PPE_RING_Q_MAP_TBL_ADDR + PPE_RING_Q_MAP_TBL_INC * ring_id; + + return regmap_bulk_write(ppe_dev->regmap, reg, + queue_bitmap_val, + ARRAY_SIZE(queue_bitmap_val)); +} + +static int ppe_config_bm_threshold(struct ppe_device *ppe_dev, int bm_port_id, + const struct ppe_bm_port_config port_cfg) +{ + u32 reg, val, bm_fc_val[2]; + int ret; + + reg = PPE_BM_PORT_FC_CFG_TBL_ADDR + PPE_BM_PORT_FC_CFG_TBL_INC * bm_port_id; + ret = regmap_bulk_read(ppe_dev->regmap, reg, + bm_fc_val, ARRAY_SIZE(bm_fc_val)); + if (ret) + return ret; + + /* Configure BM flow control related threshold. 
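+	 * The 11-bit ceiling below is split across two register fields:
+	 * bits 2:0 go to CEILING_LOW and bits 10:3 to CEILING_HIGH, e.g.
+	 * ceil = 0x100 is written as low = 0x0, high = 0x20.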
*/ + PPE_BM_PORT_FC_SET_WEIGHT(bm_fc_val, port_cfg.weight); + PPE_BM_PORT_FC_SET_RESUME_OFFSET(bm_fc_val, port_cfg.resume_offset); + PPE_BM_PORT_FC_SET_RESUME_THRESHOLD(bm_fc_val, port_cfg.resume_ceil); + PPE_BM_PORT_FC_SET_DYNAMIC(bm_fc_val, port_cfg.dynamic); + PPE_BM_PORT_FC_SET_REACT_LIMIT(bm_fc_val, port_cfg.in_fly_buf); + PPE_BM_PORT_FC_SET_PRE_ALLOC(bm_fc_val, port_cfg.pre_alloc); + + /* Configure low/high bits of the ceiling for the BM port. */ + val = FIELD_GET(GENMASK(2, 0), port_cfg.ceil); + PPE_BM_PORT_FC_SET_CEILING_LOW(bm_fc_val, val); + val = FIELD_GET(GENMASK(10, 3), port_cfg.ceil); + PPE_BM_PORT_FC_SET_CEILING_HIGH(bm_fc_val, val); + + ret = regmap_bulk_write(ppe_dev->regmap, reg, + bm_fc_val, ARRAY_SIZE(bm_fc_val)); + if (ret) + return ret; + + /* Assign the default group ID 0 to the BM port. */ + val = FIELD_PREP(PPE_BM_PORT_GROUP_ID_SHARED_GROUP_ID, 0); + reg = PPE_BM_PORT_GROUP_ID_ADDR + PPE_BM_PORT_GROUP_ID_INC * bm_port_id; + ret = regmap_update_bits(ppe_dev->regmap, reg, + PPE_BM_PORT_GROUP_ID_SHARED_GROUP_ID, + val); + if (ret) + return ret; + + /* Enable BM port flow control. */ + reg = PPE_BM_PORT_FC_MODE_ADDR + PPE_BM_PORT_FC_MODE_INC * bm_port_id; + + return regmap_set_bits(ppe_dev->regmap, reg, PPE_BM_PORT_FC_MODE_EN); +} + +/* Configure the buffer threshold for the port flow control function. */ +static int ppe_config_bm(struct ppe_device *ppe_dev) +{ + const struct ppe_bm_port_config *port_cfg; + unsigned int i, bm_port_id, port_cfg_cnt; + u32 reg, val; + int ret; + + /* Configure the allocated buffer number only for group 0. + * The buffer number of group 1-3 is already cleared to 0 + * after PPE reset during the probe of PPE driver. + */ + reg = PPE_BM_SHARED_GROUP_CFG_ADDR; + val = FIELD_PREP(PPE_BM_SHARED_GROUP_CFG_SHARED_LIMIT, + ipq9574_ppe_bm_group_config); + ret = regmap_update_bits(ppe_dev->regmap, reg, + PPE_BM_SHARED_GROUP_CFG_SHARED_LIMIT, + val); + if (ret) + goto bm_config_fail; + + /* Configure buffer thresholds for the BM ports. */ + port_cfg = ipq9574_ppe_bm_port_config; + port_cfg_cnt = ARRAY_SIZE(ipq9574_ppe_bm_port_config); + for (i = 0; i < port_cfg_cnt; i++) { + for (bm_port_id = port_cfg[i].port_id_start; + bm_port_id <= port_cfg[i].port_id_end; bm_port_id++) { + ret = ppe_config_bm_threshold(ppe_dev, bm_port_id, + port_cfg[i]); + if (ret) + goto bm_config_fail; + } + } + + return 0; + +bm_config_fail: + dev_err(ppe_dev->dev, "PPE BM config error %d\n", ret); + return ret; +} + +/* Configure PPE hardware queue depth, which is decided by the threshold + * of queue. + */ +static int ppe_config_qm(struct ppe_device *ppe_dev) +{ + const struct ppe_qm_queue_config *queue_cfg; + int ret, i, queue_id, queue_cfg_count; + u32 reg, multicast_queue_cfg[5]; + u32 unicast_queue_cfg[4]; + u32 group_cfg[3]; + + /* Assign the buffer number to the group 0 by default. */ + reg = PPE_AC_GRP_CFG_TBL_ADDR; + ret = regmap_bulk_read(ppe_dev->regmap, reg, + group_cfg, ARRAY_SIZE(group_cfg)); + if (ret) + goto qm_config_fail; + + PPE_AC_GRP_SET_BUF_LIMIT(group_cfg, ipq9574_ppe_qm_group_config); + + ret = regmap_bulk_write(ppe_dev->regmap, reg, + group_cfg, ARRAY_SIZE(group_cfg)); + if (ret) + goto qm_config_fail; + + queue_cfg = ipq9574_ppe_qm_queue_config; + queue_cfg_count = ARRAY_SIZE(ipq9574_ppe_qm_queue_config); + for (i = 0; i < queue_cfg_count; i++) { + queue_id = queue_cfg[i].queue_start; + + /* Configure threshold for dropping packets separately for + * unicast and multicast PPE queues. 
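+		 * Queue IDs below PPE_AC_UNICAST_QUEUE_CFG_TBL_ENTRIES are
+		 * unicast queues; the remaining queue IDs are multicast
+		 * queues.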
+ */ + while (queue_id <= queue_cfg[i].queue_end) { + if (queue_id < PPE_AC_UNICAST_QUEUE_CFG_TBL_ENTRIES) { + reg = PPE_AC_UNICAST_QUEUE_CFG_TBL_ADDR + + PPE_AC_UNICAST_QUEUE_CFG_TBL_INC * queue_id; + + ret = regmap_bulk_read(ppe_dev->regmap, reg, + unicast_queue_cfg, + ARRAY_SIZE(unicast_queue_cfg)); + if (ret) + goto qm_config_fail; + + PPE_AC_UNICAST_QUEUE_SET_EN(unicast_queue_cfg, true); + PPE_AC_UNICAST_QUEUE_SET_GRP_ID(unicast_queue_cfg, 0); + PPE_AC_UNICAST_QUEUE_SET_PRE_LIMIT(unicast_queue_cfg, + queue_cfg[i].prealloc_buf); + PPE_AC_UNICAST_QUEUE_SET_DYNAMIC(unicast_queue_cfg, + queue_cfg[i].dynamic); + PPE_AC_UNICAST_QUEUE_SET_WEIGHT(unicast_queue_cfg, + queue_cfg[i].weight); + PPE_AC_UNICAST_QUEUE_SET_THRESHOLD(unicast_queue_cfg, + queue_cfg[i].ceil); + PPE_AC_UNICAST_QUEUE_SET_GRN_RESUME(unicast_queue_cfg, + queue_cfg[i].resume_offset); + + ret = regmap_bulk_write(ppe_dev->regmap, reg, + unicast_queue_cfg, + ARRAY_SIZE(unicast_queue_cfg)); + if (ret) + goto qm_config_fail; + } else { + reg = PPE_AC_MULTICAST_QUEUE_CFG_TBL_ADDR + + PPE_AC_MULTICAST_QUEUE_CFG_TBL_INC * queue_id; + + ret = regmap_bulk_read(ppe_dev->regmap, reg, + multicast_queue_cfg, + ARRAY_SIZE(multicast_queue_cfg)); + if (ret) + goto qm_config_fail; + + PPE_AC_MULTICAST_QUEUE_SET_EN(multicast_queue_cfg, true); + PPE_AC_MULTICAST_QUEUE_SET_GRN_GRP_ID(multicast_queue_cfg, 0); + PPE_AC_MULTICAST_QUEUE_SET_GRN_PRE_LIMIT(multicast_queue_cfg, + queue_cfg[i].prealloc_buf); + PPE_AC_MULTICAST_QUEUE_SET_GRN_THRESHOLD(multicast_queue_cfg, + queue_cfg[i].ceil); + PPE_AC_MULTICAST_QUEUE_SET_GRN_RESUME(multicast_queue_cfg, + queue_cfg[i].resume_offset); + + ret = regmap_bulk_write(ppe_dev->regmap, reg, + multicast_queue_cfg, + ARRAY_SIZE(multicast_queue_cfg)); + if (ret) + goto qm_config_fail; + } + + /* Enable enqueue. */ + reg = PPE_ENQ_OPR_TBL_ADDR + PPE_ENQ_OPR_TBL_INC * queue_id; + ret = regmap_clear_bits(ppe_dev->regmap, reg, + PPE_ENQ_OPR_TBL_ENQ_DISABLE); + if (ret) + goto qm_config_fail; + + /* Enable dequeue. */ + reg = PPE_DEQ_OPR_TBL_ADDR + PPE_DEQ_OPR_TBL_INC * queue_id; + ret = regmap_clear_bits(ppe_dev->regmap, reg, + PPE_DEQ_OPR_TBL_DEQ_DISABLE); + if (ret) + goto qm_config_fail; + + queue_id++; + } + } + + /* Enable queue counter for all PPE hardware queues. */ + ret = regmap_set_bits(ppe_dev->regmap, PPE_EG_BRIDGE_CONFIG_ADDR, + PPE_EG_BRIDGE_CONFIG_QUEUE_CNT_EN); + if (ret) + goto qm_config_fail; + + return 0; + +qm_config_fail: + dev_err(ppe_dev->dev, "PPE QM config error %d\n", ret); + return ret; +} + +static int ppe_node_scheduler_config(struct ppe_device *ppe_dev, + const struct ppe_scheduler_port_config config) +{ + struct ppe_scheduler_cfg sch_cfg; + int ret, i; + + for (i = 0; i < config.loop_num; i++) { + if (!config.pri_max) { + /* Round robin scheduler without priority. */ + sch_cfg.flow_id = config.flow_id; + sch_cfg.pri = 0; + sch_cfg.drr_node_id = config.drr_node_id; + } else { + sch_cfg.flow_id = config.flow_id + (i / config.pri_max); + sch_cfg.pri = i % config.pri_max; + sch_cfg.drr_node_id = config.drr_node_id + i; + } + + /* Scheduler weight, must be more than 0. */ + sch_cfg.drr_node_wt = 1; + /* Byte based to be scheduled. */ + sch_cfg.unit_is_packet = false; + /* Frame + CRC calculated. 
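+		 * (PPE_SCH_WITH_FRAME_CRC: the Ethernet frame plus CRC is
+		 * metered, IPG and preamble are excluded.)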
*/
+		sch_cfg.frame_mode = PPE_SCH_WITH_FRAME_CRC;
+
+		ret = ppe_queue_scheduler_set(ppe_dev, config.node_id + i,
+					      config.flow_level,
+					      config.port,
+					      sch_cfg);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+/* Initialize scheduler settings for PPE buffer utilization and for
+ * dispatching packets on the PPE queues.
+ */
+static int ppe_config_scheduler(struct ppe_device *ppe_dev)
+{
+	const struct ppe_scheduler_port_config *port_cfg;
+	const struct ppe_scheduler_qm_config *qm_cfg;
+	const struct ppe_scheduler_bm_config *bm_cfg;
+	int ret, i, count;
+	u32 val, reg;
+
+	count = ARRAY_SIZE(ipq9574_ppe_sch_bm_config);
+	bm_cfg = ipq9574_ppe_sch_bm_config;
+
+	/* Configure the depth of BM scheduler entries. */
+	val = FIELD_PREP(PPE_BM_SCH_CTRL_SCH_DEPTH, count);
+	val |= FIELD_PREP(PPE_BM_SCH_CTRL_SCH_OFFSET, 0);
+	val |= FIELD_PREP(PPE_BM_SCH_CTRL_SCH_EN, 1);
+
+	ret = regmap_write(ppe_dev->regmap, PPE_BM_SCH_CTRL_ADDR, val);
+	if (ret)
+		goto sch_config_fail;
+
+	/* Configure each BM scheduler entry with the valid ingress port and
+	 * egress port; the second port takes effect when the specified port
+	 * is in the inactive state.
+	 */
+	for (i = 0; i < count; i++) {
+		val = FIELD_PREP(PPE_BM_SCH_CFG_TBL_VALID, bm_cfg[i].valid);
+		val |= FIELD_PREP(PPE_BM_SCH_CFG_TBL_DIR, bm_cfg[i].dir);
+		val |= FIELD_PREP(PPE_BM_SCH_CFG_TBL_PORT_NUM, bm_cfg[i].port);
+		val |= FIELD_PREP(PPE_BM_SCH_CFG_TBL_SECOND_PORT_VALID,
+				  bm_cfg[i].backup_port_valid);
+		val |= FIELD_PREP(PPE_BM_SCH_CFG_TBL_SECOND_PORT,
+				  bm_cfg[i].backup_port);
+
+		reg = PPE_BM_SCH_CFG_TBL_ADDR + i * PPE_BM_SCH_CFG_TBL_INC;
+		ret = regmap_write(ppe_dev->regmap, reg, val);
+		if (ret)
+			goto sch_config_fail;
+	}
+
+	count = ARRAY_SIZE(ipq9574_ppe_sch_qm_config);
+	qm_cfg = ipq9574_ppe_sch_qm_config;
+
+	/* Configure the depth of QM scheduler entries. */
+	val = FIELD_PREP(PPE_PSCH_SCH_DEPTH_CFG_SCH_DEPTH, count);
+	ret = regmap_write(ppe_dev->regmap, PPE_PSCH_SCH_DEPTH_CFG_ADDR, val);
+	if (ret)
+		goto sch_config_fail;
+
+	/* Configure each QM scheduler entry with the enqueue port and the
+	 * dequeue port; the second port takes effect when the specified
+	 * dequeue port is in the inactive state.
+	 */
+	for (i = 0; i < count; i++) {
+		val = FIELD_PREP(PPE_PSCH_SCH_CFG_TBL_ENS_PORT_BITMAP,
+				 qm_cfg[i].ensch_port_bmp);
+		val |= FIELD_PREP(PPE_PSCH_SCH_CFG_TBL_ENS_PORT,
+				  qm_cfg[i].ensch_port);
+		val |= FIELD_PREP(PPE_PSCH_SCH_CFG_TBL_DES_PORT,
+				  qm_cfg[i].desch_port);
+		val |= FIELD_PREP(PPE_PSCH_SCH_CFG_TBL_DES_SECOND_PORT_EN,
+				  qm_cfg[i].desch_backup_port_valid);
+		val |= FIELD_PREP(PPE_PSCH_SCH_CFG_TBL_DES_SECOND_PORT,
+				  qm_cfg[i].desch_backup_port);
+
+		reg = PPE_PSCH_SCH_CFG_TBL_ADDR + i * PPE_PSCH_SCH_CFG_TBL_INC;
+		ret = regmap_write(ppe_dev->regmap, reg, val);
+		if (ret)
+			goto sch_config_fail;
+	}
+
+	count = ARRAY_SIZE(ppe_port_sch_config);
+	port_cfg = ppe_port_sch_config;
+
+	/* Configure scheduler per PPE queue or flow. */
+	for (i = 0; i < count; i++) {
+		if (port_cfg[i].port >= ppe_dev->num_ports)
+			break;
+
+		ret = ppe_node_scheduler_config(ppe_dev, port_cfg[i]);
+		if (ret)
+			goto sch_config_fail;
+	}
+
+	return 0;
+
+sch_config_fail:
+	dev_err(ppe_dev->dev, "PPE scheduler arbitration config error %d\n", ret);
+	return ret;
+}
+
+/* Configure PPE queue destination of each PPE port.
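+ * The egress queue ID of a packet is the per-port unicast queue base
+ * plus a queue offset derived from either the internal priority or the
+ * RSS hash.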
*/ +static int ppe_queue_dest_init(struct ppe_device *ppe_dev) +{ + int ret, port_id, index, q_base, q_offset, res_start, res_end, pri_max; + struct ppe_queue_ucast_dest queue_dst; + + for (port_id = 0; port_id < ppe_dev->num_ports; port_id++) { + memset(&queue_dst, 0, sizeof(queue_dst)); + + ret = ppe_port_resource_get(ppe_dev, port_id, PPE_RES_UCAST, + &res_start, &res_end); + if (ret) + return ret; + + q_base = res_start; + queue_dst.dest_port = port_id; + + /* Configure queue base ID and profile ID that is same as + * physical port ID. + */ + ret = ppe_queue_ucast_base_set(ppe_dev, queue_dst, + q_base, port_id); + if (ret) + return ret; + + /* Queue priority range supported by each PPE port */ + ret = ppe_port_resource_get(ppe_dev, port_id, PPE_RES_L0_NODE, + &res_start, &res_end); + if (ret) + return ret; + + pri_max = res_end - res_start; + + /* Redirect ARP reply packet with the max priority on CPU port, + * which keeps the ARP reply directed to CPU (CPU code is 101) + * with highest priority queue of EDMA. + */ + if (port_id == 0) { + memset(&queue_dst, 0, sizeof(queue_dst)); + + queue_dst.cpu_code_en = true; + queue_dst.cpu_code = 101; + ret = ppe_queue_ucast_base_set(ppe_dev, queue_dst, + q_base + pri_max, + 0); + if (ret) + return ret; + } + + /* Initialize the queue offset of internal priority. */ + for (index = 0; index < PPE_QUEUE_INTER_PRI_NUM; index++) { + q_offset = index > pri_max ? pri_max : index; + + ret = ppe_queue_ucast_offset_pri_set(ppe_dev, port_id, + index, q_offset); + if (ret) + return ret; + } + + /* Initialize the queue offset of RSS hash as 0 to avoid the + * random hardware value that will lead to the unexpected + * destination queue generated. + */ + for (index = 0; index < PPE_QUEUE_HASH_NUM; index++) { + ret = ppe_queue_ucast_offset_hash_set(ppe_dev, port_id, + index, 0); + if (ret) + return ret; + } + } + + return 0; +} + +/* Initialize the service code 1 used by CPU port. */ +static int ppe_servcode_init(struct ppe_device *ppe_dev) +{ + struct ppe_sc_cfg sc_cfg = {}; + + bitmap_zero(sc_cfg.bitmaps.counter, PPE_SC_BYPASS_COUNTER_SIZE); + bitmap_zero(sc_cfg.bitmaps.tunnel, PPE_SC_BYPASS_TUNNEL_SIZE); + + bitmap_fill(sc_cfg.bitmaps.ingress, PPE_SC_BYPASS_INGRESS_SIZE); + clear_bit(PPE_SC_BYPASS_INGRESS_FAKE_MAC_HEADER, sc_cfg.bitmaps.ingress); + clear_bit(PPE_SC_BYPASS_INGRESS_SERVICE_CODE, sc_cfg.bitmaps.ingress); + clear_bit(PPE_SC_BYPASS_INGRESS_FAKE_L2_PROTO, sc_cfg.bitmaps.ingress); + + bitmap_fill(sc_cfg.bitmaps.egress, PPE_SC_BYPASS_EGRESS_SIZE); + clear_bit(PPE_SC_BYPASS_EGRESS_ACL_POST_ROUTING_CHECK, sc_cfg.bitmaps.egress); + + return ppe_sc_config_set(ppe_dev, PPE_EDMA_SC_BYPASS_ID, sc_cfg); +} + +/* Initialize PPE port configurations. */ +static int ppe_port_config_init(struct ppe_device *ppe_dev) +{ + u32 reg, val, mru_mtu_val[3]; + int i, ret; + + /* MTU and MRU settings are not required for CPU port 0. */ + for (i = 1; i < ppe_dev->num_ports; i++) { + /* Enable Ethernet port counter */ + ret = ppe_counter_enable_set(ppe_dev, i); + if (ret) + return ret; + + reg = PPE_MRU_MTU_CTRL_TBL_ADDR + PPE_MRU_MTU_CTRL_TBL_INC * i; + ret = regmap_bulk_read(ppe_dev->regmap, reg, + mru_mtu_val, ARRAY_SIZE(mru_mtu_val)); + if (ret) + return ret; + + /* Drop the packet when the packet size is more than the MTU + * and redirect the packet to the CPU port when the received + * packet size is more than the MRU of the physical interface. 
+		 */
+		PPE_MRU_MTU_CTRL_SET_MRU_CMD(mru_mtu_val, PPE_ACTION_REDIRECT_TO_CPU);
+		PPE_MRU_MTU_CTRL_SET_MTU_CMD(mru_mtu_val, PPE_ACTION_DROP);
+		ret = regmap_bulk_write(ppe_dev->regmap, reg,
+					mru_mtu_val, ARRAY_SIZE(mru_mtu_val));
+		if (ret)
+			return ret;
+
+		reg = PPE_MC_MTU_CTRL_TBL_ADDR + PPE_MC_MTU_CTRL_TBL_INC * i;
+		val = FIELD_PREP(PPE_MC_MTU_CTRL_TBL_MTU_CMD, PPE_ACTION_DROP);
+		ret = regmap_update_bits(ppe_dev->regmap, reg,
+					 PPE_MC_MTU_CTRL_TBL_MTU_CMD,
+					 val);
+		if (ret)
+			return ret;
+	}
+
+	/* Enable CPU port counters. */
+	return ppe_counter_enable_set(ppe_dev, 0);
+}
+
+/* Initialize the PPE RSS configuration for IPv4 and IPv6 packet receive.
+ * The RSS settings control the random RSS hash value generated during
+ * packet receive. This hash is then used to generate the queue offset
+ * that determines the destination queue of the packet.
+ */
+static int ppe_rss_hash_init(struct ppe_device *ppe_dev)
+{
+	u16 fins[PPE_RSS_HASH_TUPLES] = { 0x205, 0x264, 0x227, 0x245, 0x201 };
+	u8 ips[PPE_RSS_HASH_IP_LENGTH] = { 0x13, 0xb, 0x13, 0xb };
+	struct ppe_rss_hash_cfg hash_cfg;
+	int i, ret;
+
+	hash_cfg.hash_seed = get_random_u32();
+	hash_cfg.hash_mask = 0xfff;
+
+	/* Use 5 tuple as RSS hash key for the first fragment of TCP, UDP
+	 * and UDP-Lite packets.
+	 */
+	hash_cfg.hash_fragment_mode = false;
+
+	/* The final common seed configuration used to calculate the RSS
+	 * hash value, which applies to both IPv4 and IPv6 packets.
+	 */
+	for (i = 0; i < ARRAY_SIZE(fins); i++) {
+		hash_cfg.hash_fin_inner[i] = fins[i] & 0x1f;
+		hash_cfg.hash_fin_outer[i] = fins[i] >> 5;
+	}
+
+	/* RSS seeds for IP protocol, L4 destination & source port and
+	 * destination & source IP used to calculate the RSS hash value.
+	 */
+	hash_cfg.hash_protocol_mix = 0x13;
+	hash_cfg.hash_dport_mix = 0xb;
+	hash_cfg.hash_sport_mix = 0x13;
+	hash_cfg.hash_dip_mix[0] = 0xb;
+	hash_cfg.hash_sip_mix[0] = 0x13;
+
+	/* Configure RSS seed configs for IPv4 packet. */
+	ret = ppe_rss_hash_config_set(ppe_dev, PPE_RSS_HASH_MODE_IPV4, hash_cfg);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < ARRAY_SIZE(ips); i++) {
+		hash_cfg.hash_sip_mix[i] = ips[i];
+		hash_cfg.hash_dip_mix[i] = ips[i];
+	}
+
+	/* Configure RSS seed configs for IPv6 packet. */
+	return ppe_rss_hash_config_set(ppe_dev, PPE_RSS_HASH_MODE_IPV6, hash_cfg);
+}
+
+/* Initialize the mapping of the PPE queues assigned to CPU port 0
+ * onto Ethernet DMA ring 0.
+ */
+static int ppe_queues_to_ring_init(struct ppe_device *ppe_dev)
+{
+	u32 queue_bmap[PPE_RING_TO_QUEUE_BITMAP_WORD_CNT] = {};
+	int ret, queue_id, queue_max;
+
+	ret = ppe_port_resource_get(ppe_dev, 0, PPE_RES_UCAST,
+				    &queue_id, &queue_max);
+	if (ret)
+		return ret;
+
+	for (; queue_id <= queue_max; queue_id++)
+		queue_bmap[queue_id / 32] |= BIT_MASK(queue_id % 32);
+
+	return ppe_ring_queue_map_set(ppe_dev, 0, queue_bmap);
+}
+
+/* Initialize PPE bridge settings to only enable L2 frame receive and
+ * transmit between the CPU port and the PPE Ethernet ports.
+ */
+static int ppe_bridge_init(struct ppe_device *ppe_dev)
+{
+	u32 reg, mask, port_cfg[4], vsi_cfg[2];
+	int ret, i;
+
+	/* Configure the following settings for CPU port0:
+	 * a.) Enable Bridge TX
+	 * b.) Disable FDB new address learning
+	 * c.)
Disable station move address learning + */ + mask = PPE_PORT_BRIDGE_TXMAC_EN; + mask |= PPE_PORT_BRIDGE_NEW_LRN_EN; + mask |= PPE_PORT_BRIDGE_STA_MOVE_LRN_EN; + ret = regmap_update_bits(ppe_dev->regmap, + PPE_PORT_BRIDGE_CTRL_ADDR, + mask, + PPE_PORT_BRIDGE_TXMAC_EN); + if (ret) + return ret; + + for (i = 1; i < ppe_dev->num_ports; i++) { + /* Enable invalid VSI forwarding for all the physical ports + * to CPU port0, in case no VSI is assigned to the physical + * port. + */ + reg = PPE_L2_VP_PORT_TBL_ADDR + PPE_L2_VP_PORT_TBL_INC * i; + ret = regmap_bulk_read(ppe_dev->regmap, reg, + port_cfg, ARRAY_SIZE(port_cfg)); + + if (ret) + return ret; + + PPE_L2_PORT_SET_INVALID_VSI_FWD_EN(port_cfg, true); + PPE_L2_PORT_SET_DST_INFO(port_cfg, 0); + + ret = regmap_bulk_write(ppe_dev->regmap, reg, + port_cfg, ARRAY_SIZE(port_cfg)); + if (ret) + return ret; + } + + for (i = 0; i < PPE_VSI_TBL_ENTRIES; i++) { + /* Set the VSI forward membership to include only CPU port0. + * FDB learning and forwarding take place only after switchdev + * is supported later to create the VSI and join the physical + * ports to the VSI port member. + */ + reg = PPE_VSI_TBL_ADDR + PPE_VSI_TBL_INC * i; + ret = regmap_bulk_read(ppe_dev->regmap, reg, + vsi_cfg, ARRAY_SIZE(vsi_cfg)); + if (ret) + return ret; + + PPE_VSI_SET_MEMBER_PORT_BITMAP(vsi_cfg, BIT(0)); + PPE_VSI_SET_UUC_BITMAP(vsi_cfg, BIT(0)); + PPE_VSI_SET_UMC_BITMAP(vsi_cfg, BIT(0)); + PPE_VSI_SET_BC_BITMAP(vsi_cfg, BIT(0)); + PPE_VSI_SET_NEW_ADDR_LRN_EN(vsi_cfg, true); + PPE_VSI_SET_NEW_ADDR_FWD_CMD(vsi_cfg, PPE_ACTION_FORWARD); + PPE_VSI_SET_STATION_MOVE_LRN_EN(vsi_cfg, true); + PPE_VSI_SET_STATION_MOVE_FWD_CMD(vsi_cfg, PPE_ACTION_FORWARD); + + ret = regmap_bulk_write(ppe_dev->regmap, reg, + vsi_cfg, ARRAY_SIZE(vsi_cfg)); + if (ret) + return ret; + } + + return 0; +} + +int ppe_hw_config(struct ppe_device *ppe_dev) +{ + int ret; + + ret = ppe_config_bm(ppe_dev); + if (ret) + return ret; + + ret = ppe_config_qm(ppe_dev); + if (ret) + return ret; + + ret = ppe_config_scheduler(ppe_dev); + if (ret) + return ret; + + ret = ppe_queue_dest_init(ppe_dev); + if (ret) + return ret; + + ret = ppe_servcode_init(ppe_dev); + if (ret) + return ret; + + ret = ppe_port_config_init(ppe_dev); + if (ret) + return ret; + + ret = ppe_rss_hash_init(ppe_dev); + if (ret) + return ret; + + ret = ppe_queues_to_ring_init(ppe_dev); + if (ret) + return ret; + + return ppe_bridge_init(ppe_dev); +} diff --git a/drivers/net/ethernet/qualcomm/ppe/ppe_config.h b/drivers/net/ethernet/qualcomm/ppe/ppe_config.h new file mode 100644 index 000000000000..4bb45ca40144 --- /dev/null +++ b/drivers/net/ethernet/qualcomm/ppe/ppe_config.h @@ -0,0 +1,317 @@ +/* SPDX-License-Identifier: GPL-2.0-only + * + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. + */ + +#ifndef __PPE_CONFIG_H__ +#define __PPE_CONFIG_H__ + +#include <linux/types.h> + +#include "ppe.h" + +/* There are different table index ranges for configuring queue base ID of + * the destination port, CPU code and service code. + */ +#define PPE_QUEUE_BASE_DEST_PORT 0 +#define PPE_QUEUE_BASE_CPU_CODE 1024 +#define PPE_QUEUE_BASE_SERVICE_CODE 2048 + +#define PPE_QUEUE_INTER_PRI_NUM 16 +#define PPE_QUEUE_HASH_NUM 256 + +/* The service code is used by EDMA port to transmit packet to PPE. */ +#define PPE_EDMA_SC_BYPASS_ID 1 + +/* The PPE RSS hash configured for IPv4 and IPv6 packet separately. 
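+ * PPE_RSS_HASH_MODE_IPV4 and PPE_RSS_HASH_MODE_IPV6 may be OR-ed together
+ * in the mode argument of ppe_rss_hash_config_set() to program both at
+ * once.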
*/ +#define PPE_RSS_HASH_MODE_IPV4 BIT(0) +#define PPE_RSS_HASH_MODE_IPV6 BIT(1) +#define PPE_RSS_HASH_IP_LENGTH 4 +#define PPE_RSS_HASH_TUPLES 5 + +/* PPE supports 300 queues, each bit presents as one queue. */ +#define PPE_RING_TO_QUEUE_BITMAP_WORD_CNT 10 + +/** + * enum ppe_scheduler_frame_mode - PPE scheduler frame mode. + * @PPE_SCH_WITH_IPG_PREAMBLE_FRAME_CRC: The scheduled frame includes IPG, + * preamble, Ethernet packet and CRC. + * @PPE_SCH_WITH_FRAME_CRC: The scheduled frame includes Ethernet frame and CRC + * excluding IPG and preamble. + * @PPE_SCH_WITH_L3_PAYLOAD: The scheduled frame includes layer 3 packet data. + */ +enum ppe_scheduler_frame_mode { + PPE_SCH_WITH_IPG_PREAMBLE_FRAME_CRC = 0, + PPE_SCH_WITH_FRAME_CRC = 1, + PPE_SCH_WITH_L3_PAYLOAD = 2, +}; + +/** + * struct ppe_scheduler_cfg - PPE scheduler configuration. + * @flow_id: PPE flow ID. + * @pri: Scheduler priority. + * @drr_node_id: Node ID for scheduled traffic. + * @drr_node_wt: Weight for scheduled traffic. + * @unit_is_packet: Packet based or byte based unit for scheduled traffic. + * @frame_mode: Packet mode to be scheduled. + * + * PPE scheduler supports commit rate and exceed rate configurations. + */ +struct ppe_scheduler_cfg { + int flow_id; + int pri; + int drr_node_id; + int drr_node_wt; + bool unit_is_packet; + enum ppe_scheduler_frame_mode frame_mode; +}; + +/** + * enum ppe_resource_type - PPE resource type. + * @PPE_RES_UCAST: Unicast queue resource. + * @PPE_RES_MCAST: Multicast queue resource. + * @PPE_RES_L0_NODE: Level 0 for queue based node resource. + * @PPE_RES_L1_NODE: Level 1 for flow based node resource. + * @PPE_RES_FLOW_ID: Flow based node resource. + */ +enum ppe_resource_type { + PPE_RES_UCAST, + PPE_RES_MCAST, + PPE_RES_L0_NODE, + PPE_RES_L1_NODE, + PPE_RES_FLOW_ID, +}; + +/** + * struct ppe_queue_ucast_dest - PPE unicast queue destination. + * @src_profile: Source profile. + * @service_code_en: Enable service code to map the queue base ID. + * @service_code: Service code. + * @cpu_code_en: Enable CPU code to map the queue base ID. + * @cpu_code: CPU code. + * @dest_port: destination port. + * + * PPE egress queue ID is decided by the service code if enabled, otherwise + * by the CPU code if enabled, or by destination port if both service code + * and CPU code are disabled. + */ +struct ppe_queue_ucast_dest { + int src_profile; + bool service_code_en; + int service_code; + bool cpu_code_en; + int cpu_code; + int dest_port; +}; + +/* Hardware bitmaps for bypassing features of the ingress packet. 
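+ * The enum values below are hardware bit positions; they are intended to
+ * be used with the bitmap helpers (set_bit(), clear_bit(), test_bit())
+ * on the ingress bitmap declared in struct ppe_sc_bypass.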
*/ +enum ppe_sc_ingress_type { + PPE_SC_BYPASS_INGRESS_VLAN_TAG_FMT_CHECK = 0, + PPE_SC_BYPASS_INGRESS_VLAN_MEMBER_CHECK = 1, + PPE_SC_BYPASS_INGRESS_VLAN_TRANSLATE = 2, + PPE_SC_BYPASS_INGRESS_MY_MAC_CHECK = 3, + PPE_SC_BYPASS_INGRESS_DIP_LOOKUP = 4, + PPE_SC_BYPASS_INGRESS_FLOW_LOOKUP = 5, + PPE_SC_BYPASS_INGRESS_FLOW_ACTION = 6, + PPE_SC_BYPASS_INGRESS_ACL = 7, + PPE_SC_BYPASS_INGRESS_FAKE_MAC_HEADER = 8, + PPE_SC_BYPASS_INGRESS_SERVICE_CODE = 9, + PPE_SC_BYPASS_INGRESS_WRONG_PKT_FMT_L2 = 10, + PPE_SC_BYPASS_INGRESS_WRONG_PKT_FMT_L3_IPV4 = 11, + PPE_SC_BYPASS_INGRESS_WRONG_PKT_FMT_L3_IPV6 = 12, + PPE_SC_BYPASS_INGRESS_WRONG_PKT_FMT_L4 = 13, + PPE_SC_BYPASS_INGRESS_FLOW_SERVICE_CODE = 14, + PPE_SC_BYPASS_INGRESS_ACL_SERVICE_CODE = 15, + PPE_SC_BYPASS_INGRESS_FAKE_L2_PROTO = 16, + PPE_SC_BYPASS_INGRESS_PPPOE_TERMINATION = 17, + PPE_SC_BYPASS_INGRESS_DEFAULT_VLAN = 18, + PPE_SC_BYPASS_INGRESS_DEFAULT_PCP = 19, + PPE_SC_BYPASS_INGRESS_VSI_ASSIGN = 20, + /* Values 21-23 are not specified by hardware. */ + PPE_SC_BYPASS_INGRESS_VLAN_ASSIGN_FAIL = 24, + PPE_SC_BYPASS_INGRESS_SOURCE_GUARD = 25, + PPE_SC_BYPASS_INGRESS_MRU_MTU_CHECK = 26, + PPE_SC_BYPASS_INGRESS_FLOW_SRC_CHECK = 27, + PPE_SC_BYPASS_INGRESS_FLOW_QOS = 28, + /* This must be last as it determines the size of the BITMAP. */ + PPE_SC_BYPASS_INGRESS_SIZE, +}; + +/* Hardware bitmaps for bypassing features of the egress packet. */ +enum ppe_sc_egress_type { + PPE_SC_BYPASS_EGRESS_VLAN_MEMBER_CHECK = 0, + PPE_SC_BYPASS_EGRESS_VLAN_TRANSLATE = 1, + PPE_SC_BYPASS_EGRESS_VLAN_TAG_FMT_CTRL = 2, + PPE_SC_BYPASS_EGRESS_FDB_LEARN = 3, + PPE_SC_BYPASS_EGRESS_FDB_REFRESH = 4, + PPE_SC_BYPASS_EGRESS_L2_SOURCE_SECURITY = 5, + PPE_SC_BYPASS_EGRESS_MANAGEMENT_FWD = 6, + PPE_SC_BYPASS_EGRESS_BRIDGING_FWD = 7, + PPE_SC_BYPASS_EGRESS_IN_STP_FLTR = 8, + PPE_SC_BYPASS_EGRESS_EG_STP_FLTR = 9, + PPE_SC_BYPASS_EGRESS_SOURCE_FLTR = 10, + PPE_SC_BYPASS_EGRESS_POLICER = 11, + PPE_SC_BYPASS_EGRESS_L2_PKT_EDIT = 12, + PPE_SC_BYPASS_EGRESS_L3_PKT_EDIT = 13, + PPE_SC_BYPASS_EGRESS_ACL_POST_ROUTING_CHECK = 14, + PPE_SC_BYPASS_EGRESS_PORT_ISOLATION = 15, + PPE_SC_BYPASS_EGRESS_PRE_ACL_QOS = 16, + PPE_SC_BYPASS_EGRESS_POST_ACL_QOS = 17, + PPE_SC_BYPASS_EGRESS_DSCP_QOS = 18, + PPE_SC_BYPASS_EGRESS_PCP_QOS = 19, + PPE_SC_BYPASS_EGRESS_PREHEADER_QOS = 20, + PPE_SC_BYPASS_EGRESS_FAKE_MAC_DROP = 21, + PPE_SC_BYPASS_EGRESS_TUNL_CONTEXT = 22, + PPE_SC_BYPASS_EGRESS_FLOW_POLICER = 23, + /* This must be last as it determines the size of the BITMAP. */ + PPE_SC_BYPASS_EGRESS_SIZE, +}; + +/* Hardware bitmaps for bypassing counter of packet. */ +enum ppe_sc_counter_type { + PPE_SC_BYPASS_COUNTER_RX_VLAN = 0, + PPE_SC_BYPASS_COUNTER_RX = 1, + PPE_SC_BYPASS_COUNTER_TX_VLAN = 2, + PPE_SC_BYPASS_COUNTER_TX = 3, + /* This must be last as it determines the size of the BITMAP. */ + PPE_SC_BYPASS_COUNTER_SIZE, +}; + +/* Hardware bitmaps for bypassing features of tunnel packet. 
*/ +enum ppe_sc_tunnel_type { + PPE_SC_BYPASS_TUNNEL_SERVICE_CODE = 0, + PPE_SC_BYPASS_TUNNEL_TUNNEL_HANDLE = 1, + PPE_SC_BYPASS_TUNNEL_L3_IF_CHECK = 2, + PPE_SC_BYPASS_TUNNEL_VLAN_CHECK = 3, + PPE_SC_BYPASS_TUNNEL_DMAC_CHECK = 4, + PPE_SC_BYPASS_TUNNEL_UDP_CSUM_0_CHECK = 5, + PPE_SC_BYPASS_TUNNEL_TBL_DE_ACCE_CHECK = 6, + PPE_SC_BYPASS_TUNNEL_PPPOE_MC_TERM_CHECK = 7, + PPE_SC_BYPASS_TUNNEL_TTL_EXCEED_CHECK = 8, + PPE_SC_BYPASS_TUNNEL_MAP_SRC_CHECK = 9, + PPE_SC_BYPASS_TUNNEL_MAP_DST_CHECK = 10, + PPE_SC_BYPASS_TUNNEL_LPM_DST_LOOKUP = 11, + PPE_SC_BYPASS_TUNNEL_LPM_LOOKUP = 12, + PPE_SC_BYPASS_TUNNEL_WRONG_PKT_FMT_L2 = 13, + PPE_SC_BYPASS_TUNNEL_WRONG_PKT_FMT_L3_IPV4 = 14, + PPE_SC_BYPASS_TUNNEL_WRONG_PKT_FMT_L3_IPV6 = 15, + PPE_SC_BYPASS_TUNNEL_WRONG_PKT_FMT_L4 = 16, + PPE_SC_BYPASS_TUNNEL_WRONG_PKT_FMT_TUNNEL = 17, + /* Values 18-19 are not specified by hardware. */ + PPE_SC_BYPASS_TUNNEL_PRE_IPO = 20, + /* This must be last as it determines the size of the BITMAP. */ + PPE_SC_BYPASS_TUNNEL_SIZE, +}; + +/** + * struct ppe_sc_bypass - PPE service bypass bitmaps + * @ingress: Bitmap of features that can be bypassed on the ingress packet. + * @egress: Bitmap of features that can be bypassed on the egress packet. + * @counter: Bitmap of features that can be bypassed on the counter type. + * @tunnel: Bitmap of features that can be bypassed on the tunnel packet. + */ +struct ppe_sc_bypass { + DECLARE_BITMAP(ingress, PPE_SC_BYPASS_INGRESS_SIZE); + DECLARE_BITMAP(egress, PPE_SC_BYPASS_EGRESS_SIZE); + DECLARE_BITMAP(counter, PPE_SC_BYPASS_COUNTER_SIZE); + DECLARE_BITMAP(tunnel, PPE_SC_BYPASS_TUNNEL_SIZE); +}; + +/** + * struct ppe_sc_cfg - PPE service code configuration. + * @dest_port_valid: Generate destination port or not. + * @dest_port: Destination port ID. + * @bitmaps: Bitmap of bypass features. + * @is_src: Destination port acts as source port, packet sent to CPU. + * @next_service_code: New service code generated. + * @eip_field_update_bitmap: Fields updated as actions taken for EIP. + * @eip_hw_service: Selected hardware functions for EIP. + * @eip_offset_sel: Packet offset selection, using packet's layer 4 offset + * or using packet's layer 3 offset for EIP. + * + * Service code is generated during the packet passing through PPE. + */ +struct ppe_sc_cfg { + bool dest_port_valid; + int dest_port; + struct ppe_sc_bypass bitmaps; + bool is_src; + int next_service_code; + int eip_field_update_bitmap; + int eip_hw_service; + int eip_offset_sel; +}; + +/** + * enum ppe_action_type - PPE action of the received packet. + * @PPE_ACTION_FORWARD: Packet forwarded per L2/L3 process. + * @PPE_ACTION_DROP: Packet dropped by PPE. + * @PPE_ACTION_COPY_TO_CPU: Packet copied to CPU port per multicast queue. + * @PPE_ACTION_REDIRECT_TO_CPU: Packet redirected to CPU port per unicast queue. + */ +enum ppe_action_type { + PPE_ACTION_FORWARD = 0, + PPE_ACTION_DROP = 1, + PPE_ACTION_COPY_TO_CPU = 2, + PPE_ACTION_REDIRECT_TO_CPU = 3, +}; + +/** + * struct ppe_rss_hash_cfg - PPE RSS hash configuration. + * @hash_mask: Mask of the generated hash value. + * @hash_fragment_mode: Hash generation mode for the first fragment of TCP, + * UDP and UDP-Lite packets, to use either 3 tuple or 5 tuple for RSS hash + * key computation. + * @hash_seed: Seed to generate RSS hash. + * @hash_sip_mix: Source IP selection. + * @hash_dip_mix: Destination IP selection. + * @hash_protocol_mix: Protocol selection. + * @hash_sport_mix: Source L4 port selection. + * @hash_dport_mix: Destination L4 port selection. 
+ * @hash_fin_inner: RSS hash value first selection. + * @hash_fin_outer: RSS hash value second selection. + * + * PPE RSS hash value is generated for the packet based on the RSS hash + * configured. + */ +struct ppe_rss_hash_cfg { + u32 hash_mask; + bool hash_fragment_mode; + u32 hash_seed; + u8 hash_sip_mix[PPE_RSS_HASH_IP_LENGTH]; + u8 hash_dip_mix[PPE_RSS_HASH_IP_LENGTH]; + u8 hash_protocol_mix; + u8 hash_sport_mix; + u8 hash_dport_mix; + u8 hash_fin_inner[PPE_RSS_HASH_TUPLES]; + u8 hash_fin_outer[PPE_RSS_HASH_TUPLES]; +}; + +int ppe_hw_config(struct ppe_device *ppe_dev); +int ppe_queue_scheduler_set(struct ppe_device *ppe_dev, + int node_id, bool flow_level, int port, + struct ppe_scheduler_cfg scheduler_cfg); +int ppe_queue_ucast_base_set(struct ppe_device *ppe_dev, + struct ppe_queue_ucast_dest queue_dst, + int queue_base, + int profile_id); +int ppe_queue_ucast_offset_pri_set(struct ppe_device *ppe_dev, + int profile_id, + int priority, + int queue_offset); +int ppe_queue_ucast_offset_hash_set(struct ppe_device *ppe_dev, + int profile_id, + int rss_hash, + int queue_offset); +int ppe_port_resource_get(struct ppe_device *ppe_dev, int port, + enum ppe_resource_type type, + int *res_start, int *res_end); +int ppe_sc_config_set(struct ppe_device *ppe_dev, int sc, + struct ppe_sc_cfg cfg); +int ppe_counter_enable_set(struct ppe_device *ppe_dev, int port); +int ppe_rss_hash_config_set(struct ppe_device *ppe_dev, int mode, + struct ppe_rss_hash_cfg hash_cfg); +int ppe_ring_queue_map_set(struct ppe_device *ppe_dev, + int ring_id, + u32 *queue_map); +#endif diff --git a/drivers/net/ethernet/qualcomm/ppe/ppe_debugfs.c b/drivers/net/ethernet/qualcomm/ppe/ppe_debugfs.c new file mode 100644 index 000000000000..fd959a76ff43 --- /dev/null +++ b/drivers/net/ethernet/qualcomm/ppe/ppe_debugfs.c @@ -0,0 +1,847 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. + */ + +/* PPE debugfs routines for display of PPE counters useful for debug. */ + +#include <linux/bitfield.h> +#include <linux/debugfs.h> +#include <linux/dev_printk.h> +#include <linux/device.h> +#include <linux/regmap.h> +#include <linux/seq_file.h> + +#include "ppe.h" +#include "ppe_config.h" +#include "ppe_debugfs.h" +#include "ppe_regs.h" + +#define PPE_PKT_CNT_TBL_SIZE 3 +#define PPE_DROP_PKT_CNT_TBL_SIZE 5 + +#define PPE_W0_PKT_CNT GENMASK(31, 0) +#define PPE_W2_DROP_PKT_CNT_LOW GENMASK(31, 8) +#define PPE_W3_DROP_PKT_CNT_HIGH GENMASK(7, 0) + +#define PPE_GET_PKT_CNT(tbl_cnt) \ + FIELD_GET(PPE_W0_PKT_CNT, *(tbl_cnt)) +#define PPE_GET_DROP_PKT_CNT_LOW(tbl_cnt) \ + FIELD_GET(PPE_W2_DROP_PKT_CNT_LOW, *((tbl_cnt) + 0x2)) +#define PPE_GET_DROP_PKT_CNT_HIGH(tbl_cnt) \ + FIELD_GET(PPE_W3_DROP_PKT_CNT_HIGH, *((tbl_cnt) + 0x3)) + +/** + * enum ppe_cnt_size_type - PPE counter size type + * @PPE_PKT_CNT_SIZE_1WORD: Counter size with single register + * @PPE_PKT_CNT_SIZE_3WORD: Counter size with table of 3 words + * @PPE_PKT_CNT_SIZE_5WORD: Counter size with table of 5 words + * + * PPE takes the different register size to record the packet counters. + * It uses single register, or register table with 3 words or 5 words. + * The counter with table size 5 words also records the drop counter. + * There are also some other counter types occupying sizes less than 32 + * bits, which is not covered by this enumeration type. + */ +enum ppe_cnt_size_type { + PPE_PKT_CNT_SIZE_1WORD, + PPE_PKT_CNT_SIZE_3WORD, + PPE_PKT_CNT_SIZE_5WORD, +}; + +/** + * enum ppe_cnt_type - PPE counter type. 
+ * @PPE_CNT_BM: Packet counter processed by BM. + * @PPE_CNT_PARSE: Packet counter parsed on ingress. + * @PPE_CNT_PORT_RX: Packet counter on the ingress port. + * @PPE_CNT_VLAN_RX: VLAN packet counter received. + * @PPE_CNT_L2_FWD: Packet counter processed by L2 forwarding. + * @PPE_CNT_CPU_CODE: Packet counter marked with various CPU codes. + * @PPE_CNT_VLAN_TX: VLAN packet counter transmitted. + * @PPE_CNT_PORT_TX: Packet counter on the egress port. + * @PPE_CNT_QM: Packet counter processed by QM. + */ +enum ppe_cnt_type { + PPE_CNT_BM, + PPE_CNT_PARSE, + PPE_CNT_PORT_RX, + PPE_CNT_VLAN_RX, + PPE_CNT_L2_FWD, + PPE_CNT_CPU_CODE, + PPE_CNT_VLAN_TX, + PPE_CNT_PORT_TX, + PPE_CNT_QM, +}; + +/** + * struct ppe_debugfs_entry - PPE debugfs entry. + * @name: Debugfs file name. + * @counter_type: PPE packet counter type. + * @ppe: PPE device. + * + * The PPE debugfs entry is used to create the debugfs file and passed + * to debugfs_create_file() as private data. + */ +struct ppe_debugfs_entry { + const char *name; + enum ppe_cnt_type counter_type; + struct ppe_device *ppe; +}; + +static const struct ppe_debugfs_entry debugfs_files[] = { + { + .name = "bm", + .counter_type = PPE_CNT_BM, + }, + { + .name = "parse", + .counter_type = PPE_CNT_PARSE, + }, + { + .name = "port_rx", + .counter_type = PPE_CNT_PORT_RX, + }, + { + .name = "vlan_rx", + .counter_type = PPE_CNT_VLAN_RX, + }, + { + .name = "l2_forward", + .counter_type = PPE_CNT_L2_FWD, + }, + { + .name = "cpu_code", + .counter_type = PPE_CNT_CPU_CODE, + }, + { + .name = "vlan_tx", + .counter_type = PPE_CNT_VLAN_TX, + }, + { + .name = "port_tx", + .counter_type = PPE_CNT_PORT_TX, + }, + { + .name = "qm", + .counter_type = PPE_CNT_QM, + }, +}; + +static int ppe_pkt_cnt_get(struct ppe_device *ppe_dev, u32 reg, + enum ppe_cnt_size_type cnt_type, + u32 *cnt, u32 *drop_cnt) +{ + u32 drop_pkt_cnt[PPE_DROP_PKT_CNT_TBL_SIZE]; + u32 pkt_cnt[PPE_PKT_CNT_TBL_SIZE]; + u32 value; + int ret; + + switch (cnt_type) { + case PPE_PKT_CNT_SIZE_1WORD: + ret = regmap_read(ppe_dev->regmap, reg, &value); + if (ret) + return ret; + + *cnt = value; + break; + case PPE_PKT_CNT_SIZE_3WORD: + ret = regmap_bulk_read(ppe_dev->regmap, reg, + pkt_cnt, ARRAY_SIZE(pkt_cnt)); + if (ret) + return ret; + + *cnt = PPE_GET_PKT_CNT(pkt_cnt); + break; + case PPE_PKT_CNT_SIZE_5WORD: + ret = regmap_bulk_read(ppe_dev->regmap, reg, + drop_pkt_cnt, ARRAY_SIZE(drop_pkt_cnt)); + if (ret) + return ret; + + *cnt = PPE_GET_PKT_CNT(drop_pkt_cnt); + + /* Drop counter with low 24 bits. */ + value = PPE_GET_DROP_PKT_CNT_LOW(drop_pkt_cnt); + *drop_cnt = FIELD_PREP(GENMASK(23, 0), value); + + /* Drop counter with high 8 bits. 
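+		 * Combined with the low 24 bits read above, this yields the
+		 * full 32-bit drop counter.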
 */
+		value = PPE_GET_DROP_PKT_CNT_HIGH(drop_pkt_cnt);
+		*drop_cnt |= FIELD_PREP(GENMASK(31, 24), value);
+		break;
+	}
+
+	return 0;
+}
+
+static void ppe_tbl_pkt_cnt_clear(struct ppe_device *ppe_dev, u32 reg,
+				  enum ppe_cnt_size_type cnt_type)
+{
+	u32 drop_pkt_cnt[PPE_DROP_PKT_CNT_TBL_SIZE] = {};
+	u32 pkt_cnt[PPE_PKT_CNT_TBL_SIZE] = {};
+
+	switch (cnt_type) {
+	case PPE_PKT_CNT_SIZE_1WORD:
+		regmap_write(ppe_dev->regmap, reg, 0);
+		break;
+	case PPE_PKT_CNT_SIZE_3WORD:
+		regmap_bulk_write(ppe_dev->regmap, reg,
+				  pkt_cnt, ARRAY_SIZE(pkt_cnt));
+		break;
+	case PPE_PKT_CNT_SIZE_5WORD:
+		regmap_bulk_write(ppe_dev->regmap, reg,
+				  drop_pkt_cnt, ARRAY_SIZE(drop_pkt_cnt));
+		break;
+	}
+}
+
+static int ppe_bm_counter_get(struct ppe_device *ppe_dev, struct seq_file *seq)
+{
+	u32 reg, val, pkt_cnt, pkt_cnt1;
+	int ret, i, tag;
+
+	seq_printf(seq, "%-24s", "BM SILENT_DROP:");
+	tag = 0;
+	for (i = 0; i < PPE_DROP_CNT_TBL_ENTRIES; i++) {
+		reg = PPE_DROP_CNT_TBL_ADDR + i * PPE_DROP_CNT_TBL_INC;
+		ret = ppe_pkt_cnt_get(ppe_dev, reg, PPE_PKT_CNT_SIZE_1WORD,
+				      &pkt_cnt, NULL);
+		if (ret) {
+			dev_err(ppe_dev->dev, "CNT ERROR %d\n", ret);
+			return ret;
+		}
+
+		if (pkt_cnt > 0) {
+			if (!((++tag) % 4))
+				seq_printf(seq, "\n%-24s", "");
+
+			seq_printf(seq, "%10u(%s=%04d)", pkt_cnt, "port", i);
+		}
+	}
+
+	seq_putc(seq, '\n');
+
+	/* The number of packets dropped because the hardware buffers were
+	 * only partially available for the packet.
+	 */
+	seq_printf(seq, "%-24s", "BM OVERFLOW_DROP:");
+	tag = 0;
+	for (i = 0; i < PPE_DROP_STAT_TBL_ENTRIES; i++) {
+		reg = PPE_DROP_STAT_TBL_ADDR + PPE_DROP_STAT_TBL_INC * i;
+
+		ret = ppe_pkt_cnt_get(ppe_dev, reg, PPE_PKT_CNT_SIZE_3WORD,
+				      &pkt_cnt, NULL);
+		if (ret) {
+			dev_err(ppe_dev->dev, "CNT ERROR %d\n", ret);
+			return ret;
+		}
+
+		if (pkt_cnt > 0) {
+			if (!((++tag) % 4))
+				seq_printf(seq, "\n%-24s", "");
+
+			seq_printf(seq, "%10u(%s=%04d)", pkt_cnt, "port", i);
+		}
+	}
+
+	seq_putc(seq, '\n');
+
+	/* The number of currently occupied buffers that can't be flushed. */
+	seq_printf(seq, "%-24s", "BM USED/REACT:");
+	tag = 0;
+	for (i = 0; i < PPE_BM_USED_CNT_TBL_ENTRIES; i++) {
+		reg = PPE_BM_USED_CNT_TBL_ADDR + i * PPE_BM_USED_CNT_TBL_INC;
+		ret = regmap_read(ppe_dev->regmap, reg, &val);
+		if (ret) {
+			dev_err(ppe_dev->dev, "CNT ERROR %d\n", ret);
+			return ret;
+		}
+
+		/* The number of PPE buffers used for caching the received
+		 * packets before the pause frame is sent.
+		 */
+		pkt_cnt = FIELD_GET(PPE_BM_USED_CNT_VAL, val);
+
+		reg = PPE_BM_REACT_CNT_TBL_ADDR + i * PPE_BM_REACT_CNT_TBL_INC;
+		ret = regmap_read(ppe_dev->regmap, reg, &val);
+		if (ret) {
+			dev_err(ppe_dev->dev, "CNT ERROR %d\n", ret);
+			return ret;
+		}
+
+		/* The number of PPE buffers used for caching the received
+		 * packets after the pause frame is sent out.
+		 */
+		pkt_cnt1 = FIELD_GET(PPE_BM_REACT_CNT_VAL, val);
+
+		if (pkt_cnt > 0 || pkt_cnt1 > 0) {
+			if (!((++tag) % 4))
+				seq_printf(seq, "\n%-24s", "");
+
+			seq_printf(seq, "%10u/%u(%s=%04d)", pkt_cnt, pkt_cnt1,
+				   "port", i);
+		}
+	}
+
+	seq_putc(seq, '\n');
+
+	return 0;
+}
+
+/* The number of packets processed by the ingress parser module of the PPE.
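+ *
+ * Two values are printed per port: the tunnel parser (TPRX) count
+ * and the ingress parser (IPRX) count.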
*/ +static int ppe_parse_pkt_counter_get(struct ppe_device *ppe_dev, + struct seq_file *seq) +{ + u32 reg, cnt = 0, tunnel_cnt = 0; + int i, ret, tag = 0; + + seq_printf(seq, "%-24s", "PARSE TPRX/IPRX:"); + for (i = 0; i < PPE_IPR_PKT_CNT_TBL_ENTRIES; i++) { + reg = PPE_TPR_PKT_CNT_TBL_ADDR + i * PPE_TPR_PKT_CNT_TBL_INC; + ret = ppe_pkt_cnt_get(ppe_dev, reg, PPE_PKT_CNT_SIZE_1WORD, + &tunnel_cnt, NULL); + if (ret) { + dev_err(ppe_dev->dev, "CNT ERROR %d\n", ret); + return ret; + } + + reg = PPE_IPR_PKT_CNT_TBL_ADDR + i * PPE_IPR_PKT_CNT_TBL_INC; + ret = ppe_pkt_cnt_get(ppe_dev, reg, PPE_PKT_CNT_SIZE_1WORD, + &cnt, NULL); + if (ret) { + dev_err(ppe_dev->dev, "CNT ERROR %d\n", ret); + return ret; + } + + if (tunnel_cnt > 0 || cnt > 0) { + if (!((++tag) % 4)) + seq_printf(seq, "\n%-24s", ""); + + seq_printf(seq, "%10u/%u(%s=%04d)", tunnel_cnt, cnt, + "port", i); + } + } + + seq_putc(seq, '\n'); + + return 0; +} + +/* The number of packets received or dropped on the ingress port. */ +static int ppe_port_rx_counter_get(struct ppe_device *ppe_dev, + struct seq_file *seq) +{ + u32 reg, pkt_cnt = 0, drop_cnt = 0; + int ret, i, tag; + + seq_printf(seq, "%-24s", "PORT RX/RX_DROP:"); + tag = 0; + for (i = 0; i < PPE_PHY_PORT_RX_CNT_TBL_ENTRIES; i++) { + reg = PPE_PHY_PORT_RX_CNT_TBL_ADDR + PPE_PHY_PORT_RX_CNT_TBL_INC * i; + ret = ppe_pkt_cnt_get(ppe_dev, reg, PPE_PKT_CNT_SIZE_5WORD, + &pkt_cnt, &drop_cnt); + if (ret) { + dev_err(ppe_dev->dev, "CNT ERROR %d\n", ret); + return ret; + } + + if (pkt_cnt > 0) { + if (!((++tag) % 4)) + seq_printf(seq, "\n%-24s", ""); + + seq_printf(seq, "%10u/%u(%s=%04d)", pkt_cnt, drop_cnt, + "port", i); + } + } + + seq_putc(seq, '\n'); + + seq_printf(seq, "%-24s", "VPORT RX/RX_DROP:"); + tag = 0; + for (i = 0; i < PPE_PORT_RX_CNT_TBL_ENTRIES; i++) { + reg = PPE_PORT_RX_CNT_TBL_ADDR + PPE_PORT_RX_CNT_TBL_INC * i; + ret = ppe_pkt_cnt_get(ppe_dev, reg, PPE_PKT_CNT_SIZE_5WORD, + &pkt_cnt, &drop_cnt); + if (ret) { + dev_err(ppe_dev->dev, "CNT ERROR %d\n", ret); + return ret; + } + + if (pkt_cnt > 0) { + if (!((++tag) % 4)) + seq_printf(seq, "\n%-24s", ""); + + seq_printf(seq, "%10u/%u(%s=%04d)", pkt_cnt, drop_cnt, + "port", i); + } + } + + seq_putc(seq, '\n'); + + return 0; +} + +/* The number of packets received or dropped by layer 2 processing. */ +static int ppe_l2_counter_get(struct ppe_device *ppe_dev, + struct seq_file *seq) +{ + u32 reg, pkt_cnt = 0, drop_cnt = 0; + int ret, i, tag = 0; + + seq_printf(seq, "%-24s", "L2 RX/RX_DROP:"); + for (i = 0; i < PPE_PRE_L2_CNT_TBL_ENTRIES; i++) { + reg = PPE_PRE_L2_CNT_TBL_ADDR + PPE_PRE_L2_CNT_TBL_INC * i; + ret = ppe_pkt_cnt_get(ppe_dev, reg, PPE_PKT_CNT_SIZE_5WORD, + &pkt_cnt, &drop_cnt); + if (ret) { + dev_err(ppe_dev->dev, "CNT ERROR %d\n", ret); + return ret; + } + + if (pkt_cnt > 0) { + if (!((++tag) % 4)) + seq_printf(seq, "\n%-24s", ""); + + seq_printf(seq, "%10u/%u(%s=%04d)", pkt_cnt, drop_cnt, + "vsi", i); + } + } + + seq_putc(seq, '\n'); + + return 0; +} + +/* The number of VLAN packets received by PPE. 
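+ *
+ * One counter is maintained per VSI (virtual switch instance), so
+ * the values below are indexed by VSI rather than by port.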
 */
+static int ppe_vlan_rx_counter_get(struct ppe_device *ppe_dev,
+				   struct seq_file *seq)
+{
+	u32 reg, pkt_cnt = 0;
+	int ret, i, tag = 0;
+
+	seq_printf(seq, "%-24s", "VLAN RX:");
+	for (i = 0; i < PPE_VLAN_CNT_TBL_ENTRIES; i++) {
+		reg = PPE_VLAN_CNT_TBL_ADDR + PPE_VLAN_CNT_TBL_INC * i;
+
+		ret = ppe_pkt_cnt_get(ppe_dev, reg, PPE_PKT_CNT_SIZE_3WORD,
+				      &pkt_cnt, NULL);
+		if (ret) {
+			dev_err(ppe_dev->dev, "CNT ERROR %d\n", ret);
+			return ret;
+		}
+
+		if (pkt_cnt > 0) {
+			if (!((++tag) % 4))
+				seq_printf(seq, "\n%-24s", "");
+
+			seq_printf(seq, "%10u(%s=%04d)", pkt_cnt, "vsi", i);
+		}
+	}
+
+	seq_putc(seq, '\n');
+
+	return 0;
+}
+
+/* The number of packets handed to the CPU by the PPE. */
+static int ppe_cpu_code_counter_get(struct ppe_device *ppe_dev,
+				    struct seq_file *seq)
+{
+	u32 reg, pkt_cnt = 0;
+	int ret, i;
+
+	seq_printf(seq, "%-24s", "CPU CODE:");
+	for (i = 0; i < PPE_DROP_CPU_CNT_TBL_ENTRIES; i++) {
+		reg = PPE_DROP_CPU_CNT_TBL_ADDR + PPE_DROP_CPU_CNT_TBL_INC * i;
+
+		ret = ppe_pkt_cnt_get(ppe_dev, reg, PPE_PKT_CNT_SIZE_3WORD,
+				      &pkt_cnt, NULL);
+		if (ret) {
+			dev_err(ppe_dev->dev, "CNT ERROR %d\n", ret);
+			return ret;
+		}
+
+		if (!pkt_cnt)
+			continue;
+
+		/* There are 256 CPU codes saved in the first 256 entries
+		 * of the register table, followed by 128 drop codes for
+		 * each of the PPE ports 0-7, giving 256 + 8 * 128 entries
+		 * in total.
+		 */
+		if (i < 256)
+			seq_printf(seq, "%10u(cpucode:%d)", pkt_cnt, i);
+		else
+			seq_printf(seq, "%10u(port=%04d),dropcode:%d", pkt_cnt,
+				   (i - 256) % 8, (i - 256) / 8);
+		seq_putc(seq, '\n');
+		seq_printf(seq, "%-24s", "");
+	}
+
+	seq_putc(seq, '\n');
+
+	return 0;
+}
+
+/* The number of packets forwarded by VLAN in the egress direction. */
+static int ppe_vlan_tx_counter_get(struct ppe_device *ppe_dev,
+				   struct seq_file *seq)
+{
+	u32 reg, pkt_cnt = 0;
+	int ret, i, tag = 0;
+
+	seq_printf(seq, "%-24s", "VLAN TX:");
+	for (i = 0; i < PPE_EG_VSI_COUNTER_TBL_ENTRIES; i++) {
+		reg = PPE_EG_VSI_COUNTER_TBL_ADDR + PPE_EG_VSI_COUNTER_TBL_INC * i;
+
+		ret = ppe_pkt_cnt_get(ppe_dev, reg, PPE_PKT_CNT_SIZE_3WORD,
+				      &pkt_cnt, NULL);
+		if (ret) {
+			dev_err(ppe_dev->dev, "CNT ERROR %d\n", ret);
+			return ret;
+		}
+
+		if (pkt_cnt > 0) {
+			if (!((++tag) % 4))
+				seq_printf(seq, "\n%-24s", "");
+
+			seq_printf(seq, "%10u(%s=%04d)", pkt_cnt, "vsi", i);
+		}
+	}
+
+	seq_putc(seq, '\n');
+
+	return 0;
+}
+
+/* The number of packets transmitted or dropped on the egress port.
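+ *
+ * Counters are shown for both the virtual ports (VPORT rows) and
+ * the physical ports (PORT rows).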
 */
+static int ppe_port_tx_counter_get(struct ppe_device *ppe_dev,
+				   struct seq_file *seq)
+{
+	u32 reg, pkt_cnt = 0, drop_cnt = 0;
+	int ret, i, tag;
+
+	seq_printf(seq, "%-24s", "VPORT TX/TX_DROP:");
+	tag = 0;
+	for (i = 0; i < PPE_VPORT_TX_COUNTER_TBL_ENTRIES; i++) {
+		reg = PPE_VPORT_TX_COUNTER_TBL_ADDR + PPE_VPORT_TX_COUNTER_TBL_INC * i;
+		ret = ppe_pkt_cnt_get(ppe_dev, reg, PPE_PKT_CNT_SIZE_3WORD,
+				      &pkt_cnt, NULL);
+		if (ret) {
+			dev_err(ppe_dev->dev, "CNT ERROR %d\n", ret);
+			return ret;
+		}
+
+		reg = PPE_VPORT_TX_DROP_CNT_TBL_ADDR + PPE_VPORT_TX_DROP_CNT_TBL_INC * i;
+		ret = ppe_pkt_cnt_get(ppe_dev, reg, PPE_PKT_CNT_SIZE_3WORD,
+				      &drop_cnt, NULL);
+		if (ret) {
+			dev_err(ppe_dev->dev, "CNT ERROR %d\n", ret);
+			return ret;
+		}
+
+		if (pkt_cnt > 0 || drop_cnt > 0) {
+			if (!((++tag) % 4))
+				seq_printf(seq, "\n%-24s", "");
+
+			seq_printf(seq, "%10u/%u(%s=%04d)", pkt_cnt, drop_cnt,
+				   "port", i);
+		}
+	}
+
+	seq_putc(seq, '\n');
+
+	seq_printf(seq, "%-24s", "PORT TX/TX_DROP:");
+	tag = 0;
+	for (i = 0; i < PPE_PORT_TX_COUNTER_TBL_ENTRIES; i++) {
+		reg = PPE_PORT_TX_COUNTER_TBL_ADDR + PPE_PORT_TX_COUNTER_TBL_INC * i;
+		ret = ppe_pkt_cnt_get(ppe_dev, reg, PPE_PKT_CNT_SIZE_3WORD,
+				      &pkt_cnt, NULL);
+		if (ret) {
+			dev_err(ppe_dev->dev, "CNT ERROR %d\n", ret);
+			return ret;
+		}
+
+		reg = PPE_PORT_TX_DROP_CNT_TBL_ADDR + PPE_PORT_TX_DROP_CNT_TBL_INC * i;
+		ret = ppe_pkt_cnt_get(ppe_dev, reg, PPE_PKT_CNT_SIZE_3WORD,
+				      &drop_cnt, NULL);
+		if (ret) {
+			dev_err(ppe_dev->dev, "CNT ERROR %d\n", ret);
+			return ret;
+		}
+
+		if (pkt_cnt > 0 || drop_cnt > 0) {
+			if (!((++tag) % 4))
+				seq_printf(seq, "\n%-24s", "");
+
+			seq_printf(seq, "%10u/%u(%s=%04d)", pkt_cnt, drop_cnt,
+				   "port", i);
+		}
+	}
+
+	seq_putc(seq, '\n');
+
+	return 0;
+}
+
+/* The number of packets transmitted or pending in the PPE queue.
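+ *
+ * Queue indexes below PPE_AC_UNICAST_QUEUE_CFG_TBL_ENTRIES are
+ * unicast queues; the remaining indexes up to
+ * PPE_QUEUE_TX_COUNTER_TBL_ENTRIES are multicast queues, whose
+ * pending and drop counters are read from separate tables.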
*/ +static int ppe_queue_counter_get(struct ppe_device *ppe_dev, + struct seq_file *seq) +{ + u32 reg, val, pkt_cnt = 0, pend_cnt = 0, drop_cnt = 0; + int ret, i, tag = 0; + + seq_printf(seq, "%-24s", "QUEUE TX/PEND/DROP:"); + for (i = 0; i < PPE_QUEUE_TX_COUNTER_TBL_ENTRIES; i++) { + reg = PPE_QUEUE_TX_COUNTER_TBL_ADDR + PPE_QUEUE_TX_COUNTER_TBL_INC * i; + ret = ppe_pkt_cnt_get(ppe_dev, reg, PPE_PKT_CNT_SIZE_3WORD, + &pkt_cnt, NULL); + if (ret) { + dev_err(ppe_dev->dev, "CNT ERROR %d\n", ret); + return ret; + } + + if (i < PPE_AC_UNICAST_QUEUE_CFG_TBL_ENTRIES) { + reg = PPE_AC_UNICAST_QUEUE_CNT_TBL_ADDR + + PPE_AC_UNICAST_QUEUE_CNT_TBL_INC * i; + ret = regmap_read(ppe_dev->regmap, reg, &val); + if (ret) { + dev_err(ppe_dev->dev, "CNT ERROR %d\n", ret); + return ret; + } + + pend_cnt = FIELD_GET(PPE_AC_UNICAST_QUEUE_CNT_TBL_PEND_CNT, val); + + reg = PPE_UNICAST_DROP_CNT_TBL_ADDR + + PPE_AC_UNICAST_QUEUE_CNT_TBL_INC * + (i * PPE_UNICAST_DROP_TYPES + PPE_UNICAST_DROP_FORCE_OFFSET); + + ret = ppe_pkt_cnt_get(ppe_dev, reg, PPE_PKT_CNT_SIZE_3WORD, + &drop_cnt, NULL); + if (ret) { + dev_err(ppe_dev->dev, "CNT ERROR %d\n", ret); + return ret; + } + } else { + int mq_offset = i - PPE_AC_UNICAST_QUEUE_CFG_TBL_ENTRIES; + + reg = PPE_AC_MULTICAST_QUEUE_CNT_TBL_ADDR + + PPE_AC_MULTICAST_QUEUE_CNT_TBL_INC * mq_offset; + ret = regmap_read(ppe_dev->regmap, reg, &val); + if (ret) { + dev_err(ppe_dev->dev, "CNT ERROR %d\n", ret); + return ret; + } + + pend_cnt = FIELD_GET(PPE_AC_MULTICAST_QUEUE_CNT_TBL_PEND_CNT, val); + + if (mq_offset < PPE_P0_MULTICAST_QUEUE_NUM) { + reg = PPE_CPU_PORT_MULTICAST_FORCE_DROP_CNT_TBL_ADDR(mq_offset); + } else { + mq_offset -= PPE_P0_MULTICAST_QUEUE_NUM; + + reg = PPE_P1_MULTICAST_DROP_CNT_TBL_ADDR; + reg += (mq_offset / PPE_MULTICAST_QUEUE_NUM) * + PPE_MULTICAST_QUEUE_PORT_ADDR_INC; + reg += (mq_offset % PPE_MULTICAST_QUEUE_NUM) * + PPE_MULTICAST_DROP_CNT_TBL_INC * + PPE_MULTICAST_DROP_TYPES; + } + + ret = ppe_pkt_cnt_get(ppe_dev, reg, PPE_PKT_CNT_SIZE_3WORD, + &drop_cnt, NULL); + if (ret) { + dev_err(ppe_dev->dev, "CNT ERROR %d\n", ret); + return ret; + } + } + + if (pkt_cnt > 0 || pend_cnt > 0 || drop_cnt > 0) { + if (!((++tag) % 4)) + seq_printf(seq, "\n%-24s", ""); + + seq_printf(seq, "%10u/%u/%u(%s=%04d)", + pkt_cnt, pend_cnt, drop_cnt, "queue", i); + } + } + + seq_putc(seq, '\n'); + + return 0; +} + +/* Display the various packet counters of PPE. */ +static int ppe_packet_counter_show(struct seq_file *seq, void *v) +{ + struct ppe_debugfs_entry *entry = seq->private; + struct ppe_device *ppe_dev = entry->ppe; + int ret; + + switch (entry->counter_type) { + case PPE_CNT_BM: + ret = ppe_bm_counter_get(ppe_dev, seq); + break; + case PPE_CNT_PARSE: + ret = ppe_parse_pkt_counter_get(ppe_dev, seq); + break; + case PPE_CNT_PORT_RX: + ret = ppe_port_rx_counter_get(ppe_dev, seq); + break; + case PPE_CNT_VLAN_RX: + ret = ppe_vlan_rx_counter_get(ppe_dev, seq); + break; + case PPE_CNT_L2_FWD: + ret = ppe_l2_counter_get(ppe_dev, seq); + break; + case PPE_CNT_CPU_CODE: + ret = ppe_cpu_code_counter_get(ppe_dev, seq); + break; + case PPE_CNT_VLAN_TX: + ret = ppe_vlan_tx_counter_get(ppe_dev, seq); + break; + case PPE_CNT_PORT_TX: + ret = ppe_port_tx_counter_get(ppe_dev, seq); + break; + case PPE_CNT_QM: + ret = ppe_queue_counter_get(ppe_dev, seq); + break; + default: + ret = -EINVAL; + break; + } + + return ret; +} + +/* Flush the various packet counters of PPE. 
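+ *
+ * Writing anything to one of the debugfs files clears all counters
+ * of the corresponding type; the written data itself is ignored.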
*/ +static ssize_t ppe_packet_counter_write(struct file *file, + const char __user *buf, + size_t count, loff_t *pos) +{ + struct ppe_debugfs_entry *entry = file_inode(file)->i_private; + struct ppe_device *ppe_dev = entry->ppe; + u32 reg; + int i; + + switch (entry->counter_type) { + case PPE_CNT_BM: + for (i = 0; i < PPE_DROP_CNT_TBL_ENTRIES; i++) { + reg = PPE_DROP_CNT_TBL_ADDR + i * PPE_DROP_CNT_TBL_INC; + ppe_tbl_pkt_cnt_clear(ppe_dev, reg, PPE_PKT_CNT_SIZE_1WORD); + } + + for (i = 0; i < PPE_DROP_STAT_TBL_ENTRIES; i++) { + reg = PPE_DROP_STAT_TBL_ADDR + PPE_DROP_STAT_TBL_INC * i; + ppe_tbl_pkt_cnt_clear(ppe_dev, reg, PPE_PKT_CNT_SIZE_3WORD); + } + + break; + case PPE_CNT_PARSE: + for (i = 0; i < PPE_IPR_PKT_CNT_TBL_ENTRIES; i++) { + reg = PPE_IPR_PKT_CNT_TBL_ADDR + i * PPE_IPR_PKT_CNT_TBL_INC; + ppe_tbl_pkt_cnt_clear(ppe_dev, reg, PPE_PKT_CNT_SIZE_1WORD); + + reg = PPE_TPR_PKT_CNT_TBL_ADDR + i * PPE_TPR_PKT_CNT_TBL_INC; + ppe_tbl_pkt_cnt_clear(ppe_dev, reg, PPE_PKT_CNT_SIZE_1WORD); + } + + break; + case PPE_CNT_PORT_RX: + for (i = 0; i < PPE_PORT_RX_CNT_TBL_ENTRIES; i++) { + reg = PPE_PORT_RX_CNT_TBL_ADDR + PPE_PORT_RX_CNT_TBL_INC * i; + ppe_tbl_pkt_cnt_clear(ppe_dev, reg, PPE_PKT_CNT_SIZE_5WORD); + } + + for (i = 0; i < PPE_PHY_PORT_RX_CNT_TBL_ENTRIES; i++) { + reg = PPE_PHY_PORT_RX_CNT_TBL_ADDR + PPE_PHY_PORT_RX_CNT_TBL_INC * i; + ppe_tbl_pkt_cnt_clear(ppe_dev, reg, PPE_PKT_CNT_SIZE_5WORD); + } + + break; + case PPE_CNT_VLAN_RX: + for (i = 0; i < PPE_VLAN_CNT_TBL_ENTRIES; i++) { + reg = PPE_VLAN_CNT_TBL_ADDR + PPE_VLAN_CNT_TBL_INC * i; + ppe_tbl_pkt_cnt_clear(ppe_dev, reg, PPE_PKT_CNT_SIZE_3WORD); + } + + break; + case PPE_CNT_L2_FWD: + for (i = 0; i < PPE_PRE_L2_CNT_TBL_ENTRIES; i++) { + reg = PPE_PRE_L2_CNT_TBL_ADDR + PPE_PRE_L2_CNT_TBL_INC * i; + ppe_tbl_pkt_cnt_clear(ppe_dev, reg, PPE_PKT_CNT_SIZE_5WORD); + } + + break; + case PPE_CNT_CPU_CODE: + for (i = 0; i < PPE_DROP_CPU_CNT_TBL_ENTRIES; i++) { + reg = PPE_DROP_CPU_CNT_TBL_ADDR + PPE_DROP_CPU_CNT_TBL_INC * i; + ppe_tbl_pkt_cnt_clear(ppe_dev, reg, PPE_PKT_CNT_SIZE_3WORD); + } + + break; + case PPE_CNT_VLAN_TX: + for (i = 0; i < PPE_EG_VSI_COUNTER_TBL_ENTRIES; i++) { + reg = PPE_EG_VSI_COUNTER_TBL_ADDR + PPE_EG_VSI_COUNTER_TBL_INC * i; + ppe_tbl_pkt_cnt_clear(ppe_dev, reg, PPE_PKT_CNT_SIZE_3WORD); + } + + break; + case PPE_CNT_PORT_TX: + for (i = 0; i < PPE_PORT_TX_COUNTER_TBL_ENTRIES; i++) { + reg = PPE_PORT_TX_DROP_CNT_TBL_ADDR + PPE_PORT_TX_DROP_CNT_TBL_INC * i; + ppe_tbl_pkt_cnt_clear(ppe_dev, reg, PPE_PKT_CNT_SIZE_3WORD); + + reg = PPE_PORT_TX_COUNTER_TBL_ADDR + PPE_PORT_TX_COUNTER_TBL_INC * i; + ppe_tbl_pkt_cnt_clear(ppe_dev, reg, PPE_PKT_CNT_SIZE_3WORD); + } + + for (i = 0; i < PPE_VPORT_TX_COUNTER_TBL_ENTRIES; i++) { + reg = PPE_VPORT_TX_COUNTER_TBL_ADDR + PPE_VPORT_TX_COUNTER_TBL_INC * i; + ppe_tbl_pkt_cnt_clear(ppe_dev, reg, PPE_PKT_CNT_SIZE_3WORD); + + reg = PPE_VPORT_TX_DROP_CNT_TBL_ADDR + PPE_VPORT_TX_DROP_CNT_TBL_INC * i; + ppe_tbl_pkt_cnt_clear(ppe_dev, reg, PPE_PKT_CNT_SIZE_3WORD); + } + + break; + case PPE_CNT_QM: + for (i = 0; i < PPE_QUEUE_TX_COUNTER_TBL_ENTRIES; i++) { + reg = PPE_QUEUE_TX_COUNTER_TBL_ADDR + PPE_QUEUE_TX_COUNTER_TBL_INC * i; + ppe_tbl_pkt_cnt_clear(ppe_dev, reg, PPE_PKT_CNT_SIZE_3WORD); + } + + break; + default: + break; + } + + return count; +} +DEFINE_SHOW_STORE_ATTRIBUTE(ppe_packet_counter); + +void ppe_debugfs_setup(struct ppe_device *ppe_dev) +{ + struct ppe_debugfs_entry *entry; + int i; + + ppe_dev->debugfs_root = debugfs_create_dir("ppe", NULL); + if (IS_ERR(ppe_dev->debugfs_root)) 
+		return;
+
+	for (i = 0; i < ARRAY_SIZE(debugfs_files); i++) {
+		entry = devm_kzalloc(ppe_dev->dev, sizeof(*entry), GFP_KERNEL);
+		if (!entry)
+			return;
+
+		entry->ppe = ppe_dev;
+		entry->counter_type = debugfs_files[i].counter_type;
+
+		debugfs_create_file(debugfs_files[i].name, 0444,
+				    ppe_dev->debugfs_root, entry,
+				    &ppe_packet_counter_fops);
+	}
+}
+
+void ppe_debugfs_teardown(struct ppe_device *ppe_dev)
+{
+	debugfs_remove_recursive(ppe_dev->debugfs_root);
+	ppe_dev->debugfs_root = NULL;
+}
diff --git a/drivers/net/ethernet/qualcomm/ppe/ppe_debugfs.h b/drivers/net/ethernet/qualcomm/ppe/ppe_debugfs.h
new file mode 100644
index 000000000000..81f49a709123
--- /dev/null
+++ b/drivers/net/ethernet/qualcomm/ppe/ppe_debugfs.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0-only
+ *
+ * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
+ */
+
+/* PPE debugfs counters setup. */
+
+#ifndef __PPE_DEBUGFS_H__
+#define __PPE_DEBUGFS_H__
+
+#include "ppe.h"
+
+void ppe_debugfs_setup(struct ppe_device *ppe_dev);
+void ppe_debugfs_teardown(struct ppe_device *ppe_dev);
+
+#endif
diff --git a/drivers/net/ethernet/qualcomm/ppe/ppe_regs.h b/drivers/net/ethernet/qualcomm/ppe/ppe_regs.h
new file mode 100644
index 000000000000..746dfbb5a682
--- /dev/null
+++ b/drivers/net/ethernet/qualcomm/ppe/ppe_regs.h
@@ -0,0 +1,591 @@
+/* SPDX-License-Identifier: GPL-2.0-only
+ *
+ * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
+ */
+
+/* PPE hardware register and table declarations. */
+#ifndef __PPE_REGS_H__
+#define __PPE_REGS_H__
+
+#include <linux/bitfield.h>
+
+/* PPE scheduler configurations for buffer manager block. */
+#define PPE_BM_SCH_CTRL_ADDR			0xb000
+#define PPE_BM_SCH_CTRL_INC			4
+#define PPE_BM_SCH_CTRL_SCH_DEPTH		GENMASK(7, 0)
+#define PPE_BM_SCH_CTRL_SCH_OFFSET		GENMASK(14, 8)
+#define PPE_BM_SCH_CTRL_SCH_EN			BIT(31)
+
+/* PPE drop counters. */
+#define PPE_DROP_CNT_TBL_ADDR			0xb024
+#define PPE_DROP_CNT_TBL_ENTRIES		8
+#define PPE_DROP_CNT_TBL_INC			4
+
+/* BM port drop counters. */
+#define PPE_DROP_STAT_TBL_ADDR			0xe000
+#define PPE_DROP_STAT_TBL_ENTRIES		30
+#define PPE_DROP_STAT_TBL_INC			0x10
+
+/* Egress VLAN counters. */
+#define PPE_EG_VSI_COUNTER_TBL_ADDR		0x41000
+#define PPE_EG_VSI_COUNTER_TBL_ENTRIES		64
+#define PPE_EG_VSI_COUNTER_TBL_INC		0x10
+
+/* Port TX counters. */
+#define PPE_PORT_TX_COUNTER_TBL_ADDR		0x45000
+#define PPE_PORT_TX_COUNTER_TBL_ENTRIES		8
+#define PPE_PORT_TX_COUNTER_TBL_INC		0x10
+
+/* Virtual port TX counters. */
+#define PPE_VPORT_TX_COUNTER_TBL_ADDR		0x47000
+#define PPE_VPORT_TX_COUNTER_TBL_ENTRIES	256
+#define PPE_VPORT_TX_COUNTER_TBL_INC		0x10
+
+/* Queue counters. */
+#define PPE_QUEUE_TX_COUNTER_TBL_ADDR		0x4a000
+#define PPE_QUEUE_TX_COUNTER_TBL_ENTRIES	300
+#define PPE_QUEUE_TX_COUNTER_TBL_INC		0x10
+
+/* The RSS settings control how the RSS hash value is calculated for
+ * packets received towards the ARM cores. The hash is then used to
+ * derive a queue offset, which selects the queue on which the packet
+ * is delivered to the ARM cores.
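+ *
+ * See ppe_queue_ucast_offset_hash_set(), which maps an RSS hash
+ * value to such a queue offset for a given profile.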
+ */ +#define PPE_RSS_HASH_MASK_ADDR 0xb4318 +#define PPE_RSS_HASH_MASK_HASH_MASK GENMASK(20, 0) +#define PPE_RSS_HASH_MASK_FRAGMENT BIT(28) + +#define PPE_RSS_HASH_SEED_ADDR 0xb431c +#define PPE_RSS_HASH_SEED_VAL GENMASK(31, 0) + +#define PPE_RSS_HASH_MIX_ADDR 0xb4320 +#define PPE_RSS_HASH_MIX_ENTRIES 11 +#define PPE_RSS_HASH_MIX_INC 4 +#define PPE_RSS_HASH_MIX_VAL GENMASK(4, 0) + +#define PPE_RSS_HASH_FIN_ADDR 0xb4350 +#define PPE_RSS_HASH_FIN_ENTRIES 5 +#define PPE_RSS_HASH_FIN_INC 4 +#define PPE_RSS_HASH_FIN_INNER GENMASK(4, 0) +#define PPE_RSS_HASH_FIN_OUTER GENMASK(9, 5) + +#define PPE_RSS_HASH_MASK_IPV4_ADDR 0xb4380 +#define PPE_RSS_HASH_MASK_IPV4_HASH_MASK GENMASK(20, 0) +#define PPE_RSS_HASH_MASK_IPV4_FRAGMENT BIT(28) + +#define PPE_RSS_HASH_SEED_IPV4_ADDR 0xb4384 +#define PPE_RSS_HASH_SEED_IPV4_VAL GENMASK(31, 0) + +#define PPE_RSS_HASH_MIX_IPV4_ADDR 0xb4390 +#define PPE_RSS_HASH_MIX_IPV4_ENTRIES 5 +#define PPE_RSS_HASH_MIX_IPV4_INC 4 +#define PPE_RSS_HASH_MIX_IPV4_VAL GENMASK(4, 0) + +#define PPE_RSS_HASH_FIN_IPV4_ADDR 0xb43b0 +#define PPE_RSS_HASH_FIN_IPV4_ENTRIES 5 +#define PPE_RSS_HASH_FIN_IPV4_INC 4 +#define PPE_RSS_HASH_FIN_IPV4_INNER GENMASK(4, 0) +#define PPE_RSS_HASH_FIN_IPV4_OUTER GENMASK(9, 5) + +#define PPE_BM_SCH_CFG_TBL_ADDR 0xc000 +#define PPE_BM_SCH_CFG_TBL_ENTRIES 128 +#define PPE_BM_SCH_CFG_TBL_INC 0x10 +#define PPE_BM_SCH_CFG_TBL_PORT_NUM GENMASK(3, 0) +#define PPE_BM_SCH_CFG_TBL_DIR BIT(4) +#define PPE_BM_SCH_CFG_TBL_VALID BIT(5) +#define PPE_BM_SCH_CFG_TBL_SECOND_PORT_VALID BIT(6) +#define PPE_BM_SCH_CFG_TBL_SECOND_PORT GENMASK(11, 8) + +/* PPE service code configuration for the ingress direction functions, + * including bypass configuration for relevant PPE switch core functions + * such as flow entry lookup bypass. + */ +#define PPE_SERVICE_TBL_ADDR 0x15000 +#define PPE_SERVICE_TBL_ENTRIES 256 +#define PPE_SERVICE_TBL_INC 0x10 +#define PPE_SERVICE_W0_BYPASS_BITMAP GENMASK(31, 0) +#define PPE_SERVICE_W1_RX_CNT_EN BIT(0) + +#define PPE_SERVICE_SET_BYPASS_BITMAP(tbl_cfg, value) \ + FIELD_MODIFY(PPE_SERVICE_W0_BYPASS_BITMAP, tbl_cfg, value) +#define PPE_SERVICE_SET_RX_CNT_EN(tbl_cfg, value) \ + FIELD_MODIFY(PPE_SERVICE_W1_RX_CNT_EN, (tbl_cfg) + 0x1, value) + +/* PPE port egress VLAN configurations. */ +#define PPE_PORT_EG_VLAN_TBL_ADDR 0x20020 +#define PPE_PORT_EG_VLAN_TBL_ENTRIES 8 +#define PPE_PORT_EG_VLAN_TBL_INC 4 +#define PPE_PORT_EG_VLAN_TBL_VLAN_TYPE BIT(0) +#define PPE_PORT_EG_VLAN_TBL_CTAG_MODE GENMASK(2, 1) +#define PPE_PORT_EG_VLAN_TBL_STAG_MODE GENMASK(4, 3) +#define PPE_PORT_EG_VLAN_TBL_VSI_TAG_MODE_EN BIT(5) +#define PPE_PORT_EG_VLAN_TBL_PCP_PROP_CMD BIT(6) +#define PPE_PORT_EG_VLAN_TBL_DEI_PROP_CMD BIT(7) +#define PPE_PORT_EG_VLAN_TBL_TX_COUNTING_EN BIT(8) + +/* PPE queue counters enable/disable control. */ +#define PPE_EG_BRIDGE_CONFIG_ADDR 0x20044 +#define PPE_EG_BRIDGE_CONFIG_QUEUE_CNT_EN BIT(2) + +/* PPE service code configuration on the egress direction. 
*/ +#define PPE_EG_SERVICE_TBL_ADDR 0x43000 +#define PPE_EG_SERVICE_TBL_ENTRIES 256 +#define PPE_EG_SERVICE_TBL_INC 0x10 +#define PPE_EG_SERVICE_W0_UPDATE_ACTION GENMASK(31, 0) +#define PPE_EG_SERVICE_W1_NEXT_SERVCODE GENMASK(7, 0) +#define PPE_EG_SERVICE_W1_HW_SERVICE GENMASK(13, 8) +#define PPE_EG_SERVICE_W1_OFFSET_SEL BIT(14) +#define PPE_EG_SERVICE_W1_TX_CNT_EN BIT(15) + +#define PPE_EG_SERVICE_SET_UPDATE_ACTION(tbl_cfg, value) \ + FIELD_MODIFY(PPE_EG_SERVICE_W0_UPDATE_ACTION, tbl_cfg, value) +#define PPE_EG_SERVICE_SET_NEXT_SERVCODE(tbl_cfg, value) \ + FIELD_MODIFY(PPE_EG_SERVICE_W1_NEXT_SERVCODE, (tbl_cfg) + 0x1, value) +#define PPE_EG_SERVICE_SET_HW_SERVICE(tbl_cfg, value) \ + FIELD_MODIFY(PPE_EG_SERVICE_W1_HW_SERVICE, (tbl_cfg) + 0x1, value) +#define PPE_EG_SERVICE_SET_OFFSET_SEL(tbl_cfg, value) \ + FIELD_MODIFY(PPE_EG_SERVICE_W1_OFFSET_SEL, (tbl_cfg) + 0x1, value) +#define PPE_EG_SERVICE_SET_TX_CNT_EN(tbl_cfg, value) \ + FIELD_MODIFY(PPE_EG_SERVICE_W1_TX_CNT_EN, (tbl_cfg) + 0x1, value) + +/* PPE port bridge configuration */ +#define PPE_PORT_BRIDGE_CTRL_ADDR 0x60300 +#define PPE_PORT_BRIDGE_CTRL_ENTRIES 8 +#define PPE_PORT_BRIDGE_CTRL_INC 4 +#define PPE_PORT_BRIDGE_NEW_LRN_EN BIT(0) +#define PPE_PORT_BRIDGE_STA_MOVE_LRN_EN BIT(3) +#define PPE_PORT_BRIDGE_TXMAC_EN BIT(16) + +/* PPE port control configurations for the traffic to the multicast queues. */ +#define PPE_MC_MTU_CTRL_TBL_ADDR 0x60a00 +#define PPE_MC_MTU_CTRL_TBL_ENTRIES 8 +#define PPE_MC_MTU_CTRL_TBL_INC 4 +#define PPE_MC_MTU_CTRL_TBL_MTU GENMASK(13, 0) +#define PPE_MC_MTU_CTRL_TBL_MTU_CMD GENMASK(15, 14) +#define PPE_MC_MTU_CTRL_TBL_TX_CNT_EN BIT(16) + +/* PPE VSI configurations */ +#define PPE_VSI_TBL_ADDR 0x63800 +#define PPE_VSI_TBL_ENTRIES 64 +#define PPE_VSI_TBL_INC 0x10 +#define PPE_VSI_W0_MEMBER_PORT_BITMAP GENMASK(7, 0) +#define PPE_VSI_W0_UUC_BITMAP GENMASK(15, 8) +#define PPE_VSI_W0_UMC_BITMAP GENMASK(23, 16) +#define PPE_VSI_W0_BC_BITMAP GENMASK(31, 24) +#define PPE_VSI_W1_NEW_ADDR_LRN_EN BIT(0) +#define PPE_VSI_W1_NEW_ADDR_FWD_CMD GENMASK(2, 1) +#define PPE_VSI_W1_STATION_MOVE_LRN_EN BIT(3) +#define PPE_VSI_W1_STATION_MOVE_FWD_CMD GENMASK(5, 4) + +#define PPE_VSI_SET_MEMBER_PORT_BITMAP(tbl_cfg, value) \ + FIELD_MODIFY(PPE_VSI_W0_MEMBER_PORT_BITMAP, tbl_cfg, value) +#define PPE_VSI_SET_UUC_BITMAP(tbl_cfg, value) \ + FIELD_MODIFY(PPE_VSI_W0_UUC_BITMAP, tbl_cfg, value) +#define PPE_VSI_SET_UMC_BITMAP(tbl_cfg, value) \ + FIELD_MODIFY(PPE_VSI_W0_UMC_BITMAP, tbl_cfg, value) +#define PPE_VSI_SET_BC_BITMAP(tbl_cfg, value) \ + FIELD_MODIFY(PPE_VSI_W0_BC_BITMAP, tbl_cfg, value) +#define PPE_VSI_SET_NEW_ADDR_LRN_EN(tbl_cfg, value) \ + FIELD_MODIFY(PPE_VSI_W1_NEW_ADDR_LRN_EN, (tbl_cfg) + 0x1, value) +#define PPE_VSI_SET_NEW_ADDR_FWD_CMD(tbl_cfg, value) \ + FIELD_MODIFY(PPE_VSI_W1_NEW_ADDR_FWD_CMD, (tbl_cfg) + 0x1, value) +#define PPE_VSI_SET_STATION_MOVE_LRN_EN(tbl_cfg, value) \ + FIELD_MODIFY(PPE_VSI_W1_STATION_MOVE_LRN_EN, (tbl_cfg) + 0x1, value) +#define PPE_VSI_SET_STATION_MOVE_FWD_CMD(tbl_cfg, value) \ + FIELD_MODIFY(PPE_VSI_W1_STATION_MOVE_FWD_CMD, (tbl_cfg) + 0x1, value) + +/* PPE port control configurations for the traffic to the unicast queues. 
*/ +#define PPE_MRU_MTU_CTRL_TBL_ADDR 0x65000 +#define PPE_MRU_MTU_CTRL_TBL_ENTRIES 256 +#define PPE_MRU_MTU_CTRL_TBL_INC 0x10 +#define PPE_MRU_MTU_CTRL_W0_MRU GENMASK(13, 0) +#define PPE_MRU_MTU_CTRL_W0_MRU_CMD GENMASK(15, 14) +#define PPE_MRU_MTU_CTRL_W0_MTU GENMASK(29, 16) +#define PPE_MRU_MTU_CTRL_W0_MTU_CMD GENMASK(31, 30) +#define PPE_MRU_MTU_CTRL_W1_RX_CNT_EN BIT(0) +#define PPE_MRU_MTU_CTRL_W1_TX_CNT_EN BIT(1) +#define PPE_MRU_MTU_CTRL_W1_SRC_PROFILE GENMASK(3, 2) +#define PPE_MRU_MTU_CTRL_W1_INNER_PREC_LOW BIT(31) +#define PPE_MRU_MTU_CTRL_W2_INNER_PREC_HIGH GENMASK(1, 0) + +#define PPE_MRU_MTU_CTRL_SET_MRU(tbl_cfg, value) \ + FIELD_MODIFY(PPE_MRU_MTU_CTRL_W0_MRU, tbl_cfg, value) +#define PPE_MRU_MTU_CTRL_SET_MRU_CMD(tbl_cfg, value) \ + FIELD_MODIFY(PPE_MRU_MTU_CTRL_W0_MRU_CMD, tbl_cfg, value) +#define PPE_MRU_MTU_CTRL_SET_MTU(tbl_cfg, value) \ + FIELD_MODIFY(PPE_MRU_MTU_CTRL_W0_MTU, tbl_cfg, value) +#define PPE_MRU_MTU_CTRL_SET_MTU_CMD(tbl_cfg, value) \ + FIELD_MODIFY(PPE_MRU_MTU_CTRL_W0_MTU_CMD, tbl_cfg, value) +#define PPE_MRU_MTU_CTRL_SET_RX_CNT_EN(tbl_cfg, value) \ + FIELD_MODIFY(PPE_MRU_MTU_CTRL_W1_RX_CNT_EN, (tbl_cfg) + 0x1, value) +#define PPE_MRU_MTU_CTRL_SET_TX_CNT_EN(tbl_cfg, value) \ + FIELD_MODIFY(PPE_MRU_MTU_CTRL_W1_TX_CNT_EN, (tbl_cfg) + 0x1, value) + +/* PPE service code configuration for destination port and counter. */ +#define PPE_IN_L2_SERVICE_TBL_ADDR 0x66000 +#define PPE_IN_L2_SERVICE_TBL_ENTRIES 256 +#define PPE_IN_L2_SERVICE_TBL_INC 0x10 +#define PPE_IN_L2_SERVICE_TBL_DST_PORT_ID_VALID BIT(0) +#define PPE_IN_L2_SERVICE_TBL_DST_PORT_ID GENMASK(4, 1) +#define PPE_IN_L2_SERVICE_TBL_DST_DIRECTION BIT(5) +#define PPE_IN_L2_SERVICE_TBL_DST_BYPASS_BITMAP GENMASK(29, 6) +#define PPE_IN_L2_SERVICE_TBL_RX_CNT_EN BIT(30) +#define PPE_IN_L2_SERVICE_TBL_TX_CNT_EN BIT(31) + +/* L2 Port configurations */ +#define PPE_L2_VP_PORT_TBL_ADDR 0x98000 +#define PPE_L2_VP_PORT_TBL_ENTRIES 256 +#define PPE_L2_VP_PORT_TBL_INC 0x10 +#define PPE_L2_VP_PORT_W0_INVALID_VSI_FWD_EN BIT(0) +#define PPE_L2_VP_PORT_W0_DST_INFO GENMASK(9, 2) + +#define PPE_L2_PORT_SET_INVALID_VSI_FWD_EN(tbl_cfg, value) \ + FIELD_MODIFY(PPE_L2_VP_PORT_W0_INVALID_VSI_FWD_EN, tbl_cfg, value) +#define PPE_L2_PORT_SET_DST_INFO(tbl_cfg, value) \ + FIELD_MODIFY(PPE_L2_VP_PORT_W0_DST_INFO, tbl_cfg, value) + +/* Port RX and RX drop counters. */ +#define PPE_PORT_RX_CNT_TBL_ADDR 0x150000 +#define PPE_PORT_RX_CNT_TBL_ENTRIES 256 +#define PPE_PORT_RX_CNT_TBL_INC 0x20 + +/* Physical port RX and RX drop counters. */ +#define PPE_PHY_PORT_RX_CNT_TBL_ADDR 0x156000 +#define PPE_PHY_PORT_RX_CNT_TBL_ENTRIES 8 +#define PPE_PHY_PORT_RX_CNT_TBL_INC 0x20 + +/* Counters for the packet to CPU port. */ +#define PPE_DROP_CPU_CNT_TBL_ADDR 0x160000 +#define PPE_DROP_CPU_CNT_TBL_ENTRIES 1280 +#define PPE_DROP_CPU_CNT_TBL_INC 0x10 + +/* VLAN counters. */ +#define PPE_VLAN_CNT_TBL_ADDR 0x178000 +#define PPE_VLAN_CNT_TBL_ENTRIES 64 +#define PPE_VLAN_CNT_TBL_INC 0x10 + +/* PPE L2 counters. */ +#define PPE_PRE_L2_CNT_TBL_ADDR 0x17c000 +#define PPE_PRE_L2_CNT_TBL_ENTRIES 64 +#define PPE_PRE_L2_CNT_TBL_INC 0x20 + +/* Port TX drop counters. */ +#define PPE_PORT_TX_DROP_CNT_TBL_ADDR 0x17d000 +#define PPE_PORT_TX_DROP_CNT_TBL_ENTRIES 8 +#define PPE_PORT_TX_DROP_CNT_TBL_INC 0x10 + +/* Virtual port TX counters. */ +#define PPE_VPORT_TX_DROP_CNT_TBL_ADDR 0x17e000 +#define PPE_VPORT_TX_DROP_CNT_TBL_ENTRIES 256 +#define PPE_VPORT_TX_DROP_CNT_TBL_INC 0x10 + +/* Counters for the tunnel packet. 
 */
+#define PPE_TPR_PKT_CNT_TBL_ADDR		0x1d0080
+#define PPE_TPR_PKT_CNT_TBL_ENTRIES		8
+#define PPE_TPR_PKT_CNT_TBL_INC			4
+
+/* Counters for all packets received. */
+#define PPE_IPR_PKT_CNT_TBL_ADDR		0x1e0080
+#define PPE_IPR_PKT_CNT_TBL_ENTRIES		8
+#define PPE_IPR_PKT_CNT_TBL_INC			4
+
+/* PPE service code configuration for the tunnel packet. */
+#define PPE_TL_SERVICE_TBL_ADDR			0x306000
+#define PPE_TL_SERVICE_TBL_ENTRIES		256
+#define PPE_TL_SERVICE_TBL_INC			4
+#define PPE_TL_SERVICE_TBL_BYPASS_BITMAP	GENMASK(31, 0)
+
+/* Port scheduler global config. */
+#define PPE_PSCH_SCH_DEPTH_CFG_ADDR		0x400000
+#define PPE_PSCH_SCH_DEPTH_CFG_INC		4
+#define PPE_PSCH_SCH_DEPTH_CFG_SCH_DEPTH	GENMASK(7, 0)
+
+/* PPE queue level scheduler configurations. */
+#define PPE_L0_FLOW_MAP_TBL_ADDR		0x402000
+#define PPE_L0_FLOW_MAP_TBL_ENTRIES		300
+#define PPE_L0_FLOW_MAP_TBL_INC			0x10
+#define PPE_L0_FLOW_MAP_TBL_FLOW_ID		GENMASK(5, 0)
+#define PPE_L0_FLOW_MAP_TBL_C_PRI		GENMASK(8, 6)
+#define PPE_L0_FLOW_MAP_TBL_E_PRI		GENMASK(11, 9)
+#define PPE_L0_FLOW_MAP_TBL_C_NODE_WT		GENMASK(21, 12)
+#define PPE_L0_FLOW_MAP_TBL_E_NODE_WT		GENMASK(31, 22)
+
+#define PPE_L0_C_FLOW_CFG_TBL_ADDR		0x404000
+#define PPE_L0_C_FLOW_CFG_TBL_ENTRIES		512
+#define PPE_L0_C_FLOW_CFG_TBL_INC		0x10
+#define PPE_L0_C_FLOW_CFG_TBL_NODE_ID		GENMASK(7, 0)
+#define PPE_L0_C_FLOW_CFG_TBL_NODE_CREDIT_UNIT	BIT(8)
+
+#define PPE_L0_E_FLOW_CFG_TBL_ADDR		0x406000
+#define PPE_L0_E_FLOW_CFG_TBL_ENTRIES		512
+#define PPE_L0_E_FLOW_CFG_TBL_INC		0x10
+#define PPE_L0_E_FLOW_CFG_TBL_NODE_ID		GENMASK(7, 0)
+#define PPE_L0_E_FLOW_CFG_TBL_NODE_CREDIT_UNIT	BIT(8)
+
+#define PPE_L0_FLOW_PORT_MAP_TBL_ADDR		0x408000
+#define PPE_L0_FLOW_PORT_MAP_TBL_ENTRIES	300
+#define PPE_L0_FLOW_PORT_MAP_TBL_INC		0x10
+#define PPE_L0_FLOW_PORT_MAP_TBL_PORT_NUM	GENMASK(3, 0)
+
+#define PPE_L0_COMP_CFG_TBL_ADDR		0x428000
+#define PPE_L0_COMP_CFG_TBL_ENTRIES		300
+#define PPE_L0_COMP_CFG_TBL_INC			0x10
+#define PPE_L0_COMP_CFG_TBL_SHAPER_METER_LEN	GENMASK(1, 0)
+#define PPE_L0_COMP_CFG_TBL_NODE_METER_LEN	GENMASK(3, 2)
+
+/* PPE queue to Ethernet DMA ring mapping table. */
+#define PPE_RING_Q_MAP_TBL_ADDR			0x42a000
+#define PPE_RING_Q_MAP_TBL_ENTRIES		24
+#define PPE_RING_Q_MAP_TBL_INC			0x40
+
+/* Table addresses for per-queue dequeue setting. */
+#define PPE_DEQ_OPR_TBL_ADDR			0x430000
+#define PPE_DEQ_OPR_TBL_ENTRIES			300
+#define PPE_DEQ_OPR_TBL_INC			0x10
+#define PPE_DEQ_OPR_TBL_DEQ_DISABLE		BIT(0)
+
+/* PPE flow level scheduler configurations.
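+ *
+ * These L1 tables parallel the queue level (L0) tables above, one
+ * scheduling level higher in the hierarchy.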
 */
+#define PPE_L1_FLOW_MAP_TBL_ADDR		0x440000
+#define PPE_L1_FLOW_MAP_TBL_ENTRIES		64
+#define PPE_L1_FLOW_MAP_TBL_INC			0x10
+#define PPE_L1_FLOW_MAP_TBL_FLOW_ID		GENMASK(3, 0)
+#define PPE_L1_FLOW_MAP_TBL_C_PRI		GENMASK(6, 4)
+#define PPE_L1_FLOW_MAP_TBL_E_PRI		GENMASK(9, 7)
+#define PPE_L1_FLOW_MAP_TBL_C_NODE_WT		GENMASK(19, 10)
+#define PPE_L1_FLOW_MAP_TBL_E_NODE_WT		GENMASK(29, 20)
+
+#define PPE_L1_C_FLOW_CFG_TBL_ADDR		0x442000
+#define PPE_L1_C_FLOW_CFG_TBL_ENTRIES		64
+#define PPE_L1_C_FLOW_CFG_TBL_INC		0x10
+#define PPE_L1_C_FLOW_CFG_TBL_NODE_ID		GENMASK(5, 0)
+#define PPE_L1_C_FLOW_CFG_TBL_NODE_CREDIT_UNIT	BIT(6)
+
+#define PPE_L1_E_FLOW_CFG_TBL_ADDR		0x444000
+#define PPE_L1_E_FLOW_CFG_TBL_ENTRIES		64
+#define PPE_L1_E_FLOW_CFG_TBL_INC		0x10
+#define PPE_L1_E_FLOW_CFG_TBL_NODE_ID		GENMASK(5, 0)
+#define PPE_L1_E_FLOW_CFG_TBL_NODE_CREDIT_UNIT	BIT(6)
+
+#define PPE_L1_FLOW_PORT_MAP_TBL_ADDR		0x446000
+#define PPE_L1_FLOW_PORT_MAP_TBL_ENTRIES	64
+#define PPE_L1_FLOW_PORT_MAP_TBL_INC		0x10
+#define PPE_L1_FLOW_PORT_MAP_TBL_PORT_NUM	GENMASK(3, 0)
+
+#define PPE_L1_COMP_CFG_TBL_ADDR		0x46a000
+#define PPE_L1_COMP_CFG_TBL_ENTRIES		64
+#define PPE_L1_COMP_CFG_TBL_INC			0x10
+#define PPE_L1_COMP_CFG_TBL_SHAPER_METER_LEN	GENMASK(1, 0)
+#define PPE_L1_COMP_CFG_TBL_NODE_METER_LEN	GENMASK(3, 2)
+
+/* PPE port scheduler configurations for egress. */
+#define PPE_PSCH_SCH_CFG_TBL_ADDR		0x47a000
+#define PPE_PSCH_SCH_CFG_TBL_ENTRIES		128
+#define PPE_PSCH_SCH_CFG_TBL_INC		0x10
+#define PPE_PSCH_SCH_CFG_TBL_DES_PORT		GENMASK(3, 0)
+#define PPE_PSCH_SCH_CFG_TBL_ENS_PORT		GENMASK(7, 4)
+#define PPE_PSCH_SCH_CFG_TBL_ENS_PORT_BITMAP	GENMASK(15, 8)
+#define PPE_PSCH_SCH_CFG_TBL_DES_SECOND_PORT_EN	BIT(16)
+#define PPE_PSCH_SCH_CFG_TBL_DES_SECOND_PORT	GENMASK(20, 17)
+
+/* There are 15 BM ports and 4 BM groups supported by PPE.
+ * BM ports 0-7 are for EDMA port 0, BM ports 8-13 are for the PPE
+ * physical ports 1-6, and BM port 14 is for the EIP port.
+ */
+#define PPE_BM_PORT_FC_MODE_ADDR		0x600100
+#define PPE_BM_PORT_FC_MODE_ENTRIES		15
+#define PPE_BM_PORT_FC_MODE_INC			0x4
+#define PPE_BM_PORT_FC_MODE_EN			BIT(0)
+
+#define PPE_BM_PORT_GROUP_ID_ADDR		0x600180
+#define PPE_BM_PORT_GROUP_ID_ENTRIES		15
+#define PPE_BM_PORT_GROUP_ID_INC		0x4
+#define PPE_BM_PORT_GROUP_ID_SHARED_GROUP_ID	GENMASK(1, 0)
+
+/* Counters for PPE buffers used for cached packets. */
+#define PPE_BM_USED_CNT_TBL_ADDR		0x6001c0
+#define PPE_BM_USED_CNT_TBL_ENTRIES		15
+#define PPE_BM_USED_CNT_TBL_INC			0x4
+#define PPE_BM_USED_CNT_VAL			GENMASK(10, 0)
+
+/* Counters for PPE buffers used for packets received after the pause
+ * frame is sent.
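+ *
+ * Reported as the second value in the "BM USED/REACT:" row of the
+ * bm debugfs file.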
*/ +#define PPE_BM_REACT_CNT_TBL_ADDR 0x600240 +#define PPE_BM_REACT_CNT_TBL_ENTRIES 15 +#define PPE_BM_REACT_CNT_TBL_INC 0x4 +#define PPE_BM_REACT_CNT_VAL GENMASK(8, 0) + +#define PPE_BM_SHARED_GROUP_CFG_ADDR 0x600290 +#define PPE_BM_SHARED_GROUP_CFG_ENTRIES 4 +#define PPE_BM_SHARED_GROUP_CFG_INC 0x4 +#define PPE_BM_SHARED_GROUP_CFG_SHARED_LIMIT GENMASK(10, 0) + +#define PPE_BM_PORT_FC_CFG_TBL_ADDR 0x601000 +#define PPE_BM_PORT_FC_CFG_TBL_ENTRIES 15 +#define PPE_BM_PORT_FC_CFG_TBL_INC 0x10 +#define PPE_BM_PORT_FC_W0_REACT_LIMIT GENMASK(8, 0) +#define PPE_BM_PORT_FC_W0_RESUME_THRESHOLD GENMASK(17, 9) +#define PPE_BM_PORT_FC_W0_RESUME_OFFSET GENMASK(28, 18) +#define PPE_BM_PORT_FC_W0_CEILING_LOW GENMASK(31, 29) +#define PPE_BM_PORT_FC_W1_CEILING_HIGH GENMASK(7, 0) +#define PPE_BM_PORT_FC_W1_WEIGHT GENMASK(10, 8) +#define PPE_BM_PORT_FC_W1_DYNAMIC BIT(11) +#define PPE_BM_PORT_FC_W1_PRE_ALLOC GENMASK(22, 12) + +#define PPE_BM_PORT_FC_SET_REACT_LIMIT(tbl_cfg, value) \ + FIELD_MODIFY(PPE_BM_PORT_FC_W0_REACT_LIMIT, tbl_cfg, value) +#define PPE_BM_PORT_FC_SET_RESUME_THRESHOLD(tbl_cfg, value) \ + FIELD_MODIFY(PPE_BM_PORT_FC_W0_RESUME_THRESHOLD, tbl_cfg, value) +#define PPE_BM_PORT_FC_SET_RESUME_OFFSET(tbl_cfg, value) \ + FIELD_MODIFY(PPE_BM_PORT_FC_W0_RESUME_OFFSET, tbl_cfg, value) +#define PPE_BM_PORT_FC_SET_CEILING_LOW(tbl_cfg, value) \ + FIELD_MODIFY(PPE_BM_PORT_FC_W0_CEILING_LOW, tbl_cfg, value) +#define PPE_BM_PORT_FC_SET_CEILING_HIGH(tbl_cfg, value) \ + FIELD_MODIFY(PPE_BM_PORT_FC_W1_CEILING_HIGH, (tbl_cfg) + 0x1, value) +#define PPE_BM_PORT_FC_SET_WEIGHT(tbl_cfg, value) \ + FIELD_MODIFY(PPE_BM_PORT_FC_W1_WEIGHT, (tbl_cfg) + 0x1, value) +#define PPE_BM_PORT_FC_SET_DYNAMIC(tbl_cfg, value) \ + FIELD_MODIFY(PPE_BM_PORT_FC_W1_DYNAMIC, (tbl_cfg) + 0x1, value) +#define PPE_BM_PORT_FC_SET_PRE_ALLOC(tbl_cfg, value) \ + FIELD_MODIFY(PPE_BM_PORT_FC_W1_PRE_ALLOC, (tbl_cfg) + 0x1, value) + +/* The queue base configurations based on destination port, + * service code or CPU code. + */ +#define PPE_UCAST_QUEUE_MAP_TBL_ADDR 0x810000 +#define PPE_UCAST_QUEUE_MAP_TBL_ENTRIES 3072 +#define PPE_UCAST_QUEUE_MAP_TBL_INC 0x10 +#define PPE_UCAST_QUEUE_MAP_TBL_PROFILE_ID GENMASK(3, 0) +#define PPE_UCAST_QUEUE_MAP_TBL_QUEUE_ID GENMASK(11, 4) + +/* The queue offset configurations based on RSS hash value. */ +#define PPE_UCAST_HASH_MAP_TBL_ADDR 0x830000 +#define PPE_UCAST_HASH_MAP_TBL_ENTRIES 4096 +#define PPE_UCAST_HASH_MAP_TBL_INC 0x10 +#define PPE_UCAST_HASH_MAP_TBL_HASH GENMASK(7, 0) + +/* The queue offset configurations based on PPE internal priority. */ +#define PPE_UCAST_PRIORITY_MAP_TBL_ADDR 0x842000 +#define PPE_UCAST_PRIORITY_MAP_TBL_ENTRIES 256 +#define PPE_UCAST_PRIORITY_MAP_TBL_INC 0x10 +#define PPE_UCAST_PRIORITY_MAP_TBL_CLASS GENMASK(3, 0) + +/* PPE unicast queue (0-255) configurations. 
*/ +#define PPE_AC_UNICAST_QUEUE_CFG_TBL_ADDR 0x848000 +#define PPE_AC_UNICAST_QUEUE_CFG_TBL_ENTRIES 256 +#define PPE_AC_UNICAST_QUEUE_CFG_TBL_INC 0x10 +#define PPE_AC_UNICAST_QUEUE_CFG_W0_EN BIT(0) +#define PPE_AC_UNICAST_QUEUE_CFG_W0_WRED_EN BIT(1) +#define PPE_AC_UNICAST_QUEUE_CFG_W0_FC_EN BIT(2) +#define PPE_AC_UNICAST_QUEUE_CFG_W0_CLR_AWARE BIT(3) +#define PPE_AC_UNICAST_QUEUE_CFG_W0_GRP_ID GENMASK(5, 4) +#define PPE_AC_UNICAST_QUEUE_CFG_W0_PRE_LIMIT GENMASK(16, 6) +#define PPE_AC_UNICAST_QUEUE_CFG_W0_DYNAMIC BIT(17) +#define PPE_AC_UNICAST_QUEUE_CFG_W0_WEIGHT GENMASK(20, 18) +#define PPE_AC_UNICAST_QUEUE_CFG_W0_THRESHOLD GENMASK(31, 21) +#define PPE_AC_UNICAST_QUEUE_CFG_W3_GRN_RESUME GENMASK(23, 13) + +#define PPE_AC_UNICAST_QUEUE_SET_EN(tbl_cfg, value) \ + FIELD_MODIFY(PPE_AC_UNICAST_QUEUE_CFG_W0_EN, tbl_cfg, value) +#define PPE_AC_UNICAST_QUEUE_SET_GRP_ID(tbl_cfg, value) \ + FIELD_MODIFY(PPE_AC_UNICAST_QUEUE_CFG_W0_GRP_ID, tbl_cfg, value) +#define PPE_AC_UNICAST_QUEUE_SET_PRE_LIMIT(tbl_cfg, value) \ + FIELD_MODIFY(PPE_AC_UNICAST_QUEUE_CFG_W0_PRE_LIMIT, tbl_cfg, value) +#define PPE_AC_UNICAST_QUEUE_SET_DYNAMIC(tbl_cfg, value) \ + FIELD_MODIFY(PPE_AC_UNICAST_QUEUE_CFG_W0_DYNAMIC, tbl_cfg, value) +#define PPE_AC_UNICAST_QUEUE_SET_WEIGHT(tbl_cfg, value) \ + FIELD_MODIFY(PPE_AC_UNICAST_QUEUE_CFG_W0_WEIGHT, tbl_cfg, value) +#define PPE_AC_UNICAST_QUEUE_SET_THRESHOLD(tbl_cfg, value) \ + FIELD_MODIFY(PPE_AC_UNICAST_QUEUE_CFG_W0_THRESHOLD, tbl_cfg, value) +#define PPE_AC_UNICAST_QUEUE_SET_GRN_RESUME(tbl_cfg, value) \ + FIELD_MODIFY(PPE_AC_UNICAST_QUEUE_CFG_W3_GRN_RESUME, (tbl_cfg) + 0x3, value) + +/* PPE multicast queue (256-299) configurations. */ +#define PPE_AC_MULTICAST_QUEUE_CFG_TBL_ADDR 0x84a000 +#define PPE_AC_MULTICAST_QUEUE_CFG_TBL_ENTRIES 44 +#define PPE_AC_MULTICAST_QUEUE_CFG_TBL_INC 0x10 +#define PPE_AC_MULTICAST_QUEUE_CFG_W0_EN BIT(0) +#define PPE_AC_MULTICAST_QUEUE_CFG_W0_FC_EN BIT(1) +#define PPE_AC_MULTICAST_QUEUE_CFG_W0_CLR_AWARE BIT(2) +#define PPE_AC_MULTICAST_QUEUE_CFG_W0_GRP_ID GENMASK(4, 3) +#define PPE_AC_MULTICAST_QUEUE_CFG_W0_PRE_LIMIT GENMASK(15, 5) +#define PPE_AC_MULTICAST_QUEUE_CFG_W0_THRESHOLD GENMASK(26, 16) +#define PPE_AC_MULTICAST_QUEUE_CFG_W2_RESUME GENMASK(17, 7) + +#define PPE_AC_MULTICAST_QUEUE_SET_EN(tbl_cfg, value) \ + FIELD_MODIFY(PPE_AC_MULTICAST_QUEUE_CFG_W0_EN, tbl_cfg, value) +#define PPE_AC_MULTICAST_QUEUE_SET_GRN_GRP_ID(tbl_cfg, value) \ + FIELD_MODIFY(PPE_AC_MULTICAST_QUEUE_CFG_W0_GRP_ID, tbl_cfg, value) +#define PPE_AC_MULTICAST_QUEUE_SET_GRN_PRE_LIMIT(tbl_cfg, value) \ + FIELD_MODIFY(PPE_AC_MULTICAST_QUEUE_CFG_W0_PRE_LIMIT, tbl_cfg, value) +#define PPE_AC_MULTICAST_QUEUE_SET_GRN_THRESHOLD(tbl_cfg, value) \ + FIELD_MODIFY(PPE_AC_MULTICAST_QUEUE_CFG_W0_THRESHOLD, tbl_cfg, value) +#define PPE_AC_MULTICAST_QUEUE_SET_GRN_RESUME(tbl_cfg, value) \ + FIELD_MODIFY(PPE_AC_MULTICAST_QUEUE_CFG_W2_RESUME, (tbl_cfg) + 0x2, value) + +/* PPE admission control group (0-3) configurations */ +#define PPE_AC_GRP_CFG_TBL_ADDR 0x84c000 +#define PPE_AC_GRP_CFG_TBL_ENTRIES 0x4 +#define PPE_AC_GRP_CFG_TBL_INC 0x10 +#define PPE_AC_GRP_W0_AC_EN BIT(0) +#define PPE_AC_GRP_W0_AC_FC_EN BIT(1) +#define PPE_AC_GRP_W0_CLR_AWARE BIT(2) +#define PPE_AC_GRP_W0_THRESHOLD_LOW GENMASK(31, 25) +#define PPE_AC_GRP_W1_THRESHOLD_HIGH GENMASK(3, 0) +#define PPE_AC_GRP_W1_BUF_LIMIT GENMASK(14, 4) +#define PPE_AC_GRP_W2_RESUME_GRN GENMASK(15, 5) +#define PPE_AC_GRP_W2_PRE_ALLOC GENMASK(26, 16) + +#define PPE_AC_GRP_SET_BUF_LIMIT(tbl_cfg, value) \ + FIELD_MODIFY(PPE_AC_GRP_W1_BUF_LIMIT, 
(tbl_cfg) + 0x1, value) + +/* Counters for packets handled by unicast queues (0-255). */ +#define PPE_AC_UNICAST_QUEUE_CNT_TBL_ADDR 0x84e000 +#define PPE_AC_UNICAST_QUEUE_CNT_TBL_ENTRIES 256 +#define PPE_AC_UNICAST_QUEUE_CNT_TBL_INC 0x10 +#define PPE_AC_UNICAST_QUEUE_CNT_TBL_PEND_CNT GENMASK(12, 0) + +/* Counters for packets handled by multicast queues (256-299). */ +#define PPE_AC_MULTICAST_QUEUE_CNT_TBL_ADDR 0x852000 +#define PPE_AC_MULTICAST_QUEUE_CNT_TBL_ENTRIES 44 +#define PPE_AC_MULTICAST_QUEUE_CNT_TBL_INC 0x10 +#define PPE_AC_MULTICAST_QUEUE_CNT_TBL_PEND_CNT GENMASK(12, 0) + +/* Table addresses for per-queue enqueue setting. */ +#define PPE_ENQ_OPR_TBL_ADDR 0x85c000 +#define PPE_ENQ_OPR_TBL_ENTRIES 300 +#define PPE_ENQ_OPR_TBL_INC 0x10 +#define PPE_ENQ_OPR_TBL_ENQ_DISABLE BIT(0) + +/* Unicast drop count includes the possible drops with WRED for the green, + * yellow and red categories. + */ +#define PPE_UNICAST_DROP_CNT_TBL_ADDR 0x9e0000 +#define PPE_UNICAST_DROP_CNT_TBL_ENTRIES 1536 +#define PPE_UNICAST_DROP_CNT_TBL_INC 0x10 +#define PPE_UNICAST_DROP_TYPES 6 +#define PPE_UNICAST_DROP_FORCE_OFFSET 3 + +/* There are 16 multicast queues dedicated to CPU port 0. Multicast drop + * count includes the force drop for green, yellow and red category packets. + */ +#define PPE_P0_MULTICAST_DROP_CNT_TBL_ADDR 0x9f0000 +#define PPE_P0_MULTICAST_DROP_CNT_TBL_ENTRIES 48 +#define PPE_P0_MULTICAST_DROP_CNT_TBL_INC 0x10 +#define PPE_P0_MULTICAST_QUEUE_NUM 16 + +/* Each PPE physical port has four dedicated multicast queues, providing + * a total of 12 entries per port. The multicast drop count includes forced + * drops for green, yellow, and red category packets. + */ +#define PPE_MULTICAST_QUEUE_PORT_ADDR_INC 0x1000 +#define PPE_MULTICAST_DROP_CNT_TBL_INC 0x10 +#define PPE_MULTICAST_DROP_TYPES 3 +#define PPE_MULTICAST_QUEUE_NUM 4 +#define PPE_MULTICAST_DROP_CNT_TBL_ENTRIES 12 + +#define PPE_CPU_PORT_MULTICAST_FORCE_DROP_CNT_TBL_ADDR(mq_offset) \ + (PPE_P0_MULTICAST_DROP_CNT_TBL_ADDR + \ + (mq_offset) * PPE_P0_MULTICAST_DROP_CNT_TBL_INC * \ + PPE_MULTICAST_DROP_TYPES) + +#define PPE_P1_MULTICAST_DROP_CNT_TBL_ADDR \ + (PPE_P0_MULTICAST_DROP_CNT_TBL_ADDR + PPE_MULTICAST_QUEUE_PORT_ADDR_INC) +#endif diff --git a/drivers/net/ethernet/renesas/Makefile b/drivers/net/ethernet/renesas/Makefile index f65fc76f8b4d..d63e0c61bb68 100644 --- a/drivers/net/ethernet/renesas/Makefile +++ b/drivers/net/ethernet/renesas/Makefile @@ -8,6 +8,7 @@ obj-$(CONFIG_SH_ETH) += sh_eth.o ravb-objs := ravb_main.o ravb_ptp.o obj-$(CONFIG_RAVB) += ravb.o +rswitch-objs := rswitch_main.o rswitch_l2.o obj-$(CONFIG_RENESAS_ETHER_SWITCH) += rswitch.o obj-$(CONFIG_RENESAS_GEN4_PTP) += rcar_gen4_ptp.o diff --git a/drivers/net/ethernet/renesas/rswitch.h b/drivers/net/ethernet/renesas/rswitch.h index 532192cbca4b..a1d4a877e5bd 100644 --- a/drivers/net/ethernet/renesas/rswitch.h +++ b/drivers/net/ethernet/renesas/rswitch.h @@ -1,19 +1,25 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* Renesas Ethernet Switch device driver * - * Copyright (C) 2022 Renesas Electronics Corporation + * Copyright (C) 2022-2025 Renesas Electronics Corporation */ #ifndef __RSWITCH_H__ #define __RSWITCH_H__ #include <linux/platform_device.h> +#include <linux/phy.h> + #include "rcar_gen4_ptp.h" #define RSWITCH_MAX_NUM_QUEUES 128 #define RSWITCH_NUM_AGENTS 5 #define RSWITCH_NUM_PORTS 3 + +#define rswitch_for_all_ports(_priv, _rdev) \ + list_for_each_entry(_rdev, &_priv->port_list, list) + #define rswitch_for_each_enabled_port(priv, i) \ for (i = 0; i < RSWITCH_NUM_PORTS; 
i++) \ if (priv->rdev[i]->disabled) \ @@ -809,7 +815,8 @@ enum rswitch_gwca_mode { #define FWPC0_IP4EA BIT(10) #define FWPC0_IPDSA BIT(12) #define FWPC0_IPHLA BIT(18) -#define FWPC0_MACSDA BIT(20) +#define FWPC0_MACDSA BIT(20) +#define FWPC0_MACSSA BIT(23) #define FWPC0_MACHLA BIT(26) #define FWPC0_MACHMA BIT(27) #define FWPC0_VLANSA BIT(28) @@ -820,12 +827,30 @@ enum rswitch_gwca_mode { #define FWPC2(i) (FWPC20 + (i) * 0x10) #define FWCP2_LTWFW GENMASK(16 + (RSWITCH_NUM_AGENTS - 1), 16) +#define FWCP2_LTWFW_MASK GENMASK(16 + (RSWITCH_NUM_AGENTS - 1), 16) #define FWPBFC(i) (FWPBFC0 + (i) * 0x10) #define FWPBFC_PBDV GENMASK(RSWITCH_NUM_AGENTS - 1, 0) #define FWPBFCSDC(j, i) (FWPBFCSDC00 + (i) * 0x10 + (j) * 0x04) +#define FWMACHEC_MACHMUE_MASK GENMASK(26, 16) + +#define FWMACTIM_MACTIOG BIT(0) +#define FWMACTIM_MACTR BIT(1) + +#define FWMACAGUSPC_MACAGUSP GENMASK(9, 0) +#define FWMACAGC_MACAGT GENMASK(15, 0) +#define FWMACAGC_MACAGE BIT(16) +#define FWMACAGC_MACAGSL BIT(17) +#define FWMACAGC_MACAGPM BIT(18) +#define FWMACAGC_MACDES BIT(24) +#define FWMACAGC_MACAGOG BIT(28) +#define FWMACAGC_MACDESOG BIT(29) + +#define RSW_AGEING_CLK_PER_US 0x140 +#define RSW_AGEING_TIME 300 + /* TOP */ #define TPEMIMC7(queue) (TPEMIMC70 + (queue) * 4) @@ -994,10 +1019,18 @@ struct rswitch_device { DECLARE_BITMAP(ts_skb_used, TS_TAGS_PER_PORT); bool disabled; + struct list_head list; + int port; struct rswitch_etha *etha; struct device_node *np_port; struct phy *serdes; + + struct net_device *brdev; /* master bridge device */ + unsigned int learning_requested : 1; + unsigned int learning_offloaded : 1; + unsigned int forwarding_requested : 1; + unsigned int forwarding_offloaded : 1; }; struct rswitch_mfwd_mac_table_entry { @@ -1022,11 +1055,17 @@ struct rswitch_private { struct rswitch_etha etha[RSWITCH_NUM_PORTS]; struct rswitch_mfwd mfwd; + struct list_head port_list; + spinlock_t lock; /* lock interrupt registers' control */ struct clk *clk; bool etha_no_runtime_change; bool gwca_halt; + struct net_device *offload_brdev; }; +bool is_rdev(const struct net_device *ndev); +void rswitch_modify(void __iomem *addr, enum rswitch_reg reg, u32 clear, u32 set); + #endif /* #ifndef __RSWITCH_H__ */ diff --git a/drivers/net/ethernet/renesas/rswitch_l2.c b/drivers/net/ethernet/renesas/rswitch_l2.c new file mode 100644 index 000000000000..4a69ec77d69c --- /dev/null +++ b/drivers/net/ethernet/renesas/rswitch_l2.c @@ -0,0 +1,316 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Renesas Ethernet Switch device driver + * + * Copyright (C) 2025 Renesas Electronics Corporation + */ + +#include <linux/err.h> +#include <linux/etherdevice.h> +#include <linux/if_bridge.h> +#include <linux/kernel.h> +#include <net/switchdev.h> + +#include "rswitch.h" +#include "rswitch_l2.h" + +static bool rdev_for_l2_offload(struct rswitch_device *rdev) +{ + return rdev->priv->offload_brdev && + rdev->brdev == rdev->priv->offload_brdev && + (test_bit(rdev->port, rdev->priv->opened_ports)); +} + +static void rswitch_change_l2_hw_offloading(struct rswitch_device *rdev, + bool start, bool learning) +{ + u32 bits = learning ? FWPC0_MACSSA | FWPC0_MACHLA | FWPC0_MACHMA : FWPC0_MACDSA; + u32 clear = start ? 0 : bits; + u32 set = start ? 
bits : 0;
+
+	if ((learning && rdev->learning_offloaded == start) ||
+	    (!learning && rdev->forwarding_offloaded == start))
+		return;
+
+	rswitch_modify(rdev->priv->addr, FWPC0(rdev->port), clear, set);
+
+	if (learning)
+		rdev->learning_offloaded = start;
+	else
+		rdev->forwarding_offloaded = start;
+
+	netdev_info(rdev->ndev, "%s hw %s\n", start ? "starting" : "stopping",
+		    learning ? "learning" : "forwarding");
+}
+
+static void rswitch_update_l2_hw_learning(struct rswitch_private *priv)
+{
+	struct rswitch_device *rdev;
+	bool learning_needed;
+
+	rswitch_for_all_ports(priv, rdev) {
+		if (rdev_for_l2_offload(rdev))
+			learning_needed = rdev->learning_requested;
+		else
+			learning_needed = false;
+
+		rswitch_change_l2_hw_offloading(rdev, learning_needed, true);
+	}
+}
+
+static void rswitch_update_l2_hw_forwarding(struct rswitch_private *priv)
+{
+	struct rswitch_device *rdev;
+	unsigned int fwd_mask;
+
+	/* Calculate fwd_mask with zeroes in the bits corresponding to ports
+	 * that shall participate in hardware forwarding.
+	 */
+	fwd_mask = GENMASK(RSWITCH_NUM_AGENTS - 1, 0);
+
+	rswitch_for_all_ports(priv, rdev) {
+		if (rdev_for_l2_offload(rdev) && rdev->forwarding_requested)
+			fwd_mask &= ~BIT(rdev->port);
+	}
+
+	rswitch_for_all_ports(priv, rdev) {
+		if ((rdev_for_l2_offload(rdev) && rdev->forwarding_requested) ||
+		    rdev->forwarding_offloaded) {
+			/* Update allowed offload destinations even for ports
+			 * with L2 offload enabled earlier.
+			 *
+			 * Do not allow L2 forwarding to self for hw port.
+			 */
+			iowrite32(FIELD_PREP(FWCP2_LTWFW_MASK, fwd_mask | BIT(rdev->port)),
+				  priv->addr + FWPC2(rdev->port));
+		}
+
+		if (rdev_for_l2_offload(rdev) &&
+		    rdev->forwarding_requested &&
+		    !rdev->forwarding_offloaded) {
+			rswitch_change_l2_hw_offloading(rdev, true, false);
+		} else if (rdev->forwarding_offloaded) {
+			rswitch_change_l2_hw_offloading(rdev, false, false);
+		}
+	}
+}
+
+void rswitch_update_l2_offload(struct rswitch_private *priv)
+{
+	rswitch_update_l2_hw_learning(priv);
+	rswitch_update_l2_hw_forwarding(priv);
+}
+
+static void rswitch_update_offload_brdev(struct rswitch_private *priv)
+{
+	struct net_device *offload_brdev = NULL;
+	struct rswitch_device *rdev, *rdev2;
+
+	rswitch_for_all_ports(priv, rdev) {
+		if (!rdev->brdev)
+			continue;
+		rswitch_for_all_ports(priv, rdev2) {
+			if (rdev2 == rdev)
+				break;
+			if (rdev2->brdev == rdev->brdev) {
+				offload_brdev = rdev->brdev;
+				break;
+			}
+		}
+		if (offload_brdev)
+			break;
+	}
+
+	if (offload_brdev == priv->offload_brdev)
+		; /* no change, nothing to log */
+	else if (offload_brdev && priv->offload_brdev)
+		dev_dbg(&priv->pdev->dev,
+			"changing l2 offload from %s to %s\n",
+			netdev_name(priv->offload_brdev),
+			netdev_name(offload_brdev));
+	else if (offload_brdev)
+		dev_dbg(&priv->pdev->dev, "starting l2 offload for %s\n",
+			netdev_name(offload_brdev));
+	else if (!offload_brdev)
+		dev_dbg(&priv->pdev->dev, "stopping l2 offload for %s\n",
+			netdev_name(priv->offload_brdev));
+
+	priv->offload_brdev = offload_brdev;
+
+	rswitch_update_l2_offload(priv);
+}
+
+static bool rswitch_port_check(const struct net_device *ndev)
+{
+	return is_rdev(ndev);
+}
+
+static void rswitch_port_update_brdev(struct net_device *ndev,
+				      struct net_device *brdev)
+{
+	struct rswitch_device *rdev;
+
+	if (!is_rdev(ndev))
+		return;
+
+	rdev = netdev_priv(ndev);
+	rdev->brdev = brdev;
+	rswitch_update_offload_brdev(rdev->priv);
+}
+
+static int rswitch_port_update_stp_state(struct net_device *ndev, u8 stp_state)
+{
+	struct rswitch_device *rdev;
+
+	if (!is_rdev(ndev))
+		return -ENODEV;
+
+	rdev = netdev_priv(ndev);
+	rdev->learning_requested = (stp_state
== BR_STATE_LEARNING || + stp_state == BR_STATE_FORWARDING); + rdev->forwarding_requested = (stp_state == BR_STATE_FORWARDING); + rswitch_update_l2_offload(rdev->priv); + + return 0; +} + +static int rswitch_netdevice_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct net_device *ndev = netdev_notifier_info_to_dev(ptr); + struct netdev_notifier_changeupper_info *info; + struct net_device *brdev; + + if (!rswitch_port_check(ndev)) + return NOTIFY_DONE; + if (event != NETDEV_CHANGEUPPER) + return NOTIFY_DONE; + + info = ptr; + + if (netif_is_bridge_master(info->upper_dev)) { + brdev = info->linking ? info->upper_dev : NULL; + rswitch_port_update_brdev(ndev, brdev); + } + + return NOTIFY_OK; +} + +static int rswitch_update_ageing_time(struct net_device *ndev, clock_t time) +{ + struct rswitch_device *rdev = netdev_priv(ndev); + u32 reg_val; + + if (!is_rdev(ndev)) + return -ENODEV; + + if (!FIELD_FIT(FWMACAGC_MACAGT, time)) + return -EINVAL; + + reg_val = FIELD_PREP(FWMACAGC_MACAGT, time); + reg_val |= FWMACAGC_MACAGE | FWMACAGC_MACAGSL; + iowrite32(reg_val, rdev->priv->addr + FWMACAGC); + + return 0; +} + +static int rswitch_port_attr_set(struct net_device *ndev, const void *ctx, + const struct switchdev_attr *attr, + struct netlink_ext_ack *extack) +{ + switch (attr->id) { + case SWITCHDEV_ATTR_ID_PORT_STP_STATE: + return rswitch_port_update_stp_state(ndev, attr->u.stp_state); + case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME: + return rswitch_update_ageing_time(ndev, attr->u.ageing_time); + default: + return -EOPNOTSUPP; + } +} + +static int rswitch_switchdev_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct net_device *ndev = switchdev_notifier_info_to_dev(ptr); + int ret; + + if (event == SWITCHDEV_PORT_ATTR_SET) { + ret = switchdev_handle_port_attr_set(ndev, ptr, + rswitch_port_check, + rswitch_port_attr_set); + return notifier_from_errno(ret); + } + + if (!rswitch_port_check(ndev)) + return NOTIFY_DONE; + + return notifier_from_errno(-EOPNOTSUPP); +} + +static int rswitch_switchdev_blocking_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct net_device *ndev = switchdev_notifier_info_to_dev(ptr); + int ret; + + switch (event) { + case SWITCHDEV_PORT_OBJ_ADD: + return -EOPNOTSUPP; + case SWITCHDEV_PORT_OBJ_DEL: + return -EOPNOTSUPP; + case SWITCHDEV_PORT_ATTR_SET: + ret = switchdev_handle_port_attr_set(ndev, ptr, + rswitch_port_check, + rswitch_port_attr_set); + break; + default: + if (!rswitch_port_check(ndev)) + return NOTIFY_DONE; + ret = -EOPNOTSUPP; + } + + return notifier_from_errno(ret); +} + +static struct notifier_block rswitch_netdevice_nb = { + .notifier_call = rswitch_netdevice_event, +}; + +static struct notifier_block rswitch_switchdev_nb = { + .notifier_call = rswitch_switchdev_event, +}; + +static struct notifier_block rswitch_switchdev_blocking_nb = { + .notifier_call = rswitch_switchdev_blocking_event, +}; + +int rswitch_register_notifiers(void) +{ + int ret; + + ret = register_netdevice_notifier(&rswitch_netdevice_nb); + if (ret) + goto register_netdevice_notifier_failed; + + ret = register_switchdev_notifier(&rswitch_switchdev_nb); + if (ret) + goto register_switchdev_notifier_failed; + + ret = register_switchdev_blocking_notifier(&rswitch_switchdev_blocking_nb); + if (ret) + goto register_switchdev_blocking_notifier_failed; + + return 0; + +register_switchdev_blocking_notifier_failed: + unregister_switchdev_notifier(&rswitch_switchdev_nb); +register_switchdev_notifier_failed: + 
unregister_netdevice_notifier(&rswitch_netdevice_nb); +register_netdevice_notifier_failed: + + return ret; +} + +void rswitch_unregister_notifiers(void) +{ + unregister_switchdev_blocking_notifier(&rswitch_switchdev_blocking_nb); + unregister_switchdev_notifier(&rswitch_switchdev_nb); + unregister_netdevice_notifier(&rswitch_netdevice_nb); +} diff --git a/drivers/net/ethernet/renesas/rswitch_l2.h b/drivers/net/ethernet/renesas/rswitch_l2.h new file mode 100644 index 000000000000..57050ede8f31 --- /dev/null +++ b/drivers/net/ethernet/renesas/rswitch_l2.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Renesas Ethernet Switch device driver + * + * Copyright (C) 2025 Renesas Electronics Corporation + */ + +#ifndef __RSWITCH_L2_H__ +#define __RSWITCH_L2_H__ + +void rswitch_update_l2_offload(struct rswitch_private *priv); + +int rswitch_register_notifiers(void); +void rswitch_unregister_notifiers(void); + +#endif /* #ifndef __RSWITCH_L2_H__ */ diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch_main.c index aba772e14555..59dceb81607c 100644 --- a/drivers/net/ethernet/renesas/rswitch.c +++ b/drivers/net/ethernet/renesas/rswitch_main.c @@ -1,15 +1,18 @@ // SPDX-License-Identifier: GPL-2.0 /* Renesas Ethernet Switch device driver * - * Copyright (C) 2022 Renesas Electronics Corporation + * Copyright (C) 2022-2025 Renesas Electronics Corporation */ #include <linux/clk.h> #include <linux/dma-mapping.h> #include <linux/err.h> #include <linux/etherdevice.h> +#include <linux/ethtool.h> +#include <linux/ip.h> #include <linux/iopoll.h> #include <linux/kernel.h> +#include <linux/list.h> #include <linux/module.h> #include <linux/net_tstamp.h> #include <linux/of.h> @@ -25,6 +28,7 @@ #include <linux/sys_soc.h> #include "rswitch.h" +#include "rswitch_l2.h" static int rswitch_reg_wait(void __iomem *addr, u32 offs, u32 mask, u32 expected) { @@ -34,7 +38,7 @@ static int rswitch_reg_wait(void __iomem *addr, u32 offs, u32 mask, u32 expected 1, RSWITCH_TIMEOUT_US); } -static void rswitch_modify(void __iomem *addr, enum rswitch_reg reg, u32 clear, u32 set) +void rswitch_modify(void __iomem *addr, enum rswitch_reg reg, u32 clear, u32 set) { iowrite32((ioread32(addr + reg) & ~clear) | set, addr + reg); } @@ -109,10 +113,11 @@ static void rswitch_top_init(struct rswitch_private *priv) } /* Forwarding engine block (MFWD) */ -static void rswitch_fwd_init(struct rswitch_private *priv) +static int rswitch_fwd_init(struct rswitch_private *priv) { u32 all_ports_mask = GENMASK(RSWITCH_NUM_AGENTS - 1, 0); unsigned int i; + u32 reg_val; /* Start with empty configuration */ for (i = 0; i < RSWITCH_NUM_AGENTS; i++) { @@ -128,6 +133,14 @@ static void rswitch_fwd_init(struct rswitch_private *priv) iowrite32(0, priv->addr + FWPBFC(i)); } + /* Configure MAC table aging */ + rswitch_modify(priv->addr, FWMACAGUSPC, FWMACAGUSPC_MACAGUSP, + FIELD_PREP(FWMACAGUSPC_MACAGUSP, RSW_AGEING_CLK_PER_US)); + + reg_val = FIELD_PREP(FWMACAGC_MACAGT, RSW_AGEING_TIME); + reg_val |= FWMACAGC_MACAGE | FWMACAGC_MACAGSL; + iowrite32(reg_val, priv->addr + FWMACAGC); + /* For enabled ETHA ports, setup port based forwarding */ rswitch_for_each_enabled_port(priv, i) { /* Port based forwarding from port i to GWCA port */ @@ -140,6 +153,16 @@ static void rswitch_fwd_init(struct rswitch_private *priv) /* For GWCA port, allow direct descriptor forwarding */ rswitch_modify(priv->addr, FWPC1(priv->gwca.index), FWPC1_DDE, FWPC1_DDE); + + /* Initialize hardware L2 forwarding table */ + + /* Allow entire table to be 
used for "unsecure" entries */ + rswitch_modify(priv->addr, FWMACHEC, 0, FWMACHEC_MACHMUE_MASK); + + /* Initialize MAC hash table */ + iowrite32(FWMACTIM_MACTIOG, priv->addr + FWMACTIM); + + return rswitch_reg_wait(priv->addr, FWMACTIM, FWMACTIM_MACTIOG, 0); } /* Gateway CPU agent block (GWCA) */ @@ -1602,6 +1625,9 @@ static int rswitch_open(struct net_device *ndev) netif_start_queue(ndev); + if (rdev->brdev) + rswitch_update_l2_offload(rdev->priv); + return 0; }; @@ -1624,6 +1650,9 @@ static int rswitch_stop(struct net_device *ndev) napi_disable(&rdev->napi); + if (rdev->brdev) + rswitch_update_l2_offload(rdev->priv); + if (bitmap_empty(rdev->priv->opened_ports, RSWITCH_NUM_PORTS)) iowrite32(GWCA_TS_IRQ_BIT, rdev->priv->addr + GWTSDID); @@ -1850,16 +1879,46 @@ static int rswitch_eth_ioctl(struct net_device *ndev, struct ifreq *req, int cmd } } +static int rswitch_get_port_parent_id(struct net_device *ndev, + struct netdev_phys_item_id *ppid) +{ + struct rswitch_device *rdev = netdev_priv(ndev); + const char *name; + + name = dev_name(&rdev->priv->pdev->dev); + ppid->id_len = min_t(size_t, strlen(name), sizeof(ppid->id)); + memcpy(ppid->id, name, ppid->id_len); + + return 0; +} + +static int rswitch_get_phys_port_name(struct net_device *ndev, + char *name, size_t len) +{ + struct rswitch_device *rdev = netdev_priv(ndev); + + snprintf(name, len, "tsn%d", rdev->port); + + return 0; +} + static const struct net_device_ops rswitch_netdev_ops = { .ndo_open = rswitch_open, .ndo_stop = rswitch_stop, .ndo_start_xmit = rswitch_start_xmit, .ndo_get_stats = rswitch_get_stats, .ndo_eth_ioctl = rswitch_eth_ioctl, + .ndo_get_port_parent_id = rswitch_get_port_parent_id, + .ndo_get_phys_port_name = rswitch_get_phys_port_name, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, }; +bool is_rdev(const struct net_device *ndev) +{ + return (ndev->netdev_ops == &rswitch_netdev_ops); +} + static int rswitch_get_ts_info(struct net_device *ndev, struct kernel_ethtool_ts_info *info) { struct rswitch_device *rdev = netdev_priv(ndev); @@ -1959,6 +2018,8 @@ static int rswitch_device_alloc(struct rswitch_private *priv, unsigned int index if (err < 0) goto out_txdmac; + list_add_tail(&rdev->list, &priv->port_list); + return 0; out_txdmac: @@ -1978,6 +2039,7 @@ static void rswitch_device_free(struct rswitch_private *priv, unsigned int index struct rswitch_device *rdev = priv->rdev[index]; struct net_device *ndev = rdev->ndev; + list_del(&rdev->list); rswitch_txdmac_free(ndev); rswitch_rxdmac_free(ndev); of_node_put(rdev->np_port); @@ -2024,7 +2086,9 @@ static int rswitch_init(struct rswitch_private *priv) } } - rswitch_fwd_init(priv); + err = rswitch_fwd_init(priv); + if (err < 0) + goto err_fwd_init; err = rcar_gen4_ptp_register(priv->ptp_priv, RCAR_GEN4_PTP_REG_LAYOUT, clk_get_rate(priv->clk)); @@ -2073,6 +2137,7 @@ err_gwca_ts_request_irq: err_gwca_request_irq: rcar_gen4_ptp_unregister(priv->ptp_priv); +err_fwd_init: err_ptp_register: for (i = 0; i < RSWITCH_NUM_PORTS; i++) rswitch_device_free(priv, i); @@ -2107,6 +2172,7 @@ static int renesas_eth_sw_probe(struct platform_device *pdev) priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; + spin_lock_init(&priv->lock); priv->clk = devm_clk_get(&pdev->dev, NULL); @@ -2144,6 +2210,8 @@ static int renesas_eth_sw_probe(struct platform_device *pdev) if (!priv->gwca.queues) return -ENOMEM; + INIT_LIST_HEAD(&priv->port_list); + pm_runtime_enable(&pdev->dev); pm_runtime_get_sync(&pdev->dev); @@ -2154,6 +2222,15 @@ 
static int renesas_eth_sw_probe(struct platform_device *pdev) return ret; } + if (list_empty(&priv->port_list)) + dev_warn(&pdev->dev, "could not initialize any ports\n"); + + ret = rswitch_register_notifiers(); + if (ret) { + dev_err(&pdev->dev, "could not register notifiers\n"); + return ret; + } + device_set_wakeup_capable(&pdev->dev, 1); return ret; @@ -2187,6 +2264,7 @@ static void renesas_eth_sw_remove(struct platform_device *pdev) { struct rswitch_private *priv = platform_get_drvdata(pdev); + rswitch_unregister_notifiers(); rswitch_deinit(priv); pm_runtime_put(&pdev->dev); diff --git a/drivers/net/ethernet/sfc/efx_channels.c b/drivers/net/ethernet/sfc/efx_channels.c index 06b4f52713ef..0f66324ed351 100644 --- a/drivers/net/ethernet/sfc/efx_channels.c +++ b/drivers/net/ethernet/sfc/efx_channels.c @@ -216,8 +216,8 @@ static int efx_allocate_msix_channels(struct efx_nic *efx, if (efx_separate_tx_channels) { efx->n_tx_channels = - min(max(n_channels / 2, 1U), - efx->max_tx_channels); + clamp(n_channels / 2, 1U, + efx->max_tx_channels); efx->tx_channel_offset = n_channels - efx->n_tx_channels; efx->n_rx_channels = diff --git a/drivers/net/ethernet/sfc/falcon/efx.c b/drivers/net/ethernet/sfc/falcon/efx.c index b07f7e4e2877..d19fbf8732ff 100644 --- a/drivers/net/ethernet/sfc/falcon/efx.c +++ b/drivers/net/ethernet/sfc/falcon/efx.c @@ -1394,9 +1394,8 @@ static int ef4_probe_interrupts(struct ef4_nic *efx) if (n_channels > extra_channels) n_channels -= extra_channels; if (ef4_separate_tx_channels) { - efx->n_tx_channels = min(max(n_channels / 2, - 1U), - efx->max_tx_channels); + efx->n_tx_channels = clamp(n_channels / 2, 1U, + efx->max_tx_channels); efx->n_rx_channels = max(n_channels - efx->n_tx_channels, 1U); diff --git a/drivers/net/ethernet/sfc/siena/efx_channels.c b/drivers/net/ethernet/sfc/siena/efx_channels.c index d120b3c83ac0..703419866d18 100644 --- a/drivers/net/ethernet/sfc/siena/efx_channels.c +++ b/drivers/net/ethernet/sfc/siena/efx_channels.c @@ -217,8 +217,8 @@ static int efx_allocate_msix_channels(struct efx_nic *efx, if (efx_siena_separate_tx_channels) { efx->n_tx_channels = - min(max(n_channels / 2, 1U), - efx->max_tx_channels); + clamp(n_channels / 2, 1U, + efx->max_tx_channels); efx->tx_channel_offset = n_channels - efx->n_tx_channels; efx->n_rx_channels = diff --git a/drivers/net/ethernet/sfc/tc_encap_actions.c b/drivers/net/ethernet/sfc/tc_encap_actions.c index e872f926e438..eef06e48185d 100644 --- a/drivers/net/ethernet/sfc/tc_encap_actions.c +++ b/drivers/net/ethernet/sfc/tc_encap_actions.c @@ -11,6 +11,8 @@ #include "tc_encap_actions.h" #include "tc.h" #include "mae.h" +#include <net/flow.h> +#include <net/inet_dscp.h> #include <net/vxlan.h> #include <net/geneve.h> #include <net/netevent.h> @@ -99,7 +101,7 @@ static int efx_bind_neigh(struct efx_nic *efx, case EFX_ENCAP_TYPE_GENEVE: flow4.flowi4_proto = IPPROTO_UDP; flow4.fl4_dport = encap->key.tp_dst; - flow4.flowi4_tos = encap->key.tos; + flow4.flowi4_dscp = inet_dsfield_to_dscp(encap->key.tos); flow4.daddr = encap->key.u.ipv4.dst; flow4.saddr = encap->key.u.ipv4.src; break; diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index cbffccb3b9af..eaa1f2e1c5a5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -602,7 +602,6 @@ struct mac_device_info { unsigned int mcast_bits_log2; unsigned int rx_csum; unsigned int pcs; - unsigned int pmt; unsigned int ps; unsigned int xlgmac; unsigned int num_vlan; diff 
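The three sfc conversions above are pure simplification: for lo <= hi, clamp(v, lo, hi) is exactly min(max(v, lo), hi). A stand-alone check of that identity, with plain-C stand-ins for the kernel macros:

#include <assert.h>
#include <stdio.h>

static unsigned int min_u(unsigned int a, unsigned int b) { return a < b ? a : b; }
static unsigned int max_u(unsigned int a, unsigned int b) { return a > b ? a : b; }

/* clamp() as the kernel defines it for lo <= hi */
static unsigned int clamp_u(unsigned int v, unsigned int lo, unsigned int hi)
{
	return min_u(max_u(v, lo), hi);
}

int main(void)
{
	unsigned int max_tx_channels = 8;	/* as in efx->max_tx_channels */

	/* the converted expression: n_channels / 2 clamped to [1, max] */
	for (unsigned int n = 0; n < 64; n++)
		assert(clamp_u(n / 2, 1, max_tx_channels) ==
		       min_u(max_u(n / 2, 1), max_tx_channels));

	printf("clamp(v, lo, hi) == min(max(v, lo), hi) over the sample\n");
	return 0;
}

The tc_encap_actions hunk is a related type-safety move: inet_dsfield_to_dscp() keeps the six DSCP bits of the old tos byte and masks off the two ECN bits, which is what the flowi4_dscp field is meant to carry.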
--git a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c index 889e2bb6f7f5..80200a6aa0cb 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c @@ -49,7 +49,7 @@ struct imx_dwmac_ops { u32 flags; bool mac_rgmii_txclk_auto_adj; - int (*fix_soc_reset)(void *priv, void __iomem *ioaddr); + int (*fix_soc_reset)(struct stmmac_priv *priv, void __iomem *ioaddr); int (*set_intf_mode)(struct plat_stmmacenet_data *plat_dat); void (*fix_mac_speed)(void *priv, int speed, unsigned int mode); }; @@ -265,9 +265,9 @@ static void imx93_dwmac_fix_speed(void *priv, int speed, unsigned int mode) writel(old_ctrl, dwmac->base_addr + MAC_CTRL_REG); } -static int imx_dwmac_mx93_reset(void *priv, void __iomem *ioaddr) +static int imx_dwmac_mx93_reset(struct stmmac_priv *priv, void __iomem *ioaddr) { - struct plat_stmmacenet_data *plat_dat = priv; + struct plat_stmmacenet_data *plat_dat = priv->plat; u32 value = readl(ioaddr + DMA_BUS_MODE); /* DMA SW reset */ @@ -301,6 +301,7 @@ imx_dwmac_parse_dt(struct imx_priv_data *dwmac, struct device *dev) dwmac->clk_mem = NULL; if (of_machine_is_compatible("fsl,imx8dxl") || + of_machine_is_compatible("fsl,imx91") || of_machine_is_compatible("fsl,imx93")) { dwmac->clk_mem = devm_clk_get(dev, "mem"); if (IS_ERR(dwmac->clk_mem)) { @@ -310,9 +311,10 @@ imx_dwmac_parse_dt(struct imx_priv_data *dwmac, struct device *dev) } if (of_machine_is_compatible("fsl,imx8mp") || + of_machine_is_compatible("fsl,imx91") || of_machine_is_compatible("fsl,imx93")) { /* Binding doc describes the property: - * is required by i.MX8MP, i.MX93. + * is required by i.MX8MP, i.MX91, i.MX93. * is optional for i.MX8DXL. */ dwmac->intf_regmap = diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c index ea33ae39be6b..3fac3945cbfa 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c @@ -1231,6 +1231,37 @@ static int stmmac_config_multi_msi(struct pci_dev *pdev, return 0; } +static int intel_eth_pci_suspend(struct device *dev, void *bsp_priv) +{ + struct pci_dev *pdev = to_pci_dev(dev); + int ret; + + ret = pci_save_state(pdev); + if (ret) + return ret; + + pci_wake_from_d3(pdev, true); + pci_set_power_state(pdev, PCI_D3hot); + return 0; +} + +static int intel_eth_pci_resume(struct device *dev, void *bsp_priv) +{ + struct pci_dev *pdev = to_pci_dev(dev); + int ret; + + pci_restore_state(pdev); + pci_set_power_state(pdev, PCI_D0); + + ret = pcim_enable_device(pdev); + if (ret) + return ret; + + pci_set_master(pdev); + + return 0; +} + /** * intel_eth_pci_probe * @@ -1292,6 +1323,9 @@ static int intel_eth_pci_probe(struct pci_dev *pdev, pci_set_master(pdev); plat->bsp_priv = intel_priv; + plat->suspend = intel_eth_pci_suspend; + plat->resume = intel_eth_pci_resume; + intel_priv->mdio_adhoc_addr = INTEL_MGBE_ADHOC_ADDR; intel_priv->crossts_adj = 1; @@ -1355,44 +1389,6 @@ static void intel_eth_pci_remove(struct pci_dev *pdev) clk_unregister_fixed_rate(priv->plat->stmmac_clk); } -static int __maybe_unused intel_eth_pci_suspend(struct device *dev) -{ - struct pci_dev *pdev = to_pci_dev(dev); - int ret; - - ret = stmmac_suspend(dev); - if (ret) - return ret; - - ret = pci_save_state(pdev); - if (ret) - return ret; - - pci_wake_from_d3(pdev, true); - pci_set_power_state(pdev, PCI_D3hot); - return 0; -} - -static int __maybe_unused intel_eth_pci_resume(struct device *dev) -{ - struct
pci_dev *pdev = to_pci_dev(dev); - int ret; - - pci_restore_state(pdev); - pci_set_power_state(pdev, PCI_D0); - - ret = pcim_enable_device(pdev); - if (ret) - return ret; - - pci_set_master(pdev); - - return stmmac_resume(dev); -} - -static SIMPLE_DEV_PM_OPS(intel_eth_pm_ops, intel_eth_pci_suspend, - intel_eth_pci_resume); - #define PCI_DEVICE_ID_INTEL_QUARK 0x0937 #define PCI_DEVICE_ID_INTEL_EHL_RGMII1G 0x4b30 #define PCI_DEVICE_ID_INTEL_EHL_SGMII1G 0x4b31 @@ -1442,7 +1438,7 @@ static struct pci_driver intel_eth_pci_driver = { .probe = intel_eth_pci_probe, .remove = intel_eth_pci_remove, .driver = { - .pm = &intel_eth_pm_ops, + .pm = &stmmac_simple_pm_ops, }, }; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c index e1591e6217d4..6fca0fca4892 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c @@ -509,10 +509,15 @@ static int loongson_dwmac_acpi_config(struct pci_dev *pdev, } /* Loongson's DWMAC device may take nearly two seconds to complete DMA reset */ -static int loongson_dwmac_fix_reset(void *priv, void __iomem *ioaddr) +static int loongson_dwmac_fix_reset(struct stmmac_priv *priv, void __iomem *ioaddr) { u32 value = readl(ioaddr + DMA_BUS_MODE); + if (value & DMA_BUS_MODE_SFT_RESET) { + netdev_err(priv->dev, "the PHY clock is missing\n"); + return -EINVAL; + } + value |= DMA_BUS_MODE_SFT_RESET; writel(value, ioaddr + DMA_BUS_MODE); @@ -521,6 +526,37 @@ static int loongson_dwmac_fix_reset(void *priv, void __iomem *ioaddr) 10000, 2000000); } +static int loongson_dwmac_suspend(struct device *dev, void *bsp_priv) +{ + struct pci_dev *pdev = to_pci_dev(dev); + int ret; + + ret = pci_save_state(pdev); + if (ret) + return ret; + + pci_disable_device(pdev); + pci_wake_from_d3(pdev, true); + return 0; +} + +static int loongson_dwmac_resume(struct device *dev, void *bsp_priv) +{ + struct pci_dev *pdev = to_pci_dev(dev); + int ret; + + pci_restore_state(pdev); + pci_set_power_state(pdev, PCI_D0); + + ret = pci_enable_device(pdev); + if (ret) + return ret; + + pci_set_master(pdev); + + return 0; +} + static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct plat_stmmacenet_data *plat; @@ -565,6 +601,8 @@ static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id plat->bsp_priv = ld; plat->setup = loongson_dwmac_setup; plat->fix_soc_reset = loongson_dwmac_fix_reset; + plat->suspend = loongson_dwmac_suspend; + plat->resume = loongson_dwmac_resume; ld->dev = &pdev->dev; ld->loongson_id = readl(res.addr + GMAC_VERSION) & 0xff; @@ -621,44 +659,6 @@ static void loongson_dwmac_remove(struct pci_dev *pdev) pci_disable_device(pdev); } -static int __maybe_unused loongson_dwmac_suspend(struct device *dev) -{ - struct pci_dev *pdev = to_pci_dev(dev); - int ret; - - ret = stmmac_suspend(dev); - if (ret) - return ret; - - ret = pci_save_state(pdev); - if (ret) - return ret; - - pci_disable_device(pdev); - pci_wake_from_d3(pdev, true); - return 0; -} - -static int __maybe_unused loongson_dwmac_resume(struct device *dev) -{ - struct pci_dev *pdev = to_pci_dev(dev); - int ret; - - pci_restore_state(pdev); - pci_set_power_state(pdev, PCI_D0); - - ret = pci_enable_device(pdev); - if (ret) - return ret; - - pci_set_master(pdev); - - return stmmac_resume(dev); -} - -static SIMPLE_DEV_PM_OPS(loongson_dwmac_pm_ops, loongson_dwmac_suspend, - loongson_dwmac_resume); - static const struct pci_device_id 
loongson_dwmac_id_table[] = { { PCI_DEVICE_DATA(LOONGSON, GMAC1, &loongson_gmac_pci_info) }, { PCI_DEVICE_DATA(LOONGSON, GMAC2, &loongson_gmac_pci_info) }, @@ -673,7 +673,7 @@ static struct pci_driver loongson_dwmac_driver = { .probe = loongson_dwmac_probe, .remove = loongson_dwmac_remove, .driver = { - .pm = &loongson_dwmac_pm_ops, + .pm = &stmmac_simple_pm_ops, }, }; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c index 39421d6a34e4..f1b36f0a401d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c @@ -523,7 +523,7 @@ static int mediatek_dwmac_clk_init(struct mediatek_dwmac_plat_data *plat) return ret; } -static int mediatek_dwmac_init(struct platform_device *pdev, void *priv) +static int mediatek_dwmac_init(struct device *dev, void *priv) { struct mediatek_dwmac_plat_data *plat = priv; const struct mediatek_dwmac_variant *variant = plat->variant; @@ -532,7 +532,7 @@ static int mediatek_dwmac_init(struct platform_device *pdev, void *priv) if (variant->dwmac_set_phy_interface) { ret = variant->dwmac_set_phy_interface(plat); if (ret) { - dev_err(plat->dev, "failed to set phy interface, err = %d\n", ret); + dev_err(dev, "failed to set phy interface, err = %d\n", ret); return ret; } } @@ -540,7 +540,7 @@ static int mediatek_dwmac_init(struct platform_device *pdev, void *priv) if (variant->dwmac_set_delay) { ret = variant->dwmac_set_delay(plat); if (ret) { - dev_err(plat->dev, "failed to set delay value, err = %d\n", ret); + dev_err(dev, "failed to set delay value, err = %d\n", ret); return ret; } } @@ -589,7 +589,7 @@ static int mediatek_dwmac_common_data(struct platform_device *pdev, plat->maxmtu = ETH_DATA_LEN; plat->host_dma_width = priv_plat->variant->dma_bit_mask; plat->bsp_priv = priv_plat; - plat->init = mediatek_dwmac_init; + plat->resume = mediatek_dwmac_init; plat->clks_config = mediatek_dwmac_clks_config; plat->safety_feat_cfg = devm_kzalloc(&pdev->dev, @@ -654,7 +654,7 @@ static int mediatek_dwmac_probe(struct platform_device *pdev) return PTR_ERR(plat_dat); mediatek_dwmac_common_data(pdev, plat_dat, priv_plat); - mediatek_dwmac_init(pdev, priv_plat); + mediatek_dwmac_init(&pdev->dev, priv_plat); ret = mediatek_dwmac_clks_config(priv_plat, true); if (ret) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c index f6687c2f30f6..266c53379236 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c @@ -71,7 +71,6 @@ struct rk_priv_data { phy_interface_t phy_iface; int id; struct regulator *regulator; - bool suspended; const struct rk_gmac_ops *ops; bool clk_enabled; @@ -557,9 +556,7 @@ static const struct rk_gmac_ops rk3308_ops = { #define RK3328_GMAC_RMII_MODE GRF_BIT(9) #define RK3328_GMAC_RMII_MODE_CLR GRF_CLR_BIT(9) #define RK3328_GMAC_TXCLK_DLY_ENABLE GRF_BIT(0) -#define RK3328_GMAC_TXCLK_DLY_DISABLE GRF_CLR_BIT(0) #define RK3328_GMAC_RXCLK_DLY_ENABLE GRF_BIT(1) -#define RK3328_GMAC_RXCLK_DLY_DISABLE GRF_CLR_BIT(0) /* RK3328_GRF_MACPHY_CON1 */ #define RK3328_MACPHY_RMII_MODE GRF_BIT(9) @@ -1413,12 +1410,15 @@ static int rk_gmac_clk_init(struct plat_stmmacenet_data *plat) clk_set_rate(plat->stmmac_clk, 50000000); } - if (plat->phy_node && bsp_priv->integrated_phy) { + if (plat->phy_node) { bsp_priv->clk_phy = of_clk_get(plat->phy_node, 0); ret = PTR_ERR_OR_ZERO(bsp_priv->clk_phy); - if (ret) - return dev_err_probe(dev, ret, 
"Cannot get PHY clock\n"); - clk_set_rate(bsp_priv->clk_phy, 50000000); + /* If it is not integrated_phy, clk_phy is optional */ + if (bsp_priv->integrated_phy) { + if (ret) + return dev_err_probe(dev, ret, "Cannot get PHY clock\n"); + clk_set_rate(bsp_priv->clk_phy, 50000000); + } } return 0; @@ -1706,6 +1706,28 @@ static int rk_set_clk_tx_rate(void *bsp_priv_, struct clk *clk_tx_i, return -EINVAL; } +static int rk_gmac_suspend(struct device *dev, void *bsp_priv_) +{ + struct rk_priv_data *bsp_priv = bsp_priv_; + + /* Keep the PHY up if we use Wake-on-Lan. */ + if (!device_may_wakeup(dev)) + rk_gmac_powerdown(bsp_priv); + + return 0; +} + +static int rk_gmac_resume(struct device *dev, void *bsp_priv_) +{ + struct rk_priv_data *bsp_priv = bsp_priv_; + + /* The PHY was up for Wake-on-Lan. */ + if (!device_may_wakeup(dev)) + rk_gmac_powerup(bsp_priv); + + return 0; +} + static int rk_gmac_probe(struct platform_device *pdev) { struct plat_stmmacenet_data *plat_dat; @@ -1738,6 +1760,8 @@ static int rk_gmac_probe(struct platform_device *pdev) plat_dat->get_interfaces = rk_get_interfaces; plat_dat->set_clk_tx_rate = rk_set_clk_tx_rate; + plat_dat->suspend = rk_gmac_suspend; + plat_dat->resume = rk_gmac_resume; plat_dat->bsp_priv = rk_gmac_setup(pdev, plat_dat, data); if (IS_ERR(plat_dat->bsp_priv)) @@ -1776,37 +1800,6 @@ static void rk_gmac_remove(struct platform_device *pdev) clk_put(bsp_priv->clk_phy); } -#ifdef CONFIG_PM_SLEEP -static int rk_gmac_suspend(struct device *dev) -{ - struct rk_priv_data *bsp_priv = get_stmmac_bsp_priv(dev); - int ret = stmmac_suspend(dev); - - /* Keep the PHY up if we use Wake-on-Lan. */ - if (!device_may_wakeup(dev)) { - rk_gmac_powerdown(bsp_priv); - bsp_priv->suspended = true; - } - - return ret; -} - -static int rk_gmac_resume(struct device *dev) -{ - struct rk_priv_data *bsp_priv = get_stmmac_bsp_priv(dev); - - /* The PHY was up for Wake-on-Lan. */ - if (bsp_priv->suspended) { - rk_gmac_powerup(bsp_priv); - bsp_priv->suspended = false; - } - - return stmmac_resume(dev); -} -#endif /* CONFIG_PM_SLEEP */ - -static SIMPLE_DEV_PM_OPS(rk_gmac_pm_ops, rk_gmac_suspend, rk_gmac_resume); - static const struct of_device_id rk_gmac_dwmac_match[] = { { .compatible = "rockchip,px30-gmac", .data = &px30_ops }, { .compatible = "rockchip,rk3128-gmac", .data = &rk3128_ops }, @@ -1832,7 +1825,7 @@ static struct platform_driver rk_gmac_dwmac_driver = { .remove = rk_gmac_remove, .driver = { .name = "rk_gmac-dwmac", - .pm = &rk_gmac_pm_ops, + .pm = &stmmac_simple_pm_ops, .of_match_table = rk_gmac_dwmac_match, }, }; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c index 1eb16eec9c0d..77a04c4579c9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c @@ -498,6 +498,26 @@ static int stm32mp1_parse_data(struct stm32_dwmac *dwmac, return err; } +static int stm32_dwmac_suspend(struct device *dev, void *bsp_priv) +{ + struct stm32_dwmac *dwmac = bsp_priv; + + stm32_dwmac_clk_disable(dwmac); + + return dwmac->ops->suspend ? 
dwmac->ops->suspend(dwmac) : 0; +} + +static int stm32_dwmac_resume(struct device *dev, void *bsp_priv) +{ + struct stmmac_priv *priv = netdev_priv(dev_get_drvdata(dev)); + struct stm32_dwmac *dwmac = bsp_priv; + + if (dwmac->ops->resume) + dwmac->ops->resume(dwmac); + + return stm32_dwmac_init(priv->plat); +} + static int stm32_dwmac_probe(struct platform_device *pdev) { struct plat_stmmacenet_data *plat_dat; @@ -535,6 +555,8 @@ static int stm32_dwmac_probe(struct platform_device *pdev) plat_dat->flags |= STMMAC_FLAG_EN_TX_LPI_CLK_PHY_CAP; plat_dat->bsp_priv = dwmac; + plat_dat->suspend = stm32_dwmac_suspend; + plat_dat->resume = stm32_dwmac_resume; ret = stm32_dwmac_init(plat_dat); if (ret) @@ -600,50 +622,6 @@ static void stm32mp1_resume(struct stm32_dwmac *dwmac) clk_disable_unprepare(dwmac->clk_ethstp); } -#ifdef CONFIG_PM_SLEEP -static int stm32_dwmac_suspend(struct device *dev) -{ - struct net_device *ndev = dev_get_drvdata(dev); - struct stmmac_priv *priv = netdev_priv(ndev); - struct stm32_dwmac *dwmac = priv->plat->bsp_priv; - - int ret; - - ret = stmmac_suspend(dev); - if (ret) - return ret; - - stm32_dwmac_clk_disable(dwmac); - - if (dwmac->ops->suspend) - ret = dwmac->ops->suspend(dwmac); - - return ret; -} - -static int stm32_dwmac_resume(struct device *dev) -{ - struct net_device *ndev = dev_get_drvdata(dev); - struct stmmac_priv *priv = netdev_priv(ndev); - struct stm32_dwmac *dwmac = priv->plat->bsp_priv; - int ret; - - if (dwmac->ops->resume) - dwmac->ops->resume(dwmac); - - ret = stm32_dwmac_init(priv->plat); - if (ret) - return ret; - - ret = stmmac_resume(dev); - - return ret; -} -#endif /* CONFIG_PM_SLEEP */ - -static SIMPLE_DEV_PM_OPS(stm32_dwmac_pm_ops, - stm32_dwmac_suspend, stm32_dwmac_resume); - static struct stm32_ops stm32mcu_dwmac_data = { .set_mode = stm32mcu_set_mode }; @@ -691,7 +669,7 @@ static struct platform_driver stm32_dwmac_driver = { .remove = stm32_dwmac_remove, .driver = { .name = "stm32-dwmac", - .pm = &stm32_dwmac_pm_ops, + .pm = &stmmac_simple_pm_ops, .of_match_table = stm32_dwmac_match, }, }; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index 2796dc426943..690f3650f84e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -31,10 +31,6 @@ */ /* struct emac_variant - Describe dwmac-sun8i hardware variant - * @default_syscon_value: The default value of the EMAC register in syscon - * This value is used for disabling properly EMAC - * and used as a good starting value in case of the - * boot process(uboot) leave some stuff. * @syscon_field reg_field for the syscon's gmac register * @soc_has_internal_phy: Does the MAC embed an internal PHY * @support_mii: Does the MAC handle MII @@ -48,7 +44,6 @@ * value of zero indicates this is not supported. 
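The intel, loongson, mediatek, rk and stm32 conversions above all take the same shape: the glue driver drops its private dev_pm_ops and supplies bsp-level work through the new plat->suspend/plat->resume callbacks instead (the core-side hooks that call them appear in the stmmac_main.c hunks further down). A hedged sketch of such a pair for a hypothetical glue driver; the foo_* names are invented, only the two-argument callback signature is taken from the hunks:

#include <linux/clk.h>
#include <linux/device.h>
#include <linux/stmmac.h>

struct foo_priv {
	struct clk *aux_clk;
};

/* Only bsp-specific work here: the core has already done its own suspend
 * before calling this hook, and calls the resume hook before it
 * re-initialises the MAC.
 */
static int foo_dwmac_suspend(struct device *dev, void *bsp_priv)
{
	struct foo_priv *foo = bsp_priv;

	clk_disable_unprepare(foo->aux_clk);
	return 0;
}

static int foo_dwmac_resume(struct device *dev, void *bsp_priv)
{
	struct foo_priv *foo = bsp_priv;

	return clk_prepare_enable(foo->aux_clk);
}

static void foo_dwmac_wire_pm(struct plat_stmmacenet_data *plat_dat,
			      struct foo_priv *foo)
{
	plat_dat->bsp_priv = foo;
	plat_dat->suspend = foo_dwmac_suspend;
	plat_dat->resume = foo_dwmac_resume;
	/* the driver's struct dev_pm_ops can then be stmmac_simple_pm_ops */
}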
*/ struct emac_variant { - u32 default_syscon_value; const struct reg_field *syscon_field; bool soc_has_internal_phy; bool support_mii; @@ -94,7 +89,6 @@ static const struct reg_field sun8i_ccu_reg_field = { }; static const struct emac_variant emac_variant_h3 = { - .default_syscon_value = 0x58000, .syscon_field = &sun8i_syscon_reg_field, .soc_has_internal_phy = true, .support_mii = true, @@ -105,14 +99,12 @@ static const struct emac_variant emac_variant_h3 = { }; static const struct emac_variant emac_variant_v3s = { - .default_syscon_value = 0x38000, .syscon_field = &sun8i_syscon_reg_field, .soc_has_internal_phy = true, .support_mii = true }; static const struct emac_variant emac_variant_a83t = { - .default_syscon_value = 0, .syscon_field = &sun8i_syscon_reg_field, .soc_has_internal_phy = false, .support_mii = true, @@ -122,7 +114,6 @@ static const struct emac_variant emac_variant_a83t = { }; static const struct emac_variant emac_variant_r40 = { - .default_syscon_value = 0, .syscon_field = &sun8i_ccu_reg_field, .support_mii = true, .support_rgmii = true, @@ -130,7 +121,6 @@ static const struct emac_variant emac_variant_r40 = { }; static const struct emac_variant emac_variant_a64 = { - .default_syscon_value = 0, .syscon_field = &sun8i_syscon_reg_field, .soc_has_internal_phy = false, .support_mii = true, @@ -141,7 +131,6 @@ static const struct emac_variant emac_variant_a64 = { }; static const struct emac_variant emac_variant_h6 = { - .default_syscon_value = 0x50000, .syscon_field = &sun8i_syscon_reg_field, /* The "Internal PHY" of H6 is not on the die. It's on the * co-packaged AC200 chip instead. @@ -933,25 +922,11 @@ static int sun8i_dwmac_set_syscon(struct device *dev, struct sunxi_priv_data *gmac = plat->bsp_priv; struct device_node *node = dev->of_node; int ret; - u32 reg, val; - - ret = regmap_field_read(gmac->regmap_field, &val); - if (ret) { - dev_err(dev, "Fail to read from regmap field.\n"); - return ret; - } - - reg = gmac->variant->default_syscon_value; - if (reg != val) - dev_warn(dev, - "Current syscon value is not the default %x (expect %x)\n", - val, reg); + u32 reg = 0, val; if (gmac->variant->soc_has_internal_phy) { if (of_property_read_bool(node, "allwinner,leds-active-low")) reg |= H3_EPHY_LED_POL; - else - reg &= ~H3_EPHY_LED_POL; /* Force EPHY xtal frequency to 24MHz. */ reg |= H3_EPHY_CLK_SEL; @@ -965,11 +940,6 @@ static int sun8i_dwmac_set_syscon(struct device *dev, * address. No need to mask it again. */ reg |= ret << H3_EPHY_ADDR_SHIFT; - } else { - /* For SoCs without internal PHY the PHY selection bit should be - * set to 0 (external PHY). 
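With default_syscon_value gone, sun8i_dwmac_set_syscon() builds the syscon word up from zero, which is what makes every clear-before-set mask in the function dead code, including the tx/rx delay masking just below. A stand-alone model of the new composition; bit positions are illustrative, not the real layout:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define EPHY_LED_POL	(1u << 17)	/* illustrative bit position */
#define ETXDC_SHIFT	10		/* illustrative field position */

int main(void)
{
	bool leds_active_low = true;	/* "allwinner,leds-active-low" */
	uint32_t tx_delay_ps = 300;	/* "allwinner,tx-delay-ps" */
	uint32_t reg = 0;		/* built from empty, not from a default */

	if (leds_active_low)
		reg |= EPHY_LED_POL;	/* no else-branch clear needed */

	reg |= (tx_delay_ps / 100) << ETXDC_SHIFT;	/* hardware steps of 100 ps */

	printf("syscon value = 0x%08x\n", reg);
	return 0;
}

The same reasoning lets sun8i_dwmac_unset_syscon() shrink to a single shutdown write for the internal-PHY case.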
- */ - reg &= ~H3_EPHY_SELECT; } if (!of_property_read_u32(node, "allwinner,tx-delay-ps", &val)) { @@ -980,8 +950,6 @@ static int sun8i_dwmac_set_syscon(struct device *dev, val /= 100; dev_dbg(dev, "set tx-delay to %x\n", val); if (val <= gmac->variant->tx_delay_max) { - reg &= ~(gmac->variant->tx_delay_max << - SYSCON_ETXDC_SHIFT); reg |= (val << SYSCON_ETXDC_SHIFT); } else { dev_err(dev, "Invalid TX clock delay: %d\n", @@ -998,8 +966,6 @@ static int sun8i_dwmac_set_syscon(struct device *dev, val /= 100; dev_dbg(dev, "set rx-delay to %x\n", val); if (val <= gmac->variant->rx_delay_max) { - reg &= ~(gmac->variant->rx_delay_max << - SYSCON_ERXDC_SHIFT); reg |= (val << SYSCON_ERXDC_SHIFT); } else { dev_err(dev, "Invalid RX clock delay: %d\n", @@ -1008,11 +974,6 @@ static int sun8i_dwmac_set_syscon(struct device *dev, } } - /* Clear interface mode bits */ - reg &= ~(SYSCON_ETCS_MASK | SYSCON_EPIT); - if (gmac->variant->support_rmii) - reg &= ~SYSCON_RMII_EN; - switch (plat->mac_interface) { case PHY_INTERFACE_MODE_MII: /* default */ @@ -1039,9 +1000,9 @@ static int sun8i_dwmac_set_syscon(struct device *dev, static void sun8i_dwmac_unset_syscon(struct sunxi_priv_data *gmac) { - u32 reg = gmac->variant->default_syscon_value; - - regmap_field_write(gmac->regmap_field, reg); + if (gmac->variant->soc_has_internal_phy) + regmap_field_write(gmac->regmap_field, + (H3_EPHY_SHUTDOWN | H3_EPHY_SELECT)); } static void sun8i_dwmac_exit(struct platform_device *pdev, void *priv) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h index f4694fd576f5..3dec1a264cf6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h @@ -341,6 +341,7 @@ static inline u32 mtl_chanx_base_addr(const struct dwmac4_addrs *addrs, #define MTL_OP_MODE_RFA_SHIFT 8 #define MTL_OP_MODE_EHFC BIT(7) +#define MTL_OP_MODE_DIS_TCP_EF BIT(6) #define MTL_OP_MODE_RTC_MASK GENMASK(1, 0) #define MTL_OP_MODE_RTC_SHIFT 0 diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c index a5fb31eb0192..aac68dc28dc1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c @@ -110,16 +110,20 @@ static int dwmac4_wrback_get_rx_status(struct stmmac_extra_stats *x, message_type = (rdes1 & ERDES4_MSG_TYPE_MASK) >> 8; - if (rdes1 & RDES1_IP_HDR_ERROR) + if (rdes1 & RDES1_IP_HDR_ERROR) { x->ip_hdr_err++; + ret |= csum_none; + } if (rdes1 & RDES1_IP_CSUM_BYPASSED) x->ip_csum_bypassed++; if (rdes1 & RDES1_IPV4_HEADER) x->ipv4_pkt_rcvd++; if (rdes1 & RDES1_IPV6_HEADER) x->ipv6_pkt_rcvd++; - if (rdes1 & RDES1_IP_PAYLOAD_ERROR) + if (rdes1 & RDES1_IP_PAYLOAD_ERROR) { x->ip_payload_err++; + ret |= csum_none; + } if (message_type == RDES_EXT_NO_PTP) x->no_ptp_rx_msg_type_ext++; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c index 0cb84a0041a4..d87a8b595e6a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c @@ -268,6 +268,8 @@ static void dwmac4_dma_rx_chan_op_mode(struct stmmac_priv *priv, mtl_rx_op = readl(ioaddr + MTL_CHAN_RX_OP_MODE(dwmac4_addrs, channel)); + mtl_rx_op |= MTL_OP_MODE_DIS_TCP_EF; + if (mode == SF_DMA_MODE) { pr_debug("GMAC: enable RX store and forward mode\n"); mtl_rx_op |= MTL_OP_MODE_RSF; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c 
b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c index 4846bf49c576..467f1a05747e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c @@ -251,7 +251,7 @@ void dwmac_dma_flush_tx_fifo(void __iomem *ioaddr) void stmmac_set_mac_addr(void __iomem *ioaddr, const u8 addr[6], unsigned int high, unsigned int low) { - unsigned long data; + u32 data; data = (addr[5] << 8) | addr[4]; /* For MAC Addr registers we have to set the Address Enable (AE) diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.c b/drivers/net/ethernet/stmicro/stmmac/hwif.c index 99635b37044a..3f7c765dcb79 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.c +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.c @@ -100,7 +100,7 @@ int stmmac_reset(struct stmmac_priv *priv, void __iomem *ioaddr) return -EINVAL; if (plat && plat->fix_soc_reset) - return plat->fix_soc_reset(plat, ioaddr); + return plat->fix_soc_reset(priv, ioaddr); return stmmac_do_callback(priv, dma, reset, ioaddr); } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index cda09cf5dcca..78d6b3737a26 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -289,7 +289,6 @@ struct stmmac_priv { u32 msg_enable; int wolopts; int wol_irq; - bool wol_irq_disabled; int clk_csr; struct timer_list eee_ctrl_timer; int lpi_irq; @@ -374,6 +373,18 @@ enum stmmac_state { STMMAC_SERVICE_SCHED, }; +extern const struct dev_pm_ops stmmac_simple_pm_ops; + +static inline bool stmmac_wol_enabled_mac(struct stmmac_priv *priv) +{ + return priv->plat->pmt && device_may_wakeup(priv->device); +} + +static inline bool stmmac_wol_enabled_phy(struct stmmac_priv *priv) +{ + return !priv->plat->pmt && device_may_wakeup(priv->device); +} + int stmmac_mdio_unregister(struct net_device *ndev); int stmmac_mdio_register(struct net_device *ndev); int stmmac_mdio_reset(struct mii_bus *mii); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index 77758a7299b4..39fa1ec92f82 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -803,7 +803,6 @@ static void stmmac_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol) static int stmmac_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) { struct stmmac_priv *priv = netdev_priv(dev); - u32 support = WAKE_MAGIC | WAKE_UCAST; if (!device_can_wakeup(priv->device)) return -EOPNOTSUPP; @@ -816,29 +815,7 @@ static int stmmac_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) return ret; } - /* By default almost all GMAC devices support the WoL via - * magic frame but we can disable it if the HW capability - * register shows no support for pmt_magic_frame. 
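As the remainder of this hunk shows below, stmmac_set_wol() now just records wolopts and toggles the wakeup source, leaving WAKE_* validation to the ethtool core against what stmmac_get_wol() reports. For reference, the user-space request that lands in this handler is ethtool's ETHTOOL_SWOL ioctl; a minimal sketch, assuming an interface named eth0:

#include <linux/ethtool.h>
#include <linux/sockios.h>
#include <net/if.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	struct ethtool_wolinfo wol = { .cmd = ETHTOOL_SWOL, .wolopts = WAKE_MAGIC };
	struct ifreq ifr;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0) {
		perror("socket");
		return 1;
	}
	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);	/* interface name assumed */
	ifr.ifr_data = (char *)&wol;

	if (ioctl(fd, SIOCETHTOOL, &ifr) < 0)	/* ends up in stmmac_set_wol() */
		perror("ETHTOOL_SWOL");
	close(fd);
	return 0;
}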
*/ - if ((priv->hw_cap_support) && (!priv->dma_cap.pmt_magic_frame)) - wol->wolopts &= ~WAKE_MAGIC; - - if (wol->wolopts & ~support) - return -EINVAL; - - if (wol->wolopts) { - pr_info("stmmac: wakeup enable\n"); - device_set_wakeup_enable(priv->device, 1); - /* Avoid unbalanced enable_irq_wake calls */ - if (priv->wol_irq_disabled) - enable_irq_wake(priv->wol_irq); - priv->wol_irq_disabled = false; - } else { - device_set_wakeup_enable(priv->device, 0); - /* Avoid unbalanced disable_irq_wake calls */ - if (!priv->wol_irq_disabled) - disable_irq_wake(priv->wol_irq); - priv->wol_irq_disabled = true; - } + device_set_wakeup_enable(priv->device, !!wol->wolopts); mutex_lock(&priv->lock); priv->wolopts = wol->wolopts; @@ -852,9 +829,6 @@ static int stmmac_ethtool_op_get_eee(struct net_device *dev, { struct stmmac_priv *priv = netdev_priv(dev); - if (!priv->dma_cap.eee) - return -EOPNOTSUPP; - return phylink_ethtool_get_eee(priv->phylink, edata); } @@ -863,9 +837,6 @@ static int stmmac_ethtool_op_set_eee(struct net_device *dev, { struct stmmac_priv *priv = netdev_priv(dev); - if (!priv->dma_cap.eee) - return -EOPNOTSUPP; - return phylink_ethtool_set_eee(priv->phylink, edata); } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 7b16d1207b80..f0abd99fd137 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -29,6 +29,7 @@ #include <linux/dma-mapping.h> #include <linux/slab.h> #include <linux/pm_runtime.h> +#include <linux/pm_wakeirq.h> #include <linux/prefetch.h> #include <linux/pinctrl/consumer.h> #ifdef CONFIG_DEBUG_FS @@ -148,33 +149,34 @@ static void stmmac_exit_fs(struct net_device *dev); int stmmac_bus_clks_config(struct stmmac_priv *priv, bool enabled) { - int ret = 0; + struct plat_stmmacenet_data *plat_dat = priv->plat; + int ret; if (enabled) { - ret = clk_prepare_enable(priv->plat->stmmac_clk); + ret = clk_prepare_enable(plat_dat->stmmac_clk); if (ret) return ret; - ret = clk_prepare_enable(priv->plat->pclk); + ret = clk_prepare_enable(plat_dat->pclk); if (ret) { - clk_disable_unprepare(priv->plat->stmmac_clk); + clk_disable_unprepare(plat_dat->stmmac_clk); return ret; } - if (priv->plat->clks_config) { - ret = priv->plat->clks_config(priv->plat->bsp_priv, enabled); + if (plat_dat->clks_config) { + ret = plat_dat->clks_config(plat_dat->bsp_priv, enabled); if (ret) { - clk_disable_unprepare(priv->plat->stmmac_clk); - clk_disable_unprepare(priv->plat->pclk); + clk_disable_unprepare(plat_dat->stmmac_clk); + clk_disable_unprepare(plat_dat->pclk); return ret; } } } else { - clk_disable_unprepare(priv->plat->stmmac_clk); - clk_disable_unprepare(priv->plat->pclk); - if (priv->plat->clks_config) - priv->plat->clks_config(priv->plat->bsp_priv, enabled); + clk_disable_unprepare(plat_dat->stmmac_clk); + clk_disable_unprepare(plat_dat->pclk); + if (plat_dat->clks_config) + plat_dat->clks_config(plat_dat->bsp_priv, enabled); } - return ret; + return 0; } EXPORT_SYMBOL_GPL(stmmac_bus_clks_config); @@ -3725,7 +3727,6 @@ static int stmmac_request_irq_multi_msi(struct net_device *dev) /* Request the Wake IRQ in case of another line * is used for WoL */ - priv->wol_irq_disabled = true; if (priv->wol_irq > 0 && priv->wol_irq != dev->irq) { int_name = priv->int_name_wol; sprintf(int_name, "%s:%s", dev->name, "wol"); @@ -3886,7 +3887,6 @@ static int stmmac_request_irq_single(struct net_device *dev) /* Request the Wake IRQ in case of another line * is used for WoL */ - 
priv->wol_irq_disabled = true; if (priv->wol_irq > 0 && priv->wol_irq != dev->irq) { ret = request_irq(priv->wol_irq, stmmac_interrupt, IRQF_SHARED, dev->name, dev); @@ -4140,8 +4140,13 @@ static int stmmac_release(struct net_device *dev) struct stmmac_priv *priv = netdev_priv(dev); u32 chan; + /* If the PHY or MAC has WoL enabled, then the PHY will not be + * suspended when phylink_stop() is called below. Set the PHY + * to its slowest speed to save power. + */ if (device_may_wakeup(priv->device)) phylink_speed_down(priv->phylink, false); + /* Stop and disconnect the PHY */ phylink_stop(priv->phylink); phylink_disconnect_phy(priv->phylink); @@ -5735,7 +5740,8 @@ drain_data: skb->protocol = eth_type_trans(skb, priv->dev); - if (unlikely(!coe) || !stmmac_has_ip_ethertype(skb)) + if (unlikely(!coe) || !stmmac_has_ip_ethertype(skb) || + (status & csum_none)) skb_checksum_none_assert(skb); else skb->ip_summed = CHECKSUM_UNNECESSARY; @@ -7242,7 +7248,6 @@ static int stmmac_hw_init(struct stmmac_priv *priv) priv->plat->enh_desc = priv->dma_cap.enh_desc; priv->plat->pmt = priv->dma_cap.pmt_remote_wake_up && !(priv->plat->flags & STMMAC_FLAG_USE_PHY_WOL); - priv->hw->pmt = priv->plat->pmt; if (priv->dma_cap.hash_tb_sz) { priv->hw->multicast_filter_bins = (BIT(priv->dma_cap.hash_tb_sz) << 5); @@ -7280,6 +7285,7 @@ static int stmmac_hw_init(struct stmmac_priv *priv) if (priv->plat->pmt) { dev_info(priv->device, "Wake-Up On Lan supported\n"); device_set_wakeup_capable(priv->device, 1); + devm_pm_set_wake_irq(priv->device, priv->wol_irq); } if (priv->dma_cap.tsoen) @@ -7860,7 +7866,7 @@ int stmmac_suspend(struct device *dev) priv->plat->serdes_powerdown(ndev, priv->plat->bsp_priv); /* Enable Power down mode by programming the PMT regs */ - if (device_may_wakeup(priv->device) && priv->plat->pmt) { + if (stmmac_wol_enabled_mac(priv)) { stmmac_pmt(priv, priv->hw, priv->wolopts); priv->irq_wake = 1; } else { @@ -7871,16 +7877,18 @@ mutex_unlock(&priv->lock); rtnl_lock(); - if (device_may_wakeup(priv->device) && !priv->plat->pmt) + if (stmmac_wol_enabled_phy(priv)) phylink_speed_down(priv->phylink, false); - phylink_suspend(priv->phylink, - device_may_wakeup(priv->device) && priv->plat->pmt); + phylink_suspend(priv->phylink, stmmac_wol_enabled_mac(priv)); rtnl_unlock(); if (stmmac_fpe_supported(priv)) ethtool_mmsv_stop(&priv->fpe_cfg.mmsv); + if (priv->plat->suspend) + return priv->plat->suspend(dev, priv->plat->bsp_priv); + return 0; } EXPORT_SYMBOL_GPL(stmmac_suspend); @@ -7933,6 +7941,12 @@ int stmmac_resume(struct device *dev) struct stmmac_priv *priv = netdev_priv(ndev); int ret; + if (priv->plat->resume) { + ret = priv->plat->resume(dev, priv->plat->bsp_priv); + if (ret) + return ret; + } + if (!netif_running(ndev)) return 0; @@ -7942,7 +7956,7 @@ int stmmac_resume(struct device *dev) * this bit because it can generate problems while resuming * from other devices (e.g. serial console).
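The stmmac_wol_enabled_mac()/stmmac_wol_enabled_phy() rewrites above are mechanical once you see that the two predicates partition the wakeup-enabled state: at most one is true, and !stmmac_wol_enabled_mac() covers both no-wakeup and PHY-serviced wakeup, matching the old !may_wakeup || !pmt tests. A stand-alone truth table:

#include <stdbool.h>
#include <stdio.h>

/* mirrors the inline helpers added in stmmac.h */
static bool wol_mac(bool pmt, bool may_wakeup) { return pmt && may_wakeup; }
static bool wol_phy(bool pmt, bool may_wakeup) { return !pmt && may_wakeup; }

int main(void)
{
	for (int pmt = 0; pmt <= 1; pmt++)
		for (int wk = 0; wk <= 1; wk++)
			printf("pmt=%d may_wakeup=%d -> mac=%d phy=%d\n",
			       pmt, wk, wol_mac(pmt, wk), wol_phy(pmt, wk));
	return 0;
}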
*/ - if (device_may_wakeup(priv->device) && priv->plat->pmt) { + if (stmmac_wol_enabled_mac(priv)) { mutex_lock(&priv->lock); stmmac_pmt(priv, priv->hw, 0); mutex_unlock(&priv->lock); @@ -7977,7 +7991,14 @@ int stmmac_resume(struct device *dev) stmmac_free_tx_skbufs(priv); stmmac_clear_descriptors(priv, &priv->dma_conf); - stmmac_hw_setup(ndev, false); + ret = stmmac_hw_setup(ndev, false); + if (ret < 0) { + netdev_err(priv->dev, "%s: Hw setup failed\n", __func__); + mutex_unlock(&priv->lock); + rtnl_unlock(); + return ret; + } + stmmac_init_coalesce(priv); phylink_rx_clk_stop_block(priv->phylink); stmmac_set_rx_mode(ndev); @@ -7995,7 +8016,7 @@ int stmmac_resume(struct device *dev) * workqueue thread, which will race with initialisation. */ phylink_resume(priv->phylink); - if (device_may_wakeup(priv->device) && !priv->plat->pmt) + if (stmmac_wol_enabled_phy(priv)) phylink_speed_up(priv->phylink); rtnl_unlock(); @@ -8006,6 +8027,10 @@ int stmmac_resume(struct device *dev) } EXPORT_SYMBOL_GPL(stmmac_resume); +/* This is not the same as EXPORT_GPL_SIMPLE_DEV_PM_OPS() when CONFIG_PM=n */ +DEFINE_SIMPLE_DEV_PM_OPS(stmmac_simple_pm_ops, stmmac_suspend, stmmac_resume); +EXPORT_SYMBOL_GPL(stmmac_simple_pm_ops); + #ifndef MODULE static int __init stmmac_cmdline_opt(char *str) { diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index 836f2848dfeb..0a302b711bc2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -131,12 +131,9 @@ err_disable_clks: static int stmmac_xgmac2_mdio_read_c22(struct mii_bus *bus, int phyaddr, int phyreg) { - struct net_device *ndev = bus->priv; - struct stmmac_priv *priv; + struct stmmac_priv *priv = netdev_priv(bus->priv); u32 addr; - priv = netdev_priv(ndev); - /* Until ver 2.20 XGMAC does not support C22 addr >= 4 */ if (priv->synopsys_id < DWXGMAC_CORE_2_20 && phyaddr > MII_XGMAC_MAX_C22ADDR) @@ -150,12 +147,9 @@ static int stmmac_xgmac2_mdio_read_c22(struct mii_bus *bus, int phyaddr, static int stmmac_xgmac2_mdio_read_c45(struct mii_bus *bus, int phyaddr, int devad, int phyreg) { - struct net_device *ndev = bus->priv; - struct stmmac_priv *priv; + struct stmmac_priv *priv = netdev_priv(bus->priv); u32 addr; - priv = netdev_priv(ndev); - stmmac_xgmac2_c45_format(priv, phyaddr, devad, phyreg, &addr); return stmmac_xgmac2_mdio_read(priv, addr, MII_XGMAC_BUSY); @@ -209,12 +203,9 @@ err_disable_clks: static int stmmac_xgmac2_mdio_write_c22(struct mii_bus *bus, int phyaddr, int phyreg, u16 phydata) { - struct net_device *ndev = bus->priv; - struct stmmac_priv *priv; + struct stmmac_priv *priv = netdev_priv(bus->priv); u32 addr; - priv = netdev_priv(ndev); - /* Until ver 2.20 XGMAC does not support C22 addr >= 4 */ if (priv->synopsys_id < DWXGMAC_CORE_2_20 && phyaddr > MII_XGMAC_MAX_C22ADDR) @@ -229,12 +220,9 @@ static int stmmac_xgmac2_mdio_write_c22(struct mii_bus *bus, int phyaddr, static int stmmac_xgmac2_mdio_write_c45(struct mii_bus *bus, int phyaddr, int devad, int phyreg, u16 phydata) { - struct net_device *ndev = bus->priv; - struct stmmac_priv *priv; + struct stmmac_priv *priv = netdev_priv(bus->priv); u32 addr; - priv = netdev_priv(ndev); - stmmac_xgmac2_c45_format(priv, phyaddr, devad, phyreg, &addr); return stmmac_xgmac2_mdio_write(priv, addr, MII_XGMAC_BUSY, @@ -274,8 +262,7 @@ static int stmmac_mdio_read(struct stmmac_priv *priv, int data, u32 value) */ static int stmmac_mdio_read_c22(struct mii_bus *bus, int phyaddr, int 
phyreg) { - struct net_device *ndev = bus->priv; - struct stmmac_priv *priv = netdev_priv(ndev); + struct stmmac_priv *priv = netdev_priv(bus->priv); u32 value = MII_BUSY; int data = 0; @@ -312,25 +299,20 @@ static int stmmac_mdio_read_c22(struct mii_bus *bus, int phyaddr, int phyreg) static int stmmac_mdio_read_c45(struct mii_bus *bus, int phyaddr, int devad, int phyreg) { - struct net_device *ndev = bus->priv; - struct stmmac_priv *priv = netdev_priv(ndev); + struct stmmac_priv *priv = netdev_priv(bus->priv); u32 value = MII_BUSY; int data = 0; - data = pm_runtime_get_sync(priv->device); - if (data < 0) { - pm_runtime_put_noidle(priv->device); + data = pm_runtime_resume_and_get(priv->device); + if (data < 0) return data; - } value |= (phyaddr << priv->hw->mii.addr_shift) & priv->hw->mii.addr_mask; - value |= (phyreg << priv->hw->mii.reg_shift) & priv->hw->mii.reg_mask; value |= (priv->clk_csr << priv->hw->mii.clk_csr_shift) & priv->hw->mii.clk_csr_mask; value |= MII_GMAC4_READ; value |= MII_GMAC4_C45E; - value &= ~priv->hw->mii.reg_mask; value |= (devad << priv->hw->mii.reg_shift) & priv->hw->mii.reg_mask; data |= phyreg << MII_GMAC4_REG_ADDR_SHIFT; @@ -373,8 +355,7 @@ static int stmmac_mdio_write(struct stmmac_priv *priv, int data, u32 value) static int stmmac_mdio_write_c22(struct mii_bus *bus, int phyaddr, int phyreg, u16 phydata) { - struct net_device *ndev = bus->priv; - struct stmmac_priv *priv = netdev_priv(ndev); + struct stmmac_priv *priv = netdev_priv(bus->priv); int ret, data = phydata; u32 value = MII_BUSY; @@ -412,27 +393,22 @@ static int stmmac_mdio_write_c22(struct mii_bus *bus, int phyaddr, int phyreg, static int stmmac_mdio_write_c45(struct mii_bus *bus, int phyaddr, int devad, int phyreg, u16 phydata) { - struct net_device *ndev = bus->priv; - struct stmmac_priv *priv = netdev_priv(ndev); + struct stmmac_priv *priv = netdev_priv(bus->priv); int ret, data = phydata; u32 value = MII_BUSY; - ret = pm_runtime_get_sync(priv->device); - if (ret < 0) { - pm_runtime_put_noidle(priv->device); + ret = pm_runtime_resume_and_get(priv->device); + if (ret < 0) return ret; - } value |= (phyaddr << priv->hw->mii.addr_shift) & priv->hw->mii.addr_mask; - value |= (phyreg << priv->hw->mii.reg_shift) & priv->hw->mii.reg_mask; value |= (priv->clk_csr << priv->hw->mii.clk_csr_shift) & priv->hw->mii.clk_csr_mask; value |= MII_GMAC4_WRITE; value |= MII_GMAC4_C45E; - value &= ~priv->hw->mii.reg_mask; value |= (devad << priv->hw->mii.reg_shift) & priv->hw->mii.reg_mask; data |= phyreg << MII_GMAC4_REG_ADDR_SHIFT; @@ -452,8 +428,7 @@ static int stmmac_mdio_write_c45(struct mii_bus *bus, int phyaddr, int stmmac_mdio_reset(struct mii_bus *bus) { #if IS_ENABLED(CONFIG_STMMAC_PLATFORM) - struct net_device *ndev = bus->priv; - struct stmmac_priv *priv = netdev_priv(ndev); + struct stmmac_priv *priv = netdev_priv(bus->priv); unsigned int mii_address = priv->hw->mii.addr; #ifdef CONFIG_OF @@ -497,12 +472,11 @@ int stmmac_mdio_reset(struct mii_bus *bus) int stmmac_pcs_setup(struct net_device *ndev) { + struct stmmac_priv *priv = netdev_priv(ndev); struct fwnode_handle *devnode, *pcsnode; struct dw_xpcs *xpcs = NULL; - struct stmmac_priv *priv; int addr, ret; - priv = netdev_priv(ndev); devnode = priv->plat->port_node; if (priv->plat->pcs_init) { diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c index 9c1b54b701f7..e6a7d0ddac2a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c 
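The MDIO accessors above also move from pm_runtime_get_sync() plus pm_runtime_put_noidle() to pm_runtime_resume_and_get(), which already drops the usage count when the resume fails. A sketch of the resulting idiom in a hypothetical accessor; the foo_* names are invented:

#include <linux/device.h>
#include <linux/pm_runtime.h>
#include <linux/types.h>

static u32 foo_hw_read(struct device *dev)
{
	return 0;	/* stand-in for the real MMIO access */
}

static int foo_read_reg(struct device *dev, u32 *val)
{
	int ret;

	ret = pm_runtime_resume_and_get(dev);
	if (ret < 0)
		return ret;	/* usage count already dropped, no put_noidle() */

	*val = foo_hw_read(dev);

	pm_runtime_put(dev);
	return 0;
}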
@@ -138,6 +138,37 @@ static const struct stmmac_pci_info snps_gmac5_pci_info = { .setup = snps_gmac5_default_data, }; +static int stmmac_pci_suspend(struct device *dev, void *bsp_priv) +{ + struct pci_dev *pdev = to_pci_dev(dev); + int ret; + + ret = pci_save_state(pdev); + if (ret) + return ret; + + pci_disable_device(pdev); + pci_wake_from_d3(pdev, true); + return 0; +} + +static int stmmac_pci_resume(struct device *dev, void *bsp_priv) +{ + struct pci_dev *pdev = to_pci_dev(dev); + int ret; + + pci_restore_state(pdev); + pci_set_power_state(pdev, PCI_D0); + + ret = pci_enable_device(pdev); + if (ret) + return ret; + + pci_set_master(pdev); + + return 0; +} + /** * stmmac_pci_probe * @@ -217,6 +248,9 @@ static int stmmac_pci_probe(struct pci_dev *pdev, plat->safety_feat_cfg->prtyen = 1; plat->safety_feat_cfg->tmouten = 1; + plat->suspend = stmmac_pci_suspend; + plat->resume = stmmac_pci_resume; + return stmmac_dvr_probe(&pdev->dev, plat, &res); } @@ -231,43 +265,6 @@ static void stmmac_pci_remove(struct pci_dev *pdev) stmmac_dvr_remove(&pdev->dev); } -static int __maybe_unused stmmac_pci_suspend(struct device *dev) -{ - struct pci_dev *pdev = to_pci_dev(dev); - int ret; - - ret = stmmac_suspend(dev); - if (ret) - return ret; - - ret = pci_save_state(pdev); - if (ret) - return ret; - - pci_disable_device(pdev); - pci_wake_from_d3(pdev, true); - return 0; -} - -static int __maybe_unused stmmac_pci_resume(struct device *dev) -{ - struct pci_dev *pdev = to_pci_dev(dev); - int ret; - - pci_restore_state(pdev); - pci_set_power_state(pdev, PCI_D0); - - ret = pci_enable_device(pdev); - if (ret) - return ret; - - pci_set_master(pdev); - - return stmmac_resume(dev); -} - -static SIMPLE_DEV_PM_OPS(stmmac_pm_ops, stmmac_pci_suspend, stmmac_pci_resume); - /* synthetic ID, no official vendor */ #define PCI_VENDOR_ID_STMMAC 0x0700 @@ -289,7 +286,7 @@ static struct pci_driver stmmac_pci_driver = { .probe = stmmac_pci_probe, .remove = stmmac_pci_remove, .driver = { - .pm = &stmmac_pm_ops, + .pm = &stmmac_simple_pm_ops, }, }; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 030fcf1b5993..a3e077f225d1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -811,6 +811,22 @@ static void stmmac_pltfr_exit(struct platform_device *pdev, plat->exit(pdev, plat->bsp_priv); } +static int stmmac_plat_suspend(struct device *dev, void *bsp_priv) +{ + struct stmmac_priv *priv = netdev_priv(dev_get_drvdata(dev)); + + stmmac_pltfr_exit(to_platform_device(dev), priv->plat); + + return 0; +} + +static int stmmac_plat_resume(struct device *dev, void *bsp_priv) +{ + struct stmmac_priv *priv = netdev_priv(dev_get_drvdata(dev)); + + return stmmac_pltfr_init(to_platform_device(dev), priv->plat); +} + /** * stmmac_pltfr_probe * @pdev: platform device pointer @@ -825,6 +841,11 @@ int stmmac_pltfr_probe(struct platform_device *pdev, { int ret; + if (!plat->suspend && plat->exit) + plat->suspend = stmmac_plat_suspend; + if (!plat->resume && plat->init) + plat->resume = stmmac_plat_resume; + ret = stmmac_pltfr_init(pdev, plat); if (ret) return ret; @@ -886,47 +907,6 @@ void stmmac_pltfr_remove(struct platform_device *pdev) } EXPORT_SYMBOL_GPL(stmmac_pltfr_remove); -/** - * stmmac_pltfr_suspend - * @dev: device pointer - * Description: this function is invoked when suspend the driver and it direcly - * call the main suspend function and then, if required, on some platform, it - * 
can call an exit helper. - */ -static int __maybe_unused stmmac_pltfr_suspend(struct device *dev) -{ - int ret; - struct net_device *ndev = dev_get_drvdata(dev); - struct stmmac_priv *priv = netdev_priv(ndev); - struct platform_device *pdev = to_platform_device(dev); - - ret = stmmac_suspend(dev); - stmmac_pltfr_exit(pdev, priv->plat); - - return ret; -} - -/** - * stmmac_pltfr_resume - * @dev: device pointer - * Description: this function is invoked when resume the driver before calling - * the main resume function, on some platforms, it can call own init helper - * if required. - */ -static int __maybe_unused stmmac_pltfr_resume(struct device *dev) -{ - struct net_device *ndev = dev_get_drvdata(dev); - struct stmmac_priv *priv = netdev_priv(ndev); - struct platform_device *pdev = to_platform_device(dev); - int ret; - - ret = stmmac_pltfr_init(pdev, priv->plat); - if (ret) - return ret; - - return stmmac_resume(dev); -} - static int __maybe_unused stmmac_runtime_suspend(struct device *dev) { struct net_device *ndev = dev_get_drvdata(dev); @@ -954,7 +934,7 @@ static int __maybe_unused stmmac_pltfr_noirq_suspend(struct device *dev) if (!netif_running(ndev)) return 0; - if (!device_may_wakeup(priv->device) || !priv->plat->pmt) { + if (!stmmac_wol_enabled_mac(priv)) { /* Disable clock in case of PWM is off */ clk_disable_unprepare(priv->plat->clk_ptp_ref); @@ -975,7 +955,7 @@ static int __maybe_unused stmmac_pltfr_noirq_resume(struct device *dev) if (!netif_running(ndev)) return 0; - if (!device_may_wakeup(priv->device) || !priv->plat->pmt) { + if (!stmmac_wol_enabled_mac(priv)) { /* enable the clk previously disabled */ ret = pm_runtime_force_resume(dev); if (ret) @@ -994,7 +974,7 @@ static int __maybe_unused stmmac_pltfr_noirq_resume(struct device *dev) } const struct dev_pm_ops stmmac_pltfr_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(stmmac_pltfr_suspend, stmmac_pltfr_resume) + SET_SYSTEM_SLEEP_PM_OPS(stmmac_suspend, stmmac_resume) SET_RUNTIME_PM_OPS(stmmac_runtime_suspend, stmmac_runtime_resume, NULL) SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(stmmac_pltfr_noirq_suspend, stmmac_pltfr_noirq_resume) }; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c index 3767ba495e78..ecbff20771f4 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c @@ -10,6 +10,8 @@ #include "stmmac.h" #include "stmmac_ptp.h" +#define PTP_SAFE_TIME_OFFSET_NS 500000 + /** * stmmac_adjust_freq * @@ -171,7 +173,11 @@ static int stmmac_enable(struct ptp_clock_info *ptp, u32 acr_value; switch (rq->type) { - case PTP_CLK_REQ_PEROUT: + case PTP_CLK_REQ_PEROUT: { + struct timespec64 curr_time; + u64 target_ns = 0; + u64 ns = 0; + /* Reject requests with unsupported flags */ if (rq->perout.flags) return -EOPNOTSUPP; @@ -180,6 +186,31 @@ cfg->start.tv_sec = rq->perout.start.sec; cfg->start.tv_nsec = rq->perout.start.nsec; + + /* A time set in the past won't trigger the start of the flexible PPS generation for + * the GMAC5. For some reason it does for the GMAC4 but setting a time in the past + * should be addressed anyway. Therefore, any value set in the past is considered as + * an offset compared to the current MAC system time. + * Be aware that an offset too low may not trigger flexible PPS generation + * if time spent in this configuration makes the targeted time already outdated. + * To address this, add a safe time offset.
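In plain nanosecond arithmetic, the adjustment that comment describes works out as in the model below; the 500 us PTP_SAFE_TIME_OFFSET_NS matches the hunk, while the current time is invented for the example:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC		1000000000ULL
#define PTP_SAFE_TIME_OFFSET_NS	500000ULL	/* 500 us, as in the hunk */

int main(void)
{
	uint64_t now_ns = 42ULL * NSEC_PER_SEC;	/* invented current MAC time */
	uint64_t start_sec = 0, start_nsec = 100000;	/* request "in the past" */
	uint64_t target_ns;

	/* a sub-offset request is padded so it cannot be stale on arrival */
	if (start_sec == 0 && start_nsec < PTP_SAFE_TIME_OFFSET_NS)
		start_nsec += PTP_SAFE_TIME_OFFSET_NS;

	target_ns = start_sec * NSEC_PER_SEC + start_nsec;

	/* anything still earlier than now + offset becomes now-relative */
	if (target_ns < now_ns + PTP_SAFE_TIME_OFFSET_NS)
		target_ns += now_ns;

	printf("programmed start = %" PRIu64 " ns\n", target_ns);
	return 0;
}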
+ */ + if (!cfg->start.tv_sec && cfg->start.tv_nsec < PTP_SAFE_TIME_OFFSET_NS) + cfg->start.tv_nsec += PTP_SAFE_TIME_OFFSET_NS; + + target_ns = cfg->start.tv_nsec + ((u64)cfg->start.tv_sec * NSEC_PER_SEC); + + stmmac_get_systime(priv, priv->ptpaddr, &ns); + if (ns > TIME64_MAX - PTP_SAFE_TIME_OFFSET_NS) + return -EINVAL; + + curr_time = ns_to_timespec64(ns); + if (target_ns < ns + PTP_SAFE_TIME_OFFSET_NS) { + cfg->start = timespec64_add_safe(cfg->start, curr_time); + if (cfg->start.tv_sec == TIME64_MAX) + return -EINVAL; + } + cfg->period.tv_sec = rq->perout.period.sec; cfg->period.tv_nsec = rq->perout.period.nsec; @@ -190,6 +221,7 @@ static int stmmac_enable(struct ptp_clock_info *ptp, priv->systime_flags); write_unlock_irqrestore(&priv->ptp_lock, flags); break; + } case PTP_CLK_REQ_EXTTS: { u8 channel; diff --git a/drivers/net/ethernet/wangxun/Kconfig b/drivers/net/ethernet/wangxun/Kconfig index 424ec3212128..d138dea7d208 100644 --- a/drivers/net/ethernet/wangxun/Kconfig +++ b/drivers/net/ethernet/wangxun/Kconfig @@ -20,6 +20,7 @@ config LIBWX tristate depends on PTP_1588_CLOCK_OPTIONAL select PAGE_POOL + select DIMLIB help Common library for Wangxun(R) Ethernet drivers. diff --git a/drivers/net/ethernet/wangxun/libwx/wx_ethtool.c b/drivers/net/ethernet/wangxun/libwx/wx_ethtool.c index c12a4cb951f6..9572b9f28e59 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_ethtool.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_ethtool.c @@ -303,6 +303,11 @@ int wx_get_coalesce(struct net_device *netdev, else ec->rx_coalesce_usecs = wx->rx_itr_setting >> 2; + if (wx->adaptive_itr) { + ec->use_adaptive_rx_coalesce = 1; + ec->use_adaptive_tx_coalesce = 1; + } + /* if in mixed tx/rx queues per vector mode, report only rx settings */ if (wx->q_vector[0]->tx.count && wx->q_vector[0]->rx.count) return 0; @@ -334,19 +339,28 @@ int wx_set_coalesce(struct net_device *netdev, return -EOPNOTSUPP; } - if (ec->tx_max_coalesced_frames_irq) - wx->tx_work_limit = ec->tx_max_coalesced_frames_irq; + if (ec->tx_max_coalesced_frames_irq > U16_MAX || + !ec->tx_max_coalesced_frames_irq) + return -EINVAL; + + wx->tx_work_limit = ec->tx_max_coalesced_frames_irq; switch (wx->mac.type) { case wx_mac_sp: max_eitr = WX_SP_MAX_EITR; + rx_itr_param = WX_20K_ITR; + tx_itr_param = WX_12K_ITR; break; case wx_mac_aml: case wx_mac_aml40: max_eitr = WX_AML_MAX_EITR; + rx_itr_param = WX_20K_ITR; + tx_itr_param = WX_12K_ITR; break; default: max_eitr = WX_EM_MAX_EITR; + rx_itr_param = WX_7K_ITR; + tx_itr_param = WX_7K_ITR; break; } @@ -354,36 +368,37 @@ int wx_set_coalesce(struct net_device *netdev, (ec->tx_coalesce_usecs > (max_eitr >> 2))) return -EINVAL; + if (ec->use_adaptive_rx_coalesce) { + wx->adaptive_itr = true; + wx->rx_itr_setting = 1; + wx->tx_itr_setting = 1; + return 0; + } + if (ec->rx_coalesce_usecs > 1) wx->rx_itr_setting = ec->rx_coalesce_usecs << 2; else wx->rx_itr_setting = ec->rx_coalesce_usecs; - if (wx->rx_itr_setting == 1) - rx_itr_param = WX_20K_ITR; - else - rx_itr_param = wx->rx_itr_setting; - if (ec->tx_coalesce_usecs > 1) wx->tx_itr_setting = ec->tx_coalesce_usecs << 2; else wx->tx_itr_setting = ec->tx_coalesce_usecs; - if (wx->tx_itr_setting == 1) { - switch (wx->mac.type) { - case wx_mac_sp: - case wx_mac_aml: - case wx_mac_aml40: - tx_itr_param = WX_12K_ITR; - break; - default: - tx_itr_param = WX_20K_ITR; - break; - } - } else { - tx_itr_param = wx->tx_itr_setting; + if (wx->adaptive_itr) { + wx->adaptive_itr = false; + wx->rx_itr_setting = rx_itr_param; + wx->tx_itr_setting = tx_itr_param; + } else if 
(wx->rx_itr_setting == 1 || wx->tx_itr_setting == 1) { + wx->adaptive_itr = true; } + if (wx->rx_itr_setting != 1) + rx_itr_param = wx->rx_itr_setting; + + if (wx->tx_itr_setting != 1) + tx_itr_param = wx->tx_itr_setting; + /* mixed Rx/Tx */ if (wx->q_vector[0]->tx.count && wx->q_vector[0]->rx.count) wx->tx_itr_setting = wx->rx_itr_setting; diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_lib.c index 723785ef87bb..5086db060c61 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_lib.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.c @@ -16,6 +16,7 @@ #include "wx_lib.h" #include "wx_ptp.h" #include "wx_hw.h" +#include "wx_vf_lib.h" /* Lookup table mapping the HW PTYPE to the bit field for decoding */ static struct wx_dec_ptype wx_ptype_lookup[256] = { @@ -832,6 +833,36 @@ static bool wx_clean_tx_irq(struct wx_q_vector *q_vector, return !!budget; } +static void wx_update_rx_dim_sample(struct wx_q_vector *q_vector) +{ + struct dim_sample sample = {}; + + dim_update_sample(q_vector->total_events, + q_vector->rx.total_packets, + q_vector->rx.total_bytes, + &sample); + + net_dim(&q_vector->rx.dim, &sample); +} + +static void wx_update_tx_dim_sample(struct wx_q_vector *q_vector) +{ + struct dim_sample sample = {}; + + dim_update_sample(q_vector->total_events, + q_vector->tx.total_packets, + q_vector->tx.total_bytes, + &sample); + + net_dim(&q_vector->tx.dim, &sample); +} + +static void wx_update_dim_sample(struct wx_q_vector *q_vector) +{ + wx_update_rx_dim_sample(q_vector); + wx_update_tx_dim_sample(q_vector); +} + /** * wx_poll - NAPI polling RX/TX cleanup routine * @napi: napi struct with our devices info in it @@ -878,6 +909,8 @@ static int wx_poll(struct napi_struct *napi, int budget) /* all work done, exit the polling mode */ if (likely(napi_complete_done(napi, work_done))) { + if (wx->adaptive_itr) + wx_update_dim_sample(q_vector); if (netif_running(wx->netdev)) wx_intr_enable(wx, WX_INTR_Q(q_vector->v_idx)); } @@ -1591,6 +1624,65 @@ netdev_tx_t wx_xmit_frame(struct sk_buff *skb, } EXPORT_SYMBOL(wx_xmit_frame); +static void wx_set_itr(struct wx_q_vector *q_vector) +{ + struct wx *wx = q_vector->wx; + u32 new_itr; + + if (!wx->adaptive_itr) + return; + + /* use the smallest value of new ITR delay calculations */ + new_itr = min(q_vector->rx.itr, q_vector->tx.itr); + new_itr <<= 2; + + if (new_itr != q_vector->itr) { + /* save the algorithm value here */ + q_vector->itr = new_itr; + + if (wx->pdev->is_virtfn) + wx_write_eitr_vf(q_vector); + else + wx_write_eitr(q_vector); + } +} + +static void wx_rx_dim_work(struct work_struct *work) +{ + struct dim *dim = container_of(work, struct dim, work); + struct dim_cq_moder rx_moder; + struct wx_ring_container *rx; + struct wx_q_vector *q_vector; + + rx = container_of(dim, struct wx_ring_container, dim); + + rx_moder = net_dim_get_rx_moderation(dim->mode, dim->profile_ix); + rx->itr = rx_moder.usec; + + q_vector = container_of(rx, struct wx_q_vector, rx); + wx_set_itr(q_vector); + + dim->state = DIM_START_MEASURE; +} + +static void wx_tx_dim_work(struct work_struct *work) +{ + struct dim *dim = container_of(work, struct dim, work); + struct dim_cq_moder tx_moder; + struct wx_ring_container *tx; + struct wx_q_vector *q_vector; + + tx = container_of(dim, struct wx_ring_container, dim); + + tx_moder = net_dim_get_tx_moderation(dim->mode, dim->profile_ix); + tx->itr = tx_moder.usec; + + q_vector = container_of(tx, struct wx_q_vector, tx); + wx_set_itr(q_vector); + + dim->state = DIM_START_MEASURE; +} + void 
wx_napi_enable_all(struct wx *wx) { struct wx_q_vector *q_vector; @@ -1598,6 +1690,11 @@ void wx_napi_enable_all(struct wx *wx) for (q_idx = 0; q_idx < wx->num_q_vectors; q_idx++) { q_vector = wx->q_vector[q_idx]; + + INIT_WORK(&q_vector->rx.dim.work, wx_rx_dim_work); + INIT_WORK(&q_vector->tx.dim.work, wx_tx_dim_work); + q_vector->rx.dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_CQE; + q_vector->tx.dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_CQE; napi_enable(&q_vector->napi); } } @@ -1611,6 +1708,8 @@ void wx_napi_disable_all(struct wx *wx) for (q_idx = 0; q_idx < wx->num_q_vectors; q_idx++) { q_vector = wx->q_vector[q_idx]; napi_disable(&q_vector->napi); + disable_work_sync(&q_vector->rx.dim.work); + disable_work_sync(&q_vector->tx.dim.work); } } EXPORT_SYMBOL(wx_napi_disable_all); @@ -2197,8 +2296,10 @@ irqreturn_t wx_msix_clean_rings(int __always_unused irq, void *data) struct wx_q_vector *q_vector = data; /* EIAM disabled interrupts (on this vector) for us */ - if (q_vector->rx.ring || q_vector->tx.ring) + if (q_vector->rx.ring || q_vector->tx.ring) { napi_schedule_irqoff(&q_vector->napi); + q_vector->total_events++; + } return IRQ_HANDLED; } diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h index 9d5d10f9e410..ec63e7ec8b24 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_type.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h @@ -10,6 +10,7 @@ #include <linux/netdevice.h> #include <linux/if_vlan.h> #include <linux/phylink.h> +#include <linux/dim.h> #include <net/ip.h> #define WX_NCSI_SUP 0x8000 @@ -1033,6 +1034,7 @@ struct wx_ring_container { unsigned int total_packets; /* total packets processed this int */ u8 count; /* total number of rings in vector */ u8 itr; /* current ITR setting for ring */ + struct dim dim; /* data for net_dim algorithm */ }; struct wx_ring { struct wx_ring *next; /* pointer to next ring in q_vector */ @@ -1089,6 +1091,8 @@ struct wx_q_vector { struct napi_struct napi; struct rcu_head rcu; /* to avoid race with update stats on free */ + u16 total_events; /* number of interrupts processed */ + char name[IFNAMSIZ + 17]; /* for dynamic allocation of rings associated with this q_vector */ @@ -1268,6 +1272,7 @@ struct wx { int num_rx_queues; u16 rx_itr_setting; u16 rx_work_limit; + bool adaptive_itr; int num_q_vectors; /* current number of q_vectors for device */ int max_q_vectors; /* upper limit of q_vectors for device */ diff --git a/drivers/net/ethernet/wangxun/libwx/wx_vf.h b/drivers/net/ethernet/wangxun/libwx/wx_vf.h index fec1126703e3..3f16de0fa427 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_vf.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_vf.h @@ -4,6 +4,7 @@ #ifndef _WX_VF_H_ #define _WX_VF_H_ +/* Control registers */ #define WX_VF_MAX_RING_NUMS 8 #define WX_VX_PF_BME 0x4B8 #define WX_VF_BME_ENABLE BIT(0) @@ -12,16 +13,32 @@ #define WX_VXCTRL_RST BIT(0) #define WX_VXMRQC 0x78 +#define WX_VXMRQC_PSR_L4HDR BIT(0) +#define WX_VXMRQC_PSR_L3HDR BIT(1) +#define WX_VXMRQC_PSR_L2HDR BIT(2) +#define WX_VXMRQC_PSR_TUNHDR BIT(3) +#define WX_VXMRQC_PSR_TUNMAC BIT(4) +#define WX_VXMRQC_PSR_MASK GENMASK(5, 1) +#define WX_VXMRQC_PSR(f) FIELD_PREP(GENMASK(5, 1), f) +#define WX_VXMRQC_RSS_HASH(f) FIELD_PREP(GENMASK(15, 13), f) +#define WX_VXMRQC_RSS_MASK GENMASK(31, 16) +#define WX_VXMRQC_RSS(f) FIELD_PREP(GENMASK(31, 16), f) +#define WX_VXMRQC_RSS_ALG_IPV4_TCP BIT(0) +#define WX_VXMRQC_RSS_ALG_IPV4 BIT(1) +#define WX_VXMRQC_RSS_ALG_IPV6 BIT(4) +#define WX_VXMRQC_RSS_ALG_IPV6_TCP BIT(5) +#define WX_VXMRQC_RSS_EN 
BIT(8) + +#define WX_VXRSSRK(i) (0x80 + ((i) * 4)) /* i=[0,9] */ +#define WX_VXRETA(i) (0xC0 + ((i) * 4)) /* i=[0,15] */ + +/* Interrupt registers */ #define WX_VXICR 0x100 #define WX_VXIMS 0x108 #define WX_VXIMC 0x10C #define WX_VF_IRQ_CLEAR_MASK 7 #define WX_VF_MAX_TX_QUEUES 4 #define WX_VF_MAX_RX_QUEUES 4 -#define WX_VXTXDCTL(r) (0x3010 + (0x40 * (r))) -#define WX_VXRXDCTL(r) (0x1010 + (0x40 * (r))) -#define WX_VXRXDCTL_ENABLE BIT(0) -#define WX_VXTXDCTL_FLUSH BIT(26) #define WX_VXITR(i) (0x200 + (4 * (i))) /* i=[0,1] */ #define WX_VXITR_MASK GENMASK(8, 0) @@ -29,16 +46,6 @@ #define WX_VXIVAR_MISC 0x260 #define WX_VXIVAR(i) (0x240 + (4 * (i))) /* i=[0,3] */ -#define WX_VXRXDCTL_RSCMAX(f) FIELD_PREP(GENMASK(24, 23), f) -#define WX_VXRXDCTL_BUFLEN(f) FIELD_PREP(GENMASK(6, 1), f) -#define WX_VXRXDCTL_BUFSZ(f) FIELD_PREP(GENMASK(11, 8), f) -#define WX_VXRXDCTL_HDRSZ(f) FIELD_PREP(GENMASK(15, 12), f) - -#define WX_VXRXDCTL_RSCMAX_MASK GENMASK(24, 23) -#define WX_VXRXDCTL_BUFLEN_MASK GENMASK(6, 1) -#define WX_VXRXDCTL_BUFSZ_MASK GENMASK(11, 8) -#define WX_VXRXDCTL_HDRSZ_MASK GENMASK(15, 12) - #define wx_conf_size(v, mwidth, uwidth) ({ \ typeof(v) _v = (v); \ (_v == 2 << (mwidth) ? 0 : _v >> (uwidth)); \ @@ -59,44 +66,35 @@ #define WX_VXRDBAL(r) (0x1000 + (0x40 * (r))) #define WX_VXRDBAH(r) (0x1004 + (0x40 * (r))) #define WX_VXRDT(r) (0x1008 + (0x40 * (r))) #define WX_VXRDH(r) (0x100C + (0x40 * (r))) - +#define WX_VXRXDCTL(r) (0x1010 + (0x40 * (r))) +#define WX_VXRXDCTL_ENABLE BIT(0) +#define WX_VXRXDCTL_BUFLEN_MASK GENMASK(6, 1) +#define WX_VXRXDCTL_BUFLEN(f) FIELD_PREP(GENMASK(6, 1), f) +#define WX_VXRXDCTL_BUFSZ_MASK GENMASK(11, 8) +#define WX_VXRXDCTL_BUFSZ(f) FIELD_PREP(GENMASK(11, 8), f) +#define WX_VXRXDCTL_HDRSZ_MASK GENMASK(15, 12) +#define WX_VXRXDCTL_HDRSZ(f) FIELD_PREP(GENMASK(15, 12), f) +#define WX_VXRXDCTL_RSCMAX_MASK GENMASK(24, 23) +#define WX_VXRXDCTL_RSCMAX(f) FIELD_PREP(GENMASK(24, 23), f) #define WX_VXRXDCTL_RSCEN BIT(29) #define WX_VXRXDCTL_DROP BIT(30) #define WX_VXRXDCTL_VLAN BIT(31) +/* Transmit Path */ #define WX_VXTDBAL(r) (0x3000 + (0x40 * (r))) #define WX_VXTDBAH(r) (0x3004 + (0x40 * (r))) #define WX_VXTDT(r) (0x3008 + (0x40 * (r))) #define WX_VXTDH(r) (0x300C + (0x40 * (r))) - +#define WX_VXTXDCTL(r) (0x3010 + (0x40 * (r))) #define WX_VXTXDCTL_ENABLE BIT(0) #define WX_VXTXDCTL_BUFLEN(f) FIELD_PREP(GENMASK(6, 1), f) #define WX_VXTXDCTL_PTHRESH(f) FIELD_PREP(GENMASK(11, 8), f) #define WX_VXTXDCTL_WTHRESH(f) FIELD_PREP(GENMASK(22, 16), f) - -#define WX_VXMRQC_PSR(f) FIELD_PREP(GENMASK(5, 1), f) -#define WX_VXMRQC_PSR_MASK GENMASK(5, 1) -#define WX_VXMRQC_PSR_L4HDR BIT(0) -#define WX_VXMRQC_PSR_L3HDR BIT(1) -#define WX_VXMRQC_PSR_L2HDR BIT(2) -#define WX_VXMRQC_PSR_TUNHDR BIT(3) -#define WX_VXMRQC_PSR_TUNMAC BIT(4) - -#define WX_VXRSSRK(i) (0x80 + ((i) * 4)) /* i=[0,9] */ -#define WX_VXRETA(i) (0xC0 + ((i) * 4)) /* i=[0,15] */ - -#define WX_VXMRQC_RSS(f) FIELD_PREP(GENMASK(31, 16), f) -#define WX_VXMRQC_RSS_MASK GENMASK(31, 16) -#define WX_VXMRQC_RSS_ALG_IPV4_TCP BIT(0) -#define WX_VXMRQC_RSS_ALG_IPV4 BIT(1) -#define WX_VXMRQC_RSS_ALG_IPV6 BIT(4) -#define WX_VXMRQC_RSS_ALG_IPV6_TCP BIT(5) -#define WX_VXMRQC_RSS_EN BIT(8) -#define WX_VXMRQC_RSS_HASH(f) FIELD_PREP(GENMASK(15, 13), f) +#define WX_VXTXDCTL_FLUSH BIT(26) #define WX_PFLINK_STATUS(g) FIELD_GET(BIT(0), g) #define WX_PFLINK_SPEED(g) FIELD_GET(GENMASK(31, 1), g) -#define WX_VXSTATUS_SPEED(g) FIELD_GET(GENMASK(4, 1), g) +#define WX_VXSTATUS_SPEED(g) FIELD_GET(GENMASK(4, 1), g) struct wx_link_reg_fields { u32 mac_type; diff --git
a/drivers/net/ethernet/wangxun/libwx/wx_vf_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_vf_lib.c index 3023ea2732ef..a87887b9f8ee 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_vf_lib.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_vf_lib.c @@ -10,7 +10,7 @@ #include "wx_vf.h" #include "wx_vf_lib.h" -static void wx_write_eitr_vf(struct wx_q_vector *q_vector) +void wx_write_eitr_vf(struct wx_q_vector *q_vector) { struct wx *wx = q_vector->wx; int v_idx = q_vector->v_idx; diff --git a/drivers/net/ethernet/wangxun/libwx/wx_vf_lib.h b/drivers/net/ethernet/wangxun/libwx/wx_vf_lib.h index 43ea126b79eb..a4bd23c92800 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_vf_lib.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_vf_lib.h @@ -4,6 +4,7 @@ #ifndef _WX_VF_LIB_H_ #define _WX_VF_LIB_H_ +void wx_write_eitr_vf(struct wx_q_vector *q_vector); void wx_configure_msix_vf(struct wx *wx); int wx_write_uc_addr_list_vf(struct net_device *netdev); void wx_setup_psrtype_vf(struct wx *wx); diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c index 7e2d9ec38a30..4363bab33496 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c @@ -115,7 +115,8 @@ static int ngbe_set_channels(struct net_device *dev, static const struct ethtool_ops ngbe_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS | - ETHTOOL_COALESCE_TX_MAX_FRAMES_IRQ, + ETHTOOL_COALESCE_TX_MAX_FRAMES_IRQ | + ETHTOOL_COALESCE_USE_ADAPTIVE, .get_drvinfo = wx_get_drvinfo, .get_link = ethtool_op_get_link, .get_link_ksettings = wx_get_link_ksettings, diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c index e0fc897b0a58..58488e138beb 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c @@ -119,9 +119,9 @@ static int ngbe_sw_init(struct wx *wx) num_online_cpus()); wx->rss_enabled = true; - /* enable itr by default in dynamic mode */ - wx->rx_itr_setting = 1; - wx->tx_itr_setting = 1; + wx->adaptive_itr = false; + wx->rx_itr_setting = WX_7K_ITR; + wx->tx_itr_setting = WX_7K_ITR; /* set default ring sizes */ wx->tx_ring_count = NGBE_DEFAULT_TXD; diff --git a/drivers/net/ethernet/wangxun/ngbevf/ngbevf_main.c b/drivers/net/ethernet/wangxun/ngbevf/ngbevf_main.c index c1246ab5239c..5f9ddb5e5403 100644 --- a/drivers/net/ethernet/wangxun/ngbevf/ngbevf_main.c +++ b/drivers/net/ethernet/wangxun/ngbevf/ngbevf_main.c @@ -100,6 +100,7 @@ static int ngbevf_sw_init(struct wx *wx) wx->mac.max_tx_queues = NGBEVF_MAX_TX_QUEUES; wx->mac.max_rx_queues = NGBEVF_MAX_RX_QUEUES; /* Enable dynamic interrupt throttling rates */ + wx->adaptive_itr = true; wx->rx_itr_setting = 1; wx->tx_itr_setting = 1; /* set default ring sizes */ diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_ethtool.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_ethtool.c index a4753402660e..b496ec502fed 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_ethtool.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_ethtool.c @@ -538,7 +538,8 @@ static int txgbe_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) static const struct ethtool_ops txgbe_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS | - ETHTOOL_COALESCE_TX_MAX_FRAMES_IRQ, + ETHTOOL_COALESCE_TX_MAX_FRAMES_IRQ | + ETHTOOL_COALESCE_USE_ADAPTIVE, .get_drvinfo = wx_get_drvinfo, .nway_reset = wx_nway_reset, .get_link = ethtool_op_get_link, diff --git 
a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c index a5867f3c93fc..c4c4d70d8466 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c @@ -401,6 +401,7 @@ static int txgbe_sw_init(struct wx *wx) set_bit(WX_FLAG_MULTI_64_FUNC, wx->flags); /* enable itr by default in dynamic mode */ + wx->adaptive_itr = true; wx->rx_itr_setting = 1; wx->tx_itr_setting = 1; diff --git a/drivers/net/ethernet/wangxun/txgbevf/txgbevf_main.c b/drivers/net/ethernet/wangxun/txgbevf/txgbevf_main.c index ebfce3cf753e..3755bb399f71 100644 --- a/drivers/net/ethernet/wangxun/txgbevf/txgbevf_main.c +++ b/drivers/net/ethernet/wangxun/txgbevf/txgbevf_main.c @@ -144,6 +144,7 @@ static int txgbevf_sw_init(struct wx *wx) wx->mac.max_tx_queues = TXGBEVF_MAX_TX_QUEUES; wx->mac.max_rx_queues = TXGBEVF_MAX_RX_QUEUES; /* Enable dynamic interrupt throttling rates */ + wx->adaptive_itr = true; wx->rx_itr_setting = 1; wx->tx_itr_setting = 1; /* set default ring sizes */ diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 4b668ebaa0f7..5cb59d72bc82 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -21,9 +21,10 @@ #include <linux/file.h> #include <linux/gtp.h> +#include <net/flow.h> +#include <net/inet_dscp.h> #include <net/net_namespace.h> #include <net/protocol.h> -#include <net/inet_dscp.h> #include <net/inet_sock.h> #include <net/ip.h> #include <net/ipv6.h> @@ -352,7 +353,7 @@ static struct rtable *ip4_route_output_gtp(struct flowi4 *fl4, fl4->flowi4_oif = sk->sk_bound_dev_if; fl4->daddr = daddr; fl4->saddr = saddr; - fl4->flowi4_tos = inet_dscp_to_dsfield(inet_sk_dscp(inet_sk(sk))); + fl4->flowi4_dscp = inet_sk_dscp(inet_sk(sk)); fl4->flowi4_scope = ip_sock_rt_scope(sk); fl4->flowi4_proto = sk->sk_protocol; @@ -2401,7 +2402,7 @@ static int gtp_genl_send_echo_req(struct sk_buff *skb, struct genl_info *info) udp_tunnel_xmit_skb(rt, sk, skb_to_send, fl4.saddr, fl4.daddr, - fl4.flowi4_tos, + inet_dscp_to_dsfield(fl4.flowi4_dscp), ip4_dst_hoplimit(&rt->dst), 0, port, port, diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index e3e65772c599..d7e3ddbcab6f 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -2,7 +2,7 @@ /* Copyright (c) 2014 Mahesh Bandewar <maheshb@google.com> */ -#include <net/inet_dscp.h> +#include <net/flow.h> #include <net/ip.h> #include "ipvlan.h" @@ -433,7 +433,7 @@ static noinline_for_stack int ipvlan_process_v4_outbound(struct sk_buff *skb) ip4h = ip_hdr(skb); fl4.daddr = ip4h->daddr; fl4.saddr = ip4h->saddr; - fl4.flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(ip4h)); + fl4.flowi4_dscp = ip4h_dscp(ip4h); rt = ip_route_output_flow(net, &fl4, NULL); if (IS_ERR(rt)) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 01329fe7451a..0b21818e4925 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -1583,9 +1583,6 @@ static struct macsec_tx_sa *get_txsa_from_nl(struct net *net, if (IS_ERR(dev)) return ERR_CAST(dev); - if (*assoc_num >= MACSEC_NUM_AN) - return ERR_PTR(-EINVAL); - secy = &macsec_priv(dev)->secy; tx_sc = &secy->tx_sc; @@ -1646,8 +1643,6 @@ static struct macsec_rx_sa *get_rxsa_from_nl(struct net *net, return ERR_PTR(-EINVAL); *assoc_num = nla_get_u8(tb_sa[MACSEC_SA_ATTR_AN]); - if (*assoc_num >= MACSEC_NUM_AN) - return ERR_PTR(-EINVAL); rx_sc = get_rxsc_from_nl(net, attrs, tb_rxsc, devp, secyp); if (IS_ERR(rx_sc)) @@ -1670,24 +1665,21 @@ static const struct nla_policy 
macsec_genl_policy[NUM_MACSEC_ATTR] = { static const struct nla_policy macsec_genl_rxsc_policy[NUM_MACSEC_RXSC_ATTR] = { [MACSEC_RXSC_ATTR_SCI] = { .type = NLA_U64 }, - [MACSEC_RXSC_ATTR_ACTIVE] = { .type = NLA_U8 }, + [MACSEC_RXSC_ATTR_ACTIVE] = NLA_POLICY_MAX(NLA_U8, 1), }; static const struct nla_policy macsec_genl_sa_policy[NUM_MACSEC_SA_ATTR] = { - [MACSEC_SA_ATTR_AN] = { .type = NLA_U8 }, - [MACSEC_SA_ATTR_ACTIVE] = { .type = NLA_U8 }, - [MACSEC_SA_ATTR_PN] = NLA_POLICY_MIN_LEN(4), - [MACSEC_SA_ATTR_KEYID] = { .type = NLA_BINARY, - .len = MACSEC_KEYID_LEN, }, - [MACSEC_SA_ATTR_KEY] = { .type = NLA_BINARY, - .len = MACSEC_MAX_KEY_LEN, }, + [MACSEC_SA_ATTR_AN] = NLA_POLICY_MAX(NLA_U8, MACSEC_NUM_AN - 1), + [MACSEC_SA_ATTR_ACTIVE] = NLA_POLICY_MAX(NLA_U8, 1), + [MACSEC_SA_ATTR_PN] = NLA_POLICY_MIN(NLA_UINT, 1), + [MACSEC_SA_ATTR_KEYID] = NLA_POLICY_EXACT_LEN(MACSEC_KEYID_LEN), + [MACSEC_SA_ATTR_KEY] = NLA_POLICY_MAX_LEN(MACSEC_MAX_KEY_LEN), [MACSEC_SA_ATTR_SSCI] = { .type = NLA_U32 }, - [MACSEC_SA_ATTR_SALT] = { .type = NLA_BINARY, - .len = MACSEC_SALT_LEN, }, + [MACSEC_SA_ATTR_SALT] = NLA_POLICY_EXACT_LEN(MACSEC_SALT_LEN), }; static const struct nla_policy macsec_genl_offload_policy[NUM_MACSEC_OFFLOAD_ATTR] = { - [MACSEC_OFFLOAD_ATTR_TYPE] = { .type = NLA_U8 }, + [MACSEC_OFFLOAD_ATTR_TYPE] = NLA_POLICY_MAX(NLA_U8, MACSEC_OFFLOAD_MAX), }; /* Offloads an operation to a device driver */ @@ -1739,21 +1731,6 @@ static bool validate_add_rxsa(struct nlattr **attrs) !attrs[MACSEC_SA_ATTR_KEYID]) return false; - if (nla_get_u8(attrs[MACSEC_SA_ATTR_AN]) >= MACSEC_NUM_AN) - return false; - - if (attrs[MACSEC_SA_ATTR_PN] && - nla_get_u64(attrs[MACSEC_SA_ATTR_PN]) == 0) - return false; - - if (attrs[MACSEC_SA_ATTR_ACTIVE]) { - if (nla_get_u8(attrs[MACSEC_SA_ATTR_ACTIVE]) > 1) - return false; - } - - if (nla_len(attrs[MACSEC_SA_ATTR_KEYID]) != MACSEC_KEYID_LEN) - return false; - return true; } @@ -1812,14 +1789,6 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info) rtnl_unlock(); return -EINVAL; } - - if (nla_len(tb_sa[MACSEC_SA_ATTR_SALT]) != MACSEC_SALT_LEN) { - pr_notice("macsec: nl: add_rxsa: bad salt length: %d != %d\n", - nla_len(tb_sa[MACSEC_SA_ATTR_SALT]), - MACSEC_SALT_LEN); - rtnl_unlock(); - return -EINVAL; - } } rx_sa = rtnl_dereference(rx_sc->sa[assoc_num]); @@ -1895,19 +1864,6 @@ cleanup: return err; } -static bool validate_add_rxsc(struct nlattr **attrs) -{ - if (!attrs[MACSEC_RXSC_ATTR_SCI]) - return false; - - if (attrs[MACSEC_RXSC_ATTR_ACTIVE]) { - if (nla_get_u8(attrs[MACSEC_RXSC_ATTR_ACTIVE]) > 1) - return false; - } - - return true; -} - static int macsec_add_rxsc(struct sk_buff *skb, struct genl_info *info) { struct net_device *dev; @@ -1925,7 +1881,7 @@ static int macsec_add_rxsc(struct sk_buff *skb, struct genl_info *info) if (parse_rxsc_config(attrs, tb_rxsc)) return -EINVAL; - if (!validate_add_rxsc(tb_rxsc)) + if (!tb_rxsc[MACSEC_RXSC_ATTR_SCI]) return -EINVAL; rtnl_lock(); @@ -1984,20 +1940,6 @@ static bool validate_add_txsa(struct nlattr **attrs) !attrs[MACSEC_SA_ATTR_KEYID]) return false; - if (nla_get_u8(attrs[MACSEC_SA_ATTR_AN]) >= MACSEC_NUM_AN) - return false; - - if (nla_get_u64(attrs[MACSEC_SA_ATTR_PN]) == 0) - return false; - - if (attrs[MACSEC_SA_ATTR_ACTIVE]) { - if (nla_get_u8(attrs[MACSEC_SA_ATTR_ACTIVE]) > 1) - return false; - } - - if (nla_len(attrs[MACSEC_SA_ATTR_KEYID]) != MACSEC_KEYID_LEN) - return false; - return true; } @@ -2055,14 +1997,6 @@ static int macsec_add_txsa(struct sk_buff *skb, struct genl_info *info) rtnl_unlock(); 
return -EINVAL; } - - if (nla_len(tb_sa[MACSEC_SA_ATTR_SALT]) != MACSEC_SALT_LEN) { - pr_notice("macsec: nl: add_txsa: bad salt length: %d != %d\n", - nla_len(tb_sa[MACSEC_SA_ATTR_SALT]), - MACSEC_SALT_LEN); - rtnl_unlock(); - return -EINVAL; - } } tx_sa = rtnl_dereference(tx_sc->sa[assoc_num]); @@ -2339,17 +2273,6 @@ static bool validate_upd_sa(struct nlattr **attrs) attrs[MACSEC_SA_ATTR_SALT]) return false; - if (nla_get_u8(attrs[MACSEC_SA_ATTR_AN]) >= MACSEC_NUM_AN) - return false; - - if (attrs[MACSEC_SA_ATTR_PN] && nla_get_u64(attrs[MACSEC_SA_ATTR_PN]) == 0) - return false; - - if (attrs[MACSEC_SA_ATTR_ACTIVE]) { - if (nla_get_u8(attrs[MACSEC_SA_ATTR_ACTIVE]) > 1) - return false; - } - return true; } @@ -2556,7 +2479,7 @@ static int macsec_upd_rxsc(struct sk_buff *skb, struct genl_info *info) if (parse_rxsc_config(attrs, tb_rxsc)) return -EINVAL; - if (!validate_add_rxsc(tb_rxsc)) + if (!tb_rxsc[MACSEC_RXSC_ATTR_SCI]) return -EINVAL; rtnl_lock(); @@ -3834,21 +3757,23 @@ static const struct device_type macsec_type = { .name = "macsec", }; +static int validate_cipher_suite(const struct nlattr *attr, + struct netlink_ext_ack *extack); static const struct nla_policy macsec_rtnl_policy[IFLA_MACSEC_MAX + 1] = { [IFLA_MACSEC_SCI] = { .type = NLA_U64 }, [IFLA_MACSEC_PORT] = { .type = NLA_U16 }, - [IFLA_MACSEC_ICV_LEN] = { .type = NLA_U8 }, - [IFLA_MACSEC_CIPHER_SUITE] = { .type = NLA_U64 }, + [IFLA_MACSEC_ICV_LEN] = NLA_POLICY_RANGE(NLA_U8, MACSEC_MIN_ICV_LEN, MACSEC_STD_ICV_LEN), + [IFLA_MACSEC_CIPHER_SUITE] = NLA_POLICY_VALIDATE_FN(NLA_U64, validate_cipher_suite), [IFLA_MACSEC_WINDOW] = { .type = NLA_U32 }, - [IFLA_MACSEC_ENCODING_SA] = { .type = NLA_U8 }, - [IFLA_MACSEC_ENCRYPT] = { .type = NLA_U8 }, - [IFLA_MACSEC_PROTECT] = { .type = NLA_U8 }, - [IFLA_MACSEC_INC_SCI] = { .type = NLA_U8 }, - [IFLA_MACSEC_ES] = { .type = NLA_U8 }, - [IFLA_MACSEC_SCB] = { .type = NLA_U8 }, - [IFLA_MACSEC_REPLAY_PROTECT] = { .type = NLA_U8 }, - [IFLA_MACSEC_VALIDATION] = { .type = NLA_U8 }, - [IFLA_MACSEC_OFFLOAD] = { .type = NLA_U8 }, + [IFLA_MACSEC_ENCODING_SA] = NLA_POLICY_MAX(NLA_U8, MACSEC_NUM_AN - 1), + [IFLA_MACSEC_ENCRYPT] = NLA_POLICY_MAX(NLA_U8, 1), + [IFLA_MACSEC_PROTECT] = NLA_POLICY_MAX(NLA_U8, 1), + [IFLA_MACSEC_INC_SCI] = NLA_POLICY_MAX(NLA_U8, 1), + [IFLA_MACSEC_ES] = NLA_POLICY_MAX(NLA_U8, 1), + [IFLA_MACSEC_SCB] = NLA_POLICY_MAX(NLA_U8, 1), + [IFLA_MACSEC_REPLAY_PROTECT] = NLA_POLICY_MAX(NLA_U8, 1), + [IFLA_MACSEC_VALIDATION] = NLA_POLICY_MAX(NLA_U8, MACSEC_VALIDATE_MAX), + [IFLA_MACSEC_OFFLOAD] = NLA_POLICY_MAX(NLA_U8, MACSEC_OFFLOAD_MAX), }; static void macsec_free_netdev(struct net_device *dev) @@ -4302,20 +4227,30 @@ unregister: return err; } +static int validate_cipher_suite(const struct nlattr *attr, + struct netlink_ext_ack *extack) +{ + switch (nla_get_u64(attr)) { + case MACSEC_CIPHER_ID_GCM_AES_128: + case MACSEC_CIPHER_ID_GCM_AES_256: + case MACSEC_CIPHER_ID_GCM_AES_XPN_128: + case MACSEC_CIPHER_ID_GCM_AES_XPN_256: + case MACSEC_DEFAULT_CIPHER_ID: + return 0; + default: + return -EINVAL; + } +} + static int macsec_validate_attr(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { - u64 csid = MACSEC_DEFAULT_CIPHER_ID; u8 icv_len = MACSEC_DEFAULT_ICV_LEN; - int flag; bool es, scb, sci; if (!data) return 0; - if (data[IFLA_MACSEC_CIPHER_SUITE]) - csid = nla_get_u64(data[IFLA_MACSEC_CIPHER_SUITE]); - if (data[IFLA_MACSEC_ICV_LEN]) { icv_len = nla_get_u8(data[IFLA_MACSEC_ICV_LEN]); if (icv_len != MACSEC_DEFAULT_ICV_LEN) { @@ -4331,34 +4266,6 @@ static int 
macsec_validate_attr(struct nlattr *tb[], struct nlattr *data[], } } - switch (csid) { - case MACSEC_CIPHER_ID_GCM_AES_128: - case MACSEC_CIPHER_ID_GCM_AES_256: - case MACSEC_CIPHER_ID_GCM_AES_XPN_128: - case MACSEC_CIPHER_ID_GCM_AES_XPN_256: - case MACSEC_DEFAULT_CIPHER_ID: - if (icv_len < MACSEC_MIN_ICV_LEN || - icv_len > MACSEC_STD_ICV_LEN) - return -EINVAL; - break; - default: - return -EINVAL; - } - - if (data[IFLA_MACSEC_ENCODING_SA]) { - if (nla_get_u8(data[IFLA_MACSEC_ENCODING_SA]) >= MACSEC_NUM_AN) - return -EINVAL; - } - - for (flag = IFLA_MACSEC_ENCODING_SA + 1; - flag < IFLA_MACSEC_VALIDATION; - flag++) { - if (data[flag]) { - if (nla_get_u8(data[flag]) > 1) - return -EINVAL; - } - } - es = nla_get_u8_default(data[IFLA_MACSEC_ES], false); sci = nla_get_u8_default(data[IFLA_MACSEC_INC_SCI], false); scb = nla_get_u8_default(data[IFLA_MACSEC_SCB], false); @@ -4366,10 +4273,6 @@ static int macsec_validate_attr(struct nlattr *tb[], struct nlattr *data[], if ((sci && (scb || es)) || (scb && es)) return -EINVAL; - if (data[IFLA_MACSEC_VALIDATION] && - nla_get_u8(data[IFLA_MACSEC_VALIDATION]) > MACSEC_VALIDATE_MAX) - return -EINVAL; - if ((data[IFLA_MACSEC_REPLAY_PROTECT] && nla_get_u8(data[IFLA_MACSEC_REPLAY_PROTECT])) && !data[IFLA_MACSEC_WINDOW]) diff --git a/drivers/net/mdio/mdio-bcm-unimac.c b/drivers/net/mdio/mdio-bcm-unimac.c index 7baab230008a..37e35f282d9a 100644 --- a/drivers/net/mdio/mdio-bcm-unimac.c +++ b/drivers/net/mdio/mdio-bcm-unimac.c @@ -215,7 +215,9 @@ static int unimac_mdio_clk_set(struct unimac_mdio_priv *priv) div = (rate / (2 * priv->clk_freq)) - 1; if (div & ~MDIO_CLK_DIV_MASK) { - pr_warn("Incorrect MDIO clock frequency, ignoring\n"); + dev_warn(priv->mii_bus->parent, + "Ignoring MDIO clock frequency request: %d vs. rate: %ld\n", + priv->clk_freq, rate); ret = 0; goto out; } diff --git a/drivers/net/mdio/of_mdio.c b/drivers/net/mdio/of_mdio.c index 98f667b121f7..d8ca63ed8719 100644 --- a/drivers/net/mdio/of_mdio.c +++ b/drivers/net/mdio/of_mdio.c @@ -473,6 +473,5 @@ void of_phy_deregister_fixed_link(struct device_node *np) fixed_phy_unregister(phydev); put_device(&phydev->mdio.dev); /* of_phy_find_device() */ - phy_device_free(phydev); /* fixed_phy_register() */ } EXPORT_SYMBOL(of_phy_deregister_fixed_link); diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c index e3722de08ea9..194570443493 100644 --- a/drivers/net/netconsole.c +++ b/drivers/net/netconsole.c @@ -300,6 +300,33 @@ static void netconsole_print_banner(struct netpoll *np) np_info(np, "remote ethernet address %pM\n", np->remote_mac); } +/* Parse the string and populate the `inet_addr` union. Return 0 if IPv4 is + * populated, 1 if IPv6 is populated, and -1 upon failure. 
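+ * Illustrative examples (not from this patch): "192.0.2.1\n" is parsed
+ * as IPv4 and returns 0; "2001:db8::1" is parsed as IPv6 and returns 1,
+ * provided CONFIG_IPV6 is enabled.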
+ */ +static int netpoll_parse_ip_addr(const char *str, union inet_addr *addr) +{ + const char *end = NULL; + int len; + + len = strlen(str); + if (!len) + return -1; + + if (str[len - 1] == '\n') + len -= 1; + + if (in4_pton(str, len, (void *)addr, -1, &end) > 0 && + (!end || *end == 0 || *end == '\n')) + return 0; + + if (IS_ENABLED(CONFIG_IPV6) && + in6_pton(str, len, (void *)addr, -1, &end) > 0 && + (!end || *end == 0 || *end == '\n')) + return 1; + + return -1; +} + #ifdef CONFIG_NETCONSOLE_DYNAMIC /* @@ -730,6 +757,7 @@ static ssize_t local_ip_store(struct config_item *item, const char *buf, { struct netconsole_target *nt = to_target(item); ssize_t ret = -EINVAL; + int ipv6; mutex_lock(&dynamic_netconsole_mutex); if (nt->enabled) { @@ -738,23 +766,10 @@ static ssize_t local_ip_store(struct config_item *item, const char *buf, goto out_unlock; } - if (strnchr(buf, count, ':')) { - const char *end; - - if (in6_pton(buf, count, nt->np.local_ip.in6.s6_addr, -1, &end) > 0) { - if (*end && *end != '\n') { - pr_err("invalid IPv6 address at: <%c>\n", *end); - goto out_unlock; - } - nt->np.ipv6 = true; - } else - goto out_unlock; - } else { - if (!nt->np.ipv6) - nt->np.local_ip.ip = in_aton(buf); - else - goto out_unlock; - } + ipv6 = netpoll_parse_ip_addr(buf, &nt->np.local_ip); + if (ipv6 == -1) + goto out_unlock; + nt->np.ipv6 = !!ipv6; ret = strnlen(buf, count); out_unlock: @@ -767,6 +782,7 @@ static ssize_t remote_ip_store(struct config_item *item, const char *buf, { struct netconsole_target *nt = to_target(item); ssize_t ret = -EINVAL; + int ipv6; mutex_lock(&dynamic_netconsole_mutex); if (nt->enabled) { @@ -775,23 +791,10 @@ static ssize_t remote_ip_store(struct config_item *item, const char *buf, goto out_unlock; } - if (strnchr(buf, count, ':')) { - const char *end; - - if (in6_pton(buf, count, nt->np.remote_ip.in6.s6_addr, -1, &end) > 0) { - if (*end && *end != '\n') { - pr_err("invalid IPv6 address at: <%c>\n", *end); - goto out_unlock; - } - nt->np.ipv6 = true; - } else - goto out_unlock; - } else { - if (!nt->np.ipv6) - nt->np.remote_ip.ip = in_aton(buf); - else - goto out_unlock; - } + ipv6 = netpoll_parse_ip_addr(buf, &nt->np.remote_ip); + if (ipv6 == -1) + goto out_unlock; + nt->np.ipv6 = !!ipv6; ret = strnlen(buf, count); out_unlock: @@ -1742,26 +1745,6 @@ static void write_msg(struct console *con, const char *msg, unsigned int len) spin_unlock_irqrestore(&target_list_lock, flags); } -static int netpoll_parse_ip_addr(const char *str, union inet_addr *addr) -{ - const char *end; - - if (!strchr(str, ':') && - in4_pton(str, -1, (void *)addr, -1, &end) > 0) { - if (!*end) - return 0; - } - if (in6_pton(str, -1, addr->in6.s6_addr, -1, &end) > 0) { -#if IS_ENABLED(CONFIG_IPV6) - if (!*end) - return 1; -#else - return -1; -#endif - } - return -1; -} - static int netconsole_parser_cmdline(struct netpoll *np, char *opt) { bool ipversion_set = false; diff --git a/drivers/net/netdevsim/health.c b/drivers/net/netdevsim/health.c index 688f05316b5e..3bd0e7a489c3 100644 --- a/drivers/net/netdevsim/health.c +++ b/drivers/net/netdevsim/health.c @@ -183,14 +183,14 @@ int nsim_dev_health_init(struct nsim_dev *nsim_dev, struct devlink *devlink) health->empty_reporter = devl_health_reporter_create(devlink, &nsim_dev_empty_reporter_ops, - 0, health); + health); if (IS_ERR(health->empty_reporter)) return PTR_ERR(health->empty_reporter); health->dummy_reporter = devl_health_reporter_create(devlink, &nsim_dev_dummy_reporter_ops, - 0, health); + health); if (IS_ERR(health->dummy_reporter)) { err = 
PTR_ERR(health->dummy_reporter); goto err_empty_reporter_destroy; diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index 28acc6392cfc..a7fb1d7cae94 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -465,7 +465,3 @@ config XILINX_GMII2RGMII Ethernet physical media devices and the Gigabit Ethernet controller. endif # PHYLIB - -config MICREL_KS8995MA - tristate "Micrel KS8995MA 5-ports 10/100 managed Ethernet switch" - depends on SPI diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile index b4795aaf9c1c..402a33d559de 100644 --- a/drivers/net/phy/Makefile +++ b/drivers/net/phy/Makefile @@ -72,7 +72,6 @@ obj-$(CONFIG_MAXLINEAR_GPHY) += mxl-gpy.o obj-$(CONFIG_MAXLINEAR_86110_PHY) += mxl-86110.o obj-y += mediatek/ obj-$(CONFIG_MESON_GXL_PHY) += meson-gxl.o -obj-$(CONFIG_MICREL_KS8995MA) += spi_ks8995.o obj-$(CONFIG_MICREL_PHY) += micrel.o obj-$(CONFIG_MICROCHIP_PHY) += microchip.o obj-$(CONFIG_MICROCHIP_PHY_RDS_PTP) += microchip_rds_ptp.o diff --git a/drivers/net/phy/aquantia/aquantia.h b/drivers/net/phy/aquantia/aquantia.h index 0c78bfabace5..492052cf1e6e 100644 --- a/drivers/net/phy/aquantia/aquantia.h +++ b/drivers/net/phy/aquantia/aquantia.h @@ -174,10 +174,38 @@ static const struct aqr107_hw_stat aqr107_hw_stats[] = { #define AQR107_SGMII_STAT_SZ ARRAY_SIZE(aqr107_hw_stats) +static const struct { + int speed; + u16 reg; +} aqr_global_cfg_regs[] = { + { SPEED_10, VEND1_GLOBAL_CFG_10M, }, + { SPEED_100, VEND1_GLOBAL_CFG_100M, }, + { SPEED_1000, VEND1_GLOBAL_CFG_1G, }, + { SPEED_2500, VEND1_GLOBAL_CFG_2_5G, }, + { SPEED_5000, VEND1_GLOBAL_CFG_5G, }, + { SPEED_10000, VEND1_GLOBAL_CFG_10G, }, +}; + +#define AQR_NUM_GLOBAL_CFG ARRAY_SIZE(aqr_global_cfg_regs) + +enum aqr_rate_adaptation { + AQR_RATE_ADAPT_NONE, + AQR_RATE_ADAPT_USX, + AQR_RATE_ADAPT_PAUSE, +}; + +struct aqr_global_syscfg { + int speed; + phy_interface_t interface; + enum aqr_rate_adaptation rate_adapt; +}; + struct aqr107_priv { u64 sgmii_stats[AQR107_SGMII_STAT_SZ]; unsigned long leds_active_low; unsigned long leds_active_high; + bool wait_on_global_cfg; + struct aqr_global_syscfg global_cfg[AQR_NUM_GLOBAL_CFG]; }; #if IS_REACHABLE(CONFIG_HWMON) diff --git a/drivers/net/phy/aquantia/aquantia_main.c b/drivers/net/phy/aquantia/aquantia_main.c index 77a48635d7bf..8516690e34db 100644 --- a/drivers/net/phy/aquantia/aquantia_main.c +++ b/drivers/net/phy/aquantia/aquantia_main.c @@ -26,10 +26,12 @@ #define PHY_ID_AQR111 0x03a1b610 #define PHY_ID_AQR111B0 0x03a1b612 #define PHY_ID_AQR112 0x03a1b662 -#define PHY_ID_AQR412 0x03a1b712 +#define PHY_ID_AQR412 0x03a1b6f2 +#define PHY_ID_AQR412C 0x03a1b712 #define PHY_ID_AQR113 0x31c31c40 #define PHY_ID_AQR113C 0x31c31c12 #define PHY_ID_AQR114C 0x31c31c22 +#define PHY_ID_AQR115 0x31c31c63 #define PHY_ID_AQR115C 0x31c31c33 #define PHY_ID_AQR813 0x31c31cb2 @@ -465,7 +467,7 @@ static int aqr105_config_aneg(struct phy_device *phydev) return genphy_c45_check_and_restart_aneg(phydev, changed); } -static int aqr105_read_rate(struct phy_device *phydev) +static int aqr_gen1_read_rate(struct phy_device *phydev) { int val; @@ -504,7 +506,7 @@ static int aqr105_read_rate(struct phy_device *phydev) return 0; } -static int aqr105_read_status(struct phy_device *phydev) +static int aqr_gen1_read_status(struct phy_device *phydev) { int ret; int val; @@ -562,124 +564,32 @@ static int aqr105_read_status(struct phy_device *phydev) } /* Read rate from vendor register */ - return aqr105_read_rate(phydev); + return aqr_gen1_read_rate(phydev); } -static int 
aqr107_read_rate(struct phy_device *phydev) +static int aqr_gen2_read_status(struct phy_device *phydev) { - u32 config_reg; - int val; - - val = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_TX_VEND_STATUS1); - if (val < 0) - return val; - - if (val & MDIO_AN_TX_VEND_STATUS1_FULL_DUPLEX) - phydev->duplex = DUPLEX_FULL; - else - phydev->duplex = DUPLEX_HALF; - - switch (FIELD_GET(MDIO_AN_TX_VEND_STATUS1_RATE_MASK, val)) { - case MDIO_AN_TX_VEND_STATUS1_10BASET: - phydev->speed = SPEED_10; - config_reg = VEND1_GLOBAL_CFG_10M; - break; - case MDIO_AN_TX_VEND_STATUS1_100BASETX: - phydev->speed = SPEED_100; - config_reg = VEND1_GLOBAL_CFG_100M; - break; - case MDIO_AN_TX_VEND_STATUS1_1000BASET: - phydev->speed = SPEED_1000; - config_reg = VEND1_GLOBAL_CFG_1G; - break; - case MDIO_AN_TX_VEND_STATUS1_2500BASET: - phydev->speed = SPEED_2500; - config_reg = VEND1_GLOBAL_CFG_2_5G; - break; - case MDIO_AN_TX_VEND_STATUS1_5000BASET: - phydev->speed = SPEED_5000; - config_reg = VEND1_GLOBAL_CFG_5G; - break; - case MDIO_AN_TX_VEND_STATUS1_10GBASET: - phydev->speed = SPEED_10000; - config_reg = VEND1_GLOBAL_CFG_10G; - break; - default: - phydev->speed = SPEED_UNKNOWN; - return 0; - } - - val = phy_read_mmd(phydev, MDIO_MMD_VEND1, config_reg); - if (val < 0) - return val; - - if (FIELD_GET(VEND1_GLOBAL_CFG_RATE_ADAPT, val) == - VEND1_GLOBAL_CFG_RATE_ADAPT_PAUSE) - phydev->rate_matching = RATE_MATCH_PAUSE; - else - phydev->rate_matching = RATE_MATCH_NONE; - - return 0; -} - -static int aqr107_read_status(struct phy_device *phydev) -{ - int val, ret; + struct aqr107_priv *priv = phydev->priv; + int i, ret; - ret = aqr_read_status(phydev); + ret = aqr_gen1_read_status(phydev); if (ret) return ret; - if (!phydev->link || phydev->autoneg == AUTONEG_DISABLE) - return 0; + for (i = 0; i < AQR_NUM_GLOBAL_CFG; i++) { + struct aqr_global_syscfg *syscfg = &priv->global_cfg[i]; - /* The status register is not immediately correct on line side link up. - * Poll periodically until it reflects the correct ON state. - * Only return fail for read error, timeout defaults to OFF state. 
- */ - ret = phy_read_mmd_poll_timeout(phydev, MDIO_MMD_PHYXS, - MDIO_PHYXS_VEND_IF_STATUS, val, - (FIELD_GET(MDIO_PHYXS_VEND_IF_STATUS_TYPE_MASK, val) != - MDIO_PHYXS_VEND_IF_STATUS_TYPE_OFF), - AQR107_OP_IN_PROG_SLEEP, - AQR107_OP_IN_PROG_TIMEOUT, false); - if (ret && ret != -ETIMEDOUT) - return ret; + if (syscfg->speed != phydev->speed) + continue; - switch (FIELD_GET(MDIO_PHYXS_VEND_IF_STATUS_TYPE_MASK, val)) { - case MDIO_PHYXS_VEND_IF_STATUS_TYPE_KR: - phydev->interface = PHY_INTERFACE_MODE_10GKR; - break; - case MDIO_PHYXS_VEND_IF_STATUS_TYPE_KX: - phydev->interface = PHY_INTERFACE_MODE_1000BASEKX; - break; - case MDIO_PHYXS_VEND_IF_STATUS_TYPE_XFI: - phydev->interface = PHY_INTERFACE_MODE_10GBASER; - break; - case MDIO_PHYXS_VEND_IF_STATUS_TYPE_USXGMII: - phydev->interface = PHY_INTERFACE_MODE_USXGMII; - break; - case MDIO_PHYXS_VEND_IF_STATUS_TYPE_XAUI: - phydev->interface = PHY_INTERFACE_MODE_XAUI; - break; - case MDIO_PHYXS_VEND_IF_STATUS_TYPE_SGMII: - phydev->interface = PHY_INTERFACE_MODE_SGMII; - break; - case MDIO_PHYXS_VEND_IF_STATUS_TYPE_RXAUI: - phydev->interface = PHY_INTERFACE_MODE_RXAUI; - break; - case MDIO_PHYXS_VEND_IF_STATUS_TYPE_OCSGMII: - phydev->interface = PHY_INTERFACE_MODE_2500BASEX; - break; - case MDIO_PHYXS_VEND_IF_STATUS_TYPE_OFF: - default: - phydev->link = false; - phydev->interface = PHY_INTERFACE_MODE_NA; + if (syscfg->rate_adapt == AQR_RATE_ADAPT_PAUSE) + phydev->rate_matching = RATE_MATCH_PAUSE; + else + phydev->rate_matching = RATE_MATCH_NONE; break; } - /* Read possibly downshifted rate from vendor register */ - return aqr107_read_rate(phydev); + return 0; } static int aqr107_get_downshift(struct phy_device *phydev, u8 *data) @@ -810,7 +720,7 @@ static int aqr107_config_mdi(struct phy_device *phydev) mdi_conf | PMAPMD_RSVD_VEND_PROV_MDI_FORCE); } -static int aqr107_config_init(struct phy_device *phydev) +static int aqr_gen1_config_init(struct phy_device *phydev) { struct aqr107_priv *priv = phydev->priv; u32 led_idx; @@ -859,20 +769,137 @@ static int aqr107_config_init(struct phy_device *phydev) return 0; } -static int aqcs109_config_init(struct phy_device *phydev) +/* Walk the media-speed configuration registers to determine which + * host-side serdes modes may be used by the PHY depending on the + * negotiated media speed. 
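+ * As an example (illustrative): a VEND1_GLOBAL_CFG_2_5G word whose
+ * serdes-mode field reads OCSGMII is recorded as
+ * PHY_INTERFACE_MODE_2500BASEX for the 2.5G media speed.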
+ */ +static int aqr_gen2_read_global_syscfg(struct phy_device *phydev) +{ + struct aqr107_priv *priv = phydev->priv; + unsigned int serdes_mode, rate_adapt; + phy_interface_t interface; + int i, val; + + for (i = 0; i < AQR_NUM_GLOBAL_CFG; i++) { + struct aqr_global_syscfg *syscfg = &priv->global_cfg[i]; + + syscfg->speed = aqr_global_cfg_regs[i].speed; + + val = phy_read_mmd(phydev, MDIO_MMD_VEND1, + aqr_global_cfg_regs[i].reg); + if (val < 0) + return val; + + serdes_mode = FIELD_GET(VEND1_GLOBAL_CFG_SERDES_MODE, val); + rate_adapt = FIELD_GET(VEND1_GLOBAL_CFG_RATE_ADAPT, val); + + switch (serdes_mode) { + case VEND1_GLOBAL_CFG_SERDES_MODE_XFI: + if (rate_adapt == VEND1_GLOBAL_CFG_RATE_ADAPT_USX) + interface = PHY_INTERFACE_MODE_USXGMII; + else + interface = PHY_INTERFACE_MODE_10GBASER; + break; + + case VEND1_GLOBAL_CFG_SERDES_MODE_XFI5G: + interface = PHY_INTERFACE_MODE_5GBASER; + break; + + case VEND1_GLOBAL_CFG_SERDES_MODE_OCSGMII: + interface = PHY_INTERFACE_MODE_2500BASEX; + break; + + case VEND1_GLOBAL_CFG_SERDES_MODE_SGMII: + interface = PHY_INTERFACE_MODE_SGMII; + break; + + default: + phydev_warn(phydev, "unrecognized serdes mode %u\n", + serdes_mode); + interface = PHY_INTERFACE_MODE_NA; + break; + } + + syscfg->interface = interface; + + switch (rate_adapt) { + case VEND1_GLOBAL_CFG_RATE_ADAPT_NONE: + syscfg->rate_adapt = AQR_RATE_ADAPT_NONE; + break; + case VEND1_GLOBAL_CFG_RATE_ADAPT_USX: + syscfg->rate_adapt = AQR_RATE_ADAPT_USX; + break; + case VEND1_GLOBAL_CFG_RATE_ADAPT_PAUSE: + syscfg->rate_adapt = AQR_RATE_ADAPT_PAUSE; + break; + default: + phydev_warn(phydev, "unrecognized rate adapt mode %u\n", + rate_adapt); + break; + } + } + + return 0; +} + +static int aqr_gen2_fill_interface_modes(struct phy_device *phydev) +{ + unsigned long *possible = phydev->possible_interfaces; + struct aqr107_priv *priv = phydev->priv; + phy_interface_t interface; + int i, val, ret; + + /* It's been observed on some models that, when coming out of suspend, + * the FW signals that the PHY is ready but the GLOBAL_CFG registers + * keep returning zeroes for some time. Let's poll the 100M register + * until it returns a real value, as both the 113c and 115c support + * this mode.
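+ * With the parameters used below, the poll retries every 1 ms for up
+ * to 100 ms before giving up.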
+ */ + if (priv->wait_on_global_cfg) { + ret = phy_read_mmd_poll_timeout(phydev, MDIO_MMD_VEND1, + VEND1_GLOBAL_CFG_100M, val, + val != 0, 1000, 100000, false); + if (ret) + return ret; + } + + ret = aqr_gen2_read_global_syscfg(phydev); + if (ret) + return ret; + + for (i = 0; i < AQR_NUM_GLOBAL_CFG; i++) { + interface = priv->global_cfg[i].interface; + if (interface != PHY_INTERFACE_MODE_NA) + __set_bit(interface, possible); + } + + return 0; +} + +static int aqr_gen2_config_init(struct phy_device *phydev) { int ret; + ret = aqr_gen1_config_init(phydev); + if (ret) + return ret; + + return aqr_gen2_fill_interface_modes(phydev); +} + +static int aqr_gen3_config_init(struct phy_device *phydev) +{ + return aqr_gen2_config_init(phydev); +} + +static int aqcs109_config_init(struct phy_device *phydev) +{ /* Check that the PHY interface type is compatible */ if (phydev->interface != PHY_INTERFACE_MODE_SGMII && phydev->interface != PHY_INTERFACE_MODE_2500BASEX) return -ENODEV; - ret = aqr_wait_reset_complete(phydev); - if (!ret) - aqr107_chip_info(phydev); - - return aqr107_set_downshift(phydev, MDIO_AN_VEND_PROV_DOWNSHIFT_DFLT); + return aqr_gen2_config_init(phydev); } static void aqr107_link_change_notify(struct phy_device *phydev) @@ -920,7 +947,7 @@ static void aqr107_link_change_notify(struct phy_device *phydev) phydev_info(phydev, "Aquantia 1000Base-T2 mode active\n"); } -static int aqr107_wait_processor_intensive_op(struct phy_device *phydev) +static int aqr_gen1_wait_processor_intensive_op(struct phy_device *phydev) { int val, err; @@ -944,17 +971,16 @@ static int aqr107_wait_processor_intensive_op(struct phy_device *phydev) return 0; } -static int aqr107_get_rate_matching(struct phy_device *phydev, - phy_interface_t iface) +static int aqr_gen2_get_rate_matching(struct phy_device *phydev, + phy_interface_t iface) { if (iface == PHY_INTERFACE_MODE_10GBASER || - iface == PHY_INTERFACE_MODE_2500BASEX || - iface == PHY_INTERFACE_MODE_NA) + iface == PHY_INTERFACE_MODE_2500BASEX) return RATE_MATCH_PAUSE; return RATE_MATCH_NONE; } -static int aqr107_suspend(struct phy_device *phydev) +static int aqr_gen1_suspend(struct phy_device *phydev) { int err; @@ -963,10 +989,10 @@ static int aqr107_suspend(struct phy_device *phydev) if (err) return err; - return aqr107_wait_processor_intensive_op(phydev); + return aqr_gen1_wait_processor_intensive_op(phydev); } -static int aqr107_resume(struct phy_device *phydev) +static int aqr_gen1_resume(struct phy_device *phydev) { int err; @@ -975,89 +1001,7 @@ static int aqr107_resume(struct phy_device *phydev) if (err) return err; - return aqr107_wait_processor_intensive_op(phydev); -} - -static const u16 aqr_global_cfg_regs[] = { - VEND1_GLOBAL_CFG_10M, - VEND1_GLOBAL_CFG_100M, - VEND1_GLOBAL_CFG_1G, - VEND1_GLOBAL_CFG_2_5G, - VEND1_GLOBAL_CFG_5G, - VEND1_GLOBAL_CFG_10G -}; - -static int aqr107_fill_interface_modes(struct phy_device *phydev) -{ - unsigned long *possible = phydev->possible_interfaces; - unsigned int serdes_mode, rate_adapt; - phy_interface_t interface; - int i, val; - - /* Walk the media-speed configuration registers to determine which - * host-side serdes modes may be used by the PHY depending on the - * negotiated media speed. 
- */ - for (i = 0; i < ARRAY_SIZE(aqr_global_cfg_regs); i++) { - val = phy_read_mmd(phydev, MDIO_MMD_VEND1, - aqr_global_cfg_regs[i]); - if (val < 0) - return val; - - serdes_mode = FIELD_GET(VEND1_GLOBAL_CFG_SERDES_MODE, val); - rate_adapt = FIELD_GET(VEND1_GLOBAL_CFG_RATE_ADAPT, val); - - switch (serdes_mode) { - case VEND1_GLOBAL_CFG_SERDES_MODE_XFI: - if (rate_adapt == VEND1_GLOBAL_CFG_RATE_ADAPT_USX) - interface = PHY_INTERFACE_MODE_USXGMII; - else - interface = PHY_INTERFACE_MODE_10GBASER; - break; - - case VEND1_GLOBAL_CFG_SERDES_MODE_XFI5G: - interface = PHY_INTERFACE_MODE_5GBASER; - break; - - case VEND1_GLOBAL_CFG_SERDES_MODE_OCSGMII: - interface = PHY_INTERFACE_MODE_2500BASEX; - break; - - case VEND1_GLOBAL_CFG_SERDES_MODE_SGMII: - interface = PHY_INTERFACE_MODE_SGMII; - break; - - default: - phydev_warn(phydev, "unrecognised serdes mode %u\n", - serdes_mode); - interface = PHY_INTERFACE_MODE_NA; - break; - } - - if (interface != PHY_INTERFACE_MODE_NA) - __set_bit(interface, possible); - } - - return 0; -} - -static int aqr113c_fill_interface_modes(struct phy_device *phydev) -{ - int val, ret; - - /* It's been observed on some models that - when coming out of suspend - * - the FW signals that the PHY is ready but the GLOBAL_CFG registers - * continue on returning zeroes for some time. Let's poll the 100M - * register until it returns a real value as both 113c and 115c support - * this mode. - */ - ret = phy_read_mmd_poll_timeout(phydev, MDIO_MMD_VEND1, - VEND1_GLOBAL_CFG_100M, val, val != 0, - 1000, 100000, false); - if (ret) - return ret; - - return aqr107_fill_interface_modes(phydev); + return aqr_gen1_wait_processor_intensive_op(phydev); } static int aqr115c_get_features(struct phy_device *phydev) @@ -1085,11 +1029,14 @@ static int aqr111_get_features(struct phy_device *phydev) return 0; } -static int aqr113c_config_init(struct phy_device *phydev) +static int aqr_gen4_config_init(struct phy_device *phydev) { + struct aqr107_priv *priv = phydev->priv; int ret; - ret = aqr107_config_init(phydev); + priv->wait_on_global_cfg = true; + + ret = aqr_gen3_config_init(phydev); if (ret < 0) return ret; @@ -1098,11 +1045,7 @@ static int aqr113c_config_init(struct phy_device *phydev) if (ret) return ret; - ret = aqr107_wait_processor_intensive_op(phydev); - if (ret) - return ret; - - return aqr113c_fill_interface_modes(phydev); + return aqr_gen1_wait_processor_intensive_op(phydev); } static int aqr107_probe(struct phy_device *phydev) @@ -1144,13 +1087,13 @@ static struct phy_driver aqr_driver[] = { .name = "Aquantia AQR105", .get_features = aqr105_get_features, .probe = aqr107_probe, - .config_init = aqr107_config_init, + .config_init = aqr_gen1_config_init, .config_aneg = aqr105_config_aneg, .config_intr = aqr_config_intr, .handle_interrupt = aqr_handle_interrupt, - .read_status = aqr105_read_status, - .suspend = aqr107_suspend, - .resume = aqr107_resume, + .read_status = aqr_gen1_read_status, + .suspend = aqr_gen1_suspend, + .resume = aqr_gen1_resume, }, { PHY_ID_MATCH_MODEL(PHY_ID_AQR106), @@ -1164,16 +1107,16 @@ static struct phy_driver aqr_driver[] = { PHY_ID_MATCH_MODEL(PHY_ID_AQR107), .name = "Aquantia AQR107", .probe = aqr107_probe, - .get_rate_matching = aqr107_get_rate_matching, - .config_init = aqr107_config_init, + .get_rate_matching = aqr_gen2_get_rate_matching, + .config_init = aqr_gen2_config_init, .config_aneg = aqr_config_aneg, .config_intr = aqr_config_intr, .handle_interrupt = aqr_handle_interrupt, - .read_status = aqr107_read_status, + .read_status = 
aqr_gen2_read_status, .get_tunable = aqr107_get_tunable, .set_tunable = aqr107_set_tunable, - .suspend = aqr107_suspend, - .resume = aqr107_resume, + .suspend = aqr_gen1_suspend, + .resume = aqr_gen1_resume, .get_sset_count = aqr107_get_sset_count, .get_strings = aqr107_get_strings, .get_stats = aqr107_get_stats, @@ -1188,16 +1131,16 @@ static struct phy_driver aqr_driver[] = { PHY_ID_MATCH_MODEL(PHY_ID_AQCS109), .name = "Aquantia AQCS109", .probe = aqr107_probe, - .get_rate_matching = aqr107_get_rate_matching, + .get_rate_matching = aqr_gen2_get_rate_matching, .config_init = aqcs109_config_init, .config_aneg = aqr_config_aneg, .config_intr = aqr_config_intr, .handle_interrupt = aqr_handle_interrupt, - .read_status = aqr107_read_status, + .read_status = aqr_gen2_read_status, .get_tunable = aqr107_get_tunable, .set_tunable = aqr107_set_tunable, - .suspend = aqr107_suspend, - .resume = aqr107_resume, + .suspend = aqr_gen1_suspend, + .resume = aqr_gen1_resume, .get_sset_count = aqr107_get_sset_count, .get_strings = aqr107_get_strings, .get_stats = aqr107_get_stats, @@ -1213,16 +1156,16 @@ static struct phy_driver aqr_driver[] = { PHY_ID_MATCH_MODEL(PHY_ID_AQR111), .name = "Aquantia AQR111", .probe = aqr107_probe, - .get_rate_matching = aqr107_get_rate_matching, - .config_init = aqr107_config_init, + .get_rate_matching = aqr_gen2_get_rate_matching, + .config_init = aqr_gen3_config_init, .config_aneg = aqr_config_aneg, .config_intr = aqr_config_intr, .handle_interrupt = aqr_handle_interrupt, - .read_status = aqr107_read_status, + .read_status = aqr_gen2_read_status, .get_tunable = aqr107_get_tunable, .set_tunable = aqr107_set_tunable, - .suspend = aqr107_suspend, - .resume = aqr107_resume, + .suspend = aqr_gen1_suspend, + .resume = aqr_gen1_resume, .get_sset_count = aqr107_get_sset_count, .get_strings = aqr107_get_strings, .get_stats = aqr107_get_stats, @@ -1238,16 +1181,16 @@ static struct phy_driver aqr_driver[] = { PHY_ID_MATCH_MODEL(PHY_ID_AQR111B0), .name = "Aquantia AQR111B0", .probe = aqr107_probe, - .get_rate_matching = aqr107_get_rate_matching, - .config_init = aqr107_config_init, + .get_rate_matching = aqr_gen2_get_rate_matching, + .config_init = aqr_gen3_config_init, .config_aneg = aqr_config_aneg, .config_intr = aqr_config_intr, .handle_interrupt = aqr_handle_interrupt, - .read_status = aqr107_read_status, + .read_status = aqr_gen2_read_status, .get_tunable = aqr107_get_tunable, .set_tunable = aqr107_set_tunable, - .suspend = aqr107_suspend, - .resume = aqr107_resume, + .suspend = aqr_gen1_suspend, + .resume = aqr_gen1_resume, .get_sset_count = aqr107_get_sset_count, .get_strings = aqr107_get_strings, .get_stats = aqr107_get_stats, @@ -1271,15 +1214,16 @@ static struct phy_driver aqr_driver[] = { PHY_ID_MATCH_MODEL(PHY_ID_AQR112), .name = "Aquantia AQR112", .probe = aqr107_probe, + .config_init = aqr_gen3_config_init, .config_aneg = aqr_config_aneg, .config_intr = aqr_config_intr, .handle_interrupt = aqr_handle_interrupt, .get_tunable = aqr107_get_tunable, .set_tunable = aqr107_set_tunable, - .suspend = aqr107_suspend, - .resume = aqr107_resume, - .read_status = aqr107_read_status, - .get_rate_matching = aqr107_get_rate_matching, + .suspend = aqr_gen1_suspend, + .resume = aqr_gen1_resume, + .read_status = aqr_gen2_read_status, + .get_rate_matching = aqr_gen2_get_rate_matching, .get_sset_count = aqr107_get_sset_count, .get_strings = aqr107_get_strings, .get_stats = aqr107_get_stats, @@ -1294,15 +1238,35 @@ static struct phy_driver aqr_driver[] = { PHY_ID_MATCH_MODEL(PHY_ID_AQR412), 
.name = "Aquantia AQR412", .probe = aqr107_probe, + .config_init = aqr_gen3_config_init, .config_aneg = aqr_config_aneg, .config_intr = aqr_config_intr, .handle_interrupt = aqr_handle_interrupt, .get_tunable = aqr107_get_tunable, .set_tunable = aqr107_set_tunable, - .suspend = aqr107_suspend, - .resume = aqr107_resume, - .read_status = aqr107_read_status, - .get_rate_matching = aqr107_get_rate_matching, + .suspend = aqr_gen1_suspend, + .resume = aqr_gen1_resume, + .read_status = aqr_gen2_read_status, + .get_rate_matching = aqr_gen2_get_rate_matching, + .get_sset_count = aqr107_get_sset_count, + .get_strings = aqr107_get_strings, + .get_stats = aqr107_get_stats, + .link_change_notify = aqr107_link_change_notify, +}, +{ + PHY_ID_MATCH_MODEL(PHY_ID_AQR412C), + .name = "Aquantia AQR412C", + .probe = aqr107_probe, + .config_init = aqr_gen3_config_init, + .config_aneg = aqr_config_aneg, + .config_intr = aqr_config_intr, + .handle_interrupt = aqr_handle_interrupt, + .get_tunable = aqr107_get_tunable, + .set_tunable = aqr107_set_tunable, + .suspend = aqr_gen1_suspend, + .resume = aqr_gen1_resume, + .read_status = aqr_gen2_read_status, + .get_rate_matching = aqr_gen2_get_rate_matching, .get_sset_count = aqr107_get_sset_count, .get_strings = aqr107_get_strings, .get_stats = aqr107_get_stats, @@ -1312,16 +1276,16 @@ static struct phy_driver aqr_driver[] = { PHY_ID_MATCH_MODEL(PHY_ID_AQR113), .name = "Aquantia AQR113", .probe = aqr107_probe, - .get_rate_matching = aqr107_get_rate_matching, - .config_init = aqr113c_config_init, + .get_rate_matching = aqr_gen2_get_rate_matching, + .config_init = aqr_gen4_config_init, .config_aneg = aqr_config_aneg, .config_intr = aqr_config_intr, .handle_interrupt = aqr_handle_interrupt, - .read_status = aqr107_read_status, + .read_status = aqr_gen2_read_status, .get_tunable = aqr107_get_tunable, .set_tunable = aqr107_set_tunable, - .suspend = aqr107_suspend, - .resume = aqr107_resume, + .suspend = aqr_gen1_suspend, + .resume = aqr_gen1_resume, .get_sset_count = aqr107_get_sset_count, .get_strings = aqr107_get_strings, .get_stats = aqr107_get_stats, @@ -1336,16 +1300,16 @@ static struct phy_driver aqr_driver[] = { PHY_ID_MATCH_MODEL(PHY_ID_AQR113C), .name = "Aquantia AQR113C", .probe = aqr107_probe, - .get_rate_matching = aqr107_get_rate_matching, - .config_init = aqr113c_config_init, + .get_rate_matching = aqr_gen2_get_rate_matching, + .config_init = aqr_gen4_config_init, .config_aneg = aqr_config_aneg, .config_intr = aqr_config_intr, .handle_interrupt = aqr_handle_interrupt, - .read_status = aqr107_read_status, + .read_status = aqr_gen2_read_status, .get_tunable = aqr107_get_tunable, .set_tunable = aqr107_set_tunable, - .suspend = aqr107_suspend, - .resume = aqr107_resume, + .suspend = aqr_gen1_suspend, + .resume = aqr_gen1_resume, .get_sset_count = aqr107_get_sset_count, .get_strings = aqr107_get_strings, .get_stats = aqr107_get_stats, @@ -1360,16 +1324,16 @@ static struct phy_driver aqr_driver[] = { PHY_ID_MATCH_MODEL(PHY_ID_AQR114C), .name = "Aquantia AQR114C", .probe = aqr107_probe, - .get_rate_matching = aqr107_get_rate_matching, - .config_init = aqr107_config_init, + .get_rate_matching = aqr_gen2_get_rate_matching, + .config_init = aqr_gen4_config_init, .config_aneg = aqr_config_aneg, .config_intr = aqr_config_intr, .handle_interrupt = aqr_handle_interrupt, - .read_status = aqr107_read_status, + .read_status = aqr_gen2_read_status, .get_tunable = aqr107_get_tunable, .set_tunable = aqr107_set_tunable, - .suspend = aqr107_suspend, - .resume = aqr107_resume, + 
.suspend = aqr_gen1_suspend, + .resume = aqr_gen1_resume, .get_sset_count = aqr107_get_sset_count, .get_strings = aqr107_get_strings, .get_stats = aqr107_get_stats, @@ -1382,19 +1346,44 @@ static struct phy_driver aqr_driver[] = { .led_polarity_set = aqr_phy_led_polarity_set, }, { + PHY_ID_MATCH_MODEL(PHY_ID_AQR115), + .name = "Aquantia AQR115", + .probe = aqr107_probe, + .get_rate_matching = aqr_gen2_get_rate_matching, + .config_init = aqr_gen4_config_init, + .config_aneg = aqr_config_aneg, + .config_intr = aqr_config_intr, + .handle_interrupt = aqr_handle_interrupt, + .read_status = aqr_gen2_read_status, + .get_tunable = aqr107_get_tunable, + .set_tunable = aqr107_set_tunable, + .suspend = aqr_gen1_suspend, + .resume = aqr_gen1_resume, + .get_sset_count = aqr107_get_sset_count, + .get_strings = aqr107_get_strings, + .get_stats = aqr107_get_stats, + .get_features = aqr115c_get_features, + .link_change_notify = aqr107_link_change_notify, + .led_brightness_set = aqr_phy_led_brightness_set, + .led_hw_is_supported = aqr_phy_led_hw_is_supported, + .led_hw_control_set = aqr_phy_led_hw_control_set, + .led_hw_control_get = aqr_phy_led_hw_control_get, + .led_polarity_set = aqr_phy_led_polarity_set, +}, +{ PHY_ID_MATCH_MODEL(PHY_ID_AQR115C), .name = "Aquantia AQR115C", .probe = aqr107_probe, - .get_rate_matching = aqr107_get_rate_matching, - .config_init = aqr113c_config_init, + .get_rate_matching = aqr_gen2_get_rate_matching, + .config_init = aqr_gen4_config_init, .config_aneg = aqr_config_aneg, .config_intr = aqr_config_intr, .handle_interrupt = aqr_handle_interrupt, - .read_status = aqr107_read_status, + .read_status = aqr_gen2_read_status, .get_tunable = aqr107_get_tunable, .set_tunable = aqr107_set_tunable, - .suspend = aqr107_suspend, - .resume = aqr107_resume, + .suspend = aqr_gen1_suspend, + .resume = aqr_gen1_resume, .get_sset_count = aqr107_get_sset_count, .get_strings = aqr107_get_strings, .get_stats = aqr107_get_stats, @@ -1410,16 +1399,16 @@ static struct phy_driver aqr_driver[] = { PHY_ID_MATCH_MODEL(PHY_ID_AQR813), .name = "Aquantia AQR813", .probe = aqr107_probe, - .get_rate_matching = aqr107_get_rate_matching, - .config_init = aqr107_config_init, + .get_rate_matching = aqr_gen2_get_rate_matching, + .config_init = aqr_gen4_config_init, .config_aneg = aqr_config_aneg, .config_intr = aqr_config_intr, .handle_interrupt = aqr_handle_interrupt, - .read_status = aqr107_read_status, + .read_status = aqr_gen2_read_status, .get_tunable = aqr107_get_tunable, .set_tunable = aqr107_set_tunable, - .suspend = aqr107_suspend, - .resume = aqr107_resume, + .suspend = aqr_gen1_suspend, + .resume = aqr_gen1_resume, .get_sset_count = aqr107_get_sset_count, .get_strings = aqr107_get_strings, .get_stats = aqr107_get_stats, @@ -1446,9 +1435,11 @@ static const struct mdio_device_id __maybe_unused aqr_tbl[] = { { PHY_ID_MATCH_MODEL(PHY_ID_AQR111B0) }, { PHY_ID_MATCH_MODEL(PHY_ID_AQR112) }, { PHY_ID_MATCH_MODEL(PHY_ID_AQR412) }, + { PHY_ID_MATCH_MODEL(PHY_ID_AQR412C) }, { PHY_ID_MATCH_MODEL(PHY_ID_AQR113) }, { PHY_ID_MATCH_MODEL(PHY_ID_AQR113C) }, { PHY_ID_MATCH_MODEL(PHY_ID_AQR114C) }, + { PHY_ID_MATCH_MODEL(PHY_ID_AQR115) }, { PHY_ID_MATCH_MODEL(PHY_ID_AQR115C) }, { PHY_ID_MATCH_MODEL(PHY_ID_AQR813) }, { } diff --git a/drivers/net/phy/as21xxx.c b/drivers/net/phy/as21xxx.c index 92697f43087d..005277360656 100644 --- a/drivers/net/phy/as21xxx.c +++ b/drivers/net/phy/as21xxx.c @@ -884,11 +884,12 @@ static int as21xxx_match_phy_device(struct phy_device *phydev, u32 phy_id; int ret; - /* Skip PHY that are not 
AS21xxx or already have firmware loaded */ - if (phydev->c45_ids.device_ids[MDIO_MMD_PCS] != PHY_ID_AS21XXX) + /* Skip PHY that are not AS21xxx */ + if (!phy_id_compare_vendor(phydev->c45_ids.device_ids[MDIO_MMD_PCS], + PHY_VENDOR_AEONSEMI)) return genphy_match_phy_device(phydev, phydrv); - /* Read PHY ID to handle firmware just loaded */ + /* Read PHY ID to handle firmware loaded or HW reset */ ret = phy_read_mmd(phydev, MDIO_MMD_PCS, MII_PHYSID1); if (ret < 0) return ret; diff --git a/drivers/net/phy/fixed_phy.c b/drivers/net/phy/fixed_phy.c index 033656d574b8..7f4e1a155a0f 100644 --- a/drivers/net/phy/fixed_phy.c +++ b/drivers/net/phy/fixed_phy.c @@ -10,7 +10,6 @@ #include <linux/kernel.h> #include <linux/module.h> -#include <linux/device/faux.h> #include <linux/list.h> #include <linux/mii.h> #include <linux/phy.h> @@ -40,7 +39,6 @@ struct fixed_phy { struct gpio_desc *link_gpiod; }; -static struct faux_device *fdev; static struct fixed_mdio_bus platform_fmb = { .phys = LIST_HEAD_INIT(platform_fmb.phys), }; @@ -77,8 +75,6 @@ static int fixed_mdio_read(struct mii_bus *bus, int phy_addr, int reg_num) list_for_each_entry(fp, &fmb->phys, node) { if (fp->addr == phy_addr) { - struct fixed_phy_status state; - fp->status.link = !fp->no_carrier; /* Issue callback if user registered it. */ @@ -88,9 +84,8 @@ static int fixed_mdio_read(struct mii_bus *bus, int phy_addr, int reg_num) /* Check the GPIO for change in status */ fixed_phy_update(fp); - state = fp->status; - return swphy_read_reg(reg_num, &state); + return swphy_read_reg(reg_num, &fp->status); } } @@ -160,9 +155,9 @@ static int fixed_phy_add_gpiod(unsigned int irq, int phy_addr, return 0; } -int fixed_phy_add(int phy_addr, const struct fixed_phy_status *status) +void fixed_phy_add(const struct fixed_phy_status *status) { - return fixed_phy_add_gpiod(PHY_POLL, phy_addr, status, NULL); + fixed_phy_add_gpiod(PHY_POLL, 0, status, NULL); } EXPORT_SYMBOL_GPL(fixed_phy_add); @@ -309,6 +304,7 @@ void fixed_phy_unregister(struct phy_device *phy) phy_device_remove(phy); of_node_put(phy->mdio.dev.of_node); fixed_phy_del(phy->mdio.addr); + phy_device_free(phy); } EXPORT_SYMBOL_GPL(fixed_phy_unregister); @@ -317,20 +313,13 @@ static int __init fixed_mdio_bus_init(void) struct fixed_mdio_bus *fmb = &platform_fmb; int ret; - fdev = faux_device_create("Fixed MDIO bus", NULL, NULL); - if (!fdev) - return -ENODEV; - fmb->mii_bus = mdiobus_alloc(); - if (fmb->mii_bus == NULL) { - ret = -ENOMEM; - goto err_mdiobus_reg; - } + if (!fmb->mii_bus) + return -ENOMEM; snprintf(fmb->mii_bus->id, MII_BUS_ID_SIZE, "fixed-0"); fmb->mii_bus->name = "Fixed MDIO Bus"; fmb->mii_bus->priv = fmb; - fmb->mii_bus->parent = &fdev->dev; fmb->mii_bus->read = &fixed_mdio_read; fmb->mii_bus->write = &fixed_mdio_write; fmb->mii_bus->phy_mask = ~0; @@ -343,8 +332,6 @@ static int __init fixed_mdio_bus_init(void) err_mdiobus_alloc: mdiobus_free(fmb->mii_bus); -err_mdiobus_reg: - faux_device_destroy(fdev); return ret; } module_init(fixed_mdio_bus_init); @@ -356,7 +343,6 @@ static void __exit fixed_mdio_bus_exit(void) mdiobus_unregister(fmb->mii_bus); mdiobus_free(fmb->mii_bus); - faux_device_destroy(fdev); list_for_each_entry_safe(fp, tmp, &fmb->phys, node) { list_del(&fp->node); diff --git a/drivers/net/phy/mediatek/mtk-2p5ge.c b/drivers/net/phy/mediatek/mtk-2p5ge.c index e147eab523ef..de8a41a1841d 100644 --- a/drivers/net/phy/mediatek/mtk-2p5ge.c +++ b/drivers/net/phy/mediatek/mtk-2p5ge.c @@ -249,8 +249,80 @@ static int mt798x_2p5ge_phy_get_rate_matching(struct phy_device *phydev, 
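For context on the as21xxx hunk above: matching on the vendor part of the PHY ID rather than the full ID lets the driver claim any Aeonsemi part before firmware is loaded. A vendor-only compare keeps just the OUI bits (31:10) of the 32-bit PHY identifier and ignores the model (9:4) and revision (3:0) fields. A minimal sketch of the idea follows; the in-tree phy_id_compare_vendor() helper may differ in detail, so treat this as an illustration only:

static inline bool example_phy_id_compare_vendor(u32 id, u32 vendor)
{
	/* The OUI lives in bits 31:10 of the PHY ID word */
	return (id & GENMASK(31, 10)) == (vendor & GENMASK(31, 10));
}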
return RATE_MATCH_PAUSE; } +static const unsigned long supported_triggers = + BIT(TRIGGER_NETDEV_FULL_DUPLEX) | + BIT(TRIGGER_NETDEV_LINK) | + BIT(TRIGGER_NETDEV_LINK_10) | + BIT(TRIGGER_NETDEV_LINK_100) | + BIT(TRIGGER_NETDEV_LINK_1000) | + BIT(TRIGGER_NETDEV_LINK_2500) | + BIT(TRIGGER_NETDEV_RX) | + BIT(TRIGGER_NETDEV_TX); + +static int mt798x_2p5ge_phy_led_blink_set(struct phy_device *phydev, u8 index, + unsigned long *delay_on, + unsigned long *delay_off) +{ + bool blinking = false; + int err = 0; + + err = mtk_phy_led_num_dly_cfg(index, delay_on, delay_off, &blinking); + if (err < 0) + return err; + + err = mtk_phy_hw_led_blink_set(phydev, index, blinking); + if (err) + return err; + + if (blinking) + mtk_phy_hw_led_on_set(phydev, index, MTK_2P5GPHY_LED_ON_MASK, + false); + + return 0; +} + +static int mt798x_2p5ge_phy_led_brightness_set(struct phy_device *phydev, + u8 index, + enum led_brightness value) +{ + int err; + + err = mtk_phy_hw_led_blink_set(phydev, index, false); + if (err) + return err; + + return mtk_phy_hw_led_on_set(phydev, index, MTK_2P5GPHY_LED_ON_MASK, + (value != LED_OFF)); +} + +static int mt798x_2p5ge_phy_led_hw_is_supported(struct phy_device *phydev, + u8 index, unsigned long rules) +{ + return mtk_phy_led_hw_is_supported(phydev, index, rules, + supported_triggers); +} + +static int mt798x_2p5ge_phy_led_hw_control_get(struct phy_device *phydev, + u8 index, unsigned long *rules) +{ + return mtk_phy_led_hw_ctrl_get(phydev, index, rules, + MTK_2P5GPHY_LED_ON_SET, + MTK_2P5GPHY_LED_RX_BLINK_SET, + MTK_2P5GPHY_LED_TX_BLINK_SET); +} + +static int mt798x_2p5ge_phy_led_hw_control_set(struct phy_device *phydev, + u8 index, unsigned long rules) +{ + return mtk_phy_led_hw_ctrl_set(phydev, index, rules, + MTK_2P5GPHY_LED_ON_SET, + MTK_2P5GPHY_LED_RX_BLINK_SET, + MTK_2P5GPHY_LED_TX_BLINK_SET); +} + static int mt798x_2p5ge_phy_probe(struct phy_device *phydev) { + struct mtk_socphy_priv *priv; struct pinctrl *pinctrl; int ret; @@ -273,19 +345,34 @@ static int mt798x_2p5ge_phy_probe(struct phy_device *phydev) if (ret < 0) return ret; - /* Setup LED */ + /* Setup LED. By default, LED0 is on/off when link is up/down. As for + * LED1, it blinks as tx/rx transmission takes place.
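For orientation on the blink_set callback added above: phylib hands the driver the requested on/off times in milliseconds, the driver is expected to round them to what the hardware can actually do and write the rounded values back, and 0/0 means "pick a sensible hardware default". A minimal sketch of that contract, assuming a hypothetical PHY whose only supported blink rate is a fixed 50ms/50ms (this is an illustration, not the MediaTek implementation):

static int example_led_blink_set(struct phy_device *phydev, u8 index,
				 unsigned long *delay_on,
				 unsigned long *delay_off)
{
	/* 0/0 asks the driver to choose a hardware default */
	if (!*delay_on && !*delay_off) {
		*delay_on = 50;
		*delay_off = 50;
	}

	/* Only one rate exists in this sketch; reject everything else */
	if (*delay_on != 50 || *delay_off != 50)
		return -EINVAL;

	/* ... program the blink rate into hardware here ... */
	return 0;
}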
+ */ phy_set_bits_mmd(phydev, MDIO_MMD_VEND2, MTK_PHY_LED0_ON_CTRL, - MTK_PHY_LED_ON_POLARITY | MTK_PHY_LED_ON_LINK10 | - MTK_PHY_LED_ON_LINK100 | MTK_PHY_LED_ON_LINK1000 | - MTK_PHY_LED_ON_LINK2500); - phy_set_bits_mmd(phydev, MDIO_MMD_VEND2, MTK_PHY_LED1_ON_CTRL, - MTK_PHY_LED_ON_FDX | MTK_PHY_LED_ON_HDX); + MTK_PHY_LED_ON_POLARITY | MTK_2P5GPHY_LED_ON_SET); + phy_clear_bits_mmd(phydev, MDIO_MMD_VEND2, MTK_PHY_LED0_BLINK_CTRL, + MTK_2P5GPHY_LED_TX_BLINK_SET | + MTK_2P5GPHY_LED_RX_BLINK_SET); + phy_clear_bits_mmd(phydev, MDIO_MMD_VEND2, MTK_PHY_LED1_ON_CTRL, + MTK_PHY_LED_ON_FDX | MTK_PHY_LED_ON_HDX | + MTK_2P5GPHY_LED_ON_SET); + phy_set_bits_mmd(phydev, MDIO_MMD_VEND2, MTK_PHY_LED1_BLINK_CTRL, + MTK_2P5GPHY_LED_TX_BLINK_SET | + MTK_2P5GPHY_LED_RX_BLINK_SET); /* Switch pinctrl after setting polarity to avoid bogus blinking */ pinctrl = devm_pinctrl_get_select(&phydev->mdio.dev, "i2p5gbe-led"); if (IS_ERR(pinctrl)) dev_err(&phydev->mdio.dev, "Fail to set LED pins!\n"); + priv = devm_kzalloc(&phydev->mdio.dev, sizeof(struct mtk_socphy_priv), + GFP_KERNEL); + if (!priv) + return -ENOMEM; + phydev->priv = priv; + + mtk_phy_leds_state_init(phydev); + return 0; } @@ -303,6 +390,11 @@ static struct phy_driver mtk_2p5gephy_driver[] = { .resume = genphy_resume, .read_page = mtk_phy_read_page, .write_page = mtk_phy_write_page, + .led_blink_set = mt798x_2p5ge_phy_led_blink_set, + .led_brightness_set = mt798x_2p5ge_phy_led_brightness_set, + .led_hw_is_supported = mt798x_2p5ge_phy_led_hw_is_supported, + .led_hw_control_get = mt798x_2p5ge_phy_led_hw_control_get, + .led_hw_control_set = mt798x_2p5ge_phy_led_hw_control_set, }, }; diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 605b0315b4cb..e403cbbcead5 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -362,6 +362,8 @@ /* Delay used to get the second part from the LTC */ #define LAN8841_GET_SEC_LTC_DELAY (500 * NSEC_PER_MSEC) +#define LAN8842_REV_8832 0x8832 + struct kszphy_hw_stat { const char *string; u8 reg; @@ -448,6 +450,19 @@ struct kszphy_priv { struct kszphy_phy_stats phy_stats; }; +struct lan8842_phy_stats { + u64 rx_packets; + u64 rx_errors; + u64 tx_packets; + u64 tx_errors; +}; + +struct lan8842_priv { + struct lan8842_phy_stats phy_stats; + struct kszphy_ptp_priv ptp_priv; + u16 rev; +}; + static const struct kszphy_type lan8814_type = { .led_mode_reg = ~LAN8814_LED_CTRL_1, .cable_diag_reg = LAN8814_CABLE_DIAG, @@ -2790,6 +2805,52 @@ static int ksz886x_cable_test_get_status(struct phy_device *phydev, return ret; } +/** + * LAN8814_PAGE_AFE_PMA - Selects Extended Page 1. + * + * This page appears to control the Analog Front-End (AFE) and Physical + * Medium Attachment (PMA) layers. It is used to access registers like + * LAN8814_PD_CONTROLS and LAN8814_LINK_QUALITY. + */ +#define LAN8814_PAGE_AFE_PMA 1 + +/** + * LAN8814_PAGE_PCS_DIGITAL - Selects Extended Page 2. + * + * This page seems dedicated to the Physical Coding Sublayer (PCS) and other + * digital logic. It is used for MDI-X alignment (LAN8814_ALIGN_SWAP) and EEE + * state (LAN8814_EEE_STATE) in the LAN8814, and is repurposed for statistics + * and self-test counters in the LAN8842. + */ +#define LAN8814_PAGE_PCS_DIGITAL 2 + +/** + * LAN8814_PAGE_COMMON_REGS - Selects Extended Page 4. + * + * This page contains device-common registers that affect the entire chip. + * It includes controls for chip-level resets, strap status, GPIO, + * QSGMII, the shared 1588 PTP block, and the PVT monitor. 
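The page constants documented above are not memory-mapped pages but selectors for the indirect access pair at registers 0x16/0x17 (LAN_EXT_PAGE_ACCESS_CONTROL and LAN_EXT_PAGE_ACCESS_ADDRESS_DATA, defined just after them). The read path used by lanphy_read_page_reg() in this file boils down to the following sequence (a sketch with error handling omitted):

static int example_lan_ext_page_read(struct phy_device *phydev, int page,
				     u16 addr)
{
	int ret;

	phy_lock_mdio_bus(phydev);
	/* 1. select the extended page */
	__phy_write(phydev, LAN_EXT_PAGE_ACCESS_CONTROL, page);
	/* 2. latch the register address within that page */
	__phy_write(phydev, LAN_EXT_PAGE_ACCESS_ADDRESS_DATA, addr);
	/* 3. flip the window from address mode to data mode */
	__phy_write(phydev, LAN_EXT_PAGE_ACCESS_CONTROL,
		    page | LAN_EXT_PAGE_ACCESS_CTRL_EP_FUNC);
	/* 4. the data register now accesses the selected register */
	ret = __phy_read(phydev, LAN_EXT_PAGE_ACCESS_ADDRESS_DATA);
	phy_unlock_mdio_bus(phydev);

	return ret;
}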
+ */ +#define LAN8814_PAGE_COMMON_REGS 4 + +/** + * LAN8814_PAGE_PORT_REGS - Selects Extended Page 5. + * + * This page contains port-specific registers that must be accessed + * on a per-port basis. It includes controls for port LEDs, QSGMII PCS, + * rate adaptation FIFOs, and the per-port 1588 TSU block. + */ +#define LAN8814_PAGE_PORT_REGS 5 + +/** + * LAN8814_PAGE_SYSTEM_CTRL - Selects Extended Page 31. + * + * This page appears to hold fundamental system or global controls. In the + * driver, it is used by the related LAN8804 to access the + * LAN8814_CLOCK_MANAGEMENT register. + */ +#define LAN8814_PAGE_SYSTEM_CTRL 31 + #define LAN_EXT_PAGE_ACCESS_CONTROL 0x16 #define LAN_EXT_PAGE_ACCESS_ADDRESS_DATA 0x17 #define LAN_EXT_PAGE_ACCESS_CTRL_EP_FUNC 0x4000 @@ -2840,6 +2901,27 @@ static int lanphy_write_page_reg(struct phy_device *phydev, int page, u16 addr, return val; } +static int lanphy_modify_page_reg(struct phy_device *phydev, int page, u16 addr, + u16 mask, u16 set) +{ + int ret; + + phy_lock_mdio_bus(phydev); + __phy_write(phydev, LAN_EXT_PAGE_ACCESS_CONTROL, page); + __phy_write(phydev, LAN_EXT_PAGE_ACCESS_ADDRESS_DATA, addr); + __phy_write(phydev, LAN_EXT_PAGE_ACCESS_CONTROL, + (page | LAN_EXT_PAGE_ACCESS_CTRL_EP_FUNC)); + ret = __phy_modify_changed(phydev, LAN_EXT_PAGE_ACCESS_ADDRESS_DATA, + mask, set); + phy_unlock_mdio_bus(phydev); + + if (ret < 0) + phydev_err(phydev, "__phy_modify_changed() failed: %pe\n", + ERR_PTR(ret)); + + return ret; +} + static int lan8814_config_ts_intr(struct phy_device *phydev, bool enable) { u16 val = 0; @@ -2850,35 +2932,46 @@ static int lan8814_config_ts_intr(struct phy_device *phydev, bool enable) PTP_TSU_INT_EN_PTP_RX_TS_EN_ | PTP_TSU_INT_EN_PTP_RX_TS_OVRFL_EN_; - return lanphy_write_page_reg(phydev, 5, PTP_TSU_INT_EN, val); + return lanphy_write_page_reg(phydev, LAN8814_PAGE_PORT_REGS, + PTP_TSU_INT_EN, val); } static void lan8814_ptp_rx_ts_get(struct phy_device *phydev, u32 *seconds, u32 *nano_seconds, u16 *seq_id) { - *seconds = lanphy_read_page_reg(phydev, 5, PTP_RX_INGRESS_SEC_HI); + *seconds = lanphy_read_page_reg(phydev, LAN8814_PAGE_PORT_REGS, + PTP_RX_INGRESS_SEC_HI); *seconds = (*seconds << 16) | - lanphy_read_page_reg(phydev, 5, PTP_RX_INGRESS_SEC_LO); + lanphy_read_page_reg(phydev, LAN8814_PAGE_PORT_REGS, + PTP_RX_INGRESS_SEC_LO); - *nano_seconds = lanphy_read_page_reg(phydev, 5, PTP_RX_INGRESS_NS_HI); + *nano_seconds = lanphy_read_page_reg(phydev, LAN8814_PAGE_PORT_REGS, + PTP_RX_INGRESS_NS_HI); *nano_seconds = ((*nano_seconds & 0x3fff) << 16) | - lanphy_read_page_reg(phydev, 5, PTP_RX_INGRESS_NS_LO); + lanphy_read_page_reg(phydev, LAN8814_PAGE_PORT_REGS, + PTP_RX_INGRESS_NS_LO); - *seq_id = lanphy_read_page_reg(phydev, 5, PTP_RX_MSG_HEADER2); + *seq_id = lanphy_read_page_reg(phydev, LAN8814_PAGE_PORT_REGS, + PTP_RX_MSG_HEADER2); } static void lan8814_ptp_tx_ts_get(struct phy_device *phydev, u32 *seconds, u32 *nano_seconds, u16 *seq_id) { - *seconds = lanphy_read_page_reg(phydev, 5, PTP_TX_EGRESS_SEC_HI); + *seconds = lanphy_read_page_reg(phydev, LAN8814_PAGE_PORT_REGS, + PTP_TX_EGRESS_SEC_HI); *seconds = *seconds << 16 | - lanphy_read_page_reg(phydev, 5, PTP_TX_EGRESS_SEC_LO); + lanphy_read_page_reg(phydev, LAN8814_PAGE_PORT_REGS, + PTP_TX_EGRESS_SEC_LO); - *nano_seconds = lanphy_read_page_reg(phydev, 5, PTP_TX_EGRESS_NS_HI); + *nano_seconds = lanphy_read_page_reg(phydev, LAN8814_PAGE_PORT_REGS, + PTP_TX_EGRESS_NS_HI); *nano_seconds = ((*nano_seconds & 0x3fff) << 16) | - lanphy_read_page_reg(phydev, 5, PTP_TX_EGRESS_NS_LO); + 
lanphy_read_page_reg(phydev, LAN8814_PAGE_PORT_REGS, + PTP_TX_EGRESS_NS_LO); - *seq_id = lanphy_read_page_reg(phydev, 5, PTP_TX_MSG_HEADER2); + *seq_id = lanphy_read_page_reg(phydev, LAN8814_PAGE_PORT_REGS, + PTP_TX_MSG_HEADER2); } static int lan8814_ts_info(struct mii_timestamper *mii_ts, struct kernel_ethtool_ts_info *info) @@ -2912,11 +3005,11 @@ static void lan8814_flush_fifo(struct phy_device *phydev, bool egress) int i; for (i = 0; i < FIFO_SIZE; ++i) - lanphy_read_page_reg(phydev, 5, + lanphy_read_page_reg(phydev, LAN8814_PAGE_PORT_REGS, egress ? PTP_TX_MSG_HEADER2 : PTP_RX_MSG_HEADER2); /* Read to clear overflow status bit */ - lanphy_read_page_reg(phydev, 5, PTP_TSU_INT_STS); + lanphy_read_page_reg(phydev, LAN8814_PAGE_PORT_REGS, PTP_TSU_INT_STS); } static int lan8814_hwtstamp(struct mii_timestamper *mii_ts, @@ -2928,7 +3021,6 @@ static int lan8814_hwtstamp(struct mii_timestamper *mii_ts, struct lan8814_ptp_rx_ts *rx_ts, *tmp; int txcfg = 0, rxcfg = 0; int pkt_ts_enable; - int tx_mod; ptp_priv->hwts_tx_type = config->tx_type; ptp_priv->rx_filter = config->rx_filter; @@ -2967,21 +3059,28 @@ static int lan8814_hwtstamp(struct mii_timestamper *mii_ts, rxcfg |= PTP_RX_PARSE_CONFIG_IPV4_EN_ | PTP_RX_PARSE_CONFIG_IPV6_EN_; txcfg |= PTP_TX_PARSE_CONFIG_IPV4_EN_ | PTP_TX_PARSE_CONFIG_IPV6_EN_; } - lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_RX_PARSE_CONFIG, rxcfg); - lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_TX_PARSE_CONFIG, txcfg); + lanphy_write_page_reg(ptp_priv->phydev, LAN8814_PAGE_PORT_REGS, + PTP_RX_PARSE_CONFIG, rxcfg); + lanphy_write_page_reg(ptp_priv->phydev, LAN8814_PAGE_PORT_REGS, + PTP_TX_PARSE_CONFIG, txcfg); pkt_ts_enable = PTP_TIMESTAMP_EN_SYNC_ | PTP_TIMESTAMP_EN_DREQ_ | PTP_TIMESTAMP_EN_PDREQ_ | PTP_TIMESTAMP_EN_PDRES_; - lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_RX_TIMESTAMP_EN, pkt_ts_enable); - lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_TX_TIMESTAMP_EN, pkt_ts_enable); + lanphy_write_page_reg(ptp_priv->phydev, LAN8814_PAGE_PORT_REGS, + PTP_RX_TIMESTAMP_EN, pkt_ts_enable); + lanphy_write_page_reg(ptp_priv->phydev, LAN8814_PAGE_PORT_REGS, + PTP_TX_TIMESTAMP_EN, pkt_ts_enable); - tx_mod = lanphy_read_page_reg(ptp_priv->phydev, 5, PTP_TX_MOD); if (ptp_priv->hwts_tx_type == HWTSTAMP_TX_ONESTEP_SYNC) { - lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_TX_MOD, - tx_mod | PTP_TX_MOD_TX_PTP_SYNC_TS_INSERT_); + lanphy_modify_page_reg(ptp_priv->phydev, LAN8814_PAGE_PORT_REGS, + PTP_TX_MOD, + PTP_TX_MOD_TX_PTP_SYNC_TS_INSERT_, + PTP_TX_MOD_TX_PTP_SYNC_TS_INSERT_); } else if (ptp_priv->hwts_tx_type == HWTSTAMP_TX_ON) { - lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_TX_MOD, - tx_mod & ~PTP_TX_MOD_TX_PTP_SYNC_TS_INSERT_); + lanphy_modify_page_reg(ptp_priv->phydev, LAN8814_PAGE_PORT_REGS, + PTP_TX_MOD, + PTP_TX_MOD_TX_PTP_SYNC_TS_INSERT_, + 0); } if (config->rx_filter != HWTSTAMP_FILTER_NONE) @@ -3103,29 +3202,41 @@ static bool lan8814_rxtstamp(struct mii_timestamper *mii_ts, struct sk_buff *skb static void lan8814_ptp_clock_set(struct phy_device *phydev, time64_t sec, u32 nsec) { - lanphy_write_page_reg(phydev, 4, PTP_CLOCK_SET_SEC_LO, lower_16_bits(sec)); - lanphy_write_page_reg(phydev, 4, PTP_CLOCK_SET_SEC_MID, upper_16_bits(sec)); - lanphy_write_page_reg(phydev, 4, PTP_CLOCK_SET_SEC_HI, upper_32_bits(sec)); - lanphy_write_page_reg(phydev, 4, PTP_CLOCK_SET_NS_LO, lower_16_bits(nsec)); - lanphy_write_page_reg(phydev, 4, PTP_CLOCK_SET_NS_HI, upper_16_bits(nsec)); + lanphy_write_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, + PTP_CLOCK_SET_SEC_LO, lower_16_bits(sec)); 
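A note on the clock-load writes at this point: every MDIO register here is 16 bits wide, while the LTC seconds counter spans three registers (48 bits; the SEC_MID/SEC_HI writes follow just below) and the nanoseconds value spans two (NS_HI reads elsewhere in this file mask with 0x3fff, implying a 30-bit counter). The lower_16_bits()/upper_16_bits()/upper_32_bits() helpers used in these hunks reduce to the following splits (worked sketch):

static void example_split_ltc_time(time64_t sec, u32 nsec, u16 out[5])
{
	out[0] = sec & 0xffff;		/* PTP_CLOCK_SET_SEC_LO,  bits 15:0  */
	out[1] = (sec >> 16) & 0xffff;	/* PTP_CLOCK_SET_SEC_MID, bits 31:16 */
	out[2] = (sec >> 32) & 0xffff;	/* PTP_CLOCK_SET_SEC_HI,  bits 47:32 */
	out[3] = nsec & 0xffff;		/* PTP_CLOCK_SET_NS_LO,   bits 15:0  */
	out[4] = (nsec >> 16) & 0xffff;	/* PTP_CLOCK_SET_NS_HI; only the low
					 * 14 bits matter for a 30-bit value */
}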
+ lanphy_write_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, + PTP_CLOCK_SET_SEC_MID, upper_16_bits(sec)); + lanphy_write_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, + PTP_CLOCK_SET_SEC_HI, upper_32_bits(sec)); + lanphy_write_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, + PTP_CLOCK_SET_NS_LO, lower_16_bits(nsec)); + lanphy_write_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, + PTP_CLOCK_SET_NS_HI, upper_16_bits(nsec)); - lanphy_write_page_reg(phydev, 4, PTP_CMD_CTL, PTP_CMD_CTL_PTP_CLOCK_LOAD_); + lanphy_write_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, PTP_CMD_CTL, + PTP_CMD_CTL_PTP_CLOCK_LOAD_); } static void lan8814_ptp_clock_get(struct phy_device *phydev, time64_t *sec, u32 *nsec) { - lanphy_write_page_reg(phydev, 4, PTP_CMD_CTL, PTP_CMD_CTL_PTP_CLOCK_READ_); + lanphy_write_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, PTP_CMD_CTL, + PTP_CMD_CTL_PTP_CLOCK_READ_); - *sec = lanphy_read_page_reg(phydev, 4, PTP_CLOCK_READ_SEC_HI); + *sec = lanphy_read_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, + PTP_CLOCK_READ_SEC_HI); *sec <<= 16; - *sec |= lanphy_read_page_reg(phydev, 4, PTP_CLOCK_READ_SEC_MID); + *sec |= lanphy_read_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, + PTP_CLOCK_READ_SEC_MID); *sec <<= 16; - *sec |= lanphy_read_page_reg(phydev, 4, PTP_CLOCK_READ_SEC_LO); + *sec |= lanphy_read_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, + PTP_CLOCK_READ_SEC_LO); - *nsec = lanphy_read_page_reg(phydev, 4, PTP_CLOCK_READ_NS_HI); + *nsec = lanphy_read_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, + PTP_CLOCK_READ_NS_HI); *nsec <<= 16; - *nsec |= lanphy_read_page_reg(phydev, 4, PTP_CLOCK_READ_NS_LO); + *nsec |= lanphy_read_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, + PTP_CLOCK_READ_NS_LO); } static int lan8814_ptpci_gettime64(struct ptp_clock_info *ptpci, @@ -3164,14 +3275,18 @@ static void lan8814_ptp_set_target(struct phy_device *phydev, int event, s64 start_sec, u32 start_nsec) { /* Set the start time */ - lanphy_write_page_reg(phydev, 4, LAN8814_PTP_CLOCK_TARGET_SEC_LO(event), + lanphy_write_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, + LAN8814_PTP_CLOCK_TARGET_SEC_LO(event), lower_16_bits(start_sec)); - lanphy_write_page_reg(phydev, 4, LAN8814_PTP_CLOCK_TARGET_SEC_HI(event), + lanphy_write_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, + LAN8814_PTP_CLOCK_TARGET_SEC_HI(event), upper_16_bits(start_sec)); - lanphy_write_page_reg(phydev, 4, LAN8814_PTP_CLOCK_TARGET_NS_LO(event), + lanphy_write_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, + LAN8814_PTP_CLOCK_TARGET_NS_LO(event), lower_16_bits(start_nsec)); - lanphy_write_page_reg(phydev, 4, LAN8814_PTP_CLOCK_TARGET_NS_HI(event), + lanphy_write_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, + LAN8814_PTP_CLOCK_TARGET_NS_HI(event), upper_16_bits(start_nsec) & 0x3fff); } @@ -3269,9 +3384,11 @@ static void lan8814_ptp_clock_step(struct phy_device *phydev, adjustment_value_lo = adjustment_value & 0xffff; adjustment_value_hi = (adjustment_value >> 16) & 0x3fff; - lanphy_write_page_reg(phydev, 4, PTP_LTC_STEP_ADJ_LO, + lanphy_write_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, + PTP_LTC_STEP_ADJ_LO, adjustment_value_lo); - lanphy_write_page_reg(phydev, 4, PTP_LTC_STEP_ADJ_HI, + lanphy_write_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, + PTP_LTC_STEP_ADJ_HI, PTP_LTC_STEP_ADJ_DIR_ | adjustment_value_hi); seconds -= ((s32)adjustment_value); @@ -3289,9 +3406,11 @@ static void lan8814_ptp_clock_step(struct phy_device *phydev, adjustment_value_lo = adjustment_value & 0xffff; adjustment_value_hi = (adjustment_value >> 16) & 0x3fff; - lanphy_write_page_reg(phydev, 4, PTP_LTC_STEP_ADJ_LO, + 
lanphy_write_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, + PTP_LTC_STEP_ADJ_LO, adjustment_value_lo); - lanphy_write_page_reg(phydev, 4, PTP_LTC_STEP_ADJ_HI, + lanphy_write_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, + PTP_LTC_STEP_ADJ_HI, adjustment_value_hi); seconds += ((s32)adjustment_value); @@ -3299,8 +3418,8 @@ static void lan8814_ptp_clock_step(struct phy_device *phydev, set_seconds += adjustment_value; lan8814_ptp_update_target(phydev, set_seconds); } - lanphy_write_page_reg(phydev, 4, PTP_CMD_CTL, - PTP_CMD_CTL_PTP_LTC_STEP_SEC_); + lanphy_write_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, + PTP_CMD_CTL, PTP_CMD_CTL_PTP_LTC_STEP_SEC_); } if (nano_seconds) { u16 nano_seconds_lo; @@ -3309,12 +3428,14 @@ static void lan8814_ptp_clock_step(struct phy_device *phydev, nano_seconds_lo = nano_seconds & 0xffff; nano_seconds_hi = (nano_seconds >> 16) & 0x3fff; - lanphy_write_page_reg(phydev, 4, PTP_LTC_STEP_ADJ_LO, + lanphy_write_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, + PTP_LTC_STEP_ADJ_LO, nano_seconds_lo); - lanphy_write_page_reg(phydev, 4, PTP_LTC_STEP_ADJ_HI, + lanphy_write_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, + PTP_LTC_STEP_ADJ_HI, PTP_LTC_STEP_ADJ_DIR_ | nano_seconds_hi); - lanphy_write_page_reg(phydev, 4, PTP_CMD_CTL, + lanphy_write_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, PTP_CMD_CTL, PTP_CMD_CTL_PTP_LTC_STEP_NSEC_); } } @@ -3356,8 +3477,10 @@ static int lan8814_ptpci_adjfine(struct ptp_clock_info *ptpci, long scaled_ppm) kszphy_rate_adj_hi |= PTP_CLOCK_RATE_ADJ_DIR_; mutex_lock(&shared->shared_lock); - lanphy_write_page_reg(phydev, 4, PTP_CLOCK_RATE_ADJ_HI, kszphy_rate_adj_hi); - lanphy_write_page_reg(phydev, 4, PTP_CLOCK_RATE_ADJ_LO, kszphy_rate_adj_lo); + lanphy_write_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, PTP_CLOCK_RATE_ADJ_HI, + kszphy_rate_adj_hi); + lanphy_write_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, PTP_CLOCK_RATE_ADJ_LO, + kszphy_rate_adj_lo); mutex_unlock(&shared->shared_lock); return 0; @@ -3366,17 +3489,17 @@ static int lan8814_ptpci_adjfine(struct ptp_clock_info *ptpci, long scaled_ppm) static void lan8814_ptp_set_reload(struct phy_device *phydev, int event, s64 period_sec, u32 period_nsec) { - lanphy_write_page_reg(phydev, 4, + lanphy_write_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, LAN8814_PTP_CLOCK_TARGET_RELOAD_SEC_LO(event), lower_16_bits(period_sec)); - lanphy_write_page_reg(phydev, 4, + lanphy_write_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, LAN8814_PTP_CLOCK_TARGET_RELOAD_SEC_HI(event), upper_16_bits(period_sec)); - lanphy_write_page_reg(phydev, 4, + lanphy_write_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, LAN8814_PTP_CLOCK_TARGET_RELOAD_NS_LO(event), lower_16_bits(period_nsec)); - lanphy_write_page_reg(phydev, 4, + lanphy_write_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, LAN8814_PTP_CLOCK_TARGET_RELOAD_NS_HI(event), upper_16_bits(period_nsec) & 0x3fff); } @@ -3384,73 +3507,72 @@ static void lan8814_ptp_set_reload(struct phy_device *phydev, int event, static void lan8814_ptp_enable_event(struct phy_device *phydev, int event, int pulse_width) { - u16 val; - - val = lanphy_read_page_reg(phydev, 4, LAN8814_PTP_GENERAL_CONFIG); - /* Set the pulse width of the event */ - val &= ~(LAN8814_PTP_GENERAL_CONFIG_LTC_EVENT_MASK(event)); - /* Make sure that the target clock will be incremented each time when + /* Set the pulse width of the event, + * Make sure that the target clock will be incremented each time when * local time reaches or pass it + * Set the polarity high */ - val |= LAN8814_PTP_GENERAL_CONFIG_LTC_EVENT_SET(event, pulse_width); - val &= 
~(LAN8814_PTP_GENERAL_CONFIG_RELOAD_ADD_X(event)); - /* Set the polarity high */ - val |= LAN8814_PTP_GENERAL_CONFIG_POLARITY_X(event); - lanphy_write_page_reg(phydev, 4, LAN8814_PTP_GENERAL_CONFIG, val); + lanphy_modify_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, LAN8814_PTP_GENERAL_CONFIG, + LAN8814_PTP_GENERAL_CONFIG_LTC_EVENT_MASK(event) | + LAN8814_PTP_GENERAL_CONFIG_LTC_EVENT_SET(event, pulse_width) | + LAN8814_PTP_GENERAL_CONFIG_RELOAD_ADD_X(event) | + LAN8814_PTP_GENERAL_CONFIG_POLARITY_X(event), + LAN8814_PTP_GENERAL_CONFIG_LTC_EVENT_SET(event, pulse_width) | + LAN8814_PTP_GENERAL_CONFIG_POLARITY_X(event)); } static void lan8814_ptp_disable_event(struct phy_device *phydev, int event) { - u16 val; - /* Set target to too far in the future, effectively disabling it */ lan8814_ptp_set_target(phydev, event, 0xFFFFFFFF, 0); /* And then reload once it reaches the target */ - val = lanphy_read_page_reg(phydev, 4, LAN8814_PTP_GENERAL_CONFIG); - val |= LAN8814_PTP_GENERAL_CONFIG_RELOAD_ADD_X(event); - lanphy_write_page_reg(phydev, 4, LAN8814_PTP_GENERAL_CONFIG, val); + lanphy_modify_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, LAN8814_PTP_GENERAL_CONFIG, + LAN8814_PTP_GENERAL_CONFIG_RELOAD_ADD_X(event), + LAN8814_PTP_GENERAL_CONFIG_RELOAD_ADD_X(event)); } static void lan8814_ptp_perout_off(struct phy_device *phydev, int pin) { - u16 val; - /* Disable gpio alternate function, * 1: select as gpio, * 0: select alt func */ - val = lanphy_read_page_reg(phydev, 4, LAN8814_GPIO_EN_ADDR(pin)); - val |= LAN8814_GPIO_EN_BIT(pin); - lanphy_write_page_reg(phydev, 4, LAN8814_GPIO_EN_ADDR(pin), val); + lanphy_modify_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, + LAN8814_GPIO_EN_ADDR(pin), + LAN8814_GPIO_EN_BIT(pin), + LAN8814_GPIO_EN_BIT(pin)); - val = lanphy_read_page_reg(phydev, 4, LAN8814_GPIO_DIR_ADDR(pin)); - val &= ~LAN8814_GPIO_DIR_BIT(pin); - lanphy_write_page_reg(phydev, 4, LAN8814_GPIO_DIR_ADDR(pin), val); + lanphy_modify_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, + LAN8814_GPIO_DIR_ADDR(pin), + LAN8814_GPIO_DIR_BIT(pin), + 0); - val = lanphy_read_page_reg(phydev, 4, LAN8814_GPIO_BUF_ADDR(pin)); - val &= ~LAN8814_GPIO_BUF_BIT(pin); - lanphy_write_page_reg(phydev, 4, LAN8814_GPIO_BUF_ADDR(pin), val); + lanphy_modify_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, + LAN8814_GPIO_BUF_ADDR(pin), + LAN8814_GPIO_BUF_BIT(pin), + 0); } static void lan8814_ptp_perout_on(struct phy_device *phydev, int pin) { - int val; - /* Set as gpio output */ - val = lanphy_read_page_reg(phydev, 4, LAN8814_GPIO_DIR_ADDR(pin)); - val |= LAN8814_GPIO_DIR_BIT(pin); - lanphy_write_page_reg(phydev, 4, LAN8814_GPIO_DIR_ADDR(pin), val); + lanphy_modify_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, + LAN8814_GPIO_DIR_ADDR(pin), + LAN8814_GPIO_DIR_BIT(pin), + LAN8814_GPIO_DIR_BIT(pin)); /* Enable gpio 0:for alternate function, 1:gpio */ - val = lanphy_read_page_reg(phydev, 4, LAN8814_GPIO_EN_ADDR(pin)); - val &= ~LAN8814_GPIO_EN_BIT(pin); - lanphy_write_page_reg(phydev, 4, LAN8814_GPIO_EN_ADDR(pin), val); + lanphy_modify_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, + LAN8814_GPIO_EN_ADDR(pin), + LAN8814_GPIO_EN_BIT(pin), + 0); /* Set buffer type to push pull */ - val = lanphy_read_page_reg(phydev, 4, LAN8814_GPIO_BUF_ADDR(pin)); - val |= LAN8814_GPIO_BUF_BIT(pin); - lanphy_write_page_reg(phydev, 4, LAN8814_GPIO_BUF_ADDR(pin), val); + lanphy_modify_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, + LAN8814_GPIO_BUF_ADDR(pin), + LAN8814_GPIO_BUF_BIT(pin), + LAN8814_GPIO_BUF_BIT(pin)); } static int lan8814_ptp_perout(struct ptp_clock_info *ptpci, @@
-3565,61 +3687,64 @@ static int lan8814_ptp_perout(struct ptp_clock_info *ptpci, static void lan8814_ptp_extts_on(struct phy_device *phydev, int pin, u32 flags) { - u16 tmp; - /* Set as gpio input */ - tmp = lanphy_read_page_reg(phydev, 4, LAN8814_GPIO_DIR_ADDR(pin)); - tmp &= ~LAN8814_GPIO_DIR_BIT(pin); - lanphy_write_page_reg(phydev, 4, LAN8814_GPIO_DIR_ADDR(pin), tmp); + lanphy_modify_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, + LAN8814_GPIO_DIR_ADDR(pin), + LAN8814_GPIO_DIR_BIT(pin), + 0); /* Map the pin to ltc pin 0 of the capture map registers */ - tmp = lanphy_read_page_reg(phydev, 4, PTP_GPIO_CAP_MAP_LO); - tmp |= pin; - lanphy_write_page_reg(phydev, 4, PTP_GPIO_CAP_MAP_LO, tmp); + lanphy_modify_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, + PTP_GPIO_CAP_MAP_LO, pin, pin); /* Enable capture on the edges of the ltc pin */ - tmp = lanphy_read_page_reg(phydev, 4, PTP_GPIO_CAP_EN); if (flags & PTP_RISING_EDGE) - tmp |= PTP_GPIO_CAP_EN_GPIO_RE_CAPTURE_ENABLE(0); + lanphy_modify_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, + PTP_GPIO_CAP_EN, + PTP_GPIO_CAP_EN_GPIO_RE_CAPTURE_ENABLE(0), + PTP_GPIO_CAP_EN_GPIO_RE_CAPTURE_ENABLE(0)); if (flags & PTP_FALLING_EDGE) - tmp |= PTP_GPIO_CAP_EN_GPIO_FE_CAPTURE_ENABLE(0); - lanphy_write_page_reg(phydev, 4, PTP_GPIO_CAP_EN, tmp); + lanphy_modify_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, + PTP_GPIO_CAP_EN, + PTP_GPIO_CAP_EN_GPIO_FE_CAPTURE_ENABLE(0), + PTP_GPIO_CAP_EN_GPIO_FE_CAPTURE_ENABLE(0)); /* Enable interrupt top interrupt */ - tmp = lanphy_read_page_reg(phydev, 4, PTP_COMMON_INT_ENA); - tmp |= PTP_COMMON_INT_ENA_GPIO_CAP_EN; - lanphy_write_page_reg(phydev, 4, PTP_COMMON_INT_ENA, tmp); + lanphy_modify_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, PTP_COMMON_INT_ENA, + PTP_COMMON_INT_ENA_GPIO_CAP_EN, + PTP_COMMON_INT_ENA_GPIO_CAP_EN); } static void lan8814_ptp_extts_off(struct phy_device *phydev, int pin) { - u16 tmp; - /* Set as gpio out */ - tmp = lanphy_read_page_reg(phydev, 4, LAN8814_GPIO_DIR_ADDR(pin)); - tmp |= LAN8814_GPIO_DIR_BIT(pin); - lanphy_write_page_reg(phydev, 4, LAN8814_GPIO_DIR_ADDR(pin), tmp); + lanphy_modify_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, + LAN8814_GPIO_DIR_ADDR(pin), + LAN8814_GPIO_DIR_BIT(pin), + LAN8814_GPIO_DIR_BIT(pin)); /* Enable alternate, 0:for alternate function, 1:gpio */ - tmp = lanphy_read_page_reg(phydev, 4, LAN8814_GPIO_EN_ADDR(pin)); - tmp &= ~LAN8814_GPIO_EN_BIT(pin); - lanphy_write_page_reg(phydev, 4, LAN8814_GPIO_EN_ADDR(pin), tmp); + lanphy_modify_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, + LAN8814_GPIO_EN_ADDR(pin), + LAN8814_GPIO_EN_BIT(pin), + 0); /* Clear the mapping of pin to registers 0 of the capture registers */ - tmp = lanphy_read_page_reg(phydev, 4, PTP_GPIO_CAP_MAP_LO); - tmp &= ~GENMASK(3, 0); - lanphy_write_page_reg(phydev, 4, PTP_GPIO_CAP_MAP_LO, tmp); + lanphy_modify_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, + PTP_GPIO_CAP_MAP_LO, + GENMASK(3, 0), + 0); /* Disable capture on both of the edges */ - tmp = lanphy_read_page_reg(phydev, 4, PTP_GPIO_CAP_EN); - tmp &= ~PTP_GPIO_CAP_EN_GPIO_RE_CAPTURE_ENABLE(pin); - tmp &= ~PTP_GPIO_CAP_EN_GPIO_FE_CAPTURE_ENABLE(pin); - lanphy_write_page_reg(phydev, 4, PTP_GPIO_CAP_EN, tmp); + lanphy_modify_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, PTP_GPIO_CAP_EN, + PTP_GPIO_CAP_EN_GPIO_RE_CAPTURE_ENABLE(pin) | + PTP_GPIO_CAP_EN_GPIO_FE_CAPTURE_ENABLE(pin), + 0); /* Disable interrupt top interrupt */ - tmp = lanphy_read_page_reg(phydev, 4, PTP_COMMON_INT_ENA); - tmp &= ~PTP_COMMON_INT_ENA_GPIO_CAP_EN; - lanphy_write_page_reg(phydev, 4, PTP_COMMON_INT_ENA, 
tmp); + lanphy_modify_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, PTP_COMMON_INT_ENA, + PTP_COMMON_INT_ENA_GPIO_CAP_EN, + 0); } static int lan8814_ptp_extts(struct ptp_clock_info *ptpci, @@ -3749,7 +3874,8 @@ static void lan8814_get_tx_ts(struct kszphy_ptp_priv *ptp_priv) /* If other timestamps are available in the FIFO, * process them. */ - reg = lanphy_read_page_reg(phydev, 5, PTP_CAP_INFO); + reg = lanphy_read_page_reg(phydev, LAN8814_PAGE_PORT_REGS, + PTP_CAP_INFO); } while (PTP_CAP_INFO_TX_TS_CNT_GET_(reg) > 0); } @@ -3822,7 +3948,8 @@ static void lan8814_get_rx_ts(struct kszphy_ptp_priv *ptp_priv) /* If other timestamps are available in the FIFO, * process them. */ - reg = lanphy_read_page_reg(phydev, 5, PTP_CAP_INFO); + reg = lanphy_read_page_reg(phydev, LAN8814_PAGE_PORT_REGS, + PTP_CAP_INFO); } while (PTP_CAP_INFO_RX_TS_CNT_GET_(reg) > 0); } @@ -3859,31 +3986,40 @@ static int lan8814_gpio_process_cap(struct lan8814_shared_priv *shared) /* This is 0 because whatever was the input pin it was mapped it to * ltc gpio pin 0 */ - tmp = lanphy_read_page_reg(phydev, 4, PTP_GPIO_SEL); - tmp |= PTP_GPIO_SEL_GPIO_SEL(0); - lanphy_write_page_reg(phydev, 4, PTP_GPIO_SEL, tmp); + lanphy_modify_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, PTP_GPIO_SEL, + PTP_GPIO_SEL_GPIO_SEL(0), + PTP_GPIO_SEL_GPIO_SEL(0)); - tmp = lanphy_read_page_reg(phydev, 4, PTP_GPIO_CAP_STS); + tmp = lanphy_read_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, + PTP_GPIO_CAP_STS); if (!(tmp & PTP_GPIO_CAP_STS_PTP_GPIO_RE_STS(0)) && !(tmp & PTP_GPIO_CAP_STS_PTP_GPIO_FE_STS(0))) return -1; if (tmp & BIT(0)) { - sec = lanphy_read_page_reg(phydev, 4, PTP_GPIO_RE_LTC_SEC_HI_CAP); + sec = lanphy_read_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, + PTP_GPIO_RE_LTC_SEC_HI_CAP); sec <<= 16; - sec |= lanphy_read_page_reg(phydev, 4, PTP_GPIO_RE_LTC_SEC_LO_CAP); + sec |= lanphy_read_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, + PTP_GPIO_RE_LTC_SEC_LO_CAP); - nsec = lanphy_read_page_reg(phydev, 4, PTP_GPIO_RE_LTC_NS_HI_CAP) & 0x3fff; + nsec = lanphy_read_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, + PTP_GPIO_RE_LTC_NS_HI_CAP) & 0x3fff; nsec <<= 16; - nsec |= lanphy_read_page_reg(phydev, 4, PTP_GPIO_RE_LTC_NS_LO_CAP); + nsec |= lanphy_read_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, + PTP_GPIO_RE_LTC_NS_LO_CAP); } else { - sec = lanphy_read_page_reg(phydev, 4, PTP_GPIO_FE_LTC_SEC_HI_CAP); + sec = lanphy_read_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, + PTP_GPIO_FE_LTC_SEC_HI_CAP); sec <<= 16; - sec |= lanphy_read_page_reg(phydev, 4, PTP_GPIO_FE_LTC_SEC_LO_CAP); + sec |= lanphy_read_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, + PTP_GPIO_FE_LTC_SEC_LO_CAP); - nsec = lanphy_read_page_reg(phydev, 4, PTP_GPIO_FE_LTC_NS_HI_CAP) & 0x3fff; + nsec = lanphy_read_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, + PTP_GPIO_FE_LTC_NS_HI_CAP) & 0x3fff; nsec <<= 16; - nsec |= lanphy_read_page_reg(phydev, 4, PTP_GPIO_RE_LTC_NS_LO_CAP); + nsec |= lanphy_read_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, + PTP_GPIO_RE_LTC_NS_LO_CAP); } ptp_event.index = 0; @@ -3908,19 +4044,17 @@ static int lan8814_handle_gpio_interrupt(struct phy_device *phydev, u16 status) static int lan8804_config_init(struct phy_device *phydev) { - int val; - /* MDI-X setting for swap A,B transmit */ - val = lanphy_read_page_reg(phydev, 2, LAN8804_ALIGN_SWAP); - val &= ~LAN8804_ALIGN_TX_A_B_SWAP_MASK; - val |= LAN8804_ALIGN_TX_A_B_SWAP; - lanphy_write_page_reg(phydev, 2, LAN8804_ALIGN_SWAP, val); + lanphy_modify_page_reg(phydev, LAN8814_PAGE_PCS_DIGITAL, LAN8804_ALIGN_SWAP, + LAN8804_ALIGN_TX_A_B_SWAP_MASK, 
+ LAN8804_ALIGN_TX_A_B_SWAP); /* Make sure that the PHY will not stop generating the clock when the * link partner goes down */ - lanphy_write_page_reg(phydev, 31, LAN8814_CLOCK_MANAGEMENT, 0x27e); - lanphy_read_page_reg(phydev, 1, LAN8814_LINK_QUALITY); + lanphy_write_page_reg(phydev, LAN8814_PAGE_SYSTEM_CTRL, + LAN8814_CLOCK_MANAGEMENT, 0x27e); + lanphy_read_page_reg(phydev, LAN8814_PAGE_AFE_PMA, LAN8814_LINK_QUALITY); return 0; } @@ -4002,7 +4136,8 @@ static irqreturn_t lan8814_handle_interrupt(struct phy_device *phydev) } while (true) { - irq_status = lanphy_read_page_reg(phydev, 5, PTP_TSU_INT_STS); + irq_status = lanphy_read_page_reg(phydev, LAN8814_PAGE_PORT_REGS, + PTP_TSU_INT_STS); if (!irq_status) break; @@ -4030,7 +4165,7 @@ static int lan8814_config_intr(struct phy_device *phydev) { int err; - lanphy_write_page_reg(phydev, 4, LAN8814_INTR_CTRL_REG, + lanphy_write_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, LAN8814_INTR_CTRL_REG, LAN8814_INTR_CTRL_REG_POLARITY | LAN8814_INTR_CTRL_REG_INTR_ENABLE); @@ -4056,35 +4191,41 @@ static void lan8814_ptp_init(struct phy_device *phydev) { struct kszphy_priv *priv = phydev->priv; struct kszphy_ptp_priv *ptp_priv = &priv->ptp_priv; - u32 temp; if (!IS_ENABLED(CONFIG_PTP_1588_CLOCK) || !IS_ENABLED(CONFIG_NETWORK_PHY_TIMESTAMPING)) return; - lanphy_write_page_reg(phydev, 5, TSU_HARD_RESET, TSU_HARD_RESET_); + lanphy_write_page_reg(phydev, LAN8814_PAGE_PORT_REGS, + TSU_HARD_RESET, TSU_HARD_RESET_); - temp = lanphy_read_page_reg(phydev, 5, PTP_TX_MOD); - temp |= PTP_TX_MOD_BAD_UDPV4_CHKSUM_FORCE_FCS_DIS_; - lanphy_write_page_reg(phydev, 5, PTP_TX_MOD, temp); + lanphy_modify_page_reg(phydev, LAN8814_PAGE_PORT_REGS, PTP_TX_MOD, + PTP_TX_MOD_BAD_UDPV4_CHKSUM_FORCE_FCS_DIS_, + PTP_TX_MOD_BAD_UDPV4_CHKSUM_FORCE_FCS_DIS_); - temp = lanphy_read_page_reg(phydev, 5, PTP_RX_MOD); - temp |= PTP_RX_MOD_BAD_UDPV4_CHKSUM_FORCE_FCS_DIS_; - lanphy_write_page_reg(phydev, 5, PTP_RX_MOD, temp); + lanphy_modify_page_reg(phydev, LAN8814_PAGE_PORT_REGS, PTP_RX_MOD, + PTP_RX_MOD_BAD_UDPV4_CHKSUM_FORCE_FCS_DIS_, + PTP_RX_MOD_BAD_UDPV4_CHKSUM_FORCE_FCS_DIS_); - lanphy_write_page_reg(phydev, 5, PTP_RX_PARSE_CONFIG, 0); - lanphy_write_page_reg(phydev, 5, PTP_TX_PARSE_CONFIG, 0); + lanphy_write_page_reg(phydev, LAN8814_PAGE_PORT_REGS, + PTP_RX_PARSE_CONFIG, 0); + lanphy_write_page_reg(phydev, LAN8814_PAGE_PORT_REGS, + PTP_TX_PARSE_CONFIG, 0); /* Removing default registers configs related to L2 and IP */ - lanphy_write_page_reg(phydev, 5, PTP_TX_PARSE_L2_ADDR_EN, 0); - lanphy_write_page_reg(phydev, 5, PTP_RX_PARSE_L2_ADDR_EN, 0); - lanphy_write_page_reg(phydev, 5, PTP_TX_PARSE_IP_ADDR_EN, 0); - lanphy_write_page_reg(phydev, 5, PTP_RX_PARSE_IP_ADDR_EN, 0); + lanphy_write_page_reg(phydev, LAN8814_PAGE_PORT_REGS, + PTP_TX_PARSE_L2_ADDR_EN, 0); + lanphy_write_page_reg(phydev, LAN8814_PAGE_PORT_REGS, + PTP_RX_PARSE_L2_ADDR_EN, 0); + lanphy_write_page_reg(phydev, LAN8814_PAGE_PORT_REGS, + PTP_TX_PARSE_IP_ADDR_EN, 0); + lanphy_write_page_reg(phydev, LAN8814_PAGE_PORT_REGS, + PTP_RX_PARSE_IP_ADDR_EN, 0); /* Disable checking for minorVersionPTP field */ - lanphy_write_page_reg(phydev, 5, PTP_RX_VERSION, + lanphy_write_page_reg(phydev, LAN8814_PAGE_PORT_REGS, PTP_RX_VERSION, PTP_MAX_VERSION(0xff) | PTP_MIN_VERSION(0x0)); - lanphy_write_page_reg(phydev, 5, PTP_TX_VERSION, + lanphy_write_page_reg(phydev, LAN8814_PAGE_PORT_REGS, PTP_TX_VERSION, PTP_MAX_VERSION(0xff) | PTP_MIN_VERSION(0x0)); skb_queue_head_init(&ptp_priv->tx_queue); @@ -4105,7 +4246,8 @@ static void 
lan8814_ptp_init(struct phy_device *phydev) phydev->default_timestamp = true; } -static int lan8814_ptp_probe_once(struct phy_device *phydev) +static int __lan8814_ptp_probe_once(struct phy_device *phydev, char *pin_name, + int gpios) { struct lan8814_shared_priv *shared = phy_package_get_priv(phydev); @@ -4113,18 +4255,18 @@ static int lan8814_ptp_probe_once(struct phy_device *phydev) mutex_init(&shared->shared_lock); shared->pin_config = devm_kmalloc_array(&phydev->mdio.dev, - LAN8814_PTP_GPIO_NUM, + gpios, sizeof(*shared->pin_config), GFP_KERNEL); if (!shared->pin_config) return -ENOMEM; - for (int i = 0; i < LAN8814_PTP_GPIO_NUM; i++) { + for (int i = 0; i < gpios; i++) { struct ptp_pin_desc *ptp_pin = &shared->pin_config[i]; memset(ptp_pin, 0, sizeof(*ptp_pin)); snprintf(ptp_pin->name, - sizeof(ptp_pin->name), "lan8814_ptp_pin_%02d", i); + sizeof(ptp_pin->name), "%s_%02d", pin_name, i); ptp_pin->index = i; ptp_pin->func = PTP_PF_NONE; } @@ -4134,7 +4276,7 @@ static int lan8814_ptp_probe_once(struct phy_device *phydev) shared->ptp_clock_info.max_adj = 31249999; shared->ptp_clock_info.n_alarm = 0; shared->ptp_clock_info.n_ext_ts = LAN8814_PTP_EXTTS_NUM; - shared->ptp_clock_info.n_pins = LAN8814_PTP_GPIO_NUM; + shared->ptp_clock_info.n_pins = gpios; shared->ptp_clock_info.pps = 0; shared->ptp_clock_info.supported_extts_flags = PTP_RISING_EDGE | PTP_FALLING_EDGE | @@ -4169,50 +4311,60 @@ static int lan8814_ptp_probe_once(struct phy_device *phydev) /* The EP.4 is shared between all the PHYs in the package and also it * can be accessed by any of the PHYs */ - lanphy_write_page_reg(phydev, 4, LTC_HARD_RESET, LTC_HARD_RESET_); - lanphy_write_page_reg(phydev, 4, PTP_OPERATING_MODE, + lanphy_write_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, + LTC_HARD_RESET, LTC_HARD_RESET_); + lanphy_write_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, PTP_OPERATING_MODE, PTP_OPERATING_MODE_STANDALONE_); /* Enable ptp to run LTC clock for ptp and gpio 1PPS operation */ - lanphy_write_page_reg(phydev, 4, PTP_CMD_CTL, PTP_CMD_CTL_PTP_ENABLE_); + lanphy_write_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, PTP_CMD_CTL, + PTP_CMD_CTL_PTP_ENABLE_); return 0; } +static int lan8814_ptp_probe_once(struct phy_device *phydev) +{ + return __lan8814_ptp_probe_once(phydev, "lan8814_ptp_pin", + LAN8814_PTP_GPIO_NUM); +} + static void lan8814_setup_led(struct phy_device *phydev, int val) { int temp; - temp = lanphy_read_page_reg(phydev, 5, LAN8814_LED_CTRL_1); + temp = lanphy_read_page_reg(phydev, LAN8814_PAGE_PORT_REGS, + LAN8814_LED_CTRL_1); if (val) temp |= LAN8814_LED_CTRL_1_KSZ9031_LED_MODE_; else temp &= ~LAN8814_LED_CTRL_1_KSZ9031_LED_MODE_; - lanphy_write_page_reg(phydev, 5, LAN8814_LED_CTRL_1, temp); + lanphy_write_page_reg(phydev, LAN8814_PAGE_PORT_REGS, + LAN8814_LED_CTRL_1, temp); } static int lan8814_config_init(struct phy_device *phydev) { struct kszphy_priv *lan8814 = phydev->priv; - int val; /* Reset the PHY */ - val = lanphy_read_page_reg(phydev, 4, LAN8814_QSGMII_SOFT_RESET); - val |= LAN8814_QSGMII_SOFT_RESET_BIT; - lanphy_write_page_reg(phydev, 4, LAN8814_QSGMII_SOFT_RESET, val); + lanphy_modify_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, + LAN8814_QSGMII_SOFT_RESET, + LAN8814_QSGMII_SOFT_RESET_BIT, + LAN8814_QSGMII_SOFT_RESET_BIT); /* Disable ANEG with QSGMII PCS Host side */ - val = lanphy_read_page_reg(phydev, 5, LAN8814_QSGMII_PCS1G_ANEG_CONFIG); - val &= ~LAN8814_QSGMII_PCS1G_ANEG_CONFIG_ANEG_ENA; - lanphy_write_page_reg(phydev, 5, LAN8814_QSGMII_PCS1G_ANEG_CONFIG, val); + lanphy_modify_page_reg(phydev, 
LAN8814_PAGE_PORT_REGS, + LAN8814_QSGMII_PCS1G_ANEG_CONFIG, + LAN8814_QSGMII_PCS1G_ANEG_CONFIG_ANEG_ENA, + 0); /* MDI-X setting for swap A,B transmit */ - val = lanphy_read_page_reg(phydev, 2, LAN8814_ALIGN_SWAP); - val &= ~LAN8814_ALIGN_TX_A_B_SWAP_MASK; - val |= LAN8814_ALIGN_TX_A_B_SWAP; - lanphy_write_page_reg(phydev, 2, LAN8814_ALIGN_SWAP, val); + lanphy_modify_page_reg(phydev, LAN8814_PAGE_PCS_DIGITAL, LAN8814_ALIGN_SWAP, + LAN8814_ALIGN_TX_A_B_SWAP_MASK, + LAN8814_ALIGN_TX_A_B_SWAP); if (lan8814->led_mode >= 0) lan8814_setup_led(phydev, lan8814->led_mode); @@ -4243,29 +4395,24 @@ static int lan8814_release_coma_mode(struct phy_device *phydev) static void lan8814_clear_2psp_bit(struct phy_device *phydev) { - u16 val; - /* It was noticed that when traffic is passing through the PHY and the * cable is removed then the LED was still on even though there is no * link */ - val = lanphy_read_page_reg(phydev, 2, LAN8814_EEE_STATE); - val &= ~LAN8814_EEE_STATE_MASK2P5P; - lanphy_write_page_reg(phydev, 2, LAN8814_EEE_STATE, val); + lanphy_modify_page_reg(phydev, LAN8814_PAGE_PCS_DIGITAL, LAN8814_EEE_STATE, + LAN8814_EEE_STATE_MASK2P5P, + 0); } static void lan8814_update_meas_time(struct phy_device *phydev) { - u16 val; - /* By setting the measure time to a value of 0xb this will allow cables * longer than 100m to be used. This configuration can be used * regardless of the mode of operation of the PHY */ - val = lanphy_read_page_reg(phydev, 1, LAN8814_PD_CONTROLS); - val &= ~LAN8814_PD_CONTROLS_PD_MEAS_TIME_MASK; - val |= LAN8814_PD_CONTROLS_PD_MEAS_TIME_VAL; - lanphy_write_page_reg(phydev, 1, LAN8814_PD_CONTROLS, val); + lanphy_modify_page_reg(phydev, LAN8814_PAGE_AFE_PMA, LAN8814_PD_CONTROLS, + LAN8814_PD_CONTROLS_PD_MEAS_TIME_MASK, + LAN8814_PD_CONTROLS_PD_MEAS_TIME_VAL); } static int lan8814_probe(struct phy_device *phydev) @@ -4288,7 +4435,7 @@ static int lan8814_probe(struct phy_device *phydev) /* Strap-in value for PHY address, below register read gives starting * phy address value */ - addr = lanphy_read_page_reg(phydev, 4, 0) & 0x1F; + addr = lanphy_read_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, 0) & 0x1F; devm_phy_package_join(&phydev->mdio.dev, phydev, addr, sizeof(struct lan8814_shared_priv)); @@ -5643,10 +5790,286 @@ static int ksz9131_resume(struct phy_device *phydev) return kszphy_resume(phydev); } +#define LAN8842_PTP_GPIO_NUM 16 + +static int lan8842_ptp_probe_once(struct phy_device *phydev) +{ + return __lan8814_ptp_probe_once(phydev, "lan8842_ptp_pin", + LAN8842_PTP_GPIO_NUM); +} + +#define LAN8842_STRAP_REG 0 /* 0x0 */ +#define LAN8842_STRAP_REG_PHYADDR_MASK GENMASK(4, 0) +#define LAN8842_SKU_REG 11 /* 0x0b */ +#define LAN8842_SELF_TEST 14 /* 0x0e */ +#define LAN8842_SELF_TEST_RX_CNT_ENA BIT(8) +#define LAN8842_SELF_TEST_TX_CNT_ENA BIT(4) + +static int lan8842_probe(struct phy_device *phydev) +{ + struct lan8842_priv *priv; + int addr; + int ret; + + priv = devm_kzalloc(&phydev->mdio.dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + phydev->priv = priv; + + /* Similar to the lan8814, this PHY has a pin which needs to be pulled + * down to enable it to pass any traffic through it.
Therefore use the same + * function as lan8814 + */ + ret = lan8814_release_coma_mode(phydev); + if (ret) + return ret; + + /* Enable counting of the RX and TX packets */ + ret = lanphy_write_page_reg(phydev, LAN8814_PAGE_PCS_DIGITAL, + LAN8842_SELF_TEST, + LAN8842_SELF_TEST_RX_CNT_ENA | + LAN8842_SELF_TEST_TX_CNT_ENA); + if (ret < 0) + return ret; + + /* Revision lan8832 doesn't have support for PTP, therefore don't add + * any PTP clocks + */ + ret = lanphy_read_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, + LAN8842_SKU_REG); + if (ret < 0) + return ret; + + priv->rev = ret; + if (priv->rev == LAN8842_REV_8832) + return 0; + + /* As the lan8814 and lan8842 have the same IP for the PTP block, and + * the only difference is the number of GPIOs, make sure that the + * lan8842 also initializes the shared data pointer, as it is used in + * all the PTP functions for lan8814. The lan8842 doesn't have multiple + * PHYs in the same package. + */ + addr = lanphy_read_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, + LAN8842_STRAP_REG); + if (addr < 0) + return addr; + addr &= LAN8842_STRAP_REG_PHYADDR_MASK; + + ret = devm_phy_package_join(&phydev->mdio.dev, phydev, addr, + sizeof(struct lan8814_shared_priv)); + if (ret) + return ret; + + if (phy_package_init_once(phydev)) { + ret = lan8842_ptp_probe_once(phydev); + if (ret) + return ret; + } + + lan8814_ptp_init(phydev); + + return 0; +} + +static int lan8842_config_init(struct phy_device *phydev) +{ + int ret; + + /* Reset the PHY */ + ret = lanphy_modify_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, + LAN8814_QSGMII_SOFT_RESET, + LAN8814_QSGMII_SOFT_RESET_BIT, + LAN8814_QSGMII_SOFT_RESET_BIT); + if (ret < 0) + return ret; + + /* To allow the PHY to control the LEDs, the GPIOs of the PHY should be + * in function mode and not GPIO mode. Apparently by default the value + * is GPIO and not function, even though the datasheet says that it is + * function. Therefore set this value. + */ + return lanphy_write_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, + LAN8814_GPIO_EN2, 0); +} + +#define LAN8842_INTR_CTRL_REG 52 /* 0x34 */ + +static int lan8842_config_intr(struct phy_device *phydev) +{ + int err; + + lanphy_write_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, + LAN8842_INTR_CTRL_REG, + LAN8814_INTR_CTRL_REG_INTR_ENABLE); + + /* enable / disable interrupts */ + if (phydev->interrupts == PHY_INTERRUPT_ENABLED) { + err = lan8814_ack_interrupt(phydev); + if (err) + return err; + + err = phy_write(phydev, LAN8814_INTC, LAN8814_INT_LINK); + } else { + err = phy_write(phydev, LAN8814_INTC, 0); + if (err) + return err; + + err = lan8814_ack_interrupt(phydev); + } + + return err; +} + +static unsigned int lan8842_inband_caps(struct phy_device *phydev, + phy_interface_t interface) +{ + /* Inband configuration can be enabled or disabled using the registers + * PCS1G_ANEG_CONFIG. + */ + return LINK_INBAND_DISABLE | LINK_INBAND_ENABLE; +} + +static int lan8842_config_inband(struct phy_device *phydev, unsigned int modes) +{ + bool enable; + + if (modes == LINK_INBAND_DISABLE) + enable = false; + else + enable = true; + + /* Disable or enable in-band autoneg with PCS Host side + * It has the same address as lan8814 + */ + return lanphy_modify_page_reg(phydev, LAN8814_PAGE_PORT_REGS, + LAN8814_QSGMII_PCS1G_ANEG_CONFIG, + LAN8814_QSGMII_PCS1G_ANEG_CONFIG_ANEG_ENA, + enable ?
LAN8814_QSGMII_PCS1G_ANEG_CONFIG_ANEG_ENA : 0); +} + +static void lan8842_handle_ptp_interrupt(struct phy_device *phydev, u16 status) +{ + struct kszphy_ptp_priv *ptp_priv; + struct lan8842_priv *priv; + + priv = phydev->priv; + ptp_priv = &priv->ptp_priv; + + if (status & PTP_TSU_INT_STS_PTP_TX_TS_EN_) + lan8814_get_tx_ts(ptp_priv); + + if (status & PTP_TSU_INT_STS_PTP_RX_TS_EN_) + lan8814_get_rx_ts(ptp_priv); + + if (status & PTP_TSU_INT_STS_PTP_TX_TS_OVRFL_INT_) { + lan8814_flush_fifo(phydev, true); + skb_queue_purge(&ptp_priv->tx_queue); + } + + if (status & PTP_TSU_INT_STS_PTP_RX_TS_OVRFL_INT_) { + lan8814_flush_fifo(phydev, false); + skb_queue_purge(&ptp_priv->rx_queue); + } +} + +static irqreturn_t lan8842_handle_interrupt(struct phy_device *phydev) +{ + struct lan8842_priv *priv = phydev->priv; + int ret = IRQ_NONE; + int irq_status; + + irq_status = phy_read(phydev, LAN8814_INTS); + if (irq_status < 0) { + phy_error(phydev); + return IRQ_NONE; + } + + if (irq_status & LAN8814_INT_LINK) { + phy_trigger_machine(phydev); + ret = IRQ_HANDLED; + } + + /* PHY revision lan8832 doesn't have support for PTP, therefore there + * is no need to check the PTP and GPIO interrupts + */ + if (priv->rev == LAN8842_REV_8832) + goto out; + + while (true) { + irq_status = lanphy_read_page_reg(phydev, LAN8814_PAGE_PORT_REGS, + PTP_TSU_INT_STS); + if (!irq_status) + break; + + lan8842_handle_ptp_interrupt(phydev, irq_status); + ret = IRQ_HANDLED; + } + + if (!lan8814_handle_gpio_interrupt(phydev, irq_status)) + ret = IRQ_HANDLED; + +out: + return ret; +} + +static u64 lan8842_get_stat(struct phy_device *phydev, int count, int *regs) +{ + u64 ret = 0; + int val; + + for (int j = 0; j < count; ++j) { + val = lanphy_read_page_reg(phydev, LAN8814_PAGE_PCS_DIGITAL, + regs[j]); + if (val < 0) + return U64_MAX; + + ret <<= 16; + ret += val; + } + return ret; +} + +static int lan8842_update_stats(struct phy_device *phydev) +{ + struct lan8842_priv *priv = phydev->priv; + int rx_packets_regs[] = {88, 61, 60}; + int rx_errors_regs[] = {63, 62}; + int tx_packets_regs[] = {89, 85, 84}; + int tx_errors_regs[] = {87, 86}; + + priv->phy_stats.rx_packets = lan8842_get_stat(phydev, + ARRAY_SIZE(rx_packets_regs), + rx_packets_regs); + priv->phy_stats.rx_errors = lan8842_get_stat(phydev, + ARRAY_SIZE(rx_errors_regs), + rx_errors_regs); + priv->phy_stats.tx_packets = lan8842_get_stat(phydev, + ARRAY_SIZE(tx_packets_regs), + tx_packets_regs); + priv->phy_stats.tx_errors = lan8842_get_stat(phydev, + ARRAY_SIZE(tx_errors_regs), + tx_errors_regs); + + return 0; +} + +static void lan8842_get_phy_stats(struct phy_device *phydev, + struct ethtool_eth_phy_stats *eth_stats, + struct ethtool_phy_stats *stats) +{ + struct lan8842_priv *priv = phydev->priv; + + stats->rx_packets = priv->phy_stats.rx_packets; + stats->rx_errors = priv->phy_stats.rx_errors; + stats->tx_packets = priv->phy_stats.tx_packets; + stats->tx_errors = priv->phy_stats.tx_errors; +} + static struct phy_driver ksphy_driver[] = { { - .phy_id = PHY_ID_KS8737, - .phy_id_mask = MICREL_PHY_ID_MASK, + PHY_ID_MATCH_MODEL(PHY_ID_KS8737), .name = "Micrel KS8737", /* PHY_BASIC_FEATURES */ .driver_data = &ks8737_type, @@ -5687,8 +6110,7 @@ static struct phy_driver ksphy_driver[] = { .suspend = kszphy_suspend, .resume = kszphy_resume, }, { - .phy_id = PHY_ID_KSZ8041, - .phy_id_mask = MICREL_PHY_ID_MASK, + PHY_ID_MATCH_MODEL(PHY_ID_KSZ8041), .name = "Micrel KSZ8041", /* PHY_BASIC_FEATURES */ .driver_data = &ksz8041_type, @@ -5703,8 +6125,7 @@ static struct phy_driver
ksphy_driver[] = { .suspend = ksz8041_suspend, .resume = ksz8041_resume, }, { - .phy_id = PHY_ID_KSZ8041RNLI, - .phy_id_mask = MICREL_PHY_ID_MASK, + PHY_ID_MATCH_MODEL(PHY_ID_KSZ8041RNLI), .name = "Micrel KSZ8041RNLI", /* PHY_BASIC_FEATURES */ .driver_data = &ksz8041_type, @@ -5747,9 +6168,8 @@ static struct phy_driver ksphy_driver[] = { .suspend = kszphy_suspend, .resume = kszphy_resume, }, { - .phy_id = PHY_ID_KSZ8081, + PHY_ID_MATCH_MODEL(PHY_ID_KSZ8081), .name = "Micrel KSZ8081 or KSZ8091", - .phy_id_mask = MICREL_PHY_ID_MASK, .flags = PHY_POLL_CABLE_TEST, /* PHY_BASIC_FEATURES */ .driver_data = &ksz8081_type, @@ -5768,9 +6188,8 @@ static struct phy_driver ksphy_driver[] = { .cable_test_start = ksz886x_cable_test_start, .cable_test_get_status = ksz886x_cable_test_get_status, }, { - .phy_id = PHY_ID_KSZ8061, + PHY_ID_MATCH_MODEL(PHY_ID_KSZ8061), .name = "Micrel KSZ8061", - .phy_id_mask = MICREL_PHY_ID_MASK, /* PHY_BASIC_FEATURES */ .probe = kszphy_probe, .config_init = ksz8061_config_init, @@ -5798,8 +6217,7 @@ static struct phy_driver ksphy_driver[] = { .read_mmd = genphy_read_mmd_unsupported, .write_mmd = genphy_write_mmd_unsupported, }, { - .phy_id = PHY_ID_KSZ9031, - .phy_id_mask = MICREL_PHY_ID_MASK, + PHY_ID_MATCH_MODEL(PHY_ID_KSZ9031), .name = "Micrel KSZ9031 Gigabit PHY", .flags = PHY_POLL_CABLE_TEST, .driver_data = &ksz9021_type, @@ -5819,8 +6237,7 @@ static struct phy_driver ksphy_driver[] = { .cable_test_get_status = ksz9x31_cable_test_get_status, .set_loopback = ksz9031_set_loopback, }, { - .phy_id = PHY_ID_LAN8814, - .phy_id_mask = MICREL_PHY_ID_MASK, + PHY_ID_MATCH_MODEL(PHY_ID_LAN8814), .name = "Microchip INDY Gigabit Quad PHY", .flags = PHY_POLL_CABLE_TEST, .config_init = lan8814_config_init, @@ -5838,8 +6255,7 @@ static struct phy_driver ksphy_driver[] = { .cable_test_start = lan8814_cable_test_start, .cable_test_get_status = ksz886x_cable_test_get_status, }, { - .phy_id = PHY_ID_LAN8804, - .phy_id_mask = MICREL_PHY_ID_MASK, + PHY_ID_MATCH_MODEL(PHY_ID_LAN8804), .name = "Microchip LAN966X Gigabit PHY", .config_init = lan8804_config_init, .driver_data = &ksz9021_type, @@ -5854,8 +6270,7 @@ static struct phy_driver ksphy_driver[] = { .config_intr = lan8804_config_intr, .handle_interrupt = lan8804_handle_interrupt, }, { - .phy_id = PHY_ID_LAN8841, - .phy_id_mask = MICREL_PHY_ID_MASK, + PHY_ID_MATCH_MODEL(PHY_ID_LAN8841), .name = "Microchip LAN8841 Gigabit PHY", .flags = PHY_POLL_CABLE_TEST, .driver_data = &lan8841_type, @@ -5872,8 +6287,22 @@ static struct phy_driver ksphy_driver[] = { .cable_test_start = lan8814_cable_test_start, .cable_test_get_status = ksz886x_cable_test_get_status, }, { - .phy_id = PHY_ID_KSZ9131, - .phy_id_mask = MICREL_PHY_ID_MASK, + PHY_ID_MATCH_MODEL(PHY_ID_LAN8842), + .name = "Microchip LAN8842 Gigabit PHY", + .flags = PHY_POLL_CABLE_TEST, + .driver_data = &lan8814_type, + .probe = lan8842_probe, + .config_init = lan8842_config_init, + .config_intr = lan8842_config_intr, + .inband_caps = lan8842_inband_caps, + .config_inband = lan8842_config_inband, + .handle_interrupt = lan8842_handle_interrupt, + .get_phy_stats = lan8842_get_phy_stats, + .update_stats = lan8842_update_stats, + .cable_test_start = lan8814_cable_test_start, + .cable_test_get_status = ksz886x_cable_test_get_status, +}, { + PHY_ID_MATCH_MODEL(PHY_ID_KSZ9131), .name = "Microchip KSZ9131 Gigabit PHY", /* PHY_GBIT_FEATURES */ .flags = PHY_POLL_CABLE_TEST, @@ -5894,8 +6323,7 @@ static struct phy_driver ksphy_driver[] = { .cable_test_get_status = ksz9x31_cable_test_get_status, 
.get_features = ksz9477_get_features, }, { - .phy_id = PHY_ID_KSZ8873MLL, - .phy_id_mask = MICREL_PHY_ID_MASK, + PHY_ID_MATCH_MODEL(PHY_ID_KSZ8873MLL), .name = "Micrel KSZ8873MLL Switch", /* PHY_BASIC_FEATURES */ .config_init = kszphy_config_init, @@ -5904,8 +6332,7 @@ static struct phy_driver ksphy_driver[] = { .suspend = genphy_suspend, .resume = genphy_resume, }, { - .phy_id = PHY_ID_KSZ886X, - .phy_id_mask = MICREL_PHY_ID_MASK, + PHY_ID_MATCH_MODEL(PHY_ID_KSZ886X), .name = "Micrel KSZ8851 Ethernet MAC or KSZ886X Switch", .driver_data = &ksz886x_type, /* PHY_BASIC_FEATURES */ @@ -5925,8 +6352,7 @@ static struct phy_driver ksphy_driver[] = { .suspend = genphy_suspend, .resume = genphy_resume, }, { - .phy_id = PHY_ID_KSZ9477, - .phy_id_mask = MICREL_PHY_ID_MASK, + PHY_ID_MATCH_MODEL(PHY_ID_KSZ9477), .name = "Microchip KSZ9477", .probe = kszphy_probe, /* PHY_GBIT_FEATURES */ @@ -5953,22 +6379,24 @@ MODULE_LICENSE("GPL"); static const struct mdio_device_id __maybe_unused micrel_tbl[] = { { PHY_ID_KSZ9021, 0x000ffffe }, - { PHY_ID_KSZ9031, MICREL_PHY_ID_MASK }, - { PHY_ID_KSZ9131, MICREL_PHY_ID_MASK }, + { PHY_ID_MATCH_MODEL(PHY_ID_KSZ9031) }, + { PHY_ID_MATCH_MODEL(PHY_ID_KSZ9131) }, { PHY_ID_KSZ8001, 0x00fffffc }, - { PHY_ID_KS8737, MICREL_PHY_ID_MASK }, + { PHY_ID_MATCH_MODEL(PHY_ID_KS8737) }, { PHY_ID_KSZ8021, 0x00ffffff }, { PHY_ID_KSZ8031, 0x00ffffff }, - { PHY_ID_KSZ8041, MICREL_PHY_ID_MASK }, - { PHY_ID_KSZ8051, MICREL_PHY_ID_MASK }, - { PHY_ID_KSZ8061, MICREL_PHY_ID_MASK }, - { PHY_ID_KSZ8081, MICREL_PHY_ID_MASK }, - { PHY_ID_KSZ8873MLL, MICREL_PHY_ID_MASK }, - { PHY_ID_KSZ886X, MICREL_PHY_ID_MASK }, - { PHY_ID_KSZ9477, MICREL_PHY_ID_MASK }, - { PHY_ID_LAN8814, MICREL_PHY_ID_MASK }, - { PHY_ID_LAN8804, MICREL_PHY_ID_MASK }, - { PHY_ID_LAN8841, MICREL_PHY_ID_MASK }, + { PHY_ID_MATCH_MODEL(PHY_ID_KSZ8041) }, + { PHY_ID_MATCH_MODEL(PHY_ID_KSZ8041RNLI) }, + { PHY_ID_MATCH_MODEL(PHY_ID_KSZ8051) }, + { PHY_ID_MATCH_MODEL(PHY_ID_KSZ8061) }, + { PHY_ID_MATCH_MODEL(PHY_ID_KSZ8081) }, + { PHY_ID_MATCH_MODEL(PHY_ID_KSZ8873MLL) }, + { PHY_ID_MATCH_MODEL(PHY_ID_KSZ886X) }, + { PHY_ID_MATCH_MODEL(PHY_ID_KSZ9477) }, + { PHY_ID_MATCH_MODEL(PHY_ID_LAN8814) }, + { PHY_ID_MATCH_MODEL(PHY_ID_LAN8804) }, + { PHY_ID_MATCH_MODEL(PHY_ID_LAN8841) }, + { PHY_ID_MATCH_MODEL(PHY_ID_LAN8842) }, { } }; diff --git a/drivers/net/phy/motorcomm.c b/drivers/net/phy/motorcomm.c index 0e91f5d1a4fd..a3593e663059 100644 --- a/drivers/net/phy/motorcomm.c +++ b/drivers/net/phy/motorcomm.c @@ -213,6 +213,20 @@ #define YT8521_RC1R_RGMII_2_100_NS 14 #define YT8521_RC1R_RGMII_2_250_NS 15 +/* LED CONFIG */ +#define YT8521_MAX_LEDS 3 +#define YT8521_LED0_CFG_REG 0xA00C +#define YT8521_LED1_CFG_REG 0xA00D +#define YT8521_LED2_CFG_REG 0xA00E +#define YT8521_LED_ACT_BLK_IND BIT(13) +#define YT8521_LED_FDX_ON_EN BIT(12) +#define YT8521_LED_HDX_ON_EN BIT(11) +#define YT8521_LED_TXACT_BLK_EN BIT(10) +#define YT8521_LED_RXACT_BLK_EN BIT(9) +#define YT8521_LED_1000_ON_EN BIT(6) +#define YT8521_LED_100_ON_EN BIT(5) +#define YT8521_LED_10_ON_EN BIT(4) + #define YTPHY_MISC_CONFIG_REG 0xA006 #define YTPHY_MCR_FIBER_SPEED_MASK BIT(0) #define YTPHY_MCR_FIBER_1000BX (0x1 << 0) @@ -1681,6 +1695,106 @@ err_restore_page: return phy_restore_page(phydev, old_page, ret); } +static const unsigned long supported_trgs = (BIT(TRIGGER_NETDEV_FULL_DUPLEX) | + BIT(TRIGGER_NETDEV_HALF_DUPLEX) | + BIT(TRIGGER_NETDEV_LINK) | + BIT(TRIGGER_NETDEV_LINK_10) | + BIT(TRIGGER_NETDEV_LINK_100) | + BIT(TRIGGER_NETDEV_LINK_1000) | + BIT(TRIGGER_NETDEV_RX) | + 
BIT(TRIGGER_NETDEV_TX)); + +static int yt8521_led_hw_is_supported(struct phy_device *phydev, u8 index, + unsigned long rules) +{ + if (index >= YT8521_MAX_LEDS) + return -EINVAL; + + /* All combinations of the supported triggers are allowed */ + if (rules & ~supported_trgs) + return -EOPNOTSUPP; + + return 0; +} + +static int yt8521_led_hw_control_set(struct phy_device *phydev, u8 index, + unsigned long rules) +{ + u16 val = 0; + + if (index >= YT8521_MAX_LEDS) + return -EINVAL; + + if (test_bit(TRIGGER_NETDEV_LINK, &rules)) { + val |= YT8521_LED_10_ON_EN; + val |= YT8521_LED_100_ON_EN; + val |= YT8521_LED_1000_ON_EN; + } + + if (test_bit(TRIGGER_NETDEV_LINK_10, &rules)) + val |= YT8521_LED_10_ON_EN; + + if (test_bit(TRIGGER_NETDEV_LINK_100, &rules)) + val |= YT8521_LED_100_ON_EN; + + if (test_bit(TRIGGER_NETDEV_LINK_1000, &rules)) + val |= YT8521_LED_1000_ON_EN; + + if (test_bit(TRIGGER_NETDEV_FULL_DUPLEX, &rules)) + val |= YT8521_LED_FDX_ON_EN; + + if (test_bit(TRIGGER_NETDEV_HALF_DUPLEX, &rules)) + val |= YT8521_LED_HDX_ON_EN; + + if (test_bit(TRIGGER_NETDEV_TX, &rules) || + test_bit(TRIGGER_NETDEV_RX, &rules)) + val |= YT8521_LED_ACT_BLK_IND; + + if (test_bit(TRIGGER_NETDEV_TX, &rules)) + val |= YT8521_LED_TXACT_BLK_EN; + + if (test_bit(TRIGGER_NETDEV_RX, &rules)) + val |= YT8521_LED_RXACT_BLK_EN; + + return ytphy_write_ext(phydev, YT8521_LED0_CFG_REG + index, val); +} + +static int yt8521_led_hw_control_get(struct phy_device *phydev, u8 index, + unsigned long *rules) +{ + int val; + + if (index >= YT8521_MAX_LEDS) + return -EINVAL; + + val = ytphy_read_ext(phydev, YT8521_LED0_CFG_REG + index); + if (val < 0) + return val; + + if (val & YT8521_LED_TXACT_BLK_EN || val & YT8521_LED_ACT_BLK_IND) + __set_bit(TRIGGER_NETDEV_TX, rules); + + if (val & YT8521_LED_RXACT_BLK_EN || val & YT8521_LED_ACT_BLK_IND) + __set_bit(TRIGGER_NETDEV_RX, rules); + + if (val & YT8521_LED_FDX_ON_EN) + __set_bit(TRIGGER_NETDEV_FULL_DUPLEX, rules); + + if (val & YT8521_LED_HDX_ON_EN) + __set_bit(TRIGGER_NETDEV_HALF_DUPLEX, rules); + + if (val & YT8521_LED_1000_ON_EN) + __set_bit(TRIGGER_NETDEV_LINK_1000, rules); + + if (val & YT8521_LED_100_ON_EN) + __set_bit(TRIGGER_NETDEV_LINK_100, rules); + + if (val & YT8521_LED_10_ON_EN) + __set_bit(TRIGGER_NETDEV_LINK_10, rules); + + return 0; +} +
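Context on the set()/get() pair above, not part of the diff: the netdev LED trigger is expected to read back via led_hw_control_get() the same rules that led_hw_control_set() programmed, so the bit mapping must round-trip; in particular the FDX/HDX "on" bits have to map to the matching duplex triggers in both directions. A hypothetical self-check makes the contract concrete. Note that TRIGGER_NETDEV_LINK programs all three per-speed bits and therefore reads back as LINK_10 | LINK_100 | LINK_1000:

```c
/*
 * Illustrative only, not in the patch: verify that a rule set programmed
 * into an LED register is reported back identically. Holds for per-speed
 * rule sets such as LINK_1000 | RX | TX.
 */
static int yt8521_led_roundtrip_check(struct phy_device *phydev, u8 index,
				      unsigned long rules)
{
	unsigned long readback = 0;
	int ret;

	ret = yt8521_led_hw_control_set(phydev, index, rules);
	if (ret < 0)
		return ret;

	ret = yt8521_led_hw_control_get(phydev, index, &readback);
	if (ret < 0)
		return ret;

	return readback == rules ? 0 : -EINVAL;
}
```

static int yt8531_config_init(struct phy_device *phydev) { struct device_node *node = phydev->mdio.dev.of_node; @@ -2920,6 +3034,9 @@ static struct phy_driver motorcomm_phy_drvs[] = { .soft_reset = yt8521_soft_reset, .suspend = yt8521_suspend, .resume = yt8521_resume, + .led_hw_is_supported = yt8521_led_hw_is_supported, + .led_hw_control_set = yt8521_led_hw_control_set, + .led_hw_control_get = yt8521_led_hw_control_get, }, { PHY_ID_MATCH_EXACT(PHY_ID_YT8531), diff --git a/drivers/net/phy/mscc/mscc.h b/drivers/net/phy/mscc/mscc.h index 2bfe314ef881..2d8eca54c40a 100644 --- a/drivers/net/phy/mscc/mscc.h +++ b/drivers/net/phy/mscc/mscc.h @@ -196,6 +196,9 @@ enum rgmii_clock_delay { #define MSCC_PHY_EXTENDED_INT_MS_EGR BIT(9) /* Extended Page 3 Registers */ +#define MSCC_PHY_SERDES_PCS_CTRL 16 +#define MSCC_PHY_SERDES_ANEG BIT(7) + #define MSCC_PHY_SERDES_TX_VALID_CNT 21 #define MSCC_PHY_SERDES_TX_CRC_ERR_CNT 22 #define MSCC_PHY_SERDES_RX_VALID_CNT 28 diff --git a/drivers/net/phy/mscc/mscc_main.c b/drivers/net/phy/mscc/mscc_main.c index 24c75903f535..ef0ef1570d39 100644 --- a/drivers/net/phy/mscc/mscc_main.c +++ b/drivers/net/phy/mscc/mscc_main.c @@ -2202,6 +2202,28 @@ static int vsc85xx_read_status(struct phy_device *phydev) return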
genphy_read_status(phydev); } +static unsigned int vsc85xx_inband_caps(struct phy_device *phydev, + phy_interface_t interface) +{ + if (interface != PHY_INTERFACE_MODE_SGMII && + interface != PHY_INTERFACE_MODE_QSGMII) + return 0; + + return LINK_INBAND_DISABLE | LINK_INBAND_ENABLE; +} + +static int vsc85xx_config_inband(struct phy_device *phydev, unsigned int modes) +{ + u16 reg_val = 0; + + if (modes == LINK_INBAND_ENABLE) + reg_val = MSCC_PHY_SERDES_ANEG; + + return phy_modify_paged(phydev, MSCC_PHY_PAGE_EXTENDED_3, + MSCC_PHY_SERDES_PCS_CTRL, MSCC_PHY_SERDES_ANEG, + reg_val); +} + static int vsc8514_probe(struct phy_device *phydev) { struct vsc8531_private *vsc8531; @@ -2414,6 +2436,8 @@ static struct phy_driver vsc85xx_driver[] = { .get_sset_count = &vsc85xx_get_sset_count, .get_strings = &vsc85xx_get_strings, .get_stats = &vsc85xx_get_stats, + .inband_caps = vsc85xx_inband_caps, + .config_inband = vsc85xx_config_inband, }, { .phy_id = PHY_ID_VSC8514, @@ -2437,6 +2461,8 @@ static struct phy_driver vsc85xx_driver[] = { .get_sset_count = &vsc85xx_get_sset_count, .get_strings = &vsc85xx_get_strings, .get_stats = &vsc85xx_get_stats, + .inband_caps = vsc85xx_inband_caps, + .config_inband = vsc85xx_config_inband, }, { .phy_id = PHY_ID_VSC8530, @@ -2557,6 +2583,8 @@ static struct phy_driver vsc85xx_driver[] = { .get_sset_count = &vsc85xx_get_sset_count, .get_strings = &vsc85xx_get_strings, .get_stats = &vsc85xx_get_stats, + .inband_caps = vsc85xx_inband_caps, + .config_inband = vsc85xx_config_inband, }, { .phy_id = PHY_ID_VSC856X, @@ -2579,6 +2607,8 @@ static struct phy_driver vsc85xx_driver[] = { .get_sset_count = &vsc85xx_get_sset_count, .get_strings = &vsc85xx_get_strings, .get_stats = &vsc85xx_get_stats, + .inband_caps = vsc85xx_inband_caps, + .config_inband = vsc85xx_config_inband, }, { .phy_id = PHY_ID_VSC8572, @@ -2605,6 +2635,8 @@ static struct phy_driver vsc85xx_driver[] = { .get_sset_count = &vsc85xx_get_sset_count, .get_strings = &vsc85xx_get_strings, .get_stats = &vsc85xx_get_stats, + .inband_caps = vsc85xx_inband_caps, + .config_inband = vsc85xx_config_inband, }, { .phy_id = PHY_ID_VSC8574, @@ -2631,6 +2663,8 @@ static struct phy_driver vsc85xx_driver[] = { .get_sset_count = &vsc85xx_get_sset_count, .get_strings = &vsc85xx_get_strings, .get_stats = &vsc85xx_get_stats, + .inband_caps = vsc85xx_inband_caps, + .config_inband = vsc85xx_config_inband, }, { .phy_id = PHY_ID_VSC8575, @@ -2655,6 +2689,8 @@ static struct phy_driver vsc85xx_driver[] = { .get_sset_count = &vsc85xx_get_sset_count, .get_strings = &vsc85xx_get_strings, .get_stats = &vsc85xx_get_stats, + .inband_caps = vsc85xx_inband_caps, + .config_inband = vsc85xx_config_inband, }, { .phy_id = PHY_ID_VSC8582, @@ -2679,6 +2715,8 @@ static struct phy_driver vsc85xx_driver[] = { .get_sset_count = &vsc85xx_get_sset_count, .get_strings = &vsc85xx_get_strings, .get_stats = &vsc85xx_get_stats, + .inband_caps = vsc85xx_inband_caps, + .config_inband = vsc85xx_config_inband, }, { .phy_id = PHY_ID_VSC8584, @@ -2704,6 +2742,8 @@ static struct phy_driver vsc85xx_driver[] = { .get_strings = &vsc85xx_get_strings, .get_stats = &vsc85xx_get_stats, .link_change_notify = &vsc85xx_link_change_notify, + .inband_caps = vsc85xx_inband_caps, + .config_inband = vsc85xx_config_inband, } }; diff --git a/drivers/net/phy/mxl-86110.c b/drivers/net/phy/mxl-86110.c index ff2a3a22bd5b..e5d137a37a1d 100644 --- a/drivers/net/phy/mxl-86110.c +++ b/drivers/net/phy/mxl-86110.c @@ -15,6 +15,7 @@ /* PHY ID */ #define PHY_ID_MXL86110 0xc1335580 +#define 
PHY_ID_MXL86111 0xc1335588 /* required to access extended registers */ #define MXL86110_EXTD_REG_ADDR_OFFSET 0x1E @@ -22,7 +23,15 @@ #define PHY_IRQ_ENABLE_REG 0x12 #define PHY_IRQ_ENABLE_REG_WOL BIT(6) -/* SyncE Configuration Register - COM_EXT SYNCE_CFG */ +/* different pages for EXTD access for MXL86111 */ +/* SerDes/PHY Control Access Register - COM_EXT_SMI_SDS_PHY */ +#define MXL86111_EXT_SMI_SDS_PHY_REG 0xA000 +#define MXL86111_EXT_SMI_SDS_PHYSPACE_MASK BIT(1) +#define MXL86111_EXT_SMI_SDS_PHYFIBER_SPACE (0x1 << 1) +#define MXL86111_EXT_SMI_SDS_PHYUTP_SPACE (0x0 << 1) +#define MXL86111_EXT_SMI_SDS_PHY_AUTO 0xff + +/* SyncE Configuration Register - COM_EXT_SYNCE_CFG */ #define MXL86110_EXT_SYNCE_CFG_REG 0xA012 #define MXL86110_EXT_SYNCE_CFG_CLK_FRE_SEL BIT(4) #define MXL86110_EXT_SYNCE_CFG_EN_SYNC_E_DURING_LNKDN BIT(5) @@ -71,6 +80,11 @@ #define MXL86110_MAX_LEDS 3 /* LED registers and defines */ +#define MXL86110_COM_EXT_LED_GEN_CFG 0xA00B +# define MXL86110_COM_EXT_LED_GEN_CFG_LFM(x) ((BIT(0) | BIT(1)) << (3 * (x))) +# define MXL86110_COM_EXT_LED_GEN_CFG_LFME(x) (BIT(0) << (3 * (x))) +# define MXL86110_COM_EXT_LED_GEN_CFG_LFE(x) (BIT(2) << (3 * (x))) + #define MXL86110_LED0_CFG_REG 0xA00C #define MXL86110_LED1_CFG_REG 0xA00D #define MXL86110_LED2_CFG_REG 0xA00E @@ -110,9 +124,67 @@ /* Chip Configuration Register - COM_EXT_CHIP_CFG */ #define MXL86110_EXT_CHIP_CFG_REG 0xA001 +#define MXL86111_EXT_CHIP_CFG_MODE_SEL_MASK GENMASK(2, 0) +#define MXL86111_EXT_CHIP_CFG_MODE_UTP_TO_RGMII 0 +#define MXL86111_EXT_CHIP_CFG_MODE_FIBER_TO_RGMII 1 +#define MXL86111_EXT_CHIP_CFG_MODE_UTP_FIBER_TO_RGMII 2 +#define MXL86111_EXT_CHIP_CFG_MODE_UTP_TO_SGMII 3 +#define MXL86111_EXT_CHIP_CFG_MODE_SGPHY_TO_RGMAC 4 +#define MXL86111_EXT_CHIP_CFG_MODE_SGMAC_TO_RGPHY 5 +#define MXL86111_EXT_CHIP_CFG_MODE_UTP_TO_FIBER_AUTO 6 +#define MXL86111_EXT_CHIP_CFG_MODE_UTP_TO_FIBER_FORCE 7 + +#define MXL86111_EXT_CHIP_CFG_CLDO_MASK GENMASK(5, 4) +#define MXL86111_EXT_CHIP_CFG_CLDO_3V3 0 +#define MXL86111_EXT_CHIP_CFG_CLDO_2V5 1 +#define MXL86111_EXT_CHIP_CFG_CLDO_1V8_2 2 +#define MXL86111_EXT_CHIP_CFG_CLDO_1V8_3 3 +#define MXL86111_EXT_CHIP_CFG_CLDO_SHIFT 4 +#define MXL86111_EXT_CHIP_CFG_ELDO BIT(6) #define MXL86110_EXT_CHIP_CFG_RXDLY_ENABLE BIT(8) #define MXL86110_EXT_CHIP_CFG_SW_RST_N_MODE BIT(15) +/* Specific Status Register - PHY_STAT */ +#define MXL86111_PHY_STAT_REG 0x11 +#define MXL86111_PHY_STAT_SPEED_MASK GENMASK(15, 14) +#define MXL86111_PHY_STAT_SPEED_OFFSET 14 +#define MXL86111_PHY_STAT_SPEED_10M 0x0 +#define MXL86111_PHY_STAT_SPEED_100M 0x1 +#define MXL86111_PHY_STAT_SPEED_1000M 0x2 +#define MXL86111_PHY_STAT_DPX_OFFSET 13 +#define MXL86111_PHY_STAT_DPX BIT(13) +#define MXL86111_PHY_STAT_LSRT BIT(10) + +/* 3 phy reg page modes, auto mode combines utp and fiber mode */ +#define MXL86111_MODE_FIBER 0x1 +#define MXL86111_MODE_UTP 0x2 +#define MXL86111_MODE_AUTO 0x3 + +/* FIBER Auto-Negotiation link partner ability - SDS_AN_LPA */ +#define MXL86111_SDS_AN_LPA_PAUSE (0x3 << 7) +#define MXL86111_SDS_AN_LPA_ASYM_PAUSE (0x2 << 7) + +/* Miscellaneous Control Register - COM_EXT_MISC_CFG */ +#define MXL86111_EXT_MISC_CONFIG_REG 0xa006 +#define MXL86111_EXT_MISC_CONFIG_FIB_SPEED_SEL BIT(0) +#define MXL86111_EXT_MISC_CONFIG_FIB_SPEED_SEL_1000BX (0x1 << 0) +#define MXL86111_EXT_MISC_CONFIG_FIB_SPEED_SEL_100BX (0x0 << 0) + +/* Phy fiber Link timer cfg2 Register - EXT_SDS_LINK_TIMER_CFG2 */ +#define MXL86111_EXT_SDS_LINK_TIMER_CFG2_REG 0xA5 +#define MXL86111_EXT_SDS_LINK_TIMER_CFG2_EN_AUTOSEN BIT(15) + +/* default values
of PHY registers, required for Dual Media mode */ +#define MII_BMSR_DEFAULT_VAL 0x7949 +#define MII_ESTATUS_DEFAULT_VAL 0x2000 + +/* Timeout in ms for PHY SW reset check in STD_CTRL/SDS_CTRL */ +#define BMCR_RESET_TIMEOUT 500 + +/* PL P1 requires optimized RGMII timing for 1.8V RGMII voltage + */ +#define MXL86111_PL_P1 0x500 + /** * __mxl86110_write_extended_reg() - write to a PHY's extended register * @phydev: pointer to the PHY device structure @@ -236,6 +308,29 @@ } /** + * mxl86110_modify_extended_reg() - modify bits of a PHY's extended register + * @phydev: pointer to the PHY device structure + * @regnum: register number to modify + * @mask: bit mask of bits to clear + * @set: bit mask of bits to set + * + * Note: register value = (old register value & ~mask) | set. + * + * Return: 0 or negative error code + */ +static int mxl86110_modify_extended_reg(struct phy_device *phydev, + u16 regnum, u16 mask, u16 set) +{ + int ret; + + phy_lock_mdio_bus(phydev); + ret = __mxl86110_modify_extended_reg(phydev, regnum, mask, set); + phy_unlock_mdio_bus(phydev); + + return ret; +} + +/** * mxl86110_get_wol() - report if wake-on-lan is enabled * @phydev: pointer to the phy_device * @wol: a pointer to a &struct ethtool_wolinfo @@ -394,6 +489,7 @@ static int mxl86110_led_hw_control_set(struct phy_device *phydev, u8 index, unsigned long rules) { u16 val = 0; + int ret; if (index >= MXL86110_MAX_LEDS) return -EINVAL; @@ -423,8 +519,43 @@ static int mxl86110_led_hw_control_set(struct phy_device *phydev, u8 index, rules & BIT(TRIGGER_NETDEV_RX)) val |= MXL86110_LEDX_CFG_BLINK; - return mxl86110_write_extended_reg(phydev, + ret = mxl86110_write_extended_reg(phydev, MXL86110_LED0_CFG_REG + index, val); + if (ret) + return ret; + + /* clear manual control bit */ + ret = mxl86110_modify_extended_reg(phydev, + MXL86110_COM_EXT_LED_GEN_CFG, + MXL86110_COM_EXT_LED_GEN_CFG_LFE(index), + 0); + + return ret; +} + +static int mxl86110_led_brightness_set(struct phy_device *phydev, + u8 index, enum led_brightness value) +{ + u16 mask, set; + int ret; + + if (index >= MXL86110_MAX_LEDS) + return -EINVAL; + + /* force manual control */ + set = MXL86110_COM_EXT_LED_GEN_CFG_LFE(index); + /* clear previous force mode */ + mask = MXL86110_COM_EXT_LED_GEN_CFG_LFM(index); + + /* force LED to be permanently on */ + if (value != LED_OFF) + set |= MXL86110_COM_EXT_LED_GEN_CFG_LFME(index); + + ret = mxl86110_modify_extended_reg(phydev, + MXL86110_COM_EXT_LED_GEN_CFG, + mask, set); + + return ret; } /** @@ -521,22 +652,15 @@ static int mxl86110_enable_led_activity_blink(struct phy_device *phydev) } /** - * mxl86110_config_init() - initialize the PHY + * mxl86110_config_rgmii_delay() - configure RGMII delays * @phydev: pointer to the phy_device * * Return: 0 or negative errno code */ -static int mxl86110_config_init(struct phy_device *phydev) +static int mxl86110_config_rgmii_delay(struct phy_device *phydev) { - u16 val = 0; int ret; - - phy_lock_mdio_bus(phydev); - - /* configure syncE / clk output */ - ret = mxl86110_synce_clk_cfg(phydev); - if (ret < 0) - goto out; + u16 val; switch (phydev->interface) { case PHY_INTERFACE_MODE_RGMII: @@ -578,6 +702,31 @@ static int mxl86110_config_init(struct phy_device *phydev) if (ret < 0) goto out; +out: + return ret; +} +
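Context, not part of the diff: mxl86110_modify_extended_reg() above follows the usual phylib convention where the double-underscore variant assumes the caller already holds the MDIO bus lock and the plain variant is the self-locking wrapper. The unlocked variants exist so that multi-step extended-register sequences stay atomic on the bus, e.g. (illustrative sketch only):

```c
/*
 * Illustrative sketch: no other MDIO traffic can interleave between the
 * two extended-register accesses while the bus lock is held.
 */
static int mxl86110_example_atomic_sequence(struct phy_device *phydev)
{
	int ret;

	phy_lock_mdio_bus(phydev);
	ret = __mxl86110_read_extended_reg(phydev, MXL86110_EXT_CHIP_CFG_REG);
	if (ret >= 0)
		ret = __mxl86110_modify_extended_reg(phydev,
						     MXL86110_EXT_CHIP_CFG_REG,
						     MXL86110_EXT_CHIP_CFG_RXDLY_ENABLE,
						     0);
	phy_unlock_mdio_bus(phydev);

	return ret;
}
```

+/** + * mxl86110_config_init() - initialize the MXL86110 PHY + * @phydev: pointer to the phy_device + * + * Return: 0 or negative errno code + */ +static int mxl86110_config_init(struct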
phy_device *phydev) +{ + int ret; + + phy_lock_mdio_bus(phydev); + + /* configure syncE / clk output */ + ret = mxl86110_synce_clk_cfg(phydev); + if (ret < 0) + goto out; + + ret = mxl86110_config_rgmii_delay(phydev); + if (ret < 0) + goto out; + ret = mxl86110_enable_led_activity_blink(phydev); if (ret < 0) goto out; @@ -589,6 +738,201 @@ out: return ret; } +/** + * mxl86111_probe() - validate bootstrap chip config and set UTP page + * @phydev: pointer to the phy_device + * + * Return: 0 or negative errno code + */ +static int mxl86111_probe(struct phy_device *phydev) +{ + int chip_config; + u16 reg_page; + int ret; + + chip_config = mxl86110_read_extended_reg(phydev, MXL86110_EXT_CHIP_CFG_REG); + if (chip_config < 0) + return chip_config; + + switch (chip_config & MXL86111_EXT_CHIP_CFG_MODE_SEL_MASK) { + case MXL86111_EXT_CHIP_CFG_MODE_UTP_TO_SGMII: + case MXL86111_EXT_CHIP_CFG_MODE_UTP_TO_RGMII: + phydev->port = PORT_TP; + reg_page = MXL86111_EXT_SMI_SDS_PHYUTP_SPACE; + break; + default: + return -EOPNOTSUPP; + } + + ret = mxl86110_write_extended_reg(phydev, + MXL86111_EXT_SMI_SDS_PHY_REG, + reg_page); + if (ret < 0) + return ret; + + return 0; +} + +/** + * mxl86111_config_init() - initialize the MXL86111 PHY + * @phydev: pointer to the phy_device + * + * Return: 0 or negative errno code + */ +static int mxl86111_config_init(struct phy_device *phydev) +{ + int ret; + + phy_lock_mdio_bus(phydev); + + /* configure syncE / clk output */ + ret = mxl86110_synce_clk_cfg(phydev); + if (ret < 0) + goto out; + + switch (phydev->interface) { + case PHY_INTERFACE_MODE_100BASEX: + ret = __mxl86110_modify_extended_reg(phydev, + MXL86111_EXT_MISC_CONFIG_REG, + MXL86111_EXT_MISC_CONFIG_FIB_SPEED_SEL, + MXL86111_EXT_MISC_CONFIG_FIB_SPEED_SEL_100BX); + if (ret < 0) + goto out; + break; + case PHY_INTERFACE_MODE_1000BASEX: + case PHY_INTERFACE_MODE_SGMII: + ret = __mxl86110_modify_extended_reg(phydev, + MXL86111_EXT_MISC_CONFIG_REG, + MXL86111_EXT_MISC_CONFIG_FIB_SPEED_SEL, + MXL86111_EXT_MISC_CONFIG_FIB_SPEED_SEL_1000BX); + if (ret < 0) + goto out; + break; + default: + /* RGMII modes */ + ret = mxl86110_config_rgmii_delay(phydev); + if (ret < 0) + goto out; + ret = __mxl86110_modify_extended_reg(phydev, MXL86110_EXT_RGMII_CFG1_REG, + MXL86110_EXT_RGMII_CFG1_FULL_MASK, ret); + + /* PL P1 requires optimized RGMII timing for 1.8V RGMII voltage + */ + ret = __mxl86110_read_extended_reg(phydev, 0xf); + if (ret < 0) + goto out; + + if (ret == MXL86111_PL_P1) { + ret = __mxl86110_read_extended_reg(phydev, MXL86110_EXT_CHIP_CFG_REG); + if (ret < 0) + goto out; + + /* check if LDO is in 1.8V mode */ + switch (FIELD_GET(MXL86111_EXT_CHIP_CFG_CLDO_MASK, ret)) { + case MXL86111_EXT_CHIP_CFG_CLDO_1V8_3: + case MXL86111_EXT_CHIP_CFG_CLDO_1V8_2: + ret = __mxl86110_write_extended_reg(phydev, 0xa010, 0xabff); + if (ret < 0) + goto out; + break; + default: + break; + } + } + break; + } + + ret = mxl86110_enable_led_activity_blink(phydev); + if (ret < 0) + goto out; + + ret = mxl86110_broadcast_cfg(phydev); +out: + phy_unlock_mdio_bus(phydev); + + return ret; +} + +/** + * mxl86111_read_page() - read reg page + * @phydev: pointer to the phy_device + * + * Return: current reg space of mxl86111 or negative errno code + */ +static int mxl86111_read_page(struct phy_device *phydev) +{ + int page; + + page = __mxl86110_read_extended_reg(phydev, MXL86111_EXT_SMI_SDS_PHY_REG); + if (page < 0) + return page; + + return page & MXL86111_EXT_SMI_SDS_PHYSPACE_MASK; +}; + +/** + * mxl86111_write_page() - Set reg page + * @phydev: 
pointer to the phy_device + * @page: The reg page to set + * + * Return: 0 or negative errno code + */ +static int mxl86111_write_page(struct phy_device *phydev, int page) +{ + return __mxl86110_modify_extended_reg(phydev, MXL86111_EXT_SMI_SDS_PHY_REG, + MXL86111_EXT_SMI_SDS_PHYSPACE_MASK, page); +}; + +static int mxl86111_config_inband(struct phy_device *phydev, unsigned int modes) +{ + int ret; + + ret = phy_modify_paged(phydev, MXL86111_EXT_SMI_SDS_PHYFIBER_SPACE, + MII_BMCR, BMCR_ANENABLE, + (modes == LINK_INBAND_DISABLE) ? 0 : BMCR_ANENABLE); + if (ret < 0) + goto out; + + phy_lock_mdio_bus(phydev); + + ret = __mxl86110_modify_extended_reg(phydev, MXL86111_EXT_SDS_LINK_TIMER_CFG2_REG, + MXL86111_EXT_SDS_LINK_TIMER_CFG2_EN_AUTOSEN, + (modes == LINK_INBAND_DISABLE) ? 0 : + MXL86111_EXT_SDS_LINK_TIMER_CFG2_EN_AUTOSEN); + if (ret < 0) + goto out; + + ret = __mxl86110_modify_extended_reg(phydev, MXL86110_EXT_CHIP_CFG_REG, + MXL86110_EXT_CHIP_CFG_SW_RST_N_MODE, 0); + if (ret < 0) + goto out; + + /* For fiber forced mode, power down/up to re-aneg */ + if (modes != LINK_INBAND_DISABLE) { + __phy_modify(phydev, MII_BMCR, 0, BMCR_PDOWN); + usleep_range(1000, 1050); + __phy_modify(phydev, MII_BMCR, BMCR_PDOWN, 0); + } + +out: + phy_unlock_mdio_bus(phydev); + + return ret; +} + +static unsigned int mxl86111_inband_caps(struct phy_device *phydev, + phy_interface_t interface) +{ + switch (interface) { + case PHY_INTERFACE_MODE_100BASEX: + case PHY_INTERFACE_MODE_1000BASEX: + case PHY_INTERFACE_MODE_SGMII: + return LINK_INBAND_DISABLE | LINK_INBAND_ENABLE; + default: + return 0; + } +} +
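Context, not part of the diff: with the ->read_page()/->write_page() hooks above wired to COM_EXT_SMI_SDS_PHY, the generic paged accessors can target the UTP or fiber register spaces directly. phy_modify_paged(), as used by mxl86111_config_inband() above, roughly does the following (simplified from drivers/net/phy/phy-core.c):

```c
/* Roughly the generic implementation, shown for context */
int phy_modify_paged(struct phy_device *phydev, int page, u32 regnum,
		     u16 mask, u16 set)
{
	int ret = 0, oldpage;

	/* locks the MDIO bus and switches pages via ->write_page() */
	oldpage = phy_select_page(phydev, page);
	if (oldpage >= 0)
		ret = __phy_modify(phydev, regnum, mask, set);

	/* restores the original page, unlocks, and keeps the first error */
	return phy_restore_page(phydev, oldpage, ret);
}
```

static struct phy_driver mxl_phy_drvs[] = { { PHY_ID_MATCH_EXACT(PHY_ID_MXL86110), @@ -596,9 +940,26 @@ static struct phy_driver mxl_phy_drvs[] = { .config_init = mxl86110_config_init, .get_wol = mxl86110_get_wol, .set_wol = mxl86110_set_wol, + .led_brightness_set = mxl86110_led_brightness_set, + .led_hw_is_supported = mxl86110_led_hw_is_supported, + .led_hw_control_get = mxl86110_led_hw_control_get, + .led_hw_control_set = mxl86110_led_hw_control_set, + }, + { + PHY_ID_MATCH_EXACT(PHY_ID_MXL86111), + .name = "MXL86111 Gigabit Ethernet", + .probe = mxl86111_probe, + .config_init = mxl86111_config_init, + .get_wol = mxl86110_get_wol, + .set_wol = mxl86110_set_wol, + .inband_caps = mxl86111_inband_caps, + .config_inband = mxl86111_config_inband, + .read_page = mxl86111_read_page, + .write_page = mxl86111_write_page, + .led_brightness_set = mxl86110_led_brightness_set, .led_hw_is_supported = mxl86110_led_hw_is_supported, - .led_hw_control_get = mxl86110_led_hw_control_get, - .led_hw_control_set = mxl86110_led_hw_control_set, + .led_hw_control_get = mxl86110_led_hw_control_get, + .led_hw_control_set = mxl86110_led_hw_control_set, }, }; @@ -606,11 +967,12 @@ module_phy_driver(mxl_phy_drvs); static const struct mdio_device_id __maybe_unused mxl_tbl[] = { { PHY_ID_MATCH_EXACT(PHY_ID_MXL86110) }, + { PHY_ID_MATCH_EXACT(PHY_ID_MXL86111) }, { } }; MODULE_DEVICE_TABLE(mdio, mxl_tbl); -MODULE_DESCRIPTION("MaxLinear MXL86110 PHY driver"); +MODULE_DESCRIPTION("MaxLinear MXL86110/MXL86111 PHY driver"); MODULE_AUTHOR("Stefano Radaelli"); MODULE_LICENSE("GPL"); diff --git a/drivers/net/phy/realtek/realtek_main.c b/drivers/net/phy/realtek/realtek_main.c index dd0d675149ad..82d8e1335215 100644 --- a/drivers/net/phy/realtek/realtek_main.c +++ b/drivers/net/phy/realtek/realtek_main.c @@ -10,6 +10,7 @@ #include <linux/bitops.h> #include <linux/of.h> #include <linux/phy.h> +#include <linux/pm_wakeirq.h> #include <linux/netdevice.h>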
#include <linux/module.h> #include <linux/delay.h> @@ -31,6 +32,7 @@ #define RTL821x_INER 0x12 #define RTL8211B_INER_INIT 0x6400 #define RTL8211E_INER_LINK_STATUS BIT(10) +#define RTL8211F_INER_PME BIT(7) #define RTL8211F_INER_LINK_STATUS BIT(4) #define RTL821x_INSR 0x13 @@ -96,17 +98,13 @@ #define RTL8211F_RXCR 0x15 #define RTL8211F_RX_DELAY BIT(3) -/* RTL8211F WOL interrupt configuration */ -#define RTL8211F_INTBCR_PAGE 0xd40 -#define RTL8211F_INTBCR 0x16 -#define RTL8211F_INTBCR_INTB_PMEB BIT(5) - /* RTL8211F WOL settings */ -#define RTL8211F_WOL_SETTINGS_PAGE 0xd8a +#define RTL8211F_WOL_PAGE 0xd8a #define RTL8211F_WOL_SETTINGS_EVENTS 16 #define RTL8211F_WOL_EVENT_MAGIC BIT(12) -#define RTL8211F_WOL_SETTINGS_STATUS 17 -#define RTL8211F_WOL_STATUS_RESET (BIT(15) | 0x1fff) +#define RTL8211F_WOL_RST_RMSQ 17 +#define RTL8211F_WOL_RG_RSTB BIT(15) +#define RTL8211F_WOL_RMSQ 0x1fff /* RTL8211F Unique physical and multicast address (WOL) */ #define RTL8211F_PHYSICAL_ADDR_PAGE 0xd8c @@ -172,7 +170,8 @@ struct rtl821x_priv { u16 phycr2; bool has_phycr2; struct clk *clk; - u32 saved_wolopts; + /* rtl8211f */ + u16 iner; }; static int rtl821x_read_page(struct phy_device *phydev) @@ -255,6 +254,34 @@ static int rtl821x_probe(struct phy_device *phydev) return 0; } +static int rtl8211f_probe(struct phy_device *phydev) +{ + struct device *dev = &phydev->mdio.dev; + int ret; + + ret = rtl821x_probe(phydev); + if (ret < 0) + return ret; + + /* Disable all PME events */ + ret = phy_write_paged(phydev, RTL8211F_WOL_PAGE, + RTL8211F_WOL_SETTINGS_EVENTS, 0); + if (ret < 0) + return ret; + + /* Mark this PHY as wakeup capable and register the interrupt as a + * wakeup IRQ if the PHY is marked as a wakeup source in firmware, + * and the interrupt is valid. + */ + if (device_property_read_bool(dev, "wakeup-source") && + phy_interrupt_is_valid(phydev)) { + device_set_wakeup_capable(dev, true); + devm_pm_set_wake_irq(dev, phydev->irq); + } + + return ret; +} + static int rtl8201_ack_interrupt(struct phy_device *phydev) { int err; @@ -352,6 +379,7 @@ static int rtl8211e_config_intr(struct phy_device *phydev) static int rtl8211f_config_intr(struct phy_device *phydev) { + struct rtl821x_priv *priv = phydev->priv; u16 val; int err; @@ -362,8 +390,10 @@ static int rtl8211f_config_intr(struct phy_device *phydev) val = RTL8211F_INER_LINK_STATUS; err = phy_write_paged(phydev, 0xa42, RTL821x_INER, val); + if (err == 0) + priv->iner = val; } else { - val = 0; + priv->iner = val = 0; err = phy_write_paged(phydev, 0xa42, RTL821x_INER, val); if (err) return err; @@ -426,21 +456,34 @@ static irqreturn_t rtl8211f_handle_interrupt(struct phy_device *phydev) return IRQ_NONE; } - if (!(irq_status & RTL8211F_INER_LINK_STATUS)) - return IRQ_NONE; + if (irq_status & RTL8211F_INER_LINK_STATUS) { + phy_trigger_machine(phydev); + return IRQ_HANDLED; + } - phy_trigger_machine(phydev); + if (irq_status & RTL8211F_INER_PME) { + pm_wakeup_event(&phydev->mdio.dev, 0); + return IRQ_HANDLED; + } - return IRQ_HANDLED; + return IRQ_NONE; }
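Context, not part of the diff: the WoL getter and setter below lean on phylib's page save/restore idiom, whose general shape is (illustrative sketch):

```c
/*
 * Illustrative sketch of the phylib paged-access idiom used by
 * rtl8211f_set_wol()/rtl8211f_get_wol() below.
 */
static int rtl8211f_example_paged_access(struct phy_device *phydev)
{
	int oldpage, ret = 0;

	oldpage = phy_save_page(phydev);	/* takes the MDIO bus lock */
	if (oldpage < 0)
		goto err;

	rtl821x_write_page(phydev, RTL8211F_WOL_PAGE);
	ret = __phy_read(phydev, RTL8211F_WOL_SETTINGS_EVENTS);

err:
	/* restores the original page, drops the lock, keeps first error */
	return phy_restore_page(phydev, oldpage, ret);
}
```

static void rtl8211f_get_wol(struct phy_device *dev, struct ethtool_wolinfo *wol) { int wol_events; + /* If the PHY is not capable of waking the system, then WoL can not * be supported.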
+ */ + if (!device_can_wakeup(&dev->mdio.dev)) { + wol->supported = 0; + return; + } + wol->supported = WAKE_MAGIC; - wol_events = phy_read_paged(dev, RTL8211F_WOL_SETTINGS_PAGE, RTL8211F_WOL_SETTINGS_EVENTS); + wol_events = phy_read_paged(dev, RTL8211F_WOL_PAGE, RTL8211F_WOL_SETTINGS_EVENTS); if (wol_events < 0) return; @@ -453,6 +496,9 @@ static int rtl8211f_set_wol(struct phy_device *dev, struct ethtool_wolinfo *wol) const u8 *mac_addr = dev->attached_dev->dev_addr; int oldpage; + if (!device_can_wakeup(&dev->mdio.dev)) + return -EOPNOTSUPP; + oldpage = phy_save_page(dev); if (oldpage < 0) goto err; @@ -464,25 +510,23 @@ static int rtl8211f_set_wol(struct phy_device *dev, struct ethtool_wolinfo *wol) __phy_write(dev, RTL8211F_PHYSICAL_ADDR_WORD1, mac_addr[3] << 8 | (mac_addr[2])); __phy_write(dev, RTL8211F_PHYSICAL_ADDR_WORD2, mac_addr[5] << 8 | (mac_addr[4])); - /* Enable magic packet matching and reset WOL status */ - rtl821x_write_page(dev, RTL8211F_WOL_SETTINGS_PAGE); + /* Enable magic packet matching */ + rtl821x_write_page(dev, RTL8211F_WOL_PAGE); __phy_write(dev, RTL8211F_WOL_SETTINGS_EVENTS, RTL8211F_WOL_EVENT_MAGIC); - __phy_write(dev, RTL8211F_WOL_SETTINGS_STATUS, RTL8211F_WOL_STATUS_RESET); - - /* Enable the WOL interrupt */ - rtl821x_write_page(dev, RTL8211F_INTBCR_PAGE); - __phy_set_bits(dev, RTL8211F_INTBCR, RTL8211F_INTBCR_INTB_PMEB); + /* Set the maximum packet size, and assert WoL reset */ + __phy_write(dev, RTL8211F_WOL_RST_RMSQ, RTL8211F_WOL_RMSQ); } else { - /* Disable the WOL interrupt */ - rtl821x_write_page(dev, RTL8211F_INTBCR_PAGE); - __phy_clear_bits(dev, RTL8211F_INTBCR, RTL8211F_INTBCR_INTB_PMEB); - - /* Disable magic packet matching and reset WOL status */ - rtl821x_write_page(dev, RTL8211F_WOL_SETTINGS_PAGE); + /* Disable magic packet matching */ + rtl821x_write_page(dev, RTL8211F_WOL_PAGE); __phy_write(dev, RTL8211F_WOL_SETTINGS_EVENTS, 0); - __phy_write(dev, RTL8211F_WOL_SETTINGS_STATUS, RTL8211F_WOL_STATUS_RESET); + + /* Place WoL in reset */ + __phy_clear_bits(dev, RTL8211F_WOL_RST_RMSQ, + RTL8211F_WOL_RG_RSTB); } + device_set_wakeup_enable(&dev->mdio.dev, !!(wol->wolopts & WAKE_MAGIC)); + err: return phy_restore_page(dev, oldpage, 0); } @@ -628,6 +672,52 @@ static int rtl821x_suspend(struct phy_device *phydev) return ret; } +static int rtl8211f_suspend(struct phy_device *phydev) +{ + u16 wol_rst; + int ret; + + ret = rtl821x_suspend(phydev); + if (ret < 0) + return ret; + + /* If a PME event is enabled, then configure the interrupt for + * PME events only, disabling link interrupt. We avoid switching + * to PMEB mode as we don't have a status bit for that. + */ + if (device_may_wakeup(&phydev->mdio.dev)) { + ret = phy_write_paged(phydev, 0xa42, RTL821x_INER, + RTL8211F_INER_PME); + if (ret < 0) + goto err; + + /* Read the INSR to clear any pending interrupt */ + phy_read_paged(phydev, RTL8211F_INSR_PAGE, RTL8211F_INSR); + + /* Reset the WoL to ensure that an event is picked up. + * Unless we do this, even if we receive another packet, + * we may not have a PME interrupt raised. 
+ */ + ret = phy_read_paged(phydev, RTL8211F_WOL_PAGE, + RTL8211F_WOL_RST_RMSQ); + if (ret < 0) + goto err; + + wol_rst = ret & ~RTL8211F_WOL_RG_RSTB; + ret = phy_write_paged(phydev, RTL8211F_WOL_PAGE, + RTL8211F_WOL_RST_RMSQ, wol_rst); + if (ret < 0) + goto err; + + wol_rst |= RTL8211F_WOL_RG_RSTB; + ret = phy_write_paged(phydev, RTL8211F_WOL_PAGE, + RTL8211F_WOL_RST_RMSQ, wol_rst); + } + +err: + return ret; +} + static int rtl821x_resume(struct phy_device *phydev) { struct rtl821x_priv *priv = phydev->priv; @@ -645,10 +735,29 @@ static int rtl821x_resume(struct phy_device *phydev) return 0; } +static int rtl8211f_resume(struct phy_device *phydev) +{ + struct rtl821x_priv *priv = phydev->priv; + int ret; + + ret = rtl821x_resume(phydev); + if (ret < 0) + return ret; + + /* If the device was programmed for a PME event, restore the interrupt + * enable so phylib can receive link state interrupts. + */ + if (device_may_wakeup(&phydev->mdio.dev)) + ret = phy_write_paged(phydev, 0xa42, RTL821x_INER, priv->iner); + + return ret; +} + static int rtl8211x_led_hw_is_supported(struct phy_device *phydev, u8 index, unsigned long rules) { - const unsigned long mask = BIT(TRIGGER_NETDEV_LINK_10) | + const unsigned long mask = BIT(TRIGGER_NETDEV_LINK) | + BIT(TRIGGER_NETDEV_LINK_10) | BIT(TRIGGER_NETDEV_LINK_100) | BIT(TRIGGER_NETDEV_LINK_1000) | BIT(TRIGGER_NETDEV_RX) | @@ -706,6 +815,12 @@ static int rtl8211f_led_hw_control_get(struct phy_device *phydev, u8 index, if (val & RTL8211F_LEDCR_LINK_1000) __set_bit(TRIGGER_NETDEV_LINK_1000, rules); + if ((val & RTL8211F_LEDCR_LINK_10) && + (val & RTL8211F_LEDCR_LINK_100) && + (val & RTL8211F_LEDCR_LINK_1000)) { + __set_bit(TRIGGER_NETDEV_LINK, rules); + } + if (val & RTL8211F_LEDCR_ACT_TXRX) { __set_bit(TRIGGER_NETDEV_RX, rules); __set_bit(TRIGGER_NETDEV_TX, rules); @@ -723,14 +838,20 @@ static int rtl8211f_led_hw_control_set(struct phy_device *phydev, u8 index, if (index >= RTL8211x_LED_COUNT) return -EINVAL; - if (test_bit(TRIGGER_NETDEV_LINK_10, &rules)) + if (test_bit(TRIGGER_NETDEV_LINK, &rules) || + test_bit(TRIGGER_NETDEV_LINK_10, &rules)) { reg |= RTL8211F_LEDCR_LINK_10; + } - if (test_bit(TRIGGER_NETDEV_LINK_100, &rules)) + if (test_bit(TRIGGER_NETDEV_LINK, &rules) || + test_bit(TRIGGER_NETDEV_LINK_100, &rules)) { reg |= RTL8211F_LEDCR_LINK_100; + } - if (test_bit(TRIGGER_NETDEV_LINK_1000, &rules)) + if (test_bit(TRIGGER_NETDEV_LINK, &rules) || + test_bit(TRIGGER_NETDEV_LINK_1000, &rules)) { reg |= RTL8211F_LEDCR_LINK_1000; + } if (test_bit(TRIGGER_NETDEV_RX, &rules) || test_bit(TRIGGER_NETDEV_TX, &rules)) { @@ -778,6 +899,12 @@ static int rtl8211e_led_hw_control_get(struct phy_device *phydev, u8 index, if (cr2 & RTL8211E_LEDCR2_LINK_1000) __set_bit(TRIGGER_NETDEV_LINK_1000, rules); + if ((cr2 & RTL8211E_LEDCR2_LINK_10) && + (cr2 & RTL8211E_LEDCR2_LINK_100) && + (cr2 & RTL8211E_LEDCR2_LINK_1000)) { + __set_bit(TRIGGER_NETDEV_LINK, rules); + } + return ret; } @@ -805,14 +932,20 @@ static int rtl8211e_led_hw_control_set(struct phy_device *phydev, u8 index, if (ret < 0) return ret; - if (test_bit(TRIGGER_NETDEV_LINK_10, &rules)) + if (test_bit(TRIGGER_NETDEV_LINK, &rules) || + test_bit(TRIGGER_NETDEV_LINK_10, &rules)) { cr2 |= RTL8211E_LEDCR2_LINK_10; + } - if (test_bit(TRIGGER_NETDEV_LINK_100, &rules)) + if (test_bit(TRIGGER_NETDEV_LINK, &rules) || + test_bit(TRIGGER_NETDEV_LINK_100, &rules)) { cr2 |= RTL8211E_LEDCR2_LINK_100; + } - if (test_bit(TRIGGER_NETDEV_LINK_1000, &rules)) + if (test_bit(TRIGGER_NETDEV_LINK, &rules) || + 
test_bit(TRIGGER_NETDEV_LINK_1000, &rules)) { cr2 |= RTL8211E_LEDCR2_LINK_1000; + } cr2 <<= RTL8211E_LEDCR2_SHIFT * index; ret = rtl821x_modify_ext_page(phydev, RTL8211E_LEDCR_EXT_PAGE, @@ -1038,7 +1171,7 @@ static int rtl822x_probe(struct phy_device *phydev) return 0; } -static int rtl822xb_config_init(struct phy_device *phydev) +static int rtl822x_set_serdes_option_mode(struct phy_device *phydev, bool gen1) { bool has_2500, has_sgmii; u16 mode; @@ -1073,15 +1206,18 @@ static int rtl822xb_config_init(struct phy_device *phydev) /* the following sequence with magic numbers sets up the SerDes * option mode */ - ret = phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x75f3, 0); - if (ret < 0) - return ret; + + if (!gen1) { + ret = phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x75f3, 0); + if (ret < 0) + return ret; + } ret = phy_modify_mmd_changed(phydev, MDIO_MMD_VEND1, RTL822X_VND1_SERDES_OPTION, RTL822X_VND1_SERDES_OPTION_MODE_MASK, mode); - if (ret < 0) + if (gen1 || ret < 0) return ret; ret = phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x6a04, 0x0503); @@ -1095,6 +1231,16 @@ static int rtl822xb_config_init(struct phy_device *phydev) return phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x6f11, 0x8020); } +static int rtl822x_config_init(struct phy_device *phydev) +{ + return rtl822x_set_serdes_option_mode(phydev, true); +} + +static int rtl822xb_config_init(struct phy_device *phydev) +{ + return rtl822x_set_serdes_option_mode(phydev, false); +} + static int rtl822xb_get_rate_matching(struct phy_device *phydev, phy_interface_t iface) { @@ -1280,6 +1426,21 @@ static int rtl822x_c45_read_status(struct phy_device *phydev) return 0; } +static int rtl822x_c45_soft_reset(struct phy_device *phydev) +{ + int ret, val; + + ret = phy_modify_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_CTRL1, + MDIO_CTRL1_RESET, MDIO_CTRL1_RESET); + if (ret < 0) + return ret; + + return phy_read_mmd_poll_timeout(phydev, MDIO_MMD_PMAPMD, + MDIO_CTRL1, val, + !(val & MDIO_CTRL1_RESET), + 5000, 100000, true); +} + static int rtl822xb_c45_read_status(struct phy_device *phydev) { int ret; @@ -1612,15 +1773,15 @@ static struct phy_driver realtek_drvs[] = { }, { PHY_ID_MATCH_EXACT(0x001cc916), .name = "RTL8211F Gigabit Ethernet", - .probe = rtl821x_probe, + .probe = rtl8211f_probe, .config_init = &rtl8211f_config_init, .read_status = rtlgen_read_status, .config_intr = &rtl8211f_config_intr, .handle_interrupt = rtl8211f_handle_interrupt, .set_wol = rtl8211f_set_wol, .get_wol = rtl8211f_get_wol, - .suspend = rtl821x_suspend, - .resume = rtl821x_resume, + .suspend = rtl8211f_suspend, + .resume = rtl8211f_resume, .read_page = rtl821x_read_page, .write_page = rtl821x_write_page, .flags = PHY_ALWAYS_CALL_SUSPEND, @@ -1675,13 +1836,13 @@ static struct phy_driver realtek_drvs[] = { }, { PHY_ID_MATCH_EXACT(0x001cc838), .name = "RTL8226-CG 2.5Gbps PHY", - .get_features = rtl822x_get_features, - .config_aneg = rtl822x_config_aneg, - .read_status = rtl822x_read_status, - .suspend = genphy_suspend, - .resume = rtlgen_resume, - .read_page = rtl821x_read_page, - .write_page = rtl821x_write_page, + .soft_reset = rtl822x_c45_soft_reset, + .get_features = rtl822x_c45_get_features, + .config_aneg = rtl822x_c45_config_aneg, + .config_init = rtl822x_config_init, + .read_status = rtl822xb_c45_read_status, + .suspend = genphy_c45_pma_suspend, + .resume = rtlgen_c45_resume, }, { PHY_ID_MATCH_EXACT(0x001cc848), .name = "RTL8226B-CG_RTL8221B-CG 2.5Gbps PHY", diff --git a/drivers/net/ppp/Kconfig b/drivers/net/ppp/Kconfig index 8c9ed1889d1a..a1806b4b84be 100644 --- a/drivers/net/ppp/Kconfig +++ 
b/drivers/net/ppp/Kconfig @@ -85,9 +85,8 @@ config PPP_FILTER config PPP_MPPE tristate "PPP MPPE compression (encryption)" depends on PPP - select CRYPTO - select CRYPTO_SHA1 select CRYPTO_LIB_ARC4 + select CRYPTO_LIB_SHA1 help Support for the MPPE Encryption protocol, as employed by the Microsoft Point-to-Point Tunneling Protocol. diff --git a/drivers/net/ppp/bsd_comp.c b/drivers/net/ppp/bsd_comp.c index 55954594e157..f385b759d5cf 100644 --- a/drivers/net/ppp/bsd_comp.c +++ b/drivers/net/ppp/bsd_comp.c @@ -406,7 +406,7 @@ static void *bsd_alloc (unsigned char *options, int opt_len, int decomp) * Allocate space for the dictionary. This may be more than one page in * length. */ - db->dict = vmalloc(array_size(hsize, sizeof(struct bsd_dict))); + db->dict = vmalloc_array(hsize, sizeof(struct bsd_dict)); if (!db->dict) { bsd_free (db); @@ -425,7 +425,7 @@ static void *bsd_alloc (unsigned char *options, int opt_len, int decomp) */ else { - db->lens = vmalloc(array_size(sizeof(db->lens[0]), (maxmaxcode + 1))); + db->lens = vmalloc_array(maxmaxcode + 1, sizeof(db->lens[0])); if (!db->lens) { bsd_free (db);
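Context on the bsd_comp change above, not part of the diff: both forms are overflow-safe, since array_size() saturates to SIZE_MAX on overflow; vmalloc_array() simply folds the multiplication check into the allocator. Conceptually it reduces to (simplified sketch):

```c
/* Simplified sketch of what vmalloc_array() does internally */
void *vmalloc_array(size_t n, size_t size)
{
	size_t bytes;

	if (unlikely(check_mul_overflow(n, size, &bytes)))
		return NULL;	/* n * size would overflow size_t */

	return vmalloc(bytes);
}
```

diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 702a7f7183ce..f9f0f16c41d1 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -179,11 +179,11 @@ struct channel { struct ppp_channel *chan; /* public channel data structure */ struct rw_semaphore chan_sem; /* protects `chan' during chan ioctl */ spinlock_t downl; /* protects `chan', file.xq dequeue */ - struct ppp *ppp; /* ppp unit we're connected to */ + struct ppp __rcu *ppp; /* ppp unit we're connected to */ struct net *chan_net; /* the net channel belongs to */ netns_tracker ns_tracker; struct list_head clist; /* link in list of channels per unit */ - rwlock_t upl; /* protects `ppp' and 'bridge' */ + spinlock_t upl; /* protects `ppp' and 'bridge' */ struct channel __rcu *bridge; /* "bridged" ppp channel */ #ifdef CONFIG_PPP_MULTILINK u8 avail; /* flag used in multilink stuff */ @@ -645,34 +645,34 @@ static struct bpf_prog *compat_ppp_get_filter(struct sock_fprog32 __user *p) */ static int ppp_bridge_channels(struct channel *pch, struct channel *pchb) { - write_lock_bh(&pch->upl); - if (pch->ppp || + spin_lock(&pch->upl); + if (rcu_dereference_protected(pch->ppp, lockdep_is_held(&pch->upl)) || rcu_dereference_protected(pch->bridge, lockdep_is_held(&pch->upl))) { - write_unlock_bh(&pch->upl); + spin_unlock(&pch->upl); return -EALREADY; } refcount_inc(&pchb->file.refcnt); rcu_assign_pointer(pch->bridge, pchb); - write_unlock_bh(&pch->upl); + spin_unlock(&pch->upl); - write_lock_bh(&pchb->upl); - if (pchb->ppp || + spin_lock(&pchb->upl); + if (rcu_dereference_protected(pchb->ppp, lockdep_is_held(&pchb->upl)) || rcu_dereference_protected(pchb->bridge, lockdep_is_held(&pchb->upl))) { - write_unlock_bh(&pchb->upl); + spin_unlock(&pchb->upl); goto err_unset; } refcount_inc(&pch->file.refcnt); rcu_assign_pointer(pchb->bridge, pch); - write_unlock_bh(&pchb->upl); + spin_unlock(&pchb->upl); return 0; err_unset: - write_lock_bh(&pch->upl); + spin_lock(&pch->upl); /* Re-read pch->bridge with upl held in case it was modified concurrently */ pchb = rcu_dereference_protected(pch->bridge, lockdep_is_held(&pch->upl)); RCU_INIT_POINTER(pch->bridge, NULL); - write_unlock_bh(&pch->upl); + spin_unlock(&pch->upl); synchronize_rcu(); if (pchb) @@ -686,25 +686,25 @@ static int ppp_unbridge_channels(struct channel *pch) { struct channel *pchb, *pchbb; - write_lock_bh(&pch->upl); + spin_lock(&pch->upl);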
pchb = rcu_dereference_protected(pch->bridge, lockdep_is_held(&pch->upl)); if (!pchb) { - write_unlock_bh(&pch->upl); + spin_unlock(&pch->upl); return -EINVAL; } RCU_INIT_POINTER(pch->bridge, NULL); - write_unlock_bh(&pch->upl); + spin_unlock(&pch->upl); /* Only modify pchb if phcb->bridge points back to pch. * If not, it implies that there has been a race unbridging (and possibly * even rebridging) pchb. We should leave pchb alone to avoid either a * refcount underflow, or breaking another established bridge instance. */ - write_lock_bh(&pchb->upl); + spin_lock(&pchb->upl); pchbb = rcu_dereference_protected(pchb->bridge, lockdep_is_held(&pchb->upl)); if (pchbb == pch) RCU_INIT_POINTER(pchb->bridge, NULL); - write_unlock_bh(&pchb->upl); + spin_unlock(&pchb->upl); synchronize_rcu(); @@ -2158,10 +2158,9 @@ static int ppp_mp_explode(struct ppp *ppp, struct sk_buff *skb) #endif /* CONFIG_PPP_MULTILINK */ /* Try to send data out on a channel */ -static void __ppp_channel_push(struct channel *pch) +static void __ppp_channel_push(struct channel *pch, struct ppp *ppp) { struct sk_buff *skb; - struct ppp *ppp; spin_lock(&pch->downl); if (pch->chan) { @@ -2180,7 +2179,6 @@ static void __ppp_channel_push(struct channel *pch) spin_unlock(&pch->downl); /* see if there is anything from the attached unit to be sent */ if (skb_queue_empty(&pch->file.xq)) { - ppp = pch->ppp; if (ppp) __ppp_xmit_process(ppp, NULL); } @@ -2189,19 +2187,21 @@ static void ppp_channel_push(struct channel *pch) { struct ppp_xmit_recursion *xmit_recursion; + struct ppp *ppp; - read_lock_bh(&pch->upl); - if (pch->ppp) { - xmit_recursion = this_cpu_ptr(pch->ppp->xmit_recursion); - local_lock_nested_bh(&pch->ppp->xmit_recursion->bh_lock); + rcu_read_lock_bh(); + ppp = rcu_dereference_bh(pch->ppp); + if (ppp) { + xmit_recursion = this_cpu_ptr(ppp->xmit_recursion); + local_lock_nested_bh(&ppp->xmit_recursion->bh_lock); xmit_recursion->owner = current; - __ppp_channel_push(pch); + __ppp_channel_push(pch, ppp); xmit_recursion->owner = NULL; - local_unlock_nested_bh(&pch->ppp->xmit_recursion->bh_lock); + local_unlock_nested_bh(&ppp->xmit_recursion->bh_lock); } else { - __ppp_channel_push(pch); + __ppp_channel_push(pch, NULL); } - read_unlock_bh(&pch->upl); + rcu_read_unlock_bh(); } /* @@ -2303,6 +2303,7 @@ void ppp_input(struct ppp_channel *chan, struct sk_buff *skb) { struct channel *pch = chan->ppp; + struct ppp *ppp; int proto; if (!pch) { @@ -2314,18 +2315,19 @@ ppp_input(struct ppp_channel *chan, struct sk_buff *skb) if (ppp_channel_bridge_input(pch, skb)) return; - read_lock_bh(&pch->upl); + rcu_read_lock_bh(); + ppp = rcu_dereference_bh(pch->ppp); if (!ppp_decompress_proto(skb)) { kfree_skb(skb); - if (pch->ppp) { - ++pch->ppp->dev->stats.rx_length_errors; - ppp_receive_error(pch->ppp); + if (ppp) { + ++ppp->dev->stats.rx_length_errors; + ppp_receive_error(ppp); } goto done; } proto = PPP_PROTO(skb); - if (!pch->ppp || proto >= 0xc000 || proto == PPP_CCPFRAG) { + if (!ppp || proto >= 0xc000 || proto == PPP_CCPFRAG) { /* put it on the channel queue */ skb_queue_tail(&pch->file.rq, skb); /* drop old frames if queue too long */ @@ -2334,11 +2336,11 @@ ppp_input(struct ppp_channel *chan, struct sk_buff *skb) kfree_skb(skb); wake_up_interruptible(&pch->file.rwait); } else { - ppp_do_recv(pch->ppp, skb, pch); + ppp_do_recv(ppp, skb, pch); } done: - read_unlock_bh(&pch->upl); + rcu_read_unlock_bh(); }
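Context, not part of the diff: the pch->upl conversion follows the standard RCU split. Readers become lockless, the spinlock only serializes writers, and a grace period separates unpublishing a pointer from dropping the last reference. Schematically (hypothetical helper names, not from the patch):

```c
/* Hypothetical reader: lockless, must not sleep inside the section */
static void channel_ppp_reader(struct channel *pch)
{
	struct ppp *ppp;

	rcu_read_lock_bh();
	ppp = rcu_dereference_bh(pch->ppp);	/* may be NULL */
	if (ppp)
		ppp->dev->stats.rx_errors++;	/* example access */
	rcu_read_unlock_bh();
}

/* Hypothetical writer: serialized by the spinlock, then waits out readers */
static struct ppp *channel_ppp_clear(struct channel *pch)
{
	struct ppp *old;

	spin_lock(&pch->upl);
	old = rcu_replace_pointer(pch->ppp, NULL,
				  lockdep_is_held(&pch->upl));
	spin_unlock(&pch->upl);

	synchronize_rcu();	/* after this, no reader can still see 'old' */
	return old;
}
```

/* Put a 0-length skb in the receive queue as an error indication */ @@ -2347,20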
+2349,22 @@ ppp_input_error(struct ppp_channel *chan, int code) { struct channel *pch = chan->ppp; struct sk_buff *skb; + struct ppp *ppp; if (!pch) return; - read_lock_bh(&pch->upl); - if (pch->ppp) { + rcu_read_lock_bh(); + ppp = rcu_dereference_bh(pch->ppp); + if (ppp) { skb = alloc_skb(0, GFP_ATOMIC); if (skb) { skb->len = 0; /* probably unnecessary */ skb->cb[0] = code; - ppp_do_recv(pch->ppp, skb, pch); + ppp_do_recv(ppp, skb, pch); } } - read_unlock_bh(&pch->upl); + rcu_read_unlock_bh(); } /* @@ -2908,7 +2912,6 @@ int ppp_register_net_channel(struct net *net, struct ppp_channel *chan) pn = ppp_pernet(net); - pch->ppp = NULL; pch->chan = chan; pch->chan_net = get_net_track(net, &pch->ns_tracker, GFP_KERNEL); chan->ppp = pch; @@ -2919,7 +2922,7 @@ int ppp_register_net_channel(struct net *net, struct ppp_channel *chan) #endif /* CONFIG_PPP_MULTILINK */ init_rwsem(&pch->chan_sem); spin_lock_init(&pch->downl); - rwlock_init(&pch->upl); + spin_lock_init(&pch->upl); spin_lock_bh(&pn->all_channels_lock); pch->file.index = ++pn->last_channel_index; @@ -2948,13 +2951,15 @@ int ppp_channel_index(struct ppp_channel *chan) int ppp_unit_number(struct ppp_channel *chan) { struct channel *pch = chan->ppp; + struct ppp *ppp; int unit = -1; if (pch) { - read_lock_bh(&pch->upl); - if (pch->ppp) - unit = pch->ppp->file.index; - read_unlock_bh(&pch->upl); + rcu_read_lock(); + ppp = rcu_dereference(pch->ppp); + if (ppp) + unit = ppp->file.index; + rcu_read_unlock(); } return unit; } @@ -2966,12 +2971,14 @@ char *ppp_dev_name(struct ppp_channel *chan) { struct channel *pch = chan->ppp; char *name = NULL; + struct ppp *ppp; if (pch) { - read_lock_bh(&pch->upl); - if (pch->ppp && pch->ppp->dev) - name = pch->ppp->dev->name; - read_unlock_bh(&pch->upl); + rcu_read_lock(); + ppp = rcu_dereference(pch->ppp); + if (ppp && ppp->dev) + name = ppp->dev->name; + rcu_read_unlock(); } return name; } @@ -3494,9 +3501,9 @@ ppp_connect_channel(struct channel *pch, int unit) ppp = ppp_find_unit(pn, unit); if (!ppp) goto out; - write_lock_bh(&pch->upl); + spin_lock(&pch->upl); ret = -EINVAL; - if (pch->ppp || + if (rcu_dereference_protected(pch->ppp, lockdep_is_held(&pch->upl)) || rcu_dereference_protected(pch->bridge, lockdep_is_held(&pch->upl))) goto outl; @@ -3521,13 +3528,13 @@ ppp_connect_channel(struct channel *pch, int unit) ppp->dev->hard_header_len = hdrlen; list_add_tail_rcu(&pch->clist, &ppp->channels); ++ppp->n_channels; - pch->ppp = ppp; + rcu_assign_pointer(pch->ppp, ppp); refcount_inc(&ppp->file.refcnt); ppp_unlock(ppp); ret = 0; outl: - write_unlock_bh(&pch->upl); + spin_unlock(&pch->upl); out: mutex_unlock(&pn->all_ppp_mutex); return ret; @@ -3542,10 +3549,9 @@ ppp_disconnect_channel(struct channel *pch) struct ppp *ppp; int err = -EINVAL; - write_lock_bh(&pch->upl); - ppp = pch->ppp; - pch->ppp = NULL; - write_unlock_bh(&pch->upl); + spin_lock(&pch->upl); + ppp = rcu_replace_pointer(pch->ppp, NULL, lockdep_is_held(&pch->upl)); + spin_unlock(&pch->upl); if (ppp) { /* remove it from the ppp unit's list */ ppp_lock(ppp); diff --git a/drivers/net/ppp/ppp_mppe.c b/drivers/net/ppp/ppp_mppe.c index bcc1eaedf58f..630cbf71c147 100644 --- a/drivers/net/ppp/ppp_mppe.c +++ b/drivers/net/ppp/ppp_mppe.c @@ -43,7 +43,7 @@ */ #include <crypto/arc4.h> -#include <crypto/hash.h> +#include <crypto/sha1.h> #include <linux/err.h> #include <linux/fips.h> #include <linux/module.h> @@ -55,7 +55,6 @@ #include <linux/mm.h> #include <linux/ppp_defs.h> #include <linux/ppp-comp.h> -#include <linux/scatterlist.h> #include 
<linux/unaligned.h> #include "ppp_mppe.h" @@ -67,31 +66,15 @@ MODULE_ALIAS("ppp-compress-" __stringify(CI_MPPE)); MODULE_VERSION("1.0.2"); #define SHA1_PAD_SIZE 40 - -/* - * kernel crypto API needs its arguments to be in kmalloc'd memory, not in the module - * static data area. That means sha_pad needs to be kmalloc'd. - */ - -struct sha_pad { - unsigned char sha_pad1[SHA1_PAD_SIZE]; - unsigned char sha_pad2[SHA1_PAD_SIZE]; -}; -static struct sha_pad *sha_pad; - -static inline void sha_pad_init(struct sha_pad *shapad) -{ - memset(shapad->sha_pad1, 0x00, sizeof(shapad->sha_pad1)); - memset(shapad->sha_pad2, 0xF2, sizeof(shapad->sha_pad2)); -} +static const u8 sha_pad1[SHA1_PAD_SIZE] = { 0 }; +static const u8 sha_pad2[SHA1_PAD_SIZE] = { [0 ... SHA1_PAD_SIZE - 1] = 0xF2 }; /* * State for an MPPE (de)compressor. */ struct ppp_mppe_state { struct arc4_ctx arc4; - struct shash_desc *sha1; - unsigned char *sha1_digest; + unsigned char sha1_digest[SHA1_DIGEST_SIZE]; unsigned char master_key[MPPE_MAX_KEY_LEN]; unsigned char session_key[MPPE_MAX_KEY_LEN]; unsigned keylen; /* key length in bytes */ @@ -130,16 +113,14 @@ */ static void get_new_key_from_sha(struct ppp_mppe_state * state) { - crypto_shash_init(state->sha1); - crypto_shash_update(state->sha1, state->master_key, - state->keylen); - crypto_shash_update(state->sha1, sha_pad->sha_pad1, - sizeof(sha_pad->sha_pad1)); - crypto_shash_update(state->sha1, state->session_key, - state->keylen); - crypto_shash_update(state->sha1, sha_pad->sha_pad2, - sizeof(sha_pad->sha_pad2)); - crypto_shash_final(state->sha1, state->sha1_digest); + struct sha1_ctx ctx; + + sha1_init(&ctx); + sha1_update(&ctx, state->master_key, state->keylen); + sha1_update(&ctx, sha_pad1, sizeof(sha_pad1)); + sha1_update(&ctx, state->session_key, state->keylen); + sha1_update(&ctx, sha_pad2, sizeof(sha_pad2)); + sha1_final(&ctx, state->sha1_digest); }
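Context, not part of the diff: this derivation is GetNewKeyFromSHA() from RFC 3079, i.e. SHA-1 over MasterKey | Pad1 | SessionKey | Pad2. Assuming the one-shot sha1() helper exported by the same library, an equivalent but copy-based formulation would be (illustrative only):

```c
/*
 * Illustrative only: one-shot form of the derivation above. The
 * incremental sha1_init/update/final form avoids assembling this
 * temporary buffer of key material.
 */
static void get_new_key_from_sha_oneshot(struct ppp_mppe_state *state)
{
	u8 in[2 * MPPE_MAX_KEY_LEN + 2 * SHA1_PAD_SIZE];
	size_t off = 0;

	memcpy(in + off, state->master_key, state->keylen);
	off += state->keylen;
	memcpy(in + off, sha_pad1, SHA1_PAD_SIZE);
	off += SHA1_PAD_SIZE;
	memcpy(in + off, state->session_key, state->keylen);
	off += state->keylen;
	memcpy(in + off, sha_pad2, SHA1_PAD_SIZE);
	off += SHA1_PAD_SIZE;

	sha1(in, off, state->sha1_digest);

	memzero_explicit(in, sizeof(in));	/* don't leave keys behind */
}
```

/* @@ -171,39 +152,15 @@ static void mppe_rekey(struct ppp_mppe_state * state, int initial_key) static void *mppe_alloc(unsigned char *options, int optlen) { struct ppp_mppe_state *state; - struct crypto_shash *shash; - unsigned int digestsize; if (optlen != CILEN_MPPE + sizeof(state->master_key) || options[0] != CI_MPPE || options[1] != CILEN_MPPE || fips_enabled) - goto out; + return NULL; state = kzalloc(sizeof(*state), GFP_KERNEL); if (state == NULL) - goto out; - - - shash = crypto_alloc_shash("sha1", 0, 0); - if (IS_ERR(shash)) - goto out_free; - - state->sha1 = kmalloc(sizeof(*state->sha1) + - crypto_shash_descsize(shash), - GFP_KERNEL); - if (!state->sha1) { - crypto_free_shash(shash); - goto out_free; - } - state->sha1->tfm = shash; - - digestsize = crypto_shash_digestsize(shash); - if (digestsize < MPPE_MAX_KEY_LEN) - goto out_free; - - state->sha1_digest = kmalloc(digestsize, GFP_KERNEL); - if (!state->sha1_digest) - goto out_free; + return NULL; /* Save keys.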
*/ memcpy(state->master_key, &options[CILEN_MPPE], @@ -217,16 +174,6 @@ static void *mppe_alloc(unsigned char *options, int optlen) */ return (void *)state; - -out_free: - kfree(state->sha1_digest); - if (state->sha1) { - crypto_free_shash(state->sha1->tfm); - kfree_sensitive(state->sha1); - } - kfree(state); -out: - return NULL; } /* @@ -235,12 +182,8 @@ out: static void mppe_free(void *arg) { struct ppp_mppe_state *state = (struct ppp_mppe_state *) arg; - if (state) { - kfree(state->sha1_digest); - crypto_free_shash(state->sha1->tfm); - kfree_sensitive(state->sha1); - kfree_sensitive(state); - } + + kfree_sensitive(state); } /* @@ -649,31 +592,17 @@ static struct compressor ppp_mppe = { .comp_extra = MPPE_PAD, }; -/* - * ppp_mppe_init() - * - * Prior to allowing load, try to load the arc4 and sha1 crypto - * libraries. The actual use will be allocated later, but - * this way the module will fail to insmod if they aren't available. - */ - static int __init ppp_mppe_init(void) { int answer; - if (fips_enabled || !crypto_has_ahash("sha1", 0, CRYPTO_ALG_ASYNC)) - return -ENODEV; - sha_pad = kmalloc(sizeof(struct sha_pad), GFP_KERNEL); - if (!sha_pad) - return -ENOMEM; - sha_pad_init(sha_pad); + if (fips_enabled) + return -ENODEV; answer = ppp_register_compressor(&ppp_mppe); if (answer == 0) printk(KERN_INFO "PPP MPPE Compression module registered\n"); - else - kfree(sha_pad); return answer; } @@ -681,7 +610,6 @@ static int __init ppp_mppe_init(void) static void __exit ppp_mppe_cleanup(void) { ppp_unregister_compressor(&ppp_mppe); - kfree(sha_pad); } module_init(ppp_mppe_init); diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index 410effa42ade..4ac6afce267b 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -100,8 +100,8 @@ struct pppoe_net { * as well, moreover in case of SMP less locking * controversy here */ - struct pppox_sock *hash_table[PPPOE_HASH_SIZE]; - rwlock_t hash_lock; + struct pppox_sock __rcu *hash_table[PPPOE_HASH_SIZE]; + spinlock_t hash_lock; }; /* @@ -162,13 +162,13 @@ static struct pppox_sock *__get_item(struct pppoe_net *pn, __be16 sid, int hash = hash_item(sid, addr); struct pppox_sock *ret; - ret = pn->hash_table[hash]; + ret = rcu_dereference(pn->hash_table[hash]); while (ret) { if (cmp_addr(&ret->pppoe_pa, sid, addr) && ret->pppoe_ifindex == ifindex) return ret; - ret = ret->next; + ret = rcu_dereference(ret->next); } return NULL; @@ -177,19 +177,20 @@ static struct pppox_sock *__get_item(struct pppoe_net *pn, __be16 sid, static int __set_item(struct pppoe_net *pn, struct pppox_sock *po) { int hash = hash_item(po->pppoe_pa.sid, po->pppoe_pa.remote); - struct pppox_sock *ret; + struct pppox_sock *ret, *first; - ret = pn->hash_table[hash]; + first = rcu_dereference_protected(pn->hash_table[hash], lockdep_is_held(&pn->hash_lock)); + ret = first; while (ret) { if (cmp_2_addr(&ret->pppoe_pa, &po->pppoe_pa) && ret->pppoe_ifindex == po->pppoe_ifindex) return -EALREADY; - ret = ret->next; + ret = rcu_dereference_protected(ret->next, lockdep_is_held(&pn->hash_lock)); } - po->next = pn->hash_table[hash]; - pn->hash_table[hash] = po; + RCU_INIT_POINTER(po->next, first); + rcu_assign_pointer(pn->hash_table[hash], po); return 0; } @@ -198,20 +199,24 @@ static void __delete_item(struct pppoe_net *pn, __be16 sid, char *addr, int ifindex) { int hash = hash_item(sid, addr); - struct pppox_sock *ret, **src; + struct pppox_sock *ret, __rcu **src; - ret = pn->hash_table[hash]; + ret = rcu_dereference_protected(pn->hash_table[hash], 
lockdep_is_held(&pn->hash_lock)); src = &pn->hash_table[hash]; while (ret) { if (cmp_addr(&ret->pppoe_pa, sid, addr) && ret->pppoe_ifindex == ifindex) { - *src = ret->next; + struct pppox_sock *next; + + next = rcu_dereference_protected(ret->next, + lockdep_is_held(&pn->hash_lock)); + rcu_assign_pointer(*src, next); break; } src = &ret->next; - ret = ret->next; + ret = rcu_dereference_protected(ret->next, lockdep_is_held(&pn->hash_lock)); } } @@ -225,17 +230,15 @@ static inline struct pppox_sock *get_item(struct pppoe_net *pn, __be16 sid, { struct pppox_sock *po; - read_lock_bh(&pn->hash_lock); po = __get_item(pn, sid, addr, ifindex); - if (po) - sock_hold(sk_pppox(po)); - read_unlock_bh(&pn->hash_lock); + if (po && !refcount_inc_not_zero(&sk_pppox(po)->sk_refcnt)) + po = NULL; return po; }
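Context, not part of the diff: with the hash table under RCU, a lookup can race against a socket being released, so get_item() now takes a reference only if the refcount is still nonzero and treats a socket already on its way down as not found. SOCK_RCU_FREE (set in pppoe_create() further below) defers the actual free past the grace period, keeping the hash walk safe. Callers wrap the lookup in an RCU read-side section, e.g. (illustrative):

```c
/* Illustrative caller of the RCU-safe lookup pattern */
static void example_lookup(struct pppoe_net *pn, __be16 sid,
			   unsigned char *addr, int ifindex)
{
	struct pppox_sock *po;

	rcu_read_lock();
	po = get_item(pn, sid, addr, ifindex);	/* ref taken only if live */
	rcu_read_unlock();

	if (!po)
		return;
	/* ... use the socket, then drop the reference ... */
	sock_put(sk_pppox(po));
}
```

-static inline struct pppox_sock *get_item_by_addr(struct net *net, - struct sockaddr_pppox *sp) +static inline struct pppox_sock *__get_item_by_addr(struct net *net, + struct sockaddr_pppox *sp) { struct net_device *dev; struct pppoe_net *pn; @@ -243,24 +246,22 @@ static inline struct pppox_sock *get_item_by_addr(struct net *net, int ifindex; - rcu_read_lock(); dev = dev_get_by_name_rcu(net, sp->sa_addr.pppoe.dev); if (dev) { ifindex = dev->ifindex; pn = pppoe_pernet(net); - pppox_sock = get_item(pn, sp->sa_addr.pppoe.sid, - sp->sa_addr.pppoe.remote, ifindex); + pppox_sock = __get_item(pn, sp->sa_addr.pppoe.sid, + sp->sa_addr.pppoe.remote, ifindex); } - rcu_read_unlock(); return pppox_sock; } static inline void delete_item(struct pppoe_net *pn, __be16 sid, char *addr, int ifindex) { - write_lock_bh(&pn->hash_lock); + spin_lock(&pn->hash_lock); __delete_item(pn, sid, addr, ifindex); - write_unlock_bh(&pn->hash_lock); + spin_unlock(&pn->hash_lock); } /*************************************************************************** @@ -276,14 +277,16 @@ static void pppoe_flush_dev(struct net_device *dev) int i; pn = pppoe_pernet(dev_net(dev)); - write_lock_bh(&pn->hash_lock); + spin_lock(&pn->hash_lock); for (i = 0; i < PPPOE_HASH_SIZE; i++) { - struct pppox_sock *po = pn->hash_table[i]; + struct pppox_sock *po = rcu_dereference_protected(pn->hash_table[i], + lockdep_is_held(&pn->hash_lock)); struct sock *sk; while (po) { while (po && po->pppoe_dev != dev) { - po = po->next; + po = rcu_dereference_protected(po->next, + lockdep_is_held(&pn->hash_lock)); } if (!po) @@ -300,7 +303,7 @@ static void pppoe_flush_dev(struct net_device *dev) */ sock_hold(sk); - write_unlock_bh(&pn->hash_lock); + spin_unlock(&pn->hash_lock); lock_sock(sk); if (po->pppoe_dev == dev && @@ -320,11 +323,12 @@ static void pppoe_flush_dev(struct net_device *dev) */ BUG_ON(pppoe_pernet(dev_net(dev)) == NULL); - write_lock_bh(&pn->hash_lock); - po = pn->hash_table[i]; + spin_lock(&pn->hash_lock); + po = rcu_dereference_protected(pn->hash_table[i], + lockdep_is_held(&pn->hash_lock)); } } - write_unlock_bh(&pn->hash_lock); + spin_unlock(&pn->hash_lock); } static int pppoe_device_event(struct notifier_block *this, @@ -375,18 +379,16 @@ static int pppoe_rcv_core(struct sock *sk, struct sk_buff *skb) if (sk->sk_state & PPPOX_BOUND) { ppp_input(&po->chan, skb); } else if (sk->sk_state & PPPOX_RELAY) { - relay_po = get_item_by_addr(sock_net(sk), - &po->pppoe_relay); + relay_po = __get_item_by_addr(sock_net(sk), + &po->pppoe_relay); if (relay_po == NULL) goto abort_kfree; if ((sk_pppox(relay_po)->sk_state & PPPOX_CONNECTED) == 0) - goto abort_put; + goto abort_kfree; if (!__pppoe_xmit(sk_pppox(relay_po), skb)) - goto abort_put; - -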
sock_put(sk_pppox(relay_po)); + goto abort_kfree; } else { if (sock_queue_rcv_skb(sk, skb)) goto abort_kfree; @@ -394,9 +396,6 @@ static int pppoe_rcv_core(struct sock *sk, struct sk_buff *skb) return NET_RX_SUCCESS; -abort_put: - sock_put(sk_pppox(relay_po)); - abort_kfree: kfree_skb(skb); return NET_RX_DROP; @@ -441,14 +440,11 @@ static int pppoe_rcv(struct sk_buff *skb, struct net_device *dev, ph = pppoe_hdr(skb); pn = pppoe_pernet(dev_net(dev)); - /* Note that get_item does a sock_hold(), so sk_pppox(po) - * is known to be safe. - */ - po = get_item(pn, ph->sid, eth_hdr(skb)->h_source, dev->ifindex); + po = __get_item(pn, ph->sid, eth_hdr(skb)->h_source, dev->ifindex); if (!po) goto drop; - return sk_receive_skb(sk_pppox(po), skb, 0); + return __sk_receive_skb(sk_pppox(po), skb, 0, 1, false); drop: kfree_skb(skb); @@ -528,6 +524,11 @@ static struct proto pppoe_sk_proto __read_mostly = { .obj_size = sizeof(struct pppox_sock), }; +static void pppoe_destruct(struct sock *sk) +{ + skb_queue_purge(&sk->sk_receive_queue); +} + /*********************************************************************** * * Initialize a new struct sock. @@ -542,11 +543,13 @@ static int pppoe_create(struct net *net, struct socket *sock, int kern) return -ENOMEM; sock_init_data(sock, sk); + sock_set_flag(sk, SOCK_RCU_FREE); sock->state = SS_UNCONNECTED; sock->ops = &pppoe_ops; sk->sk_backlog_rcv = pppoe_rcv_core; + sk->sk_destruct = pppoe_destruct; sk->sk_state = PPPOX_NONE; sk->sk_type = SOCK_STREAM; sk->sk_family = PF_PPPOX; @@ -599,7 +602,6 @@ static int pppoe_release(struct socket *sock) sock_orphan(sk); sock->sk = NULL; - skb_queue_purge(&sk->sk_receive_queue); release_sock(sk); sock_put(sk); @@ -681,9 +683,9 @@ static int pppoe_connect(struct socket *sock, struct sockaddr *uservaddr, &sp->sa_addr.pppoe, sizeof(struct pppoe_addr)); - write_lock_bh(&pn->hash_lock); + spin_lock(&pn->hash_lock); error = __set_item(pn, po); - write_unlock_bh(&pn->hash_lock); + spin_unlock(&pn->hash_lock); if (error < 0) goto err_put; @@ -808,11 +810,12 @@ static int pppoe_ioctl(struct socket *sock, unsigned int cmd, /* Check that the socket referenced by the address actually exists. */ - relay_po = get_item_by_addr(sock_net(sk), &po->pppoe_relay); + rcu_read_lock(); + relay_po = __get_item_by_addr(sock_net(sk), &po->pppoe_relay); + rcu_read_unlock(); if (!relay_po) break; - sock_put(sk_pppox(relay_po)); sk->sk_state |= PPPOX_RELAY; err = 0; break; @@ -1052,11 +1055,11 @@ static inline struct pppox_sock *pppoe_get_idx(struct pppoe_net *pn, loff_t pos) int i; for (i = 0; i < PPPOE_HASH_SIZE; i++) { - po = pn->hash_table[i]; + po = rcu_dereference(pn->hash_table[i]); while (po) { if (!pos--) goto out; - po = po->next; + po = rcu_dereference(po->next); } } @@ -1065,19 +1068,19 @@ out: } static void *pppoe_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(pn->hash_lock) + __acquires(RCU) { struct pppoe_net *pn = pppoe_pernet(seq_file_net(seq)); loff_t l = *pos; - read_lock_bh(&pn->hash_lock); + rcu_read_lock(); return l ? 
pppoe_get_idx(pn, --l) : SEQ_START_TOKEN; } static void *pppoe_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct pppoe_net *pn = pppoe_pernet(seq_file_net(seq)); - struct pppox_sock *po; + struct pppox_sock *po, *next; ++*pos; if (v == SEQ_START_TOKEN) { @@ -1085,14 +1088,15 @@ static void *pppoe_seq_next(struct seq_file *seq, void *v, loff_t *pos) goto out; } po = v; - if (po->next) - po = po->next; + next = rcu_dereference(po->next); + if (next) + po = next; else { int hash = hash_item(po->pppoe_pa.sid, po->pppoe_pa.remote); po = NULL; while (++hash < PPPOE_HASH_SIZE) { - po = pn->hash_table[hash]; + po = rcu_dereference(pn->hash_table[hash]); if (po) break; } @@ -1103,10 +1107,9 @@ out: } static void pppoe_seq_stop(struct seq_file *seq, void *v) - __releases(pn->hash_lock) + __releases(RCU) { - struct pppoe_net *pn = pppoe_pernet(seq_file_net(seq)); - read_unlock_bh(&pn->hash_lock); + rcu_read_unlock(); } static const struct seq_operations pppoe_seq_ops = { @@ -1149,7 +1152,7 @@ static __net_init int pppoe_init_net(struct net *net) struct pppoe_net *pn = pppoe_pernet(net); struct proc_dir_entry *pde; - rwlock_init(&pn->hash_lock); + spin_lock_init(&pn->hash_lock); pde = proc_create_net("pppoe", 0444, net->proc_net, &pppoe_seq_ops, sizeof(struct seq_net_private)); diff --git a/drivers/net/pse-pd/Kconfig b/drivers/net/pse-pd/Kconfig index 7fab916a7f46..7ef29657ee5d 100644 --- a/drivers/net/pse-pd/Kconfig +++ b/drivers/net/pse-pd/Kconfig @@ -32,6 +32,17 @@ config PSE_PD692X0 To compile this driver as a module, choose M here: the module will be called pd692x0. +config PSE_SI3474 + tristate "Si3474 PSE controller" + depends on I2C + help + This module provides support for Si3474 regulator based Ethernet + Power Sourcing Equipment. + Only 4-pair PSE configurations are supported. + + To compile this driver as a module, choose M here: the + module will be called si3474. + config PSE_TPS23881 tristate "TPS23881 PSE controller" depends on I2C diff --git a/drivers/net/pse-pd/Makefile b/drivers/net/pse-pd/Makefile index 9d2898b36737..cc78f7ea7f5f 100644 --- a/drivers/net/pse-pd/Makefile +++ b/drivers/net/pse-pd/Makefile @@ -5,4 +5,5 @@ obj-$(CONFIG_PSE_CONTROLLER) += pse_core.o obj-$(CONFIG_PSE_REGULATOR) += pse_regulator.o obj-$(CONFIG_PSE_PD692X0) += pd692x0.o +obj-$(CONFIG_PSE_SI3474) += si3474.o obj-$(CONFIG_PSE_TPS23881) += tps23881.o diff --git a/drivers/net/pse-pd/si3474.c b/drivers/net/pse-pd/si3474.c new file mode 100644 index 000000000000..aa07ffbce54d --- /dev/null +++ b/drivers/net/pse-pd/si3474.c @@ -0,0 +1,578 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Driver for the Skyworks Si3474 PoE PSE Controller + * + * Chip Architecture & Terminology: + * + * The Si3474 is a single-chip PoE PSE controller managing 8 physical power + * delivery channels. Internally, it's structured into two logical "Quads". + * + * Quad 0: Manages physical channels ('ports' in datasheet) 0, 1, 2, 3 + * Quad 1: Manages physical channels ('ports' in datasheet) 4, 5, 6, 7 + * + * Each Quad is accessed via a separate I2C address. The base address range is + * set by hardware pins A1-A4, and the specific address selects Quad 0 (usually + * the lower/even address) or Quad 1 (usually the higher/odd address). + * See datasheet Table 2.2 for the address mapping. + * + * While the Quads manage channel-specific operations, the Si3474 package has + * several resources shared across the entire chip: + * - Single RESETb input pin. + * - Single INTb output pin (signals interrupts from *either* Quad). 
+ * - Single OSS input pin (Emergency Shutdown). + * - Global I2C Address (0x7F) used for firmware updates. + * - Global status monitoring (Temperature, VDD/VPWR Undervoltage Lockout). + * + * Driver Architecture: + * + * To handle the mix of per-Quad access and shared resources correctly, this + * driver treats the entire Si3474 package as one logical device. The driver + * instance associated with the primary I2C address (Quad 0) takes ownership. + * It discovers and manages the I2C client for the secondary address (Quad 1). + * This primary instance handles shared resources like IRQ management and + * registers a single PSE controller device representing all logical PIs. + * Internal functions route I2C commands to the appropriate Quad's i2c_client + * based on the target channel or PI. + * + * Terminology Mapping: + * + * - "PI" (Power Interface): Refers to the logical PSE port as defined by + * IEEE 802.3 (typically corresponds to an RJ45 connector). This is the + * `id` (0-7) used in the pse_controller_ops. + * - "Channel": Refers to one of the 8 physical power control paths within + * the Si3474 chip itself (hardware channels 0-7). This terminology is + * used internally within the driver to avoid confusion with 'ports'. + * - "Quad": One of the two internal 4-channel management units within the + * Si3474, each accessed via its own I2C address. + * + * Relationship: + * - A 2-Pair PoE PI uses 1 Channel. + * - A 4-Pair PoE PI uses 2 Channels. + * + * ASCII Schematic: + * + * +-----------------------------------------------------+ + * | Si3474 Chip | + * | | + * | +---------------------+ +---------------------+ | + * | | Quad 0 | | Quad 1 | | + * | | Channels 0, 1, 2, 3 | | Channels 4, 5, 6, 7 | | + * | +----------^----------+ +-------^-------------+ | + * | I2C Addr 0 | | I2C Addr 1 | + * | +------------------------+ | + * | (Primary Driver Instance) (Managed by Primary) | + * | | + * | Shared Resources (affect whole chip): | + * | - Single INTb Output -> Handled by Primary | + * | - Single RESETb Input | + * | - Single OSS Input -> Handled by Primary | + * | - Global I2C Addr (0x7F) for Firmware Update | + * | - Global Status (Temp, VDD/VPWR UVLO) | + * +-----------------------------------------------------+ + * | | | | | | | | + * Ch0 Ch1 Ch2 Ch3 Ch4 Ch5 Ch6 Ch7 (Physical Channels) + * + * Example Mapping (Logical PI to Physical Channel(s)): + * * 2-Pair Mode (8 PIs): + * PI 0 -> Ch 0 + * PI 1 -> Ch 1 + * ... 
+ * PI 7 -> Ch 7 + * * 4-Pair Mode (4 PIs): + * PI 0 -> Ch 0 + Ch 1 (Managed via Quad 0 Addr) + * PI 1 -> Ch 2 + Ch 3 (Managed via Quad 0 Addr) + * PI 2 -> Ch 4 + Ch 5 (Managed via Quad 1 Addr) + * PI 3 -> Ch 6 + Ch 7 (Managed via Quad 1 Addr) + * (Note: Actual mapping depends on Device Tree and PORT_REMAP config) + */ + +#include <linux/i2c.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/pse-pd/pse.h> + +#define SI3474_MAX_CHANS 8 + +#define MANUFACTURER_ID 0x08 +#define IC_ID 0x05 +#define SI3474_DEVICE_ID (MANUFACTURER_ID << 3 | IC_ID) + +/* Misc registers */ +#define VENDOR_IC_ID_REG 0x1B +#define TEMPERATURE_REG 0x2C +#define FIRMWARE_REVISION_REG 0x41 +#define CHIP_REVISION_REG 0x43 + +/* Main status registers */ +#define POWER_STATUS_REG 0x10 +#define PORT_MODE_REG 0x12 +#define DETECT_CLASS_ENABLE_REG 0x14 + +/* PORTn Current */ +#define PORT1_CURRENT_LSB_REG 0x30 + +/* PORTn Current [mA], return in [nA] */ +/* 1000 * ((PORTn_CURRENT_MSB << 8) + PORTn_CURRENT_LSB) / 16384 */ +#define SI3474_NA_STEP (1000 * 1000 * 1000 / 16384) + +/* VPWR Voltage */ +#define VPWR_LSB_REG 0x2E +#define VPWR_MSB_REG 0x2F + +/* PORTn Voltage */ +#define PORT1_VOLTAGE_LSB_REG 0x32 + +/* VPWR Voltage [V], return in [uV] */ +/* 60 * (( VPWR_MSB << 8) + VPWR_LSB) / 16384 */ +#define SI3474_UV_STEP (1000 * 1000 * 60 / 16384) + +/* Helper macros */ +#define CHAN_IDX(chan) ((chan) % 4) +#define CHAN_BIT(chan) BIT(CHAN_IDX(chan)) +#define CHAN_UPPER_BIT(chan) BIT(CHAN_IDX(chan) + 4) + +#define CHAN_MASK(chan) (0x03U << (2 * CHAN_IDX(chan))) +#define CHAN_REG(base, chan) ((base) + (CHAN_IDX(chan) * 4)) + +struct si3474_pi_desc { + u8 chan[2]; + bool is_4p; +}; + +struct si3474_priv { + struct i2c_client *client[2]; + struct pse_controller_dev pcdev; + struct device_node *np; + struct si3474_pi_desc pi[SI3474_MAX_CHANS]; +}; + +static struct si3474_priv *to_si3474_priv(struct pse_controller_dev *pcdev) +{ + return container_of(pcdev, struct si3474_priv, pcdev); +} + +static void si3474_get_channels(struct si3474_priv *priv, int id, + u8 *chan0, u8 *chan1) +{ + *chan0 = priv->pi[id].chan[0]; + *chan1 = priv->pi[id].chan[1]; +} + +static struct i2c_client *si3474_get_chan_client(struct si3474_priv *priv, + u8 chan) +{ + return (chan < 4) ? 
priv->client[0] : priv->client[1]; +} + +static int si3474_pi_get_admin_state(struct pse_controller_dev *pcdev, int id, + struct pse_admin_state *admin_state) +{ + struct si3474_priv *priv = to_si3474_priv(pcdev); + struct i2c_client *client; + bool is_enabled; + u8 chan0, chan1; + s32 ret; + + si3474_get_channels(priv, id, &chan0, &chan1); + client = si3474_get_chan_client(priv, chan0); + + ret = i2c_smbus_read_byte_data(client, PORT_MODE_REG); + if (ret < 0) { + admin_state->c33_admin_state = + ETHTOOL_C33_PSE_ADMIN_STATE_UNKNOWN; + return ret; + } + + is_enabled = ret & (CHAN_MASK(chan0) | CHAN_MASK(chan1)); + + if (is_enabled) + admin_state->c33_admin_state = + ETHTOOL_C33_PSE_ADMIN_STATE_ENABLED; + else + admin_state->c33_admin_state = + ETHTOOL_C33_PSE_ADMIN_STATE_DISABLED; + + return 0; +} + +static int si3474_pi_get_pw_status(struct pse_controller_dev *pcdev, int id, + struct pse_pw_status *pw_status) +{ + struct si3474_priv *priv = to_si3474_priv(pcdev); + struct i2c_client *client; + bool delivering; + u8 chan0, chan1; + s32 ret; + + si3474_get_channels(priv, id, &chan0, &chan1); + client = si3474_get_chan_client(priv, chan0); + + ret = i2c_smbus_read_byte_data(client, POWER_STATUS_REG); + if (ret < 0) { + pw_status->c33_pw_status = ETHTOOL_C33_PSE_PW_D_STATUS_UNKNOWN; + return ret; + } + + delivering = ret & (CHAN_UPPER_BIT(chan0) | CHAN_UPPER_BIT(chan1)); + + if (delivering) + pw_status->c33_pw_status = + ETHTOOL_C33_PSE_PW_D_STATUS_DELIVERING; + else + pw_status->c33_pw_status = ETHTOOL_C33_PSE_PW_D_STATUS_DISABLED; + + return 0; +} + +static int si3474_get_of_channels(struct si3474_priv *priv) +{ + struct pse_pi *pi; + u32 chan_id; + u8 pi_no; + s32 ret; + + for (pi_no = 0; pi_no < SI3474_MAX_CHANS; pi_no++) { + pi = &priv->pcdev.pi[pi_no]; + bool pairset_found = false; + u8 pairset_no; + + for (pairset_no = 0; pairset_no < 2; pairset_no++) { + if (!pi->pairset[pairset_no].np) + continue; + + pairset_found = true; + + ret = of_property_read_u32(pi->pairset[pairset_no].np, + "reg", &chan_id); + if (ret) { + dev_err(&priv->client[0]->dev, + "Failed to read channel reg property\n"); + return ret; + } + if (chan_id > SI3474_MAX_CHANS) { + dev_err(&priv->client[0]->dev, + "Incorrect channel number: %d\n", chan_id); + return -EINVAL; + } + + priv->pi[pi_no].chan[pairset_no] = chan_id; + /* Mark as 4-pair if second pairset is present */ + priv->pi[pi_no].is_4p = (pairset_no == 1); + } + + if (pairset_found && !priv->pi[pi_no].is_4p) { + dev_err(&priv->client[0]->dev, + "Second pairset is missing for PI %pOF, only 4p configs are supported\n", + pi->np); + return -EINVAL; + } + } + + return 0; +} + +static int si3474_setup_pi_matrix(struct pse_controller_dev *pcdev) +{ + struct si3474_priv *priv = to_si3474_priv(pcdev); + s32 ret; + + ret = si3474_get_of_channels(priv); + if (ret < 0) + dev_warn(&priv->client[0]->dev, + "Unable to parse DT PSE power interface matrix\n"); + + return ret; +} + +static int si3474_pi_enable(struct pse_controller_dev *pcdev, int id) +{ + struct si3474_priv *priv = to_si3474_priv(pcdev); + struct i2c_client *client; + u8 chan0, chan1; + s32 ret; + u8 val; + + si3474_get_channels(priv, id, &chan0, &chan1); + client = si3474_get_chan_client(priv, chan0); + + /* Release PI from shutdown */ + ret = i2c_smbus_read_byte_data(client, PORT_MODE_REG); + if (ret < 0) + return ret; + + val = (u8)ret; + val |= CHAN_MASK(chan0); + val |= CHAN_MASK(chan1); + + ret = i2c_smbus_write_byte_data(client, PORT_MODE_REG, val); + if (ret) + return ret; + + /* DETECT_CLASS_ENABLE 
must be set when using AUTO mode, + * otherwise PI does not power up - datasheet section 2.10.2 + */ + val = CHAN_BIT(chan0) | CHAN_UPPER_BIT(chan0) | + CHAN_BIT(chan1) | CHAN_UPPER_BIT(chan1); + + ret = i2c_smbus_write_byte_data(client, DETECT_CLASS_ENABLE_REG, val); + if (ret) + return ret; + + return 0; +} + +static int si3474_pi_disable(struct pse_controller_dev *pcdev, int id) +{ + struct si3474_priv *priv = to_si3474_priv(pcdev); + struct i2c_client *client; + u8 chan0, chan1; + s32 ret; + u8 val; + + si3474_get_channels(priv, id, &chan0, &chan1); + client = si3474_get_chan_client(priv, chan0); + + /* Set PI in shutdown mode */ + ret = i2c_smbus_read_byte_data(client, PORT_MODE_REG); + if (ret < 0) + return ret; + + val = (u8)ret; + val &= ~CHAN_MASK(chan0); + val &= ~CHAN_MASK(chan1); + + ret = i2c_smbus_write_byte_data(client, PORT_MODE_REG, val); + if (ret) + return ret; + + return 0; +} + +static int si3474_pi_get_chan_current(struct si3474_priv *priv, u8 chan) +{ + struct i2c_client *client; + u64 tmp_64; + s32 ret; + u8 reg; + + client = si3474_get_chan_client(priv, chan); + + /* Registers 0x30 to 0x3d */ + reg = CHAN_REG(PORT1_CURRENT_LSB_REG, chan); + + ret = i2c_smbus_read_word_data(client, reg); + if (ret < 0) + return ret; + + tmp_64 = ret * SI3474_NA_STEP; + + /* uA = nA / 1000 */ + tmp_64 = DIV_ROUND_CLOSEST_ULL(tmp_64, 1000); + return (int)tmp_64; +} + +static int si3474_pi_get_chan_voltage(struct si3474_priv *priv, u8 chan) +{ + struct i2c_client *client; + s32 ret; + u32 val; + u8 reg; + + client = si3474_get_chan_client(priv, chan); + + /* Registers 0x32 to 0x3f */ + reg = CHAN_REG(PORT1_VOLTAGE_LSB_REG, chan); + + ret = i2c_smbus_read_word_data(client, reg); + if (ret < 0) + return ret; + + val = ret * SI3474_UV_STEP; + + return (int)val; +} + +static int si3474_pi_get_voltage(struct pse_controller_dev *pcdev, int id) +{ + struct si3474_priv *priv = to_si3474_priv(pcdev); + struct i2c_client *client; + u8 chan0, chan1; + s32 ret; + + si3474_get_channels(priv, id, &chan0, &chan1); + client = si3474_get_chan_client(priv, chan0); + + /* Check which channels are enabled*/ + ret = i2c_smbus_read_byte_data(client, POWER_STATUS_REG); + if (ret < 0) + return ret; + + /* Take voltage from the first enabled channel */ + if (ret & CHAN_BIT(chan0)) + ret = si3474_pi_get_chan_voltage(priv, chan0); + else if (ret & CHAN_BIT(chan1)) + ret = si3474_pi_get_chan_voltage(priv, chan1); + else + /* 'should' be no voltage in this case */ + return 0; + + return ret; +} + +static int si3474_pi_get_actual_pw(struct pse_controller_dev *pcdev, int id) +{ + struct si3474_priv *priv = to_si3474_priv(pcdev); + u8 chan0, chan1; + u32 uV, uA; + u64 tmp_64; + s32 ret; + + ret = si3474_pi_get_voltage(&priv->pcdev, id); + + /* Do not read currents if voltage is 0 */ + if (ret <= 0) + return ret; + uV = ret; + + si3474_get_channels(priv, id, &chan0, &chan1); + + ret = si3474_pi_get_chan_current(priv, chan0); + if (ret < 0) + return ret; + uA = ret; + + ret = si3474_pi_get_chan_current(priv, chan1); + if (ret < 0) + return ret; + uA += ret; + + tmp_64 = uV; + tmp_64 *= uA; + /* mW = uV * uA / 1000000000 */ + return DIV_ROUND_CLOSEST_ULL(tmp_64, 1000000000); +} + +static const struct pse_controller_ops si3474_ops = { + .setup_pi_matrix = si3474_setup_pi_matrix, + .pi_enable = si3474_pi_enable, + .pi_disable = si3474_pi_disable, + .pi_get_actual_pw = si3474_pi_get_actual_pw, + .pi_get_voltage = si3474_pi_get_voltage, + .pi_get_admin_state = si3474_pi_get_admin_state, + .pi_get_pw_status = 
si3474_pi_get_pw_status, +}; + +static void si3474_ancillary_i2c_remove(void *data) +{ + struct i2c_client *client = data; + + i2c_unregister_device(client); +} + +static int si3474_i2c_probe(struct i2c_client *client) +{ + struct device *dev = &client->dev; + struct si3474_priv *priv; + u8 fw_version; + s32 ret; + + if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) { + dev_err(dev, "i2c check functionality failed\n"); + return -ENXIO; + } + + priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + ret = i2c_smbus_read_byte_data(client, VENDOR_IC_ID_REG); + if (ret < 0) + return ret; + + if (ret != SI3474_DEVICE_ID) { + dev_err(dev, "Wrong device ID: 0x%x\n", ret); + return -ENXIO; + } + + ret = i2c_smbus_read_byte_data(client, FIRMWARE_REVISION_REG); + if (ret < 0) + return ret; + fw_version = ret; + + ret = i2c_smbus_read_byte_data(client, CHIP_REVISION_REG); + if (ret < 0) + return ret; + + dev_dbg(dev, "Chip revision: 0x%x, firmware version: 0x%x\n", + ret, fw_version); + + priv->client[0] = client; + i2c_set_clientdata(client, priv); + + priv->client[1] = i2c_new_ancillary_device(priv->client[0], "secondary", + priv->client[0]->addr + 1); + if (IS_ERR(priv->client[1])) + return PTR_ERR(priv->client[1]); + + ret = devm_add_action_or_reset(dev, si3474_ancillary_i2c_remove, priv->client[1]); + if (ret < 0) { + dev_err(&priv->client[1]->dev, "Cannot register remove callback\n"); + return ret; + } + + ret = i2c_smbus_read_byte_data(priv->client[1], VENDOR_IC_ID_REG); + if (ret < 0) { + dev_err(&priv->client[1]->dev, "Cannot access secondary PSE controller\n"); + return ret; + } + + if (ret != SI3474_DEVICE_ID) { + dev_err(&priv->client[1]->dev, + "Wrong device ID for secondary PSE controller: 0x%x\n", ret); + return -ENXIO; + } + + priv->np = dev->of_node; + priv->pcdev.owner = THIS_MODULE; + priv->pcdev.ops = &si3474_ops; + priv->pcdev.dev = dev; + priv->pcdev.types = ETHTOOL_PSE_C33; + priv->pcdev.nr_lines = SI3474_MAX_CHANS; + + ret = devm_pse_controller_register(dev, &priv->pcdev); + if (ret) { + dev_err(dev, "Failed to register PSE controller: 0x%x\n", ret); + return ret; + } + + return 0; +} + +static const struct i2c_device_id si3474_id[] = { + { "si3474" }, + {} +}; +MODULE_DEVICE_TABLE(i2c, si3474_id); + +static const struct of_device_id si3474_of_match[] = { + { + .compatible = "skyworks,si3474", + }, + {}, +}; +MODULE_DEVICE_TABLE(of, si3474_of_match); + +static struct i2c_driver si3474_driver = { + .probe = si3474_i2c_probe, + .id_table = si3474_id, + .driver = { + .name = "si3474", + .of_match_table = si3474_of_match, + }, +}; +module_i2c_driver(si3474_driver); + +MODULE_AUTHOR("Piotr Kubik <piotr.kubik@adtran.com>"); +MODULE_DESCRIPTION("Skyworks Si3474 PoE PSE Controller driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/tun.c b/drivers/net/tun.c index cc6c50180663..86a9e927d0ff 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -2823,13 +2823,13 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) if (netif_running(tun->dev)) netif_tx_wake_all_queues(tun->dev); - strcpy(ifr->ifr_name, tun->dev->name); + strscpy(ifr->ifr_name, tun->dev->name); return 0; } static void tun_get_iff(struct tun_struct *tun, struct ifreq *ifr) { - strcpy(ifr->ifr_name, tun->dev->name); + strscpy(ifr->ifr_name, tun->dev->name); ifr->ifr_flags = tun_flags(tun); diff --git a/drivers/net/usb/Kconfig b/drivers/net/usb/Kconfig index 0a678e31cfaa..856e648d804e 100644 --- a/drivers/net/usb/Kconfig +++ 
b/drivers/net/usb/Kconfig @@ -116,6 +116,7 @@ config USB_LAN78XX select PHYLINK select MICROCHIP_PHY select CRC32 + imply NET_SELFTESTS help This option adds support for Microchip LAN78XX based USB 2 & USB 3 10/100/1000 Ethernet adapters. diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 1ff25f57329a..b56e2459ee3c 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -20,6 +20,7 @@ #include <linux/mdio.h> #include <linux/phy.h> #include <net/ip6_checksum.h> +#include <net/selftests.h> #include <net/vxlan.h> #include <linux/interrupt.h> #include <linux/irqdomain.h> @@ -1702,12 +1703,16 @@ static void lan78xx_get_strings(struct net_device *netdev, u32 stringset, { if (stringset == ETH_SS_STATS) memcpy(data, lan78xx_gstrings, sizeof(lan78xx_gstrings)); + else if (stringset == ETH_SS_TEST) + net_selftest_get_strings(data); } static int lan78xx_get_sset_count(struct net_device *netdev, int sset) { if (sset == ETH_SS_STATS) return ARRAY_SIZE(lan78xx_gstrings); + else if (sset == ETH_SS_TEST) + return net_selftest_get_count(); else return -EOPNOTSUPP; } @@ -1894,6 +1899,7 @@ static const struct ethtool_ops lan78xx_ethtool_ops = { .set_eeprom = lan78xx_ethtool_set_eeprom, .get_ethtool_stats = lan78xx_get_stats, .get_sset_count = lan78xx_get_sset_count, + .self_test = net_selftest, .get_strings = lan78xx_get_strings, .get_wol = lan78xx_get_wol, .set_wol = lan78xx_set_wol, diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 3ccd649913b5..571847a7f86d 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -26,6 +26,7 @@ #include <linux/inetdevice.h> #include <net/arp.h> +#include <net/flow.h> #include <net/ip.h> #include <net/ip_fib.h> #include <net/ip6_fib.h> @@ -38,7 +39,6 @@ #include <net/sch_generic.h> #include <net/netns/generic.h> #include <net/netfilter/nf_conntrack.h> -#include <net/inet_dscp.h> #define DRV_NAME "vrf" #define DRV_VERSION "1.1" @@ -505,7 +505,7 @@ static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb, /* needed to match OIF rule */ fl4.flowi4_l3mdev = vrf_dev->ifindex; fl4.flowi4_iif = LOOPBACK_IFINDEX; - fl4.flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(ip4h)); + fl4.flowi4_dscp = ip4h_dscp(ip4h); fl4.flowi4_flags = FLOWI_FLAG_ANYSRC; fl4.flowi4_proto = ip4h->protocol; fl4.daddr = ip4h->daddr; diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c b/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c index 4a82f8e4c118..36488aa6cc20 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c @@ -664,8 +664,8 @@ int mt7915_mmio_wed_init(struct mt7915_dev *dev, void *pdev_ptr, MT_RXQ_WED_RING_BASE; wed->wlan.wpdma_rx_glo = pci_resource_start(pci_dev, 0) + MT_WPDMA_GLO_CFG; - wed->wlan.wpdma_rx = pci_resource_start(pci_dev, 0) + - MT_RXQ_WED_DATA_RING_BASE; + wed->wlan.wpdma_rx[0] = pci_resource_start(pci_dev, 0) + + MT_RXQ_WED_DATA_RING_BASE; } else { struct platform_device *plat_dev = pdev_ptr; struct resource *res; @@ -687,7 +687,7 @@ int mt7915_mmio_wed_init(struct mt7915_dev *dev, void *pdev_ptr, wed->wlan.wpdma_tx = res->start + MT_TXQ_WED_RING_BASE; wed->wlan.wpdma_txfree = res->start + MT_RXQ_WED_RING_BASE; wed->wlan.wpdma_rx_glo = res->start + MT_WPDMA_GLO_CFG; - wed->wlan.wpdma_rx = res->start + MT_RXQ_WED_DATA_RING_BASE; + wed->wlan.wpdma_rx[0] = res->start + MT_RXQ_WED_DATA_RING_BASE; } wed->wlan.nbuf = MT7915_HW_TOKEN_SIZE; wed->wlan.tx_tbit[0] = is_mt7915(&dev->mt76) ? 
4 : 30; diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mmio.c b/drivers/net/wireless/mediatek/mt76/mt7996/mmio.c index 30b40f4a91be..fb2428a9b877 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mmio.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mmio.c @@ -503,9 +503,9 @@ int mt7996_mmio_wed_init(struct mt7996_dev *dev, void *pdev_ptr, } wed->wlan.wpdma_rx_glo = wed->wlan.phy_base + hif1_ofs + MT_WFDMA0_GLO_CFG; - wed->wlan.wpdma_rx = wed->wlan.phy_base + hif1_ofs + - MT_RXQ_RING_BASE(MT7996_RXQ_BAND0) + - MT7996_RXQ_BAND0 * MT_RING_SIZE; + wed->wlan.wpdma_rx[0] = wed->wlan.phy_base + hif1_ofs + + MT_RXQ_RING_BASE(MT7996_RXQ_BAND0) + + MT7996_RXQ_BAND0 * MT_RING_SIZE; wed->wlan.id = MT7996_DEVICE_ID_2; wed->wlan.tx_tbit[0] = ffs(MT_INT_TX_DONE_BAND2) - 1; @@ -518,9 +518,9 @@ int mt7996_mmio_wed_init(struct mt7996_dev *dev, void *pdev_ptr, wed->wlan.wpdma_rx_glo = wed->wlan.phy_base + MT_WFDMA0_GLO_CFG; - wed->wlan.wpdma_rx = wed->wlan.phy_base + - MT_RXQ_RING_BASE(MT7996_RXQ_BAND0) + - MT7996_RXQ_BAND0 * MT_RING_SIZE; + wed->wlan.wpdma_rx[0] = wed->wlan.phy_base + + MT_RXQ_RING_BASE(MT7996_RXQ_BAND0) + + MT7996_RXQ_BAND0 * MT_RING_SIZE; wed->wlan.wpdma_rx_rro[0] = wed->wlan.phy_base + MT_RXQ_RING_BASE(MT7996_RXQ_RRO_BAND0) + diff --git a/drivers/net/wwan/iosm/iosm_ipc_pcie.c b/drivers/net/wwan/iosm/iosm_ipc_pcie.c index a066977af0be..08ff0d6ccfab 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_pcie.c +++ b/drivers/net/wwan/iosm/iosm_ipc_pcie.c @@ -69,7 +69,7 @@ static int ipc_pcie_resources_request(struct iosm_pcie *ipc_pcie) { struct pci_dev *pci = ipc_pcie->pci; u32 cap = 0; - u32 ret; + int ret; /* Reserved PCI I/O and memory resources. * Mark all PCI regions associated with PCI device pci as diff --git a/drivers/nfc/pn533/pn533.c b/drivers/nfc/pn533/pn533.c index 14661249c690..2b043a9f9533 100644 --- a/drivers/nfc/pn533/pn533.c +++ b/drivers/nfc/pn533/pn533.c @@ -1412,11 +1412,9 @@ static int pn533_autopoll_complete(struct pn533 *dev, void *arg, if (dev->poll_mod_count != 0) return rc; goto stop_poll; - } else if (rc < 0) { - nfc_err(dev->dev, - "Error %d when running autopoll\n", rc); - goto stop_poll; } + nfc_err(dev->dev, "Error %d when running autopoll\n", rc); + goto stop_poll; } nbtg = resp->data[0]; @@ -1505,11 +1503,9 @@ static int pn533_poll_complete(struct pn533 *dev, void *arg, if (dev->poll_mod_count != 0) return rc; goto stop_poll; - } else if (rc < 0) { - nfc_err(dev->dev, - "Error %d when running poll\n", rc); - goto stop_poll; } + nfc_err(dev->dev, "Error %d when running poll\n", rc); + goto stop_poll; } cur_mod = dev->poll_mod_active[dev->poll_mod_curr]; diff --git a/drivers/nfc/s3fwrn5/Kconfig b/drivers/nfc/s3fwrn5/Kconfig index 8a6b1a79de25..96386b73fa2b 100644 --- a/drivers/nfc/s3fwrn5/Kconfig +++ b/drivers/nfc/s3fwrn5/Kconfig @@ -1,8 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only config NFC_S3FWRN5 tristate - select CRYPTO - select CRYPTO_HASH + select CRYPTO_LIB_SHA1 help Core driver for Samsung S3FWRN5 NFC chip. Contains core utilities of chip. 
It's intended to be used by PHYs to avoid duplicating lots diff --git a/drivers/nfc/s3fwrn5/firmware.c b/drivers/nfc/s3fwrn5/firmware.c index 781cdbcac104..64d61b2a715a 100644 --- a/drivers/nfc/s3fwrn5/firmware.c +++ b/drivers/nfc/s3fwrn5/firmware.c @@ -8,7 +8,6 @@ #include <linux/completion.h> #include <linux/firmware.h> -#include <crypto/hash.h> #include <crypto/sha1.h> #include "s3fwrn5.h" @@ -411,27 +410,13 @@ int s3fwrn5_fw_download(struct s3fwrn5_fw_info *fw_info) struct device *dev = &fw_info->ndev->nfc_dev->dev; struct s3fwrn5_fw_image *fw = &fw_info->fw; u8 hash_data[SHA1_DIGEST_SIZE]; - struct crypto_shash *tfm; u32 image_size, off; int ret; image_size = fw_info->sector_size * fw->image_sectors; /* Compute SHA of firmware data */ - - tfm = crypto_alloc_shash("sha1", 0, 0); - if (IS_ERR(tfm)) { - dev_err(dev, "Cannot allocate shash (code=%pe)\n", tfm); - return PTR_ERR(tfm); - } - - ret = crypto_shash_tfm_digest(tfm, fw->image, image_size, hash_data); - - crypto_free_shash(tfm); - if (ret) { - dev_err(dev, "Cannot compute hash (code=%d)\n", ret); - return ret; - } + sha1(fw->image, image_size, hash_data); /* Firmware update process */ diff --git a/drivers/ptp/Kconfig b/drivers/ptp/Kconfig index 204278eb215e..9256bf2e8ad4 100644 --- a/drivers/ptp/Kconfig +++ b/drivers/ptp/Kconfig @@ -252,4 +252,15 @@ config PTP_S390 driver provides the raw clock value without the delta to userspace. That way userspace programs like chrony could steer the kernel clock. + +config PTP_NETC_V4_TIMER + tristate "NXP NETC V4 Timer PTP Driver" + depends on PTP_1588_CLOCK + depends on PCI_MSI + help + This driver adds support for using the NXP NETC V4 Timer as a PTP + clock, the clock is used by ENETC V4 or NETC V4 Switch for PTP time + synchronization. It also supports periodic output signal (e.g. PPS) + and external trigger timestamping. 
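The s3fwrn5 firmware hunk a few hunks up drops the allocate/digest/free crypto_shash boilerplate in favor of the synchronous SHA-1 library routine that its Kconfig change now pulls in via CRYPTO_LIB_SHA1; the library call cannot fail, so both error paths disappear with it. A minimal sketch of the resulting shape (not the driver's exact code):

#include <crypto/sha1.h>

static void fw_digest(const u8 *image, unsigned int len,
		      u8 out[SHA1_DIGEST_SIZE])
{
	/* One synchronous call: no tfm allocation, no failure path. */
	sha1(image, len, out);
}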
+ endmenu diff --git a/drivers/ptp/Makefile b/drivers/ptp/Makefile index 25f846fe48c9..8985d723d29c 100644 --- a/drivers/ptp/Makefile +++ b/drivers/ptp/Makefile @@ -23,3 +23,4 @@ obj-$(CONFIG_PTP_1588_CLOCK_VMW) += ptp_vmw.o obj-$(CONFIG_PTP_1588_CLOCK_OCP) += ptp_ocp.o obj-$(CONFIG_PTP_DFL_TOD) += ptp_dfl_tod.o obj-$(CONFIG_PTP_S390) += ptp_s390.o +obj-$(CONFIG_PTP_NETC_V4_TIMER) += ptp_netc.o diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c index 1cc06b7cb17e..5739a57958c7 100644 --- a/drivers/ptp/ptp_clock.c +++ b/drivers/ptp/ptp_clock.c @@ -11,6 +11,7 @@ #include <linux/module.h> #include <linux/posix-clock.h> #include <linux/pps_kernel.h> +#include <linux/property.h> #include <linux/slab.h> #include <linux/syscalls.h> #include <linux/uaccess.h> @@ -100,6 +101,9 @@ static int ptp_clock_settime(struct posix_clock *pc, const struct timespec64 *tp return -EBUSY; } + if (!timespec64_valid_settod(tp)) + return -EINVAL; + return ptp->info->settime64(ptp->info, tp); } @@ -130,7 +134,7 @@ static int ptp_clock_adjtime(struct posix_clock *pc, struct __kernel_timex *tx) ops = ptp->info; if (tx->modes & ADJ_SETOFFSET) { - struct timespec64 ts; + struct timespec64 ts, ts2; ktime_t kt; s64 delta; @@ -143,6 +147,14 @@ static int ptp_clock_adjtime(struct posix_clock *pc, struct __kernel_timex *tx) if ((unsigned long) ts.tv_nsec >= NSEC_PER_SEC) return -EINVAL; + /* Make sure the offset is valid */ + err = ptp_clock_gettime(pc, &ts2); + if (err) + return err; + ts2 = timespec64_add(ts2, ts); + if (!timespec64_valid_settod(&ts2)) + return -EINVAL; + kt = timespec64_to_ktime(ts); delta = ktime_to_ns(kt); err = ops->adjtime(ops, delta); @@ -477,6 +489,58 @@ int ptp_clock_index(struct ptp_clock *ptp) } EXPORT_SYMBOL(ptp_clock_index); +static int ptp_clock_of_node_match(struct device *dev, const void *data) +{ + const struct device_node *parent_np = data; + + return (dev->parent && dev_of_node(dev->parent) == parent_np); +} + +int ptp_clock_index_by_of_node(struct device_node *np) +{ + struct ptp_clock *ptp; + struct device *dev; + int phc_index; + + dev = class_find_device(&ptp_class, NULL, np, + ptp_clock_of_node_match); + if (!dev) + return -1; + + ptp = dev_get_drvdata(dev); + phc_index = ptp_clock_index(ptp); + put_device(dev); + + return phc_index; +} +EXPORT_SYMBOL_GPL(ptp_clock_index_by_of_node); + +static int ptp_clock_dev_match(struct device *dev, const void *data) +{ + const struct device *parent = data; + + return dev->parent == parent; +} + +int ptp_clock_index_by_dev(struct device *parent) +{ + struct ptp_clock *ptp; + struct device *dev; + int phc_index; + + dev = class_find_device(&ptp_class, NULL, parent, + ptp_clock_dev_match); + if (!dev) + return -1; + + ptp = dev_get_drvdata(dev); + phc_index = ptp_clock_index(ptp); + put_device(dev); + + return phc_index; +} +EXPORT_SYMBOL_GPL(ptp_clock_index_by_dev); + int ptp_find_pin(struct ptp_clock *ptp, enum ptp_pin_function func, unsigned int chan) { diff --git a/drivers/ptp/ptp_clockmatrix.c b/drivers/ptp/ptp_clockmatrix.c index b8d4df8c6da2..59cd6bbb33f3 100644 --- a/drivers/ptp/ptp_clockmatrix.c +++ b/drivers/ptp/ptp_clockmatrix.c @@ -1161,7 +1161,7 @@ static int set_pll_output_mask(struct idtcm *idtcm, u16 addr, u8 val) SET_U16_MSB(idtcm->channel[3].output_mask, val); break; default: - err = -EFAULT; /* Bad address */; + err = -EFAULT; /* Bad address */ break; } diff --git a/drivers/ptp/ptp_netc.c b/drivers/ptp/ptp_netc.c new file mode 100644 index 000000000000..8c5fea1f43fa --- /dev/null +++ b/drivers/ptp/ptp_netc.c @@ -0,0 
+1,1017 @@ +// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) +/* + * NXP NETC V4 Timer driver + * Copyright 2025 NXP + */ + +#include <linux/bitfield.h> +#include <linux/clk.h> +#include <linux/fsl/netc_global.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/of_platform.h> +#include <linux/pci.h> +#include <linux/ptp_clock_kernel.h> + +#define NETC_TMR_PCI_VENDOR_NXP 0x1131 + +#define NETC_TMR_CTRL 0x0080 +#define TMR_CTRL_CK_SEL GENMASK(1, 0) +#define TMR_CTRL_TE BIT(2) +#define TMR_ETEP(i) BIT(8 + (i)) +#define TMR_COMP_MODE BIT(15) +#define TMR_CTRL_TCLK_PERIOD GENMASK(25, 16) +#define TMR_CTRL_FS BIT(28) + +#define NETC_TMR_TEVENT 0x0084 +#define TMR_TEVNET_PPEN(i) BIT(7 - (i)) +#define TMR_TEVENT_PPEN_ALL GENMASK(7, 5) +#define TMR_TEVENT_ALMEN(i) BIT(16 + (i)) +#define TMR_TEVENT_ETS_THREN(i) BIT(20 + (i)) +#define TMR_TEVENT_ETSEN(i) BIT(24 + (i)) +#define TMR_TEVENT_ETS_OVEN(i) BIT(28 + (i)) +#define TMR_TEVENT_ETS(i) (TMR_TEVENT_ETS_THREN(i) | \ + TMR_TEVENT_ETSEN(i) | \ + TMR_TEVENT_ETS_OVEN(i)) + +#define NETC_TMR_TEMASK 0x0088 +#define NETC_TMR_STAT 0x0094 +#define TMR_STAT_ETS_VLD(i) BIT(24 + (i)) + +#define NETC_TMR_CNT_L 0x0098 +#define NETC_TMR_CNT_H 0x009c +#define NETC_TMR_ADD 0x00a0 +#define NETC_TMR_PRSC 0x00a8 +#define NETC_TMR_ECTRL 0x00ac +#define NETC_TMR_OFF_L 0x00b0 +#define NETC_TMR_OFF_H 0x00b4 + +/* i = 0, 1, i indicates the index of TMR_ALARM */ +#define NETC_TMR_ALARM_L(i) (0x00b8 + (i) * 8) +#define NETC_TMR_ALARM_H(i) (0x00bc + (i) * 8) + +/* i = 0, 1, 2. i indicates the index of TMR_FIPER. */ +#define NETC_TMR_FIPER(i) (0x00d0 + (i) * 4) + +#define NETC_TMR_FIPER_CTRL 0x00dc +#define FIPER_CTRL_DIS(i) (BIT(7) << (i) * 8) +#define FIPER_CTRL_PG(i) (BIT(6) << (i) * 8) +#define FIPER_CTRL_FS_ALARM(i) (BIT(5) << (i) * 8) +#define FIPER_CTRL_PW(i) (GENMASK(4, 0) << (i) * 8) +#define FIPER_CTRL_SET_PW(i, v) (((v) & GENMASK(4, 0)) << 8 * (i)) + +/* i = 0, 1, i indicates the index of TMR_ETTS */ +#define NETC_TMR_ETTS_L(i) (0x00e0 + (i) * 8) +#define NETC_TMR_ETTS_H(i) (0x00e4 + (i) * 8) +#define NETC_TMR_CUR_TIME_L 0x00f0 +#define NETC_TMR_CUR_TIME_H 0x00f4 + +#define NETC_TMR_REGS_BAR 0 +#define NETC_GLOBAL_OFFSET 0x10000 +#define NETC_GLOBAL_IPBRR0 0xbf8 +#define IPBRR0_IP_REV GENMASK(15, 0) +#define NETC_REV_4_1 0x0401 + +#define NETC_TMR_FIPER_NUM 3 +#define NETC_TMR_INVALID_CHANNEL NETC_TMR_FIPER_NUM +#define NETC_TMR_DEFAULT_PRSC 2 +#define NETC_TMR_DEFAULT_ALARM GENMASK_ULL(63, 0) +#define NETC_TMR_DEFAULT_FIPER GENMASK(31, 0) +#define NETC_TMR_FIPER_MAX_PW GENMASK(4, 0) +#define NETC_TMR_ALARM_NUM 2 +#define NETC_TMR_DEFAULT_ETTF_THR 7 + +/* 1588 timer reference clock source select */ +#define NETC_TMR_CCM_TIMER1 0 /* enet_timer1_clk_root, from CCM */ +#define NETC_TMR_SYSTEM_CLK 1 /* enet_clk_root/2, from CCM */ +#define NETC_TMR_EXT_OSC 2 /* tmr_1588_clk, from IO pins */ + +#define NETC_TMR_SYSCLK_333M 333333333U + +enum netc_pp_type { + NETC_PP_PPS = 1, + NETC_PP_PEROUT, +}; + +struct netc_pp { + enum netc_pp_type type; + bool enabled; + int alarm_id; + u32 period; /* pulse period, ns */ + u64 stime; /* start time, ns */ +}; + +struct netc_timer { + void __iomem *base; + struct pci_dev *pdev; + spinlock_t lock; /* Prevent concurrent access to registers */ + + struct ptp_clock *clock; + struct ptp_clock_info caps; + u32 clk_select; + u32 clk_freq; + u32 oclk_prsc; + /* High 32-bit is integer part, low 32-bit is fractional part */ + u64 period; + + int irq; + char irq_name[24]; + int revision; + u32 tmr_emask; + u8 pps_channel; 
+ u8 fs_alarm_num; + u8 fs_alarm_bitmap; + struct netc_pp pp[NETC_TMR_FIPER_NUM]; /* periodic pulse */ +}; + +#define netc_timer_rd(p, o) netc_read((p)->base + (o)) +#define netc_timer_wr(p, o, v) netc_write((p)->base + (o), v) +#define ptp_to_netc_timer(ptp) container_of((ptp), struct netc_timer, caps) + +static const char *const timer_clk_src[] = { + "ccm", + "ext" +}; + +static void netc_timer_cnt_write(struct netc_timer *priv, u64 ns) +{ + u32 tmr_cnt_h = upper_32_bits(ns); + u32 tmr_cnt_l = lower_32_bits(ns); + + /* Writes to the TMR_CNT_L register copies the written value + * into the shadow TMR_CNT_L register. Writes to the TMR_CNT_H + * register copies the values written into the shadow TMR_CNT_H + * register. Contents of the shadow registers are copied into + * the TMR_CNT_L and TMR_CNT_H registers following a write into + * the TMR_CNT_H register. So the user must writes to TMR_CNT_L + * register first. Other H/L registers should have the same + * behavior. + */ + netc_timer_wr(priv, NETC_TMR_CNT_L, tmr_cnt_l); + netc_timer_wr(priv, NETC_TMR_CNT_H, tmr_cnt_h); +} + +static u64 netc_timer_offset_read(struct netc_timer *priv) +{ + u32 tmr_off_l, tmr_off_h; + u64 offset; + + tmr_off_l = netc_timer_rd(priv, NETC_TMR_OFF_L); + tmr_off_h = netc_timer_rd(priv, NETC_TMR_OFF_H); + offset = (((u64)tmr_off_h) << 32) | tmr_off_l; + + return offset; +} + +static void netc_timer_offset_write(struct netc_timer *priv, u64 offset) +{ + u32 tmr_off_h = upper_32_bits(offset); + u32 tmr_off_l = lower_32_bits(offset); + + netc_timer_wr(priv, NETC_TMR_OFF_L, tmr_off_l); + netc_timer_wr(priv, NETC_TMR_OFF_H, tmr_off_h); +} + +static u64 netc_timer_cur_time_read(struct netc_timer *priv) +{ + u32 time_h, time_l; + u64 ns; + + /* The user should read NETC_TMR_CUR_TIME_L first to + * get correct current time. + */ + time_l = netc_timer_rd(priv, NETC_TMR_CUR_TIME_L); + time_h = netc_timer_rd(priv, NETC_TMR_CUR_TIME_H); + ns = (u64)time_h << 32 | time_l; + + return ns; +} + +static void netc_timer_alarm_write(struct netc_timer *priv, + u64 alarm, int index) +{ + u32 alarm_h = upper_32_bits(alarm); + u32 alarm_l = lower_32_bits(alarm); + + netc_timer_wr(priv, NETC_TMR_ALARM_L(index), alarm_l); + netc_timer_wr(priv, NETC_TMR_ALARM_H(index), alarm_h); +} + +static u32 netc_timer_get_integral_period(struct netc_timer *priv) +{ + u32 tmr_ctrl, integral_period; + + tmr_ctrl = netc_timer_rd(priv, NETC_TMR_CTRL); + integral_period = FIELD_GET(TMR_CTRL_TCLK_PERIOD, tmr_ctrl); + + return integral_period; +} + +static u32 netc_timer_calculate_fiper_pw(struct netc_timer *priv, + u32 fiper) +{ + u64 divisor, pulse_width; + + /* Set the FIPER pulse width to half FIPER interval by default. + * pulse_width = (fiper / 2) / TMR_GCLK_period, + * TMR_GCLK_period = NSEC_PER_SEC / TMR_GCLK_freq, + * TMR_GCLK_freq = (clk_freq / oclk_prsc) Hz, + * so pulse_width = fiper * clk_freq / (2 * NSEC_PER_SEC * oclk_prsc). 
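Plugging the probe-time defaults into the formula just derived makes the 5-bit clamp a few lines below concrete. A worked instance, assuming clk_freq = 333333333 Hz (NETC_TMR_SYSCLK_333M), oclk_prsc = 2 (NETC_TMR_DEFAULT_PRSC) and a 1 s FIPER interval:

u64 divisor = mul_u32_u32(2 * NSEC_PER_SEC, 2);	/* 4e9 */
u64 pw = div64_u64(mul_u32_u32(NSEC_PER_SEC, 333333333U), divisor);
/* pw == 83333333 TMR_GCLK ticks, far beyond the field's maximum of 31;
 * with a 6 ns TMR_GCLK period the pulse is therefore clamped to about
 * 31 * 6 ns = 186 ns unless oclk_prsc is raised.
 */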
+ */ + divisor = mul_u32_u32(2 * NSEC_PER_SEC, priv->oclk_prsc); + pulse_width = div64_u64(mul_u32_u32(fiper, priv->clk_freq), divisor); + + /* The FIPER_PW field only has 5 bits, need to update oclk_prsc */ + if (pulse_width > NETC_TMR_FIPER_MAX_PW) + pulse_width = NETC_TMR_FIPER_MAX_PW; + + return pulse_width; +} + +static void netc_timer_set_pps_alarm(struct netc_timer *priv, int channel, + u32 integral_period) +{ + struct netc_pp *pp = &priv->pp[channel]; + u64 alarm; + + /* Get the alarm value */ + alarm = netc_timer_cur_time_read(priv) + NSEC_PER_MSEC; + alarm = roundup_u64(alarm, NSEC_PER_SEC); + alarm = roundup_u64(alarm, integral_period); + + netc_timer_alarm_write(priv, alarm, pp->alarm_id); +} + +static void netc_timer_set_perout_alarm(struct netc_timer *priv, int channel, + u32 integral_period) +{ + u64 cur_time = netc_timer_cur_time_read(priv); + struct netc_pp *pp = &priv->pp[channel]; + u64 alarm, delta, min_time; + u32 period = pp->period; + u64 stime = pp->stime; + + min_time = cur_time + NSEC_PER_MSEC + period; + if (stime < min_time) { + delta = min_time - stime; + stime += roundup_u64(delta, period); + } + + alarm = roundup_u64(stime - period, integral_period); + netc_timer_alarm_write(priv, alarm, pp->alarm_id); +} + +static int netc_timer_get_alarm_id(struct netc_timer *priv) +{ + int i; + + for (i = 0; i < priv->fs_alarm_num; i++) { + if (!(priv->fs_alarm_bitmap & BIT(i))) { + priv->fs_alarm_bitmap |= BIT(i); + break; + } + } + + return i; +} + +static u64 netc_timer_get_gclk_period(struct netc_timer *priv) +{ + /* TMR_GCLK_freq = (clk_freq / oclk_prsc) Hz. + * TMR_GCLK_period = NSEC_PER_SEC / TMR_GCLK_freq. + * TMR_GCLK_period = (NSEC_PER_SEC * oclk_prsc) / clk_freq + */ + + return div_u64(mul_u32_u32(NSEC_PER_SEC, priv->oclk_prsc), + priv->clk_freq); +} + +static void netc_timer_enable_periodic_pulse(struct netc_timer *priv, + u8 channel) +{ + u32 fiper_pw, fiper, fiper_ctrl, integral_period; + struct netc_pp *pp = &priv->pp[channel]; + int alarm_id = pp->alarm_id; + + integral_period = netc_timer_get_integral_period(priv); + /* Set to desired FIPER interval in ns - TCLK_PERIOD */ + fiper = pp->period - integral_period; + fiper_pw = netc_timer_calculate_fiper_pw(priv, fiper); + + fiper_ctrl = netc_timer_rd(priv, NETC_TMR_FIPER_CTRL); + fiper_ctrl &= ~(FIPER_CTRL_DIS(channel) | FIPER_CTRL_PW(channel) | + FIPER_CTRL_FS_ALARM(channel)); + fiper_ctrl |= FIPER_CTRL_SET_PW(channel, fiper_pw); + fiper_ctrl |= alarm_id ? 
FIPER_CTRL_FS_ALARM(channel) : 0; + + priv->tmr_emask |= TMR_TEVNET_PPEN(channel) | + TMR_TEVENT_ALMEN(alarm_id); + + if (pp->type == NETC_PP_PPS) + netc_timer_set_pps_alarm(priv, channel, integral_period); + else + netc_timer_set_perout_alarm(priv, channel, integral_period); + + netc_timer_wr(priv, NETC_TMR_TEMASK, priv->tmr_emask); + netc_timer_wr(priv, NETC_TMR_FIPER(channel), fiper); + netc_timer_wr(priv, NETC_TMR_FIPER_CTRL, fiper_ctrl); +} + +static void netc_timer_disable_periodic_pulse(struct netc_timer *priv, + u8 channel) +{ + struct netc_pp *pp = &priv->pp[channel]; + int alarm_id = pp->alarm_id; + u32 fiper_ctrl; + + if (!pp->enabled) + return; + + priv->tmr_emask &= ~(TMR_TEVNET_PPEN(channel) | + TMR_TEVENT_ALMEN(alarm_id)); + + fiper_ctrl = netc_timer_rd(priv, NETC_TMR_FIPER_CTRL); + fiper_ctrl |= FIPER_CTRL_DIS(channel); + + netc_timer_alarm_write(priv, NETC_TMR_DEFAULT_ALARM, alarm_id); + netc_timer_wr(priv, NETC_TMR_TEMASK, priv->tmr_emask); + netc_timer_wr(priv, NETC_TMR_FIPER(channel), NETC_TMR_DEFAULT_FIPER); + netc_timer_wr(priv, NETC_TMR_FIPER_CTRL, fiper_ctrl); +} + +static u8 netc_timer_select_pps_channel(struct netc_timer *priv) +{ + int i; + + for (i = 0; i < NETC_TMR_FIPER_NUM; i++) { + if (!priv->pp[i].enabled) + return i; + } + + return NETC_TMR_INVALID_CHANNEL; +} + +/* Note that users should not use this API to output PPS signal on + * external pins, because PTP_CLK_REQ_PPS trigger internal PPS event + * for input into kernel PPS subsystem. See: + * https://lore.kernel.org/r/20201117213826.18235-1-a.fatoum@pengutronix.de + */ +static int netc_timer_enable_pps(struct netc_timer *priv, + struct ptp_clock_request *rq, int on) +{ + struct device *dev = &priv->pdev->dev; + unsigned long flags; + struct netc_pp *pp; + int err = 0; + + spin_lock_irqsave(&priv->lock, flags); + + if (on) { + int alarm_id; + u8 channel; + + if (priv->pps_channel < NETC_TMR_FIPER_NUM) { + channel = priv->pps_channel; + } else { + channel = netc_timer_select_pps_channel(priv); + if (channel == NETC_TMR_INVALID_CHANNEL) { + dev_err(dev, "No available FIPERs\n"); + err = -EBUSY; + goto unlock_spinlock; + } + } + + pp = &priv->pp[channel]; + if (pp->enabled) + goto unlock_spinlock; + + alarm_id = netc_timer_get_alarm_id(priv); + if (alarm_id == priv->fs_alarm_num) { + dev_err(dev, "No available ALARMs\n"); + err = -EBUSY; + goto unlock_spinlock; + } + + pp->enabled = true; + pp->type = NETC_PP_PPS; + pp->alarm_id = alarm_id; + pp->period = NSEC_PER_SEC; + priv->pps_channel = channel; + + netc_timer_enable_periodic_pulse(priv, channel); + } else { + /* pps_channel is invalid if PPS is not enabled, so no + * processing is needed. 
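As the note above netc_timer_enable_pps() warns, PTP_CLK_REQ_PPS only feeds kernel-internal PPS events; a pulse on a FIPER pin is requested from userspace through the periodic-output API instead. A sketch of that call, where the device path and channel are assumptions, not something this patch defines:

#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/ptp_clock.h>

static int request_1hz_output(void)
{
	struct ptp_perout_request req;
	int fd = open("/dev/ptp0", O_RDWR);	/* assumed PHC chardev */

	if (fd < 0)
		return -1;

	memset(&req, 0, sizeof(req));
	req.index = 0;		/* FIPER channel, per the DT mapping */
	req.period.sec = 1;	/* 1 Hz */
	req.start.sec = 0;	/* this driver pushes a stale start forward */

	return ioctl(fd, PTP_PEROUT_REQUEST, &req);
}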
+ */ + if (priv->pps_channel >= NETC_TMR_FIPER_NUM) + goto unlock_spinlock; + + netc_timer_disable_periodic_pulse(priv, priv->pps_channel); + pp = &priv->pp[priv->pps_channel]; + priv->fs_alarm_bitmap &= ~BIT(pp->alarm_id); + memset(pp, 0, sizeof(*pp)); + priv->pps_channel = NETC_TMR_INVALID_CHANNEL; + } + +unlock_spinlock: + spin_unlock_irqrestore(&priv->lock, flags); + + return err; +} + +static int net_timer_enable_perout(struct netc_timer *priv, + struct ptp_clock_request *rq, int on) +{ + struct device *dev = &priv->pdev->dev; + u32 channel = rq->perout.index; + unsigned long flags; + struct netc_pp *pp; + int err = 0; + + spin_lock_irqsave(&priv->lock, flags); + + pp = &priv->pp[channel]; + if (pp->type == NETC_PP_PPS) { + dev_err(dev, "FIPER%u is being used for PPS\n", channel); + err = -EBUSY; + goto unlock_spinlock; + } + + if (on) { + u64 period_ns, gclk_period, max_period, min_period; + struct timespec64 period, stime; + u32 integral_period; + int alarm_id; + + period.tv_sec = rq->perout.period.sec; + period.tv_nsec = rq->perout.period.nsec; + period_ns = timespec64_to_ns(&period); + + integral_period = netc_timer_get_integral_period(priv); + max_period = (u64)NETC_TMR_DEFAULT_FIPER + integral_period; + gclk_period = netc_timer_get_gclk_period(priv); + min_period = gclk_period * 4 + integral_period; + if (period_ns > max_period || period_ns < min_period) { + dev_err(dev, "The period range is %llu ~ %llu\n", + min_period, max_period); + err = -EINVAL; + goto unlock_spinlock; + } + + if (pp->enabled) { + alarm_id = pp->alarm_id; + } else { + alarm_id = netc_timer_get_alarm_id(priv); + if (alarm_id == priv->fs_alarm_num) { + dev_err(dev, "No available ALARMs\n"); + err = -EBUSY; + goto unlock_spinlock; + } + + pp->type = NETC_PP_PEROUT; + pp->enabled = true; + pp->alarm_id = alarm_id; + } + + stime.tv_sec = rq->perout.start.sec; + stime.tv_nsec = rq->perout.start.nsec; + pp->stime = timespec64_to_ns(&stime); + pp->period = period_ns; + + netc_timer_enable_periodic_pulse(priv, channel); + } else { + netc_timer_disable_periodic_pulse(priv, channel); + priv->fs_alarm_bitmap &= ~BIT(pp->alarm_id); + memset(pp, 0, sizeof(*pp)); + } + +unlock_spinlock: + spin_unlock_irqrestore(&priv->lock, flags); + + return err; +} + +static void netc_timer_handle_etts_event(struct netc_timer *priv, int index, + bool update_event) +{ + struct ptp_clock_event event; + u32 etts_l = 0, etts_h = 0; + + while (netc_timer_rd(priv, NETC_TMR_STAT) & TMR_STAT_ETS_VLD(index)) { + etts_l = netc_timer_rd(priv, NETC_TMR_ETTS_L(index)); + etts_h = netc_timer_rd(priv, NETC_TMR_ETTS_H(index)); + } + + /* Invalid time stamp */ + if (!etts_l && !etts_h) + return; + + if (update_event) { + event.type = PTP_CLOCK_EXTTS; + event.index = index; + event.timestamp = (u64)etts_h << 32; + event.timestamp |= etts_l; + ptp_clock_event(priv->clock, &event); + } +} + +static int netc_timer_enable_extts(struct netc_timer *priv, + struct ptp_clock_request *rq, int on) +{ + int index = rq->extts.index; + unsigned long flags; + u32 tmr_ctrl; + + /* Reject requests to enable time stamping on both edges */ + if ((rq->extts.flags & PTP_EXTTS_EDGES) == PTP_EXTTS_EDGES) + return -EOPNOTSUPP; + + spin_lock_irqsave(&priv->lock, flags); + + netc_timer_handle_etts_event(priv, rq->extts.index, false); + if (on) { + tmr_ctrl = netc_timer_rd(priv, NETC_TMR_CTRL); + if (rq->extts.flags & PTP_FALLING_EDGE) + tmr_ctrl |= TMR_ETEP(index); + else + tmr_ctrl &= ~TMR_ETEP(index); + + netc_timer_wr(priv, NETC_TMR_CTRL, tmr_ctrl); + priv->tmr_emask |= 
TMR_TEVENT_ETS(index); + } else { + priv->tmr_emask &= ~TMR_TEVENT_ETS(index); + } + + netc_timer_wr(priv, NETC_TMR_TEMASK, priv->tmr_emask); + + spin_unlock_irqrestore(&priv->lock, flags); + + return 0; +} + +static void netc_timer_disable_fiper(struct netc_timer *priv) +{ + u32 fiper_ctrl = netc_timer_rd(priv, NETC_TMR_FIPER_CTRL); + int i; + + for (i = 0; i < NETC_TMR_FIPER_NUM; i++) { + if (!priv->pp[i].enabled) + continue; + + fiper_ctrl |= FIPER_CTRL_DIS(i); + netc_timer_wr(priv, NETC_TMR_FIPER(i), NETC_TMR_DEFAULT_FIPER); + } + + netc_timer_wr(priv, NETC_TMR_FIPER_CTRL, fiper_ctrl); +} + +static void netc_timer_enable_fiper(struct netc_timer *priv) +{ + u32 integral_period = netc_timer_get_integral_period(priv); + u32 fiper_ctrl = netc_timer_rd(priv, NETC_TMR_FIPER_CTRL); + int i; + + for (i = 0; i < NETC_TMR_FIPER_NUM; i++) { + struct netc_pp *pp = &priv->pp[i]; + u32 fiper; + + if (!pp->enabled) + continue; + + fiper_ctrl &= ~FIPER_CTRL_DIS(i); + + if (pp->type == NETC_PP_PPS) + netc_timer_set_pps_alarm(priv, i, integral_period); + else if (pp->type == NETC_PP_PEROUT) + netc_timer_set_perout_alarm(priv, i, integral_period); + + fiper = pp->period - integral_period; + netc_timer_wr(priv, NETC_TMR_FIPER(i), fiper); + } + + netc_timer_wr(priv, NETC_TMR_FIPER_CTRL, fiper_ctrl); +} + +static int netc_timer_enable(struct ptp_clock_info *ptp, + struct ptp_clock_request *rq, int on) +{ + struct netc_timer *priv = ptp_to_netc_timer(ptp); + + switch (rq->type) { + case PTP_CLK_REQ_PPS: + return netc_timer_enable_pps(priv, rq, on); + case PTP_CLK_REQ_PEROUT: + return net_timer_enable_perout(priv, rq, on); + case PTP_CLK_REQ_EXTTS: + return netc_timer_enable_extts(priv, rq, on); + default: + return -EOPNOTSUPP; + } +} + +static void netc_timer_adjust_period(struct netc_timer *priv, u64 period) +{ + u32 fractional_period = lower_32_bits(period); + u32 integral_period = upper_32_bits(period); + u32 tmr_ctrl, old_tmr_ctrl; + unsigned long flags; + + spin_lock_irqsave(&priv->lock, flags); + + old_tmr_ctrl = netc_timer_rd(priv, NETC_TMR_CTRL); + tmr_ctrl = u32_replace_bits(old_tmr_ctrl, integral_period, + TMR_CTRL_TCLK_PERIOD); + if (tmr_ctrl != old_tmr_ctrl) { + netc_timer_disable_fiper(priv); + netc_timer_wr(priv, NETC_TMR_CTRL, tmr_ctrl); + netc_timer_enable_fiper(priv); + } + + netc_timer_wr(priv, NETC_TMR_ADD, fractional_period); + + spin_unlock_irqrestore(&priv->lock, flags); +} + +static int netc_timer_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) +{ + struct netc_timer *priv = ptp_to_netc_timer(ptp); + u64 new_period; + + new_period = adjust_by_scaled_ppm(priv->period, scaled_ppm); + netc_timer_adjust_period(priv, new_period); + + return 0; +} + +static int netc_timer_adjtime(struct ptp_clock_info *ptp, s64 delta) +{ + struct netc_timer *priv = ptp_to_netc_timer(ptp); + unsigned long flags; + s64 tmr_off; + + spin_lock_irqsave(&priv->lock, flags); + + netc_timer_disable_fiper(priv); + + /* Adjusting TMROFF instead of TMR_CNT is that the timer + * counter keeps increasing during reading and writing + * TMR_CNT, which will cause latency. 
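From userspace this adjtime path is reached with clock_adjtime(ADJ_SETOFFSET), which the ptp_clock.c hunk earlier in this diff now validates against the settable range before calling into the driver. A sketch stepping a PHC by +500 ms; FD_TO_CLOCKID() is the usual dynamic-posix-clock encoding from the kernel's testptp tool, repeated here as an assumption:

#include <fcntl.h>
#include <string.h>
#include <time.h>
#include <sys/timex.h>

#define FD_TO_CLOCKID(fd)	((~(clockid_t)(fd) << 3) | 3)

static int step_phc(const char *dev)
{
	struct timex tx;
	int fd = open(dev, O_RDWR);

	if (fd < 0)
		return -1;

	memset(&tx, 0, sizeof(tx));
	tx.modes = ADJ_SETOFFSET | ADJ_NANO;
	tx.time.tv_sec = 0;
	tx.time.tv_usec = 500000000;	/* nanoseconds with ADJ_NANO */

	return clock_adjtime(FD_TO_CLOCKID(fd), &tx);
}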
+ */ + tmr_off = netc_timer_offset_read(priv); + tmr_off += delta; + netc_timer_offset_write(priv, tmr_off); + + netc_timer_enable_fiper(priv); + + spin_unlock_irqrestore(&priv->lock, flags); + + return 0; +} + +static int netc_timer_gettimex64(struct ptp_clock_info *ptp, + struct timespec64 *ts, + struct ptp_system_timestamp *sts) +{ + struct netc_timer *priv = ptp_to_netc_timer(ptp); + unsigned long flags; + u64 ns; + + spin_lock_irqsave(&priv->lock, flags); + + ptp_read_system_prets(sts); + ns = netc_timer_cur_time_read(priv); + ptp_read_system_postts(sts); + + spin_unlock_irqrestore(&priv->lock, flags); + + *ts = ns_to_timespec64(ns); + + return 0; +} + +static int netc_timer_settime64(struct ptp_clock_info *ptp, + const struct timespec64 *ts) +{ + struct netc_timer *priv = ptp_to_netc_timer(ptp); + u64 ns = timespec64_to_ns(ts); + unsigned long flags; + + spin_lock_irqsave(&priv->lock, flags); + + netc_timer_disable_fiper(priv); + netc_timer_offset_write(priv, 0); + netc_timer_cnt_write(priv, ns); + netc_timer_enable_fiper(priv); + + spin_unlock_irqrestore(&priv->lock, flags); + + return 0; +} + +static const struct ptp_clock_info netc_timer_ptp_caps = { + .owner = THIS_MODULE, + .name = "NETC Timer PTP clock", + .max_adj = 500000000, + .n_pins = 0, + .n_alarm = 2, + .pps = 1, + .n_per_out = 3, + .n_ext_ts = 2, + .supported_extts_flags = PTP_RISING_EDGE | PTP_FALLING_EDGE | + PTP_STRICT_FLAGS, + .adjfine = netc_timer_adjfine, + .adjtime = netc_timer_adjtime, + .gettimex64 = netc_timer_gettimex64, + .settime64 = netc_timer_settime64, + .enable = netc_timer_enable, +}; + +static void netc_timer_init(struct netc_timer *priv) +{ + u32 fractional_period = lower_32_bits(priv->period); + u32 integral_period = upper_32_bits(priv->period); + u32 tmr_ctrl, fiper_ctrl; + struct timespec64 now; + u64 ns; + int i; + + /* Software must enable timer first and the clock selected must be + * active, otherwise, the registers which are in the timer clock + * domain are not accessible. + */ + tmr_ctrl = FIELD_PREP(TMR_CTRL_CK_SEL, priv->clk_select) | + TMR_CTRL_TE | TMR_CTRL_FS; + netc_timer_wr(priv, NETC_TMR_CTRL, tmr_ctrl); + netc_timer_wr(priv, NETC_TMR_PRSC, priv->oclk_prsc); + + /* Disable FIPER by default */ + fiper_ctrl = netc_timer_rd(priv, NETC_TMR_FIPER_CTRL); + for (i = 0; i < NETC_TMR_FIPER_NUM; i++) { + fiper_ctrl |= FIPER_CTRL_DIS(i); + fiper_ctrl &= ~FIPER_CTRL_PG(i); + } + netc_timer_wr(priv, NETC_TMR_FIPER_CTRL, fiper_ctrl); + netc_timer_wr(priv, NETC_TMR_ECTRL, NETC_TMR_DEFAULT_ETTF_THR); + + ktime_get_real_ts64(&now); + ns = timespec64_to_ns(&now); + netc_timer_cnt_write(priv, ns); + + /* Allow atomic writes to TCLK_PERIOD and TMR_ADD, An update to + * TCLK_PERIOD does not take effect until TMR_ADD is written. 
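netc_timer_gettimex64() above brackets the counter read with ptp_read_system_prets()/ptp_read_system_postts(), which is what lets userspace bound a PHC sample between two system-clock reads. A sketch of the consumer side via PTP_SYS_OFFSET_EXTENDED (the open PHC fd is assumed; nothing here is added by this patch):

#include <string.h>
#include <sys/ioctl.h>
#include <linux/ptp_clock.h>

static int sample_offset(int fd, __s64 *offset_ns)
{
	struct ptp_sys_offset_extended req;
	__s64 t1, phc, t2;

	memset(&req, 0, sizeof(req));
	req.n_samples = 1;
	if (ioctl(fd, PTP_SYS_OFFSET_EXTENDED, &req) < 0)
		return -1;

	t1  = req.ts[0][0].sec * 1000000000LL + req.ts[0][0].nsec;
	phc = req.ts[0][1].sec * 1000000000LL + req.ts[0][1].nsec;
	t2  = req.ts[0][2].sec * 1000000000LL + req.ts[0][2].nsec;

	/* PHC was sampled between t1 and t2; use their midpoint. */
	*offset_ns = (t1 + t2) / 2 - phc;
	return 0;
}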
+ */ + tmr_ctrl |= FIELD_PREP(TMR_CTRL_TCLK_PERIOD, integral_period) | + TMR_COMP_MODE; + netc_timer_wr(priv, NETC_TMR_CTRL, tmr_ctrl); + netc_timer_wr(priv, NETC_TMR_ADD, fractional_period); +} + +static int netc_timer_pci_probe(struct pci_dev *pdev) +{ + struct device *dev = &pdev->dev; + struct netc_timer *priv; + int err; + + priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + pcie_flr(pdev); + err = pci_enable_device_mem(pdev); + if (err) + return dev_err_probe(dev, err, "Failed to enable device\n"); + + dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64)); + err = pci_request_mem_regions(pdev, KBUILD_MODNAME); + if (err) { + dev_err(dev, "pci_request_regions() failed, err:%pe\n", + ERR_PTR(err)); + goto disable_dev; + } + + pci_set_master(pdev); + + priv->pdev = pdev; + priv->base = pci_ioremap_bar(pdev, NETC_TMR_REGS_BAR); + if (!priv->base) { + err = -ENOMEM; + goto release_mem_regions; + } + + pci_set_drvdata(pdev, priv); + + return 0; + +release_mem_regions: + pci_release_mem_regions(pdev); +disable_dev: + pci_disable_device(pdev); + + return err; +} + +static void netc_timer_pci_remove(struct pci_dev *pdev) +{ + struct netc_timer *priv = pci_get_drvdata(pdev); + + iounmap(priv->base); + pci_release_mem_regions(pdev); + pci_disable_device(pdev); +} + +static int netc_timer_get_reference_clk_source(struct netc_timer *priv) +{ + struct device *dev = &priv->pdev->dev; + struct clk *clk; + int i; + + /* Select NETC system clock as the reference clock by default */ + priv->clk_select = NETC_TMR_SYSTEM_CLK; + priv->clk_freq = NETC_TMR_SYSCLK_333M; + + /* Update the clock source of the reference clock if the clock + * is specified in DT node. + */ + for (i = 0; i < ARRAY_SIZE(timer_clk_src); i++) { + clk = devm_clk_get_optional_enabled(dev, timer_clk_src[i]); + if (IS_ERR(clk)) + return dev_err_probe(dev, PTR_ERR(clk), + "Failed to enable clock\n"); + + if (clk) { + priv->clk_freq = clk_get_rate(clk); + priv->clk_select = i ? NETC_TMR_EXT_OSC : + NETC_TMR_CCM_TIMER1; + break; + } + } + + /* The period is a 64-bit number, the high 32-bit is the integer + * part of the period, the low 32-bit is the fractional part of + * the period. In order to get the desired 32-bit fixed-point + * format, multiply the numerator of the fraction by 2^32. 
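Concretely, for the default 333 MHz system clock the division on the next line yields a 32.32 value of roughly 0x3_0000000C: upper_32_bits() gives the 3 ns integral period that lands in TMR_CTRL's TCLK_PERIOD field, and lower_32_bits() gives the small fractional accumulator (~12/2^32 ns per tick, i.e. the missing ~3e-9 ns) that goes to TMR_ADD. A worked instance (values rounded; the driver computes this at probe):

u64 period = div_u64((u64)NSEC_PER_SEC << 32, NETC_TMR_SYSCLK_333M);
u32 integral = upper_32_bits(period);	/* 3 -> TCLK_PERIOD */
u32 fractional = lower_32_bits(period);	/* ~0xC -> TMR_ADD */

adjust_by_scaled_ppm() in netc_timer_adjfine() later rescales this same 32.32 quantity, so small frequency trims typically show up only in the fractional half without disturbing the integral period.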
+ */ + priv->period = div_u64((u64)NSEC_PER_SEC << 32, priv->clk_freq); + + return 0; +} + +static int netc_timer_parse_dt(struct netc_timer *priv) +{ + return netc_timer_get_reference_clk_source(priv); +} + +static irqreturn_t netc_timer_isr(int irq, void *data) +{ + struct netc_timer *priv = data; + struct ptp_clock_event event; + u32 tmr_event; + + spin_lock(&priv->lock); + + tmr_event = netc_timer_rd(priv, NETC_TMR_TEVENT); + tmr_event &= priv->tmr_emask; + /* Clear interrupts status */ + netc_timer_wr(priv, NETC_TMR_TEVENT, tmr_event); + + if (tmr_event & TMR_TEVENT_ALMEN(0)) + netc_timer_alarm_write(priv, NETC_TMR_DEFAULT_ALARM, 0); + + if (tmr_event & TMR_TEVENT_ALMEN(1)) + netc_timer_alarm_write(priv, NETC_TMR_DEFAULT_ALARM, 1); + + if (tmr_event & TMR_TEVENT_PPEN_ALL) { + event.type = PTP_CLOCK_PPS; + ptp_clock_event(priv->clock, &event); + } + + if (tmr_event & TMR_TEVENT_ETS(0)) + netc_timer_handle_etts_event(priv, 0, true); + + if (tmr_event & TMR_TEVENT_ETS(1)) + netc_timer_handle_etts_event(priv, 1, true); + + spin_unlock(&priv->lock); + + return IRQ_HANDLED; +} + +static int netc_timer_init_msix_irq(struct netc_timer *priv) +{ + struct pci_dev *pdev = priv->pdev; + int err, n; + + n = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSIX); + if (n != 1) { + err = (n < 0) ? n : -EPERM; + dev_err(&pdev->dev, "pci_alloc_irq_vectors() failed\n"); + return err; + } + + priv->irq = pci_irq_vector(pdev, 0); + err = request_irq(priv->irq, netc_timer_isr, 0, priv->irq_name, priv); + if (err) { + dev_err(&pdev->dev, "request_irq() failed\n"); + pci_free_irq_vectors(pdev); + + return err; + } + + return 0; +} + +static void netc_timer_free_msix_irq(struct netc_timer *priv) +{ + struct pci_dev *pdev = priv->pdev; + + disable_irq(priv->irq); + free_irq(priv->irq, priv); + pci_free_irq_vectors(pdev); +} + +static int netc_timer_get_global_ip_rev(struct netc_timer *priv) +{ + u32 val; + + val = netc_timer_rd(priv, NETC_GLOBAL_OFFSET + NETC_GLOBAL_IPBRR0); + + return val & IPBRR0_IP_REV; +} + +static int netc_timer_probe(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + struct device *dev = &pdev->dev; + struct netc_timer *priv; + int err; + + err = netc_timer_pci_probe(pdev); + if (err) + return err; + + priv = pci_get_drvdata(pdev); + priv->revision = netc_timer_get_global_ip_rev(priv); + if (priv->revision == NETC_REV_4_1) + priv->fs_alarm_num = 1; + else + priv->fs_alarm_num = NETC_TMR_ALARM_NUM; + + err = netc_timer_parse_dt(priv); + if (err) + goto timer_pci_remove; + + priv->caps = netc_timer_ptp_caps; + priv->oclk_prsc = NETC_TMR_DEFAULT_PRSC; + priv->pps_channel = NETC_TMR_INVALID_CHANNEL; + spin_lock_init(&priv->lock); + snprintf(priv->irq_name, sizeof(priv->irq_name), "ptp-netc %s", + pci_name(pdev)); + + err = netc_timer_init_msix_irq(priv); + if (err) + goto timer_pci_remove; + + netc_timer_init(priv); + priv->clock = ptp_clock_register(&priv->caps, dev); + if (IS_ERR(priv->clock)) { + err = PTR_ERR(priv->clock); + goto free_msix_irq; + } + + return 0; + +free_msix_irq: + netc_timer_free_msix_irq(priv); +timer_pci_remove: + netc_timer_pci_remove(pdev); + + return err; +} + +static void netc_timer_remove(struct pci_dev *pdev) +{ + struct netc_timer *priv = pci_get_drvdata(pdev); + + netc_timer_wr(priv, NETC_TMR_TEMASK, 0); + netc_timer_wr(priv, NETC_TMR_CTRL, 0); + ptp_clock_unregister(priv->clock); + netc_timer_free_msix_irq(priv); + netc_timer_pci_remove(pdev); +} + +static const struct pci_device_id netc_timer_id_table[] = { + { PCI_DEVICE(NETC_TMR_PCI_VENDOR_NXP, 0xee02) 
}, + { } +}; +MODULE_DEVICE_TABLE(pci, netc_timer_id_table); + +static struct pci_driver netc_timer_driver = { + .name = KBUILD_MODNAME, + .id_table = netc_timer_id_table, + .probe = netc_timer_probe, + .remove = netc_timer_remove, +}; +module_pci_driver(netc_timer_driver); + +MODULE_DESCRIPTION("NXP NETC Timer PTP Driver"); +MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/staging/octeon/ethernet-tx.c b/drivers/staging/octeon/ethernet-tx.c index 261f8dbdc382..0ba240e634a1 100644 --- a/drivers/staging/octeon/ethernet-tx.c +++ b/drivers/staging/octeon/ethernet-tx.c @@ -346,8 +346,7 @@ netdev_tx_t cvm_oct_xmit(struct sk_buff *skb, struct net_device *dev) * The skbuff will be reused without ever being freed. We must * cleanup a bunch of core things. */ - dst_release(skb_dst(skb)); - skb_dst_set(skb, NULL); + skb_dst_drop(skb); skb_ext_reset(skb); nf_reset_ct(skb); skb_reset_redirect(skb); diff --git a/include/linux/bnxt/hsi.h b/include/linux/bnxt/hsi.h index 549231703bce..8c5dac3b3ef3 100644 --- a/include/linux/bnxt/hsi.h +++ b/include/linux/bnxt/hsi.h @@ -276,6 +276,10 @@ struct cmd_nums { #define HWRM_REG_POWER_QUERY 0xe1UL #define HWRM_CORE_FREQUENCY_QUERY 0xe2UL #define HWRM_REG_POWER_HISTOGRAM 0xe3UL + #define HWRM_MONITOR_PAX_HISTOGRAM_START 0xe4UL + #define HWRM_MONITOR_PAX_HISTOGRAM_COLLECT 0xe5UL + #define HWRM_STAT_QUERY_ROCE_STATS 0xe6UL + #define HWRM_STAT_QUERY_ROCE_STATS_EXT 0xe7UL #define HWRM_WOL_FILTER_ALLOC 0xf0UL #define HWRM_WOL_FILTER_FREE 0xf1UL #define HWRM_WOL_FILTER_QCFG 0xf2UL @@ -407,9 +411,8 @@ struct cmd_nums { #define HWRM_FUNC_LAG_UPDATE 0x1b1UL #define HWRM_FUNC_LAG_FREE 0x1b2UL #define HWRM_FUNC_LAG_QCFG 0x1b3UL - #define HWRM_FUNC_TIMEDTX_PACING_RATE_ADD 0x1c2UL - #define HWRM_FUNC_TIMEDTX_PACING_RATE_DELETE 0x1c3UL - #define HWRM_FUNC_TIMEDTX_PACING_RATE_QUERY 0x1c4UL + #define HWRM_FUNC_TTX_PACING_RATE_PROF_QUERY 0x1c3UL + #define HWRM_FUNC_TTX_PACING_RATE_QUERY 0x1c4UL #define HWRM_SELFTEST_QLIST 0x200UL #define HWRM_SELFTEST_EXEC 0x201UL #define HWRM_SELFTEST_IRQ 0x202UL @@ -441,6 +444,7 @@ struct cmd_nums { #define HWRM_MFG_WRITE_CERT_NVM 0x21cUL #define HWRM_PORT_POE_CFG 0x230UL #define HWRM_PORT_POE_QCFG 0x231UL + #define HWRM_PORT_PHY_FDRSTAT 0x232UL #define HWRM_UDCC_QCAPS 0x258UL #define HWRM_UDCC_CFG 0x259UL #define HWRM_UDCC_QCFG 0x25aUL @@ -453,6 +457,8 @@ struct cmd_nums { #define HWRM_QUEUE_PFCWD_TIMEOUT_QCAPS 0x261UL #define HWRM_QUEUE_PFCWD_TIMEOUT_CFG 0x262UL #define HWRM_QUEUE_PFCWD_TIMEOUT_QCFG 0x263UL + #define HWRM_QUEUE_ADPTV_QOS_RX_QCFG 0x264UL + #define HWRM_QUEUE_ADPTV_QOS_TX_QCFG 0x265UL #define HWRM_TF 0x2bcUL #define HWRM_TF_VERSION_GET 0x2bdUL #define HWRM_TF_SESSION_OPEN 0x2c6UL @@ -551,6 +557,8 @@ struct cmd_nums { #define HWRM_DBG_COREDUMP_CAPTURE 0xff2cUL #define HWRM_DBG_PTRACE 0xff2dUL #define HWRM_DBG_SIM_CABLE_STATE 0xff2eUL + #define HWRM_DBG_TOKEN_QUERY_AUTH_IDS 0xff2fUL + #define HWRM_DBG_TOKEN_CFG 0xff30UL #define HWRM_NVM_GET_VPD_FIELD_INFO 0xffeaUL #define HWRM_NVM_SET_VPD_FIELD_INFO 0xffebUL #define HWRM_NVM_DEFRAG 0xffecUL @@ -632,8 +640,8 @@ struct hwrm_err_output { #define HWRM_VERSION_MAJOR 1 #define HWRM_VERSION_MINOR 10 #define HWRM_VERSION_UPDATE 3 -#define HWRM_VERSION_RSVD 97 -#define HWRM_VERSION_STR "1.10.3.97" +#define HWRM_VERSION_RSVD 133 +#define HWRM_VERSION_STR "1.10.3.133" /* hwrm_ver_get_input (size:192b/24B) */ struct hwrm_ver_get_input { @@ -688,6 +696,7 @@ struct hwrm_ver_get_output { #define VER_GET_RESP_DEV_CAPS_CFG_CFA_TRUFLOW_SUPPORTED 0x4000UL #define 
VER_GET_RESP_DEV_CAPS_CFG_SECURE_BOOT_CAPABLE 0x8000UL #define VER_GET_RESP_DEV_CAPS_CFG_SECURE_SOC_CAPABLE 0x10000UL + #define VER_GET_RESP_DEV_CAPS_CFG_DEBUG_TOKEN_SUPPORTED 0x20000UL u8 roce_fw_maj_8b; u8 roce_fw_min_8b; u8 roce_fw_bld_8b; @@ -872,7 +881,8 @@ struct hwrm_async_event_cmpl { #define ASYNC_EVENT_CMPL_EVENT_ID_REPRESENTOR_PAIR_CHANGE 0x4eUL #define ASYNC_EVENT_CMPL_EVENT_ID_VF_STAT_CHANGE 0x4fUL #define ASYNC_EVENT_CMPL_EVENT_ID_HOST_COREDUMP 0x50UL - #define ASYNC_EVENT_CMPL_EVENT_ID_MAX_RGTR_EVENT_ID 0x51UL + #define ASYNC_EVENT_CMPL_EVENT_ID_ADPTV_QOS 0x51UL + #define ASYNC_EVENT_CMPL_EVENT_ID_MAX_RGTR_EVENT_ID 0x52UL #define ASYNC_EVENT_CMPL_EVENT_ID_FW_TRACE_MSG 0xfeUL #define ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR 0xffUL #define ASYNC_EVENT_CMPL_EVENT_ID_LAST ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR @@ -1344,7 +1354,8 @@ struct hwrm_async_event_cmpl_dbg_buf_producer { #define ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_EVENT_DATA1_TYPE_CA2_TRACE 0x9UL #define ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_EVENT_DATA1_TYPE_RIGP1_TRACE 0xaUL #define ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_EVENT_DATA1_TYPE_AFM_KONG_HWRM_TRACE 0xbUL - #define ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_EVENT_DATA1_TYPE_LAST ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_EVENT_DATA1_TYPE_AFM_KONG_HWRM_TRACE + #define ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_EVENT_DATA1_TYPE_ERR_QPC_TRACE 0xcUL + #define ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_EVENT_DATA1_TYPE_LAST ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_EVENT_DATA1_TYPE_ERR_QPC_TRACE }; /* hwrm_async_event_cmpl_hwrm_error (size:128b/16B) */ @@ -1401,7 +1412,11 @@ struct hwrm_async_event_cmpl_error_report_base { #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_DOORBELL_DROP_THRESHOLD 0x4UL #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_THERMAL_THRESHOLD 0x5UL #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_DUAL_DATA_RATE_NOT_SUPPORTED 0x6UL - #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_DUAL_DATA_RATE_NOT_SUPPORTED + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_DUP_UDCC_SES 0x7UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_DB_DROP 0x8UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_MD_TEMP 0x9UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_VNIC_ERR 0xaUL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_VNIC_ERR }; /* hwrm_async_event_cmpl_error_report_pause_storm (size:128b/16B) */ @@ -1914,6 +1929,12 @@ struct hwrm_func_qcaps_output { #define FUNC_QCAPS_RESP_FLAGS_EXT3_RX_RATE_PROFILE_SEL_SUPPORTED 0x8UL #define FUNC_QCAPS_RESP_FLAGS_EXT3_BIDI_OPT_SUPPORTED 0x10UL #define FUNC_QCAPS_RESP_FLAGS_EXT3_MIRROR_ON_ROCE_SUPPORTED 0x20UL + #define FUNC_QCAPS_RESP_FLAGS_EXT3_ROCE_VF_DYN_ALLOC_SUPPORT 0x40UL + #define FUNC_QCAPS_RESP_FLAGS_EXT3_CHANGE_UDP_SRCPORT_SUPPORT 0x80UL + #define FUNC_QCAPS_RESP_FLAGS_EXT3_PCIE_COMPLIANCE_SUPPORTED 0x100UL + #define FUNC_QCAPS_RESP_FLAGS_EXT3_MULTI_L2_DB_SUPPORTED 0x200UL + #define FUNC_QCAPS_RESP_FLAGS_EXT3_PCIE_SECURE_ATS_SUPPORTED 0x400UL + #define FUNC_QCAPS_RESP_FLAGS_EXT3_MBUF_STATS_SUPPORTED 0x800UL __le16 max_roce_vfs; __le16 max_crypto_rx_flow_filters; u8 unused_3[3]; @@ -1931,7 +1952,7 @@ struct hwrm_func_qcfg_input { u8 unused_0[6]; }; -/* hwrm_func_qcfg_output (size:1344b/168B) */ +/* hwrm_func_qcfg_output (size:1408b/176B) */ struct hwrm_func_qcfg_output { __le16 
error_code; __le16 req_type; @@ -2124,7 +2145,43 @@ struct hwrm_func_qcfg_output { #define FUNC_QCFG_RESP_XID_PARTITION_CFG_TX_CK 0x1UL #define FUNC_QCFG_RESP_XID_PARTITION_CFG_RX_CK 0x2UL __le16 mirror_vnic_id; - u8 unused_7[7]; + u8 max_link_width; + #define FUNC_QCFG_RESP_MAX_LINK_WIDTH_UNKNOWN 0x0UL + #define FUNC_QCFG_RESP_MAX_LINK_WIDTH_X1 0x1UL + #define FUNC_QCFG_RESP_MAX_LINK_WIDTH_X2 0x2UL + #define FUNC_QCFG_RESP_MAX_LINK_WIDTH_X4 0x4UL + #define FUNC_QCFG_RESP_MAX_LINK_WIDTH_X8 0x8UL + #define FUNC_QCFG_RESP_MAX_LINK_WIDTH_X16 0x10UL + #define FUNC_QCFG_RESP_MAX_LINK_WIDTH_LAST FUNC_QCFG_RESP_MAX_LINK_WIDTH_X16 + u8 max_link_speed; + #define FUNC_QCFG_RESP_MAX_LINK_SPEED_UNKNOWN 0x0UL + #define FUNC_QCFG_RESP_MAX_LINK_SPEED_G1 0x1UL + #define FUNC_QCFG_RESP_MAX_LINK_SPEED_G2 0x2UL + #define FUNC_QCFG_RESP_MAX_LINK_SPEED_G3 0x3UL + #define FUNC_QCFG_RESP_MAX_LINK_SPEED_G4 0x4UL + #define FUNC_QCFG_RESP_MAX_LINK_SPEED_G5 0x5UL + #define FUNC_QCFG_RESP_MAX_LINK_SPEED_LAST FUNC_QCFG_RESP_MAX_LINK_SPEED_G5 + u8 negotiated_link_width; + #define FUNC_QCFG_RESP_NEGOTIATED_LINK_WIDTH_UNKNOWN 0x0UL + #define FUNC_QCFG_RESP_NEGOTIATED_LINK_WIDTH_X1 0x1UL + #define FUNC_QCFG_RESP_NEGOTIATED_LINK_WIDTH_X2 0x2UL + #define FUNC_QCFG_RESP_NEGOTIATED_LINK_WIDTH_X4 0x4UL + #define FUNC_QCFG_RESP_NEGOTIATED_LINK_WIDTH_X8 0x8UL + #define FUNC_QCFG_RESP_NEGOTIATED_LINK_WIDTH_X16 0x10UL + #define FUNC_QCFG_RESP_NEGOTIATED_LINK_WIDTH_LAST FUNC_QCFG_RESP_NEGOTIATED_LINK_WIDTH_X16 + u8 negotiated_link_speed; + #define FUNC_QCFG_RESP_NEGOTIATED_LINK_SPEED_UNKNOWN 0x0UL + #define FUNC_QCFG_RESP_NEGOTIATED_LINK_SPEED_G1 0x1UL + #define FUNC_QCFG_RESP_NEGOTIATED_LINK_SPEED_G2 0x2UL + #define FUNC_QCFG_RESP_NEGOTIATED_LINK_SPEED_G3 0x3UL + #define FUNC_QCFG_RESP_NEGOTIATED_LINK_SPEED_G4 0x4UL + #define FUNC_QCFG_RESP_NEGOTIATED_LINK_SPEED_G5 0x5UL + #define FUNC_QCFG_RESP_NEGOTIATED_LINK_SPEED_LAST FUNC_QCFG_RESP_NEGOTIATED_LINK_SPEED_G5 + u8 unused_7[2]; + u8 pcie_compliance; + u8 unused_8; + __le16 l2_db_multi_page_size_kb; + u8 unused_9[5]; u8 valid; }; @@ -2322,6 +2379,7 @@ struct hwrm_func_cfg_input { #define FUNC_CFG_REQ_ENABLES2_ROCE_MAX_GID_PER_VF 0x200UL #define FUNC_CFG_REQ_ENABLES2_XID_PARTITION_CFG 0x400UL #define FUNC_CFG_REQ_ENABLES2_PHYSICAL_SLOT_NUMBER 0x800UL + #define FUNC_CFG_REQ_ENABLES2_PCIE_COMPLIANCE 0x1000UL u8 port_kdnet_mode; #define FUNC_CFG_REQ_PORT_KDNET_MODE_DISABLED 0x0UL #define FUNC_CFG_REQ_PORT_KDNET_MODE_ENABLED 0x1UL @@ -2353,7 +2411,8 @@ struct hwrm_func_cfg_input { __le16 xid_partition_cfg; #define FUNC_CFG_REQ_XID_PARTITION_CFG_TX_CK 0x1UL #define FUNC_CFG_REQ_XID_PARTITION_CFG_RX_CK 0x2UL - __le16 unused_2; + u8 pcie_compliance; + u8 unused_2; }; /* hwrm_func_cfg_output (size:128b/16B) */ @@ -2370,11 +2429,41 @@ struct hwrm_func_cfg_output { struct hwrm_func_cfg_cmd_err { u8 code; #define FUNC_CFG_CMD_ERR_CODE_UNKNOWN 0x0UL - #define FUNC_CFG_CMD_ERR_CODE_PARTITION_MIN_BW_RANGE 0x1UL - #define FUNC_CFG_CMD_ERR_CODE_PARTITION_MIN_MORE_THAN_MAX 0x2UL - #define FUNC_CFG_CMD_ERR_CODE_PARTITION_MIN_BW_UNSUPPORTED 0x3UL - #define FUNC_CFG_CMD_ERR_CODE_PARTITION_BW_PERCENT 0x4UL - #define FUNC_CFG_CMD_ERR_CODE_LAST FUNC_CFG_CMD_ERR_CODE_PARTITION_BW_PERCENT + #define FUNC_CFG_CMD_ERR_CODE_PARTITION_BW_OUT_OF_RANGE 0x1UL + #define FUNC_CFG_CMD_ERR_CODE_NPAR_PARTITION_DOWN_FAILED 0x2UL + #define FUNC_CFG_CMD_ERR_CODE_TPID_SET_DFLT_VLAN_NOT_SET 0x3UL + #define FUNC_CFG_CMD_ERR_CODE_RES_ARRAY_ALLOC_FAILED 0x4UL + #define FUNC_CFG_CMD_ERR_CODE_TX_RING_ASSET_TEST_FAILED 0x5UL + 
#define FUNC_CFG_CMD_ERR_CODE_TX_RING_RES_UPDATE_FAILED 0x6UL + #define FUNC_CFG_CMD_ERR_CODE_APPLY_MAX_BW_FAILED 0x7UL + #define FUNC_CFG_CMD_ERR_CODE_ENABLE_EVB_FAILED 0x8UL + #define FUNC_CFG_CMD_ERR_CODE_RSS_CTXT_ASSET_TEST_FAILED 0x9UL + #define FUNC_CFG_CMD_ERR_CODE_RSS_CTXT_RES_UPDATE_FAILED 0xaUL + #define FUNC_CFG_CMD_ERR_CODE_CMPL_RING_ASSET_TEST_FAILED 0xbUL + #define FUNC_CFG_CMD_ERR_CODE_CMPL_RING_RES_UPDATE_FAILED 0xcUL + #define FUNC_CFG_CMD_ERR_CODE_NQ_ASSET_TEST_FAILED 0xdUL + #define FUNC_CFG_CMD_ERR_CODE_NQ_RES_UPDATE_FAILED 0xeUL + #define FUNC_CFG_CMD_ERR_CODE_RX_RING_ASSET_TEST_FAILED 0xfUL + #define FUNC_CFG_CMD_ERR_CODE_RX_RING_RES_UPDATE_FAILED 0x10UL + #define FUNC_CFG_CMD_ERR_CODE_VNIC_ASSET_TEST_FAILED 0x11UL + #define FUNC_CFG_CMD_ERR_CODE_VNIC_RES_UPDATE_FAILED 0x12UL + #define FUNC_CFG_CMD_ERR_CODE_FAILED_TO_START_STATS_THREAD 0x13UL + #define FUNC_CFG_CMD_ERR_CODE_RDMA_SRIOV_DISABLED 0x14UL + #define FUNC_CFG_CMD_ERR_CODE_TX_KTLS_DISABLED 0x15UL + #define FUNC_CFG_CMD_ERR_CODE_TX_KTLS_ASSET_TEST_FAILED 0x16UL + #define FUNC_CFG_CMD_ERR_CODE_TX_KTLS_RES_UPDATE_FAILED 0x17UL + #define FUNC_CFG_CMD_ERR_CODE_RX_KTLS_DISABLED 0x18UL + #define FUNC_CFG_CMD_ERR_CODE_RX_KTLS_ASSET_TEST_FAILED 0x19UL + #define FUNC_CFG_CMD_ERR_CODE_RX_KTLS_RES_UPDATE_FAILED 0x1aUL + #define FUNC_CFG_CMD_ERR_CODE_TX_QUIC_DISABLED 0x1bUL + #define FUNC_CFG_CMD_ERR_CODE_TX_QUIC_ASSET_TEST_FAILED 0x1cUL + #define FUNC_CFG_CMD_ERR_CODE_TX_QUIC_RES_UPDATE_FAILED 0x1dUL + #define FUNC_CFG_CMD_ERR_CODE_RX_QUIC_DISABLED 0x1eUL + #define FUNC_CFG_CMD_ERR_CODE_RX_QUIC_ASSET_TEST_FAILED 0x1fUL + #define FUNC_CFG_CMD_ERR_CODE_RX_QUIC_RES_UPDATE_FAILED 0x20UL + #define FUNC_CFG_CMD_ERR_CODE_INVALID_KDNET_MODE 0x21UL + #define FUNC_CFG_CMD_ERR_CODE_SCHQ_CFG_FAIL 0x22UL + #define FUNC_CFG_CMD_ERR_CODE_LAST FUNC_CFG_CMD_ERR_CODE_SCHQ_CFG_FAIL u8 unused_0[7]; }; @@ -3780,6 +3869,7 @@ struct hwrm_func_backing_store_cfg_v2_input { #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_CA2_TRACE 0x28UL #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_RIGP1_TRACE 0x29UL #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_AFM_KONG_HWRM_TRACE 0x2aUL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_ERR_QPC_TRACE 0x2bUL #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_INVALID 0xffffUL #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_LAST FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_INVALID __le16 instance; @@ -3865,6 +3955,7 @@ struct hwrm_func_backing_store_qcfg_v2_input { #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_CA2_TRACE 0x28UL #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_RIGP1_TRACE 0x29UL #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_AFM_KONG_HWRM_TRACE 0x2aUL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_ERR_QPC_TRACE 0x2bUL #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_INVALID 0xffffUL #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_LAST FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_INVALID __le16 instance; @@ -3904,6 +3995,7 @@ struct hwrm_func_backing_store_qcfg_v2_output { #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_CA1_TRACE 0x27UL #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_CA2_TRACE 0x28UL #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_RIGP1_TRACE 0x29UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_ERR_QPC_TRACE 0x2aUL #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_INVALID 0xffffUL #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_LAST FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_INVALID __le16 instance; @@ -4027,6 +4119,7 @@ struct hwrm_func_backing_store_qcaps_v2_input { #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_CA2_TRACE 0x28UL #define 
FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_RIGP1_TRACE 0x29UL #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_AFM_KONG_HWRM_TRACE 0x2aUL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_ERR_QPC_TRACE 0x2bUL #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_INVALID 0xffffUL #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_LAST FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_INVALID u8 rsvd[6]; @@ -4070,6 +4163,7 @@ struct hwrm_func_backing_store_qcaps_v2_output { #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_CA2_TRACE 0x28UL #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_RIGP1_TRACE 0x29UL #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_AFM_KONG_HWRM_TRACE 0x2aUL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_ERR_QPC_TRACE 0x2bUL #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_INVALID 0xffffUL #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_LAST FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_INVALID __le16 entry_size; @@ -4216,6 +4310,10 @@ struct hwrm_port_phy_cfg_input { #define PORT_PHY_CFG_REQ_FLAGS_FEC_RS272_1XN_DISABLE 0x100000UL #define PORT_PHY_CFG_REQ_FLAGS_FEC_RS272_IEEE_ENABLE 0x200000UL #define PORT_PHY_CFG_REQ_FLAGS_FEC_RS272_IEEE_DISABLE 0x400000UL + #define PORT_PHY_CFG_REQ_FLAGS_LINK_TRAINING_ENABLE 0x800000UL + #define PORT_PHY_CFG_REQ_FLAGS_LINK_TRAINING_DISABLE 0x1000000UL + #define PORT_PHY_CFG_REQ_FLAGS_PRECODING_ENABLE 0x2000000UL + #define PORT_PHY_CFG_REQ_FLAGS_PRECODING_DISABLE 0x4000000UL __le32 enables; #define PORT_PHY_CFG_REQ_ENABLES_AUTO_MODE 0x1UL #define PORT_PHY_CFG_REQ_ENABLES_AUTO_DUPLEX 0x2UL @@ -4703,6 +4801,8 @@ struct hwrm_port_phy_qcfg_output { #define PORT_PHY_QCFG_RESP_OPTION_FLAGS_MEDIA_AUTO_DETECT 0x1UL #define PORT_PHY_QCFG_RESP_OPTION_FLAGS_SIGNAL_MODE_KNOWN 0x2UL #define PORT_PHY_QCFG_RESP_OPTION_FLAGS_SPEEDS2_SUPPORTED 0x4UL + #define PORT_PHY_QCFG_RESP_OPTION_FLAGS_LINK_TRAINING 0x8UL + #define PORT_PHY_QCFG_RESP_OPTION_FLAGS_PRECODING 0x10UL char phy_vendor_name[16]; char phy_vendor_partnumber[16]; __le16 support_pam4_speeds; @@ -4725,6 +4825,10 @@ struct hwrm_port_phy_qcfg_output { u8 link_down_reason; #define PORT_PHY_QCFG_RESP_LINK_DOWN_REASON_RF 0x1UL #define PORT_PHY_QCFG_RESP_LINK_DOWN_REASON_OTP_SPEED_VIOLATION 0x2UL + #define PORT_PHY_QCFG_RESP_LINK_DOWN_REASON_CABLE_REMOVED 0x4UL + #define PORT_PHY_QCFG_RESP_LINK_DOWN_REASON_MODULE_FAULT 0x8UL + #define PORT_PHY_QCFG_RESP_LINK_DOWN_REASON_BMC_REQUEST 0x10UL + #define PORT_PHY_QCFG_RESP_LINK_DOWN_REASON_TX_LASER_DISABLED 0x20UL __le16 support_speeds2; #define PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS2_1GB 0x1UL #define PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS2_10GB 0x2UL @@ -5882,9 +5986,10 @@ struct hwrm_port_led_qcaps_output { #define PORT_LED_QCAPS_RESP_LED0_STATE_CAPS_BLINK_SUPPORTED 0x8UL #define PORT_LED_QCAPS_RESP_LED0_STATE_CAPS_BLINK_ALT_SUPPORTED 0x10UL __le16 led0_color_caps; - #define PORT_LED_QCAPS_RESP_LED0_COLOR_CAPS_RSVD 0x1UL - #define PORT_LED_QCAPS_RESP_LED0_COLOR_CAPS_AMBER_SUPPORTED 0x2UL - #define PORT_LED_QCAPS_RESP_LED0_COLOR_CAPS_GREEN_SUPPORTED 0x4UL + #define PORT_LED_QCAPS_RESP_LED0_COLOR_CAPS_RSVD 0x1UL + #define PORT_LED_QCAPS_RESP_LED0_COLOR_CAPS_AMBER_SUPPORTED 0x2UL + #define PORT_LED_QCAPS_RESP_LED0_COLOR_CAPS_GREEN_SUPPORTED 0x4UL + #define PORT_LED_QCAPS_RESP_LED0_COLOR_CAPS_GRNAMB_SUPPORTED 0x8UL u8 led1_id; u8 led1_type; #define PORT_LED_QCAPS_RESP_LED1_TYPE_SPEED 0x0UL @@ -5900,9 +6005,10 @@ struct hwrm_port_led_qcaps_output { #define PORT_LED_QCAPS_RESP_LED1_STATE_CAPS_BLINK_SUPPORTED 0x8UL #define PORT_LED_QCAPS_RESP_LED1_STATE_CAPS_BLINK_ALT_SUPPORTED 0x10UL __le16 led1_color_caps; - #define 
PORT_LED_QCAPS_RESP_LED1_COLOR_CAPS_RSVD 0x1UL - #define PORT_LED_QCAPS_RESP_LED1_COLOR_CAPS_AMBER_SUPPORTED 0x2UL - #define PORT_LED_QCAPS_RESP_LED1_COLOR_CAPS_GREEN_SUPPORTED 0x4UL + #define PORT_LED_QCAPS_RESP_LED1_COLOR_CAPS_RSVD 0x1UL + #define PORT_LED_QCAPS_RESP_LED1_COLOR_CAPS_AMBER_SUPPORTED 0x2UL + #define PORT_LED_QCAPS_RESP_LED1_COLOR_CAPS_GREEN_SUPPORTED 0x4UL + #define PORT_LED_QCAPS_RESP_LED1_COLOR_CAPS_GRNAMB_SUPPORTED 0x8UL u8 led2_id; u8 led2_type; #define PORT_LED_QCAPS_RESP_LED2_TYPE_SPEED 0x0UL @@ -5918,9 +6024,10 @@ struct hwrm_port_led_qcaps_output { #define PORT_LED_QCAPS_RESP_LED2_STATE_CAPS_BLINK_SUPPORTED 0x8UL #define PORT_LED_QCAPS_RESP_LED2_STATE_CAPS_BLINK_ALT_SUPPORTED 0x10UL __le16 led2_color_caps; - #define PORT_LED_QCAPS_RESP_LED2_COLOR_CAPS_RSVD 0x1UL - #define PORT_LED_QCAPS_RESP_LED2_COLOR_CAPS_AMBER_SUPPORTED 0x2UL - #define PORT_LED_QCAPS_RESP_LED2_COLOR_CAPS_GREEN_SUPPORTED 0x4UL + #define PORT_LED_QCAPS_RESP_LED2_COLOR_CAPS_RSVD 0x1UL + #define PORT_LED_QCAPS_RESP_LED2_COLOR_CAPS_AMBER_SUPPORTED 0x2UL + #define PORT_LED_QCAPS_RESP_LED2_COLOR_CAPS_GREEN_SUPPORTED 0x4UL + #define PORT_LED_QCAPS_RESP_LED2_COLOR_CAPS_GRNAMB_SUPPORTED 0x8UL u8 led3_id; u8 led3_type; #define PORT_LED_QCAPS_RESP_LED3_TYPE_SPEED 0x0UL @@ -5936,9 +6043,10 @@ struct hwrm_port_led_qcaps_output { #define PORT_LED_QCAPS_RESP_LED3_STATE_CAPS_BLINK_SUPPORTED 0x8UL #define PORT_LED_QCAPS_RESP_LED3_STATE_CAPS_BLINK_ALT_SUPPORTED 0x10UL __le16 led3_color_caps; - #define PORT_LED_QCAPS_RESP_LED3_COLOR_CAPS_RSVD 0x1UL - #define PORT_LED_QCAPS_RESP_LED3_COLOR_CAPS_AMBER_SUPPORTED 0x2UL - #define PORT_LED_QCAPS_RESP_LED3_COLOR_CAPS_GREEN_SUPPORTED 0x4UL + #define PORT_LED_QCAPS_RESP_LED3_COLOR_CAPS_RSVD 0x1UL + #define PORT_LED_QCAPS_RESP_LED3_COLOR_CAPS_AMBER_SUPPORTED 0x2UL + #define PORT_LED_QCAPS_RESP_LED3_COLOR_CAPS_GREEN_SUPPORTED 0x4UL + #define PORT_LED_QCAPS_RESP_LED3_COLOR_CAPS_GRNAMB_SUPPORTED 0x8UL u8 unused_4[3]; u8 valid; }; @@ -7036,9 +7144,22 @@ struct hwrm_vnic_rss_cfg_output { /* hwrm_vnic_rss_cfg_cmd_err (size:64b/8B) */ struct hwrm_vnic_rss_cfg_cmd_err { u8 code; - #define VNIC_RSS_CFG_CMD_ERR_CODE_UNKNOWN 0x0UL - #define VNIC_RSS_CFG_CMD_ERR_CODE_INTERFACE_NOT_READY 0x1UL - #define VNIC_RSS_CFG_CMD_ERR_CODE_LAST VNIC_RSS_CFG_CMD_ERR_CODE_INTERFACE_NOT_READY + #define VNIC_RSS_CFG_CMD_ERR_CODE_UNKNOWN 0x0UL + #define VNIC_RSS_CFG_CMD_ERR_CODE_INTERFACE_NOT_READY 0x1UL + #define VNIC_RSS_CFG_CMD_ERR_CODE_UNABLE_TO_GET_RSS_CFG 0x2UL + #define VNIC_RSS_CFG_CMD_ERR_CODE_HASH_TYPE_UNSUPPORTED 0x3UL + #define VNIC_RSS_CFG_CMD_ERR_CODE_HASH_TYPE_ERR 0x4UL + #define VNIC_RSS_CFG_CMD_ERR_CODE_HASH_MODE_FAIL 0x5UL + #define VNIC_RSS_CFG_CMD_ERR_CODE_RING_GRP_TABLE_ALLOC_ERR 0x6UL + #define VNIC_RSS_CFG_CMD_ERR_CODE_HASH_KEY_ALLOC_ERR 0x7UL + #define VNIC_RSS_CFG_CMD_ERR_CODE_DMA_FAILED 0x8UL + #define VNIC_RSS_CFG_CMD_ERR_CODE_RX_RING_ALLOC_ERR 0x9UL + #define VNIC_RSS_CFG_CMD_ERR_CODE_CMPL_RING_ALLOC_ERR 0xaUL + #define VNIC_RSS_CFG_CMD_ERR_CODE_HW_SET_RSS_FAILED 0xbUL + #define VNIC_RSS_CFG_CMD_ERR_CODE_CTX_INVALID 0xcUL + #define VNIC_RSS_CFG_CMD_ERR_CODE_VNIC_INVALID 0xdUL + #define VNIC_RSS_CFG_CMD_ERR_CODE_VNIC_RING_TABLE_PAIR_INVALID 0xeUL + #define VNIC_RSS_CFG_CMD_ERR_CODE_LAST VNIC_RSS_CFG_CMD_ERR_CODE_VNIC_RING_TABLE_PAIR_INVALID u8 unused_0[7]; }; @@ -7177,7 +7298,7 @@ struct hwrm_vnic_rss_cos_lb_ctx_free_output { u8 valid; }; -/* hwrm_ring_alloc_input (size:704b/88B) */ +/* hwrm_ring_alloc_input (size:768b/96B) */ struct hwrm_ring_alloc_input { __le16 req_type; 
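 /* Size cross-check: the request grows from 88 to 96 bytes because the new
  * __le16 dpi field and the __le16 unused_5[3] tail padding added at the
  * end of this structure contribute exactly one 8-byte chunk, which is
  * what the updated size:768b/96B tag above reflects.
  */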
__le16 cmpl_ring; @@ -7195,6 +7316,7 @@ struct hwrm_ring_alloc_input { #define RING_ALLOC_REQ_ENABLES_MPC_CHNLS_TYPE 0x400UL #define RING_ALLOC_REQ_ENABLES_STEERING_TAG_VALID 0x800UL #define RING_ALLOC_REQ_ENABLES_RX_RATE_PROFILE_VALID 0x1000UL + #define RING_ALLOC_REQ_ENABLES_DPI_VALID 0x2000UL u8 ring_type; #define RING_ALLOC_REQ_RING_TYPE_L2_CMPL 0x0UL #define RING_ALLOC_REQ_RING_TYPE_TX 0x1UL @@ -7287,6 +7409,8 @@ struct hwrm_ring_alloc_input { #define RING_ALLOC_REQ_RX_RATE_PROFILE_SEL_LAST RING_ALLOC_REQ_RX_RATE_PROFILE_SEL_POLL_MODE u8 unused_4; __le64 cq_handle; + __le16 dpi; + __le16 unused_5[3]; }; /* hwrm_ring_alloc_output (size:128b/16B) */ @@ -7776,7 +7900,10 @@ struct hwrm_cfa_l2_set_rx_mask_cmd_err { u8 code; #define CFA_L2_SET_RX_MASK_CMD_ERR_CODE_UNKNOWN 0x0UL #define CFA_L2_SET_RX_MASK_CMD_ERR_CODE_NTUPLE_FILTER_CONFLICT_ERR 0x1UL - #define CFA_L2_SET_RX_MASK_CMD_ERR_CODE_LAST CFA_L2_SET_RX_MASK_CMD_ERR_CODE_NTUPLE_FILTER_CONFLICT_ERR + #define CFA_L2_SET_RX_MASK_CMD_ERR_CODE_MAX_VLAN_TAGS 0x2UL + #define CFA_L2_SET_RX_MASK_CMD_ERR_CODE_INVALID_VNIC_ID 0x3UL + #define CFA_L2_SET_RX_MASK_CMD_ERR_CODE_INVALID_ACTION 0x4UL + #define CFA_L2_SET_RX_MASK_CMD_ERR_CODE_LAST CFA_L2_SET_RX_MASK_CMD_ERR_CODE_INVALID_ACTION u8 unused_0[7]; }; @@ -8109,9 +8236,38 @@ struct hwrm_cfa_ntuple_filter_alloc_output { /* hwrm_cfa_ntuple_filter_alloc_cmd_err (size:64b/8B) */ struct hwrm_cfa_ntuple_filter_alloc_cmd_err { u8 code; - #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_UNKNOWN 0x0UL - #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_RX_MASK_VLAN_CONFLICT_ERR 0x1UL - #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_LAST CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_RX_MASK_VLAN_CONFLICT_ERR + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_UNKNOWN 0x0UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_ZERO_MAC 0x65UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_BC_MC_MAC 0x66UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_INVALID_VNIC 0x67UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_INVALID_PF_FID 0x68UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_INVALID_L2_CTXT_ID 0x69UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_NULL_L2_CTXT_CFG 0x6aUL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_NULL_L2_DATA_FLD 0x6bUL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_INVALID_CFA_LAYOUT 0x6cUL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_L2_CTXT_ALLOC_FAIL 0x6dUL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_ROCE_FLOW_ERR 0x6eUL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_INVALID_OWNER_FID 0x6fUL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_ZERO_REF_CNT 0x70UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_INVALID_FLOW_TYPE 0x71UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_INVALID_IVLAN 0x72UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_MAX_VLAN_ID 0x73UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_INVALID_TNL_REQ 0x74UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_L2_ADDR 0x75UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_L2_IVLAN 0x76UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_L3_ADDR 0x77UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_L3_ADDR_TYPE 0x78UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_T_L3_ADDR_TYPE 0x79UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_DST_VNIC_ID 0x7aUL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_VNI 0x7bUL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_INVALID_DST_ID 0x7cUL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_FAIL_ROCE_L2_FLOW 0x7dUL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_INVALID_NPAR_VLAN 0x7eUL + #define 
CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_ATSP_ADD 0x7fUL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_DFLT_VLAN_FAIL 0x80UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_INVALID_L3_TYPE 0x81UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_VAL_FAIL_TNL_FLOW 0x82UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_LAST CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_VAL_FAIL_TNL_FLOW u8 unused_0[7]; }; @@ -9181,7 +9337,7 @@ struct pcie_ctx_hw_stats { __le64 pcie_recovery_histogram; }; -/* pcie_ctx_hw_stats_v2 (size:4096b/512B) */ +/* pcie_ctx_hw_stats_v2 (size:4544b/568B) */ struct pcie_ctx_hw_stats_v2 { __le64 pcie_pl_signal_integrity; __le64 pcie_dl_signal_integrity; @@ -9212,6 +9368,9 @@ struct pcie_ctx_hw_stats_v2 { __le64 pcie_other_packet_count; __le64 pcie_blocked_packet_count; __le64 pcie_cmpl_packet_count; + __le32 pcie_rd_latency_histogram[12]; + __le32 pcie_rd_latency_all_normal_count; + __le32 unused_2; }; /* hwrm_stat_generic_qstats_input (size:256b/32B) */ @@ -9406,7 +9565,8 @@ struct hwrm_struct_hdr { #define STRUCT_HDR_STRUCT_ID_MSIX_PER_VF 0xc8UL #define STRUCT_HDR_STRUCT_ID_UDCC_RTT_BUCKET_COUNT 0x12cUL #define STRUCT_HDR_STRUCT_ID_UDCC_RTT_BUCKET_BOUND 0x12dUL - #define STRUCT_HDR_STRUCT_ID_LAST STRUCT_HDR_STRUCT_ID_UDCC_RTT_BUCKET_BOUND + #define STRUCT_HDR_STRUCT_ID_DBG_TOKEN_CLAIMS 0x190UL + #define STRUCT_HDR_STRUCT_ID_LAST STRUCT_HDR_STRUCT_ID_DBG_TOKEN_CLAIMS __le16 len; u8 version; #define STRUCT_HDR_VERSION_0 0x0UL @@ -9459,11 +9619,13 @@ struct hwrm_fw_set_structured_data_output { /* hwrm_fw_set_structured_data_cmd_err (size:64b/8B) */ struct hwrm_fw_set_structured_data_cmd_err { u8 code; - #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_UNKNOWN 0x0UL - #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_BAD_HDR_CNT 0x1UL - #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_BAD_FMT 0x2UL - #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_BAD_ID 0x3UL - #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_LAST FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_BAD_ID + #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_UNKNOWN 0x0UL + #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_BAD_HDR_CNT 0x1UL + #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_BAD_FMT 0x2UL + #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_BAD_ID 0x3UL + #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_ALREADY_ADDED 0x4UL + #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_INST_IN_PROG 0x5UL + #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_LAST FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_INST_IN_PROG u8 unused_0[7]; }; @@ -9487,7 +9649,9 @@ struct hwrm_fw_get_structured_data_input { #define FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_NON_TPMR_PEER 0x201UL #define FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_NON_TPMR_OPERATIONAL 0x202UL #define FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_HOST_OPERATIONAL 0x300UL - #define FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_LAST FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_HOST_OPERATIONAL + #define FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_CLAIMS_SUPPORTED 0x320UL + #define FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_CLAIMS_ACTIVE 0x321UL + #define FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_LAST FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_CLAIMS_ACTIVE u8 count; u8 unused_0; }; @@ -10172,7 +10336,8 @@ struct hwrm_dbg_log_buffer_flush_input { #define DBG_LOG_BUFFER_FLUSH_REQ_TYPE_CA2_TRACE 0x9UL #define DBG_LOG_BUFFER_FLUSH_REQ_TYPE_RIGP1_TRACE 0xaUL #define DBG_LOG_BUFFER_FLUSH_REQ_TYPE_AFM_KONG_HWRM_TRACE 0xbUL - #define DBG_LOG_BUFFER_FLUSH_REQ_TYPE_LAST DBG_LOG_BUFFER_FLUSH_REQ_TYPE_AFM_KONG_HWRM_TRACE + #define DBG_LOG_BUFFER_FLUSH_REQ_TYPE_ERR_QPC_TRACE 0xcUL + #define DBG_LOG_BUFFER_FLUSH_REQ_TYPE_LAST 
DBG_LOG_BUFFER_FLUSH_REQ_TYPE_ERR_QPC_TRACE u8 unused_1[2]; __le32 flags; #define DBG_LOG_BUFFER_FLUSH_REQ_FLAGS_FLUSH_ALL_BUFFERS 0x1UL @@ -10295,10 +10460,15 @@ struct hwrm_nvm_write_output { /* hwrm_nvm_write_cmd_err (size:64b/8B) */ struct hwrm_nvm_write_cmd_err { u8 code; - #define NVM_WRITE_CMD_ERR_CODE_UNKNOWN 0x0UL - #define NVM_WRITE_CMD_ERR_CODE_FRAG_ERR 0x1UL - #define NVM_WRITE_CMD_ERR_CODE_NO_SPACE 0x2UL - #define NVM_WRITE_CMD_ERR_CODE_LAST NVM_WRITE_CMD_ERR_CODE_NO_SPACE + #define NVM_WRITE_CMD_ERR_CODE_UNKNOWN 0x0UL + #define NVM_WRITE_CMD_ERR_CODE_FRAG_ERR 0x1UL + #define NVM_WRITE_CMD_ERR_CODE_NO_SPACE 0x2UL + #define NVM_WRITE_CMD_ERR_CODE_WRITE_FAILED 0x3UL + #define NVM_WRITE_CMD_ERR_CODE_REQD_ERASE_FAILED 0x4UL + #define NVM_WRITE_CMD_ERR_CODE_VERIFY_FAILED 0x5UL + #define NVM_WRITE_CMD_ERR_CODE_INVALID_HEADER 0x6UL + #define NVM_WRITE_CMD_ERR_CODE_UPDATE_DIGEST_FAILED 0x7UL + #define NVM_WRITE_CMD_ERR_CODE_LAST NVM_WRITE_CMD_ERR_CODE_UPDATE_DIGEST_FAILED u8 unused_0[7]; }; @@ -10438,7 +10608,11 @@ struct hwrm_nvm_get_dev_info_output { __le16 srt2_fw_minor; __le16 srt2_fw_build; __le16 srt2_fw_patch; - u8 unused_0[7]; + u8 security_soc_fw_major; + u8 security_soc_fw_minor; + u8 security_soc_fw_build; + u8 security_soc_fw_patch; + u8 unused_0[3]; u8 valid; }; @@ -10568,7 +10742,9 @@ struct hwrm_nvm_install_update_cmd_err { #define NVM_INSTALL_UPDATE_CMD_ERR_CODE_NO_SPACE 0x2UL #define NVM_INSTALL_UPDATE_CMD_ERR_CODE_ANTI_ROLLBACK 0x3UL #define NVM_INSTALL_UPDATE_CMD_ERR_CODE_NO_VOLTREG_SUPPORT 0x4UL - #define NVM_INSTALL_UPDATE_CMD_ERR_CODE_LAST NVM_INSTALL_UPDATE_CMD_ERR_CODE_NO_VOLTREG_SUPPORT + #define NVM_INSTALL_UPDATE_CMD_ERR_CODE_DEFRAG_FAILED 0x5UL + #define NVM_INSTALL_UPDATE_CMD_ERR_CODE_UNKNOWN_DIR_ERR 0x6UL + #define NVM_INSTALL_UPDATE_CMD_ERR_CODE_LAST NVM_INSTALL_UPDATE_CMD_ERR_CODE_UNKNOWN_DIR_ERR u8 unused_0[7]; }; @@ -10591,7 +10767,8 @@ struct hwrm_nvm_get_variable_input { __le16 index_2; __le16 index_3; u8 flags; - #define NVM_GET_VARIABLE_REQ_FLAGS_FACTORY_DFLT 0x1UL + #define NVM_GET_VARIABLE_REQ_FLAGS_FACTORY_DFLT 0x1UL + #define NVM_GET_VARIABLE_REQ_FLAGS_VALIDATE_OPT_VALUE 0x2UL u8 unused_0; }; @@ -10606,18 +10783,25 @@ struct hwrm_nvm_get_variable_output { #define NVM_GET_VARIABLE_RESP_OPTION_NUM_RSVD_0 0x0UL #define NVM_GET_VARIABLE_RESP_OPTION_NUM_RSVD_FFFF 0xffffUL #define NVM_GET_VARIABLE_RESP_OPTION_NUM_LAST NVM_GET_VARIABLE_RESP_OPTION_NUM_RSVD_FFFF - u8 unused_0[3]; + u8 flags; + #define NVM_GET_VARIABLE_RESP_FLAGS_VALIDATE_OPT_VALUE 0x1UL + u8 unused_0[2]; u8 valid; }; /* hwrm_nvm_get_variable_cmd_err (size:64b/8B) */ struct hwrm_nvm_get_variable_cmd_err { u8 code; - #define NVM_GET_VARIABLE_CMD_ERR_CODE_UNKNOWN 0x0UL - #define NVM_GET_VARIABLE_CMD_ERR_CODE_VAR_NOT_EXIST 0x1UL - #define NVM_GET_VARIABLE_CMD_ERR_CODE_CORRUPT_VAR 0x2UL - #define NVM_GET_VARIABLE_CMD_ERR_CODE_LEN_TOO_SHORT 0x3UL - #define NVM_GET_VARIABLE_CMD_ERR_CODE_LAST NVM_GET_VARIABLE_CMD_ERR_CODE_LEN_TOO_SHORT + #define NVM_GET_VARIABLE_CMD_ERR_CODE_UNKNOWN 0x0UL + #define NVM_GET_VARIABLE_CMD_ERR_CODE_VAR_NOT_EXIST 0x1UL + #define NVM_GET_VARIABLE_CMD_ERR_CODE_CORRUPT_VAR 0x2UL + #define NVM_GET_VARIABLE_CMD_ERR_CODE_LEN_TOO_SHORT 0x3UL + #define NVM_GET_VARIABLE_CMD_ERR_CODE_INDEX_INVALID 0x4UL + #define NVM_GET_VARIABLE_CMD_ERR_CODE_ACCESS_DENIED 0x5UL + #define NVM_GET_VARIABLE_CMD_ERR_CODE_CB_FAILED 0x6UL + #define NVM_GET_VARIABLE_CMD_ERR_CODE_INVALID_DATA_LEN 0x7UL + #define NVM_GET_VARIABLE_CMD_ERR_CODE_NO_MEM 0x8UL + #define 
NVM_GET_VARIABLE_CMD_ERR_CODE_LAST NVM_GET_VARIABLE_CMD_ERR_CODE_NO_MEM u8 unused_0[7]; }; @@ -10667,10 +10851,17 @@ struct hwrm_nvm_set_variable_output { /* hwrm_nvm_set_variable_cmd_err (size:64b/8B) */ struct hwrm_nvm_set_variable_cmd_err { u8 code; - #define NVM_SET_VARIABLE_CMD_ERR_CODE_UNKNOWN 0x0UL - #define NVM_SET_VARIABLE_CMD_ERR_CODE_VAR_NOT_EXIST 0x1UL - #define NVM_SET_VARIABLE_CMD_ERR_CODE_CORRUPT_VAR 0x2UL - #define NVM_SET_VARIABLE_CMD_ERR_CODE_LAST NVM_SET_VARIABLE_CMD_ERR_CODE_CORRUPT_VAR + #define NVM_SET_VARIABLE_CMD_ERR_CODE_UNKNOWN 0x0UL + #define NVM_SET_VARIABLE_CMD_ERR_CODE_VAR_NOT_EXIST 0x1UL + #define NVM_SET_VARIABLE_CMD_ERR_CODE_CORRUPT_VAR 0x2UL + #define NVM_SET_VARIABLE_CMD_ERR_CODE_LEN_TOO_SHORT 0x3UL + #define NVM_SET_VARIABLE_CMD_ERR_CODE_ACTION_NOT_SUPPORTED 0x4UL + #define NVM_SET_VARIABLE_CMD_ERR_CODE_INDEX_INVALID 0x5UL + #define NVM_SET_VARIABLE_CMD_ERR_CODE_ACCESS_DENIED 0x6UL + #define NVM_SET_VARIABLE_CMD_ERR_CODE_CB_FAILED 0x7UL + #define NVM_SET_VARIABLE_CMD_ERR_CODE_INVALID_DATA_LEN 0x8UL + #define NVM_SET_VARIABLE_CMD_ERR_CODE_NO_MEM 0x9UL + #define NVM_SET_VARIABLE_CMD_ERR_CODE_LAST NVM_SET_VARIABLE_CMD_ERR_CODE_NO_MEM u8 unused_0[7]; }; diff --git a/include/linux/bpf.h b/include/linux/bpf.h index cc700925b802..ec527b476dba 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -767,12 +767,15 @@ enum bpf_type_flag { */ MEM_WRITE = BIT(18 + BPF_BASE_TYPE_BITS), + /* DYNPTR points to skb_metadata_end()-skb_metadata_len() */ + DYNPTR_TYPE_SKB_META = BIT(19 + BPF_BASE_TYPE_BITS), + __BPF_TYPE_FLAG_MAX, __BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1, }; #define DYNPTR_TYPE_FLAG_MASK (DYNPTR_TYPE_LOCAL | DYNPTR_TYPE_RINGBUF | DYNPTR_TYPE_SKB \ - | DYNPTR_TYPE_XDP) + | DYNPTR_TYPE_XDP | DYNPTR_TYPE_SKB_META) /* Max number of base types. 
*/ #define BPF_BASE_TYPE_LIMIT (1UL << BPF_BASE_TYPE_BITS) @@ -1358,6 +1361,8 @@ enum bpf_dynptr_type { BPF_DYNPTR_TYPE_SKB, /* Underlying data is a xdp_buff */ BPF_DYNPTR_TYPE_XDP, + /* Points to skb_metadata_end()-skb_metadata_len() */ + BPF_DYNPTR_TYPE_SKB_META, }; int bpf_dynptr_check_size(u32 size); diff --git a/include/linux/filter.h b/include/linux/filter.h index 1e7fd3ee759e..9ed21b65e2e9 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1784,6 +1784,7 @@ int __bpf_xdp_store_bytes(struct xdp_buff *xdp, u32 offset, void *buf, u32 len); void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len); void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off, void *buf, unsigned long len, bool flush); +void *bpf_skb_meta_pointer(struct sk_buff *skb, u32 offset); #else /* CONFIG_NET */ static inline int __bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset, void *to, u32 len) @@ -1818,6 +1819,11 @@ static inline void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off, voi unsigned long len, bool flush) { } + +static inline void *bpf_skb_meta_pointer(struct sk_buff *skb, u32 offset) +{ + return NULL; +} #endif /* CONFIG_NET */ #endif /* __LINUX_FILTER_H__ */ diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h index ff3beda1312c..db45d6f1c4f4 100644 --- a/include/linux/if_pppox.h +++ b/include/linux/if_pppox.h @@ -43,7 +43,7 @@ struct pppox_sock { /* struct sock must be the first member of pppox_sock */ struct sock sk; struct ppp_channel chan; - struct pppox_sock *next; /* for hash table */ + struct pppox_sock __rcu *next; /* for hash table */ union { struct pppoe_opt pppoe; struct pptp_opt pptp; diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index a9033696b0aa..704fd415c2b4 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -24,9 +24,6 @@ struct inet_diag_handler { bool net_admin, struct sk_buff *skb); - size_t (*idiag_get_aux_size)(struct sock *sk, - bool net_admin); - int (*destroy)(struct sk_buff *in_skb, const struct inet_diag_req_v2 *req); @@ -41,6 +38,11 @@ struct inet_diag_dump_data { #define inet_diag_nla_bpf_stgs req_nlas[INET_DIAG_REQ_SK_BPF_STORAGES] struct bpf_sk_storage_diag *bpf_stg_diag; + bool mark_needed; /* INET_DIAG_BC_MARK_COND present. */ +#ifdef CONFIG_SOCK_CGROUP_DATA + bool cgroup_needed; /* INET_DIAG_BC_CGROUP_COND present. */ +#endif + bool userlocks_needed; /* INET_DIAG_BC_AUTO present. 
*/ }; struct inet_connection_sock; @@ -48,18 +50,8 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *req, u16 nlmsg_flags, bool net_admin); -void inet_diag_dump_icsk(struct inet_hashinfo *h, struct sk_buff *skb, - struct netlink_callback *cb, - const struct inet_diag_req_v2 *r); -int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, - struct netlink_callback *cb, - const struct inet_diag_req_v2 *req); - -struct sock *inet_diag_find_one_icsk(struct net *net, - struct inet_hashinfo *hashinfo, - const struct inet_diag_req_v2 *req); -int inet_diag_bc_sk(const struct nlattr *_bc, struct sock *sk); +int inet_diag_bc_sk(const struct inet_diag_dump_data *cb_data, struct sock *sk); void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk); diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index bc6ec2959173..261d02efb615 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -295,7 +295,7 @@ struct raw6_sock { __u32 offset; /* checksum offset */ struct icmp6_filter filter; __u32 ip6mr_table; - + struct socket_drop_counters drop_counters; struct ipv6_pinfo inet6; }; diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 785173aa0739..fb27e3d2fdac 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1596,14 +1596,16 @@ static inline void mem_cgroup_flush_foreign(struct bdi_writeback *wb) #endif /* CONFIG_CGROUP_WRITEBACK */ struct sock; -bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages, - gfp_t gfp_mask); -void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages); #ifdef CONFIG_MEMCG extern struct static_key_false memcg_sockets_enabled_key; #define mem_cgroup_sockets_enabled static_branch_unlikely(&memcg_sockets_enabled_key) + void mem_cgroup_sk_alloc(struct sock *sk); void mem_cgroup_sk_free(struct sock *sk); +void mem_cgroup_sk_inherit(const struct sock *sk, struct sock *newsk); +bool mem_cgroup_sk_charge(const struct sock *sk, unsigned int nr_pages, + gfp_t gfp_mask); +void mem_cgroup_sk_uncharge(const struct sock *sk, unsigned int nr_pages); #if BITS_PER_LONG < 64 static inline void mem_cgroup_set_socket_pressure(struct mem_cgroup *memcg) @@ -1640,32 +1642,37 @@ static inline u64 mem_cgroup_get_socket_pressure(struct mem_cgroup *memcg) } #endif -static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) -{ -#ifdef CONFIG_MEMCG_V1 - if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) - return !!memcg->tcpmem_pressure; -#endif /* CONFIG_MEMCG_V1 */ - do { - if (time_before64(get_jiffies_64(), mem_cgroup_get_socket_pressure(memcg))) - return true; - } while ((memcg = parent_mem_cgroup(memcg))); - return false; -} - int alloc_shrinker_info(struct mem_cgroup *memcg); void free_shrinker_info(struct mem_cgroup *memcg); void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id); void reparent_shrinker_deferred(struct mem_cgroup *memcg); #else #define mem_cgroup_sockets_enabled 0 -static inline void mem_cgroup_sk_alloc(struct sock *sk) { }; -static inline void mem_cgroup_sk_free(struct sock *sk) { }; -static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) + +static inline void mem_cgroup_sk_alloc(struct sock *sk) +{ +} + +static inline void mem_cgroup_sk_free(struct sock *sk) +{ +} + +static inline void mem_cgroup_sk_inherit(const struct sock *sk, struct sock *newsk) +{ +} + +static inline bool 
mem_cgroup_sk_charge(const struct sock *sk, + unsigned int nr_pages, + gfp_t gfp_mask) { return false; } +static inline void mem_cgroup_sk_uncharge(const struct sock *sk, + unsigned int nr_pages) +{ +} + static inline void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id) { diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h index 9af01bdd86d2..ca691641788b 100644 --- a/include/linux/micrel_phy.h +++ b/include/linux/micrel_phy.h @@ -32,6 +32,7 @@ #define PHY_ID_LAN8814 0x00221660 #define PHY_ID_LAN8804 0x00221670 #define PHY_ID_LAN8841 0x00221650 +#define PHY_ID_LAN8842 0x002216C0 #define PHY_ID_KSZ886X 0x00221430 #define PHY_ID_KSZ8863 0x00221435 diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 9d2467f982ad..72a83666e67f 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1248,6 +1248,7 @@ enum mlx5_cap_type { MLX5_CAP_IPSEC, MLX5_CAP_CRYPTO = 0x1a, MLX5_CAP_SHAMPO = 0x1d, + MLX5_CAP_PSP = 0x1e, MLX5_CAP_MACSEC = 0x1f, MLX5_CAP_GENERAL_2 = 0x20, MLX5_CAP_PORT_SELECTION = 0x25, @@ -1487,6 +1488,9 @@ enum mlx5_qcam_feature_groups { #define MLX5_CAP_SHAMPO(mdev, cap) \ MLX5_GET(shampo_cap, mdev->caps.hca[MLX5_CAP_SHAMPO]->cur, cap) +#define MLX5_CAP_PSP(mdev, cap)\ + MLX5_GET(psp_cap, (mdev)->caps.hca[MLX5_CAP_PSP]->cur, cap) + enum { MLX5_CMD_STAT_OK = 0x0, MLX5_CMD_STAT_INT_ERR = 0x1, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 8360d9011d4f..e9f14a0c7f4f 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -189,6 +189,9 @@ enum { MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS = 0x727, MLX5_CMD_OP_RELEASE_XRQ_ERROR = 0x729, MLX5_CMD_OP_MODIFY_XRQ = 0x72a, + MLX5_CMD_OPCODE_QUERY_DELEGATED_VHCA = 0x732, + MLX5_CMD_OPCODE_CREATE_ESW_VPORT = 0x733, + MLX5_CMD_OPCODE_DESTROY_ESW_VPORT = 0x734, MLX5_CMD_OP_QUERY_ESW_FUNCTIONS = 0x740, MLX5_CMD_OP_QUERY_VPORT_STATE = 0x750, MLX5_CMD_OP_MODIFY_VPORT_STATE = 0x751, @@ -311,6 +314,8 @@ enum { MLX5_CMD_OP_CREATE_UMEM = 0xa08, MLX5_CMD_OP_DESTROY_UMEM = 0xa0a, MLX5_CMD_OP_SYNC_STEERING = 0xb00, + MLX5_CMD_OP_PSP_GEN_SPI = 0xb10, + MLX5_CMD_OP_PSP_ROTATE_KEY = 0xb11, MLX5_CMD_OP_QUERY_VHCA_STATE = 0xb0d, MLX5_CMD_OP_MODIFY_VHCA_STATE = 0xb0e, MLX5_CMD_OP_SYNC_CRYPTO = 0xb12, @@ -486,12 +491,14 @@ struct mlx5_ifc_flow_table_prop_layout_bits { u8 execute_aso[0x1]; u8 reserved_at_47[0x19]; - u8 reserved_at_60[0x2]; + u8 reformat_l2_to_l3_psp_tunnel[0x1]; + u8 reformat_l3_psp_tunnel_to_l2[0x1]; u8 reformat_insert[0x1]; u8 reformat_remove[0x1]; u8 macsec_encrypt[0x1]; u8 macsec_decrypt[0x1]; - u8 reserved_at_66[0x2]; + u8 psp_encrypt[0x1]; + u8 psp_decrypt[0x1]; u8 reformat_add_macsec[0x1]; u8 reformat_remove_macsec[0x1]; u8 reparse[0x1]; @@ -700,7 +707,7 @@ struct mlx5_ifc_fte_match_set_misc2_bits { u8 metadata_reg_a[0x20]; - u8 reserved_at_1a0[0x8]; + u8 psp_syndrome[0x8]; u8 macsec_syndrome[0x8]; u8 ipsec_syndrome[0x8]; u8 ipsec_next_header[0x8]; @@ -1508,6 +1515,21 @@ struct mlx5_ifc_macsec_cap_bits { u8 reserved_at_40[0x7c0]; }; +struct mlx5_ifc_psp_cap_bits { + u8 reserved_at_0[0x1]; + u8 psp_crypto_offload[0x1]; + u8 reserved_at_2[0x1]; + u8 psp_crypto_esp_aes_gcm_256_encrypt[0x1]; + u8 psp_crypto_esp_aes_gcm_128_encrypt[0x1]; + u8 psp_crypto_esp_aes_gcm_256_decrypt[0x1]; + u8 psp_crypto_esp_aes_gcm_128_decrypt[0x1]; + u8 reserved_at_7[0x4]; + u8 log_max_num_of_psp_spi[0x5]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x7e0]; +}; + enum { MLX5_WQ_TYPE_LINKED_LIST = 0x0, MLX5_WQ_TYPE_CYCLIC = 0x1, @@ 
-1873,7 +1895,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_2a0[0x7]; u8 mkey_pcie_tph[0x1]; - u8 reserved_at_2a8[0x3]; + u8 reserved_at_2a8[0x2]; + + u8 psp[0x1]; u8 shampo[0x1]; u8 reserved_at_2ac[0x4]; u8 max_wqe_sz_rq[0x10]; @@ -2207,7 +2231,19 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 reserved_at_440[0x8]; u8 max_num_eqs_24b[0x18]; - u8 reserved_at_460[0x3a0]; + + u8 reserved_at_460[0x160]; + + u8 query_adjacent_functions_id[0x1]; + u8 ingress_egress_esw_vport_connect[0x1]; + u8 function_id_type_vhca_id[0x1]; + u8 reserved_at_5c3[0xd]; + u8 delegate_vhca_management_profiles[0x10]; + + u8 delegated_vhca_max[0x10]; + u8 delegate_vhca_max[0x10]; + + u8 reserved_at_600[0x200]; }; enum mlx5_ifc_flow_destination_type { @@ -3788,6 +3824,7 @@ union mlx5_ifc_hca_cap_union_bits { struct mlx5_ifc_macsec_cap_bits macsec_cap; struct mlx5_ifc_crypto_cap_bits crypto_cap; struct mlx5_ifc_ipsec_cap_bits ipsec_cap; + struct mlx5_ifc_psp_cap_bits psp_cap; u8 reserved_at_0[0x8000]; }; @@ -3817,6 +3854,7 @@ enum { enum { MLX5_FLOW_CONTEXT_ENCRYPT_DECRYPT_TYPE_IPSEC = 0x0, MLX5_FLOW_CONTEXT_ENCRYPT_DECRYPT_TYPE_MACSEC = 0x1, + MLX5_FLOW_CONTEXT_ENCRYPT_DECRYPT_TYPE_PSP = 0x2, }; struct mlx5_ifc_vlan_bits { @@ -5159,7 +5197,9 @@ struct mlx5_ifc_set_hca_cap_in_bits { u8 other_function[0x1]; u8 ec_vf_function[0x1]; - u8 reserved_at_42[0xe]; + u8 reserved_at_42[0x1]; + u8 function_id_type[0x1]; + u8 reserved_at_44[0xc]; u8 function_id[0x10]; u8 reserved_at_60[0x20]; @@ -6357,7 +6397,9 @@ struct mlx5_ifc_query_hca_cap_in_bits { u8 other_function[0x1]; u8 ec_vf_function[0x1]; - u8 reserved_at_42[0xe]; + u8 reserved_at_42[0x1]; + u8 function_id_type[0x1]; + u8 reserved_at_44[0xc]; u8 function_id[0x10]; u8 reserved_at_60[0x20]; @@ -6983,6 +7025,28 @@ struct mlx5_ifc_query_esw_vport_context_in_bits { u8 reserved_at_60[0x20]; }; +struct mlx5_ifc_destroy_esw_vport_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x20]; +}; + +struct mlx5_ifc_destroy_esw_vport_in_bits { + u8 opcode[0x10]; + u8 uid[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x10]; + u8 vport_num[0x10]; + + u8 reserved_at_60[0x20]; +}; + struct mlx5_ifc_modify_esw_vport_context_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -7118,6 +7182,8 @@ enum mlx5_reformat_ctx_type { MLX5_REFORMAT_TYPE_DEL_ESP_TRANSPORT_OVER_UDP = 0xa, MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV6 = 0xb, MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_UDPV6 = 0xc, + MLX5_REFORMAT_TYPE_ADD_PSP_TUNNEL = 0xd, + MLX5_REFORMAT_TYPE_DEL_PSP_TUNNEL = 0xe, MLX5_REFORMAT_TYPE_INSERT_HDR = 0xf, MLX5_REFORMAT_TYPE_REMOVE_HDR = 0x10, MLX5_REFORMAT_TYPE_ADD_MACSEC = 0x11, @@ -7244,6 +7310,7 @@ enum { MLX5_ACTION_IN_FIELD_IPSEC_SYNDROME = 0x5D, MLX5_ACTION_IN_FIELD_OUT_EMD_47_32 = 0x6F, MLX5_ACTION_IN_FIELD_OUT_EMD_31_0 = 0x70, + MLX5_ACTION_IN_FIELD_PSP_SYNDROME = 0x71, }; struct mlx5_ifc_alloc_modify_header_context_out_bits { @@ -7484,6 +7551,85 @@ struct mlx5_ifc_query_adapter_in_bits { u8 reserved_at_40[0x40]; }; +struct mlx5_ifc_function_vhca_rid_info_reg_bits { + u8 host_number[0x8]; + u8 host_pci_device_function[0x8]; + u8 host_pci_bus[0x8]; + u8 reserved_at_18[0x3]; + u8 pci_bus_assigned[0x1]; + u8 function_type[0x4]; + + u8 parent_pci_device_function[0x8]; + u8 parent_pci_bus[0x8]; + u8 vhca_id[0x10]; + + u8 reserved_at_40[0x10]; + u8 function_id[0x10]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_delegated_function_vhca_rid_info_bits { + struct 
mlx5_ifc_function_vhca_rid_info_reg_bits function_vhca_rid_info; + + u8 reserved_at_80[0x18]; + u8 manage_profile[0x8]; + + u8 reserved_at_a0[0x60]; +}; + +struct mlx5_ifc_query_delegated_vhca_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x20]; + + u8 reserved_at_60[0x10]; + u8 functions_count[0x10]; + + u8 reserved_at_80[0x80]; + + struct mlx5_ifc_delegated_function_vhca_rid_info_bits + delegated_function_vhca_rid_info[]; +}; + +struct mlx5_ifc_query_delegated_vhca_in_bits { + u8 opcode[0x10]; + u8 uid[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_create_esw_vport_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x20]; + + u8 reserved_at_60[0x10]; + u8 vport_num[0x10]; +}; + +struct mlx5_ifc_create_esw_vport_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x10]; + u8 managed_vhca_id[0x10]; + + u8 reserved_at_60[0x20]; +}; + struct mlx5_ifc_qp_2rst_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -7611,7 +7757,12 @@ struct mlx5_ifc_modify_vport_state_in_bits { u8 reserved_at_41[0xf]; u8 vport_number[0x10]; - u8 reserved_at_60[0x18]; + u8 reserved_at_60[0x10]; + u8 ingress_connect[0x1]; + u8 egress_connect[0x1]; + u8 ingress_connect_valid[0x1]; + u8 egress_connect_valid[0x1]; + u8 reserved_at_74[0x4]; u8 admin_state[0x4]; u8 reserved_at_7c[0x4]; }; @@ -12954,6 +13105,7 @@ enum { MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_PURPOSE_TLS = 0x1, MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_PURPOSE_IPSEC = 0x2, MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_PURPOSE_MACSEC = 0x4, + MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_PURPOSE_PSP = 0x6, }; struct mlx5_ifc_tls_static_params_bits { @@ -13371,4 +13523,64 @@ enum mlx5e_pcie_cong_event_mod_field { MLX5_PCIE_CONG_EVENT_MOD_THRESH = BIT(2), }; +struct mlx5_ifc_psp_rotate_key_in_bits { + u8 opcode[0x10]; + u8 uid[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_psp_rotate_key_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + +enum mlx5_psp_gen_spi_in_key_size { + MLX5_PSP_GEN_SPI_IN_KEY_SIZE_128 = 0x0, + MLX5_PSP_GEN_SPI_IN_KEY_SIZE_256 = 0x1, +}; + +struct mlx5_ifc_key_spi_bits { + u8 spi[0x20]; + + u8 reserved_at_20[0x60]; + + u8 key[8][0x20]; +}; + +struct mlx5_ifc_psp_gen_spi_in_bits { + u8 opcode[0x10]; + u8 uid[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x20]; + + u8 key_size[0x2]; + u8 reserved_at_62[0xe]; + u8 num_of_spi[0x10]; +}; + +struct mlx5_ifc_psp_gen_spi_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x10]; + u8 num_of_spi[0x10]; + + u8 reserved_at_60[0x20]; + + struct mlx5_ifc_key_spi_bits key_spi[]; +}; + #endif /* MLX5_IFC_H */ diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index c36cc6d82926..c87b9507cfa1 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -135,4 +135,6 @@ int mlx5_nic_vport_unaffiliate_multiport(struct mlx5_core_dev *port_mdev); u64 mlx5_query_nic_system_image_guid(struct mlx5_core_dev *mdev); int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 vport, void *out, u16 opmod); +int mlx5_vport_get_vhca_id(struct mlx5_core_dev *dev, u16 vport, u16 *vhca_id); + #endif /* __MLX5_VPORT_H__ */ diff --git 
a/include/linux/net/intel/libie/adminq.h b/include/linux/net/intel/libie/adminq.h index 012b5d499c1a..ba62f703df43 100644 --- a/include/linux/net/intel/libie/adminq.h +++ b/include/linux/net/intel/libie/adminq.h @@ -192,8 +192,9 @@ LIBIE_CHECK_STRUCT_LEN(16, libie_aqc_list_caps); #define LIBIE_AQC_CAPS_TX_SCHED_TOPO_COMP_MODE 0x0085 #define LIBIE_AQC_CAPS_NAC_TOPOLOGY 0x0087 #define LIBIE_AQC_CAPS_FW_LAG_SUPPORT 0x0092 -#define LIBIE_AQC_BIT_ROCEV2_LAG 0x01 -#define LIBIE_AQC_BIT_SRIOV_LAG 0x02 +#define LIBIE_AQC_BIT_ROCEV2_LAG BIT(0) +#define LIBIE_AQC_BIT_SRIOV_LAG BIT(1) +#define LIBIE_AQC_BIT_SRIOV_AA_LAG BIT(2) #define LIBIE_AQC_CAPS_FLEX10 0x00F1 #define LIBIE_AQC_CAPS_CEM 0x00F2 diff --git a/include/linux/phy.h b/include/linux/phy.h index bb45787d8684..04553419adc3 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1273,9 +1273,13 @@ struct phy_driver { #define to_phy_driver(d) container_of_const(to_mdio_common_driver(d), \ struct phy_driver, mdiodrv) -#define PHY_ID_MATCH_EXACT(id) .phy_id = (id), .phy_id_mask = GENMASK(31, 0) -#define PHY_ID_MATCH_MODEL(id) .phy_id = (id), .phy_id_mask = GENMASK(31, 4) -#define PHY_ID_MATCH_VENDOR(id) .phy_id = (id), .phy_id_mask = GENMASK(31, 10) +#define PHY_ID_MATCH_EXACT_MASK GENMASK(31, 0) +#define PHY_ID_MATCH_MODEL_MASK GENMASK(31, 4) +#define PHY_ID_MATCH_VENDOR_MASK GENMASK(31, 10) + +#define PHY_ID_MATCH_EXACT(id) .phy_id = (id), .phy_id_mask = PHY_ID_MATCH_EXACT_MASK +#define PHY_ID_MATCH_MODEL(id) .phy_id = (id), .phy_id_mask = PHY_ID_MATCH_MODEL_MASK +#define PHY_ID_MATCH_VENDOR(id) .phy_id = (id), .phy_id_mask = PHY_ID_MATCH_VENDOR_MASK /** * phy_id_compare - compare @id1 with @id2 taking account of @mask @@ -1292,6 +1296,19 @@ static inline bool phy_id_compare(u32 id1, u32 id2, u32 mask) } /** + * phy_id_compare_vendor - compare @id with @vendor_mask + * @id: PHY ID + * @vendor_mask: PHY Vendor mask + * + * Return: true if the bits from @id match @vendor_mask using the + * generic PHY Vendor mask.
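+ *
+ * A minimal usage sketch (the vendor bits below are hypothetical and
+ * chosen only for illustration; 0x00221400 is PHY_ID_KSZ886X masked
+ * with PHY_ID_MATCH_VENDOR_MASK):
+ *
+ *	if (phy_id_compare_vendor(phydev->phy_id, 0x00221400))
+ *		dev_dbg(&phydev->mdio.dev, "Micrel vendor ID matched\n");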
+ */ +static inline bool phy_id_compare_vendor(u32 id, u32 vendor_mask) +{ + return phy_id_compare(id, vendor_mask, PHY_ID_MATCH_VENDOR_MASK); +} + +/** * phydev_id_compare - compare @id with the PHY's Clause 22 ID * @phydev: the PHY device * @id: the PHY ID to be matched diff --git a/include/linux/phy_fixed.h b/include/linux/phy_fixed.h index 5399b9e41e35..6227a1bdefec 100644 --- a/include/linux/phy_fixed.h +++ b/include/linux/phy_fixed.h @@ -17,7 +17,7 @@ struct net_device; #if IS_ENABLED(CONFIG_FIXED_PHY) extern int fixed_phy_change_carrier(struct net_device *dev, bool new_carrier); -int fixed_phy_add(int phy_id, const struct fixed_phy_status *status); +void fixed_phy_add(const struct fixed_phy_status *status); struct phy_device *fixed_phy_register(const struct fixed_phy_status *status, struct device_node *np); @@ -26,11 +26,7 @@ extern int fixed_phy_set_link_update(struct phy_device *phydev, int (*link_update)(struct net_device *, struct fixed_phy_status *)); #else -static inline int fixed_phy_add(int phy_id, - const struct fixed_phy_status *status) -{ - return -ENODEV; -} +static inline void fixed_phy_add(const struct fixed_phy_status *status) {} static inline struct phy_device * fixed_phy_register(const struct fixed_phy_status *status, struct device_node *np) diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 30659b615fca..9af0411761d7 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -320,9 +320,8 @@ int mac_prepare(struct phylink_config *config, unsigned int mode, * If in 802.3z mode, the link speed is fixed, dependent on the * @state->interface. Duplex and pause modes are negotiated via * the in-band configuration word. Advertised pause modes are set - * according to the @state->an_enabled and @state->advertising - * flags. Beware of MACs which only support full duplex at gigabit - * and higher speeds. + * according to @state->advertising. Beware of MACs which only + * support full duplex at gigabit and higher speeds. * * If in Cisco SGMII mode, the link speed and duplex mode are passed * in the serial bitstream 16-bit configuration word, and the MAC @@ -331,7 +330,7 @@ int mac_prepare(struct phylink_config *config, unsigned int mode, * responsible for reading the configuration word and configuring * itself accordingly. * - * Valid state members: interface, an_enabled, pause, advertising. + * Valid state members: interface, pause, advertising. * * Implementations are expected to update the MAC to reflect the * requested settings - i.o.w., if nothing has changed between two diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index 3d089bd4d5e9..7dd7951b23d5 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -361,6 +361,24 @@ extern void ptp_clock_event(struct ptp_clock *ptp, extern int ptp_clock_index(struct ptp_clock *ptp); /** + * ptp_clock_index_by_of_node() - obtain the device index of + * a PTP clock based on the PTP device of_node + * + * @np: The device of_node pointer of the PTP device. + * Return: The PHC index on success or -1 on failure. + */ +int ptp_clock_index_by_of_node(struct device_node *np); + +/** + * ptp_clock_index_by_dev() - obtain the device index of + * a PTP clock based on the PTP device. + * + * @parent: The parent device (PTP device) pointer of the PTP clock. + * Return: The PHC index on success or -1 on failure. 
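+ *
+ * A usage sketch (priv->ptp_dev and ndev are hypothetical caller
+ * fields, for illustration only):
+ *
+ *	int phc_idx = ptp_clock_index_by_dev(priv->ptp_dev);
+ *
+ *	if (phc_idx < 0)
+ *		netdev_warn(ndev, "PTP clock not registered yet\n");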
+ */ +int ptp_clock_index_by_dev(struct device *parent); + +/** * ptp_find_pin() - obtain the pin index of a given auxiliary function * * The caller must hold ptp_clock::pincfg_mux. Drivers do not have @@ -425,6 +443,10 @@ static inline void ptp_clock_event(struct ptp_clock *ptp, { } static inline int ptp_clock_index(struct ptp_clock *ptp) { return -1; } +static inline int ptp_clock_index_by_of_node(struct device_node *np) +{ return -1; } +static inline int ptp_clock_index_by_dev(struct device *parent) +{ return -1; } static inline int ptp_find_pin(struct ptp_clock *ptp, enum ptp_pin_function func, unsigned int chan) { return -1; } diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index fa633657e4c0..62e7addccdf6 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1159,6 +1159,45 @@ static inline struct dst_entry *skb_dst(const struct sk_buff *skb) return (struct dst_entry *)(skb->_skb_refdst & SKB_DST_PTRMASK); } +static inline void skb_dst_check_unset(struct sk_buff *skb) +{ + DEBUG_NET_WARN_ON_ONCE((skb->_skb_refdst & SKB_DST_PTRMASK) && + !(skb->_skb_refdst & SKB_DST_NOREF)); +} + +/** + * skb_dstref_steal() - return current dst_entry value and clear it + * @skb: buffer + * + * Resets skb dst_entry without adjusting its reference count. Useful in + * cases where dst_entry needs to be temporarily reset and restored. + * Note that the returned value cannot be used directly because it + * might contain SKB_DST_NOREF bit. + * + * When in doubt, prefer skb_dst_drop() over skb_dstref_steal() to correctly + * handle dst_entry reference counting. + * + * Returns: original skb dst_entry. + */ +static inline unsigned long skb_dstref_steal(struct sk_buff *skb) +{ + unsigned long refdst = skb->_skb_refdst; + + skb->_skb_refdst = 0; + return refdst; +} + +/** + * skb_dstref_restore() - restore skb dst_entry removed via skb_dstref_steal() + * @skb: buffer + * @refdst: dst entry from a call to skb_dstref_steal() + */ +static inline void skb_dstref_restore(struct sk_buff *skb, unsigned long refdst) +{ + skb_dst_check_unset(skb); + skb->_skb_refdst = refdst; +} + /** * skb_dst_set - sets skb dst * @skb: buffer @@ -1169,6 +1208,7 @@ static inline struct dst_entry *skb_dst(const struct sk_buff *skb) */ static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst) { + skb_dst_check_unset(skb); skb->slow_gro |= !!dst; skb->_skb_refdst = (unsigned long)dst; } @@ -1185,6 +1225,7 @@ static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst) */ static inline void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst) { + skb_dst_check_unset(skb); WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held()); skb->slow_gro |= !!dst; skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF; diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 0b9095a281b8..49847888c287 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -315,7 +315,7 @@ static inline bool sk_psock_test_state(const struct sk_psock *psock, static inline void sock_drop(struct sock *sk, struct sk_buff *skb) { - sk_drops_add(sk, skb); + sk_drops_skbadd(sk, skb); kfree_skb(skb); } diff --git a/include/linux/soc/airoha/airoha_offload.h b/include/linux/soc/airoha/airoha_offload.h new file mode 100644 index 000000000000..1dc5b4e35ef9 --- /dev/null +++ b/include/linux/soc/airoha/airoha_offload.h @@ -0,0 +1,315 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2025 AIROHA Inc + * Author: Lorenzo Bianconi <lorenzo@kernel.org> + */ +#ifndef 
AIROHA_OFFLOAD_H +#define AIROHA_OFFLOAD_H + +#include <linux/spinlock.h> +#include <linux/workqueue.h> + +enum { + PPE_CPU_REASON_HIT_UNBIND_RATE_REACHED = 0x0f, +}; + +struct airoha_ppe_dev { + struct { + int (*setup_tc_block_cb)(struct airoha_ppe_dev *dev, + void *type_data); + void (*check_skb)(struct airoha_ppe_dev *dev, + struct sk_buff *skb, u16 hash, + bool rx_wlan); + } ops; + + void *priv; +}; + +#if (IS_BUILTIN(CONFIG_NET_AIROHA) || IS_MODULE(CONFIG_NET_AIROHA)) +struct airoha_ppe_dev *airoha_ppe_get_dev(struct device *dev); +void airoha_ppe_put_dev(struct airoha_ppe_dev *dev); + +static inline int airoha_ppe_dev_setup_tc_block_cb(struct airoha_ppe_dev *dev, + void *type_data) +{ + return dev->ops.setup_tc_block_cb(dev, type_data); +} + +static inline void airoha_ppe_dev_check_skb(struct airoha_ppe_dev *dev, + struct sk_buff *skb, + u16 hash, bool rx_wlan) +{ + dev->ops.check_skb(dev, skb, hash, rx_wlan); +} +#else +static inline struct airoha_ppe_dev *airoha_ppe_get_dev(struct device *dev) +{ + return NULL; +} + +static inline void airoha_ppe_put_dev(struct airoha_ppe_dev *dev) +{ +} + +static inline int airoha_ppe_dev_setup_tc_block_cb(struct airoha_ppe_dev *dev, + void *type_data) +{ + return -EOPNOTSUPP; +} + +static inline void airoha_ppe_dev_check_skb(struct airoha_ppe_dev *dev, + struct sk_buff *skb, u16 hash, + bool rx_wlan) +{ +} +#endif + +#define NPU_NUM_CORES 8 +#define NPU_NUM_IRQ 6 +#define NPU_RX0_DESC_NUM 512 +#define NPU_RX1_DESC_NUM 512 + +/* CTRL */ +#define NPU_RX_DMA_DESC_LAST_MASK BIT(29) +#define NPU_RX_DMA_DESC_LEN_MASK GENMASK(28, 15) +#define NPU_RX_DMA_DESC_CUR_LEN_MASK GENMASK(14, 1) +#define NPU_RX_DMA_DESC_DONE_MASK BIT(0) +/* INFO */ +#define NPU_RX_DMA_PKT_COUNT_MASK GENMASK(31, 28) +#define NPU_RX_DMA_PKT_ID_MASK GENMASK(28, 26) +#define NPU_RX_DMA_SRC_PORT_MASK GENMASK(25, 21) +#define NPU_RX_DMA_CRSN_MASK GENMASK(20, 16) +#define NPU_RX_DMA_FOE_ID_MASK GENMASK(15, 0) +/* DATA */ +#define NPU_RX_DMA_SID_MASK GENMASK(31, 16) +#define NPU_RX_DMA_FRAG_TYPE_MASK GENMASK(15, 14) +#define NPU_RX_DMA_PRIORITY_MASK GENMASK(13, 10) +#define NPU_RX_DMA_RADIO_ID_MASK GENMASK(9, 6) +#define NPU_RX_DMA_VAP_ID_MASK GENMASK(5, 2) +#define NPU_RX_DMA_FRAME_TYPE_MASK GENMASK(1, 0) + +struct airoha_npu_rx_dma_desc { + u32 ctrl; + u32 info; + u32 data; + u32 addr; + u64 rsv; +} __packed; + +/* CTRL */ +#define NPU_TX_DMA_DESC_SCHED_MASK BIT(31) +#define NPU_TX_DMA_DESC_LEN_MASK GENMASK(30, 18) +#define NPU_TX_DMA_DESC_VEND_LEN_MASK GENMASK(17, 1) +#define NPU_TX_DMA_DESC_DONE_MASK BIT(0) + +#define NPU_TXWI_LEN 192 + +struct airoha_npu_tx_dma_desc { + u32 ctrl; + u32 addr; + u64 rsv; + u8 txwi[NPU_TXWI_LEN]; +} __packed; + +enum airoha_npu_wlan_set_cmd { + WLAN_FUNC_SET_WAIT_PCIE_ADDR, + WLAN_FUNC_SET_WAIT_DESC, + WLAN_FUNC_SET_WAIT_NPU_INIT_DONE, + WLAN_FUNC_SET_WAIT_TRAN_TO_CPU, + WLAN_FUNC_SET_WAIT_BA_WIN_SIZE, + WLAN_FUNC_SET_WAIT_DRIVER_MODEL, + WLAN_FUNC_SET_WAIT_DEL_STA, + WLAN_FUNC_SET_WAIT_DRAM_BA_NODE_ADDR, + WLAN_FUNC_SET_WAIT_PKT_BUF_ADDR, + WLAN_FUNC_SET_WAIT_IS_TEST_NOBA, + WLAN_FUNC_SET_WAIT_FLUSHONE_TIMEOUT, + WLAN_FUNC_SET_WAIT_FLUSHALL_TIMEOUT, + WLAN_FUNC_SET_WAIT_IS_FORCE_TO_CPU, + WLAN_FUNC_SET_WAIT_PCIE_STATE, + WLAN_FUNC_SET_WAIT_PCIE_PORT_TYPE, + WLAN_FUNC_SET_WAIT_ERROR_RETRY_TIMES, + WLAN_FUNC_SET_WAIT_BAR_INFO, + WLAN_FUNC_SET_WAIT_FAST_FLAG, + WLAN_FUNC_SET_WAIT_NPU_BAND0_ONCPU, + WLAN_FUNC_SET_WAIT_TX_RING_PCIE_ADDR, + WLAN_FUNC_SET_WAIT_TX_DESC_HW_BASE, + WLAN_FUNC_SET_WAIT_TX_BUF_SPACE_HW_BASE, +
WLAN_FUNC_SET_WAIT_RX_RING_FOR_TXDONE_HW_BASE, + WLAN_FUNC_SET_WAIT_TX_PKT_BUF_ADDR, + WLAN_FUNC_SET_WAIT_INODE_TXRX_REG_ADDR, + WLAN_FUNC_SET_WAIT_INODE_DEBUG_FLAG, + WLAN_FUNC_SET_WAIT_INODE_HW_CFG_INFO, + WLAN_FUNC_SET_WAIT_INODE_STOP_ACTION, + WLAN_FUNC_SET_WAIT_INODE_PCIE_SWAP, + WLAN_FUNC_SET_WAIT_RATELIMIT_CTRL, + WLAN_FUNC_SET_WAIT_HWNAT_INIT, + WLAN_FUNC_SET_WAIT_ARHT_CHIP_INFO, + WLAN_FUNC_SET_WAIT_TX_BUF_CHECK_ADDR, + WLAN_FUNC_SET_WAIT_TOKEN_ID_SIZE, +}; + +enum airoha_npu_wlan_get_cmd { + WLAN_FUNC_GET_WAIT_NPU_INFO, + WLAN_FUNC_GET_WAIT_LAST_RATE, + WLAN_FUNC_GET_WAIT_COUNTER, + WLAN_FUNC_GET_WAIT_DBG_COUNTER, + WLAN_FUNC_GET_WAIT_RXDESC_BASE, + WLAN_FUNC_GET_WAIT_WCID_DBG_COUNTER, + WLAN_FUNC_GET_WAIT_DMA_ADDR, + WLAN_FUNC_GET_WAIT_RING_SIZE, + WLAN_FUNC_GET_WAIT_NPU_SUPPORT_MAP, + WLAN_FUNC_GET_WAIT_MDC_LOCK_ADDRESS, + WLAN_FUNC_GET_WAIT_NPU_VERSION, +}; + +struct airoha_npu { +#if (IS_BUILTIN(CONFIG_NET_AIROHA_NPU) || IS_MODULE(CONFIG_NET_AIROHA_NPU)) + struct device *dev; + struct regmap *regmap; + + struct airoha_npu_core { + struct airoha_npu *npu; + /* protect concurrent npu memory accesses */ + spinlock_t lock; + struct work_struct wdt_work; + } cores[NPU_NUM_CORES]; + + int irqs[NPU_NUM_IRQ]; + + struct airoha_foe_stats __iomem *stats; + + struct { + int (*ppe_init)(struct airoha_npu *npu); + int (*ppe_deinit)(struct airoha_npu *npu); + int (*ppe_flush_sram_entries)(struct airoha_npu *npu, + dma_addr_t foe_addr, + int sram_num_entries); + int (*ppe_foe_commit_entry)(struct airoha_npu *npu, + dma_addr_t foe_addr, + u32 entry_size, u32 hash, + bool ppe2); + int (*wlan_init_reserved_memory)(struct airoha_npu *npu); + int (*wlan_send_msg)(struct airoha_npu *npu, int ifindex, + enum airoha_npu_wlan_set_cmd func_id, + void *data, int data_len, gfp_t gfp); + int (*wlan_get_msg)(struct airoha_npu *npu, int ifindex, + enum airoha_npu_wlan_get_cmd func_id, + void *data, int data_len, gfp_t gfp); + u32 (*wlan_get_queue_addr)(struct airoha_npu *npu, int qid, + bool xmit); + void (*wlan_set_irq_status)(struct airoha_npu *npu, u32 val); + u32 (*wlan_get_irq_status)(struct airoha_npu *npu, int q); + void (*wlan_enable_irq)(struct airoha_npu *npu, int q); + void (*wlan_disable_irq)(struct airoha_npu *npu, int q); + } ops; +#endif +}; + +#if (IS_BUILTIN(CONFIG_NET_AIROHA_NPU) || IS_MODULE(CONFIG_NET_AIROHA_NPU)) +struct airoha_npu *airoha_npu_get(struct device *dev, dma_addr_t *stats_addr); +void airoha_npu_put(struct airoha_npu *npu); + +static inline int airoha_npu_wlan_init_reserved_memory(struct airoha_npu *npu) +{ + return npu->ops.wlan_init_reserved_memory(npu); +} + +static inline int airoha_npu_wlan_send_msg(struct airoha_npu *npu, + int ifindex, + enum airoha_npu_wlan_set_cmd cmd, + void *data, int data_len, gfp_t gfp) +{ + return npu->ops.wlan_send_msg(npu, ifindex, cmd, data, data_len, gfp); +} + +static inline int airoha_npu_wlan_get_msg(struct airoha_npu *npu, int ifindex, + enum airoha_npu_wlan_get_cmd cmd, + void *data, int data_len, gfp_t gfp) +{ + return npu->ops.wlan_get_msg(npu, ifindex, cmd, data, data_len, gfp); +} + +static inline u32 airoha_npu_wlan_get_queue_addr(struct airoha_npu *npu, + int qid, bool xmit) +{ + return npu->ops.wlan_get_queue_addr(npu, qid, xmit); +} + +static inline void airoha_npu_wlan_set_irq_status(struct airoha_npu *npu, + u32 val) +{ + npu->ops.wlan_set_irq_status(npu, val); +} + +static inline u32 airoha_npu_wlan_get_irq_status(struct airoha_npu *npu, int q) +{ + return npu->ops.wlan_get_irq_status(npu, q); +} + +static inline void 
airoha_npu_wlan_enable_irq(struct airoha_npu *npu, int q) +{ + npu->ops.wlan_enable_irq(npu, q); +} + +static inline void airoha_npu_wlan_disable_irq(struct airoha_npu *npu, int q) +{ + npu->ops.wlan_disable_irq(npu, q); +} +#else +static inline struct airoha_npu *airoha_npu_get(struct device *dev, + dma_addr_t *foe_stats_addr) +{ + return NULL; +} + +static inline void airoha_npu_put(struct airoha_npu *npu) +{ +} + +static inline int airoha_npu_wlan_init_reserved_memory(struct airoha_npu *npu) +{ + return -EOPNOTSUPP; +} + +static inline int airoha_npu_wlan_send_msg(struct airoha_npu *npu, + int ifindex, + enum airoha_npu_wlan_set_cmd cmd, + void *data, int data_len, gfp_t gfp) +{ + return -EOPNOTSUPP; +} + +static inline int airoha_npu_wlan_get_msg(struct airoha_npu *npu, int ifindex, + enum airoha_npu_wlan_get_cmd cmd, + void *data, int data_len, gfp_t gfp) +{ + return -EOPNOTSUPP; +} + +static inline u32 airoha_npu_wlan_get_queue_addr(struct airoha_npu *npu, + int qid, bool xmit) +{ + return 0; +} + +static inline void airoha_npu_wlan_set_irq_status(struct airoha_npu *npu, + u32 val) +{ +} + +static inline u32 airoha_npu_wlan_get_irq_status(struct airoha_npu *npu, + int q) +{ + return 0; +} + +static inline void airoha_npu_wlan_enable_irq(struct airoha_npu *npu, int q) +{ +} + +static inline void airoha_npu_wlan_disable_irq(struct airoha_npu *npu, int q) +{ +} +#endif + +#endif /* AIROHA_OFFLOAD_H */ diff --git a/include/linux/soc/mediatek/mtk_wed.h b/include/linux/soc/mediatek/mtk_wed.h index d8949a4ed0dc..c4ff6bab176d 100644 --- a/include/linux/soc/mediatek/mtk_wed.h +++ b/include/linux/soc/mediatek/mtk_wed.h @@ -147,7 +147,7 @@ struct mtk_wed_device { u32 wpdma_tx; u32 wpdma_txfree; u32 wpdma_rx_glo; - u32 wpdma_rx; + u32 wpdma_rx[MTK_WED_RX_QUEUES]; u32 wpdma_rx_rro[MTK_WED_RX_QUEUES]; u32 wpdma_rx_pg; diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 26ddf95d23f9..e284f04964bf 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -238,7 +238,7 @@ struct plat_stmmacenet_data { int (*set_clk_tx_rate)(void *priv, struct clk *clk_tx_i, phy_interface_t interface, int speed); void (*fix_mac_speed)(void *priv, int speed, unsigned int mode); - int (*fix_soc_reset)(void *priv, void __iomem *ioaddr); + int (*fix_soc_reset)(struct stmmac_priv *priv, void __iomem *ioaddr); int (*serdes_powerup)(struct net_device *ndev, void *priv); void (*serdes_powerdown)(struct net_device *ndev, void *priv); int (*mac_finish)(struct net_device *ndev, @@ -248,6 +248,8 @@ struct plat_stmmacenet_data { void (*ptp_clk_freq_config)(struct stmmac_priv *priv); int (*init)(struct platform_device *pdev, void *priv); void (*exit)(struct platform_device *pdev, void *priv); + int (*suspend)(struct device *dev, void *priv); + int (*resume)(struct device *dev, void *priv); struct mac_device_info *(*setup)(void *priv); int (*clks_config)(void *priv, bool enabled); int (*crosststamp)(ktime_t *device, struct system_counterval_t *system, diff --git a/include/linux/udp.h b/include/linux/udp.h index 4e1a672af4c5..981506be1e15 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -108,6 +108,7 @@ struct udp_sock { * the last UDP socket cacheline. 
*/ struct hlist_node tunnel_list; + struct socket_drop_counters drop_counters; }; #define udp_test_bit(nr, sk) \ diff --git a/include/net/act_api.h b/include/net/act_api.h index 2894cfff2da3..91a24b5e0b93 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -33,7 +33,10 @@ struct tc_action { struct tcf_t tcfa_tm; struct gnet_stats_basic_sync tcfa_bstats; struct gnet_stats_basic_sync tcfa_bstats_hw; - struct gnet_stats_queue tcfa_qstats; + + atomic_t tcfa_drops; + atomic_t tcfa_overlimits; + struct net_rate_estimator __rcu *tcfa_rate_est; spinlock_t tcfa_lock; struct gnet_stats_basic_sync __percpu *cpu_bstats; @@ -53,7 +56,6 @@ struct tc_action { #define tcf_action common.tcfa_action #define tcf_tm common.tcfa_tm #define tcf_bstats common.tcfa_bstats -#define tcf_qstats common.tcfa_qstats #define tcf_rate_est common.tcfa_rate_est #define tcf_lock common.tcfa_lock @@ -241,9 +243,7 @@ static inline void tcf_action_inc_drop_qstats(struct tc_action *a) qstats_drop_inc(this_cpu_ptr(a->cpu_qstats)); return; } - spin_lock(&a->tcfa_lock); - qstats_drop_inc(&a->tcfa_qstats); - spin_unlock(&a->tcfa_lock); + atomic_inc(&a->tcfa_drops); } static inline void tcf_action_inc_overlimit_qstats(struct tc_action *a) @@ -252,9 +252,7 @@ static inline void tcf_action_inc_overlimit_qstats(struct tc_action *a) qstats_overlimit_inc(this_cpu_ptr(a->cpu_qstats)); return; } - spin_lock(&a->tcfa_lock); - qstats_overlimit_inc(&a->tcfa_qstats); - spin_unlock(&a->tcfa_lock); + atomic_inc(&a->tcfa_overlimits); } void tcf_action_update_stats(struct tc_action *a, u64 bytes, u64 packets, diff --git a/include/net/bonding.h b/include/net/bonding.h index e06f0d63b2c1..37335f62f579 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -126,7 +126,6 @@ struct bond_params { int arp_interval; int arp_validate; int arp_all_targets; - int use_carrier; int fail_over_mac; int updelay; int downdelay; diff --git a/include/net/devlink.h b/include/net/devlink.h index b32c9ceeb81d..5f44e702c25c 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -746,6 +746,10 @@ enum devlink_health_reporter_state { * if priv_ctx is NULL, run a full dump * @diagnose: callback to diagnose the current status * @test: callback to trigger a test event + * @default_graceful_period: default min time (in msec) + * between recovery attempts + * @default_burst_period: default time (in msec) for + * error recoveries before starting the grace period */ struct devlink_health_reporter_ops { @@ -760,6 +764,8 @@ struct devlink_health_reporter_ops { struct netlink_ext_ack *extack); int (*test)(struct devlink_health_reporter *reporter, struct netlink_ext_ack *extack); + u64 default_graceful_period; + u64 default_burst_period; }; /** @@ -1743,7 +1749,7 @@ void devlink_port_type_ib_set(struct devlink_port *devlink_port, struct ib_device *ibdev); void devlink_port_type_clear(struct devlink_port *devlink_port); void devlink_port_attrs_set(struct devlink_port *devlink_port, - struct devlink_port_attrs *devlink_port_attrs); + const struct devlink_port_attrs *attrs); void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port, u32 controller, u16 pf, bool external); void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, u32 controller, @@ -1928,22 +1934,22 @@ void devlink_fmsg_binary_pair_put(struct devlink_fmsg *fmsg, const char *name, struct devlink_health_reporter * devl_port_health_reporter_create(struct devlink_port *port, const struct devlink_health_reporter_ops *ops, - u64 graceful_period, void *priv); + void 
*priv); struct devlink_health_reporter * devlink_port_health_reporter_create(struct devlink_port *port, const struct devlink_health_reporter_ops *ops, - u64 graceful_period, void *priv); + void *priv); struct devlink_health_reporter * devl_health_reporter_create(struct devlink *devlink, const struct devlink_health_reporter_ops *ops, - u64 graceful_period, void *priv); + void *priv); struct devlink_health_reporter * devlink_health_reporter_create(struct devlink *devlink, const struct devlink_health_reporter_ops *ops, - u64 graceful_period, void *priv); + void *priv); void devl_health_reporter_destroy(struct devlink_health_reporter *reporter); diff --git a/include/net/dst.h b/include/net/dst.h index bab01363bb97..f8aa1239b4db 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -24,7 +24,10 @@ struct sk_buff; struct dst_entry { - struct net_device *dev; + union { + struct net_device *dev; + struct net_device __rcu *dev_rcu; + }; struct dst_ops *ops; unsigned long _metrics; unsigned long expires; @@ -570,9 +573,12 @@ static inline struct net_device *dst_dev(const struct dst_entry *dst) static inline struct net_device *dst_dev_rcu(const struct dst_entry *dst) { - /* In the future, use rcu_dereference(dst->dev) */ - WARN_ON_ONCE(!rcu_read_lock_held()); - return READ_ONCE(dst->dev); + return rcu_dereference(dst->dev_rcu); +} + +static inline struct net *dst_dev_net_rcu(const struct dst_entry *dst) +{ + return dev_net_rcu(dst_dev_rcu(dst)); } static inline struct net_device *skb_dst_dev(const struct sk_buff *skb) @@ -592,7 +598,7 @@ static inline struct net *skb_dst_dev_net(const struct sk_buff *skb) static inline struct net *skb_dst_dev_net_rcu(const struct sk_buff *skb) { - return dev_net_rcu(skb_dst_dev(skb)); + return dev_net_rcu(skb_dst_dev_rcu(skb)); } struct dst_entry *dst_blackhole_check(struct dst_entry *dst, u32 cookie); diff --git a/include/net/flow.h b/include/net/flow.h index a1839c278d87..ae9481c40063 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -12,6 +12,7 @@ #include <linux/atomic.h> #include <linux/container_of.h> #include <linux/uidgid.h> +#include <net/inet_dscp.h> struct flow_keys; @@ -32,7 +33,7 @@ struct flowi_common { int flowic_iif; int flowic_l3mdev; __u32 flowic_mark; - __u8 flowic_tos; + dscp_t flowic_dscp; __u8 flowic_scope; __u8 flowic_proto; __u8 flowic_flags; @@ -70,7 +71,7 @@ struct flowi4 { #define flowi4_iif __fl_common.flowic_iif #define flowi4_l3mdev __fl_common.flowic_l3mdev #define flowi4_mark __fl_common.flowic_mark -#define flowi4_tos __fl_common.flowic_tos +#define flowi4_dscp __fl_common.flowic_dscp #define flowi4_scope __fl_common.flowic_scope #define flowi4_proto __fl_common.flowic_proto #define flowi4_flags __fl_common.flowic_flags @@ -103,7 +104,7 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif, fl4->flowi4_iif = LOOPBACK_IFINDEX; fl4->flowi4_l3mdev = 0; fl4->flowi4_mark = mark; - fl4->flowi4_tos = tos; + fl4->flowi4_dscp = inet_dsfield_to_dscp(tos); fl4->flowi4_scope = scope; fl4->flowi4_proto = proto; fl4->flowi4_flags = flags; @@ -141,7 +142,7 @@ struct flowi6 { #define flowi6_uid __fl_common.flowic_uid struct in6_addr daddr; struct in6_addr saddr; - /* Note: flowi6_tos is encoded in flowlabel, too. */ + /* Note: flowi6_dscp is encoded in flowlabel, too. 
*/ __be32 flowlabel; union flowi_uli uli; #define fl6_sport uli.ports.sport @@ -163,7 +164,7 @@ struct flowi { #define flowi_iif u.__fl_common.flowic_iif #define flowi_l3mdev u.__fl_common.flowic_l3mdev #define flowi_mark u.__fl_common.flowic_mark -#define flowi_tos u.__fl_common.flowic_tos +#define flowi_dscp u.__fl_common.flowic_dscp #define flowi_scope u.__fl_common.flowic_scope #define flowi_proto u.__fl_common.flowic_proto #define flowi_flags u.__fl_common.flowic_flags diff --git a/include/net/genetlink.h b/include/net/genetlink.h index a03d56765832..7b84f2cef8b1 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -62,7 +62,7 @@ struct genl_info; * @small_ops: the small-struct operations supported by this family * @n_small_ops: number of small-struct operations supported by this family * @split_ops: the split do/dump form of operation definition - * @n_split_ops: number of entries in @split_ops, not that with split do/dump + * @n_split_ops: number of entries in @split_ops, note that with split do/dump * ops the number of entries is not the same as number of commands * @sock_priv_size: the size of per-socket private memory * @sock_priv_init: the per-socket private memory initializer diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index ab3929a2a956..1f985d2012ce 100644 --- a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h @@ -41,7 +41,6 @@ static inline unsigned int __inet6_ehashfn(const u32 lhash, * The sockhash lock must be held as a reader here. */ struct sock *__inet6_lookup_established(const struct net *net, - struct inet_hashinfo *hashinfo, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, @@ -65,7 +64,6 @@ struct sock *inet6_lookup_reuseport(const struct net *net, struct sock *sk, inet6_ehashfn_t *ehashfn); struct sock *inet6_lookup_listener(const struct net *net, - struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, const struct in6_addr *saddr, const __be16 sport, @@ -83,7 +81,6 @@ struct sock *inet6_lookup_run_sk_lookup(const struct net *net, inet6_ehashfn_t *ehashfn); static inline struct sock *__inet6_lookup(const struct net *net, - struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, const struct in6_addr *saddr, const __be16 sport, @@ -92,14 +89,14 @@ static inline struct sock *__inet6_lookup(const struct net *net, const int dif, const int sdif, bool *refcounted) { - struct sock *sk = __inet6_lookup_established(net, hashinfo, saddr, - sport, daddr, hnum, + struct sock *sk = __inet6_lookup_established(net, saddr, sport, + daddr, hnum, dif, sdif); *refcounted = true; if (sk) return sk; *refcounted = false; - return inet6_lookup_listener(net, hashinfo, skb, doff, saddr, sport, + return inet6_lookup_listener(net, skb, doff, saddr, sport, daddr, hnum, dif, sdif); } @@ -143,8 +140,7 @@ struct sock *inet6_steal_sock(struct net *net, struct sk_buff *skb, int doff, return reuse_sk; } -static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo, - struct sk_buff *skb, int doff, +static inline struct sock *__inet6_lookup_skb(struct sk_buff *skb, int doff, const __be16 sport, const __be16 dport, int iif, int sdif, @@ -161,14 +157,12 @@ static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo, if (sk) return sk; - return __inet6_lookup(net, hashinfo, skb, - doff, &ip6h->saddr, sport, + return __inet6_lookup(net, skb, doff, &ip6h->saddr, sport, &ip6h->daddr, ntohs(dport), iif, sdif, refcounted); } -struct sock *inet6_lookup(const 
struct net *net, struct inet_hashinfo *hashinfo, - struct sk_buff *skb, int doff, +struct sock *inet6_lookup(const struct net *net, struct sk_buff *skb, int doff, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, const __be16 dport, const int dif); diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 1735db332aab..0737d8e178dd 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -299,14 +299,8 @@ reqsk_timeout(struct request_sock *req, unsigned long max_timeout) return (unsigned long)min_t(u64, timeout, max_timeout); } -static inline void inet_csk_prepare_for_destroy_sock(struct sock *sk) -{ - /* The below has to be done to allow calling inet_csk_destroy_sock */ - sock_set_flag(sk, SOCK_DEAD); - this_cpu_inc(*sk->sk_prot->orphan_count); -} - void inet_csk_destroy_sock(struct sock *sk); +void inet_csk_prepare_for_destroy_sock(struct sock *sk); void inet_csk_prepare_forced_close(struct sock *sk); /* diff --git a/include/net/inet_dscp.h b/include/net/inet_dscp.h index 72f250dffada..1aa9f04ed1ab 100644 --- a/include/net/inet_dscp.h +++ b/include/net/inet_dscp.h @@ -39,6 +39,12 @@ typedef u8 __bitwise dscp_t; #define INET_DSCP_MASK 0xfc +/* A few places in the IPv4 code need to ignore the three high order bits of + * DSCP because of backward compatibility (as these bits used to represent the + * IPv4 Precedence in RFC 791's TOS field and were ignored). + */ +#define INET_DSCP_LEGACY_TOS_MASK ((__force dscp_t)0x1c) + static inline dscp_t inet_dsfield_to_dscp(__u8 dsfield) { return (__force dscp_t)(dsfield & INET_DSCP_MASK); diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 19dbd9081d5a..a3b32241c2f2 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -294,7 +294,6 @@ int inet_hash(struct sock *sk); void inet_unhash(struct sock *sk); struct sock *__inet_lookup_listener(const struct net *net, - struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, const __be32 saddr, const __be16 sport, const __be32 daddr, @@ -302,12 +301,12 @@ struct sock *__inet_lookup_listener(const struct net *net, const int dif, const int sdif); static inline struct sock *inet_lookup_listener(struct net *net, - struct inet_hashinfo *hashinfo, - struct sk_buff *skb, int doff, - __be32 saddr, __be16 sport, - __be32 daddr, __be16 dport, int dif, int sdif) + struct sk_buff *skb, int doff, + __be32 saddr, __be16 sport, + __be32 daddr, __be16 dport, + int dif, int sdif) { - return __inet_lookup_listener(net, hashinfo, skb, doff, saddr, sport, + return __inet_lookup_listener(net, skb, doff, saddr, sport, daddr, ntohs(dport), dif, sdif); } @@ -358,7 +357,6 @@ static inline bool inet_match(const struct net *net, const struct sock *sk, * not check it for lookups anymore, thanks Alexey. 
-DaveM */ struct sock *__inet_lookup_established(const struct net *net, - struct inet_hashinfo *hashinfo, const __be32 saddr, const __be16 sport, const __be32 daddr, const u16 hnum, const int dif, const int sdif); @@ -384,18 +382,16 @@ struct sock *inet_lookup_run_sk_lookup(const struct net *net, __be32 daddr, u16 hnum, const int dif, inet_ehashfn_t *ehashfn); -static inline struct sock * - inet_lookup_established(struct net *net, struct inet_hashinfo *hashinfo, - const __be32 saddr, const __be16 sport, - const __be32 daddr, const __be16 dport, - const int dif) +static inline struct sock *inet_lookup_established(struct net *net, + const __be32 saddr, const __be16 sport, + const __be32 daddr, const __be16 dport, + const int dif) { - return __inet_lookup_established(net, hashinfo, saddr, sport, daddr, + return __inet_lookup_established(net, saddr, sport, daddr, ntohs(dport), dif, 0); } static inline struct sock *__inet_lookup(struct net *net, - struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, const __be32 saddr, const __be16 sport, const __be32 daddr, const __be16 dport, @@ -405,18 +401,17 @@ static inline struct sock *__inet_lookup(struct net *net, u16 hnum = ntohs(dport); struct sock *sk; - sk = __inet_lookup_established(net, hashinfo, saddr, sport, + sk = __inet_lookup_established(net, saddr, sport, daddr, hnum, dif, sdif); *refcounted = true; if (sk) return sk; *refcounted = false; - return __inet_lookup_listener(net, hashinfo, skb, doff, saddr, + return __inet_lookup_listener(net, skb, doff, saddr, sport, daddr, hnum, dif, sdif); } static inline struct sock *inet_lookup(struct net *net, - struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, const __be32 saddr, const __be16 sport, const __be32 daddr, const __be16 dport, @@ -425,7 +420,7 @@ static inline struct sock *inet_lookup(struct net *net, struct sock *sk; bool refcounted; - sk = __inet_lookup(net, hashinfo, skb, doff, saddr, sport, daddr, + sk = __inet_lookup(net, skb, doff, saddr, sport, daddr, dport, dif, 0, &refcounted); if (sk && !refcounted && !refcount_inc_not_zero(&sk->sk_refcnt)) @@ -473,8 +468,7 @@ struct sock *inet_steal_sock(struct net *net, struct sk_buff *skb, int doff, return reuse_sk; } -static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo, - struct sk_buff *skb, +static inline struct sock *__inet_lookup_skb(struct sk_buff *skb, int doff, const __be16 sport, const __be16 dport, @@ -492,8 +486,7 @@ static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo, if (sk) return sk; - return __inet_lookup(net, hashinfo, skb, - doff, iph->saddr, sport, + return __inet_lookup(net, skb, doff, iph->saddr, sport, iph->daddr, dport, inet_iif(skb), sdif, refcounted); } diff --git a/include/net/ip.h b/include/net/ip.h index befcba575129..6dbd2bf8fa9c 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -467,12 +467,14 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst, bool forwarding) { const struct rtable *rt = dst_rtable(dst); + const struct net_device *dev; unsigned int mtu, res; struct net *net; rcu_read_lock(); - net = dev_net_rcu(dst_dev(dst)); + dev = dst_dev_rcu(dst); + net = dev_net_rcu(dev); if (READ_ONCE(net->ipv4.sysctl_ip_fwd_use_pmtu) || ip_mtu_locked(dst) || !forwarding) { @@ -486,7 +488,7 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst, if (mtu) goto out; - mtu = READ_ONCE(dst_dev(dst)->mtu); + mtu = READ_ONCE(dev->mtu); if (unlikely(ip_mtu_locked(dst))) { if (rt->rt_uses_gateway && 
mtu > 576) diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 9255f21818ee..59f48ca3abdf 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -337,7 +337,7 @@ static inline unsigned int ip6_dst_mtu_maybe_forward(const struct dst_entry *dst mtu = IPV6_MIN_MTU; rcu_read_lock(); - idev = __in6_dev_get(dst_dev(dst)); + idev = __in6_dev_get(dst_dev_rcu(dst)); if (idev) mtu = READ_ONCE(idev->cnf.mtu6); rcu_read_unlock(); diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 48bb3cf41469..b4495c38e0a0 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -440,7 +440,7 @@ static inline bool fib4_rules_early_flow_dissect(struct net *net, static inline bool fib_dscp_masked_match(dscp_t dscp, const struct flowi4 *fl4) { - return dscp == inet_dsfield_to_dscp(RT_TOS(fl4->flowi4_tos)); + return dscp == (fl4->flowi4_dscp & INET_DSCP_LEGACY_TOS_MASK); } /* Exported by fib_frontend.c */ diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 8cf1380f3656..4314a97702ea 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -11,7 +11,9 @@ #include <linux/bitops.h> #include <net/dsfield.h> +#include <net/flow.h> #include <net/gro_cells.h> +#include <net/inet_dscp.h> #include <net/inet_ecn.h> #include <net/netns/generic.h> #include <net/rtnetlink.h> @@ -362,7 +364,7 @@ static inline void ip_tunnel_init_flow(struct flowi4 *fl4, fl4->daddr = daddr; fl4->saddr = saddr; - fl4->flowi4_tos = tos; + fl4->flowi4_dscp = inet_dsfield_to_dscp(tos); fl4->flowi4_proto = proto; fl4->fl4_gre_key = key; fl4->flowi4_mark = mark; diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h index e1030a7d2daa..0921485565c0 100644 --- a/include/net/mana/mana.h +++ b/include/net/mana/mana.h @@ -65,6 +65,8 @@ enum TRI_STATE { #define MANA_STATS_RX_COUNT 5 #define MANA_STATS_TX_COUNT 11 +#define MANA_RX_FRAG_ALIGNMENT 64 + struct mana_stats_rx { u64 packets; u64 bytes; @@ -328,6 +330,7 @@ struct mana_rxq { u32 datasize; u32 alloc_size; u32 headroom; + u32 frag_count; mana_handle_t rxobj; @@ -510,6 +513,7 @@ struct mana_port_context { u32 rxbpre_datasize; u32 rxbpre_alloc_size; u32 rxbpre_headroom; + u32 rxbpre_frag_count; struct bpf_prog *bpf_prog; diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h index 6e835972abd1..cd00e0406cf4 100644 --- a/include/net/netdev_queues.h +++ b/include/net/netdev_queues.h @@ -127,6 +127,9 @@ void netdev_stat_queue_sum(struct net_device *netdev, * @ndo_queue_stop: Stop the RX queue at the specified index. The stopped * queue's memory is written at the specified address. * + * @ndo_queue_get_dma_dev: Get dma device for zero-copy operations to be used + * for this queue. Return NULL on error. + * * Note that @ndo_queue_mem_alloc and @ndo_queue_mem_free may be called while * the interface is closed. @ndo_queue_start and @ndo_queue_stop will only * be called for an interface which is open. @@ -144,8 +147,12 @@ struct netdev_queue_mgmt_ops { int (*ndo_queue_stop)(struct net_device *dev, void *per_queue_mem, int idx); + struct device * (*ndo_queue_get_dma_dev)(struct net_device *dev, + int idx); }; +bool netif_rxq_has_unreadable_mp(struct net_device *dev, int idx); + /** * DOC: Lockless queue stopping / waking helpers. 
* @@ -321,4 +328,6 @@ static inline void netif_subqueue_sent(const struct net_device *dev, get_desc, start_thrs); \ }) +struct device *netdev_queue_get_dma_dev(struct net_device *dev, int idx); + #endif diff --git a/include/net/netfilter/ipv4/nf_reject.h b/include/net/netfilter/ipv4/nf_reject.h index c653fcb88354..09de2f2686b5 100644 --- a/include/net/netfilter/ipv4/nf_reject.h +++ b/include/net/netfilter/ipv4/nf_reject.h @@ -10,14 +10,6 @@ void nf_send_unreach(struct sk_buff *skb_in, int code, int hook); void nf_send_reset(struct net *net, struct sock *, struct sk_buff *oldskb, int hook); -const struct tcphdr *nf_reject_ip_tcphdr_get(struct sk_buff *oldskb, - struct tcphdr *_oth, int hook); -struct iphdr *nf_reject_iphdr_put(struct sk_buff *nskb, - const struct sk_buff *oldskb, - __u8 protocol, int ttl); -void nf_reject_ip_tcphdr_put(struct sk_buff *nskb, const struct sk_buff *oldskb, - const struct tcphdr *oth); - struct sk_buff *nf_reject_skb_v4_unreach(struct net *net, struct sk_buff *oldskb, const struct net_device *dev, diff --git a/include/net/netfilter/ipv6/nf_reject.h b/include/net/netfilter/ipv6/nf_reject.h index d729344ba644..94ec0b9f2838 100644 --- a/include/net/netfilter/ipv6/nf_reject.h +++ b/include/net/netfilter/ipv6/nf_reject.h @@ -9,16 +9,6 @@ void nf_send_unreach6(struct net *net, struct sk_buff *skb_in, unsigned char cod unsigned int hooknum); void nf_send_reset6(struct net *net, struct sock *sk, struct sk_buff *oldskb, int hook); -const struct tcphdr *nf_reject_ip6_tcphdr_get(struct sk_buff *oldskb, - struct tcphdr *otcph, - unsigned int *otcplen, int hook); -struct ipv6hdr *nf_reject_ip6hdr_put(struct sk_buff *nskb, - const struct sk_buff *oldskb, - __u8 protocol, int hoplimit); -void nf_reject_ip6_tcphdr_put(struct sk_buff *nskb, - const struct sk_buff *oldskb, - const struct tcphdr *oth, unsigned int otcplen); - struct sk_buff *nf_reject_skb_v6_tcp_reset(struct net *net, struct sk_buff *oldskb, const struct net_device *dev, diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 891e43a01bdc..e2128663b160 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -556,6 +556,7 @@ struct nft_set_elem_expr { * @size: maximum set size * @field_len: length of each field in concatenation, bytes * @field_count: number of concatenated fields in element + * @in_update_walk: true during ->walk() in transaction phase * @use: number of rules references to this set * @nelems: number of elements * @ndeact: number of deactivated elements queued for removal @@ -590,6 +591,7 @@ struct nft_set { u32 size; u8 field_len[NFT_REG32_COUNT]; u8 field_count; + bool in_update_walk; u32 use; atomic_t nelems; u32 ndeact; diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index 6c2f483d9828..7644cfe9267d 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -73,7 +73,7 @@ struct nft_ct { struct nft_payload { enum nft_payload_bases base:8; - u8 offset; + u16 offset; u8 len; u8 dreg; }; diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 6373e3f17da8..54a7d187f62a 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -251,6 +251,7 @@ struct netns_ipv4 { int sysctl_igmp_qrv; struct ping_group_range ping_group_range; + u16 ping_port_rover; atomic_t dev_addr_genid; diff --git a/include/net/netns/sctp.h b/include/net/netns/sctp.h index d25cd7a9c5ff..c0f97f36389e 100644 --- a/include/net/netns/sctp.h +++ 
b/include/net/netns/sctp.h @@ -75,8 +75,8 @@ struct netns_sctp { /* Whether Cookie Preservative is enabled(1) or not(0) */ int cookie_preserve_enable; - /* The namespace default hmac alg */ - char *sctp_hmac_alg; + /* Whether cookie authentication is enabled(1) or not(0) */ + int cookie_auth_enable; /* Valid.Cookie.Life - 60 seconds */ unsigned int valid_cookie_life; diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h index e180bdf2f82b..664d5058e66e 100644 --- a/include/net/nfc/nci_core.h +++ b/include/net/nfc/nci_core.h @@ -52,7 +52,7 @@ enum nci_state { #define NCI_RF_DISC_SELECT_TIMEOUT 5000 #define NCI_RF_DEACTIVATE_TIMEOUT 30000 #define NCI_CMD_TIMEOUT 5000 -#define NCI_DATA_TIMEOUT 700 +#define NCI_DATA_TIMEOUT 3000 struct nci_dev; diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h index db180626be06..3247026e096a 100644 --- a/include/net/page_pool/helpers.h +++ b/include/net/page_pool/helpers.h @@ -489,6 +489,11 @@ page_pool_dma_sync_netmem_for_cpu(const struct page_pool *pool, offset, dma_sync_size); } +static inline void page_pool_get(struct page_pool *pool) +{ + refcount_inc(&pool->user_cnt); +} + static inline bool page_pool_put(struct page_pool *pool) { return refcount_dec_and_test(&pool->user_cnt); @@ -500,6 +505,18 @@ static inline void page_pool_nid_changed(struct page_pool *pool, int new_nid) page_pool_update_nid(pool, new_nid); } +/** + * page_pool_is_unreadable() - will allocated buffers be unreadable for the CPU + * @pool: queried page pool + * + * Check if page pool will return buffers which are unreadable to the CPU / + * kernel. This will only be the case if user space bound a memory provider (mp) + * which returns unreadable memory to the queue served by the page pool. + * If %PP_FLAG_ALLOW_UNREADABLE_NETMEM was set but there is no mp bound + * this helper will return false. See also netif_rxq_has_unreadable_mp(). 
+ * + * Return: true if memory allocated by the page pool may be unreadable + */ static inline bool page_pool_is_unreadable(struct page_pool *pool) { return !!pool->mp_ops; diff --git a/include/net/ping.h b/include/net/ping.h index bc7779262e60..9634b8800814 100644 --- a/include/net/ping.h +++ b/include/net/ping.h @@ -54,7 +54,6 @@ struct pingfakehdr { }; int ping_get_port(struct sock *sk, unsigned short ident); -int ping_hash(struct sock *sk); void ping_unhash(struct sock *sk); int ping_init_sock(struct sock *sk); diff --git a/include/net/proto_memory.h b/include/net/proto_memory.h index a6ab2f4f5e28..8e91a8fa31b5 100644 --- a/include/net/proto_memory.h +++ b/include/net/proto_memory.h @@ -31,8 +31,8 @@ static inline bool sk_under_memory_pressure(const struct sock *sk) if (!sk->sk_prot->memory_pressure) return false; - if (mem_cgroup_sockets_enabled && sk->sk_memcg && - mem_cgroup_under_socket_pressure(sk->sk_memcg)) + if (mem_cgroup_sk_enabled(sk) && + mem_cgroup_sk_under_memory_pressure(sk)) return true; return !!READ_ONCE(*sk->sk_prot->memory_pressure); diff --git a/include/net/raw.h b/include/net/raw.h index 32a61481a253..d52709139060 100644 --- a/include/net/raw.h +++ b/include/net/raw.h @@ -81,6 +81,7 @@ struct raw_sock { struct inet_sock inet; struct icmp_filter filter; u32 ipmr_table; + struct socket_drop_counters drop_counters; }; #define raw_sk(ptr) container_of_const(ptr, struct raw_sock, inet.sk) diff --git a/include/net/route.h b/include/net/route.h index 7ea840daa775..f90106f383c5 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -189,7 +189,7 @@ static inline struct rtable *ip_route_output(struct net *net, __be32 daddr, { struct flowi4 fl4 = { .flowi4_oif = oif, - .flowi4_tos = inet_dscp_to_dsfield(dscp), + .flowi4_dscp = dscp, .flowi4_scope = scope, .daddr = daddr, .saddr = saddr, @@ -390,7 +390,7 @@ static inline int ip4_dst_hoplimit(const struct dst_entry *dst) const struct net *net; rcu_read_lock(); - net = dev_net_rcu(dst_dev(dst)); + net = dst_dev_net_rcu(dst); hoplimit = READ_ONCE(net->ipv4.sysctl_ip_default_ttl); rcu_read_unlock(); } diff --git a/include/net/rps.h b/include/net/rps.h index d8ab3a08bcc4..f1794cd2e7fb 100644 --- a/include/net/rps.h +++ b/include/net/rps.h @@ -25,13 +25,16 @@ struct rps_map { /* * The rps_dev_flow structure contains the mapping of a flow to a CPU, the - * tail pointer for that CPU's input queue at the time of last enqueue, and - * a hardware filter index. + * tail pointer for that CPU's input queue at the time of last enqueue, a + * hardware filter index, and the hash of the flow if aRFS is enabled. */ struct rps_dev_flow { u16 cpu; u16 filter; unsigned int last_qtail; +#ifdef CONFIG_RFS_ACCEL + u32 hash; +#endif }; #define RPS_NO_FILTER 0xffff @@ -82,11 +85,8 @@ static inline void rps_record_sock_flow(struct rps_sock_flow_table *table, WRITE_ONCE(table->ents[index], val); } -#endif /* CONFIG_RPS */ - -static inline void sock_rps_record_flow_hash(__u32 hash) +static inline void _sock_rps_record_flow_hash(__u32 hash) { -#ifdef CONFIG_RPS struct rps_sock_flow_table *sock_flow_table; if (!hash) @@ -96,42 +96,33 @@ static inline void sock_rps_record_flow_hash(__u32 hash) if (sock_flow_table) rps_record_sock_flow(sock_flow_table, hash); rcu_read_unlock(); -#endif } -static inline void sock_rps_record_flow(const struct sock *sk) +static inline void _sock_rps_record_flow(const struct sock *sk) { -#ifdef CONFIG_RPS - if (static_branch_unlikely(&rfs_needed)) { - /* Reading sk->sk_rxhash might incur an expensive cache line - * miss. 
- * - * TCP_ESTABLISHED does cover almost all states where RFS - * might be useful, and is cheaper [1] than testing : - * IPv4: inet_sk(sk)->inet_daddr - * IPv6: ipv6_addr_any(&sk->sk_v6_daddr) - * OR an additional socket flag - * [1] : sk_state and sk_prot are in the same cache line. + /* Reading sk->sk_rxhash might incur an expensive cache line + * miss. + * + * TCP_ESTABLISHED does cover almost all states where RFS + * might be useful, and is cheaper [1] than testing : + * IPv4: inet_sk(sk)->inet_daddr + * IPv6: ipv6_addr_any(&sk->sk_v6_daddr) + * OR an additional socket flag + * [1] : sk_state and sk_prot are in the same cache line. + */ + if (sk->sk_state == TCP_ESTABLISHED) { + /* This READ_ONCE() is paired with the WRITE_ONCE() + * from sock_rps_save_rxhash() and sock_rps_reset_rxhash(). */ - if (sk->sk_state == TCP_ESTABLISHED) { - /* This READ_ONCE() is paired with the WRITE_ONCE() - * from sock_rps_save_rxhash() and sock_rps_reset_rxhash(). - */ - sock_rps_record_flow_hash(READ_ONCE(sk->sk_rxhash)); - } + _sock_rps_record_flow_hash(READ_ONCE(sk->sk_rxhash)); } -#endif } -static inline void sock_rps_delete_flow(const struct sock *sk) +static inline void _sock_rps_delete_flow(const struct sock *sk) { -#ifdef CONFIG_RPS struct rps_sock_flow_table *table; u32 hash, index; - if (!static_branch_unlikely(&rfs_needed)) - return; - hash = READ_ONCE(sk->sk_rxhash); if (!hash) return; @@ -144,6 +135,45 @@ static inline void sock_rps_delete_flow(const struct sock *sk) WRITE_ONCE(table->ents[index], RPS_NO_CPU); } rcu_read_unlock(); +} +#endif /* CONFIG_RPS */ + +static inline bool rfs_is_needed(void) +{ +#ifdef CONFIG_RPS + return static_branch_unlikely(&rfs_needed); +#else + return false; +#endif +} + +static inline void sock_rps_record_flow_hash(__u32 hash) +{ +#ifdef CONFIG_RPS + if (!rfs_is_needed()) + return; + + _sock_rps_record_flow_hash(hash); +#endif +} + +static inline void sock_rps_record_flow(const struct sock *sk) +{ +#ifdef CONFIG_RPS + if (!rfs_is_needed()) + return; + + _sock_rps_record_flow(sk); +#endif +} + +static inline void sock_rps_delete_flow(const struct sock *sk) +{ +#ifdef CONFIG_RPS + if (!rfs_is_needed()) + return; + + _sock_rps_delete_flow(sk); #endif } diff --git a/include/net/sctp/auth.h b/include/net/sctp/auth.h index d4b3b2dcd15b..3d5879e08e78 100644 --- a/include/net/sctp/auth.h +++ b/include/net/sctp/auth.h @@ -22,16 +22,11 @@ struct sctp_endpoint; struct sctp_association; struct sctp_authkey; struct sctp_hmacalgo; -struct crypto_shash; -/* - * Define a generic struct that will hold all the info - * necessary for an HMAC transform - */ +/* Defines an HMAC algorithm supported by SCTP chunk authentication */ struct sctp_hmac { - __u16 hmac_id; /* one of the above ids */ - char *hmac_name; /* name for loading */ - __u16 hmac_len; /* length of the signature */ + __u16 hmac_id; /* one of SCTP_AUTH_HMAC_ID_* */ + __u16 hmac_len; /* length of the HMAC value in bytes */ }; /* This is generic structure that containst authentication bytes used @@ -78,9 +73,9 @@ int sctp_auth_asoc_copy_shkeys(const struct sctp_endpoint *ep, struct sctp_association *asoc, gfp_t gfp); int sctp_auth_init_hmacs(struct sctp_endpoint *ep, gfp_t gfp); -void sctp_auth_destroy_hmacs(struct crypto_shash *auth_hmacs[]); -struct sctp_hmac *sctp_auth_get_hmac(__u16 hmac_id); -struct sctp_hmac *sctp_auth_asoc_get_hmac(const struct sctp_association *asoc); +const struct sctp_hmac *sctp_auth_get_hmac(__u16 hmac_id); +const struct sctp_hmac * +sctp_auth_asoc_get_hmac(const struct sctp_association 
*asoc); void sctp_auth_asoc_set_default_hmac(struct sctp_association *asoc, struct sctp_hmac_algo_param *hmacs); int sctp_auth_asoc_verify_hmac_id(const struct sctp_association *asoc, diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 5859e0a16a58..ae3376ba0b99 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -296,9 +296,8 @@ enum { SCTP_MAX_GABS = 16 }; */ #define SCTP_DEFAULT_MINSEGMENT 512 /* MTU size ... if no mtu disc */ -#define SCTP_SECRET_SIZE 32 /* Number of octets in a 256 bits. */ - -#define SCTP_SIGNATURE_SIZE 20 /* size of a SLA-1 signature */ +#define SCTP_COOKIE_KEY_SIZE 32 /* size of cookie HMAC key */ +#define SCTP_COOKIE_MAC_SIZE 32 /* size of HMAC field in cookies */ #define SCTP_COOKIE_MULTIPLE 32 /* Pad out our cookie to make our hash * functions simpler to write. @@ -417,16 +416,12 @@ enum { SCTP_AUTH_HMAC_ID_RESERVED_0, SCTP_AUTH_HMAC_ID_SHA1, SCTP_AUTH_HMAC_ID_RESERVED_2, -#if defined (CONFIG_CRYPTO_SHA256) || defined (CONFIG_CRYPTO_SHA256_MODULE) SCTP_AUTH_HMAC_ID_SHA256, -#endif __SCTP_AUTH_HMAC_MAX }; #define SCTP_AUTH_HMAC_ID_MAX __SCTP_AUTH_HMAC_MAX - 1 #define SCTP_AUTH_NUM_HMACS __SCTP_AUTH_HMAC_MAX -#define SCTP_SHA1_SIG_SIZE 20 -#define SCTP_SHA256_SIG_SIZE 32 /* SCTP-AUTH, Section 3.2 * The chunk types for INIT, INIT-ACK, SHUTDOWN-COMPLETE and AUTH chunks diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 8a540ad9b509..2ae390219efd 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -32,6 +32,7 @@ #ifndef __sctp_structs_h__ #define __sctp_structs_h__ +#include <crypto/sha2.h> #include <linux/ktime.h> #include <linux/generic-radix-tree.h> #include <linux/rhashtable-types.h> @@ -68,7 +69,6 @@ struct sctp_outq; struct sctp_bind_addr; struct sctp_ulpq; struct sctp_ep_common; -struct crypto_shash; struct sctp_stream; @@ -155,10 +155,6 @@ struct sctp_sock { /* PF_ family specific functions. */ struct sctp_pf *pf; - /* Access to HMAC transform. */ - struct crypto_shash *hmac; - char *sctp_hmac_alg; - /* What is our base endpointer? */ struct sctp_endpoint *ep; @@ -227,7 +223,8 @@ struct sctp_sock { frag_interleave:1, recvrcvinfo:1, recvnxtinfo:1, - data_ready_signalled:1; + data_ready_signalled:1, + cookie_auth_enable:1; atomic_t pd_mode; @@ -335,7 +332,7 @@ struct sctp_cookie { /* The format of our cookie that we send to our peer. */ struct sctp_signed_cookie { - __u8 signature[SCTP_SECRET_SIZE]; + __u8 mac[SCTP_COOKIE_MAC_SIZE]; __u32 __pad; /* force sctp_cookie alignment to 64 bits */ struct sctp_cookie c; } __packed; @@ -1307,33 +1304,15 @@ struct sctp_endpoint { /* This is really a list of struct sctp_association entries. */ struct list_head asocs; - /* Secret Key: A secret key used by this endpoint to compute - * the MAC. This SHOULD be a cryptographic quality - * random number with a sufficient length. - * Discussion in [RFC1750] can be helpful in - * selection of the key. - */ - __u8 secret_key[SCTP_SECRET_SIZE]; - - /* digest: This is a digest of the sctp cookie. This field is - * only used on the receive path when we try to validate - * that the cookie has not been tampered with. We put - * this here so we pre-allocate this once and can re-use - * on every receive. - */ - __u8 *digest; - + /* Cookie authentication key used by this endpoint */ + struct hmac_sha256_key cookie_auth_key; + /* sendbuf acct. policy. */ __u32 sndbuf_policy; /* rcvbuf acct. policy. 
*/ __u32 rcvbuf_policy; - /* SCTP AUTH: array of the HMACs that will be allocated - * we need this per association so that we don't serialize - */ - struct crypto_shash **auth_hmacs; - /* SCTP-AUTH: hmacs for the endpoint encoded into parameter */ struct sctp_hmac_algo_param *auth_hmacs_list; diff --git a/include/net/seg6_hmac.h b/include/net/seg6_hmac.h index 24f733b3e3fe..e9f41725933e 100644 --- a/include/net/seg6_hmac.h +++ b/include/net/seg6_hmac.h @@ -9,6 +9,8 @@ #ifndef _NET_SEG6_HMAC_H #define _NET_SEG6_HMAC_H +#include <crypto/sha1.h> +#include <crypto/sha2.h> #include <net/flow.h> #include <net/ip6_fib.h> #include <net/sock.h> @@ -19,7 +21,6 @@ #include <linux/seg6_hmac.h> #include <linux/rhashtable-types.h> -#define SEG6_HMAC_MAX_DIGESTSIZE 160 #define SEG6_HMAC_RING_SIZE 256 struct seg6_hmac_info { @@ -27,16 +28,15 @@ struct seg6_hmac_info { struct rcu_head rcu; u32 hmackeyid; + /* The raw key, kept only so it can be returned back to userspace */ char secret[SEG6_HMAC_SECRET_LEN]; u8 slen; u8 alg_id; -}; - -struct seg6_hmac_algo { - u8 alg_id; - char name[64]; - struct crypto_shash * __percpu *tfms; - struct shash_desc * __percpu *shashs; + /* The prepared key, which the calculations actually use */ + union { + struct hmac_sha1_key sha1; + struct hmac_sha256_key sha256; + } key; }; extern int seg6_hmac_compute(struct seg6_hmac_info *hinfo, @@ -50,13 +50,9 @@ extern int seg6_push_hmac(struct net *net, struct in6_addr *saddr, struct ipv6_sr_hdr *srh); extern bool seg6_hmac_validate_skb(struct sk_buff *skb); #ifdef CONFIG_IPV6_SEG6_HMAC -extern int seg6_hmac_init(void); -extern void seg6_hmac_exit(void); extern int seg6_hmac_net_init(struct net *net); extern void seg6_hmac_net_exit(struct net *net); #else -static inline int seg6_hmac_init(void) { return 0; } -static inline void seg6_hmac_exit(void) {} static inline int seg6_hmac_net_init(struct net *net) { return 0; } static inline void seg6_hmac_net_exit(struct net *net) {} #endif diff --git a/include/net/sock.h b/include/net/sock.h index fb13322a11fc..896bec2d2176 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -102,6 +102,11 @@ struct net; typedef __u32 __bitwise __portpair; typedef __u64 __bitwise __addrpair; +struct socket_drop_counters { + atomic_t drops0 ____cacheline_aligned_in_smp; + atomic_t drops1 ____cacheline_aligned_in_smp; +}; + /** * struct sock_common - minimal network layer representation of sockets * @skc_daddr: Foreign IPv4 addr @@ -282,6 +287,7 @@ struct sk_filter; * @sk_err_soft: errors that don't cause failure but are the cause of a * persistent failure not just 'timed out' * @sk_drops: raw/udp drops counter + * @sk_drop_counters: optional pointer to socket_drop_counters * @sk_ack_backlog: current listen backlog * @sk_max_ack_backlog: listen backlog set in listen() * @sk_uid: user id of owner @@ -444,10 +450,13 @@ struct sock { __cacheline_group_begin(sock_read_rxtx); int sk_err; struct socket *sk_socket; +#ifdef CONFIG_MEMCG struct mem_cgroup *sk_memcg; +#endif #ifdef CONFIG_XFRM struct xfrm_policy __rcu *sk_policy[2]; #endif + struct socket_drop_counters *sk_drop_counters; __cacheline_group_end(sock_read_rxtx); __cacheline_group_begin(sock_write_rxtx); @@ -1346,8 +1355,6 @@ struct proto { unsigned int useroffset; /* Usercopy region offset */ unsigned int usersize; /* Usercopy region size */ - unsigned int __percpu *orphan_count; - struct request_sock_ops *rsk_prot; struct timewait_sock_ops *twsk_prot; @@ -2603,6 +2610,50 @@ static inline gfp_t gfp_memcg_charge(void) return in_softirq() ? 
GFP_ATOMIC : GFP_KERNEL; } +#ifdef CONFIG_MEMCG +static inline struct mem_cgroup *mem_cgroup_from_sk(const struct sock *sk) +{ + return sk->sk_memcg; +} + +static inline bool mem_cgroup_sk_enabled(const struct sock *sk) +{ + return mem_cgroup_sockets_enabled && mem_cgroup_from_sk(sk); +} + +static inline bool mem_cgroup_sk_under_memory_pressure(const struct sock *sk) +{ + struct mem_cgroup *memcg = mem_cgroup_from_sk(sk); + +#ifdef CONFIG_MEMCG_V1 + if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) + return !!memcg->tcpmem_pressure; +#endif /* CONFIG_MEMCG_V1 */ + + do { + if (time_before64(get_jiffies_64(), mem_cgroup_get_socket_pressure(memcg))) + return true; + } while ((memcg = parent_mem_cgroup(memcg))); + + return false; +} +#else +static inline struct mem_cgroup *mem_cgroup_from_sk(const struct sock *sk) +{ + return NULL; +} + +static inline bool mem_cgroup_sk_enabled(const struct sock *sk) +{ + return false; +} + +static inline bool mem_cgroup_sk_under_memory_pressure(const struct sock *sk) +{ + return false; +} +#endif + static inline long sock_rcvtimeo(const struct sock *sk, bool noblock) { return noblock ? 0 : READ_ONCE(sk->sk_rcvtimeo); @@ -2645,18 +2696,61 @@ struct sock_skb_cb { #define sock_skb_cb_check_size(size) \ BUILD_BUG_ON((size) > SOCK_SKB_CB_OFFSET) +static inline void sk_drops_add(struct sock *sk, int segs) +{ + struct socket_drop_counters *sdc = sk->sk_drop_counters; + + if (sdc) { + int n = numa_node_id() % 2; + + if (n) + atomic_add(segs, &sdc->drops1); + else + atomic_add(segs, &sdc->drops0); + } else { + atomic_add(segs, &sk->sk_drops); + } +} + +static inline void sk_drops_inc(struct sock *sk) +{ + sk_drops_add(sk, 1); +} + +static inline int sk_drops_read(const struct sock *sk) +{ + const struct socket_drop_counters *sdc = sk->sk_drop_counters; + + if (sdc) { + DEBUG_NET_WARN_ON_ONCE(atomic_read(&sk->sk_drops)); + return atomic_read(&sdc->drops0) + atomic_read(&sdc->drops1); + } + return atomic_read(&sk->sk_drops); +} + +static inline void sk_drops_reset(struct sock *sk) +{ + struct socket_drop_counters *sdc = sk->sk_drop_counters; + + if (sdc) { + atomic_set(&sdc->drops0, 0); + atomic_set(&sdc->drops1, 0); + } + atomic_set(&sk->sk_drops, 0); +} + static inline void sock_skb_set_dropcount(const struct sock *sk, struct sk_buff *skb) { SOCK_SKB_CB(skb)->dropcount = sock_flag(sk, SOCK_RXQ_OVFL) ? 
- atomic_read(&sk->sk_drops) : 0; + sk_drops_read(sk) : 0; } -static inline void sk_drops_add(struct sock *sk, const struct sk_buff *skb) +static inline void sk_drops_skbadd(struct sock *sk, const struct sk_buff *skb) { int segs = max_t(u16, 1, skb_shinfo(skb)->gso_segs); - atomic_add(segs, &sk->sk_drops); + sk_drops_add(sk, segs); } static inline ktime_t sock_read_timestamp(struct sock *sk) @@ -2933,8 +3027,8 @@ void sk_get_meminfo(const struct sock *sk, u32 *meminfo); */ #define _SK_MEM_PACKETS 256 #define _SK_MEM_OVERHEAD SKB_TRUESIZE(256) -#define SK_WMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS) -#define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS) +#define SK_WMEM_DEFAULT (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS) +#define SK_RMEM_DEFAULT (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS) extern __u32 sysctl_wmem_max; extern __u32 sysctl_rmem_max; diff --git a/include/net/tc_act/tc_skbmod.h b/include/net/tc_act/tc_skbmod.h index 7c240d2fed4e..626704cd6241 100644 --- a/include/net/tc_act/tc_skbmod.h +++ b/include/net/tc_act/tc_skbmod.h @@ -12,6 +12,7 @@ struct tcf_skbmod_params { struct rcu_head rcu; u64 flags; /*up to 64 types of operations; extend if needed */ + int action; u8 eth_dst[ETH_ALEN]; u16 eth_type; u8 eth_src[ETH_ALEN]; diff --git a/include/net/tc_act/tc_tunnel_key.h b/include/net/tc_act/tc_tunnel_key.h index 879fe8cff581..0f1925f97520 100644 --- a/include/net/tc_act/tc_tunnel_key.h +++ b/include/net/tc_act/tc_tunnel_key.h @@ -14,6 +14,7 @@ struct tcf_tunnel_key_params { struct rcu_head rcu; int tcft_action; + int action; struct metadata_dst *tcft_enc_metadata; }; diff --git a/include/net/tc_act/tc_vlan.h b/include/net/tc_act/tc_vlan.h index 3f5e9242b5e8..beadee41669a 100644 --- a/include/net/tc_act/tc_vlan.h +++ b/include/net/tc_act/tc_vlan.h @@ -10,6 +10,7 @@ #include <linux/tc_act/tc_vlan.h> struct tcf_vlan_params { + int action; int tcfv_action; unsigned char tcfv_push_dst[ETH_ALEN]; unsigned char tcfv_push_src[ETH_ALEN]; diff --git a/include/net/tcp.h b/include/net/tcp.h index 526a26e7a150..0fb7923b8367 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -54,6 +54,16 @@ extern struct inet_hashinfo tcp_hashinfo; DECLARE_PER_CPU(unsigned int, tcp_orphan_count); int tcp_orphan_count_sum(void); +static inline void tcp_orphan_count_inc(void) +{ + this_cpu_inc(tcp_orphan_count); +} + +static inline void tcp_orphan_count_dec(void) +{ + this_cpu_dec(tcp_orphan_count); +} + DECLARE_PER_CPU(u32, tcp_tw_isn); void tcp_time_wait(struct sock *sk, int state, int timeo); @@ -275,8 +285,8 @@ extern unsigned long tcp_memory_pressure; /* optimized version of sk_under_memory_pressure() for TCP sockets */ static inline bool tcp_under_memory_pressure(const struct sock *sk) { - if (mem_cgroup_sockets_enabled && sk->sk_memcg && - mem_cgroup_under_socket_pressure(sk->sk_memcg)) + if (mem_cgroup_sk_enabled(sk) && + mem_cgroup_sk_under_memory_pressure(sk)) return true; return READ_ONCE(tcp_memory_pressure); @@ -2612,7 +2622,7 @@ static inline void tcp_segs_in(struct tcp_sock *tp, const struct sk_buff *skb) */ static inline void tcp_listendrop(const struct sock *sk) { - atomic_inc(&((struct sock *)sk)->sk_drops); + sk_drops_inc((struct sock *)sk); __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENDROPS); } diff --git a/include/net/timewait_sock.h b/include/net/timewait_sock.h index 62b3e9f2aed4..0a85ac64a66d 100644 --- a/include/net/timewait_sock.h +++ b/include/net/timewait_sock.h @@ -15,13 +15,6 @@ struct timewait_sock_ops { struct kmem_cache *twsk_slab; char *twsk_slab_name; unsigned int twsk_obj_size; - 
void (*twsk_destructor)(struct sock *sk); }; -static inline void twsk_destructor(struct sock *sk) -{ - if (sk->sk_prot->twsk_prot->twsk_destructor != NULL) - sk->sk_prot->twsk_prot->twsk_destructor(sk); -} - #endif /* _TIMEWAIT_SOCK_H */ diff --git a/include/net/udp.h b/include/net/udp.h index e2af3bda90c9..93b159f30e88 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -288,6 +288,7 @@ static inline void udp_lib_init_sock(struct sock *sk) { struct udp_sock *up = udp_sk(sk); + sk->sk_drop_counters = &up->drop_counters; skb_queue_head_init(&up->reader_queue); INIT_HLIST_NODE(&up->tunnel_list); up->forward_threshold = sk->sk_rcvbuf >> 2; @@ -627,7 +628,7 @@ static inline struct sk_buff *udp_rcv_segment(struct sock *sk, return segs; drop: - atomic_add(drop_count, &sk->sk_drops); + sk_drops_add(sk, drop_count); SNMP_ADD_STATS(__UDPX_MIB(sk, ipv4), UDP_MIB_INERRORS, drop_count); kfree_skb(skb); return NULL; diff --git a/include/trace/events/fib.h b/include/trace/events/fib.h index 20b914250ce9..feb28b359eff 100644 --- a/include/trace/events/fib.h +++ b/include/trace/events/fib.h @@ -7,6 +7,8 @@ #include <linux/skbuff.h> #include <linux/netdevice.h> +#include <net/flow.h> +#include <net/inet_dscp.h> #include <net/ip_fib.h> #include <linux/tracepoint.h> @@ -44,7 +46,7 @@ TRACE_EVENT(fib_table_lookup, __entry->err = err; __entry->oif = flp->flowi4_oif; __entry->iif = flp->flowi4_iif; - __entry->tos = flp->flowi4_tos; + __entry->tos = inet_dscp_to_dsfield(flp->flowi4_dscp); __entry->scope = flp->flowi4_scope; __entry->flags = flp->flowi4_flags; diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 9fcb25a0f447..bcad11a787a5 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -636,6 +636,8 @@ enum devlink_attr { DEVLINK_ATTR_RATE_TC_BWS, /* nested */ + DEVLINK_ATTR_HEALTH_REPORTER_BURST_PERIOD, /* u64 */ + /* Add new attributes above here, update the spec in * Documentation/netlink/specs/devlink.yaml and re-generate * net/devlink/netlink_gen.c. 
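The split drop counters introduced in the sock.h hunks above only take effect once a protocol points sk->sk_drop_counters at dedicated per-socket storage, as the udp.h hunk does in udp_lib_init_sock(). A minimal sketch of the same opt-in for a hypothetical protocol ("foo_sock" and "foo_lib_init_sock" are invented for illustration and are not part of this series):

#include <net/sock.h>

struct foo_sock {
	struct sock sk;		/* struct sock must be the first member */
	struct socket_drop_counters drop_counters;
};

static void foo_lib_init_sock(struct sock *sk)
{
	struct foo_sock *foo = (struct foo_sock *)sk;

	/* From here on, sk_drops_add() spreads increments over the two
	 * cache-line-aligned counters (selected by numa_node_id() % 2),
	 * sk_drops_read() returns their sum, and the legacy sk->sk_drops
	 * field stays unused for this socket.
	 */
	sk->sk_drop_counters = &foo->drop_counters;
}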
diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 9e9afdd1238a..8bd5ea5469d9 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -2380,6 +2380,7 @@ enum { #define RXH_L4_B_0_1 (1 << 6) /* src port in case of TCP/UDP/SCTP */ #define RXH_L4_B_2_3 (1 << 7) /* dst port in case of TCP/UDP/SCTP */ #define RXH_GTP_TEID (1 << 8) /* teid in case of GTP */ +#define RXH_IP6_FL (1 << 9) /* IPv6 flow label */ #define RXH_DISCARD (1 << 31) #define RX_CLS_FLOW_DISC 0xffffffffffffffffULL diff --git a/include/uapi/linux/stddef.h b/include/uapi/linux/stddef.h index b87df1b485c2..9a28f7d9a334 100644 --- a/include/uapi/linux/stddef.h +++ b/include/uapi/linux/stddef.h @@ -2,7 +2,9 @@ #ifndef _UAPI_LINUX_STDDEF_H #define _UAPI_LINUX_STDDEF_H +#ifdef __KERNEL__ #include <linux/compiler_types.h> +#endif #ifndef __always_inline #define __always_inline inline diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c index e5ff49f3425e..319eddfd30e0 100644 --- a/io_uring/zcrx.c +++ b/io_uring/zcrx.c @@ -12,6 +12,7 @@ #include <net/page_pool/helpers.h> #include <net/page_pool/memory_provider.h> #include <net/netlink.h> +#include <net/netdev_queues.h> #include <net/netdev_rx_queue.h> #include <net/tcp.h> #include <net/rps.h> @@ -599,7 +600,7 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx, goto err; } - ifq->dev = ifq->netdev->dev.parent; + ifq->dev = netdev_queue_get_dma_dev(ifq->netdev, ifq->if_rxq); if (!ifq->dev) { ret = -EOPNOTSUPP; goto err; diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 6b4877e85a68..cdffd74ddbe6 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1780,6 +1780,9 @@ static int __bpf_dynptr_read(void *dst, u32 len, const struct bpf_dynptr_kern *s return __bpf_skb_load_bytes(src->data, src->offset + offset, dst, len); case BPF_DYNPTR_TYPE_XDP: return __bpf_xdp_load_bytes(src->data, src->offset + offset, dst, len); + case BPF_DYNPTR_TYPE_SKB_META: + memmove(dst, bpf_skb_meta_pointer(src->data, src->offset + offset), len); + return 0; default: WARN_ONCE(true, "bpf_dynptr_read: unknown dynptr type %d\n", type); return -EFAULT; @@ -1836,6 +1839,11 @@ int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u32 offset, void *src, if (flags) return -EINVAL; return __bpf_xdp_store_bytes(dst->data, dst->offset + offset, src, len); + case BPF_DYNPTR_TYPE_SKB_META: + if (flags) + return -EINVAL; + memmove(bpf_skb_meta_pointer(dst->data, dst->offset + offset), src, len); + return 0; default: WARN_ONCE(true, "bpf_dynptr_write: unknown dynptr type %d\n", type); return -EFAULT; @@ -1882,6 +1890,7 @@ BPF_CALL_3(bpf_dynptr_data, const struct bpf_dynptr_kern *, ptr, u32, offset, u3 return (unsigned long)(ptr->data + ptr->offset + offset); case BPF_DYNPTR_TYPE_SKB: case BPF_DYNPTR_TYPE_XDP: + case BPF_DYNPTR_TYPE_SKB_META: /* skb and xdp dynptrs should use bpf_dynptr_slice / bpf_dynptr_slice_rdwr */ return 0; default: @@ -2710,6 +2719,8 @@ __bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr *p, u32 offset, bpf_xdp_copy_buf(ptr->data, ptr->offset + offset, buffer__opt, len, false); return buffer__opt; } + case BPF_DYNPTR_TYPE_SKB_META: + return bpf_skb_meta_pointer(ptr->data, ptr->offset + offset); default: WARN_ONCE(true, "unknown dynptr type %d\n", type); return NULL; diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c index 38050f4ee400..e4983c1303e7 100644 --- a/kernel/bpf/log.c +++ b/kernel/bpf/log.c @@ -498,6 +498,8 @@ const char *dynptr_type_str(enum bpf_dynptr_type type) return "skb"; case BPF_DYNPTR_TYPE_XDP: return "xdp"; + 
case BPF_DYNPTR_TYPE_SKB_META: + return "skb_meta"; case BPF_DYNPTR_TYPE_INVALID: return "<invalid>"; default: diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index c4f69a9e9af6..5964bed40ffb 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -674,6 +674,8 @@ static enum bpf_dynptr_type arg_to_dynptr_type(enum bpf_arg_type arg_type) return BPF_DYNPTR_TYPE_SKB; case DYNPTR_TYPE_XDP: return BPF_DYNPTR_TYPE_XDP; + case DYNPTR_TYPE_SKB_META: + return BPF_DYNPTR_TYPE_SKB_META; default: return BPF_DYNPTR_TYPE_INVALID; } @@ -690,6 +692,8 @@ static enum bpf_type_flag get_dynptr_type_flag(enum bpf_dynptr_type type) return DYNPTR_TYPE_SKB; case BPF_DYNPTR_TYPE_XDP: return DYNPTR_TYPE_XDP; + case BPF_DYNPTR_TYPE_SKB_META: + return DYNPTR_TYPE_SKB_META; default: return 0; } @@ -2274,7 +2278,8 @@ static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg) static bool reg_is_dynptr_slice_pkt(const struct bpf_reg_state *reg) { return base_type(reg->type) == PTR_TO_MEM && - (reg->type & DYNPTR_TYPE_SKB || reg->type & DYNPTR_TYPE_XDP); + (reg->type & + (DYNPTR_TYPE_SKB | DYNPTR_TYPE_XDP | DYNPTR_TYPE_SKB_META)); } /* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */ @@ -11641,7 +11646,8 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn if (dynptr_type == BPF_DYNPTR_TYPE_INVALID) return -EFAULT; - if (dynptr_type == BPF_DYNPTR_TYPE_SKB) + if (dynptr_type == BPF_DYNPTR_TYPE_SKB || + dynptr_type == BPF_DYNPTR_TYPE_SKB_META) /* this will trigger clear_all_pkt_pointers(), which will * invalidate all dynptr slices associated with the skb */ @@ -12228,6 +12234,7 @@ enum special_kfunc_type { KF_bpf_rbtree_right, KF_bpf_dynptr_from_skb, KF_bpf_dynptr_from_xdp, + KF_bpf_dynptr_from_skb_meta, KF_bpf_dynptr_slice, KF_bpf_dynptr_slice_rdwr, KF_bpf_dynptr_clone, @@ -12277,9 +12284,11 @@ BTF_ID(func, bpf_rbtree_right) #ifdef CONFIG_NET BTF_ID(func, bpf_dynptr_from_skb) BTF_ID(func, bpf_dynptr_from_xdp) +BTF_ID(func, bpf_dynptr_from_skb_meta) #else BTF_ID_UNUSED BTF_ID_UNUSED +BTF_ID_UNUSED #endif BTF_ID(func, bpf_dynptr_slice) BTF_ID(func, bpf_dynptr_slice_rdwr) @@ -13253,6 +13262,8 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ dynptr_arg_type |= DYNPTR_TYPE_SKB; } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_xdp]) { dynptr_arg_type |= DYNPTR_TYPE_XDP; + } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_skb_meta]) { + dynptr_arg_type |= DYNPTR_TYPE_SKB_META; } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_clone] && (dynptr_arg_type & MEM_UNINIT)) { enum bpf_dynptr_type parent_type = meta->initialized_dynptr.type; diff --git a/kernel/time/time.c b/kernel/time/time.c index 1b69caa87480..0ba8e3c50d62 100644 --- a/kernel/time/time.c +++ b/kernel/time/time.c @@ -858,6 +858,7 @@ struct timespec64 timespec64_add_safe(const struct timespec64 lhs, return res; } +EXPORT_SYMBOL_GPL(timespec64_add_safe); /** * get_timespec64 - get user's time value into kernel space diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 8dd7fbed5a94..df3e9205c9e6 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5020,22 +5020,42 @@ out: void mem_cgroup_sk_free(struct sock *sk) { - if (sk->sk_memcg) - css_put(&sk->sk_memcg->css); + struct mem_cgroup *memcg = mem_cgroup_from_sk(sk); + + if (memcg) + css_put(&memcg->css); +} + +void mem_cgroup_sk_inherit(const struct sock *sk, struct sock *newsk) +{ + struct mem_cgroup *memcg; + + if (sk->sk_memcg == newsk->sk_memcg) + return; + + 
mem_cgroup_sk_free(newsk); + + memcg = mem_cgroup_from_sk(sk); + if (memcg) + css_get(&memcg->css); + + newsk->sk_memcg = sk->sk_memcg; } /** - * mem_cgroup_charge_skmem - charge socket memory - * @memcg: memcg to charge + * mem_cgroup_sk_charge - charge socket memory + * @sk: socket in memcg to charge * @nr_pages: number of pages to charge * @gfp_mask: reclaim mode * * Charges @nr_pages to @memcg. Returns %true if the charge fit within * @memcg's configured limit, %false if it doesn't. */ -bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages, - gfp_t gfp_mask) +bool mem_cgroup_sk_charge(const struct sock *sk, unsigned int nr_pages, + gfp_t gfp_mask) { + struct mem_cgroup *memcg = mem_cgroup_from_sk(sk); + if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) return memcg1_charge_skmem(memcg, nr_pages, gfp_mask); @@ -5048,12 +5068,14 @@ bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages, } /** - * mem_cgroup_uncharge_skmem - uncharge socket memory - * @memcg: memcg to uncharge + * mem_cgroup_sk_uncharge - uncharge socket memory + * @sk: socket in memcg to uncharge * @nr_pages: number of pages to uncharge */ -void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages) +void mem_cgroup_sk_uncharge(const struct sock *sk, unsigned int nr_pages) { + struct mem_cgroup *memcg = mem_cgroup_from_sk(sk); + if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) { memcg1_uncharge_skmem(memcg, nr_pages); return; diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 29097e984b4f..870bdf2e082c 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -148,7 +148,8 @@ void br_forward(const struct net_bridge_port *to, goto out; /* redirect to backup link if the destination port is down */ - if (rcu_access_pointer(to->backup_port) && !netif_carrier_ok(to->dev)) { + if (rcu_access_pointer(to->backup_port) && + (!netif_carrier_ok(to->dev) || !netif_running(to->dev))) { struct net_bridge_port *backup_port; backup_port = rcu_dereference(to->backup_port); diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 8ce145938b02..22d12e545966 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -4049,8 +4049,7 @@ int br_multicast_rcv(struct net_bridge_mcast **brmctx, } static void br_multicast_query_expired(struct net_bridge_mcast *brmctx, - struct bridge_mcast_own_query *query, - struct bridge_mcast_querier *querier) + struct bridge_mcast_own_query *query) { spin_lock(&brmctx->br->multicast_lock); if (br_multicast_ctx_vlan_disabled(brmctx)) @@ -4069,8 +4068,7 @@ static void br_ip4_multicast_query_expired(struct timer_list *t) struct net_bridge_mcast *brmctx = timer_container_of(brmctx, t, ip4_own_query.timer); - br_multicast_query_expired(brmctx, &brmctx->ip4_own_query, - &brmctx->ip4_querier); + br_multicast_query_expired(brmctx, &brmctx->ip4_own_query); } #if IS_ENABLED(CONFIG_IPV6) @@ -4079,8 +4077,7 @@ static void br_ip6_multicast_query_expired(struct timer_list *t) struct net_bridge_mcast *brmctx = timer_container_of(brmctx, t, ip6_own_query.timer); - br_multicast_query_expired(brmctx, &brmctx->ip6_own_query, - &brmctx->ip6_querier); + br_multicast_query_expired(brmctx, &brmctx->ip6_own_query); } #endif diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 3e67d4aff419..5697e3949a36 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -920,8 +920,8 @@ static int translate_table(struct net *net, const char *name, * if an error 
occurs */ newinfo->chainstack = - vmalloc(array_size(nr_cpu_ids, - sizeof(*(newinfo->chainstack)))); + vmalloc_array(nr_cpu_ids, + sizeof(*(newinfo->chainstack))); if (!newinfo->chainstack) return -ENOMEM; for_each_possible_cpu(i) { @@ -938,7 +938,7 @@ static int translate_table(struct net *net, const char *name, } } - cl_s = vmalloc(array_size(udc_cnt, sizeof(*cl_s))); + cl_s = vmalloc_array(udc_cnt, sizeof(*cl_s)); if (!cl_s) return -ENOMEM; i = 0; /* the i'th udc */ @@ -1018,8 +1018,8 @@ static int do_replace_finish(struct net *net, struct ebt_replace *repl, * the check on the size is done later, when we have the lock */ if (repl->num_counters) { - unsigned long size = repl->num_counters * sizeof(*counterstmp); - counterstmp = vmalloc(size); + counterstmp = vmalloc_array(repl->num_counters, + sizeof(*counterstmp)); if (!counterstmp) return -ENOMEM; } @@ -1386,7 +1386,7 @@ static int do_update_counters(struct net *net, const char *name, if (num_counters == 0) return -EINVAL; - tmp = vmalloc(array_size(num_counters, sizeof(*tmp))); + tmp = vmalloc_array(num_counters, sizeof(*tmp)); if (!tmp) return -ENOMEM; @@ -1526,7 +1526,7 @@ static int copy_counters_to_user(struct ebt_table *t, if (num_counters != nentries) return -EINVAL; - counterstmp = vmalloc(array_size(nentries, sizeof(*counterstmp))); + counterstmp = vmalloc_array(nentries, sizeof(*counterstmp)); if (!counterstmp) return -ENOMEM; diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c index 06b604cf9d58..2aa1e7d46eb2 100644 --- a/net/caif/cfctrl.c +++ b/net/caif/cfctrl.c @@ -257,9 +257,7 @@ int cfctrl_linkup_request(struct cflayer *layer, cfpkt_add_body(pkt, &tmp16, 2); tmp16 = cpu_to_le16(param->u.utility.fifosize_bufs); cfpkt_add_body(pkt, &tmp16, 2); - memset(utility_name, 0, sizeof(utility_name)); - strscpy(utility_name, param->u.utility.name, - UTILITY_NAME_LENGTH); + strscpy_pad(utility_name, param->u.utility.name); cfpkt_add_body(pkt, utility_name, UTILITY_NAME_LENGTH); tmp8 = param->u.utility.paramlen; cfpkt_add_body(pkt, &tmp8, 1); diff --git a/net/core/Makefile b/net/core/Makefile index b2a76ce33932..9ef2099c5426 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -20,6 +20,7 @@ obj-$(CONFIG_NETDEV_ADDR_LIST_TEST) += dev_addr_lists_test.o obj-y += net-sysfs.o obj-y += hotdata.o obj-y += netdev_rx_queue.o +obj-y += netdev_queues.o obj-$(CONFIG_PAGE_POOL) += page_pool.o page_pool_user.o obj-$(CONFIG_PROC_FS) += net-procfs.o obj-$(CONFIG_NET_PKTGEN) += pktgen.o diff --git a/net/core/datagram.c b/net/core/datagram.c index f474b9b120f9..cb4b9ef2e4e3 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -345,7 +345,7 @@ int __sk_queue_drop_skb(struct sock *sk, struct sk_buff_head *sk_queue, spin_unlock_bh(&sk_queue->lock); } - atomic_inc(&sk->sk_drops); + sk_drops_inc(sk); return err; } EXPORT_SYMBOL(__sk_queue_drop_skb); diff --git a/net/core/dev.c b/net/core/dev.c index 93a25d87b86b..1d1650d9ecff 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4849,9 +4849,40 @@ static u32 rfs_slot(u32 hash, const struct rps_dev_flow_table *flow_table) return hash_32(hash, flow_table->log); } +#ifdef CONFIG_RFS_ACCEL +/** + * rps_flow_is_active - check whether the flow is recently active. + * @rflow: Specific flow to check activity. + * @flow_table: per-queue flowtable that @rflow belongs to. + * @cpu: CPU saved in @rflow. + * + * If the CPU has processed many packets since the flow's last activity + * (beyond 10 times the table size), the flow is considered stale. + * + * Return: true if flow was recently active. 
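
The rps_flow_is_active() check documented above compares two free-running unsigned counters and casts the difference to signed int, which keeps the window test correct even after the counters wrap around. A standalone illustration of the idiom, with made-up values:

#include <stdio.h>

/* wraparound-safe "head is within window of last" check, as used above */
static int within_window(unsigned int head, unsigned int last,
			 unsigned int window)
{
	return (int)(head - last) < (int)window;
}

int main(void)
{
	/* head has wrapped past UINT_MAX while last has not */
	unsigned int last = 4294967290u;	/* UINT_MAX - 5 */
	unsigned int head = 10u;		/* wrapped: 16 packets later */

	printf("%d\n", within_window(head, last, 1024));	/* 1: active */
	printf("%d\n", within_window(head + 5000, last, 1024));	/* 0: stale */
	return 0;
}
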
+ */ +static bool rps_flow_is_active(struct rps_dev_flow *rflow, + struct rps_dev_flow_table *flow_table, + unsigned int cpu) +{ + unsigned int flow_last_active; + unsigned int sd_input_head; + + if (cpu >= nr_cpu_ids) + return false; + + sd_input_head = READ_ONCE(per_cpu(softnet_data, cpu).input_queue_head); + flow_last_active = READ_ONCE(rflow->last_qtail); + + return (int)(sd_input_head - flow_last_active) < + (int)(10 << flow_table->log); +} +#endif + static struct rps_dev_flow * set_rps_cpu(struct net_device *dev, struct sk_buff *skb, - struct rps_dev_flow *rflow, u16 next_cpu) + struct rps_dev_flow *rflow, u16 next_cpu, u32 hash, + u32 flow_id) { if (next_cpu < nr_cpu_ids) { u32 head; @@ -4859,8 +4890,9 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb, struct netdev_rx_queue *rxqueue; struct rps_dev_flow_table *flow_table; struct rps_dev_flow *old_rflow; + struct rps_dev_flow *tmp_rflow; + unsigned int tmp_cpu; u16 rxq_index; - u32 flow_id; int rc; /* Should we steer this flow to a different hardware queue? */ @@ -4875,14 +4907,29 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb, flow_table = rcu_dereference(rxqueue->rps_flow_table); if (!flow_table) goto out; - flow_id = rfs_slot(skb_get_hash(skb), flow_table); + + tmp_rflow = &flow_table->flows[flow_id]; + tmp_cpu = READ_ONCE(tmp_rflow->cpu); + + if (READ_ONCE(tmp_rflow->filter) != RPS_NO_FILTER) { + if (rps_flow_is_active(tmp_rflow, flow_table, + tmp_cpu)) { + if (hash != READ_ONCE(tmp_rflow->hash) || + next_cpu == tmp_cpu) + goto out; + } + } + rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb, rxq_index, flow_id); if (rc < 0) goto out; + old_rflow = rflow; - rflow = &flow_table->flows[flow_id]; + rflow = tmp_rflow; WRITE_ONCE(rflow->filter, rc); + WRITE_ONCE(rflow->hash, hash); + if (old_rflow->filter == rc) WRITE_ONCE(old_rflow->filter, RPS_NO_FILTER); out: @@ -4908,6 +4955,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, struct rps_dev_flow_table *flow_table; struct rps_map *map; int cpu = -1; + u32 flow_id; u32 tcpu; u32 hash; @@ -4954,7 +5002,8 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, /* OK, now we know there is a match, * we can look at the local (per receive queue) flow table */ - rflow = &flow_table->flows[rfs_slot(hash, flow_table)]; + flow_id = rfs_slot(hash, flow_table); + rflow = &flow_table->flows[flow_id]; tcpu = rflow->cpu; /* @@ -4973,7 +5022,8 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, ((int)(READ_ONCE(per_cpu(softnet_data, tcpu).input_queue_head) - rflow->last_qtail)) >= 0)) { tcpu = next_cpu; - rflow = set_rps_cpu(dev, skb, rflow, next_cpu); + rflow = set_rps_cpu(dev, skb, rflow, next_cpu, hash, + flow_id); } if (tcpu < nr_cpu_ids && cpu_online(tcpu)) { @@ -5017,17 +5067,16 @@ bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, struct rps_dev_flow_table *flow_table; struct rps_dev_flow *rflow; bool expire = true; - unsigned int cpu; rcu_read_lock(); flow_table = rcu_dereference(rxqueue->rps_flow_table); if (flow_table && flow_id < (1UL << flow_table->log)) { + unsigned int cpu; + rflow = &flow_table->flows[flow_id]; cpu = READ_ONCE(rflow->cpu); - if (READ_ONCE(rflow->filter) == filter_id && cpu < nr_cpu_ids && - ((int)(READ_ONCE(per_cpu(softnet_data, cpu).input_queue_head) - - READ_ONCE(rflow->last_qtail)) < - (int)(10 << flow_table->log))) + if (READ_ONCE(rflow->filter) == filter_id && + rps_flow_is_active(rflow, flow_table, cpu)) expire = false; } rcu_read_unlock(); diff --git a/net/core/devmem.c 
b/net/core/devmem.c index 24c591ab38ae..d9de31a6cc7f 100644 --- a/net/core/devmem.c +++ b/net/core/devmem.c @@ -176,6 +176,7 @@ err_close_rxq: struct net_devmem_dmabuf_binding * net_devmem_bind_dmabuf(struct net_device *dev, + struct device *dma_dev, enum dma_data_direction direction, unsigned int dmabuf_fd, struct netdev_nl_sock *priv, struct netlink_ext_ack *extack) @@ -188,6 +189,11 @@ net_devmem_bind_dmabuf(struct net_device *dev, unsigned long virtual; int err; + if (!dma_dev) { + NL_SET_ERR_MSG(extack, "Device doesn't support DMA"); + return ERR_PTR(-EOPNOTSUPP); + } + dmabuf = dma_buf_get(dmabuf_fd); if (IS_ERR(dmabuf)) return ERR_CAST(dmabuf); @@ -209,7 +215,7 @@ net_devmem_bind_dmabuf(struct net_device *dev, binding->dmabuf = dmabuf; binding->direction = direction; - binding->attachment = dma_buf_attach(binding->dmabuf, dev->dev.parent); + binding->attachment = dma_buf_attach(binding->dmabuf, dma_dev); if (IS_ERR(binding->attachment)) { err = PTR_ERR(binding->attachment); NL_SET_ERR_MSG(extack, "Failed to bind dmabuf to device"); diff --git a/net/core/devmem.h b/net/core/devmem.h index 41cd6e1c9141..101150d761af 100644 --- a/net/core/devmem.h +++ b/net/core/devmem.h @@ -85,6 +85,7 @@ struct dmabuf_genpool_chunk_owner { void __net_devmem_dmabuf_binding_free(struct work_struct *wq); struct net_devmem_dmabuf_binding * net_devmem_bind_dmabuf(struct net_device *dev, + struct device *dma_dev, enum dma_data_direction direction, unsigned int dmabuf_fd, struct netdev_nl_sock *priv, struct netlink_ext_ack *extack); @@ -170,6 +171,7 @@ static inline void net_devmem_put_net_iov(struct net_iov *niov) static inline struct net_devmem_dmabuf_binding * net_devmem_bind_dmabuf(struct net_device *dev, + struct device *dma_dev, enum dma_data_direction direction, unsigned int dmabuf_fd, struct netdev_nl_sock *priv, diff --git a/net/core/dst.c b/net/core/dst.c index e2de8b68c41d..e9d35f49c9e7 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -150,7 +150,7 @@ void dst_dev_put(struct dst_entry *dst) dst->ops->ifdown(dst, dev); WRITE_ONCE(dst->input, dst_discard); WRITE_ONCE(dst->output, dst_discard_out); - WRITE_ONCE(dst->dev, blackhole_netdev); + rcu_assign_pointer(dst->dev_rcu, blackhole_netdev); netdev_ref_replace(dev, blackhole_netdev, &dst->dev_tracker, GFP_ATOMIC); } diff --git a/net/core/filter.c b/net/core/filter.c index da391e2b0788..b005363f482c 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2373,7 +2373,7 @@ static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev, struct flowi4 fl4 = { .flowi4_flags = FLOWI_FLAG_ANYSRC, .flowi4_mark = skb->mark, - .flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(ip4h)), + .flowi4_dscp = ip4h_dscp(ip4h), .flowi4_oif = dev->ifindex, .flowi4_proto = ip4h->protocol, .daddr = ip4h->daddr, @@ -6020,7 +6020,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, fl4.flowi4_iif = params->ifindex; fl4.flowi4_oif = 0; } - fl4.flowi4_tos = params->tos & INET_DSCP_MASK; + fl4.flowi4_dscp = inet_dsfield_to_dscp(params->tos); fl4.flowi4_scope = RT_SCOPE_UNIVERSE; fl4.flowi4_flags = 0; @@ -6767,7 +6767,6 @@ static const struct bpf_func_proto bpf_lwt_seg6_adjust_srh_proto = { static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple, int dif, int sdif, u8 family, u8 proto) { - struct inet_hashinfo *hinfo = net->ipv4.tcp_death_row.hashinfo; bool refcounted = false; struct sock *sk = NULL; @@ -6776,7 +6775,7 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple, __be32 dst4 = 
tuple->ipv4.daddr; if (proto == IPPROTO_TCP) - sk = __inet_lookup(net, hinfo, NULL, 0, + sk = __inet_lookup(net, NULL, 0, src4, tuple->ipv4.sport, dst4, tuple->ipv4.dport, dif, sdif, &refcounted); @@ -6790,7 +6789,7 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple, struct in6_addr *dst6 = (struct in6_addr *)&tuple->ipv6.daddr; if (proto == IPPROTO_TCP) - sk = __inet6_lookup(net, hinfo, NULL, 0, + sk = __inet6_lookup(net, NULL, 0, src6, tuple->ipv6.sport, dst6, ntohs(tuple->ipv6.dport), dif, sdif, &refcounted); @@ -11990,6 +11989,16 @@ bpf_sk_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return func; } +/** + * bpf_skb_meta_pointer() - Gets a mutable pointer within the skb metadata area. + * @skb: socket buffer carrying the metadata + * @offset: offset into the metadata area, must be <= skb_metadata_len() + */ +void *bpf_skb_meta_pointer(struct sk_buff *skb, u32 offset) +{ + return skb_metadata_end(skb) - skb_metadata_len(skb) + offset; +} + __bpf_kfunc_start_defs(); __bpf_kfunc int bpf_dynptr_from_skb(struct __sk_buff *s, u64 flags, struct bpf_dynptr *ptr__uninit) @@ -12007,6 +12016,42 @@ __bpf_kfunc int bpf_dynptr_from_skb(struct __sk_buff *s, u64 flags, return 0; } +/** + * bpf_dynptr_from_skb_meta() - Initialize a dynptr to the skb metadata area. + * @skb_: socket buffer carrying the metadata + * @flags: future use, must be zero + * @ptr__uninit: dynptr to initialize + * + * Set up a dynptr for access to the metadata area earlier allocated from the + * XDP context with bpf_xdp_adjust_meta(). Serves as an alternative to + * &__sk_buff->data_meta. + * + * If passed @skb_ is a clone which shares the data with the original, the + * dynptr will be read-only. This limitation may be lifted in the future. 
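
For BPF program authors, the bpf_dynptr_from_skb_meta() kfunc described here is used like the existing skb/xdp dynptr constructors. A sketch of a tc program reading the metadata area, assuming vmlinux.h and libbpf's bpf_helpers.h are available; the __ksym declaration mirrors the kernel-side signature above:

// SPDX-License-Identifier: GPL-2.0
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>

/* kfunc added by this series; BPF-side declaration mirrors the kernel's */
extern int bpf_dynptr_from_skb_meta(struct __sk_buff *skb, u64 flags,
				    struct bpf_dynptr *ptr__uninit) __ksym;

SEC("tc")
int read_meta(struct __sk_buff *skb)
{
	struct bpf_dynptr meta;
	__u32 tag = 0;

	if (bpf_dynptr_from_skb_meta(skb, 0, &meta))
		return 0; /* TC_ACT_OK */

	/* read 4 bytes an XDP stage stashed via bpf_xdp_adjust_meta() */
	if (bpf_dynptr_read(&tag, sizeof(tag), &meta, 0, 0))
		return 0; /* metadata area shorter than 4 bytes */

	bpf_printk("skb meta tag: %u", tag);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";
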
+ * + * Return: + * * %0 - dynptr ready to use + * * %-EINVAL - invalid flags, dynptr set to null + */ +__bpf_kfunc int bpf_dynptr_from_skb_meta(struct __sk_buff *skb_, u64 flags, + struct bpf_dynptr *ptr__uninit) +{ + struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)ptr__uninit; + struct sk_buff *skb = (struct sk_buff *)skb_; + + if (flags) { + bpf_dynptr_set_null(ptr); + return -EINVAL; + } + + bpf_dynptr_init(ptr, skb, BPF_DYNPTR_TYPE_SKB_META, 0, skb_metadata_len(skb)); + + if (skb_cloned(skb)) + bpf_dynptr_set_rdonly(ptr); + + return 0; +} + __bpf_kfunc int bpf_dynptr_from_xdp(struct xdp_md *x, u64 flags, struct bpf_dynptr *ptr__uninit) { @@ -12181,6 +12226,10 @@ BTF_KFUNCS_START(bpf_kfunc_check_set_skb) BTF_ID_FLAGS(func, bpf_dynptr_from_skb, KF_TRUSTED_ARGS) BTF_KFUNCS_END(bpf_kfunc_check_set_skb) +BTF_KFUNCS_START(bpf_kfunc_check_set_skb_meta) +BTF_ID_FLAGS(func, bpf_dynptr_from_skb_meta, KF_TRUSTED_ARGS) +BTF_KFUNCS_END(bpf_kfunc_check_set_skb_meta) + BTF_KFUNCS_START(bpf_kfunc_check_set_xdp) BTF_ID_FLAGS(func, bpf_dynptr_from_xdp) BTF_KFUNCS_END(bpf_kfunc_check_set_xdp) @@ -12202,6 +12251,11 @@ static const struct btf_kfunc_id_set bpf_kfunc_set_skb = { .set = &bpf_kfunc_check_set_skb, }; +static const struct btf_kfunc_id_set bpf_kfunc_set_skb_meta = { + .owner = THIS_MODULE, + .set = &bpf_kfunc_check_set_skb_meta, +}; + static const struct btf_kfunc_id_set bpf_kfunc_set_xdp = { .owner = THIS_MODULE, .set = &bpf_kfunc_check_set_xdp, @@ -12237,6 +12291,8 @@ static int __init bpf_kfunc_init(void) ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_SEG6LOCAL, &bpf_kfunc_set_skb); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_NETFILTER, &bpf_kfunc_set_skb); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_kfunc_set_skb); + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_kfunc_set_skb_meta); + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_ACT, &bpf_kfunc_set_skb_meta); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &bpf_kfunc_set_xdp); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, &bpf_kfunc_set_sock_addr); diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c index ae74634310a3..9f40be0c3e71 100644 --- a/net/core/lwt_bpf.c +++ b/net/core/lwt_bpf.c @@ -8,12 +8,12 @@ #include <linux/skbuff.h> #include <linux/types.h> #include <linux/bpf.h> +#include <net/flow.h> #include <net/lwtunnel.h> #include <net/gre.h> #include <net/ip.h> #include <net/ip6_route.h> #include <net/ipv6_stubs.h> -#include <net/inet_dscp.h> struct bpf_lwt_prog { struct bpf_prog *prog; @@ -209,7 +209,7 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb) fl4.flowi4_oif = oif; fl4.flowi4_mark = skb->mark; fl4.flowi4_uid = sock_net_uid(net, sk); - fl4.flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(iph)); + fl4.flowi4_dscp = ip4h_dscp(iph); fl4.flowi4_flags = FLOWI_FLAG_ANYSRC; fl4.flowi4_proto = iph->protocol; fl4.daddr = iph->daddr; diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index c28cd6665444..5ea9f64adce3 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1120,8 +1120,10 @@ static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue, return -ENOMEM; table->log = ilog2(mask) + 1; - for (count = 0; count <= mask; count++) + for (count = 0; count <= mask; count++) { table->flows[count].cpu = RPS_NO_CPU; + table->flows[count].filter = RPS_NO_FILTER; + } } else { table = NULL; } diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index 6314eb7bdf69..470fabbeacd9 100644 
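
The netdev-genl refactor that follows collects the requested rx queue ids into a bitmap before binding, so duplicates collapse and the DMA device can be validated across all queues up front. The kernel bitmap API it leans on, shown in isolation (the demo_* name and pr_info() output are illustrative):

#include <linux/bitmap.h>
#include <linux/errno.h>
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/types.h>

static int demo_collect_queues(const u32 *ids, int n, unsigned int nqueues)
{
	unsigned long *map;
	unsigned int idx;
	int i;

	map = bitmap_zalloc(nqueues, GFP_KERNEL);
	if (!map)
		return -ENOMEM;

	for (i = 0; i < n; i++) {
		if (ids[i] >= nqueues) {
			bitmap_free(map);
			return -EINVAL;	/* out-of-range queue id */
		}
		/* duplicate ids simply set the same bit twice */
		bitmap_set(map, ids[i], 1);
	}

	for_each_set_bit(idx, map, nqueues)
		pr_info("binding rx queue %u\n", idx);

	bitmap_free(map);
	return 0;
}
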
--- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -869,16 +869,79 @@ int netdev_nl_qstats_get_dumpit(struct sk_buff *skb, return err; } -int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info) +static int netdev_nl_read_rxq_bitmap(struct genl_info *info, + u32 rxq_bitmap_len, + unsigned long *rxq_bitmap) { + const int maxtype = ARRAY_SIZE(netdev_queue_id_nl_policy) - 1; struct nlattr *tb[ARRAY_SIZE(netdev_queue_id_nl_policy)]; + struct nlattr *attr; + int rem, err = 0; + u32 rxq_idx; + + nla_for_each_attr_type(attr, NETDEV_A_DMABUF_QUEUES, + genlmsg_data(info->genlhdr), + genlmsg_len(info->genlhdr), rem) { + err = nla_parse_nested(tb, maxtype, attr, + netdev_queue_id_nl_policy, info->extack); + if (err < 0) + return err; + + if (NL_REQ_ATTR_CHECK(info->extack, attr, tb, NETDEV_A_QUEUE_ID) || + NL_REQ_ATTR_CHECK(info->extack, attr, tb, NETDEV_A_QUEUE_TYPE)) + return -EINVAL; + + if (nla_get_u32(tb[NETDEV_A_QUEUE_TYPE]) != NETDEV_QUEUE_TYPE_RX) { + NL_SET_BAD_ATTR(info->extack, tb[NETDEV_A_QUEUE_TYPE]); + return -EINVAL; + } + + rxq_idx = nla_get_u32(tb[NETDEV_A_QUEUE_ID]); + if (rxq_idx >= rxq_bitmap_len) { + NL_SET_BAD_ATTR(info->extack, tb[NETDEV_A_QUEUE_ID]); + return -EINVAL; + } + + bitmap_set(rxq_bitmap, rxq_idx, 1); + } + + return 0; +} + +static struct device * +netdev_nl_get_dma_dev(struct net_device *netdev, unsigned long *rxq_bitmap, + struct netlink_ext_ack *extack) +{ + struct device *dma_dev = NULL; + u32 rxq_idx, prev_rxq_idx; + + for_each_set_bit(rxq_idx, rxq_bitmap, netdev->real_num_rx_queues) { + struct device *rxq_dma_dev; + + rxq_dma_dev = netdev_queue_get_dma_dev(netdev, rxq_idx); + if (dma_dev && rxq_dma_dev != dma_dev) { + NL_SET_ERR_MSG_FMT(extack, "DMA device mismatch between queue %u and %u (multi-PF device?)", + rxq_idx, prev_rxq_idx); + return ERR_PTR(-EOPNOTSUPP); + } + + dma_dev = rxq_dma_dev; + prev_rxq_idx = rxq_idx; + } + + return dma_dev; +} + +int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info) +{ struct net_devmem_dmabuf_binding *binding; u32 ifindex, dmabuf_fd, rxq_idx; struct netdev_nl_sock *priv; struct net_device *netdev; + unsigned long *rxq_bitmap; + struct device *dma_dev; struct sk_buff *rsp; - struct nlattr *attr; - int rem, err = 0; + int err = 0; void *hdr; if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_DEV_IFINDEX) || @@ -921,36 +984,31 @@ int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info) goto err_unlock; } - binding = net_devmem_bind_dmabuf(netdev, DMA_FROM_DEVICE, dmabuf_fd, - priv, info->extack); - if (IS_ERR(binding)) { - err = PTR_ERR(binding); + rxq_bitmap = bitmap_zalloc(netdev->real_num_rx_queues, GFP_KERNEL); + if (!rxq_bitmap) { + err = -ENOMEM; goto err_unlock; } - nla_for_each_attr_type(attr, NETDEV_A_DMABUF_QUEUES, - genlmsg_data(info->genlhdr), - genlmsg_len(info->genlhdr), rem) { - err = nla_parse_nested( - tb, ARRAY_SIZE(netdev_queue_id_nl_policy) - 1, attr, - netdev_queue_id_nl_policy, info->extack); - if (err < 0) - goto err_unbind; - - if (NL_REQ_ATTR_CHECK(info->extack, attr, tb, NETDEV_A_QUEUE_ID) || - NL_REQ_ATTR_CHECK(info->extack, attr, tb, NETDEV_A_QUEUE_TYPE)) { - err = -EINVAL; - goto err_unbind; - } + err = netdev_nl_read_rxq_bitmap(info, netdev->real_num_rx_queues, + rxq_bitmap); + if (err) + goto err_rxq_bitmap; - if (nla_get_u32(tb[NETDEV_A_QUEUE_TYPE]) != NETDEV_QUEUE_TYPE_RX) { - NL_SET_BAD_ATTR(info->extack, tb[NETDEV_A_QUEUE_TYPE]); - err = -EINVAL; - goto err_unbind; - } + dma_dev = netdev_nl_get_dma_dev(netdev, rxq_bitmap, info->extack); + if 
(IS_ERR(dma_dev)) { + err = PTR_ERR(dma_dev); + goto err_rxq_bitmap; + } - rxq_idx = nla_get_u32(tb[NETDEV_A_QUEUE_ID]); + binding = net_devmem_bind_dmabuf(netdev, dma_dev, DMA_FROM_DEVICE, + dmabuf_fd, priv, info->extack); + if (IS_ERR(binding)) { + err = PTR_ERR(binding); + goto err_rxq_bitmap; + } + for_each_set_bit(rxq_idx, rxq_bitmap, netdev->real_num_rx_queues) { err = net_devmem_bind_dmabuf_to_queue(netdev, rxq_idx, binding, info->extack); if (err) @@ -964,6 +1022,8 @@ int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info) if (err) goto err_unbind; + bitmap_free(rxq_bitmap); + netdev_unlock(netdev); mutex_unlock(&priv->lock); @@ -972,6 +1032,8 @@ int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info) err_unbind: net_devmem_unbind_dmabuf(binding); +err_rxq_bitmap: + bitmap_free(rxq_bitmap); err_unlock: netdev_unlock(netdev); err_unlock_sock: @@ -986,6 +1048,7 @@ int netdev_nl_bind_tx_doit(struct sk_buff *skb, struct genl_info *info) struct net_devmem_dmabuf_binding *binding; struct netdev_nl_sock *priv; struct net_device *netdev; + struct device *dma_dev; u32 ifindex, dmabuf_fd; struct sk_buff *rsp; int err = 0; @@ -1032,8 +1095,9 @@ int netdev_nl_bind_tx_doit(struct sk_buff *skb, struct genl_info *info) goto err_unlock_netdev; } - binding = net_devmem_bind_dmabuf(netdev, DMA_TO_DEVICE, dmabuf_fd, priv, - info->extack); + dma_dev = netdev_queue_get_dma_dev(netdev, 0); + binding = net_devmem_bind_dmabuf(netdev, dma_dev, DMA_TO_DEVICE, + dmabuf_fd, priv, info->extack); if (IS_ERR(binding)) { err = PTR_ERR(binding); goto err_unlock_netdev; diff --git a/net/core/netdev_queues.c b/net/core/netdev_queues.c new file mode 100644 index 000000000000..251f27a8307f --- /dev/null +++ b/net/core/netdev_queues.c @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <net/netdev_queues.h> + +/** + * netdev_queue_get_dma_dev() - get the DMA device for zero-copy operations + * @dev: net_device + * @idx: queue index + * + * Get the DMA device to be used for zero-copy operations on this queue. + * If no such device is available or valid, the function returns NULL. + * + * Return: DMA device, or NULL on error + */ +struct device *netdev_queue_get_dma_dev(struct net_device *dev, int idx) +{ + const struct netdev_queue_mgmt_ops *queue_ops = dev->queue_mgmt_ops; + struct device *dma_dev; + + if (queue_ops && queue_ops->ndo_queue_get_dma_dev) + dma_dev = queue_ops->ndo_queue_get_dma_dev(dev, idx); + else + dma_dev = dev->dev.parent; + + return dma_dev && dma_dev->dma_mask ?
dma_dev : NULL; +} + diff --git a/net/core/netdev_rx_queue.c b/net/core/netdev_rx_queue.c index 3bf1151d8061..c7d9341b7630 100644 --- a/net/core/netdev_rx_queue.c +++ b/net/core/netdev_rx_queue.c @@ -9,6 +9,15 @@ #include "page_pool_priv.h" +/* See also page_pool_is_unreadable() */ +bool netif_rxq_has_unreadable_mp(struct net_device *dev, int idx) +{ + struct netdev_rx_queue *rxq = __netif_get_rx_queue(dev, idx); + + return !!rxq->mp_params.mp_ops; +} +EXPORT_SYMBOL(netif_rxq_has_unreadable_mp); + int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq_idx) { struct netdev_rx_queue *rxq = __netif_get_rx_queue(dev, rxq_idx); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 0ebe5461d4d9..d41b03fd1f63 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -114,6 +114,7 @@ #include <linux/sys.h> #include <linux/types.h> +#include <linux/minmax.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/kernel.h> @@ -2841,8 +2842,7 @@ static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb, } i = 0; - frag_len = (datalen/frags) < PAGE_SIZE ? - (datalen/frags) : PAGE_SIZE; + frag_len = min_t(int, datalen / frags, PAGE_SIZE); while (datalen > 0) { if (unlikely(!pkt_dev->page)) { int node = numa_node_id(); @@ -2859,8 +2859,7 @@ static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb, if (i == (frags - 1)) skb_frag_fill_page_desc(&skb_shinfo(skb)->frags[i], pkt_dev->page, 0, - (datalen < PAGE_SIZE ? - datalen : PAGE_SIZE)); + min(datalen, PAGE_SIZE)); else skb_frag_fill_page_desc(&skb_shinfo(skb)->frags[i], pkt_dev->page, 0, frag_len); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index ee0274417948..23b776cd9879 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3112,7 +3112,9 @@ static bool __splice_segment(struct page *page, unsigned int poff, poff += flen; plen -= flen; *len -= flen; - } while (*len && plen); + if (!*len) + return true; + } while (plen); return false; } diff --git a/net/core/sock.c b/net/core/sock.c index 158bddd23134..02f31f21b4af 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -281,12 +281,12 @@ static struct lock_class_key af_elock_keys[AF_MAX]; static struct lock_class_key af_kern_callback_keys[AF_MAX]; /* Run time adjustable parameters. 
*/ -__u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX; +__u32 sysctl_wmem_max __read_mostly = 4 << 20; EXPORT_SYMBOL(sysctl_wmem_max); -__u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX; +__u32 sysctl_rmem_max __read_mostly = 4 << 20; EXPORT_SYMBOL(sysctl_rmem_max); -__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX; -__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX; +__u32 sysctl_wmem_default __read_mostly = SK_WMEM_DEFAULT; +__u32 sysctl_rmem_default __read_mostly = SK_RMEM_DEFAULT; DEFINE_STATIC_KEY_FALSE(memalloc_socks_key); EXPORT_SYMBOL_GPL(memalloc_socks_key); @@ -491,13 +491,13 @@ int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) struct sk_buff_head *list = &sk->sk_receive_queue; if (atomic_read(&sk->sk_rmem_alloc) >= READ_ONCE(sk->sk_rcvbuf)) { - atomic_inc(&sk->sk_drops); + sk_drops_inc(sk); trace_sock_rcvqueue_full(sk, skb); return -ENOMEM; } if (!sk_rmem_schedule(sk, skb, skb->truesize)) { - atomic_inc(&sk->sk_drops); + sk_drops_inc(sk); return -ENOBUFS; } @@ -562,7 +562,7 @@ int __sk_receive_skb(struct sock *sk, struct sk_buff *skb, skb->dev = NULL; if (sk_rcvqueues_full(sk, READ_ONCE(sk->sk_rcvbuf))) { - atomic_inc(&sk->sk_drops); + sk_drops_inc(sk); reason = SKB_DROP_REASON_SOCKET_RCVBUFF; goto discard_and_relse; } @@ -585,7 +585,7 @@ int __sk_receive_skb(struct sock *sk, struct sk_buff *skb, reason = SKB_DROP_REASON_PFMEMALLOC; if (err == -ENOBUFS) reason = SKB_DROP_REASON_SOCKET_BACKLOG; - atomic_inc(&sk->sk_drops); + sk_drops_inc(sk); goto discard_and_relse; } @@ -1032,7 +1032,7 @@ static int sock_reserve_memory(struct sock *sk, int bytes) bool charged; int pages; - if (!mem_cgroup_sockets_enabled || !sk->sk_memcg || !sk_has_account(sk)) + if (!mem_cgroup_sk_enabled(sk) || !sk_has_account(sk)) return -EOPNOTSUPP; if (!bytes) @@ -1041,8 +1041,8 @@ static int sock_reserve_memory(struct sock *sk, int bytes) pages = sk_mem_pages(bytes); /* pre-charge to memcg */ - charged = mem_cgroup_charge_skmem(sk->sk_memcg, pages, - GFP_KERNEL | __GFP_RETRY_MAYFAIL); + charged = mem_cgroup_sk_charge(sk, pages, + GFP_KERNEL | __GFP_RETRY_MAYFAIL); if (!charged) return -ENOMEM; @@ -1054,7 +1054,7 @@ static int sock_reserve_memory(struct sock *sk, int bytes) */ if (allocated > sk_prot_mem_limits(sk, 1)) { sk_memory_allocated_sub(sk, pages); - mem_cgroup_uncharge_skmem(sk->sk_memcg, pages); + mem_cgroup_sk_uncharge(sk, pages); return -ENOMEM; } sk_forward_alloc_add(sk, pages << PAGE_SHIFT); @@ -2505,15 +2505,18 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) newsk->sk_wmem_queued = 0; newsk->sk_forward_alloc = 0; newsk->sk_reserved_mem = 0; - atomic_set(&newsk->sk_drops, 0); + DEBUG_NET_WARN_ON_ONCE(newsk->sk_drop_counters); + sk_drops_reset(newsk); newsk->sk_send_head = NULL; newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK; atomic_set(&newsk->sk_zckey, 0); sock_reset_flag(newsk, SOCK_DONE); +#ifdef CONFIG_MEMCG /* sk->sk_memcg will be populated at accept() time */ newsk->sk_memcg = NULL; +#endif cgroup_sk_clone(&newsk->sk_cgrp_data); @@ -2584,7 +2587,7 @@ free: } EXPORT_SYMBOL_GPL(sk_clone_lock); -static u32 sk_dst_gso_max_size(struct sock *sk, struct dst_entry *dst) +static u32 sk_dst_gso_max_size(struct sock *sk, const struct net_device *dev) { bool is_ipv6 = false; u32 max_size; @@ -2594,8 +2597,8 @@ static u32 sk_dst_gso_max_size(struct sock *sk, struct dst_entry *dst) !ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)); #endif /* pairs with the WRITE_ONCE() in netif_set_gso(_ipv4)_max_size() */ - max_size = is_ipv6 ? 
READ_ONCE(dst_dev(dst)->gso_max_size) : - READ_ONCE(dst_dev(dst)->gso_ipv4_max_size); + max_size = is_ipv6 ? READ_ONCE(dev->gso_max_size) : + READ_ONCE(dev->gso_ipv4_max_size); if (max_size > GSO_LEGACY_MAX_SIZE && !sk_is_tcp(sk)) max_size = GSO_LEGACY_MAX_SIZE; @@ -2604,9 +2607,12 @@ static u32 sk_dst_gso_max_size(struct sock *sk, struct dst_entry *dst) void sk_setup_caps(struct sock *sk, struct dst_entry *dst) { + const struct net_device *dev; u32 max_segs = 1; - sk->sk_route_caps = dst_dev(dst)->features; + rcu_read_lock(); + dev = dst_dev_rcu(dst); + sk->sk_route_caps = dev->features; if (sk_is_tcp(sk)) { struct inet_connection_sock *icsk = inet_csk(sk); @@ -2622,13 +2628,14 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst) sk->sk_route_caps &= ~NETIF_F_GSO_MASK; } else { sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM; - sk->sk_gso_max_size = sk_dst_gso_max_size(sk, dst); + sk->sk_gso_max_size = sk_dst_gso_max_size(sk, dev); /* pairs with the WRITE_ONCE() in netif_set_gso_max_segs() */ - max_segs = max_t(u32, READ_ONCE(dst_dev(dst)->gso_max_segs), 1); + max_segs = max_t(u32, READ_ONCE(dev->gso_max_segs), 1); } } sk->sk_gso_max_segs = max_segs; sk_dst_set(sk, dst); + rcu_read_unlock(); } EXPORT_SYMBOL_GPL(sk_setup_caps); @@ -3241,16 +3248,16 @@ EXPORT_SYMBOL(sk_wait_data); */ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind) { - struct mem_cgroup *memcg = mem_cgroup_sockets_enabled ? sk->sk_memcg : NULL; + bool memcg_enabled = false, charged = false; struct proto *prot = sk->sk_prot; - bool charged = true; long allocated; sk_memory_allocated_add(sk, amt); allocated = sk_memory_allocated(sk); - if (memcg) { - charged = mem_cgroup_charge_skmem(memcg, amt, gfp_memcg_charge()); + if (mem_cgroup_sk_enabled(sk)) { + memcg_enabled = true; + charged = mem_cgroup_sk_charge(sk, amt, gfp_memcg_charge()); if (!charged) goto suppress_allocation; } @@ -3324,21 +3331,19 @@ suppress_allocation: */ if (sk->sk_wmem_queued + size >= sk->sk_sndbuf) { /* Force charge with __GFP_NOFAIL */ - if (memcg && !charged) { - mem_cgroup_charge_skmem(memcg, amt, - gfp_memcg_charge() | __GFP_NOFAIL); - } + if (memcg_enabled && !charged) + mem_cgroup_sk_charge(sk, amt, + gfp_memcg_charge() | __GFP_NOFAIL); return 1; } } - if (kind == SK_MEM_SEND || (kind == SK_MEM_RECV && charged)) - trace_sock_exceed_buf_limit(sk, prot, allocated, kind); + trace_sock_exceed_buf_limit(sk, prot, allocated, kind); sk_memory_allocated_sub(sk, amt); - if (memcg && charged) - mem_cgroup_uncharge_skmem(memcg, amt); + if (charged) + mem_cgroup_sk_uncharge(sk, amt); return 0; } @@ -3376,8 +3381,8 @@ void __sk_mem_reduce_allocated(struct sock *sk, int amount) { sk_memory_allocated_sub(sk, amount); - if (mem_cgroup_sockets_enabled && sk->sk_memcg) - mem_cgroup_uncharge_skmem(sk->sk_memcg, amount); + if (mem_cgroup_sk_enabled(sk)) + mem_cgroup_sk_uncharge(sk, amount); if (sk_under_global_memory_pressure(sk) && (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0))) @@ -3691,7 +3696,7 @@ void sock_init_data_uid(struct socket *sock, struct sock *sk, kuid_t uid) */ smp_wmb(); refcount_set(&sk->sk_refcnt, 1); - atomic_set(&sk->sk_drops, 0); + sk_drops_reset(sk); } EXPORT_SYMBOL(sock_init_data_uid); @@ -3951,7 +3956,7 @@ void sk_get_meminfo(const struct sock *sk, u32 *mem) mem[SK_MEMINFO_WMEM_QUEUED] = READ_ONCE(sk->sk_wmem_queued); mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc); mem[SK_MEMINFO_BACKLOG] = READ_ONCE(sk->sk_backlog.len); - mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops); + 
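
The sock.c conversions above replace direct sk->sk_memcg dereferences with mem_cgroup_sk_*() accessors. The charge/rollback shape used by sock_reserve_memory() and __sk_mem_raise_allocated() reduces to a two-level pattern; in this simplified sketch the demo_* names and DEMO_GLOBAL_LIMIT are hypothetical stand-ins for the protocol memory accounting, and only the mem_cgroup_sk_*() calls come from this series:

#include <linux/atomic.h>
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/memcontrol.h>
#include <net/sock.h>

static atomic_long_t demo_allocated = ATOMIC_LONG_INIT(0);

#define DEMO_GLOBAL_LIMIT 1024	/* pages; arbitrary for the sketch */

static int demo_reserve(struct sock *sk, int pages)
{
	/* pre-charge the socket's memcg; can fail under cgroup pressure */
	if (!mem_cgroup_sk_charge(sk, pages, GFP_KERNEL | __GFP_RETRY_MAYFAIL))
		return -ENOMEM;

	if (atomic_long_add_return(pages, &demo_allocated) >
	    DEMO_GLOBAL_LIMIT) {
		/* over the "protocol" limit: roll back both levels */
		atomic_long_sub(pages, &demo_allocated);
		mem_cgroup_sk_uncharge(sk, pages);
		return -ENOMEM;
	}

	return 0;
}
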
mem[SK_MEMINFO_DROPS] = sk_drops_read(sk); } #ifdef CONFIG_PROC_FS @@ -4432,7 +4437,10 @@ static int __init sock_struct_check(void) CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rxtx, sk_err); CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rxtx, sk_socket); +#ifdef CONFIG_MEMCG CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rxtx, sk_memcg); +#endif + CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rxtx, sk_drop_counters); CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rxtx, sk_lock); CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rxtx, sk_reserved_mem); diff --git a/net/devlink/health.c b/net/devlink/health.c index b3ce8ecbb7fb..136a67c36a20 100644 --- a/net/devlink/health.c +++ b/net/devlink/health.c @@ -60,6 +60,7 @@ struct devlink_health_reporter { struct devlink_port *devlink_port; struct devlink_fmsg *dump_fmsg; u64 graceful_period; + u64 burst_period; bool auto_recover; bool auto_dump; u8 health_state; @@ -108,11 +109,14 @@ devlink_port_health_reporter_find_by_name(struct devlink_port *devlink_port, static struct devlink_health_reporter * __devlink_health_reporter_create(struct devlink *devlink, const struct devlink_health_reporter_ops *ops, - u64 graceful_period, void *priv) + void *priv) { struct devlink_health_reporter *reporter; - if (WARN_ON(graceful_period && !ops->recover)) + if (WARN_ON(ops->default_graceful_period && !ops->recover)) + return ERR_PTR(-EINVAL); + + if (WARN_ON(ops->default_burst_period && !ops->default_graceful_period)) return ERR_PTR(-EINVAL); reporter = kzalloc(sizeof(*reporter), GFP_KERNEL); @@ -122,7 +126,8 @@ __devlink_health_reporter_create(struct devlink *devlink, reporter->priv = priv; reporter->ops = ops; reporter->devlink = devlink; - reporter->graceful_period = graceful_period; + reporter->graceful_period = ops->default_graceful_period; + reporter->burst_period = ops->default_burst_period; reporter->auto_recover = !!ops->recover; reporter->auto_dump = !!ops->dump; return reporter; @@ -134,13 +139,12 @@ __devlink_health_reporter_create(struct devlink *devlink, * * @port: devlink_port to which health reports will relate * @ops: devlink health reporter ops - * @graceful_period: min time (in msec) between recovery attempts * @priv: driver priv pointer */ struct devlink_health_reporter * devl_port_health_reporter_create(struct devlink_port *port, const struct devlink_health_reporter_ops *ops, - u64 graceful_period, void *priv) + void *priv) { struct devlink_health_reporter *reporter; @@ -150,8 +154,7 @@ devl_port_health_reporter_create(struct devlink_port *port, ops->name)) return ERR_PTR(-EEXIST); - reporter = __devlink_health_reporter_create(port->devlink, ops, - graceful_period, priv); + reporter = __devlink_health_reporter_create(port->devlink, ops, priv); if (IS_ERR(reporter)) return reporter; @@ -164,14 +167,13 @@ EXPORT_SYMBOL_GPL(devl_port_health_reporter_create); struct devlink_health_reporter * devlink_port_health_reporter_create(struct devlink_port *port, const struct devlink_health_reporter_ops *ops, - u64 graceful_period, void *priv) + void *priv) { struct devlink_health_reporter *reporter; struct devlink *devlink = port->devlink; devl_lock(devlink); - reporter = devl_port_health_reporter_create(port, ops, - graceful_period, priv); + reporter = devl_port_health_reporter_create(port, ops, priv); devl_unlock(devlink); return reporter; } @@ -182,13 +184,12 @@ EXPORT_SYMBOL_GPL(devlink_port_health_reporter_create); * * @devlink: devlink instance which the health reports will relate * @ops: devlink health 
reporter ops - * @graceful_period: min time (in msec) between recovery attempts * @priv: driver priv pointer */ struct devlink_health_reporter * devl_health_reporter_create(struct devlink *devlink, const struct devlink_health_reporter_ops *ops, - u64 graceful_period, void *priv) + void *priv) { struct devlink_health_reporter *reporter; @@ -197,8 +198,7 @@ devl_health_reporter_create(struct devlink *devlink, if (devlink_health_reporter_find_by_name(devlink, ops->name)) return ERR_PTR(-EEXIST); - reporter = __devlink_health_reporter_create(devlink, ops, - graceful_period, priv); + reporter = __devlink_health_reporter_create(devlink, ops, priv); if (IS_ERR(reporter)) return reporter; @@ -210,13 +210,12 @@ EXPORT_SYMBOL_GPL(devl_health_reporter_create); struct devlink_health_reporter * devlink_health_reporter_create(struct devlink *devlink, const struct devlink_health_reporter_ops *ops, - u64 graceful_period, void *priv) + void *priv) { struct devlink_health_reporter *reporter; devl_lock(devlink); - reporter = devl_health_reporter_create(devlink, ops, - graceful_period, priv); + reporter = devl_health_reporter_create(devlink, ops, priv); devl_unlock(devlink); return reporter; } @@ -298,6 +297,10 @@ devlink_nl_health_reporter_fill(struct sk_buff *msg, reporter->graceful_period)) goto reporter_nest_cancel; if (reporter->ops->recover && + devlink_nl_put_u64(msg, DEVLINK_ATTR_HEALTH_REPORTER_BURST_PERIOD, + reporter->burst_period)) + goto reporter_nest_cancel; + if (reporter->ops->recover && nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER, reporter->auto_recover)) goto reporter_nest_cancel; @@ -462,16 +465,33 @@ int devlink_nl_health_reporter_set_doit(struct sk_buff *skb, if (!reporter->ops->recover && (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] || - info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER])) + info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER] || + info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_BURST_PERIOD])) return -EOPNOTSUPP; if (!reporter->ops->dump && info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP]) return -EOPNOTSUPP; - if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD]) + if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD]) { reporter->graceful_period = nla_get_u64(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD]); + if (!reporter->graceful_period) + reporter->burst_period = 0; + } + + if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_BURST_PERIOD]) { + u64 burst_period = + nla_get_u64(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_BURST_PERIOD]); + + if (!reporter->graceful_period && burst_period) { + NL_SET_ERR_MSG_MOD(info->extack, + "Cannot set burst period without a grace period."); + return -EINVAL; + } + + reporter->burst_period = burst_period; + } if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER]) reporter->auto_recover = @@ -514,11 +534,25 @@ static void devlink_recover_notify(struct devlink_health_reporter *reporter, devlink_nl_notify_send_desc(devlink, msg, &desc); } +static bool +devlink_health_reporter_in_burst(struct devlink_health_reporter *reporter) +{ + unsigned long burst_threshold = reporter->last_recovery_ts + + msecs_to_jiffies(reporter->burst_period); + + return time_is_after_jiffies(burst_threshold); +} + void devlink_health_reporter_recovery_done(struct devlink_health_reporter *reporter) { reporter->recovery_count++; - reporter->last_recovery_ts = jiffies; + if (!devlink_health_reporter_in_burst(reporter)) + /* When burst period is set, last_recovery_ts marks the first + * recovery within the 
burst period, not necessarily the last + * one. + */ + reporter->last_recovery_ts = jiffies; } EXPORT_SYMBOL_GPL(devlink_health_reporter_recovery_done); @@ -592,12 +626,37 @@ dump_err: return err; } +static bool +devlink_health_recover_abort(struct devlink_health_reporter *reporter, + enum devlink_health_reporter_state prev_state) +{ + unsigned long recover_ts_threshold; + + if (!reporter->auto_recover) + return false; + + /* abort if the previous error wasn't recovered */ + if (prev_state != DEVLINK_HEALTH_REPORTER_STATE_HEALTHY) + return true; + + if (devlink_health_reporter_in_burst(reporter)) + return false; + + recover_ts_threshold = reporter->last_recovery_ts + + msecs_to_jiffies(reporter->burst_period) + + msecs_to_jiffies(reporter->graceful_period); + if (reporter->last_recovery_ts && reporter->recovery_count && + time_is_after_jiffies(recover_ts_threshold)) + return true; + + return false; +} + int devlink_health_report(struct devlink_health_reporter *reporter, const char *msg, void *priv_ctx) { enum devlink_health_reporter_state prev_health_state; struct devlink *devlink = reporter->devlink; - unsigned long recover_ts_threshold; int ret; /* write a log message of the current error */ @@ -608,13 +667,7 @@ int devlink_health_report(struct devlink_health_reporter *reporter, reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_ERROR; devlink_recover_notify(reporter, DEVLINK_CMD_HEALTH_REPORTER_RECOVER); - /* abort if the previous error wasn't recovered */ - recover_ts_threshold = reporter->last_recovery_ts + - msecs_to_jiffies(reporter->graceful_period); - if (reporter->auto_recover && - (prev_health_state != DEVLINK_HEALTH_REPORTER_STATE_HEALTHY || - (reporter->last_recovery_ts && reporter->recovery_count && - time_is_after_jiffies(recover_ts_threshold)))) { + if (devlink_health_recover_abort(reporter, prev_health_state)) { trace_devlink_health_recover_aborted(devlink, reporter->ops->name, reporter->health_state, diff --git a/net/devlink/netlink_gen.c b/net/devlink/netlink_gen.c index d97c326a9045..9fd00977d59e 100644 --- a/net/devlink/netlink_gen.c +++ b/net/devlink/netlink_gen.c @@ -389,7 +389,7 @@ static const struct nla_policy devlink_health_reporter_get_dump_nl_policy[DEVLIN }; /* DEVLINK_CMD_HEALTH_REPORTER_SET - do */ -static const struct nla_policy devlink_health_reporter_set_nl_policy[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP + 1] = { +static const struct nla_policy devlink_health_reporter_set_nl_policy[DEVLINK_ATTR_HEALTH_REPORTER_BURST_PERIOD + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, @@ -397,6 +397,7 @@ static const struct nla_policy devlink_health_reporter_set_nl_policy[DEVLINK_ATT [DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] = { .type = NLA_U64, }, [DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER] = { .type = NLA_U8, }, [DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP] = { .type = NLA_U8, }, + [DEVLINK_ATTR_HEALTH_REPORTER_BURST_PERIOD] = { .type = NLA_U64, }, }; /* DEVLINK_CMD_HEALTH_REPORTER_RECOVER - do */ @@ -1032,7 +1033,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_health_reporter_set_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_health_reporter_set_nl_policy, - .maxattr = DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP, + .maxattr = DEVLINK_ATTR_HEALTH_REPORTER_BURST_PERIOD, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { diff --git a/net/devlink/port.c b/net/devlink/port.c index cb8d4df61619..93d8a25bb920 100644 --- 
a/net/devlink/port.c +++ b/net/devlink/port.c @@ -1333,8 +1333,8 @@ int devlink_port_netdevice_event(struct notifier_block *nb, return NOTIFY_OK; } -static int __devlink_port_attrs_set(struct devlink_port *devlink_port, - enum devlink_port_flavour flavour) +static void __devlink_port_attrs_set(struct devlink_port *devlink_port, + enum devlink_port_flavour flavour) { struct devlink_port_attrs *attrs = &devlink_port->attrs; @@ -1347,7 +1347,6 @@ static int __devlink_port_attrs_set(struct devlink_port *devlink_port, } else { devlink_port->switch_port = false; } - return 0; } /** @@ -1357,17 +1356,13 @@ static int __devlink_port_attrs_set(struct devlink_port *devlink_port, * @attrs: devlink port attrs */ void devlink_port_attrs_set(struct devlink_port *devlink_port, - struct devlink_port_attrs *attrs) + const struct devlink_port_attrs *attrs) { - int ret; - ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port); + WARN_ON(attrs->splittable && attrs->split); devlink_port->attrs = *attrs; - ret = __devlink_port_attrs_set(devlink_port, attrs->flavour); - if (ret) - return; - WARN_ON(attrs->splittable && attrs->split); + __devlink_port_attrs_set(devlink_port, attrs->flavour); } EXPORT_SYMBOL_GPL(devlink_port_attrs_set); @@ -1383,14 +1378,10 @@ void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port, u32 contro u16 pf, bool external) { struct devlink_port_attrs *attrs = &devlink_port->attrs; - int ret; ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port); - ret = __devlink_port_attrs_set(devlink_port, - DEVLINK_PORT_FLAVOUR_PCI_PF); - if (ret) - return; + __devlink_port_attrs_set(devlink_port, DEVLINK_PORT_FLAVOUR_PCI_PF); attrs->pci_pf.controller = controller; attrs->pci_pf.pf = pf; attrs->pci_pf.external = external; @@ -1411,14 +1402,10 @@ void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, u32 contro u16 pf, u16 vf, bool external) { struct devlink_port_attrs *attrs = &devlink_port->attrs; - int ret; ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port); - ret = __devlink_port_attrs_set(devlink_port, - DEVLINK_PORT_FLAVOUR_PCI_VF); - if (ret) - return; + __devlink_port_attrs_set(devlink_port, DEVLINK_PORT_FLAVOUR_PCI_VF); attrs->pci_vf.controller = controller; attrs->pci_vf.pf = pf; attrs->pci_vf.vf = vf; @@ -1439,14 +1426,10 @@ void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, u32 contro u16 pf, u32 sf, bool external) { struct devlink_port_attrs *attrs = &devlink_port->attrs; - int ret; ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port); - ret = __devlink_port_attrs_set(devlink_port, - DEVLINK_PORT_FLAVOUR_PCI_SF); - if (ret) - return; + __devlink_port_attrs_set(devlink_port, DEVLINK_PORT_FLAVOUR_PCI_SF); attrs->pci_sf.controller = controller; attrs->pci_sf.pf = pf; attrs->pci_sf.sf = sf; diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 43a7854e784e..0b2a4d0573b3 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -1014,6 +1014,28 @@ static bool flow_type_hashable(u32 flow_type) return false; } +static bool flow_type_v6(u32 flow_type) +{ + switch (flow_type) { + case TCP_V6_FLOW: + case UDP_V6_FLOW: + case SCTP_V6_FLOW: + case AH_ESP_V6_FLOW: + case AH_V6_FLOW: + case ESP_V6_FLOW: + case IPV6_FLOW: + case GTPU_V6_FLOW: + case GTPC_V6_FLOW: + case GTPC_TEID_V6_FLOW: + case GTPU_EH_V6_FLOW: + case GTPU_UL_V6_FLOW: + case GTPU_DL_V6_FLOW: + return true; + } + + return false; +} + /* When adding a new type, update the assert and, if it's hashable, add it to * the flow_type_hashable switch case. 
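
User space opts into the new flow-label hash bit through the same ETHTOOL_SRXFH ioctl as the other RXH_* fields; the check added in ethtool_set_rxfh_fields() below then rejects RXH_IP6_FL for non-IPv6 flow types. A hedged user-space sketch: the interface name is illustrative, and the RXH_IP6_FL fallback define is only needed when building against pre-series headers:

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <net/if.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>

#ifndef RXH_IP6_FL
#define RXH_IP6_FL (1 << 9)	/* added by this series */
#endif

int main(void)
{
	struct ethtool_rxnfc nfc = {
		.cmd = ETHTOOL_SRXFH,
		.flow_type = TCP_V6_FLOW,
		/* hash on addresses, L4 ports and the IPv6 flow label */
		.data = RXH_IP_SRC | RXH_IP_DST |
			RXH_L4_B_0_1 | RXH_L4_B_2_3 | RXH_IP6_FL,
	};
	struct ifreq ifr;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0) {
		perror("socket");
		return 1;
	}

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);	/* illustrative */
	ifr.ifr_data = (char *)&nfc;

	if (ioctl(fd, SIOCETHTOOL, &ifr) < 0)
		perror("ETHTOOL_SRXFH");

	close(fd);
	return 0;
}
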
*/ @@ -1077,6 +1099,9 @@ ethtool_set_rxfh_fields(struct net_device *dev, u32 cmd, void __user *useraddr) if (rc) return rc; + if (info.data & RXH_IP6_FL && !flow_type_v6(info.flow_type)) + return -EINVAL; + if (info.flow_type & FLOW_RSS && info.rss_context && !ops->rxfh_per_ctx_fields) return -EINVAL; diff --git a/net/ethtool/rss.c b/net/ethtool/rss.c index 992e98abe9dd..202d95e8bf3e 100644 --- a/net/ethtool/rss.c +++ b/net/ethtool/rss.c @@ -536,35 +536,36 @@ void ethtool_rss_notify(struct net_device *dev, u32 type, u32 rss_context) #define RFH_MASK (RXH_L2DA | RXH_VLAN | RXH_IP_SRC | RXH_IP_DST | \ RXH_L3_PROTO | RXH_L4_B_0_1 | RXH_L4_B_2_3 | \ RXH_GTP_TEID | RXH_DISCARD) +#define RFH_MASKv6 (RFH_MASK | RXH_IP6_FL) static const struct nla_policy ethnl_rss_flows_policy[] = { [ETHTOOL_A_FLOW_ETHER] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK), [ETHTOOL_A_FLOW_IP4] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK), - [ETHTOOL_A_FLOW_IP6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK), + [ETHTOOL_A_FLOW_IP6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASKv6), [ETHTOOL_A_FLOW_TCP4] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK), [ETHTOOL_A_FLOW_UDP4] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK), [ETHTOOL_A_FLOW_SCTP4] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK), [ETHTOOL_A_FLOW_AH_ESP4] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK), - [ETHTOOL_A_FLOW_TCP6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK), - [ETHTOOL_A_FLOW_UDP6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK), - [ETHTOOL_A_FLOW_SCTP6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK), - [ETHTOOL_A_FLOW_AH_ESP6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK), + [ETHTOOL_A_FLOW_TCP6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASKv6), + [ETHTOOL_A_FLOW_UDP6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASKv6), + [ETHTOOL_A_FLOW_SCTP6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASKv6), + [ETHTOOL_A_FLOW_AH_ESP6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASKv6), [ETHTOOL_A_FLOW_AH4] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK), [ETHTOOL_A_FLOW_ESP4] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK), - [ETHTOOL_A_FLOW_AH6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK), - [ETHTOOL_A_FLOW_ESP6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK), + [ETHTOOL_A_FLOW_AH6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASKv6), + [ETHTOOL_A_FLOW_ESP6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASKv6), [ETHTOOL_A_FLOW_GTPU4] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK), - [ETHTOOL_A_FLOW_GTPU6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK), + [ETHTOOL_A_FLOW_GTPU6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASKv6), [ETHTOOL_A_FLOW_GTPC4] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK), - [ETHTOOL_A_FLOW_GTPC6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK), + [ETHTOOL_A_FLOW_GTPC6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASKv6), [ETHTOOL_A_FLOW_GTPC_TEID4] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK), - [ETHTOOL_A_FLOW_GTPC_TEID6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK), + [ETHTOOL_A_FLOW_GTPC_TEID6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASKv6), [ETHTOOL_A_FLOW_GTPU_EH4] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK), - [ETHTOOL_A_FLOW_GTPU_EH6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK), + [ETHTOOL_A_FLOW_GTPU_EH6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASKv6), [ETHTOOL_A_FLOW_GTPU_UL4] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK), - [ETHTOOL_A_FLOW_GTPU_UL6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK), + [ETHTOOL_A_FLOW_GTPU_UL6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASKv6), [ETHTOOL_A_FLOW_GTPU_DL4] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK), - [ETHTOOL_A_FLOW_GTPU_DL6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK), + [ETHTOOL_A_FLOW_GTPU_DL6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASKv6), }; const struct nla_policy ethnl_rss_set_policy[ETHTOOL_A_RSS_FLOW_HASH + 1] = { diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 5cfc1c939673..833f2cf97178 100644 --- 
a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -170,7 +170,7 @@ struct neigh_table arp_tbl = { [NEIGH_VAR_DELAY_PROBE_TIME] = 5 * HZ, [NEIGH_VAR_INTERVAL_PROBE_TIME_MS] = 5 * HZ, [NEIGH_VAR_GC_STALETIME] = 60 * HZ, - [NEIGH_VAR_QUEUE_LEN_BYTES] = SK_WMEM_MAX, + [NEIGH_VAR_QUEUE_LEN_BYTES] = SK_WMEM_DEFAULT, [NEIGH_VAR_PROXY_QLEN] = 64, [NEIGH_VAR_ANYCAST_DELAY] = 1 * HZ, [NEIGH_VAR_PROXY_DELAY] = (8 * HZ) / 10, diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index f14a41ee4aa1..2c922afadb8f 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -132,8 +132,8 @@ static struct sock *esp_find_tcp_sk(struct xfrm_state *x) dport = encap->encap_dport; spin_unlock_bh(&x->lock); - sk = inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, x->id.daddr.a4, - dport, x->props.saddr.a4, sport, 0); + sk = inet_lookup_established(net, x->id.daddr.a4, dport, + x->props.saddr.a4, sport, 0); if (!sk) return ERR_PTR(-ENOENT); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 6e1b94796f67..1dab44e13d3b 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -32,6 +32,7 @@ #include <linux/list.h> #include <linux/slab.h> +#include <net/flow.h> #include <net/inet_dscp.h> #include <net/ip.h> #include <net/protocol.h> @@ -293,7 +294,7 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb) .flowi4_iif = LOOPBACK_IFINDEX, .flowi4_l3mdev = l3mdev_master_ifindex_rcu(dev), .daddr = ip_hdr(skb)->saddr, - .flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(ip_hdr(skb))), + .flowi4_dscp = ip4h_dscp(ip_hdr(skb)), .flowi4_scope = scope, .flowi4_mark = vmark ? skb->mark : 0, }; @@ -358,7 +359,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, fl4.flowi4_iif = oif ? : LOOPBACK_IFINDEX; fl4.daddr = src; fl4.saddr = dst; - fl4.flowi4_tos = inet_dscp_to_dsfield(dscp); + fl4.flowi4_dscp = dscp; fl4.flowi4_scope = RT_SCOPE_UNIVERSE; fl4.flowi4_tun_key.tun_id = 0; fl4.flowi4_flags = 0; @@ -1372,7 +1373,7 @@ static void nl_fib_lookup(struct net *net, struct fib_result_nl *frn) struct flowi4 fl4 = { .flowi4_mark = frn->fl_mark, .daddr = frn->fl_addr, - .flowi4_tos = frn->fl_tos & INET_DSCP_MASK, + .flowi4_dscp = inet_dsfield_to_dscp(frn->fl_tos), .flowi4_scope = frn->fl_scope, }; struct fib_table *tb; diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index fa58d6620ed6..51f0193092f0 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -23,6 +23,7 @@ #include <linux/list.h> #include <linux/rcupdate.h> #include <linux/export.h> +#include <net/flow.h> #include <net/inet_dscp.h> #include <net/ip.h> #include <net/route.h> @@ -193,8 +194,7 @@ INDIRECT_CALLABLE_SCOPE int fib4_rule_match(struct fib_rule *rule, * to mask the upper three DSCP bits prior to matching to maintain * legacy behavior. 
*/ - if (r->dscp_full && - (r->dscp ^ inet_dsfield_to_dscp(fl4->flowi4_tos)) & r->dscp_mask) + if (r->dscp_full && (r->dscp ^ fl4->flowi4_dscp) & r->dscp_mask) return 0; else if (!r->dscp_full && r->dscp && !fib_dscp_masked_match(r->dscp, fl4)) diff --git a/net/ipv4/fou_nl.c b/net/ipv4/fou_nl.c index 3d9614609b2d..506260b4a4dc 100644 --- a/net/ipv4/fou_nl.c +++ b/net/ipv4/fou_nl.c @@ -18,9 +18,9 @@ const struct nla_policy fou_nl_policy[FOU_ATTR_IFINDEX + 1] = { [FOU_ATTR_TYPE] = { .type = NLA_U8, }, [FOU_ATTR_REMCSUM_NOPARTIAL] = { .type = NLA_FLAG, }, [FOU_ATTR_LOCAL_V4] = { .type = NLA_U32, }, - [FOU_ATTR_LOCAL_V6] = { .len = 16, }, + [FOU_ATTR_LOCAL_V6] = NLA_POLICY_EXACT_LEN(16), [FOU_ATTR_PEER_V4] = { .type = NLA_U32, }, - [FOU_ATTR_PEER_V6] = { .len = 16, }, + [FOU_ATTR_PEER_V6] = NLA_POLICY_EXACT_LEN(16), [FOU_ATTR_PEER_PORT] = { .type = NLA_BE16, }, [FOU_ATTR_IFINDEX] = { .type = NLA_S32, }, }; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index c48c572f024d..863bf5023f2a 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -72,6 +72,7 @@ #include <linux/string.h> #include <linux/netfilter_ipv4.h> #include <linux/slab.h> +#include <net/flow.h> #include <net/snmp.h> #include <net/ip.h> #include <net/route.h> @@ -318,17 +319,17 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt, return true; /* No rate limit on loopback */ - dev = dst_dev(dst); + rcu_read_lock(); + dev = dst_dev_rcu(dst); if (dev && (dev->flags & IFF_LOOPBACK)) goto out; - rcu_read_lock(); peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, l3mdev_master_ifindex_rcu(dev)); rc = inet_peer_xrlim_allow(peer, READ_ONCE(net->ipv4.sysctl_icmp_ratelimit)); - rcu_read_unlock(); out: + rcu_read_unlock(); if (!rc) __ICMP_INC_STATS(net, ICMP_MIB_RATELIMITHOST); else @@ -444,7 +445,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) fl4.saddr = saddr; fl4.flowi4_mark = mark; fl4.flowi4_uid = sock_net_uid(net, NULL); - fl4.flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(ip_hdr(skb))); + fl4.flowi4_dscp = ip4h_dscp(ip_hdr(skb)); fl4.flowi4_proto = IPPROTO_ICMP; fl4.flowi4_oif = l3mdev_master_ifindex(skb->dev); security_skb_classify_flow(skb, flowi4_to_flowi_common(&fl4)); @@ -495,7 +496,7 @@ static struct rtable *icmp_route_lookup(struct net *net, struct flowi4 *fl4, fl4->saddr = saddr; fl4->flowi4_mark = mark; fl4->flowi4_uid = sock_net_uid(net, NULL); - fl4->flowi4_tos = inet_dscp_to_dsfield(dscp); + fl4->flowi4_dscp = dscp; fl4->flowi4_proto = IPPROTO_ICMP; fl4->fl4_icmp_type = type; fl4->fl4_icmp_code = code; @@ -544,14 +545,15 @@ static struct rtable *icmp_route_lookup(struct net *net, struct flowi4 *fl4, goto relookup_failed; } /* Ugh! */ - orefdst = skb_in->_skb_refdst; /* save old refdst */ - skb_dst_set(skb_in, NULL); + orefdst = skb_dstref_steal(skb_in); err = ip_route_input(skb_in, fl4_dec.daddr, fl4_dec.saddr, dscp, rt2->dst.dev) ? 
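[Annotation] The conversions in fib_frontend.c, fib_rules.c and (below) icmp.c are all instances of one refactor: struct flowi4 gains a typed flowi4_dscp field (dscp_t) that replaces the raw flowi4_tos byte, so DSCP handling no longer round-trips through inet_dscp_to_dsfield()/inet_dsfield_to_dscp() at every consumer, and fib4_rule_match() can compare rule and flow DSCP directly. A minimal sketch of the new pattern (the helper name is illustrative), assuming the accessors from <net/inet_dscp.h> and <net/ip.h>:

    /* Filling a flow key from a received IPv4 header: the DSCP stays
     * typed (dscp_t) end to end.
     */
    static void fill_flow_from_hdr(struct flowi4 *fl4, const struct iphdr *iph)
    {
            fl4->daddr = iph->daddr;
            fl4->saddr = iph->saddr;
            /* was: fl4->flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(iph)); */
            fl4->flowi4_dscp = ip4h_dscp(iph);
    }

User-supplied TOS bytes are converted once at the boundary instead, as in the nl_fib_lookup() hunk: fl4.flowi4_dscp = inet_dsfield_to_dscp(frn->fl_tos).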
-EINVAL : 0; dst_release(&rt2->dst); rt2 = skb_rtable(skb_in); - skb_in->_skb_refdst = orefdst; /* restore old refdst */ + /* steal dst entry from skb_in, don't drop refcnt */ + skb_dstref_steal(skb_in); + skb_dstref_restore(skb_in, orefdst); } if (err) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 1e2df51427fe..142ff8d86fc2 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -706,9 +706,9 @@ struct sock *inet_csk_accept(struct sock *sk, struct proto_accept_arg *arg) spin_unlock_bh(&queue->fastopenq.lock); } -out: release_sock(sk); - if (newsk && mem_cgroup_sockets_enabled) { + + if (mem_cgroup_sockets_enabled) { gfp_t gfp = GFP_KERNEL | __GFP_NOFAIL; int amt = 0; @@ -718,7 +718,7 @@ out: lock_sock(newsk); mem_cgroup_sk_alloc(newsk); - if (newsk->sk_memcg) { + if (mem_cgroup_from_sk(newsk)) { /* The socket has not been accepted yet, no need * to look at newsk->sk_wmem_queued. */ @@ -727,23 +727,22 @@ out: } if (amt) - mem_cgroup_charge_skmem(newsk->sk_memcg, amt, gfp); + mem_cgroup_sk_charge(newsk, amt, gfp); kmem_cache_charge(newsk, gfp); release_sock(newsk); } + if (req) reqsk_put(req); - if (newsk) - inet_init_csk_locks(newsk); - + inet_init_csk_locks(newsk); return newsk; + out_err: - newsk = NULL; - req = NULL; + release_sock(sk); arg->err = error; - goto out; + return NULL; } EXPORT_SYMBOL(inet_csk_accept); @@ -1297,12 +1296,19 @@ void inet_csk_destroy_sock(struct sock *sk) xfrm_sk_free_policy(sk); - this_cpu_dec(*sk->sk_prot->orphan_count); + tcp_orphan_count_dec(); sock_put(sk); } EXPORT_SYMBOL(inet_csk_destroy_sock); +void inet_csk_prepare_for_destroy_sock(struct sock *sk) +{ + /* The below has to be done to allow calling inet_csk_destroy_sock */ + sock_set_flag(sk, SOCK_DEAD); + tcp_orphan_count_inc(); +} + /* This function allows to force a closure of a socket after the call to * tcp_create_openreq_child(). 
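[Annotation] The inet_csk_accept() rework above removes the shared out: label: the listener is released on both paths, the error path returns NULL directly, and the success path charges the memcg through the new mem_cgroup_from_sk()/mem_cgroup_sk_charge() accessors instead of poking sk->sk_memcg by hand. Orphan accounting likewise moves behind tcp_orphan_count_inc()/tcp_orphan_count_dec(). Their definitions are not part of this hunk; a plausible minimal shape, given that the tcp.c hunk later in this section replaces this_cpu_inc(tcp_orphan_count) with the helper:

    /* Assumed wrappers around the existing per-cpu tcp_orphan_count,
     * so callers stop dereferencing sk->sk_prot->orphan_count.
     */
    static inline void tcp_orphan_count_inc(void)
    {
            this_cpu_inc(tcp_orphan_count);
    }

    static inline void tcp_orphan_count_dec(void)
    {
            this_cpu_dec(tcp_orphan_count);
    }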
*/ @@ -1370,7 +1376,7 @@ static void inet_child_forget(struct sock *sk, struct request_sock *req, sock_orphan(child); - this_cpu_inc(*sk->sk_prot->orphan_count); + tcp_orphan_count_inc(); if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->tfo_listener) { BUG_ON(rcu_access_pointer(tcp_sk(child)->fastopen_rsk) != req); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 2fa53b16fe77..f0b6c5a411a2 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -20,9 +20,6 @@ #include <net/ipv6.h> #include <net/inet_common.h> #include <net/inet_connection_sock.h> -#include <net/inet_hashtables.h> -#include <net/inet_timewait_sock.h> -#include <net/inet6_hashtables.h> #include <net/bpf_sk_storage.h> #include <net/netlink.h> @@ -74,54 +71,29 @@ static void inet_diag_unlock_handler(const struct inet_diag_handler *handler) void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk) { - r->idiag_family = sk->sk_family; + r->idiag_family = READ_ONCE(sk->sk_family); - r->id.idiag_sport = htons(sk->sk_num); - r->id.idiag_dport = sk->sk_dport; - r->id.idiag_if = sk->sk_bound_dev_if; + r->id.idiag_sport = htons(READ_ONCE(sk->sk_num)); + r->id.idiag_dport = READ_ONCE(sk->sk_dport); + r->id.idiag_if = READ_ONCE(sk->sk_bound_dev_if); sock_diag_save_cookie(sk, r->id.idiag_cookie); #if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family == AF_INET6) { - *(struct in6_addr *)r->id.idiag_src = sk->sk_v6_rcv_saddr; - *(struct in6_addr *)r->id.idiag_dst = sk->sk_v6_daddr; + if (r->idiag_family == AF_INET6) { + data_race(*(struct in6_addr *)r->id.idiag_src = sk->sk_v6_rcv_saddr); + data_race(*(struct in6_addr *)r->id.idiag_dst = sk->sk_v6_daddr); } else #endif { memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src)); memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst)); - r->id.idiag_src[0] = sk->sk_rcv_saddr; - r->id.idiag_dst[0] = sk->sk_daddr; + r->id.idiag_src[0] = READ_ONCE(sk->sk_rcv_saddr); + r->id.idiag_dst[0] = READ_ONCE(sk->sk_daddr); } } EXPORT_SYMBOL_GPL(inet_diag_msg_common_fill); -static size_t inet_sk_attr_size(struct sock *sk, - const struct inet_diag_req_v2 *req, - bool net_admin) -{ - const struct inet_diag_handler *handler; - size_t aux = 0; - - rcu_read_lock(); - handler = rcu_dereference(inet_diag_table[req->sdiag_protocol]); - DEBUG_NET_WARN_ON_ONCE(!handler); - if (handler && handler->idiag_get_aux_size) - aux = handler->idiag_get_aux_size(sk, net_admin); - rcu_read_unlock(); - - return nla_total_size(sizeof(struct tcp_info)) - + nla_total_size(sizeof(struct inet_diag_msg)) - + inet_diag_msg_attrs_size() - + nla_total_size(sizeof(struct inet_diag_meminfo)) - + nla_total_size(SK_MEMINFO_VARS * sizeof(u32)) - + nla_total_size(TCP_CA_NAME_MAX) - + nla_total_size(sizeof(struct tcpvegas_info)) - + aux - + 64; -} - int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, struct inet_diag_msg *r, int ext, struct user_namespace *user_ns, @@ -313,17 +285,17 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, icsk_pending == ICSK_TIME_REO_TIMEOUT || icsk_pending == ICSK_TIME_LOSS_PROBE) { r->idiag_timer = 1; - r->idiag_retrans = icsk->icsk_retransmits; + r->idiag_retrans = READ_ONCE(icsk->icsk_retransmits); r->idiag_expires = jiffies_delta_to_msecs(icsk_timeout(icsk) - jiffies); } else if (icsk_pending == ICSK_TIME_PROBE0) { r->idiag_timer = 4; - r->idiag_retrans = icsk->icsk_probes_out; + r->idiag_retrans = READ_ONCE(icsk->icsk_probes_out); r->idiag_expires = jiffies_delta_to_msecs(icsk_timeout(icsk) - jiffies); } else if 
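[Annotation] The inet_diag.c churn has two threads: the timewait/request fill helpers and the icsk dump loop are being deleted (they reappear in tcp_diag.c at the end of this section), and the common fill code that stays switches to annotated lockless reads. Word-sized socket fields get READ_ONCE(); the 128-bit IPv6 addresses cannot be read atomically, so those copies are wrapped in data_race(), documenting that a torn value is acceptable in a diagnostics dump. Condensed from the hunk (the function name is illustrative):

    static void diag_fill_lockless(struct inet_diag_msg *r, const struct sock *sk)
    {
            /* word-sized: READ_ONCE pairs with WRITE_ONCE on the write side */
            r->id.idiag_dport = READ_ONCE(sk->sk_dport);
            /* 128 bits: tearing tolerated, annotated rather than locked */
            data_race(*(struct in6_addr *)r->id.idiag_dst = sk->sk_v6_daddr);
    }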
(timer_pending(&sk->sk_timer)) { r->idiag_timer = 2; - r->idiag_retrans = icsk->icsk_probes_out; + r->idiag_retrans = READ_ONCE(icsk->icsk_probes_out); r->idiag_expires = jiffies_delta_to_msecs(sk->sk_timer.expires - jiffies); } @@ -422,183 +394,6 @@ errout: } EXPORT_SYMBOL_GPL(inet_sk_diag_fill); -static int inet_twsk_diag_fill(struct sock *sk, - struct sk_buff *skb, - struct netlink_callback *cb, - u16 nlmsg_flags, bool net_admin) -{ - struct inet_timewait_sock *tw = inet_twsk(sk); - struct inet_diag_msg *r; - struct nlmsghdr *nlh; - long tmo; - - nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, cb->nlh->nlmsg_type, - sizeof(*r), nlmsg_flags); - if (!nlh) - return -EMSGSIZE; - - r = nlmsg_data(nlh); - BUG_ON(tw->tw_state != TCP_TIME_WAIT); - - inet_diag_msg_common_fill(r, sk); - r->idiag_retrans = 0; - - r->idiag_state = READ_ONCE(tw->tw_substate); - r->idiag_timer = 3; - tmo = tw->tw_timer.expires - jiffies; - r->idiag_expires = jiffies_delta_to_msecs(tmo); - r->idiag_rqueue = 0; - r->idiag_wqueue = 0; - r->idiag_uid = 0; - r->idiag_inode = 0; - - if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, - tw->tw_mark)) { - nlmsg_cancel(skb, nlh); - return -EMSGSIZE; - } - - nlmsg_end(skb, nlh); - return 0; -} - -static int inet_req_diag_fill(struct sock *sk, struct sk_buff *skb, - struct netlink_callback *cb, - u16 nlmsg_flags, bool net_admin) -{ - struct request_sock *reqsk = inet_reqsk(sk); - struct inet_diag_msg *r; - struct nlmsghdr *nlh; - long tmo; - - nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - cb->nlh->nlmsg_type, sizeof(*r), nlmsg_flags); - if (!nlh) - return -EMSGSIZE; - - r = nlmsg_data(nlh); - inet_diag_msg_common_fill(r, sk); - r->idiag_state = TCP_SYN_RECV; - r->idiag_timer = 1; - r->idiag_retrans = reqsk->num_retrans; - - BUILD_BUG_ON(offsetof(struct inet_request_sock, ir_cookie) != - offsetof(struct sock, sk_cookie)); - - tmo = inet_reqsk(sk)->rsk_timer.expires - jiffies; - r->idiag_expires = jiffies_delta_to_msecs(tmo); - r->idiag_rqueue = 0; - r->idiag_wqueue = 0; - r->idiag_uid = 0; - r->idiag_inode = 0; - - if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, - inet_rsk(reqsk)->ir_mark)) { - nlmsg_cancel(skb, nlh); - return -EMSGSIZE; - } - - nlmsg_end(skb, nlh); - return 0; -} - -static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, - struct netlink_callback *cb, - const struct inet_diag_req_v2 *r, - u16 nlmsg_flags, bool net_admin) -{ - if (sk->sk_state == TCP_TIME_WAIT) - return inet_twsk_diag_fill(sk, skb, cb, nlmsg_flags, net_admin); - - if (sk->sk_state == TCP_NEW_SYN_RECV) - return inet_req_diag_fill(sk, skb, cb, nlmsg_flags, net_admin); - - return inet_sk_diag_fill(sk, inet_csk(sk), skb, cb, r, nlmsg_flags, - net_admin); -} - -struct sock *inet_diag_find_one_icsk(struct net *net, - struct inet_hashinfo *hashinfo, - const struct inet_diag_req_v2 *req) -{ - struct sock *sk; - - rcu_read_lock(); - if (req->sdiag_family == AF_INET) - sk = inet_lookup(net, hashinfo, NULL, 0, req->id.idiag_dst[0], - req->id.idiag_dport, req->id.idiag_src[0], - req->id.idiag_sport, req->id.idiag_if); -#if IS_ENABLED(CONFIG_IPV6) - else if (req->sdiag_family == AF_INET6) { - if (ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_dst) && - ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_src)) - sk = inet_lookup(net, hashinfo, NULL, 0, req->id.idiag_dst[3], - req->id.idiag_dport, req->id.idiag_src[3], - req->id.idiag_sport, req->id.idiag_if); - else - sk = inet6_lookup(net, hashinfo, NULL, 0, - (struct in6_addr *)req->id.idiag_dst, - 
req->id.idiag_dport, - (struct in6_addr *)req->id.idiag_src, - req->id.idiag_sport, - req->id.idiag_if); - } -#endif - else { - rcu_read_unlock(); - return ERR_PTR(-EINVAL); - } - rcu_read_unlock(); - if (!sk) - return ERR_PTR(-ENOENT); - - if (sock_diag_check_cookie(sk, req->id.idiag_cookie)) { - sock_gen_put(sk); - return ERR_PTR(-ENOENT); - } - - return sk; -} -EXPORT_SYMBOL_GPL(inet_diag_find_one_icsk); - -int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, - struct netlink_callback *cb, - const struct inet_diag_req_v2 *req) -{ - struct sk_buff *in_skb = cb->skb; - bool net_admin = netlink_net_capable(in_skb, CAP_NET_ADMIN); - struct net *net = sock_net(in_skb->sk); - struct sk_buff *rep; - struct sock *sk; - int err; - - sk = inet_diag_find_one_icsk(net, hashinfo, req); - if (IS_ERR(sk)) - return PTR_ERR(sk); - - rep = nlmsg_new(inet_sk_attr_size(sk, req, net_admin), GFP_KERNEL); - if (!rep) { - err = -ENOMEM; - goto out; - } - - err = sk_diag_fill(sk, rep, cb, req, 0, net_admin); - if (err < 0) { - WARN_ON(err == -EMSGSIZE); - nlmsg_free(rep); - goto out; - } - err = nlmsg_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid); - -out: - if (sk) - sock_gen_put(sk); - - return err; -} -EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk); - static int inet_diag_cmd_exact(int cmd, struct sk_buff *in_skb, const struct nlmsghdr *nlh, int hdrlen, @@ -785,7 +580,7 @@ static void entry_fill_addrs(struct inet_diag_entry *entry, const struct sock *sk) { #if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family == AF_INET6) { + if (entry->family == AF_INET6) { entry->saddr = sk->sk_v6_rcv_saddr.s6_addr32; entry->daddr = sk->sk_v6_daddr.s6_addr32; } else @@ -796,31 +591,36 @@ static void entry_fill_addrs(struct inet_diag_entry *entry, } } -int inet_diag_bc_sk(const struct nlattr *bc, struct sock *sk) +int inet_diag_bc_sk(const struct inet_diag_dump_data *cb_data, struct sock *sk) { - struct inet_sock *inet = inet_sk(sk); + const struct nlattr *bc = cb_data->inet_diag_nla_bc; + const struct inet_sock *inet = inet_sk(sk); struct inet_diag_entry entry; if (!bc) return 1; - entry.family = sk->sk_family; + entry.family = READ_ONCE(sk->sk_family); entry_fill_addrs(&entry, sk); - entry.sport = inet->inet_num; - entry.dport = ntohs(inet->inet_dport); - entry.ifindex = sk->sk_bound_dev_if; - entry.userlocks = sk_fullsock(sk) ? sk->sk_userlocks : 0; - if (sk_fullsock(sk)) - entry.mark = READ_ONCE(sk->sk_mark); - else if (sk->sk_state == TCP_NEW_SYN_RECV) - entry.mark = inet_rsk(inet_reqsk(sk))->ir_mark; - else if (sk->sk_state == TCP_TIME_WAIT) - entry.mark = inet_twsk(sk)->tw_mark; - else - entry.mark = 0; + entry.sport = READ_ONCE(inet->inet_num); + entry.dport = ntohs(READ_ONCE(inet->inet_dport)); + entry.ifindex = READ_ONCE(sk->sk_bound_dev_if); + if (cb_data->userlocks_needed) + entry.userlocks = sk_fullsock(sk) ? READ_ONCE(sk->sk_userlocks) : 0; + if (cb_data->mark_needed) { + if (sk_fullsock(sk)) + entry.mark = READ_ONCE(sk->sk_mark); + else if (sk->sk_state == TCP_NEW_SYN_RECV) + entry.mark = inet_rsk(inet_reqsk(sk))->ir_mark; + else if (sk->sk_state == TCP_TIME_WAIT) + entry.mark = inet_twsk(sk)->tw_mark; + else + entry.mark = 0; + } #ifdef CONFIG_SOCK_CGROUP_DATA - entry.cgroup_id = sk_fullsock(sk) ? - cgroup_id(sock_cgroup_ptr(&sk->sk_cgrp_data)) : 0; + if (cb_data->cgroup_needed) + entry.cgroup_id = sk_fullsock(sk) ? 
+ cgroup_id(sock_cgroup_ptr(&sk->sk_cgrp_data)) : 0; #endif return inet_diag_bc_run(bc, &entry); @@ -920,16 +720,21 @@ static bool valid_cgroupcond(const struct inet_diag_bc_op *op, int len, } #endif -static int inet_diag_bc_audit(const struct nlattr *attr, +static int inet_diag_bc_audit(struct inet_diag_dump_data *cb_data, const struct sk_buff *skb) { - bool net_admin = netlink_net_capable(skb, CAP_NET_ADMIN); + const struct nlattr *attr = cb_data->inet_diag_nla_bc; const void *bytecode, *bc; int bytecode_len, len; + bool net_admin; + + if (!attr) + return 0; - if (!attr || nla_len(attr) < sizeof(struct inet_diag_bc_op)) + if (nla_len(attr) < sizeof(struct inet_diag_bc_op)) return -EINVAL; + net_admin = netlink_net_capable(skb, CAP_NET_ADMIN); bytecode = bc = nla_data(attr); len = bytecode_len = nla_len(attr); @@ -961,14 +766,18 @@ static int inet_diag_bc_audit(const struct nlattr *attr, return -EPERM; if (!valid_markcond(bc, len, &min_len)) return -EINVAL; + cb_data->mark_needed = true; break; #ifdef CONFIG_SOCK_CGROUP_DATA case INET_DIAG_BC_CGROUP_COND: if (!valid_cgroupcond(bc, len, &min_len)) return -EINVAL; + cb_data->cgroup_needed = true; break; #endif case INET_DIAG_BC_AUTO: + cb_data->userlocks_needed = true; + fallthrough; case INET_DIAG_BC_JMP: case INET_DIAG_BC_NOP: break; @@ -992,280 +801,6 @@ static int inet_diag_bc_audit(const struct nlattr *attr, return len == 0 ? 0 : -EINVAL; } -static void twsk_build_assert(void) -{ - BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_family) != - offsetof(struct sock, sk_family)); - - BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_num) != - offsetof(struct inet_sock, inet_num)); - - BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_dport) != - offsetof(struct inet_sock, inet_dport)); - - BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_rcv_saddr) != - offsetof(struct inet_sock, inet_rcv_saddr)); - - BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_daddr) != - offsetof(struct inet_sock, inet_daddr)); - -#if IS_ENABLED(CONFIG_IPV6) - BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_v6_rcv_saddr) != - offsetof(struct sock, sk_v6_rcv_saddr)); - - BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_v6_daddr) != - offsetof(struct sock, sk_v6_daddr)); -#endif -} - -void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb, - struct netlink_callback *cb, - const struct inet_diag_req_v2 *r) -{ - bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN); - struct inet_diag_dump_data *cb_data = cb->data; - struct net *net = sock_net(skb->sk); - u32 idiag_states = r->idiag_states; - int i, num, s_i, s_num; - struct nlattr *bc; - struct sock *sk; - - bc = cb_data->inet_diag_nla_bc; - if (idiag_states & TCPF_SYN_RECV) - idiag_states |= TCPF_NEW_SYN_RECV; - s_i = cb->args[1]; - s_num = num = cb->args[2]; - - if (cb->args[0] == 0) { - if (!(idiag_states & TCPF_LISTEN) || r->id.idiag_dport) - goto skip_listen_ht; - - for (i = s_i; i <= hashinfo->lhash2_mask; i++) { - struct inet_listen_hashbucket *ilb; - struct hlist_nulls_node *node; - - num = 0; - ilb = &hashinfo->lhash2[i]; - - if (hlist_nulls_empty(&ilb->nulls_head)) { - s_num = 0; - continue; - } - spin_lock(&ilb->lock); - sk_nulls_for_each(sk, node, &ilb->nulls_head) { - struct inet_sock *inet = inet_sk(sk); - - if (!net_eq(sock_net(sk), net)) - continue; - - if (num < s_num) { - num++; - continue; - } - - if (r->sdiag_family != AF_UNSPEC && - sk->sk_family != r->sdiag_family) - goto next_listen; - - if (r->id.idiag_sport != inet->inet_sport && - 
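[Annotation] inet_diag_bc_audit() now receives the whole dump state and, while validating the filter bytecode, records which conditions actually appear (mark_needed, cgroup_needed, userlocks_needed). inet_diag_bc_sk() then consults those flags, so dumps whose filters never test a mark or cgroup skip gathering them entirely. Consumer side, condensed from the hunk above (the request/timewait mark variants are elided):

    if (cb_data->mark_needed)
            entry.mark = sk_fullsock(sk) ? READ_ONCE(sk->sk_mark) : 0;
    #ifdef CONFIG_SOCK_CGROUP_DATA
    if (cb_data->cgroup_needed)
            entry.cgroup_id = sk_fullsock(sk) ?
                              cgroup_id(sock_cgroup_ptr(&sk->sk_cgrp_data)) : 0;
    #endif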
r->id.idiag_sport) - goto next_listen; - - if (!inet_diag_bc_sk(bc, sk)) - goto next_listen; - - if (inet_sk_diag_fill(sk, inet_csk(sk), skb, - cb, r, NLM_F_MULTI, - net_admin) < 0) { - spin_unlock(&ilb->lock); - goto done; - } - -next_listen: - ++num; - } - spin_unlock(&ilb->lock); - - s_num = 0; - } -skip_listen_ht: - cb->args[0] = 1; - s_i = num = s_num = 0; - } - -/* Process a maximum of SKARR_SZ sockets at a time when walking hash buckets - * with bh disabled. - */ -#define SKARR_SZ 16 - - /* Dump bound but inactive (not listening, connecting, etc.) sockets */ - if (cb->args[0] == 1) { - if (!(idiag_states & TCPF_BOUND_INACTIVE)) - goto skip_bind_ht; - - for (i = s_i; i < hashinfo->bhash_size; i++) { - struct inet_bind_hashbucket *ibb; - struct inet_bind2_bucket *tb2; - struct sock *sk_arr[SKARR_SZ]; - int num_arr[SKARR_SZ]; - int idx, accum, res; - -resume_bind_walk: - num = 0; - accum = 0; - ibb = &hashinfo->bhash2[i]; - - if (hlist_empty(&ibb->chain)) { - s_num = 0; - continue; - } - spin_lock_bh(&ibb->lock); - inet_bind_bucket_for_each(tb2, &ibb->chain) { - if (!net_eq(ib2_net(tb2), net)) - continue; - - sk_for_each_bound(sk, &tb2->owners) { - struct inet_sock *inet = inet_sk(sk); - - if (num < s_num) - goto next_bind; - - if (sk->sk_state != TCP_CLOSE || - !inet->inet_num) - goto next_bind; - - if (r->sdiag_family != AF_UNSPEC && - r->sdiag_family != sk->sk_family) - goto next_bind; - - if (!inet_diag_bc_sk(bc, sk)) - goto next_bind; - - sock_hold(sk); - num_arr[accum] = num; - sk_arr[accum] = sk; - if (++accum == SKARR_SZ) - goto pause_bind_walk; -next_bind: - num++; - } - } -pause_bind_walk: - spin_unlock_bh(&ibb->lock); - - res = 0; - for (idx = 0; idx < accum; idx++) { - if (res >= 0) { - res = inet_sk_diag_fill(sk_arr[idx], - NULL, skb, cb, - r, NLM_F_MULTI, - net_admin); - if (res < 0) - num = num_arr[idx]; - } - sock_put(sk_arr[idx]); - } - if (res < 0) - goto done; - - cond_resched(); - - if (accum == SKARR_SZ) { - s_num = num + 1; - goto resume_bind_walk; - } - - s_num = 0; - } -skip_bind_ht: - cb->args[0] = 2; - s_i = num = s_num = 0; - } - - if (!(idiag_states & ~TCPF_LISTEN)) - goto out; - - for (i = s_i; i <= hashinfo->ehash_mask; i++) { - struct inet_ehash_bucket *head = &hashinfo->ehash[i]; - spinlock_t *lock = inet_ehash_lockp(hashinfo, i); - struct hlist_nulls_node *node; - struct sock *sk_arr[SKARR_SZ]; - int num_arr[SKARR_SZ]; - int idx, accum, res; - - if (hlist_nulls_empty(&head->chain)) - continue; - - if (i > s_i) - s_num = 0; - -next_chunk: - num = 0; - accum = 0; - spin_lock_bh(lock); - sk_nulls_for_each(sk, node, &head->chain) { - int state; - - if (!net_eq(sock_net(sk), net)) - continue; - if (num < s_num) - goto next_normal; - state = (sk->sk_state == TCP_TIME_WAIT) ? 
- READ_ONCE(inet_twsk(sk)->tw_substate) : sk->sk_state; - if (!(idiag_states & (1 << state))) - goto next_normal; - if (r->sdiag_family != AF_UNSPEC && - sk->sk_family != r->sdiag_family) - goto next_normal; - if (r->id.idiag_sport != htons(sk->sk_num) && - r->id.idiag_sport) - goto next_normal; - if (r->id.idiag_dport != sk->sk_dport && - r->id.idiag_dport) - goto next_normal; - twsk_build_assert(); - - if (!inet_diag_bc_sk(bc, sk)) - goto next_normal; - - if (!refcount_inc_not_zero(&sk->sk_refcnt)) - goto next_normal; - - num_arr[accum] = num; - sk_arr[accum] = sk; - if (++accum == SKARR_SZ) - break; -next_normal: - ++num; - } - spin_unlock_bh(lock); - res = 0; - for (idx = 0; idx < accum; idx++) { - if (res >= 0) { - res = sk_diag_fill(sk_arr[idx], skb, cb, r, - NLM_F_MULTI, net_admin); - if (res < 0) - num = num_arr[idx]; - } - sock_gen_put(sk_arr[idx]); - } - if (res < 0) - break; - cond_resched(); - if (accum == SKARR_SZ) { - s_num = num + 1; - goto next_chunk; - } - } - -done: - cb->args[1] = i; - cb->args[2] = num; -out: - ; -} -EXPORT_SYMBOL_GPL(inet_diag_dump_icsk); - static int __inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *r) { @@ -1319,13 +854,10 @@ static int __inet_diag_dump_start(struct netlink_callback *cb, int hdrlen) kfree(cb_data); return err; } - nla = cb_data->inet_diag_nla_bc; - if (nla) { - err = inet_diag_bc_audit(nla, skb); - if (err) { - kfree(cb_data); - return err; - } + err = inet_diag_bc_audit(cb_data, skb); + if (err) { + kfree(cb_data); + return err; } nla = cb_data->inet_diag_nla_bpf_stgs; diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index ceeeec9b7290..ef4ccfd46ff6 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -425,19 +425,18 @@ struct sock *inet_lookup_run_sk_lookup(const struct net *net, } struct sock *__inet_lookup_listener(const struct net *net, - struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, const __be32 saddr, __be16 sport, const __be32 daddr, const unsigned short hnum, const int dif, const int sdif) { struct inet_listen_hashbucket *ilb2; + struct inet_hashinfo *hashinfo; struct sock *result = NULL; unsigned int hash2; /* Lookup redirect from BPF */ - if (static_branch_unlikely(&bpf_sk_lookup_enabled) && - hashinfo == net->ipv4.tcp_death_row.hashinfo) { + if (static_branch_unlikely(&bpf_sk_lookup_enabled)) { result = inet_lookup_run_sk_lookup(net, IPPROTO_TCP, skb, doff, saddr, sport, daddr, hnum, dif, inet_ehashfn); @@ -445,6 +444,7 @@ struct sock *__inet_lookup_listener(const struct net *net, goto done; } + hashinfo = net->ipv4.tcp_death_row.hashinfo; hash2 = ipv4_portaddr_hash(net, daddr, hnum); ilb2 = inet_lhash2_bucket(hashinfo, hash2); @@ -490,21 +490,22 @@ void sock_edemux(struct sk_buff *skb) EXPORT_SYMBOL(sock_edemux); struct sock *__inet_lookup_established(const struct net *net, - struct inet_hashinfo *hashinfo, - const __be32 saddr, const __be16 sport, - const __be32 daddr, const u16 hnum, - const int dif, const int sdif) + const __be32 saddr, const __be16 sport, + const __be32 daddr, const u16 hnum, + const int dif, const int sdif) { - INET_ADDR_COOKIE(acookie, saddr, daddr); const __portpair ports = INET_COMBINED_PORTS(sport, hnum); - struct sock *sk; + INET_ADDR_COOKIE(acookie, saddr, daddr); const struct hlist_nulls_node *node; - /* Optimize here for direct hit, only listening connections can - * have wildcards anyways. 
- */ - unsigned int hash = inet_ehashfn(net, daddr, hnum, saddr, sport); - unsigned int slot = hash & hashinfo->ehash_mask; - struct inet_ehash_bucket *head = &hashinfo->ehash[slot]; + struct inet_ehash_bucket *head; + struct inet_hashinfo *hashinfo; + unsigned int hash, slot; + struct sock *sk; + + hashinfo = net->ipv4.tcp_death_row.hashinfo; + hash = inet_ehashfn(net, daddr, hnum, saddr, sport); + slot = hash & hashinfo->ehash_mask; + head = &hashinfo->ehash[slot]; begin: sk_nulls_for_each_rcu(sk, node, &head->chain) { @@ -579,8 +580,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, if (likely(inet_match(net, sk2, acookie, ports, dif, sdif))) { if (sk2->sk_state == TCP_TIME_WAIT) { tw = inet_twsk(sk2); - if (sk->sk_protocol == IPPROTO_TCP && - tcp_twsk_unique(sk, sk2, twp)) + if (tcp_twsk_unique(sk, sk2, twp)) break; } goto not_unique; @@ -707,7 +707,7 @@ bool inet_ehash_nolisten(struct sock *sk, struct sock *osk, bool *found_dup_sk) if (ok) { sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); } else { - this_cpu_inc(*sk->sk_prot->orphan_count); + tcp_orphan_count_inc(); inet_sk_set_state(sk, TCP_CLOSE); sock_set_flag(sk, SOCK_DEAD); inet_csk_destroy_sock(sk); diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 875ff923a8ed..5b5426b8ee92 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -15,7 +15,7 @@ #include <net/inet_hashtables.h> #include <net/inet_timewait_sock.h> #include <net/ip.h> - +#include <net/tcp.h> /** * inet_twsk_bind_unhash - unhash a timewait socket from bind hash @@ -74,7 +74,8 @@ static void inet_twsk_kill(struct inet_timewait_sock *tw) void inet_twsk_free(struct inet_timewait_sock *tw) { struct module *owner = tw->tw_prot->owner; - twsk_destructor((struct sock *)tw); + + tcp_twsk_destructor((struct sock *)tw); kmem_cache_free(tw->tw_prot->twsk_prot->twsk_slab, tw); module_put(owner); } diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index b2584cce90ae..f7012479713b 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -476,14 +476,16 @@ out_fail: /* Process an incoming IP datagram fragment. */ int ip_defrag(struct net *net, struct sk_buff *skb, u32 user) { - struct net_device *dev = skb->dev ? : skb_dst_dev(skb); - int vif = l3mdev_master_ifindex_rcu(dev); + struct net_device *dev; struct ipq *qp; + int vif; __IP_INC_STATS(net, IPSTATS_MIB_REASMREQDS); /* Lookup (or create) queue header */ rcu_read_lock(); + dev = skb->dev ? 
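[Annotation] __inet_lookup_listener() and __inet_lookup_established() drop their struct inet_hashinfo parameter: TCP is the only remaining user, so the table is fetched internally from net->ipv4.tcp_death_row.hashinfo, and the BPF sk_lookup fast path no longer needs to compare the caller's table against it. Every call site shrinks accordingly (esp4.c earlier; nf_socket_ipv4.c and nf_tproxy_ipv4.c below). Caller-side effect, with placeholder address/port variables:

    /* was: inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
     *                              saddr, sport, daddr, dport, ifindex);
     */
    sk = inet_lookup_established(net, saddr, sport, daddr, dport, ifindex);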
: skb_dst_dev_rcu(skb); + vif = l3mdev_master_ifindex_rcu(dev); qp = ip_find(net, ip_hdr(skb), user, vif); if (qp) { int ret, refs = 0; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index f5b9004d6938..761a53c6a89a 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -28,6 +28,7 @@ #include <linux/etherdevice.h> #include <linux/if_ether.h> +#include <net/flow.h> #include <net/sock.h> #include <net/ip.h> #include <net/icmp.h> @@ -44,7 +45,6 @@ #include <net/gre.h> #include <net/dst_metadata.h> #include <net/erspan.h> -#include <net/inet_dscp.h> /* Problems & solutions @@ -930,7 +930,7 @@ static int ipgre_open(struct net_device *dev) if (ipv4_is_multicast(t->parms.iph.daddr)) { struct flowi4 fl4 = { .flowi4_oif = t->parms.link, - .flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(&t->parms.iph)), + .flowi4_dscp = ip4h_dscp(&t->parms.iph), .flowi4_scope = RT_SCOPE_UNIVERSE, .flowi4_proto = IPPROTO_GRE, .saddr = t->parms.iph.saddr, diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index fc323994b1fa..a09aca2c8567 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -587,9 +587,13 @@ static void ip_sublist_rcv_finish(struct list_head *head) } static struct sk_buff *ip_extract_route_hint(const struct net *net, - struct sk_buff *skb, int rt_type) + struct sk_buff *skb) { - if (fib4_has_custom_rules(net) || rt_type == RTN_BROADCAST || + const struct iphdr *iph = ip_hdr(skb); + + if (fib4_has_custom_rules(net) || + ipv4_is_lbcast(iph->daddr) || + ipv4_is_zeronet(iph->daddr) || IPCB(skb)->flags & IPSKB_MULTIPATH) return NULL; @@ -618,8 +622,7 @@ static void ip_list_rcv_finish(struct net *net, struct list_head *head) dst = skb_dst(skb); if (curr_dst != dst) { - hint = ip_extract_route_hint(net, skb, - dst_rtable(dst)->rt_type); + hint = ip_extract_route_hint(net, skb); /* dispatch old sublist */ if (!list_empty(&sublist)) diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index e3321932bec0..be8815ce3ac2 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -615,14 +615,13 @@ int ip_options_rcv_srr(struct sk_buff *skb, struct net_device *dev) } memcpy(&nexthop, &optptr[srrptr-1], 4); - orefdst = skb->_skb_refdst; - skb_dst_set(skb, NULL); + orefdst = skb_dstref_steal(skb); err = ip_route_input(skb, nexthop, iph->saddr, ip4h_dscp(iph), dev) ? 
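[Annotation] ip_extract_route_hint() now decides from the packet header alone whether a route hint is safe to reuse, checking the destination address directly (limited broadcast or zeronet) instead of consulting the resolved rt_type; that lets ip_list_rcv_finish() drop the dst_rtable() dereference. The rule, restated as a predicate (the helper name is hypothetical; the original returns the skb to use as hint or NULL):

    static bool ip_can_use_hint(const struct net *net, const struct iphdr *iph,
                                const struct sk_buff *skb)
    {
            return !fib4_has_custom_rules(net) &&
                   !ipv4_is_lbcast(iph->daddr) &&
                   !ipv4_is_zeronet(iph->daddr) &&
                   !(IPCB(skb)->flags & IPSKB_MULTIPATH);
    }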
-EINVAL : 0; rt2 = skb_rtable(skb); if (err || (rt2->rt_type != RTN_UNICAST && rt2->rt_type != RTN_LOCAL)) { skb_dst_drop(skb); - skb->_skb_refdst = orefdst; + skb_dstref_restore(skb, orefdst); return -EINVAL; } refdst_drop(orefdst); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 84e7f8a2f50f..2b96651d719b 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -63,6 +63,7 @@ #include <linux/stat.h> #include <linux/init.h> +#include <net/flow.h> #include <net/snmp.h> #include <net/ip.h> #include <net/protocol.h> @@ -485,7 +486,7 @@ int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, inet_sk_init_flowi4(inet, fl4); /* sctp_v4_xmit() uses its own DSCP value */ - fl4->flowi4_tos = tos & INET_DSCP_MASK; + fl4->flowi4_dscp = inet_dsfield_to_dscp(tos); /* If this fails, retransmit mechanism of transport layer will * keep trying until route appears or the connection times diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index e86a8a862c41..ca9eaee4c2ef 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -42,6 +42,7 @@ #include <linux/init.h> #include <linux/if_ether.h> #include <linux/slab.h> +#include <net/flow.h> #include <net/net_namespace.h> #include <net/ip.h> #include <net/protocol.h> @@ -1904,7 +1905,7 @@ static int ipmr_prepare_xmit(struct net *net, struct mr_table *mrt, return -1; } - encap += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len; + encap += LL_RESERVED_SPACE(dst_dev_rcu(&rt->dst)) + rt->dst.header_len; if (skb_cow(skb, encap)) { ip_rt_put(rt); @@ -1957,7 +1958,7 @@ static void ipmr_queue_fwd_xmit(struct net *net, struct mr_table *mrt, * result in receiving multiple packets. */ NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, - net, NULL, skb, skb->dev, rt->dst.dev, + net, NULL, skb, skb->dev, dst_dev_rcu(&rt->dst), ipmr_forward_finish); return; @@ -2120,7 +2121,7 @@ static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb) struct flowi4 fl4 = { .daddr = iph->daddr, .saddr = iph->saddr, - .flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(iph)), + .flowi4_dscp = ip4h_dscp(iph), .flowi4_oif = (rt_is_output_route(rt) ? skb->dev->ifindex : 0), .flowi4_iif = (rt_is_output_route(rt) ? @@ -2301,7 +2302,7 @@ int ip_mr_output(struct net *net, struct sock *sk, struct sk_buff *skb) guard(rcu)(); - dev = rt->dst.dev; + dev = dst_dev_rcu(&rt->dst); if (IPCB(skb)->flags & IPSKB_FORWARDED) goto mc_output; diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 0565f001120d..ce310eb779e0 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -11,10 +11,10 @@ #include <linux/skbuff.h> #include <linux/gfp.h> #include <linux/export.h> +#include <net/flow.h> #include <net/route.h> #include <net/xfrm.h> #include <net/ip.h> -#include <net/inet_dscp.h> #include <net/netfilter/nf_queue.h> /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ @@ -44,7 +44,7 @@ int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, un */ fl4.daddr = iph->daddr; fl4.saddr = saddr; - fl4.flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(iph)); + fl4.flowi4_dscp = ip4h_dscp(iph); fl4.flowi4_oif = sk ? 
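[Annotation] The open-coded refdst juggling seen in icmp.c earlier and ip_options.c here becomes a pair of helpers: skb_dstref_steal() detaches the dst from the skb while keeping its reference (returning the raw _skb_refdst value), and skb_dstref_restore() puts it back. Usage pattern, condensed from the ip_options_rcv_srr() hunk (the acceptability condition stands in for the rt_type checks in the original):

    unsigned long orefdst = skb_dstref_steal(skb);      /* save + clear */

    err = ip_route_input(skb, nexthop, iph->saddr, ip4h_dscp(iph), dev) ?
          -EINVAL : 0;
    if (err || !route_is_acceptable) {
            skb_dst_drop(skb);                          /* drop the new dst */
            skb_dstref_restore(skb, orefdst);           /* reattach the old one */
            return -EINVAL;
    }
    refdst_drop(orefdst);                               /* keep new, release old */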
sk->sk_bound_dev_if : 0; fl4.flowi4_l3mdev = l3mdev_master_ifindex(dev); fl4.flowi4_mark = skb->mark; @@ -65,7 +65,10 @@ int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, un if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && xfrm_decode_session(net, skb, flowi4_to_flowi(&fl4), AF_INET) == 0) { struct dst_entry *dst = skb_dst(skb); - skb_dst_set(skb, NULL); + /* ignore return value from skb_dstref_steal, xfrm_lookup takes + * care of dropping the refcnt if needed. + */ + skb_dstref_steal(skb); dst = xfrm_lookup(net, dst, flowi4_to_flowi(&fl4), sk, 0); if (IS_ERR(dst)) return PTR_ERR(dst); diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c index a27782d7653e..6d9bf5106868 100644 --- a/net/ipv4/netfilter/ipt_rpfilter.c +++ b/net/ipv4/netfilter/ipt_rpfilter.c @@ -8,8 +8,8 @@ #include <linux/module.h> #include <linux/skbuff.h> #include <linux/netdevice.h> -#include <net/inet_dscp.h> #include <linux/ip.h> +#include <net/flow.h> #include <net/ip.h> #include <net/ip_fib.h> #include <net/route.h> @@ -76,7 +76,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) flow.daddr = iph->saddr; flow.saddr = rpfilter_get_saddr(iph->daddr); flow.flowi4_mark = info->flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0; - flow.flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(iph)); + flow.flowi4_dscp = ip4h_dscp(iph); flow.flowi4_scope = RT_SCOPE_UNIVERSE; flow.flowi4_l3mdev = l3mdev_master_ifindex_rcu(xt_in(par)); flow.flowi4_uid = sock_net_uid(xt_net(par), NULL); diff --git a/net/ipv4/netfilter/nf_dup_ipv4.c b/net/ipv4/netfilter/nf_dup_ipv4.c index ed08fb78cfa8..9a773502f10a 100644 --- a/net/ipv4/netfilter/nf_dup_ipv4.c +++ b/net/ipv4/netfilter/nf_dup_ipv4.c @@ -12,10 +12,10 @@ #include <linux/skbuff.h> #include <linux/netfilter.h> #include <net/checksum.h> +#include <net/flow.h> #include <net/icmp.h> #include <net/ip.h> #include <net/route.h> -#include <net/inet_dscp.h> #include <net/netfilter/ipv4/nf_dup_ipv4.h> #if IS_ENABLED(CONFIG_NF_CONNTRACK) #include <net/netfilter/nf_conntrack.h> @@ -33,7 +33,7 @@ static bool nf_dup_ipv4_route(struct net *net, struct sk_buff *skb, fl4.flowi4_oif = oif; fl4.daddr = gw->s_addr; - fl4.flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(iph)); + fl4.flowi4_dscp = ip4h_dscp(iph); fl4.flowi4_scope = RT_SCOPE_UNIVERSE; fl4.flowi4_flags = FLOWI_FLAG_KNOWN_NH; rt = ip_route_output_key(net, &fl4); diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c index 0d3cb2ba6fc8..05631abe3f0d 100644 --- a/net/ipv4/netfilter/nf_reject_ipv4.c +++ b/net/ipv4/netfilter/nf_reject_ipv4.c @@ -12,6 +12,15 @@ #include <linux/netfilter_ipv4.h> #include <linux/netfilter_bridge.h> +static struct iphdr *nf_reject_iphdr_put(struct sk_buff *nskb, + const struct sk_buff *oldskb, + __u8 protocol, int ttl); +static void nf_reject_ip_tcphdr_put(struct sk_buff *nskb, const struct sk_buff *oldskb, + const struct tcphdr *oth); +static const struct tcphdr * +nf_reject_ip_tcphdr_get(struct sk_buff *oldskb, + struct tcphdr *_oth, int hook); + static int nf_reject_iphdr_validate(struct sk_buff *skb) { struct iphdr *iph; @@ -136,8 +145,9 @@ struct sk_buff *nf_reject_skb_v4_unreach(struct net *net, } EXPORT_SYMBOL_GPL(nf_reject_skb_v4_unreach); -const struct tcphdr *nf_reject_ip_tcphdr_get(struct sk_buff *oldskb, - struct tcphdr *_oth, int hook) +static const struct tcphdr * +nf_reject_ip_tcphdr_get(struct sk_buff *oldskb, + struct tcphdr *_oth, int hook) { const struct tcphdr *oth; @@ -163,11 +173,10 @@ 
const struct tcphdr *nf_reject_ip_tcphdr_get(struct sk_buff *oldskb, return oth; } -EXPORT_SYMBOL_GPL(nf_reject_ip_tcphdr_get); -struct iphdr *nf_reject_iphdr_put(struct sk_buff *nskb, - const struct sk_buff *oldskb, - __u8 protocol, int ttl) +static struct iphdr *nf_reject_iphdr_put(struct sk_buff *nskb, + const struct sk_buff *oldskb, + __u8 protocol, int ttl) { struct iphdr *niph, *oiph = ip_hdr(oldskb); @@ -188,10 +197,9 @@ struct iphdr *nf_reject_iphdr_put(struct sk_buff *nskb, return niph; } -EXPORT_SYMBOL_GPL(nf_reject_iphdr_put); -void nf_reject_ip_tcphdr_put(struct sk_buff *nskb, const struct sk_buff *oldskb, - const struct tcphdr *oth) +static void nf_reject_ip_tcphdr_put(struct sk_buff *nskb, const struct sk_buff *oldskb, + const struct tcphdr *oth) { struct iphdr *niph = ip_hdr(nskb); struct tcphdr *tcph; @@ -218,7 +226,6 @@ void nf_reject_ip_tcphdr_put(struct sk_buff *nskb, const struct sk_buff *oldskb, nskb->csum_start = (unsigned char *)tcph - nskb->head; nskb->csum_offset = offsetof(struct tcphdr, check); } -EXPORT_SYMBOL_GPL(nf_reject_ip_tcphdr_put); static int nf_reject_fill_skb_dst(struct sk_buff *skb_in) { diff --git a/net/ipv4/netfilter/nf_socket_ipv4.c b/net/ipv4/netfilter/nf_socket_ipv4.c index a1350fc25838..5080fa5fbf6a 100644 --- a/net/ipv4/netfilter/nf_socket_ipv4.c +++ b/net/ipv4/netfilter/nf_socket_ipv4.c @@ -71,8 +71,7 @@ nf_socket_get_sock_v4(struct net *net, struct sk_buff *skb, const int doff, { switch (protocol) { case IPPROTO_TCP: - return inet_lookup(net, net->ipv4.tcp_death_row.hashinfo, - skb, doff, saddr, sport, daddr, dport, + return inet_lookup(net, skb, doff, saddr, sport, daddr, dport, in->ifindex); case IPPROTO_UDP: return udp4_lib_lookup(net, saddr, sport, daddr, dport, diff --git a/net/ipv4/netfilter/nf_tproxy_ipv4.c b/net/ipv4/netfilter/nf_tproxy_ipv4.c index 73e66a088e25..041c3f37f237 100644 --- a/net/ipv4/netfilter/nf_tproxy_ipv4.c +++ b/net/ipv4/netfilter/nf_tproxy_ipv4.c @@ -81,7 +81,6 @@ nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, const struct net_device *in, const enum nf_tproxy_lookup_t lookup_type) { - struct inet_hashinfo *hinfo = net->ipv4.tcp_death_row.hashinfo; struct sock *sk; switch (protocol) { @@ -95,7 +94,7 @@ nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, switch (lookup_type) { case NF_TPROXY_LOOKUP_LISTENER: - sk = inet_lookup_listener(net, hinfo, skb, + sk = inet_lookup_listener(net, skb, ip_hdrlen(skb) + __tcp_hdrlen(hp), saddr, sport, daddr, dport, in->ifindex, 0); @@ -109,7 +108,7 @@ nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, */ break; case NF_TPROXY_LOOKUP_ESTABLISHED: - sk = inet_lookup_established(net, hinfo, saddr, sport, + sk = inet_lookup_established(net, saddr, sport, daddr, dport, in->ifindex); break; default: diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c index 7e7c49535e3f..82af6cd76d13 100644 --- a/net/ipv4/netfilter/nft_fib_ipv4.c +++ b/net/ipv4/netfilter/nft_fib_ipv4.c @@ -10,7 +10,7 @@ #include <net/netfilter/nf_tables.h> #include <net/netfilter/nft_fib.h> -#include <net/inet_dscp.h> +#include <net/flow.h> #include <net/ip.h> #include <net/ip_fib.h> #include <net/route.h> @@ -114,7 +114,7 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, if (priv->flags & NFTA_FIB_F_MARK) fl4.flowi4_mark = pkt->skb->mark; - fl4.flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(iph)); + fl4.flowi4_dscp = ip4h_dscp(iph); if (priv->flags & NFTA_FIB_F_DADDR) { fl4.daddr = iph->daddr; diff --git a/net/ipv4/nexthop.c 
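[Annotation] nf_reject_iphdr_put(), nf_reject_ip_tcphdr_get() and nf_reject_ip_tcphdr_put() lose their EXPORT_SYMBOL_GPL: with no module users left they become static, and since their definitions sit below their first use, forward declarations are added at the top of nf_reject_ipv4.c rather than reordering the file. Shape of the change:

    /* top of file: static forward declarations replace the public protos */
    static struct iphdr *nf_reject_iphdr_put(struct sk_buff *nskb,
                                             const struct sk_buff *oldskb,
                                             __u8 protocol, int ttl);

    /* at each definition: drop EXPORT_SYMBOL_GPL(), prepend static */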
b/net/ipv4/nexthop.c index 29118c43ebf5..0a20625f5ffb 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -2087,6 +2087,12 @@ static void remove_nexthop_from_groups(struct net *net, struct nexthop *nh, { struct nh_grp_entry *nhge, *tmp; + /* If there is nothing to do, let's avoid the costly call to + * synchronize_net() + */ + if (list_empty(&nh->grp_list)) + return; + list_for_each_entry_safe(nhge, tmp, &nh->grp_list, nh_list) remove_nh_grp_entry(net, nhge, nlinfo); @@ -3511,12 +3517,42 @@ static int rtm_dump_walk_nexthops(struct sk_buff *skb, int err; s_idx = ctx->idx; - for (node = rb_first(root); node; node = rb_next(node)) { + + /* If this is not the first invocation, ctx->idx will contain the id of + * the last nexthop we processed. Instead of starting from the very + * first element of the red/black tree again and linearly skipping the + * (potentially large) set of nodes with an id smaller than s_idx, walk + * the tree and find the left-most node whose id is >= s_idx. This + * provides an efficient O(log n) starting point for the dump + * continuation. + */ + if (s_idx != 0) { + struct rb_node *tmp = root->rb_node; + + node = NULL; + while (tmp) { + struct nexthop *nh; + + nh = rb_entry(tmp, struct nexthop, rb_node); + if (nh->id < s_idx) { + tmp = tmp->rb_right; + } else { + /* Track current candidate and keep looking on + * the left side to find the left-most + * (smallest id) that is still >= s_idx. + */ + node = tmp; + tmp = tmp->rb_left; + } + } + } else { + node = rb_first(root); + } + + for (; node; node = rb_next(node)) { struct nexthop *nh; nh = rb_entry(node, struct nexthop, rb_node); - if (nh->id < s_idx) - continue; ctx->idx = nh->id; err = nh_cb(skb, cb, nh, data); diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 031df4c19fcc..5321c5801c64 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -56,9 +56,7 @@ struct ping_table { static struct ping_table ping_table; struct pingv6_ops pingv6_ops; -EXPORT_SYMBOL_GPL(pingv6_ops); - -static u16 ping_port_rover; +EXPORT_IPV6_MOD_GPL(pingv6_ops); static inline u32 ping_hashfn(const struct net *net, u32 num, u32 mask) { @@ -67,7 +65,6 @@ static inline u32 ping_hashfn(const struct net *net, u32 num, u32 mask) pr_debug("hash(%u) = %u\n", num, res); return res; } -EXPORT_SYMBOL_GPL(ping_hash); static inline struct hlist_head *ping_hashslot(struct ping_table *table, struct net *net, unsigned int num) @@ -77,6 +74,7 @@ static inline struct hlist_head *ping_hashslot(struct ping_table *table, int ping_get_port(struct sock *sk, unsigned short ident) { + struct net *net = sock_net(sk); struct inet_sock *isk, *isk2; struct hlist_head *hlist; struct sock *sk2 = NULL; @@ -84,15 +82,16 @@ int ping_get_port(struct sock *sk, unsigned short ident) isk = inet_sk(sk); spin_lock(&ping_table.lock); if (ident == 0) { + u16 result = net->ipv4.ping_port_rover + 1; u32 i; - u16 result = ping_port_rover + 1; for (i = 0; i < (1L << 16); i++, result++) { if (!result) - result++; /* avoid zero */ - hlist = ping_hashslot(&ping_table, sock_net(sk), - result); + continue; /* avoid zero */ + hlist = ping_hashslot(&ping_table, net, result); sk_for_each(sk2, hlist) { + if (!net_eq(sock_net(sk2), net)) + continue; isk2 = inet_sk(sk2); if (isk2->inet_num == result) @@ -100,7 +99,7 @@ int ping_get_port(struct sock *sk, unsigned short ident) } /* found */ - ping_port_rover = ident = result; + net->ipv4.ping_port_rover = ident = result; break; next_port: ; @@ -108,8 +107,10 @@ next_port: if (i >= (1L << 16)) goto fail; } else { - hlist = 
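[Annotation] The nexthop dump change above replaces a linear skip with a proper lower-bound search: on dump continuation, instead of iterating from rb_first() past every id below s_idx, it descends the red/black tree once to the left-most node with id >= s_idx. Extracted into a standalone helper for clarity (the function name is illustrative; the loop body matches the patch):

    /* O(log n) lower bound in an rb-tree keyed by nexthop id. */
    static struct rb_node *nh_lower_bound(struct rb_root *root, u32 s_idx)
    {
            struct rb_node *tmp = root->rb_node, *node = NULL;

            while (tmp) {
                    struct nexthop *nh = rb_entry(tmp, struct nexthop, rb_node);

                    if (nh->id < s_idx) {
                            tmp = tmp->rb_right;
                    } else {
                            node = tmp;     /* candidate; keep looking left */
                            tmp = tmp->rb_left;
                    }
            }
            return node;    /* NULL when every id is < s_idx */
    }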
ping_hashslot(&ping_table, sock_net(sk), ident); + hlist = ping_hashslot(&ping_table, net, ident); sk_for_each(sk2, hlist) { + if (!net_eq(sock_net(sk2), net)) + continue; isk2 = inet_sk(sk2); /* BUG? Why is this reuse and not reuseaddr? ping.c @@ -129,7 +130,7 @@ next_port: pr_debug("was not hashed\n"); sk_add_node_rcu(sk, hlist); sock_set_flag(sk, SOCK_RCU_FREE); - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); + sock_prot_inuse_add(net, sk->sk_prot, 1); } spin_unlock(&ping_table.lock); return 0; @@ -138,15 +139,7 @@ fail: spin_unlock(&ping_table.lock); return -EADDRINUSE; } -EXPORT_SYMBOL_GPL(ping_get_port); - -int ping_hash(struct sock *sk) -{ - pr_debug("ping_hash(sk->port=%u)\n", inet_sk(sk)->inet_num); - BUG(); /* "Please do not press this button again." */ - - return 0; -} +EXPORT_IPV6_MOD_GPL(ping_get_port); void ping_unhash(struct sock *sk) { @@ -161,7 +154,7 @@ void ping_unhash(struct sock *sk) } spin_unlock(&ping_table.lock); } -EXPORT_SYMBOL_GPL(ping_unhash); +EXPORT_IPV6_MOD_GPL(ping_unhash); /* Called under rcu_read_lock() */ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident) @@ -188,6 +181,8 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident) } sk_for_each_rcu(sk, hslot) { + if (!net_eq(sock_net(sk), net)) + continue; isk = inet_sk(sk); pr_debug("iterate\n"); @@ -279,7 +274,7 @@ out_release_group: put_group_info(group_info); return ret; } -EXPORT_SYMBOL_GPL(ping_init_sock); +EXPORT_IPV6_MOD_GPL(ping_init_sock); void ping_close(struct sock *sk, long timeout) { @@ -289,7 +284,7 @@ void ping_close(struct sock *sk, long timeout) sk_common_release(sk); } -EXPORT_SYMBOL_GPL(ping_close); +EXPORT_IPV6_MOD_GPL(ping_close); static int ping_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) @@ -467,7 +462,7 @@ out: pr_debug("ping_v4_bind -> %d\n", err); return err; } -EXPORT_SYMBOL_GPL(ping_bind); +EXPORT_IPV6_MOD_GPL(ping_bind); /* * Is this a supported type of ICMP message? @@ -600,7 +595,7 @@ void ping_err(struct sk_buff *skb, int offset, u32 info) out: return; } -EXPORT_SYMBOL_GPL(ping_err); +EXPORT_IPV6_MOD_GPL(ping_err); /* * Copy and checksum an ICMP Echo packet from user space into a buffer @@ -630,7 +625,7 @@ int ping_getfrag(void *from, char *to, return 0; } -EXPORT_SYMBOL_GPL(ping_getfrag); +EXPORT_IPV6_MOD_GPL(ping_getfrag); static int ping_v4_push_pending_frames(struct sock *sk, struct pingfakehdr *pfh, struct flowi4 *fl4) @@ -691,7 +686,7 @@ int ping_common_sendmsg(int family, struct msghdr *msg, size_t len, return 0; } -EXPORT_SYMBOL_GPL(ping_common_sendmsg); +EXPORT_IPV6_MOD_GPL(ping_common_sendmsg); static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { @@ -936,7 +931,7 @@ out: pr_debug("ping_recvmsg -> %d\n", err); return err; } -EXPORT_SYMBOL_GPL(ping_recvmsg); +EXPORT_IPV6_MOD_GPL(ping_recvmsg); static enum skb_drop_reason __ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) @@ -957,7 +952,7 @@ int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { return __ping_queue_rcv_skb(sk, skb) ? 
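[Annotation] ping.c becomes properly namespace-aware: the ident rover moves from a file-static u16 into struct netns_ipv4 (seeded with get_random_u16() per namespace in the proc-init hunk further down), and both the allocation and lookup loops gain net_eq() checks so sockets hashed from other namespaces can neither collide with nor match a local one. Condensed allocation loop:

    u16 result = net->ipv4.ping_port_rover + 1;

    for (i = 0; i < (1L << 16); i++, result++) {
            if (!result)
                    continue;       /* ident 0 is reserved */
            hlist = ping_hashslot(&ping_table, net, result);
            sk_for_each(sk2, hlist) {
                    if (!net_eq(sock_net(sk2), net))
                            continue;       /* other netns: not a conflict */
                    /* ident-in-use check as in the patch */
            }
    }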
-1 : 0; } -EXPORT_SYMBOL_GPL(ping_queue_rcv_skb); +EXPORT_IPV6_MOD_GPL(ping_queue_rcv_skb); /* @@ -985,7 +980,7 @@ enum skb_drop_reason ping_rcv(struct sk_buff *skb) kfree_skb_reason(skb, SKB_DROP_REASON_NO_SOCKET); return SKB_DROP_REASON_NO_SOCKET; } -EXPORT_SYMBOL_GPL(ping_rcv); +EXPORT_IPV6_MOD_GPL(ping_rcv); struct proto ping_prot = { .name = "PING", @@ -1002,13 +997,12 @@ struct proto ping_prot = { .bind = ping_bind, .backlog_rcv = ping_queue_rcv_skb, .release_cb = ip4_datagram_release_cb, - .hash = ping_hash, .unhash = ping_unhash, .get_port = ping_get_port, .put_port = ping_unhash, .obj_size = sizeof(struct inet_sock), }; -EXPORT_SYMBOL(ping_prot); +EXPORT_IPV6_MOD(ping_prot); #ifdef CONFIG_PROC_FS @@ -1073,7 +1067,7 @@ void *ping_seq_start(struct seq_file *seq, loff_t *pos, sa_family_t family) return *pos ? ping_get_idx(seq, *pos-1) : SEQ_START_TOKEN; } -EXPORT_SYMBOL_GPL(ping_seq_start); +EXPORT_IPV6_MOD_GPL(ping_seq_start); static void *ping_v4_seq_start(struct seq_file *seq, loff_t *pos) { @@ -1092,14 +1086,14 @@ void *ping_seq_next(struct seq_file *seq, void *v, loff_t *pos) ++*pos; return sk; } -EXPORT_SYMBOL_GPL(ping_seq_next); +EXPORT_IPV6_MOD_GPL(ping_seq_next); void ping_seq_stop(struct seq_file *seq, void *v) __releases(ping_table.lock) { spin_unlock(&ping_table.lock); } -EXPORT_SYMBOL_GPL(ping_seq_stop); +EXPORT_IPV6_MOD_GPL(ping_seq_stop); static void ping_v4_format_sock(struct sock *sp, struct seq_file *f, int bucket) @@ -1119,7 +1113,7 @@ static void ping_v4_format_sock(struct sock *sp, struct seq_file *f, from_kuid_munged(seq_user_ns(f), sk_uid(sp)), 0, sock_i_ino(sp), refcount_read(&sp->sk_refcnt), sp, - atomic_read(&sp->sk_drops)); + sk_drops_read(sp)); } static int ping_v4_seq_show(struct seq_file *seq, void *v) @@ -1150,6 +1144,8 @@ static int __net_init ping_v4_proc_init_net(struct net *net) if (!proc_create_net("icmp", 0444, net->proc_net, &ping_v4_seq_ops, sizeof(struct ping_iter_state))) return -ENOMEM; + + net->ipv4.ping_port_rover = get_random_u16(); return 0; } diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 1d2c89d63cc7..d54ebb7df966 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -178,7 +178,7 @@ static int raw_v4_input(struct net *net, struct sk_buff *skb, if (atomic_read(&sk->sk_rmem_alloc) >= READ_ONCE(sk->sk_rcvbuf)) { - atomic_inc(&sk->sk_drops); + sk_drops_inc(sk); continue; } @@ -311,7 +311,7 @@ static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb) int raw_rcv(struct sock *sk, struct sk_buff *skb) { if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) { - atomic_inc(&sk->sk_drops); + sk_drops_inc(sk); sk_skb_reason_drop(sk, skb, SKB_DROP_REASON_XFRM_POLICY); return NET_RX_DROP; } @@ -793,6 +793,7 @@ static int raw_sk_init(struct sock *sk) { struct raw_sock *rp = raw_sk(sk); + sk->sk_drop_counters = &rp->drop_counters; if (inet_sk(sk)->inet_num == IPPROTO_ICMP) memset(&rp->filter, 0, sizeof(rp->filter)); return 0; @@ -1045,7 +1046,7 @@ static void raw_sock_seq_show(struct seq_file *seq, struct sock *sp, int i) 0, 0L, 0, from_kuid_munged(seq_user_ns(seq), sk_uid(sp)), 0, sock_i_ino(sp), - refcount_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops)); + refcount_read(&sp->sk_refcnt), sp, sk_drops_read(sp)); } static int raw_seq_show(struct seq_file *seq, void *v) diff --git a/net/ipv4/raw_diag.c b/net/ipv4/raw_diag.c index cc793bd8de25..943e5998e0ad 100644 --- a/net/ipv4/raw_diag.c +++ b/net/ipv4/raw_diag.c @@ -126,9 +126,9 @@ static int raw_diag_dump_one(struct netlink_callback *cb, static int sk_diag_dump(struct sock *sk, struct sk_buff 
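[Annotation] raw.c (and ping.c just above) switch from open-coded atomic_inc(&sk->sk_drops)/atomic_read() to sk_drops_inc() and sk_drops_read(), and raw_sk_init() points the new sk->sk_drop_counters at a per-protocol counters struct. The accessor indirection is the point: the backing storage can change (for example, split counters to reduce cache-line contention) without touching every protocol. The real definitions are not in this section; the baseline semantics they must generalize would be:

    /* Assumed baseline behaviour of the accessors. */
    static inline void sk_drops_inc(struct sock *sk)
    {
            atomic_inc(&sk->sk_drops);
    }

    static inline int sk_drops_read(const struct sock *sk)
    {
            return atomic_read(&sk->sk_drops);
    }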
*skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *r, - struct nlattr *bc, bool net_admin) + bool net_admin) { - if (!inet_diag_bc_sk(bc, sk)) + if (!inet_diag_bc_sk(cb->data, sk)) return 0; return inet_sk_diag_fill(sk, NULL, skb, cb, r, NLM_F_MULTI, net_admin); @@ -140,17 +140,13 @@ static void raw_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN); struct raw_hashinfo *hashinfo = raw_get_hashinfo(r); struct net *net = sock_net(skb->sk); - struct inet_diag_dump_data *cb_data; int num, s_num, slot, s_slot; struct hlist_head *hlist; struct sock *sk = NULL; - struct nlattr *bc; if (IS_ERR(hashinfo)) return; - cb_data = cb->data; - bc = cb_data->inet_diag_nla_bc; s_slot = cb->args[0]; num = s_num = cb->args[1]; @@ -174,7 +170,7 @@ static void raw_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, if (r->id.idiag_dport != inet->inet_dport && r->id.idiag_dport) goto next; - if (sk_diag_dump(sk, skb, cb, r, bc, net_admin) < 0) + if (sk_diag_dump(sk, skb, cb, r, net_admin) < 0) goto out_unlock; next: num++; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index baa43e5966b1..50309f2ab132 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -84,6 +84,7 @@ #include <linux/jhash.h> #include <net/dst.h> #include <net/dst_metadata.h> +#include <net/flow.h> #include <net/inet_dscp.h> #include <net/net_namespace.h> #include <net/ip.h> @@ -413,11 +414,11 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, const void *daddr) { const struct rtable *rt = container_of(dst, struct rtable, dst); - struct net_device *dev = dst_dev(dst); + struct net_device *dev; struct neighbour *n; rcu_read_lock(); - + dev = dst_dev_rcu(dst); if (likely(rt->rt_gw_family == AF_INET)) { n = ip_neigh_gw4(dev, rt->rt_gw4); } else if (rt->rt_gw_family == AF_INET6) { @@ -1026,7 +1027,7 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) return; rcu_read_lock(); - net = dev_net_rcu(dst_dev(dst)); + net = dst_dev_net_rcu(dst); if (mtu < net->ipv4.ip_rt_min_pmtu) { lock = true; mtu = min(old_mtu, net->ipv4.ip_rt_min_pmtu); @@ -1291,7 +1292,7 @@ void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt) struct flowi4 fl4 = { .daddr = iph->daddr, .saddr = iph->saddr, - .flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(iph)), + .flowi4_dscp = ip4h_dscp(iph), .flowi4_oif = rt->dst.dev->ifindex, .flowi4_iif = skb->dev->ifindex, .flowi4_mark = skb->mark, @@ -1326,7 +1327,7 @@ static unsigned int ipv4_default_advmss(const struct dst_entry *dst) struct net *net; rcu_read_lock(); - net = dev_net_rcu(dst_dev(dst)); + net = dst_dev_net_rcu(dst); advmss = max_t(unsigned int, ipv4_mtu(dst) - header_size, net->ipv4.ip_rt_min_advmss); rcu_read_unlock(); @@ -2210,7 +2211,7 @@ ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr, goto martian_source; } - if (rt->rt_type != RTN_LOCAL) + if (!(rt->rt_flags & RTCF_LOCAL)) goto skip_validate_source; reason = fib_validate_source_reason(skb, saddr, daddr, dscp, 0, dev, @@ -2331,7 +2332,7 @@ ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, fl4.flowi4_oif = 0; fl4.flowi4_iif = dev->ifindex; fl4.flowi4_mark = skb->mark; - fl4.flowi4_tos = inet_dscp_to_dsfield(dscp); + fl4.flowi4_dscp = dscp; fl4.flowi4_scope = RT_SCOPE_UNIVERSE; fl4.flowi4_flags = 0; fl4.daddr = daddr; @@ -2694,7 +2695,6 @@ struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, struct rtable *rth; fl4->flowi4_iif = LOOPBACK_IFINDEX; - 
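[Annotation] Several hunks in this section (icmp.c, ip_fragment.c, ipmr.c, and route.c here) make the same fix: dst_dev() becomes dst_dev_rcu() or dst_dev_net_rcu(), and rcu_read_lock() is taken before the device pointer is dereferenced rather than just after. Pattern, condensed from the ipv4_neigh_lookup() hunk:

    rcu_read_lock();
    dev = dst_dev_rcu(dst);     /* was read outside the RCU section */
    /* ... use dev ... */
    rcu_read_unlock();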
fl4->flowi4_tos &= INET_DSCP_MASK; rcu_read_lock(); rth = ip_route_output_key_hash_rcu(net, fl4, &res, skb); @@ -3337,7 +3337,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, fl4.daddr = dst; fl4.saddr = src; - fl4.flowi4_tos = inet_dscp_to_dsfield(dscp); + fl4.flowi4_dscp = dscp; fl4.flowi4_oif = nla_get_u32_default(tb[RTA_OIF], 0); fl4.flowi4_mark = mark; fl4.flowi4_uid = uid; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 71a956fbfc55..40b774b4f587 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3195,7 +3195,7 @@ adjudge_to_death: /* remove backlog if any, without releasing ownership. */ __release_sock(sk); - this_cpu_inc(tcp_orphan_count); + tcp_orphan_count_inc(); /* Have we already been destroyed by a softirq or backlog? */ if (state != TCP_CLOSE && sk->sk_state == TCP_CLOSE) @@ -3376,7 +3376,7 @@ int tcp_disconnect(struct sock *sk, int flags) WRITE_ONCE(tp->write_seq, seq); icsk->icsk_backoff = 0; - icsk->icsk_probes_out = 0; + WRITE_ONCE(icsk->icsk_probes_out, 0); icsk->icsk_probes_tstamp = 0; icsk->icsk_rto = TCP_TIMEOUT_INIT; WRITE_ONCE(icsk->icsk_rto_min, TCP_RTO_MIN); @@ -3760,7 +3760,7 @@ int tcp_sock_set_maxseg(struct sock *sk, int val) if (val && (val < TCP_MIN_MSS || val > MAX_TCP_WINDOW)) return -EINVAL; - tcp_sk(sk)->rx_opt.user_mss = val; + WRITE_ONCE(tcp_sk(sk)->rx_opt.user_mss, val); return 0; } @@ -3890,15 +3890,13 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname, WRITE_ONCE(inet_csk(sk)->icsk_delack_max, delack_max); return 0; } + case TCP_MAXSEG: + return tcp_sock_set_maxseg(sk, val); } sockopt_lock_sock(sk); switch (optname) { - case TCP_MAXSEG: - err = tcp_sock_set_maxseg(sk, val); - break; - case TCP_NODELAY: __tcp_sock_set_nodelay(sk, val); break; @@ -4348,7 +4346,8 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk, nla_put_u32(stats, TCP_NLA_REORDERING, tp->reordering); nla_put_u32(stats, TCP_NLA_MIN_RTT, tcp_min_rtt(tp)); - nla_put_u8(stats, TCP_NLA_RECUR_RETRANS, inet_csk(sk)->icsk_retransmits); + nla_put_u8(stats, TCP_NLA_RECUR_RETRANS, + READ_ONCE(inet_csk(sk)->icsk_retransmits)); nla_put_u8(stats, TCP_NLA_DELIVERY_RATE_APP_LMT, !!tp->rate_app_limited); nla_put_u32(stats, TCP_NLA_SND_SSTHRESH, tp->snd_ssthresh); nla_put_u32(stats, TCP_NLA_DELIVERED, tp->delivered); @@ -4383,6 +4382,7 @@ int do_tcp_getsockopt(struct sock *sk, int level, struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct net *net = sock_net(sk); + int user_mss; int val, len; if (copy_from_sockptr(&len, optlen, sizeof(int))) @@ -4396,9 +4396,10 @@ int do_tcp_getsockopt(struct sock *sk, int level, switch (optname) { case TCP_MAXSEG: val = tp->mss_cache; - if (tp->rx_opt.user_mss && + user_mss = READ_ONCE(tp->rx_opt.user_mss); + if (user_mss && ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) - val = tp->rx_opt.user_mss; + val = user_mss; if (tp->repair) val = tp->rx_opt.mss_clamp; break; diff --git a/net/ipv4/tcp_cdg.c b/net/ipv4/tcp_cdg.c index ba4d98e510e0..fbad6c35dee9 100644 --- a/net/ipv4/tcp_cdg.c +++ b/net/ipv4/tcp_cdg.c @@ -379,7 +379,7 @@ static void tcp_cdg_init(struct sock *sk) /* We silently fall back to window = 1 if allocation fails. 
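[Annotation] TCP_MAXSEG joins the socket options handled without the socket lock: tcp_sock_set_maxseg() only validates the range and stores a single word, so a WRITE_ONCE()/READ_ONCE() pairing (see the do_tcp_getsockopt() hunk reading user_mss) suffices, and the option moves out of the sockopt_lock_sock() section. The complete lockless setter, as in the patch:

    int tcp_sock_set_maxseg(struct sock *sk, int val)
    {
            /* 0 clears the user MSS; other values must be in TCP's valid range */
            if (val && (val < TCP_MIN_MSS || val > MAX_TCP_WINDOW))
                    return -EINVAL;

            WRITE_ONCE(tcp_sk(sk)->rx_opt.user_mss, val);
            return 0;
    }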
*/ if (window > 1) ca->gradients = kcalloc(window, sizeof(ca->gradients[0]), - GFP_NOWAIT | __GFP_NOWARN); + GFP_NOWAIT); ca->rtt_seq = tp->snd_nxt; ca->shadow_wnd = tcp_snd_cwnd(tp); } diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 45e174b8cd22..d83efd91f461 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -12,6 +12,9 @@ #include <linux/tcp.h> +#include <net/inet_hashtables.h> +#include <net/inet6_hashtables.h> +#include <net/inet_timewait_sock.h> #include <net/netlink.h> #include <net/tcp.h> @@ -174,27 +177,465 @@ static size_t tcp_diag_get_aux_size(struct sock *sk, bool net_admin) size += ulp_ops->get_info_size(sk, net_admin); } } - return size; + + return size + + nla_total_size(sizeof(struct tcp_info)) + + nla_total_size(sizeof(struct inet_diag_msg)) + + inet_diag_msg_attrs_size() + + nla_total_size(sizeof(struct inet_diag_meminfo)) + + nla_total_size(SK_MEMINFO_VARS * sizeof(u32)) + + nla_total_size(TCP_CA_NAME_MAX) + + nla_total_size(sizeof(struct tcpvegas_info)) + + 64; +} + +static int tcp_twsk_diag_fill(struct sock *sk, + struct sk_buff *skb, + struct netlink_callback *cb, + u16 nlmsg_flags, bool net_admin) +{ + struct inet_timewait_sock *tw = inet_twsk(sk); + struct inet_diag_msg *r; + struct nlmsghdr *nlh; + long tmo; + + nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, cb->nlh->nlmsg_type, + sizeof(*r), nlmsg_flags); + if (!nlh) + return -EMSGSIZE; + + r = nlmsg_data(nlh); + DEBUG_NET_WARN_ON_ONCE(tw->tw_state != TCP_TIME_WAIT); + + inet_diag_msg_common_fill(r, sk); + r->idiag_retrans = 0; + + r->idiag_state = READ_ONCE(tw->tw_substate); + r->idiag_timer = 3; + tmo = tw->tw_timer.expires - jiffies; + r->idiag_expires = jiffies_delta_to_msecs(tmo); + r->idiag_rqueue = 0; + r->idiag_wqueue = 0; + r->idiag_uid = 0; + r->idiag_inode = 0; + + if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, + tw->tw_mark)) { + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; + } + + nlmsg_end(skb, nlh); + return 0; +} + +static int tcp_req_diag_fill(struct sock *sk, struct sk_buff *skb, + struct netlink_callback *cb, + u16 nlmsg_flags, bool net_admin) +{ + struct request_sock *reqsk = inet_reqsk(sk); + struct inet_diag_msg *r; + struct nlmsghdr *nlh; + long tmo; + + nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, + cb->nlh->nlmsg_type, sizeof(*r), nlmsg_flags); + if (!nlh) + return -EMSGSIZE; + + r = nlmsg_data(nlh); + inet_diag_msg_common_fill(r, sk); + r->idiag_state = TCP_SYN_RECV; + r->idiag_timer = 1; + r->idiag_retrans = READ_ONCE(reqsk->num_retrans); + + BUILD_BUG_ON(offsetof(struct inet_request_sock, ir_cookie) != + offsetof(struct sock, sk_cookie)); + + tmo = READ_ONCE(inet_reqsk(sk)->rsk_timer.expires) - jiffies; + r->idiag_expires = jiffies_delta_to_msecs(tmo); + r->idiag_rqueue = 0; + r->idiag_wqueue = 0; + r->idiag_uid = 0; + r->idiag_inode = 0; + + if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, + inet_rsk(reqsk)->ir_mark)) { + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; + } + + nlmsg_end(skb, nlh); + return 0; +} + +static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, + struct netlink_callback *cb, + const struct inet_diag_req_v2 *r, + u16 nlmsg_flags, bool net_admin) +{ + if (sk->sk_state == TCP_TIME_WAIT) + return tcp_twsk_diag_fill(sk, skb, cb, nlmsg_flags, net_admin); + + if (sk->sk_state == TCP_NEW_SYN_RECV) + return tcp_req_diag_fill(sk, skb, cb, nlmsg_flags, net_admin); + + return inet_sk_diag_fill(sk, inet_csk(sk), skb, cb, r, nlmsg_flags, + net_admin); +} + +static void twsk_build_assert(void) +{ + 
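/*
 * Sketch: sizing a netlink reply up front, as tcp_diag_get_aux_size() now
 * does above. nla_total_size() accounts for one attribute's payload plus
 * header and padding; summing these before nlmsg_new() avoids -EMSGSIZE
 * during the fill. The attribute set below is a reduced, illustrative
 * subset of what the patch itemizes.
 */
static size_t ex_reply_size(void)
{
	return nla_total_size(sizeof(struct tcp_info)) +	/* INET_DIAG_INFO */
	       nla_total_size(TCP_CA_NAME_MAX) +		/* INET_DIAG_CONG */
	       64;	/* slack for small optional attributes */
}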
BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_family) != + offsetof(struct sock, sk_family)); + + BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_num) != + offsetof(struct inet_sock, inet_num)); + + BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_dport) != + offsetof(struct inet_sock, inet_dport)); + + BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_rcv_saddr) != + offsetof(struct inet_sock, inet_rcv_saddr)); + + BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_daddr) != + offsetof(struct inet_sock, inet_daddr)); + +#if IS_ENABLED(CONFIG_IPV6) + BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_v6_rcv_saddr) != + offsetof(struct sock, sk_v6_rcv_saddr)); + + BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_v6_daddr) != + offsetof(struct sock, sk_v6_daddr)); +#endif } static void tcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *r) { - struct inet_hashinfo *hinfo; + bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN); + struct inet_diag_dump_data *cb_data = cb->data; + struct net *net = sock_net(skb->sk); + u32 idiag_states = r->idiag_states; + struct inet_hashinfo *hashinfo; + int i, num, s_i, s_num; + struct sock *sk; - hinfo = sock_net(cb->skb->sk)->ipv4.tcp_death_row.hashinfo; + hashinfo = net->ipv4.tcp_death_row.hashinfo; + if (idiag_states & TCPF_SYN_RECV) + idiag_states |= TCPF_NEW_SYN_RECV; + s_i = cb->args[1]; + s_num = num = cb->args[2]; + + if (cb->args[0] == 0) { + if (!(idiag_states & TCPF_LISTEN) || r->id.idiag_dport) + goto skip_listen_ht; + + for (i = s_i; i <= hashinfo->lhash2_mask; i++) { + struct inet_listen_hashbucket *ilb; + struct hlist_nulls_node *node; + + num = 0; + ilb = &hashinfo->lhash2[i]; + + if (hlist_nulls_empty(&ilb->nulls_head)) { + s_num = 0; + continue; + } + spin_lock(&ilb->lock); + sk_nulls_for_each(sk, node, &ilb->nulls_head) { + struct inet_sock *inet = inet_sk(sk); + + if (!net_eq(sock_net(sk), net)) + continue; + + if (num < s_num) { + num++; + continue; + } + + if (r->sdiag_family != AF_UNSPEC && + sk->sk_family != r->sdiag_family) + goto next_listen; + + if (r->id.idiag_sport != inet->inet_sport && + r->id.idiag_sport) + goto next_listen; + + if (!inet_diag_bc_sk(cb_data, sk)) + goto next_listen; + + if (inet_sk_diag_fill(sk, inet_csk(sk), skb, + cb, r, NLM_F_MULTI, + net_admin) < 0) { + spin_unlock(&ilb->lock); + goto done; + } + +next_listen: + ++num; + } + spin_unlock(&ilb->lock); + + s_num = 0; + } +skip_listen_ht: + cb->args[0] = 1; + s_i = num = s_num = 0; + } + +/* Process a maximum of SKARR_SZ sockets at a time when walking hash buckets + * with bh disabled. + */ +#define SKARR_SZ 16 + + /* Dump bound but inactive (not listening, connecting, etc.) 
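/*
 * Sketch of the compile-time layout checks used by twsk_build_assert()
 * above: BUILD_BUG_ON(offsetof(...) != offsetof(...)) breaks the build if
 * two structures ever stop overlaying, which is what lets the dump code
 * match a timewait socket through the generic struct sock fields. The two
 * structs here are illustrative stand-ins.
 */
struct ex_mini { __be16 dport; };
struct ex_full { __be16 dport; u32 extra; };

static inline void ex_build_assert(void)
{
	BUILD_BUG_ON(offsetof(struct ex_mini, dport) !=
		     offsetof(struct ex_full, dport));
}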
sockets */ + if (cb->args[0] == 1) { + if (!(idiag_states & TCPF_BOUND_INACTIVE)) + goto skip_bind_ht; + + for (i = s_i; i < hashinfo->bhash_size; i++) { + struct inet_bind_hashbucket *ibb; + struct inet_bind2_bucket *tb2; + struct sock *sk_arr[SKARR_SZ]; + int num_arr[SKARR_SZ]; + int idx, accum, res; + +resume_bind_walk: + num = 0; + accum = 0; + ibb = &hashinfo->bhash2[i]; + + if (hlist_empty(&ibb->chain)) { + s_num = 0; + continue; + } + spin_lock_bh(&ibb->lock); + inet_bind_bucket_for_each(tb2, &ibb->chain) { + if (!net_eq(ib2_net(tb2), net)) + continue; + + sk_for_each_bound(sk, &tb2->owners) { + struct inet_sock *inet = inet_sk(sk); + + if (num < s_num) + goto next_bind; + + if (sk->sk_state != TCP_CLOSE || + !inet->inet_num) + goto next_bind; + + if (r->sdiag_family != AF_UNSPEC && + r->sdiag_family != sk->sk_family) + goto next_bind; + + if (!inet_diag_bc_sk(cb_data, sk)) + goto next_bind; + + sock_hold(sk); + num_arr[accum] = num; + sk_arr[accum] = sk; + if (++accum == SKARR_SZ) + goto pause_bind_walk; +next_bind: + num++; + } + } +pause_bind_walk: + spin_unlock_bh(&ibb->lock); + + res = 0; + for (idx = 0; idx < accum; idx++) { + if (res >= 0) { + res = inet_sk_diag_fill(sk_arr[idx], + NULL, skb, cb, + r, NLM_F_MULTI, + net_admin); + if (res < 0) + num = num_arr[idx]; + } + sock_put(sk_arr[idx]); + } + if (res < 0) + goto done; + + cond_resched(); + + if (accum == SKARR_SZ) { + s_num = num + 1; + goto resume_bind_walk; + } + + s_num = 0; + } +skip_bind_ht: + cb->args[0] = 2; + s_i = num = s_num = 0; + } - inet_diag_dump_icsk(hinfo, skb, cb, r); + if (!(idiag_states & ~TCPF_LISTEN)) + goto out; + + for (i = s_i; i <= hashinfo->ehash_mask; i++) { + struct inet_ehash_bucket *head = &hashinfo->ehash[i]; + spinlock_t *lock = inet_ehash_lockp(hashinfo, i); + struct hlist_nulls_node *node; + struct sock *sk_arr[SKARR_SZ]; + int num_arr[SKARR_SZ]; + int idx, accum, res; + + if (hlist_nulls_empty(&head->chain)) + continue; + + if (i > s_i) + s_num = 0; + +next_chunk: + num = 0; + accum = 0; + spin_lock_bh(lock); + sk_nulls_for_each(sk, node, &head->chain) { + int state; + + if (!net_eq(sock_net(sk), net)) + continue; + if (num < s_num) + goto next_normal; + state = (sk->sk_state == TCP_TIME_WAIT) ? 
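/*
 * Sketch of the SKARR_SZ batching used above: take at most SKARR_SZ
 * socket references while the bucket lock is held with BHs disabled, then
 * drop the lock and run the expensive netlink fill on the snapshot,
 * releasing every reference. ex_fill() is a hypothetical fill callback.
 */
static int ex_walk_bucket(spinlock_t *lock, struct hlist_nulls_head *chain)
{
	struct sock *sk_arr[SKARR_SZ];
	struct hlist_nulls_node *node;
	int idx, accum = 0, res = 0;
	struct sock *sk;

	spin_lock_bh(lock);
	sk_nulls_for_each(sk, node, chain) {
		if (!refcount_inc_not_zero(&sk->sk_refcnt))
			continue;		/* socket already going away */
		sk_arr[accum] = sk;
		if (++accum == SKARR_SZ)
			break;			/* bound the time BHs are off */
	}
	spin_unlock_bh(lock);

	for (idx = 0; idx < accum; idx++) {
		if (res >= 0)
			res = ex_fill(sk_arr[idx]);	/* hypothetical */
		sock_gen_put(sk_arr[idx]);	/* always drop the batch ref */
	}
	return res;
}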
+ READ_ONCE(inet_twsk(sk)->tw_substate) : sk->sk_state; + if (!(idiag_states & (1 << state))) + goto next_normal; + if (r->sdiag_family != AF_UNSPEC && + sk->sk_family != r->sdiag_family) + goto next_normal; + if (r->id.idiag_sport != htons(sk->sk_num) && + r->id.idiag_sport) + goto next_normal; + if (r->id.idiag_dport != sk->sk_dport && + r->id.idiag_dport) + goto next_normal; + twsk_build_assert(); + + if (!inet_diag_bc_sk(cb_data, sk)) + goto next_normal; + + if (!refcount_inc_not_zero(&sk->sk_refcnt)) + goto next_normal; + + num_arr[accum] = num; + sk_arr[accum] = sk; + if (++accum == SKARR_SZ) + break; +next_normal: + ++num; + } + spin_unlock_bh(lock); + + res = 0; + for (idx = 0; idx < accum; idx++) { + if (res >= 0) { + res = sk_diag_fill(sk_arr[idx], skb, cb, r, + NLM_F_MULTI, net_admin); + if (res < 0) + num = num_arr[idx]; + } + sock_gen_put(sk_arr[idx]); + } + if (res < 0) + break; + + cond_resched(); + + if (accum == SKARR_SZ) { + s_num = num + 1; + goto next_chunk; + } + } + +done: + cb->args[1] = i; + cb->args[2] = num; +out: + ; +} + +static struct sock *tcp_diag_find_one_icsk(struct net *net, + const struct inet_diag_req_v2 *req) +{ + struct sock *sk; + + rcu_read_lock(); + if (req->sdiag_family == AF_INET) { + sk = inet_lookup(net, NULL, 0, req->id.idiag_dst[0], + req->id.idiag_dport, req->id.idiag_src[0], + req->id.idiag_sport, req->id.idiag_if); +#if IS_ENABLED(CONFIG_IPV6) + } else if (req->sdiag_family == AF_INET6) { + if (ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_dst) && + ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_src)) + sk = inet_lookup(net, NULL, 0, req->id.idiag_dst[3], + req->id.idiag_dport, req->id.idiag_src[3], + req->id.idiag_sport, req->id.idiag_if); + else + sk = inet6_lookup(net, NULL, 0, + (struct in6_addr *)req->id.idiag_dst, + req->id.idiag_dport, + (struct in6_addr *)req->id.idiag_src, + req->id.idiag_sport, + req->id.idiag_if); +#endif + } else { + rcu_read_unlock(); + return ERR_PTR(-EINVAL); + } + rcu_read_unlock(); + if (!sk) + return ERR_PTR(-ENOENT); + + if (sock_diag_check_cookie(sk, req->id.idiag_cookie)) { + sock_gen_put(sk); + return ERR_PTR(-ENOENT); + } + + return sk; } static int tcp_diag_dump_one(struct netlink_callback *cb, const struct inet_diag_req_v2 *req) { - struct inet_hashinfo *hinfo; + struct sk_buff *in_skb = cb->skb; + struct sk_buff *rep; + struct sock *sk; + struct net *net; + bool net_admin; + int err; - hinfo = sock_net(cb->skb->sk)->ipv4.tcp_death_row.hashinfo; + net = sock_net(in_skb->sk); + sk = tcp_diag_find_one_icsk(net, req); + if (IS_ERR(sk)) + return PTR_ERR(sk); - return inet_diag_dump_one_icsk(hinfo, cb, req); + net_admin = netlink_net_capable(in_skb, CAP_NET_ADMIN); + rep = nlmsg_new(tcp_diag_get_aux_size(sk, net_admin), GFP_KERNEL); + if (!rep) { + err = -ENOMEM; + goto out; + } + + err = sk_diag_fill(sk, rep, cb, req, 0, net_admin); + if (err < 0) { + WARN_ON(err == -EMSGSIZE); + nlmsg_free(rep); + goto out; + } + err = nlmsg_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid); + +out: + if (sk) + sock_gen_put(sk); + + return err; } #ifdef CONFIG_INET_DIAG_DESTROY @@ -202,13 +643,10 @@ static int tcp_diag_destroy(struct sk_buff *in_skb, const struct inet_diag_req_v2 *req) { struct net *net = sock_net(in_skb->sk); - struct inet_hashinfo *hinfo; struct sock *sk; int err; - hinfo = net->ipv4.tcp_death_row.hashinfo; - sk = inet_diag_find_one_icsk(net, hinfo, req); - + sk = tcp_diag_find_one_icsk(net, req); if (IS_ERR(sk)) return PTR_ERR(sk); @@ -226,7 +664,6 @@ static const struct 
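/*
 * Sketch of the cookie check in tcp_diag_find_one_icsk() above: after the
 * RCU lookup, the socket is rejected unless the 64-bit cookie from the
 * request matches, so a recycled 4-tuple cannot be mistaken for the
 * socket the requester originally identified. ex_lookup() is a
 * hypothetical lookup helper.
 */
static struct sock *ex_find_checked(struct net *net,
				    const struct inet_diag_req_v2 *req)
{
	struct sock *sk = ex_lookup(net, req);	/* hypothetical */

	if (!sk)
		return ERR_PTR(-ENOENT);

	if (sock_diag_check_cookie(sk, req->id.idiag_cookie)) {
		sock_gen_put(sk);		/* drop the lookup reference */
		return ERR_PTR(-ENOENT);
	}
	return sk;
}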
inet_diag_handler tcp_diag_handler = { .dump_one = tcp_diag_dump_one, .idiag_get_info = tcp_diag_get_info, .idiag_get_aux = tcp_diag_get_aux, - .idiag_get_aux_size = tcp_diag_get_aux_size, .idiag_type = IPPROTO_TCP, .idiag_info_size = sizeof(struct tcp_info), #ifdef CONFIG_INET_DIAG_DESTROY diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index f1884f0c9e52..7d945a527daf 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -576,11 +576,12 @@ void tcp_fastopen_active_disable_ofo_check(struct sock *sk) } } else if (tp->syn_fastopen_ch && atomic_read(&sock_net(sk)->ipv4.tfo_active_disable_times)) { - dst = sk_dst_get(sk); - dev = dst ? dst_dev(dst) : NULL; + rcu_read_lock(); + dst = __sk_dst_get(sk); + dev = dst ? dst_dev_rcu(dst) : NULL; if (!(dev && (dev->flags & IFF_LOOPBACK))) atomic_set(&sock_net(sk)->ipv4.tfo_active_disable_times, 0); - dst_release(dst); + rcu_read_unlock(); } } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 71b76e98371a..f1be65af1a77 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2569,7 +2569,7 @@ static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo) if (frto_undo) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSPURIOUSRTOS); - inet_csk(sk)->icsk_retransmits = 0; + WRITE_ONCE(inet_csk(sk)->icsk_retransmits, 0); if (tcp_is_non_sack_preventing_reopen(sk)) return true; if (frto_undo || tcp_is_sack(tp)) { @@ -3851,7 +3851,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (after(ack, prior_snd_una)) { flag |= FLAG_SND_UNA_ADVANCED; - icsk->icsk_retransmits = 0; + WRITE_ONCE(icsk->icsk_retransmits, 0); #if IS_ENABLED(CONFIG_TLS_DEVICE) if (static_branch_unlikely(&clean_acked_data_enabled.key)) @@ -3913,7 +3913,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) * log. Something worked... 
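/*
 * Sketch of the refcount-free dst access in the tcp_fastopen hunk above:
 * __sk_dst_get() returns the cached route without taking a reference,
 * which is safe only because every use of dst and dev sits inside the
 * same RCU read-side section. ex_dst_is_loopback() is illustrative.
 */
static bool ex_dst_is_loopback(struct sock *sk)
{
	struct net_device *dev;
	struct dst_entry *dst;
	bool ret;

	rcu_read_lock();
	dst = __sk_dst_get(sk);			/* no dst_hold()/dst_release() */
	dev = dst ? dst_dev_rcu(dst) : NULL;
	ret = dev && (dev->flags & IFF_LOOPBACK);
	rcu_read_unlock();			/* dst/dev unusable past here */

	return ret;
}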
*/ WRITE_ONCE(sk->sk_err_soft, 0); - icsk->icsk_probes_out = 0; + WRITE_ONCE(icsk->icsk_probes_out, 0); tp->rcv_tstamp = tcp_jiffies32; if (!prior_packets) goto no_queue; @@ -4830,7 +4830,7 @@ static bool tcp_ooo_try_coalesce(struct sock *sk, noinline_for_tracing static void tcp_drop_reason(struct sock *sk, struct sk_buff *skb, enum skb_drop_reason reason) { - sk_drops_add(sk, skb); + sk_drops_skbadd(sk, skb); sk_skb_reason_drop(sk, skb, reason); } @@ -6297,7 +6297,7 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, u16 mss = tp->rx_opt.mss_clamp, try_exp = 0; bool syn_drop = false; - if (mss == tp->rx_opt.user_mss) { + if (mss == READ_ONCE(tp->rx_opt.user_mss)) { struct tcp_options_received opt; /* Get original SYNACK MSS value if user MSS sets mss_clamp */ @@ -6636,7 +6636,7 @@ static void tcp_rcv_synrecv_state_fastopen(struct sock *sk) tcp_try_undo_recovery(sk); tcp_update_rto_time(tp); - inet_csk(sk)->icsk_retransmits = 0; + WRITE_ONCE(inet_csk(sk)->icsk_retransmits, 0); /* In tcp_fastopen_synack_timer() on the first SYNACK RTO we set * retrans_stamp but don't enter CA_Loss, so in case that happened we * need to zero retrans_stamp here to prevent spurious @@ -7117,7 +7117,7 @@ u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops, return 0; } - mss = tcp_parse_mss_option(th, tp->rx_opt.user_mss); + mss = tcp_parse_mss_option(th, READ_ONCE(tp->rx_opt.user_mss)); if (!mss) mss = af_ops->mss_clamp; @@ -7131,7 +7131,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, { struct tcp_fastopen_cookie foc = { .len = -1 }; struct tcp_options_received tmp_opt; - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); struct net *net = sock_net(sk); struct sock *fastopen_sk = NULL; struct request_sock *req; @@ -7182,7 +7182,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, tcp_clear_options(&tmp_opt); tmp_opt.mss_clamp = af_ops->mss_clamp; - tmp_opt.user_mss = tp->rx_opt.user_mss; + tmp_opt.user_mss = READ_ONCE(tp->rx_opt.user_mss); tcp_parse_options(sock_net(sk), skb, &tmp_opt, 0, want_cookie ? NULL : &foc); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 84d3d556ed80..1e58a8a9ff7a 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -506,8 +506,7 @@ int tcp_v4_err(struct sk_buff *skb, u32 info) struct sock *sk; int err; - sk = __inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, - iph->daddr, th->dest, iph->saddr, + sk = __inet_lookup_established(net, iph->daddr, th->dest, iph->saddr, ntohs(th->source), inet_iif(skb), 0); if (!sk) { __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); @@ -823,8 +822,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb, * Incoming packet is checked with md5 hash with finding key, * no RST generated if md5 hash doesn't match. 
*/ - sk1 = __inet_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo, - NULL, 0, ip_hdr(skb)->saddr, + sk1 = __inet_lookup_listener(net, NULL, 0, ip_hdr(skb)->saddr, th->source, ip_hdr(skb)->daddr, ntohs(th->source), dif, sdif); /* don't send rst if it can't find key */ @@ -1992,8 +1990,7 @@ int tcp_v4_early_demux(struct sk_buff *skb) if (th->doff < sizeof(struct tcphdr) / 4) return 0; - sk = __inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, - iph->saddr, th->source, + sk = __inet_lookup_established(net, iph->saddr, th->source, iph->daddr, ntohs(th->dest), skb->skb_iif, inet_sdif(skb)); if (sk) { @@ -2236,8 +2233,7 @@ int tcp_v4_rcv(struct sk_buff *skb) th = (const struct tcphdr *)skb->data; iph = ip_hdr(skb); lookup: - sk = __inet_lookup_skb(net->ipv4.tcp_death_row.hashinfo, - skb, __tcp_hdrlen(th), th->source, + sk = __inet_lookup_skb(skb, __tcp_hdrlen(th), th->source, th->dest, sdif, &refcounted); if (!sk) goto no_tcp_socket; @@ -2258,7 +2254,7 @@ lookup: &iph->saddr, &iph->daddr, AF_INET, dif, sdif); if (unlikely(drop_reason)) { - sk_drops_add(sk, skb); + sk_drops_skbadd(sk, skb); reqsk_put(req); goto discard_it; } @@ -2403,7 +2399,7 @@ discard_it: return 0; discard_and_relse: - sk_drops_add(sk, skb); + sk_drops_skbadd(sk, skb); if (refcounted) sock_put(sk); goto discard_it; @@ -2426,9 +2422,7 @@ do_time_wait: &drop_reason); switch (tw_status) { case TCP_TW_SYN: { - struct sock *sk2 = inet_lookup_listener(net, - net->ipv4.tcp_death_row.hashinfo, - skb, __tcp_hdrlen(th), + struct sock *sk2 = inet_lookup_listener(net, skb, __tcp_hdrlen(th), iph->saddr, th->source, iph->daddr, th->dest, inet_iif(skb), @@ -2459,7 +2453,6 @@ do_time_wait: static struct timewait_sock_ops tcp_timewait_sock_ops = { .twsk_obj_size = sizeof(struct tcp_timewait_sock), - .twsk_destructor= tcp_twsk_destructor, }; void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) @@ -2958,9 +2951,9 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i) rx_queue, timer_active, jiffies_delta_to_clock_t(timer_expires - jiffies), - icsk->icsk_retransmits, + READ_ONCE(icsk->icsk_retransmits), from_kuid_munged(seq_user_ns(f), sk_uid(sk)), - icsk->icsk_probes_out, + READ_ONCE(icsk->icsk_probes_out), sock_i_ino(sk), refcount_read(&sk->sk_refcnt), sk, jiffies_to_clock_t(icsk->icsk_rto), @@ -3524,7 +3517,6 @@ struct proto tcp_prot = { .leave_memory_pressure = tcp_leave_memory_pressure, .stream_memory_free = tcp_stream_memory_free, .sockets_allocated = &tcp_sockets_allocated, - .orphan_count = &tcp_orphan_count, .memory_allocated = &net_aligned_data.tcp_memory_allocated, .per_cpu_fw_alloc = &tcp_memory_per_cpu_fw_alloc, diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 03c068ea27b6..10e86f1008e9 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -170,7 +170,7 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst, struct net *net; spin_lock_bh(&tcp_metrics_lock); - net = dev_net_rcu(dst_dev(dst)); + net = dst_dev_net_rcu(dst); /* While waiting for the spin-lock the cache might have been populated * with this entry and so we have to check again. 
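/*
 * Usage sketch for the slimmed-down lookup API above: callers no longer
 * pass net->ipv4.tcp_death_row.hashinfo, since the helpers now derive the
 * (single, per-netns) TCP hash table from @net themselves. ex_lookup_rx()
 * is an illustrative caller modeled on the tcp_v4_rcv() hunk.
 */
static struct sock *ex_lookup_rx(struct net *net, const struct iphdr *iph,
				 const struct tcphdr *th, int iif, int sdif)
{
	return __inet_lookup_established(net, iph->saddr, th->source,
					 iph->daddr, ntohs(th->dest),
					 iif, sdif);
}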
@@ -273,7 +273,7 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req, return NULL; } - net = dev_net_rcu(dst_dev(dst)); + net = dst_dev_net_rcu(dst); hash ^= net_hash_mix(net); hash = hash_32(hash, tcp_metrics_hash_log); @@ -318,7 +318,7 @@ static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk, else return NULL; - net = dev_net_rcu(dst_dev(dst)); + net = dst_dev_net_rcu(dst); hash ^= net_hash_mix(net); hash = hash_32(hash, tcp_metrics_hash_log); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 2994c9222c9c..d1c9e4088646 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -401,7 +401,6 @@ void tcp_twsk_destructor(struct sock *sk) #endif tcp_ao_destroy_sock(sk, true); } -EXPORT_IPV6_MOD_GPL(tcp_twsk_destructor); void tcp_twsk_purge(struct list_head *net_exit_list) { diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index be5c2294610e..e6612bd84d09 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -434,8 +434,7 @@ static void tcp4_check_fraglist_gro(struct list_head *head, struct sk_buff *skb, inet_get_iif_sdif(skb, &iif, &sdif); iph = skb_gro_network_header(skb); net = dev_net_rcu(skb->dev); - sk = __inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, - iph->saddr, th->source, + sk = __inet_lookup_established(net, iph->saddr, th->source, iph->daddr, ntohs(th->dest), iif, sdif); NAPI_GRO_CB(skb)->is_flist = !sk; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index caf11920a878..e180364b8dda 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3578,9 +3578,8 @@ void sk_forced_mem_schedule(struct sock *sk, int size) sk_forward_alloc_add(sk, amt << PAGE_SHIFT); sk_memory_allocated_add(sk, amt); - if (mem_cgroup_sockets_enabled && sk->sk_memcg) - mem_cgroup_charge_skmem(sk->sk_memcg, amt, - gfp_memcg_charge() | __GFP_NOFAIL); + if (mem_cgroup_sk_enabled(sk)) + mem_cgroup_sk_charge(sk, amt, gfp_memcg_charge() | __GFP_NOFAIL); } /* Send a FIN. The caller locks the socket for us. @@ -3891,6 +3890,7 @@ static void tcp_connect_init(struct sock *sk) const struct dst_entry *dst = __sk_dst_get(sk); struct tcp_sock *tp = tcp_sk(sk); __u8 rcv_wscale; + u16 user_mss; u32 rcv_wnd; /* We'll fix this up when we get a response from the other end. @@ -3903,8 +3903,9 @@ static void tcp_connect_init(struct sock *sk) tcp_ao_connect_init(sk); /* If user gave his TCP_MAXSEG, record it to clamp */ - if (tp->rx_opt.user_mss) - tp->rx_opt.mss_clamp = tp->rx_opt.user_mss; + user_mss = READ_ONCE(tp->rx_opt.user_mss); + if (user_mss) + tp->rx_opt.mss_clamp = user_mss; tp->max_window = 0; tcp_mtup_init(sk); tcp_sync_mss(sk, dst_mtu(dst)); @@ -3955,7 +3956,7 @@ static void tcp_connect_init(struct sock *sk) WRITE_ONCE(tp->copied_seq, tp->rcv_nxt); inet_csk(sk)->icsk_rto = tcp_timeout_init(sk); - inet_csk(sk)->icsk_retransmits = 0; + WRITE_ONCE(inet_csk(sk)->icsk_retransmits, 0); tcp_clear_retrans(tp); } @@ -4393,13 +4394,13 @@ void tcp_send_probe0(struct sock *sk) if (tp->packets_out || tcp_write_queue_empty(sk)) { /* Cancel probe timer, if it is not required. 
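/*
 * Sketch of the memcg wrappers adopted in sk_forced_mem_schedule() above:
 * mem_cgroup_sk_enabled() folds the cgroup static-branch and sk->sk_memcg
 * checks, and mem_cgroup_sk_charge() charges @amt pages against the
 * socket's memcg; __GFP_NOFAIL keeps forced scheduling from failing.
 * ex_forced_charge() is illustrative.
 */
static void ex_forced_charge(struct sock *sk, int amt)
{
	if (mem_cgroup_sk_enabled(sk))
		mem_cgroup_sk_charge(sk, amt,
				     gfp_memcg_charge() | __GFP_NOFAIL);
}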
*/ - icsk->icsk_probes_out = 0; + WRITE_ONCE(icsk->icsk_probes_out, 0); icsk->icsk_backoff = 0; icsk->icsk_probes_tstamp = 0; return; } - icsk->icsk_probes_out++; + WRITE_ONCE(icsk->icsk_probes_out, icsk->icsk_probes_out + 1); if (err <= 0) { if (icsk->icsk_backoff < READ_ONCE(net->ipv4.sysctl_tcp_retries2)) icsk->icsk_backoff++; @@ -4437,7 +4438,7 @@ int tcp_rtx_synack(const struct sock *sk, struct request_sock *req) tcp_sk_rw(sk)->total_retrans++; } trace_tcp_retransmit_synack(sk, req); - req->num_retrans++; + WRITE_ONCE(req->num_retrans, req->num_retrans + 1); } return res; } diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index a207877270fb..2dd73a4e8e51 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -392,7 +392,7 @@ static void tcp_probe_timer(struct sock *sk) int max_probes; if (tp->packets_out || !skb) { - icsk->icsk_probes_out = 0; + WRITE_ONCE(icsk->icsk_probes_out, 0); icsk->icsk_probes_tstamp = 0; return; } @@ -444,7 +444,7 @@ static void tcp_update_rto_stats(struct sock *sk) tp->total_rto_recoveries++; tp->rto_stamp = tcp_time_stamp_ms(tp); } - icsk->icsk_retransmits++; + WRITE_ONCE(icsk->icsk_retransmits, icsk->icsk_retransmits + 1); tp->total_rto++; } @@ -839,7 +839,7 @@ static void tcp_keepalive_timer(struct timer_list *t) goto out; } if (tcp_write_wakeup(sk, LINUX_MIB_TCPKEEPALIVE) <= 0) { - icsk->icsk_probes_out++; + WRITE_ONCE(icsk->icsk_probes_out, icsk->icsk_probes_out + 1); elapsed = keepalive_intvl_when(tp); } else { /* If keepalive was lost due to local congestion, diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index cc3ce0f762ec..732bdad43626 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1787,7 +1787,7 @@ uncharge_drop: atomic_sub(skb->truesize, &sk->sk_rmem_alloc); drop: - atomic_inc(&sk->sk_drops); + sk_drops_inc(sk); busylock_release(busy); return err; } @@ -1852,7 +1852,7 @@ static struct sk_buff *__first_packet_length(struct sock *sk, IS_UDPLITE(sk)); __UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, IS_UDPLITE(sk)); - atomic_inc(&sk->sk_drops); + sk_drops_inc(sk); __skb_unlink(skb, rcvq); *total += skb->truesize; kfree_skb_reason(skb, SKB_DROP_REASON_UDP_CSUM); @@ -2008,7 +2008,7 @@ try_again: __UDP_INC_STATS(net, UDP_MIB_CSUMERRORS, is_udplite); __UDP_INC_STATS(net, UDP_MIB_INERRORS, is_udplite); - atomic_inc(&sk->sk_drops); + sk_drops_inc(sk); kfree_skb_reason(skb, SKB_DROP_REASON_UDP_CSUM); goto try_again; } @@ -2078,7 +2078,7 @@ try_again: if (unlikely(err)) { if (!peeking) { - atomic_inc(&sk->sk_drops); + sk_drops_inc(sk); UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); } @@ -2449,7 +2449,7 @@ csum_error: __UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); drop: __UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); - atomic_inc(&sk->sk_drops); + sk_drops_inc(sk); sk_skb_reason_drop(sk, skb, drop_reason); return -1; } @@ -2534,7 +2534,7 @@ start_lookup: nskb = skb_clone(skb, GFP_ATOMIC); if (unlikely(!nskb)) { - atomic_inc(&sk->sk_drops); + sk_drops_inc(sk); __UDP_INC_STATS(net, UDP_MIB_RCVBUFERRORS, IS_UDPLITE(sk)); __UDP_INC_STATS(net, UDP_MIB_INERRORS, @@ -3386,7 +3386,7 @@ static void udp4_format_sock(struct sock *sp, struct seq_file *f, from_kuid_munged(seq_user_ns(f), sk_uid(sp)), 0, sock_i_ino(sp), refcount_read(&sp->sk_refcnt), sp, - atomic_read(&sp->sk_drops)); + sk_drops_read(sp)); } int udp4_seq_show(struct seq_file *seq, void *v) diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index 38cb3a28e4ed..6e491c720c90 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -16,9 
+16,9 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *req, - struct nlattr *bc, bool net_admin) + bool net_admin) { - if (!inet_diag_bc_sk(bc, sk)) + if (!inet_diag_bc_sk(cb->data, sk)) return 0; return inet_sk_diag_fill(sk, NULL, skb, cb, req, NLM_F_MULTI, @@ -92,12 +92,8 @@ static void udp_dump(struct udp_table *table, struct sk_buff *skb, { bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN); struct net *net = sock_net(skb->sk); - struct inet_diag_dump_data *cb_data; int num, s_num, slot, s_slot; - struct nlattr *bc; - cb_data = cb->data; - bc = cb_data->inet_diag_nla_bc; s_slot = cb->args[0]; num = s_num = cb->args[1]; @@ -130,7 +126,7 @@ static void udp_dump(struct udp_table *table, struct sk_buff *skb, r->id.idiag_dport) goto next; - if (sk_diag_dump(sk, skb, cb, r, bc, net_admin) < 0) { + if (sk_diag_dump(sk, skb, cb, r, net_admin) < 0) { spin_unlock_bh(&hslot->lock); goto done; } diff --git a/net/ipv4/udp_tunnel_core.c b/net/ipv4/udp_tunnel_core.c index fce945f23069..54386e06a813 100644 --- a/net/ipv4/udp_tunnel_core.c +++ b/net/ipv4/udp_tunnel_core.c @@ -4,6 +4,7 @@ #include <linux/socket.h> #include <linux/kernel.h> #include <net/dst_metadata.h> +#include <net/flow.h> #include <net/udp.h> #include <net/udp_tunnel.h> #include <net/inet_dscp.h> @@ -253,7 +254,7 @@ struct rtable *udp_tunnel_dst_lookup(struct sk_buff *skb, fl4.saddr = key->u.ipv4.src; fl4.fl4_dport = dport; fl4.fl4_sport = sport; - fl4.flowi4_tos = tos & INET_DSCP_MASK; + fl4.flowi4_dscp = inet_dsfield_to_dscp(tos); fl4.flowi4_flags = key->flow_flags; rt = ip_route_output_key(net, &fl4); diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 7fb6205619e7..58faf1ddd2b1 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -14,7 +14,7 @@ #include <linux/inetdevice.h> #include <net/dst.h> #include <net/xfrm.h> -#include <net/inet_dscp.h> +#include <net/flow.h> #include <net/ip.h> #include <net/l3mdev.h> @@ -25,7 +25,7 @@ static struct dst_entry *__xfrm4_dst_lookup(struct flowi4 *fl4, memset(fl4, 0, sizeof(*fl4)); fl4->daddr = params->daddr->a4; - fl4->flowi4_tos = inet_dscp_to_dsfield(params->dscp); + fl4->flowi4_dscp = params->dscp; fl4->flowi4_l3mdev = l3mdev_master_ifindex_by_index(params->net, params->oif); fl4->flowi4_mark = params->mark; diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 1c9c686d9522..b8f9a8c0302e 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -304,10 +304,9 @@ config IPV6_SEG6_LWTUNNEL config IPV6_SEG6_HMAC bool "IPv6: Segment Routing HMAC support" depends on IPV6 - select CRYPTO - select CRYPTO_HMAC - select CRYPTO_SHA1 - select CRYPTO_SHA256 + select CRYPTO_LIB_SHA1 + select CRYPTO_LIB_SHA256 + select CRYPTO_LIB_UTILS help Support for HMAC signature generation and verification of SR-enabled packets. 
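/*
 * Sketch of the flowi4_tos -> flowi4_dscp conversion completed above: the
 * flow key now carries a type-safe dscp_t, and a legacy tos/dsfield byte
 * is converted once at the boundary with inet_dsfield_to_dscp() (which
 * discards the ECN bits) instead of being masked by hand at every call
 * site. ex_flow_init() is illustrative.
 */
static void ex_flow_init(struct flowi4 *fl4, __u8 tos, __be32 daddr)
{
	memset(fl4, 0, sizeof(*fl4));
	fl4->daddr = daddr;
	fl4->flowi4_dscp = inet_dsfield_to_dscp(tos);	/* ECN bits dropped */
}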
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index f17a5dd4789f..40e9c336f6c5 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -7238,7 +7238,9 @@ static const struct ctl_table addrconf_sysctl[] = { .data = &ipv6_devconf.rpl_seg_enabled, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, { .procname = "ioam6_enabled", diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index f8a8e46286b8..52599584422b 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -104,7 +104,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) rcu_read_lock(); rt = rt6_lookup(net, addr, NULL, 0, NULL, 0); if (rt) { - dev = dst_dev(&rt->dst); + dev = dst_dev_rcu(&rt->dst); netdev_hold(dev, &dev_tracker, GFP_ATOMIC); ip6_rt_put(rt); } else if (ishost) { diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 972bf0426d59..33ebe93d80e3 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -1068,5 +1068,5 @@ void __ip6_dgram_sock_seq_show(struct seq_file *seq, struct sock *sp, 0, sock_i_ino(sp), refcount_read(&sp->sk_refcnt), sp, - atomic_read(&sp->sk_drops)); + sk_drops_read(sp)); } diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 72adfc107b55..e75da98f5283 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -149,8 +149,8 @@ static struct sock *esp6_find_tcp_sk(struct xfrm_state *x) dport = encap->encap_dport; spin_unlock_bh(&x->lock); - sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, &x->id.daddr.in6, - dport, &x->props.saddr.in6, ntohs(sport), 0, 0); + sk = __inet6_lookup_established(net, &x->id.daddr.in6, dport, + &x->props.saddr.in6, ntohs(sport), 0, 0); if (!sk) return ERR_PTR(-ENOENT); diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 44550957fd4e..95cdd4cacb00 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -209,7 +209,8 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type, * this lookup should be more aggressive (not longer than timeout). */ dst = ip6_route_output(net, sk, fl6); - dev = dst_dev(dst); + rcu_read_lock(); + dev = dst_dev_rcu(dst); if (dst->error) { IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); @@ -224,11 +225,10 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type, if (rt->rt6i_dst.plen < 128) tmo >>= ((128 - rt->rt6i_dst.plen)>>5); - rcu_read_lock(); peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr); res = inet_peer_xrlim_allow(peer, tmo); - rcu_read_unlock(); } + rcu_read_unlock(); if (!res) __ICMP6_INC_STATS(net, ip6_dst_idev(dst), ICMP6_MIB_RATELIMITHOST); diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 76ee521189eb..a3a9ea49fee2 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -47,24 +47,23 @@ EXPORT_SYMBOL_GPL(inet6_ehashfn); * The sockhash lock must be held as a reader here. */ struct sock *__inet6_lookup_established(const struct net *net, - struct inet_hashinfo *hashinfo, - const struct in6_addr *saddr, - const __be16 sport, - const struct in6_addr *daddr, - const u16 hnum, - const int dif, const int sdif) + const struct in6_addr *saddr, + const __be16 sport, + const struct in6_addr *daddr, + const u16 hnum, + const int dif, const int sdif) { - struct sock *sk; - const struct hlist_nulls_node *node; const __portpair ports = INET_COMBINED_PORTS(sport, hnum); - /* Optimize here for direct hit, only listening connections can - * have wildcards anyways. 
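/*
 * Sketch of the sysctl hardening in the addrconf hunk above: switching to
 * proc_dointvec_minmax with SYSCTL_ZERO/SYSCTL_ONE clamps writes to the
 * boolean range instead of accepting any int. "example_enabled" is a
 * hypothetical knob.
 */
static int example_enabled;

static const struct ctl_table ex_sysctl[] = {
	{
		.procname	= "example_enabled",
		.data		= &example_enabled,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,	/* reject < 0 */
		.extra2		= SYSCTL_ONE,	/* reject > 1 */
	},
};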
- */ - unsigned int hash = inet6_ehashfn(net, daddr, hnum, saddr, sport); - unsigned int slot = hash & hashinfo->ehash_mask; - struct inet_ehash_bucket *head = &hashinfo->ehash[slot]; - + const struct hlist_nulls_node *node; + struct inet_ehash_bucket *head; + struct inet_hashinfo *hashinfo; + unsigned int hash, slot; + struct sock *sk; + hashinfo = net->ipv4.tcp_death_row.hashinfo; + hash = inet6_ehashfn(net, daddr, hnum, saddr, sport); + slot = hash & hashinfo->ehash_mask; + head = &hashinfo->ehash[slot]; begin: sk_nulls_for_each_rcu(sk, node, &head->chain) { if (sk->sk_hash != hash) @@ -200,19 +199,20 @@ struct sock *inet6_lookup_run_sk_lookup(const struct net *net, EXPORT_SYMBOL_GPL(inet6_lookup_run_sk_lookup); struct sock *inet6_lookup_listener(const struct net *net, - struct inet_hashinfo *hashinfo, - struct sk_buff *skb, int doff, - const struct in6_addr *saddr, - const __be16 sport, const struct in6_addr *daddr, - const unsigned short hnum, const int dif, const int sdif) + struct sk_buff *skb, int doff, + const struct in6_addr *saddr, + const __be16 sport, + const struct in6_addr *daddr, + const unsigned short hnum, + const int dif, const int sdif) { struct inet_listen_hashbucket *ilb2; + struct inet_hashinfo *hashinfo; struct sock *result = NULL; unsigned int hash2; /* Lookup redirect from BPF */ - if (static_branch_unlikely(&bpf_sk_lookup_enabled) && - hashinfo == net->ipv4.tcp_death_row.hashinfo) { + if (static_branch_unlikely(&bpf_sk_lookup_enabled)) { result = inet6_lookup_run_sk_lookup(net, IPPROTO_TCP, skb, doff, saddr, sport, daddr, hnum, dif, inet6_ehashfn); @@ -220,6 +220,7 @@ struct sock *inet6_lookup_listener(const struct net *net, goto done; } + hashinfo = net->ipv4.tcp_death_row.hashinfo; hash2 = ipv6_portaddr_hash(net, daddr, hnum); ilb2 = inet_lhash2_bucket(hashinfo, hash2); @@ -244,7 +245,6 @@ done: EXPORT_SYMBOL_GPL(inet6_lookup_listener); struct sock *inet6_lookup(const struct net *net, - struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, const __be16 dport, @@ -253,7 +253,7 @@ struct sock *inet6_lookup(const struct net *net, struct sock *sk; bool refcounted; - sk = __inet6_lookup(net, hashinfo, skb, doff, saddr, sport, daddr, + sk = __inet6_lookup(net, skb, doff, saddr, sport, daddr, ntohs(dport), dif, 0, &refcounted); if (sk && !refcounted && !refcount_inc_not_zero(&sk->sk_refcnt)) sk = NULL; @@ -305,8 +305,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, dif, sdif))) { if (sk2->sk_state == TCP_TIME_WAIT) { tw = inet_twsk(sk2); - if (sk->sk_protocol == IPPROTO_TCP && - tcp_twsk_unique(sk, sk2, twp)) + if (tcp_twsk_unique(sk, sk2, twp)) break; } goto not_unique; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 74d49dd6124d..c82a75510c0e 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -329,9 +329,9 @@ static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net, if (parms->name[0]) { if (!dev_valid_name(parms->name)) return NULL; - strscpy(name, parms->name, IFNAMSIZ); + strscpy(name, parms->name); } else { - strcpy(name, "ip6gre%d"); + strscpy(name, "ip6gre%d"); } dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN, ip6gre_tunnel_setup); @@ -1469,7 +1469,7 @@ static int ip6gre_tunnel_init_common(struct net_device *dev) tunnel = netdev_priv(dev); tunnel->dev = dev; - strcpy(tunnel->parms.name, dev->name); + strscpy(tunnel->parms.name, dev->name); ret = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL); if (ret) @@ 
-1529,7 +1529,7 @@ static void ip6gre_fb_tunnel_init(struct net_device *dev) tunnel->dev = dev; tunnel->net = dev_net(dev); - strcpy(tunnel->parms.name, dev->name); + strscpy(tunnel->parms.name, dev->name); tunnel->hlen = sizeof(struct ipv6hdr) + 4; } @@ -1842,7 +1842,7 @@ static int ip6erspan_tap_init(struct net_device *dev) tunnel = netdev_priv(dev); tunnel->dev = dev; - strcpy(tunnel->parms.name, dev->name); + strscpy(tunnel->parms.name, dev->name); ret = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL); if (ret) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 1e1410237b6e..9d64c13bab5e 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -60,7 +60,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); - struct net_device *dev = dst_dev(dst); + struct net_device *dev = dst_dev_rcu(dst); struct inet6_dev *idev = ip6_dst_idev(dst); unsigned int hh_len = LL_RESERVED_SPACE(dev); const struct in6_addr *daddr, *nexthop; @@ -70,15 +70,12 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * /* Be paranoid, rather than too clever. */ if (unlikely(hh_len > skb_headroom(skb)) && dev->header_ops) { - /* Make sure idev stays alive */ - rcu_read_lock(); + /* idev stays alive because we hold rcu_read_lock(). */ skb = skb_expand_head(skb, hh_len); if (!skb) { IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); - rcu_read_unlock(); return -ENOMEM; } - rcu_read_unlock(); } hdr = ipv6_hdr(skb); @@ -123,7 +120,6 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); - rcu_read_lock(); nexthop = rt6_nexthop(dst_rt6_info(dst), daddr); neigh = __ipv6_neigh_lookup_noref(dev, nexthop); @@ -131,7 +127,6 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * if (unlikely(!neigh)) neigh = __neigh_create(&nd_tbl, nexthop, dev, false); if (IS_ERR(neigh)) { - rcu_read_unlock(); IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTNOROUTES); kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_CREATEFAIL); return -EINVAL; @@ -139,7 +134,6 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * } sock_confirm_neigh(skb, neigh); ret = neigh_output(neigh, skb, false); - rcu_read_unlock(); return ret; } @@ -233,22 +227,29 @@ static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *s int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); - struct net_device *dev = dst_dev(dst), *indev = skb->dev; - struct inet6_dev *idev = ip6_dst_idev(dst); + struct net_device *dev, *indev = skb->dev; + struct inet6_dev *idev; + int ret; skb->protocol = htons(ETH_P_IPV6); + rcu_read_lock(); + dev = dst_dev_rcu(dst); + idev = ip6_dst_idev(dst); skb->dev = dev; if (unlikely(!idev || READ_ONCE(idev->cnf.disable_ipv6))) { IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); + rcu_read_unlock(); kfree_skb_reason(skb, SKB_DROP_REASON_IPV6DISABLED); return 0; } - return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, - net, sk, skb, indev, dev, - ip6_finish_output, - !(IP6CB(skb)->flags & IP6SKB_REROUTED)); + ret = NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, + net, sk, skb, indev, dev, + ip6_finish_output, + !(IP6CB(skb)->flags & IP6SKB_REROUTED)); + rcu_read_unlock(); + return ret; } EXPORT_SYMBOL(ip6_output); @@ -268,35 +269,36 @@ bool ip6_autoflowlabel(struct net *net, const struct sock *sk) int ip6_xmit(const 
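/*
 * Sketch of the strcpy() -> strscpy() conversions in the ip6_gre hunks
 * above: the two-argument strscpy() takes the bound from the destination
 * array's type, always NUL-terminates, and truncates rather than
 * overflowing. ex_cfg/ex_set_name are illustrative stand-ins.
 */
struct ex_cfg { char name[IFNAMSIZ]; };

static void ex_set_name(struct ex_cfg *cfg, const struct net_device *dev)
{
	strscpy(cfg->name, dev->name);	/* bound = sizeof(cfg->name) */
}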
struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority) { - struct net *net = sock_net(sk); const struct ipv6_pinfo *np = inet6_sk(sk); struct in6_addr *first_hop = &fl6->daddr; struct dst_entry *dst = skb_dst(skb); - struct net_device *dev = dst_dev(dst); struct inet6_dev *idev = ip6_dst_idev(dst); struct hop_jumbo_hdr *hop_jumbo; int hoplen = sizeof(*hop_jumbo); + struct net *net = sock_net(sk); unsigned int head_room; + struct net_device *dev; struct ipv6hdr *hdr; u8 proto = fl6->flowi6_proto; int seg_len = skb->len; - int hlimit = -1; + int ret, hlimit = -1; u32 mtu; + rcu_read_lock(); + + dev = dst_dev_rcu(dst); head_room = sizeof(struct ipv6hdr) + hoplen + LL_RESERVED_SPACE(dev); if (opt) head_room += opt->opt_nflen + opt->opt_flen; if (unlikely(head_room > skb_headroom(skb))) { - /* Make sure idev stays alive */ - rcu_read_lock(); + /* idev stays alive while we hold rcu_read_lock(). */ skb = skb_expand_head(skb, head_room); if (!skb) { IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); - rcu_read_unlock(); - return -ENOBUFS; + ret = -ENOBUFS; + goto unlock; } - rcu_read_unlock(); } if (opt) { @@ -358,17 +360,21 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, * skb to its handler for processing */ skb = l3mdev_ip6_out((struct sock *)sk, skb); - if (unlikely(!skb)) - return 0; + if (unlikely(!skb)) { + ret = 0; + goto unlock; + } /* hooks should never assume socket lock is held. * we promote our socket to non const */ - return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, - net, (struct sock *)sk, skb, NULL, dev, - dst_output); + ret = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, + net, (struct sock *)sk, skb, NULL, dev, + dst_output); + goto unlock; } + ret = -EMSGSIZE; skb->dev = dev; /* ipv6_local_error() does not require socket lock, * we promote our socket to non const @@ -377,7 +383,9 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, IP6_INC_STATS(net, idev, IPSTATS_MIB_FRAGFAILS); kfree_skb(skb); - return -EMSGSIZE; +unlock: + rcu_read_unlock(); + return ret; } EXPORT_SYMBOL(ip6_xmit); diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 36ca27496b3c..016b572e7d6f 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -169,6 +169,29 @@ static int unsolicited_report_interval(struct inet6_dev *idev) return iv > 0 ? 
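/*
 * Sketch of the single-exit RCU shape ip6_output()/ip6_xmit() take above:
 * rcu_read_lock() is taken once, the device is resolved with
 * dst_dev_rcu(), and every path funnels through one unlock label so the
 * read-side section cannot be leaked on error. ex_deliver() is a
 * hypothetical transmit hook.
 */
static int ex_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev;
	int ret;

	rcu_read_lock();
	dev = dst_dev_rcu(dst);		/* valid only inside this section */
	skb->dev = dev;

	if (unlikely(!netif_running(dev))) {
		kfree_skb(skb);
		ret = -ENETDOWN;
		goto unlock;
	}

	ret = ex_deliver(net, sk, skb, dev);	/* hypothetical */
unlock:
	rcu_read_unlock();
	return ret;
}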
iv : 1; } +static struct net_device *ip6_mc_find_dev(struct net *net, + const struct in6_addr *group, + int ifindex) +{ + struct net_device *dev = NULL; + struct rt6_info *rt; + + if (ifindex == 0) { + rcu_read_lock(); + rt = rt6_lookup(net, group, NULL, 0, NULL, 0); + if (rt) { + dev = dst_dev_rcu(&rt->dst); + dev_hold(dev); + ip6_rt_put(rt); + } + rcu_read_unlock(); + } else { + dev = dev_get_by_index(net, ifindex); + } + + return dev; +} + /* * socket join on multicast group */ @@ -191,28 +214,13 @@ static int __ipv6_sock_mc_join(struct sock *sk, int ifindex, } mc_lst = sock_kmalloc(sk, sizeof(struct ipv6_mc_socklist), GFP_KERNEL); - if (!mc_lst) return -ENOMEM; mc_lst->next = NULL; mc_lst->addr = *addr; - if (ifindex == 0) { - struct rt6_info *rt; - - rcu_read_lock(); - rt = rt6_lookup(net, addr, NULL, 0, NULL, 0); - if (rt) { - dev = dst_dev(&rt->dst); - dev_hold(dev); - ip6_rt_put(rt); - } - rcu_read_unlock(); - } else { - dev = dev_get_by_index(net, ifindex); - } - + dev = ip6_mc_find_dev(net, addr, ifindex); if (!dev) { sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); return -ENODEV; @@ -302,27 +310,14 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) } EXPORT_SYMBOL(ipv6_sock_mc_drop); -static struct inet6_dev *ip6_mc_find_dev(struct net *net, - const struct in6_addr *group, - int ifindex) +static struct inet6_dev *ip6_mc_find_idev(struct net *net, + const struct in6_addr *group, + int ifindex) { - struct net_device *dev = NULL; + struct net_device *dev; struct inet6_dev *idev; - if (ifindex == 0) { - struct rt6_info *rt; - - rcu_read_lock(); - rt = rt6_lookup(net, group, NULL, 0, NULL, 0); - if (rt) { - dev = dst_dev(&rt->dst); - dev_hold(dev); - ip6_rt_put(rt); - } - rcu_read_unlock(); - } else { - dev = dev_get_by_index(net, ifindex); - } + dev = ip6_mc_find_dev(net, group, ifindex); if (!dev) return NULL; @@ -374,7 +369,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk, if (!ipv6_addr_is_multicast(group)) return -EINVAL; - idev = ip6_mc_find_dev(net, group, pgsr->gsr_interface); + idev = ip6_mc_find_idev(net, group, pgsr->gsr_interface); if (!idev) return -ENODEV; @@ -509,7 +504,7 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf, gsf->gf_fmode != MCAST_EXCLUDE) return -EINVAL; - idev = ip6_mc_find_dev(net, group, gsf->gf_interface); + idev = ip6_mc_find_idev(net, group, gsf->gf_interface); if (!idev) return -ENODEV; diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 7d5abb3158ec..f427e41e9c49 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -130,7 +130,7 @@ struct neigh_table nd_tbl = { [NEIGH_VAR_DELAY_PROBE_TIME] = 5 * HZ, [NEIGH_VAR_INTERVAL_PROBE_TIME_MS] = 5 * HZ, [NEIGH_VAR_GC_STALETIME] = 60 * HZ, - [NEIGH_VAR_QUEUE_LEN_BYTES] = SK_WMEM_MAX, + [NEIGH_VAR_QUEUE_LEN_BYTES] = SK_WMEM_DEFAULT, [NEIGH_VAR_PROXY_QLEN] = 64, [NEIGH_VAR_ANYCAST_DELAY] = 1 * HZ, [NEIGH_VAR_PROXY_DELAY] = (8 * HZ) / 10, @@ -505,7 +505,7 @@ void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr, ip6_nd_hdr(skb, saddr, daddr, READ_ONCE(inet6_sk(sk)->hop_limit), skb->len); - dev = dst_dev(dst); + dev = dst_dev_rcu(dst); idev = __in6_dev_get(dev); IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS); diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 45f9105f9ac1..46540a5a4331 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -63,7 +63,10 @@ int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff #ifdef CONFIG_XFRM if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && 
xfrm_decode_session(net, skb, flowi6_to_flowi(&fl6), AF_INET6) == 0) { - skb_dst_set(skb, NULL); + /* ignore return value from skb_dstref_steal, xfrm_lookup takes + * care of dropping the refcnt if needed. + */ + skb_dstref_steal(skb); dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0); if (IS_ERR(dst)) return PTR_ERR(dst); diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index cb2d38e80de9..6b022449f867 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -12,6 +12,19 @@ #include <linux/netfilter_ipv6.h> #include <linux/netfilter_bridge.h> +static struct ipv6hdr * +nf_reject_ip6hdr_put(struct sk_buff *nskb, + const struct sk_buff *oldskb, + __u8 protocol, int hoplimit); +static void +nf_reject_ip6_tcphdr_put(struct sk_buff *nskb, + const struct sk_buff *oldskb, + const struct tcphdr *oth, unsigned int otcplen); +static const struct tcphdr * +nf_reject_ip6_tcphdr_get(struct sk_buff *oldskb, + struct tcphdr *otcph, + unsigned int *otcplen, int hook); + static bool nf_reject_v6_csum_ok(struct sk_buff *skb, int hook) { const struct ipv6hdr *ip6h = ipv6_hdr(skb); @@ -146,9 +159,10 @@ struct sk_buff *nf_reject_skb_v6_unreach(struct net *net, } EXPORT_SYMBOL_GPL(nf_reject_skb_v6_unreach); -const struct tcphdr *nf_reject_ip6_tcphdr_get(struct sk_buff *oldskb, - struct tcphdr *otcph, - unsigned int *otcplen, int hook) +static const struct tcphdr * +nf_reject_ip6_tcphdr_get(struct sk_buff *oldskb, + struct tcphdr *otcph, + unsigned int *otcplen, int hook) { const struct ipv6hdr *oip6h = ipv6_hdr(oldskb); u8 proto; @@ -192,11 +206,11 @@ const struct tcphdr *nf_reject_ip6_tcphdr_get(struct sk_buff *oldskb, return otcph; } -EXPORT_SYMBOL_GPL(nf_reject_ip6_tcphdr_get); -struct ipv6hdr *nf_reject_ip6hdr_put(struct sk_buff *nskb, - const struct sk_buff *oldskb, - __u8 protocol, int hoplimit) +static struct ipv6hdr * +nf_reject_ip6hdr_put(struct sk_buff *nskb, + const struct sk_buff *oldskb, + __u8 protocol, int hoplimit) { struct ipv6hdr *ip6h; const struct ipv6hdr *oip6h = ipv6_hdr(oldskb); @@ -216,11 +230,11 @@ struct ipv6hdr *nf_reject_ip6hdr_put(struct sk_buff *nskb, return ip6h; } -EXPORT_SYMBOL_GPL(nf_reject_ip6hdr_put); -void nf_reject_ip6_tcphdr_put(struct sk_buff *nskb, - const struct sk_buff *oldskb, - const struct tcphdr *oth, unsigned int otcplen) +static void +nf_reject_ip6_tcphdr_put(struct sk_buff *nskb, + const struct sk_buff *oldskb, + const struct tcphdr *oth, unsigned int otcplen) { struct tcphdr *tcph; @@ -248,7 +262,6 @@ void nf_reject_ip6_tcphdr_put(struct sk_buff *nskb, csum_partial(tcph, sizeof(struct tcphdr), 0)); } -EXPORT_SYMBOL_GPL(nf_reject_ip6_tcphdr_put); static int nf_reject6_fill_skb_dst(struct sk_buff *skb_in) { diff --git a/net/ipv6/netfilter/nf_socket_ipv6.c b/net/ipv6/netfilter/nf_socket_ipv6.c index 9ea5ef56cb27..ced8bd44828e 100644 --- a/net/ipv6/netfilter/nf_socket_ipv6.c +++ b/net/ipv6/netfilter/nf_socket_ipv6.c @@ -83,8 +83,7 @@ nf_socket_get_sock_v6(struct net *net, struct sk_buff *skb, int doff, { switch (protocol) { case IPPROTO_TCP: - return inet6_lookup(net, net->ipv4.tcp_death_row.hashinfo, - skb, doff, saddr, sport, daddr, dport, + return inet6_lookup(net, skb, doff, saddr, sport, daddr, dport, in->ifindex); case IPPROTO_UDP: return udp6_lib_lookup(net, saddr, sport, daddr, dport, diff --git a/net/ipv6/netfilter/nf_tproxy_ipv6.c b/net/ipv6/netfilter/nf_tproxy_ipv6.c index 52f828bb5a83..b2f59ed9d7cc 100644 --- a/net/ipv6/netfilter/nf_tproxy_ipv6.c +++ 
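/*
 * Sketch of the dst ownership handover in the ip6_route_me_harder() hunk
 * above: skb_dstref_steal() detaches the dst from the skb while keeping
 * its reference, which then belongs to xfrm_lookup() — that function
 * either consumes it on error or returns a dst to re-attach.
 * ex_xfrm_reroute() is illustrative.
 */
static int ex_xfrm_reroute(struct net *net, struct sock *sk,
			   struct sk_buff *skb, struct flowi6 *fl6)
{
	struct dst_entry *dst = skb_dst(skb);

	skb_dstref_steal(skb);		/* keep the ref, detach from skb */
	dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
	if (IS_ERR(dst))
		return PTR_ERR(dst);	/* ref already dropped by xfrm */

	skb_dst_set(skb, dst);		/* skb owns the new dst ref */
	return 0;
}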
b/net/ipv6/netfilter/nf_tproxy_ipv6.c @@ -80,7 +80,6 @@ nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, const struct net_device *in, const enum nf_tproxy_lookup_t lookup_type) { - struct inet_hashinfo *hinfo = net->ipv4.tcp_death_row.hashinfo; struct sock *sk; switch (protocol) { @@ -94,7 +93,7 @@ nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, switch (lookup_type) { case NF_TPROXY_LOOKUP_LISTENER: - sk = inet6_lookup_listener(net, hinfo, skb, + sk = inet6_lookup_listener(net, skb, thoff + __tcp_hdrlen(hp), saddr, sport, daddr, ntohs(dport), @@ -109,7 +108,7 @@ nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, */ break; case NF_TPROXY_LOOKUP_ESTABLISHED: - sk = __inet6_lookup_established(net, hinfo, saddr, sport, daddr, + sk = __inet6_lookup_established(net, saddr, sport, daddr, ntohs(dport), in->ifindex, 0); break; default: diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index d21fe27fe21e..1c9b283a4132 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -104,18 +104,20 @@ EXPORT_SYMBOL(ip6_find_1stfragopt); int ip6_dst_hoplimit(struct dst_entry *dst) { int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT); + + rcu_read_lock(); if (hoplimit == 0) { - struct net_device *dev = dst_dev(dst); + struct net_device *dev = dst_dev_rcu(dst); struct inet6_dev *idev; - rcu_read_lock(); idev = __in6_dev_get(dev); if (idev) hoplimit = READ_ONCE(idev->cnf.hop_limit); else hoplimit = READ_ONCE(dev_net(dev)->ipv6.devconf_all->hop_limit); - rcu_read_unlock(); } + rcu_read_unlock(); + return hoplimit; } EXPORT_SYMBOL(ip6_dst_hoplimit); diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 82b0492923d4..d7a2cdaa2631 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -208,7 +208,6 @@ struct proto pingv6_prot = { .recvmsg = ping_recvmsg, .bind = ping_bind, .backlog_rcv = ping_queue_rcv_skb, - .hash = ping_hash, .unhash = ping_unhash, .get_port = ping_get_port, .put_port = ping_unhash, diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 4c3f8245c40f..4ae07a67b4d4 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -163,7 +163,7 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) if (atomic_read(&sk->sk_rmem_alloc) >= READ_ONCE(sk->sk_rcvbuf)) { - atomic_inc(&sk->sk_drops); + sk_drops_inc(sk); continue; } @@ -361,7 +361,7 @@ static inline int rawv6_rcv_skb(struct sock *sk, struct sk_buff *skb) if ((raw6_sk(sk)->checksum || rcu_access_pointer(sk->sk_filter)) && skb_checksum_complete(skb)) { - atomic_inc(&sk->sk_drops); + sk_drops_inc(sk); sk_skb_reason_drop(sk, skb, SKB_DROP_REASON_SKB_CSUM); return NET_RX_DROP; } @@ -389,7 +389,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb) struct raw6_sock *rp = raw6_sk(sk); if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) { - atomic_inc(&sk->sk_drops); + sk_drops_inc(sk); sk_skb_reason_drop(sk, skb, SKB_DROP_REASON_XFRM_POLICY); return NET_RX_DROP; } @@ -414,7 +414,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb) if (inet_test_bit(HDRINCL, sk)) { if (skb_checksum_complete(skb)) { - atomic_inc(&sk->sk_drops); + sk_drops_inc(sk); sk_skb_reason_drop(sk, skb, SKB_DROP_REASON_SKB_CSUM); return NET_RX_DROP; } @@ -1175,6 +1175,7 @@ static int rawv6_init_sk(struct sock *sk) { struct raw6_sock *rp = raw6_sk(sk); + sk->sk_drop_counters = &rp->drop_counters; switch (inet_sk(sk)->inet_num) { case IPPROTO_ICMPV6: rp->checksum = 1; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 3299cfa12e21..3371f16b7a3e 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c 
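/*
 * Sketch of the drop-accounting conversion visible in the raw.c hunks
 * above: call sites use sk_drops_inc() instead of open-coding
 * atomic_inc(&sk->sk_drops), and a socket can point sk_drop_counters at
 * its own counter block (as rawv6_init_sk() now does), keeping the
 * storage strategy behind the accessors. ex_rcv_checked() is illustrative.
 */
static int ex_rcv_checked(struct sock *sk, struct sk_buff *skb)
{
	if (skb_checksum_complete(skb)) {
		sk_drops_inc(sk);	/* was: atomic_inc(&sk->sk_drops) */
		sk_skb_reason_drop(sk, skb, SKB_DROP_REASON_SKB_CSUM);
		return NET_RX_DROP;
	}
	return NET_RX_SUCCESS;
}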
@@ -2943,7 +2943,7 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, if (res.f6i->nh) { struct fib6_nh_match_arg arg = { - .dev = dst_dev(dst), + .dev = dst_dev_rcu(dst), .gw = &rt6->rt6i_gateway, }; @@ -3238,7 +3238,6 @@ EXPORT_SYMBOL_GPL(ip6_sk_redirect); static unsigned int ip6_default_advmss(const struct dst_entry *dst) { - struct net_device *dev = dst_dev(dst); unsigned int mtu = dst_mtu(dst); struct net *net; @@ -3246,7 +3245,7 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst) rcu_read_lock(); - net = dev_net_rcu(dev); + net = dst_dev_net_rcu(dst); if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss) mtu = net->ipv6.sysctl.ip6_rt_min_advmss; @@ -4301,7 +4300,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu if (res.f6i->nh) { struct fib6_nh_match_arg arg = { - .dev = dst_dev(dst), + .dev = dst_dev_rcu(dst), .gw = &rt->rt6i_gateway, }; diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c index 180da19c148c..a5c4c629b788 100644 --- a/net/ipv6/seg6.c +++ b/net/ipv6/seg6.c @@ -522,16 +522,10 @@ int __init seg6_init(void) if (err) goto out_unregister_iptun; - err = seg6_hmac_init(); - if (err) - goto out_unregister_seg6; - pr_info("Segment Routing with IPv6\n"); out: return err; -out_unregister_seg6: - seg6_local_exit(); out_unregister_iptun: seg6_iptunnel_exit(); out_unregister_genl: @@ -543,7 +537,6 @@ out_unregister_pernet: void seg6_exit(void) { - seg6_hmac_exit(); seg6_local_exit(); seg6_iptunnel_exit(); genl_unregister_family(&seg6_genl_family); diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c index fd58426f222b..ee6bac0160ac 100644 --- a/net/ipv6/seg6_hmac.c +++ b/net/ipv6/seg6_hmac.c @@ -16,7 +16,6 @@ #include <linux/in6.h> #include <linux/icmpv6.h> #include <linux/mroute6.h> -#include <linux/slab.h> #include <linux/rhashtable.h> #include <linux/netfilter.h> @@ -34,7 +33,8 @@ #include <net/addrconf.h> #include <net/xfrm.h> -#include <crypto/hash.h> +#include <crypto/sha1.h> +#include <crypto/sha2.h> #include <crypto/utils.h> #include <net/seg6.h> #include <net/genetlink.h> @@ -78,17 +78,6 @@ static const struct rhashtable_params rht_params = { .obj_cmpfn = seg6_hmac_cmpfn, }; -static struct seg6_hmac_algo hmac_algos[] = { - { - .alg_id = SEG6_HMAC_ALGO_SHA1, - .name = "hmac(sha1)", - }, - { - .alg_id = SEG6_HMAC_ALGO_SHA256, - .name = "hmac(sha256)", - }, -}; - static struct sr6_tlv_hmac *seg6_get_tlv_hmac(struct ipv6_sr_hdr *srh) { struct sr6_tlv_hmac *tlv; @@ -108,75 +97,13 @@ static struct sr6_tlv_hmac *seg6_get_tlv_hmac(struct ipv6_sr_hdr *srh) return tlv; } -static struct seg6_hmac_algo *__hmac_get_algo(u8 alg_id) -{ - struct seg6_hmac_algo *algo; - int i, alg_count; - - alg_count = ARRAY_SIZE(hmac_algos); - for (i = 0; i < alg_count; i++) { - algo = &hmac_algos[i]; - if (algo->alg_id == alg_id) - return algo; - } - - return NULL; -} - -static int __do_hmac(struct seg6_hmac_info *hinfo, const char *text, u8 psize, - u8 *output, int outlen) -{ - struct seg6_hmac_algo *algo; - struct crypto_shash *tfm; - struct shash_desc *shash; - int ret, dgsize; - - algo = __hmac_get_algo(hinfo->alg_id); - if (!algo) - return -ENOENT; - - tfm = *this_cpu_ptr(algo->tfms); - - dgsize = crypto_shash_digestsize(tfm); - if (dgsize > outlen) { - pr_debug("sr-ipv6: __do_hmac: digest size too big (%d / %d)\n", - dgsize, outlen); - return -ENOMEM; - } - - ret = crypto_shash_setkey(tfm, hinfo->secret, hinfo->slen); - if (ret < 0) { - pr_debug("sr-ipv6: crypto_shash_setkey failed: err %d\n", ret); - goto failed; - } - - 
shash = *this_cpu_ptr(algo->shashs); - shash->tfm = tfm; - - ret = crypto_shash_digest(shash, text, psize, output); - if (ret < 0) { - pr_debug("sr-ipv6: crypto_shash_digest failed: err %d\n", ret); - goto failed; - } - - return dgsize; - -failed: - return ret; -} - int seg6_hmac_compute(struct seg6_hmac_info *hinfo, struct ipv6_sr_hdr *hdr, struct in6_addr *saddr, u8 *output) { __be32 hmackeyid = cpu_to_be32(hinfo->hmackeyid); - u8 tmp_out[SEG6_HMAC_MAX_DIGESTSIZE]; - int plen, i, dgsize, wrsize; + int plen, i, ret = 0; char *ring, *off; - /* a 160-byte buffer for digest output allows to store highest known - * hash function (RadioGatun) with up to 1216 bits - */ - /* saddr(16) + first_seg(1) + flags(1) + keyid(4) + seglist(16n) */ plen = 16 + 1 + 1 + 4 + (hdr->first_segment + 1) * 16; @@ -219,22 +146,25 @@ int seg6_hmac_compute(struct seg6_hmac_info *hinfo, struct ipv6_sr_hdr *hdr, off += 16; } - dgsize = __do_hmac(hinfo, ring, plen, tmp_out, - SEG6_HMAC_MAX_DIGESTSIZE); + switch (hinfo->alg_id) { + case SEG6_HMAC_ALGO_SHA1: + hmac_sha1(&hinfo->key.sha1, ring, plen, output); + static_assert(SEG6_HMAC_FIELD_LEN > SHA1_DIGEST_SIZE); + memset(&output[SHA1_DIGEST_SIZE], 0, + SEG6_HMAC_FIELD_LEN - SHA1_DIGEST_SIZE); + break; + case SEG6_HMAC_ALGO_SHA256: + hmac_sha256(&hinfo->key.sha256, ring, plen, output); + static_assert(SEG6_HMAC_FIELD_LEN == SHA256_DIGEST_SIZE); + break; + default: + WARN_ON_ONCE(1); + ret = -EINVAL; + break; + } local_unlock_nested_bh(&hmac_storage.bh_lock); local_bh_enable(); - - if (dgsize < 0) - return dgsize; - - wrsize = SEG6_HMAC_FIELD_LEN; - if (wrsize > dgsize) - wrsize = dgsize; - - memset(output, 0, SEG6_HMAC_FIELD_LEN); - memcpy(output, tmp_out, wrsize); - - return 0; + return ret; } EXPORT_SYMBOL(seg6_hmac_compute); @@ -305,8 +235,18 @@ int seg6_hmac_info_add(struct net *net, u32 key, struct seg6_hmac_info *hinfo) struct seg6_pernet_data *sdata = seg6_pernet(net); int err; - if (!__hmac_get_algo(hinfo->alg_id)) + switch (hinfo->alg_id) { + case SEG6_HMAC_ALGO_SHA1: + hmac_sha1_preparekey(&hinfo->key.sha1, + hinfo->secret, hinfo->slen); + break; + case SEG6_HMAC_ALGO_SHA256: + hmac_sha256_preparekey(&hinfo->key.sha256, + hinfo->secret, hinfo->slen); + break; + default: return -EINVAL; + } err = rhashtable_lookup_insert_fast(&sdata->hmac_infos, &hinfo->node, rht_params); @@ -363,65 +303,6 @@ out: } EXPORT_SYMBOL(seg6_push_hmac); -static int seg6_hmac_init_algo(void) -{ - struct seg6_hmac_algo *algo; - struct crypto_shash *tfm; - struct shash_desc *shash; - int i, alg_count, cpu; - int ret = -ENOMEM; - - alg_count = ARRAY_SIZE(hmac_algos); - - for (i = 0; i < alg_count; i++) { - struct crypto_shash **p_tfm; - int shsize; - - algo = &hmac_algos[i]; - algo->tfms = alloc_percpu(struct crypto_shash *); - if (!algo->tfms) - goto error_out; - - for_each_possible_cpu(cpu) { - tfm = crypto_alloc_shash(algo->name, 0, 0); - if (IS_ERR(tfm)) { - ret = PTR_ERR(tfm); - goto error_out; - } - p_tfm = per_cpu_ptr(algo->tfms, cpu); - *p_tfm = tfm; - } - - p_tfm = raw_cpu_ptr(algo->tfms); - tfm = *p_tfm; - - shsize = sizeof(*shash) + crypto_shash_descsize(tfm); - - algo->shashs = alloc_percpu(struct shash_desc *); - if (!algo->shashs) - goto error_out; - - for_each_possible_cpu(cpu) { - shash = kzalloc_node(shsize, GFP_KERNEL, - cpu_to_node(cpu)); - if (!shash) - goto error_out; - *per_cpu_ptr(algo->shashs, cpu) = shash; - } - } - - return 0; - -error_out: - seg6_hmac_exit(); - return ret; -} - -int __init seg6_hmac_init(void) -{ - return seg6_hmac_init_algo(); -} - int 
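/*
 * Sketch of the library-HMAC flow the seg6 conversion above lands on:
 * expand the secret once with hmac_sha256_preparekey(), then produce
 * digests with the one-shot hmac_sha256() — no crypto_shash TFMs, no
 * per-CPU descriptors, and no runtime failure paths. Struct and function
 * names follow <crypto/sha2.h> as used by the patch.
 */
static void ex_hmac_once(const u8 *secret, size_t slen,
			 const void *msg, size_t mlen,
			 u8 out[SHA256_DIGEST_SIZE])
{
	struct hmac_sha256_key key;

	hmac_sha256_preparekey(&key, secret, slen);	/* keying: once */
	hmac_sha256(&key, msg, mlen, out);		/* digest: per message */
}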
__net_init seg6_hmac_net_init(struct net *net) { struct seg6_pernet_data *sdata = seg6_pernet(net); @@ -429,36 +310,6 @@ int __net_init seg6_hmac_net_init(struct net *net) return rhashtable_init(&sdata->hmac_infos, &rht_params); } -void seg6_hmac_exit(void) -{ - struct seg6_hmac_algo *algo = NULL; - struct crypto_shash *tfm; - struct shash_desc *shash; - int i, alg_count, cpu; - - alg_count = ARRAY_SIZE(hmac_algos); - for (i = 0; i < alg_count; i++) { - algo = &hmac_algos[i]; - - if (algo->shashs) { - for_each_possible_cpu(cpu) { - shash = *per_cpu_ptr(algo->shashs, cpu); - kfree(shash); - } - free_percpu(algo->shashs); - } - - if (algo->tfms) { - for_each_possible_cpu(cpu) { - tfm = *per_cpu_ptr(algo->tfms, cpu); - crypto_free_shash(tfm); - } - free_percpu(algo->tfms); - } - } -} -EXPORT_SYMBOL(seg6_hmac_exit); - void __net_exit seg6_hmac_net_exit(struct net *net) { struct seg6_pernet_data *sdata = seg6_pernet(net); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 12496ba1b7d4..cf37ad9686e6 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -848,6 +848,49 @@ static inline __be32 try_6rd(struct ip_tunnel *tunnel, return dst; } +static bool ipip6_tunnel_dst_find(struct sk_buff *skb, __be32 *dst, + bool is_isatap) +{ + const struct ipv6hdr *iph6 = ipv6_hdr(skb); + struct neighbour *neigh = NULL; + const struct in6_addr *addr6; + bool found = false; + int addr_type; + + if (skb_dst(skb)) + neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr); + + if (!neigh) { + net_dbg_ratelimited("nexthop == NULL\n"); + return false; + } + + addr6 = (const struct in6_addr *)&neigh->primary_key; + addr_type = ipv6_addr_type(addr6); + + if (is_isatap) { + if ((addr_type & IPV6_ADDR_UNICAST) && + ipv6_addr_is_isatap(addr6)) { + *dst = addr6->s6_addr32[3]; + found = true; + } + } else { + if (addr_type == IPV6_ADDR_ANY) { + addr6 = &ipv6_hdr(skb)->daddr; + addr_type = ipv6_addr_type(addr6); + } + + if ((addr_type & IPV6_ADDR_COMPATv4) != 0) { + *dst = addr6->s6_addr32[3]; + found = true; + } + } + + neigh_release(neigh); + + return found; +} + /* * This function assumes it is being called from dev_queue_xmit() * and that skb is filled properly by that function. 
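The seg6_hmac conversion above replaces the per-CPU crypto_shash transforms and descriptors with the synchronous HMAC library interface from <crypto/sha1.h> and <crypto/sha2.h>: the key is expanded once at insertion time (hmac_sha1_preparekey()/hmac_sha256_preparekey() in seg6_hmac_info_add()), and each packet then needs a single digest call in seg6_hmac_compute(). A minimal sketch of that pairing, using only the library calls visible in the hunks; the demo_* names are hypothetical:

#include <crypto/sha2.h>

struct demo_hmac_state {
	struct hmac_sha256_key key;	/* expanded once, at key-install time */
};

static void demo_set_key(struct demo_hmac_state *st,
			 const u8 *secret, size_t slen)
{
	/* One-time ipad/opad expansion, as seg6_hmac_info_add() now does,
	 * so the per-packet path never touches the raw key again.
	 */
	hmac_sha256_preparekey(&st->key, secret, slen);
}

static void demo_sign(const struct demo_hmac_state *st,
		      const u8 *msg, size_t len,
		      u8 out[SHA256_DIGEST_SIZE])
{
	/* Synchronous and allocation-free, which is what lets the patch
	 * delete the per-CPU tfm/shash_desc arrays and the
	 * seg6_hmac_init()/seg6_hmac_exit() setup and teardown entirely.
	 */
	hmac_sha256(&st->key, msg, len, out);
}

seg6_hmac_compute() additionally zero-pads the SHA-1 digest up to SEG6_HMAC_FIELD_LEN, since the TLV field is fixed-size while SHA1_DIGEST_SIZE is smaller.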
@@ -867,8 +910,6 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, __be32 dst = tiph->daddr; struct flowi4 fl4; int mtu; - const struct in6_addr *addr6; - int addr_type; u8 ttl; u8 protocol = IPPROTO_IPV6; int t_hlen = tunnel->hlen + sizeof(struct iphdr); @@ -877,64 +918,15 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, tos = ipv6_get_dsfield(iph6); /* ISATAP (RFC4214) - must come before 6to4 */ - if (dev->priv_flags & IFF_ISATAP) { - struct neighbour *neigh = NULL; - bool do_tx_error = false; - - if (skb_dst(skb)) - neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr); - - if (!neigh) { - net_dbg_ratelimited("nexthop == NULL\n"); - goto tx_error; - } - - addr6 = (const struct in6_addr *)&neigh->primary_key; - addr_type = ipv6_addr_type(addr6); - - if ((addr_type & IPV6_ADDR_UNICAST) && - ipv6_addr_is_isatap(addr6)) - dst = addr6->s6_addr32[3]; - else - do_tx_error = true; - - neigh_release(neigh); - if (do_tx_error) - goto tx_error; - } + if ((dev->priv_flags & IFF_ISATAP) && + !ipip6_tunnel_dst_find(skb, &dst, true)) + goto tx_error; if (!dst) dst = try_6rd(tunnel, &iph6->daddr); - if (!dst) { - struct neighbour *neigh = NULL; - bool do_tx_error = false; - - if (skb_dst(skb)) - neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr); - - if (!neigh) { - net_dbg_ratelimited("nexthop == NULL\n"); - goto tx_error; - } - - addr6 = (const struct in6_addr *)&neigh->primary_key; - addr_type = ipv6_addr_type(addr6); - - if (addr_type == IPV6_ADDR_ANY) { - addr6 = &ipv6_hdr(skb)->daddr; - addr_type = ipv6_addr_type(addr6); - } - - if ((addr_type & IPV6_ADDR_COMPATv4) != 0) - dst = addr6->s6_addr32[3]; - else - do_tx_error = true; - - neigh_release(neigh); - if (do_tx_error) - goto tx_error; - } + if (!dst && !ipip6_tunnel_dst_find(skb, &dst, false)) + goto tx_error; flowi4_init_output(&fl4, tunnel->parms.link, tunnel->fwmark, tos & INET_DSCP_MASK, RT_SCOPE_UNIVERSE, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index e885629312a4..0562e939b2e3 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -388,8 +388,7 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, bool fatal; int err; - sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, - &hdr->daddr, th->dest, + sk = __inet6_lookup_established(net, &hdr->daddr, th->dest, &hdr->saddr, ntohs(th->source), skb->dev->ifindex, inet6_sdif(skb)); @@ -1073,8 +1072,7 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb, * Incoming packet is checked with md5 hash with finding key, * no RST generated if md5 hash doesn't match. 
*/ - sk1 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo, - NULL, 0, &ipv6h->saddr, th->source, + sk1 = inet6_lookup_listener(net, NULL, 0, &ipv6h->saddr, th->source, &ipv6h->daddr, ntohs(th->source), dif, sdif); if (!sk1) @@ -1787,7 +1785,7 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) hdr = ipv6_hdr(skb); lookup: - sk = __inet6_lookup_skb(net->ipv4.tcp_death_row.hashinfo, skb, __tcp_hdrlen(th), + sk = __inet6_lookup_skb(skb, __tcp_hdrlen(th), th->source, th->dest, inet6_iif(skb), sdif, &refcounted); if (!sk) @@ -1809,7 +1807,7 @@ lookup: &hdr->saddr, &hdr->daddr, AF_INET6, dif, sdif); if (drop_reason) { - sk_drops_add(sk, skb); + sk_drops_skbadd(sk, skb); reqsk_put(req); goto discard_it; } @@ -1948,7 +1946,7 @@ discard_it: return 0; discard_and_relse: - sk_drops_add(sk, skb); + sk_drops_skbadd(sk, skb); if (refcounted) sock_put(sk); goto discard_it; @@ -1974,8 +1972,7 @@ do_time_wait: { struct sock *sk2; - sk2 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo, - skb, __tcp_hdrlen(th), + sk2 = inet6_lookup_listener(net, skb, __tcp_hdrlen(th), &ipv6_hdr(skb)->saddr, th->source, &ipv6_hdr(skb)->daddr, ntohs(th->dest), @@ -2027,8 +2024,7 @@ void tcp_v6_early_demux(struct sk_buff *skb) return; /* Note : We use inet6_iif() here, not tcp_v6_iif() */ - sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, - &hdr->saddr, th->source, + sk = __inet6_lookup_established(net, &hdr->saddr, th->source, &hdr->daddr, ntohs(th->dest), inet6_iif(skb), inet6_sdif(skb)); if (sk) { @@ -2048,7 +2044,6 @@ void tcp_v6_early_demux(struct sk_buff *skb) static struct timewait_sock_ops tcp6_timewait_sock_ops = { .twsk_obj_size = sizeof(struct tcp6_timewait_sock), - .twsk_destructor = tcp_twsk_destructor, }; INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb) @@ -2228,9 +2223,9 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) rx_queue, timer_active, jiffies_delta_to_clock_t(timer_expires - jiffies), - icsk->icsk_retransmits, + READ_ONCE(icsk->icsk_retransmits), from_kuid_munged(seq_user_ns(seq), sk_uid(sp)), - icsk->icsk_probes_out, + READ_ONCE(icsk->icsk_probes_out), sock_i_ino(sp), refcount_read(&sp->sk_refcnt), sp, jiffies_to_clock_t(icsk->icsk_rto), @@ -2356,7 +2351,6 @@ struct proto tcpv6_prot = { .per_cpu_fw_alloc = &tcp_memory_per_cpu_fw_alloc, .memory_pressure = &tcp_memory_pressure, - .orphan_count = &tcp_orphan_count, .sysctl_mem = sysctl_tcp_mem, .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem), .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem), diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c index a8a04f441e78..effeba58630b 100644 --- a/net/ipv6/tcpv6_offload.c +++ b/net/ipv6/tcpv6_offload.c @@ -36,8 +36,7 @@ static void tcp6_check_fraglist_gro(struct list_head *head, struct sk_buff *skb, inet6_get_iif_sdif(skb, &iif, &sdif); hdr = skb_gro_network_header(skb); net = dev_net_rcu(skb->dev); - sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, - &hdr->saddr, th->source, + sk = __inet6_lookup_established(net, &hdr->saddr, th->source, &hdr->daddr, ntohs(th->dest), iif, sdif); NAPI_GRO_CB(skb)->is_flist = !sk; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 6a68f77da44b..a35ee6d693a8 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -524,7 +524,7 @@ try_again: } if (unlikely(err)) { if (!peeking) { - atomic_inc(&sk->sk_drops); + sk_drops_inc(sk); SNMP_INC_STATS(mib, UDP_MIB_INERRORS); } kfree_skb(skb); @@ -908,7 +908,7 @@ 
csum_error: __UDP6_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); drop: __UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); - atomic_inc(&sk->sk_drops); + sk_drops_inc(sk); sk_skb_reason_drop(sk, skb, drop_reason); return -1; } @@ -1013,7 +1013,7 @@ start_lookup: } nskb = skb_clone(skb, GFP_ATOMIC); if (unlikely(!nskb)) { - atomic_inc(&sk->sk_drops); + sk_drops_inc(sk); __UDP6_INC_STATS(net, UDP_MIB_RCVBUFERRORS, IS_UDPLITE(sk)); __UDP6_INC_STATS(net, UDP_MIB_INERRORS, diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index cc2b3c44bc05..6c717a7ef292 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1187,7 +1187,7 @@ static void iucv_process_message(struct sock *sk, struct sk_buff *skb, IUCV_SKB_CB(skb)->offset = 0; if (sk_filter(sk, skb)) { - atomic_inc(&sk->sk_drops); /* skb rejected by filter */ + sk_drops_inc(sk); /* skb rejected by filter */ kfree_skb(skb); return; } @@ -2011,7 +2011,7 @@ static int afiucv_hs_callback_rx(struct sock *sk, struct sk_buff *skb) skb_reset_network_header(skb); IUCV_SKB_CB(skb)->offset = 0; if (sk_filter(sk, skb)) { - atomic_inc(&sk->sk_drops); /* skb rejected by filter */ + sk_drops_inc(sk); /* skb rejected by filter */ kfree_skb(skb); return NET_RX_SUCCESS; } diff --git a/net/mptcp/crypto.c b/net/mptcp/crypto.c index b08ba959ac4f..31948e18d97d 100644 --- a/net/mptcp/crypto.c +++ b/net/mptcp/crypto.c @@ -22,7 +22,6 @@ #include <linux/kernel.h> #include <crypto/sha2.h> -#include <linux/unaligned.h> #include "protocol.h" @@ -43,39 +42,9 @@ void mptcp_crypto_key_sha(u64 key, u32 *token, u64 *idsn) void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac) { - u8 input[SHA256_BLOCK_SIZE + SHA256_DIGEST_SIZE]; - u8 key1be[8]; - u8 key2be[8]; - int i; + __be64 key[2] = { cpu_to_be64(key1), cpu_to_be64(key2) }; - if (WARN_ON_ONCE(len > SHA256_DIGEST_SIZE)) - len = SHA256_DIGEST_SIZE; - - put_unaligned_be64(key1, key1be); - put_unaligned_be64(key2, key2be); - - /* Generate key xored with ipad */ - memset(input, 0x36, SHA256_BLOCK_SIZE); - for (i = 0; i < 8; i++) - input[i] ^= key1be[i]; - for (i = 0; i < 8; i++) - input[i + 8] ^= key2be[i]; - - memcpy(&input[SHA256_BLOCK_SIZE], msg, len); - - /* emit sha256(K1 || msg) on the second input block, so we can - * reuse 'input' for the last hashing - */ - sha256(input, SHA256_BLOCK_SIZE + len, &input[SHA256_BLOCK_SIZE]); - - /* Prepare second part of hmac */ - memset(input, 0x5C, SHA256_BLOCK_SIZE); - for (i = 0; i < 8; i++) - input[i] ^= key1be[i]; - for (i = 0; i < 8; i++) - input[i + 8] ^= key2be[i]; - - sha256(input, SHA256_BLOCK_SIZE + SHA256_DIGEST_SIZE, hmac); + hmac_sha256_usingrawkey((const u8 *)key, sizeof(key), msg, len, hmac); } #if IS_MODULE(CONFIG_MPTCP_KUNIT_TEST) diff --git a/net/mptcp/mptcp_diag.c b/net/mptcp/mptcp_diag.c index 0566dd793810..ac974299de71 100644 --- a/net/mptcp/mptcp_diag.c +++ b/net/mptcp/mptcp_diag.c @@ -15,9 +15,9 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *req, - struct nlattr *bc, bool net_admin) + bool net_admin) { - if (!inet_diag_bc_sk(bc, sk)) + if (!inet_diag_bc_sk(cb->data, sk)) return 0; return inet_sk_diag_fill(sk, inet_csk(sk), skb, cb, req, NLM_F_MULTI, @@ -76,9 +76,7 @@ static void mptcp_diag_dump_listeners(struct sk_buff *skb, struct netlink_callba const struct inet_diag_req_v2 *r, bool net_admin) { - struct inet_diag_dump_data *cb_data = cb->data; struct mptcp_diag_ctx *diag_ctx = (void *)cb->ctx; - struct nlattr *bc = 
cb_data->inet_diag_nla_bc; struct net *net = sock_net(skb->sk); struct inet_hashinfo *hinfo; int i; @@ -121,7 +119,7 @@ static void mptcp_diag_dump_listeners(struct sk_buff *skb, struct netlink_callba if (!refcount_inc_not_zero(&sk->sk_refcnt)) goto next_listen; - ret = sk_diag_dump(sk, skb, cb, r, bc, net_admin); + ret = sk_diag_dump(sk, skb, cb, r, net_admin); sock_put(sk); @@ -154,15 +152,10 @@ static void mptcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN); struct mptcp_diag_ctx *diag_ctx = (void *)cb->ctx; struct net *net = sock_net(skb->sk); - struct inet_diag_dump_data *cb_data; struct mptcp_sock *msk; - struct nlattr *bc; BUILD_BUG_ON(sizeof(cb->ctx) < sizeof(*diag_ctx)); - cb_data = cb->data; - bc = cb_data->inet_diag_nla_bc; - while ((msk = mptcp_token_iter_next(net, &diag_ctx->s_slot, &diag_ctx->s_num)) != NULL) { struct inet_sock *inet = (struct inet_sock *)msk; @@ -181,7 +174,7 @@ static void mptcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, r->id.idiag_dport) goto next; - ret = sk_diag_dump(sk, skb, cb, r, bc, net_admin); + ret = sk_diag_dump(sk, skb, cb, r, net_admin); next: sock_put(sk); if (ret < 0) { diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index e6fd97b21e9e..7e9eb0ab21c3 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -12,6 +12,7 @@ #include <linux/sched/signal.h> #include <linux/atomic.h> #include <net/aligned_data.h> +#include <net/rps.h> #include <net/sock.h> #include <net/inet_common.h> #include <net/inet_hashtables.h> @@ -137,7 +138,7 @@ struct sock *__mptcp_nmpc_sk(struct mptcp_sock *msk) static void mptcp_drop(struct sock *sk, struct sk_buff *skb) { - sk_drops_add(sk, skb); + sk_drops_skbadd(sk, skb); __kfree_skb(skb); } @@ -1740,6 +1741,20 @@ static u32 mptcp_send_limit(const struct sock *sk) return limit - not_sent; } +static void mptcp_rps_record_subflows(const struct mptcp_sock *msk) +{ + struct mptcp_subflow_context *subflow; + + if (!rfs_is_needed()) + return; + + mptcp_for_each_subflow(msk, subflow) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + + sock_rps_record_flow(ssk); + } +} + static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct mptcp_sock *msk = mptcp_sk(sk); @@ -1753,6 +1768,8 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) lock_sock(sk); + mptcp_rps_record_subflows(msk); + if (unlikely(inet_test_bit(DEFER_CONNECT, sk) || msg->msg_flags & MSG_FASTOPEN)) { int copied_syn = 0; @@ -2131,6 +2148,8 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, goto out_err; } + mptcp_rps_record_subflows(msk); + timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); len = min_t(size_t, len, INT_MAX); @@ -2587,7 +2606,8 @@ static void __mptcp_retrans(struct sock *sk) if (mptcp_data_fin_enabled(msk)) { struct inet_connection_sock *icsk = inet_csk(sk); - icsk->icsk_retransmits++; + WRITE_ONCE(icsk->icsk_retransmits, + icsk->icsk_retransmits + 1); mptcp_set_datafin_timeout(sk); mptcp_send_ack(msk); @@ -3920,6 +3940,8 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, mptcp_sock_graft(ssk, newsock); } + mptcp_rps_record_subflows(msk); + /* Do late cleanup for the first subflow as necessary. Also * deal with bad peers not doing a complete shutdown. 
*/ diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index b15d7fab5c4b..a1787a1344ac 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -788,9 +788,7 @@ static inline bool mptcp_epollin_ready(const struct sock *sk) * as it can always coalesce them */ return (data_avail >= sk->sk_rcvlowat) || - (mem_cgroup_sockets_enabled && sk->sk_memcg && - mem_cgroup_under_socket_pressure(sk->sk_memcg)) || - READ_ONCE(tcp_memory_pressure); + tcp_under_memory_pressure(sk); } int mptcp_set_rcvlowat(struct sock *sk, int val); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 3f1b62a9fe88..c8a7e4b59db1 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1717,19 +1717,14 @@ static void mptcp_attach_cgroup(struct sock *parent, struct sock *child) /* only the additional subflows created by kworkers have to be modified */ if (cgroup_id(sock_cgroup_ptr(parent_skcd)) != cgroup_id(sock_cgroup_ptr(child_skcd))) { -#ifdef CONFIG_MEMCG - struct mem_cgroup *memcg = parent->sk_memcg; - - mem_cgroup_sk_free(child); - if (memcg && css_tryget(&memcg->css)) - child->sk_memcg = memcg; -#endif /* CONFIG_MEMCG */ - cgroup_sk_free(child_skcd); *child_skcd = *parent_skcd; cgroup_sk_clone(child_skcd); } #endif /* CONFIG_SOCK_CGROUP_DATA */ + + if (mem_cgroup_sockets_enabled) + mem_cgroup_sk_inherit(parent, child); } static void mptcp_subflow_ops_override(struct sock *ssk) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 50fd6809380f..3a04665adf99 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -60,7 +60,7 @@ MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("List and change connection tracking table"); struct ctnetlink_list_dump_ctx { - struct nf_conn *last; + unsigned long last_id; unsigned int cpu; bool done; }; @@ -1733,16 +1733,6 @@ static int ctnetlink_get_conntrack(struct sk_buff *skb, return nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid); } -static int ctnetlink_done_list(struct netlink_callback *cb) -{ - struct ctnetlink_list_dump_ctx *ctx = (void *)cb->ctx; - - if (ctx->last) - nf_ct_put(ctx->last); - - return 0; -} - #ifdef CONFIG_NF_CONNTRACK_EVENTS static int ctnetlink_dump_one_entry(struct sk_buff *skb, struct netlink_callback *cb, @@ -1757,11 +1747,11 @@ static int ctnetlink_dump_one_entry(struct sk_buff *skb, if (l3proto && nf_ct_l3num(ct) != l3proto) return 0; - if (ctx->last) { - if (ct != ctx->last) + if (ctx->last_id) { + if (ctnetlink_get_id(ct) != ctx->last_id) return 0; - ctx->last = NULL; + ctx->last_id = 0; } /* We can't dump extension info for the unconfirmed @@ -1775,12 +1765,8 @@ static int ctnetlink_dump_one_entry(struct sk_buff *skb, cb->nlh->nlmsg_seq, NFNL_MSG_TYPE(cb->nlh->nlmsg_type), ct, dying, 0); - if (res < 0) { - if (!refcount_inc_not_zero(&ct->ct_general.use)) - return 0; - - ctx->last = ct; - } + if (res < 0) + ctx->last_id = ctnetlink_get_id(ct); return res; } @@ -1796,10 +1782,10 @@ static int ctnetlink_dump_dying(struct sk_buff *skb, struct netlink_callback *cb) { struct ctnetlink_list_dump_ctx *ctx = (void *)cb->ctx; - struct nf_conn *last = ctx->last; #ifdef CONFIG_NF_CONNTRACK_EVENTS const struct net *net = sock_net(skb->sk); struct nf_conntrack_net_ecache *ecache_net; + unsigned long last_id = ctx->last_id; struct nf_conntrack_tuple_hash *h; struct hlist_nulls_node *n; #endif @@ -1807,7 +1793,7 @@ ctnetlink_dump_dying(struct sk_buff *skb, struct netlink_callback *cb) if (ctx->done) return 0; - ctx->last = NULL; + ctx->last_id = 0; #ifdef 
CONFIG_NF_CONNTRACK_EVENTS ecache_net = nf_conn_pernet_ecache(net); @@ -1818,24 +1804,21 @@ ctnetlink_dump_dying(struct sk_buff *skb, struct netlink_callback *cb) int res; ct = nf_ct_tuplehash_to_ctrack(h); - if (last && last != ct) + if (last_id && last_id != ctnetlink_get_id(ct)) continue; res = ctnetlink_dump_one_entry(skb, cb, ct, true); if (res < 0) { spin_unlock_bh(&ecache_net->dying_lock); - nf_ct_put(last); return skb->len; } - nf_ct_put(last); - last = NULL; + last_id = 0; } spin_unlock_bh(&ecache_net->dying_lock); #endif ctx->done = true; - nf_ct_put(last); return skb->len; } @@ -1847,7 +1830,6 @@ static int ctnetlink_get_ct_dying(struct sk_buff *skb, if (info->nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { .dump = ctnetlink_dump_dying, - .done = ctnetlink_done_list, }; return netlink_dump_start(info->sk, skb, info->nlh, &c); } @@ -1862,7 +1844,6 @@ static int ctnetlink_get_ct_unconfirmed(struct sk_buff *skb, if (info->nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { .dump = ctnetlink_dump_unconfirmed, - .done = ctnetlink_done_list, }; return netlink_dump_start(info->sk, skb, info->nlh, &c); } diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index c1082de09656..68e273d8821a 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -151,12 +151,12 @@ static void nft_ctx_init(struct nft_ctx *ctx, bitmap_zero(ctx->reg_inited, NFT_REG32_NUM); } -static struct nft_trans *nft_trans_alloc_gfp(const struct nft_ctx *ctx, - int msg_type, u32 size, gfp_t gfp) +static struct nft_trans *nft_trans_alloc(const struct nft_ctx *ctx, + int msg_type, u32 size) { struct nft_trans *trans; - trans = kzalloc(size, gfp); + trans = kzalloc(size, GFP_KERNEL); if (trans == NULL) return NULL; @@ -172,12 +172,6 @@ static struct nft_trans *nft_trans_alloc_gfp(const struct nft_ctx *ctx, return trans; } -static struct nft_trans *nft_trans_alloc(const struct nft_ctx *ctx, - int msg_type, u32 size) -{ - return nft_trans_alloc_gfp(ctx, msg_type, size, GFP_KERNEL); -} - static struct nft_trans_binding *nft_trans_get_binding(struct nft_trans *trans) { switch (trans->msg_type) { @@ -442,8 +436,7 @@ static bool nft_trans_collapse_set_elem_allowed(const struct nft_trans_elem *a, static bool nft_trans_collapse_set_elem(struct nftables_pernet *nft_net, struct nft_trans_elem *tail, - struct nft_trans_elem *trans, - gfp_t gfp) + struct nft_trans_elem *trans) { unsigned int nelems, old_nelems = tail->nelems; struct nft_trans_elem *new_trans; @@ -466,9 +459,11 @@ static bool nft_trans_collapse_set_elem(struct nftables_pernet *nft_net, /* krealloc might free tail which invalidates list pointers */ list_del_init(&tail->nft_trans.list); - new_trans = krealloc(tail, struct_size(tail, elems, nelems), gfp); + new_trans = krealloc(tail, struct_size(tail, elems, nelems), + GFP_KERNEL); if (!new_trans) { - list_add_tail(&tail->nft_trans.list, &nft_net->commit_list); + list_add_tail(&tail->nft_trans.list, + &nft_net->commit_list); return false; } @@ -484,7 +479,7 @@ static bool nft_trans_collapse_set_elem(struct nftables_pernet *nft_net, } static bool nft_trans_try_collapse(struct nftables_pernet *nft_net, - struct nft_trans *trans, gfp_t gfp) + struct nft_trans *trans) { struct nft_trans *tail; @@ -501,7 +496,7 @@ static bool nft_trans_try_collapse(struct nftables_pernet *nft_net, case NFT_MSG_DELSETELEM: return nft_trans_collapse_set_elem(nft_net, nft_trans_container_elem(tail), - nft_trans_container_elem(trans), gfp); + 
nft_trans_container_elem(trans)); } return false; @@ -537,17 +532,14 @@ static void nft_trans_commit_list_add_tail(struct net *net, struct nft_trans *tr } } -static void nft_trans_commit_list_add_elem(struct net *net, struct nft_trans *trans, - gfp_t gfp) +static void nft_trans_commit_list_add_elem(struct net *net, struct nft_trans *trans) { struct nftables_pernet *nft_net = nft_pernet(net); WARN_ON_ONCE(trans->msg_type != NFT_MSG_NEWSETELEM && trans->msg_type != NFT_MSG_DELSETELEM); - might_alloc(gfp); - - if (nft_trans_try_collapse(nft_net, trans, gfp)) { + if (nft_trans_try_collapse(nft_net, trans)) { kfree(trans); return; } @@ -7570,7 +7562,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, } ue->priv = elem_priv; - nft_trans_commit_list_add_elem(ctx->net, trans, GFP_KERNEL); + nft_trans_commit_list_add_elem(ctx->net, trans); goto err_elem_free; } } @@ -7594,7 +7586,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, } nft_trans_container_elem(trans)->elems[0].priv = elem.priv; - nft_trans_commit_list_add_elem(ctx->net, trans, GFP_KERNEL); + nft_trans_commit_list_add_elem(ctx->net, trans); return 0; err_set_full: @@ -7860,7 +7852,7 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, nft_setelem_data_deactivate(ctx->net, set, elem.priv); nft_trans_container_elem(trans)->elems[0].priv = elem.priv; - nft_trans_commit_list_add_elem(ctx->net, trans, GFP_KERNEL); + nft_trans_commit_list_add_elem(ctx->net, trans); return 0; fail_ops: @@ -7885,9 +7877,8 @@ static int nft_setelem_flush(const struct nft_ctx *ctx, if (!nft_set_elem_active(ext, iter->genmask)) return 0; - trans = nft_trans_alloc_gfp(ctx, NFT_MSG_DELSETELEM, - struct_size_t(struct nft_trans_elem, elems, 1), - GFP_ATOMIC); + trans = nft_trans_alloc(ctx, NFT_MSG_DELSETELEM, + struct_size_t(struct nft_trans_elem, elems, 1)); if (!trans) return -ENOMEM; @@ -7898,7 +7889,7 @@ static int nft_setelem_flush(const struct nft_ctx *ctx, nft_trans_elem_set(trans) = set; nft_trans_container_elem(trans)->nelems = 1; nft_trans_container_elem(trans)->elems[0].priv = elem_priv; - nft_trans_commit_list_add_elem(ctx->net, trans, GFP_ATOMIC); + nft_trans_commit_list_add_elem(ctx->net, trans); return 0; } @@ -7915,7 +7906,7 @@ static int __nft_set_catchall_flush(const struct nft_ctx *ctx, nft_setelem_data_deactivate(ctx->net, set, elem_priv); nft_trans_container_elem(trans)->elems[0].priv = elem_priv; - nft_trans_commit_list_add_elem(ctx->net, trans, GFP_KERNEL); + nft_trans_commit_list_add_elem(ctx->net, trans); return 0; } diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c index 225ff293cd50..14dd1c0698c3 100644 --- a/net/netfilter/nft_flow_offload.c +++ b/net/netfilter/nft_flow_offload.c @@ -9,7 +9,7 @@ #include <linux/netfilter/nf_conntrack_common.h> #include <linux/netfilter/nf_tables.h> #include <net/ip.h> -#include <net/inet_dscp.h> +#include <net/flow.h> #include <net/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_core.h> #include <net/netfilter/nf_conntrack_core.h> @@ -236,7 +236,7 @@ static int nft_flow_route(const struct nft_pktinfo *pkt, fl.u.ip4.saddr = ct->tuplehash[!dir].tuple.src.u3.ip; fl.u.ip4.flowi4_oif = nft_in(pkt)->ifindex; fl.u.ip4.flowi4_iif = this_dst->dev->ifindex; - fl.u.ip4.flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(ip_hdr(pkt->skb))); + fl.u.ip4.flowi4_dscp = ip4h_dscp(ip_hdr(pkt->skb)); fl.u.ip4.flowi4_mark = pkt->skb->mark; fl.u.ip4.flowi4_flags = FLOWI_FLAG_ANYSRC; break; diff --git a/net/netfilter/nft_payload.c 
b/net/netfilter/nft_payload.c index 7dfc5343dae4..b0214418f75a 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -40,7 +40,7 @@ static bool nft_payload_rebuild_vlan_hdr(const struct sk_buff *skb, int mac_off, /* add vlan header into the user buffer for if tag was removed by offloads */ static bool -nft_payload_copy_vlan(u32 *d, const struct sk_buff *skb, u8 offset, u8 len) +nft_payload_copy_vlan(u32 *d, const struct sk_buff *skb, u16 offset, u8 len) { int mac_off = skb_mac_header(skb) - skb->data; u8 *vlanh, *dst_u8 = (u8 *) d; @@ -212,7 +212,7 @@ static const struct nla_policy nft_payload_policy[NFTA_PAYLOAD_MAX + 1] = { [NFTA_PAYLOAD_SREG] = { .type = NLA_U32 }, [NFTA_PAYLOAD_DREG] = { .type = NLA_U32 }, [NFTA_PAYLOAD_BASE] = { .type = NLA_U32 }, - [NFTA_PAYLOAD_OFFSET] = NLA_POLICY_MAX(NLA_BE32, 255), + [NFTA_PAYLOAD_OFFSET] = { .type = NLA_BE32 }, [NFTA_PAYLOAD_LEN] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_PAYLOAD_CSUM_TYPE] = { .type = NLA_U32 }, [NFTA_PAYLOAD_CSUM_OFFSET] = NLA_POLICY_MAX(NLA_BE32, 255), @@ -684,7 +684,7 @@ static const struct nft_expr_ops nft_payload_inner_ops = { static inline void nft_csum_replace(__sum16 *sum, __wsum fsum, __wsum tsum) { - *sum = csum_fold(csum_add(csum_sub(~csum_unfold(*sum), fsum), tsum)); + csum_replace4(sum, (__force __be32)fsum, (__force __be32)tsum); if (*sum == 0) *sum = CSUM_MANGLED_0; } @@ -797,7 +797,7 @@ static int nft_payload_csum_inet(struct sk_buff *skb, const u32 *src, struct nft_payload_set { enum nft_payload_bases base:8; - u8 offset; + u16 offset; u8 len; u8 sreg; u8 csum_type; @@ -812,7 +812,7 @@ struct nft_payload_vlan_hdr { }; static bool -nft_payload_set_vlan(const u32 *src, struct sk_buff *skb, u8 offset, u8 len, +nft_payload_set_vlan(const u32 *src, struct sk_buff *skb, u16 offset, u8 len, int *vlan_hlen) { struct nft_payload_vlan_hdr *vlanh; @@ -940,14 +940,18 @@ static int nft_payload_set_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { + u32 csum_offset, offset, csum_type = NFT_PAYLOAD_CSUM_NONE; struct nft_payload_set *priv = nft_expr_priv(expr); - u32 csum_offset, csum_type = NFT_PAYLOAD_CSUM_NONE; int err; priv->base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE])); - priv->offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET])); priv->len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN])); + err = nft_parse_u32_check(tb[NFTA_PAYLOAD_OFFSET], U16_MAX, &offset); + if (err < 0) + return err; + priv->offset = offset; + if (tb[NFTA_PAYLOAD_CSUM_TYPE]) csum_type = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_TYPE])); if (tb[NFTA_PAYLOAD_CSUM_OFFSET]) { @@ -1069,7 +1073,7 @@ nft_payload_select_ops(const struct nft_ctx *ctx, if (tb[NFTA_PAYLOAD_DREG] == NULL) return ERR_PTR(-EINVAL); - err = nft_parse_u32_check(tb[NFTA_PAYLOAD_OFFSET], U8_MAX, &offset); + err = nft_parse_u32_check(tb[NFTA_PAYLOAD_OFFSET], U16_MAX, &offset); if (err < 0) return ERR_PTR(err); diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 266d0c637225..ba01ce75d6de 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -30,6 +30,7 @@ struct nft_rhash { struct nft_rhash_elem { struct nft_elem_priv priv; struct rhash_head node; + struct llist_node walk_node; u32 wq_gc_seq; struct nft_set_ext ext; }; @@ -144,6 +145,7 @@ nft_rhash_update(struct nft_set *set, const u32 *key, goto err1; he = nft_elem_priv_cast(elem_priv); + init_llist_node(&he->walk_node); prev = rhashtable_lookup_get_insert_key(&priv->ht, &arg, &he->node, nft_rhash_params); if 
(IS_ERR(prev)) @@ -180,6 +182,7 @@ static int nft_rhash_insert(const struct net *net, const struct nft_set *set, }; struct nft_rhash_elem *prev; + init_llist_node(&he->walk_node); prev = rhashtable_lookup_get_insert_key(&priv->ht, &arg, &he->node, nft_rhash_params); if (IS_ERR(prev)) @@ -261,12 +264,12 @@ static bool nft_rhash_delete(const struct nft_set *set, return true; } -static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set, - struct nft_set_iter *iter) +static void nft_rhash_walk_ro(const struct nft_ctx *ctx, struct nft_set *set, + struct nft_set_iter *iter) { struct nft_rhash *priv = nft_set_priv(set); - struct nft_rhash_elem *he; struct rhashtable_iter hti; + struct nft_rhash_elem *he; rhashtable_walk_enter(&priv->ht, &hti); rhashtable_walk_start(&hti); @@ -295,6 +298,97 @@ cont: rhashtable_walk_exit(&hti); } +static void nft_rhash_walk_update(const struct nft_ctx *ctx, + struct nft_set *set, + struct nft_set_iter *iter) +{ + struct nft_rhash *priv = nft_set_priv(set); + struct nft_rhash_elem *he, *tmp; + struct llist_node *first_node; + struct rhashtable_iter hti; + LLIST_HEAD(walk_list); + + lockdep_assert_held(&nft_pernet(ctx->net)->commit_mutex); + + if (set->in_update_walk) { + /* This can happen with bogus rulesets during ruleset validation + * when a verdict map causes a jump back to the same map. + * + * Without this extra check the walk_next loop below will see + * elems on the callers walk_list and skip (not validate) them. + */ + iter->err = -EMLINK; + return; + } + + /* walk happens under RCU. + * + * We create a snapshot list so ->iter callback can sleep. + * commit_mutex is held, elements can ... + * .. be added in parallel from dataplane (dynset) + * .. be marked as dead in parallel from dataplane (dynset). + * .. be queued for removal in parallel (gc timeout). + * .. not be freed: transaction mutex is held. + */ + rhashtable_walk_enter(&priv->ht, &hti); + rhashtable_walk_start(&hti); + + while ((he = rhashtable_walk_next(&hti))) { + if (IS_ERR(he)) { + if (PTR_ERR(he) != -EAGAIN) { + iter->err = PTR_ERR(he); + break; + } + + continue; + } + + /* rhashtable resized during walk, skip */ + if (llist_on_list(&he->walk_node)) + continue; + + llist_add(&he->walk_node, &walk_list); + } + rhashtable_walk_stop(&hti); + rhashtable_walk_exit(&hti); + + first_node = __llist_del_all(&walk_list); + set->in_update_walk = true; + llist_for_each_entry_safe(he, tmp, first_node, walk_node) { + if (iter->err == 0) { + iter->err = iter->fn(ctx, set, iter, &he->priv); + if (iter->err == 0) + iter->count++; + } + + /* all entries must be cleared again, else next ->walk iteration + * will skip entries. 
+ */ + init_llist_node(&he->walk_node); + } + set->in_update_walk = false; +} + +static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set, + struct nft_set_iter *iter) +{ + switch (iter->type) { + case NFT_ITER_UPDATE: + /* only relevant for netlink dumps which use READ type */ + WARN_ON_ONCE(iter->skip != 0); + + nft_rhash_walk_update(ctx, set, iter); + break; + case NFT_ITER_READ: + nft_rhash_walk_ro(ctx, set, iter); + break; + default: + iter->err = -EINVAL; + WARN_ON_ONCE(1); + break; + } +} + static bool nft_rhash_expr_needs_gc_run(const struct nft_set *set, struct nft_set_ext *ext) { diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 9a10251228fd..4b64c3bd8e70 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -397,7 +397,7 @@ int pipapo_refill(unsigned long *map, unsigned int len, unsigned int rules, } /** - * pipapo_get() - Get matching element reference given key data + * pipapo_get_slow() - Get matching element reference given key data * @m: storage containing the set elements * @data: Key data to be matched against existing elements * @genmask: If set, check that element is active in given genmask @@ -414,12 +414,12 @@ int pipapo_refill(unsigned long *map, unsigned int len, unsigned int rules, * * Return: pointer to &struct nft_pipapo_elem on match, NULL otherwise. */ -static struct nft_pipapo_elem *pipapo_get(const struct nft_pipapo_match *m, - const u8 *data, u8 genmask, - u64 tstamp) +static struct nft_pipapo_elem *pipapo_get_slow(const struct nft_pipapo_match *m, + const u8 *data, u8 genmask, + u64 tstamp) { + unsigned long *res_map, *fill_map, *map; struct nft_pipapo_scratch *scratch; - unsigned long *res_map, *fill_map; const struct nft_pipapo_field *f; bool map_index; int i; @@ -429,11 +429,13 @@ static struct nft_pipapo_elem *pipapo_get(const struct nft_pipapo_match *m, scratch = *raw_cpu_ptr(m->scratch); if (unlikely(!scratch)) goto out; + __local_lock_nested_bh(&scratch->bh_lock); map_index = scratch->map_index; - res_map = scratch->map + (map_index ? m->bsize_max : 0); - fill_map = scratch->map + (map_index ? 0 : m->bsize_max); + map = NFT_PIPAPO_LT_ALIGN(&scratch->__map[0]); + res_map = map + (map_index ? m->bsize_max : 0); + fill_map = map + (map_index ? 0 : m->bsize_max); pipapo_resmap_init(m, res_map); @@ -464,6 +466,7 @@ next_match: last); if (b < 0) { scratch->map_index = map_index; + __local_unlock_nested_bh(&scratch->bh_lock); local_bh_enable(); return NULL; @@ -483,6 +486,7 @@ next_match: * *next* bitmap (not initial) for the next packet. */ scratch->map_index = map_index; + __local_unlock_nested_bh(&scratch->bh_lock); local_bh_enable(); return e; } @@ -497,12 +501,47 @@ next_match: data += NFT_PIPAPO_GROUPS_PADDING(f); } + __local_unlock_nested_bh(&scratch->bh_lock); out: local_bh_enable(); return NULL; } /** + * pipapo_get() - Get matching element reference given key data + * @m: Storage containing the set elements + * @data: Key data to be matched against existing elements + * @genmask: If set, check that element is active in given genmask + * @tstamp: Timestamp to check for expired elements + * + * This is a dispatcher function, either calling out the generic C + * implementation or, if available, the AVX2 one. + * This helper is only called from the control plane, with either RCU + * read lock or transaction mutex held. + * + * Return: pointer to &struct nft_pipapo_elem on match, NULL otherwise. 
+ */ +static struct nft_pipapo_elem *pipapo_get(const struct nft_pipapo_match *m, + const u8 *data, u8 genmask, + u64 tstamp) +{ + struct nft_pipapo_elem *e; + + local_bh_disable(); + +#if defined(CONFIG_X86_64) && !defined(CONFIG_UML) + if (boot_cpu_has(X86_FEATURE_AVX2) && irq_fpu_usable()) { + e = pipapo_get_avx2(m, data, genmask, tstamp); + local_bh_enable(); + return e; + } +#endif + e = pipapo_get_slow(m, data, genmask, tstamp); + local_bh_enable(); + return e; +} + +/** * nft_pipapo_lookup() - Dataplane frontend for main lookup function * @net: Network namespace * @set: nftables API set representation * @key: nftables API element representation containing key data @@ -523,7 +562,7 @@ nft_pipapo_lookup(const struct net *net, const struct nft_set *set, const struct nft_pipapo_elem *e; m = rcu_dereference(priv->match); - e = pipapo_get(m, (const u8 *)key, genmask, get_jiffies_64()); + e = pipapo_get_slow(m, (const u8 *)key, genmask, get_jiffies_64()); return e ? &e->ext : NULL; } @@ -1136,22 +1175,17 @@ static void pipapo_map(struct nft_pipapo_match *m, } /** - * pipapo_free_scratch() - Free per-CPU map at original (not aligned) address + * pipapo_free_scratch() - Free per-CPU map at original address * @m: Matching data * @cpu: CPU number */ static void pipapo_free_scratch(const struct nft_pipapo_match *m, unsigned int cpu) { struct nft_pipapo_scratch *s; - void *mem; s = *per_cpu_ptr(m->scratch, cpu); - if (!s) - return; - mem = s; - mem -= s->align_off; - kvfree(mem); + kvfree(s); } /** @@ -1168,11 +1202,8 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone, for_each_possible_cpu(i) { struct nft_pipapo_scratch *scratch; -#ifdef NFT_PIPAPO_ALIGN - void *scratch_aligned; - u32 align_off; -#endif - scratch = kvzalloc_node(struct_size(scratch, map, bsize_max * 2) + + + scratch = kvzalloc_node(struct_size(scratch, __map, bsize_max * 2) + NFT_PIPAPO_ALIGN_HEADROOM, GFP_KERNEL_ACCOUNT, cpu_to_node(i)); if (!scratch) { @@ -1187,23 +1218,7 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone, } pipapo_free_scratch(clone, i); - -#ifdef NFT_PIPAPO_ALIGN - /* Align &scratch->map (not the struct itself): the extra - * %NFT_PIPAPO_ALIGN_HEADROOM bytes passed to kzalloc_node() - * above guarantee we can waste up to those bytes in order - * to align the map field regardless of its offset within - * the struct. 
- */ - BUILD_BUG_ON(offsetof(struct nft_pipapo_scratch, map) > NFT_PIPAPO_ALIGN_HEADROOM); - - scratch_aligned = NFT_PIPAPO_LT_ALIGN(&scratch->map); - scratch_aligned -= offsetof(struct nft_pipapo_scratch, map); - align_off = scratch_aligned - (void *)scratch; - - scratch = scratch_aligned; - scratch->align_off = align_off; -#endif + local_lock_init(&scratch->bh_lock); *per_cpu_ptr(clone->scratch, i) = scratch; } diff --git a/net/netfilter/nft_set_pipapo.h b/net/netfilter/nft_set_pipapo.h index 4a2ff85ce1c4..eaab422aa56a 100644 --- a/net/netfilter/nft_set_pipapo.h +++ b/net/netfilter/nft_set_pipapo.h @@ -124,14 +124,14 @@ struct nft_pipapo_field { /** * struct nft_pipapo_scratch - percpu data used for lookup and matching + * @bh_lock: PREEMPT_RT local spinlock * @map_index: Current working bitmap index, toggled between field matches - * @align_off: Offset to get the originally allocated address - * @map: store partial matching results during lookup + * @__map: store partial matching results during lookup */ struct nft_pipapo_scratch { + local_lock_t bh_lock; u8 map_index; - u32 align_off; - unsigned long map[]; + unsigned long __map[]; }; /** diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c index 2f090e253caf..7559306d0aed 100644 --- a/net/netfilter/nft_set_pipapo_avx2.c +++ b/net/netfilter/nft_set_pipapo_avx2.c @@ -1099,7 +1099,7 @@ bool nft_pipapo_avx2_estimate(const struct nft_set_desc *desc, u32 features, desc->field_count < NFT_PIPAPO_MIN_FIELDS) return false; - if (!boot_cpu_has(X86_FEATURE_AVX2) || !boot_cpu_has(X86_FEATURE_AVX)) + if (!boot_cpu_has(X86_FEATURE_AVX2)) return false; est->size = pipapo_estimate_size(desc); @@ -1133,66 +1133,50 @@ static inline void pipapo_resmap_init_avx2(const struct nft_pipapo_match *m, uns } /** - * nft_pipapo_avx2_lookup() - Lookup function for AVX2 implementation - * @net: Network namespace - * @set: nftables API set representation - * @key: nftables API element representation containing key data + * pipapo_get_avx2() - Lookup function for AVX2 implementation + * @m: Storage containing the set elements + * @data: Key data to be matched against existing elements + * @genmask: If set, check that element is active in given genmask + * @tstamp: Timestamp to check for expired elements * * For more details, see DOC: Theory of Operation in nft_set_pipapo.c. * * This implementation exploits the repetitive characteristic of the algorithm * to provide a fast, vectorised version using the AVX2 SIMD instruction set. * - * Return: true on match, false otherwise. + * The caller must check that the FPU is usable. + * This function must be called with BH disabled. + * + * Return: pointer to &struct nft_pipapo_elem on match, NULL otherwise. 
*/ -const struct nft_set_ext * -nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set, - const u32 *key) +struct nft_pipapo_elem *pipapo_get_avx2(const struct nft_pipapo_match *m, + const u8 *data, u8 genmask, + u64 tstamp) { - struct nft_pipapo *priv = nft_set_priv(set); - const struct nft_set_ext *ext = NULL; struct nft_pipapo_scratch *scratch; - u8 genmask = nft_genmask_cur(net); - const struct nft_pipapo_match *m; const struct nft_pipapo_field *f; - const u8 *rp = (const u8 *)key; - unsigned long *res, *fill; + unsigned long *res, *fill, *map; bool map_index; int i; - local_bh_disable(); - - if (unlikely(!irq_fpu_usable())) { - ext = nft_pipapo_lookup(net, set, key); + scratch = *raw_cpu_ptr(m->scratch); + if (unlikely(!scratch)) + return NULL; - local_bh_enable(); - return ext; - } + __local_lock_nested_bh(&scratch->bh_lock); + map_index = scratch->map_index; + map = NFT_PIPAPO_LT_ALIGN(&scratch->__map[0]); + res = map + (map_index ? m->bsize_max : 0); + fill = map + (map_index ? 0 : m->bsize_max); - m = rcu_dereference(priv->match); + pipapo_resmap_init_avx2(m, res); - /* This also protects access to all data related to scratch maps. - * - * Note that we don't need a valid MXCSR state for any of the + /* Note that we don't need a valid MXCSR state for any of the * operations we use here, so pass 0 as mask and spare a LDMXCSR * instruction. */ kernel_fpu_begin_mask(0); - scratch = *raw_cpu_ptr(m->scratch); - if (unlikely(!scratch)) { - kernel_fpu_end(); - local_bh_enable(); - return NULL; - } - - map_index = scratch->map_index; - - res = scratch->map + (map_index ? m->bsize_max : 0); - fill = scratch->map + (map_index ? 0 : m->bsize_max); - - pipapo_resmap_init_avx2(m, res); - nft_pipapo_avx2_prepare(); next_match: @@ -1202,7 +1186,7 @@ next_match: #define NFT_SET_PIPAPO_AVX2_LOOKUP(b, n) \ (ret = nft_pipapo_avx2_lookup_##b##b_##n(res, fill, f, \ - ret, rp, \ + ret, data, \ first, last)) if (likely(f->bb == 8)) { @@ -1218,7 +1202,7 @@ next_match: NFT_SET_PIPAPO_AVX2_LOOKUP(8, 16); } else { ret = nft_pipapo_avx2_lookup_slow(m, res, fill, f, - ret, rp, + ret, data, first, last); } } else { @@ -1234,7 +1218,7 @@ next_match: NFT_SET_PIPAPO_AVX2_LOOKUP(4, 32); } else { ret = nft_pipapo_avx2_lookup_slow(m, res, fill, f, - ret, rp, + ret, data, first, last); } } @@ -1242,29 +1226,75 @@ next_match: #undef NFT_SET_PIPAPO_AVX2_LOOKUP - if (ret < 0) - goto out; + if (ret < 0) { + scratch->map_index = map_index; + kernel_fpu_end(); + __local_unlock_nested_bh(&scratch->bh_lock); + return NULL; + } if (last) { - const struct nft_set_ext *e = &f->mt[ret].e->ext; + struct nft_pipapo_elem *e; - if (unlikely(nft_set_elem_expired(e) || - !nft_set_elem_active(e, genmask))) + e = f->mt[ret].e; + if (unlikely(__nft_set_elem_expired(&e->ext, tstamp) || + !nft_set_elem_active(&e->ext, genmask))) goto next_match; - ext = e; - goto out; + scratch->map_index = map_index; + kernel_fpu_end(); + __local_unlock_nested_bh(&scratch->bh_lock); + return e; } + map_index = !map_index; swap(res, fill); - rp += NFT_PIPAPO_GROUPS_PADDED_SIZE(f); + data += NFT_PIPAPO_GROUPS_PADDED_SIZE(f); } -out: - if (i % 2) - scratch->map_index = !map_index; kernel_fpu_end(); + __local_unlock_nested_bh(&scratch->bh_lock); + return NULL; +} + +/** + * nft_pipapo_avx2_lookup() - Dataplane frontend for AVX2 implementation + * @net: Network namespace + * @set: nftables API set representation + * @key: nftables API element representation containing key data + * + * This function is called from the data path. 
It will search for + * an element matching the given key in the current active copy using + * the AVX2 routines if the FPU is usable or fall back to the generic + * implementation of the algorithm otherwise. + * + * Return: nftables API extension pointer or NULL if no match. + */ +const struct nft_set_ext * +nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set, + const u32 *key) +{ + struct nft_pipapo *priv = nft_set_priv(set); + u8 genmask = nft_genmask_cur(net); + const struct nft_pipapo_match *m; + const u8 *rp = (const u8 *)key; + const struct nft_pipapo_elem *e; + + local_bh_disable(); + + if (unlikely(!irq_fpu_usable())) { + const struct nft_set_ext *ext; + + ext = nft_pipapo_lookup(net, set, key); + + local_bh_enable(); + return ext; + } + + m = rcu_dereference(priv->match); + + e = pipapo_get_avx2(m, rp, genmask, get_jiffies_64()); local_bh_enable(); - return ext; + return e ? &e->ext : NULL; } diff --git a/net/netfilter/nft_set_pipapo_avx2.h b/net/netfilter/nft_set_pipapo_avx2.h index dbb6aaca8a7a..c2999b63da3f 100644 --- a/net/netfilter/nft_set_pipapo_avx2.h +++ b/net/netfilter/nft_set_pipapo_avx2.h @@ -5,8 +5,12 @@ #include <asm/fpu/xstate.h> #define NFT_PIPAPO_ALIGN (XSAVE_YMM_SIZE / BITS_PER_BYTE) +struct nft_pipapo_match; bool nft_pipapo_avx2_estimate(const struct nft_set_desc *desc, u32 features, struct nft_set_estimate *est); +struct nft_pipapo_elem *pipapo_get_avx2(const struct nft_pipapo_match *m, + const u8 *data, u8 genmask, + u64 tstamp); #endif /* defined(CONFIG_X86_64) && !defined(CONFIG_UML) */ #endif /* _NFT_SET_PIPAPO_AVX2_H */ diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 938a257c069e..b311b66df3e9 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -584,15 +584,14 @@ nft_rbtree_deactivate(const struct net *net, const struct nft_set *set, return NULL; } -static void nft_rbtree_walk(const struct nft_ctx *ctx, - struct nft_set *set, - struct nft_set_iter *iter) +static void nft_rbtree_do_walk(const struct nft_ctx *ctx, + struct nft_set *set, + struct nft_set_iter *iter) { struct nft_rbtree *priv = nft_set_priv(set); struct nft_rbtree_elem *rbe; struct rb_node *node; - read_lock_bh(&priv->lock); for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) { rbe = rb_entry(node, struct nft_rbtree_elem, node); @@ -600,14 +599,34 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx, goto cont; iter->err = iter->fn(ctx, set, iter, &rbe->priv); - if (iter->err < 0) { - read_unlock_bh(&priv->lock); + if (iter->err < 0) return; - } cont: iter->count++; } - read_unlock_bh(&priv->lock); +} + +static void nft_rbtree_walk(const struct nft_ctx *ctx, + struct nft_set *set, + struct nft_set_iter *iter) +{ + struct nft_rbtree *priv = nft_set_priv(set); + + switch (iter->type) { + case NFT_ITER_UPDATE: + lockdep_assert_held(&nft_pernet(ctx->net)->commit_mutex); + nft_rbtree_do_walk(ctx, set, iter); + break; + case NFT_ITER_READ: + read_lock_bh(&priv->lock); + nft_rbtree_do_walk(ctx, set, iter); + read_unlock_bh(&priv->lock); + break; + default: + iter->err = -EINVAL; + WARN_ON_ONCE(1); + break; + } } static void nft_rbtree_gc_remove(struct net *net, struct nft_set *set, diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index e2f7080dd5d7..2b46c0cd752a 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -356,7 +356,7 @@ static void netlink_overrun(struct sock *sk) sk_error_report(sk); } } - atomic_inc(&sk->sk_drops); + sk_drops_inc(sk); } static 
void netlink_rcv_wake(struct sock *sk) @@ -2711,7 +2711,7 @@ static int netlink_native_seq_show(struct seq_file *seq, void *v) sk_wmem_alloc_get(s), READ_ONCE(nlk->cb_running), refcount_read(&s->sk_refcnt), - atomic_read(&s->sk_drops), + sk_drops_read(s), sock_i_ino(s) ); diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index b80bd3a90773..66366982f604 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -129,15 +129,13 @@ void ovs_flow_stats_get(const struct sw_flow *flow, struct ovs_flow_stats *ovs_stats, unsigned long *used, __be16 *tcp_flags) { - int cpu; + unsigned int cpu; *used = 0; *tcp_flags = 0; memset(ovs_stats, 0, sizeof(*ovs_stats)); - /* We open code this to make sure cpu 0 is always considered */ - for (cpu = 0; cpu < nr_cpu_ids; - cpu = cpumask_next(cpu, flow->cpu_used_mask)) { + for_each_cpu(cpu, flow->cpu_used_mask) { struct sw_flow_stats *stats = rcu_dereference_ovsl(flow->stats[cpu]); if (stats) { @@ -158,11 +156,9 @@ void ovs_flow_stats_get(const struct sw_flow *flow, /* Called with ovs_mutex. */ void ovs_flow_stats_clear(struct sw_flow *flow) { - int cpu; + unsigned int cpu; - /* We open code this to make sure cpu 0 is always considered */ - for (cpu = 0; cpu < nr_cpu_ids; - cpu = cpumask_next(cpu, flow->cpu_used_mask)) { + for_each_cpu(cpu, flow->cpu_used_mask) { struct sw_flow_stats *stats = ovsl_dereference(flow->stats[cpu]); if (stats) { diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index d108ae0bd0ee..ffc72a741a50 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -107,16 +107,15 @@ int ovs_flow_tbl_count(const struct flow_table *table) static void flow_free(struct sw_flow *flow) { - int cpu; + unsigned int cpu; if (ovs_identifier_is_key(&flow->id)) kfree(flow->id.unmasked_key); if (flow->sf_acts) ovs_nla_free_flow_actions((struct sw_flow_actions __force *) flow->sf_acts); - /* We open code this to make sure cpu 0 is always considered */ - for (cpu = 0; cpu < nr_cpu_ids; - cpu = cpumask_next(cpu, flow->cpu_used_mask)) { + + for_each_cpu(cpu, flow->cpu_used_mask) { if (flow->stats[cpu]) kmem_cache_free(flow_stats_cache, (struct sw_flow_stats __force *)flow->stats[cpu]); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index a7017d7f0927..9d42c4bd6e39 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2265,7 +2265,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, drop_n_acct: atomic_inc(&po->tp_drops); - atomic_inc(&sk->sk_drops); + sk_drops_inc(sk); drop_reason = SKB_DROP_REASON_PACKET_SOCK_ERROR; drop_n_restore: diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index a27efa4faa4e..238a9638d2b0 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -22,7 +22,7 @@ #include <net/phonet/pn_dev.h> /* Transport protocol registration */ -static const struct phonet_protocol *proto_tab[PHONET_NPROTO] __read_mostly; +static const struct phonet_protocol __rcu *proto_tab[PHONET_NPROTO] __read_mostly; static const struct phonet_protocol *phonet_proto_get(unsigned int protocol) { @@ -482,7 +482,7 @@ void phonet_proto_unregister(unsigned int protocol, const struct phonet_protocol *pp) { mutex_lock(&proto_tab_lock); - BUG_ON(proto_tab[protocol] != pp); + BUG_ON(rcu_access_pointer(proto_tab[protocol]) != pp); RCU_INIT_POINTER(proto_tab[protocol], NULL); mutex_unlock(&proto_tab_lock); synchronize_rcu(); diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 62527e1ebb88..4db564d9d522 100644 --- a/net/phonet/pep.c +++ 
b/net/phonet/pep.c @@ -376,7 +376,7 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb) case PNS_PEP_CTRL_REQ: if (skb_queue_len(&pn->ctrlreq_queue) >= PNPIPE_CTRLREQ_MAX) { - atomic_inc(&sk->sk_drops); + sk_drops_inc(sk); break; } __skb_pull(skb, 4); @@ -397,7 +397,7 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb) } if (pn->rx_credits == 0) { - atomic_inc(&sk->sk_drops); + sk_drops_inc(sk); err = -ENOBUFS; break; } @@ -567,7 +567,7 @@ static int pipe_handler_do_rcv(struct sock *sk, struct sk_buff *skb) } if (pn->rx_credits == 0) { - atomic_inc(&sk->sk_drops); + sk_drops_inc(sk); err = NET_RX_DROP; break; } diff --git a/net/phonet/socket.c b/net/phonet/socket.c index ea4d5e6533db..db2d552e9b32 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -587,7 +587,7 @@ static int pn_sock_seq_show(struct seq_file *seq, void *v) from_kuid_munged(seq_user_ns(seq), sk_uid(sk)), sock_i_ino(sk), refcount_read(&sk->sk_refcnt), sk, - atomic_read(&sk->sk_drops)); + sk_drops_read(sk)); } seq_pad(seq, '\n'); return 0; @@ -602,7 +602,7 @@ const struct seq_operations pn_sock_seq_ops = { #endif static struct { - struct sock *sk[256]; + struct sock __rcu *sk[256]; } pnres; /* @@ -654,7 +654,7 @@ int pn_sock_unbind_res(struct sock *sk, u8 res) return -EPERM; mutex_lock(&resource_mutex); - if (pnres.sk[res] == sk) { + if (rcu_access_pointer(pnres.sk[res]) == sk) { RCU_INIT_POINTER(pnres.sk[res], NULL); ret = 0; } @@ -673,7 +673,7 @@ void pn_sock_unbind_all_res(struct sock *sk) mutex_lock(&resource_mutex); for (res = 0; res < 256; res++) { - if (pnres.sk[res] == sk) { + if (rcu_access_pointer(pnres.sk[res]) == sk) { RCU_INIT_POINTER(pnres.sk[res], NULL); match++; } @@ -688,7 +688,7 @@ void pn_sock_unbind_all_res(struct sock *sk) } #ifdef CONFIG_PROC_FS -static struct sock **pn_res_get_idx(struct seq_file *seq, loff_t pos) +static struct sock __rcu **pn_res_get_idx(struct seq_file *seq, loff_t pos) { struct net *net = seq_file_net(seq); unsigned int i; @@ -697,7 +697,7 @@ static struct sock **pn_res_get_idx(struct seq_file *seq, loff_t pos) return NULL; for (i = 0; i < 256; i++) { - if (pnres.sk[i] == NULL) + if (rcu_access_pointer(pnres.sk[i]) == NULL) continue; if (!pos) return pnres.sk + i; @@ -706,7 +706,7 @@ static struct sock **pn_res_get_idx(struct seq_file *seq, loff_t pos) return NULL; } -static struct sock **pn_res_get_next(struct seq_file *seq, struct sock **sk) +static struct sock __rcu **pn_res_get_next(struct seq_file *seq, struct sock __rcu **sk) { struct net *net = seq_file_net(seq); unsigned int i; @@ -728,7 +728,7 @@ static void *pn_res_seq_start(struct seq_file *seq, loff_t *pos) static void *pn_res_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct sock **sk; + struct sock __rcu **sk; if (v == SEQ_START_TOKEN) sk = pn_res_get_idx(seq, 0); @@ -747,11 +747,12 @@ static void pn_res_seq_stop(struct seq_file *seq, void *v) static int pn_res_seq_show(struct seq_file *seq, void *v) { seq_setwidth(seq, 63); - if (v == SEQ_START_TOKEN) + if (v == SEQ_START_TOKEN) { seq_puts(seq, "rs uid inode"); - else { - struct sock **psk = v; - struct sock *sk = *psk; + } else { + struct sock __rcu **psk = v; + struct sock *sk = rcu_dereference_protected(*psk, + lockdep_is_held(&resource_mutex)); seq_printf(seq, "%02X %5u %lu", (int) (psk - pnres.sk), diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index 086a13170e09..4a7217fbeab6 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -242,7 +242,7 @@ static __poll_t rds_poll(struct file *file, struct socket *sock, 
if (rs->rs_snd_bytes < rds_sk_sndbuf(rs)) mask |= (EPOLLOUT | EPOLLWRNORM); if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) - mask |= POLLERR; + mask |= EPOLLERR; read_unlock_irqrestore(&rs->rs_recv_lock, flags); /* clear state any time we wake a seen-congested socket */ diff --git a/net/rds/connection.c b/net/rds/connection.c index d62f486ab29f..68bc88cce84e 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -57,16 +57,17 @@ static struct hlist_head *rds_conn_bucket(const struct in6_addr *laddr, static u32 rds6_hash_secret __read_mostly; static u32 rds_hash_secret __read_mostly; - u32 lhash, fhash, hash; + __be32 lhash, fhash; + u32 hash; net_get_random_once(&rds_hash_secret, sizeof(rds_hash_secret)); net_get_random_once(&rds6_hash_secret, sizeof(rds6_hash_secret)); - lhash = (__force u32)laddr->s6_addr32[3]; + lhash = laddr->s6_addr32[3]; #if IS_ENABLED(CONFIG_IPV6) - fhash = __ipv6_addr_jhash(faddr, rds6_hash_secret); + fhash = (__force __be32)__ipv6_addr_jhash(faddr, rds6_hash_secret); #else - fhash = (__force u32)faddr->s6_addr32[3]; + fhash = faddr->s6_addr32[3]; #endif hash = __inet_ehashfn(lhash, 0, fhash, 0, rds_hash_secret); diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index e53b7f266bd7..4248dfa816eb 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c @@ -1034,7 +1034,7 @@ void rds_ib_recv_cqe_handler(struct rds_ib_connection *ic, rds_ib_stats_inc(s_ib_rx_ring_empty); if (rds_ib_ring_low(&ic->i_recv_ring)) { - rds_ib_recv_refill(conn, 0, GFP_NOWAIT | __GFP_NOWARN); + rds_ib_recv_refill(conn, 0, GFP_NOWAIT); rds_ib_stats_inc(s_ib_rx_refill_from_cq); } } diff --git a/net/rds/message.c b/net/rds/message.c index 7af59d2443e5..199a899a43e9 100644 --- a/net/rds/message.c +++ b/net/rds/message.c @@ -44,8 +44,8 @@ static unsigned int rds_exthdr_size[__RDS_EXTHDR_MAX] = { [RDS_EXTHDR_VERSION] = sizeof(struct rds_ext_header_version), [RDS_EXTHDR_RDMA] = sizeof(struct rds_ext_header_rdma), [RDS_EXTHDR_RDMA_DEST] = sizeof(struct rds_ext_header_rdma_dest), -[RDS_EXTHDR_NPATHS] = sizeof(u16), -[RDS_EXTHDR_GEN_NUM] = sizeof(u32), +[RDS_EXTHDR_NPATHS] = sizeof(__be16), +[RDS_EXTHDR_GEN_NUM] = sizeof(__be32), }; void rds_message_addref(struct rds_message *rm) diff --git a/net/rds/rds.h b/net/rds/rds.h index dc360252c515..5b1c072e2e7f 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -93,7 +93,7 @@ enum { /* Max number of multipaths per RDS connection. 
Must be a power of 2 */ #define RDS_MPATH_WORKERS 8 -#define RDS_MPATH_HASH(rs, n) (jhash_1word((rs)->rs_bound_port, \ +#define RDS_MPATH_HASH(rs, n) (jhash_1word(ntohs((rs)->rs_bound_port), \ (rs)->rs_hash_initval) & ((n) - 1)) #define IS_CANONICAL(laddr, faddr) (htonl(laddr) < htonl(faddr)) diff --git a/net/rds/recv.c b/net/rds/recv.c index 5627f80013f8..66205d6924bf 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -202,8 +202,8 @@ static void rds_recv_hs_exthdrs(struct rds_header *hdr, unsigned int pos = 0, type, len; union { struct rds_ext_header_version version; - u16 rds_npaths; - u32 rds_gen_num; + __be16 rds_npaths; + __be32 rds_gen_num; } buffer; u32 new_peer_gen_num = 0; diff --git a/net/rds/send.c b/net/rds/send.c index 42d991bc8543..0b3d0ef2f008 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -1454,8 +1454,8 @@ rds_send_probe(struct rds_conn_path *cp, __be16 sport, if (RDS_HS_PROBE(be16_to_cpu(sport), be16_to_cpu(dport)) && cp->cp_conn->c_trans->t_mp_capable) { - u16 npaths = cpu_to_be16(RDS_MPATH_WORKERS); - u32 my_gen_num = cpu_to_be32(cp->cp_conn->c_my_gen_num); + __be16 npaths = cpu_to_be16(RDS_MPATH_WORKERS); + __be32 my_gen_num = cpu_to_be32(cp->cp_conn->c_my_gen_num); rds_message_add_extension(&rm->m_inc.i_hdr, RDS_EXTHDR_NPATHS, &npaths, diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 9e468e463467..ff6be5cfe2b0 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -1585,7 +1585,7 @@ void tcf_action_update_stats(struct tc_action *a, u64 bytes, u64 packets, } _bstats_update(&a->tcfa_bstats, bytes, packets); - a->tcfa_qstats.drops += drops; + atomic_add(drops, &a->tcfa_drops); if (hw) _bstats_update(&a->tcfa_bstats_hw, bytes, packets); } @@ -1594,8 +1594,9 @@ EXPORT_SYMBOL(tcf_action_update_stats); int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *p, int compat_mode) { - int err = 0; + struct gnet_stats_queue qstats = {0}; struct gnet_dump d; + int err = 0; if (p == NULL) goto errout; @@ -1619,14 +1620,17 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *p, if (err < 0) goto errout; + qstats.drops = atomic_read(&p->tcfa_drops); + qstats.overlimits = atomic_read(&p->tcfa_overlimits); + if (gnet_stats_copy_basic(&d, p->cpu_bstats, &p->tcfa_bstats, false) < 0 || gnet_stats_copy_basic_hw(&d, p->cpu_bstats_hw, &p->tcfa_bstats_hw, false) < 0 || gnet_stats_copy_rate_est(&d, &p->tcfa_rate_est) < 0 || gnet_stats_copy_queue(&d, p->cpu_qstats, - &p->tcfa_qstats, - p->tcfa_qstats.qlen) < 0) + &qstats, + qstats.qlen) < 0) goto errout; if (gnet_stats_finish_copy(&d) < 0) diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index f3abe0545989..8e69a919b4fe 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -72,7 +72,6 @@ static int reset_policy(struct tc_action *a, const struct nlattr *defdata, d = to_defact(a); spin_lock_bh(&d->tcf_lock); goto_ch = tcf_action_set_ctrlact(a, p->action, goto_ch); - memset(d->tcfd_defdata, 0, SIMP_MAX_DATA); nla_strscpy(d->tcfd_defdata, defdata, SIMP_MAX_DATA); spin_unlock_bh(&d->tcf_lock); if (goto_ch) diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index dc0229693461..a9e0c1326e2a 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -27,19 +27,18 @@ TC_INDIRECT_SCOPE int tcf_skbmod_act(struct sk_buff *skb, struct tcf_result *res) { struct tcf_skbmod *d = to_skbmod(a); - int action, max_edit_len, err; struct tcf_skbmod_params *p; + int max_edit_len, err; u64 flags; tcf_lastuse_update(&d->tcf_tm); 
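/*
 * A minimal sketch (not from the patch; demo_* names are invented) of
 * the pattern that this skbmod hunk and the tunnel_key/vlan hunks
 * below converge on: the verdict moves into the RCU-managed parameter
 * block, so the datapath reads one consistent snapshot and dump() can
 * rely on rcu_read_lock() instead of the per-action spinlock.
 */
struct demo_params {
	int action;		/* verdict, e.g. TC_ACT_PIPE */
	u64 flags;
	struct rcu_head rcu;	/* freed via kfree_rcu() on replace */
};

struct demo_action {
	struct demo_params __rcu *params;
};

static int demo_act(struct demo_action *d, struct sk_buff *skb)
{
	/* BH is disabled on the qdisc datapath, hence the _bh flavour. */
	struct demo_params *p = rcu_dereference_bh(d->params);

	/* ... edit skb according to p->flags ... */
	return p->action;	/* verdict from the same snapshot as flags */
}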
bstats_update(this_cpu_ptr(d->common.cpu_bstats), skb); - action = READ_ONCE(d->tcf_action); - if (unlikely(action == TC_ACT_SHOT)) + p = rcu_dereference_bh(d->skbmod_p); + if (unlikely(p->action == TC_ACT_SHOT)) goto drop; max_edit_len = skb_mac_header_len(skb); - p = rcu_dereference_bh(d->skbmod_p); flags = p->flags; /* tcf_skbmod_init() guarantees "flags" to be one of the following: @@ -85,7 +84,7 @@ TC_INDIRECT_SCOPE int tcf_skbmod_act(struct sk_buff *skb, INET_ECN_set_ce(skb); out: - return action; + return p->action; drop: qstats_overlimit_inc(this_cpu_ptr(d->common.cpu_qstats)); @@ -193,7 +192,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla, } p->flags = lflags; - + p->action = parm->action; if (ovr) spin_lock_bh(&d->tcf_lock); /* Protected by tcf_lock if overwriting existing action. */ @@ -248,10 +247,9 @@ static int tcf_skbmod_dump(struct sk_buff *skb, struct tc_action *a, opt.index = d->tcf_index; opt.refcnt = refcount_read(&d->tcf_refcnt) - ref; opt.bindcnt = atomic_read(&d->tcf_bindcnt) - bind; - spin_lock_bh(&d->tcf_lock); - opt.action = d->tcf_action; - p = rcu_dereference_protected(d->skbmod_p, - lockdep_is_held(&d->tcf_lock)); + rcu_read_lock(); + p = rcu_dereference(d->skbmod_p); + opt.action = p->action; opt.flags = p->flags; if (nla_put(skb, TCA_SKBMOD_PARMS, sizeof(opt), &opt)) goto nla_put_failure; @@ -269,10 +267,10 @@ static int tcf_skbmod_dump(struct sk_buff *skb, struct tc_action *a, if (nla_put_64bit(skb, TCA_SKBMOD_TM, sizeof(t), &t, TCA_SKBMOD_PAD)) goto nla_put_failure; - spin_unlock_bh(&d->tcf_lock); + rcu_read_unlock(); return skb->len; nla_put_failure: - spin_unlock_bh(&d->tcf_lock); + rcu_read_unlock(); nlmsg_trim(skb, b); return -1; } diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index 2cef4b08befb..876b30c5709e 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -29,13 +29,11 @@ TC_INDIRECT_SCOPE int tunnel_key_act(struct sk_buff *skb, { struct tcf_tunnel_key *t = to_tunnel_key(a); struct tcf_tunnel_key_params *params; - int action; params = rcu_dereference_bh(t->params); tcf_lastuse_update(&t->tcf_tm); tcf_action_update_bstats(&t->common, skb); - action = READ_ONCE(t->tcf_action); switch (params->tcft_action) { case TCA_TUNNEL_KEY_ACT_RELEASE: @@ -51,7 +49,7 @@ TC_INDIRECT_SCOPE int tunnel_key_act(struct sk_buff *skb, break; } - return action; + return params->action; } static const struct nla_policy @@ -532,6 +530,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, params_new->tcft_action = parm->t_action; params_new->tcft_enc_metadata = metadata; + params_new->action = parm->action; spin_lock_bh(&t->tcf_lock); goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); params_new = rcu_replace_pointer(t->params, params_new, @@ -726,10 +725,9 @@ static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a, }; struct tcf_t tm; - spin_lock_bh(&t->tcf_lock); - params = rcu_dereference_protected(t->params, - lockdep_is_held(&t->tcf_lock)); - opt.action = t->tcf_action; + rcu_read_lock(); + params = rcu_dereference(t->params); + opt.action = params->action; opt.t_action = params->tcft_action; if (nla_put(skb, TCA_TUNNEL_KEY_PARMS, sizeof(opt), &opt)) @@ -766,12 +764,12 @@ static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a, if (nla_put_64bit(skb, TCA_TUNNEL_KEY_TM, sizeof(tm), &tm, TCA_TUNNEL_KEY_PAD)) goto nla_put_failure; - spin_unlock_bh(&t->tcf_lock); + rcu_read_unlock(); return skb->len; nla_put_failure: - spin_unlock_bh(&t->tcf_lock); + 
rcu_read_unlock(); nlmsg_trim(skb, b); return -1; } diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index 383bf18b6862..a74621797d69 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -25,7 +25,6 @@ TC_INDIRECT_SCOPE int tcf_vlan_act(struct sk_buff *skb, { struct tcf_vlan *v = to_vlan(a); struct tcf_vlan_params *p; - int action; int err; u16 tci; @@ -38,8 +37,6 @@ TC_INDIRECT_SCOPE int tcf_vlan_act(struct sk_buff *skb, if (skb_at_tc_ingress(skb)) skb_push_rcsum(skb, skb->mac_len); - action = READ_ONCE(v->tcf_action); - p = rcu_dereference_bh(v->vlan_p); switch (p->tcfv_action) { @@ -97,7 +94,7 @@ out: skb_pull_rcsum(skb, skb->mac_len); skb_reset_mac_len(skb); - return action; + return p->action; drop: tcf_action_inc_drop_qstats(&v->common); @@ -255,6 +252,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, ETH_ALEN); } + p->action = parm->action; spin_lock_bh(&v->tcf_lock); goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); p = rcu_replace_pointer(v->vlan_p, p, lockdep_is_held(&v->tcf_lock)); @@ -297,9 +295,9 @@ static int tcf_vlan_dump(struct sk_buff *skb, struct tc_action *a, }; struct tcf_t t; - spin_lock_bh(&v->tcf_lock); - opt.action = v->tcf_action; - p = rcu_dereference_protected(v->vlan_p, lockdep_is_held(&v->tcf_lock)); + rcu_read_lock(); + p = rcu_dereference(v->vlan_p); + opt.action = p->action; opt.v_action = p->tcfv_action; if (nla_put(skb, TCA_VLAN_PARMS, sizeof(opt), &opt)) goto nla_put_failure; @@ -325,12 +323,12 @@ static int tcf_vlan_dump(struct sk_buff *skb, struct tc_action *a, tcf_tm_dump(&t, &v->tcf_tm); if (nla_put_64bit(skb, TCA_VLAN_TM, sizeof(t), &t, TCA_VLAN_PAD)) goto nla_put_failure; - spin_unlock_bh(&v->tcf_lock); + rcu_read_unlock(); return skb->len; nla_put_failure: - spin_unlock_bh(&v->tcf_lock); + rcu_read_unlock(); nlmsg_trim(skb, b); return -1; } diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index d7c767b861a4..1e058b46d3e1 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -431,7 +431,7 @@ struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) { if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) && - !memcmp(&rtab->data, nla_data(tab), 1024)) { + !memcmp(&rtab->data, nla_data(tab), TC_RTAB_SIZE)) { rtab->refcnt++; return rtab; } @@ -441,7 +441,7 @@ struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, if (rtab) { rtab->rate = *r; rtab->refcnt = 1; - memcpy(rtab->data, nla_data(tab), 1024); + memcpy(rtab->data, nla_data(tab), TC_RTAB_SIZE); if (r->linklayer == TC_LINKLAYER_UNAWARE) r->linklayer = __detect_linklayer(r, rtab->data); rtab->next = qdisc_rtab_list; diff --git a/net/sctp/Kconfig b/net/sctp/Kconfig index 24d5a35ce894..e947646a380c 100644 --- a/net/sctp/Kconfig +++ b/net/sctp/Kconfig @@ -7,9 +7,9 @@ menuconfig IP_SCTP tristate "The SCTP Protocol" depends on INET depends on IPV6 || IPV6=n - select CRYPTO - select CRYPTO_HMAC - select CRYPTO_SHA1 + select CRYPTO_LIB_SHA1 + select CRYPTO_LIB_SHA256 + select CRYPTO_LIB_UTILS select NET_CRC32C select NET_UDP_TUNNEL help @@ -49,46 +49,25 @@ config SCTP_DBG_OBJCNT 'cat /proc/net/sctp/sctp_dbg_objcnt' If unsure, say N + choice - prompt "Default SCTP cookie HMAC encoding" - default SCTP_DEFAULT_COOKIE_HMAC_MD5 + prompt "Default SCTP cookie authentication method" + default SCTP_DEFAULT_COOKIE_HMAC_SHA256 help - This option sets the default sctp cookie hmac algorithm - when in doubt select 'md5' + This option sets the default SCTP cookie authentication method, for + 
when a method hasn't been explicitly selected via the + net.sctp.cookie_hmac_alg sysctl. -config SCTP_DEFAULT_COOKIE_HMAC_MD5 - bool "Enable optional MD5 hmac cookie generation" - help - Enable optional MD5 hmac based SCTP cookie generation - select SCTP_COOKIE_HMAC_MD5 + If unsure, choose the default (HMAC-SHA256). -config SCTP_DEFAULT_COOKIE_HMAC_SHA1 - bool "Enable optional SHA1 hmac cookie generation" - help - Enable optional SHA1 hmac based SCTP cookie generation - select SCTP_COOKIE_HMAC_SHA1 +config SCTP_DEFAULT_COOKIE_HMAC_SHA256 + bool "HMAC-SHA256" config SCTP_DEFAULT_COOKIE_HMAC_NONE - bool "Use no hmac alg in SCTP cookie generation" - help - Use no hmac algorithm in SCTP cookie generation + bool "None" endchoice -config SCTP_COOKIE_HMAC_MD5 - bool "Enable optional MD5 hmac cookie generation" - help - Enable optional MD5 hmac based SCTP cookie generation - select CRYPTO_HMAC if SCTP_COOKIE_HMAC_MD5 - select CRYPTO_MD5 if SCTP_COOKIE_HMAC_MD5 - -config SCTP_COOKIE_HMAC_SHA1 - bool "Enable optional SHA1 hmac cookie generation" - help - Enable optional SHA1 hmac based SCTP cookie generation - select CRYPTO_HMAC if SCTP_COOKIE_HMAC_SHA1 - select CRYPTO_SHA1 if SCTP_COOKIE_HMAC_SHA1 - config INET_SCTP_DIAG depends on INET_DIAG def_tristate INET_DIAG diff --git a/net/sctp/auth.c b/net/sctp/auth.c index c58fffc86a0c..82aad477590e 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -12,36 +12,37 @@ * Vlad Yasevich <vladislav.yasevich@hp.com> */ -#include <crypto/hash.h> +#include <crypto/sha1.h> +#include <crypto/sha2.h> #include <linux/slab.h> #include <linux/types.h> -#include <linux/scatterlist.h> #include <net/sctp/sctp.h> #include <net/sctp/auth.h> -static struct sctp_hmac sctp_hmac_list[SCTP_AUTH_NUM_HMACS] = { +static const struct sctp_hmac sctp_hmac_list[SCTP_AUTH_NUM_HMACS] = { { /* id 0 is reserved. as all 0 */ .hmac_id = SCTP_AUTH_HMAC_ID_RESERVED_0, }, { .hmac_id = SCTP_AUTH_HMAC_ID_SHA1, - .hmac_name = "hmac(sha1)", - .hmac_len = SCTP_SHA1_SIG_SIZE, + .hmac_len = SHA1_DIGEST_SIZE, }, { /* id 2 is reserved as well */ .hmac_id = SCTP_AUTH_HMAC_ID_RESERVED_2, }, -#if IS_ENABLED(CONFIG_CRYPTO_SHA256) { .hmac_id = SCTP_AUTH_HMAC_ID_SHA256, - .hmac_name = "hmac(sha256)", - .hmac_len = SCTP_SHA256_SIG_SIZE, + .hmac_len = SHA256_DIGEST_SIZE, } -#endif }; +static bool sctp_hmac_supported(__u16 hmac_id) +{ + return hmac_id < ARRAY_SIZE(sctp_hmac_list) && + sctp_hmac_list[hmac_id].hmac_len != 0; +} void sctp_auth_key_put(struct sctp_auth_bytes *key) { @@ -444,76 +445,7 @@ struct sctp_shared_key *sctp_auth_get_shkey( return NULL; } -/* - * Initialize all the possible digest transforms that we can use. Right - * now, the supported digests are SHA1 and SHA256. We do this here once - * because of the restrictiong that transforms may only be allocated in - * user context. This forces us to pre-allocated all possible transforms - * at the endpoint init time. - */ -int sctp_auth_init_hmacs(struct sctp_endpoint *ep, gfp_t gfp) -{ - struct crypto_shash *tfm = NULL; - __u16 id; - - /* If the transforms are already allocated, we are done */ - if (ep->auth_hmacs) - return 0; - - /* Allocated the array of pointers to transorms */ - ep->auth_hmacs = kcalloc(SCTP_AUTH_NUM_HMACS, - sizeof(struct crypto_shash *), - gfp); - if (!ep->auth_hmacs) - return -ENOMEM; - - for (id = 0; id < SCTP_AUTH_NUM_HMACS; id++) { - - /* See is we support the id. Supported IDs have name and - * length fields set, so that we can allocated and use - * them. 
We can safely just check for name, for without the - * name, we can't allocate the TFM. - */ - if (!sctp_hmac_list[id].hmac_name) - continue; - - /* If this TFM has been allocated, we are all set */ - if (ep->auth_hmacs[id]) - continue; - - /* Allocate the ID */ - tfm = crypto_alloc_shash(sctp_hmac_list[id].hmac_name, 0, 0); - if (IS_ERR(tfm)) - goto out_err; - - ep->auth_hmacs[id] = tfm; - } - - return 0; - -out_err: - /* Clean up any successful allocations */ - sctp_auth_destroy_hmacs(ep->auth_hmacs); - ep->auth_hmacs = NULL; - return -ENOMEM; -} - -/* Destroy the hmac tfm array */ -void sctp_auth_destroy_hmacs(struct crypto_shash *auth_hmacs[]) -{ - int i; - - if (!auth_hmacs) - return; - - for (i = 0; i < SCTP_AUTH_NUM_HMACS; i++) { - crypto_free_shash(auth_hmacs[i]); - } - kfree(auth_hmacs); -} - - -struct sctp_hmac *sctp_auth_get_hmac(__u16 hmac_id) +const struct sctp_hmac *sctp_auth_get_hmac(__u16 hmac_id) { return &sctp_hmac_list[hmac_id]; } @@ -521,7 +453,8 @@ struct sctp_hmac *sctp_auth_get_hmac(__u16 hmac_id) /* Get an hmac description information that we can use to build * the AUTH chunk */ -struct sctp_hmac *sctp_auth_asoc_get_hmac(const struct sctp_association *asoc) +const struct sctp_hmac * +sctp_auth_asoc_get_hmac(const struct sctp_association *asoc) { struct sctp_hmac_algo_param *hmacs; __u16 n_elt; @@ -543,26 +476,10 @@ struct sctp_hmac *sctp_auth_asoc_get_hmac(const struct sctp_association *asoc) sizeof(struct sctp_paramhdr)) >> 1; for (i = 0; i < n_elt; i++) { id = ntohs(hmacs->hmac_ids[i]); - - /* Check the id is in the supported range. And - * see if we support the id. Supported IDs have name and - * length fields set, so that we can allocate and use - * them. We can safely just check for name, for without the - * name, we can't allocate the TFM. 
- */ - if (id > SCTP_AUTH_HMAC_ID_MAX || - !sctp_hmac_list[id].hmac_name) { - id = 0; - continue; - } - - break; + if (sctp_hmac_supported(id)) + return &sctp_hmac_list[id]; } - - if (id == 0) - return NULL; - - return &sctp_hmac_list[id]; + return NULL; } static int __sctp_auth_find_hmacid(__be16 *hmacs, int n_elts, __be16 hmac_id) @@ -606,7 +523,6 @@ int sctp_auth_asoc_verify_hmac_id(const struct sctp_association *asoc, void sctp_auth_asoc_set_default_hmac(struct sctp_association *asoc, struct sctp_hmac_algo_param *hmacs) { - struct sctp_endpoint *ep; __u16 id; int i; int n_params; @@ -617,16 +533,9 @@ void sctp_auth_asoc_set_default_hmac(struct sctp_association *asoc, n_params = (ntohs(hmacs->param_hdr.length) - sizeof(struct sctp_paramhdr)) >> 1; - ep = asoc->ep; for (i = 0; i < n_params; i++) { id = ntohs(hmacs->hmac_ids[i]); - - /* Check the id is in the supported range */ - if (id > SCTP_AUTH_HMAC_ID_MAX) - continue; - - /* If this TFM has been allocated, use this id */ - if (ep->auth_hmacs[id]) { + if (sctp_hmac_supported(id)) { asoc->default_hmac_id = id; break; } @@ -709,10 +618,9 @@ void sctp_auth_calculate_hmac(const struct sctp_association *asoc, struct sctp_shared_key *ep_key, gfp_t gfp) { struct sctp_auth_bytes *asoc_key; - struct crypto_shash *tfm; __u16 key_id, hmac_id; - unsigned char *end; int free_key = 0; + size_t data_len; __u8 *digest; /* Extract the info we need: @@ -733,19 +641,17 @@ void sctp_auth_calculate_hmac(const struct sctp_association *asoc, free_key = 1; } - /* set up scatter list */ - end = skb_tail_pointer(skb); - - tfm = asoc->ep->auth_hmacs[hmac_id]; - + data_len = skb_tail_pointer(skb) - (unsigned char *)auth; digest = (u8 *)(&auth->auth_hdr + 1); - if (crypto_shash_setkey(tfm, &asoc_key->data[0], asoc_key->len)) - goto free; - - crypto_shash_tfm_digest(tfm, (u8 *)auth, end - (unsigned char *)auth, - digest); + if (hmac_id == SCTP_AUTH_HMAC_ID_SHA1) { + hmac_sha1_usingrawkey(asoc_key->data, asoc_key->len, + (const u8 *)auth, data_len, digest); + } else { + WARN_ON_ONCE(hmac_id != SCTP_AUTH_HMAC_ID_SHA256); + hmac_sha256_usingrawkey(asoc_key->data, asoc_key->len, + (const u8 *)auth, data_len, digest); + } -free: if (free_key) sctp_auth_key_put(asoc_key); } @@ -788,14 +694,11 @@ int sctp_auth_ep_set_hmacs(struct sctp_endpoint *ep, for (i = 0; i < hmacs->shmac_num_idents; i++) { id = hmacs->shmac_idents[i]; - if (id > SCTP_AUTH_HMAC_ID_MAX) + if (!sctp_hmac_supported(id)) return -EOPNOTSUPP; if (SCTP_AUTH_HMAC_ID_SHA1 == id) has_sha1 = 1; - - if (!sctp_hmac_list[id].hmac_name) - return -EOPNOTSUPP; } if (!has_sha1) @@ -1021,8 +924,6 @@ int sctp_auth_deact_key_id(struct sctp_endpoint *ep, int sctp_auth_init(struct sctp_endpoint *ep, gfp_t gfp) { - int err = -ENOMEM; - /* Allocate space for HMACS and CHUNKS authentication * variables. There are arrays that we encode directly * into parameters to make the rest of the operations easier. @@ -1060,13 +961,6 @@ int sctp_auth_init(struct sctp_endpoint *ep, gfp_t gfp) ep->auth_chunk_list = auth_chunks; } - /* Allocate and initialize transorms arrays for supported - * HMACs. 
- */ - err = sctp_auth_init_hmacs(ep, gfp); - if (err) - goto nomem; - return 0; nomem: @@ -1075,7 +969,7 @@ nomem: kfree(ep->auth_chunk_list); ep->auth_hmacs_list = NULL; ep->auth_chunk_list = NULL; - return err; + return -ENOMEM; } void sctp_auth_free(struct sctp_endpoint *ep) @@ -1084,6 +978,4 @@ void sctp_auth_free(struct sctp_endpoint *ep) kfree(ep->auth_chunk_list); ep->auth_hmacs_list = NULL; ep->auth_chunk_list = NULL; - sctp_auth_destroy_hmacs(ep->auth_hmacs); - ep->auth_hmacs = NULL; } diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index fd4f8243cc35..c655b571ca01 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -184,7 +184,8 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, * DATA. */ if (sctp_auth_send_cid(SCTP_CID_DATA, asoc)) { - struct sctp_hmac *hmac_desc = sctp_auth_asoc_get_hmac(asoc); + const struct sctp_hmac *hmac_desc = + sctp_auth_asoc_get_hmac(asoc); if (hmac_desc) max_data -= SCTP_PAD4(sizeof(struct sctp_auth_chunk) + diff --git a/net/sctp/diag.c b/net/sctp/diag.c index 23359e522273..996c2018f0e6 100644 --- a/net/sctp/diag.c +++ b/net/sctp/diag.c @@ -173,7 +173,7 @@ static int inet_sctp_diag_fill(struct sock *sk, struct sctp_association *asoc, mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued; mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc); mem[SK_MEMINFO_BACKLOG] = READ_ONCE(sk->sk_backlog.len); - mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops); + mem[SK_MEMINFO_DROPS] = sk_drops_read(sk); if (nla_put(skb, INET_DIAG_SKMEMINFO, sizeof(mem), &mem) < 0) goto errout; diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 7e77b450697c..31e989dfe846 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -35,6 +35,15 @@ /* Forward declarations for internal helpers. */ static void sctp_endpoint_bh_rcv(struct work_struct *work); +static void gen_cookie_auth_key(struct hmac_sha256_key *key) +{ + u8 raw_key[SCTP_COOKIE_KEY_SIZE]; + + get_random_bytes(raw_key, sizeof(raw_key)); + hmac_sha256_preparekey(key, raw_key, sizeof(raw_key)); + memzero_explicit(raw_key, sizeof(raw_key)); +} + /* * Initialize the base fields of the endpoint structure. */ @@ -45,10 +54,6 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, struct net *net = sock_net(sk); struct sctp_shared_key *null_key; - ep->digest = kzalloc(SCTP_SIGNATURE_SIZE, gfp); - if (!ep->digest) - return NULL; - ep->asconf_enable = net->sctp.addip_enable; ep->auth_enable = net->sctp.auth_enable; if (ep->auth_enable) { @@ -90,8 +95,8 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, /* Get the receive buffer policy for this endpoint */ ep->rcvbuf_policy = net->sctp.rcvbuf_policy; - /* Initialize the secret key used with cookie. */ - get_random_bytes(ep->secret_key, sizeof(ep->secret_key)); + /* Generate the cookie authentication key. 
*/ + gen_cookie_auth_key(&ep->cookie_auth_key); /* SCTP-AUTH extensions*/ INIT_LIST_HEAD(&ep->endpoint_shared_keys); @@ -118,7 +123,6 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, nomem_shkey: sctp_auth_free(ep); nomem: - kfree(ep->digest); return NULL; } @@ -205,9 +209,6 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep) return; } - /* Free the digest buffer */ - kfree(ep->digest); - /* SCTP-AUTH: Free up AUTH releated data such as shared keys * chunks and hmacs arrays that were allocated */ @@ -218,7 +219,7 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep) sctp_inq_free(&ep->base.inqueue); sctp_bind_addr_free(&ep->base.bind_addr); - memset(ep->secret_key, 0, sizeof(ep->secret_key)); + memzero_explicit(&ep->cookie_auth_key, sizeof(ep->cookie_auth_key)); sk = ep->base.sk; /* Remove and free the port */ diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index a5ccada55f2b..9dbc24af749b 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -34,6 +34,7 @@ #include <linux/memblock.h> #include <linux/highmem.h> #include <linux/slab.h> +#include <net/flow.h> #include <net/net_namespace.h> #include <net/protocol.h> #include <net/ip.h> @@ -437,7 +438,7 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr, fl4->fl4_dport = daddr->v4.sin_port; fl4->flowi4_proto = IPPROTO_SCTP; if (asoc) { - fl4->flowi4_tos = inet_dscp_to_dsfield(dscp); + fl4->flowi4_dscp = dscp; fl4->flowi4_scope = ip_sock_rt_scope(asoc->base.sk); fl4->flowi4_oif = asoc->base.sk->sk_bound_dev_if; fl4->fl4_sport = htons(asoc->base.bind_addr.port); @@ -1334,14 +1335,9 @@ static int __net_init sctp_defaults_init(struct net *net) /* Whether Cookie Preservative is enabled(1) or not(0) */ net->sctp.cookie_preserve_enable = 1; - /* Default sctp sockets to use md5 as their hmac alg */ -#if defined (CONFIG_SCTP_DEFAULT_COOKIE_HMAC_MD5) - net->sctp.sctp_hmac_alg = "md5"; -#elif defined (CONFIG_SCTP_DEFAULT_COOKIE_HMAC_SHA1) - net->sctp.sctp_hmac_alg = "sha1"; -#else - net->sctp.sctp_hmac_alg = NULL; -#endif + /* Whether cookie authentication is enabled(1) or not(0) */ + net->sctp.cookie_auth_enable = + !IS_ENABLED(CONFIG_SCTP_DEFAULT_COOKIE_HMAC_NONE); /* Max.Burst - 4 */ net->sctp.max_burst = SCTP_DEFAULT_MAX_BURST; diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 3ead591c72fd..2c0017d058d4 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -30,7 +30,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include <crypto/hash.h> +#include <crypto/utils.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/ip.h> @@ -1319,7 +1319,7 @@ struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc, __u16 key_id) { struct sctp_authhdr auth_hdr; - struct sctp_hmac *hmac_desc; + const struct sctp_hmac *hmac_desc; struct sctp_chunk *retval; /* Get the first hmac that the peer told us to use */ @@ -1674,8 +1674,10 @@ static struct sctp_cookie_param *sctp_pack_cookie( * out on the network. */ retval = kzalloc(*cookie_len, GFP_ATOMIC); - if (!retval) - goto nodata; + if (!retval) { + *cookie_len = 0; + return NULL; + } cookie = (struct sctp_signed_cookie *) retval->body; @@ -1706,26 +1708,14 @@ static struct sctp_cookie_param *sctp_pack_cookie( memcpy((__u8 *)(cookie + 1) + ntohs(init_chunk->chunk_hdr->length), raw_addrs, addrs_len); - if (sctp_sk(ep->base.sk)->hmac) { - struct crypto_shash *tfm = sctp_sk(ep->base.sk)->hmac; - int err; - - /* Sign the message. 
*/ - err = crypto_shash_setkey(tfm, ep->secret_key, - sizeof(ep->secret_key)) ?: - crypto_shash_tfm_digest(tfm, (u8 *)&cookie->c, bodysize, - cookie->signature); - if (err) - goto free_cookie; + /* Sign the cookie, if cookie authentication is enabled. */ + if (sctp_sk(ep->base.sk)->cookie_auth_enable) { + static_assert(sizeof(cookie->mac) == SHA256_DIGEST_SIZE); + hmac_sha256(&ep->cookie_auth_key, (const u8 *)&cookie->c, + bodysize, cookie->mac); } return retval; - -free_cookie: - kfree(retval); -nodata: - *cookie_len = 0; - return NULL; } /* Unpack the cookie from COOKIE ECHO chunk, recreating the association. */ @@ -1740,7 +1730,6 @@ struct sctp_association *sctp_unpack_cookie( struct sctp_signed_cookie *cookie; struct sk_buff *skb = chunk->skb; struct sctp_cookie *bear_cookie; - __u8 *digest = ep->digest; enum sctp_scope scope; unsigned int len; ktime_t kt; @@ -1770,30 +1759,19 @@ struct sctp_association *sctp_unpack_cookie( cookie = chunk->subh.cookie_hdr; bear_cookie = &cookie->c; - if (!sctp_sk(ep->base.sk)->hmac) - goto no_hmac; + /* Verify the cookie's MAC, if cookie authentication is enabled. */ + if (sctp_sk(ep->base.sk)->cookie_auth_enable) { + u8 mac[SHA256_DIGEST_SIZE]; - /* Check the signature. */ - { - struct crypto_shash *tfm = sctp_sk(ep->base.sk)->hmac; - int err; - - err = crypto_shash_setkey(tfm, ep->secret_key, - sizeof(ep->secret_key)) ?: - crypto_shash_tfm_digest(tfm, (u8 *)bear_cookie, bodysize, - digest); - if (err) { - *error = -SCTP_IERROR_NOMEM; + hmac_sha256(&ep->cookie_auth_key, (const u8 *)bear_cookie, + bodysize, mac); + static_assert(sizeof(cookie->mac) == sizeof(mac)); + if (crypto_memneq(mac, cookie->mac, sizeof(mac))) { + *error = -SCTP_IERROR_BAD_SIG; goto fail; } } - if (memcmp(digest, cookie->signature, SCTP_SIGNATURE_SIZE)) { - *error = -SCTP_IERROR_BAD_SIG; - goto fail; - } - -no_hmac: /* IG Section 2.35.2: * 3) Compare the port numbers and the verification tag contained * within the COOKIE ECHO chunk to the actual port numbers and the diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index a0524ba8d787..4cb8f393434d 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -30,6 +30,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <crypto/utils.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/ip.h> @@ -4361,7 +4362,7 @@ static enum sctp_ierror sctp_sf_authenticate( struct sctp_shared_key *sh_key = NULL; struct sctp_authhdr *auth_hdr; __u8 *save_digest, *digest; - struct sctp_hmac *hmac; + const struct sctp_hmac *hmac; unsigned int sig_len; __u16 key_id; @@ -4416,7 +4417,7 @@ static enum sctp_ierror sctp_sf_authenticate( sh_key, GFP_ATOMIC); /* Discard the packet if the digests do not match */ - if (memcmp(save_digest, digest, sig_len)) { + if (crypto_memneq(save_digest, digest, sig_len)) { kfree(save_digest); return SCTP_IERROR_BAD_SIG; } diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 4921416434f9..ed8293a34240 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -37,7 +37,6 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include <crypto/hash.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/wait.h> @@ -4987,7 +4986,7 @@ static int sctp_init_sock(struct sock *sk) sp->default_rcv_context = 0; sp->max_burst = net->sctp.max_burst; - sp->sctp_hmac_alg = net->sctp.sctp_hmac_alg; + sp->cookie_auth_enable = net->sctp.cookie_auth_enable; /* Initialize default setup parameters. 
These parameters * can be modified with the SCTP_INITMSG socket option or @@ -5079,8 +5078,6 @@ static int sctp_init_sock(struct sock *sk) if (!sp->ep) return -ENOMEM; - sp->hmac = NULL; - sk->sk_destruct = sctp_destruct_sock; SCTP_DBG_OBJCNT_INC(sock); @@ -5117,18 +5114,8 @@ static void sctp_destroy_sock(struct sock *sk) sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); } -/* Triggered when there are no references on the socket anymore */ -static void sctp_destruct_common(struct sock *sk) -{ - struct sctp_sock *sp = sctp_sk(sk); - - /* Free up the HMAC transform. */ - crypto_free_shash(sp->hmac); -} - static void sctp_destruct_sock(struct sock *sk) { - sctp_destruct_common(sk); inet_sock_destruct(sk); } @@ -8530,22 +8517,8 @@ static int sctp_listen_start(struct sock *sk, int backlog) { struct sctp_sock *sp = sctp_sk(sk); struct sctp_endpoint *ep = sp->ep; - struct crypto_shash *tfm = NULL; - char alg[32]; int err; - /* Allocate HMAC for generating cookie. */ - if (!sp->hmac && sp->sctp_hmac_alg) { - sprintf(alg, "hmac(%s)", sp->sctp_hmac_alg); - tfm = crypto_alloc_shash(alg, 0, 0); - if (IS_ERR(tfm)) { - net_info_ratelimited("failed to load transform for %s: %ld\n", - sp->sctp_hmac_alg, PTR_ERR(tfm)); - return -ENOSYS; - } - sctp_sk(sk)->hmac = tfm; - } - /* * If a bind() or sctp_bindx() is not called prior to a listen() * call that allows new associations to be accepted, the system @@ -9561,7 +9534,6 @@ static int sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, * copy. */ newsp->ep = newep; - newsp->hmac = NULL; /* Hook this new socket in to the bind_hash list. */ head = &sctp_port_hashtable[sctp_phashfn(sock_net(oldsk), @@ -9581,16 +9553,6 @@ static int sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, if (err) return err; - /* New ep's auth_hmacs should be set if old ep's is set, in case - * that net->sctp.auth_enable has been changed to 0 by users and - * new ep's auth_hmacs couldn't be set in sctp_endpoint_init(). - */ - if (oldsp->ep->auth_hmacs) { - err = sctp_auth_init_hmacs(newsp->ep, GFP_KERNEL); - if (err) - return err; - } - sctp_auto_asconf_init(newsp); /* Move any messages in the old socket's receive queue that are for the @@ -9723,7 +9685,6 @@ struct proto sctp_prot = { static void sctp_v6_destruct_sock(struct sock *sk) { - sctp_destruct_common(sk); inet6_sock_destruct(sk); } diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index ee3eac338a9d..15e7db9a3ab2 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -174,7 +174,7 @@ static struct ctl_table sctp_net_table[] = { }, { .procname = "cookie_hmac_alg", - .data = &init_net.sctp.sctp_hmac_alg, + .data = &init_net.sctp.cookie_auth_enable, .maxlen = 8, .mode = 0644, .proc_handler = proc_sctp_do_hmac_alg, @@ -388,10 +388,8 @@ static int proc_sctp_do_hmac_alg(const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct net *net = container_of(ctl->data, struct net, - sctp.sctp_hmac_alg); + sctp.cookie_auth_enable); struct ctl_table tbl; - bool changed = false; - char *none = "none"; char tmp[8] = {0}; int ret; @@ -399,35 +397,26 @@ static int proc_sctp_do_hmac_alg(const struct ctl_table *ctl, int write, if (write) { tbl.data = tmp; - tbl.maxlen = sizeof(tmp); - } else { - tbl.data = net->sctp.sctp_hmac_alg ? 
: none; - tbl.maxlen = strlen(tbl.data); - } - - ret = proc_dostring(&tbl, write, buffer, lenp, ppos); - if (write && ret == 0) { -#ifdef CONFIG_CRYPTO_MD5 - if (!strncmp(tmp, "md5", 3)) { - net->sctp.sctp_hmac_alg = "md5"; - changed = true; + tbl.maxlen = sizeof(tmp) - 1; + ret = proc_dostring(&tbl, 1, buffer, lenp, ppos); + if (ret) + return ret; + if (!strcmp(tmp, "sha256")) { + net->sctp.cookie_auth_enable = 1; + return 0; } -#endif -#ifdef CONFIG_CRYPTO_SHA1 - if (!strncmp(tmp, "sha1", 4)) { - net->sctp.sctp_hmac_alg = "sha1"; - changed = true; + if (!strcmp(tmp, "none")) { + net->sctp.cookie_auth_enable = 0; + return 0; } -#endif - if (!strncmp(tmp, "none", 4)) { - net->sctp.sctp_hmac_alg = NULL; - changed = true; - } - if (!changed) - ret = -EINVAL; + return -EINVAL; } - - return ret; + if (net->sctp.cookie_auth_enable) + tbl.data = (char *)"sha256"; + else + tbl.data = (char *)"none"; + tbl.maxlen = strlen(tbl.data); + return proc_dostring(&tbl, 0, buffer, lenp, ppos); } static int proc_sctp_do_rto_min(const struct ctl_table *ctl, int write, diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index a42ef3f77b96..0052f02756eb 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -974,13 +974,17 @@ static int smc_ib_add_dev(struct ib_device *ibdev) smcibdev->pnetid[i])) smc_pnetid_by_table_ib(smcibdev, i + 1); smc_copy_netdev_ifindex(smcibdev, i); - pr_warn_ratelimited("smc: ib device %s port %d has pnetid " - "%.16s%s\n", - smcibdev->ibdev->name, i + 1, - smcibdev->pnetid[i], - smcibdev->pnetid_by_user[i] ? - " (user defined)" : - ""); + if (smc_pnet_is_pnetid_set(smcibdev->pnetid[i])) + pr_warn_ratelimited("smc: ib device %s port %d has pnetid %.16s%s\n", + smcibdev->ibdev->name, i + 1, + smcibdev->pnetid[i], + smcibdev->pnetid_by_user[i] ? + " (user defined)" : + ""); + else + pr_warn_ratelimited("smc: ib device %s port %d has no pnetid\n", + smcibdev->ibdev->name, i + 1); + } schedule_work(&smcibdev->port_event_work); return 0; diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c index 84f98e18c7db..a58ffb7a0610 100644 --- a/net/smc/smc_ism.c +++ b/net/smc/smc_ism.c @@ -518,10 +518,15 @@ static void smcd_register_dev(struct ism_dev *ism) } mutex_unlock(&smcd_dev_list.mutex); - pr_warn_ratelimited("smc: adding smcd device %s with pnetid %.16s%s\n", - dev_name(&ism->dev), smcd->pnetid, - smcd->pnetid_by_user ? " (user defined)" : ""); - + if (smc_pnet_is_pnetid_set(smcd->pnetid)) + pr_warn_ratelimited("smc: adding smcd device %s with pnetid %.16s%s\n", + dev_name(&ism->dev), smcd->pnetid, + smcd->pnetid_by_user ? 
+ " (user defined)" : + ""); + else + pr_warn_ratelimited("smc: adding smcd device %s without pnetid\n", + dev_name(&ism->dev)); return; } diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 76ad29e31d60..b90337f86e83 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -450,7 +450,7 @@ static int smc_pnet_add_ib(struct smc_pnettable *pnettable, char *ib_name, return -ENOMEM; new_pe->type = SMC_PNET_IB; memcpy(new_pe->pnet_name, pnet_name, SMC_MAX_PNETID_LEN); - strncpy(new_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX); + strscpy(new_pe->ib_name, ib_name); new_pe->ib_port = ib_port; new_ibdev = true; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index e028bf658499..1574a83384f8 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2366,7 +2366,7 @@ static void tipc_sk_filter_rcv(struct sock *sk, struct sk_buff *skb, else if (sk_rmem_alloc_get(sk) + skb->truesize >= limit) { trace_tipc_sk_dump(sk, skb, TIPC_DUMP_ALL, "err_overload2!"); - atomic_inc(&sk->sk_drops); + sk_drops_inc(sk); err = TIPC_ERR_OVERLOAD; } @@ -2458,7 +2458,7 @@ static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk, trace_tipc_sk_dump(sk, skb, TIPC_DUMP_ALL, "err_overload!"); /* Overload => reject message back to sender */ onode = tipc_own_addr(sock_net(sk)); - atomic_inc(&sk->sk_drops); + sk_drops_inc(sk); if (tipc_msg_reverse(onode, &skb, TIPC_ERR_OVERLOAD)) { trace_tipc_sk_rej_msg(sk, skb, TIPC_DUMP_ALL, "@sk_enqueue!"); @@ -3657,7 +3657,7 @@ int tipc_sk_fill_sock_diag(struct sk_buff *skb, struct netlink_callback *cb, nla_put_u32(skb, TIPC_NLA_SOCK_STAT_SENDQ, skb_queue_len(&sk->sk_write_queue)) || nla_put_u32(skb, TIPC_NLA_SOCK_STAT_DROP, - atomic_read(&sk->sk_drops))) + sk_drops_read(sk))) goto stat_msg_cancel; if (tsk->cong_link_cnt && diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index bebb355f3ffe..0538948d5fd9 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1029,12 +1029,7 @@ static int vsock_getname(struct socket *sock, vm_addr = &vsk->local_addr; } - /* sys_getsockname() and sys_getpeername() pass us a - * MAX_SOCK_ADDR-sized buffer and don't set addr_len. Unfortunately - * that macro is defined in socket.c instead of .h, so we hardcode its - * value here. 
- */ - BUILD_BUG_ON(sizeof(*vm_addr) > 128); + BUILD_BUG_ON(sizeof(*vm_addr) > sizeof(struct sockaddr_storage)); memcpy(addr, vm_addr, sizeof(*vm_addr)); err = sizeof(*vm_addr); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index c5035a9bc3bb..62486f866975 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2594,7 +2594,7 @@ xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, const struct flowi *fl, static dscp_t xfrm_get_dscp(const struct flowi *fl, int family) { if (family == AF_INET) - return inet_dsfield_to_dscp(fl->u.ip4.flowi4_tos); + return fl->u.ip4.flowi4_dscp; return 0; } @@ -3462,7 +3462,7 @@ decode_session4(const struct xfrm_flow_keys *flkeys, struct flowi *fl, bool reve } fl4->flowi4_proto = flkeys->basic.ip_proto; - fl4->flowi4_tos = flkeys->ip.tos & ~INET_ECN_MASK; + fl4->flowi4_dscp = inet_dsfield_to_dscp(flkeys->ip.tos); } #if IS_ENABLED(CONFIG_IPV6) @@ -3594,7 +3594,7 @@ static bool xfrm_icmp_flow_decode(struct sk_buff *skb, unsigned short family, fl1->flowi_oif = fl->flowi_oif; fl1->flowi_mark = fl->flowi_mark; - fl1->flowi_tos = fl->flowi_tos; + fl1->flowi_dscp = fl->flowi_dscp; nf_nat_decode_session(newskb, fl1, family); ret = false; @@ -3881,12 +3881,18 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family) } skb_dst_force(skb); - if (!skb_dst(skb)) { + dst = skb_dst(skb); + if (!dst) { XFRM_INC_STATS(net, LINUX_MIB_XFRMFWDHDRERROR); return 0; } - dst = xfrm_lookup(net, skb_dst(skb), &fl, NULL, XFRM_LOOKUP_QUEUE); + /* ignore return value from skb_dstref_steal, xfrm_lookup takes + * care of dropping the refcnt if needed. + */ + skb_dstref_steal(skb); + + dst = xfrm_lookup(net, dst, &fl, NULL, XFRM_LOOKUP_QUEUE); if (IS_ERR(dst)) { res = 0; dst = NULL; diff --git a/rust/kernel/net/phy.rs b/rust/kernel/net/phy.rs index 7de5cc7a0eee..c895582cd624 100644 --- a/rust/kernel/net/phy.rs +++ b/rust/kernel/net/phy.rs @@ -196,11 +196,8 @@ impl Device { // SAFETY: `phydev` is pointing to a valid object by the type invariant of `Self`. // So it's just an FFI call. let ret = unsafe { bindings::phy_read_paged(phydev, page.into(), regnum.into()) }; - if ret < 0 { - Err(Error::from_errno(ret)) - } else { - Ok(ret as u16) - } + + to_result(ret).map(|()| ret as u16) } /// Resolves the advertisements into PHY settings. 
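Returning to the SCTP hunks above: cookie generation and verification now use the one-shot lib/crypto HMAC-SHA256 interface with a prepared key, and digest checks go through crypto_memneq() instead of memcmp(). A hedged sketch of the verification shape (demo_cookie_mac_ok() is an invented name; the real logic lives in sctp_unpack_cookie()):

#include <crypto/sha2.h>
#include <crypto/utils.h>

static bool demo_cookie_mac_ok(const struct hmac_sha256_key *key,
			       const u8 *body, size_t body_len,
			       const u8 mac[SHA256_DIGEST_SIZE])
{
	u8 calc[SHA256_DIGEST_SIZE];

	/* One-shot HMAC: no shash tfm allocation, no setkey error path. */
	hmac_sha256(key, body, body_len, calc);

	/* Constant-time compare; memcmp() leaks timing on early mismatch. */
	return !crypto_memneq(calc, mac, SHA256_DIGEST_SIZE);
}

The sm_statefuns.c hunk applies the same crypto_memneq() hardening to AUTH chunk digest comparison.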
diff --git a/scripts/headers_install.sh b/scripts/headers_install.sh index 6bbccb43f7e7..4c20c62c4faf 100755 --- a/scripts/headers_install.sh +++ b/scripts/headers_install.sh @@ -32,7 +32,7 @@ fi sed -E -e ' s/([[:space:](])(__user|__force|__iomem)[[:space:]]/\1/g s/__attribute_const__([[:space:]]|$)/\1/g - s@^#include <linux/compiler(|_types).h>@@ + s@^#include <linux/compiler.h>@@ s/(^|[^a-zA-Z0-9])__packed([^a-zA-Z0-9_]|$)/\1__attribute__((packed))\2/g s/(^|[[:space:](])(inline|asm|volatile)([[:space:](]|$)/\1__\2__\3/g s@#(ifndef|define|endif[[:space:]]*/[*])[[:space:]]*_UAPI@#\1 @ diff --git a/tools/net/ynl/pyynl/lib/__init__.py b/tools/net/ynl/pyynl/lib/__init__.py index 71518b9842ee..5f266ebe4526 100644 --- a/tools/net/ynl/pyynl/lib/__init__.py +++ b/tools/net/ynl/pyynl/lib/__init__.py @@ -4,6 +4,8 @@ from .nlspec import SpecAttr, SpecAttrSet, SpecEnumEntry, SpecEnumSet, \ SpecFamily, SpecOperation, SpecSubMessage, SpecSubMessageFormat from .ynl import YnlFamily, Netlink, NlError +from .doc_generator import YnlDocGenerator + __all__ = ["SpecAttr", "SpecAttrSet", "SpecEnumEntry", "SpecEnumSet", "SpecFamily", "SpecOperation", "SpecSubMessage", "SpecSubMessageFormat", "YnlFamily", "Netlink", "NlError"] diff --git a/tools/net/ynl/pyynl/lib/doc_generator.py b/tools/net/ynl/pyynl/lib/doc_generator.py new file mode 100644 index 000000000000..403abf1a2eda --- /dev/null +++ b/tools/net/ynl/pyynl/lib/doc_generator.py @@ -0,0 +1,398 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# -*- coding: utf-8; mode: python -*- + +""" + Class to auto generate the documentation for Netlink specifications. + + :copyright: Copyright (C) 2023 Breno Leitao <leitao@debian.org> + :license: GPL Version 2, June 1991 see linux/COPYING for details. + + This class performs extensive parsing to the Linux kernel's netlink YAML + spec files, in an effort to avoid needing to heavily mark up the original + YAML file. 
+ + This code is split into two classes: + 1) RST formatters: Used to convert a string to RST output + 2) YAML Netlink (YNL) doc generator: Generate docs from YAML data +""" + +from typing import Any, Dict, List +import yaml + +LINE_STR = '__lineno__' + +class NumberedSafeLoader(yaml.SafeLoader): # pylint: disable=R0901 + """Override the SafeLoader class to add line numbers to parsed data""" + + def construct_mapping(self, node, *args, **kwargs): + mapping = super().construct_mapping(node, *args, **kwargs) + mapping[LINE_STR] = node.start_mark.line + + return mapping + +class RstFormatters: + """RST Formatters""" + + SPACE_PER_LEVEL = 4 + + @staticmethod + def headroom(level: int) -> str: + """Return space to format""" + return " " * (level * RstFormatters.SPACE_PER_LEVEL) + + @staticmethod + def bold(text: str) -> str: + """Format bold text""" + return f"**{text}**" + + @staticmethod + def inline(text: str) -> str: + """Format inline text""" + return f"``{text}``" + + @staticmethod + def sanitize(text: str) -> str: + """Remove newlines and multiple spaces""" + # This is useful for some fields that are spread across multiple lines + return str(text).replace("\n", " ").strip() + + def rst_fields(self, key: str, value: str, level: int = 0) -> str: + """Return a RST formatted field""" + return self.headroom(level) + f":{key}: {value}" + + def rst_definition(self, key: str, value: Any, level: int = 0) -> str: + """Format a single rst definition""" + return self.headroom(level) + key + "\n" + self.headroom(level + 1) + str(value) + + def rst_paragraph(self, paragraph: str, level: int = 0) -> str: + """Return a formatted paragraph""" + return self.headroom(level) + paragraph + + def rst_bullet(self, item: str, level: int = 0) -> str: + """Return a formatted bullet""" + return self.headroom(level) + f"- {item}" + + @staticmethod + def rst_subsection(title: str) -> str: + """Add a sub-section to the document""" + return f"{title}\n" + "-" * len(title) + + @staticmethod + def rst_subsubsection(title: str) -> str: + """Add a sub-sub-section to the document""" + return f"{title}\n" + "~" * len(title) + + @staticmethod + def rst_section(namespace: str, prefix: str, title: str) -> str: + """Add a section to the document""" + return f".. _{namespace}-{prefix}-{title}:\n\n{title}\n" + "=" * len(title) + + @staticmethod + def rst_subtitle(title: str) -> str: + """Add a subtitle to the document""" + return "\n" + "-" * len(title) + f"\n{title}\n" + "-" * len(title) + "\n\n" + + @staticmethod + def rst_title(title: str) -> str: + """Add a title to the document""" + return "=" * len(title) + f"\n{title}\n" + "=" * len(title) + "\n\n" + + def rst_list_inline(self, list_: List[str], level: int = 0) -> str: + """Format a list using inlines""" + return self.headroom(level) + "[" + ", ".join(self.inline(i) for i in list_) + "]" + + @staticmethod + def rst_ref(namespace: str, prefix: str, name: str) -> str: + """Add a hyperlink to the document""" + mappings = {'enum': 'definition', + 'fixed-header': 'definition', + 'nested-attributes': 'attribute-set', + 'struct': 'definition'} + if prefix in mappings: + prefix = mappings[prefix] + return f":ref:`{namespace}-{prefix}-{name}`" + + def rst_header(self) -> str: + """The headers for all the auto-generated RST files""" + lines = [] + + lines.append(self.rst_paragraph(".. SPDX-License-Identifier: GPL-2.0")) + lines.append(self.rst_paragraph(".. 
NOTE: This document was auto-generated.\n\n")) + + return "\n".join(lines) + + @staticmethod + def rst_toctree(maxdepth: int = 2) -> str: + """Generate a toctree RST primitive""" + lines = [] + + lines.append(".. toctree::") + lines.append(f" :maxdepth: {maxdepth}\n\n") + + return "\n".join(lines) + + @staticmethod + def rst_label(title: str) -> str: + """Return a formatted label""" + return f".. _{title}:\n\n" + + @staticmethod + def rst_lineno(lineno: int) -> str: + """Return a lineno comment""" + return f".. LINENO {lineno}\n" + +class YnlDocGenerator: + """YAML Netlink specs Parser""" + + fmt = RstFormatters() + + def parse_mcast_group(self, mcast_group: List[Dict[str, Any]]) -> str: + """Parse 'multicast' group list and return a formatted string""" + lines = [] + for group in mcast_group: + lines.append(self.fmt.rst_bullet(group["name"])) + + return "\n".join(lines) + + def parse_do(self, do_dict: Dict[str, Any], level: int = 0) -> str: + """Parse 'do' section and return a formatted string""" + lines = [] + if LINE_STR in do_dict: + lines.append(self.fmt.rst_lineno(do_dict[LINE_STR])) + + for key in do_dict.keys(): + if key == LINE_STR: + continue + lines.append(self.fmt.rst_paragraph(self.fmt.bold(key), level + 1)) + if key in ['request', 'reply']: + lines.append(self.parse_do_attributes(do_dict[key], level + 1) + "\n") + else: + lines.append(self.fmt.headroom(level + 2) + do_dict[key] + "\n") + + return "\n".join(lines) + + def parse_do_attributes(self, attrs: Dict[str, Any], level: int = 0) -> str: + """Parse 'attributes' section""" + if "attributes" not in attrs: + return "" + lines = [self.fmt.rst_fields("attributes", + self.fmt.rst_list_inline(attrs["attributes"]), + level + 1)] + + return "\n".join(lines) + + def parse_operations(self, operations: List[Dict[str, Any]], namespace: str) -> str: + """Parse operations block""" + preprocessed = ["name", "doc", "title", "do", "dump", "flags"] + linkable = ["fixed-header", "attribute-set"] + lines = [] + + for operation in operations: + if LINE_STR in operation: + lines.append(self.fmt.rst_lineno(operation[LINE_STR])) + + lines.append(self.fmt.rst_section(namespace, 'operation', + operation["name"])) + lines.append(self.fmt.rst_paragraph(operation["doc"]) + "\n") + + for key in operation.keys(): + if key == LINE_STR: + continue + + if key in preprocessed: + # Skip the special fields + continue + value = operation[key] + if key in linkable: + value = self.fmt.rst_ref(namespace, key, value) + lines.append(self.fmt.rst_fields(key, value, 0)) + if 'flags' in operation: + lines.append(self.fmt.rst_fields('flags', + self.fmt.rst_list_inline(operation['flags']))) + + if "do" in operation: + lines.append(self.fmt.rst_paragraph(":do:", 0)) + lines.append(self.parse_do(operation["do"], 0)) + if "dump" in operation: + lines.append(self.fmt.rst_paragraph(":dump:", 0)) + lines.append(self.parse_do(operation["dump"], 0)) + + # New line after fields + lines.append("\n") + + return "\n".join(lines) + + def parse_entries(self, entries: List[Dict[str, Any]], level: int) -> str: + """Parse a list of entries""" + ignored = ["pad"] + lines = [] + for entry in entries: + if isinstance(entry, dict): + # entries could be a list or a dictionary + field_name = entry.get("name", "") + if field_name in ignored: + continue + type_ = entry.get("type") + if type_: + field_name += f" ({self.fmt.inline(type_)})" + lines.append( + self.fmt.rst_fields(field_name, + self.fmt.sanitize(entry.get("doc", "")), + level) + ) + elif isinstance(entry, list): + 
lines.append(self.fmt.rst_list_inline(entry, level)) + else: + lines.append(self.fmt.rst_bullet(self.fmt.inline(self.fmt.sanitize(entry)), + level)) + + lines.append("\n") + return "\n".join(lines) + + def parse_definitions(self, defs: Dict[str, Any], namespace: str) -> str: + """Parse definitions section""" + preprocessed = ["name", "entries", "members"] + ignored = ["render-max"] # This is not printed + lines = [] + + for definition in defs: + if LINE_STR in definition: + lines.append(self.fmt.rst_lineno(definition[LINE_STR])) + + lines.append(self.fmt.rst_section(namespace, 'definition', definition["name"])) + for k in definition.keys(): + if k == LINE_STR: + continue + if k in preprocessed + ignored: + continue + lines.append(self.fmt.rst_fields(k, self.fmt.sanitize(definition[k]), 0)) + + # Field list needs to finish with a new line + lines.append("\n") + if "entries" in definition: + lines.append(self.fmt.rst_paragraph(":entries:", 0)) + lines.append(self.parse_entries(definition["entries"], 1)) + if "members" in definition: + lines.append(self.fmt.rst_paragraph(":members:", 0)) + lines.append(self.parse_entries(definition["members"], 1)) + + return "\n".join(lines) + + def parse_attr_sets(self, entries: List[Dict[str, Any]], namespace: str) -> str: + """Parse attribute from attribute-set""" + preprocessed = ["name", "type"] + linkable = ["enum", "nested-attributes", "struct", "sub-message"] + ignored = ["checks"] + lines = [] + + for entry in entries: + lines.append(self.fmt.rst_section(namespace, 'attribute-set', + entry["name"])) + for attr in entry["attributes"]: + if LINE_STR in attr: + lines.append(self.fmt.rst_lineno(attr[LINE_STR])) + + type_ = attr.get("type") + attr_line = attr["name"] + if type_: + # Add the attribute type in the same line + attr_line += f" ({self.fmt.inline(type_)})" + + lines.append(self.fmt.rst_subsubsection(attr_line)) + + for k in attr.keys(): + if k == LINE_STR: + continue + if k in preprocessed + ignored: + continue + if k in linkable: + value = self.fmt.rst_ref(namespace, k, attr[k]) + else: + value = self.fmt.sanitize(attr[k]) + lines.append(self.fmt.rst_fields(k, value, 0)) + lines.append("\n") + + return "\n".join(lines) + + def parse_sub_messages(self, entries: List[Dict[str, Any]], namespace: str) -> str: + """Parse sub-message definitions""" + lines = [] + + for entry in entries: + lines.append(self.fmt.rst_section(namespace, 'sub-message', + entry["name"])) + for fmt in entry["formats"]: + value = fmt["value"] + + lines.append(self.fmt.rst_bullet(self.fmt.bold(value))) + for attr in ['fixed-header', 'attribute-set']: + if attr in fmt: + lines.append(self.fmt.rst_fields(attr, + self.fmt.rst_ref(namespace, + attr, + fmt[attr]), + 1)) + lines.append("\n") + + return "\n".join(lines) + + def parse_yaml(self, obj: Dict[str, Any]) -> str: + """Format the whole YAML into a RST string""" + lines = [] + + # Main header + lineno = obj.get('__lineno__', 0) + lines.append(self.fmt.rst_lineno(lineno)) + + family = obj['name'] + + lines.append(self.fmt.rst_header()) + lines.append(self.fmt.rst_label("netlink-" + family)) + + title = f"Family ``{family}`` netlink specification" + lines.append(self.fmt.rst_title(title)) + lines.append(self.fmt.rst_paragraph(".. 
contents:: :depth: 3\n")) + + if "doc" in obj: + lines.append(self.fmt.rst_subtitle("Summary")) + lines.append(self.fmt.rst_paragraph(obj["doc"], 0)) + + # Operations + if "operations" in obj: + lines.append(self.fmt.rst_subtitle("Operations")) + lines.append(self.parse_operations(obj["operations"]["list"], + family)) + + # Multicast groups + if "mcast-groups" in obj: + lines.append(self.fmt.rst_subtitle("Multicast groups")) + lines.append(self.parse_mcast_group(obj["mcast-groups"]["list"])) + + # Definitions + if "definitions" in obj: + lines.append(self.fmt.rst_subtitle("Definitions")) + lines.append(self.parse_definitions(obj["definitions"], family)) + + # Attributes set + if "attribute-sets" in obj: + lines.append(self.fmt.rst_subtitle("Attribute sets")) + lines.append(self.parse_attr_sets(obj["attribute-sets"], family)) + + # Sub-messages + if "sub-messages" in obj: + lines.append(self.fmt.rst_subtitle("Sub-messages")) + lines.append(self.parse_sub_messages(obj["sub-messages"], family)) + + return "\n".join(lines) + + # Main functions + # ============== + + def parse_yaml_file(self, filename: str) -> str: + """Transform the YAML specified by filename into an RST-formatted string""" + with open(filename, "r", encoding="utf-8") as spec_file: + numbered_yaml = yaml.load(spec_file, Loader=NumberedSafeLoader) + content = self.parse_yaml(numbered_yaml) + + return content diff --git a/tools/net/ynl/pyynl/ynl_gen_c.py b/tools/net/ynl/pyynl/ynl_gen_c.py index eb295756c3bf..fb7e03805a11 100755 --- a/tools/net/ynl/pyynl/ynl_gen_c.py +++ b/tools/net/ynl/pyynl/ynl_gen_c.py @@ -556,7 +556,7 @@ class TypeBinary(Type): elif 'exact-len' in self.checks: mem = 'NLA_POLICY_EXACT_LEN(' + self.get_limit_str('exact-len') + ')' elif 'min-len' in self.checks: - mem = '{ .len = ' + self.get_limit_str('min-len') + ', }' + mem = 'NLA_POLICY_MIN_LEN(' + self.get_limit_str('min-len') + ')' elif 'max-len' in self.checks: mem = 'NLA_POLICY_MAX_LEN(' + self.get_limit_str('max-len') + ')' diff --git a/tools/net/ynl/pyynl/ynl_gen_rst.py b/tools/net/ynl/pyynl/ynl_gen_rst.py index 0cb6348e28d3..90ae19aac89d 100755 --- a/tools/net/ynl/pyynl/ynl_gen_rst.py +++ b/tools/net/ynl/pyynl/ynl_gen_rst.py @@ -10,353 +10,17 @@ This script performs extensive parsing to the Linux kernel's netlink YAML spec files, in an effort to avoid needing to heavily mark up the original - YAML file. - - This code is split in three big parts: - 1) RST formatters: Use to convert a string to a RST output - 2) Parser helpers: Functions to parse the YAML data structure - 3) Main function and small helpers + YAML file. It uses the library code from scripts/lib. 
""" -from typing import Any, Dict, List import os.path +import pathlib import sys import argparse import logging -import yaml - - -SPACE_PER_LEVEL = 4 - - -# RST Formatters -# ============== -def headroom(level: int) -> str: - """Return space to format""" - return " " * (level * SPACE_PER_LEVEL) - - -def bold(text: str) -> str: - """Format bold text""" - return f"**{text}**" - - -def inline(text: str) -> str: - """Format inline text""" - return f"``{text}``" - - -def sanitize(text: str) -> str: - """Remove newlines and multiple spaces""" - # This is useful for some fields that are spread across multiple lines - return str(text).replace("\n", " ").strip() - - -def rst_fields(key: str, value: str, level: int = 0) -> str: - """Return a RST formatted field""" - return headroom(level) + f":{key}: {value}" - - -def rst_definition(key: str, value: Any, level: int = 0) -> str: - """Format a single rst definition""" - return headroom(level) + key + "\n" + headroom(level + 1) + str(value) - - -def rst_paragraph(paragraph: str, level: int = 0) -> str: - """Return a formatted paragraph""" - return headroom(level) + paragraph - - -def rst_bullet(item: str, level: int = 0) -> str: - """Return a formatted a bullet""" - return headroom(level) + f"- {item}" - - -def rst_subsection(title: str) -> str: - """Add a sub-section to the document""" - return f"{title}\n" + "-" * len(title) - - -def rst_subsubsection(title: str) -> str: - """Add a sub-sub-section to the document""" - return f"{title}\n" + "~" * len(title) - - -def rst_section(namespace: str, prefix: str, title: str) -> str: - """Add a section to the document""" - return f".. _{namespace}-{prefix}-{title}:\n\n{title}\n" + "=" * len(title) - - -def rst_subtitle(title: str) -> str: - """Add a subtitle to the document""" - return "\n" + "-" * len(title) + f"\n{title}\n" + "-" * len(title) + "\n\n" - - -def rst_title(title: str) -> str: - """Add a title to the document""" - return "=" * len(title) + f"\n{title}\n" + "=" * len(title) + "\n\n" - - -def rst_list_inline(list_: List[str], level: int = 0) -> str: - """Format a list using inlines""" - return headroom(level) + "[" + ", ".join(inline(i) for i in list_) + "]" - - -def rst_ref(namespace: str, prefix: str, name: str) -> str: - """Add a hyperlink to the document""" - mappings = {'enum': 'definition', - 'fixed-header': 'definition', - 'nested-attributes': 'attribute-set', - 'struct': 'definition'} - if prefix in mappings: - prefix = mappings[prefix] - return f":ref:`{namespace}-{prefix}-{name}`" - - -def rst_header() -> str: - """The headers for all the auto generated RST files""" - lines = [] - - lines.append(rst_paragraph(".. SPDX-License-Identifier: GPL-2.0")) - lines.append(rst_paragraph(".. NOTE: This document was auto-generated.\n\n")) - - return "\n".join(lines) - - -def rst_toctree(maxdepth: int = 2) -> str: - """Generate a toctree RST primitive""" - lines = [] - - lines.append(".. toctree::") - lines.append(f" :maxdepth: {maxdepth}\n\n") - - return "\n".join(lines) - - -def rst_label(title: str) -> str: - """Return a formatted label""" - return f".. 
_{title}:\n\n" - - -# Parsers -# ======= - - -def parse_mcast_group(mcast_group: List[Dict[str, Any]]) -> str: - """Parse 'multicast' group list and return a formatted string""" - lines = [] - for group in mcast_group: - lines.append(rst_bullet(group["name"])) - - return "\n".join(lines) - - -def parse_do(do_dict: Dict[str, Any], level: int = 0) -> str: - """Parse 'do' section and return a formatted string""" - lines = [] - for key in do_dict.keys(): - lines.append(rst_paragraph(bold(key), level + 1)) - if key in ['request', 'reply']: - lines.append(parse_do_attributes(do_dict[key], level + 1) + "\n") - else: - lines.append(headroom(level + 2) + do_dict[key] + "\n") - - return "\n".join(lines) - - -def parse_do_attributes(attrs: Dict[str, Any], level: int = 0) -> str: - """Parse 'attributes' section""" - if "attributes" not in attrs: - return "" - lines = [rst_fields("attributes", rst_list_inline(attrs["attributes"]), level + 1)] - - return "\n".join(lines) - - -def parse_operations(operations: List[Dict[str, Any]], namespace: str) -> str: - """Parse operations block""" - preprocessed = ["name", "doc", "title", "do", "dump", "flags"] - linkable = ["fixed-header", "attribute-set"] - lines = [] - - for operation in operations: - lines.append(rst_section(namespace, 'operation', operation["name"])) - lines.append(rst_paragraph(operation["doc"]) + "\n") - - for key in operation.keys(): - if key in preprocessed: - # Skip the special fields - continue - value = operation[key] - if key in linkable: - value = rst_ref(namespace, key, value) - lines.append(rst_fields(key, value, 0)) - if 'flags' in operation: - lines.append(rst_fields('flags', rst_list_inline(operation['flags']))) - - if "do" in operation: - lines.append(rst_paragraph(":do:", 0)) - lines.append(parse_do(operation["do"], 0)) - if "dump" in operation: - lines.append(rst_paragraph(":dump:", 0)) - lines.append(parse_do(operation["dump"], 0)) - - # New line after fields - lines.append("\n") - - return "\n".join(lines) - - -def parse_entries(entries: List[Dict[str, Any]], level: int) -> str: - """Parse a list of entries""" - ignored = ["pad"] - lines = [] - for entry in entries: - if isinstance(entry, dict): - # entries could be a list or a dictionary - field_name = entry.get("name", "") - if field_name in ignored: - continue - type_ = entry.get("type") - if type_: - field_name += f" ({inline(type_)})" - lines.append( - rst_fields(field_name, sanitize(entry.get("doc", "")), level) - ) - elif isinstance(entry, list): - lines.append(rst_list_inline(entry, level)) - else: - lines.append(rst_bullet(inline(sanitize(entry)), level)) - - lines.append("\n") - return "\n".join(lines) - - -def parse_definitions(defs: Dict[str, Any], namespace: str) -> str: - """Parse definitions section""" - preprocessed = ["name", "entries", "members"] - ignored = ["render-max"] # This is not printed - lines = [] - - for definition in defs: - lines.append(rst_section(namespace, 'definition', definition["name"])) - for k in definition.keys(): - if k in preprocessed + ignored: - continue - lines.append(rst_fields(k, sanitize(definition[k]), 0)) - - # Field list needs to finish with a new line - lines.append("\n") - if "entries" in definition: - lines.append(rst_paragraph(":entries:", 0)) - lines.append(parse_entries(definition["entries"], 1)) - if "members" in definition: - lines.append(rst_paragraph(":members:", 0)) - lines.append(parse_entries(definition["members"], 1)) - - return "\n".join(lines) - - -def parse_attr_sets(entries: List[Dict[str, Any]], namespace: 
str) -> str: - """Parse attribute from attribute-set""" - preprocessed = ["name", "type"] - linkable = ["enum", "nested-attributes", "struct", "sub-message"] - ignored = ["checks"] - lines = [] - - for entry in entries: - lines.append(rst_section(namespace, 'attribute-set', entry["name"])) - for attr in entry["attributes"]: - type_ = attr.get("type") - attr_line = attr["name"] - if type_: - # Add the attribute type in the same line - attr_line += f" ({inline(type_)})" - - lines.append(rst_subsubsection(attr_line)) - - for k in attr.keys(): - if k in preprocessed + ignored: - continue - if k in linkable: - value = rst_ref(namespace, k, attr[k]) - else: - value = sanitize(attr[k]) - lines.append(rst_fields(k, value, 0)) - lines.append("\n") - - return "\n".join(lines) - - -def parse_sub_messages(entries: List[Dict[str, Any]], namespace: str) -> str: - """Parse sub-message definitions""" - lines = [] - - for entry in entries: - lines.append(rst_section(namespace, 'sub-message', entry["name"])) - for fmt in entry["formats"]: - value = fmt["value"] - - lines.append(rst_bullet(bold(value))) - for attr in ['fixed-header', 'attribute-set']: - if attr in fmt: - lines.append(rst_fields(attr, - rst_ref(namespace, attr, fmt[attr]), - 1)) - lines.append("\n") - - return "\n".join(lines) - - -def parse_yaml(obj: Dict[str, Any]) -> str: - """Format the whole YAML into a RST string""" - lines = [] - - # Main header - - lines.append(rst_header()) - - family = obj['name'] - - title = f"Family ``{family}`` netlink specification" - lines.append(rst_title(title)) - lines.append(rst_paragraph(".. contents:: :depth: 3\n")) - - if "doc" in obj: - lines.append(rst_subtitle("Summary")) - lines.append(rst_paragraph(obj["doc"], 0)) - - # Operations - if "operations" in obj: - lines.append(rst_subtitle("Operations")) - lines.append(parse_operations(obj["operations"]["list"], family)) - - # Multicast groups - if "mcast-groups" in obj: - lines.append(rst_subtitle("Multicast groups")) - lines.append(parse_mcast_group(obj["mcast-groups"]["list"])) - - # Definitions - if "definitions" in obj: - lines.append(rst_subtitle("Definitions")) - lines.append(parse_definitions(obj["definitions"], family)) - - # Attributes set - if "attribute-sets" in obj: - lines.append(rst_subtitle("Attribute sets")) - lines.append(parse_attr_sets(obj["attribute-sets"], family)) - - # Sub-messages - if "sub-messages" in obj: - lines.append(rst_subtitle("Sub-messages")) - lines.append(parse_sub_messages(obj["sub-messages"], family)) - - return "\n".join(lines) - - -# Main functions -# ============== +sys.path.append(pathlib.Path(__file__).resolve().parent.as_posix()) +from lib import YnlDocGenerator # pylint: disable=C0413 def parse_arguments() -> argparse.Namespace: """Parse arguments from user""" @@ -367,9 +31,6 @@ def parse_arguments() -> argparse.Namespace: # Index and input are mutually exclusive group = parser.add_mutually_exclusive_group() - group.add_argument( - "-x", "--index", action="store_true", help="Generate the index page" - ) group.add_argument("-i", "--input", help="YAML file name") args = parser.parse_args() @@ -391,15 +52,6 @@ def parse_arguments() -> argparse.Namespace: return args -def parse_yaml_file(filename: str) -> str: - """Transform the YAML specified by filename into an RST-formatted string""" - with open(filename, "r", encoding="utf-8") as spec_file: - yaml_data = yaml.safe_load(spec_file) - content = parse_yaml(yaml_data) - - return content - - def write_to_rstfile(content: str, filename: str) -> None: """Write the 
generated content into an RST file""" logging.debug("Saving RST file to %s", filename) @@ -408,35 +60,17 @@ def write_to_rstfile(content: str, filename: str) -> None: rst_file.write(content) -def generate_main_index_rst(output: str) -> None: - """Generate the `networking_spec/index` content and write to the file""" - lines = [] - - lines.append(rst_header()) - lines.append(rst_label("specs")) - lines.append(rst_title("Netlink Family Specifications")) - lines.append(rst_toctree(1)) - - index_dir = os.path.dirname(output) - logging.debug("Looking for .rst files in %s", index_dir) - for filename in sorted(os.listdir(index_dir)): - if not filename.endswith(".rst") or filename == "index.rst": - continue - lines.append(f" {filename.replace('.rst', '')}\n") - - logging.debug("Writing an index file at %s", output) - write_to_rstfile("".join(lines), output) - - def main() -> None: """Main function that reads the YAML files and generates the RST files""" args = parse_arguments() + parser = YnlDocGenerator() + if args.input: logging.debug("Parsing %s", args.input) try: - content = parse_yaml_file(os.path.join(args.input)) + content = parser.parse_yaml_file(os.path.join(args.input)) except Exception as exception: logging.warning("Failed to parse %s.", args.input) logging.warning(exception) @@ -444,10 +78,6 @@ def main() -> None: write_to_rstfile(content, args.output) - if args.index: - # Generate the index RST file - generate_main_index_rst(args.output) - if __name__ == "__main__": main() diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h index 9386dfe8b884..794d44d19c88 100644 --- a/tools/testing/selftests/bpf/bpf_kfuncs.h +++ b/tools/testing/selftests/bpf/bpf_kfuncs.h @@ -19,6 +19,9 @@ extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags, extern int bpf_dynptr_from_xdp(struct xdp_md *xdp, __u64 flags, struct bpf_dynptr *ptr__uninit) __ksym __weak; +extern int bpf_dynptr_from_skb_meta(struct __sk_buff *skb, __u64 flags, + struct bpf_dynptr *ptr__uninit) __ksym __weak; + /* Description * Obtain a read-only pointer to the dynptr's data * Returns diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 8916ab814a3e..70b28c1e653e 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -61,6 +61,7 @@ CONFIG_MPLS_IPTUNNEL=y CONFIG_MPLS_ROUTING=y CONFIG_MPTCP=y CONFIG_NET_ACT_GACT=y +CONFIG_NET_ACT_MIRRED=y CONFIG_NET_ACT_SKBMOD=y CONFIG_NET_CLS=y CONFIG_NET_CLS_ACT=y diff --git a/tools/testing/selftests/bpf/prog_tests/dynptr.c b/tools/testing/selftests/bpf/prog_tests/dynptr.c index 9b2d9ceda210..b9f86cb91e81 100644 --- a/tools/testing/selftests/bpf/prog_tests/dynptr.c +++ b/tools/testing/selftests/bpf/prog_tests/dynptr.c @@ -32,6 +32,8 @@ static struct { {"test_ringbuf", SETUP_SYSCALL_SLEEP}, {"test_skb_readonly", SETUP_SKB_PROG}, {"test_dynptr_skb_data", SETUP_SKB_PROG}, + {"test_dynptr_skb_meta_data", SETUP_SKB_PROG}, + {"test_dynptr_skb_meta_flags", SETUP_SKB_PROG}, {"test_adjust", SETUP_SYSCALL_SLEEP}, {"test_adjust_err", SETUP_SYSCALL_SLEEP}, {"test_zero_size_dynptr", SETUP_SYSCALL_SLEEP}, diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c index b9d9f0a502ce..46e0730174ed 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c @@ -9,6 +9,7 @@ #define TX_NETNS "xdp_context_tx" #define RX_NETNS 
"xdp_context_rx" #define TAP_NAME "tap0" +#define DUMMY_NAME "dum0" #define TAP_NETNS "xdp_context_tuntap" #define TEST_PAYLOAD_LEN 32 @@ -156,15 +157,30 @@ err: return -1; } -static void assert_test_result(struct test_xdp_meta *skel) +static int write_test_packet(int tap_fd) +{ + __u8 packet[sizeof(struct ethhdr) + TEST_PAYLOAD_LEN]; + int n; + + /* The ethernet header doesn't need to be valid for this test */ + memset(packet, 0, sizeof(struct ethhdr)); + memcpy(packet + sizeof(struct ethhdr), test_payload, TEST_PAYLOAD_LEN); + + n = write(tap_fd, packet, sizeof(packet)); + if (!ASSERT_EQ(n, sizeof(packet), "write packet")) + return -1; + + return 0; +} + +static void assert_test_result(const struct bpf_map *result_map) { int err; __u32 map_key = 0; __u8 map_value[TEST_PAYLOAD_LEN]; - err = bpf_map__lookup_elem(skel->maps.test_result, &map_key, - sizeof(map_key), &map_value, - TEST_PAYLOAD_LEN, BPF_ANY); + err = bpf_map__lookup_elem(result_map, &map_key, sizeof(map_key), + &map_value, TEST_PAYLOAD_LEN, BPF_ANY); if (!ASSERT_OK(err, "lookup test_result")) return; @@ -172,6 +188,18 @@ static void assert_test_result(struct test_xdp_meta *skel) "test_result map contains test payload"); } +static bool clear_test_result(struct bpf_map *result_map) +{ + const __u8 v[sizeof(test_payload)] = {}; + const __u32 k = 0; + int err; + + err = bpf_map__update_elem(result_map, &k, sizeof(k), v, sizeof(v), BPF_ANY); + ASSERT_OK(err, "update test_result"); + + return err == 0; +} + void test_xdp_context_veth(void) { LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS); @@ -248,7 +276,7 @@ void test_xdp_context_veth(void) if (!ASSERT_OK(ret, "send_test_packet")) goto close; - assert_test_result(skel); + assert_test_result(skel->maps.test_result); close: close_netns(nstoken); @@ -257,17 +285,21 @@ close: netns_free(tx_ns); } -void test_xdp_context_tuntap(void) +static void test_tuntap(struct bpf_program *xdp_prog, + struct bpf_program *tc_prio_1_prog, + struct bpf_program *tc_prio_2_prog, + struct bpf_map *result_map) { LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS); LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1); struct netns_obj *ns = NULL; - struct test_xdp_meta *skel = NULL; - __u8 packet[sizeof(struct ethhdr) + TEST_PAYLOAD_LEN]; int tap_fd = -1; int tap_ifindex; int ret; + if (!clear_test_result(result_map)) + return; + ns = netns_new(TAP_NETNS, true); if (!ASSERT_OK_PTR(ns, "create and open ns")) return; @@ -278,10 +310,6 @@ void test_xdp_context_tuntap(void) SYS(close, "ip link set dev " TAP_NAME " up"); - skel = test_xdp_meta__open_and_load(); - if (!ASSERT_OK_PTR(skel, "open and load skeleton")) - goto close; - tap_ifindex = if_nametoindex(TAP_NAME); if (!ASSERT_GE(tap_ifindex, 0, "if_nametoindex")) goto close; @@ -291,33 +319,175 @@ void test_xdp_context_tuntap(void) if (!ASSERT_OK(ret, "bpf_tc_hook_create")) goto close; - tc_opts.prog_fd = bpf_program__fd(skel->progs.ing_cls); + tc_opts.prog_fd = bpf_program__fd(tc_prio_1_prog); ret = bpf_tc_attach(&tc_hook, &tc_opts); if (!ASSERT_OK(ret, "bpf_tc_attach")) goto close; - ret = bpf_xdp_attach(tap_ifindex, bpf_program__fd(skel->progs.ing_xdp), + if (tc_prio_2_prog) { + LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 2, + .prog_fd = bpf_program__fd(tc_prio_2_prog)); + + ret = bpf_tc_attach(&tc_hook, &tc_opts); + if (!ASSERT_OK(ret, "bpf_tc_attach")) + goto close; + } + + ret = bpf_xdp_attach(tap_ifindex, bpf_program__fd(xdp_prog), 0, NULL); if (!ASSERT_GE(ret, 0, "bpf_xdp_attach")) goto close; 
-	/* The ethernet header is not relevant for this test and doesn't need to
-	 * be meaningful.
-	 */
-	struct ethhdr eth = { 0 };
+	ret = write_test_packet(tap_fd);
+	if (!ASSERT_OK(ret, "write_test_packet"))
+		goto close;
 
-	memcpy(packet, &eth, sizeof(eth));
-	memcpy(packet + sizeof(eth), test_payload, TEST_PAYLOAD_LEN);
+	assert_test_result(result_map);
+
+close:
+	if (tap_fd >= 0)
+		close(tap_fd);
+	netns_free(ns);
+}
 
-	ret = write(tap_fd, packet, sizeof(packet));
-	if (!ASSERT_EQ(ret, sizeof(packet), "write packet"))
+/* Write a packet to a tap dev and copy it to ingress of a dummy dev */
+static void test_tuntap_mirred(struct bpf_program *xdp_prog,
+			       struct bpf_program *tc_prog,
+			       bool *test_pass)
+{
+	LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS);
+	LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1);
+	struct netns_obj *ns = NULL;
+	int dummy_ifindex;
+	int tap_fd = -1;
+	int tap_ifindex;
+	int ret;
+
+	*test_pass = false;
+
+	ns = netns_new(TAP_NETNS, true);
+	if (!ASSERT_OK_PTR(ns, "netns_new"))
+		return;
+
+	/* Setup dummy interface */
+	SYS(close, "ip link add name " DUMMY_NAME " type dummy");
+	SYS(close, "ip link set dev " DUMMY_NAME " up");
+
+	dummy_ifindex = if_nametoindex(DUMMY_NAME);
+	if (!ASSERT_GE(dummy_ifindex, 0, "if_nametoindex"))
+		goto close;
+
+	tc_hook.ifindex = dummy_ifindex;
+	ret = bpf_tc_hook_create(&tc_hook);
+	if (!ASSERT_OK(ret, "bpf_tc_hook_create"))
+		goto close;
+
+	tc_opts.prog_fd = bpf_program__fd(tc_prog);
+	ret = bpf_tc_attach(&tc_hook, &tc_opts);
+	if (!ASSERT_OK(ret, "bpf_tc_attach"))
+		goto close;
+
+	/* Setup TAP interface */
+	tap_fd = open_tuntap(TAP_NAME, true);
+	if (!ASSERT_GE(tap_fd, 0, "open_tuntap"))
+		goto close;
+
+	SYS(close, "ip link set dev " TAP_NAME " up");
+
+	tap_ifindex = if_nametoindex(TAP_NAME);
+	if (!ASSERT_GE(tap_ifindex, 0, "if_nametoindex"))
+		goto close;
+
+	ret = bpf_xdp_attach(tap_ifindex, bpf_program__fd(xdp_prog), 0, NULL);
+	if (!ASSERT_GE(ret, 0, "bpf_xdp_attach"))
 		goto close;
 
-	assert_test_result(skel);
+	/* Copy all packets received from TAP to dummy ingress */
+	SYS(close, "tc qdisc add dev " TAP_NAME " clsact");
+	SYS(close, "tc filter add dev " TAP_NAME " ingress "
+		   "protocol all matchall "
+		   "action mirred ingress mirror dev " DUMMY_NAME);
+
+	/* Receive a packet on TAP */
+	ret = write_test_packet(tap_fd);
+	if (!ASSERT_OK(ret, "write_test_packet"))
+		goto close;
+
+	ASSERT_TRUE(*test_pass, "test_pass");
 
 close:
 	if (tap_fd >= 0)
 		close(tap_fd);
-	test_xdp_meta__destroy(skel);
 	netns_free(ns);
 }
+
+void test_xdp_context_tuntap(void)
+{
+	struct test_xdp_meta *skel = NULL;
+
+	skel = test_xdp_meta__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "open and load skeleton"))
+		return;
+
+	if (test__start_subtest("data_meta"))
+		test_tuntap(skel->progs.ing_xdp,
+			    skel->progs.ing_cls,
+			    NULL, /* tc prio 2 */
+			    skel->maps.test_result);
+	if (test__start_subtest("dynptr_read"))
+		test_tuntap(skel->progs.ing_xdp,
+			    skel->progs.ing_cls_dynptr_read,
+			    NULL, /* tc prio 2 */
+			    skel->maps.test_result);
+	if (test__start_subtest("dynptr_slice"))
+		test_tuntap(skel->progs.ing_xdp,
+			    skel->progs.ing_cls_dynptr_slice,
+			    NULL, /* tc prio 2 */
+			    skel->maps.test_result);
+	if (test__start_subtest("dynptr_write"))
+		test_tuntap(skel->progs.ing_xdp_zalloc_meta,
+			    skel->progs.ing_cls_dynptr_write,
+			    skel->progs.ing_cls_dynptr_read,
+			    skel->maps.test_result);
+	if (test__start_subtest("dynptr_slice_rdwr"))
+		test_tuntap(skel->progs.ing_xdp_zalloc_meta,
+			    skel->progs.ing_cls_dynptr_slice_rdwr,
+			    
skel->progs.ing_cls_dynptr_slice, + skel->maps.test_result); + if (test__start_subtest("dynptr_offset")) + test_tuntap(skel->progs.ing_xdp_zalloc_meta, + skel->progs.ing_cls_dynptr_offset_wr, + skel->progs.ing_cls_dynptr_offset_rd, + skel->maps.test_result); + if (test__start_subtest("dynptr_offset_oob")) + test_tuntap(skel->progs.ing_xdp, + skel->progs.ing_cls_dynptr_offset_oob, + skel->progs.ing_cls, + skel->maps.test_result); + if (test__start_subtest("clone_data_meta_empty_on_data_write")) + test_tuntap_mirred(skel->progs.ing_xdp, + skel->progs.clone_data_meta_empty_on_data_write, + &skel->bss->test_pass); + if (test__start_subtest("clone_data_meta_empty_on_meta_write")) + test_tuntap_mirred(skel->progs.ing_xdp, + skel->progs.clone_data_meta_empty_on_meta_write, + &skel->bss->test_pass); + if (test__start_subtest("clone_dynptr_empty_on_data_slice_write")) + test_tuntap_mirred(skel->progs.ing_xdp, + skel->progs.clone_dynptr_empty_on_data_slice_write, + &skel->bss->test_pass); + if (test__start_subtest("clone_dynptr_empty_on_meta_slice_write")) + test_tuntap_mirred(skel->progs.ing_xdp, + skel->progs.clone_dynptr_empty_on_meta_slice_write, + &skel->bss->test_pass); + if (test__start_subtest("clone_dynptr_rdonly_before_data_dynptr_write")) + test_tuntap_mirred(skel->progs.ing_xdp, + skel->progs.clone_dynptr_rdonly_before_data_dynptr_write, + &skel->bss->test_pass); + if (test__start_subtest("clone_dynptr_rdonly_before_meta_dynptr_write")) + test_tuntap_mirred(skel->progs.ing_xdp, + skel->progs.clone_dynptr_rdonly_before_meta_dynptr_write, + &skel->bss->test_pass); + + test_xdp_meta__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c index ffbd4b116d17..23b2aa2604de 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c @@ -64,7 +64,8 @@ int dump_udp4(struct bpf_iter__udp *ctx) 0, 0L, 0, ctx->uid, 0, sock_i_ino(&inet->sk), inet->sk.sk_refcnt.refs.counter, udp_sk, - inet->sk.sk_drops.counter); + udp_sk->drop_counters.drops0.counter + + udp_sk->drop_counters.drops1.counter); return 0; } diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c b/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c index 47ff7754f4fd..c48b05aa2a4b 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c @@ -72,7 +72,7 @@ int dump_udp6(struct bpf_iter__udp *ctx) 0, 0L, 0, ctx->uid, 0, sock_i_ino(&inet->sk), inet->sk.sk_refcnt.refs.counter, udp_sk, - inet->sk.sk_drops.counter); - + udp_sk->drop_counters.drops0.counter + + udp_sk->drop_counters.drops1.counter); return 0; } diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c index bd8f15229f5c..dda6a8dada82 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_fail.c +++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c @@ -269,6 +269,26 @@ int data_slice_out_of_bounds_skb(struct __sk_buff *skb) return SK_PASS; } +/* A metadata slice can't be accessed out of bounds */ +SEC("?tc") +__failure __msg("value is outside of the allowed memory range") +int data_slice_out_of_bounds_skb_meta(struct __sk_buff *skb) +{ + struct bpf_dynptr meta; + __u8 *md; + + bpf_dynptr_from_skb_meta(skb, 0, &meta); + + md = bpf_dynptr_slice_rdwr(&meta, 0, NULL, sizeof(*md)); + if (!md) + return SK_DROP; + + /* this should fail */ + *(md + 1) = 42; + + return SK_PASS; +} + SEC("?raw_tp") __failure __msg("value is outside of the 
allowed memory range") int data_slice_out_of_bounds_map_value(void *ctx) @@ -1089,6 +1109,26 @@ int skb_invalid_slice_write(struct __sk_buff *skb) return SK_PASS; } +/* bpf_dynptr_slice()s are read-only and cannot be written to */ +SEC("?tc") +__failure __msg("R{{[0-9]+}} cannot write into rdonly_mem") +int skb_meta_invalid_slice_write(struct __sk_buff *skb) +{ + struct bpf_dynptr meta; + __u8 *md; + + bpf_dynptr_from_skb_meta(skb, 0, &meta); + + md = bpf_dynptr_slice(&meta, 0, NULL, sizeof(*md)); + if (!md) + return SK_DROP; + + /* this should fail */ + *md = 42; + + return SK_PASS; +} + /* The read-only data slice is invalidated whenever a helper changes packet data */ SEC("?tc") __failure __msg("invalid mem access 'scalar'") @@ -1192,6 +1232,188 @@ int skb_invalid_data_slice4(struct __sk_buff *skb) return SK_PASS; } +/* Read-only skb data slice is invalidated on write to skb metadata */ +SEC("?tc") +__failure __msg("invalid mem access 'scalar'") +int ro_skb_slice_invalid_after_metadata_write(struct __sk_buff *skb) +{ + struct bpf_dynptr data, meta; + __u8 *d; + + bpf_dynptr_from_skb(skb, 0, &data); + bpf_dynptr_from_skb_meta(skb, 0, &meta); + + d = bpf_dynptr_slice(&data, 0, NULL, sizeof(*d)); + if (!d) + return SK_DROP; + + bpf_dynptr_write(&meta, 0, "x", 1, 0); + + /* this should fail */ + val = *d; + + return SK_PASS; +} + +/* Read-write skb data slice is invalidated on write to skb metadata */ +SEC("?tc") +__failure __msg("invalid mem access 'scalar'") +int rw_skb_slice_invalid_after_metadata_write(struct __sk_buff *skb) +{ + struct bpf_dynptr data, meta; + __u8 *d; + + bpf_dynptr_from_skb(skb, 0, &data); + bpf_dynptr_from_skb_meta(skb, 0, &meta); + + d = bpf_dynptr_slice_rdwr(&data, 0, NULL, sizeof(*d)); + if (!d) + return SK_DROP; + + bpf_dynptr_write(&meta, 0, "x", 1, 0); + + /* this should fail */ + *d = 42; + + return SK_PASS; +} + +/* Read-only skb metadata slice is invalidated on write to skb data */ +SEC("?tc") +__failure __msg("invalid mem access 'scalar'") +int ro_skb_meta_slice_invalid_after_payload_write(struct __sk_buff *skb) +{ + struct bpf_dynptr data, meta; + __u8 *md; + + bpf_dynptr_from_skb(skb, 0, &data); + bpf_dynptr_from_skb_meta(skb, 0, &meta); + + md = bpf_dynptr_slice(&meta, 0, NULL, sizeof(*md)); + if (!md) + return SK_DROP; + + bpf_dynptr_write(&data, 0, "x", 1, 0); + + /* this should fail */ + val = *md; + + return SK_PASS; +} + +/* Read-write skb metadata slice is invalidated on write to skb data slice */ +SEC("?tc") +__failure __msg("invalid mem access 'scalar'") +int rw_skb_meta_slice_invalid_after_payload_write(struct __sk_buff *skb) +{ + struct bpf_dynptr data, meta; + __u8 *md; + + bpf_dynptr_from_skb(skb, 0, &data); + bpf_dynptr_from_skb_meta(skb, 0, &meta); + + md = bpf_dynptr_slice_rdwr(&meta, 0, NULL, sizeof(*md)); + if (!md) + return SK_DROP; + + bpf_dynptr_write(&data, 0, "x", 1, 0); + + /* this should fail */ + *md = 42; + + return SK_PASS; +} + +/* Read-only skb metadata slice is invalidated whenever a helper changes packet data */ +SEC("?tc") +__failure __msg("invalid mem access 'scalar'") +int ro_skb_meta_slice_invalid_after_payload_helper(struct __sk_buff *skb) +{ + struct bpf_dynptr meta; + __u8 *md; + + bpf_dynptr_from_skb_meta(skb, 0, &meta); + + md = bpf_dynptr_slice(&meta, 0, NULL, sizeof(*md)); + if (!md) + return SK_DROP; + + if (bpf_skb_pull_data(skb, skb->len)) + return SK_DROP; + + /* this should fail */ + val = *md; + + return SK_PASS; +} + +/* Read-write skb metadata slice is invalidated whenever a helper changes packet data */ 
+SEC("?tc") +__failure __msg("invalid mem access 'scalar'") +int rw_skb_meta_slice_invalid_after_payload_helper(struct __sk_buff *skb) +{ + struct bpf_dynptr meta; + __u8 *md; + + bpf_dynptr_from_skb_meta(skb, 0, &meta); + + md = bpf_dynptr_slice_rdwr(&meta, 0, NULL, sizeof(*md)); + if (!md) + return SK_DROP; + + if (bpf_skb_pull_data(skb, skb->len)) + return SK_DROP; + + /* this should fail */ + *md = 42; + + return SK_PASS; +} + +/* Read-only skb metadata slice is invalidated on write to skb metadata */ +SEC("?tc") +__failure __msg("invalid mem access 'scalar'") +int ro_skb_meta_slice_invalid_after_metadata_write(struct __sk_buff *skb) +{ + struct bpf_dynptr meta; + __u8 *md; + + bpf_dynptr_from_skb_meta(skb, 0, &meta); + + md = bpf_dynptr_slice(&meta, 0, NULL, sizeof(*md)); + if (!md) + return SK_DROP; + + bpf_dynptr_write(&meta, 0, "x", 1, 0); + + /* this should fail */ + val = *md; + + return SK_PASS; +} + +/* Read-write skb metadata slice is invalidated on write to skb metadata */ +SEC("?tc") +__failure __msg("invalid mem access 'scalar'") +int rw_skb_meta_slice_invalid_after_metadata_write(struct __sk_buff *skb) +{ + struct bpf_dynptr meta; + __u8 *md; + + bpf_dynptr_from_skb_meta(skb, 0, &meta); + + md = bpf_dynptr_slice_rdwr(&meta, 0, NULL, sizeof(*md)); + if (!md) + return SK_DROP; + + bpf_dynptr_write(&meta, 0, "x", 1, 0); + + /* this should fail */ + *md = 42; + + return SK_PASS; +} + /* The read-only data slice is invalidated whenever a helper changes packet data */ SEC("?xdp") __failure __msg("invalid mem access 'scalar'") @@ -1255,6 +1477,19 @@ int skb_invalid_ctx(void *ctx) return 0; } +/* Only supported prog type can create skb_meta-type dynptrs */ +SEC("?raw_tp") +__failure __msg("calling kernel function bpf_dynptr_from_skb_meta is not allowed") +int skb_meta_invalid_ctx(void *ctx) +{ + struct bpf_dynptr meta; + + /* this should fail */ + bpf_dynptr_from_skb_meta(ctx, 0, &meta); + + return 0; +} + SEC("fentry/skb_tx_error") __failure __msg("must be referenced or trusted") int BPF_PROG(skb_invalid_ctx_fentry, void *skb) @@ -1665,6 +1900,29 @@ int clone_skb_packet_data(struct __sk_buff *skb) return 0; } +/* A skb clone's metadata slice becomes invalid anytime packet data changes */ +SEC("?tc") +__failure __msg("invalid mem access 'scalar'") +int clone_skb_packet_meta(struct __sk_buff *skb) +{ + struct bpf_dynptr clone, meta; + __u8 *md; + + bpf_dynptr_from_skb_meta(skb, 0, &meta); + bpf_dynptr_clone(&meta, &clone); + md = bpf_dynptr_slice_rdwr(&clone, 0, NULL, sizeof(*md)); + if (!md) + return SK_DROP; + + if (bpf_skb_pull_data(skb, skb->len)) + return SK_DROP; + + /* this should fail */ + *md = 42; + + return 0; +} + /* A xdp clone's data slices should be invalid anytime packet data changes */ SEC("?xdp") __failure __msg("invalid mem access 'scalar'") diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c index 8315273cb900..127dea342e5a 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_success.c +++ b/tools/testing/selftests/bpf/progs/dynptr_success.c @@ -211,6 +211,61 @@ int test_dynptr_skb_data(struct __sk_buff *skb) return 1; } +SEC("?tc") +int test_dynptr_skb_meta_data(struct __sk_buff *skb) +{ + struct bpf_dynptr meta; + __u8 *md; + int ret; + + err = 1; + ret = bpf_dynptr_from_skb_meta(skb, 0, &meta); + if (ret) + return 1; + + /* This should return NULL. 
Must use bpf_dynptr_slice API */ + err = 2; + md = bpf_dynptr_data(&meta, 0, sizeof(*md)); + if (md) + return 1; + + err = 0; + return 1; +} + +/* Check that skb metadata dynptr ops don't accept any flags. */ +SEC("?tc") +int test_dynptr_skb_meta_flags(struct __sk_buff *skb) +{ + const __u64 INVALID_FLAGS = ~0ULL; + struct bpf_dynptr meta; + __u8 buf; + int ret; + + err = 1; + ret = bpf_dynptr_from_skb_meta(skb, INVALID_FLAGS, &meta); + if (ret != -EINVAL) + return 1; + + err = 2; + ret = bpf_dynptr_from_skb_meta(skb, 0, &meta); + if (ret) + return 1; + + err = 3; + ret = bpf_dynptr_read(&buf, 0, &meta, 0, INVALID_FLAGS); + if (ret != -EINVAL) + return 1; + + err = 4; + ret = bpf_dynptr_write(&meta, 0, &buf, 0, INVALID_FLAGS); + if (ret != -EINVAL) + return 1; + + err = 0; + return 1; +} + SEC("tp/syscalls/sys_enter_nanosleep") int test_adjust(void *ctx) { diff --git a/tools/testing/selftests/bpf/progs/test_xdp_meta.c b/tools/testing/selftests/bpf/progs/test_xdp_meta.c index fcf6ca14f2ea..d79cb74b571e 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_meta.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_meta.c @@ -1,8 +1,11 @@ +#include <stdbool.h> #include <linux/bpf.h> +#include <linux/errno.h> #include <linux/if_ether.h> #include <linux/pkt_cls.h> #include <bpf/bpf_helpers.h> +#include "bpf_kfuncs.h" #define META_SIZE 32 @@ -23,6 +26,8 @@ struct { __uint(value_size, META_SIZE); } test_result SEC(".maps"); +bool test_pass; + SEC("tc") int ing_cls(struct __sk_buff *ctx) { @@ -40,6 +45,231 @@ int ing_cls(struct __sk_buff *ctx) return TC_ACT_SHOT; } +/* Read from metadata using bpf_dynptr_read helper */ +SEC("tc") +int ing_cls_dynptr_read(struct __sk_buff *ctx) +{ + struct bpf_dynptr meta; + const __u32 zero = 0; + __u8 *dst; + + dst = bpf_map_lookup_elem(&test_result, &zero); + if (!dst) + return TC_ACT_SHOT; + + bpf_dynptr_from_skb_meta(ctx, 0, &meta); + bpf_dynptr_read(dst, META_SIZE, &meta, 0, 0); + + return TC_ACT_SHOT; +} + +/* Write to metadata using bpf_dynptr_write helper */ +SEC("tc") +int ing_cls_dynptr_write(struct __sk_buff *ctx) +{ + struct bpf_dynptr data, meta; + __u8 *src; + + bpf_dynptr_from_skb(ctx, 0, &data); + src = bpf_dynptr_slice(&data, sizeof(struct ethhdr), NULL, META_SIZE); + if (!src) + return TC_ACT_SHOT; + + bpf_dynptr_from_skb_meta(ctx, 0, &meta); + bpf_dynptr_write(&meta, 0, src, META_SIZE, 0); + + return TC_ACT_UNSPEC; /* pass */ +} + +/* Read from metadata using read-only dynptr slice */ +SEC("tc") +int ing_cls_dynptr_slice(struct __sk_buff *ctx) +{ + struct bpf_dynptr meta; + const __u32 zero = 0; + __u8 *dst, *src; + + dst = bpf_map_lookup_elem(&test_result, &zero); + if (!dst) + return TC_ACT_SHOT; + + bpf_dynptr_from_skb_meta(ctx, 0, &meta); + src = bpf_dynptr_slice(&meta, 0, NULL, META_SIZE); + if (!src) + return TC_ACT_SHOT; + + __builtin_memcpy(dst, src, META_SIZE); + + return TC_ACT_SHOT; +} + +/* Write to metadata using writeable dynptr slice */ +SEC("tc") +int ing_cls_dynptr_slice_rdwr(struct __sk_buff *ctx) +{ + struct bpf_dynptr data, meta; + __u8 *src, *dst; + + bpf_dynptr_from_skb(ctx, 0, &data); + src = bpf_dynptr_slice(&data, sizeof(struct ethhdr), NULL, META_SIZE); + if (!src) + return TC_ACT_SHOT; + + bpf_dynptr_from_skb_meta(ctx, 0, &meta); + dst = bpf_dynptr_slice_rdwr(&meta, 0, NULL, META_SIZE); + if (!dst) + return TC_ACT_SHOT; + + __builtin_memcpy(dst, src, META_SIZE); + + return TC_ACT_UNSPEC; /* pass */ +} + +/* Read skb metadata in chunks from various offsets in different ways. 
*/ +SEC("tc") +int ing_cls_dynptr_offset_rd(struct __sk_buff *ctx) +{ + struct bpf_dynptr meta; + const __u32 chunk_len = META_SIZE / 4; + const __u32 zero = 0; + __u8 *dst, *src; + + dst = bpf_map_lookup_elem(&test_result, &zero); + if (!dst) + return TC_ACT_SHOT; + + /* 1. Regular read */ + bpf_dynptr_from_skb_meta(ctx, 0, &meta); + bpf_dynptr_read(dst, chunk_len, &meta, 0, 0); + dst += chunk_len; + + /* 2. Read from an offset-adjusted dynptr */ + bpf_dynptr_adjust(&meta, chunk_len, bpf_dynptr_size(&meta)); + bpf_dynptr_read(dst, chunk_len, &meta, 0, 0); + dst += chunk_len; + + /* 3. Read at an offset */ + bpf_dynptr_read(dst, chunk_len, &meta, chunk_len, 0); + dst += chunk_len; + + /* 4. Read from a slice starting at an offset */ + src = bpf_dynptr_slice(&meta, 2 * chunk_len, NULL, chunk_len); + if (!src) + return TC_ACT_SHOT; + __builtin_memcpy(dst, src, chunk_len); + + return TC_ACT_SHOT; +} + +/* Write skb metadata in chunks at various offsets in different ways. */ +SEC("tc") +int ing_cls_dynptr_offset_wr(struct __sk_buff *ctx) +{ + const __u32 chunk_len = META_SIZE / 4; + __u8 payload[META_SIZE]; + struct bpf_dynptr meta; + __u8 *dst, *src; + + bpf_skb_load_bytes(ctx, sizeof(struct ethhdr), payload, sizeof(payload)); + src = payload; + + /* 1. Regular write */ + bpf_dynptr_from_skb_meta(ctx, 0, &meta); + bpf_dynptr_write(&meta, 0, src, chunk_len, 0); + src += chunk_len; + + /* 2. Write to an offset-adjusted dynptr */ + bpf_dynptr_adjust(&meta, chunk_len, bpf_dynptr_size(&meta)); + bpf_dynptr_write(&meta, 0, src, chunk_len, 0); + src += chunk_len; + + /* 3. Write at an offset */ + bpf_dynptr_write(&meta, chunk_len, src, chunk_len, 0); + src += chunk_len; + + /* 4. Write to a slice starting at an offset */ + dst = bpf_dynptr_slice_rdwr(&meta, 2 * chunk_len, NULL, chunk_len); + if (!dst) + return TC_ACT_SHOT; + __builtin_memcpy(dst, src, chunk_len); + + return TC_ACT_UNSPEC; /* pass */ +} + +/* Pass an OOB offset to dynptr read, write, adjust, slice. 
*/ +SEC("tc") +int ing_cls_dynptr_offset_oob(struct __sk_buff *ctx) +{ + struct bpf_dynptr meta; + __u8 md, *p; + int err; + + err = bpf_dynptr_from_skb_meta(ctx, 0, &meta); + if (err) + goto fail; + + /* read offset OOB */ + err = bpf_dynptr_read(&md, sizeof(md), &meta, META_SIZE, 0); + if (err != -E2BIG) + goto fail; + + /* write offset OOB */ + err = bpf_dynptr_write(&meta, META_SIZE, &md, sizeof(md), 0); + if (err != -E2BIG) + goto fail; + + /* adjust end offset OOB */ + err = bpf_dynptr_adjust(&meta, 0, META_SIZE + 1); + if (err != -ERANGE) + goto fail; + + /* adjust start offset OOB */ + err = bpf_dynptr_adjust(&meta, META_SIZE + 1, META_SIZE + 1); + if (err != -ERANGE) + goto fail; + + /* slice offset OOB */ + p = bpf_dynptr_slice(&meta, META_SIZE, NULL, sizeof(*p)); + if (p) + goto fail; + + /* slice rdwr offset OOB */ + p = bpf_dynptr_slice_rdwr(&meta, META_SIZE, NULL, sizeof(*p)); + if (p) + goto fail; + + return TC_ACT_UNSPEC; +fail: + return TC_ACT_SHOT; +} + +/* Reserve and clear space for metadata but don't populate it */ +SEC("xdp") +int ing_xdp_zalloc_meta(struct xdp_md *ctx) +{ + struct ethhdr *eth = ctx_ptr(ctx, data); + __u8 *meta; + int ret; + + /* Drop any non-test packets */ + if (eth + 1 > ctx_ptr(ctx, data_end)) + return XDP_DROP; + if (eth->h_proto != 0) + return XDP_DROP; + + ret = bpf_xdp_adjust_meta(ctx, -META_SIZE); + if (ret < 0) + return XDP_DROP; + + meta = ctx_ptr(ctx, data_meta); + if (meta + META_SIZE > ctx_ptr(ctx, data)) + return XDP_DROP; + + __builtin_memset(meta, 0, META_SIZE); + + return XDP_PASS; +} + SEC("xdp") int ing_xdp(struct xdp_md *ctx) { @@ -73,4 +303,193 @@ int ing_xdp(struct xdp_md *ctx) return XDP_PASS; } +/* + * Check that skb->data_meta..skb->data is empty if prog writes to packet + * _payload_ using packet pointers. Applies only to cloned skbs. + */ +SEC("tc") +int clone_data_meta_empty_on_data_write(struct __sk_buff *ctx) +{ + struct ethhdr *eth = ctx_ptr(ctx, data); + + if (eth + 1 > ctx_ptr(ctx, data_end)) + goto out; + /* Ignore non-test packets */ + if (eth->h_proto != 0) + goto out; + + /* Expect no metadata */ + if (ctx->data_meta != ctx->data) + goto out; + + /* Packet write to trigger unclone in prologue */ + eth->h_proto = 42; + + test_pass = true; +out: + return TC_ACT_SHOT; +} + +/* + * Check that skb->data_meta..skb->data is empty if prog writes to packet + * _metadata_ using packet pointers. Applies only to cloned skbs. + */ +SEC("tc") +int clone_data_meta_empty_on_meta_write(struct __sk_buff *ctx) +{ + struct ethhdr *eth = ctx_ptr(ctx, data); + __u8 *md = ctx_ptr(ctx, data_meta); + + if (eth + 1 > ctx_ptr(ctx, data_end)) + goto out; + /* Ignore non-test packets */ + if (eth->h_proto != 0) + goto out; + + if (md + 1 > ctx_ptr(ctx, data)) { + /* Expect no metadata */ + test_pass = true; + } else { + /* Metadata write to trigger unclone in prologue */ + *md = 42; + } +out: + return TC_ACT_SHOT; +} + +/* + * Check that skb_meta dynptr is writable but empty if prog writes to packet + * _payload_ using a dynptr slice. Applies only to cloned skbs. 
+ */ +SEC("tc") +int clone_dynptr_empty_on_data_slice_write(struct __sk_buff *ctx) +{ + struct bpf_dynptr data, meta; + struct ethhdr *eth; + + bpf_dynptr_from_skb(ctx, 0, &data); + eth = bpf_dynptr_slice_rdwr(&data, 0, NULL, sizeof(*eth)); + if (!eth) + goto out; + /* Ignore non-test packets */ + if (eth->h_proto != 0) + goto out; + + /* Expect no metadata */ + bpf_dynptr_from_skb_meta(ctx, 0, &meta); + if (bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) > 0) + goto out; + + /* Packet write to trigger unclone in prologue */ + eth->h_proto = 42; + + test_pass = true; +out: + return TC_ACT_SHOT; +} + +/* + * Check that skb_meta dynptr is writable but empty if prog writes to packet + * _metadata_ using a dynptr slice. Applies only to cloned skbs. + */ +SEC("tc") +int clone_dynptr_empty_on_meta_slice_write(struct __sk_buff *ctx) +{ + struct bpf_dynptr data, meta; + const struct ethhdr *eth; + __u8 *md; + + bpf_dynptr_from_skb(ctx, 0, &data); + eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth)); + if (!eth) + goto out; + /* Ignore non-test packets */ + if (eth->h_proto != 0) + goto out; + + /* Expect no metadata */ + bpf_dynptr_from_skb_meta(ctx, 0, &meta); + if (bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) > 0) + goto out; + + /* Metadata write to trigger unclone in prologue */ + bpf_dynptr_from_skb_meta(ctx, 0, &meta); + md = bpf_dynptr_slice_rdwr(&meta, 0, NULL, sizeof(*md)); + if (md) + *md = 42; + + test_pass = true; +out: + return TC_ACT_SHOT; +} + +/* + * Check that skb_meta dynptr is read-only before prog writes to packet payload + * using dynptr_write helper. Applies only to cloned skbs. + */ +SEC("tc") +int clone_dynptr_rdonly_before_data_dynptr_write(struct __sk_buff *ctx) +{ + struct bpf_dynptr data, meta; + const struct ethhdr *eth; + + bpf_dynptr_from_skb(ctx, 0, &data); + eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth)); + if (!eth) + goto out; + /* Ignore non-test packets */ + if (eth->h_proto != 0) + goto out; + + /* Expect read-only metadata before unclone */ + bpf_dynptr_from_skb_meta(ctx, 0, &meta); + if (!bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) != META_SIZE) + goto out; + + /* Helper write to payload will unclone the packet */ + bpf_dynptr_write(&data, offsetof(struct ethhdr, h_proto), "x", 1, 0); + + /* Expect no metadata after unclone */ + bpf_dynptr_from_skb_meta(ctx, 0, &meta); + if (bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) != 0) + goto out; + + test_pass = true; +out: + return TC_ACT_SHOT; +} + +/* + * Check that skb_meta dynptr is read-only if prog writes to packet + * metadata using dynptr_write helper. Applies only to cloned skbs. + */ +SEC("tc") +int clone_dynptr_rdonly_before_meta_dynptr_write(struct __sk_buff *ctx) +{ + struct bpf_dynptr data, meta; + const struct ethhdr *eth; + + bpf_dynptr_from_skb(ctx, 0, &data); + eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth)); + if (!eth) + goto out; + /* Ignore non-test packets */ + if (eth->h_proto != 0) + goto out; + + /* Expect read-only metadata */ + bpf_dynptr_from_skb_meta(ctx, 0, &meta); + if (!bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) != META_SIZE) + goto out; + + /* Metadata write. Expect failure. 
*/ + bpf_dynptr_from_skb_meta(ctx, 0, &meta); + if (bpf_dynptr_write(&meta, 0, "x", 1, 0) != -EINVAL) + goto out; + + test_pass = true; +out: + return TC_ACT_SHOT; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/drivers/net/hds.py b/tools/testing/selftests/drivers/net/hds.py index 7c90a040ce45..a2011474e625 100755 --- a/tools/testing/selftests/drivers/net/hds.py +++ b/tools/testing/selftests/drivers/net/hds.py @@ -3,6 +3,7 @@ import errno import os +from typing import Union from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_raises, KsftSkipEx from lib.py import CmdExitFailure, EthtoolFamily, NlError from lib.py import NetDrvEnv @@ -58,7 +59,39 @@ def get_hds_thresh(cfg, netnl) -> None: if 'hds-thresh' not in rings: raise KsftSkipEx('hds-thresh not supported by device') + +def _hds_reset(cfg, netnl, rings) -> None: + cur = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + + arg = {'header': {'dev-index': cfg.ifindex}} + if cur.get('tcp-data-split') != rings.get('tcp-data-split'): + # Try to reset to "unknown" first, we don't know if the setting + # was the default or user chose it. Default seems more likely. + arg['tcp-data-split'] = "unknown" + netnl.rings_set(arg) + cur = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + if cur['tcp-data-split'] == rings['tcp-data-split']: + del arg['tcp-data-split'] + else: + # Try the explicit setting + arg['tcp-data-split'] = rings['tcp-data-split'] + if cur.get('hds-thresh') != rings.get('hds-thresh'): + arg['hds-thresh'] = rings['hds-thresh'] + if len(arg) > 1: + netnl.rings_set(arg) + + +def _defer_reset_hds(cfg, netnl) -> Union[dict, None]: + try: + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + if 'hds-thresh' in rings or 'tcp-data-split' in rings: + defer(_hds_reset, cfg, netnl, rings) + except NlError as e: + pass + + def set_hds_enable(cfg, netnl) -> None: + _defer_reset_hds(cfg, netnl) try: netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'tcp-data-split': 'enabled'}) except NlError as e: @@ -76,6 +109,7 @@ def set_hds_enable(cfg, netnl) -> None: ksft_eq('enabled', rings['tcp-data-split']) def set_hds_disable(cfg, netnl) -> None: + _defer_reset_hds(cfg, netnl) try: netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'tcp-data-split': 'disabled'}) except NlError as e: @@ -93,6 +127,7 @@ def set_hds_disable(cfg, netnl) -> None: ksft_eq('disabled', rings['tcp-data-split']) def set_hds_thresh_zero(cfg, netnl) -> None: + _defer_reset_hds(cfg, netnl) try: netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'hds-thresh': 0}) except NlError as e: @@ -110,6 +145,7 @@ def set_hds_thresh_zero(cfg, netnl) -> None: ksft_eq(0, rings['hds-thresh']) def set_hds_thresh_random(cfg, netnl) -> None: + _defer_reset_hds(cfg, netnl) try: rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) except NlError as e: @@ -140,6 +176,7 @@ def set_hds_thresh_random(cfg, netnl) -> None: ksft_eq(hds_thresh, rings['hds-thresh']) def set_hds_thresh_max(cfg, netnl) -> None: + _defer_reset_hds(cfg, netnl) try: rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) except NlError as e: @@ -157,6 +194,7 @@ def set_hds_thresh_max(cfg, netnl) -> None: ksft_eq(rings['hds-thresh'], rings['hds-thresh-max']) def set_hds_thresh_gt(cfg, netnl) -> None: + _defer_reset_hds(cfg, netnl) try: rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) except NlError as e: @@ -178,6 +216,7 @@ def set_xdp(cfg, netnl) -> None: """ mode = _get_hds_mode(cfg, netnl) if mode == 'enabled': + 
_defer_reset_hds(cfg, netnl) netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'tcp-data-split': 'unknown'}) diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile index fdc97355588c..5159fd34cb33 100644 --- a/tools/testing/selftests/drivers/net/hw/Makefile +++ b/tools/testing/selftests/drivers/net/hw/Makefile @@ -18,6 +18,7 @@ TEST_PROGS = \ pp_alloc_fail.py \ rss_api.py \ rss_ctx.py \ + rss_flow_label.py \ rss_input_xfrm.py \ tso.py \ xsk_reconfig.py \ diff --git a/tools/testing/selftests/drivers/net/hw/config b/tools/testing/selftests/drivers/net/hw/config index 88ae719e6f8f..e8a06aa1471c 100644 --- a/tools/testing/selftests/drivers/net/hw/config +++ b/tools/testing/selftests/drivers/net/hw/config @@ -1,5 +1,7 @@ +CONFIG_IO_URING=y CONFIG_IPV6=y CONFIG_IPV6_GRE=y CONFIG_NET_IPGRE=y CONFIG_NET_IPGRE_DEMUX=y +CONFIG_UDMABUF=y CONFIG_VXLAN=y diff --git a/tools/testing/selftests/drivers/net/hw/devmem.py b/tools/testing/selftests/drivers/net/hw/devmem.py index baa2f24240ba..45c2d49d55b6 100755 --- a/tools/testing/selftests/drivers/net/hw/devmem.py +++ b/tools/testing/selftests/drivers/net/hw/devmem.py @@ -24,7 +24,7 @@ def check_rx(cfg) -> None: require_devmem(cfg) port = rand_port() - socat = f"socat -u - TCP{cfg.addr_ipver}:{cfg.addr}:{port},bind={cfg.remote_addr}:{port}" + socat = f"socat -u - TCP{cfg.addr_ipver}:{cfg.baddr}:{port},bind={cfg.remote_baddr}:{port}" listen_cmd = f"{cfg.bin_local} -l -f {cfg.ifname} -s {cfg.addr} -p {port} -c {cfg.remote_addr} -v 7" with bkg(listen_cmd, exit_wait=True) as ncdevmem: @@ -42,9 +42,9 @@ def check_tx(cfg) -> None: port = rand_port() listen_cmd = f"socat -U - TCP{cfg.addr_ipver}-LISTEN:{port}" - with bkg(listen_cmd) as socat: - wait_port_listen(port) - cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_remote} -f {cfg.ifname} -s {cfg.addr} -p {port}", host=cfg.remote, shell=True) + with bkg(listen_cmd, host=cfg.remote, exit_wait=True) as socat: + wait_port_listen(port, host=cfg.remote) + cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_local} -f {cfg.ifname} -s {cfg.remote_addr} -p {port}", shell=True) ksft_eq(socat.stdout.strip(), "hello\nworld") @@ -56,9 +56,9 @@ def check_tx_chunks(cfg) -> None: port = rand_port() listen_cmd = f"socat -U - TCP{cfg.addr_ipver}-LISTEN:{port}" - with bkg(listen_cmd, exit_wait=True) as socat: - wait_port_listen(port) - cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_remote} -f {cfg.ifname} -s {cfg.addr} -p {port} -z 3", host=cfg.remote, shell=True) + with bkg(listen_cmd, host=cfg.remote, exit_wait=True) as socat: + wait_port_listen(port, host=cfg.remote) + cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_local} -f {cfg.ifname} -s {cfg.remote_addr} -p {port} -z 3", shell=True) ksft_eq(socat.stdout.strip(), "hello\nworld") diff --git a/tools/testing/selftests/drivers/net/hw/ncdevmem.c b/tools/testing/selftests/drivers/net/hw/ncdevmem.c index 72f828021f83..8dc9511d046f 100644 --- a/tools/testing/selftests/drivers/net/hw/ncdevmem.c +++ b/tools/testing/selftests/drivers/net/hw/ncdevmem.c @@ -39,6 +39,7 @@ #define __EXPORTED_HEADERS__ #include <linux/uio.h> +#include <stdarg.h> #include <stdio.h> #include <stdlib.h> #include <unistd.h> @@ -97,6 +98,10 @@ static unsigned int dmabuf_id; static uint32_t tx_dmabuf_id; static int waittime_ms = 500; +/* System state loaded by current_config_load() */ +#define MAX_FLOWS 8 +static int ntuple_ids[MAX_FLOWS] = { -1, -1, -1, -1, -1, -1, -1, -1, }; + struct memory_buffer { int fd; size_t size; @@ -115,6 +120,21 @@ struct memory_provider { size_t off, 
int n); }; +static void pr_err(const char *fmt, ...) +{ + va_list args; + + fprintf(stderr, "%s: ", TEST_PREFIX); + + va_start(args, fmt); + vfprintf(stderr, fmt, args); + va_end(args); + + if (errno != 0) + fprintf(stderr, ": %s", strerror(errno)); + fprintf(stderr, "\n"); +} + static struct memory_buffer *udmabuf_alloc(size_t size) { struct udmabuf_create create; @@ -123,27 +143,33 @@ static struct memory_buffer *udmabuf_alloc(size_t size) ctx = malloc(sizeof(*ctx)); if (!ctx) - error(1, ENOMEM, "malloc failed"); + return NULL; ctx->size = size; ctx->devfd = open("/dev/udmabuf", O_RDWR); - if (ctx->devfd < 0) - error(1, errno, - "%s: [skip,no-udmabuf: Unable to access DMA buffer device file]\n", - TEST_PREFIX); + if (ctx->devfd < 0) { + pr_err("[skip,no-udmabuf: Unable to access DMA buffer device file]"); + goto err_free_ctx; + } ctx->memfd = memfd_create("udmabuf-test", MFD_ALLOW_SEALING); - if (ctx->memfd < 0) - error(1, errno, "%s: [skip,no-memfd]\n", TEST_PREFIX); + if (ctx->memfd < 0) { + pr_err("[skip,no-memfd]"); + goto err_close_dev; + } ret = fcntl(ctx->memfd, F_ADD_SEALS, F_SEAL_SHRINK); - if (ret < 0) - error(1, errno, "%s: [skip,fcntl-add-seals]\n", TEST_PREFIX); + if (ret < 0) { + pr_err("[skip,fcntl-add-seals]"); + goto err_close_memfd; + } ret = ftruncate(ctx->memfd, size); - if (ret == -1) - error(1, errno, "%s: [FAIL,memfd-truncate]\n", TEST_PREFIX); + if (ret == -1) { + pr_err("[FAIL,memfd-truncate]"); + goto err_close_memfd; + } memset(&create, 0, sizeof(create)); @@ -151,15 +177,29 @@ static struct memory_buffer *udmabuf_alloc(size_t size) create.offset = 0; create.size = size; ctx->fd = ioctl(ctx->devfd, UDMABUF_CREATE, &create); - if (ctx->fd < 0) - error(1, errno, "%s: [FAIL, create udmabuf]\n", TEST_PREFIX); + if (ctx->fd < 0) { + pr_err("[FAIL, create udmabuf]"); + goto err_close_fd; + } ctx->buf_mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, ctx->fd, 0); - if (ctx->buf_mem == MAP_FAILED) - error(1, errno, "%s: [FAIL, map udmabuf]\n", TEST_PREFIX); + if (ctx->buf_mem == MAP_FAILED) { + pr_err("[FAIL, map udmabuf]"); + goto err_close_fd; + } return ctx; + +err_close_fd: + close(ctx->fd); +err_close_memfd: + close(ctx->memfd); +err_close_dev: + close(ctx->devfd); +err_free_ctx: + free(ctx); + return NULL; } static void udmabuf_free(struct memory_buffer *ctx) @@ -217,7 +257,7 @@ static void print_nonzero_bytes(void *ptr, size_t size) putchar(p[i]); } -void validate_buffer(void *line, size_t size) +int validate_buffer(void *line, size_t size) { static unsigned char seed = 1; unsigned char *ptr = line; @@ -232,8 +272,10 @@ void validate_buffer(void *line, size_t size) "Failed validation: expected=%u, actual=%u, index=%lu\n", expected, ptr[i], i); errors++; - if (errors > 20) - error(1, 0, "validation failed."); + if (errors > 20) { + pr_err("validation failed"); + return -1; + } } seed++; if (seed == do_validation) @@ -241,6 +283,86 @@ void validate_buffer(void *line, size_t size) } fprintf(stdout, "Validated buffer\n"); + return 0; +} + +static int +__run_command(char *out, size_t outlen, const char *cmd, va_list args) +{ + char command[256]; + FILE *fp; + + vsnprintf(command, sizeof(command), cmd, args); + + fprintf(stderr, "Running: %s\n", command); + fp = popen(command, "r"); + if (!fp) + return -1; + if (out) { + size_t len; + + if (!fgets(out, outlen, fp)) + return -1; + + /* Remove trailing newline if present */ + len = strlen(out); + if (len && out[len - 1] == '\n') + out[len - 1] = '\0'; + } + return pclose(fp); +} + +static int run_command(const 
char *cmd, ...) +{ + va_list args; + int ret; + + va_start(args, cmd); + ret = __run_command(NULL, 0, cmd, args); + va_end(args); + + return ret; +} + +static int ethtool_add_flow(const char *format, ...) +{ + char local_output[256], cmd[256]; + const char *id_start; + int flow_idx, ret; + char *endptr; + long flow_id; + va_list args; + + for (flow_idx = 0; flow_idx < MAX_FLOWS; flow_idx++) + if (ntuple_ids[flow_idx] == -1) + break; + if (flow_idx == MAX_FLOWS) { + fprintf(stderr, "Error: too many flows\n"); + return -1; + } + + snprintf(cmd, sizeof(cmd), "ethtool -N %s %s", ifname, format); + + va_start(args, format); + ret = __run_command(local_output, sizeof(local_output), cmd, args); + va_end(args); + + if (ret != 0) + return ret; + + /* Extract the ID from the output */ + id_start = strstr(local_output, "Added rule with ID "); + if (!id_start) + return -1; + id_start += strlen("Added rule with ID "); + + flow_id = strtol(id_start, &endptr, 10); + if (endptr == id_start || flow_id < 0 || flow_id > INT_MAX) + return -1; + + fprintf(stderr, "Added flow rule with ID %ld\n", flow_id); + ntuple_ids[flow_idx] = flow_id; + return flow_id; } static int rxq_num(int ifindex) @@ -270,29 +392,17 @@ static int rxq_num(int ifindex) return num; } -#define run_command(cmd, ...) \ - ({ \ - char command[256]; \ - memset(command, 0, sizeof(command)); \ - snprintf(command, sizeof(command), cmd, ##__VA_ARGS__); \ - fprintf(stderr, "Running: %s\n", command); \ - system(command); \ - }) - -static int reset_flow_steering(void) +static void reset_flow_steering(void) { - /* Depending on the NIC, toggling ntuple off and on might not - * be allowed. Additionally, attempting to delete existing filters - * will fail if no filters are present. Therefore, do not enforce - * the exit status. 
- */
-
-	run_command("sudo ethtool -K %s ntuple off >&2", ifname);
-	run_command("sudo ethtool -K %s ntuple on >&2", ifname);
-	run_command(
-		"sudo ethtool -n %s | grep 'Filter:' | awk '{print $2}' | xargs -n1 ethtool -N %s delete >&2",
-		ifname, ifname);
-	return 0;
+	int i;
+
+	for (i = 0; i < MAX_FLOWS; i++) {
+		if (ntuple_ids[i] == -1)
+			continue;
+		run_command("ethtool -N %s delete %d",
+			    ifname, ntuple_ids[i]);
+		ntuple_ids[i] = -1;
+	}
 }
 
 static const char *tcp_data_split_str(int val)
@@ -309,7 +419,81 @@ static const char *tcp_data_split_str(int val)
 	}
 }
 
-static int configure_headersplit(bool on)
+static struct ethtool_rings_get_rsp *get_ring_config(void)
+{
+	struct ethtool_rings_get_req *get_req;
+	struct ethtool_rings_get_rsp *get_rsp;
+	struct ynl_error yerr;
+	struct ynl_sock *ys;
+
+	ys = ynl_sock_create(&ynl_ethtool_family, &yerr);
+	if (!ys) {
+		fprintf(stderr, "YNL: %s\n", yerr.msg);
+		return NULL;
+	}
+
+	get_req = ethtool_rings_get_req_alloc();
+	ethtool_rings_get_req_set_header_dev_index(get_req, ifindex);
+	get_rsp = ethtool_rings_get(ys, get_req);
+	ethtool_rings_get_req_free(get_req);
+
+	ynl_sock_destroy(ys);
+
+	return get_rsp;
+}
+
+static void restore_ring_config(const struct ethtool_rings_get_rsp *config)
+{
+	struct ethtool_rings_get_req *get_req;
+	struct ethtool_rings_get_rsp *get_rsp;
+	struct ethtool_rings_set_req *req;
+	struct ynl_error yerr;
+	struct ynl_sock *ys;
+	int ret;
+
+	if (!config)
+		return;
+
+	ys = ynl_sock_create(&ynl_ethtool_family, &yerr);
+	if (!ys) {
+		fprintf(stderr, "YNL: %s\n", yerr.msg);
+		return;
+	}
+
+	req = ethtool_rings_set_req_alloc();
+	ethtool_rings_set_req_set_header_dev_index(req, ifindex);
+	ethtool_rings_set_req_set_tcp_data_split(req,
+					ETHTOOL_TCP_DATA_SPLIT_UNKNOWN);
+	if (config->_present.hds_thresh)
+		ethtool_rings_set_req_set_hds_thresh(req, config->hds_thresh);
+
+	ret = ethtool_rings_set(ys, req);
+	if (ret < 0)
+		fprintf(stderr, "YNL restoring HDS cfg: %s\n", ys->err.msg);
+
+	get_req = ethtool_rings_get_req_alloc();
+	ethtool_rings_get_req_set_header_dev_index(get_req, ifindex);
+	get_rsp = ethtool_rings_get(ys, get_req);
+	ethtool_rings_get_req_free(get_req);
+
+	/* use the explicit value if UNKNOWN didn't restore the previous setting */
+	if (get_rsp->tcp_data_split != config->tcp_data_split) {
+		ethtool_rings_set_req_set_tcp_data_split(req,
+						config->tcp_data_split);
+		ret = ethtool_rings_set(ys, req);
+		if (ret < 0)
+			fprintf(stderr, "YNL restoring expl HDS cfg: %s\n",
+				ys->err.msg);
+	}
+
+	ethtool_rings_get_rsp_free(get_rsp);
+	ethtool_rings_set_req_free(req);
+
+	ynl_sock_destroy(ys);
+}
+
+static int
+configure_headersplit(const struct ethtool_rings_get_rsp *old, bool on)
 {
 	struct ethtool_rings_get_req *get_req;
 	struct ethtool_rings_get_rsp *get_rsp;
@@ -326,8 +510,15 @@ static int configure_headersplit(bool on)
 	req = ethtool_rings_set_req_alloc();
 	ethtool_rings_set_req_set_header_dev_index(req, ifindex);
-	/* 0 - off, 1 - auto, 2 - on */
-	ethtool_rings_set_req_set_tcp_data_split(req, on ? 
2 : 0);
+	if (on) {
+		ethtool_rings_set_req_set_tcp_data_split(req,
+					ETHTOOL_TCP_DATA_SPLIT_ENABLED);
+		if (old->_present.hds_thresh)
+			ethtool_rings_set_req_set_hds_thresh(req, 0);
+	} else {
+		ethtool_rings_set_req_set_tcp_data_split(req,
+					ETHTOOL_TCP_DATA_SPLIT_UNKNOWN);
+	}
 	ret = ethtool_rings_set(ys, req);
 	if (ret < 0)
 		fprintf(stderr, "YNL failed: %s\n", ys->err.msg);
@@ -351,12 +542,103 @@ static int configure_headersplit(bool on)
 
 static int configure_rss(void)
 {
-	return run_command("sudo ethtool -X %s equal %d >&2", ifname, start_queue);
+	return run_command("ethtool -X %s equal %d >&2", ifname, start_queue);
+}
+
+static void reset_rss(void)
+{
+	run_command("ethtool -X %s default >&2", ifname);
 }
 
-static int configure_channels(unsigned int rx, unsigned int tx)
+static int check_changing_channels(unsigned int rx, unsigned int tx)
 {
-	return run_command("sudo ethtool -L %s rx %u tx %u", ifname, rx, tx);
+	struct ethtool_channels_get_req *gchan;
+	struct ethtool_channels_set_req *schan;
+	struct ethtool_channels_get_rsp *chan;
+	struct ynl_error yerr;
+	struct ynl_sock *ys;
+	int ret;
+
+	fprintf(stderr, "setting channel count rx:%u tx:%u\n", rx, tx);
+
+	ys = ynl_sock_create(&ynl_ethtool_family, &yerr);
+	if (!ys) {
+		fprintf(stderr, "YNL: %s\n", yerr.msg);
+		return -1;
+	}
+
+	gchan = ethtool_channels_get_req_alloc();
+	if (!gchan) {
+		ret = -1;
+		goto exit_close_sock;
+	}
+
+	ethtool_channels_get_req_set_header_dev_index(gchan, ifindex);
+	chan = ethtool_channels_get(ys, gchan);
+	ethtool_channels_get_req_free(gchan);
+	if (!chan) {
+		fprintf(stderr, "YNL get channels: %s\n", ys->err.msg);
+		ret = -1;
+		goto exit_close_sock;
+	}
+
+	schan = ethtool_channels_set_req_alloc();
+	if (!schan) {
+		ret = -1;
+		goto exit_free_chan;
+	}
+
+	ethtool_channels_set_req_set_header_dev_index(schan, ifindex);
+
+	if (chan->_present.combined_count) {
+		if (chan->_present.rx_count || chan->_present.tx_count) {
+			ethtool_channels_set_req_set_rx_count(schan, 0);
+			ethtool_channels_set_req_set_tx_count(schan, 0);
+		}
+
+		if (rx == tx) {
+			ethtool_channels_set_req_set_combined_count(schan, rx);
+		} else if (rx > tx) {
+			ethtool_channels_set_req_set_combined_count(schan, tx);
+			ethtool_channels_set_req_set_rx_count(schan, rx - tx);
+		} else {
+			ethtool_channels_set_req_set_combined_count(schan, rx);
+			ethtool_channels_set_req_set_tx_count(schan, tx - rx);
+		}
+
+	} else if (chan->_present.rx_count) {
+		ethtool_channels_set_req_set_rx_count(schan, rx);
+		ethtool_channels_set_req_set_tx_count(schan, tx);
+	} else {
+		fprintf(stderr, "Error: device has neither combined nor rx channels\n");
+		ret = -1;
+		goto exit_free_schan;
+	}
+
+	ret = ethtool_channels_set(ys, schan);
+	if (ret) {
+		fprintf(stderr, "YNL set channels: %s\n", ys->err.msg);
+	} else {
+		/* We were expecting a failure; go back to the previous settings */
+		ethtool_channels_set_req_set_combined_count(schan,
+						chan->combined_count);
+		ethtool_channels_set_req_set_rx_count(schan, chan->rx_count);
+		ethtool_channels_set_req_set_tx_count(schan, chan->tx_count);
+
+		ret = ethtool_channels_set(ys, schan);
+		if (ret)
+			fprintf(stderr, "YNL un-setting channels: %s\n",
+				ys->err.msg);
+	}
+
+exit_free_schan:
+	ethtool_channels_set_req_free(schan);
+exit_free_chan:
+	ethtool_channels_get_rsp_free(chan);
+exit_close_sock:
+	ynl_sock_destroy(ys);
+
+	return ret;
 }
 
 static int configure_flow_steering(struct sockaddr_in6 *server_sin)
@@ -364,6 +646,7 @@ static int configure_flow_steering(struct sockaddr_in6 *server_sin)
 	const char *type = 
"tcp6"; const char *server_addr; char buf[40]; + int flow_id; inet_ntop(AF_INET6, &server_sin->sin6_addr, buf, sizeof(buf)); server_addr = buf; @@ -374,23 +657,22 @@ static int configure_flow_steering(struct sockaddr_in6 *server_sin) } /* Try configure 5-tuple */ - if (run_command("sudo ethtool -N %s flow-type %s %s %s dst-ip %s %s %s dst-port %s queue %d >&2", - ifname, - type, - client_ip ? "src-ip" : "", - client_ip ?: "", - server_addr, - client_ip ? "src-port" : "", - client_ip ? port : "", - port, start_queue)) + flow_id = ethtool_add_flow("flow-type %s %s %s dst-ip %s %s %s dst-port %s queue %d", + type, + client_ip ? "src-ip" : "", + client_ip ?: "", + server_addr, + client_ip ? "src-port" : "", + client_ip ? port : "", + port, start_queue); + if (flow_id < 0) { /* If that fails, try configure 3-tuple */ - if (run_command("sudo ethtool -N %s flow-type %s dst-ip %s dst-port %s queue %d >&2", - ifname, - type, - server_addr, - port, start_queue)) + flow_id = ethtool_add_flow("flow-type %s dst-ip %s dst-port %s queue %d", + type, server_addr, port, start_queue); + if (flow_id < 0) /* If that fails, return error */ return -1; + } return 0; } @@ -405,6 +687,7 @@ static int bind_rx_queue(unsigned int ifindex, unsigned int dmabuf_fd, *ys = ynl_sock_create(&ynl_netdev_family, &yerr); if (!*ys) { + netdev_queue_id_free(queues); fprintf(stderr, "YNL: %s\n", yerr.msg); return -1; } @@ -483,18 +766,24 @@ err_close: return -1; } -static void enable_reuseaddr(int fd) +static int enable_reuseaddr(int fd) { int opt = 1; int ret; ret = setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)); - if (ret) - error(1, errno, "%s: [FAIL, SO_REUSEPORT]\n", TEST_PREFIX); + if (ret) { + pr_err("SO_REUSEPORT failed"); + return -1; + } ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)); - if (ret) - error(1, errno, "%s: [FAIL, SO_REUSEADDR]\n", TEST_PREFIX); + if (ret) { + pr_err("SO_REUSEADDR failed"); + return -1; + } + + return 0; } static int parse_address(const char *str, int port, struct sockaddr_in6 *sin6) @@ -537,6 +826,7 @@ static struct netdev_queue_id *create_queues(void) static int do_server(struct memory_buffer *mem) { + struct ethtool_rings_get_rsp *ring_config; char ctrl_data[sizeof(int) * 20000]; size_t non_page_aligned_frags = 0; struct sockaddr_in6 client_addr; @@ -548,54 +838,74 @@ static int do_server(struct memory_buffer *mem) char *tmp_mem = NULL; struct ynl_sock *ys; char iobuf[819200]; + int ret, err = -1; char buffer[256]; int socket_fd; int client_fd; - int ret; ret = parse_address(server_ip, atoi(port), &server_sin); - if (ret < 0) - error(1, 0, "parse server address"); + if (ret < 0) { + pr_err("parse server address"); + return -1; + } - if (reset_flow_steering()) - error(1, 0, "Failed to reset flow steering\n"); + ring_config = get_ring_config(); + if (!ring_config) { + pr_err("Failed to get current ring configuration"); + return -1; + } - if (configure_headersplit(1)) - error(1, 0, "Failed to enable TCP header split\n"); + if (configure_headersplit(ring_config, 1)) { + pr_err("Failed to enable TCP header split"); + goto err_free_ring_config; + } /* Configure RSS to divert all traffic from our devmem queues */ - if (configure_rss()) - error(1, 0, "Failed to configure rss\n"); + if (configure_rss()) { + pr_err("Failed to configure rss"); + goto err_reset_headersplit; + } /* Flow steer our devmem flows to start_queue */ - if (configure_flow_steering(&server_sin)) - error(1, 0, "Failed to configure flow steering\n"); + if (configure_flow_steering(&server_sin)) { + 
pr_err("Failed to configure flow steering"); + goto err_reset_rss; + } sleep(1); - if (bind_rx_queue(ifindex, mem->fd, create_queues(), num_queues, &ys)) - error(1, 0, "Failed to bind\n"); + if (bind_rx_queue(ifindex, mem->fd, create_queues(), num_queues, &ys)) { + pr_err("Failed to bind"); + goto err_reset_flow_steering; + } tmp_mem = malloc(mem->size); if (!tmp_mem) - error(1, ENOMEM, "malloc failed"); + goto err_unbind; socket_fd = socket(AF_INET6, SOCK_STREAM, 0); - if (socket_fd < 0) - error(1, errno, "%s: [FAIL, create socket]\n", TEST_PREFIX); + if (socket_fd < 0) { + pr_err("Failed to create socket"); + goto err_free_tmp; + } - enable_reuseaddr(socket_fd); + if (enable_reuseaddr(socket_fd)) + goto err_close_socket; fprintf(stderr, "binding to address %s:%d\n", server_ip, ntohs(server_sin.sin6_port)); ret = bind(socket_fd, &server_sin, sizeof(server_sin)); - if (ret) - error(1, errno, "%s: [FAIL, bind]\n", TEST_PREFIX); + if (ret) { + pr_err("Failed to bind"); + goto err_close_socket; + } ret = listen(socket_fd, 1); - if (ret) - error(1, errno, "%s: [FAIL, listen]\n", TEST_PREFIX); + if (ret) { + pr_err("Failed to listen"); + goto err_close_socket; + } client_addr_len = sizeof(client_addr); @@ -604,6 +914,10 @@ static int do_server(struct memory_buffer *mem) fprintf(stderr, "Waiting or connection on %s:%d\n", buffer, ntohs(server_sin.sin6_port)); client_fd = accept(socket_fd, &client_addr, &client_addr_len); + if (client_fd < 0) { + pr_err("Failed to accept"); + goto err_close_socket; + } inet_ntop(AF_INET6, &client_addr.sin6_addr, buffer, sizeof(buffer)); @@ -634,7 +948,8 @@ static int do_server(struct memory_buffer *mem) continue; } if (ret == 0) { - fprintf(stderr, "client exited\n"); + errno = 0; + pr_err("client exited"); goto cleanup; } @@ -672,9 +987,10 @@ static int do_server(struct memory_buffer *mem) dmabuf_cmsg->frag_size, dmabuf_cmsg->frag_token, total_received, dmabuf_cmsg->dmabuf_id); - if (dmabuf_cmsg->dmabuf_id != dmabuf_id) - error(1, 0, - "received on wrong dmabuf_id: flow steering error\n"); + if (dmabuf_cmsg->dmabuf_id != dmabuf_id) { + pr_err("received on wrong dmabuf_id: flow steering error"); + goto err_close_client; + } if (dmabuf_cmsg->frag_size % getpagesize()) non_page_aligned_frags++; @@ -685,22 +1001,27 @@ static int do_server(struct memory_buffer *mem) dmabuf_cmsg->frag_offset, dmabuf_cmsg->frag_size); - if (do_validation) - validate_buffer(tmp_mem, - dmabuf_cmsg->frag_size); - else + if (do_validation) { + if (validate_buffer(tmp_mem, + dmabuf_cmsg->frag_size)) + goto err_close_client; + } else { print_nonzero_bytes(tmp_mem, dmabuf_cmsg->frag_size); + } ret = setsockopt(client_fd, SOL_SOCKET, SO_DEVMEM_DONTNEED, &token, sizeof(token)); - if (ret != 1) - error(1, 0, - "SO_DEVMEM_DONTNEED not enough tokens"); + if (ret != 1) { + pr_err("SO_DEVMEM_DONTNEED not enough tokens"); + goto err_close_client; + } + } + if (!is_devmem) { + pr_err("flow steering error"); + goto err_close_client; } - if (!is_devmem) - error(1, 0, "flow steering error\n"); fprintf(stderr, "total_received=%lu\n", total_received); } @@ -711,54 +1032,121 @@ static int do_server(struct memory_buffer *mem) page_aligned_frags, non_page_aligned_frags); cleanup: + err = 0; - free(tmp_mem); +err_close_client: close(client_fd); +err_close_socket: close(socket_fd); +err_free_tmp: + free(tmp_mem); +err_unbind: ynl_sock_destroy(ys); - - return 0; +err_reset_flow_steering: + reset_flow_steering(); +err_reset_rss: + reset_rss(); +err_reset_headersplit: + restore_ring_config(ring_config); 
+err_free_ring_config: + ethtool_rings_get_rsp_free(ring_config); + return err; } -void run_devmem_tests(void) +int run_devmem_tests(void) { + struct ethtool_rings_get_rsp *ring_config; + struct netdev_queue_id *queues; struct memory_buffer *mem; struct ynl_sock *ys; + int err = -1; mem = provider->alloc(getpagesize() * NUM_PAGES); + if (!mem) { + pr_err("Failed to allocate memory buffer"); + return -1; + } + + ring_config = get_ring_config(); + if (!ring_config) { + pr_err("Failed to get current ring configuration"); + goto err_free_mem; + } /* Configure RSS to divert all traffic from our devmem queues */ - if (configure_rss()) - error(1, 0, "rss error\n"); + if (configure_rss()) { + pr_err("rss error"); + goto err_free_ring_config; + } - if (configure_headersplit(1)) - error(1, 0, "Failed to configure header split\n"); + if (configure_headersplit(ring_config, 1)) { + pr_err("Failed to configure header split"); + goto err_reset_rss; + } - if (!bind_rx_queue(ifindex, mem->fd, - calloc(num_queues, sizeof(struct netdev_queue_id)), - num_queues, &ys)) - error(1, 0, "Binding empty queues array should have failed\n"); + queues = netdev_queue_id_alloc(num_queues); + if (!queues) { + pr_err("Failed to allocate empty queues array"); + goto err_reset_headersplit; + } - if (configure_headersplit(0)) - error(1, 0, "Failed to configure header split\n"); + if (!bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) { + pr_err("Binding empty queues array should have failed"); + goto err_unbind; + } - if (!bind_rx_queue(ifindex, mem->fd, create_queues(), num_queues, &ys)) - error(1, 0, "Configure dmabuf with header split off should have failed\n"); + if (configure_headersplit(ring_config, 0)) { + pr_err("Failed to configure header split"); + goto err_reset_headersplit; + } - if (configure_headersplit(1)) - error(1, 0, "Failed to configure header split\n"); + queues = create_queues(); + if (!queues) { + pr_err("Failed to create queues"); + goto err_reset_headersplit; + } - if (bind_rx_queue(ifindex, mem->fd, create_queues(), num_queues, &ys)) - error(1, 0, "Failed to bind\n"); + if (!bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) { + pr_err("Configure dmabuf with header split off should have failed"); + goto err_unbind; + } + + if (configure_headersplit(ring_config, 1)) { + pr_err("Failed to configure header split"); + goto err_reset_headersplit; + } + + queues = create_queues(); + if (!queues) { + pr_err("Failed to create queues"); + goto err_reset_headersplit; + } + + if (bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) { + pr_err("Failed to bind"); + goto err_reset_headersplit; + } /* Deactivating a bound queue should not be legal */ - if (!configure_channels(num_queues, num_queues - 1)) - error(1, 0, "Deactivating a bound queue should be illegal.\n"); + if (!check_changing_channels(num_queues, num_queues)) { + pr_err("Deactivating a bound queue should be illegal"); + goto err_unbind; + } - /* Closing the netlink socket does an implicit unbind */ - ynl_sock_destroy(ys); + err = 0; + goto err_unbind; +err_unbind: + ynl_sock_destroy(ys); +err_reset_headersplit: + restore_ring_config(ring_config); +err_reset_rss: + reset_rss(); +err_free_ring_config: + ethtool_rings_get_rsp_free(ring_config); +err_free_mem: provider->free(mem); + return err; } static uint64_t gettimeofday_ms(void) @@ -778,13 +1166,15 @@ static int do_poll(int fd) pfd.fd = fd; ret = poll(&pfd, 1, waittime_ms); - if (ret == -1) - error(1, errno, "poll"); + if (ret == -1) { + pr_err("poll"); + return -1; + } return ret 
&& (pfd.revents & POLLERR); } -static void wait_compl(int fd) +static int wait_compl(int fd) { int64_t tstop = gettimeofday_ms() + waittime_ms; char control[CMSG_SPACE(100)] = {}; @@ -798,18 +1188,23 @@ static void wait_compl(int fd) msg.msg_controllen = sizeof(control); while (gettimeofday_ms() < tstop) { - if (!do_poll(fd)) + ret = do_poll(fd); + if (ret < 0) + return ret; + if (!ret) continue; ret = recvmsg(fd, &msg, MSG_ERRQUEUE); if (ret < 0) { if (errno == EAGAIN) continue; - error(1, errno, "recvmsg(MSG_ERRQUEUE)"); - return; + pr_err("recvmsg(MSG_ERRQUEUE)"); + return -1; + } + if (msg.msg_flags & MSG_CTRUNC) { + pr_err("MSG_CTRUNC"); + return -1; } - if (msg.msg_flags & MSG_CTRUNC) - error(1, 0, "MSG_CTRUNC\n"); for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) { if (cm->cmsg_level != SOL_IP && @@ -823,20 +1218,25 @@ static void wait_compl(int fd) continue; serr = (void *)CMSG_DATA(cm); - if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY) - error(1, 0, "wrong origin %u", serr->ee_origin); - if (serr->ee_errno != 0) - error(1, 0, "wrong errno %d", serr->ee_errno); + if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY) { + pr_err("wrong origin %u", serr->ee_origin); + return -1; + } + if (serr->ee_errno != 0) { + pr_err("wrong errno %d", serr->ee_errno); + return -1; + } hi = serr->ee_data; lo = serr->ee_info; fprintf(stderr, "tx complete [%d,%d]\n", lo, hi); - return; + return 0; } } - error(1, 0, "did not receive tx completion"); + pr_err("did not receive tx completion"); + return -1; } static int do_client(struct memory_buffer *mem) @@ -850,50 +1250,69 @@ static int do_client(struct memory_buffer *mem) ssize_t line_size = 0; struct cmsghdr *cmsg; char *line = NULL; + int ret, err = -1; size_t len = 0; int socket_fd; __u32 ddmabuf; int opt = 1; - int ret; ret = parse_address(server_ip, atoi(port), &server_sin); - if (ret < 0) - error(1, 0, "parse server address"); + if (ret < 0) { + pr_err("parse server address"); + return -1; + } + + if (client_ip) { + ret = parse_address(client_ip, atoi(port), &client_sin); + if (ret < 0) { + pr_err("parse client address"); + return ret; + } + } socket_fd = socket(AF_INET6, SOCK_STREAM, 0); - if (socket_fd < 0) - error(1, socket_fd, "create socket"); + if (socket_fd < 0) { + pr_err("create socket"); + return -1; + } - enable_reuseaddr(socket_fd); + if (enable_reuseaddr(socket_fd)) + goto err_close_socket; ret = setsockopt(socket_fd, SOL_SOCKET, SO_BINDTODEVICE, ifname, strlen(ifname) + 1); - if (ret) - error(1, errno, "bindtodevice"); + if (ret) { + pr_err("bindtodevice"); + goto err_close_socket; + } - if (bind_tx_queue(ifindex, mem->fd, &ys)) - error(1, 0, "Failed to bind\n"); + if (bind_tx_queue(ifindex, mem->fd, &ys)) { + pr_err("Failed to bind"); + goto err_close_socket; + } if (client_ip) { - ret = parse_address(client_ip, atoi(port), &client_sin); - if (ret < 0) - error(1, 0, "parse client address"); - ret = bind(socket_fd, &client_sin, sizeof(client_sin)); - if (ret) - error(1, errno, "bind"); + if (ret) { + pr_err("bind"); + goto err_unbind; + } } ret = setsockopt(socket_fd, SOL_SOCKET, SO_ZEROCOPY, &opt, sizeof(opt)); - if (ret) - error(1, errno, "set sock opt"); + if (ret) { + pr_err("set sock opt"); + goto err_unbind; + } fprintf(stderr, "Connect to %s %d (via %s)\n", server_ip, ntohs(server_sin.sin6_port), ifname); ret = connect(socket_fd, &server_sin, sizeof(server_sin)); - if (ret) - error(1, errno, "connect"); + if (ret) { + pr_err("connect"); + goto err_unbind; + } while (1) { free(line); @@ -906,10 +1325,11 @@ static int 
do_client(struct memory_buffer *mem) if (max_chunk) { msg.msg_iovlen = (line_size + max_chunk - 1) / max_chunk; - if (msg.msg_iovlen > MAX_IOV) - error(1, 0, - "can't partition %zd bytes into maximum of %d chunks", - line_size, MAX_IOV); + if (msg.msg_iovlen > MAX_IOV) { + pr_err("can't partition %zd bytes into maximum of %d chunks", + line_size, MAX_IOV); + goto err_free_line; + } for (int i = 0; i < msg.msg_iovlen; i++) { iov[i].iov_base = (void *)(i * max_chunk); @@ -940,34 +1360,40 @@ static int do_client(struct memory_buffer *mem) *((__u32 *)CMSG_DATA(cmsg)) = ddmabuf; ret = sendmsg(socket_fd, &msg, MSG_ZEROCOPY); - if (ret < 0) - error(1, errno, "Failed sendmsg"); + if (ret < 0) { + pr_err("Failed sendmsg"); + goto err_free_line; + } fprintf(stderr, "sendmsg_ret=%d\n", ret); - if (ret != line_size) - error(1, errno, "Did not send all bytes %d vs %zd", ret, - line_size); + if (ret != line_size) { + pr_err("Did not send all bytes %d vs %zd", ret, line_size); + goto err_free_line; + } - wait_compl(socket_fd); + if (wait_compl(socket_fd)) + goto err_free_line; } fprintf(stderr, "%s: tx ok\n", TEST_PREFIX); + err = 0; + +err_free_line: free(line); +err_unbind: + ynl_sock_destroy(ys); +err_close_socket: close(socket_fd); - - if (ys) - ynl_sock_destroy(ys); - - return 0; + return err; } int main(int argc, char *argv[]) { struct memory_buffer *mem; int is_server = 0, opt; - int ret; + int ret, err = 1; while ((opt = getopt(argc, argv, "ls:c:p:v:q:t:f:z:")) != -1) { switch (opt) { @@ -1004,8 +1430,10 @@ int main(int argc, char *argv[]) } } - if (!ifname) - error(1, 0, "Missing -f argument\n"); + if (!ifname) { + pr_err("Missing -f argument"); + return 1; + } ifindex = if_nametoindex(ifname); @@ -1014,33 +1442,41 @@ int main(int argc, char *argv[]) if (!server_ip && !client_ip) { if (start_queue < 0 && num_queues < 0) { num_queues = rxq_num(ifindex); - if (num_queues < 0) - error(1, 0, "couldn't detect number of queues\n"); - if (num_queues < 2) - error(1, 0, - "number of device queues is too low\n"); + if (num_queues < 0) { + pr_err("couldn't detect number of queues"); + return 1; + } + if (num_queues < 2) { + pr_err("number of device queues is too low"); + return 1; + } /* make sure can bind to multiple queues */ start_queue = num_queues / 2; num_queues /= 2; } - if (start_queue < 0 || num_queues < 0) - error(1, 0, "Both -t and -q are required\n"); + if (start_queue < 0 || num_queues < 0) { + pr_err("Both -t and -q are required"); + return 1; + } - run_devmem_tests(); - return 0; + return run_devmem_tests(); } if (start_queue < 0 && num_queues < 0) { num_queues = rxq_num(ifindex); - if (num_queues < 2) - error(1, 0, "number of device queues is too low\n"); + if (num_queues < 2) { + pr_err("number of device queues is too low"); + return 1; + } num_queues = 1; start_queue = rxq_num(ifindex) - num_queues; - if (start_queue < 0) - error(1, 0, "couldn't detect number of queues\n"); + if (start_queue < 0) { + pr_err("couldn't detect number of queues"); + return 1; + } fprintf(stderr, "using queues %d..%d\n", start_queue, start_queue + num_queues); } @@ -1048,21 +1484,39 @@ int main(int argc, char *argv[]) for (; optind < argc; optind++) fprintf(stderr, "extra arguments: %s\n", argv[optind]); - if (start_queue < 0) - error(1, 0, "Missing -t argument\n"); + if (start_queue < 0) { + pr_err("Missing -t argument"); + return 1; + } - if (num_queues < 0) - error(1, 0, "Missing -q argument\n"); + if (num_queues < 0) { + pr_err("Missing -q argument"); + return 1; + } - if (!server_ip) - error(1, 0, 
"Missing -s argument\n"); + if (!server_ip) { + pr_err("Missing -s argument"); + return 1; + } - if (!port) - error(1, 0, "Missing -p argument\n"); + if (!port) { + pr_err("Missing -p argument"); + return 1; + } mem = provider->alloc(getpagesize() * NUM_PAGES); + if (!mem) { + pr_err("Failed to allocate memory buffer"); + return 1; + } + ret = is_server ? do_server(mem) : do_client(mem); - provider->free(mem); + if (ret) + goto err_free_mem; - return ret; + err = 0; + +err_free_mem: + provider->free(mem); + return err; } diff --git a/tools/testing/selftests/drivers/net/hw/rss_ctx.py b/tools/testing/selftests/drivers/net/hw/rss_ctx.py index 7bb552f8b182..ed7e405682f0 100755 --- a/tools/testing/selftests/drivers/net/hw/rss_ctx.py +++ b/tools/testing/selftests/drivers/net/hw/rss_ctx.py @@ -118,7 +118,7 @@ def test_rss_key_indir(cfg): qcnt = len(_get_rx_cnts(cfg)) if qcnt < 3: - KsftSkipEx("Device has fewer than 3 queues (or doesn't support queue stats)") + raise KsftSkipEx("Device has fewer than 3 queues (or doesn't support queue stats)") data = get_rss(cfg) want_keys = ['rss-hash-key', 'rss-hash-function', 'rss-indirection-table'] @@ -178,8 +178,13 @@ def test_rss_key_indir(cfg): cnts = _get_rx_cnts(cfg) GenerateTraffic(cfg).wait_pkts_and_stop(20000) cnts = _get_rx_cnts(cfg, prev=cnts) - # First two queues get less traffic than all the rest - ksft_lt(sum(cnts[:2]), sum(cnts[2:]), "traffic distributed: " + str(cnts)) + if qcnt > 4: + # First two queues get less traffic than all the rest + ksft_lt(sum(cnts[:2]), sum(cnts[2:]), + "traffic distributed: " + str(cnts)) + else: + # When queue count is low make sure third queue got significant pkts + ksft_ge(cnts[2], 3500, "traffic distributed: " + str(cnts)) def test_rss_queue_reconfigure(cfg, main_ctx=True): @@ -335,19 +340,20 @@ def test_hitless_key_update(cfg): data = get_rss(cfg) key_len = len(data['rss-hash-key']) - key = _rss_key_rand(key_len) + ethnl = EthtoolFamily() + key = random.randbytes(key_len) tgen = GenerateTraffic(cfg) try: errors0, carrier0 = get_drop_err_sum(cfg) t0 = datetime.datetime.now() - ethtool(f"-X {cfg.ifname} hkey " + _rss_key_str(key)) + ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, "hkey": key}) t1 = datetime.datetime.now() errors1, carrier1 = get_drop_err_sum(cfg) finally: tgen.wait_pkts_and_stop(5000) - ksft_lt((t1 - t0).total_seconds(), 0.2) + ksft_lt((t1 - t0).total_seconds(), 0.15) ksft_eq(errors1 - errors1, 0) ksft_eq(carrier1 - carrier0, 0) diff --git a/tools/testing/selftests/drivers/net/hw/rss_flow_label.py b/tools/testing/selftests/drivers/net/hw/rss_flow_label.py new file mode 100755 index 000000000000..6fa95fe27c47 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/rss_flow_label.py @@ -0,0 +1,167 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +""" +Tests for RSS hashing on IPv6 Flow Label. 
+""" + +import glob +import os +import socket +from lib.py import CmdExitFailure +from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ge, ksft_in, \ + ksft_not_in, ksft_raises, KsftSkipEx +from lib.py import bkg, cmd, defer, fd_read_timeout, rand_port +from lib.py import NetDrvEpEnv + + +def _check_system(cfg): + if not hasattr(socket, "SO_INCOMING_CPU"): + raise KsftSkipEx("socket.SO_INCOMING_CPU was added in Python 3.11") + + qcnt = len(glob.glob(f"/sys/class/net/{cfg.ifname}/queues/rx-*")) + if qcnt < 2: + raise KsftSkipEx(f"Local has only {qcnt} queues") + + for f in [f"/sys/class/net/{cfg.ifname}/queues/rx-0/rps_flow_cnt", + f"/sys/class/net/{cfg.ifname}/queues/rx-0/rps_cpus"]: + try: + with open(f, 'r') as fp: + setting = fp.read().strip() + # CPU mask will be zeros and commas + if setting.replace("0", "").replace(",", ""): + raise KsftSkipEx(f"RPS/RFS is configured: {f}: {setting}") + except FileNotFoundError: + pass + + # 1 is the default, if someone changed it we probably shouldn"t mess with it + af = cmd("cat /proc/sys/net/ipv6/auto_flowlabels", host=cfg.remote).stdout + if af.strip() != "1": + raise KsftSkipEx("Remote does not have auto_flowlabels enabled") + + +def _ethtool_get_cfg(cfg, fl_type): + descr = cmd(f"ethtool -n {cfg.ifname} rx-flow-hash {fl_type}").stdout + + converter = { + "IP SA": "s", + "IP DA": "d", + "L3 proto": "t", + "L4 bytes 0 & 1 [TCP/UDP src port]": "f", + "L4 bytes 2 & 3 [TCP/UDP dst port]": "n", + "IPv6 Flow Label": "l", + } + + ret = "" + for line in descr.split("\n")[1:-2]: + # if this raises we probably need to add more keys to converter above + ret += converter[line] + return ret + + +def _traffic(cfg, one_sock, one_cpu): + local_port = rand_port(socket.SOCK_DGRAM) + remote_port = rand_port(socket.SOCK_DGRAM) + + sock = socket.socket(socket.AF_INET6, socket.SOCK_DGRAM) + sock.bind(("", local_port)) + sock.connect((cfg.remote_addr_v["6"], 0)) + if one_sock: + send = f"exec 5<>/dev/udp/{cfg.addr_v['6']}/{local_port}; " \ + "for i in `seq 20`; do echo a >&5; sleep 0.02; done; exec 5>&-" + else: + send = "for i in `seq 20`; do echo a | socat -t0.02 - UDP6:" \ + f"[{cfg.addr_v['6']}]:{local_port},sourceport={remote_port}; done" + + cpus = set() + with bkg(send, shell=True, host=cfg.remote, exit_wait=True): + for _ in range(20): + fd_read_timeout(sock.fileno(), 1) + cpu = sock.getsockopt(socket.SOL_SOCKET, socket.SO_INCOMING_CPU) + cpus.add(cpu) + + if one_cpu: + ksft_eq(len(cpus), 1, + f"{one_sock=} - expected one CPU, got traffic on: {cpus=}") + else: + ksft_ge(len(cpus), 2, + f"{one_sock=} - expected many CPUs, got traffic on: {cpus=}") + + +def test_rss_flow_label(cfg): + """ + Test hashing on IPv6 flow label. Send traffic over a single socket + and over multiple sockets. Depend on the remote having auto-label + enabled so that it randomizes the label per socket. 
+ """ + + cfg.require_ipver("6") + cfg.require_cmd("socat", remote=True) + _check_system(cfg) + + # Enable flow label hashing for UDP6 + initial = _ethtool_get_cfg(cfg, "udp6") + no_lbl = initial.replace("l", "") + if "l" not in initial: + try: + cmd(f"ethtool -N {cfg.ifname} rx-flow-hash udp6 l{no_lbl}") + except CmdExitFailure as exc: + raise KsftSkipEx("Device doesn't support Flow Label for UDP6") from exc + + defer(cmd, f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {initial}") + + _traffic(cfg, one_sock=True, one_cpu=True) + _traffic(cfg, one_sock=False, one_cpu=False) + + # Disable it, we should see no hashing (reset was already defer()ed) + cmd(f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {no_lbl}") + + _traffic(cfg, one_sock=False, one_cpu=True) + + +def _check_v4_flow_types(cfg): + for fl_type in ["tcp4", "udp4", "ah4", "esp4", "sctp4"]: + try: + cur = cmd(f"ethtool -n {cfg.ifname} rx-flow-hash {fl_type}").stdout + ksft_not_in("Flow Label", cur, + comment=f"{fl_type=} has Flow Label:" + cur) + except CmdExitFailure: + # Probably does not support this flow type + pass + + +def test_rss_flow_label_6only(cfg): + """ + Test interactions with IPv4 flow types. It should not be possible to set + IPv6 Flow Label hashing for an IPv4 flow type. The Flow Label should also + not appear in the IPv4 "current config". + """ + + with ksft_raises(CmdExitFailure) as cm: + cmd(f"ethtool -N {cfg.ifname} rx-flow-hash tcp4 sdfnl") + ksft_in("Invalid argument", cm.exception.cmd.stderr) + + _check_v4_flow_types(cfg) + + # Try to enable Flow Labels and check again, in case it leaks thru + initial = _ethtool_get_cfg(cfg, "udp6") + changed = initial.replace("l", "") if "l" in initial else initial + "l" + + cmd(f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {changed}") + restore = defer(cmd, f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {initial}") + + _check_v4_flow_types(cfg) + restore.exec() + _check_v4_flow_types(cfg) + + +def main() -> None: + with NetDrvEpEnv(__file__, nsim_test=False) as cfg: + ksft_run([test_rss_flow_label, + test_rss_flow_label_6only], + args=(cfg, )) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/tso.py b/tools/testing/selftests/drivers/net/hw/tso.py index c13dd5efa27a..0998e68ebaf0 100755 --- a/tools/testing/selftests/drivers/net/hw/tso.py +++ b/tools/testing/selftests/drivers/net/hw/tso.py @@ -60,16 +60,17 @@ def run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso): sock_wait_drain(sock) qstat_new = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0] - # No math behind the 10 here, but try to catch cases where - # TCP falls back to non-LSO. - ksft_lt(tcp_sock_get_retrans(sock), 10) - sock.close() - # Check that at least 90% of the data was sent as LSO packets. # System noise may cause false negatives. Also header overheads # will add up to 5% of extra packes... The check is best effort. total_lso_wire = len(buf) * 0.90 // cfg.dev["mtu"] total_lso_super = len(buf) * 0.90 // cfg.dev["tso_max_size"] + + # Make sure we have order of magnitude more LSO packets than + # retransmits, in case TCP retransmitted all the LSO packets. 
+ ksft_lt(tcp_sock_get_retrans(sock), total_lso_wire / 4) + sock.close() + if should_lso: if cfg.have_stat_super_count: ksft_ge(qstat_new['tx-hw-gso-packets'] - diff --git a/tools/testing/selftests/drivers/net/lib/py/__init__.py b/tools/testing/selftests/drivers/net/lib/py/__init__.py index 8711c67ad658..a07b56a75c8a 100644 --- a/tools/testing/selftests/drivers/net/lib/py/__init__.py +++ b/tools/testing/selftests/drivers/net/lib/py/__init__.py @@ -15,7 +15,7 @@ try: NlError, RtnlFamily, DevlinkFamily from net.lib.py import CmdExitFailure from net.lib.py import bkg, cmd, bpftool, bpftrace, defer, ethtool, \ - fd_read_timeout, ip, rand_port, tool, wait_port_listen + fd_read_timeout, ip, rand_port, tool, wait_port_listen, wait_file from net.lib.py import fd_read_timeout from net.lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx from net.lib.py import ksft_disruptive, ksft_exit, ksft_pr, ksft_run, \ diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py index 1b8bd648048f..c1f3b608c6d8 100644 --- a/tools/testing/selftests/drivers/net/lib/py/env.py +++ b/tools/testing/selftests/drivers/net/lib/py/env.py @@ -4,7 +4,7 @@ import os import time from pathlib import Path from lib.py import KsftSkipEx, KsftXfailEx -from lib.py import ksft_setup +from lib.py import ksft_setup, wait_file from lib.py import cmd, ethtool, ip, CmdExitFailure from lib.py import NetNS, NetdevSimDev from .remote import Remote @@ -25,6 +25,9 @@ class NetDrvEnvBase: self.env = self._load_env_file() + # Following attrs must be set by inheriting classes + self.dev = None + def _load_env_file(self): env = os.environ.copy() @@ -48,6 +51,22 @@ class NetDrvEnvBase: env[pair[0]] = pair[1] return ksft_setup(env) + def __del__(self): + pass + + def __enter__(self): + ip(f"link set dev {self.dev['ifname']} up") + wait_file(f"/sys/class/net/{self.dev['ifname']}/carrier", + lambda x: x.strip() == "1") + + return self + + def __exit__(self, ex_type, ex_value, ex_tb): + """ + __exit__ gets called at the end of a "with" block. + """ + self.__del__() + class NetDrvEnv(NetDrvEnvBase): """ @@ -72,17 +91,6 @@ class NetDrvEnv(NetDrvEnvBase): self.ifname = self.dev['ifname'] self.ifindex = self.dev['ifindex'] - def __enter__(self): - ip(f"link set dev {self.dev['ifname']} up") - - return self - - def __exit__(self, ex_type, ex_value, ex_tb): - """ - __exit__ gets called at the end of a "with" block. - """ - self.__del__() - def __del__(self): if self._ns: self._ns.remove() @@ -219,15 +227,6 @@ class NetDrvEpEnv(NetDrvEnvBase): raise Exception("Can't resolve remote interface name, multiple interfaces match") return v6[0]["ifname"] if v6 else v4[0]["ifname"] - def __enter__(self): - return self - - def __exit__(self, ex_type, ex_value, ex_tb): - """ - __exit__ gets called at the end of a "with" block. 
- """ - self.__del__() - def __del__(self): if self._ns: self._ns.remove() diff --git a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh index b6071e80ebbb..8e1085e89647 100644 --- a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh +++ b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh @@ -148,12 +148,20 @@ function create_dynamic_target() { # Generate the command line argument for netconsole following: # netconsole=[+][src-port]@[src-ip]/[<dev>],[tgt-port]@<tgt-ip>/[tgt-macaddr] function create_cmdline_str() { + local BINDMODE=${1:-"ifname"} + if [ "${BINDMODE}" == "ifname" ] + then + SRCDEV=${SRCIF} + else + SRCDEV=$(mac_get "${SRCIF}") + fi + DSTMAC=$(ip netns exec "${NAMESPACE}" \ ip link show "${DSTIF}" | awk '/ether/ {print $2}') SRCPORT="1514" TGTPORT="6666" - echo "netconsole=\"+${SRCPORT}@${SRCIP}/${SRCIF},${TGTPORT}@${DSTIP}/${DSTMAC}\"" + echo "netconsole=\"+${SRCPORT}@${SRCIP}/${SRCDEV},${TGTPORT}@${DSTIP}/${DSTMAC}\"" } # Do not append the release to the header of the message diff --git a/tools/testing/selftests/drivers/net/napi_threaded.py b/tools/testing/selftests/drivers/net/napi_threaded.py index 9699a100a87d..f4be72b2145a 100755 --- a/tools/testing/selftests/drivers/net/napi_threaded.py +++ b/tools/testing/selftests/drivers/net/napi_threaded.py @@ -24,7 +24,8 @@ def _assert_napi_threaded_disabled(nl, napi_id) -> None: def _set_threaded_state(cfg, threaded) -> None: - cmd(f"echo {threaded} > /sys/class/net/{cfg.ifname}/threaded") + with open(f"/sys/class/net/{cfg.ifname}/threaded", "wb") as fp: + fp.write(str(threaded).encode('utf-8')) def _setup_deferred_cleanup(cfg) -> None: @@ -38,6 +39,34 @@ def _setup_deferred_cleanup(cfg) -> None: return combined +def napi_init(cfg, nl) -> None: + """ + Test that threaded state (in the persistent NAPI config) gets updated + even when NAPI with given ID is not allocated at the time. 
+ """ + + qcnt = _setup_deferred_cleanup(cfg) + + _set_threaded_state(cfg, 1) + cmd(f"ethtool -L {cfg.ifname} combined 1") + _set_threaded_state(cfg, 0) + cmd(f"ethtool -L {cfg.ifname} combined {qcnt}") + + napis = nl.napi_get({'ifindex': cfg.ifindex}, dump=True) + for napi in napis: + ksft_eq(napi['threaded'], 'disabled') + ksft_eq(napi.get('pid'), None) + + cmd(f"ethtool -L {cfg.ifname} combined 1") + _set_threaded_state(cfg, 1) + cmd(f"ethtool -L {cfg.ifname} combined {qcnt}") + + napis = nl.napi_get({'ifindex': cfg.ifindex}, dump=True) + for napi in napis: + ksft_eq(napi['threaded'], 'enabled') + ksft_ne(napi.get('pid'), None) + + def enable_dev_threaded_disable_napi_threaded(cfg, nl) -> None: """ Test that when napi threaded is enabled at device level and @@ -103,7 +132,8 @@ def main() -> None: """ Ksft boiler plate main """ with NetDrvEnv(__file__, queue_count=2) as cfg: - ksft_run([change_num_queues, + ksft_run([napi_init, + change_num_queues, enable_dev_threaded_disable_napi_threaded], args=(cfg, NetdevFamily())) ksft_exit() diff --git a/tools/testing/selftests/drivers/net/netcons_cmdline.sh b/tools/testing/selftests/drivers/net/netcons_cmdline.sh index ad2fb8b1c463..d1d23dc67f99 100755 --- a/tools/testing/selftests/drivers/net/netcons_cmdline.sh +++ b/tools/testing/selftests/drivers/net/netcons_cmdline.sh @@ -19,9 +19,6 @@ check_netconsole_module modprobe netdevsim 2> /dev/null || true rmmod netconsole 2> /dev/null || true -# The content of kmsg will be save to the following file -OUTPUT_FILE="/tmp/${TARGET}" - # Check for basic system dependency and exit if not found # check_for_dependencies # Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5) @@ -30,23 +27,39 @@ echo "6 5" > /proc/sys/kernel/printk trap do_cleanup EXIT # Create one namespace and two interfaces set_network -# Create the command line for netconsole, with the configuration from the -# function above -CMDLINE="$(create_cmdline_str)" - -# Load the module, with the cmdline set -modprobe netconsole "${CMDLINE}" - -# Listed for netconsole port inside the namespace and destination interface -listen_port_and_save_to "${OUTPUT_FILE}" & -# Wait for socat to start and listen to the port. -wait_local_port_listen "${NAMESPACE}" "${PORT}" udp -# Send the message -echo "${MSG}: ${TARGET}" > /dev/kmsg -# Wait until socat saves the file to disk -busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}" -# Make sure the message was received in the dst part -# and exit -validate_msg "${OUTPUT_FILE}" + +# Run the test twice, with different cmdline parameters +for BINDMODE in "ifname" "mac" +do + echo "Running with bind mode: ${BINDMODE}" >&2 + # Create the command line for netconsole, with the configuration from + # the function above + CMDLINE=$(create_cmdline_str "${BINDMODE}") + + # The content of kmsg will be save to the following file + OUTPUT_FILE="/tmp/${TARGET}-${BINDMODE}" + + # Load the module, with the cmdline set + modprobe netconsole "${CMDLINE}" + + # Listed for netconsole port inside the namespace and destination + # interface + listen_port_and_save_to "${OUTPUT_FILE}" & + # Wait for socat to start and listen to the port. 
+ wait_local_port_listen "${NAMESPACE}" "${PORT}" udp + # Send the message + echo "${MSG}: ${TARGET}" > /dev/kmsg + # Wait until socat saves the file to disk + busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}" + # Make sure the message was received in the dst part + # and exit + validate_msg "${OUTPUT_FILE}" + + # kill socat in case it is still running + pkill_socat + # Unload the module + rmmod netconsole + echo "${BINDMODE} : Test passed" >&2 +done exit "${ksft_pass}" diff --git a/tools/testing/selftests/drivers/net/xdp.py b/tools/testing/selftests/drivers/net/xdp.py index 1dd8bf3bf6c9..08fea4230759 100755 --- a/tools/testing/selftests/drivers/net/xdp.py +++ b/tools/testing/selftests/drivers/net/xdp.py @@ -112,10 +112,10 @@ def _load_xdp_prog(cfg, bpf_info): defer(ip, f"link set dev {cfg.remote_ifname} mtu 1500", host=cfg.remote) cmd( - f"ip link set dev {cfg.ifname} mtu {bpf_info.mtu} xdp obj {abs_path} sec {bpf_info.xdp_sec}", + f"ip link set dev {cfg.ifname} mtu {bpf_info.mtu} xdpdrv obj {abs_path} sec {bpf_info.xdp_sec}", shell=True ) - defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdp off") + defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdpdrv off") xdp_info = ip(f"-d link show dev {cfg.ifname}", json=True)[0] prog_info["id"] = xdp_info["xdp"]["prog"]["id"] @@ -290,34 +290,78 @@ def test_xdp_native_drop_mb(cfg): _test_drop(cfg, bpf_info, 8000) -def test_xdp_native_tx_mb(cfg): +def _test_xdp_native_tx(cfg, bpf_info, payload_lens): """ - Tests the XDP_TX action for a multi-buff case. + Tests the XDP_TX action. Args: cfg: Configuration object containing network settings. + bpf_info: BPFProgInfo object containing the BPF program metadata. + payload_lens: Array of packet lengths to send. """ cfg.require_cmd("socat", remote=True) - - bpf_info = BPFProgInfo("xdp_prog_frags", "xdp_native.bpf.o", "xdp.frags", 9000) prog_info = _load_xdp_prog(cfg, bpf_info) port = rand_port() _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, XDPAction.TX.value) _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port) - test_string = ''.join(random.choice(string.ascii_lowercase) for _ in range(8000)) - rx_udp = f"socat -{cfg.addr_ipver} -T 2 -u UDP-RECV:{port},reuseport STDOUT" - tx_udp = f"echo {test_string} | socat -t 2 -u STDIN UDP:{cfg.baddr}:{port}" + expected_pkts = 0 + for payload_len in payload_lens: + test_string = "".join( + random.choice(string.ascii_lowercase) for _ in range(payload_len) + ) + + rx_udp = f"socat -{cfg.addr_ipver} -T 2 " + \ + f"-u UDP-RECV:{port},reuseport STDOUT" + + # Writing zero bytes to stdin gets ignored by socat, + # but with the shut-null flag socat generates a zero sized packet + # when the socket is closed. + tx_cmd_suffix = ",shut-null" if payload_len == 0 else "" + tx_udp = f"echo -n {test_string} | socat -t 2 " + \ + f"-u STDIN UDP:{cfg.baddr}:{port}{tx_cmd_suffix}" + + with bkg(rx_udp, host=cfg.remote, exit_wait=True) as rnc: + wait_port_listen(port, proto="udp", host=cfg.remote) + cmd(tx_udp, host=cfg.remote, shell=True) + + ksft_eq(rnc.stdout.strip(), test_string, "UDP packet exchange failed") + + expected_pkts += 1 + stats = _get_stats(prog_info["maps"]["map_xdp_stats"]) + ksft_eq(stats[XDPStats.RX.value], expected_pkts, "RX stats mismatch") + ksft_eq(stats[XDPStats.TX.value], expected_pkts, "TX stats mismatch") + + +def test_xdp_native_tx_sb(cfg): + """ + Tests the XDP_TX action for a single-buff case. + + Args: + cfg: Configuration object containing network settings. 
+ """ + bpf_info = BPFProgInfo("xdp_prog", "xdp_native.bpf.o", "xdp", 1500) + + # Ensure there's enough room for an ETH / IP / UDP header + pkt_hdr_len = 42 if cfg.addr_ipver == "4" else 62 - with bkg(rx_udp, host=cfg.remote, exit_wait=True) as rnc: - wait_port_listen(port, proto="udp", host=cfg.remote) - cmd(tx_udp, host=cfg.remote, shell=True) + _test_xdp_native_tx(cfg, bpf_info, [0, 1500 // 2, 1500 - pkt_hdr_len]) - stats = _get_stats(prog_info['maps']['map_xdp_stats']) - ksft_eq(rnc.stdout.strip(), test_string, "UDP packet exchange failed") - ksft_eq(stats[XDPStats.TX.value], 1, "TX stats mismatch") +def test_xdp_native_tx_mb(cfg): + """ + Tests the XDP_TX action for a multi-buff case. + + Args: + cfg: Configuration object containing network settings. + """ + bpf_info = BPFProgInfo("xdp_prog_frags", "xdp_native.bpf.o", + "xdp.frags", 9000) + # The first packet ensures we exercise the fragmented code path. + # And the subsequent 0-sized packet ensures the driver + # reinitializes xdp_buff correctly. + _test_xdp_native_tx(cfg, bpf_info, [8000, 0]) def _validate_res(res, offset_lst, pkt_sz_lst): @@ -644,6 +688,7 @@ def main(): test_xdp_native_pass_mb, test_xdp_native_drop_sb, test_xdp_native_drop_mb, + test_xdp_native_tx_sb, test_xdp_native_tx_mb, test_xdp_native_adjst_tail_grow_data, test_xdp_native_adjst_tail_shrnk_data, diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index c7e03e1d6f63..9926a14fd279 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -118,6 +118,7 @@ TEST_GEN_FILES += tfo TEST_PROGS += tfo_passive.sh TEST_PROGS += broadcast_pmtu.sh TEST_PROGS += ipv6_force_forwarding.sh +TEST_PROGS += route_hint.sh # YNL files, must be before "include ..lib.mk" YNL_GEN_FILES := busy_poller netlink-dumps diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile index a4b61c6d0290..0a20c98bbcfd 100644 --- a/tools/testing/selftests/net/af_unix/Makefile +++ b/tools/testing/selftests/net/af_unix/Makefile @@ -1,4 +1,4 @@ -CFLAGS += $(KHDR_INCLUDES) +CFLAGS += $(KHDR_INCLUDES) -Wall -Wflex-array-member-not-at-end TEST_GEN_PROGS := diag_uid msg_oob scm_inq scm_pidfd scm_rights unix_connect include ../../lib.mk diff --git a/tools/testing/selftests/net/af_unix/scm_inq.c b/tools/testing/selftests/net/af_unix/scm_inq.c index 9d22561e7b8f..fc467714387e 100644 --- a/tools/testing/selftests/net/af_unix/scm_inq.c +++ b/tools/testing/selftests/net/af_unix/scm_inq.c @@ -11,11 +11,6 @@ #define NR_CHUNKS 100 #define MSG_LEN 256 -struct scm_inq { - struct cmsghdr cmsghdr; - int inq; -}; - FIXTURE(scm_inq) { int fd[2]; @@ -70,35 +65,38 @@ static void send_chunks(struct __test_metadata *_metadata, static void recv_chunks(struct __test_metadata *_metadata, FIXTURE_DATA(scm_inq) *self) { + char cmsg_buf[CMSG_SPACE(sizeof(int))]; struct msghdr msg = {}; struct iovec iov = {}; - struct scm_inq cmsg; + struct cmsghdr *cmsg; char buf[MSG_LEN]; int i, ret; int inq; msg.msg_iov = &iov; msg.msg_iovlen = 1; - msg.msg_control = &cmsg; - msg.msg_controllen = CMSG_SPACE(sizeof(cmsg.inq)); + msg.msg_control = cmsg_buf; + msg.msg_controllen = sizeof(cmsg_buf); iov.iov_base = buf; iov.iov_len = sizeof(buf); for (i = 0; i < NR_CHUNKS; i++) { memset(buf, 0, sizeof(buf)); - memset(&cmsg, 0, sizeof(cmsg)); + memset(cmsg_buf, 0, sizeof(cmsg_buf)); ret = recvmsg(self->fd[1], &msg, 0); ASSERT_EQ(MSG_LEN, ret); - ASSERT_NE(NULL, CMSG_FIRSTHDR(&msg)); - ASSERT_EQ(CMSG_LEN(sizeof(cmsg.inq)), 
cmsg.cmsghdr.cmsg_len); - ASSERT_EQ(SOL_SOCKET, cmsg.cmsghdr.cmsg_level); - ASSERT_EQ(SCM_INQ, cmsg.cmsghdr.cmsg_type); + + cmsg = CMSG_FIRSTHDR(&msg); + ASSERT_NE(NULL, cmsg); + ASSERT_EQ(CMSG_LEN(sizeof(int)), cmsg->cmsg_len); + ASSERT_EQ(SOL_SOCKET, cmsg->cmsg_level); + ASSERT_EQ(SCM_INQ, cmsg->cmsg_type); ret = ioctl(self->fd[1], SIOCINQ, &inq); ASSERT_EQ(0, ret); - ASSERT_EQ(cmsg.inq, inq); + ASSERT_EQ(*(int *)CMSG_DATA(cmsg), inq); } } diff --git a/tools/testing/selftests/net/af_unix/scm_pidfd.c b/tools/testing/selftests/net/af_unix/scm_pidfd.c index 37e034874034..ef2921988e5f 100644 --- a/tools/testing/selftests/net/af_unix/scm_pidfd.c +++ b/tools/testing/selftests/net/af_unix/scm_pidfd.c @@ -137,7 +137,6 @@ struct cmsg_data { static int parse_cmsg(struct msghdr *msg, struct cmsg_data *res) { struct cmsghdr *cmsg; - int data = 0; if (msg->msg_flags & (MSG_TRUNC | MSG_CTRUNC)) { log_err("recvmsg: truncated"); @@ -243,7 +242,6 @@ static int cmsg_check_dead(int fd, int expected_pid) int data = 0; char control[CMSG_SPACE(sizeof(struct ucred)) + CMSG_SPACE(sizeof(int))] = { 0 }; - pid_t client_pid; struct pidfd_info info = { .mask = PIDFD_INFO_EXIT, }; diff --git a/tools/testing/selftests/net/af_unix/scm_rights.c b/tools/testing/selftests/net/af_unix/scm_rights.c index 8b015f16c03d..914f99d153ce 100644 --- a/tools/testing/selftests/net/af_unix/scm_rights.c +++ b/tools/testing/selftests/net/af_unix/scm_rights.c @@ -271,20 +271,11 @@ void __send_fd(struct __test_metadata *_metadata, { #define MSG "x" #define MSGLEN 1 - struct { - struct cmsghdr cmsghdr; - int fd[2]; - } cmsg = { - .cmsghdr = { - .cmsg_len = CMSG_LEN(sizeof(cmsg.fd)), - .cmsg_level = SOL_SOCKET, - .cmsg_type = SCM_RIGHTS, - }, - .fd = { - self->fd[inflight * 2], - self->fd[inflight * 2], - }, + int fds[2] = { + self->fd[inflight * 2], + self->fd[inflight * 2], }; + char cmsg_buf[CMSG_SPACE(sizeof(fds))]; struct iovec iov = { .iov_base = MSG, .iov_len = MSGLEN, @@ -294,11 +285,18 @@ void __send_fd(struct __test_metadata *_metadata, .msg_namelen = 0, .msg_iov = &iov, .msg_iovlen = 1, - .msg_control = &cmsg, - .msg_controllen = CMSG_SPACE(sizeof(cmsg.fd)), + .msg_control = cmsg_buf, + .msg_controllen = sizeof(cmsg_buf), }; + struct cmsghdr *cmsg; int ret; + cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + cmsg->cmsg_len = CMSG_LEN(sizeof(fds)); + memcpy(CMSG_DATA(cmsg), fds, sizeof(fds)); + ret = sendmsg(self->fd[receiver * 2 + 1], &msg, variant->flags); if (variant->disabled) { diff --git a/tools/testing/selftests/net/bpf_offload.py b/tools/testing/selftests/net/bpf_offload.py index b2c271b79240..c856d266c8f3 100755 --- a/tools/testing/selftests/net/bpf_offload.py +++ b/tools/testing/selftests/net/bpf_offload.py @@ -184,8 +184,8 @@ def bpftool_prog_list(expected=None, ns="", exclude_orphaned=True): progs = [ p for p in progs if not p['orphaned'] ] if expected is not None: if len(progs) != expected: - fail(True, "%d BPF programs loaded, expected %d" % - (len(progs), expected)) + fail(True, "%d BPF programs loaded, expected %d\nLoaded Progs:\n%s" % + (len(progs), expected, pp.pformat(progs))) return progs def bpftool_map_list(expected=None, ns=""): diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c index a825e628aee7..ded9b925865e 100644 --- a/tools/testing/selftests/net/cmsg_sender.c +++ b/tools/testing/selftests/net/cmsg_sender.c @@ -491,7 +491,8 @@ int main(int argc, char *argv[]) if (err) { fprintf(stderr, "Can't resolve address 
[%s]:%s\n", opt.host, opt.service); - return ERN_SOCK_CREATE; + err = ERN_SOCK_CREATE; + goto err_free_buff; } if (ai->ai_family == AF_INET6 && opt.sock.proto == IPPROTO_ICMP) @@ -500,8 +501,8 @@ int main(int argc, char *argv[]) fd = socket(ai->ai_family, opt.sock.type, opt.sock.proto); if (fd < 0) { fprintf(stderr, "Can't open socket: %s\n", strerror(errno)); - freeaddrinfo(ai); - return ERN_RESOLVE; + err = ERN_RESOLVE; + goto err_free_info; } if (opt.sock.proto == IPPROTO_ICMP) { @@ -574,6 +575,9 @@ int main(int argc, char *argv[]) err_out: close(fd); +err_free_info: freeaddrinfo(ai); +err_free_buff: + free(buf); return err; } diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index c24417d0047b..d548611e2698 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -26,6 +26,7 @@ CONFIG_IFB=y CONFIG_INET_DIAG=y CONFIG_INET_ESP=y CONFIG_INET_ESP_OFFLOAD=y +CONFIG_CRYPTO_SHA1=y CONFIG_NET_FOU=y CONFIG_NET_FOU_IP_TUNNELS=y CONFIG_NETFILTER=y diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile index d7bb2e80e88c..0a0d4c2a85f7 100644 --- a/tools/testing/selftests/net/forwarding/Makefile +++ b/tools/testing/selftests/net/forwarding/Makefile @@ -1,6 +1,8 @@ # SPDX-License-Identifier: GPL-2.0+ OR MIT -TEST_PROGS = bridge_fdb_learning_limit.sh \ +TEST_PROGS = \ + bridge_activity_notify.sh \ + bridge_fdb_learning_limit.sh \ bridge_igmp.sh \ bridge_locked_port.sh \ bridge_mdb.sh \ diff --git a/tools/testing/selftests/net/forwarding/bridge_activity_notify.sh b/tools/testing/selftests/net/forwarding/bridge_activity_notify.sh new file mode 100755 index 000000000000..a20ef4bd310b --- /dev/null +++ b/tools/testing/selftests/net/forwarding/bridge_activity_notify.sh @@ -0,0 +1,173 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +-----------------------+ +------------------------+ +# | H1 (vrf) | | H2 (vrf) | +# | 192.0.2.1/28 | | 192.0.2.2/28 | +# | + $h1 | | + $h2 | +# +----|------------------+ +----|-------------------+ +# | | +# +----|--------------------------------------------------|-------------------+ +# | SW | | | +# | +--|--------------------------------------------------|-----------------+ | +# | | + $swp1 BR1 (802.1d) + $swp2 | | +# | | | | +# | +-----------------------------------------------------------------------+ | +# +---------------------------------------------------------------------------+ + +ALL_TESTS=" + new_inactive_test + existing_active_test + norefresh_test +" + +NUM_NETIFS=4 +source lib.sh + +h1_create() +{ + simple_if_init "$h1" 192.0.2.1/28 + defer simple_if_fini "$h1" 192.0.2.1/28 +} + +h2_create() +{ + simple_if_init "$h2" 192.0.2.2/28 + defer simple_if_fini "$h2" 192.0.2.2/28 +} + +switch_create() +{ + ip_link_add br1 type bridge vlan_filtering 0 mcast_snooping 0 \ + ageing_time "$LOW_AGEING_TIME" + ip_link_set_up br1 + + ip_link_set_master "$swp1" br1 + ip_link_set_up "$swp1" + + ip_link_set_master "$swp2" br1 + ip_link_set_up "$swp2" +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + defer vrf_cleanup + + h1_create + h2_create + switch_create +} + +fdb_active_wait() +{ + local mac=$1; shift + + bridge -d fdb get "$mac" br br1 | grep -q -v "inactive" +} + +fdb_inactive_wait() +{ + local mac=$1; shift + + bridge -d fdb get "$mac" br br1 | grep -q "inactive" +} + +new_inactive_test() +{ + local mac="00:11:22:33:44:55" + + # Add a new FDB entry as static and inactive 
and check that it + # becomes active upon traffic. + RET=0 + + bridge fdb add "$mac" dev "$swp1" master static activity_notify inactive + bridge -d fdb get "$mac" br br1 | grep -q "inactive" + check_err $? "FDB entry not present as \"inactive\" when should" + + $MZ "$h1" -c 1 -p 64 -a "$mac" -b bcast -t ip -q + + busywait "$BUSYWAIT_TIMEOUT" fdb_active_wait "$mac" + check_err $? "FDB entry present as \"inactive\" when should not" + + log_test "Transition from inactive to active" + + bridge fdb del "$mac" dev "$swp1" master +} + +existing_active_test() +{ + local mac="00:11:22:33:44:55" + local ageing_time + + # Enable activity notifications on an existing dynamic FDB entry and + # check that it becomes inactive after the ageing time passed. + RET=0 + + bridge fdb add "$mac" dev "$swp1" master dynamic + bridge fdb replace "$mac" dev "$swp1" master static activity_notify norefresh + + bridge -d fdb get "$mac" br br1 | grep -q "activity_notify" + check_err $? "FDB entry not present as \"activity_notify\" when should" + + bridge -d fdb get "$mac" br br1 | grep -q "inactive" + check_fail $? "FDB entry present as \"inactive\" when should not" + + ageing_time=$(bridge_ageing_time_get br1) + slowwait $((ageing_time * 2)) fdb_inactive_wait "$mac" + check_err $? "FDB entry not present as \"inactive\" when should" + + log_test "Transition from active to inactive" + + bridge fdb del "$mac" dev "$swp1" master +} + +norefresh_test() +{ + local mac="00:11:22:33:44:55" + local updated_time + + # Check that the "updated" time is reset when replacing an FDB entry + # without the "norefresh" keyword and that it is not reset when + # replacing with the "norefresh" keyword. + RET=0 + + bridge fdb add "$mac" dev "$swp1" master static + sleep 1 + + bridge fdb replace "$mac" dev "$swp1" master static activity_notify + updated_time=$(bridge -d -s -j fdb get "$mac" br br1 | jq '.[]["updated"]') + if [[ $updated_time -ne 0 ]]; then + check_err 1 "\"updated\" time was not reset when should" + fi + + sleep 1 + bridge fdb replace "$mac" dev "$swp1" master static norefresh + updated_time=$(bridge -d -s -j fdb get "$mac" br br1 | jq '.[]["updated"]') + if [[ $updated_time -eq 0 ]]; then + check_err 1 "\"updated\" time was reset when should not" + fi + + log_test "Resetting of \"updated\" time" + + bridge fdb del "$mac" dev "$swp1" master +} + +if ! bridge fdb help 2>&1 | grep -q "activity_notify"; then + echo "SKIP: iproute2 too old, missing bridge FDB activity notification control" + exit "$ksft_skip" +fi + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit "$EXIT_STATUS" diff --git a/tools/testing/selftests/net/lib/py/utils.py b/tools/testing/selftests/net/lib/py/utils.py index f395c90fb0f1..cb40ecef9456 100644 --- a/tools/testing/selftests/net/lib/py/utils.py +++ b/tools/testing/selftests/net/lib/py/utils.py @@ -1,9 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 -import errno import json as _json import os -import random import re import select import socket @@ -21,17 +19,19 @@ def fd_read_timeout(fd, timeout): rlist, _, _ = select.select([fd], [], [], timeout) if rlist: return os.read(fd, 1024) - else: - raise TimeoutError("Timeout waiting for fd read") + raise TimeoutError("Timeout waiting for fd read") class cmd: """ Execute a command on local or remote host. + @shell defaults to false, and class will try to split @comm into a list + if it's a string with spaces. + Use bkg() instead to run a command in the background. 
""" - def __init__(self, comm, shell=True, fail=True, ns=None, background=False, + def __init__(self, comm, shell=None, fail=True, ns=None, background=False, host=None, timeout=5, ksft_wait=None): if ns: comm = f'ip netns exec {ns} ' + comm @@ -45,6 +45,10 @@ class cmd: if host: self.proc = host.cmd(comm) else: + # If user doesn't explicitly request shell try to avoid it. + if shell is None and isinstance(comm, str) and ' ' in comm: + comm = comm.split() + # ksft_wait lets us wait for the background process to fully start, # we pass an FD to the child process, and wait for it to write back. # Similarly term_fd tells child it's time to exit. @@ -111,12 +115,13 @@ class bkg(cmd): with bkg("my_binary", ksft_wait=5): """ - def __init__(self, comm, shell=True, fail=None, ns=None, host=None, + def __init__(self, comm, shell=None, fail=None, ns=None, host=None, exit_wait=False, ksft_wait=None): super().__init__(comm, background=True, shell=shell, fail=fail, ns=ns, host=host, ksft_wait=ksft_wait) self.terminate = not exit_wait and not ksft_wait + self._exit_wait = exit_wait self.check_fail = fail if shell and self.terminate: @@ -127,7 +132,9 @@ class bkg(cmd): return self def __exit__(self, ex_type, ex_value, ex_tb): - return self.process(terminate=self.terminate, fail=self.check_fail) + # Force termination on exception + terminate = self.terminate or (self._exit_wait and ex_type) + return self.process(terminate=terminate, fail=self.check_fail) global_defer_queue = [] @@ -135,8 +142,6 @@ global_defer_queue = [] class defer: def __init__(self, func, *args, **kwargs): - global global_defer_queue - if not callable(func): raise Exception("defer created with un-callable object, did you call the function instead of passing its name?") @@ -224,11 +229,11 @@ def bpftrace(expr, json=None, ns=None, host=None, timeout=None): return cmd_obj -def rand_port(type=socket.SOCK_STREAM): +def rand_port(stype=socket.SOCK_STREAM): """ Get a random unprivileged port. """ - with socket.socket(socket.AF_INET6, type) as s: + with socket.socket(socket.AF_INET6, stype) as s: s.bind(("", 0)) return s.getsockname()[1] @@ -249,3 +254,21 @@ def wait_port_listen(port, proto="tcp", ns=None, host=None, sleep=0.005, deadlin if time.monotonic() > end: raise Exception("Waiting for port listen timed out") time.sleep(sleep) + + +def wait_file(fname, test_fn, sleep=0.005, deadline=5, encoding='utf-8'): + """ + Wait for file contents on the local system to satisfy a condition. + test_fn() should take one argument (file contents) and return whether + condition is met. 
+ """ + end = time.monotonic() + deadline + + with open(fname, "r", encoding=encoding) as fp: + while True: + if test_fn(fp.read()): + break + fp.seek(0) + if time.monotonic() > end: + raise TimeoutError("Wait for file contents failed", fname) + time.sleep(sleep) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 82cae37d9c20..2f046167a0b6 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -74,6 +74,17 @@ unset join_create_err unset join_bind_err unset join_connect_err +unset fb_ns1 +unset fb_ns2 +unset fb_infinite_map_tx +unset fb_dss_corruption +unset fb_simult_conn +unset fb_mpc_passive +unset fb_mpc_active +unset fb_mpc_data +unset fb_md5_sig +unset fb_dss + # generated using "nfbpf_compile '(ip && (ip[54] & 0xf0) == 0x30) || # (ip6 && (ip6[74] & 0xf0) == 0x30)'" CBPF_MPTCP_SUBOPTION_ADD_ADDR="14, @@ -1399,6 +1410,115 @@ chk_join_tx_nr() print_results "join Tx" ${rc} } +chk_fallback_nr() +{ + local infinite_map_tx=${fb_infinite_map_tx:-0} + local dss_corruption=${fb_dss_corruption:-0} + local simult_conn=${fb_simult_conn:-0} + local mpc_passive=${fb_mpc_passive:-0} + local mpc_active=${fb_mpc_active:-0} + local mpc_data=${fb_mpc_data:-0} + local md5_sig=${fb_md5_sig:-0} + local dss=${fb_dss:-0} + local rc=${KSFT_PASS} + local ns=$1 + local count + + count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtInfiniteMapTx") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "$infinite_map_tx" ]; then + rc=${KSFT_FAIL} + print_check "$ns infinite map tx fallback" + fail_test "got $count infinite map tx fallback[s] in $ns expected $infinite_map_tx" + fi + + count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtDSSCorruptionFallback") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "$dss_corruption" ]; then + rc=${KSFT_FAIL} + print_check "$ns dss corruption fallback" + fail_test "got $count dss corruption fallback[s] in $ns expected $dss_corruption" + fi + + count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtSimultConnectFallback") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "$simult_conn" ]; then + rc=${KSFT_FAIL} + print_check "$ns simult conn fallback" + fail_test "got $count simult conn fallback[s] in $ns expected $simult_conn" + fi + + count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtMPCapableFallbackACK") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "$mpc_passive" ]; then + rc=${KSFT_FAIL} + print_check "$ns mpc passive fallback" + fail_test "got $count mpc passive fallback[s] in $ns expected $mpc_passive" + fi + + count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtMPCapableFallbackSYNACK") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "$mpc_active" ]; then + rc=${KSFT_FAIL} + print_check "$ns mpc active fallback" + fail_test "got $count mpc active fallback[s] in $ns expected $mpc_active" + fi + + count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtMPCapableDataFallback") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "$mpc_data" ]; then + rc=${KSFT_FAIL} + print_check "$ns mpc data fallback" + fail_test "got $count mpc data fallback[s] in $ns expected $mpc_data" + fi + + count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtMD5SigFallback") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "$md5_sig" ]; then + rc=${KSFT_FAIL} + print_check "$ns MD5 Sig fallback" + fail_test "got $count MD5 Sig fallback[s] in $ns expected $md5_sig" + fi + + count=$(mptcp_lib_get_counter 
${!ns} "MPTcpExtDssFallback") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "$dss" ]; then + rc=${KSFT_FAIL} + print_check "$ns dss fallback" + fail_test "got $count dss fallback[s] in $ns expected $dss" + fi + + return $rc +} + +chk_fallback_nr_all() +{ + local netns=("ns1" "ns2") + local fb_ns=("fb_ns1" "fb_ns2") + local rc=${KSFT_PASS} + + for i in 0 1; do + if [ -n "${!fb_ns[i]}" ]; then + eval "${!fb_ns[i]}" \ + chk_fallback_nr ${netns[i]} || rc=${?} + else + chk_fallback_nr ${netns[i]} || rc=${?} + fi + done + + if [ "${rc}" != "${KSFT_PASS}" ]; then + print_results "fallback" ${rc} + fi +} + chk_join_nr() { local syn_nr=$1 @@ -1484,6 +1604,8 @@ chk_join_nr() join_syn_tx="${join_syn_tx:-${syn_nr}}" \ chk_join_tx_nr + chk_fallback_nr_all + if $validate_checksum; then chk_csum_nr $csum_ns1 $csum_ns2 chk_fail_nr $fail_nr $fail_nr @@ -3337,6 +3459,7 @@ fail_tests() join_csum_ns1=+1 join_csum_ns2=+0 \ join_fail_nr=1 join_rst_nr=0 join_infi_nr=1 \ join_corrupted_pkts="$(pedit_action_pkts)" \ + fb_ns1="fb_dss=1" fb_ns2="fb_infinite_map_tx=1" \ chk_join_nr 0 0 0 chk_fail_nr 1 -1 invert fi diff --git a/tools/testing/selftests/net/netfilter/config b/tools/testing/selftests/net/netfilter/config index 79d5b33966ba..305e46b819cb 100644 --- a/tools/testing/selftests/net/netfilter/config +++ b/tools/testing/selftests/net/netfilter/config @@ -13,6 +13,7 @@ CONFIG_BRIDGE_VLAN_FILTERING=y CONFIG_CGROUP_BPF=y CONFIG_DUMMY=m CONFIG_INET_ESP=m +CONFIG_CRYPTO_SHA1=m CONFIG_IP_NF_MATCH_RPFILTER=m CONFIG_IP6_NF_MATCH_RPFILTER=m CONFIG_IP_NF_IPTABLES=m diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py index 8a0396bfaf99..b521e0dea506 100644 --- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py +++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py @@ -1877,7 +1877,7 @@ class OvsPacket(GenericNetlinkSocket): elif msg["cmd"] == OvsPacket.OVS_PACKET_CMD_EXECUTE: up.execute(msg) else: - print("Unkonwn cmd: %d" % msg["cmd"]) + print("Unknown cmd: %d" % msg["cmd"]) except NetlinkError as ne: raise ne diff --git a/tools/testing/selftests/net/packetdrill/ksft_runner.sh b/tools/testing/selftests/net/packetdrill/ksft_runner.sh index a7e790af38ff..0ae6eeeb1a8e 100755 --- a/tools/testing/selftests/net/packetdrill/ksft_runner.sh +++ b/tools/testing/selftests/net/packetdrill/ksft_runner.sh @@ -3,21 +3,22 @@ source "$(dirname $(realpath $0))/../../kselftest/ktap_helpers.sh" -readonly ipv4_args=('--ip_version=ipv4 ' - '--local_ip=192.168.0.1 ' - '--gateway_ip=192.168.0.1 ' - '--netmask_ip=255.255.0.0 ' - '--remote_ip=192.0.2.1 ' - '-D CMSG_LEVEL_IP=SOL_IP ' - '-D CMSG_TYPE_RECVERR=IP_RECVERR ') - -readonly ipv6_args=('--ip_version=ipv6 ' - '--mtu=1520 ' - '--local_ip=fd3d:0a0b:17d6::1 ' - '--gateway_ip=fd3d:0a0b:17d6:8888::1 ' - '--remote_ip=fd3d:fa7b:d17d::1 ' - '-D CMSG_LEVEL_IP=SOL_IPV6 ' - '-D CMSG_TYPE_RECVERR=IPV6_RECVERR ') +declare -A ip_args=( + [ipv4]="--ip_version=ipv4 + --local_ip=192.168.0.1 + --gateway_ip=192.168.0.1 + --netmask_ip=255.255.0.0 + --remote_ip=192.0.2.1 + -D CMSG_LEVEL_IP=SOL_IP + -D CMSG_TYPE_RECVERR=IP_RECVERR" + [ipv6]="--ip_version=ipv6 + --mtu=1520 + --local_ip=fd3d:0a0b:17d6::1 + --gateway_ip=fd3d:0a0b:17d6:8888::1 + --remote_ip=fd3d:fa7b:d17d::1 + -D CMSG_LEVEL_IP=SOL_IPV6 + -D CMSG_TYPE_RECVERR=IPV6_RECVERR" +) if [ $# -ne 1 ]; then ktap_exit_fail_msg "usage: $0 <script>" @@ -38,12 +39,20 @@ if [[ -n "${KSFT_MACHINE_SLOW}" ]]; then failfunc=ktap_test_xfail fi +ip_versions=$(grep -E 
diff --git a/tools/testing/selftests/net/netfilter/config b/tools/testing/selftests/net/netfilter/config
index 79d5b33966ba..305e46b819cb 100644
--- a/tools/testing/selftests/net/netfilter/config
+++ b/tools/testing/selftests/net/netfilter/config
@@ -13,6 +13,7 @@ CONFIG_BRIDGE_VLAN_FILTERING=y
 CONFIG_CGROUP_BPF=y
 CONFIG_DUMMY=m
 CONFIG_INET_ESP=m
+CONFIG_CRYPTO_SHA1=m
 CONFIG_IP_NF_MATCH_RPFILTER=m
 CONFIG_IP6_NF_MATCH_RPFILTER=m
 CONFIG_IP_NF_IPTABLES=m
diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
index 8a0396bfaf99..b521e0dea506 100644
--- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
+++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
@@ -1877,7 +1877,7 @@ class OvsPacket(GenericNetlinkSocket):
             elif msg["cmd"] == OvsPacket.OVS_PACKET_CMD_EXECUTE:
                 up.execute(msg)
             else:
-                print("Unkonwn cmd: %d" % msg["cmd"])
+                print("Unknown cmd: %d" % msg["cmd"])
         except NetlinkError as ne:
             raise ne
 
diff --git a/tools/testing/selftests/net/packetdrill/ksft_runner.sh b/tools/testing/selftests/net/packetdrill/ksft_runner.sh
index a7e790af38ff..0ae6eeeb1a8e 100755
--- a/tools/testing/selftests/net/packetdrill/ksft_runner.sh
+++ b/tools/testing/selftests/net/packetdrill/ksft_runner.sh
@@ -3,21 +3,22 @@
 
 source "$(dirname $(realpath $0))/../../kselftest/ktap_helpers.sh"
 
-readonly ipv4_args=('--ip_version=ipv4 '
-	'--local_ip=192.168.0.1 '
-	'--gateway_ip=192.168.0.1 '
-	'--netmask_ip=255.255.0.0 '
-	'--remote_ip=192.0.2.1 '
-	'-D CMSG_LEVEL_IP=SOL_IP '
-	'-D CMSG_TYPE_RECVERR=IP_RECVERR ')
-
-readonly ipv6_args=('--ip_version=ipv6 '
-	'--mtu=1520 '
-	'--local_ip=fd3d:0a0b:17d6::1 '
-	'--gateway_ip=fd3d:0a0b:17d6:8888::1 '
-	'--remote_ip=fd3d:fa7b:d17d::1 '
-	'-D CMSG_LEVEL_IP=SOL_IPV6 '
-	'-D CMSG_TYPE_RECVERR=IPV6_RECVERR ')
+declare -A ip_args=(
+	[ipv4]="--ip_version=ipv4
+		--local_ip=192.168.0.1
+		--gateway_ip=192.168.0.1
+		--netmask_ip=255.255.0.0
+		--remote_ip=192.0.2.1
+		-D CMSG_LEVEL_IP=SOL_IP
+		-D CMSG_TYPE_RECVERR=IP_RECVERR"
+	[ipv6]="--ip_version=ipv6
+		--mtu=1520
+		--local_ip=fd3d:0a0b:17d6::1
+		--gateway_ip=fd3d:0a0b:17d6:8888::1
+		--remote_ip=fd3d:fa7b:d17d::1
+		-D CMSG_LEVEL_IP=SOL_IPV6
+		-D CMSG_TYPE_RECVERR=IPV6_RECVERR"
+)
 
 if [ $# -ne 1 ]; then
 	ktap_exit_fail_msg "usage: $0 <script>"
@@ -38,12 +39,20 @@ if [[ -n "${KSFT_MACHINE_SLOW}" ]]; then
 	failfunc=ktap_test_xfail
 fi
 
+ip_versions=$(grep -E '^--ip_version=' $script | cut -d '=' -f 2)
+if [[ -z $ip_versions ]]; then
+	ip_versions="ipv4 ipv6"
+elif [[ ! "$ip_versions" =~ ^ipv[46]$ ]]; then
+	ktap_exit_fail_msg "Too many or unsupported --ip_version values: $ip_versions"
+	exit "$KSFT_FAIL"
+fi
+
 ktap_print_header
 ktap_set_plan 2
 
-unshare -n packetdrill ${ipv4_args[@]} ${optargs[@]} $script > /dev/null \
-	&& ktap_test_pass "ipv4" || $failfunc "ipv4"
-unshare -n packetdrill ${ipv6_args[@]} ${optargs[@]} $script > /dev/null \
-	&& ktap_test_pass "ipv6" || $failfunc "ipv6"
+for ip_version in $ip_versions; do
+	unshare -n packetdrill ${ip_args[$ip_version]} ${optargs[@]} $script > /dev/null \
+		&& ktap_test_pass $ip_version || $failfunc $ip_version
+done
 
 ktap_finished
diff --git a/tools/testing/selftests/net/psock_tpacket.c b/tools/testing/selftests/net/psock_tpacket.c
index 221270cee3ea..2938045c5cf9 100644
--- a/tools/testing/selftests/net/psock_tpacket.c
+++ b/tools/testing/selftests/net/psock_tpacket.c
@@ -22,6 +22,7 @@
  *   - TPACKET_V3: RX_RING
  */
 
+#undef NDEBUG
 #include <stdio.h>
 #include <stdlib.h>
 #include <sys/types.h>
@@ -33,7 +34,6 @@
 #include <ctype.h>
 #include <fcntl.h>
 #include <unistd.h>
-#include <bits/wordsize.h>
 #include <net/ethernet.h>
 #include <netinet/ip.h>
 #include <arpa/inet.h>
@@ -785,7 +785,7 @@ static int test_kernel_bit_width(void)
 
 static int test_user_bit_width(void)
 {
-	return __WORDSIZE;
+	return sizeof(long) * 8;
 }
 
 static const char *tpacket_str[] = {
diff --git a/tools/testing/selftests/net/route_hint.sh b/tools/testing/selftests/net/route_hint.sh
new file mode 100755
index 000000000000..2db01ece0cc1
--- /dev/null
+++ b/tools/testing/selftests/net/route_hint.sh
@@ -0,0 +1,79 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test ensures that directed broadcast routes use the dst hint mechanism
+
+source lib.sh
+
+CLIENT_IP4="192.168.0.1"
+SERVER_IP4="192.168.0.2"
+BROADCAST_ADDRESS="192.168.0.255"
+
+setup() {
+	setup_ns CLIENT_NS SERVER_NS
+
+	ip -net "${SERVER_NS}" link add link1 type veth peer name link0 netns "${CLIENT_NS}"
+
+	ip -net "${CLIENT_NS}" link set link0 up
+	ip -net "${CLIENT_NS}" addr add "${CLIENT_IP4}/24" dev link0
+
+	ip -net "${SERVER_NS}" link set link1 up
+	ip -net "${SERVER_NS}" addr add "${SERVER_IP4}/24" dev link1
+
+	ip netns exec "${CLIENT_NS}" ethtool -K link0 tcp-segmentation-offload off
+	ip netns exec "${SERVER_NS}" sh -c "echo 500000000 > /sys/class/net/link1/gro_flush_timeout"
+	ip netns exec "${SERVER_NS}" sh -c "echo 1 > /sys/class/net/link1/napi_defer_hard_irqs"
+	ip netns exec "${SERVER_NS}" ethtool -K link1 generic-receive-offload on
+}
+
+cleanup() {
+	ip -net "${SERVER_NS}" link del link1
+	cleanup_ns "${CLIENT_NS}" "${SERVER_NS}"
+}
+
+directed_bcast_hint_test()
+{
+	local rc=0
+
+	echo "Testing for directed broadcast route hint"
+
+	orig_in_brd=$(ip netns exec "${SERVER_NS}" lnstat -j -i1 -c1 | jq '.in_brd')
+	ip netns exec "${CLIENT_NS}" mausezahn link0 -a own -b bcast -A "${CLIENT_IP4}" \
+		-B "${BROADCAST_ADDRESS}" -c1 -t tcp "sp=1-100,dp=1234,s=1,a=0" -p 5 -q
+	sleep 1
+	new_in_brd=$(ip netns exec "${SERVER_NS}" lnstat -j -i1 -c1 | jq '.in_brd')
+
+	res=$(echo "${new_in_brd} - ${orig_in_brd}" | bc)
+
+	if [ "${res}" -lt 100 ]; then
+		echo "[ OK ]"
+		rc="${ksft_pass}"
+	else
+		echo "[FAIL] expected in_brd to be under 100, got ${res}"
+		rc="${ksft_fail}"
+	fi
+
+	return "${rc}"
+}
+
+if [ ! -x "$(command -v mausezahn)" ]; then
+	echo "SKIP: Could not run test without mausezahn tool"
+	exit "${ksft_skip}"
+fi
+
+if [ ! -x "$(command -v jq)" ]; then
+	echo "SKIP: Could not run test without jq tool"
+	exit "${ksft_skip}"
+fi
+
+if [ ! -x "$(command -v bc)" ]; then
+	echo "SKIP: Could not run test without bc tool"
+	exit "${ksft_skip}"
+fi
+
+trap cleanup EXIT
+
+setup
+
+directed_bcast_hint_test
+exit $?
diff --git a/tools/testing/selftests/net/rps_default_mask.sh b/tools/testing/selftests/net/rps_default_mask.sh
index 4287a8529890..b200019b3c80 100755
--- a/tools/testing/selftests/net/rps_default_mask.sh
+++ b/tools/testing/selftests/net/rps_default_mask.sh
@@ -54,16 +54,16 @@ cleanup
 
 echo 1 > /proc/sys/net/core/rps_default_mask
 setup
-chk_rps "changing rps_default_mask dont affect existing devices" "" lo $INITIAL_RPS_DEFAULT_MASK
+chk_rps "changing rps_default_mask doesn't affect existing devices" "" lo $INITIAL_RPS_DEFAULT_MASK
 
 echo 3 > /proc/sys/net/core/rps_default_mask
-chk_rps "changing rps_default_mask dont affect existing netns" $NETNS lo 0
+chk_rps "changing rps_default_mask doesn't affect existing netns" $NETNS lo 0
 
 ip link add name $VETH type veth peer netns $NETNS name $VETH
 ip link set dev $VETH up
 ip -n $NETNS link set dev $VETH up
-chk_rps "changing rps_default_mask affect newly created devices" "" $VETH 3
-chk_rps "changing rps_default_mask don't affect newly child netns[II]" $NETNS $VETH 0
+chk_rps "changing rps_default_mask affects newly created devices" "" $VETH 3
+chk_rps "changing rps_default_mask doesn't affect new devices in child netns[II]" $NETNS $VETH 0
 ip link del dev $VETH
 ip netns del $NETNS
 
@@ -72,8 +72,8 @@ chk_rps "rps_default_mask is 0 by default in child netns" "$NETNS" lo 0
 
 ip netns exec $NETNS sysctl -qw net.core.rps_default_mask=1
 ip link add name $VETH type veth peer netns $NETNS name $VETH
-chk_rps "changing rps_default_mask in child ns don't affect the main one" "" lo $INITIAL_RPS_DEFAULT_MASK
+chk_rps "changing rps_default_mask in child ns doesn't affect the main one" "" lo $INITIAL_RPS_DEFAULT_MASK
 chk_rps "changing rps_default_mask in child ns affects new childns devices" $NETNS $VETH 1
-chk_rps "changing rps_default_mask in child ns don't affect existing devices" $NETNS lo 0
+chk_rps "changing rps_default_mask in child ns doesn't affect existing devices" $NETNS lo 0
 
 exit $ret
diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
index d6c00efeb664..9da47a845be6 100755
--- a/tools/testing/selftests/net/rtnetlink.sh
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -313,6 +313,8 @@ kci_test_addrlft()
 
 	slowwait 5 check_addr_not_exist "$devdummy" "10.23.11."
 	if [ $? -eq 1 ]; then
+		# log the remaining addresses to help troubleshoot the failure
+		run_cmd ip addr show dev "$devdummy"
 		check_err 1
 		end_test "FAIL: preferred_lft addresses remaining"
 		return
@@ -323,6 +325,11 @@
 
 kci_test_promote_secondaries()
 {
+	run_cmd ifconfig "$devdummy"
+	if [ $ret -ne 0 ]; then
+		end_test "SKIP: ifconfig not installed"
+		return $ksft_skip
+	fi
 	promote=$(sysctl -n net.ipv4.conf.$devdummy.promote_secondaries)
 	sysctl -q net.ipv4.conf.$devdummy.promote_secondaries=1
 
@@ -1201,6 +1208,12 @@ do_test_address_proto()
 	local ret=0
 	local err
 
+	run_cmd_grep 'proto' ip address help
+	if [ $? -ne 0 ]; then
+		end_test "SKIP: addr proto ${what}: iproute2 too old"
+		return $ksft_skip
+	fi
+
 	ip address add dev "$devdummy" "$addr3"
 	check_err $?
 	proto=$(address_get_proto "$addr3")
diff --git a/tools/testing/selftests/net/socket.c b/tools/testing/selftests/net/socket.c
index db1aeb8c5d1e..be1080003c61 100644
--- a/tools/testing/selftests/net/socket.c
+++ b/tools/testing/selftests/net/socket.c
@@ -39,6 +39,7 @@ static int run_tests(void)
 {
 	char err_string1[ERR_STRING_SZ];
 	char err_string2[ERR_STRING_SZ];
+	const char *msg1, *msg2;
 	int i, err;
 
 	err = 0;
@@ -56,13 +57,13 @@ static int run_tests(void)
 			    errno == -s->expect)
 				continue;
 
-			strerror_r(-s->expect, err_string1, ERR_STRING_SZ);
-			strerror_r(errno, err_string2, ERR_STRING_SZ);
+			msg1 = strerror_r(-s->expect, err_string1, ERR_STRING_SZ);
+			msg2 = strerror_r(errno, err_string2, ERR_STRING_SZ);
 
 			fprintf(stderr, "socket(%d, %d, %d) expected "
 				"err (%s) got (%s)\n",
 				s->domain, s->type, s->protocol,
-				err_string1, err_string2);
+				msg1, msg2);
 
 			err = -1;
 			break;
@@ -70,12 +71,12 @@ static int run_tests(void)
 		close(fd);
 
 		if (s->expect < 0) {
-			strerror_r(errno, err_string1, ERR_STRING_SZ);
+			msg1 = strerror_r(errno, err_string1, ERR_STRING_SZ);
 
 			fprintf(stderr, "socket(%d, %d, %d) expected "
 				"success got err (%s)\n",
 				s->domain, s->type, s->protocol,
-				err_string1);
+				msg1);
 
 			err = -1;
 			break;
diff --git a/tools/testing/selftests/net/test_bridge_backup_port.sh b/tools/testing/selftests/net/test_bridge_backup_port.sh
index 1b3f89e2b86e..2a7224fe74f2 100755
--- a/tools/testing/selftests/net/test_bridge_backup_port.sh
+++ b/tools/testing/selftests/net/test_bridge_backup_port.sh
@@ -315,6 +315,29 @@ backup_port()
 	tc_check_packets $sw1 "dev vx0 egress" 101 1
 	log_test $? 0 "No forwarding out of vx0"
 
+	# Check that packets are forwarded out of vx0 when swp1 is
+	# administratively down and out of swp1 when it is administratively up
+	# again.
+	run_cmd "ip -n $sw1 link set dev swp1 down"
+	busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled
+	log_test $? 0 "swp1 administratively down"
+
+	run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+	tc_check_packets $sw1 "dev swp1 egress" 101 3
+	log_test $? 0 "No forwarding out of swp1"
+	tc_check_packets $sw1 "dev vx0 egress" 101 2
+	log_test $? 0 "Forwarding out of vx0"
+
+	run_cmd "ip -n $sw1 link set dev swp1 up"
+	busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 forwarding
+	log_test $? 0 "swp1 administratively up"
+
+	run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+	tc_check_packets $sw1 "dev swp1 egress" 101 4
+	log_test $? 0 "Forwarding out of swp1"
+	tc_check_packets $sw1 "dev vx0 egress" 101 2
+	log_test $? 0 "No forwarding out of vx0"
+
 	# Remove vx0 as the backup port of swp1 and check that packets are no
 	# longer forwarded out of vx0 when swp1 does not have a carrier.
 	run_cmd "bridge -n $sw1 link set dev swp1 nobackup_port"
@@ -322,9 +345,9 @@
 	log_test $? 1 "vx0 not configured as backup port of swp1"
 
 	run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
-	tc_check_packets $sw1 "dev swp1 egress" 101 4
+	tc_check_packets $sw1 "dev swp1 egress" 101 5
 	log_test $? 0 "Forwarding out of swp1"
-	tc_check_packets $sw1 "dev vx0 egress" 101 1
+	tc_check_packets $sw1 "dev vx0 egress" 101 2
 	log_test $? 0 "No forwarding out of vx0"
 
 	run_cmd "ip -n $sw1 link set dev swp1 carrier off"
@@ -332,9 +355,9 @@
 	log_test $? 0 "swp1 carrier off"
 
 	run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
-	tc_check_packets $sw1 "dev swp1 egress" 101 4
+	tc_check_packets $sw1 "dev swp1 egress" 101 5
 	log_test $? 0 "No forwarding out of swp1"
-	tc_check_packets $sw1 "dev vx0 egress" 101 1
+	tc_check_packets $sw1 "dev vx0 egress" 101 2
 	log_test $? 0 "No forwarding out of vx0"
 }
diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index 0f5640d8dc7f..cd67b0ae75a7 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -439,6 +439,8 @@ TEST_F(tls, sendfile)
 	EXPECT_GE(filefd, 0);
 	fstat(filefd, &st);
 	EXPECT_GE(sendfile(self->fd, filefd, 0, st.st_size), 0);
+
+	close(filefd);
 }
 
 TEST_F(tls, send_then_sendfile)
@@ -460,6 +462,9 @@ TEST_F(tls, send_then_sendfile)
 
 	EXPECT_GE(sendfile(self->fd, filefd, 0, st.st_size), 0);
 	EXPECT_EQ(recv(self->cfd, buf, st.st_size, MSG_WAITALL), st.st_size);
+
+	free(buf);
+	close(filefd);
 }
 
 static void chunked_sendfile(struct __test_metadata *_metadata,
diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c
index 7b861a8e997a..d843643ced6b 100644
--- a/tools/testing/vsock/util.c
+++ b/tools/testing/vsock/util.c
@@ -756,7 +756,6 @@ void setsockopt_ull_check(int fd, int level, int optname,
 fail:
 	fprintf(stderr, "%s val %llu\n", errmsg, val);
 	exit(EXIT_FAILURE);
-;
 }
 
 /* Set "int" socket option and check that it's indeed set */