diff options
77 files changed, 1217 insertions, 269 deletions
diff --git a/Documentation/devicetree/bindings/net/imx-dwmac.txt b/Documentation/devicetree/bindings/net/imx-dwmac.txt deleted file mode 100644 index 921d522fe8d7..000000000000 --- a/Documentation/devicetree/bindings/net/imx-dwmac.txt +++ /dev/null @@ -1,56 +0,0 @@ -IMX8 glue layer controller, NXP imx8 families support Synopsys MAC 5.10a IP. - -This file documents platform glue layer for IMX. -Please see stmmac.txt for the other unchanged properties. - -The device node has following properties. - -Required properties: -- compatible: Should be "nxp,imx8mp-dwmac-eqos" to select glue layer - and "snps,dwmac-5.10a" to select IP version. -- clocks: Must contain a phandle for each entry in clock-names. -- clock-names: Should be "stmmaceth" for the host clock. - Should be "pclk" for the MAC apb clock. - Should be "ptp_ref" for the MAC timer clock. - Should be "tx" for the MAC RGMII TX clock: - Should be "mem" for EQOS MEM clock. - - "mem" clock is required for imx8dxl platform. - - "mem" clock is not required for imx8mp platform. -- interrupt-names: Should contain a list of interrupt names corresponding to - the interrupts in the interrupts property, if available. - Should be "macirq" for the main MAC IRQ - Should be "eth_wake_irq" for the IT which wake up system -- intf_mode: Should be phandle/offset pair. The phandle to the syscon node which - encompases the GPR register, and the offset of the GPR register. - - required for imx8mp platform. - - is optional for imx8dxl platform. - -Optional properties: -- intf_mode: is optional for imx8dxl platform. -- snps,rmii_refclk_ext: to select RMII reference clock from external. - -Example: - eqos: ethernet@30bf0000 { - compatible = "nxp,imx8mp-dwmac-eqos", "snps,dwmac-5.10a"; - reg = <0x30bf0000 0x10000>; - interrupts = <GIC_SPI 134 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 135 IRQ_TYPE_LEVEL_HIGH>; - interrupt-names = "eth_wake_irq", "macirq"; - clocks = <&clk IMX8MP_CLK_ENET_QOS_ROOT>, - <&clk IMX8MP_CLK_QOS_ENET_ROOT>, - <&clk IMX8MP_CLK_ENET_QOS_TIMER>, - <&clk IMX8MP_CLK_ENET_QOS>; - clock-names = "stmmaceth", "pclk", "ptp_ref", "tx"; - assigned-clocks = <&clk IMX8MP_CLK_ENET_AXI>, - <&clk IMX8MP_CLK_ENET_QOS_TIMER>, - <&clk IMX8MP_CLK_ENET_QOS>; - assigned-clock-parents = <&clk IMX8MP_SYS_PLL1_266M>, - <&clk IMX8MP_SYS_PLL2_100M>, - <&clk IMX8MP_SYS_PLL2_125M>; - assigned-clock-rates = <0>, <100000000>, <125000000>; - nvmem-cells = <ð_mac0>; - nvmem-cell-names = "mac-address"; - nvmem_macaddr_swap; - intf_mode = <&gpr 0x4>; - status = "disabled"; - }; diff --git a/Documentation/devicetree/bindings/net/nxp,dwmac-imx.yaml b/Documentation/devicetree/bindings/net/nxp,dwmac-imx.yaml new file mode 100644 index 000000000000..5629b2e4ccf8 --- /dev/null +++ b/Documentation/devicetree/bindings/net/nxp,dwmac-imx.yaml @@ -0,0 +1,93 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/nxp,dwmac-imx.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: NXP i.MX8 DWMAC glue layer Device Tree Bindings + +maintainers: + - Joakim Zhang <qiangqing.zhang@nxp.com> + +# We need a select here so we don't match all nodes with 'snps,dwmac' +select: + properties: + compatible: + contains: + enum: + - nxp,imx8mp-dwmac-eqos + - nxp,imx8dxl-dwmac-eqos + required: + - compatible + +allOf: + - $ref: "snps,dwmac.yaml#" + +properties: + compatible: + oneOf: + - items: + - enum: + - nxp,imx8mp-dwmac-eqos + - nxp,imx8dxl-dwmac-eqos + - const: snps,dwmac-5.10a + + clocks: + minItems: 3 + maxItems: 5 + items: + - description: MAC host clock + - description: MAC apb clock + - description: MAC timer clock + - description: MAC RGMII TX clock + - description: EQOS MEM clock + + clock-names: + minItems: 3 + maxItems: 5 + contains: + enum: + - stmmaceth + - pclk + - ptp_ref + - tx + - mem + + intf_mode: + $ref: /schemas/types.yaml#/definitions/phandle-array + description: + Should be phandle/offset pair. The phandle to the syscon node which + encompases the GPR register, and the offset of the GPR register. + + snps,rmii_refclk_ext: + $ref: /schemas/types.yaml#/definitions/flag + description: + To select RMII reference clock from external. + +required: + - compatible + - clocks + - clock-names + +unevaluatedProperties: false + +examples: + - | + #include <dt-bindings/interrupt-controller/arm-gic.h> + #include <dt-bindings/interrupt-controller/irq.h> + #include <dt-bindings/clock/imx8mp-clock.h> + + eqos: ethernet@30bf0000 { + compatible = "nxp,imx8mp-dwmac-eqos","snps,dwmac-5.10a"; + reg = <0x30bf0000 0x10000>; + interrupts = <GIC_SPI 135 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 134 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "macirq", "eth_wake_irq"; + clocks = <&clk IMX8MP_CLK_ENET_QOS_ROOT>, + <&clk IMX8MP_CLK_QOS_ENET_ROOT>, + <&clk IMX8MP_CLK_ENET_QOS_TIMER>, + <&clk IMX8MP_CLK_ENET_QOS>; + clock-names = "stmmaceth", "pclk", "ptp_ref", "tx"; + phy-mode = "rgmii"; + status = "disabled"; + }; diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml index d7652596a09b..42689b7d03a2 100644 --- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml +++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml @@ -28,6 +28,7 @@ select: - snps,dwmac-4.00 - snps,dwmac-4.10a - snps,dwmac-4.20a + - snps,dwmac-5.10a - snps,dwxgmac - snps,dwxgmac-2.10 @@ -82,6 +83,7 @@ properties: - snps,dwmac-4.00 - snps,dwmac-4.10a - snps,dwmac-4.20a + - snps,dwmac-5.10a - snps,dwxgmac - snps,dwxgmac-2.10 @@ -375,6 +377,7 @@ allOf: - snps,dwmac-4.00 - snps,dwmac-4.10a - snps,dwmac-4.20a + - snps,dwmac-5.10a - snps,dwxgmac - snps,dwxgmac-2.10 - st,spear600-gmac diff --git a/Documentation/networking/af_xdp.rst b/Documentation/networking/af_xdp.rst index 42576880aa4a..60b217b436be 100644 --- a/Documentation/networking/af_xdp.rst +++ b/Documentation/networking/af_xdp.rst @@ -243,8 +243,8 @@ Configuration Flags and Socket Options These are the various configuration flags that can be used to control and monitor the behavior of AF_XDP sockets. -XDP_COPY and XDP_ZERO_COPY bind flags -------------------------------------- +XDP_COPY and XDP_ZEROCOPY bind flags +------------------------------------ When you bind to a socket, the kernel will first try to use zero-copy copy. If zero-copy is not supported, it will fall back on using copy @@ -252,7 +252,7 @@ mode, i.e. copying all packets out to user space. But if you would like to force a certain mode, you can use the following flags. If you pass the XDP_COPY flag to the bind call, the kernel will force the socket into copy mode. If it cannot use copy mode, the bind call will -fail with an error. Conversely, the XDP_ZERO_COPY flag will force the +fail with an error. Conversely, the XDP_ZEROCOPY flag will force the socket into zero-copy mode or fail. XDP_SHARED_UMEM bind flag diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index b3fa522e4cd9..316c7dfa9693 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -826,7 +826,7 @@ tcp_fastopen_blackhole_timeout_sec - INTEGER initial value when the blackhole issue goes away. 0 to disable the blackhole detection. - By default, it is set to 1hr. + By default, it is set to 0 (feature is disabled). tcp_fastopen_key - list of comma separated 32-digit hexadecimal INTEGERs The list consists of a primary key and an optional backup key. The diff --git a/MAINTAINERS b/MAINTAINERS index 6c8be735cc91..58afeb12d3b3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11758,6 +11758,7 @@ F: drivers/char/hw_random/mtk-rng.c MEDIATEK SWITCH DRIVER M: Sean Wang <sean.wang@mediatek.com> M: Landen Chao <Landen.Chao@mediatek.com> +M: DENG Qingfang <dqfext@gmail.com> L: netdev@vger.kernel.org S: Maintained F: drivers/net/dsa/mt7530.* diff --git a/arch/arm64/boot/dts/freescale/imx8mp.dtsi b/arch/arm64/boot/dts/freescale/imx8mp.dtsi index 9f7c7f587d38..ca38d0d6c3c4 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp.dtsi @@ -821,9 +821,9 @@ eqos: ethernet@30bf0000 { compatible = "nxp,imx8mp-dwmac-eqos", "snps,dwmac-5.10a"; reg = <0x30bf0000 0x10000>; - interrupts = <GIC_SPI 134 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 135 IRQ_TYPE_LEVEL_HIGH>; - interrupt-names = "eth_wake_irq", "macirq"; + interrupts = <GIC_SPI 135 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 134 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "macirq", "eth_wake_irq"; clocks = <&clk IMX8MP_CLK_ENET_QOS_ROOT>, <&clk IMX8MP_CLK_QOS_ENET_ROOT>, <&clk IMX8MP_CLK_ENET_QOS_TIMER>, diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 63cae0476bb4..2ae419f5115a 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -112,7 +112,7 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1) { u32 r1 = reg2hex[b1]; - if (!jit->seen_reg[r1] && r1 >= 6 && r1 <= 15) + if (r1 >= 6 && r1 <= 15 && !jit->seen_reg[r1]) jit->seen_reg[r1] = 1; } diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index d22d78303311..31730efa7538 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3450,7 +3450,9 @@ static int bond_master_netdev_event(unsigned long event, return bond_event_changename(event_bond); case NETDEV_UNREGISTER: bond_remove_proc_entry(event_bond); +#ifdef CONFIG_XFRM_OFFLOAD xfrm_dev_state_flush(dev_net(bond_dev), bond_dev, true); +#endif /* CONFIG_XFRM_OFFLOAD */ break; case NETDEV_REGISTER: bond_create_proc_entry(event_bond); diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 93136f7e69f5..69f21b71614c 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -366,6 +366,8 @@ mt7530_fdb_write(struct mt7530_priv *priv, u16 vid, int i; reg[1] |= vid & CVID_MASK; + if (vid > 1) + reg[1] |= ATA2_IVL; reg[2] |= (aging & AGE_TIMER_MASK) << AGE_TIMER; reg[2] |= (port_mask & PORT_MAP_MASK) << PORT_MAP; /* STATIC_ENT indicate that entry is static wouldn't diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h index 334d610a503d..b19b389ff10a 100644 --- a/drivers/net/dsa/mt7530.h +++ b/drivers/net/dsa/mt7530.h @@ -79,6 +79,7 @@ enum mt753x_bpdu_port_fw { #define STATIC_EMP 0 #define STATIC_ENT 3 #define MT7530_ATA2 0x78 +#define ATA2_IVL BIT(15) /* Register for address table write data */ #define MT7530_ATWD 0x7c diff --git a/drivers/net/dsa/mv88e6xxx/Kconfig b/drivers/net/dsa/mv88e6xxx/Kconfig index 05af632b0f59..634a48e6616b 100644 --- a/drivers/net/dsa/mv88e6xxx/Kconfig +++ b/drivers/net/dsa/mv88e6xxx/Kconfig @@ -12,7 +12,7 @@ config NET_DSA_MV88E6XXX config NET_DSA_MV88E6XXX_PTP bool "PTP support for Marvell 88E6xxx" default n - depends on PTP_1588_CLOCK + depends on NET_DSA_MV88E6XXX && PTP_1588_CLOCK help Say Y to enable PTP hardware timestamping on Marvell 88E6xxx switch chips that support it. diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index ced8c9cb29c2..e2dc997580a8 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -397,6 +397,12 @@ static int sja1105_init_static_vlan(struct sja1105_private *priv) if (dsa_is_cpu_port(ds, port)) v->pvid = true; list_add(&v->list, &priv->dsa_8021q_vlans); + + v = kmemdup(v, sizeof(*v), GFP_KERNEL); + if (!v) + return -ENOMEM; + + list_add(&v->list, &priv->bridge_vlans); } ((struct sja1105_vlan_lookup_entry *)table->entries)[0] = pvid; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index f56245eeef7b..4db162cee911 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -1671,11 +1671,16 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp, if ((tpa_info->flags2 & RX_CMP_FLAGS2_META_FORMAT_VLAN) && (skb->dev->features & BNXT_HW_FEATURE_VLAN_ALL_RX)) { - u16 vlan_proto = tpa_info->metadata >> - RX_CMP_FLAGS2_METADATA_TPID_SFT; + __be16 vlan_proto = htons(tpa_info->metadata >> + RX_CMP_FLAGS2_METADATA_TPID_SFT); u16 vtag = tpa_info->metadata & RX_CMP_FLAGS2_METADATA_TCI_MASK; - __vlan_hwaccel_put_tag(skb, htons(vlan_proto), vtag); + if (eth_type_vlan(vlan_proto)) { + __vlan_hwaccel_put_tag(skb, vlan_proto, vtag); + } else { + dev_kfree_skb(skb); + return NULL; + } } skb_checksum_none_assert(skb); @@ -1897,9 +1902,15 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, (skb->dev->features & BNXT_HW_FEATURE_VLAN_ALL_RX)) { u32 meta_data = le32_to_cpu(rxcmp1->rx_cmp_meta_data); u16 vtag = meta_data & RX_CMP_FLAGS2_METADATA_TCI_MASK; - u16 vlan_proto = meta_data >> RX_CMP_FLAGS2_METADATA_TPID_SFT; + __be16 vlan_proto = htons(meta_data >> + RX_CMP_FLAGS2_METADATA_TPID_SFT); - __vlan_hwaccel_put_tag(skb, htons(vlan_proto), vtag); + if (eth_type_vlan(vlan_proto)) { + __vlan_hwaccel_put_tag(skb, vlan_proto, vtag); + } else { + dev_kfree_skb(skb); + goto next_rx; + } } skb_checksum_none_assert(skb); @@ -7563,8 +7574,12 @@ static int __bnxt_hwrm_func_qcaps(struct bnxt *bp) bp->flags &= ~BNXT_FLAG_WOL_CAP; if (flags & FUNC_QCAPS_RESP_FLAGS_WOL_MAGICPKT_SUPPORTED) bp->flags |= BNXT_FLAG_WOL_CAP; - if (flags & FUNC_QCAPS_RESP_FLAGS_PTP_SUPPORTED) + if (flags & FUNC_QCAPS_RESP_FLAGS_PTP_SUPPORTED) { __bnxt_hwrm_ptp_qcfg(bp); + } else { + kfree(bp->ptp_cfg); + bp->ptp_cfg = NULL; + } } else { #ifdef CONFIG_BNXT_SRIOV struct bnxt_vf_info *vf = &bp->vf; @@ -10123,7 +10138,6 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) } } - bnxt_ptp_start(bp); rc = bnxt_init_nic(bp, irq_re_init); if (rc) { netdev_err(bp->dev, "bnxt_init_nic err: %x\n", rc); @@ -10197,6 +10211,12 @@ int bnxt_half_open_nic(struct bnxt *bp) { int rc = 0; + if (test_bit(BNXT_STATE_ABORT_ERR, &bp->state)) { + netdev_err(bp->dev, "A previous firmware reset has not completed, aborting half open\n"); + rc = -ENODEV; + goto half_open_err; + } + rc = bnxt_alloc_mem(bp, false); if (rc) { netdev_err(bp->dev, "bnxt_alloc_mem err: %x\n", rc); @@ -10256,9 +10276,16 @@ static int bnxt_open(struct net_device *dev) rc = bnxt_hwrm_if_change(bp, true); if (rc) return rc; + + if (bnxt_ptp_init(bp)) { + netdev_warn(dev, "PTP initialization failed.\n"); + kfree(bp->ptp_cfg); + bp->ptp_cfg = NULL; + } rc = __bnxt_open_nic(bp, true, true); if (rc) { bnxt_hwrm_if_change(bp, false); + bnxt_ptp_clear(bp); } else { if (test_and_clear_bit(BNXT_STATE_FW_RESET_DET, &bp->state)) { if (!test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) { @@ -10349,6 +10376,7 @@ static int bnxt_close(struct net_device *dev) { struct bnxt *bp = netdev_priv(dev); + bnxt_ptp_clear(bp); bnxt_hwmon_close(bp); bnxt_close_nic(bp, true, true); bnxt_hwrm_shutdown_link(bp); @@ -11335,6 +11363,7 @@ static void bnxt_fw_reset_close(struct bnxt *bp) bnxt_clear_int_mode(bp); pci_disable_device(bp->pdev); } + bnxt_ptp_clear(bp); __bnxt_close_nic(bp, true, false); bnxt_vf_reps_free(bp); bnxt_clear_int_mode(bp); @@ -11959,10 +11988,21 @@ static bool bnxt_fw_reset_timeout(struct bnxt *bp) (bp->fw_reset_max_dsecs * HZ / 10)); } +static void bnxt_fw_reset_abort(struct bnxt *bp, int rc) +{ + clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); + if (bp->fw_reset_state != BNXT_FW_RESET_STATE_POLL_VF) { + bnxt_ulp_start(bp, rc); + bnxt_dl_health_status_update(bp, false); + } + bp->fw_reset_state = 0; + dev_close(bp->dev); +} + static void bnxt_fw_reset_task(struct work_struct *work) { struct bnxt *bp = container_of(work, struct bnxt, fw_reset_task.work); - int rc; + int rc = 0; if (!test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) { netdev_err(bp->dev, "bnxt_fw_reset_task() called when not in fw reset mode!\n"); @@ -11992,6 +12032,11 @@ static void bnxt_fw_reset_task(struct work_struct *work) } bp->fw_reset_timestamp = jiffies; rtnl_lock(); + if (test_bit(BNXT_STATE_ABORT_ERR, &bp->state)) { + bnxt_fw_reset_abort(bp, rc); + rtnl_unlock(); + return; + } bnxt_fw_reset_close(bp); if (bp->fw_cap & BNXT_FW_CAP_ERR_RECOVER_RELOAD) { bp->fw_reset_state = BNXT_FW_RESET_STATE_POLL_FW_DOWN; @@ -12039,6 +12084,7 @@ static void bnxt_fw_reset_task(struct work_struct *work) if (val == 0xffff) { if (bnxt_fw_reset_timeout(bp)) { netdev_err(bp->dev, "Firmware reset aborted, PCI config space invalid\n"); + rc = -ETIMEDOUT; goto fw_reset_abort; } bnxt_queue_fw_reset_work(bp, HZ / 1000); @@ -12048,6 +12094,7 @@ static void bnxt_fw_reset_task(struct work_struct *work) clear_bit(BNXT_STATE_FW_FATAL_COND, &bp->state); if (pci_enable_device(bp->pdev)) { netdev_err(bp->dev, "Cannot re-enable PCI device\n"); + rc = -ENODEV; goto fw_reset_abort; } pci_set_master(bp->pdev); @@ -12074,9 +12121,10 @@ static void bnxt_fw_reset_task(struct work_struct *work) } rc = bnxt_open(bp->dev); if (rc) { - netdev_err(bp->dev, "bnxt_open_nic() failed\n"); - clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); - dev_close(bp->dev); + netdev_err(bp->dev, "bnxt_open() failed during FW reset\n"); + bnxt_fw_reset_abort(bp, rc); + rtnl_unlock(); + return; } bp->fw_reset_state = 0; @@ -12103,12 +12151,8 @@ fw_reset_abort_status: netdev_err(bp->dev, "fw_health_status 0x%x\n", sts); } fw_reset_abort: - clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); - if (bp->fw_reset_state != BNXT_FW_RESET_STATE_POLL_VF) - bnxt_dl_health_status_update(bp, false); - bp->fw_reset_state = 0; rtnl_lock(); - dev_close(bp->dev); + bnxt_fw_reset_abort(bp, rc); rtnl_unlock(); } @@ -12662,7 +12706,6 @@ static void bnxt_remove_one(struct pci_dev *pdev) if (BNXT_PF(bp)) devlink_port_type_clear(&bp->dl_port); - bnxt_ptp_clear(bp); pci_disable_pcie_error_reporting(pdev); unregister_netdev(dev); clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); @@ -13246,11 +13289,6 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) rc); } - if (bnxt_ptp_init(bp)) { - netdev_warn(dev, "PTP initialization failed.\n"); - kfree(bp->ptp_cfg); - bp->ptp_cfg = NULL; - } bnxt_inv_fw_health_reg(bp); bnxt_dl_register(bp); @@ -13436,7 +13474,8 @@ static pci_ers_result_t bnxt_io_error_detected(struct pci_dev *pdev, if (netif_running(netdev)) bnxt_close(netdev); - pci_disable_device(pdev); + if (pci_is_enabled(pdev)) + pci_disable_device(pdev); bnxt_free_ctx_mem(bp); kfree(bp->ctx); bp->ctx = NULL; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c index 8e90224c43a2..8a68df4d9e59 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c @@ -433,6 +433,7 @@ static int bnxt_hwrm_queue_dscp2pri_cfg(struct bnxt *bp, struct dcb_app *app, static int bnxt_ets_validate(struct bnxt *bp, struct ieee_ets *ets, u8 *tc) { int total_ets_bw = 0; + bool zero = false; u8 max_tc = 0; int i; @@ -453,13 +454,20 @@ static int bnxt_ets_validate(struct bnxt *bp, struct ieee_ets *ets, u8 *tc) break; case IEEE_8021QAZ_TSA_ETS: total_ets_bw += ets->tc_tx_bw[i]; + zero = zero || !ets->tc_tx_bw[i]; break; default: return -ENOTSUPP; } } - if (total_ets_bw > 100) + if (total_ets_bw > 100) { + netdev_warn(bp->dev, "rejecting ETS config exceeding available bandwidth\n"); return -EINVAL; + } + if (zero && total_ets_bw == 100) { + netdev_warn(bp->dev, "rejecting ETS config starving a TC\n"); + return -EINVAL; + } if (max_tc >= bp->max_tc) *tc = bp->max_tc; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c index f698b6bd4ff8..9089e7f3fbd4 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c @@ -385,22 +385,6 @@ int bnxt_get_rx_ts_p5(struct bnxt *bp, u64 *ts, u32 pkt_ts) return 0; } -void bnxt_ptp_start(struct bnxt *bp) -{ - struct bnxt_ptp_cfg *ptp = bp->ptp_cfg; - - if (!ptp) - return; - - if (bp->flags & BNXT_FLAG_CHIP_P5) { - spin_lock_bh(&ptp->ptp_lock); - ptp->current_time = bnxt_refclk_read(bp, NULL); - WRITE_ONCE(ptp->old_time, ptp->current_time); - spin_unlock_bh(&ptp->ptp_lock); - ptp_schedule_worker(ptp->ptp_clock, 0); - } -} - static const struct ptp_clock_info bnxt_ptp_caps = { .owner = THIS_MODULE, .name = "bnxt clock", @@ -450,7 +434,13 @@ int bnxt_ptp_init(struct bnxt *bp) bnxt_unmap_ptp_regs(bp); return err; } - + if (bp->flags & BNXT_FLAG_CHIP_P5) { + spin_lock_bh(&ptp->ptp_lock); + ptp->current_time = bnxt_refclk_read(bp, NULL); + WRITE_ONCE(ptp->old_time, ptp->current_time); + spin_unlock_bh(&ptp->ptp_lock); + ptp_schedule_worker(ptp->ptp_clock, 0); + } return 0; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h index 6b6245750e20..4135ea3ec788 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h @@ -75,7 +75,6 @@ int bnxt_hwtstamp_set(struct net_device *dev, struct ifreq *ifr); int bnxt_hwtstamp_get(struct net_device *dev, struct ifreq *ifr); int bnxt_get_tx_ts_p5(struct bnxt *bp, struct sk_buff *skb); int bnxt_get_rx_ts_p5(struct bnxt *bp, u64 *ts, u32 pkt_ts); -void bnxt_ptp_start(struct bnxt *bp); int bnxt_ptp_init(struct bnxt *bp); void bnxt_ptp_clear(struct bnxt *bp); #endif diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c index a918e374f3c5..187ff643ad2a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c @@ -479,16 +479,17 @@ struct bnxt_en_dev *bnxt_ulp_probe(struct net_device *dev) if (!edev) return ERR_PTR(-ENOMEM); edev->en_ops = &bnxt_en_ops_tbl; - if (bp->flags & BNXT_FLAG_ROCEV1_CAP) - edev->flags |= BNXT_EN_FLAG_ROCEV1_CAP; - if (bp->flags & BNXT_FLAG_ROCEV2_CAP) - edev->flags |= BNXT_EN_FLAG_ROCEV2_CAP; edev->net = dev; edev->pdev = bp->pdev; edev->l2_db_size = bp->db_size; edev->l2_db_size_nc = bp->db_size; bp->edev = edev; } + edev->flags &= ~BNXT_EN_FLAG_ROCE_CAP; + if (bp->flags & BNXT_FLAG_ROCEV1_CAP) + edev->flags |= BNXT_EN_FLAG_ROCEV1_CAP; + if (bp->flags & BNXT_FLAG_ROCEV2_CAP) + edev->flags |= BNXT_EN_FLAG_ROCEV2_CAP; return bp->edev; } EXPORT_SYMBOL(bnxt_ulp_probe); diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c index 4cddd628d41b..9ed3d1ab2ca5 100644 --- a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c +++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c @@ -420,7 +420,7 @@ static int cn23xx_pf_setup_global_input_regs(struct octeon_device *oct) * bits 32:47 indicate the PVF num. */ for (q_no = 0; q_no < ern; q_no++) { - reg_val = oct->pcie_port << CN23XX_PKT_INPUT_CTL_MAC_NUM_POS; + reg_val = (u64)oct->pcie_port << CN23XX_PKT_INPUT_CTL_MAC_NUM_POS; /* for VF assigned queues. */ if (q_no < oct->sriov_info.pf_srn) { diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c index f3d12d0714fb..68b78642c045 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c @@ -2770,32 +2770,32 @@ static int dpaa2_switch_ctrl_if_setup(struct ethsw_core *ethsw) if (err) return err; - err = dpaa2_switch_seed_bp(ethsw); - if (err) - goto err_free_dpbp; - err = dpaa2_switch_alloc_rings(ethsw); if (err) - goto err_drain_dpbp; + goto err_free_dpbp; err = dpaa2_switch_setup_dpio(ethsw); if (err) goto err_destroy_rings; + err = dpaa2_switch_seed_bp(ethsw); + if (err) + goto err_deregister_dpio; + err = dpsw_ctrl_if_enable(ethsw->mc_io, 0, ethsw->dpsw_handle); if (err) { dev_err(ethsw->dev, "dpsw_ctrl_if_enable err %d\n", err); - goto err_deregister_dpio; + goto err_drain_dpbp; } return 0; +err_drain_dpbp: + dpaa2_switch_drain_bp(ethsw); err_deregister_dpio: dpaa2_switch_free_dpio(ethsw); err_destroy_rings: dpaa2_switch_destroy_rings(ethsw); -err_drain_dpbp: - dpaa2_switch_drain_bp(ethsw); err_free_dpbp: dpaa2_switch_free_dpbp(ethsw); diff --git a/drivers/net/ethernet/freescale/fman/mac.c b/drivers/net/ethernet/freescale/fman/mac.c index 46ecb42f2ef8..d9fc5c456bf3 100644 --- a/drivers/net/ethernet/freescale/fman/mac.c +++ b/drivers/net/ethernet/freescale/fman/mac.c @@ -524,6 +524,7 @@ static void setup_memac(struct mac_device *mac_dev) | SUPPORTED_Autoneg \ | SUPPORTED_Pause \ | SUPPORTED_Asym_Pause \ + | SUPPORTED_FIBRE \ | SUPPORTED_MII) static DEFINE_MUTEX(eth_lock); diff --git a/drivers/net/ethernet/hisilicon/hip04_eth.c b/drivers/net/ethernet/hisilicon/hip04_eth.c index 12f6c2442a7a..e53512f6878a 100644 --- a/drivers/net/ethernet/hisilicon/hip04_eth.c +++ b/drivers/net/ethernet/hisilicon/hip04_eth.c @@ -131,7 +131,7 @@ /* buf unit size is cache_line_size, which is 64, so the shift is 6 */ #define PPE_BUF_SIZE_SHIFT 6 #define PPE_TX_BUF_HOLD BIT(31) -#define CACHE_LINE_MASK 0x3F +#define SOC_CACHE_LINE_MASK 0x3F #else #define PPE_CFG_QOS_VMID_GRP_SHIFT 8 #define PPE_CFG_RX_CTRL_ALIGN_SHIFT 11 @@ -531,8 +531,8 @@ hip04_mac_start_xmit(struct sk_buff *skb, struct net_device *ndev) #if defined(CONFIG_HI13X1_GMAC) desc->cfg = (__force u32)cpu_to_be32(TX_CLEAR_WB | TX_FINISH_CACHE_INV | TX_RELEASE_TO_PPE | priv->port << TX_POOL_SHIFT); - desc->data_offset = (__force u32)cpu_to_be32(phys & CACHE_LINE_MASK); - desc->send_addr = (__force u32)cpu_to_be32(phys & ~CACHE_LINE_MASK); + desc->data_offset = (__force u32)cpu_to_be32(phys & SOC_CACHE_LINE_MASK); + desc->send_addr = (__force u32)cpu_to_be32(phys & ~SOC_CACHE_LINE_MASK); #else desc->cfg = (__force u32)cpu_to_be32(TX_CLEAR_WB | TX_FINISH_CACHE_INV); desc->send_addr = (__force u32)cpu_to_be32(phys); diff --git a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h index 0a6cda309b24..aa86a81c8f4a 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h +++ b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h @@ -98,6 +98,7 @@ struct hclgevf_mbx_resp_status { u32 origin_mbx_msg; bool received_resp; int resp_status; + u16 match_id; u8 additional_info[HCLGE_MBX_MAX_RESP_DATA_SIZE]; }; @@ -143,7 +144,8 @@ struct hclge_mbx_vf_to_pf_cmd { u8 mbx_need_resp; u8 rsv1[1]; u8 msg_len; - u8 rsv2[3]; + u8 rsv2; + u16 match_id; struct hclge_vf_to_pf_msg msg; }; @@ -153,7 +155,8 @@ struct hclge_mbx_pf_to_vf_cmd { u8 dest_vfid; u8 rsv[3]; u8 msg_len; - u8 rsv1[3]; + u8 rsv1; + u16 match_id; struct hclge_pf_to_vf_msg msg; }; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index dd3354a57c62..ebeaf12e409b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -9552,13 +9552,17 @@ static int hclge_set_vport_vlan_filter(struct hclge_vport *vport, bool enable) if (ret) return ret; - if (test_bit(HNAE3_DEV_SUPPORT_PORT_VLAN_BYPASS_B, ae_dev->caps)) + if (test_bit(HNAE3_DEV_SUPPORT_PORT_VLAN_BYPASS_B, ae_dev->caps)) { ret = hclge_set_port_vlan_filter_bypass(hdev, vport->vport_id, !enable); - else if (!vport->vport_id) + } else if (!vport->vport_id) { + if (test_bit(HNAE3_DEV_SUPPORT_VLAN_FLTR_MDF_B, ae_dev->caps)) + enable = false; + ret = hclge_set_vlan_filter_ctrl(hdev, HCLGE_FILTER_TYPE_PORT, HCLGE_FILTER_FE_INGRESS, enable, 0); + } return ret; } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c index e10a2c36b706..c0a478ae9583 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c @@ -47,6 +47,7 @@ static int hclge_gen_resp_to_vf(struct hclge_vport *vport, resp_pf_to_vf->dest_vfid = vf_to_pf_req->mbx_src_vfid; resp_pf_to_vf->msg_len = vf_to_pf_req->msg_len; + resp_pf_to_vf->match_id = vf_to_pf_req->match_id; resp_pf_to_vf->msg.code = HCLGE_MBX_PF_VF_RESP; resp_pf_to_vf->msg.vf_mbx_msg_code = vf_to_pf_req->msg.code; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 52eaf82b7cd7..8784d61e833f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -2641,6 +2641,16 @@ static int hclgevf_rss_init_hw(struct hclgevf_dev *hdev) static int hclgevf_init_vlan_config(struct hclgevf_dev *hdev) { + struct hnae3_handle *nic = &hdev->nic; + int ret; + + ret = hclgevf_en_hw_strip_rxvtag(nic, true); + if (ret) { + dev_err(&hdev->pdev->dev, + "failed to enable rx vlan offload, ret = %d\n", ret); + return ret; + } + return hclgevf_set_vlan_filter(&hdev->nic, htons(ETH_P_8021Q), 0, false); } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c index 9b17735b9f4c..772b2f8acd2e 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c @@ -13,6 +13,7 @@ static int hclgevf_resp_to_errno(u16 resp_code) return resp_code ? -resp_code : 0; } +#define HCLGEVF_MBX_MATCH_ID_START 1 static void hclgevf_reset_mbx_resp_status(struct hclgevf_dev *hdev) { /* this function should be called with mbx_resp.mbx_mutex held @@ -21,6 +22,10 @@ static void hclgevf_reset_mbx_resp_status(struct hclgevf_dev *hdev) hdev->mbx_resp.received_resp = false; hdev->mbx_resp.origin_mbx_msg = 0; hdev->mbx_resp.resp_status = 0; + hdev->mbx_resp.match_id++; + /* Update match_id and ensure the value of match_id is not zero */ + if (hdev->mbx_resp.match_id == 0) + hdev->mbx_resp.match_id = HCLGEVF_MBX_MATCH_ID_START; memset(hdev->mbx_resp.additional_info, 0, HCLGE_MBX_MAX_RESP_DATA_SIZE); } @@ -115,6 +120,7 @@ int hclgevf_send_mbx_msg(struct hclgevf_dev *hdev, if (need_resp) { mutex_lock(&hdev->mbx_resp.mbx_mutex); hclgevf_reset_mbx_resp_status(hdev); + req->match_id = hdev->mbx_resp.match_id; status = hclgevf_cmd_send(&hdev->hw, &desc, 1); if (status) { dev_err(&hdev->pdev->dev, @@ -211,6 +217,19 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev) resp->additional_info[i] = *temp; temp++; } + + /* If match_id is not zero, it means PF support + * match_id. If the match_id is right, VF get the + * right response, otherwise ignore the response. + * Driver will clear hdev->mbx_resp when send + * next message which need response. + */ + if (req->match_id) { + if (req->match_id == resp->match_id) + resp->received_resp = true; + } else { + resp->received_resp = true; + } break; case HCLGE_MBX_LINK_STAT_CHANGE: case HCLGE_MBX_ASSERTING_RESET: diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index ed77191d19f4..a775c69e4fd7 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1731,7 +1731,6 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) tx_send_failed++; tx_dropped++; ret = NETDEV_TX_OK; - ibmvnic_tx_scrq_flush(adapter, tx_scrq); goto out; } @@ -1753,6 +1752,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) dev_kfree_skb_any(skb); tx_send_failed++; tx_dropped++; + ibmvnic_tx_scrq_flush(adapter, tx_scrq); ret = NETDEV_TX_OK; goto out; } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 913253f8ecb4..14aea40da50f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -1825,7 +1825,8 @@ static void ixgbe_dma_sync_frag(struct ixgbe_ring *rx_ring, struct sk_buff *skb) { if (ring_uses_build_skb(rx_ring)) { - unsigned long offset = (unsigned long)(skb->data) & ~PAGE_MASK; + unsigned long mask = (unsigned long)ixgbe_rx_pg_size(rx_ring) - 1; + unsigned long offset = (unsigned long)(skb->data) & mask; dma_sync_single_range_for_cpu(rx_ring->dev, IXGBE_CB(skb)->dma, diff --git a/drivers/net/ethernet/marvell/octeontx2/af/Makefile b/drivers/net/ethernet/marvell/octeontx2/af/Makefile index 1a3455620b38..cc8ac36cf687 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/Makefile +++ b/drivers/net/ethernet/marvell/octeontx2/af/Makefile @@ -10,4 +10,4 @@ obj-$(CONFIG_OCTEONTX2_AF) += rvu_af.o rvu_mbox-y := mbox.o rvu_trace.o rvu_af-y := cgx.o rvu.o rvu_cgx.o rvu_npa.o rvu_nix.o \ rvu_reg.o rvu_npc.o rvu_debugfs.o ptp.o rvu_npc_fs.o \ - rvu_cpt.o rvu_devlink.o rpm.o rvu_cn10k.o + rvu_cpt.o rvu_devlink.o rpm.o rvu_cn10k.o rvu_switch.o diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index 10cddf1ac7b9..017163fb3cd5 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -1314,7 +1314,7 @@ int rvu_mbox_handler_detach_resources(struct rvu *rvu, return rvu_detach_rsrcs(rvu, detach, detach->hdr.pcifunc); } -static int rvu_get_nix_blkaddr(struct rvu *rvu, u16 pcifunc) +int rvu_get_nix_blkaddr(struct rvu *rvu, u16 pcifunc) { struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, pcifunc); int blkaddr = BLKADDR_NIX0, vf; @@ -2859,6 +2859,12 @@ static int rvu_enable_sriov(struct rvu *rvu) if (!vfs) return 0; + /* LBK channel number 63 is used for switching packets between + * CGX mapped VFs. Hence limit LBK pairs till 62 only. + */ + if (vfs > 62) + vfs = 62; + /* Save VFs number for reference in VF interrupts handlers. * Since interrupts might start arriving during SRIOV enablement * ordinary API cannot be used to get number of enabled VFs. @@ -3001,6 +3007,8 @@ static int rvu_probe(struct pci_dev *pdev, const struct pci_device_id *id) /* Initialize debugfs */ rvu_dbg_init(rvu); + mutex_init(&rvu->rswitch.switch_lock); + return 0; err_dl: rvu_unregister_dl(rvu); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index 10e58a5d5861..91503fb2762c 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -415,6 +415,16 @@ struct npc_kpu_profile_adapter { size_t kpus; }; +#define RVU_SWITCH_LBK_CHAN 63 + +struct rvu_switch { + struct mutex switch_lock; /* Serialize flow installation */ + u32 used_entries; + u16 *entry2pcifunc; + u16 mode; + u16 start_entry; +}; + struct rvu { void __iomem *afreg_base; void __iomem *pfreg_base; @@ -445,6 +455,7 @@ struct rvu { /* CGX */ #define PF_CGXMAP_BASE 1 /* PF 0 is reserved for RVU PF */ + u16 cgx_mapped_vfs; /* maximum CGX mapped VFs */ u8 cgx_mapped_pfs; u8 cgx_cnt_max; /* CGX port count max */ u8 *pf2cgxlmac_map; /* pf to cgx_lmac map */ @@ -477,6 +488,9 @@ struct rvu { struct rvu_debugfs rvu_dbg; #endif struct rvu_devlink *rvu_dl; + + /* RVU switch implementation over NPC with DMAC rules */ + struct rvu_switch rswitch; }; static inline void rvu_write64(struct rvu *rvu, u64 block, u64 offset, u64 val) @@ -691,6 +705,7 @@ int nix_aq_context_read(struct rvu *rvu, struct nix_hw *nix_hw, struct nix_cn10k_aq_enq_req *aq_req, struct nix_cn10k_aq_enq_rsp *aq_rsp, u16 pcifunc, u8 ctype, u32 qidx); +int rvu_get_nix_blkaddr(struct rvu *rvu, u16 pcifunc); /* NPC APIs */ int rvu_npc_init(struct rvu *rvu); @@ -768,4 +783,10 @@ void rvu_dbg_exit(struct rvu *rvu); static inline void rvu_dbg_init(struct rvu *rvu) {} static inline void rvu_dbg_exit(struct rvu *rvu) {} #endif + +/* RVU Switch */ +void rvu_switch_enable(struct rvu *rvu); +void rvu_switch_disable(struct rvu *rvu); +void rvu_switch_update_rules(struct rvu *rvu, u16 pcifunc); + #endif /* RVU_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c index 6cc8fbb7190c..fe99ac4a4dd8 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c @@ -126,6 +126,7 @@ static int rvu_map_cgx_lmac_pf(struct rvu *rvu) unsigned long lmac_bmap; int size, free_pkind; int cgx, lmac, iter; + int numvfs, hwvfs; if (!cgx_cnt_max) return 0; @@ -166,6 +167,8 @@ static int rvu_map_cgx_lmac_pf(struct rvu *rvu) pkind->pfchan_map[free_pkind] = ((pf) & 0x3F) << 16; rvu_map_cgx_nix_block(rvu, pf, cgx, lmac); rvu->cgx_mapped_pfs++; + rvu_get_pf_numvfs(rvu, pf, &numvfs, &hwvfs); + rvu->cgx_mapped_vfs += numvfs; pf++; } } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c index 370d4ca1e5ed..9b2dfbf90e51 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c @@ -2113,9 +2113,6 @@ static void rvu_print_npc_mcam_info(struct seq_file *s, int entry_acnt, entry_ecnt; int cntr_acnt, cntr_ecnt; - /* Skip PF0 */ - if (!pcifunc) - return; rvu_npc_get_mcam_entry_alloc_info(rvu, pcifunc, blkaddr, &entry_acnt, &entry_ecnt); rvu_npc_get_mcam_counter_alloc_info(rvu, pcifunc, blkaddr, @@ -2298,7 +2295,7 @@ static void rvu_dbg_npc_mcam_show_flows(struct seq_file *s, static void rvu_dbg_npc_mcam_show_action(struct seq_file *s, struct rvu_npc_mcam_rule *rule) { - if (rule->intf == NIX_INTF_TX) { + if (is_npc_intf_tx(rule->intf)) { switch (rule->tx_action.op) { case NIX_TX_ACTIONOP_DROP: seq_puts(s, "\taction: Drop\n"); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c index 10a98bcb7c54..2688186066d9 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c @@ -1364,6 +1364,44 @@ static void rvu_health_reporters_destroy(struct rvu *rvu) rvu_nix_health_reporters_destroy(rvu_dl); } +static int rvu_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode) +{ + struct rvu_devlink *rvu_dl = devlink_priv(devlink); + struct rvu *rvu = rvu_dl->rvu; + struct rvu_switch *rswitch; + + rswitch = &rvu->rswitch; + *mode = rswitch->mode; + + return 0; +} + +static int rvu_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, + struct netlink_ext_ack *extack) +{ + struct rvu_devlink *rvu_dl = devlink_priv(devlink); + struct rvu *rvu = rvu_dl->rvu; + struct rvu_switch *rswitch; + + rswitch = &rvu->rswitch; + switch (mode) { + case DEVLINK_ESWITCH_MODE_LEGACY: + case DEVLINK_ESWITCH_MODE_SWITCHDEV: + if (rswitch->mode == mode) + return 0; + rswitch->mode = mode; + if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV) + rvu_switch_enable(rvu); + else + rvu_switch_disable(rvu); + break; + default: + return -EINVAL; + } + + return 0; +} + static int rvu_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req, struct netlink_ext_ack *extack) { @@ -1372,6 +1410,8 @@ static int rvu_devlink_info_get(struct devlink *devlink, struct devlink_info_req static const struct devlink_ops rvu_devlink_ops = { .info_get = rvu_devlink_info_get, + .eswitch_mode_get = rvu_devlink_eswitch_mode_get, + .eswitch_mode_set = rvu_devlink_eswitch_mode_set, }; int rvu_register_dl(struct rvu *rvu) @@ -1380,14 +1420,9 @@ int rvu_register_dl(struct rvu *rvu) struct devlink *dl; int err; - rvu_dl = kzalloc(sizeof(*rvu_dl), GFP_KERNEL); - if (!rvu_dl) - return -ENOMEM; - dl = devlink_alloc(&rvu_devlink_ops, sizeof(struct rvu_devlink)); if (!dl) { dev_warn(rvu->dev, "devlink_alloc failed\n"); - kfree(rvu_dl); return -ENOMEM; } @@ -1395,10 +1430,10 @@ int rvu_register_dl(struct rvu *rvu) if (err) { dev_err(rvu->dev, "devlink register failed with error %d\n", err); devlink_free(dl); - kfree(rvu_dl); return err; } + rvu_dl = devlink_priv(dl); rvu_dl->dl = dl; rvu_dl->rvu = rvu; rvu->rvu_dl = rvu_dl; @@ -1417,5 +1452,4 @@ void rvu_unregister_dl(struct rvu *rvu) rvu_health_reporters_destroy(rvu); devlink_unregister(dl); devlink_free(dl); - kfree(rvu_dl); } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index aeae37704428..0933699a0d2d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -1952,6 +1952,35 @@ static void nix_tl1_default_cfg(struct rvu *rvu, struct nix_hw *nix_hw, pfvf_map[schq] = TXSCH_SET_FLAG(pfvf_map[schq], NIX_TXSCHQ_CFG_DONE); } +static void rvu_nix_tx_tl2_cfg(struct rvu *rvu, int blkaddr, + u16 pcifunc, struct nix_txsch *txsch) +{ + struct rvu_hwinfo *hw = rvu->hw; + int lbk_link_start, lbk_links; + u8 pf = rvu_get_pf(pcifunc); + int schq; + + if (!is_pf_cgxmapped(rvu, pf)) + return; + + lbk_link_start = hw->cgx_links; + + for (schq = 0; schq < txsch->schq.max; schq++) { + if (TXSCH_MAP_FUNC(txsch->pfvf_map[schq]) != pcifunc) + continue; + /* Enable all LBK links with channel 63 by default so that + * packets can be sent to LBK with a NPC TX MCAM rule + */ + lbk_links = hw->lbk_links; + while (lbk_links--) + rvu_write64(rvu, blkaddr, + NIX_AF_TL3_TL2X_LINKX_CFG(schq, + lbk_link_start + + lbk_links), + BIT_ULL(12) | RVU_SWITCH_LBK_CHAN); + } +} + int rvu_mbox_handler_nix_txschq_cfg(struct rvu *rvu, struct nix_txschq_config *req, struct msg_rsp *rsp) @@ -2040,6 +2069,9 @@ int rvu_mbox_handler_nix_txschq_cfg(struct rvu *rvu, rvu_write64(rvu, blkaddr, reg, regval); } + rvu_nix_tx_tl2_cfg(rvu, blkaddr, pcifunc, + &nix_hw->txsch[NIX_TXSCH_LVL_TL2]); + return 0; } @@ -3180,6 +3212,8 @@ int rvu_mbox_handler_nix_set_mac_addr(struct rvu *rvu, if (test_bit(PF_SET_VF_TRUSTED, &pfvf->flags) && from_vf) ether_addr_copy(pfvf->default_mac, req->mac_addr); + rvu_switch_update_rules(rvu, pcifunc); + return 0; } @@ -3849,6 +3883,8 @@ int rvu_mbox_handler_nix_lf_start_rx(struct rvu *rvu, struct msg_req *req, pfvf = rvu_get_pfvf(rvu, pcifunc); set_bit(NIXLF_INITIALIZED, &pfvf->flags); + rvu_switch_update_rules(rvu, pcifunc); + return rvu_cgx_start_stop_io(rvu, pcifunc, true); } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index 3612e0a2cab3..1097291aaa45 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -442,7 +442,8 @@ static void npc_fixup_vf_rule(struct rvu *rvu, struct npc_mcam *mcam, owner = mcam->entry2pfvf_map[index]; target_func = (entry->action >> 4) & 0xffff; /* do nothing when target is LBK/PF or owner is not PF */ - if (is_afvf(target_func) || (owner & RVU_PFVF_FUNC_MASK) || + if (is_pffunc_af(owner) || is_afvf(target_func) || + (owner & RVU_PFVF_FUNC_MASK) || !(target_func & RVU_PFVF_FUNC_MASK)) return; @@ -468,6 +469,8 @@ static void npc_config_mcam_entry(struct rvu *rvu, struct npc_mcam *mcam, { int bank = npc_get_bank(mcam, index); int kw = 0, actbank, actindex; + u8 tx_intf_mask = ~intf & 0x3; + u8 tx_intf = intf; u64 cam0, cam1; actbank = bank; /* Save bank id, to set action later on */ @@ -488,12 +491,21 @@ static void npc_config_mcam_entry(struct rvu *rvu, struct npc_mcam *mcam, */ for (; bank < (actbank + mcam->banks_per_entry); bank++, kw = kw + 2) { /* Interface should be set in all banks */ + if (is_npc_intf_tx(intf)) { + /* Last bit must be set and rest don't care + * for TX interfaces + */ + tx_intf_mask = 0x1; + tx_intf = intf & tx_intf_mask; + tx_intf_mask = ~tx_intf & tx_intf_mask; + } + rvu_write64(rvu, blkaddr, NPC_AF_MCAMEX_BANKX_CAMX_INTF(index, bank, 1), - intf); + tx_intf); rvu_write64(rvu, blkaddr, NPC_AF_MCAMEX_BANKX_CAMX_INTF(index, bank, 0), - ~intf & 0x3); + tx_intf_mask); /* Set the match key */ npc_get_keyword(entry, kw, &cam0, &cam1); @@ -650,6 +662,7 @@ void rvu_npc_install_ucast_entry(struct rvu *rvu, u16 pcifunc, eth_broadcast_addr((u8 *)&req.mask.dmac); req.features = BIT_ULL(NPC_DMAC); req.channel = chan; + req.chan_mask = 0xFFFU; req.intf = pfvf->nix_rx_intf; req.op = action.op; req.hdr.pcifunc = 0; /* AF is requester */ @@ -799,6 +812,7 @@ void rvu_npc_install_bcast_match_entry(struct rvu *rvu, u16 pcifunc, eth_broadcast_addr((u8 *)&req.mask.dmac); req.features = BIT_ULL(NPC_DMAC); req.channel = chan; + req.chan_mask = 0xFFFU; req.intf = pfvf->nix_rx_intf; req.entry = index; req.hdr.pcifunc = 0; /* AF is requester */ @@ -1745,6 +1759,8 @@ static int npc_mcam_rsrcs_init(struct rvu *rvu, int blkaddr) int nixlf_count = rvu_get_nixlf_count(rvu); struct npc_mcam *mcam = &rvu->hw->mcam; int rsvd, err; + u16 index; + int cntr; u64 cfg; /* Actual number of MCAM entries vary by entry size */ @@ -1845,6 +1861,14 @@ static int npc_mcam_rsrcs_init(struct rvu *rvu, int blkaddr) if (!mcam->entry2target_pffunc) goto free_mem; + for (index = 0; index < mcam->bmap_entries; index++) { + mcam->entry2pfvf_map[index] = NPC_MCAM_INVALID_MAP; + mcam->entry2cntr_map[index] = NPC_MCAM_INVALID_MAP; + } + + for (cntr = 0; cntr < mcam->counters.max; cntr++) + mcam->cntr2pfvf_map[cntr] = NPC_MCAM_INVALID_MAP; + mutex_init(&mcam->lock); return 0; @@ -2562,7 +2586,7 @@ int rvu_mbox_handler_npc_mcam_alloc_entry(struct rvu *rvu, } /* Alloc request from PFFUNC with no NIXLF attached should be denied */ - if (!is_nixlf_attached(rvu, pcifunc)) + if (!is_pffunc_af(pcifunc) && !is_nixlf_attached(rvu, pcifunc)) return NPC_MCAM_ALLOC_DENIED; return npc_mcam_alloc_entries(mcam, pcifunc, req, rsp); @@ -2582,7 +2606,7 @@ int rvu_mbox_handler_npc_mcam_free_entry(struct rvu *rvu, return NPC_MCAM_INVALID_REQ; /* Free request from PFFUNC with no NIXLF attached, ignore */ - if (!is_nixlf_attached(rvu, pcifunc)) + if (!is_pffunc_af(pcifunc) && !is_nixlf_attached(rvu, pcifunc)) return NPC_MCAM_INVALID_REQ; mutex_lock(&mcam->lock); @@ -2594,7 +2618,7 @@ int rvu_mbox_handler_npc_mcam_free_entry(struct rvu *rvu, if (rc) goto exit; - mcam->entry2pfvf_map[req->entry] = 0; + mcam->entry2pfvf_map[req->entry] = NPC_MCAM_INVALID_MAP; mcam->entry2target_pffunc[req->entry] = 0x0; npc_mcam_clear_bit(mcam, req->entry); npc_enable_mcam_entry(rvu, mcam, blkaddr, req->entry, false); @@ -2679,13 +2703,14 @@ int rvu_mbox_handler_npc_mcam_write_entry(struct rvu *rvu, else nix_intf = pfvf->nix_rx_intf; - if (npc_mcam_verify_channel(rvu, pcifunc, req->intf, channel)) { + if (!is_pffunc_af(pcifunc) && + npc_mcam_verify_channel(rvu, pcifunc, req->intf, channel)) { rc = NPC_MCAM_INVALID_REQ; goto exit; } - if (npc_mcam_verify_pf_func(rvu, &req->entry_data, req->intf, - pcifunc)) { + if (!is_pffunc_af(pcifunc) && + npc_mcam_verify_pf_func(rvu, &req->entry_data, req->intf, pcifunc)) { rc = NPC_MCAM_INVALID_REQ; goto exit; } @@ -2836,7 +2861,7 @@ int rvu_mbox_handler_npc_mcam_alloc_counter(struct rvu *rvu, return NPC_MCAM_INVALID_REQ; /* If the request is from a PFFUNC with no NIXLF attached, ignore */ - if (!is_nixlf_attached(rvu, pcifunc)) + if (!is_pffunc_af(pcifunc) && !is_nixlf_attached(rvu, pcifunc)) return NPC_MCAM_INVALID_REQ; /* Since list of allocated counter IDs needs to be sent to requester, @@ -3081,7 +3106,7 @@ int rvu_mbox_handler_npc_mcam_alloc_and_write_entry(struct rvu *rvu, if (rc) { /* Free allocated MCAM entry */ mutex_lock(&mcam->lock); - mcam->entry2pfvf_map[entry] = 0; + mcam->entry2pfvf_map[entry] = NPC_MCAM_INVALID_MAP; npc_mcam_clear_bit(mcam, entry); mutex_unlock(&mcam->lock); return rc; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c index 68633145a8b8..5c01cf4a9c5b 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c @@ -910,14 +910,17 @@ static void rvu_mcam_add_counter_to_rule(struct rvu *rvu, u16 pcifunc, static void npc_update_rx_entry(struct rvu *rvu, struct rvu_pfvf *pfvf, struct mcam_entry *entry, - struct npc_install_flow_req *req, u16 target) + struct npc_install_flow_req *req, + u16 target, bool pf_set_vfs_mac) { + struct rvu_switch *rswitch = &rvu->rswitch; struct nix_rx_action action; - u64 chan_mask; - chan_mask = req->chan_mask ? req->chan_mask : ~0ULL; - npc_update_entry(rvu, NPC_CHAN, entry, req->channel, 0, chan_mask, 0, - NIX_INTF_RX); + if (rswitch->mode == DEVLINK_ESWITCH_MODE_SWITCHDEV && pf_set_vfs_mac) + req->chan_mask = 0x0; /* Do not care channel */ + + npc_update_entry(rvu, NPC_CHAN, entry, req->channel, 0, req->chan_mask, + 0, NIX_INTF_RX); *(u64 *)&action = 0x00; action.pf_func = target; @@ -949,9 +952,16 @@ static void npc_update_tx_entry(struct rvu *rvu, struct rvu_pfvf *pfvf, struct npc_install_flow_req *req, u16 target) { struct nix_tx_action action; + u64 mask = ~0ULL; + + /* If AF is installing then do not care about + * PF_FUNC in Send Descriptor + */ + if (is_pffunc_af(req->hdr.pcifunc)) + mask = 0; npc_update_entry(rvu, NPC_PF_FUNC, entry, (__force u16)htons(target), - 0, ~0ULL, 0, NIX_INTF_TX); + 0, mask, 0, NIX_INTF_TX); *(u64 *)&action = 0x00; action.op = req->op; @@ -1002,7 +1012,7 @@ static int npc_install_flow(struct rvu *rvu, int blkaddr, u16 target, req->intf); if (is_npc_intf_rx(req->intf)) - npc_update_rx_entry(rvu, pfvf, entry, req, target); + npc_update_rx_entry(rvu, pfvf, entry, req, target, pf_set_vfs_mac); else npc_update_tx_entry(rvu, pfvf, entry, req, target); @@ -1164,7 +1174,9 @@ int rvu_mbox_handler_npc_install_flow(struct rvu *rvu, if (err) return err; - if (npc_mcam_verify_channel(rvu, target, req->intf, req->channel)) + /* Skip channel validation if AF is installing */ + if (!is_pffunc_af(req->hdr.pcifunc) && + npc_mcam_verify_channel(rvu, target, req->intf, req->channel)) return -EINVAL; pfvf = rvu_get_pfvf(rvu, target); @@ -1180,6 +1192,7 @@ int rvu_mbox_handler_npc_install_flow(struct rvu *rvu, eth_broadcast_addr((u8 *)&req->mask.dmac); } + /* Proceed if NIXLF is attached or not for TX rules */ err = nix_get_nixlf(rvu, target, &nixlf, NULL); if (err && is_npc_intf_rx(req->intf) && !pf_set_vfs_mac) return -EINVAL; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_switch.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_switch.c new file mode 100644 index 000000000000..2e5379710aa5 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_switch.c @@ -0,0 +1,258 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell OcteonTx2 RVU Admin Function driver + * + * Copyright (C) 2021 Marvell. + */ + +#include <linux/bitfield.h> +#include "rvu.h" + +static int rvu_switch_install_rx_rule(struct rvu *rvu, u16 pcifunc, + u16 chan_mask) +{ + struct npc_install_flow_req req = { 0 }; + struct npc_install_flow_rsp rsp = { 0 }; + struct rvu_pfvf *pfvf; + + pfvf = rvu_get_pfvf(rvu, pcifunc); + /* If the pcifunc is not initialized then nothing to do. + * This same function will be called again via rvu_switch_update_rules + * after pcifunc is initialized. + */ + if (!test_bit(NIXLF_INITIALIZED, &pfvf->flags)) + return 0; + + ether_addr_copy(req.packet.dmac, pfvf->mac_addr); + eth_broadcast_addr((u8 *)&req.mask.dmac); + req.hdr.pcifunc = 0; /* AF is requester */ + req.vf = pcifunc; + req.features = BIT_ULL(NPC_DMAC); + req.channel = pfvf->rx_chan_base; + req.chan_mask = chan_mask; + req.intf = pfvf->nix_rx_intf; + req.op = NIX_RX_ACTION_DEFAULT; + req.default_rule = 1; + + return rvu_mbox_handler_npc_install_flow(rvu, &req, &rsp); +} + +static int rvu_switch_install_tx_rule(struct rvu *rvu, u16 pcifunc, u16 entry) +{ + struct npc_install_flow_req req = { 0 }; + struct npc_install_flow_rsp rsp = { 0 }; + struct rvu_pfvf *pfvf; + u8 lbkid; + + pfvf = rvu_get_pfvf(rvu, pcifunc); + /* If the pcifunc is not initialized then nothing to do. + * This same function will be called again via rvu_switch_update_rules + * after pcifunc is initialized. + */ + if (!test_bit(NIXLF_INITIALIZED, &pfvf->flags)) + return 0; + + lbkid = pfvf->nix_blkaddr == BLKADDR_NIX0 ? 0 : 1; + ether_addr_copy(req.packet.dmac, pfvf->mac_addr); + eth_broadcast_addr((u8 *)&req.mask.dmac); + req.hdr.pcifunc = 0; /* AF is requester */ + req.vf = pcifunc; + req.entry = entry; + req.features = BIT_ULL(NPC_DMAC); + req.intf = pfvf->nix_tx_intf; + req.op = NIX_TX_ACTIONOP_UCAST_CHAN; + req.index = (lbkid << 8) | RVU_SWITCH_LBK_CHAN; + req.set_cntr = 1; + + return rvu_mbox_handler_npc_install_flow(rvu, &req, &rsp); +} + +static int rvu_switch_install_rules(struct rvu *rvu) +{ + struct rvu_switch *rswitch = &rvu->rswitch; + u16 start = rswitch->start_entry; + struct rvu_hwinfo *hw = rvu->hw; + int pf, vf, numvfs, hwvf; + u16 pcifunc, entry = 0; + int err; + + for (pf = 1; pf < hw->total_pfs; pf++) { + if (!is_pf_cgxmapped(rvu, pf)) + continue; + + pcifunc = pf << 10; + /* rvu_get_nix_blkaddr sets up the corresponding NIX block + * address and NIX RX and TX interfaces for a pcifunc. + * Generally it is called during attach call of a pcifunc but it + * is called here since we are pre-installing rules before + * nixlfs are attached + */ + rvu_get_nix_blkaddr(rvu, pcifunc); + + /* MCAM RX rule for a PF/VF already exists as default unicast + * rules installed by AF. Hence change the channel in those + * rules to ignore channel so that packets with the required + * DMAC received from LBK(by other PF/VFs in system) or from + * external world (from wire) are accepted. + */ + err = rvu_switch_install_rx_rule(rvu, pcifunc, 0x0); + if (err) { + dev_err(rvu->dev, "RX rule for PF%d failed(%d)\n", + pf, err); + return err; + } + + err = rvu_switch_install_tx_rule(rvu, pcifunc, start + entry); + if (err) { + dev_err(rvu->dev, "TX rule for PF%d failed(%d)\n", + pf, err); + return err; + } + + rswitch->entry2pcifunc[entry++] = pcifunc; + + rvu_get_pf_numvfs(rvu, pf, &numvfs, &hwvf); + for (vf = 0; vf < numvfs; vf++, hwvf++) { + pcifunc = pf << 10 | ((vf + 1) & 0x3FF); + rvu_get_nix_blkaddr(rvu, pcifunc); + + err = rvu_switch_install_rx_rule(rvu, pcifunc, 0x0); + if (err) { + dev_err(rvu->dev, + "RX rule for PF%dVF%d failed(%d)\n", + pf, vf, err); + return err; + } + + err = rvu_switch_install_tx_rule(rvu, pcifunc, + start + entry); + if (err) { + dev_err(rvu->dev, + "TX rule for PF%dVF%d failed(%d)\n", + pf, vf, err); + return err; + } + + rswitch->entry2pcifunc[entry++] = pcifunc; + } + } + + return 0; +} + +void rvu_switch_enable(struct rvu *rvu) +{ + struct npc_mcam_alloc_entry_req alloc_req = { 0 }; + struct npc_mcam_alloc_entry_rsp alloc_rsp = { 0 }; + struct npc_delete_flow_req uninstall_req = { 0 }; + struct npc_mcam_free_entry_req free_req = { 0 }; + struct rvu_switch *rswitch = &rvu->rswitch; + struct msg_rsp rsp; + int ret; + + alloc_req.contig = true; + alloc_req.count = rvu->cgx_mapped_pfs + rvu->cgx_mapped_vfs; + ret = rvu_mbox_handler_npc_mcam_alloc_entry(rvu, &alloc_req, + &alloc_rsp); + if (ret) { + dev_err(rvu->dev, + "Unable to allocate MCAM entries\n"); + goto exit; + } + + if (alloc_rsp.count != alloc_req.count) { + dev_err(rvu->dev, + "Unable to allocate %d MCAM entries, got %d\n", + alloc_req.count, alloc_rsp.count); + goto free_entries; + } + + rswitch->entry2pcifunc = kcalloc(alloc_req.count, sizeof(u16), + GFP_KERNEL); + if (!rswitch->entry2pcifunc) + goto free_entries; + + rswitch->used_entries = alloc_rsp.count; + rswitch->start_entry = alloc_rsp.entry; + + ret = rvu_switch_install_rules(rvu); + if (ret) + goto uninstall_rules; + + return; + +uninstall_rules: + uninstall_req.start = rswitch->start_entry; + uninstall_req.end = rswitch->start_entry + rswitch->used_entries - 1; + rvu_mbox_handler_npc_delete_flow(rvu, &uninstall_req, &rsp); + kfree(rswitch->entry2pcifunc); +free_entries: + free_req.all = 1; + rvu_mbox_handler_npc_mcam_free_entry(rvu, &free_req, &rsp); +exit: + return; +} + +void rvu_switch_disable(struct rvu *rvu) +{ + struct npc_delete_flow_req uninstall_req = { 0 }; + struct npc_mcam_free_entry_req free_req = { 0 }; + struct rvu_switch *rswitch = &rvu->rswitch; + struct rvu_hwinfo *hw = rvu->hw; + int pf, vf, numvfs, hwvf; + struct msg_rsp rsp; + u16 pcifunc; + int err; + + if (!rswitch->used_entries) + return; + + for (pf = 1; pf < hw->total_pfs; pf++) { + if (!is_pf_cgxmapped(rvu, pf)) + continue; + + pcifunc = pf << 10; + err = rvu_switch_install_rx_rule(rvu, pcifunc, 0xFFF); + if (err) + dev_err(rvu->dev, + "Reverting RX rule for PF%d failed(%d)\n", + pf, err); + + for (vf = 0; vf < numvfs; vf++, hwvf++) { + pcifunc = pf << 10 | ((vf + 1) & 0x3FF); + err = rvu_switch_install_rx_rule(rvu, pcifunc, 0xFFF); + if (err) + dev_err(rvu->dev, + "Reverting RX rule for PF%dVF%d failed(%d)\n", + pf, vf, err); + } + } + + uninstall_req.start = rswitch->start_entry; + uninstall_req.end = rswitch->start_entry + rswitch->used_entries - 1; + free_req.all = 1; + rvu_mbox_handler_npc_delete_flow(rvu, &uninstall_req, &rsp); + rvu_mbox_handler_npc_mcam_free_entry(rvu, &free_req, &rsp); + rswitch->used_entries = 0; + kfree(rswitch->entry2pcifunc); +} + +void rvu_switch_update_rules(struct rvu *rvu, u16 pcifunc) +{ + struct rvu_switch *rswitch = &rvu->rswitch; + u32 max = rswitch->used_entries; + u16 entry; + + if (!rswitch->used_entries) + return; + + for (entry = 0; entry < max; entry++) { + if (rswitch->entry2pcifunc[entry] == pcifunc) + break; + } + + if (entry >= max) + return; + + rvu_switch_install_tx_rule(rvu, pcifunc, rswitch->start_entry + entry); + rvu_switch_install_rx_rule(rvu, pcifunc, 0x0); +} diff --git a/drivers/net/ethernet/microchip/sparx5/Kconfig b/drivers/net/ethernet/microchip/sparx5/Kconfig index ac403d43c74c..7bdbb2d09a14 100644 --- a/drivers/net/ethernet/microchip/sparx5/Kconfig +++ b/drivers/net/ethernet/microchip/sparx5/Kconfig @@ -3,6 +3,7 @@ config SPARX5_SWITCH depends on NET_SWITCHDEV depends on HAS_IOMEM depends on OF + depends on ARCH_SPARX5 || COMPILE_TEST select PHYLINK select PHY_SPARX5_SERDES select RESET_CONTROLLER diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index f744557c33a3..c7af5bc3b8af 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -5084,7 +5084,8 @@ static int r8169_mdio_register(struct rtl8169_private *tp) new_bus->priv = tp; new_bus->parent = &pdev->dev; new_bus->irq[0] = PHY_MAC_INTERRUPT; - snprintf(new_bus->id, MII_BUS_ID_SIZE, "r8169-%x", pci_dev_id(pdev)); + snprintf(new_bus->id, MII_BUS_ID_SIZE, "r8169-%x-%x", + pci_domain_nr(pdev->bus), pci_dev_id(pdev)); new_bus->read = r8169_mdio_read_reg; new_bus->write = r8169_mdio_write_reg; diff --git a/drivers/net/ethernet/renesas/ravb.h b/drivers/net/ethernet/renesas/ravb.h index 86a1eb0634e8..80e62ca2e3d3 100644 --- a/drivers/net/ethernet/renesas/ravb.h +++ b/drivers/net/ethernet/renesas/ravb.h @@ -864,7 +864,7 @@ enum GECMR_BIT { /* The Ethernet AVB descriptor definitions. */ struct ravb_desc { - __le16 ds; /* Descriptor size */ + __le16 ds; /* Descriptor size */ u8 cc; /* Content control MSBs (reserved) */ u8 die_dt; /* Descriptor interrupt enable and type */ __le32 dptr; /* Descriptor pointer */ diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 69c50f81e1cb..805397088850 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -920,7 +920,7 @@ static int ravb_poll(struct napi_struct *napi, int budget) if (ravb_rx(ndev, "a, q)) goto out; - /* Processing RX Descriptor Ring */ + /* Processing TX Descriptor Ring */ spin_lock_irqsave(&priv->lock, flags); /* Clear TX interrupt */ ravb_write(ndev, ~(mask | TIS_RESERVED), TIS); diff --git a/drivers/net/ethernet/xscale/ptp_ixp46x.c b/drivers/net/ethernet/xscale/ptp_ixp46x.c index 99d4d9439d05..a6fb88fd42f7 100644 --- a/drivers/net/ethernet/xscale/ptp_ixp46x.c +++ b/drivers/net/ethernet/xscale/ptp_ixp46x.c @@ -14,6 +14,8 @@ #include <linux/kernel.h> #include <linux/ptp_clock_kernel.h> #include <linux/soc/ixp4xx/cpu.h> +#include <linux/module.h> +#include <mach/ixp4xx-regs.h> #include "ixp46x_ts.h" diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c index 63006838bdcc..dec96e8ab567 100644 --- a/drivers/net/usb/hso.c +++ b/drivers/net/usb/hso.c @@ -2495,7 +2495,7 @@ static struct hso_device *hso_create_net_device(struct usb_interface *interface, hso_net_init); if (!net) { dev_err(&interface->dev, "Unable to create ethernet device\n"); - goto exit; + goto err_hso_dev; } hso_net = netdev_priv(net); @@ -2508,13 +2508,13 @@ static struct hso_device *hso_create_net_device(struct usb_interface *interface, USB_DIR_IN); if (!hso_net->in_endp) { dev_err(&interface->dev, "Can't find BULK IN endpoint\n"); - goto exit; + goto err_net; } hso_net->out_endp = hso_get_ep(interface, USB_ENDPOINT_XFER_BULK, USB_DIR_OUT); if (!hso_net->out_endp) { dev_err(&interface->dev, "Can't find BULK OUT endpoint\n"); - goto exit; + goto err_net; } SET_NETDEV_DEV(net, &interface->dev); SET_NETDEV_DEVTYPE(net, &hso_type); @@ -2523,18 +2523,18 @@ static struct hso_device *hso_create_net_device(struct usb_interface *interface, for (i = 0; i < MUX_BULK_RX_BUF_COUNT; i++) { hso_net->mux_bulk_rx_urb_pool[i] = usb_alloc_urb(0, GFP_KERNEL); if (!hso_net->mux_bulk_rx_urb_pool[i]) - goto exit; + goto err_mux_bulk_rx; hso_net->mux_bulk_rx_buf_pool[i] = kzalloc(MUX_BULK_RX_BUF_SIZE, GFP_KERNEL); if (!hso_net->mux_bulk_rx_buf_pool[i]) - goto exit; + goto err_mux_bulk_rx; } hso_net->mux_bulk_tx_urb = usb_alloc_urb(0, GFP_KERNEL); if (!hso_net->mux_bulk_tx_urb) - goto exit; + goto err_mux_bulk_rx; hso_net->mux_bulk_tx_buf = kzalloc(MUX_BULK_TX_BUF_SIZE, GFP_KERNEL); if (!hso_net->mux_bulk_tx_buf) - goto exit; + goto err_free_tx_urb; add_net_device(hso_dev); @@ -2542,7 +2542,7 @@ static struct hso_device *hso_create_net_device(struct usb_interface *interface, result = register_netdev(net); if (result) { dev_err(&interface->dev, "Failed to register device\n"); - goto exit; + goto err_free_tx_buf; } hso_log_port(hso_dev); @@ -2550,8 +2550,21 @@ static struct hso_device *hso_create_net_device(struct usb_interface *interface, hso_create_rfkill(hso_dev, interface); return hso_dev; -exit: - hso_free_net_device(hso_dev, true); + +err_free_tx_buf: + remove_net_device(hso_dev); + kfree(hso_net->mux_bulk_tx_buf); +err_free_tx_urb: + usb_free_urb(hso_net->mux_bulk_tx_urb); +err_mux_bulk_rx: + for (i = 0; i < MUX_BULK_RX_BUF_COUNT; i++) { + usb_free_urb(hso_net->mux_bulk_rx_urb_pool[i]); + kfree(hso_net->mux_bulk_rx_buf_pool[i]); + } +err_net: + free_netdev(net); +err_hso_dev: + kfree(hso_dev); return NULL; } diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 1692d3b1b6e1..e09b107b5c99 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -1552,7 +1552,8 @@ static int rtl8152_set_speed(struct r8152 *tp, u8 autoneg, u32 speed, u8 duplex, u32 advertising); -static int rtl8152_set_mac_address(struct net_device *netdev, void *p) +static int __rtl8152_set_mac_address(struct net_device *netdev, void *p, + bool in_resume) { struct r8152 *tp = netdev_priv(netdev); struct sockaddr *addr = p; @@ -1561,9 +1562,11 @@ static int rtl8152_set_mac_address(struct net_device *netdev, void *p) if (!is_valid_ether_addr(addr->sa_data)) goto out1; - ret = usb_autopm_get_interface(tp->intf); - if (ret < 0) - goto out1; + if (!in_resume) { + ret = usb_autopm_get_interface(tp->intf); + if (ret < 0) + goto out1; + } mutex_lock(&tp->control); @@ -1575,11 +1578,17 @@ static int rtl8152_set_mac_address(struct net_device *netdev, void *p) mutex_unlock(&tp->control); - usb_autopm_put_interface(tp->intf); + if (!in_resume) + usb_autopm_put_interface(tp->intf); out1: return ret; } +static int rtl8152_set_mac_address(struct net_device *netdev, void *p) +{ + return __rtl8152_set_mac_address(netdev, p, false); +} + /* Devices containing proper chips can support a persistent * host system provided MAC address. * Examples of this are Dell TB15 and Dell WD15 docks @@ -1698,7 +1707,7 @@ static int determine_ethernet_addr(struct r8152 *tp, struct sockaddr *sa) return ret; } -static int set_ethernet_addr(struct r8152 *tp) +static int set_ethernet_addr(struct r8152 *tp, bool in_resume) { struct net_device *dev = tp->netdev; struct sockaddr sa; @@ -1711,7 +1720,7 @@ static int set_ethernet_addr(struct r8152 *tp) if (tp->version == RTL_VER_01) ether_addr_copy(dev->dev_addr, sa.sa_data); else - ret = rtl8152_set_mac_address(dev, &sa); + ret = __rtl8152_set_mac_address(dev, &sa, in_resume); return ret; } @@ -6763,9 +6772,10 @@ static int rtl8152_close(struct net_device *netdev) tp->rtl_ops.down(tp); mutex_unlock(&tp->control); + } + if (!res) usb_autopm_put_interface(tp->intf); - } free_all_mem(tp); @@ -8443,7 +8453,7 @@ static int rtl8152_reset_resume(struct usb_interface *intf) clear_bit(SELECTIVE_SUSPEND, &tp->flags); tp->rtl_ops.init(tp); queue_delayed_work(system_long_wq, &tp->hw_phy_work, 0); - set_ethernet_addr(tp); + set_ethernet_addr(tp, true); return rtl8152_resume(intf); } @@ -9644,7 +9654,7 @@ static int rtl8152_probe(struct usb_interface *intf, tp->rtl_fw.retry = true; #endif queue_delayed_work(system_long_wq, &tp->hw_phy_work, 0); - set_ethernet_addr(tp); + set_ethernet_addr(tp, false); usb_set_intfdata(intf, tp); diff --git a/include/net/tcp.h b/include/net/tcp.h index 17df9b047ee4..784d5c3ef1c5 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1709,7 +1709,6 @@ struct tcp_fastopen_context { struct rcu_head rcu; }; -extern unsigned int sysctl_tcp_fastopen_blackhole_timeout; void tcp_fastopen_active_disable(struct sock *sk); bool tcp_fastopen_active_should_disable(struct sock *sk); void tcp_fastopen_active_disable_ofo_check(struct sock *sk); diff --git a/include/trace/events/net.h b/include/trace/events/net.h index 2399073c3afc..78c448c6ab4c 100644 --- a/include/trace/events/net.h +++ b/include/trace/events/net.h @@ -136,7 +136,7 @@ DECLARE_EVENT_CLASS(net_dev_template, __assign_str(name, skb->dev->name); ), - TP_printk("dev=%s skbaddr=%p len=%u", + TP_printk("dev=%s skbaddr=%px len=%u", __get_str(name), __entry->skbaddr, __entry->len) ) diff --git a/include/trace/events/qdisc.h b/include/trace/events/qdisc.h index 330d32d84485..c3006c6b4a87 100644 --- a/include/trace/events/qdisc.h +++ b/include/trace/events/qdisc.h @@ -41,11 +41,37 @@ TRACE_EVENT(qdisc_dequeue, __entry->txq_state = txq->state; ), - TP_printk("dequeue ifindex=%d qdisc handle=0x%X parent=0x%X txq_state=0x%lX packets=%d skbaddr=%p", + TP_printk("dequeue ifindex=%d qdisc handle=0x%X parent=0x%X txq_state=0x%lX packets=%d skbaddr=%px", __entry->ifindex, __entry->handle, __entry->parent, __entry->txq_state, __entry->packets, __entry->skbaddr ) ); +TRACE_EVENT(qdisc_enqueue, + + TP_PROTO(struct Qdisc *qdisc, const struct netdev_queue *txq, struct sk_buff *skb), + + TP_ARGS(qdisc, txq, skb), + + TP_STRUCT__entry( + __field(struct Qdisc *, qdisc) + __field(void *, skbaddr) + __field(int, ifindex) + __field(u32, handle) + __field(u32, parent) + ), + + TP_fast_assign( + __entry->qdisc = qdisc; + __entry->skbaddr = skb; + __entry->ifindex = txq->dev ? txq->dev->ifindex : 0; + __entry->handle = qdisc->handle; + __entry->parent = qdisc->parent; + ), + + TP_printk("enqueue ifindex=%d qdisc handle=0x%X parent=0x%X skbaddr=%px", + __entry->ifindex, __entry->handle, __entry->parent, __entry->skbaddr) +); + TRACE_EVENT(qdisc_reset, TP_PROTO(struct Qdisc *q), diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 42a4063de7cd..9de3c9c3267c 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3677,6 +3677,8 @@ continue_func: if (tail_call_reachable) for (j = 0; j < frame; j++) subprog[ret_prog[j]].tail_call_reachable = true; + if (subprog[0].tail_call_reachable) + env->prog->aux->tail_call_reachable = true; /* end of for() loop means the last insn of the 'subprog' * was reached. Doesn't matter whether it was JA or EXIT diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index aa47af349ba8..1cc75c811e24 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -701,6 +701,9 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, void *data; int ret; + if (prog->expected_attach_type == BPF_XDP_DEVMAP || + prog->expected_attach_type == BPF_XDP_CPUMAP) + return -EINVAL; if (kattr->test.ctx_in || kattr->test.ctx_out) return -EINVAL; diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 2b862cffc03a..a16191dcaed1 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -780,7 +780,7 @@ int br_fdb_replay(const struct net_device *br_dev, const struct net_device *dev, struct net_device *dst_dev; dst_dev = dst ? dst->dev : br->dev; - if (dst_dev != br_dev && dst_dev != dev) + if (dst_dev && dst_dev != dev) continue; err = br_fdb_replay_one(nb, fdb, dst_dev, action, ctx); diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 647554c9813b..e12fd3cad619 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -539,7 +539,8 @@ static int caif_seqpkt_sendmsg(struct socket *sock, struct msghdr *msg, goto err; ret = -EINVAL; - if (unlikely(msg->msg_iter.iov->iov_base == NULL)) + if (unlikely(msg->msg_iter.nr_segs == 0) || + unlikely(msg->msg_iter.iov->iov_base == NULL)) goto err; noblock = msg->msg_flags & MSG_DONTWAIT; diff --git a/net/core/dev.c b/net/core/dev.c index 64b21f0a2048..8f1a47ad6781 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -131,6 +131,7 @@ #include <trace/events/napi.h> #include <trace/events/net.h> #include <trace/events/skb.h> +#include <trace/events/qdisc.h> #include <linux/inetdevice.h> #include <linux/cpu_rmap.h> #include <linux/static_key.h> @@ -3844,6 +3845,18 @@ static void qdisc_pkt_len_init(struct sk_buff *skb) } } +static int dev_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *q, + struct sk_buff **to_free, + struct netdev_queue *txq) +{ + int rc; + + rc = q->enqueue(skb, q, to_free) & NET_XMIT_MASK; + if (rc == NET_XMIT_SUCCESS) + trace_qdisc_enqueue(q, txq, skb); + return rc; +} + static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, struct net_device *dev, struct netdev_queue *txq) @@ -3862,8 +3875,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, * of q->seqlock to protect from racing with requeuing. */ if (unlikely(!nolock_qdisc_is_empty(q))) { - rc = q->enqueue(skb, q, &to_free) & - NET_XMIT_MASK; + rc = dev_qdisc_enqueue(skb, q, &to_free, txq); __qdisc_run(q); qdisc_run_end(q); @@ -3879,7 +3891,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, return NET_XMIT_SUCCESS; } - rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; + rc = dev_qdisc_enqueue(skb, q, &to_free, txq); qdisc_run(q); no_lock_out: @@ -3923,7 +3935,7 @@ no_lock_out: qdisc_run_end(q); rc = NET_XMIT_SUCCESS; } else { - rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; + rc = dev_qdisc_enqueue(skb, q, &to_free, txq); if (qdisc_run_begin(q)) { if (unlikely(contended)) { spin_unlock(&q->busylock); @@ -9700,14 +9712,17 @@ int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) struct net_device *dev; int err, fd; + rtnl_lock(); dev = dev_get_by_index(net, attr->link_create.target_ifindex); - if (!dev) + if (!dev) { + rtnl_unlock(); return -EINVAL; + } link = kzalloc(sizeof(*link), GFP_USER); if (!link) { err = -ENOMEM; - goto out_put_dev; + goto unlock; } bpf_link_init(&link->link, BPF_LINK_TYPE_XDP, &bpf_xdp_link_lops, prog); @@ -9717,14 +9732,14 @@ int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) err = bpf_link_prime(&link->link, &link_primer); if (err) { kfree(link); - goto out_put_dev; + goto unlock; } - rtnl_lock(); err = dev_xdp_attach_link(dev, NULL, link); rtnl_unlock(); if (err) { + link->dev = NULL; bpf_link_cleanup(&link_primer); goto out_put_dev; } @@ -9734,6 +9749,9 @@ int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) dev_put(dev); return fd; +unlock: + rtnl_unlock(); + out_put_dev: dev_put(dev); return err; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index f63de967ac25..fc7942c0dddc 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -663,7 +663,7 @@ static void skb_release_data(struct sk_buff *skb) if (skb->cloned && atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1, &shinfo->dataref)) - return; + goto exit; skb_zcopy_clear(skb, true); @@ -674,6 +674,17 @@ static void skb_release_data(struct sk_buff *skb) kfree_skb_list(shinfo->frag_list); skb_free_head(skb); +exit: + /* When we clone an SKB we copy the reycling bit. The pp_recycle + * bit is only set on the head though, so in order to avoid races + * while trying to recycle fragments on __skb_frag_unref() we need + * to make one SKB responsible for triggering the recycle path. + * So disable the recycling bit if an SKB is cloned and we have + * additional references to to the fragmented part of the SKB. + * Eventually the last SKB will have the recycling bit set and it's + * dataref set to 0, which will trigger the recycling + */ + skb->pp_recycle = 0; } /* @@ -3011,8 +3022,11 @@ skb_zerocopy_headlen(const struct sk_buff *from) if (!from->head_frag || skb_headlen(from) < L1_CACHE_BYTES || - skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS) + skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS) { hlen = skb_headlen(from); + if (!hlen) + hlen = from->len; + } if (skb_has_frag_list(from)) hlen = from->len; diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 9b6160a191f8..15d71288e741 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -508,10 +508,8 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb, if (skb_linearize(skb)) return -EAGAIN; num_sge = skb_to_sgvec(skb, msg->sg.data, 0, skb->len); - if (unlikely(num_sge < 0)) { - kfree(msg); + if (unlikely(num_sge < 0)) return num_sge; - } copied = skb->len; msg->sg.start = 0; @@ -530,6 +528,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb) { struct sock *sk = psock->sk; struct sk_msg *msg; + int err; /* If we are receiving on the same sock skb->sk is already assigned, * skip memory accounting and owner transition seeing it already set @@ -548,7 +547,10 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb) * into user buffers. */ skb_set_owner_r(skb, sk); - return sk_psock_skb_ingress_enqueue(skb, psock, sk, msg); + err = sk_psock_skb_ingress_enqueue(skb, psock, sk, msg); + if (err < 0) + kfree(msg); + return err; } /* Puts an skb on the ingress queue of the socket already assigned to the @@ -559,12 +561,16 @@ static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb { struct sk_msg *msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC); struct sock *sk = psock->sk; + int err; if (unlikely(!msg)) return -EAGAIN; sk_msg_init(msg); skb_set_owner_r(skb, sk); - return sk_psock_skb_ingress_enqueue(skb, psock, sk, msg); + err = sk_psock_skb_ingress_enqueue(skb, psock, sk, msg); + if (err < 0) + kfree(msg); + return err; } static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb, diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 5dbd45dc35ad..dc92a67baea3 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -816,7 +816,7 @@ static int dn_auto_bind(struct socket *sock) static int dn_confirm_accept(struct sock *sk, long *timeo, gfp_t allocation) { struct dn_scp *scp = DN_SK(sk); - DEFINE_WAIT(wait); + DEFINE_WAIT_FUNC(wait, woken_wake_function); int err; if (scp->state != DN_CR) @@ -826,11 +826,11 @@ static int dn_confirm_accept(struct sock *sk, long *timeo, gfp_t allocation) scp->segsize_loc = dst_metric_advmss(__sk_dst_get(sk)); dn_send_conn_conf(sk, allocation); - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + add_wait_queue(sk_sleep(sk), &wait); for(;;) { release_sock(sk); if (scp->state == DN_CC) - *timeo = schedule_timeout(*timeo); + *timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, *timeo); lock_sock(sk); err = 0; if (scp->state == DN_RUN) @@ -844,9 +844,8 @@ static int dn_confirm_accept(struct sock *sk, long *timeo, gfp_t allocation) err = -EAGAIN; if (!*timeo) break; - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); } - finish_wait(sk_sleep(sk), &wait); + remove_wait_queue(sk_sleep(sk), &wait); if (err == 0) { sk->sk_socket->state = SS_CONNECTED; } else if (scp->state != DN_CC) { @@ -858,7 +857,7 @@ static int dn_confirm_accept(struct sock *sk, long *timeo, gfp_t allocation) static int dn_wait_run(struct sock *sk, long *timeo) { struct dn_scp *scp = DN_SK(sk); - DEFINE_WAIT(wait); + DEFINE_WAIT_FUNC(wait, woken_wake_function); int err = 0; if (scp->state == DN_RUN) @@ -867,11 +866,11 @@ static int dn_wait_run(struct sock *sk, long *timeo) if (!*timeo) return -EALREADY; - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + add_wait_queue(sk_sleep(sk), &wait); for(;;) { release_sock(sk); if (scp->state == DN_CI || scp->state == DN_CC) - *timeo = schedule_timeout(*timeo); + *timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, *timeo); lock_sock(sk); err = 0; if (scp->state == DN_RUN) @@ -885,9 +884,8 @@ static int dn_wait_run(struct sock *sk, long *timeo) err = -ETIMEDOUT; if (!*timeo) break; - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); } - finish_wait(sk_sleep(sk), &wait); + remove_wait_queue(sk_sleep(sk), &wait); out: if (err == 0) { sk->sk_socket->state = SS_CONNECTED; @@ -1032,16 +1030,16 @@ static void dn_user_copy(struct sk_buff *skb, struct optdata_dn *opt) static struct sk_buff *dn_wait_for_connect(struct sock *sk, long *timeo) { - DEFINE_WAIT(wait); + DEFINE_WAIT_FUNC(wait, woken_wake_function); struct sk_buff *skb = NULL; int err = 0; - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + add_wait_queue(sk_sleep(sk), &wait); for(;;) { release_sock(sk); skb = skb_dequeue(&sk->sk_receive_queue); if (skb == NULL) { - *timeo = schedule_timeout(*timeo); + *timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, *timeo); skb = skb_dequeue(&sk->sk_receive_queue); } lock_sock(sk); @@ -1056,9 +1054,8 @@ static struct sk_buff *dn_wait_for_connect(struct sock *sk, long *timeo) err = -EAGAIN; if (!*timeo) break; - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); } - finish_wait(sk_sleep(sk), &wait); + remove_wait_queue(sk_sleep(sk), &wait); return skb == NULL ? ERR_PTR(err) : skb; } diff --git a/net/dsa/slave.c b/net/dsa/slave.c index ffbba1e71551..532085da8d8f 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1808,6 +1808,7 @@ void dsa_slave_setup_tagger(struct net_device *slave) struct dsa_slave_priv *p = netdev_priv(slave); const struct dsa_port *cpu_dp = dp->cpu_dp; struct net_device *master = cpu_dp->master; + const struct dsa_switch *ds = dp->ds; slave->needed_headroom = cpu_dp->tag_ops->needed_headroom; slave->needed_tailroom = cpu_dp->tag_ops->needed_tailroom; @@ -1819,6 +1820,14 @@ void dsa_slave_setup_tagger(struct net_device *slave) slave->needed_tailroom += master->needed_tailroom; p->xmit = cpu_dp->tag_ops->xmit; + + slave->features = master->vlan_features | NETIF_F_HW_TC; + if (ds->ops->port_vlan_add && ds->ops->port_vlan_del) + slave->features |= NETIF_F_HW_VLAN_CTAG_FILTER; + slave->hw_features |= NETIF_F_HW_TC; + slave->features |= NETIF_F_LLTX; + if (slave->needed_tailroom) + slave->features &= ~(NETIF_F_SG | NETIF_F_FRAGLIST); } static struct lock_class_key dsa_slave_netdev_xmit_lock_key; @@ -1881,11 +1890,6 @@ int dsa_slave_create(struct dsa_port *port) if (slave_dev == NULL) return -ENOMEM; - slave_dev->features = master->vlan_features | NETIF_F_HW_TC; - if (ds->ops->port_vlan_add && ds->ops->port_vlan_del) - slave_dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; - slave_dev->hw_features |= NETIF_F_HW_TC; - slave_dev->features |= NETIF_F_LLTX; slave_dev->ethtool_ops = &dsa_slave_ethtool_ops; if (!is_zero_ether_addr(port->mac)) ether_addr_copy(slave_dev->dev_addr, port->mac); diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c index 53565f48934c..a201ccf2435d 100644 --- a/net/dsa/tag_ksz.c +++ b/net/dsa/tag_ksz.c @@ -53,6 +53,9 @@ static struct sk_buff *ksz8795_xmit(struct sk_buff *skb, struct net_device *dev) u8 *tag; u8 *addr; + if (skb->ip_summed == CHECKSUM_PARTIAL && skb_checksum_help(skb)) + return NULL; + /* Tag encoding */ tag = skb_put(skb, KSZ_INGRESS_TAG_LEN); addr = skb_mac_header(skb); @@ -114,6 +117,9 @@ static struct sk_buff *ksz9477_xmit(struct sk_buff *skb, u8 *addr; u16 val; + if (skb->ip_summed == CHECKSUM_PARTIAL && skb_checksum_help(skb)) + return NULL; + /* Tag encoding */ tag = skb_put(skb, KSZ9477_INGRESS_TAG_LEN); addr = skb_mac_header(skb); @@ -164,6 +170,9 @@ static struct sk_buff *ksz9893_xmit(struct sk_buff *skb, u8 *addr; u8 *tag; + if (skb->ip_summed == CHECKSUM_PARTIAL && skb_checksum_help(skb)) + return NULL; + /* Tag encoding */ tag = skb_put(skb, KSZ_INGRESS_TAG_LEN); addr = skb_mac_header(skb); diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index f26916a62f25..d3e9386b493e 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -503,7 +503,7 @@ static int __init tcp_bpf_v4_build_proto(void) tcp_bpf_rebuild_protos(tcp_bpf_prots[TCP_BPF_IPV4], &tcp_prot); return 0; } -core_initcall(tcp_bpf_v4_build_proto); +late_initcall(tcp_bpf_v4_build_proto); static int tcp_bpf_assert_proto_ops(struct proto *ops) { diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 47c32604d38f..25fa4c01a17f 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -507,8 +507,18 @@ void tcp_fastopen_active_disable(struct sock *sk) { struct net *net = sock_net(sk); + if (!sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout) + return; + + /* Paired with READ_ONCE() in tcp_fastopen_active_should_disable() */ + WRITE_ONCE(net->ipv4.tfo_active_disable_stamp, jiffies); + + /* Paired with smp_rmb() in tcp_fastopen_active_should_disable(). + * We want net->ipv4.tfo_active_disable_stamp to be updated first. + */ + smp_mb__before_atomic(); atomic_inc(&net->ipv4.tfo_active_disable_times); - net->ipv4.tfo_active_disable_stamp = jiffies; + NET_INC_STATS(net, LINUX_MIB_TCPFASTOPENBLACKHOLE); } @@ -519,17 +529,27 @@ void tcp_fastopen_active_disable(struct sock *sk) bool tcp_fastopen_active_should_disable(struct sock *sk) { unsigned int tfo_bh_timeout = sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout; - int tfo_da_times = atomic_read(&sock_net(sk)->ipv4.tfo_active_disable_times); unsigned long timeout; + int tfo_da_times; int multiplier; + if (!tfo_bh_timeout) + return false; + + tfo_da_times = atomic_read(&sock_net(sk)->ipv4.tfo_active_disable_times); if (!tfo_da_times) return false; + /* Paired with smp_mb__before_atomic() in tcp_fastopen_active_disable() */ + smp_rmb(); + /* Limit timeout to max: 2^6 * initial timeout */ multiplier = 1 << min(tfo_da_times - 1, 6); - timeout = multiplier * tfo_bh_timeout * HZ; - if (time_before(jiffies, sock_net(sk)->ipv4.tfo_active_disable_stamp + timeout)) + + /* Paired with the WRITE_ONCE() in tcp_fastopen_active_disable(). */ + timeout = READ_ONCE(sock_net(sk)->ipv4.tfo_active_disable_stamp) + + multiplier * tfo_bh_timeout * HZ; + if (time_before(jiffies, timeout)) return true; /* Mark check bit so we can check for successful active TFO diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index b9dc2d6197be..a692626c19e4 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2965,7 +2965,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_comp_sack_nr = 44; net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE; spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock); - net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 60 * 60; + net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 0; atomic_set(&net->ipv4.tfo_active_disable_times, 0); /* Reno is always built in */ diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 62cd4cd52e84..1a742b710e54 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -645,10 +645,12 @@ static struct sock *__udp4_lib_err_encap(struct net *net, const struct iphdr *iph, struct udphdr *uh, struct udp_table *udptable, + struct sock *sk, struct sk_buff *skb, u32 info) { + int (*lookup)(struct sock *sk, struct sk_buff *skb); int network_offset, transport_offset; - struct sock *sk; + struct udp_sock *up; network_offset = skb_network_offset(skb); transport_offset = skb_transport_offset(skb); @@ -659,18 +661,28 @@ static struct sock *__udp4_lib_err_encap(struct net *net, /* Transport header needs to point to the UDP header */ skb_set_transport_header(skb, iph->ihl << 2); + if (sk) { + up = udp_sk(sk); + + lookup = READ_ONCE(up->encap_err_lookup); + if (lookup && lookup(sk, skb)) + sk = NULL; + + goto out; + } + sk = __udp4_lib_lookup(net, iph->daddr, uh->source, iph->saddr, uh->dest, skb->dev->ifindex, 0, udptable, NULL); if (sk) { - int (*lookup)(struct sock *sk, struct sk_buff *skb); - struct udp_sock *up = udp_sk(sk); + up = udp_sk(sk); lookup = READ_ONCE(up->encap_err_lookup); if (!lookup || lookup(sk, skb)) sk = NULL; } +out: if (!sk) sk = ERR_PTR(__udp4_lib_err_encap_no_sk(skb, info)); @@ -707,15 +719,16 @@ int __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) sk = __udp4_lib_lookup(net, iph->daddr, uh->dest, iph->saddr, uh->source, skb->dev->ifindex, inet_sdif(skb), udptable, NULL); + if (!sk || udp_sk(sk)->encap_type) { /* No socket for error: try tunnels before discarding */ - sk = ERR_PTR(-ENOENT); if (static_branch_unlikely(&udp_encap_needed_key)) { - sk = __udp4_lib_err_encap(net, iph, uh, udptable, skb, + sk = __udp4_lib_err_encap(net, iph, uh, udptable, sk, skb, info); if (!sk) return 0; - } + } else + sk = ERR_PTR(-ENOENT); if (IS_ERR(sk)) { __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); diff --git a/net/ipv4/udp_bpf.c b/net/ipv4/udp_bpf.c index 45b8782aec0c..9f5a5cdc38e6 100644 --- a/net/ipv4/udp_bpf.c +++ b/net/ipv4/udp_bpf.c @@ -134,7 +134,7 @@ static int __init udp_bpf_v4_build_proto(void) udp_bpf_rebuild_protos(&udp_bpf_prots[UDP_BPF_IPV4], &udp_prot); return 0; } -core_initcall(udp_bpf_v4_build_proto); +late_initcall(udp_bpf_v4_build_proto); int udp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore) { diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 01bea76e3891..e1b9f7ac8bad 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -74,7 +74,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * if (likely(nskb)) { if (skb->sk) - skb_set_owner_w(skb, skb->sk); + skb_set_owner_w(nskb, skb->sk); consume_skb(skb); } else { kfree_skb(skb); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 7b756a7dc036..b6ddf23d3833 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3769,7 +3769,7 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, err = PTR_ERR(rt->fib6_metrics); /* Do not leave garbage there. */ rt->fib6_metrics = (struct dst_metrics *)&dst_default_metrics; - goto out; + goto out_free; } if (cfg->fc_flags & RTF_ADDRCONF) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 0cc7ba531b34..c5e15e94bb00 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -502,12 +502,14 @@ static struct sock *__udp6_lib_err_encap(struct net *net, const struct ipv6hdr *hdr, int offset, struct udphdr *uh, struct udp_table *udptable, + struct sock *sk, struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, __be32 info) { + int (*lookup)(struct sock *sk, struct sk_buff *skb); int network_offset, transport_offset; - struct sock *sk; + struct udp_sock *up; network_offset = skb_network_offset(skb); transport_offset = skb_transport_offset(skb); @@ -518,18 +520,28 @@ static struct sock *__udp6_lib_err_encap(struct net *net, /* Transport header needs to point to the UDP header */ skb_set_transport_header(skb, offset); + if (sk) { + up = udp_sk(sk); + + lookup = READ_ONCE(up->encap_err_lookup); + if (lookup && lookup(sk, skb)) + sk = NULL; + + goto out; + } + sk = __udp6_lib_lookup(net, &hdr->daddr, uh->source, &hdr->saddr, uh->dest, inet6_iif(skb), 0, udptable, skb); if (sk) { - int (*lookup)(struct sock *sk, struct sk_buff *skb); - struct udp_sock *up = udp_sk(sk); + up = udp_sk(sk); lookup = READ_ONCE(up->encap_err_lookup); if (!lookup || lookup(sk, skb)) sk = NULL; } +out: if (!sk) { sk = ERR_PTR(__udp6_lib_err_encap_no_sk(skb, opt, type, code, offset, info)); @@ -558,16 +570,17 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source, inet6_iif(skb), inet6_sdif(skb), udptable, NULL); + if (!sk || udp_sk(sk)->encap_type) { /* No socket for error: try tunnels before discarding */ - sk = ERR_PTR(-ENOENT); if (static_branch_unlikely(&udpv6_encap_needed_key)) { sk = __udp6_lib_err_encap(net, hdr, offset, uh, - udptable, skb, + udptable, sk, skb, opt, type, code, info); if (!sk) return 0; - } + } else + sk = ERR_PTR(-ENOENT); if (IS_ERR(sk)) { __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), diff --git a/net/netrom/nr_timer.c b/net/netrom/nr_timer.c index 9115f8a7dd45..a8da88db7893 100644 --- a/net/netrom/nr_timer.c +++ b/net/netrom/nr_timer.c @@ -121,11 +121,9 @@ static void nr_heartbeat_expiry(struct timer_list *t) is accepted() it isn't 'dead' so doesn't get removed. */ if (sock_flag(sk, SOCK_DESTROY) || (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_DEAD))) { - sock_hold(sk); bh_unlock_sock(sk); nr_destroy_socket(sk); - sock_put(sk); - return; + goto out; } break; @@ -146,6 +144,8 @@ static void nr_heartbeat_expiry(struct timer_list *t) nr_start_heartbeat(sk); bh_unlock_sock(sk); +out: + sock_put(sk); } static void nr_t2timer_expiry(struct timer_list *t) @@ -159,6 +159,7 @@ static void nr_t2timer_expiry(struct timer_list *t) nr_enquiry_response(sk); } bh_unlock_sock(sk); + sock_put(sk); } static void nr_t4timer_expiry(struct timer_list *t) @@ -169,6 +170,7 @@ static void nr_t4timer_expiry(struct timer_list *t) bh_lock_sock(sk); nr_sk(sk)->condition &= ~NR_COND_PEER_RX_BUSY; bh_unlock_sock(sk); + sock_put(sk); } static void nr_idletimer_expiry(struct timer_list *t) @@ -197,6 +199,7 @@ static void nr_idletimer_expiry(struct timer_list *t) sock_set_flag(sk, SOCK_DEAD); } bh_unlock_sock(sk); + sock_put(sk); } static void nr_t1timer_expiry(struct timer_list *t) @@ -209,8 +212,7 @@ static void nr_t1timer_expiry(struct timer_list *t) case NR_STATE_1: if (nr->n2count == nr->n2) { nr_disconnect(sk, ETIMEDOUT); - bh_unlock_sock(sk); - return; + goto out; } else { nr->n2count++; nr_write_internal(sk, NR_CONNREQ); @@ -220,8 +222,7 @@ static void nr_t1timer_expiry(struct timer_list *t) case NR_STATE_2: if (nr->n2count == nr->n2) { nr_disconnect(sk, ETIMEDOUT); - bh_unlock_sock(sk); - return; + goto out; } else { nr->n2count++; nr_write_internal(sk, NR_DISCREQ); @@ -231,8 +232,7 @@ static void nr_t1timer_expiry(struct timer_list *t) case NR_STATE_3: if (nr->n2count == nr->n2) { nr_disconnect(sk, ETIMEDOUT); - bh_unlock_sock(sk); - return; + goto out; } else { nr->n2count++; nr_requeue_frames(sk); @@ -241,5 +241,7 @@ static void nr_t1timer_expiry(struct timer_list *t) } nr_start_t1timer(sk); +out: bh_unlock_sock(sk); + sock_put(sk); } diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index 81a1c67335be..8d17a543cc9f 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -6,6 +6,7 @@ */ #include <linux/module.h> +#include <linux/if_arp.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/skbuff.h> @@ -33,6 +34,13 @@ static int tcf_skbmod_act(struct sk_buff *skb, const struct tc_action *a, tcf_lastuse_update(&d->tcf_tm); bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb); + action = READ_ONCE(d->tcf_action); + if (unlikely(action == TC_ACT_SHOT)) + goto drop; + + if (!skb->dev || skb->dev->type != ARPHRD_ETHER) + return action; + /* XXX: if you are going to edit more fields beyond ethernet header * (example when you add IP header replacement or vlan swap) * then MAX_EDIT_LEN needs to change appropriately @@ -41,10 +49,6 @@ static int tcf_skbmod_act(struct sk_buff *skb, const struct tc_action *a, if (unlikely(err)) /* best policy is to drop on the floor */ goto drop; - action = READ_ONCE(d->tcf_action); - if (unlikely(action == TC_ACT_SHOT)) - goto drop; - p = rcu_dereference_bh(d->skbmod_p); flags = p->flags; if (flags & SKBMOD_F_DMAC) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index d73b5c5514a9..e3e79e9bd706 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -2904,7 +2904,7 @@ replay: break; case RTM_GETCHAIN: err = tc_chain_notify(chain, skb, n->nlmsg_seq, - n->nlmsg_seq, n->nlmsg_type, true); + n->nlmsg_flags, n->nlmsg_type, true); if (err < 0) NL_SET_ERR_MSG(extack, "Failed to send chain notify message"); break; diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 5b274534264c..e9a8a2c86bbd 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -278,6 +278,8 @@ static int tcindex_filter_result_init(struct tcindex_filter_result *r, TCA_TCINDEX_POLICE); } +static void tcindex_free_perfect_hash(struct tcindex_data *cp); + static void tcindex_partial_destroy_work(struct work_struct *work) { struct tcindex_data *p = container_of(to_rcu_work(work), @@ -285,7 +287,8 @@ static void tcindex_partial_destroy_work(struct work_struct *work) rwork); rtnl_lock(); - kfree(p->perfect); + if (p->perfect) + tcindex_free_perfect_hash(p); kfree(p); rtnl_unlock(); } diff --git a/net/sctp/auth.c b/net/sctp/auth.c index 6f8319b828b0..fe74c5f95630 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -860,6 +860,8 @@ int sctp_auth_set_key(struct sctp_endpoint *ep, if (replace) { list_del_init(&shkey->key_list); sctp_auth_shkey_release(shkey); + if (asoc && asoc->active_key_id == auth_key->sca_keynumber) + sctp_auth_asoc_init_active_key(asoc, GFP_KERNEL); } list_add(&cur_key->key_list, sh_keys); diff --git a/net/sctp/output.c b/net/sctp/output.c index 9032ce60d50e..4dfb5ea82b05 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -104,8 +104,8 @@ void sctp_packet_config(struct sctp_packet *packet, __u32 vtag, if (asoc->param_flags & SPP_PMTUD_ENABLE) sctp_assoc_sync_pmtu(asoc); } else if (!sctp_transport_pl_enabled(tp) && - !sctp_transport_pmtu_check(tp)) { - if (asoc->param_flags & SPP_PMTUD_ENABLE) + asoc->param_flags & SPP_PMTUD_ENABLE) { + if (!sctp_transport_pmtu_check(tp)) sctp_assoc_sync_pmtu(asoc); } diff --git a/net/sctp/socket.c b/net/sctp/socket.c index e64e01f61b11..6b937bfd4751 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4577,6 +4577,10 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, } if (optlen > 0) { + /* Trim it to the biggest size sctp sockopt may need if necessary */ + optlen = min_t(unsigned int, optlen, + PAGE_ALIGN(USHRT_MAX + + sizeof(__u16) * sizeof(struct sctp_reset_streams))); kopt = memdup_sockptr(optval, optlen); if (IS_ERR(kopt)) return PTR_ERR(kopt); diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index 1828bba19020..dc6daa193557 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -222,6 +222,11 @@ int mount_bpffs_for_pin(const char *name) int err = 0; file = malloc(strlen(name) + 1); + if (!file) { + p_err("mem alloc failed"); + return -1; + } + strcpy(file, name); dir = dirname(file); diff --git a/tools/testing/selftests/net/nettest.c b/tools/testing/selftests/net/nettest.c index 6365c7fd1262..bd6288302094 100644 --- a/tools/testing/selftests/net/nettest.c +++ b/tools/testing/selftests/net/nettest.c @@ -11,9 +11,11 @@ #include <sys/socket.h> #include <sys/wait.h> #include <linux/tcp.h> +#include <linux/udp.h> #include <arpa/inet.h> #include <net/if.h> #include <netinet/in.h> +#include <netinet/ip.h> #include <netdb.h> #include <fcntl.h> #include <libgen.h> @@ -27,6 +29,10 @@ #include <time.h> #include <errno.h> +#include <linux/xfrm.h> +#include <linux/ipsec.h> +#include <linux/pfkeyv2.h> + #ifndef IPV6_UNICAST_IF #define IPV6_UNICAST_IF 76 #endif @@ -114,6 +120,9 @@ struct sock_args { struct in_addr in; struct in6_addr in6; } expected_raddr; + + /* ESP in UDP encap test */ + int use_xfrm; }; static int server_mode; @@ -1346,6 +1355,41 @@ static int bind_socket(int sd, struct sock_args *args) return 0; } +static int config_xfrm_policy(int sd, struct sock_args *args) +{ + struct xfrm_userpolicy_info policy = {}; + int type = UDP_ENCAP_ESPINUDP; + int xfrm_af = IP_XFRM_POLICY; + int level = SOL_IP; + + if (args->type != SOCK_DGRAM) { + log_error("Invalid socket type. Only DGRAM could be used for XFRM\n"); + return 1; + } + + policy.action = XFRM_POLICY_ALLOW; + policy.sel.family = args->version; + if (args->version == AF_INET6) { + xfrm_af = IPV6_XFRM_POLICY; + level = SOL_IPV6; + } + + policy.dir = XFRM_POLICY_OUT; + if (setsockopt(sd, level, xfrm_af, &policy, sizeof(policy)) < 0) + return 1; + + policy.dir = XFRM_POLICY_IN; + if (setsockopt(sd, level, xfrm_af, &policy, sizeof(policy)) < 0) + return 1; + + if (setsockopt(sd, IPPROTO_UDP, UDP_ENCAP, &type, sizeof(type)) < 0) { + log_err_errno("Failed to set xfrm encap"); + return 1; + } + + return 0; +} + static int lsock_init(struct sock_args *args) { long flags; @@ -1389,6 +1433,11 @@ static int lsock_init(struct sock_args *args) if (fcntl(sd, F_SETFD, FD_CLOEXEC) < 0) log_err_errno("Failed to set close-on-exec flag"); + if (args->use_xfrm && config_xfrm_policy(sd, args)) { + log_err_errno("Failed to set xfrm policy"); + goto err; + } + out: return sd; @@ -1772,7 +1821,7 @@ static int ipc_parent(int cpid, int fd, struct sock_args *args) return client_status; } -#define GETOPT_STR "sr:l:c:p:t:g:P:DRn:M:X:m:d:I:BN:O:SCi6L:0:1:2:3:Fbq" +#define GETOPT_STR "sr:l:c:p:t:g:P:DRn:M:X:m:d:I:BN:O:SCi6xL:0:1:2:3:Fbq" static void print_usage(char *prog) { @@ -1795,6 +1844,7 @@ static void print_usage(char *prog) " -D|R datagram (D) / raw (R) socket (default stream)\n" " -l addr local address to bind to in server mode\n" " -c addr local address to bind to in client mode\n" + " -x configure XFRM policy on socket\n" "\n" " -d dev bind socket to given device name\n" " -I dev bind socket to given device name - server mode\n" @@ -1966,6 +2016,9 @@ int main(int argc, char *argv[]) case 'q': quiet = 1; break; + case 'x': + args.use_xfrm = 1; + break; default: print_usage(argv[0]); return 1; diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index 64cd2e23c568..543ad7513a8e 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -118,6 +118,16 @@ # below for IPv6 doesn't apply here, because, on IPv4, administrative MTU # changes alone won't affect PMTU # +# - pmtu_vti4_udp_exception +# Same as pmtu_vti4_exception, but using ESP-in-UDP +# +# - pmtu_vti4_udp_routed_exception +# Set up vti tunnel on top of veth connected through routing namespace and +# add xfrm states and policies with ESP-in-UDP encapsulation. Check that +# route exception is not created if link layer MTU is not exceeded, then +# lower MTU on second part of routed environment and check that exception +# is created with the expected PMTU. +# # - pmtu_vti6_exception # Set up vti6 tunnel on top of veth, with xfrm states and policies, in two # namespaces with matching endpoints. Check that route exception is @@ -125,6 +135,13 @@ # decrease and increase MTU of tunnel, checking that route exception PMTU # changes accordingly # +# - pmtu_vti6_udp_exception +# Same as pmtu_vti6_exception, but using ESP-in-UDP +# +# - pmtu_vti6_udp_routed_exception +# Same as pmtu_vti6_udp_routed_exception but with routing between vti +# endpoints +# # - pmtu_vti4_default_mtu # Set up vti4 tunnel on top of veth, in two namespaces with matching # endpoints. Check that MTU assigned to vti interface is the MTU of the @@ -224,6 +241,10 @@ tests=" pmtu_ipv6_ipv6_exception IPv6 over IPv6: PMTU exceptions 1 pmtu_vti6_exception vti6: PMTU exceptions 0 pmtu_vti4_exception vti4: PMTU exceptions 0 + pmtu_vti6_udp_exception vti6: PMTU exceptions (ESP-in-UDP) 0 + pmtu_vti4_udp_exception vti4: PMTU exceptions (ESP-in-UDP) 0 + pmtu_vti6_udp_routed_exception vti6: PMTU exceptions, routed (ESP-in-UDP) 0 + pmtu_vti4_udp_routed_exception vti4: PMTU exceptions, routed (ESP-in-UDP) 0 pmtu_vti4_default_mtu vti4: default MTU assignment 0 pmtu_vti6_default_mtu vti6: default MTU assignment 0 pmtu_vti4_link_add_mtu vti4: MTU setting on link creation 0 @@ -246,7 +267,6 @@ ns_b="ip netns exec ${NS_B}" ns_c="ip netns exec ${NS_C}" ns_r1="ip netns exec ${NS_R1}" ns_r2="ip netns exec ${NS_R2}" - # Addressing and routing for tests with routers: four network segments, with # index SEGMENT between 1 and 4, a common prefix (PREFIX4 or PREFIX6) and an # identifier ID, which is 1 for hosts (A and B), 2 for routers (R1 and R2). @@ -279,7 +299,6 @@ routes=" A ${prefix6}:${b_r2}::1 ${prefix6}:${a_r2}::2 B default ${prefix6}:${b_r1}::2 " - USE_NH="no" # ns family nh id destination gateway nexthops=" @@ -326,6 +345,7 @@ dummy6_mask="64" err_buf= tcpdump_pids= +nettest_pids= err() { err_buf="${err_buf}${1} @@ -548,6 +568,14 @@ setup_vti6() { setup_vti 6 ${veth6_a_addr} ${veth6_b_addr} ${tunnel6_a_addr} ${tunnel6_b_addr} ${tunnel6_mask} } +setup_vti4routed() { + setup_vti 4 ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 ${tunnel4_a_addr} ${tunnel4_b_addr} ${tunnel4_mask} +} + +setup_vti6routed() { + setup_vti 6 ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 ${tunnel6_a_addr} ${tunnel6_b_addr} ${tunnel6_mask} +} + setup_vxlan_or_geneve() { type="${1}" a_addr="${2}" @@ -619,18 +647,36 @@ setup_xfrm() { proto=${1} veth_a_addr="${2}" veth_b_addr="${3}" + encap=${4} - run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel || return 1 - run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel + run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel ${encap} || return 1 + run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel ${encap} run_cmd ${ns_a} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel run_cmd ${ns_a} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel - run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel - run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel + run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel ${encap} + run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel ${encap} run_cmd ${ns_b} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel run_cmd ${ns_b} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel } +setup_nettest_xfrm() { + which nettest >/dev/null + if [ $? -ne 0 ]; then + echo "'nettest' command not found; skipping tests" + return 1 + fi + + [ ${1} -eq 6 ] && proto="-6" || proto="" + port=${2} + + run_cmd ${ns_a} nettest ${proto} -q -D -s -x -p ${port} -t 5 & + nettest_pids="${nettest_pids} $!" + + run_cmd ${ns_b} nettest ${proto} -q -D -s -x -p ${port} -t 5 & + nettest_pids="${nettest_pids} $!" +} + setup_xfrm4() { setup_xfrm 4 ${veth4_a_addr} ${veth4_b_addr} } @@ -639,6 +685,26 @@ setup_xfrm6() { setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr} } +setup_xfrm4udp() { + setup_xfrm 4 ${veth4_a_addr} ${veth4_b_addr} "encap espinudp 4500 4500 0.0.0.0" + setup_nettest_xfrm 4 4500 +} + +setup_xfrm6udp() { + setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr} "encap espinudp 4500 4500 0.0.0.0" + setup_nettest_xfrm 6 4500 +} + +setup_xfrm4udprouted() { + setup_xfrm 4 ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 "encap espinudp 4500 4500 0.0.0.0" + setup_nettest_xfrm 4 4500 +} + +setup_xfrm6udprouted() { + setup_xfrm 6 ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 "encap espinudp 4500 4500 0.0.0.0" + setup_nettest_xfrm 6 4500 +} + setup_routing_old() { for i in ${routes}; do [ "${ns}" = "" ] && ns="${i}" && continue @@ -823,6 +889,11 @@ cleanup() { done tcpdump_pids= + for pid in ${nettest_pids}; do + kill ${pid} + done + nettest_pids= + for n in ${NS_A} ${NS_B} ${NS_C} ${NS_R1} ${NS_R2}; do ip netns del ${n} 2> /dev/null done @@ -1432,6 +1503,135 @@ test_pmtu_vti6_exception() { return ${fail} } +test_pmtu_vti4_udp_exception() { + setup namespaces veth vti4 xfrm4udp || return $ksft_skip + trace "${ns_a}" veth_a "${ns_b}" veth_b \ + "${ns_a}" vti4_a "${ns_b}" vti4_b + + veth_mtu=1500 + vti_mtu=$((veth_mtu - 20)) + + # UDP SPI SN IV ICV pad length next header + esp_payload_rfc4106=$((vti_mtu - 8 - 4 - 4 - 8 - 16 - 1 - 1)) + ping_payload=$((esp_payload_rfc4106 - 28)) + + mtu "${ns_a}" veth_a ${veth_mtu} + mtu "${ns_b}" veth_b ${veth_mtu} + mtu "${ns_a}" vti4_a ${vti_mtu} + mtu "${ns_b}" vti4_b ${vti_mtu} + + # Send DF packet without exceeding link layer MTU, check that no + # exception is created + run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel4_b_addr} + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})" + check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1 + + # Now exceed link layer MTU by one byte, check that exception is created + # with the right PMTU value + run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((ping_payload + 1)) ${tunnel4_b_addr} + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})" + check_pmtu_value "${esp_payload_rfc4106}" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106 + 1)))" +} + +test_pmtu_vti6_udp_exception() { + setup namespaces veth vti6 xfrm6udp || return $ksft_skip + trace "${ns_a}" veth_a "${ns_b}" veth_b \ + "${ns_a}" vti6_a "${ns_b}" vti6_b + fail=0 + + # Create route exception by exceeding link layer MTU + mtu "${ns_a}" veth_a 4000 + mtu "${ns_b}" veth_b 4000 + mtu "${ns_a}" vti6_a 5000 + mtu "${ns_b}" vti6_b 5000 + run_cmd ${ns_a} ${ping6} -q -i 0.1 -w 1 -s 60000 ${tunnel6_b_addr} + + # Check that exception was created + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})" + check_pmtu_value any "${pmtu}" "creating tunnel exceeding link layer MTU" || return 1 + + # Decrease tunnel MTU, check for PMTU decrease in route exception + mtu "${ns_a}" vti6_a 3000 + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})" + check_pmtu_value "3000" "${pmtu}" "decreasing tunnel MTU" || fail=1 + + # Increase tunnel MTU, check for PMTU increase in route exception + mtu "${ns_a}" vti6_a 9000 + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})" + check_pmtu_value "9000" "${pmtu}" "increasing tunnel MTU" || fail=1 + + return ${fail} +} + +test_pmtu_vti4_udp_routed_exception() { + setup namespaces routing vti4routed xfrm4udprouted || return $ksft_skip + trace "${ns_a}" veth_A-R1 "${ns_b}" veth_B-R1 \ + "${ns_a}" vti4_a "${ns_b}" vti4_b + + veth_mtu=1500 + vti_mtu=$((veth_mtu - 20)) + + # UDP SPI SN IV ICV pad length next header + esp_payload_rfc4106=$((vti_mtu - 8 - 4 - 4 - 8 - 16 - 1 - 1)) + ping_payload=$((esp_payload_rfc4106 - 28)) + + mtu "${ns_a}" veth_A-R1 ${veth_mtu} + mtu "${ns_r1}" veth_R1-A ${veth_mtu} + mtu "${ns_b}" veth_B-R1 ${veth_mtu} + mtu "${ns_r1}" veth_R1-B ${veth_mtu} + + mtu "${ns_a}" vti4_a ${vti_mtu} + mtu "${ns_b}" vti4_b ${vti_mtu} + + # Send DF packet without exceeding link layer MTU, check that no + # exception is created + run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel4_b_addr} + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})" + check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1 + + # Now decrease link layer MTU by 8 bytes on R1, check that exception is created + # with the right PMTU value + mtu "${ns_r1}" veth_R1-B $((veth_mtu - 8)) + run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((ping_payload)) ${tunnel4_b_addr} + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})" + check_pmtu_value "$((esp_payload_rfc4106 - 8))" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106)))" +} + +test_pmtu_vti6_udp_routed_exception() { + setup namespaces routing vti6routed xfrm6udprouted || return $ksft_skip + trace "${ns_a}" veth_A-R1 "${ns_b}" veth_B-R1 \ + "${ns_a}" vti6_a "${ns_b}" vti6_b + + veth_mtu=1500 + vti_mtu=$((veth_mtu - 40)) + + # UDP SPI SN IV ICV pad length next header + esp_payload_rfc4106=$((vti_mtu - 8 - 4 - 4 - 8 - 16 - 1 - 1)) + ping_payload=$((esp_payload_rfc4106 - 48)) + + mtu "${ns_a}" veth_A-R1 ${veth_mtu} + mtu "${ns_r1}" veth_R1-A ${veth_mtu} + mtu "${ns_b}" veth_B-R1 ${veth_mtu} + mtu "${ns_r1}" veth_R1-B ${veth_mtu} + + # mtu "${ns_a}" vti6_a ${vti_mtu} + # mtu "${ns_b}" vti6_b ${vti_mtu} + + run_cmd ${ns_a} ${ping6} -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel6_b_addr} + + # Check that exception was not created + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})" + check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1 + + # Now decrease link layer MTU by 8 bytes on R1, check that exception is created + # with the right PMTU value + mtu "${ns_r1}" veth_R1-B $((veth_mtu - 8)) + run_cmd ${ns_a} ${ping6} -q -M want -i 0.1 -w 1 -s $((ping_payload)) ${tunnel6_b_addr} + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})" + check_pmtu_value "$((esp_payload_rfc4106 - 8))" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106)))" + +} + test_pmtu_vti4_default_mtu() { setup namespaces veth vti4 || return $ksft_skip |