diff options
author | Jakub Kicinski <kuba@kernel.org> | 2022-01-09 16:59:30 -0800 |
---|---|---|
committer | Jakub Kicinski <kuba@kernel.org> | 2022-01-09 17:00:17 -0800 |
commit | 8aaaf2f3af2ae212428f4db1af34214225f5cec3 (patch) | |
tree | 43c7f4f8af2fad7919f169b0924dba5e43147d97 | |
parent | 208dd45d8d050360b46ded439a057bcc7cbf3b09 (diff) | |
parent | dd3ca4c5184ea98e40acb8eb293d85b88ea04ee2 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Merge in fixes directly in prep for the 5.17 merge window.
No conflicts.
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
48 files changed, 398 insertions, 145 deletions
diff --git a/drivers/net/amt.c b/drivers/net/amt.c index a1c7a8acd9c8..f1a36d7e2151 100644 --- a/drivers/net/amt.c +++ b/drivers/net/amt.c @@ -1107,7 +1107,7 @@ static bool amt_send_membership_query(struct amt_dev *amt, rt = ip_route_output_key(amt->net, &fl4); if (IS_ERR(rt)) { netdev_dbg(amt->dev, "no route to %pI4\n", &tunnel->ip4); - return -1; + return true; } amtmq = skb_push(skb, sizeof(*amtmq)); diff --git a/drivers/net/can/rcar/rcar_canfd.c b/drivers/net/can/rcar/rcar_canfd.c index 82cc71d5ee10..b7dc1c32875f 100644 --- a/drivers/net/can/rcar/rcar_canfd.c +++ b/drivers/net/can/rcar/rcar_canfd.c @@ -1633,8 +1633,7 @@ static int rcar_canfd_channel_probe(struct rcar_canfd_global *gpriv, u32 ch, ndev = alloc_candev(sizeof(*priv), RCANFD_FIFO_DEPTH); if (!ndev) { dev_err(&pdev->dev, "alloc_candev() failed\n"); - err = -ENOMEM; - goto fail; + return -ENOMEM; } priv = netdev_priv(ndev); @@ -1730,8 +1729,8 @@ static int rcar_canfd_channel_probe(struct rcar_canfd_global *gpriv, u32 ch, fail_candev: netif_napi_del(&priv->napi); - free_candev(ndev); fail: + free_candev(ndev); return err; } diff --git a/drivers/net/can/softing/softing_cs.c b/drivers/net/can/softing/softing_cs.c index 2e93ee792373..e5c939b63fa6 100644 --- a/drivers/net/can/softing/softing_cs.c +++ b/drivers/net/can/softing/softing_cs.c @@ -293,7 +293,7 @@ static int softingcs_probe(struct pcmcia_device *pcmcia) return 0; platform_failed: - kfree(dev); + platform_device_put(pdev); mem_failed: pcmcia_bad: pcmcia_failed: diff --git a/drivers/net/can/softing/softing_fw.c b/drivers/net/can/softing/softing_fw.c index 7e1536877993..32286f861a19 100644 --- a/drivers/net/can/softing/softing_fw.c +++ b/drivers/net/can/softing/softing_fw.c @@ -565,18 +565,19 @@ int softing_startstop(struct net_device *dev, int up) if (ret < 0) goto failed; } - /* enable_error_frame */ - /* + + /* enable_error_frame + * * Error reporting is switched off at the moment since * the receiving of them is not yet 100% verified * This should be enabled sooner or later - * - if (error_reporting) { + */ + if (0 && error_reporting) { ret = softing_fct_cmd(card, 51, "enable_error_frame"); if (ret < 0) goto failed; } - */ + /* initialize interface */ iowrite16(1, &card->dpram[DPRAM_FCT_PARAM + 2]); iowrite16(1, &card->dpram[DPRAM_FCT_PARAM + 4]); diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c index 03b012963c20..b487e3fe770a 100644 --- a/drivers/net/can/usb/gs_usb.c +++ b/drivers/net/can/usb/gs_usb.c @@ -321,7 +321,7 @@ static void gs_usb_receive_bulk_callback(struct urb *urb) /* device reports out of range channel id */ if (hf->channel >= GS_MAX_INTF) - goto resubmit_urb; + goto device_detach; dev = usbcan->canch[hf->channel]; @@ -405,6 +405,7 @@ static void gs_usb_receive_bulk_callback(struct urb *urb) /* USB failure take down all interfaces */ if (rc == -ENODEV) { + device_detach: for (rc = 0; rc < GS_MAX_INTF; rc++) { if (usbcan->canch[rc]) netif_device_detach(usbcan->canch[rc]->netdev); @@ -506,6 +507,8 @@ static netdev_tx_t gs_can_start_xmit(struct sk_buff *skb, hf->echo_id = idx; hf->channel = dev->channel; + hf->flags = 0; + hf->reserved = 0; cf = (struct can_frame *)skb->data; diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c index ffca1cd3b384..1674b561c9a2 100644 --- a/drivers/net/can/xilinx_can.c +++ b/drivers/net/can/xilinx_can.c @@ -1758,7 +1758,12 @@ static int xcan_probe(struct platform_device *pdev) spin_lock_init(&priv->tx_lock); /* Get IRQ for the device */ - ndev->irq = platform_get_irq(pdev, 0); + ret = platform_get_irq(pdev, 0); + if (ret < 0) + goto err_free; + + ndev->irq = ret; + ndev->flags |= IFF_ECHO; /* We support local echo */ platform_set_drvdata(pdev, ndev); diff --git a/drivers/net/ethernet/freescale/fman/mac.c b/drivers/net/ethernet/freescale/fman/mac.c index d9fc5c456bf3..39ae965cd4f6 100644 --- a/drivers/net/ethernet/freescale/fman/mac.c +++ b/drivers/net/ethernet/freescale/fman/mac.c @@ -94,14 +94,17 @@ static void mac_exception(void *handle, enum fman_mac_exceptions ex) __func__, ex); } -static void set_fman_mac_params(struct mac_device *mac_dev, - struct fman_mac_params *params) +static int set_fman_mac_params(struct mac_device *mac_dev, + struct fman_mac_params *params) { struct mac_priv_s *priv = mac_dev->priv; params->base_addr = (typeof(params->base_addr)) devm_ioremap(priv->dev, mac_dev->res->start, resource_size(mac_dev->res)); + if (!params->base_addr) + return -ENOMEM; + memcpy(¶ms->addr, mac_dev->addr, sizeof(mac_dev->addr)); params->max_speed = priv->max_speed; params->phy_if = mac_dev->phy_if; @@ -112,6 +115,8 @@ static void set_fman_mac_params(struct mac_device *mac_dev, params->event_cb = mac_exception; params->dev_id = mac_dev; params->internal_phy_node = priv->internal_phy_node; + + return 0; } static int tgec_initialization(struct mac_device *mac_dev) @@ -123,7 +128,9 @@ static int tgec_initialization(struct mac_device *mac_dev) priv = mac_dev->priv; - set_fman_mac_params(mac_dev, ¶ms); + err = set_fman_mac_params(mac_dev, ¶ms); + if (err) + goto _return; mac_dev->fman_mac = tgec_config(¶ms); if (!mac_dev->fman_mac) { @@ -169,7 +176,9 @@ static int dtsec_initialization(struct mac_device *mac_dev) priv = mac_dev->priv; - set_fman_mac_params(mac_dev, ¶ms); + err = set_fman_mac_params(mac_dev, ¶ms); + if (err) + goto _return; mac_dev->fman_mac = dtsec_config(¶ms); if (!mac_dev->fman_mac) { @@ -218,7 +227,9 @@ static int memac_initialization(struct mac_device *mac_dev) priv = mac_dev->priv; - set_fman_mac_params(mac_dev, ¶ms); + err = set_fman_mac_params(mac_dev, ¶ms); + if (err) + goto _return; if (priv->max_speed == SPEED_10000) params.phy_if = PHY_INTERFACE_MODE_XGMII; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/ptp.c b/drivers/net/ethernet/marvell/octeontx2/af/ptp.c index d6321de3cc17..e682b7bfde64 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/ptp.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/ptp.c @@ -60,6 +60,8 @@ struct ptp *ptp_get(void) /* Check driver is bound to PTP block */ if (!ptp) ptp = ERR_PTR(-EPROBE_DEFER); + else + pci_dev_get(ptp->pdev); return ptp; } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c index 45357deecabb..a73a8017e0ee 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c @@ -172,14 +172,13 @@ static int cpt_10k_register_interrupts(struct rvu_block *block, int off) { struct rvu *rvu = block->rvu; int blkaddr = block->addr; - char irq_name[16]; int i, ret; for (i = CPT_10K_AF_INT_VEC_FLT0; i < CPT_10K_AF_INT_VEC_RVU; i++) { - snprintf(irq_name, sizeof(irq_name), "CPTAF FLT%d", i); + sprintf(&rvu->irq_name[(off + i) * NAME_SIZE], "CPTAF FLT%d", i); ret = rvu_cpt_do_register_interrupt(block, off + i, rvu_cpt_af_flt_intr_handler, - irq_name); + &rvu->irq_name[(off + i) * NAME_SIZE]); if (ret) goto err; rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1S(i), 0x1); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c index 70bacd38a6d9..d0ab8f233a02 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c @@ -41,7 +41,7 @@ static bool rvu_common_request_irq(struct rvu *rvu, int offset, struct rvu_devlink *rvu_dl = rvu->rvu_dl; int rc; - sprintf(&rvu->irq_name[offset * NAME_SIZE], name); + sprintf(&rvu->irq_name[offset * NAME_SIZE], "%s", name); rc = request_irq(pci_irq_vector(rvu->pdev, offset), fn, 0, &rvu->irq_name[offset * NAME_SIZE], rvu_dl); if (rc) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c index 254bebffe8c1..925b74ebb8b0 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c @@ -684,7 +684,7 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id) err = register_netdev(netdev); if (err) { dev_err(dev, "Failed to register netdevice\n"); - goto err_detach_rsrc; + goto err_ptp_destroy; } err = otx2_wq_init(vf); @@ -709,6 +709,8 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id) err_unreg_netdev: unregister_netdev(netdev); +err_ptp_destroy: + otx2_ptp_destroy(vf); err_detach_rsrc: if (test_bit(CN10K_LMTST, &vf->hw.cap_flag)) qmem_free(vf->dev, vf->dync_lmt); @@ -742,6 +744,7 @@ static void otx2vf_remove(struct pci_dev *pdev) unregister_netdev(netdev); if (vf->otx2_wq) destroy_workqueue(vf->otx2_wq); + otx2_ptp_destroy(vf); otx2vf_disable_mbox_intr(vf); otx2_detach_resources(&vf->mbox); if (test_bit(CN10K_LMTST, &vf->hw.cap_flag)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index a46284ca5172..17fe05809653 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -148,8 +148,12 @@ static void cmd_ent_put(struct mlx5_cmd_work_ent *ent) if (!refcount_dec_and_test(&ent->refcnt)) return; - if (ent->idx >= 0) - cmd_free_index(ent->cmd, ent->idx); + if (ent->idx >= 0) { + struct mlx5_cmd *cmd = ent->cmd; + + cmd_free_index(cmd, ent->idx); + up(ent->page_queue ? &cmd->pages_sem : &cmd->sem); + } cmd_free_ent(ent); } @@ -900,25 +904,6 @@ static bool opcode_allowed(struct mlx5_cmd *cmd, u16 opcode) return cmd->allowed_opcode == opcode; } -static int cmd_alloc_index_retry(struct mlx5_cmd *cmd) -{ - unsigned long alloc_end = jiffies + msecs_to_jiffies(1000); - int idx; - -retry: - idx = cmd_alloc_index(cmd); - if (idx < 0 && time_before(jiffies, alloc_end)) { - /* Index allocation can fail on heavy load of commands. This is a temporary - * situation as the current command already holds the semaphore, meaning that - * another command completion is being handled and it is expected to release - * the entry index soon. - */ - cpu_relax(); - goto retry; - } - return idx; -} - bool mlx5_cmd_is_down(struct mlx5_core_dev *dev) { return pci_channel_offline(dev->pdev) || @@ -946,7 +931,7 @@ static void cmd_work_handler(struct work_struct *work) sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem; down(sem); if (!ent->page_queue) { - alloc_ret = cmd_alloc_index_retry(cmd); + alloc_ret = cmd_alloc_index(cmd); if (alloc_ret < 0) { mlx5_core_err_rl(dev, "failed to allocate command entry\n"); if (ent->callback) { @@ -1602,8 +1587,6 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force vector = vec & 0xffffffff; for (i = 0; i < (1 << cmd->log_sz); i++) { if (test_bit(i, &vector)) { - struct semaphore *sem; - ent = cmd->ent_arr[i]; /* if we already completed the command, ignore it */ @@ -1626,10 +1609,6 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) cmd_ent_put(ent); - if (ent->page_queue) - sem = &cmd->pages_sem; - else - sem = &cmd->sem; ent->ts2 = ktime_get_ns(); memcpy(ent->out->first.data, ent->lay->out, sizeof(ent->lay->out)); dump_command(dev, ent, 0); @@ -1683,7 +1662,6 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force */ complete(&ent->done); } - up(sem); } } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c index 3c8851ee8172..9918ed8c059b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c @@ -1579,6 +1579,8 @@ mlx5e_init_fib_work_ipv4(struct mlx5e_priv *priv, struct net_device *fib_dev; fen_info = container_of(info, struct fib_entry_notifier_info, info); + if (fen_info->fi->nh) + return NULL; fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev; if (!fib_dev || fib_dev->netdev_ops != &mlx5e_netdev_ops || fen_info->dst_len != 32) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c index 7b562d2c8a19..279cd8f4e79f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c @@ -11,13 +11,13 @@ static int mlx5e_xsk_map_pool(struct mlx5e_priv *priv, { struct device *dev = mlx5_core_dma_dev(priv->mdev); - return xsk_pool_dma_map(pool, dev, 0); + return xsk_pool_dma_map(pool, dev, DMA_ATTR_SKIP_CPU_SYNC); } static void mlx5e_xsk_unmap_pool(struct mlx5e_priv *priv, struct xsk_buff_pool *pool) { - return xsk_pool_dma_unmap(pool, 0); + return xsk_pool_dma_unmap(pool, DMA_ATTR_SKIP_CPU_SYNC); } static int mlx5e_xsk_get_pools(struct mlx5e_xsk *xsk) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index d36489363a26..ac69e0aa09bf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4818,15 +4818,22 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) } if (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev)) { - netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL; - netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL; - netdev->vlan_features |= NETIF_F_GSO_UDP_TUNNEL; + netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM; + netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM; + netdev->gso_partial_features = NETIF_F_GSO_UDP_TUNNEL_CSUM; + netdev->vlan_features |= NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM; } if (mlx5e_tunnel_proto_supported_tx(mdev, IPPROTO_GRE)) { - netdev->hw_features |= NETIF_F_GSO_GRE; - netdev->hw_enc_features |= NETIF_F_GSO_GRE; - netdev->gso_partial_features |= NETIF_F_GSO_GRE; + netdev->hw_features |= NETIF_F_GSO_GRE | + NETIF_F_GSO_GRE_CSUM; + netdev->hw_enc_features |= NETIF_F_GSO_GRE | + NETIF_F_GSO_GRE_CSUM; + netdev->gso_partial_features |= NETIF_F_GSO_GRE | + NETIF_F_GSO_GRE_CSUM; } if (mlx5e_tunnel_proto_supported_tx(mdev, IPPROTO_IPIP)) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 8c0f4cfbe471..06d1f46f1688 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -50,6 +50,7 @@ #include "fs_core.h" #include "lib/mlx5.h" #include "lib/devcom.h" +#include "lib/vxlan.h" #define CREATE_TRACE_POINTS #include "diag/en_rep_tracepoint.h" #include "en_accel/ipsec.h" @@ -1039,6 +1040,7 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv) rtnl_lock(); if (netif_running(netdev)) mlx5e_open(netdev); + udp_tunnel_nic_reset_ntf(priv->netdev); netif_device_attach(netdev); rtnl_unlock(); } @@ -1060,6 +1062,7 @@ static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv) mlx5_notifier_unregister(mdev, &priv->events_nb); mlx5e_rep_tc_disable(priv); mlx5_lag_remove_netdev(mdev, priv->netdev); + mlx5_vxlan_reset_to_default(mdev->vxlan); } static MLX5E_DEFINE_STATS_GRP(sw_rep, 0); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 96e260fd7987..e86ccc22fb82 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -279,8 +279,8 @@ static inline int mlx5e_page_alloc_pool(struct mlx5e_rq *rq, if (unlikely(!dma_info->page)) return -ENOMEM; - dma_info->addr = dma_map_page(rq->pdev, dma_info->page, 0, - PAGE_SIZE, rq->buff.map_dir); + dma_info->addr = dma_map_page_attrs(rq->pdev, dma_info->page, 0, PAGE_SIZE, + rq->buff.map_dir, DMA_ATTR_SKIP_CPU_SYNC); if (unlikely(dma_mapping_error(rq->pdev, dma_info->addr))) { page_pool_recycle_direct(rq->page_pool, dma_info->page); dma_info->page = NULL; @@ -301,7 +301,8 @@ static inline int mlx5e_page_alloc(struct mlx5e_rq *rq, void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info) { - dma_unmap_page(rq->pdev, dma_info->addr, PAGE_SIZE, rq->buff.map_dir); + dma_unmap_page_attrs(rq->pdev, dma_info->addr, PAGE_SIZE, rq->buff.map_dir, + DMA_ATTR_SKIP_CPU_SYNC); } void mlx5e_page_release_dynamic(struct mlx5e_rq *rq, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 0d09743b5f9a..3d908a7e1406 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1938,6 +1938,111 @@ u8 mlx5e_tc_get_ip_version(struct mlx5_flow_spec *spec, bool outer) return ip_version; } +/* Tunnel device follows RFC 6040, see include/net/inet_ecn.h. + * And changes inner ip_ecn depending on inner and outer ip_ecn as follows: + * +---------+----------------------------------------+ + * |Arriving | Arriving Outer Header | + * | Inner +---------+---------+---------+----------+ + * | Header | Not-ECT | ECT(0) | ECT(1) | CE | + * +---------+---------+---------+---------+----------+ + * | Not-ECT | Not-ECT | Not-ECT | Not-ECT | <drop> | + * | ECT(0) | ECT(0) | ECT(0) | ECT(1) | CE* | + * | ECT(1) | ECT(1) | ECT(1) | ECT(1)* | CE* | + * | CE | CE | CE | CE | CE | + * +---------+---------+---------+---------+----------+ + * + * Tc matches on inner after decapsulation on tunnel device, but hw offload matches + * the inner ip_ecn value before hardware decap action. + * + * Cells marked are changed from original inner packet ip_ecn value during decap, and + * so matching those values on inner ip_ecn before decap will fail. + * + * The following helper allows offload when inner ip_ecn won't be changed by outer ip_ecn, + * except for the outer ip_ecn = CE, where in all cases inner ip_ecn will be changed to CE, + * and such we can drop the inner ip_ecn=CE match. + */ + +static int mlx5e_tc_verify_tunnel_ecn(struct mlx5e_priv *priv, + struct flow_cls_offload *f, + bool *match_inner_ecn) +{ + u8 outer_ecn_mask = 0, outer_ecn_key = 0, inner_ecn_mask = 0, inner_ecn_key = 0; + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct netlink_ext_ack *extack = f->common.extack; + struct flow_match_ip match; + + *match_inner_ecn = true; + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) { + flow_rule_match_enc_ip(rule, &match); + outer_ecn_key = match.key->tos & INET_ECN_MASK; + outer_ecn_mask = match.mask->tos & INET_ECN_MASK; + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) { + flow_rule_match_ip(rule, &match); + inner_ecn_key = match.key->tos & INET_ECN_MASK; + inner_ecn_mask = match.mask->tos & INET_ECN_MASK; + } + + if (outer_ecn_mask != 0 && outer_ecn_mask != INET_ECN_MASK) { + NL_SET_ERR_MSG_MOD(extack, "Partial match on enc_tos ecn bits isn't supported"); + netdev_warn(priv->netdev, "Partial match on enc_tos ecn bits isn't supported"); + return -EOPNOTSUPP; + } + + if (!outer_ecn_mask) { + if (!inner_ecn_mask) + return 0; + + NL_SET_ERR_MSG_MOD(extack, + "Matching on tos ecn bits without also matching enc_tos ecn bits isn't supported"); + netdev_warn(priv->netdev, + "Matching on tos ecn bits without also matching enc_tos ecn bits isn't supported"); + return -EOPNOTSUPP; + } + + if (inner_ecn_mask && inner_ecn_mask != INET_ECN_MASK) { + NL_SET_ERR_MSG_MOD(extack, + "Partial match on tos ecn bits with match on enc_tos ecn bits isn't supported"); + netdev_warn(priv->netdev, + "Partial match on tos ecn bits with match on enc_tos ecn bits isn't supported"); + return -EOPNOTSUPP; + } + + if (!inner_ecn_mask) + return 0; + + /* Both inner and outer have full mask on ecn */ + + if (outer_ecn_key == INET_ECN_ECT_1) { + /* inner ecn might change by DECAP action */ + + NL_SET_ERR_MSG_MOD(extack, "Match on enc_tos ecn = ECT(1) isn't supported"); + netdev_warn(priv->netdev, "Match on enc_tos ecn = ECT(1) isn't supported"); + return -EOPNOTSUPP; + } + + if (outer_ecn_key != INET_ECN_CE) + return 0; + + if (inner_ecn_key != INET_ECN_CE) { + /* Can't happen in software, as packet ecn will be changed to CE after decap */ + NL_SET_ERR_MSG_MOD(extack, + "Match on tos enc_tos ecn = CE while match on tos ecn != CE isn't supported"); + netdev_warn(priv->netdev, + "Match on tos enc_tos ecn = CE while match on tos ecn != CE isn't supported"); + return -EOPNOTSUPP; + } + + /* outer ecn = CE, inner ecn = CE, as decap will change inner ecn to CE in anycase, + * drop match on inner ecn + */ + *match_inner_ecn = false; + + return 0; +} + static int parse_tunnel_attr(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct mlx5_flow_spec *spec, @@ -2131,6 +2236,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, struct flow_rule *rule = flow_cls_offload_flow_rule(f); struct flow_dissector *dissector = rule->match.dissector; enum fs_flow_table_type fs_type; + bool match_inner_ecn = true; u16 addr_type = 0; u8 ip_proto = 0; u8 *match_level; @@ -2184,6 +2290,10 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, headers_c = get_match_inner_headers_criteria(spec); headers_v = get_match_inner_headers_value(spec); } + + err = mlx5e_tc_verify_tunnel_ecn(priv, f, &match_inner_ecn); + if (err) + return err; } err = mlx5e_flower_parse_meta(filter_dev, f); @@ -2407,10 +2517,12 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, struct flow_match_ip match; flow_rule_match_ip(rule, &match); - MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn, - match.mask->tos & 0x3); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, - match.key->tos & 0x3); + if (match_inner_ecn) { + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn, + match.mask->tos & 0x3); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, + match.key->tos & 0x3); + } MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp, match.mask->tos >> 2); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 3bd5ae6ecd04..9a7b25692505 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -295,26 +295,28 @@ esw_setup_chain_src_port_rewrite(struct mlx5_flow_destination *dest, int *i) { struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; - int j, err; + int err; if (!(attr->flags & MLX5_ESW_ATTR_FLAG_SRC_REWRITE)) return -EOPNOTSUPP; - for (j = esw_attr->split_count; j < esw_attr->out_count; j++, (*i)++) { - err = esw_setup_chain_dest(dest, flow_act, chains, attr->dest_chain, 1, 0, *i); - if (err) - goto err_setup_chain; + /* flow steering cannot handle more than one dest with the same ft + * in a single flow + */ + if (esw_attr->out_count - esw_attr->split_count > 1) + return -EOPNOTSUPP; - if (esw_attr->dests[j].pkt_reformat) { - flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; - flow_act->pkt_reformat = esw_attr->dests[j].pkt_reformat; - } + err = esw_setup_chain_dest(dest, flow_act, chains, attr->dest_chain, 1, 0, *i); + if (err) + return err; + + if (esw_attr->dests[esw_attr->split_count].pkt_reformat) { + flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + flow_act->pkt_reformat = esw_attr->dests[esw_attr->split_count].pkt_reformat; } - return 0; + (*i)++; -err_setup_chain: - esw_put_dest_tables_loop(esw, attr, esw_attr->split_count, j); - return err; + return 0; } static void esw_cleanup_chain_src_port_rewrite(struct mlx5_eswitch *esw, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c index bf4d3cbefa63..1ca01a5b6cdd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c @@ -268,10 +268,8 @@ static int mlx5_lag_fib_event(struct notifier_block *nb, fen_info = container_of(info, struct fib_entry_notifier_info, info); fi = fen_info->fi; - if (fi->nh) { - NL_SET_ERR_MSG_MOD(info->extack, "IPv4 route with nexthop objects is not supported"); - return notifier_from_errno(-EINVAL); - } + if (fi->nh) + return NOTIFY_DONE; fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev; if (fib_dev != ldev->pf[MLX5_LAG_P1].netdev && fib_dev != ldev->pf[MLX5_LAG_P2].netdev) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c index f37db7cc32a6..7da012ff0d41 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c @@ -30,10 +30,7 @@ bool mlx5_sf_dev_allocated(const struct mlx5_core_dev *dev) { struct mlx5_sf_dev_table *table = dev->priv.sf_dev_table; - if (!mlx5_sf_dev_supported(dev)) - return false; - - return !xa_empty(&table->devices); + return table && !xa_empty(&table->devices); } static ssize_t sfnum_show(struct device *dev, struct device_attribute *attr, char *buf) diff --git a/drivers/net/ethernet/rocker/rocker_ofdpa.c b/drivers/net/ethernet/rocker/rocker_ofdpa.c index 3e1ca7a8d029..bc70c6abd6a5 100644 --- a/drivers/net/ethernet/rocker/rocker_ofdpa.c +++ b/drivers/net/ethernet/rocker/rocker_ofdpa.c @@ -2783,7 +2783,8 @@ static void ofdpa_fib4_abort(struct rocker *rocker) if (!ofdpa_port) continue; nh->fib_nh_flags &= ~RTNH_F_OFFLOAD; - ofdpa_flow_tbl_del(ofdpa_port, OFDPA_OP_FLAG_REMOVE, + ofdpa_flow_tbl_del(ofdpa_port, + OFDPA_OP_FLAG_REMOVE | OFDPA_OP_FLAG_NOWAIT, flow_entry); } spin_unlock_irqrestore(&ofdpa->flow_tbl_lock, flags); diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 9e52c5d2d77f..4a365f15533e 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -69,6 +69,8 @@ #define MPHDRLEN 6 /* multilink protocol header length */ #define MPHDRLEN_SSN 4 /* ditto with short sequence numbers */ +#define PPP_PROTO_LEN 2 + /* * An instance of /dev/ppp can be associated with either a ppp * interface unit or a ppp channel. In both cases, file->private_data @@ -498,6 +500,9 @@ static ssize_t ppp_write(struct file *file, const char __user *buf, if (!pf) return -ENXIO; + /* All PPP packets should start with the 2-byte protocol */ + if (count < PPP_PROTO_LEN) + return -EINVAL; ret = -ENOMEM; skb = alloc_skb(count + pf->hdrlen, GFP_KERNEL); if (!skb) @@ -1765,7 +1770,7 @@ ppp_send_frame(struct ppp *ppp, struct sk_buff *skb) } ++ppp->stats64.tx_packets; - ppp->stats64.tx_bytes += skb->len - 2; + ppp->stats64.tx_bytes += skb->len - PPP_PROTO_LEN; switch (proto) { case PPP_IP: diff --git a/drivers/net/usb/mcs7830.c b/drivers/net/usb/mcs7830.c index 326cc4e749d8..fdda0616704e 100644 --- a/drivers/net/usb/mcs7830.c +++ b/drivers/net/usb/mcs7830.c @@ -108,8 +108,16 @@ static const char driver_name[] = "MOSCHIP usb-ethernet driver"; static int mcs7830_get_reg(struct usbnet *dev, u16 index, u16 size, void *data) { - return usbnet_read_cmd(dev, MCS7830_RD_BREQ, MCS7830_RD_BMREQ, - 0x0000, index, data, size); + int ret; + + ret = usbnet_read_cmd(dev, MCS7830_RD_BREQ, MCS7830_RD_BMREQ, + 0x0000, index, data, size); + if (ret < 0) + return ret; + else if (ret < size) + return -ENODATA; + + return ret; } static int mcs7830_set_reg(struct usbnet *dev, u16 index, u16 size, const void *data) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index af64c7de9b53..bf11e1fbd69b 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -288,7 +288,9 @@ struct tc_skb_ext { __u32 chain; __u16 mru; __u16 zone; - bool post_ct; + u8 post_ct:1; + u8 post_ct_snat:1; + u8 post_ct_dnat:1; }; #endif diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 9e71691c491b..9e7b21c0b3a6 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -197,7 +197,9 @@ struct tc_skb_cb { struct qdisc_skb_cb qdisc_cb; u16 mru; - bool post_ct; + u8 post_ct:1; + u8 post_ct_snat:1; + u8 post_ct_dnat:1; u16 zone; /* Only valid if post_ct = true */ }; diff --git a/include/net/xfrm.h b/include/net/xfrm.h index d728d7b9a72e..fdb41e8bb626 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1168,7 +1168,7 @@ static inline int xfrm_route_forward(struct sk_buff *skb, unsigned short family) { struct net *net = dev_net(skb->dev); - if (xfrm_default_allow(net, XFRM_POLICY_FWD)) + if (xfrm_default_allow(net, XFRM_POLICY_OUT)) return !net->xfrm.policy_count[XFRM_POLICY_OUT] || (skb_dst(skb)->flags & DST_NOXFRM) || __xfrm_route_forward(skb, family); diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index cfca99e295b8..02f43f3e2c56 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -536,7 +536,7 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname, ax25_cb *ax25; struct net_device *dev; char devname[IFNAMSIZ]; - unsigned long opt; + unsigned int opt; int res = 0; if (level != SOL_AX25) @@ -568,7 +568,7 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname, break; case AX25_T1: - if (opt < 1 || opt > ULONG_MAX / HZ) { + if (opt < 1 || opt > UINT_MAX / HZ) { res = -EINVAL; break; } @@ -577,7 +577,7 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname, break; case AX25_T2: - if (opt < 1 || opt > ULONG_MAX / HZ) { + if (opt < 1 || opt > UINT_MAX / HZ) { res = -EINVAL; break; } @@ -593,7 +593,7 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname, break; case AX25_T3: - if (opt < 1 || opt > ULONG_MAX / HZ) { + if (opt < 1 || opt > UINT_MAX / HZ) { res = -EINVAL; break; } @@ -601,7 +601,7 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname, break; case AX25_IDLE: - if (opt > ULONG_MAX / (60 * HZ)) { + if (opt > UINT_MAX / (60 * HZ)) { res = -EINVAL; break; } diff --git a/net/can/isotp.c b/net/can/isotp.c index df6968b28bf4..02cbcb2ecf0d 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -119,8 +119,8 @@ enum { }; struct tpcon { - int idx; - int len; + unsigned int idx; + unsigned int len; u32 state; u8 bs; u8 sn; diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 8fd1aba8af31..b518f20c9a24 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -520,8 +520,11 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par) if (IS_ERR(config)) return PTR_ERR(config); } - } else if (memcmp(&config->clustermac, &cipinfo->clustermac, ETH_ALEN)) + } else if (memcmp(&config->clustermac, &cipinfo->clustermac, ETH_ALEN)) { + clusterip_config_entry_put(config); + clusterip_config_put(config); return -EINVAL; + } ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) { diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 110839a88bc2..8753e9cec326 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -755,6 +755,7 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb, fl6->daddr = key->u.ipv6.dst; fl6->flowlabel = key->label; fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL); + fl6->fl6_gre_key = tunnel_id_to_key32(key->tun_id); dsfield = key->tos; flags = key->tun_flags & @@ -990,6 +991,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, fl6.daddr = key->u.ipv6.dst; fl6.flowlabel = key->label; fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); + fl6.fl6_gre_key = tunnel_id_to_key32(key->tun_id); dsfield = key->tos; if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT)) @@ -1098,6 +1100,7 @@ static void ip6gre_tnl_link_config_common(struct ip6_tnl *t) fl6->flowi6_oif = p->link; fl6->flowlabel = 0; fl6->flowi6_proto = IPPROTO_GRE; + fl6->fl6_gre_key = t->parms.o_key; if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS)) fl6->flowlabel |= IPV6_TCLASS_MASK & p->flowinfo; @@ -1544,7 +1547,7 @@ static void ip6gre_fb_tunnel_init(struct net_device *dev) static struct inet6_protocol ip6gre_protocol __read_mostly = { .handler = gre_rcv, .err_handler = ip6gre_err, - .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, + .flags = INET6_PROTO_FINAL, }; static void ip6gre_destroy_tunnels(struct net *net, struct list_head *head) diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 8ed2d9f4a84d..645dd984fef0 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -845,10 +845,13 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb, if (mptcp_established_options_mp(sk, skb, snd_data_fin, &opt_size, remaining, opts)) ret = true; else if (mptcp_established_options_dss(sk, skb, snd_data_fin, &opt_size, remaining, opts)) { + unsigned int mp_fail_size; + ret = true; - if (mptcp_established_options_mp_fail(sk, &opt_size, remaining, opts)) { - *size += opt_size; - remaining -= opt_size; + if (mptcp_established_options_mp_fail(sk, &mp_fail_size, + remaining - opt_size, opts)) { + *size += opt_size + mp_fail_size; + remaining -= opt_size - mp_fail_size; return true; } } @@ -1331,6 +1334,7 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, put_unaligned_be32(mpext->data_len << 16 | TCPOPT_NOP << 8 | TCPOPT_NOP, ptr); } + ptr += 1; } } else if (OPTIONS_MPTCP_MPC & opts->suboptions) { u8 len, flag = MPTCP_CAP_HMAC_SHA256; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 62d418813503..f60f01b14fac 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -966,7 +966,9 @@ static void __mptcp_mem_reclaim_partial(struct sock *sk) lockdep_assert_held_once(&sk->sk_lock.slock); - __mptcp_rmem_reclaim(sk, reclaimable - 1); + if (reclaimable > SK_MEM_QUANTUM) + __mptcp_rmem_reclaim(sk, reclaimable - 1); + sk_mem_reclaim_partial(sk); } diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 10f422caa76a..940fed9a760b 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -577,6 +577,9 @@ static int nft_payload_l4csum_offset(const struct nft_pktinfo *pkt, struct sk_buff *skb, unsigned int *l4csum_offset) { + if (pkt->fragoff) + return -1; + switch (pkt->tprot) { case IPPROTO_TCP: *l4csum_offset = offsetof(struct tcphdr, check); diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index dce866d93fee..2c8051d8cca6 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -1290,6 +1290,11 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old) if (!new->scratch_aligned) goto out_scratch; #endif + for_each_possible_cpu(i) + *per_cpu_ptr(new->scratch, i) = NULL; + + if (pipapo_realloc_scratch(new, old->bsize_max)) + goto out_scratch_realloc; rcu_head_init(&new->rcu); @@ -1334,6 +1339,9 @@ out_lt: kvfree(dst->lt); dst--; } +out_scratch_realloc: + for_each_possible_cpu(i) + kfree(*per_cpu_ptr(new->scratch, i)); #ifdef NFT_PIPAPO_ALIGN free_percpu(new->scratch_aligned); #endif diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index f1ba7dd3d253..fa9dc2ba3941 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -298,7 +298,7 @@ static int nr_setsockopt(struct socket *sock, int level, int optname, { struct sock *sk = sock->sk; struct nr_sock *nr = nr_sk(sk); - unsigned long opt; + unsigned int opt; if (level != SOL_NETROM) return -ENOPROTOOPT; @@ -306,18 +306,18 @@ static int nr_setsockopt(struct socket *sock, int level, int optname, if (optlen < sizeof(unsigned int)) return -EINVAL; - if (copy_from_sockptr(&opt, optval, sizeof(unsigned long))) + if (copy_from_sockptr(&opt, optval, sizeof(opt))) return -EFAULT; switch (optname) { case NETROM_T1: - if (opt < 1 || opt > ULONG_MAX / HZ) + if (opt < 1 || opt > UINT_MAX / HZ) return -EINVAL; nr->t1 = opt * HZ; return 0; case NETROM_T2: - if (opt < 1 || opt > ULONG_MAX / HZ) + if (opt < 1 || opt > UINT_MAX / HZ) return -EINVAL; nr->t2 = opt * HZ; return 0; @@ -329,13 +329,13 @@ static int nr_setsockopt(struct socket *sock, int level, int optname, return 0; case NETROM_T4: - if (opt < 1 || opt > ULONG_MAX / HZ) + if (opt < 1 || opt > UINT_MAX / HZ) return -EINVAL; nr->t4 = opt * HZ; return 0; case NETROM_IDLE: - if (opt > ULONG_MAX / (60 * HZ)) + if (opt > UINT_MAX / (60 * HZ)) return -EINVAL; nr->idle = opt * 60 * HZ; return 0; diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 6d262d9aa10e..02096f2ec678 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -859,7 +859,7 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) struct tc_skb_ext *tc_ext; #endif - bool post_ct = false; + bool post_ct = false, post_ct_snat = false, post_ct_dnat = false; int res, err; u16 zone = 0; @@ -900,6 +900,8 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, key->recirc_id = tc_ext ? tc_ext->chain : 0; OVS_CB(skb)->mru = tc_ext ? tc_ext->mru : 0; post_ct = tc_ext ? tc_ext->post_ct : false; + post_ct_snat = post_ct ? tc_ext->post_ct_snat : false; + post_ct_dnat = post_ct ? tc_ext->post_ct_dnat : false; zone = post_ct ? tc_ext->zone : 0; } else { key->recirc_id = 0; @@ -911,8 +913,16 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, err = key_extract(skb, key); if (!err) { ovs_ct_fill_key(skb, key, post_ct); /* Must be after key_extract(). */ - if (post_ct && !skb_get_nfct(skb)) - key->ct_zone = zone; + if (post_ct) { + if (!skb_get_nfct(skb)) { + key->ct_zone = zone; + } else { + if (!post_ct_dnat) + key->ct_state &= ~OVS_CS_F_DST_NAT; + if (!post_ct_snat) + key->ct_state &= ~OVS_CS_F_SRC_NAT; + } + } } return err; } diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index e986f2dc8d61..f99247fc6468 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -856,6 +856,12 @@ static int ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct, } err = nf_nat_packet(ct, ctinfo, hooknum, skb); + if (err == NF_ACCEPT) { + if (maniptype == NF_NAT_MANIP_SRC) + tc_skb_cb(skb)->post_ct_snat = 1; + if (maniptype == NF_NAT_MANIP_DST) + tc_skb_cb(skb)->post_ct_dnat = 1; + } out: return err; } diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index a53c72e6d944..d4e27c679123 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1625,6 +1625,8 @@ int tcf_classify(struct sk_buff *skb, ext->chain = last_executed_chain; ext->mru = cb->mru; ext->post_ct = cb->post_ct; + ext->post_ct_snat = cb->post_ct_snat; + ext->post_ct_dnat = cb->post_ct_dnat; ext->zone = cb->zone; } diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 5ea86b044214..aa3bcaaeabf7 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -632,10 +632,12 @@ static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code, static void smc_conn_abort(struct smc_sock *smc, int local_first) { + struct smc_connection *conn = &smc->conn; + struct smc_link_group *lgr = conn->lgr; + + smc_conn_free(conn); if (local_first) - smc_lgr_cleanup_early(&smc->conn); - else - smc_conn_free(&smc->conn); + smc_lgr_cleanup_early(lgr); } /* check if there is a rdma device available for this connection. */ diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 4ecf1c702b57..8935ef4811b0 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -171,8 +171,10 @@ static int smc_lgr_register_conn(struct smc_connection *conn, bool first) if (!conn->lgr->is_smcd) { rc = smcr_lgr_conn_assign_link(conn, first); - if (rc) + if (rc) { + conn->lgr = NULL; return rc; + } } /* find a new alert_token_local value not yet used by some connection * in this link group @@ -625,15 +627,13 @@ int smcd_nl_get_lgr(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } -void smc_lgr_cleanup_early(struct smc_connection *conn) +void smc_lgr_cleanup_early(struct smc_link_group *lgr) { - struct smc_link_group *lgr = conn->lgr; spinlock_t *lgr_lock; if (!lgr) return; - smc_conn_free(conn); smc_lgr_list_head(lgr, &lgr_lock); spin_lock_bh(lgr_lock); /* do not use this link group for new connections */ @@ -1851,8 +1851,10 @@ create: write_lock_bh(&lgr->conns_lock); rc = smc_lgr_register_conn(conn, true); write_unlock_bh(&lgr->conns_lock); - if (rc) + if (rc) { + smc_lgr_cleanup_early(lgr); goto out; + } } conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE; conn->local_tx_ctrl.len = SMC_WR_TX_SIZE; diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index e778f97da700..521c64a3d8d3 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -485,7 +485,7 @@ static inline void smc_set_pci_values(struct pci_dev *pci_dev, struct smc_sock; struct smc_clc_msg_accept_confirm; -void smc_lgr_cleanup_early(struct smc_connection *conn); +void smc_lgr_cleanup_early(struct smc_link_group *lgr); void smc_lgr_terminate_sched(struct smc_link_group *lgr); void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport); void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport); diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index 41de46b5ffa9..57448fc519fc 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -637,11 +637,16 @@ static int xfrmi_newlink(struct net *src_net, struct net_device *dev, struct netlink_ext_ack *extack) { struct net *net = dev_net(dev); - struct xfrm_if_parms p; + struct xfrm_if_parms p = {}; struct xfrm_if *xi; int err; xfrmi_netlink_parms(data, &p); + if (!p.if_id) { + NL_SET_ERR_MSG(extack, "if_id must be non zero"); + return -EINVAL; + } + xi = xfrmi_locate(net, &p); if (xi) return -EEXIST; @@ -666,7 +671,12 @@ static int xfrmi_changelink(struct net_device *dev, struct nlattr *tb[], { struct xfrm_if *xi = netdev_priv(dev); struct net *net = xi->net; - struct xfrm_if_parms p; + struct xfrm_if_parms p = {}; + + if (!p.if_id) { + NL_SET_ERR_MSG(extack, "if_id must be non zero"); + return -EINVAL; + } xfrmi_netlink_parms(data, &p); xi = xfrmi_locate(net, &p); diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 3585bfc302f9..d4935b3b9983 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -648,10 +648,12 @@ static int xfrm_output_gso(struct net *net, struct sock *sk, struct sk_buff *skb * This requires hardware to know the inner packet type to calculate * the inner header checksum. Save inner ip protocol here to avoid * traversing the packet in the vendor's xmit code. - * If the encap type is IPIP, just save skb->inner_ipproto. Otherwise, - * get the ip protocol from the IP header. + * For IPsec tunnel mode save the ip protocol from the IP header of the + * plain text packet. Otherwise If the encap type is IPIP, just save + * skb->inner_ipproto in any other case get the ip protocol from the IP + * header. */ -static void xfrm_get_inner_ipproto(struct sk_buff *skb) +static void xfrm_get_inner_ipproto(struct sk_buff *skb, struct xfrm_state *x) { struct xfrm_offload *xo = xfrm_offload(skb); const struct ethhdr *eth; @@ -659,6 +661,25 @@ static void xfrm_get_inner_ipproto(struct sk_buff *skb) if (!xo) return; + if (x->outer_mode.encap == XFRM_MODE_TUNNEL) { + switch (x->outer_mode.family) { + case AF_INET: + xo->inner_ipproto = ip_hdr(skb)->protocol; + break; + case AF_INET6: + xo->inner_ipproto = ipv6_hdr(skb)->nexthdr; + break; + default: + break; + } + + return; + } + + /* non-Tunnel Mode */ + if (!skb->encapsulation) + return; + if (skb->inner_protocol_type == ENCAP_TYPE_IPPROTO) { xo->inner_ipproto = skb->inner_ipproto; return; @@ -713,8 +734,7 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb) sp->xvec[sp->len++] = x; xfrm_state_hold(x); - if (skb->encapsulation) - xfrm_get_inner_ipproto(skb); + xfrm_get_inner_ipproto(skb, x); skb->encapsulation = 1; if (skb_is_gso(skb)) { diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 9341298b2a70..dccb8f3318ef 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -33,6 +33,7 @@ #include <net/flow.h> #include <net/xfrm.h> #include <net/ip.h> +#include <net/gre.h> #if IS_ENABLED(CONFIG_IPV6_MIP6) #include <net/mip6.h> #endif @@ -3421,6 +3422,26 @@ decode_session6(struct sk_buff *skb, struct flowi *fl, bool reverse) } fl6->flowi6_proto = nexthdr; return; + case IPPROTO_GRE: + if (!onlyproto && + (nh + offset + 12 < skb->data || + pskb_may_pull(skb, nh + offset + 12 - skb->data))) { + struct gre_base_hdr *gre_hdr; + __be32 *gre_key; + + nh = skb_network_header(skb); + gre_hdr = (struct gre_base_hdr *)(nh + offset); + gre_key = (__be32 *)(gre_hdr + 1); + + if (gre_hdr->flags & GRE_KEY) { + if (gre_hdr->flags & GRE_CSUM) + gre_key++; + fl6->fl6_gre_key = *gre_key; + } + } + fl6->flowi6_proto = nexthdr; + return; + #if IS_ENABLED(CONFIG_IPV6_MIP6) case IPPROTO_MH: offset += ipv6_optlen(exthdr); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 6228c0089b78..8cd6c8129004 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -630,8 +630,13 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, xfrm_smark_init(attrs, &x->props.smark); - if (attrs[XFRMA_IF_ID]) + if (attrs[XFRMA_IF_ID]) { x->if_id = nla_get_u32(attrs[XFRMA_IF_ID]); + if (!x->if_id) { + err = -EINVAL; + goto error; + } + } err = __xfrm_init_state(x, false, attrs[XFRMA_OFFLOAD_DEV]); if (err) @@ -1427,8 +1432,13 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, mark = xfrm_mark_get(attrs, &m); - if (attrs[XFRMA_IF_ID]) + if (attrs[XFRMA_IF_ID]) { if_id = nla_get_u32(attrs[XFRMA_IF_ID]); + if (!if_id) { + err = -EINVAL; + goto out_noput; + } + } if (p->info.seq) { x = xfrm_find_acq_byseq(net, mark, p->info.seq); @@ -1741,8 +1751,13 @@ static struct xfrm_policy *xfrm_policy_construct(struct net *net, struct xfrm_us xfrm_mark_get(attrs, &xp->mark); - if (attrs[XFRMA_IF_ID]) + if (attrs[XFRMA_IF_ID]) { xp->if_id = nla_get_u32(attrs[XFRMA_IF_ID]); + if (!xp->if_id) { + err = -EINVAL; + goto error; + } + } return xp; error: @@ -3072,7 +3087,7 @@ static inline unsigned int xfrm_sa_len(struct xfrm_state *x) if (x->props.extra_flags) l += nla_total_size(sizeof(x->props.extra_flags)); if (x->xso.dev) - l += nla_total_size(sizeof(x->xso)); + l += nla_total_size(sizeof(struct xfrm_user_offload)); if (x->props.smark.v | x->props.smark.m) { l += nla_total_size(sizeof(x->props.smark.v)); l += nla_total_size(sizeof(x->props.smark.m)); diff --git a/tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh b/tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh index 4a6f5c3b3215..eb9553e4986b 100755 --- a/tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh +++ b/tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh @@ -41,7 +41,7 @@ checktool (){ checktool "iptables --version" "run test without iptables" checktool "ip -Version" "run test without ip tool" -checktool "which nc" "run test without nc (netcat)" +checktool "which socat" "run test without socat" checktool "ip netns add ${r_a}" "create net namespace" for n in ${r_b} ${r_w} ${c_a} ${c_b};do @@ -60,11 +60,12 @@ trap cleanup EXIT test_path() { msg="$1" - ip netns exec ${c_b} nc -n -w 3 -q 3 -u -l -p 5000 > ${rx} < /dev/null & + ip netns exec ${c_b} socat -t 3 - udp4-listen:5000,reuseaddr > ${rx} < /dev/null & sleep 1 for i in 1 2 3; do - head -c1400 /dev/zero | tr "\000" "a" | ip netns exec ${c_a} nc -n -w 1 -u 192.168.20.2 5000 + head -c1400 /dev/zero | tr "\000" "a" | \ + ip netns exec ${c_a} socat -t 1 -u STDIN UDP:192.168.20.2:5000 done wait @@ -189,7 +190,7 @@ ip netns exec ${r_w} sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null #--------------------- #Now we send a 1400 bytes UDP packet from Client A to Client B: -# clienta:~# head -c1400 /dev/zero | tr "\000" "a" | nc -u 192.168.20.2 5000 +# clienta:~# head -c1400 /dev/zero | tr "\000" "a" | socat -u STDIN UDP:192.168.20.2:5000 test_path "without" # The IPv4 stack on Client A already knows the PMTU to Client B, so the diff --git a/tools/testing/selftests/netfilter/nf_nat_edemux.sh b/tools/testing/selftests/netfilter/nf_nat_edemux.sh index cfee3b65be0f..1092bbcb1fba 100755 --- a/tools/testing/selftests/netfilter/nf_nat_edemux.sh +++ b/tools/testing/selftests/netfilter/nf_nat_edemux.sh @@ -76,23 +76,23 @@ ip netns exec $ns2 ip route add 10.96.0.1 via 192.168.1.1 sleep 1 # add a persistent connection from the other namespace -ip netns exec $ns2 nc -q 10 -w 10 192.168.1.1 5201 > /dev/null & +ip netns exec $ns2 socat -t 10 - TCP:192.168.1.1:5201 > /dev/null & sleep 1 # ip daddr:dport will be rewritten to 192.168.1.1 5201 # NAT must reallocate source port 10000 because # 192.168.1.2:10000 -> 192.168.1.1:5201 is already in use -echo test | ip netns exec $ns2 nc -w 3 -q 3 10.96.0.1 443 >/dev/null +echo test | ip netns exec $ns2 socat -t 3 -u STDIN TCP:10.96.0.1:443 >/dev/null ret=$? kill $iperfs -# Check nc can connect to 10.96.0.1:443 (aka 192.168.1.1:5201). +# Check socat can connect to 10.96.0.1:443 (aka 192.168.1.1:5201). if [ $ret -eq 0 ]; then - echo "PASS: nc can connect via NAT'd address" + echo "PASS: socat can connect via NAT'd address" else - echo "FAIL: nc cannot connect via NAT'd address" + echo "FAIL: socat cannot connect via NAT'd address" exit 1 fi |