diff options
112 files changed, 940 insertions, 454 deletions
diff --git a/Documentation/devicetree/bindings/net/dsa/b53.txt b/Documentation/devicetree/bindings/net/dsa/b53.txt index 5201bc15fdd6..cfd1afdc6e94 100644 --- a/Documentation/devicetree/bindings/net/dsa/b53.txt +++ b/Documentation/devicetree/bindings/net/dsa/b53.txt @@ -110,6 +110,9 @@ Ethernet switch connected via MDIO to the host, CPU port wired to eth0: #size-cells = <0>; ports { + #address-cells = <1>; + #size-cells = <0>; + port0@0 { reg = <0>; label = "lan1"; diff --git a/drivers/net/can/ifi_canfd/ifi_canfd.c b/drivers/net/can/ifi_canfd/ifi_canfd.c index 04d59bede5ea..74503cacf594 100644 --- a/drivers/net/can/ifi_canfd/ifi_canfd.c +++ b/drivers/net/can/ifi_canfd/ifi_canfd.c @@ -947,8 +947,11 @@ static int ifi_canfd_plat_probe(struct platform_device *pdev) u32 id, rev; addr = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(addr)) + return PTR_ERR(addr); + irq = platform_get_irq(pdev, 0); - if (IS_ERR(addr) || irq < 0) + if (irq < 0) return -EINVAL; id = readl(addr + IFI_CANFD_IP_ID); diff --git a/drivers/net/can/sun4i_can.c b/drivers/net/can/sun4i_can.c index e3ba8ab0cbf4..e2c6cf4b2228 100644 --- a/drivers/net/can/sun4i_can.c +++ b/drivers/net/can/sun4i_can.c @@ -792,7 +792,7 @@ static int sun4ican_probe(struct platform_device *pdev) addr = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(addr)) { - err = -EBUSY; + err = PTR_ERR(addr); goto exit; } diff --git a/drivers/net/dsa/b53/b53_srab.c b/drivers/net/dsa/b53/b53_srab.c index 0a1be5259be0..38cd8285ac67 100644 --- a/drivers/net/dsa/b53/b53_srab.c +++ b/drivers/net/dsa/b53/b53_srab.c @@ -609,7 +609,7 @@ static int b53_srab_probe(struct platform_device *pdev) priv->regs = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(priv->regs)) - return -ENOMEM; + return PTR_ERR(priv->regs); dev = b53_switch_alloc(&pdev->dev, &b53_srab_ops, priv); if (!dev) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 5c444cd722bd..34e4aadfa705 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -628,11 +628,8 @@ mt7530_cpu_port_enable(struct mt7530_priv *priv, mt7530_write(priv, MT7530_PVC_P(port), PORT_SPEC_TAG); - /* Disable auto learning on the cpu port */ - mt7530_set(priv, MT7530_PSC_P(port), SA_DIS); - - /* Unknown unicast frame fordwarding to the cpu port */ - mt7530_set(priv, MT7530_MFC, UNU_FFP(BIT(port))); + /* Unknown multicast frame forwarding to the cpu port */ + mt7530_rmw(priv, MT7530_MFC, UNM_FFP_MASK, UNM_FFP(BIT(port))); /* Set CPU port number */ if (priv->id == ID_MT7621) @@ -1294,8 +1291,6 @@ mt7530_setup(struct dsa_switch *ds) /* Enable and reset MIB counters */ mt7530_mib_reset(ds); - mt7530_clear(priv, MT7530_MFC, UNU_FFP_MASK); - for (i = 0; i < MT7530_NUM_PORTS; i++) { /* Disable forwarding by default on all ports */ mt7530_rmw(priv, MT7530_PCR_P(i), PCR_MATRIX_MASK, diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h index 979bb6374678..82af4d2d406e 100644 --- a/drivers/net/dsa/mt7530.h +++ b/drivers/net/dsa/mt7530.h @@ -31,6 +31,7 @@ enum { #define MT7530_MFC 0x10 #define BC_FFP(x) (((x) & 0xff) << 24) #define UNM_FFP(x) (((x) & 0xff) << 16) +#define UNM_FFP_MASK UNM_FFP(~0) #define UNU_FFP(x) (((x) & 0xff) << 8) #define UNU_FFP_MASK UNU_FFP(~0) #define CPU_EN BIT(7) diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index e2c6bf0e430e..e8aae64db1ca 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -388,6 +388,7 @@ static int felix_init_structs(struct felix *felix, int num_phys_ports) struct ocelot *ocelot = &felix->ocelot; phy_interface_t *port_phy_modes; resource_size_t switch_base; + struct resource res; int port, i, err; ocelot->num_phys_ports = num_phys_ports; @@ -422,17 +423,16 @@ static int felix_init_structs(struct felix *felix, int num_phys_ports) for (i = 0; i < TARGET_MAX; i++) { struct regmap *target; - struct resource *res; if (!felix->info->target_io_res[i].name) continue; - res = &felix->info->target_io_res[i]; - res->flags = IORESOURCE_MEM; - res->start += switch_base; - res->end += switch_base; + memcpy(&res, &felix->info->target_io_res[i], sizeof(res)); + res.flags = IORESOURCE_MEM; + res.start += switch_base; + res.end += switch_base; - target = ocelot_regmap_init(ocelot, res); + target = ocelot_regmap_init(ocelot, &res); if (IS_ERR(target)) { dev_err(ocelot->dev, "Failed to map device memory space\n"); @@ -453,7 +453,6 @@ static int felix_init_structs(struct felix *felix, int num_phys_ports) for (port = 0; port < num_phys_ports; port++) { struct ocelot_port *ocelot_port; void __iomem *port_regs; - struct resource *res; ocelot_port = devm_kzalloc(ocelot->dev, sizeof(struct ocelot_port), @@ -465,12 +464,12 @@ static int felix_init_structs(struct felix *felix, int num_phys_ports) return -ENOMEM; } - res = &felix->info->port_io_res[port]; - res->flags = IORESOURCE_MEM; - res->start += switch_base; - res->end += switch_base; + memcpy(&res, &felix->info->port_io_res[port], sizeof(res)); + res.flags = IORESOURCE_MEM; + res.start += switch_base; + res.end += switch_base; - port_regs = devm_ioremap_resource(ocelot->dev, res); + port_regs = devm_ioremap_resource(ocelot->dev, &res); if (IS_ERR(port_regs)) { dev_err(ocelot->dev, "failed to map registers for port %d\n", port); diff --git a/drivers/net/dsa/ocelot/felix.h b/drivers/net/dsa/ocelot/felix.h index 9af106513e53..730a8a90e1f7 100644 --- a/drivers/net/dsa/ocelot/felix.h +++ b/drivers/net/dsa/ocelot/felix.h @@ -8,9 +8,9 @@ /* Platform-specific information */ struct felix_info { - struct resource *target_io_res; - struct resource *port_io_res; - struct resource *imdio_res; + const struct resource *target_io_res; + const struct resource *port_io_res; + const struct resource *imdio_res; const struct reg_field *regfields; const u32 *const *map; const struct ocelot_ops *ops; diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c index 8bf395f12b47..5211f05ef2fb 100644 --- a/drivers/net/dsa/ocelot/felix_vsc9959.c +++ b/drivers/net/dsa/ocelot/felix_vsc9959.c @@ -333,10 +333,8 @@ static const u32 *vsc9959_regmap[] = { [GCB] = vsc9959_gcb_regmap, }; -/* Addresses are relative to the PCI device's base address and - * will be fixed up at ioremap time. - */ -static struct resource vsc9959_target_io_res[] = { +/* Addresses are relative to the PCI device's base address */ +static const struct resource vsc9959_target_io_res[] = { [ANA] = { .start = 0x0280000, .end = 0x028ffff, @@ -379,7 +377,7 @@ static struct resource vsc9959_target_io_res[] = { }, }; -static struct resource vsc9959_port_io_res[] = { +static const struct resource vsc9959_port_io_res[] = { { .start = 0x0100000, .end = 0x010ffff, @@ -415,7 +413,7 @@ static struct resource vsc9959_port_io_res[] = { /* Port MAC 0 Internal MDIO bus through which the SerDes acting as an * SGMII/QSGMII MAC PCS can be found. */ -static struct resource vsc9959_imdio_res = { +static const struct resource vsc9959_imdio_res = { .start = 0x8030, .end = 0x8040, .name = "imdio", @@ -1111,7 +1109,7 @@ static int vsc9959_mdio_bus_alloc(struct ocelot *ocelot) struct device *dev = ocelot->dev; resource_size_t imdio_base; void __iomem *imdio_regs; - struct resource *res; + struct resource res; struct enetc_hw *hw; struct mii_bus *bus; int port; @@ -1128,12 +1126,12 @@ static int vsc9959_mdio_bus_alloc(struct ocelot *ocelot) imdio_base = pci_resource_start(felix->pdev, felix->info->imdio_pci_bar); - res = felix->info->imdio_res; - res->flags = IORESOURCE_MEM; - res->start += imdio_base; - res->end += imdio_base; + memcpy(&res, felix->info->imdio_res, sizeof(res)); + res.flags = IORESOURCE_MEM; + res.start += imdio_base; + res.end += imdio_base; - imdio_regs = devm_ioremap_resource(dev, res); + imdio_regs = devm_ioremap_resource(dev, &res); if (IS_ERR(imdio_regs)) { dev_err(dev, "failed to map internal MDIO registers\n"); return PTR_ERR(imdio_regs); diff --git a/drivers/net/ethernet/apple/bmac.c b/drivers/net/ethernet/apple/bmac.c index a58185b1d8bf..3e3711b60d01 100644 --- a/drivers/net/ethernet/apple/bmac.c +++ b/drivers/net/ethernet/apple/bmac.c @@ -1182,7 +1182,7 @@ bmac_get_station_address(struct net_device *dev, unsigned char *ea) int i; unsigned short data; - for (i = 0; i < 6; i++) + for (i = 0; i < 3; i++) { reset_and_select_srom(dev); data = read_srom(dev, i + EnetAddressOffset/2, SROMAddressBits); diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index 6e5f6dd169b5..552e7554a9f8 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -42,6 +42,7 @@ #include <soc/fsl/qe/ucc.h> #include <soc/fsl/qe/ucc_fast.h> #include <asm/machdep.h> +#include <net/sch_generic.h> #include "ucc_geth.h" @@ -1548,11 +1549,8 @@ static int ugeth_disable(struct ucc_geth_private *ugeth, enum comm_dir mode) static void ugeth_quiesce(struct ucc_geth_private *ugeth) { - /* Prevent any further xmits, plus detach the device. */ - netif_device_detach(ugeth->ndev); - - /* Wait for any current xmits to finish. */ - netif_tx_disable(ugeth->ndev); + /* Prevent any further xmits */ + netif_tx_stop_all_queues(ugeth->ndev); /* Disable the interrupt to avoid NAPI rescheduling. */ disable_irq(ugeth->ug_info->uf_info.irq); @@ -1565,7 +1563,10 @@ static void ugeth_activate(struct ucc_geth_private *ugeth) { napi_enable(&ugeth->napi); enable_irq(ugeth->ug_info->uf_info.irq); - netif_device_attach(ugeth->ndev); + + /* allow to xmit again */ + netif_tx_wake_all_queues(ugeth->ndev); + __netdev_watchdog_up(ugeth->ndev); } /* Called every time the controller might need to be made diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c index 7352244c5e68..d4a4e241333d 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c @@ -1070,7 +1070,7 @@ void mvpp2_cls_oversize_rxq_set(struct mvpp2_port *port) (port->first_rxq >> MVPP2_CLS_OVERSIZE_RXQ_LOW_BITS)); val = mvpp2_read(port->priv, MVPP2_CLS_SWFWD_PCTRL_REG); - val |= MVPP2_CLS_SWFWD_PCTRL_MASK(port->id); + val &= ~MVPP2_CLS_SWFWD_PCTRL_MASK(port->id); mvpp2_write(port->priv, MVPP2_CLS_SWFWD_PCTRL_REG, val); } diff --git a/drivers/net/ethernet/marvell/pxa168_eth.c b/drivers/net/ethernet/marvell/pxa168_eth.c index 7a0d785b826c..17243bb5ba91 100644 --- a/drivers/net/ethernet/marvell/pxa168_eth.c +++ b/drivers/net/ethernet/marvell/pxa168_eth.c @@ -1418,7 +1418,7 @@ static int pxa168_eth_probe(struct platform_device *pdev) pep->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(pep->base)) { - err = -ENOMEM; + err = PTR_ERR(pep->base); goto err_netdev; } diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 6e501af0e532..f6ff9620a137 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -2734,7 +2734,7 @@ void mlx4_opreq_action(struct work_struct *work) if (err) { mlx4_err(dev, "Failed to retrieve required operation: %d\n", err); - return; + goto out; } MLX4_GET(modifier, outbox, GET_OP_REQ_MODIFIER_OFFSET); MLX4_GET(token, outbox, GET_OP_REQ_TOKEN_OFFSET); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index cede5bdfd598..7a77fe40af3a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -848,6 +848,14 @@ static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg); static void mlx5_free_cmd_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg); +static bool opcode_allowed(struct mlx5_cmd *cmd, u16 opcode) +{ + if (cmd->allowed_opcode == CMD_ALLOWED_OPCODE_ALL) + return true; + + return cmd->allowed_opcode == opcode; +} + static void cmd_work_handler(struct work_struct *work) { struct mlx5_cmd_work_ent *ent = container_of(work, struct mlx5_cmd_work_ent, work); @@ -861,6 +869,7 @@ static void cmd_work_handler(struct work_struct *work) int alloc_ret; int cmd_mode; + complete(&ent->handling); sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem; down(sem); if (!ent->page_queue) { @@ -913,7 +922,9 @@ static void cmd_work_handler(struct work_struct *work) /* Skip sending command to fw if internal error */ if (pci_channel_offline(dev->pdev) || - dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { + dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR || + cmd->state != MLX5_CMDIF_STATE_UP || + !opcode_allowed(&dev->cmd, ent->op)) { u8 status = 0; u32 drv_synd; @@ -978,6 +989,11 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent) struct mlx5_cmd *cmd = &dev->cmd; int err; + if (!wait_for_completion_timeout(&ent->handling, timeout) && + cancel_work_sync(&ent->work)) { + ent->ret = -ECANCELED; + goto out_err; + } if (cmd->mode == CMD_MODE_POLLING || ent->polling) { wait_for_completion(&ent->done); } else if (!wait_for_completion_timeout(&ent->done, timeout)) { @@ -985,12 +1001,17 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent) mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true); } +out_err: err = ent->ret; if (err == -ETIMEDOUT) { mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a command resource\n", mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in)); + } else if (err == -ECANCELED) { + mlx5_core_warn(dev, "%s(0x%x) canceled on out of queue timeout.\n", + mlx5_command_str(msg_to_opcode(ent->in)), + msg_to_opcode(ent->in)); } mlx5_core_dbg(dev, "err %d, delivery status %s(%d)\n", err, deliv_status_to_str(ent->status), ent->status); @@ -1026,6 +1047,7 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in, ent->token = token; ent->polling = force_polling; + init_completion(&ent->handling); if (!callback) init_completion(&ent->done); @@ -1045,6 +1067,8 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in, err = wait_func(dev, ent); if (err == -ETIMEDOUT) goto out; + if (err == -ECANCELED) + goto out_free; ds = ent->ts2 - ent->ts1; op = MLX5_GET(mbox_in, in->first.data, opcode); @@ -1391,6 +1415,22 @@ static void create_debugfs_files(struct mlx5_core_dev *dev) mlx5_cmdif_debugfs_init(dev); } +void mlx5_cmd_allowed_opcode(struct mlx5_core_dev *dev, u16 opcode) +{ + struct mlx5_cmd *cmd = &dev->cmd; + int i; + + for (i = 0; i < cmd->max_reg_cmds; i++) + down(&cmd->sem); + down(&cmd->pages_sem); + + cmd->allowed_opcode = opcode; + + up(&cmd->pages_sem); + for (i = 0; i < cmd->max_reg_cmds; i++) + up(&cmd->sem); +} + static void mlx5_cmd_change_mod(struct mlx5_core_dev *dev, int mode) { struct mlx5_cmd *cmd = &dev->cmd; @@ -1667,12 +1707,14 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int err; u8 status = 0; u32 drv_synd; + u16 opcode; u8 token; + opcode = MLX5_GET(mbox_in, in, opcode); if (pci_channel_offline(dev->pdev) || - dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { - u16 opcode = MLX5_GET(mbox_in, in, opcode); - + dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR || + dev->cmd.state != MLX5_CMDIF_STATE_UP || + !opcode_allowed(&dev->cmd, opcode)) { err = mlx5_internal_err_ret_value(dev, opcode, &drv_synd, &status); MLX5_SET(mbox_out, out, status, status); MLX5_SET(mbox_out, out, syndrome, drv_synd); @@ -1937,6 +1979,7 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev) goto err_free_page; } + cmd->state = MLX5_CMDIF_STATE_DOWN; cmd->checksum_disabled = 1; cmd->max_reg_cmds = (1 << cmd->log_sz) - 1; cmd->bitmask = (1UL << cmd->max_reg_cmds) - 1; @@ -1974,6 +2017,7 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev) mlx5_core_dbg(dev, "descriptor at dma 0x%llx\n", (unsigned long long)(cmd->dma)); cmd->mode = CMD_MODE_POLLING; + cmd->allowed_opcode = CMD_ALLOWED_OPCODE_ALL; create_msg_cache(dev); @@ -2013,3 +2057,10 @@ void mlx5_cmd_cleanup(struct mlx5_core_dev *dev) dma_pool_destroy(cmd->pool); } EXPORT_SYMBOL(mlx5_cmd_cleanup); + +void mlx5_cmd_set_state(struct mlx5_core_dev *dev, + enum mlx5_cmdif_state cmdif_state) +{ + dev->cmd.state = cmdif_state; +} +EXPORT_SYMBOL(mlx5_cmd_set_state); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 23701c0e36ec..59745402747b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -1121,7 +1121,7 @@ void mlx5e_close_drop_rq(struct mlx5e_rq *drop_rq); int mlx5e_create_indirect_rqt(struct mlx5e_priv *priv); int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc); -void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc); +void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv); int mlx5e_create_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs); void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index a172c5e39710..4eb305af0106 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -699,6 +699,7 @@ mlx5_tc_ct_parse_match(struct mlx5e_priv *priv, struct netlink_ext_ack *extack) { struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv); + struct flow_rule *rule = flow_cls_offload_flow_rule(f); struct flow_dissector_key_ct *mask, *key; bool trk, est, untrk, unest, new; u32 ctstate = 0, ctstate_mask = 0; @@ -706,7 +707,7 @@ mlx5_tc_ct_parse_match(struct mlx5e_priv *priv, u16 ct_state, ct_state_mask; struct flow_match_ct match; - if (!flow_rule_match_key(f->rule, FLOW_DISSECTOR_KEY_CT)) + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CT)) return 0; if (!ct_priv) { @@ -715,7 +716,7 @@ mlx5_tc_ct_parse_match(struct mlx5e_priv *priv, return -EOPNOTSUPP; } - flow_rule_match_ct(f->rule, &match); + flow_rule_match_ct(rule, &match); key = match.key; mask = match.mask; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h index 091d305b633e..626f6c04882e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h @@ -130,7 +130,9 @@ mlx5_tc_ct_parse_match(struct mlx5e_priv *priv, struct flow_cls_offload *f, struct netlink_ext_ack *extack) { - if (!flow_rule_match_key(f->rule, FLOW_DISSECTOR_KEY_CT)) + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CT)) return 0; NL_SET_ERR_MSG_MOD(extack, "mlx5 tc ct offload isn't enabled."); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c index 46725cd743a3..7d1985fa0d4f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c @@ -69,8 +69,8 @@ static void mlx5e_ktls_del(struct net_device *netdev, struct mlx5e_ktls_offload_context_tx *tx_priv = mlx5e_get_ktls_tx_priv_ctx(tls_ctx); - mlx5_ktls_destroy_key(priv->mdev, tx_priv->key_id); mlx5e_destroy_tis(priv->mdev, tx_priv->tisn); + mlx5_ktls_destroy_key(priv->mdev, tx_priv->key_id); kvfree(tx_priv); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index b314adf438da..c6b83042d431 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2717,7 +2717,8 @@ void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in, int inlen) mlx5_core_modify_tir(mdev, priv->indir_tir[tt].tirn, in, inlen); } - if (!mlx5e_tunnel_inner_ft_supported(priv->mdev)) + /* Verify inner tirs resources allocated */ + if (!priv->inner_indir_tir[0].tirn) return; for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { @@ -3408,14 +3409,15 @@ out: return err; } -void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc) +void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv) { int i; for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++) mlx5e_destroy_tir(priv->mdev, &priv->indir_tir[i]); - if (!inner_ttc || !mlx5e_tunnel_inner_ft_supported(priv->mdev)) + /* Verify inner tirs resources allocated */ + if (!priv->inner_indir_tir[0].tirn) return; for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++) @@ -5123,7 +5125,7 @@ err_destroy_xsk_rqts: err_destroy_direct_tirs: mlx5e_destroy_direct_tirs(priv, priv->direct_tir); err_destroy_indirect_tirs: - mlx5e_destroy_indirect_tirs(priv, true); + mlx5e_destroy_indirect_tirs(priv); err_destroy_direct_rqts: mlx5e_destroy_direct_rqts(priv, priv->direct_tir); err_destroy_indirect_rqts: @@ -5142,7 +5144,7 @@ static void mlx5e_cleanup_nic_rx(struct mlx5e_priv *priv) mlx5e_destroy_direct_tirs(priv, priv->xsk_tir); mlx5e_destroy_direct_rqts(priv, priv->xsk_tir); mlx5e_destroy_direct_tirs(priv, priv->direct_tir); - mlx5e_destroy_indirect_tirs(priv, true); + mlx5e_destroy_indirect_tirs(priv); mlx5e_destroy_direct_rqts(priv, priv->direct_tir); mlx5e_destroy_rqt(priv, &priv->indir_rqt); mlx5e_close_drop_rq(&priv->drop_rq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index f372e94948fd..4a8e0dfdc5f2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -1484,13 +1484,9 @@ bool mlx5e_eswitch_uplink_rep(struct net_device *netdev) return netdev->netdev_ops == &mlx5e_netdev_ops_uplink_rep; } -bool mlx5e_eswitch_rep(struct net_device *netdev) +bool mlx5e_eswitch_vf_rep(struct net_device *netdev) { - if (netdev->netdev_ops == &mlx5e_netdev_ops_rep || - netdev->netdev_ops == &mlx5e_netdev_ops_uplink_rep) - return true; - - return false; + return netdev->netdev_ops == &mlx5e_netdev_ops_rep; } static void mlx5e_build_rep_params(struct net_device *netdev) @@ -1747,7 +1743,7 @@ err_destroy_ttc_table: err_destroy_direct_tirs: mlx5e_destroy_direct_tirs(priv, priv->direct_tir); err_destroy_indirect_tirs: - mlx5e_destroy_indirect_tirs(priv, false); + mlx5e_destroy_indirect_tirs(priv); err_destroy_direct_rqts: mlx5e_destroy_direct_rqts(priv, priv->direct_tir); err_destroy_indirect_rqts: @@ -1765,7 +1761,7 @@ static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv) mlx5e_destroy_rep_root_ft(priv); mlx5e_destroy_ttc_table(priv, &priv->fs.ttc); mlx5e_destroy_direct_tirs(priv, priv->direct_tir); - mlx5e_destroy_indirect_tirs(priv, false); + mlx5e_destroy_indirect_tirs(priv); mlx5e_destroy_direct_rqts(priv, priv->direct_tir); mlx5e_destroy_rqt(priv, &priv->indir_rqt); mlx5e_close_drop_rq(&priv->drop_rq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index 6a2337900420..612b5cf0673d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -210,8 +210,13 @@ void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv, void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv); -bool mlx5e_eswitch_rep(struct net_device *netdev); +bool mlx5e_eswitch_vf_rep(struct net_device *netdev); bool mlx5e_eswitch_uplink_rep(struct net_device *netdev); +static inline bool mlx5e_eswitch_rep(struct net_device *netdev) +{ + return mlx5e_eswitch_vf_rep(netdev) || + mlx5e_eswitch_uplink_rep(netdev); +} #else /* CONFIG_MLX5_ESWITCH */ static inline bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv) { return false; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index a574c588269a..5bcf95fcdd59 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -3073,6 +3073,11 @@ static bool actions_match_supported(struct mlx5e_priv *priv, return true; } +static bool same_port_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv) +{ + return priv->mdev == peer_priv->mdev; +} + static bool same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv) { struct mlx5_core_dev *fmdev, *pmdev; @@ -3291,7 +3296,7 @@ static inline int hash_encap_info(struct encap_key *key) } -static bool is_merged_eswitch_dev(struct mlx5e_priv *priv, +static bool is_merged_eswitch_vfs(struct mlx5e_priv *priv, struct net_device *peer_netdev) { struct mlx5e_priv *peer_priv; @@ -3299,13 +3304,11 @@ static bool is_merged_eswitch_dev(struct mlx5e_priv *priv, peer_priv = netdev_priv(peer_netdev); return (MLX5_CAP_ESW(priv->mdev, merged_eswitch) && - mlx5e_eswitch_rep(priv->netdev) && - mlx5e_eswitch_rep(peer_netdev) && + mlx5e_eswitch_vf_rep(priv->netdev) && + mlx5e_eswitch_vf_rep(peer_netdev) && same_hw_devs(priv, peer_priv)); } - - bool mlx5e_encap_take(struct mlx5e_encap_entry *e) { return refcount_inc_not_zero(&e->refcnt); @@ -3575,14 +3578,37 @@ static int add_vlan_pop_action(struct mlx5e_priv *priv, return err; } +static bool same_hw_reps(struct mlx5e_priv *priv, + struct net_device *peer_netdev) +{ + struct mlx5e_priv *peer_priv; + + peer_priv = netdev_priv(peer_netdev); + + return mlx5e_eswitch_rep(priv->netdev) && + mlx5e_eswitch_rep(peer_netdev) && + same_hw_devs(priv, peer_priv); +} + +static bool is_lag_dev(struct mlx5e_priv *priv, + struct net_device *peer_netdev) +{ + return ((mlx5_lag_is_sriov(priv->mdev) || + mlx5_lag_is_multipath(priv->mdev)) && + same_hw_reps(priv, peer_netdev)); +} + bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv, struct net_device *out_dev) { - if (is_merged_eswitch_dev(priv, out_dev)) + if (is_merged_eswitch_vfs(priv, out_dev)) + return true; + + if (is_lag_dev(priv, out_dev)) return true; return mlx5e_eswitch_rep(out_dev) && - same_hw_devs(priv, netdev_priv(out_dev)); + same_port_devs(priv, netdev_priv(out_dev)); } static bool is_duplicated_output_device(struct net_device *dev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index fd6b2a1898c5..119a5c6cc167 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -537,10 +537,9 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq) { struct mlx5e_tx_wqe_info *wi; + u32 dma_fifo_cc, nbytes = 0; + u16 ci, sqcc, npkts = 0; struct sk_buff *skb; - u32 dma_fifo_cc; - u16 sqcc; - u16 ci; int i; sqcc = sq->cc; @@ -565,11 +564,15 @@ void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq) } dev_kfree_skb_any(skb); + npkts++; + nbytes += wi->num_bytes; sqcc += wi->num_wqebbs; } sq->dma_fifo_cc = dma_fifo_cc; sq->cc = sqcc; + + netdev_tx_completed_queue(sq->txq, npkts, nbytes); } #ifdef CONFIG_MLX5_CORE_IPOIB diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index cccea3a8eddd..ce6c621af043 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -611,11 +611,13 @@ static int create_async_eqs(struct mlx5_core_dev *dev) .nent = MLX5_NUM_CMD_EQE, .mask[0] = 1ull << MLX5_EVENT_TYPE_CMD, }; + mlx5_cmd_allowed_opcode(dev, MLX5_CMD_OP_CREATE_EQ); err = setup_async_eq(dev, &table->cmd_eq, ¶m, "cmd"); if (err) goto err1; mlx5_cmd_use_events(dev); + mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL); param = (struct mlx5_eq_param) { .irq_index = 0, @@ -645,6 +647,7 @@ err2: mlx5_cmd_use_polling(dev); cleanup_async_eq(dev, &table->cmd_eq, "cmd"); err1: + mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL); mlx5_eq_notifier_unregister(dev, &table->cq_err_nb); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c index 8bcf3426b9c6..3ce17c3d7a00 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/events.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c @@ -346,8 +346,10 @@ int mlx5_events_init(struct mlx5_core_dev *dev) events->dev = dev; dev->priv.events = events; events->wq = create_singlethread_workqueue("mlx5_events"); - if (!events->wq) + if (!events->wq) { + kfree(events); return -ENOMEM; + } INIT_WORK(&events->pcie_core_work, mlx5_pcie_event); return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index d5defe09339a..9620c8650e13 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -344,17 +344,12 @@ static void tree_put_node(struct fs_node *node, bool locked) if (node->del_hw_func) node->del_hw_func(node); if (parent_node) { - /* Only root namespace doesn't have parent and we just - * need to free its node. - */ down_write_ref_node(parent_node, locked); list_del_init(&node->list); - if (node->del_sw_func) - node->del_sw_func(node); - up_write_ref_node(parent_node, locked); - } else { - kfree(node); } + node->del_sw_func(node); + if (parent_node) + up_write_ref_node(parent_node, locked); node = NULL; } if (!node && parent_node) @@ -468,8 +463,10 @@ static void del_sw_flow_table(struct fs_node *node) fs_get_obj(ft, node); rhltable_destroy(&ft->fgs_hash); - fs_get_obj(prio, ft->node.parent); - prio->num_ft--; + if (ft->node.parent) { + fs_get_obj(prio, ft->node.parent); + prio->num_ft--; + } kfree(ft); } @@ -2351,6 +2348,17 @@ static int init_root_tree(struct mlx5_flow_steering *steering, return 0; } +static void del_sw_root_ns(struct fs_node *node) +{ + struct mlx5_flow_root_namespace *root_ns; + struct mlx5_flow_namespace *ns; + + fs_get_obj(ns, node); + root_ns = container_of(ns, struct mlx5_flow_root_namespace, ns); + mutex_destroy(&root_ns->chain_lock); + kfree(node); +} + static struct mlx5_flow_root_namespace *create_root_ns(struct mlx5_flow_steering *steering, enum fs_flow_table_type table_type) @@ -2377,7 +2385,7 @@ static struct mlx5_flow_root_namespace ns = &root_ns->ns; fs_init_namespace(ns); mutex_init(&root_ns->chain_lock); - tree_init_node(&ns->node, NULL, NULL); + tree_init_node(&ns->node, NULL, del_sw_root_ns); tree_add_node(&ns->node, NULL); return root_ns; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 673aaa815f57..505cf6eeae25 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -396,7 +396,7 @@ static int mlx5i_init_rx(struct mlx5e_priv *priv) err_destroy_direct_tirs: mlx5e_destroy_direct_tirs(priv, priv->direct_tir); err_destroy_indirect_tirs: - mlx5e_destroy_indirect_tirs(priv, true); + mlx5e_destroy_indirect_tirs(priv); err_destroy_direct_rqts: mlx5e_destroy_direct_rqts(priv, priv->direct_tir); err_destroy_indirect_rqts: @@ -412,7 +412,7 @@ static void mlx5i_cleanup_rx(struct mlx5e_priv *priv) { mlx5i_destroy_flow_steering(priv); mlx5e_destroy_direct_tirs(priv, priv->direct_tir); - mlx5e_destroy_indirect_tirs(priv, true); + mlx5e_destroy_indirect_tirs(priv); mlx5e_destroy_direct_rqts(priv, priv->direct_tir); mlx5e_destroy_rqt(priv, &priv->indir_rqt); mlx5e_close_drop_rq(&priv->drop_rq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 7af4210c1b96..c1618b818f3a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -965,6 +965,8 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot) goto err_cmd_cleanup; } + mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_UP); + err = mlx5_core_enable_hca(dev, 0); if (err) { mlx5_core_err(dev, "enable hca failed\n"); @@ -1026,6 +1028,7 @@ reclaim_boot_pages: err_disable_hca: mlx5_core_disable_hca(dev, 0); err_cmd_cleanup: + mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN); mlx5_cmd_cleanup(dev); return err; @@ -1043,6 +1046,7 @@ static int mlx5_function_teardown(struct mlx5_core_dev *dev, bool boot) } mlx5_reclaim_startup_pages(dev); mlx5_core_disable_hca(dev, 0); + mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN); mlx5_cmd_cleanup(dev); return 0; @@ -1191,7 +1195,7 @@ int mlx5_load_one(struct mlx5_core_dev *dev, bool boot) err = mlx5_function_setup(dev, boot); if (err) - goto out; + goto err_function; if (boot) { err = mlx5_init_once(dev); @@ -1229,6 +1233,7 @@ err_load: mlx5_cleanup_once(dev); function_teardown: mlx5_function_teardown(dev, boot); +err_function: dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; mutex_unlock(&dev->intf_state_mutex); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 24ca8d5bc564..6b39978acd07 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -3986,6 +3986,7 @@ static void mlxsw_sp_ports_remove(struct mlxsw_sp *mlxsw_sp) mlxsw_sp_port_remove(mlxsw_sp, i); mlxsw_sp_cpu_port_remove(mlxsw_sp); kfree(mlxsw_sp->ports); + mlxsw_sp->ports = NULL; } static int mlxsw_sp_ports_create(struct mlxsw_sp *mlxsw_sp) @@ -4022,6 +4023,7 @@ err_port_create: mlxsw_sp_cpu_port_remove(mlxsw_sp); err_cpu_port_create: kfree(mlxsw_sp->ports); + mlxsw_sp->ports = NULL; return err; } @@ -4143,6 +4145,14 @@ static int mlxsw_sp_local_ports_offset(struct mlxsw_core *mlxsw_core, return mlxsw_core_res_get(mlxsw_core, local_ports_in_x_res_id); } +static struct mlxsw_sp_port * +mlxsw_sp_port_get_by_local_port(struct mlxsw_sp *mlxsw_sp, u8 local_port) +{ + if (mlxsw_sp->ports && mlxsw_sp->ports[local_port]) + return mlxsw_sp->ports[local_port]; + return NULL; +} + static int mlxsw_sp_port_split(struct mlxsw_core *mlxsw_core, u8 local_port, unsigned int count, struct netlink_ext_ack *extack) @@ -4156,7 +4166,7 @@ static int mlxsw_sp_port_split(struct mlxsw_core *mlxsw_core, u8 local_port, int i; int err; - mlxsw_sp_port = mlxsw_sp->ports[local_port]; + mlxsw_sp_port = mlxsw_sp_port_get_by_local_port(mlxsw_sp, local_port); if (!mlxsw_sp_port) { dev_err(mlxsw_sp->bus_info->dev, "Port number \"%d\" does not exist\n", local_port); @@ -4251,7 +4261,7 @@ static int mlxsw_sp_port_unsplit(struct mlxsw_core *mlxsw_core, u8 local_port, int offset; int i; - mlxsw_sp_port = mlxsw_sp->ports[local_port]; + mlxsw_sp_port = mlxsw_sp_port_get_by_local_port(mlxsw_sp, local_port); if (!mlxsw_sp_port) { dev_err(mlxsw_sp->bus_info->dev, "Port number \"%d\" does not exist\n", local_port); diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c index 90535820b559..2503f61db5fb 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c +++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c @@ -1259,6 +1259,7 @@ static void mlxsw_sx_ports_remove(struct mlxsw_sx *mlxsw_sx) if (mlxsw_sx_port_created(mlxsw_sx, i)) mlxsw_sx_port_remove(mlxsw_sx, i); kfree(mlxsw_sx->ports); + mlxsw_sx->ports = NULL; } static int mlxsw_sx_ports_create(struct mlxsw_sx *mlxsw_sx) @@ -1293,6 +1294,7 @@ err_port_module_info_get: if (mlxsw_sx_port_created(mlxsw_sx, i)) mlxsw_sx_port_remove(mlxsw_sx, i); kfree(mlxsw_sx->ports); + mlxsw_sx->ports = NULL; return err; } @@ -1376,6 +1378,12 @@ static int mlxsw_sx_port_type_set(struct mlxsw_core *mlxsw_core, u8 local_port, u8 module, width; int err; + if (!mlxsw_sx->ports || !mlxsw_sx->ports[local_port]) { + dev_err(mlxsw_sx->bus_info->dev, "Port number \"%d\" does not exist\n", + local_port); + return -EINVAL; + } + if (new_type == DEVLINK_PORT_TYPE_AUTO) return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 02350c3d9d01..efb3965a3e42 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -1467,7 +1467,7 @@ static void ocelot_port_attr_ageing_set(struct ocelot *ocelot, int port, unsigned long ageing_clock_t) { unsigned long ageing_jiffies = clock_t_to_jiffies(ageing_clock_t); - u32 ageing_time = jiffies_to_msecs(ageing_jiffies) / 1000; + u32 ageing_time = jiffies_to_msecs(ageing_jiffies); ocelot_set_ageing_time(ocelot, ageing_time); } diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 78e15cc00e0a..c51b48dc3639 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -1050,6 +1050,13 @@ static u16 rtl_ephy_read(struct rtl8169_private *tp, int reg_addr) RTL_R32(tp, EPHYAR) & EPHYAR_DATA_MASK : ~0; } +static void r8168fp_adjust_ocp_cmd(struct rtl8169_private *tp, u32 *cmd, int type) +{ + /* based on RTL8168FP_OOBMAC_BASE in vendor driver */ + if (tp->mac_version == RTL_GIGA_MAC_VER_52 && type == ERIAR_OOB) + *cmd |= 0x7f0 << 18; +} + DECLARE_RTL_COND(rtl_eriar_cond) { return RTL_R32(tp, ERIAR) & ERIAR_FLAG; @@ -1058,9 +1065,12 @@ DECLARE_RTL_COND(rtl_eriar_cond) static void _rtl_eri_write(struct rtl8169_private *tp, int addr, u32 mask, u32 val, int type) { + u32 cmd = ERIAR_WRITE_CMD | type | mask | addr; + BUG_ON((addr & 3) || (mask == 0)); RTL_W32(tp, ERIDR, val); - RTL_W32(tp, ERIAR, ERIAR_WRITE_CMD | type | mask | addr); + r8168fp_adjust_ocp_cmd(tp, &cmd, type); + RTL_W32(tp, ERIAR, cmd); rtl_udelay_loop_wait_low(tp, &rtl_eriar_cond, 100, 100); } @@ -1073,7 +1083,10 @@ static void rtl_eri_write(struct rtl8169_private *tp, int addr, u32 mask, static u32 _rtl_eri_read(struct rtl8169_private *tp, int addr, int type) { - RTL_W32(tp, ERIAR, ERIAR_READ_CMD | type | ERIAR_MASK_1111 | addr); + u32 cmd = ERIAR_READ_CMD | type | ERIAR_MASK_1111 | addr; + + r8168fp_adjust_ocp_cmd(tp, &cmd, type); + RTL_W32(tp, ERIAR, cmd); return rtl_udelay_loop_wait_high(tp, &rtl_eriar_cond, 100, 100) ? RTL_R32(tp, ERIDR) : ~0; diff --git a/drivers/net/ethernet/sgi/ioc3-eth.c b/drivers/net/ethernet/sgi/ioc3-eth.c index 7305e8e86c51..6646eba9f57f 100644 --- a/drivers/net/ethernet/sgi/ioc3-eth.c +++ b/drivers/net/ethernet/sgi/ioc3-eth.c @@ -848,14 +848,14 @@ static int ioc3eth_probe(struct platform_device *pdev) ip = netdev_priv(dev); ip->dma_dev = pdev->dev.parent; ip->regs = devm_platform_ioremap_resource(pdev, 0); - if (!ip->regs) { - err = -ENOMEM; + if (IS_ERR(ip->regs)) { + err = PTR_ERR(ip->regs); goto out_free; } ip->ssram = devm_platform_ioremap_resource(pdev, 1); - if (!ip->ssram) { - err = -ENOMEM; + if (IS_ERR(ip->ssram)) { + err = PTR_ERR(ip->ssram); goto out_free; } diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c index 49a6a9167af4..fc168f85e7af 100644 --- a/drivers/net/ethernet/smsc/smsc911x.c +++ b/drivers/net/ethernet/smsc/smsc911x.c @@ -2493,20 +2493,20 @@ static int smsc911x_drv_probe(struct platform_device *pdev) retval = smsc911x_init(dev); if (retval < 0) - goto out_disable_resources; + goto out_init_fail; netif_carrier_off(dev); retval = smsc911x_mii_init(pdev, dev); if (retval) { SMSC_WARN(pdata, probe, "Error %i initialising mii", retval); - goto out_disable_resources; + goto out_init_fail; } retval = register_netdev(dev); if (retval) { SMSC_WARN(pdata, probe, "Error %i registering device", retval); - goto out_disable_resources; + goto out_init_fail; } else { SMSC_TRACE(pdata, probe, "Network interface: \"%s\"", dev->name); @@ -2547,9 +2547,10 @@ static int smsc911x_drv_probe(struct platform_device *pdev) return 0; -out_disable_resources: +out_init_fail: pm_runtime_put(&pdev->dev); pm_runtime_disable(&pdev->dev); +out_disable_resources: (void)smsc911x_disable_resources(pdev); out_enable_resources_fail: smsc911x_free_resources(pdev); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c index 6ae13dc19510..02102c781a8c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c @@ -319,6 +319,19 @@ static int ipq806x_gmac_probe(struct platform_device *pdev) /* Enable PTP clock */ regmap_read(gmac->nss_common, NSS_COMMON_CLK_GATE, &val); val |= NSS_COMMON_CLK_GATE_PTP_EN(gmac->id); + switch (gmac->phy_mode) { + case PHY_INTERFACE_MODE_RGMII: + val |= NSS_COMMON_CLK_GATE_RGMII_RX_EN(gmac->id) | + NSS_COMMON_CLK_GATE_RGMII_TX_EN(gmac->id); + break; + case PHY_INTERFACE_MODE_SGMII: + val |= NSS_COMMON_CLK_GATE_GMII_RX_EN(gmac->id) | + NSS_COMMON_CLK_GATE_GMII_TX_EN(gmac->id); + break; + default: + /* We don't get here; the switch above will have errored out */ + unreachable(); + } regmap_write(gmac->nss_common, NSS_COMMON_CLK_GATE, val); if (gmac->phy_mode == PHY_INTERFACE_MODE_SGMII) { diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index a999d6b33a64..1f319c9cee46 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -5190,8 +5190,6 @@ int stmmac_resume(struct device *dev) return ret; } - netif_device_attach(ndev); - mutex_lock(&priv->lock); stmmac_reset_queues_param(priv); @@ -5218,6 +5216,8 @@ int stmmac_resume(struct device *dev) phylink_mac_change(priv->phylink, true); + netif_device_attach(ndev); + return 0; } EXPORT_SYMBOL_GPL(stmmac_resume); diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c index e6d1aa882fa5..f1c8615ab6f0 100644 --- a/drivers/net/ethernet/sun/cassini.c +++ b/drivers/net/ethernet/sun/cassini.c @@ -4963,7 +4963,7 @@ static int cas_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) cas_cacheline_size)) { dev_err(&pdev->dev, "Could not set PCI cache " "line size\n"); - goto err_write_cacheline; + goto err_out_free_res; } } #endif @@ -5136,7 +5136,6 @@ err_out_iounmap: err_out_free_res: pci_release_regions(pdev); -err_write_cacheline: /* Try to restore it in case the error occurred after we * set it. */ diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 2517ffba8178..88f52a2f85b3 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -1895,8 +1895,9 @@ static int am65_cpsw_nuss_probe(struct platform_device *pdev) ale_params.nu_switch_ale = true; common->ale = cpsw_ale_create(&ale_params); - if (!common->ale) { + if (IS_ERR(common->ale)) { dev_err(dev, "error initializing ale engine\n"); + ret = PTR_ERR(common->ale); goto err_of_clear; } diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index c2c5bf87da01..ffeb8633e530 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -1753,11 +1753,15 @@ static int cpsw_suspend(struct device *dev) struct cpsw_common *cpsw = dev_get_drvdata(dev); int i; + rtnl_lock(); + for (i = 0; i < cpsw->data.slaves; i++) if (cpsw->slaves[i].ndev) if (netif_running(cpsw->slaves[i].ndev)) cpsw_ndo_stop(cpsw->slaves[i].ndev); + rtnl_unlock(); + /* Select sleep pin state */ pinctrl_pm_select_sleep_state(dev); diff --git a/drivers/net/ethernet/ti/cpsw_ale.c b/drivers/net/ethernet/ti/cpsw_ale.c index 0374e6936091..8dc6be11b2ff 100644 --- a/drivers/net/ethernet/ti/cpsw_ale.c +++ b/drivers/net/ethernet/ti/cpsw_ale.c @@ -955,7 +955,7 @@ struct cpsw_ale *cpsw_ale_create(struct cpsw_ale_params *params) ale = devm_kzalloc(params->dev, sizeof(*ale), GFP_KERNEL); if (!ale) - return NULL; + return ERR_PTR(-ENOMEM); ale->p0_untag_vid_mask = devm_kmalloc_array(params->dev, BITS_TO_LONGS(VLAN_N_VID), diff --git a/drivers/net/ethernet/ti/cpsw_priv.c b/drivers/net/ethernet/ti/cpsw_priv.c index 97a058ca60ac..d0b6c418a870 100644 --- a/drivers/net/ethernet/ti/cpsw_priv.c +++ b/drivers/net/ethernet/ti/cpsw_priv.c @@ -490,9 +490,9 @@ int cpsw_init_common(struct cpsw_common *cpsw, void __iomem *ss_regs, ale_params.ale_ports = CPSW_ALE_PORTS_NUM; cpsw->ale = cpsw_ale_create(&ale_params); - if (!cpsw->ale) { + if (IS_ERR(cpsw->ale)) { dev_err(dev, "error initializing ale engine\n"); - return -ENODEV; + return PTR_ERR(cpsw->ale); } dma_params.dev = dev; diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c index fb36115e9c51..fdbae734acce 100644 --- a/drivers/net/ethernet/ti/netcp_ethss.c +++ b/drivers/net/ethernet/ti/netcp_ethss.c @@ -3704,9 +3704,9 @@ static int gbe_probe(struct netcp_device *netcp_device, struct device *dev, ale_params.nu_switch_ale = true; } gbe_dev->ale = cpsw_ale_create(&ale_params); - if (!gbe_dev->ale) { + if (IS_ERR(gbe_dev->ale)) { dev_err(gbe_dev->dev, "error initializing ale engine\n"); - ret = -ENODEV; + ret = PTR_ERR(gbe_dev->ale); goto free_sec_ports; } else { dev_dbg(gbe_dev->dev, "Created a gbe ale engine\n"); diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c index b671bea0aa7c..8d9ca1c335e8 100644 --- a/drivers/net/ipa/gsi.c +++ b/drivers/net/ipa/gsi.c @@ -1392,6 +1392,7 @@ static int gsi_channel_poll(struct napi_struct *napi, int budget) while (count < budget) { struct gsi_trans *trans; + count++; trans = gsi_channel_poll_one(channel); if (!trans) break; diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index 68668a22b9dd..dc3ff0e20944 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -858,8 +858,7 @@ nsim_dev_devlink_trap_policer_counter_get(struct devlink *devlink, return -EINVAL; cnt = &nsim_dev->trap_data->trap_policers_cnt_arr[policer->id - 1]; - *p_drops = *cnt; - *cnt += jiffies % 64; + *p_drops = (*cnt)++; return 0; } diff --git a/drivers/net/phy/mscc/mscc.h b/drivers/net/phy/mscc/mscc.h index 030bf8b600df..414e3b31bb1f 100644 --- a/drivers/net/phy/mscc/mscc.h +++ b/drivers/net/phy/mscc/mscc.h @@ -354,6 +354,8 @@ struct vsc8531_private { u64 *stats; int nstats; bool pkg_init; + /* PHY address within the package. */ + u8 addr; /* For multiple port PHYs; the MDIO address of the base PHY in the * package. */ diff --git a/drivers/net/phy/mscc/mscc_mac.h b/drivers/net/phy/mscc/mscc_mac.h index fcb5ba5e5d03..59b6837c60b3 100644 --- a/drivers/net/phy/mscc/mscc_mac.h +++ b/drivers/net/phy/mscc/mscc_mac.h @@ -152,8 +152,8 @@ #define MSCC_MAC_PAUSE_CFG_STATE_PAUSE_STATE BIT(0) #define MSCC_MAC_PAUSE_CFG_STATE_MAC_TX_PAUSE_GEN BIT(4) -#define MSCC_PROC_0_IP_1588_TOP_CFG_STAT_MODE_CTL 0x2 -#define MSCC_PROC_0_IP_1588_TOP_CFG_STAT_MODE_CTL_PROTOCOL_MODE(x) (x) -#define MSCC_PROC_0_IP_1588_TOP_CFG_STAT_MODE_CTL_PROTOCOL_MODE_M GENMASK(2, 0) +#define MSCC_PROC_IP_1588_TOP_CFG_STAT_MODE_CTL 0x2 +#define MSCC_PROC_IP_1588_TOP_CFG_STAT_MODE_CTL_PROTOCOL_MODE(x) (x) +#define MSCC_PROC_IP_1588_TOP_CFG_STAT_MODE_CTL_PROTOCOL_MODE_M GENMASK(2, 0) #endif /* _MSCC_PHY_LINE_MAC_H_ */ diff --git a/drivers/net/phy/mscc/mscc_macsec.c b/drivers/net/phy/mscc/mscc_macsec.c index e99e2cd72a0c..b4d3dc4068e2 100644 --- a/drivers/net/phy/mscc/mscc_macsec.c +++ b/drivers/net/phy/mscc/mscc_macsec.c @@ -316,6 +316,8 @@ static void vsc8584_macsec_mac_init(struct phy_device *phydev, /* Must be called with mdio_lock taken */ static int __vsc8584_macsec_init(struct phy_device *phydev) { + struct vsc8531_private *priv = phydev->priv; + enum macsec_bank proc_bank; u32 val; vsc8584_macsec_block_init(phydev, MACSEC_INGR); @@ -351,12 +353,14 @@ static int __vsc8584_macsec_init(struct phy_device *phydev) val |= MSCC_FCBUF_ENA_CFG_TX_ENA | MSCC_FCBUF_ENA_CFG_RX_ENA; vsc8584_macsec_phy_write(phydev, FC_BUFFER, MSCC_FCBUF_ENA_CFG, val); - val = vsc8584_macsec_phy_read(phydev, IP_1588, - MSCC_PROC_0_IP_1588_TOP_CFG_STAT_MODE_CTL); - val &= ~MSCC_PROC_0_IP_1588_TOP_CFG_STAT_MODE_CTL_PROTOCOL_MODE_M; - val |= MSCC_PROC_0_IP_1588_TOP_CFG_STAT_MODE_CTL_PROTOCOL_MODE(4); - vsc8584_macsec_phy_write(phydev, IP_1588, - MSCC_PROC_0_IP_1588_TOP_CFG_STAT_MODE_CTL, val); + proc_bank = (priv->addr < 2) ? PROC_0 : PROC_2; + + val = vsc8584_macsec_phy_read(phydev, proc_bank, + MSCC_PROC_IP_1588_TOP_CFG_STAT_MODE_CTL); + val &= ~MSCC_PROC_IP_1588_TOP_CFG_STAT_MODE_CTL_PROTOCOL_MODE_M; + val |= MSCC_PROC_IP_1588_TOP_CFG_STAT_MODE_CTL_PROTOCOL_MODE(4); + vsc8584_macsec_phy_write(phydev, proc_bank, + MSCC_PROC_IP_1588_TOP_CFG_STAT_MODE_CTL, val); return 0; } diff --git a/drivers/net/phy/mscc/mscc_macsec.h b/drivers/net/phy/mscc/mscc_macsec.h index d0783944d106..d751f2946b79 100644 --- a/drivers/net/phy/mscc/mscc_macsec.h +++ b/drivers/net/phy/mscc/mscc_macsec.h @@ -64,7 +64,8 @@ enum macsec_bank { FC_BUFFER = 0x04, HOST_MAC = 0x05, LINE_MAC = 0x06, - IP_1588 = 0x0e, + PROC_0 = 0x0e, + PROC_2 = 0x0f, MACSEC_INGR = 0x38, MACSEC_EGR = 0x3c, }; diff --git a/drivers/net/phy/mscc/mscc_main.c b/drivers/net/phy/mscc/mscc_main.c index acddef79f4e8..c8aa6d905d8e 100644 --- a/drivers/net/phy/mscc/mscc_main.c +++ b/drivers/net/phy/mscc/mscc_main.c @@ -1347,6 +1347,8 @@ static int vsc8584_config_init(struct phy_device *phydev) else vsc8531->base_addr = phydev->mdio.addr - addr; + vsc8531->addr = addr; + /* Some parts of the init sequence are identical for every PHY in the * package. Some parts are modifying the GPIO register bank which is a * set of registers that are affecting all PHYs, a few resetting the @@ -1771,6 +1773,8 @@ static int vsc8514_config_init(struct phy_device *phydev) else vsc8531->base_addr = phydev->mdio.addr - addr; + vsc8531->addr = addr; + /* Some parts of the init sequence are identical for every PHY in the * package. Some parts are modifying the GPIO register bank which is a * set of registers that are affecting all PHYs, a few resetting the diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index ac2784192472..697c74deb222 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1233,7 +1233,7 @@ int phy_sfp_probe(struct phy_device *phydev, const struct sfp_upstream_ops *ops) { struct sfp_bus *bus; - int ret; + int ret = 0; if (phydev->mdio.dev.fwnode) { bus = sfp_bus_find_fwnode(phydev->mdio.dev.fwnode); @@ -1245,7 +1245,7 @@ int phy_sfp_probe(struct phy_device *phydev, ret = sfp_bus_add_upstream(bus, phydev, ops); sfp_bus_put(bus); } - return 0; + return ret; } EXPORT_SYMBOL(phy_sfp_probe); diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index 0cdb2ce47645..a657943c9f01 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -815,14 +815,21 @@ static const struct usb_device_id products[] = { .driver_info = 0, }, -/* Microsoft Surface 3 dock (based on Realtek RTL8153) */ +/* Microsoft Surface Ethernet Adapter (based on Realtek RTL8153) */ { USB_DEVICE_AND_INTERFACE_INFO(MICROSOFT_VENDOR_ID, 0x07c6, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = 0, }, - /* TP-LINK UE300 USB 3.0 Ethernet Adapters (based on Realtek RTL8153) */ +/* Microsoft Surface Ethernet Adapter (based on Realtek RTL8153B) */ +{ + USB_DEVICE_AND_INTERFACE_INFO(MICROSOFT_VENDOR_ID, 0x0927, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), + .driver_info = 0, +}, + +/* TP-LINK UE300 USB 3.0 Ethernet Adapters (based on Realtek RTL8153) */ { USB_DEVICE_AND_INTERFACE_INFO(TPLINK_VENDOR_ID, 0x0601, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 8f8d9883d363..c8c873a613b6 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -6880,6 +6880,7 @@ static const struct usb_device_id rtl8152_table[] = { {REALTEK_USB_DEVICE(VENDOR_ID_REALTEK, 0x8153)}, {REALTEK_USB_DEVICE(VENDOR_ID_MICROSOFT, 0x07ab)}, {REALTEK_USB_DEVICE(VENDOR_ID_MICROSOFT, 0x07c6)}, + {REALTEK_USB_DEVICE(VENDOR_ID_MICROSOFT, 0x0927)}, {REALTEK_USB_DEVICE(VENDOR_ID_SAMSUNG, 0xa101)}, {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x304f)}, {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x3062)}, diff --git a/drivers/net/wireguard/messages.h b/drivers/net/wireguard/messages.h index b8a7b9ce32ba..208da72673fc 100644 --- a/drivers/net/wireguard/messages.h +++ b/drivers/net/wireguard/messages.h @@ -32,7 +32,7 @@ enum cookie_values { }; enum counter_values { - COUNTER_BITS_TOTAL = 2048, + COUNTER_BITS_TOTAL = 8192, COUNTER_REDUNDANT_BITS = BITS_PER_LONG, COUNTER_WINDOW_SIZE = COUNTER_BITS_TOTAL - COUNTER_REDUNDANT_BITS }; diff --git a/drivers/net/wireguard/noise.c b/drivers/net/wireguard/noise.c index 708dc61c974f..626433690abb 100644 --- a/drivers/net/wireguard/noise.c +++ b/drivers/net/wireguard/noise.c @@ -104,6 +104,7 @@ static struct noise_keypair *keypair_create(struct wg_peer *peer) if (unlikely(!keypair)) return NULL; + spin_lock_init(&keypair->receiving_counter.lock); keypair->internal_id = atomic64_inc_return(&keypair_counter); keypair->entry.type = INDEX_HASHTABLE_KEYPAIR; keypair->entry.peer = peer; @@ -358,25 +359,16 @@ out: memzero_explicit(output, BLAKE2S_HASH_SIZE + 1); } -static void symmetric_key_init(struct noise_symmetric_key *key) -{ - spin_lock_init(&key->counter.receive.lock); - atomic64_set(&key->counter.counter, 0); - memset(key->counter.receive.backtrack, 0, - sizeof(key->counter.receive.backtrack)); - key->birthdate = ktime_get_coarse_boottime_ns(); - key->is_valid = true; -} - static void derive_keys(struct noise_symmetric_key *first_dst, struct noise_symmetric_key *second_dst, const u8 chaining_key[NOISE_HASH_LEN]) { + u64 birthdate = ktime_get_coarse_boottime_ns(); kdf(first_dst->key, second_dst->key, NULL, NULL, NOISE_SYMMETRIC_KEY_LEN, NOISE_SYMMETRIC_KEY_LEN, 0, 0, chaining_key); - symmetric_key_init(first_dst); - symmetric_key_init(second_dst); + first_dst->birthdate = second_dst->birthdate = birthdate; + first_dst->is_valid = second_dst->is_valid = true; } static bool __must_check mix_dh(u8 chaining_key[NOISE_HASH_LEN], @@ -715,6 +707,7 @@ wg_noise_handshake_consume_response(struct message_handshake_response *src, u8 e[NOISE_PUBLIC_KEY_LEN]; u8 ephemeral_private[NOISE_PUBLIC_KEY_LEN]; u8 static_private[NOISE_PUBLIC_KEY_LEN]; + u8 preshared_key[NOISE_SYMMETRIC_KEY_LEN]; down_read(&wg->static_identity.lock); @@ -733,6 +726,8 @@ wg_noise_handshake_consume_response(struct message_handshake_response *src, memcpy(chaining_key, handshake->chaining_key, NOISE_HASH_LEN); memcpy(ephemeral_private, handshake->ephemeral_private, NOISE_PUBLIC_KEY_LEN); + memcpy(preshared_key, handshake->preshared_key, + NOISE_SYMMETRIC_KEY_LEN); up_read(&handshake->lock); if (state != HANDSHAKE_CREATED_INITIATION) @@ -750,7 +745,7 @@ wg_noise_handshake_consume_response(struct message_handshake_response *src, goto fail; /* psk */ - mix_psk(chaining_key, hash, key, handshake->preshared_key); + mix_psk(chaining_key, hash, key, preshared_key); /* {} */ if (!message_decrypt(NULL, src->encrypted_nothing, @@ -783,6 +778,7 @@ out: memzero_explicit(chaining_key, NOISE_HASH_LEN); memzero_explicit(ephemeral_private, NOISE_PUBLIC_KEY_LEN); memzero_explicit(static_private, NOISE_PUBLIC_KEY_LEN); + memzero_explicit(preshared_key, NOISE_SYMMETRIC_KEY_LEN); up_read(&wg->static_identity.lock); return ret_peer; } diff --git a/drivers/net/wireguard/noise.h b/drivers/net/wireguard/noise.h index f532d59d3f19..c527253dba80 100644 --- a/drivers/net/wireguard/noise.h +++ b/drivers/net/wireguard/noise.h @@ -15,18 +15,14 @@ #include <linux/mutex.h> #include <linux/kref.h> -union noise_counter { - struct { - u64 counter; - unsigned long backtrack[COUNTER_BITS_TOTAL / BITS_PER_LONG]; - spinlock_t lock; - } receive; - atomic64_t counter; +struct noise_replay_counter { + u64 counter; + spinlock_t lock; + unsigned long backtrack[COUNTER_BITS_TOTAL / BITS_PER_LONG]; }; struct noise_symmetric_key { u8 key[NOISE_SYMMETRIC_KEY_LEN]; - union noise_counter counter; u64 birthdate; bool is_valid; }; @@ -34,7 +30,9 @@ struct noise_symmetric_key { struct noise_keypair { struct index_hashtable_entry entry; struct noise_symmetric_key sending; + atomic64_t sending_counter; struct noise_symmetric_key receiving; + struct noise_replay_counter receiving_counter; __le32 remote_index; bool i_am_the_initiator; struct kref refcount; diff --git a/drivers/net/wireguard/queueing.h b/drivers/net/wireguard/queueing.h index 3432232afe06..c58df439dbbe 100644 --- a/drivers/net/wireguard/queueing.h +++ b/drivers/net/wireguard/queueing.h @@ -87,12 +87,20 @@ static inline bool wg_check_packet_protocol(struct sk_buff *skb) return real_protocol && skb->protocol == real_protocol; } -static inline void wg_reset_packet(struct sk_buff *skb) +static inline void wg_reset_packet(struct sk_buff *skb, bool encapsulating) { + u8 l4_hash = skb->l4_hash; + u8 sw_hash = skb->sw_hash; + u32 hash = skb->hash; skb_scrub_packet(skb, true); memset(&skb->headers_start, 0, offsetof(struct sk_buff, headers_end) - offsetof(struct sk_buff, headers_start)); + if (encapsulating) { + skb->l4_hash = l4_hash; + skb->sw_hash = sw_hash; + skb->hash = hash; + } skb->queue_mapping = 0; skb->nohdr = 0; skb->peeked = 0; diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c index 3bb5b9ae7cd1..91438144e4f7 100644 --- a/drivers/net/wireguard/receive.c +++ b/drivers/net/wireguard/receive.c @@ -245,20 +245,20 @@ static void keep_key_fresh(struct wg_peer *peer) } } -static bool decrypt_packet(struct sk_buff *skb, struct noise_symmetric_key *key) +static bool decrypt_packet(struct sk_buff *skb, struct noise_keypair *keypair) { struct scatterlist sg[MAX_SKB_FRAGS + 8]; struct sk_buff *trailer; unsigned int offset; int num_frags; - if (unlikely(!key)) + if (unlikely(!keypair)) return false; - if (unlikely(!READ_ONCE(key->is_valid) || - wg_birthdate_has_expired(key->birthdate, REJECT_AFTER_TIME) || - key->counter.receive.counter >= REJECT_AFTER_MESSAGES)) { - WRITE_ONCE(key->is_valid, false); + if (unlikely(!READ_ONCE(keypair->receiving.is_valid) || + wg_birthdate_has_expired(keypair->receiving.birthdate, REJECT_AFTER_TIME) || + keypair->receiving_counter.counter >= REJECT_AFTER_MESSAGES)) { + WRITE_ONCE(keypair->receiving.is_valid, false); return false; } @@ -283,7 +283,7 @@ static bool decrypt_packet(struct sk_buff *skb, struct noise_symmetric_key *key) if (!chacha20poly1305_decrypt_sg_inplace(sg, skb->len, NULL, 0, PACKET_CB(skb)->nonce, - key->key)) + keypair->receiving.key)) return false; /* Another ugly situation of pushing and pulling the header so as to @@ -298,41 +298,41 @@ static bool decrypt_packet(struct sk_buff *skb, struct noise_symmetric_key *key) } /* This is RFC6479, a replay detection bitmap algorithm that avoids bitshifts */ -static bool counter_validate(union noise_counter *counter, u64 their_counter) +static bool counter_validate(struct noise_replay_counter *counter, u64 their_counter) { unsigned long index, index_current, top, i; bool ret = false; - spin_lock_bh(&counter->receive.lock); + spin_lock_bh(&counter->lock); - if (unlikely(counter->receive.counter >= REJECT_AFTER_MESSAGES + 1 || + if (unlikely(counter->counter >= REJECT_AFTER_MESSAGES + 1 || their_counter >= REJECT_AFTER_MESSAGES)) goto out; ++their_counter; if (unlikely((COUNTER_WINDOW_SIZE + their_counter) < - counter->receive.counter)) + counter->counter)) goto out; index = their_counter >> ilog2(BITS_PER_LONG); - if (likely(their_counter > counter->receive.counter)) { - index_current = counter->receive.counter >> ilog2(BITS_PER_LONG); + if (likely(their_counter > counter->counter)) { + index_current = counter->counter >> ilog2(BITS_PER_LONG); top = min_t(unsigned long, index - index_current, COUNTER_BITS_TOTAL / BITS_PER_LONG); for (i = 1; i <= top; ++i) - counter->receive.backtrack[(i + index_current) & + counter->backtrack[(i + index_current) & ((COUNTER_BITS_TOTAL / BITS_PER_LONG) - 1)] = 0; - counter->receive.counter = their_counter; + counter->counter = their_counter; } index &= (COUNTER_BITS_TOTAL / BITS_PER_LONG) - 1; ret = !test_and_set_bit(their_counter & (BITS_PER_LONG - 1), - &counter->receive.backtrack[index]); + &counter->backtrack[index]); out: - spin_unlock_bh(&counter->receive.lock); + spin_unlock_bh(&counter->lock); return ret; } @@ -472,19 +472,19 @@ int wg_packet_rx_poll(struct napi_struct *napi, int budget) if (unlikely(state != PACKET_STATE_CRYPTED)) goto next; - if (unlikely(!counter_validate(&keypair->receiving.counter, + if (unlikely(!counter_validate(&keypair->receiving_counter, PACKET_CB(skb)->nonce))) { net_dbg_ratelimited("%s: Packet has invalid nonce %llu (max %llu)\n", peer->device->dev->name, PACKET_CB(skb)->nonce, - keypair->receiving.counter.receive.counter); + keypair->receiving_counter.counter); goto next; } if (unlikely(wg_socket_endpoint_from_skb(&endpoint, skb))) goto next; - wg_reset_packet(skb); + wg_reset_packet(skb, false); wg_packet_consume_data_done(peer, skb, &endpoint); free = false; @@ -511,8 +511,8 @@ void wg_packet_decrypt_worker(struct work_struct *work) struct sk_buff *skb; while ((skb = ptr_ring_consume_bh(&queue->ring)) != NULL) { - enum packet_state state = likely(decrypt_packet(skb, - &PACKET_CB(skb)->keypair->receiving)) ? + enum packet_state state = + likely(decrypt_packet(skb, PACKET_CB(skb)->keypair)) ? PACKET_STATE_CRYPTED : PACKET_STATE_DEAD; wg_queue_enqueue_per_peer_napi(skb, state); if (need_resched()) diff --git a/drivers/net/wireguard/selftest/counter.c b/drivers/net/wireguard/selftest/counter.c index f4fbb9072ed7..ec3c156bf91b 100644 --- a/drivers/net/wireguard/selftest/counter.c +++ b/drivers/net/wireguard/selftest/counter.c @@ -6,18 +6,24 @@ #ifdef DEBUG bool __init wg_packet_counter_selftest(void) { + struct noise_replay_counter *counter; unsigned int test_num = 0, i; - union noise_counter counter; bool success = true; -#define T_INIT do { \ - memset(&counter, 0, sizeof(union noise_counter)); \ - spin_lock_init(&counter.receive.lock); \ + counter = kmalloc(sizeof(*counter), GFP_KERNEL); + if (unlikely(!counter)) { + pr_err("nonce counter self-test malloc: FAIL\n"); + return false; + } + +#define T_INIT do { \ + memset(counter, 0, sizeof(*counter)); \ + spin_lock_init(&counter->lock); \ } while (0) #define T_LIM (COUNTER_WINDOW_SIZE + 1) #define T(n, v) do { \ ++test_num; \ - if (counter_validate(&counter, n) != (v)) { \ + if (counter_validate(counter, n) != (v)) { \ pr_err("nonce counter self-test %u: FAIL\n", \ test_num); \ success = false; \ @@ -99,6 +105,7 @@ bool __init wg_packet_counter_selftest(void) if (success) pr_info("nonce counter self-tests: pass\n"); + kfree(counter); return success; } #endif diff --git a/drivers/net/wireguard/send.c b/drivers/net/wireguard/send.c index 6687db699803..f74b9341ab0f 100644 --- a/drivers/net/wireguard/send.c +++ b/drivers/net/wireguard/send.c @@ -129,7 +129,7 @@ static void keep_key_fresh(struct wg_peer *peer) rcu_read_lock_bh(); keypair = rcu_dereference_bh(peer->keypairs.current_keypair); send = keypair && READ_ONCE(keypair->sending.is_valid) && - (atomic64_read(&keypair->sending.counter.counter) > REKEY_AFTER_MESSAGES || + (atomic64_read(&keypair->sending_counter) > REKEY_AFTER_MESSAGES || (keypair->i_am_the_initiator && wg_birthdate_has_expired(keypair->sending.birthdate, REKEY_AFTER_TIME))); rcu_read_unlock_bh(); @@ -167,6 +167,11 @@ static bool encrypt_packet(struct sk_buff *skb, struct noise_keypair *keypair) struct sk_buff *trailer; int num_frags; + /* Force hash calculation before encryption so that flow analysis is + * consistent over the inner packet. + */ + skb_get_hash(skb); + /* Calculate lengths. */ padding_len = calculate_skb_padding(skb); trailer_len = padding_len + noise_encrypted_len(0); @@ -295,7 +300,7 @@ void wg_packet_encrypt_worker(struct work_struct *work) skb_list_walk_safe(first, skb, next) { if (likely(encrypt_packet(skb, PACKET_CB(first)->keypair))) { - wg_reset_packet(skb); + wg_reset_packet(skb, true); } else { state = PACKET_STATE_DEAD; break; @@ -344,7 +349,6 @@ void wg_packet_purge_staged_packets(struct wg_peer *peer) void wg_packet_send_staged_packets(struct wg_peer *peer) { - struct noise_symmetric_key *key; struct noise_keypair *keypair; struct sk_buff_head packets; struct sk_buff *skb; @@ -364,10 +368,9 @@ void wg_packet_send_staged_packets(struct wg_peer *peer) rcu_read_unlock_bh(); if (unlikely(!keypair)) goto out_nokey; - key = &keypair->sending; - if (unlikely(!READ_ONCE(key->is_valid))) + if (unlikely(!READ_ONCE(keypair->sending.is_valid))) goto out_nokey; - if (unlikely(wg_birthdate_has_expired(key->birthdate, + if (unlikely(wg_birthdate_has_expired(keypair->sending.birthdate, REJECT_AFTER_TIME))) goto out_invalid; @@ -382,7 +385,7 @@ void wg_packet_send_staged_packets(struct wg_peer *peer) */ PACKET_CB(skb)->ds = ip_tunnel_ecn_encap(0, ip_hdr(skb), skb); PACKET_CB(skb)->nonce = - atomic64_inc_return(&key->counter.counter) - 1; + atomic64_inc_return(&keypair->sending_counter) - 1; if (unlikely(PACKET_CB(skb)->nonce >= REJECT_AFTER_MESSAGES)) goto out_invalid; } @@ -394,7 +397,7 @@ void wg_packet_send_staged_packets(struct wg_peer *peer) return; out_invalid: - WRITE_ONCE(key->is_valid, false); + WRITE_ONCE(keypair->sending.is_valid, false); out_nokey: wg_noise_keypair_put(keypair, false); diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 6744c0281ffb..29971c25dba4 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -1092,6 +1092,10 @@ static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) iwl_trans->cfg = &iwl_ax101_cfg_quz_hr; else if (iwl_trans->cfg == &iwl_ax201_cfg_qu_hr) iwl_trans->cfg = &iwl_ax201_cfg_quz_hr; + else if (iwl_trans->cfg == &killer1650s_2ax_cfg_qu_b0_hr_b0) + iwl_trans->cfg = &iwl_ax1650s_cfg_quz_hr; + else if (iwl_trans->cfg == &killer1650i_2ax_cfg_qu_b0_hr_b0) + iwl_trans->cfg = &iwl_ax1650i_cfg_quz_hr; } #endif diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c index a587767b6ae1..37d1bba57b00 100644 --- a/fs/afs/fs_probe.c +++ b/fs/afs/fs_probe.c @@ -32,9 +32,8 @@ void afs_fileserver_probe_result(struct afs_call *call) struct afs_server *server = call->server; unsigned int server_index = call->server_index; unsigned int index = call->addr_ix; - unsigned int rtt = UINT_MAX; + unsigned int rtt_us = 0; bool have_result = false; - u64 _rtt; int ret = call->error; _enter("%pU,%u", &server->uuid, index); @@ -93,15 +92,9 @@ responded: } } - /* Get the RTT and scale it to fit into a 32-bit value that represents - * over a minute of time so that we can access it with one instruction - * on a 32-bit system. - */ - _rtt = rxrpc_kernel_get_rtt(call->net->socket, call->rxcall); - _rtt /= 64; - rtt = (_rtt > UINT_MAX) ? UINT_MAX : _rtt; - if (rtt < server->probe.rtt) { - server->probe.rtt = rtt; + rtt_us = rxrpc_kernel_get_srtt(call->net->socket, call->rxcall); + if (rtt_us < server->probe.rtt) { + server->probe.rtt = rtt_us; alist->preferred = index; have_result = true; } @@ -113,8 +106,7 @@ out: spin_unlock(&server->probe_lock); _debug("probe [%u][%u] %pISpc rtt=%u ret=%d", - server_index, index, &alist->addrs[index].transport, - (unsigned int)rtt, ret); + server_index, index, &alist->addrs[index].transport, rtt_us, ret); have_result |= afs_fs_probe_done(server); if (have_result) diff --git a/fs/afs/vl_probe.c b/fs/afs/vl_probe.c index 858498cc1b05..e3aa013c2177 100644 --- a/fs/afs/vl_probe.c +++ b/fs/afs/vl_probe.c @@ -31,10 +31,9 @@ void afs_vlserver_probe_result(struct afs_call *call) struct afs_addr_list *alist = call->alist; struct afs_vlserver *server = call->vlserver; unsigned int server_index = call->server_index; + unsigned int rtt_us = 0; unsigned int index = call->addr_ix; - unsigned int rtt = UINT_MAX; bool have_result = false; - u64 _rtt; int ret = call->error; _enter("%s,%u,%u,%d,%d", server->name, server_index, index, ret, call->abort_code); @@ -93,15 +92,9 @@ responded: } } - /* Get the RTT and scale it to fit into a 32-bit value that represents - * over a minute of time so that we can access it with one instruction - * on a 32-bit system. - */ - _rtt = rxrpc_kernel_get_rtt(call->net->socket, call->rxcall); - _rtt /= 64; - rtt = (_rtt > UINT_MAX) ? UINT_MAX : _rtt; - if (rtt < server->probe.rtt) { - server->probe.rtt = rtt; + rtt_us = rxrpc_kernel_get_srtt(call->net->socket, call->rxcall); + if (rtt_us < server->probe.rtt) { + server->probe.rtt = rtt_us; alist->preferred = index; have_result = true; } @@ -113,8 +106,7 @@ out: spin_unlock(&server->probe_lock); _debug("probe [%u][%u] %pISpc rtt=%u ret=%d", - server_index, index, &alist->addrs[index].transport, - (unsigned int)rtt, ret); + server_index, index, &alist->addrs[index].transport, rtt_us, ret); have_result |= afs_vl_probe_done(server); if (have_result) { diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 6f8f79ef829b..8397b6558dc7 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -213,6 +213,12 @@ enum mlx5_port_status { MLX5_PORT_DOWN = 2, }; +enum mlx5_cmdif_state { + MLX5_CMDIF_STATE_UNINITIALIZED, + MLX5_CMDIF_STATE_UP, + MLX5_CMDIF_STATE_DOWN, +}; + struct mlx5_cmd_first { __be32 data[4]; }; @@ -258,6 +264,7 @@ struct mlx5_cmd_stats { struct mlx5_cmd { struct mlx5_nb nb; + enum mlx5_cmdif_state state; void *cmd_alloc_buf; dma_addr_t alloc_dma; int alloc_size; @@ -284,6 +291,7 @@ struct mlx5_cmd { struct semaphore sem; struct semaphore pages_sem; int mode; + u16 allowed_opcode; struct mlx5_cmd_work_ent *ent_arr[MLX5_MAX_COMMANDS]; struct dma_pool *pool; struct mlx5_cmd_debug dbg; @@ -743,6 +751,7 @@ struct mlx5_cmd_work_ent { struct delayed_work cb_timeout_work; void *context; int idx; + struct completion handling; struct completion done; struct mlx5_cmd *cmd; struct work_struct work; @@ -874,10 +883,17 @@ mlx5_frag_buf_get_idx_last_contig_stride(struct mlx5_frag_buf_ctrl *fbc, u32 ix) return min_t(u32, last_frag_stride_idx - fbc->strides_offset, fbc->sz_m1); } +enum { + CMD_ALLOWED_OPCODE_ALL, +}; + int mlx5_cmd_init(struct mlx5_core_dev *dev); void mlx5_cmd_cleanup(struct mlx5_core_dev *dev); +void mlx5_cmd_set_state(struct mlx5_core_dev *dev, + enum mlx5_cmdif_state cmdif_state); void mlx5_cmd_use_events(struct mlx5_core_dev *dev); void mlx5_cmd_use_polling(struct mlx5_core_dev *dev); +void mlx5_cmd_allowed_opcode(struct mlx5_core_dev *dev, u16 opcode); struct mlx5_async_ctx { struct mlx5_core_dev *dev; diff --git a/include/net/act_api.h b/include/net/act_api.h index c24d7643548e..124bd139886c 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -75,7 +75,8 @@ static inline void tcf_tm_dump(struct tcf_t *dtm, const struct tcf_t *stm) { dtm->install = jiffies_to_clock_t(jiffies - stm->install); dtm->lastuse = jiffies_to_clock_t(jiffies - stm->lastuse); - dtm->firstuse = jiffies_to_clock_t(jiffies - stm->firstuse); + dtm->firstuse = stm->firstuse ? + jiffies_to_clock_t(jiffies - stm->firstuse) : 0; dtm->expires = jiffies_to_clock_t(stm->expires); } diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index 04e97bab6f28..ab988940bf04 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -59,7 +59,7 @@ bool rxrpc_kernel_abort_call(struct socket *, struct rxrpc_call *, void rxrpc_kernel_end_call(struct socket *, struct rxrpc_call *); void rxrpc_kernel_get_peer(struct socket *, struct rxrpc_call *, struct sockaddr_rxrpc *); -u64 rxrpc_kernel_get_rtt(struct socket *, struct rxrpc_call *); +u32 rxrpc_kernel_get_srtt(struct socket *, struct rxrpc_call *); int rxrpc_kernel_charge_accept(struct socket *, rxrpc_notify_rx_t, rxrpc_user_attach_call_t, unsigned long, gfp_t, unsigned int); diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 59e0d4e99f94..b219a8fe0950 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -257,7 +257,6 @@ struct fib_dump_filter { u32 table_id; /* filter_set is an optimization that an entry is set */ bool filter_set; - bool dump_all_families; bool dump_routes; bool dump_exceptions; unsigned char protocol; diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 191fe447f990..ba9efdc848f9 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -1112,18 +1112,17 @@ TRACE_EVENT(rxrpc_rtt_tx, TRACE_EVENT(rxrpc_rtt_rx, TP_PROTO(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why, rxrpc_serial_t send_serial, rxrpc_serial_t resp_serial, - s64 rtt, u8 nr, s64 avg), + u32 rtt, u32 rto), - TP_ARGS(call, why, send_serial, resp_serial, rtt, nr, avg), + TP_ARGS(call, why, send_serial, resp_serial, rtt, rto), TP_STRUCT__entry( __field(unsigned int, call ) __field(enum rxrpc_rtt_rx_trace, why ) - __field(u8, nr ) __field(rxrpc_serial_t, send_serial ) __field(rxrpc_serial_t, resp_serial ) - __field(s64, rtt ) - __field(u64, avg ) + __field(u32, rtt ) + __field(u32, rto ) ), TP_fast_assign( @@ -1132,18 +1131,16 @@ TRACE_EVENT(rxrpc_rtt_rx, __entry->send_serial = send_serial; __entry->resp_serial = resp_serial; __entry->rtt = rtt; - __entry->nr = nr; - __entry->avg = avg; + __entry->rto = rto; ), - TP_printk("c=%08x %s sr=%08x rr=%08x rtt=%lld nr=%u avg=%lld", + TP_printk("c=%08x %s sr=%08x rr=%08x rtt=%u rto=%u", __entry->call, __print_symbolic(__entry->why, rxrpc_rtt_rx_traces), __entry->send_serial, __entry->resp_serial, __entry->rtt, - __entry->nr, - __entry->avg) + __entry->rto) ); TRACE_EVENT(rxrpc_timer, @@ -1544,6 +1541,41 @@ TRACE_EVENT(rxrpc_notify_socket, __entry->serial) ); +TRACE_EVENT(rxrpc_rx_discard_ack, + TP_PROTO(unsigned int debug_id, rxrpc_serial_t serial, + rxrpc_seq_t first_soft_ack, rxrpc_seq_t call_ackr_first, + rxrpc_seq_t prev_pkt, rxrpc_seq_t call_ackr_prev), + + TP_ARGS(debug_id, serial, first_soft_ack, call_ackr_first, + prev_pkt, call_ackr_prev), + + TP_STRUCT__entry( + __field(unsigned int, debug_id ) + __field(rxrpc_serial_t, serial ) + __field(rxrpc_seq_t, first_soft_ack) + __field(rxrpc_seq_t, call_ackr_first) + __field(rxrpc_seq_t, prev_pkt) + __field(rxrpc_seq_t, call_ackr_prev) + ), + + TP_fast_assign( + __entry->debug_id = debug_id; + __entry->serial = serial; + __entry->first_soft_ack = first_soft_ack; + __entry->call_ackr_first = call_ackr_first; + __entry->prev_pkt = prev_pkt; + __entry->call_ackr_prev = call_ackr_prev; + ), + + TP_printk("c=%08x r=%08x %08x<%08x %08x<%08x", + __entry->debug_id, + __entry->serial, + __entry->first_soft_ack, + __entry->call_ackr_first, + __entry->prev_pkt, + __entry->call_ackr_prev) + ); + #endif /* _TRACE_RXRPC_H */ /* This part must be outside protection */ diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 2843bbba9ca1..4e6dee19a668 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -623,9 +623,20 @@ static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma) mutex_lock(&map->freeze_mutex); - if ((vma->vm_flags & VM_WRITE) && map->frozen) { - err = -EPERM; - goto out; + if (vma->vm_flags & VM_WRITE) { + if (map->frozen) { + err = -EPERM; + goto out; + } + /* map is meant to be read-only, so do not allow mapping as + * writable, because it's possible to leak a writable page + * reference and allows user-space to still modify it after + * freezing, while verifier will assume contents do not change + */ + if (map->map_flags & BPF_F_RDONLY_PROG) { + err = -EACCES; + goto out; + } } /* set default open/close callbacks */ diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index ff57ea89c27e..fd91cd34f25e 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -635,8 +635,10 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname, break; case SO_BINDTODEVICE: - if (optlen > IFNAMSIZ) - optlen = IFNAMSIZ; + if (optlen > IFNAMSIZ - 1) + optlen = IFNAMSIZ - 1; + + memset(devname, 0, sizeof(devname)); if (copy_from_user(devname, optval, optlen)) { res = -EFAULT; diff --git a/net/core/dev.c b/net/core/dev.c index 6d327b7aa813..2d8aceee4284 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4988,11 +4988,12 @@ static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev, return 0; } -static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc, +static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc, struct packet_type **ppt_prev) { struct packet_type *ptype, *pt_prev; rx_handler_func_t *rx_handler; + struct sk_buff *skb = *pskb; struct net_device *orig_dev; bool deliver_exact = false; int ret = NET_RX_DROP; @@ -5023,8 +5024,10 @@ another_round: ret2 = do_xdp_generic(rcu_dereference(skb->dev->xdp_prog), skb); preempt_enable(); - if (ret2 != XDP_PASS) - return NET_RX_DROP; + if (ret2 != XDP_PASS) { + ret = NET_RX_DROP; + goto out; + } skb_reset_mac_len(skb); } @@ -5174,6 +5177,13 @@ drop: } out: + /* The invariant here is that if *ppt_prev is not NULL + * then skb should also be non-NULL. + * + * Apparently *ppt_prev assignment above holds this invariant due to + * skb dereferencing near it. + */ + *pskb = skb; return ret; } @@ -5183,7 +5193,7 @@ static int __netif_receive_skb_one_core(struct sk_buff *skb, bool pfmemalloc) struct packet_type *pt_prev = NULL; int ret; - ret = __netif_receive_skb_core(skb, pfmemalloc, &pt_prev); + ret = __netif_receive_skb_core(&skb, pfmemalloc, &pt_prev); if (pt_prev) ret = INDIRECT_CALL_INET(pt_prev->func, ipv6_rcv, ip_rcv, skb, skb->dev, pt_prev, orig_dev); @@ -5261,7 +5271,7 @@ static void __netif_receive_skb_list_core(struct list_head *head, bool pfmemallo struct packet_type *pt_prev = NULL; skb_list_del_init(skb); - __netif_receive_skb_core(skb, pfmemalloc, &pt_prev); + __netif_receive_skb_core(&skb, pfmemalloc, &pt_prev); if (!pt_prev) continue; if (pt_curr != pt_prev || od_curr != orig_dev) { diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 3eff84824c8b..5dceed467f64 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -160,12 +160,10 @@ out: return ret; } -int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr) +static int flow_dissector_bpf_prog_detach(struct net *net) { struct bpf_prog *attached; - struct net *net; - net = current->nsproxy->net_ns; mutex_lock(&flow_dissector_mutex); attached = rcu_dereference_protected(net->flow_dissector_prog, lockdep_is_held(&flow_dissector_mutex)); @@ -179,6 +177,24 @@ int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr) return 0; } +int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr) +{ + return flow_dissector_bpf_prog_detach(current->nsproxy->net_ns); +} + +static void __net_exit flow_dissector_pernet_pre_exit(struct net *net) +{ + /* We're not racing with attach/detach because there are no + * references to netns left when pre_exit gets called. + */ + if (rcu_access_pointer(net->flow_dissector_prog)) + flow_dissector_bpf_prog_detach(net); +} + +static struct pernet_operations flow_dissector_pernet_ops __net_initdata = { + .pre_exit = flow_dissector_pernet_pre_exit, +}; + /** * __skb_flow_get_ports - extract the upper layer ports and return them * @skb: sk_buff to extract the ports from @@ -1836,7 +1852,7 @@ static int __init init_default_flow_dissectors(void) skb_flow_dissector_init(&flow_keys_basic_dissector, flow_keys_basic_dissector_keys, ARRAY_SIZE(flow_keys_basic_dissector_keys)); - return 0; -} + return register_pernet_subsys(&flow_dissector_pernet_ops); +} core_initcall(init_default_flow_dissectors); diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c index b5705cba8318..d6619edd53e5 100644 --- a/net/dsa/tag_mtk.c +++ b/net/dsa/tag_mtk.c @@ -15,6 +15,7 @@ #define MTK_HDR_XMIT_TAGGED_TPID_8100 1 #define MTK_HDR_RECV_SOURCE_PORT_MASK GENMASK(2, 0) #define MTK_HDR_XMIT_DP_BIT_MASK GENMASK(5, 0) +#define MTK_HDR_XMIT_SA_DIS BIT(6) static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb, struct net_device *dev) @@ -22,6 +23,9 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb, struct dsa_port *dp = dsa_slave_to_port(dev); u8 *mtk_tag; bool is_vlan_skb = true; + unsigned char *dest = eth_hdr(skb)->h_dest; + bool is_multicast_skb = is_multicast_ether_addr(dest) && + !is_broadcast_ether_addr(dest); /* Build the special tag after the MAC Source Address. If VLAN header * is present, it's required that VLAN header and special tag is @@ -47,6 +51,10 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb, MTK_HDR_XMIT_UNTAGGED; mtk_tag[1] = (1 << dp->index) & MTK_HDR_XMIT_DP_BIT_MASK; + /* Disable SA learning for multicast frames */ + if (unlikely(is_multicast_skb)) + mtk_tag[1] |= MTK_HDR_XMIT_SA_DIS; + /* Tag control information is kept for 802.1Q */ if (!is_vlan_skb) { mtk_tag[2] = 0; @@ -61,6 +69,9 @@ static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev, { int port; __be16 *phdr, hdr; + unsigned char *dest = eth_hdr(skb)->h_dest; + bool is_multicast_skb = is_multicast_ether_addr(dest) && + !is_broadcast_ether_addr(dest); if (unlikely(!pskb_may_pull(skb, MTK_HDR_LEN))) return NULL; @@ -86,6 +97,10 @@ static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev, if (!skb->dev) return NULL; + /* Only unicast or broadcast frames are offloaded */ + if (likely(!is_multicast_skb)) + skb->offload_fwd_mark = 1; + return skb; } diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 0c772318c023..ed5357210193 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -342,7 +342,7 @@ static int ethnl_default_doit(struct sk_buff *skb, struct genl_info *info) ret = ops->reply_size(req_info, reply_data); if (ret < 0) goto err_cleanup; - reply_len = ret; + reply_len = ret + ethnl_reply_header_size(); ret = -ENOMEM; rskb = ethnl_reply_init(reply_len, req_info->dev, ops->reply_cmd, ops->hdr_attr, info, &reply_payload); @@ -588,7 +588,7 @@ static void ethnl_default_notify(struct net_device *dev, unsigned int cmd, ret = ops->reply_size(req_info, reply_data); if (ret < 0) goto err_cleanup; - reply_len = ret; + reply_len = ret + ethnl_reply_header_size(); ret = -ENOMEM; skb = genlmsg_new(reply_len, GFP_KERNEL); if (!skb) diff --git a/net/ethtool/strset.c b/net/ethtool/strset.c index 95eae5c68a52..0eed4e4909ab 100644 --- a/net/ethtool/strset.c +++ b/net/ethtool/strset.c @@ -324,7 +324,6 @@ static int strset_reply_size(const struct ethnl_req_info *req_base, int len = 0; int ret; - len += ethnl_reply_header_size(); for (i = 0; i < ETH_SS_COUNT; i++) { const struct strset_info *set_info = &data->sets[i]; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 213be9c050ad..1bf9da3a75f9 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -918,7 +918,6 @@ int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh, else filter->dump_exceptions = false; - filter->dump_all_families = (rtm->rtm_family == AF_UNSPEC); filter->flags = rtm->rtm_flags; filter->protocol = rtm->rtm_protocol; filter->rt_type = rtm->rtm_type; @@ -990,7 +989,7 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) if (filter.table_id) { tb = fib_get_table(net, filter.table_id); if (!tb) { - if (filter.dump_all_families) + if (rtnl_msg_family(cb->nlh) != PF_INET) return skb->len; NL_SET_ERR_MSG(cb->extack, "ipv4: FIB table does not exist"); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 5f34eb951627..65c29f2bd89f 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -24,17 +24,19 @@ #include <net/addrconf.h> #if IS_ENABLED(CONFIG_IPV6) -/* match_wildcard == true: IPV6_ADDR_ANY equals to any IPv6 addresses if IPv6 - * only, and any IPv4 addresses if not IPv6 only - * match_wildcard == false: addresses must be exactly the same, i.e. - * IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY, - * and 0.0.0.0 equals to 0.0.0.0 only +/* match_sk*_wildcard == true: IPV6_ADDR_ANY equals to any IPv6 addresses + * if IPv6 only, and any IPv4 addresses + * if not IPv6 only + * match_sk*_wildcard == false: addresses must be exactly the same, i.e. + * IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY, + * and 0.0.0.0 equals to 0.0.0.0 only */ static bool ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6, const struct in6_addr *sk2_rcv_saddr6, __be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr, bool sk1_ipv6only, bool sk2_ipv6only, - bool match_wildcard) + bool match_sk1_wildcard, + bool match_sk2_wildcard) { int addr_type = ipv6_addr_type(sk1_rcv_saddr6); int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED; @@ -44,8 +46,8 @@ static bool ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6, if (!sk2_ipv6only) { if (sk1_rcv_saddr == sk2_rcv_saddr) return true; - if (!sk1_rcv_saddr || !sk2_rcv_saddr) - return match_wildcard; + return (match_sk1_wildcard && !sk1_rcv_saddr) || + (match_sk2_wildcard && !sk2_rcv_saddr); } return false; } @@ -53,11 +55,11 @@ static bool ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6, if (addr_type == IPV6_ADDR_ANY && addr_type2 == IPV6_ADDR_ANY) return true; - if (addr_type2 == IPV6_ADDR_ANY && match_wildcard && + if (addr_type2 == IPV6_ADDR_ANY && match_sk2_wildcard && !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED)) return true; - if (addr_type == IPV6_ADDR_ANY && match_wildcard && + if (addr_type == IPV6_ADDR_ANY && match_sk1_wildcard && !(sk1_ipv6only && addr_type2 == IPV6_ADDR_MAPPED)) return true; @@ -69,18 +71,19 @@ static bool ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6, } #endif -/* match_wildcard == true: 0.0.0.0 equals to any IPv4 addresses - * match_wildcard == false: addresses must be exactly the same, i.e. - * 0.0.0.0 only equals to 0.0.0.0 +/* match_sk*_wildcard == true: 0.0.0.0 equals to any IPv4 addresses + * match_sk*_wildcard == false: addresses must be exactly the same, i.e. + * 0.0.0.0 only equals to 0.0.0.0 */ static bool ipv4_rcv_saddr_equal(__be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr, - bool sk2_ipv6only, bool match_wildcard) + bool sk2_ipv6only, bool match_sk1_wildcard, + bool match_sk2_wildcard) { if (!sk2_ipv6only) { if (sk1_rcv_saddr == sk2_rcv_saddr) return true; - if (!sk1_rcv_saddr || !sk2_rcv_saddr) - return match_wildcard; + return (match_sk1_wildcard && !sk1_rcv_saddr) || + (match_sk2_wildcard && !sk2_rcv_saddr); } return false; } @@ -96,10 +99,12 @@ bool inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2, sk2->sk_rcv_saddr, ipv6_only_sock(sk), ipv6_only_sock(sk2), + match_wildcard, match_wildcard); #endif return ipv4_rcv_saddr_equal(sk->sk_rcv_saddr, sk2->sk_rcv_saddr, - ipv6_only_sock(sk2), match_wildcard); + ipv6_only_sock(sk2), match_wildcard, + match_wildcard); } EXPORT_SYMBOL(inet_rcv_saddr_equal); @@ -285,10 +290,10 @@ static inline int sk_reuseport_match(struct inet_bind_bucket *tb, tb->fast_rcv_saddr, sk->sk_rcv_saddr, tb->fast_ipv6_only, - ipv6_only_sock(sk), true); + ipv6_only_sock(sk), true, false); #endif return ipv4_rcv_saddr_equal(tb->fast_rcv_saddr, sk->sk_rcv_saddr, - ipv6_only_sock(sk), true); + ipv6_only_sock(sk), true, false); } /* Obtain a reference to a local port for the given sock, diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 2f01cf6fa0de..678575adaf3b 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -698,7 +698,7 @@ out: rtnl_link_failed: #if IS_ENABLED(CONFIG_MPLS) - xfrm4_tunnel_deregister(&mplsip_handler, AF_INET); + xfrm4_tunnel_deregister(&mplsip_handler, AF_MPLS); xfrm_tunnel_mplsip_failed: #endif diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 5c218db2dede..b2363b82b48d 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2613,7 +2613,7 @@ static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) mrt = ipmr_get_table(sock_net(skb->sk), filter.table_id); if (!mrt) { - if (filter.dump_all_families) + if (rtnl_msg_family(cb->nlh) != RTNL_FAMILY_IPMR) return skb->len; NL_SET_ERR_MSG(cb->extack, "ipv4: MR table does not exist"); diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index fdfca534d094..715e14475220 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -276,6 +276,7 @@ out: return 0; nla_put_failure: + nlmsg_cancel(skb, nlh); return -EMSGSIZE; } @@ -433,7 +434,7 @@ static int nh_check_attr_group(struct net *net, struct nlattr *tb[], if (!valid_group_nh(nh, len, extack)) return -EINVAL; } - for (i = NHA_GROUP + 1; i < __NHA_MAX; ++i) { + for (i = NHA_GROUP_TYPE + 1; i < __NHA_MAX; ++i) { if (!tb[i]) continue; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index fa829f31a3f5..b73f540fa19b 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -491,18 +491,16 @@ u32 ip_idents_reserve(u32 hash, int segs) atomic_t *p_id = ip_idents + hash % IP_IDENTS_SZ; u32 old = READ_ONCE(*p_tstamp); u32 now = (u32)jiffies; - u32 new, delta = 0; + u32 delta = 0; if (old != now && cmpxchg(p_tstamp, old, now) == old) delta = prandom_u32_max(now - old); - /* Do not use atomic_add_return() as it makes UBSAN unhappy */ - do { - old = (u32)atomic_read(p_id); - new = old + delta + segs; - } while (atomic_cmpxchg(p_id, old, new) != old); - - return new - segs; + /* If UBSAN reports an error there, please make sure your compiler + * supports -fno-strict-overflow before reporting it that was a bug + * in UBSAN, and it has been fixed in GCC-8. + */ + return atomic_add_return(segs + delta, p_id) - segs; } EXPORT_SYMBOL(ip_idents_reserve); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 46ed56719476..20314895509c 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -664,7 +664,7 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) if (arg.filter.table_id) { tb = fib6_get_table(net, arg.filter.table_id); if (!tb) { - if (arg.filter.dump_all_families) + if (rtnl_msg_family(cb->nlh) != PF_INET6) goto out; NL_SET_ERR_MSG_MOD(cb->extack, "FIB table does not exist"); diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 65a54d74acc1..1f4d20e97c07 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -98,7 +98,8 @@ static void ipmr_expire_process(struct timer_list *t); #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES #define ip6mr_for_each_table(mrt, net) \ list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list, \ - lockdep_rtnl_is_held()) + lockdep_rtnl_is_held() || \ + list_empty(&net->ipv6.mr6_tables)) static struct mr_table *ip6mr_mr_table_iter(struct net *net, struct mr_table *mrt) @@ -2502,7 +2503,7 @@ static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) mrt = ip6mr_get_table(sock_net(skb->sk), filter.table_id); if (!mrt) { - if (filter.dump_all_families) + if (rtnl_msg_family(cb->nlh) != RTNL_FAMILY_IP6MR) return skb->len; NL_SET_ERR_MSG_MOD(cb->extack, "MR table does not exist"); diff --git a/net/mptcp/crypto.c b/net/mptcp/crypto.c index c151628bd416..0f5a414a9366 100644 --- a/net/mptcp/crypto.c +++ b/net/mptcp/crypto.c @@ -47,8 +47,6 @@ void mptcp_crypto_key_sha(u64 key, u32 *token, u64 *idsn) void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac) { u8 input[SHA256_BLOCK_SIZE + SHA256_DIGEST_SIZE]; - __be32 mptcp_hashed_key[SHA256_DIGEST_WORDS]; - __be32 *hash_out = (__force __be32 *)hmac; struct sha256_state state; u8 key1be[8]; u8 key2be[8]; @@ -86,11 +84,7 @@ void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac) sha256_init(&state); sha256_update(&state, input, SHA256_BLOCK_SIZE + SHA256_DIGEST_SIZE); - sha256_final(&state, (u8 *)mptcp_hashed_key); - - /* takes only first 160 bits */ - for (i = 0; i < 5; i++) - hash_out[i] = mptcp_hashed_key[i]; + sha256_final(&state, (u8 *)hmac); } #ifdef CONFIG_MPTCP_HMAC_TEST @@ -101,29 +95,29 @@ struct test_cast { }; /* we can't reuse RFC 4231 test vectors, as we have constraint on the - * input and key size, and we truncate the output. + * input and key size. */ static struct test_cast tests[] = { { .key = "0b0b0b0b0b0b0b0b", .msg = "48692054", - .result = "8385e24fb4235ac37556b6b886db106284a1da67", + .result = "8385e24fb4235ac37556b6b886db106284a1da671699f46db1f235ec622dcafa", }, { .key = "aaaaaaaaaaaaaaaa", .msg = "dddddddd", - .result = "2c5e219164ff1dca1c4a92318d847bb6b9d44492", + .result = "2c5e219164ff1dca1c4a92318d847bb6b9d44492984e1eb71aff9022f71046e9", }, { .key = "0102030405060708", .msg = "cdcdcdcd", - .result = "e73b9ba9969969cefb04aa0d6df18ec2fcc075b6", + .result = "e73b9ba9969969cefb04aa0d6df18ec2fcc075b6f23b4d8c4da736a5dbbc6e7d", }, }; static int __init test_mptcp_crypto(void) { - char hmac[20], hmac_hex[41]; + char hmac[32], hmac_hex[65]; u32 nonce1, nonce2; u64 key1, key2; u8 msg[8]; @@ -140,11 +134,11 @@ static int __init test_mptcp_crypto(void) put_unaligned_be32(nonce2, &msg[4]); mptcp_crypto_hmac_sha(key1, key2, msg, 8, hmac); - for (j = 0; j < 20; ++j) + for (j = 0; j < 32; ++j) sprintf(&hmac_hex[j << 1], "%02x", hmac[j] & 0xff); - hmac_hex[40] = 0; + hmac_hex[64] = 0; - if (memcmp(hmac_hex, tests[i].result, 40)) + if (memcmp(hmac_hex, tests[i].result, 64)) pr_err("test %d failed, got %s expected %s", i, hmac_hex, tests[i].result); else diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 45497af23906..7793b6011fa7 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -7,6 +7,7 @@ #define pr_fmt(fmt) "MPTCP: " fmt #include <linux/kernel.h> +#include <crypto/sha.h> #include <net/tcp.h> #include <net/mptcp.h> #include "protocol.h" @@ -535,7 +536,7 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, static u64 add_addr_generate_hmac(u64 key1, u64 key2, u8 addr_id, struct in_addr *addr) { - u8 hmac[MPTCP_ADDR_HMAC_LEN]; + u8 hmac[SHA256_DIGEST_SIZE]; u8 msg[7]; msg[0] = addr_id; @@ -545,14 +546,14 @@ static u64 add_addr_generate_hmac(u64 key1, u64 key2, u8 addr_id, mptcp_crypto_hmac_sha(key1, key2, msg, 7, hmac); - return get_unaligned_be64(hmac); + return get_unaligned_be64(&hmac[SHA256_DIGEST_SIZE - sizeof(u64)]); } #if IS_ENABLED(CONFIG_MPTCP_IPV6) static u64 add_addr6_generate_hmac(u64 key1, u64 key2, u8 addr_id, struct in6_addr *addr) { - u8 hmac[MPTCP_ADDR_HMAC_LEN]; + u8 hmac[SHA256_DIGEST_SIZE]; u8 msg[19]; msg[0] = addr_id; @@ -562,7 +563,7 @@ static u64 add_addr6_generate_hmac(u64 key1, u64 key2, u8 addr_id, mptcp_crypto_hmac_sha(key1, key2, msg, 19, hmac); - return get_unaligned_be64(hmac); + return get_unaligned_be64(&hmac[SHA256_DIGEST_SIZE - sizeof(u64)]); } #endif diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index e4ca6320ce76..d0803dfb8108 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -81,7 +81,6 @@ /* MPTCP ADD_ADDR flags */ #define MPTCP_ADDR_ECHO BIT(0) -#define MPTCP_ADDR_HMAC_LEN 20 #define MPTCP_ADDR_IPVERSION_4 4 #define MPTCP_ADDR_IPVERSION_6 6 diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 4931a29a6f08..8968b2c065e7 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -10,6 +10,7 @@ #include <linux/module.h> #include <linux/netdevice.h> #include <crypto/algapi.h> +#include <crypto/sha.h> #include <net/sock.h> #include <net/inet_common.h> #include <net/inet_hashtables.h> @@ -89,7 +90,7 @@ static bool subflow_token_join_request(struct request_sock *req, const struct sk_buff *skb) { struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); - u8 hmac[MPTCPOPT_HMAC_LEN]; + u8 hmac[SHA256_DIGEST_SIZE]; struct mptcp_sock *msk; int local_id; @@ -201,7 +202,7 @@ static void subflow_v6_init_req(struct request_sock *req, /* validate received truncated hmac and create hmac for third ACK */ static bool subflow_thmac_valid(struct mptcp_subflow_context *subflow) { - u8 hmac[MPTCPOPT_HMAC_LEN]; + u8 hmac[SHA256_DIGEST_SIZE]; u64 thmac; subflow_generate_hmac(subflow->remote_key, subflow->local_key, @@ -267,6 +268,8 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) subflow->ssn_offset = TCP_SKB_CB(skb)->seq; } } else if (subflow->mp_join) { + u8 hmac[SHA256_DIGEST_SIZE]; + pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u", subflow, subflow->thmac, subflow->remote_nonce); @@ -279,7 +282,9 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) subflow_generate_hmac(subflow->local_key, subflow->remote_key, subflow->local_nonce, subflow->remote_nonce, - subflow->hmac); + hmac); + + memcpy(subflow->hmac, hmac, MPTCPOPT_HMAC_LEN); if (skb) subflow->ssn_offset = TCP_SKB_CB(skb)->seq; @@ -347,7 +352,7 @@ static bool subflow_hmac_valid(const struct request_sock *req, const struct mptcp_options_received *mp_opt) { const struct mptcp_subflow_request_sock *subflow_req; - u8 hmac[MPTCPOPT_HMAC_LEN]; + u8 hmac[SHA256_DIGEST_SIZE]; struct mptcp_sock *msk; bool ret; @@ -361,7 +366,7 @@ static bool subflow_hmac_valid(const struct request_sock *req, subflow_req->local_nonce, hmac); ret = true; - if (crypto_memneq(hmac, mp_opt->hmac, sizeof(hmac))) + if (crypto_memneq(hmac, mp_opt->hmac, MPTCPOPT_HMAC_LEN)) ret = false; sock_put((struct sock *)msk); diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index 7ed31b5e77e4..2d8d6131bc5f 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -854,7 +854,7 @@ static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb, } mutex_unlock(&qrtr_node_lock); - qrtr_local_enqueue(node, skb, type, from, to); + qrtr_local_enqueue(NULL, skb, type, from, to); return 0; } diff --git a/net/rxrpc/Makefile b/net/rxrpc/Makefile index 6ffb7e9887ce..ddd0f95713a9 100644 --- a/net/rxrpc/Makefile +++ b/net/rxrpc/Makefile @@ -25,6 +25,7 @@ rxrpc-y := \ peer_event.o \ peer_object.o \ recvmsg.o \ + rtt.o \ security.o \ sendmsg.o \ skbuff.o \ diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 3eb1ab40ca5c..9fe264bec70c 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -7,6 +7,7 @@ #include <linux/atomic.h> #include <linux/seqlock.h> +#include <linux/win_minmax.h> #include <net/net_namespace.h> #include <net/netns/generic.h> #include <net/sock.h> @@ -311,11 +312,14 @@ struct rxrpc_peer { #define RXRPC_RTT_CACHE_SIZE 32 spinlock_t rtt_input_lock; /* RTT lock for input routine */ ktime_t rtt_last_req; /* Time of last RTT request */ - u64 rtt; /* Current RTT estimate (in nS) */ - u64 rtt_sum; /* Sum of cache contents */ - u64 rtt_cache[RXRPC_RTT_CACHE_SIZE]; /* Determined RTT cache */ - u8 rtt_cursor; /* next entry at which to insert */ - u8 rtt_usage; /* amount of cache actually used */ + unsigned int rtt_count; /* Number of samples we've got */ + + u32 srtt_us; /* smoothed round trip time << 3 in usecs */ + u32 mdev_us; /* medium deviation */ + u32 mdev_max_us; /* maximal mdev for the last rtt period */ + u32 rttvar_us; /* smoothed mdev_max */ + u32 rto_j; /* Retransmission timeout in jiffies */ + u8 backoff; /* Backoff timeout */ u8 cong_cwnd; /* Congestion window size */ }; @@ -1041,7 +1045,6 @@ extern unsigned long rxrpc_idle_ack_delay; extern unsigned int rxrpc_rx_window_size; extern unsigned int rxrpc_rx_mtu; extern unsigned int rxrpc_rx_jumbo_max; -extern unsigned long rxrpc_resend_timeout; extern const s8 rxrpc_ack_priority[]; @@ -1069,8 +1072,6 @@ void rxrpc_send_keepalive(struct rxrpc_peer *); * peer_event.c */ void rxrpc_error_report(struct sock *); -void rxrpc_peer_add_rtt(struct rxrpc_call *, enum rxrpc_rtt_rx_trace, - rxrpc_serial_t, rxrpc_serial_t, ktime_t, ktime_t); void rxrpc_peer_keepalive_worker(struct work_struct *); /* @@ -1103,6 +1104,14 @@ void rxrpc_notify_socket(struct rxrpc_call *); int rxrpc_recvmsg(struct socket *, struct msghdr *, size_t, int); /* + * rtt.c + */ +void rxrpc_peer_add_rtt(struct rxrpc_call *, enum rxrpc_rtt_rx_trace, + rxrpc_serial_t, rxrpc_serial_t, ktime_t, ktime_t); +unsigned long rxrpc_get_rto_backoff(struct rxrpc_peer *, bool); +void rxrpc_peer_init_rtt(struct rxrpc_peer *); + +/* * rxkad.c */ #ifdef CONFIG_RXKAD diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 70e44abf106c..b7611cc159e5 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -248,7 +248,7 @@ static void rxrpc_send_ping(struct rxrpc_call *call, struct sk_buff *skb) struct rxrpc_skb_priv *sp = rxrpc_skb(skb); ktime_t now = skb->tstamp; - if (call->peer->rtt_usage < 3 || + if (call->peer->rtt_count < 3 || ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), now)) rxrpc_propose_ACK(call, RXRPC_ACK_PING, sp->hdr.serial, true, true, diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index cedbbb3a7c2e..2a65ac41055f 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -111,8 +111,8 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, } else { unsigned long now = jiffies, ack_at; - if (call->peer->rtt_usage > 0) - ack_at = nsecs_to_jiffies(call->peer->rtt); + if (call->peer->srtt_us != 0) + ack_at = usecs_to_jiffies(call->peer->srtt_us >> 3); else ack_at = expiry; @@ -157,24 +157,18 @@ static void rxrpc_congestion_timeout(struct rxrpc_call *call) static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) { struct sk_buff *skb; - unsigned long resend_at; + unsigned long resend_at, rto_j; rxrpc_seq_t cursor, seq, top; - ktime_t now, max_age, oldest, ack_ts, timeout, min_timeo; + ktime_t now, max_age, oldest, ack_ts; int ix; u8 annotation, anno_type, retrans = 0, unacked = 0; _enter("{%d,%d}", call->tx_hard_ack, call->tx_top); - if (call->peer->rtt_usage > 1) - timeout = ns_to_ktime(call->peer->rtt * 3 / 2); - else - timeout = ms_to_ktime(rxrpc_resend_timeout); - min_timeo = ns_to_ktime((1000000000 / HZ) * 4); - if (ktime_before(timeout, min_timeo)) - timeout = min_timeo; + rto_j = call->peer->rto_j; now = ktime_get_real(); - max_age = ktime_sub(now, timeout); + max_age = ktime_sub(now, jiffies_to_usecs(rto_j)); spin_lock_bh(&call->lock); @@ -219,7 +213,7 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) } resend_at = nsecs_to_jiffies(ktime_to_ns(ktime_sub(now, oldest))); - resend_at += jiffies + rxrpc_resend_timeout; + resend_at += jiffies + rto_j; WRITE_ONCE(call->resend_at, resend_at); if (unacked) @@ -234,7 +228,7 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) rxrpc_timer_set_for_resend); spin_unlock_bh(&call->lock); ack_ts = ktime_sub(now, call->acks_latest_ts); - if (ktime_to_ns(ack_ts) < call->peer->rtt) + if (ktime_to_us(ack_ts) < (call->peer->srtt_us >> 3)) goto out; rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, true, false, rxrpc_propose_ack_ping_for_lost_ack); diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 69e09d69c896..3be4177baf70 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -91,11 +91,11 @@ static void rxrpc_congestion_management(struct rxrpc_call *call, /* We analyse the number of packets that get ACK'd per RTT * period and increase the window if we managed to fill it. */ - if (call->peer->rtt_usage == 0) + if (call->peer->rtt_count == 0) goto out; if (ktime_before(skb->tstamp, - ktime_add_ns(call->cong_tstamp, - call->peer->rtt))) + ktime_add_us(call->cong_tstamp, + call->peer->srtt_us >> 3))) goto out_no_clear_ca; change = rxrpc_cong_rtt_window_end; call->cong_tstamp = skb->tstamp; @@ -803,6 +803,30 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks, } /* + * Return true if the ACK is valid - ie. it doesn't appear to have regressed + * with respect to the ack state conveyed by preceding ACKs. + */ +static bool rxrpc_is_ack_valid(struct rxrpc_call *call, + rxrpc_seq_t first_pkt, rxrpc_seq_t prev_pkt) +{ + rxrpc_seq_t base = READ_ONCE(call->ackr_first_seq); + + if (after(first_pkt, base)) + return true; /* The window advanced */ + + if (before(first_pkt, base)) + return false; /* firstPacket regressed */ + + if (after_eq(prev_pkt, call->ackr_prev_seq)) + return true; /* previousPacket hasn't regressed. */ + + /* Some rx implementations put a serial number in previousPacket. */ + if (after_eq(prev_pkt, base + call->tx_winsize)) + return false; + return true; +} + +/* * Process an ACK packet. * * ack.firstPacket is the sequence number of the first soft-ACK'd/NAK'd packet @@ -865,9 +889,12 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) } /* Discard any out-of-order or duplicate ACKs (outside lock). */ - if (before(first_soft_ack, call->ackr_first_seq) || - before(prev_pkt, call->ackr_prev_seq)) + if (!rxrpc_is_ack_valid(call, first_soft_ack, prev_pkt)) { + trace_rxrpc_rx_discard_ack(call->debug_id, sp->hdr.serial, + first_soft_ack, call->ackr_first_seq, + prev_pkt, call->ackr_prev_seq); return; + } buf.info.rxMTU = 0; ioffset = offset + nr_acks + 3; @@ -878,9 +905,12 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) spin_lock(&call->input_lock); /* Discard any out-of-order or duplicate ACKs (inside lock). */ - if (before(first_soft_ack, call->ackr_first_seq) || - before(prev_pkt, call->ackr_prev_seq)) + if (!rxrpc_is_ack_valid(call, first_soft_ack, prev_pkt)) { + trace_rxrpc_rx_discard_ack(call->debug_id, sp->hdr.serial, + first_soft_ack, call->ackr_first_seq, + prev_pkt, call->ackr_prev_seq); goto out; + } call->acks_latest_ts = skb->tstamp; call->ackr_first_seq = first_soft_ack; diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index 214405f75346..d4144fd86f84 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -63,11 +63,6 @@ unsigned int rxrpc_rx_mtu = 5692; */ unsigned int rxrpc_rx_jumbo_max = 4; -/* - * Time till packet resend (in milliseconds). - */ -unsigned long rxrpc_resend_timeout = 4 * HZ; - const s8 rxrpc_ack_priority[] = { [0] = 0, [RXRPC_ACK_DELAY] = 1, diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 90e263c6aa69..f8b632a5c619 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -369,7 +369,7 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb, (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events) || retrans || call->cong_mode == RXRPC_CALL_SLOW_START || - (call->peer->rtt_usage < 3 && sp->hdr.seq & 1) || + (call->peer->rtt_count < 3 && sp->hdr.seq & 1) || ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), ktime_get_real()))) whdr.flags |= RXRPC_REQUEST_ACK; @@ -423,13 +423,10 @@ done: if (whdr.flags & RXRPC_REQUEST_ACK) { call->peer->rtt_last_req = skb->tstamp; trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_data, serial); - if (call->peer->rtt_usage > 1) { + if (call->peer->rtt_count > 1) { unsigned long nowj = jiffies, ack_lost_at; - ack_lost_at = nsecs_to_jiffies(2 * call->peer->rtt); - if (ack_lost_at < 1) - ack_lost_at = 1; - + ack_lost_at = rxrpc_get_rto_backoff(call->peer, retrans); ack_lost_at += nowj; WRITE_ONCE(call->ack_lost_at, ack_lost_at); rxrpc_reduce_call_timer(call, ack_lost_at, nowj, diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index 923b263c401b..b1449d971883 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -296,52 +296,6 @@ static void rxrpc_distribute_error(struct rxrpc_peer *peer, int error, } /* - * Add RTT information to cache. This is called in softirq mode and has - * exclusive access to the peer RTT data. - */ -void rxrpc_peer_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why, - rxrpc_serial_t send_serial, rxrpc_serial_t resp_serial, - ktime_t send_time, ktime_t resp_time) -{ - struct rxrpc_peer *peer = call->peer; - s64 rtt; - u64 sum = peer->rtt_sum, avg; - u8 cursor = peer->rtt_cursor, usage = peer->rtt_usage; - - rtt = ktime_to_ns(ktime_sub(resp_time, send_time)); - if (rtt < 0) - return; - - spin_lock(&peer->rtt_input_lock); - - /* Replace the oldest datum in the RTT buffer */ - sum -= peer->rtt_cache[cursor]; - sum += rtt; - peer->rtt_cache[cursor] = rtt; - peer->rtt_cursor = (cursor + 1) & (RXRPC_RTT_CACHE_SIZE - 1); - peer->rtt_sum = sum; - if (usage < RXRPC_RTT_CACHE_SIZE) { - usage++; - peer->rtt_usage = usage; - } - - spin_unlock(&peer->rtt_input_lock); - - /* Now recalculate the average */ - if (usage == RXRPC_RTT_CACHE_SIZE) { - avg = sum / RXRPC_RTT_CACHE_SIZE; - } else { - avg = sum; - do_div(avg, usage); - } - - /* Don't need to update this under lock */ - peer->rtt = avg; - trace_rxrpc_rtt_rx(call, why, send_serial, resp_serial, rtt, - usage, avg); -} - -/* * Perform keep-alive pings. */ static void rxrpc_peer_keepalive_dispatch(struct rxrpc_net *rxnet, diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index 452163eadb98..ca29976bb193 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -225,6 +225,8 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp) spin_lock_init(&peer->rtt_input_lock); peer->debug_id = atomic_inc_return(&rxrpc_debug_id); + rxrpc_peer_init_rtt(peer); + if (RXRPC_TX_SMSS > 2190) peer->cong_cwnd = 2; else if (RXRPC_TX_SMSS > 1095) @@ -497,14 +499,14 @@ void rxrpc_kernel_get_peer(struct socket *sock, struct rxrpc_call *call, EXPORT_SYMBOL(rxrpc_kernel_get_peer); /** - * rxrpc_kernel_get_rtt - Get a call's peer RTT + * rxrpc_kernel_get_srtt - Get a call's peer smoothed RTT * @sock: The socket on which the call is in progress. * @call: The call to query * - * Get the call's peer RTT. + * Get the call's peer smoothed RTT. */ -u64 rxrpc_kernel_get_rtt(struct socket *sock, struct rxrpc_call *call) +u32 rxrpc_kernel_get_srtt(struct socket *sock, struct rxrpc_call *call) { - return call->peer->rtt; + return call->peer->srtt_us >> 3; } -EXPORT_SYMBOL(rxrpc_kernel_get_rtt); +EXPORT_SYMBOL(rxrpc_kernel_get_srtt); diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c index b9d053e42821..8b179e3c802a 100644 --- a/net/rxrpc/proc.c +++ b/net/rxrpc/proc.c @@ -222,7 +222,7 @@ static int rxrpc_peer_seq_show(struct seq_file *seq, void *v) seq_puts(seq, "Proto Local " " Remote " - " Use CW MTU LastUse RTT Rc\n" + " Use CW MTU LastUse RTT RTO\n" ); return 0; } @@ -236,15 +236,15 @@ static int rxrpc_peer_seq_show(struct seq_file *seq, void *v) now = ktime_get_seconds(); seq_printf(seq, "UDP %-47.47s %-47.47s %3u" - " %3u %5u %6llus %12llu %2u\n", + " %3u %5u %6llus %8u %8u\n", lbuff, rbuff, atomic_read(&peer->usage), peer->cong_cwnd, peer->mtu, now - peer->last_tx_at, - peer->rtt, - peer->rtt_cursor); + peer->srtt_us >> 3, + jiffies_to_usecs(peer->rto_j)); return 0; } diff --git a/net/rxrpc/rtt.c b/net/rxrpc/rtt.c new file mode 100644 index 000000000000..928d8b34a3ee --- /dev/null +++ b/net/rxrpc/rtt.c @@ -0,0 +1,195 @@ +// SPDX-License-Identifier: GPL-2.0 +/* RTT/RTO calculation. + * + * Adapted from TCP for AF_RXRPC by David Howells (dhowells@redhat.com) + * + * https://tools.ietf.org/html/rfc6298 + * https://tools.ietf.org/html/rfc1122#section-4.2.3.1 + * http://ccr.sigcomm.org/archive/1995/jan95/ccr-9501-partridge87.pdf + */ + +#include <linux/net.h> +#include "ar-internal.h" + +#define RXRPC_RTO_MAX ((unsigned)(120 * HZ)) +#define RXRPC_TIMEOUT_INIT ((unsigned)(1*HZ)) /* RFC6298 2.1 initial RTO value */ +#define rxrpc_jiffies32 ((u32)jiffies) /* As rxrpc_jiffies32 */ +#define rxrpc_min_rtt_wlen 300 /* As sysctl_tcp_min_rtt_wlen */ + +static u32 rxrpc_rto_min_us(struct rxrpc_peer *peer) +{ + return 200; +} + +static u32 __rxrpc_set_rto(const struct rxrpc_peer *peer) +{ + return _usecs_to_jiffies((peer->srtt_us >> 3) + peer->rttvar_us); +} + +static u32 rxrpc_bound_rto(u32 rto) +{ + return min(rto, RXRPC_RTO_MAX); +} + +/* + * Called to compute a smoothed rtt estimate. The data fed to this + * routine either comes from timestamps, or from segments that were + * known _not_ to have been retransmitted [see Karn/Partridge + * Proceedings SIGCOMM 87]. The algorithm is from the SIGCOMM 88 + * piece by Van Jacobson. + * NOTE: the next three routines used to be one big routine. + * To save cycles in the RFC 1323 implementation it was better to break + * it up into three procedures. -- erics + */ +static void rxrpc_rtt_estimator(struct rxrpc_peer *peer, long sample_rtt_us) +{ + long m = sample_rtt_us; /* RTT */ + u32 srtt = peer->srtt_us; + + /* The following amusing code comes from Jacobson's + * article in SIGCOMM '88. Note that rtt and mdev + * are scaled versions of rtt and mean deviation. + * This is designed to be as fast as possible + * m stands for "measurement". + * + * On a 1990 paper the rto value is changed to: + * RTO = rtt + 4 * mdev + * + * Funny. This algorithm seems to be very broken. + * These formulae increase RTO, when it should be decreased, increase + * too slowly, when it should be increased quickly, decrease too quickly + * etc. I guess in BSD RTO takes ONE value, so that it is absolutely + * does not matter how to _calculate_ it. Seems, it was trap + * that VJ failed to avoid. 8) + */ + if (srtt != 0) { + m -= (srtt >> 3); /* m is now error in rtt est */ + srtt += m; /* rtt = 7/8 rtt + 1/8 new */ + if (m < 0) { + m = -m; /* m is now abs(error) */ + m -= (peer->mdev_us >> 2); /* similar update on mdev */ + /* This is similar to one of Eifel findings. + * Eifel blocks mdev updates when rtt decreases. + * This solution is a bit different: we use finer gain + * for mdev in this case (alpha*beta). + * Like Eifel it also prevents growth of rto, + * but also it limits too fast rto decreases, + * happening in pure Eifel. + */ + if (m > 0) + m >>= 3; + } else { + m -= (peer->mdev_us >> 2); /* similar update on mdev */ + } + + peer->mdev_us += m; /* mdev = 3/4 mdev + 1/4 new */ + if (peer->mdev_us > peer->mdev_max_us) { + peer->mdev_max_us = peer->mdev_us; + if (peer->mdev_max_us > peer->rttvar_us) + peer->rttvar_us = peer->mdev_max_us; + } + } else { + /* no previous measure. */ + srtt = m << 3; /* take the measured time to be rtt */ + peer->mdev_us = m << 1; /* make sure rto = 3*rtt */ + peer->rttvar_us = max(peer->mdev_us, rxrpc_rto_min_us(peer)); + peer->mdev_max_us = peer->rttvar_us; + } + + peer->srtt_us = max(1U, srtt); +} + +/* + * Calculate rto without backoff. This is the second half of Van Jacobson's + * routine referred to above. + */ +static void rxrpc_set_rto(struct rxrpc_peer *peer) +{ + u32 rto; + + /* 1. If rtt variance happened to be less 50msec, it is hallucination. + * It cannot be less due to utterly erratic ACK generation made + * at least by solaris and freebsd. "Erratic ACKs" has _nothing_ + * to do with delayed acks, because at cwnd>2 true delack timeout + * is invisible. Actually, Linux-2.4 also generates erratic + * ACKs in some circumstances. + */ + rto = __rxrpc_set_rto(peer); + + /* 2. Fixups made earlier cannot be right. + * If we do not estimate RTO correctly without them, + * all the algo is pure shit and should be replaced + * with correct one. It is exactly, which we pretend to do. + */ + + /* NOTE: clamping at RXRPC_RTO_MIN is not required, current algo + * guarantees that rto is higher. + */ + peer->rto_j = rxrpc_bound_rto(rto); +} + +static void rxrpc_ack_update_rtt(struct rxrpc_peer *peer, long rtt_us) +{ + if (rtt_us < 0) + return; + + //rxrpc_update_rtt_min(peer, rtt_us); + rxrpc_rtt_estimator(peer, rtt_us); + rxrpc_set_rto(peer); + + /* RFC6298: only reset backoff on valid RTT measurement. */ + peer->backoff = 0; +} + +/* + * Add RTT information to cache. This is called in softirq mode and has + * exclusive access to the peer RTT data. + */ +void rxrpc_peer_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why, + rxrpc_serial_t send_serial, rxrpc_serial_t resp_serial, + ktime_t send_time, ktime_t resp_time) +{ + struct rxrpc_peer *peer = call->peer; + s64 rtt_us; + + rtt_us = ktime_to_us(ktime_sub(resp_time, send_time)); + if (rtt_us < 0) + return; + + spin_lock(&peer->rtt_input_lock); + rxrpc_ack_update_rtt(peer, rtt_us); + if (peer->rtt_count < 3) + peer->rtt_count++; + spin_unlock(&peer->rtt_input_lock); + + trace_rxrpc_rtt_rx(call, why, send_serial, resp_serial, + peer->srtt_us >> 3, peer->rto_j); +} + +/* + * Get the retransmission timeout to set in jiffies, backing it off each time + * we retransmit. + */ +unsigned long rxrpc_get_rto_backoff(struct rxrpc_peer *peer, bool retrans) +{ + u64 timo_j; + u8 backoff = READ_ONCE(peer->backoff); + + timo_j = peer->rto_j; + timo_j <<= backoff; + if (retrans && timo_j * 2 <= RXRPC_RTO_MAX) + WRITE_ONCE(peer->backoff, backoff + 1); + + if (timo_j < 1) + timo_j = 1; + + return timo_j; +} + +void rxrpc_peer_init_rtt(struct rxrpc_peer *peer) +{ + peer->rto_j = RXRPC_TIMEOUT_INIT; + peer->mdev_us = jiffies_to_usecs(RXRPC_TIMEOUT_INIT); + peer->backoff = 0; + //minmax_reset(&peer->rtt_min, rxrpc_jiffies32, ~0U); +} diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index 098f1f9ec53b..52a24d4ef5d8 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -1148,7 +1148,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, ret = rxkad_decrypt_ticket(conn, skb, ticket, ticket_len, &session_key, &expiry, _abort_code); if (ret < 0) - goto temporary_error_free_resp; + goto temporary_error_free_ticket; /* use the session key from inside the ticket to decrypt the * response */ @@ -1230,7 +1230,6 @@ protocol_error: temporary_error_free_ticket: kfree(ticket); -temporary_error_free_resp: kfree(response); temporary_error: /* Ignore the response packet if we got a temporary error such as diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 0fcf157aa09f..5e9c43d4a314 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -66,15 +66,14 @@ static int rxrpc_wait_for_tx_window_waitall(struct rxrpc_sock *rx, struct rxrpc_call *call) { rxrpc_seq_t tx_start, tx_win; - signed long rtt2, timeout; - u64 rtt; + signed long rtt, timeout; - rtt = READ_ONCE(call->peer->rtt); - rtt2 = nsecs_to_jiffies64(rtt) * 2; - if (rtt2 < 2) - rtt2 = 2; + rtt = READ_ONCE(call->peer->srtt_us) >> 3; + rtt = usecs_to_jiffies(rtt) * 2; + if (rtt < 2) + rtt = 2; - timeout = rtt2; + timeout = rtt; tx_start = READ_ONCE(call->tx_hard_ack); for (;;) { @@ -92,7 +91,7 @@ static int rxrpc_wait_for_tx_window_waitall(struct rxrpc_sock *rx, return -EINTR; if (tx_win != tx_start) { - timeout = rtt2; + timeout = rtt; tx_start = tx_win; } @@ -271,16 +270,9 @@ static int rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, _debug("need instant resend %d", ret); rxrpc_instant_resend(call, ix); } else { - unsigned long now = jiffies, resend_at; + unsigned long now = jiffies; + unsigned long resend_at = now + call->peer->rto_j; - if (call->peer->rtt_usage > 1) - resend_at = nsecs_to_jiffies(call->peer->rtt * 3 / 2); - else - resend_at = rxrpc_resend_timeout; - if (resend_at < 1) - resend_at = 1; - - resend_at += now; WRITE_ONCE(call->resend_at, resend_at); rxrpc_reduce_call_timer(call, resend_at, now, rxrpc_timer_set_for_send); diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c index 2bbb38161851..18dade4e6f9a 100644 --- a/net/rxrpc/sysctl.c +++ b/net/rxrpc/sysctl.c @@ -71,15 +71,6 @@ static struct ctl_table rxrpc_sysctl_table[] = { .extra1 = (void *)&one_jiffy, .extra2 = (void *)&max_jiffies, }, - { - .procname = "resend_timeout", - .data = &rxrpc_resend_timeout, - .maxlen = sizeof(unsigned long), - .mode = 0644, - .proc_handler = proc_doulongvec_ms_jiffies_minmax, - .extra1 = (void *)&one_jiffy, - .extra2 = (void *)&max_jiffies, - }, /* Non-time values */ { diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 2bc29463e1dc..9f36fe911d08 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -1523,9 +1523,17 @@ static int sctp_cmd_interpreter(enum sctp_event_type event_type, timeout = asoc->timeouts[cmd->obj.to]; BUG_ON(!timeout); - timer->expires = jiffies + timeout; - sctp_association_hold(asoc); - add_timer(timer); + /* + * SCTP has a hard time with timer starts. Because we process + * timer starts as side effects, it can be hard to tell if we + * have already started a timer or not, which leads to BUG + * halts when we call add_timer. So here, instead of just starting + * a timer, if the timer is already started, and just mod + * the timer with the shorter of the two expiration times + */ + if (!timer_pending(timer)) + sctp_association_hold(asoc); + timer_reduce(timer, jiffies + timeout); break; case SCTP_CMD_TIMER_RESTART: diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 26788f4a3b9e..e86620fbd90f 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -1856,12 +1856,13 @@ static enum sctp_disposition sctp_sf_do_dupcook_a( /* Update the content of current association. */ sctp_add_cmd_sf(commands, SCTP_CMD_UPDATE_ASSOC, SCTP_ASOC(new_asoc)); sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev)); - if (sctp_state(asoc, SHUTDOWN_PENDING) && + if ((sctp_state(asoc, SHUTDOWN_PENDING) || + sctp_state(asoc, SHUTDOWN_SENT)) && (sctp_sstate(asoc->base.sk, CLOSING) || sock_flag(asoc->base.sk, SOCK_DEAD))) { - /* if were currently in SHUTDOWN_PENDING, but the socket - * has been closed by user, don't transition to ESTABLISHED. - * Instead trigger SHUTDOWN bundled with COOKIE_ACK. + /* If the socket has been closed by user, don't + * transition to ESTABLISHED. Instead trigger SHUTDOWN + * bundled with COOKIE_ACK. */ sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); return sctp_sf_do_9_2_start_shutdown(net, ep, asoc, diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index d6620ad53546..28a283f26a8d 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -161,9 +161,11 @@ static int tipc_udp_xmit(struct net *net, struct sk_buff *skb, struct udp_bearer *ub, struct udp_media_addr *src, struct udp_media_addr *dst, struct dst_cache *cache) { - struct dst_entry *ndst = dst_cache_get(cache); + struct dst_entry *ndst; int ttl, err = 0; + local_bh_disable(); + ndst = dst_cache_get(cache); if (dst->proto == htons(ETH_P_IP)) { struct rtable *rt = (struct rtable *)ndst; @@ -210,9 +212,11 @@ static int tipc_udp_xmit(struct net *net, struct sk_buff *skb, src->port, dst->port, false); #endif } + local_bh_enable(); return err; tx_error: + local_bh_enable(); kfree_skb(skb); return err; } diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index e23f94a5549b..2d399b6c4075 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -780,7 +780,7 @@ static int tls_push_record(struct sock *sk, int flags, static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk, bool full_record, u8 record_type, - size_t *copied, int flags) + ssize_t *copied, int flags) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); @@ -796,9 +796,10 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk, psock = sk_psock_get(sk); if (!psock || !policy) { err = tls_push_record(sk, flags, record_type); - if (err && err != -EINPROGRESS) { + if (err && sk->sk_err == EBADMSG) { *copied -= sk_msg_free(sk, msg); tls_free_open_rec(sk); + err = -sk->sk_err; } if (psock) sk_psock_put(sk, psock); @@ -824,9 +825,10 @@ more_data: switch (psock->eval) { case __SK_PASS: err = tls_push_record(sk, flags, record_type); - if (err && err != -EINPROGRESS) { + if (err && sk->sk_err == EBADMSG) { *copied -= sk_msg_free(sk, msg); tls_free_open_rec(sk); + err = -sk->sk_err; goto out_err; } break; @@ -916,7 +918,8 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) unsigned char record_type = TLS_RECORD_TYPE_DATA; bool is_kvec = iov_iter_is_kvec(&msg->msg_iter); bool eor = !(msg->msg_flags & MSG_MORE); - size_t try_to_copy, copied = 0; + size_t try_to_copy; + ssize_t copied = 0; struct sk_msg *msg_pl, *msg_en; struct tls_rec *rec; int required_size; @@ -1118,7 +1121,7 @@ send_end: release_sock(sk); mutex_unlock(&tls_ctx->tx_lock); - return copied ? copied : ret; + return copied > 0 ? copied : ret; } static int tls_sw_do_sendpage(struct sock *sk, struct page *page, @@ -1132,7 +1135,7 @@ static int tls_sw_do_sendpage(struct sock *sk, struct page *page, struct sk_msg *msg_pl; struct tls_rec *rec; int num_async = 0; - size_t copied = 0; + ssize_t copied = 0; bool full_record; int record_room; int ret = 0; @@ -1234,7 +1237,7 @@ wait_for_memory: } sendpage_end: ret = sk_stream_error(sk, flags, ret); - return copied ? copied : ret; + return copied > 0 ? copied : ret; } int tls_sw_sendpage_locked(struct sock *sk, struct page *page, diff --git a/security/security.c b/security/security.c index 7fed24b9d57e..51de970fbb1e 100644 --- a/security/security.c +++ b/security/security.c @@ -1965,8 +1965,20 @@ EXPORT_SYMBOL(security_ismaclabel); int security_secid_to_secctx(u32 secid, char **secdata, u32 *seclen) { - return call_int_hook(secid_to_secctx, -EOPNOTSUPP, secid, secdata, - seclen); + struct security_hook_list *hp; + int rc; + + /* + * Currently, only one LSM can implement secid_to_secctx (i.e this + * LSM hook is not "stackable"). + */ + hlist_for_each_entry(hp, &security_hook_heads.secid_to_secctx, list) { + rc = hp->hook.secid_to_secctx(secid, secdata, seclen); + if (rc != LSM_RET_DEFAULT(secid_to_secctx)) + return rc; + } + + return LSM_RET_DEFAULT(secid_to_secctx); } EXPORT_SYMBOL(security_secid_to_secctx); diff --git a/tools/testing/selftests/bpf/prog_tests/mmap.c b/tools/testing/selftests/bpf/prog_tests/mmap.c index 6b9dce431d41..43d0b5578f46 100644 --- a/tools/testing/selftests/bpf/prog_tests/mmap.c +++ b/tools/testing/selftests/bpf/prog_tests/mmap.c @@ -19,7 +19,7 @@ void test_mmap(void) const size_t map_sz = roundup_page(sizeof(struct map_data)); const int zero = 0, one = 1, two = 2, far = 1500; const long page_size = sysconf(_SC_PAGE_SIZE); - int err, duration = 0, i, data_map_fd, data_map_id, tmp_fd; + int err, duration = 0, i, data_map_fd, data_map_id, tmp_fd, rdmap_fd; struct bpf_map *data_map, *bss_map; void *bss_mmaped = NULL, *map_mmaped = NULL, *tmp1, *tmp2; struct test_mmap__bss *bss_data; @@ -37,6 +37,17 @@ void test_mmap(void) data_map = skel->maps.data_map; data_map_fd = bpf_map__fd(data_map); + rdmap_fd = bpf_map__fd(skel->maps.rdonly_map); + tmp1 = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, rdmap_fd, 0); + if (CHECK(tmp1 != MAP_FAILED, "rdonly_write_mmap", "unexpected success\n")) { + munmap(tmp1, 4096); + goto cleanup; + } + /* now double-check if it's mmap()'able at all */ + tmp1 = mmap(NULL, 4096, PROT_READ, MAP_SHARED, rdmap_fd, 0); + if (CHECK(tmp1 == MAP_FAILED, "rdonly_read_mmap", "failed: %d\n", errno)) + goto cleanup; + /* get map's ID */ memset(&map_info, 0, map_info_sz); err = bpf_obj_get_info_by_fd(data_map_fd, &map_info, &map_info_sz); diff --git a/tools/testing/selftests/bpf/progs/test_mmap.c b/tools/testing/selftests/bpf/progs/test_mmap.c index 6239596cd14e..4eb42cff5fe9 100644 --- a/tools/testing/selftests/bpf/progs/test_mmap.c +++ b/tools/testing/selftests/bpf/progs/test_mmap.c @@ -9,6 +9,14 @@ char _license[] SEC("license") = "GPL"; struct { __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 4096); + __uint(map_flags, BPF_F_MMAPABLE | BPF_F_RDONLY_PROG); + __type(key, __u32); + __type(value, char); +} rdonly_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); __uint(max_entries, 512 * 4); /* at least 4 pages of data */ __uint(map_flags, BPF_F_MMAPABLE); __type(key, __u32); diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh index 24dd8ed48580..b025daea062d 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh @@ -300,7 +300,7 @@ test_uc_aware() local i for ((i = 0; i < attempts; ++i)); do - if $ARPING -c 1 -I $h1 -b 192.0.2.66 -q -w 0.1; then + if $ARPING -c 1 -I $h1 -b 192.0.2.66 -q -w 1; then ((passes++)) fi diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh index dbd1e014ba17..da49ad2761b5 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh @@ -264,6 +264,8 @@ trap_policer_test() local packets_t0 local packets_t1 + RET=0 + if [ $(devlink_trap_policers_num_get) -eq 0 ]; then check_err 1 "Failed to dump policers" fi @@ -328,6 +330,8 @@ trap_group_check_policer() trap_policer_bind_test() { + RET=0 + devlink trap group set $DEVLINK_DEV group l2_drops policer 1 check_err $? "Failed to bind a valid policer" if [ $(devlink_trap_group_policer_get "l2_drops") -ne 1 ]; then diff --git a/tools/testing/selftests/wireguard/qemu/Makefile b/tools/testing/selftests/wireguard/qemu/Makefile index 90598a425c18..4bdd6c1a19d3 100644 --- a/tools/testing/selftests/wireguard/qemu/Makefile +++ b/tools/testing/selftests/wireguard/qemu/Makefile @@ -44,7 +44,7 @@ endef $(eval $(call tar_download,MUSL,musl,1.2.0,.tar.gz,https://musl.libc.org/releases/,c6de7b191139142d3f9a7b5b702c9cae1b5ee6e7f57e582da9328629408fd4e8)) $(eval $(call tar_download,IPERF,iperf,3.7,.tar.gz,https://downloads.es.net/pub/iperf/,d846040224317caf2f75c843d309a950a7db23f9b44b94688ccbe557d6d1710c)) $(eval $(call tar_download,BASH,bash,5.0,.tar.gz,https://ftp.gnu.org/gnu/bash/,b4a80f2ac66170b2913efbfb9f2594f1f76c7b1afd11f799e22035d63077fb4d)) -$(eval $(call tar_download,IPROUTE2,iproute2,5.4.0,.tar.xz,https://www.kernel.org/pub/linux/utils/net/iproute2/,fe97aa60a0d4c5ac830be18937e18dc3400ca713a33a89ad896ff1e3d46086ae)) +$(eval $(call tar_download,IPROUTE2,iproute2,5.6.0,.tar.xz,https://www.kernel.org/pub/linux/utils/net/iproute2/,1b5b0e25ce6e23da7526ea1da044e814ad85ba761b10dd29c2b027c056b04692)) $(eval $(call tar_download,IPTABLES,iptables,1.8.4,.tar.bz2,https://www.netfilter.org/projects/iptables/files/,993a3a5490a544c2cbf2ef15cf7e7ed21af1845baf228318d5c36ef8827e157c)) $(eval $(call tar_download,NMAP,nmap,7.80,.tar.bz2,https://nmap.org/dist/,fcfa5a0e42099e12e4bf7a68ebe6fde05553383a682e816a7ec9256ab4773faa)) $(eval $(call tar_download,IPUTILS,iputils,s20190709,.tar.gz,https://github.com/iputils/iputils/archive/s20190709.tar.gz/#,a15720dd741d7538dd2645f9f516d193636ae4300ff7dbc8bfca757bf166490a)) |