diff options
Diffstat (limited to 'drivers/net/ethernet/mellanox')
109 files changed, 6774 insertions, 2763 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 2f79378fbf6e..65cb63f6c465 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -228,7 +228,9 @@ void mlx4_en_deactivate_tx_ring(struct mlx4_en_priv *priv, static inline bool mlx4_en_is_tx_ring_full(struct mlx4_en_tx_ring *ring) { - return ring->prod - ring->cons > ring->full_size; + u32 used = READ_ONCE(ring->prod) - READ_ONCE(ring->cons); + + return used > ring->full_size; } static void mlx4_en_stamp_wqe(struct mlx4_en_priv *priv, @@ -1083,7 +1085,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) op_own |= cpu_to_be32(MLX4_WQE_CTRL_IIP); } - ring->prod += nr_txbb; + WRITE_ONCE(ring->prod, ring->prod + nr_txbb); /* If we used a bounce buffer then copy descriptor back into place */ if (unlikely(bounce)) @@ -1214,7 +1216,7 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring, rx_ring->xdp_tx++; - ring->prod += MLX4_EN_XDP_TX_NRTXBB; + WRITE_ONCE(ring->prod, ring->prod + MLX4_EN_XDP_TX_NRTXBB); /* Ensure new descriptor hits memory * before setting ownership of this descriptor to HW diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 4ac4d883047b..321f801c1d7c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -323,7 +323,7 @@ struct mlx4_en_tx_ring { struct mlx4_en_rx_desc { /* actual number of entries depends on rx ring stride */ - struct mlx4_wqe_data_seg data[0]; + DECLARE_FLEX_ARRAY(struct mlx4_wqe_data_seg, data); }; struct mlx4_en_rx_ring { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 8d4e25cc54ea..ddf1e352f51d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -16,7 +16,7 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \ fs_counters.o fs_ft_pool.o rl.o lag/debugfs.o lag/lag.o dev.o events.o wq.o lib/gid.o \ lib/devcom.o lib/pci_vsc.o lib/dm.o lib/fs_ttc.o diag/fs_tracepoint.o \ - diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o \ + diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o diag/reporter_vnic.o \ fw_reset.o qos.o lib/tout.o lib/aso.o # @@ -69,14 +69,15 @@ mlx5_core-$(CONFIG_MLX5_TC_SAMPLE) += en/tc/sample.o # mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o eswitch_offloads_termtbl.o \ ecpf.o rdma.o esw/legacy.o \ - esw/debugfs.o esw/devlink_port.o esw/vporttbl.o esw/qos.o + esw/devlink_port.o esw/vporttbl.o esw/qos.o mlx5_core-$(CONFIG_MLX5_ESWITCH) += esw/acl/helper.o \ esw/acl/egress_lgcy.o esw/acl/egress_ofld.o \ esw/acl/ingress_lgcy.o esw/acl/ingress_ofld.o -mlx5_core-$(CONFIG_MLX5_BRIDGE) += esw/bridge.o en/rep/bridge.o +mlx5_core-$(CONFIG_MLX5_BRIDGE) += esw/bridge.o esw/bridge_mcast.o en/rep/bridge.o +mlx5_core-$(CONFIG_THERMAL) += thermal.o mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o mlx5_core-$(CONFIG_VXLAN) += lib/vxlan.o mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o @@ -111,8 +112,8 @@ mlx5_core-$(CONFIG_MLX5_SW_STEERING) += steering/dr_domain.o steering/dr_table.o steering/dr_ste_v2.o \ steering/dr_cmd.o steering/dr_fw.o \ steering/dr_action.o steering/fs_dr.o \ - steering/dr_definer.o \ - steering/dr_dbg.o lib/smfs.o + steering/dr_definer.o steering/dr_ptrn.o \ + steering/dr_arg.o steering/dr_dbg.o lib/smfs.o # # SF device # diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index b00e33ed05e9..d53de39539a8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -1802,7 +1802,7 @@ static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size, if (in_size <= 16) goto cache_miss; - for (i = 0; i < MLX5_NUM_COMMAND_CACHES; i++) { + for (i = 0; i < dev->profile.num_cmd_caches; i++) { ch = &cmd->cache[i]; if (in_size > ch->max_inbox_size) continue; @@ -2097,7 +2097,7 @@ static void destroy_msg_cache(struct mlx5_core_dev *dev) struct mlx5_cmd_msg *n; int i; - for (i = 0; i < MLX5_NUM_COMMAND_CACHES; i++) { + for (i = 0; i < dev->profile.num_cmd_caches; i++) { ch = &dev->cmd.cache[i]; list_for_each_entry_safe(msg, n, &ch->head, list) { list_del(&msg->list); @@ -2127,7 +2127,7 @@ static void create_msg_cache(struct mlx5_core_dev *dev) int k; /* Initialize and fill the caches with initial entries */ - for (k = 0; k < MLX5_NUM_COMMAND_CACHES; k++) { + for (k = 0; k < dev->profile.num_cmd_caches; k++) { ch = &cmd->cache[k]; spin_lock_init(&ch->lock); INIT_LIST_HEAD(&ch->head); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index 2e7806001fdc..1b33533b15de 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -35,6 +35,7 @@ #include <linux/mlx5/mlx5_ifc_vdpa.h> #include <linux/mlx5/vport.h> #include "mlx5_core.h" +#include "devlink.h" /* intf dev list mutex */ static DEFINE_MUTEX(mlx5_intf_mutex); @@ -106,17 +107,6 @@ bool mlx5_eth_supported(struct mlx5_core_dev *dev) return true; } -static bool is_eth_enabled(struct mlx5_core_dev *dev) -{ - union devlink_param_value val; - int err; - - err = devl_param_driverinit_value_get(priv_to_devlink(dev), - DEVLINK_PARAM_GENERIC_ID_ENABLE_ETH, - &val); - return err ? false : val.vbool; -} - bool mlx5_vnet_supported(struct mlx5_core_dev *dev) { if (!IS_ENABLED(CONFIG_MLX5_VDPA_NET)) @@ -245,7 +235,7 @@ static const struct mlx5_adev_device { .is_enabled = &is_ib_enabled }, [MLX5_INTERFACE_PROTOCOL_ETH] = { .suffix = "eth", .is_supported = &mlx5_eth_supported, - .is_enabled = &is_eth_enabled }, + .is_enabled = &mlx5_core_is_eth_enabled }, [MLX5_INTERFACE_PROTOCOL_ETH_REP] = { .suffix = "eth-rep", .is_supported = &is_eth_rep_supported }, [MLX5_INTERFACE_PROTOCOL_IB_REP] = { .suffix = "rdma-rep", diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index c5d2fdcabd56..4b607785d694 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -202,7 +202,7 @@ static int mlx5_devlink_reload_up(struct devlink *devlink, enum devlink_reload_a break; /* On fw_activate action, also driver is reloaded and reinit performed */ *actions_performed |= BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT); - ret = mlx5_load_one_devl_locked(dev, false); + ret = mlx5_load_one_devl_locked(dev, true); break; default: /* Unsupported action should not get to this function */ @@ -494,6 +494,61 @@ static int mlx5_devlink_eq_depth_validate(struct devlink *devlink, u32 id, return (val.vu32 >= 64 && val.vu32 <= 4096) ? 0 : -EINVAL; } +static int +mlx5_devlink_hairpin_num_queues_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + return val.vu32 ? 0 : -EINVAL; +} + +static int +mlx5_devlink_hairpin_queue_size_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + u32 val32 = val.vu32; + + if (!is_power_of_2(val32)) { + NL_SET_ERR_MSG_MOD(extack, "Value is not power of two"); + return -EINVAL; + } + + if (val32 > BIT(MLX5_CAP_GEN(dev, log_max_hairpin_num_packets))) { + NL_SET_ERR_MSG_FMT_MOD( + extack, "Maximum hairpin queue size is %lu", + BIT(MLX5_CAP_GEN(dev, log_max_hairpin_num_packets))); + return -EINVAL; + } + + return 0; +} + +static void mlx5_devlink_hairpin_params_init_values(struct devlink *devlink) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + union devlink_param_value value; + u32 link_speed = 0; + u64 link_speed64; + + /* set hairpin pair per each 50Gbs share of the link */ + mlx5_port_max_linkspeed(dev, &link_speed); + link_speed = max_t(u32, link_speed, 50000); + link_speed64 = link_speed; + do_div(link_speed64, 50000); + + value.vu32 = link_speed64; + devl_param_driverinit_value_set( + devlink, MLX5_DEVLINK_PARAM_ID_HAIRPIN_NUM_QUEUES, value); + + value.vu32 = + BIT(min_t(u32, 16 - MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(dev), + MLX5_CAP_GEN(dev, log_max_hairpin_num_packets))); + devl_param_driverinit_value_set( + devlink, MLX5_DEVLINK_PARAM_ID_HAIRPIN_QUEUE_SIZE, value); +} + static const struct devlink_param mlx5_devlink_params[] = { DEVLINK_PARAM_GENERIC(ENABLE_ROCE, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), NULL, NULL, mlx5_devlink_enable_roce_validate), @@ -547,6 +602,14 @@ static void mlx5_devlink_set_params_init_values(struct devlink *devlink) static const struct devlink_param mlx5_devlink_eth_params[] = { DEVLINK_PARAM_GENERIC(ENABLE_ETH, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), NULL, NULL, NULL), + DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_HAIRPIN_NUM_QUEUES, + "hairpin_num_queues", DEVLINK_PARAM_TYPE_U32, + BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), NULL, NULL, + mlx5_devlink_hairpin_num_queues_validate), + DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_HAIRPIN_QUEUE_SIZE, + "hairpin_queue_size", DEVLINK_PARAM_TYPE_U32, + BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), NULL, NULL, + mlx5_devlink_hairpin_queue_size_validate), }; static int mlx5_devlink_eth_params_register(struct devlink *devlink) @@ -567,6 +630,9 @@ static int mlx5_devlink_eth_params_register(struct devlink *devlink) devl_param_driverinit_value_set(devlink, DEVLINK_PARAM_GENERIC_ID_ENABLE_ETH, value); + + mlx5_devlink_hairpin_params_init_values(devlink); + return 0; } @@ -805,6 +871,11 @@ int mlx5_devlink_params_register(struct devlink *devlink) { int err; + /* Here only the driver init params should be registered. + * Runtime params should be registered by the code which + * behaviour they configure. + */ + err = devl_params_register(devlink, mlx5_devlink_params, ARRAY_SIZE(mlx5_devlink_params)); if (err) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h index 212b12424146..defba5bd91d9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h @@ -12,6 +12,8 @@ enum mlx5_devlink_param_id { MLX5_DEVLINK_PARAM_ID_ESW_LARGE_GROUP_NUM, MLX5_DEVLINK_PARAM_ID_ESW_PORT_METADATA, MLX5_DEVLINK_PARAM_ID_ESW_MULTIPORT, + MLX5_DEVLINK_PARAM_ID_HAIRPIN_NUM_QUEUES, + MLX5_DEVLINK_PARAM_ID_HAIRPIN_QUEUE_SIZE, }; struct mlx5_trap_ctx { @@ -44,4 +46,15 @@ void mlx5_devlink_free(struct devlink *devlink); int mlx5_devlink_params_register(struct devlink *devlink); void mlx5_devlink_params_unregister(struct devlink *devlink); +static inline bool mlx5_core_is_eth_enabled(struct mlx5_core_dev *dev) +{ + union devlink_param_value val; + int err; + + err = devl_param_driverinit_value_get(priv_to_devlink(dev), + DEVLINK_PARAM_GENERIC_ID_ENABLE_ETH, + &val); + return err ? false : val.vbool; +} + #endif /* __MLX5_DEVLINK_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c new file mode 100644 index 000000000000..9114661cd967 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c @@ -0,0 +1,125 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. */ + +#include "reporter_vnic.h" +#include "devlink.h" + +#define VNIC_ENV_GET64(vnic_env_stats, c) \ + MLX5_GET64(query_vnic_env_out, (vnic_env_stats)->query_vnic_env_out, \ + vport_env.c) + +struct mlx5_vnic_diag_stats { + __be64 query_vnic_env_out[MLX5_ST_SZ_QW(query_vnic_env_out)]; +}; + +int mlx5_reporter_vnic_diagnose_counters(struct mlx5_core_dev *dev, + struct devlink_fmsg *fmsg, + u16 vport_num, bool other_vport) +{ + u32 in[MLX5_ST_SZ_DW(query_vnic_env_in)] = {}; + struct mlx5_vnic_diag_stats vnic; + int err; + + MLX5_SET(query_vnic_env_in, in, opcode, MLX5_CMD_OP_QUERY_VNIC_ENV); + MLX5_SET(query_vnic_env_in, in, vport_number, vport_num); + MLX5_SET(query_vnic_env_in, in, other_vport, !!other_vport); + + err = mlx5_cmd_exec_inout(dev, query_vnic_env, in, &vnic.query_vnic_env_out); + if (err) + return err; + + err = devlink_fmsg_pair_nest_start(fmsg, "vNIC env counters"); + if (err) + return err; + + err = devlink_fmsg_obj_nest_start(fmsg); + if (err) + return err; + + err = devlink_fmsg_u64_pair_put(fmsg, "total_error_queues", + VNIC_ENV_GET64(&vnic, total_error_queues)); + if (err) + return err; + + err = devlink_fmsg_u64_pair_put(fmsg, "send_queue_priority_update_flow", + VNIC_ENV_GET64(&vnic, send_queue_priority_update_flow)); + if (err) + return err; + + err = devlink_fmsg_u64_pair_put(fmsg, "comp_eq_overrun", + VNIC_ENV_GET64(&vnic, comp_eq_overrun)); + if (err) + return err; + + err = devlink_fmsg_u64_pair_put(fmsg, "async_eq_overrun", + VNIC_ENV_GET64(&vnic, async_eq_overrun)); + if (err) + return err; + + err = devlink_fmsg_u64_pair_put(fmsg, "cq_overrun", + VNIC_ENV_GET64(&vnic, cq_overrun)); + if (err) + return err; + + err = devlink_fmsg_u64_pair_put(fmsg, "invalid_command", + VNIC_ENV_GET64(&vnic, invalid_command)); + if (err) + return err; + + err = devlink_fmsg_u64_pair_put(fmsg, "quota_exceeded_command", + VNIC_ENV_GET64(&vnic, quota_exceeded_command)); + if (err) + return err; + + err = devlink_fmsg_u64_pair_put(fmsg, "nic_receive_steering_discard", + VNIC_ENV_GET64(&vnic, nic_receive_steering_discard)); + if (err) + return err; + + err = devlink_fmsg_obj_nest_end(fmsg); + if (err) + return err; + + err = devlink_fmsg_pair_nest_end(fmsg); + if (err) + return err; + + return 0; +} + +static int mlx5_reporter_vnic_diagnose(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter); + + return mlx5_reporter_vnic_diagnose_counters(dev, fmsg, 0, false); +} + +static const struct devlink_health_reporter_ops mlx5_reporter_vnic_ops = { + .name = "vnic", + .diagnose = mlx5_reporter_vnic_diagnose, +}; + +void mlx5_reporter_vnic_create(struct mlx5_core_dev *dev) +{ + struct mlx5_core_health *health = &dev->priv.health; + struct devlink *devlink = priv_to_devlink(dev); + + health->vnic_reporter = + devlink_health_reporter_create(devlink, + &mlx5_reporter_vnic_ops, + 0, dev); + if (IS_ERR(health->vnic_reporter)) + mlx5_core_warn(dev, + "Failed to create vnic reporter, err = %ld\n", + PTR_ERR(health->vnic_reporter)); +} + +void mlx5_reporter_vnic_destroy(struct mlx5_core_dev *dev) +{ + struct mlx5_core_health *health = &dev->priv.health; + + if (!IS_ERR_OR_NULL(health->vnic_reporter)) + devlink_health_reporter_destroy(health->vnic_reporter); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.h new file mode 100644 index 000000000000..eba87a39e9b1 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB + * Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. + */ +#ifndef __MLX5_REPORTER_VNIC_H +#define __MLX5_REPORTER_VNIC_H + +#include "mlx5_core.h" + +void mlx5_reporter_vnic_create(struct mlx5_core_dev *dev); +void mlx5_reporter_vnic_destroy(struct mlx5_core_dev *dev); + +int mlx5_reporter_vnic_diagnose_counters(struct mlx5_core_dev *dev, + struct devlink_fmsg *fmsg, + u16 vport_num, bool other_vport); + +#endif /* __MLX5_REPORTER_VNIC_H */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 4a19ef4a9811..b8987a404d75 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -335,15 +335,20 @@ static inline u8 mlx5e_get_dcb_num_tc(struct mlx5e_params *params) params->mqprio.num_tc : 1; } +/* Keep this enum consistent with the corresponding strings array + * declared in en/reporter_rx.c + */ enum { - MLX5E_RQ_STATE_ENABLED, + MLX5E_RQ_STATE_ENABLED = 0, MLX5E_RQ_STATE_RECOVERING, - MLX5E_RQ_STATE_AM, + MLX5E_RQ_STATE_DIM, MLX5E_RQ_STATE_NO_CSUM_COMPLETE, MLX5E_RQ_STATE_CSUM_FULL, /* cqe_csum_full hw bit is set */ MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX, /* set when mini_cqe_resp_stride_index cap is used */ MLX5E_RQ_STATE_SHAMPO, /* set when SHAMPO cap is used */ MLX5E_RQ_STATE_MINI_CQE_ENHANCED, /* set when enhanced mini_cqe_cap is used */ + MLX5E_RQ_STATE_XSK, /* set to indicate an xsk rq */ + MLX5E_NUM_RQ_STATES, /* Must be kept last */ }; struct mlx5e_cq { @@ -384,16 +389,20 @@ struct mlx5e_sq_dma { enum mlx5e_dma_map_type type; }; +/* Keep this enum consistent with with the corresponding strings array + * declared in en/reporter_tx.c + */ enum { - MLX5E_SQ_STATE_ENABLED, + MLX5E_SQ_STATE_ENABLED = 0, MLX5E_SQ_STATE_MPWQE, MLX5E_SQ_STATE_RECOVERING, MLX5E_SQ_STATE_IPSEC, - MLX5E_SQ_STATE_AM, + MLX5E_SQ_STATE_DIM, MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, MLX5E_SQ_STATE_PENDING_XSK_TX, MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC, MLX5E_SQ_STATE_XDP_MULTIBUF, + MLX5E_NUM_SQ_STATES, /* Must be kept last */ }; struct mlx5e_tx_mpwqe { @@ -466,64 +475,18 @@ struct mlx5e_txqsq { cqe_ts_to_ns ptp_cyc2time; } ____cacheline_aligned_in_smp; -union mlx5e_alloc_unit { - struct page *page; - struct xdp_buff *xsk; -}; - -/* XDP packets can be transmitted in different ways. On completion, we need to - * distinguish between them to clean up things in a proper way. - */ -enum mlx5e_xdp_xmit_mode { - /* An xdp_frame was transmitted due to either XDP_REDIRECT from another - * device or XDP_TX from an XSK RQ. The frame has to be unmapped and - * returned. - */ - MLX5E_XDP_XMIT_MODE_FRAME, - - /* The xdp_frame was created in place as a result of XDP_TX from a - * regular RQ. No DMA remapping happened, and the page belongs to us. - */ - MLX5E_XDP_XMIT_MODE_PAGE, - - /* No xdp_frame was created at all, the transmit happened from a UMEM - * page. The UMEM Completion Ring producer pointer has to be increased. - */ - MLX5E_XDP_XMIT_MODE_XSK, -}; - -struct mlx5e_xdp_info { - enum mlx5e_xdp_xmit_mode mode; - union { - struct { - struct xdp_frame *xdpf; - dma_addr_t dma_addr; - } frame; - struct { - struct mlx5e_rq *rq; - struct page *page; - } page; - }; -}; - -struct mlx5e_xmit_data { - dma_addr_t dma_addr; - void *data; - u32 len; -}; - struct mlx5e_xdp_info_fifo { - struct mlx5e_xdp_info *xi; + union mlx5e_xdp_info *xi; u32 *cc; u32 *pc; u32 mask; }; struct mlx5e_xdpsq; +struct mlx5e_xmit_data; typedef int (*mlx5e_fp_xmit_xdp_frame_check)(struct mlx5e_xdpsq *); typedef bool (*mlx5e_fp_xmit_xdp_frame)(struct mlx5e_xdpsq *, struct mlx5e_xmit_data *, - struct skb_shared_info *, int); struct mlx5e_xdpsq { @@ -596,16 +559,36 @@ struct mlx5e_icosq { struct work_struct recover_work; } ____cacheline_aligned_in_smp; +struct mlx5e_frag_page { + struct page *page; + u16 frags; +}; + +enum mlx5e_wqe_frag_flag { + MLX5E_WQE_FRAG_LAST_IN_PAGE, + MLX5E_WQE_FRAG_SKIP_RELEASE, +}; + struct mlx5e_wqe_frag_info { - union mlx5e_alloc_unit *au; + union { + struct mlx5e_frag_page *frag_page; + struct xdp_buff **xskp; + }; u32 offset; - bool last_in_page; + u8 flags; +}; + +union mlx5e_alloc_units { + DECLARE_FLEX_ARRAY(struct mlx5e_frag_page, frag_pages); + DECLARE_FLEX_ARRAY(struct page *, pages); + DECLARE_FLEX_ARRAY(struct xdp_buff *, xsk_buffs); }; struct mlx5e_mpw_info { u16 consumed_strides; - DECLARE_BITMAP(xdp_xmit_bitmap, MLX5_MPWRQ_MAX_PAGES_PER_WQE); - union mlx5e_alloc_unit alloc_units[]; + DECLARE_BITMAP(skip_release_bitmap, MLX5_MPWRQ_MAX_PAGES_PER_WQE); + struct mlx5e_frag_page linear_page; + union mlx5e_alloc_units alloc_units; }; #define MLX5E_MAX_RX_FRAGS 4 @@ -616,11 +599,6 @@ struct mlx5e_mpw_info { #define MLX5E_CACHE_UNIT (MLX5_MPWRQ_MAX_PAGES_PER_WQE > NAPI_POLL_WEIGHT ? \ MLX5_MPWRQ_MAX_PAGES_PER_WQE : NAPI_POLL_WEIGHT) #define MLX5E_CACHE_SIZE (4 * roundup_pow_of_two(MLX5E_CACHE_UNIT)) -struct mlx5e_page_cache { - u32 head; - u32 tail; - struct page *page_cache[MLX5E_CACHE_SIZE]; -}; struct mlx5e_rq; typedef void (*mlx5e_fp_handle_rx_cqe)(struct mlx5e_rq*, struct mlx5_cqe64*); @@ -652,19 +630,24 @@ struct mlx5e_rq_frags_info { struct mlx5e_rq_frag_info arr[MLX5E_MAX_RX_FRAGS]; u8 num_frags; u8 log_num_frags; - u8 wqe_bulk; + u16 wqe_bulk; + u16 refill_unit; u8 wqe_index_mask; }; struct mlx5e_dma_info { dma_addr_t addr; - struct page *page; + union { + struct mlx5e_frag_page *frag_page; + struct page *page; + }; }; struct mlx5e_shampo_hd { u32 mkey; struct mlx5e_dma_info *info; - struct page *last_page; + struct mlx5e_frag_page *pages; + u16 curr_page_index; u16 hd_per_wq; u16 hd_per_wqe; unsigned long *bitmap; @@ -693,7 +676,7 @@ struct mlx5e_rq { struct { struct mlx5_wq_cyc wq; struct mlx5e_wqe_frag_info *frags; - union mlx5e_alloc_unit *alloc_units; + union mlx5e_alloc_units *alloc_units; struct mlx5e_rq_frags_info info; mlx5e_fp_skb_from_cqe skb_from_cqe; } wqe; @@ -729,7 +712,6 @@ struct mlx5e_rq { struct mlx5e_rq_stats *stats; struct mlx5e_cq cq; struct mlx5e_cq_decomp cqd; - struct mlx5e_page_cache page_cache; struct hwtstamp_config *tstamp; struct mlx5_clock *clock; struct mlx5e_icosq *icosq; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index a21bd1179477..ef546ed8b4d9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -253,17 +253,20 @@ static u32 mlx5e_rx_get_linear_stride_sz(struct mlx5_core_dev *mdev, struct mlx5e_xsk_param *xsk, bool mpwqe) { + u32 sz; + /* XSK frames are mapped as individual pages, because frames may come in * an arbitrary order from random locations in the UMEM. */ if (xsk) return mpwqe ? 1 << mlx5e_mpwrq_page_shift(mdev, xsk) : PAGE_SIZE; - /* XDP in mlx5e doesn't support multiple packets per page. */ - if (params->xdp_prog) - return PAGE_SIZE; + sz = roundup_pow_of_two(mlx5e_rx_get_linear_sz_skb(params, false)); - return roundup_pow_of_two(mlx5e_rx_get_linear_sz_skb(params, false)); + /* XDP in mlx5e doesn't support multiple packets per page. + * Do not assume sz <= PAGE_SIZE if params->xdp_prog is set. + */ + return params->xdp_prog && sz < PAGE_SIZE ? PAGE_SIZE : sz; } static u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5_core_dev *mdev, @@ -320,6 +323,20 @@ static bool mlx5e_verify_rx_mpwqe_strides(struct mlx5_core_dev *mdev, return log_num_strides >= MLX5_MPWQE_LOG_NUM_STRIDES_BASE; } +bool mlx5e_verify_params_rx_mpwqe_strides(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) +{ + u8 log_wqe_num_of_strides = mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk); + u8 log_wqe_stride_size = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk); + enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk); + u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk); + + return mlx5e_verify_rx_mpwqe_strides(mdev, log_wqe_stride_size, + log_wqe_num_of_strides, + page_shift, umr_mode); +} + bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk) @@ -402,6 +419,10 @@ u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev, if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk)) return order_base_2(mlx5e_rx_get_linear_stride_sz(mdev, params, xsk, true)); + /* XDP in mlx5e doesn't support multiple packets per page. */ + if (params->xdp_prog) + return PAGE_SHIFT; + return MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev); } @@ -553,7 +574,7 @@ bool slow_pci_heuristic(struct mlx5_core_dev *mdev) u32 link_speed = 0; u32 pci_bw = 0; - mlx5e_port_max_linkspeed(mdev, &link_speed); + mlx5_port_max_linkspeed(mdev, &link_speed); pci_bw = pcie_bandwidth_available(mdev->pdev, NULL, NULL, NULL); mlx5_core_dbg_once(mdev, "Max link speed = %d, PCI BW = %d\n", link_speed, pci_bw); @@ -572,9 +593,6 @@ int mlx5e_mpwrq_validate_regular(struct mlx5_core_dev *mdev, struct mlx5e_params if (!mlx5e_check_fragmented_striding_rq_cap(mdev, page_shift, umr_mode)) return -EOPNOTSUPP; - if (params->xdp_prog && !mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL)) - return -EINVAL; - return 0; } @@ -667,6 +685,48 @@ static int mlx5e_max_nonlinear_mtu(int first_frag_size, int frag_size, bool xdp) return first_frag_size + (MLX5E_MAX_RX_FRAGS - 2) * frag_size + PAGE_SIZE; } +static void mlx5e_rx_compute_wqe_bulk_params(struct mlx5e_params *params, + struct mlx5e_rq_frags_info *info) +{ + u16 bulk_bound_rq_size = (1 << params->log_rq_mtu_frames) / 4; + u32 bulk_bound_rq_size_in_bytes; + u32 sum_frag_strides = 0; + u32 wqe_bulk_in_bytes; + u16 split_factor; + u32 wqe_bulk; + int i; + + for (i = 0; i < info->num_frags; i++) + sum_frag_strides += info->arr[i].frag_stride; + + /* For MTUs larger than PAGE_SIZE, align to PAGE_SIZE to reflect + * amount of consumed pages per wqe in bytes. + */ + if (sum_frag_strides > PAGE_SIZE) + sum_frag_strides = ALIGN(sum_frag_strides, PAGE_SIZE); + + bulk_bound_rq_size_in_bytes = bulk_bound_rq_size * sum_frag_strides; + +#define MAX_WQE_BULK_BYTES(xdp) ((xdp ? 256 : 512) * 1024) + + /* A WQE bulk should not exceed min(512KB, 1/4 of rq size). For XDP + * keep bulk size smaller to avoid filling the page_pool cache on + * every bulk refill. + */ + wqe_bulk_in_bytes = min_t(u32, MAX_WQE_BULK_BYTES(params->xdp_prog), + bulk_bound_rq_size_in_bytes); + wqe_bulk = DIV_ROUND_UP(wqe_bulk_in_bytes, sum_frag_strides); + + /* Make sure that allocations don't start when the page is still used + * by older WQEs. + */ + info->wqe_bulk = max_t(u16, info->wqe_index_mask + 1, wqe_bulk); + + split_factor = DIV_ROUND_UP(MAX_WQE_BULK_BYTES(params->xdp_prog), + PP_ALLOC_CACHE_REFILL * PAGE_SIZE); + info->refill_unit = DIV_ROUND_UP(info->wqe_bulk, split_factor); +} + #define DEFAULT_FRAG_SIZE (2048) static int mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev, @@ -774,11 +834,14 @@ static int mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev, } out: - /* Bulking optimization to skip allocation until at least 8 WQEs can be - * allocated in a row. At the same time, never start allocation when - * the page is still used by older WQEs. + /* Bulking optimization to skip allocation until a large enough number + * of WQEs can be allocated in a row. Bulking also influences how well + * deferred page release works. */ - info->wqe_bulk = max_t(u8, info->wqe_index_mask + 1, 8); + mlx5e_rx_compute_wqe_bulk_params(params, info); + + mlx5_core_dbg(mdev, "%s: wqe_bulk = %u, wqe_bulk_refill_unit = %u\n", + __func__, info->wqe_bulk, info->refill_unit); info->log_num_frags = order_base_2(info->num_frags); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h index c9be6eb88012..a5d20f6d6d9c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h @@ -153,6 +153,9 @@ int mlx5e_build_channel_param(struct mlx5_core_dev *mdev, u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params); int mlx5e_validate_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params); +bool mlx5e_verify_params_rx_mpwqe_strides(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); static inline void mlx5e_params_print_info(struct mlx5_core_dev *mdev, struct mlx5e_params *params) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c index 505ba41195b9..dbe2b19a9570 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c @@ -32,101 +32,6 @@ #include "port.h" -/* speed in units of 1Mb */ -static const u32 mlx5e_link_speed[MLX5E_LINK_MODES_NUMBER] = { - [MLX5E_1000BASE_CX_SGMII] = 1000, - [MLX5E_1000BASE_KX] = 1000, - [MLX5E_10GBASE_CX4] = 10000, - [MLX5E_10GBASE_KX4] = 10000, - [MLX5E_10GBASE_KR] = 10000, - [MLX5E_20GBASE_KR2] = 20000, - [MLX5E_40GBASE_CR4] = 40000, - [MLX5E_40GBASE_KR4] = 40000, - [MLX5E_56GBASE_R4] = 56000, - [MLX5E_10GBASE_CR] = 10000, - [MLX5E_10GBASE_SR] = 10000, - [MLX5E_10GBASE_ER] = 10000, - [MLX5E_40GBASE_SR4] = 40000, - [MLX5E_40GBASE_LR4] = 40000, - [MLX5E_50GBASE_SR2] = 50000, - [MLX5E_100GBASE_CR4] = 100000, - [MLX5E_100GBASE_SR4] = 100000, - [MLX5E_100GBASE_KR4] = 100000, - [MLX5E_100GBASE_LR4] = 100000, - [MLX5E_100BASE_TX] = 100, - [MLX5E_1000BASE_T] = 1000, - [MLX5E_10GBASE_T] = 10000, - [MLX5E_25GBASE_CR] = 25000, - [MLX5E_25GBASE_KR] = 25000, - [MLX5E_25GBASE_SR] = 25000, - [MLX5E_50GBASE_CR2] = 50000, - [MLX5E_50GBASE_KR2] = 50000, -}; - -static const u32 mlx5e_ext_link_speed[MLX5E_EXT_LINK_MODES_NUMBER] = { - [MLX5E_SGMII_100M] = 100, - [MLX5E_1000BASE_X_SGMII] = 1000, - [MLX5E_5GBASE_R] = 5000, - [MLX5E_10GBASE_XFI_XAUI_1] = 10000, - [MLX5E_40GBASE_XLAUI_4_XLPPI_4] = 40000, - [MLX5E_25GAUI_1_25GBASE_CR_KR] = 25000, - [MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2] = 50000, - [MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR] = 50000, - [MLX5E_CAUI_4_100GBASE_CR4_KR4] = 100000, - [MLX5E_100GAUI_2_100GBASE_CR2_KR2] = 100000, - [MLX5E_200GAUI_4_200GBASE_CR4_KR4] = 200000, - [MLX5E_400GAUI_8] = 400000, - [MLX5E_100GAUI_1_100GBASE_CR_KR] = 100000, - [MLX5E_200GAUI_2_200GBASE_CR2_KR2] = 200000, - [MLX5E_400GAUI_4_400GBASE_CR4_KR4] = 400000, -}; - -bool mlx5e_ptys_ext_supported(struct mlx5_core_dev *mdev) -{ - struct mlx5e_port_eth_proto eproto; - int err; - - if (MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet)) - return true; - - err = mlx5_port_query_eth_proto(mdev, 1, true, &eproto); - if (err) - return false; - - return !!eproto.cap; -} - -static void mlx5e_port_get_speed_arr(struct mlx5_core_dev *mdev, - const u32 **arr, u32 *size, - bool force_legacy) -{ - bool ext = force_legacy ? false : mlx5e_ptys_ext_supported(mdev); - - *size = ext ? ARRAY_SIZE(mlx5e_ext_link_speed) : - ARRAY_SIZE(mlx5e_link_speed); - *arr = ext ? mlx5e_ext_link_speed : mlx5e_link_speed; -} - -int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, bool ext, - struct mlx5e_port_eth_proto *eproto) -{ - u32 out[MLX5_ST_SZ_DW(ptys_reg)]; - int err; - - if (!eproto) - return -EINVAL; - - err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_EN, port); - if (err) - return err; - - eproto->cap = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, - eth_proto_capability); - eproto->admin = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_admin); - eproto->oper = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_oper); - return 0; -} - void mlx5_port_query_eth_autoneg(struct mlx5_core_dev *dev, u8 *an_status, u8 *an_disable_cap, u8 *an_disable_admin) { @@ -172,30 +77,14 @@ int mlx5_port_set_eth_ptys(struct mlx5_core_dev *dev, bool an_disable, sizeof(out), MLX5_REG_PTYS, 0, 1); } -u32 mlx5e_port_ptys2speed(struct mlx5_core_dev *mdev, u32 eth_proto_oper, - bool force_legacy) -{ - unsigned long temp = eth_proto_oper; - const u32 *table; - u32 speed = 0; - u32 max_size; - int i; - - mlx5e_port_get_speed_arr(mdev, &table, &max_size, force_legacy); - i = find_first_bit(&temp, max_size); - if (i < max_size) - speed = table[i]; - return speed; -} - int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed) { - struct mlx5e_port_eth_proto eproto; + struct mlx5_port_eth_proto eproto; bool force_legacy = false; bool ext; int err; - ext = mlx5e_ptys_ext_supported(mdev); + ext = mlx5_ptys_ext_supported(mdev); err = mlx5_port_query_eth_proto(mdev, 1, ext, &eproto); if (err) goto out; @@ -205,7 +94,7 @@ int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed) if (err) goto out; } - *speed = mlx5e_port_ptys2speed(mdev, eproto.oper, force_legacy); + *speed = mlx5_port_ptys2speed(mdev, eproto.oper, force_legacy); if (!(*speed)) err = -EINVAL; @@ -213,46 +102,6 @@ out: return err; } -int mlx5e_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed) -{ - struct mlx5e_port_eth_proto eproto; - u32 max_speed = 0; - const u32 *table; - u32 max_size; - bool ext; - int err; - int i; - - ext = mlx5e_ptys_ext_supported(mdev); - err = mlx5_port_query_eth_proto(mdev, 1, ext, &eproto); - if (err) - return err; - - mlx5e_port_get_speed_arr(mdev, &table, &max_size, false); - for (i = 0; i < max_size; ++i) - if (eproto.cap & MLX5E_PROT_MASK(i)) - max_speed = max(max_speed, table[i]); - - *speed = max_speed; - return 0; -} - -u32 mlx5e_port_speed2linkmodes(struct mlx5_core_dev *mdev, u32 speed, - bool force_legacy) -{ - u32 link_modes = 0; - const u32 *table; - u32 max_size; - int i; - - mlx5e_port_get_speed_arr(mdev, &table, &max_size, force_legacy); - for (i = 0; i < max_size; ++i) { - if (table[i] == speed) - link_modes |= MLX5E_PROT_MASK(i); - } - return link_modes; -} - int mlx5e_port_query_pbmc(struct mlx5_core_dev *mdev, void *out) { int sz = MLX5_ST_SZ_BYTES(pbmc_reg); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h index 3f474e370828..d1da225f35da 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h @@ -36,25 +36,11 @@ #include <linux/mlx5/driver.h> #include "en.h" -struct mlx5e_port_eth_proto { - u32 cap; - u32 admin; - u32 oper; -}; - -int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, bool ext, - struct mlx5e_port_eth_proto *eproto); void mlx5_port_query_eth_autoneg(struct mlx5_core_dev *dev, u8 *an_status, u8 *an_disable_cap, u8 *an_disable_admin); int mlx5_port_set_eth_ptys(struct mlx5_core_dev *dev, bool an_disable, u32 proto_admin, bool ext); -u32 mlx5e_port_ptys2speed(struct mlx5_core_dev *mdev, u32 eth_proto_oper, - bool force_legacy); int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed); -int mlx5e_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed); -u32 mlx5e_port_speed2linkmodes(struct mlx5_core_dev *mdev, u32 speed, - bool force_legacy); -bool mlx5e_ptys_ext_supported(struct mlx5_core_dev *mdev); int mlx5e_port_query_pbmc(struct mlx5_core_dev *mdev, void *out); int mlx5e_port_set_pbmc(struct mlx5_core_dev *mdev, void *in); int mlx5e_port_query_sbpr(struct mlx5_core_dev *mdev, u32 desc, u8 dir, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c index eb5aeba3addf..eb5abd0e55d9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c @@ -81,23 +81,23 @@ void mlx5e_skb_cb_hwtstamp_handler(struct sk_buff *skb, int hwtstamp_type, #define PTP_WQE_CTR2IDX(val) ((val) & ptpsq->ts_cqe_ctr_mask) -static bool mlx5e_ptp_ts_cqe_drop(struct mlx5e_ptpsq *ptpsq, u16 skb_cc, u16 skb_id) +static bool mlx5e_ptp_ts_cqe_drop(struct mlx5e_ptpsq *ptpsq, u16 skb_ci, u16 skb_id) { - return (ptpsq->ts_cqe_ctr_mask && (skb_cc != skb_id)); + return (ptpsq->ts_cqe_ctr_mask && (skb_ci != skb_id)); } static bool mlx5e_ptp_ts_cqe_ooo(struct mlx5e_ptpsq *ptpsq, u16 skb_id) { - u16 skb_cc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc); - u16 skb_pc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_pc); + u16 skb_ci = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc); + u16 skb_pi = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_pc); - if (PTP_WQE_CTR2IDX(skb_id - skb_cc) >= PTP_WQE_CTR2IDX(skb_pc - skb_cc)) + if (PTP_WQE_CTR2IDX(skb_id - skb_ci) >= PTP_WQE_CTR2IDX(skb_pi - skb_ci)) return true; return false; } -static void mlx5e_ptp_skb_fifo_ts_cqe_resync(struct mlx5e_ptpsq *ptpsq, u16 skb_cc, +static void mlx5e_ptp_skb_fifo_ts_cqe_resync(struct mlx5e_ptpsq *ptpsq, u16 skb_ci, u16 skb_id, int budget) { struct skb_shared_hwtstamps hwts = {}; @@ -105,13 +105,13 @@ static void mlx5e_ptp_skb_fifo_ts_cqe_resync(struct mlx5e_ptpsq *ptpsq, u16 skb_ ptpsq->cq_stats->resync_event++; - while (skb_cc != skb_id) { + while (skb_ci != skb_id) { skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo); hwts.hwtstamp = mlx5e_skb_cb_get_hwts(skb)->cqe_hwtstamp; skb_tstamp_tx(skb, &hwts); ptpsq->cq_stats->resync_cqe++; napi_consume_skb(skb, budget); - skb_cc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc); + skb_ci = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc); } } @@ -120,7 +120,7 @@ static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq, int budget) { u16 skb_id = PTP_WQE_CTR2IDX(be16_to_cpu(cqe->wqe_counter)); - u16 skb_cc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc); + u16 skb_ci = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc); struct mlx5e_txqsq *sq = &ptpsq->txqsq; struct sk_buff *skb; ktime_t hwtstamp; @@ -131,13 +131,13 @@ static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq, goto out; } - if (mlx5e_ptp_ts_cqe_drop(ptpsq, skb_cc, skb_id)) { + if (mlx5e_ptp_ts_cqe_drop(ptpsq, skb_ci, skb_id)) { if (mlx5e_ptp_ts_cqe_ooo(ptpsq, skb_id)) { /* already handled by a previous resync */ ptpsq->cq_stats->ooo_cqe_drop++; return; } - mlx5e_ptp_skb_fifo_ts_cqe_resync(ptpsq, skb_cc, skb_id, budget); + mlx5e_ptp_skb_fifo_ts_cqe_resync(ptpsq, skb_ci, skb_id, budget); } skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c index ce85b48d327d..fd191925ab4b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c @@ -220,6 +220,7 @@ mlx5_esw_bridge_port_obj_add(struct net_device *dev, struct netlink_ext_ack *extack = switchdev_notifier_info_to_extack(&port_obj_info->info); const struct switchdev_obj *obj = port_obj_info->obj; const struct switchdev_obj_port_vlan *vlan; + const struct switchdev_obj_port_mdb *mdb; u16 vport_num, esw_owner_vhca_id; int err; @@ -235,6 +236,11 @@ mlx5_esw_bridge_port_obj_add(struct net_device *dev, err = mlx5_esw_bridge_port_vlan_add(vport_num, esw_owner_vhca_id, vlan->vid, vlan->flags, br_offloads, extack); break; + case SWITCHDEV_OBJ_ID_PORT_MDB: + mdb = SWITCHDEV_OBJ_PORT_MDB(obj); + err = mlx5_esw_bridge_port_mdb_add(dev, vport_num, esw_owner_vhca_id, mdb->addr, + mdb->vid, br_offloads, extack); + break; default: return -EOPNOTSUPP; } @@ -248,6 +254,7 @@ mlx5_esw_bridge_port_obj_del(struct net_device *dev, { const struct switchdev_obj *obj = port_obj_info->obj; const struct switchdev_obj_port_vlan *vlan; + const struct switchdev_obj_port_mdb *mdb; u16 vport_num, esw_owner_vhca_id; if (!mlx5_esw_bridge_rep_vport_num_vhca_id_get(dev, br_offloads->esw, &vport_num, @@ -261,6 +268,11 @@ mlx5_esw_bridge_port_obj_del(struct net_device *dev, vlan = SWITCHDEV_OBJ_PORT_VLAN(obj); mlx5_esw_bridge_port_vlan_del(vport_num, esw_owner_vhca_id, vlan->vid, br_offloads); break; + case SWITCHDEV_OBJ_ID_PORT_MDB: + mdb = SWITCHDEV_OBJ_PORT_MDB(obj); + mlx5_esw_bridge_port_mdb_del(dev, vport_num, esw_owner_vhca_id, mdb->addr, mdb->vid, + br_offloads); + break; default: return -EOPNOTSUPP; } @@ -306,6 +318,10 @@ mlx5_esw_bridge_port_obj_attr_set(struct net_device *dev, attr->u.vlan_protocol, br_offloads); break; + case SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED: + err = mlx5_esw_bridge_mcast_set(vport_num, esw_owner_vhca_id, + !attr->u.mc_disabled, br_offloads); + break; default: err = -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c index 8f7452dc00ee..b5c773ffc763 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c @@ -426,39 +426,58 @@ static bool mlx5e_rep_macvlan_mode_supported(const struct net_device *dev) return macvlan->mode == MACVLAN_MODE_PASSTHRU; } -static int -mlx5e_rep_indr_setup_block(struct net_device *netdev, struct Qdisc *sch, - struct mlx5e_rep_priv *rpriv, - struct flow_block_offload *f, - flow_setup_cb_t *setup_cb, - void *data, - void (*cleanup)(struct flow_block_cb *block_cb)) +static bool +mlx5e_rep_check_indr_block_supported(struct mlx5e_rep_priv *rpriv, + struct net_device *netdev, + struct flow_block_offload *f) { struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - bool is_ovs_int_port = netif_is_ovs_master(netdev); - struct mlx5e_rep_indr_block_priv *indr_priv; - struct flow_block_cb *block_cb; + struct net_device *macvlan_real_dev; - if (!mlx5e_tc_tun_device_to_offload(priv, netdev) && - !(is_vlan_dev(netdev) && vlan_dev_real_dev(netdev) == rpriv->netdev) && - !is_ovs_int_port) { - if (!(netif_is_macvlan(netdev) && macvlan_dev_real_dev(netdev) == rpriv->netdev)) - return -EOPNOTSUPP; + if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS && + f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS) + return false; + + if (mlx5e_tc_tun_device_to_offload(priv, netdev)) + return true; + + if (is_vlan_dev(netdev) && vlan_dev_real_dev(netdev) == rpriv->netdev) + return true; + + if (netif_is_macvlan(netdev)) { if (!mlx5e_rep_macvlan_mode_supported(netdev)) { netdev_warn(netdev, "Offloading ingress filter is supported only with macvlan passthru mode"); - return -EOPNOTSUPP; + return false; } + + macvlan_real_dev = macvlan_dev_real_dev(netdev); + + if (macvlan_real_dev == rpriv->netdev) + return true; + if (netif_is_bond_master(macvlan_real_dev)) + return true; } - if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS && - f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS) - return -EOPNOTSUPP; + if (netif_is_ovs_master(netdev) && f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS && + mlx5e_tc_int_port_supported(esw)) + return true; - if (f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS && !is_ovs_int_port) - return -EOPNOTSUPP; + return false; +} - if (is_ovs_int_port && !mlx5e_tc_int_port_supported(esw)) +static int +mlx5e_rep_indr_setup_block(struct net_device *netdev, struct Qdisc *sch, + struct mlx5e_rep_priv *rpriv, + struct flow_block_offload *f, + flow_setup_cb_t *setup_cb, + void *data, + void (*cleanup)(struct flow_block_cb *block_cb)) +{ + struct mlx5e_rep_indr_block_priv *indr_priv; + struct flow_block_cb *block_cb; + + if (!mlx5e_rep_check_indr_block_supported(rpriv, netdev, f)) return -EOPNOTSUPP; f->unlocked_driver_cb = true; @@ -715,5 +734,6 @@ forward: return; free_skb: + dev_put(tc_priv.fwd_dev); dev_kfree_skb_any(skb); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c index c462fe76495b..e8eea9ffd5eb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c @@ -8,6 +8,19 @@ #include "ptp.h" #include "lib/tout.h" +/* Keep this string array consistent with the MLX5E_RQ_STATE_* enums in en.h */ +static const char * const rq_sw_state_type_name[] = { + [MLX5E_RQ_STATE_ENABLED] = "enabled", + [MLX5E_RQ_STATE_RECOVERING] = "recovering", + [MLX5E_RQ_STATE_DIM] = "dim", + [MLX5E_RQ_STATE_NO_CSUM_COMPLETE] = "no_csum_complete", + [MLX5E_RQ_STATE_CSUM_FULL] = "csum_full", + [MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX] = "mini_cqe_hw_stridx", + [MLX5E_RQ_STATE_SHAMPO] = "shampo", + [MLX5E_RQ_STATE_MINI_CQE_ENHANCED] = "mini_cqe_enhanced", + [MLX5E_RQ_STATE_XSK] = "xsk", +}; + static int mlx5e_query_rq_state(struct mlx5_core_dev *dev, u32 rqn, u8 *state) { int outlen = MLX5_ST_SZ_BYTES(query_rq_out); @@ -108,9 +121,9 @@ static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx) mlx5e_reset_icosq_cc_pc(icosq); - mlx5e_free_rx_in_progress_descs(rq); + mlx5e_free_rx_missing_descs(rq); if (xskrq) - mlx5e_free_rx_in_progress_descs(xskrq); + mlx5e_free_rx_missing_descs(xskrq); clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state); mlx5e_activate_icosq(icosq); @@ -239,6 +252,27 @@ static int mlx5e_reporter_icosq_diagnose(struct mlx5e_icosq *icosq, u8 hw_state, return mlx5e_health_fmsg_named_obj_nest_end(fmsg); } +static int mlx5e_health_rq_put_sw_state(struct devlink_fmsg *fmsg, struct mlx5e_rq *rq) +{ + int err; + int i; + + BUILD_BUG_ON_MSG(ARRAY_SIZE(rq_sw_state_type_name) != MLX5E_NUM_RQ_STATES, + "rq_sw_state_type_name string array must be consistent with MLX5E_RQ_STATE_* enum in en.h"); + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SW State"); + if (err) + return err; + + for (i = 0; i < ARRAY_SIZE(rq_sw_state_type_name); ++i) { + err = devlink_fmsg_u32_pair_put(fmsg, rq_sw_state_type_name[i], + test_bit(i, &rq->state)); + if (err) + return err; + } + + return mlx5e_health_fmsg_named_obj_nest_end(fmsg); +} + static int mlx5e_rx_reporter_build_diagnose_output_rq_common(struct mlx5e_rq *rq, struct devlink_fmsg *fmsg) @@ -265,10 +299,6 @@ mlx5e_rx_reporter_build_diagnose_output_rq_common(struct mlx5e_rq *rq, if (err) return err; - err = devlink_fmsg_u8_pair_put(fmsg, "SW state", rq->state); - if (err) - return err; - err = devlink_fmsg_u32_pair_put(fmsg, "WQE counter", wqe_counter); if (err) return err; @@ -281,6 +311,10 @@ mlx5e_rx_reporter_build_diagnose_output_rq_common(struct mlx5e_rq *rq, if (err) return err; + err = mlx5e_health_rq_put_sw_state(fmsg, rq); + if (err) + return err; + err = mlx5e_health_cq_diag_fmsg(&rq->cq, fmsg); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index 34666e2b3871..b35ff289af49 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -6,6 +6,19 @@ #include "en/devlink.h" #include "lib/tout.h" +/* Keep this string array consistent with the MLX5E_SQ_STATE_* enums in en.h */ +static const char * const sq_sw_state_type_name[] = { + [MLX5E_SQ_STATE_ENABLED] = "enabled", + [MLX5E_SQ_STATE_MPWQE] = "mpwqe", + [MLX5E_SQ_STATE_RECOVERING] = "recovering", + [MLX5E_SQ_STATE_IPSEC] = "ipsec", + [MLX5E_SQ_STATE_DIM] = "dim", + [MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE] = "vlan_need_l2_inline", + [MLX5E_SQ_STATE_PENDING_XSK_TX] = "pending_xsk_tx", + [MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC] = "pending_tls_rx_resync", + [MLX5E_SQ_STATE_XDP_MULTIBUF] = "xdp_multibuf", +}; + static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq) { struct mlx5_core_dev *dev = sq->mdev; @@ -37,6 +50,27 @@ static void mlx5e_reset_txqsq_cc_pc(struct mlx5e_txqsq *sq) sq->pc = 0; } +static int mlx5e_health_sq_put_sw_state(struct devlink_fmsg *fmsg, struct mlx5e_txqsq *sq) +{ + int err; + int i; + + BUILD_BUG_ON_MSG(ARRAY_SIZE(sq_sw_state_type_name) != MLX5E_NUM_SQ_STATES, + "sq_sw_state_type_name string array must be consistent with MLX5E_SQ_STATE_* enum in en.h"); + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SW State"); + if (err) + return err; + + for (i = 0; i < ARRAY_SIZE(sq_sw_state_type_name); ++i) { + err = devlink_fmsg_u32_pair_put(fmsg, sq_sw_state_type_name[i], + test_bit(i, &sq->state)); + if (err) + return err; + } + + return mlx5e_health_fmsg_named_obj_nest_end(fmsg); +} + static int mlx5e_tx_reporter_err_cqe_recover(void *ctx) { struct mlx5_core_dev *mdev; @@ -190,6 +224,10 @@ mlx5e_tx_reporter_build_diagnose_output_sq_common(struct devlink_fmsg *fmsg, if (err) return err; + err = mlx5e_health_sq_put_sw_state(fmsg, sq); + if (err) + return err; + err = mlx5e_health_cq_diag_fmsg(&sq->cq, fmsg); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c index a278f52d52b0..9db1b5307a8d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c @@ -4,15 +4,6 @@ #include "act.h" #include "en/tc_priv.h" -static bool -tc_act_can_offload_accept(struct mlx5e_tc_act_parse_state *parse_state, - const struct flow_action_entry *act, - int act_index, - struct mlx5_flow_attr *attr) -{ - return true; -} - static int tc_act_parse_accept(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, @@ -26,7 +17,6 @@ tc_act_parse_accept(struct mlx5e_tc_act_parse_state *parse_state, } struct mlx5e_tc_act mlx5e_tc_act_accept = { - .can_offload = tc_act_can_offload_accept, .parse_action = tc_act_parse_accept, .is_terminating_action = true, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c index eba0c8698926..fc923a99b6a4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c @@ -82,26 +82,6 @@ mlx5e_tc_act_init_parse_state(struct mlx5e_tc_act_parse_state *parse_state, parse_state->flow_action = flow_action; } -void -mlx5e_tc_act_reorder_flow_actions(struct flow_action *flow_action, - struct mlx5e_tc_flow_action *flow_action_reorder) -{ - struct flow_action_entry *act; - int i, j = 0; - - flow_action_for_each(i, act, flow_action) { - /* Add CT action to be first. */ - if (act->id == FLOW_ACTION_CT) - flow_action_reorder->entries[j++] = act; - } - - flow_action_for_each(i, act, flow_action) { - if (act->id == FLOW_ACTION_CT) - continue; - flow_action_reorder->entries[j++] = act; - } -} - int mlx5e_tc_act_post_parse(struct mlx5e_tc_act_parse_state *parse_state, struct flow_action *flow_action, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h index 8346557eeaf6..0e6e1872ac62 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h @@ -17,8 +17,6 @@ struct mlx5e_tc_act_parse_state { struct mlx5e_tc_flow *flow; struct netlink_ext_ack *extack; u32 actions; - bool ct; - bool ct_clear; bool encap; bool decap; bool mpls_push; @@ -56,6 +54,8 @@ struct mlx5e_tc_act { const struct flow_action_entry *act, struct mlx5_flow_attr *attr); + bool (*is_missable)(const struct flow_action_entry *act); + int (*offload_action)(struct mlx5e_priv *priv, struct flow_offload_action *fl_act, struct flow_action_entry *act); @@ -110,10 +110,6 @@ mlx5e_tc_act_init_parse_state(struct mlx5e_tc_act_parse_state *parse_state, struct flow_action *flow_action, struct netlink_ext_ack *extack); -void -mlx5e_tc_act_reorder_flow_actions(struct flow_action *flow_action, - struct mlx5e_tc_flow_action *flow_action_reorder); - int mlx5e_tc_act_post_parse(struct mlx5e_tc_act_parse_state *parse_state, struct flow_action *flow_action, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c index a829c94289c1..92d3952dfa8b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c @@ -5,53 +5,22 @@ #include "en/tc_priv.h" #include "en/tc_ct.h" -static bool -tc_act_can_offload_ct(struct mlx5e_tc_act_parse_state *parse_state, - const struct flow_action_entry *act, - int act_index, - struct mlx5_flow_attr *attr) -{ - bool clear_action = act->ct.action & TCA_CT_ACT_CLEAR; - struct netlink_ext_ack *extack = parse_state->extack; - - if (parse_state->ct && !clear_action) { - NL_SET_ERR_MSG_MOD(extack, "Multiple CT actions are not supported"); - return false; - } - - return true; -} - static int tc_act_parse_ct(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, struct mlx5e_priv *priv, struct mlx5_flow_attr *attr) { - bool clear_action = act->ct.action & TCA_CT_ACT_CLEAR; int err; - /* It's redundant to do ct clear more than once. */ - if (clear_action && parse_state->ct_clear) - return 0; - - err = mlx5_tc_ct_parse_action(parse_state->ct_priv, attr, - &attr->parse_attr->mod_hdr_acts, - act, parse_state->extack); + err = mlx5_tc_ct_parse_action(parse_state->ct_priv, attr, act, parse_state->extack); if (err) return err; - if (mlx5e_is_eswitch_flow(parse_state->flow)) attr->esw_attr->split_count = attr->esw_attr->out_count; - if (clear_action) { - parse_state->ct_clear = true; - } else { - attr->flags |= MLX5_ATTR_FLAG_CT; - flow_flag_set(parse_state->flow, CT); - parse_state->ct = true; - } + attr->flags |= MLX5_ATTR_FLAG_CT; return 0; } @@ -61,27 +30,10 @@ tc_act_post_parse_ct(struct mlx5e_tc_act_parse_state *parse_state, struct mlx5e_priv *priv, struct mlx5_flow_attr *attr) { - struct mlx5e_tc_mod_hdr_acts *mod_acts = &attr->parse_attr->mod_hdr_acts; - int err; - - /* If ct action exist, we can ignore previous ct_clear actions */ - if (parse_state->ct) + if (!(attr->flags & MLX5_ATTR_FLAG_CT)) return 0; - if (parse_state->ct_clear) { - err = mlx5_tc_ct_set_ct_clear_regs(parse_state->ct_priv, mod_acts); - if (err) { - NL_SET_ERR_MSG_MOD(parse_state->extack, - "Failed to set registers for ct clear"); - return err; - } - attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; - - /* Prevent handling of additional, redundant clear actions */ - parse_state->ct_clear = false; - } - - return 0; + return mlx5_tc_ct_flow_offload(parse_state->ct_priv, attr); } static bool @@ -95,10 +47,16 @@ tc_act_is_multi_table_act_ct(struct mlx5e_priv *priv, return true; } +static bool +tc_act_is_missable_ct(const struct flow_action_entry *act) +{ + return !(act->ct.action & TCA_CT_ACT_CLEAR); +} + struct mlx5e_tc_act mlx5e_tc_act_ct = { - .can_offload = tc_act_can_offload_ct, .parse_action = tc_act_parse_ct, - .is_multi_table_act = tc_act_is_multi_table_act_ct, .post_parse = tc_act_post_parse_ct, + .is_multi_table_act = tc_act_is_multi_table_act_ct, + .is_missable = tc_act_is_missable_ct, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c index 7d16aeabb119..5dc81715d625 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c @@ -4,15 +4,6 @@ #include "act.h" #include "en/tc_priv.h" -static bool -tc_act_can_offload_drop(struct mlx5e_tc_act_parse_state *parse_state, - const struct flow_action_entry *act, - int act_index, - struct mlx5_flow_attr *attr) -{ - return true; -} - static int tc_act_parse_drop(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, @@ -25,7 +16,6 @@ tc_act_parse_drop(struct mlx5e_tc_act_parse_state *parse_state, } struct mlx5e_tc_act mlx5e_tc_act_drop = { - .can_offload = tc_act_can_offload_drop, .parse_action = tc_act_parse_drop, .is_terminating_action = true, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c index 07cc65596f89..291193f7120d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c @@ -234,6 +234,9 @@ parse_mirred(struct mlx5e_tc_act_parse_state *parse_state, if (mlx5_lag_mpesw_do_mirred(priv->mdev, out_dev, extack)) return -EOPNOTSUPP; + if (netif_is_macvlan(out_dev)) + out_dev = macvlan_dev_real_dev(out_dev); + out_dev = get_fdb_out_dev(uplink_dev, out_dev); if (!out_dev) return -ENODEV; @@ -250,9 +253,6 @@ parse_mirred(struct mlx5e_tc_act_parse_state *parse_state, return err; } - if (netif_is_macvlan(out_dev)) - out_dev = macvlan_dev_real_dev(out_dev); - err = verify_uplink_forwarding(priv, attr, out_dev, extack); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c index 47597c524e59..3b272bbf4c53 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c @@ -78,15 +78,6 @@ out_err: return err; } -static bool -tc_act_can_offload_pedit(struct mlx5e_tc_act_parse_state *parse_state, - const struct flow_action_entry *act, - int act_index, - struct mlx5_flow_attr *attr) -{ - return true; -} - static int tc_act_parse_pedit(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, @@ -114,6 +105,5 @@ tc_act_parse_pedit(struct mlx5e_tc_act_parse_state *parse_state, } struct mlx5e_tc_act mlx5e_tc_act_pedit = { - .can_offload = tc_act_can_offload_pedit, .parse_action = tc_act_parse_pedit, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ptype.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ptype.c index 6454b031ff7a..80b4bc64380a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ptype.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ptype.c @@ -4,15 +4,6 @@ #include "act.h" #include "en/tc_priv.h" -static bool -tc_act_can_offload_ptype(struct mlx5e_tc_act_parse_state *parse_state, - const struct flow_action_entry *act, - int act_index, - struct mlx5_flow_attr *attr) -{ - return true; -} - static int tc_act_parse_ptype(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, @@ -31,6 +22,5 @@ tc_act_parse_ptype(struct mlx5e_tc_act_parse_state *parse_state, } struct mlx5e_tc_act mlx5e_tc_act_ptype = { - .can_offload = tc_act_can_offload_ptype, .parse_action = tc_act_parse_ptype, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c index 2c0196431302..2df02f99cecf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c @@ -6,25 +6,6 @@ #include "en/tc_priv.h" #include "en/tc/act/sample.h" -static bool -tc_act_can_offload_sample(struct mlx5e_tc_act_parse_state *parse_state, - const struct flow_action_entry *act, - int act_index, - struct mlx5_flow_attr *attr) -{ - struct netlink_ext_ack *extack = parse_state->extack; - bool ct_nat; - - ct_nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT; - - if (flow_flag_test(parse_state->flow, CT) && ct_nat) { - NL_SET_ERR_MSG_MOD(extack, "Sample action with CT NAT is not supported"); - return false; - } - - return true; -} - static int tc_act_parse_sample(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, @@ -65,7 +46,6 @@ tc_act_is_multi_table_act_sample(struct mlx5e_priv *priv, } struct mlx5e_tc_act mlx5e_tc_act_sample = { - .can_offload = tc_act_can_offload_sample, .parse_action = tc_act_parse_sample, .is_multi_table_act = tc_act_is_multi_table_act_sample, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c index 915ce201aeb2..1b78bd9c106a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c @@ -5,15 +5,6 @@ #include "en/tc_priv.h" #include "eswitch.h" -static bool -tc_act_can_offload_trap(struct mlx5e_tc_act_parse_state *parse_state, - const struct flow_action_entry *act, - int act_index, - struct mlx5_flow_attr *attr) -{ - return true; -} - static int tc_act_parse_trap(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, @@ -27,6 +18,5 @@ tc_act_parse_trap(struct mlx5e_tc_act_parse_state *parse_state, } struct mlx5e_tc_act mlx5e_tc_act_trap = { - .can_offload = tc_act_can_offload_trap, .parse_action = tc_act_parse_trap, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/tun.c index b4fa2de9711d..f1cae21c2c37 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/tun.c @@ -32,15 +32,6 @@ tc_act_parse_tun_encap(struct mlx5e_tc_act_parse_state *parse_state, return 0; } -static bool -tc_act_can_offload_tun_decap(struct mlx5e_tc_act_parse_state *parse_state, - const struct flow_action_entry *act, - int act_index, - struct mlx5_flow_attr *attr) -{ - return true; -} - static int tc_act_parse_tun_decap(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, @@ -58,6 +49,5 @@ struct mlx5e_tc_act mlx5e_tc_act_tun_encap = { }; struct mlx5e_tc_act mlx5e_tc_act_tun_decap = { - .can_offload = tc_act_can_offload_tun_decap, .parse_action = tc_act_parse_tun_decap, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c index 2e0d88b513aa..c8a3eaf189f6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c @@ -141,15 +141,6 @@ mlx5e_tc_act_vlan_add_pop_action(struct mlx5e_priv *priv, return err; } -static bool -tc_act_can_offload_vlan(struct mlx5e_tc_act_parse_state *parse_state, - const struct flow_action_entry *act, - int act_index, - struct mlx5_flow_attr *attr) -{ - return true; -} - static int tc_act_parse_vlan(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, @@ -205,7 +196,6 @@ tc_act_post_parse_vlan(struct mlx5e_tc_act_parse_state *parse_state, } struct mlx5e_tc_act mlx5e_tc_act_vlan = { - .can_offload = tc_act_can_offload_vlan, .parse_action = tc_act_parse_vlan, .post_parse = tc_act_post_parse_vlan, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c index 9a8a1a6bd99e..310b99230760 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c @@ -50,15 +50,6 @@ mlx5e_tc_act_vlan_add_rewrite_action(struct mlx5e_priv *priv, int namespace, return err; } -static bool -tc_act_can_offload_vlan_mangle(struct mlx5e_tc_act_parse_state *parse_state, - const struct flow_action_entry *act, - int act_index, - struct mlx5_flow_attr *attr) -{ - return true; -} - static int tc_act_parse_vlan_mangle(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, @@ -81,6 +72,5 @@ tc_act_parse_vlan_mangle(struct mlx5e_tc_act_parse_state *parse_state, } struct mlx5e_tc_act mlx5e_tc_act_vlan_mangle = { - .can_offload = tc_act_can_offload_vlan_mangle, .parse_action = tc_act_parse_vlan_mangle, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c index 4e48946c4c2a..0290e0dea539 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c @@ -106,22 +106,17 @@ err_rule: } struct mlx5e_post_act_handle * -mlx5e_tc_post_act_add(struct mlx5e_post_act *post_act, struct mlx5_flow_attr *attr) +mlx5e_tc_post_act_add(struct mlx5e_post_act *post_act, struct mlx5_flow_attr *post_attr) { - u32 attr_sz = ns_to_attr_sz(post_act->ns_type); struct mlx5e_post_act_handle *handle; - struct mlx5_flow_attr *post_attr; int err; handle = kzalloc(sizeof(*handle), GFP_KERNEL); - post_attr = mlx5_alloc_flow_attr(post_act->ns_type); - if (!handle || !post_attr) { - kfree(post_attr); + if (!handle) { kfree(handle); return ERR_PTR(-ENOMEM); } - memcpy(post_attr, attr, attr_sz); post_attr->chain = 0; post_attr->prio = 0; post_attr->ft = post_act->ft; @@ -145,7 +140,6 @@ mlx5e_tc_post_act_add(struct mlx5e_post_act *post_act, struct mlx5_flow_attr *at return handle; err_xarray: - kfree(post_attr); kfree(handle); return ERR_PTR(err); } @@ -164,7 +158,6 @@ mlx5e_tc_post_act_del(struct mlx5e_post_act *post_act, struct mlx5e_post_act_han if (!IS_ERR_OR_NULL(handle->rule)) mlx5e_tc_post_act_unoffload(post_act, handle); xa_erase(&post_act->ids, handle->id); - kfree(handle->attr); kfree(handle); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h index f476774c0b75..40b8df184af5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h @@ -19,7 +19,7 @@ void mlx5e_tc_post_act_destroy(struct mlx5e_post_act *post_act); struct mlx5e_post_act_handle * -mlx5e_tc_post_act_add(struct mlx5e_post_act *post_act, struct mlx5_flow_attr *attr); +mlx5e_tc_post_act_add(struct mlx5e_post_act *post_act, struct mlx5_flow_attr *post_attr); void mlx5e_tc_post_act_del(struct mlx5e_post_act *post_act, struct mlx5e_post_act_handle *handle); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c index 558a776359af..5db239cae814 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c @@ -14,10 +14,10 @@ #define MLX5_ESW_VPORT_TBL_SIZE_SAMPLE (64 * 1024) -static const struct esw_vport_tbl_namespace mlx5_esw_vport_tbl_sample_ns = { +static struct esw_vport_tbl_namespace mlx5_esw_vport_tbl_sample_ns = { .max_fte = MLX5_ESW_VPORT_TBL_SIZE_SAMPLE, .max_num_groups = 0, /* default num of groups */ - .flags = MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT | MLX5_FLOW_TABLE_TUNNEL_EN_DECAP, + .flags = 0, }; struct mlx5e_tc_psample { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index 314983bc6f08..ead38ef69483 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -83,12 +83,6 @@ struct mlx5_tc_ct_priv { struct mlx5_tc_ct_debugfs debugfs; }; -struct mlx5_ct_flow { - struct mlx5_flow_attr *pre_ct_attr; - struct mlx5_flow_handle *pre_ct_rule; - struct mlx5_ct_ft *ft; -}; - struct mlx5_ct_zone_rule { struct mlx5_ct_fs_rule *rule; struct mlx5e_mod_hdr_handle *mh; @@ -598,12 +592,6 @@ mlx5_tc_ct_entry_set_registers(struct mlx5_tc_ct_priv *ct_priv, return 0; } -int mlx5_tc_ct_set_ct_clear_regs(struct mlx5_tc_ct_priv *priv, - struct mlx5e_tc_mod_hdr_acts *mod_acts) -{ - return mlx5_tc_ct_entry_set_registers(priv, mod_acts, 0, 0, 0, 0); -} - static int mlx5_tc_ct_parse_mangle_to_mod_act(struct flow_action_entry *act, char *modact) @@ -920,6 +908,7 @@ mlx5_tc_ct_entry_replace_rule(struct mlx5_tc_ct_priv *ct_priv, zone_rule->rule = rule; mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, old_attr, zone_rule->mh); zone_rule->mh = mh; + mlx5_put_label_mapping(ct_priv, old_attr->ct_attr.ct_labels_id); kfree(old_attr); kvfree(spec); @@ -1545,7 +1534,6 @@ mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv, int mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv, struct mlx5_flow_attr *attr, - struct mlx5e_tc_mod_hdr_acts *mod_acts, const struct flow_action_entry *act, struct netlink_ext_ack *extack) { @@ -1555,8 +1543,8 @@ mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv, return -EOPNOTSUPP; } + attr->ct_attr.ct_action |= act->ct.action; /* So we can have clear + ct */ attr->ct_attr.zone = act->ct.zone; - attr->ct_attr.ct_action = act->ct.action; attr->ct_attr.nf_ft = act->ct.flow_table; attr->ct_attr.act_miss_cookie = act->miss_cookie; @@ -1892,14 +1880,14 @@ mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft) /* We translate the tc filter with CT action to the following HW model: * - * +---------------------+ - * + ft prio (tc chain) + - * + original match + - * +---------------------+ + * +-----------------------+ + * + rule (either original + + * + or post_act rule) + + * +-----------------------+ * | set act_miss_cookie mapping * | set fte_id * | set tunnel_id - * | do decap + * | rest of actions before the CT action (for this orig/post_act rule) * | * +-------------+ * | Chain 0 | @@ -1924,32 +1912,21 @@ mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft) * | do nat (if needed) * v * +--------------+ - * + post_act + original filter actions + * + post_act + rest of parsed filter's actions * + fte_id match +------------------------> * +--------------+ * */ -static struct mlx5_flow_handle * +static int __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv, - struct mlx5_flow_spec *orig_spec, struct mlx5_flow_attr *attr) { bool nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT; struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev); - struct mlx5e_tc_mod_hdr_acts *pre_mod_acts; - u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type); - struct mlx5_flow_attr *pre_ct_attr; - struct mlx5_modify_hdr *mod_hdr; - struct mlx5_ct_flow *ct_flow; int act_miss_mapping = 0, err; struct mlx5_ct_ft *ft; u16 zone; - ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL); - if (!ct_flow) { - return ERR_PTR(-ENOMEM); - } - /* Register for CT established events */ ft = mlx5_tc_ct_add_ft_cb(ct_priv, attr->ct_attr.zone, attr->ct_attr.nf_ft); @@ -1958,23 +1935,7 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv, ct_dbg("Failed to register to ft callback"); goto err_ft; } - ct_flow->ft = ft; - - /* Base flow attributes of both rules on original rule attribute */ - ct_flow->pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type); - if (!ct_flow->pre_ct_attr) { - err = -ENOMEM; - goto err_alloc_pre; - } - - pre_ct_attr = ct_flow->pre_ct_attr; - memcpy(pre_ct_attr, attr, attr_sz); - pre_mod_acts = &pre_ct_attr->parse_attr->mod_hdr_acts; - - /* Modify the original rule's action to fwd and modify, leave decap */ - pre_ct_attr->action = attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP; - pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | - MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + attr->ct_attr.ft = ft; err = mlx5e_tc_action_miss_mapping_get(ct_priv->priv, attr, attr->ct_attr.act_miss_cookie, &act_miss_mapping); @@ -1982,136 +1943,89 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv, ct_dbg("Failed to get register mapping for act miss"); goto err_get_act_miss; } - attr->ct_attr.act_miss_mapping = act_miss_mapping; - err = mlx5e_tc_match_to_reg_set(priv->mdev, pre_mod_acts, ct_priv->ns_type, - MAPPED_OBJ_TO_REG, act_miss_mapping); + err = mlx5e_tc_match_to_reg_set(priv->mdev, &attr->parse_attr->mod_hdr_acts, + ct_priv->ns_type, MAPPED_OBJ_TO_REG, act_miss_mapping); if (err) { ct_dbg("Failed to set act miss register mapping"); goto err_mapping; } - /* If original flow is decap, we do it before going into ct table - * so add a rewrite for the tunnel match_id. - */ - if ((pre_ct_attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) && - attr->chain == 0) { - err = mlx5e_tc_match_to_reg_set(priv->mdev, pre_mod_acts, - ct_priv->ns_type, - TUNNEL_TO_REG, - attr->tunnel_id); - if (err) { - ct_dbg("Failed to set tunnel register mapping"); - goto err_mapping; - } - } - - /* Change original rule point to ct table - * Chain 0 sets the zone and jumps to ct table + /* Chain 0 sets the zone and jumps to ct table * Other chains jump to pre_ct table to align with act_ct cached logic */ - pre_ct_attr->dest_chain = 0; if (!attr->chain) { zone = ft->zone & MLX5_CT_ZONE_MASK; - err = mlx5e_tc_match_to_reg_set(priv->mdev, pre_mod_acts, ct_priv->ns_type, - ZONE_TO_REG, zone); + err = mlx5e_tc_match_to_reg_set(priv->mdev, &attr->parse_attr->mod_hdr_acts, + ct_priv->ns_type, ZONE_TO_REG, zone); if (err) { ct_dbg("Failed to set zone register mapping"); goto err_mapping; } - pre_ct_attr->dest_ft = nat ? ct_priv->ct_nat : ct_priv->ct; + attr->dest_ft = nat ? ct_priv->ct_nat : ct_priv->ct; } else { - pre_ct_attr->dest_ft = nat ? ft->pre_ct_nat.ft : ft->pre_ct.ft; - } - - mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type, - pre_mod_acts->num_actions, - pre_mod_acts->actions); - if (IS_ERR(mod_hdr)) { - err = PTR_ERR(mod_hdr); - ct_dbg("Failed to create pre ct mod hdr"); - goto err_mapping; - } - pre_ct_attr->modify_hdr = mod_hdr; - ct_flow->pre_ct_rule = mlx5_tc_rule_insert(priv, orig_spec, - pre_ct_attr); - if (IS_ERR(ct_flow->pre_ct_rule)) { - err = PTR_ERR(ct_flow->pre_ct_rule); - ct_dbg("Failed to add pre ct rule"); - goto err_insert_orig; + attr->dest_ft = nat ? ft->pre_ct_nat.ft : ft->pre_ct.ft; } - attr->ct_attr.ct_flow = ct_flow; - mlx5e_mod_hdr_dealloc(pre_mod_acts); + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + attr->ct_attr.act_miss_mapping = act_miss_mapping; - return ct_flow->pre_ct_rule; + return 0; -err_insert_orig: - mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr); err_mapping: - mlx5e_mod_hdr_dealloc(pre_mod_acts); mlx5e_tc_action_miss_mapping_put(ct_priv->priv, attr, act_miss_mapping); err_get_act_miss: - kfree(ct_flow->pre_ct_attr); -err_alloc_pre: mlx5_tc_ct_del_ft_cb(ct_priv, ft); err_ft: - kfree(ct_flow); netdev_warn(priv->netdev, "Failed to offload ct flow, err %d\n", err); - return ERR_PTR(err); + return err; } -struct mlx5_flow_handle * -mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv, - struct mlx5_flow_spec *spec, - struct mlx5_flow_attr *attr, - struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts) +int +mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv, struct mlx5_flow_attr *attr) { - struct mlx5_flow_handle *rule; + int err; if (!priv) - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; + + if (attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR) { + err = mlx5_tc_ct_entry_set_registers(priv, &attr->parse_attr->mod_hdr_acts, + 0, 0, 0, 0); + if (err) + return err; + + attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + } + + if (!attr->ct_attr.nf_ft) /* means only ct clear action, and not ct_clear,ct() */ + return 0; mutex_lock(&priv->control_lock); - rule = __mlx5_tc_ct_flow_offload(priv, spec, attr); + err = __mlx5_tc_ct_flow_offload(priv, attr); mutex_unlock(&priv->control_lock); - return rule; + return err; } static void __mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv, - struct mlx5_ct_flow *ct_flow, struct mlx5_flow_attr *attr) { - struct mlx5_flow_attr *pre_ct_attr = ct_flow->pre_ct_attr; - struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev); - - mlx5_tc_rule_delete(priv, ct_flow->pre_ct_rule, pre_ct_attr); - mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr); - mlx5e_tc_action_miss_mapping_put(ct_priv->priv, attr, attr->ct_attr.act_miss_mapping); - mlx5_tc_ct_del_ft_cb(ct_priv, ct_flow->ft); - - kfree(ct_flow->pre_ct_attr); - kfree(ct_flow); + mlx5_tc_ct_del_ft_cb(ct_priv, attr->ct_attr.ft); } void mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv, struct mlx5_flow_attr *attr) { - struct mlx5_ct_flow *ct_flow = attr->ct_attr.ct_flow; - - /* We are called on error to clean up stuff from parsing - * but we don't have anything for now - */ - if (!ct_flow) + if (!attr->ct_attr.nf_ft) /* means only ct clear action, and not ct_clear,ct() */ return; mutex_lock(&priv->control_lock); - __mlx5_tc_ct_delete_flow(priv, ct_flow, attr); + __mlx5_tc_ct_delete_flow(priv, attr); mutex_unlock(&priv->control_lock); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h index 5c5ddaa83055..8e9316fa46d4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h @@ -25,11 +25,11 @@ struct nf_flowtable; struct mlx5_ct_attr { u16 zone; u16 ct_action; - struct mlx5_ct_flow *ct_flow; struct nf_flowtable *nf_ft; u32 ct_labels_id; u32 act_miss_mapping; u64 act_miss_cookie; + struct mlx5_ct_ft *ft; }; #define zone_to_reg_ct {\ @@ -113,15 +113,12 @@ int mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec); int mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv, struct mlx5_flow_attr *attr, - struct mlx5e_tc_mod_hdr_acts *mod_acts, const struct flow_action_entry *act, struct netlink_ext_ack *extack); -struct mlx5_flow_handle * -mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv, - struct mlx5_flow_spec *spec, - struct mlx5_flow_attr *attr, - struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts); +int +mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv, struct mlx5_flow_attr *attr); + void mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv, struct mlx5_flow_attr *attr); @@ -130,10 +127,6 @@ bool mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv, struct sk_buff *skb, u8 zone_restore_id); -int -mlx5_tc_ct_set_ct_clear_regs(struct mlx5_tc_ct_priv *priv, - struct mlx5e_tc_mod_hdr_acts *mod_acts); - #else /* CONFIG_MLX5_TC_CT */ static inline struct mlx5_tc_ct_priv * @@ -176,16 +169,8 @@ mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec) } static inline int -mlx5_tc_ct_set_ct_clear_regs(struct mlx5_tc_ct_priv *priv, - struct mlx5e_tc_mod_hdr_acts *mod_acts) -{ - return -EOPNOTSUPP; -} - -static inline int mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv, struct mlx5_flow_attr *attr, - struct mlx5e_tc_mod_hdr_acts *mod_acts, const struct flow_action_entry *act, struct netlink_ext_ack *extack) { @@ -193,13 +178,11 @@ mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv, return -EOPNOTSUPP; } -static inline struct mlx5_flow_handle * +static inline int mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv, - struct mlx5_flow_spec *spec, - struct mlx5_flow_attr *attr, - struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts) + struct mlx5_flow_attr *attr) { - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; } static inline void diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h index 451fd4342a5a..ba2b1f24ff14 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h @@ -25,12 +25,11 @@ enum { MLX5E_TC_FLOW_FLAG_DUP = MLX5E_TC_FLOW_BASE + 4, MLX5E_TC_FLOW_FLAG_NOT_READY = MLX5E_TC_FLOW_BASE + 5, MLX5E_TC_FLOW_FLAG_DELETED = MLX5E_TC_FLOW_BASE + 6, - MLX5E_TC_FLOW_FLAG_CT = MLX5E_TC_FLOW_BASE + 7, - MLX5E_TC_FLOW_FLAG_L3_TO_L2_DECAP = MLX5E_TC_FLOW_BASE + 8, - MLX5E_TC_FLOW_FLAG_TUN_RX = MLX5E_TC_FLOW_BASE + 9, - MLX5E_TC_FLOW_FLAG_FAILED = MLX5E_TC_FLOW_BASE + 10, - MLX5E_TC_FLOW_FLAG_SAMPLE = MLX5E_TC_FLOW_BASE + 11, - MLX5E_TC_FLOW_FLAG_USE_ACT_STATS = MLX5E_TC_FLOW_BASE + 12, + MLX5E_TC_FLOW_FLAG_L3_TO_L2_DECAP = MLX5E_TC_FLOW_BASE + 7, + MLX5E_TC_FLOW_FLAG_TUN_RX = MLX5E_TC_FLOW_BASE + 8, + MLX5E_TC_FLOW_FLAG_FAILED = MLX5E_TC_FLOW_BASE + 9, + MLX5E_TC_FLOW_FLAG_SAMPLE = MLX5E_TC_FLOW_BASE + 10, + MLX5E_TC_FLOW_FLAG_USE_ACT_STATS = MLX5E_TC_FLOW_BASE + 11, }; struct mlx5e_tc_flow_parse_attr { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h index b38f693bbb52..92065568bb19 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h @@ -115,6 +115,9 @@ int mlx5e_tc_tun_parse_udp_ports(struct mlx5e_priv *priv, bool mlx5e_tc_tun_encap_info_equal_generic(struct mlx5e_encap_key *a, struct mlx5e_encap_key *b); +bool mlx5e_tc_tun_encap_info_equal_options(struct mlx5e_encap_key *a, + struct mlx5e_encap_key *b, + __be16 tun_flags); #endif /* CONFIG_MLX5_ESWITCH */ #endif //__MLX5_EN_TC_TUNNEL_H__ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c index 780224fd67a1..20c2d2ecaf93 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c @@ -3,6 +3,7 @@ #include <net/fib_notifier.h> #include <net/nexthop.h> +#include <net/ip_tunnels.h> #include "tc_tun_encap.h" #include "en_tc.h" #include "tc_tun.h" @@ -97,7 +98,6 @@ int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow, #if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6) else if (ip_version == 6) { int ipv6_size = MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6); - struct in6_addr zerov6 = {}; daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6); @@ -105,8 +105,8 @@ int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow, outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6); memcpy(&tun_attr->dst_ip.v6, daddr, ipv6_size); memcpy(&tun_attr->src_ip.v6, saddr, ipv6_size); - if (!memcmp(&tun_attr->dst_ip.v6, &zerov6, sizeof(zerov6)) || - !memcmp(&tun_attr->src_ip.v6, &zerov6, sizeof(zerov6))) + if (ipv6_addr_any(&tun_attr->dst_ip.v6) || + ipv6_addr_any(&tun_attr->src_ip.v6)) return 0; } #endif @@ -571,6 +571,37 @@ bool mlx5e_tc_tun_encap_info_equal_generic(struct mlx5e_encap_key *a, a->tc_tunnel->tunnel_type == b->tc_tunnel->tunnel_type; } +bool mlx5e_tc_tun_encap_info_equal_options(struct mlx5e_encap_key *a, + struct mlx5e_encap_key *b, + __be16 tun_flags) +{ + struct ip_tunnel_info *a_info; + struct ip_tunnel_info *b_info; + bool a_has_opts, b_has_opts; + + if (!mlx5e_tc_tun_encap_info_equal_generic(a, b)) + return false; + + a_has_opts = !!(a->ip_tun_key->tun_flags & tun_flags); + b_has_opts = !!(b->ip_tun_key->tun_flags & tun_flags); + + /* keys are equal when both don't have any options attached */ + if (!a_has_opts && !b_has_opts) + return true; + + if (a_has_opts != b_has_opts) + return false; + + /* options stored in memory next to ip_tunnel_info struct */ + a_info = container_of(a->ip_tun_key, struct ip_tunnel_info, key); + b_info = container_of(b->ip_tun_key, struct ip_tunnel_info, key); + + return a_info->options_len == b_info->options_len && + !memcmp(ip_tunnel_info_opts(a_info), + ip_tunnel_info_opts(b_info), + a_info->options_len); +} + static int cmp_decap_info(struct mlx5e_decap_key *a, struct mlx5e_decap_key *b) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c index 054d80c4e65c..2bcd10b6d653 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c @@ -337,29 +337,7 @@ static int mlx5e_tc_tun_parse_geneve(struct mlx5e_priv *priv, static bool mlx5e_tc_tun_encap_info_equal_geneve(struct mlx5e_encap_key *a, struct mlx5e_encap_key *b) { - struct ip_tunnel_info *a_info; - struct ip_tunnel_info *b_info; - bool a_has_opts, b_has_opts; - - if (!mlx5e_tc_tun_encap_info_equal_generic(a, b)) - return false; - - a_has_opts = !!(a->ip_tun_key->tun_flags & TUNNEL_GENEVE_OPT); - b_has_opts = !!(b->ip_tun_key->tun_flags & TUNNEL_GENEVE_OPT); - - /* keys are equal when both don't have any options attached */ - if (!a_has_opts && !b_has_opts) - return true; - - if (a_has_opts != b_has_opts) - return false; - - /* geneve options stored in memory next to ip_tunnel_info struct */ - a_info = container_of(a->ip_tun_key, struct ip_tunnel_info, key); - b_info = container_of(b->ip_tun_key, struct ip_tunnel_info, key); - - return a_info->options_len == b_info->options_len && - memcmp(a_info + 1, b_info + 1, a_info->options_len) == 0; + return mlx5e_tc_tun_encap_info_equal_options(a, b, TUNNEL_GENEVE_OPT); } struct mlx5e_tc_tunnel geneve_tunnel = { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c index 1f62c702b625..a184d739d5f8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* Copyright (c) 2018 Mellanox Technologies. */ +#include <net/ip_tunnels.h> #include <net/vxlan.h> #include "lib/vxlan.h" #include "en/tc_tun.h" @@ -86,9 +87,11 @@ static int mlx5e_gen_ip_tunnel_header_vxlan(char buf[], const struct ip_tunnel_key *tun_key = &e->tun_info->key; __be32 tun_id = tunnel_id_to_key32(tun_key->tun_id); struct udphdr *udp = (struct udphdr *)(buf); + const struct vxlan_metadata *md; struct vxlanhdr *vxh; - if (tun_key->tun_flags & TUNNEL_VXLAN_OPT) + if ((tun_key->tun_flags & TUNNEL_VXLAN_OPT) && + e->tun_info->options_len != sizeof(*md)) return -EOPNOTSUPP; vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr)); *ip_proto = IPPROTO_UDP; @@ -96,6 +99,57 @@ static int mlx5e_gen_ip_tunnel_header_vxlan(char buf[], udp->dest = tun_key->tp_dst; vxh->vx_flags = VXLAN_HF_VNI; vxh->vx_vni = vxlan_vni_field(tun_id); + if (tun_key->tun_flags & TUNNEL_VXLAN_OPT) { + md = ip_tunnel_info_opts(e->tun_info); + vxlan_build_gbp_hdr(vxh, md); + } + + return 0; +} + +static int mlx5e_tc_tun_parse_vxlan_gbp_option(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct netlink_ext_ack *extack = f->common.extack; + struct flow_match_enc_opts enc_opts; + void *misc5_c, *misc5_v; + u32 *gbp, *gbp_mask; + + flow_rule_match_enc_opts(rule, &enc_opts); + + if (memchr_inv(&enc_opts.mask->data, 0, sizeof(enc_opts.mask->data)) && + !MLX5_CAP_ESW_FT_FIELD_SUPPORT_2(priv->mdev, tunnel_header_0_1)) { + NL_SET_ERR_MSG_MOD(extack, "Matching on VxLAN GBP is not supported"); + return -EOPNOTSUPP; + } + + if (enc_opts.key->dst_opt_type != TUNNEL_VXLAN_OPT) { + NL_SET_ERR_MSG_MOD(extack, "Wrong VxLAN option type: not GBP"); + return -EOPNOTSUPP; + } + + if (enc_opts.key->len != sizeof(*gbp) || + enc_opts.mask->len != sizeof(*gbp_mask)) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN GBP option/mask len is not 32 bits"); + return -EINVAL; + } + + gbp = (u32 *)&enc_opts.key->data[0]; + gbp_mask = (u32 *)&enc_opts.mask->data[0]; + + if (*gbp_mask & ~VXLAN_GBP_MASK) { + NL_SET_ERR_MSG_FMT_MOD(extack, "Wrong VxLAN GBP mask(0x%08X)\n", *gbp_mask); + return -EINVAL; + } + + misc5_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_5); + misc5_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_5); + MLX5_SET(fte_match_set_misc5, misc5_c, tunnel_header_0, *gbp_mask); + MLX5_SET(fte_match_set_misc5, misc5_v, tunnel_header_0, *gbp); + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_5; return 0; } @@ -122,6 +176,14 @@ static int mlx5e_tc_tun_parse_vxlan(struct mlx5e_priv *priv, if (!enc_keyid.mask->keyid) return 0; + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_OPTS)) { + int err; + + err = mlx5e_tc_tun_parse_vxlan_gbp_option(priv, spec, f); + if (err) + return err; + } + /* match on VNI is required */ if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, @@ -143,6 +205,12 @@ static int mlx5e_tc_tun_parse_vxlan(struct mlx5e_priv *priv, return 0; } +static bool mlx5e_tc_tun_encap_info_equal_vxlan(struct mlx5e_encap_key *a, + struct mlx5e_encap_key *b) +{ + return mlx5e_tc_tun_encap_info_equal_options(a, b, TUNNEL_VXLAN_OPT); +} + static int mlx5e_tc_tun_get_remote_ifindex(struct net_device *mirred_dev) { const struct vxlan_dev *vxlan = netdev_priv(mirred_dev); @@ -160,6 +228,6 @@ struct mlx5e_tc_tunnel vxlan_tunnel = { .generate_ip_tun_hdr = mlx5e_gen_ip_tunnel_header_vxlan, .parse_udp_ports = mlx5e_tc_tun_parse_udp_ports_vxlan, .parse_tunnel = mlx5e_tc_tun_parse_vxlan, - .encap_info_equal = mlx5e_tc_tun_encap_info_equal_generic, + .encap_info_equal = mlx5e_tc_tun_encap_info_equal_vxlan, .get_remote_ifindex = mlx5e_tc_tun_get_remote_ifindex, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h index b9c2f67d3794..47381e949f1f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h @@ -65,13 +65,11 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget); int mlx5e_poll_ico_cq(struct mlx5e_cq *cq); /* RX */ -void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct page *page); -void mlx5e_page_release_dynamic(struct mlx5e_rq *rq, struct page *page, bool recycle); INDIRECT_CALLABLE_DECLARE(bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq)); INDIRECT_CALLABLE_DECLARE(bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq)); int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget); void mlx5e_free_rx_descs(struct mlx5e_rq *rq); -void mlx5e_free_rx_in_progress_descs(struct mlx5e_rq *rq); +void mlx5e_free_rx_missing_descs(struct mlx5e_rq *rq); static inline bool mlx5e_rx_hw_stamp(struct hwtstamp_config *config) { @@ -79,6 +77,19 @@ static inline bool mlx5e_rx_hw_stamp(struct hwtstamp_config *config) } /* TX */ +struct mlx5e_xmit_data { + dma_addr_t dma_addr; + void *data; + u32 len : 31; + u32 has_frags : 1; +}; + +struct mlx5e_xmit_data_frags { + struct mlx5e_xmit_data xd; + struct skb_shared_info *sinfo; + dma_addr_t *dma_arr; +}; + netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev); bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget); void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq); @@ -86,7 +97,7 @@ void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq); static inline bool mlx5e_skb_fifo_has_room(struct mlx5e_skb_fifo *fifo) { - return (u16)(*fifo->pc - *fifo->cc) < fifo->mask; + return (u16)(*fifo->pc - *fifo->cc) <= fifo->mask; } static inline bool @@ -489,7 +500,7 @@ static inline bool mlx5e_icosq_can_post_wqe(struct mlx5e_icosq *sq, u16 wqe_size static inline struct mlx5e_mpw_info *mlx5e_get_mpw_info(struct mlx5e_rq *rq, int i) { - size_t isz = struct_size(rq->mpwqe.info, alloc_units, rq->mpwqe.pages_per_wqe); + size_t isz = struct_size(rq->mpwqe.info, alloc_units.frag_pages, rq->mpwqe.pages_per_wqe); return (struct mlx5e_mpw_info *)((char *)rq->mpwqe.info + array_size(i, isz)); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index d9d3b9e1f15a..f0e6095809fa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -61,9 +61,8 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq, struct xdp_buff *xdp) { struct page *page = virt_to_page(xdp->data); - struct skb_shared_info *sinfo = NULL; - struct mlx5e_xmit_data xdptxd; - struct mlx5e_xdp_info xdpi; + struct mlx5e_xmit_data_frags xdptxdf = {}; + struct mlx5e_xmit_data *xdptxd; struct xdp_frame *xdpf; dma_addr_t dma_addr; int i; @@ -72,8 +71,10 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq, if (unlikely(!xdpf)) return false; - xdptxd.data = xdpf->data; - xdptxd.len = xdpf->len; + xdptxd = &xdptxdf.xd; + xdptxd->data = xdpf->data; + xdptxd->len = xdpf->len; + xdptxd->has_frags = xdp_frame_has_frags(xdpf); if (xdp->rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL) { /* The xdp_buff was in the UMEM and was copied into a newly @@ -88,24 +89,29 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq, */ __set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */ - xdpi.mode = MLX5E_XDP_XMIT_MODE_FRAME; + if (unlikely(xdptxd->has_frags)) + return false; - dma_addr = dma_map_single(sq->pdev, xdptxd.data, xdptxd.len, + dma_addr = dma_map_single(sq->pdev, xdptxd->data, xdptxd->len, DMA_TO_DEVICE); if (dma_mapping_error(sq->pdev, dma_addr)) { xdp_return_frame(xdpf); return false; } - xdptxd.dma_addr = dma_addr; - xdpi.frame.xdpf = xdpf; - xdpi.frame.dma_addr = dma_addr; + xdptxd->dma_addr = dma_addr; if (unlikely(!INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe, - mlx5e_xmit_xdp_frame, sq, &xdptxd, NULL, 0))) + mlx5e_xmit_xdp_frame, sq, xdptxd, 0))) return false; - mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi); + /* xmit_mode == MLX5E_XDP_XMIT_MODE_FRAME */ + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, + (union mlx5e_xdp_info) { .mode = MLX5E_XDP_XMIT_MODE_FRAME }); + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, + (union mlx5e_xdp_info) { .frame.xdpf = xdpf }); + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, + (union mlx5e_xdp_info) { .frame.dma_addr = dma_addr }); return true; } @@ -115,17 +121,15 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq, * mode. */ - xdpi.mode = MLX5E_XDP_XMIT_MODE_PAGE; - xdpi.page.rq = rq; - dma_addr = page_pool_get_dma_addr(page) + (xdpf->data - (void *)xdpf); - dma_sync_single_for_device(sq->pdev, dma_addr, xdptxd.len, DMA_BIDIRECTIONAL); + dma_sync_single_for_device(sq->pdev, dma_addr, xdptxd->len, DMA_BIDIRECTIONAL); - if (unlikely(xdp_frame_has_frags(xdpf))) { - sinfo = xdp_get_shared_info_from_frame(xdpf); + if (xdptxd->has_frags) { + xdptxdf.sinfo = xdp_get_shared_info_from_frame(xdpf); + xdptxdf.dma_arr = NULL; - for (i = 0; i < sinfo->nr_frags; i++) { - skb_frag_t *frag = &sinfo->frags[i]; + for (i = 0; i < xdptxdf.sinfo->nr_frags; i++) { + skb_frag_t *frag = &xdptxdf.sinfo->frags[i]; dma_addr_t addr; u32 len; @@ -137,22 +141,34 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq, } } - xdptxd.dma_addr = dma_addr; + xdptxd->dma_addr = dma_addr; if (unlikely(!INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe, - mlx5e_xmit_xdp_frame, sq, &xdptxd, sinfo, 0))) + mlx5e_xmit_xdp_frame, sq, xdptxd, 0))) return false; - xdpi.page.page = page; - mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi); - - if (unlikely(xdp_frame_has_frags(xdpf))) { - for (i = 0; i < sinfo->nr_frags; i++) { - skb_frag_t *frag = &sinfo->frags[i]; - - xdpi.page.page = skb_frag_page(frag); - mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi); + /* xmit_mode == MLX5E_XDP_XMIT_MODE_PAGE */ + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, + (union mlx5e_xdp_info) { .mode = MLX5E_XDP_XMIT_MODE_PAGE }); + + if (xdptxd->has_frags) { + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, + (union mlx5e_xdp_info) + { .page.num = 1 + xdptxdf.sinfo->nr_frags }); + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, + (union mlx5e_xdp_info) { .page.page = page }); + for (i = 0; i < xdptxdf.sinfo->nr_frags; i++) { + skb_frag_t *frag = &xdptxdf.sinfo->frags[i]; + + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, + (union mlx5e_xdp_info) + { .page.page = skb_frag_page(frag) }); } + } else { + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, + (union mlx5e_xdp_info) { .page.num = 1 }); + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, + (union mlx5e_xdp_info) { .page.page = page }); } return true; @@ -268,8 +284,6 @@ bool mlx5e_xdp_handle(struct mlx5e_rq *rq, goto xdp_abort; __set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); __set_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags); - if (xdp->rxq->mem.type != MEM_TYPE_XSK_BUFF_POOL) - mlx5e_page_dma_unmap(rq, virt_to_page(xdp->data)); rq->stats->xdp_redirect++; return true; default: @@ -383,26 +397,43 @@ INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq INDIRECT_CALLABLE_SCOPE bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd, - struct skb_shared_info *sinfo, int check_result); + int check_result); INDIRECT_CALLABLE_SCOPE bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd, - struct skb_shared_info *sinfo, int check_result) + int check_result) { struct mlx5e_tx_mpwqe *session = &sq->mpwqe; struct mlx5e_xdpsq_stats *stats = sq->stats; + struct mlx5e_xmit_data *p = xdptxd; + struct mlx5e_xmit_data tmp; - if (unlikely(sinfo)) { - /* MPWQE is enabled, but a multi-buffer packet is queued for - * transmission. MPWQE can't send fragmented packets, so close - * the current session and fall back to a regular WQE. - */ - if (unlikely(sq->mpwqe.wqe)) - mlx5e_xdp_mpwqe_complete(sq); - return mlx5e_xmit_xdp_frame(sq, xdptxd, sinfo, 0); + if (xdptxd->has_frags) { + struct mlx5e_xmit_data_frags *xdptxdf = + container_of(xdptxd, struct mlx5e_xmit_data_frags, xd); + + if (!!xdptxd->len + xdptxdf->sinfo->nr_frags > 1) { + /* MPWQE is enabled, but a multi-buffer packet is queued for + * transmission. MPWQE can't send fragmented packets, so close + * the current session and fall back to a regular WQE. + */ + if (unlikely(sq->mpwqe.wqe)) + mlx5e_xdp_mpwqe_complete(sq); + return mlx5e_xmit_xdp_frame(sq, xdptxd, 0); + } + if (!xdptxd->len) { + skb_frag_t *frag = &xdptxdf->sinfo->frags[0]; + + tmp.data = skb_frag_address(frag); + tmp.len = skb_frag_size(frag); + tmp.dma_addr = xdptxdf->dma_arr ? xdptxdf->dma_arr[0] : + page_pool_get_dma_addr(skb_frag_page(frag)) + + skb_frag_off(frag); + p = &tmp; + } } - if (unlikely(xdptxd->len > sq->hw_mtu)) { + if (unlikely(p->len > sq->hw_mtu)) { stats->err++; return false; } @@ -420,7 +451,7 @@ mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptx mlx5e_xdp_mpwqe_session_start(sq); } - mlx5e_xdp_mpwqe_add_dseg(sq, xdptxd, stats); + mlx5e_xdp_mpwqe_add_dseg(sq, p, stats); if (unlikely(mlx5e_xdp_mpwqe_is_full(session, sq->max_sq_mpw_wqebbs))) mlx5e_xdp_mpwqe_complete(sq); @@ -448,8 +479,10 @@ INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq) INDIRECT_CALLABLE_SCOPE bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd, - struct skb_shared_info *sinfo, int check_result) + int check_result) { + struct mlx5e_xmit_data_frags *xdptxdf = + container_of(xdptxd, struct mlx5e_xmit_data_frags, xd); struct mlx5_wq_cyc *wq = &sq->wq; struct mlx5_wqe_ctrl_seg *cseg; struct mlx5_wqe_data_seg *dseg; @@ -461,26 +494,34 @@ mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd, u16 ds_cnt, inline_hdr_sz; u8 num_wqebbs = 1; int num_frags = 0; + bool inline_ok; + bool linear; u16 pi; struct mlx5e_xdpsq_stats *stats = sq->stats; - if (unlikely(dma_len < MLX5E_XDP_MIN_INLINE || sq->hw_mtu < dma_len)) { + inline_ok = sq->min_inline_mode == MLX5_INLINE_MODE_NONE || + dma_len >= MLX5E_XDP_MIN_INLINE; + + if (unlikely(!inline_ok || sq->hw_mtu < dma_len)) { stats->err++; return false; } - ds_cnt = MLX5E_TX_WQE_EMPTY_DS_COUNT + 1; + inline_hdr_sz = 0; if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) - ds_cnt++; + inline_hdr_sz = MLX5E_XDP_MIN_INLINE; + + linear = !!(dma_len - inline_hdr_sz); + ds_cnt = MLX5E_TX_WQE_EMPTY_DS_COUNT + linear + !!inline_hdr_sz; /* check_result must be 0 if sinfo is passed. */ if (!check_result) { int stop_room = 1; - if (unlikely(sinfo)) { - ds_cnt += sinfo->nr_frags; - num_frags = sinfo->nr_frags; + if (xdptxd->has_frags) { + ds_cnt += xdptxdf->sinfo->nr_frags; + num_frags = xdptxdf->sinfo->nr_frags; num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); /* Assuming MLX5_CAP_GEN(mdev, max_wqe_sz_sq) is big * enough to hold all fragments. @@ -501,53 +542,53 @@ mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd, eseg = &wqe->eth; dseg = wqe->data; - inline_hdr_sz = 0; - /* copy the inline part if required */ - if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) { + if (inline_hdr_sz) { memcpy(eseg->inline_hdr.start, xdptxd->data, sizeof(eseg->inline_hdr.start)); memcpy(dseg, xdptxd->data + sizeof(eseg->inline_hdr.start), - MLX5E_XDP_MIN_INLINE - sizeof(eseg->inline_hdr.start)); - dma_len -= MLX5E_XDP_MIN_INLINE; - dma_addr += MLX5E_XDP_MIN_INLINE; - inline_hdr_sz = MLX5E_XDP_MIN_INLINE; + inline_hdr_sz - sizeof(eseg->inline_hdr.start)); + dma_len -= inline_hdr_sz; + dma_addr += inline_hdr_sz; dseg++; } /* write the dma part */ - dseg->addr = cpu_to_be64(dma_addr); - dseg->byte_count = cpu_to_be32(dma_len); + if (linear) { + dseg->addr = cpu_to_be64(dma_addr); + dseg->byte_count = cpu_to_be32(dma_len); + dseg->lkey = sq->mkey_be; + dseg++; + } cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND); - if (unlikely(test_bit(MLX5E_SQ_STATE_XDP_MULTIBUF, &sq->state))) { - u8 num_pkts = 1 + num_frags; + if (test_bit(MLX5E_SQ_STATE_XDP_MULTIBUF, &sq->state)) { int i; memset(&cseg->trailer, 0, sizeof(cseg->trailer)); memset(eseg, 0, sizeof(*eseg) - sizeof(eseg->trailer)); eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz); - dseg->lkey = sq->mkey_be; for (i = 0; i < num_frags; i++) { - skb_frag_t *frag = &sinfo->frags[i]; + skb_frag_t *frag = &xdptxdf->sinfo->frags[i]; dma_addr_t addr; - addr = page_pool_get_dma_addr(skb_frag_page(frag)) + + addr = xdptxdf->dma_arr ? xdptxdf->dma_arr[i] : + page_pool_get_dma_addr(skb_frag_page(frag)) + skb_frag_off(frag); - dseg++; dseg->addr = cpu_to_be64(addr); dseg->byte_count = cpu_to_be32(skb_frag_size(frag)); dseg->lkey = sq->mkey_be; + dseg++; } cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); sq->db.wqe_info[pi] = (struct mlx5e_xdp_wqe_info) { .num_wqebbs = num_wqebbs, - .num_pkts = num_pkts, + .num_pkts = 1, }; sq->pc += num_wqebbs; @@ -566,26 +607,67 @@ mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd, static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_wqe_info *wi, u32 *xsk_frames, - bool recycle, struct xdp_frame_bulk *bq) { struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo; u16 i; for (i = 0; i < wi->num_pkts; i++) { - struct mlx5e_xdp_info xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo); + union mlx5e_xdp_info xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo); switch (xdpi.mode) { - case MLX5E_XDP_XMIT_MODE_FRAME: + case MLX5E_XDP_XMIT_MODE_FRAME: { /* XDP_TX from the XSK RQ and XDP_REDIRECT */ - dma_unmap_single(sq->pdev, xdpi.frame.dma_addr, - xdpi.frame.xdpf->len, DMA_TO_DEVICE); - xdp_return_frame_bulk(xdpi.frame.xdpf, bq); + struct xdp_frame *xdpf; + dma_addr_t dma_addr; + + xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo); + xdpf = xdpi.frame.xdpf; + xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo); + dma_addr = xdpi.frame.dma_addr; + + dma_unmap_single(sq->pdev, dma_addr, + xdpf->len, DMA_TO_DEVICE); + if (xdp_frame_has_frags(xdpf)) { + struct skb_shared_info *sinfo; + int j; + + sinfo = xdp_get_shared_info_from_frame(xdpf); + for (j = 0; j < sinfo->nr_frags; j++) { + skb_frag_t *frag = &sinfo->frags[j]; + + xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo); + dma_addr = xdpi.frame.dma_addr; + + dma_unmap_single(sq->pdev, dma_addr, + skb_frag_size(frag), DMA_TO_DEVICE); + } + } + xdp_return_frame_bulk(xdpf, bq); break; - case MLX5E_XDP_XMIT_MODE_PAGE: + } + case MLX5E_XDP_XMIT_MODE_PAGE: { /* XDP_TX from the regular RQ */ - mlx5e_page_release_dynamic(xdpi.page.rq, xdpi.page.page, recycle); + u8 num, n = 0; + + xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo); + num = xdpi.page.num; + + do { + struct page *page; + + xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo); + page = xdpi.page.page; + + /* No need to check ((page->pp_magic & ~0x3UL) == PP_SIGNATURE) + * as we know this is a page_pool page. + */ + page_pool_put_defragged_page(page->pp, + page, -1, true); + } while (++n < num); + break; + } case MLX5E_XDP_XMIT_MODE_XSK: /* AF_XDP send */ (*xsk_frames)++; @@ -638,7 +720,7 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq) sqcc += wi->num_wqebbs; - mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, true, &bq); + mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, &bq); } while (!last_wqe); if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) { @@ -685,7 +767,7 @@ void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq) sq->cc += wi->num_wqebbs; - mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, false, &bq); + mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, &bq); } xdp_flush_frame_bulk(&bq); @@ -719,34 +801,79 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, sq = &priv->channels.c[sq_num]->xdpsq; for (i = 0; i < n; i++) { + struct mlx5e_xmit_data_frags xdptxdf = {}; struct xdp_frame *xdpf = frames[i]; - struct mlx5e_xmit_data xdptxd; - struct mlx5e_xdp_info xdpi; + dma_addr_t dma_arr[MAX_SKB_FRAGS]; + struct mlx5e_xmit_data *xdptxd; bool ret; - xdptxd.data = xdpf->data; - xdptxd.len = xdpf->len; - xdptxd.dma_addr = dma_map_single(sq->pdev, xdptxd.data, - xdptxd.len, DMA_TO_DEVICE); + xdptxd = &xdptxdf.xd; + xdptxd->data = xdpf->data; + xdptxd->len = xdpf->len; + xdptxd->has_frags = xdp_frame_has_frags(xdpf); + xdptxd->dma_addr = dma_map_single(sq->pdev, xdptxd->data, + xdptxd->len, DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(sq->pdev, xdptxd.dma_addr))) + if (unlikely(dma_mapping_error(sq->pdev, xdptxd->dma_addr))) break; - xdpi.mode = MLX5E_XDP_XMIT_MODE_FRAME; - xdpi.frame.xdpf = xdpf; - xdpi.frame.dma_addr = xdptxd.dma_addr; + if (xdptxd->has_frags) { + int j; + + xdptxdf.sinfo = xdp_get_shared_info_from_frame(xdpf); + xdptxdf.dma_arr = dma_arr; + for (j = 0; j < xdptxdf.sinfo->nr_frags; j++) { + skb_frag_t *frag = &xdptxdf.sinfo->frags[j]; + + dma_arr[j] = dma_map_single(sq->pdev, skb_frag_address(frag), + skb_frag_size(frag), DMA_TO_DEVICE); + + if (!dma_mapping_error(sq->pdev, dma_arr[j])) + continue; + /* mapping error */ + while (--j >= 0) + dma_unmap_single(sq->pdev, dma_arr[j], + skb_frag_size(&xdptxdf.sinfo->frags[j]), + DMA_TO_DEVICE); + goto out; + } + } ret = INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe, - mlx5e_xmit_xdp_frame, sq, &xdptxd, NULL, 0); + mlx5e_xmit_xdp_frame, sq, xdptxd, 0); if (unlikely(!ret)) { - dma_unmap_single(sq->pdev, xdptxd.dma_addr, - xdptxd.len, DMA_TO_DEVICE); + int j; + + dma_unmap_single(sq->pdev, xdptxd->dma_addr, + xdptxd->len, DMA_TO_DEVICE); + if (!xdptxd->has_frags) + break; + for (j = 0; j < xdptxdf.sinfo->nr_frags; j++) + dma_unmap_single(sq->pdev, dma_arr[j], + skb_frag_size(&xdptxdf.sinfo->frags[j]), + DMA_TO_DEVICE); break; } - mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi); + + /* xmit_mode == MLX5E_XDP_XMIT_MODE_FRAME */ + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, + (union mlx5e_xdp_info) { .mode = MLX5E_XDP_XMIT_MODE_FRAME }); + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, + (union mlx5e_xdp_info) { .frame.xdpf = xdpf }); + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, + (union mlx5e_xdp_info) { .frame.dma_addr = xdptxd->dma_addr }); + if (xdptxd->has_frags) { + int j; + + for (j = 0; j < xdptxdf.sinfo->nr_frags; j++) + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, + (union mlx5e_xdp_info) + { .frame.dma_addr = dma_arr[j] }); + } nxmit++; } +out: if (flags & XDP_XMIT_FLUSH) { if (sq->mpwqe.wqe) mlx5e_xdp_mpwqe_complete(sq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h index 10bcfa6f88c1..9e8e6184f9e4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h @@ -50,6 +50,53 @@ struct mlx5e_xdp_buff { struct mlx5e_rq *rq; }; +/* XDP packets can be transmitted in different ways. On completion, we need to + * distinguish between them to clean up things in a proper way. + */ +enum mlx5e_xdp_xmit_mode { + /* An xdp_frame was transmitted due to either XDP_REDIRECT from another + * device or XDP_TX from an XSK RQ. The frame has to be unmapped and + * returned. + */ + MLX5E_XDP_XMIT_MODE_FRAME, + + /* The xdp_frame was created in place as a result of XDP_TX from a + * regular RQ. No DMA remapping happened, and the page belongs to us. + */ + MLX5E_XDP_XMIT_MODE_PAGE, + + /* No xdp_frame was created at all, the transmit happened from a UMEM + * page. The UMEM Completion Ring producer pointer has to be increased. + */ + MLX5E_XDP_XMIT_MODE_XSK, +}; + +/* xmit_mode entry is pushed to the fifo per packet, followed by multiple + * entries, as follows: + * + * MLX5E_XDP_XMIT_MODE_FRAME: + * xdpf, dma_addr_1, dma_addr_2, ... , dma_addr_num. + * 'num' is derived from xdpf. + * + * MLX5E_XDP_XMIT_MODE_PAGE: + * num, page_1, page_2, ... , page_num. + * + * MLX5E_XDP_XMIT_MODE_XSK: + * none. + */ +union mlx5e_xdp_info { + enum mlx5e_xdp_xmit_mode mode; + union { + struct xdp_frame *xdpf; + dma_addr_t dma_addr; + } frame; + union { + struct mlx5e_rq *rq; + u8 num; + struct page *page; + } page; +}; + struct mlx5e_xsk_param; int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk); bool mlx5e_xdp_handle(struct mlx5e_rq *rq, @@ -66,11 +113,9 @@ extern const struct xdp_metadata_ops mlx5e_xdp_metadata_ops; INDIRECT_CALLABLE_DECLARE(bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd, - struct skb_shared_info *sinfo, int check_result)); INDIRECT_CALLABLE_DECLARE(bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd, - struct skb_shared_info *sinfo, int check_result)); INDIRECT_CALLABLE_DECLARE(int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq)); INDIRECT_CALLABLE_DECLARE(int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq)); @@ -179,14 +224,14 @@ mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq, static inline void mlx5e_xdpi_fifo_push(struct mlx5e_xdp_info_fifo *fifo, - struct mlx5e_xdp_info *xi) + union mlx5e_xdp_info xi) { u32 i = (*fifo->pc)++ & fifo->mask; - fifo->xi[i] = *xi; + fifo->xi[i] = xi; } -static inline struct mlx5e_xdp_info +static inline union mlx5e_xdp_info mlx5e_xdpi_fifo_pop(struct mlx5e_xdp_info_fifo *fifo) { return fifo->xi[(*fifo->cc)++ & fifo->mask]; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c index fab787600459..d97e6df66f45 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c @@ -22,6 +22,7 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) struct mlx5e_icosq *icosq = rq->icosq; struct mlx5_wq_cyc *wq = &icosq->wq; struct mlx5e_umr_wqe *umr_wqe; + struct xdp_buff **xsk_buffs; int batch, i; u32 offset; /* 17-bit value with MTT. */ u16 pi; @@ -29,9 +30,9 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) if (unlikely(!xsk_buff_can_alloc(rq->xsk_pool, rq->mpwqe.pages_per_wqe))) goto err; - BUILD_BUG_ON(sizeof(wi->alloc_units[0]) != sizeof(wi->alloc_units[0].xsk)); XSK_CHECK_PRIV_TYPE(struct mlx5e_xdp_buff); - batch = xsk_buff_alloc_batch(rq->xsk_pool, (struct xdp_buff **)wi->alloc_units, + xsk_buffs = (struct xdp_buff **)wi->alloc_units.xsk_buffs; + batch = xsk_buff_alloc_batch(rq->xsk_pool, xsk_buffs, rq->mpwqe.pages_per_wqe); /* If batch < pages_per_wqe, either: @@ -41,8 +42,8 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) * the first error, which will mean there are no more valid descriptors. */ for (; batch < rq->mpwqe.pages_per_wqe; batch++) { - wi->alloc_units[batch].xsk = xsk_buff_alloc(rq->xsk_pool); - if (unlikely(!wi->alloc_units[batch].xsk)) + xsk_buffs[batch] = xsk_buff_alloc(rq->xsk_pool); + if (unlikely(!xsk_buffs[batch])) goto err_reuse_batch; } @@ -52,8 +53,8 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED)) { for (i = 0; i < batch; i++) { - struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(wi->alloc_units[i].xsk); - dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk); + struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(xsk_buffs[i]); + dma_addr_t addr = xsk_buff_xdp_get_frame_dma(xsk_buffs[i]); umr_wqe->inline_mtts[i] = (struct mlx5_mtt) { .ptag = cpu_to_be64(addr | MLX5_EN_WR), @@ -62,8 +63,8 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) } } else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_UNALIGNED)) { for (i = 0; i < batch; i++) { - struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(wi->alloc_units[i].xsk); - dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk); + struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(xsk_buffs[i]); + dma_addr_t addr = xsk_buff_xdp_get_frame_dma(xsk_buffs[i]); umr_wqe->inline_ksms[i] = (struct mlx5_ksm) { .key = rq->mkey_be, @@ -75,8 +76,8 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) u32 mapping_size = 1 << (rq->mpwqe.page_shift - 2); for (i = 0; i < batch; i++) { - struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(wi->alloc_units[i].xsk); - dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk); + struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(xsk_buffs[i]); + dma_addr_t addr = xsk_buff_xdp_get_frame_dma(xsk_buffs[i]); umr_wqe->inline_ksms[i << 2] = (struct mlx5_ksm) { .key = rq->mkey_be, @@ -102,8 +103,8 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) __be32 frame_size = cpu_to_be32(rq->xsk_pool->chunk_size); for (i = 0; i < batch; i++) { - struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(wi->alloc_units[i].xsk); - dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk); + struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(xsk_buffs[i]); + dma_addr_t addr = xsk_buff_xdp_get_frame_dma(xsk_buffs[i]); umr_wqe->inline_klms[i << 1] = (struct mlx5_klm) { .key = rq->mkey_be, @@ -119,7 +120,7 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) } } - bitmap_zero(wi->xdp_xmit_bitmap, rq->mpwqe.pages_per_wqe); + bitmap_zero(wi->skip_release_bitmap, rq->mpwqe.pages_per_wqe); wi->consumed_strides = 0; umr_wqe->ctrl.opmod_idx_opcode = @@ -149,7 +150,7 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) err_reuse_batch: while (--batch >= 0) - xsk_buff_free(wi->alloc_units[batch].xsk); + xsk_buff_free(xsk_buffs[batch]); err: rq->stats->buff_alloc_err++; @@ -163,13 +164,10 @@ int mlx5e_xsk_alloc_rx_wqes_batched(struct mlx5e_rq *rq, u16 ix, int wqe_bulk) u32 contig, alloc; int i; - /* mlx5e_init_frags_partition creates a 1:1 mapping between - * rq->wqe.frags and rq->wqe.alloc_units, which allows us to - * allocate XDP buffers straight into alloc_units. + /* Each rq->wqe.frags->xskp is 1:1 mapped to an element inside the + * rq->wqe.alloc_units->xsk_buffs array allocated here. */ - BUILD_BUG_ON(sizeof(rq->wqe.alloc_units[0]) != - sizeof(rq->wqe.alloc_units[0].xsk)); - buffs = (struct xdp_buff **)rq->wqe.alloc_units; + buffs = rq->wqe.alloc_units->xsk_buffs; contig = mlx5_wq_cyc_get_size(wq) - ix; if (wqe_bulk <= contig) { alloc = xsk_buff_alloc_batch(rq->xsk_pool, buffs + ix, wqe_bulk); @@ -189,8 +187,9 @@ int mlx5e_xsk_alloc_rx_wqes_batched(struct mlx5e_rq *rq, u16 ix, int wqe_bulk) /* Assumes log_num_frags == 0. */ frag = &rq->wqe.frags[j]; - addr = xsk_buff_xdp_get_frame_dma(frag->au->xsk); + addr = xsk_buff_xdp_get_frame_dma(*frag->xskp); wqe->data[0].addr = cpu_to_be64(addr + rq->buff.headroom); + frag->flags &= ~BIT(MLX5E_WQE_FRAG_SKIP_RELEASE); } return alloc; @@ -211,12 +210,13 @@ int mlx5e_xsk_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk) /* Assumes log_num_frags == 0. */ frag = &rq->wqe.frags[j]; - frag->au->xsk = xsk_buff_alloc(rq->xsk_pool); - if (unlikely(!frag->au->xsk)) + *frag->xskp = xsk_buff_alloc(rq->xsk_pool); + if (unlikely(!*frag->xskp)) return i; - addr = xsk_buff_xdp_get_frame_dma(frag->au->xsk); + addr = xsk_buff_xdp_get_frame_dma(*frag->xskp); wqe->data[0].addr = cpu_to_be64(addr + rq->buff.headroom); + frag->flags &= ~BIT(MLX5E_WQE_FRAG_SKIP_RELEASE); } return wqe_bulk; @@ -251,7 +251,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, u32 head_offset, u32 page_idx) { - struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(wi->alloc_units[page_idx].xsk); + struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(wi->alloc_units.xsk_buffs[page_idx]); struct bpf_prog *prog; /* Check packet size. Note LRO doesn't use linear SKB */ @@ -291,7 +291,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, prog = rcu_dereference(rq->xdp_prog); if (likely(prog && mlx5e_xdp_handle(rq, prog, mxbuf))) { if (likely(__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))) - __set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */ + __set_bit(page_idx, wi->skip_release_bitmap); /* non-atomic */ return NULL; /* page/packet was consumed by XDP */ } @@ -306,7 +306,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, u32 cqe_bcnt) { - struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(wi->au->xsk); + struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(*wi->xskp); struct bpf_prog *prog; /* wi->offset is not used in this function, because xdp->data and the diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c index 81a567e17264..ed279f450976 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c @@ -93,13 +93,19 @@ static int mlx5e_open_xsk_rq(struct mlx5e_channel *c, struct mlx5e_params *param struct mlx5e_rq_param *rq_params, struct xsk_buff_pool *pool, struct mlx5e_xsk_param *xsk) { + struct mlx5e_rq *xskrq = &c->xskrq; int err; - err = mlx5e_init_xsk_rq(c, params, pool, xsk, &c->xskrq); + err = mlx5e_init_xsk_rq(c, params, pool, xsk, xskrq); if (err) return err; - return mlx5e_open_rq(params, rq_params, xsk, cpu_to_node(c->cpu), &c->xskrq); + err = mlx5e_open_rq(params, rq_params, xsk, cpu_to_node(c->cpu), xskrq); + if (err) + return err; + + __set_bit(MLX5E_RQ_STATE_XSK, &xskrq->state); + return 0; } int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c index 367a9505ca4f..597f319d4770 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c @@ -44,7 +44,7 @@ int mlx5e_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags) * same. */ static void mlx5e_xsk_tx_post_err(struct mlx5e_xdpsq *sq, - struct mlx5e_xdp_info *xdpi) + union mlx5e_xdp_info *xdpi) { u16 pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc); struct mlx5e_xdp_wqe_info *wi = &sq->db.wqe_info[pi]; @@ -54,15 +54,14 @@ static void mlx5e_xsk_tx_post_err(struct mlx5e_xdpsq *sq, wi->num_pkts = 1; nopwqe = mlx5e_post_nop(&sq->wq, sq->sqn, &sq->pc); - mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi); + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, *xdpi); sq->doorbell_cseg = &nopwqe->ctrl; } bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget) { struct xsk_buff_pool *pool = sq->xsk_pool; - struct mlx5e_xmit_data xdptxd; - struct mlx5e_xdp_info xdpi; + union mlx5e_xdp_info xdpi; bool work_done = true; bool flush = false; @@ -73,6 +72,7 @@ bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget) mlx5e_xmit_xdp_frame_check_mpwqe, mlx5e_xmit_xdp_frame_check, sq); + struct mlx5e_xmit_data xdptxd = {}; struct xdp_desc desc; bool ret; @@ -97,7 +97,7 @@ bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget) xsk_buff_raw_dma_sync_for_device(pool, xdptxd.dma_addr, xdptxd.len); ret = INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe, - mlx5e_xmit_xdp_frame, sq, &xdptxd, NULL, + mlx5e_xmit_xdp_frame, sq, &xdptxd, check_result); if (unlikely(!ret)) { if (sq->mpwqe.wqe) @@ -105,7 +105,7 @@ bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget) mlx5e_xsk_tx_post_err(sq, &xdpi); } else { - mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi); + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi); } flush = true; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index 7b0d3de0ec6c..55b38544422f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -35,11 +35,15 @@ #include <crypto/aead.h> #include <linux/inetdevice.h> #include <linux/netdevice.h> +#include <net/netevent.h> #include "en.h" #include "ipsec.h" #include "ipsec_rxtx.h" +#define MLX5_IPSEC_RESCHED msecs_to_jiffies(1000) +#define MLX5E_IPSEC_TUNNEL_SA XA_MARK_1 + static struct mlx5e_ipsec_sa_entry *to_ipsec_sa_entry(struct xfrm_state *x) { return (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle; @@ -50,27 +54,71 @@ static struct mlx5e_ipsec_pol_entry *to_ipsec_pol_entry(struct xfrm_policy *x) return (struct mlx5e_ipsec_pol_entry *)x->xdo.offload_handle; } +static void mlx5e_ipsec_handle_tx_limit(struct work_struct *_work) +{ + struct mlx5e_ipsec_dwork *dwork = + container_of(_work, struct mlx5e_ipsec_dwork, dwork.work); + struct mlx5e_ipsec_sa_entry *sa_entry = dwork->sa_entry; + struct xfrm_state *x = sa_entry->x; + + spin_lock(&x->lock); + xfrm_state_check_expire(x); + if (x->km.state == XFRM_STATE_EXPIRED) { + sa_entry->attrs.drop = true; + mlx5e_accel_ipsec_fs_modify(sa_entry); + } + spin_unlock(&x->lock); + + if (sa_entry->attrs.drop) + return; + + queue_delayed_work(sa_entry->ipsec->wq, &dwork->dwork, + MLX5_IPSEC_RESCHED); +} + static bool mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry) { - struct xfrm_replay_state_esn *replay_esn; + struct xfrm_state *x = sa_entry->x; u32 seq_bottom = 0; + u32 esn, esn_msb; u8 overlap; - if (!(sa_entry->x->props.flags & XFRM_STATE_ESN)) { - sa_entry->esn_state.trigger = 0; + switch (x->xso.type) { + case XFRM_DEV_OFFLOAD_PACKET: + switch (x->xso.dir) { + case XFRM_DEV_OFFLOAD_IN: + esn = x->replay_esn->seq; + esn_msb = x->replay_esn->seq_hi; + break; + case XFRM_DEV_OFFLOAD_OUT: + esn = x->replay_esn->oseq; + esn_msb = x->replay_esn->oseq_hi; + break; + default: + WARN_ON(true); + return false; + } + break; + case XFRM_DEV_OFFLOAD_CRYPTO: + /* Already parsed by XFRM core */ + esn = x->replay_esn->seq; + break; + default: + WARN_ON(true); return false; } - replay_esn = sa_entry->x->replay_esn; - if (replay_esn->seq >= replay_esn->replay_window) - seq_bottom = replay_esn->seq - replay_esn->replay_window + 1; - overlap = sa_entry->esn_state.overlap; - sa_entry->esn_state.esn = xfrm_replay_seqhi(sa_entry->x, - htonl(seq_bottom)); + if (esn >= x->replay_esn->replay_window) + seq_bottom = esn - x->replay_esn->replay_window + 1; + + if (x->xso.type == XFRM_DEV_OFFLOAD_CRYPTO) + esn_msb = xfrm_replay_seqhi(x, htonl(seq_bottom)); + + sa_entry->esn_state.esn = esn; + sa_entry->esn_state.esn_msb = esn_msb; - sa_entry->esn_state.trigger = 1; if (unlikely(overlap && seq_bottom < MLX5E_IPSEC_ESN_SCOPE_MID)) { sa_entry->esn_state.overlap = 0; return true; @@ -87,25 +135,161 @@ static void mlx5e_ipsec_init_limits(struct mlx5e_ipsec_sa_entry *sa_entry, struct mlx5_accel_esp_xfrm_attrs *attrs) { struct xfrm_state *x = sa_entry->x; + s64 start_value, n; - attrs->hard_packet_limit = x->lft.hard_packet_limit; + attrs->lft.hard_packet_limit = x->lft.hard_packet_limit; + attrs->lft.soft_packet_limit = x->lft.soft_packet_limit; if (x->lft.soft_packet_limit == XFRM_INF) return; - /* Hardware decrements hard_packet_limit counter through - * the operation. While fires an event when soft_packet_limit - * is reached. It emans that we need substitute the numbers - * in order to properly count soft limit. + /* Compute hard limit initial value and number of rounds. + * + * The counting pattern of hardware counter goes: + * value -> 2^31-1 + * 2^31 | (2^31-1) -> 2^31-1 + * 2^31 | (2^31-1) -> 2^31-1 + * [..] + * 2^31 | (2^31-1) -> 0 * - * As an example: - * XFRM user sets soft limit is 2 and hard limit is 9 and - * expects to see soft event after 2 packets and hard event - * after 9 packets. In our case, the hard limit will be set - * to 9 and soft limit is comparator to 7 so user gets the - * soft event after 2 packeta + * The pattern is created by using an ASO operation to atomically set + * bit 31 after the down counter clears bit 31. This is effectively an + * atomic addition of 2**31 to the counter. + * + * We wish to configure the counter, within the above pattern, so that + * when it reaches 0, it has hit the hard limit. This is defined by this + * system of equations: + * + * hard_limit == start_value + n * 2^31 + * n >= 0 + * start_value < 2^32, start_value >= 0 + * + * These equations are not single-solution, there are often two choices: + * hard_limit == start_value + n * 2^31 + * hard_limit == (start_value+2^31) + (n-1) * 2^31 + * + * The algorithm selects the solution that keeps the counter value + * above 2^31 until the final iteration. + */ + + /* Start by estimating n and compute start_value */ + n = attrs->lft.hard_packet_limit / BIT_ULL(31); + start_value = attrs->lft.hard_packet_limit - n * BIT_ULL(31); + + /* Choose the best of the two solutions: */ + if (n >= 1) + n -= 1; + + /* Computed values solve the system of equations: */ + start_value = attrs->lft.hard_packet_limit - n * BIT_ULL(31); + + /* The best solution means: when there are multiple iterations we must + * start above 2^31 and count down to 2**31 to get the interrupt. + */ + attrs->lft.hard_packet_limit = lower_32_bits(start_value); + attrs->lft.numb_rounds_hard = (u64)n; + + /* Compute soft limit initial value and number of rounds. + * + * The soft_limit is achieved by adjusting the counter's + * interrupt_value. This is embedded in the counting pattern created by + * hard packet calculations above. + * + * We wish to compute the interrupt_value for the soft_limit. This is + * defined by this system of equations: + * + * soft_limit == start_value - soft_value + n * 2^31 + * n >= 0 + * soft_value < 2^32, soft_value >= 0 + * for n == 0 start_value > soft_value + * + * As with compute_hard_n_value() the equations are not single-solution. + * The algorithm selects the solution that has: + * 2^30 <= soft_limit < 2^31 + 2^30 + * for the interior iterations, which guarantees a large guard band + * around the counter hard limit and next interrupt. + */ + + /* Start by estimating n and compute soft_value */ + n = (x->lft.soft_packet_limit - attrs->lft.hard_packet_limit) / BIT_ULL(31); + start_value = attrs->lft.hard_packet_limit + n * BIT_ULL(31) - + x->lft.soft_packet_limit; + + /* Compare against constraints and adjust n */ + if (n < 0) + n = 0; + else if (start_value >= BIT_ULL(32)) + n -= 1; + else if (start_value < 0) + n += 1; + + /* Choose the best of the two solutions: */ + start_value = attrs->lft.hard_packet_limit + n * BIT_ULL(31) - start_value; + if (n != attrs->lft.numb_rounds_hard && start_value < BIT_ULL(30)) + n += 1; + + /* Note that the upper limit of soft_value happens naturally because we + * always select the lowest soft_value. */ - attrs->soft_packet_limit = - x->lft.hard_packet_limit - x->lft.soft_packet_limit; + + /* Computed values solve the system of equations: */ + start_value = attrs->lft.hard_packet_limit + n * BIT_ULL(31) - start_value; + + /* The best solution means: when there are multiple iterations we must + * not fall below 2^30 as that would get too close to the false + * hard_limit and when we reach an interior iteration for soft_limit it + * has to be far away from 2**32-1 which is the counter reset point + * after the +2^31 to accommodate latency. + */ + attrs->lft.soft_packet_limit = lower_32_bits(start_value); + attrs->lft.numb_rounds_soft = (u64)n; +} + +static void mlx5e_ipsec_init_macs(struct mlx5e_ipsec_sa_entry *sa_entry, + struct mlx5_accel_esp_xfrm_attrs *attrs) +{ + struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry); + struct xfrm_state *x = sa_entry->x; + struct net_device *netdev; + struct neighbour *n; + u8 addr[ETH_ALEN]; + const void *pkey; + u8 *dst, *src; + + if (attrs->mode != XFRM_MODE_TUNNEL || + attrs->type != XFRM_DEV_OFFLOAD_PACKET) + return; + + netdev = x->xso.real_dev; + + mlx5_query_mac_address(mdev, addr); + switch (attrs->dir) { + case XFRM_DEV_OFFLOAD_IN: + src = attrs->dmac; + dst = attrs->smac; + pkey = &attrs->saddr.a4; + break; + case XFRM_DEV_OFFLOAD_OUT: + src = attrs->smac; + dst = attrs->dmac; + pkey = &attrs->daddr.a4; + break; + default: + return; + } + + ether_addr_copy(src, addr); + n = neigh_lookup(&arp_tbl, pkey, netdev); + if (!n) { + n = neigh_create(&arp_tbl, pkey, netdev); + if (IS_ERR(n)) + return; + neigh_event_send(n, NULL); + attrs->drop = true; + } else { + neigh_ha_snapshot(addr, n, netdev); + ether_addr_copy(dst, addr); + } + neigh_release(n); } void mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry, @@ -141,11 +325,11 @@ void mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry, aes_gcm->icv_len = x->aead->alg_icv_len; /* esn */ - if (sa_entry->esn_state.trigger) { - attrs->esn_trigger = true; - attrs->esn = sa_entry->esn_state.esn; - attrs->esn_overlap = sa_entry->esn_state.overlap; - attrs->replay_window = x->replay_esn->replay_window; + if (x->props.flags & XFRM_STATE_ESN) { + attrs->replay_esn.trigger = true; + attrs->replay_esn.esn = sa_entry->esn_state.esn; + attrs->replay_esn.esn_msb = sa_entry->esn_state.esn_msb; + attrs->replay_esn.overlap = sa_entry->esn_state.overlap; } attrs->dir = x->xso.dir; @@ -163,8 +347,10 @@ void mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry, attrs->upspec.sport = ntohs(x->sel.sport); attrs->upspec.sport_mask = ntohs(x->sel.sport_mask); attrs->upspec.proto = x->sel.proto; + attrs->mode = x->props.mode; mlx5e_ipsec_init_limits(sa_entry, attrs); + mlx5e_ipsec_init_macs(sa_entry, attrs); } static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev, @@ -233,6 +419,11 @@ static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev, return -EINVAL; } + if (x->props.mode != XFRM_MODE_TRANSPORT && x->props.mode != XFRM_MODE_TUNNEL) { + NL_SET_ERR_MSG_MOD(extack, "Only transport and tunnel xfrm states may be offloaded"); + return -EINVAL; + } + switch (x->xso.type) { case XFRM_DEV_OFFLOAD_CRYPTO: if (!(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_CRYPTO)) { @@ -240,11 +431,6 @@ static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev, return -EINVAL; } - if (x->props.mode != XFRM_MODE_TRANSPORT && - x->props.mode != XFRM_MODE_TUNNEL) { - NL_SET_ERR_MSG_MOD(extack, "Only transport and tunnel xfrm states may be offloaded"); - return -EINVAL; - } break; case XFRM_DEV_OFFLOAD_PACKET: if (!(mlx5_ipsec_device_caps(mdev) & @@ -253,8 +439,9 @@ static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev, return -EINVAL; } - if (x->props.mode != XFRM_MODE_TRANSPORT) { - NL_SET_ERR_MSG_MOD(extack, "Only transport xfrm states may be offloaded in packet mode"); + if (x->props.mode == XFRM_MODE_TUNNEL && + !(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_TUNNEL)) { + NL_SET_ERR_MSG_MOD(extack, "Packet offload is not supported for tunnel mode"); return -EINVAL; } @@ -283,6 +470,11 @@ static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev, NL_SET_ERR_MSG_MOD(extack, "Hard packet limit must be greater than soft one"); return -EINVAL; } + + if (!x->lft.soft_packet_limit || !x->lft.hard_packet_limit) { + NL_SET_ERR_MSG_MOD(extack, "Soft/hard packet limits can't be 0"); + return -EINVAL; + } break; default: NL_SET_ERR_MSG_MOD(extack, "Unsupported xfrm offload type"); @@ -291,14 +483,134 @@ static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev, return 0; } -static void _update_xfrm_state(struct work_struct *work) +static void mlx5e_ipsec_modify_state(struct work_struct *_work) +{ + struct mlx5e_ipsec_work *work = + container_of(_work, struct mlx5e_ipsec_work, work); + struct mlx5e_ipsec_sa_entry *sa_entry = work->sa_entry; + struct mlx5_accel_esp_xfrm_attrs *attrs; + + attrs = &((struct mlx5e_ipsec_sa_entry *)work->data)->attrs; + + mlx5_accel_esp_modify_xfrm(sa_entry, attrs); +} + +static void mlx5e_ipsec_set_esn_ops(struct mlx5e_ipsec_sa_entry *sa_entry) +{ + struct xfrm_state *x = sa_entry->x; + + if (x->xso.type != XFRM_DEV_OFFLOAD_CRYPTO || + x->xso.dir != XFRM_DEV_OFFLOAD_OUT) + return; + + if (x->props.flags & XFRM_STATE_ESN) { + sa_entry->set_iv_op = mlx5e_ipsec_set_iv_esn; + return; + } + + sa_entry->set_iv_op = mlx5e_ipsec_set_iv; +} + +static void mlx5e_ipsec_handle_netdev_event(struct work_struct *_work) +{ + struct mlx5e_ipsec_work *work = + container_of(_work, struct mlx5e_ipsec_work, work); + struct mlx5e_ipsec_sa_entry *sa_entry = work->sa_entry; + struct mlx5e_ipsec_netevent_data *data = work->data; + struct mlx5_accel_esp_xfrm_attrs *attrs; + + attrs = &sa_entry->attrs; + + switch (attrs->dir) { + case XFRM_DEV_OFFLOAD_IN: + ether_addr_copy(attrs->smac, data->addr); + break; + case XFRM_DEV_OFFLOAD_OUT: + ether_addr_copy(attrs->dmac, data->addr); + break; + default: + WARN_ON_ONCE(true); + } + attrs->drop = false; + mlx5e_accel_ipsec_fs_modify(sa_entry); +} + +static int mlx5_ipsec_create_work(struct mlx5e_ipsec_sa_entry *sa_entry) +{ + struct xfrm_state *x = sa_entry->x; + struct mlx5e_ipsec_work *work; + void *data = NULL; + + switch (x->xso.type) { + case XFRM_DEV_OFFLOAD_CRYPTO: + if (!(x->props.flags & XFRM_STATE_ESN)) + return 0; + break; + case XFRM_DEV_OFFLOAD_PACKET: + if (x->props.mode != XFRM_MODE_TUNNEL) + return 0; + break; + default: + break; + } + + work = kzalloc(sizeof(*work), GFP_KERNEL); + if (!work) + return -ENOMEM; + + switch (x->xso.type) { + case XFRM_DEV_OFFLOAD_CRYPTO: + data = kzalloc(sizeof(*sa_entry), GFP_KERNEL); + if (!data) + goto free_work; + + INIT_WORK(&work->work, mlx5e_ipsec_modify_state); + break; + case XFRM_DEV_OFFLOAD_PACKET: + data = kzalloc(sizeof(struct mlx5e_ipsec_netevent_data), + GFP_KERNEL); + if (!data) + goto free_work; + + INIT_WORK(&work->work, mlx5e_ipsec_handle_netdev_event); + break; + default: + break; + } + + work->data = data; + work->sa_entry = sa_entry; + sa_entry->work = work; + return 0; + +free_work: + kfree(work); + return -ENOMEM; +} + +static int mlx5e_ipsec_create_dwork(struct mlx5e_ipsec_sa_entry *sa_entry) { - struct mlx5e_ipsec_modify_state_work *modify_work = - container_of(work, struct mlx5e_ipsec_modify_state_work, work); - struct mlx5e_ipsec_sa_entry *sa_entry = container_of( - modify_work, struct mlx5e_ipsec_sa_entry, modify_work); + struct xfrm_state *x = sa_entry->x; + struct mlx5e_ipsec_dwork *dwork; + + if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET) + return 0; + + if (x->xso.dir != XFRM_DEV_OFFLOAD_OUT) + return 0; + + if (x->lft.soft_packet_limit == XFRM_INF && + x->lft.hard_packet_limit == XFRM_INF) + return 0; - mlx5_accel_esp_modify_xfrm(sa_entry, &modify_work->attrs); + dwork = kzalloc(sizeof(*dwork), GFP_KERNEL); + if (!dwork) + return -ENOMEM; + + dwork->sa_entry = sa_entry; + INIT_DELAYED_WORK(&dwork->dwork, mlx5e_ipsec_handle_tx_limit); + sa_entry->dwork = dwork; + return 0; } static int mlx5e_xfrm_add_state(struct xfrm_state *x, @@ -308,6 +620,7 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x, struct net_device *netdev = x->xso.real_dev; struct mlx5e_ipsec *ipsec; struct mlx5e_priv *priv; + gfp_t gfp; int err; priv = netdev_priv(netdev); @@ -315,30 +628,52 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x, return -EOPNOTSUPP; ipsec = priv->ipsec; - err = mlx5e_xfrm_validate_state(priv->mdev, x, extack); - if (err) - return err; - - sa_entry = kzalloc(sizeof(*sa_entry), GFP_KERNEL); + gfp = (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ) ? GFP_ATOMIC : GFP_KERNEL; + sa_entry = kzalloc(sizeof(*sa_entry), gfp); if (!sa_entry) return -ENOMEM; sa_entry->x = x; sa_entry->ipsec = ipsec; + /* Check if this SA is originated from acquire flow temporary SA */ + if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ) + goto out; + + err = mlx5e_xfrm_validate_state(priv->mdev, x, extack); + if (err) + goto err_xfrm; /* check esn */ - mlx5e_ipsec_update_esn_state(sa_entry); + if (x->props.flags & XFRM_STATE_ESN) + mlx5e_ipsec_update_esn_state(sa_entry); mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &sa_entry->attrs); + + err = mlx5_ipsec_create_work(sa_entry); + if (err) + goto err_xfrm; + + err = mlx5e_ipsec_create_dwork(sa_entry); + if (err) + goto release_work; + /* create hw context */ err = mlx5_ipsec_create_sa_ctx(sa_entry); if (err) - goto err_xfrm; + goto release_dwork; err = mlx5e_accel_ipsec_fs_add_rule(sa_entry); if (err) goto err_hw_ctx; + if (x->props.mode == XFRM_MODE_TUNNEL && + x->xso.type == XFRM_DEV_OFFLOAD_PACKET && + !mlx5e_ipsec_fs_tunnel_enabled(sa_entry)) { + NL_SET_ERR_MSG_MOD(extack, "Packet offload tunnel mode is disabled due to encap settings"); + err = -EINVAL; + goto err_add_rule; + } + /* We use *_bh() variant because xfrm_timer_handler(), which runs * in softirq context, can reach our state delete logic and we need * xa_erase_bh() there. @@ -348,11 +683,18 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x, if (err) goto err_add_rule; - if (x->xso.dir == XFRM_DEV_OFFLOAD_OUT) - sa_entry->set_iv_op = (x->props.flags & XFRM_STATE_ESN) ? - mlx5e_ipsec_set_iv_esn : mlx5e_ipsec_set_iv; + mlx5e_ipsec_set_esn_ops(sa_entry); + + if (sa_entry->dwork) + queue_delayed_work(ipsec->wq, &sa_entry->dwork->dwork, + MLX5_IPSEC_RESCHED); - INIT_WORK(&sa_entry->modify_work.work, _update_xfrm_state); + if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET && + x->props.mode == XFRM_MODE_TUNNEL) + xa_set_mark(&ipsec->sadb, sa_entry->ipsec_obj_id, + MLX5E_IPSEC_TUNNEL_SA); + +out: x->xso.offload_handle = (unsigned long)sa_entry; return 0; @@ -360,32 +702,101 @@ err_add_rule: mlx5e_accel_ipsec_fs_del_rule(sa_entry); err_hw_ctx: mlx5_ipsec_free_sa_ctx(sa_entry); +release_dwork: + kfree(sa_entry->dwork); +release_work: + if (sa_entry->work) + kfree(sa_entry->work->data); + kfree(sa_entry->work); err_xfrm: kfree(sa_entry); - NL_SET_ERR_MSG_MOD(extack, "Device failed to offload this policy"); + NL_SET_ERR_MSG_WEAK_MOD(extack, "Device failed to offload this state"); return err; } static void mlx5e_xfrm_del_state(struct xfrm_state *x) { struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x); + struct mlx5_accel_esp_xfrm_attrs *attrs = &sa_entry->attrs; struct mlx5e_ipsec *ipsec = sa_entry->ipsec; struct mlx5e_ipsec_sa_entry *old; + if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ) + return; + old = xa_erase_bh(&ipsec->sadb, sa_entry->ipsec_obj_id); WARN_ON(old != sa_entry); + + if (attrs->mode == XFRM_MODE_TUNNEL && + attrs->type == XFRM_DEV_OFFLOAD_PACKET) + /* Make sure that no ARP requests are running in parallel */ + flush_workqueue(ipsec->wq); + } static void mlx5e_xfrm_free_state(struct xfrm_state *x) { struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x); - cancel_work_sync(&sa_entry->modify_work.work); + if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ) + goto sa_entry_free; + + if (sa_entry->work) + cancel_work_sync(&sa_entry->work->work); + + if (sa_entry->dwork) + cancel_delayed_work_sync(&sa_entry->dwork->dwork); + mlx5e_accel_ipsec_fs_del_rule(sa_entry); mlx5_ipsec_free_sa_ctx(sa_entry); + kfree(sa_entry->dwork); + if (sa_entry->work) + kfree(sa_entry->work->data); + kfree(sa_entry->work); +sa_entry_free: kfree(sa_entry); } +static int mlx5e_ipsec_netevent_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct mlx5_accel_esp_xfrm_attrs *attrs; + struct mlx5e_ipsec_netevent_data *data; + struct mlx5e_ipsec_sa_entry *sa_entry; + struct mlx5e_ipsec *ipsec; + struct neighbour *n = ptr; + struct net_device *netdev; + struct xfrm_state *x; + unsigned long idx; + + if (event != NETEVENT_NEIGH_UPDATE || !(n->nud_state & NUD_VALID)) + return NOTIFY_DONE; + + ipsec = container_of(nb, struct mlx5e_ipsec, netevent_nb); + xa_for_each_marked(&ipsec->sadb, idx, sa_entry, MLX5E_IPSEC_TUNNEL_SA) { + attrs = &sa_entry->attrs; + + if (attrs->family == AF_INET) { + if (!neigh_key_eq32(n, &attrs->saddr.a4) && + !neigh_key_eq32(n, &attrs->daddr.a4)) + continue; + } else { + if (!neigh_key_eq128(n, &attrs->saddr.a4) && + !neigh_key_eq128(n, &attrs->daddr.a4)) + continue; + } + + x = sa_entry->x; + netdev = x->xso.real_dev; + data = sa_entry->work->data; + + neigh_ha_snapshot(data->addr, n, netdev); + queue_work(ipsec->wq, &sa_entry->work->work); + } + + return NOTIFY_DONE; +} + void mlx5e_ipsec_init(struct mlx5e_priv *priv) { struct mlx5e_ipsec *ipsec; @@ -402,8 +813,8 @@ void mlx5e_ipsec_init(struct mlx5e_priv *priv) xa_init_flags(&ipsec->sadb, XA_FLAGS_ALLOC); ipsec->mdev = priv->mdev; - ipsec->wq = alloc_ordered_workqueue("mlx5e_ipsec: %s", 0, - priv->netdev->name); + ipsec->wq = alloc_workqueue("mlx5e_ipsec: %s", WQ_UNBOUND, 0, + priv->netdev->name); if (!ipsec->wq) goto err_wq; @@ -414,6 +825,13 @@ void mlx5e_ipsec_init(struct mlx5e_priv *priv) goto err_aso; } + if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_TUNNEL) { + ipsec->netevent_nb.notifier_call = mlx5e_ipsec_netevent_event; + ret = register_netevent_notifier(&ipsec->netevent_nb); + if (ret) + goto clear_aso; + } + ret = mlx5e_accel_ipsec_fs_init(ipsec); if (ret) goto err_fs_init; @@ -424,6 +842,9 @@ void mlx5e_ipsec_init(struct mlx5e_priv *priv) return; err_fs_init: + if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_TUNNEL) + unregister_netevent_notifier(&ipsec->netevent_nb); +clear_aso: if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_PACKET_OFFLOAD) mlx5e_ipsec_aso_cleanup(ipsec); err_aso: @@ -442,6 +863,8 @@ void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv) return; mlx5e_accel_ipsec_fs_cleanup(ipsec); + if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_TUNNEL) + unregister_netevent_notifier(&ipsec->netevent_nb); if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_PACKET_OFFLOAD) mlx5e_ipsec_aso_cleanup(ipsec); destroy_workqueue(ipsec->wq); @@ -467,41 +890,43 @@ static bool mlx5e_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *x) static void mlx5e_xfrm_advance_esn_state(struct xfrm_state *x) { struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x); - struct mlx5e_ipsec_modify_state_work *modify_work = - &sa_entry->modify_work; + struct mlx5e_ipsec_work *work = sa_entry->work; + struct mlx5e_ipsec_sa_entry *sa_entry_shadow; bool need_update; need_update = mlx5e_ipsec_update_esn_state(sa_entry); if (!need_update) return; - mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &modify_work->attrs); - queue_work(sa_entry->ipsec->wq, &modify_work->work); + sa_entry_shadow = work->data; + memset(sa_entry_shadow, 0x00, sizeof(*sa_entry_shadow)); + mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &sa_entry_shadow->attrs); + queue_work(sa_entry->ipsec->wq, &work->work); } static void mlx5e_xfrm_update_curlft(struct xfrm_state *x) { struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x); - int err; + struct mlx5e_ipsec_rule *ipsec_rule = &sa_entry->ipsec_rule; + u64 packets, bytes, lastuse; - lockdep_assert_held(&x->lock); + lockdep_assert(lockdep_is_held(&x->lock) || + lockdep_is_held(&dev_net(x->xso.real_dev)->xfrm.xfrm_cfg_mutex)); - if (sa_entry->attrs.soft_packet_limit == XFRM_INF) - /* Limits are not configured, as soft limit - * must be lowever than hard limit. - */ + if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ) return; - err = mlx5e_ipsec_aso_query(sa_entry, NULL); - if (err) - return; - - mlx5e_ipsec_aso_update_curlft(sa_entry, &x->curlft.packets); + mlx5_fc_query_cached(ipsec_rule->fc, &bytes, &packets, &lastuse); + x->curlft.packets += packets; + x->curlft.bytes += bytes; } -static int mlx5e_xfrm_validate_policy(struct xfrm_policy *x, +static int mlx5e_xfrm_validate_policy(struct mlx5_core_dev *mdev, + struct xfrm_policy *x, struct netlink_ext_ack *extack) { + struct xfrm_selector *sel = &x->selector; + if (x->type != XFRM_POLICY_TYPE_MAIN) { NL_SET_ERR_MSG_MOD(extack, "Cannot offload non-main policy types"); return -EINVAL; @@ -519,8 +944,9 @@ static int mlx5e_xfrm_validate_policy(struct xfrm_policy *x, return -EINVAL; } - if (!x->xfrm_vec[0].reqid) { - NL_SET_ERR_MSG_MOD(extack, "Cannot offload policy without reqid"); + if (!x->xfrm_vec[0].reqid && sel->proto == IPPROTO_IP && + addr6_all_zero(sel->saddr.a6) && addr6_all_zero(sel->daddr.a6)) { + NL_SET_ERR_MSG_MOD(extack, "Unsupported policy with reqid 0 without at least one of upper protocol or ip addr(s) different than 0"); return -EINVAL; } @@ -529,12 +955,24 @@ static int mlx5e_xfrm_validate_policy(struct xfrm_policy *x, return -EINVAL; } - if (x->selector.proto != IPPROTO_IP && - (x->selector.proto != IPPROTO_UDP || x->xdo.dir != XFRM_DEV_OFFLOAD_OUT)) { + if (sel->proto != IPPROTO_IP && + (sel->proto != IPPROTO_UDP || x->xdo.dir != XFRM_DEV_OFFLOAD_OUT)) { NL_SET_ERR_MSG_MOD(extack, "Device does not support upper protocol other than UDP, and only Tx direction"); return -EINVAL; } + if (x->priority) { + if (!(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_PRIO)) { + NL_SET_ERR_MSG_MOD(extack, "Device does not support policy priority"); + return -EINVAL; + } + + if (x->priority == U32_MAX) { + NL_SET_ERR_MSG_MOD(extack, "Device does not support requested policy priority"); + return -EINVAL; + } + } + return 0; } @@ -560,6 +998,7 @@ mlx5e_ipsec_build_accel_pol_attrs(struct mlx5e_ipsec_pol_entry *pol_entry, attrs->upspec.sport = ntohs(sel->sport); attrs->upspec.sport_mask = ntohs(sel->sport_mask); attrs->upspec.proto = sel->proto; + attrs->prio = x->priority; } static int mlx5e_xfrm_add_policy(struct xfrm_policy *x, @@ -576,7 +1015,7 @@ static int mlx5e_xfrm_add_policy(struct xfrm_policy *x, return -EOPNOTSUPP; } - err = mlx5e_xfrm_validate_policy(x, extack); + err = mlx5e_xfrm_validate_policy(priv->mdev, x, extack); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h index 12f044330639..4e9887171508 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h @@ -60,10 +60,24 @@ struct upspec { u8 proto; }; +struct mlx5_ipsec_lft { + u64 hard_packet_limit; + u64 soft_packet_limit; + u64 numb_rounds_hard; + u64 numb_rounds_soft; +}; + +struct mlx5_replay_esn { + u32 replay_window; + u32 esn; + u32 esn_msb; + u8 overlap : 1; + u8 trigger : 1; +}; + struct mlx5_accel_esp_xfrm_attrs { - u32 esn; u32 spi; - u32 flags; + u32 mode; struct aes_gcm_keymat aes_gcm; union { @@ -78,15 +92,15 @@ struct mlx5_accel_esp_xfrm_attrs { struct upspec upspec; u8 dir : 2; - u8 esn_overlap : 1; - u8 esn_trigger : 1; u8 type : 2; + u8 drop : 1; u8 family; - u32 replay_window; + struct mlx5_replay_esn replay_esn; u32 authsize; u32 reqid; - u64 hard_packet_limit; - u64 soft_packet_limit; + struct mlx5_ipsec_lft lft; + u8 smac[ETH_ALEN]; + u8 dmac[ETH_ALEN]; }; enum mlx5_ipsec_cap { @@ -94,6 +108,8 @@ enum mlx5_ipsec_cap { MLX5_IPSEC_CAP_ESN = 1 << 1, MLX5_IPSEC_CAP_PACKET_OFFLOAD = 1 << 2, MLX5_IPSEC_CAP_ROCE = 1 << 3, + MLX5_IPSEC_CAP_PRIO = 1 << 4, + MLX5_IPSEC_CAP_TUNNEL = 1 << 5, }; struct mlx5e_priv; @@ -124,8 +140,17 @@ struct mlx5e_ipsec_tx; struct mlx5e_ipsec_work { struct work_struct work; - struct mlx5e_ipsec *ipsec; - u32 id; + struct mlx5e_ipsec_sa_entry *sa_entry; + void *data; +}; + +struct mlx5e_ipsec_netevent_data { + u8 addr[ETH_ALEN]; +}; + +struct mlx5e_ipsec_dwork { + struct delayed_work dwork; + struct mlx5e_ipsec_sa_entry *sa_entry; }; struct mlx5e_ipsec_aso { @@ -148,12 +173,13 @@ struct mlx5e_ipsec { struct mlx5e_ipsec_tx *tx; struct mlx5e_ipsec_aso *aso; struct notifier_block nb; + struct notifier_block netevent_nb; struct mlx5_ipsec_fs *roce; }; struct mlx5e_ipsec_esn_state { u32 esn; - u8 trigger: 1; + u32 esn_msb; u8 overlap: 1; }; @@ -161,11 +187,13 @@ struct mlx5e_ipsec_rule { struct mlx5_flow_handle *rule; struct mlx5_modify_hdr *modify_hdr; struct mlx5_pkt_reformat *pkt_reformat; + struct mlx5_fc *fc; }; -struct mlx5e_ipsec_modify_state_work { - struct work_struct work; - struct mlx5_accel_esp_xfrm_attrs attrs; +struct mlx5e_ipsec_limits { + u64 round; + u8 soft_limit_hit : 1; + u8 fix_limit : 1; }; struct mlx5e_ipsec_sa_entry { @@ -178,7 +206,9 @@ struct mlx5e_ipsec_sa_entry { u32 ipsec_obj_id; u32 enc_key_id; struct mlx5e_ipsec_rule ipsec_rule; - struct mlx5e_ipsec_modify_state_work modify_work; + struct mlx5e_ipsec_work *work; + struct mlx5e_ipsec_dwork *dwork; + struct mlx5e_ipsec_limits limits; }; struct mlx5_accel_pol_xfrm_attrs { @@ -198,6 +228,7 @@ struct mlx5_accel_pol_xfrm_attrs { u8 type : 2; u8 dir : 2; u32 reqid; + u32 prio; }; struct mlx5e_ipsec_pol_entry { @@ -219,6 +250,8 @@ int mlx5e_accel_ipsec_fs_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry); void mlx5e_accel_ipsec_fs_del_rule(struct mlx5e_ipsec_sa_entry *sa_entry); int mlx5e_accel_ipsec_fs_add_pol(struct mlx5e_ipsec_pol_entry *pol_entry); void mlx5e_accel_ipsec_fs_del_pol(struct mlx5e_ipsec_pol_entry *pol_entry); +void mlx5e_accel_ipsec_fs_modify(struct mlx5e_ipsec_sa_entry *sa_entry); +bool mlx5e_ipsec_fs_tunnel_enabled(struct mlx5e_ipsec_sa_entry *sa_entry); int mlx5_ipsec_create_sa_ctx(struct mlx5e_ipsec_sa_entry *sa_entry); void mlx5_ipsec_free_sa_ctx(struct mlx5e_ipsec_sa_entry *sa_entry); @@ -233,9 +266,6 @@ void mlx5e_ipsec_aso_cleanup(struct mlx5e_ipsec *ipsec); int mlx5e_ipsec_aso_query(struct mlx5e_ipsec_sa_entry *sa_entry, struct mlx5_wqe_aso_ctrl_seg *data); -void mlx5e_ipsec_aso_update_curlft(struct mlx5e_ipsec_sa_entry *sa_entry, - u64 *packets); - void mlx5e_accel_ipsec_fs_read_stats(struct mlx5e_priv *priv, void *ipsec_stats); @@ -252,6 +282,13 @@ mlx5e_ipsec_pol2dev(struct mlx5e_ipsec_pol_entry *pol_entry) { return pol_entry->ipsec->mdev; } + +static inline bool addr6_all_zero(__be32 *addr6) +{ + static const __be32 zaddr6[4] = {}; + + return !memcmp(addr6, zaddr6, sizeof(zaddr6)); +} #else static inline void mlx5e_ipsec_init(struct mlx5e_priv *priv) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c index 9871ba1b25ff..dbe87bf89c0d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c @@ -4,11 +4,15 @@ #include <linux/netdevice.h> #include "en.h" #include "en/fs.h" +#include "eswitch.h" #include "ipsec.h" #include "fs_core.h" #include "lib/ipsec_fs_roce.h" +#include "lib/fs_chains.h" #define NUM_IPSEC_FTE BIT(15) +#define MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_SIZE 16 +#define IPSEC_TUNNEL_DEFAULT_TTL 0x40 struct mlx5e_ipsec_fc { struct mlx5_fc *cnt; @@ -34,13 +38,18 @@ struct mlx5e_ipsec_rx { struct mlx5e_ipsec_miss sa; struct mlx5e_ipsec_rule status; struct mlx5e_ipsec_fc *fc; + struct mlx5_fs_chains *chains; + u8 allow_tunnel_mode : 1; }; struct mlx5e_ipsec_tx { struct mlx5e_ipsec_ft ft; struct mlx5e_ipsec_miss pol; + struct mlx5e_ipsec_rule status; struct mlx5_flow_namespace *ns; struct mlx5e_ipsec_fc *fc; + struct mlx5_fs_chains *chains; + u8 allow_tunnel_mode : 1; }; /* IPsec RX flow steering */ @@ -51,9 +60,70 @@ static enum mlx5_traffic_types family2tt(u32 family) return MLX5_TT_IPV6_IPSEC_ESP; } +static struct mlx5e_ipsec_rx *ipsec_rx(struct mlx5e_ipsec *ipsec, u32 family) +{ + if (family == AF_INET) + return ipsec->rx_ipv4; + + return ipsec->rx_ipv6; +} + +static struct mlx5_fs_chains * +ipsec_chains_create(struct mlx5_core_dev *mdev, struct mlx5_flow_table *miss_ft, + enum mlx5_flow_namespace_type ns, int base_prio, + int base_level, struct mlx5_flow_table **root_ft) +{ + struct mlx5_chains_attr attr = {}; + struct mlx5_fs_chains *chains; + struct mlx5_flow_table *ft; + int err; + + attr.flags = MLX5_CHAINS_AND_PRIOS_SUPPORTED | + MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED; + attr.max_grp_num = 2; + attr.default_ft = miss_ft; + attr.ns = ns; + attr.fs_base_prio = base_prio; + attr.fs_base_level = base_level; + chains = mlx5_chains_create(mdev, &attr); + if (IS_ERR(chains)) + return chains; + + /* Create chain 0, prio 1, level 0 to connect chains to prev in fs_core */ + ft = mlx5_chains_get_table(chains, 0, 1, 0); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + goto err_chains_get; + } + + *root_ft = ft; + return chains; + +err_chains_get: + mlx5_chains_destroy(chains); + return ERR_PTR(err); +} + +static void ipsec_chains_destroy(struct mlx5_fs_chains *chains) +{ + mlx5_chains_put_table(chains, 0, 1, 0); + mlx5_chains_destroy(chains); +} + +static struct mlx5_flow_table * +ipsec_chains_get_table(struct mlx5_fs_chains *chains, u32 prio) +{ + return mlx5_chains_get_table(chains, 0, prio + 1, 0); +} + +static void ipsec_chains_put_table(struct mlx5_fs_chains *chains, u32 prio) +{ + mlx5_chains_put_table(chains, 0, prio + 1, 0); +} + static struct mlx5_flow_table *ipsec_ft_create(struct mlx5_flow_namespace *ns, int level, int prio, - int max_num_groups) + int max_num_groups, u32 flags) { struct mlx5_flow_table_attr ft_attr = {}; @@ -62,6 +132,7 @@ static struct mlx5_flow_table *ipsec_ft_create(struct mlx5_flow_namespace *ns, ft_attr.max_fte = NUM_IPSEC_FTE; ft_attr.level = level; ft_attr.prio = prio; + ft_attr.flags = flags; return mlx5_create_auto_grouped_flow_table(ns, &ft_attr); } @@ -170,14 +241,24 @@ out: static void rx_destroy(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_rx *rx, u32 family) { - mlx5_del_flow_rules(rx->pol.rule); - mlx5_destroy_flow_group(rx->pol.group); - mlx5_destroy_flow_table(rx->ft.pol); + struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(ipsec->fs, false); + + /* disconnect */ + mlx5_ttc_fwd_default_dest(ttc, family2tt(family)); + + if (rx->chains) { + ipsec_chains_destroy(rx->chains); + } else { + mlx5_del_flow_rules(rx->pol.rule); + mlx5_destroy_flow_group(rx->pol.group); + mlx5_destroy_flow_table(rx->ft.pol); + } mlx5_del_flow_rules(rx->sa.rule); mlx5_destroy_flow_group(rx->sa.group); mlx5_destroy_flow_table(rx->ft.sa); - + if (rx->allow_tunnel_mode) + mlx5_eswitch_unblock_encap(mdev); mlx5_del_flow_rules(rx->status.rule); mlx5_modify_header_dealloc(mdev, rx->status.modify_hdr); mlx5_destroy_flow_table(rx->ft.status); @@ -193,6 +274,7 @@ static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, struct mlx5_flow_destination default_dest; struct mlx5_flow_destination dest[2]; struct mlx5_flow_table *ft; + u32 flags = 0; int err; default_dest = mlx5_ttc_get_default_dest(ttc, family2tt(family)); @@ -203,7 +285,7 @@ static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, return err; ft = ipsec_ft_create(ns, MLX5E_ACCEL_FS_ESP_FT_ERR_LEVEL, - MLX5E_NIC_PRIO, 1); + MLX5E_NIC_PRIO, 1, 0); if (IS_ERR(ft)) { err = PTR_ERR(ft); goto err_fs_ft_status; @@ -226,8 +308,12 @@ static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, goto err_add; /* Create FT */ - ft = ipsec_ft_create(ns, MLX5E_ACCEL_FS_ESP_FT_LEVEL, MLX5E_NIC_PRIO, - 2); + if (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_TUNNEL) + rx->allow_tunnel_mode = mlx5_eswitch_block_encap(mdev); + if (rx->allow_tunnel_mode) + flags = MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; + ft = ipsec_ft_create(ns, MLX5E_ACCEL_FS_ESP_FT_LEVEL, MLX5E_NIC_PRIO, 2, + flags); if (IS_ERR(ft)) { err = PTR_ERR(ft); goto err_fs_ft; @@ -238,8 +324,22 @@ static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, if (err) goto err_fs; + if (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_PRIO) { + rx->chains = ipsec_chains_create(mdev, rx->ft.sa, + MLX5_FLOW_NAMESPACE_KERNEL, + MLX5E_NIC_PRIO, + MLX5E_ACCEL_FS_POL_FT_LEVEL, + &rx->ft.pol); + if (IS_ERR(rx->chains)) { + err = PTR_ERR(rx->chains); + goto err_pol_ft; + } + + goto connect; + } + ft = ipsec_ft_create(ns, MLX5E_ACCEL_FS_POL_FT_LEVEL, MLX5E_NIC_PRIO, - 2); + 2, 0); if (IS_ERR(ft)) { err = PTR_ERR(ft); goto err_pol_ft; @@ -252,6 +352,12 @@ static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, if (err) goto err_pol_miss; +connect: + /* connect */ + memset(dest, 0x00, sizeof(*dest)); + dest[0].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[0].ft = rx->ft.pol; + mlx5_ttc_fwd_dest(ttc, family2tt(family), &dest[0]); return 0; err_pol_miss: @@ -262,6 +368,8 @@ err_pol_ft: err_fs: mlx5_destroy_flow_table(rx->ft.sa); err_fs_ft: + if (rx->allow_tunnel_mode) + mlx5_eswitch_unblock_encap(mdev); mlx5_del_flow_rules(rx->status.rule); mlx5_modify_header_dealloc(mdev, rx->status.modify_hdr); err_add: @@ -271,83 +379,191 @@ err_fs_ft_status: return err; } -static struct mlx5e_ipsec_rx *rx_ft_get(struct mlx5_core_dev *mdev, - struct mlx5e_ipsec *ipsec, u32 family) +static int rx_get(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx, u32 family) { - struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(ipsec->fs, false); - struct mlx5_flow_destination dest = {}; - struct mlx5e_ipsec_rx *rx; - int err = 0; - - if (family == AF_INET) - rx = ipsec->rx_ipv4; - else - rx = ipsec->rx_ipv6; + int err; - mutex_lock(&rx->ft.mutex); if (rx->ft.refcnt) goto skip; - /* create FT */ err = rx_create(mdev, ipsec, rx, family); if (err) - goto out; - - /* connect */ - dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - dest.ft = rx->ft.pol; - mlx5_ttc_fwd_dest(ttc, family2tt(family), &dest); + return err; skip: rx->ft.refcnt++; -out: + return 0; +} + +static void rx_put(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_rx *rx, + u32 family) +{ + if (--rx->ft.refcnt) + return; + + rx_destroy(ipsec->mdev, ipsec, rx, family); +} + +static struct mlx5e_ipsec_rx *rx_ft_get(struct mlx5_core_dev *mdev, + struct mlx5e_ipsec *ipsec, u32 family) +{ + struct mlx5e_ipsec_rx *rx = ipsec_rx(ipsec, family); + int err; + + mutex_lock(&rx->ft.mutex); + err = rx_get(mdev, ipsec, rx, family); mutex_unlock(&rx->ft.mutex); if (err) return ERR_PTR(err); + return rx; } -static void rx_ft_put(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, - u32 family) +static struct mlx5_flow_table *rx_ft_get_policy(struct mlx5_core_dev *mdev, + struct mlx5e_ipsec *ipsec, + u32 family, u32 prio) { - struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(ipsec->fs, false); - struct mlx5e_ipsec_rx *rx; + struct mlx5e_ipsec_rx *rx = ipsec_rx(ipsec, family); + struct mlx5_flow_table *ft; + int err; - if (family == AF_INET) - rx = ipsec->rx_ipv4; - else - rx = ipsec->rx_ipv6; + mutex_lock(&rx->ft.mutex); + err = rx_get(mdev, ipsec, rx, family); + if (err) + goto err_get; + + ft = rx->chains ? ipsec_chains_get_table(rx->chains, prio) : rx->ft.pol; + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + goto err_get_ft; + } + + mutex_unlock(&rx->ft.mutex); + return ft; + +err_get_ft: + rx_put(ipsec, rx, family); +err_get: + mutex_unlock(&rx->ft.mutex); + return ERR_PTR(err); +} + +static void rx_ft_put(struct mlx5e_ipsec *ipsec, u32 family) +{ + struct mlx5e_ipsec_rx *rx = ipsec_rx(ipsec, family); mutex_lock(&rx->ft.mutex); - rx->ft.refcnt--; - if (rx->ft.refcnt) - goto out; + rx_put(ipsec, rx, family); + mutex_unlock(&rx->ft.mutex); +} - /* disconnect */ - mlx5_ttc_fwd_default_dest(ttc, family2tt(family)); +static void rx_ft_put_policy(struct mlx5e_ipsec *ipsec, u32 family, u32 prio) +{ + struct mlx5e_ipsec_rx *rx = ipsec_rx(ipsec, family); - /* remove FT */ - rx_destroy(mdev, ipsec, rx, family); + mutex_lock(&rx->ft.mutex); + if (rx->chains) + ipsec_chains_put_table(rx->chains, prio); -out: + rx_put(ipsec, rx, family); mutex_unlock(&rx->ft.mutex); } +static int ipsec_counter_rule_tx(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_tx *tx) +{ + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_handle *fte; + struct mlx5_flow_spec *spec; + int err; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + /* create fte */ + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW | + MLX5_FLOW_CONTEXT_ACTION_COUNT; + dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest.counter_id = mlx5_fc_id(tx->fc->cnt); + fte = mlx5_add_flow_rules(tx->ft.status, spec, &flow_act, &dest, 1); + if (IS_ERR(fte)) { + err = PTR_ERR(fte); + mlx5_core_err(mdev, "Fail to add ipsec tx counter rule err=%d\n", err); + goto err_rule; + } + + kvfree(spec); + tx->status.rule = fte; + return 0; + +err_rule: + kvfree(spec); + return err; +} + /* IPsec TX flow steering */ +static void tx_destroy(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_tx *tx, + struct mlx5_ipsec_fs *roce) +{ + mlx5_ipsec_fs_roce_tx_destroy(roce); + if (tx->chains) { + ipsec_chains_destroy(tx->chains); + } else { + mlx5_del_flow_rules(tx->pol.rule); + mlx5_destroy_flow_group(tx->pol.group); + mlx5_destroy_flow_table(tx->ft.pol); + } + + mlx5_destroy_flow_table(tx->ft.sa); + if (tx->allow_tunnel_mode) + mlx5_eswitch_unblock_encap(mdev); + mlx5_del_flow_rules(tx->status.rule); + mlx5_destroy_flow_table(tx->ft.status); +} + static int tx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_tx *tx, struct mlx5_ipsec_fs *roce) { struct mlx5_flow_destination dest = {}; struct mlx5_flow_table *ft; + u32 flags = 0; int err; - ft = ipsec_ft_create(tx->ns, 1, 0, 4); + ft = ipsec_ft_create(tx->ns, 2, 0, 1, 0); if (IS_ERR(ft)) return PTR_ERR(ft); + tx->ft.status = ft; + + err = ipsec_counter_rule_tx(mdev, tx); + if (err) + goto err_status_rule; + if (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_TUNNEL) + tx->allow_tunnel_mode = mlx5_eswitch_block_encap(mdev); + if (tx->allow_tunnel_mode) + flags = MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; + ft = ipsec_ft_create(tx->ns, 1, 0, 4, flags); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + goto err_sa_ft; + } tx->ft.sa = ft; - ft = ipsec_ft_create(tx->ns, 0, 0, 2); + if (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_PRIO) { + tx->chains = ipsec_chains_create( + mdev, tx->ft.sa, MLX5_FLOW_NAMESPACE_EGRESS_IPSEC, 0, 0, + &tx->ft.pol); + if (IS_ERR(tx->chains)) { + err = PTR_ERR(tx->chains); + goto err_pol_ft; + } + + goto connect_roce; + } + + ft = ipsec_ft_create(tx->ns, 0, 0, 2, 0); if (IS_ERR(ft)) { err = PTR_ERR(ft); goto err_pol_ft; @@ -356,44 +572,102 @@ static int tx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_tx *tx, dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; dest.ft = tx->ft.sa; err = ipsec_miss_create(mdev, tx->ft.pol, &tx->pol, &dest); - if (err) - goto err_pol_miss; + if (err) { + mlx5_destroy_flow_table(tx->ft.pol); + goto err_pol_ft; + } +connect_roce: err = mlx5_ipsec_fs_roce_tx_create(mdev, roce, tx->ft.pol); if (err) goto err_roce; return 0; err_roce: - mlx5_del_flow_rules(tx->pol.rule); - mlx5_destroy_flow_group(tx->pol.group); -err_pol_miss: - mlx5_destroy_flow_table(tx->ft.pol); + if (tx->chains) { + ipsec_chains_destroy(tx->chains); + } else { + mlx5_del_flow_rules(tx->pol.rule); + mlx5_destroy_flow_group(tx->pol.group); + mlx5_destroy_flow_table(tx->ft.pol); + } err_pol_ft: mlx5_destroy_flow_table(tx->ft.sa); +err_sa_ft: + if (tx->allow_tunnel_mode) + mlx5_eswitch_unblock_encap(mdev); + mlx5_del_flow_rules(tx->status.rule); +err_status_rule: + mlx5_destroy_flow_table(tx->ft.status); return err; } -static struct mlx5e_ipsec_tx *tx_ft_get(struct mlx5_core_dev *mdev, - struct mlx5e_ipsec *ipsec) +static int tx_get(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_tx *tx) { - struct mlx5e_ipsec_tx *tx = ipsec->tx; - int err = 0; + int err; - mutex_lock(&tx->ft.mutex); if (tx->ft.refcnt) goto skip; err = tx_create(mdev, tx, ipsec->roce); if (err) - goto out; + return err; skip: tx->ft.refcnt++; -out: + return 0; +} + +static void tx_put(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_tx *tx) +{ + if (--tx->ft.refcnt) + return; + + tx_destroy(ipsec->mdev, tx, ipsec->roce); +} + +static struct mlx5_flow_table *tx_ft_get_policy(struct mlx5_core_dev *mdev, + struct mlx5e_ipsec *ipsec, + u32 prio) +{ + struct mlx5e_ipsec_tx *tx = ipsec->tx; + struct mlx5_flow_table *ft; + int err; + + mutex_lock(&tx->ft.mutex); + err = tx_get(mdev, ipsec, tx); + if (err) + goto err_get; + + ft = tx->chains ? ipsec_chains_get_table(tx->chains, prio) : tx->ft.pol; + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + goto err_get_ft; + } + + mutex_unlock(&tx->ft.mutex); + return ft; + +err_get_ft: + tx_put(ipsec, tx); +err_get: + mutex_unlock(&tx->ft.mutex); + return ERR_PTR(err); +} + +static struct mlx5e_ipsec_tx *tx_ft_get(struct mlx5_core_dev *mdev, + struct mlx5e_ipsec *ipsec) +{ + struct mlx5e_ipsec_tx *tx = ipsec->tx; + int err; + + mutex_lock(&tx->ft.mutex); + err = tx_get(mdev, ipsec, tx); mutex_unlock(&tx->ft.mutex); if (err) return ERR_PTR(err); + return tx; } @@ -402,53 +676,72 @@ static void tx_ft_put(struct mlx5e_ipsec *ipsec) struct mlx5e_ipsec_tx *tx = ipsec->tx; mutex_lock(&tx->ft.mutex); - tx->ft.refcnt--; - if (tx->ft.refcnt) - goto out; + tx_put(ipsec, tx); + mutex_unlock(&tx->ft.mutex); +} - mlx5_ipsec_fs_roce_tx_destroy(ipsec->roce); - mlx5_del_flow_rules(tx->pol.rule); - mlx5_destroy_flow_group(tx->pol.group); - mlx5_destroy_flow_table(tx->ft.pol); - mlx5_destroy_flow_table(tx->ft.sa); -out: +static void tx_ft_put_policy(struct mlx5e_ipsec *ipsec, u32 prio) +{ + struct mlx5e_ipsec_tx *tx = ipsec->tx; + + mutex_lock(&tx->ft.mutex); + if (tx->chains) + ipsec_chains_put_table(tx->chains, prio); + + tx_put(ipsec, tx); mutex_unlock(&tx->ft.mutex); } static void setup_fte_addr4(struct mlx5_flow_spec *spec, __be32 *saddr, __be32 *daddr) { + if (!*saddr && !*daddr) + return; + spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_version); MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version, 4); - memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, - outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4), saddr, 4); - memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, - outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4), daddr, 4); - MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, - outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4); - MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, - outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4); + if (*saddr) { + memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4), saddr, 4); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4); + } + + if (*daddr) { + memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4), daddr, 4); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4); + } } static void setup_fte_addr6(struct mlx5_flow_spec *spec, __be32 *saddr, __be32 *daddr) { + if (addr6_all_zero(saddr) && addr6_all_zero(daddr)) + return; + spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_version); MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version, 6); - memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, - outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6), saddr, 16); - memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, - outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), daddr, 16); - memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria, - outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6), 0xff, 16); - memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria, - outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), 0xff, 16); + if (!addr6_all_zero(saddr)) { + memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6), saddr, 16); + memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6), 0xff, 16); + } + + if (!addr6_all_zero(daddr)) { + memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), daddr, 16); + memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), 0xff, 16); + } } static void setup_fte_esp(struct mlx5_flow_spec *spec) @@ -560,40 +853,181 @@ static int setup_modify_header(struct mlx5_core_dev *mdev, u32 val, u8 dir, return 0; } +static int +setup_pkt_tunnel_reformat(struct mlx5_core_dev *mdev, + struct mlx5_accel_esp_xfrm_attrs *attrs, + struct mlx5_pkt_reformat_params *reformat_params) +{ + struct ip_esp_hdr *esp_hdr; + struct ipv6hdr *ipv6hdr; + struct ethhdr *eth_hdr; + struct iphdr *iphdr; + char *reformatbf; + size_t bfflen; + void *hdr; + + bfflen = sizeof(*eth_hdr); + + if (attrs->dir == XFRM_DEV_OFFLOAD_OUT) { + bfflen += sizeof(*esp_hdr) + 8; + + switch (attrs->family) { + case AF_INET: + bfflen += sizeof(*iphdr); + break; + case AF_INET6: + bfflen += sizeof(*ipv6hdr); + break; + default: + return -EINVAL; + } + } + + reformatbf = kzalloc(bfflen, GFP_KERNEL); + if (!reformatbf) + return -ENOMEM; + + eth_hdr = (struct ethhdr *)reformatbf; + switch (attrs->family) { + case AF_INET: + eth_hdr->h_proto = htons(ETH_P_IP); + break; + case AF_INET6: + eth_hdr->h_proto = htons(ETH_P_IPV6); + break; + default: + goto free_reformatbf; + } + + ether_addr_copy(eth_hdr->h_dest, attrs->dmac); + ether_addr_copy(eth_hdr->h_source, attrs->smac); + + switch (attrs->dir) { + case XFRM_DEV_OFFLOAD_IN: + reformat_params->type = MLX5_REFORMAT_TYPE_L3_ESP_TUNNEL_TO_L2; + break; + case XFRM_DEV_OFFLOAD_OUT: + reformat_params->type = MLX5_REFORMAT_TYPE_L2_TO_L3_ESP_TUNNEL; + reformat_params->param_0 = attrs->authsize; + + hdr = reformatbf + sizeof(*eth_hdr); + switch (attrs->family) { + case AF_INET: + iphdr = (struct iphdr *)hdr; + memcpy(&iphdr->saddr, &attrs->saddr.a4, 4); + memcpy(&iphdr->daddr, &attrs->daddr.a4, 4); + iphdr->version = 4; + iphdr->ihl = 5; + iphdr->ttl = IPSEC_TUNNEL_DEFAULT_TTL; + iphdr->protocol = IPPROTO_ESP; + hdr += sizeof(*iphdr); + break; + case AF_INET6: + ipv6hdr = (struct ipv6hdr *)hdr; + memcpy(&ipv6hdr->saddr, &attrs->saddr.a6, 16); + memcpy(&ipv6hdr->daddr, &attrs->daddr.a6, 16); + ipv6hdr->nexthdr = IPPROTO_ESP; + ipv6hdr->version = 6; + ipv6hdr->hop_limit = IPSEC_TUNNEL_DEFAULT_TTL; + hdr += sizeof(*ipv6hdr); + break; + default: + goto free_reformatbf; + } + + esp_hdr = (struct ip_esp_hdr *)hdr; + esp_hdr->spi = htonl(attrs->spi); + break; + default: + goto free_reformatbf; + } + + reformat_params->size = bfflen; + reformat_params->data = reformatbf; + return 0; + +free_reformatbf: + kfree(reformatbf); + return -EINVAL; +} + +static int +setup_pkt_transport_reformat(struct mlx5_accel_esp_xfrm_attrs *attrs, + struct mlx5_pkt_reformat_params *reformat_params) +{ + u8 *reformatbf; + __be32 spi; + + switch (attrs->dir) { + case XFRM_DEV_OFFLOAD_IN: + reformat_params->type = MLX5_REFORMAT_TYPE_DEL_ESP_TRANSPORT; + break; + case XFRM_DEV_OFFLOAD_OUT: + if (attrs->family == AF_INET) + reformat_params->type = + MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV4; + else + reformat_params->type = + MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV6; + + reformatbf = kzalloc(MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_SIZE, + GFP_KERNEL); + if (!reformatbf) + return -ENOMEM; + + /* convert to network format */ + spi = htonl(attrs->spi); + memcpy(reformatbf, &spi, sizeof(spi)); + + reformat_params->param_0 = attrs->authsize; + reformat_params->size = + MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_SIZE; + reformat_params->data = reformatbf; + break; + default: + return -EINVAL; + } + + return 0; +} + static int setup_pkt_reformat(struct mlx5_core_dev *mdev, struct mlx5_accel_esp_xfrm_attrs *attrs, struct mlx5_flow_act *flow_act) { - enum mlx5_flow_namespace_type ns_type = MLX5_FLOW_NAMESPACE_EGRESS; struct mlx5_pkt_reformat_params reformat_params = {}; struct mlx5_pkt_reformat *pkt_reformat; - u8 reformatbf[16] = {}; - __be32 spi; + enum mlx5_flow_namespace_type ns_type; + int ret; - if (attrs->dir == XFRM_DEV_OFFLOAD_IN) { - reformat_params.type = MLX5_REFORMAT_TYPE_DEL_ESP_TRANSPORT; + switch (attrs->dir) { + case XFRM_DEV_OFFLOAD_IN: ns_type = MLX5_FLOW_NAMESPACE_KERNEL; - goto cmd; + break; + case XFRM_DEV_OFFLOAD_OUT: + ns_type = MLX5_FLOW_NAMESPACE_EGRESS; + break; + default: + return -EINVAL; } - if (attrs->family == AF_INET) - reformat_params.type = - MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV4; - else - reformat_params.type = - MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV6; - - /* convert to network format */ - spi = htonl(attrs->spi); - memcpy(reformatbf, &spi, 4); + switch (attrs->mode) { + case XFRM_MODE_TRANSPORT: + ret = setup_pkt_transport_reformat(attrs, &reformat_params); + break; + case XFRM_MODE_TUNNEL: + ret = setup_pkt_tunnel_reformat(mdev, attrs, &reformat_params); + break; + default: + ret = -EINVAL; + } - reformat_params.param_0 = attrs->authsize; - reformat_params.size = sizeof(reformatbf); - reformat_params.data = &reformatbf; + if (ret) + return ret; -cmd: pkt_reformat = mlx5_packet_reformat_alloc(mdev, &reformat_params, ns_type); + kfree(reformat_params.data); if (IS_ERR(pkt_reformat)) return PTR_ERR(pkt_reformat); @@ -607,11 +1041,12 @@ static int rx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry) struct mlx5_accel_esp_xfrm_attrs *attrs = &sa_entry->attrs; struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry); struct mlx5e_ipsec *ipsec = sa_entry->ipsec; - struct mlx5_flow_destination dest = {}; + struct mlx5_flow_destination dest[2]; struct mlx5_flow_act flow_act = {}; struct mlx5_flow_handle *rule; struct mlx5_flow_spec *spec; struct mlx5e_ipsec_rx *rx; + struct mlx5_fc *counter; int err; rx = rx_ft_get(mdev, ipsec, attrs->family); @@ -648,14 +1083,25 @@ static int rx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry) break; } + counter = mlx5_fc_create(mdev, true); + if (IS_ERR(counter)) { + err = PTR_ERR(counter); + goto err_add_cnt; + } flow_act.crypto.type = MLX5_FLOW_CONTEXT_ENCRYPT_DECRYPT_TYPE_IPSEC; flow_act.crypto.obj_id = sa_entry->ipsec_obj_id; flow_act.flags |= FLOW_ACT_NO_APPEND; - flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | - MLX5_FLOW_CONTEXT_ACTION_CRYPTO_DECRYPT; - dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - dest.ft = rx->ft.status; - rule = mlx5_add_flow_rules(rx->ft.sa, spec, &flow_act, &dest, 1); + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_CRYPTO_DECRYPT | + MLX5_FLOW_CONTEXT_ACTION_COUNT; + if (attrs->drop) + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_DROP; + else + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + dest[0].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[0].ft = rx->ft.status; + dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest[1].counter_id = mlx5_fc_id(counter); + rule = mlx5_add_flow_rules(rx->ft.sa, spec, &flow_act, dest, 2); if (IS_ERR(rule)) { err = PTR_ERR(rule); mlx5_core_err(mdev, "fail to add RX ipsec rule err=%d\n", err); @@ -665,10 +1111,13 @@ static int rx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry) sa_entry->ipsec_rule.rule = rule; sa_entry->ipsec_rule.modify_hdr = flow_act.modify_hdr; + sa_entry->ipsec_rule.fc = counter; sa_entry->ipsec_rule.pkt_reformat = flow_act.pkt_reformat; return 0; err_add_flow: + mlx5_fc_destroy(mdev, counter); +err_add_cnt: if (flow_act.pkt_reformat) mlx5_packet_reformat_dealloc(mdev, flow_act.pkt_reformat); err_pkt_reformat: @@ -676,7 +1125,7 @@ err_pkt_reformat: err_mod_header: kvfree(spec); err_alloc: - rx_ft_put(mdev, ipsec, attrs->family); + rx_ft_put(ipsec, attrs->family); return err; } @@ -685,12 +1134,13 @@ static int tx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry) struct mlx5_accel_esp_xfrm_attrs *attrs = &sa_entry->attrs; struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry); struct mlx5e_ipsec *ipsec = sa_entry->ipsec; - struct mlx5_flow_destination dest = {}; + struct mlx5_flow_destination dest[2]; struct mlx5_flow_act flow_act = {}; struct mlx5_flow_handle *rule; struct mlx5_flow_spec *spec; struct mlx5e_ipsec_tx *tx; - int err = 0; + struct mlx5_fc *counter; + int err; tx = tx_ft_get(mdev, ipsec); if (IS_ERR(tx)) @@ -717,7 +1167,8 @@ static int tx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry) setup_fte_reg_a(spec); break; case XFRM_DEV_OFFLOAD_PACKET: - setup_fte_reg_c0(spec, attrs->reqid); + if (attrs->reqid) + setup_fte_reg_c0(spec, attrs->reqid); err = setup_pkt_reformat(mdev, attrs, &flow_act); if (err) goto err_pkt_reformat; @@ -726,15 +1177,27 @@ static int tx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry) break; } + counter = mlx5_fc_create(mdev, true); + if (IS_ERR(counter)) { + err = PTR_ERR(counter); + goto err_add_cnt; + } + flow_act.crypto.type = MLX5_FLOW_CONTEXT_ENCRYPT_DECRYPT_TYPE_IPSEC; flow_act.crypto.obj_id = sa_entry->ipsec_obj_id; flow_act.flags |= FLOW_ACT_NO_APPEND; - flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW | - MLX5_FLOW_CONTEXT_ACTION_CRYPTO_ENCRYPT | + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_CRYPTO_ENCRYPT | MLX5_FLOW_CONTEXT_ACTION_COUNT; - dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dest.counter_id = mlx5_fc_id(tx->fc->cnt); - rule = mlx5_add_flow_rules(tx->ft.sa, spec, &flow_act, &dest, 1); + if (attrs->drop) + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_DROP; + else + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + + dest[0].ft = tx->ft.status; + dest[0].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest[1].counter_id = mlx5_fc_id(counter); + rule = mlx5_add_flow_rules(tx->ft.sa, spec, &flow_act, dest, 2); if (IS_ERR(rule)) { err = PTR_ERR(rule); mlx5_core_err(mdev, "fail to add TX ipsec rule err=%d\n", err); @@ -743,10 +1206,13 @@ static int tx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry) kvfree(spec); sa_entry->ipsec_rule.rule = rule; + sa_entry->ipsec_rule.fc = counter; sa_entry->ipsec_rule.pkt_reformat = flow_act.pkt_reformat; return 0; err_add_flow: + mlx5_fc_destroy(mdev, counter); +err_add_cnt: if (flow_act.pkt_reformat) mlx5_packet_reformat_dealloc(mdev, flow_act.pkt_reformat); err_pkt_reformat: @@ -760,16 +1226,17 @@ static int tx_add_policy(struct mlx5e_ipsec_pol_entry *pol_entry) { struct mlx5_accel_pol_xfrm_attrs *attrs = &pol_entry->attrs; struct mlx5_core_dev *mdev = mlx5e_ipsec_pol2dev(pol_entry); + struct mlx5e_ipsec_tx *tx = pol_entry->ipsec->tx; struct mlx5_flow_destination dest[2] = {}; struct mlx5_flow_act flow_act = {}; struct mlx5_flow_handle *rule; struct mlx5_flow_spec *spec; - struct mlx5e_ipsec_tx *tx; + struct mlx5_flow_table *ft; int err, dstn = 0; - tx = tx_ft_get(mdev, pol_entry->ipsec); - if (IS_ERR(tx)) - return PTR_ERR(tx); + ft = tx_ft_get_policy(mdev, pol_entry->ipsec, attrs->prio); + if (IS_ERR(ft)) + return PTR_ERR(ft); spec = kvzalloc(sizeof(*spec), GFP_KERNEL); if (!spec) { @@ -785,14 +1252,16 @@ static int tx_add_policy(struct mlx5e_ipsec_pol_entry *pol_entry) setup_fte_no_frags(spec); setup_fte_upper_proto_match(spec, &attrs->upspec); - err = setup_modify_header(mdev, attrs->reqid, XFRM_DEV_OFFLOAD_OUT, - &flow_act); - if (err) - goto err_mod_header; - switch (attrs->action) { case XFRM_POLICY_ALLOW: flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + if (!attrs->reqid) + break; + + err = setup_modify_header(mdev, attrs->reqid, + XFRM_DEV_OFFLOAD_OUT, &flow_act); + if (err) + goto err_mod_header; break; case XFRM_POLICY_BLOCK: flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_DROP | @@ -804,14 +1273,14 @@ static int tx_add_policy(struct mlx5e_ipsec_pol_entry *pol_entry) default: WARN_ON(true); err = -EINVAL; - goto err_action; + goto err_mod_header; } flow_act.flags |= FLOW_ACT_NO_APPEND; dest[dstn].ft = tx->ft.sa; dest[dstn].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; dstn++; - rule = mlx5_add_flow_rules(tx->ft.pol, spec, &flow_act, dest, dstn); + rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, dstn); if (IS_ERR(rule)) { err = PTR_ERR(rule); mlx5_core_err(mdev, "fail to add TX ipsec rule err=%d\n", err); @@ -824,11 +1293,12 @@ static int tx_add_policy(struct mlx5e_ipsec_pol_entry *pol_entry) return 0; err_action: - mlx5_modify_header_dealloc(mdev, flow_act.modify_hdr); + if (flow_act.modify_hdr) + mlx5_modify_header_dealloc(mdev, flow_act.modify_hdr); err_mod_header: kvfree(spec); err_alloc: - tx_ft_put(pol_entry->ipsec); + tx_ft_put_policy(pol_entry->ipsec, attrs->prio); return err; } @@ -840,12 +1310,15 @@ static int rx_add_policy(struct mlx5e_ipsec_pol_entry *pol_entry) struct mlx5_flow_act flow_act = {}; struct mlx5_flow_handle *rule; struct mlx5_flow_spec *spec; + struct mlx5_flow_table *ft; struct mlx5e_ipsec_rx *rx; int err, dstn = 0; - rx = rx_ft_get(mdev, pol_entry->ipsec, attrs->family); - if (IS_ERR(rx)) - return PTR_ERR(rx); + ft = rx_ft_get_policy(mdev, pol_entry->ipsec, attrs->family, attrs->prio); + if (IS_ERR(ft)) + return PTR_ERR(ft); + + rx = ipsec_rx(pol_entry->ipsec, attrs->family); spec = kvzalloc(sizeof(*spec), GFP_KERNEL); if (!spec) { @@ -880,7 +1353,7 @@ static int rx_add_policy(struct mlx5e_ipsec_pol_entry *pol_entry) dest[dstn].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; dest[dstn].ft = rx->ft.sa; dstn++; - rule = mlx5_add_flow_rules(rx->ft.pol, spec, &flow_act, dest, dstn); + rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, dstn); if (IS_ERR(rule)) { err = PTR_ERR(rule); mlx5_core_err(mdev, "Fail to add RX IPsec policy rule err=%d\n", err); @@ -894,7 +1367,7 @@ static int rx_add_policy(struct mlx5e_ipsec_pol_entry *pol_entry) err_action: kvfree(spec); err_alloc: - rx_ft_put(mdev, pol_entry->ipsec, attrs->family); + rx_ft_put_policy(pol_entry->ipsec, attrs->family, attrs->prio); return err; } @@ -1022,7 +1495,7 @@ void mlx5e_accel_ipsec_fs_del_rule(struct mlx5e_ipsec_sa_entry *sa_entry) struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry); mlx5_del_flow_rules(ipsec_rule->rule); - + mlx5_fc_destroy(mdev, ipsec_rule->fc); if (ipsec_rule->pkt_reformat) mlx5_packet_reformat_dealloc(mdev, ipsec_rule->pkt_reformat); @@ -1032,7 +1505,7 @@ void mlx5e_accel_ipsec_fs_del_rule(struct mlx5e_ipsec_sa_entry *sa_entry) } mlx5_modify_header_dealloc(mdev, ipsec_rule->modify_hdr); - rx_ft_put(mdev, sa_entry->ipsec, sa_entry->attrs.family); + rx_ft_put(sa_entry->ipsec, sa_entry->attrs.family); } int mlx5e_accel_ipsec_fs_add_pol(struct mlx5e_ipsec_pol_entry *pol_entry) @@ -1051,12 +1524,15 @@ void mlx5e_accel_ipsec_fs_del_pol(struct mlx5e_ipsec_pol_entry *pol_entry) mlx5_del_flow_rules(ipsec_rule->rule); if (pol_entry->attrs.dir == XFRM_DEV_OFFLOAD_IN) { - rx_ft_put(mdev, pol_entry->ipsec, pol_entry->attrs.family); + rx_ft_put_policy(pol_entry->ipsec, pol_entry->attrs.family, + pol_entry->attrs.prio); return; } - mlx5_modify_header_dealloc(mdev, ipsec_rule->modify_hdr); - tx_ft_put(pol_entry->ipsec); + if (ipsec_rule->modify_hdr) + mlx5_modify_header_dealloc(mdev, ipsec_rule->modify_hdr); + + tx_ft_put_policy(pol_entry->ipsec, pol_entry->attrs.prio); } void mlx5e_accel_ipsec_fs_cleanup(struct mlx5e_ipsec *ipsec) @@ -1126,3 +1602,31 @@ err_rx_ipv4: kfree(ipsec->tx); return err; } + +void mlx5e_accel_ipsec_fs_modify(struct mlx5e_ipsec_sa_entry *sa_entry) +{ + struct mlx5e_ipsec_sa_entry sa_entry_shadow = {}; + int err; + + memcpy(&sa_entry_shadow, sa_entry, sizeof(*sa_entry)); + memset(&sa_entry_shadow.ipsec_rule, 0x00, sizeof(sa_entry->ipsec_rule)); + + err = mlx5e_accel_ipsec_fs_add_rule(&sa_entry_shadow); + if (err) + return; + + mlx5e_accel_ipsec_fs_del_rule(sa_entry); + memcpy(sa_entry, &sa_entry_shadow, sizeof(*sa_entry)); +} + +bool mlx5e_ipsec_fs_tunnel_enabled(struct mlx5e_ipsec_sa_entry *sa_entry) +{ + struct mlx5e_ipsec_rx *rx = + ipsec_rx(sa_entry->ipsec, sa_entry->attrs.family); + struct mlx5e_ipsec_tx *tx = sa_entry->ipsec->tx; + + if (sa_entry->attrs.dir == XFRM_DEV_OFFLOAD_OUT) + return tx->allow_tunnel_mode; + + return rx->allow_tunnel_mode; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c index 5fa7a4c40429..df90e19066bc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c @@ -8,6 +8,7 @@ enum { MLX5_IPSEC_ASO_REMOVE_FLOW_PKT_CNT_OFFSET, + MLX5_IPSEC_ASO_REMOVE_FLOW_SOFT_LFT_OFFSET, }; u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev) @@ -36,11 +37,24 @@ u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev) MLX5_CAP_ETH(mdev, insert_trailer) && MLX5_CAP_ETH(mdev, swp)) caps |= MLX5_IPSEC_CAP_CRYPTO; - if (MLX5_CAP_IPSEC(mdev, ipsec_full_offload) && - MLX5_CAP_FLOWTABLE_NIC_TX(mdev, reformat_add_esp_trasport) && - MLX5_CAP_FLOWTABLE_NIC_RX(mdev, reformat_del_esp_trasport) && - MLX5_CAP_FLOWTABLE_NIC_RX(mdev, decap)) - caps |= MLX5_IPSEC_CAP_PACKET_OFFLOAD; + if (MLX5_CAP_IPSEC(mdev, ipsec_full_offload)) { + if (MLX5_CAP_FLOWTABLE_NIC_TX(mdev, + reformat_add_esp_trasport) && + MLX5_CAP_FLOWTABLE_NIC_RX(mdev, + reformat_del_esp_trasport) && + MLX5_CAP_FLOWTABLE_NIC_RX(mdev, decap)) + caps |= MLX5_IPSEC_CAP_PACKET_OFFLOAD; + + if (MLX5_CAP_FLOWTABLE_NIC_TX(mdev, ignore_flow_level) && + MLX5_CAP_FLOWTABLE_NIC_RX(mdev, ignore_flow_level)) + caps |= MLX5_IPSEC_CAP_PRIO; + + if (MLX5_CAP_FLOWTABLE_NIC_TX(mdev, + reformat_l2_to_l3_esp_tunnel) && + MLX5_CAP_FLOWTABLE_NIC_RX(mdev, + reformat_l3_esp_tunnel_to_l2)) + caps |= MLX5_IPSEC_CAP_TUNNEL; + } if (mlx5_get_roce_state(mdev) && MLX5_CAP_GEN_2(mdev, flow_table_type_2_type) & MLX5_FT_NIC_RX_2_NIC_RX_RDMA && @@ -68,15 +82,17 @@ static void mlx5e_ipsec_packet_setup(void *obj, u32 pdn, void *aso_ctx; aso_ctx = MLX5_ADDR_OF(ipsec_obj, obj, ipsec_aso); - if (attrs->esn_trigger) { + if (attrs->replay_esn.trigger) { MLX5_SET(ipsec_aso, aso_ctx, esn_event_arm, 1); if (attrs->dir == XFRM_DEV_OFFLOAD_IN) { MLX5_SET(ipsec_aso, aso_ctx, window_sz, - attrs->replay_window / 64); + attrs->replay_esn.replay_window / 64); MLX5_SET(ipsec_aso, aso_ctx, mode, MLX5_IPSEC_ASO_REPLAY_PROTECTION); - } + } + MLX5_SET(ipsec_aso, aso_ctx, mode_parameter, + attrs->replay_esn.esn); } /* ASO context */ @@ -93,15 +109,15 @@ static void mlx5e_ipsec_packet_setup(void *obj, u32 pdn, if (attrs->dir == XFRM_DEV_OFFLOAD_OUT) MLX5_SET(ipsec_aso, aso_ctx, mode, MLX5_IPSEC_ASO_INC_SN); - if (attrs->hard_packet_limit != XFRM_INF) { + if (attrs->lft.hard_packet_limit != XFRM_INF) { MLX5_SET(ipsec_aso, aso_ctx, remove_flow_pkt_cnt, - lower_32_bits(attrs->hard_packet_limit)); + attrs->lft.hard_packet_limit); MLX5_SET(ipsec_aso, aso_ctx, hard_lft_arm, 1); } - if (attrs->soft_packet_limit != XFRM_INF) { + if (attrs->lft.soft_packet_limit != XFRM_INF) { MLX5_SET(ipsec_aso, aso_ctx, remove_flow_soft_lft, - lower_32_bits(attrs->soft_packet_limit)); + attrs->lft.soft_packet_limit); MLX5_SET(ipsec_aso, aso_ctx, soft_lft_arm, 1); } @@ -128,10 +144,10 @@ static int mlx5_create_ipsec_obj(struct mlx5e_ipsec_sa_entry *sa_entry) salt_iv_p = MLX5_ADDR_OF(ipsec_obj, obj, implicit_iv); memcpy(salt_iv_p, &aes_gcm->seq_iv, sizeof(aes_gcm->seq_iv)); /* esn */ - if (attrs->esn_trigger) { + if (attrs->replay_esn.trigger) { MLX5_SET(ipsec_obj, obj, esn_en, 1); - MLX5_SET(ipsec_obj, obj, esn_msb, attrs->esn); - MLX5_SET(ipsec_obj, obj, esn_overlap, attrs->esn_overlap); + MLX5_SET(ipsec_obj, obj, esn_msb, attrs->replay_esn.esn_msb); + MLX5_SET(ipsec_obj, obj, esn_overlap, attrs->replay_esn.overlap); } MLX5_SET(ipsec_obj, obj, dekn, sa_entry->enc_key_id); @@ -217,9 +233,6 @@ static int mlx5_modify_ipsec_obj(struct mlx5e_ipsec_sa_entry *sa_entry, void *obj; int err; - if (!attrs->esn_trigger) - return 0; - general_obj_types = MLX5_CAP_GEN_64(mdev, general_obj_types); if (!(general_obj_types & MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_IPSEC)) return -EINVAL; @@ -247,8 +260,8 @@ static int mlx5_modify_ipsec_obj(struct mlx5e_ipsec_sa_entry *sa_entry, MLX5_SET64(ipsec_obj, obj, modify_field_select, MLX5_MODIFY_IPSEC_BITMASK_ESN_OVERLAP | MLX5_MODIFY_IPSEC_BITMASK_ESN_MSB); - MLX5_SET(ipsec_obj, obj, esn_msb, attrs->esn); - MLX5_SET(ipsec_obj, obj, esn_overlap, attrs->esn_overlap); + MLX5_SET(ipsec_obj, obj, esn_msb, attrs->replay_esn.esn_msb); + MLX5_SET(ipsec_obj, obj, esn_overlap, attrs->replay_esn.overlap); /* general object fields set */ MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT); @@ -268,29 +281,24 @@ void mlx5_accel_esp_modify_xfrm(struct mlx5e_ipsec_sa_entry *sa_entry, memcpy(&sa_entry->attrs, attrs, sizeof(sa_entry->attrs)); } -static void -mlx5e_ipsec_aso_update_esn(struct mlx5e_ipsec_sa_entry *sa_entry, - const struct mlx5_accel_esp_xfrm_attrs *attrs) +static void mlx5e_ipsec_aso_update(struct mlx5e_ipsec_sa_entry *sa_entry, + struct mlx5_wqe_aso_ctrl_seg *data) { - struct mlx5_wqe_aso_ctrl_seg data = {}; + data->data_mask_mode = MLX5_ASO_DATA_MASK_MODE_BITWISE_64BIT << 6; + data->condition_1_0_operand = MLX5_ASO_ALWAYS_TRUE | + MLX5_ASO_ALWAYS_TRUE << 4; - data.data_mask_mode = MLX5_ASO_DATA_MASK_MODE_BITWISE_64BIT << 6; - data.condition_1_0_operand = MLX5_ASO_ALWAYS_TRUE | MLX5_ASO_ALWAYS_TRUE - << 4; - data.data_offset_condition_operand = MLX5_IPSEC_ASO_REMOVE_FLOW_PKT_CNT_OFFSET; - data.bitwise_data = cpu_to_be64(BIT_ULL(54)); - data.data_mask = data.bitwise_data; - - mlx5e_ipsec_aso_query(sa_entry, &data); + mlx5e_ipsec_aso_query(sa_entry, data); } static void mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry, u32 mode_param) { struct mlx5_accel_esp_xfrm_attrs attrs = {}; + struct mlx5_wqe_aso_ctrl_seg data = {}; if (mode_param < MLX5E_IPSEC_ESN_SCOPE_MID) { - sa_entry->esn_state.esn++; + sa_entry->esn_state.esn_msb++; sa_entry->esn_state.overlap = 0; } else { sa_entry->esn_state.overlap = 1; @@ -298,25 +306,129 @@ static void mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry, mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &attrs); mlx5_accel_esp_modify_xfrm(sa_entry, &attrs); - mlx5e_ipsec_aso_update_esn(sa_entry, &attrs); + + data.data_offset_condition_operand = + MLX5_IPSEC_ASO_REMOVE_FLOW_PKT_CNT_OFFSET; + data.bitwise_data = cpu_to_be64(BIT_ULL(54)); + data.data_mask = data.bitwise_data; + + mlx5e_ipsec_aso_update(sa_entry, &data); +} + +static void mlx5e_ipsec_aso_update_hard(struct mlx5e_ipsec_sa_entry *sa_entry) +{ + struct mlx5_wqe_aso_ctrl_seg data = {}; + + data.data_offset_condition_operand = + MLX5_IPSEC_ASO_REMOVE_FLOW_PKT_CNT_OFFSET; + data.bitwise_data = cpu_to_be64(BIT_ULL(57) + BIT_ULL(31)); + data.data_mask = data.bitwise_data; + mlx5e_ipsec_aso_update(sa_entry, &data); +} + +static void mlx5e_ipsec_aso_update_soft(struct mlx5e_ipsec_sa_entry *sa_entry, + u32 val) +{ + struct mlx5_wqe_aso_ctrl_seg data = {}; + + data.data_offset_condition_operand = + MLX5_IPSEC_ASO_REMOVE_FLOW_SOFT_LFT_OFFSET; + data.bitwise_data = cpu_to_be64(val); + data.data_mask = cpu_to_be64(U32_MAX); + mlx5e_ipsec_aso_update(sa_entry, &data); +} + +static void mlx5e_ipsec_handle_limits(struct mlx5e_ipsec_sa_entry *sa_entry) +{ + struct mlx5_accel_esp_xfrm_attrs *attrs = &sa_entry->attrs; + struct mlx5e_ipsec *ipsec = sa_entry->ipsec; + struct mlx5e_ipsec_aso *aso = ipsec->aso; + bool soft_arm, hard_arm; + u64 hard_cnt; + + lockdep_assert_held(&sa_entry->x->lock); + + soft_arm = !MLX5_GET(ipsec_aso, aso->ctx, soft_lft_arm); + hard_arm = !MLX5_GET(ipsec_aso, aso->ctx, hard_lft_arm); + if (!soft_arm && !hard_arm) + /* It is not lifetime event */ + return; + + hard_cnt = MLX5_GET(ipsec_aso, aso->ctx, remove_flow_pkt_cnt); + if (!hard_cnt || hard_arm) { + /* It is possible to see packet counter equal to zero without + * hard limit event armed. Such situation can be if packet + * decreased, while we handled soft limit event. + * + * However it will be HW/FW bug if hard limit event is raised + * and packet counter is not zero. + */ + WARN_ON_ONCE(hard_arm && hard_cnt); + + /* Notify about hard limit */ + xfrm_state_check_expire(sa_entry->x); + return; + } + + /* We are in soft limit event. */ + if (!sa_entry->limits.soft_limit_hit && + sa_entry->limits.round == attrs->lft.numb_rounds_soft) { + sa_entry->limits.soft_limit_hit = true; + /* Notify about soft limit */ + xfrm_state_check_expire(sa_entry->x); + + if (sa_entry->limits.round == attrs->lft.numb_rounds_hard) + goto hard; + + if (attrs->lft.soft_packet_limit > BIT_ULL(31)) { + /* We cannot avoid a soft_value that might have the high + * bit set. For instance soft_value=2^31+1 cannot be + * adjusted to the low bit clear version of soft_value=1 + * because it is too close to 0. + * + * Thus we have this corner case where we can hit the + * soft_limit with the high bit set, but cannot adjust + * the counter. Thus we set a temporary interrupt_value + * at least 2^30 away from here and do the adjustment + * then. + */ + mlx5e_ipsec_aso_update_soft(sa_entry, + BIT_ULL(31) - BIT_ULL(30)); + sa_entry->limits.fix_limit = true; + return; + } + + sa_entry->limits.fix_limit = true; + } + +hard: + if (sa_entry->limits.round == attrs->lft.numb_rounds_hard) { + mlx5e_ipsec_aso_update_soft(sa_entry, 0); + attrs->lft.soft_packet_limit = XFRM_INF; + return; + } + + mlx5e_ipsec_aso_update_hard(sa_entry); + sa_entry->limits.round++; + if (sa_entry->limits.round == attrs->lft.numb_rounds_soft) + mlx5e_ipsec_aso_update_soft(sa_entry, + attrs->lft.soft_packet_limit); + if (sa_entry->limits.fix_limit) { + sa_entry->limits.fix_limit = false; + mlx5e_ipsec_aso_update_soft(sa_entry, BIT_ULL(31) - 1); + } } static void mlx5e_ipsec_handle_event(struct work_struct *_work) { struct mlx5e_ipsec_work *work = container_of(_work, struct mlx5e_ipsec_work, work); + struct mlx5e_ipsec_sa_entry *sa_entry = work->data; struct mlx5_accel_esp_xfrm_attrs *attrs; - struct mlx5e_ipsec_sa_entry *sa_entry; struct mlx5e_ipsec_aso *aso; - struct mlx5e_ipsec *ipsec; int ret; - sa_entry = xa_load(&work->ipsec->sadb, work->id); - if (!sa_entry) - goto out; - - ipsec = sa_entry->ipsec; - aso = ipsec->aso; + aso = sa_entry->ipsec->aso; attrs = &sa_entry->attrs; spin_lock(&sa_entry->x->lock); @@ -324,21 +436,18 @@ static void mlx5e_ipsec_handle_event(struct work_struct *_work) if (ret) goto unlock; - if (attrs->esn_trigger && + if (attrs->replay_esn.trigger && !MLX5_GET(ipsec_aso, aso->ctx, esn_event_arm)) { u32 mode_param = MLX5_GET(ipsec_aso, aso->ctx, mode_parameter); mlx5e_ipsec_update_esn_state(sa_entry, mode_param); } - if (attrs->soft_packet_limit != XFRM_INF) - if (!MLX5_GET(ipsec_aso, aso->ctx, soft_lft_arm) || - !MLX5_GET(ipsec_aso, aso->ctx, hard_lft_arm)) - xfrm_state_check_expire(sa_entry->x); + if (attrs->lft.soft_packet_limit != XFRM_INF) + mlx5e_ipsec_handle_limits(sa_entry); unlock: spin_unlock(&sa_entry->x->lock); -out: kfree(work); } @@ -346,6 +455,7 @@ static int mlx5e_ipsec_event(struct notifier_block *nb, unsigned long event, void *data) { struct mlx5e_ipsec *ipsec = container_of(nb, struct mlx5e_ipsec, nb); + struct mlx5e_ipsec_sa_entry *sa_entry; struct mlx5_eqe_obj_change *object; struct mlx5e_ipsec_work *work; struct mlx5_eqe *eqe = data; @@ -360,13 +470,16 @@ static int mlx5e_ipsec_event(struct notifier_block *nb, unsigned long event, if (type != MLX5_GENERAL_OBJECT_TYPES_IPSEC) return NOTIFY_DONE; + sa_entry = xa_load(&ipsec->sadb, be32_to_cpu(object->obj_id)); + if (!sa_entry) + return NOTIFY_DONE; + work = kmalloc(sizeof(*work), GFP_ATOMIC); if (!work) return NOTIFY_DONE; INIT_WORK(&work->work, mlx5e_ipsec_handle_event); - work->ipsec = ipsec; - work->id = be32_to_cpu(object->obj_id); + work->data = sa_entry; queue_work(ipsec->wq, &work->work); return NOTIFY_OK; @@ -457,6 +570,7 @@ int mlx5e_ipsec_aso_query(struct mlx5e_ipsec_sa_entry *sa_entry, struct mlx5_wqe_aso_ctrl_seg *ctrl; struct mlx5e_hw_objs *res; struct mlx5_aso_wqe *wqe; + unsigned long expires; u8 ds_cnt; int ret; @@ -478,22 +592,12 @@ int mlx5e_ipsec_aso_query(struct mlx5e_ipsec_sa_entry *sa_entry, mlx5e_ipsec_aso_copy(ctrl, data); mlx5_aso_post_wqe(aso->aso, false, &wqe->ctrl); - ret = mlx5_aso_poll_cq(aso->aso, false); + expires = jiffies + msecs_to_jiffies(10); + do { + ret = mlx5_aso_poll_cq(aso->aso, false); + if (ret) + usleep_range(2, 10); + } while (ret && time_is_after_jiffies(expires)); spin_unlock_bh(&aso->lock); return ret; } - -void mlx5e_ipsec_aso_update_curlft(struct mlx5e_ipsec_sa_entry *sa_entry, - u64 *packets) -{ - struct mlx5e_ipsec *ipsec = sa_entry->ipsec; - struct mlx5e_ipsec_aso *aso = ipsec->aso; - u64 hard_cnt; - - hard_cnt = MLX5_GET(ipsec_aso, aso->ctx, remove_flow_pkt_cnt); - /* HW decresases the limit till it reaches zero to fire an avent. - * We need to fix the calculations, so the returned count is a total - * number of passed packets and not how much left. - */ - *packets = sa_entry->attrs.hard_packet_limit - hard_cnt; -} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c index 51f1cd8364c2..6b7b563f844a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c @@ -4,6 +4,7 @@ #include <linux/mlx5/device.h> #include <linux/mlx5/mlx5_ifc.h> #include <linux/xarray.h> +#include <linux/if_vlan.h> #include "en.h" #include "lib/aso.h" @@ -348,12 +349,21 @@ static void mlx5e_macsec_cleanup_sa(struct mlx5e_macsec *macsec, sa->macsec_rule = NULL; } +static struct mlx5e_priv *macsec_netdev_priv(const struct net_device *dev) +{ +#if IS_ENABLED(CONFIG_VLAN_8021Q) + if (is_vlan_dev(dev)) + return netdev_priv(vlan_dev_priv(dev)->real_dev); +#endif + return netdev_priv(dev); +} + static int mlx5e_macsec_init_sa(struct macsec_context *ctx, struct mlx5e_macsec_sa *sa, bool encrypt, bool is_tx) { - struct mlx5e_priv *priv = netdev_priv(ctx->netdev); + struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev); struct mlx5e_macsec *macsec = priv->macsec; struct mlx5_macsec_rule_attrs rule_attrs; struct mlx5_core_dev *mdev = priv->mdev; @@ -427,7 +437,7 @@ static int macsec_rx_sa_active_update(struct macsec_context *ctx, struct mlx5e_macsec_sa *rx_sa, bool active) { - struct mlx5e_priv *priv = netdev_priv(ctx->netdev); + struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev); struct mlx5e_macsec *macsec = priv->macsec; int err = 0; @@ -508,9 +518,9 @@ static void update_macsec_epn(struct mlx5e_macsec_sa *sa, const struct macsec_ke static int mlx5e_macsec_add_txsa(struct macsec_context *ctx) { + struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev); const struct macsec_tx_sc *tx_sc = &ctx->secy->tx_sc; const struct macsec_tx_sa *ctx_tx_sa = ctx->sa.tx_sa; - struct mlx5e_priv *priv = netdev_priv(ctx->netdev); const struct macsec_secy *secy = ctx->secy; struct mlx5e_macsec_device *macsec_device; struct mlx5_core_dev *mdev = priv->mdev; @@ -583,9 +593,9 @@ out: static int mlx5e_macsec_upd_txsa(struct macsec_context *ctx) { + struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev); const struct macsec_tx_sc *tx_sc = &ctx->secy->tx_sc; const struct macsec_tx_sa *ctx_tx_sa = ctx->sa.tx_sa; - struct mlx5e_priv *priv = netdev_priv(ctx->netdev); struct mlx5e_macsec_device *macsec_device; u8 assoc_num = ctx->sa.assoc_num; struct mlx5e_macsec_sa *tx_sa; @@ -645,7 +655,7 @@ out: static int mlx5e_macsec_del_txsa(struct macsec_context *ctx) { - struct mlx5e_priv *priv = netdev_priv(ctx->netdev); + struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev); struct mlx5e_macsec_device *macsec_device; u8 assoc_num = ctx->sa.assoc_num; struct mlx5e_macsec_sa *tx_sa; @@ -696,7 +706,7 @@ static u32 mlx5e_macsec_get_sa_from_hashtable(struct rhashtable *sci_hash, sci_t static int mlx5e_macsec_add_rxsc(struct macsec_context *ctx) { struct mlx5e_macsec_rx_sc_xarray_element *sc_xarray_element; - struct mlx5e_priv *priv = netdev_priv(ctx->netdev); + struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev); const struct macsec_rx_sc *ctx_rx_sc = ctx->rx_sc; struct mlx5e_macsec_device *macsec_device; struct mlx5e_macsec_rx_sc *rx_sc; @@ -776,7 +786,7 @@ out: static int mlx5e_macsec_upd_rxsc(struct macsec_context *ctx) { - struct mlx5e_priv *priv = netdev_priv(ctx->netdev); + struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev); const struct macsec_rx_sc *ctx_rx_sc = ctx->rx_sc; struct mlx5e_macsec_device *macsec_device; struct mlx5e_macsec_rx_sc *rx_sc; @@ -854,7 +864,7 @@ static void macsec_del_rxsc_ctx(struct mlx5e_macsec *macsec, struct mlx5e_macsec static int mlx5e_macsec_del_rxsc(struct macsec_context *ctx) { - struct mlx5e_priv *priv = netdev_priv(ctx->netdev); + struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev); struct mlx5e_macsec_device *macsec_device; struct mlx5e_macsec_rx_sc *rx_sc; struct mlx5e_macsec *macsec; @@ -890,8 +900,8 @@ out: static int mlx5e_macsec_add_rxsa(struct macsec_context *ctx) { + struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev); const struct macsec_rx_sa *ctx_rx_sa = ctx->sa.rx_sa; - struct mlx5e_priv *priv = netdev_priv(ctx->netdev); struct mlx5e_macsec_device *macsec_device; struct mlx5_core_dev *mdev = priv->mdev; u8 assoc_num = ctx->sa.assoc_num; @@ -976,8 +986,8 @@ out: static int mlx5e_macsec_upd_rxsa(struct macsec_context *ctx) { + struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev); const struct macsec_rx_sa *ctx_rx_sa = ctx->sa.rx_sa; - struct mlx5e_priv *priv = netdev_priv(ctx->netdev); struct mlx5e_macsec_device *macsec_device; u8 assoc_num = ctx->sa.assoc_num; struct mlx5e_macsec_rx_sc *rx_sc; @@ -1033,7 +1043,7 @@ out: static int mlx5e_macsec_del_rxsa(struct macsec_context *ctx) { - struct mlx5e_priv *priv = netdev_priv(ctx->netdev); + struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev); struct mlx5e_macsec_device *macsec_device; sci_t sci = ctx->sa.rx_sa->sc->sci; struct mlx5e_macsec_rx_sc *rx_sc; @@ -1085,7 +1095,7 @@ out: static int mlx5e_macsec_add_secy(struct macsec_context *ctx) { - struct mlx5e_priv *priv = netdev_priv(ctx->netdev); + struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev); const struct net_device *dev = ctx->secy->netdev; const struct net_device *netdev = ctx->netdev; struct mlx5e_macsec_device *macsec_device; @@ -1137,7 +1147,7 @@ out: static int macsec_upd_secy_hw_address(struct macsec_context *ctx, struct mlx5e_macsec_device *macsec_device) { - struct mlx5e_priv *priv = netdev_priv(ctx->netdev); + struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev); const struct net_device *dev = ctx->secy->netdev; struct mlx5e_macsec *macsec = priv->macsec; struct mlx5e_macsec_rx_sc *rx_sc, *tmp; @@ -1184,8 +1194,8 @@ out: */ static int mlx5e_macsec_upd_secy(struct macsec_context *ctx) { + struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev); const struct macsec_tx_sc *tx_sc = &ctx->secy->tx_sc; - struct mlx5e_priv *priv = netdev_priv(ctx->netdev); const struct net_device *dev = ctx->secy->netdev; struct mlx5e_macsec_device *macsec_device; struct mlx5e_macsec_sa *tx_sa; @@ -1240,7 +1250,7 @@ out: static int mlx5e_macsec_del_secy(struct macsec_context *ctx) { - struct mlx5e_priv *priv = netdev_priv(ctx->netdev); + struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev); struct mlx5e_macsec_device *macsec_device; struct mlx5e_macsec_rx_sc *rx_sc, *tmp; struct mlx5e_macsec_sa *tx_sa; @@ -1741,7 +1751,7 @@ void mlx5e_macsec_offload_handle_rx_skb(struct net_device *netdev, { struct mlx5e_macsec_rx_sc_xarray_element *sc_xarray_element; u32 macsec_meta_data = be32_to_cpu(cqe->ft_metadata); - struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_priv *priv = macsec_netdev_priv(netdev); struct mlx5e_macsec_rx_sc *rx_sc; struct mlx5e_macsec *macsec; u32 fs_id; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c index 5b658a5588c6..7fc901a6ec5f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c @@ -4,6 +4,7 @@ #include <net/macsec.h> #include <linux/netdevice.h> #include <linux/mlx5/qp.h> +#include <linux/if_vlan.h> #include "fs_core.h" #include "en/fs.h" #include "en_accel/macsec_fs.h" @@ -292,8 +293,6 @@ static int macsec_fs_tx_create(struct mlx5e_macsec_fs *macsec_fs) } /* Tx crypto table MKE rule - MKE packets shouldn't be offloaded */ - memset(&flow_act, 0, sizeof(flow_act)); - memset(spec, 0, sizeof(*spec)); spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype); @@ -510,6 +509,8 @@ static void macsec_fs_tx_del_rule(struct mlx5e_macsec_fs *macsec_fs, macsec_fs_tx_ft_put(macsec_fs); } +#define MLX5_REFORMAT_PARAM_ADD_MACSEC_OFFSET_4_BYTES 1 + static union mlx5e_macsec_rule * macsec_fs_tx_add_rule(struct mlx5e_macsec_fs *macsec_fs, const struct macsec_context *macsec_ctx, @@ -555,6 +556,10 @@ macsec_fs_tx_add_rule(struct mlx5e_macsec_fs *macsec_fs, reformat_params.type = MLX5_REFORMAT_TYPE_ADD_MACSEC; reformat_params.size = reformat_size; reformat_params.data = reformatbf; + + if (is_vlan_dev(macsec_ctx->netdev)) + reformat_params.param_0 = MLX5_REFORMAT_PARAM_ADD_MACSEC_OFFSET_4_BYTES; + flow_act.pkt_reformat = mlx5_packet_reformat_alloc(macsec_fs->mdev, &reformat_params, MLX5_FLOW_NAMESPACE_EGRESS_MACSEC); @@ -1109,7 +1114,6 @@ static void macsec_fs_rx_setup_fte(struct mlx5_flow_spec *spec, static union mlx5e_macsec_rule * macsec_fs_rx_add_rule(struct mlx5e_macsec_fs *macsec_fs, - const struct macsec_context *macsec_ctx, struct mlx5_macsec_rule_attrs *attrs, u32 fs_id) { @@ -1334,7 +1338,7 @@ mlx5e_macsec_fs_add_rule(struct mlx5e_macsec_fs *macsec_fs, { return (attrs->action == MLX5_ACCEL_MACSEC_ACTION_ENCRYPT) ? macsec_fs_tx_add_rule(macsec_fs, macsec_ctx, attrs, sa_fs_id) : - macsec_fs_rx_add_rule(macsec_fs, macsec_ctx, attrs, *sa_fs_id); + macsec_fs_rx_add_rule(macsec_fs, attrs, *sa_fs_id); } void mlx5e_macsec_fs_del_rule(struct mlx5e_macsec_fs *macsec_fs, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 79fd21ecb9cb..1f5a2110d31f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -220,7 +220,7 @@ static void mlx5e_ethtool_get_speed_arr(struct mlx5_core_dev *mdev, struct ptys2ethtool_config **arr, u32 *size) { - bool ext = mlx5e_ptys_ext_supported(mdev); + bool ext = mlx5_ptys_ext_supported(mdev); *arr = ext ? ptys2ext_ethtool_table : ptys2legacy_ethtool_table; *size = ext ? ARRAY_SIZE(ptys2ext_ethtool_table) : @@ -895,7 +895,7 @@ static void get_speed_duplex(struct net_device *netdev, if (!netif_carrier_ok(netdev)) goto out; - speed = mlx5e_port_ptys2speed(priv->mdev, eth_proto_oper, force_legacy); + speed = mlx5_port_ptys2speed(priv->mdev, eth_proto_oper, force_legacy); if (!speed) { if (data_rate_oper) speed = 100 * data_rate_oper; @@ -980,7 +980,7 @@ static void get_lp_advertising(struct mlx5_core_dev *mdev, u32 eth_proto_lp, struct ethtool_link_ksettings *link_ksettings) { unsigned long *lp_advertising = link_ksettings->link_modes.lp_advertising; - bool ext = mlx5e_ptys_ext_supported(mdev); + bool ext = mlx5_ptys_ext_supported(mdev); ptys2ethtool_adver_link(lp_advertising, eth_proto_lp, ext); } @@ -1160,7 +1160,7 @@ int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, const struct ethtool_link_ksettings *link_ksettings) { struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5e_port_eth_proto eproto; + struct mlx5_port_eth_proto eproto; const unsigned long *adver; bool an_changes = false; u8 an_disable_admin; @@ -1180,7 +1180,7 @@ int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, autoneg = link_ksettings->base.autoneg; speed = link_ksettings->base.speed; - ext_supported = mlx5e_ptys_ext_supported(mdev); + ext_supported = mlx5_ptys_ext_supported(mdev); ext = ext_requested(autoneg, adver, ext_supported); if (!ext_supported && ext) return -EOPNOTSUPP; @@ -1194,7 +1194,7 @@ int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, goto out; } link_modes = autoneg == AUTONEG_ENABLE ? ethtool2ptys_adver_func(adver) : - mlx5e_port_speed2linkmodes(mdev, speed, !ext); + mlx5_port_speed2linkmodes(mdev, speed, !ext); err = mlx5e_speed_validate(priv->netdev, ext, link_modes, autoneg); if (err) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index 05796f8b1d7c..33bfe4d7338b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -783,6 +783,7 @@ static int mlx5e_create_promisc_table(struct mlx5e_flow_steering *fs) ft->t = mlx5_create_auto_grouped_flow_table(fs->ns, &ft_attr); if (IS_ERR(ft->t)) { err = PTR_ERR(ft->t); + ft->t = NULL; fs_err(fs, "fail to create promisc table err=%d\n", err); return err; } @@ -810,7 +811,7 @@ static void mlx5e_del_promisc_rule(struct mlx5e_flow_steering *fs) static void mlx5e_destroy_promisc_table(struct mlx5e_flow_steering *fs) { - if (WARN(!fs->promisc.ft.t, "Trying to remove non-existing promiscuous table")) + if (!fs->promisc.ft.t) return; mlx5e_del_promisc_rule(fs); mlx5_destroy_flow_table(fs->promisc.ft.t); @@ -1490,6 +1491,8 @@ err: void mlx5e_fs_cleanup(struct mlx5e_flow_steering *fs) { + if (!fs) + return; debugfs_remove_recursive(fs->dfs_root); mlx5e_fs_ethtool_free(fs); mlx5e_fs_tc_free(fs); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 7ca7e9b57607..2944691f06ad 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -262,23 +262,30 @@ static int mlx5e_rq_shampo_hd_info_alloc(struct mlx5e_rq *rq, int node) shampo->bitmap = bitmap_zalloc_node(shampo->hd_per_wq, GFP_KERNEL, node); - if (!shampo->bitmap) - return -ENOMEM; - shampo->info = kvzalloc_node(array_size(shampo->hd_per_wq, sizeof(*shampo->info)), GFP_KERNEL, node); - if (!shampo->info) { - kvfree(shampo->bitmap); - return -ENOMEM; - } + shampo->pages = kvzalloc_node(array_size(shampo->hd_per_wq, + sizeof(*shampo->pages)), + GFP_KERNEL, node); + if (!shampo->bitmap || !shampo->info || !shampo->pages) + goto err_nomem; + return 0; + +err_nomem: + kvfree(shampo->info); + kvfree(shampo->bitmap); + kvfree(shampo->pages); + + return -ENOMEM; } static void mlx5e_rq_shampo_hd_info_free(struct mlx5e_rq *rq) { kvfree(rq->mpwqe.shampo->bitmap); kvfree(rq->mpwqe.shampo->info); + kvfree(rq->mpwqe.shampo->pages); } static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, int node) @@ -286,13 +293,23 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, int node) int wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq); size_t alloc_size; - alloc_size = array_size(wq_sz, struct_size(rq->mpwqe.info, alloc_units, + alloc_size = array_size(wq_sz, struct_size(rq->mpwqe.info, + alloc_units.frag_pages, rq->mpwqe.pages_per_wqe)); rq->mpwqe.info = kvzalloc_node(alloc_size, GFP_KERNEL, node); if (!rq->mpwqe.info) return -ENOMEM; + /* For deferred page release (release right before alloc), make sure + * that on first round release is not called. + */ + for (int i = 0; i < wq_sz; i++) { + struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, i); + + bitmap_fill(wi->skip_release_bitmap, rq->mpwqe.pages_per_wqe); + } + mlx5e_build_umr_wqe(rq, rq->icosq, &rq->mpwqe.umr_wqe); return 0; @@ -499,14 +516,12 @@ static void mlx5e_init_frags_partition(struct mlx5e_rq *rq) struct mlx5e_wqe_frag_info *prev = NULL; int i; - if (rq->xsk_pool) { - /* Assumptions used by XSK batched allocator. */ - WARN_ON(rq->wqe.info.num_frags != 1); - WARN_ON(rq->wqe.info.log_num_frags != 0); - WARN_ON(rq->wqe.info.arr[0].frag_stride != PAGE_SIZE); - } + WARN_ON(rq->xsk_pool); + + next_frag.frag_page = &rq->wqe.alloc_units->frag_pages[0]; - next_frag.au = &rq->wqe.alloc_units[0]; + /* Skip first release due to deferred release. */ + next_frag.flags = BIT(MLX5E_WQE_FRAG_SKIP_RELEASE); for (i = 0; i < mlx5_wq_cyc_get_size(&rq->wqe.wq); i++) { struct mlx5e_rq_frag_info *frag_info = &rq->wqe.info.arr[0]; @@ -516,10 +531,11 @@ static void mlx5e_init_frags_partition(struct mlx5e_rq *rq) for (f = 0; f < rq->wqe.info.num_frags; f++, frag++) { if (next_frag.offset + frag_info[f].frag_stride > PAGE_SIZE) { - next_frag.au++; + /* Pages are assigned at runtime. */ + next_frag.frag_page++; next_frag.offset = 0; if (prev) - prev->last_in_page = true; + prev->flags |= BIT(MLX5E_WQE_FRAG_LAST_IN_PAGE); } *frag = next_frag; @@ -530,25 +546,68 @@ static void mlx5e_init_frags_partition(struct mlx5e_rq *rq) } if (prev) - prev->last_in_page = true; + prev->flags |= BIT(MLX5E_WQE_FRAG_LAST_IN_PAGE); +} + +static void mlx5e_init_xsk_buffs(struct mlx5e_rq *rq) +{ + int i; + + /* Assumptions used by XSK batched allocator. */ + WARN_ON(rq->wqe.info.num_frags != 1); + WARN_ON(rq->wqe.info.log_num_frags != 0); + WARN_ON(rq->wqe.info.arr[0].frag_stride != PAGE_SIZE); + + /* Considering the above assumptions a fragment maps to a single + * xsk_buff. + */ + for (i = 0; i < mlx5_wq_cyc_get_size(&rq->wqe.wq); i++) { + rq->wqe.frags[i].xskp = &rq->wqe.alloc_units->xsk_buffs[i]; + + /* Skip first release due to deferred release as WQES are + * not allocated yet. + */ + rq->wqe.frags[i].flags |= BIT(MLX5E_WQE_FRAG_SKIP_RELEASE); + } } -static int mlx5e_init_au_list(struct mlx5e_rq *rq, int wq_sz, int node) +static int mlx5e_init_wqe_alloc_info(struct mlx5e_rq *rq, int node) { + int wq_sz = mlx5_wq_cyc_get_size(&rq->wqe.wq); int len = wq_sz << rq->wqe.info.log_num_frags; + struct mlx5e_wqe_frag_info *frags; + union mlx5e_alloc_units *aus; + int aus_sz; - rq->wqe.alloc_units = kvzalloc_node(array_size(len, sizeof(*rq->wqe.alloc_units)), - GFP_KERNEL, node); - if (!rq->wqe.alloc_units) + if (rq->xsk_pool) + aus_sz = sizeof(*aus->xsk_buffs); + else + aus_sz = sizeof(*aus->frag_pages); + + aus = kvzalloc_node(array_size(len, aus_sz), GFP_KERNEL, node); + if (!aus) return -ENOMEM; - mlx5e_init_frags_partition(rq); + frags = kvzalloc_node(array_size(len, sizeof(*frags)), GFP_KERNEL, node); + if (!frags) { + kvfree(aus); + return -ENOMEM; + } + + rq->wqe.alloc_units = aus; + rq->wqe.frags = frags; + + if (rq->xsk_pool) + mlx5e_init_xsk_buffs(rq); + else + mlx5e_init_frags_partition(rq); return 0; } -static void mlx5e_free_au_list(struct mlx5e_rq *rq) +static void mlx5e_free_wqe_alloc_info(struct mlx5e_rq *rq) { + kvfree(rq->wqe.frags); kvfree(rq->wqe.alloc_units); } @@ -693,7 +752,6 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, struct mlx5e_rq_param *rqp, int node, struct mlx5e_rq *rq) { - struct page_pool_params pp_params = { 0 }; struct mlx5_core_dev *mdev = rq->mdev; void *rqc = rqp->rqc; void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq); @@ -745,6 +803,9 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, pool_size = rq->mpwqe.pages_per_wqe << mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk); + if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk) && params->xdp_prog) + pool_size *= 2; /* additional page per packet for the linear part */ + rq->mpwqe.log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk); rq->mpwqe.num_strides = BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk)); @@ -778,18 +839,9 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, rq->wqe.info = rqp->frags_info; rq->buff.frame0_sz = rq->wqe.info.arr[0].frag_stride; - rq->wqe.frags = - kvzalloc_node(array_size(sizeof(*rq->wqe.frags), - (wq_sz << rq->wqe.info.log_num_frags)), - GFP_KERNEL, node); - if (!rq->wqe.frags) { - err = -ENOMEM; - goto err_rq_wq_destroy; - } - - err = mlx5e_init_au_list(rq, wq_sz, node); + err = mlx5e_init_wqe_alloc_info(rq, node); if (err) - goto err_rq_frags; + goto err_rq_wq_destroy; } if (xsk) { @@ -798,12 +850,16 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, xsk_pool_set_rxq_info(rq->xsk_pool, &rq->xdp_rxq); } else { /* Create a page_pool and register it with rxq */ + struct page_pool_params pp_params = { 0 }; + pp_params.order = 0; - pp_params.flags = 0; /* No-internal DMA mapping in page_pool */ + pp_params.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV | PP_FLAG_PAGE_FRAG; pp_params.pool_size = pool_size; pp_params.nid = node; pp_params.dev = rq->pdev; + pp_params.napi = rq->cq.napi; pp_params.dma_dir = rq->buff.map_dir; + pp_params.max_len = PAGE_SIZE; /* page_pool can be used even when there is no rq->xdp_prog, * given page_pool does not handle DMA mapping there is no @@ -869,9 +925,6 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, rq->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; } - rq->page_cache.head = 0; - rq->page_cache.tail = 0; - return 0; err_destroy_page_pool: @@ -888,9 +941,7 @@ err_rq_drop_page: mlx5e_free_mpwqe_rq_drop_page(rq); break; default: /* MLX5_WQ_TYPE_CYCLIC */ - mlx5e_free_au_list(rq); -err_rq_frags: - kvfree(rq->wqe.frags); + mlx5e_free_wqe_alloc_info(rq); } err_rq_wq_destroy: mlx5_wq_destroy(&rq->wq_ctrl); @@ -904,7 +955,6 @@ err_rq_xdp_prog: static void mlx5e_free_rq(struct mlx5e_rq *rq) { struct bpf_prog *old_prog; - int i; if (xdp_rxq_info_is_reg(&rq->xdp_rxq)) { old_prog = rcu_dereference_protected(rq->xdp_prog, @@ -921,17 +971,7 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq) mlx5e_rq_free_shampo(rq); break; default: /* MLX5_WQ_TYPE_CYCLIC */ - kvfree(rq->wqe.frags); - mlx5e_free_au_list(rq); - } - - for (i = rq->page_cache.head; i != rq->page_cache.tail; - i = (i + 1) & (MLX5E_CACHE_SIZE - 1)) { - /* With AF_XDP, page_cache is not used, so this loop is not - * entered, and it's safe to call mlx5e_page_release_dynamic - * directly. - */ - mlx5e_page_release_dynamic(rq, rq->page_cache.page_cache[i], false); + mlx5e_free_wqe_alloc_info(rq); } xdp_rxq_info_unreg(&rq->xdp_rxq); @@ -1094,7 +1134,7 @@ int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time) return -ETIMEDOUT; } -void mlx5e_free_rx_in_progress_descs(struct mlx5e_rq *rq) +void mlx5e_free_rx_missing_descs(struct mlx5e_rq *rq) { struct mlx5_wq_ll *wq; u16 head; @@ -1106,8 +1146,12 @@ void mlx5e_free_rx_in_progress_descs(struct mlx5e_rq *rq) wq = &rq->mpwqe.wq; head = wq->head; - /* Outstanding UMR WQEs (in progress) start at wq->head */ - for (i = 0; i < rq->mpwqe.umr_in_progress; i++) { + /* Release WQEs that are in missing state: they have been + * popped from the list after completion but were not freed + * due to deferred release. + * Also free the linked-list reserved entry, hence the "+ 1". + */ + for (i = 0; i < mlx5_wq_ll_missing(wq) + 1; i++) { rq->dealloc_wqe(rq, head); head = mlx5_wq_ll_get_wqe_next_ix(wq, head); } @@ -1134,7 +1178,7 @@ void mlx5e_free_rx_descs(struct mlx5e_rq *rq) if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { struct mlx5_wq_ll *wq = &rq->mpwqe.wq; - mlx5e_free_rx_in_progress_descs(rq); + mlx5e_free_rx_missing_descs(rq); while (!mlx5_wq_ll_is_empty(wq)) { struct mlx5e_rx_wqe_ll *wqe; @@ -1152,12 +1196,21 @@ void mlx5e_free_rx_descs(struct mlx5e_rq *rq) 0, true); } else { struct mlx5_wq_cyc *wq = &rq->wqe.wq; + u16 missing = mlx5_wq_cyc_missing(wq); + u16 head = mlx5_wq_cyc_get_head(wq); while (!mlx5_wq_cyc_is_empty(wq)) { wqe_ix = mlx5_wq_cyc_get_tail(wq); rq->dealloc_wqe(rq, wqe_ix); mlx5_wq_cyc_pop(wq); } + /* Missing slots might also contain unreleased pages due to + * deferred release. + */ + while (missing--) { + wqe_ix = mlx5_wq_cyc_ctr2ix(wq, head++); + rq->dealloc_wqe(rq, wqe_ix); + } } } @@ -1188,7 +1241,7 @@ int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *param, __set_bit(MLX5E_RQ_STATE_CSUM_FULL, &rq->state); if (params->rx_dim_enabled) - __set_bit(MLX5E_RQ_STATE_AM, &rq->state); + __set_bit(MLX5E_RQ_STATE_DIM, &rq->state); /* We disable csum_complete when XDP is enabled since * XDP programs might manipulate packets which will render @@ -1251,17 +1304,19 @@ static int mlx5e_alloc_xdpsq_fifo(struct mlx5e_xdpsq *sq, int numa) { struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo; int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); - int dsegs_per_wq = wq_sz * MLX5_SEND_WQEBB_NUM_DS; + int entries = wq_sz * MLX5_SEND_WQEBB_NUM_DS * 2; /* upper bound for maximum num of + * entries of all xmit_modes. + */ size_t size; - size = array_size(sizeof(*xdpi_fifo->xi), dsegs_per_wq); + size = array_size(sizeof(*xdpi_fifo->xi), entries); xdpi_fifo->xi = kvzalloc_node(size, GFP_KERNEL, numa); if (!xdpi_fifo->xi) return -ENOMEM; xdpi_fifo->pc = &sq->xdpi_fifo_pc; xdpi_fifo->cc = &sq->xdpi_fifo_cc; - xdpi_fifo->mask = dsegs_per_wq - 1; + xdpi_fifo->mask = entries - 1; return 0; } @@ -1664,7 +1719,7 @@ int mlx5e_open_txqsq(struct mlx5e_channel *c, u32 tisn, int txq_ix, mlx5e_set_sq_maxrate(c->netdev, sq, tx_rate); if (params->tx_dim_enabled) - sq->state |= BIT(MLX5E_SQ_STATE_AM); + sq->state |= BIT(MLX5E_SQ_STATE_DIM); return 0; @@ -1811,11 +1866,7 @@ int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params, csp.min_inline_mode = sq->min_inline_mode; set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); - /* Don't enable multi buffer on XDP_REDIRECT SQ, as it's not yet - * supported by upstream, and there is no defined trigger to allow - * transmitting redirected multi-buffer frames. - */ - if (param->is_xdp_mb && !is_redirect) + if (param->is_xdp_mb) set_bit(MLX5E_SQ_STATE_XDP_MULTIBUF, &sq->state); err = mlx5e_create_sq_rdy(c->mdev, param, &csp, 0, &sq->sqn); @@ -1839,7 +1890,6 @@ int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params, struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&sq->wq, i); struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; struct mlx5_wqe_eth_seg *eseg = &wqe->eth; - struct mlx5_wqe_data_seg *dseg; sq->db.wqe_info[i] = (struct mlx5e_xdp_wqe_info) { .num_wqebbs = 1, @@ -1848,9 +1898,6 @@ int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params, cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz); - - dseg = (struct mlx5_wqe_data_seg *)cseg + (ds_cnt - 1); - dseg->lkey = sq->mkey_be; } } @@ -4017,9 +4064,9 @@ void mlx5e_set_xdp_feature(struct net_device *netdev) val = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | NETDEV_XDP_ACT_XSK_ZEROCOPY | - NETDEV_XDP_ACT_NDO_XMIT; - if (params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC) - val |= NETDEV_XDP_ACT_RX_SG; + NETDEV_XDP_ACT_RX_SG | + NETDEV_XDP_ACT_NDO_XMIT | + NETDEV_XDP_ACT_NDO_XMIT_SG; xdp_set_features_flag(netdev, val); } @@ -4213,19 +4260,24 @@ static bool mlx5e_params_validate_xdp(struct net_device *netdev, /* No XSK params: AF_XDP can't be enabled yet at the point of setting * the XDP program. */ - is_linear = mlx5e_rx_is_linear_skb(mdev, params, NULL); - - if (!is_linear && params->rq_wq_type != MLX5_WQ_TYPE_CYCLIC) { - netdev_warn(netdev, "XDP is not allowed with striding RQ and MTU(%d) > %d\n", - params->sw_mtu, - mlx5e_xdp_max_mtu(params, NULL)); - return false; - } - if (!is_linear && !params->xdp_prog->aux->xdp_has_frags) { - netdev_warn(netdev, "MTU(%d) > %d, too big for an XDP program not aware of multi buffer\n", - params->sw_mtu, - mlx5e_xdp_max_mtu(params, NULL)); - return false; + is_linear = params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC ? + mlx5e_rx_is_linear_skb(mdev, params, NULL) : + mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL); + + if (!is_linear) { + if (!params->xdp_prog->aux->xdp_has_frags) { + netdev_warn(netdev, "MTU(%d) > %d, too big for an XDP program not aware of multi buffer\n", + params->sw_mtu, + mlx5e_xdp_max_mtu(params, NULL)); + return false; + } + if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ && + !mlx5e_verify_params_rx_mpwqe_strides(mdev, params, NULL)) { + netdev_warn(netdev, "XDP is not allowed with striding RQ and MTU(%d) > %d\n", + params->sw_mtu, + mlx5e_xdp_max_mtu(params, NULL)); + return false; + } } return true; @@ -4717,20 +4769,15 @@ static void mlx5e_tx_timeout(struct net_device *dev, unsigned int txqueue) queue_work(priv->wq, &priv->tx_timeout_work); } -static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog) +static int mlx5e_xdp_allowed(struct net_device *netdev, struct mlx5_core_dev *mdev, + struct mlx5e_params *params) { - struct net_device *netdev = priv->netdev; - struct mlx5e_params new_params; - - if (priv->channels.params.packet_merge.type != MLX5E_PACKET_MERGE_NONE) { + if (params->packet_merge.type != MLX5E_PACKET_MERGE_NONE) { netdev_warn(netdev, "can't set XDP while HW-GRO/LRO is on, disable them first\n"); return -EINVAL; } - new_params = priv->channels.params; - new_params.xdp_prog = prog; - - if (!mlx5e_params_validate_xdp(netdev, priv->mdev, &new_params)) + if (!mlx5e_params_validate_xdp(netdev, mdev, params)) return -EINVAL; return 0; @@ -4757,8 +4804,11 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) mutex_lock(&priv->state_lock); + new_params = priv->channels.params; + new_params.xdp_prog = prog; + if (prog) { - err = mlx5e_xdp_allowed(priv, prog); + err = mlx5e_xdp_allowed(netdev, priv->mdev, &new_params); if (err) goto unlock; } @@ -4766,22 +4816,6 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) /* no need for full reset when exchanging programs */ reset = (!priv->channels.params.xdp_prog || !prog); - new_params = priv->channels.params; - new_params.xdp_prog = prog; - - /* XDP affects striding RQ parameters. Block XDP if striding RQ won't be - * supported with the new parameters: if PAGE_SIZE is bigger than - * MLX5_MPWQE_LOG_STRIDE_SZ_MAX, striding RQ can't be used, even though - * the MTU is small enough for the linear mode, because XDP uses strides - * of PAGE_SIZE on regular RQs. - */ - if (reset && MLX5E_GET_PFLAG(&new_params, MLX5E_PFLAG_RX_STRIDING_RQ)) { - /* Checking for regular RQs here; XSK RQs were checked on XSK bind. */ - err = mlx5e_mpwrq_validate_regular(priv->mdev, &new_params); - if (err) - goto unlock; - } - old_prog = priv->channels.params.xdp_prog; err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, reset); @@ -5076,6 +5110,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) netdev->vlan_features |= NETIF_F_SG; netdev->vlan_features |= NETIF_F_HW_CSUM; + netdev->vlan_features |= NETIF_F_HW_MACSEC; netdev->vlan_features |= NETIF_F_GRO; netdev->vlan_features |= NETIF_F_TSO; netdev->vlan_features |= NETIF_F_TSO6; @@ -5270,6 +5305,7 @@ static void mlx5e_nic_cleanup(struct mlx5e_priv *priv) mlx5e_health_destroy_reporters(priv); mlx5e_ktls_cleanup(priv); mlx5e_fs_cleanup(priv->fs); + priv->fs = NULL; } static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) @@ -5725,8 +5761,8 @@ int mlx5e_attach_netdev(struct mlx5e_priv *priv) /* Validate the max_wqe_size_sq capability. */ if (WARN_ON_ONCE(mlx5e_get_max_sq_wqebbs(priv->mdev) < MLX5E_MAX_TX_WQEBBS)) { - mlx5_core_warn(priv->mdev, "MLX5E: Max SQ WQEBBs firmware capability: %u, needed %lu\n", - mlx5e_get_max_sq_wqebbs(priv->mdev), MLX5E_MAX_TX_WQEBBS); + mlx5_core_warn(priv->mdev, "MLX5E: Max SQ WQEBBs firmware capability: %u, needed %u\n", + mlx5e_get_max_sq_wqebbs(priv->mdev), (unsigned int)MLX5E_MAX_TX_WQEBBS); return -EIO; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 8ff654b4e9e1..1fc386eccaf8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -53,6 +53,7 @@ #include "lib/vxlan.h" #define CREATE_TRACE_POINTS #include "diag/en_rep_tracepoint.h" +#include "diag/reporter_vnic.h" #include "en_accel/ipsec.h" #include "en/tc/int_port.h" #include "en/ptp.h" @@ -828,6 +829,7 @@ static int mlx5e_init_ul_rep(struct mlx5_core_dev *mdev, static void mlx5e_cleanup_rep(struct mlx5e_priv *priv) { mlx5e_fs_cleanup(priv->fs); + priv->fs = NULL; } static int mlx5e_create_rep_ttc_table(struct mlx5e_priv *priv) @@ -994,6 +996,7 @@ err_close_drop_rq: priv->rx_res = NULL; err_free_fs: mlx5e_fs_cleanup(priv->fs); + priv->fs = NULL; return err; } @@ -1294,6 +1297,50 @@ static unsigned int mlx5e_ul_rep_stats_grps_num(struct mlx5e_priv *priv) return ARRAY_SIZE(mlx5e_ul_rep_stats_grps); } +static int +mlx5e_rep_vnic_reporter_diagnose(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, + struct netlink_ext_ack *extack) +{ + struct mlx5e_rep_priv *rpriv = devlink_health_reporter_priv(reporter); + struct mlx5_eswitch_rep *rep = rpriv->rep; + + return mlx5_reporter_vnic_diagnose_counters(rep->esw->dev, fmsg, + rep->vport, true); +} + +static const struct devlink_health_reporter_ops mlx5_rep_vnic_reporter_ops = { + .name = "vnic", + .diagnose = mlx5e_rep_vnic_reporter_diagnose, +}; + +static void mlx5e_rep_vnic_reporter_create(struct mlx5e_priv *priv, + struct devlink_port *dl_port) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct devlink_health_reporter *reporter; + + reporter = devl_port_health_reporter_create(dl_port, + &mlx5_rep_vnic_reporter_ops, + 0, rpriv); + if (IS_ERR(reporter)) { + mlx5_core_err(priv->mdev, + "Failed to create representor vnic reporter, err = %ld\n", + PTR_ERR(reporter)); + return; + } + + rpriv->rep_vnic_reporter = reporter; +} + +static void mlx5e_rep_vnic_reporter_destroy(struct mlx5e_priv *priv) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + + if (!IS_ERR_OR_NULL(rpriv->rep_vnic_reporter)) + devl_health_reporter_destroy(rpriv->rep_vnic_reporter); +} + static const struct mlx5e_profile mlx5e_rep_profile = { .init = mlx5e_init_rep, .cleanup = mlx5e_cleanup_rep, @@ -1394,8 +1441,10 @@ mlx5e_vport_vf_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) dl_port = mlx5_esw_offloads_devlink_port(dev->priv.eswitch, rpriv->rep->vport); - if (dl_port) + if (dl_port) { SET_NETDEV_DEVLINK_PORT(netdev, dl_port); + mlx5e_rep_vnic_reporter_create(priv, dl_port); + } err = register_netdev(netdev); if (err) { @@ -1408,8 +1457,8 @@ mlx5e_vport_vf_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) return 0; err_detach_netdev: + mlx5e_rep_vnic_reporter_destroy(priv); mlx5e_detach_netdev(netdev_priv(netdev)); - err_cleanup_profile: priv->profile->cleanup(priv); @@ -1458,6 +1507,7 @@ mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep) } unregister_netdev(netdev); + mlx5e_rep_vnic_reporter_destroy(priv); mlx5e_detach_netdev(priv); priv->profile->cleanup(priv); mlx5e_destroy_netdev(priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index dcfad0bf0f45..80b7f5079a5a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -118,6 +118,7 @@ struct mlx5e_rep_priv { struct rtnl_link_stats64 prev_vf_vport_stats; struct mlx5_flow_handle *send_to_vport_meta_rule; struct rhashtable tc_ht; + struct devlink_health_reporter *rep_vnic_reporter; }; static inline diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 3f7b63d6616b..69634829558e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -271,98 +271,35 @@ static inline u32 mlx5e_decompress_cqes_start(struct mlx5e_rq *rq, return mlx5e_decompress_cqes_cont(rq, wq, 1, budget_rem); } -static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq, struct page *page) -{ - struct mlx5e_page_cache *cache = &rq->page_cache; - u32 tail_next = (cache->tail + 1) & (MLX5E_CACHE_SIZE - 1); - struct mlx5e_rq_stats *stats = rq->stats; - - if (tail_next == cache->head) { - stats->cache_full++; - return false; - } - - if (!dev_page_is_reusable(page)) { - stats->cache_waive++; - return false; - } - - cache->page_cache[cache->tail] = page; - cache->tail = tail_next; - return true; -} - -static inline bool mlx5e_rx_cache_get(struct mlx5e_rq *rq, union mlx5e_alloc_unit *au) -{ - struct mlx5e_page_cache *cache = &rq->page_cache; - struct mlx5e_rq_stats *stats = rq->stats; - dma_addr_t addr; - - if (unlikely(cache->head == cache->tail)) { - stats->cache_empty++; - return false; - } - - if (page_ref_count(cache->page_cache[cache->head]) != 1) { - stats->cache_busy++; - return false; - } - - au->page = cache->page_cache[cache->head]; - cache->head = (cache->head + 1) & (MLX5E_CACHE_SIZE - 1); - stats->cache_reuse++; +#define MLX5E_PAGECNT_BIAS_MAX (PAGE_SIZE / 64) - addr = page_pool_get_dma_addr(au->page); - /* Non-XSK always uses PAGE_SIZE. */ - dma_sync_single_for_device(rq->pdev, addr, PAGE_SIZE, rq->buff.map_dir); - return true; -} - -static inline int mlx5e_page_alloc_pool(struct mlx5e_rq *rq, union mlx5e_alloc_unit *au) +static int mlx5e_page_alloc_fragmented(struct mlx5e_rq *rq, + struct mlx5e_frag_page *frag_page) { - dma_addr_t addr; + struct page *page; - if (mlx5e_rx_cache_get(rq, au)) - return 0; - - au->page = page_pool_dev_alloc_pages(rq->page_pool); - if (unlikely(!au->page)) + page = page_pool_dev_alloc_pages(rq->page_pool); + if (unlikely(!page)) return -ENOMEM; - /* Non-XSK always uses PAGE_SIZE. */ - addr = dma_map_page(rq->pdev, au->page, 0, PAGE_SIZE, rq->buff.map_dir); - if (unlikely(dma_mapping_error(rq->pdev, addr))) { - page_pool_recycle_direct(rq->page_pool, au->page); - au->page = NULL; - return -ENOMEM; - } - page_pool_set_dma_addr(au->page, addr); + page_pool_fragment_page(page, MLX5E_PAGECNT_BIAS_MAX); + + *frag_page = (struct mlx5e_frag_page) { + .page = page, + .frags = 0, + }; return 0; } -void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct page *page) +static void mlx5e_page_release_fragmented(struct mlx5e_rq *rq, + struct mlx5e_frag_page *frag_page) { - dma_addr_t dma_addr = page_pool_get_dma_addr(page); + u16 drain_count = MLX5E_PAGECNT_BIAS_MAX - frag_page->frags; + struct page *page = frag_page->page; - dma_unmap_page_attrs(rq->pdev, dma_addr, PAGE_SIZE, rq->buff.map_dir, - DMA_ATTR_SKIP_CPU_SYNC); - page_pool_set_dma_addr(page, 0); -} - -void mlx5e_page_release_dynamic(struct mlx5e_rq *rq, struct page *page, bool recycle) -{ - if (likely(recycle)) { - if (mlx5e_rx_cache_put(rq, page)) - return; - - mlx5e_page_dma_unmap(rq, page); - page_pool_recycle_direct(rq->page_pool, page); - } else { - mlx5e_page_dma_unmap(rq, page); - page_pool_release_page(rq->page_pool, page); - put_page(page); - } + if (page_pool_defrag_page(page, drain_count) == 0) + page_pool_put_defragged_page(rq->page_pool, page, -1, true); } static inline int mlx5e_get_rx_frag(struct mlx5e_rq *rq, @@ -371,22 +308,31 @@ static inline int mlx5e_get_rx_frag(struct mlx5e_rq *rq, int err = 0; if (!frag->offset) - /* On first frag (offset == 0), replenish page (alloc_unit actually). - * Other frags that point to the same alloc_unit (with a different + /* On first frag (offset == 0), replenish page. + * Other frags that point to the same page (with a different * offset) should just use the new one without replenishing again * by themselves. */ - err = mlx5e_page_alloc_pool(rq, frag->au); + err = mlx5e_page_alloc_fragmented(rq, frag->frag_page); return err; } +static bool mlx5e_frag_can_release(struct mlx5e_wqe_frag_info *frag) +{ +#define CAN_RELEASE_MASK \ + (BIT(MLX5E_WQE_FRAG_LAST_IN_PAGE) | BIT(MLX5E_WQE_FRAG_SKIP_RELEASE)) + +#define CAN_RELEASE_VALUE BIT(MLX5E_WQE_FRAG_LAST_IN_PAGE) + + return (frag->flags & CAN_RELEASE_MASK) == CAN_RELEASE_VALUE; +} + static inline void mlx5e_put_rx_frag(struct mlx5e_rq *rq, - struct mlx5e_wqe_frag_info *frag, - bool recycle) + struct mlx5e_wqe_frag_info *frag) { - if (frag->last_in_page) - mlx5e_page_release_dynamic(rq, frag->au->page, recycle); + if (mlx5e_frag_can_release(frag)) + mlx5e_page_release_fragmented(rq, frag->frag_page); } static inline struct mlx5e_wqe_frag_info *get_frag(struct mlx5e_rq *rq, u16 ix) @@ -409,8 +355,10 @@ static int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe_cyc *wqe, if (unlikely(err)) goto free_frags; + frag->flags &= ~BIT(MLX5E_WQE_FRAG_SKIP_RELEASE); + headroom = i == 0 ? rq->buff.headroom : 0; - addr = page_pool_get_dma_addr(frag->au->page); + addr = page_pool_get_dma_addr(frag->frag_page->page); wqe->data[i].addr = cpu_to_be64(addr + frag->offset + headroom); } @@ -418,35 +366,66 @@ static int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe_cyc *wqe, free_frags: while (--i >= 0) - mlx5e_put_rx_frag(rq, --frag, true); + mlx5e_put_rx_frag(rq, --frag); return err; } static inline void mlx5e_free_rx_wqe(struct mlx5e_rq *rq, - struct mlx5e_wqe_frag_info *wi, - bool recycle) + struct mlx5e_wqe_frag_info *wi) { int i; - if (rq->xsk_pool) { - /* The `recycle` parameter is ignored, and the page is always - * put into the Reuse Ring, because there is no way to return - * the page to the userspace when the interface goes down. - */ - xsk_buff_free(wi->au->xsk); - return; - } - for (i = 0; i < rq->wqe.info.num_frags; i++, wi++) - mlx5e_put_rx_frag(rq, wi, recycle); + mlx5e_put_rx_frag(rq, wi); +} + +static void mlx5e_xsk_free_rx_wqe(struct mlx5e_wqe_frag_info *wi) +{ + if (!(wi->flags & BIT(MLX5E_WQE_FRAG_SKIP_RELEASE))) + xsk_buff_free(*wi->xskp); } static void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix) { struct mlx5e_wqe_frag_info *wi = get_frag(rq, ix); - mlx5e_free_rx_wqe(rq, wi, false); + if (rq->xsk_pool) + mlx5e_xsk_free_rx_wqe(wi); + else + mlx5e_free_rx_wqe(rq, wi); +} + +static void mlx5e_xsk_free_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk) +{ + struct mlx5_wq_cyc *wq = &rq->wqe.wq; + int i; + + for (i = 0; i < wqe_bulk; i++) { + int j = mlx5_wq_cyc_ctr2ix(wq, ix + i); + struct mlx5e_wqe_frag_info *wi; + + wi = get_frag(rq, j); + /* The page is always put into the Reuse Ring, because there + * is no way to return the page to the userspace when the + * interface goes down. + */ + mlx5e_xsk_free_rx_wqe(wi); + } +} + +static void mlx5e_free_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk) +{ + struct mlx5_wq_cyc *wq = &rq->wqe.wq; + int i; + + for (i = 0; i < wqe_bulk; i++) { + int j = mlx5_wq_cyc_ctr2ix(wq, ix + i); + struct mlx5e_wqe_frag_info *wi; + + wi = get_frag(rq, j); + mlx5e_free_rx_wqe(rq, wi); + } } static int mlx5e_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk) @@ -467,18 +446,71 @@ static int mlx5e_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk) return i; } +static int mlx5e_refill_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk) +{ + int remaining = wqe_bulk; + int i = 0; + + /* The WQE bulk is split into smaller bulks that are sized + * according to the page pool cache refill size to avoid overflowing + * the page pool cache due to too many page releases at once. + */ + do { + int refill = min_t(u16, rq->wqe.info.refill_unit, remaining); + int alloc_count; + + mlx5e_free_rx_wqes(rq, ix + i, refill); + alloc_count = mlx5e_alloc_rx_wqes(rq, ix + i, refill); + i += alloc_count; + if (unlikely(alloc_count != refill)) + break; + + remaining -= refill; + } while (remaining); + + return i; +} + +static void +mlx5e_add_skb_shared_info_frag(struct mlx5e_rq *rq, struct skb_shared_info *sinfo, + struct xdp_buff *xdp, struct mlx5e_frag_page *frag_page, + u32 frag_offset, u32 len) +{ + skb_frag_t *frag; + + dma_addr_t addr = page_pool_get_dma_addr(frag_page->page); + + dma_sync_single_for_cpu(rq->pdev, addr + frag_offset, len, rq->buff.map_dir); + if (!xdp_buff_has_frags(xdp)) { + /* Init on the first fragment to avoid cold cache access + * when possible. + */ + sinfo->nr_frags = 0; + sinfo->xdp_frags_size = 0; + xdp_buff_set_frags_flag(xdp); + } + + frag = &sinfo->frags[sinfo->nr_frags++]; + __skb_frag_set_page(frag, frag_page->page); + skb_frag_off_set(frag, frag_offset); + skb_frag_size_set(frag, len); + + if (page_is_pfmemalloc(frag_page->page)) + xdp_buff_set_frag_pfmemalloc(xdp); + sinfo->xdp_frags_size += len; +} + static inline void mlx5e_add_skb_frag(struct mlx5e_rq *rq, struct sk_buff *skb, - union mlx5e_alloc_unit *au, u32 frag_offset, u32 len, + struct page *page, u32 frag_offset, u32 len, unsigned int truesize) { - dma_addr_t addr = page_pool_get_dma_addr(au->page); + dma_addr_t addr = page_pool_get_dma_addr(page); dma_sync_single_for_cpu(rq->pdev, addr + frag_offset, len, rq->buff.map_dir); - page_ref_inc(au->page); skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, - au->page, frag_offset, len, truesize); + page, frag_offset, len, truesize); } static inline void @@ -496,30 +528,36 @@ mlx5e_copy_skb_header(struct mlx5e_rq *rq, struct sk_buff *skb, } static void -mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, bool recycle) +mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi) { - union mlx5e_alloc_unit *alloc_units = wi->alloc_units; bool no_xdp_xmit; int i; /* A common case for AF_XDP. */ - if (bitmap_full(wi->xdp_xmit_bitmap, rq->mpwqe.pages_per_wqe)) + if (bitmap_full(wi->skip_release_bitmap, rq->mpwqe.pages_per_wqe)) return; - no_xdp_xmit = bitmap_empty(wi->xdp_xmit_bitmap, rq->mpwqe.pages_per_wqe); + no_xdp_xmit = bitmap_empty(wi->skip_release_bitmap, rq->mpwqe.pages_per_wqe); if (rq->xsk_pool) { - /* The `recycle` parameter is ignored, and the page is always - * put into the Reuse Ring, because there is no way to return - * the page to the userspace when the interface goes down. + struct xdp_buff **xsk_buffs = wi->alloc_units.xsk_buffs; + + /* The page is always put into the Reuse Ring, because there + * is no way to return the page to userspace when the interface + * goes down. */ for (i = 0; i < rq->mpwqe.pages_per_wqe; i++) - if (no_xdp_xmit || !test_bit(i, wi->xdp_xmit_bitmap)) - xsk_buff_free(alloc_units[i].xsk); + if (no_xdp_xmit || !test_bit(i, wi->skip_release_bitmap)) + xsk_buff_free(xsk_buffs[i]); } else { - for (i = 0; i < rq->mpwqe.pages_per_wqe; i++) - if (no_xdp_xmit || !test_bit(i, wi->xdp_xmit_bitmap)) - mlx5e_page_release_dynamic(rq, alloc_units[i].page, recycle); + for (i = 0; i < rq->mpwqe.pages_per_wqe; i++) { + if (no_xdp_xmit || !test_bit(i, wi->skip_release_bitmap)) { + struct mlx5e_frag_page *frag_page; + + frag_page = &wi->alloc_units.frag_pages[i]; + mlx5e_page_release_fragmented(rq, frag_page); + } + } } } @@ -583,7 +621,8 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq, struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo; u16 entries, pi, header_offset, err, wqe_bbs, new_entries; u32 lkey = rq->mdev->mlx5e_res.hw_objs.mkey; - struct page *page = shampo->last_page; + u16 page_index = shampo->curr_page_index; + struct mlx5e_frag_page *frag_page; u64 addr = shampo->last_addr; struct mlx5e_dma_info *dma_info; struct mlx5e_umr_wqe *umr_wqe; @@ -597,6 +636,8 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq, umr_wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi); build_klm_umr(sq, umr_wqe, shampo->key, index, entries, wqe_bbs); + frag_page = &shampo->pages[page_index]; + for (i = 0; i < entries; i++, index++) { dma_info = &shampo->info[index]; if (i >= klm_entries || (index < shampo->pi && shampo->pi - index < @@ -605,16 +646,20 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq, header_offset = (index & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)) << MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE; if (!(header_offset & (PAGE_SIZE - 1))) { - union mlx5e_alloc_unit au; + page_index = (page_index + 1) & (shampo->hd_per_wq - 1); + frag_page = &shampo->pages[page_index]; - err = mlx5e_page_alloc_pool(rq, &au); + err = mlx5e_page_alloc_fragmented(rq, frag_page); if (unlikely(err)) goto err_unmap; - page = dma_info->page = au.page; - addr = dma_info->addr = page_pool_get_dma_addr(au.page); + + addr = page_pool_get_dma_addr(frag_page->page); + + dma_info->addr = addr; + dma_info->frag_page = frag_page; } else { dma_info->addr = addr + header_offset; - dma_info->page = page; + dma_info->frag_page = frag_page; } update_klm: @@ -632,7 +677,7 @@ update_klm: }; shampo->pi = (shampo->pi + new_entries) & (shampo->hd_per_wq - 1); - shampo->last_page = page; + shampo->curr_page_index = page_index; shampo->last_addr = addr; sq->pc += wqe_bbs; sq->doorbell_cseg = &umr_wqe->ctrl; @@ -644,7 +689,7 @@ err_unmap: dma_info = &shampo->info[--index]; if (!(i & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1))) { dma_info->addr = ALIGN_DOWN(dma_info->addr, PAGE_SIZE); - mlx5e_page_release_dynamic(rq, dma_info->page, true); + mlx5e_page_release_fragmented(rq, dma_info->frag_page); } } rq->stats->buff_alloc_err++; @@ -693,8 +738,8 @@ static int mlx5e_alloc_rx_hd_mpwqe(struct mlx5e_rq *rq) static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) { struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, ix); - union mlx5e_alloc_unit *au = &wi->alloc_units[0]; struct mlx5e_icosq *sq = rq->icosq; + struct mlx5e_frag_page *frag_page; struct mlx5_wq_cyc *wq = &sq->wq; struct mlx5e_umr_wqe *umr_wqe; u32 offset; /* 17-bit value with MTT. */ @@ -712,13 +757,15 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi); memcpy(umr_wqe, &rq->mpwqe.umr_wqe, sizeof(struct mlx5e_umr_wqe)); - for (i = 0; i < rq->mpwqe.pages_per_wqe; i++, au++) { + frag_page = &wi->alloc_units.frag_pages[0]; + + for (i = 0; i < rq->mpwqe.pages_per_wqe; i++, frag_page++) { dma_addr_t addr; - err = mlx5e_page_alloc_pool(rq, au); + err = mlx5e_page_alloc_fragmented(rq, frag_page); if (unlikely(err)) goto err_unmap; - addr = page_pool_get_dma_addr(au->page); + addr = page_pool_get_dma_addr(frag_page->page); umr_wqe->inline_mtts[i] = (struct mlx5_mtt) { .ptag = cpu_to_be64(addr | MLX5_EN_WR), }; @@ -735,7 +782,7 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) sizeof(*umr_wqe->inline_mtts) * pad); } - bitmap_zero(wi->xdp_xmit_bitmap, rq->mpwqe.pages_per_wqe); + bitmap_zero(wi->skip_release_bitmap, rq->mpwqe.pages_per_wqe); wi->consumed_strides = 0; umr_wqe->ctrl.opmod_idx_opcode = @@ -759,8 +806,8 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) err_unmap: while (--i >= 0) { - au--; - mlx5e_page_release_dynamic(rq, au->page, true); + frag_page--; + mlx5e_page_release_fragmented(rq, frag_page); } err: @@ -778,8 +825,8 @@ err: void mlx5e_shampo_dealloc_hd(struct mlx5e_rq *rq, u16 len, u16 start, bool close) { struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo; + struct mlx5e_frag_page *deleted_page = NULL; int hd_per_wq = shampo->hd_per_wq; - struct page *deleted_page = NULL; struct mlx5e_dma_info *hd_info; int i, index = start; @@ -792,10 +839,12 @@ void mlx5e_shampo_dealloc_hd(struct mlx5e_rq *rq, u16 len, u16 start, bool close hd_info = &shampo->info[index]; hd_info->addr = ALIGN_DOWN(hd_info->addr, PAGE_SIZE); - if (hd_info->page != deleted_page) { - deleted_page = hd_info->page; - mlx5e_page_release_dynamic(rq, hd_info->page, false); + if (hd_info->frag_page && hd_info->frag_page != deleted_page) { + deleted_page = hd_info->frag_page; + mlx5e_page_release_fragmented(rq, hd_info->frag_page); } + + hd_info->frag_page = NULL; } if (start + len > hd_per_wq) { @@ -810,8 +859,13 @@ void mlx5e_shampo_dealloc_hd(struct mlx5e_rq *rq, u16 len, u16 start, bool close static void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) { struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, ix); - /* Don't recycle, this function is called on rq/netdev close */ - mlx5e_free_rx_mpwqe(rq, wi, false); + /* This function is called on rq/netdev close. */ + mlx5e_free_rx_mpwqe(rq, wi); + + /* Avoid a second release of the wqe pages: dealloc is called also + * for missing wqes on an already flushed RQ. + */ + bitmap_fill(wi->skip_release_bitmap, rq->mpwqe.pages_per_wqe); } INDIRECT_CALLABLE_SCOPE bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq) @@ -838,17 +892,20 @@ INDIRECT_CALLABLE_SCOPE bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq) */ wqe_bulk -= (head + wqe_bulk) & rq->wqe.info.wqe_index_mask; - if (!rq->xsk_pool) - count = mlx5e_alloc_rx_wqes(rq, head, wqe_bulk); - else if (likely(!rq->xsk_pool->dma_need_sync)) + if (!rq->xsk_pool) { + count = mlx5e_refill_rx_wqes(rq, head, wqe_bulk); + } else if (likely(!rq->xsk_pool->dma_need_sync)) { + mlx5e_xsk_free_rx_wqes(rq, head, wqe_bulk); count = mlx5e_xsk_alloc_rx_wqes_batched(rq, head, wqe_bulk); - else + } else { + mlx5e_xsk_free_rx_wqes(rq, head, wqe_bulk); /* If dma_need_sync is true, it's more efficient to call * xsk_buff_alloc in a loop, rather than xsk_buff_alloc_batch, * because the latter does the same check and returns only one * frame. */ count = mlx5e_xsk_alloc_rx_wqes(rq, head, wqe_bulk); + } mlx5_wq_cyc_push_n(wq, count); if (unlikely(count != wqe_bulk)) { @@ -1029,6 +1086,11 @@ INDIRECT_CALLABLE_SCOPE bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq) head = rq->mpwqe.actual_wq_head; i = missing; do { + struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, head); + + /* Deferred free for better page pool cache usage. */ + mlx5e_free_rx_mpwqe(rq, wi); + alloc_err = rq->xsk_pool ? mlx5e_xsk_alloc_rx_mpwqe(rq, head) : mlx5e_alloc_rx_mpwqe(rq, head); @@ -1133,7 +1195,7 @@ static void *mlx5e_shampo_get_packet_hd(struct mlx5e_rq *rq, u16 header_index) struct mlx5e_dma_info *last_head = &rq->mpwqe.shampo->info[header_index]; u16 head_offset = (last_head->addr & (PAGE_SIZE - 1)) + rq->buff.headroom; - return page_address(last_head->page) + head_offset; + return page_address(last_head->frag_page->page) + head_offset; } static void mlx5e_shampo_update_ipv4_udp_hdr(struct mlx5e_rq *rq, struct iphdr *ipv4) @@ -1573,10 +1635,10 @@ struct sk_buff *mlx5e_build_linear_skb(struct mlx5e_rq *rq, void *va, } static void mlx5e_fill_mxbuf(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, - void *va, u16 headroom, u32 len, + void *va, u16 headroom, u32 frame_sz, u32 len, struct mlx5e_xdp_buff *mxbuf) { - xdp_init_buff(&mxbuf->xdp, rq->buff.frame0_sz, &rq->xdp_rxq); + xdp_init_buff(&mxbuf->xdp, frame_sz, &rq->xdp_rxq); xdp_prepare_buff(&mxbuf->xdp, va, headroom, len, true); mxbuf->cqe = cqe; mxbuf->rq = rq; @@ -1586,7 +1648,7 @@ static struct sk_buff * mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi, struct mlx5_cqe64 *cqe, u32 cqe_bcnt) { - union mlx5e_alloc_unit *au = wi->au; + struct mlx5e_frag_page *frag_page = wi->frag_page; u16 rx_headroom = rq->buff.headroom; struct bpf_prog *prog; struct sk_buff *skb; @@ -1595,11 +1657,11 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi, dma_addr_t addr; u32 frag_size; - va = page_address(au->page) + wi->offset; + va = page_address(frag_page->page) + wi->offset; data = va + rx_headroom; frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt); - addr = page_pool_get_dma_addr(au->page); + addr = page_pool_get_dma_addr(frag_page->page); dma_sync_single_range_for_cpu(rq->pdev, addr, wi->offset, frag_size, rq->buff.map_dir); net_prefetch(data); @@ -1609,7 +1671,8 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi, struct mlx5e_xdp_buff mxbuf; net_prefetchw(va); /* xdp_frame data area */ - mlx5e_fill_mxbuf(rq, cqe, va, rx_headroom, cqe_bcnt, &mxbuf); + mlx5e_fill_mxbuf(rq, cqe, va, rx_headroom, rq->buff.frame0_sz, + cqe_bcnt, &mxbuf); if (mlx5e_xdp_handle(rq, prog, &mxbuf)) return NULL; /* page/packet was consumed by XDP */ @@ -1623,7 +1686,8 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi, return NULL; /* queue up for recycling/reuse */ - page_ref_inc(au->page); + skb_mark_for_recycle(skb); + frag_page->frags++; return skb; } @@ -1634,8 +1698,8 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi { struct mlx5e_rq_frag_info *frag_info = &rq->wqe.info.arr[0]; struct mlx5e_wqe_frag_info *head_wi = wi; - union mlx5e_alloc_unit *au = wi->au; u16 rx_headroom = rq->buff.headroom; + struct mlx5e_frag_page *frag_page; struct skb_shared_info *sinfo; struct mlx5e_xdp_buff mxbuf; u32 frag_consumed_bytes; @@ -1645,16 +1709,19 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi u32 truesize; void *va; - va = page_address(au->page) + wi->offset; + frag_page = wi->frag_page; + + va = page_address(frag_page->page) + wi->offset; frag_consumed_bytes = min_t(u32, frag_info->frag_size, cqe_bcnt); - addr = page_pool_get_dma_addr(au->page); + addr = page_pool_get_dma_addr(frag_page->page); dma_sync_single_range_for_cpu(rq->pdev, addr, wi->offset, rq->buff.frame0_sz, rq->buff.map_dir); net_prefetchw(va); /* xdp_frame data area */ net_prefetch(va + rx_headroom); - mlx5e_fill_mxbuf(rq, cqe, va, rx_headroom, frag_consumed_bytes, &mxbuf); + mlx5e_fill_mxbuf(rq, cqe, va, rx_headroom, rq->buff.frame0_sz, + frag_consumed_bytes, &mxbuf); sinfo = xdp_get_shared_info_from_buff(&mxbuf.xdp); truesize = 0; @@ -1663,34 +1730,12 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi wi++; while (cqe_bcnt) { - skb_frag_t *frag; - - au = wi->au; + frag_page = wi->frag_page; frag_consumed_bytes = min_t(u32, frag_info->frag_size, cqe_bcnt); - addr = page_pool_get_dma_addr(au->page); - dma_sync_single_for_cpu(rq->pdev, addr + wi->offset, - frag_consumed_bytes, rq->buff.map_dir); - - if (!xdp_buff_has_frags(&mxbuf.xdp)) { - /* Init on the first fragment to avoid cold cache access - * when possible. - */ - sinfo->nr_frags = 0; - sinfo->xdp_frags_size = 0; - xdp_buff_set_frags_flag(&mxbuf.xdp); - } - - frag = &sinfo->frags[sinfo->nr_frags++]; - __skb_frag_set_page(frag, au->page); - skb_frag_off_set(frag, wi->offset); - skb_frag_size_set(frag, frag_consumed_bytes); - - if (page_is_pfmemalloc(au->page)) - xdp_buff_set_frag_pfmemalloc(&mxbuf.xdp); - - sinfo->xdp_frags_size += frag_consumed_bytes; + mlx5e_add_skb_shared_info_frag(rq, sinfo, &mxbuf.xdp, frag_page, + wi->offset, frag_consumed_bytes); truesize += frag_info->frag_stride; cqe_bcnt -= frag_consumed_bytes; @@ -1701,10 +1746,10 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi prog = rcu_dereference(rq->xdp_prog); if (prog && mlx5e_xdp_handle(rq, prog, &mxbuf)) { if (test_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) { - int i; + struct mlx5e_wqe_frag_info *pwi; - for (i = wi - head_wi; i < rq->wqe.info.num_frags; i++) - mlx5e_put_rx_frag(rq, &head_wi[i], true); + for (pwi = head_wi; pwi < wi; pwi++) + pwi->flags |= BIT(MLX5E_WQE_FRAG_SKIP_RELEASE); } return NULL; /* page/packet was consumed by XDP */ } @@ -1716,21 +1761,17 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi if (unlikely(!skb)) return NULL; - page_ref_inc(head_wi->au->page); + skb_mark_for_recycle(skb); + head_wi->frag_page->frags++; if (xdp_buff_has_frags(&mxbuf.xdp)) { - int i; - /* sinfo->nr_frags is reset by build_skb, calculate again. */ xdp_update_skb_shared_info(skb, wi - head_wi - 1, sinfo->xdp_frags_size, truesize, xdp_buff_is_frag_pfmemalloc(&mxbuf.xdp)); - for (i = 0; i < sinfo->nr_frags; i++) { - skb_frag_t *frag = &sinfo->frags[i]; - - page_ref_inc(skb_frag_page(frag)); - } + for (struct mlx5e_wqe_frag_info *pwi = head_wi + 1; pwi < wi; pwi++) + pwi->frag_page->frags++; } return skb; @@ -1768,7 +1809,7 @@ static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { mlx5e_handle_rx_err_cqe(rq, cqe); - goto free_wqe; + goto wq_cyc_pop; } skb = INDIRECT_CALL_3(rq->wqe.skb_from_cqe, @@ -1782,9 +1823,9 @@ static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) /* do not return page to cache, * it will be returned on XDP_TX completion. */ - goto wq_cyc_pop; + wi->flags |= BIT(MLX5E_WQE_FRAG_SKIP_RELEASE); } - goto free_wqe; + goto wq_cyc_pop; } mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); @@ -1792,13 +1833,11 @@ static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) if (mlx5e_cqe_regb_chain(cqe)) if (!mlx5e_tc_update_skb_nic(cqe, skb)) { dev_kfree_skb_any(skb); - goto free_wqe; + goto wq_cyc_pop; } napi_gro_receive(rq->cq.napi, skb); -free_wqe: - mlx5e_free_rx_wqe(rq, wi, true); wq_cyc_pop: mlx5_wq_cyc_pop(wq); } @@ -1822,7 +1861,7 @@ static void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { mlx5e_handle_rx_err_cqe(rq, cqe); - goto free_wqe; + goto wq_cyc_pop; } skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe, @@ -1835,9 +1874,9 @@ static void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) /* do not return page to cache, * it will be returned on XDP_TX completion. */ - goto wq_cyc_pop; + wi->flags |= BIT(MLX5E_WQE_FRAG_SKIP_RELEASE); } - goto free_wqe; + goto wq_cyc_pop; } mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); @@ -1847,8 +1886,6 @@ static void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) mlx5e_rep_tc_receive(cqe, rq, skb); -free_wqe: - mlx5e_free_rx_wqe(rq, wi, true); wq_cyc_pop: mlx5_wq_cyc_pop(wq); } @@ -1901,7 +1938,6 @@ mpwrq_cqe_out: wq = &rq->mpwqe.wq; wqe = mlx5_wq_ll_get_wqe(wq, wqe_id); - mlx5e_free_rx_mpwqe(rq, wi, true); mlx5_wq_ll_pop(wq, cqe->wqe_id, &wqe->next.next_wqe_index); } @@ -1913,7 +1949,8 @@ const struct mlx5e_rx_handlers mlx5e_rx_handlers_rep = { static void mlx5e_fill_skb_data(struct sk_buff *skb, struct mlx5e_rq *rq, - union mlx5e_alloc_unit *au, u32 data_bcnt, u32 data_offset) + struct mlx5e_frag_page *frag_page, + u32 data_bcnt, u32 data_offset) { net_prefetchw(skb->data); @@ -1927,12 +1964,13 @@ mlx5e_fill_skb_data(struct sk_buff *skb, struct mlx5e_rq *rq, else truesize = ALIGN(pg_consumed_bytes, BIT(rq->mpwqe.log_stride_sz)); - mlx5e_add_skb_frag(rq, skb, au, data_offset, + frag_page->frags++; + mlx5e_add_skb_frag(rq, skb, frag_page->page, data_offset, pg_consumed_bytes, truesize); data_bcnt -= pg_consumed_bytes; data_offset = 0; - au++; + frag_page++; } } @@ -1941,37 +1979,142 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w struct mlx5_cqe64 *cqe, u16 cqe_bcnt, u32 head_offset, u32 page_idx) { - union mlx5e_alloc_unit *au = &wi->alloc_units[page_idx]; + struct mlx5e_frag_page *frag_page = &wi->alloc_units.frag_pages[page_idx]; u16 headlen = min_t(u16, MLX5E_RX_MAX_HEAD, cqe_bcnt); - u32 frag_offset = head_offset + headlen; - u32 byte_cnt = cqe_bcnt - headlen; - union mlx5e_alloc_unit *head_au = au; + struct mlx5e_frag_page *head_page = frag_page; + u32 frag_offset = head_offset; + u32 byte_cnt = cqe_bcnt; + struct skb_shared_info *sinfo; + struct mlx5e_xdp_buff mxbuf; + unsigned int truesize = 0; + struct bpf_prog *prog; struct sk_buff *skb; - dma_addr_t addr; + u32 linear_frame_sz; + u16 linear_data_len; + u16 linear_hr; + void *va; - skb = napi_alloc_skb(rq->cq.napi, - ALIGN(MLX5E_RX_MAX_HEAD, sizeof(long))); - if (unlikely(!skb)) { - rq->stats->buff_alloc_err++; - return NULL; + prog = rcu_dereference(rq->xdp_prog); + + if (prog) { + /* area for bpf_xdp_[store|load]_bytes */ + net_prefetchw(page_address(frag_page->page) + frag_offset); + if (unlikely(mlx5e_page_alloc_fragmented(rq, &wi->linear_page))) { + rq->stats->buff_alloc_err++; + return NULL; + } + va = page_address(wi->linear_page.page); + net_prefetchw(va); /* xdp_frame data area */ + linear_hr = XDP_PACKET_HEADROOM; + linear_data_len = 0; + linear_frame_sz = MLX5_SKB_FRAG_SZ(linear_hr + MLX5E_RX_MAX_HEAD); + } else { + skb = napi_alloc_skb(rq->cq.napi, + ALIGN(MLX5E_RX_MAX_HEAD, sizeof(long))); + if (unlikely(!skb)) { + rq->stats->buff_alloc_err++; + return NULL; + } + skb_mark_for_recycle(skb); + va = skb->head; + net_prefetchw(va); /* xdp_frame data area */ + net_prefetchw(skb->data); + + frag_offset += headlen; + byte_cnt -= headlen; + linear_hr = skb_headroom(skb); + linear_data_len = headlen; + linear_frame_sz = MLX5_SKB_FRAG_SZ(skb_end_offset(skb)); + if (unlikely(frag_offset >= PAGE_SIZE)) { + frag_page++; + frag_offset -= PAGE_SIZE; + } } - net_prefetchw(skb->data); + mlx5e_fill_mxbuf(rq, cqe, va, linear_hr, linear_frame_sz, linear_data_len, &mxbuf); + + sinfo = xdp_get_shared_info_from_buff(&mxbuf.xdp); + + while (byte_cnt) { + /* Non-linear mode, hence non-XSK, which always uses PAGE_SIZE. */ + u32 pg_consumed_bytes = min_t(u32, PAGE_SIZE - frag_offset, byte_cnt); - /* Non-linear mode, hence non-XSK, which always uses PAGE_SIZE. */ - if (unlikely(frag_offset >= PAGE_SIZE)) { - au++; - frag_offset -= PAGE_SIZE; + if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) + truesize += pg_consumed_bytes; + else + truesize += ALIGN(pg_consumed_bytes, BIT(rq->mpwqe.log_stride_sz)); + + mlx5e_add_skb_shared_info_frag(rq, sinfo, &mxbuf.xdp, frag_page, frag_offset, + pg_consumed_bytes); + byte_cnt -= pg_consumed_bytes; + frag_offset = 0; + frag_page++; } - mlx5e_fill_skb_data(skb, rq, au, byte_cnt, frag_offset); - /* copy header */ - addr = page_pool_get_dma_addr(head_au->page); - mlx5e_copy_skb_header(rq, skb, head_au->page, addr, - head_offset, head_offset, headlen); - /* skb linear part was allocated with headlen and aligned to long */ - skb->tail += headlen; - skb->len += headlen; + if (prog) { + if (mlx5e_xdp_handle(rq, prog, &mxbuf)) { + if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) { + int i; + + for (i = 0; i < sinfo->nr_frags; i++) + /* non-atomic */ + __set_bit(page_idx + i, wi->skip_release_bitmap); + return NULL; + } + mlx5e_page_release_fragmented(rq, &wi->linear_page); + return NULL; /* page/packet was consumed by XDP */ + } + + skb = mlx5e_build_linear_skb(rq, mxbuf.xdp.data_hard_start, + linear_frame_sz, + mxbuf.xdp.data - mxbuf.xdp.data_hard_start, 0, + mxbuf.xdp.data - mxbuf.xdp.data_meta); + if (unlikely(!skb)) { + mlx5e_page_release_fragmented(rq, &wi->linear_page); + return NULL; + } + + skb_mark_for_recycle(skb); + wi->linear_page.frags++; + mlx5e_page_release_fragmented(rq, &wi->linear_page); + + if (xdp_buff_has_frags(&mxbuf.xdp)) { + struct mlx5e_frag_page *pagep; + + /* sinfo->nr_frags is reset by build_skb, calculate again. */ + xdp_update_skb_shared_info(skb, frag_page - head_page, + sinfo->xdp_frags_size, truesize, + xdp_buff_is_frag_pfmemalloc(&mxbuf.xdp)); + + pagep = head_page; + do + pagep->frags++; + while (++pagep < frag_page); + } + __pskb_pull_tail(skb, headlen); + } else { + dma_addr_t addr; + + if (xdp_buff_has_frags(&mxbuf.xdp)) { + struct mlx5e_frag_page *pagep; + + xdp_update_skb_shared_info(skb, sinfo->nr_frags, + sinfo->xdp_frags_size, truesize, + xdp_buff_is_frag_pfmemalloc(&mxbuf.xdp)); + + pagep = frag_page - sinfo->nr_frags; + do + pagep->frags++; + while (++pagep < frag_page); + } + /* copy header */ + addr = page_pool_get_dma_addr(head_page->page); + mlx5e_copy_skb_header(rq, skb, head_page->page, addr, + head_offset, head_offset, headlen); + /* skb linear part was allocated with headlen and aligned to long */ + skb->tail += headlen; + skb->len += headlen; + } return skb; } @@ -1981,7 +2124,7 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, struct mlx5_cqe64 *cqe, u16 cqe_bcnt, u32 head_offset, u32 page_idx) { - union mlx5e_alloc_unit *au = &wi->alloc_units[page_idx]; + struct mlx5e_frag_page *frag_page = &wi->alloc_units.frag_pages[page_idx]; u16 rx_headroom = rq->buff.headroom; struct bpf_prog *prog; struct sk_buff *skb; @@ -1996,11 +2139,11 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, return NULL; } - va = page_address(au->page) + head_offset; + va = page_address(frag_page->page) + head_offset; data = va + rx_headroom; frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt); - addr = page_pool_get_dma_addr(au->page); + addr = page_pool_get_dma_addr(frag_page->page); dma_sync_single_range_for_cpu(rq->pdev, addr, head_offset, frag_size, rq->buff.map_dir); net_prefetch(data); @@ -2010,10 +2153,11 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, struct mlx5e_xdp_buff mxbuf; net_prefetchw(va); /* xdp_frame data area */ - mlx5e_fill_mxbuf(rq, cqe, va, rx_headroom, cqe_bcnt, &mxbuf); + mlx5e_fill_mxbuf(rq, cqe, va, rx_headroom, rq->buff.frame0_sz, + cqe_bcnt, &mxbuf); if (mlx5e_xdp_handle(rq, prog, &mxbuf)) { if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) - __set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */ + __set_bit(page_idx, wi->skip_release_bitmap); /* non-atomic */ return NULL; /* page/packet was consumed by XDP */ } @@ -2027,7 +2171,8 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, return NULL; /* queue up for recycling/reuse */ - page_ref_inc(au->page); + skb_mark_for_recycle(skb); + frag_page->frags++; return skb; } @@ -2044,7 +2189,7 @@ mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, void *hdr, *data; u32 frag_size; - hdr = page_address(head->page) + head_offset; + hdr = page_address(head->frag_page->page) + head_offset; data = hdr + rx_headroom; frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + head_size); @@ -2058,9 +2203,7 @@ mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, if (unlikely(!skb)) return NULL; - /* queue up for recycling/reuse */ - page_ref_inc(head->page); - + head->frag_page->frags++; } else { /* allocate SKB and copy header for large header */ rq->stats->gro_large_hds++; @@ -2072,13 +2215,17 @@ mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, } prefetchw(skb->data); - mlx5e_copy_skb_header(rq, skb, head->page, head->addr, + mlx5e_copy_skb_header(rq, skb, head->frag_page->page, head->addr, head_offset + rx_headroom, rx_headroom, head_size); /* skb linear part was allocated with headlen and aligned to long */ skb->tail += head_size; skb->len += head_size; } + + /* queue up for recycling/reuse */ + skb_mark_for_recycle(skb); + return skb; } @@ -2123,8 +2270,10 @@ mlx5e_free_rx_shampo_hd_entry(struct mlx5e_rq *rq, u16 header_index) u64 addr = shampo->info[header_index].addr; if (((header_index + 1) & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)) == 0) { - shampo->info[header_index].addr = ALIGN_DOWN(addr, PAGE_SIZE); - mlx5e_page_release_dynamic(rq, shampo->info[header_index].page, true); + struct mlx5e_dma_info *dma_info = &shampo->info[header_index]; + + dma_info->addr = ALIGN_DOWN(addr, PAGE_SIZE); + mlx5e_page_release_fragmented(rq, dma_info->frag_page); } bitmap_clear(shampo->bitmap, header_index, 1); } @@ -2145,7 +2294,6 @@ static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cq bool match = cqe->shampo.match; struct mlx5e_rq_stats *stats = rq->stats; struct mlx5e_rx_wqe_ll *wqe; - union mlx5e_alloc_unit *au; struct mlx5e_mpw_info *wi; struct mlx5_wq_ll *wq; @@ -2195,8 +2343,10 @@ static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cq } if (likely(head_size)) { - au = &wi->alloc_units[page_idx]; - mlx5e_fill_skb_data(*skb, rq, au, data_bcnt, data_offset); + struct mlx5e_frag_page *frag_page; + + frag_page = &wi->alloc_units.frag_pages[page_idx]; + mlx5e_fill_skb_data(*skb, rq, frag_page, data_bcnt, data_offset); } mlx5e_shampo_complete_rx_cqe(rq, cqe, cqe_bcnt, *skb); @@ -2210,7 +2360,6 @@ mpwrq_cqe_out: wq = &rq->mpwqe.wq; wqe = mlx5_wq_ll_get_wqe(wq, wqe_id); - mlx5e_free_rx_mpwqe(rq, wi, true); mlx5_wq_ll_pop(wq, cqe->wqe_id, &wqe->next.next_wqe_index); } @@ -2270,7 +2419,6 @@ mpwrq_cqe_out: wq = &rq->mpwqe.wq; wqe = mlx5_wq_ll_get_wqe(wq, wqe_id); - mlx5e_free_rx_mpwqe(rq, wi, true); mlx5_wq_ll_pop(wq, cqe->wqe_id, &wqe->next.next_wqe_index); } @@ -2489,7 +2637,7 @@ static void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { rq->stats->wqe_err++; - goto wq_free_wqe; + goto wq_cyc_pop; } skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe, @@ -2497,17 +2645,16 @@ static void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) mlx5e_skb_from_cqe_nonlinear, rq, wi, cqe, cqe_bcnt); if (!skb) - goto wq_free_wqe; + goto wq_cyc_pop; mlx5i_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); if (unlikely(!skb->dev)) { dev_kfree_skb_any(skb); - goto wq_free_wqe; + goto wq_cyc_pop; } napi_gro_receive(rq->cq.napi, skb); -wq_free_wqe: - mlx5e_free_rx_wqe(rq, wi, true); +wq_cyc_pop: mlx5_wq_cyc_pop(wq); } @@ -2582,12 +2729,12 @@ static void mlx5e_trap_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { rq->stats->wqe_err++; - goto free_wqe; + goto wq_cyc_pop; } skb = mlx5e_skb_from_cqe_nonlinear(rq, wi, cqe, cqe_bcnt); if (!skb) - goto free_wqe; + goto wq_cyc_pop; mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); skb_push(skb, ETH_HLEN); @@ -2596,8 +2743,7 @@ static void mlx5e_trap_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe rq->netdev->devlink_port); dev_kfree_skb_any(skb); -free_wqe: - mlx5e_free_rx_wqe(rq, wi, false); +wq_cyc_pop: mlx5_wq_cyc_pop(wq); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 4478223c1720..f1d9596905c6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -179,11 +179,6 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_buff_alloc_err) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_blks) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_pkts) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_reuse) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_full) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_empty) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_busy) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_waive) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_congst_umr) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_arfs_err) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_recover) }, @@ -358,11 +353,6 @@ static void mlx5e_stats_grp_sw_update_stats_rq_stats(struct mlx5e_sw_stats *s, s->rx_buff_alloc_err += rq_stats->buff_alloc_err; s->rx_cqe_compress_blks += rq_stats->cqe_compress_blks; s->rx_cqe_compress_pkts += rq_stats->cqe_compress_pkts; - s->rx_cache_reuse += rq_stats->cache_reuse; - s->rx_cache_full += rq_stats->cache_full; - s->rx_cache_empty += rq_stats->cache_empty; - s->rx_cache_busy += rq_stats->cache_busy; - s->rx_cache_waive += rq_stats->cache_waive; s->rx_congst_umr += rq_stats->congst_umr; s->rx_arfs_err += rq_stats->arfs_err; s->rx_recover += rq_stats->recover; @@ -1978,11 +1968,6 @@ static const struct counter_desc rq_stats_desc[] = { { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, buff_alloc_err) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_blks) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) }, - { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_reuse) }, - { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_full) }, - { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_empty) }, - { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_busy) }, - { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_waive) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, congst_umr) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, arfs_err) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, recover) }, @@ -2163,11 +2148,6 @@ static const struct counter_desc ptp_rq_stats_desc[] = { { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, buff_alloc_err) }, { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cqe_compress_blks) }, { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) }, - { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_reuse) }, - { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_full) }, - { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_empty) }, - { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_busy) }, - { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_waive) }, { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, congst_umr) }, { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, arfs_err) }, { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, recover) }, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index b77100b60b50..1ff8a06027dc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -193,11 +193,6 @@ struct mlx5e_sw_stats { u64 rx_buff_alloc_err; u64 rx_cqe_compress_blks; u64 rx_cqe_compress_pkts; - u64 rx_cache_reuse; - u64 rx_cache_full; - u64 rx_cache_empty; - u64 rx_cache_busy; - u64 rx_cache_waive; u64 rx_congst_umr; u64 rx_arfs_err; u64 rx_recover; @@ -362,11 +357,6 @@ struct mlx5e_rq_stats { u64 buff_alloc_err; u64 cqe_compress_blks; u64 cqe_compress_pkts; - u64 cache_reuse; - u64 cache_full; - u64 cache_empty; - u64 cache_busy; - u64 cache_waive; u64 congst_umr; u64 arfs_err; u64 recover; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 87a2850b32d0..728b82ce4031 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -44,6 +44,7 @@ #include <net/bareudp.h> #include <net/bonding.h> #include <net/dst_metadata.h> +#include "devlink.h" #include "en.h" #include "en/tc/post_act.h" #include "en/tc/act_stats.h" @@ -73,12 +74,6 @@ #define MLX5E_TC_TABLE_NUM_GROUPS 4 #define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(18) -struct mlx5e_hairpin_params { - struct mlx5_core_dev *mdev; - u32 num_queues; - u32 queue_size; -}; - struct mlx5e_tc_table { /* Protects the dynamic assignment of the t parameter * which is the nic tc root table. @@ -101,7 +96,6 @@ struct mlx5e_tc_table { struct mlx5_tc_ct_priv *ct; struct mapping_ctx *mapping; - struct mlx5e_hairpin_params hairpin_params; struct dentry *dfs_root; /* tc action stats */ @@ -183,7 +177,8 @@ static struct lock_class_key tc_ht_wq_key; static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow); static void free_flow_post_acts(struct mlx5e_tc_flow *flow); -static void mlx5_free_flow_attr(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr); +static void mlx5_free_flow_attr_actions(struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr); void mlx5e_tc_match_to_reg_match(struct mlx5_flow_spec *spec, @@ -493,15 +488,6 @@ mlx5e_tc_rule_offload(struct mlx5e_priv *priv, struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; int err; - if (attr->flags & MLX5_ATTR_FLAG_CT) { - struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts = - &attr->parse_attr->mod_hdr_acts; - - return mlx5_tc_ct_flow_offload(get_ct_priv(priv), - spec, attr, - mod_hdr_acts); - } - if (!is_mdev_switchdev_mode(priv->mdev)) return mlx5e_add_offloaded_nic_rule(priv, spec, attr); @@ -524,11 +510,6 @@ mlx5e_tc_rule_unoffload(struct mlx5e_priv *priv, { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - if (attr->flags & MLX5_ATTR_FLAG_CT) { - mlx5_tc_ct_delete_flow(get_ct_priv(priv), attr); - return; - } - if (!is_mdev_switchdev_mode(priv->mdev)) { mlx5e_del_offloaded_nic_rule(priv, rule, attr); return; @@ -589,6 +570,7 @@ struct mlx5e_hairpin { struct mlx5e_tir direct_tir; int num_channels; + u8 log_num_packets; struct mlx5e_rqt indir_rqt; struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS]; struct mlx5_ttc_table *ttc; @@ -935,6 +917,7 @@ mlx5e_hairpin_create(struct mlx5e_priv *priv, struct mlx5_hairpin_params *params hp->func_mdev = func_mdev; hp->func_priv = priv; hp->num_channels = params->num_channels; + hp->log_num_packets = params->log_num_packets; err = mlx5e_hairpin_create_transport(hp); if (err) @@ -1076,9 +1059,11 @@ static int debugfs_hairpin_table_dump_show(struct seq_file *file, void *priv) mutex_lock(&tc->hairpin_tbl_lock); hash_for_each(tc->hairpin_tbl, bkt, hpe, hairpin_hlist) - seq_printf(file, "Hairpin peer_vhca_id %u prio %u refcnt %u\n", + seq_printf(file, + "Hairpin peer_vhca_id %u prio %u refcnt %u num_channels %u num_packets %lu\n", hpe->peer_vhca_id, hpe->prio, - refcount_read(&hpe->refcnt)); + refcount_read(&hpe->refcnt), hpe->hp->num_channels, + BIT(hpe->hp->log_num_packets)); mutex_unlock(&tc->hairpin_tbl_lock); return 0; @@ -1099,33 +1084,15 @@ static void mlx5e_tc_debugfs_init(struct mlx5e_tc_table *tc, &debugfs_hairpin_table_dump_fops); } -static void -mlx5e_hairpin_params_init(struct mlx5e_hairpin_params *hairpin_params, - struct mlx5_core_dev *mdev) -{ - u32 link_speed = 0; - u64 link_speed64; - - hairpin_params->mdev = mdev; - /* set hairpin pair per each 50Gbs share of the link */ - mlx5e_port_max_linkspeed(mdev, &link_speed); - link_speed = max_t(u32, link_speed, 50000); - link_speed64 = link_speed; - do_div(link_speed64, 50000); - hairpin_params->num_queues = link_speed64; - - hairpin_params->queue_size = - BIT(min_t(u32, 16 - MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev), - MLX5_CAP_GEN(mdev, log_max_hairpin_num_packets))); -} - static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct mlx5e_tc_flow_parse_attr *parse_attr, struct netlink_ext_ack *extack) { struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); + struct devlink *devlink = priv_to_devlink(priv->mdev); int peer_ifindex = parse_attr->mirred_ifindex[0]; + union devlink_param_value val = {}; struct mlx5_hairpin_params params; struct mlx5_core_dev *peer_mdev; struct mlx5e_hairpin_entry *hpe; @@ -1182,7 +1149,14 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv, hash_hairpin_info(peer_id, match_prio)); mutex_unlock(&tc->hairpin_tbl_lock); - params.log_num_packets = ilog2(tc->hairpin_params.queue_size); + err = devl_param_driverinit_value_get( + devlink, MLX5_DEVLINK_PARAM_ID_HAIRPIN_QUEUE_SIZE, &val); + if (err) { + err = -ENOMEM; + goto out_err; + } + + params.log_num_packets = ilog2(val.vu32); params.log_data_size = clamp_t(u32, params.log_num_packets + @@ -1191,7 +1165,14 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv, MLX5_CAP_GEN(priv->mdev, log_max_hairpin_wq_data_sz)); params.q_counter = priv->q_counter; - params.num_channels = tc->hairpin_params.num_queues; + err = devl_param_driverinit_value_get( + devlink, MLX5_DEVLINK_PARAM_ID_HAIRPIN_NUM_QUEUES, &val); + if (err) { + err = -ENOMEM; + goto out_err; + } + + params.num_channels = val.vu32; hp = mlx5e_hairpin_create(priv, ¶ms, peer_ifindex); hpe->hp = hp; @@ -1401,13 +1382,7 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, return err; } - if (attr->flags & MLX5_ATTR_FLAG_CT) - flow->rule[0] = mlx5_tc_ct_flow_offload(get_ct_priv(priv), &parse_attr->spec, - attr, &parse_attr->mod_hdr_acts); - else - flow->rule[0] = mlx5e_add_offloaded_nic_rule(priv, &parse_attr->spec, - attr); - + flow->rule[0] = mlx5e_add_offloaded_nic_rule(priv, &parse_attr->spec, attr); return PTR_ERR_OR_ZERO(flow->rule[0]); } @@ -1438,9 +1413,7 @@ static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv, flow_flag_clear(flow, OFFLOADED); - if (attr->flags & MLX5_ATTR_FLAG_CT) - mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), attr); - else if (!IS_ERR_OR_NULL(flow->rule[0])) + if (!IS_ERR_OR_NULL(flow->rule[0])) mlx5e_del_offloaded_nic_rule(priv, flow->rule[0], attr); /* Remove root table if no rules are left to avoid @@ -1791,8 +1764,7 @@ out: static void clean_encap_dests(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, - struct mlx5_flow_attr *attr, - bool *vf_tun) + struct mlx5_flow_attr *attr) { struct mlx5_esw_flow_attr *esw_attr; int out_index; @@ -1801,17 +1773,11 @@ clean_encap_dests(struct mlx5e_priv *priv, return; esw_attr = attr->esw_attr; - *vf_tun = false; for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) { if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP)) continue; - if (esw_attr->dests[out_index].flags & - MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE && - !esw_attr->dest_int_port) - *vf_tun = true; - mlx5e_detach_encap(priv, flow, attr, out_index); kfree(attr->parse_attr->tun_info[out_index]); } @@ -2034,7 +2000,7 @@ static void free_branch_attr(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr * if (!attr) return; - mlx5_free_flow_attr(flow, attr); + mlx5_free_flow_attr_actions(flow, attr); kvfree(attr->parse_attr); kfree(attr); } @@ -2045,7 +2011,6 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_flow_attr *attr = flow->attr; struct mlx5_esw_flow_attr *esw_attr; - bool vf_tun; esw_attr = attr->esw_attr; mlx5e_put_flow_tunnel_id(flow); @@ -2067,18 +2032,8 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, if (flow->decap_route) mlx5e_detach_decap_route(priv, flow); - clean_encap_dests(priv, flow, attr, &vf_tun); - mlx5_tc_ct_match_del(get_ct_priv(priv), &flow->attr->ct_attr); - if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { - mlx5e_mod_hdr_dealloc(&attr->parse_attr->mod_hdr_acts); - mlx5e_tc_detach_mod_hdr(priv, flow, attr); - } - - if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) - mlx5_fc_destroy(esw_attr->counter_dev, attr->counter); - if (esw_attr->int_port) mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(priv), esw_attr->int_port); @@ -2091,8 +2046,7 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, mlx5e_tc_act_stats_del_flow(get_act_stats_handle(priv), flow); free_flow_post_acts(flow); - free_branch_attr(flow, attr->branch_true); - free_branch_attr(flow, attr->branch_false); + mlx5_free_flow_attr_actions(flow, attr); kvfree(attr->esw_attr->rx_tun_attr); kvfree(attr->parse_attr); @@ -3469,114 +3423,59 @@ struct ipv6_hoplimit_word { }; static bool -is_action_keys_supported(const struct flow_action_entry *act, bool ct_flow, - bool *modify_ip_header, bool *modify_tuple, - struct netlink_ext_ack *extack) +is_flow_action_modify_ip_header(struct flow_action *flow_action) { + const struct flow_action_entry *act; u32 mask, offset; u8 htype; + int i; - htype = act->mangle.htype; - offset = act->mangle.offset; - mask = ~act->mangle.mask; /* For IPv4 & IPv6 header check 4 byte word, * to determine that modified fields * are NOT ttl & hop_limit only. */ - if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP4) { - struct ip_ttl_word *ttl_word = - (struct ip_ttl_word *)&mask; - - if (offset != offsetof(struct iphdr, ttl) || - ttl_word->protocol || - ttl_word->check) { - *modify_ip_header = true; - } - - if (offset >= offsetof(struct iphdr, saddr)) - *modify_tuple = true; - - if (ct_flow && *modify_tuple) { - NL_SET_ERR_MSG_MOD(extack, - "can't offload re-write of ipv4 address with action ct"); - return false; - } - } else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP6) { - struct ipv6_hoplimit_word *hoplimit_word = - (struct ipv6_hoplimit_word *)&mask; - - if (offset != offsetof(struct ipv6hdr, payload_len) || - hoplimit_word->payload_len || - hoplimit_word->nexthdr) { - *modify_ip_header = true; - } - - if (ct_flow && offset >= offsetof(struct ipv6hdr, saddr)) - *modify_tuple = true; + flow_action_for_each(i, act, flow_action) { + if (act->id != FLOW_ACTION_MANGLE && + act->id != FLOW_ACTION_ADD) + continue; - if (ct_flow && *modify_tuple) { - NL_SET_ERR_MSG_MOD(extack, - "can't offload re-write of ipv6 address with action ct"); - return false; - } - } else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_TCP || - htype == FLOW_ACT_MANGLE_HDR_TYPE_UDP) { - *modify_tuple = true; - if (ct_flow) { - NL_SET_ERR_MSG_MOD(extack, - "can't offload re-write of transport header ports with action ct"); - return false; + htype = act->mangle.htype; + offset = act->mangle.offset; + mask = ~act->mangle.mask; + + if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP4) { + struct ip_ttl_word *ttl_word = + (struct ip_ttl_word *)&mask; + + if (offset != offsetof(struct iphdr, ttl) || + ttl_word->protocol || + ttl_word->check) + return true; + } else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP6) { + struct ipv6_hoplimit_word *hoplimit_word = + (struct ipv6_hoplimit_word *)&mask; + + if (offset != offsetof(struct ipv6hdr, payload_len) || + hoplimit_word->payload_len || + hoplimit_word->nexthdr) + return true; } } - return true; -} - -static bool modify_tuple_supported(bool modify_tuple, bool ct_clear, - bool ct_flow, struct netlink_ext_ack *extack, - struct mlx5e_priv *priv, - struct mlx5_flow_spec *spec) -{ - if (!modify_tuple || ct_clear) - return true; - - if (ct_flow) { - NL_SET_ERR_MSG_MOD(extack, - "can't offload tuple modification with non-clear ct()"); - netdev_info(priv->netdev, - "can't offload tuple modification with non-clear ct()"); - return false; - } - - /* Add ct_state=-trk match so it will be offloaded for non ct flows - * (or after clear action), as otherwise, since the tuple is changed, - * we can't restore ct state - */ - if (mlx5_tc_ct_add_no_trk_match(spec)) { - NL_SET_ERR_MSG_MOD(extack, - "can't offload tuple modification with ct matches and no ct(clear) action"); - netdev_info(priv->netdev, - "can't offload tuple modification with ct matches and no ct(clear) action"); - return false; - } - - return true; + return false; } static bool modify_header_match_supported(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, struct flow_action *flow_action, - u32 actions, bool ct_flow, - bool ct_clear, + u32 actions, struct netlink_ext_ack *extack) { - const struct flow_action_entry *act; - bool modify_ip_header, modify_tuple; + bool modify_ip_header; void *headers_c; void *headers_v; u16 ethertype; u8 ip_proto; - int i; headers_c = mlx5e_get_match_headers_criteria(actions, spec); headers_v = mlx5e_get_match_headers_value(actions, spec); @@ -3587,23 +3486,7 @@ static bool modify_header_match_supported(struct mlx5e_priv *priv, ethertype != ETH_P_IP && ethertype != ETH_P_IPV6) goto out_ok; - modify_ip_header = false; - modify_tuple = false; - flow_action_for_each(i, act, flow_action) { - if (act->id != FLOW_ACTION_MANGLE && - act->id != FLOW_ACTION_ADD) - continue; - - if (!is_action_keys_supported(act, ct_flow, - &modify_ip_header, - &modify_tuple, extack)) - return false; - } - - if (!modify_tuple_supported(modify_tuple, ct_clear, ct_flow, extack, - priv, spec)) - return false; - + modify_ip_header = is_flow_action_modify_ip_header(flow_action); ip_proto = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol); if (modify_ip_header && ip_proto != IPPROTO_TCP && ip_proto != IPPROTO_UDP && ip_proto != IPPROTO_ICMP) { @@ -3624,19 +3507,6 @@ actions_match_supported_fdb(struct mlx5e_priv *priv, struct netlink_ext_ack *extack) { struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr; - bool ct_flow, ct_clear; - - ct_clear = flow->attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR; - ct_flow = flow_flag_test(flow, CT) && !ct_clear; - - if (esw_attr->split_count && ct_flow && - !MLX5_CAP_GEN(esw_attr->in_mdev, reg_c_preserve)) { - /* All registers used by ct are cleared when using - * split rules. - */ - NL_SET_ERR_MSG_MOD(extack, "Can't offload mirroring with action ct"); - return false; - } if (esw_attr->split_count > 0 && !mlx5_esw_has_fwd_fdb(priv->mdev)) { NL_SET_ERR_MSG_MOD(extack, @@ -3657,14 +3527,9 @@ actions_match_supported(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) { - bool ct_flow, ct_clear; - - ct_clear = flow->attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR; - ct_flow = flow_flag_test(flow, CT) && !ct_clear; - if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR && - !modify_header_match_supported(priv, &parse_attr->spec, flow_action, - actions, ct_flow, ct_clear, extack)) + !modify_header_match_supported(priv, &parse_attr->spec, flow_action, actions, + extack)) return false; if (mlx5e_is_eswitch_flow(flow) && @@ -3758,6 +3623,7 @@ mlx5e_clone_flow_attr_for_post_act(struct mlx5_flow_attr *attr, attr2->dest_chain = 0; attr2->dest_ft = NULL; attr2->act_id_restore_rule = NULL; + memset(&attr2->ct_attr, 0, sizeof(attr2->ct_attr)); if (ns_type == MLX5_FLOW_NAMESPACE_FDB) { attr2->esw_attr->out_count = 0; @@ -3811,9 +3677,7 @@ free_flow_post_acts(struct mlx5e_tc_flow *flow) if (list_is_last(&attr->list, &flow->attrs)) break; - mlx5_free_flow_attr(flow, attr); - free_branch_attr(flow, attr->branch_true); - free_branch_attr(flow, attr->branch_false); + mlx5_free_flow_attr_actions(flow, attr); list_del(&attr->list); kvfree(attr->parse_attr); @@ -4071,76 +3935,79 @@ parse_tc_actions(struct mlx5e_tc_act_parse_state *parse_state, struct flow_action *flow_action) { struct netlink_ext_ack *extack = parse_state->extack; - struct mlx5e_tc_flow_action flow_action_reorder; struct mlx5e_tc_flow *flow = parse_state->flow; struct mlx5e_tc_jump_state jump_state = {}; struct mlx5_flow_attr *attr = flow->attr; enum mlx5_flow_namespace_type ns_type; struct mlx5e_priv *priv = flow->priv; - struct flow_action_entry *act, **_act; + struct mlx5_flow_attr *prev_attr; + struct flow_action_entry *act; struct mlx5e_tc_act *tc_act; + bool is_missable; int err, i; - flow_action_reorder.num_entries = flow_action->num_entries; - flow_action_reorder.entries = kcalloc(flow_action->num_entries, - sizeof(flow_action), GFP_KERNEL); - if (!flow_action_reorder.entries) - return -ENOMEM; - - mlx5e_tc_act_reorder_flow_actions(flow_action, &flow_action_reorder); - ns_type = mlx5e_get_flow_namespace(flow); list_add(&attr->list, &flow->attrs); - flow_action_for_each(i, _act, &flow_action_reorder) { + flow_action_for_each(i, act, flow_action) { jump_state.jump_target = false; - act = *_act; + is_missable = false; + prev_attr = attr; + tc_act = mlx5e_tc_act_get(act->id, ns_type); if (!tc_act) { NL_SET_ERR_MSG_MOD(extack, "Not implemented offload action"); err = -EOPNOTSUPP; - goto out_free; + goto out_free_post_acts; } - if (!tc_act->can_offload(parse_state, act, i, attr)) { + if (tc_act->can_offload && !tc_act->can_offload(parse_state, act, i, attr)) { err = -EOPNOTSUPP; - goto out_free; + goto out_free_post_acts; } err = tc_act->parse_action(parse_state, act, priv, attr); if (err) - goto out_free; + goto out_free_post_acts; dec_jump_count(act, tc_act, attr, priv, &jump_state); err = parse_branch_ctrl(act, tc_act, flow, attr, &jump_state, extack); if (err) - goto out_free; + goto out_free_post_acts; parse_state->actions |= attr->action; - if (!tc_act->stats_action) - attr->tc_act_cookies[attr->tc_act_cookies_count++] = act->cookie; /* Split attr for multi table act if not the last act. */ if (jump_state.jump_target || (tc_act->is_multi_table_act && tc_act->is_multi_table_act(priv, act, attr) && - i < flow_action_reorder.num_entries - 1)) { + i < flow_action->num_entries - 1)) { + is_missable = tc_act->is_missable ? tc_act->is_missable(act) : false; + err = mlx5e_tc_act_post_parse(parse_state, flow_action, attr, ns_type); if (err) - goto out_free; + goto out_free_post_acts; attr = mlx5e_clone_flow_attr_for_post_act(flow->attr, ns_type); if (!attr) { err = -ENOMEM; - goto out_free; + goto out_free_post_acts; } list_add(&attr->list, &flow->attrs); } - } - kfree(flow_action_reorder.entries); + if (is_missable) { + /* Add counter to prev, and assign act to new (next) attr */ + prev_attr->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; + flow_flag_set(flow, USE_ACT_STATS); + + attr->tc_act_cookies[attr->tc_act_cookies_count++] = act->cookie; + } else if (!tc_act->stats_action) { + prev_attr->tc_act_cookies[prev_attr->tc_act_cookies_count++] = act->cookie; + } + } err = mlx5e_tc_act_post_parse(parse_state, flow_action, attr, ns_type); if (err) @@ -4152,8 +4019,6 @@ parse_tc_actions(struct mlx5e_tc_act_parse_state *parse_state, return 0; -out_free: - kfree(flow_action_reorder.entries); out_free_post_acts: free_flow_post_acts(flow); @@ -4448,10 +4313,9 @@ mlx5_alloc_flow_attr(enum mlx5_flow_namespace_type type) } static void -mlx5_free_flow_attr(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr) +mlx5_free_flow_attr_actions(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr) { struct mlx5_core_dev *counter_dev = get_flow_counter_dev(flow); - bool vf_tun; if (!attr) return; @@ -4459,7 +4323,7 @@ mlx5_free_flow_attr(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr) if (attr->post_act_handle) mlx5e_tc_post_act_del(get_post_action(flow->priv), attr->post_act_handle); - clean_encap_dests(flow->priv, flow, attr, &vf_tun); + clean_encap_dests(flow->priv, flow, attr); if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) mlx5_fc_destroy(counter_dev, attr->counter); @@ -4468,6 +4332,11 @@ mlx5_free_flow_attr(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr) mlx5e_mod_hdr_dealloc(&attr->parse_attr->mod_hdr_acts); mlx5e_tc_detach_mod_hdr(flow->priv, flow, attr); } + + mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), attr); + + free_branch_attr(flow, attr->branch_true); + free_branch_attr(flow, attr->branch_false); } static int @@ -4929,7 +4798,7 @@ int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv, goto errout; } - if (mlx5e_is_offloaded_flow(flow) || flow_flag_test(flow, CT)) { + if (mlx5e_is_offloaded_flow(flow)) { if (flow_flag_test(flow, USE_ACT_STATS)) { f->use_act_stats = true; } else { @@ -5187,22 +5056,6 @@ static int mlx5e_tc_netdev_event(struct notifier_block *this, return NOTIFY_DONE; } -static int mlx5e_tc_nic_get_ft_size(struct mlx5_core_dev *dev) -{ - int tc_grp_size, tc_tbl_size; - u32 max_flow_counter; - - max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) | - MLX5_CAP_GEN(dev, max_flow_counter_15_0); - - tc_grp_size = min_t(int, max_flow_counter, MLX5E_TC_TABLE_MAX_GROUP_SIZE); - - tc_tbl_size = min_t(int, tc_grp_size * MLX5E_TC_TABLE_NUM_GROUPS, - BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev, log_max_ft_size))); - - return tc_tbl_size; -} - static int mlx5e_tc_nic_create_miss_table(struct mlx5e_priv *priv) { struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); @@ -5275,10 +5128,10 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv) attr.flags = MLX5_CHAINS_AND_PRIOS_SUPPORTED | MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED; attr.ns = MLX5_FLOW_NAMESPACE_KERNEL; - attr.max_ft_sz = mlx5e_tc_nic_get_ft_size(dev); attr.max_grp_num = MLX5E_TC_TABLE_NUM_GROUPS; attr.default_ft = tc->miss_t; attr.mapping = chains_mapping; + attr.fs_base_prio = MLX5E_TC_PRIO; tc->chains = mlx5_chains_create(dev, &attr); if (IS_ERR(tc->chains)) { @@ -5286,12 +5139,12 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv) goto err_miss; } + mlx5_chains_print_info(tc->chains); + tc->post_act = mlx5e_tc_post_act_init(priv, tc->chains, MLX5_FLOW_NAMESPACE_KERNEL); tc->ct = mlx5_tc_ct_init(priv, tc->chains, &tc->mod_hdr, MLX5_FLOW_NAMESPACE_KERNEL, tc->post_act); - mlx5e_hairpin_params_init(&tc->hairpin_params, dev); - tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event; err = register_netdevice_notifier_dev_net(priv->netdev, &tc->netdevice_nb, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index 9a458a5d9853..a50bfda18e96 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -51,7 +51,7 @@ static void mlx5e_handle_tx_dim(struct mlx5e_txqsq *sq) struct mlx5e_sq_stats *stats = sq->stats; struct dim_sample dim_sample = {}; - if (unlikely(!test_bit(MLX5E_SQ_STATE_AM, &sq->state))) + if (unlikely(!test_bit(MLX5E_SQ_STATE_DIM, &sq->state))) return; dim_update_sample(sq->cq.event_ctr, stats->packets, stats->bytes, &dim_sample); @@ -63,7 +63,7 @@ static void mlx5e_handle_rx_dim(struct mlx5e_rq *rq) struct mlx5e_rq_stats *stats = rq->stats; struct dim_sample dim_sample = {}; - if (unlikely(!test_bit(MLX5E_RQ_STATE_AM, &rq->state))) + if (unlikely(!test_bit(MLX5E_RQ_STATE_DIM, &rq->state))) return; dim_update_sample(rq->cq.event_ctr, stats->packets, stats->bytes, &dim_sample); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 38b32e98f3bd..1c35d721a31d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -18,6 +18,7 @@ #include "lib/clock.h" #include "diag/fw_tracer.h" #include "mlx5_irq.h" +#include "pci_irq.h" #include "devlink.h" #include "en_accel/ipsec.h" @@ -61,9 +62,7 @@ struct mlx5_eq_table { struct mlx5_irq_table *irq_table; struct mlx5_irq **comp_irqs; struct mlx5_irq *ctrl_irq; -#ifdef CONFIG_RFS_ACCEL struct cpu_rmap *rmap; -#endif }; #define MLX5_ASYNC_EVENT_MASK ((1ull << MLX5_EVENT_TYPE_PATH_MIG) | \ @@ -637,6 +636,7 @@ static u16 async_eq_depth_devlink_param_get(struct mlx5_core_dev *dev) mlx5_core_dbg(dev, "Failed to get param. using default. err = %d\n", err); return MLX5_NUM_ASYNC_EQE; } + static int create_async_eqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; @@ -803,44 +803,28 @@ void mlx5_eq_update_ci(struct mlx5_eq *eq, u32 cc, bool arm) } EXPORT_SYMBOL(mlx5_eq_update_ci); -static void comp_irqs_release(struct mlx5_core_dev *dev) +static void comp_irqs_release_pci(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; - if (mlx5_core_is_sf(dev)) - mlx5_irq_affinity_irqs_release(dev, table->comp_irqs, table->num_comp_eqs); - else - mlx5_irqs_release_vectors(table->comp_irqs, table->num_comp_eqs); - kfree(table->comp_irqs); + mlx5_irqs_release_vectors(table->comp_irqs, table->num_comp_eqs); } -static int comp_irqs_request(struct mlx5_core_dev *dev) +static int comp_irqs_request_pci(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; const struct cpumask *prev = cpu_none_mask; const struct cpumask *mask; - int ncomp_eqs = table->num_comp_eqs; + int ncomp_eqs; u16 *cpus; int ret; int cpu; int i; ncomp_eqs = table->num_comp_eqs; - table->comp_irqs = kcalloc(ncomp_eqs, sizeof(*table->comp_irqs), GFP_KERNEL); - if (!table->comp_irqs) - return -ENOMEM; - if (mlx5_core_is_sf(dev)) { - ret = mlx5_irq_affinity_irqs_request_auto(dev, ncomp_eqs, table->comp_irqs); - if (ret < 0) - goto free_irqs; - return ret; - } - cpus = kcalloc(ncomp_eqs, sizeof(*cpus), GFP_KERNEL); - if (!cpus) { + if (!cpus) ret = -ENOMEM; - goto free_irqs; - } i = 0; rcu_read_lock(); @@ -854,17 +838,89 @@ static int comp_irqs_request(struct mlx5_core_dev *dev) } spread_done: rcu_read_unlock(); - ret = mlx5_irqs_request_vectors(dev, cpus, ncomp_eqs, table->comp_irqs); + ret = mlx5_irqs_request_vectors(dev, cpus, ncomp_eqs, table->comp_irqs, &table->rmap); kfree(cpus); - if (ret < 0) - goto free_irqs; return ret; +} + +static void comp_irqs_release_sf(struct mlx5_core_dev *dev) +{ + struct mlx5_eq_table *table = dev->priv.eq_table; + + mlx5_irq_affinity_irqs_release(dev, table->comp_irqs, table->num_comp_eqs); +} + +static int comp_irqs_request_sf(struct mlx5_core_dev *dev) +{ + struct mlx5_eq_table *table = dev->priv.eq_table; + int ncomp_eqs = table->num_comp_eqs; + + return mlx5_irq_affinity_irqs_request_auto(dev, ncomp_eqs, table->comp_irqs); +} + +static void comp_irqs_release(struct mlx5_core_dev *dev) +{ + struct mlx5_eq_table *table = dev->priv.eq_table; + + mlx5_core_is_sf(dev) ? comp_irqs_release_sf(dev) : + comp_irqs_release_pci(dev); -free_irqs: kfree(table->comp_irqs); +} + +static int comp_irqs_request(struct mlx5_core_dev *dev) +{ + struct mlx5_eq_table *table = dev->priv.eq_table; + int ncomp_eqs; + int ret; + + ncomp_eqs = table->num_comp_eqs; + table->comp_irqs = kcalloc(ncomp_eqs, sizeof(*table->comp_irqs), GFP_KERNEL); + if (!table->comp_irqs) + return -ENOMEM; + + ret = mlx5_core_is_sf(dev) ? comp_irqs_request_sf(dev) : + comp_irqs_request_pci(dev); + if (ret < 0) + kfree(table->comp_irqs); + return ret; } +#ifdef CONFIG_RFS_ACCEL +static int alloc_rmap(struct mlx5_core_dev *mdev) +{ + struct mlx5_eq_table *eq_table = mdev->priv.eq_table; + + /* rmap is a mapping between irq number and queue number. + * Each irq can be assigned only to a single rmap. + * Since SFs share IRQs, rmap mapping cannot function correctly + * for irqs that are shared between different core/netdev RX rings. + * Hence we don't allow netdev rmap for SFs. + */ + if (mlx5_core_is_sf(mdev)) + return 0; + + eq_table->rmap = alloc_irq_cpu_rmap(eq_table->num_comp_eqs); + if (!eq_table->rmap) + return -ENOMEM; + return 0; +} + +static void free_rmap(struct mlx5_core_dev *mdev) +{ + struct mlx5_eq_table *eq_table = mdev->priv.eq_table; + + if (eq_table->rmap) { + free_irq_cpu_rmap(eq_table->rmap); + eq_table->rmap = NULL; + } +} +#else +static int alloc_rmap(struct mlx5_core_dev *mdev) { return 0; } +static void free_rmap(struct mlx5_core_dev *mdev) {} +#endif + static void destroy_comp_eqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; @@ -880,6 +936,7 @@ static void destroy_comp_eqs(struct mlx5_core_dev *dev) kfree(eq); } comp_irqs_release(dev); + free_rmap(dev); } static u16 comp_eq_depth_devlink_param_get(struct mlx5_core_dev *dev) @@ -906,9 +963,16 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) int err; int i; + err = alloc_rmap(dev); + if (err) + return err; + ncomp_eqs = comp_irqs_request(dev); - if (ncomp_eqs < 0) - return ncomp_eqs; + if (ncomp_eqs < 0) { + err = ncomp_eqs; + goto err_irqs_req; + } + INIT_LIST_HEAD(&table->comp_eqs_list); nent = comp_eq_depth_devlink_param_get(dev); @@ -953,6 +1017,8 @@ clean_eq: kfree(eq); clean: destroy_comp_eqs(dev); +err_irqs_req: + free_rmap(dev); return err; } @@ -1004,10 +1070,11 @@ mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector) list_for_each_entry(eq, &table->comp_eqs_list, list) { if (i++ == vector) - break; + return mlx5_irq_get_affinity_mask(eq->core.irq); } - return mlx5_irq_get_affinity_mask(eq->core.irq); + WARN_ON_ONCE(1); + return NULL; } EXPORT_SYMBOL(mlx5_comp_irq_get_affinity_mask); @@ -1031,55 +1098,12 @@ struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn) return ERR_PTR(-ENOENT); } -static void clear_rmap(struct mlx5_core_dev *dev) -{ -#ifdef CONFIG_RFS_ACCEL - struct mlx5_eq_table *eq_table = dev->priv.eq_table; - - free_irq_cpu_rmap(eq_table->rmap); -#endif -} - -static int set_rmap(struct mlx5_core_dev *mdev) -{ - int err = 0; -#ifdef CONFIG_RFS_ACCEL - struct mlx5_eq_table *eq_table = mdev->priv.eq_table; - int vecidx; - - eq_table->rmap = alloc_irq_cpu_rmap(eq_table->num_comp_eqs); - if (!eq_table->rmap) { - err = -ENOMEM; - mlx5_core_err(mdev, "Failed to allocate cpu_rmap. err %d", err); - goto err_out; - } - - for (vecidx = 0; vecidx < eq_table->num_comp_eqs; vecidx++) { - err = irq_cpu_rmap_add(eq_table->rmap, - pci_irq_vector(mdev->pdev, vecidx)); - if (err) { - mlx5_core_err(mdev, "irq_cpu_rmap_add failed. err %d", - err); - goto err_irq_cpu_rmap_add; - } - } - return 0; - -err_irq_cpu_rmap_add: - clear_rmap(mdev); -err_out: -#endif - return err; -} - /* This function should only be called after mlx5_cmd_force_teardown_hca */ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; mutex_lock(&table->lock); /* sync with create/destroy_async_eq */ - if (!mlx5_core_is_sf(dev)) - clear_rmap(dev); mlx5_irq_table_destroy(dev); mutex_unlock(&table->lock); } @@ -1090,44 +1114,47 @@ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev) #define MLX5_MAX_ASYNC_EQS 3 #endif -int mlx5_eq_table_create(struct mlx5_core_dev *dev) +static int get_num_eqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *eq_table = dev->priv.eq_table; - int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ? + int max_dev_eqs; + int max_eqs_sf; + int num_eqs; + + /* If ethernet is disabled we use just a single completion vector to + * have the other vectors available for other drivers using mlx5_core. For + * example, mlx5_vdpa + */ + if (!mlx5_core_is_eth_enabled(dev) && mlx5_eth_supported(dev)) + return 1; + + max_dev_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ? MLX5_CAP_GEN(dev, max_num_eqs) : 1 << MLX5_CAP_GEN(dev, log_max_eq); - int max_eqs_sf; - int err; - eq_table->num_comp_eqs = - min_t(int, - mlx5_irq_table_get_num_comp(eq_table->irq_table), - num_eqs - MLX5_MAX_ASYNC_EQS); + num_eqs = min_t(int, mlx5_irq_table_get_num_comp(eq_table->irq_table), + max_dev_eqs - MLX5_MAX_ASYNC_EQS); if (mlx5_core_is_sf(dev)) { max_eqs_sf = min_t(int, MLX5_COMP_EQS_PER_SF, mlx5_irq_table_get_sfs_vec(eq_table->irq_table)); - eq_table->num_comp_eqs = min_t(int, eq_table->num_comp_eqs, - max_eqs_sf); + num_eqs = min_t(int, num_eqs, max_eqs_sf); } + return num_eqs; +} + +int mlx5_eq_table_create(struct mlx5_core_dev *dev) +{ + struct mlx5_eq_table *eq_table = dev->priv.eq_table; + int err; + + eq_table->num_comp_eqs = get_num_eqs(dev); err = create_async_eqs(dev); if (err) { mlx5_core_err(dev, "Failed to create async EQs\n"); goto err_async_eqs; } - if (!mlx5_core_is_sf(dev)) { - /* rmap is a mapping between irq number and queue number. - * each irq can be assign only to a single rmap. - * since SFs share IRQs, rmap mapping cannot function correctly - * for irqs that are shared for different core/netdev RX rings. - * Hence we don't allow netdev rmap for SFs - */ - err = set_rmap(dev); - if (err) - goto err_rmap; - } - err = create_comp_eqs(dev); if (err) { mlx5_core_err(dev, "Failed to create completion EQs\n"); @@ -1135,10 +1162,8 @@ int mlx5_eq_table_create(struct mlx5_core_dev *dev) } return 0; + err_comp_eqs: - if (!mlx5_core_is_sf(dev)) - clear_rmap(dev); -err_rmap: destroy_async_eqs(dev); err_async_eqs: return err; @@ -1146,8 +1171,6 @@ err_async_eqs: void mlx5_eq_table_destroy(struct mlx5_core_dev *dev) { - if (!mlx5_core_is_sf(dev)) - clear_rmap(dev); destroy_comp_eqs(dev); destroy_async_eqs(dev); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c index 3cdcb0e0b20f..1ba03e219111 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c @@ -13,66 +13,6 @@ #define CREATE_TRACE_POINTS #include "diag/bridge_tracepoint.h" -#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_SIZE 12000 -#define MLX5_ESW_BRIDGE_INGRESS_TABLE_UNTAGGED_GRP_SIZE 16000 -#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_FROM 0 -#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_TO \ - (MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_SIZE - 1) -#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_FILTER_GRP_IDX_FROM \ - (MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_TO + 1) -#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_FILTER_GRP_IDX_TO \ - (MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_FILTER_GRP_IDX_FROM + \ - MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_SIZE - 1) -#define MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_GRP_IDX_FROM \ - (MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_FILTER_GRP_IDX_TO + 1) -#define MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_GRP_IDX_TO \ - (MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_GRP_IDX_FROM + \ - MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_SIZE - 1) -#define MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_FILTER_GRP_IDX_FROM \ - (MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_GRP_IDX_TO + 1) -#define MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_FILTER_GRP_IDX_TO \ - (MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_FILTER_GRP_IDX_FROM + \ - MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_SIZE - 1) -#define MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_FROM \ - (MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_FILTER_GRP_IDX_TO + 1) -#define MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_TO \ - (MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_FROM + \ - MLX5_ESW_BRIDGE_INGRESS_TABLE_UNTAGGED_GRP_SIZE - 1) -#define MLX5_ESW_BRIDGE_INGRESS_TABLE_SIZE \ - (MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_TO + 1) -static_assert(MLX5_ESW_BRIDGE_INGRESS_TABLE_SIZE == 64000); - -#define MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_SIZE 16000 -#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_SIZE (32000 - 1) -#define MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_FROM 0 -#define MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_TO \ - (MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_SIZE - 1) -#define MLX5_ESW_BRIDGE_EGRESS_TABLE_QINQ_GRP_IDX_FROM \ - (MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_TO + 1) -#define MLX5_ESW_BRIDGE_EGRESS_TABLE_QINQ_GRP_IDX_TO \ - (MLX5_ESW_BRIDGE_EGRESS_TABLE_QINQ_GRP_IDX_FROM + \ - MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_SIZE - 1) -#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_FROM \ - (MLX5_ESW_BRIDGE_EGRESS_TABLE_QINQ_GRP_IDX_TO + 1) -#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_TO \ - (MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_FROM + \ - MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_SIZE - 1) -#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MISS_GRP_IDX_FROM \ - (MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_TO + 1) -#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MISS_GRP_IDX_TO \ - MLX5_ESW_BRIDGE_EGRESS_TABLE_MISS_GRP_IDX_FROM -#define MLX5_ESW_BRIDGE_EGRESS_TABLE_SIZE \ - (MLX5_ESW_BRIDGE_EGRESS_TABLE_MISS_GRP_IDX_TO + 1) -static_assert(MLX5_ESW_BRIDGE_EGRESS_TABLE_SIZE == 64000); - -#define MLX5_ESW_BRIDGE_SKIP_TABLE_SIZE 0 - -enum { - MLX5_ESW_BRIDGE_LEVEL_INGRESS_TABLE, - MLX5_ESW_BRIDGE_LEVEL_EGRESS_TABLE, - MLX5_ESW_BRIDGE_LEVEL_SKIP_TABLE, -}; - static const struct rhashtable_params fdb_ht_params = { .key_offset = offsetof(struct mlx5_esw_bridge_fdb_entry, key), .key_len = sizeof(struct mlx5_esw_bridge_fdb_key), @@ -80,31 +20,6 @@ static const struct rhashtable_params fdb_ht_params = { .automatic_shrinking = true, }; -enum { - MLX5_ESW_BRIDGE_VLAN_FILTERING_FLAG = BIT(0), -}; - -struct mlx5_esw_bridge { - int ifindex; - int refcnt; - struct list_head list; - struct mlx5_esw_bridge_offloads *br_offloads; - - struct list_head fdb_list; - struct rhashtable fdb_ht; - - struct mlx5_flow_table *egress_ft; - struct mlx5_flow_group *egress_vlan_fg; - struct mlx5_flow_group *egress_qinq_fg; - struct mlx5_flow_group *egress_mac_fg; - struct mlx5_flow_group *egress_miss_fg; - struct mlx5_pkt_reformat *egress_miss_pkt_reformat; - struct mlx5_flow_handle *egress_miss_handle; - unsigned long ageing_time; - u32 flags; - u16 vlan_proto; -}; - static void mlx5_esw_bridge_fdb_offload_notify(struct net_device *dev, const unsigned char *addr, u16 vid, unsigned long val) @@ -146,7 +61,7 @@ mlx5_esw_bridge_pkt_reformat_vlan_pop_create(struct mlx5_eswitch *esw) return mlx5_packet_reformat_alloc(esw->dev, &reformat_params, MLX5_FLOW_NAMESPACE_FDB); } -static struct mlx5_flow_table * +struct mlx5_flow_table * mlx5_esw_bridge_table_create(int max_fte, u32 level, struct mlx5_eswitch *esw) { struct mlx5_flow_table_attr ft_attr = {}; @@ -925,6 +840,10 @@ static struct mlx5_esw_bridge *mlx5_esw_bridge_create(int ifindex, if (err) goto err_fdb_ht; + err = mlx5_esw_bridge_mdb_init(bridge); + if (err) + goto err_mdb_ht; + INIT_LIST_HEAD(&bridge->fdb_list); bridge->ifindex = ifindex; bridge->refcnt = 1; @@ -934,6 +853,8 @@ static struct mlx5_esw_bridge *mlx5_esw_bridge_create(int ifindex, return bridge; +err_mdb_ht: + rhashtable_destroy(&bridge->fdb_ht); err_fdb_ht: mlx5_esw_bridge_egress_table_cleanup(bridge); err_egress_tbl: @@ -953,7 +874,9 @@ static void mlx5_esw_bridge_put(struct mlx5_esw_bridge_offloads *br_offloads, return; mlx5_esw_bridge_egress_table_cleanup(bridge); + mlx5_esw_bridge_mcast_disable(bridge); list_del(&bridge->list); + mlx5_esw_bridge_mdb_cleanup(bridge); rhashtable_destroy(&bridge->fdb_ht); kvfree(bridge); @@ -993,7 +916,7 @@ static unsigned long mlx5_esw_bridge_port_key_from_data(u16 vport_num, u16 esw_o return vport_num | (unsigned long)esw_owner_vhca_id << sizeof(vport_num) * BITS_PER_BYTE; } -static unsigned long mlx5_esw_bridge_port_key(struct mlx5_esw_bridge_port *port) +unsigned long mlx5_esw_bridge_port_key(struct mlx5_esw_bridge_port *port) { return mlx5_esw_bridge_port_key_from_data(port->vport_num, port->esw_owner_vhca_id); } @@ -1018,6 +941,19 @@ static void mlx5_esw_bridge_port_erase(struct mlx5_esw_bridge_port *port, xa_erase(&br_offloads->ports, mlx5_esw_bridge_port_key(port)); } +static struct mlx5_esw_bridge * +mlx5_esw_bridge_from_port_lookup(u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads) +{ + struct mlx5_esw_bridge_port *port; + + port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads); + if (!port) + return NULL; + + return port->bridge; +} + static void mlx5_esw_bridge_fdb_entry_refresh(struct mlx5_esw_bridge_fdb_entry *entry) { trace_mlx5_esw_bridge_fdb_entry_refresh(entry); @@ -1166,8 +1102,21 @@ mlx5_esw_bridge_vlan_push_mark_cleanup(struct mlx5_esw_bridge_vlan *vlan, struct } static int -mlx5_esw_bridge_vlan_push_pop_create(u16 vlan_proto, u16 flags, struct mlx5_esw_bridge_vlan *vlan, - struct mlx5_eswitch *esw) +mlx5_esw_bridge_vlan_push_pop_fhs_create(u16 vlan_proto, struct mlx5_esw_bridge_port *port, + struct mlx5_esw_bridge_vlan *vlan) +{ + return mlx5_esw_bridge_vlan_mcast_init(vlan_proto, port, vlan); +} + +static void +mlx5_esw_bridge_vlan_push_pop_fhs_cleanup(struct mlx5_esw_bridge_vlan *vlan) +{ + mlx5_esw_bridge_vlan_mcast_cleanup(vlan); +} + +static int +mlx5_esw_bridge_vlan_push_pop_create(u16 vlan_proto, u16 flags, struct mlx5_esw_bridge_port *port, + struct mlx5_esw_bridge_vlan *vlan, struct mlx5_eswitch *esw) { int err; @@ -1185,10 +1134,16 @@ mlx5_esw_bridge_vlan_push_pop_create(u16 vlan_proto, u16 flags, struct mlx5_esw_ err = mlx5_esw_bridge_vlan_pop_create(vlan, esw); if (err) goto err_vlan_pop; + + err = mlx5_esw_bridge_vlan_push_pop_fhs_create(vlan_proto, port, vlan); + if (err) + goto err_vlan_pop_fhs; } return 0; +err_vlan_pop_fhs: + mlx5_esw_bridge_vlan_pop_cleanup(vlan, esw); err_vlan_pop: if (vlan->pkt_mod_hdr_push_mark) mlx5_esw_bridge_vlan_push_mark_cleanup(vlan, esw); @@ -1213,7 +1168,7 @@ mlx5_esw_bridge_vlan_create(u16 vlan_proto, u16 vid, u16 flags, struct mlx5_esw_ vlan->flags = flags; INIT_LIST_HEAD(&vlan->fdb_list); - err = mlx5_esw_bridge_vlan_push_pop_create(vlan_proto, flags, vlan, esw); + err = mlx5_esw_bridge_vlan_push_pop_create(vlan_proto, flags, port, vlan, esw); if (err) goto err_vlan_push_pop; @@ -1225,6 +1180,8 @@ mlx5_esw_bridge_vlan_create(u16 vlan_proto, u16 vid, u16 flags, struct mlx5_esw_ return vlan; err_xa_insert: + if (vlan->mcast_handle) + mlx5_esw_bridge_vlan_push_pop_fhs_cleanup(vlan); if (vlan->pkt_reformat_pop) mlx5_esw_bridge_vlan_pop_cleanup(vlan, esw); if (vlan->pkt_mod_hdr_push_mark) @@ -1242,7 +1199,8 @@ static void mlx5_esw_bridge_vlan_erase(struct mlx5_esw_bridge_port *port, xa_erase(&port->vlans, vlan->vid); } -static void mlx5_esw_bridge_vlan_flush(struct mlx5_esw_bridge_vlan *vlan, +static void mlx5_esw_bridge_vlan_flush(struct mlx5_esw_bridge_port *port, + struct mlx5_esw_bridge_vlan *vlan, struct mlx5_esw_bridge *bridge) { struct mlx5_eswitch *esw = bridge->br_offloads->esw; @@ -1250,7 +1208,10 @@ static void mlx5_esw_bridge_vlan_flush(struct mlx5_esw_bridge_vlan *vlan, list_for_each_entry_safe(entry, tmp, &vlan->fdb_list, vlan_list) mlx5_esw_bridge_fdb_entry_notify_and_cleanup(entry, bridge); + mlx5_esw_bridge_port_mdb_vlan_flush(port, vlan); + if (vlan->mcast_handle) + mlx5_esw_bridge_vlan_push_pop_fhs_cleanup(vlan); if (vlan->pkt_reformat_pop) mlx5_esw_bridge_vlan_pop_cleanup(vlan, esw); if (vlan->pkt_mod_hdr_push_mark) @@ -1264,7 +1225,7 @@ static void mlx5_esw_bridge_vlan_cleanup(struct mlx5_esw_bridge_port *port, struct mlx5_esw_bridge *bridge) { trace_mlx5_esw_bridge_vlan_cleanup(vlan); - mlx5_esw_bridge_vlan_flush(vlan, bridge); + mlx5_esw_bridge_vlan_flush(port, vlan, bridge); mlx5_esw_bridge_vlan_erase(port, vlan); kvfree(vlan); } @@ -1288,9 +1249,9 @@ static int mlx5_esw_bridge_port_vlans_recreate(struct mlx5_esw_bridge_port *port int err; xa_for_each(&port->vlans, i, vlan) { - mlx5_esw_bridge_vlan_flush(vlan, bridge); - err = mlx5_esw_bridge_vlan_push_pop_create(bridge->vlan_proto, vlan->flags, vlan, - br_offloads->esw); + mlx5_esw_bridge_vlan_flush(port, vlan, bridge); + err = mlx5_esw_bridge_vlan_push_pop_create(bridge->vlan_proto, vlan->flags, port, + vlan, br_offloads->esw); if (err) { esw_warn(br_offloads->esw->dev, "Failed to create VLAN=%u(proto=%x) push/pop actions (vport=%u,err=%d)\n", @@ -1473,33 +1434,32 @@ err_ingress_fc_create: int mlx5_esw_bridge_ageing_time_set(u16 vport_num, u16 esw_owner_vhca_id, unsigned long ageing_time, struct mlx5_esw_bridge_offloads *br_offloads) { - struct mlx5_esw_bridge_port *port; + struct mlx5_esw_bridge *bridge; - port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads); - if (!port) + bridge = mlx5_esw_bridge_from_port_lookup(vport_num, esw_owner_vhca_id, br_offloads); + if (!bridge) return -EINVAL; - port->bridge->ageing_time = clock_t_to_jiffies(ageing_time); + bridge->ageing_time = clock_t_to_jiffies(ageing_time); return 0; } int mlx5_esw_bridge_vlan_filtering_set(u16 vport_num, u16 esw_owner_vhca_id, bool enable, struct mlx5_esw_bridge_offloads *br_offloads) { - struct mlx5_esw_bridge_port *port; struct mlx5_esw_bridge *bridge; bool filtering; - port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads); - if (!port) + bridge = mlx5_esw_bridge_from_port_lookup(vport_num, esw_owner_vhca_id, br_offloads); + if (!bridge) return -EINVAL; - bridge = port->bridge; filtering = bridge->flags & MLX5_ESW_BRIDGE_VLAN_FILTERING_FLAG; if (filtering == enable) return 0; mlx5_esw_bridge_fdb_flush(bridge); + mlx5_esw_bridge_mdb_flush(bridge); if (enable) bridge->flags |= MLX5_ESW_BRIDGE_VLAN_FILTERING_FLAG; else @@ -1511,15 +1471,13 @@ int mlx5_esw_bridge_vlan_filtering_set(u16 vport_num, u16 esw_owner_vhca_id, boo int mlx5_esw_bridge_vlan_proto_set(u16 vport_num, u16 esw_owner_vhca_id, u16 proto, struct mlx5_esw_bridge_offloads *br_offloads) { - struct mlx5_esw_bridge_port *port; struct mlx5_esw_bridge *bridge; - port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, - br_offloads); - if (!port) + bridge = mlx5_esw_bridge_from_port_lookup(vport_num, esw_owner_vhca_id, + br_offloads); + if (!bridge) return -EINVAL; - bridge = port->bridge; if (bridge->vlan_proto == proto) return 0; if (proto != ETH_P_8021Q && proto != ETH_P_8021AD) { @@ -1528,12 +1486,43 @@ int mlx5_esw_bridge_vlan_proto_set(u16 vport_num, u16 esw_owner_vhca_id, u16 pro } mlx5_esw_bridge_fdb_flush(bridge); + mlx5_esw_bridge_mdb_flush(bridge); bridge->vlan_proto = proto; mlx5_esw_bridge_vlans_recreate(bridge); return 0; } +int mlx5_esw_bridge_mcast_set(u16 vport_num, u16 esw_owner_vhca_id, bool enable, + struct mlx5_esw_bridge_offloads *br_offloads) +{ + struct mlx5_eswitch *esw = br_offloads->esw; + struct mlx5_esw_bridge *bridge; + int err = 0; + bool mcast; + + if (!(MLX5_CAP_ESW_FLOWTABLE((esw)->dev, fdb_multi_path_any_table) || + MLX5_CAP_ESW_FLOWTABLE((esw)->dev, fdb_multi_path_any_table_limit_regc)) || + !MLX5_CAP_ESW_FLOWTABLE((esw)->dev, fdb_uplink_hairpin) || + !MLX5_CAP_ESW_FLOWTABLE_FDB((esw)->dev, ignore_flow_level)) + return -EOPNOTSUPP; + + bridge = mlx5_esw_bridge_from_port_lookup(vport_num, esw_owner_vhca_id, br_offloads); + if (!bridge) + return -EINVAL; + + mcast = bridge->flags & MLX5_ESW_BRIDGE_MCAST_FLAG; + if (mcast == enable) + return 0; + + if (enable) + err = mlx5_esw_bridge_mcast_enable(bridge); + else + mlx5_esw_bridge_mcast_disable(bridge); + + return err; +} + static int mlx5_esw_bridge_vport_init(u16 vport_num, u16 esw_owner_vhca_id, u16 flags, struct mlx5_esw_bridge_offloads *br_offloads, struct mlx5_esw_bridge *bridge) @@ -1551,6 +1540,15 @@ static int mlx5_esw_bridge_vport_init(u16 vport_num, u16 esw_owner_vhca_id, u16 port->bridge = bridge; port->flags |= flags; xa_init(&port->vlans); + + err = mlx5_esw_bridge_port_mcast_init(port); + if (err) { + esw_warn(esw->dev, + "Failed to initialize port multicast (vport=%u,esw_owner_vhca_id=%u,err=%d)\n", + port->vport_num, port->esw_owner_vhca_id, err); + goto err_port_mcast; + } + err = mlx5_esw_bridge_port_insert(port, br_offloads); if (err) { esw_warn(esw->dev, @@ -1563,6 +1561,8 @@ static int mlx5_esw_bridge_vport_init(u16 vport_num, u16 esw_owner_vhca_id, u16 return 0; err_port_insert: + mlx5_esw_bridge_port_mcast_cleanup(port); +err_port_mcast: kvfree(port); return err; } @@ -1580,6 +1580,7 @@ static int mlx5_esw_bridge_vport_cleanup(struct mlx5_esw_bridge_offloads *br_off trace_mlx5_esw_bridge_vport_cleanup(port); mlx5_esw_bridge_port_vlans_flush(port, bridge); + mlx5_esw_bridge_port_mcast_cleanup(port); mlx5_esw_bridge_port_erase(port, br_offloads); kvfree(port); mlx5_esw_bridge_put(br_offloads, bridge); @@ -1711,14 +1712,12 @@ void mlx5_esw_bridge_fdb_update_used(struct net_device *dev, u16 vport_num, u16 struct switchdev_notifier_fdb_info *fdb_info) { struct mlx5_esw_bridge_fdb_entry *entry; - struct mlx5_esw_bridge_port *port; struct mlx5_esw_bridge *bridge; - port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads); - if (!port) + bridge = mlx5_esw_bridge_from_port_lookup(vport_num, esw_owner_vhca_id, br_offloads); + if (!bridge) return; - bridge = port->bridge; entry = mlx5_esw_bridge_fdb_lookup(bridge, fdb_info->addr, fdb_info->vid); if (!entry) { esw_debug(br_offloads->esw->dev, @@ -1765,14 +1764,12 @@ void mlx5_esw_bridge_fdb_remove(struct net_device *dev, u16 vport_num, u16 esw_o { struct mlx5_eswitch *esw = br_offloads->esw; struct mlx5_esw_bridge_fdb_entry *entry; - struct mlx5_esw_bridge_port *port; struct mlx5_esw_bridge *bridge; - port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads); - if (!port) + bridge = mlx5_esw_bridge_from_port_lookup(vport_num, esw_owner_vhca_id, br_offloads); + if (!bridge) return; - bridge = port->bridge; entry = mlx5_esw_bridge_fdb_lookup(bridge, fdb_info->addr, fdb_info->vid); if (!entry) { esw_debug(esw->dev, @@ -1806,6 +1803,64 @@ void mlx5_esw_bridge_update(struct mlx5_esw_bridge_offloads *br_offloads) } } +int mlx5_esw_bridge_port_mdb_add(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id, + const unsigned char *addr, u16 vid, + struct mlx5_esw_bridge_offloads *br_offloads, + struct netlink_ext_ack *extack) +{ + struct mlx5_esw_bridge_vlan *vlan; + struct mlx5_esw_bridge_port *port; + struct mlx5_esw_bridge *bridge; + int err; + + port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads); + if (!port) { + esw_warn(br_offloads->esw->dev, + "Failed to lookup bridge port to add MDB (MAC=%pM,vport=%u)\n", + addr, vport_num); + NL_SET_ERR_MSG_FMT_MOD(extack, + "Failed to lookup bridge port to add MDB (MAC=%pM,vport=%u)\n", + addr, vport_num); + return -EINVAL; + } + + bridge = port->bridge; + if (bridge->flags & MLX5_ESW_BRIDGE_VLAN_FILTERING_FLAG && vid) { + vlan = mlx5_esw_bridge_vlan_lookup(vid, port); + if (!vlan) { + esw_warn(br_offloads->esw->dev, + "Failed to lookup bridge port vlan metadata to create MDB (MAC=%pM,vid=%u,vport=%u)\n", + addr, vid, vport_num); + NL_SET_ERR_MSG_FMT_MOD(extack, + "Failed to lookup bridge port vlan metadata to create MDB (MAC=%pM,vid=%u,vport=%u)\n", + addr, vid, vport_num); + return -EINVAL; + } + } + + err = mlx5_esw_bridge_port_mdb_attach(dev, port, addr, vid); + if (err) { + NL_SET_ERR_MSG_FMT_MOD(extack, "Failed to add MDB (MAC=%pM,vid=%u,vport=%u)\n", + addr, vid, vport_num); + return err; + } + + return 0; +} + +void mlx5_esw_bridge_port_mdb_del(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id, + const unsigned char *addr, u16 vid, + struct mlx5_esw_bridge_offloads *br_offloads) +{ + struct mlx5_esw_bridge_port *port; + + port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads); + if (!port) + return; + + mlx5_esw_bridge_port_mdb_detach(dev, port, addr, vid); +} + static void mlx5_esw_bridge_flush(struct mlx5_esw_bridge_offloads *br_offloads) { struct mlx5_esw_bridge_port *port; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h index 10851a515bca..a9dd18c73d6a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h @@ -25,12 +25,19 @@ struct mlx5_esw_bridge_offloads { struct delayed_work update_work; struct mlx5_flow_table *ingress_ft; + struct mlx5_flow_group *ingress_igmp_fg; + struct mlx5_flow_group *ingress_mld_fg; struct mlx5_flow_group *ingress_vlan_fg; struct mlx5_flow_group *ingress_vlan_filter_fg; struct mlx5_flow_group *ingress_qinq_fg; struct mlx5_flow_group *ingress_qinq_filter_fg; struct mlx5_flow_group *ingress_mac_fg; + struct mlx5_flow_handle *igmp_handle; + struct mlx5_flow_handle *mld_query_handle; + struct mlx5_flow_handle *mld_report_handle; + struct mlx5_flow_handle *mld_done_handle; + struct mlx5_flow_table *skip_ft; }; @@ -64,10 +71,20 @@ int mlx5_esw_bridge_vlan_filtering_set(u16 vport_num, u16 esw_owner_vhca_id, boo struct mlx5_esw_bridge_offloads *br_offloads); int mlx5_esw_bridge_vlan_proto_set(u16 vport_num, u16 esw_owner_vhca_id, u16 proto, struct mlx5_esw_bridge_offloads *br_offloads); +int mlx5_esw_bridge_mcast_set(u16 vport_num, u16 esw_owner_vhca_id, bool enable, + struct mlx5_esw_bridge_offloads *br_offloads); int mlx5_esw_bridge_port_vlan_add(u16 vport_num, u16 esw_owner_vhca_id, u16 vid, u16 flags, struct mlx5_esw_bridge_offloads *br_offloads, struct netlink_ext_ack *extack); void mlx5_esw_bridge_port_vlan_del(u16 vport_num, u16 esw_owner_vhca_id, u16 vid, struct mlx5_esw_bridge_offloads *br_offloads); +int mlx5_esw_bridge_port_mdb_add(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id, + const unsigned char *addr, u16 vid, + struct mlx5_esw_bridge_offloads *br_offloads, + struct netlink_ext_ack *extack); +void mlx5_esw_bridge_port_mdb_del(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id, + const unsigned char *addr, u16 vid, + struct mlx5_esw_bridge_offloads *br_offloads); + #endif /* __MLX5_ESW_BRIDGE_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_mcast.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_mcast.c new file mode 100644 index 000000000000..2eae594a5e80 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_mcast.c @@ -0,0 +1,1126 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#include "lib/devcom.h" +#include "bridge.h" +#include "eswitch.h" +#include "bridge_priv.h" +#include "diag/bridge_tracepoint.h" + +static const struct rhashtable_params mdb_ht_params = { + .key_offset = offsetof(struct mlx5_esw_bridge_mdb_entry, key), + .key_len = sizeof(struct mlx5_esw_bridge_mdb_key), + .head_offset = offsetof(struct mlx5_esw_bridge_mdb_entry, ht_node), + .automatic_shrinking = true, +}; + +int mlx5_esw_bridge_mdb_init(struct mlx5_esw_bridge *bridge) +{ + INIT_LIST_HEAD(&bridge->mdb_list); + return rhashtable_init(&bridge->mdb_ht, &mdb_ht_params); +} + +void mlx5_esw_bridge_mdb_cleanup(struct mlx5_esw_bridge *bridge) +{ + rhashtable_destroy(&bridge->mdb_ht); +} + +static struct mlx5_esw_bridge_port * +mlx5_esw_bridge_mdb_port_lookup(struct mlx5_esw_bridge_port *port, + struct mlx5_esw_bridge_mdb_entry *entry) +{ + return xa_load(&entry->ports, mlx5_esw_bridge_port_key(port)); +} + +static int mlx5_esw_bridge_mdb_port_insert(struct mlx5_esw_bridge_port *port, + struct mlx5_esw_bridge_mdb_entry *entry) +{ + int err = xa_insert(&entry->ports, mlx5_esw_bridge_port_key(port), port, GFP_KERNEL); + + if (!err) + entry->num_ports++; + return err; +} + +static void mlx5_esw_bridge_mdb_port_remove(struct mlx5_esw_bridge_port *port, + struct mlx5_esw_bridge_mdb_entry *entry) +{ + xa_erase(&entry->ports, mlx5_esw_bridge_port_key(port)); + entry->num_ports--; +} + +static struct mlx5_flow_handle * +mlx5_esw_bridge_mdb_flow_create(u16 esw_owner_vhca_id, struct mlx5_esw_bridge_mdb_entry *entry, + struct mlx5_esw_bridge *bridge) +{ + struct mlx5_flow_act flow_act = { + .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + .flags = FLOW_ACT_NO_APPEND | FLOW_ACT_IGNORE_FLOW_LEVEL, + }; + int num_dests = entry->num_ports, i = 0; + struct mlx5_flow_destination *dests; + struct mlx5_esw_bridge_port *port; + struct mlx5_flow_spec *rule_spec; + struct mlx5_flow_handle *handle; + u8 *dmac_v, *dmac_c; + unsigned long idx; + + rule_spec = kvzalloc(sizeof(*rule_spec), GFP_KERNEL); + if (!rule_spec) + return ERR_PTR(-ENOMEM); + + dests = kvcalloc(num_dests, sizeof(*dests), GFP_KERNEL); + if (!dests) { + kvfree(rule_spec); + return ERR_PTR(-ENOMEM); + } + + xa_for_each(&entry->ports, idx, port) { + dests[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dests[i].ft = port->mcast.ft; + i++; + } + + rule_spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + dmac_v = MLX5_ADDR_OF(fte_match_param, rule_spec->match_value, outer_headers.dmac_47_16); + ether_addr_copy(dmac_v, entry->key.addr); + dmac_c = MLX5_ADDR_OF(fte_match_param, rule_spec->match_criteria, outer_headers.dmac_47_16); + eth_broadcast_addr(dmac_c); + + if (entry->key.vid) { + if (bridge->vlan_proto == ETH_P_8021Q) { + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, + outer_headers.cvlan_tag); + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value, + outer_headers.cvlan_tag); + } else if (bridge->vlan_proto == ETH_P_8021AD) { + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, + outer_headers.svlan_tag); + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value, + outer_headers.svlan_tag); + } + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, + outer_headers.first_vid); + MLX5_SET(fte_match_param, rule_spec->match_value, outer_headers.first_vid, + entry->key.vid); + } + + handle = mlx5_add_flow_rules(bridge->egress_ft, rule_spec, &flow_act, dests, num_dests); + + kvfree(dests); + kvfree(rule_spec); + return handle; +} + +static int +mlx5_esw_bridge_port_mdb_offload(struct mlx5_esw_bridge_port *port, + struct mlx5_esw_bridge_mdb_entry *entry) +{ + struct mlx5_flow_handle *handle; + + handle = mlx5_esw_bridge_mdb_flow_create(port->esw_owner_vhca_id, entry, port->bridge); + if (entry->egress_handle) { + mlx5_del_flow_rules(entry->egress_handle); + entry->egress_handle = NULL; + } + if (IS_ERR(handle)) + return PTR_ERR(handle); + + entry->egress_handle = handle; + return 0; +} + +static struct mlx5_esw_bridge_mdb_entry * +mlx5_esw_bridge_mdb_lookup(struct mlx5_esw_bridge *bridge, + const unsigned char *addr, u16 vid) +{ + struct mlx5_esw_bridge_mdb_key key = {}; + + ether_addr_copy(key.addr, addr); + key.vid = vid; + return rhashtable_lookup_fast(&bridge->mdb_ht, &key, mdb_ht_params); +} + +static struct mlx5_esw_bridge_mdb_entry * +mlx5_esw_bridge_port_mdb_entry_init(struct mlx5_esw_bridge_port *port, + const unsigned char *addr, u16 vid) +{ + struct mlx5_esw_bridge *bridge = port->bridge; + struct mlx5_esw_bridge_mdb_entry *entry; + int err; + + entry = kvzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return ERR_PTR(-ENOMEM); + + ether_addr_copy(entry->key.addr, addr); + entry->key.vid = vid; + xa_init(&entry->ports); + err = rhashtable_insert_fast(&bridge->mdb_ht, &entry->ht_node, mdb_ht_params); + if (err) + goto err_ht_insert; + + list_add(&entry->list, &bridge->mdb_list); + + return entry; + +err_ht_insert: + xa_destroy(&entry->ports); + kvfree(entry); + return ERR_PTR(err); +} + +static void mlx5_esw_bridge_port_mdb_entry_cleanup(struct mlx5_esw_bridge *bridge, + struct mlx5_esw_bridge_mdb_entry *entry) +{ + if (entry->egress_handle) + mlx5_del_flow_rules(entry->egress_handle); + list_del(&entry->list); + rhashtable_remove_fast(&bridge->mdb_ht, &entry->ht_node, mdb_ht_params); + xa_destroy(&entry->ports); + kvfree(entry); +} + +int mlx5_esw_bridge_port_mdb_attach(struct net_device *dev, struct mlx5_esw_bridge_port *port, + const unsigned char *addr, u16 vid) +{ + struct mlx5_esw_bridge *bridge = port->bridge; + struct mlx5_esw_bridge_mdb_entry *entry; + int err; + + if (!(bridge->flags & MLX5_ESW_BRIDGE_MCAST_FLAG)) + return -EOPNOTSUPP; + + entry = mlx5_esw_bridge_mdb_lookup(bridge, addr, vid); + if (entry) { + if (mlx5_esw_bridge_mdb_port_lookup(port, entry)) { + esw_warn(bridge->br_offloads->esw->dev, "MDB attach entry is already attached to port (MAC=%pM,vid=%u,vport=%u)\n", + addr, vid, port->vport_num); + return 0; + } + } else { + entry = mlx5_esw_bridge_port_mdb_entry_init(port, addr, vid); + if (IS_ERR(entry)) { + err = PTR_ERR(entry); + esw_warn(bridge->br_offloads->esw->dev, "MDB attach failed to init entry (MAC=%pM,vid=%u,vport=%u,err=%d)\n", + addr, vid, port->vport_num, err); + return err; + } + } + + err = mlx5_esw_bridge_mdb_port_insert(port, entry); + if (err) { + if (!entry->num_ports) + mlx5_esw_bridge_port_mdb_entry_cleanup(bridge, entry); /* new mdb entry */ + esw_warn(bridge->br_offloads->esw->dev, + "MDB attach failed to insert port (MAC=%pM,vid=%u,vport=%u,err=%d)\n", + addr, vid, port->vport_num, err); + return err; + } + + err = mlx5_esw_bridge_port_mdb_offload(port, entry); + if (err) + /* Single mdb can be used by multiple ports, so just log the + * error and continue. + */ + esw_warn(bridge->br_offloads->esw->dev, "MDB attach failed to offload (MAC=%pM,vid=%u,vport=%u,err=%d)\n", + addr, vid, port->vport_num, err); + + trace_mlx5_esw_bridge_port_mdb_attach(dev, entry); + return 0; +} + +static void mlx5_esw_bridge_port_mdb_entry_detach(struct mlx5_esw_bridge_port *port, + struct mlx5_esw_bridge_mdb_entry *entry) +{ + struct mlx5_esw_bridge *bridge = port->bridge; + int err; + + mlx5_esw_bridge_mdb_port_remove(port, entry); + if (!entry->num_ports) { + mlx5_esw_bridge_port_mdb_entry_cleanup(bridge, entry); + return; + } + + err = mlx5_esw_bridge_port_mdb_offload(port, entry); + if (err) + /* Single mdb can be used by multiple ports, so just log the + * error and continue. + */ + esw_warn(bridge->br_offloads->esw->dev, "MDB detach failed to offload (MAC=%pM,vid=%u,vport=%u)\n", + entry->key.addr, entry->key.vid, port->vport_num); +} + +void mlx5_esw_bridge_port_mdb_detach(struct net_device *dev, struct mlx5_esw_bridge_port *port, + const unsigned char *addr, u16 vid) +{ + struct mlx5_esw_bridge *bridge = port->bridge; + struct mlx5_esw_bridge_mdb_entry *entry; + + entry = mlx5_esw_bridge_mdb_lookup(bridge, addr, vid); + if (!entry) { + esw_debug(bridge->br_offloads->esw->dev, + "MDB detach entry not found (MAC=%pM,vid=%u,vport=%u)\n", + addr, vid, port->vport_num); + return; + } + + if (!mlx5_esw_bridge_mdb_port_lookup(port, entry)) { + esw_debug(bridge->br_offloads->esw->dev, + "MDB detach entry not attached to the port (MAC=%pM,vid=%u,vport=%u)\n", + addr, vid, port->vport_num); + return; + } + + trace_mlx5_esw_bridge_port_mdb_detach(dev, entry); + mlx5_esw_bridge_port_mdb_entry_detach(port, entry); +} + +void mlx5_esw_bridge_port_mdb_vlan_flush(struct mlx5_esw_bridge_port *port, + struct mlx5_esw_bridge_vlan *vlan) +{ + struct mlx5_esw_bridge *bridge = port->bridge; + struct mlx5_esw_bridge_mdb_entry *entry, *tmp; + + list_for_each_entry_safe(entry, tmp, &bridge->mdb_list, list) + if (entry->key.vid == vlan->vid && mlx5_esw_bridge_mdb_port_lookup(port, entry)) + mlx5_esw_bridge_port_mdb_entry_detach(port, entry); +} + +static void mlx5_esw_bridge_port_mdb_flush(struct mlx5_esw_bridge_port *port) +{ + struct mlx5_esw_bridge *bridge = port->bridge; + struct mlx5_esw_bridge_mdb_entry *entry, *tmp; + + list_for_each_entry_safe(entry, tmp, &bridge->mdb_list, list) + if (mlx5_esw_bridge_mdb_port_lookup(port, entry)) + mlx5_esw_bridge_port_mdb_entry_detach(port, entry); +} + +void mlx5_esw_bridge_mdb_flush(struct mlx5_esw_bridge *bridge) +{ + struct mlx5_esw_bridge_mdb_entry *entry, *tmp; + + list_for_each_entry_safe(entry, tmp, &bridge->mdb_list, list) + mlx5_esw_bridge_port_mdb_entry_cleanup(bridge, entry); +} +static int mlx5_esw_bridge_port_mcast_fts_init(struct mlx5_esw_bridge_port *port, + struct mlx5_esw_bridge *bridge) +{ + struct mlx5_eswitch *esw = bridge->br_offloads->esw; + struct mlx5_flow_table *mcast_ft; + + mcast_ft = mlx5_esw_bridge_table_create(MLX5_ESW_BRIDGE_MCAST_TABLE_SIZE, + MLX5_ESW_BRIDGE_LEVEL_MCAST_TABLE, + esw); + if (IS_ERR(mcast_ft)) + return PTR_ERR(mcast_ft); + + port->mcast.ft = mcast_ft; + return 0; +} + +static void mlx5_esw_bridge_port_mcast_fts_cleanup(struct mlx5_esw_bridge_port *port) +{ + if (port->mcast.ft) + mlx5_destroy_flow_table(port->mcast.ft); + port->mcast.ft = NULL; +} + +static struct mlx5_flow_group * +mlx5_esw_bridge_mcast_filter_fg_create(struct mlx5_eswitch *esw, + struct mlx5_flow_table *mcast_ft) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *fg; + u32 *in, *match; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return ERR_PTR(-ENOMEM); + + MLX5_SET(create_flow_group_in, in, match_criteria_enable, MLX5_MATCH_MISC_PARAMETERS_2); + match = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + + MLX5_SET(fte_match_param, match, misc_parameters_2.metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_mask()); + + MLX5_SET(create_flow_group_in, in, start_flow_index, + MLX5_ESW_BRIDGE_MCAST_TABLE_FILTER_GRP_IDX_FROM); + MLX5_SET(create_flow_group_in, in, end_flow_index, + MLX5_ESW_BRIDGE_MCAST_TABLE_FILTER_GRP_IDX_TO); + + fg = mlx5_create_flow_group(mcast_ft, in); + kvfree(in); + if (IS_ERR(fg)) + esw_warn(esw->dev, + "Failed to create filter flow group for bridge mcast table (err=%pe)\n", + fg); + + return fg; +} + +static struct mlx5_flow_group * +mlx5_esw_bridge_mcast_vlan_proto_fg_create(unsigned int from, unsigned int to, u16 vlan_proto, + struct mlx5_eswitch *esw, + struct mlx5_flow_table *mcast_ft) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *fg; + u32 *in, *match; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return ERR_PTR(-ENOMEM); + + MLX5_SET(create_flow_group_in, in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + match = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + + if (vlan_proto == ETH_P_8021Q) + MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.cvlan_tag); + else if (vlan_proto == ETH_P_8021AD) + MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.svlan_tag); + MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.first_vid); + + MLX5_SET(create_flow_group_in, in, start_flow_index, from); + MLX5_SET(create_flow_group_in, in, end_flow_index, to); + + fg = mlx5_create_flow_group(mcast_ft, in); + kvfree(in); + if (IS_ERR(fg)) + esw_warn(esw->dev, + "Failed to create VLAN(proto=%x) flow group for bridge mcast table (err=%pe)\n", + vlan_proto, fg); + + return fg; +} + +static struct mlx5_flow_group * +mlx5_esw_bridge_mcast_vlan_fg_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *mcast_ft) +{ + unsigned int from = MLX5_ESW_BRIDGE_MCAST_TABLE_VLAN_GRP_IDX_FROM; + unsigned int to = MLX5_ESW_BRIDGE_MCAST_TABLE_VLAN_GRP_IDX_TO; + + return mlx5_esw_bridge_mcast_vlan_proto_fg_create(from, to, ETH_P_8021Q, esw, mcast_ft); +} + +static struct mlx5_flow_group * +mlx5_esw_bridge_mcast_qinq_fg_create(struct mlx5_eswitch *esw, + struct mlx5_flow_table *mcast_ft) +{ + unsigned int from = MLX5_ESW_BRIDGE_MCAST_TABLE_QINQ_GRP_IDX_FROM; + unsigned int to = MLX5_ESW_BRIDGE_MCAST_TABLE_QINQ_GRP_IDX_TO; + + return mlx5_esw_bridge_mcast_vlan_proto_fg_create(from, to, ETH_P_8021AD, esw, mcast_ft); +} + +static struct mlx5_flow_group * +mlx5_esw_bridge_mcast_fwd_fg_create(struct mlx5_eswitch *esw, + struct mlx5_flow_table *mcast_ft) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *fg; + u32 *in; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return ERR_PTR(-ENOMEM); + + MLX5_SET(create_flow_group_in, in, start_flow_index, + MLX5_ESW_BRIDGE_MCAST_TABLE_FWD_GRP_IDX_FROM); + MLX5_SET(create_flow_group_in, in, end_flow_index, + MLX5_ESW_BRIDGE_MCAST_TABLE_FWD_GRP_IDX_TO); + + fg = mlx5_create_flow_group(mcast_ft, in); + kvfree(in); + if (IS_ERR(fg)) + esw_warn(esw->dev, + "Failed to create forward flow group for bridge mcast table (err=%pe)\n", + fg); + + return fg; +} + +static int mlx5_esw_bridge_port_mcast_fgs_init(struct mlx5_esw_bridge_port *port) +{ + struct mlx5_flow_group *fwd_fg, *qinq_fg, *vlan_fg, *filter_fg; + struct mlx5_eswitch *esw = port->bridge->br_offloads->esw; + struct mlx5_flow_table *mcast_ft = port->mcast.ft; + int err; + + filter_fg = mlx5_esw_bridge_mcast_filter_fg_create(esw, mcast_ft); + if (IS_ERR(filter_fg)) + return PTR_ERR(filter_fg); + + vlan_fg = mlx5_esw_bridge_mcast_vlan_fg_create(esw, mcast_ft); + if (IS_ERR(vlan_fg)) { + err = PTR_ERR(vlan_fg); + goto err_vlan_fg; + } + + qinq_fg = mlx5_esw_bridge_mcast_qinq_fg_create(esw, mcast_ft); + if (IS_ERR(qinq_fg)) { + err = PTR_ERR(qinq_fg); + goto err_qinq_fg; + } + + fwd_fg = mlx5_esw_bridge_mcast_fwd_fg_create(esw, mcast_ft); + if (IS_ERR(fwd_fg)) { + err = PTR_ERR(fwd_fg); + goto err_fwd_fg; + } + + port->mcast.filter_fg = filter_fg; + port->mcast.vlan_fg = vlan_fg; + port->mcast.qinq_fg = qinq_fg; + port->mcast.fwd_fg = fwd_fg; + + return 0; + +err_fwd_fg: + mlx5_destroy_flow_group(qinq_fg); +err_qinq_fg: + mlx5_destroy_flow_group(vlan_fg); +err_vlan_fg: + mlx5_destroy_flow_group(filter_fg); + return err; +} + +static void mlx5_esw_bridge_port_mcast_fgs_cleanup(struct mlx5_esw_bridge_port *port) +{ + if (port->mcast.fwd_fg) + mlx5_destroy_flow_group(port->mcast.fwd_fg); + port->mcast.fwd_fg = NULL; + if (port->mcast.qinq_fg) + mlx5_destroy_flow_group(port->mcast.qinq_fg); + port->mcast.qinq_fg = NULL; + if (port->mcast.vlan_fg) + mlx5_destroy_flow_group(port->mcast.vlan_fg); + port->mcast.vlan_fg = NULL; + if (port->mcast.filter_fg) + mlx5_destroy_flow_group(port->mcast.filter_fg); + port->mcast.filter_fg = NULL; +} + +static struct mlx5_flow_handle * +mlx5_esw_bridge_mcast_flow_with_esw_create(struct mlx5_esw_bridge_port *port, + struct mlx5_eswitch *esw) +{ + struct mlx5_flow_act flow_act = { + .action = MLX5_FLOW_CONTEXT_ACTION_DROP, + .flags = FLOW_ACT_NO_APPEND, + }; + struct mlx5_flow_spec *rule_spec; + struct mlx5_flow_handle *handle; + + rule_spec = kvzalloc(sizeof(*rule_spec), GFP_KERNEL); + if (!rule_spec) + return ERR_PTR(-ENOMEM); + + rule_spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; + + MLX5_SET(fte_match_param, rule_spec->match_criteria, + misc_parameters_2.metadata_reg_c_0, mlx5_eswitch_get_vport_metadata_mask()); + MLX5_SET(fte_match_param, rule_spec->match_value, misc_parameters_2.metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_for_match(esw, port->vport_num)); + + handle = mlx5_add_flow_rules(port->mcast.ft, rule_spec, &flow_act, NULL, 0); + + kvfree(rule_spec); + return handle; +} + +static struct mlx5_flow_handle * +mlx5_esw_bridge_mcast_filter_flow_create(struct mlx5_esw_bridge_port *port) +{ + return mlx5_esw_bridge_mcast_flow_with_esw_create(port, port->bridge->br_offloads->esw); +} + +static struct mlx5_flow_handle * +mlx5_esw_bridge_mcast_filter_flow_peer_create(struct mlx5_esw_bridge_port *port) +{ + struct mlx5_devcom *devcom = port->bridge->br_offloads->esw->dev->priv.devcom; + static struct mlx5_flow_handle *handle; + struct mlx5_eswitch *peer_esw; + + peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + if (!peer_esw) + return ERR_PTR(-ENODEV); + + handle = mlx5_esw_bridge_mcast_flow_with_esw_create(port, peer_esw); + + mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + return handle; +} + +static struct mlx5_flow_handle * +mlx5_esw_bridge_mcast_vlan_flow_create(u16 vlan_proto, struct mlx5_esw_bridge_port *port, + struct mlx5_esw_bridge_vlan *vlan) +{ + struct mlx5_flow_act flow_act = { + .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + .flags = FLOW_ACT_NO_APPEND, + }; + struct mlx5_flow_destination dest = { + .type = MLX5_FLOW_DESTINATION_TYPE_VPORT, + .vport.num = port->vport_num, + }; + struct mlx5_esw_bridge *bridge = port->bridge; + struct mlx5_flow_spec *rule_spec; + struct mlx5_flow_handle *handle; + + rule_spec = kvzalloc(sizeof(*rule_spec), GFP_KERNEL); + if (!rule_spec) + return ERR_PTR(-ENOMEM); + + if (MLX5_CAP_ESW_FLOWTABLE(bridge->br_offloads->esw->dev, flow_source) && + port->vport_num == MLX5_VPORT_UPLINK) + rule_spec->flow_context.flow_source = + MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT; + rule_spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + flow_act.pkt_reformat = vlan->pkt_reformat_pop; + + if (vlan_proto == ETH_P_8021Q) { + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, + outer_headers.cvlan_tag); + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value, + outer_headers.cvlan_tag); + } else if (vlan_proto == ETH_P_8021AD) { + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, + outer_headers.svlan_tag); + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value, + outer_headers.svlan_tag); + } + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, outer_headers.first_vid); + MLX5_SET(fte_match_param, rule_spec->match_value, outer_headers.first_vid, vlan->vid); + + if (MLX5_CAP_ESW(bridge->br_offloads->esw->dev, merged_eswitch)) { + dest.vport.flags = MLX5_FLOW_DEST_VPORT_VHCA_ID; + dest.vport.vhca_id = port->esw_owner_vhca_id; + } + handle = mlx5_add_flow_rules(port->mcast.ft, rule_spec, &flow_act, &dest, 1); + + kvfree(rule_spec); + return handle; +} + +int mlx5_esw_bridge_vlan_mcast_init(u16 vlan_proto, struct mlx5_esw_bridge_port *port, + struct mlx5_esw_bridge_vlan *vlan) +{ + struct mlx5_flow_handle *handle; + + if (!(port->bridge->flags & MLX5_ESW_BRIDGE_MCAST_FLAG)) + return 0; + + handle = mlx5_esw_bridge_mcast_vlan_flow_create(vlan_proto, port, vlan); + if (IS_ERR(handle)) + return PTR_ERR(handle); + + vlan->mcast_handle = handle; + return 0; +} + +void mlx5_esw_bridge_vlan_mcast_cleanup(struct mlx5_esw_bridge_vlan *vlan) +{ + if (vlan->mcast_handle) + mlx5_del_flow_rules(vlan->mcast_handle); + vlan->mcast_handle = NULL; +} + +static struct mlx5_flow_handle * +mlx5_esw_bridge_mcast_fwd_flow_create(struct mlx5_esw_bridge_port *port) +{ + struct mlx5_flow_act flow_act = { + .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + .flags = FLOW_ACT_NO_APPEND, + }; + struct mlx5_flow_destination dest = { + .type = MLX5_FLOW_DESTINATION_TYPE_VPORT, + .vport.num = port->vport_num, + }; + struct mlx5_esw_bridge *bridge = port->bridge; + struct mlx5_flow_spec *rule_spec; + struct mlx5_flow_handle *handle; + + rule_spec = kvzalloc(sizeof(*rule_spec), GFP_KERNEL); + if (!rule_spec) + return ERR_PTR(-ENOMEM); + + if (MLX5_CAP_ESW_FLOWTABLE(bridge->br_offloads->esw->dev, flow_source) && + port->vport_num == MLX5_VPORT_UPLINK) + rule_spec->flow_context.flow_source = + MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT; + + if (MLX5_CAP_ESW(bridge->br_offloads->esw->dev, merged_eswitch)) { + dest.vport.flags = MLX5_FLOW_DEST_VPORT_VHCA_ID; + dest.vport.vhca_id = port->esw_owner_vhca_id; + } + handle = mlx5_add_flow_rules(port->mcast.ft, rule_spec, &flow_act, &dest, 1); + + kvfree(rule_spec); + return handle; +} + +static int mlx5_esw_bridge_port_mcast_fhs_init(struct mlx5_esw_bridge_port *port) +{ + struct mlx5_flow_handle *filter_handle, *fwd_handle; + struct mlx5_esw_bridge_vlan *vlan, *failed; + unsigned long index; + int err; + + + filter_handle = (port->flags & MLX5_ESW_BRIDGE_PORT_FLAG_PEER) ? + mlx5_esw_bridge_mcast_filter_flow_peer_create(port) : + mlx5_esw_bridge_mcast_filter_flow_create(port); + if (IS_ERR(filter_handle)) + return PTR_ERR(filter_handle); + + fwd_handle = mlx5_esw_bridge_mcast_fwd_flow_create(port); + if (IS_ERR(fwd_handle)) { + err = PTR_ERR(fwd_handle); + goto err_fwd; + } + + xa_for_each(&port->vlans, index, vlan) { + err = mlx5_esw_bridge_vlan_mcast_init(port->bridge->vlan_proto, port, vlan); + if (err) { + failed = vlan; + goto err_vlan; + } + } + + port->mcast.filter_handle = filter_handle; + port->mcast.fwd_handle = fwd_handle; + + return 0; + +err_vlan: + xa_for_each(&port->vlans, index, vlan) { + if (vlan == failed) + break; + + mlx5_esw_bridge_vlan_mcast_cleanup(vlan); + } + mlx5_del_flow_rules(fwd_handle); +err_fwd: + mlx5_del_flow_rules(filter_handle); + return err; +} + +static void mlx5_esw_bridge_port_mcast_fhs_cleanup(struct mlx5_esw_bridge_port *port) +{ + struct mlx5_esw_bridge_vlan *vlan; + unsigned long index; + + xa_for_each(&port->vlans, index, vlan) + mlx5_esw_bridge_vlan_mcast_cleanup(vlan); + + if (port->mcast.fwd_handle) + mlx5_del_flow_rules(port->mcast.fwd_handle); + port->mcast.fwd_handle = NULL; + if (port->mcast.filter_handle) + mlx5_del_flow_rules(port->mcast.filter_handle); + port->mcast.filter_handle = NULL; +} + +int mlx5_esw_bridge_port_mcast_init(struct mlx5_esw_bridge_port *port) +{ + struct mlx5_esw_bridge *bridge = port->bridge; + int err; + + if (!(bridge->flags & MLX5_ESW_BRIDGE_MCAST_FLAG)) + return 0; + + err = mlx5_esw_bridge_port_mcast_fts_init(port, bridge); + if (err) + return err; + + err = mlx5_esw_bridge_port_mcast_fgs_init(port); + if (err) + goto err_fgs; + + err = mlx5_esw_bridge_port_mcast_fhs_init(port); + if (err) + goto err_fhs; + return err; + +err_fhs: + mlx5_esw_bridge_port_mcast_fgs_cleanup(port); +err_fgs: + mlx5_esw_bridge_port_mcast_fts_cleanup(port); + return err; +} + +void mlx5_esw_bridge_port_mcast_cleanup(struct mlx5_esw_bridge_port *port) +{ + mlx5_esw_bridge_port_mdb_flush(port); + mlx5_esw_bridge_port_mcast_fhs_cleanup(port); + mlx5_esw_bridge_port_mcast_fgs_cleanup(port); + mlx5_esw_bridge_port_mcast_fts_cleanup(port); +} + +static struct mlx5_flow_group * +mlx5_esw_bridge_ingress_igmp_fg_create(struct mlx5_eswitch *esw, + struct mlx5_flow_table *ingress_ft) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *fg; + u32 *in, *match; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return ERR_PTR(-ENOMEM); + + MLX5_SET(create_flow_group_in, in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + match = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + + MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.ip_version); + MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.ip_protocol); + + MLX5_SET(create_flow_group_in, in, start_flow_index, + MLX5_ESW_BRIDGE_INGRESS_TABLE_IGMP_GRP_IDX_FROM); + MLX5_SET(create_flow_group_in, in, end_flow_index, + MLX5_ESW_BRIDGE_INGRESS_TABLE_IGMP_GRP_IDX_TO); + + fg = mlx5_create_flow_group(ingress_ft, in); + kvfree(in); + if (IS_ERR(fg)) + esw_warn(esw->dev, + "Failed to create IGMP flow group for bridge ingress table (err=%pe)\n", + fg); + + return fg; +} + +static struct mlx5_flow_group * +mlx5_esw_bridge_ingress_mld_fg_create(struct mlx5_eswitch *esw, + struct mlx5_flow_table *ingress_ft) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *fg; + u32 *in, *match; + + if (!(MLX5_CAP_GEN(esw->dev, flex_parser_protocols) & MLX5_FLEX_PROTO_ICMPV6)) { + esw_warn(esw->dev, + "Can't create MLD flow group due to missing hardware ICMPv6 parsing support\n"); + return NULL; + } + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return ERR_PTR(-ENOMEM); + + MLX5_SET(create_flow_group_in, in, match_criteria_enable, + MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS_3); + match = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + + MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.ip_version); + MLX5_SET_TO_ONES(fte_match_param, match, misc_parameters_3.icmpv6_type); + + MLX5_SET(create_flow_group_in, in, start_flow_index, + MLX5_ESW_BRIDGE_INGRESS_TABLE_MLD_GRP_IDX_FROM); + MLX5_SET(create_flow_group_in, in, end_flow_index, + MLX5_ESW_BRIDGE_INGRESS_TABLE_MLD_GRP_IDX_TO); + + fg = mlx5_create_flow_group(ingress_ft, in); + kvfree(in); + if (IS_ERR(fg)) + esw_warn(esw->dev, + "Failed to create MLD flow group for bridge ingress table (err=%pe)\n", + fg); + + return fg; +} + +static int +mlx5_esw_bridge_ingress_mcast_fgs_init(struct mlx5_esw_bridge_offloads *br_offloads) +{ + struct mlx5_flow_table *ingress_ft = br_offloads->ingress_ft; + struct mlx5_eswitch *esw = br_offloads->esw; + struct mlx5_flow_group *igmp_fg, *mld_fg; + + igmp_fg = mlx5_esw_bridge_ingress_igmp_fg_create(esw, ingress_ft); + if (IS_ERR(igmp_fg)) + return PTR_ERR(igmp_fg); + + mld_fg = mlx5_esw_bridge_ingress_mld_fg_create(esw, ingress_ft); + if (IS_ERR(mld_fg)) { + mlx5_destroy_flow_group(igmp_fg); + return PTR_ERR(mld_fg); + } + + br_offloads->ingress_igmp_fg = igmp_fg; + br_offloads->ingress_mld_fg = mld_fg; + return 0; +} + +static void +mlx5_esw_bridge_ingress_mcast_fgs_cleanup(struct mlx5_esw_bridge_offloads *br_offloads) +{ + if (br_offloads->ingress_mld_fg) + mlx5_destroy_flow_group(br_offloads->ingress_mld_fg); + br_offloads->ingress_mld_fg = NULL; + if (br_offloads->ingress_igmp_fg) + mlx5_destroy_flow_group(br_offloads->ingress_igmp_fg); + br_offloads->ingress_igmp_fg = NULL; +} + +static struct mlx5_flow_handle * +mlx5_esw_bridge_ingress_igmp_fh_create(struct mlx5_flow_table *ingress_ft, + struct mlx5_flow_table *skip_ft) +{ + struct mlx5_flow_destination dest = { + .type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE, + .ft = skip_ft, + }; + struct mlx5_flow_act flow_act = { + .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + .flags = FLOW_ACT_NO_APPEND, + }; + struct mlx5_flow_spec *rule_spec; + struct mlx5_flow_handle *handle; + + rule_spec = kvzalloc(sizeof(*rule_spec), GFP_KERNEL); + if (!rule_spec) + return ERR_PTR(-ENOMEM); + + rule_spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, outer_headers.ip_version); + MLX5_SET(fte_match_param, rule_spec->match_value, outer_headers.ip_version, 4); + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, outer_headers.ip_protocol); + MLX5_SET(fte_match_param, rule_spec->match_value, outer_headers.ip_protocol, IPPROTO_IGMP); + + handle = mlx5_add_flow_rules(ingress_ft, rule_spec, &flow_act, &dest, 1); + + kvfree(rule_spec); + return handle; +} + +static struct mlx5_flow_handle * +mlx5_esw_bridge_ingress_mld_fh_create(u8 type, struct mlx5_flow_table *ingress_ft, + struct mlx5_flow_table *skip_ft) +{ + struct mlx5_flow_destination dest = { + .type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE, + .ft = skip_ft, + }; + struct mlx5_flow_act flow_act = { + .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + .flags = FLOW_ACT_NO_APPEND, + }; + struct mlx5_flow_spec *rule_spec; + struct mlx5_flow_handle *handle; + + rule_spec = kvzalloc(sizeof(*rule_spec), GFP_KERNEL); + if (!rule_spec) + return ERR_PTR(-ENOMEM); + + rule_spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS_3; + + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, outer_headers.ip_version); + MLX5_SET(fte_match_param, rule_spec->match_value, outer_headers.ip_version, 6); + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, misc_parameters_3.icmpv6_type); + MLX5_SET(fte_match_param, rule_spec->match_value, misc_parameters_3.icmpv6_type, type); + + handle = mlx5_add_flow_rules(ingress_ft, rule_spec, &flow_act, &dest, 1); + + kvfree(rule_spec); + return handle; +} + +static int +mlx5_esw_bridge_ingress_mcast_fhs_create(struct mlx5_esw_bridge_offloads *br_offloads) +{ + struct mlx5_flow_handle *igmp_handle, *mld_query_handle, *mld_report_handle, + *mld_done_handle; + struct mlx5_flow_table *ingress_ft = br_offloads->ingress_ft, + *skip_ft = br_offloads->skip_ft; + int err; + + igmp_handle = mlx5_esw_bridge_ingress_igmp_fh_create(ingress_ft, skip_ft); + if (IS_ERR(igmp_handle)) + return PTR_ERR(igmp_handle); + + if (br_offloads->ingress_mld_fg) { + mld_query_handle = mlx5_esw_bridge_ingress_mld_fh_create(ICMPV6_MGM_QUERY, + ingress_ft, + skip_ft); + if (IS_ERR(mld_query_handle)) { + err = PTR_ERR(mld_query_handle); + goto err_mld_query; + } + + mld_report_handle = mlx5_esw_bridge_ingress_mld_fh_create(ICMPV6_MGM_REPORT, + ingress_ft, + skip_ft); + if (IS_ERR(mld_report_handle)) { + err = PTR_ERR(mld_report_handle); + goto err_mld_report; + } + + mld_done_handle = mlx5_esw_bridge_ingress_mld_fh_create(ICMPV6_MGM_REDUCTION, + ingress_ft, + skip_ft); + if (IS_ERR(mld_done_handle)) { + err = PTR_ERR(mld_done_handle); + goto err_mld_done; + } + } else { + mld_query_handle = NULL; + mld_report_handle = NULL; + mld_done_handle = NULL; + } + + br_offloads->igmp_handle = igmp_handle; + br_offloads->mld_query_handle = mld_query_handle; + br_offloads->mld_report_handle = mld_report_handle; + br_offloads->mld_done_handle = mld_done_handle; + + return 0; + +err_mld_done: + mlx5_del_flow_rules(mld_report_handle); +err_mld_report: + mlx5_del_flow_rules(mld_query_handle); +err_mld_query: + mlx5_del_flow_rules(igmp_handle); + return err; +} + +static void +mlx5_esw_bridge_ingress_mcast_fhs_cleanup(struct mlx5_esw_bridge_offloads *br_offloads) +{ + if (br_offloads->mld_done_handle) + mlx5_del_flow_rules(br_offloads->mld_done_handle); + br_offloads->mld_done_handle = NULL; + if (br_offloads->mld_report_handle) + mlx5_del_flow_rules(br_offloads->mld_report_handle); + br_offloads->mld_report_handle = NULL; + if (br_offloads->mld_query_handle) + mlx5_del_flow_rules(br_offloads->mld_query_handle); + br_offloads->mld_query_handle = NULL; + if (br_offloads->igmp_handle) + mlx5_del_flow_rules(br_offloads->igmp_handle); + br_offloads->igmp_handle = NULL; +} + +static int mlx5_esw_brige_mcast_init(struct mlx5_esw_bridge *bridge) +{ + struct mlx5_esw_bridge_offloads *br_offloads = bridge->br_offloads; + struct mlx5_esw_bridge_port *port, *failed; + unsigned long i; + int err; + + xa_for_each(&br_offloads->ports, i, port) { + if (port->bridge != bridge) + continue; + + err = mlx5_esw_bridge_port_mcast_init(port); + if (err) { + failed = port; + goto err_port; + } + } + return 0; + +err_port: + xa_for_each(&br_offloads->ports, i, port) { + if (port == failed) + break; + if (port->bridge != bridge) + continue; + + mlx5_esw_bridge_port_mcast_cleanup(port); + } + return err; +} + +static void mlx5_esw_brige_mcast_cleanup(struct mlx5_esw_bridge *bridge) +{ + struct mlx5_esw_bridge_offloads *br_offloads = bridge->br_offloads; + struct mlx5_esw_bridge_port *port; + unsigned long i; + + xa_for_each(&br_offloads->ports, i, port) { + if (port->bridge != bridge) + continue; + + mlx5_esw_bridge_port_mcast_cleanup(port); + } +} + +static int mlx5_esw_brige_mcast_global_enable(struct mlx5_esw_bridge_offloads *br_offloads) +{ + int err; + + if (br_offloads->ingress_igmp_fg) + return 0; /* already enabled by another bridge */ + + err = mlx5_esw_bridge_ingress_mcast_fgs_init(br_offloads); + if (err) { + esw_warn(br_offloads->esw->dev, + "Failed to create global multicast flow groups (err=%d)\n", + err); + return err; + } + + err = mlx5_esw_bridge_ingress_mcast_fhs_create(br_offloads); + if (err) { + esw_warn(br_offloads->esw->dev, + "Failed to create global multicast flows (err=%d)\n", + err); + goto err_fhs; + } + + return 0; + +err_fhs: + mlx5_esw_bridge_ingress_mcast_fgs_cleanup(br_offloads); + return err; +} + +static void mlx5_esw_brige_mcast_global_disable(struct mlx5_esw_bridge_offloads *br_offloads) +{ + struct mlx5_esw_bridge *br; + + list_for_each_entry(br, &br_offloads->bridges, list) { + /* Ingress table is global, so only disable snooping when all + * bridges on esw have multicast disabled. + */ + if (br->flags & MLX5_ESW_BRIDGE_MCAST_FLAG) + return; + } + + mlx5_esw_bridge_ingress_mcast_fhs_cleanup(br_offloads); + mlx5_esw_bridge_ingress_mcast_fgs_cleanup(br_offloads); +} + +int mlx5_esw_bridge_mcast_enable(struct mlx5_esw_bridge *bridge) +{ + int err; + + err = mlx5_esw_brige_mcast_global_enable(bridge->br_offloads); + if (err) + return err; + + bridge->flags |= MLX5_ESW_BRIDGE_MCAST_FLAG; + + err = mlx5_esw_brige_mcast_init(bridge); + if (err) { + esw_warn(bridge->br_offloads->esw->dev, "Failed to enable multicast (err=%d)\n", + err); + bridge->flags &= ~MLX5_ESW_BRIDGE_MCAST_FLAG; + mlx5_esw_brige_mcast_global_disable(bridge->br_offloads); + } + return err; +} + +void mlx5_esw_bridge_mcast_disable(struct mlx5_esw_bridge *bridge) +{ + mlx5_esw_brige_mcast_cleanup(bridge); + bridge->flags &= ~MLX5_ESW_BRIDGE_MCAST_FLAG; + mlx5_esw_brige_mcast_global_disable(bridge->br_offloads); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h index 878311fe950a..c9595801bdb4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h @@ -12,11 +12,124 @@ #include <linux/xarray.h> #include "fs_core.h" +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_IGMP_GRP_SIZE 1 +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_MLD_GRP_SIZE 3 +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_SIZE 131072 +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_UNTAGGED_GRP_SIZE \ + (524288 - MLX5_ESW_BRIDGE_INGRESS_TABLE_IGMP_GRP_SIZE - \ + MLX5_ESW_BRIDGE_INGRESS_TABLE_MLD_GRP_SIZE) + +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_IGMP_GRP_IDX_FROM 0 +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_IGMP_GRP_IDX_TO \ + (MLX5_ESW_BRIDGE_INGRESS_TABLE_IGMP_GRP_SIZE - 1) +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_MLD_GRP_IDX_FROM \ + (MLX5_ESW_BRIDGE_INGRESS_TABLE_IGMP_GRP_IDX_TO + 1) +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_MLD_GRP_IDX_TO \ + (MLX5_ESW_BRIDGE_INGRESS_TABLE_MLD_GRP_IDX_FROM + \ + MLX5_ESW_BRIDGE_INGRESS_TABLE_MLD_GRP_SIZE - 1) +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_FROM \ + (MLX5_ESW_BRIDGE_INGRESS_TABLE_MLD_GRP_IDX_TO + 1) +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_TO \ + (MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_FROM + \ + MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_SIZE - 1) +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_FILTER_GRP_IDX_FROM \ + (MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_TO + 1) +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_FILTER_GRP_IDX_TO \ + (MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_FILTER_GRP_IDX_FROM + \ + MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_SIZE - 1) +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_GRP_IDX_FROM \ + (MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_FILTER_GRP_IDX_TO + 1) +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_GRP_IDX_TO \ + (MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_GRP_IDX_FROM + \ + MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_SIZE - 1) +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_FILTER_GRP_IDX_FROM \ + (MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_GRP_IDX_TO + 1) +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_FILTER_GRP_IDX_TO \ + (MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_FILTER_GRP_IDX_FROM + \ + MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_SIZE - 1) +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_FROM \ + (MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_FILTER_GRP_IDX_TO + 1) +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_TO \ + (MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_FROM + \ + MLX5_ESW_BRIDGE_INGRESS_TABLE_UNTAGGED_GRP_SIZE - 1) +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_SIZE \ + (MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_TO + 1) +static_assert(MLX5_ESW_BRIDGE_INGRESS_TABLE_SIZE == 1048576); + +#define MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_SIZE 131072 +#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_SIZE (262144 - 1) +#define MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_FROM 0 +#define MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_TO \ + (MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_SIZE - 1) +#define MLX5_ESW_BRIDGE_EGRESS_TABLE_QINQ_GRP_IDX_FROM \ + (MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_TO + 1) +#define MLX5_ESW_BRIDGE_EGRESS_TABLE_QINQ_GRP_IDX_TO \ + (MLX5_ESW_BRIDGE_EGRESS_TABLE_QINQ_GRP_IDX_FROM + \ + MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_SIZE - 1) +#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_FROM \ + (MLX5_ESW_BRIDGE_EGRESS_TABLE_QINQ_GRP_IDX_TO + 1) +#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_TO \ + (MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_FROM + \ + MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_SIZE - 1) +#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MISS_GRP_IDX_FROM \ + (MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_TO + 1) +#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MISS_GRP_IDX_TO \ + MLX5_ESW_BRIDGE_EGRESS_TABLE_MISS_GRP_IDX_FROM +#define MLX5_ESW_BRIDGE_EGRESS_TABLE_SIZE \ + (MLX5_ESW_BRIDGE_EGRESS_TABLE_MISS_GRP_IDX_TO + 1) +static_assert(MLX5_ESW_BRIDGE_EGRESS_TABLE_SIZE == 524288); + +#define MLX5_ESW_BRIDGE_SKIP_TABLE_SIZE 0 + +#define MLX5_ESW_BRIDGE_MCAST_TABLE_FILTER_GRP_SIZE 1 +#define MLX5_ESW_BRIDGE_MCAST_TABLE_FWD_GRP_SIZE 1 +#define MLX5_ESW_BRIDGE_MCAST_TABLE_VLAN_GRP_SIZE 4095 +#define MLX5_ESW_BRIDGE_MCAST_TABLE_QINQ_GRP_SIZE MLX5_ESW_BRIDGE_MCAST_TABLE_VLAN_GRP_SIZE +#define MLX5_ESW_BRIDGE_MCAST_TABLE_FILTER_GRP_IDX_FROM 0 +#define MLX5_ESW_BRIDGE_MCAST_TABLE_FILTER_GRP_IDX_TO \ + (MLX5_ESW_BRIDGE_MCAST_TABLE_FILTER_GRP_SIZE - 1) +#define MLX5_ESW_BRIDGE_MCAST_TABLE_VLAN_GRP_IDX_FROM \ + (MLX5_ESW_BRIDGE_MCAST_TABLE_FILTER_GRP_IDX_TO + 1) +#define MLX5_ESW_BRIDGE_MCAST_TABLE_VLAN_GRP_IDX_TO \ + (MLX5_ESW_BRIDGE_MCAST_TABLE_VLAN_GRP_IDX_FROM + \ + MLX5_ESW_BRIDGE_MCAST_TABLE_VLAN_GRP_SIZE - 1) +#define MLX5_ESW_BRIDGE_MCAST_TABLE_QINQ_GRP_IDX_FROM \ + (MLX5_ESW_BRIDGE_MCAST_TABLE_VLAN_GRP_IDX_TO + 1) +#define MLX5_ESW_BRIDGE_MCAST_TABLE_QINQ_GRP_IDX_TO \ + (MLX5_ESW_BRIDGE_MCAST_TABLE_QINQ_GRP_IDX_FROM + \ + MLX5_ESW_BRIDGE_MCAST_TABLE_QINQ_GRP_SIZE - 1) +#define MLX5_ESW_BRIDGE_MCAST_TABLE_FWD_GRP_IDX_FROM \ + (MLX5_ESW_BRIDGE_MCAST_TABLE_QINQ_GRP_IDX_TO + 1) +#define MLX5_ESW_BRIDGE_MCAST_TABLE_FWD_GRP_IDX_TO \ + (MLX5_ESW_BRIDGE_MCAST_TABLE_FWD_GRP_IDX_FROM + \ + MLX5_ESW_BRIDGE_MCAST_TABLE_FWD_GRP_SIZE - 1) + +#define MLX5_ESW_BRIDGE_MCAST_TABLE_SIZE \ + (MLX5_ESW_BRIDGE_MCAST_TABLE_FWD_GRP_IDX_TO + 1) +static_assert(MLX5_ESW_BRIDGE_MCAST_TABLE_SIZE == 8192); + +enum { + MLX5_ESW_BRIDGE_LEVEL_INGRESS_TABLE, + MLX5_ESW_BRIDGE_LEVEL_EGRESS_TABLE, + MLX5_ESW_BRIDGE_LEVEL_MCAST_TABLE, + MLX5_ESW_BRIDGE_LEVEL_SKIP_TABLE, +}; + +enum { + MLX5_ESW_BRIDGE_VLAN_FILTERING_FLAG = BIT(0), + MLX5_ESW_BRIDGE_MCAST_FLAG = BIT(1), +}; + struct mlx5_esw_bridge_fdb_key { unsigned char addr[ETH_ALEN]; u16 vid; }; +struct mlx5_esw_bridge_mdb_key { + unsigned char addr[ETH_ALEN]; + u16 vid; +}; + enum { MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER = BIT(0), MLX5_ESW_BRIDGE_FLAG_PEER = BIT(1), @@ -43,6 +156,16 @@ struct mlx5_esw_bridge_fdb_entry { struct mlx5_flow_handle *filter_handle; }; +struct mlx5_esw_bridge_mdb_entry { + struct mlx5_esw_bridge_mdb_key key; + struct rhash_head ht_node; + struct list_head list; + struct xarray ports; + int num_ports; + + struct mlx5_flow_handle *egress_handle; +}; + struct mlx5_esw_bridge_vlan { u16 vid; u16 flags; @@ -50,6 +173,7 @@ struct mlx5_esw_bridge_vlan { struct mlx5_pkt_reformat *pkt_reformat_push; struct mlx5_pkt_reformat *pkt_reformat_pop; struct mlx5_modify_hdr *pkt_mod_hdr_push_mark; + struct mlx5_flow_handle *mcast_handle; }; struct mlx5_esw_bridge_port { @@ -58,6 +182,63 @@ struct mlx5_esw_bridge_port { u16 flags; struct mlx5_esw_bridge *bridge; struct xarray vlans; + struct { + struct mlx5_flow_table *ft; + struct mlx5_flow_group *filter_fg; + struct mlx5_flow_group *vlan_fg; + struct mlx5_flow_group *qinq_fg; + struct mlx5_flow_group *fwd_fg; + + struct mlx5_flow_handle *filter_handle; + struct mlx5_flow_handle *fwd_handle; + } mcast; }; +struct mlx5_esw_bridge { + int ifindex; + int refcnt; + struct list_head list; + struct mlx5_esw_bridge_offloads *br_offloads; + + struct list_head fdb_list; + struct rhashtable fdb_ht; + + struct list_head mdb_list; + struct rhashtable mdb_ht; + + struct mlx5_flow_table *egress_ft; + struct mlx5_flow_group *egress_vlan_fg; + struct mlx5_flow_group *egress_qinq_fg; + struct mlx5_flow_group *egress_mac_fg; + struct mlx5_flow_group *egress_miss_fg; + struct mlx5_pkt_reformat *egress_miss_pkt_reformat; + struct mlx5_flow_handle *egress_miss_handle; + unsigned long ageing_time; + u32 flags; + u16 vlan_proto; +}; + +struct mlx5_flow_table *mlx5_esw_bridge_table_create(int max_fte, u32 level, + struct mlx5_eswitch *esw); +unsigned long mlx5_esw_bridge_port_key(struct mlx5_esw_bridge_port *port); + +int mlx5_esw_bridge_port_mcast_init(struct mlx5_esw_bridge_port *port); +void mlx5_esw_bridge_port_mcast_cleanup(struct mlx5_esw_bridge_port *port); +int mlx5_esw_bridge_vlan_mcast_init(u16 vlan_proto, struct mlx5_esw_bridge_port *port, + struct mlx5_esw_bridge_vlan *vlan); +void mlx5_esw_bridge_vlan_mcast_cleanup(struct mlx5_esw_bridge_vlan *vlan); + +int mlx5_esw_bridge_mcast_enable(struct mlx5_esw_bridge *bridge); +void mlx5_esw_bridge_mcast_disable(struct mlx5_esw_bridge *bridge); + +int mlx5_esw_bridge_mdb_init(struct mlx5_esw_bridge *bridge); +void mlx5_esw_bridge_mdb_cleanup(struct mlx5_esw_bridge *bridge); +int mlx5_esw_bridge_port_mdb_attach(struct net_device *dev, struct mlx5_esw_bridge_port *port, + const unsigned char *addr, u16 vid); +void mlx5_esw_bridge_port_mdb_detach(struct net_device *dev, struct mlx5_esw_bridge_port *port, + const unsigned char *addr, u16 vid); +void mlx5_esw_bridge_port_mdb_vlan_flush(struct mlx5_esw_bridge_port *port, + struct mlx5_esw_bridge_vlan *vlan); +void mlx5_esw_bridge_mdb_flush(struct mlx5_esw_bridge *bridge); + #endif /* _MLX5_ESW_BRIDGE_PRIVATE_ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/debugfs.c deleted file mode 100644 index 3d0bbcca1cb9..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/debugfs.c +++ /dev/null @@ -1,198 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB -/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ - -#include <linux/debugfs.h> -#include "eswitch.h" - -enum vnic_diag_counter { - MLX5_VNIC_DIAG_TOTAL_Q_UNDER_PROCESSOR_HANDLE, - MLX5_VNIC_DIAG_SEND_QUEUE_PRIORITY_UPDATE_FLOW, - MLX5_VNIC_DIAG_COMP_EQ_OVERRUN, - MLX5_VNIC_DIAG_ASYNC_EQ_OVERRUN, - MLX5_VNIC_DIAG_CQ_OVERRUN, - MLX5_VNIC_DIAG_INVALID_COMMAND, - MLX5_VNIC_DIAG_QOUTA_EXCEEDED_COMMAND, - MLX5_VNIC_DIAG_RX_STEERING_DISCARD, -}; - -static int mlx5_esw_query_vnic_diag(struct mlx5_vport *vport, enum vnic_diag_counter counter, - u64 *val) -{ - u32 out[MLX5_ST_SZ_DW(query_vnic_env_out)] = {}; - u32 in[MLX5_ST_SZ_DW(query_vnic_env_in)] = {}; - struct mlx5_core_dev *dev = vport->dev; - u16 vport_num = vport->vport; - void *vnic_diag_out; - int err; - - MLX5_SET(query_vnic_env_in, in, opcode, MLX5_CMD_OP_QUERY_VNIC_ENV); - MLX5_SET(query_vnic_env_in, in, vport_number, vport_num); - if (!mlx5_esw_is_manager_vport(dev->priv.eswitch, vport_num)) - MLX5_SET(query_vnic_env_in, in, other_vport, 1); - - err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); - if (err) - return err; - - vnic_diag_out = MLX5_ADDR_OF(query_vnic_env_out, out, vport_env); - switch (counter) { - case MLX5_VNIC_DIAG_TOTAL_Q_UNDER_PROCESSOR_HANDLE: - *val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, total_error_queues); - break; - case MLX5_VNIC_DIAG_SEND_QUEUE_PRIORITY_UPDATE_FLOW: - *val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, - send_queue_priority_update_flow); - break; - case MLX5_VNIC_DIAG_COMP_EQ_OVERRUN: - *val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, comp_eq_overrun); - break; - case MLX5_VNIC_DIAG_ASYNC_EQ_OVERRUN: - *val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, async_eq_overrun); - break; - case MLX5_VNIC_DIAG_CQ_OVERRUN: - *val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, cq_overrun); - break; - case MLX5_VNIC_DIAG_INVALID_COMMAND: - *val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, invalid_command); - break; - case MLX5_VNIC_DIAG_QOUTA_EXCEEDED_COMMAND: - *val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, quota_exceeded_command); - break; - case MLX5_VNIC_DIAG_RX_STEERING_DISCARD: - *val = MLX5_GET64(vnic_diagnostic_statistics, vnic_diag_out, - nic_receive_steering_discard); - break; - } - - return 0; -} - -static int __show_vnic_diag(struct seq_file *file, struct mlx5_vport *vport, - enum vnic_diag_counter type) -{ - u64 val = 0; - int ret; - - ret = mlx5_esw_query_vnic_diag(vport, type, &val); - if (ret) - return ret; - - seq_printf(file, "%llu\n", val); - return 0; -} - -static int total_q_under_processor_handle_show(struct seq_file *file, void *priv) -{ - return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_TOTAL_Q_UNDER_PROCESSOR_HANDLE); -} - -static int send_queue_priority_update_flow_show(struct seq_file *file, void *priv) -{ - return __show_vnic_diag(file, file->private, - MLX5_VNIC_DIAG_SEND_QUEUE_PRIORITY_UPDATE_FLOW); -} - -static int comp_eq_overrun_show(struct seq_file *file, void *priv) -{ - return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_COMP_EQ_OVERRUN); -} - -static int async_eq_overrun_show(struct seq_file *file, void *priv) -{ - return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_ASYNC_EQ_OVERRUN); -} - -static int cq_overrun_show(struct seq_file *file, void *priv) -{ - return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_CQ_OVERRUN); -} - -static int invalid_command_show(struct seq_file *file, void *priv) -{ - return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_INVALID_COMMAND); -} - -static int quota_exceeded_command_show(struct seq_file *file, void *priv) -{ - return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_QOUTA_EXCEEDED_COMMAND); -} - -static int rx_steering_discard_show(struct seq_file *file, void *priv) -{ - return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_RX_STEERING_DISCARD); -} - -DEFINE_SHOW_ATTRIBUTE(total_q_under_processor_handle); -DEFINE_SHOW_ATTRIBUTE(send_queue_priority_update_flow); -DEFINE_SHOW_ATTRIBUTE(comp_eq_overrun); -DEFINE_SHOW_ATTRIBUTE(async_eq_overrun); -DEFINE_SHOW_ATTRIBUTE(cq_overrun); -DEFINE_SHOW_ATTRIBUTE(invalid_command); -DEFINE_SHOW_ATTRIBUTE(quota_exceeded_command); -DEFINE_SHOW_ATTRIBUTE(rx_steering_discard); - -void mlx5_esw_vport_debugfs_destroy(struct mlx5_eswitch *esw, u16 vport_num) -{ - struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); - - debugfs_remove_recursive(vport->dbgfs); - vport->dbgfs = NULL; -} - -/* vnic diag dir name is "pf", "ecpf" or "{vf/sf}_xxxx" */ -#define VNIC_DIAG_DIR_NAME_MAX_LEN 8 - -void mlx5_esw_vport_debugfs_create(struct mlx5_eswitch *esw, u16 vport_num, bool is_sf, u16 sf_num) -{ - struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); - struct dentry *vnic_diag; - char dir_name[VNIC_DIAG_DIR_NAME_MAX_LEN]; - int err; - - if (!MLX5_CAP_GEN(esw->dev, vport_group_manager)) - return; - - if (vport_num == MLX5_VPORT_PF) { - strcpy(dir_name, "pf"); - } else if (vport_num == MLX5_VPORT_ECPF) { - strcpy(dir_name, "ecpf"); - } else { - err = snprintf(dir_name, VNIC_DIAG_DIR_NAME_MAX_LEN, "%s_%d", is_sf ? "sf" : "vf", - is_sf ? sf_num : vport_num - MLX5_VPORT_FIRST_VF); - if (WARN_ON(err < 0)) - return; - } - - vport->dbgfs = debugfs_create_dir(dir_name, esw->dbgfs); - vnic_diag = debugfs_create_dir("vnic_diag", vport->dbgfs); - - if (MLX5_CAP_GEN(esw->dev, vnic_env_queue_counters)) { - debugfs_create_file("total_q_under_processor_handle", 0444, vnic_diag, vport, - &total_q_under_processor_handle_fops); - debugfs_create_file("send_queue_priority_update_flow", 0444, vnic_diag, vport, - &send_queue_priority_update_flow_fops); - } - - if (MLX5_CAP_GEN(esw->dev, eq_overrun_count)) { - debugfs_create_file("comp_eq_overrun", 0444, vnic_diag, vport, - &comp_eq_overrun_fops); - debugfs_create_file("async_eq_overrun", 0444, vnic_diag, vport, - &async_eq_overrun_fops); - } - - if (MLX5_CAP_GEN(esw->dev, vnic_env_cq_overrun)) - debugfs_create_file("cq_overrun", 0444, vnic_diag, vport, &cq_overrun_fops); - - if (MLX5_CAP_GEN(esw->dev, invalid_command_count)) - debugfs_create_file("invalid_command", 0444, vnic_diag, vport, - &invalid_command_fops); - - if (MLX5_CAP_GEN(esw->dev, quota_exceeded_count)) - debugfs_create_file("quota_exceeded_command", 0444, vnic_diag, vport, - "a_exceeded_command_fops); - - if (MLX5_CAP_GEN(esw->dev, nic_receive_steering_discard)) - debugfs_create_file("rx_steering_discard", 0444, vnic_diag, vport, - &rx_steering_discard_fops); - -} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h index 51ac24e6ec3c..1808da214094 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h @@ -110,6 +110,41 @@ DEFINE_EVENT(mlx5_esw_bridge_port_template, TP_ARGS(port) ); +DECLARE_EVENT_CLASS(mlx5_esw_bridge_mdb_port_change_template, + TP_PROTO(const struct net_device *dev, + const struct mlx5_esw_bridge_mdb_entry *mdb), + TP_ARGS(dev, mdb), + TP_STRUCT__entry( + __array(char, dev_name, IFNAMSIZ) + __array(unsigned char, addr, ETH_ALEN) + __field(u16, vid) + __field(int, num_ports) + __field(bool, offloaded)), + TP_fast_assign( + strscpy(__entry->dev_name, netdev_name(dev), IFNAMSIZ); + memcpy(__entry->addr, mdb->key.addr, ETH_ALEN); + __entry->vid = mdb->key.vid; + __entry->num_ports = mdb->num_ports; + __entry->offloaded = mdb->egress_handle;), + TP_printk("net_device=%s addr=%pM vid=%u num_ports=%d offloaded=%d", + __entry->dev_name, + __entry->addr, + __entry->vid, + __entry->num_ports, + __entry->offloaded)); + +DEFINE_EVENT(mlx5_esw_bridge_mdb_port_change_template, + mlx5_esw_bridge_port_mdb_attach, + TP_PROTO(const struct net_device *dev, + const struct mlx5_esw_bridge_mdb_entry *mdb), + TP_ARGS(dev, mdb)); + +DEFINE_EVENT(mlx5_esw_bridge_mdb_port_change_template, + mlx5_esw_bridge_port_mdb_detach, + TP_PROTO(const struct net_device *dev, + const struct mlx5_esw_bridge_mdb_entry *mdb), + TP_ARGS(dev, mdb)); + #endif /* This part must be outside protection */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c index 75015d370922..7c79476cc5f9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c @@ -744,7 +744,7 @@ static int esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev *mdev, const char * u64 value; int err; - err = mlx5e_port_max_linkspeed(mdev, &link_speed_max); + err = mlx5_port_max_linkspeed(mdev, &link_speed_max); if (err) { NL_SET_ERR_MSG_MOD(extack, "Failed to get link maximum speed"); return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/vporttbl.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/vporttbl.c index 9e72118f2e4c..749c3957a128 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/vporttbl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/vporttbl.c @@ -11,7 +11,7 @@ struct mlx5_vport_key { u16 prio; u16 vport; u16 vhca_id; - const struct esw_vport_tbl_namespace *vport_ns; + struct esw_vport_tbl_namespace *vport_ns; } __packed; struct mlx5_vport_table { @@ -21,6 +21,14 @@ struct mlx5_vport_table { struct mlx5_vport_key key; }; +static void +esw_vport_tbl_init(struct mlx5_eswitch *esw, struct esw_vport_tbl_namespace *ns) +{ + if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) + ns->flags |= (MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT | + MLX5_FLOW_TABLE_TUNNEL_EN_DECAP); +} + static struct mlx5_flow_table * esw_vport_tbl_create(struct mlx5_eswitch *esw, struct mlx5_flow_namespace *ns, const struct esw_vport_tbl_namespace *vport_ns) @@ -80,6 +88,7 @@ mlx5_esw_vporttbl_get(struct mlx5_eswitch *esw, struct mlx5_vport_tbl_attr *attr u32 hkey; mutex_lock(&esw->fdb_table.offloads.vports.lock); + esw_vport_tbl_init(esw, attr->vport_ns); hkey = flow_attr_to_vport_key(esw, attr, &skey); e = esw_vport_tbl_lookup(esw, &skey, hkey); if (e) { @@ -127,6 +136,7 @@ mlx5_esw_vporttbl_put(struct mlx5_eswitch *esw, struct mlx5_vport_tbl_attr *attr u32 hkey; mutex_lock(&esw->fdb_table.offloads.vports.lock); + esw_vport_tbl_init(esw, attr->vport_ns); hkey = flow_attr_to_vport_key(esw, attr, &key); e = esw_vport_tbl_lookup(esw, &key, hkey); if (!e || --e->num_rules) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 19fed514fc17..901c53751b0a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -36,7 +36,6 @@ #include <linux/mlx5/vport.h> #include <linux/mlx5/fs.h> #include <linux/mlx5/mpfs.h> -#include <linux/debugfs.h> #include "esw/acl/lgcy.h" #include "esw/legacy.h" #include "esw/qos.h" @@ -1056,7 +1055,6 @@ int mlx5_eswitch_load_vport(struct mlx5_eswitch *esw, u16 vport_num, if (err) return err; - mlx5_esw_vport_debugfs_create(esw, vport_num, false, 0); err = esw_offloads_load_rep(esw, vport_num); if (err) goto err_rep; @@ -1064,7 +1062,6 @@ int mlx5_eswitch_load_vport(struct mlx5_eswitch *esw, u16 vport_num, return err; err_rep: - mlx5_esw_vport_debugfs_destroy(esw, vport_num); mlx5_esw_vport_disable(esw, vport_num); return err; } @@ -1072,7 +1069,6 @@ err_rep: void mlx5_eswitch_unload_vport(struct mlx5_eswitch *esw, u16 vport_num) { esw_offloads_unload_rep(esw, vport_num); - mlx5_esw_vport_debugfs_destroy(esw, vport_num); mlx5_esw_vport_disable(esw, vport_num); } @@ -1510,7 +1506,7 @@ out_free: return err; } -static int mlx5_esw_vport_alloc(struct mlx5_eswitch *esw, struct mlx5_core_dev *dev, +static int mlx5_esw_vport_alloc(struct mlx5_eswitch *esw, int index, u16 vport_num) { struct mlx5_vport *vport; @@ -1564,7 +1560,7 @@ static int mlx5_esw_vports_init(struct mlx5_eswitch *esw) xa_init(&esw->vports); - err = mlx5_esw_vport_alloc(esw, dev, idx, MLX5_VPORT_PF); + err = mlx5_esw_vport_alloc(esw, idx, MLX5_VPORT_PF); if (err) goto err; if (esw->first_host_vport == MLX5_VPORT_PF) @@ -1572,7 +1568,7 @@ static int mlx5_esw_vports_init(struct mlx5_eswitch *esw) idx++; for (i = 0; i < mlx5_core_max_vfs(dev); i++) { - err = mlx5_esw_vport_alloc(esw, dev, idx, idx); + err = mlx5_esw_vport_alloc(esw, idx, idx); if (err) goto err; xa_set_mark(&esw->vports, idx, MLX5_ESW_VPT_VF); @@ -1581,7 +1577,7 @@ static int mlx5_esw_vports_init(struct mlx5_eswitch *esw) } base_sf_num = mlx5_sf_start_function_id(dev); for (i = 0; i < mlx5_sf_max_functions(dev); i++) { - err = mlx5_esw_vport_alloc(esw, dev, idx, base_sf_num + i); + err = mlx5_esw_vport_alloc(esw, idx, base_sf_num + i); if (err) goto err; xa_set_mark(&esw->vports, base_sf_num + i, MLX5_ESW_VPT_SF); @@ -1592,7 +1588,7 @@ static int mlx5_esw_vports_init(struct mlx5_eswitch *esw) if (err) goto err; for (i = 0; i < max_host_pf_sfs; i++) { - err = mlx5_esw_vport_alloc(esw, dev, idx, base_sf_num + i); + err = mlx5_esw_vport_alloc(esw, idx, base_sf_num + i); if (err) goto err; xa_set_mark(&esw->vports, base_sf_num + i, MLX5_ESW_VPT_SF); @@ -1600,12 +1596,12 @@ static int mlx5_esw_vports_init(struct mlx5_eswitch *esw) } if (mlx5_ecpf_vport_exists(dev)) { - err = mlx5_esw_vport_alloc(esw, dev, idx, MLX5_VPORT_ECPF); + err = mlx5_esw_vport_alloc(esw, idx, MLX5_VPORT_ECPF); if (err) goto err; idx++; } - err = mlx5_esw_vport_alloc(esw, dev, idx, MLX5_VPORT_UPLINK); + err = mlx5_esw_vport_alloc(esw, idx, MLX5_VPORT_UPLINK); if (err) goto err; return 0; @@ -1672,7 +1668,6 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) dev->priv.eswitch = esw; BLOCKING_INIT_NOTIFIER_HEAD(&esw->n_head); - esw->dbgfs = debugfs_create_dir("esw", mlx5_debugfs_get_dev_root(esw->dev)); esw_info(dev, "Total vports %d, per vport: max uc(%d) max mc(%d)\n", esw->total_vports, @@ -1696,7 +1691,6 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) esw_info(esw->dev, "cleanup\n"); - debugfs_remove_recursive(esw->dbgfs); esw->dev->priv.eswitch = NULL; destroy_workqueue(esw->work_queue); WARN_ON(refcount_read(&esw->qos.refcnt)); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 19e9a77c4633..1a042c981713 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -195,7 +195,6 @@ struct mlx5_vport { enum mlx5_eswitch_vport_event enabled_events; int index; struct devlink_port *dl_port; - struct dentry *dbgfs; }; struct mlx5_esw_indir_table; @@ -263,6 +262,7 @@ struct mlx5_esw_offload { const struct mlx5_eswitch_rep_ops *rep_ops[NUM_REP_TYPES]; u8 inline_mode; atomic64_t num_flows; + u64 num_block_encap; enum devlink_eswitch_encap_mode encap; struct ida vport_metadata_ida; unsigned int host_number; /* ECPF supports one external host */ @@ -342,7 +342,6 @@ struct mlx5_eswitch { u32 large_group_num; } params; struct blocking_notifier_head n_head; - struct dentry *dbgfs; }; void esw_offloads_disable(struct mlx5_eswitch *esw); @@ -355,7 +354,6 @@ mlx5_eswitch_add_send_to_vport_meta_rule(struct mlx5_eswitch *esw, u16 vport_num void mlx5_eswitch_del_send_to_vport_meta_rule(struct mlx5_flow_handle *rule); bool mlx5_esw_vport_match_metadata_supported(const struct mlx5_eswitch *esw); -int mlx5_esw_offloads_vport_metadata_set(struct mlx5_eswitch *esw, bool enable); u32 mlx5_esw_match_metadata_alloc(struct mlx5_eswitch *esw); void mlx5_esw_match_metadata_free(struct mlx5_eswitch *esw, u32 metadata); @@ -674,7 +672,7 @@ struct mlx5_vport_tbl_attr { u32 chain; u16 prio; u16 vport; - const struct esw_vport_tbl_namespace *vport_ns; + struct esw_vport_tbl_namespace *vport_ns; }; struct mlx5_flow_table * @@ -703,9 +701,6 @@ int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, u16 vport_ void mlx5_esw_offloads_devlink_port_unregister(struct mlx5_eswitch *esw, u16 vport_num); struct devlink_port *mlx5_esw_offloads_devlink_port(struct mlx5_eswitch *esw, u16 vport_num); -void mlx5_esw_vport_debugfs_create(struct mlx5_eswitch *esw, u16 vport_num, bool is_sf, u16 sf_num); -void mlx5_esw_vport_debugfs_destroy(struct mlx5_eswitch *esw, u16 vport_num); - int mlx5_esw_devlink_sf_port_register(struct mlx5_eswitch *esw, struct devlink_port *dl_port, u16 vport_num, u32 controller, u32 sfnum); void mlx5_esw_devlink_sf_port_unregister(struct mlx5_eswitch *esw, u16 vport_num); @@ -748,6 +743,9 @@ void mlx5_eswitch_offloads_destroy_single_fdb(struct mlx5_eswitch *master_esw, struct mlx5_eswitch *slave_esw); int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw); +bool mlx5_eswitch_block_encap(struct mlx5_core_dev *dev); +void mlx5_eswitch_unblock_encap(struct mlx5_core_dev *dev); + static inline int mlx5_eswitch_num_vfs(struct mlx5_eswitch *esw) { if (mlx5_esw_allowed(esw)) @@ -761,6 +759,7 @@ mlx5_eswitch_get_slow_fdb(struct mlx5_eswitch *esw) { return esw->fdb_table.offloads.slow_fdb; } + #else /* CONFIG_MLX5_ESWITCH */ /* eswitch API stubs */ static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; } @@ -805,6 +804,15 @@ mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw) { return 0; } + +static inline bool mlx5_eswitch_block_encap(struct mlx5_core_dev *dev) +{ + return true; +} + +static inline void mlx5_eswitch_unblock_encap(struct mlx5_core_dev *dev) +{ +} #endif /* CONFIG_MLX5_ESWITCH */ #endif /* __MLX5_ESWITCH_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 25a8076a77bf..69215ffb9999 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -73,7 +73,7 @@ #define MLX5_ESW_FT_OFFLOADS_DROP_RULE (1) -static const struct esw_vport_tbl_namespace mlx5_esw_vport_tbl_mirror_ns = { +static struct esw_vport_tbl_namespace mlx5_esw_vport_tbl_mirror_ns = { .max_fte = MLX5_ESW_VPORT_TBL_SIZE, .max_num_groups = MLX5_ESW_VPORT_TBL_NUM_GROUPS, .flags = 0, @@ -760,7 +760,6 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, kfree(dest); return rule; err_chain_src_rewrite: - esw_put_dest_tables_loop(esw, attr, 0, i); mlx5_esw_vporttbl_put(esw, &fwd_attr); err_get_fwd: mlx5_chains_put_table(chains, attr->chain, attr->prio, 0); @@ -803,7 +802,6 @@ __mlx5_eswitch_del_rule(struct mlx5_eswitch *esw, if (fwd_rule) { mlx5_esw_vporttbl_put(esw, &fwd_attr); mlx5_chains_put_table(chains, attr->chain, attr->prio, 0); - esw_put_dest_tables_loop(esw, attr, 0, esw_attr->split_count); } else { if (split) mlx5_esw_vporttbl_put(esw, &fwd_attr); @@ -1374,14 +1372,11 @@ esw_chains_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *miss_fdb) struct mlx5_flow_table *nf_ft, *ft; struct mlx5_chains_attr attr = {}; struct mlx5_fs_chains *chains; - u32 fdb_max; int err; - fdb_max = 1 << MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size); - esw_init_chains_offload_flags(esw, &attr.flags); attr.ns = MLX5_FLOW_NAMESPACE_FDB; - attr.max_ft_sz = fdb_max; + attr.fs_base_prio = FDB_TC_OFFLOAD; attr.max_grp_num = esw->params.large_group_num; attr.default_ft = miss_fdb; attr.mapping = esw->offloads.reg_c0_obj_pool; @@ -1392,6 +1387,7 @@ esw_chains_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *miss_fdb) esw_warn(dev, "Failed to create fdb chains err(%d)\n", err); return err; } + mlx5_chains_print_info(chains); esw->fdb_table.offloads.esw_chains_priv = chains; @@ -2941,28 +2937,6 @@ metadata_err: return err; } -int mlx5_esw_offloads_vport_metadata_set(struct mlx5_eswitch *esw, bool enable) -{ - int err = 0; - - down_write(&esw->mode_lock); - if (mlx5_esw_is_fdb_created(esw)) { - err = -EBUSY; - goto done; - } - if (!mlx5_esw_vport_match_metadata_supported(esw)) { - err = -EOPNOTSUPP; - goto done; - } - if (enable) - esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA; - else - esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA; -done: - up_write(&esw->mode_lock); - return err; -} - int esw_vport_create_offloads_acl_tables(struct mlx5_eswitch *esw, struct mlx5_vport *vport) @@ -3588,6 +3562,47 @@ int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode) return err; } +bool mlx5_eswitch_block_encap(struct mlx5_core_dev *dev) +{ + struct devlink *devlink = priv_to_devlink(dev); + struct mlx5_eswitch *esw; + + devl_lock(devlink); + esw = mlx5_devlink_eswitch_get(devlink); + if (IS_ERR(esw)) { + devl_unlock(devlink); + /* Failure means no eswitch => not possible to change encap */ + return true; + } + + down_write(&esw->mode_lock); + if (esw->mode != MLX5_ESWITCH_LEGACY && + esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) { + up_write(&esw->mode_lock); + devl_unlock(devlink); + return false; + } + + esw->offloads.num_block_encap++; + up_write(&esw->mode_lock); + devl_unlock(devlink); + return true; +} + +void mlx5_eswitch_unblock_encap(struct mlx5_core_dev *dev) +{ + struct devlink *devlink = priv_to_devlink(dev); + struct mlx5_eswitch *esw; + + esw = mlx5_devlink_eswitch_get(devlink); + if (IS_ERR(esw)) + return; + + down_write(&esw->mode_lock); + esw->offloads.num_block_encap--; + up_write(&esw->mode_lock); +} + int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, enum devlink_eswitch_encap_mode encap, struct netlink_ext_ack *extack) @@ -3629,6 +3644,13 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, goto unlock; } + if (esw->offloads.num_block_encap) { + NL_SET_ERR_MSG_MOD(extack, + "Can't set encapsulation when IPsec SA and/or policies are configured"); + err = -EOPNOTSUPP; + goto unlock; + } + esw_destroy_offloads_fdb_tables(esw); esw->offloads.encap = encap; @@ -3782,14 +3804,12 @@ int mlx5_esw_offloads_sf_vport_enable(struct mlx5_eswitch *esw, struct devlink_p if (err) goto devlink_err; - mlx5_esw_vport_debugfs_create(esw, vport_num, true, sfnum); err = mlx5_esw_offloads_rep_load(esw, vport_num); if (err) goto rep_err; return 0; rep_err: - mlx5_esw_vport_debugfs_destroy(esw, vport_num); mlx5_esw_devlink_sf_port_unregister(esw, vport_num); devlink_err: mlx5_esw_vport_disable(esw, vport_num); @@ -3799,7 +3819,6 @@ devlink_err: void mlx5_esw_offloads_sf_vport_disable(struct mlx5_eswitch *esw, u16 vport_num) { mlx5_esw_offloads_rep_unload(esw, vport_num); - mlx5_esw_vport_debugfs_destroy(esw, vport_num); mlx5_esw_devlink_sf_port_unregister(esw, vport_num); mlx5_esw_vport_disable(esw, vport_num); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c index 3a9a6bb9158d..edd910258314 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c @@ -210,18 +210,6 @@ static bool mlx5_eswitch_offload_is_uplink_port(const struct mlx5_eswitch *esw, return (port_mask & port_value) == MLX5_VPORT_UPLINK; } -static bool -mlx5_eswitch_is_push_vlan_no_cap(struct mlx5_eswitch *esw, - struct mlx5_flow_act *flow_act) -{ - if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH && - !(mlx5_fs_get_capabilities(esw->dev, MLX5_FLOW_NAMESPACE_FDB) & - MLX5_FLOW_STEERING_CAP_VLAN_PUSH_ON_RX)) - return true; - - return false; -} - bool mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr, @@ -237,7 +225,10 @@ mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw, (!mlx5_eswitch_offload_is_uplink_port(esw, spec) && !esw_attr->int_port)) return false; - if (mlx5_eswitch_is_push_vlan_no_cap(esw, flow_act)) + /* push vlan on RX */ + if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH && + !(mlx5_fs_get_capabilities(esw->dev, MLX5_FLOW_NAMESPACE_FDB) & + MLX5_FLOW_STEERING_CAP_VLAN_PUSH_ON_RX)) return true; /* hairpin */ @@ -261,31 +252,19 @@ mlx5_eswitch_add_termtbl_rule(struct mlx5_eswitch *esw, struct mlx5_flow_act term_tbl_act = {}; struct mlx5_flow_handle *rule = NULL; bool term_table_created = false; - bool is_push_vlan_on_rx; int num_vport_dests = 0; int i, curr_dest; - is_push_vlan_on_rx = mlx5_eswitch_is_push_vlan_no_cap(esw, flow_act); mlx5_eswitch_termtbl_actions_move(flow_act, &term_tbl_act); term_tbl_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; for (i = 0; i < num_dest; i++) { struct mlx5_termtbl_handle *tt; - bool hairpin = false; /* only vport destinations can be terminated */ if (dest[i].type != MLX5_FLOW_DESTINATION_TYPE_VPORT) continue; - if (attr->dests[num_vport_dests].rep && - attr->dests[num_vport_dests].rep->vport == MLX5_VPORT_UPLINK) - hairpin = true; - - if (!is_push_vlan_on_rx && !hairpin) { - num_vport_dests++; - continue; - } - if (attr->dests[num_vport_dests].flags & MLX5_ESW_DEST_ENCAP) { term_tbl_act.action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; term_tbl_act.pkt_reformat = attr->dests[num_vport_dests].pkt_reformat; @@ -333,9 +312,6 @@ revert_changes: for (curr_dest = 0; curr_dest < num_vport_dests; curr_dest++) { struct mlx5_termtbl_handle *tt = attr->dests[curr_dest].termtbl; - if (!tt) - continue; - attr->dests[curr_dest].termtbl = NULL; /* search for the destination associated with the diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 731acbe22dc7..19da02c41616 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -137,7 +137,7 @@ #define LAG_MIN_LEVEL (OFFLOADS_MIN_LEVEL + KERNEL_RX_MACSEC_MIN_LEVEL + 1) #define KERNEL_TX_IPSEC_NUM_PRIOS 1 -#define KERNEL_TX_IPSEC_NUM_LEVELS 2 +#define KERNEL_TX_IPSEC_NUM_LEVELS 3 #define KERNEL_TX_IPSEC_MIN_LEVEL (KERNEL_TX_IPSEC_NUM_LEVELS) #define KERNEL_TX_MACSEC_NUM_PRIOS 1 @@ -1762,7 +1762,8 @@ static bool dest_is_valid(struct mlx5_flow_destination *dest, if (ignore_level) { if (ft->type != FS_FT_FDB && - ft->type != FS_FT_NIC_RX) + ft->type != FS_FT_NIC_RX && + ft->type != FS_FT_NIC_TX) return false; if (dest->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE && @@ -2996,7 +2997,7 @@ static int init_fdb_root_ns(struct mlx5_flow_steering *steering) goto out_err; } - maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_BR_OFFLOAD, 3); + maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_BR_OFFLOAD, 4); if (IS_ERR(maj_prio)) { err = PTR_ERR(maj_prio); goto out_err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c index 4c2dad9d7cfb..50022e7565f1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -167,7 +167,7 @@ static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev) if (mlx5_health_wait_pci_up(dev)) mlx5_core_err(dev, "reset reload flow aborted, PCI reads still not working\n"); else - mlx5_load_one(dev); + mlx5_load_one(dev, true); devlink_remote_reload_actions_performed(priv_to_devlink(dev), 0, BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT) | BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE)); @@ -499,7 +499,7 @@ int mlx5_fw_reset_wait_reset_done(struct mlx5_core_dev *dev) err = fw_reset->ret; if (test_and_clear_bit(MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED, &fw_reset->reset_flags)) { mlx5_unload_one_devl_locked(dev, false); - mlx5_load_one_devl_locked(dev, false); + mlx5_load_one_devl_locked(dev, true); } out: clear_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index f9438d4e43ca..871c32dda66e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -42,6 +42,7 @@ #include "lib/pci_vsc.h" #include "lib/tout.h" #include "diag/fw_tracer.h" +#include "diag/reporter_vnic.h" enum { MAX_MISSES = 3, @@ -325,6 +326,10 @@ int mlx5_health_wait_pci_up(struct mlx5_core_dev *dev) while (sensor_pci_not_working(dev)) { if (time_after(jiffies, end)) return -ETIMEDOUT; + if (test_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state)) { + mlx5_core_warn(dev, "device is being removed, stop waiting for PCI\n"); + return -ENODEV; + } msleep(100); } return 0; @@ -894,6 +899,7 @@ void mlx5_health_cleanup(struct mlx5_core_dev *dev) cancel_delayed_work_sync(&health->update_fw_log_ts_work); destroy_workqueue(health->wq); + mlx5_reporter_vnic_destroy(dev); mlx5_fw_reporters_destroy(dev); } @@ -903,6 +909,7 @@ int mlx5_health_init(struct mlx5_core_dev *dev) char *name; mlx5_fw_reporters_create(dev); + mlx5_reporter_vnic_create(dev); health = &dev->priv.health; name = kmalloc(64, GFP_KERNEL); @@ -922,6 +929,7 @@ int mlx5_health_init(struct mlx5_core_dev *dev) return 0; out_err: + mlx5_reporter_vnic_destroy(dev); mlx5_fw_reporters_destroy(dev); return -ENOMEM; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c index 380a208ab137..fa467335526e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c @@ -45,30 +45,28 @@ static int cpu_get_least_loaded(struct mlx5_irq_pool *pool, /* Creating an IRQ from irq_pool */ static struct mlx5_irq * -irq_pool_request_irq(struct mlx5_irq_pool *pool, const struct cpumask *req_mask) +irq_pool_request_irq(struct mlx5_irq_pool *pool, struct irq_affinity_desc *af_desc) { - cpumask_var_t auto_mask; - struct mlx5_irq *irq; + struct irq_affinity_desc auto_desc = {}; u32 irq_index; int err; - if (!zalloc_cpumask_var(&auto_mask, GFP_KERNEL)) - return ERR_PTR(-ENOMEM); err = xa_alloc(&pool->irqs, &irq_index, NULL, pool->xa_num_irqs, GFP_KERNEL); if (err) return ERR_PTR(err); if (pool->irqs_per_cpu) { - if (cpumask_weight(req_mask) > 1) + if (cpumask_weight(&af_desc->mask) > 1) /* if req_mask contain more then one CPU, set the least loadad CPU * of req_mask */ - cpumask_set_cpu(cpu_get_least_loaded(pool, req_mask), auto_mask); + cpumask_set_cpu(cpu_get_least_loaded(pool, &af_desc->mask), + &auto_desc.mask); else - cpu_get(pool, cpumask_first(req_mask)); + cpu_get(pool, cpumask_first(&af_desc->mask)); } - irq = mlx5_irq_alloc(pool, irq_index, cpumask_empty(auto_mask) ? req_mask : auto_mask); - free_cpumask_var(auto_mask); - return irq; + return mlx5_irq_alloc(pool, irq_index, + cpumask_empty(&auto_desc.mask) ? af_desc : &auto_desc, + NULL); } /* Looking for the IRQ with the smallest refcount that fits req_mask. @@ -115,22 +113,22 @@ irq_pool_find_least_loaded(struct mlx5_irq_pool *pool, const struct cpumask *req /** * mlx5_irq_affinity_request - request an IRQ according to the given mask. * @pool: IRQ pool to request from. - * @req_mask: cpumask requested for this IRQ. + * @af_desc: affinity descriptor for this IRQ. * * This function returns a pointer to IRQ, or ERR_PTR in case of error. */ struct mlx5_irq * -mlx5_irq_affinity_request(struct mlx5_irq_pool *pool, const struct cpumask *req_mask) +mlx5_irq_affinity_request(struct mlx5_irq_pool *pool, struct irq_affinity_desc *af_desc) { struct mlx5_irq *least_loaded_irq, *new_irq; mutex_lock(&pool->lock); - least_loaded_irq = irq_pool_find_least_loaded(pool, req_mask); + least_loaded_irq = irq_pool_find_least_loaded(pool, &af_desc->mask); if (least_loaded_irq && mlx5_irq_read_locked(least_loaded_irq) < pool->min_threshold) goto out; /* We didn't find an IRQ with less than min_thres, try to allocate a new IRQ */ - new_irq = irq_pool_request_irq(pool, req_mask); + new_irq = irq_pool_request_irq(pool, af_desc); if (IS_ERR(new_irq)) { if (!least_loaded_irq) { /* We failed to create an IRQ and we didn't find an IRQ */ @@ -194,32 +192,30 @@ int mlx5_irq_affinity_irqs_request_auto(struct mlx5_core_dev *dev, int nirqs, struct mlx5_irq **irqs) { struct mlx5_irq_pool *pool = mlx5_irq_pool_get(dev); - cpumask_var_t req_mask; + struct irq_affinity_desc af_desc = {}; struct mlx5_irq *irq; int i = 0; - if (!zalloc_cpumask_var(&req_mask, GFP_KERNEL)) - return -ENOMEM; - cpumask_copy(req_mask, cpu_online_mask); + af_desc.is_managed = 1; + cpumask_copy(&af_desc.mask, cpu_online_mask); for (i = 0; i < nirqs; i++) { if (mlx5_irq_pool_is_sf_pool(pool)) - irq = mlx5_irq_affinity_request(pool, req_mask); + irq = mlx5_irq_affinity_request(pool, &af_desc); else /* In case SF pool doesn't exists, fallback to the PF IRQs. * The PF IRQs are already allocated and binded to CPU * at this point. Hence, only an index is needed. */ - irq = mlx5_irq_request(dev, i, NULL); + irq = mlx5_irq_request(dev, i, NULL, NULL); if (IS_ERR(irq)) break; irqs[i] = irq; - cpumask_clear_cpu(cpumask_first(mlx5_irq_get_affinity_mask(irq)), req_mask); + cpumask_clear_cpu(cpumask_first(mlx5_irq_get_affinity_mask(irq)), &af_desc.mask); mlx5_core_dbg(pool->dev, "IRQ %u mapped to cpu %*pbl, %u EQs on this irq\n", pci_irq_vector(dev->pdev, mlx5_irq_get_index(irq)), cpumask_pr_args(mlx5_irq_get_affinity_mask(irq)), mlx5_irq_read_locked(irq) / MLX5_EQ_REFS_PER_IRQ); } - free_cpumask_var(req_mask); if (!i) return PTR_ERR(irq); return i; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index 4c9a40211059..932fbc843c69 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -39,7 +39,7 @@ #include "clock.h" enum { - MLX5_CYCLES_SHIFT = 23 + MLX5_CYCLES_SHIFT = 31 }; enum { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c index 81ed91fee59b..db9df9798ffa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c @@ -14,10 +14,8 @@ #define chains_lock(chains) ((chains)->lock) #define chains_ht(chains) ((chains)->chains_ht) #define prios_ht(chains) ((chains)->prios_ht) -#define tc_default_ft(chains) ((chains)->tc_default_ft) -#define tc_end_ft(chains) ((chains)->tc_end_ft) -#define ns_to_chains_fs_prio(ns) ((ns) == MLX5_FLOW_NAMESPACE_FDB ? \ - FDB_TC_OFFLOAD : MLX5E_TC_PRIO) +#define chains_default_ft(chains) ((chains)->chains_default_ft) +#define chains_end_ft(chains) ((chains)->chains_end_ft) #define FT_TBL_SZ (64 * 1024) struct mlx5_fs_chains { @@ -28,13 +26,15 @@ struct mlx5_fs_chains { /* Protects above chains_ht and prios_ht */ struct mutex lock; - struct mlx5_flow_table *tc_default_ft; - struct mlx5_flow_table *tc_end_ft; + struct mlx5_flow_table *chains_default_ft; + struct mlx5_flow_table *chains_end_ft; struct mapping_ctx *chains_mapping; enum mlx5_flow_namespace_type ns; u32 group_num; u32 flags; + int fs_base_prio; + int fs_base_level; }; struct fs_chain { @@ -145,7 +145,7 @@ void mlx5_chains_set_end_ft(struct mlx5_fs_chains *chains, struct mlx5_flow_table *ft) { - tc_end_ft(chains) = ft; + chains_end_ft(chains) = ft; } static struct mlx5_flow_table * @@ -164,11 +164,11 @@ mlx5_chains_create_table(struct mlx5_fs_chains *chains, sz = (chain == mlx5_chains_get_nf_ft_chain(chains)) ? FT_TBL_SZ : POOL_NEXT_SIZE; ft_attr.max_fte = sz; - /* We use tc_default_ft(chains) as the table's next_ft till + /* We use chains_default_ft(chains) as the table's next_ft till * ignore_flow_level is allowed on FT creation and not just for FTEs. * Instead caller should add an explicit miss rule if needed. */ - ft_attr.next_ft = tc_default_ft(chains); + ft_attr.next_ft = chains_default_ft(chains); /* The root table(chain 0, prio 1, level 0) is required to be * connected to the previous fs_core managed prio. @@ -177,22 +177,22 @@ mlx5_chains_create_table(struct mlx5_fs_chains *chains, */ if (!mlx5_chains_ignore_flow_level_supported(chains) || (chain == 0 && prio == 1 && level == 0)) { - ft_attr.level = level; - ft_attr.prio = prio - 1; + ft_attr.level = chains->fs_base_level; + ft_attr.prio = chains->fs_base_prio; ns = (chains->ns == MLX5_FLOW_NAMESPACE_FDB) ? mlx5_get_fdb_sub_ns(chains->dev, chain) : mlx5_get_flow_namespace(chains->dev, chains->ns); } else { ft_attr.flags |= MLX5_FLOW_TABLE_UNMANAGED; - ft_attr.prio = ns_to_chains_fs_prio(chains->ns); + ft_attr.prio = chains->fs_base_prio; /* Firmware doesn't allow us to create another level 0 table, - * so we create all unmanaged tables as level 1. + * so we create all unmanaged tables as level 1 (base + 1). * * To connect them, we use explicit miss rules with * ignore_flow_level. Caller is responsible to create * these rules (if needed). */ - ft_attr.level = 1; + ft_attr.level = chains->fs_base_level + 1; ns = mlx5_get_flow_namespace(chains->dev, chains->ns); } @@ -220,7 +220,8 @@ create_chain_restore(struct fs_chain *chain) int err; if (chain->chain == mlx5_chains_get_nf_ft_chain(chains) || - !mlx5_chains_prios_supported(chains)) + !mlx5_chains_prios_supported(chains) || + !chains->chains_mapping) return 0; err = mlx5_chains_get_chain_mapping(chains, chain->chain, &index); @@ -380,7 +381,7 @@ mlx5_chains_add_miss_rule(struct fs_chain *chain, dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; dest.ft = next_ft; - if (next_ft == tc_end_ft(chains) && + if (chains->chains_mapping && next_ft == chains_end_ft(chains) && chain->chain != mlx5_chains_get_nf_ft_chain(chains) && mlx5_chains_prios_supported(chains)) { act.modify_hdr = chain->miss_modify_hdr; @@ -494,8 +495,8 @@ mlx5_chains_create_prio(struct mlx5_fs_chains *chains, /* Default miss for each chain: */ next_ft = (chain == mlx5_chains_get_nf_ft_chain(chains)) ? - tc_default_ft(chains) : - tc_end_ft(chains); + chains_default_ft(chains) : + chains_end_ft(chains); list_for_each(pos, &chain_s->prios_list) { struct prio *p = list_entry(pos, struct prio, list); @@ -681,7 +682,7 @@ err_get_prio: struct mlx5_flow_table * mlx5_chains_get_tc_end_ft(struct mlx5_fs_chains *chains) { - return tc_end_ft(chains); + return chains_end_ft(chains); } struct mlx5_flow_table * @@ -718,48 +719,38 @@ mlx5_chains_destroy_global_table(struct mlx5_fs_chains *chains, static struct mlx5_fs_chains * mlx5_chains_init(struct mlx5_core_dev *dev, struct mlx5_chains_attr *attr) { - struct mlx5_fs_chains *chains_priv; - u32 max_flow_counter; + struct mlx5_fs_chains *chains; int err; - chains_priv = kzalloc(sizeof(*chains_priv), GFP_KERNEL); - if (!chains_priv) + chains = kzalloc(sizeof(*chains), GFP_KERNEL); + if (!chains) return ERR_PTR(-ENOMEM); - max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) | - MLX5_CAP_GEN(dev, max_flow_counter_15_0); - - mlx5_core_dbg(dev, - "Init flow table chains, max counters(%d), groups(%d), max flow table size(%d)\n", - max_flow_counter, attr->max_grp_num, attr->max_ft_sz); - - chains_priv->dev = dev; - chains_priv->flags = attr->flags; - chains_priv->ns = attr->ns; - chains_priv->group_num = attr->max_grp_num; - chains_priv->chains_mapping = attr->mapping; - tc_default_ft(chains_priv) = tc_end_ft(chains_priv) = attr->default_ft; + chains->dev = dev; + chains->flags = attr->flags; + chains->ns = attr->ns; + chains->group_num = attr->max_grp_num; + chains->chains_mapping = attr->mapping; + chains->fs_base_prio = attr->fs_base_prio; + chains->fs_base_level = attr->fs_base_level; + chains_default_ft(chains) = chains_end_ft(chains) = attr->default_ft; - mlx5_core_info(dev, "Supported tc offload range - chains: %u, prios: %u\n", - mlx5_chains_get_chain_range(chains_priv), - mlx5_chains_get_prio_range(chains_priv)); - - err = rhashtable_init(&chains_ht(chains_priv), &chain_params); + err = rhashtable_init(&chains_ht(chains), &chain_params); if (err) goto init_chains_ht_err; - err = rhashtable_init(&prios_ht(chains_priv), &prio_params); + err = rhashtable_init(&prios_ht(chains), &prio_params); if (err) goto init_prios_ht_err; - mutex_init(&chains_lock(chains_priv)); + mutex_init(&chains_lock(chains)); - return chains_priv; + return chains; init_prios_ht_err: - rhashtable_destroy(&chains_ht(chains_priv)); + rhashtable_destroy(&chains_ht(chains)); init_chains_ht_err: - kfree(chains_priv); + kfree(chains); return ERR_PTR(err); } @@ -808,3 +799,9 @@ mlx5_chains_put_chain_mapping(struct mlx5_fs_chains *chains, u32 chain_mapping) return mapping_remove(ctx, chain_mapping); } + +void +mlx5_chains_print_info(struct mlx5_fs_chains *chains) +{ + mlx5_core_dbg(chains->dev, "Flow table chains groups(%d)\n", chains->group_num); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.h index d50bdb226cef..8972fe05723a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.h @@ -17,8 +17,9 @@ enum mlx5_chains_flags { struct mlx5_chains_attr { enum mlx5_flow_namespace_type ns; + int fs_base_prio; + int fs_base_level; u32 flags; - u32 max_ft_sz; u32 max_grp_num; struct mlx5_flow_table *default_ft; struct mapping_ctx *mapping; @@ -68,6 +69,8 @@ void mlx5_chains_destroy(struct mlx5_fs_chains *chains); void mlx5_chains_set_end_ft(struct mlx5_fs_chains *chains, struct mlx5_flow_table *ft); +void +mlx5_chains_print_info(struct mlx5_fs_chains *chains); #else /* CONFIG_MLX5_CLS_ACT */ @@ -89,7 +92,9 @@ static inline struct mlx5_fs_chains * mlx5_chains_create(struct mlx5_core_dev *dev, struct mlx5_chains_attr *attr) { return NULL; } static inline void -mlx5_chains_destroy(struct mlx5_fs_chains *chains) {}; +mlx5_chains_destroy(struct mlx5_fs_chains *chains) {} +static inline void +mlx5_chains_print_info(struct mlx5_fs_chains *chains) {} #endif /* CONFIG_MLX5_CLS_ACT */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index f1de152a6113..edc738e86cac 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -46,12 +46,10 @@ #include <linux/kmod.h> #include <linux/mlx5/mlx5_ifc.h> #include <linux/mlx5/vport.h> -#ifdef CONFIG_RFS_ACCEL -#include <linux/cpu_rmap.h> -#endif #include <linux/version.h> #include <net/devlink.h> #include "mlx5_core.h" +#include "thermal.h" #include "lib/eq.h" #include "fs_core.h" #include "lib/mpfs.h" @@ -102,15 +100,19 @@ enum { static struct mlx5_profile profile[] = { [0] = { .mask = 0, + .num_cmd_caches = MLX5_NUM_COMMAND_CACHES, }, [1] = { .mask = MLX5_PROF_MASK_QP_SIZE, .log_max_qp = 12, + .num_cmd_caches = MLX5_NUM_COMMAND_CACHES, + }, [2] = { .mask = MLX5_PROF_MASK_QP_SIZE | MLX5_PROF_MASK_MR_CACHE, .log_max_qp = LOG_MAX_SUPPORTED_QPS, + .num_cmd_caches = MLX5_NUM_COMMAND_CACHES, .mr_cache[0] = { .size = 500, .limit = 250 @@ -176,6 +178,11 @@ static struct mlx5_profile profile[] = { .limit = 4 }, }, + [3] = { + .mask = MLX5_PROF_MASK_QP_SIZE, + .log_max_qp = LOG_MAX_SUPPORTED_QPS, + .num_cmd_caches = 0, + }, }; static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili, @@ -191,7 +198,7 @@ static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili, if (!(fw_initializing >> 31)) break; if (time_after(jiffies, end) || - test_and_clear_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state)) { + test_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state)) { err = -EBUSY; break; } @@ -710,7 +717,7 @@ static int handle_hca_cap_port_selection(struct mlx5_core_dev *dev, MLX5_ST_SZ_BYTES(port_selection_cap)); MLX5_SET(port_selection_cap, set_hca_cap, port_select_flow_table_bypass, 1); - err = set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MODE_PORT_SELECTION); + err = set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_PORT_SELECTION); return err; } @@ -917,7 +924,6 @@ static int mlx5_pci_init(struct mlx5_core_dev *dev, struct pci_dev *pdev, return 0; err_clr_master: - pci_clear_master(dev->pdev); release_bar(dev->pdev); err_disable: mlx5_pci_disable_device(dev); @@ -932,7 +938,6 @@ static void mlx5_pci_close(struct mlx5_core_dev *dev) */ mlx5_drain_health_wq(dev); iounmap(dev->iseg); - pci_clear_master(dev->pdev); release_bar(dev->pdev); mlx5_pci_disable_device(dev); } @@ -1402,16 +1407,16 @@ int mlx5_init_one(struct mlx5_core_dev *dev) goto function_teardown; } + err = mlx5_devlink_params_register(priv_to_devlink(dev)); + if (err) + goto err_devlink_params_reg; + err = mlx5_load(dev); if (err) goto err_load; set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); - err = mlx5_devlink_params_register(priv_to_devlink(dev)); - if (err) - goto err_devlink_params_reg; - err = mlx5_register_device(dev); if (err) goto err_register; @@ -1421,11 +1426,11 @@ int mlx5_init_one(struct mlx5_core_dev *dev) return 0; err_register: - mlx5_devlink_params_unregister(priv_to_devlink(dev)); -err_devlink_params_reg: clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); mlx5_unload(dev); err_load: + mlx5_devlink_params_unregister(priv_to_devlink(dev)); +err_devlink_params_reg: mlx5_cleanup_once(dev); function_teardown: mlx5_function_teardown(dev, true); @@ -1444,7 +1449,6 @@ void mlx5_uninit_one(struct mlx5_core_dev *dev) mutex_lock(&dev->intf_state_mutex); mlx5_unregister_device(dev); - mlx5_devlink_params_unregister(priv_to_devlink(dev)); if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) { mlx5_core_warn(dev, "%s: interface is down, NOP\n", @@ -1455,6 +1459,7 @@ void mlx5_uninit_one(struct mlx5_core_dev *dev) clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); mlx5_unload(dev); + mlx5_devlink_params_unregister(priv_to_devlink(dev)); mlx5_cleanup_once(dev); mlx5_function_teardown(dev, true); out: @@ -1509,13 +1514,13 @@ out: return err; } -int mlx5_load_one(struct mlx5_core_dev *dev) +int mlx5_load_one(struct mlx5_core_dev *dev, bool recovery) { struct devlink *devlink = priv_to_devlink(dev); int ret; devl_lock(devlink); - ret = mlx5_load_one_devl_locked(dev, false); + ret = mlx5_load_one_devl_locked(dev, recovery); devl_unlock(devlink); return ret; } @@ -1768,6 +1773,10 @@ static int probe_one(struct pci_dev *pdev, const struct pci_device_id *id) if (err) dev_err(&pdev->dev, "mlx5_crdump_enable failed with error code %d\n", err); + err = mlx5_thermal_init(dev); + if (err) + dev_err(&pdev->dev, "mlx5_thermal_init failed with error code %d\n", err); + pci_save_state(pdev); devlink_register(devlink); return 0; @@ -1796,6 +1805,7 @@ static void remove_one(struct pci_dev *pdev) mlx5_drain_fw_reset(dev); devlink_unregister(devlink); mlx5_sriov_disable(pdev); + mlx5_thermal_uninit(dev); mlx5_crdump_disable(dev); mlx5_drain_health_wq(dev); mlx5_uninit_one(dev); @@ -1912,7 +1922,8 @@ static void mlx5_pci_resume(struct pci_dev *pdev) mlx5_pci_trace(dev, "Enter, loading driver..\n"); - err = mlx5_load_one(dev); + err = mlx5_load_one(dev, false); + if (!err) devlink_health_reporter_state_update(dev->priv.health.fw_fatal_reporter, DEVLINK_HEALTH_REPORTER_STATE_HEALTHY); @@ -2003,7 +2014,7 @@ static int mlx5_resume(struct pci_dev *pdev) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); - return mlx5_load_one(dev); + return mlx5_load_one(dev, false); } static const struct pci_device_id mlx5_core_pci_table[] = { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index be0785f83083..1d879374acaa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -142,6 +142,7 @@ enum mlx5_semaphore_space_address { }; #define MLX5_DEFAULT_PROF 2 +#define MLX5_SF_PROF 3 static inline int mlx5_flexible_inlen(struct mlx5_core_dev *dev, size_t fixed, size_t item_size, size_t num_items, @@ -321,7 +322,7 @@ int mlx5_init_one(struct mlx5_core_dev *dev); void mlx5_uninit_one(struct mlx5_core_dev *dev); void mlx5_unload_one(struct mlx5_core_dev *dev, bool suspend); void mlx5_unload_one_devl_locked(struct mlx5_core_dev *dev, bool suspend); -int mlx5_load_one(struct mlx5_core_dev *dev); +int mlx5_load_one(struct mlx5_core_dev *dev, bool recovery); int mlx5_load_one_devl_locked(struct mlx5_core_dev *dev, bool recovery); int mlx5_vport_set_other_func_cap(struct mlx5_core_dev *dev, const void *hca_cap, u16 function_id, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h index 23cb63fa4588..efd0c299c5c7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h @@ -9,6 +9,7 @@ #define MLX5_COMP_EQS_PER_SF 8 struct mlx5_irq; +struct cpu_rmap; int mlx5_irq_table_init(struct mlx5_core_dev *dev); void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev); @@ -25,9 +26,10 @@ int mlx5_get_default_msix_vec_count(struct mlx5_core_dev *dev, int num_vfs); struct mlx5_irq *mlx5_ctrl_irq_request(struct mlx5_core_dev *dev); void mlx5_ctrl_irq_release(struct mlx5_irq *ctrl_irq); struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, u16 vecidx, - struct cpumask *affinity); + struct irq_affinity_desc *af_desc, + struct cpu_rmap **rmap); int mlx5_irqs_request_vectors(struct mlx5_core_dev *dev, u16 *cpus, int nirqs, - struct mlx5_irq **irqs); + struct mlx5_irq **irqs, struct cpu_rmap **rmap); void mlx5_irqs_release_vectors(struct mlx5_irq **irqs, int nirqs); int mlx5_irq_attach_nb(struct mlx5_irq *irq, struct notifier_block *nb); int mlx5_irq_detach_nb(struct mlx5_irq *irq, struct notifier_block *nb); @@ -39,7 +41,7 @@ struct mlx5_irq_pool; int mlx5_irq_affinity_irqs_request_auto(struct mlx5_core_dev *dev, int nirqs, struct mlx5_irq **irqs); struct mlx5_irq *mlx5_irq_affinity_request(struct mlx5_irq_pool *pool, - const struct cpumask *req_mask); + struct irq_affinity_desc *af_desc); void mlx5_irq_affinity_irqs_release(struct mlx5_core_dev *dev, struct mlx5_irq **irqs, int num_irqs); #else @@ -50,7 +52,7 @@ static inline int mlx5_irq_affinity_irqs_request_auto(struct mlx5_core_dev *dev, } static inline struct mlx5_irq * -mlx5_irq_affinity_request(struct mlx5_irq_pool *pool, const struct cpumask *req_mask) +mlx5_irq_affinity_request(struct mlx5_irq_pool *pool, struct irq_affinity_desc *af_desc) { return ERR_PTR(-EOPNOTSUPP); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index 6bde18bcd42f..2245d3b2f393 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* Copyright (c) 2019 Mellanox Technologies. */ +#include <linux/pci.h> #include <linux/interrupt.h> #include <linux/notifier.h> #include <linux/mlx5/driver.h> @@ -9,6 +10,7 @@ #include "mlx5_irq.h" #include "pci_irq.h" #include "lib/sf.h" +#include "lib/eq.h" #ifdef CONFIG_RFS_ACCEL #include <linux/cpu_rmap.h> #endif @@ -29,12 +31,11 @@ struct mlx5_irq { char name[MLX5_MAX_IRQ_NAME]; struct mlx5_irq_pool *pool; int refcount; - u32 index; - int irqn; + struct msi_map map; }; struct mlx5_irq_table { - struct mlx5_irq_pool *pf_pool; + struct mlx5_irq_pool *pcif_pool; struct mlx5_irq_pool *sf_ctrl_pool; struct mlx5_irq_pool *sf_comp_pool; }; @@ -127,15 +128,26 @@ out: static void irq_release(struct mlx5_irq *irq) { struct mlx5_irq_pool *pool = irq->pool; +#ifdef CONFIG_RFS_ACCEL + struct cpu_rmap *rmap; +#endif - xa_erase(&pool->irqs, irq->index); - /* free_irq requires that affinity_hint and rmap will be cleared - * before calling it. This is why there is asymmetry with set_rmap - * which should be called after alloc_irq but before request_irq. + xa_erase(&pool->irqs, irq->map.index); + /* free_irq requires that affinity_hint and rmap will be cleared before + * calling it. To satisfy this requirement, we call + * irq_cpu_rmap_remove() to remove the notifier */ - irq_update_affinity_hint(irq->irqn, NULL); + irq_update_affinity_hint(irq->map.virq, NULL); +#ifdef CONFIG_RFS_ACCEL + rmap = mlx5_eq_table_get_rmap(pool->dev); + if (rmap && irq->map.index) + irq_cpu_rmap_remove(rmap, irq->map.virq); +#endif + free_cpumask_var(irq->mask); - free_irq(irq->irqn, &irq->nh); + free_irq(irq->map.virq, &irq->nh); + if (irq->map.index && pci_msix_can_alloc_dyn(pool->dev->pdev)) + pci_msix_free_irq(pool->dev->pdev, irq->map); kfree(irq); } @@ -198,7 +210,7 @@ static void irq_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx) return; } - if (vecidx == pool->xa_num_irqs.max) { + if (!vecidx) { snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_async%d", vecidx); return; } @@ -207,7 +219,8 @@ static void irq_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx) } struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i, - const struct cpumask *affinity) + struct irq_affinity_desc *af_desc, + struct cpu_rmap **rmap) { struct mlx5_core_dev *dev = pool->dev; char name[MLX5_MAX_IRQ_NAME]; @@ -217,7 +230,28 @@ struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i, irq = kzalloc(sizeof(*irq), GFP_KERNEL); if (!irq) return ERR_PTR(-ENOMEM); - irq->irqn = pci_irq_vector(dev->pdev, i); + if (!i || !pci_msix_can_alloc_dyn(dev->pdev)) { + /* The vector at index 0 was already allocated. + * Just get the irq number. If dynamic irq is not supported + * vectors have also been allocated. + */ + irq->map.virq = pci_irq_vector(dev->pdev, i); + irq->map.index = 0; + } else { + irq->map = pci_msix_alloc_irq_at(dev->pdev, MSI_ANY_INDEX, af_desc); + if (!irq->map.virq) { + err = irq->map.index; + goto err_alloc_irq; + } + } + + if (i && rmap && *rmap) { +#ifdef CONFIG_RFS_ACCEL + err = irq_cpu_rmap_add(*rmap, irq->map.virq); + if (err) + goto err_irq_rmap; +#endif + } if (!mlx5_irq_pool_is_sf_pool(pool)) irq_set_name(pool, name, i); else @@ -225,7 +259,7 @@ struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i, ATOMIC_INIT_NOTIFIER_HEAD(&irq->nh); snprintf(irq->name, MLX5_MAX_IRQ_NAME, "%s@pci:%s", name, pci_name(dev->pdev)); - err = request_irq(irq->irqn, irq_int_handler, 0, irq->name, + err = request_irq(irq->map.virq, irq_int_handler, 0, irq->name, &irq->nh); if (err) { mlx5_core_err(dev, "Failed to request irq. err = %d\n", err); @@ -236,26 +270,37 @@ struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i, err = -ENOMEM; goto err_cpumask; } - if (affinity) { - cpumask_copy(irq->mask, affinity); - irq_set_affinity_and_hint(irq->irqn, irq->mask); + if (af_desc) { + cpumask_copy(irq->mask, &af_desc->mask); + irq_set_affinity_and_hint(irq->map.virq, irq->mask); } irq->pool = pool; irq->refcount = 1; - irq->index = i; - err = xa_err(xa_store(&pool->irqs, irq->index, irq, GFP_KERNEL)); + irq->map.index = i; + err = xa_err(xa_store(&pool->irqs, irq->map.index, irq, GFP_KERNEL)); if (err) { mlx5_core_err(dev, "Failed to alloc xa entry for irq(%u). err = %d\n", - irq->index, err); + irq->map.index, err); goto err_xa; } return irq; err_xa: - irq_update_affinity_hint(irq->irqn, NULL); + if (af_desc) + irq_update_affinity_hint(irq->map.virq, NULL); free_cpumask_var(irq->mask); err_cpumask: - free_irq(irq->irqn, &irq->nh); + free_irq(irq->map.virq, &irq->nh); err_req_irq: +#ifdef CONFIG_RFS_ACCEL + if (i && rmap && *rmap) { + free_irq_cpu_rmap(*rmap); + *rmap = NULL; + } +err_irq_rmap: +#endif + if (i && pci_msix_can_alloc_dyn(dev->pdev)) + pci_msix_free_irq(dev->pdev, irq->map); +err_alloc_irq: kfree(irq); return ERR_PTR(err); } @@ -292,7 +337,7 @@ struct cpumask *mlx5_irq_get_affinity_mask(struct mlx5_irq *irq) int mlx5_irq_get_index(struct mlx5_irq *irq) { - return irq->index; + return irq->map.index; } /* irq_pool API */ @@ -300,7 +345,8 @@ int mlx5_irq_get_index(struct mlx5_irq *irq) /* requesting an irq from a given pool according to given index */ static struct mlx5_irq * irq_pool_request_vector(struct mlx5_irq_pool *pool, int vecidx, - struct cpumask *affinity) + struct irq_affinity_desc *af_desc, + struct cpu_rmap **rmap) { struct mlx5_irq *irq; @@ -310,7 +356,7 @@ irq_pool_request_vector(struct mlx5_irq_pool *pool, int vecidx, mlx5_irq_get_locked(irq); goto unlock; } - irq = mlx5_irq_alloc(pool, vecidx, affinity); + irq = mlx5_irq_alloc(pool, vecidx, af_desc, rmap); unlock: mutex_unlock(&pool->lock); return irq; @@ -337,7 +383,7 @@ struct mlx5_irq_pool *mlx5_irq_pool_get(struct mlx5_core_dev *dev) /* In some configs, there won't be a pool of SFs IRQs. Hence, returning * the PF IRQs pool in case the SF pool doesn't exist. */ - return pool ? pool : irq_table->pf_pool; + return pool ? pool : irq_table->pcif_pool; } static struct mlx5_irq_pool *ctrl_irq_pool_get(struct mlx5_core_dev *dev) @@ -351,7 +397,7 @@ static struct mlx5_irq_pool *ctrl_irq_pool_get(struct mlx5_core_dev *dev) /* In some configs, there won't be a pool of SFs IRQs. Hence, returning * the PF IRQs pool in case the SF pool doesn't exist. */ - return pool ? pool : irq_table->pf_pool; + return pool ? pool : irq_table->pcif_pool; } /** @@ -364,7 +410,7 @@ static void mlx5_irqs_release(struct mlx5_irq **irqs, int nirqs) int i; for (i = 0; i < nirqs; i++) { - synchronize_irq(irqs[i]->irqn); + synchronize_irq(irqs[i]->map.virq); mlx5_irq_put(irqs[i]); } } @@ -387,26 +433,26 @@ void mlx5_ctrl_irq_release(struct mlx5_irq *ctrl_irq) struct mlx5_irq *mlx5_ctrl_irq_request(struct mlx5_core_dev *dev) { struct mlx5_irq_pool *pool = ctrl_irq_pool_get(dev); - cpumask_var_t req_mask; + struct irq_affinity_desc af_desc; struct mlx5_irq *irq; - if (!zalloc_cpumask_var(&req_mask, GFP_KERNEL)) - return ERR_PTR(-ENOMEM); - cpumask_copy(req_mask, cpu_online_mask); + cpumask_copy(&af_desc.mask, cpu_online_mask); + af_desc.is_managed = false; if (!mlx5_irq_pool_is_sf_pool(pool)) { - /* In case we are allocating a control IRQ for PF/VF */ + /* In case we are allocating a control IRQ from a pci device's pool. + * This can happen also for a SF if the SFs pool is empty. + */ if (!pool->xa_num_irqs.max) { - cpumask_clear(req_mask); + cpumask_clear(&af_desc.mask); /* In case we only have a single IRQ for PF/VF */ - cpumask_set_cpu(cpumask_first(cpu_online_mask), req_mask); + cpumask_set_cpu(cpumask_first(cpu_online_mask), &af_desc.mask); } - /* Allocate the IRQ in the last index of the pool */ - irq = irq_pool_request_vector(pool, pool->xa_num_irqs.max, req_mask); + /* Allocate the IRQ in index 0. The vector was already allocated */ + irq = irq_pool_request_vector(pool, 0, &af_desc, NULL); } else { - irq = mlx5_irq_affinity_request(pool, req_mask); + irq = mlx5_irq_affinity_request(pool, &af_desc); } - free_cpumask_var(req_mask); return irq; } @@ -415,28 +461,82 @@ struct mlx5_irq *mlx5_ctrl_irq_request(struct mlx5_core_dev *dev) * @dev: mlx5 device that requesting the IRQ. * @vecidx: vector index of the IRQ. This argument is ignore if affinity is * provided. - * @affinity: cpumask requested for this IRQ. + * @af_desc: affinity descriptor for this IRQ. + * @rmap: pointer to reverse map pointer for completion interrupts * * This function returns a pointer to IRQ, or ERR_PTR in case of error. */ struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, u16 vecidx, - struct cpumask *affinity) + struct irq_affinity_desc *af_desc, + struct cpu_rmap **rmap) { struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev); struct mlx5_irq_pool *pool; struct mlx5_irq *irq; - pool = irq_table->pf_pool; - irq = irq_pool_request_vector(pool, vecidx, affinity); + pool = irq_table->pcif_pool; + irq = irq_pool_request_vector(pool, vecidx, af_desc, rmap); if (IS_ERR(irq)) return irq; mlx5_core_dbg(dev, "irq %u mapped to cpu %*pbl, %u EQs on this irq\n", - irq->irqn, cpumask_pr_args(affinity), + irq->map.virq, cpumask_pr_args(&af_desc->mask), irq->refcount / MLX5_EQ_REFS_PER_IRQ); return irq; } /** + * mlx5_msix_alloc - allocate msix interrupt + * @dev: mlx5 device from which to request + * @handler: interrupt handler + * @affdesc: affinity descriptor + * @name: interrupt name + * + * Returns: struct msi_map with result encoded. + * Note: the caller must make sure to release the irq by calling + * mlx5_msix_free() if shutdown was initiated. + */ +struct msi_map mlx5_msix_alloc(struct mlx5_core_dev *dev, + irqreturn_t (*handler)(int, void *), + const struct irq_affinity_desc *affdesc, + const char *name) +{ + struct msi_map map; + int err; + + if (!dev->pdev) { + map.virq = 0; + map.index = -EINVAL; + return map; + } + + map = pci_msix_alloc_irq_at(dev->pdev, MSI_ANY_INDEX, affdesc); + if (!map.virq) + return map; + + err = request_irq(map.virq, handler, 0, name, NULL); + if (err) { + mlx5_core_warn(dev, "err %d\n", err); + pci_msix_free_irq(dev->pdev, map); + map.virq = 0; + map.index = -ENOMEM; + } + return map; +} +EXPORT_SYMBOL(mlx5_msix_alloc); + +/** + * mlx5_msix_free - free a previously allocated msix interrupt + * @dev: mlx5 device associated with interrupt + * @map: map previously returned by mlx5_msix_alloc() + */ +void mlx5_msix_free(struct mlx5_core_dev *dev, struct msi_map map) +{ + free_irq(map.virq, NULL); + pci_msix_free_irq(dev->pdev, map); +} +EXPORT_SYMBOL(mlx5_msix_free); + +/** * mlx5_irqs_release_vectors - release one or more IRQs back to the system. * @irqs: IRQs to be released. * @nirqs: number of IRQs to be released. @@ -452,6 +552,7 @@ void mlx5_irqs_release_vectors(struct mlx5_irq **irqs, int nirqs) * @cpus: CPUs array for binding the IRQs * @nirqs: number of IRQs to request. * @irqs: an output array of IRQs pointers. + * @rmap: pointer to reverse map pointer for completion interrupts * * Each IRQ is bound to at most 1 CPU. * This function is requests nirqs IRQs, starting from @vecidx. @@ -460,24 +561,22 @@ void mlx5_irqs_release_vectors(struct mlx5_irq **irqs, int nirqs) * @nirqs), if successful, or a negative error code in case of an error. */ int mlx5_irqs_request_vectors(struct mlx5_core_dev *dev, u16 *cpus, int nirqs, - struct mlx5_irq **irqs) + struct mlx5_irq **irqs, struct cpu_rmap **rmap) { - cpumask_var_t req_mask; + struct irq_affinity_desc af_desc; struct mlx5_irq *irq; int i; - if (!zalloc_cpumask_var(&req_mask, GFP_KERNEL)) - return -ENOMEM; + af_desc.is_managed = 1; for (i = 0; i < nirqs; i++) { - cpumask_set_cpu(cpus[i], req_mask); - irq = mlx5_irq_request(dev, i, req_mask); + cpumask_set_cpu(cpus[i], &af_desc.mask); + irq = mlx5_irq_request(dev, i + 1, &af_desc, rmap); if (IS_ERR(irq)) break; - cpumask_clear(req_mask); + cpumask_clear(&af_desc.mask); irqs[i] = irq; } - free_cpumask_var(req_mask); return i ? i : PTR_ERR(irq); } @@ -521,7 +620,7 @@ static void irq_pool_free(struct mlx5_irq_pool *pool) kvfree(pool); } -static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pf_vec) +static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pcif_vec) { struct mlx5_irq_table *table = dev->priv.irq_table; int num_sf_ctrl_by_msix; @@ -529,12 +628,12 @@ static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pf_vec) int num_sf_ctrl; int err; - /* init pf_pool */ - table->pf_pool = irq_pool_alloc(dev, 0, pf_vec, NULL, - MLX5_EQ_SHARE_IRQ_MIN_COMP, - MLX5_EQ_SHARE_IRQ_MAX_COMP); - if (IS_ERR(table->pf_pool)) - return PTR_ERR(table->pf_pool); + /* init pcif_pool */ + table->pcif_pool = irq_pool_alloc(dev, 0, pcif_vec, NULL, + MLX5_EQ_SHARE_IRQ_MIN_COMP, + MLX5_EQ_SHARE_IRQ_MAX_COMP); + if (IS_ERR(table->pcif_pool)) + return PTR_ERR(table->pcif_pool); if (!mlx5_sf_max_functions(dev)) return 0; if (sf_vec < MLX5_IRQ_VEC_COMP_BASE_SF) { @@ -548,7 +647,7 @@ static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pf_vec) MLX5_SFS_PER_CTRL_IRQ); num_sf_ctrl = min_t(int, num_sf_ctrl_by_msix, num_sf_ctrl_by_sfs); num_sf_ctrl = min_t(int, MLX5_IRQ_CTRL_SF_MAX, num_sf_ctrl); - table->sf_ctrl_pool = irq_pool_alloc(dev, pf_vec, num_sf_ctrl, + table->sf_ctrl_pool = irq_pool_alloc(dev, pcif_vec, num_sf_ctrl, "mlx5_sf_ctrl", MLX5_EQ_SHARE_IRQ_MIN_CTRL, MLX5_EQ_SHARE_IRQ_MAX_CTRL); @@ -557,7 +656,7 @@ static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pf_vec) goto err_pf; } /* init sf_comp_pool */ - table->sf_comp_pool = irq_pool_alloc(dev, pf_vec + num_sf_ctrl, + table->sf_comp_pool = irq_pool_alloc(dev, pcif_vec + num_sf_ctrl, sf_vec - num_sf_ctrl, "mlx5_sf_comp", MLX5_EQ_SHARE_IRQ_MIN_COMP, MLX5_EQ_SHARE_IRQ_MAX_COMP); @@ -579,7 +678,7 @@ err_irqs_per_cpu: err_sf_ctrl: irq_pool_free(table->sf_ctrl_pool); err_pf: - irq_pool_free(table->pf_pool); + irq_pool_free(table->pcif_pool); return err; } @@ -589,7 +688,7 @@ static void irq_pools_destroy(struct mlx5_irq_table *table) irq_pool_free(table->sf_comp_pool); irq_pool_free(table->sf_ctrl_pool); } - irq_pool_free(table->pf_pool); + irq_pool_free(table->pcif_pool); } /* irq_table API */ @@ -620,9 +719,9 @@ void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev) int mlx5_irq_table_get_num_comp(struct mlx5_irq_table *table) { - if (!table->pf_pool->xa_num_irqs.max) + if (!table->pcif_pool->xa_num_irqs.max) return 1; - return table->pf_pool->xa_num_irqs.max - table->pf_pool->xa_num_irqs.min; + return table->pcif_pool->xa_num_irqs.max - table->pcif_pool->xa_num_irqs.min; } int mlx5_irq_table_create(struct mlx5_core_dev *dev) @@ -631,26 +730,30 @@ int mlx5_irq_table_create(struct mlx5_core_dev *dev) MLX5_CAP_GEN(dev, max_num_eqs) : 1 << MLX5_CAP_GEN(dev, log_max_eq); int total_vec; - int pf_vec; + int pcif_vec; + int req_vec; int err; + int n; if (mlx5_core_is_sf(dev)) return 0; - pf_vec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + 1; - pf_vec = min_t(int, pf_vec, num_eqs); + pcif_vec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + 1; + pcif_vec = min_t(int, pcif_vec, num_eqs); - total_vec = pf_vec; + total_vec = pcif_vec; if (mlx5_sf_max_functions(dev)) total_vec += MLX5_IRQ_CTRL_SF_MAX + MLX5_COMP_EQS_PER_SF * mlx5_sf_max_functions(dev); + total_vec = min_t(int, total_vec, pci_msix_vec_count(dev->pdev)); + pcif_vec = min_t(int, pcif_vec, pci_msix_vec_count(dev->pdev)); - total_vec = pci_alloc_irq_vectors(dev->pdev, 1, total_vec, PCI_IRQ_MSIX); - if (total_vec < 0) - return total_vec; - pf_vec = min(pf_vec, total_vec); + req_vec = pci_msix_can_alloc_dyn(dev->pdev) ? 1 : total_vec; + n = pci_alloc_irq_vectors(dev->pdev, 1, req_vec, PCI_IRQ_MSIX); + if (n < 0) + return n; - err = irq_pools_init(dev, total_vec - pf_vec, pf_vec); + err = irq_pools_init(dev, total_vec - pcif_vec, pcif_vec); if (err) pci_free_irq_vectors(dev->pdev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h index 5c7e68bee43a..d3a77a0ab848 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h @@ -12,6 +12,7 @@ #define MLX5_EQ_REFS_PER_IRQ (2) struct mlx5_irq; +struct cpu_rmap; struct mlx5_irq_pool { char name[MLX5_MAX_IRQ_NAME - MLX5_MAX_IRQ_IDX_CHARS]; @@ -31,7 +32,8 @@ static inline bool mlx5_irq_pool_is_sf_pool(struct mlx5_irq_pool *pool) } struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i, - const struct cpumask *affinity); + struct irq_affinity_desc *af_desc, + struct cpu_rmap **rmap); int mlx5_irq_get_locked(struct mlx5_irq *irq); int mlx5_irq_read_locked(struct mlx5_irq *irq); int mlx5_irq_put(struct mlx5_irq *irq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index a1548e6bfb35..0daeb4b72cca 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -1054,3 +1054,154 @@ out: kfree(out); return err; } + +/* speed in units of 1Mb */ +static const u32 mlx5e_link_speed[MLX5E_LINK_MODES_NUMBER] = { + [MLX5E_1000BASE_CX_SGMII] = 1000, + [MLX5E_1000BASE_KX] = 1000, + [MLX5E_10GBASE_CX4] = 10000, + [MLX5E_10GBASE_KX4] = 10000, + [MLX5E_10GBASE_KR] = 10000, + [MLX5E_20GBASE_KR2] = 20000, + [MLX5E_40GBASE_CR4] = 40000, + [MLX5E_40GBASE_KR4] = 40000, + [MLX5E_56GBASE_R4] = 56000, + [MLX5E_10GBASE_CR] = 10000, + [MLX5E_10GBASE_SR] = 10000, + [MLX5E_10GBASE_ER] = 10000, + [MLX5E_40GBASE_SR4] = 40000, + [MLX5E_40GBASE_LR4] = 40000, + [MLX5E_50GBASE_SR2] = 50000, + [MLX5E_100GBASE_CR4] = 100000, + [MLX5E_100GBASE_SR4] = 100000, + [MLX5E_100GBASE_KR4] = 100000, + [MLX5E_100GBASE_LR4] = 100000, + [MLX5E_100BASE_TX] = 100, + [MLX5E_1000BASE_T] = 1000, + [MLX5E_10GBASE_T] = 10000, + [MLX5E_25GBASE_CR] = 25000, + [MLX5E_25GBASE_KR] = 25000, + [MLX5E_25GBASE_SR] = 25000, + [MLX5E_50GBASE_CR2] = 50000, + [MLX5E_50GBASE_KR2] = 50000, +}; + +static const u32 mlx5e_ext_link_speed[MLX5E_EXT_LINK_MODES_NUMBER] = { + [MLX5E_SGMII_100M] = 100, + [MLX5E_1000BASE_X_SGMII] = 1000, + [MLX5E_5GBASE_R] = 5000, + [MLX5E_10GBASE_XFI_XAUI_1] = 10000, + [MLX5E_40GBASE_XLAUI_4_XLPPI_4] = 40000, + [MLX5E_25GAUI_1_25GBASE_CR_KR] = 25000, + [MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2] = 50000, + [MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR] = 50000, + [MLX5E_CAUI_4_100GBASE_CR4_KR4] = 100000, + [MLX5E_100GAUI_2_100GBASE_CR2_KR2] = 100000, + [MLX5E_200GAUI_4_200GBASE_CR4_KR4] = 200000, + [MLX5E_400GAUI_8] = 400000, + [MLX5E_100GAUI_1_100GBASE_CR_KR] = 100000, + [MLX5E_200GAUI_2_200GBASE_CR2_KR2] = 200000, + [MLX5E_400GAUI_4_400GBASE_CR4_KR4] = 400000, +}; + +int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, bool ext, + struct mlx5_port_eth_proto *eproto) +{ + u32 out[MLX5_ST_SZ_DW(ptys_reg)]; + int err; + + if (!eproto) + return -EINVAL; + + err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_EN, port); + if (err) + return err; + + eproto->cap = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, + eth_proto_capability); + eproto->admin = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_admin); + eproto->oper = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_oper); + return 0; +} + +bool mlx5_ptys_ext_supported(struct mlx5_core_dev *mdev) +{ + struct mlx5_port_eth_proto eproto; + int err; + + if (MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet)) + return true; + + err = mlx5_port_query_eth_proto(mdev, 1, true, &eproto); + if (err) + return false; + + return !!eproto.cap; +} + +static void mlx5e_port_get_speed_arr(struct mlx5_core_dev *mdev, + const u32 **arr, u32 *size, + bool force_legacy) +{ + bool ext = force_legacy ? false : mlx5_ptys_ext_supported(mdev); + + *size = ext ? ARRAY_SIZE(mlx5e_ext_link_speed) : + ARRAY_SIZE(mlx5e_link_speed); + *arr = ext ? mlx5e_ext_link_speed : mlx5e_link_speed; +} + +u32 mlx5_port_ptys2speed(struct mlx5_core_dev *mdev, u32 eth_proto_oper, + bool force_legacy) +{ + unsigned long temp = eth_proto_oper; + const u32 *table; + u32 speed = 0; + u32 max_size; + int i; + + mlx5e_port_get_speed_arr(mdev, &table, &max_size, force_legacy); + i = find_first_bit(&temp, max_size); + if (i < max_size) + speed = table[i]; + return speed; +} + +u32 mlx5_port_speed2linkmodes(struct mlx5_core_dev *mdev, u32 speed, + bool force_legacy) +{ + u32 link_modes = 0; + const u32 *table; + u32 max_size; + int i; + + mlx5e_port_get_speed_arr(mdev, &table, &max_size, force_legacy); + for (i = 0; i < max_size; ++i) { + if (table[i] == speed) + link_modes |= MLX5E_PROT_MASK(i); + } + return link_modes; +} + +int mlx5_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed) +{ + struct mlx5_port_eth_proto eproto; + u32 max_speed = 0; + const u32 *table; + u32 max_size; + bool ext; + int err; + int i; + + ext = mlx5_ptys_ext_supported(mdev); + err = mlx5_port_query_eth_proto(mdev, 1, ext, &eproto); + if (err) + return err; + + mlx5e_port_get_speed_arr(mdev, &table, &max_size, false); + for (i = 0; i < max_size; ++i) + if (eproto.cap & MLX5E_PROT_MASK(i)) + max_speed = max(max_speed, table[i]); + + *speed = max_speed; + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c index a7377619ba6f..e2f26d0bc615 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c @@ -28,7 +28,7 @@ static int mlx5_sf_dev_probe(struct auxiliary_device *adev, const struct auxilia mdev->priv.adev_idx = adev->id; sf_dev->mdev = mdev; - err = mlx5_mdev_init(mdev, MLX5_DEFAULT_PROF); + err = mlx5_mdev_init(mdev, MLX5_SF_PROF); if (err) { mlx5_core_warn(mdev, "mlx5_mdev_init on err=%d\n", err); goto mdev_err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c index ee104cf04392..0eb9a8d7f282 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c @@ -819,14 +819,34 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher, case DR_ACTION_TYP_TNL_L2_TO_L2: break; case DR_ACTION_TYP_TNL_L3_TO_L2: - attr.decap_index = action->rewrite->index; - attr.decap_actions = action->rewrite->num_of_actions; - attr.decap_with_vlan = - attr.decap_actions == WITH_VLAN_NUM_HW_ACTIONS; + if (action->rewrite->ptrn && action->rewrite->arg) { + attr.decap_index = mlx5dr_arg_get_obj_id(action->rewrite->arg); + attr.decap_actions = action->rewrite->ptrn->num_of_actions; + attr.decap_pat_idx = action->rewrite->ptrn->index; + } else { + attr.decap_index = action->rewrite->index; + attr.decap_actions = action->rewrite->num_of_actions; + attr.decap_with_vlan = + attr.decap_actions == WITH_VLAN_NUM_HW_ACTIONS; + attr.decap_pat_idx = MLX5DR_INVALID_PATTERN_INDEX; + } break; case DR_ACTION_TYP_MODIFY_HDR: - attr.modify_index = action->rewrite->index; - attr.modify_actions = action->rewrite->num_of_actions; + if (action->rewrite->single_action_opt) { + attr.modify_actions = action->rewrite->num_of_actions; + attr.single_modify_action = action->rewrite->data; + } else { + if (action->rewrite->ptrn && action->rewrite->arg) { + attr.modify_index = + mlx5dr_arg_get_obj_id(action->rewrite->arg); + attr.modify_actions = action->rewrite->ptrn->num_of_actions; + attr.modify_pat_idx = action->rewrite->ptrn->index; + } else { + attr.modify_index = action->rewrite->index; + attr.modify_actions = action->rewrite->num_of_actions; + attr.modify_pat_idx = MLX5DR_INVALID_PATTERN_INDEX; + } + } if (action->rewrite->modify_ttl) dr_action_modify_ttl_adjust(dmn, &attr, rx_rule, &recalc_cs_required); @@ -1365,8 +1385,6 @@ out_err: return -EINVAL; } -#define ACTION_CACHE_LINE_SIZE 64 - static int dr_action_create_reformat_action(struct mlx5dr_domain *dmn, u8 reformat_param_0, u8 reformat_param_1, @@ -1403,36 +1421,25 @@ dr_action_create_reformat_action(struct mlx5dr_domain *dmn, } case DR_ACTION_TYP_TNL_L3_TO_L2: { - u8 hw_actions[ACTION_CACHE_LINE_SIZE] = {}; + u8 hw_actions[DR_ACTION_CACHE_LINE_SIZE] = {}; int ret; ret = mlx5dr_ste_set_action_decap_l3_list(dmn->ste_ctx, data, data_sz, hw_actions, - ACTION_CACHE_LINE_SIZE, + DR_ACTION_CACHE_LINE_SIZE, &action->rewrite->num_of_actions); if (ret) { mlx5dr_dbg(dmn, "Failed creating decap l3 action list\n"); return ret; } - action->rewrite->chunk = mlx5dr_icm_alloc_chunk(dmn->action_icm_pool, - DR_CHUNK_SIZE_8); - if (!action->rewrite->chunk) { - mlx5dr_dbg(dmn, "Failed allocating modify header chunk\n"); - return -ENOMEM; - } - - action->rewrite->data = (void *)hw_actions; - action->rewrite->index = (mlx5dr_icm_pool_get_chunk_icm_addr - (action->rewrite->chunk) - - dmn->info.caps.hdr_modify_icm_addr) / - ACTION_CACHE_LINE_SIZE; + action->rewrite->data = hw_actions; + action->rewrite->dmn = dmn; - ret = mlx5dr_send_postsend_action(dmn, action); + ret = mlx5dr_ste_alloc_modify_hdr(action); if (ret) { - mlx5dr_dbg(dmn, "Writing decap l3 actions to ICM failed\n"); - mlx5dr_icm_free_chunk(action->rewrite->chunk); + mlx5dr_dbg(dmn, "Failed preparing reformat data\n"); return ret; } return 0; @@ -1963,7 +1970,6 @@ static int dr_action_create_modify_action(struct mlx5dr_domain *dmn, __be64 actions[], struct mlx5dr_action *action) { - struct mlx5dr_icm_chunk *chunk; u32 max_hw_actions; u32 num_hw_actions; u32 num_sw_actions; @@ -1980,15 +1986,9 @@ static int dr_action_create_modify_action(struct mlx5dr_domain *dmn, return -EINVAL; } - chunk = mlx5dr_icm_alloc_chunk(dmn->action_icm_pool, DR_CHUNK_SIZE_16); - if (!chunk) - return -ENOMEM; - hw_actions = kcalloc(1, max_hw_actions * DR_MODIFY_ACTION_SIZE, GFP_KERNEL); - if (!hw_actions) { - ret = -ENOMEM; - goto free_chunk; - } + if (!hw_actions) + return -ENOMEM; ret = dr_actions_convert_modify_header(action, max_hw_actions, @@ -2000,24 +2000,24 @@ static int dr_action_create_modify_action(struct mlx5dr_domain *dmn, if (ret) goto free_hw_actions; - action->rewrite->chunk = chunk; action->rewrite->modify_ttl = modify_ttl; action->rewrite->data = (u8 *)hw_actions; action->rewrite->num_of_actions = num_hw_actions; - action->rewrite->index = (mlx5dr_icm_pool_get_chunk_icm_addr(chunk) - - dmn->info.caps.hdr_modify_icm_addr) / - ACTION_CACHE_LINE_SIZE; - ret = mlx5dr_send_postsend_action(dmn, action); - if (ret) - goto free_hw_actions; + if (num_hw_actions == 1 && + dmn->info.caps.sw_format_ver >= MLX5_STEERING_FORMAT_CONNECTX_6DX) { + action->rewrite->single_action_opt = true; + } else { + action->rewrite->single_action_opt = false; + ret = mlx5dr_ste_alloc_modify_hdr(action); + if (ret) + goto free_hw_actions; + } return 0; free_hw_actions: kfree(hw_actions); -free_chunk: - mlx5dr_icm_free_chunk(chunk); return ret; } @@ -2162,7 +2162,8 @@ int mlx5dr_action_destroy(struct mlx5dr_action *action) refcount_dec(&action->reformat->dmn->refcount); break; case DR_ACTION_TYP_TNL_L3_TO_L2: - mlx5dr_icm_free_chunk(action->rewrite->chunk); + mlx5dr_ste_free_modify_hdr(action); + kfree(action->rewrite->data); refcount_dec(&action->rewrite->dmn->refcount); break; case DR_ACTION_TYP_L2_TO_TNL_L2: @@ -2173,7 +2174,8 @@ int mlx5dr_action_destroy(struct mlx5dr_action *action) refcount_dec(&action->reformat->dmn->refcount); break; case DR_ACTION_TYP_MODIFY_HDR: - mlx5dr_icm_free_chunk(action->rewrite->chunk); + if (!action->rewrite->single_action_opt) + mlx5dr_ste_free_modify_hdr(action); kfree(action->rewrite->data); refcount_dec(&action->rewrite->dmn->refcount); break; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_arg.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_arg.c new file mode 100644 index 000000000000..01ed6442095d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_arg.c @@ -0,0 +1,273 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include "dr_types.h" + +#define DR_ICM_MODIFY_HDR_GRANULARITY_4K 12 + +/* modify-header arg pool */ +enum dr_arg_chunk_size { + DR_ARG_CHUNK_SIZE_1, + DR_ARG_CHUNK_SIZE_MIN = DR_ARG_CHUNK_SIZE_1, /* keep updated when changing */ + DR_ARG_CHUNK_SIZE_2, + DR_ARG_CHUNK_SIZE_3, + DR_ARG_CHUNK_SIZE_4, + DR_ARG_CHUNK_SIZE_MAX, +}; + +/* argument pool area */ +struct dr_arg_pool { + enum dr_arg_chunk_size log_chunk_size; + struct mlx5dr_domain *dmn; + struct list_head free_list; + struct mutex mutex; /* protect arg pool */ +}; + +struct mlx5dr_arg_mgr { + struct mlx5dr_domain *dmn; + struct dr_arg_pool *pools[DR_ARG_CHUNK_SIZE_MAX]; +}; + +static int dr_arg_pool_alloc_objs(struct dr_arg_pool *pool) +{ + struct mlx5dr_arg_obj *arg_obj, *tmp_arg; + struct list_head cur_list; + u16 object_range; + int num_of_objects; + u32 obj_id = 0; + int i, ret; + + INIT_LIST_HEAD(&cur_list); + + object_range = + pool->dmn->info.caps.log_header_modify_argument_granularity; + + object_range = + max_t(u32, pool->dmn->info.caps.log_header_modify_argument_granularity, + DR_ICM_MODIFY_HDR_GRANULARITY_4K); + object_range = + min_t(u32, pool->dmn->info.caps.log_header_modify_argument_max_alloc, + object_range); + + if (pool->log_chunk_size > object_range) { + mlx5dr_err(pool->dmn, "Required chunk size (%d) is not supported\n", + pool->log_chunk_size); + return -ENOMEM; + } + + num_of_objects = (1 << (object_range - pool->log_chunk_size)); + /* Only one devx object per range */ + ret = mlx5dr_cmd_create_modify_header_arg(pool->dmn->mdev, + object_range, + pool->dmn->pdn, + &obj_id); + if (ret) { + mlx5dr_err(pool->dmn, "failed allocating object with range: %d:\n", + object_range); + return -EAGAIN; + } + + for (i = 0; i < num_of_objects; i++) { + arg_obj = kzalloc(sizeof(*arg_obj), GFP_KERNEL); + if (!arg_obj) { + ret = -ENOMEM; + goto clean_arg_obj; + } + + arg_obj->log_chunk_size = pool->log_chunk_size; + + list_add_tail(&arg_obj->list_node, &cur_list); + + arg_obj->obj_id = obj_id; + arg_obj->obj_offset = i * (1 << pool->log_chunk_size); + } + list_splice_tail_init(&cur_list, &pool->free_list); + + return 0; + +clean_arg_obj: + mlx5dr_cmd_destroy_modify_header_arg(pool->dmn->mdev, obj_id); + list_for_each_entry_safe(arg_obj, tmp_arg, &cur_list, list_node) { + list_del(&arg_obj->list_node); + kfree(arg_obj); + } + return ret; +} + +static struct mlx5dr_arg_obj *dr_arg_pool_get_arg_obj(struct dr_arg_pool *pool) +{ + struct mlx5dr_arg_obj *arg_obj = NULL; + int ret; + + mutex_lock(&pool->mutex); + if (list_empty(&pool->free_list)) { + ret = dr_arg_pool_alloc_objs(pool); + if (ret) + goto out; + } + + arg_obj = list_first_entry_or_null(&pool->free_list, + struct mlx5dr_arg_obj, + list_node); + WARN(!arg_obj, "couldn't get dr arg obj from pool"); + + if (arg_obj) + list_del_init(&arg_obj->list_node); + +out: + mutex_unlock(&pool->mutex); + return arg_obj; +} + +static void dr_arg_pool_put_arg_obj(struct dr_arg_pool *pool, + struct mlx5dr_arg_obj *arg_obj) +{ + mutex_lock(&pool->mutex); + list_add(&arg_obj->list_node, &pool->free_list); + mutex_unlock(&pool->mutex); +} + +static struct dr_arg_pool *dr_arg_pool_create(struct mlx5dr_domain *dmn, + enum dr_arg_chunk_size chunk_size) +{ + struct dr_arg_pool *pool; + + pool = kzalloc(sizeof(*pool), GFP_KERNEL); + if (!pool) + return NULL; + + pool->dmn = dmn; + + INIT_LIST_HEAD(&pool->free_list); + mutex_init(&pool->mutex); + + pool->log_chunk_size = chunk_size; + if (dr_arg_pool_alloc_objs(pool)) + goto free_pool; + + return pool; + +free_pool: + kfree(pool); + + return NULL; +} + +static void dr_arg_pool_destroy(struct dr_arg_pool *pool) +{ + struct mlx5dr_arg_obj *arg_obj, *tmp_arg; + + list_for_each_entry_safe(arg_obj, tmp_arg, &pool->free_list, list_node) { + list_del(&arg_obj->list_node); + if (!arg_obj->obj_offset) /* the first in range */ + mlx5dr_cmd_destroy_modify_header_arg(pool->dmn->mdev, arg_obj->obj_id); + kfree(arg_obj); + } + + mutex_destroy(&pool->mutex); + kfree(pool); +} + +static enum dr_arg_chunk_size dr_arg_get_chunk_size(u16 num_of_actions) +{ + if (num_of_actions <= 8) + return DR_ARG_CHUNK_SIZE_1; + if (num_of_actions <= 16) + return DR_ARG_CHUNK_SIZE_2; + if (num_of_actions <= 32) + return DR_ARG_CHUNK_SIZE_3; + if (num_of_actions <= 64) + return DR_ARG_CHUNK_SIZE_4; + + return DR_ARG_CHUNK_SIZE_MAX; +} + +u32 mlx5dr_arg_get_obj_id(struct mlx5dr_arg_obj *arg_obj) +{ + return (arg_obj->obj_id + arg_obj->obj_offset); +} + +struct mlx5dr_arg_obj *mlx5dr_arg_get_obj(struct mlx5dr_arg_mgr *mgr, + u16 num_of_actions, + u8 *data) +{ + u32 size = dr_arg_get_chunk_size(num_of_actions); + struct mlx5dr_arg_obj *arg_obj; + int ret; + + if (size >= DR_ARG_CHUNK_SIZE_MAX) + return NULL; + + arg_obj = dr_arg_pool_get_arg_obj(mgr->pools[size]); + if (!arg_obj) { + mlx5dr_err(mgr->dmn, "Failed allocating args object for modify header\n"); + return NULL; + } + + /* write it into the hw */ + ret = mlx5dr_send_postsend_args(mgr->dmn, + mlx5dr_arg_get_obj_id(arg_obj), + num_of_actions, data); + if (ret) { + mlx5dr_err(mgr->dmn, "Failed writing args object\n"); + goto put_obj; + } + + return arg_obj; + +put_obj: + mlx5dr_arg_put_obj(mgr, arg_obj); + return NULL; +} + +void mlx5dr_arg_put_obj(struct mlx5dr_arg_mgr *mgr, + struct mlx5dr_arg_obj *arg_obj) +{ + dr_arg_pool_put_arg_obj(mgr->pools[arg_obj->log_chunk_size], arg_obj); +} + +struct mlx5dr_arg_mgr* +mlx5dr_arg_mgr_create(struct mlx5dr_domain *dmn) +{ + struct mlx5dr_arg_mgr *pool_mgr; + int i; + + if (!mlx5dr_domain_is_support_ptrn_arg(dmn)) + return NULL; + + pool_mgr = kzalloc(sizeof(*pool_mgr), GFP_KERNEL); + if (!pool_mgr) + return NULL; + + pool_mgr->dmn = dmn; + + for (i = 0; i < DR_ARG_CHUNK_SIZE_MAX; i++) { + pool_mgr->pools[i] = dr_arg_pool_create(dmn, i); + if (!pool_mgr->pools[i]) + goto clean_pools; + } + + return pool_mgr; + +clean_pools: + for (i--; i >= 0; i--) + dr_arg_pool_destroy(pool_mgr->pools[i]); + + kfree(pool_mgr); + return NULL; +} + +void mlx5dr_arg_mgr_destroy(struct mlx5dr_arg_mgr *mgr) +{ + struct dr_arg_pool **pools; + int i; + + if (!mgr) + return; + + pools = mgr->pools; + for (i = 0; i < DR_ARG_CHUNK_SIZE_MAX; i++) + dr_arg_pool_destroy(pools[i]); + + kfree(mgr); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c index 07b6a6dcb92f..3835ba3f4dda 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c @@ -132,6 +132,17 @@ int mlx5dr_cmd_query_device(struct mlx5_core_dev *mdev, caps->isolate_vl_tc = MLX5_CAP_GEN(mdev, isolate_vl_tc_new); + caps->support_modify_argument = + MLX5_CAP_GEN_64(mdev, general_obj_types) & + MLX5_GENERAL_OBJ_TYPES_CAP_HEADER_MODIFY_ARGUMENT; + + if (caps->support_modify_argument) { + caps->log_header_modify_argument_granularity = + MLX5_CAP_GEN(mdev, log_header_modify_argument_granularity); + caps->log_header_modify_argument_max_alloc = + MLX5_CAP_GEN(mdev, log_header_modify_argument_max_alloc); + } + /* geneve_tlv_option_0_exist is the indication of * STE support for lookup type flex_parser_ok */ @@ -200,6 +211,12 @@ int mlx5dr_cmd_query_device(struct mlx5_core_dev *mdev, caps->hdr_modify_icm_addr = MLX5_CAP64_DEV_MEM(mdev, header_modify_sw_icm_start_address); + caps->log_modify_pattern_icm_size = + MLX5_CAP_DEV_MEM(mdev, log_header_modify_pattern_sw_icm_size); + + caps->hdr_modify_pattern_icm_addr = + MLX5_CAP64_DEV_MEM(mdev, header_modify_pattern_sw_icm_start_address); + caps->roce_min_src_udp = MLX5_CAP_ROCE(mdev, r_roce_min_src_udp_port); caps->is_ecpf = mlx5_core_is_ecpf_esw_manager(mdev); @@ -676,6 +693,49 @@ int mlx5dr_cmd_query_gid(struct mlx5_core_dev *mdev, u8 vhca_port_num, return 0; } +int mlx5dr_cmd_create_modify_header_arg(struct mlx5_core_dev *dev, + u16 log_obj_range, u32 pd, + u32 *obj_id) +{ + u32 in[MLX5_ST_SZ_DW(create_modify_header_arg_in)] = {}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {}; + void *attr; + int ret; + + attr = MLX5_ADDR_OF(create_modify_header_arg_in, in, hdr); + MLX5_SET(general_obj_in_cmd_hdr, attr, opcode, + MLX5_CMD_OP_CREATE_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, attr, obj_type, + MLX5_OBJ_TYPE_HEADER_MODIFY_ARGUMENT); + MLX5_SET(general_obj_in_cmd_hdr, attr, + op_param.create.log_obj_range, log_obj_range); + + attr = MLX5_ADDR_OF(create_modify_header_arg_in, in, arg); + MLX5_SET(modify_header_arg, attr, access_pd, pd); + + ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (ret) + return ret; + + *obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + return 0; +} + +void mlx5dr_cmd_destroy_modify_header_arg(struct mlx5_core_dev *dev, + u32 obj_id) +{ + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {}; + u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {}; + + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, + MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, + MLX5_OBJ_TYPE_HEADER_MODIFY_ARGUMENT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, obj_id); + + mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + static int mlx5dr_cmd_set_extended_dest(struct mlx5_core_dev *dev, struct mlx5dr_cmd_fte_info *fte, bool *extended_dest) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c index db81d881d38e..7e36e1062139 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c @@ -4,6 +4,7 @@ #include <linux/debugfs.h> #include <linux/kernel.h> #include <linux/seq_file.h> +#include <linux/version.h> #include "dr_types.h" #define DR_DBG_PTR_TO_ID(p) ((u64)(uintptr_t)(p) & 0xFFFFFFFFULL) @@ -140,10 +141,33 @@ dr_dump_rule_action_mem(struct seq_file *file, const u64 rule_id, action->flow_tag->flow_tag); break; case DR_ACTION_TYP_MODIFY_HDR: - seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n", + { + struct mlx5dr_ptrn_obj *ptrn = action->rewrite->ptrn; + struct mlx5dr_arg_obj *arg = action->rewrite->arg; + u8 *rewrite_data = action->rewrite->data; + bool ptrn_arg; + int i; + + ptrn_arg = !action->rewrite->single_action_opt && ptrn && arg; + + seq_printf(file, "%d,0x%llx,0x%llx,0x%x,%d,0x%x,0x%x,0x%x", DR_DUMP_REC_TYPE_ACTION_MODIFY_HDR, action_id, - rule_id, action->rewrite->index); + rule_id, action->rewrite->index, + action->rewrite->single_action_opt, + ptrn_arg ? action->rewrite->num_of_actions : 0, + ptrn_arg ? ptrn->index : 0, + ptrn_arg ? mlx5dr_arg_get_obj_id(arg) : 0); + + if (ptrn_arg) { + for (i = 0; i < action->rewrite->num_of_actions; i++) { + seq_printf(file, ",0x%016llx", + be64_to_cpu(((__be64 *)rewrite_data)[i])); + } + } + + seq_puts(file, "\n"); break; + } case DR_ACTION_TYP_VPORT: seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n", DR_DUMP_REC_TYPE_ACTION_VPORT, action_id, rule_id, @@ -157,7 +181,10 @@ dr_dump_rule_action_mem(struct seq_file *file, const u64 rule_id, case DR_ACTION_TYP_TNL_L3_TO_L2: seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n", DR_DUMP_REC_TYPE_ACTION_DECAP_L3, action_id, - rule_id, action->rewrite->index); + rule_id, + (action->rewrite->ptrn && action->rewrite->arg) ? + mlx5dr_arg_get_obj_id(action->rewrite->arg) : + action->rewrite->index); break; case DR_ACTION_TYP_L2_TO_TNL_L2: seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n", @@ -606,9 +633,18 @@ dr_dump_domain(struct seq_file *file, struct mlx5dr_domain *dmn) u64 domain_id = DR_DBG_PTR_TO_ID(dmn); int ret; - seq_printf(file, "%d,0x%llx,%d,0%x,%d,%s\n", DR_DUMP_REC_TYPE_DOMAIN, + seq_printf(file, "%d,0x%llx,%d,0%x,%d,%u.%u.%u,%s,%d,%u,%u,%u\n", + DR_DUMP_REC_TYPE_DOMAIN, domain_id, dmn->type, dmn->info.caps.gvmi, - dmn->info.supp_sw_steering, pci_name(dmn->mdev->pdev)); + dmn->info.supp_sw_steering, + /* package version */ + LINUX_VERSION_MAJOR, LINUX_VERSION_PATCHLEVEL, + LINUX_VERSION_SUBLEVEL, + pci_name(dmn->mdev->pdev), + 0, /* domain flags */ + dmn->num_buddies[DR_ICM_TYPE_STE], + dmn->num_buddies[DR_ICM_TYPE_MODIFY_ACTION], + dmn->num_buddies[DR_ICM_TYPE_MODIFY_HDR_PTRN]); ret = dr_dump_domain_info(file, &dmn->info, domain_id); if (ret < 0) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c index 5b8bb2ca31e6..9a2dfe6ebe31 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c @@ -10,6 +10,46 @@ ((dmn)->info.caps.dmn_type##_sw_owner_v2 && \ (dmn)->info.caps.sw_format_ver <= MLX5_STEERING_FORMAT_CONNECTX_7)) +bool mlx5dr_domain_is_support_ptrn_arg(struct mlx5dr_domain *dmn) +{ + return dmn->info.caps.sw_format_ver >= MLX5_STEERING_FORMAT_CONNECTX_6DX && + dmn->info.caps.support_modify_argument; +} + +static int dr_domain_init_modify_header_resources(struct mlx5dr_domain *dmn) +{ + if (!mlx5dr_domain_is_support_ptrn_arg(dmn)) + return 0; + + dmn->ptrn_mgr = mlx5dr_ptrn_mgr_create(dmn); + if (!dmn->ptrn_mgr) { + mlx5dr_err(dmn, "Couldn't create ptrn_mgr\n"); + return -ENOMEM; + } + + /* create argument pool */ + dmn->arg_mgr = mlx5dr_arg_mgr_create(dmn); + if (!dmn->arg_mgr) { + mlx5dr_err(dmn, "Couldn't create arg_mgr\n"); + goto free_modify_header_pattern; + } + + return 0; + +free_modify_header_pattern: + mlx5dr_ptrn_mgr_destroy(dmn->ptrn_mgr); + return -ENOMEM; +} + +static void dr_domain_destroy_modify_header_resources(struct mlx5dr_domain *dmn) +{ + if (!mlx5dr_domain_is_support_ptrn_arg(dmn)) + return; + + mlx5dr_arg_mgr_destroy(dmn->arg_mgr); + mlx5dr_ptrn_mgr_destroy(dmn->ptrn_mgr); +} + static void dr_domain_init_csum_recalc_fts(struct mlx5dr_domain *dmn) { /* Per vport cached FW FT for checksum recalculation, this @@ -149,14 +189,22 @@ static int dr_domain_init_resources(struct mlx5dr_domain *dmn) goto clean_uar; } + ret = dr_domain_init_modify_header_resources(dmn); + if (ret) { + mlx5dr_err(dmn, "Couldn't create modify-header-resources\n"); + goto clean_mem_resources; + } + ret = mlx5dr_send_ring_alloc(dmn); if (ret) { mlx5dr_err(dmn, "Couldn't create send-ring\n"); - goto clean_mem_resources; + goto clean_modify_hdr; } return 0; +clean_modify_hdr: + dr_domain_destroy_modify_header_resources(dmn); clean_mem_resources: dr_domain_uninit_mem_resources(dmn); clean_uar: @@ -170,6 +218,7 @@ clean_pd: static void dr_domain_uninit_resources(struct mlx5dr_domain *dmn) { mlx5dr_send_ring_free(dmn, dmn->send_ring); + dr_domain_destroy_modify_header_resources(dmn); dr_domain_uninit_mem_resources(dmn); mlx5_put_uars_page(dmn->mdev, dmn->uar); mlx5_core_dealloc_pd(dmn->mdev, dmn->pdn); @@ -215,7 +264,7 @@ static int dr_domain_query_vport(struct mlx5dr_domain *dmn, return 0; } -static int dr_domain_query_esw_mngr(struct mlx5dr_domain *dmn) +static int dr_domain_query_esw_mgr(struct mlx5dr_domain *dmn) { return dr_domain_query_vport(dmn, 0, false, &dmn->info.caps.vports.esw_manager_caps); @@ -321,7 +370,7 @@ static int dr_domain_query_fdb_caps(struct mlx5_core_dev *mdev, * vports (vport 0, VFs and SFs) will be queried dynamically. */ - ret = dr_domain_query_esw_mngr(dmn); + ret = dr_domain_query_esw_mgr(dmn); if (ret) { mlx5dr_err(dmn, "Failed to query eswitch manager vport caps (err: %d)", ret); goto free_vports_caps_xa; @@ -435,6 +484,9 @@ mlx5dr_domain_create(struct mlx5_core_dev *mdev, enum mlx5dr_domain_type type) dmn->info.max_log_action_icm_sz = DR_CHUNK_SIZE_4K; dmn->info.max_log_sw_icm_sz = min_t(u32, DR_CHUNK_SIZE_1024K, dmn->info.caps.log_icm_size); + dmn->info.max_log_modify_hdr_pattern_icm_sz = + min_t(u32, DR_CHUNK_SIZE_4K, + dmn->info.caps.log_modify_pattern_icm_size); if (!dmn->info.supp_sw_steering) { mlx5dr_err(dmn, "SW steering is not supported\n"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c index 3eb6719bc8eb..0b5af9f3f605 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c @@ -4,7 +4,9 @@ #include "dr_types.h" #define DR_ICM_MODIFY_HDR_ALIGN_BASE 64 -#define DR_ICM_POOL_HOT_MEMORY_FRACTION 4 +#define DR_ICM_POOL_STE_HOT_MEM_PERCENT 25 +#define DR_ICM_POOL_MODIFY_HDR_PTRN_HOT_MEM_PERCENT 50 +#define DR_ICM_POOL_MODIFY_ACTION_HOT_MEM_PERCENT 90 struct mlx5dr_icm_hot_chunk { struct mlx5dr_icm_buddy_mem *buddy_mem; @@ -29,6 +31,8 @@ struct mlx5dr_icm_pool { struct mlx5dr_icm_hot_chunk *hot_chunks_arr; u32 hot_chunks_num; u64 hot_memory_size; + /* hot memory size threshold for triggering sync */ + u64 th; }; struct mlx5dr_icm_dm { @@ -107,9 +111,9 @@ static struct mlx5dr_icm_mr * dr_icm_pool_mr_create(struct mlx5dr_icm_pool *pool) { struct mlx5_core_dev *mdev = pool->dmn->mdev; - enum mlx5_sw_icm_type dm_type; + enum mlx5_sw_icm_type dm_type = 0; struct mlx5dr_icm_mr *icm_mr; - size_t log_align_base; + size_t log_align_base = 0; int err; icm_mr = kvzalloc(sizeof(*icm_mr), GFP_KERNEL); @@ -121,14 +125,25 @@ dr_icm_pool_mr_create(struct mlx5dr_icm_pool *pool) icm_mr->dm.length = mlx5dr_icm_pool_chunk_size_to_byte(pool->max_log_chunk_sz, pool->icm_type); - if (pool->icm_type == DR_ICM_TYPE_STE) { + switch (pool->icm_type) { + case DR_ICM_TYPE_STE: dm_type = MLX5_SW_ICM_TYPE_STEERING; log_align_base = ilog2(icm_mr->dm.length); - } else { + break; + case DR_ICM_TYPE_MODIFY_ACTION: dm_type = MLX5_SW_ICM_TYPE_HEADER_MODIFY; /* Align base is 64B */ log_align_base = ilog2(DR_ICM_MODIFY_HDR_ALIGN_BASE); + break; + case DR_ICM_TYPE_MODIFY_HDR_PTRN: + dm_type = MLX5_SW_ICM_TYPE_HEADER_MODIFY_PATTERN; + /* Align base is 64B */ + log_align_base = ilog2(DR_ICM_MODIFY_HDR_ALIGN_BASE); + break; + default: + WARN_ON(pool->icm_type); } + icm_mr->dm.type = dm_type; err = mlx5_dm_sw_icm_alloc(mdev, icm_mr->dm.type, icm_mr->dm.length, @@ -273,6 +288,8 @@ static int dr_icm_buddy_create(struct mlx5dr_icm_pool *pool) /* add it to the -start- of the list in order to search in it first */ list_add(&buddy->list_node, &pool->buddy_mem_list); + pool->dmn->num_buddies[pool->icm_type]++; + return 0; err_cleanup_buddy: @@ -286,13 +303,17 @@ free_mr: static void dr_icm_buddy_destroy(struct mlx5dr_icm_buddy_mem *buddy) { + enum mlx5dr_icm_type icm_type = buddy->pool->icm_type; + dr_icm_pool_mr_destroy(buddy->icm_mr); mlx5dr_buddy_cleanup(buddy); - if (buddy->pool->icm_type == DR_ICM_TYPE_STE) + if (icm_type == DR_ICM_TYPE_STE) dr_icm_buddy_cleanup_ste_cache(buddy); + buddy->pool->dmn->num_buddies[icm_type]--; + kvfree(buddy); } @@ -319,15 +340,7 @@ dr_icm_chunk_init(struct mlx5dr_icm_chunk *chunk, static bool dr_icm_pool_is_sync_required(struct mlx5dr_icm_pool *pool) { - int allow_hot_size; - - /* sync when hot memory reaches a certain fraction of the pool size */ - allow_hot_size = - mlx5dr_icm_pool_chunk_size_to_byte(pool->max_log_chunk_sz, - pool->icm_type) / - DR_ICM_POOL_HOT_MEMORY_FRACTION; - - return pool->hot_memory_size > allow_hot_size; + return pool->hot_memory_size > pool->th; } static void dr_icm_pool_clear_hot_chunks_arr(struct mlx5dr_icm_pool *pool) @@ -492,14 +505,9 @@ void mlx5dr_icm_pool_free_htbl(struct mlx5dr_icm_pool *pool, struct mlx5dr_ste_h struct mlx5dr_icm_pool *mlx5dr_icm_pool_create(struct mlx5dr_domain *dmn, enum mlx5dr_icm_type icm_type) { - u32 num_of_chunks, entry_size, max_hot_size; - enum mlx5dr_icm_chunk_size max_log_chunk_sz; + u32 num_of_chunks, entry_size; struct mlx5dr_icm_pool *pool; - - if (icm_type == DR_ICM_TYPE_STE) - max_log_chunk_sz = dmn->info.max_log_sw_icm_sz; - else - max_log_chunk_sz = dmn->info.max_log_action_icm_sz; + u32 max_hot_size = 0; pool = kvzalloc(sizeof(*pool), GFP_KERNEL); if (!pool) @@ -507,20 +515,38 @@ struct mlx5dr_icm_pool *mlx5dr_icm_pool_create(struct mlx5dr_domain *dmn, pool->dmn = dmn; pool->icm_type = icm_type; - pool->max_log_chunk_sz = max_log_chunk_sz; pool->chunks_kmem_cache = dmn->chunks_kmem_cache; INIT_LIST_HEAD(&pool->buddy_mem_list); - mutex_init(&pool->mutex); - entry_size = mlx5dr_icm_pool_dm_type_to_entry_size(pool->icm_type); + switch (icm_type) { + case DR_ICM_TYPE_STE: + pool->max_log_chunk_sz = dmn->info.max_log_sw_icm_sz; + max_hot_size = mlx5dr_icm_pool_chunk_size_to_byte(pool->max_log_chunk_sz, + pool->icm_type) * + DR_ICM_POOL_STE_HOT_MEM_PERCENT / 100; + break; + case DR_ICM_TYPE_MODIFY_ACTION: + pool->max_log_chunk_sz = dmn->info.max_log_action_icm_sz; + max_hot_size = mlx5dr_icm_pool_chunk_size_to_byte(pool->max_log_chunk_sz, + pool->icm_type) * + DR_ICM_POOL_MODIFY_ACTION_HOT_MEM_PERCENT / 100; + break; + case DR_ICM_TYPE_MODIFY_HDR_PTRN: + pool->max_log_chunk_sz = dmn->info.max_log_modify_hdr_pattern_icm_sz; + max_hot_size = mlx5dr_icm_pool_chunk_size_to_byte(pool->max_log_chunk_sz, + pool->icm_type) * + DR_ICM_POOL_MODIFY_HDR_PTRN_HOT_MEM_PERCENT / 100; + break; + default: + WARN_ON(icm_type); + } - max_hot_size = mlx5dr_icm_pool_chunk_size_to_byte(pool->max_log_chunk_sz, - pool->icm_type) / - DR_ICM_POOL_HOT_MEMORY_FRACTION; + entry_size = mlx5dr_icm_pool_dm_type_to_entry_size(pool->icm_type); num_of_chunks = DIV_ROUND_UP(max_hot_size, entry_size) + 1; + pool->th = max_hot_size; pool->hot_chunks_arr = kvcalloc(num_of_chunks, sizeof(struct mlx5dr_icm_hot_chunk), diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ptrn.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ptrn.c new file mode 100644 index 000000000000..13e06a6a6b22 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ptrn.c @@ -0,0 +1,241 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include "dr_types.h" +#include "mlx5_ifc_dr_ste_v1.h" + +enum dr_ptrn_modify_hdr_action_id { + DR_PTRN_MODIFY_HDR_ACTION_ID_NOP = 0x00, + DR_PTRN_MODIFY_HDR_ACTION_ID_COPY = 0x05, + DR_PTRN_MODIFY_HDR_ACTION_ID_SET = 0x06, + DR_PTRN_MODIFY_HDR_ACTION_ID_ADD = 0x07, + DR_PTRN_MODIFY_HDR_ACTION_ID_INSERT_INLINE = 0x0a, +}; + +struct mlx5dr_ptrn_mgr { + struct mlx5dr_domain *dmn; + struct mlx5dr_icm_pool *ptrn_icm_pool; + /* cache for modify_header ptrn */ + struct list_head ptrn_list; + struct mutex modify_hdr_mutex; /* protect the pattern cache */ +}; + +/* Cache structure and functions */ +static bool dr_ptrn_compare_modify_hdr(size_t cur_num_of_actions, + __be64 cur_hw_actions[], + size_t num_of_actions, + __be64 hw_actions[]) +{ + int i; + + if (cur_num_of_actions != num_of_actions) + return false; + + for (i = 0; i < num_of_actions; i++) { + u8 action_id = + MLX5_GET(ste_double_action_set_v1, &hw_actions[i], action_id); + + if (action_id == DR_PTRN_MODIFY_HDR_ACTION_ID_COPY) { + if (hw_actions[i] != cur_hw_actions[i]) + return false; + } else { + if ((__force __be32)hw_actions[i] != + (__force __be32)cur_hw_actions[i]) + return false; + } + } + + return true; +} + +static struct mlx5dr_ptrn_obj * +dr_ptrn_find_cached_pattern(struct mlx5dr_ptrn_mgr *mgr, + size_t num_of_actions, + __be64 hw_actions[]) +{ + struct mlx5dr_ptrn_obj *cached_pattern; + struct mlx5dr_ptrn_obj *tmp; + + list_for_each_entry_safe(cached_pattern, tmp, &mgr->ptrn_list, list) { + if (dr_ptrn_compare_modify_hdr(cached_pattern->num_of_actions, + (__be64 *)cached_pattern->data, + num_of_actions, + hw_actions)) { + /* Put this pattern in the head of the list, + * as we will probably use it more. + */ + list_del_init(&cached_pattern->list); + list_add(&cached_pattern->list, &mgr->ptrn_list); + return cached_pattern; + } + } + + return NULL; +} + +static struct mlx5dr_ptrn_obj * +dr_ptrn_alloc_pattern(struct mlx5dr_ptrn_mgr *mgr, + u16 num_of_actions, u8 *data) +{ + struct mlx5dr_ptrn_obj *pattern; + struct mlx5dr_icm_chunk *chunk; + u32 chunk_size; + u32 index; + + chunk_size = ilog2(num_of_actions); + /* HW modify action index granularity is at least 64B */ + chunk_size = max_t(u32, chunk_size, DR_CHUNK_SIZE_8); + + chunk = mlx5dr_icm_alloc_chunk(mgr->ptrn_icm_pool, chunk_size); + if (!chunk) + return NULL; + + index = (mlx5dr_icm_pool_get_chunk_icm_addr(chunk) - + mgr->dmn->info.caps.hdr_modify_pattern_icm_addr) / + DR_ACTION_CACHE_LINE_SIZE; + + pattern = kzalloc(sizeof(*pattern), GFP_KERNEL); + if (!pattern) + goto free_chunk; + + pattern->data = kzalloc(num_of_actions * DR_MODIFY_ACTION_SIZE * + sizeof(*pattern->data), GFP_KERNEL); + if (!pattern->data) + goto free_pattern; + + memcpy(pattern->data, data, num_of_actions * DR_MODIFY_ACTION_SIZE); + pattern->chunk = chunk; + pattern->index = index; + pattern->num_of_actions = num_of_actions; + + list_add(&pattern->list, &mgr->ptrn_list); + refcount_set(&pattern->refcount, 1); + + return pattern; + +free_pattern: + kfree(pattern); +free_chunk: + mlx5dr_icm_free_chunk(chunk); + return NULL; +} + +static void +dr_ptrn_free_pattern(struct mlx5dr_ptrn_obj *pattern) +{ + list_del(&pattern->list); + mlx5dr_icm_free_chunk(pattern->chunk); + kfree(pattern->data); + kfree(pattern); +} + +struct mlx5dr_ptrn_obj * +mlx5dr_ptrn_cache_get_pattern(struct mlx5dr_ptrn_mgr *mgr, + u16 num_of_actions, + u8 *data) +{ + struct mlx5dr_ptrn_obj *pattern; + u64 *hw_actions; + u8 action_id; + int i; + + mutex_lock(&mgr->modify_hdr_mutex); + pattern = dr_ptrn_find_cached_pattern(mgr, + num_of_actions, + (__be64 *)data); + if (!pattern) { + /* Alloc and add new pattern to cache */ + pattern = dr_ptrn_alloc_pattern(mgr, num_of_actions, data); + if (!pattern) + goto out_unlock; + + hw_actions = (u64 *)pattern->data; + /* Here we mask the pattern data to create a valid pattern + * since we do an OR operation between the arg and pattern + */ + for (i = 0; i < num_of_actions; i++) { + action_id = MLX5_GET(ste_double_action_set_v1, &hw_actions[i], action_id); + + if (action_id == DR_PTRN_MODIFY_HDR_ACTION_ID_SET || + action_id == DR_PTRN_MODIFY_HDR_ACTION_ID_ADD || + action_id == DR_PTRN_MODIFY_HDR_ACTION_ID_INSERT_INLINE) + MLX5_SET(ste_double_action_set_v1, &hw_actions[i], inline_data, 0); + } + + if (mlx5dr_send_postsend_pattern(mgr->dmn, pattern->chunk, + num_of_actions, pattern->data)) { + refcount_dec(&pattern->refcount); + goto free_pattern; + } + } else { + refcount_inc(&pattern->refcount); + } + + mutex_unlock(&mgr->modify_hdr_mutex); + + return pattern; + +free_pattern: + dr_ptrn_free_pattern(pattern); +out_unlock: + mutex_unlock(&mgr->modify_hdr_mutex); + return NULL; +} + +void +mlx5dr_ptrn_cache_put_pattern(struct mlx5dr_ptrn_mgr *mgr, + struct mlx5dr_ptrn_obj *pattern) +{ + mutex_lock(&mgr->modify_hdr_mutex); + + if (refcount_dec_and_test(&pattern->refcount)) + dr_ptrn_free_pattern(pattern); + + mutex_unlock(&mgr->modify_hdr_mutex); +} + +struct mlx5dr_ptrn_mgr *mlx5dr_ptrn_mgr_create(struct mlx5dr_domain *dmn) +{ + struct mlx5dr_ptrn_mgr *mgr; + + if (!mlx5dr_domain_is_support_ptrn_arg(dmn)) + return NULL; + + mgr = kzalloc(sizeof(*mgr), GFP_KERNEL); + if (!mgr) + return NULL; + + mgr->dmn = dmn; + mgr->ptrn_icm_pool = mlx5dr_icm_pool_create(dmn, DR_ICM_TYPE_MODIFY_HDR_PTRN); + if (!mgr->ptrn_icm_pool) { + mlx5dr_err(dmn, "Couldn't get modify-header-pattern memory\n"); + goto free_mgr; + } + + INIT_LIST_HEAD(&mgr->ptrn_list); + return mgr; + +free_mgr: + kfree(mgr); + return NULL; +} + +void mlx5dr_ptrn_mgr_destroy(struct mlx5dr_ptrn_mgr *mgr) +{ + struct mlx5dr_ptrn_obj *pattern; + struct mlx5dr_ptrn_obj *tmp; + + if (!mgr) + return; + + WARN_ON(!list_empty(&mgr->ptrn_list)); + + list_for_each_entry_safe(pattern, tmp, &mgr->ptrn_list, list) { + list_del(&pattern->list); + kfree(pattern->data); + kfree(pattern); + } + + mlx5dr_icm_pool_destroy(mgr->ptrn_icm_pool); + kfree(mgr); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c index fd2d31cdbcf9..4a5ae86e2b62 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c @@ -18,7 +18,13 @@ struct dr_data_seg { unsigned int send_flags; }; +enum send_info_type { + WRITE_ICM = 0, + GTA_ARG = 1, +}; + struct postsend_info { + enum send_info_type type; struct dr_data_seg write; struct dr_data_seg read; u64 remote_addr; @@ -261,9 +267,10 @@ static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev, dr_qp->rq.pc = 0; dr_qp->rq.cc = 0; - dr_qp->rq.wqe_cnt = 4; + dr_qp->rq.wqe_cnt = 256; dr_qp->sq.pc = 0; dr_qp->sq.cc = 0; + dr_qp->sq.head = 0; dr_qp->sq.wqe_cnt = roundup_pow_of_two(attr->max_send_wr); MLX5_SET(qpc, temp_qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4); @@ -362,39 +369,113 @@ static void dr_cmd_notify_hw(struct mlx5dr_qp *dr_qp, void *ctrl) mlx5_write64(ctrl, dr_qp->uar->map + MLX5_BF_OFFSET); } -static void dr_rdma_segments(struct mlx5dr_qp *dr_qp, u64 remote_addr, - u32 rkey, struct dr_data_seg *data_seg, - u32 opcode, bool notify_hw) +static void +dr_rdma_handle_flow_access_arg_segments(struct mlx5_wqe_ctrl_seg *wq_ctrl, + u32 remote_addr, + struct dr_data_seg *data_seg, + int *size) { - struct mlx5_wqe_raddr_seg *wq_raddr; - struct mlx5_wqe_ctrl_seg *wq_ctrl; - struct mlx5_wqe_data_seg *wq_dseg; - unsigned int size; - unsigned int idx; + struct mlx5_wqe_header_modify_argument_update_seg *wq_arg_seg; + struct mlx5_wqe_flow_update_ctrl_seg *wq_flow_seg; - size = sizeof(*wq_ctrl) / 16 + sizeof(*wq_dseg) / 16 + - sizeof(*wq_raddr) / 16; + wq_ctrl->general_id = cpu_to_be32(remote_addr); + wq_flow_seg = (void *)(wq_ctrl + 1); - idx = dr_qp->sq.pc & (dr_qp->sq.wqe_cnt - 1); + /* mlx5_wqe_flow_update_ctrl_seg - all reserved */ + memset(wq_flow_seg, 0, sizeof(*wq_flow_seg)); + wq_arg_seg = (void *)(wq_flow_seg + 1); + + memcpy(wq_arg_seg->argument_list, + (void *)(uintptr_t)data_seg->addr, + data_seg->length); + + *size = (sizeof(*wq_ctrl) + /* WQE ctrl segment */ + sizeof(*wq_flow_seg) + /* WQE flow update ctrl seg - reserved */ + sizeof(*wq_arg_seg)) / /* WQE hdr modify arg seg - data */ + MLX5_SEND_WQE_DS; +} + +static void +dr_rdma_handle_icm_write_segments(struct mlx5_wqe_ctrl_seg *wq_ctrl, + u64 remote_addr, + u32 rkey, + struct dr_data_seg *data_seg, + unsigned int *size) +{ + struct mlx5_wqe_raddr_seg *wq_raddr; + struct mlx5_wqe_data_seg *wq_dseg; - wq_ctrl = mlx5_wq_cyc_get_wqe(&dr_qp->wq.sq, idx); - wq_ctrl->imm = 0; - wq_ctrl->fm_ce_se = (data_seg->send_flags) ? - MLX5_WQE_CTRL_CQ_UPDATE : 0; - wq_ctrl->opmod_idx_opcode = cpu_to_be32(((dr_qp->sq.pc & 0xffff) << 8) | - opcode); - wq_ctrl->qpn_ds = cpu_to_be32(size | dr_qp->qpn << 8); wq_raddr = (void *)(wq_ctrl + 1); + wq_raddr->raddr = cpu_to_be64(remote_addr); wq_raddr->rkey = cpu_to_be32(rkey); wq_raddr->reserved = 0; wq_dseg = (void *)(wq_raddr + 1); + wq_dseg->byte_count = cpu_to_be32(data_seg->length); wq_dseg->lkey = cpu_to_be32(data_seg->lkey); wq_dseg->addr = cpu_to_be64(data_seg->addr); - dr_qp->sq.wqe_head[idx] = dr_qp->sq.pc++; + *size = (sizeof(*wq_ctrl) + /* WQE ctrl segment */ + sizeof(*wq_dseg) + /* WQE data segment */ + sizeof(*wq_raddr)) / /* WQE remote addr segment */ + MLX5_SEND_WQE_DS; +} + +static void dr_set_ctrl_seg(struct mlx5_wqe_ctrl_seg *wq_ctrl, + struct dr_data_seg *data_seg) +{ + wq_ctrl->signature = 0; + wq_ctrl->rsvd[0] = 0; + wq_ctrl->rsvd[1] = 0; + wq_ctrl->fm_ce_se = data_seg->send_flags & IB_SEND_SIGNALED ? + MLX5_WQE_CTRL_CQ_UPDATE : 0; + wq_ctrl->imm = 0; +} + +static void dr_rdma_segments(struct mlx5dr_qp *dr_qp, u64 remote_addr, + u32 rkey, struct dr_data_seg *data_seg, + u32 opcode, bool notify_hw) +{ + struct mlx5_wqe_ctrl_seg *wq_ctrl; + int opcode_mod = 0; + unsigned int size; + unsigned int idx; + + idx = dr_qp->sq.pc & (dr_qp->sq.wqe_cnt - 1); + + wq_ctrl = mlx5_wq_cyc_get_wqe(&dr_qp->wq.sq, idx); + dr_set_ctrl_seg(wq_ctrl, data_seg); + + switch (opcode) { + case MLX5_OPCODE_RDMA_READ: + case MLX5_OPCODE_RDMA_WRITE: + dr_rdma_handle_icm_write_segments(wq_ctrl, remote_addr, + rkey, data_seg, &size); + break; + case MLX5_OPCODE_FLOW_TBL_ACCESS: + opcode_mod = MLX5_CMD_OP_MOD_UPDATE_HEADER_MODIFY_ARGUMENT; + dr_rdma_handle_flow_access_arg_segments(wq_ctrl, remote_addr, + data_seg, &size); + break; + default: + WARN(true, "illegal opcode %d", opcode); + return; + } + + /* -------------------------------------------------------- + * |opcode_mod (8 bit)|wqe_index (16 bits)| opcod (8 bits)| + * -------------------------------------------------------- + */ + wq_ctrl->opmod_idx_opcode = + cpu_to_be32((opcode_mod << 24) | + ((dr_qp->sq.pc & 0xffff) << 8) | + opcode); + wq_ctrl->qpn_ds = cpu_to_be32(size | dr_qp->qpn << 8); + + dr_qp->sq.pc += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB); + dr_qp->sq.wqe_head[idx] = dr_qp->sq.head++; if (notify_hw) dr_cmd_notify_hw(dr_qp, wq_ctrl); @@ -402,10 +483,16 @@ static void dr_rdma_segments(struct mlx5dr_qp *dr_qp, u64 remote_addr, static void dr_post_send(struct mlx5dr_qp *dr_qp, struct postsend_info *send_info) { - dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey, - &send_info->write, MLX5_OPCODE_RDMA_WRITE, false); - dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey, - &send_info->read, MLX5_OPCODE_RDMA_READ, true); + if (send_info->type == WRITE_ICM) { + dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey, + &send_info->write, MLX5_OPCODE_RDMA_WRITE, false); + dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey, + &send_info->read, MLX5_OPCODE_RDMA_READ, true); + } else { /* GTA_ARG */ + dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey, + &send_info->write, MLX5_OPCODE_FLOW_TBL_ACCESS, true); + } + } /** @@ -471,24 +558,54 @@ static int dr_handle_pending_wc(struct mlx5dr_domain *dmn, } else if (ne == 1) { send_ring->pending_wqe -= send_ring->signal_th; } - } while (is_drain && send_ring->pending_wqe); + } while (ne == 1 || + (is_drain && send_ring->pending_wqe >= send_ring->signal_th)); return 0; } -static void dr_fill_data_segs(struct mlx5dr_send_ring *send_ring, - struct postsend_info *send_info) +static void dr_fill_write_args_segs(struct mlx5dr_send_ring *send_ring, + struct postsend_info *send_info) { send_ring->pending_wqe++; if (send_ring->pending_wqe % send_ring->signal_th == 0) send_info->write.send_flags |= IB_SEND_SIGNALED; + else + send_info->write.send_flags = 0; +} + +static void dr_fill_write_icm_segs(struct mlx5dr_domain *dmn, + struct mlx5dr_send_ring *send_ring, + struct postsend_info *send_info) +{ + u32 buff_offset; + + if (send_info->write.length > dmn->info.max_inline_size) { + buff_offset = (send_ring->tx_head & + (dmn->send_ring->signal_th - 1)) * + send_ring->max_post_send_size; + /* Copy to ring mr */ + memcpy(send_ring->buf + buff_offset, + (void *)(uintptr_t)send_info->write.addr, + send_info->write.length); + send_info->write.addr = (uintptr_t)send_ring->mr->dma_addr + buff_offset; + send_info->write.lkey = send_ring->mr->mkey; + + send_ring->tx_head++; + } + + send_ring->pending_wqe++; + + if (send_ring->pending_wqe % send_ring->signal_th == 0) + send_info->write.send_flags |= IB_SEND_SIGNALED; send_ring->pending_wqe++; send_info->read.length = send_info->write.length; - /* Read into the same write area */ - send_info->read.addr = (uintptr_t)send_info->write.addr; - send_info->read.lkey = send_ring->mr->mkey; + + /* Read into dedicated sync buffer */ + send_info->read.addr = (uintptr_t)send_ring->sync_mr->dma_addr; + send_info->read.lkey = send_ring->sync_mr->mkey; if (send_ring->pending_wqe % send_ring->signal_th == 0) send_info->read.send_flags = IB_SEND_SIGNALED; @@ -496,11 +613,20 @@ static void dr_fill_data_segs(struct mlx5dr_send_ring *send_ring, send_info->read.send_flags = 0; } +static void dr_fill_data_segs(struct mlx5dr_domain *dmn, + struct mlx5dr_send_ring *send_ring, + struct postsend_info *send_info) +{ + if (send_info->type == WRITE_ICM) + dr_fill_write_icm_segs(dmn, send_ring, send_info); + else /* args */ + dr_fill_write_args_segs(send_ring, send_info); +} + static int dr_postsend_icm_data(struct mlx5dr_domain *dmn, struct postsend_info *send_info) { struct mlx5dr_send_ring *send_ring = dmn->send_ring; - u32 buff_offset; int ret; if (unlikely(dmn->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR || @@ -517,20 +643,7 @@ static int dr_postsend_icm_data(struct mlx5dr_domain *dmn, if (ret) goto out_unlock; - if (send_info->write.length > dmn->info.max_inline_size) { - buff_offset = (send_ring->tx_head & - (dmn->send_ring->signal_th - 1)) * - send_ring->max_post_send_size; - /* Copy to ring mr */ - memcpy(send_ring->buf + buff_offset, - (void *)(uintptr_t)send_info->write.addr, - send_info->write.length); - send_info->write.addr = (uintptr_t)send_ring->mr->dma_addr + buff_offset; - send_info->write.lkey = send_ring->mr->mkey; - } - - send_ring->tx_head++; - dr_fill_data_segs(send_ring, send_info); + dr_fill_data_segs(dmn, send_ring, send_info); dr_post_send(send_ring->qp, send_info); out_unlock: @@ -736,6 +849,59 @@ int mlx5dr_send_postsend_action(struct mlx5dr_domain *dmn, return dr_postsend_icm_data(dmn, &send_info); } +int mlx5dr_send_postsend_pattern(struct mlx5dr_domain *dmn, + struct mlx5dr_icm_chunk *chunk, + u16 num_of_actions, + u8 *data) +{ + struct postsend_info send_info = {}; + int ret; + + send_info.write.addr = (uintptr_t)data; + send_info.write.length = num_of_actions * DR_MODIFY_ACTION_SIZE; + send_info.remote_addr = mlx5dr_icm_pool_get_chunk_mr_addr(chunk); + send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(chunk); + + ret = dr_postsend_icm_data(dmn, &send_info); + if (ret) + return ret; + + return 0; +} + +int mlx5dr_send_postsend_args(struct mlx5dr_domain *dmn, u64 arg_id, + u16 num_of_actions, u8 *actions_data) +{ + int data_len, iter = 0, cur_sent; + u64 addr; + int ret; + + addr = (uintptr_t)actions_data; + data_len = num_of_actions * DR_MODIFY_ACTION_SIZE; + + do { + struct postsend_info send_info = {}; + + send_info.type = GTA_ARG; + send_info.write.addr = addr; + cur_sent = min_t(u32, data_len, DR_ACTION_CACHE_LINE_SIZE); + send_info.write.length = cur_sent; + send_info.write.lkey = 0; + send_info.remote_addr = arg_id + iter; + + ret = dr_postsend_icm_data(dmn, &send_info); + if (ret) + goto out; + + iter++; + addr += cur_sent; + data_len -= cur_sent; + } while (data_len > 0); + +out: + return ret; +} + static int dr_modify_qp_rst2init(struct mlx5_core_dev *mdev, struct mlx5dr_qp *dr_qp, int port) @@ -1123,16 +1289,25 @@ int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn) goto free_mem; } + dmn->send_ring->sync_buff = kzalloc(dmn->send_ring->max_post_send_size, + GFP_KERNEL); + if (!dmn->send_ring->sync_buff) { + ret = -ENOMEM; + goto clean_mr; + } + dmn->send_ring->sync_mr = dr_reg_mr(dmn->mdev, dmn->pdn, dmn->send_ring->sync_buff, - MIN_READ_SYNC); + dmn->send_ring->max_post_send_size); if (!dmn->send_ring->sync_mr) { ret = -ENOMEM; - goto clean_mr; + goto free_sync_mem; } return 0; +free_sync_mem: + kfree(dmn->send_ring->sync_buff); clean_mr: dr_dereg_mr(dmn->mdev, dmn->send_ring->mr); free_mem: @@ -1155,6 +1330,7 @@ void mlx5dr_send_ring_free(struct mlx5dr_domain *dmn, dr_dereg_mr(dmn->mdev, send_ring->sync_mr); dr_dereg_mr(dmn->mdev, send_ring->mr); kfree(send_ring->buf); + kfree(send_ring->sync_buff); kfree(send_ring); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c index 1e15f605df6e..9413aaf51251 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c @@ -633,6 +633,63 @@ int mlx5dr_ste_set_action_decap_l3_list(struct mlx5dr_ste_ctx *ste_ctx, used_hw_action_num); } +static int +dr_ste_alloc_modify_hdr_chunk(struct mlx5dr_action *action) +{ + struct mlx5dr_domain *dmn = action->rewrite->dmn; + u32 chunk_size; + int ret; + + chunk_size = ilog2(roundup_pow_of_two(action->rewrite->num_of_actions)); + + /* HW modify action index granularity is at least 64B */ + chunk_size = max_t(u32, chunk_size, DR_CHUNK_SIZE_8); + + action->rewrite->chunk = mlx5dr_icm_alloc_chunk(dmn->action_icm_pool, + chunk_size); + if (!action->rewrite->chunk) + return -ENOMEM; + + action->rewrite->index = (mlx5dr_icm_pool_get_chunk_icm_addr(action->rewrite->chunk) - + dmn->info.caps.hdr_modify_icm_addr) / + DR_ACTION_CACHE_LINE_SIZE; + + ret = mlx5dr_send_postsend_action(action->rewrite->dmn, action); + if (ret) + goto free_chunk; + + return 0; + +free_chunk: + mlx5dr_icm_free_chunk(action->rewrite->chunk); + return -ENOMEM; +} + +static void dr_ste_free_modify_hdr_chunk(struct mlx5dr_action *action) +{ + mlx5dr_icm_free_chunk(action->rewrite->chunk); +} + +int mlx5dr_ste_alloc_modify_hdr(struct mlx5dr_action *action) +{ + struct mlx5dr_domain *dmn = action->rewrite->dmn; + + if (mlx5dr_domain_is_support_ptrn_arg(dmn)) + return dmn->ste_ctx->alloc_modify_hdr_chunk(action); + + return dr_ste_alloc_modify_hdr_chunk(action); +} + +void mlx5dr_ste_free_modify_hdr(struct mlx5dr_action *action) +{ + struct mlx5dr_domain *dmn = action->rewrite->dmn; + + if (mlx5dr_domain_is_support_ptrn_arg(dmn)) + return dmn->ste_ctx->dealloc_modify_hdr_chunk(action); + + return dr_ste_free_modify_hdr_chunk(action); +} + static int dr_ste_build_pre_check_spec(struct mlx5dr_domain *dmn, struct mlx5dr_match_spec *spec) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h index 7075142bcfb6..54a6619c3ecb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h @@ -195,6 +195,8 @@ struct mlx5dr_ste_ctx { u8 *hw_action, u32 hw_action_sz, u16 *used_hw_action_num); + int (*alloc_modify_hdr_chunk)(struct mlx5dr_action *action); + void (*dealloc_modify_hdr_chunk)(struct mlx5dr_action *action); /* Send */ void (*prepare_for_postsend)(u8 *hw_ste_p, u32 ste_size); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c index 084145f18084..4c0704ad166b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c @@ -495,21 +495,66 @@ static void dr_ste_v1_set_rx_decap(u8 *hw_ste_p, u8 *s_action) dr_ste_v1_set_reparse(hw_ste_p); } -static void dr_ste_v1_set_rewrite_actions(u8 *hw_ste_p, - u8 *s_action, - u16 num_of_actions, - u32 re_write_index) +static void dr_ste_v1_set_accelerated_rewrite_actions(u8 *hw_ste_p, + u8 *d_action, + u16 num_of_actions, + u32 rewrite_pattern, + u32 rewrite_args, + u8 *action_data) +{ + if (action_data) { + memcpy(d_action, action_data, DR_MODIFY_ACTION_SIZE); + } else { + MLX5_SET(ste_double_action_accelerated_modify_action_list_v1, d_action, + action_id, DR_STE_V1_ACTION_ID_ACCELERATED_LIST); + MLX5_SET(ste_double_action_accelerated_modify_action_list_v1, d_action, + modify_actions_pattern_pointer, rewrite_pattern); + MLX5_SET(ste_double_action_accelerated_modify_action_list_v1, d_action, + number_of_modify_actions, num_of_actions); + MLX5_SET(ste_double_action_accelerated_modify_action_list_v1, d_action, + modify_actions_argument_pointer, rewrite_args); + } + + dr_ste_v1_set_reparse(hw_ste_p); +} + +static void dr_ste_v1_set_basic_rewrite_actions(u8 *hw_ste_p, + u8 *s_action, + u16 num_of_actions, + u32 rewrite_index) { MLX5_SET(ste_single_action_modify_list_v1, s_action, action_id, DR_STE_V1_ACTION_ID_MODIFY_LIST); MLX5_SET(ste_single_action_modify_list_v1, s_action, num_of_modify_actions, num_of_actions); MLX5_SET(ste_single_action_modify_list_v1, s_action, modify_actions_ptr, - re_write_index); + rewrite_index); dr_ste_v1_set_reparse(hw_ste_p); } +static void dr_ste_v1_set_rewrite_actions(u8 *hw_ste_p, + u8 *action, + u16 num_of_actions, + u32 rewrite_pattern, + u32 rewrite_args, + u8 *action_data) +{ + if (rewrite_pattern != MLX5DR_INVALID_PATTERN_INDEX) + return dr_ste_v1_set_accelerated_rewrite_actions(hw_ste_p, + action, + num_of_actions, + rewrite_pattern, + rewrite_args, + action_data); + + /* fall back to the code that doesn't support accelerated modify header */ + return dr_ste_v1_set_basic_rewrite_actions(hw_ste_p, + action, + num_of_actions, + rewrite_args); +} + static void dr_ste_v1_set_aso_flow_meter(u8 *d_action, u32 object_id, u32 offset, @@ -604,9 +649,6 @@ void dr_ste_v1_set_actions_tx(struct mlx5dr_domain *dmn, allow_modify_hdr = false; } - if (action_type_set[DR_ACTION_TYP_CTR]) - dr_ste_v1_set_counter_id(last_ste, attr->ctr_id); - if (action_type_set[DR_ACTION_TYP_MODIFY_HDR]) { if (!allow_modify_hdr || action_sz < DR_STE_ACTION_DOUBLE_SZ) { dr_ste_v1_arr_init_next_match(&last_ste, added_stes, @@ -617,7 +659,9 @@ void dr_ste_v1_set_actions_tx(struct mlx5dr_domain *dmn, } dr_ste_v1_set_rewrite_actions(last_ste, action, attr->modify_actions, - attr->modify_index); + attr->modify_pat_idx, + attr->modify_index, + attr->single_modify_action); action_sz -= DR_STE_ACTION_DOUBLE_SZ; action += DR_STE_ACTION_DOUBLE_SZ; allow_encap = false; @@ -724,6 +768,10 @@ void dr_ste_v1_set_actions_tx(struct mlx5dr_domain *dmn, attr->range.max); } + /* set counter ID on the last STE to adhere to DMFS behavior */ + if (action_type_set[DR_ACTION_TYP_CTR]) + dr_ste_v1_set_counter_id(last_ste, attr->ctr_id); + dr_ste_v1_set_hit_gvmi(last_ste, attr->hit_gvmi); dr_ste_v1_set_hit_addr(last_ste, attr->final_icm_addr, 1); } @@ -743,7 +791,9 @@ void dr_ste_v1_set_actions_rx(struct mlx5dr_domain *dmn, if (action_type_set[DR_ACTION_TYP_TNL_L3_TO_L2]) { dr_ste_v1_set_rewrite_actions(last_ste, action, attr->decap_actions, - attr->decap_index); + attr->decap_pat_idx, + attr->decap_index, + NULL); action_sz -= DR_STE_ACTION_DOUBLE_SZ; action += DR_STE_ACTION_DOUBLE_SZ; allow_modify_hdr = false; @@ -798,7 +848,9 @@ void dr_ste_v1_set_actions_rx(struct mlx5dr_domain *dmn, } dr_ste_v1_set_rewrite_actions(last_ste, action, attr->modify_actions, - attr->modify_index); + attr->modify_pat_idx, + attr->modify_index, + attr->single_modify_action); action_sz -= DR_STE_ACTION_DOUBLE_SZ; action += DR_STE_ACTION_DOUBLE_SZ; } @@ -2175,6 +2227,49 @@ dr_ste_v1_build_tnl_gtpu_flex_parser_1_init(struct mlx5dr_ste_build *sb, sb->ste_build_tag_func = &dr_ste_v1_build_tnl_gtpu_flex_parser_1_tag; } +int dr_ste_v1_alloc_modify_hdr_ptrn_arg(struct mlx5dr_action *action) +{ + struct mlx5dr_ptrn_mgr *ptrn_mgr; + int ret; + + ptrn_mgr = action->rewrite->dmn->ptrn_mgr; + if (!ptrn_mgr) + return -EOPNOTSUPP; + + action->rewrite->arg = mlx5dr_arg_get_obj(action->rewrite->dmn->arg_mgr, + action->rewrite->num_of_actions, + action->rewrite->data); + if (!action->rewrite->arg) { + mlx5dr_err(action->rewrite->dmn, "Failed allocating args for modify header\n"); + return -EAGAIN; + } + + action->rewrite->ptrn = + mlx5dr_ptrn_cache_get_pattern(ptrn_mgr, + action->rewrite->num_of_actions, + action->rewrite->data); + if (!action->rewrite->ptrn) { + mlx5dr_err(action->rewrite->dmn, "Failed to get pattern\n"); + ret = -EAGAIN; + goto put_arg; + } + + return 0; + +put_arg: + mlx5dr_arg_put_obj(action->rewrite->dmn->arg_mgr, + action->rewrite->arg); + return ret; +} + +void dr_ste_v1_free_modify_hdr_ptrn_arg(struct mlx5dr_action *action) +{ + mlx5dr_ptrn_cache_put_pattern(action->rewrite->dmn->ptrn_mgr, + action->rewrite->ptrn); + mlx5dr_arg_put_obj(action->rewrite->dmn->arg_mgr, + action->rewrite->arg); +} + static struct mlx5dr_ste_ctx ste_ctx_v1 = { /* Builders */ .build_eth_l2_src_dst_init = &dr_ste_v1_build_eth_l2_src_dst_init, @@ -2231,6 +2326,9 @@ static struct mlx5dr_ste_ctx ste_ctx_v1 = { .set_action_add = &dr_ste_v1_set_action_add, .set_action_copy = &dr_ste_v1_set_action_copy, .set_action_decap_l3_list = &dr_ste_v1_set_action_decap_l3_list, + .alloc_modify_hdr_chunk = &dr_ste_v1_alloc_modify_hdr_ptrn_arg, + .dealloc_modify_hdr_chunk = &dr_ste_v1_free_modify_hdr_ptrn_arg, + /* Send */ .prepare_for_postsend = &dr_ste_v1_prepare_for_postsend, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.h index b5c0f0f8392f..e2fc69867088 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.h @@ -31,6 +31,8 @@ void dr_ste_v1_set_action_copy(u8 *d_action, u8 dst_hw_field, u8 dst_shifter, u8 dst_len, u8 src_hw_field, u8 src_shifter); int dr_ste_v1_set_action_decap_l3_list(void *data, u32 data_sz, u8 *hw_action, u32 hw_action_sz, u16 *used_hw_action_num); +int dr_ste_v1_alloc_modify_hdr_ptrn_arg(struct mlx5dr_action *action); +void dr_ste_v1_free_modify_hdr_ptrn_arg(struct mlx5dr_action *action); void dr_ste_v1_build_eth_l2_src_dst_init(struct mlx5dr_ste_build *sb, struct mlx5dr_match_param *mask); void dr_ste_v1_build_eth_l3_ipv6_dst_init(struct mlx5dr_ste_build *sb, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v2.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v2.c index cf1a3c9a1cf4..808b013cf48c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v2.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v2.c @@ -221,6 +221,8 @@ static struct mlx5dr_ste_ctx ste_ctx_v2 = { .set_action_add = &dr_ste_v1_set_action_add, .set_action_copy = &dr_ste_v1_set_action_copy, .set_action_decap_l3_list = &dr_ste_v1_set_action_decap_l3_list, + .alloc_modify_hdr_chunk = &dr_ste_v1_alloc_modify_hdr_ptrn_arg, + .dealloc_modify_hdr_chunk = &dr_ste_v1_free_modify_hdr_ptrn_arg, /* Send */ .prepare_for_postsend = &dr_ste_v1_prepare_for_postsend, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h index 2b769dcbd453..678a993ab053 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h @@ -21,11 +21,16 @@ #define DR_NUM_OF_FLEX_PARSERS 8 #define DR_STE_MAX_FLEX_0_ID 3 #define DR_STE_MAX_FLEX_1_ID 7 +#define DR_ACTION_CACHE_LINE_SIZE 64 #define mlx5dr_err(dmn, arg...) mlx5_core_err((dmn)->mdev, ##arg) #define mlx5dr_info(dmn, arg...) mlx5_core_info((dmn)->mdev, ##arg) #define mlx5dr_dbg(dmn, arg...) mlx5_core_dbg((dmn)->mdev, ##arg) +struct mlx5dr_ptrn_mgr; +struct mlx5dr_arg_mgr; +struct mlx5dr_arg_obj; + static inline bool dr_is_flex_parser_0_id(u8 parser_id) { return parser_id <= DR_STE_MAX_FLEX_0_ID; @@ -66,6 +71,8 @@ enum mlx5dr_icm_chunk_size { enum mlx5dr_icm_type { DR_ICM_TYPE_STE, DR_ICM_TYPE_MODIFY_ACTION, + DR_ICM_TYPE_MODIFY_HDR_PTRN, + DR_ICM_TYPE_MAX, }; static inline enum mlx5dr_icm_chunk_size @@ -255,11 +262,15 @@ u64 mlx5dr_ste_get_mr_addr(struct mlx5dr_ste *ste); struct list_head *mlx5dr_ste_get_miss_list(struct mlx5dr_ste *ste); #define MLX5DR_MAX_VLANS 2 +#define MLX5DR_INVALID_PATTERN_INDEX 0xffffffff struct mlx5dr_ste_actions_attr { u32 modify_index; + u32 modify_pat_idx; u16 modify_actions; + u8 *single_modify_action; u32 decap_index; + u32 decap_pat_idx; u16 decap_actions; u8 decap_with_vlan:1; u64 final_icm_addr; @@ -331,6 +342,8 @@ int mlx5dr_ste_set_action_decap_l3_list(struct mlx5dr_ste_ctx *ste_ctx, u8 *hw_action, u32 hw_action_sz, u16 *used_hw_action_num); +int mlx5dr_ste_alloc_modify_hdr(struct mlx5dr_action *action); +void mlx5dr_ste_free_modify_hdr(struct mlx5dr_action *action); const struct mlx5dr_ste_action_modify_field * mlx5dr_ste_conv_modify_hdr_sw_field(struct mlx5dr_ste_ctx *ste_ctx, u16 sw_field); @@ -861,6 +874,8 @@ struct mlx5dr_cmd_caps { u64 esw_tx_drop_address; u32 log_icm_size; u64 hdr_modify_icm_addr; + u32 log_modify_pattern_icm_size; + u64 hdr_modify_pattern_icm_addr; u32 flex_protocols; u8 flex_parser_id_icmp_dw0; u8 flex_parser_id_icmp_dw1; @@ -888,6 +903,9 @@ struct mlx5dr_cmd_caps { struct mlx5dr_vports vports; bool prio_tag_required; struct mlx5dr_roce_cap roce_caps; + u16 log_header_modify_argument_granularity; + u16 log_header_modify_argument_max_alloc; + bool support_modify_argument; u8 is_ecpf:1; u8 isolate_vl_tc:1; }; @@ -910,6 +928,7 @@ struct mlx5dr_domain_info { u32 max_send_wr; u32 max_log_sw_icm_sz; u32 max_log_action_icm_sz; + u32 max_log_modify_hdr_pattern_icm_sz; struct mlx5dr_domain_rx_tx rx; struct mlx5dr_domain_rx_tx tx; struct mlx5dr_cmd_caps caps; @@ -928,6 +947,8 @@ struct mlx5dr_domain { struct mlx5dr_send_info_pool *send_info_pool_tx; struct kmem_cache *chunks_kmem_cache; struct kmem_cache *htbls_kmem_cache; + struct mlx5dr_ptrn_mgr *ptrn_mgr; + struct mlx5dr_arg_mgr *arg_mgr; struct mlx5dr_send_ring *send_ring; struct mlx5dr_domain_info info; struct xarray csum_fts_xa; @@ -935,6 +956,8 @@ struct mlx5dr_domain { struct list_head dbg_tbl_list; struct mlx5dr_dbg_dump_info dump_info; struct xarray definers_xa; + /* memory management statistics */ + u32 num_buddies[DR_ICM_TYPE_MAX]; }; struct mlx5dr_table_rx_tx { @@ -994,15 +1017,34 @@ struct mlx5dr_ste_action_modify_field { u8 l4_type; }; +struct mlx5dr_ptrn_obj { + struct mlx5dr_icm_chunk *chunk; + u8 *data; + u16 num_of_actions; + u32 index; + refcount_t refcount; + struct list_head list; +}; + +struct mlx5dr_arg_obj { + u32 obj_id; + u32 obj_offset; + struct list_head list_node; + u32 log_chunk_size; +}; + struct mlx5dr_action_rewrite { struct mlx5dr_domain *dmn; struct mlx5dr_icm_chunk *chunk; u8 *data; u16 num_of_actions; u32 index; + u8 single_action_opt:1; u8 allow_rx:1; u8 allow_tx:1; u8 modify_ttl:1; + struct mlx5dr_ptrn_obj *ptrn; + struct mlx5dr_arg_obj *arg; }; struct mlx5dr_action_reformat { @@ -1334,6 +1376,12 @@ struct mlx5dr_cmd_gid_attr { int mlx5dr_cmd_query_gid(struct mlx5_core_dev *mdev, u8 vhca_port_num, u16 index, struct mlx5dr_cmd_gid_attr *attr); +int mlx5dr_cmd_create_modify_header_arg(struct mlx5_core_dev *dev, + u16 log_obj_range, u32 pd, + u32 *obj_id); +void mlx5dr_cmd_destroy_modify_header_arg(struct mlx5_core_dev *dev, + u32 obj_id); + struct mlx5dr_icm_pool *mlx5dr_icm_pool_create(struct mlx5dr_domain *dmn, enum mlx5dr_icm_type icm_type); void mlx5dr_icm_pool_destroy(struct mlx5dr_icm_pool *pool); @@ -1368,6 +1416,7 @@ struct mlx5dr_qp { struct mlx5_wq_ctrl wq_ctrl; u32 qpn; struct { + unsigned int head; unsigned int pc; unsigned int cc; unsigned int size; @@ -1399,9 +1448,6 @@ struct mlx5dr_mr { size_t size; }; -#define MAX_SEND_CQE 64 -#define MIN_READ_SYNC 64 - struct mlx5dr_send_ring { struct mlx5dr_cq *cq; struct mlx5dr_qp *qp; @@ -1416,7 +1462,7 @@ struct mlx5dr_send_ring { u32 tx_head; void *buf; u32 buf_size; - u8 sync_buff[MIN_READ_SYNC]; + u8 *sync_buff; struct mlx5dr_mr *sync_mr; spinlock_t lock; /* Protect the data path of the send ring */ bool err_state; /* send_ring is not usable in err state */ @@ -1440,6 +1486,12 @@ int mlx5dr_send_postsend_formatted_htbl(struct mlx5dr_domain *dmn, bool update_hw_ste); int mlx5dr_send_postsend_action(struct mlx5dr_domain *dmn, struct mlx5dr_action *action); +int mlx5dr_send_postsend_pattern(struct mlx5dr_domain *dmn, + struct mlx5dr_icm_chunk *chunk, + u16 num_of_actions, + u8 *data); +int mlx5dr_send_postsend_args(struct mlx5dr_domain *dmn, u64 arg_id, + u16 num_of_actions, u8 *actions_data); int mlx5dr_send_info_pool_create(struct mlx5dr_domain *dmn); void mlx5dr_send_info_pool_destroy(struct mlx5dr_domain *dmn); @@ -1526,4 +1578,20 @@ static inline bool mlx5dr_supp_match_ranges(struct mlx5_core_dev *dev) (1ULL << MLX5_IFC_DEFINER_FORMAT_ID_SELECT)); } +bool mlx5dr_domain_is_support_ptrn_arg(struct mlx5dr_domain *dmn); +struct mlx5dr_ptrn_mgr *mlx5dr_ptrn_mgr_create(struct mlx5dr_domain *dmn); +void mlx5dr_ptrn_mgr_destroy(struct mlx5dr_ptrn_mgr *mgr); +struct mlx5dr_ptrn_obj *mlx5dr_ptrn_cache_get_pattern(struct mlx5dr_ptrn_mgr *mgr, + u16 num_of_actions, u8 *data); +void mlx5dr_ptrn_cache_put_pattern(struct mlx5dr_ptrn_mgr *mgr, + struct mlx5dr_ptrn_obj *pattern); +struct mlx5dr_arg_mgr *mlx5dr_arg_mgr_create(struct mlx5dr_domain *dmn); +void mlx5dr_arg_mgr_destroy(struct mlx5dr_arg_mgr *mgr); +struct mlx5dr_arg_obj *mlx5dr_arg_get_obj(struct mlx5dr_arg_mgr *mgr, + u16 num_of_actions, + u8 *data); +void mlx5dr_arg_put_obj(struct mlx5dr_arg_mgr *mgr, + struct mlx5dr_arg_obj *arg_obj); +u32 mlx5dr_arg_get_obj_id(struct mlx5dr_arg_obj *arg_obj); + #endif /* _DR_TYPES_H_ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr_ste_v1.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr_ste_v1.h index 790a17d6207f..ca3b0f1453a7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr_ste_v1.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr_ste_v1.h @@ -100,7 +100,7 @@ struct mlx5_ifc_ste_double_action_insert_with_ptr_v1_bits { u8 pointer[0x20]; }; -struct mlx5_ifc_ste_double_action_modify_action_list_v1_bits { +struct mlx5_ifc_ste_double_action_accelerated_modify_action_list_v1_bits { u8 action_id[0x8]; u8 modify_actions_pattern_pointer[0x18]; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/thermal.c b/drivers/net/ethernet/mellanox/mlx5/core/thermal.c new file mode 100644 index 000000000000..e47fa6fb836f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/thermal.c @@ -0,0 +1,108 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/device.h> +#include <linux/thermal.h> +#include <linux/err.h> +#include <linux/mlx5/driver.h> +#include "mlx5_core.h" +#include "thermal.h" + +#define MLX5_THERMAL_POLL_INT_MSEC 1000 +#define MLX5_THERMAL_NUM_TRIPS 0 +#define MLX5_THERMAL_ASIC_SENSOR_INDEX 0 + +/* Bit string indicating the writeablility of trip points if any */ +#define MLX5_THERMAL_TRIP_MASK (BIT(MLX5_THERMAL_NUM_TRIPS) - 1) + +struct mlx5_thermal { + struct mlx5_core_dev *mdev; + struct thermal_zone_device *tzdev; +}; + +static int mlx5_thermal_get_mtmp_temp(struct mlx5_core_dev *mdev, u32 id, int *p_temp) +{ + u32 mtmp_out[MLX5_ST_SZ_DW(mtmp_reg)] = {}; + u32 mtmp_in[MLX5_ST_SZ_DW(mtmp_reg)] = {}; + int err; + + MLX5_SET(mtmp_reg, mtmp_in, sensor_index, id); + + err = mlx5_core_access_reg(mdev, mtmp_in, sizeof(mtmp_in), + mtmp_out, sizeof(mtmp_out), + MLX5_REG_MTMP, 0, 0); + + if (err) + return err; + + *p_temp = MLX5_GET(mtmp_reg, mtmp_out, temperature); + + return 0; +} + +static int mlx5_thermal_get_temp(struct thermal_zone_device *tzdev, + int *p_temp) +{ + struct mlx5_thermal *thermal = tzdev->devdata; + struct mlx5_core_dev *mdev = thermal->mdev; + int err; + + err = mlx5_thermal_get_mtmp_temp(mdev, MLX5_THERMAL_ASIC_SENSOR_INDEX, p_temp); + + if (err) + return err; + + /* The unit of temp returned is in 0.125 C. The thermal + * framework expects the value in 0.001 C. + */ + *p_temp *= 125; + + return 0; +} + +static struct thermal_zone_device_ops mlx5_thermal_ops = { + .get_temp = mlx5_thermal_get_temp, +}; + +int mlx5_thermal_init(struct mlx5_core_dev *mdev) +{ + struct mlx5_thermal *thermal; + struct thermal_zone_device *tzd; + const char *data = "mlx5"; + + tzd = thermal_zone_get_zone_by_name(data); + if (!IS_ERR(tzd)) + return 0; + + thermal = kzalloc(sizeof(*thermal), GFP_KERNEL); + if (!thermal) + return -ENOMEM; + + thermal->mdev = mdev; + thermal->tzdev = thermal_zone_device_register(data, + MLX5_THERMAL_NUM_TRIPS, + MLX5_THERMAL_TRIP_MASK, + thermal, + &mlx5_thermal_ops, + NULL, 0, MLX5_THERMAL_POLL_INT_MSEC); + if (IS_ERR(thermal->tzdev)) { + dev_err(mdev->device, "Failed to register thermal zone device (%s) %ld\n", + data, PTR_ERR(thermal->tzdev)); + kfree(thermal); + return -EINVAL; + } + + mdev->thermal = thermal; + return 0; +} + +void mlx5_thermal_uninit(struct mlx5_core_dev *mdev) +{ + if (!mdev->thermal) + return; + + thermal_zone_device_unregister(mdev->thermal->tzdev); + kfree(mdev->thermal); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/thermal.h b/drivers/net/ethernet/mellanox/mlx5/core/thermal.h new file mode 100644 index 000000000000..7d752c122192 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/thermal.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB + * Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. + */ +#ifndef __MLX5_THERMAL_DRIVER_H +#define __MLX5_THERMAL_DRIVER_H + +#if IS_ENABLED(CONFIG_THERMAL) +int mlx5_thermal_init(struct mlx5_core_dev *mdev); +void mlx5_thermal_uninit(struct mlx5_core_dev *mdev); +#else +static inline int mlx5_thermal_init(struct mlx5_core_dev *mdev) +{ + mdev->thermal = NULL; + return 0; +} + +static inline void mlx5_thermal_uninit(struct mlx5_core_dev *mdev) { } +#endif + +#endif /* __MLX5_THERMAL_DRIVER_H */ diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c index 66dd42a8e72f..70d7fff24fa2 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c @@ -19,6 +19,9 @@ #define MLXSW_THERMAL_ASIC_TEMP_NORM 75000 /* 75C */ #define MLXSW_THERMAL_ASIC_TEMP_HIGH 85000 /* 85C */ #define MLXSW_THERMAL_ASIC_TEMP_HOT 105000 /* 105C */ +#define MLXSW_THERMAL_MODULE_TEMP_NORM 55000 /* 55C */ +#define MLXSW_THERMAL_MODULE_TEMP_HIGH 65000 /* 65C */ +#define MLXSW_THERMAL_MODULE_TEMP_HOT 80000 /* 80C */ #define MLXSW_THERMAL_HYSTERESIS_TEMP 5000 /* 5C */ #define MLXSW_THERMAL_MODULE_TEMP_SHIFT (MLXSW_THERMAL_HYSTERESIS_TEMP * 2) #define MLXSW_THERMAL_MAX_STATE 10 @@ -30,12 +33,6 @@ static char * const mlxsw_thermal_external_allowed_cdev[] = { "mlxreg_fan", }; -enum mlxsw_thermal_trips { - MLXSW_THERMAL_TEMP_TRIP_NORM, - MLXSW_THERMAL_TEMP_TRIP_HIGH, - MLXSW_THERMAL_TEMP_TRIP_HOT, -}; - struct mlxsw_cooling_states { int min_state; int max_state; @@ -59,6 +56,24 @@ static const struct thermal_trip default_thermal_trips[] = { }, }; +static const struct thermal_trip default_thermal_module_trips[] = { + { /* In range - 0-40% PWM */ + .type = THERMAL_TRIP_ACTIVE, + .temperature = MLXSW_THERMAL_MODULE_TEMP_NORM, + .hysteresis = MLXSW_THERMAL_HYSTERESIS_TEMP, + }, + { + /* In range - 40-100% PWM */ + .type = THERMAL_TRIP_ACTIVE, + .temperature = MLXSW_THERMAL_MODULE_TEMP_HIGH, + .hysteresis = MLXSW_THERMAL_HYSTERESIS_TEMP, + }, + { /* Warning */ + .type = THERMAL_TRIP_HOT, + .temperature = MLXSW_THERMAL_MODULE_TEMP_HOT, + }, +}; + static const struct mlxsw_cooling_states default_cooling_states[] = { { .min_state = 0, @@ -140,63 +155,6 @@ static int mlxsw_get_cooling_device_idx(struct mlxsw_thermal *thermal, return -ENODEV; } -static void -mlxsw_thermal_module_trips_reset(struct mlxsw_thermal_module *tz) -{ - tz->trips[MLXSW_THERMAL_TEMP_TRIP_NORM].temperature = 0; - tz->trips[MLXSW_THERMAL_TEMP_TRIP_HIGH].temperature = 0; - tz->trips[MLXSW_THERMAL_TEMP_TRIP_HOT].temperature = 0; -} - -static int -mlxsw_thermal_module_trips_update(struct device *dev, struct mlxsw_core *core, - struct mlxsw_thermal_module *tz, - int crit_temp, int emerg_temp) -{ - int err; - - /* Do not try to query temperature thresholds directly from the module's - * EEPROM if we got valid thresholds from MTMP. - */ - if (!emerg_temp || !crit_temp) { - err = mlxsw_env_module_temp_thresholds_get(core, tz->slot_index, - tz->module, - SFP_TEMP_HIGH_WARN, - &crit_temp); - if (err) - return err; - - err = mlxsw_env_module_temp_thresholds_get(core, tz->slot_index, - tz->module, - SFP_TEMP_HIGH_ALARM, - &emerg_temp); - if (err) - return err; - } - - if (crit_temp > emerg_temp) { - dev_warn(dev, "%s : Critical threshold %d is above emergency threshold %d\n", - thermal_zone_device_type(tz->tzdev), crit_temp, emerg_temp); - return 0; - } - - /* According to the system thermal requirements, the thermal zones are - * defined with three trip points. The critical and emergency - * temperature thresholds, provided by QSFP module are set as "active" - * and "hot" trip points, "normal" trip point is derived from "active" - * by subtracting double hysteresis value. - */ - if (crit_temp >= MLXSW_THERMAL_MODULE_TEMP_SHIFT) - tz->trips[MLXSW_THERMAL_TEMP_TRIP_NORM].temperature = crit_temp - - MLXSW_THERMAL_MODULE_TEMP_SHIFT; - else - tz->trips[MLXSW_THERMAL_TEMP_TRIP_NORM].temperature = crit_temp; - tz->trips[MLXSW_THERMAL_TEMP_TRIP_HIGH].temperature = crit_temp; - tz->trips[MLXSW_THERMAL_TEMP_TRIP_HOT].temperature = emerg_temp; - - return 0; -} - static int mlxsw_thermal_bind(struct thermal_zone_device *tzdev, struct thermal_cooling_device *cdev) { @@ -325,59 +283,22 @@ static int mlxsw_thermal_module_unbind(struct thermal_zone_device *tzdev, return err; } -static void -mlxsw_thermal_module_temp_and_thresholds_get(struct mlxsw_core *core, - u8 slot_index, u16 sensor_index, - int *p_temp, int *p_crit_temp, - int *p_emerg_temp) -{ - char mtmp_pl[MLXSW_REG_MTMP_LEN]; - int err; - - /* Read module temperature and thresholds. */ - mlxsw_reg_mtmp_pack(mtmp_pl, slot_index, sensor_index, - false, false); - err = mlxsw_reg_query(core, MLXSW_REG(mtmp), mtmp_pl); - if (err) { - /* Set temperature and thresholds to zero to avoid passing - * uninitialized data back to the caller. - */ - *p_temp = 0; - *p_crit_temp = 0; - *p_emerg_temp = 0; - - return; - } - mlxsw_reg_mtmp_unpack(mtmp_pl, p_temp, NULL, p_crit_temp, p_emerg_temp, - NULL); -} - static int mlxsw_thermal_module_temp_get(struct thermal_zone_device *tzdev, int *p_temp) { struct mlxsw_thermal_module *tz = thermal_zone_device_priv(tzdev); struct mlxsw_thermal *thermal = tz->parent; - int temp, crit_temp, emerg_temp; - struct device *dev; + char mtmp_pl[MLXSW_REG_MTMP_LEN]; u16 sensor_index; + int err; - dev = thermal->bus_info->dev; sensor_index = MLXSW_REG_MTMP_MODULE_INDEX_MIN + tz->module; - - /* Read module temperature and thresholds. */ - mlxsw_thermal_module_temp_and_thresholds_get(thermal->core, - tz->slot_index, - sensor_index, &temp, - &crit_temp, &emerg_temp); - *p_temp = temp; - - if (!temp) - return 0; - - /* Update trip points. */ - mlxsw_thermal_module_trips_update(dev, thermal->core, tz, - crit_temp, emerg_temp); - + mlxsw_reg_mtmp_pack(mtmp_pl, tz->slot_index, sensor_index, + false, false); + err = mlxsw_reg_query(thermal->core, MLXSW_REG(mtmp), mtmp_pl); + if (err) + return err; + mlxsw_reg_mtmp_unpack(mtmp_pl, p_temp, NULL, NULL, NULL, NULL); return 0; } @@ -521,36 +442,26 @@ static void mlxsw_thermal_module_tz_fini(struct thermal_zone_device *tzdev) thermal_zone_device_unregister(tzdev); } -static int +static void mlxsw_thermal_module_init(struct device *dev, struct mlxsw_core *core, struct mlxsw_thermal *thermal, struct mlxsw_thermal_area *area, u8 module) { struct mlxsw_thermal_module *module_tz; - int dummy_temp, crit_temp, emerg_temp; - u16 sensor_index; - sensor_index = MLXSW_REG_MTMP_MODULE_INDEX_MIN + module; module_tz = &area->tz_module_arr[module]; /* Skip if parent is already set (case of port split). */ if (module_tz->parent) - return 0; + return; module_tz->module = module; module_tz->slot_index = area->slot_index; module_tz->parent = thermal; - memcpy(module_tz->trips, default_thermal_trips, + BUILD_BUG_ON(ARRAY_SIZE(default_thermal_module_trips) != + MLXSW_THERMAL_NUM_TRIPS); + memcpy(module_tz->trips, default_thermal_module_trips, sizeof(thermal->trips)); memcpy(module_tz->cooling_states, default_cooling_states, sizeof(thermal->cooling_states)); - /* Initialize all trip point. */ - mlxsw_thermal_module_trips_reset(module_tz); - /* Read module temperature and thresholds. */ - mlxsw_thermal_module_temp_and_thresholds_get(core, area->slot_index, - sensor_index, &dummy_temp, - &crit_temp, &emerg_temp); - /* Update trip point according to the module data. */ - return mlxsw_thermal_module_trips_update(dev, core, module_tz, - crit_temp, emerg_temp); } static void mlxsw_thermal_module_fini(struct mlxsw_thermal_module *module_tz) @@ -589,11 +500,8 @@ mlxsw_thermal_modules_init(struct device *dev, struct mlxsw_core *core, if (!area->tz_module_arr) return -ENOMEM; - for (i = 0; i < area->tz_module_num; i++) { - err = mlxsw_thermal_module_init(dev, core, thermal, area, i); - if (err) - goto err_thermal_module_init; - } + for (i = 0; i < area->tz_module_num; i++) + mlxsw_thermal_module_init(dev, core, thermal, area, i); for (i = 0; i < area->tz_module_num; i++) { module_tz = &area->tz_module_arr[i]; @@ -607,7 +515,6 @@ mlxsw_thermal_modules_init(struct device *dev, struct mlxsw_core *core, return 0; err_thermal_module_tz_init: -err_thermal_module_init: for (i = area->tz_module_num - 1; i >= 0; i--) mlxsw_thermal_module_fini(&area->tz_module_arr[i]); kfree(area->tz_module_arr); |