35 files changed, 2331 insertions, 484 deletions
diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index 557c565c26fc..32bad014d76c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -641,8 +641,8 @@ static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget) struct i40e_tx_desc *tx_desc = NULL; struct i40e_tx_buffer *tx_bi; bool work_done = true; + struct xdp_desc desc; dma_addr_t dma; - u32 len; while (budget-- > 0) { if (!unlikely(I40E_DESC_UNUSED(xdp_ring))) { @@ -651,21 +651,23 @@ static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget) break; } - if (!xsk_umem_consume_tx(xdp_ring->xsk_umem, &dma, &len)) + if (!xsk_umem_consume_tx(xdp_ring->xsk_umem, &desc)) break; - dma_sync_single_for_device(xdp_ring->dev, dma, len, + dma = xdp_umem_get_dma(xdp_ring->xsk_umem, desc.addr); + + dma_sync_single_for_device(xdp_ring->dev, dma, desc.len, DMA_BIDIRECTIONAL); tx_bi = &xdp_ring->tx_bi[xdp_ring->next_to_use]; - tx_bi->bytecount = len; + tx_bi->bytecount = desc.len; tx_desc = I40E_TX_DESC(xdp_ring, xdp_ring->next_to_use); tx_desc->buffer_addr = cpu_to_le64(dma); tx_desc->cmd_type_offset_bsz = build_ctob(I40E_TX_DESC_CMD_ICRC | I40E_TX_DESC_CMD_EOP, - 0, len, 0); + 0, desc.len, 0); xdp_ring->next_to_use++; if (xdp_ring->next_to_use == xdp_ring->count) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c index 6af55bb3bef3..6b609553329f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c @@ -571,8 +571,9 @@ static bool ixgbe_xmit_zc(struct ixgbe_ring *xdp_ring, unsigned int budget) union ixgbe_adv_tx_desc *tx_desc = NULL; struct ixgbe_tx_buffer *tx_bi; bool work_done = true; - u32 len, cmd_type; + struct xdp_desc desc; dma_addr_t dma; + u32 cmd_type; while (budget-- > 0) { if (unlikely(!ixgbe_desc_unused(xdp_ring)) || @@ -581,14 +582,16 @@ static bool ixgbe_xmit_zc(struct ixgbe_ring *xdp_ring, unsigned int budget) break; } - if (!xsk_umem_consume_tx(xdp_ring->xsk_umem, &dma, &len)) + if (!xsk_umem_consume_tx(xdp_ring->xsk_umem, &desc)) break; - dma_sync_single_for_device(xdp_ring->dev, dma, len, + dma = xdp_umem_get_dma(xdp_ring->xsk_umem, desc.addr); + + dma_sync_single_for_device(xdp_ring->dev, dma, desc.len, DMA_BIDIRECTIONAL); tx_bi = &xdp_ring->tx_buffer_info[xdp_ring->next_to_use]; - tx_bi->bytecount = len; + tx_bi->bytecount = desc.len; tx_bi->xdpf = NULL; tx_bi->gso_segs = 1; @@ -599,10 +602,10 @@ static bool ixgbe_xmit_zc(struct ixgbe_ring *xdp_ring, unsigned int budget) cmd_type = IXGBE_ADVTXD_DTYP_DATA | IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DCMD_IFCS; - cmd_type |= len | IXGBE_TXD_CMD; + cmd_type |= desc.len | IXGBE_TXD_CMD; tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type); tx_desc->read.olinfo_status = - cpu_to_le32(len << IXGBE_ADVTXD_PAYLEN_SHIFT); + cpu_to_le32(desc.len << IXGBE_ADVTXD_PAYLEN_SHIFT); xdp_ring->next_to_use++; if (xdp_ring->next_to_use == xdp_ring->count) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 5fe2bf916c06..90db891fcc22 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -24,7 +24,7 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \ en_tx.o en_rx.o en_dim.o en_txrx.o en/xdp.o en_stats.o \ en_selftest.o en/port.o 
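The i40e and ixgbe hunks above switch xsk_umem_consume_tx() from handing back a raw (dma, len) pair to handing back a struct xdp_desc, and the driver then resolves the DMA address itself with xdp_umem_get_dma(umem, desc.addr) before syncing desc.len bytes. The standalone sketch below models that address translation with simplified stand-in types (model_umem and model_umem_get_dma are illustrative, not the kernel implementation); it assumes one DMA mapping per UMEM page, which is how the umem.c hunk later in this patch maps the pages.

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

/* Simplified stand-ins for struct xdp_desc and the per-page DMA table kept
 * by the real xdp_umem; field names follow the kernel, the layout is
 * illustrative only.
 */
struct xdp_desc   { uint64_t addr; uint32_t len; uint32_t options; };
struct umem_page  { uint64_t dma; };
struct model_umem { struct umem_page *pages; uint64_t npgs; };

/* Conceptually what xdp_umem_get_dma() does: pick the page the descriptor
 * address falls into and add the offset within that page.
 */
static uint64_t model_umem_get_dma(const struct model_umem *umem, uint64_t addr)
{
	return umem->pages[addr >> PAGE_SHIFT].dma + (addr & (PAGE_SIZE - 1));
}

int main(void)
{
	struct umem_page pages[2] = { { .dma = 0x10000 }, { .dma = 0x2a000 } };
	struct model_umem umem = { .pages = pages, .npgs = 2 };
	struct xdp_desc desc = { .addr = PAGE_SIZE + 256, .len = 1500 };

	/* The driver would dma_sync_single_for_device(dev, dma, desc.len, ...)
	 * and post (dma, desc.len) to its TX ring, as in the hunks above.
	 */
	printf("desc.addr=%llu -> dma=0x%llx len=%u\n",
	       (unsigned long long)desc.addr,
	       (unsigned long long)model_umem_get_dma(&umem, desc.addr),
	       desc.len);
	return 0;
}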
en/monitor_stats.o en/reporter_tx.o \ - en/params.o + en/params.o en/xsk/umem.o en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o # # Netdev extra diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 992d5cb646b2..cb00e622c006 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -137,6 +137,7 @@ struct page_pool; #define MLX5E_MAX_NUM_CHANNELS (MLX5E_INDIR_RQT_SIZE >> 1) #define MLX5E_MAX_NUM_SQS (MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC) #define MLX5E_TX_CQ_POLL_BUDGET 128 +#define MLX5E_TX_XSK_POLL_BUDGET 64 #define MLX5E_SQ_RECOVER_MIN_INTERVAL 500 /* msecs */ #define MLX5E_UMR_WQE_INLINE_SZ \ @@ -155,6 +156,11 @@ do { \ ##__VA_ARGS__); \ } while (0) +enum mlx5e_rq_group { + MLX5E_RQ_GROUP_REGULAR, + MLX5E_RQ_GROUP_XSK, + MLX5E_NUM_RQ_GROUPS /* Keep last. */ +}; static inline u16 mlx5_min_rx_wqes(int wq_type, u32 wq_size) { @@ -179,7 +185,8 @@ static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev) /* Use this function to get max num channels after netdev was created */ static inline int mlx5e_get_netdev_max_channels(struct net_device *netdev) { - return min_t(unsigned int, netdev->num_rx_queues, + return min_t(unsigned int, + netdev->num_rx_queues / MLX5E_NUM_RQ_GROUPS, netdev->num_tx_queues); } @@ -250,6 +257,7 @@ struct mlx5e_params { u32 lro_timeout; u32 pflags; struct bpf_prog *xdp_prog; + struct mlx5e_xsk *xsk; unsigned int sw_mtu; int hard_mtu; }; @@ -348,6 +356,13 @@ enum { struct mlx5e_sq_wqe_info { u8 opcode; + + /* Auxiliary data for different opcodes. */ + union { + struct { + struct mlx5e_rq *rq; + } umr; + }; }; struct mlx5e_txqsq { @@ -392,14 +407,55 @@ struct mlx5e_txqsq { } ____cacheline_aligned_in_smp; struct mlx5e_dma_info { - struct page *page; - dma_addr_t addr; + dma_addr_t addr; + union { + struct page *page; + struct { + u64 handle; + void *data; + } xsk; + }; +}; + +/* XDP packets can be transmitted in different ways. On completion, we need to + * distinguish between them to clean up things in a proper way. + */ +enum mlx5e_xdp_xmit_mode { + /* An xdp_frame was transmitted due to either XDP_REDIRECT from another + * device or XDP_TX from an XSK RQ. The frame has to be unmapped and + * returned. + */ + MLX5E_XDP_XMIT_MODE_FRAME, + + /* The xdp_frame was created in place as a result of XDP_TX from a + * regular RQ. No DMA remapping happened, and the page belongs to us. + */ + MLX5E_XDP_XMIT_MODE_PAGE, + + /* No xdp_frame was created at all, the transmit happened from a UMEM + * page. The UMEM Completion Ring producer pointer has to be increased. 
+ */ + MLX5E_XDP_XMIT_MODE_XSK, }; struct mlx5e_xdp_info { - struct xdp_frame *xdpf; - dma_addr_t dma_addr; - struct mlx5e_dma_info di; + enum mlx5e_xdp_xmit_mode mode; + union { + struct { + struct xdp_frame *xdpf; + dma_addr_t dma_addr; + } frame; + struct { + struct mlx5e_rq *rq; + struct mlx5e_dma_info di; + } page; + }; +}; + +struct mlx5e_xdp_xmit_data { + dma_addr_t dma_addr; + void *data; + u32 len; }; struct mlx5e_xdp_info_fifo { @@ -425,8 +481,12 @@ struct mlx5e_xdp_mpwqe { }; struct mlx5e_xdpsq; -typedef bool (*mlx5e_fp_xmit_xdp_frame)(struct mlx5e_xdpsq*, - struct mlx5e_xdp_info*); +typedef int (*mlx5e_fp_xmit_xdp_frame_check)(struct mlx5e_xdpsq *); +typedef bool (*mlx5e_fp_xmit_xdp_frame)(struct mlx5e_xdpsq *, + struct mlx5e_xdp_xmit_data *, + struct mlx5e_xdp_info *, + int); + struct mlx5e_xdpsq { /* data path */ @@ -443,8 +503,10 @@ struct mlx5e_xdpsq { struct mlx5e_cq cq; /* read only */ + struct xdp_umem *umem; struct mlx5_wq_cyc wq; struct mlx5e_xdpsq_stats *stats; + mlx5e_fp_xmit_xdp_frame_check xmit_xdp_frame_check; mlx5e_fp_xmit_xdp_frame xmit_xdp_frame; struct { struct mlx5e_xdp_wqe_info *wqe_info; @@ -571,9 +633,11 @@ struct mlx5e_rq { u8 log_stride_sz; u8 umr_in_progress; u8 umr_last_bulk; + u8 umr_completed; } mpwqe; }; struct { + u16 umem_headroom; u16 headroom; u8 map_dir; /* dma map direction */ } buff; @@ -600,10 +664,14 @@ struct mlx5e_rq { /* XDP */ struct bpf_prog *xdp_prog; - struct mlx5e_xdpsq xdpsq; + struct mlx5e_xdpsq *xdpsq; DECLARE_BITMAP(flags, 8); struct page_pool *page_pool; + /* AF_XDP zero-copy */ + struct zero_copy_allocator zca; + struct xdp_umem *umem; + /* control */ struct mlx5_wq_ctrl wq_ctrl; __be32 mkey_be; @@ -616,9 +684,15 @@ struct mlx5e_rq { struct xdp_rxq_info xdp_rxq; } ____cacheline_aligned_in_smp; +enum mlx5e_channel_state { + MLX5E_CHANNEL_STATE_XSK, + MLX5E_CHANNEL_NUM_STATES +}; + struct mlx5e_channel { /* data path */ struct mlx5e_rq rq; + struct mlx5e_xdpsq rq_xdpsq; struct mlx5e_txqsq sq[MLX5E_MAX_NUM_TC]; struct mlx5e_icosq icosq; /* internal control operations */ bool xdp; @@ -631,6 +705,13 @@ struct mlx5e_channel { /* XDP_REDIRECT */ struct mlx5e_xdpsq xdpsq; + /* AF_XDP zero-copy */ + struct mlx5e_rq xskrq; + struct mlx5e_xdpsq xsksq; + struct mlx5e_icosq xskicosq; + /* xskicosq can be accessed from any CPU - the spinlock protects it. */ + spinlock_t xskicosq_lock; + /* data path - accessed per napi poll */ struct irq_desc *irq_desc; struct mlx5e_ch_stats *stats; @@ -639,6 +720,7 @@ struct mlx5e_channel { struct mlx5e_priv *priv; struct mlx5_core_dev *mdev; struct hwtstamp_config *tstamp; + DECLARE_BITMAP(state, MLX5E_CHANNEL_NUM_STATES); int ix; int cpu; cpumask_var_t xps_cpumask; @@ -654,14 +736,17 @@ struct mlx5e_channel_stats { struct mlx5e_ch_stats ch; struct mlx5e_sq_stats sq[MLX5E_MAX_NUM_TC]; struct mlx5e_rq_stats rq; + struct mlx5e_rq_stats xskrq; struct mlx5e_xdpsq_stats rq_xdpsq; struct mlx5e_xdpsq_stats xdpsq; + struct mlx5e_xdpsq_stats xsksq; } ____cacheline_aligned_in_smp; enum { MLX5E_STATE_OPENED, MLX5E_STATE_DESTROYING, MLX5E_STATE_XDP_TX_ENABLED, + MLX5E_STATE_XDP_OPEN, }; struct mlx5e_rqt { @@ -694,6 +779,17 @@ struct mlx5e_modify_sq_param { int rl_index; }; +struct mlx5e_xsk { + /* UMEMs are stored separately from channels, because we don't want to + * lose them when channels are recreated. The kernel also stores UMEMs, + * but it doesn't distinguish between zero-copy and non-zero-copy UMEMs, + * so rely on our mechanism. 
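The new mlx5e_xdp_xmit_mode / mlx5e_xdp_info layout above turns the per-descriptor bookkeeping into a tagged union, so the completion path can tell redirected frames, driver pages and UMEM transmissions apart. Below is a minimal standalone model of that dispatch; the types are simplified stand-ins and plain counters replace the real unmap/recycle/complete calls.

#include <stdio.h>

enum xmit_mode { XMIT_MODE_FRAME, XMIT_MODE_PAGE, XMIT_MODE_XSK };

/* Simplified stand-in for struct mlx5e_xdp_info: one tag plus a union of
 * per-mode payloads. Only what the dispatch needs is modeled.
 */
struct xdp_info {
	enum xmit_mode mode;
	union {
		struct { void *xdpf; unsigned long dma_addr; } frame;
		struct { void *rq; void *di; } page;
	};
};

/* Mirrors the switch in mlx5e_free_xdpsq_desc(): FRAME entries are unmapped
 * and returned, PAGE entries are recycled, XSK entries only bump a counter
 * that is later reported once via xsk_umem_complete_tx().
 */
static void complete_one(const struct xdp_info *xdpi, unsigned int *xsk_frames)
{
	switch (xdpi->mode) {
	case XMIT_MODE_FRAME:
		/* dma_unmap_single() + xdp_return_frame() in the driver */
		break;
	case XMIT_MODE_PAGE:
		/* mlx5e_page_release_dynamic() in the driver */
		break;
	case XMIT_MODE_XSK:
		(*xsk_frames)++;
		break;
	}
}

int main(void)
{
	struct xdp_info ring[3] = {
		{ .mode = XMIT_MODE_XSK },
		{ .mode = XMIT_MODE_PAGE },
		{ .mode = XMIT_MODE_XSK },
	};
	unsigned int xsk_frames = 0;

	for (int i = 0; i < 3; i++)
		complete_one(&ring[i], &xsk_frames);

	/* One batched xsk_umem_complete_tx(umem, xsk_frames) would follow. */
	printf("xsk completions to report: %u\n", xsk_frames);
	return 0;
}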
+ */ + struct xdp_umem **umems; + u16 refcnt; + bool ever_used; +}; + struct mlx5e_priv { /* priv data path fields - start */ struct mlx5e_txqsq *txq2sq[MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC]; @@ -714,6 +810,7 @@ struct mlx5e_priv { struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS]; struct mlx5e_tir inner_indir_tir[MLX5E_NUM_INDIR_TIRS]; struct mlx5e_tir direct_tir[MLX5E_MAX_NUM_CHANNELS]; + struct mlx5e_tir xsk_tir[MLX5E_MAX_NUM_CHANNELS]; struct mlx5e_rss_params rss_params; u32 tx_rates[MLX5E_MAX_NUM_SQS]; @@ -750,6 +847,7 @@ struct mlx5e_priv { struct mlx5e_tls *tls; #endif struct devlink_health_reporter *tx_reporter; + struct mlx5e_xsk xsk; }; struct mlx5e_profile { @@ -793,11 +891,13 @@ bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev, struct mlx5e_params *params); void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info); -void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info, - bool recycle); +void mlx5e_page_release_dynamic(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info, + bool recycle); void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq); +void mlx5e_poll_ico_cq(struct mlx5e_cq *cq); bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq); void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix); void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix); @@ -853,6 +953,30 @@ void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_rss_params *rss_params, void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in, int inlen); struct mlx5e_tirc_config mlx5e_tirc_get_default_config(enum mlx5e_traffic_types tt); +struct mlx5e_xsk_param; + +struct mlx5e_rq_param; +int mlx5e_open_rq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct mlx5e_rq_param *param, struct mlx5e_xsk_param *xsk, + struct xdp_umem *umem, struct mlx5e_rq *rq); +int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time); +void mlx5e_deactivate_rq(struct mlx5e_rq *rq); +void mlx5e_close_rq(struct mlx5e_rq *rq); + +struct mlx5e_sq_param; +int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct mlx5e_sq_param *param, struct mlx5e_icosq *sq); +void mlx5e_close_icosq(struct mlx5e_icosq *sq); +int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct mlx5e_sq_param *param, struct xdp_umem *umem, + struct mlx5e_xdpsq *sq, bool is_redirect); +void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq); + +struct mlx5e_cq_param; +int mlx5e_open_cq(struct mlx5e_channel *c, struct net_dim_cq_moder moder, + struct mlx5e_cq_param *param, struct mlx5e_cq *cq); +void mlx5e_close_cq(struct mlx5e_cq *cq); + int mlx5e_open_locked(struct net_device *netdev); int mlx5e_close_locked(struct net_device *netdev); @@ -1023,10 +1147,10 @@ int mlx5e_create_indirect_rqt(struct mlx5e_priv *priv); int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc); void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc); -int mlx5e_create_direct_rqts(struct mlx5e_priv *priv); -void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv); -int mlx5e_create_direct_tirs(struct mlx5e_priv *priv); -void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv); +int mlx5e_create_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs); +void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs); +int mlx5e_create_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir 
*tirs); +void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs); void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt); int mlx5e_create_tis(struct mlx5_core_dev *mdev, int tc, @@ -1095,6 +1219,7 @@ void mlx5e_detach_netdev(struct mlx5e_priv *priv); void mlx5e_destroy_netdev(struct mlx5e_priv *priv); void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv); void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, + struct mlx5e_xsk *xsk, struct mlx5e_rss_params *rss_params, struct mlx5e_params *params, u16 max_channels, u16 mtu); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index d3744bffbae3..79301d116667 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -3,65 +3,102 @@ #include "en/params.h" -u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params) +static inline bool mlx5e_rx_is_xdp(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) { - u16 hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); - u16 linear_rq_headroom = params->xdp_prog ? - XDP_PACKET_HEADROOM : MLX5_RX_HEADROOM; - u32 frag_sz; + return params->xdp_prog || xsk; +} + +u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) +{ + u16 headroom = NET_IP_ALIGN; + + if (mlx5e_rx_is_xdp(params, xsk)) { + headroom += XDP_PACKET_HEADROOM; + if (xsk) + headroom += xsk->headroom; + } else { + headroom += MLX5_RX_HEADROOM; + } + + return headroom; +} + +u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) +{ + u32 hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); + u16 linear_rq_headroom = mlx5e_get_linear_rq_headroom(params, xsk); + u32 frag_sz = linear_rq_headroom + hw_mtu; - linear_rq_headroom += NET_IP_ALIGN; + /* AF_XDP doesn't build SKBs in place. */ + if (!xsk) + frag_sz = MLX5_SKB_FRAG_SZ(frag_sz); - frag_sz = MLX5_SKB_FRAG_SZ(linear_rq_headroom + hw_mtu); + /* XDP in mlx5e doesn't support multiple packets per page. */ + if (mlx5e_rx_is_xdp(params, xsk)) + frag_sz = max_t(u32, frag_sz, PAGE_SIZE); - if (params->xdp_prog && frag_sz < PAGE_SIZE) - frag_sz = PAGE_SIZE; + /* Even if we can go with a smaller fragment size, we must not put + * multiple packets into a single frame. + */ + if (xsk) + frag_sz = max_t(u32, frag_sz, xsk->chunk_size); return frag_sz; } -u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params) +u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) { - u32 linear_frag_sz = mlx5e_rx_get_linear_frag_sz(params); + u32 linear_frag_sz = mlx5e_rx_get_linear_frag_sz(params, xsk); return MLX5_MPWRQ_LOG_WQE_SZ - order_base_2(linear_frag_sz); } -bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params) +bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) { - u32 frag_sz = mlx5e_rx_get_linear_frag_sz(params); + /* AF_XDP allocates SKBs on XDP_PASS - ensure they don't occupy more + * than one page. For this, check both with and without xsk. 
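The reworked params.c helpers above compute the RX headroom and the linear fragment size as a function of both the XDP program and the optional XSK parameters. The standalone sketch below re-derives that arithmetic so the sizing rules can be checked by hand; the constants are illustrative stand-ins and MLX5_SKB_FRAG_SZ() is approximated by adding a fixed skb_shared_info overhead, so treat it as a model, not the driver code.

#include <stdint.h>
#include <stdio.h>

/* Illustrative constants only; real values come from the kernel headers. */
#define NET_IP_ALIGN         0u
#define XDP_PACKET_HEADROOM  256u
#define MLX5_RX_HEADROOM     64u   /* stand-in value */
#define PAGE_SIZE            4096u
#define SHINFO_OVERHEAD      320u  /* stand-in for the skb_shared_info tail */

struct xsk_param { uint16_t headroom; uint16_t chunk_size; };

/* Mirrors mlx5e_get_linear_rq_headroom(): XDP (and XSK on top of it) uses
 * the XDP headroom, otherwise the regular driver headroom applies.
 */
static uint32_t linear_rq_headroom(int has_xdp_prog, const struct xsk_param *xsk)
{
	uint32_t headroom = NET_IP_ALIGN;

	if (has_xdp_prog || xsk) {
		headroom += XDP_PACKET_HEADROOM;
		if (xsk)
			headroom += xsk->headroom;
	} else {
		headroom += MLX5_RX_HEADROOM;
	}
	return headroom;
}

/* Mirrors mlx5e_rx_get_linear_frag_sz(): SKB overhead only for non-XSK,
 * one packet per page under XDP, and never smaller than the XSK chunk.
 */
static uint32_t linear_frag_sz(uint32_t hw_mtu, int has_xdp_prog,
			       const struct xsk_param *xsk)
{
	uint32_t frag_sz = linear_rq_headroom(has_xdp_prog, xsk) + hw_mtu;

	if (!xsk)
		frag_sz += SHINFO_OVERHEAD;          /* ~MLX5_SKB_FRAG_SZ() */
	if (has_xdp_prog || xsk)
		frag_sz = frag_sz > PAGE_SIZE ? frag_sz : PAGE_SIZE;
	if (xsk)
		frag_sz = frag_sz > xsk->chunk_size ? frag_sz : xsk->chunk_size;
	return frag_sz;
}

int main(void)
{
	struct xsk_param xsk = { .headroom = 0, .chunk_size = 4096 };
	uint32_t hw_mtu = 1518; /* example: 1500 MTU plus L2 header and FCS */

	printf("no XDP:  frag_sz=%u\n", (unsigned)linear_frag_sz(hw_mtu, 0, NULL));
	printf("XDP:     frag_sz=%u\n", (unsigned)linear_frag_sz(hw_mtu, 1, NULL));
	printf("XDP+XSK: frag_sz=%u\n", (unsigned)linear_frag_sz(hw_mtu, 1, &xsk));
	return 0;
}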
+ */ + u32 linear_frag_sz = max(mlx5e_rx_get_linear_frag_sz(params, xsk), + mlx5e_rx_get_linear_frag_sz(params, NULL)); - return !params->lro_en && frag_sz <= PAGE_SIZE; + return !params->lro_en && linear_frag_sz <= PAGE_SIZE; } #define MLX5_MAX_MPWQE_LOG_WQE_STRIDE_SZ ((BIT(__mlx5_bit_sz(wq, log_wqe_stride_size)) - 1) + \ MLX5_MPWQE_LOG_STRIDE_SZ_BASE) bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev, - struct mlx5e_params *params) + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) { - u32 frag_sz = mlx5e_rx_get_linear_frag_sz(params); + u32 linear_frag_sz = mlx5e_rx_get_linear_frag_sz(params, xsk); s8 signed_log_num_strides_param; u8 log_num_strides; - if (!mlx5e_rx_is_linear_skb(params)) + if (!mlx5e_rx_is_linear_skb(params, xsk)) return false; - if (order_base_2(frag_sz) > MLX5_MAX_MPWQE_LOG_WQE_STRIDE_SZ) + if (order_base_2(linear_frag_sz) > MLX5_MAX_MPWQE_LOG_WQE_STRIDE_SZ) return false; if (MLX5_CAP_GEN(mdev, ext_stride_num_range)) return true; - log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - order_base_2(frag_sz); + log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - order_base_2(linear_frag_sz); signed_log_num_strides_param = (s8)log_num_strides - MLX5_MPWQE_LOG_NUM_STRIDES_BASE; return signed_log_num_strides_param >= 0; } -u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params) +u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) { - u8 log_pkts_per_wqe = mlx5e_mpwqe_log_pkts_per_wqe(params); + u8 log_pkts_per_wqe = mlx5e_mpwqe_log_pkts_per_wqe(params, xsk); /* Numbers are unsigned, don't subtract to avoid underflow. */ if (params->log_rq_mtu_frames < @@ -72,33 +109,30 @@ u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params) } u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev, - struct mlx5e_params *params) + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) { - if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params)) - return order_base_2(mlx5e_rx_get_linear_frag_sz(params)); + if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk)) + return order_base_2(mlx5e_rx_get_linear_frag_sz(params, xsk)); return MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev); } u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev, - struct mlx5e_params *params) + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) { return MLX5_MPWRQ_LOG_WQE_SZ - - mlx5e_mpwqe_get_log_stride_size(mdev, params); + mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk); } u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev, - struct mlx5e_params *params) + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) { - u16 linear_rq_headroom = params->xdp_prog ? - XDP_PACKET_HEADROOM : MLX5_RX_HEADROOM; - bool is_linear_skb; - - linear_rq_headroom += NET_IP_ALIGN; - - is_linear_skb = (params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC) ? - mlx5e_rx_is_linear_skb(params) : - mlx5e_rx_mpwqe_is_linear_skb(mdev, params); + bool is_linear_skb = (params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC) ? + mlx5e_rx_is_linear_skb(params, xsk) : + mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk); - return is_linear_skb ? linear_rq_headroom : 0; + return is_linear_skb ? 
mlx5e_get_linear_rq_headroom(params, xsk) : 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h index b106a0236f36..bd882b5ee9a7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h @@ -6,17 +6,119 @@ #include "en.h" -u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params); -u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params); -bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params); +struct mlx5e_xsk_param { + u16 headroom; + u16 chunk_size; +}; + +struct mlx5e_rq_param { + u32 rqc[MLX5_ST_SZ_DW(rqc)]; + struct mlx5_wq_param wq; + struct mlx5e_rq_frags_info frags_info; +}; + +struct mlx5e_sq_param { + u32 sqc[MLX5_ST_SZ_DW(sqc)]; + struct mlx5_wq_param wq; + bool is_mpw; +}; + +struct mlx5e_cq_param { + u32 cqc[MLX5_ST_SZ_DW(cqc)]; + struct mlx5_wq_param wq; + u16 eq_ix; + u8 cq_period_mode; +}; + +struct mlx5e_channel_param { + struct mlx5e_rq_param rq; + struct mlx5e_sq_param sq; + struct mlx5e_sq_param xdp_sq; + struct mlx5e_sq_param icosq; + struct mlx5e_cq_param rx_cq; + struct mlx5e_cq_param tx_cq; + struct mlx5e_cq_param icosq_cq; +}; + +static inline bool mlx5e_qid_get_ch_if_in_group(struct mlx5e_params *params, + u16 qid, + enum mlx5e_rq_group group, + u16 *ix) +{ + int nch = params->num_channels; + int ch = qid - nch * group; + + if (ch < 0 || ch >= nch) + return false; + + *ix = ch; + return true; +} + +static inline void mlx5e_qid_get_ch_and_group(struct mlx5e_params *params, + u16 qid, + u16 *ix, + enum mlx5e_rq_group *group) +{ + u16 nch = params->num_channels; + + *ix = qid % nch; + *group = qid / nch; +} + +static inline bool mlx5e_qid_validate(struct mlx5e_params *params, u64 qid) +{ + return qid < params->num_channels * MLX5E_NUM_RQ_GROUPS; +} + +/* Parameter calculations */ + +u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); +u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); +u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); +bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev, - struct mlx5e_params *params); -u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params); + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); +u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev, - struct mlx5e_params *params); + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev, - struct mlx5e_params *params); + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev, - struct mlx5e_params *params); + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); + +/* Build queue parameters */ + +void mlx5e_build_rq_param(struct mlx5e_priv *priv, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct mlx5e_rq_param *param); +void mlx5e_build_sq_param_common(struct mlx5e_priv *priv, + struct mlx5e_sq_param *param); +void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct mlx5e_cq_param *param); +void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv, + struct mlx5e_params *params, + struct 
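With two RQ groups (regular and XSK) the netdev RX queue space is split so that qid = group * num_channels + channel, which is what the new inline helpers in params.h encode. A small standalone check of the same arithmetic follows; the enum and function names are simplified stand-ins for the mlx5e_qid_* helpers above.

#include <stdio.h>

enum rq_group { RQ_GROUP_REGULAR, RQ_GROUP_XSK, NUM_RQ_GROUPS };

/* Mirrors mlx5e_qid_get_ch_if_in_group(): succeeds only if qid falls into
 * the requested group, and returns the channel index within that group.
 */
static int qid_get_ch_if_in_group(int num_channels, int qid,
				  enum rq_group group, int *ix)
{
	int ch = qid - num_channels * group;

	if (ch < 0 || ch >= num_channels)
		return 0;
	*ix = ch;
	return 1;
}

/* Mirrors mlx5e_qid_get_ch_and_group(): decompose a qid unconditionally. */
static void qid_get_ch_and_group(int num_channels, int qid,
				 int *ix, enum rq_group *group)
{
	*ix = qid % num_channels;
	*group = qid / num_channels;
}

int main(void)
{
	int nch = 8, ix;
	enum rq_group grp;

	/* qid 10 with 8 channels: channel 2 of the XSK group. */
	qid_get_ch_and_group(nch, 10, &ix, &grp);
	printf("qid 10 -> channel %d, group %d\n", ix, (int)grp);

	/* The same qid is rejected when asked for the regular group. */
	printf("qid 10 in regular group? %d\n",
	       qid_get_ch_if_in_group(nch, 10, RQ_GROUP_REGULAR, &ix));
	return 0;
}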
mlx5e_cq_param *param); +void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv, + u8 log_wq_size, + struct mlx5e_cq_param *param); +void mlx5e_build_icosq_param(struct mlx5e_priv *priv, + u8 log_wq_size, + struct mlx5e_sq_param *param); +void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv, + struct mlx5e_params *params, + struct mlx5e_sq_param *param); #endif /* __MLX5_EN_PARAMS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index eb8ef78e5626..b0b982cf69bb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -31,11 +31,13 @@ */ #include <linux/bpf_trace.h> +#include <net/xdp_sock.h> #include "en/xdp.h" +#include "en/params.h" -int mlx5e_xdp_max_mtu(struct mlx5e_params *params) +int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk) { - int hr = NET_IP_ALIGN + XDP_PACKET_HEADROOM; + int hr = mlx5e_get_linear_rq_headroom(params, xsk); /* Let S := SKB_DATA_ALIGN(sizeof(struct skb_shared_info)). * The condition checked in mlx5e_rx_is_linear_skb is: @@ -54,25 +56,70 @@ int mlx5e_xdp_max_mtu(struct mlx5e_params *params) } static inline bool -mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_dma_info *di, - struct xdp_buff *xdp) +mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq, + struct mlx5e_dma_info *di, struct xdp_buff *xdp) { + struct mlx5e_xdp_xmit_data xdptxd; struct mlx5e_xdp_info xdpi; + struct xdp_frame *xdpf; + dma_addr_t dma_addr; - xdpi.xdpf = convert_to_xdp_frame(xdp); - if (unlikely(!xdpi.xdpf)) + xdpf = convert_to_xdp_frame(xdp); + if (unlikely(!xdpf)) return false; - xdpi.dma_addr = di->addr + (xdpi.xdpf->data - (void *)xdpi.xdpf); - dma_sync_single_for_device(sq->pdev, xdpi.dma_addr, - xdpi.xdpf->len, PCI_DMA_TODEVICE); - xdpi.di = *di; - return sq->xmit_xdp_frame(sq, &xdpi); + xdptxd.data = xdpf->data; + xdptxd.len = xdpf->len; + + if (xdp->rxq->mem.type == MEM_TYPE_ZERO_COPY) { + /* The xdp_buff was in the UMEM and was copied into a newly + * allocated page. The UMEM page was returned via the ZCA, and + * this new page has to be mapped at this point and has to be + * unmapped and returned via xdp_return_frame on completion. + */ + + /* Prevent double recycling of the UMEM page. Even in case this + * function returns false, the xdp_buff shouldn't be recycled, + * as it was already done in xdp_convert_zc_to_xdp_frame. + */ + __set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */ + + xdpi.mode = MLX5E_XDP_XMIT_MODE_FRAME; + + dma_addr = dma_map_single(sq->pdev, xdptxd.data, xdptxd.len, + DMA_TO_DEVICE); + if (dma_mapping_error(sq->pdev, dma_addr)) { + xdp_return_frame(xdpf); + return false; + } + + xdptxd.dma_addr = dma_addr; + xdpi.frame.xdpf = xdpf; + xdpi.frame.dma_addr = dma_addr; + } else { + /* Driver assumes that convert_to_xdp_frame returns an xdp_frame + * that points to the same memory region as the original + * xdp_buff. It allows to map the memory only once and to use + * the DMA_BIDIRECTIONAL mode. 
+ */ + + xdpi.mode = MLX5E_XDP_XMIT_MODE_PAGE; + + dma_addr = di->addr + (xdpf->data - (void *)xdpf); + dma_sync_single_for_device(sq->pdev, dma_addr, xdptxd.len, + DMA_TO_DEVICE); + + xdptxd.dma_addr = dma_addr; + xdpi.page.rq = rq; + xdpi.page.di = *di; + } + + return sq->xmit_xdp_frame(sq, &xdptxd, &xdpi, 0); } /* returns true if packet was consumed by xdp */ bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di, - void *va, u16 *rx_headroom, u32 *len) + void *va, u16 *rx_headroom, u32 *len, bool xsk) { struct bpf_prog *prog = READ_ONCE(rq->xdp_prog); struct xdp_buff xdp; @@ -86,16 +133,20 @@ bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di, xdp_set_data_meta_invalid(&xdp); xdp.data_end = xdp.data + *len; xdp.data_hard_start = va; + if (xsk) + xdp.handle = di->xsk.handle; xdp.rxq = &rq->xdp_rxq; act = bpf_prog_run_xdp(prog, &xdp); + if (xsk) + xdp.handle += xdp.data - xdp.data_hard_start; switch (act) { case XDP_PASS: *rx_headroom = xdp.data - xdp.data_hard_start; *len = xdp.data_end - xdp.data; return false; case XDP_TX: - if (unlikely(!mlx5e_xmit_xdp_buff(&rq->xdpsq, di, &xdp))) + if (unlikely(!mlx5e_xmit_xdp_buff(rq->xdpsq, rq, di, &xdp))) goto xdp_abort; __set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */ return true; @@ -106,7 +157,8 @@ bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di, goto xdp_abort; __set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); __set_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags); - mlx5e_page_dma_unmap(rq, di); + if (!xsk) + mlx5e_page_dma_unmap(rq, di); rq->stats->xdp_redirect++; return true; default: @@ -160,7 +212,7 @@ static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq) stats->mpwqe++; } -static void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq) +void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq) { struct mlx5_wq_cyc *wq = &sq->wq; struct mlx5e_xdp_mpwqe *session = &sq->mpwqe; @@ -183,32 +235,55 @@ static void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq) session->wqe = NULL; /* Close session */ } +enum { + MLX5E_XDP_CHECK_OK = 1, + MLX5E_XDP_CHECK_START_MPWQE = 2, +}; + +static int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq) +{ + if (unlikely(!sq->mpwqe.wqe)) { + if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, + MLX5_SEND_WQE_MAX_WQEBBS))) { + /* SQ is full, ring doorbell */ + mlx5e_xmit_xdp_doorbell(sq); + sq->stats->full++; + return -EBUSY; + } + + return MLX5E_XDP_CHECK_START_MPWQE; + } + + return MLX5E_XDP_CHECK_OK; +} + static bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, - struct mlx5e_xdp_info *xdpi) + struct mlx5e_xdp_xmit_data *xdptxd, + struct mlx5e_xdp_info *xdpi, + int check_result) { struct mlx5e_xdp_mpwqe *session = &sq->mpwqe; struct mlx5e_xdpsq_stats *stats = sq->stats; - struct xdp_frame *xdpf = xdpi->xdpf; - - if (unlikely(sq->hw_mtu < xdpf->len)) { + if (unlikely(xdptxd->len > sq->hw_mtu)) { stats->err++; return false; } - if (unlikely(!session->wqe)) { - if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, - MLX5_SEND_WQE_MAX_WQEBBS))) { - /* SQ is full, ring doorbell */ - mlx5e_xmit_xdp_doorbell(sq); - stats->full++; - return false; - } + if (!check_result) + check_result = mlx5e_xmit_xdp_frame_check_mpwqe(sq); + if (unlikely(check_result < 0)) + return false; + if (check_result == MLX5E_XDP_CHECK_START_MPWQE) { + /* Start the session when nothing can fail, so it's guaranteed + * that if there is an active session, it has at least one dseg, + * and it's safe to complete it at any time. 
+ */ mlx5e_xdp_mpwqe_session_start(sq); } - mlx5e_xdp_mpwqe_add_dseg(sq, xdpi, stats); + mlx5e_xdp_mpwqe_add_dseg(sq, xdptxd, stats); if (unlikely(session->complete || session->ds_count == session->max_ds_count)) @@ -219,7 +294,22 @@ static bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, return true; } -static bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi) +static int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq) +{ + if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, 1))) { + /* SQ is full, ring doorbell */ + mlx5e_xmit_xdp_doorbell(sq); + sq->stats->full++; + return -EBUSY; + } + + return MLX5E_XDP_CHECK_OK; +} + +static bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, + struct mlx5e_xdp_xmit_data *xdptxd, + struct mlx5e_xdp_info *xdpi, + int check_result) { struct mlx5_wq_cyc *wq = &sq->wq; u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); @@ -229,9 +319,8 @@ static bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info * struct mlx5_wqe_eth_seg *eseg = &wqe->eth; struct mlx5_wqe_data_seg *dseg = wqe->data; - struct xdp_frame *xdpf = xdpi->xdpf; - dma_addr_t dma_addr = xdpi->dma_addr; - unsigned int dma_len = xdpf->len; + dma_addr_t dma_addr = xdptxd->dma_addr; + u32 dma_len = xdptxd->len; struct mlx5e_xdpsq_stats *stats = sq->stats; @@ -242,18 +331,16 @@ static bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info * return false; } - if (unlikely(!mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, 1))) { - /* SQ is full, ring doorbell */ - mlx5e_xmit_xdp_doorbell(sq); - stats->full++; + if (!check_result) + check_result = mlx5e_xmit_xdp_frame_check(sq); + if (unlikely(check_result < 0)) return false; - } cseg->fm_ce_se = 0; /* copy the inline part if required */ if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) { - memcpy(eseg->inline_hdr.start, xdpf->data, MLX5E_XDP_MIN_INLINE); + memcpy(eseg->inline_hdr.start, xdptxd->data, MLX5E_XDP_MIN_INLINE); eseg->inline_hdr.sz = cpu_to_be16(MLX5E_XDP_MIN_INLINE); dma_len -= MLX5E_XDP_MIN_INLINE; dma_addr += MLX5E_XDP_MIN_INLINE; @@ -277,7 +364,7 @@ static bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info * static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_wqe_info *wi, - struct mlx5e_rq *rq, + u32 *xsk_frames, bool recycle) { struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo; @@ -286,22 +373,32 @@ static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq, for (i = 0; i < wi->num_pkts; i++) { struct mlx5e_xdp_info xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo); - if (rq) { - /* XDP_TX */ - mlx5e_page_release(rq, &xdpi.di, recycle); - } else { - /* XDP_REDIRECT */ - dma_unmap_single(sq->pdev, xdpi.dma_addr, - xdpi.xdpf->len, DMA_TO_DEVICE); - xdp_return_frame(xdpi.xdpf); + switch (xdpi.mode) { + case MLX5E_XDP_XMIT_MODE_FRAME: + /* XDP_TX from the XSK RQ and XDP_REDIRECT */ + dma_unmap_single(sq->pdev, xdpi.frame.dma_addr, + xdpi.frame.xdpf->len, DMA_TO_DEVICE); + xdp_return_frame(xdpi.frame.xdpf); + break; + case MLX5E_XDP_XMIT_MODE_PAGE: + /* XDP_TX from the regular RQ */ + mlx5e_page_release_dynamic(xdpi.page.rq, &xdpi.page.di, recycle); + break; + case MLX5E_XDP_XMIT_MODE_XSK: + /* AF_XDP send */ + (*xsk_frames)++; + break; + default: + WARN_ON_ONCE(true); } } } -bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq) +bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq) { struct mlx5e_xdpsq *sq; struct mlx5_cqe64 *cqe; + u32 xsk_frames = 0; u16 sqcc; int i; @@ -343,10 +440,13 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq 
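Splitting the room check (mlx5e_fp_xmit_xdp_frame_check) from the transmit itself lets a batching caller run the check before dequeuing a descriptor from the UMEM and then pass the result into xmit so the check is not repeated; the XSK TX loop later in this patch relies on exactly that contract. Below is a minimal standalone model of it, with stand-in return codes matching the hunk above (negative means the SQ is full, a positive value may carry extra state such as "start a new MPWQE session").

#include <stdio.h>

#define CHECK_OK           1
#define CHECK_START_MPWQE  2
#define CHECK_EBUSY       (-1)   /* stand-in for -EBUSY */

struct model_sq { int free_slots; int open_mpwqe; };

/* Mirrors mlx5e_xmit_xdp_frame_check_mpwqe(): when no multi-packet WQE is
 * open, make sure there is room to start one and tell the caller to do so.
 */
static int check_mpwqe(struct model_sq *sq)
{
	if (!sq->open_mpwqe)
		return sq->free_slots > 0 ? CHECK_START_MPWQE : CHECK_EBUSY;
	return CHECK_OK;
}

/* Mirrors the pattern in mlx5e_xmit_xdp_frame_mpwqe(): a zero check_result
 * means "not checked yet", so re-check; otherwise trust the caller's result.
 */
static int xmit(struct model_sq *sq, int check_result)
{
	if (!check_result)
		check_result = check_mpwqe(sq);
	if (check_result < 0)
		return 0;
	if (check_result == CHECK_START_MPWQE) {
		sq->open_mpwqe = 1;
		sq->free_slots--;
	}
	return 1;   /* descriptor added to the open session */
}

int main(void)
{
	struct model_sq sq = { .free_slots = 1, .open_mpwqe = 0 };

	/* A batching caller checks first, dequeues only if there is room,
	 * then passes the result down.
	 */
	int check_result = check_mpwqe(&sq);

	if (check_result < 0)
		printf("SQ full, stop before consuming a descriptor\n");
	else
		printf("xmit ok: %d\n", xmit(&sq, check_result));
	return 0;
}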
*cq, struct mlx5e_rq *rq) sqcc += wi->num_wqebbs; - mlx5e_free_xdpsq_desc(sq, wi, rq, true); + mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, true); } while (!last_wqe); } while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq))); + if (xsk_frames) + xsk_umem_complete_tx(sq->umem, xsk_frames); + sq->stats->cqes += i; mlx5_cqwq_update_db_record(&cq->wq); @@ -358,8 +458,10 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq) return (i == MLX5E_TX_CQ_POLL_BUDGET); } -void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq) +void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq) { + u32 xsk_frames = 0; + while (sq->cc != sq->pc) { struct mlx5e_xdp_wqe_info *wi; u16 ci; @@ -369,8 +471,11 @@ void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq) sq->cc += wi->num_wqebbs; - mlx5e_free_xdpsq_desc(sq, wi, rq, false); + mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, false); } + + if (xsk_frames) + xsk_umem_complete_tx(sq->umem, xsk_frames); } int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, @@ -398,21 +503,27 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, for (i = 0; i < n; i++) { struct xdp_frame *xdpf = frames[i]; + struct mlx5e_xdp_xmit_data xdptxd; struct mlx5e_xdp_info xdpi; - xdpi.dma_addr = dma_map_single(sq->pdev, xdpf->data, xdpf->len, - DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(sq->pdev, xdpi.dma_addr))) { + xdptxd.data = xdpf->data; + xdptxd.len = xdpf->len; + xdptxd.dma_addr = dma_map_single(sq->pdev, xdptxd.data, + xdptxd.len, DMA_TO_DEVICE); + + if (unlikely(dma_mapping_error(sq->pdev, xdptxd.dma_addr))) { xdp_return_frame_rx_napi(xdpf); drops++; continue; } - xdpi.xdpf = xdpf; + xdpi.mode = MLX5E_XDP_XMIT_MODE_FRAME; + xdpi.frame.xdpf = xdpf; + xdpi.frame.dma_addr = xdptxd.dma_addr; - if (unlikely(!sq->xmit_xdp_frame(sq, &xdpi))) { - dma_unmap_single(sq->pdev, xdpi.dma_addr, - xdpf->len, DMA_TO_DEVICE); + if (unlikely(!sq->xmit_xdp_frame(sq, &xdptxd, &xdpi, 0))) { + dma_unmap_single(sq->pdev, xdptxd.dma_addr, + xdptxd.len, DMA_TO_DEVICE); xdp_return_frame_rx_napi(xdpf); drops++; } @@ -429,7 +540,7 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq) { - struct mlx5e_xdpsq *xdpsq = &rq->xdpsq; + struct mlx5e_xdpsq *xdpsq = rq->xdpsq; if (xdpsq->mpwqe.wqe) mlx5e_xdp_mpwqe_complete(xdpsq); @@ -444,6 +555,8 @@ void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq) void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw) { + sq->xmit_xdp_frame_check = is_mpw ? + mlx5e_xmit_xdp_frame_check_mpwqe : mlx5e_xmit_xdp_frame_check; sq->xmit_xdp_frame = is_mpw ? 
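mlx5e_set_xmit_fp() above now switches two function pointers together, so the "is there room" hook and the transmit hook always belong to the same mode (MPWQE or legacy) for every caller: the XSK TX loop, ndo_xdp_xmit and XDP_TX. The standalone sketch below shows just that pairing; the hook bodies are empty stand-ins.

#include <stdio.h>

/* Stand-ins for the two function-pointer types added in this patch. */
typedef int (*xmit_check_fn)(void);
typedef int (*xmit_fn)(int check_result);

static int check_legacy(void)            { return 1; }
static int xmit_legacy(int check_result) { (void)check_result; return 1; }
static int check_mpwqe(void)             { return 1; }
static int xmit_mpwqe(int check_result)  { (void)check_result; return 1; }

struct model_xdpsq {
	xmit_check_fn xmit_xdp_frame_check;
	xmit_fn       xmit_xdp_frame;
};

/* Mirrors mlx5e_set_xmit_fp(): both hooks are switched together so callers
 * never mix the two transmit modes.
 */
static void set_xmit_fp(struct model_xdpsq *sq, int is_mpw)
{
	sq->xmit_xdp_frame_check = is_mpw ? check_mpwqe : check_legacy;
	sq->xmit_xdp_frame       = is_mpw ? xmit_mpwqe : xmit_legacy;
}

int main(void)
{
	struct model_xdpsq sq;

	set_xmit_fp(&sq, 1);
	printf("mpwqe path: check=%d xmit=%d\n",
	       sq.xmit_xdp_frame_check(), sq.xmit_xdp_frame(1));
	return 0;
}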
mlx5e_xmit_xdp_frame_mpwqe : mlx5e_xmit_xdp_frame; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h index 8b537a4b0840..2d934c8d3807 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h @@ -39,11 +39,13 @@ (sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS) #define MLX5E_XDP_TX_DS_COUNT (MLX5E_XDP_TX_EMPTY_DS_COUNT + 1 /* SG DS */) -int mlx5e_xdp_max_mtu(struct mlx5e_params *params); +struct mlx5e_xsk_param; +int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk); bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di, - void *va, u16 *rx_headroom, u32 *len); -bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq); -void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq); + void *va, u16 *rx_headroom, u32 *len, bool xsk); +void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq); +bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq); +void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq); void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw); void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq); int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, @@ -66,6 +68,21 @@ static inline bool mlx5e_xdp_tx_is_enabled(struct mlx5e_priv *priv) return test_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state); } +static inline void mlx5e_xdp_set_open(struct mlx5e_priv *priv) +{ + set_bit(MLX5E_STATE_XDP_OPEN, &priv->state); +} + +static inline void mlx5e_xdp_set_closed(struct mlx5e_priv *priv) +{ + clear_bit(MLX5E_STATE_XDP_OPEN, &priv->state); +} + +static inline bool mlx5e_xdp_is_open(struct mlx5e_priv *priv) +{ + return test_bit(MLX5E_STATE_XDP_OPEN, &priv->state); +} + static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_xdpsq *sq) { if (sq->doorbell_cseg) { @@ -97,15 +114,14 @@ static inline void mlx5e_xdp_update_inline_state(struct mlx5e_xdpsq *sq) } static inline void -mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi, +mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq, + struct mlx5e_xdp_xmit_data *xdptxd, struct mlx5e_xdpsq_stats *stats) { struct mlx5e_xdp_mpwqe *session = &sq->mpwqe; - dma_addr_t dma_addr = xdpi->dma_addr; - struct xdp_frame *xdpf = xdpi->xdpf; struct mlx5_wqe_data_seg *dseg = (struct mlx5_wqe_data_seg *)session->wqe + session->ds_count; - u16 dma_len = xdpf->len; + u32 dma_len = xdptxd->len; session->pkt_count++; @@ -124,7 +140,7 @@ mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi, } inline_dseg->byte_count = cpu_to_be32(dma_len | MLX5_INLINE_SEG); - memcpy(inline_dseg->data, xdpf->data, dma_len); + memcpy(inline_dseg->data, xdptxd->data, dma_len); session->ds_count += ds_cnt; stats->inlnw++; @@ -132,7 +148,7 @@ mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi, } no_inline: - dseg->addr = cpu_to_be64(dma_addr); + dseg->addr = cpu_to_be64(xdptxd->dma_addr); dseg->byte_count = cpu_to_be32(dma_len); dseg->lkey = sq->mkey_be; session->ds_count++; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/Makefile new file mode 100644 index 000000000000..5ee42991900a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/Makefile @@ -0,0 +1 @@ +subdir-ccflags-y += -I$(src)/../.. 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c new file mode 100644 index 000000000000..6a55573ec8f2 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c @@ -0,0 +1,192 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include "rx.h" +#include "en/xdp.h" +#include <net/xdp_sock.h> + +/* RX data path */ + +bool mlx5e_xsk_pages_enough_umem(struct mlx5e_rq *rq, int count) +{ + /* Check in advance that we have enough frames, instead of allocating + * one-by-one, failing and moving frames to the Reuse Ring. + */ + return xsk_umem_has_addrs_rq(rq->umem, count); +} + +int mlx5e_xsk_page_alloc_umem(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info) +{ + struct xdp_umem *umem = rq->umem; + u64 handle; + + if (!xsk_umem_peek_addr_rq(umem, &handle)) + return -ENOMEM; + + dma_info->xsk.handle = handle + rq->buff.umem_headroom; + dma_info->xsk.data = xdp_umem_get_data(umem, dma_info->xsk.handle); + + /* No need to add headroom to the DMA address. In striding RQ case, we + * just provide pages for UMR, and headroom is counted at the setup + * stage when creating a WQE. In non-striding RQ case, headroom is + * accounted in mlx5e_alloc_rx_wqe. + */ + dma_info->addr = xdp_umem_get_dma(umem, handle); + + xsk_umem_discard_addr_rq(umem); + + dma_sync_single_for_device(rq->pdev, dma_info->addr, PAGE_SIZE, + DMA_BIDIRECTIONAL); + + return 0; +} + +static inline void mlx5e_xsk_recycle_frame(struct mlx5e_rq *rq, u64 handle) +{ + xsk_umem_fq_reuse(rq->umem, handle & rq->umem->chunk_mask); +} + +/* XSKRQ uses pages from UMEM, they must not be released. They are returned to + * the userspace if possible, and if not, this function is called to reuse them + * in the driver. + */ +void mlx5e_xsk_page_release(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info) +{ + mlx5e_xsk_recycle_frame(rq, dma_info->xsk.handle); +} + +/* Return a frame back to the hardware to fill in again. It is used by XDP when + * the XDP program returns XDP_TX or XDP_REDIRECT not to an XSKMAP. + */ +void mlx5e_xsk_zca_free(struct zero_copy_allocator *zca, unsigned long handle) +{ + struct mlx5e_rq *rq = container_of(zca, struct mlx5e_rq, zca); + + mlx5e_xsk_recycle_frame(rq, handle); +} + +static struct sk_buff *mlx5e_xsk_construct_skb(struct mlx5e_rq *rq, void *data, + u32 cqe_bcnt) +{ + struct sk_buff *skb; + + skb = napi_alloc_skb(rq->cq.napi, cqe_bcnt); + if (unlikely(!skb)) { + rq->stats->buff_alloc_err++; + return NULL; + } + + skb_put_data(skb, data, cqe_bcnt); + + return skb; +} + +struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, + struct mlx5e_mpw_info *wi, + u16 cqe_bcnt, + u32 head_offset, + u32 page_idx) +{ + struct mlx5e_dma_info *di = &wi->umr.dma_info[page_idx]; + u16 rx_headroom = rq->buff.headroom - rq->buff.umem_headroom; + u32 cqe_bcnt32 = cqe_bcnt; + void *va, *data; + u32 frag_size; + bool consumed; + + /* Check packet size. Note LRO doesn't use linear SKB */ + if (unlikely(cqe_bcnt > rq->hw_mtu)) { + rq->stats->oversize_pkts_sw_drop++; + return NULL; + } + + /* head_offset is not used in this function, because di->xsk.data and + * di->addr point directly to the necessary place. Furthermore, in the + * current implementation, one page = one packet = one frame, so + * head_offset should always be 0. 
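In mlx5e_xsk_page_alloc_umem() above, the handle peeked from the Fill Ring is stored with the UMEM headroom added for the data pointer, while the DMA address is taken from the un-offset handle (the headroom is accounted for later, when the WQE is built). The standalone sketch below models that offset bookkeeping; umem_get_data/umem_get_dma are simplified stand-ins for the xdp_umem helpers, with one DMA mapping per page as in the umem.c hunk of this patch.

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

/* Simplified stand-in for the relevant xdp_umem state: a virtual base for
 * data lookups and a per-page DMA table.
 */
struct model_umem {
	uint8_t  *va_base;      /* start of the mapped UMEM area */
	uint64_t  page_dma[4];  /* DMA address of each UMEM page */
};

static void *umem_get_data(struct model_umem *u, uint64_t handle)
{
	return u->va_base + handle;
}

static uint64_t umem_get_dma(struct model_umem *u, uint64_t handle)
{
	return u->page_dma[handle >> PAGE_SHIFT] + (handle & (PAGE_SIZE - 1));
}

int main(void)
{
	static uint8_t umem_area[4 * PAGE_SIZE];
	struct model_umem umem = {
		.va_base  = umem_area,
		.page_dma = { 0x100000, 0x104000, 0x108000, 0x10c000 },
	};
	uint64_t fill_handle   = 2 * PAGE_SIZE; /* as peeked from the Fill Ring */
	uint16_t umem_headroom = 128;           /* rq->buff.umem_headroom */

	/* As in mlx5e_xsk_page_alloc_umem(): data points past the headroom,
	 * the DMA address stays at the chunk start.
	 */
	void    *data = umem_get_data(&umem, fill_handle + umem_headroom);
	uint64_t dma  = umem_get_dma(&umem, fill_handle);

	printf("data offset in UMEM: %ld, dma: 0x%llx\n",
	       (long)((uint8_t *)data - umem_area), (unsigned long long)dma);
	return 0;
}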
+ */ + WARN_ON_ONCE(head_offset); + + va = di->xsk.data; + data = va + rx_headroom; + frag_size = rq->buff.headroom + cqe_bcnt32; + + dma_sync_single_for_cpu(rq->pdev, di->addr, frag_size, DMA_BIDIRECTIONAL); + prefetch(data); + + rcu_read_lock(); + consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt32, true); + rcu_read_unlock(); + + /* Possible flows: + * - XDP_REDIRECT to XSKMAP: + * The page is owned by the userspace from now. + * - XDP_TX and other XDP_REDIRECTs: + * The page was returned by ZCA and recycled. + * - XDP_DROP: + * Recycle the page. + * - XDP_PASS: + * Allocate an SKB, copy the data and recycle the page. + * + * Pages to be recycled go to the Reuse Ring on MPWQE deallocation. Its + * size is the same as the Driver RX Ring's size, and pages for WQEs are + * allocated first from the Reuse Ring, so it has enough space. + */ + + if (likely(consumed)) { + if (likely(__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))) + __set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */ + return NULL; /* page/packet was consumed by XDP */ + } + + /* XDP_PASS: copy the data from the UMEM to a new SKB and reuse the + * frame. On SKB allocation failure, NULL is returned. + */ + return mlx5e_xsk_construct_skb(rq, data, cqe_bcnt32); +} + +struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq, + struct mlx5_cqe64 *cqe, + struct mlx5e_wqe_frag_info *wi, + u32 cqe_bcnt) +{ + struct mlx5e_dma_info *di = wi->di; + u16 rx_headroom = rq->buff.headroom - rq->buff.umem_headroom; + void *va, *data; + bool consumed; + u32 frag_size; + + /* wi->offset is not used in this function, because di->xsk.data and + * di->addr point directly to the necessary place. Furthermore, in the + * current implementation, one page = one packet = one frame, so + * wi->offset should always be 0. + */ + WARN_ON_ONCE(wi->offset); + + va = di->xsk.data; + data = va + rx_headroom; + frag_size = rq->buff.headroom + cqe_bcnt; + + dma_sync_single_for_cpu(rq->pdev, di->addr, frag_size, DMA_BIDIRECTIONAL); + prefetch(data); + + if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)) { + rq->stats->wqe_err++; + return NULL; + } + + rcu_read_lock(); + consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt, true); + rcu_read_unlock(); + + if (likely(consumed)) + return NULL; /* page/packet was consumed by XDP */ + + /* XDP_PASS: copy the data from the UMEM to a new SKB. The frame reuse + * will be handled by mlx5e_put_rx_frag. + * On SKB allocation failure, NULL is returned. + */ + return mlx5e_xsk_construct_skb(rq, data, cqe_bcnt); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h new file mode 100644 index 000000000000..307b923a1361 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. 
*/ + +#ifndef __MLX5_EN_XSK_RX_H__ +#define __MLX5_EN_XSK_RX_H__ + +#include "en.h" + +/* RX data path */ + +bool mlx5e_xsk_pages_enough_umem(struct mlx5e_rq *rq, int count); +int mlx5e_xsk_page_alloc_umem(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info); +void mlx5e_xsk_page_release(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info); +void mlx5e_xsk_zca_free(struct zero_copy_allocator *zca, unsigned long handle); +struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, + struct mlx5e_mpw_info *wi, + u16 cqe_bcnt, + u32 head_offset, + u32 page_idx); +struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq, + struct mlx5_cqe64 *cqe, + struct mlx5e_wqe_frag_info *wi, + u32 cqe_bcnt); + +#endif /* __MLX5_EN_XSK_RX_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c new file mode 100644 index 000000000000..9b4d47c47c92 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c @@ -0,0 +1,223 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include "setup.h" +#include "en/params.h" + +bool mlx5e_validate_xsk_param(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct mlx5_core_dev *mdev) +{ + /* AF_XDP doesn't support frames larger than PAGE_SIZE, and the current + * mlx5e XDP implementation doesn't support multiple packets per page. + */ + if (xsk->chunk_size != PAGE_SIZE) + return false; + + /* Current MTU and XSK headroom don't allow packets to fit the frames. */ + if (mlx5e_rx_get_linear_frag_sz(params, xsk) > xsk->chunk_size) + return false; + + /* frag_sz is different for regular and XSK RQs, so ensure that linear + * SKB mode is possible. + */ + switch (params->rq_wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + return mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk); + default: /* MLX5_WQ_TYPE_CYCLIC */ + return mlx5e_rx_is_linear_skb(params, xsk); + } +} + +static void mlx5e_build_xskicosq_param(struct mlx5e_priv *priv, + u8 log_wq_size, + struct mlx5e_sq_param *param) +{ + void *sqc = param->sqc; + void *wq = MLX5_ADDR_OF(sqc, sqc, wq); + + mlx5e_build_sq_param_common(priv, param); + + MLX5_SET(wq, wq, log_wq_sz, log_wq_size); +} + +static void mlx5e_build_xsk_cparam(struct mlx5e_priv *priv, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct mlx5e_channel_param *cparam) +{ + const u8 xskicosq_size = MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE; + + mlx5e_build_rq_param(priv, params, xsk, &cparam->rq); + mlx5e_build_xdpsq_param(priv, params, &cparam->xdp_sq); + mlx5e_build_xskicosq_param(priv, xskicosq_size, &cparam->icosq); + mlx5e_build_rx_cq_param(priv, params, xsk, &cparam->rx_cq); + mlx5e_build_tx_cq_param(priv, params, &cparam->tx_cq); + mlx5e_build_ico_cq_param(priv, xskicosq_size, &cparam->icosq_cq); +} + +int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, struct xdp_umem *umem, + struct mlx5e_channel *c) +{ + struct mlx5e_channel_param cparam = {}; + struct net_dim_cq_moder icocq_moder = {}; + int err; + + if (!mlx5e_validate_xsk_param(params, xsk, priv->mdev)) + return -EINVAL; + + mlx5e_build_xsk_cparam(priv, params, xsk, &cparam); + + err = mlx5e_open_cq(c, params->rx_cq_moderation, &cparam.rx_cq, &c->xskrq.cq); + if (unlikely(err)) + return err; + + err = mlx5e_open_rq(c, params, &cparam.rq, xsk, umem, &c->xskrq); + if (unlikely(err)) + goto err_close_rx_cq; + + err = mlx5e_open_cq(c, 
params->tx_cq_moderation, &cparam.tx_cq, &c->xsksq.cq); + if (unlikely(err)) + goto err_close_rq; + + /* Create a separate SQ, so that when the UMEM is disabled, we could + * close this SQ safely and stop receiving CQEs. In other case, e.g., if + * the XDPSQ was used instead, we might run into trouble when the UMEM + * is disabled and then reenabled, but the SQ continues receiving CQEs + * from the old UMEM. + */ + err = mlx5e_open_xdpsq(c, params, &cparam.xdp_sq, umem, &c->xsksq, true); + if (unlikely(err)) + goto err_close_tx_cq; + + err = mlx5e_open_cq(c, icocq_moder, &cparam.icosq_cq, &c->xskicosq.cq); + if (unlikely(err)) + goto err_close_sq; + + /* Create a dedicated SQ for posting NOPs whenever we need an IRQ to be + * triggered and NAPI to be called on the correct CPU. + */ + err = mlx5e_open_icosq(c, params, &cparam.icosq, &c->xskicosq); + if (unlikely(err)) + goto err_close_icocq; + + spin_lock_init(&c->xskicosq_lock); + + set_bit(MLX5E_CHANNEL_STATE_XSK, c->state); + + return 0; + +err_close_icocq: + mlx5e_close_cq(&c->xskicosq.cq); + +err_close_sq: + mlx5e_close_xdpsq(&c->xsksq); + +err_close_tx_cq: + mlx5e_close_cq(&c->xsksq.cq); + +err_close_rq: + mlx5e_close_rq(&c->xskrq); + +err_close_rx_cq: + mlx5e_close_cq(&c->xskrq.cq); + + return err; +} + +void mlx5e_close_xsk(struct mlx5e_channel *c) +{ + clear_bit(MLX5E_CHANNEL_STATE_XSK, c->state); + napi_synchronize(&c->napi); + + mlx5e_close_rq(&c->xskrq); + mlx5e_close_cq(&c->xskrq.cq); + mlx5e_close_icosq(&c->xskicosq); + mlx5e_close_cq(&c->xskicosq.cq); + mlx5e_close_xdpsq(&c->xsksq); + mlx5e_close_cq(&c->xsksq.cq); +} + +void mlx5e_activate_xsk(struct mlx5e_channel *c) +{ + set_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state); + /* TX queue is created active. */ + mlx5e_trigger_irq(&c->xskicosq); +} + +void mlx5e_deactivate_xsk(struct mlx5e_channel *c) +{ + mlx5e_deactivate_rq(&c->xskrq); + /* TX queue is disabled on close. 
*/ +} + +static int mlx5e_redirect_xsk_rqt(struct mlx5e_priv *priv, u16 ix, u32 rqn) +{ + struct mlx5e_redirect_rqt_param direct_rrp = { + .is_rss = false, + { + .rqn = rqn, + }, + }; + + u32 rqtn = priv->xsk_tir[ix].rqt.rqtn; + + return mlx5e_redirect_rqt(priv, rqtn, 1, direct_rrp); +} + +int mlx5e_xsk_redirect_rqt_to_channel(struct mlx5e_priv *priv, struct mlx5e_channel *c) +{ + return mlx5e_redirect_xsk_rqt(priv, c->ix, c->xskrq.rqn); +} + +int mlx5e_xsk_redirect_rqt_to_drop(struct mlx5e_priv *priv, u16 ix) +{ + return mlx5e_redirect_xsk_rqt(priv, ix, priv->drop_rq.rqn); +} + +int mlx5e_xsk_redirect_rqts_to_channels(struct mlx5e_priv *priv, struct mlx5e_channels *chs) +{ + int err, i; + + if (!priv->xsk.refcnt) + return 0; + + for (i = 0; i < chs->num; i++) { + struct mlx5e_channel *c = chs->c[i]; + + if (!test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) + continue; + + err = mlx5e_xsk_redirect_rqt_to_channel(priv, c); + if (unlikely(err)) + goto err_stop; + } + + return 0; + +err_stop: + for (i--; i >= 0; i--) { + if (!test_bit(MLX5E_CHANNEL_STATE_XSK, chs->c[i]->state)) + continue; + + mlx5e_xsk_redirect_rqt_to_drop(priv, i); + } + + return err; +} + +void mlx5e_xsk_redirect_rqts_to_drop(struct mlx5e_priv *priv, struct mlx5e_channels *chs) +{ + int i; + + if (!priv->xsk.refcnt) + return; + + for (i = 0; i < chs->num; i++) { + if (!test_bit(MLX5E_CHANNEL_STATE_XSK, chs->c[i]->state)) + continue; + + mlx5e_xsk_redirect_rqt_to_drop(priv, i); + } +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h new file mode 100644 index 000000000000..0dd11b81c046 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __MLX5_EN_XSK_SETUP_H__ +#define __MLX5_EN_XSK_SETUP_H__ + +#include "en.h" + +struct mlx5e_xsk_param; + +bool mlx5e_validate_xsk_param(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct mlx5_core_dev *mdev); +int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, struct xdp_umem *umem, + struct mlx5e_channel *c); +void mlx5e_close_xsk(struct mlx5e_channel *c); +void mlx5e_activate_xsk(struct mlx5e_channel *c); +void mlx5e_deactivate_xsk(struct mlx5e_channel *c); +int mlx5e_xsk_redirect_rqt_to_channel(struct mlx5e_priv *priv, struct mlx5e_channel *c); +int mlx5e_xsk_redirect_rqt_to_drop(struct mlx5e_priv *priv, u16 ix); +int mlx5e_xsk_redirect_rqts_to_channels(struct mlx5e_priv *priv, struct mlx5e_channels *chs); +void mlx5e_xsk_redirect_rqts_to_drop(struct mlx5e_priv *priv, struct mlx5e_channels *chs); + +#endif /* __MLX5_EN_XSK_SETUP_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c new file mode 100644 index 000000000000..35e188cf4ea4 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. 
*/ + +#include "tx.h" +#include "umem.h" +#include "en/xdp.h" +#include "en/params.h" +#include <net/xdp_sock.h> + +int mlx5e_xsk_async_xmit(struct net_device *dev, u32 qid) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_params *params = &priv->channels.params; + struct mlx5e_channel *c; + u16 ix; + + if (unlikely(!mlx5e_xdp_is_open(priv))) + return -ENETDOWN; + + if (unlikely(!mlx5e_qid_get_ch_if_in_group(params, qid, MLX5E_RQ_GROUP_XSK, &ix))) + return -EINVAL; + + c = priv->channels.c[ix]; + + if (unlikely(!test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))) + return -ENXIO; + + if (!napi_if_scheduled_mark_missed(&c->napi)) { + spin_lock(&c->xskicosq_lock); + mlx5e_trigger_irq(&c->xskicosq); + spin_unlock(&c->xskicosq_lock); + } + + return 0; +} + +/* When TX fails (because of the size of the packet), we need to get completions + * in order, so post a NOP to get a CQE. Since AF_XDP doesn't distinguish + * between successful TX and errors, handling in mlx5e_poll_xdpsq_cq is the + * same. + */ +static void mlx5e_xsk_tx_post_err(struct mlx5e_xdpsq *sq, + struct mlx5e_xdp_info *xdpi) +{ + u16 pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc); + struct mlx5e_xdp_wqe_info *wi = &sq->db.wqe_info[pi]; + struct mlx5e_tx_wqe *nopwqe; + + wi->num_wqebbs = 1; + wi->num_pkts = 1; + + nopwqe = mlx5e_post_nop(&sq->wq, sq->sqn, &sq->pc); + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi); + sq->doorbell_cseg = &nopwqe->ctrl; +} + +bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget) +{ + struct xdp_umem *umem = sq->umem; + struct mlx5e_xdp_info xdpi; + struct mlx5e_xdp_xmit_data xdptxd; + bool work_done = true; + bool flush = false; + + xdpi.mode = MLX5E_XDP_XMIT_MODE_XSK; + + for (; budget; budget--) { + int check_result = sq->xmit_xdp_frame_check(sq); + struct xdp_desc desc; + + if (unlikely(check_result < 0)) { + work_done = false; + break; + } + + if (!xsk_umem_consume_tx(umem, &desc)) { + /* TX will get stuck until something wakes it up by + * triggering NAPI. Currently it's expected that the + * application calls sendto() if there are consumed, but + * not completed frames. + */ + break; + } + + xdptxd.dma_addr = xdp_umem_get_dma(umem, desc.addr); + xdptxd.data = xdp_umem_get_data(umem, desc.addr); + xdptxd.len = desc.len; + + dma_sync_single_for_device(sq->pdev, xdptxd.dma_addr, + xdptxd.len, DMA_BIDIRECTIONAL); + + if (unlikely(!sq->xmit_xdp_frame(sq, &xdptxd, &xdpi, check_result))) { + if (sq->mpwqe.wqe) + mlx5e_xdp_mpwqe_complete(sq); + + mlx5e_xsk_tx_post_err(sq, &xdpi); + } + + flush = true; + } + + if (flush) { + if (sq->mpwqe.wqe) + mlx5e_xdp_mpwqe_complete(sq); + mlx5e_xmit_xdp_doorbell(sq); + + xsk_umem_consume_tx_done(umem); + } + + return !(budget && work_done); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h new file mode 100644 index 000000000000..7add18bf78d8 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. 
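The comment in mlx5e_xsk_tx() above notes that the application is expected to call sendto() on the AF_XDP socket when it has produced descriptors that are consumed but not completed; mlx5e_xsk_async_xmit() is what that syscall eventually reaches in this driver. A minimal userspace-side sketch of that kick follows; it assumes xsk_fd is an already bound AF_XDP socket and trims error handling, so it is an illustration of the wake-up convention rather than a complete application.

#include <errno.h>
#include <stdio.h>
#include <sys/socket.h>

/* Kick the kernel TX path for an AF_XDP socket. A zero-length, NULL-buffer
 * sendto() is the conventional wake-up; ENOBUFS/EAGAIN/EBUSY just mean
 * "try again later", descriptors already placed on the TX ring are not lost.
 */
static int xsk_tx_kick(int xsk_fd)
{
	int ret = sendto(xsk_fd, NULL, 0, MSG_DONTWAIT, NULL, 0);

	if (ret >= 0 || errno == ENOBUFS || errno == EAGAIN || errno == EBUSY)
		return 0;
	return -errno;
}

int main(void)
{
	/* Placeholder: in a real application xsk_fd comes from
	 * socket(AF_XDP, SOCK_RAW, 0) followed by ring setup and bind().
	 */
	int xsk_fd = -1;

	if (xsk_fd >= 0 && xsk_tx_kick(xsk_fd))
		perror("sendto");
	return 0;
}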
*/ + +#ifndef __MLX5_EN_XSK_TX_H__ +#define __MLX5_EN_XSK_TX_H__ + +#include "en.h" + +/* TX data path */ + +int mlx5e_xsk_async_xmit(struct net_device *dev, u32 qid); + +bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget); + +#endif /* __MLX5_EN_XSK_TX_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c new file mode 100644 index 000000000000..4baaa5788320 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c @@ -0,0 +1,267 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include <net/xdp_sock.h> +#include "umem.h" +#include "setup.h" +#include "en/params.h" + +static int mlx5e_xsk_map_umem(struct mlx5e_priv *priv, + struct xdp_umem *umem) +{ + struct device *dev = priv->mdev->device; + u32 i; + + for (i = 0; i < umem->npgs; i++) { + dma_addr_t dma = dma_map_page(dev, umem->pgs[i], 0, PAGE_SIZE, + DMA_BIDIRECTIONAL); + + if (unlikely(dma_mapping_error(dev, dma))) + goto err_unmap; + umem->pages[i].dma = dma; + } + + return 0; + +err_unmap: + while (i--) { + dma_unmap_page(dev, umem->pages[i].dma, PAGE_SIZE, + DMA_BIDIRECTIONAL); + umem->pages[i].dma = 0; + } + + return -ENOMEM; +} + +static void mlx5e_xsk_unmap_umem(struct mlx5e_priv *priv, + struct xdp_umem *umem) +{ + struct device *dev = priv->mdev->device; + u32 i; + + for (i = 0; i < umem->npgs; i++) { + dma_unmap_page(dev, umem->pages[i].dma, PAGE_SIZE, + DMA_BIDIRECTIONAL); + umem->pages[i].dma = 0; + } +} + +static int mlx5e_xsk_get_umems(struct mlx5e_xsk *xsk) +{ + if (!xsk->umems) { + xsk->umems = kcalloc(MLX5E_MAX_NUM_CHANNELS, + sizeof(*xsk->umems), GFP_KERNEL); + if (unlikely(!xsk->umems)) + return -ENOMEM; + } + + xsk->refcnt++; + xsk->ever_used = true; + + return 0; +} + +static void mlx5e_xsk_put_umems(struct mlx5e_xsk *xsk) +{ + if (!--xsk->refcnt) { + kfree(xsk->umems); + xsk->umems = NULL; + } +} + +static int mlx5e_xsk_add_umem(struct mlx5e_xsk *xsk, struct xdp_umem *umem, u16 ix) +{ + int err; + + err = mlx5e_xsk_get_umems(xsk); + if (unlikely(err)) + return err; + + xsk->umems[ix] = umem; + return 0; +} + +static void mlx5e_xsk_remove_umem(struct mlx5e_xsk *xsk, u16 ix) +{ + xsk->umems[ix] = NULL; + + mlx5e_xsk_put_umems(xsk); +} + +static bool mlx5e_xsk_is_umem_sane(struct xdp_umem *umem) +{ + return umem->headroom <= 0xffff && umem->chunk_size_nohr <= 0xffff; +} + +void mlx5e_build_xsk_param(struct xdp_umem *umem, struct mlx5e_xsk_param *xsk) +{ + xsk->headroom = umem->headroom; + xsk->chunk_size = umem->chunk_size_nohr + umem->headroom; +} + +static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv, + struct xdp_umem *umem, u16 ix) +{ + struct mlx5e_params *params = &priv->channels.params; + struct mlx5e_xsk_param xsk; + struct mlx5e_channel *c; + int err; + + if (unlikely(mlx5e_xsk_get_umem(&priv->channels.params, &priv->xsk, ix))) + return -EBUSY; + + if (unlikely(!mlx5e_xsk_is_umem_sane(umem))) + return -EINVAL; + + err = mlx5e_xsk_map_umem(priv, umem); + if (unlikely(err)) + return err; + + err = mlx5e_xsk_add_umem(&priv->xsk, umem, ix); + if (unlikely(err)) + goto err_unmap_umem; + + mlx5e_build_xsk_param(umem, &xsk); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + /* XSK objects will be created on open. */ + goto validate_closed; + } + + if (!params->xdp_prog) { + /* XSK objects will be created when an XDP program is set, + * and the channels are reopened. 
+ */ + goto validate_closed; + } + + c = priv->channels.c[ix]; + + err = mlx5e_open_xsk(priv, params, &xsk, umem, c); + if (unlikely(err)) + goto err_remove_umem; + + mlx5e_activate_xsk(c); + + /* Don't wait for WQEs, because the newer xdpsock sample doesn't provide + * any Fill Ring entries at the setup stage. + */ + + err = mlx5e_xsk_redirect_rqt_to_channel(priv, priv->channels.c[ix]); + if (unlikely(err)) + goto err_deactivate; + + return 0; + +err_deactivate: + mlx5e_deactivate_xsk(c); + mlx5e_close_xsk(c); + +err_remove_umem: + mlx5e_xsk_remove_umem(&priv->xsk, ix); + +err_unmap_umem: + mlx5e_xsk_unmap_umem(priv, umem); + + return err; + +validate_closed: + /* Check the configuration in advance, rather than fail at a later stage + * (in mlx5e_xdp_set or on open) and end up with no channels. + */ + if (!mlx5e_validate_xsk_param(params, &xsk, priv->mdev)) { + err = -EINVAL; + goto err_remove_umem; + } + + return 0; +} + +static int mlx5e_xsk_disable_locked(struct mlx5e_priv *priv, u16 ix) +{ + struct xdp_umem *umem = mlx5e_xsk_get_umem(&priv->channels.params, + &priv->xsk, ix); + struct mlx5e_channel *c; + + if (unlikely(!umem)) + return -EINVAL; + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + goto remove_umem; + + /* XSK RQ and SQ are only created if XDP program is set. */ + if (!priv->channels.params.xdp_prog) + goto remove_umem; + + c = priv->channels.c[ix]; + mlx5e_xsk_redirect_rqt_to_drop(priv, ix); + mlx5e_deactivate_xsk(c); + mlx5e_close_xsk(c); + +remove_umem: + mlx5e_xsk_remove_umem(&priv->xsk, ix); + mlx5e_xsk_unmap_umem(priv, umem); + + return 0; +} + +static int mlx5e_xsk_enable_umem(struct mlx5e_priv *priv, struct xdp_umem *umem, + u16 ix) +{ + int err; + + mutex_lock(&priv->state_lock); + err = mlx5e_xsk_enable_locked(priv, umem, ix); + mutex_unlock(&priv->state_lock); + + return err; +} + +static int mlx5e_xsk_disable_umem(struct mlx5e_priv *priv, u16 ix) +{ + int err; + + mutex_lock(&priv->state_lock); + err = mlx5e_xsk_disable_locked(priv, ix); + mutex_unlock(&priv->state_lock); + + return err; +} + +int mlx5e_xsk_setup_umem(struct net_device *dev, struct xdp_umem *umem, u16 qid) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_params *params = &priv->channels.params; + u16 ix; + + if (unlikely(!mlx5e_qid_get_ch_if_in_group(params, qid, MLX5E_RQ_GROUP_XSK, &ix))) + return -EINVAL; + + return umem ? mlx5e_xsk_enable_umem(priv, umem, ix) : + mlx5e_xsk_disable_umem(priv, ix); +} + +int mlx5e_xsk_resize_reuseq(struct xdp_umem *umem, u32 nentries) +{ + struct xdp_umem_fq_reuse *reuseq; + + reuseq = xsk_reuseq_prepare(nentries); + if (unlikely(!reuseq)) + return -ENOMEM; + xsk_reuseq_free(xsk_reuseq_swap(umem, reuseq)); + + return 0; +} + +u16 mlx5e_xsk_first_unused_channel(struct mlx5e_params *params, struct mlx5e_xsk *xsk) +{ + u16 res = xsk->refcnt ? params->num_channels : 0; + + while (res) { + if (mlx5e_xsk_get_umem(params, xsk, res - 1)) + break; + --res; + } + + return res; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.h new file mode 100644 index 000000000000..25b4cbe58b54 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. 
*/ + +#ifndef __MLX5_EN_XSK_UMEM_H__ +#define __MLX5_EN_XSK_UMEM_H__ + +#include "en.h" + +static inline struct xdp_umem *mlx5e_xsk_get_umem(struct mlx5e_params *params, + struct mlx5e_xsk *xsk, u16 ix) +{ + if (!xsk || !xsk->umems) + return NULL; + + if (unlikely(ix >= params->num_channels)) + return NULL; + + return xsk->umems[ix]; +} + +struct mlx5e_xsk_param; +void mlx5e_build_xsk_param(struct xdp_umem *umem, struct mlx5e_xsk_param *xsk); + +/* .ndo_bpf callback. */ +int mlx5e_xsk_setup_umem(struct net_device *dev, struct xdp_umem *umem, u16 qid); + +int mlx5e_xsk_resize_reuseq(struct xdp_umem *umem, u32 nentries); + +u16 mlx5e_xsk_first_unused_channel(struct mlx5e_params *params, struct mlx5e_xsk *xsk); + +#endif /* __MLX5_EN_XSK_UMEM_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index ea59097dd4f8..74235317d4dc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -32,6 +32,7 @@ #include "en.h" #include "en/port.h" +#include "en/xsk/umem.h" #include "lib/clock.h" void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv, @@ -388,8 +389,17 @@ static int mlx5e_set_ringparam(struct net_device *dev, void mlx5e_ethtool_get_channels(struct mlx5e_priv *priv, struct ethtool_channels *ch) { + mutex_lock(&priv->state_lock); + ch->max_combined = mlx5e_get_netdev_max_channels(priv->netdev); ch->combined_count = priv->channels.params.num_channels; + if (priv->xsk.refcnt) { + /* The upper half are XSK queues. */ + ch->max_combined *= 2; + ch->combined_count *= 2; + } + + mutex_unlock(&priv->state_lock); } static void mlx5e_get_channels(struct net_device *dev, @@ -403,6 +413,7 @@ static void mlx5e_get_channels(struct net_device *dev, int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv, struct ethtool_channels *ch) { + struct mlx5e_params *cur_params = &priv->channels.params; unsigned int count = ch->combined_count; struct mlx5e_channels new_channels = {}; bool arfs_enabled; @@ -414,16 +425,26 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv, return -EINVAL; } - if (priv->channels.params.num_channels == count) + if (cur_params->num_channels == count) return 0; mutex_lock(&priv->state_lock); + /* Don't allow changing the number of channels if there is an active + * XSK, because the numeration of the XSK and regular RQs will change. 
+ */ + if (priv->xsk.refcnt) { + err = -EINVAL; + netdev_err(priv->netdev, "%s: AF_XDP is active, cannot change the number of channels\n", + __func__); + goto out; + } + new_channels.params = priv->channels.params; new_channels.params.num_channels = count; if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { - priv->channels.params = new_channels.params; + *cur_params = new_channels.params; if (!netif_is_rxfh_configured(priv->netdev)) mlx5e_build_default_indir_rqt(priv->rss_params.indirection_rqt, MLX5E_INDIR_RQT_SIZE, count); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c index 4421c10f58ae..ec5392baabc8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c @@ -32,6 +32,8 @@ #include <linux/mlx5/fs.h> #include "en.h" +#include "en/params.h" +#include "en/xsk/umem.h" struct mlx5e_ethtool_rule { struct list_head list; @@ -414,6 +416,14 @@ add_ethtool_flow_rule(struct mlx5e_priv *priv, if (fs->ring_cookie == RX_CLS_FLOW_DISC) { flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; } else { + struct mlx5e_params *params = &priv->channels.params; + enum mlx5e_rq_group group; + struct mlx5e_tir *tir; + u16 ix; + + mlx5e_qid_get_ch_and_group(params, fs->ring_cookie, &ix, &group); + tir = group == MLX5E_RQ_GROUP_XSK ? priv->xsk_tir : priv->direct_tir; + dst = kzalloc(sizeof(*dst), GFP_KERNEL); if (!dst) { err = -ENOMEM; @@ -421,7 +431,7 @@ add_ethtool_flow_rule(struct mlx5e_priv *priv, } dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR; - dst->tir_num = priv->direct_tir[fs->ring_cookie].tirn; + dst->tir_num = tir[ix].tirn; flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; } @@ -600,9 +610,9 @@ static int validate_flow(struct mlx5e_priv *priv, if (fs->location >= MAX_NUM_OF_ETHTOOL_RULES) return -ENOSPC; - if (fs->ring_cookie >= priv->channels.params.num_channels && - fs->ring_cookie != RX_CLS_FLOW_DISC) - return -EINVAL; + if (fs->ring_cookie != RX_CLS_FLOW_DISC) + if (!mlx5e_qid_validate(&priv->channels.params, fs->ring_cookie)) + return -EINVAL; switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT)) { case ETHER_FLOW: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 5e40db8f92e6..67b562c7f8a1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -38,6 +38,7 @@ #include <linux/bpf.h> #include <linux/if_bridge.h> #include <net/page_pool.h> +#include <net/xdp_sock.h> #include "eswitch.h" #include "en.h" #include "en_tc.h" @@ -56,35 +57,10 @@ #include "en/monitor_stats.h" #include "en/reporter.h" #include "en/params.h" - -struct mlx5e_rq_param { - u32 rqc[MLX5_ST_SZ_DW(rqc)]; - struct mlx5_wq_param wq; - struct mlx5e_rq_frags_info frags_info; -}; - -struct mlx5e_sq_param { - u32 sqc[MLX5_ST_SZ_DW(sqc)]; - struct mlx5_wq_param wq; - bool is_mpw; -}; - -struct mlx5e_cq_param { - u32 cqc[MLX5_ST_SZ_DW(cqc)]; - struct mlx5_wq_param wq; - u16 eq_ix; - u8 cq_period_mode; -}; - -struct mlx5e_channel_param { - struct mlx5e_rq_param rq; - struct mlx5e_sq_param sq; - struct mlx5e_sq_param xdp_sq; - struct mlx5e_sq_param icosq; - struct mlx5e_cq_param rx_cq; - struct mlx5e_cq_param tx_cq; - struct mlx5e_cq_param icosq_cq; -}; +#include "en/xsk/umem.h" +#include "en/xsk/setup.h" +#include "en/xsk/rx.h" +#include "en/xsk/tx.h" bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) { @@ -114,18 +90,31 @@ void 
mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, mlx5_core_info(mdev, "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n", params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ, params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ ? - BIT(mlx5e_mpwqe_get_log_rq_size(params)) : + BIT(mlx5e_mpwqe_get_log_rq_size(params, NULL)) : BIT(params->log_rq_mtu_frames), - BIT(mlx5e_mpwqe_get_log_stride_size(mdev, params)), + BIT(mlx5e_mpwqe_get_log_stride_size(mdev, params, NULL)), MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)); } bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev, struct mlx5e_params *params) { - return mlx5e_check_fragmented_striding_rq_cap(mdev) && - !MLX5_IPSEC_DEV(mdev) && - !(params->xdp_prog && !mlx5e_rx_mpwqe_is_linear_skb(mdev, params)); + if (!mlx5e_check_fragmented_striding_rq_cap(mdev)) + return false; + + if (MLX5_IPSEC_DEV(mdev)) + return false; + + if (params->xdp_prog) { + /* XSK params are not considered here. If striding RQ is in use, + * and an XSK is being opened, mlx5e_rx_mpwqe_is_linear_skb will + * be called with the known XSK params. + */ + if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL)) + return false; + } + + return true; } void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params) @@ -394,6 +383,8 @@ static void mlx5e_free_di_list(struct mlx5e_rq *rq) static int mlx5e_alloc_rq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct xdp_umem *umem, struct mlx5e_rq_param *rqp, struct mlx5e_rq *rq) { @@ -401,6 +392,8 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, struct mlx5_core_dev *mdev = c->mdev; void *rqc = rqp->rqc; void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq); + u32 num_xsk_frames = 0; + u32 rq_xdp_ix; u32 pool_size; int wq_sz; int err; @@ -417,7 +410,13 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, rq->ix = c->ix; rq->mdev = mdev; rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); - rq->stats = &c->priv->channel_stats[c->ix].rq; + rq->xdpsq = &c->rq_xdpsq; + rq->umem = umem; + + if (rq->umem) + rq->stats = &c->priv->channel_stats[c->ix].xskrq; + else + rq->stats = &c->priv->channel_stats[c->ix].rq; rq->xdp_prog = params->xdp_prog ? bpf_prog_inc(params->xdp_prog) : NULL; if (IS_ERR(rq->xdp_prog)) { @@ -426,12 +425,16 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, goto err_rq_wq_destroy; } - err = xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq->ix); + rq_xdp_ix = rq->ix; + if (xsk) + rq_xdp_ix += params->num_channels * MLX5E_RQ_GROUP_XSK; + err = xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq_xdp_ix); if (err < 0) goto err_rq_wq_destroy; rq->buff.map_dir = rq->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE; - rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params); + rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params, xsk); + rq->buff.umem_headroom = xsk ? 
xsk->headroom : 0; pool_size = 1 << params->log_rq_mtu_frames; switch (rq->wq_type) { @@ -445,7 +448,12 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq); - pool_size = MLX5_MPWRQ_PAGES_PER_WQE << mlx5e_mpwqe_get_log_rq_size(params); + if (xsk) + num_xsk_frames = wq_sz << + mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk); + + pool_size = MLX5_MPWRQ_PAGES_PER_WQE << + mlx5e_mpwqe_get_log_rq_size(params, xsk); rq->post_wqes = mlx5e_post_rx_mpwqes; rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe; @@ -464,12 +472,15 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, goto err_rq_wq_destroy; } - rq->mpwqe.skb_from_cqe_mpwrq = - mlx5e_rx_mpwqe_is_linear_skb(mdev, params) ? - mlx5e_skb_from_cqe_mpwrq_linear : - mlx5e_skb_from_cqe_mpwrq_nonlinear; - rq->mpwqe.log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params); - rq->mpwqe.num_strides = BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params)); + rq->mpwqe.skb_from_cqe_mpwrq = xsk ? + mlx5e_xsk_skb_from_cqe_mpwrq_linear : + mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL) ? + mlx5e_skb_from_cqe_mpwrq_linear : + mlx5e_skb_from_cqe_mpwrq_nonlinear; + + rq->mpwqe.log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk); + rq->mpwqe.num_strides = + BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk)); err = mlx5e_create_rq_umr_mkey(mdev, rq); if (err) @@ -490,6 +501,9 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, wq_sz = mlx5_wq_cyc_get_size(&rq->wqe.wq); + if (xsk) + num_xsk_frames = wq_sz << rq->wqe.info.log_num_frags; + rq->wqe.info = rqp->frags_info; rq->wqe.frags = kvzalloc_node(array_size(sizeof(*rq->wqe.frags), @@ -503,6 +517,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, err = mlx5e_init_di_list(rq, wq_sz, c->cpu); if (err) goto err_free; + rq->post_wqes = mlx5e_post_rx_wqes; rq->dealloc_wqe = mlx5e_dealloc_rx_wqe; @@ -518,37 +533,53 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, goto err_free; } - rq->wqe.skb_from_cqe = mlx5e_rx_is_linear_skb(params) ? - mlx5e_skb_from_cqe_linear : - mlx5e_skb_from_cqe_nonlinear; + rq->wqe.skb_from_cqe = xsk ? + mlx5e_xsk_skb_from_cqe_linear : + mlx5e_rx_is_linear_skb(params, NULL) ? + mlx5e_skb_from_cqe_linear : + mlx5e_skb_from_cqe_nonlinear; rq->mkey_be = c->mkey_be; } - /* Create a page_pool and register it with rxq */ - pp_params.order = 0; - pp_params.flags = 0; /* No-internal DMA mapping in page_pool */ - pp_params.pool_size = pool_size; - pp_params.nid = cpu_to_node(c->cpu); - pp_params.dev = c->pdev; - pp_params.dma_dir = rq->buff.map_dir; - - /* page_pool can be used even when there is no rq->xdp_prog, - * given page_pool does not handle DMA mapping there is no - * required state to clear. And page_pool gracefully handle - * elevated refcnt. 
- */ - rq->page_pool = page_pool_create(&pp_params); - if (IS_ERR(rq->page_pool)) { - err = PTR_ERR(rq->page_pool); - rq->page_pool = NULL; - goto err_free; + if (xsk) { + err = mlx5e_xsk_resize_reuseq(umem, num_xsk_frames); + if (unlikely(err)) { + mlx5_core_err(mdev, "Unable to allocate the Reuse Ring for %u frames\n", + num_xsk_frames); + goto err_free; + } + + rq->zca.free = mlx5e_xsk_zca_free; + err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, + MEM_TYPE_ZERO_COPY, + &rq->zca); + } else { + /* Create a page_pool and register it with rxq */ + pp_params.order = 0; + pp_params.flags = 0; /* No-internal DMA mapping in page_pool */ + pp_params.pool_size = pool_size; + pp_params.nid = cpu_to_node(c->cpu); + pp_params.dev = c->pdev; + pp_params.dma_dir = rq->buff.map_dir; + + /* page_pool can be used even when there is no rq->xdp_prog, + * given page_pool does not handle DMA mapping there is no + * required state to clear. And page_pool gracefully handle + * elevated refcnt. + */ + rq->page_pool = page_pool_create(&pp_params); + if (IS_ERR(rq->page_pool)) { + err = PTR_ERR(rq->page_pool); + rq->page_pool = NULL; + goto err_free; + } + err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, + MEM_TYPE_PAGE_POOL, rq->page_pool); + if (err) + page_pool_free(rq->page_pool); } - err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, - MEM_TYPE_PAGE_POOL, rq->page_pool); - if (err) { - page_pool_free(rq->page_pool); + if (err) goto err_free; - } for (i = 0; i < wq_sz; i++) { if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { @@ -639,7 +670,11 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq) i = (i + 1) & (MLX5E_CACHE_SIZE - 1)) { struct mlx5e_dma_info *dma_info = &rq->page_cache.page_cache[i]; - mlx5e_page_release(rq, dma_info, false); + /* With AF_XDP, page_cache is not used, so this loop is not + * entered, and it's safe to call mlx5e_page_release_dynamic + * directly. 
+ */ + mlx5e_page_release_dynamic(rq, dma_info, false); } xdp_rxq_info_unreg(&rq->xdp_rxq); @@ -776,7 +811,7 @@ static void mlx5e_destroy_rq(struct mlx5e_rq *rq) mlx5_core_destroy_rq(rq->mdev, rq->rqn); } -static int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time) +int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time) { unsigned long exp_time = jiffies + msecs_to_jiffies(wait_time); struct mlx5e_channel *c = rq->channel; @@ -834,14 +869,13 @@ static void mlx5e_free_rx_descs(struct mlx5e_rq *rq) } -static int mlx5e_open_rq(struct mlx5e_channel *c, - struct mlx5e_params *params, - struct mlx5e_rq_param *param, - struct mlx5e_rq *rq) +int mlx5e_open_rq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct mlx5e_rq_param *param, struct mlx5e_xsk_param *xsk, + struct xdp_umem *umem, struct mlx5e_rq *rq) { int err; - err = mlx5e_alloc_rq(c, params, param, rq); + err = mlx5e_alloc_rq(c, params, xsk, umem, param, rq); if (err) return err; @@ -879,13 +913,13 @@ static void mlx5e_activate_rq(struct mlx5e_rq *rq) mlx5e_trigger_irq(&rq->channel->icosq); } -static void mlx5e_deactivate_rq(struct mlx5e_rq *rq) +void mlx5e_deactivate_rq(struct mlx5e_rq *rq) { clear_bit(MLX5E_RQ_STATE_ENABLED, &rq->state); napi_synchronize(&rq->channel->napi); /* prevent mlx5e_post_rx_wqes */ } -static void mlx5e_close_rq(struct mlx5e_rq *rq) +void mlx5e_close_rq(struct mlx5e_rq *rq) { cancel_work_sync(&rq->dim.work); mlx5e_destroy_rq(rq); @@ -938,6 +972,7 @@ static int mlx5e_alloc_xdpsq_db(struct mlx5e_xdpsq *sq, int numa) static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct xdp_umem *umem, struct mlx5e_sq_param *param, struct mlx5e_xdpsq *sq, bool is_redirect) @@ -953,9 +988,13 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c, sq->uar_map = mdev->mlx5e_res.bfreg.map; sq->min_inline_mode = params->tx_min_inline_mode; sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); - sq->stats = is_redirect ? - &c->priv->channel_stats[c->ix].xdpsq : - &c->priv->channel_stats[c->ix].rq_xdpsq; + sq->umem = umem; + + sq->stats = sq->umem ? + &c->priv->channel_stats[c->ix].xsksq : + is_redirect ? 
+ &c->priv->channel_stats[c->ix].xdpsq : + &c->priv->channel_stats[c->ix].rq_xdpsq; param->wq.db_numa_node = cpu_to_node(c->cpu); err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, wq, &sq->wq_ctrl); @@ -1335,10 +1374,8 @@ static void mlx5e_tx_err_cqe_work(struct work_struct *recover_work) mlx5e_tx_reporter_err_cqe(sq); } -static int mlx5e_open_icosq(struct mlx5e_channel *c, - struct mlx5e_params *params, - struct mlx5e_sq_param *param, - struct mlx5e_icosq *sq) +int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct mlx5e_sq_param *param, struct mlx5e_icosq *sq) { struct mlx5e_create_sq_param csp = {}; int err; @@ -1364,7 +1401,7 @@ err_free_icosq: return err; } -static void mlx5e_close_icosq(struct mlx5e_icosq *sq) +void mlx5e_close_icosq(struct mlx5e_icosq *sq) { struct mlx5e_channel *c = sq->channel; @@ -1375,16 +1412,14 @@ static void mlx5e_close_icosq(struct mlx5e_icosq *sq) mlx5e_free_icosq(sq); } -static int mlx5e_open_xdpsq(struct mlx5e_channel *c, - struct mlx5e_params *params, - struct mlx5e_sq_param *param, - struct mlx5e_xdpsq *sq, - bool is_redirect) +int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct mlx5e_sq_param *param, struct xdp_umem *umem, + struct mlx5e_xdpsq *sq, bool is_redirect) { struct mlx5e_create_sq_param csp = {}; int err; - err = mlx5e_alloc_xdpsq(c, params, param, sq, is_redirect); + err = mlx5e_alloc_xdpsq(c, params, umem, param, sq, is_redirect); if (err) return err; @@ -1438,7 +1473,7 @@ err_free_xdpsq: return err; } -static void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq) +void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq) { struct mlx5e_channel *c = sq->channel; @@ -1446,7 +1481,7 @@ static void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq) napi_synchronize(&c->napi); mlx5e_destroy_sq(c->mdev, sq->sqn); - mlx5e_free_xdpsq_descs(sq, rq); + mlx5e_free_xdpsq_descs(sq); mlx5e_free_xdpsq(sq); } @@ -1567,10 +1602,8 @@ static void mlx5e_destroy_cq(struct mlx5e_cq *cq) mlx5_core_destroy_cq(cq->mdev, &cq->mcq); } -static int mlx5e_open_cq(struct mlx5e_channel *c, - struct net_dim_cq_moder moder, - struct mlx5e_cq_param *param, - struct mlx5e_cq *cq) +int mlx5e_open_cq(struct mlx5e_channel *c, struct net_dim_cq_moder moder, + struct mlx5e_cq_param *param, struct mlx5e_cq *cq) { struct mlx5_core_dev *mdev = c->mdev; int err; @@ -1593,7 +1626,7 @@ err_free_cq: return err; } -static void mlx5e_close_cq(struct mlx5e_cq *cq) +void mlx5e_close_cq(struct mlx5e_cq *cq) { mlx5e_destroy_cq(cq); mlx5e_free_cq(cq); @@ -1767,49 +1800,16 @@ static void mlx5e_free_xps_cpumask(struct mlx5e_channel *c) free_cpumask_var(c->xps_cpumask); } -static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, - struct mlx5e_params *params, - struct mlx5e_channel_param *cparam, - struct mlx5e_channel **cp) +static int mlx5e_open_queues(struct mlx5e_channel *c, + struct mlx5e_params *params, + struct mlx5e_channel_param *cparam) { - int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(priv->mdev, ix)); struct net_dim_cq_moder icocq_moder = {0, 0}; - struct net_device *netdev = priv->netdev; - struct mlx5e_channel *c; - unsigned int irq; int err; - int eqn; - - err = mlx5_vector2eqn(priv->mdev, ix, &eqn, &irq); - if (err) - return err; - - c = kvzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu)); - if (!c) - return -ENOMEM; - - c->priv = priv; - c->mdev = priv->mdev; - c->tstamp = &priv->tstamp; - c->ix = ix; - c->cpu = cpu; - c->pdev = priv->mdev->device; - c->netdev = priv->netdev; - c->mkey_be 
= cpu_to_be32(priv->mdev->mlx5e_res.mkey.key); - c->num_tc = params->num_tc; - c->xdp = !!params->xdp_prog; - c->stats = &priv->channel_stats[ix].ch; - c->irq_desc = irq_to_desc(irq); - - err = mlx5e_alloc_xps_cpumask(c, params); - if (err) - goto err_free_channel; - - netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64); err = mlx5e_open_cq(c, icocq_moder, &cparam->icosq_cq, &c->icosq.cq); if (err) - goto err_napi_del; + return err; err = mlx5e_open_tx_cqs(c, params, cparam); if (err) @@ -1825,7 +1825,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, /* XDP SQ CQ params are same as normal TXQ sq CQ params */ err = c->xdp ? mlx5e_open_cq(c, params->tx_cq_moderation, - &cparam->tx_cq, &c->rq.xdpsq.cq) : 0; + &cparam->tx_cq, &c->rq_xdpsq.cq) : 0; if (err) goto err_close_rx_cq; @@ -1839,20 +1839,21 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, if (err) goto err_close_icosq; - err = c->xdp ? mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, &c->rq.xdpsq, false) : 0; - if (err) - goto err_close_sqs; + if (c->xdp) { + err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, NULL, + &c->rq_xdpsq, false); + if (err) + goto err_close_sqs; + } - err = mlx5e_open_rq(c, params, &cparam->rq, &c->rq); + err = mlx5e_open_rq(c, params, &cparam->rq, NULL, NULL, &c->rq); if (err) goto err_close_xdp_sq; - err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, &c->xdpsq, true); + err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, NULL, &c->xdpsq, true); if (err) goto err_close_rq; - *cp = c; - return 0; err_close_rq: @@ -1860,7 +1861,7 @@ err_close_rq: err_close_xdp_sq: if (c->xdp) - mlx5e_close_xdpsq(&c->rq.xdpsq, &c->rq); + mlx5e_close_xdpsq(&c->rq_xdpsq); err_close_sqs: mlx5e_close_sqs(c); @@ -1870,8 +1871,9 @@ err_close_icosq: err_disable_napi: napi_disable(&c->napi); + if (c->xdp) - mlx5e_close_cq(&c->rq.xdpsq.cq); + mlx5e_close_cq(&c->rq_xdpsq.cq); err_close_rx_cq: mlx5e_close_cq(&c->rq.cq); @@ -1885,6 +1887,85 @@ err_close_tx_cqs: err_close_icosq_cq: mlx5e_close_cq(&c->icosq.cq); + return err; +} + +static void mlx5e_close_queues(struct mlx5e_channel *c) +{ + mlx5e_close_xdpsq(&c->xdpsq); + mlx5e_close_rq(&c->rq); + if (c->xdp) + mlx5e_close_xdpsq(&c->rq_xdpsq); + mlx5e_close_sqs(c); + mlx5e_close_icosq(&c->icosq); + napi_disable(&c->napi); + if (c->xdp) + mlx5e_close_cq(&c->rq_xdpsq.cq); + mlx5e_close_cq(&c->rq.cq); + mlx5e_close_cq(&c->xdpsq.cq); + mlx5e_close_tx_cqs(c); + mlx5e_close_cq(&c->icosq.cq); +} + +static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, + struct mlx5e_params *params, + struct mlx5e_channel_param *cparam, + struct xdp_umem *umem, + struct mlx5e_channel **cp) +{ + int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(priv->mdev, ix)); + struct net_device *netdev = priv->netdev; + struct mlx5e_xsk_param xsk; + struct mlx5e_channel *c; + unsigned int irq; + int err; + int eqn; + + err = mlx5_vector2eqn(priv->mdev, ix, &eqn, &irq); + if (err) + return err; + + c = kvzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu)); + if (!c) + return -ENOMEM; + + c->priv = priv; + c->mdev = priv->mdev; + c->tstamp = &priv->tstamp; + c->ix = ix; + c->cpu = cpu; + c->pdev = priv->mdev->device; + c->netdev = priv->netdev; + c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key); + c->num_tc = params->num_tc; + c->xdp = !!params->xdp_prog; + c->stats = &priv->channel_stats[ix].ch; + c->irq_desc = irq_to_desc(irq); + + err = mlx5e_alloc_xps_cpumask(c, params); + if (err) + goto err_free_channel; + + netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64); + + err 
= mlx5e_open_queues(c, params, cparam); + if (unlikely(err)) + goto err_napi_del; + + if (umem) { + mlx5e_build_xsk_param(umem, &xsk); + err = mlx5e_open_xsk(priv, params, &xsk, umem, c); + if (unlikely(err)) + goto err_close_queues; + } + + *cp = c; + + return 0; + +err_close_queues: + mlx5e_close_queues(c); + err_napi_del: netif_napi_del(&c->napi); mlx5e_free_xps_cpumask(c); @@ -1903,12 +1984,18 @@ static void mlx5e_activate_channel(struct mlx5e_channel *c) mlx5e_activate_txqsq(&c->sq[tc]); mlx5e_activate_rq(&c->rq); netif_set_xps_queue(c->netdev, c->xps_cpumask, c->ix); + + if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) + mlx5e_activate_xsk(c); } static void mlx5e_deactivate_channel(struct mlx5e_channel *c) { int tc; + if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) + mlx5e_deactivate_xsk(c); + mlx5e_deactivate_rq(&c->rq); for (tc = 0; tc < c->num_tc; tc++) mlx5e_deactivate_txqsq(&c->sq[tc]); @@ -1916,19 +2003,9 @@ static void mlx5e_deactivate_channel(struct mlx5e_channel *c) static void mlx5e_close_channel(struct mlx5e_channel *c) { - mlx5e_close_xdpsq(&c->xdpsq, NULL); - mlx5e_close_rq(&c->rq); - if (c->xdp) - mlx5e_close_xdpsq(&c->rq.xdpsq, &c->rq); - mlx5e_close_sqs(c); - mlx5e_close_icosq(&c->icosq); - napi_disable(&c->napi); - if (c->xdp) - mlx5e_close_cq(&c->rq.xdpsq.cq); - mlx5e_close_cq(&c->rq.cq); - mlx5e_close_cq(&c->xdpsq.cq); - mlx5e_close_tx_cqs(c); - mlx5e_close_cq(&c->icosq.cq); + if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) + mlx5e_close_xsk(c); + mlx5e_close_queues(c); netif_napi_del(&c->napi); mlx5e_free_xps_cpumask(c); @@ -1939,6 +2016,7 @@ static void mlx5e_close_channel(struct mlx5e_channel *c) static void mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev, struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, struct mlx5e_rq_frags_info *info) { u32 byte_count = MLX5E_SW2HW_MTU(params, params->sw_mtu); @@ -1951,10 +2029,10 @@ static void mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev, byte_count += MLX5E_METADATA_ETHER_LEN; #endif - if (mlx5e_rx_is_linear_skb(params)) { + if (mlx5e_rx_is_linear_skb(params, xsk)) { int frag_stride; - frag_stride = mlx5e_rx_get_linear_frag_sz(params); + frag_stride = mlx5e_rx_get_linear_frag_sz(params, xsk); frag_stride = roundup_pow_of_two(frag_stride); info->arr[0].frag_size = byte_count; @@ -2012,9 +2090,10 @@ static u8 mlx5e_get_rq_log_wq_sz(void *rqc) return MLX5_GET(wq, wq, log_wq_sz); } -static void mlx5e_build_rq_param(struct mlx5e_priv *priv, - struct mlx5e_params *params, - struct mlx5e_rq_param *param) +void mlx5e_build_rq_param(struct mlx5e_priv *priv, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct mlx5e_rq_param *param) { struct mlx5_core_dev *mdev = priv->mdev; void *rqc = param->rqc; @@ -2024,16 +2103,16 @@ static void mlx5e_build_rq_param(struct mlx5e_priv *priv, switch (params->rq_wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: MLX5_SET(wq, wq, log_wqe_num_of_strides, - mlx5e_mpwqe_get_log_num_strides(mdev, params) - + mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk) - MLX5_MPWQE_LOG_NUM_STRIDES_BASE); MLX5_SET(wq, wq, log_wqe_stride_size, - mlx5e_mpwqe_get_log_stride_size(mdev, params) - + mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk) - MLX5_MPWQE_LOG_STRIDE_SZ_BASE); - MLX5_SET(wq, wq, log_wq_sz, mlx5e_mpwqe_get_log_rq_size(params)); + MLX5_SET(wq, wq, log_wq_sz, mlx5e_mpwqe_get_log_rq_size(params, xsk)); break; default: /* MLX5_WQ_TYPE_CYCLIC */ MLX5_SET(wq, wq, log_wq_sz, params->log_rq_mtu_frames); - mlx5e_build_rq_frags_info(mdev, params, 
¶m->frags_info); + mlx5e_build_rq_frags_info(mdev, params, xsk, ¶m->frags_info); ndsegs = param->frags_info.num_frags; } @@ -2064,8 +2143,8 @@ static void mlx5e_build_drop_rq_param(struct mlx5e_priv *priv, param->wq.buf_numa_node = dev_to_node(mdev->device); } -static void mlx5e_build_sq_param_common(struct mlx5e_priv *priv, - struct mlx5e_sq_param *param) +void mlx5e_build_sq_param_common(struct mlx5e_priv *priv, + struct mlx5e_sq_param *param) { void *sqc = param->sqc; void *wq = MLX5_ADDR_OF(sqc, sqc, wq); @@ -2101,9 +2180,10 @@ static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv, MLX5_SET(cqc, cqc, cqe_sz, CQE_STRIDE_128_PAD); } -static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, - struct mlx5e_params *params, - struct mlx5e_cq_param *param) +void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct mlx5e_cq_param *param) { struct mlx5_core_dev *mdev = priv->mdev; void *cqc = param->cqc; @@ -2111,8 +2191,8 @@ static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, switch (params->rq_wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - log_cq_size = mlx5e_mpwqe_get_log_rq_size(params) + - mlx5e_mpwqe_get_log_num_strides(mdev, params); + log_cq_size = mlx5e_mpwqe_get_log_rq_size(params, xsk) + + mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk); break; default: /* MLX5_WQ_TYPE_CYCLIC */ log_cq_size = params->log_rq_mtu_frames; @@ -2128,9 +2208,9 @@ static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, param->cq_period_mode = params->rx_cq_moderation.cq_period_mode; } -static void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv, - struct mlx5e_params *params, - struct mlx5e_cq_param *param) +void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv, + struct mlx5e_params *params, + struct mlx5e_cq_param *param) { void *cqc = param->cqc; @@ -2140,9 +2220,9 @@ static void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv, param->cq_period_mode = params->tx_cq_moderation.cq_period_mode; } -static void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv, - u8 log_wq_size, - struct mlx5e_cq_param *param) +void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv, + u8 log_wq_size, + struct mlx5e_cq_param *param) { void *cqc = param->cqc; @@ -2153,9 +2233,9 @@ static void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv, param->cq_period_mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE; } -static void mlx5e_build_icosq_param(struct mlx5e_priv *priv, - u8 log_wq_size, - struct mlx5e_sq_param *param) +void mlx5e_build_icosq_param(struct mlx5e_priv *priv, + u8 log_wq_size, + struct mlx5e_sq_param *param) { void *sqc = param->sqc; void *wq = MLX5_ADDR_OF(sqc, sqc, wq); @@ -2166,9 +2246,9 @@ static void mlx5e_build_icosq_param(struct mlx5e_priv *priv, MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(priv->mdev, reg_umr_sq)); } -static void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv, - struct mlx5e_params *params, - struct mlx5e_sq_param *param) +void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv, + struct mlx5e_params *params, + struct mlx5e_sq_param *param) { void *sqc = param->sqc; void *wq = MLX5_ADDR_OF(sqc, sqc, wq); @@ -2196,14 +2276,14 @@ static void mlx5e_build_channel_param(struct mlx5e_priv *priv, { u8 icosq_log_wq_sz; - mlx5e_build_rq_param(priv, params, &cparam->rq); + mlx5e_build_rq_param(priv, params, NULL, &cparam->rq); icosq_log_wq_sz = mlx5e_build_icosq_log_wq_sz(params, &cparam->rq); mlx5e_build_sq_param(priv, params, &cparam->sq); mlx5e_build_xdpsq_param(priv, params, &cparam->xdp_sq); 
mlx5e_build_icosq_param(priv, icosq_log_wq_sz, &cparam->icosq); - mlx5e_build_rx_cq_param(priv, params, &cparam->rx_cq); + mlx5e_build_rx_cq_param(priv, params, NULL, &cparam->rx_cq); mlx5e_build_tx_cq_param(priv, params, &cparam->tx_cq); mlx5e_build_ico_cq_param(priv, icosq_log_wq_sz, &cparam->icosq_cq); } @@ -2224,7 +2304,12 @@ int mlx5e_open_channels(struct mlx5e_priv *priv, mlx5e_build_channel_param(priv, &chs->params, cparam); for (i = 0; i < chs->num; i++) { - err = mlx5e_open_channel(priv, i, &chs->params, cparam, &chs->c[i]); + struct xdp_umem *umem = NULL; + + if (chs->params.xdp_prog) + umem = mlx5e_xsk_get_umem(&chs->params, chs->params.xsk, i); + + err = mlx5e_open_channel(priv, i, &chs->params, cparam, umem, &chs->c[i]); if (err) goto err_close_channels; } @@ -2266,6 +2351,10 @@ static int mlx5e_wait_channels_min_rx_wqes(struct mlx5e_channels *chs) int timeout = err ? 0 : MLX5E_RQ_WQES_TIMEOUT; err |= mlx5e_wait_for_min_rx_wqes(&chs->c[i]->rq, timeout); + + /* Don't wait on the XSK RQ, because the newer xdpsock sample + * doesn't provide any Fill Ring entries at the setup stage. + */ } return err ? -ETIMEDOUT : 0; @@ -2338,35 +2427,35 @@ int mlx5e_create_indirect_rqt(struct mlx5e_priv *priv) return err; } -int mlx5e_create_direct_rqts(struct mlx5e_priv *priv) +int mlx5e_create_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs) { - struct mlx5e_rqt *rqt; + const int max_nch = mlx5e_get_netdev_max_channels(priv->netdev); int err; int ix; - for (ix = 0; ix < mlx5e_get_netdev_max_channels(priv->netdev); ix++) { - rqt = &priv->direct_tir[ix].rqt; - err = mlx5e_create_rqt(priv, 1 /*size */, rqt); - if (err) + for (ix = 0; ix < max_nch; ix++) { + err = mlx5e_create_rqt(priv, 1 /*size */, &tirs[ix].rqt); + if (unlikely(err)) goto err_destroy_rqts; } return 0; err_destroy_rqts: - mlx5_core_warn(priv->mdev, "create direct rqts failed, %d\n", err); + mlx5_core_warn(priv->mdev, "create rqts failed, %d\n", err); for (ix--; ix >= 0; ix--) - mlx5e_destroy_rqt(priv, &priv->direct_tir[ix].rqt); + mlx5e_destroy_rqt(priv, &tirs[ix].rqt); return err; } -void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv) +void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs) { + const int max_nch = mlx5e_get_netdev_max_channels(priv->netdev); int i; - for (i = 0; i < mlx5e_get_netdev_max_channels(priv->netdev); i++) - mlx5e_destroy_rqt(priv, &priv->direct_tir[i].rqt); + for (i = 0; i < max_nch; i++) + mlx5e_destroy_rqt(priv, &tirs[i].rqt); } static int mlx5e_rx_hash_fn(int hfunc) @@ -2786,11 +2875,12 @@ static void mlx5e_build_tx2sq_maps(struct mlx5e_priv *priv) void mlx5e_activate_priv_channels(struct mlx5e_priv *priv) { int num_txqs = priv->channels.num * priv->channels.params.num_tc; + int num_rxqs = priv->channels.num * MLX5E_NUM_RQ_GROUPS; struct net_device *netdev = priv->netdev; mlx5e_netdev_set_tcs(netdev); netif_set_real_num_tx_queues(netdev, num_txqs); - netif_set_real_num_rx_queues(netdev, priv->channels.num); + netif_set_real_num_rx_queues(netdev, num_rxqs); mlx5e_build_tx2sq_maps(priv); mlx5e_activate_channels(&priv->channels); @@ -2802,10 +2892,14 @@ void mlx5e_activate_priv_channels(struct mlx5e_priv *priv) mlx5e_wait_channels_min_rx_wqes(&priv->channels); mlx5e_redirect_rqts_to_channels(priv, &priv->channels); + + mlx5e_xsk_redirect_rqts_to_channels(priv, &priv->channels); } void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv) { + mlx5e_xsk_redirect_rqts_to_drop(priv, &priv->channels); + mlx5e_redirect_rqts_to_drop(priv); if 
(mlx5e_is_vport_rep(priv)) @@ -2884,9 +2978,12 @@ void mlx5e_timestamp_init(struct mlx5e_priv *priv) int mlx5e_open_locked(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); + bool is_xdp = priv->channels.params.xdp_prog; int err; set_bit(MLX5E_STATE_OPENED, &priv->state); + if (is_xdp) + mlx5e_xdp_set_open(priv); err = mlx5e_open_channels(priv, &priv->channels); if (err) @@ -2901,6 +2998,8 @@ int mlx5e_open_locked(struct net_device *netdev) return 0; err_clear_state_opened_flag: + if (is_xdp) + mlx5e_xdp_set_closed(priv); clear_bit(MLX5E_STATE_OPENED, &priv->state); return err; } @@ -2932,6 +3031,8 @@ int mlx5e_close_locked(struct net_device *netdev) if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) return 0; + if (priv->channels.params.xdp_prog) + mlx5e_xdp_set_closed(priv); clear_bit(MLX5E_STATE_OPENED, &priv->state); netif_carrier_off(priv->netdev); @@ -3188,13 +3289,13 @@ err_destroy_inner_tirs: return err; } -int mlx5e_create_direct_tirs(struct mlx5e_priv *priv) +int mlx5e_create_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs) { - int nch = mlx5e_get_netdev_max_channels(priv->netdev); + const int max_nch = mlx5e_get_netdev_max_channels(priv->netdev); struct mlx5e_tir *tir; void *tirc; int inlen; - int err; + int err = 0; u32 *in; int ix; @@ -3203,25 +3304,24 @@ int mlx5e_create_direct_tirs(struct mlx5e_priv *priv) if (!in) return -ENOMEM; - for (ix = 0; ix < nch; ix++) { + for (ix = 0; ix < max_nch; ix++) { memset(in, 0, inlen); - tir = &priv->direct_tir[ix]; + tir = &tirs[ix]; tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); - mlx5e_build_direct_tir_ctx(priv, priv->direct_tir[ix].rqt.rqtn, tirc); + mlx5e_build_direct_tir_ctx(priv, tir->rqt.rqtn, tirc); err = mlx5e_create_tir(priv->mdev, tir, in, inlen); - if (err) + if (unlikely(err)) goto err_destroy_ch_tirs; } - kvfree(in); - - return 0; + goto out; err_destroy_ch_tirs: - mlx5_core_warn(priv->mdev, "create direct tirs failed, %d\n", err); + mlx5_core_warn(priv->mdev, "create tirs failed, %d\n", err); for (ix--; ix >= 0; ix--) - mlx5e_destroy_tir(priv->mdev, &priv->direct_tir[ix]); + mlx5e_destroy_tir(priv->mdev, &tirs[ix]); +out: kvfree(in); return err; @@ -3241,13 +3341,13 @@ void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc) mlx5e_destroy_tir(priv->mdev, &priv->inner_indir_tir[i]); } -void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv) +void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs) { - int nch = mlx5e_get_netdev_max_channels(priv->netdev); + const int max_nch = mlx5e_get_netdev_max_channels(priv->netdev); int i; - for (i = 0; i < nch; i++) - mlx5e_destroy_tir(priv->mdev, &priv->direct_tir[i]); + for (i = 0; i < max_nch; i++) + mlx5e_destroy_tir(priv->mdev, &tirs[i]); } static int mlx5e_modify_channels_scatter_fcs(struct mlx5e_channels *chs, bool enable) @@ -3389,11 +3489,12 @@ void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s) for (i = 0; i < mlx5e_get_netdev_max_channels(priv->netdev); i++) { struct mlx5e_channel_stats *channel_stats = &priv->channel_stats[i]; + struct mlx5e_rq_stats *xskrq_stats = &channel_stats->xskrq; struct mlx5e_rq_stats *rq_stats = &channel_stats->rq; int j; - s->rx_packets += rq_stats->packets; - s->rx_bytes += rq_stats->bytes; + s->rx_packets += rq_stats->packets + xskrq_stats->packets; + s->rx_bytes += rq_stats->bytes + xskrq_stats->bytes; for (j = 0; j < priv->max_opened_tc; j++) { struct mlx5e_sq_stats *sq_stats = &channel_stats->sq[j]; @@ -3492,6 +3593,13 @@ static int 
set_feature_lro(struct net_device *netdev, bool enable) mutex_lock(&priv->state_lock); + if (enable && priv->xsk.refcnt) { + netdev_warn(netdev, "LRO is incompatible with AF_XDP (%hu XSKs are active)\n", + priv->xsk.refcnt); + err = -EINVAL; + goto out; + } + old_params = &priv->channels.params; if (enable && !MLX5E_GET_PFLAG(old_params, MLX5E_PFLAG_RX_STRIDING_RQ)) { netdev_warn(netdev, "can't set LRO with legacy RQ\n"); @@ -3505,8 +3613,8 @@ static int set_feature_lro(struct net_device *netdev, bool enable) new_channels.params.lro_en = enable; if (old_params->rq_wq_type != MLX5_WQ_TYPE_CYCLIC) { - if (mlx5e_rx_mpwqe_is_linear_skb(mdev, old_params) == - mlx5e_rx_mpwqe_is_linear_skb(mdev, &new_channels.params)) + if (mlx5e_rx_mpwqe_is_linear_skb(mdev, old_params, NULL) == + mlx5e_rx_mpwqe_is_linear_skb(mdev, &new_channels.params, NULL)) reset = false; } @@ -3696,6 +3804,43 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev, return features; } +static bool mlx5e_xsk_validate_mtu(struct net_device *netdev, + struct mlx5e_channels *chs, + struct mlx5e_params *new_params, + struct mlx5_core_dev *mdev) +{ + u16 ix; + + for (ix = 0; ix < chs->params.num_channels; ix++) { + struct xdp_umem *umem = mlx5e_xsk_get_umem(&chs->params, chs->params.xsk, ix); + struct mlx5e_xsk_param xsk; + + if (!umem) + continue; + + mlx5e_build_xsk_param(umem, &xsk); + + if (!mlx5e_validate_xsk_param(new_params, &xsk, mdev)) { + u32 hr = mlx5e_get_linear_rq_headroom(new_params, &xsk); + int max_mtu_frame, max_mtu_page, max_mtu; + + /* Two criteria must be met: + * 1. HW MTU + all headrooms <= XSK frame size. + * 2. Size of SKBs allocated on XDP_PASS <= PAGE_SIZE. + */ + max_mtu_frame = MLX5E_HW2SW_MTU(new_params, xsk.chunk_size - hr); + max_mtu_page = mlx5e_xdp_max_mtu(new_params, &xsk); + max_mtu = min(max_mtu_frame, max_mtu_page); + + netdev_err(netdev, "MTU %d is too big for an XSK running on channel %hu. Try MTU <= %d\n", + new_params->sw_mtu, ix, max_mtu); + return false; + } + } + + return true; +} + int mlx5e_change_mtu(struct net_device *netdev, int new_mtu, change_hw_mtu_cb set_mtu_cb) { @@ -3716,18 +3861,31 @@ int mlx5e_change_mtu(struct net_device *netdev, int new_mtu, new_channels.params.sw_mtu = new_mtu; if (params->xdp_prog && - !mlx5e_rx_is_linear_skb(&new_channels.params)) { + !mlx5e_rx_is_linear_skb(&new_channels.params, NULL)) { netdev_err(netdev, "MTU(%d) > %d is not allowed while XDP enabled\n", - new_mtu, mlx5e_xdp_max_mtu(params)); + new_mtu, mlx5e_xdp_max_mtu(params, NULL)); + err = -EINVAL; + goto out; + } + + if (priv->xsk.refcnt && + !mlx5e_xsk_validate_mtu(netdev, &priv->channels, + &new_channels.params, priv->mdev)) { err = -EINVAL; goto out; } if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { - bool is_linear = mlx5e_rx_mpwqe_is_linear_skb(priv->mdev, &new_channels.params); - u8 ppw_old = mlx5e_mpwqe_log_pkts_per_wqe(params); - u8 ppw_new = mlx5e_mpwqe_log_pkts_per_wqe(&new_channels.params); + bool is_linear = mlx5e_rx_mpwqe_is_linear_skb(priv->mdev, + &new_channels.params, + NULL); + u8 ppw_old = mlx5e_mpwqe_log_pkts_per_wqe(params, NULL); + u8 ppw_new = mlx5e_mpwqe_log_pkts_per_wqe(&new_channels.params, NULL); + + /* If XSK is active, XSK RQs are linear. */ + is_linear |= priv->xsk.refcnt; + /* Always reset in linear mode - hw_mtu is used in data path. 
*/ reset = reset && (is_linear || (ppw_old != ppw_new)); } @@ -4160,16 +4318,29 @@ static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog) new_channels.params = priv->channels.params; new_channels.params.xdp_prog = prog; - if (!mlx5e_rx_is_linear_skb(&new_channels.params)) { + /* No XSK params: AF_XDP can't be enabled yet at the point of setting + * the XDP program. + */ + if (!mlx5e_rx_is_linear_skb(&new_channels.params, NULL)) { netdev_warn(netdev, "XDP is not allowed with MTU(%d) > %d\n", new_channels.params.sw_mtu, - mlx5e_xdp_max_mtu(&new_channels.params)); + mlx5e_xdp_max_mtu(&new_channels.params, NULL)); return -EINVAL; } return 0; } +static int mlx5e_xdp_update_state(struct mlx5e_priv *priv) +{ + if (priv->channels.params.xdp_prog) + mlx5e_xdp_set_open(priv); + else + mlx5e_xdp_set_closed(priv); + + return 0; +} + static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) { struct mlx5e_priv *priv = netdev_priv(netdev); @@ -4190,8 +4361,6 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) /* no need for full reset when exchanging programs */ reset = (!priv->channels.params.xdp_prog || !prog); - if (was_opened && reset) - mlx5e_close_locked(netdev); if (was_opened && !reset) { /* num_channels is invariant here, so we can take the * batched reference right upfront. @@ -4203,20 +4372,31 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) } } - /* exchange programs, extra prog reference we got from caller - * as long as we don't fail from this point onwards. - */ - old_prog = xchg(&priv->channels.params.xdp_prog, prog); + if (was_opened && reset) { + struct mlx5e_channels new_channels = {}; + + new_channels.params = priv->channels.params; + new_channels.params.xdp_prog = prog; + mlx5e_set_rq_type(priv->mdev, &new_channels.params); + old_prog = priv->channels.params.xdp_prog; + + err = mlx5e_safe_switch_channels(priv, &new_channels, mlx5e_xdp_update_state); + if (err) + goto unlock; + } else { + /* exchange programs, extra prog reference we got from caller + * as long as we don't fail from this point onwards. 
+ */ + old_prog = xchg(&priv->channels.params.xdp_prog, prog); + } + if (old_prog) bpf_prog_put(old_prog); - if (reset) /* change RQ type according to priv->xdp_prog */ + if (!was_opened && reset) /* change RQ type according to priv->xdp_prog */ mlx5e_set_rq_type(priv->mdev, &priv->channels.params); - if (was_opened && reset) - err = mlx5e_open_locked(netdev); - - if (!test_bit(MLX5E_STATE_OPENED, &priv->state) || reset) + if (!was_opened || reset) goto unlock; /* exchanging programs w/o reset, we update ref counts on behalf @@ -4224,19 +4404,29 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) */ for (i = 0; i < priv->channels.num; i++) { struct mlx5e_channel *c = priv->channels.c[i]; + bool xsk_open = test_bit(MLX5E_CHANNEL_STATE_XSK, c->state); clear_bit(MLX5E_RQ_STATE_ENABLED, &c->rq.state); + if (xsk_open) + clear_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state); napi_synchronize(&c->napi); /* prevent mlx5e_poll_rx_cq from accessing rq->xdp_prog */ old_prog = xchg(&c->rq.xdp_prog, prog); + if (old_prog) + bpf_prog_put(old_prog); + + if (xsk_open) { + old_prog = xchg(&c->xskrq.xdp_prog, prog); + if (old_prog) + bpf_prog_put(old_prog); + } set_bit(MLX5E_RQ_STATE_ENABLED, &c->rq.state); + if (xsk_open) + set_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state); /* napi_schedule in case we have missed anything */ napi_schedule(&c->napi); - - if (old_prog) - bpf_prog_put(old_prog); } unlock: @@ -4267,6 +4457,9 @@ static int mlx5e_xdp(struct net_device *dev, struct netdev_bpf *xdp) case XDP_QUERY_PROG: xdp->prog_id = mlx5e_xdp_query(dev); return 0; + case XDP_SETUP_XSK_UMEM: + return mlx5e_xsk_setup_umem(dev, xdp->xsk.umem, + xdp->xsk.queue_id); default: return -EINVAL; } @@ -4349,6 +4542,7 @@ const struct net_device_ops mlx5e_netdev_ops = { .ndo_tx_timeout = mlx5e_tx_timeout, .ndo_bpf = mlx5e_xdp, .ndo_xdp_xmit = mlx5e_xdp_xmit, + .ndo_xsk_async_xmit = mlx5e_xsk_async_xmit, #ifdef CONFIG_MLX5_EN_ARFS .ndo_rx_flow_steer = mlx5e_rx_flow_steer, #endif @@ -4500,11 +4694,13 @@ void mlx5e_build_rq_params(struct mlx5_core_dev *mdev, * - Striding RQ configuration is not possible/supported. * - Slow PCI heuristic. * - Legacy RQ would use linear SKB while Striding RQ would use non-linear. + * + * No XSK params: checking the availability of striding RQ in general. */ if (!slow_pci_heuristic(mdev) && mlx5e_striding_rq_possible(mdev, params) && - (mlx5e_rx_mpwqe_is_linear_skb(mdev, params) || - !mlx5e_rx_is_linear_skb(params))) + (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL) || + !mlx5e_rx_is_linear_skb(params, NULL))) MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ, true); mlx5e_set_rq_type(mdev, params); mlx5e_init_rq_type_params(mdev, params); @@ -4526,6 +4722,7 @@ void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params, } void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, + struct mlx5e_xsk *xsk, struct mlx5e_rss_params *rss_params, struct mlx5e_params *params, u16 max_channels, u16 mtu) @@ -4561,9 +4758,11 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, /* HW LRO */ /* TODO: && MLX5_CAP_ETH(mdev, lro_cap) */ - if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) - if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params)) + if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { + /* No XSK params: checking the availability of striding RQ in general. 
*/ + if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL)) params->lro_en = !slow_pci_heuristic(mdev); + } params->lro_timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT); /* CQ moderation params */ @@ -4582,6 +4781,9 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, mlx5e_build_rss_params(rss_params, params->num_channels); params->tunneled_offload_en = mlx5e_tunnel_inner_ft_supported(mdev); + + /* AF_XDP */ + params->xsk = xsk; } static void mlx5e_set_netdev_dev_addr(struct net_device *netdev) @@ -4754,7 +4956,7 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev, if (err) return err; - mlx5e_build_nic_params(mdev, rss, &priv->channels.params, + mlx5e_build_nic_params(mdev, &priv->xsk, rss, &priv->channels.params, mlx5e_get_netdev_max_channels(netdev), netdev->mtu); @@ -4796,7 +4998,7 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) if (err) goto err_close_drop_rq; - err = mlx5e_create_direct_rqts(priv); + err = mlx5e_create_direct_rqts(priv, priv->direct_tir); if (err) goto err_destroy_indirect_rqts; @@ -4804,14 +5006,22 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) if (err) goto err_destroy_direct_rqts; - err = mlx5e_create_direct_tirs(priv); + err = mlx5e_create_direct_tirs(priv, priv->direct_tir); if (err) goto err_destroy_indirect_tirs; + err = mlx5e_create_direct_rqts(priv, priv->xsk_tir); + if (unlikely(err)) + goto err_destroy_direct_tirs; + + err = mlx5e_create_direct_tirs(priv, priv->xsk_tir); + if (unlikely(err)) + goto err_destroy_xsk_rqts; + err = mlx5e_create_flow_steering(priv); if (err) { mlx5_core_warn(mdev, "create flow steering failed, %d\n", err); - goto err_destroy_direct_tirs; + goto err_destroy_xsk_tirs; } err = mlx5e_tc_nic_init(priv); @@ -4822,12 +5032,16 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) err_destroy_flow_steering: mlx5e_destroy_flow_steering(priv); +err_destroy_xsk_tirs: + mlx5e_destroy_direct_tirs(priv, priv->xsk_tir); +err_destroy_xsk_rqts: + mlx5e_destroy_direct_rqts(priv, priv->xsk_tir); err_destroy_direct_tirs: - mlx5e_destroy_direct_tirs(priv); + mlx5e_destroy_direct_tirs(priv, priv->direct_tir); err_destroy_indirect_tirs: mlx5e_destroy_indirect_tirs(priv, true); err_destroy_direct_rqts: - mlx5e_destroy_direct_rqts(priv); + mlx5e_destroy_direct_rqts(priv, priv->direct_tir); err_destroy_indirect_rqts: mlx5e_destroy_rqt(priv, &priv->indir_rqt); err_close_drop_rq: @@ -4841,9 +5055,11 @@ static void mlx5e_cleanup_nic_rx(struct mlx5e_priv *priv) { mlx5e_tc_nic_cleanup(priv); mlx5e_destroy_flow_steering(priv); - mlx5e_destroy_direct_tirs(priv); + mlx5e_destroy_direct_tirs(priv, priv->xsk_tir); + mlx5e_destroy_direct_rqts(priv, priv->xsk_tir); + mlx5e_destroy_direct_tirs(priv, priv->direct_tir); mlx5e_destroy_indirect_tirs(priv, true); - mlx5e_destroy_direct_rqts(priv); + mlx5e_destroy_direct_rqts(priv, priv->direct_tir); mlx5e_destroy_rqt(priv, &priv->indir_rqt); mlx5e_close_drop_rq(&priv->drop_rq); mlx5e_destroy_q_counters(priv); @@ -4993,7 +5209,7 @@ struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev, netdev = alloc_etherdev_mqs(sizeof(struct mlx5e_priv), nch * profile->max_tc, - nch); + nch * MLX5E_NUM_RQ_GROUPS); if (!netdev) { mlx5_core_err(mdev, "alloc_etherdev_mqs() failed\n"); return NULL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 3999da3e6314..8a7f60f30838 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -1519,7 +1519,7 @@ 
static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) if (err) goto err_close_drop_rq; - err = mlx5e_create_direct_rqts(priv); + err = mlx5e_create_direct_rqts(priv, priv->direct_tir); if (err) goto err_destroy_indirect_rqts; @@ -1527,7 +1527,7 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) if (err) goto err_destroy_direct_rqts; - err = mlx5e_create_direct_tirs(priv); + err = mlx5e_create_direct_tirs(priv, priv->direct_tir); if (err) goto err_destroy_indirect_tirs; @@ -1544,11 +1544,11 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) err_destroy_ttc_table: mlx5e_destroy_ttc_table(priv, &priv->fs.ttc); err_destroy_direct_tirs: - mlx5e_destroy_direct_tirs(priv); + mlx5e_destroy_direct_tirs(priv, priv->direct_tir); err_destroy_indirect_tirs: mlx5e_destroy_indirect_tirs(priv, false); err_destroy_direct_rqts: - mlx5e_destroy_direct_rqts(priv); + mlx5e_destroy_direct_rqts(priv, priv->direct_tir); err_destroy_indirect_rqts: mlx5e_destroy_rqt(priv, &priv->indir_rqt); err_close_drop_rq: @@ -1562,9 +1562,9 @@ static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv) mlx5_del_flow_rules(rpriv->vport_rx_rule); mlx5e_destroy_ttc_table(priv, &priv->fs.ttc); - mlx5e_destroy_direct_tirs(priv); + mlx5e_destroy_direct_tirs(priv, priv->direct_tir); mlx5e_destroy_indirect_tirs(priv, false); - mlx5e_destroy_direct_rqts(priv); + mlx5e_destroy_direct_rqts(priv, priv->direct_tir); mlx5e_destroy_rqt(priv, &priv->indir_rqt); mlx5e_close_drop_rq(&priv->drop_rq); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 234a3fd39901..56a2f4666c47 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -47,6 +47,7 @@ #include "en_accel/tls_rxtx.h" #include "lib/clock.h" #include "en/xdp.h" +#include "en/xsk/rx.h" static inline bool mlx5e_rx_hw_stamp(struct hwtstamp_config *config) { @@ -235,8 +236,8 @@ static inline bool mlx5e_rx_cache_get(struct mlx5e_rq *rq, return true; } -static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq, - struct mlx5e_dma_info *dma_info) +static inline int mlx5e_page_alloc_pool(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info) { if (mlx5e_rx_cache_get(rq, dma_info)) return 0; @@ -256,13 +257,23 @@ static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq, return 0; } +static inline int mlx5e_page_alloc(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info) +{ + if (rq->umem) + return mlx5e_xsk_page_alloc_umem(rq, dma_info); + else + return mlx5e_page_alloc_pool(rq, dma_info); +} + void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info) { dma_unmap_page(rq->pdev, dma_info->addr, PAGE_SIZE, rq->buff.map_dir); } -void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info, - bool recycle) +void mlx5e_page_release_dynamic(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info, + bool recycle) { if (likely(recycle)) { if (mlx5e_rx_cache_put(rq, dma_info)) @@ -277,6 +288,20 @@ void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info, } } +static inline void mlx5e_page_release(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info, + bool recycle) +{ + if (rq->umem) + /* The `recycle` parameter is ignored, and the page is always + * put into the Reuse Ring, because there is no way to return + * the page to the userspace when the interface goes down. 
+ */ + mlx5e_xsk_page_release(rq, dma_info); + else + mlx5e_page_release_dynamic(rq, dma_info, recycle); +} + static inline int mlx5e_get_rx_frag(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *frag) { @@ -288,7 +313,7 @@ static inline int mlx5e_get_rx_frag(struct mlx5e_rq *rq, * offset) should just use the new one without replenishing again * by themselves. */ - err = mlx5e_page_alloc_mapped(rq, frag->di); + err = mlx5e_page_alloc(rq, frag->di); return err; } @@ -354,6 +379,13 @@ static int mlx5e_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, u8 wqe_bulk) int err; int i; + if (rq->umem) { + int pages_desired = wqe_bulk << rq->wqe.info.log_num_frags; + + if (unlikely(!mlx5e_xsk_pages_enough_umem(rq, pages_desired))) + return -ENOMEM; + } + for (i = 0; i < wqe_bulk; i++) { struct mlx5e_rx_wqe_cyc *wqe = mlx5_wq_cyc_get_wqe(wq, ix + i); @@ -401,11 +433,17 @@ mlx5e_copy_skb_header(struct device *pdev, struct sk_buff *skb, static void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, bool recycle) { - const bool no_xdp_xmit = - bitmap_empty(wi->xdp_xmit_bitmap, MLX5_MPWRQ_PAGES_PER_WQE); + bool no_xdp_xmit; struct mlx5e_dma_info *dma_info = wi->umr.dma_info; int i; + /* A common case for AF_XDP. */ + if (bitmap_full(wi->xdp_xmit_bitmap, MLX5_MPWRQ_PAGES_PER_WQE)) + return; + + no_xdp_xmit = bitmap_empty(wi->xdp_xmit_bitmap, + MLX5_MPWRQ_PAGES_PER_WQE); + for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) if (no_xdp_xmit || !test_bit(i, wi->xdp_xmit_bitmap)) mlx5e_page_release(rq, &dma_info[i], recycle); @@ -427,11 +465,6 @@ static void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq, u8 n) mlx5_wq_ll_update_db_record(wq); } -static inline u16 mlx5e_icosq_wrap_cnt(struct mlx5e_icosq *sq) -{ - return mlx5_wq_cyc_get_ctr_wrap_cnt(&sq->wq, sq->pc); -} - static inline void mlx5e_fill_icosq_frag_edge(struct mlx5e_icosq *sq, struct mlx5_wq_cyc *wq, u16 pi, u16 nnops) @@ -459,6 +492,12 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) int err; int i; + if (rq->umem && + unlikely(!mlx5e_xsk_pages_enough_umem(rq, MLX5_MPWRQ_PAGES_PER_WQE))) { + err = -ENOMEM; + goto err; + } + pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi); if (unlikely(contig_wqebbs_room < MLX5E_UMR_WQEBBS)) { @@ -467,12 +506,10 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) } umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi); - if (unlikely(mlx5e_icosq_wrap_cnt(sq) < 2)) - memcpy(umr_wqe, &rq->mpwqe.umr_wqe, - offsetof(struct mlx5e_umr_wqe, inline_mtts)); + memcpy(umr_wqe, &rq->mpwqe.umr_wqe, offsetof(struct mlx5e_umr_wqe, inline_mtts)); for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++, dma_info++) { - err = mlx5e_page_alloc_mapped(rq, dma_info); + err = mlx5e_page_alloc(rq, dma_info); if (unlikely(err)) goto err_unmap; umr_wqe->inline_mtts[i].ptag = cpu_to_be64(dma_info->addr | MLX5_EN_WR); @@ -487,6 +524,7 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) umr_wqe->uctrl.xlt_offset = cpu_to_be16(xlt_offset); sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_UMR; + sq->db.ico_wqe[pi].umr.rq = rq; sq->pc += MLX5E_UMR_WQEBBS; sq->doorbell_cseg = &umr_wqe->ctrl; @@ -498,6 +536,8 @@ err_unmap: dma_info--; mlx5e_page_release(rq, dma_info, true); } + +err: rq->stats->buff_alloc_err++; return err; @@ -544,11 +584,10 @@ bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq) return !!err; } -static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq) +void mlx5e_poll_ico_cq(struct mlx5e_cq *cq) { struct mlx5e_icosq *sq = container_of(cq, struct mlx5e_icosq, cq); struct 
mlx5_cqe64 *cqe; - u8 completed_umr = 0; u16 sqcc; int i; @@ -589,7 +628,7 @@ static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq) if (likely(wi->opcode == MLX5_OPCODE_UMR)) { sqcc += MLX5E_UMR_WQEBBS; - completed_umr++; + wi->umr.rq->mpwqe.umr_completed++; } else if (likely(wi->opcode == MLX5_OPCODE_NOP)) { sqcc++; } else { @@ -605,24 +644,25 @@ static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq) sq->cc = sqcc; mlx5_cqwq_update_db_record(&cq->wq); - - if (likely(completed_umr)) { - mlx5e_post_rx_mpwqe(rq, completed_umr); - rq->mpwqe.umr_in_progress -= completed_umr; - } } bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq) { struct mlx5e_icosq *sq = &rq->channel->icosq; struct mlx5_wq_ll *wq = &rq->mpwqe.wq; + u8 umr_completed = rq->mpwqe.umr_completed; + int alloc_err = 0; u8 missing, i; u16 head; if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state))) return false; - mlx5e_poll_ico_cq(&sq->cq, rq); + if (umr_completed) { + mlx5e_post_rx_mpwqe(rq, umr_completed); + rq->mpwqe.umr_in_progress -= umr_completed; + rq->mpwqe.umr_completed = 0; + } missing = mlx5_wq_ll_missing(wq) - rq->mpwqe.umr_in_progress; @@ -636,7 +676,9 @@ bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq) head = rq->mpwqe.actual_wq_head; i = missing; do { - if (unlikely(mlx5e_alloc_rx_mpwqe(rq, head))) + alloc_err = mlx5e_alloc_rx_mpwqe(rq, head); + + if (unlikely(alloc_err)) break; head = mlx5_wq_ll_get_wqe_next_ix(wq, head); } while (--i); @@ -650,6 +692,12 @@ bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq) rq->mpwqe.umr_in_progress += rq->mpwqe.umr_last_bulk; rq->mpwqe.actual_wq_head = head; + /* If XSK Fill Ring doesn't have enough frames, busy poll by + * rescheduling the NAPI poll. + */ + if (unlikely(alloc_err == -ENOMEM && rq->umem)) + return true; + return false; } @@ -1018,7 +1066,7 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, } rcu_read_lock(); - consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt); + consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt, false); rcu_read_unlock(); if (consumed) return NULL; /* page/packet was consumed by XDP */ @@ -1235,7 +1283,7 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, prefetch(data); rcu_read_lock(); - consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt32); + consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt32, false); rcu_read_unlock(); if (consumed) { if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 483d321d2151..5f540db47cc9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -104,7 +104,33 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_poll) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_arm) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_aff_change) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_force_irq) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_eq_rearm) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_csum_complete) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_csum_unnecessary) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_csum_unnecessary_inner) }, + { MLX5E_DECLARE_STAT(struct 
mlx5e_sw_stats, rx_xsk_csum_none) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_ecn_mark) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_removed_vlan_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_xdp_drop) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_xdp_redirect) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_wqe_err) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_mpwqe_filler_cqes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_mpwqe_filler_strides) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_oversize_pkts_sw_drop) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_buff_alloc_err) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_cqe_compress_blks) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_cqe_compress_pkts) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_congst_umr) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_arfs_err) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_xmit) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_mpwqe) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_inlnw) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_full) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_err) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_cqes) }, }; #define NUM_SW_COUNTERS ARRAY_SIZE(sw_stats_desc) @@ -144,6 +170,8 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) &priv->channel_stats[i]; struct mlx5e_xdpsq_stats *xdpsq_red_stats = &channel_stats->xdpsq; struct mlx5e_xdpsq_stats *xdpsq_stats = &channel_stats->rq_xdpsq; + struct mlx5e_xdpsq_stats *xsksq_stats = &channel_stats->xsksq; + struct mlx5e_rq_stats *xskrq_stats = &channel_stats->xskrq; struct mlx5e_rq_stats *rq_stats = &channel_stats->rq; struct mlx5e_ch_stats *ch_stats = &channel_stats->ch; int j; @@ -186,6 +214,7 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) s->ch_poll += ch_stats->poll; s->ch_arm += ch_stats->arm; s->ch_aff_change += ch_stats->aff_change; + s->ch_force_irq += ch_stats->force_irq; s->ch_eq_rearm += ch_stats->eq_rearm; /* xdp redirect */ s->tx_xdp_xmit += xdpsq_red_stats->xmit; @@ -194,6 +223,32 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) s->tx_xdp_full += xdpsq_red_stats->full; s->tx_xdp_err += xdpsq_red_stats->err; s->tx_xdp_cqes += xdpsq_red_stats->cqes; + /* AF_XDP zero-copy */ + s->rx_xsk_packets += xskrq_stats->packets; + s->rx_xsk_bytes += xskrq_stats->bytes; + s->rx_xsk_csum_complete += xskrq_stats->csum_complete; + s->rx_xsk_csum_unnecessary += xskrq_stats->csum_unnecessary; + s->rx_xsk_csum_unnecessary_inner += xskrq_stats->csum_unnecessary_inner; + s->rx_xsk_csum_none += xskrq_stats->csum_none; + s->rx_xsk_ecn_mark += xskrq_stats->ecn_mark; + s->rx_xsk_removed_vlan_packets += xskrq_stats->removed_vlan_packets; + s->rx_xsk_xdp_drop += xskrq_stats->xdp_drop; + s->rx_xsk_xdp_redirect += xskrq_stats->xdp_redirect; + s->rx_xsk_wqe_err += xskrq_stats->wqe_err; + s->rx_xsk_mpwqe_filler_cqes += xskrq_stats->mpwqe_filler_cqes; + s->rx_xsk_mpwqe_filler_strides += xskrq_stats->mpwqe_filler_strides; + s->rx_xsk_oversize_pkts_sw_drop += xskrq_stats->oversize_pkts_sw_drop; + s->rx_xsk_buff_alloc_err += xskrq_stats->buff_alloc_err; + s->rx_xsk_cqe_compress_blks += xskrq_stats->cqe_compress_blks; + s->rx_xsk_cqe_compress_pkts += xskrq_stats->cqe_compress_pkts; + s->rx_xsk_congst_umr += xskrq_stats->congst_umr; + s->rx_xsk_arfs_err += xskrq_stats->arfs_err; + s->tx_xsk_xmit += 
xsksq_stats->xmit; + s->tx_xsk_mpwqe += xsksq_stats->mpwqe; + s->tx_xsk_inlnw += xsksq_stats->inlnw; + s->tx_xsk_full += xsksq_stats->full; + s->tx_xsk_err += xsksq_stats->err; + s->tx_xsk_cqes += xsksq_stats->cqes; for (j = 0; j < priv->max_opened_tc; j++) { struct mlx5e_sq_stats *sq_stats = &channel_stats->sq[j]; @@ -1266,11 +1321,43 @@ static const struct counter_desc xdpsq_stats_desc[] = { { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, cqes) }, }; +static const struct counter_desc xskrq_stats_desc[] = { + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, packets) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, bytes) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, csum_complete) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, csum_unnecessary) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, csum_none) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, ecn_mark) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, removed_vlan_packets) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, xdp_drop) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, xdp_redirect) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, wqe_err) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, mpwqe_filler_cqes) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, mpwqe_filler_strides) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, oversize_pkts_sw_drop) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, buff_alloc_err) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, cqe_compress_blks) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, congst_umr) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, arfs_err) }, +}; + +static const struct counter_desc xsksq_stats_desc[] = { + { MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, xmit) }, + { MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, mpwqe) }, + { MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, inlnw) }, + { MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, full) }, + { MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, err) }, + { MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, cqes) }, +}; + static const struct counter_desc ch_stats_desc[] = { { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, events) }, { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, poll) }, { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, arm) }, { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, aff_change) }, + { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, force_irq) }, { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, eq_rearm) }, }; @@ -1278,6 +1365,8 @@ static const struct counter_desc ch_stats_desc[] = { #define NUM_SQ_STATS ARRAY_SIZE(sq_stats_desc) #define NUM_XDPSQ_STATS ARRAY_SIZE(xdpsq_stats_desc) #define NUM_RQ_XDPSQ_STATS ARRAY_SIZE(rq_xdpsq_stats_desc) +#define NUM_XSKRQ_STATS ARRAY_SIZE(xskrq_stats_desc) +#define NUM_XSKSQ_STATS ARRAY_SIZE(xsksq_stats_desc) #define NUM_CH_STATS ARRAY_SIZE(ch_stats_desc) static int mlx5e_grp_channels_get_num_stats(struct mlx5e_priv *priv) @@ -1288,13 +1377,16 @@ static int mlx5e_grp_channels_get_num_stats(struct mlx5e_priv *priv) (NUM_CH_STATS * max_nch) + (NUM_SQ_STATS * max_nch * priv->max_opened_tc) + (NUM_RQ_XDPSQ_STATS * max_nch) + - (NUM_XDPSQ_STATS * max_nch); + (NUM_XDPSQ_STATS * max_nch) + + (NUM_XSKRQ_STATS * max_nch * priv->xsk.ever_used) + + (NUM_XSKSQ_STATS * max_nch * priv->xsk.ever_used); } 
static int mlx5e_grp_channels_fill_strings(struct mlx5e_priv *priv, u8 *data, int idx) { int max_nch = mlx5e_get_netdev_max_channels(priv->netdev); + bool is_xsk = priv->xsk.ever_used; int i, j, tc; for (i = 0; i < max_nch; i++) @@ -1306,6 +1398,9 @@ static int mlx5e_grp_channels_fill_strings(struct mlx5e_priv *priv, u8 *data, for (j = 0; j < NUM_RQ_STATS; j++) sprintf(data + (idx++) * ETH_GSTRING_LEN, rq_stats_desc[j].format, i); + for (j = 0; j < NUM_XSKRQ_STATS * is_xsk; j++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + xskrq_stats_desc[j].format, i); for (j = 0; j < NUM_RQ_XDPSQ_STATS; j++) sprintf(data + (idx++) * ETH_GSTRING_LEN, rq_xdpsq_stats_desc[j].format, i); @@ -1318,10 +1413,14 @@ static int mlx5e_grp_channels_fill_strings(struct mlx5e_priv *priv, u8 *data, sq_stats_desc[j].format, priv->channel_tc2txq[i][tc]); - for (i = 0; i < max_nch; i++) + for (i = 0; i < max_nch; i++) { + for (j = 0; j < NUM_XSKSQ_STATS * is_xsk; j++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + xsksq_stats_desc[j].format, i); for (j = 0; j < NUM_XDPSQ_STATS; j++) sprintf(data + (idx++) * ETH_GSTRING_LEN, xdpsq_stats_desc[j].format, i); + } return idx; } @@ -1330,6 +1429,7 @@ static int mlx5e_grp_channels_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx) { int max_nch = mlx5e_get_netdev_max_channels(priv->netdev); + bool is_xsk = priv->xsk.ever_used; int i, j, tc; for (i = 0; i < max_nch; i++) @@ -1343,6 +1443,10 @@ static int mlx5e_grp_channels_fill_stats(struct mlx5e_priv *priv, u64 *data, data[idx++] = MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].rq, rq_stats_desc, j); + for (j = 0; j < NUM_XSKRQ_STATS * is_xsk; j++) + data[idx++] = + MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].xskrq, + xskrq_stats_desc, j); for (j = 0; j < NUM_RQ_XDPSQ_STATS; j++) data[idx++] = MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].rq_xdpsq, @@ -1356,11 +1460,16 @@ static int mlx5e_grp_channels_fill_stats(struct mlx5e_priv *priv, u64 *data, MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].sq[tc], sq_stats_desc, j); - for (i = 0; i < max_nch; i++) + for (i = 0; i < max_nch; i++) { + for (j = 0; j < NUM_XSKSQ_STATS * is_xsk; j++) + data[idx++] = + MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].xsksq, + xsksq_stats_desc, j); for (j = 0; j < NUM_XDPSQ_STATS; j++) data[idx++] = MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].xdpsq, xdpsq_stats_desc, j); + } return idx; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index cdddcc46971b..fb3ad7231e11 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -46,6 +46,8 @@ #define MLX5E_DECLARE_TX_STAT(type, fld) "tx%d_"#fld, offsetof(type, fld) #define MLX5E_DECLARE_XDPSQ_STAT(type, fld) "tx%d_xdp_"#fld, offsetof(type, fld) #define MLX5E_DECLARE_RQ_XDPSQ_STAT(type, fld) "rx%d_xdp_tx_"#fld, offsetof(type, fld) +#define MLX5E_DECLARE_XSKRQ_STAT(type, fld) "rx%d_xsk_"#fld, offsetof(type, fld) +#define MLX5E_DECLARE_XSKSQ_STAT(type, fld) "tx%d_xsk_"#fld, offsetof(type, fld) #define MLX5E_DECLARE_CH_STAT(type, fld) "ch%d_"#fld, offsetof(type, fld) struct counter_desc { @@ -116,12 +118,39 @@ struct mlx5e_sw_stats { u64 ch_poll; u64 ch_arm; u64 ch_aff_change; + u64 ch_force_irq; u64 ch_eq_rearm; #ifdef CONFIG_MLX5_EN_TLS u64 tx_tls_ooo; u64 tx_tls_resync_bytes; #endif + + u64 rx_xsk_packets; + u64 rx_xsk_bytes; + u64 rx_xsk_csum_complete; + u64 rx_xsk_csum_unnecessary; + u64 rx_xsk_csum_unnecessary_inner; + u64 rx_xsk_csum_none; + u64 rx_xsk_ecn_mark; + 
u64 rx_xsk_removed_vlan_packets; + u64 rx_xsk_xdp_drop; + u64 rx_xsk_xdp_redirect; + u64 rx_xsk_wqe_err; + u64 rx_xsk_mpwqe_filler_cqes; + u64 rx_xsk_mpwqe_filler_strides; + u64 rx_xsk_oversize_pkts_sw_drop; + u64 rx_xsk_buff_alloc_err; + u64 rx_xsk_cqe_compress_blks; + u64 rx_xsk_cqe_compress_pkts; + u64 rx_xsk_congst_umr; + u64 rx_xsk_arfs_err; + u64 tx_xsk_xmit; + u64 tx_xsk_mpwqe; + u64 tx_xsk_inlnw; + u64 tx_xsk_full; + u64 tx_xsk_err; + u64 tx_xsk_cqes; }; struct mlx5e_qcounter_stats { @@ -256,6 +285,7 @@ struct mlx5e_ch_stats { u64 poll; u64 arm; u64 aff_change; + u64 force_irq; u64 eq_rearm; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index f9862bf75491..9ae327e80d6d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -33,6 +33,7 @@ #include <linux/irq.h> #include "en.h" #include "en/xdp.h" +#include "en/xsk/tx.h" static inline bool mlx5e_channel_no_affinity_change(struct mlx5e_channel *c) { @@ -87,7 +88,12 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) struct mlx5e_channel *c = container_of(napi, struct mlx5e_channel, napi); struct mlx5e_ch_stats *ch_stats = c->stats; + struct mlx5e_xdpsq *xsksq = &c->xsksq; + struct mlx5e_rq *xskrq = &c->xskrq; struct mlx5e_rq *rq = &c->rq; + bool xsk_open = test_bit(MLX5E_CHANNEL_STATE_XSK, c->state); + bool aff_change = false; + bool busy_xsk = false; bool busy = false; int work_done = 0; int i; @@ -97,22 +103,38 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) for (i = 0; i < c->num_tc; i++) busy |= mlx5e_poll_tx_cq(&c->sq[i].cq, budget); - busy |= mlx5e_poll_xdpsq_cq(&c->xdpsq.cq, NULL); + busy |= mlx5e_poll_xdpsq_cq(&c->xdpsq.cq); if (c->xdp) - busy |= mlx5e_poll_xdpsq_cq(&rq->xdpsq.cq, rq); + busy |= mlx5e_poll_xdpsq_cq(&c->rq_xdpsq.cq); if (likely(budget)) { /* budget=0 means: don't poll rx rings */ - work_done = mlx5e_poll_rx_cq(&rq->cq, budget); + if (xsk_open) + work_done = mlx5e_poll_rx_cq(&xskrq->cq, budget); + + if (likely(budget - work_done)) + work_done += mlx5e_poll_rx_cq(&rq->cq, budget - work_done); + busy |= work_done == budget; } - busy |= c->rq.post_wqes(rq); + mlx5e_poll_ico_cq(&c->icosq.cq); + + busy |= rq->post_wqes(rq); + if (xsk_open) { + mlx5e_poll_ico_cq(&c->xskicosq.cq); + busy |= mlx5e_poll_xdpsq_cq(&xsksq->cq); + busy_xsk |= mlx5e_xsk_tx(xsksq, MLX5E_TX_XSK_POLL_BUDGET); + busy_xsk |= xskrq->post_wqes(xskrq); + } + + busy |= busy_xsk; if (busy) { if (likely(mlx5e_channel_no_affinity_change(c))) return budget; ch_stats->aff_change++; + aff_change = true; if (budget && work_done == budget) work_done--; } @@ -133,6 +155,18 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) mlx5e_cq_arm(&c->icosq.cq); mlx5e_cq_arm(&c->xdpsq.cq); + if (xsk_open) { + mlx5e_handle_rx_dim(xskrq); + mlx5e_cq_arm(&c->xskicosq.cq); + mlx5e_cq_arm(&xsksq->cq); + mlx5e_cq_arm(&xskrq->cq); + } + + if (unlikely(aff_change && busy_xsk)) { + mlx5e_trigger_irq(&c->icosq); + ch_stats->force_irq++; + } + return work_done; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 9ca492b430d8..da81a5a7b8e9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -87,7 +87,7 @@ int mlx5i_init(struct mlx5_core_dev *mdev, mlx5e_set_netdev_mtu_boundaries(priv); netdev->mtu = netdev->max_mtu; - mlx5e_build_nic_params(mdev, 
&priv->rss_params, &priv->channels.params, + mlx5e_build_nic_params(mdev, NULL, &priv->rss_params, &priv->channels.params, mlx5e_get_netdev_max_channels(netdev), netdev->mtu); mlx5i_build_nic_params(mdev, &priv->channels.params); @@ -365,7 +365,7 @@ static int mlx5i_init_rx(struct mlx5e_priv *priv) if (err) goto err_close_drop_rq; - err = mlx5e_create_direct_rqts(priv); + err = mlx5e_create_direct_rqts(priv, priv->direct_tir); if (err) goto err_destroy_indirect_rqts; @@ -373,7 +373,7 @@ static int mlx5i_init_rx(struct mlx5e_priv *priv) if (err) goto err_destroy_direct_rqts; - err = mlx5e_create_direct_tirs(priv); + err = mlx5e_create_direct_tirs(priv, priv->direct_tir); if (err) goto err_destroy_indirect_tirs; @@ -384,11 +384,11 @@ static int mlx5i_init_rx(struct mlx5e_priv *priv) return 0; err_destroy_direct_tirs: - mlx5e_destroy_direct_tirs(priv); + mlx5e_destroy_direct_tirs(priv, priv->direct_tir); err_destroy_indirect_tirs: mlx5e_destroy_indirect_tirs(priv, true); err_destroy_direct_rqts: - mlx5e_destroy_direct_rqts(priv); + mlx5e_destroy_direct_rqts(priv, priv->direct_tir); err_destroy_indirect_rqts: mlx5e_destroy_rqt(priv, &priv->indir_rqt); err_close_drop_rq: @@ -401,9 +401,9 @@ err_destroy_q_counters: static void mlx5i_cleanup_rx(struct mlx5e_priv *priv) { mlx5i_destroy_flow_steering(priv); - mlx5e_destroy_direct_tirs(priv); + mlx5e_destroy_direct_tirs(priv, priv->direct_tir); mlx5e_destroy_indirect_tirs(priv, true); - mlx5e_destroy_direct_rqts(priv); + mlx5e_destroy_direct_rqts(priv, priv->direct_tir); mlx5e_destroy_rqt(priv, &priv->indir_rqt); mlx5e_close_drop_rq(&priv->drop_rq); mlx5e_destroy_q_counters(priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h index 1f87cce421e0..f1ec58c9e9e3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h @@ -134,11 +134,6 @@ static inline void mlx5_wq_cyc_update_db_record(struct mlx5_wq_cyc *wq) *wq->db = cpu_to_be32(wq->wqe_ctr); } -static inline u16 mlx5_wq_cyc_get_ctr_wrap_cnt(struct mlx5_wq_cyc *wq, u16 ctr) -{ - return ctr >> wq->fbc.log_sz; -} - static inline u16 mlx5_wq_cyc_ctr2ix(struct mlx5_wq_cyc *wq, u16 ctr) { return ctr & wq->fbc.sz_m1; diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index ae0f368a62bb..057b159ff8b9 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -77,10 +77,11 @@ int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp); void xsk_flush(struct xdp_sock *xs); bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs); /* Used from netdev driver */ +bool xsk_umem_has_addrs(struct xdp_umem *umem, u32 cnt); u64 *xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr); void xsk_umem_discard_addr(struct xdp_umem *umem); void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries); -bool xsk_umem_consume_tx(struct xdp_umem *umem, dma_addr_t *dma, u32 *len); +bool xsk_umem_consume_tx(struct xdp_umem *umem, struct xdp_desc *desc); void xsk_umem_consume_tx_done(struct xdp_umem *umem); struct xdp_umem_fq_reuse *xsk_reuseq_prepare(u32 nentries); struct xdp_umem_fq_reuse *xsk_reuseq_swap(struct xdp_umem *umem, @@ -99,6 +100,16 @@ static inline dma_addr_t xdp_umem_get_dma(struct xdp_umem *umem, u64 addr) } /* Reuse-queue aware version of FILL queue helpers */ +static inline bool xsk_umem_has_addrs_rq(struct xdp_umem *umem, u32 cnt) +{ + struct xdp_umem_fq_reuse *rq = umem->fq_reuse; + + if (rq->length >= cnt) + return true; + + return xsk_umem_has_addrs(umem, cnt - rq->length); +} + static 
inline u64 *xsk_umem_peek_addr_rq(struct xdp_umem *umem, u64 *addr) { struct xdp_umem_fq_reuse *rq = umem->fq_reuse; @@ -146,6 +157,11 @@ static inline bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs) return false; } +static inline bool xsk_umem_has_addrs(struct xdp_umem *umem, u32 cnt) +{ + return false; +} + static inline u64 *xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr) { return NULL; @@ -159,8 +175,8 @@ static inline void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries) { } -static inline bool xsk_umem_consume_tx(struct xdp_umem *umem, dma_addr_t *dma, - u32 *len) +static inline bool xsk_umem_consume_tx(struct xdp_umem *umem, + struct xdp_desc *desc) { return false; } @@ -200,6 +216,11 @@ static inline dma_addr_t xdp_umem_get_dma(struct xdp_umem *umem, u64 addr) return 0; } +static inline bool xsk_umem_has_addrs_rq(struct xdp_umem *umem, u32 cnt) +{ + return false; +} + static inline u64 *xsk_umem_peek_addr_rq(struct xdp_umem *umem, u64 *addr) { return NULL; diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h index caed8b1614ff..faaa5ca2a117 100644 --- a/include/uapi/linux/if_xdp.h +++ b/include/uapi/linux/if_xdp.h @@ -46,6 +46,7 @@ struct xdp_mmap_offsets { #define XDP_UMEM_FILL_RING 5 #define XDP_UMEM_COMPLETION_RING 6 #define XDP_STATISTICS 7 +#define XDP_OPTIONS 8 struct xdp_umem_reg { __u64 addr; /* Start of packet data area */ @@ -60,6 +61,13 @@ struct xdp_statistics { __u64 tx_invalid_descs; /* Dropped due to invalid descriptor */ }; +struct xdp_options { + __u32 flags; +}; + +/* Flags for the flags field of struct xdp_options */ +#define XDP_OPTIONS_ZEROCOPY (1 << 0) + /* Pgoff for mmaping the rings */ #define XDP_PGOFF_RX_RING 0 #define XDP_PGOFF_TX_RING 0x80000000 diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index a14e8864e4fa..74417a851ed5 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -37,6 +37,12 @@ bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs) READ_ONCE(xs->umem->fq); } +bool xsk_umem_has_addrs(struct xdp_umem *umem, u32 cnt) +{ + return xskq_has_addrs(umem->fq, cnt); +} +EXPORT_SYMBOL(xsk_umem_has_addrs); + u64 *xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr) { return xskq_peek_addr(umem->fq, addr); @@ -166,22 +172,18 @@ void xsk_umem_consume_tx_done(struct xdp_umem *umem) } EXPORT_SYMBOL(xsk_umem_consume_tx_done); -bool xsk_umem_consume_tx(struct xdp_umem *umem, dma_addr_t *dma, u32 *len) +bool xsk_umem_consume_tx(struct xdp_umem *umem, struct xdp_desc *desc) { - struct xdp_desc desc; struct xdp_sock *xs; rcu_read_lock(); list_for_each_entry_rcu(xs, &umem->xsk_list, list) { - if (!xskq_peek_desc(xs->tx, &desc)) + if (!xskq_peek_desc(xs->tx, desc)) continue; - if (xskq_produce_addr_lazy(umem->cq, desc.addr)) + if (xskq_produce_addr_lazy(umem->cq, desc->addr)) goto out; - *dma = xdp_umem_get_dma(umem, desc.addr); - *len = desc.len; - xskq_discard_desc(xs->tx); rcu_read_unlock(); return true; @@ -644,6 +646,26 @@ static int xsk_getsockopt(struct socket *sock, int level, int optname, return 0; } + case XDP_OPTIONS: + { + struct xdp_options opts = {}; + + if (len < sizeof(opts)) + return -EINVAL; + + mutex_lock(&xs->mutex); + if (xs->zc) + opts.flags |= XDP_OPTIONS_ZEROCOPY; + mutex_unlock(&xs->mutex); + + len = sizeof(opts); + if (copy_to_user(optval, &opts, len)) + return -EFAULT; + if (put_user(len, optlen)) + return -EFAULT; + + return 0; + } default: break; } diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h index 88b9ae24658d..12b49784a6d5 100644 --- a/net/xdp/xsk_queue.h +++ b/net/xdp/xsk_queue.h @@ -117,6 
+117,20 @@ static inline u32 xskq_nb_free(struct xsk_queue *q, u32 producer, u32 dcnt) return q->nentries - (producer - q->cons_tail); } +static inline bool xskq_has_addrs(struct xsk_queue *q, u32 cnt) +{ + u32 entries = q->prod_tail - q->cons_tail; + + if (entries >= cnt) + return true; + + /* Refresh the local pointer. */ + q->prod_tail = READ_ONCE(q->ring->producer); + entries = q->prod_tail - q->cons_tail; + + return entries >= cnt; +} + /* UMEM queue */ static inline bool xskq_is_valid_addr(struct xsk_queue *q, u64 addr) diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index 0f5eb0d7f2df..93eaaf7239b2 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -68,6 +68,7 @@ static int opt_queue; static int opt_poll; static int opt_interval = 1; static u32 opt_xdp_bind_flags; +static int opt_xsk_frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE; static __u32 prog_id; struct xsk_umem_info { @@ -276,6 +277,12 @@ static size_t gen_eth_frame(struct xsk_umem_info *umem, u64 addr) static struct xsk_umem_info *xsk_configure_umem(void *buffer, u64 size) { struct xsk_umem_info *umem; + struct xsk_umem_config cfg = { + .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, + .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS, + .frame_size = opt_xsk_frame_size, + .frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM, + }; int ret; umem = calloc(1, sizeof(*umem)); @@ -283,7 +290,7 @@ static struct xsk_umem_info *xsk_configure_umem(void *buffer, u64 size) exit_with_error(errno); ret = xsk_umem__create(&umem->umem, buffer, size, &umem->fq, &umem->cq, - NULL); + &cfg); if (ret) exit_with_error(-ret); @@ -323,11 +330,9 @@ static struct xsk_socket_info *xsk_configure_socket(struct xsk_umem_info *umem) &idx); if (ret != XSK_RING_PROD__DEFAULT_NUM_DESCS) exit_with_error(-ret); - for (i = 0; - i < XSK_RING_PROD__DEFAULT_NUM_DESCS * - XSK_UMEM__DEFAULT_FRAME_SIZE; - i += XSK_UMEM__DEFAULT_FRAME_SIZE) - *xsk_ring_prod__fill_addr(&xsk->umem->fq, idx++) = i; + for (i = 0; i < XSK_RING_PROD__DEFAULT_NUM_DESCS; i++) + *xsk_ring_prod__fill_addr(&xsk->umem->fq, idx++) = + i * opt_xsk_frame_size; xsk_ring_prod__submit(&xsk->umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS); @@ -346,6 +351,7 @@ static struct option long_options[] = { {"interval", required_argument, 0, 'n'}, {"zero-copy", no_argument, 0, 'z'}, {"copy", no_argument, 0, 'c'}, + {"frame-size", required_argument, 0, 'f'}, {0, 0, 0, 0} }; @@ -365,8 +371,9 @@ static void usage(const char *prog) " -n, --interval=n Specify statistics update interval (default 1 sec).\n" " -z, --zero-copy Force zero-copy mode.\n" " -c, --copy Force copy mode.\n" + " -f, --frame-size=n Set the frame size (must be a power of two, default is %d).\n" "\n"; - fprintf(stderr, str, prog); + fprintf(stderr, str, prog, XSK_UMEM__DEFAULT_FRAME_SIZE); exit(EXIT_FAILURE); } @@ -377,7 +384,7 @@ static void parse_command_line(int argc, char **argv) opterr = 0; for (;;) { - c = getopt_long(argc, argv, "Frtli:q:psSNn:cz", long_options, + c = getopt_long(argc, argv, "Frtli:q:psSNn:czf:", long_options, &option_index); if (c == -1) break; @@ -420,6 +427,9 @@ static void parse_command_line(int argc, char **argv) case 'F': opt_xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST; break; + case 'f': + opt_xsk_frame_size = atoi(optarg); + break; default: usage(basename(argv[0])); } @@ -432,6 +442,11 @@ static void parse_command_line(int argc, char **argv) usage(basename(argv[0])); } + if (opt_xsk_frame_size & (opt_xsk_frame_size - 1)) { + fprintf(stderr, "--frame-size=%d is not a power of two\n", + 
opt_xsk_frame_size); + usage(basename(argv[0])); + } } static void kick_tx(struct xsk_socket_info *xsk) @@ -583,8 +598,7 @@ static void tx_only(struct xsk_socket_info *xsk) for (i = 0; i < BATCH_SIZE; i++) { xsk_ring_prod__tx_desc(&xsk->tx, idx + i)->addr - = (frame_nb + i) << - XSK_UMEM__DEFAULT_FRAME_SHIFT; + = (frame_nb + i) * opt_xsk_frame_size; xsk_ring_prod__tx_desc(&xsk->tx, idx + i)->len = sizeof(pkt_data) - 1; } @@ -661,21 +675,19 @@ int main(int argc, char **argv) } ret = posix_memalign(&bufs, getpagesize(), /* PAGE_SIZE aligned */ - NUM_FRAMES * XSK_UMEM__DEFAULT_FRAME_SIZE); + NUM_FRAMES * opt_xsk_frame_size); if (ret) exit_with_error(ret); /* Create sockets... */ - umem = xsk_configure_umem(bufs, - NUM_FRAMES * XSK_UMEM__DEFAULT_FRAME_SIZE); + umem = xsk_configure_umem(bufs, NUM_FRAMES * opt_xsk_frame_size); xsks[num_socks++] = xsk_configure_socket(umem); if (opt_bench == BENCH_TXONLY) { int i; - for (i = 0; i < NUM_FRAMES * XSK_UMEM__DEFAULT_FRAME_SIZE; - i += XSK_UMEM__DEFAULT_FRAME_SIZE) - (void)gen_eth_frame(umem, i); + for (i = 0; i < NUM_FRAMES; i++) + (void)gen_eth_frame(umem, i * opt_xsk_frame_size); } signal(SIGINT, int_exit); diff --git a/tools/include/uapi/linux/if_xdp.h b/tools/include/uapi/linux/if_xdp.h index caed8b1614ff..faaa5ca2a117 100644 --- a/tools/include/uapi/linux/if_xdp.h +++ b/tools/include/uapi/linux/if_xdp.h @@ -46,6 +46,7 @@ struct xdp_mmap_offsets { #define XDP_UMEM_FILL_RING 5 #define XDP_UMEM_COMPLETION_RING 6 #define XDP_STATISTICS 7 +#define XDP_OPTIONS 8 struct xdp_umem_reg { __u64 addr; /* Start of packet data area */ @@ -60,6 +61,13 @@ struct xdp_statistics { __u64 tx_invalid_descs; /* Dropped due to invalid descriptor */ }; +struct xdp_options { + __u32 flags; +}; + +/* Flags for the flags field of struct xdp_options */ +#define XDP_OPTIONS_ZEROCOPY (1 << 0) + /* Pgoff for mmaping the rings */ #define XDP_PGOFF_RX_RING 0 #define XDP_PGOFF_TX_RING 0x80000000 diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index 7ef6293b4fd7..bf15a80a37c2 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -65,6 +65,7 @@ struct xsk_socket { int xsks_map_fd; __u32 queue_id; char ifname[IFNAMSIZ]; + bool zc; }; struct xsk_nl_info { @@ -480,6 +481,7 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, void *rx_map = NULL, *tx_map = NULL; struct sockaddr_xdp sxdp = {}; struct xdp_mmap_offsets off; + struct xdp_options opts; struct xsk_socket *xsk; socklen_t optlen; int err; @@ -597,6 +599,16 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, } xsk->prog_fd = -1; + + optlen = sizeof(opts); + err = getsockopt(xsk->fd, SOL_XDP, XDP_OPTIONS, &opts, &optlen); + if (err) { + err = -errno; + goto out_mmap_tx; + } + + xsk->zc = opts.flags & XDP_OPTIONS_ZEROCOPY; + if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) { err = xsk_setup_xdp_prog(xsk); if (err) diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h index 82ea71a0f3ec..833a6e60d065 100644 --- a/tools/lib/bpf/xsk.h +++ b/tools/lib/bpf/xsk.h @@ -167,7 +167,7 @@ LIBBPF_API int xsk_socket__fd(const struct xsk_socket *xsk); #define XSK_RING_CONS__DEFAULT_NUM_DESCS 2048 #define XSK_RING_PROD__DEFAULT_NUM_DESCS 2048 -#define XSK_UMEM__DEFAULT_FRAME_SHIFT 11 /* 2048 bytes */ +#define XSK_UMEM__DEFAULT_FRAME_SHIFT 12 /* 4096 bytes */ #define XSK_UMEM__DEFAULT_FRAME_SIZE (1 << XSK_UMEM__DEFAULT_FRAME_SHIFT) #define XSK_UMEM__DEFAULT_FRAME_HEADROOM 0
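[Illustrative aside, not part of the patch.] A minimal sketch of how a userspace application might use the new XDP_OPTIONS getsockopt (added above in if_xdp.h and net/xdp/xsk.c) to check whether its AF_XDP socket ended up in zero-copy mode, which is the same query that libbpf's xsk_socket__create() now performs internally to set xsk->zc. The xsk_fd argument is assumed to be an already created and bound AF_XDP socket, and the installed headers are assumed to carry the new definitions.

#include <errno.h>
#include <stdbool.h>
#include <sys/socket.h>
#include <linux/if_xdp.h>

#ifndef SOL_XDP
#define SOL_XDP 283	/* Not yet exported by some libc headers. */
#endif

static bool xsk_is_zerocopy(int xsk_fd)
{
	struct xdp_options opts = {};
	socklen_t optlen = sizeof(opts);

	if (getsockopt(xsk_fd, SOL_XDP, XDP_OPTIONS, &opts, &optlen))
		return false;	/* Older kernel: XDP_OPTIONS is not supported. */

	return opts.flags & XDP_OPTIONS_ZEROCOPY;
}

Combined with the new --frame-size option of the xdpsock sample (for example -f 4096, matching the raised libbpf default of XSK_UMEM__DEFAULT_FRAME_SHIFT 12), this makes it possible to verify from userspace that a given frame size actually runs in zero-copy mode on the driver in use.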