diff options
author | Maxim Mikityanskiy <maximmi@nvidia.com> | 2022-01-26 17:18:26 +0200 |
---|---|---|
committer | Saeed Mahameed <saeedm@nvidia.com> | 2022-03-17 11:51:55 -0700 |
commit | 8d35fb57fd907251992f85e97fa25e8db20d4bca (patch) | |
tree | 6377ef3d207d3586e190ceabef3af0351bb44218 /drivers | |
parent | c3cce0fff3a3be0372caa4821be58fc7cefaeb3c (diff) |
net/mlx5e: Build SKB in place over the first fragment in non-linear legacy RQ
As a performance optimization and in preparation for enabling XDP multi
buffer on non-linear legacy RQ, build the linear part of the SKB over
the first fragment, instead of allocating a new buffer and copying the
first 256 bytes there.
To achieve this, add headroom and tailroom to the first fragment.
Signed-off-by: Maxim Mikityanskiy <maximmi@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/en/params.c | 43 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 48 |
2 files changed, 57 insertions, 34 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 0f258e7a65e0..5c4711be6fae 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -188,12 +188,18 @@ u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk) { - bool is_linear_skb = (params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC) ? - mlx5e_rx_is_linear_skb(params, xsk) : - mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk); + u16 linear_headroom = mlx5e_get_linear_rq_headroom(params, xsk); - return is_linear_skb || params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO ? - mlx5e_get_linear_rq_headroom(params, xsk) : 0; + if (params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC) + return linear_headroom; + + if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk)) + return linear_headroom; + + if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) + return linear_headroom; + + return 0; } u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params) @@ -392,10 +398,10 @@ void mlx5e_build_create_cq_param(struct mlx5e_create_cq_param *ccp, struct mlx5e }; } -static int mlx5e_max_nonlinear_mtu(int frag_size) +static int mlx5e_max_nonlinear_mtu(int first_frag_size, int frag_size) { /* Optimization for small packets: the last fragment is bigger than the others. 
*/ - return (MLX5E_MAX_RX_FRAGS - 1) * frag_size + PAGE_SIZE; + return first_frag_size + (MLX5E_MAX_RX_FRAGS - 2) * frag_size + PAGE_SIZE; } #define DEFAULT_FRAG_SIZE (2048) @@ -407,7 +413,9 @@ static int mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev, { u32 byte_count = MLX5E_SW2HW_MTU(params, params->sw_mtu); int frag_size_max = DEFAULT_FRAG_SIZE; + int first_frag_size_max; u32 buf_size = 0; + u16 headroom; int max_mtu; int i; @@ -427,11 +435,15 @@ static int mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev, goto out; } - max_mtu = mlx5e_max_nonlinear_mtu(frag_size_max); + headroom = mlx5e_get_linear_rq_headroom(params, xsk); + first_frag_size_max = SKB_WITH_OVERHEAD(frag_size_max - headroom); + + max_mtu = mlx5e_max_nonlinear_mtu(first_frag_size_max, frag_size_max); if (byte_count > max_mtu) { frag_size_max = PAGE_SIZE; + first_frag_size_max = SKB_WITH_OVERHEAD(frag_size_max - headroom); - max_mtu = mlx5e_max_nonlinear_mtu(frag_size_max); + max_mtu = mlx5e_max_nonlinear_mtu(first_frag_size_max, frag_size_max); if (byte_count > max_mtu) { mlx5_core_err(mdev, "MTU %u is too big for non-linear legacy RQ (max %d)\n", params->sw_mtu, max_mtu); @@ -443,13 +455,22 @@ static int mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev, while (buf_size < byte_count) { int frag_size = byte_count - buf_size; - if (i < MLX5E_MAX_RX_FRAGS - 1) + if (i == 0) + frag_size = min(frag_size, first_frag_size_max); + else if (i < MLX5E_MAX_RX_FRAGS - 1) frag_size = min(frag_size, frag_size_max); info->arr[i].frag_size = frag_size; + buf_size += frag_size; + + if (i == 0) { + /* Ensure that headroom and tailroom are included. 
*/ + frag_size += headroom; + frag_size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + } + info->arr[i].frag_stride = roundup_pow_of_two(frag_size); - buf_size += frag_size; i++; } info->num_frags = i; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 6eda906342c0..b06aac087b2a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -1560,43 +1560,45 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, struct mlx5e_wqe_frag_info *wi, u32 cqe_bcnt) { struct mlx5e_rq_frag_info *frag_info = &rq->wqe.info.arr[0]; - struct mlx5e_wqe_frag_info *head_wi = wi; - u16 headlen = min_t(u32, MLX5E_RX_MAX_HEAD, cqe_bcnt); - u16 frag_headlen = headlen; - u16 byte_cnt = cqe_bcnt - headlen; + u16 rx_headroom = rq->buff.headroom; + struct mlx5e_dma_info *di = wi->di; + u32 frag_consumed_bytes; + u32 first_frag_size; struct sk_buff *skb; + void *va; + + va = page_address(di->page) + wi->offset; + frag_consumed_bytes = min_t(u32, frag_info->frag_size, cqe_bcnt); + first_frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + frag_consumed_bytes); + + dma_sync_single_range_for_cpu(rq->pdev, di->addr, wi->offset, + first_frag_size, DMA_FROM_DEVICE); + net_prefetch(va + rx_headroom); /* XDP is not supported in this configuration, as incoming packets * might spread among multiple pages. 
*/ - skb = napi_alloc_skb(rq->cq.napi, - ALIGN(MLX5E_RX_MAX_HEAD, sizeof(long))); - if (unlikely(!skb)) { - rq->stats->buff_alloc_err++; + skb = mlx5e_build_linear_skb(rq, va, first_frag_size, rx_headroom, + frag_consumed_bytes, 0); + if (unlikely(!skb)) return NULL; - } - net_prefetchw(skb->data); + page_ref_inc(di->page); - while (byte_cnt) { - u16 frag_consumed_bytes = - min_t(u16, frag_info->frag_size - frag_headlen, byte_cnt); + cqe_bcnt -= frag_consumed_bytes; + frag_info++; + wi++; - mlx5e_add_skb_frag(rq, skb, wi->di, wi->offset + frag_headlen, + while (cqe_bcnt) { + frag_consumed_bytes = min_t(u32, frag_info->frag_size, cqe_bcnt); + + mlx5e_add_skb_frag(rq, skb, wi->di, wi->offset, frag_consumed_bytes, frag_info->frag_stride); - byte_cnt -= frag_consumed_bytes; - frag_headlen = 0; + cqe_bcnt -= frag_consumed_bytes; frag_info++; wi++; } - /* copy header */ - mlx5e_copy_skb_header(rq->pdev, skb, head_wi->di, head_wi->offset, head_wi->offset, - headlen); - /* skb linear part was allocated with headlen and aligned to long */ - skb->tail += headlen; - skb->len += headlen; - return skb; } |