diff options
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx4/en_tx.c')
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx4/en_tx.c | 274 |
1 files changed, 210 insertions, 64 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 76aa4d27183c..9df87ca0515a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -196,6 +196,7 @@ int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv, ring->last_nr_txbb = 1; memset(ring->tx_info, 0, ring->size * sizeof(struct mlx4_en_tx_info)); memset(ring->buf, 0, ring->buf_size); + ring->free_tx_desc = mlx4_en_free_tx_desc; ring->qp_state = MLX4_QP_STATE_RST; ring->doorbell_qpn = cpu_to_be32(ring->qp.qpn << 8); @@ -265,10 +266,10 @@ static void mlx4_en_stamp_wqe(struct mlx4_en_priv *priv, } -static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, - struct mlx4_en_tx_ring *ring, - int index, u8 owner, u64 timestamp, - int napi_mode) +u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, + struct mlx4_en_tx_ring *ring, + int index, u8 owner, u64 timestamp, + int napi_mode) { struct mlx4_en_tx_info *tx_info = &ring->tx_info[index]; struct mlx4_en_tx_desc *tx_desc = ring->buf + index * TXBB_SIZE; @@ -344,6 +345,27 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, return tx_info->nr_txbb; } +u32 mlx4_en_recycle_tx_desc(struct mlx4_en_priv *priv, + struct mlx4_en_tx_ring *ring, + int index, u8 owner, u64 timestamp, + int napi_mode) +{ + struct mlx4_en_tx_info *tx_info = &ring->tx_info[index]; + struct mlx4_en_rx_alloc frame = { + .page = tx_info->page, + .dma = tx_info->map0_dma, + .page_offset = 0, + .page_size = PAGE_SIZE, + }; + + if (!mlx4_en_rx_recycle(ring->recycle_ring, &frame)) { + dma_unmap_page(priv->ddev, tx_info->map0_dma, + PAGE_SIZE, priv->frag_info[0].dma_dir); + put_page(tx_info->page); + } + + return tx_info->nr_txbb; +} int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring) { @@ -362,7 +384,7 @@ int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring) } while (ring->cons != ring->prod) { - ring->last_nr_txbb = mlx4_en_free_tx_desc(priv, ring, + ring->last_nr_txbb = ring->free_tx_desc(priv, ring, ring->cons & ring->size_mask, !!(ring->cons & ring->size), 0, 0 /* Non-NAPI caller */); @@ -444,7 +466,7 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev, timestamp = mlx4_en_get_cqe_ts(cqe); /* free next descriptor */ - last_nr_txbb = mlx4_en_free_tx_desc( + last_nr_txbb = ring->free_tx_desc( priv, ring, ring_index, !!((ring_cons + txbbs_skipped) & ring->size), timestamp, napi_budget); @@ -476,6 +498,9 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev, ACCESS_ONCE(ring->last_nr_txbb) = last_nr_txbb; ACCESS_ONCE(ring->cons) = ring_cons + txbbs_skipped; + if (ring->free_tx_desc == mlx4_en_recycle_tx_desc) + return done < budget; + netdev_tx_completed_queue(ring->tx_queue, packets, bytes); /* Wakeup Tx queue if this stopped, and ring is not full. @@ -631,8 +656,7 @@ static int get_real_size(const struct sk_buff *skb, static void build_inline_wqe(struct mlx4_en_tx_desc *tx_desc, const struct sk_buff *skb, const struct skb_shared_info *shinfo, - int real_size, u16 *vlan_tag, - int tx_ind, void *fragptr) + void *fragptr) { struct mlx4_wqe_inline_seg *inl = &tx_desc->inl; int spc = MLX4_INLINE_ALIGN - CTRL_SIZE - sizeof *inl; @@ -700,10 +724,66 @@ static void mlx4_bf_copy(void __iomem *dst, const void *src, __iowrite64_copy(dst, src, bytecnt / 8); } +void mlx4_en_xmit_doorbell(struct mlx4_en_tx_ring *ring) +{ + wmb(); + /* Since there is no iowrite*_native() that writes the + * value as is, without byteswapping - using the one + * the doesn't do byteswapping in the relevant arch + * endianness. + */ +#if defined(__LITTLE_ENDIAN) + iowrite32( +#else + iowrite32be( +#endif + ring->doorbell_qpn, + ring->bf.uar->map + MLX4_SEND_DOORBELL); +} + +static void mlx4_en_tx_write_desc(struct mlx4_en_tx_ring *ring, + struct mlx4_en_tx_desc *tx_desc, + union mlx4_wqe_qpn_vlan qpn_vlan, + int desc_size, int bf_index, + __be32 op_own, bool bf_ok, + bool send_doorbell) +{ + tx_desc->ctrl.qpn_vlan = qpn_vlan; + + if (bf_ok) { + op_own |= htonl((bf_index & 0xffff) << 8); + /* Ensure new descriptor hits memory + * before setting ownership of this descriptor to HW + */ + dma_wmb(); + tx_desc->ctrl.owner_opcode = op_own; + + wmb(); + + mlx4_bf_copy(ring->bf.reg + ring->bf.offset, &tx_desc->ctrl, + desc_size); + + wmb(); + + ring->bf.offset ^= ring->bf.buf_size; + } else { + /* Ensure new descriptor hits memory + * before setting ownership of this descriptor to HW + */ + dma_wmb(); + tx_desc->ctrl.owner_opcode = op_own; + if (send_doorbell) + mlx4_en_xmit_doorbell(ring); + else + ring->xmit_more++; + } +} + netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) { struct skb_shared_info *shinfo = skb_shinfo(skb); struct mlx4_en_priv *priv = netdev_priv(dev); + union mlx4_wqe_qpn_vlan qpn_vlan = {}; struct device *ddev = priv->ddev; struct mlx4_en_tx_ring *ring; struct mlx4_en_tx_desc *tx_desc; @@ -715,7 +795,6 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) int real_size; u32 index, bf_index; __be32 op_own; - u16 vlan_tag = 0; u16 vlan_proto = 0; int i_frag; int lso_header_size; @@ -725,6 +804,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) bool stop_queue; bool inline_ok; u32 ring_cons; + bool bf_ok; tx_ind = skb_get_queue_mapping(skb); ring = priv->tx_ring[tx_ind]; @@ -749,9 +829,17 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) goto tx_drop; } + bf_ok = ring->bf_enabled; if (skb_vlan_tag_present(skb)) { - vlan_tag = skb_vlan_tag_get(skb); + qpn_vlan.vlan_tag = cpu_to_be16(skb_vlan_tag_get(skb)); vlan_proto = be16_to_cpu(skb->vlan_proto); + if (vlan_proto == ETH_P_8021AD) + qpn_vlan.ins_vlan = MLX4_WQE_CTRL_INS_SVLAN; + else if (vlan_proto == ETH_P_8021Q) + qpn_vlan.ins_vlan = MLX4_WQE_CTRL_INS_CVLAN; + else + qpn_vlan.ins_vlan = 0; + bf_ok = false; } netdev_txq_bql_enqueue_prefetchw(ring->tx_queue); @@ -771,6 +859,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) else { tx_desc = (struct mlx4_en_tx_desc *) ring->bounce_buf; bounce = true; + bf_ok = false; } /* Save skb in tx_info ring */ @@ -907,8 +996,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) AVG_PERF_COUNTER(priv->pstats.tx_pktsz_avg, skb->len); if (tx_info->inl) - build_inline_wqe(tx_desc, skb, shinfo, real_size, &vlan_tag, - tx_ind, fragptr); + build_inline_wqe(tx_desc, skb, shinfo, fragptr); if (skb->encapsulation) { union { @@ -946,60 +1034,15 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) real_size = (real_size / 16) & 0x3f; - if (ring->bf_enabled && desc_size <= MAX_BF && !bounce && - !skb_vlan_tag_present(skb) && send_doorbell) { - tx_desc->ctrl.bf_qpn = ring->doorbell_qpn | - cpu_to_be32(real_size); - - op_own |= htonl((bf_index & 0xffff) << 8); - /* Ensure new descriptor hits memory - * before setting ownership of this descriptor to HW - */ - dma_wmb(); - tx_desc->ctrl.owner_opcode = op_own; - - wmb(); + bf_ok &= desc_size <= MAX_BF && send_doorbell; - mlx4_bf_copy(ring->bf.reg + ring->bf.offset, &tx_desc->ctrl, - desc_size); - - wmb(); - - ring->bf.offset ^= ring->bf.buf_size; - } else { - tx_desc->ctrl.vlan_tag = cpu_to_be16(vlan_tag); - if (vlan_proto == ETH_P_8021AD) - tx_desc->ctrl.ins_vlan = MLX4_WQE_CTRL_INS_SVLAN; - else if (vlan_proto == ETH_P_8021Q) - tx_desc->ctrl.ins_vlan = MLX4_WQE_CTRL_INS_CVLAN; - else - tx_desc->ctrl.ins_vlan = 0; - - tx_desc->ctrl.fence_size = real_size; + if (bf_ok) + qpn_vlan.bf_qpn = ring->doorbell_qpn | cpu_to_be32(real_size); + else + qpn_vlan.fence_size = real_size; - /* Ensure new descriptor hits memory - * before setting ownership of this descriptor to HW - */ - dma_wmb(); - tx_desc->ctrl.owner_opcode = op_own; - if (send_doorbell) { - wmb(); - /* Since there is no iowrite*_native() that writes the - * value as is, without byteswapping - using the one - * the doesn't do byteswapping in the relevant arch - * endianness. - */ -#if defined(__LITTLE_ENDIAN) - iowrite32( -#else - iowrite32be( -#endif - ring->doorbell_qpn, - ring->bf.uar->map + MLX4_SEND_DOORBELL); - } else { - ring->xmit_more++; - } - } + mlx4_en_tx_write_desc(ring, tx_desc, qpn_vlan, desc_size, bf_index, + op_own, bf_ok, send_doorbell); if (unlikely(stop_queue)) { /* If queue was emptied after the if (stop_queue) , and before @@ -1034,3 +1077,106 @@ tx_drop: return NETDEV_TX_OK; } +netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_alloc *frame, + struct net_device *dev, unsigned int length, + int tx_ind, int *doorbell_pending) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + union mlx4_wqe_qpn_vlan qpn_vlan = {}; + struct mlx4_en_tx_ring *ring; + struct mlx4_en_tx_desc *tx_desc; + struct mlx4_wqe_data_seg *data; + struct mlx4_en_tx_info *tx_info; + int index, bf_index; + bool send_doorbell; + int nr_txbb = 1; + bool stop_queue; + dma_addr_t dma; + int real_size; + __be32 op_own; + u32 ring_cons; + bool bf_ok; + + BUILD_BUG_ON_MSG(ALIGN(CTRL_SIZE + DS_SIZE, TXBB_SIZE) != TXBB_SIZE, + "mlx4_en_xmit_frame requires minimum size tx desc"); + + ring = priv->tx_ring[tx_ind]; + + if (!priv->port_up) + goto tx_drop; + + if (mlx4_en_is_tx_ring_full(ring)) + goto tx_drop; + + /* fetch ring->cons far ahead before needing it to avoid stall */ + ring_cons = READ_ONCE(ring->cons); + + index = ring->prod & ring->size_mask; + tx_info = &ring->tx_info[index]; + + bf_ok = ring->bf_enabled; + + /* Track current inflight packets for performance analysis */ + AVG_PERF_COUNTER(priv->pstats.inflight_avg, + (u32)(ring->prod - ring_cons - 1)); + + bf_index = ring->prod; + tx_desc = ring->buf + index * TXBB_SIZE; + data = &tx_desc->data; + + dma = frame->dma; + + tx_info->page = frame->page; + frame->page = NULL; + tx_info->map0_dma = dma; + tx_info->map0_byte_count = length; + tx_info->nr_txbb = nr_txbb; + tx_info->nr_bytes = max_t(unsigned int, length, ETH_ZLEN); + tx_info->data_offset = (void *)data - (void *)tx_desc; + tx_info->ts_requested = 0; + tx_info->nr_maps = 1; + tx_info->linear = 1; + tx_info->inl = 0; + + dma_sync_single_for_device(priv->ddev, dma, length, PCI_DMA_TODEVICE); + + data->addr = cpu_to_be64(dma); + data->lkey = ring->mr_key; + dma_wmb(); + data->byte_count = cpu_to_be32(length); + + /* tx completion can avoid cache line miss for common cases */ + tx_desc->ctrl.srcrb_flags = priv->ctrl_flags; + + op_own = cpu_to_be32(MLX4_OPCODE_SEND) | + ((ring->prod & ring->size) ? + cpu_to_be32(MLX4_EN_BIT_DESC_OWN) : 0); + + ring->packets++; + ring->bytes += tx_info->nr_bytes; + AVG_PERF_COUNTER(priv->pstats.tx_pktsz_avg, length); + + ring->prod += nr_txbb; + + stop_queue = mlx4_en_is_tx_ring_full(ring); + send_doorbell = stop_queue || + *doorbell_pending > MLX4_EN_DOORBELL_BUDGET; + bf_ok &= send_doorbell; + + real_size = ((CTRL_SIZE + nr_txbb * DS_SIZE) / 16) & 0x3f; + + if (bf_ok) + qpn_vlan.bf_qpn = ring->doorbell_qpn | cpu_to_be32(real_size); + else + qpn_vlan.fence_size = real_size; + + mlx4_en_tx_write_desc(ring, tx_desc, qpn_vlan, TXBB_SIZE, bf_index, + op_own, bf_ok, send_doorbell); + *doorbell_pending = send_doorbell ? 0 : *doorbell_pending + 1; + + return NETDEV_TX_OK; + +tx_drop: + ring->tx_dropped++; + return NETDEV_TX_BUSY; +} |