diff options
Diffstat (limited to 'drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c')
-rw-r--r-- | drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 1390 |
1 files changed, 607 insertions, 783 deletions
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 3ceafb55d6da..cf147ca419a8 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -1,7 +1,7 @@ /* * This file is part of the Chelsio T4 Ethernet driver for Linux. * - * Copyright (c) 2003-2014 Chelsio Communications, Inc. All rights reserved. + * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -67,6 +67,7 @@ #include <linux/crash_dump.h> #include "cxgb4.h" +#include "cxgb4_filter.h" #include "t4_regs.h" #include "t4_values.h" #include "t4_msg.h" @@ -76,6 +77,8 @@ #include "cxgb4_debugfs.h" #include "clip_tbl.h" #include "l2t.h" +#include "sched.h" +#include "cxgb4_tc_u32.h" char cxgb4_driver_name[] = KBUILD_MODNAME; @@ -86,30 +89,6 @@ char cxgb4_driver_name[] = KBUILD_MODNAME; const char cxgb4_driver_version[] = DRV_VERSION; #define DRV_DESC "Chelsio T4/T5/T6 Network Driver" -/* Host shadow copy of ingress filter entry. This is in host native format - * and doesn't match the ordering or bit order, etc. of the hardware of the - * firmware command. The use of bit-field structure elements is purely to - * remind ourselves of the field size limitations and save memory in the case - * where the filter table is large. - */ -struct filter_entry { - /* Administrative fields for filter. - */ - u32 valid:1; /* filter allocated and valid */ - u32 locked:1; /* filter is administratively locked */ - - u32 pending:1; /* filter action is pending firmware reply */ - u32 smtidx:8; /* Source MAC Table index for smac */ - struct l2t_entry *l2t; /* Layer Two Table entry for dmac */ - - /* The filter itself. Most of this is a straight copy of information - * provided by the extended ioctl(). Some fields are translated to - * internal forms -- for instance the Ingress Queue ID passed in from - * the ioctl() is translated into the Absolute Ingress Queue ID. - */ - struct ch_filter_specification fs; -}; - #define DFLT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | \ NETIF_MSG_TIMER | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP |\ NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR) @@ -223,13 +202,8 @@ MODULE_PARM_DESC(select_queue, static struct dentry *cxgb4_debugfs_root; -static LIST_HEAD(adapter_list); -static DEFINE_MUTEX(uld_mutex); -/* Adapter list to be accessed from atomic context */ -static LIST_HEAD(adap_rcu_list); -static DEFINE_SPINLOCK(adap_rcu_lock); -static struct cxgb4_uld_info ulds[CXGB4_ULD_MAX]; -static const char *const uld_str[] = { "RDMA", "iSCSI", "iSCSIT" }; +LIST_HEAD(adapter_list); +DEFINE_MUTEX(uld_mutex); static void link_report(struct net_device *dev) { @@ -303,11 +277,9 @@ static void dcb_tx_queue_prio_enable(struct net_device *dev, int enable) txq->dcb_prio = value; } } -#endif /* CONFIG_CHELSIO_T4_DCB */ -int cxgb4_dcb_enabled(const struct net_device *dev) +static int cxgb4_dcb_enabled(const struct net_device *dev) { -#ifdef CONFIG_CHELSIO_T4_DCB struct port_info *pi = netdev_priv(dev); if (!pi->dcb.enabled) @@ -315,11 +287,8 @@ int cxgb4_dcb_enabled(const struct net_device *dev) return ((pi->dcb.state == CXGB4_DCB_STATE_FW_ALLSYNCED) || (pi->dcb.state == CXGB4_DCB_STATE_HOST)); -#else - return 0; -#endif } -EXPORT_SYMBOL(cxgb4_dcb_enabled); +#endif /* CONFIG_CHELSIO_T4_DCB */ void t4_os_link_changed(struct adapter *adapter, int port_id, int link_stat) { @@ -531,66 +500,6 @@ static void dcb_rpl(struct adapter *adap, const struct fw_port_cmd *pcmd) } #endif /* CONFIG_CHELSIO_T4_DCB */ -/* Clear a filter and release any of its resources that we own. This also - * clears the filter's "pending" status. - */ -static void clear_filter(struct adapter *adap, struct filter_entry *f) -{ - /* If the new or old filter have loopback rewriteing rules then we'll - * need to free any existing Layer Two Table (L2T) entries of the old - * filter rule. The firmware will handle freeing up any Source MAC - * Table (SMT) entries used for rewriting Source MAC Addresses in - * loopback rules. - */ - if (f->l2t) - cxgb4_l2t_release(f->l2t); - - /* The zeroing of the filter rule below clears the filter valid, - * pending, locked flags, l2t pointer, etc. so it's all we need for - * this operation. - */ - memset(f, 0, sizeof(*f)); -} - -/* Handle a filter write/deletion reply. - */ -static void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl) -{ - unsigned int idx = GET_TID(rpl); - unsigned int nidx = idx - adap->tids.ftid_base; - unsigned int ret; - struct filter_entry *f; - - if (idx >= adap->tids.ftid_base && nidx < - (adap->tids.nftids + adap->tids.nsftids)) { - idx = nidx; - ret = TCB_COOKIE_G(rpl->cookie); - f = &adap->tids.ftid_tab[idx]; - - if (ret == FW_FILTER_WR_FLT_DELETED) { - /* Clear the filter when we get confirmation from the - * hardware that the filter has been deleted. - */ - clear_filter(adap, f); - } else if (ret == FW_FILTER_WR_SMT_TBL_FULL) { - dev_err(adap->pdev_dev, "filter %u setup failed due to full SMT\n", - idx); - clear_filter(adap, f); - } else if (ret == FW_FILTER_WR_FLT_ADDED) { - f->smtidx = (be64_to_cpu(rpl->oldval) >> 24) & 0xff; - f->pending = 0; /* asynchronous setup completed */ - f->valid = 1; - } else { - /* Something went wrong. Issue a warning about the - * problem and clear everything out. - */ - dev_err(adap->pdev_dev, "filter %u setup failed with error %u\n", - idx, ret); - clear_filter(adap, f); - } - } -} - /* Response queue handler for the FW event queue. */ static int fwevtq_handler(struct sge_rspq *q, const __be64 *rsp, @@ -677,56 +586,6 @@ out: return 0; } -/* Flush the aggregated lro sessions */ -static void uldrx_flush_handler(struct sge_rspq *q) -{ - if (ulds[q->uld].lro_flush) - ulds[q->uld].lro_flush(&q->lro_mgr); -} - -/** - * uldrx_handler - response queue handler for ULD queues - * @q: the response queue that received the packet - * @rsp: the response queue descriptor holding the offload message - * @gl: the gather list of packet fragments - * - * Deliver an ingress offload packet to a ULD. All processing is done by - * the ULD, we just maintain statistics. - */ -static int uldrx_handler(struct sge_rspq *q, const __be64 *rsp, - const struct pkt_gl *gl) -{ - struct sge_ofld_rxq *rxq = container_of(q, struct sge_ofld_rxq, rspq); - int ret; - - /* FW can send CPLs encapsulated in a CPL_FW4_MSG. - */ - if (((const struct rss_header *)rsp)->opcode == CPL_FW4_MSG && - ((const struct cpl_fw4_msg *)(rsp + 1))->type == FW_TYPE_RSSCPL) - rsp += 2; - - if (q->flush_handler) - ret = ulds[q->uld].lro_rx_handler(q->adap->uld_handle[q->uld], - rsp, gl, &q->lro_mgr, - &q->napi); - else - ret = ulds[q->uld].rx_handler(q->adap->uld_handle[q->uld], - rsp, gl); - - if (ret) { - rxq->stats.nomem++; - return -1; - } - - if (gl == NULL) - rxq->stats.imm++; - else if (gl == CXGB4_MSG_AN) - rxq->stats.an++; - else - rxq->stats.pkts++; - return 0; -} - static void disable_msi(struct adapter *adapter) { if (adapter->flags & USING_MSIX) { @@ -778,30 +637,12 @@ static void name_msix_vecs(struct adapter *adap) snprintf(adap->msix_info[msi_idx].desc, n, "%s-Rx%d", d->name, i); } - - /* offload queues */ - for_each_iscsirxq(&adap->sge, i) - snprintf(adap->msix_info[msi_idx++].desc, n, "%s-iscsi%d", - adap->port[0]->name, i); - - for_each_iscsitrxq(&adap->sge, i) - snprintf(adap->msix_info[msi_idx++].desc, n, "%s-iSCSIT%d", - adap->port[0]->name, i); - - for_each_rdmarxq(&adap->sge, i) - snprintf(adap->msix_info[msi_idx++].desc, n, "%s-rdma%d", - adap->port[0]->name, i); - - for_each_rdmaciq(&adap->sge, i) - snprintf(adap->msix_info[msi_idx++].desc, n, "%s-rdma-ciq%d", - adap->port[0]->name, i); } static int request_msix_queue_irqs(struct adapter *adap) { struct sge *s = &adap->sge; - int err, ethqidx, iscsiqidx = 0, rdmaqidx = 0, rdmaciqqidx = 0; - int iscsitqidx = 0; + int err, ethqidx; int msi_index = 2; err = request_irq(adap->msix_info[1].vec, t4_sge_intr_msix, 0, @@ -818,57 +659,9 @@ static int request_msix_queue_irqs(struct adapter *adap) goto unwind; msi_index++; } - for_each_iscsirxq(s, iscsiqidx) { - err = request_irq(adap->msix_info[msi_index].vec, - t4_sge_intr_msix, 0, - adap->msix_info[msi_index].desc, - &s->iscsirxq[iscsiqidx].rspq); - if (err) - goto unwind; - msi_index++; - } - for_each_iscsitrxq(s, iscsitqidx) { - err = request_irq(adap->msix_info[msi_index].vec, - t4_sge_intr_msix, 0, - adap->msix_info[msi_index].desc, - &s->iscsitrxq[iscsitqidx].rspq); - if (err) - goto unwind; - msi_index++; - } - for_each_rdmarxq(s, rdmaqidx) { - err = request_irq(adap->msix_info[msi_index].vec, - t4_sge_intr_msix, 0, - adap->msix_info[msi_index].desc, - &s->rdmarxq[rdmaqidx].rspq); - if (err) - goto unwind; - msi_index++; - } - for_each_rdmaciq(s, rdmaciqqidx) { - err = request_irq(adap->msix_info[msi_index].vec, - t4_sge_intr_msix, 0, - adap->msix_info[msi_index].desc, - &s->rdmaciq[rdmaciqqidx].rspq); - if (err) - goto unwind; - msi_index++; - } return 0; unwind: - while (--rdmaciqqidx >= 0) - free_irq(adap->msix_info[--msi_index].vec, - &s->rdmaciq[rdmaciqqidx].rspq); - while (--rdmaqidx >= 0) - free_irq(adap->msix_info[--msi_index].vec, - &s->rdmarxq[rdmaqidx].rspq); - while (--iscsitqidx >= 0) - free_irq(adap->msix_info[--msi_index].vec, - &s->iscsitrxq[iscsitqidx].rspq); - while (--iscsiqidx >= 0) - free_irq(adap->msix_info[--msi_index].vec, - &s->iscsirxq[iscsiqidx].rspq); while (--ethqidx >= 0) free_irq(adap->msix_info[--msi_index].vec, &s->ethrxq[ethqidx].rspq); @@ -884,16 +677,6 @@ static void free_msix_queue_irqs(struct adapter *adap) free_irq(adap->msix_info[1].vec, &s->fw_evtq); for_each_ethrxq(s, i) free_irq(adap->msix_info[msi_index++].vec, &s->ethrxq[i].rspq); - for_each_iscsirxq(s, i) - free_irq(adap->msix_info[msi_index++].vec, - &s->iscsirxq[i].rspq); - for_each_iscsitrxq(s, i) - free_irq(adap->msix_info[msi_index++].vec, - &s->iscsitrxq[i].rspq); - for_each_rdmarxq(s, i) - free_irq(adap->msix_info[msi_index++].vec, &s->rdmarxq[i].rspq); - for_each_rdmaciq(s, i) - free_irq(adap->msix_info[msi_index++].vec, &s->rdmaciq[i].rspq); } /** @@ -1032,28 +815,30 @@ static void enable_rx(struct adapter *adap) } } -static int alloc_ofld_rxqs(struct adapter *adap, struct sge_ofld_rxq *q, - unsigned int nq, unsigned int per_chan, int msi_idx, - u16 *ids, bool lro) + +static int setup_fw_sge_queues(struct adapter *adap) { - int i, err; + struct sge *s = &adap->sge; + int err = 0; + + bitmap_zero(s->starving_fl, s->egr_sz); + bitmap_zero(s->txq_maperr, s->egr_sz); - for (i = 0; i < nq; i++, q++) { - if (msi_idx > 0) - msi_idx++; - err = t4_sge_alloc_rxq(adap, &q->rspq, false, - adap->port[i / per_chan], - msi_idx, q->fl.size ? &q->fl : NULL, - uldrx_handler, - lro ? uldrx_flush_handler : NULL, - 0); + if (adap->flags & USING_MSIX) + adap->msi_idx = 1; /* vector 0 is for non-queue interrupts */ + else { + err = t4_sge_alloc_rxq(adap, &s->intrq, false, adap->port[0], 0, + NULL, NULL, NULL, -1); if (err) return err; - memset(&q->stats, 0, sizeof(q->stats)); - if (ids) - ids[i] = q->rspq.abs_id; + adap->msi_idx = -((int)s->intrq.abs_id + 1); } - return 0; + + err = t4_sge_alloc_rxq(adap, &s->fw_evtq, true, adap->port[0], + adap->msi_idx, NULL, fwevtq_handler, NULL, -1); + if (err) + t4_free_sge_resources(adap); + return err; } /** @@ -1066,41 +851,10 @@ static int alloc_ofld_rxqs(struct adapter *adap, struct sge_ofld_rxq *q, */ static int setup_sge_queues(struct adapter *adap) { - int err, msi_idx, i, j; + int err, i, j; struct sge *s = &adap->sge; - - bitmap_zero(s->starving_fl, s->egr_sz); - bitmap_zero(s->txq_maperr, s->egr_sz); - - if (adap->flags & USING_MSIX) - msi_idx = 1; /* vector 0 is for non-queue interrupts */ - else { - err = t4_sge_alloc_rxq(adap, &s->intrq, false, adap->port[0], 0, - NULL, NULL, NULL, -1); - if (err) - return err; - msi_idx = -((int)s->intrq.abs_id + 1); - } - - /* NOTE: If you add/delete any Ingress/Egress Queue allocations in here, - * don't forget to update the following which need to be - * synchronized to and changes here. - * - * 1. The calculations of MAX_INGQ in cxgb4.h. - * - * 2. Update enable_msix/name_msix_vecs/request_msix_queue_irqs - * to accommodate any new/deleted Ingress Queues - * which need MSI-X Vectors. - * - * 3. Update sge_qinfo_show() to include information on the - * new/deleted queues. - */ - err = t4_sge_alloc_rxq(adap, &s->fw_evtq, true, adap->port[0], - msi_idx, NULL, fwevtq_handler, NULL, -1); - if (err) { -freeout: t4_free_sge_resources(adap); - return err; - } + struct sge_uld_rxq_info *rxq_info = s->uld_rxq_info[CXGB4_ULD_RDMA]; + unsigned int cmplqid = 0; for_each_port(adap, i) { struct net_device *dev = adap->port[i]; @@ -1109,10 +863,10 @@ freeout: t4_free_sge_resources(adap); struct sge_eth_txq *t = &s->ethtxq[pi->first_qset]; for (j = 0; j < pi->nqsets; j++, q++) { - if (msi_idx > 0) - msi_idx++; + if (adap->msi_idx > 0) + adap->msi_idx++; err = t4_sge_alloc_rxq(adap, &q->rspq, false, dev, - msi_idx, &q->fl, + adap->msi_idx, &q->fl, t4_ethrx_handler, NULL, t4_get_mps_bg_map(adap, @@ -1131,8 +885,8 @@ freeout: t4_free_sge_resources(adap); } } - j = s->iscsiqsets / adap->params.nports; /* iscsi queues per channel */ - for_each_iscsirxq(s, i) { + j = s->ofldqsets / adap->params.nports; /* iscsi queues per channel */ + for_each_ofldtxq(s, i) { err = t4_sge_alloc_ofld_txq(adap, &s->ofldtxq[i], adap->port[i / j], s->fw_evtq.cntxt_id); @@ -1140,30 +894,15 @@ freeout: t4_free_sge_resources(adap); goto freeout; } -#define ALLOC_OFLD_RXQS(firstq, nq, per_chan, ids, lro) do { \ - err = alloc_ofld_rxqs(adap, firstq, nq, per_chan, msi_idx, ids, lro); \ - if (err) \ - goto freeout; \ - if (msi_idx > 0) \ - msi_idx += nq; \ -} while (0) - - ALLOC_OFLD_RXQS(s->iscsirxq, s->iscsiqsets, j, s->iscsi_rxq, false); - ALLOC_OFLD_RXQS(s->iscsitrxq, s->niscsitq, j, s->iscsit_rxq, true); - ALLOC_OFLD_RXQS(s->rdmarxq, s->rdmaqs, 1, s->rdma_rxq, false); - j = s->rdmaciqs / adap->params.nports; /* rdmaq queues per channel */ - ALLOC_OFLD_RXQS(s->rdmaciq, s->rdmaciqs, j, s->rdma_ciq, false); - -#undef ALLOC_OFLD_RXQS - for_each_port(adap, i) { - /* - * Note that ->rdmarxq[i].rspq.cntxt_id below is 0 if we don't + /* Note that cmplqid below is 0 if we don't * have RDMA queues, and that's the right value. */ + if (rxq_info) + cmplqid = rxq_info->uldrxq[i].rspq.cntxt_id; + err = t4_sge_alloc_ctrl_txq(adap, &s->ctrlq[i], adap->port[i], - s->fw_evtq.cntxt_id, - s->rdmarxq[i].rspq.cntxt_id); + s->fw_evtq.cntxt_id, cmplqid); if (err) goto freeout; } @@ -1174,6 +913,9 @@ freeout: t4_free_sge_resources(adap); RSSCONTROL_V(netdev2pinfo(adap->port[0])->tx_chan) | QUEUENUMBER_V(s->ethrxq[0].rspq.abs_id)); return 0; +freeout: + t4_free_sge_resources(adap); + return err; } /* @@ -1197,151 +939,6 @@ void t4_free_mem(void *addr) kvfree(addr); } -/* Send a Work Request to write the filter at a specified index. We construct - * a Firmware Filter Work Request to have the work done and put the indicated - * filter into "pending" mode which will prevent any further actions against - * it till we get a reply from the firmware on the completion status of the - * request. - */ -static int set_filter_wr(struct adapter *adapter, int fidx) -{ - struct filter_entry *f = &adapter->tids.ftid_tab[fidx]; - struct sk_buff *skb; - struct fw_filter_wr *fwr; - unsigned int ftid; - - skb = alloc_skb(sizeof(*fwr), GFP_KERNEL); - if (!skb) - return -ENOMEM; - - /* If the new filter requires loopback Destination MAC and/or VLAN - * rewriting then we need to allocate a Layer 2 Table (L2T) entry for - * the filter. - */ - if (f->fs.newdmac || f->fs.newvlan) { - /* allocate L2T entry for new filter */ - f->l2t = t4_l2t_alloc_switching(adapter, f->fs.vlan, - f->fs.eport, f->fs.dmac); - if (f->l2t == NULL) { - kfree_skb(skb); - return -ENOMEM; - } - } - - ftid = adapter->tids.ftid_base + fidx; - - fwr = (struct fw_filter_wr *)__skb_put(skb, sizeof(*fwr)); - memset(fwr, 0, sizeof(*fwr)); - - /* It would be nice to put most of the following in t4_hw.c but most - * of the work is translating the cxgbtool ch_filter_specification - * into the Work Request and the definition of that structure is - * currently in cxgbtool.h which isn't appropriate to pull into the - * common code. We may eventually try to come up with a more neutral - * filter specification structure but for now it's easiest to simply - * put this fairly direct code in line ... - */ - fwr->op_pkd = htonl(FW_WR_OP_V(FW_FILTER_WR)); - fwr->len16_pkd = htonl(FW_WR_LEN16_V(sizeof(*fwr)/16)); - fwr->tid_to_iq = - htonl(FW_FILTER_WR_TID_V(ftid) | - FW_FILTER_WR_RQTYPE_V(f->fs.type) | - FW_FILTER_WR_NOREPLY_V(0) | - FW_FILTER_WR_IQ_V(f->fs.iq)); - fwr->del_filter_to_l2tix = - htonl(FW_FILTER_WR_RPTTID_V(f->fs.rpttid) | - FW_FILTER_WR_DROP_V(f->fs.action == FILTER_DROP) | - FW_FILTER_WR_DIRSTEER_V(f->fs.dirsteer) | - FW_FILTER_WR_MASKHASH_V(f->fs.maskhash) | - FW_FILTER_WR_DIRSTEERHASH_V(f->fs.dirsteerhash) | - FW_FILTER_WR_LPBK_V(f->fs.action == FILTER_SWITCH) | - FW_FILTER_WR_DMAC_V(f->fs.newdmac) | - FW_FILTER_WR_SMAC_V(f->fs.newsmac) | - FW_FILTER_WR_INSVLAN_V(f->fs.newvlan == VLAN_INSERT || - f->fs.newvlan == VLAN_REWRITE) | - FW_FILTER_WR_RMVLAN_V(f->fs.newvlan == VLAN_REMOVE || - f->fs.newvlan == VLAN_REWRITE) | - FW_FILTER_WR_HITCNTS_V(f->fs.hitcnts) | - FW_FILTER_WR_TXCHAN_V(f->fs.eport) | - FW_FILTER_WR_PRIO_V(f->fs.prio) | - FW_FILTER_WR_L2TIX_V(f->l2t ? f->l2t->idx : 0)); - fwr->ethtype = htons(f->fs.val.ethtype); - fwr->ethtypem = htons(f->fs.mask.ethtype); - fwr->frag_to_ovlan_vldm = - (FW_FILTER_WR_FRAG_V(f->fs.val.frag) | - FW_FILTER_WR_FRAGM_V(f->fs.mask.frag) | - FW_FILTER_WR_IVLAN_VLD_V(f->fs.val.ivlan_vld) | - FW_FILTER_WR_OVLAN_VLD_V(f->fs.val.ovlan_vld) | - FW_FILTER_WR_IVLAN_VLDM_V(f->fs.mask.ivlan_vld) | - FW_FILTER_WR_OVLAN_VLDM_V(f->fs.mask.ovlan_vld)); - fwr->smac_sel = 0; - fwr->rx_chan_rx_rpl_iq = - htons(FW_FILTER_WR_RX_CHAN_V(0) | - FW_FILTER_WR_RX_RPL_IQ_V(adapter->sge.fw_evtq.abs_id)); - fwr->maci_to_matchtypem = - htonl(FW_FILTER_WR_MACI_V(f->fs.val.macidx) | - FW_FILTER_WR_MACIM_V(f->fs.mask.macidx) | - FW_FILTER_WR_FCOE_V(f->fs.val.fcoe) | - FW_FILTER_WR_FCOEM_V(f->fs.mask.fcoe) | - FW_FILTER_WR_PORT_V(f->fs.val.iport) | - FW_FILTER_WR_PORTM_V(f->fs.mask.iport) | - FW_FILTER_WR_MATCHTYPE_V(f->fs.val.matchtype) | - FW_FILTER_WR_MATCHTYPEM_V(f->fs.mask.matchtype)); - fwr->ptcl = f->fs.val.proto; - fwr->ptclm = f->fs.mask.proto; - fwr->ttyp = f->fs.val.tos; - fwr->ttypm = f->fs.mask.tos; - fwr->ivlan = htons(f->fs.val.ivlan); - fwr->ivlanm = htons(f->fs.mask.ivlan); - fwr->ovlan = htons(f->fs.val.ovlan); - fwr->ovlanm = htons(f->fs.mask.ovlan); - memcpy(fwr->lip, f->fs.val.lip, sizeof(fwr->lip)); - memcpy(fwr->lipm, f->fs.mask.lip, sizeof(fwr->lipm)); - memcpy(fwr->fip, f->fs.val.fip, sizeof(fwr->fip)); - memcpy(fwr->fipm, f->fs.mask.fip, sizeof(fwr->fipm)); - fwr->lp = htons(f->fs.val.lport); - fwr->lpm = htons(f->fs.mask.lport); - fwr->fp = htons(f->fs.val.fport); - fwr->fpm = htons(f->fs.mask.fport); - if (f->fs.newsmac) - memcpy(fwr->sma, f->fs.smac, sizeof(fwr->sma)); - - /* Mark the filter as "pending" and ship off the Filter Work Request. - * When we get the Work Request Reply we'll clear the pending status. - */ - f->pending = 1; - set_wr_txq(skb, CPL_PRIORITY_CONTROL, f->fs.val.iport & 0x3); - t4_ofld_send(adapter, skb); - return 0; -} - -/* Delete the filter at a specified index. - */ -static int del_filter_wr(struct adapter *adapter, int fidx) -{ - struct filter_entry *f = &adapter->tids.ftid_tab[fidx]; - struct sk_buff *skb; - struct fw_filter_wr *fwr; - unsigned int len, ftid; - - len = sizeof(*fwr); - ftid = adapter->tids.ftid_base + fidx; - - skb = alloc_skb(len, GFP_KERNEL); - if (!skb) - return -ENOMEM; - - fwr = (struct fw_filter_wr *)__skb_put(skb, len); - t4_mk_filtdelwr(ftid, fwr, adapter->sge.fw_evtq.abs_id); - - /* Mark the filter as "pending" and ship off the Filter Work Request. - * When we get the Work Request Reply we'll clear the pending status. - */ - f->pending = 1; - t4_mgmt_tx(adapter, skb); - return 0; -} - static u16 cxgb_select_queue(struct net_device *dev, struct sk_buff *skb, void *accel_priv, select_queue_fallback_t fallback) { @@ -1723,19 +1320,22 @@ EXPORT_SYMBOL(cxgb4_remove_tid); */ static int tid_init(struct tid_info *t) { - size_t size; - unsigned int stid_bmap_size; - unsigned int natids = t->natids; struct adapter *adap = container_of(t, struct adapter, tids); + unsigned int max_ftids = t->nftids + t->nsftids; + unsigned int natids = t->natids; + unsigned int stid_bmap_size; + unsigned int ftid_bmap_size; + size_t size; stid_bmap_size = BITS_TO_LONGS(t->nstids + t->nsftids); + ftid_bmap_size = BITS_TO_LONGS(t->nftids); size = t->ntids * sizeof(*t->tid_tab) + natids * sizeof(*t->atid_tab) + t->nstids * sizeof(*t->stid_tab) + t->nsftids * sizeof(*t->stid_tab) + stid_bmap_size * sizeof(long) + - t->nftids * sizeof(*t->ftid_tab) + - t->nsftids * sizeof(*t->ftid_tab); + max_ftids * sizeof(*t->ftid_tab) + + ftid_bmap_size * sizeof(long); t->tid_tab = t4_alloc_mem(size); if (!t->tid_tab) @@ -1745,8 +1345,10 @@ static int tid_init(struct tid_info *t) t->stid_tab = (struct serv_entry *)&t->atid_tab[natids]; t->stid_bmap = (unsigned long *)&t->stid_tab[t->nstids + t->nsftids]; t->ftid_tab = (struct filter_entry *)&t->stid_bmap[stid_bmap_size]; + t->ftid_bmap = (unsigned long *)&t->ftid_tab[max_ftids]; spin_lock_init(&t->stid_lock); spin_lock_init(&t->atid_lock); + spin_lock_init(&t->ftid_lock); t->stids_in_use = 0; t->sftids_in_use = 0; @@ -1761,12 +1363,16 @@ static int tid_init(struct tid_info *t) t->atid_tab[natids - 1].next = &t->atid_tab[natids]; t->afree = t->atid_tab; } - bitmap_zero(t->stid_bmap, t->nstids + t->nsftids); - /* Reserve stid 0 for T4/T5 adapters */ - if (!t->stid_base && - (CHELSIO_CHIP_VERSION(adap->params.chip) <= CHELSIO_T5)) - __set_bit(0, t->stid_bmap); + if (is_offload(adap)) { + bitmap_zero(t->stid_bmap, t->nstids + t->nsftids); + /* Reserve stid 0 for T4/T5 adapters */ + if (!t->stid_base && + CHELSIO_CHIP_VERSION(adap->params.chip) <= CHELSIO_T5) + __set_bit(0, t->stid_bmap); + } + + bitmap_zero(t->ftid_bmap, t->nftids); return 0; } @@ -2316,7 +1922,7 @@ static void disable_dbs(struct adapter *adap) for_each_ethrxq(&adap->sge, i) disable_txq_db(&adap->sge.ethtxq[i].q); - for_each_iscsirxq(&adap->sge, i) + for_each_ofldtxq(&adap->sge, i) disable_txq_db(&adap->sge.ofldtxq[i].q); for_each_port(adap, i) disable_txq_db(&adap->sge.ctrlq[i].q); @@ -2328,7 +1934,7 @@ static void enable_dbs(struct adapter *adap) for_each_ethrxq(&adap->sge, i) enable_txq_db(adap, &adap->sge.ethtxq[i].q); - for_each_iscsirxq(&adap->sge, i) + for_each_ofldtxq(&adap->sge, i) enable_txq_db(adap, &adap->sge.ofldtxq[i].q); for_each_port(adap, i) enable_txq_db(adap, &adap->sge.ctrlq[i].q); @@ -2336,9 +1942,10 @@ static void enable_dbs(struct adapter *adap) static void notify_rdma_uld(struct adapter *adap, enum cxgb4_control cmd) { - if (adap->uld_handle[CXGB4_ULD_RDMA]) - ulds[CXGB4_ULD_RDMA].control(adap->uld_handle[CXGB4_ULD_RDMA], - cmd); + enum cxgb4_uld type = CXGB4_ULD_RDMA; + + if (adap->uld && adap->uld[type].handle) + adap->uld[type].control(adap->uld[type].handle, cmd); } static void process_db_full(struct work_struct *work) @@ -2392,13 +1999,14 @@ out: if (ret) CH_WARN(adap, "DB drop recovery failed.\n"); } + static void recover_all_queues(struct adapter *adap) { int i; for_each_ethrxq(&adap->sge, i) sync_txq_pidx(adap, &adap->sge.ethtxq[i].q); - for_each_iscsirxq(&adap->sge, i) + for_each_ofldtxq(&adap->sge, i) sync_txq_pidx(adap, &adap->sge.ofldtxq[i].q); for_each_port(adap, i) sync_txq_pidx(adap, &adap->sge.ctrlq[i].q); @@ -2463,94 +2071,12 @@ void t4_db_dropped(struct adapter *adap) queue_work(adap->workq, &adap->db_drop_task); } -static void uld_attach(struct adapter *adap, unsigned int uld) -{ - void *handle; - struct cxgb4_lld_info lli; - unsigned short i; - - lli.pdev = adap->pdev; - lli.pf = adap->pf; - lli.l2t = adap->l2t; - lli.tids = &adap->tids; - lli.ports = adap->port; - lli.vr = &adap->vres; - lli.mtus = adap->params.mtus; - if (uld == CXGB4_ULD_RDMA) { - lli.rxq_ids = adap->sge.rdma_rxq; - lli.ciq_ids = adap->sge.rdma_ciq; - lli.nrxq = adap->sge.rdmaqs; - lli.nciq = adap->sge.rdmaciqs; - } else if (uld == CXGB4_ULD_ISCSI) { - lli.rxq_ids = adap->sge.iscsi_rxq; - lli.nrxq = adap->sge.iscsiqsets; - } else if (uld == CXGB4_ULD_ISCSIT) { - lli.rxq_ids = adap->sge.iscsit_rxq; - lli.nrxq = adap->sge.niscsitq; - } - lli.ntxq = adap->sge.iscsiqsets; - lli.nchan = adap->params.nports; - lli.nports = adap->params.nports; - lli.wr_cred = adap->params.ofldq_wr_cred; - lli.adapter_type = adap->params.chip; - lli.iscsi_iolen = MAXRXDATA_G(t4_read_reg(adap, TP_PARA_REG2_A)); - lli.iscsi_tagmask = t4_read_reg(adap, ULP_RX_ISCSI_TAGMASK_A); - lli.iscsi_pgsz_order = t4_read_reg(adap, ULP_RX_ISCSI_PSZ_A); - lli.iscsi_llimit = t4_read_reg(adap, ULP_RX_ISCSI_LLIMIT_A); - lli.iscsi_ppm = &adap->iscsi_ppm; - lli.cclk_ps = 1000000000 / adap->params.vpd.cclk; - lli.udb_density = 1 << adap->params.sge.eq_qpp; - lli.ucq_density = 1 << adap->params.sge.iq_qpp; - lli.filt_mode = adap->params.tp.vlan_pri_map; - /* MODQ_REQ_MAP sets queues 0-3 to chan 0-3 */ - for (i = 0; i < NCHAN; i++) - lli.tx_modq[i] = i; - lli.gts_reg = adap->regs + MYPF_REG(SGE_PF_GTS_A); - lli.db_reg = adap->regs + MYPF_REG(SGE_PF_KDOORBELL_A); - lli.fw_vers = adap->params.fw_vers; - lli.dbfifo_int_thresh = dbfifo_int_thresh; - lli.sge_ingpadboundary = adap->sge.fl_align; - lli.sge_egrstatuspagesize = adap->sge.stat_len; - lli.sge_pktshift = adap->sge.pktshift; - lli.enable_fw_ofld_conn = adap->flags & FW_OFLD_CONN; - lli.max_ordird_qp = adap->params.max_ordird_qp; - lli.max_ird_adapter = adap->params.max_ird_adapter; - lli.ulptx_memwrite_dsgl = adap->params.ulptx_memwrite_dsgl; - lli.nodeid = dev_to_node(adap->pdev_dev); - - handle = ulds[uld].add(&lli); - if (IS_ERR(handle)) { - dev_warn(adap->pdev_dev, - "could not attach to the %s driver, error %ld\n", - uld_str[uld], PTR_ERR(handle)); - return; - } - - adap->uld_handle[uld] = handle; - +void t4_register_netevent_notifier(void) +{ if (!netevent_registered) { register_netevent_notifier(&cxgb4_netevent_nb); netevent_registered = true; } - - if (adap->flags & FULL_INIT_DONE) - ulds[uld].state_change(handle, CXGB4_STATE_UP); -} - -static void attach_ulds(struct adapter *adap) -{ - unsigned int i; - - spin_lock(&adap_rcu_lock); - list_add_tail_rcu(&adap->rcu_node, &adap_rcu_list); - spin_unlock(&adap_rcu_lock); - - mutex_lock(&uld_mutex); - list_add_tail(&adap->list_node, &adapter_list); - for (i = 0; i < CXGB4_ULD_MAX; i++) - if (ulds[i].add) - uld_attach(adap, i); - mutex_unlock(&uld_mutex); } static void detach_ulds(struct adapter *adap) @@ -2560,20 +2086,16 @@ static void detach_ulds(struct adapter *adap) mutex_lock(&uld_mutex); list_del(&adap->list_node); for (i = 0; i < CXGB4_ULD_MAX; i++) - if (adap->uld_handle[i]) { - ulds[i].state_change(adap->uld_handle[i], + if (adap->uld && adap->uld[i].handle) { + adap->uld[i].state_change(adap->uld[i].handle, CXGB4_STATE_DETACH); - adap->uld_handle[i] = NULL; + adap->uld[i].handle = NULL; } if (netevent_registered && list_empty(&adapter_list)) { unregister_netevent_notifier(&cxgb4_netevent_nb); netevent_registered = false; } mutex_unlock(&uld_mutex); - - spin_lock(&adap_rcu_lock); - list_del_rcu(&adap->rcu_node); - spin_unlock(&adap_rcu_lock); } static void notify_ulds(struct adapter *adap, enum cxgb4_state new_state) @@ -2582,60 +2104,11 @@ static void notify_ulds(struct adapter *adap, enum cxgb4_state new_state) mutex_lock(&uld_mutex); for (i = 0; i < CXGB4_ULD_MAX; i++) - if (adap->uld_handle[i]) - ulds[i].state_change(adap->uld_handle[i], new_state); - mutex_unlock(&uld_mutex); -} - -/** - * cxgb4_register_uld - register an upper-layer driver - * @type: the ULD type - * @p: the ULD methods - * - * Registers an upper-layer driver with this driver and notifies the ULD - * about any presently available devices that support its type. Returns - * %-EBUSY if a ULD of the same type is already registered. - */ -int cxgb4_register_uld(enum cxgb4_uld type, const struct cxgb4_uld_info *p) -{ - int ret = 0; - struct adapter *adap; - - if (type >= CXGB4_ULD_MAX) - return -EINVAL; - mutex_lock(&uld_mutex); - if (ulds[type].add) { - ret = -EBUSY; - goto out; - } - ulds[type] = *p; - list_for_each_entry(adap, &adapter_list, list_node) - uld_attach(adap, type); -out: mutex_unlock(&uld_mutex); - return ret; -} -EXPORT_SYMBOL(cxgb4_register_uld); - -/** - * cxgb4_unregister_uld - unregister an upper-layer driver - * @type: the ULD type - * - * Unregisters an existing upper-layer driver. - */ -int cxgb4_unregister_uld(enum cxgb4_uld type) -{ - struct adapter *adap; - - if (type >= CXGB4_ULD_MAX) - return -EINVAL; - mutex_lock(&uld_mutex); - list_for_each_entry(adap, &adapter_list, list_node) - adap->uld_handle[type] = NULL; - ulds[type].add = NULL; + if (adap->uld && adap->uld[i].handle) + adap->uld[i].state_change(adap->uld[i].handle, + new_state); mutex_unlock(&uld_mutex); - return 0; } -EXPORT_SYMBOL(cxgb4_unregister_uld); #if IS_ENABLED(CONFIG_IPV6) static int cxgb4_inet6addr_handler(struct notifier_block *this, @@ -2741,7 +2214,6 @@ static int cxgb_up(struct adapter *adap) adap->msix_info[0].desc, adap); if (err) goto irq_err; - err = request_msix_queue_irqs(adap); if (err) { free_irq(adap->msix_info[0].vec, adap); @@ -2819,40 +2291,6 @@ static int cxgb_close(struct net_device *dev) return t4_enable_vi(adapter, adapter->pf, pi->viid, false, false); } -/* Return an error number if the indicated filter isn't writable ... - */ -static int writable_filter(struct filter_entry *f) -{ - if (f->locked) - return -EPERM; - if (f->pending) - return -EBUSY; - - return 0; -} - -/* Delete the filter at the specified index (if valid). The checks for all - * the common problems with doing this like the filter being locked, currently - * pending in another operation, etc. - */ -static int delete_filter(struct adapter *adapter, unsigned int fidx) -{ - struct filter_entry *f; - int ret; - - if (fidx >= adapter->tids.nftids + adapter->tids.nsftids) - return -EINVAL; - - f = &adapter->tids.ftid_tab[fidx]; - ret = writable_filter(f); - if (ret) - return ret; - if (f->valid) - return del_filter_wr(adapter, fidx); - - return 0; -} - int cxgb4_create_server_filter(const struct net_device *dev, unsigned int stid, __be32 sip, __be16 sport, __be16 vlan, unsigned int queue, unsigned char port, unsigned char mask) @@ -2922,7 +2360,6 @@ EXPORT_SYMBOL(cxgb4_create_server_filter); int cxgb4_remove_server_filter(const struct net_device *dev, unsigned int stid, unsigned int queue, bool ipv6) { - int ret; struct filter_entry *f; struct adapter *adap; @@ -2936,11 +2373,7 @@ int cxgb4_remove_server_filter(const struct net_device *dev, unsigned int stid, /* Unlock the filter */ f->locked = 0; - ret = delete_filter(adap, stid); - if (ret) - return ret; - - return 0; + return delete_filter(adap, stid); } EXPORT_SYMBOL(cxgb4_remove_server_filter); @@ -3078,6 +2511,85 @@ static int cxgb_change_mtu(struct net_device *dev, int new_mtu) return ret; } +#ifdef CONFIG_PCI_IOV +static int dummy_open(struct net_device *dev) +{ + /* Turn carrier off since we don't have to transmit anything on this + * interface. + */ + netif_carrier_off(dev); + return 0; +} + +/* Fill MAC address that will be assigned by the FW */ +static void fill_vf_station_mac_addr(struct adapter *adap) +{ + unsigned int i; + u8 hw_addr[ETH_ALEN], macaddr[ETH_ALEN]; + int err; + u8 *na; + u16 a, b; + + err = t4_get_raw_vpd_params(adap, &adap->params.vpd); + if (!err) { + na = adap->params.vpd.na; + for (i = 0; i < ETH_ALEN; i++) + hw_addr[i] = (hex2val(na[2 * i + 0]) * 16 + + hex2val(na[2 * i + 1])); + a = (hw_addr[0] << 8) | hw_addr[1]; + b = (hw_addr[1] << 8) | hw_addr[2]; + a ^= b; + a |= 0x0200; /* locally assigned Ethernet MAC address */ + a &= ~0x0100; /* not a multicast Ethernet MAC address */ + macaddr[0] = a >> 8; + macaddr[1] = a & 0xff; + + for (i = 2; i < 5; i++) + macaddr[i] = hw_addr[i + 1]; + + for (i = 0; i < adap->num_vfs; i++) { + macaddr[5] = adap->pf * 16 + i; + ether_addr_copy(adap->vfinfo[i].vf_mac_addr, macaddr); + } + } +} + +static int cxgb_set_vf_mac(struct net_device *dev, int vf, u8 *mac) +{ + struct port_info *pi = netdev_priv(dev); + struct adapter *adap = pi->adapter; + int ret; + + /* verify MAC addr is valid */ + if (!is_valid_ether_addr(mac)) { + dev_err(pi->adapter->pdev_dev, + "Invalid Ethernet address %pM for VF %d\n", + mac, vf); + return -EINVAL; + } + + dev_info(pi->adapter->pdev_dev, + "Setting MAC %pM on VF %d\n", mac, vf); + ret = t4_set_vf_mac_acl(adap, vf + 1, 1, mac); + if (!ret) + ether_addr_copy(adap->vfinfo[vf].vf_mac_addr, mac); + return ret; +} + +static int cxgb_get_vf_config(struct net_device *dev, + int vf, struct ifla_vf_info *ivi) +{ + struct port_info *pi = netdev_priv(dev); + struct adapter *adap = pi->adapter; + + if (vf >= adap->num_vfs) + return -EINVAL; + ivi->vf = vf; + ether_addr_copy(ivi->mac, adap->vfinfo[vf].vf_mac_addr); + return 0; +} +#endif + static int cxgb_set_mac_addr(struct net_device *dev, void *p) { int ret; @@ -3114,6 +2626,116 @@ static void cxgb_netpoll(struct net_device *dev) } #endif +static int cxgb_set_tx_maxrate(struct net_device *dev, int index, u32 rate) +{ + struct port_info *pi = netdev_priv(dev); + struct adapter *adap = pi->adapter; + struct sched_class *e; + struct ch_sched_params p; + struct ch_sched_queue qe; + u32 req_rate; + int err = 0; + + if (!can_sched(dev)) + return -ENOTSUPP; + + if (index < 0 || index > pi->nqsets - 1) + return -EINVAL; + + if (!(adap->flags & FULL_INIT_DONE)) { + dev_err(adap->pdev_dev, + "Failed to rate limit on queue %d. Link Down?\n", + index); + return -EINVAL; + } + + /* Convert from Mbps to Kbps */ + req_rate = rate << 10; + + /* Max rate is 10 Gbps */ + if (req_rate >= SCHED_MAX_RATE_KBPS) { + dev_err(adap->pdev_dev, + "Invalid rate %u Mbps, Max rate is %u Gbps\n", + rate, SCHED_MAX_RATE_KBPS); + return -ERANGE; + } + + /* First unbind the queue from any existing class */ + memset(&qe, 0, sizeof(qe)); + qe.queue = index; + qe.class = SCHED_CLS_NONE; + + err = cxgb4_sched_class_unbind(dev, (void *)(&qe), SCHED_QUEUE); + if (err) { + dev_err(adap->pdev_dev, + "Unbinding Queue %d on port %d fail. Err: %d\n", + index, pi->port_id, err); + return err; + } + + /* Queue already unbound */ + if (!req_rate) + return 0; + + /* Fetch any available unused or matching scheduling class */ + memset(&p, 0, sizeof(p)); + p.type = SCHED_CLASS_TYPE_PACKET; + p.u.params.level = SCHED_CLASS_LEVEL_CL_RL; + p.u.params.mode = SCHED_CLASS_MODE_CLASS; + p.u.params.rateunit = SCHED_CLASS_RATEUNIT_BITS; + p.u.params.ratemode = SCHED_CLASS_RATEMODE_ABS; + p.u.params.channel = pi->tx_chan; + p.u.params.class = SCHED_CLS_NONE; + p.u.params.minrate = 0; + p.u.params.maxrate = req_rate; + p.u.params.weight = 0; + p.u.params.pktsize = dev->mtu; + + e = cxgb4_sched_class_alloc(dev, &p); + if (!e) + return -ENOMEM; + + /* Bind the queue to a scheduling class */ + memset(&qe, 0, sizeof(qe)); + qe.queue = index; + qe.class = e->idx; + + err = cxgb4_sched_class_bind(dev, (void *)(&qe), SCHED_QUEUE); + if (err) + dev_err(adap->pdev_dev, + "Queue rate limiting failed. Err: %d\n", err); + return err; +} + +static int cxgb_setup_tc(struct net_device *dev, u32 handle, __be16 proto, + struct tc_to_netdev *tc) +{ + struct port_info *pi = netdev2pinfo(dev); + struct adapter *adap = netdev2adap(dev); + + if (!(adap->flags & FULL_INIT_DONE)) { + dev_err(adap->pdev_dev, + "Failed to setup tc on port %d. Link Down?\n", + pi->port_id); + return -EINVAL; + } + + if (TC_H_MAJ(handle) == TC_H_MAJ(TC_H_INGRESS) && + tc->type == TC_SETUP_CLSU32) { + switch (tc->cls_u32->command) { + case TC_CLSU32_NEW_KNODE: + case TC_CLSU32_REPLACE_KNODE: + return cxgb4_config_knode(dev, proto, tc->cls_u32); + case TC_CLSU32_DELETE_KNODE: + return cxgb4_delete_knode(dev, proto, tc->cls_u32); + default: + return -EOPNOTSUPP; + } + } + + return -EOPNOTSUPP; +} + static const struct net_device_ops cxgb4_netdev_ops = { .ndo_open = cxgb_open, .ndo_stop = cxgb_close, @@ -3136,7 +2758,31 @@ static const struct net_device_ops cxgb4_netdev_ops = { #ifdef CONFIG_NET_RX_BUSY_POLL .ndo_busy_poll = cxgb_busy_poll, #endif + .ndo_set_tx_maxrate = cxgb_set_tx_maxrate, + .ndo_setup_tc = cxgb_setup_tc, +}; + +#ifdef CONFIG_PCI_IOV +static const struct net_device_ops cxgb4_mgmt_netdev_ops = { + .ndo_open = dummy_open, + .ndo_set_vf_mac = cxgb_set_vf_mac, + .ndo_get_vf_config = cxgb_get_vf_config, +}; +#endif + +static void get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) +{ + struct adapter *adapter = netdev2adap(dev); + + strlcpy(info->driver, cxgb4_driver_name, sizeof(info->driver)); + strlcpy(info->version, cxgb4_driver_version, + sizeof(info->version)); + strlcpy(info->bus_info, pci_name(adapter->pdev), + sizeof(info->bus_info)); +} +static const struct ethtool_ops cxgb4_mgmt_ethtool_ops = { + .get_drvinfo = get_drvinfo, }; void t4_fatal_err(struct adapter *adap) @@ -3979,6 +3625,12 @@ static int adap_init0(struct adapter *adap) adap->clipt_start = val[0]; adap->clipt_end = val[1]; + /* We don't yet have a PARAMs calls to retrieve the number of Traffic + * Classes supported by the hardware/firmware so we hard code it here + * for now. + */ + adap->params.nsched_cls = is_t4(adap->params.chip) ? 15 : 16; + /* query params related to active filter region */ params[0] = FW_PARAM_PFVF(ACTIVE_FILTER_START); params[1] = FW_PARAM_PFVF(ACTIVE_FILTER_END); @@ -4067,6 +3719,7 @@ static int adap_init0(struct adapter *adap) adap->params.ofldq_wr_cred = val[5]; adap->params.offload = 1; + adap->num_ofld_uld += 1; } if (caps_cmd.rdmacaps) { params[0] = FW_PARAM_PFVF(STAG_START); @@ -4119,6 +3772,7 @@ static int adap_init0(struct adapter *adap) "max_ordird_qp %d max_ird_adapter %d\n", adap->params.max_ordird_qp, adap->params.max_ird_adapter); + adap->num_ofld_uld += 2; } if (caps_cmd.iscsicaps) { params[0] = FW_PARAM_PFVF(ISCSI_START); @@ -4129,6 +3783,13 @@ static int adap_init0(struct adapter *adap) goto bye; adap->vres.iscsi.start = val[0]; adap->vres.iscsi.size = val[1] - val[0] + 1; + /* LIO target and cxgb4i initiaitor */ + adap->num_ofld_uld += 2; + } + if (caps_cmd.cryptocaps) { + /* Should query params here...TODO */ + adap->params.crypto |= ULP_CRYPTO_LOOKASIDE; + adap->num_uld += 1; } #undef FW_PARAM_PFVF #undef FW_PARAM_DEV @@ -4318,16 +3979,6 @@ static inline bool is_x_10g_port(const struct link_config *lc) return high_speeds != 0; } -static inline void init_rspq(struct adapter *adap, struct sge_rspq *q, - unsigned int us, unsigned int cnt, - unsigned int size, unsigned int iqe_size) -{ - q->adap = adap; - cxgb4_set_rspq_intr_params(q, us, cnt); - q->iqe_len = iqe_size; - q->size = size; -} - /* * Perform default configuration of DMA queues depending on the number and type * of ports we found and the number of available CPUs. Most settings can be @@ -4340,12 +3991,16 @@ static void cfg_queues(struct adapter *adap) #ifndef CONFIG_CHELSIO_T4_DCB int q10g = 0; #endif - int ciq_size; /* Reduce memory usage in kdump environment, disable all offload. */ - if (is_kdump_kernel()) + if (is_kdump_kernel()) { adap->params.offload = 0; + adap->params.crypto = 0; + } else if (is_uld(adap) && t4_uld_mem_alloc(adap)) { + adap->params.offload = 0; + adap->params.crypto = 0; + } for_each_port(adap, i) n10g += is_x_10g_port(&adap2pinfo(adap, i)->link_cfg); @@ -4389,33 +4044,18 @@ static void cfg_queues(struct adapter *adap) s->ethqsets = qidx; s->max_ethqsets = qidx; /* MSI-X may lower it later */ - if (is_offload(adap)) { + if (is_uld(adap)) { /* * For offload we use 1 queue/channel if all ports are up to 1G, * otherwise we divide all available queues amongst the channels * capped by the number of available cores. */ if (n10g) { - i = min_t(int, ARRAY_SIZE(s->iscsirxq), - num_online_cpus()); - s->iscsiqsets = roundup(i, adap->params.nports); - } else - s->iscsiqsets = adap->params.nports; - /* For RDMA one Rx queue per channel suffices */ - s->rdmaqs = adap->params.nports; - /* Try and allow at least 1 CIQ per cpu rounding down - * to the number of ports, with a minimum of 1 per port. - * A 2 port card in a 6 cpu system: 6 CIQs, 3 / port. - * A 4 port card in a 6 cpu system: 4 CIQs, 1 / port. - * A 4 port card in a 2 cpu system: 4 CIQs, 1 / port. - */ - s->rdmaciqs = min_t(int, MAX_RDMA_CIQS, num_online_cpus()); - s->rdmaciqs = (s->rdmaciqs / adap->params.nports) * - adap->params.nports; - s->rdmaciqs = max_t(int, s->rdmaciqs, adap->params.nports); - - if (!is_t4(adap->params.chip)) - s->niscsitq = s->iscsiqsets; + i = num_online_cpus(); + s->ofldqsets = roundup(i, adap->params.nports); + } else { + s->ofldqsets = adap->params.nports; + } } for (i = 0; i < ARRAY_SIZE(s->ethrxq); i++) { @@ -4434,47 +4074,8 @@ static void cfg_queues(struct adapter *adap) for (i = 0; i < ARRAY_SIZE(s->ofldtxq); i++) s->ofldtxq[i].q.size = 1024; - for (i = 0; i < ARRAY_SIZE(s->iscsirxq); i++) { - struct sge_ofld_rxq *r = &s->iscsirxq[i]; - - init_rspq(adap, &r->rspq, 5, 1, 1024, 64); - r->rspq.uld = CXGB4_ULD_ISCSI; - r->fl.size = 72; - } - - if (!is_t4(adap->params.chip)) { - for (i = 0; i < ARRAY_SIZE(s->iscsitrxq); i++) { - struct sge_ofld_rxq *r = &s->iscsitrxq[i]; - - init_rspq(adap, &r->rspq, 5, 1, 1024, 64); - r->rspq.uld = CXGB4_ULD_ISCSIT; - r->fl.size = 72; - } - } - - for (i = 0; i < ARRAY_SIZE(s->rdmarxq); i++) { - struct sge_ofld_rxq *r = &s->rdmarxq[i]; - - init_rspq(adap, &r->rspq, 5, 1, 511, 64); - r->rspq.uld = CXGB4_ULD_RDMA; - r->fl.size = 72; - } - - ciq_size = 64 + adap->vres.cq.size + adap->tids.nftids; - if (ciq_size > SGE_MAX_IQ_SIZE) { - CH_WARN(adap, "CIQ size too small for available IQs\n"); - ciq_size = SGE_MAX_IQ_SIZE; - } - - for (i = 0; i < ARRAY_SIZE(s->rdmaciq); i++) { - struct sge_ofld_rxq *r = &s->rdmaciq[i]; - - init_rspq(adap, &r->rspq, 5, 1, ciq_size, 64); - r->rspq.uld = CXGB4_ULD_RDMA; - } - init_rspq(adap, &s->fw_evtq, 0, 1, 1024, 64); - init_rspq(adap, &s->intrq, 0, 1, 2 * MAX_INGQ, 64); + init_rspq(adap, &s->intrq, 0, 1, 512, 64); } /* @@ -4505,42 +4106,90 @@ static void reduce_ethqs(struct adapter *adap, int n) } } +static int get_msix_info(struct adapter *adap) +{ + struct uld_msix_info *msix_info; + unsigned int max_ingq = 0; + + if (is_offload(adap)) + max_ingq += MAX_OFLD_QSETS * adap->num_ofld_uld; + if (is_pci_uld(adap)) + max_ingq += MAX_OFLD_QSETS * adap->num_uld; + + if (!max_ingq) + goto out; + + msix_info = kcalloc(max_ingq, sizeof(*msix_info), GFP_KERNEL); + if (!msix_info) + return -ENOMEM; + + adap->msix_bmap_ulds.msix_bmap = kcalloc(BITS_TO_LONGS(max_ingq), + sizeof(long), GFP_KERNEL); + if (!adap->msix_bmap_ulds.msix_bmap) { + kfree(msix_info); + return -ENOMEM; + } + spin_lock_init(&adap->msix_bmap_ulds.lock); + adap->msix_info_ulds = msix_info; +out: + return 0; +} + +static void free_msix_info(struct adapter *adap) +{ + if (!(adap->num_uld && adap->num_ofld_uld)) + return; + + kfree(adap->msix_info_ulds); + kfree(adap->msix_bmap_ulds.msix_bmap); +} + /* 2 MSI-X vectors needed for the FW queue and non-data interrupts */ #define EXTRA_VECS 2 static int enable_msix(struct adapter *adap) { - int ofld_need = 0; - int i, want, need, allocated; + int ofld_need = 0, uld_need = 0; + int i, j, want, need, allocated; struct sge *s = &adap->sge; unsigned int nchan = adap->params.nports; struct msix_entry *entries; + int max_ingq = MAX_INGQ; - entries = kmalloc(sizeof(*entries) * (MAX_INGQ + 1), + if (is_pci_uld(adap)) + max_ingq += (MAX_OFLD_QSETS * adap->num_uld); + if (is_offload(adap)) + max_ingq += (MAX_OFLD_QSETS * adap->num_ofld_uld); + entries = kmalloc(sizeof(*entries) * (max_ingq + 1), GFP_KERNEL); if (!entries) return -ENOMEM; - for (i = 0; i < MAX_INGQ + 1; ++i) + /* map for msix */ + if (get_msix_info(adap)) { + adap->params.offload = 0; + adap->params.crypto = 0; + } + + for (i = 0; i < max_ingq + 1; ++i) entries[i].entry = i; want = s->max_ethqsets + EXTRA_VECS; if (is_offload(adap)) { - want += s->rdmaqs + s->rdmaciqs + s->iscsiqsets + - s->niscsitq; - /* need nchan for each possible ULD */ - if (is_t4(adap->params.chip)) - ofld_need = 3 * nchan; - else - ofld_need = 4 * nchan; + want += adap->num_ofld_uld * s->ofldqsets; + ofld_need = adap->num_ofld_uld * nchan; + } + if (is_pci_uld(adap)) { + want += adap->num_uld * s->ofldqsets; + uld_need = adap->num_uld * nchan; } #ifdef CONFIG_CHELSIO_T4_DCB /* For Data Center Bridging we need 8 Ethernet TX Priority Queues for * each port. */ - need = 8 * adap->params.nports + EXTRA_VECS + ofld_need; + need = 8 * adap->params.nports + EXTRA_VECS + ofld_need + uld_need; #else - need = adap->params.nports + EXTRA_VECS + ofld_need; + need = adap->params.nports + EXTRA_VECS + ofld_need + uld_need; #endif allocated = pci_enable_msix_range(adap->pdev, entries, need, want); if (allocated < 0) { @@ -4554,33 +4203,31 @@ static int enable_msix(struct adapter *adap) * Every group gets its minimum requirement and NIC gets top * priority for leftovers. */ - i = allocated - EXTRA_VECS - ofld_need; + i = allocated - EXTRA_VECS - ofld_need - uld_need; if (i < s->max_ethqsets) { s->max_ethqsets = i; if (i < s->ethqsets) reduce_ethqs(adap, i); } - if (is_offload(adap)) { - if (allocated < want) { - s->rdmaqs = nchan; - s->rdmaciqs = nchan; + if (is_uld(adap)) { + if (allocated < want) + s->nqs_per_uld = nchan; + else + s->nqs_per_uld = s->ofldqsets; + } - if (!is_t4(adap->params.chip)) - s->niscsitq = nchan; + for (i = 0; i < (s->max_ethqsets + EXTRA_VECS); ++i) + adap->msix_info[i].vec = entries[i].vector; + if (is_uld(adap)) { + for (j = 0 ; i < allocated; ++i, j++) { + adap->msix_info_ulds[j].vec = entries[i].vector; + adap->msix_info_ulds[j].idx = i; } - - /* leftovers go to OFLD */ - i = allocated - EXTRA_VECS - s->max_ethqsets - - s->rdmaqs - s->rdmaciqs - s->niscsitq; - s->iscsiqsets = (i / nchan) * nchan; /* round down */ - + adap->msix_bmap_ulds.mapsize = j; } - for (i = 0; i < allocated; ++i) - adap->msix_info[i].vec = entries[i].vector; dev_info(adap->pdev_dev, "%d MSI-X vectors allocated, " - "nic %d iscsi %d rdma cpl %d rdma ciq %d\n", - allocated, s->max_ethqsets, s->iscsiqsets, s->rdmaqs, - s->rdmaciqs); + "nic %d per uld %d\n", + allocated, s->max_ethqsets, s->nqs_per_uld); kfree(entries); return 0; @@ -4794,7 +4441,9 @@ static void free_some_resources(struct adapter *adapter) unsigned int i; t4_free_mem(adapter->l2t); + t4_cleanup_sched(adapter); t4_free_mem(adapter->tids.tid_tab); + cxgb4_cleanup_tc_u32(adapter); kfree(adapter->sge.egr_map); kfree(adapter->sge.ingr_map); kfree(adapter->sge.starving_fl); @@ -4845,21 +4494,59 @@ static int get_chip_type(struct pci_dev *pdev, u32 pl_rev) } #ifdef CONFIG_PCI_IOV +static void dummy_setup(struct net_device *dev) +{ + dev->type = ARPHRD_NONE; + dev->mtu = 0; + dev->hard_header_len = 0; + dev->addr_len = 0; + dev->tx_queue_len = 0; + dev->flags |= IFF_NOARP; + dev->priv_flags |= IFF_NO_QUEUE; + + /* Initialize the device structure. */ + dev->netdev_ops = &cxgb4_mgmt_netdev_ops; + dev->ethtool_ops = &cxgb4_mgmt_ethtool_ops; + dev->destructor = free_netdev; +} + +static int config_mgmt_dev(struct pci_dev *pdev) +{ + struct adapter *adap = pci_get_drvdata(pdev); + struct net_device *netdev; + struct port_info *pi; + char name[IFNAMSIZ]; + int err; + + snprintf(name, IFNAMSIZ, "mgmtpf%d%d", adap->adap_idx, adap->pf); + netdev = alloc_netdev(0, name, NET_NAME_UNKNOWN, dummy_setup); + if (!netdev) + return -ENOMEM; + + pi = netdev_priv(netdev); + pi->adapter = adap; + SET_NETDEV_DEV(netdev, &pdev->dev); + + adap->port[0] = netdev; + + err = register_netdev(adap->port[0]); + if (err) { + pr_info("Unable to register VF mgmt netdev %s\n", name); + free_netdev(adap->port[0]); + adap->port[0] = NULL; + return err; + } + return 0; +} + static int cxgb4_iov_configure(struct pci_dev *pdev, int num_vfs) { + struct adapter *adap = pci_get_drvdata(pdev); int err = 0; int current_vfs = pci_num_vf(pdev); u32 pcie_fw; - void __iomem *regs; - - regs = pci_ioremap_bar(pdev, 0); - if (!regs) { - dev_err(&pdev->dev, "cannot map device registers\n"); - return -ENOMEM; - } - pcie_fw = readl(regs + PCIE_FW_A); - iounmap(regs); + pcie_fw = readl(adap->regs + PCIE_FW_A); /* Check if cxgb4 is the MASTER and fw is initialized */ if (!(pcie_fw & PCIE_FW_INIT_F) || !(pcie_fw & PCIE_FW_MASTER_VLD_F) || @@ -4886,6 +4573,14 @@ static int cxgb4_iov_configure(struct pci_dev *pdev, int num_vfs) */ if (!num_vfs) { pci_disable_sriov(pdev); + if (adap->port[0]) { + unregister_netdev(adap->port[0]); + adap->port[0] = NULL; + } + /* free VF resources */ + kfree(adap->vfinfo); + adap->vfinfo = NULL; + adap->num_vfs = 0; return num_vfs; } @@ -4893,7 +4588,17 @@ static int cxgb4_iov_configure(struct pci_dev *pdev, int num_vfs) err = pci_enable_sriov(pdev, num_vfs); if (err) return err; + + adap->num_vfs = num_vfs; + err = config_mgmt_dev(pdev); + if (err) + return err; } + + adap->vfinfo = kcalloc(adap->num_vfs, + sizeof(struct vf_info), GFP_KERNEL); + if (adap->vfinfo) + fill_vf_station_mac_addr(adap); return num_vfs; } #endif @@ -4904,9 +4609,11 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) struct port_info *pi; bool highdma = false; struct adapter *adapter = NULL; + struct net_device *netdev; void __iomem *regs; u32 whoami, pl_rev; enum chip_type chip; + static int adap_idx = 1; printk_once(KERN_INFO "%s - version %s\n", DRV_DESC, DRV_VERSION); @@ -4941,7 +4648,9 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) func = CHELSIO_CHIP_VERSION(chip) <= CHELSIO_T5 ? SOURCEPF_G(whoami) : T6_SOURCEPF_G(whoami); if (func != ent->driver_data) { +#ifndef CONFIG_PCI_IOV iounmap(regs); +#endif pci_disable_device(pdev); pci_save_state(pdev); /* to restore SR-IOV later */ goto sriov; @@ -4973,6 +4682,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) err = -ENOMEM; goto out_unmap_bar0; } + adap_idx++; adapter->workq = create_singlethread_workqueue("cxgb4"); if (!adapter->workq) { @@ -5059,8 +4769,6 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) T6_STATMODE_V(0))); for_each_port(adapter, i) { - struct net_device *netdev; - netdev = alloc_etherdev_mq(sizeof(struct port_info), MAX_ETH_QSETS); if (!netdev) { @@ -5080,7 +4788,8 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->hw_features = NETIF_F_SG | TSO_FLAGS | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM | NETIF_F_RXHASH | - NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; + NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_TC; if (highdma) netdev->hw_features |= NETIF_F_HIGHDMA; netdev->features |= netdev->hw_features; @@ -5154,10 +4863,26 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) } } #endif - if (is_offload(adapter) && tid_init(&adapter->tids) < 0) { + + for_each_port(adapter, i) { + pi = adap2pinfo(adapter, i); + pi->sched_tbl = t4_init_sched(adapter->params.nsched_cls); + if (!pi->sched_tbl) + dev_warn(&pdev->dev, + "could not activate scheduling on port %d\n", + i); + } + + if (tid_init(&adapter->tids) < 0) { dev_warn(&pdev->dev, "could not allocate TID table, " "continuing\n"); adapter->params.offload = 0; + } else { + adapter->tc_u32 = cxgb4_init_tc_u32(adapter, + CXGB4_MAX_LINK_HANDLE); + if (!adapter->tc_u32) + dev_warn(&pdev->dev, + "could not offload tc u32, continuing\n"); } if (is_offload(adapter)) { @@ -5179,8 +4904,11 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* See what interrupts we'll be using */ if (msi > 1 && enable_msix(adapter) == 0) adapter->flags |= USING_MSIX; - else if (msi > 0 && pci_enable_msi(pdev) == 0) + else if (msi > 0 && pci_enable_msi(pdev) == 0) { adapter->flags |= USING_MSI; + if (msi > 1) + free_msix_info(adapter); + } /* check for PCI Express bandwidth capabiltites */ cxgb4_check_pcie_caps(adapter); @@ -5224,10 +4952,15 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* PCIe EEH recovery on powerpc platforms needs fundamental reset */ pdev->needs_freset = 1; - if (is_offload(adapter)) - attach_ulds(adapter); + if (is_uld(adapter)) { + mutex_lock(&uld_mutex); + list_add_tail(&adapter->list_node, &adapter_list); + mutex_unlock(&uld_mutex); + } print_adapter_info(adapter); + setup_fw_sge_queues(adapter); + return 0; sriov: #ifdef CONFIG_PCI_IOV @@ -5241,11 +4974,48 @@ sriov: "instantiated %u virtual functions\n", num_vf[func]); } -#endif + + adapter = kzalloc(sizeof(*adapter), GFP_KERNEL); + if (!adapter) { + err = -ENOMEM; + goto free_pci_region; + } + + adapter->pdev = pdev; + adapter->pdev_dev = &pdev->dev; + adapter->name = pci_name(pdev); + adapter->mbox = func; + adapter->pf = func; + adapter->regs = regs; + adapter->adap_idx = adap_idx; + adapter->mbox_log = kzalloc(sizeof(*adapter->mbox_log) + + (sizeof(struct mbox_cmd) * + T4_OS_LOG_MBOX_CMDS), + GFP_KERNEL); + if (!adapter->mbox_log) { + err = -ENOMEM; + goto free_adapter; + } + pci_set_drvdata(pdev, adapter); + return 0; + + free_adapter: + kfree(adapter); + free_pci_region: + iounmap(regs); + pci_disable_sriov(pdev); + pci_release_regions(pdev); + return err; +#else return 0; +#endif out_free_dev: free_some_resources(adapter); + if (adapter->flags & USING_MSIX) + free_msix_info(adapter); + if (adapter->num_uld || adapter->num_ofld_uld) + t4_uld_mem_free(adapter); out_unmap_bar: if (!is_t4(adapter->params.chip)) iounmap(adapter->bar2); @@ -5269,12 +5039,12 @@ static void remove_one(struct pci_dev *pdev) { struct adapter *adapter = pci_get_drvdata(pdev); -#ifdef CONFIG_PCI_IOV - pci_disable_sriov(pdev); - -#endif + if (!adapter) { + pci_release_regions(pdev); + return; + } - if (adapter) { + if (adapter->pf == 4) { int i; /* Tear down per-adapter Work Queue first since it can contain @@ -5282,7 +5052,7 @@ static void remove_one(struct pci_dev *pdev) */ destroy_workqueue(adapter->workq); - if (is_offload(adapter)) + if (is_uld(adapter)) detach_ulds(adapter); disable_interrupts(adapter); @@ -5296,17 +5066,15 @@ static void remove_one(struct pci_dev *pdev) /* If we allocated filters, free up state associated with any * valid filters ... */ - if (adapter->tids.ftid_tab) { - struct filter_entry *f = &adapter->tids.ftid_tab[0]; - for (i = 0; i < (adapter->tids.nftids + - adapter->tids.nsftids); i++, f++) - if (f->valid) - clear_filter(adapter, f); - } + clear_all_filters(adapter); if (adapter->flags & FULL_INIT_DONE) cxgb_down(adapter); + if (adapter->flags & USING_MSIX) + free_msix_info(adapter); + if (adapter->num_uld || adapter->num_ofld_uld) + t4_uld_mem_free(adapter); free_some_resources(adapter); #if IS_ENABLED(CONFIG_IPV6) t4_cleanup_clip_tbl(adapter); @@ -5323,8 +5091,64 @@ static void remove_one(struct pci_dev *pdev) kfree(adapter->mbox_log); synchronize_rcu(); kfree(adapter); - } else + } +#ifdef CONFIG_PCI_IOV + else { + if (adapter->port[0]) + unregister_netdev(adapter->port[0]); + iounmap(adapter->regs); + kfree(adapter->vfinfo); + kfree(adapter); + pci_disable_sriov(pdev); + pci_release_regions(pdev); + } +#endif +} + +/* "Shutdown" quiesces the device, stopping Ingress Packet and Interrupt + * delivery. This is essentially a stripped down version of the PCI remove() + * function where we do the minimal amount of work necessary to shutdown any + * further activity. + */ +static void shutdown_one(struct pci_dev *pdev) +{ + struct adapter *adapter = pci_get_drvdata(pdev); + + /* As with remove_one() above (see extended comment), we only want do + * do cleanup on PCI Devices which went all the way through init_one() + * ... + */ + if (!adapter) { + pci_release_regions(pdev); + return; + } + + if (adapter->pf == 4) { + int i; + + for_each_port(adapter, i) + if (adapter->port[i]->reg_state == NETREG_REGISTERED) + cxgb_close(adapter->port[i]); + + t4_uld_clean_up(adapter); + disable_interrupts(adapter); + disable_msi(adapter); + + t4_sge_stop(adapter); + if (adapter->flags & FW_OK) + t4_fw_bye(adapter, adapter->mbox); + } +#ifdef CONFIG_PCI_IOV + else { + if (adapter->port[0]) + unregister_netdev(adapter->port[0]); + iounmap(adapter->regs); + kfree(adapter->vfinfo); + kfree(adapter); + pci_disable_sriov(pdev); pci_release_regions(pdev); + } +#endif } static struct pci_driver cxgb4_driver = { @@ -5332,7 +5156,7 @@ static struct pci_driver cxgb4_driver = { .id_table = cxgb4_pci_tbl, .probe = init_one, .remove = remove_one, - .shutdown = remove_one, + .shutdown = shutdown_one, #ifdef CONFIG_PCI_IOV .sriov_configure = cxgb4_iov_configure, #endif |