diff options
Diffstat (limited to 'drivers/infiniband/hw/hns/hns_roce_hw_v2.c')
-rw-r--r-- | drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 596 |
1 files changed, 493 insertions, 103 deletions
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 543fa1504cd3..1c54390e1c85 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -587,7 +587,7 @@ out: roce_set_field(sq_db.parameter, V2_DB_PARAMETER_SL_M, V2_DB_PARAMETER_SL_S, qp->sl); - hns_roce_write64_k((__le32 *)&sq_db, qp->sq.db_reg_l); + hns_roce_write64(hr_dev, (__le32 *)&sq_db, qp->sq.db_reg_l); qp->sq_next_wqe = ind; qp->next_sge = sge_ind; @@ -712,6 +712,113 @@ out: return ret; } +static int hns_roce_v2_cmd_hw_reseted(struct hns_roce_dev *hr_dev, + unsigned long instance_stage, + unsigned long reset_stage) +{ + /* When hardware reset has been completed once or more, we should stop + * sending mailbox&cmq&doorbell to hardware. If now in .init_instance() + * function, we should exit with error. If now at HNAE3_INIT_CLIENT + * stage of soft reset process, we should exit with error, and then + * HNAE3_INIT_CLIENT related process can rollback the operation like + * notifing hardware to free resources, HNAE3_INIT_CLIENT related + * process will exit with error to notify NIC driver to reschedule soft + * reset process once again. + */ + hr_dev->is_reset = true; + hr_dev->dis_db = true; + + if (reset_stage == HNS_ROCE_STATE_RST_INIT || + instance_stage == HNS_ROCE_STATE_INIT) + return CMD_RST_PRC_EBUSY; + + return CMD_RST_PRC_SUCCESS; +} + +static int hns_roce_v2_cmd_hw_resetting(struct hns_roce_dev *hr_dev, + unsigned long instance_stage, + unsigned long reset_stage) +{ + struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv; + struct hnae3_handle *handle = priv->handle; + const struct hnae3_ae_ops *ops = handle->ae_algo->ops; + + /* When hardware reset is detected, we should stop sending mailbox&cmq& + * doorbell to hardware. If now in .init_instance() function, we should + * exit with error. If now at HNAE3_INIT_CLIENT stage of soft reset + * process, we should exit with error, and then HNAE3_INIT_CLIENT + * related process can rollback the operation like notifing hardware to + * free resources, HNAE3_INIT_CLIENT related process will exit with + * error to notify NIC driver to reschedule soft reset process once + * again. + */ + hr_dev->dis_db = true; + if (!ops->get_hw_reset_stat(handle)) + hr_dev->is_reset = true; + + if (!hr_dev->is_reset || reset_stage == HNS_ROCE_STATE_RST_INIT || + instance_stage == HNS_ROCE_STATE_INIT) + return CMD_RST_PRC_EBUSY; + + return CMD_RST_PRC_SUCCESS; +} + +static int hns_roce_v2_cmd_sw_resetting(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv; + struct hnae3_handle *handle = priv->handle; + const struct hnae3_ae_ops *ops = handle->ae_algo->ops; + + /* When software reset is detected at .init_instance() function, we + * should stop sending mailbox&cmq&doorbell to hardware, and exit + * with error. + */ + hr_dev->dis_db = true; + if (ops->ae_dev_reset_cnt(handle) != hr_dev->reset_cnt) + hr_dev->is_reset = true; + + return CMD_RST_PRC_EBUSY; +} + +static int hns_roce_v2_rst_process_cmd(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv; + struct hnae3_handle *handle = priv->handle; + const struct hnae3_ae_ops *ops = handle->ae_algo->ops; + unsigned long instance_stage; /* the current instance stage */ + unsigned long reset_stage; /* the current reset stage */ + unsigned long reset_cnt; + bool sw_resetting; + bool hw_resetting; + + if (hr_dev->is_reset) + return CMD_RST_PRC_SUCCESS; + + /* Get information about reset from NIC driver or RoCE driver itself, + * the meaning of the following variables from NIC driver are described + * as below: + * reset_cnt -- The count value of completed hardware reset. + * hw_resetting -- Whether hardware device is resetting now. + * sw_resetting -- Whether NIC's software reset process is running now. + */ + instance_stage = handle->rinfo.instance_state; + reset_stage = handle->rinfo.reset_state; + reset_cnt = ops->ae_dev_reset_cnt(handle); + hw_resetting = ops->get_hw_reset_stat(handle); + sw_resetting = ops->ae_dev_resetting(handle); + + if (reset_cnt != hr_dev->reset_cnt) + return hns_roce_v2_cmd_hw_reseted(hr_dev, instance_stage, + reset_stage); + else if (hw_resetting) + return hns_roce_v2_cmd_hw_resetting(hr_dev, instance_stage, + reset_stage); + else if (sw_resetting && instance_stage == HNS_ROCE_STATE_INIT) + return hns_roce_v2_cmd_sw_resetting(hr_dev); + + return 0; +} + static int hns_roce_cmq_space(struct hns_roce_v2_cmq_ring *ring) { int ntu = ring->next_to_use; @@ -892,8 +999,8 @@ static int hns_roce_cmq_csq_clean(struct hns_roce_dev *hr_dev) return clean; } -static int hns_roce_cmq_send(struct hns_roce_dev *hr_dev, - struct hns_roce_cmq_desc *desc, int num) +static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, + struct hns_roce_cmq_desc *desc, int num) { struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv; struct hns_roce_v2_cmq_ring *csq = &priv->cmq.csq; @@ -905,9 +1012,6 @@ static int hns_roce_cmq_send(struct hns_roce_dev *hr_dev, int ret = 0; int ntc; - if (hr_dev->is_reset) - return 0; - spin_lock_bh(&csq->lock); if (num > hns_roce_cmq_space(csq)) { @@ -982,6 +1086,30 @@ static int hns_roce_cmq_send(struct hns_roce_dev *hr_dev, return ret; } +int hns_roce_cmq_send(struct hns_roce_dev *hr_dev, + struct hns_roce_cmq_desc *desc, int num) +{ + int retval; + int ret; + + ret = hns_roce_v2_rst_process_cmd(hr_dev); + if (ret == CMD_RST_PRC_SUCCESS) + return 0; + if (ret == CMD_RST_PRC_EBUSY) + return ret; + + ret = __hns_roce_cmq_send(hr_dev, desc, num); + if (ret) { + retval = hns_roce_v2_rst_process_cmd(hr_dev); + if (retval == CMD_RST_PRC_SUCCESS) + return 0; + else if (retval == CMD_RST_PRC_EBUSY) + return retval; + } + + return ret; +} + static int hns_roce_cmq_query_hw_info(struct hns_roce_dev *hr_dev) { struct hns_roce_query_version *resp; @@ -1078,6 +1206,44 @@ static int hns_roce_query_pf_resource(struct hns_roce_dev *hr_dev) hr_dev->caps.sl_num = roce_get_field(req_b->qid_idx_sl_num, PF_RES_DATA_3_PF_SL_NUM_M, PF_RES_DATA_3_PF_SL_NUM_S); + hr_dev->caps.sccc_bt_num = roce_get_field(req_b->sccc_bt_idx_num, + PF_RES_DATA_4_PF_SCCC_BT_NUM_M, + PF_RES_DATA_4_PF_SCCC_BT_NUM_S); + + return 0; +} + +static int hns_roce_query_pf_timer_resource(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_pf_timer_res_a *req_a; + struct hns_roce_cmq_desc desc[2]; + int ret, i; + + for (i = 0; i < 2; i++) { + hns_roce_cmq_setup_basic_desc(&desc[i], + HNS_ROCE_OPC_QUERY_PF_TIMER_RES, + true); + + if (i == 0) + desc[i].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); + else + desc[i].flag &= ~cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); + } + + ret = hns_roce_cmq_send(hr_dev, desc, 2); + if (ret) + return ret; + + req_a = (struct hns_roce_pf_timer_res_a *)desc[0].data; + + hr_dev->caps.qpc_timer_bt_num = + roce_get_field(req_a->qpc_timer_bt_idx_num, + PF_RES_DATA_1_PF_QPC_TIMER_BT_NUM_M, + PF_RES_DATA_1_PF_QPC_TIMER_BT_NUM_S); + hr_dev->caps.cqc_timer_bt_num = + roce_get_field(req_a->cqc_timer_bt_idx_num, + PF_RES_DATA_2_PF_CQC_TIMER_BT_NUM_M, + PF_RES_DATA_2_PF_CQC_TIMER_BT_NUM_S); return 0; } @@ -1193,6 +1359,14 @@ static int hns_roce_alloc_vf_resource(struct hns_roce_dev *hr_dev) VF_RES_B_DATA_3_VF_SL_NUM_M, VF_RES_B_DATA_3_VF_SL_NUM_S, HNS_ROCE_VF_SL_NUM); + + roce_set_field(req_b->vf_sccc_idx_num, + VF_RES_B_DATA_4_VF_SCCC_BT_IDX_M, + VF_RES_B_DATA_4_VF_SCCC_BT_IDX_S, 0); + roce_set_field(req_b->vf_sccc_idx_num, + VF_RES_B_DATA_4_VF_SCCC_BT_NUM_M, + VF_RES_B_DATA_4_VF_SCCC_BT_NUM_S, + HNS_ROCE_VF_SCCC_BT_NUM); } } @@ -1205,6 +1379,7 @@ static int hns_roce_v2_set_bt(struct hns_roce_dev *hr_dev) u8 qpc_hop_num = hr_dev->caps.qpc_hop_num; u8 cqc_hop_num = hr_dev->caps.cqc_hop_num; u8 mpt_hop_num = hr_dev->caps.mpt_hop_num; + u8 sccc_hop_num = hr_dev->caps.sccc_hop_num; struct hns_roce_cfg_bt_attr *req; struct hns_roce_cmq_desc desc; @@ -1252,6 +1427,20 @@ static int hns_roce_v2_set_bt(struct hns_roce_dev *hr_dev) CFG_BT_ATTR_DATA_3_VF_MPT_HOPNUM_S, mpt_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 : mpt_hop_num); + roce_set_field(req->vf_sccc_cfg, + CFG_BT_ATTR_DATA_4_VF_SCCC_BA_PGSZ_M, + CFG_BT_ATTR_DATA_4_VF_SCCC_BA_PGSZ_S, + hr_dev->caps.sccc_ba_pg_sz + PG_SHIFT_OFFSET); + roce_set_field(req->vf_sccc_cfg, + CFG_BT_ATTR_DATA_4_VF_SCCC_BUF_PGSZ_M, + CFG_BT_ATTR_DATA_4_VF_SCCC_BUF_PGSZ_S, + hr_dev->caps.sccc_buf_pg_sz + PG_SHIFT_OFFSET); + roce_set_field(req->vf_sccc_cfg, + CFG_BT_ATTR_DATA_4_VF_SCCC_HOPNUM_M, + CFG_BT_ATTR_DATA_4_VF_SCCC_HOPNUM_S, + sccc_hop_num == + HNS_ROCE_HOP_NUM_0 ? 0 : sccc_hop_num); + return hns_roce_cmq_send(hr_dev, &desc, 1); } @@ -1289,6 +1478,16 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) return ret; } + if (hr_dev->pci_dev->revision == 0x21) { + ret = hns_roce_query_pf_timer_resource(hr_dev); + if (ret) { + dev_err(hr_dev->dev, + "Query pf timer resource fail, ret = %d.\n", + ret); + return ret; + } + } + ret = hns_roce_alloc_vf_resource(hr_dev); if (ret) { dev_err(hr_dev->dev, "Allocate vf resource fail, ret = %d.\n", @@ -1313,6 +1512,7 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) caps->max_wqes = HNS_ROCE_V2_MAX_WQE_NUM; caps->num_cqs = HNS_ROCE_V2_MAX_CQ_NUM; caps->num_srqs = HNS_ROCE_V2_MAX_SRQ_NUM; + caps->min_cqes = HNS_ROCE_MIN_CQE_NUM; caps->max_cqes = HNS_ROCE_V2_MAX_CQE_NUM; caps->max_srqwqes = HNS_ROCE_V2_MAX_SRQWQE_NUM; caps->max_sq_sg = HNS_ROCE_V2_MAX_SQ_SGE_NUM; @@ -1366,7 +1566,7 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) caps->mpt_ba_pg_sz = 0; caps->mpt_buf_pg_sz = 0; caps->mpt_hop_num = HNS_ROCE_CONTEXT_HOP_NUM; - caps->pbl_ba_pg_sz = 0; + caps->pbl_ba_pg_sz = 2; caps->pbl_buf_pg_sz = 0; caps->pbl_hop_num = HNS_ROCE_PBL_HOP_NUM; caps->mtt_ba_pg_sz = 0; @@ -1408,9 +1608,27 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) caps->max_srq_wrs = HNS_ROCE_V2_MAX_SRQ_WR; caps->max_srq_sges = HNS_ROCE_V2_MAX_SRQ_SGE; - if (hr_dev->pci_dev->revision == 0x21) + if (hr_dev->pci_dev->revision == 0x21) { caps->flags |= HNS_ROCE_CAP_FLAG_ATOMIC | - HNS_ROCE_CAP_FLAG_SRQ; + HNS_ROCE_CAP_FLAG_SRQ | + HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL; + + caps->num_qpc_timer = HNS_ROCE_V2_MAX_QPC_TIMER_NUM; + caps->qpc_timer_entry_sz = HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ; + caps->qpc_timer_ba_pg_sz = 0; + caps->qpc_timer_buf_pg_sz = 0; + caps->qpc_timer_hop_num = HNS_ROCE_HOP_NUM_0; + caps->num_cqc_timer = HNS_ROCE_V2_MAX_CQC_TIMER_NUM; + caps->cqc_timer_entry_sz = HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ; + caps->cqc_timer_ba_pg_sz = 0; + caps->cqc_timer_buf_pg_sz = 0; + caps->cqc_timer_hop_num = HNS_ROCE_HOP_NUM_0; + + caps->sccc_entry_sz = HNS_ROCE_V2_SCCC_ENTRY_SZ; + caps->sccc_ba_pg_sz = 0; + caps->sccc_buf_pg_sz = 0; + caps->sccc_hop_num = HNS_ROCE_SCCC_HOP_NUM; + } ret = hns_roce_v2_set_bt(hr_dev); if (ret) @@ -1611,7 +1829,8 @@ static void hns_roce_free_link_table(struct hns_roce_dev *hr_dev, static int hns_roce_v2_init(struct hns_roce_dev *hr_dev) { struct hns_roce_v2_priv *priv = hr_dev->priv; - int ret; + int qpc_count, cqc_count; + int ret, i; /* TSQ includes SQ doorbell and ack doorbell */ ret = hns_roce_init_link_table(hr_dev, TSQ_LINK_TABLE); @@ -1626,8 +1845,40 @@ static int hns_roce_v2_init(struct hns_roce_dev *hr_dev) goto err_tpq_init_failed; } + /* Alloc memory for QPC Timer buffer space chunk*/ + for (qpc_count = 0; qpc_count < hr_dev->caps.qpc_timer_bt_num; + qpc_count++) { + ret = hns_roce_table_get(hr_dev, &hr_dev->qpc_timer_table, + qpc_count); + if (ret) { + dev_err(hr_dev->dev, "QPC Timer get failed\n"); + goto err_qpc_timer_failed; + } + } + + /* Alloc memory for CQC Timer buffer space chunk*/ + for (cqc_count = 0; cqc_count < hr_dev->caps.cqc_timer_bt_num; + cqc_count++) { + ret = hns_roce_table_get(hr_dev, &hr_dev->cqc_timer_table, + cqc_count); + if (ret) { + dev_err(hr_dev->dev, "CQC Timer get failed\n"); + goto err_cqc_timer_failed; + } + } + return 0; +err_cqc_timer_failed: + for (i = 0; i < cqc_count; i++) + hns_roce_table_put(hr_dev, &hr_dev->cqc_timer_table, i); + +err_qpc_timer_failed: + for (i = 0; i < qpc_count; i++) + hns_roce_table_put(hr_dev, &hr_dev->qpc_timer_table, i); + + hns_roce_free_link_table(hr_dev, &priv->tpq); + err_tpq_init_failed: hns_roce_free_link_table(hr_dev, &priv->tsq); @@ -1735,6 +1986,9 @@ static int hns_roce_v2_chk_mbox(struct hns_roce_dev *hr_dev, status = hns_roce_v2_cmd_complete(hr_dev); if (status != 0x1) { + if (status == CMD_RST_PRC_EBUSY) + return status; + dev_err(dev, "mailbox status 0x%x!\n", status); return -EBUSY; } @@ -1831,12 +2085,10 @@ static int hns_roce_v2_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port, static int set_mtpt_pbl(struct hns_roce_v2_mpt_entry *mpt_entry, struct hns_roce_mr *mr) { - struct scatterlist *sg; + struct sg_dma_page_iter sg_iter; u64 page_addr; u64 *pages; - int i, j; - int len; - int entry; + int i; mpt_entry->pbl_size = cpu_to_le32(mr->pbl_size); mpt_entry->pbl_ba_l = cpu_to_le32(lower_32_bits(mr->pbl_ba >> 3)); @@ -1849,17 +2101,14 @@ static int set_mtpt_pbl(struct hns_roce_v2_mpt_entry *mpt_entry, return -ENOMEM; i = 0; - for_each_sg(mr->umem->sg_head.sgl, sg, mr->umem->nmap, entry) { - len = sg_dma_len(sg) >> PAGE_SHIFT; - for (j = 0; j < len; ++j) { - page_addr = sg_dma_address(sg) + - (j << mr->umem->page_shift); - pages[i] = page_addr >> 6; - /* Record the first 2 entry directly to MTPT table */ - if (i >= HNS_ROCE_V2_MAX_INNER_MTPT_NUM - 1) - goto found; - i++; - } + for_each_sg_dma_page(mr->umem->sg_head.sgl, &sg_iter, mr->umem->nmap, 0) { + page_addr = sg_page_iter_dma_address(&sg_iter); + pages[i] = page_addr >> 6; + + /* Record the first 2 entry directly to MTPT table */ + if (i >= HNS_ROCE_V2_MAX_INNER_MTPT_NUM - 1) + goto found; + i++; } found: mpt_entry->pa0_l = cpu_to_le32(lower_32_bits(pages[0])); @@ -1941,6 +2190,9 @@ static int hns_roce_v2_rereg_write_mtpt(struct hns_roce_dev *hr_dev, struct hns_roce_v2_mpt_entry *mpt_entry = mb_buf; int ret = 0; + roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M, + V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_VALID); + if (flags & IB_MR_REREG_PD) { roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M, V2_MPT_BYTE_4_PD_S, pdn); @@ -2245,6 +2497,7 @@ static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev, static int hns_roce_v2_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) { + struct hns_roce_dev *hr_dev = to_hr_dev(ibcq->device); struct hns_roce_cq *hr_cq = to_hr_cq(ibcq); u32 notification_flag; u32 doorbell[2]; @@ -2270,7 +2523,7 @@ static int hns_roce_v2_req_notify_cq(struct ib_cq *ibcq, roce_set_bit(doorbell[1], V2_CQ_DB_PARAMETER_NOTIFY_S, notification_flag); - hns_roce_write64_k(doorbell, hr_cq->cq_db_l); + hns_roce_write64(hr_dev, doorbell, hr_cq->cq_db_l); return 0; } @@ -2663,17 +2916,33 @@ static int hns_roce_v2_set_hem(struct hns_roce_dev *hr_dev, case HEM_TYPE_SRQC: op = HNS_ROCE_CMD_WRITE_SRQC_BT0; break; + case HEM_TYPE_SCCC: + op = HNS_ROCE_CMD_WRITE_SCCC_BT0; + break; + case HEM_TYPE_QPC_TIMER: + op = HNS_ROCE_CMD_WRITE_QPC_TIMER_BT0; + break; + case HEM_TYPE_CQC_TIMER: + op = HNS_ROCE_CMD_WRITE_CQC_TIMER_BT0; + break; default: dev_warn(dev, "Table %d not to be written by mailbox!\n", table->type); return 0; } + + if (table->type == HEM_TYPE_SCCC && step_idx) + return 0; + op += step_idx; mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); + if (table->type == HEM_TYPE_SCCC) + obj = mhop.l0_idx; + if (check_whether_last_step(hop_num, step_idx)) { hem = table->hem[hem_idx]; for (hns_roce_hem_first(hem, &iter); @@ -2722,6 +2991,10 @@ static int hns_roce_v2_clear_hem(struct hns_roce_dev *hr_dev, case HEM_TYPE_CQC: op = HNS_ROCE_CMD_DESTROY_CQC_BT0; break; + case HEM_TYPE_SCCC: + case HEM_TYPE_QPC_TIMER: + case HEM_TYPE_CQC_TIMER: + break; case HEM_TYPE_SRQC: op = HNS_ROCE_CMD_DESTROY_SRQC_BT0; break; @@ -2730,6 +3003,12 @@ static int hns_roce_v2_clear_hem(struct hns_roce_dev *hr_dev, table->type); return 0; } + + if (table->type == HEM_TYPE_SCCC || + table->type == HEM_TYPE_QPC_TIMER || + table->type == HEM_TYPE_CQC_TIMER) + return 0; + op += step_idx; mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); @@ -3686,10 +3965,16 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, V2_QPC_BYTE_212_LSN_S, 0); if (attr_mask & IB_QP_TIMEOUT) { - roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_AT_M, - V2_QPC_BYTE_28_AT_S, attr->timeout); - roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_AT_M, - V2_QPC_BYTE_28_AT_S, 0); + if (attr->timeout < 31) { + roce_set_field(context->byte_28_at_fl, + V2_QPC_BYTE_28_AT_M, V2_QPC_BYTE_28_AT_S, + attr->timeout); + roce_set_field(qpc_mask->byte_28_at_fl, + V2_QPC_BYTE_28_AT_M, V2_QPC_BYTE_28_AT_S, + 0); + } else { + dev_warn(dev, "Local ACK timeout shall be 0 to 30.\n"); + } } roce_set_field(context->byte_172_sq_psn, V2_QPC_BYTE_172_SQ_CUR_PSN_M, @@ -3742,7 +4027,7 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, struct device *dev = hr_dev->dev; int ret = -EINVAL; - context = kcalloc(2, sizeof(*context), GFP_KERNEL); + context = kcalloc(2, sizeof(*context), GFP_ATOMIC); if (!context) return -ENOMEM; @@ -3789,13 +4074,16 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, roce_set_field(qpc_mask->byte_160_sq_ci_pi, V2_QPC_BYTE_160_SQ_PRODUCER_IDX_M, V2_QPC_BYTE_160_SQ_PRODUCER_IDX_S, 0); - roce_set_field(context->byte_84_rq_ci_pi, + + if (!ibqp->srq) { + roce_set_field(context->byte_84_rq_ci_pi, V2_QPC_BYTE_84_RQ_PRODUCER_IDX_M, V2_QPC_BYTE_84_RQ_PRODUCER_IDX_S, hr_qp->rq.head); - roce_set_field(qpc_mask->byte_84_rq_ci_pi, + roce_set_field(qpc_mask->byte_84_rq_ci_pi, V2_QPC_BYTE_84_RQ_PRODUCER_IDX_M, V2_QPC_BYTE_84_RQ_PRODUCER_IDX_S, 0); + } } if (attr_mask & IB_QP_AV) { @@ -4224,6 +4512,59 @@ static int hns_roce_v2_destroy_qp(struct ib_qp *ibqp) return 0; } +static int hns_roce_v2_qp_flow_control_init(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp) +{ + struct hns_roce_sccc_clr_done *resp; + struct hns_roce_sccc_clr *clr; + struct hns_roce_cmq_desc desc; + int ret, i; + + mutex_lock(&hr_dev->qp_table.scc_mutex); + + /* set scc ctx clear done flag */ + hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_RESET_SCCC, false); + ret = hns_roce_cmq_send(hr_dev, &desc, 1); + if (ret) { + dev_err(hr_dev->dev, "Reset SCC ctx failed(%d)\n", ret); + goto out; + } + + /* clear scc context */ + hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CLR_SCCC, false); + clr = (struct hns_roce_sccc_clr *)desc.data; + clr->qpn = cpu_to_le32(hr_qp->qpn); + ret = hns_roce_cmq_send(hr_dev, &desc, 1); + if (ret) { + dev_err(hr_dev->dev, "Clear SCC ctx failed(%d)\n", ret); + goto out; + } + + /* query scc context clear is done or not */ + resp = (struct hns_roce_sccc_clr_done *)desc.data; + for (i = 0; i <= HNS_ROCE_CMQ_SCC_CLR_DONE_CNT; i++) { + hns_roce_cmq_setup_basic_desc(&desc, + HNS_ROCE_OPC_QUERY_SCCC, true); + ret = hns_roce_cmq_send(hr_dev, &desc, 1); + if (ret) { + dev_err(hr_dev->dev, "Query clr cmq failed(%d)\n", ret); + goto out; + } + + if (resp->clr_done) + goto out; + + msleep(20); + } + + dev_err(hr_dev->dev, "Query SCC clr done flag overtime.\n"); + ret = -ETIMEDOUT; + +out: + mutex_unlock(&hr_dev->qp_table.scc_mutex); + return ret; +} + static int hns_roce_v2_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) { struct hns_roce_dev *hr_dev = to_hr_dev(cq->device); @@ -4281,7 +4622,8 @@ static void hns_roce_set_qps_to_err(struct hns_roce_dev *hr_dev, u32 qpn) if (hr_qp->ibqp.uobject) { if (hr_qp->sdb_en == 1) { hr_qp->sq.head = *(int *)(hr_qp->sdb.virt_addr); - hr_qp->rq.head = *(int *)(hr_qp->rdb.virt_addr); + if (hr_qp->rdb_en == 1) + hr_qp->rq.head = *(int *)(hr_qp->rdb.virt_addr); } else { dev_warn(hr_dev->dev, "flush cqe is unsupported in userspace!\n"); return; @@ -4319,64 +4661,19 @@ static void hns_roce_irq_work_handle(struct work_struct *work) dev_warn(dev, "Send queue drained.\n"); break; case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR: - dev_err(dev, "Local work queue catastrophic error.\n"); + dev_err(dev, "Local work queue 0x%x catas error, sub_type:%d\n", + qpn, irq_work->sub_type); hns_roce_set_qps_to_err(irq_work->hr_dev, qpn); - switch (irq_work->sub_type) { - case HNS_ROCE_LWQCE_QPC_ERROR: - dev_err(dev, "QP %d, QPC error.\n", qpn); - break; - case HNS_ROCE_LWQCE_MTU_ERROR: - dev_err(dev, "QP %d, MTU error.\n", qpn); - break; - case HNS_ROCE_LWQCE_WQE_BA_ADDR_ERROR: - dev_err(dev, "QP %d, WQE BA addr error.\n", qpn); - break; - case HNS_ROCE_LWQCE_WQE_ADDR_ERROR: - dev_err(dev, "QP %d, WQE addr error.\n", qpn); - break; - case HNS_ROCE_LWQCE_SQ_WQE_SHIFT_ERROR: - dev_err(dev, "QP %d, WQE shift error.\n", qpn); - break; - default: - dev_err(dev, "Unhandled sub_event type %d.\n", - irq_work->sub_type); - break; - } break; case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR: - dev_err(dev, "Invalid request local work queue error.\n"); + dev_err(dev, "Invalid request local work queue 0x%x error.\n", + qpn); hns_roce_set_qps_to_err(irq_work->hr_dev, qpn); break; case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR: - dev_err(dev, "Local access violation work queue error.\n"); + dev_err(dev, "Local access violation work queue 0x%x error, sub_type:%d\n", + qpn, irq_work->sub_type); hns_roce_set_qps_to_err(irq_work->hr_dev, qpn); - switch (irq_work->sub_type) { - case HNS_ROCE_LAVWQE_R_KEY_VIOLATION: - dev_err(dev, "QP %d, R_key violation.\n", qpn); - break; - case HNS_ROCE_LAVWQE_LENGTH_ERROR: - dev_err(dev, "QP %d, length error.\n", qpn); - break; - case HNS_ROCE_LAVWQE_VA_ERROR: - dev_err(dev, "QP %d, VA error.\n", qpn); - break; - case HNS_ROCE_LAVWQE_PD_ERROR: - dev_err(dev, "QP %d, PD error.\n", qpn); - break; - case HNS_ROCE_LAVWQE_RW_ACC_ERROR: - dev_err(dev, "QP %d, rw acc error.\n", qpn); - break; - case HNS_ROCE_LAVWQE_KEY_STATE_ERROR: - dev_err(dev, "QP %d, key state error.\n", qpn); - break; - case HNS_ROCE_LAVWQE_MR_OPERATION_ERROR: - dev_err(dev, "QP %d, MR operation error.\n", qpn); - break; - default: - dev_err(dev, "Unhandled sub_event type %d.\n", - irq_work->sub_type); - break; - } break; case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH: dev_warn(dev, "SRQ limit reach.\n"); @@ -4427,6 +4724,7 @@ static void hns_roce_v2_init_irq_work(struct hns_roce_dev *hr_dev, static void set_eq_cons_index_v2(struct hns_roce_eq *eq) { + struct hns_roce_dev *hr_dev = eq->hr_dev; u32 doorbell[2]; doorbell[0] = 0; @@ -4453,7 +4751,7 @@ static void set_eq_cons_index_v2(struct hns_roce_eq *eq) HNS_ROCE_V2_EQ_DB_PARA_S, (eq->cons_index & HNS_ROCE_V2_CONS_IDX_M)); - hns_roce_write64_k(doorbell, eq->doorbell); + hns_roce_write64(hr_dev, doorbell, eq->doorbell); } static struct hns_roce_aeqe *get_aeqe_v2(struct hns_roce_eq *eq, u32 entry) @@ -4568,7 +4866,7 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, dev_err(dev, "Unhandled event %d on EQ %d at idx %u.\n", event_type, eq->eqn, eq->cons_index); break; - }; + } eq->event_type = event_type; eq->sub_type = sub_type; @@ -4692,11 +4990,22 @@ static irqreturn_t hns_roce_v2_msix_interrupt_abn(int irq, void *dev_id) int_en = roce_read(hr_dev, ROCEE_VF_ABN_INT_EN_REG); if (roce_get_bit(int_st, HNS_ROCE_V2_VF_INT_ST_AEQ_OVERFLOW_S)) { + struct pci_dev *pdev = hr_dev->pci_dev; + struct hnae3_ae_dev *ae_dev = pci_get_drvdata(pdev); + const struct hnae3_ae_ops *ops = ae_dev->ops; + dev_err(dev, "AEQ overflow!\n"); roce_set_bit(int_st, HNS_ROCE_V2_VF_INT_ST_AEQ_OVERFLOW_S, 1); roce_write(hr_dev, ROCEE_VF_ABN_INT_ST_REG, int_st); + /* Set reset level for reset_event() */ + if (ops->set_default_reset_request) + ops->set_default_reset_request(ae_dev, + HNAE3_FUNC_RESET); + if (ops->reset_event) + ops->reset_event(pdev, NULL); + roce_set_bit(int_en, HNS_ROCE_V2_VF_ABN_INT_EN_S, 1); roce_write(hr_dev, ROCEE_VF_ABN_INT_EN_REG, int_en); @@ -5599,7 +5908,7 @@ static int hns_roce_v2_modify_srq(struct ib_srq *ibsrq, return 0; } -int hns_roce_v2_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) +static int hns_roce_v2_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) { struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device); struct hns_roce_srq *srq = to_hr_srq(ibsrq); @@ -5664,6 +5973,7 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr) { + struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device); struct hns_roce_srq *srq = to_hr_srq(ibsrq); struct hns_roce_v2_wqe_data_seg *dseg; struct hns_roce_v2_db srq_db; @@ -5725,7 +6035,7 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, srq_db.byte_4 = HNS_ROCE_V2_SRQ_DB << 24 | srq->srqn; srq_db.parameter = srq->head; - hns_roce_write64_k((__le32 *)&srq_db, srq->db_reg_l); + hns_roce_write64(hr_dev, (__le32 *)&srq_db, srq->db_reg_l); } @@ -5758,6 +6068,7 @@ static const struct hns_roce_hw hns_roce_hw_v2 = { .hw_exit = hns_roce_v2_exit, .post_mbox = hns_roce_v2_post_mbox, .chk_mbox = hns_roce_v2_chk_mbox, + .rst_prc_mbox = hns_roce_v2_rst_process_cmd, .set_gid = hns_roce_v2_set_gid, .set_mac = hns_roce_v2_set_mac, .write_mtpt = hns_roce_v2_write_mtpt, @@ -5770,6 +6081,7 @@ static const struct hns_roce_hw hns_roce_hw_v2 = { .modify_qp = hns_roce_v2_modify_qp, .query_qp = hns_roce_v2_query_qp, .destroy_qp = hns_roce_v2_destroy_qp, + .qp_flow_control_init = hns_roce_v2_qp_flow_control_init, .modify_cq = hns_roce_v2_modify_cq, .post_send = hns_roce_v2_post_send, .post_recv = hns_roce_v2_post_recv, @@ -5800,6 +6112,7 @@ MODULE_DEVICE_TABLE(pci, hns_roce_hw_v2_pci_tbl); static int hns_roce_hw_v2_get_cfg(struct hns_roce_dev *hr_dev, struct hnae3_handle *handle) { + struct hns_roce_v2_priv *priv = hr_dev->priv; const struct pci_device_id *id; int i; @@ -5830,15 +6143,18 @@ static int hns_roce_hw_v2_get_cfg(struct hns_roce_dev *hr_dev, hr_dev->cmd_mod = 1; hr_dev->loop_idc = 0; + hr_dev->reset_cnt = handle->ae_algo->ops->ae_dev_reset_cnt(handle); + priv->handle = handle; + return 0; } -static int hns_roce_hw_v2_init_instance(struct hnae3_handle *handle) +static int __hns_roce_hw_v2_init_instance(struct hnae3_handle *handle) { struct hns_roce_dev *hr_dev; int ret; - hr_dev = (struct hns_roce_dev *)ib_alloc_device(sizeof(*hr_dev)); + hr_dev = ib_alloc_device(hns_roce_dev, ib_dev); if (!hr_dev) return -ENOMEM; @@ -5850,7 +6166,6 @@ static int hns_roce_hw_v2_init_instance(struct hnae3_handle *handle) hr_dev->pci_dev = handle->pdev; hr_dev->dev = &handle->pdev->dev; - handle->priv = hr_dev; ret = hns_roce_hw_v2_get_cfg(hr_dev, handle); if (ret) { @@ -5864,6 +6179,8 @@ static int hns_roce_hw_v2_init_instance(struct hnae3_handle *handle) goto error_failed_get_cfg; } + handle->priv = hr_dev; + return 0; error_failed_get_cfg: @@ -5875,7 +6192,7 @@ error_failed_kzalloc: return ret; } -static void hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle, +static void __hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle, bool reset) { struct hns_roce_dev *hr_dev = (struct hns_roce_dev *)handle->priv; @@ -5883,24 +6200,79 @@ static void hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle, if (!hr_dev) return; + handle->priv = NULL; hns_roce_exit(hr_dev); kfree(hr_dev->priv); ib_dealloc_device(&hr_dev->ib_dev); } +static int hns_roce_hw_v2_init_instance(struct hnae3_handle *handle) +{ + const struct hnae3_ae_ops *ops = handle->ae_algo->ops; + struct device *dev = &handle->pdev->dev; + int ret; + + handle->rinfo.instance_state = HNS_ROCE_STATE_INIT; + + if (ops->ae_dev_resetting(handle) || ops->get_hw_reset_stat(handle)) { + handle->rinfo.instance_state = HNS_ROCE_STATE_NON_INIT; + goto reset_chk_err; + } + + ret = __hns_roce_hw_v2_init_instance(handle); + if (ret) { + handle->rinfo.instance_state = HNS_ROCE_STATE_NON_INIT; + dev_err(dev, "RoCE instance init failed! ret = %d\n", ret); + if (ops->ae_dev_resetting(handle) || + ops->get_hw_reset_stat(handle)) + goto reset_chk_err; + else + return ret; + } + + handle->rinfo.instance_state = HNS_ROCE_STATE_INITED; + + + return 0; + +reset_chk_err: + dev_err(dev, "Device is busy in resetting state.\n" + "please retry later.\n"); + + return -EBUSY; +} + +static void hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle, + bool reset) +{ + if (handle->rinfo.instance_state != HNS_ROCE_STATE_INITED) + return; + + handle->rinfo.instance_state = HNS_ROCE_STATE_UNINIT; + + __hns_roce_hw_v2_uninit_instance(handle, reset); + + handle->rinfo.instance_state = HNS_ROCE_STATE_NON_INIT; +} static int hns_roce_hw_v2_reset_notify_down(struct hnae3_handle *handle) { - struct hns_roce_dev *hr_dev = (struct hns_roce_dev *)handle->priv; + struct hns_roce_dev *hr_dev; struct ib_event event; - if (!hr_dev) { - dev_err(&handle->pdev->dev, - "Input parameter handle->priv is NULL!\n"); - return -EINVAL; + if (handle->rinfo.instance_state != HNS_ROCE_STATE_INITED) { + set_bit(HNS_ROCE_RST_DIRECT_RETURN, &handle->rinfo.state); + return 0; } + handle->rinfo.reset_state = HNS_ROCE_STATE_RST_DOWN; + clear_bit(HNS_ROCE_RST_DIRECT_RETURN, &handle->rinfo.state); + + hr_dev = (struct hns_roce_dev *)handle->priv; + if (!hr_dev) + return 0; + hr_dev->active = false; - hr_dev->is_reset = true; + hr_dev->dis_db = true; event.event = IB_EVENT_DEVICE_FATAL; event.device = &hr_dev->ib_dev; @@ -5912,17 +6284,29 @@ static int hns_roce_hw_v2_reset_notify_down(struct hnae3_handle *handle) static int hns_roce_hw_v2_reset_notify_init(struct hnae3_handle *handle) { + struct device *dev = &handle->pdev->dev; int ret; - ret = hns_roce_hw_v2_init_instance(handle); + if (test_and_clear_bit(HNS_ROCE_RST_DIRECT_RETURN, + &handle->rinfo.state)) { + handle->rinfo.reset_state = HNS_ROCE_STATE_RST_INITED; + return 0; + } + + handle->rinfo.reset_state = HNS_ROCE_STATE_RST_INIT; + + dev_info(&handle->pdev->dev, "In reset process RoCE client reinit.\n"); + ret = __hns_roce_hw_v2_init_instance(handle); if (ret) { /* when reset notify type is HNAE3_INIT_CLIENT In reset notify * callback function, RoCE Engine reinitialize. If RoCE reinit * failed, we should inform NIC driver. */ handle->priv = NULL; - dev_err(&handle->pdev->dev, - "In reset process RoCE reinit failed %d.\n", ret); + dev_err(dev, "In reset process RoCE reinit failed %d.\n", ret); + } else { + handle->rinfo.reset_state = HNS_ROCE_STATE_RST_INITED; + dev_info(dev, "Reset done, RoCE client reinit finished.\n"); } return ret; @@ -5930,8 +6314,14 @@ static int hns_roce_hw_v2_reset_notify_init(struct hnae3_handle *handle) static int hns_roce_hw_v2_reset_notify_uninit(struct hnae3_handle *handle) { + if (test_bit(HNS_ROCE_RST_DIRECT_RETURN, &handle->rinfo.state)) + return 0; + + handle->rinfo.reset_state = HNS_ROCE_STATE_RST_UNINIT; + dev_info(&handle->pdev->dev, "In reset process RoCE client uninit.\n"); msleep(100); - hns_roce_hw_v2_uninit_instance(handle, false); + __hns_roce_hw_v2_uninit_instance(handle, false); + return 0; } |