diff options
23 files changed, 859 insertions, 269 deletions
diff --git a/Documentation/target/target-export-device b/Documentation/target/target-export-device new file mode 100755 index 000000000000..b803f4f886b5 --- /dev/null +++ b/Documentation/target/target-export-device @@ -0,0 +1,80 @@ +#!/bin/sh +# +# This script illustrates the sequence of operations in configfs to +# create a very simple LIO iSCSI target with a file or block device +# backstore. +# +# (C) Copyright 2014 Christophe Vu-Brugier <cvubrugier@fastmail.fm> +# + +print_usage() { + cat <<EOF +Usage: $(basename $0) [-p PORTAL] DEVICE|FILE +Export a block device or a file as an iSCSI target with a single LUN +EOF +} + +die() { + echo $1 + exit 1 +} + +while getopts "hp:" arg; do + case $arg in + h) print_usage; exit 0;; + p) PORTAL=${OPTARG};; + esac +done +shift $(($OPTIND - 1)) + +DEVICE=$1 +[ -n "$DEVICE" ] || die "Missing device or file argument" +[ -b $DEVICE -o -f $DEVICE ] || die "Invalid device or file: ${DEVICE}" +IQN="iqn.2003-01.org.linux-iscsi.$(hostname):$(basename $DEVICE)" +[ -n "$PORTAL" ] || PORTAL="0.0.0.0:3260" + +CONFIGFS=/sys/kernel/config +CORE_DIR=$CONFIGFS/target/core +ISCSI_DIR=$CONFIGFS/target/iscsi + +# Load the target modules and mount the config file system +lsmod | grep -q configfs || modprobe configfs +lsmod | grep -q target_core_mod || modprobe target_core_mod +mount | grep -q ^configfs || mount -t configfs none $CONFIGFS +mkdir -p $ISCSI_DIR + +# Create a backstore +if [ -b $DEVICE ]; then + BACKSTORE_DIR=$CORE_DIR/iblock_0/data + mkdir -p $BACKSTORE_DIR + echo "udev_path=${DEVICE}" > $BACKSTORE_DIR/control +else + BACKSTORE_DIR=$CORE_DIR/fileio_0/data + mkdir -p $BACKSTORE_DIR + DEVICE_SIZE=$(du -b $DEVICE | cut -f1) + echo "fd_dev_name=${DEVICE}" > $BACKSTORE_DIR/control + echo "fd_dev_size=${DEVICE_SIZE}" > $BACKSTORE_DIR/control + echo 1 > $BACKSTORE_DIR/attrib/emulate_write_cache +fi +echo 1 > $BACKSTORE_DIR/enable + +# Create an iSCSI target and a target portal group (TPG) +mkdir $ISCSI_DIR/$IQN +mkdir $ISCSI_DIR/$IQN/tpgt_1/ + +# Create a LUN +mkdir $ISCSI_DIR/$IQN/tpgt_1/lun/lun_0 +ln -s $BACKSTORE_DIR $ISCSI_DIR/$IQN/tpgt_1/lun/lun_0/data +echo 1 > $ISCSI_DIR/$IQN/tpgt_1/enable + +# Create a network portal +mkdir $ISCSI_DIR/$IQN/tpgt_1/np/$PORTAL + +# Disable authentication +echo 0 > $ISCSI_DIR/$IQN/tpgt_1/attrib/authentication +echo 1 > $ISCSI_DIR/$IQN/tpgt_1/attrib/generate_node_acls + +# Allow write access for non authenticated initiators +echo 0 > $ISCSI_DIR/$IQN/tpgt_1/attrib/demo_mode_write_protect + +echo "Target ${IQN}, portal ${PORTAL} has been created" diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 7d6c199de2d6..1ced0731c140 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -2302,12 +2302,8 @@ static void srpt_queue_response(struct se_cmd *cmd) } spin_unlock_irqrestore(&ioctx->spinlock, flags); - if (unlikely(transport_check_aborted_status(&ioctx->cmd, false) - || WARN_ON_ONCE(state == SRPT_STATE_CMD_RSP_SENT))) { - atomic_inc(&ch->req_lim_delta); - srpt_abort_cmd(ioctx); + if (unlikely(WARN_ON_ONCE(state == SRPT_STATE_CMD_RSP_SENT))) return; - } /* For read commands, transfer the data to the initiator. */ if (ioctx->cmd.data_direction == DMA_FROM_DEVICE && @@ -2689,7 +2685,8 @@ static void srpt_release_cmd(struct se_cmd *se_cmd) struct srpt_rdma_ch *ch = ioctx->ch; unsigned long flags; - WARN_ON(ioctx->state != SRPT_STATE_DONE); + WARN_ON_ONCE(ioctx->state != SRPT_STATE_DONE && + !(ioctx->cmd.transport_state & CMD_T_ABORTED)); if (ioctx->n_rw_ctx) { srpt_free_rw_ctxs(ch, ioctx); diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c index 0f807798c624..d390325c99ec 100644 --- a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c +++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c @@ -1170,6 +1170,7 @@ static struct ibmvscsis_cmd *ibmvscsis_get_free_cmd(struct scsi_info *vscsi) cmd = list_first_entry_or_null(&vscsi->free_cmd, struct ibmvscsis_cmd, list); if (cmd) { + cmd->flags &= ~(DELAY_SEND); list_del(&cmd->list); cmd->iue = iue; cmd->type = UNSET_TYPE; @@ -1749,45 +1750,79 @@ static void srp_snd_msg_failed(struct scsi_info *vscsi, long rc) static void ibmvscsis_send_messages(struct scsi_info *vscsi) { u64 msg_hi = 0; - /* note do not attmempt to access the IU_data_ptr with this pointer + /* note do not attempt to access the IU_data_ptr with this pointer * it is not valid */ struct viosrp_crq *crq = (struct viosrp_crq *)&msg_hi; struct ibmvscsis_cmd *cmd, *nxt; struct iu_entry *iue; long rc = ADAPT_SUCCESS; + bool retry = false; if (!(vscsi->flags & RESPONSE_Q_DOWN)) { - list_for_each_entry_safe(cmd, nxt, &vscsi->waiting_rsp, list) { - iue = cmd->iue; + do { + retry = false; + list_for_each_entry_safe(cmd, nxt, &vscsi->waiting_rsp, + list) { + /* + * Check to make sure abort cmd gets processed + * prior to the abort tmr cmd + */ + if (cmd->flags & DELAY_SEND) + continue; - crq->valid = VALID_CMD_RESP_EL; - crq->format = cmd->rsp.format; + if (cmd->abort_cmd) { + retry = true; + cmd->abort_cmd->flags &= ~(DELAY_SEND); + } - if (cmd->flags & CMD_FAST_FAIL) - crq->status = VIOSRP_ADAPTER_FAIL; + /* + * If CMD_T_ABORTED w/o CMD_T_TAS scenarios and + * the case where LIO issued a + * ABORT_TASK: Sending TMR_TASK_DOES_NOT_EXIST + * case then we dont send a response, since it + * was already done. + */ + if (cmd->se_cmd.transport_state & CMD_T_ABORTED && + !(cmd->se_cmd.transport_state & CMD_T_TAS)) { + list_del(&cmd->list); + ibmvscsis_free_cmd_resources(vscsi, + cmd); + } else { + iue = cmd->iue; - crq->IU_length = cpu_to_be16(cmd->rsp.len); + crq->valid = VALID_CMD_RESP_EL; + crq->format = cmd->rsp.format; - rc = h_send_crq(vscsi->dma_dev->unit_address, - be64_to_cpu(msg_hi), - be64_to_cpu(cmd->rsp.tag)); + if (cmd->flags & CMD_FAST_FAIL) + crq->status = VIOSRP_ADAPTER_FAIL; - pr_debug("send_messages: cmd %p, tag 0x%llx, rc %ld\n", - cmd, be64_to_cpu(cmd->rsp.tag), rc); + crq->IU_length = cpu_to_be16(cmd->rsp.len); - /* if all ok free up the command element resources */ - if (rc == H_SUCCESS) { - /* some movement has occurred */ - vscsi->rsp_q_timer.timer_pops = 0; - list_del(&cmd->list); + rc = h_send_crq(vscsi->dma_dev->unit_address, + be64_to_cpu(msg_hi), + be64_to_cpu(cmd->rsp.tag)); - ibmvscsis_free_cmd_resources(vscsi, cmd); - } else { - srp_snd_msg_failed(vscsi, rc); - break; + pr_debug("send_messages: cmd %p, tag 0x%llx, rc %ld\n", + cmd, be64_to_cpu(cmd->rsp.tag), rc); + + /* if all ok free up the command + * element resources + */ + if (rc == H_SUCCESS) { + /* some movement has occurred */ + vscsi->rsp_q_timer.timer_pops = 0; + list_del(&cmd->list); + + ibmvscsis_free_cmd_resources(vscsi, + cmd); + } else { + srp_snd_msg_failed(vscsi, rc); + break; + } + } } - } + } while (retry); if (!rc) { /* @@ -2708,6 +2743,7 @@ static int ibmvscsis_alloc_cmds(struct scsi_info *vscsi, int num) for (i = 0, cmd = (struct ibmvscsis_cmd *)vscsi->cmd_pool; i < num; i++, cmd++) { + cmd->abort_cmd = NULL; cmd->adapter = vscsi; INIT_WORK(&cmd->work, ibmvscsis_scheduler); list_add_tail(&cmd->list, &vscsi->free_cmd); @@ -3579,9 +3615,20 @@ static int ibmvscsis_write_pending(struct se_cmd *se_cmd) { struct ibmvscsis_cmd *cmd = container_of(se_cmd, struct ibmvscsis_cmd, se_cmd); + struct scsi_info *vscsi = cmd->adapter; struct iu_entry *iue = cmd->iue; int rc; + /* + * If CLIENT_FAILED OR RESPONSE_Q_DOWN, then just return success + * since LIO can't do anything about it, and we dont want to + * attempt an srp_transfer_data. + */ + if ((vscsi->flags & (CLIENT_FAILED | RESPONSE_Q_DOWN))) { + pr_err("write_pending failed since: %d\n", vscsi->flags); + return 0; + } + rc = srp_transfer_data(cmd, &vio_iu(iue)->srp.cmd, ibmvscsis_rdma, 1, 1); if (rc) { @@ -3660,11 +3707,28 @@ static void ibmvscsis_queue_tm_rsp(struct se_cmd *se_cmd) struct ibmvscsis_cmd *cmd = container_of(se_cmd, struct ibmvscsis_cmd, se_cmd); struct scsi_info *vscsi = cmd->adapter; + struct ibmvscsis_cmd *cmd_itr; + struct iu_entry *iue = iue = cmd->iue; + struct srp_tsk_mgmt *srp_tsk = &vio_iu(iue)->srp.tsk_mgmt; + u64 tag_to_abort = be64_to_cpu(srp_tsk->task_tag); uint len; pr_debug("queue_tm_rsp %p, status %d\n", se_cmd, (int)se_cmd->se_tmr_req->response); + if (srp_tsk->tsk_mgmt_func == SRP_TSK_ABORT_TASK && + cmd->se_cmd.se_tmr_req->response == TMR_TASK_DOES_NOT_EXIST) { + spin_lock_bh(&vscsi->intr_lock); + list_for_each_entry(cmd_itr, &vscsi->active_q, list) { + if (tag_to_abort == cmd_itr->se_cmd.tag) { + cmd_itr->abort_cmd = cmd; + cmd->flags |= DELAY_SEND; + break; + } + } + spin_unlock_bh(&vscsi->intr_lock); + } + srp_build_response(vscsi, cmd, &len); cmd->rsp.format = SRP_FORMAT; cmd->rsp.len = len; @@ -3672,8 +3736,8 @@ static void ibmvscsis_queue_tm_rsp(struct se_cmd *se_cmd) static void ibmvscsis_aborted_task(struct se_cmd *se_cmd) { - /* TBD: What (if anything) should we do here? */ - pr_debug("ibmvscsis_aborted_task %p\n", se_cmd); + pr_debug("ibmvscsis_aborted_task %p task_tag: %llu\n", + se_cmd, se_cmd->tag); } static struct se_wwn *ibmvscsis_make_tport(struct target_fabric_configfs *tf, diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.h b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.h index 65c6189885ab..b4391a8de456 100644 --- a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.h +++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.h @@ -168,10 +168,12 @@ struct ibmvscsis_cmd { struct iu_rsp rsp; struct work_struct work; struct scsi_info *adapter; + struct ibmvscsis_cmd *abort_cmd; /* Sense buffer that will be mapped into outgoing status */ unsigned char sense_buf[TRANSPORT_SENSE_BUFFER]; u64 init_time; #define CMD_FAST_FAIL BIT(0) +#define DELAY_SEND BIT(1) u32 flags; char type; }; diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index e3f9ed3690b7..26a9bcd5ee6a 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -128,11 +128,9 @@ struct iscsi_tiqn *iscsit_add_tiqn(unsigned char *buf) return ERR_PTR(-EINVAL); } - tiqn = kzalloc(sizeof(struct iscsi_tiqn), GFP_KERNEL); - if (!tiqn) { - pr_err("Unable to allocate struct iscsi_tiqn\n"); + tiqn = kzalloc(sizeof(*tiqn), GFP_KERNEL); + if (!tiqn) return ERR_PTR(-ENOMEM); - } sprintf(tiqn->tiqn, "%s", buf); INIT_LIST_HEAD(&tiqn->tiqn_list); @@ -362,9 +360,8 @@ struct iscsi_np *iscsit_add_np( return np; } - np = kzalloc(sizeof(struct iscsi_np), GFP_KERNEL); + np = kzalloc(sizeof(*np), GFP_KERNEL); if (!np) { - pr_err("Unable to allocate memory for struct iscsi_np\n"); mutex_unlock(&np_lock); return ERR_PTR(-ENOMEM); } @@ -696,12 +693,10 @@ static int __init iscsi_target_init_module(void) int ret = 0, size; pr_debug("iSCSI-Target "ISCSIT_VERSION"\n"); - - iscsit_global = kzalloc(sizeof(struct iscsit_global), GFP_KERNEL); - if (!iscsit_global) { - pr_err("Unable to allocate memory for iscsit_global\n"); + iscsit_global = kzalloc(sizeof(*iscsit_global), GFP_KERNEL); + if (!iscsit_global) return -1; - } + spin_lock_init(&iscsit_global->ts_bitmap_lock); mutex_init(&auth_id_lock); spin_lock_init(&sess_idr_lock); @@ -714,10 +709,8 @@ static int __init iscsi_target_init_module(void) size = BITS_TO_LONGS(ISCSIT_BITMAP_BITS) * sizeof(long); iscsit_global->ts_bitmap = vzalloc(size); - if (!iscsit_global->ts_bitmap) { - pr_err("Unable to allocate iscsit_global->ts_bitmap\n"); + if (!iscsit_global->ts_bitmap) goto configfs_out; - } lio_qr_cache = kmem_cache_create("lio_qr_cache", sizeof(struct iscsi_queue_req), @@ -984,12 +977,9 @@ static int iscsit_allocate_iovecs(struct iscsi_cmd *cmd) u32 iov_count = max(1UL, DIV_ROUND_UP(cmd->se_cmd.data_length, PAGE_SIZE)); iov_count += ISCSI_IOV_DATA_BUFFER; - - cmd->iov_data = kzalloc(iov_count * sizeof(struct kvec), GFP_KERNEL); - if (!cmd->iov_data) { - pr_err("Unable to allocate cmd->iov_data\n"); + cmd->iov_data = kcalloc(iov_count, sizeof(*cmd->iov_data), GFP_KERNEL); + if (!cmd->iov_data) return -ENOMEM; - } cmd->orig_iov_data_count = iov_count; return 0; @@ -1850,8 +1840,6 @@ static int iscsit_handle_nop_out(struct iscsi_conn *conn, struct iscsi_cmd *cmd, ping_data = kzalloc(payload_length + 1, GFP_KERNEL); if (!ping_data) { - pr_err("Unable to allocate memory for" - " NOPOUT ping data.\n"); ret = -1; goto out; } @@ -1997,15 +1985,11 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, hdr->refcmdsn = cpu_to_be32(ISCSI_RESERVED_TAG); cmd->data_direction = DMA_NONE; - - cmd->tmr_req = kzalloc(sizeof(struct iscsi_tmr_req), GFP_KERNEL); - if (!cmd->tmr_req) { - pr_err("Unable to allocate memory for" - " Task Management command!\n"); + cmd->tmr_req = kzalloc(sizeof(*cmd->tmr_req), GFP_KERNEL); + if (!cmd->tmr_req) return iscsit_add_reject_cmd(cmd, ISCSI_REASON_BOOKMARK_NO_RESOURCES, buf); - } /* * TASK_REASSIGN for ERL=2 / connection stays inside of @@ -2265,11 +2249,9 @@ iscsit_handle_text_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, struct kvec iov[3]; text_in = kzalloc(payload_length, GFP_KERNEL); - if (!text_in) { - pr_err("Unable to allocate memory for" - " incoming text parameters\n"); + if (!text_in) goto reject; - } + cmd->text_in_ptr = text_in; memset(iov, 0, 3 * sizeof(struct kvec)); @@ -3353,11 +3335,9 @@ iscsit_build_sendtargets_response(struct iscsi_cmd *cmd, SENDTARGETS_BUF_LIMIT); payload = kzalloc(buffer_len, GFP_KERNEL); - if (!payload) { - pr_err("Unable to allocate memory for sendtargets" - " response.\n"); + if (!payload) return -ENOMEM; - } + /* * Locate pointer to iqn./eui. string for ICF_SENDTARGETS_SINGLE * explicit case.. @@ -4683,6 +4663,7 @@ int iscsit_release_sessions_for_tpg(struct iscsi_portal_group *tpg, int force) continue; } atomic_set(&sess->session_reinstatement, 1); + atomic_set(&sess->session_fall_back_to_erl0, 1); spin_unlock(&sess->conn_lock); list_move_tail(&se_sess->sess_list, &free_list); diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c index 5798810197ec..535a8e06a401 100644 --- a/drivers/target/iscsi/iscsi_target_configfs.c +++ b/drivers/target/iscsi/iscsi_target_configfs.c @@ -1506,6 +1506,7 @@ static void lio_tpg_close_session(struct se_session *se_sess) return; } atomic_set(&sess->session_reinstatement, 1); + atomic_set(&sess->session_fall_back_to_erl0, 1); spin_unlock(&sess->conn_lock); iscsit_stop_time2retain_timer(sess); diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c index ad8f3011bdc2..66238477137b 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -208,6 +208,7 @@ int iscsi_check_for_session_reinstatement(struct iscsi_conn *conn) initiatorname_param->value) && (sess_p->sess_ops->SessionType == sessiontype))) { atomic_set(&sess_p->session_reinstatement, 1); + atomic_set(&sess_p->session_fall_back_to_erl0, 1); spin_unlock(&sess_p->conn_lock); iscsit_inc_session_usage_count(sess_p); iscsit_stop_time2retain_timer(sess_p); diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index 70657fd56440..0326607e5ab8 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -533,6 +533,7 @@ DEF_CONFIGFS_ATTRIB_SHOW(emulate_3pc); DEF_CONFIGFS_ATTRIB_SHOW(pi_prot_type); DEF_CONFIGFS_ATTRIB_SHOW(hw_pi_prot_type); DEF_CONFIGFS_ATTRIB_SHOW(pi_prot_format); +DEF_CONFIGFS_ATTRIB_SHOW(pi_prot_verify); DEF_CONFIGFS_ATTRIB_SHOW(enforce_pr_isids); DEF_CONFIGFS_ATTRIB_SHOW(is_nonrot); DEF_CONFIGFS_ATTRIB_SHOW(emulate_rest_reord); @@ -823,6 +824,7 @@ static ssize_t pi_prot_type_store(struct config_item *item, ret = dev->transport->init_prot(dev); if (ret) { da->pi_prot_type = old_prot; + da->pi_prot_verify = (bool) da->pi_prot_type; return ret; } @@ -830,6 +832,7 @@ static ssize_t pi_prot_type_store(struct config_item *item, dev->transport->free_prot(dev); } + da->pi_prot_verify = (bool) da->pi_prot_type; pr_debug("dev[%p]: SE Device Protection Type: %d\n", dev, flag); return count; } @@ -872,6 +875,35 @@ static ssize_t pi_prot_format_store(struct config_item *item, return count; } +static ssize_t pi_prot_verify_store(struct config_item *item, + const char *page, size_t count) +{ + struct se_dev_attrib *da = to_attrib(item); + bool flag; + int ret; + + ret = strtobool(page, &flag); + if (ret < 0) + return ret; + + if (!flag) { + da->pi_prot_verify = flag; + return count; + } + if (da->hw_pi_prot_type) { + pr_warn("DIF protection enabled on underlying hardware," + " ignoring\n"); + return count; + } + if (!da->pi_prot_type) { + pr_warn("DIF protection not supported by backend, ignoring\n"); + return count; + } + da->pi_prot_verify = flag; + + return count; +} + static ssize_t force_pr_aptpl_store(struct config_item *item, const char *page, size_t count) { @@ -1067,6 +1099,7 @@ CONFIGFS_ATTR(, emulate_3pc); CONFIGFS_ATTR(, pi_prot_type); CONFIGFS_ATTR_RO(, hw_pi_prot_type); CONFIGFS_ATTR(, pi_prot_format); +CONFIGFS_ATTR(, pi_prot_verify); CONFIGFS_ATTR(, enforce_pr_isids); CONFIGFS_ATTR(, is_nonrot); CONFIGFS_ATTR(, emulate_rest_reord); @@ -1104,6 +1137,7 @@ struct configfs_attribute *sbc_attrib_attrs[] = { &attr_pi_prot_type, &attr_hw_pi_prot_type, &attr_pi_prot_format, + &attr_pi_prot_verify, &attr_enforce_pr_isids, &attr_is_nonrot, &attr_emulate_rest_reord, @@ -1366,7 +1400,7 @@ static ssize_t target_pr_res_holder_show(struct config_item *item, char *page) struct se_device *dev = pr_to_dev(item); int ret; - if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) + if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_PGR) return sprintf(page, "Passthrough\n"); spin_lock(&dev->dev_reservation_lock); @@ -1506,7 +1540,7 @@ static ssize_t target_pr_res_type_show(struct config_item *item, char *page) { struct se_device *dev = pr_to_dev(item); - if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) + if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_PGR) return sprintf(page, "SPC_PASSTHROUGH\n"); else if (dev->dev_reservation_flags & DRF_SPC2_RESERVATIONS) return sprintf(page, "SPC2_RESERVATIONS\n"); @@ -1519,7 +1553,7 @@ static ssize_t target_pr_res_aptpl_active_show(struct config_item *item, { struct se_device *dev = pr_to_dev(item); - if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) + if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_PGR) return 0; return sprintf(page, "APTPL Bit Status: %s\n", @@ -1531,7 +1565,7 @@ static ssize_t target_pr_res_aptpl_metadata_show(struct config_item *item, { struct se_device *dev = pr_to_dev(item); - if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) + if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_PGR) return 0; return sprintf(page, "Ready to process PR APTPL metadata..\n"); @@ -1577,7 +1611,7 @@ static ssize_t target_pr_res_aptpl_metadata_store(struct config_item *item, u16 tpgt = 0; u8 type = 0; - if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) + if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_PGR) return count; if (dev->dev_reservation_flags & DRF_SPC2_RESERVATIONS) return count; @@ -2511,7 +2545,7 @@ static ssize_t target_tg_pt_gp_alua_support_##_name##_store( \ int ret; \ \ if (!t->tg_pt_gp_valid_id) { \ - pr_err("Unable to do set ##_name ALUA state on non" \ + pr_err("Unable to do set " #_name " ALUA state on non" \ " valid tg_pt_gp ID: %hu\n", \ t->tg_pt_gp_valid_id); \ return -EINVAL; \ @@ -2643,13 +2677,13 @@ static ssize_t target_tg_pt_gp_tg_pt_gp_id_store(struct config_item *item, ret = kstrtoul(page, 0, &tg_pt_gp_id); if (ret < 0) { - pr_err("kstrtoul() returned %d for" - " tg_pt_gp_id\n", ret); + pr_err("ALUA tg_pt_gp_id: invalid value '%s' for tg_pt_gp_id\n", + page); return ret; } if (tg_pt_gp_id > 0x0000ffff) { - pr_err("ALUA tg_pt_gp_id: %lu exceeds maximum:" - " 0x0000ffff\n", tg_pt_gp_id); + pr_err("ALUA tg_pt_gp_id: %lu exceeds maximum: 0x0000ffff\n", + tg_pt_gp_id); return -EINVAL; } diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index d2f089cfa9ae..8add07f387f9 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -1045,6 +1045,8 @@ passthrough_parse_cdb(struct se_cmd *cmd, sense_reason_t (*exec_cmd)(struct se_cmd *cmd)) { unsigned char *cdb = cmd->t_task_cdb; + struct se_device *dev = cmd->se_dev; + unsigned int size; /* * Clear a lun set in the cdb if the initiator talking to use spoke @@ -1076,6 +1078,42 @@ passthrough_parse_cdb(struct se_cmd *cmd, return TCM_NO_SENSE; } + /* + * For PERSISTENT RESERVE IN/OUT, RELEASE, and RESERVE we need to + * emulate the response, since tcmu does not have the information + * required to process these commands. + */ + if (!(dev->transport->transport_flags & + TRANSPORT_FLAG_PASSTHROUGH_PGR)) { + if (cdb[0] == PERSISTENT_RESERVE_IN) { + cmd->execute_cmd = target_scsi3_emulate_pr_in; + size = (cdb[7] << 8) + cdb[8]; + return target_cmd_size_check(cmd, size); + } + if (cdb[0] == PERSISTENT_RESERVE_OUT) { + cmd->execute_cmd = target_scsi3_emulate_pr_out; + size = (cdb[7] << 8) + cdb[8]; + return target_cmd_size_check(cmd, size); + } + + if (cdb[0] == RELEASE || cdb[0] == RELEASE_10) { + cmd->execute_cmd = target_scsi2_reservation_release; + if (cdb[0] == RELEASE_10) + size = (cdb[7] << 8) | cdb[8]; + else + size = cmd->data_length; + return target_cmd_size_check(cmd, size); + } + if (cdb[0] == RESERVE || cdb[0] == RESERVE_10) { + cmd->execute_cmd = target_scsi2_reservation_reserve; + if (cdb[0] == RESERVE_10) + size = (cdb[7] << 8) | cdb[8]; + else + size = cmd->data_length; + return target_cmd_size_check(cmd, size); + } + } + /* Set DATA_CDB flag for ops that should have it */ switch (cdb[0]) { case READ_6: diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c index 87aa376a1a1a..73b8f93a5fef 100644 --- a/drivers/target/target_core_file.c +++ b/drivers/target/target_core_file.c @@ -277,12 +277,11 @@ static int fd_do_rw(struct se_cmd *cmd, struct file *fd, else ret = vfs_iter_read(fd, &iter, &pos); - kfree(bvec); - if (is_write) { if (ret < 0 || ret != data_length) { pr_err("%s() write returned %d\n", __func__, ret); - return (ret < 0 ? ret : -EINVAL); + if (ret >= 0) + ret = -EINVAL; } } else { /* @@ -295,17 +294,29 @@ static int fd_do_rw(struct se_cmd *cmd, struct file *fd, pr_err("%s() returned %d, expecting %u for " "S_ISBLK\n", __func__, ret, data_length); - return (ret < 0 ? ret : -EINVAL); + if (ret >= 0) + ret = -EINVAL; } } else { if (ret < 0) { pr_err("%s() returned %d for non S_ISBLK\n", __func__, ret); - return ret; + } else if (ret != data_length) { + /* + * Short read case: + * Probably some one truncate file under us. + * We must explicitly zero sg-pages to prevent + * expose uninizialized pages to userspace. + */ + if (ret < data_length) + ret += iov_iter_zero(data_length - ret, &iter); + else + ret = -EINVAL; } } } - return 1; + kfree(bvec); + return ret; } static sense_reason_t @@ -543,7 +554,8 @@ fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, ret = fd_do_rw(cmd, file, dev->dev_attrib.block_size, sgl, sgl_nents, cmd->data_length, 0); - if (ret > 0 && cmd->prot_type && dev->dev_attrib.pi_prot_type) { + if (ret > 0 && cmd->prot_type && dev->dev_attrib.pi_prot_type && + dev->dev_attrib.pi_prot_verify) { u32 sectors = cmd->data_length >> ilog2(dev->dev_attrib.block_size); @@ -553,7 +565,8 @@ fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, return rc; } } else { - if (cmd->prot_type && dev->dev_attrib.pi_prot_type) { + if (cmd->prot_type && dev->dev_attrib.pi_prot_type && + dev->dev_attrib.pi_prot_verify) { u32 sectors = cmd->data_length >> ilog2(dev->dev_attrib.block_size); @@ -595,8 +608,7 @@ fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, if (ret < 0) return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; - if (ret) - target_complete_cmd(cmd, SAM_STAT_GOOD); + target_complete_cmd(cmd, SAM_STAT_GOOD); return 0; } diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index d316ed537d59..bb069ebe4aa6 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c @@ -279,7 +279,7 @@ static void iblock_complete_cmd(struct se_cmd *cmd) struct iblock_req *ibr = cmd->priv; u8 status; - if (!atomic_dec_and_test(&ibr->pending)) + if (!refcount_dec_and_test(&ibr->pending)) return; if (atomic_read(&ibr->ib_bio_err_cnt)) @@ -487,7 +487,7 @@ iblock_execute_write_same(struct se_cmd *cmd) bio_list_init(&list); bio_list_add(&list, bio); - atomic_set(&ibr->pending, 1); + refcount_set(&ibr->pending, 1); while (sectors) { while (bio_add_page(bio, sg_page(sg), sg->length, sg->offset) @@ -498,7 +498,7 @@ iblock_execute_write_same(struct se_cmd *cmd) if (!bio) goto fail_put_bios; - atomic_inc(&ibr->pending); + refcount_inc(&ibr->pending); bio_list_add(&list, bio); } @@ -706,7 +706,7 @@ iblock_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, cmd->priv = ibr; if (!sgl_nents) { - atomic_set(&ibr->pending, 1); + refcount_set(&ibr->pending, 1); iblock_complete_cmd(cmd); return 0; } @@ -719,7 +719,7 @@ iblock_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, bio_list_init(&list); bio_list_add(&list, bio); - atomic_set(&ibr->pending, 2); + refcount_set(&ibr->pending, 2); bio_cnt = 1; for_each_sg(sgl, sg, sgl_nents, i) { @@ -740,7 +740,7 @@ iblock_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, if (!bio) goto fail_put_bios; - atomic_inc(&ibr->pending); + refcount_inc(&ibr->pending); bio_list_add(&list, bio); bio_cnt++; } diff --git a/drivers/target/target_core_iblock.h b/drivers/target/target_core_iblock.h index 718d3fcd3e7c..f2a5797217d4 100644 --- a/drivers/target/target_core_iblock.h +++ b/drivers/target/target_core_iblock.h @@ -2,6 +2,7 @@ #define TARGET_CORE_IBLOCK_H #include <linux/atomic.h> +#include <linux/refcount.h> #include <target/target_core_base.h> #define IBLOCK_VERSION "4.0" @@ -10,7 +11,7 @@ #define IBLOCK_LBA_SHIFT 9 struct iblock_req { - atomic_t pending; + refcount_t pending; atomic_t ib_bio_err_cnt; } ____cacheline_aligned; diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c index e18051185846..129ca572673c 100644 --- a/drivers/target/target_core_pr.c +++ b/drivers/target/target_core_pr.c @@ -4147,7 +4147,7 @@ target_check_reservation(struct se_cmd *cmd) return 0; if (dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE) return 0; - if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) + if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_PGR) return 0; spin_lock(&dev->dev_reservation_lock); diff --git a/drivers/target/target_core_pr.h b/drivers/target/target_core_pr.h index 847bd470339c..772f9148e75c 100644 --- a/drivers/target/target_core_pr.h +++ b/drivers/target/target_core_pr.h @@ -7,7 +7,7 @@ /* * PERSISTENT_RESERVE_OUT service action codes * - * spc4r17 section 6.14.2 Table 171 + * spc5r04b section 6.15.2 Table 174 */ #define PRO_REGISTER 0x00 #define PRO_RESERVE 0x01 @@ -17,10 +17,11 @@ #define PRO_PREEMPT_AND_ABORT 0x05 #define PRO_REGISTER_AND_IGNORE_EXISTING_KEY 0x06 #define PRO_REGISTER_AND_MOVE 0x07 +#define PRO_REPLACE_LOST_RESERVATION 0x08 /* * PERSISTENT_RESERVE_IN service action codes * - * spc4r17 section 6.13.1 Table 159 + * spc5r04b section 6.14.1 Table 162 */ #define PRI_READ_KEYS 0x00 #define PRI_READ_RESERVATION 0x01 @@ -29,13 +30,13 @@ /* * PERSISTENT_RESERVE_ SCOPE field * - * spc4r17 section 6.13.3.3 Table 163 + * spc5r04b section 6.14.3.2 Table 166 */ #define PR_SCOPE_LU_SCOPE 0x00 /* * PERSISTENT_RESERVE_* TYPE field * - * spc4r17 section 6.13.3.4 Table 164 + * spc5r04b section 6.14.3.3 Table 167 */ #define PR_TYPE_WRITE_EXCLUSIVE 0x01 #define PR_TYPE_EXCLUSIVE_ACCESS 0x03 diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index a93d94e68ab5..3e4abb13f8ea 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -1081,7 +1081,8 @@ static const struct target_backend_ops pscsi_ops = { .name = "pscsi", .owner = THIS_MODULE, .transport_flags = TRANSPORT_FLAG_PASSTHROUGH | - TRANSPORT_FLAG_PASSTHROUGH_ALUA, + TRANSPORT_FLAG_PASSTHROUGH_ALUA | + TRANSPORT_FLAG_PASSTHROUGH_PGR, .attach_hba = pscsi_attach_hba, .detach_hba = pscsi_detach_hba, .pmode_enable_hba = pscsi_pmode_enable_hba, diff --git a/drivers/target/target_core_rd.c b/drivers/target/target_core_rd.c index ddc216c9f1f6..20253d04103f 100644 --- a/drivers/target/target_core_rd.c +++ b/drivers/target/target_core_rd.c @@ -47,11 +47,9 @@ static int rd_attach_hba(struct se_hba *hba, u32 host_id) { struct rd_host *rd_host; - rd_host = kzalloc(sizeof(struct rd_host), GFP_KERNEL); - if (!rd_host) { - pr_err("Unable to allocate memory for struct rd_host\n"); + rd_host = kzalloc(sizeof(*rd_host), GFP_KERNEL); + if (!rd_host) return -ENOMEM; - } rd_host->rd_host_id = host_id; @@ -148,11 +146,8 @@ static int rd_allocate_sgl_table(struct rd_dev *rd_dev, struct rd_dev_sg_table * sg = kcalloc(sg_per_table + chain_entry, sizeof(*sg), GFP_KERNEL); - if (!sg) { - pr_err("Unable to allocate scatterlist array" - " for struct rd_dev\n"); + if (!sg) return -ENOMEM; - } sg_init_table(sg, sg_per_table + chain_entry); @@ -210,13 +205,9 @@ static int rd_build_device_space(struct rd_dev *rd_dev) total_sg_needed = rd_dev->rd_page_count; sg_tables = (total_sg_needed / max_sg_per_table) + 1; - - sg_table = kzalloc(sg_tables * sizeof(struct rd_dev_sg_table), GFP_KERNEL); - if (!sg_table) { - pr_err("Unable to allocate memory for Ramdisk" - " scatterlist tables\n"); + sg_table = kcalloc(sg_tables, sizeof(*sg_table), GFP_KERNEL); + if (!sg_table) return -ENOMEM; - } rd_dev->sg_table_array = sg_table; rd_dev->sg_table_count = sg_tables; @@ -271,13 +262,9 @@ static int rd_build_prot_space(struct rd_dev *rd_dev, int prot_length, int block total_sg_needed = (rd_dev->rd_page_count * prot_length / block_size) + 1; sg_tables = (total_sg_needed / max_sg_per_table) + 1; - - sg_table = kzalloc(sg_tables * sizeof(struct rd_dev_sg_table), GFP_KERNEL); - if (!sg_table) { - pr_err("Unable to allocate memory for Ramdisk protection" - " scatterlist tables\n"); + sg_table = kcalloc(sg_tables, sizeof(*sg_table), GFP_KERNEL); + if (!sg_table) return -ENOMEM; - } rd_dev->sg_prot_array = sg_table; rd_dev->sg_prot_count = sg_tables; @@ -298,11 +285,9 @@ static struct se_device *rd_alloc_device(struct se_hba *hba, const char *name) struct rd_dev *rd_dev; struct rd_host *rd_host = hba->hba_ptr; - rd_dev = kzalloc(sizeof(struct rd_dev), GFP_KERNEL); - if (!rd_dev) { - pr_err("Unable to allocate memory for struct rd_dev\n"); + rd_dev = kzalloc(sizeof(*rd_dev), GFP_KERNEL); + if (!rd_dev) return NULL; - } rd_dev->rd_host = rd_host; @@ -410,7 +395,7 @@ static sense_reason_t rd_do_prot_rw(struct se_cmd *cmd, bool is_read) u32 prot_offset, prot_page; u32 prot_npages __maybe_unused; u64 tmp; - sense_reason_t rc = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; + sense_reason_t rc = 0; tmp = cmd->t_task_lba * se_dev->prot_length; prot_offset = do_div(tmp, PAGE_SIZE); @@ -423,13 +408,14 @@ static sense_reason_t rd_do_prot_rw(struct se_cmd *cmd, bool is_read) prot_sg = &prot_table->sg_table[prot_page - prot_table->page_start_offset]; - if (is_read) - rc = sbc_dif_verify(cmd, cmd->t_task_lba, sectors, 0, - prot_sg, prot_offset); - else - rc = sbc_dif_verify(cmd, cmd->t_task_lba, sectors, 0, - cmd->t_prot_sg, 0); - + if (se_dev->dev_attrib.pi_prot_verify) { + if (is_read) + rc = sbc_dif_verify(cmd, cmd->t_task_lba, sectors, 0, + prot_sg, prot_offset); + else + rc = sbc_dif_verify(cmd, cmd->t_task_lba, sectors, 0, + cmd->t_prot_sg, 0); + } if (!rc) sbc_dif_copy_prot(cmd, sectors, is_read, prot_sg, prot_offset); diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c index c194063f169b..4316f7b65fb7 100644 --- a/drivers/target/target_core_sbc.c +++ b/drivers/target/target_core_sbc.c @@ -507,8 +507,11 @@ static sense_reason_t compare_and_write_callback(struct se_cmd *cmd, bool succes * been failed with a non-zero SCSI status. */ if (cmd->scsi_status) { - pr_err("compare_and_write_callback: non zero scsi_status:" + pr_debug("compare_and_write_callback: non zero scsi_status:" " 0x%02x\n", cmd->scsi_status); + *post_ret = 1; + if (cmd->scsi_status == SAM_STAT_CHECK_CONDITION) + ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; goto out; } @@ -519,8 +522,8 @@ static sense_reason_t compare_and_write_callback(struct se_cmd *cmd, bool succes goto out; } - write_sg = kmalloc(sizeof(struct scatterlist) * cmd->t_data_nents, - GFP_KERNEL); + write_sg = kmalloc_array(cmd->t_data_nents, sizeof(*write_sg), + GFP_KERNEL); if (!write_sg) { pr_err("Unable to allocate compare_and_write sg\n"); ret = TCM_OUT_OF_RESOURCES; @@ -924,6 +927,7 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) cmd->execute_cmd = sbc_execute_rw; break; case WRITE_16: + case WRITE_VERIFY_16: sectors = transport_get_sectors_16(cdb); cmd->t_task_lba = transport_lba_64(cdb); diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c index dfaef4d3b2d2..310d9e55c6eb 100644 --- a/drivers/target/target_core_tpg.c +++ b/drivers/target/target_core_tpg.c @@ -398,6 +398,13 @@ int core_tpg_set_initiator_node_queue_depth( struct se_portal_group *tpg = acl->se_tpg; /* + * Allow the setting of se_node_acl queue_depth to be idempotent, + * and not force a session shutdown event if the value is not + * changing. + */ + if (acl->queue_depth == queue_depth) + return 0; + /* * User has requested to change the queue depth for a Initiator Node. * Change the value in the Node's struct se_node_acl, and call * target_set_nacl_queue_depth() to set the new queue depth. diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index a0cd56ee5fe9..37f57357d4a0 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -2311,7 +2311,7 @@ void *transport_kmap_data_sg(struct se_cmd *cmd) return kmap(sg_page(sg)) + sg->offset; /* >1 page. use vmap */ - pages = kmalloc(sizeof(*pages) * cmd->t_data_nents, GFP_KERNEL); + pages = kmalloc_array(cmd->t_data_nents, sizeof(*pages), GFP_KERNEL); if (!pages) return NULL; diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index f615c3bbb73e..9045837f748b 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -2,6 +2,7 @@ * Copyright (C) 2013 Shaohua Li <shli@kernel.org> * Copyright (C) 2014 Red Hat, Inc. * Copyright (C) 2015 Arrikto, Inc. + * Copyright (C) 2017 Chinamobile, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -25,10 +26,13 @@ #include <linux/parser.h> #include <linux/vmalloc.h> #include <linux/uio_driver.h> +#include <linux/radix-tree.h> #include <linux/stringify.h> #include <linux/bitops.h> #include <linux/highmem.h> #include <linux/configfs.h> +#include <linux/mutex.h> +#include <linux/kthread.h> #include <net/genetlink.h> #include <scsi/scsi_common.h> #include <scsi/scsi_proto.h> @@ -63,17 +67,26 @@ * this may have a 'UAM' comment. */ - #define TCMU_TIME_OUT (30 * MSEC_PER_SEC) -#define DATA_BLOCK_BITS 256 -#define DATA_BLOCK_SIZE 4096 +/* For cmd area, the size is fixed 8MB */ +#define CMDR_SIZE (8 * 1024 * 1024) -#define CMDR_SIZE (16 * 4096) +/* + * For data area, the block size is PAGE_SIZE and + * the total size is 256K * PAGE_SIZE. + */ +#define DATA_BLOCK_SIZE PAGE_SIZE +#define DATA_BLOCK_BITS (256 * 1024) #define DATA_SIZE (DATA_BLOCK_BITS * DATA_BLOCK_SIZE) +#define DATA_BLOCK_INIT_BITS 128 +/* The total size of the ring is 8M + 256K * PAGE_SIZE */ #define TCMU_RING_SIZE (CMDR_SIZE + DATA_SIZE) +/* Default maximum of the global data blocks(512K * PAGE_SIZE) */ +#define TCMU_GLOBAL_MAX_BLOCKS (512 * 1024) + static struct device *tcmu_root_device; struct tcmu_hba { @@ -83,6 +96,8 @@ struct tcmu_hba { #define TCMU_CONFIG_LEN 256 struct tcmu_dev { + struct list_head node; + struct se_device se_dev; char *name; @@ -94,6 +109,8 @@ struct tcmu_dev { struct uio_info uio_info; + struct inode *inode; + struct tcmu_mailbox *mb_addr; size_t dev_size; u32 cmdr_size; @@ -103,11 +120,14 @@ struct tcmu_dev { size_t data_off; size_t data_size; - DECLARE_BITMAP(data_bitmap, DATA_BLOCK_BITS); - wait_queue_head_t wait_cmdr; - /* TODO should this be a mutex? */ - spinlock_t cmdr_lock; + struct mutex cmdr_lock; + + bool waiting_global; + uint32_t dbi_max; + uint32_t dbi_thresh; + DECLARE_BITMAP(data_bitmap, DATA_BLOCK_BITS); + struct radix_tree_root data_blocks; struct idr commands; spinlock_t commands_lock; @@ -130,7 +150,9 @@ struct tcmu_cmd { /* Can't use se_cmd when cleaning up expired cmds, because if cmd has been completed then accessing se_cmd is off limits */ - DECLARE_BITMAP(data_bitmap, DATA_BLOCK_BITS); + uint32_t dbi_cnt; + uint32_t dbi_cur; + uint32_t *dbi; unsigned long deadline; @@ -138,6 +160,13 @@ struct tcmu_cmd { unsigned long flags; }; +static struct task_struct *unmap_thread; +static wait_queue_head_t unmap_wait; +static DEFINE_MUTEX(root_udev_mutex); +static LIST_HEAD(root_udev); + +static atomic_t global_db_count = ATOMIC_INIT(0); + static struct kmem_cache *tcmu_cmd_cache; /* multicast group */ @@ -161,6 +190,114 @@ static struct genl_family tcmu_genl_family __ro_after_init = { .netnsok = true, }; +#define tcmu_cmd_set_dbi_cur(cmd, index) ((cmd)->dbi_cur = (index)) +#define tcmu_cmd_reset_dbi_cur(cmd) tcmu_cmd_set_dbi_cur(cmd, 0) +#define tcmu_cmd_set_dbi(cmd, index) ((cmd)->dbi[(cmd)->dbi_cur++] = (index)) +#define tcmu_cmd_get_dbi(cmd) ((cmd)->dbi[(cmd)->dbi_cur++]) + +static void tcmu_cmd_free_data(struct tcmu_cmd *tcmu_cmd, uint32_t len) +{ + struct tcmu_dev *udev = tcmu_cmd->tcmu_dev; + uint32_t i; + + for (i = 0; i < len; i++) + clear_bit(tcmu_cmd->dbi[i], udev->data_bitmap); +} + +static inline bool tcmu_get_empty_block(struct tcmu_dev *udev, + struct tcmu_cmd *tcmu_cmd) +{ + struct page *page; + int ret, dbi; + + dbi = find_first_zero_bit(udev->data_bitmap, udev->dbi_thresh); + if (dbi == udev->dbi_thresh) + return false; + + page = radix_tree_lookup(&udev->data_blocks, dbi); + if (!page) { + + if (atomic_add_return(1, &global_db_count) > + TCMU_GLOBAL_MAX_BLOCKS) { + atomic_dec(&global_db_count); + return false; + } + + /* try to get new page from the mm */ + page = alloc_page(GFP_KERNEL); + if (!page) + return false; + + ret = radix_tree_insert(&udev->data_blocks, dbi, page); + if (ret) { + __free_page(page); + return false; + } + + } + + if (dbi > udev->dbi_max) + udev->dbi_max = dbi; + + set_bit(dbi, udev->data_bitmap); + tcmu_cmd_set_dbi(tcmu_cmd, dbi); + + return true; +} + +static bool tcmu_get_empty_blocks(struct tcmu_dev *udev, + struct tcmu_cmd *tcmu_cmd) +{ + int i; + + udev->waiting_global = false; + + for (i = tcmu_cmd->dbi_cur; i < tcmu_cmd->dbi_cnt; i++) { + if (!tcmu_get_empty_block(udev, tcmu_cmd)) + goto err; + } + return true; + +err: + udev->waiting_global = true; + /* Try to wake up the unmap thread */ + wake_up(&unmap_wait); + return false; +} + +static inline struct page * +tcmu_get_block_page(struct tcmu_dev *udev, uint32_t dbi) +{ + return radix_tree_lookup(&udev->data_blocks, dbi); +} + +static inline void tcmu_free_cmd(struct tcmu_cmd *tcmu_cmd) +{ + kfree(tcmu_cmd->dbi); + kmem_cache_free(tcmu_cmd_cache, tcmu_cmd); +} + +static inline size_t tcmu_cmd_get_data_length(struct tcmu_cmd *tcmu_cmd) +{ + struct se_cmd *se_cmd = tcmu_cmd->se_cmd; + size_t data_length = round_up(se_cmd->data_length, DATA_BLOCK_SIZE); + + if (se_cmd->se_cmd_flags & SCF_BIDI) { + BUG_ON(!(se_cmd->t_bidi_data_sg && se_cmd->t_bidi_data_nents)); + data_length += round_up(se_cmd->t_bidi_data_sg->length, + DATA_BLOCK_SIZE); + } + + return data_length; +} + +static inline uint32_t tcmu_cmd_get_block_cnt(struct tcmu_cmd *tcmu_cmd) +{ + size_t data_length = tcmu_cmd_get_data_length(tcmu_cmd); + + return data_length / DATA_BLOCK_SIZE; +} + static struct tcmu_cmd *tcmu_alloc_cmd(struct se_cmd *se_cmd) { struct se_device *se_dev = se_cmd->se_dev; @@ -178,6 +315,15 @@ static struct tcmu_cmd *tcmu_alloc_cmd(struct se_cmd *se_cmd) tcmu_cmd->deadline = jiffies + msecs_to_jiffies(udev->cmd_time_out); + tcmu_cmd_reset_dbi_cur(tcmu_cmd); + tcmu_cmd->dbi_cnt = tcmu_cmd_get_block_cnt(tcmu_cmd); + tcmu_cmd->dbi = kcalloc(tcmu_cmd->dbi_cnt, sizeof(uint32_t), + GFP_KERNEL); + if (!tcmu_cmd->dbi) { + kmem_cache_free(tcmu_cmd_cache, tcmu_cmd); + return NULL; + } + idr_preload(GFP_KERNEL); spin_lock_irq(&udev->commands_lock); cmd_id = idr_alloc(&udev->commands, tcmu_cmd, 0, @@ -186,7 +332,7 @@ static struct tcmu_cmd *tcmu_alloc_cmd(struct se_cmd *se_cmd) idr_preload_end(); if (cmd_id < 0) { - kmem_cache_free(tcmu_cmd_cache, tcmu_cmd); + tcmu_free_cmd(tcmu_cmd); return NULL; } tcmu_cmd->cmd_id = cmd_id; @@ -248,10 +394,10 @@ static inline void new_iov(struct iovec **iov, int *iov_cnt, #define UPDATE_HEAD(head, used, size) smp_store_release(&head, ((head % size) + used) % size) /* offset is relative to mb_addr */ -static inline size_t get_block_offset(struct tcmu_dev *dev, - int block, int remaining) +static inline size_t get_block_offset_user(struct tcmu_dev *dev, + int dbi, int remaining) { - return dev->data_off + block * DATA_BLOCK_SIZE + + return dev->data_off + dbi * DATA_BLOCK_SIZE + DATA_BLOCK_SIZE - remaining; } @@ -260,37 +406,45 @@ static inline size_t iov_tail(struct tcmu_dev *udev, struct iovec *iov) return (size_t)iov->iov_base + iov->iov_len; } -static void alloc_and_scatter_data_area(struct tcmu_dev *udev, - struct scatterlist *data_sg, unsigned int data_nents, - struct iovec **iov, int *iov_cnt, bool copy_data) +static int scatter_data_area(struct tcmu_dev *udev, + struct tcmu_cmd *tcmu_cmd, struct scatterlist *data_sg, + unsigned int data_nents, struct iovec **iov, + int *iov_cnt, bool copy_data) { - int i, block; + int i, dbi; int block_remaining = 0; - void *from, *to; - size_t copy_bytes, to_offset; + void *from, *to = NULL; + size_t copy_bytes, to_offset, offset; struct scatterlist *sg; + struct page *page; for_each_sg(data_sg, sg, data_nents, i) { int sg_remaining = sg->length; from = kmap_atomic(sg_page(sg)) + sg->offset; while (sg_remaining > 0) { if (block_remaining == 0) { - block = find_first_zero_bit(udev->data_bitmap, - DATA_BLOCK_BITS); + if (to) + kunmap_atomic(to); + block_remaining = DATA_BLOCK_SIZE; - set_bit(block, udev->data_bitmap); + dbi = tcmu_cmd_get_dbi(tcmu_cmd); + page = tcmu_get_block_page(udev, dbi); + to = kmap_atomic(page); } + copy_bytes = min_t(size_t, sg_remaining, block_remaining); - to_offset = get_block_offset(udev, block, + to_offset = get_block_offset_user(udev, dbi, block_remaining); - to = (void *)udev->mb_addr + to_offset; + offset = DATA_BLOCK_SIZE - block_remaining; + to = (void *)(unsigned long)to + offset; + if (*iov_cnt != 0 && to_offset == iov_tail(udev, *iov)) { (*iov)->iov_len += copy_bytes; } else { new_iov(iov, iov_cnt, udev); - (*iov)->iov_base = (void __user *) to_offset; + (*iov)->iov_base = (void __user *)to_offset; (*iov)->iov_len = copy_bytes; } if (copy_data) { @@ -303,33 +457,29 @@ static void alloc_and_scatter_data_area(struct tcmu_dev *udev, } kunmap_atomic(from - sg->offset); } -} + if (to) + kunmap_atomic(to); -static void free_data_area(struct tcmu_dev *udev, struct tcmu_cmd *cmd) -{ - bitmap_xor(udev->data_bitmap, udev->data_bitmap, cmd->data_bitmap, - DATA_BLOCK_BITS); + return 0; } static void gather_data_area(struct tcmu_dev *udev, struct tcmu_cmd *cmd, bool bidi) { struct se_cmd *se_cmd = cmd->se_cmd; - int i, block; + int i, dbi; int block_remaining = 0; - void *from, *to; - size_t copy_bytes, from_offset; + void *from = NULL, *to; + size_t copy_bytes, offset; struct scatterlist *sg, *data_sg; + struct page *page; unsigned int data_nents; - DECLARE_BITMAP(bitmap, DATA_BLOCK_BITS); - - bitmap_copy(bitmap, cmd->data_bitmap, DATA_BLOCK_BITS); + uint32_t count = 0; if (!bidi) { data_sg = se_cmd->t_data_sg; data_nents = se_cmd->t_data_nents; } else { - uint32_t count; /* * For bidi case, the first count blocks are for Data-Out @@ -337,30 +487,30 @@ static void gather_data_area(struct tcmu_dev *udev, struct tcmu_cmd *cmd, * the Data-Out buffer blocks should be discarded. */ count = DIV_ROUND_UP(se_cmd->data_length, DATA_BLOCK_SIZE); - while (count--) { - block = find_first_bit(bitmap, DATA_BLOCK_BITS); - clear_bit(block, bitmap); - } data_sg = se_cmd->t_bidi_data_sg; data_nents = se_cmd->t_bidi_data_nents; } + tcmu_cmd_set_dbi_cur(cmd, count); + for_each_sg(data_sg, sg, data_nents, i) { int sg_remaining = sg->length; to = kmap_atomic(sg_page(sg)) + sg->offset; while (sg_remaining > 0) { if (block_remaining == 0) { - block = find_first_bit(bitmap, - DATA_BLOCK_BITS); + if (from) + kunmap_atomic(from); + block_remaining = DATA_BLOCK_SIZE; - clear_bit(block, bitmap); + dbi = tcmu_cmd_get_dbi(cmd); + page = tcmu_get_block_page(udev, dbi); + from = kmap_atomic(page); } copy_bytes = min_t(size_t, sg_remaining, block_remaining); - from_offset = get_block_offset(udev, block, - block_remaining); - from = (void *) udev->mb_addr + from_offset; + offset = DATA_BLOCK_SIZE - block_remaining; + from = (void *)(unsigned long)from + offset; tcmu_flush_dcache_range(from, copy_bytes); memcpy(to + sg->length - sg_remaining, from, copy_bytes); @@ -370,12 +520,13 @@ static void gather_data_area(struct tcmu_dev *udev, struct tcmu_cmd *cmd, } kunmap_atomic(to - sg->offset); } + if (from) + kunmap_atomic(from); } -static inline size_t spc_bitmap_free(unsigned long *bitmap) +static inline size_t spc_bitmap_free(unsigned long *bitmap, uint32_t thresh) { - return DATA_BLOCK_SIZE * (DATA_BLOCK_BITS - - bitmap_weight(bitmap, DATA_BLOCK_BITS)); + return DATA_BLOCK_SIZE * (thresh - bitmap_weight(bitmap, thresh)); } /* @@ -384,9 +535,12 @@ static inline size_t spc_bitmap_free(unsigned long *bitmap) * * Called with ring lock held. */ -static bool is_ring_space_avail(struct tcmu_dev *udev, size_t cmd_size, size_t data_needed) +static bool is_ring_space_avail(struct tcmu_dev *udev, struct tcmu_cmd *cmd, + size_t cmd_size, size_t data_needed) { struct tcmu_mailbox *mb = udev->mb_addr; + uint32_t blocks_needed = (data_needed + DATA_BLOCK_SIZE - 1) + / DATA_BLOCK_SIZE; size_t space, cmd_needed; u32 cmd_head; @@ -410,35 +564,63 @@ static bool is_ring_space_avail(struct tcmu_dev *udev, size_t cmd_size, size_t d return false; } - space = spc_bitmap_free(udev->data_bitmap); + /* try to check and get the data blocks as needed */ + space = spc_bitmap_free(udev->data_bitmap, udev->dbi_thresh); if (space < data_needed) { - pr_debug("no data space: only %zu available, but ask for %zu\n", - space, data_needed); - return false; + unsigned long blocks_left = DATA_BLOCK_BITS - udev->dbi_thresh; + unsigned long grow; + + if (blocks_left < blocks_needed) { + pr_debug("no data space: only %lu available, but ask for %zu\n", + blocks_left * DATA_BLOCK_SIZE, + data_needed); + return false; + } + + /* Try to expand the thresh */ + if (!udev->dbi_thresh) { + /* From idle state */ + uint32_t init_thresh = DATA_BLOCK_INIT_BITS; + + udev->dbi_thresh = max(blocks_needed, init_thresh); + } else { + /* + * Grow the data area by max(blocks needed, + * dbi_thresh / 2), but limited to the max + * DATA_BLOCK_BITS size. + */ + grow = max(blocks_needed, udev->dbi_thresh / 2); + udev->dbi_thresh += grow; + if (udev->dbi_thresh > DATA_BLOCK_BITS) + udev->dbi_thresh = DATA_BLOCK_BITS; + } } + if (!tcmu_get_empty_blocks(udev, cmd)) + return false; + return true; } -static inline size_t tcmu_cmd_get_data_length(struct tcmu_cmd *tcmu_cmd) +static inline size_t tcmu_cmd_get_base_cmd_size(size_t iov_cnt) { - struct se_cmd *se_cmd = tcmu_cmd->se_cmd; - size_t data_length = round_up(se_cmd->data_length, DATA_BLOCK_SIZE); - - if (se_cmd->se_cmd_flags & SCF_BIDI) { - BUG_ON(!(se_cmd->t_bidi_data_sg && se_cmd->t_bidi_data_nents)); - data_length += round_up(se_cmd->t_bidi_data_sg->length, - DATA_BLOCK_SIZE); - } - - return data_length; + return max(offsetof(struct tcmu_cmd_entry, req.iov[iov_cnt]), + sizeof(struct tcmu_cmd_entry)); } -static inline uint32_t tcmu_cmd_get_block_cnt(struct tcmu_cmd *tcmu_cmd) +static inline size_t tcmu_cmd_get_cmd_size(struct tcmu_cmd *tcmu_cmd, + size_t base_command_size) { - size_t data_length = tcmu_cmd_get_data_length(tcmu_cmd); + struct se_cmd *se_cmd = tcmu_cmd->se_cmd; + size_t command_size; - return data_length / DATA_BLOCK_SIZE; + command_size = base_command_size + + round_up(scsi_command_size(se_cmd->t_task_cdb), + TCMU_OP_ALIGN_SIZE); + + WARN_ON(command_size & (TCMU_OP_ALIGN_SIZE-1)); + + return command_size; } static sense_reason_t @@ -450,12 +632,11 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd) struct tcmu_mailbox *mb; struct tcmu_cmd_entry *entry; struct iovec *iov; - int iov_cnt; + int iov_cnt, ret; uint32_t cmd_head; uint64_t cdb_off; bool copy_to_data_area; size_t data_length = tcmu_cmd_get_data_length(tcmu_cmd); - DECLARE_BITMAP(old_bitmap, DATA_BLOCK_BITS); if (test_bit(TCMU_DEV_BIT_BROKEN, &udev->flags)) return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; @@ -464,18 +645,18 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd) * Must be a certain minimum size for response sense info, but * also may be larger if the iov array is large. * - * We prepare way too many iovs for potential uses here, because it's - * expensive to tell how many regions are freed in the bitmap - */ - base_command_size = max(offsetof(struct tcmu_cmd_entry, - req.iov[tcmu_cmd_get_block_cnt(tcmu_cmd)]), - sizeof(struct tcmu_cmd_entry)); - command_size = base_command_size - + round_up(scsi_command_size(se_cmd->t_task_cdb), TCMU_OP_ALIGN_SIZE); - - WARN_ON(command_size & (TCMU_OP_ALIGN_SIZE-1)); + * We prepare as many iovs as possbile for potential uses here, + * because it's expensive to tell how many regions are freed in + * the bitmap & global data pool, as the size calculated here + * will only be used to do the checks. + * + * The size will be recalculated later as actually needed to save + * cmd area memories. + */ + base_command_size = tcmu_cmd_get_base_cmd_size(tcmu_cmd->dbi_cnt); + command_size = tcmu_cmd_get_cmd_size(tcmu_cmd, base_command_size); - spin_lock_irq(&udev->cmdr_lock); + mutex_lock(&udev->cmdr_lock); mb = udev->mb_addr; cmd_head = mb->cmd_head % udev->cmdr_size; /* UAM */ @@ -484,18 +665,18 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd) pr_warn("TCMU: Request of size %zu/%zu is too big for %u/%zu " "cmd ring/data area\n", command_size, data_length, udev->cmdr_size, udev->data_size); - spin_unlock_irq(&udev->cmdr_lock); + mutex_unlock(&udev->cmdr_lock); return TCM_INVALID_CDB_FIELD; } - while (!is_ring_space_avail(udev, command_size, data_length)) { + while (!is_ring_space_avail(udev, tcmu_cmd, command_size, data_length)) { int ret; DEFINE_WAIT(__wait); prepare_to_wait(&udev->wait_cmdr, &__wait, TASK_INTERRUPTIBLE); pr_debug("sleeping for ring space\n"); - spin_unlock_irq(&udev->cmdr_lock); + mutex_unlock(&udev->cmdr_lock); if (udev->cmd_time_out) ret = schedule_timeout( msecs_to_jiffies(udev->cmd_time_out)); @@ -507,7 +688,7 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd) return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; } - spin_lock_irq(&udev->cmdr_lock); + mutex_lock(&udev->cmdr_lock); /* We dropped cmdr_lock, cmd_head is stale */ cmd_head = mb->cmd_head % udev->cmdr_size; /* UAM */ @@ -534,20 +715,26 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd) entry = (void *) mb + CMDR_OFF + cmd_head; tcmu_flush_dcache_range(entry, sizeof(*entry)); tcmu_hdr_set_op(&entry->hdr.len_op, TCMU_OP_CMD); - tcmu_hdr_set_len(&entry->hdr.len_op, command_size); entry->hdr.cmd_id = tcmu_cmd->cmd_id; entry->hdr.kflags = 0; entry->hdr.uflags = 0; - bitmap_copy(old_bitmap, udev->data_bitmap, DATA_BLOCK_BITS); - /* Handle allocating space from the data area */ + tcmu_cmd_reset_dbi_cur(tcmu_cmd); iov = &entry->req.iov[0]; iov_cnt = 0; copy_to_data_area = (se_cmd->data_direction == DMA_TO_DEVICE || se_cmd->se_cmd_flags & SCF_BIDI); - alloc_and_scatter_data_area(udev, se_cmd->t_data_sg, - se_cmd->t_data_nents, &iov, &iov_cnt, copy_to_data_area); + ret = scatter_data_area(udev, tcmu_cmd, se_cmd->t_data_sg, + se_cmd->t_data_nents, &iov, &iov_cnt, + copy_to_data_area); + if (ret) { + tcmu_cmd_free_data(tcmu_cmd, tcmu_cmd->dbi_cnt); + mutex_unlock(&udev->cmdr_lock); + + pr_err("tcmu: alloc and scatter data failed\n"); + return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; + } entry->req.iov_cnt = iov_cnt; entry->req.iov_dif_cnt = 0; @@ -555,14 +742,29 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd) if (se_cmd->se_cmd_flags & SCF_BIDI) { iov_cnt = 0; iov++; - alloc_and_scatter_data_area(udev, se_cmd->t_bidi_data_sg, - se_cmd->t_bidi_data_nents, &iov, &iov_cnt, - false); + ret = scatter_data_area(udev, tcmu_cmd, + se_cmd->t_bidi_data_sg, + se_cmd->t_bidi_data_nents, + &iov, &iov_cnt, false); + if (ret) { + tcmu_cmd_free_data(tcmu_cmd, tcmu_cmd->dbi_cnt); + mutex_unlock(&udev->cmdr_lock); + + pr_err("tcmu: alloc and scatter bidi data failed\n"); + return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; + } entry->req.iov_bidi_cnt = iov_cnt; } - /* cmd's data_bitmap is what changed in process */ - bitmap_xor(tcmu_cmd->data_bitmap, old_bitmap, udev->data_bitmap, - DATA_BLOCK_BITS); + + /* + * Recalaulate the command's base size and size according + * to the actual needs + */ + base_command_size = tcmu_cmd_get_base_cmd_size(entry->req.iov_cnt + + entry->req.iov_bidi_cnt); + command_size = tcmu_cmd_get_cmd_size(tcmu_cmd, base_command_size); + + tcmu_hdr_set_len(&entry->hdr.len_op, command_size); /* All offsets relative to mb_addr, not start of entry! */ cdb_off = CMDR_OFF + cmd_head + base_command_size; @@ -572,8 +774,7 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd) UPDATE_HEAD(mb->cmd_head, command_size, udev->cmdr_size); tcmu_flush_dcache_range(mb, sizeof(*mb)); - - spin_unlock_irq(&udev->cmdr_lock); + mutex_unlock(&udev->cmdr_lock); /* TODO: only if FLUSH and FUA? */ uio_event_notify(&udev->uio_info); @@ -604,7 +805,7 @@ tcmu_queue_cmd(struct se_cmd *se_cmd) idr_remove(&udev->commands, tcmu_cmd->cmd_id); spin_unlock_irq(&udev->commands_lock); - kmem_cache_free(tcmu_cmd_cache, tcmu_cmd); + tcmu_free_cmd(tcmu_cmd); } return ret; @@ -615,50 +816,45 @@ static void tcmu_handle_completion(struct tcmu_cmd *cmd, struct tcmu_cmd_entry * struct se_cmd *se_cmd = cmd->se_cmd; struct tcmu_dev *udev = cmd->tcmu_dev; - if (test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags)) { - /* - * cmd has been completed already from timeout, just reclaim - * data area space and free cmd - */ - free_data_area(udev, cmd); + /* + * cmd has been completed already from timeout, just reclaim + * data area space and free cmd + */ + if (test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags)) + goto out; - kmem_cache_free(tcmu_cmd_cache, cmd); - return; - } + tcmu_cmd_reset_dbi_cur(cmd); if (entry->hdr.uflags & TCMU_UFLAG_UNKNOWN_OP) { - free_data_area(udev, cmd); pr_warn("TCMU: Userspace set UNKNOWN_OP flag on se_cmd %p\n", cmd->se_cmd); entry->rsp.scsi_status = SAM_STAT_CHECK_CONDITION; } else if (entry->rsp.scsi_status == SAM_STAT_CHECK_CONDITION) { memcpy(se_cmd->sense_buffer, entry->rsp.sense_buffer, se_cmd->scsi_sense_length); - free_data_area(udev, cmd); } else if (se_cmd->se_cmd_flags & SCF_BIDI) { /* Get Data-In buffer before clean up */ gather_data_area(udev, cmd, true); - free_data_area(udev, cmd); } else if (se_cmd->data_direction == DMA_FROM_DEVICE) { gather_data_area(udev, cmd, false); - free_data_area(udev, cmd); } else if (se_cmd->data_direction == DMA_TO_DEVICE) { - free_data_area(udev, cmd); + /* TODO: */ } else if (se_cmd->data_direction != DMA_NONE) { pr_warn("TCMU: data direction was %d!\n", se_cmd->data_direction); } target_complete_cmd(cmd->se_cmd, entry->rsp.scsi_status); - cmd->se_cmd = NULL; - kmem_cache_free(tcmu_cmd_cache, cmd); +out: + cmd->se_cmd = NULL; + tcmu_cmd_free_data(cmd, cmd->dbi_cnt); + tcmu_free_cmd(cmd); } static unsigned int tcmu_handle_completions(struct tcmu_dev *udev) { struct tcmu_mailbox *mb; - unsigned long flags; int handled = 0; if (test_bit(TCMU_DEV_BIT_BROKEN, &udev->flags)) { @@ -666,8 +862,6 @@ static unsigned int tcmu_handle_completions(struct tcmu_dev *udev) return 0; } - spin_lock_irqsave(&udev->cmdr_lock, flags); - mb = udev->mb_addr; tcmu_flush_dcache_range(mb, sizeof(*mb)); @@ -708,8 +902,6 @@ static unsigned int tcmu_handle_completions(struct tcmu_dev *udev) if (mb->cmd_tail == mb->cmd_head) del_timer(&udev->timeout); /* no more pending cmds */ - spin_unlock_irqrestore(&udev->cmdr_lock, flags); - wake_up(&udev->wait_cmdr); return handled; @@ -736,16 +928,14 @@ static void tcmu_device_timedout(unsigned long data) { struct tcmu_dev *udev = (struct tcmu_dev *)data; unsigned long flags; - int handled; - - handled = tcmu_handle_completions(udev); - - pr_warn("%d completions handled from timeout\n", handled); spin_lock_irqsave(&udev->commands_lock, flags); idr_for_each(&udev->commands, tcmu_check_expired_cmd, NULL); spin_unlock_irqrestore(&udev->commands_lock, flags); + /* Try to wake up the ummap thread */ + wake_up(&unmap_wait); + /* * We don't need to wakeup threads on wait_cmdr since they have their * own timeout. @@ -790,7 +980,7 @@ static struct se_device *tcmu_alloc_device(struct se_hba *hba, const char *name) udev->cmd_time_out = TCMU_TIME_OUT; init_waitqueue_head(&udev->wait_cmdr); - spin_lock_init(&udev->cmdr_lock); + mutex_init(&udev->cmdr_lock); idr_init(&udev->commands); spin_lock_init(&udev->commands_lock); @@ -805,7 +995,9 @@ static int tcmu_irqcontrol(struct uio_info *info, s32 irq_on) { struct tcmu_dev *tcmu_dev = container_of(info, struct tcmu_dev, uio_info); + mutex_lock(&tcmu_dev->cmdr_lock); tcmu_handle_completions(tcmu_dev); + mutex_unlock(&tcmu_dev->cmdr_lock); return 0; } @@ -827,6 +1019,60 @@ static int tcmu_find_mem_index(struct vm_area_struct *vma) return -1; } +static struct page *tcmu_try_get_block_page(struct tcmu_dev *udev, uint32_t dbi) +{ + struct page *page; + int ret; + + mutex_lock(&udev->cmdr_lock); + page = tcmu_get_block_page(udev, dbi); + if (likely(page)) { + mutex_unlock(&udev->cmdr_lock); + return page; + } + + /* + * Normally it shouldn't be here: + * Only when the userspace has touched the blocks which + * are out of the tcmu_cmd's data iov[], and will return + * one zeroed page. + */ + pr_warn("Block(%u) out of cmd's iov[] has been touched!\n", dbi); + pr_warn("Mostly it will be a bug of userspace, please have a check!\n"); + + if (dbi >= udev->dbi_thresh) { + /* Extern the udev->dbi_thresh to dbi + 1 */ + udev->dbi_thresh = dbi + 1; + udev->dbi_max = dbi; + } + + page = radix_tree_lookup(&udev->data_blocks, dbi); + if (!page) { + page = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!page) { + mutex_unlock(&udev->cmdr_lock); + return NULL; + } + + ret = radix_tree_insert(&udev->data_blocks, dbi, page); + if (ret) { + mutex_unlock(&udev->cmdr_lock); + __free_page(page); + return NULL; + } + + /* + * Since this case is rare in page fault routine, here we + * will allow the global_db_count >= TCMU_GLOBAL_MAX_BLOCKS + * to reduce possible page fault call trace. + */ + atomic_inc(&global_db_count); + } + mutex_unlock(&udev->cmdr_lock); + + return page; +} + static int tcmu_vma_fault(struct vm_fault *vmf) { struct tcmu_dev *udev = vmf->vma->vm_private_data; @@ -845,11 +1091,20 @@ static int tcmu_vma_fault(struct vm_fault *vmf) */ offset = (vmf->pgoff - mi) << PAGE_SHIFT; - addr = (void *)(unsigned long)info->mem[mi].addr + offset; - if (info->mem[mi].memtype == UIO_MEM_LOGICAL) - page = virt_to_page(addr); - else + if (offset < udev->data_off) { + /* For the vmalloc()ed cmd area pages */ + addr = (void *)(unsigned long)info->mem[mi].addr + offset; page = vmalloc_to_page(addr); + } else { + uint32_t dbi; + + /* For the dynamically growing data area pages */ + dbi = (offset - udev->data_off) / DATA_BLOCK_SIZE; + page = tcmu_try_get_block_page(udev, dbi); + if (!page) + return VM_FAULT_NOPAGE; + } + get_page(page); vmf->page = page; return 0; @@ -883,6 +1138,8 @@ static int tcmu_open(struct uio_info *info, struct inode *inode) if (test_and_set_bit(TCMU_DEV_BIT_OPEN, &udev->flags)) return -EBUSY; + udev->inode = inode; + pr_debug("open\n"); return 0; @@ -963,7 +1220,7 @@ static int tcmu_configure_device(struct se_device *dev) info->name = str; - udev->mb_addr = vzalloc(TCMU_RING_SIZE); + udev->mb_addr = vzalloc(CMDR_SIZE); if (!udev->mb_addr) { ret = -ENOMEM; goto err_vzalloc; @@ -972,8 +1229,11 @@ static int tcmu_configure_device(struct se_device *dev) /* mailbox fits in first part of CMDR space */ udev->cmdr_size = CMDR_SIZE - CMDR_OFF; udev->data_off = CMDR_SIZE; - udev->data_size = TCMU_RING_SIZE - CMDR_SIZE; + udev->data_size = DATA_SIZE; + udev->dbi_thresh = 0; /* Default in Idle state */ + udev->waiting_global = false; + /* Initialise the mailbox of the ring buffer */ mb = udev->mb_addr; mb->version = TCMU_MAILBOX_VERSION; mb->flags = TCMU_MAILBOX_FLAG_CAP_OOOC; @@ -984,12 +1244,14 @@ static int tcmu_configure_device(struct se_device *dev) WARN_ON(udev->data_size % PAGE_SIZE); WARN_ON(udev->data_size % DATA_BLOCK_SIZE); + INIT_RADIX_TREE(&udev->data_blocks, GFP_KERNEL); + info->version = __stringify(TCMU_MAILBOX_VERSION); info->mem[0].name = "tcm-user command & data buffer"; info->mem[0].addr = (phys_addr_t)(uintptr_t)udev->mb_addr; info->mem[0].size = TCMU_RING_SIZE; - info->mem[0].memtype = UIO_MEM_VIRTUAL; + info->mem[0].memtype = UIO_MEM_NONE; info->irqcontrol = tcmu_irqcontrol; info->irq = UIO_IRQ_CUSTOM; @@ -1015,6 +1277,10 @@ static int tcmu_configure_device(struct se_device *dev) if (ret) goto err_netlink; + mutex_lock(&root_udev_mutex); + list_add(&udev->node, &root_udev); + mutex_unlock(&root_udev_mutex); + return 0; err_netlink: @@ -1049,6 +1315,23 @@ static bool tcmu_dev_configured(struct tcmu_dev *udev) return udev->uio_info.uio_dev ? true : false; } +static void tcmu_blocks_release(struct tcmu_dev *udev) +{ + int i; + struct page *page; + + /* Try to release all block pages */ + mutex_lock(&udev->cmdr_lock); + for (i = 0; i <= udev->dbi_max; i++) { + page = radix_tree_delete(&udev->data_blocks, i); + if (page) { + __free_page(page); + atomic_dec(&global_db_count); + } + } + mutex_unlock(&udev->cmdr_lock); +} + static void tcmu_free_device(struct se_device *dev) { struct tcmu_dev *udev = TCMU_DEV(dev); @@ -1058,6 +1341,10 @@ static void tcmu_free_device(struct se_device *dev) del_timer_sync(&udev->timeout); + mutex_lock(&root_udev_mutex); + list_del(&udev->node); + mutex_unlock(&root_udev_mutex); + vfree(udev->mb_addr); /* Upper layer should drain all requests before calling this */ @@ -1070,6 +1357,8 @@ static void tcmu_free_device(struct se_device *dev) spin_unlock_irq(&udev->commands_lock); WARN_ON(!all_expired); + tcmu_blocks_release(udev); + if (tcmu_dev_configured(udev)) { tcmu_netlink_event(TCMU_CMD_REMOVED_DEVICE, udev->uio_info.name, udev->uio_info.uio_dev->minor); @@ -1256,6 +1545,84 @@ static struct target_backend_ops tcmu_ops = { .tb_dev_attrib_attrs = NULL, }; +static int unmap_thread_fn(void *data) +{ + struct tcmu_dev *udev; + loff_t off; + uint32_t start, end, block; + struct page *page; + int i; + + while (1) { + DEFINE_WAIT(__wait); + + prepare_to_wait(&unmap_wait, &__wait, TASK_INTERRUPTIBLE); + schedule(); + finish_wait(&unmap_wait, &__wait); + + if (kthread_should_stop()) + break; + + mutex_lock(&root_udev_mutex); + list_for_each_entry(udev, &root_udev, node) { + mutex_lock(&udev->cmdr_lock); + + /* Try to complete the finished commands first */ + tcmu_handle_completions(udev); + + /* Skip the udevs waiting the global pool or in idle */ + if (udev->waiting_global || !udev->dbi_thresh) { + mutex_unlock(&udev->cmdr_lock); + continue; + } + + end = udev->dbi_max + 1; + block = find_last_bit(udev->data_bitmap, end); + if (block == udev->dbi_max) { + /* + * The last bit is dbi_max, so there is + * no need to shrink any blocks. + */ + mutex_unlock(&udev->cmdr_lock); + continue; + } else if (block == end) { + /* The current udev will goto idle state */ + udev->dbi_thresh = start = 0; + udev->dbi_max = 0; + } else { + udev->dbi_thresh = start = block + 1; + udev->dbi_max = block; + } + + /* Here will truncate the data area from off */ + off = udev->data_off + start * DATA_BLOCK_SIZE; + unmap_mapping_range(udev->inode->i_mapping, off, 0, 1); + + /* Release the block pages */ + for (i = start; i < end; i++) { + page = radix_tree_delete(&udev->data_blocks, i); + if (page) { + __free_page(page); + atomic_dec(&global_db_count); + } + } + mutex_unlock(&udev->cmdr_lock); + } + + /* + * Try to wake up the udevs who are waiting + * for the global data pool. + */ + list_for_each_entry(udev, &root_udev, node) { + if (udev->waiting_global) + wake_up(&udev->wait_cmdr); + } + mutex_unlock(&root_udev_mutex); + } + + return 0; +} + static int __init tcmu_module_init(void) { int ret, i, len = 0; @@ -1301,8 +1668,17 @@ static int __init tcmu_module_init(void) if (ret) goto out_attrs; + init_waitqueue_head(&unmap_wait); + unmap_thread = kthread_run(unmap_thread_fn, NULL, "tcmu_unmap"); + if (IS_ERR(unmap_thread)) { + ret = PTR_ERR(unmap_thread); + goto out_unreg_transport; + } + return 0; +out_unreg_transport: + target_backend_unregister(&tcmu_ops); out_attrs: kfree(tcmu_attrs); out_unreg_genl: @@ -1317,6 +1693,7 @@ out_free_cache: static void __exit tcmu_module_exit(void) { + kthread_stop(unmap_thread); target_backend_unregister(&tcmu_ops); kfree(tcmu_attrs); genl_unregister_family(&tcmu_genl_family); diff --git a/include/scsi/scsi_proto.h b/include/scsi/scsi_proto.h index 6ba66e01f6df..ce78ec8e367d 100644 --- a/include/scsi/scsi_proto.h +++ b/include/scsi/scsi_proto.h @@ -112,6 +112,7 @@ #define WRITE_16 0x8a #define READ_ATTRIBUTE 0x8c #define WRITE_ATTRIBUTE 0x8d +#define WRITE_VERIFY_16 0x8e #define VERIFY_16 0x8f #define SYNCHRONIZE_CACHE_16 0x91 #define WRITE_SAME_16 0x93 diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h index 1b0f447ce850..e475531565fd 100644 --- a/include/target/target_core_backend.h +++ b/include/target/target_core_backend.h @@ -10,6 +10,7 @@ * backend module. */ #define TRANSPORT_FLAG_PASSTHROUGH_ALUA 0x2 +#define TRANSPORT_FLAG_PASSTHROUGH_PGR 0x4 struct request_queue; struct scatterlist; diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index ccfad0e9c2cd..0c1dce2ac6f0 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -664,6 +664,7 @@ struct se_dev_attrib { int pi_prot_format; enum target_prot_type pi_prot_type; enum target_prot_type hw_pi_prot_type; + int pi_prot_verify; int enforce_pr_isids; int force_pr_aptpl; int is_nonrot; |