vhost/scsi: Add ANY_LAYOUT support in vhost_scsi_handle_vq
This patch adds ANY_LAYOUT compatible support within the existing vhost_scsi_handle_vq() ->handle_kick() callback. It calculates data_direction + exp_data_len for the new tcm_vhost_cmd descriptor by walking both outgoing + incoming iovecs using iov_iter, assuming the layout of outgoing request header + T10_PI + Data payload comes first. It also uses copy_from_iter() to copy leading virtio-scsi request header that may or may not include SCSI CDB, that returns a re-calculated iovec to start of T10_PI or Data SGL memory. Also, go ahead and drop the legacy pre virtio v1.0 !ANY_LAYOUT logic. Cc: Michael S. Tsirkin <mst@redhat.com> Cc: Paolo Bonzini <pbonzini@redhat.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
This commit is contained in:
		
							parent
							
								
									e8de56b5e7
								
							
						
					
					
						commit
						09b13fa8c1
					
				| @ -827,93 +827,6 @@ out: | ||||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
| static int | ||||
| vhost_scsi_map_iov_to_sgl(struct tcm_vhost_cmd *cmd, | ||||
| 			  struct iovec *iov, | ||||
| 			  int niov, | ||||
| 			  bool write) | ||||
| { | ||||
| 	struct scatterlist *sg = cmd->tvc_sgl; | ||||
| 	unsigned int sgl_count = 0; | ||||
| 	int ret, i; | ||||
| 
 | ||||
| 	for (i = 0; i < niov; i++) | ||||
| 		sgl_count += iov_num_pages(iov[i].iov_base, iov[i].iov_len); | ||||
| 
 | ||||
| 	if (sgl_count > TCM_VHOST_PREALLOC_SGLS) { | ||||
| 		pr_err("vhost_scsi_map_iov_to_sgl() sgl_count: %u greater than" | ||||
| 			" preallocated TCM_VHOST_PREALLOC_SGLS: %u\n", | ||||
| 			sgl_count, TCM_VHOST_PREALLOC_SGLS); | ||||
| 		return -ENOBUFS; | ||||
| 	} | ||||
| 
 | ||||
| 	pr_debug("%s sg %p sgl_count %u\n", __func__, sg, sgl_count); | ||||
| 	sg_init_table(sg, sgl_count); | ||||
| 	cmd->tvc_sgl_count = sgl_count; | ||||
| 
 | ||||
| 	pr_debug("Mapping iovec %p for %u pages\n", &iov[0], sgl_count); | ||||
| 
 | ||||
| 	for (i = 0; i < niov; i++) { | ||||
| 		ret = vhost_scsi_map_to_sgl(cmd, iov[i].iov_base, iov[i].iov_len, | ||||
| 					    sg, write); | ||||
| 		if (ret < 0) { | ||||
| 			for (i = 0; i < cmd->tvc_sgl_count; i++) { | ||||
| 				struct page *page = sg_page(&cmd->tvc_sgl[i]); | ||||
| 				if (page) | ||||
| 					put_page(page); | ||||
| 			} | ||||
| 			cmd->tvc_sgl_count = 0; | ||||
| 			return ret; | ||||
| 		} | ||||
| 		sg += ret; | ||||
| 		sgl_count -= ret; | ||||
| 	} | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| static int | ||||
| vhost_scsi_map_iov_to_prot(struct tcm_vhost_cmd *cmd, | ||||
| 			   struct iovec *iov, | ||||
| 			   int niov, | ||||
| 			   bool write) | ||||
| { | ||||
| 	struct scatterlist *prot_sg = cmd->tvc_prot_sgl; | ||||
| 	unsigned int prot_sgl_count = 0; | ||||
| 	int ret, i; | ||||
| 
 | ||||
| 	for (i = 0; i < niov; i++) | ||||
| 		prot_sgl_count += iov_num_pages(iov[i].iov_base, iov[i].iov_len); | ||||
| 
 | ||||
| 	if (prot_sgl_count > TCM_VHOST_PREALLOC_PROT_SGLS) { | ||||
| 		pr_err("vhost_scsi_map_iov_to_prot() sgl_count: %u greater than" | ||||
| 			" preallocated TCM_VHOST_PREALLOC_PROT_SGLS: %u\n", | ||||
| 			prot_sgl_count, TCM_VHOST_PREALLOC_PROT_SGLS); | ||||
| 		return -ENOBUFS; | ||||
| 	} | ||||
| 
 | ||||
| 	pr_debug("%s prot_sg %p prot_sgl_count %u\n", __func__, | ||||
| 		 prot_sg, prot_sgl_count); | ||||
| 	sg_init_table(prot_sg, prot_sgl_count); | ||||
| 	cmd->tvc_prot_sgl_count = prot_sgl_count; | ||||
| 
 | ||||
| 	for (i = 0; i < niov; i++) { | ||||
| 		ret = vhost_scsi_map_to_sgl(cmd, iov[i].iov_base, iov[i].iov_len, | ||||
| 					    prot_sg, write); | ||||
| 		if (ret < 0) { | ||||
| 			for (i = 0; i < cmd->tvc_prot_sgl_count; i++) { | ||||
| 				struct page *page = sg_page(&cmd->tvc_prot_sgl[i]); | ||||
| 				if (page) | ||||
| 					put_page(page); | ||||
| 			} | ||||
| 			cmd->tvc_prot_sgl_count = 0; | ||||
| 			return ret; | ||||
| 		} | ||||
| 		prot_sg += ret; | ||||
| 		prot_sgl_count -= ret; | ||||
| 	} | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| static int | ||||
| vhost_scsi_calc_sgls(struct iov_iter *iter, size_t bytes, int max_sgls) | ||||
| { | ||||
| @ -1064,19 +977,20 @@ vhost_scsi_send_bad_target(struct vhost_scsi *vs, | ||||
| static void | ||||
| vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) | ||||
| { | ||||
| 	struct tcm_vhost_tpg **vs_tpg; | ||||
| 	struct tcm_vhost_tpg **vs_tpg, *tpg; | ||||
| 	struct virtio_scsi_cmd_req v_req; | ||||
| 	struct virtio_scsi_cmd_req_pi v_req_pi; | ||||
| 	struct tcm_vhost_tpg *tpg; | ||||
| 	struct tcm_vhost_cmd *cmd; | ||||
| 	struct iov_iter out_iter, in_iter, prot_iter, data_iter; | ||||
| 	u64 tag; | ||||
| 	u32 exp_data_len, data_first, data_num, data_direction, prot_first; | ||||
| 	unsigned out, in, i; | ||||
| 	int head, ret, data_niov, prot_niov, prot_bytes; | ||||
| 	size_t req_size; | ||||
| 	u32 exp_data_len, data_direction; | ||||
| 	unsigned out, in; | ||||
| 	int head, ret, prot_bytes; | ||||
| 	size_t req_size, rsp_size = sizeof(struct virtio_scsi_cmd_resp); | ||||
| 	size_t out_size, in_size; | ||||
| 	u16 lun; | ||||
| 	u8 *target, *lunp, task_attr; | ||||
| 	bool hdr_pi; | ||||
| 	bool t10_pi = vhost_has_feature(vq, VIRTIO_SCSI_F_T10_PI); | ||||
| 	void *req, *cdb; | ||||
| 
 | ||||
| 	mutex_lock(&vq->mutex); | ||||
| @ -1107,117 +1021,134 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) | ||||
| 			} | ||||
| 			break; | ||||
| 		} | ||||
| 
 | ||||
| 		/* FIXME: BIDI operation */ | ||||
| 		if (out == 1 && in == 1) { | ||||
| 			data_direction = DMA_NONE; | ||||
| 			data_first = 0; | ||||
| 			data_num = 0; | ||||
| 		} else if (out == 1 && in > 1) { | ||||
| 			data_direction = DMA_FROM_DEVICE; | ||||
| 			data_first = out + 1; | ||||
| 			data_num = in - 1; | ||||
| 		} else if (out > 1 && in == 1) { | ||||
| 			data_direction = DMA_TO_DEVICE; | ||||
| 			data_first = 1; | ||||
| 			data_num = out - 1; | ||||
| 		} else { | ||||
| 			vq_err(vq, "Invalid buffer layout out: %u in: %u\n", | ||||
| 					out, in); | ||||
| 			break; | ||||
| 		} | ||||
| 
 | ||||
| 		/*
 | ||||
| 		 * Check for a sane resp buffer so we can report errors to | ||||
| 		 * the guest. | ||||
| 		 * Check for a sane response buffer so we can report early | ||||
| 		 * errors back to the guest. | ||||
| 		 */ | ||||
| 		if (unlikely(vq->iov[out].iov_len != | ||||
| 					sizeof(struct virtio_scsi_cmd_resp))) { | ||||
| 			vq_err(vq, "Expecting virtio_scsi_cmd_resp, got %zu" | ||||
| 				" bytes\n", vq->iov[out].iov_len); | ||||
| 		if (unlikely(vq->iov[out].iov_len < rsp_size)) { | ||||
| 			vq_err(vq, "Expecting at least virtio_scsi_cmd_resp" | ||||
| 				" size, got %zu bytes\n", vq->iov[out].iov_len); | ||||
| 			break; | ||||
| 		} | ||||
| 
 | ||||
| 		if (vhost_has_feature(vq, VIRTIO_SCSI_F_T10_PI)) { | ||||
| 		/*
 | ||||
| 		 * Setup pointers and values based upon different virtio-scsi | ||||
| 		 * request header if T10_PI is enabled in KVM guest. | ||||
| 		 */ | ||||
| 		if (t10_pi) { | ||||
| 			req = &v_req_pi; | ||||
| 			req_size = sizeof(v_req_pi); | ||||
| 			lunp = &v_req_pi.lun[0]; | ||||
| 			target = &v_req_pi.lun[1]; | ||||
| 			req_size = sizeof(v_req_pi); | ||||
| 			hdr_pi = true; | ||||
| 		} else { | ||||
| 			req = &v_req; | ||||
| 			req_size = sizeof(v_req); | ||||
| 			lunp = &v_req.lun[0]; | ||||
| 			target = &v_req.lun[1]; | ||||
| 			req_size = sizeof(v_req); | ||||
| 			hdr_pi = false; | ||||
| 		} | ||||
| 		/*
 | ||||
| 		 * FIXME: Not correct for BIDI operation | ||||
| 		 */ | ||||
| 		out_size = iov_length(vq->iov, out); | ||||
| 		in_size = iov_length(&vq->iov[out], in); | ||||
| 
 | ||||
| 		if (unlikely(vq->iov[0].iov_len < req_size)) { | ||||
| 			pr_err("Expecting virtio-scsi header: %zu, got %zu\n", | ||||
| 			       req_size, vq->iov[0].iov_len); | ||||
| 		/*
 | ||||
| 		 * Copy over the virtio-scsi request header, which for a | ||||
| 		 * ANY_LAYOUT enabled guest may span multiple iovecs, or a | ||||
| 		 * single iovec may contain both the header + outgoing | ||||
| 		 * WRITE payloads. | ||||
| 		 * | ||||
| 		 * copy_from_iter() will advance out_iter, so that it will | ||||
| 		 * point at the start of the outgoing WRITE payload, if | ||||
| 		 * DMA_TO_DEVICE is set. | ||||
| 		 */ | ||||
| 		iov_iter_init(&out_iter, WRITE, vq->iov, out, out_size); | ||||
| 
 | ||||
| 		ret = copy_from_iter(req, req_size, &out_iter); | ||||
| 		if (unlikely(ret != req_size)) { | ||||
| 			vq_err(vq, "Faulted on copy_from_iter\n"); | ||||
| 			vhost_scsi_send_bad_target(vs, vq, head, out); | ||||
| 			continue; | ||||
| 		} | ||||
| 		ret = memcpy_fromiovecend(req, &vq->iov[0], 0, req_size); | ||||
| 		if (unlikely(ret)) { | ||||
| 			vq_err(vq, "Faulted on virtio_scsi_cmd_req\n"); | ||||
| 			vhost_scsi_send_bad_target(vs, vq, head, out); | ||||
| 			continue; | ||||
| 		} | ||||
| 
 | ||||
| 		/* virtio-scsi spec requires byte 0 of the lun to be 1 */ | ||||
| 		if (unlikely(*lunp != 1)) { | ||||
| 			vq_err(vq, "Illegal virtio-scsi lun: %u\n", *lunp); | ||||
| 			vhost_scsi_send_bad_target(vs, vq, head, out); | ||||
| 			continue; | ||||
| 		} | ||||
| 
 | ||||
| 		tpg = ACCESS_ONCE(vs_tpg[*target]); | ||||
| 
 | ||||
| 		/* Target does not exist, fail the request */ | ||||
| 		if (unlikely(!tpg)) { | ||||
| 			/* Target does not exist, fail the request */ | ||||
| 			vhost_scsi_send_bad_target(vs, vq, head, out); | ||||
| 			continue; | ||||
| 		} | ||||
| 
 | ||||
| 		data_niov = data_num; | ||||
| 		prot_niov = prot_first = prot_bytes = 0; | ||||
| 		/*
 | ||||
| 		 * Determine if any protection information iovecs are preceeding | ||||
| 		 * the actual data payload, and adjust data_first + data_niov | ||||
| 		 * values accordingly for vhost_scsi_map_iov_to_sgl() below. | ||||
| 		 * Determine data_direction by calculating the total outgoing | ||||
| 		 * iovec sizes + incoming iovec sizes vs. virtio-scsi request + | ||||
| 		 * response headers respectively. | ||||
| 		 * | ||||
| 		 * Also extract virtio_scsi header bits for vhost_scsi_get_tag() | ||||
| 		 * For DMA_TO_DEVICE this is out_iter, which is already pointing | ||||
| 		 * to the right place. | ||||
| 		 * | ||||
| 		 * For DMA_FROM_DEVICE, the iovec will be just past the end | ||||
| 		 * of the virtio-scsi response header in either the same | ||||
| 		 * or immediately following iovec. | ||||
| 		 * | ||||
| 		 * Any associated T10_PI bytes for the outgoing / incoming | ||||
| 		 * payloads are included in calculation of exp_data_len here. | ||||
| 		 */ | ||||
| 		if (hdr_pi) { | ||||
| 		prot_bytes = 0; | ||||
| 
 | ||||
| 		if (out_size > req_size) { | ||||
| 			data_direction = DMA_TO_DEVICE; | ||||
| 			exp_data_len = out_size - req_size; | ||||
| 			data_iter = out_iter; | ||||
| 		} else if (in_size > rsp_size) { | ||||
| 			data_direction = DMA_FROM_DEVICE; | ||||
| 			exp_data_len = in_size - rsp_size; | ||||
| 
 | ||||
| 			iov_iter_init(&in_iter, READ, &vq->iov[out], in, | ||||
| 				      rsp_size + exp_data_len); | ||||
| 			iov_iter_advance(&in_iter, rsp_size); | ||||
| 			data_iter = in_iter; | ||||
| 		} else { | ||||
| 			data_direction = DMA_NONE; | ||||
| 			exp_data_len = 0; | ||||
| 		} | ||||
| 		/*
 | ||||
| 		 * If T10_PI header + payload is present, setup prot_iter values | ||||
| 		 * and recalculate data_iter for vhost_scsi_mapal() mapping to | ||||
| 		 * host scatterlists via get_user_pages_fast(). | ||||
| 		 */ | ||||
| 		if (t10_pi) { | ||||
| 			if (v_req_pi.pi_bytesout) { | ||||
| 				if (data_direction != DMA_TO_DEVICE) { | ||||
| 					vq_err(vq, "Received non zero do_pi_niov" | ||||
| 						", but wrong data_direction\n"); | ||||
| 					vq_err(vq, "Received non zero pi_bytesout," | ||||
| 						" but wrong data_direction\n"); | ||||
| 					vhost_scsi_send_bad_target(vs, vq, head, out); | ||||
| 					continue; | ||||
| 				} | ||||
| 				prot_bytes = vhost32_to_cpu(vq, v_req_pi.pi_bytesout); | ||||
| 			} else if (v_req_pi.pi_bytesin) { | ||||
| 				if (data_direction != DMA_FROM_DEVICE) { | ||||
| 					vq_err(vq, "Received non zero di_pi_niov" | ||||
| 						", but wrong data_direction\n"); | ||||
| 					vq_err(vq, "Received non zero pi_bytesin," | ||||
| 						" but wrong data_direction\n"); | ||||
| 					vhost_scsi_send_bad_target(vs, vq, head, out); | ||||
| 					continue; | ||||
| 				} | ||||
| 				prot_bytes = vhost32_to_cpu(vq, v_req_pi.pi_bytesin); | ||||
| 			} | ||||
| 			/*
 | ||||
| 			 * Set prot_iter to data_iter, and advance past any | ||||
| 			 * preceeding prot_bytes that may be present. | ||||
| 			 * | ||||
| 			 * Also fix up the exp_data_len to reflect only the | ||||
| 			 * actual data payload length. | ||||
| 			 */ | ||||
| 			if (prot_bytes) { | ||||
| 				int tmp = 0; | ||||
| 
 | ||||
| 				for (i = 0; i < data_num; i++) { | ||||
| 					tmp += vq->iov[data_first + i].iov_len; | ||||
| 					prot_niov++; | ||||
| 					if (tmp >= prot_bytes) | ||||
| 						break; | ||||
| 				} | ||||
| 				prot_first = data_first; | ||||
| 				data_first += prot_niov; | ||||
| 				data_niov = data_num - prot_niov; | ||||
| 				exp_data_len -= prot_bytes; | ||||
| 				prot_iter = data_iter; | ||||
| 				iov_iter_advance(&data_iter, prot_bytes); | ||||
| 			} | ||||
| 			tag = vhost64_to_cpu(vq, v_req_pi.tag); | ||||
| 			task_attr = v_req_pi.task_attr; | ||||
| @ -1229,12 +1160,10 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) | ||||
| 			cdb = &v_req.cdb[0]; | ||||
| 			lun = ((v_req.lun[2] << 8) | v_req.lun[3]) & 0x3FFF; | ||||
| 		} | ||||
| 		exp_data_len = 0; | ||||
| 		for (i = 0; i < data_niov; i++) | ||||
| 			exp_data_len += vq->iov[data_first + i].iov_len; | ||||
| 		/*
 | ||||
| 		 * Check that the recieved CDB size does not exceeded our | ||||
| 		 * hardcoded max for vhost-scsi | ||||
| 		 * Check that the received CDB size does not exceeded our | ||||
| 		 * hardcoded max for vhost-scsi, then get a pre-allocated | ||||
| 		 * cmd descriptor for the new virtio-scsi tag. | ||||
| 		 * | ||||
| 		 * TODO what if cdb was too small for varlen cdb header? | ||||
| 		 */ | ||||
| @ -1245,7 +1174,6 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) | ||||
| 			vhost_scsi_send_bad_target(vs, vq, head, out); | ||||
| 			continue; | ||||
| 		} | ||||
| 
 | ||||
| 		cmd = vhost_scsi_get_tag(vq, tpg, cdb, tag, lun, task_attr, | ||||
| 					 exp_data_len + prot_bytes, | ||||
| 					 data_direction); | ||||
| @ -1255,10 +1183,6 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) | ||||
| 			vhost_scsi_send_bad_target(vs, vq, head, out); | ||||
| 			continue; | ||||
| 		} | ||||
| 
 | ||||
| 		pr_debug("Allocated tv_cmd: %p exp_data_len: %d, data_direction" | ||||
| 			": %d\n", cmd, exp_data_len, data_direction); | ||||
| 
 | ||||
| 		cmd->tvc_vhost = vs; | ||||
| 		cmd->tvc_vq = vq; | ||||
| 		cmd->tvc_resp_iov = &vq->iov[out]; | ||||
| @ -1266,23 +1190,13 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) | ||||
| 
 | ||||
| 		pr_debug("vhost_scsi got command opcode: %#02x, lun: %d\n", | ||||
| 			 cmd->tvc_cdb[0], cmd->tvc_lun); | ||||
| 		pr_debug("cmd: %p exp_data_len: %d, prot_bytes: %d data_direction:" | ||||
| 			 " %d\n", cmd, exp_data_len, prot_bytes, data_direction); | ||||
| 
 | ||||
| 		if (prot_niov) { | ||||
| 			ret = vhost_scsi_map_iov_to_prot(cmd, | ||||
| 					&vq->iov[prot_first], prot_niov, | ||||
| 					data_direction == DMA_FROM_DEVICE); | ||||
| 			if (unlikely(ret)) { | ||||
| 				vq_err(vq, "Failed to map iov to" | ||||
| 					" prot_sgl\n"); | ||||
| 				tcm_vhost_release_cmd(&cmd->tvc_se_cmd); | ||||
| 				vhost_scsi_send_bad_target(vs, vq, head, out); | ||||
| 				continue; | ||||
| 			} | ||||
| 		} | ||||
| 		if (data_direction != DMA_NONE) { | ||||
| 			ret = vhost_scsi_map_iov_to_sgl(cmd, | ||||
| 					&vq->iov[data_first], data_niov, | ||||
| 					data_direction == DMA_FROM_DEVICE); | ||||
| 			ret = vhost_scsi_mapal(cmd, | ||||
| 					       prot_bytes, &prot_iter, | ||||
| 					       exp_data_len, &data_iter); | ||||
| 			if (unlikely(ret)) { | ||||
| 				vq_err(vq, "Failed to map iov to sgl\n"); | ||||
| 				tcm_vhost_release_cmd(&cmd->tvc_se_cmd); | ||||
| @ -1293,14 +1207,14 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) | ||||
| 		/*
 | ||||
| 		 * Save the descriptor from vhost_get_vq_desc() to be used to | ||||
| 		 * complete the virtio-scsi request in TCM callback context via | ||||
| 		 * tcm_vhost_queue_data_in() and tcm_vhost_queue_status() | ||||
| 		 * vhost_scsi_queue_data_in() and vhost_scsi_queue_status() | ||||
| 		 */ | ||||
| 		cmd->tvc_vq_desc = head; | ||||
| 		/*
 | ||||
| 		 * Dispatch tv_cmd descriptor for cmwq execution in process | ||||
| 		 * context provided by tcm_vhost_workqueue.  This also ensures | ||||
| 		 * tv_cmd is executed on the same kworker CPU as this vhost | ||||
| 		 * thread to gain positive L2 cache locality effects.. | ||||
| 		 * Dispatch cmd descriptor for cmwq execution in process | ||||
| 		 * context provided by vhost_scsi_workqueue.  This also ensures | ||||
| 		 * cmd is executed on the same kworker CPU as this vhost | ||||
| 		 * thread to gain positive L2 cache locality effects. | ||||
| 		 */ | ||||
| 		INIT_WORK(&cmd->work, tcm_vhost_submission_work); | ||||
| 		queue_work(tcm_vhost_workqueue, &cmd->work); | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user