mirror of
https://github.com/torvalds/linux.git
synced 2024-12-23 19:31:53 +00:00
block-5.10-2020-10-24
-----BEGIN PGP SIGNATURE----- iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAl+UQjkQHGF4Ym9lQGtl cm5lbC5kawAKCRD301j7KXHgpvN9D/9iHU7Vgi8J3SiLrHYUiDtMSI5VnEmSBo6K Ej/wbbrk4tm2UYi550krOk0dMaHxWD3XWSTlpnrE0sUfjs69G676yzxrlnPt50f5 XbcMc0YOHZfffeu9xXykUO8Q2918PTPC08eLaTK1I8lhKAuuTFCT/syGYu+prfd7 AogyuczaDok8nqJEK9QNr0iaEUbe17GQwmvpWyjHl/qfKhWvV2r6jCZZf6pzQj2c zv3kbiT3u6xw9OEuhY0sgpTEfhAHEXbNIln6Ob4qVgxmOjwgiZdU/QXyw1i2s6pc ks7e28P43r3VfNYGBfr/hQCeAJT9gOeUG5yBiQr7ooX6uNPL6GOCG7DO/g5y2thQ NkV4hub/FjYWbSmRzDlJGj1fWn4L+3r/O8g5nMr+F1L3JYeaW0hOyStqBQ4O74Cj 04tvWQ8ndXdPQrm/iDhM6KxfCvR5TC6k4fy9XPpRW8JOxauhIwTZQJyEQUnXTH3v pwv3IxRmuWGa3mrJZ5kGhsNAEGHdZCL5soLI+BXAD2MUW2IB5v2HpD/z1bvWL/51 uYiVIt/2LxgLkF7BXP40PnY0qqTsOwGxdd6wQhi5Jn9Et+JkmAAR6cVwXx4AhuQg FT5mq7ZTQBZrErQu4Mr1k3UyqBFm4MB+mbJhWrVWnUnnyA6pcr1NUsUTz5JcyrWz jWI7T1Si7w== =dFJi -----END PGP SIGNATURE----- Merge tag 'block-5.10-2020-10-24' of git://git.kernel.dk/linux-block Pull block fixes from Jens Axboe: - NVMe pull request from Christoph - rdma error handling fixes (Chao Leng) - fc error handling and reconnect fixes (James Smart) - fix the qid displace when tracing ioctl command (Keith Busch) - don't use BLK_MQ_REQ_NOWAIT for passthru (Chaitanya Kulkarni) - fix MTDT for passthru (Logan Gunthorpe) - blacklist Write Same on more devices (Kai-Heng Feng) - fix an uninitialized work struct (zhenwei pi)" - lightnvm out-of-bounds fix (Colin) - SG allocation leak fix (Doug) - rnbd fixes (Gioh, Guoqing, Jack) - zone error translation fixes (Keith) - kerneldoc markup fix (Mauro) - zram lockdep fix (Peter) - Kill unused io_context members (Yufen) - NUMA memory allocation cleanup (Xianting) - NBD config wakeup fix (Xiubo) * tag 'block-5.10-2020-10-24' of git://git.kernel.dk/linux-block: (27 commits) block: blk-mq: fix a kernel-doc markup nvme-fc: shorten reconnect delay if possible for FC nvme-fc: wait for queues to freeze before calling update_hr_hw_queues nvme-fc: fix error loop in create_hw_io_queues nvme-fc: fix io timeout to abort I/O null_blk: use zone status for max active/open nvmet: don't use BLK_MQ_REQ_NOWAIT for passthru nvmet: cleanup nvmet_passthru_map_sg() nvmet: limit passthru MTDS by BIO_MAX_PAGES nvmet: fix uninitialized work for zero kato nvme-pci: disable Write Zeroes on Sandisk Skyhawk nvme: use queuedata for nvme_req_qid nvme-rdma: fix crash due to incorrect cqe nvme-rdma: fix crash when connect rejected block: remove unused members for io_context blk-mq: remove the calling of local_memory_node() zram: Fix __zram_bvec_{read,write}() locking order skd_main: remove unused including <linux/version.h> sgl_alloc_order: fix memory leak lightnvm: fix out-of-bounds write to array devices->info[] ...
This commit is contained in:
commit
d769139081
@ -124,6 +124,10 @@ For zoned block devices (zoned attribute indicating "host-managed" or
|
||||
EXPLICIT OPEN, IMPLICIT OPEN or CLOSED, is limited by this value.
|
||||
If this value is 0, there is no limit.
|
||||
|
||||
If the host attempts to exceed this limit, the driver should report this error
|
||||
with BLK_STS_ZONE_ACTIVE_RESOURCE, which user space may see as the EOVERFLOW
|
||||
errno.
|
||||
|
||||
max_open_zones (RO)
|
||||
-------------------
|
||||
For zoned block devices (zoned attribute indicating "host-managed" or
|
||||
@ -131,6 +135,10 @@ For zoned block devices (zoned attribute indicating "host-managed" or
|
||||
EXPLICIT OPEN or IMPLICIT OPEN, is limited by this value.
|
||||
If this value is 0, there is no limit.
|
||||
|
||||
If the host attempts to exceed this limit, the driver should report this error
|
||||
with BLK_STS_ZONE_OPEN_RESOURCE, which user space may see as the ETOOMANYREFS
|
||||
errno.
|
||||
|
||||
max_sectors_kb (RW)
|
||||
-------------------
|
||||
This is the maximum number of kilobytes that the block layer will allow
|
||||
|
@ -186,6 +186,10 @@ static const struct {
|
||||
/* device mapper special case, should not leak out: */
|
||||
[BLK_STS_DM_REQUEUE] = { -EREMCHG, "dm internal retry" },
|
||||
|
||||
/* zone device specific errors */
|
||||
[BLK_STS_ZONE_OPEN_RESOURCE] = { -ETOOMANYREFS, "open zones exceeded" },
|
||||
[BLK_STS_ZONE_ACTIVE_RESOURCE] = { -EOVERFLOW, "active zones exceeded" },
|
||||
|
||||
/* everything else not covered above: */
|
||||
[BLK_STS_IOERR] = { -EIO, "I/O" },
|
||||
};
|
||||
|
@ -89,7 +89,7 @@ int blk_mq_hw_queue_to_node(struct blk_mq_queue_map *qmap, unsigned int index)
|
||||
|
||||
for_each_possible_cpu(i) {
|
||||
if (index == qmap->mq_map[i])
|
||||
return local_memory_node(cpu_to_node(i));
|
||||
return cpu_to_node(i);
|
||||
}
|
||||
|
||||
return NUMA_NO_NODE;
|
||||
|
@ -1664,7 +1664,7 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
|
||||
EXPORT_SYMBOL(blk_mq_run_hw_queue);
|
||||
|
||||
/**
|
||||
* blk_mq_run_hw_queue - Run all hardware queues in a request queue.
|
||||
* blk_mq_run_hw_queues - Run all hardware queues in a request queue.
|
||||
* @q: Pointer to the request queue to run.
|
||||
* @async: If we want to run the queue asynchronously.
|
||||
*/
|
||||
@ -2743,7 +2743,7 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
|
||||
for (j = 0; j < set->nr_maps; j++) {
|
||||
hctx = blk_mq_map_queue_type(q, j, i);
|
||||
if (nr_hw_queues > 1 && hctx->numa_node == NUMA_NO_NODE)
|
||||
hctx->numa_node = local_memory_node(cpu_to_node(i));
|
||||
hctx->numa_node = cpu_to_node(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -802,9 +802,9 @@ static void recv_work(struct work_struct *work)
|
||||
if (likely(!blk_should_fake_timeout(rq->q)))
|
||||
blk_mq_complete_request(rq);
|
||||
}
|
||||
nbd_config_put(nbd);
|
||||
atomic_dec(&config->recv_threads);
|
||||
wake_up(&config->recv_wq);
|
||||
nbd_config_put(nbd);
|
||||
kfree(args);
|
||||
}
|
||||
|
||||
|
@ -220,29 +220,34 @@ static void null_close_first_imp_zone(struct nullb_device *dev)
|
||||
}
|
||||
}
|
||||
|
||||
static bool null_can_set_active(struct nullb_device *dev)
|
||||
static blk_status_t null_check_active(struct nullb_device *dev)
|
||||
{
|
||||
if (!dev->zone_max_active)
|
||||
return true;
|
||||
return BLK_STS_OK;
|
||||
|
||||
return dev->nr_zones_exp_open + dev->nr_zones_imp_open +
|
||||
dev->nr_zones_closed < dev->zone_max_active;
|
||||
if (dev->nr_zones_exp_open + dev->nr_zones_imp_open +
|
||||
dev->nr_zones_closed < dev->zone_max_active)
|
||||
return BLK_STS_OK;
|
||||
|
||||
return BLK_STS_ZONE_ACTIVE_RESOURCE;
|
||||
}
|
||||
|
||||
static bool null_can_open(struct nullb_device *dev)
|
||||
static blk_status_t null_check_open(struct nullb_device *dev)
|
||||
{
|
||||
if (!dev->zone_max_open)
|
||||
return true;
|
||||
return BLK_STS_OK;
|
||||
|
||||
if (dev->nr_zones_exp_open + dev->nr_zones_imp_open < dev->zone_max_open)
|
||||
return true;
|
||||
return BLK_STS_OK;
|
||||
|
||||
if (dev->nr_zones_imp_open && null_can_set_active(dev)) {
|
||||
null_close_first_imp_zone(dev);
|
||||
return true;
|
||||
if (dev->nr_zones_imp_open) {
|
||||
if (null_check_active(dev) == BLK_STS_OK) {
|
||||
null_close_first_imp_zone(dev);
|
||||
return BLK_STS_OK;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
return BLK_STS_ZONE_OPEN_RESOURCE;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -258,19 +263,22 @@ static bool null_can_open(struct nullb_device *dev)
|
||||
* it is not certain that closing an implicit open zone will allow a new zone
|
||||
* to be opened, since we might already be at the active limit capacity.
|
||||
*/
|
||||
static bool null_has_zone_resources(struct nullb_device *dev, struct blk_zone *zone)
|
||||
static blk_status_t null_check_zone_resources(struct nullb_device *dev, struct blk_zone *zone)
|
||||
{
|
||||
blk_status_t ret;
|
||||
|
||||
switch (zone->cond) {
|
||||
case BLK_ZONE_COND_EMPTY:
|
||||
if (!null_can_set_active(dev))
|
||||
return false;
|
||||
ret = null_check_active(dev);
|
||||
if (ret != BLK_STS_OK)
|
||||
return ret;
|
||||
fallthrough;
|
||||
case BLK_ZONE_COND_CLOSED:
|
||||
return null_can_open(dev);
|
||||
return null_check_open(dev);
|
||||
default:
|
||||
/* Should never be called for other states */
|
||||
WARN_ON(1);
|
||||
return false;
|
||||
return BLK_STS_IOERR;
|
||||
}
|
||||
}
|
||||
|
||||
@ -293,8 +301,9 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
|
||||
return BLK_STS_IOERR;
|
||||
case BLK_ZONE_COND_EMPTY:
|
||||
case BLK_ZONE_COND_CLOSED:
|
||||
if (!null_has_zone_resources(dev, zone))
|
||||
return BLK_STS_IOERR;
|
||||
ret = null_check_zone_resources(dev, zone);
|
||||
if (ret != BLK_STS_OK)
|
||||
return ret;
|
||||
break;
|
||||
case BLK_ZONE_COND_IMP_OPEN:
|
||||
case BLK_ZONE_COND_EXP_OPEN:
|
||||
@ -349,6 +358,8 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
|
||||
|
||||
static blk_status_t null_open_zone(struct nullb_device *dev, struct blk_zone *zone)
|
||||
{
|
||||
blk_status_t ret;
|
||||
|
||||
if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
|
||||
return BLK_STS_IOERR;
|
||||
|
||||
@ -357,15 +368,17 @@ static blk_status_t null_open_zone(struct nullb_device *dev, struct blk_zone *zo
|
||||
/* open operation on exp open is not an error */
|
||||
return BLK_STS_OK;
|
||||
case BLK_ZONE_COND_EMPTY:
|
||||
if (!null_has_zone_resources(dev, zone))
|
||||
return BLK_STS_IOERR;
|
||||
ret = null_check_zone_resources(dev, zone);
|
||||
if (ret != BLK_STS_OK)
|
||||
return ret;
|
||||
break;
|
||||
case BLK_ZONE_COND_IMP_OPEN:
|
||||
dev->nr_zones_imp_open--;
|
||||
break;
|
||||
case BLK_ZONE_COND_CLOSED:
|
||||
if (!null_has_zone_resources(dev, zone))
|
||||
return BLK_STS_IOERR;
|
||||
ret = null_check_zone_resources(dev, zone);
|
||||
if (ret != BLK_STS_OK)
|
||||
return ret;
|
||||
dev->nr_zones_closed--;
|
||||
break;
|
||||
case BLK_ZONE_COND_FULL:
|
||||
@ -381,6 +394,8 @@ static blk_status_t null_open_zone(struct nullb_device *dev, struct blk_zone *zo
|
||||
|
||||
static blk_status_t null_finish_zone(struct nullb_device *dev, struct blk_zone *zone)
|
||||
{
|
||||
blk_status_t ret;
|
||||
|
||||
if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
|
||||
return BLK_STS_IOERR;
|
||||
|
||||
@ -389,8 +404,9 @@ static blk_status_t null_finish_zone(struct nullb_device *dev, struct blk_zone *
|
||||
/* finish operation on full is not an error */
|
||||
return BLK_STS_OK;
|
||||
case BLK_ZONE_COND_EMPTY:
|
||||
if (!null_has_zone_resources(dev, zone))
|
||||
return BLK_STS_IOERR;
|
||||
ret = null_check_zone_resources(dev, zone);
|
||||
if (ret != BLK_STS_OK)
|
||||
return ret;
|
||||
break;
|
||||
case BLK_ZONE_COND_IMP_OPEN:
|
||||
dev->nr_zones_imp_open--;
|
||||
@ -399,8 +415,9 @@ static blk_status_t null_finish_zone(struct nullb_device *dev, struct blk_zone *
|
||||
dev->nr_zones_exp_open--;
|
||||
break;
|
||||
case BLK_ZONE_COND_CLOSED:
|
||||
if (!null_has_zone_resources(dev, zone))
|
||||
return BLK_STS_IOERR;
|
||||
ret = null_check_zone_resources(dev, zone);
|
||||
if (ret != BLK_STS_OK)
|
||||
return ret;
|
||||
dev->nr_zones_closed--;
|
||||
break;
|
||||
default:
|
||||
|
@ -91,11 +91,6 @@ static int rnbd_clt_set_dev_attr(struct rnbd_clt_dev *dev,
|
||||
dev->max_hw_sectors = sess->max_io_size / SECTOR_SIZE;
|
||||
dev->max_segments = BMAX_SEGMENTS;
|
||||
|
||||
dev->max_hw_sectors = min_t(u32, dev->max_hw_sectors,
|
||||
le32_to_cpu(rsp->max_hw_sectors));
|
||||
dev->max_segments = min_t(u16, dev->max_segments,
|
||||
le16_to_cpu(rsp->max_segments));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -427,7 +422,7 @@ enum wait_type {
|
||||
};
|
||||
|
||||
static int send_usr_msg(struct rtrs_clt *rtrs, int dir,
|
||||
struct rnbd_iu *iu, struct kvec *vec, size_t nr,
|
||||
struct rnbd_iu *iu, struct kvec *vec,
|
||||
size_t len, struct scatterlist *sg, unsigned int sg_len,
|
||||
void (*conf)(struct work_struct *work),
|
||||
int *errno, enum wait_type wait)
|
||||
@ -441,7 +436,7 @@ static int send_usr_msg(struct rtrs_clt *rtrs, int dir,
|
||||
.conf_fn = msg_conf,
|
||||
};
|
||||
err = rtrs_clt_request(dir, &req_ops, rtrs, iu->permit,
|
||||
vec, nr, len, sg, sg_len);
|
||||
vec, 1, len, sg, sg_len);
|
||||
if (!err && wait) {
|
||||
wait_event(iu->comp.wait, iu->comp.errno != INT_MAX);
|
||||
*errno = iu->comp.errno;
|
||||
@ -486,7 +481,7 @@ static int send_msg_close(struct rnbd_clt_dev *dev, u32 device_id, bool wait)
|
||||
msg.device_id = cpu_to_le32(device_id);
|
||||
|
||||
WARN_ON(!rnbd_clt_get_dev(dev));
|
||||
err = send_usr_msg(sess->rtrs, WRITE, iu, &vec, 1, 0, NULL, 0,
|
||||
err = send_usr_msg(sess->rtrs, WRITE, iu, &vec, 0, NULL, 0,
|
||||
msg_close_conf, &errno, wait);
|
||||
if (err) {
|
||||
rnbd_clt_put_dev(dev);
|
||||
@ -575,7 +570,7 @@ static int send_msg_open(struct rnbd_clt_dev *dev, bool wait)
|
||||
|
||||
WARN_ON(!rnbd_clt_get_dev(dev));
|
||||
err = send_usr_msg(sess->rtrs, READ, iu,
|
||||
&vec, 1, sizeof(*rsp), iu->sglist, 1,
|
||||
&vec, sizeof(*rsp), iu->sglist, 1,
|
||||
msg_open_conf, &errno, wait);
|
||||
if (err) {
|
||||
rnbd_clt_put_dev(dev);
|
||||
@ -629,7 +624,7 @@ static int send_msg_sess_info(struct rnbd_clt_session *sess, bool wait)
|
||||
goto put_iu;
|
||||
}
|
||||
err = send_usr_msg(sess->rtrs, READ, iu,
|
||||
&vec, 1, sizeof(*rsp), iu->sglist, 1,
|
||||
&vec, sizeof(*rsp), iu->sglist, 1,
|
||||
msg_sess_info_conf, &errno, wait);
|
||||
if (err) {
|
||||
rnbd_clt_put_sess(sess);
|
||||
@ -1514,7 +1509,7 @@ struct rnbd_clt_dev *rnbd_clt_map_device(const char *sessname,
|
||||
"map_device: Failed to configure device, err: %d\n",
|
||||
ret);
|
||||
mutex_unlock(&dev->lock);
|
||||
goto del_dev;
|
||||
goto send_close;
|
||||
}
|
||||
|
||||
rnbd_clt_info(dev,
|
||||
@ -1533,6 +1528,8 @@ struct rnbd_clt_dev *rnbd_clt_map_device(const char *sessname,
|
||||
|
||||
return dev;
|
||||
|
||||
send_close:
|
||||
send_msg_close(dev, dev->device_id, WAIT);
|
||||
del_dev:
|
||||
delete_dev(dev);
|
||||
put_dev:
|
||||
|
@ -25,7 +25,6 @@
|
||||
#include <linux/dma-mapping.h>
|
||||
#include <linux/completion.h>
|
||||
#include <linux/scatterlist.h>
|
||||
#include <linux/version.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/aer.h>
|
||||
#include <linux/wait.h>
|
||||
|
@ -1218,10 +1218,11 @@ out:
|
||||
static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
|
||||
struct bio *bio, bool partial_io)
|
||||
{
|
||||
int ret;
|
||||
struct zcomp_strm *zstrm;
|
||||
unsigned long handle;
|
||||
unsigned int size;
|
||||
void *src, *dst;
|
||||
int ret;
|
||||
|
||||
zram_slot_lock(zram, index);
|
||||
if (zram_test_flag(zram, index, ZRAM_WB)) {
|
||||
@ -1252,6 +1253,9 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
|
||||
|
||||
size = zram_get_obj_size(zram, index);
|
||||
|
||||
if (size != PAGE_SIZE)
|
||||
zstrm = zcomp_stream_get(zram->comp);
|
||||
|
||||
src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO);
|
||||
if (size == PAGE_SIZE) {
|
||||
dst = kmap_atomic(page);
|
||||
@ -1259,8 +1263,6 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
|
||||
kunmap_atomic(dst);
|
||||
ret = 0;
|
||||
} else {
|
||||
struct zcomp_strm *zstrm = zcomp_stream_get(zram->comp);
|
||||
|
||||
dst = kmap_atomic(page);
|
||||
ret = zcomp_decompress(zstrm, src, size, dst);
|
||||
kunmap_atomic(dst);
|
||||
|
@ -1311,8 +1311,9 @@ static long nvm_ioctl_get_devices(struct file *file, void __user *arg)
|
||||
strlcpy(info->bmname, "gennvm", sizeof(info->bmname));
|
||||
i++;
|
||||
|
||||
if (i > 31) {
|
||||
pr_err("max 31 devices can be reported.\n");
|
||||
if (i >= ARRAY_SIZE(devices->info)) {
|
||||
pr_err("max %zd devices can be reported.\n",
|
||||
ARRAY_SIZE(devices->info));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -248,6 +248,10 @@ static blk_status_t nvme_error_status(u16 status)
|
||||
return BLK_STS_NEXUS;
|
||||
case NVME_SC_HOST_PATH_ERROR:
|
||||
return BLK_STS_TRANSPORT;
|
||||
case NVME_SC_ZONE_TOO_MANY_ACTIVE:
|
||||
return BLK_STS_ZONE_ACTIVE_RESOURCE;
|
||||
case NVME_SC_ZONE_TOO_MANY_OPEN:
|
||||
return BLK_STS_ZONE_OPEN_RESOURCE;
|
||||
default:
|
||||
return BLK_STS_IOERR;
|
||||
}
|
||||
|
@ -26,6 +26,10 @@ enum nvme_fc_queue_flags {
|
||||
};
|
||||
|
||||
#define NVME_FC_DEFAULT_DEV_LOSS_TMO 60 /* seconds */
|
||||
#define NVME_FC_DEFAULT_RECONNECT_TMO 2 /* delay between reconnects
|
||||
* when connected and a
|
||||
* connection failure.
|
||||
*/
|
||||
|
||||
struct nvme_fc_queue {
|
||||
struct nvme_fc_ctrl *ctrl;
|
||||
@ -1837,8 +1841,10 @@ __nvme_fc_abort_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_fcp_op *op)
|
||||
opstate = atomic_xchg(&op->state, FCPOP_STATE_ABORTED);
|
||||
if (opstate != FCPOP_STATE_ACTIVE)
|
||||
atomic_set(&op->state, opstate);
|
||||
else if (test_bit(FCCTRL_TERMIO, &ctrl->flags))
|
||||
else if (test_bit(FCCTRL_TERMIO, &ctrl->flags)) {
|
||||
op->flags |= FCOP_FLAGS_TERMIO;
|
||||
ctrl->iocnt++;
|
||||
}
|
||||
spin_unlock_irqrestore(&ctrl->lock, flags);
|
||||
|
||||
if (opstate != FCPOP_STATE_ACTIVE)
|
||||
@ -1874,7 +1880,8 @@ __nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl,
|
||||
|
||||
if (opstate == FCPOP_STATE_ABORTED) {
|
||||
spin_lock_irqsave(&ctrl->lock, flags);
|
||||
if (test_bit(FCCTRL_TERMIO, &ctrl->flags)) {
|
||||
if (test_bit(FCCTRL_TERMIO, &ctrl->flags) &&
|
||||
op->flags & FCOP_FLAGS_TERMIO) {
|
||||
if (!--ctrl->iocnt)
|
||||
wake_up(&ctrl->ioabort_wait);
|
||||
}
|
||||
@ -2314,7 +2321,7 @@ nvme_fc_create_hw_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize)
|
||||
return 0;
|
||||
|
||||
delete_queues:
|
||||
for (; i >= 0; i--)
|
||||
for (; i > 0; i--)
|
||||
__nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[i], i);
|
||||
return ret;
|
||||
}
|
||||
@ -2433,7 +2440,7 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg)
|
||||
return;
|
||||
|
||||
dev_warn(ctrl->ctrl.device,
|
||||
"NVME-FC{%d}: transport association error detected: %s\n",
|
||||
"NVME-FC{%d}: transport association event: %s\n",
|
||||
ctrl->cnum, errmsg);
|
||||
dev_warn(ctrl->ctrl.device,
|
||||
"NVME-FC{%d}: resetting controller\n", ctrl->cnum);
|
||||
@ -2446,15 +2453,20 @@ nvme_fc_timeout(struct request *rq, bool reserved)
|
||||
{
|
||||
struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
|
||||
struct nvme_fc_ctrl *ctrl = op->ctrl;
|
||||
struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu;
|
||||
struct nvme_command *sqe = &cmdiu->sqe;
|
||||
|
||||
/*
|
||||
* we can't individually ABTS an io without affecting the queue,
|
||||
* thus killing the queue, and thus the association.
|
||||
* So resolve by performing a controller reset, which will stop
|
||||
* the host/io stack, terminate the association on the link,
|
||||
* and recreate an association on the link.
|
||||
* Attempt to abort the offending command. Command completion
|
||||
* will detect the aborted io and will fail the connection.
|
||||
*/
|
||||
nvme_fc_error_recovery(ctrl, "io timeout error");
|
||||
dev_info(ctrl->ctrl.device,
|
||||
"NVME-FC{%d.%d}: io timeout: opcode %d fctype %d w10/11: "
|
||||
"x%08x/x%08x\n",
|
||||
ctrl->cnum, op->queue->qnum, sqe->common.opcode,
|
||||
sqe->connect.fctype, sqe->common.cdw10, sqe->common.cdw11);
|
||||
if (__nvme_fc_abort_op(ctrl, op))
|
||||
nvme_fc_error_recovery(ctrl, "io timeout abort failed");
|
||||
|
||||
/*
|
||||
* the io abort has been initiated. Have the reset timer
|
||||
@ -2726,6 +2738,7 @@ nvme_fc_complete_rq(struct request *rq)
|
||||
struct nvme_fc_ctrl *ctrl = op->ctrl;
|
||||
|
||||
atomic_set(&op->state, FCPOP_STATE_IDLE);
|
||||
op->flags &= ~FCOP_FLAGS_TERMIO;
|
||||
|
||||
nvme_fc_unmap_data(ctrl, rq, op);
|
||||
nvme_complete_rq(rq);
|
||||
@ -2876,11 +2889,14 @@ nvme_fc_recreate_io_queues(struct nvme_fc_ctrl *ctrl)
|
||||
if (ret)
|
||||
goto out_delete_hw_queues;
|
||||
|
||||
if (prior_ioq_cnt != nr_io_queues)
|
||||
if (prior_ioq_cnt != nr_io_queues) {
|
||||
dev_info(ctrl->ctrl.device,
|
||||
"reconnect: revising io queue count from %d to %d\n",
|
||||
prior_ioq_cnt, nr_io_queues);
|
||||
blk_mq_update_nr_hw_queues(&ctrl->tag_set, nr_io_queues);
|
||||
nvme_wait_freeze(&ctrl->ctrl);
|
||||
blk_mq_update_nr_hw_queues(&ctrl->tag_set, nr_io_queues);
|
||||
nvme_unfreeze(&ctrl->ctrl);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
@ -3090,6 +3106,61 @@ out_free_queue:
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* This routine runs through all outstanding commands on the association
|
||||
* and aborts them. This routine is typically be called by the
|
||||
* delete_association routine. It is also called due to an error during
|
||||
* reconnect. In that scenario, it is most likely a command that initializes
|
||||
* the controller, including fabric Connect commands on io queues, that
|
||||
* may have timed out or failed thus the io must be killed for the connect
|
||||
* thread to see the error.
|
||||
*/
|
||||
static void
|
||||
__nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues)
|
||||
{
|
||||
/*
|
||||
* If io queues are present, stop them and terminate all outstanding
|
||||
* ios on them. As FC allocates FC exchange for each io, the
|
||||
* transport must contact the LLDD to terminate the exchange,
|
||||
* thus releasing the FC exchange. We use blk_mq_tagset_busy_itr()
|
||||
* to tell us what io's are busy and invoke a transport routine
|
||||
* to kill them with the LLDD. After terminating the exchange
|
||||
* the LLDD will call the transport's normal io done path, but it
|
||||
* will have an aborted status. The done path will return the
|
||||
* io requests back to the block layer as part of normal completions
|
||||
* (but with error status).
|
||||
*/
|
||||
if (ctrl->ctrl.queue_count > 1) {
|
||||
nvme_stop_queues(&ctrl->ctrl);
|
||||
blk_mq_tagset_busy_iter(&ctrl->tag_set,
|
||||
nvme_fc_terminate_exchange, &ctrl->ctrl);
|
||||
blk_mq_tagset_wait_completed_request(&ctrl->tag_set);
|
||||
if (start_queues)
|
||||
nvme_start_queues(&ctrl->ctrl);
|
||||
}
|
||||
|
||||
/*
|
||||
* Other transports, which don't have link-level contexts bound
|
||||
* to sqe's, would try to gracefully shutdown the controller by
|
||||
* writing the registers for shutdown and polling (call
|
||||
* nvme_shutdown_ctrl()). Given a bunch of i/o was potentially
|
||||
* just aborted and we will wait on those contexts, and given
|
||||
* there was no indication of how live the controlelr is on the
|
||||
* link, don't send more io to create more contexts for the
|
||||
* shutdown. Let the controller fail via keepalive failure if
|
||||
* its still present.
|
||||
*/
|
||||
|
||||
/*
|
||||
* clean up the admin queue. Same thing as above.
|
||||
*/
|
||||
blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
|
||||
blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
|
||||
nvme_fc_terminate_exchange, &ctrl->ctrl);
|
||||
blk_mq_tagset_wait_completed_request(&ctrl->admin_tag_set);
|
||||
}
|
||||
|
||||
/*
|
||||
* This routine stops operation of the controller on the host side.
|
||||
* On the host os stack side: Admin and IO queues are stopped,
|
||||
@ -3110,46 +3181,7 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
|
||||
ctrl->iocnt = 0;
|
||||
spin_unlock_irqrestore(&ctrl->lock, flags);
|
||||
|
||||
/*
|
||||
* If io queues are present, stop them and terminate all outstanding
|
||||
* ios on them. As FC allocates FC exchange for each io, the
|
||||
* transport must contact the LLDD to terminate the exchange,
|
||||
* thus releasing the FC exchange. We use blk_mq_tagset_busy_itr()
|
||||
* to tell us what io's are busy and invoke a transport routine
|
||||
* to kill them with the LLDD. After terminating the exchange
|
||||
* the LLDD will call the transport's normal io done path, but it
|
||||
* will have an aborted status. The done path will return the
|
||||
* io requests back to the block layer as part of normal completions
|
||||
* (but with error status).
|
||||
*/
|
||||
if (ctrl->ctrl.queue_count > 1) {
|
||||
nvme_stop_queues(&ctrl->ctrl);
|
||||
blk_mq_tagset_busy_iter(&ctrl->tag_set,
|
||||
nvme_fc_terminate_exchange, &ctrl->ctrl);
|
||||
blk_mq_tagset_wait_completed_request(&ctrl->tag_set);
|
||||
}
|
||||
|
||||
/*
|
||||
* Other transports, which don't have link-level contexts bound
|
||||
* to sqe's, would try to gracefully shutdown the controller by
|
||||
* writing the registers for shutdown and polling (call
|
||||
* nvme_shutdown_ctrl()). Given a bunch of i/o was potentially
|
||||
* just aborted and we will wait on those contexts, and given
|
||||
* there was no indication of how live the controlelr is on the
|
||||
* link, don't send more io to create more contexts for the
|
||||
* shutdown. Let the controller fail via keepalive failure if
|
||||
* its still present.
|
||||
*/
|
||||
|
||||
/*
|
||||
* clean up the admin queue. Same thing as above.
|
||||
* use blk_mq_tagset_busy_itr() and the transport routine to
|
||||
* terminate the exchanges.
|
||||
*/
|
||||
blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
|
||||
blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
|
||||
nvme_fc_terminate_exchange, &ctrl->ctrl);
|
||||
blk_mq_tagset_wait_completed_request(&ctrl->admin_tag_set);
|
||||
__nvme_fc_abort_outstanding_ios(ctrl, false);
|
||||
|
||||
/* kill the aens as they are a separate path */
|
||||
nvme_fc_abort_aen_ops(ctrl);
|
||||
@ -3263,22 +3295,27 @@ static void
|
||||
__nvme_fc_terminate_io(struct nvme_fc_ctrl *ctrl)
|
||||
{
|
||||
/*
|
||||
* if state is connecting - the error occurred as part of a
|
||||
* reconnect attempt. The create_association error paths will
|
||||
* clean up any outstanding io.
|
||||
*
|
||||
* if it's a different state - ensure all pending io is
|
||||
* terminated. Given this can delay while waiting for the
|
||||
* aborted io to return, we recheck adapter state below
|
||||
* before changing state.
|
||||
* if state is CONNECTING - the error occurred as part of a
|
||||
* reconnect attempt. Abort any ios on the association and
|
||||
* let the create_association error paths resolve things.
|
||||
*/
|
||||
if (ctrl->ctrl.state != NVME_CTRL_CONNECTING) {
|
||||
nvme_stop_keep_alive(&ctrl->ctrl);
|
||||
|
||||
/* will block will waiting for io to terminate */
|
||||
nvme_fc_delete_association(ctrl);
|
||||
if (ctrl->ctrl.state == NVME_CTRL_CONNECTING) {
|
||||
__nvme_fc_abort_outstanding_ios(ctrl, true);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* For any other state, kill the association. As this routine
|
||||
* is a common io abort routine for resetting and such, after
|
||||
* the association is terminated, ensure that the state is set
|
||||
* to CONNECTING.
|
||||
*/
|
||||
|
||||
nvme_stop_keep_alive(&ctrl->ctrl);
|
||||
|
||||
/* will block will waiting for io to terminate */
|
||||
nvme_fc_delete_association(ctrl);
|
||||
|
||||
if (ctrl->ctrl.state != NVME_CTRL_CONNECTING &&
|
||||
!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING))
|
||||
dev_err(ctrl->ctrl.device,
|
||||
@ -3403,7 +3440,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
|
||||
{
|
||||
struct nvme_fc_ctrl *ctrl;
|
||||
unsigned long flags;
|
||||
int ret, idx;
|
||||
int ret, idx, ctrl_loss_tmo;
|
||||
|
||||
if (!(rport->remoteport.port_role &
|
||||
(FC_PORT_ROLE_NVME_DISCOVERY | FC_PORT_ROLE_NVME_TARGET))) {
|
||||
@ -3429,6 +3466,19 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
|
||||
goto out_free_ctrl;
|
||||
}
|
||||
|
||||
/*
|
||||
* if ctrl_loss_tmo is being enforced and the default reconnect delay
|
||||
* is being used, change to a shorter reconnect delay for FC.
|
||||
*/
|
||||
if (opts->max_reconnects != -1 &&
|
||||
opts->reconnect_delay == NVMF_DEF_RECONNECT_DELAY &&
|
||||
opts->reconnect_delay > NVME_FC_DEFAULT_RECONNECT_TMO) {
|
||||
ctrl_loss_tmo = opts->max_reconnects * opts->reconnect_delay;
|
||||
opts->reconnect_delay = NVME_FC_DEFAULT_RECONNECT_TMO;
|
||||
opts->max_reconnects = DIV_ROUND_UP(ctrl_loss_tmo,
|
||||
opts->reconnect_delay);
|
||||
}
|
||||
|
||||
ctrl->ctrl.opts = opts;
|
||||
ctrl->ctrl.nr_reconnects = 0;
|
||||
if (lport->dev)
|
||||
|
@ -176,7 +176,7 @@ static inline struct nvme_request *nvme_req(struct request *req)
|
||||
|
||||
static inline u16 nvme_req_qid(struct request *req)
|
||||
{
|
||||
if (!req->rq_disk)
|
||||
if (!req->q->queuedata)
|
||||
return 0;
|
||||
return blk_mq_unique_tag_to_hwq(blk_mq_unique_tag(req)) + 1;
|
||||
}
|
||||
|
@ -3185,6 +3185,8 @@ static const struct pci_device_id nvme_id_table[] = {
|
||||
NVME_QUIRK_IGNORE_DEV_SUBNQN, },
|
||||
{ PCI_DEVICE(0x1c5c, 0x1504), /* SK Hynix PC400 */
|
||||
.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
|
||||
{ PCI_DEVICE(0x15b7, 0x2001), /* Sandisk Skyhawk */
|
||||
.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
|
||||
{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001),
|
||||
.driver_data = NVME_QUIRK_SINGLE_VECTOR },
|
||||
{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) },
|
||||
|
@ -1730,10 +1730,11 @@ static void nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue,
|
||||
req->result = cqe->result;
|
||||
|
||||
if (wc->wc_flags & IB_WC_WITH_INVALIDATE) {
|
||||
if (unlikely(wc->ex.invalidate_rkey != req->mr->rkey)) {
|
||||
if (unlikely(!req->mr ||
|
||||
wc->ex.invalidate_rkey != req->mr->rkey)) {
|
||||
dev_err(queue->ctrl->ctrl.device,
|
||||
"Bogus remote invalidation for rkey %#x\n",
|
||||
req->mr->rkey);
|
||||
req->mr ? req->mr->rkey : 0);
|
||||
nvme_rdma_error_recovery(queue->ctrl);
|
||||
}
|
||||
} else if (req->mr) {
|
||||
@ -1926,7 +1927,6 @@ static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id,
|
||||
complete(&queue->cm_done);
|
||||
return 0;
|
||||
case RDMA_CM_EVENT_REJECTED:
|
||||
nvme_rdma_destroy_queue_ib(queue);
|
||||
cm_error = nvme_rdma_conn_rejected(queue, ev);
|
||||
break;
|
||||
case RDMA_CM_EVENT_ROUTE_ERROR:
|
||||
|
@ -1126,7 +1126,8 @@ static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl)
|
||||
* in case a host died before it enabled the controller. Hence, simply
|
||||
* reset the keep alive timer when the controller is enabled.
|
||||
*/
|
||||
mod_delayed_work(system_wq, &ctrl->ka_work, ctrl->kato * HZ);
|
||||
if (ctrl->kato)
|
||||
mod_delayed_work(system_wq, &ctrl->ka_work, ctrl->kato * HZ);
|
||||
}
|
||||
|
||||
static void nvmet_clear_ctrl(struct nvmet_ctrl *ctrl)
|
||||
|
@ -26,7 +26,7 @@ static u16 nvmet_passthru_override_id_ctrl(struct nvmet_req *req)
|
||||
struct nvme_ctrl *pctrl = ctrl->subsys->passthru_ctrl;
|
||||
u16 status = NVME_SC_SUCCESS;
|
||||
struct nvme_id_ctrl *id;
|
||||
u32 max_hw_sectors;
|
||||
int max_hw_sectors;
|
||||
int page_shift;
|
||||
|
||||
id = kzalloc(sizeof(*id), GFP_KERNEL);
|
||||
@ -48,6 +48,13 @@ static u16 nvmet_passthru_override_id_ctrl(struct nvmet_req *req)
|
||||
max_hw_sectors = min_not_zero(pctrl->max_segments << (PAGE_SHIFT - 9),
|
||||
pctrl->max_hw_sectors);
|
||||
|
||||
/*
|
||||
* nvmet_passthru_map_sg is limitted to using a single bio so limit
|
||||
* the mdts based on BIO_MAX_PAGES as well
|
||||
*/
|
||||
max_hw_sectors = min_not_zero(BIO_MAX_PAGES << (PAGE_SHIFT - 9),
|
||||
max_hw_sectors);
|
||||
|
||||
page_shift = NVME_CAP_MPSMIN(ctrl->cap) + 12;
|
||||
|
||||
id->mdts = ilog2(max_hw_sectors) + 9 - page_shift;
|
||||
@ -180,18 +187,20 @@ static void nvmet_passthru_req_done(struct request *rq,
|
||||
|
||||
static int nvmet_passthru_map_sg(struct nvmet_req *req, struct request *rq)
|
||||
{
|
||||
int sg_cnt = req->sg_cnt;
|
||||
struct scatterlist *sg;
|
||||
int op_flags = 0;
|
||||
struct bio *bio;
|
||||
int i, ret;
|
||||
|
||||
if (req->sg_cnt > BIO_MAX_PAGES)
|
||||
return -EINVAL;
|
||||
|
||||
if (req->cmd->common.opcode == nvme_cmd_flush)
|
||||
op_flags = REQ_FUA;
|
||||
else if (nvme_is_write(req->cmd))
|
||||
op_flags = REQ_SYNC | REQ_IDLE;
|
||||
|
||||
bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES));
|
||||
bio = bio_alloc(GFP_KERNEL, req->sg_cnt);
|
||||
bio->bi_end_io = bio_put;
|
||||
bio->bi_opf = req_op(rq) | op_flags;
|
||||
|
||||
@ -201,7 +210,6 @@ static int nvmet_passthru_map_sg(struct nvmet_req *req, struct request *rq)
|
||||
bio_put(bio);
|
||||
return -EINVAL;
|
||||
}
|
||||
sg_cnt--;
|
||||
}
|
||||
|
||||
ret = blk_rq_append_bio(rq, &bio);
|
||||
@ -236,7 +244,7 @@ static void nvmet_passthru_execute_cmd(struct nvmet_req *req)
|
||||
q = ns->queue;
|
||||
}
|
||||
|
||||
rq = nvme_alloc_request(q, req->cmd, BLK_MQ_REQ_NOWAIT, NVME_QID_ANY);
|
||||
rq = nvme_alloc_request(q, req->cmd, 0, NVME_QID_ANY);
|
||||
if (IS_ERR(rq)) {
|
||||
status = NVME_SC_INTERNAL;
|
||||
goto out_put_ns;
|
||||
|
@ -777,6 +777,15 @@ static void scsi_io_completion_action(struct scsi_cmnd *cmd, int result)
|
||||
/* See SSC3rXX or current. */
|
||||
action = ACTION_FAIL;
|
||||
break;
|
||||
case DATA_PROTECT:
|
||||
action = ACTION_FAIL;
|
||||
if ((sshdr.asc == 0x0C && sshdr.ascq == 0x12) ||
|
||||
(sshdr.asc == 0x55 &&
|
||||
(sshdr.ascq == 0x0E || sshdr.ascq == 0x0F))) {
|
||||
/* Insufficient zone resources */
|
||||
blk_stat = BLK_STS_ZONE_OPEN_RESOURCE;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
action = ACTION_FAIL;
|
||||
break;
|
||||
|
@ -104,6 +104,24 @@ typedef u8 __bitwise blk_status_t;
|
||||
*/
|
||||
#define BLK_STS_ZONE_RESOURCE ((__force blk_status_t)14)
|
||||
|
||||
/*
|
||||
* BLK_STS_ZONE_OPEN_RESOURCE is returned from the driver in the completion
|
||||
* path if the device returns a status indicating that too many zone resources
|
||||
* are currently open. The same command should be successful if resubmitted
|
||||
* after the number of open zones decreases below the device's limits, which is
|
||||
* reported in the request_queue's max_open_zones.
|
||||
*/
|
||||
#define BLK_STS_ZONE_OPEN_RESOURCE ((__force blk_status_t)15)
|
||||
|
||||
/*
|
||||
* BLK_STS_ZONE_ACTIVE_RESOURCE is returned from the driver in the completion
|
||||
* path if the device returns a status indicating that too many zone resources
|
||||
* are currently active. The same command should be successful if resubmitted
|
||||
* after the number of active zones decreases below the device's limits, which
|
||||
* is reported in the request_queue's max_active_zones.
|
||||
*/
|
||||
#define BLK_STS_ZONE_ACTIVE_RESOURCE ((__force blk_status_t)16)
|
||||
|
||||
/**
|
||||
* blk_path_error - returns true if error may be path related
|
||||
* @error: status the request was completed with
|
||||
|
@ -106,12 +106,6 @@ struct io_context {
|
||||
|
||||
unsigned short ioprio;
|
||||
|
||||
/*
|
||||
* For request batching
|
||||
*/
|
||||
int nr_batch_requests; /* Number of requests left in the batch */
|
||||
unsigned long last_waited; /* Time last woken after wait for request */
|
||||
|
||||
struct radix_tree_root icq_tree;
|
||||
struct io_cq __rcu *icq_hint;
|
||||
struct hlist_head icq_list;
|
||||
|
@ -595,7 +595,7 @@ struct scatterlist *sgl_alloc_order(unsigned long long length,
|
||||
elem_len = min_t(u64, length, PAGE_SIZE << order);
|
||||
page = alloc_pages(gfp, order);
|
||||
if (!page) {
|
||||
sgl_free(sgl);
|
||||
sgl_free_order(sgl, order);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user