mirror of
https://github.com/torvalds/linux.git
synced 2024-11-10 14:11:52 +00:00
for-linus-2019-10-18
-----BEGIN PGP SIGNATURE----- iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAl2qbF0QHGF4Ym9lQGtl cm5lbC5kawAKCRD301j7KXHgptsuEADEKL8pta74uy50pl0t8l9fZ++U+wdIeEIW 9uumpOEPnI2GpkG1sOyKWK6tl8InQLw6pAquP9MoT2BHXqFHk7NIgtvk67lwQeoc dRwklVfvOLAdnKzyfODqE9Fh9BgczZIuOLzgdtNqrPKqgJfFRCwN94Kj/r2tYuy7 v+riK3A49u12dOLtjU6ciNgZ0m1iUX9s0+PFYVUXtJHU/1OYToQaKP+sgWiue0Ca VJP/L4MLYD0a7tfd92WAK7xWLsYWTDw1Gg20hXH/tV+IIDQ5+OXhu2s6PuqI7c0y cZqWHQHBDkZMQvT8+V+YqZtEa+xwVCom51prJEPasmdq3fGx+2sDC1HQiySao1ML wfFxZvFvY9fm6M7p2xsSNEcOmamrx1aLLyNSbjIvAqLUDYJWWS56BHsKyTU5Z+Jp RA9dpq8iR6ISaIAcFf0IB0pJSv1HEeHyo/ixlALqezBFJaMdhWy/M+dEbWKtix9M s19ozcpe+omN9+O0anlLtzKNgj2Xnjiwuu8mhVcqn6uG/p6GUOup+lNvTW/fig3I JBH8kObjYXL181V9rYVqFutnuqcf2HYqMvV2vzAmg4LYnPVUmU7HMj8zEpxc4N+f Evd77j0wXmY9S+4JERxaqQZuvKBEIkvM1rkk3N4NbNghfa7QL4aW+I9cWtuelPC2 E+DK7if0Gg== =rvkw -----END PGP SIGNATURE----- Merge tag 'for-linus-2019-10-18' of git://git.kernel.dk/linux-block Pull block fixes from Jens Axboe: - NVMe pull request from Keith that address deadlocks, double resets, memory leaks, and other regression. 
- Fixup elv_support_iosched() for bio based devices (Damien) - Fixup for the ahci PCS quirk (Dan) - Socket O_NONBLOCK handling fix for io_uring (me) - Timeout sequence io_uring fixes (yangerkun) - MD warning fix for parameter default_layout (Song) - blkcg activation fixes (Tejun) - blk-rq-qos node deletion fix (Tejun) * tag 'for-linus-2019-10-18' of git://git.kernel.dk/linux-block: nvme-pci: Set the prp2 correctly when using more than 4k page io_uring: fix logic error in io_timeout io_uring: fix up O_NONBLOCK handling for sockets md/raid0: fix warning message for parameter default_layout libata/ahci: Fix PCS quirk application blk-rq-qos: fix first node deletion of rq_qos_del() blkcg: Fix multiple bugs in blkcg_activate_policy() io_uring: consider the overflow of sequence for timeout req nvme-tcp: fix possible leakage during error flow nvmet-loop: fix possible leakage during error flow block: Fix elv_support_iosched() nvme-tcp: Initialize sk->sk_ll_usec only with NET_RX_BUSY_POLL nvme: Wait for reset state when required nvme: Prevent resets during paused controller state nvme: Restart request timers in resetting state nvme: Remove ADMIN_ONLY state nvme-pci: Free tagset if no IO queues nvme: retain split access workaround for capability reads nvme: fix possible deadlock when nvme_update_formats fails
This commit is contained in:
commit
d418d07005
@ -1362,7 +1362,7 @@ int blkcg_activate_policy(struct request_queue *q,
|
||||
const struct blkcg_policy *pol)
|
||||
{
|
||||
struct blkg_policy_data *pd_prealloc = NULL;
|
||||
struct blkcg_gq *blkg;
|
||||
struct blkcg_gq *blkg, *pinned_blkg = NULL;
|
||||
int ret;
|
||||
|
||||
if (blkcg_policy_enabled(q, pol))
|
||||
@ -1370,49 +1370,82 @@ int blkcg_activate_policy(struct request_queue *q,
|
||||
|
||||
if (queue_is_mq(q))
|
||||
blk_mq_freeze_queue(q);
|
||||
pd_prealloc:
|
||||
if (!pd_prealloc) {
|
||||
pd_prealloc = pol->pd_alloc_fn(GFP_KERNEL, q, &blkcg_root);
|
||||
if (!pd_prealloc) {
|
||||
ret = -ENOMEM;
|
||||
goto out_bypass_end;
|
||||
}
|
||||
}
|
||||
|
||||
retry:
|
||||
spin_lock_irq(&q->queue_lock);
|
||||
|
||||
/* blkg_list is pushed at the head, reverse walk to init parents first */
|
||||
/* blkg_list is pushed at the head, reverse walk to allocate parents first */
|
||||
list_for_each_entry_reverse(blkg, &q->blkg_list, q_node) {
|
||||
struct blkg_policy_data *pd;
|
||||
|
||||
if (blkg->pd[pol->plid])
|
||||
continue;
|
||||
|
||||
pd = pol->pd_alloc_fn(GFP_NOWAIT | __GFP_NOWARN, q, &blkcg_root);
|
||||
if (!pd)
|
||||
swap(pd, pd_prealloc);
|
||||
/* If prealloc matches, use it; otherwise try GFP_NOWAIT */
|
||||
if (blkg == pinned_blkg) {
|
||||
pd = pd_prealloc;
|
||||
pd_prealloc = NULL;
|
||||
} else {
|
||||
pd = pol->pd_alloc_fn(GFP_NOWAIT | __GFP_NOWARN, q,
|
||||
blkg->blkcg);
|
||||
}
|
||||
|
||||
if (!pd) {
|
||||
/*
|
||||
* GFP_NOWAIT failed. Free the existing one and
|
||||
* prealloc for @blkg w/ GFP_KERNEL.
|
||||
*/
|
||||
if (pinned_blkg)
|
||||
blkg_put(pinned_blkg);
|
||||
blkg_get(blkg);
|
||||
pinned_blkg = blkg;
|
||||
|
||||
spin_unlock_irq(&q->queue_lock);
|
||||
goto pd_prealloc;
|
||||
|
||||
if (pd_prealloc)
|
||||
pol->pd_free_fn(pd_prealloc);
|
||||
pd_prealloc = pol->pd_alloc_fn(GFP_KERNEL, q,
|
||||
blkg->blkcg);
|
||||
if (pd_prealloc)
|
||||
goto retry;
|
||||
else
|
||||
goto enomem;
|
||||
}
|
||||
|
||||
blkg->pd[pol->plid] = pd;
|
||||
pd->blkg = blkg;
|
||||
pd->plid = pol->plid;
|
||||
if (pol->pd_init_fn)
|
||||
pol->pd_init_fn(pd);
|
||||
}
|
||||
|
||||
/* all allocated, init in the same order */
|
||||
if (pol->pd_init_fn)
|
||||
list_for_each_entry_reverse(blkg, &q->blkg_list, q_node)
|
||||
pol->pd_init_fn(blkg->pd[pol->plid]);
|
||||
|
||||
__set_bit(pol->plid, q->blkcg_pols);
|
||||
ret = 0;
|
||||
|
||||
spin_unlock_irq(&q->queue_lock);
|
||||
out_bypass_end:
|
||||
out:
|
||||
if (queue_is_mq(q))
|
||||
blk_mq_unfreeze_queue(q);
|
||||
if (pinned_blkg)
|
||||
blkg_put(pinned_blkg);
|
||||
if (pd_prealloc)
|
||||
pol->pd_free_fn(pd_prealloc);
|
||||
return ret;
|
||||
|
||||
enomem:
|
||||
/* alloc failed, nothing's initialized yet, free everything */
|
||||
spin_lock_irq(&q->queue_lock);
|
||||
list_for_each_entry(blkg, &q->blkg_list, q_node) {
|
||||
if (blkg->pd[pol->plid]) {
|
||||
pol->pd_free_fn(blkg->pd[pol->plid]);
|
||||
blkg->pd[pol->plid] = NULL;
|
||||
}
|
||||
}
|
||||
spin_unlock_irq(&q->queue_lock);
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blkcg_activate_policy);
|
||||
|
||||
|
@ -108,16 +108,13 @@ static inline void rq_qos_add(struct request_queue *q, struct rq_qos *rqos)
|
||||
|
||||
static inline void rq_qos_del(struct request_queue *q, struct rq_qos *rqos)
|
||||
{
|
||||
struct rq_qos *cur, *prev = NULL;
|
||||
for (cur = q->rq_qos; cur; cur = cur->next) {
|
||||
if (cur == rqos) {
|
||||
if (prev)
|
||||
prev->next = rqos->next;
|
||||
else
|
||||
q->rq_qos = cur;
|
||||
struct rq_qos **cur;
|
||||
|
||||
for (cur = &q->rq_qos; *cur; cur = &(*cur)->next) {
|
||||
if (*cur == rqos) {
|
||||
*cur = rqos->next;
|
||||
break;
|
||||
}
|
||||
prev = cur;
|
||||
}
|
||||
|
||||
blk_mq_debugfs_unregister_rqos(rqos);
|
||||
|
@ -616,7 +616,8 @@ out:
|
||||
|
||||
static inline bool elv_support_iosched(struct request_queue *q)
|
||||
{
|
||||
if (q->tag_set && (q->tag_set->flags & BLK_MQ_F_NO_SCHED))
|
||||
if (!q->mq_ops ||
|
||||
(q->tag_set && (q->tag_set->flags & BLK_MQ_F_NO_SCHED)))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
@ -1600,7 +1600,9 @@ static void ahci_intel_pcs_quirk(struct pci_dev *pdev, struct ahci_host_priv *hp
|
||||
*/
|
||||
if (!id || id->vendor != PCI_VENDOR_ID_INTEL)
|
||||
return;
|
||||
if (((enum board_ids) id->driver_data) < board_ahci_pcs7)
|
||||
|
||||
/* Skip applying the quirk on Denverton and beyond */
|
||||
if (((enum board_ids) id->driver_data) >= board_ahci_pcs7)
|
||||
return;
|
||||
|
||||
/*
|
||||
|
@ -154,7 +154,7 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
|
||||
} else {
|
||||
pr_err("md/raid0:%s: cannot assemble multi-zone RAID0 with default_layout setting\n",
|
||||
mdname(mddev));
|
||||
pr_err("md/raid0: please set raid.default_layout to 1 or 2\n");
|
||||
pr_err("md/raid0: please set raid0.default_layout to 1 or 2\n");
|
||||
err = -ENOTSUPP;
|
||||
goto abort;
|
||||
}
|
||||
|
@ -116,10 +116,26 @@ static void nvme_queue_scan(struct nvme_ctrl *ctrl)
|
||||
/*
|
||||
* Only queue new scan work when admin and IO queues are both alive
|
||||
*/
|
||||
if (ctrl->state == NVME_CTRL_LIVE)
|
||||
if (ctrl->state == NVME_CTRL_LIVE && ctrl->tagset)
|
||||
queue_work(nvme_wq, &ctrl->scan_work);
|
||||
}
|
||||
|
||||
/*
|
||||
* Use this function to proceed with scheduling reset_work for a controller
|
||||
* that had previously been set to the resetting state. This is intended for
|
||||
* code paths that can't be interrupted by other reset attempts. A hot removal
|
||||
* may prevent this from succeeding.
|
||||
*/
|
||||
int nvme_try_sched_reset(struct nvme_ctrl *ctrl)
|
||||
{
|
||||
/* Only proceed when the controller is already in the resetting state. */
if (ctrl->state != NVME_CTRL_RESETTING)
|
||||
return -EBUSY;
|
||||
/* A false return from queue_work() is also reported as -EBUSY. */
if (!queue_work(nvme_reset_wq, &ctrl->reset_work))
|
||||
return -EBUSY;
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvme_try_sched_reset);
|
||||
|
||||
int nvme_reset_ctrl(struct nvme_ctrl *ctrl)
|
||||
{
|
||||
if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
|
||||
@ -137,8 +153,7 @@ int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl)
|
||||
ret = nvme_reset_ctrl(ctrl);
|
||||
if (!ret) {
|
||||
flush_work(&ctrl->reset_work);
|
||||
if (ctrl->state != NVME_CTRL_LIVE &&
|
||||
ctrl->state != NVME_CTRL_ADMIN_ONLY)
|
||||
if (ctrl->state != NVME_CTRL_LIVE)
|
||||
ret = -ENETRESET;
|
||||
}
|
||||
|
||||
@ -315,15 +330,6 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
|
||||
|
||||
old_state = ctrl->state;
|
||||
switch (new_state) {
|
||||
case NVME_CTRL_ADMIN_ONLY:
|
||||
switch (old_state) {
|
||||
case NVME_CTRL_CONNECTING:
|
||||
changed = true;
|
||||
/* FALLTHRU */
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case NVME_CTRL_LIVE:
|
||||
switch (old_state) {
|
||||
case NVME_CTRL_NEW:
|
||||
@ -339,7 +345,6 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
|
||||
switch (old_state) {
|
||||
case NVME_CTRL_NEW:
|
||||
case NVME_CTRL_LIVE:
|
||||
case NVME_CTRL_ADMIN_ONLY:
|
||||
changed = true;
|
||||
/* FALLTHRU */
|
||||
default:
|
||||
@ -359,7 +364,6 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
|
||||
case NVME_CTRL_DELETING:
|
||||
switch (old_state) {
|
||||
case NVME_CTRL_LIVE:
|
||||
case NVME_CTRL_ADMIN_ONLY:
|
||||
case NVME_CTRL_RESETTING:
|
||||
case NVME_CTRL_CONNECTING:
|
||||
changed = true;
|
||||
@ -381,8 +385,10 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
|
||||
break;
|
||||
}
|
||||
|
||||
if (changed)
|
||||
if (changed) {
|
||||
ctrl->state = new_state;
|
||||
wake_up_all(&ctrl->state_wq);
|
||||
}
|
||||
|
||||
spin_unlock_irqrestore(&ctrl->lock, flags);
|
||||
if (changed && ctrl->state == NVME_CTRL_LIVE)
|
||||
@ -391,6 +397,39 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvme_change_ctrl_state);
|
||||
|
||||
/*
|
||||
* Returns true for sink states that can't ever transition back to live.
|
||||
*/
|
||||
static bool nvme_state_terminal(struct nvme_ctrl *ctrl)
|
||||
{
|
||||
switch (ctrl->state) {
|
||||
/* Non-terminal states: report false. */
case NVME_CTRL_NEW:
|
||||
case NVME_CTRL_LIVE:
|
||||
case NVME_CTRL_RESETTING:
|
||||
case NVME_CTRL_CONNECTING:
|
||||
return false;
|
||||
/* Sink states: report true. */
case NVME_CTRL_DELETING:
|
||||
case NVME_CTRL_DEAD:
|
||||
return true;
|
||||
/* Any state not listed above: warn once and treat it as terminal. */
default:
|
||||
WARN_ONCE(1, "Unhandled ctrl state:%d", ctrl->state);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Waits for the controller state to be resetting, or returns false if it is
|
||||
* not possible to ever transition to that state.
|
||||
*/
|
||||
bool nvme_wait_reset(struct nvme_ctrl *ctrl)
|
||||
{
|
||||
/*
 * The wait condition itself attempts the transition to RESETTING; waiting
 * ends once that attempt succeeds or the current state is terminal.
 */
wait_event(ctrl->state_wq,
|
||||
nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING) ||
|
||||
nvme_state_terminal(ctrl));
|
||||
/* True only if the controller ended up in the resetting state. */
return ctrl->state == NVME_CTRL_RESETTING;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvme_wait_reset);
|
||||
|
||||
static void nvme_free_ns_head(struct kref *ref)
|
||||
{
|
||||
struct nvme_ns_head *head =
|
||||
@ -1306,8 +1345,6 @@ static void nvme_update_formats(struct nvme_ctrl *ctrl)
|
||||
if (ns->disk && nvme_revalidate_disk(ns->disk))
|
||||
nvme_set_queue_dying(ns);
|
||||
up_read(&ctrl->namespaces_rwsem);
|
||||
|
||||
nvme_remove_invalid_namespaces(ctrl, NVME_NSID_ALL);
|
||||
}
|
||||
|
||||
static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects)
|
||||
@ -1323,6 +1360,7 @@ static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects)
|
||||
nvme_unfreeze(ctrl);
|
||||
nvme_mpath_unfreeze(ctrl->subsys);
|
||||
mutex_unlock(&ctrl->subsys->lock);
|
||||
nvme_remove_invalid_namespaces(ctrl, NVME_NSID_ALL);
|
||||
mutex_unlock(&ctrl->scan_lock);
|
||||
}
|
||||
if (effects & NVME_CMD_EFFECTS_CCC)
|
||||
@ -2874,7 +2912,6 @@ static int nvme_dev_open(struct inode *inode, struct file *file)
|
||||
|
||||
switch (ctrl->state) {
|
||||
case NVME_CTRL_LIVE:
|
||||
case NVME_CTRL_ADMIN_ONLY:
|
||||
break;
|
||||
default:
|
||||
return -EWOULDBLOCK;
|
||||
@ -3168,7 +3205,6 @@ static ssize_t nvme_sysfs_show_state(struct device *dev,
|
||||
static const char *const state_name[] = {
|
||||
[NVME_CTRL_NEW] = "new",
|
||||
[NVME_CTRL_LIVE] = "live",
|
||||
[NVME_CTRL_ADMIN_ONLY] = "only-admin",
|
||||
[NVME_CTRL_RESETTING] = "resetting",
|
||||
[NVME_CTRL_CONNECTING] = "connecting",
|
||||
[NVME_CTRL_DELETING] = "deleting",
|
||||
@ -3679,11 +3715,10 @@ static void nvme_scan_work(struct work_struct *work)
|
||||
struct nvme_id_ctrl *id;
|
||||
unsigned nn;
|
||||
|
||||
if (ctrl->state != NVME_CTRL_LIVE)
|
||||
/* No tagset on a live ctrl means IO queues could not be created */
|
||||
if (ctrl->state != NVME_CTRL_LIVE || !ctrl->tagset)
|
||||
return;
|
||||
|
||||
WARN_ON_ONCE(!ctrl->tagset);
|
||||
|
||||
if (test_and_clear_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events)) {
|
||||
dev_info(ctrl->device, "rescanning namespaces.\n");
|
||||
nvme_clear_changed_ns_log(ctrl);
|
||||
@ -3844,13 +3879,13 @@ static void nvme_fw_act_work(struct work_struct *work)
|
||||
if (time_after(jiffies, fw_act_timeout)) {
|
||||
dev_warn(ctrl->device,
|
||||
"Fw activation timeout, reset controller\n");
|
||||
nvme_reset_ctrl(ctrl);
|
||||
break;
|
||||
nvme_try_sched_reset(ctrl);
|
||||
return;
|
||||
}
|
||||
msleep(100);
|
||||
}
|
||||
|
||||
if (ctrl->state != NVME_CTRL_LIVE)
|
||||
if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_LIVE))
|
||||
return;
|
||||
|
||||
nvme_start_queues(ctrl);
|
||||
@ -3870,7 +3905,13 @@ static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
|
||||
nvme_queue_scan(ctrl);
|
||||
break;
|
||||
case NVME_AER_NOTICE_FW_ACT_STARTING:
|
||||
queue_work(nvme_wq, &ctrl->fw_act_work);
|
||||
/*
|
||||
* We are (ab)using the RESETTING state to prevent subsequent
|
||||
* recovery actions from interfering with the controller's
|
||||
* firmware activation.
|
||||
*/
|
||||
if (nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
|
||||
queue_work(nvme_wq, &ctrl->fw_act_work);
|
||||
break;
|
||||
#ifdef CONFIG_NVME_MULTIPATH
|
||||
case NVME_AER_NOTICE_ANA:
|
||||
@ -3993,6 +4034,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
|
||||
INIT_WORK(&ctrl->async_event_work, nvme_async_event_work);
|
||||
INIT_WORK(&ctrl->fw_act_work, nvme_fw_act_work);
|
||||
INIT_WORK(&ctrl->delete_work, nvme_delete_ctrl_work);
|
||||
init_waitqueue_head(&ctrl->state_wq);
|
||||
|
||||
INIT_DELAYED_WORK(&ctrl->ka_work, nvme_keep_alive_work);
|
||||
memset(&ctrl->ka_cmd, 0, sizeof(ctrl->ka_cmd));
|
||||
|
@ -182,8 +182,7 @@ bool nvmf_ip_options_match(struct nvme_ctrl *ctrl,
|
||||
static inline bool nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
|
||||
bool queue_live)
|
||||
{
|
||||
if (likely(ctrl->state == NVME_CTRL_LIVE ||
|
||||
ctrl->state == NVME_CTRL_ADMIN_ONLY))
|
||||
if (likely(ctrl->state == NVME_CTRL_LIVE))
|
||||
return true;
|
||||
return __nvmf_check_ready(ctrl, rq, queue_live);
|
||||
}
|
||||
|
@ -15,6 +15,7 @@
|
||||
#include <linux/sed-opal.h>
|
||||
#include <linux/fault-inject.h>
|
||||
#include <linux/rcupdate.h>
|
||||
#include <linux/wait.h>
|
||||
|
||||
#include <trace/events/block.h>
|
||||
|
||||
@ -161,7 +162,6 @@ static inline u16 nvme_req_qid(struct request *req)
|
||||
enum nvme_ctrl_state {
|
||||
NVME_CTRL_NEW,
|
||||
NVME_CTRL_LIVE,
|
||||
NVME_CTRL_ADMIN_ONLY, /* Only admin queue live */
|
||||
NVME_CTRL_RESETTING,
|
||||
NVME_CTRL_CONNECTING,
|
||||
NVME_CTRL_DELETING,
|
||||
@ -199,6 +199,7 @@ struct nvme_ctrl {
|
||||
struct cdev cdev;
|
||||
struct work_struct reset_work;
|
||||
struct work_struct delete_work;
|
||||
wait_queue_head_t state_wq;
|
||||
|
||||
struct nvme_subsystem *subsys;
|
||||
struct list_head subsys_entry;
|
||||
@ -449,6 +450,7 @@ void nvme_complete_rq(struct request *req);
|
||||
bool nvme_cancel_request(struct request *req, void *data, bool reserved);
|
||||
bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
|
||||
enum nvme_ctrl_state new_state);
|
||||
bool nvme_wait_reset(struct nvme_ctrl *ctrl);
|
||||
int nvme_disable_ctrl(struct nvme_ctrl *ctrl);
|
||||
int nvme_enable_ctrl(struct nvme_ctrl *ctrl);
|
||||
int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl);
|
||||
@ -499,6 +501,7 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count);
|
||||
void nvme_stop_keep_alive(struct nvme_ctrl *ctrl);
|
||||
int nvme_reset_ctrl(struct nvme_ctrl *ctrl);
|
||||
int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl);
|
||||
int nvme_try_sched_reset(struct nvme_ctrl *ctrl);
|
||||
int nvme_delete_ctrl(struct nvme_ctrl *ctrl);
|
||||
|
||||
int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp,
|
||||
|
@ -773,7 +773,8 @@ static blk_status_t nvme_setup_prp_simple(struct nvme_dev *dev,
|
||||
struct bio_vec *bv)
|
||||
{
|
||||
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
|
||||
unsigned int first_prp_len = dev->ctrl.page_size - bv->bv_offset;
|
||||
unsigned int offset = bv->bv_offset & (dev->ctrl.page_size - 1);
|
||||
unsigned int first_prp_len = dev->ctrl.page_size - offset;
|
||||
|
||||
iod->first_dma = dma_map_bvec(dev->dev, bv, rq_dma_dir(req), 0);
|
||||
if (dma_mapping_error(dev->dev, iod->first_dma))
|
||||
@ -2263,10 +2264,7 @@ static bool __nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode)
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* return error value only when tagset allocation failed
|
||||
*/
|
||||
static int nvme_dev_add(struct nvme_dev *dev)
|
||||
static void nvme_dev_add(struct nvme_dev *dev)
|
||||
{
|
||||
int ret;
|
||||
|
||||
@ -2296,7 +2294,7 @@ static int nvme_dev_add(struct nvme_dev *dev)
|
||||
if (ret) {
|
||||
dev_warn(dev->ctrl.device,
|
||||
"IO queues tagset allocation failed %d\n", ret);
|
||||
return ret;
|
||||
return;
|
||||
}
|
||||
dev->ctrl.tagset = &dev->tagset;
|
||||
} else {
|
||||
@ -2307,7 +2305,6 @@ static int nvme_dev_add(struct nvme_dev *dev)
|
||||
}
|
||||
|
||||
nvme_dbbuf_set(dev);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int nvme_pci_enable(struct nvme_dev *dev)
|
||||
@ -2467,6 +2464,14 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
|
||||
mutex_unlock(&dev->shutdown_lock);
|
||||
}
|
||||
|
||||
/*
 * Move the controller into the resetting state via nvme_wait_reset() and
 * then disable the device, passing @shutdown through to nvme_dev_disable().
 *
 * Returns 0 on success, or -EBUSY (without disabling the device) when
 * nvme_wait_reset() reports that the resetting state was not reached.
 */
static int nvme_disable_prepare_reset(struct nvme_dev *dev, bool shutdown)
|
||||
{
|
||||
if (!nvme_wait_reset(&dev->ctrl))
|
||||
return -EBUSY;
|
||||
nvme_dev_disable(dev, shutdown);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int nvme_setup_prp_pools(struct nvme_dev *dev)
|
||||
{
|
||||
dev->prp_page_pool = dma_pool_create("prp list page", dev->dev,
|
||||
@ -2490,14 +2495,20 @@ static void nvme_release_prp_pools(struct nvme_dev *dev)
|
||||
dma_pool_destroy(dev->prp_small_pool);
|
||||
}
|
||||
|
||||
/*
 * Free the device's IO tag set if dev->tagset.tags is set, and always clear
 * dev->ctrl.tagset so the controller no longer points at it.
 */
static void nvme_free_tagset(struct nvme_dev *dev)
|
||||
{
|
||||
if (dev->tagset.tags)
|
||||
blk_mq_free_tag_set(&dev->tagset);
|
||||
dev->ctrl.tagset = NULL;
|
||||
}
|
||||
|
||||
static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl)
|
||||
{
|
||||
struct nvme_dev *dev = to_nvme_dev(ctrl);
|
||||
|
||||
nvme_dbbuf_dma_free(dev);
|
||||
put_device(dev->dev);
|
||||
if (dev->tagset.tags)
|
||||
blk_mq_free_tag_set(&dev->tagset);
|
||||
nvme_free_tagset(dev);
|
||||
if (dev->ctrl.admin_q)
|
||||
blk_put_queue(dev->ctrl.admin_q);
|
||||
kfree(dev->queues);
|
||||
@ -2508,6 +2519,11 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl)
|
||||
|
||||
static void nvme_remove_dead_ctrl(struct nvme_dev *dev)
|
||||
{
|
||||
/*
|
||||
* Set state to deleting now to avoid blocking nvme_wait_reset(), which
|
||||
* may be holding this pci_dev's device lock.
|
||||
*/
|
||||
nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
|
||||
nvme_get_ctrl(&dev->ctrl);
|
||||
nvme_dev_disable(dev, false);
|
||||
nvme_kill_queues(&dev->ctrl);
|
||||
@ -2521,7 +2537,6 @@ static void nvme_reset_work(struct work_struct *work)
|
||||
container_of(work, struct nvme_dev, ctrl.reset_work);
|
||||
bool was_suspend = !!(dev->ctrl.ctrl_config & NVME_CC_SHN_NORMAL);
|
||||
int result;
|
||||
enum nvme_ctrl_state new_state = NVME_CTRL_LIVE;
|
||||
|
||||
if (WARN_ON(dev->ctrl.state != NVME_CTRL_RESETTING)) {
|
||||
result = -ENODEV;
|
||||
@ -2615,13 +2630,11 @@ static void nvme_reset_work(struct work_struct *work)
|
||||
dev_warn(dev->ctrl.device, "IO queues not created\n");
|
||||
nvme_kill_queues(&dev->ctrl);
|
||||
nvme_remove_namespaces(&dev->ctrl);
|
||||
new_state = NVME_CTRL_ADMIN_ONLY;
|
||||
nvme_free_tagset(dev);
|
||||
} else {
|
||||
nvme_start_queues(&dev->ctrl);
|
||||
nvme_wait_freeze(&dev->ctrl);
|
||||
/* hit this only when allocate tagset fails */
|
||||
if (nvme_dev_add(dev))
|
||||
new_state = NVME_CTRL_ADMIN_ONLY;
|
||||
nvme_dev_add(dev);
|
||||
nvme_unfreeze(&dev->ctrl);
|
||||
}
|
||||
|
||||
@ -2629,9 +2642,9 @@ static void nvme_reset_work(struct work_struct *work)
|
||||
* If only admin queue live, keep it to do further investigation or
|
||||
* recovery.
|
||||
*/
|
||||
if (!nvme_change_ctrl_state(&dev->ctrl, new_state)) {
|
||||
if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_LIVE)) {
|
||||
dev_warn(dev->ctrl.device,
|
||||
"failed to mark controller state %d\n", new_state);
|
||||
"failed to mark controller live state\n");
|
||||
result = -ENODEV;
|
||||
goto out;
|
||||
}
|
||||
@ -2672,7 +2685,7 @@ static int nvme_pci_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val)
|
||||
|
||||
static int nvme_pci_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val)
|
||||
{
|
||||
*val = readq(to_nvme_dev(ctrl)->bar + off);
|
||||
*val = lo_hi_readq(to_nvme_dev(ctrl)->bar + off);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -2836,19 +2849,28 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
|
||||
static void nvme_reset_prepare(struct pci_dev *pdev)
|
||||
{
|
||||
struct nvme_dev *dev = pci_get_drvdata(pdev);
|
||||
nvme_dev_disable(dev, false);
|
||||
|
||||
/*
|
||||
* We don't need to check the return value from waiting for the reset
|
||||
* state as pci_dev device lock is held, making it impossible to race
|
||||
* with ->remove().
|
||||
*/
|
||||
nvme_disable_prepare_reset(dev, false);
|
||||
nvme_sync_queues(&dev->ctrl);
|
||||
}
|
||||
|
||||
static void nvme_reset_done(struct pci_dev *pdev)
|
||||
{
|
||||
struct nvme_dev *dev = pci_get_drvdata(pdev);
|
||||
nvme_reset_ctrl_sync(&dev->ctrl);
|
||||
|
||||
if (!nvme_try_sched_reset(&dev->ctrl))
|
||||
flush_work(&dev->ctrl.reset_work);
|
||||
}
|
||||
|
||||
static void nvme_shutdown(struct pci_dev *pdev)
|
||||
{
|
||||
struct nvme_dev *dev = pci_get_drvdata(pdev);
|
||||
nvme_dev_disable(dev, true);
|
||||
nvme_disable_prepare_reset(dev, true);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -2901,7 +2923,7 @@ static int nvme_resume(struct device *dev)
|
||||
|
||||
if (ndev->last_ps == U32_MAX ||
|
||||
nvme_set_power_state(ctrl, ndev->last_ps) != 0)
|
||||
nvme_reset_ctrl(ctrl);
|
||||
return nvme_try_sched_reset(&ndev->ctrl);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -2929,17 +2951,14 @@ static int nvme_suspend(struct device *dev)
|
||||
*/
|
||||
if (pm_suspend_via_firmware() || !ctrl->npss ||
|
||||
!pcie_aspm_enabled(pdev) ||
|
||||
(ndev->ctrl.quirks & NVME_QUIRK_SIMPLE_SUSPEND)) {
|
||||
nvme_dev_disable(ndev, true);
|
||||
return 0;
|
||||
}
|
||||
(ndev->ctrl.quirks & NVME_QUIRK_SIMPLE_SUSPEND))
|
||||
return nvme_disable_prepare_reset(ndev, true);
|
||||
|
||||
nvme_start_freeze(ctrl);
|
||||
nvme_wait_freeze(ctrl);
|
||||
nvme_sync_queues(ctrl);
|
||||
|
||||
if (ctrl->state != NVME_CTRL_LIVE &&
|
||||
ctrl->state != NVME_CTRL_ADMIN_ONLY)
|
||||
if (ctrl->state != NVME_CTRL_LIVE)
|
||||
goto unfreeze;
|
||||
|
||||
ret = nvme_get_power_state(ctrl, &ndev->last_ps);
|
||||
@ -2965,9 +2984,8 @@ static int nvme_suspend(struct device *dev)
|
||||
* Clearing npss forces a controller reset on resume. The
|
||||
* correct value will be rediscovered then.
|
||||
*/
|
||||
nvme_dev_disable(ndev, true);
|
||||
ret = nvme_disable_prepare_reset(ndev, true);
|
||||
ctrl->npss = 0;
|
||||
ret = 0;
|
||||
}
|
||||
unfreeze:
|
||||
nvme_unfreeze(ctrl);
|
||||
@ -2977,9 +2995,7 @@ unfreeze:
|
||||
static int nvme_simple_suspend(struct device *dev)
|
||||
{
|
||||
struct nvme_dev *ndev = pci_get_drvdata(to_pci_dev(dev));
|
||||
|
||||
nvme_dev_disable(ndev, true);
|
||||
return 0;
|
||||
return nvme_disable_prepare_reset(ndev, true);
|
||||
}
|
||||
|
||||
static int nvme_simple_resume(struct device *dev)
|
||||
@ -2987,8 +3003,7 @@ static int nvme_simple_resume(struct device *dev)
|
||||
struct pci_dev *pdev = to_pci_dev(dev);
|
||||
struct nvme_dev *ndev = pci_get_drvdata(pdev);
|
||||
|
||||
nvme_reset_ctrl(&ndev->ctrl);
|
||||
return 0;
|
||||
return nvme_try_sched_reset(&ndev->ctrl);
|
||||
}
|
||||
|
||||
static const struct dev_pm_ops nvme_dev_pm_ops = {
|
||||
|
@ -1701,6 +1701,14 @@ nvme_rdma_timeout(struct request *rq, bool reserved)
|
||||
dev_warn(ctrl->ctrl.device, "I/O %d QID %d timeout\n",
|
||||
rq->tag, nvme_rdma_queue_idx(queue));
|
||||
|
||||
/*
|
||||
* Restart the timer if a controller reset is already scheduled. Any
|
||||
* timed out commands would be handled before entering the connecting
|
||||
* state.
|
||||
*/
|
||||
if (ctrl->ctrl.state == NVME_CTRL_RESETTING)
|
||||
return BLK_EH_RESET_TIMER;
|
||||
|
||||
if (ctrl->ctrl.state != NVME_CTRL_LIVE) {
|
||||
/*
|
||||
* Teardown immediately if controller times out while starting
|
||||
|
@ -1386,7 +1386,9 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
|
||||
queue->sock->sk->sk_data_ready = nvme_tcp_data_ready;
|
||||
queue->sock->sk->sk_state_change = nvme_tcp_state_change;
|
||||
queue->sock->sk->sk_write_space = nvme_tcp_write_space;
|
||||
#ifdef CONFIG_NET_RX_BUSY_POLL
|
||||
queue->sock->sk->sk_ll_usec = 1;
|
||||
#endif
|
||||
write_unlock_bh(&queue->sock->sk->sk_callback_lock);
|
||||
|
||||
return 0;
|
||||
@ -2044,6 +2046,14 @@ nvme_tcp_timeout(struct request *rq, bool reserved)
|
||||
struct nvme_tcp_ctrl *ctrl = req->queue->ctrl;
|
||||
struct nvme_tcp_cmd_pdu *pdu = req->pdu;
|
||||
|
||||
/*
|
||||
* Restart the timer if a controller reset is already scheduled. Any
|
||||
* timed out commands would be handled before entering the connecting
|
||||
* state.
|
||||
*/
|
||||
if (ctrl->ctrl.state == NVME_CTRL_RESETTING)
|
||||
return BLK_EH_RESET_TIMER;
|
||||
|
||||
dev_warn(ctrl->ctrl.device,
|
||||
"queue %d: timeout request %#x type %d\n",
|
||||
nvme_tcp_queue_id(req->queue), rq->tag, pdu->hdr.type);
|
||||
@ -2126,6 +2136,7 @@ static blk_status_t nvme_tcp_setup_cmd_pdu(struct nvme_ns *ns,
|
||||
|
||||
ret = nvme_tcp_map_data(queue, rq);
|
||||
if (unlikely(ret)) {
|
||||
nvme_cleanup_cmd(rq);
|
||||
dev_err(queue->ctrl->ctrl.device,
|
||||
"Failed to map data (%d)\n", ret);
|
||||
return ret;
|
||||
|
@ -157,8 +157,10 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
|
||||
iod->sg_table.sgl = iod->first_sgl;
|
||||
if (sg_alloc_table_chained(&iod->sg_table,
|
||||
blk_rq_nr_phys_segments(req),
|
||||
iod->sg_table.sgl, SG_CHUNK_SIZE))
|
||||
iod->sg_table.sgl, SG_CHUNK_SIZE)) {
|
||||
nvme_cleanup_cmd(req);
|
||||
return BLK_STS_RESOURCE;
|
||||
}
|
||||
|
||||
iod->req.sg = iod->sg_table.sgl;
|
||||
iod->req.sg_cnt = blk_rq_map_sg(req->q, req, iod->sg_table.sgl);
|
||||
|
@ -322,6 +322,8 @@ struct io_kiocb {
|
||||
#define REQ_F_FAIL_LINK 256 /* fail rest of links */
|
||||
#define REQ_F_SHADOW_DRAIN 512 /* link-drain shadow req */
|
||||
#define REQ_F_TIMEOUT 1024 /* timeout request */
|
||||
#define REQ_F_ISREG 2048 /* regular file */
|
||||
#define REQ_F_MUST_PUNT 4096 /* must be punted even for NONBLOCK */
|
||||
u64 user_data;
|
||||
u32 result;
|
||||
u32 sequence;
|
||||
@ -914,26 +916,26 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void kiocb_end_write(struct kiocb *kiocb)
|
||||
static void kiocb_end_write(struct io_kiocb *req)
|
||||
{
|
||||
if (kiocb->ki_flags & IOCB_WRITE) {
|
||||
struct inode *inode = file_inode(kiocb->ki_filp);
|
||||
/*
|
||||
* Tell lockdep we inherited freeze protection from submission
|
||||
* thread.
|
||||
*/
|
||||
if (req->flags & REQ_F_ISREG) {
|
||||
struct inode *inode = file_inode(req->file);
|
||||
|
||||
/*
|
||||
* Tell lockdep we inherited freeze protection from submission
|
||||
* thread.
|
||||
*/
|
||||
if (S_ISREG(inode->i_mode))
|
||||
__sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
|
||||
file_end_write(kiocb->ki_filp);
|
||||
__sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
|
||||
}
|
||||
file_end_write(req->file);
|
||||
}
|
||||
|
||||
static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
|
||||
{
|
||||
struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw);
|
||||
|
||||
kiocb_end_write(kiocb);
|
||||
if (kiocb->ki_flags & IOCB_WRITE)
|
||||
kiocb_end_write(req);
|
||||
|
||||
if ((req->flags & REQ_F_LINK) && res != req->result)
|
||||
req->flags |= REQ_F_FAIL_LINK;
|
||||
@ -945,7 +947,8 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
|
||||
{
|
||||
struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw);
|
||||
|
||||
kiocb_end_write(kiocb);
|
||||
if (kiocb->ki_flags & IOCB_WRITE)
|
||||
kiocb_end_write(req);
|
||||
|
||||
if ((req->flags & REQ_F_LINK) && res != req->result)
|
||||
req->flags |= REQ_F_FAIL_LINK;
|
||||
@ -1059,8 +1062,17 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s,
|
||||
if (!req->file)
|
||||
return -EBADF;
|
||||
|
||||
if (force_nonblock && !io_file_supports_async(req->file))
|
||||
force_nonblock = false;
|
||||
if (S_ISREG(file_inode(req->file)->i_mode))
|
||||
req->flags |= REQ_F_ISREG;
|
||||
|
||||
/*
|
||||
* If the file doesn't support async, mark it as REQ_F_MUST_PUNT so
|
||||
* we know to async punt it even if it was opened O_NONBLOCK
|
||||
*/
|
||||
if (force_nonblock && !io_file_supports_async(req->file)) {
|
||||
req->flags |= REQ_F_MUST_PUNT;
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
kiocb->ki_pos = READ_ONCE(sqe->off);
|
||||
kiocb->ki_flags = iocb_flags(kiocb->ki_filp);
|
||||
@ -1081,7 +1093,8 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s,
|
||||
return ret;
|
||||
|
||||
/* don't allow async punt if RWF_NOWAIT was requested */
|
||||
if (kiocb->ki_flags & IOCB_NOWAIT)
|
||||
if ((kiocb->ki_flags & IOCB_NOWAIT) ||
|
||||
(req->file->f_flags & O_NONBLOCK))
|
||||
req->flags |= REQ_F_NOWAIT;
|
||||
|
||||
if (force_nonblock)
|
||||
@ -1382,7 +1395,9 @@ static int io_read(struct io_kiocb *req, const struct sqe_submit *s,
|
||||
* need async punt anyway, so it's more efficient to do it
|
||||
* here.
|
||||
*/
|
||||
if (force_nonblock && ret2 > 0 && ret2 < read_size)
|
||||
if (force_nonblock && !(req->flags & REQ_F_NOWAIT) &&
|
||||
(req->flags & REQ_F_ISREG) &&
|
||||
ret2 > 0 && ret2 < read_size)
|
||||
ret2 = -EAGAIN;
|
||||
/* Catch -EAGAIN return for forced non-blocking submission */
|
||||
if (!force_nonblock || ret2 != -EAGAIN) {
|
||||
@ -1447,7 +1462,7 @@ static int io_write(struct io_kiocb *req, const struct sqe_submit *s,
|
||||
* released so that it doesn't complain about the held lock when
|
||||
* we return to userspace.
|
||||
*/
|
||||
if (S_ISREG(file_inode(file)->i_mode)) {
|
||||
if (req->flags & REQ_F_ISREG) {
|
||||
__sb_start_write(file_inode(file)->i_sb,
|
||||
SB_FREEZE_WRITE, true);
|
||||
__sb_writers_release(file_inode(file)->i_sb,
|
||||
@ -1884,7 +1899,7 @@ static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer)
|
||||
|
||||
static int io_timeout(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
||||
{
|
||||
unsigned count, req_dist, tail_index;
|
||||
unsigned count;
|
||||
struct io_ring_ctx *ctx = req->ctx;
|
||||
struct list_head *entry;
|
||||
struct timespec64 ts;
|
||||
@ -1907,21 +1922,36 @@ static int io_timeout(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
||||
count = 1;
|
||||
|
||||
req->sequence = ctx->cached_sq_head + count - 1;
|
||||
/* reuse it to store the count */
|
||||
req->submit.sequence = count;
|
||||
req->flags |= REQ_F_TIMEOUT;
|
||||
|
||||
/*
|
||||
* Insertion sort, ensuring the first entry in the list is always
|
||||
* the one we need first.
|
||||
*/
|
||||
tail_index = ctx->cached_cq_tail - ctx->rings->sq_dropped;
|
||||
req_dist = req->sequence - tail_index;
|
||||
spin_lock_irq(&ctx->completion_lock);
|
||||
list_for_each_prev(entry, &ctx->timeout_list) {
|
||||
struct io_kiocb *nxt = list_entry(entry, struct io_kiocb, list);
|
||||
unsigned dist;
|
||||
unsigned nxt_sq_head;
|
||||
long long tmp, tmp_nxt;
|
||||
|
||||
dist = nxt->sequence - tail_index;
|
||||
if (req_dist >= dist)
|
||||
/*
|
||||
* Since cached_sq_head + count - 1 can overflow, use type long
|
||||
* long to store it.
|
||||
*/
|
||||
tmp = (long long)ctx->cached_sq_head + count - 1;
|
||||
nxt_sq_head = nxt->sequence - nxt->submit.sequence + 1;
|
||||
tmp_nxt = (long long)nxt_sq_head + nxt->submit.sequence - 1;
|
||||
|
||||
/*
|
||||
* cached_sq_head may overflow, and it will never overflow twice
|
||||
* while some timeout req is still valid.
|
||||
*/
|
||||
if (ctx->cached_sq_head < nxt_sq_head)
|
||||
tmp += UINT_MAX;
|
||||
|
||||
if (tmp >= tmp_nxt)
|
||||
break;
|
||||
}
|
||||
list_add(&req->list, entry);
|
||||
@ -2267,7 +2297,13 @@ static int __io_queue_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
|
||||
int ret;
|
||||
|
||||
ret = __io_submit_sqe(ctx, req, s, force_nonblock);
|
||||
if (ret == -EAGAIN && !(req->flags & REQ_F_NOWAIT)) {
|
||||
|
||||
/*
|
||||
* We async punt it if the file wasn't marked NOWAIT, or if the file
|
||||
* doesn't support non-blocking read/write attempts
|
||||
*/
|
||||
if (ret == -EAGAIN && (!(req->flags & REQ_F_NOWAIT) ||
|
||||
(req->flags & REQ_F_MUST_PUNT))) {
|
||||
struct io_uring_sqe *sqe_copy;
|
||||
|
||||
sqe_copy = kmemdup(s->sqe, sizeof(*sqe_copy), GFP_KERNEL);
|
||||
|
Loading…
Reference in New Issue
Block a user