block-6.7-2023-12-08

-----BEGIN PGP SIGNATURE-----
 
 iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAmVzOGgQHGF4Ym9lQGtl
 cm5lbC5kawAKCRD301j7KXHgprPED/sFJUY31XzGNWlqigwneZYppNYLyfJwTZ5z
 FJyMfN/i9IPWZdBnY/Sed4lp/rWhlEIDNN69bHG7ErK4t8weaGWAV9+ygwRHNmm/
 bAcezY0rJwh23pl/kCYxidhVVFpyhSjFebDUQ6nY4XeTm9OOeeVsTsKVNmN7hF/M
 kGkJU+xZfu63RHcc0NATJPaaZGv0t5tZDnLCOiBy71tckxQlvPqvGnXKoTK1XeEv
 WQu3WgYFhDmRfjnaWPKW8HewcEjZHrRNFiAgKOf1fVCUdEbUrMU9qtGRr0Wv5Fwv
 f5cVqc+K44AE+spz/3Kb07q2/yI9cY3gbB1Ogt1ML1ryOMX9VgmymTxAhHyMJdJF
 +SnDgkUGnkn4mAr93lOad1DL76Ep5fvK3NX799TwkL1RC/78GiGYJWEOA3rC7sD2
 Nfrs5RHxAnjwlT4jKSJ8pbGZa3SQ5g+zn7sePxTRSEARq4Z8YY7WhQpKuW70NVvp
 qkVCZgUEXO7DQcpjPLksOGt4tviXHahuIbq7RQD6OFZ1eNFrljHljymo+4rRRT/B
 QlouVtrdkGA+u2xj0NJ24rtlyVvSg/150UWdtxgkq9zLQudFD9HoIDySjdnwHgIE
 JTjIwH6Wq2y4TqGb4BkOlYMSqJaL0jcac2mT9oLkVoJ4OoQ6P9rtjHnl0yQGsb2R
 a6vKAkWF7Q==
 =Ds8S
 -----END PGP SIGNATURE-----

Merge tag 'block-6.7-2023-12-08' of git://git.kernel.dk/linux

Pull block fixes from Jens Axboe:
 "Nothing major in here, just miscellanous fixes for MD and NVMe:

   - NVMe pull request via Keith:
      - Proper nvme ctrl state setting (Keith)
      - Passthrough command optimization (Keith)
      - Spectre fix (Nitesh)
      - Kconfig clarifications (Shin'ichiro)
      - Frozen state deadlock fix (Bitao)
      - Power setting quirk (Georg)

   - MD pull requests via Song:
      - 6.7 regressions with recovery/sync (Yu)
      - Reshape fix (David)"

* tag 'block-6.7-2023-12-08' of git://git.kernel.dk/linux:
  md: split MD_RECOVERY_NEEDED out of mddev_resume
  nvme-pci: Add sleep quirk for Kingston drives
  md: fix stopping sync thread
  md: don't leave 'MD_RECOVERY_FROZEN' in error path of md_set_readonly()
  md: fix missing flush of sync_work
  nvme: fix deadlock between reset and scan
  nvme: prevent potential spectre v1 gadget
  nvme: improve NVME_HOST_AUTH and NVME_TARGET_AUTH config descriptions
  nvme-ioctl: move capable() admin check to the end
  nvme: ensure reset state check ordering
  nvme: introduce helper function to get ctrl state
  md/raid6: use valid sector values to determine if an I/O should wait on the reshape
Linus Torvalds 2023-12-08 12:36:45 -08:00
commit d71369dbe0
12 changed files with 197 additions and 134 deletions


@ -490,7 +490,7 @@ int mddev_suspend(struct mddev *mddev, bool interruptible)
}
EXPORT_SYMBOL_GPL(mddev_suspend);
void mddev_resume(struct mddev *mddev)
static void __mddev_resume(struct mddev *mddev, bool recovery_needed)
{
lockdep_assert_not_held(&mddev->reconfig_mutex);
@ -507,12 +507,18 @@ void mddev_resume(struct mddev *mddev)
percpu_ref_resurrect(&mddev->active_io);
wake_up(&mddev->sb_wait);
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
if (recovery_needed)
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
md_wakeup_thread(mddev->thread);
md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */
mutex_unlock(&mddev->suspend_mutex);
}
void mddev_resume(struct mddev *mddev)
{
return __mddev_resume(mddev, true);
}
EXPORT_SYMBOL_GPL(mddev_resume);
/*
@ -4840,25 +4846,29 @@ action_show(struct mddev *mddev, char *page)
return sprintf(page, "%s\n", type);
}
static void stop_sync_thread(struct mddev *mddev)
/**
* stop_sync_thread() - wait for sync_thread to stop if it's running.
* @mddev: the array.
* @locked: if set, reconfig_mutex will still be held after this function
* returns; if not set, reconfig_mutex will be released after this
* function returns.
* @check_seq: if set, only wait for the currently running sync_thread to stop;
* note that a new sync_thread can still start.
*/
static void stop_sync_thread(struct mddev *mddev, bool locked, bool check_seq)
{
if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
return;
int sync_seq;
if (mddev_lock(mddev))
return;
if (check_seq)
sync_seq = atomic_read(&mddev->sync_seq);
/*
* Check again in case MD_RECOVERY_RUNNING is cleared before lock is
* held.
*/
if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
mddev_unlock(mddev);
if (!locked)
mddev_unlock(mddev);
return;
}
if (work_pending(&mddev->del_work))
flush_workqueue(md_misc_wq);
mddev_unlock(mddev);
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
/*
@ -4866,21 +4876,28 @@ static void stop_sync_thread(struct mddev *mddev)
* never happen
*/
md_wakeup_thread_directly(mddev->sync_thread);
if (work_pending(&mddev->sync_work))
flush_work(&mddev->sync_work);
mddev_unlock(mddev);
wait_event(resync_wait,
!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
(check_seq && sync_seq != atomic_read(&mddev->sync_seq)));
if (locked)
mddev_lock_nointr(mddev);
}
static void idle_sync_thread(struct mddev *mddev)
{
int sync_seq = atomic_read(&mddev->sync_seq);
mutex_lock(&mddev->sync_mutex);
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
stop_sync_thread(mddev);
wait_event(resync_wait, sync_seq != atomic_read(&mddev->sync_seq) ||
!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
if (mddev_lock(mddev)) {
mutex_unlock(&mddev->sync_mutex);
return;
}
stop_sync_thread(mddev, false, true);
mutex_unlock(&mddev->sync_mutex);
}
@ -4888,11 +4905,13 @@ static void frozen_sync_thread(struct mddev *mddev)
{
mutex_lock(&mddev->sync_mutex);
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
stop_sync_thread(mddev);
wait_event(resync_wait, mddev->sync_thread == NULL &&
!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
if (mddev_lock(mddev)) {
mutex_unlock(&mddev->sync_mutex);
return;
}
stop_sync_thread(mddev, false, false);
mutex_unlock(&mddev->sync_mutex);
}
@ -6264,14 +6283,7 @@ static void md_clean(struct mddev *mddev)
static void __md_stop_writes(struct mddev *mddev)
{
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
if (work_pending(&mddev->del_work))
flush_workqueue(md_misc_wq);
if (mddev->sync_thread) {
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
md_reap_sync_thread(mddev);
}
stop_sync_thread(mddev, true, false);
del_timer_sync(&mddev->safemode_timer);
if (mddev->pers && mddev->pers->quiesce) {
@ -6355,25 +6367,16 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
int err = 0;
int did_freeze = 0;
if (mddev->external && test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
return -EBUSY;
if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
did_freeze = 1;
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
md_wakeup_thread(mddev->thread);
}
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
/*
* Thread might be blocked waiting for metadata update which will now
* never happen
*/
md_wakeup_thread_directly(mddev->sync_thread);
if (mddev->external && test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
return -EBUSY;
mddev_unlock(mddev);
wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING,
&mddev->recovery));
stop_sync_thread(mddev, false, false);
wait_event(mddev->sb_wait,
!test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
mddev_lock_nointr(mddev);
@ -6383,29 +6386,30 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
mddev->sync_thread ||
test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
pr_warn("md: %s still in use.\n",mdname(mddev));
if (did_freeze) {
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
md_wakeup_thread(mddev->thread);
}
err = -EBUSY;
goto out;
}
if (mddev->pers) {
__md_stop_writes(mddev);
err = -ENXIO;
if (mddev->ro == MD_RDONLY)
if (mddev->ro == MD_RDONLY) {
err = -ENXIO;
goto out;
}
mddev->ro = MD_RDONLY;
set_disk_ro(mddev->gendisk, 1);
}
out:
if ((mddev->pers && !err) || did_freeze) {
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
md_wakeup_thread(mddev->thread);
sysfs_notify_dirent_safe(mddev->sysfs_state);
err = 0;
}
out:
mutex_unlock(&mddev->open_mutex);
return err;
}
@ -6426,20 +6430,8 @@ static int do_md_stop(struct mddev *mddev, int mode,
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
md_wakeup_thread(mddev->thread);
}
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
/*
* Thread might be blocked waiting for metadata update which will now
* never happen
*/
md_wakeup_thread_directly(mddev->sync_thread);
mddev_unlock(mddev);
wait_event(resync_wait, (mddev->sync_thread == NULL &&
!test_bit(MD_RECOVERY_RUNNING,
&mddev->recovery)));
mddev_lock_nointr(mddev);
stop_sync_thread(mddev, true, false);
mutex_lock(&mddev->open_mutex);
if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
@ -9403,7 +9395,15 @@ static void md_start_sync(struct work_struct *ws)
goto not_running;
}
suspend ? mddev_unlock_and_resume(mddev) : mddev_unlock(mddev);
mddev_unlock(mddev);
/*
* md_start_sync was triggered by MD_RECOVERY_NEEDED, so we should
* not set it again. Otherwise, we may cause an issue like this one:
* https://bugzilla.kernel.org/show_bug.cgi?id=218200
* Therefore, use __mddev_resume(mddev, false).
*/
if (suspend)
__mddev_resume(mddev, false);
md_wakeup_thread(mddev->sync_thread);
sysfs_notify_dirent_safe(mddev->sysfs_action);
md_new_event();
@ -9415,7 +9415,15 @@ not_running:
clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
suspend ? mddev_unlock_and_resume(mddev) : mddev_unlock(mddev);
mddev_unlock(mddev);
/*
* md_start_sync was triggered by MD_RECOVERY_NEEDED, so we should
* not set it again. Otherwise, we may cause an issue like this one:
* https://bugzilla.kernel.org/show_bug.cgi?id=218200
* Therefore, use __mddev_resume(mddev, false).
*/
if (suspend)
__mddev_resume(mddev, false);
wake_up(&resync_wait);
if (test_and_clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery) &&


@ -5892,11 +5892,11 @@ static bool stripe_ahead_of_reshape(struct mddev *mddev, struct r5conf *conf,
int dd_idx;
for (dd_idx = 0; dd_idx < sh->disks; dd_idx++) {
if (dd_idx == sh->pd_idx)
if (dd_idx == sh->pd_idx || dd_idx == sh->qd_idx)
continue;
min_sector = min(min_sector, sh->dev[dd_idx].sector);
max_sector = min(max_sector, sh->dev[dd_idx].sector);
max_sector = max(max_sector, sh->dev[dd_idx].sector);
}
spin_lock_irq(&conf->device_lock);
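
The stripe_ahead_of_reshape() hunk above fixes two things: the per-device scan now skips the Q parity slot (qd_idx) as well as the P slot (pd_idx), and max_sector is computed with max() instead of a copy-pasted min(). With min() in both places the "maximum" could never exceed the smallest data sector, so the ahead-of-reshape decision was made against a bogus range. Below is a minimal user-space sketch of the corrected scan; the disk count, sector values, and parity indices are made-up illustrations, not kernel data.

#include <stdio.h>

#define NDISKS 6

int main(void)
{
	/* per-device sector for one stripe head; the values are made up */
	unsigned long long dev_sector[NDISKS] = { 1024, 1032, 1040, 1024, 9999, 9999 };
	int pd_idx = 4, qd_idx = 5;		/* P and Q parity slots */
	unsigned long long min_sector = ~0ULL, max_sector = 0;
	int i;

	for (i = 0; i < NDISKS; i++) {
		if (i == pd_idx || i == qd_idx)	/* the fix: also skip Q */
			continue;
		if (dev_sector[i] < min_sector)
			min_sector = dev_sector[i];
		if (dev_sector[i] > max_sector)	/* the fix: max, not min */
			max_sector = dev_sector[i];
	}

	printf("data sectors span [%llu, %llu]\n", min_sector, max_sector);
	return 0;
}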


@ -107,11 +107,12 @@ config NVME_TCP_TLS
If unsure, say N.
config NVME_HOST_AUTH
bool "NVM Express over Fabrics In-Band Authentication"
bool "NVMe over Fabrics In-Band Authentication in host side"
depends on NVME_CORE
select NVME_AUTH
help
This provides support for NVMe over Fabrics In-Band Authentication.
This provides support for NVMe over Fabrics In-Band Authentication on
the host side.
If unsure, say N.


@ -131,7 +131,7 @@ void nvme_queue_scan(struct nvme_ctrl *ctrl)
/*
* Only new queue scan work when admin and IO queues are both alive
*/
if (ctrl->state == NVME_CTRL_LIVE && ctrl->tagset)
if (nvme_ctrl_state(ctrl) == NVME_CTRL_LIVE && ctrl->tagset)
queue_work(nvme_wq, &ctrl->scan_work);
}
@ -143,7 +143,7 @@ void nvme_queue_scan(struct nvme_ctrl *ctrl)
*/
int nvme_try_sched_reset(struct nvme_ctrl *ctrl)
{
if (ctrl->state != NVME_CTRL_RESETTING)
if (nvme_ctrl_state(ctrl) != NVME_CTRL_RESETTING)
return -EBUSY;
if (!queue_work(nvme_reset_wq, &ctrl->reset_work))
return -EBUSY;
@ -156,7 +156,7 @@ static void nvme_failfast_work(struct work_struct *work)
struct nvme_ctrl *ctrl = container_of(to_delayed_work(work),
struct nvme_ctrl, failfast_work);
if (ctrl->state != NVME_CTRL_CONNECTING)
if (nvme_ctrl_state(ctrl) != NVME_CTRL_CONNECTING)
return;
set_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags);
@ -200,7 +200,7 @@ int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl)
ret = nvme_reset_ctrl(ctrl);
if (!ret) {
flush_work(&ctrl->reset_work);
if (ctrl->state != NVME_CTRL_LIVE)
if (nvme_ctrl_state(ctrl) != NVME_CTRL_LIVE)
ret = -ENETRESET;
}
@ -499,7 +499,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
spin_lock_irqsave(&ctrl->lock, flags);
old_state = ctrl->state;
old_state = nvme_ctrl_state(ctrl);
switch (new_state) {
case NVME_CTRL_LIVE:
switch (old_state) {
@ -567,7 +567,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
}
if (changed) {
ctrl->state = new_state;
WRITE_ONCE(ctrl->state, new_state);
wake_up_all(&ctrl->state_wq);
}
@ -575,11 +575,11 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
if (!changed)
return false;
if (ctrl->state == NVME_CTRL_LIVE) {
if (new_state == NVME_CTRL_LIVE) {
if (old_state == NVME_CTRL_CONNECTING)
nvme_stop_failfast_work(ctrl);
nvme_kick_requeue_lists(ctrl);
} else if (ctrl->state == NVME_CTRL_CONNECTING &&
} else if (new_state == NVME_CTRL_CONNECTING &&
old_state == NVME_CTRL_RESETTING) {
nvme_start_failfast_work(ctrl);
}
@ -592,7 +592,7 @@ EXPORT_SYMBOL_GPL(nvme_change_ctrl_state);
*/
static bool nvme_state_terminal(struct nvme_ctrl *ctrl)
{
switch (ctrl->state) {
switch (nvme_ctrl_state(ctrl)) {
case NVME_CTRL_NEW:
case NVME_CTRL_LIVE:
case NVME_CTRL_RESETTING:
@ -617,7 +617,7 @@ bool nvme_wait_reset(struct nvme_ctrl *ctrl)
wait_event(ctrl->state_wq,
nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING) ||
nvme_state_terminal(ctrl));
return ctrl->state == NVME_CTRL_RESETTING;
return nvme_ctrl_state(ctrl) == NVME_CTRL_RESETTING;
}
EXPORT_SYMBOL_GPL(nvme_wait_reset);
@ -704,9 +704,11 @@ EXPORT_SYMBOL_GPL(nvme_init_request);
blk_status_t nvme_fail_nonready_command(struct nvme_ctrl *ctrl,
struct request *rq)
{
if (ctrl->state != NVME_CTRL_DELETING_NOIO &&
ctrl->state != NVME_CTRL_DELETING &&
ctrl->state != NVME_CTRL_DEAD &&
enum nvme_ctrl_state state = nvme_ctrl_state(ctrl);
if (state != NVME_CTRL_DELETING_NOIO &&
state != NVME_CTRL_DELETING &&
state != NVME_CTRL_DEAD &&
!test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags) &&
!blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
return BLK_STS_RESOURCE;
@ -736,7 +738,7 @@ bool __nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
* command, which is required to set the queue live in the
* appropriate states.
*/
switch (ctrl->state) {
switch (nvme_ctrl_state(ctrl)) {
case NVME_CTRL_CONNECTING:
if (blk_rq_is_passthrough(rq) && nvme_is_fabrics(req->cmd) &&
(req->cmd->fabrics.fctype == nvme_fabrics_type_connect ||
@ -2550,7 +2552,7 @@ static void nvme_set_latency_tolerance(struct device *dev, s32 val)
if (ctrl->ps_max_latency_us != latency) {
ctrl->ps_max_latency_us = latency;
if (ctrl->state == NVME_CTRL_LIVE)
if (nvme_ctrl_state(ctrl) == NVME_CTRL_LIVE)
nvme_configure_apst(ctrl);
}
}
@ -3238,7 +3240,7 @@ static int nvme_dev_open(struct inode *inode, struct file *file)
struct nvme_ctrl *ctrl =
container_of(inode->i_cdev, struct nvme_ctrl, cdev);
switch (ctrl->state) {
switch (nvme_ctrl_state(ctrl)) {
case NVME_CTRL_LIVE:
break;
default:
@ -3660,6 +3662,14 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info)
goto out_unlink_ns;
down_write(&ctrl->namespaces_rwsem);
/*
* Ensure that no namespaces are added to the ctrl list after the queues
* are frozen, thereby avoiding a deadlock between scan and reset.
*/
if (test_bit(NVME_CTRL_FROZEN, &ctrl->flags)) {
up_write(&ctrl->namespaces_rwsem);
goto out_unlink_ns;
}
nvme_ns_add_to_ctrl_list(ns);
up_write(&ctrl->namespaces_rwsem);
nvme_get_ctrl(ctrl);
@ -3924,7 +3934,7 @@ static void nvme_scan_work(struct work_struct *work)
int ret;
/* No tagset on a live ctrl means IO queues could not be created */
if (ctrl->state != NVME_CTRL_LIVE || !ctrl->tagset)
if (nvme_ctrl_state(ctrl) != NVME_CTRL_LIVE || !ctrl->tagset)
return;
/*
@ -3994,7 +4004,7 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
* removing the namespaces' disks; fail all the queues now to avoid
* potentially having to clean up the failed sync later.
*/
if (ctrl->state == NVME_CTRL_DEAD)
if (nvme_ctrl_state(ctrl) == NVME_CTRL_DEAD)
nvme_mark_namespaces_dead(ctrl);
/* this is a no-op when called from the controller reset handler */
@ -4076,7 +4086,7 @@ static void nvme_async_event_work(struct work_struct *work)
* flushing ctrl async_event_work after changing the controller state
* from LIVE and before freeing the admin queue.
*/
if (ctrl->state == NVME_CTRL_LIVE)
if (nvme_ctrl_state(ctrl) == NVME_CTRL_LIVE)
ctrl->ops->submit_async_event(ctrl);
}
@ -4471,7 +4481,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
{
int ret;
ctrl->state = NVME_CTRL_NEW;
WRITE_ONCE(ctrl->state, NVME_CTRL_NEW);
clear_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags);
spin_lock_init(&ctrl->lock);
mutex_init(&ctrl->scan_lock);
@ -4581,6 +4591,7 @@ void nvme_unfreeze(struct nvme_ctrl *ctrl)
list_for_each_entry(ns, &ctrl->namespaces, list)
blk_mq_unfreeze_queue(ns->queue);
up_read(&ctrl->namespaces_rwsem);
clear_bit(NVME_CTRL_FROZEN, &ctrl->flags);
}
EXPORT_SYMBOL_GPL(nvme_unfreeze);
@ -4614,6 +4625,7 @@ void nvme_start_freeze(struct nvme_ctrl *ctrl)
{
struct nvme_ns *ns;
set_bit(NVME_CTRL_FROZEN, &ctrl->flags);
down_read(&ctrl->namespaces_rwsem);
list_for_each_entry(ns, &ctrl->namespaces, list)
blk_freeze_queue_start(ns->queue);
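
Many of the changes above replace direct reads of ctrl->state with nvme_ctrl_state() — a READ_ONCE() wrapper introduced in the nvme.h hunk further down — and the state update in nvme_change_ctrl_state() becomes a WRITE_ONCE(); callers that test several states now take a single snapshot and compare against that. The sketch below is a stand-alone, user-space approximation of that pattern, with C11 relaxed atomics standing in for READ_ONCE()/WRITE_ONCE(); the type and function names are illustrative, not the driver's.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

enum ctrl_state { CTRL_NEW, CTRL_LIVE, CTRL_RESETTING, CTRL_DELETING, CTRL_DEAD };

struct ctrl {
	_Atomic enum ctrl_state state;
};

/* stand-in for nvme_ctrl_state(): one READ_ONCE()-style load */
static enum ctrl_state ctrl_state(struct ctrl *ctrl)
{
	return atomic_load_explicit(&ctrl->state, memory_order_relaxed);
}

/* stand-in for the WRITE_ONCE() in nvme_change_ctrl_state() */
static void ctrl_set_state(struct ctrl *ctrl, enum ctrl_state new_state)
{
	atomic_store_explicit(&ctrl->state, new_state, memory_order_relaxed);
}

/*
 * Take one snapshot and compare against it, as the patched
 * nvme_fail_nonready_command() does. With repeated plain reads the
 * compiler may reload ctrl->state between comparisons, so a concurrent
 * state change could make the checks disagree with each other.
 */
static bool should_requeue(struct ctrl *ctrl)
{
	enum ctrl_state state = ctrl_state(ctrl);

	return state != CTRL_DELETING && state != CTRL_DEAD;
}

int main(void)
{
	struct ctrl c;

	ctrl_set_state(&c, CTRL_RESETTING);
	printf("requeue while resetting: %s\n", should_requeue(&c) ? "yes" : "no");
	return 0;
}

The point of the single snapshot is that every comparison in a given decision is made against the same value even if the controller state changes concurrently; it does not add ordering on its own, which is why the series also pairs it with the explicit WRITE_ONCE() on the writer side.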


@ -557,7 +557,7 @@ nvme_fc_rport_get(struct nvme_fc_rport *rport)
static void
nvme_fc_resume_controller(struct nvme_fc_ctrl *ctrl)
{
switch (ctrl->ctrl.state) {
switch (nvme_ctrl_state(&ctrl->ctrl)) {
case NVME_CTRL_NEW:
case NVME_CTRL_CONNECTING:
/*
@ -793,7 +793,7 @@ nvme_fc_ctrl_connectivity_loss(struct nvme_fc_ctrl *ctrl)
"NVME-FC{%d}: controller connectivity lost. Awaiting "
"Reconnect", ctrl->cnum);
switch (ctrl->ctrl.state) {
switch (nvme_ctrl_state(&ctrl->ctrl)) {
case NVME_CTRL_NEW:
case NVME_CTRL_LIVE:
/*
@ -3319,7 +3319,7 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status)
unsigned long recon_delay = ctrl->ctrl.opts->reconnect_delay * HZ;
bool recon = true;
if (ctrl->ctrl.state != NVME_CTRL_CONNECTING)
if (nvme_ctrl_state(&ctrl->ctrl) != NVME_CTRL_CONNECTING)
return;
if (portptr->port_state == FC_OBJSTATE_ONLINE) {


@ -18,15 +18,12 @@ static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c,
{
u32 effects;
if (capable(CAP_SYS_ADMIN))
return true;
/*
* Do not allow unprivileged passthrough on partitions, as that allows an
* escape from the containment of the partition.
*/
if (flags & NVME_IOCTL_PARTITION)
return false;
goto admin;
/*
* Do not allow unprivileged processes to send vendor specific or fabrics
@ -34,7 +31,7 @@ static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c,
*/
if (c->common.opcode >= nvme_cmd_vendor_start ||
c->common.opcode == nvme_fabrics_command)
return false;
goto admin;
/*
* Do not allow unprivileged passthrough of admin commands except
@ -53,7 +50,7 @@ static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c,
return true;
}
}
return false;
goto admin;
}
/*
@ -63,7 +60,7 @@ static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c,
*/
effects = nvme_command_effects(ns->ctrl, ns, c->common.opcode);
if (!(effects & NVME_CMD_EFFECTS_CSUPP))
return false;
goto admin;
/*
* Don't allow passthrough for commands that have intrusive (or unknown)
@ -72,16 +69,20 @@ static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c,
if (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC |
NVME_CMD_EFFECTS_UUID_SEL |
NVME_CMD_EFFECTS_SCOPE_MASK))
return false;
goto admin;
/*
* Only allow I/O commands that transfer data to the controller or that
* change the logical block contents if the file descriptor is open for
* writing.
*/
if (nvme_is_write(c) || (effects & NVME_CMD_EFFECTS_LBCC))
return open_for_write;
if ((nvme_is_write(c) || (effects & NVME_CMD_EFFECTS_LBCC)) &&
!open_for_write)
goto admin;
return true;
admin:
return capable(CAP_SYS_ADMIN);
}
/*

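The hunk above turns every early "return false" in nvme_cmd_allowed() into "goto admin", so the single capable(CAP_SYS_ADMIN) check now sits at the end and only runs when one of the unprivileged rules has failed; a command that is permitted without privilege never reaches the capability check at all. Here is a self-contained sketch of that control-flow shape — the predicates are stand-ins, not kernel APIs.

#include <stdbool.h>
#include <stdio.h>

/* Stand-ins for the kernel helpers; not real APIs. */
static bool has_admin_cap(void)			/* plays the role of capable(CAP_SYS_ADMIN) */
{
	return false;
}

static bool opcode_is_vendor(unsigned char opcode)
{
	return opcode >= 0xc0;
}

static bool cmd_is_write(unsigned char opcode)
{
	return opcode == 0x01;			/* pretend 0x01 is the only write command */
}

/* Same shape as the patched nvme_cmd_allowed(): unprivileged rules first,
 * one privilege check as the fallback. */
static bool cmd_allowed(unsigned char opcode, bool open_for_write, bool on_partition)
{
	if (on_partition)
		goto admin;			/* never unprivileged on partitions */
	if (opcode_is_vendor(opcode))
		goto admin;
	if (cmd_is_write(opcode) && !open_for_write)
		goto admin;			/* writes need a writable fd */
	return true;				/* allowed without any privilege check */
admin:
	return has_admin_cap();			/* single, last-resort check */
}

int main(void)
{
	printf("read cmd on read-only fd allowed: %s\n",
	       cmd_allowed(0x02, false, false) ? "yes" : "no");
	return 0;
}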

@ -156,6 +156,11 @@ enum nvme_quirks {
* No temperature thresholds for channels other than 0 (Composite).
*/
NVME_QUIRK_NO_SECONDARY_TEMP_THRESH = (1 << 19),
/*
* Disables simple suspend/resume path.
*/
NVME_QUIRK_FORCE_NO_SIMPLE_SUSPEND = (1 << 20),
};
/*
@ -251,6 +256,7 @@ enum nvme_ctrl_flags {
NVME_CTRL_STOPPED = 3,
NVME_CTRL_SKIP_ID_CNS_CS = 4,
NVME_CTRL_DIRTY_CAPABILITY = 5,
NVME_CTRL_FROZEN = 6,
};
struct nvme_ctrl {
@ -387,6 +393,11 @@ struct nvme_ctrl {
enum nvme_dctype dctype;
};
static inline enum nvme_ctrl_state nvme_ctrl_state(struct nvme_ctrl *ctrl)
{
return READ_ONCE(ctrl->state);
}
enum nvme_iopolicy {
NVME_IOPOLICY_NUMA,
NVME_IOPOLICY_RR,


@ -1233,7 +1233,7 @@ static bool nvme_should_reset(struct nvme_dev *dev, u32 csts)
bool nssro = dev->subsystem && (csts & NVME_CSTS_NSSRO);
/* If there is a reset/reinit ongoing, we shouldn't reset again. */
switch (dev->ctrl.state) {
switch (nvme_ctrl_state(&dev->ctrl)) {
case NVME_CTRL_RESETTING:
case NVME_CTRL_CONNECTING:
return false;
@ -1321,7 +1321,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
* cancellation error. All outstanding requests are completed on
* shutdown, so we return BLK_EH_DONE.
*/
switch (dev->ctrl.state) {
switch (nvme_ctrl_state(&dev->ctrl)) {
case NVME_CTRL_CONNECTING:
nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
fallthrough;
@ -1593,7 +1593,7 @@ static int nvme_setup_io_queues_trylock(struct nvme_dev *dev)
/*
* Controller is in wrong state, fail early.
*/
if (dev->ctrl.state != NVME_CTRL_CONNECTING) {
if (nvme_ctrl_state(&dev->ctrl) != NVME_CTRL_CONNECTING) {
mutex_unlock(&dev->shutdown_lock);
return -ENODEV;
}
@ -2573,13 +2573,13 @@ static bool nvme_pci_ctrl_is_dead(struct nvme_dev *dev)
static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
{
enum nvme_ctrl_state state = nvme_ctrl_state(&dev->ctrl);
struct pci_dev *pdev = to_pci_dev(dev->dev);
bool dead;
mutex_lock(&dev->shutdown_lock);
dead = nvme_pci_ctrl_is_dead(dev);
if (dev->ctrl.state == NVME_CTRL_LIVE ||
dev->ctrl.state == NVME_CTRL_RESETTING) {
if (state == NVME_CTRL_LIVE || state == NVME_CTRL_RESETTING) {
if (pci_is_enabled(pdev))
nvme_start_freeze(&dev->ctrl);
/*
@ -2690,7 +2690,7 @@ static void nvme_reset_work(struct work_struct *work)
bool was_suspend = !!(dev->ctrl.ctrl_config & NVME_CC_SHN_NORMAL);
int result;
if (dev->ctrl.state != NVME_CTRL_RESETTING) {
if (nvme_ctrl_state(&dev->ctrl) != NVME_CTRL_RESETTING) {
dev_warn(dev->ctrl.device, "ctrl state %d is not RESETTING\n",
dev->ctrl.state);
result = -ENODEV;
@ -2902,6 +2902,18 @@ static unsigned long check_vendor_combination_bug(struct pci_dev *pdev)
if ((dmi_match(DMI_BOARD_VENDOR, "LENOVO")) &&
dmi_match(DMI_BOARD_NAME, "LNVNB161216"))
return NVME_QUIRK_SIMPLE_SUSPEND;
} else if (pdev->vendor == 0x2646 && (pdev->device == 0x2263 ||
pdev->device == 0x500f)) {
/*
* Exclude some Kingston NV1 and A2000 devices from
* NVME_QUIRK_SIMPLE_SUSPEND. Do a full suspend to save a
* lot of energy with s2idle sleep on some TUXEDO platforms.
*/
if (dmi_match(DMI_BOARD_NAME, "NS5X_NS7XAU") ||
dmi_match(DMI_BOARD_NAME, "NS5x_7xAU") ||
dmi_match(DMI_BOARD_NAME, "NS5x_7xPU") ||
dmi_match(DMI_BOARD_NAME, "PH4PRX1_PH6PRX1"))
return NVME_QUIRK_FORCE_NO_SIMPLE_SUSPEND;
}
return 0;
@ -2932,7 +2944,9 @@ static struct nvme_dev *nvme_pci_alloc_dev(struct pci_dev *pdev,
dev->dev = get_device(&pdev->dev);
quirks |= check_vendor_combination_bug(pdev);
if (!noacpi && acpi_storage_d3(&pdev->dev)) {
if (!noacpi &&
!(quirks & NVME_QUIRK_FORCE_NO_SIMPLE_SUSPEND) &&
acpi_storage_d3(&pdev->dev)) {
/*
* Some systems use a bios work around to ask for D3 on
* platforms that support kernel managed suspend.
@ -3192,7 +3206,7 @@ static int nvme_suspend(struct device *dev)
nvme_wait_freeze(ctrl);
nvme_sync_queues(ctrl);
if (ctrl->state != NVME_CTRL_LIVE)
if (nvme_ctrl_state(ctrl) != NVME_CTRL_LIVE)
goto unfreeze;
/*


@ -984,10 +984,11 @@ free_ctrl:
static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl)
{
enum nvme_ctrl_state state = nvme_ctrl_state(&ctrl->ctrl);
/* If we are resetting/deleting then do nothing */
if (ctrl->ctrl.state != NVME_CTRL_CONNECTING) {
WARN_ON_ONCE(ctrl->ctrl.state == NVME_CTRL_NEW ||
ctrl->ctrl.state == NVME_CTRL_LIVE);
if (state != NVME_CTRL_CONNECTING) {
WARN_ON_ONCE(state == NVME_CTRL_NEW || state == NVME_CTRL_LIVE);
return;
}
@ -1059,8 +1060,10 @@ static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new)
* unless we're during creation of a new controller to
* avoid races with teardown flow.
*/
WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING &&
ctrl->ctrl.state != NVME_CTRL_DELETING_NOIO);
enum nvme_ctrl_state state = nvme_ctrl_state(&ctrl->ctrl);
WARN_ON_ONCE(state != NVME_CTRL_DELETING &&
state != NVME_CTRL_DELETING_NOIO);
WARN_ON_ONCE(new);
ret = -EINVAL;
goto destroy_io;
@ -1129,8 +1132,10 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
/* state change failure is ok if we started ctrl delete */
WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING &&
ctrl->ctrl.state != NVME_CTRL_DELETING_NOIO);
enum nvme_ctrl_state state = nvme_ctrl_state(&ctrl->ctrl);
WARN_ON_ONCE(state != NVME_CTRL_DELETING &&
state != NVME_CTRL_DELETING_NOIO);
return;
}
@ -1162,7 +1167,7 @@ static void nvme_rdma_wr_error(struct ib_cq *cq, struct ib_wc *wc,
struct nvme_rdma_queue *queue = wc->qp->qp_context;
struct nvme_rdma_ctrl *ctrl = queue->ctrl;
if (ctrl->ctrl.state == NVME_CTRL_LIVE)
if (nvme_ctrl_state(&ctrl->ctrl) == NVME_CTRL_LIVE)
dev_info(ctrl->ctrl.device,
"%s for CQE 0x%p failed with status %s (%d)\n",
op, wc->wr_cqe,
@ -1945,7 +1950,7 @@ static enum blk_eh_timer_return nvme_rdma_timeout(struct request *rq)
dev_warn(ctrl->ctrl.device, "I/O %d QID %d timeout\n",
rq->tag, nvme_rdma_queue_idx(queue));
if (ctrl->ctrl.state != NVME_CTRL_LIVE) {
if (nvme_ctrl_state(&ctrl->ctrl) != NVME_CTRL_LIVE) {
/*
* If we are resetting, connecting or deleting we should
* complete immediately because we may block controller


@ -2152,10 +2152,11 @@ static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl,
static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl)
{
enum nvme_ctrl_state state = nvme_ctrl_state(ctrl);
/* If we are resetting/deleting then do nothing */
if (ctrl->state != NVME_CTRL_CONNECTING) {
WARN_ON_ONCE(ctrl->state == NVME_CTRL_NEW ||
ctrl->state == NVME_CTRL_LIVE);
if (state != NVME_CTRL_CONNECTING) {
WARN_ON_ONCE(state == NVME_CTRL_NEW || state == NVME_CTRL_LIVE);
return;
}
@ -2215,8 +2216,10 @@ static int nvme_tcp_setup_ctrl(struct nvme_ctrl *ctrl, bool new)
* unless we're during creation of a new controller to
* avoid races with teardown flow.
*/
WARN_ON_ONCE(ctrl->state != NVME_CTRL_DELETING &&
ctrl->state != NVME_CTRL_DELETING_NOIO);
enum nvme_ctrl_state state = nvme_ctrl_state(ctrl);
WARN_ON_ONCE(state != NVME_CTRL_DELETING &&
state != NVME_CTRL_DELETING_NOIO);
WARN_ON_ONCE(new);
ret = -EINVAL;
goto destroy_io;
@ -2280,8 +2283,10 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work)
if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_CONNECTING)) {
/* state change failure is ok if we started ctrl delete */
WARN_ON_ONCE(ctrl->state != NVME_CTRL_DELETING &&
ctrl->state != NVME_CTRL_DELETING_NOIO);
enum nvme_ctrl_state state = nvme_ctrl_state(ctrl);
WARN_ON_ONCE(state != NVME_CTRL_DELETING &&
state != NVME_CTRL_DELETING_NOIO);
return;
}
@ -2311,8 +2316,10 @@ static void nvme_reset_ctrl_work(struct work_struct *work)
if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_CONNECTING)) {
/* state change failure is ok if we started ctrl delete */
WARN_ON_ONCE(ctrl->state != NVME_CTRL_DELETING &&
ctrl->state != NVME_CTRL_DELETING_NOIO);
enum nvme_ctrl_state state = nvme_ctrl_state(ctrl);
WARN_ON_ONCE(state != NVME_CTRL_DELETING &&
state != NVME_CTRL_DELETING_NOIO);
return;
}
@ -2430,7 +2437,7 @@ static enum blk_eh_timer_return nvme_tcp_timeout(struct request *rq)
nvme_tcp_queue_id(req->queue), nvme_cid(rq), pdu->hdr.type,
opc, nvme_opcode_str(qid, opc, fctype));
if (ctrl->state != NVME_CTRL_LIVE) {
if (nvme_ctrl_state(ctrl) != NVME_CTRL_LIVE) {
/*
* If we are resetting, connecting or deleting we should
* complete immediately because we may block controller


@ -99,10 +99,11 @@ config NVME_TARGET_TCP_TLS
If unsure, say N.
config NVME_TARGET_AUTH
bool "NVMe over Fabrics In-band Authentication support"
bool "NVMe over Fabrics In-band Authentication in target side"
depends on NVME_TARGET
select NVME_AUTH
help
This enables support for NVMe over Fabrics In-band Authentication
This enables support for NVMe over Fabrics In-band Authentication on
the target side.
If unsure, say N.


@ -18,6 +18,7 @@
#include <linux/nvme-keyring.h>
#include <crypto/hash.h>
#include <crypto/kpp.h>
#include <linux/nospec.h>
#include "nvmet.h"
@ -621,6 +622,7 @@ static ssize_t nvmet_ns_ana_grpid_store(struct config_item *item,
down_write(&nvmet_ana_sem);
oldgrpid = ns->anagrpid;
newgrpid = array_index_nospec(newgrpid, NVMET_MAX_ANAGRPS);
nvmet_ana_group_enabled[newgrpid]++;
ns->anagrpid = newgrpid;
nvmet_ana_group_enabled[oldgrpid]--;
@ -1812,6 +1814,7 @@ static struct config_group *nvmet_ana_groups_make_group(
grp->grpid = grpid;
down_write(&nvmet_ana_sem);
grpid = array_index_nospec(grpid, NVMET_MAX_ANAGRPS);
nvmet_ana_group_enabled[grpid]++;
up_write(&nvmet_ana_sem);
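
The configfs hunk above clamps the ANA group index with array_index_nospec() before it is used to index nvmet_ana_group_enabled[], so a mispredicted bounds check cannot be turned into a Spectre v1 out-of-bounds read. The sketch below is a user-space model of the branchless mask computed by the generic array_index_mask_nospec() fallback in include/linux/nospec.h; like that fallback, it assumes an arithmetic right shift of signed values (what gcc and clang generate on the usual targets).

#include <stdio.h>

#define BITS_PER_LONG	(8 * (int)sizeof(unsigned long))

/*
 * Branchless "index < size ? ~0UL : 0UL" mask, modeled on the generic
 * array_index_mask_nospec() fallback. No branch means nothing for the
 * CPU to mispredict.
 */
static unsigned long index_mask_nospec(unsigned long index, unsigned long size)
{
	return ~(long)(index | (size - 1UL - index)) >> (BITS_PER_LONG - 1);
}

/* user-space model of array_index_nospec(index, size) */
static unsigned long index_nospec(unsigned long index, unsigned long size)
{
	return index & index_mask_nospec(index, size);
}

int main(void)
{
	/* in range: the index passes through unchanged */
	printf("index_nospec(3, 8)  = %lu\n", index_nospec(3, 8));
	/* out of range (as on a mispredicted bounds check): clamped to 0 */
	printf("index_nospec(12, 8) = %lu\n", index_nospec(12, 8));
	return 0;
}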