- Biggest change this cycle is to remove support for the legacy IO path

(.request_fn) from request-based DM.  Jens has already started
   preparing for complete removal of the legacy IO path in 4.21 but this
   earlier removal of support from DM has been coordinated with Jens (as
   evidenced by the commit being attributed to him).  Making
   request-based DM exclussively blk-mq only cleans up that portion of DM
   core quite nicely.
 
 - Convert the thinp and zoned targets over to using refcount_t where
   applicable.
 
 - A couple fixes to the DM zoned target for refcounting and other races
   buried in the implementation of metadata block creation and use.
 
 - Small cleanups to remove redundant unlikely() around a couple
   WARN_ON_ONCE().
 
 - Simplify how dm-ioctl copies from userspace, eliminating some
   potential for a malicious user trying to change the executed ioctl
   after its processing has begun.
 
 - Tweaked DM crypt target to use the DM device name when naming the
   various workqueues created for a particular DM crypt device (makes the
   N workqueues for a DM crypt device more easily understood and enhances
   user's accounting capabilities at a glance via "ps")
 
 - Small fixup to remove dead branch in DM writecache's memory_entry().
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1
 
 iQEcBAABAgAGBQJb0zCmAAoJEMUj8QotnQNaUKwIAKFC3cuTtQyh3LomSbT4uAr5
 6apBSVcxVhqU+isriW3HmBPkO4HGyqMWjX5oQGHrOj0YK0i1H65Nq3qH9ATaiSHn
 awdo8A4YmClF5Mojc51UebXIH0IfnGSOKH/FHNhQzT3jAdn+vYinMSZ28JwFPgKW
 DsVOSM1dlJZBWRXhQNpyCjVl9Xb3rRUOnkfG0endyMfOsnoxKurhwSkXoStzCdQn
 O5ubt1XT3wMKoI1k9QWjfrBU1NtZZYD+kQ6EfkYXfL9RNhhZjwzO/eNtqT3jnKsq
 qbcd8/0JIUttPf7+F0URG9mbMbebfJGqNAaJWcnlRbCHmgUBBGVWsnl8MTWOLkw=
 =MdDN
 -----END PGP SIGNATURE-----

Merge tag 'for-4.20/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

Pull device mapper updates from Mike Snitzer:

 - The biggest change this cycle is to remove support for the legacy IO
   path (.request_fn) from request-based DM.

   Jens has already started preparing for complete removal of the legacy
   IO path in 4.21 but this earlier removal of support from DM has been
   coordinated with Jens (as evidenced by the commit being attributed to
   him).

   Making request-based DM exclussively blk-mq only cleans up that
   portion of DM core quite nicely.

 - Convert the thinp and zoned targets over to using refcount_t where
   applicable.

 - A couple fixes to the DM zoned target for refcounting and other races
   buried in the implementation of metadata block creation and use.

 - Small cleanups to remove redundant unlikely() around a couple
   WARN_ON_ONCE().

 - Simplify how dm-ioctl copies from userspace, eliminating some
   potential for a malicious user trying to change the executed ioctl
   after its processing has begun.

 - Tweaked DM crypt target to use the DM device name when naming the
   various workqueues created for a particular DM crypt device (makes
   the N workqueues for a DM crypt device more easily understood and
   enhances user's accounting capabilities at a glance via "ps")

 - Small fixup to remove dead branch in DM writecache's memory_entry().

* tag 'for-4.20/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm writecache: remove disabled code in memory_entry()
  dm zoned: fix various dmz_get_mblock() issues
  dm zoned: fix metadata block ref counting
  dm raid: avoid bitmap with raid4/5/6 journal device
  dm crypt: make workqueue names device-specific
  dm: add dm_table_device_name()
  dm ioctl: harden copy_params()'s copy_from_user() from malicious users
  dm: remove unnecessary unlikely() around WARN_ON_ONCE()
  dm zoned: target: use refcount_t for dm zoned reference counters
  dm thin: use refcount_t for thin_c reference counting
  dm table: require that request-based DM be layered on blk-mq devices
  dm: rename DM_TYPE_MQ_REQUEST_BASED to DM_TYPE_REQUEST_BASED
  dm: remove legacy request-based IO path
This commit is contained in:
Linus Torvalds 2018-10-26 12:57:38 -07:00
commit 71f4d95b23
18 changed files with 134 additions and 464 deletions

View File

@ -215,17 +215,6 @@ config BLK_DEV_DM
If unsure, say N.
config DM_MQ_DEFAULT
bool "request-based DM: use blk-mq I/O path by default"
depends on BLK_DEV_DM
---help---
This option enables the blk-mq based I/O path for request-based
DM devices by default. With the option the dm_mod.use_blk_mq
module/boot option defaults to Y, without it to N, but it can
still be overriden either way.
If unsure say N.
config DM_DEBUG
bool "Device mapper debugging support"
depends on BLK_DEV_DM

View File

@ -1200,7 +1200,7 @@ static void queue_demotion(struct smq_policy *mq)
struct policy_work work;
struct entry *e;
if (unlikely(WARN_ON_ONCE(!mq->migrations_allowed)))
if (WARN_ON_ONCE(!mq->migrations_allowed))
return;
e = q_peek(&mq->clean, mq->clean.nr_levels / 2, true);

View File

@ -112,18 +112,8 @@ struct mapped_device {
struct dm_stats stats;
struct kthread_worker kworker;
struct task_struct *kworker_task;
/* for request-based merge heuristic in dm_request_fn() */
unsigned seq_rq_merge_deadline_usecs;
int last_rq_rw;
sector_t last_rq_pos;
ktime_t last_rq_start_time;
/* for blk-mq request-based DM support */
struct blk_mq_tag_set *tag_set;
bool use_blk_mq:1;
bool init_tio_pdu:1;
struct srcu_struct io_barrier;

View File

@ -2661,6 +2661,7 @@ static int crypt_ctr_optional(struct dm_target *ti, unsigned int argc, char **ar
static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
struct crypt_config *cc;
const char *devname = dm_table_device_name(ti->table);
int key_size;
unsigned int align_mask;
unsigned long long tmpll;
@ -2806,18 +2807,22 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
}
ret = -ENOMEM;
cc->io_queue = alloc_workqueue("kcryptd_io", WQ_HIGHPRI | WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, 1);
cc->io_queue = alloc_workqueue("kcryptd_io/%s",
WQ_HIGHPRI | WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM,
1, devname);
if (!cc->io_queue) {
ti->error = "Couldn't create kcryptd io queue";
goto bad;
}
if (test_bit(DM_CRYPT_SAME_CPU, &cc->flags))
cc->crypt_queue = alloc_workqueue("kcryptd", WQ_HIGHPRI | WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, 1);
cc->crypt_queue = alloc_workqueue("kcryptd/%s",
WQ_HIGHPRI | WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM,
1, devname);
else
cc->crypt_queue = alloc_workqueue("kcryptd",
cc->crypt_queue = alloc_workqueue("kcryptd/%s",
WQ_HIGHPRI | WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | WQ_UNBOUND,
num_online_cpus());
num_online_cpus(), devname);
if (!cc->crypt_queue) {
ti->error = "Couldn't create kcryptd queue";
goto bad;
@ -2826,7 +2831,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
spin_lock_init(&cc->write_thread_lock);
cc->write_tree = RB_ROOT;
cc->write_thread = kthread_create(dmcrypt_write, cc, "dmcrypt_write");
cc->write_thread = kthread_create(dmcrypt_write, cc, "dmcrypt_write/%s", devname);
if (IS_ERR(cc->write_thread)) {
ret = PTR_ERR(cc->write_thread);
cc->write_thread = NULL;

View File

@ -1720,8 +1720,7 @@ static void free_params(struct dm_ioctl *param, size_t param_size, int param_fla
}
static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl *param_kernel,
int ioctl_flags,
struct dm_ioctl **param, int *param_flags)
int ioctl_flags, struct dm_ioctl **param, int *param_flags)
{
struct dm_ioctl *dmi;
int secure_data;
@ -1762,18 +1761,13 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl *param_kern
*param_flags |= DM_PARAMS_MALLOC;
if (copy_from_user(dmi, user, param_kernel->data_size))
goto bad;
/* Copy from param_kernel (which was already copied from user) */
memcpy(dmi, param_kernel, minimum_data_size);
if (copy_from_user(&dmi->data, (char __user *)user + minimum_data_size,
param_kernel->data_size - minimum_data_size))
goto bad;
data_copied:
/*
* Abort if something changed the ioctl data while it was being copied.
*/
if (dmi->data_size != param_kernel->data_size) {
DMERR("rejecting ioctl: data size modified while processing parameters");
goto bad;
}
/* Wipe the user buffer so we do not return it to userspace */
if (secure_data && clear_user(user, param_kernel->data_size))
goto bad;

View File

@ -203,14 +203,7 @@ static struct multipath *alloc_multipath(struct dm_target *ti)
static int alloc_multipath_stage2(struct dm_target *ti, struct multipath *m)
{
if (m->queue_mode == DM_TYPE_NONE) {
/*
* Default to request-based.
*/
if (dm_use_blk_mq(dm_table_get_md(ti->table)))
m->queue_mode = DM_TYPE_MQ_REQUEST_BASED;
else
m->queue_mode = DM_TYPE_REQUEST_BASED;
m->queue_mode = DM_TYPE_REQUEST_BASED;
} else if (m->queue_mode == DM_TYPE_BIO_BASED) {
INIT_WORK(&m->process_queued_bios, process_queued_bios);
/*
@ -537,10 +530,7 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq,
* get the queue busy feedback (via BLK_STS_RESOURCE),
* otherwise I/O merging can suffer.
*/
if (q->mq_ops)
return DM_MAPIO_REQUEUE;
else
return DM_MAPIO_DELAY_REQUEUE;
return DM_MAPIO_REQUEUE;
}
clone->bio = clone->biotail = NULL;
clone->rq_disk = bdev->bd_disk;
@ -668,7 +658,7 @@ static int multipath_map_bio(struct dm_target *ti, struct bio *bio)
static void process_queued_io_list(struct multipath *m)
{
if (m->queue_mode == DM_TYPE_MQ_REQUEST_BASED)
if (m->queue_mode == DM_TYPE_REQUEST_BASED)
dm_mq_kick_requeue_list(dm_table_get_md(m->ti->table));
else if (m->queue_mode == DM_TYPE_BIO_BASED)
queue_work(kmultipathd, &m->process_queued_bios);
@ -1089,10 +1079,9 @@ static int parse_features(struct dm_arg_set *as, struct multipath *m)
if (!strcasecmp(queue_mode_name, "bio"))
m->queue_mode = DM_TYPE_BIO_BASED;
else if (!strcasecmp(queue_mode_name, "rq"))
else if (!strcasecmp(queue_mode_name, "rq") ||
!strcasecmp(queue_mode_name, "mq"))
m->queue_mode = DM_TYPE_REQUEST_BASED;
else if (!strcasecmp(queue_mode_name, "mq"))
m->queue_mode = DM_TYPE_MQ_REQUEST_BASED;
else {
ti->error = "Unknown 'queue_mode' requested";
r = -EINVAL;
@ -1726,9 +1715,6 @@ static void multipath_status(struct dm_target *ti, status_type_t type,
case DM_TYPE_BIO_BASED:
DMEMIT("queue_mode bio ");
break;
case DM_TYPE_MQ_REQUEST_BASED:
DMEMIT("queue_mode mq ");
break;
default:
WARN_ON_ONCE(true);
break;
@ -1972,7 +1958,7 @@ static int multipath_busy(struct dm_target *ti)
/* no paths available, for blk-mq: rely on IO mapping to delay requeue */
if (!atomic_read(&m->nr_valid_paths) && test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
return (m->queue_mode != DM_TYPE_MQ_REQUEST_BASED);
return (m->queue_mode != DM_TYPE_REQUEST_BASED);
/* Guess which priority_group will be used at next mapping time */
pg = READ_ONCE(m->current_pg);

View File

@ -2475,7 +2475,7 @@ static int super_validate(struct raid_set *rs, struct md_rdev *rdev)
}
/* Enable bitmap creation for RAID levels != 0 */
mddev->bitmap_info.offset = rt_is_raid0(rs->raid_type) ? 0 : to_sector(4096);
mddev->bitmap_info.offset = (rt_is_raid0(rs->raid_type) || rs->journal_dev.dev) ? 0 : to_sector(4096);
mddev->bitmap_info.default_offset = mddev->bitmap_info.offset;
if (!test_and_clear_bit(FirstUse, &rdev->flags)) {

View File

@ -23,19 +23,6 @@ static unsigned dm_mq_queue_depth = DM_MQ_QUEUE_DEPTH;
#define RESERVED_REQUEST_BASED_IOS 256
static unsigned reserved_rq_based_ios = RESERVED_REQUEST_BASED_IOS;
static bool use_blk_mq = IS_ENABLED(CONFIG_DM_MQ_DEFAULT);
bool dm_use_blk_mq_default(void)
{
return use_blk_mq;
}
bool dm_use_blk_mq(struct mapped_device *md)
{
return md->use_blk_mq;
}
EXPORT_SYMBOL_GPL(dm_use_blk_mq);
unsigned dm_get_reserved_rq_based_ios(void)
{
return __dm_get_module_param(&reserved_rq_based_ios,
@ -59,41 +46,13 @@ int dm_request_based(struct mapped_device *md)
return queue_is_rq_based(md->queue);
}
static void dm_old_start_queue(struct request_queue *q)
{
unsigned long flags;
spin_lock_irqsave(q->queue_lock, flags);
if (blk_queue_stopped(q))
blk_start_queue(q);
spin_unlock_irqrestore(q->queue_lock, flags);
}
static void dm_mq_start_queue(struct request_queue *q)
void dm_start_queue(struct request_queue *q)
{
blk_mq_unquiesce_queue(q);
blk_mq_kick_requeue_list(q);
}
void dm_start_queue(struct request_queue *q)
{
if (!q->mq_ops)
dm_old_start_queue(q);
else
dm_mq_start_queue(q);
}
static void dm_old_stop_queue(struct request_queue *q)
{
unsigned long flags;
spin_lock_irqsave(q->queue_lock, flags);
if (!blk_queue_stopped(q))
blk_stop_queue(q);
spin_unlock_irqrestore(q->queue_lock, flags);
}
static void dm_mq_stop_queue(struct request_queue *q)
void dm_stop_queue(struct request_queue *q)
{
if (blk_mq_queue_stopped(q))
return;
@ -101,14 +60,6 @@ static void dm_mq_stop_queue(struct request_queue *q)
blk_mq_quiesce_queue(q);
}
void dm_stop_queue(struct request_queue *q)
{
if (!q->mq_ops)
dm_old_stop_queue(q);
else
dm_mq_stop_queue(q);
}
/*
* Partial completion handling for request-based dm
*/
@ -179,27 +130,12 @@ static void rq_end_stats(struct mapped_device *md, struct request *orig)
*/
static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
{
struct request_queue *q = md->queue;
unsigned long flags;
atomic_dec(&md->pending[rw]);
/* nudge anyone waiting on suspend queue */
if (!md_in_flight(md))
wake_up(&md->wait);
/*
* Run this off this callpath, as drivers could invoke end_io while
* inside their request_fn (and holding the queue lock). Calling
* back into ->request_fn() could deadlock attempting to grab the
* queue lock again.
*/
if (!q->mq_ops && run_queue) {
spin_lock_irqsave(q->queue_lock, flags);
blk_run_queue_async(q);
spin_unlock_irqrestore(q->queue_lock, flags);
}
/*
* dm_put() must be at the end of this function. See the comment above
*/
@ -222,27 +158,10 @@ static void dm_end_request(struct request *clone, blk_status_t error)
tio->ti->type->release_clone_rq(clone);
rq_end_stats(md, rq);
if (!rq->q->mq_ops)
blk_end_request_all(rq, error);
else
blk_mq_end_request(rq, error);
blk_mq_end_request(rq, error);
rq_completed(md, rw, true);
}
/*
* Requeue the original request of a clone.
*/
static void dm_old_requeue_request(struct request *rq, unsigned long delay_ms)
{
struct request_queue *q = rq->q;
unsigned long flags;
spin_lock_irqsave(q->queue_lock, flags);
blk_requeue_request(q, rq);
blk_delay_queue(q, delay_ms);
spin_unlock_irqrestore(q->queue_lock, flags);
}
static void __dm_mq_kick_requeue_list(struct request_queue *q, unsigned long msecs)
{
blk_mq_delay_kick_requeue_list(q, msecs);
@ -273,11 +192,7 @@ static void dm_requeue_original_request(struct dm_rq_target_io *tio, bool delay_
tio->ti->type->release_clone_rq(tio->clone);
}
if (!rq->q->mq_ops)
dm_old_requeue_request(rq, delay_ms);
else
dm_mq_delay_requeue_request(rq, delay_ms);
dm_mq_delay_requeue_request(rq, delay_ms);
rq_completed(md, rw, false);
}
@ -340,10 +255,7 @@ static void dm_softirq_done(struct request *rq)
rq_end_stats(md, rq);
rw = rq_data_dir(rq);
if (!rq->q->mq_ops)
blk_end_request_all(rq, tio->error);
else
blk_mq_end_request(rq, tio->error);
blk_mq_end_request(rq, tio->error);
rq_completed(md, rw, false);
return;
}
@ -363,17 +275,14 @@ static void dm_complete_request(struct request *rq, blk_status_t error)
struct dm_rq_target_io *tio = tio_from_request(rq);
tio->error = error;
if (!rq->q->mq_ops)
blk_complete_request(rq);
else
blk_mq_complete_request(rq);
blk_mq_complete_request(rq);
}
/*
* Complete the not-mapped clone and the original request with the error status
* through softirq context.
* Target's rq_end_io() function isn't called.
* This may be used when the target's map_rq() or clone_and_map_rq() functions fail.
* This may be used when the target's clone_and_map_rq() function fails.
*/
static void dm_kill_unmapped_request(struct request *rq, blk_status_t error)
{
@ -381,21 +290,10 @@ static void dm_kill_unmapped_request(struct request *rq, blk_status_t error)
dm_complete_request(rq, error);
}
/*
* Called with the clone's queue lock held (in the case of .request_fn)
*/
static void end_clone_request(struct request *clone, blk_status_t error)
{
struct dm_rq_target_io *tio = clone->end_io_data;
/*
* Actual request completion is done in a softirq context which doesn't
* hold the clone's queue lock. Otherwise, deadlock could occur because:
* - another request may be submitted by the upper level driver
* of the stacking during the completion
* - the submission which requires queue lock may be done
* against this clone's queue
*/
dm_complete_request(tio->orig, error);
}
@ -446,8 +344,6 @@ static int setup_clone(struct request *clone, struct request *rq,
return 0;
}
static void map_tio_request(struct kthread_work *work);
static void init_tio(struct dm_rq_target_io *tio, struct request *rq,
struct mapped_device *md)
{
@ -464,8 +360,6 @@ static void init_tio(struct dm_rq_target_io *tio, struct request *rq,
*/
if (!md->init_tio_pdu)
memset(&tio->info, 0, sizeof(tio->info));
if (md->kworker_task)
kthread_init_work(&tio->work, map_tio_request);
}
/*
@ -504,10 +398,7 @@ check_again:
blk_rq_unprep_clone(clone);
tio->ti->type->release_clone_rq(clone);
tio->clone = NULL;
if (!rq->q->mq_ops)
r = DM_MAPIO_DELAY_REQUEUE;
else
r = DM_MAPIO_REQUEUE;
r = DM_MAPIO_REQUEUE;
goto check_again;
}
break;
@ -530,20 +421,23 @@ check_again:
return r;
}
/* DEPRECATED: previously used for request-based merge heuristic in dm_request_fn() */
ssize_t dm_attr_rq_based_seq_io_merge_deadline_show(struct mapped_device *md, char *buf)
{
return sprintf(buf, "%u\n", 0);
}
ssize_t dm_attr_rq_based_seq_io_merge_deadline_store(struct mapped_device *md,
const char *buf, size_t count)
{
return count;
}
static void dm_start_request(struct mapped_device *md, struct request *orig)
{
if (!orig->q->mq_ops)
blk_start_request(orig);
else
blk_mq_start_request(orig);
blk_mq_start_request(orig);
atomic_inc(&md->pending[rq_data_dir(orig)]);
if (md->seq_rq_merge_deadline_usecs) {
md->last_rq_pos = rq_end_sector(orig);
md->last_rq_rw = rq_data_dir(orig);
md->last_rq_start_time = ktime_get();
}
if (unlikely(dm_stats_used(&md->stats))) {
struct dm_rq_target_io *tio = tio_from_request(orig);
tio->duration_jiffies = jiffies;
@ -563,8 +457,10 @@ static void dm_start_request(struct mapped_device *md, struct request *orig)
dm_get(md);
}
static int __dm_rq_init_rq(struct mapped_device *md, struct request *rq)
static int dm_mq_init_request(struct blk_mq_tag_set *set, struct request *rq,
unsigned int hctx_idx, unsigned int numa_node)
{
struct mapped_device *md = set->driver_data;
struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq);
/*
@ -581,163 +477,6 @@ static int __dm_rq_init_rq(struct mapped_device *md, struct request *rq)
return 0;
}
static int dm_rq_init_rq(struct request_queue *q, struct request *rq, gfp_t gfp)
{
return __dm_rq_init_rq(q->rq_alloc_data, rq);
}
static void map_tio_request(struct kthread_work *work)
{
struct dm_rq_target_io *tio = container_of(work, struct dm_rq_target_io, work);
if (map_request(tio) == DM_MAPIO_REQUEUE)
dm_requeue_original_request(tio, false);
}
ssize_t dm_attr_rq_based_seq_io_merge_deadline_show(struct mapped_device *md, char *buf)
{
return sprintf(buf, "%u\n", md->seq_rq_merge_deadline_usecs);
}
#define MAX_SEQ_RQ_MERGE_DEADLINE_USECS 100000
ssize_t dm_attr_rq_based_seq_io_merge_deadline_store(struct mapped_device *md,
const char *buf, size_t count)
{
unsigned deadline;
if (dm_get_md_type(md) != DM_TYPE_REQUEST_BASED)
return count;
if (kstrtouint(buf, 10, &deadline))
return -EINVAL;
if (deadline > MAX_SEQ_RQ_MERGE_DEADLINE_USECS)
deadline = MAX_SEQ_RQ_MERGE_DEADLINE_USECS;
md->seq_rq_merge_deadline_usecs = deadline;
return count;
}
static bool dm_old_request_peeked_before_merge_deadline(struct mapped_device *md)
{
ktime_t kt_deadline;
if (!md->seq_rq_merge_deadline_usecs)
return false;
kt_deadline = ns_to_ktime((u64)md->seq_rq_merge_deadline_usecs * NSEC_PER_USEC);
kt_deadline = ktime_add_safe(md->last_rq_start_time, kt_deadline);
return !ktime_after(ktime_get(), kt_deadline);
}
/*
* q->request_fn for old request-based dm.
* Called with the queue lock held.
*/
static void dm_old_request_fn(struct request_queue *q)
{
struct mapped_device *md = q->queuedata;
struct dm_target *ti = md->immutable_target;
struct request *rq;
struct dm_rq_target_io *tio;
sector_t pos = 0;
if (unlikely(!ti)) {
int srcu_idx;
struct dm_table *map = dm_get_live_table(md, &srcu_idx);
if (unlikely(!map)) {
dm_put_live_table(md, srcu_idx);
return;
}
ti = dm_table_find_target(map, pos);
dm_put_live_table(md, srcu_idx);
}
/*
* For suspend, check blk_queue_stopped() and increment
* ->pending within a single queue_lock not to increment the
* number of in-flight I/Os after the queue is stopped in
* dm_suspend().
*/
while (!blk_queue_stopped(q)) {
rq = blk_peek_request(q);
if (!rq)
return;
/* always use block 0 to find the target for flushes for now */
pos = 0;
if (req_op(rq) != REQ_OP_FLUSH)
pos = blk_rq_pos(rq);
if ((dm_old_request_peeked_before_merge_deadline(md) &&
md_in_flight(md) && rq->bio && !bio_multiple_segments(rq->bio) &&
md->last_rq_pos == pos && md->last_rq_rw == rq_data_dir(rq)) ||
(ti->type->busy && ti->type->busy(ti))) {
blk_delay_queue(q, 10);
return;
}
dm_start_request(md, rq);
tio = tio_from_request(rq);
init_tio(tio, rq, md);
/* Establish tio->ti before queuing work (map_tio_request) */
tio->ti = ti;
kthread_queue_work(&md->kworker, &tio->work);
BUG_ON(!irqs_disabled());
}
}
/*
* Fully initialize a .request_fn request-based queue.
*/
int dm_old_init_request_queue(struct mapped_device *md, struct dm_table *t)
{
struct dm_target *immutable_tgt;
/* Fully initialize the queue */
md->queue->cmd_size = sizeof(struct dm_rq_target_io);
md->queue->rq_alloc_data = md;
md->queue->request_fn = dm_old_request_fn;
md->queue->init_rq_fn = dm_rq_init_rq;
immutable_tgt = dm_table_get_immutable_target(t);
if (immutable_tgt && immutable_tgt->per_io_data_size) {
/* any target-specific per-io data is immediately after the tio */
md->queue->cmd_size += immutable_tgt->per_io_data_size;
md->init_tio_pdu = true;
}
if (blk_init_allocated_queue(md->queue) < 0)
return -EINVAL;
/* disable dm_old_request_fn's merge heuristic by default */
md->seq_rq_merge_deadline_usecs = 0;
blk_queue_softirq_done(md->queue, dm_softirq_done);
/* Initialize the request-based DM worker thread */
kthread_init_worker(&md->kworker);
md->kworker_task = kthread_run(kthread_worker_fn, &md->kworker,
"kdmwork-%s", dm_device_name(md));
if (IS_ERR(md->kworker_task)) {
int error = PTR_ERR(md->kworker_task);
md->kworker_task = NULL;
return error;
}
return 0;
}
static int dm_mq_init_request(struct blk_mq_tag_set *set, struct request *rq,
unsigned int hctx_idx, unsigned int numa_node)
{
return __dm_rq_init_rq(set->driver_data, rq);
}
static blk_status_t dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
@ -790,11 +529,6 @@ int dm_mq_init_request_queue(struct mapped_device *md, struct dm_table *t)
struct dm_target *immutable_tgt;
int err;
if (!dm_table_all_blk_mq_devices(t)) {
DMERR("request-based dm-mq may only be stacked on blk-mq device(s)");
return -EINVAL;
}
md->tag_set = kzalloc_node(sizeof(struct blk_mq_tag_set), GFP_KERNEL, md->numa_node_id);
if (!md->tag_set)
return -ENOMEM;
@ -845,6 +579,8 @@ void dm_mq_cleanup_mapped_device(struct mapped_device *md)
module_param(reserved_rq_based_ios, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(reserved_rq_based_ios, "Reserved IOs in request-based mempools");
/* Unused, but preserved for userspace compatibility */
static bool use_blk_mq = true;
module_param(use_blk_mq, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(use_blk_mq, "Use block multiqueue for request-based DM devices");

View File

@ -46,10 +46,6 @@ struct dm_rq_clone_bio_info {
struct bio clone;
};
bool dm_use_blk_mq_default(void);
bool dm_use_blk_mq(struct mapped_device *md);
int dm_old_init_request_queue(struct mapped_device *md, struct dm_table *t);
int dm_mq_init_request_queue(struct mapped_device *md, struct dm_table *t);
void dm_mq_cleanup_mapped_device(struct mapped_device *md);

View File

@ -92,7 +92,8 @@ static ssize_t dm_attr_suspended_show(struct mapped_device *md, char *buf)
static ssize_t dm_attr_use_blk_mq_show(struct mapped_device *md, char *buf)
{
sprintf(buf, "%d\n", dm_use_blk_mq(md));
/* Purely for userspace compatibility */
sprintf(buf, "%d\n", true);
return strlen(buf);
}

View File

@ -47,7 +47,6 @@ struct dm_table {
bool integrity_supported:1;
bool singleton:1;
bool all_blk_mq:1;
unsigned integrity_added:1;
/*
@ -872,8 +871,7 @@ static bool __table_type_bio_based(enum dm_queue_mode table_type)
static bool __table_type_request_based(enum dm_queue_mode table_type)
{
return (table_type == DM_TYPE_REQUEST_BASED ||
table_type == DM_TYPE_MQ_REQUEST_BASED);
return table_type == DM_TYPE_REQUEST_BASED;
}
void dm_table_set_type(struct dm_table *t, enum dm_queue_mode type)
@ -999,10 +997,6 @@ verify_bio_based:
BUG_ON(!request_based); /* No targets in this table */
/*
* The only way to establish DM_TYPE_MQ_REQUEST_BASED is by
* having a compatible target use dm_table_set_type.
*/
t->type = DM_TYPE_REQUEST_BASED;
verify_rq_based:
@ -1022,11 +1016,9 @@ verify_rq_based:
int srcu_idx;
struct dm_table *live_table = dm_get_live_table(t->md, &srcu_idx);
/* inherit live table's type and all_blk_mq */
if (live_table) {
/* inherit live table's type */
if (live_table)
t->type = live_table->type;
t->all_blk_mq = live_table->all_blk_mq;
}
dm_put_live_table(t->md, srcu_idx);
return 0;
}
@ -1046,17 +1038,10 @@ verify_rq_based:
DMERR("table load rejected: including non-request-stackable devices");
return -EINVAL;
}
if (v.sq_count && v.mq_count) {
if (v.sq_count > 0) {
DMERR("table load rejected: not all devices are blk-mq request-stackable");
return -EINVAL;
}
t->all_blk_mq = v.mq_count > 0;
if (!t->all_blk_mq &&
(t->type == DM_TYPE_MQ_REQUEST_BASED || t->type == DM_TYPE_NVME_BIO_BASED)) {
DMERR("table load rejected: all devices are not blk-mq request-stackable");
return -EINVAL;
}
return 0;
}
@ -1105,11 +1090,6 @@ bool dm_table_request_based(struct dm_table *t)
return __table_type_request_based(dm_table_get_type(t));
}
bool dm_table_all_blk_mq_devices(struct dm_table *t)
{
return t->all_blk_mq;
}
static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device *md)
{
enum dm_queue_mode type = dm_table_get_type(t);
@ -2089,26 +2069,24 @@ struct mapped_device *dm_table_get_md(struct dm_table *t)
}
EXPORT_SYMBOL(dm_table_get_md);
const char *dm_table_device_name(struct dm_table *t)
{
return dm_device_name(t->md);
}
EXPORT_SYMBOL_GPL(dm_table_device_name);
void dm_table_run_md_queue_async(struct dm_table *t)
{
struct mapped_device *md;
struct request_queue *queue;
unsigned long flags;
if (!dm_table_request_based(t))
return;
md = dm_table_get_md(t);
queue = dm_get_md_queue(md);
if (queue) {
if (queue->mq_ops)
blk_mq_run_hw_queues(queue, true);
else {
spin_lock_irqsave(queue->queue_lock, flags);
blk_run_queue_async(queue);
spin_unlock_irqrestore(queue->queue_lock, flags);
}
}
if (queue)
blk_mq_run_hw_queues(queue, true);
}
EXPORT_SYMBOL(dm_table_run_md_queue_async);

View File

@ -325,7 +325,7 @@ struct thin_c {
* Ensures the thin is not destroyed until the worker has finished
* iterating the active_thins list.
*/
atomic_t refcount;
refcount_t refcount;
struct completion can_destroy;
};
@ -4044,12 +4044,12 @@ static struct target_type pool_target = {
*--------------------------------------------------------------*/
static void thin_get(struct thin_c *tc)
{
atomic_inc(&tc->refcount);
refcount_inc(&tc->refcount);
}
static void thin_put(struct thin_c *tc)
{
if (atomic_dec_and_test(&tc->refcount))
if (refcount_dec_and_test(&tc->refcount))
complete(&tc->can_destroy);
}
@ -4193,7 +4193,7 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
r = -EINVAL;
goto bad;
}
atomic_set(&tc->refcount, 1);
refcount_set(&tc->refcount, 1);
init_completion(&tc->can_destroy);
list_add_tail_rcu(&tc->list, &tc->pool->active_thins);
spin_unlock_irqrestore(&tc->pool->lock, flags);

View File

@ -350,10 +350,7 @@ static struct wc_memory_superblock *sb(struct dm_writecache *wc)
static struct wc_memory_entry *memory_entry(struct dm_writecache *wc, struct wc_entry *e)
{
if (is_power_of_2(sizeof(struct wc_entry)) && 0)
return &sb(wc)->entries[e - wc->entries];
else
return &sb(wc)->entries[e->index];
return &sb(wc)->entries[e->index];
}
static void *memory_data(struct dm_writecache *wc, struct wc_entry *e)

View File

@ -99,7 +99,7 @@ struct dmz_mblock {
struct rb_node node;
struct list_head link;
sector_t no;
atomic_t ref;
unsigned int ref;
unsigned long state;
struct page *page;
void *data;
@ -296,7 +296,7 @@ static struct dmz_mblock *dmz_alloc_mblock(struct dmz_metadata *zmd,
RB_CLEAR_NODE(&mblk->node);
INIT_LIST_HEAD(&mblk->link);
atomic_set(&mblk->ref, 0);
mblk->ref = 0;
mblk->state = 0;
mblk->no = mblk_no;
mblk->data = page_address(mblk->page);
@ -339,10 +339,11 @@ static void dmz_insert_mblock(struct dmz_metadata *zmd, struct dmz_mblock *mblk)
}
/*
* Lookup a metadata block in the rbtree.
* Lookup a metadata block in the rbtree. If the block is found, increment
* its reference count.
*/
static struct dmz_mblock *dmz_lookup_mblock(struct dmz_metadata *zmd,
sector_t mblk_no)
static struct dmz_mblock *dmz_get_mblock_fast(struct dmz_metadata *zmd,
sector_t mblk_no)
{
struct rb_root *root = &zmd->mblk_rbtree;
struct rb_node *node = root->rb_node;
@ -350,8 +351,17 @@ static struct dmz_mblock *dmz_lookup_mblock(struct dmz_metadata *zmd,
while (node) {
mblk = container_of(node, struct dmz_mblock, node);
if (mblk->no == mblk_no)
if (mblk->no == mblk_no) {
/*
* If this is the first reference to the block,
* remove it from the LRU list.
*/
mblk->ref++;
if (mblk->ref == 1 &&
!test_bit(DMZ_META_DIRTY, &mblk->state))
list_del_init(&mblk->link);
return mblk;
}
node = (mblk->no < mblk_no) ? node->rb_left : node->rb_right;
}
@ -382,32 +392,47 @@ static void dmz_mblock_bio_end_io(struct bio *bio)
}
/*
* Read a metadata block from disk.
* Read an uncached metadata block from disk and add it to the cache.
*/
static struct dmz_mblock *dmz_fetch_mblock(struct dmz_metadata *zmd,
sector_t mblk_no)
static struct dmz_mblock *dmz_get_mblock_slow(struct dmz_metadata *zmd,
sector_t mblk_no)
{
struct dmz_mblock *mblk;
struct dmz_mblock *mblk, *m;
sector_t block = zmd->sb[zmd->mblk_primary].block + mblk_no;
struct bio *bio;
/* Get block and insert it */
/* Get a new block and a BIO to read it */
mblk = dmz_alloc_mblock(zmd, mblk_no);
if (!mblk)
return NULL;
spin_lock(&zmd->mblk_lock);
atomic_inc(&mblk->ref);
set_bit(DMZ_META_READING, &mblk->state);
dmz_insert_mblock(zmd, mblk);
spin_unlock(&zmd->mblk_lock);
bio = bio_alloc(GFP_NOIO, 1);
if (!bio) {
dmz_free_mblock(zmd, mblk);
return NULL;
}
spin_lock(&zmd->mblk_lock);
/*
* Make sure that another context did not start reading
* the block already.
*/
m = dmz_get_mblock_fast(zmd, mblk_no);
if (m) {
spin_unlock(&zmd->mblk_lock);
dmz_free_mblock(zmd, mblk);
bio_put(bio);
return m;
}
mblk->ref++;
set_bit(DMZ_META_READING, &mblk->state);
dmz_insert_mblock(zmd, mblk);
spin_unlock(&zmd->mblk_lock);
/* Submit read BIO */
bio->bi_iter.bi_sector = dmz_blk2sect(block);
bio_set_dev(bio, zmd->dev->bdev);
bio->bi_private = mblk;
@ -484,7 +509,8 @@ static void dmz_release_mblock(struct dmz_metadata *zmd,
spin_lock(&zmd->mblk_lock);
if (atomic_dec_and_test(&mblk->ref)) {
mblk->ref--;
if (mblk->ref == 0) {
if (test_bit(DMZ_META_ERROR, &mblk->state)) {
rb_erase(&mblk->node, &zmd->mblk_rbtree);
dmz_free_mblock(zmd, mblk);
@ -508,18 +534,12 @@ static struct dmz_mblock *dmz_get_mblock(struct dmz_metadata *zmd,
/* Check rbtree */
spin_lock(&zmd->mblk_lock);
mblk = dmz_lookup_mblock(zmd, mblk_no);
if (mblk) {
/* Cache hit: remove block from LRU list */
if (atomic_inc_return(&mblk->ref) == 1 &&
!test_bit(DMZ_META_DIRTY, &mblk->state))
list_del_init(&mblk->link);
}
mblk = dmz_get_mblock_fast(zmd, mblk_no);
spin_unlock(&zmd->mblk_lock);
if (!mblk) {
/* Cache miss: read the block from disk */
mblk = dmz_fetch_mblock(zmd, mblk_no);
mblk = dmz_get_mblock_slow(zmd, mblk_no);
if (!mblk)
return ERR_PTR(-ENOMEM);
}
@ -753,7 +773,7 @@ int dmz_flush_metadata(struct dmz_metadata *zmd)
spin_lock(&zmd->mblk_lock);
clear_bit(DMZ_META_DIRTY, &mblk->state);
if (atomic_read(&mblk->ref) == 0)
if (mblk->ref == 0)
list_add_tail(&mblk->link, &zmd->mblk_lru_list);
spin_unlock(&zmd->mblk_lock);
}
@ -2308,7 +2328,7 @@ static void dmz_cleanup_metadata(struct dmz_metadata *zmd)
mblk = list_first_entry(&zmd->mblk_dirty_list,
struct dmz_mblock, link);
dmz_dev_warn(zmd->dev, "mblock %llu still in dirty list (ref %u)",
(u64)mblk->no, atomic_read(&mblk->ref));
(u64)mblk->no, mblk->ref);
list_del_init(&mblk->link);
rb_erase(&mblk->node, &zmd->mblk_rbtree);
dmz_free_mblock(zmd, mblk);
@ -2326,8 +2346,8 @@ static void dmz_cleanup_metadata(struct dmz_metadata *zmd)
root = &zmd->mblk_rbtree;
rbtree_postorder_for_each_entry_safe(mblk, next, root, node) {
dmz_dev_warn(zmd->dev, "mblock %llu ref %u still in rbtree",
(u64)mblk->no, atomic_read(&mblk->ref));
atomic_set(&mblk->ref, 0);
(u64)mblk->no, mblk->ref);
mblk->ref = 0;
dmz_free_mblock(zmd, mblk);
}

View File

@ -19,7 +19,7 @@ struct dmz_bioctx {
struct dmz_target *target;
struct dm_zone *zone;
struct bio *bio;
atomic_t ref;
refcount_t ref;
blk_status_t status;
};
@ -28,7 +28,7 @@ struct dmz_bioctx {
*/
struct dm_chunk_work {
struct work_struct work;
atomic_t refcount;
refcount_t refcount;
struct dmz_target *target;
unsigned int chunk;
struct bio_list bio_list;
@ -115,7 +115,7 @@ static int dmz_submit_read_bio(struct dmz_target *dmz, struct dm_zone *zone,
if (nr_blocks == dmz_bio_blocks(bio)) {
/* Setup and submit the BIO */
bio->bi_iter.bi_sector = sector;
atomic_inc(&bioctx->ref);
refcount_inc(&bioctx->ref);
generic_make_request(bio);
return 0;
}
@ -134,7 +134,7 @@ static int dmz_submit_read_bio(struct dmz_target *dmz, struct dm_zone *zone,
bio_advance(bio, clone->bi_iter.bi_size);
/* Submit the clone */
atomic_inc(&bioctx->ref);
refcount_inc(&bioctx->ref);
generic_make_request(clone);
return 0;
@ -240,7 +240,7 @@ static void dmz_submit_write_bio(struct dmz_target *dmz, struct dm_zone *zone,
/* Setup and submit the BIO */
bio_set_dev(bio, dmz->dev->bdev);
bio->bi_iter.bi_sector = dmz_start_sect(dmz->metadata, zone) + dmz_blk2sect(chunk_block);
atomic_inc(&bioctx->ref);
refcount_inc(&bioctx->ref);
generic_make_request(bio);
if (dmz_is_seq(zone))
@ -456,7 +456,7 @@ out:
*/
static inline void dmz_get_chunk_work(struct dm_chunk_work *cw)
{
atomic_inc(&cw->refcount);
refcount_inc(&cw->refcount);
}
/*
@ -465,7 +465,7 @@ static inline void dmz_get_chunk_work(struct dm_chunk_work *cw)
*/
static void dmz_put_chunk_work(struct dm_chunk_work *cw)
{
if (atomic_dec_and_test(&cw->refcount)) {
if (refcount_dec_and_test(&cw->refcount)) {
WARN_ON(!bio_list_empty(&cw->bio_list));
radix_tree_delete(&cw->target->chunk_rxtree, cw->chunk);
kfree(cw);
@ -546,7 +546,7 @@ static void dmz_queue_chunk_work(struct dmz_target *dmz, struct bio *bio)
goto out;
INIT_WORK(&cw->work, dmz_chunk_work);
atomic_set(&cw->refcount, 0);
refcount_set(&cw->refcount, 0);
cw->target = dmz;
cw->chunk = chunk;
bio_list_init(&cw->bio_list);
@ -599,7 +599,7 @@ static int dmz_map(struct dm_target *ti, struct bio *bio)
bioctx->target = dmz;
bioctx->zone = NULL;
bioctx->bio = bio;
atomic_set(&bioctx->ref, 1);
refcount_set(&bioctx->ref, 1);
bioctx->status = BLK_STS_OK;
/* Set the BIO pending in the flush list */
@ -633,7 +633,7 @@ static int dmz_end_io(struct dm_target *ti, struct bio *bio, blk_status_t *error
if (bioctx->status == BLK_STS_OK && *error)
bioctx->status = *error;
if (!atomic_dec_and_test(&bioctx->ref))
if (!refcount_dec_and_test(&bioctx->ref))
return DM_ENDIO_INCOMPLETE;
/* Done */

View File

@ -1664,7 +1664,7 @@ static blk_qc_t __process_bio(struct mapped_device *md,
* Defend against IO still getting in during teardown
* - as was seen for a time with nvme-fcloop
*/
if (unlikely(WARN_ON_ONCE(!ti || !dm_target_is_valid(ti)))) {
if (WARN_ON_ONCE(!ti || !dm_target_is_valid(ti))) {
error = -EIO;
goto out;
}
@ -1806,8 +1806,6 @@ static void dm_wq_work(struct work_struct *work);
static void dm_init_normal_md_queue(struct mapped_device *md)
{
md->use_blk_mq = false;
/*
* Initialize aspects of queue that aren't relevant for blk-mq
*/
@ -1818,8 +1816,6 @@ static void cleanup_mapped_device(struct mapped_device *md)
{
if (md->wq)
destroy_workqueue(md->wq);
if (md->kworker_task)
kthread_stop(md->kworker_task);
bioset_exit(&md->bs);
bioset_exit(&md->io_bs);
@ -1886,7 +1882,6 @@ static struct mapped_device *alloc_dev(int minor)
goto bad_io_barrier;
md->numa_node_id = numa_node_id;
md->use_blk_mq = dm_use_blk_mq_default();
md->init_tio_pdu = false;
md->type = DM_TYPE_NONE;
mutex_init(&md->suspend_lock);
@ -1917,7 +1912,6 @@ static struct mapped_device *alloc_dev(int minor)
INIT_WORK(&md->work, dm_wq_work);
init_waitqueue_head(&md->eventq);
init_completion(&md->kobj_holder.completion);
md->kworker_task = NULL;
md->disk->major = _major;
md->disk->first_minor = minor;
@ -2217,14 +2211,6 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t)
switch (type) {
case DM_TYPE_REQUEST_BASED:
dm_init_normal_md_queue(md);
r = dm_old_init_request_queue(md, t);
if (r) {
DMERR("Cannot initialize queue for request-based mapped device");
return r;
}
break;
case DM_TYPE_MQ_REQUEST_BASED:
r = dm_mq_init_request_queue(md, t);
if (r) {
DMERR("Cannot initialize queue for request-based dm-mq mapped device");
@ -2329,9 +2315,6 @@ static void __dm_destroy(struct mapped_device *md, bool wait)
blk_set_queue_dying(md->queue);
if (dm_request_based(md) && md->kworker_task)
kthread_flush_worker(&md->kworker);
/*
* Take suspend_lock so that presuspend and postsuspend methods
* do not race with internal suspend.
@ -2584,11 +2567,8 @@ static int __dm_suspend(struct mapped_device *md, struct dm_table *map,
* Stop md->queue before flushing md->wq in case request-based
* dm defers requests to md->wq from md->queue.
*/
if (dm_request_based(md)) {
if (dm_request_based(md))
dm_stop_queue(md->queue);
if (md->kworker_task)
kthread_flush_worker(&md->kworker);
}
flush_workqueue(md->wq);
@ -2963,7 +2943,6 @@ struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, enum dm_qu
goto out;
break;
case DM_TYPE_REQUEST_BASED:
case DM_TYPE_MQ_REQUEST_BASED:
pool_size = max(dm_get_reserved_rq_based_ios(), min_pool_size);
front_pad = offsetof(struct dm_rq_clone_bio_info, clone);
/* per_io_data_size is used for blk-mq pdu at queue allocation */

View File

@ -70,7 +70,6 @@ struct dm_target *dm_table_get_immutable_target(struct dm_table *t);
struct dm_target *dm_table_get_wildcard_target(struct dm_table *t);
bool dm_table_bio_based(struct dm_table *t);
bool dm_table_request_based(struct dm_table *t);
bool dm_table_all_blk_mq_devices(struct dm_table *t);
void dm_table_free_md_mempools(struct dm_table *t);
struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t);

View File

@ -26,9 +26,8 @@ enum dm_queue_mode {
DM_TYPE_NONE = 0,
DM_TYPE_BIO_BASED = 1,
DM_TYPE_REQUEST_BASED = 2,
DM_TYPE_MQ_REQUEST_BASED = 3,
DM_TYPE_DAX_BIO_BASED = 4,
DM_TYPE_NVME_BIO_BASED = 5,
DM_TYPE_DAX_BIO_BASED = 3,
DM_TYPE_NVME_BIO_BASED = 4,
};
typedef enum { STATUSTYPE_INFO, STATUSTYPE_TABLE } status_type_t;
@ -498,6 +497,7 @@ sector_t dm_table_get_size(struct dm_table *t);
unsigned int dm_table_get_num_targets(struct dm_table *t);
fmode_t dm_table_get_mode(struct dm_table *t);
struct mapped_device *dm_table_get_md(struct dm_table *t);
const char *dm_table_device_name(struct dm_table *t);
/*
* Trigger an event.