dm: enhance internal suspend and resume interface

Rename dm_internal_{suspend,resume} to dm_internal_{suspend,resume}_fast
-- dm-stats will continue using these methods to avoid all the extra
suspend/resume logic that is not needed in order to quickly flush IO.
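For illustration only (this snippet is not part of the patch and the wrapper name is
made up), an in-kernel user such as dm-stats is expected to pair the fast helpers
around work that must not race with live IO:

	/* hypothetical caller: quiesce IO quickly, without target hooks */
	static void example_quick_quiesce(struct mapped_device *md)
	{
		dm_internal_suspend_fast(md);	/* block and flush IO only */
		/* ... do the work that required quiescing IO ... */
		dm_internal_resume_fast(md);	/* let IO flow again */
	}
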
Introduce dm_internal_suspend_noflush() variant that actually calls the
mapped_device's target callbacks -- otherwise target-specific hooks are
avoided (e.g. dm-thin's thin_presuspend and thin_postsuspend).  Common
code between dm_internal_{suspend_noflush,resume} and
dm_{suspend,resume} was factored out as __dm_{suspend,resume}.
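As an informal sketch (derived from the dm.c hunks below; the arrows are not code),
the resulting call structure is:

	/*
	 * dm_suspend()                  -> __dm_suspend(md, map, flags, TASK_INTERRUPTIBLE)
	 * dm_resume()                   -> __dm_resume(md, map)
	 * dm_internal_suspend_noflush() -> __dm_internal_suspend(md, DM_SUSPEND_NOFLUSH_FLAG)
	 *                                    -> __dm_suspend(md, map, ..., TASK_UNINTERRUPTIBLE)
	 * dm_internal_resume()          -> __dm_internal_resume(md) -> __dm_resume(md, NULL)
	 */
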
Update dm_internal_{suspend_noflush,resume} to always take and release
the mapped_device's suspend_lock.  Also update dm_{suspend,resume} to be
aware of potential for DM_INTERNAL_SUSPEND_FLAG to be set and respond
accordingly by interruptibly waiting for the DM_INTERNAL_SUSPEND_FLAG to
be cleared.  Add lockdep annotation to dm_suspend() and dm_resume().

The existing DM_SUSPEND_FLAG remains unchanged.
DM_INTERNAL_SUSPEND_FLAG is set by dm_internal_suspend_noflush() and
cleared by dm_internal_resume().

Both DM_SUSPEND_FLAG and DM_INTERNAL_SUSPEND_FLAG may be set if a device
was already suspended when dm_internal_suspend_noflush() was called --
this can be thought of as a "nested suspend".  A "nested suspend" can
occur with legacy userspace dm-thin code that might suspend all active
thin volumes before suspending the pool for resize.
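The flag handling can be modelled in plain userspace C (illustrative only; the types
and helpers below are made up and are not kernel code):

	#include <stdbool.h>
	#include <stdio.h>

	struct model_md {
		bool suspend_flag;		/* models DM_SUSPEND_FLAG */
		bool internal_suspend_flag;	/* models DM_INTERNAL_SUSPEND_FLAG */
	};

	/* models dm_internal_suspend_noflush(): nests rather than suspending twice */
	static void internal_suspend_noflush(struct model_md *md)
	{
		if (md->internal_suspend_flag)
			return;				/* nested internal suspend */
		if (md->suspend_flag) {
			md->internal_suspend_flag = true;	/* "nested suspend": both flags set */
			return;
		}
		/* normal path would quiesce IO here (__dm_suspend in the patch) */
		md->internal_suspend_flag = true;
	}

	int main(void)
	{
		/* legacy userspace suspended the thin volume before the pool resize */
		struct model_md thin = { .suspend_flag = true };

		internal_suspend_noflush(&thin);
		printf("DM_SUSPEND_FLAG=%d DM_INTERNAL_SUSPEND_FLAG=%d\n",
		       thin.suspend_flag, thin.internal_suspend_flag);
		return 0;
	}
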
But otherwise, in the normal dm-thin-pool suspend case moving forward:
the thin-pool will have DM_SUSPEND_FLAG set and all active thins from
that thin-pool will have DM_INTERNAL_SUSPEND_FLAG set.

Also add DM_INTERNAL_SUSPEND_FLAG to status report.  This new
DM_INTERNAL_SUSPEND_FLAG state is being reported to assist with
debugging (e.g. 'dmsetup info' will report an internally suspended
device accordingly).
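A hedged userspace sketch of checking the new bit via the DM_DEV_STATUS ioctl
(error handling trimmed, the device name is an example, and the flag value is
open-coded in case the installed header predates this patch):

	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <linux/dm-ioctl.h>

	#ifndef DM_INTERNAL_SUSPEND_FLAG
	#define DM_INTERNAL_SUSPEND_FLAG (1 << 18)	/* added by this patch */
	#endif

	int main(void)
	{
		struct dm_ioctl io;
		int fd = open("/dev/mapper/control", O_RDWR);

		if (fd < 0)
			return 1;

		memset(&io, 0, sizeof(io));
		io.version[0] = DM_VERSION_MAJOR;
		io.version[1] = DM_VERSION_MINOR;
		io.version[2] = DM_VERSION_PATCHLEVEL;
		io.data_size = sizeof(io);
		strncpy(io.name, "thin-vol0", sizeof(io.name) - 1);	/* example name */

		if (ioctl(fd, DM_DEV_STATUS, &io) == 0)
			printf("internally suspended: %s\n",
			       (io.flags & DM_INTERNAL_SUSPEND_FLAG) ? "yes" : "no");
		return 0;
	}
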
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Acked-by: Joe Thornber <ejt@redhat.com>
			
			
parent 80e96c5484
commit ffcc393641
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -684,11 +684,14 @@ static void __dev_status(struct mapped_device *md, struct dm_ioctl *param)
 	int srcu_idx;
 
 	param->flags &= ~(DM_SUSPEND_FLAG | DM_READONLY_FLAG |
-			  DM_ACTIVE_PRESENT_FLAG);
+			  DM_ACTIVE_PRESENT_FLAG | DM_INTERNAL_SUSPEND_FLAG);
 
 	if (dm_suspended_md(md))
 		param->flags |= DM_SUSPEND_FLAG;
 
+	if (dm_suspended_internally_md(md))
+		param->flags |= DM_INTERNAL_SUSPEND_FLAG;
+
 	if (dm_test_deferred_remove_flag(md))
 		param->flags |= DM_DEFERRED_REMOVE;
 
--- a/drivers/md/dm-stats.c
+++ b/drivers/md/dm-stats.c
@@ -824,7 +824,7 @@ static int message_stats_create(struct mapped_device *md,
 		return 1;
 
 	id = dm_stats_create(dm_get_stats(md), start, end, step, program_id, aux_data,
-			     dm_internal_suspend, dm_internal_resume, md);
+			     dm_internal_suspend_fast, dm_internal_resume_fast, md);
 	if (id < 0)
 		return id;
 
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -19,6 +19,7 @@
 #include <linux/idr.h>
 #include <linux/hdreg.h>
 #include <linux/delay.h>
+#include <linux/wait.h>
 
 #include <trace/events/block.h>
 
@@ -117,6 +118,7 @@ EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo);
 #define DMF_NOFLUSH_SUSPENDING 5
 #define DMF_MERGE_IS_OPTIONAL 6
 #define DMF_DEFERRED_REMOVE 7
+#define DMF_SUSPENDED_INTERNALLY 8
 
 /*
  * A dummy definition to make RCU happy.
@@ -2718,36 +2720,18 @@ static void unlock_fs(struct mapped_device *md)
 }
 
 /*
- * We need to be able to change a mapping table under a mounted
- * filesystem.  For example we might want to move some data in
- * the background.  Before the table can be swapped with
- * dm_bind_table, dm_suspend must be called to flush any in
- * flight bios and ensure that any further io gets deferred.
- */
-/*
- * Suspend mechanism in request-based dm.
+ * If __dm_suspend returns 0, the device is completely quiescent
+ * now. There is no request-processing activity. All new requests
+ * are being added to md->deferred list.
  *
- * 1. Flush all I/Os by lock_fs() if needed.
- * 2. Stop dispatching any I/O by stopping the request_queue.
- * 3. Wait for all in-flight I/Os to be completed or requeued.
- *
- * To abort suspend, start the request_queue.
+ * Caller must hold md->suspend_lock
  */
-int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
+static int __dm_suspend(struct mapped_device *md, struct dm_table *map,
+			unsigned suspend_flags, int interruptible)
 {
-	struct dm_table *map = NULL;
-	int r = 0;
-	int do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG ? 1 : 0;
-	int noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG ? 1 : 0;
-
-	mutex_lock(&md->suspend_lock);
-
-	if (dm_suspended_md(md)) {
-		r = -EINVAL;
-		goto out_unlock;
-	}
-
-	map = rcu_dereference(md->map);
+	bool do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG;
+	bool noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG;
+	int r;
 
 	/*
 	 * DMF_NOFLUSH_SUSPENDING must be set before presuspend.
@@ -2772,7 +2756,7 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
 		r = lock_fs(md);
 		if (r) {
 			dm_table_presuspend_undo_targets(map);
-			goto out_unlock;
+			return r;
 		}
 	}
 
@@ -2806,7 +2790,7 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
 	 * We call dm_wait_for_completion to wait for all existing requests
 	 * to finish.
 	 */
-	r = dm_wait_for_completion(md, TASK_INTERRUPTIBLE);
+	r = dm_wait_for_completion(md, interruptible);
 
 	if (noflush)
 		clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
@@ -2822,14 +2806,55 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
 
 		unlock_fs(md);
 		dm_table_presuspend_undo_targets(map);
-		goto out_unlock; /* pushback list is already flushed, so skip flush */
+		/* pushback list is already flushed, so skip flush */
 	}
 
-	/*
-	 * If dm_wait_for_completion returned 0, the device is completely
-	 * quiescent now. There is no request-processing activity. All new
-	 * requests are being added to md->deferred list.
-	 */
+	return r;
+}
+
+/*
+ * We need to be able to change a mapping table under a mounted
+ * filesystem.  For example we might want to move some data in
+ * the background.  Before the table can be swapped with
+ * dm_bind_table, dm_suspend must be called to flush any in
+ * flight bios and ensure that any further io gets deferred.
+ */
+/*
+ * Suspend mechanism in request-based dm.
+ *
+ * 1. Flush all I/Os by lock_fs() if needed.
+ * 2. Stop dispatching any I/O by stopping the request_queue.
+ * 3. Wait for all in-flight I/Os to be completed or requeued.
+ *
+ * To abort suspend, start the request_queue.
+ */
+int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
+{
+	struct dm_table *map = NULL;
+	int r = 0;
+
+retry:
+	mutex_lock_nested(&md->suspend_lock, SINGLE_DEPTH_NESTING);
+
+	if (dm_suspended_md(md)) {
+		r = -EINVAL;
+		goto out_unlock;
+	}
+
+	if (dm_suspended_internally_md(md)) {
+		/* already internally suspended, wait for internal resume */
+		mutex_unlock(&md->suspend_lock);
+		r = wait_on_bit(&md->flags, DMF_SUSPENDED_INTERNALLY, TASK_INTERRUPTIBLE);
+		if (r)
+			return r;
+		goto retry;
+	}
+
+	map = rcu_dereference(md->map);
+
+	r = __dm_suspend(md, map, suspend_flags, TASK_INTERRUPTIBLE);
+	if (r)
+		goto out_unlock;
 
 	set_bit(DMF_SUSPENDED, &md->flags);
 
@@ -2840,22 +2865,13 @@ out_unlock:
 	return r;
 }
 
-int dm_resume(struct mapped_device *md)
+static int __dm_resume(struct mapped_device *md, struct dm_table *map)
 {
-	int r = -EINVAL;
-	struct dm_table *map = NULL;
-
-	mutex_lock(&md->suspend_lock);
-	if (!dm_suspended_md(md))
-		goto out;
-
-	map = rcu_dereference(md->map);
-	if (!map || !dm_table_get_size(map))
-		goto out;
-
-	r = dm_table_resume_targets(map);
-	if (r)
-		goto out;
+	if (map) {
+		int r = dm_table_resume_targets(map);
+		if (r)
+			return r;
+	}
 
 	dm_queue_flush(md);
 
@@ -2869,6 +2885,37 @@ int dm_resume(struct mapped_device *md)
 
 	unlock_fs(md);
 
+	return 0;
+}
+
+int dm_resume(struct mapped_device *md)
+{
+	int r = -EINVAL;
+	struct dm_table *map = NULL;
+
+retry:
+	mutex_lock_nested(&md->suspend_lock, SINGLE_DEPTH_NESTING);
+
+	if (!dm_suspended_md(md))
+		goto out;
+
+	if (dm_suspended_internally_md(md)) {
+		/* already internally suspended, wait for internal resume */
+		mutex_unlock(&md->suspend_lock);
+		r = wait_on_bit(&md->flags, DMF_SUSPENDED_INTERNALLY, TASK_INTERRUPTIBLE);
+		if (r)
+			return r;
+		goto retry;
+	}
+
+	map = rcu_dereference(md->map);
+	if (!map || !dm_table_get_size(map))
+		goto out;
+
+	r = __dm_resume(md, map);
+	if (r)
+		goto out;
+
 	clear_bit(DMF_SUSPENDED, &md->flags);
 
 	r = 0;
@@ -2882,15 +2929,80 @@ out:
  * Internal suspend/resume works like userspace-driven suspend. It waits
  * until all bios finish and prevents issuing new bios to the target drivers.
  * It may be used only from the kernel.
- *
- * Internal suspend holds md->suspend_lock, which prevents interaction with
- * userspace-driven suspend.
  */
 
-void dm_internal_suspend(struct mapped_device *md)
+static void __dm_internal_suspend(struct mapped_device *md, unsigned suspend_flags)
 {
+	struct dm_table *map = NULL;
+
+	if (dm_suspended_internally_md(md))
+		return; /* nested internal suspend */
+
+	if (dm_suspended_md(md)) {
+		set_bit(DMF_SUSPENDED_INTERNALLY, &md->flags);
+		return; /* nest suspend */
+	}
+
+	map = rcu_dereference(md->map);
+
+	/*
+	 * Using TASK_UNINTERRUPTIBLE because only NOFLUSH internal suspend is
+	 * supported.  Properly supporting a TASK_INTERRUPTIBLE internal suspend
+	 * would require changing .presuspend to return an error -- avoid this
+	 * until there is a need for more elaborate variants of internal suspend.
+	 */
+	(void) __dm_suspend(md, map, suspend_flags, TASK_UNINTERRUPTIBLE);
+
+	set_bit(DMF_SUSPENDED_INTERNALLY, &md->flags);
+
+	dm_table_postsuspend_targets(map);
+}
+
+static void __dm_internal_resume(struct mapped_device *md)
+{
+	if (!dm_suspended_internally_md(md))
+		return; /* resume from nested internal suspend */
+
+	if (dm_suspended_md(md))
+		goto done; /* resume from nested suspend */
+
+	/*
+	 * NOTE: existing callers don't need to call dm_table_resume_targets
+	 * (which may fail -- so best to avoid it for now by passing NULL map)
+	 */
+	(void) __dm_resume(md, NULL);
+
+done:
+	clear_bit(DMF_SUSPENDED_INTERNALLY, &md->flags);
+	smp_mb__after_atomic();
+	wake_up_bit(&md->flags, DMF_SUSPENDED_INTERNALLY);
+}
+
+void dm_internal_suspend_noflush(struct mapped_device *md)
+{
 	mutex_lock(&md->suspend_lock);
-	if (dm_suspended_md(md))
+	__dm_internal_suspend(md, DM_SUSPEND_NOFLUSH_FLAG);
+	mutex_unlock(&md->suspend_lock);
+}
+EXPORT_SYMBOL_GPL(dm_internal_suspend_noflush);
+
+void dm_internal_resume(struct mapped_device *md)
+{
+	mutex_lock(&md->suspend_lock);
+	__dm_internal_resume(md);
+	mutex_unlock(&md->suspend_lock);
+}
+EXPORT_SYMBOL_GPL(dm_internal_resume);
+
+/*
+ * Fast variants of internal suspend/resume hold md->suspend_lock,
+ * which prevents interaction with userspace-driven suspend.
+ */
+
+void dm_internal_suspend_fast(struct mapped_device *md)
+{
+	mutex_lock(&md->suspend_lock);
+	if (dm_suspended_md(md) || dm_suspended_internally_md(md))
 		return;
 
 	set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
@@ -2899,9 +3011,9 @@ void dm_internal_suspend(struct mapped_device *md)
 	dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
 }
 
-void dm_internal_resume(struct mapped_device *md)
+void dm_internal_resume_fast(struct mapped_device *md)
 {
-	if (dm_suspended_md(md))
+	if (dm_suspended_md(md) || dm_suspended_internally_md(md))
 		goto done;
 
 	dm_queue_flush(md);
@@ -2987,6 +3099,11 @@ int dm_suspended_md(struct mapped_device *md)
 	return test_bit(DMF_SUSPENDED, &md->flags);
 }
 
+int dm_suspended_internally_md(struct mapped_device *md)
+{
+	return test_bit(DMF_SUSPENDED_INTERNALLY, &md->flags);
+}
+
 int dm_test_deferred_remove_flag(struct mapped_device *md)
 {
 	return test_bit(DMF_DEFERRED_REMOVE, &md->flags);
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -129,6 +129,15 @@ int dm_deleting_md(struct mapped_device *md);
  */
 int dm_suspended_md(struct mapped_device *md);
 
+/*
+ * Internal suspend and resume methods.
+ */
+int dm_suspended_internally_md(struct mapped_device *md);
+void dm_internal_suspend_fast(struct mapped_device *md);
+void dm_internal_resume_fast(struct mapped_device *md);
+void dm_internal_suspend_noflush(struct mapped_device *md);
+void dm_internal_resume(struct mapped_device *md);
+
 /*
  * Test if the device is scheduled for deferred remove.
  */
--- a/include/uapi/linux/dm-ioctl.h
+++ b/include/uapi/linux/dm-ioctl.h
@@ -352,4 +352,9 @@ enum {
  */
 #define DM_DEFERRED_REMOVE		(1 << 17) /* In/Out */
 
+/*
+ * If set, the device is suspended internally.
+ */
+#define DM_INTERNAL_SUSPEND_FLAG	(1 << 18) /* Out */
+
 #endif				/* _LINUX_DM_IOCTL_H */