blk-mq: Create hctx for each present CPU
Currently we only create hctx for online CPUs, which can lead to a lot of churn due to frequent soft offline / online operations. Instead allocate one for each present CPU to avoid this and dramatically simplify the code.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jens Axboe <axboe@kernel.dk>
Cc: Keith Busch <keith.busch@intel.com>
Cc: linux-block@vger.kernel.org
Cc: linux-nvme@lists.infradead.org
Link: http://lkml.kernel.org/r/20170626102058.10200-3-hch@lst.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
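The key idea is that the software-queue to hardware-queue map is built once over every present CPU rather than only the online ones, so a later soft offline / online never forces the queues to be frozen and remapped. The standalone C sketch below models that idea outside the kernel; the names NR_PRESENT_CPUS, NR_HW_QUEUES, mq_map and the modulo spreading rule are illustrative assumptions, not the kernel's actual mapping algorithm.

/*
 * Standalone model (not kernel code) of the commit's idea: build the
 * CPU -> hardware queue map once over all *present* CPUs, so that
 * onlining or offlining a CPU later never changes the mapping.
 * NR_PRESENT_CPUS, NR_HW_QUEUES and the modulo spread are illustrative
 * assumptions, not the kernel's actual values or algorithm.
 */
#include <stdio.h>

#define NR_PRESENT_CPUS	8	/* CPUs physically present in this example */
#define NR_HW_QUEUES	3	/* hardware queues exposed by the device */

int main(void)
{
	unsigned int mq_map[NR_PRESENT_CPUS];
	unsigned int cpu;

	/* Spread every present CPU across the hardware queues up front. */
	for (cpu = 0; cpu < NR_PRESENT_CPUS; cpu++)
		mq_map[cpu] = cpu % NR_HW_QUEUES;

	/*
	 * A later soft offline/online of any CPU leaves mq_map untouched,
	 * so there is no longer any need for a hotplug notifier that
	 * freezes and remaps every request queue.
	 */
	for (cpu = 0; cpu < NR_PRESENT_CPUS; cpu++)
		printf("cpu %u -> hctx %u\n", cpu, mq_map[cpu]);

	return 0;
}

Built with any C compiler this just prints a fixed cpu -> hctx table; the point is that nothing in the mapping depends on which of the present CPUs happen to be online at the time.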
commit 4b855ad371
parent 5f042e7cbd

 block/blk-mq.c | 120
diff --git a/block/blk-mq.c b/block/blk-mq.c
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -37,9 +37,6 @@
 #include "blk-wbt.h"
 #include "blk-mq-sched.h"
 
-static DEFINE_MUTEX(all_q_mutex);
-static LIST_HEAD(all_q_list);
-
 static void blk_mq_poll_stats_start(struct request_queue *q);
 static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb);
 static void __blk_mq_stop_hw_queues(struct request_queue *q, bool sync);
@@ -1975,8 +1972,8 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
 		INIT_LIST_HEAD(&__ctx->rq_list);
 		__ctx->queue = q;
 
-		/* If the cpu isn't online, the cpu is mapped to first hctx */
-		if (!cpu_online(i))
+		/* If the cpu isn't present, the cpu is mapped to first hctx */
+		if (!cpu_present(i))
 			continue;
 
 		hctx = blk_mq_map_queue(q, i);
@@ -2019,8 +2016,7 @@ static void blk_mq_free_map_and_requests(struct blk_mq_tag_set *set,
 	}
 }
 
-static void blk_mq_map_swqueue(struct request_queue *q,
-			       const struct cpumask *online_mask)
+static void blk_mq_map_swqueue(struct request_queue *q)
 {
 	unsigned int i, hctx_idx;
 	struct blk_mq_hw_ctx *hctx;
@@ -2038,13 +2034,11 @@ static void blk_mq_map_swqueue(struct request_queue *q,
 	}
 
 	/*
-	 * Map software to hardware queues
+	 * Map software to hardware queues.
+	 *
+	 * If the cpu isn't present, the cpu is mapped to first hctx.
 	 */
-	for_each_possible_cpu(i) {
-		/* If the cpu isn't online, the cpu is mapped to first hctx */
-		if (!cpumask_test_cpu(i, online_mask))
-			continue;
-
+	for_each_present_cpu(i) {
 		hctx_idx = q->mq_map[i];
 		/* unmapped hw queue can be remapped after CPU topo changed */
 		if (!set->tags[hctx_idx] &&
@@ -2330,16 +2324,8 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 		blk_queue_softirq_done(q, set->ops->complete);
 
 	blk_mq_init_cpu_queues(q, set->nr_hw_queues);
-
-	get_online_cpus();
-	mutex_lock(&all_q_mutex);
-
-	list_add_tail(&q->all_q_node, &all_q_list);
 	blk_mq_add_queue_tag_set(set, q);
-	blk_mq_map_swqueue(q, cpu_online_mask);
-
-	mutex_unlock(&all_q_mutex);
-	put_online_cpus();
+	blk_mq_map_swqueue(q);
 
 	if (!(set->flags & BLK_MQ_F_NO_SCHED)) {
 		int ret;
@@ -2365,18 +2351,12 @@ void blk_mq_free_queue(struct request_queue *q)
 {
 	struct blk_mq_tag_set	*set = q->tag_set;
 
-	mutex_lock(&all_q_mutex);
-	list_del_init(&q->all_q_node);
-	mutex_unlock(&all_q_mutex);
-
 	blk_mq_del_queue_tag_set(q);
-
 	blk_mq_exit_hw_queues(q, set, set->nr_hw_queues);
 }
 
 /* Basically redo blk_mq_init_queue with queue frozen */
-static void blk_mq_queue_reinit(struct request_queue *q,
-				const struct cpumask *online_mask)
+static void blk_mq_queue_reinit(struct request_queue *q)
 {
 	WARN_ON_ONCE(!atomic_read(&q->mq_freeze_depth));
 
@@ -2389,76 +2369,12 @@ static void blk_mq_queue_reinit(struct request_queue *q,
 	 * involves free and re-allocate memory, worthy doing?)
 	 */
 
-	blk_mq_map_swqueue(q, online_mask);
+	blk_mq_map_swqueue(q);
 
 	blk_mq_sysfs_register(q);
 	blk_mq_debugfs_register_hctxs(q);
 }
 
-/*
- * New online cpumask which is going to be set in this hotplug event.
- * Declare this cpumasks as global as cpu-hotplug operation is invoked
- * one-by-one and dynamically allocating this could result in a failure.
- */
-static struct cpumask cpuhp_online_new;
-
-static void blk_mq_queue_reinit_work(void)
-{
-	struct request_queue *q;
-
-	mutex_lock(&all_q_mutex);
-	/*
-	 * We need to freeze and reinit all existing queues.  Freezing
-	 * involves synchronous wait for an RCU grace period and doing it
-	 * one by one may take a long time.  Start freezing all queues in
-	 * one swoop and then wait for the completions so that freezing can
-	 * take place in parallel.
-	 */
-	list_for_each_entry(q, &all_q_list, all_q_node)
-		blk_freeze_queue_start(q);
-	list_for_each_entry(q, &all_q_list, all_q_node)
-		blk_mq_freeze_queue_wait(q);
-
-	list_for_each_entry(q, &all_q_list, all_q_node)
-		blk_mq_queue_reinit(q, &cpuhp_online_new);
-
-	list_for_each_entry(q, &all_q_list, all_q_node)
-		blk_mq_unfreeze_queue(q);
-
-	mutex_unlock(&all_q_mutex);
-}
-
-static int blk_mq_queue_reinit_dead(unsigned int cpu)
-{
-	cpumask_copy(&cpuhp_online_new, cpu_online_mask);
-	blk_mq_queue_reinit_work();
-	return 0;
-}
-
-/*
- * Before hotadded cpu starts handling requests, new mappings must be
- * established.  Otherwise, these requests in hw queue might never be
- * dispatched.
- *
- * For example, there is a single hw queue (hctx) and two CPU queues (ctx0
- * for CPU0, and ctx1 for CPU1).
- *
- * Now CPU1 is just onlined and a request is inserted into ctx1->rq_list
- * and set bit0 in pending bitmap as ctx1->index_hw is still zero.
- *
- * And then while running hw queue, blk_mq_flush_busy_ctxs() finds bit0 is set
- * in pending bitmap and tries to retrieve requests in hctx->ctxs[0]->rq_list.
- * But htx->ctxs[0] is a pointer to ctx0, so the request in ctx1->rq_list is
- * ignored.
- */
-static int blk_mq_queue_reinit_prepare(unsigned int cpu)
-{
-	cpumask_copy(&cpuhp_online_new, cpu_online_mask);
-	cpumask_set_cpu(cpu, &cpuhp_online_new);
-	blk_mq_queue_reinit_work();
-	return 0;
-}
-
 static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
 {
 	int i;
@@ -2669,7 +2585,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
 	blk_mq_update_queue_map(set);
 	list_for_each_entry(q, &set->tag_list, tag_set_list) {
 		blk_mq_realloc_hw_ctxs(set, q);
-		blk_mq_queue_reinit(q, cpu_online_mask);
+		blk_mq_queue_reinit(q);
 	}
 
 	list_for_each_entry(q, &set->tag_list, tag_set_list)
@@ -2885,24 +2801,10 @@ bool blk_mq_poll(struct request_queue *q, blk_qc_t cookie)
 }
 EXPORT_SYMBOL_GPL(blk_mq_poll);
 
-void blk_mq_disable_hotplug(void)
-{
-	mutex_lock(&all_q_mutex);
-}
-
-void blk_mq_enable_hotplug(void)
-{
-	mutex_unlock(&all_q_mutex);
-}
-
 static int __init blk_mq_init(void)
 {
 	cpuhp_setup_state_multi(CPUHP_BLK_MQ_DEAD, "block/mq:dead", NULL,
 				blk_mq_hctx_notify_dead);
-
-	cpuhp_setup_state_nocalls(CPUHP_BLK_MQ_PREPARE, "block/mq:prepare",
-				  blk_mq_queue_reinit_prepare,
-				  blk_mq_queue_reinit_dead);
 	return 0;
 }
 subsys_initcall(blk_mq_init);
diff --git a/block/blk-mq.h b/block/blk-mq.h
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -56,11 +56,6 @@ void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 				bool at_head);
 void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
 				struct list_head *list);
-/*
- * CPU hotplug helpers
- */
-void blk_mq_enable_hotplug(void);
-void blk_mq_disable_hotplug(void);
 
 /*
  * CPU -> queue mappings
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -58,7 +58,6 @@ enum cpuhp_state {
 	CPUHP_XEN_EVTCHN_PREPARE,
 	CPUHP_ARM_SHMOBILE_SCU_PREPARE,
 	CPUHP_SH_SH3X_PREPARE,
-	CPUHP_BLK_MQ_PREPARE,
 	CPUHP_NET_FLOW_PREPARE,
 	CPUHP_TOPOLOGY_PREPARE,
 	CPUHP_NET_IUCV_PREPARE,