block: remove legacy IO schedulers
Retain the deadline documentation, as that carries over to mq-deadline
as well.

Tested-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in: parent 404b8f5a03, commit f382fb0bce.
@@ -1,291 +0,0 @@
CFQ (Complete Fairness Queueing)
===============================

The main aim of the CFQ scheduler is to provide a fair allocation of the disk
I/O bandwidth for all the processes which request an I/O operation.

CFQ maintains a per-process queue for the processes which request I/O
operations (synchronous requests). In case of asynchronous requests, all the
requests from all the processes are batched together according to their
process's I/O priority.

CFQ ioscheduler tunables
========================

slice_idle
----------
This specifies how long CFQ should idle for the next request on certain cfq
queues (for sequential workloads) and service trees (for random workloads)
before the queue is expired and CFQ selects the next queue to dispatch from.

By default slice_idle is a non-zero value. That means by default we idle on
queues/service trees. This can be very helpful on highly seeky media like
single-spindle SATA/SAS disks where we can cut down on the overall number of
seeks and see improved throughput.

Setting slice_idle to 0 will remove all the idling on queues/service tree
level and one should see an overall improved throughput on faster storage
devices like multiple SATA/SAS disks in a hardware RAID configuration. The
downside is that the isolation provided from WRITES also goes down and the
notion of IO priority becomes weaker.

So depending on storage and workload, it might be useful to set slice_idle=0.
In general, for SATA/SAS disks and software RAID of SATA/SAS disks, keeping
slice_idle enabled should be useful. For any configuration where there are
multiple spindles behind a single LUN (host-based hardware RAID controller or
storage arrays), setting slice_idle=0 might end up in better throughput and
acceptable latencies.

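Like the other tunables below, slice_idle is exposed through sysfs. A minimal
sketch of flipping it to 0 from user space (the device name sda is only an
example; the tunable lives under /sys/block/<dev>/queue/iosched/ while CFQ is
the active scheduler):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* Disable per-queue idling by writing 0 to slice_idle. */
	const char *path = "/sys/block/sda/queue/iosched/slice_idle";
	int fd = open(path, O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (write(fd, "0", 1) != 1)
		perror("write");
	close(fd);
	return 0;
}
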
back_seek_max
-------------
This specifies, given in Kbytes, the maximum "distance" for backward seeking.
The distance is the amount of space from the current head location to the
sectors that are backward in terms of distance.

This parameter allows the scheduler to anticipate requests in the "backward"
direction and consider them as being the "next" if they are within this
distance from the current head location.

back_seek_penalty
-----------------
This parameter is used to compute the cost of backward seeking. If the
backward distance of a request is just 1/back_seek_penalty from a "front"
request, then the seeking cost of the two requests is considered equivalent.

So the scheduler will not bias toward one or the other request (otherwise the
scheduler will bias toward the front request). The default value of
back_seek_penalty is 2.

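A rough sketch of that cost comparison (distances in sectors; the helper and
its signature are illustrative, not the kernel implementation):

#include <stdbool.h>

typedef unsigned long long sector_t;

static bool prefer_backward(sector_t back_dist, sector_t front_dist,
			    sector_t back_seek_max_kb,
			    unsigned int back_seek_penalty)
{
	/* Backward candidates beyond back_seek_max are never considered. */
	if (back_dist > back_seek_max_kb * 2)	/* 1 Kbyte == 2 sectors */
		return false;

	/* A backward seek "costs" back_seek_penalty times its distance, so
	 * with the default penalty of 2 a backward request must be less
	 * than half as far away as the forward one to be preferred.
	 */
	return back_dist * back_seek_penalty < front_dist;
}
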
fifo_expire_async
-----------------
This parameter is used to set the timeout of asynchronous requests. Default
value of this is 248ms.

fifo_expire_sync
----------------
This parameter is used to set the timeout of synchronous requests. Default
value of this is 124ms. To favor synchronous requests over asynchronous ones,
this value should be decreased relative to fifo_expire_async.

group_idle
-----------
This parameter forces idling at the CFQ group level instead of the CFQ queue
level. It was introduced after a bottleneck was observed on higher-end
storage, caused by idling on each sequential queue and thus allowing dispatch
from only a single queue at a time. The idea with this parameter is that it
can be run with slice_idle=0 and group_idle=8, so that idling does not happen
on individual queues in the group but happens overall on the group and thus
still keeps the IO controller working.
Not idling on individual queues in the group will dispatch requests from
multiple queues in the group at the same time and achieve higher throughput
on higher-end storage.

Default value for this parameter is 8ms.

low_latency
-----------
This parameter is used to enable/disable the low latency mode of the CFQ
scheduler. If enabled, CFQ tries to recompute the slice time for each process
based on the target_latency set for the system. This favors fairness over
throughput. Disabling low latency (setting it to 0) ignores target latency,
allowing each process in the system to get a full time slice.

By default low latency mode is enabled.

target_latency
--------------
This parameter is used to calculate the time slice for a process if CFQ's
low latency mode is enabled. It will ensure that sync requests have an
estimated latency. But if the sequential workload is higher (e.g. sequential
reads), then to meet the latency constraints throughput may decrease because
of less time for each process to issue I/O requests before the cfq queue is
switched.

Though this can be overcome by disabling low_latency, it may increase the
read latency for some applications. This parameter allows for changing
target_latency through the sysfs interface, which can provide a balance
between throughput and read latency.

Default value for target_latency is 300ms.

slice_async
-----------
This parameter is the same as slice_sync but for the asynchronous queue. The
default value is 40ms.

slice_async_rq
--------------
This parameter is used to limit the dispatching of asynchronous requests to
the device request queue within the queue's slice time. The maximum number of
requests that are allowed to be dispatched also depends upon the io priority.
Default value for this is 2.

slice_sync
----------
When a queue is selected for execution, the queue's IO requests are only
executed for a certain amount of time (time_slice) before switching to
another queue. This parameter is used to calculate the time slice of the
synchronous queue.

time_slice is computed using the below equation:
time_slice = slice_sync + (slice_sync/5 * (4 - prio)). To increase the
time_slice of the synchronous queue, increase the value of slice_sync. Default
value is 100ms.

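A quick worked example of that formula, assuming the default slice_sync of
100ms (prio is the ioprio level, 0 = highest, 7 = lowest):

#include <stdio.h>

int main(void)
{
	const int slice_sync = 100;	/* ms, default */

	for (int prio = 0; prio <= 7; prio++) {
		int time_slice = slice_sync + (slice_sync / 5) * (4 - prio);
		printf("prio %d -> time_slice %d ms\n", prio, time_slice);
	}
	/* prio 0 gets 180ms, prio 4 gets 100ms, prio 7 gets 40ms. */
	return 0;
}
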
quantum
-------
This specifies the number of requests dispatched to the device queue. Within a
queue's time slice, a request will not be dispatched if the number of requests
in the device exceeds this parameter. This parameter is used for synchronous
requests.

In case of storage with several disks, this setting can limit the parallel
processing of requests. Therefore, increasing the value can improve
performance, although this can cause the latency of some I/O to increase due
to the larger number of outstanding requests.

CFQ Group scheduling
====================

CFQ supports blkio cgroup and has "blkio." prefixed files in each
blkio cgroup directory. It is weight-based and there are four knobs
for configuration - weight[_device] and leaf_weight[_device].
Internal cgroup nodes (the ones with children) can also have tasks in
them, so the former two configure how much proportion the cgroup as a
whole is entitled to at its parent's level while the latter two
configure how much proportion the tasks in the cgroup have compared to
its direct children.

Another way to think about it is assuming that each internal node has
an implicit leaf child node which hosts all the tasks whose weight is
configured by leaf_weight[_device]. Let's assume a blkio hierarchy
composed of five cgroups - root, A, B, AA and AB - with the following
weights where the names represent the hierarchy.

        weight leaf_weight
 root :  125    125
 A    :  500    750
 B    :  250    500
 AA   :  500    500
 AB   : 1000    500

root never has a parent, making its weight meaningless. For backward
compatibility, weight is always kept in sync with leaf_weight. B, AA
and AB have no children and thus their tasks have no child cgroups to
compete with. They always get 100% of what the cgroup won at the
parent level. Considering only the weights which matter, the hierarchy
looks like the following.

          root
       /    |   \
      A     B    leaf
     500   250   125
   /  |  \
  AA  AB  leaf
 500 1000 750

If all cgroups have active IOs and are competing with each other, disk
time will be distributed like the following.

Distribution below root. The total active weight at this level is
A:500 + B:250 + root-leaf:125 = 875.

 root-leaf :   125 /  875      =~ 14%
 A         :   500 /  875      =~ 57%
 B(-leaf)  :   250 /  875      =~ 28%

A has children and further distributes its 57% among the children and
the implicit leaf node. The total active weight at this level is
AA:500 + AB:1000 + A-leaf:750 = 2250.

 A-leaf    : ( 750 / 2250) * A =~ 19%
 AA(-leaf) : ( 500 / 2250) * A =~ 12%
 AB(-leaf) : (1000 / 2250) * A =~ 25%

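The same arithmetic, spelled out as a small self-contained illustration (not
kernel code): each group's share at a level is its weight divided by the
total active weight at that level, multiplied by the parent's share.

#include <stdio.h>

int main(void)
{
	double root_total = 500 + 250 + 125;	/* A + B + root-leaf */
	double a_share    = 500 / root_total;	/* ~57% of disk time  */
	double a_total    = 500 + 1000 + 750;	/* AA + AB + A-leaf   */

	printf("root-leaf : %4.1f%%\n", 100 * 125 / root_total);
	printf("A         : %4.1f%%\n", 100 * a_share);
	printf("B         : %4.1f%%\n", 100 * 250 / root_total);
	printf("A-leaf    : %4.1f%%\n", 100 * a_share *  750 / a_total);
	printf("AA        : %4.1f%%\n", 100 * a_share *  500 / a_total);
	printf("AB        : %4.1f%%\n", 100 * a_share * 1000 / a_total);
	return 0;
}
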
CFQ IOPS Mode for group scheduling
===================================
The basic CFQ design is to provide priority-based time slices. A higher
priority process gets a bigger time slice and a lower priority process gets a
smaller time slice. Measuring time becomes harder if the storage is fast and
supports NCQ, and it would be better to dispatch multiple requests from
multiple cfq queues in the request queue at a time. In such a scenario, it is
not possible to measure the time consumed by a single queue accurately.

What is possible though is to measure the number of requests dispatched from
a single queue and also allow dispatch from multiple cfq queues at the same
time. This effectively becomes fairness in terms of IOPS (IO operations per
second).

If one sets slice_idle=0 and the storage supports NCQ, CFQ internally switches
to IOPS mode and starts providing fairness in terms of number of requests
dispatched. Note that this mode switching takes effect only for group
scheduling. For non-cgroup users nothing should change.

CFQ IO scheduler Idling Theory
===============================
Idling on a queue is primarily about waiting for the next request to come
on the same queue after completion of a request. In this process CFQ will not
dispatch requests from other cfq queues even if requests are pending there.

The rationale behind idling is that it can cut down on the number of seeks
on rotational media. For example, if a process is doing dependent
sequential reads (the next read is issued only after completion of the
previous one), then not dispatching requests from other queues should help,
as we did not move the disk head and kept on dispatching sequential IO from
one queue.

CFQ has the following service trees, and the various queues are put on these
trees.

	sync-idle	sync-noidle	async

All cfq queues doing synchronous sequential IO go on the sync-idle tree.
On this tree we idle on each queue individually.

All synchronous non-sequential queues go on the sync-noidle tree. Also any
synchronous write request which is not marked with REQ_IDLE goes on this
service tree. On this tree we do not idle on individual queues; instead we
idle on the whole group of queues or the tree. So if there are 4 queues
waiting for IO to dispatch, we will idle only once the last queue has
dispatched its IO and there is no more IO on this service tree.

All async writes go on the async service tree. There is no idling on async
queues.

CFQ has some optimizations for SSDs: if it detects non-rotational media which
can support a higher queue depth (multiple requests in flight at a time), then
it cuts down on idling of individual queues and all the queues move to the
sync-noidle tree, so only tree idle remains. This tree idling provides
isolation from the buffered write queues on the async tree.

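Putting the above rules together, a simplified sketch of how a queue might be
classified (the enum and helper are illustrative, not CFQ's internal API):

#include <stdbool.h>

enum service_tree { SYNC_IDLE, SYNC_NOIDLE, ASYNC };

static enum service_tree classify_queue(bool is_sync, bool is_sequential,
					bool req_idle)
{
	if (!is_sync)
		return ASYNC;		/* async writes: never idled on */
	if (!is_sequential || !req_idle)
		return SYNC_NOIDLE;	/* idle once for the whole tree */
	return SYNC_IDLE;		/* idle on each queue individually */
}
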
FAQ
===
Q1. Why idle at all on queues not marked with REQ_IDLE?

A1. We only do tree idle (all queues on the sync-noidle tree) on queues not
    marked with REQ_IDLE. This helps in providing isolation from all the
    sync-idle queues. Otherwise, in the presence of many sequential readers,
    other synchronous IO might not get a fair share of the disk.

    For example, say there are 10 sequential readers doing IO and they get
    100ms each. If a !REQ_IDLE request comes in, it will be scheduled
    roughly after 1 second. If after completion of the !REQ_IDLE request we
    do not idle, and after a couple of milliseconds another !REQ_IDLE
    request comes in, again it will be scheduled after 1 second. Repeat this
    and notice how a workload can lose its disk share and suffer due to
    multiple sequential readers.

    fsync can generate dependent IO where a bunch of data is written in the
    context of fsync, and later some journaling data is written. Journaling
    data comes in only after fsync has finished its IO (at least for ext4
    that seemed to be the case). Now if one decides not to idle on the fsync
    thread due to !REQ_IDLE, then the next journaling write will not get
    scheduled for another second. A process doing small fsyncs will suffer
    badly in the presence of multiple sequential readers.

    Hence doing tree idling on threads using the !REQ_IDLE flag on requests
    provides isolation from multiple sequential readers while at the same
    time we do not idle on individual threads.

Q2. When to specify REQ_IDLE?
A2. I would think that whenever one is doing a synchronous write and expecting
    more writes to be dispatched from the same context soon, one should be
    able to specify REQ_IDLE on the writes, and that probably should work
    well for most of the cases.

@@ -3,67 +3,6 @@ if BLOCK
 
 menu "IO Schedulers"
 
-config IOSCHED_NOOP
-	bool
-	default y
-	---help---
-	  The no-op I/O scheduler is a minimal scheduler that does basic merging
-	  and sorting. Its main uses include non-disk based block devices like
-	  memory devices, and specialised software or hardware environments
-	  that do their own scheduling and require only minimal assistance from
-	  the kernel.
-
-config IOSCHED_DEADLINE
-	tristate "Deadline I/O scheduler"
-	default y
-	---help---
-	  The deadline I/O scheduler is simple and compact. It will provide
-	  CSCAN service with FIFO expiration of requests, switching to
-	  a new point in the service tree and doing a batch of IO from there
-	  in case of expiry.
-
-config IOSCHED_CFQ
-	tristate "CFQ I/O scheduler"
-	default y
-	---help---
-	  The CFQ I/O scheduler tries to distribute bandwidth equally
-	  among all processes in the system. It should provide a fair
-	  and low latency working environment, suitable for both desktop
-	  and server systems.
-
-	  This is the default I/O scheduler.
-
-config CFQ_GROUP_IOSCHED
-	bool "CFQ Group Scheduling support"
-	depends on IOSCHED_CFQ && BLK_CGROUP
-	---help---
-	  Enable group IO scheduling in CFQ.
-
-choice
-
-	prompt "Default I/O scheduler"
-	default DEFAULT_CFQ
-	help
-	  Select the I/O scheduler which will be used by default for all
-	  block devices.
-
-	config DEFAULT_DEADLINE
-		bool "Deadline" if IOSCHED_DEADLINE=y
-
-	config DEFAULT_CFQ
-		bool "CFQ" if IOSCHED_CFQ=y
-
-	config DEFAULT_NOOP
-		bool "No-op"
-
-endchoice
-
-config DEFAULT_IOSCHED
-	string
-	default "deadline" if DEFAULT_DEADLINE
-	default "cfq" if DEFAULT_CFQ
-	default "noop" if DEFAULT_NOOP
-
 config MQ_IOSCHED_DEADLINE
 	tristate "MQ deadline I/O scheduler"
 	default y

@@ -18,9 +18,6 @@ obj-$(CONFIG_BLK_DEV_BSGLIB)	+= bsg-lib.o
 obj-$(CONFIG_BLK_CGROUP)	+= blk-cgroup.o
 obj-$(CONFIG_BLK_DEV_THROTTLING)	+= blk-throttle.o
 obj-$(CONFIG_BLK_CGROUP_IOLATENCY)	+= blk-iolatency.o
-obj-$(CONFIG_IOSCHED_NOOP)	+= noop-iosched.o
-obj-$(CONFIG_IOSCHED_DEADLINE)	+= deadline-iosched.o
-obj-$(CONFIG_IOSCHED_CFQ)	+= cfq-iosched.o
 obj-$(CONFIG_MQ_IOSCHED_DEADLINE)	+= mq-deadline.o
 obj-$(CONFIG_MQ_IOSCHED_KYBER)	+= kyber-iosched.o
 bfq-y				:= bfq-iosched.o bfq-wf2q.o bfq-cgroup.o

block/cfq-iosched.c (4916 lines removed): file diff suppressed because it is too large.

							| @ -1,560 +0,0 @@ | |||||||
| /*
 |  | ||||||
|  *  Deadline i/o scheduler. |  | ||||||
|  * |  | ||||||
|  *  Copyright (C) 2002 Jens Axboe <axboe@kernel.dk> |  | ||||||
|  */ |  | ||||||
| #include <linux/kernel.h> |  | ||||||
| #include <linux/fs.h> |  | ||||||
| #include <linux/blkdev.h> |  | ||||||
| #include <linux/elevator.h> |  | ||||||
| #include <linux/bio.h> |  | ||||||
| #include <linux/module.h> |  | ||||||
| #include <linux/slab.h> |  | ||||||
| #include <linux/init.h> |  | ||||||
| #include <linux/compiler.h> |  | ||||||
| #include <linux/rbtree.h> |  | ||||||
| 
 |  | ||||||
| /*
 |  | ||||||
|  * See Documentation/block/deadline-iosched.txt |  | ||||||
|  */ |  | ||||||
| static const int read_expire = HZ / 2;  /* max time before a read is submitted. */ |  | ||||||
| static const int write_expire = 5 * HZ; /* ditto for writes, these limits are SOFT! */ |  | ||||||
| static const int writes_starved = 2;    /* max times reads can starve a write */ |  | ||||||
| static const int fifo_batch = 16;       /* # of sequential requests treated as one
 |  | ||||||
| 				     by the above parameters. For throughput. */ |  | ||||||
| 
 |  | ||||||
| struct deadline_data { |  | ||||||
| 	/*
 |  | ||||||
| 	 * run time data |  | ||||||
| 	 */ |  | ||||||
| 
 |  | ||||||
| 	/*
 |  | ||||||
| 	 * requests (deadline_rq s) are present on both sort_list and fifo_list |  | ||||||
| 	 */ |  | ||||||
| 	struct rb_root sort_list[2];	 |  | ||||||
| 	struct list_head fifo_list[2]; |  | ||||||
| 
 |  | ||||||
| 	/*
 |  | ||||||
| 	 * next in sort order. read, write or both are NULL |  | ||||||
| 	 */ |  | ||||||
| 	struct request *next_rq[2]; |  | ||||||
| 	unsigned int batching;		/* number of sequential requests made */ |  | ||||||
| 	unsigned int starved;		/* times reads have starved writes */ |  | ||||||
| 
 |  | ||||||
| 	/*
 |  | ||||||
| 	 * settings that change how the i/o scheduler behaves |  | ||||||
| 	 */ |  | ||||||
| 	int fifo_expire[2]; |  | ||||||
| 	int fifo_batch; |  | ||||||
| 	int writes_starved; |  | ||||||
| 	int front_merges; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| static inline struct rb_root * |  | ||||||
| deadline_rb_root(struct deadline_data *dd, struct request *rq) |  | ||||||
| { |  | ||||||
| 	return &dd->sort_list[rq_data_dir(rq)]; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| /*
 |  | ||||||
|  * get the request after `rq' in sector-sorted order |  | ||||||
|  */ |  | ||||||
| static inline struct request * |  | ||||||
| deadline_latter_request(struct request *rq) |  | ||||||
| { |  | ||||||
| 	struct rb_node *node = rb_next(&rq->rb_node); |  | ||||||
| 
 |  | ||||||
| 	if (node) |  | ||||||
| 		return rb_entry_rq(node); |  | ||||||
| 
 |  | ||||||
| 	return NULL; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static void |  | ||||||
| deadline_add_rq_rb(struct deadline_data *dd, struct request *rq) |  | ||||||
| { |  | ||||||
| 	struct rb_root *root = deadline_rb_root(dd, rq); |  | ||||||
| 
 |  | ||||||
| 	elv_rb_add(root, rq); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static inline void |  | ||||||
| deadline_del_rq_rb(struct deadline_data *dd, struct request *rq) |  | ||||||
| { |  | ||||||
| 	const int data_dir = rq_data_dir(rq); |  | ||||||
| 
 |  | ||||||
| 	if (dd->next_rq[data_dir] == rq) |  | ||||||
| 		dd->next_rq[data_dir] = deadline_latter_request(rq); |  | ||||||
| 
 |  | ||||||
| 	elv_rb_del(deadline_rb_root(dd, rq), rq); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| /*
 |  | ||||||
|  * add rq to rbtree and fifo |  | ||||||
|  */ |  | ||||||
| static void |  | ||||||
| deadline_add_request(struct request_queue *q, struct request *rq) |  | ||||||
| { |  | ||||||
| 	struct deadline_data *dd = q->elevator->elevator_data; |  | ||||||
| 	const int data_dir = rq_data_dir(rq); |  | ||||||
| 
 |  | ||||||
| 	/*
 |  | ||||||
| 	 * This may be a requeue of a write request that has locked its |  | ||||||
| 	 * target zone. If it is the case, this releases the zone lock. |  | ||||||
| 	 */ |  | ||||||
| 	blk_req_zone_write_unlock(rq); |  | ||||||
| 
 |  | ||||||
| 	deadline_add_rq_rb(dd, rq); |  | ||||||
| 
 |  | ||||||
| 	/*
 |  | ||||||
| 	 * set expire time and add to fifo list |  | ||||||
| 	 */ |  | ||||||
| 	rq->fifo_time = jiffies + dd->fifo_expire[data_dir]; |  | ||||||
| 	list_add_tail(&rq->queuelist, &dd->fifo_list[data_dir]); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| /*
 |  | ||||||
|  * remove rq from rbtree and fifo. |  | ||||||
|  */ |  | ||||||
| static void deadline_remove_request(struct request_queue *q, struct request *rq) |  | ||||||
| { |  | ||||||
| 	struct deadline_data *dd = q->elevator->elevator_data; |  | ||||||
| 
 |  | ||||||
| 	rq_fifo_clear(rq); |  | ||||||
| 	deadline_del_rq_rb(dd, rq); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static enum elv_merge |  | ||||||
| deadline_merge(struct request_queue *q, struct request **req, struct bio *bio) |  | ||||||
| { |  | ||||||
| 	struct deadline_data *dd = q->elevator->elevator_data; |  | ||||||
| 	struct request *__rq; |  | ||||||
| 
 |  | ||||||
| 	/*
 |  | ||||||
| 	 * check for front merge |  | ||||||
| 	 */ |  | ||||||
| 	if (dd->front_merges) { |  | ||||||
| 		sector_t sector = bio_end_sector(bio); |  | ||||||
| 
 |  | ||||||
| 		__rq = elv_rb_find(&dd->sort_list[bio_data_dir(bio)], sector); |  | ||||||
| 		if (__rq) { |  | ||||||
| 			BUG_ON(sector != blk_rq_pos(__rq)); |  | ||||||
| 
 |  | ||||||
| 			if (elv_bio_merge_ok(__rq, bio)) { |  | ||||||
| 				*req = __rq; |  | ||||||
| 				return ELEVATOR_FRONT_MERGE; |  | ||||||
| 			} |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
| 
 |  | ||||||
| 	return ELEVATOR_NO_MERGE; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static void deadline_merged_request(struct request_queue *q, |  | ||||||
| 				    struct request *req, enum elv_merge type) |  | ||||||
| { |  | ||||||
| 	struct deadline_data *dd = q->elevator->elevator_data; |  | ||||||
| 
 |  | ||||||
| 	/*
 |  | ||||||
| 	 * if the merge was a front merge, we need to reposition request |  | ||||||
| 	 */ |  | ||||||
| 	if (type == ELEVATOR_FRONT_MERGE) { |  | ||||||
| 		elv_rb_del(deadline_rb_root(dd, req), req); |  | ||||||
| 		deadline_add_rq_rb(dd, req); |  | ||||||
| 	} |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static void |  | ||||||
| deadline_merged_requests(struct request_queue *q, struct request *req, |  | ||||||
| 			 struct request *next) |  | ||||||
| { |  | ||||||
| 	/*
 |  | ||||||
| 	 * if next expires before rq, assign its expire time to rq |  | ||||||
| 	 * and move into next position (next will be deleted) in fifo |  | ||||||
| 	 */ |  | ||||||
| 	if (!list_empty(&req->queuelist) && !list_empty(&next->queuelist)) { |  | ||||||
| 		if (time_before((unsigned long)next->fifo_time, |  | ||||||
| 				(unsigned long)req->fifo_time)) { |  | ||||||
| 			list_move(&req->queuelist, &next->queuelist); |  | ||||||
| 			req->fifo_time = next->fifo_time; |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
| 
 |  | ||||||
| 	/*
 |  | ||||||
| 	 * kill knowledge of next, this one is a goner |  | ||||||
| 	 */ |  | ||||||
| 	deadline_remove_request(q, next); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| /*
 |  | ||||||
|  * move request from sort list to dispatch queue. |  | ||||||
|  */ |  | ||||||
| static inline void |  | ||||||
| deadline_move_to_dispatch(struct deadline_data *dd, struct request *rq) |  | ||||||
| { |  | ||||||
| 	struct request_queue *q = rq->q; |  | ||||||
| 
 |  | ||||||
| 	/*
 |  | ||||||
| 	 * For a zoned block device, write requests must write lock their |  | ||||||
| 	 * target zone. |  | ||||||
| 	 */ |  | ||||||
| 	blk_req_zone_write_lock(rq); |  | ||||||
| 
 |  | ||||||
| 	deadline_remove_request(q, rq); |  | ||||||
| 	elv_dispatch_add_tail(q, rq); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| /*
 |  | ||||||
|  * move an entry to dispatch queue |  | ||||||
|  */ |  | ||||||
| static void |  | ||||||
| deadline_move_request(struct deadline_data *dd, struct request *rq) |  | ||||||
| { |  | ||||||
| 	const int data_dir = rq_data_dir(rq); |  | ||||||
| 
 |  | ||||||
| 	dd->next_rq[READ] = NULL; |  | ||||||
| 	dd->next_rq[WRITE] = NULL; |  | ||||||
| 	dd->next_rq[data_dir] = deadline_latter_request(rq); |  | ||||||
| 
 |  | ||||||
| 	/*
 |  | ||||||
| 	 * take it off the sort and fifo list, move |  | ||||||
| 	 * to dispatch queue |  | ||||||
| 	 */ |  | ||||||
| 	deadline_move_to_dispatch(dd, rq); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| /*
 |  | ||||||
|  * deadline_check_fifo returns 0 if there are no expired requests on the fifo, |  | ||||||
|  * 1 otherwise. Requires !list_empty(&dd->fifo_list[data_dir]) |  | ||||||
|  */ |  | ||||||
| static inline int deadline_check_fifo(struct deadline_data *dd, int ddir) |  | ||||||
| { |  | ||||||
| 	struct request *rq = rq_entry_fifo(dd->fifo_list[ddir].next); |  | ||||||
| 
 |  | ||||||
| 	/*
 |  | ||||||
| 	 * rq is expired! |  | ||||||
| 	 */ |  | ||||||
| 	if (time_after_eq(jiffies, (unsigned long)rq->fifo_time)) |  | ||||||
| 		return 1; |  | ||||||
| 
 |  | ||||||
| 	return 0; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| /*
 |  | ||||||
|  * For the specified data direction, return the next request to dispatch using |  | ||||||
|  * arrival ordered lists. |  | ||||||
|  */ |  | ||||||
| static struct request * |  | ||||||
| deadline_fifo_request(struct deadline_data *dd, int data_dir) |  | ||||||
| { |  | ||||||
| 	struct request *rq; |  | ||||||
| 
 |  | ||||||
| 	if (WARN_ON_ONCE(data_dir != READ && data_dir != WRITE)) |  | ||||||
| 		return NULL; |  | ||||||
| 
 |  | ||||||
| 	if (list_empty(&dd->fifo_list[data_dir])) |  | ||||||
| 		return NULL; |  | ||||||
| 
 |  | ||||||
| 	rq = rq_entry_fifo(dd->fifo_list[data_dir].next); |  | ||||||
| 	if (data_dir == READ || !blk_queue_is_zoned(rq->q)) |  | ||||||
| 		return rq; |  | ||||||
| 
 |  | ||||||
| 	/*
 |  | ||||||
| 	 * Look for a write request that can be dispatched, that is one with |  | ||||||
| 	 * an unlocked target zone. |  | ||||||
| 	 */ |  | ||||||
| 	list_for_each_entry(rq, &dd->fifo_list[WRITE], queuelist) { |  | ||||||
| 		if (blk_req_can_dispatch_to_zone(rq)) |  | ||||||
| 			return rq; |  | ||||||
| 	} |  | ||||||
| 
 |  | ||||||
| 	return NULL; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| /*
 |  | ||||||
|  * For the specified data direction, return the next request to dispatch using |  | ||||||
|  * sector position sorted lists. |  | ||||||
|  */ |  | ||||||
| static struct request * |  | ||||||
| deadline_next_request(struct deadline_data *dd, int data_dir) |  | ||||||
| { |  | ||||||
| 	struct request *rq; |  | ||||||
| 
 |  | ||||||
| 	if (WARN_ON_ONCE(data_dir != READ && data_dir != WRITE)) |  | ||||||
| 		return NULL; |  | ||||||
| 
 |  | ||||||
| 	rq = dd->next_rq[data_dir]; |  | ||||||
| 	if (!rq) |  | ||||||
| 		return NULL; |  | ||||||
| 
 |  | ||||||
| 	if (data_dir == READ || !blk_queue_is_zoned(rq->q)) |  | ||||||
| 		return rq; |  | ||||||
| 
 |  | ||||||
| 	/*
 |  | ||||||
| 	 * Look for a write request that can be dispatched, that is one with |  | ||||||
| 	 * an unlocked target zone. |  | ||||||
| 	 */ |  | ||||||
| 	while (rq) { |  | ||||||
| 		if (blk_req_can_dispatch_to_zone(rq)) |  | ||||||
| 			return rq; |  | ||||||
| 		rq = deadline_latter_request(rq); |  | ||||||
| 	} |  | ||||||
| 
 |  | ||||||
| 	return NULL; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| /*
 |  | ||||||
|  * deadline_dispatch_requests selects the best request according to |  | ||||||
|  * read/write expire, fifo_batch, etc |  | ||||||
|  */ |  | ||||||
| static int deadline_dispatch_requests(struct request_queue *q, int force) |  | ||||||
| { |  | ||||||
| 	struct deadline_data *dd = q->elevator->elevator_data; |  | ||||||
| 	const int reads = !list_empty(&dd->fifo_list[READ]); |  | ||||||
| 	const int writes = !list_empty(&dd->fifo_list[WRITE]); |  | ||||||
| 	struct request *rq, *next_rq; |  | ||||||
| 	int data_dir; |  | ||||||
| 
 |  | ||||||
| 	/*
 |  | ||||||
| 	 * batches are currently reads XOR writes |  | ||||||
| 	 */ |  | ||||||
| 	rq = deadline_next_request(dd, WRITE); |  | ||||||
| 	if (!rq) |  | ||||||
| 		rq = deadline_next_request(dd, READ); |  | ||||||
| 
 |  | ||||||
| 	if (rq && dd->batching < dd->fifo_batch) |  | ||||||
| 		/* we have a next request and are still entitled to batch */ |  | ||||||
| 		goto dispatch_request; |  | ||||||
| 
 |  | ||||||
| 	/*
 |  | ||||||
| 	 * at this point we are not running a batch. select the appropriate |  | ||||||
| 	 * data direction (read / write) |  | ||||||
| 	 */ |  | ||||||
| 
 |  | ||||||
| 	if (reads) { |  | ||||||
| 		BUG_ON(RB_EMPTY_ROOT(&dd->sort_list[READ])); |  | ||||||
| 
 |  | ||||||
| 		if (deadline_fifo_request(dd, WRITE) && |  | ||||||
| 		    (dd->starved++ >= dd->writes_starved)) |  | ||||||
| 			goto dispatch_writes; |  | ||||||
| 
 |  | ||||||
| 		data_dir = READ; |  | ||||||
| 
 |  | ||||||
| 		goto dispatch_find_request; |  | ||||||
| 	} |  | ||||||
| 
 |  | ||||||
| 	/*
 |  | ||||||
| 	 * there are either no reads or writes have been starved |  | ||||||
| 	 */ |  | ||||||
| 
 |  | ||||||
| 	if (writes) { |  | ||||||
| dispatch_writes: |  | ||||||
| 		BUG_ON(RB_EMPTY_ROOT(&dd->sort_list[WRITE])); |  | ||||||
| 
 |  | ||||||
| 		dd->starved = 0; |  | ||||||
| 
 |  | ||||||
| 		data_dir = WRITE; |  | ||||||
| 
 |  | ||||||
| 		goto dispatch_find_request; |  | ||||||
| 	} |  | ||||||
| 
 |  | ||||||
| 	return 0; |  | ||||||
| 
 |  | ||||||
| dispatch_find_request: |  | ||||||
| 	/*
 |  | ||||||
| 	 * we are not running a batch, find best request for selected data_dir |  | ||||||
| 	 */ |  | ||||||
| 	next_rq = deadline_next_request(dd, data_dir); |  | ||||||
| 	if (deadline_check_fifo(dd, data_dir) || !next_rq) { |  | ||||||
| 		/*
 |  | ||||||
| 		 * A deadline has expired, the last request was in the other |  | ||||||
| 		 * direction, or we have run out of higher-sectored requests. |  | ||||||
| 		 * Start again from the request with the earliest expiry time. |  | ||||||
| 		 */ |  | ||||||
| 		rq = deadline_fifo_request(dd, data_dir); |  | ||||||
| 	} else { |  | ||||||
| 		/*
 |  | ||||||
| 		 * The last req was the same dir and we have a next request in |  | ||||||
| 		 * sort order. No expired requests so continue on from here. |  | ||||||
| 		 */ |  | ||||||
| 		rq = next_rq; |  | ||||||
| 	} |  | ||||||
| 
 |  | ||||||
| 	/*
 |  | ||||||
| 	 * For a zoned block device, if we only have writes queued and none of |  | ||||||
| 	 * them can be dispatched, rq will be NULL. |  | ||||||
| 	 */ |  | ||||||
| 	if (!rq) |  | ||||||
| 		return 0; |  | ||||||
| 
 |  | ||||||
| 	dd->batching = 0; |  | ||||||
| 
 |  | ||||||
| dispatch_request: |  | ||||||
| 	/*
 |  | ||||||
| 	 * rq is the selected appropriate request. |  | ||||||
| 	 */ |  | ||||||
| 	dd->batching++; |  | ||||||
| 	deadline_move_request(dd, rq); |  | ||||||
| 
 |  | ||||||
| 	return 1; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| /*
 |  | ||||||
|  * For zoned block devices, write unlock the target zone of completed |  | ||||||
|  * write requests. |  | ||||||
|  */ |  | ||||||
| static void |  | ||||||
| deadline_completed_request(struct request_queue *q, struct request *rq) |  | ||||||
| { |  | ||||||
| 	blk_req_zone_write_unlock(rq); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static void deadline_exit_queue(struct elevator_queue *e) |  | ||||||
| { |  | ||||||
| 	struct deadline_data *dd = e->elevator_data; |  | ||||||
| 
 |  | ||||||
| 	BUG_ON(!list_empty(&dd->fifo_list[READ])); |  | ||||||
| 	BUG_ON(!list_empty(&dd->fifo_list[WRITE])); |  | ||||||
| 
 |  | ||||||
| 	kfree(dd); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| /*
 |  | ||||||
|  * initialize elevator private data (deadline_data). |  | ||||||
|  */ |  | ||||||
| static int deadline_init_queue(struct request_queue *q, struct elevator_type *e) |  | ||||||
| { |  | ||||||
| 	struct deadline_data *dd; |  | ||||||
| 	struct elevator_queue *eq; |  | ||||||
| 
 |  | ||||||
| 	eq = elevator_alloc(q, e); |  | ||||||
| 	if (!eq) |  | ||||||
| 		return -ENOMEM; |  | ||||||
| 
 |  | ||||||
| 	dd = kzalloc_node(sizeof(*dd), GFP_KERNEL, q->node); |  | ||||||
| 	if (!dd) { |  | ||||||
| 		kobject_put(&eq->kobj); |  | ||||||
| 		return -ENOMEM; |  | ||||||
| 	} |  | ||||||
| 	eq->elevator_data = dd; |  | ||||||
| 
 |  | ||||||
| 	INIT_LIST_HEAD(&dd->fifo_list[READ]); |  | ||||||
| 	INIT_LIST_HEAD(&dd->fifo_list[WRITE]); |  | ||||||
| 	dd->sort_list[READ] = RB_ROOT; |  | ||||||
| 	dd->sort_list[WRITE] = RB_ROOT; |  | ||||||
| 	dd->fifo_expire[READ] = read_expire; |  | ||||||
| 	dd->fifo_expire[WRITE] = write_expire; |  | ||||||
| 	dd->writes_starved = writes_starved; |  | ||||||
| 	dd->front_merges = 1; |  | ||||||
| 	dd->fifo_batch = fifo_batch; |  | ||||||
| 
 |  | ||||||
| 	spin_lock_irq(q->queue_lock); |  | ||||||
| 	q->elevator = eq; |  | ||||||
| 	spin_unlock_irq(q->queue_lock); |  | ||||||
| 	return 0; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| /*
 |  | ||||||
|  * sysfs parts below |  | ||||||
|  */ |  | ||||||
| 
 |  | ||||||
| static ssize_t |  | ||||||
| deadline_var_show(int var, char *page) |  | ||||||
| { |  | ||||||
| 	return sprintf(page, "%d\n", var); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static void |  | ||||||
| deadline_var_store(int *var, const char *page) |  | ||||||
| { |  | ||||||
| 	char *p = (char *) page; |  | ||||||
| 
 |  | ||||||
| 	*var = simple_strtol(p, &p, 10); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| #define SHOW_FUNCTION(__FUNC, __VAR, __CONV)				\ |  | ||||||
| static ssize_t __FUNC(struct elevator_queue *e, char *page)		\ |  | ||||||
| {									\ |  | ||||||
| 	struct deadline_data *dd = e->elevator_data;			\ |  | ||||||
| 	int __data = __VAR;						\ |  | ||||||
| 	if (__CONV)							\ |  | ||||||
| 		__data = jiffies_to_msecs(__data);			\ |  | ||||||
| 	return deadline_var_show(__data, (page));			\ |  | ||||||
| } |  | ||||||
| SHOW_FUNCTION(deadline_read_expire_show, dd->fifo_expire[READ], 1); |  | ||||||
| SHOW_FUNCTION(deadline_write_expire_show, dd->fifo_expire[WRITE], 1); |  | ||||||
| SHOW_FUNCTION(deadline_writes_starved_show, dd->writes_starved, 0); |  | ||||||
| SHOW_FUNCTION(deadline_front_merges_show, dd->front_merges, 0); |  | ||||||
| SHOW_FUNCTION(deadline_fifo_batch_show, dd->fifo_batch, 0); |  | ||||||
| #undef SHOW_FUNCTION |  | ||||||
| 
 |  | ||||||
| #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV)			\ |  | ||||||
| static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count)	\ |  | ||||||
| {									\ |  | ||||||
| 	struct deadline_data *dd = e->elevator_data;			\ |  | ||||||
| 	int __data;							\ |  | ||||||
| 	deadline_var_store(&__data, (page));				\ |  | ||||||
| 	if (__data < (MIN))						\ |  | ||||||
| 		__data = (MIN);						\ |  | ||||||
| 	else if (__data > (MAX))					\ |  | ||||||
| 		__data = (MAX);						\ |  | ||||||
| 	if (__CONV)							\ |  | ||||||
| 		*(__PTR) = msecs_to_jiffies(__data);			\ |  | ||||||
| 	else								\ |  | ||||||
| 		*(__PTR) = __data;					\ |  | ||||||
| 	return count;							\ |  | ||||||
| } |  | ||||||
| STORE_FUNCTION(deadline_read_expire_store, &dd->fifo_expire[READ], 0, INT_MAX, 1); |  | ||||||
| STORE_FUNCTION(deadline_write_expire_store, &dd->fifo_expire[WRITE], 0, INT_MAX, 1); |  | ||||||
| STORE_FUNCTION(deadline_writes_starved_store, &dd->writes_starved, INT_MIN, INT_MAX, 0); |  | ||||||
| STORE_FUNCTION(deadline_front_merges_store, &dd->front_merges, 0, 1, 0); |  | ||||||
| STORE_FUNCTION(deadline_fifo_batch_store, &dd->fifo_batch, 0, INT_MAX, 0); |  | ||||||
| #undef STORE_FUNCTION |  | ||||||
| 
 |  | ||||||
| #define DD_ATTR(name) \ |  | ||||||
| 	__ATTR(name, 0644, deadline_##name##_show, deadline_##name##_store) |  | ||||||
| 
 |  | ||||||
| static struct elv_fs_entry deadline_attrs[] = { |  | ||||||
| 	DD_ATTR(read_expire), |  | ||||||
| 	DD_ATTR(write_expire), |  | ||||||
| 	DD_ATTR(writes_starved), |  | ||||||
| 	DD_ATTR(front_merges), |  | ||||||
| 	DD_ATTR(fifo_batch), |  | ||||||
| 	__ATTR_NULL |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| static struct elevator_type iosched_deadline = { |  | ||||||
| 	.ops.sq = { |  | ||||||
| 		.elevator_merge_fn = 		deadline_merge, |  | ||||||
| 		.elevator_merged_fn =		deadline_merged_request, |  | ||||||
| 		.elevator_merge_req_fn =	deadline_merged_requests, |  | ||||||
| 		.elevator_dispatch_fn =		deadline_dispatch_requests, |  | ||||||
| 		.elevator_completed_req_fn =	deadline_completed_request, |  | ||||||
| 		.elevator_add_req_fn =		deadline_add_request, |  | ||||||
| 		.elevator_former_req_fn =	elv_rb_former_request, |  | ||||||
| 		.elevator_latter_req_fn =	elv_rb_latter_request, |  | ||||||
| 		.elevator_init_fn =		deadline_init_queue, |  | ||||||
| 		.elevator_exit_fn =		deadline_exit_queue, |  | ||||||
| 	}, |  | ||||||
| 
 |  | ||||||
| 	.elevator_attrs = deadline_attrs, |  | ||||||
| 	.elevator_name = "deadline", |  | ||||||
| 	.elevator_owner = THIS_MODULE, |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| static int __init deadline_init(void) |  | ||||||
| { |  | ||||||
| 	return elv_register(&iosched_deadline); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static void __exit deadline_exit(void) |  | ||||||
| { |  | ||||||
| 	elv_unregister(&iosched_deadline); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| module_init(deadline_init); |  | ||||||
| module_exit(deadline_exit); |  | ||||||
| 
 |  | ||||||
| MODULE_AUTHOR("Jens Axboe"); |  | ||||||
| MODULE_LICENSE("GPL"); |  | ||||||
| MODULE_DESCRIPTION("deadline IO scheduler"); |  | ||||||
| @ -225,8 +225,6 @@ int elevator_init(struct request_queue *q) | |||||||
| 							chosen_elevator); | 							chosen_elevator); | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	if (!e) |  | ||||||
| 		e = elevator_get(q, CONFIG_DEFAULT_IOSCHED, false); |  | ||||||
| 	if (!e) { | 	if (!e) { | ||||||
| 		printk(KERN_ERR | 		printk(KERN_ERR | ||||||
| 			"Default I/O scheduler not found. Using noop.\n"); | 			"Default I/O scheduler not found. Using noop.\n"); | ||||||
| @ -356,68 +354,6 @@ struct request *elv_rb_find(struct rb_root *root, sector_t sector) | |||||||
| } | } | ||||||
| EXPORT_SYMBOL(elv_rb_find); | EXPORT_SYMBOL(elv_rb_find); | ||||||
| 
 | 
 | ||||||
| /*
 |  | ||||||
|  * Insert rq into dispatch queue of q.  Queue lock must be held on |  | ||||||
|  * entry.  rq is instead sorted into the dispatch queue. To be used by |  | ||||||
|  * specific elevators. |  | ||||||
|  */ |  | ||||||
| void elv_dispatch_sort(struct request_queue *q, struct request *rq) |  | ||||||
| { |  | ||||||
| 	sector_t boundary; |  | ||||||
| 	struct list_head *entry; |  | ||||||
| 
 |  | ||||||
| 	if (q->last_merge == rq) |  | ||||||
| 		q->last_merge = NULL; |  | ||||||
| 
 |  | ||||||
| 	elv_rqhash_del(q, rq); |  | ||||||
| 
 |  | ||||||
| 	q->nr_sorted--; |  | ||||||
| 
 |  | ||||||
| 	boundary = q->end_sector; |  | ||||||
| 	list_for_each_prev(entry, &q->queue_head) { |  | ||||||
| 		struct request *pos = list_entry_rq(entry); |  | ||||||
| 
 |  | ||||||
| 		if (req_op(rq) != req_op(pos)) |  | ||||||
| 			break; |  | ||||||
| 		if (rq_data_dir(rq) != rq_data_dir(pos)) |  | ||||||
| 			break; |  | ||||||
| 		if (pos->rq_flags & (RQF_STARTED | RQF_SOFTBARRIER)) |  | ||||||
| 			break; |  | ||||||
| 		if (blk_rq_pos(rq) >= boundary) { |  | ||||||
| 			if (blk_rq_pos(pos) < boundary) |  | ||||||
| 				continue; |  | ||||||
| 		} else { |  | ||||||
| 			if (blk_rq_pos(pos) >= boundary) |  | ||||||
| 				break; |  | ||||||
| 		} |  | ||||||
| 		if (blk_rq_pos(rq) >= blk_rq_pos(pos)) |  | ||||||
| 			break; |  | ||||||
| 	} |  | ||||||
| 
 |  | ||||||
| 	list_add(&rq->queuelist, entry); |  | ||||||
| } |  | ||||||
| EXPORT_SYMBOL(elv_dispatch_sort); |  | ||||||
| 
 |  | ||||||
| /*
 |  | ||||||
|  * Insert rq into dispatch queue of q.  Queue lock must be held on |  | ||||||
|  * entry.  rq is added to the back of the dispatch queue. To be used by |  | ||||||
|  * specific elevators. |  | ||||||
|  */ |  | ||||||
| void elv_dispatch_add_tail(struct request_queue *q, struct request *rq) |  | ||||||
| { |  | ||||||
| 	if (q->last_merge == rq) |  | ||||||
| 		q->last_merge = NULL; |  | ||||||
| 
 |  | ||||||
| 	elv_rqhash_del(q, rq); |  | ||||||
| 
 |  | ||||||
| 	q->nr_sorted--; |  | ||||||
| 
 |  | ||||||
| 	q->end_sector = rq_end_sector(rq); |  | ||||||
| 	q->boundary_rq = rq; |  | ||||||
| 	list_add_tail(&rq->queuelist, &q->queue_head); |  | ||||||
| } |  | ||||||
| EXPORT_SYMBOL(elv_dispatch_add_tail); |  | ||||||
| 
 |  | ||||||
| enum elv_merge elv_merge(struct request_queue *q, struct request **req, | enum elv_merge elv_merge(struct request_queue *q, struct request **req, | ||||||
| 		struct bio *bio) | 		struct bio *bio) | ||||||
| { | { | ||||||
| @ -881,12 +817,6 @@ int elv_register(struct elevator_type *e) | |||||||
| 	list_add_tail(&e->list, &elv_list); | 	list_add_tail(&e->list, &elv_list); | ||||||
| 	spin_unlock(&elv_list_lock); | 	spin_unlock(&elv_list_lock); | ||||||
| 
 | 
 | ||||||
| 	/* print pretty message */ |  | ||||||
| 	if (elevator_match(e, chosen_elevator) || |  | ||||||
| 			(!*chosen_elevator && |  | ||||||
| 			 elevator_match(e, CONFIG_DEFAULT_IOSCHED))) |  | ||||||
| 				def = " (default)"; |  | ||||||
| 
 |  | ||||||
| 	printk(KERN_INFO "io scheduler %s registered%s\n", e->elevator_name, | 	printk(KERN_INFO "io scheduler %s registered%s\n", e->elevator_name, | ||||||
| 								def); | 								def); | ||||||
| 	return 0; | 	return 0; | ||||||
|  | |||||||
| @ -1,124 +0,0 @@ | |||||||
| /*
 |  | ||||||
|  * elevator noop |  | ||||||
|  */ |  | ||||||
| #include <linux/blkdev.h> |  | ||||||
| #include <linux/elevator.h> |  | ||||||
| #include <linux/bio.h> |  | ||||||
| #include <linux/module.h> |  | ||||||
| #include <linux/slab.h> |  | ||||||
| #include <linux/init.h> |  | ||||||
| 
 |  | ||||||
| struct noop_data { |  | ||||||
| 	struct list_head queue; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| static void noop_merged_requests(struct request_queue *q, struct request *rq, |  | ||||||
| 				 struct request *next) |  | ||||||
| { |  | ||||||
| 	list_del_init(&next->queuelist); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static int noop_dispatch(struct request_queue *q, int force) |  | ||||||
| { |  | ||||||
| 	struct noop_data *nd = q->elevator->elevator_data; |  | ||||||
| 	struct request *rq; |  | ||||||
| 
 |  | ||||||
| 	rq = list_first_entry_or_null(&nd->queue, struct request, queuelist); |  | ||||||
| 	if (rq) { |  | ||||||
| 		list_del_init(&rq->queuelist); |  | ||||||
| 		elv_dispatch_sort(q, rq); |  | ||||||
| 		return 1; |  | ||||||
| 	} |  | ||||||
| 	return 0; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static void noop_add_request(struct request_queue *q, struct request *rq) |  | ||||||
| { |  | ||||||
| 	struct noop_data *nd = q->elevator->elevator_data; |  | ||||||
| 
 |  | ||||||
| 	list_add_tail(&rq->queuelist, &nd->queue); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static struct request * |  | ||||||
| noop_former_request(struct request_queue *q, struct request *rq) |  | ||||||
| { |  | ||||||
| 	struct noop_data *nd = q->elevator->elevator_data; |  | ||||||
| 
 |  | ||||||
| 	if (rq->queuelist.prev == &nd->queue) |  | ||||||
| 		return NULL; |  | ||||||
| 	return list_prev_entry(rq, queuelist); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static struct request * |  | ||||||
| noop_latter_request(struct request_queue *q, struct request *rq) |  | ||||||
| { |  | ||||||
| 	struct noop_data *nd = q->elevator->elevator_data; |  | ||||||
| 
 |  | ||||||
| 	if (rq->queuelist.next == &nd->queue) |  | ||||||
| 		return NULL; |  | ||||||
| 	return list_next_entry(rq, queuelist); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static int noop_init_queue(struct request_queue *q, struct elevator_type *e) |  | ||||||
| { |  | ||||||
| 	struct noop_data *nd; |  | ||||||
| 	struct elevator_queue *eq; |  | ||||||
| 
 |  | ||||||
| 	eq = elevator_alloc(q, e); |  | ||||||
| 	if (!eq) |  | ||||||
| 		return -ENOMEM; |  | ||||||
| 
 |  | ||||||
| 	nd = kmalloc_node(sizeof(*nd), GFP_KERNEL, q->node); |  | ||||||
| 	if (!nd) { |  | ||||||
| 		kobject_put(&eq->kobj); |  | ||||||
| 		return -ENOMEM; |  | ||||||
| 	} |  | ||||||
| 	eq->elevator_data = nd; |  | ||||||
| 
 |  | ||||||
| 	INIT_LIST_HEAD(&nd->queue); |  | ||||||
| 
 |  | ||||||
| 	spin_lock_irq(q->queue_lock); |  | ||||||
| 	q->elevator = eq; |  | ||||||
| 	spin_unlock_irq(q->queue_lock); |  | ||||||
| 	return 0; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static void noop_exit_queue(struct elevator_queue *e) |  | ||||||
| { |  | ||||||
| 	struct noop_data *nd = e->elevator_data; |  | ||||||
| 
 |  | ||||||
| 	BUG_ON(!list_empty(&nd->queue)); |  | ||||||
| 	kfree(nd); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static struct elevator_type elevator_noop = { |  | ||||||
| 	.ops.sq = { |  | ||||||
| 		.elevator_merge_req_fn		= noop_merged_requests, |  | ||||||
| 		.elevator_dispatch_fn		= noop_dispatch, |  | ||||||
| 		.elevator_add_req_fn		= noop_add_request, |  | ||||||
| 		.elevator_former_req_fn		= noop_former_request, |  | ||||||
| 		.elevator_latter_req_fn		= noop_latter_request, |  | ||||||
| 		.elevator_init_fn		= noop_init_queue, |  | ||||||
| 		.elevator_exit_fn		= noop_exit_queue, |  | ||||||
| 	}, |  | ||||||
| 	.elevator_name = "noop", |  | ||||||
| 	.elevator_owner = THIS_MODULE, |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| static int __init noop_init(void) |  | ||||||
| { |  | ||||||
| 	return elv_register(&elevator_noop); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static void __exit noop_exit(void) |  | ||||||
| { |  | ||||||
| 	elv_unregister(&elevator_noop); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| module_init(noop_init); |  | ||||||
| module_exit(noop_exit); |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| MODULE_AUTHOR("Jens Axboe"); |  | ||||||
| MODULE_LICENSE("GPL"); |  | ||||||
| MODULE_DESCRIPTION("No-op IO scheduler"); |  | ||||||