for-5.17/block-2022-01-11
-----BEGIN PGP SIGNATURE-----

iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAmHd8DAQHGF4Ym9lQGtl
cm5lbC5kawAKCRD301j7KXHgpnhRD/wMAjsNO65PCA+o/bPpVi4ulx9EejAzrJnB
5vHFvREAoOOGKvRpYGe4w3TcKyW+zPb+GtlXFjPfK+wuVzWhrQtW/+vkjKlBt8wK
o7rzeMwTKJ9ZGvYaaQpp1yC0WURBB3qnCRQhb8dOQzhJgEXinhIOznZsut4mniLv
fTqcDmKAb/+G6K6CQCCqnH0I/+OJZyUeSFo1kk2i4ZqCBepQpBkOL6H2rBOtGxUg
bt1jiGHbbhCRYEE3u2kV0HP10qAChNaMQC705jV4Qpf4+3EntSxs+6nSb74dvMkX
3+Wmp8Ctq6lpPnDL1nrAFGz3jZnB0Y+GdgOclQn3ViQd1FCXZzuYWQ3fTaBfURCZ
/RE5nc047SqpwCFLOynM++OkaeQZ1zSxeyoFTtzDaPF4tLuaX3JHswvTzNGPw8SN
BnexseNnNBCjJliZSEE7fOkjJDcev2dvRxPtI8/wkF4lHUgETc5IW563C53xo/Tx
32yFjZwCVIpNWk21su/0H3iEq80wZ7PnriiN/E3JA6XbnevlRPu0NPMb0D258GCm
yCcdPVDNZsQCB8hluqZcu0g6LSgZRo90Yg1oqKqEpAllJJMBaEAPPPuUIJh998mo
iKGxZzgr7d9jrbGJTInp0F8b3B3/oV/hxgzy0Hu/mHP3AsnaAk9o/oEQZ7rX4Khr
6biloqkIMA==
=RWnJ
-----END PGP SIGNATURE-----

Merge tag 'for-5.17/block-2022-01-11' of git://git.kernel.dk/linux-block

Pull block updates from Jens Axboe:

 - Unify where the struct request handling code is located in the blk-mq code (Christoph)
 - Header cleanups (Christoph)
 - Clean up the io_context handling code (Christoph, me)
 - Get rid of ->rq_disk in struct request (Christoph)
 - Error handling fix for add_disk() (Christoph)
 - request allocation cleanups (Christoph)
 - Documentation updates (Eric, Matthew)
 - Remove trivial crypto unregister helper (Eric)
 - Reduce shared tag overhead (John)
 - Reduce poll_stats memory overhead (me)
 - Known indirect function call for dio (me)
 - Use atomic references for struct request (me)
 - Support request list issue for block and NVMe (me)
 - Improve queue dispatch pinning (Ming)
 - Improve the direct list issue code (Keith)
 - BFQ improvements (Jan)
 - Direct completion helper and use it in mmc block (Sebastian)
 - Use raw spinlock for the blktrace code (Wander)
 - fsync error handling fix (Ye)
 - Various fixes and cleanups (Lukas, Randy, Yang, Tetsuo, Ming, me)

* tag 'for-5.17/block-2022-01-11' of git://git.kernel.dk/linux-block: (132 commits)
  MAINTAINERS: add entries for block layer documentation
  docs: block: remove queue-sysfs.rst
  docs: sysfs-block: document virt_boundary_mask
  docs: sysfs-block: document stable_writes
  docs: sysfs-block: fill in missing documentation from queue-sysfs.rst
  docs: sysfs-block: add contact for nomerges
  docs: sysfs-block: sort alphabetically
  docs: sysfs-block: move to stable directory
  block: don't protect submit_bio_checks by q_usage_counter
  block: fix old-style declaration
  nvme-pci: fix queue_rqs list splitting
  block: introduce rq_list_move
  block: introduce rq_list_for_each_safe macro
  block: move rq_list macros to blk-mq.h
  block: drop needless assignment in set_task_ioprio()
  block: remove unnecessary trailing '\'
  bio.h: fix kernel-doc warnings
  block: check minor range in device_add_disk()
  block: use "unsigned long" for blk_validate_block_size().
  block: fix error unwinding in device_add_disk
  ...
commit d3c8108035

Documentation/ABI/stable/sysfs-block (new file, 676 lines)
@@ -0,0 +1,676 @@
What:           /sys/block/<disk>/alignment_offset
Date:           April 2009
Contact:        Martin K. Petersen <martin.petersen@oracle.com>
Description:
        Storage devices may report a physical block size that is bigger
        than the logical block size (for instance a drive with 4KB
        physical sectors exposing 512-byte logical blocks to the
        operating system). This parameter indicates how many bytes the
        beginning of the device is offset from the disk's natural
        alignment.


What:           /sys/block/<disk>/discard_alignment
Date:           May 2011
Contact:        Martin K. Petersen <martin.petersen@oracle.com>
Description:
        Devices that support discard functionality may internally
        allocate space in units that are bigger than the exported
        logical block size. The discard_alignment parameter indicates
        how many bytes the beginning of the device is offset from the
        internal allocation unit's natural alignment.


What:           /sys/block/<disk>/diskseq
Date:           February 2021
Contact:        Matteo Croce <mcroce@microsoft.com>
Description:
        The /sys/block/<disk>/diskseq file reports the disk sequence
        number, which is a monotonically increasing number assigned to
        every drive. Some devices, like the loop device, refresh this
        number every time the backing file is changed. The value is a
        64-bit unsigned integer.


What:           /sys/block/<disk>/inflight
Date:           October 2009
Contact:        Jens Axboe <axboe@kernel.dk>, Nikanth Karthikesan <knikanth@suse.de>
Description:
        Reports the number of I/O requests currently in progress
        (pending / in flight) in a device driver. This can be less than
        the number of requests queued in the block device queue. The
        report contains two fields: one for read requests and one for
        write requests. The value type is unsigned int.
        Cf. Documentation/block/stat.rst, which contains a single value
        for requests in flight. This is related to
        /sys/block/<disk>/queue/nr_requests and, for SCSI devices, also
        to their queue_depth.

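As an illustration only (a minimal Python 3 sketch, not part of the ABI text; it
assumes a readable sysfs and an existing disk such as "sda", and the helper name
read_inflight() is made up), the two inflight fields can be read and split like
this::

    from pathlib import Path

    def read_inflight(disk: str = "sda") -> tuple[int, int]:
        """Return (reads_in_flight, writes_in_flight) for a block device."""
        text = Path(f"/sys/block/{disk}/inflight").read_text()
        reads, writes = map(int, text.split())
        return reads, writes

    print(read_inflight("sda"))   # e.g. (0, 3)
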
What:           /sys/block/<disk>/integrity/device_is_integrity_capable
Date:           July 2014
Contact:        Martin K. Petersen <martin.petersen@oracle.com>
Description:
        Indicates whether a storage device is capable of storing
        integrity metadata. Set if the device is T10 PI-capable.


What:           /sys/block/<disk>/integrity/format
Date:           June 2008
Contact:        Martin K. Petersen <martin.petersen@oracle.com>
Description:
        Metadata format for an integrity-capable block device.
        E.g. T10-DIF-TYPE1-CRC.


What:           /sys/block/<disk>/integrity/protection_interval_bytes
Date:           July 2015
Contact:        Martin K. Petersen <martin.petersen@oracle.com>
Description:
        Describes the number of data bytes which are protected by one
        integrity tuple. Typically the device's logical block size.


What:           /sys/block/<disk>/integrity/read_verify
Date:           June 2008
Contact:        Martin K. Petersen <martin.petersen@oracle.com>
Description:
        Indicates whether the block layer should verify the integrity
        of read requests serviced by devices that support sending
        integrity metadata.


What:           /sys/block/<disk>/integrity/tag_size
Date:           June 2008
Contact:        Martin K. Petersen <martin.petersen@oracle.com>
Description:
        Number of bytes of integrity tag space available per 512 bytes
        of data.


What:           /sys/block/<disk>/integrity/write_generate
Date:           June 2008
Contact:        Martin K. Petersen <martin.petersen@oracle.com>
Description:
        Indicates whether the block layer should automatically generate
        checksums for write requests bound for devices that support
        receiving integrity metadata.


What:           /sys/block/<disk>/<partition>/alignment_offset
Date:           April 2009
Contact:        Martin K. Petersen <martin.petersen@oracle.com>
Description:
        Storage devices may report a physical block size that is bigger
        than the logical block size (for instance a drive with 4KB
        physical sectors exposing 512-byte logical blocks to the
        operating system). This parameter indicates how many bytes the
        beginning of the partition is offset from the disk's natural
        alignment.


What:           /sys/block/<disk>/<partition>/discard_alignment
Date:           May 2011
Contact:        Martin K. Petersen <martin.petersen@oracle.com>
Description:
        Devices that support discard functionality may internally
        allocate space in units that are bigger than the exported
        logical block size. The discard_alignment parameter indicates
        how many bytes the beginning of the partition is offset from
        the internal allocation unit's natural alignment.


What:           /sys/block/<disk>/<partition>/stat
Date:           February 2008
Contact:        Jerome Marchand <jmarchan@redhat.com>
Description:
        The /sys/block/<disk>/<partition>/stat files display the I/O
        statistics of partition <partition>. The format is the same as
        the format of /sys/block/<disk>/stat.


What:           /sys/block/<disk>/queue/add_random
Date:           June 2010
Contact:        linux-block@vger.kernel.org
Description:
        [RW] This file allows one to turn off the disk entropy
        contribution. The default value of this file is '1' (on).

What:           /sys/block/<disk>/queue/chunk_sectors
Date:           September 2016
Contact:        Hannes Reinecke <hare@suse.com>
Description:
        [RO] chunk_sectors has a different meaning depending on the
        type of the disk. For a RAID device (dm-raid), chunk_sectors
        indicates the size in 512B sectors of the RAID volume stripe
        segment. For a zoned block device, either host-aware or
        host-managed, chunk_sectors indicates the size in 512B sectors
        of the zones of the device, with the possible exception of the
        last zone of the device, which may be smaller.


What:           /sys/block/<disk>/queue/dax
Date:           June 2016
Contact:        linux-block@vger.kernel.org
Description:
        [RO] This file indicates whether the device supports Direct
        Access (DAX), used by CPU-addressable storage to bypass the
        pagecache. It shows '1' if true, '0' if not.


What:           /sys/block/<disk>/queue/discard_granularity
Date:           May 2011
Contact:        Martin K. Petersen <martin.petersen@oracle.com>
Description:
        [RO] Devices that support discard functionality may internally
        allocate space using units that are bigger than the logical
        block size. The discard_granularity parameter indicates the
        size of the internal allocation unit in bytes, if reported by
        the device. Otherwise discard_granularity is set to match the
        device's physical block size. A discard_granularity of 0 means
        that the device does not support discard functionality.


What:           /sys/block/<disk>/queue/discard_max_bytes
Date:           May 2011
Contact:        Martin K. Petersen <martin.petersen@oracle.com>
Description:
        [RW] While discard_max_hw_bytes is the hardware limit for the
        device, this setting is the software limit. Some devices
        exhibit large latencies when large discards are issued; setting
        this value lower will make Linux issue smaller discards and
        potentially help reduce latencies induced by large discard
        operations.


What:           /sys/block/<disk>/queue/discard_max_hw_bytes
Date:           July 2015
Contact:        linux-block@vger.kernel.org
Description:
        [RO] Devices that support discard functionality may have
        internal limits on the number of bytes that can be trimmed or
        unmapped in a single operation. The `discard_max_hw_bytes`
        parameter is set by the device driver to the maximum number of
        bytes that can be discarded in a single operation. Discard
        requests issued to the device must not exceed this limit. A
        `discard_max_hw_bytes` value of 0 means that the device does
        not support discard functionality.

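For illustration only (a minimal sketch outside the ABI text, assuming Python 3,
root privileges, and a device that supports discard; the 64 MiB cap is an
arbitrary example value, not a recommendation), the software limit can be read
and clamped below the hardware limit like this::

    from pathlib import Path

    def clamp_discard_limit(disk: str, cap_bytes: int = 64 * 1024 * 1024) -> int:
        queue = Path(f"/sys/block/{disk}/queue")
        hw_limit = int((queue / "discard_max_hw_bytes").read_text())
        if hw_limit == 0:
            raise RuntimeError(f"{disk} does not support discard")
        # The software limit must never exceed the hardware limit.
        new_limit = min(cap_bytes, hw_limit)
        (queue / "discard_max_bytes").write_text(str(new_limit))
        return new_limit
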
What:           /sys/block/<disk>/queue/discard_zeroes_data
Date:           May 2011
Contact:        Martin K. Petersen <martin.petersen@oracle.com>
Description:
        [RO] Will always return 0. Don't rely on any specific behavior
        for discards, and don't read this file.


What:           /sys/block/<disk>/queue/fua
Date:           May 2018
Contact:        linux-block@vger.kernel.org
Description:
        [RO] Whether or not the block driver supports the FUA flag for
        write requests. FUA stands for Force Unit Access. If the FUA
        flag is set, write requests must bypass the volatile cache of
        the storage device.


What:           /sys/block/<disk>/queue/hw_sector_size
Date:           January 2008
Contact:        linux-block@vger.kernel.org
Description:
        [RO] This is the hardware sector size of the device, in bytes.


What:           /sys/block/<disk>/queue/independent_access_ranges/
Date:           October 2021
Contact:        linux-block@vger.kernel.org
Description:
        [RO] The presence of this sub-directory of the
        /sys/block/xxx/queue/ directory indicates that the device is
        capable of executing requests targeting different sector ranges
        in parallel. For instance, single-LUN multi-actuator hard disks
        will have an independent_access_ranges directory if the device
        correctly advertises the sector ranges of its actuators.

        The independent_access_ranges directory contains one directory
        per access range, with each range described using the sector
        (RO) attribute file to indicate the first sector of the range
        and the nr_sectors (RO) attribute file to indicate the total
        number of sectors in the range starting from the first sector
        of the range. For example, a dual-actuator hard disk will have
        the following independent_access_ranges entries::

                $ tree /sys/block/<disk>/queue/independent_access_ranges/
                /sys/block/<disk>/queue/independent_access_ranges/
                |-- 0
                |   |-- nr_sectors
                |   `-- sector
                `-- 1
                    |-- nr_sectors
                    `-- sector

        The sector and nr_sectors attributes use 512B sector units,
        regardless of the actual block size of the device. Independent
        access ranges do not overlap and include all sectors within the
        device capacity. The access ranges are numbered in increasing
        order of the range start sector, that is, the sector attribute
        of range 0 always has the value 0.

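A minimal sketch (not part of the ABI text; it assumes Python 3 and a disk that
actually exposes this directory, and the helper name access_ranges() is made up)
showing how the per-range sector and nr_sectors attributes can be collected::

    from pathlib import Path

    def access_ranges(disk: str) -> list[tuple[int, int]]:
        """Return (first_sector, nr_sectors) for each independent access range."""
        base = Path(f"/sys/block/{disk}/queue/independent_access_ranges")
        if not base.is_dir():
            return []          # device does not advertise access ranges
        ranges = []
        for entry in sorted(base.iterdir(), key=lambda p: int(p.name)):
            first = int((entry / "sector").read_text())
            count = int((entry / "nr_sectors").read_text())
            ranges.append((first, count))
        return ranges
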
What:           /sys/block/<disk>/queue/io_poll
Date:           November 2015
Contact:        linux-block@vger.kernel.org
Description:
        [RW] When read, this file shows whether polling is enabled (1)
        or disabled (0). Writing '0' to this file will disable polling
        for this device. Writing any non-zero value will enable this
        feature.


What:           /sys/block/<disk>/queue/io_poll_delay
Date:           November 2016
Contact:        linux-block@vger.kernel.org
Description:
        [RW] If polling is enabled, this controls what kind of polling
        will be performed. It defaults to -1, which is classic polling.
        In this mode, the CPU will repeatedly ask for completions
        without giving up any time. If set to 0, a hybrid polling mode
        is used, where the kernel will attempt to make an educated
        guess at when the IO will complete. Based on this guess, the
        kernel will put the process issuing IO to sleep for an amount
        of time, before entering a classic poll loop. This mode might
        be a little slower than pure classic polling, but it will be
        more efficient. If set to a value larger than 0, the kernel
        will put the process issuing IO to sleep for this number of
        microseconds before entering classic polling.

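As a sketch only (assuming Python 3, root privileges, and a device for which
polling is meaningful; the values written simply follow the semantics described
above, and the helper name is made up)::

    from pathlib import Path

    def enable_hybrid_polling(disk: str) -> None:
        queue = Path(f"/sys/block/{disk}/queue")
        (queue / "io_poll").write_text("1")        # any non-zero value enables polling
        (queue / "io_poll_delay").write_text("0")  # 0 selects hybrid polling
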
What:           /sys/block/<disk>/queue/io_timeout
Date:           November 2018
Contact:        Weiping Zhang <zhangweiping@didiglobal.com>
Description:
        [RW] io_timeout is the request timeout in milliseconds. If a
        request does not complete in this time then the block driver
        timeout handler is invoked. That timeout handler can decide to
        retry the request, to fail it, or to start a device recovery
        strategy.


What:           /sys/block/<disk>/queue/iostats
Date:           January 2009
Contact:        linux-block@vger.kernel.org
Description:
        [RW] This file is used to control (on/off) the iostats
        accounting of the disk.


What:           /sys/block/<disk>/queue/logical_block_size
Date:           May 2009
Contact:        Martin K. Petersen <martin.petersen@oracle.com>
Description:
        [RO] This is the smallest unit the storage device can address.
        It is typically 512 bytes.


What:           /sys/block/<disk>/queue/max_active_zones
Date:           July 2020
Contact:        Niklas Cassel <niklas.cassel@wdc.com>
Description:
        [RO] For zoned block devices (zoned attribute indicating
        "host-managed" or "host-aware"), the sum of zones belonging to
        any of the zone states EXPLICIT OPEN, IMPLICIT OPEN or CLOSED
        is limited by this value. If this value is 0, there is no
        limit.

        If the host attempts to exceed this limit, the driver should
        report this error with BLK_STS_ZONE_ACTIVE_RESOURCE, which user
        space may see as the EOVERFLOW errno.


What:           /sys/block/<disk>/queue/max_discard_segments
Date:           February 2017
Contact:        linux-block@vger.kernel.org
Description:
        [RO] The maximum number of DMA scatter/gather entries in a
        discard request.


What:           /sys/block/<disk>/queue/max_hw_sectors_kb
Date:           September 2004
Contact:        linux-block@vger.kernel.org
Description:
        [RO] This is the maximum number of kilobytes supported in a
        single data transfer.


What:           /sys/block/<disk>/queue/max_integrity_segments
Date:           September 2010
Contact:        linux-block@vger.kernel.org
Description:
        [RO] Maximum number of elements in a DMA scatter/gather list
        with integrity data that will be submitted by the block layer
        core to the associated block driver.


What:           /sys/block/<disk>/queue/max_open_zones
Date:           July 2020
Contact:        Niklas Cassel <niklas.cassel@wdc.com>
Description:
        [RO] For zoned block devices (zoned attribute indicating
        "host-managed" or "host-aware"), the sum of zones belonging to
        any of the zone states EXPLICIT OPEN or IMPLICIT OPEN is
        limited by this value. If this value is 0, there is no limit.


What:           /sys/block/<disk>/queue/max_sectors_kb
Date:           September 2004
Contact:        linux-block@vger.kernel.org
Description:
        [RW] This is the maximum number of kilobytes that the block
        layer will allow for a filesystem request. Must be smaller than
        or equal to the maximum size allowed by the hardware.

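Purely as an illustration (a sketch assuming Python 3 and root privileges; the
512 KiB target is an arbitrary example), a value can be validated against the
hardware limit before it is written::

    from pathlib import Path

    def set_max_sectors_kb(disk: str, target_kb: int = 512) -> int:
        queue = Path(f"/sys/block/{disk}/queue")
        hw_kb = int((queue / "max_hw_sectors_kb").read_text())
        value = min(target_kb, hw_kb)   # must not exceed the hardware limit
        (queue / "max_sectors_kb").write_text(str(value))
        return value
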
What:           /sys/block/<disk>/queue/max_segment_size
Date:           March 2010
Contact:        linux-block@vger.kernel.org
Description:
        [RO] Maximum size in bytes of a single element in a DMA
        scatter/gather list.


What:           /sys/block/<disk>/queue/max_segments
Date:           March 2010
Contact:        linux-block@vger.kernel.org
Description:
        [RO] Maximum number of elements in a DMA scatter/gather list
        that is submitted to the associated block driver.


What:           /sys/block/<disk>/queue/minimum_io_size
Date:           April 2009
Contact:        Martin K. Petersen <martin.petersen@oracle.com>
Description:
        [RO] Storage devices may report a granularity or preferred
        minimum I/O size, which is the smallest request the device can
        perform without incurring a performance penalty. For disk
        drives this is often the physical block size. For RAID arrays
        it is often the stripe chunk size. A properly aligned multiple
        of minimum_io_size is the preferred request size for workloads
        where a high number of I/O operations is desired.


What:           /sys/block/<disk>/queue/nomerges
Date:           January 2010
Contact:        linux-block@vger.kernel.org
Description:
        [RW] Standard I/O elevator operations include attempts to merge
        contiguous I/Os. For known random I/O loads these attempts will
        always fail and result in extra cycles being spent in the
        kernel. This allows one to turn off this behavior in one of two
        ways: when set to 1, complex merge checks are disabled, but the
        simple one-shot merges with the previous I/O request are still
        enabled; when set to 2, all merge tries are disabled. The
        default value is 0, which enables all types of merge tries.


What:           /sys/block/<disk>/queue/nr_requests
Date:           July 2003
Contact:        linux-block@vger.kernel.org
Description:
        [RW] This controls how many requests may be allocated in the
        block layer for read or write requests. Note that the total
        allocated number may be twice this amount, since it applies
        only to reads or writes (not the accumulated sum).

        To avoid priority inversion through request starvation, a
        request queue maintains a separate request pool per cgroup when
        CONFIG_BLK_CGROUP is enabled, and this parameter applies to
        each such per-block-cgroup request pool. IOW, if there are N
        block cgroups, each request queue may have up to N request
        pools, each independently regulated by nr_requests.


What:           /sys/block/<disk>/queue/nr_zones
Date:           November 2018
Contact:        Damien Le Moal <damien.lemoal@wdc.com>
Description:
        [RO] nr_zones indicates the total number of zones of a zoned
        block device ("host-aware" or "host-managed" zone model). For
        regular block devices, the value is always 0.


What:           /sys/block/<disk>/queue/optimal_io_size
Date:           April 2009
Contact:        Martin K. Petersen <martin.petersen@oracle.com>
Description:
        [RO] Storage devices may report an optimal I/O size, which is
        the device's preferred unit for sustained I/O. This is rarely
        reported for disk drives. For RAID arrays it is usually the
        stripe width or the internal track size. A properly aligned
        multiple of optimal_io_size is the preferred request size for
        workloads where sustained throughput is desired. If no optimal
        I/O size is reported, this file contains 0.

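A worked sketch (outside the ABI text; assumes Python 3, that both attributes
are readable, and that rounding up to the larger of the two hints is an
acceptable policy for the caller) of choosing a request size that is a properly
aligned multiple of these hints::

    from pathlib import Path

    def preferred_request_size(disk: str, wanted: int) -> int:
        """Round `wanted` (bytes) up to a multiple of the device's I/O hints."""
        queue = Path(f"/sys/block/{disk}/queue")
        minimum = int((queue / "minimum_io_size").read_text()) or 512
        optimal = int((queue / "optimal_io_size").read_text()) or minimum
        granule = max(minimum, optimal)
        return ((wanted + granule - 1) // granule) * granule
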
What:           /sys/block/<disk>/queue/physical_block_size
Date:           May 2009
Contact:        Martin K. Petersen <martin.petersen@oracle.com>
Description:
        [RO] This is the smallest unit a physical storage device can
        write atomically. It is usually the same as the logical block
        size but may be bigger. One example is SATA drives with 4KB
        sectors that expose a 512-byte logical block size to the
        operating system. For stacked block devices the
        physical_block_size variable contains the maximum
        physical_block_size of the component devices.

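To make the relationship between these attributes concrete, here is a small
sketch (not taken from the kernel documentation; the formula is the author's
illustration of one common way these values are combined, assuming Python 3 and
a partition start expressed in 512-byte sectors) that checks whether a partition
starts on a physical block boundary::

    from pathlib import Path

    def partition_is_aligned(disk: str, start_sector: int) -> bool:
        queue = Path(f"/sys/block/{disk}/queue")
        physical = int((queue / "physical_block_size").read_text())
        offset = int(Path(f"/sys/block/{disk}/alignment_offset").read_text())
        # Aligned if the byte offset, corrected by the device's
        # alignment_offset, is a multiple of the physical block size.
        return (start_sector * 512 + offset) % physical == 0
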
What:           /sys/block/<disk>/queue/read_ahead_kb
Date:           May 2004
Contact:        linux-block@vger.kernel.org
Description:
        [RW] Maximum number of kilobytes to read-ahead for filesystems
        on this block device.


What:           /sys/block/<disk>/queue/rotational
Date:           January 2009
Contact:        linux-block@vger.kernel.org
Description:
        [RW] This file is used to state whether the device is of the
        rotational or the non-rotational type.


What:           /sys/block/<disk>/queue/rq_affinity
Date:           September 2008
Contact:        linux-block@vger.kernel.org
Description:
        [RW] If this option is '1', the block layer will migrate
        request completions to the CPU "group" that originally
        submitted the request. For some workloads this provides a
        significant reduction in CPU cycles due to caching effects.

        For storage configurations that need to maximize distribution
        of completion processing, setting this option to '2' forces the
        completion to run on the requesting CPU (bypassing the "group"
        aggregation logic).


What:           /sys/block/<disk>/queue/scheduler
Date:           October 2004
Contact:        linux-block@vger.kernel.org
Description:
        [RW] When read, this file will display the current and
        available IO schedulers for this block device. The currently
        active IO scheduler will be enclosed in [] brackets. Writing an
        IO scheduler name to this file will switch control of this
        block device to that new IO scheduler. Note that writing an IO
        scheduler name to this file will attempt to load that IO
        scheduler module, if it isn't already present in the system.

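A minimal sketch (not part of the ABI description; assumes Python 3, root
privileges for the write, and that the requested scheduler, e.g. "mq-deadline",
is built in or available as a module) for reading the active scheduler and
switching it::

    from pathlib import Path

    def current_scheduler(disk: str) -> str:
        text = Path(f"/sys/block/{disk}/queue/scheduler").read_text()
        # The active scheduler is the token enclosed in [] brackets.
        return text[text.index("[") + 1:text.index("]")]

    def set_scheduler(disk: str, name: str = "mq-deadline") -> None:
        Path(f"/sys/block/{disk}/queue/scheduler").write_text(name)
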
What:           /sys/block/<disk>/queue/stable_writes
Date:           September 2020
Contact:        linux-block@vger.kernel.org
Description:
        [RW] This file will contain '1' if memory must not be modified
        while it is being used in a write request to this device. When
        this is the case and the kernel is performing writeback of a
        page, the kernel will wait for writeback to complete before
        allowing the page to be modified again, rather than allowing
        immediate modification as is normally the case. This
        restriction arises when the device accesses the memory multiple
        times and the same data must be seen every time -- for example,
        once to calculate a checksum and once to actually write the
        data. If no such restriction exists, this file will contain
        '0'. This file is writable for testing purposes.


What:           /sys/block/<disk>/queue/throttle_sample_time
Date:           March 2017
Contact:        linux-block@vger.kernel.org
Description:
        [RW] This is the time window over which blk-throttle samples
        data, in milliseconds. blk-throttle makes decisions based on
        these samples. A lower time means cgroups have smoother
        throughput, but higher CPU overhead. This exists only when
        CONFIG_BLK_DEV_THROTTLING_LOW is enabled.


What:           /sys/block/<disk>/queue/virt_boundary_mask
Date:           April 2021
Contact:        linux-block@vger.kernel.org
Description:
        [RO] This file shows the I/O segment memory alignment mask for
        the block device. I/O requests to this device will be split
        between segments wherever either the memory address of the end
        of the previous segment or the memory address of the beginning
        of the current segment is not aligned to virt_boundary_mask + 1
        bytes.

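As an illustration of the splitting rule just stated (a sketch, not kernel
code; it assumes Python 3 and buffer addresses given as plain integers, and it
only restates the alignment condition above, ignoring any other constraints the
kernel applies)::

    def needs_split(prev_end_addr: int, next_start_addr: int, boundary_mask: int) -> bool:
        """True if two adjacent segments violate the virtual boundary mask."""
        if boundary_mask == 0:
            return False  # no virtual boundary constraint
        # Both addresses must be aligned to (virt_boundary_mask + 1) bytes.
        return bool((prev_end_addr | next_start_addr) & boundary_mask)
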
What:           /sys/block/<disk>/queue/wbt_lat_usec
Date:           November 2016
Contact:        linux-block@vger.kernel.org
Description:
        [RW] If the device is registered for writeback throttling, then
        this file shows the target minimum read latency. If this
        latency is exceeded in a given window of time (see
        wb_window_usec), then the writeback throttling will start
        scaling back writes. Writing a value of '0' to this file
        disables the feature. Writing a value of '-1' to this file
        resets the value to the default setting.


What:           /sys/block/<disk>/queue/write_cache
Date:           April 2016
Contact:        linux-block@vger.kernel.org
Description:
        [RW] When read, this file will display whether the device has
        write back caching enabled or not. It will return "write back"
        for the former case, and "write through" for the latter.
        Writing to this file can change the kernel's view of the
        device, but it doesn't alter the device state. This means that
        it might not be safe to toggle the setting from "write back" to
        "write through", since that will also eliminate cache flushes
        issued by the kernel.

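A read-only sketch (not part of the ABI text; assumes Python 3), which
deliberately avoids writing to the file because of the safety caveat above::

    from pathlib import Path

    def has_volatile_write_cache(disk: str) -> bool:
        mode = Path(f"/sys/block/{disk}/queue/write_cache").read_text().strip()
        return mode == "write back"
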
What:           /sys/block/<disk>/queue/write_same_max_bytes
Date:           January 2012
Contact:        Martin K. Petersen <martin.petersen@oracle.com>
Description:
        [RO] Some devices support a write same operation in which a
        single data block can be written to a range of several
        contiguous blocks on storage. This can be used to wipe areas on
        disk or to initialize drives in a RAID configuration.
        write_same_max_bytes indicates how many bytes can be written in
        a single write same command. If write_same_max_bytes is 0,
        write same is not supported by the device.


What:           /sys/block/<disk>/queue/write_zeroes_max_bytes
Date:           November 2016
Contact:        Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Description:
        [RO] Some devices support a write zeroes operation in which a
        single request can be issued to zero out a range of contiguous
        blocks on storage without carrying any payload in the request.
        This can be used to optimize writing zeroes to the device.
        write_zeroes_max_bytes indicates how many bytes can be written
        in a single write zeroes command. If write_zeroes_max_bytes is
        0, write zeroes is not supported by the device.


What:           /sys/block/<disk>/queue/zone_append_max_bytes
Date:           May 2020
Contact:        linux-block@vger.kernel.org
Description:
        [RO] This is the maximum number of bytes that can be written to
        a sequential zone of a zoned block device using a zone append
        write operation (REQ_OP_ZONE_APPEND). This value is always 0
        for regular block devices.


What:           /sys/block/<disk>/queue/zone_write_granularity
Date:           January 2021
Contact:        linux-block@vger.kernel.org
Description:
        [RO] This indicates the alignment constraint, in bytes, for
        write operations in sequential zones of zoned block devices
        (devices with a zoned attribute that reports "host-managed" or
        "host-aware"). This value is always 0 for regular block
        devices.


What:           /sys/block/<disk>/queue/zoned
Date:           September 2016
Contact:        Damien Le Moal <damien.lemoal@wdc.com>
Description:
        [RO] zoned indicates if the device is a zoned block device and,
        if so, the zone model of the device. The possible values
        indicated by zoned are "none" for regular block devices and
        "host-aware" or "host-managed" for zoned block devices. The
        characteristics of host-aware and host-managed zoned block
        devices are described in the ZBC (Zoned Block Commands) and ZAC
        (Zoned Device ATA Command Set) standards. These standards also
        define the "drive-managed" zone model. However, since
        drive-managed zoned block devices do not support zone commands,
        they will be treated as regular block devices and zoned will
        report "none".

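A sketch (outside the ABI text; assumes Python 3 and a zoned device, and the
helper name zone_geometry() is made up) that combines zoned, nr_zones and
chunk_sectors to summarize the zone geometry::

    from pathlib import Path

    def zone_geometry(disk: str) -> dict:
        queue = Path(f"/sys/block/{disk}/queue")
        model = (queue / "zoned").read_text().strip()
        if model == "none":
            return {"model": model}
        zones = int((queue / "nr_zones").read_text())
        zone_sectors = int((queue / "chunk_sectors").read_text())
        return {
            "model": model,                    # "host-aware" or "host-managed"
            "nr_zones": zones,
            "zone_bytes": zone_sectors * 512,  # chunk_sectors is in 512B units
        }
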
What:           /sys/block/<disk>/stat
Date:           February 2008
Contact:        Jerome Marchand <jmarchan@redhat.com>
Description:
        The /sys/block/<disk>/stat file displays the I/O statistics of
        disk <disk>. It contains 17 fields:

        ===  ==============================================
         1   reads completed successfully
         2   reads merged
         3   sectors read
         4   time spent reading (ms)
         5   writes completed
         6   writes merged
         7   sectors written
         8   time spent writing (ms)
         9   I/Os currently in progress
        10   time spent doing I/Os (ms)
        11   weighted time spent doing I/Os (ms)
        12   discards completed
        13   discards merged
        14   sectors discarded
        15   time spent discarding (ms)
        16   flush requests completed
        17   time spent flushing (ms)
        ===  ==============================================

        For more details, refer to Documentation/admin-guide/iostats.rst.

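To show how the 17 columns line up in practice, here is a small parser sketch
(not from the kernel docs; assumes Python 3 and a kernel recent enough to
expose all 17 fields, and the field names are the author's labels)::

    from pathlib import Path

    STAT_FIELDS = (
        "reads_completed", "reads_merged", "sectors_read", "read_time_ms",
        "writes_completed", "writes_merged", "sectors_written", "write_time_ms",
        "io_in_progress", "io_time_ms", "weighted_io_time_ms",
        "discards_completed", "discards_merged", "sectors_discarded",
        "discard_time_ms", "flushes_completed", "flush_time_ms",
    )

    def read_stat(disk: str) -> dict[str, int]:
        values = Path(f"/sys/block/{disk}/stat").read_text().split()
        # Older kernels report fewer columns; zip() tolerates that.
        return dict(zip(STAT_FIELDS, map(int, values)))
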
@@ -1,346 +0,0 @@

What:           /sys/block/<disk>/stat
Date:           February 2008
Contact:        Jerome Marchand <jmarchan@redhat.com>
Description:
        The /sys/block/<disk>/stat file displays the I/O statistics of
        disk <disk>. It contains 17 fields:

        ===  ==============================================
         1   reads completed successfully
         2   reads merged
         3   sectors read
         4   time spent reading (ms)
         5   writes completed
         6   writes merged
         7   sectors written
         8   time spent writing (ms)
         9   I/Os currently in progress
        10   time spent doing I/Os (ms)
        11   weighted time spent doing I/Os (ms)
        12   discards completed
        13   discards merged
        14   sectors discarded
        15   time spent discarding (ms)
        16   flush requests completed
        17   time spent flushing (ms)
        ===  ==============================================

        For more details, refer to Documentation/admin-guide/iostats.rst.


What:           /sys/block/<disk>/inflight
Date:           October 2009
Contact:        Jens Axboe <axboe@kernel.dk>, Nikanth Karthikesan <knikanth@suse.de>
Description:
        Reports the number of I/O requests currently in progress
        (pending / in flight) in a device driver. This can be less than
        the number of requests queued in the block device queue. The
        report contains two fields: one for read requests and one for
        write requests. The value type is unsigned int.
        Cf. Documentation/block/stat.rst, which contains a single value
        for requests in flight. This is related to nr_requests in
        Documentation/block/queue-sysfs.rst and, for SCSI devices, also
        to their queue_depth.


What:           /sys/block/<disk>/diskseq
Date:           February 2021
Contact:        Matteo Croce <mcroce@microsoft.com>
Description:
        The /sys/block/<disk>/diskseq file reports the disk sequence
        number, which is a monotonically increasing number assigned to
        every drive. Some devices, like the loop device, refresh this
        number every time the backing file is changed. The value is a
        64-bit unsigned integer.


What:           /sys/block/<disk>/<part>/stat
Date:           February 2008
Contact:        Jerome Marchand <jmarchan@redhat.com>
Description:
        The /sys/block/<disk>/<part>/stat files display the I/O
        statistics of partition <part>. The format is the same as the
        /sys/block/<disk>/stat format described above.


What:           /sys/block/<disk>/integrity/format
Date:           June 2008
Contact:        Martin K. Petersen <martin.petersen@oracle.com>
Description:
        Metadata format for an integrity-capable block device.
        E.g. T10-DIF-TYPE1-CRC.


What:           /sys/block/<disk>/integrity/read_verify
Date:           June 2008
Contact:        Martin K. Petersen <martin.petersen@oracle.com>
Description:
        Indicates whether the block layer should verify the integrity
        of read requests serviced by devices that support sending
        integrity metadata.


What:           /sys/block/<disk>/integrity/tag_size
Date:           June 2008
Contact:        Martin K. Petersen <martin.petersen@oracle.com>
Description:
        Number of bytes of integrity tag space available per 512 bytes
        of data.


What:           /sys/block/<disk>/integrity/device_is_integrity_capable
Date:           July 2014
Contact:        Martin K. Petersen <martin.petersen@oracle.com>
Description:
        Indicates whether a storage device is capable of storing
        integrity metadata. Set if the device is T10 PI-capable.


What:           /sys/block/<disk>/integrity/protection_interval_bytes
Date:           July 2015
Contact:        Martin K. Petersen <martin.petersen@oracle.com>
Description:
        Describes the number of data bytes which are protected by one
        integrity tuple. Typically the device's logical block size.


What:           /sys/block/<disk>/integrity/write_generate
Date:           June 2008
Contact:        Martin K. Petersen <martin.petersen@oracle.com>
Description:
        Indicates whether the block layer should automatically generate
        checksums for write requests bound for devices that support
        receiving integrity metadata.


What:           /sys/block/<disk>/alignment_offset
Date:           April 2009
Contact:        Martin K. Petersen <martin.petersen@oracle.com>
Description:
        Storage devices may report a physical block size that is bigger
        than the logical block size (for instance a drive with 4KB
        physical sectors exposing 512-byte logical blocks to the
        operating system). This parameter indicates how many bytes the
        beginning of the device is offset from the disk's natural
        alignment.


What:           /sys/block/<disk>/<partition>/alignment_offset
Date:           April 2009
Contact:        Martin K. Petersen <martin.petersen@oracle.com>
Description:
        Storage devices may report a physical block size that is bigger
        than the logical block size (for instance a drive with 4KB
        physical sectors exposing 512-byte logical blocks to the
        operating system). This parameter indicates how many bytes the
        beginning of the partition is offset from the disk's natural
        alignment.


What:           /sys/block/<disk>/queue/logical_block_size
Date:           May 2009
Contact:        Martin K. Petersen <martin.petersen@oracle.com>
Description:
        This is the smallest unit the storage device can address. It is
        typically 512 bytes.


What:           /sys/block/<disk>/queue/physical_block_size
Date:           May 2009
Contact:        Martin K. Petersen <martin.petersen@oracle.com>
Description:
        This is the smallest unit a physical storage device can write
        atomically. It is usually the same as the logical block size
        but may be bigger. One example is SATA drives with 4KB sectors
        that expose a 512-byte logical block size to the operating
        system. For stacked block devices the physical_block_size
        variable contains the maximum physical_block_size of the
        component devices.


What:           /sys/block/<disk>/queue/minimum_io_size
Date:           April 2009
Contact:        Martin K. Petersen <martin.petersen@oracle.com>
Description:
        Storage devices may report a granularity or preferred minimum
        I/O size, which is the smallest request the device can perform
        without incurring a performance penalty. For disk drives this
        is often the physical block size. For RAID arrays it is often
        the stripe chunk size. A properly aligned multiple of
        minimum_io_size is the preferred request size for workloads
        where a high number of I/O operations is desired.


What:           /sys/block/<disk>/queue/optimal_io_size
Date:           April 2009
Contact:        Martin K. Petersen <martin.petersen@oracle.com>
Description:
        Storage devices may report an optimal I/O size, which is the
        device's preferred unit for sustained I/O. This is rarely
        reported for disk drives. For RAID arrays it is usually the
        stripe width or the internal track size. A properly aligned
        multiple of optimal_io_size is the preferred request size for
        workloads where sustained throughput is desired. If no optimal
        I/O size is reported, this file contains 0.


What:           /sys/block/<disk>/queue/nomerges
Date:           January 2010
Contact:
Description:
        Standard I/O elevator operations include attempts to merge
        contiguous I/Os. For known random I/O loads these attempts will
        always fail and result in extra cycles being spent in the
        kernel. This allows one to turn off this behavior in one of two
        ways: when set to 1, complex merge checks are disabled, but the
        simple one-shot merges with the previous I/O request are still
        enabled; when set to 2, all merge tries are disabled. The
        default value is 0, which enables all types of merge tries.


What:           /sys/block/<disk>/discard_alignment
Date:           May 2011
Contact:        Martin K. Petersen <martin.petersen@oracle.com>
Description:
        Devices that support discard functionality may internally
        allocate space in units that are bigger than the exported
        logical block size. The discard_alignment parameter indicates
        how many bytes the beginning of the device is offset from the
        internal allocation unit's natural alignment.


What:           /sys/block/<disk>/<partition>/discard_alignment
Date:           May 2011
Contact:        Martin K. Petersen <martin.petersen@oracle.com>
Description:
        Devices that support discard functionality may internally
        allocate space in units that are bigger than the exported
        logical block size. The discard_alignment parameter indicates
        how many bytes the beginning of the partition is offset from
        the internal allocation unit's natural alignment.


What:           /sys/block/<disk>/queue/discard_granularity
Date:           May 2011
Contact:        Martin K. Petersen <martin.petersen@oracle.com>
Description:
        Devices that support discard functionality may internally
        allocate space using units that are bigger than the logical
        block size. The discard_granularity parameter indicates the
        size of the internal allocation unit in bytes, if reported by
        the device. Otherwise discard_granularity is set to match the
        device's physical block size. A discard_granularity of 0 means
        that the device does not support discard functionality.


What:           /sys/block/<disk>/queue/discard_max_bytes
Date:           May 2011
Contact:        Martin K. Petersen <martin.petersen@oracle.com>
Description:
        Devices that support discard functionality may have internal
        limits on the number of bytes that can be trimmed or unmapped
        in a single operation. Some storage protocols also have
        inherent limits on the number of blocks that can be described
        in a single command. The discard_max_bytes parameter is set by
        the device driver to the maximum number of bytes that can be
        discarded in a single operation. Discard requests issued to the
        device must not exceed this limit. A discard_max_bytes value of
        0 means that the device does not support discard functionality.


What:           /sys/block/<disk>/queue/discard_zeroes_data
Date:           May 2011
Contact:        Martin K. Petersen <martin.petersen@oracle.com>
Description:
        Will always return 0. Don't rely on any specific behavior for
        discards, and don't read this file.


What:           /sys/block/<disk>/queue/write_same_max_bytes
Date:           January 2012
Contact:        Martin K. Petersen <martin.petersen@oracle.com>
Description:
        Some devices support a write same operation in which a single
        data block can be written to a range of several contiguous
        blocks on storage. This can be used to wipe areas on disk or to
        initialize drives in a RAID configuration. write_same_max_bytes
        indicates how many bytes can be written in a single write same
        command. If write_same_max_bytes is 0, write same is not
        supported by the device.


What:           /sys/block/<disk>/queue/write_zeroes_max_bytes
Date:           November 2016
Contact:        Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Description:
        Some devices support a write zeroes operation in which a single
        request can be issued to zero out a range of contiguous blocks
        on storage without carrying any payload in the request. This
        can be used to optimize writing zeroes to the device.
        write_zeroes_max_bytes indicates how many bytes can be written
        in a single write zeroes command. If write_zeroes_max_bytes is
        0, write zeroes is not supported by the device.


What:           /sys/block/<disk>/queue/zoned
Date:           September 2016
Contact:        Damien Le Moal <damien.lemoal@wdc.com>
Description:
        zoned indicates if the device is a zoned block device and, if
        so, the zone model of the device. The possible values indicated
        by zoned are "none" for regular block devices and "host-aware"
        or "host-managed" for zoned block devices. The characteristics
        of host-aware and host-managed zoned block devices are
        described in the ZBC (Zoned Block Commands) and ZAC (Zoned
        Device ATA Command Set) standards. These standards also define
        the "drive-managed" zone model. However, since drive-managed
        zoned block devices do not support zone commands, they will be
        treated as regular block devices and zoned will report "none".


What:           /sys/block/<disk>/queue/nr_zones
Date:           November 2018
Contact:        Damien Le Moal <damien.lemoal@wdc.com>
Description:
        nr_zones indicates the total number of zones of a zoned block
        device ("host-aware" or "host-managed" zone model). For regular
        block devices, the value is always 0.


What:           /sys/block/<disk>/queue/max_active_zones
Date:           July 2020
Contact:        Niklas Cassel <niklas.cassel@wdc.com>
Description:
        For zoned block devices (zoned attribute indicating
        "host-managed" or "host-aware"), the sum of zones belonging to
        any of the zone states EXPLICIT OPEN, IMPLICIT OPEN or CLOSED
        is limited by this value. If this value is 0, there is no
        limit.


What:           /sys/block/<disk>/queue/max_open_zones
Date:           July 2020
Contact:        Niklas Cassel <niklas.cassel@wdc.com>
Description:
        For zoned block devices (zoned attribute indicating
        "host-managed" or "host-aware"), the sum of zones belonging to
        any of the zone states EXPLICIT OPEN or IMPLICIT OPEN is
        limited by this value. If this value is 0, there is no limit.


What:           /sys/block/<disk>/queue/chunk_sectors
Date:           September 2016
Contact:        Hannes Reinecke <hare@suse.com>
Description:
        chunk_sectors has a different meaning depending on the type of
        the disk. For a RAID device (dm-raid), chunk_sectors indicates
        the size in 512B sectors of the RAID volume stripe segment. For
        a zoned block device, either host-aware or host-managed,
        chunk_sectors indicates the size in 512B sectors of the zones
        of the device, with the possible exception of the last zone of
        the device, which may be smaller.


What:           /sys/block/<disk>/queue/io_timeout
Date:           November 2018
Contact:        Weiping Zhang <zhangweiping@didiglobal.com>
Description:
        io_timeout is the request timeout in milliseconds. If a request
        does not complete in this time then the block driver timeout
        handler is invoked. That timeout handler can decide to retry
        the request, to fail it, or to start a device recovery
        strategy.

@@ -20,7 +20,6 @@ Block
   kyber-iosched
   null_blk
   pr
   queue-sysfs
   request
   stat
   switching-sched

@ -1,321 +0,0 @@
|
||||
=================
|
||||
Queue sysfs files
|
||||
=================
|
||||
|
||||
This text file will detail the queue files that are located in the sysfs tree
|
||||
for each block device. Note that stacked devices typically do not export
|
||||
any settings, since their queue merely functions as a remapping target.
|
||||
These files are the ones found in the /sys/block/xxx/queue/ directory.
|
||||
|
||||
Files denoted with a RO postfix are readonly and the RW postfix means
|
||||
read-write.
|
||||
|
||||
add_random (RW)
|
||||
---------------
|
||||
This file allows to turn off the disk entropy contribution. Default
|
||||
value of this file is '1'(on).
|
||||
|
||||
chunk_sectors (RO)
|
||||
------------------
|
||||
This has different meaning depending on the type of the block device.
|
||||
For a RAID device (dm-raid), chunk_sectors indicates the size in 512B sectors
|
||||
of the RAID volume stripe segment. For a zoned block device, either host-aware
|
||||
or host-managed, chunk_sectors indicates the size in 512B sectors of the zones
|
||||
of the device, with the eventual exception of the last zone of the device which
|
||||
may be smaller.
|
||||
|
||||
dax (RO)
|
||||
--------
|
||||
This file indicates whether the device supports Direct Access (DAX),
|
||||
used by CPU-addressable storage to bypass the pagecache. It shows '1'
|
||||
if true, '0' if not.
|
||||
|
||||
discard_granularity (RO)
|
||||
------------------------
|
||||
This shows the size of internal allocation of the device in bytes, if
|
||||
reported by the device. A value of '0' means device does not support
|
||||
the discard functionality.
|
||||
|
||||
discard_max_hw_bytes (RO)
|
||||
-------------------------
|
||||
Devices that support discard functionality may have internal limits on
|
||||
the number of bytes that can be trimmed or unmapped in a single operation.
|
||||
The `discard_max_hw_bytes` parameter is set by the device driver to the
|
||||
maximum number of bytes that can be discarded in a single operation.
|
||||
Discard requests issued to the device must not exceed this limit.
|
||||
A `discard_max_hw_bytes` value of 0 means that the device does not support
|
||||
discard functionality.
|
||||
|
||||
discard_max_bytes (RW)
|
||||
----------------------
|
||||
While discard_max_hw_bytes is the hardware limit for the device, this
|
||||
setting is the software limit. Some devices exhibit large latencies when
|
||||
large discards are issued, setting this value lower will make Linux issue
|
||||
smaller discards and potentially help reduce latencies induced by large
|
||||
discard operations.
|
||||
|
||||
discard_zeroes_data (RO)
|
||||
------------------------
|
||||
Obsolete. Always zero.
|
||||
|
||||
fua (RO)
|
||||
--------
|
||||
Whether or not the block driver supports the FUA flag for write requests.
|
||||
FUA stands for Force Unit Access. If the FUA flag is set that means that
|
||||
write requests must bypass the volatile cache of the storage device.
|
||||
|
||||
hw_sector_size (RO)
|
||||
-------------------
|
||||
This is the hardware sector size of the device, in bytes.
|
||||
|
||||
io_poll (RW)
|
||||
------------
|
||||
When read, this file shows whether polling is enabled (1) or disabled
|
||||
(0). Writing '0' to this file will disable polling for this device.
|
||||
Writing any non-zero value will enable this feature.
|
||||
|
||||
io_poll_delay (RW)
|
||||
------------------
|
||||
If polling is enabled, this controls what kind of polling will be
|
||||
performed. It defaults to -1, which is classic polling. In this mode,
|
||||
the CPU will repeatedly ask for completions without giving up any time.
|
||||
If set to 0, a hybrid polling mode is used, where the kernel will attempt
|
||||
to make an educated guess at when the IO will complete. Based on this
|
||||
guess, the kernel will put the process issuing IO to sleep for an amount
|
||||
of time, before entering a classic poll loop. This mode might be a
|
||||
little slower than pure classic polling, but it will be more efficient.
|
||||
If set to a value larger than 0, the kernel will put the process issuing
|
||||
IO to sleep for this amount of microseconds before entering classic
|
||||
polling.
|
||||
|
||||
io_timeout (RW)
|
||||
---------------
|
||||
io_timeout is the request timeout in milliseconds. If a request does not
|
||||
complete in this time then the block driver timeout handler is invoked.
|
||||
That timeout handler can decide to retry the request, to fail it or to start
|
||||
a device recovery strategy.
|
||||
|
||||
iostats (RW)
|
||||
-------------
|
||||
This file is used to control (on/off) the iostats accounting of the
|
||||
disk.
|
||||
|
||||
logical_block_size (RO)
|
||||
-----------------------
|
||||
This is the logical block size of the device, in bytes.
|
||||
|
||||
max_discard_segments (RO)
|
||||
-------------------------
|
||||
The maximum number of DMA scatter/gather entries in a discard request.
|
||||
|
||||
max_hw_sectors_kb (RO)
|
||||
----------------------
|
||||
This is the maximum number of kilobytes supported in a single data transfer.
|
||||
|
||||
max_integrity_segments (RO)
|
||||
---------------------------
|
||||
Maximum number of elements in a DMA scatter/gather list with integrity
|
||||
data that will be submitted by the block layer core to the associated
|
||||
block driver.
|
||||
|
||||
max_active_zones (RO)
|
||||
---------------------
|
||||
For zoned block devices (zoned attribute indicating "host-managed" or
|
||||
"host-aware"), the sum of zones belonging to any of the zone states:
|
||||
EXPLICIT OPEN, IMPLICIT OPEN or CLOSED, is limited by this value.
|
||||
If this value is 0, there is no limit.
|
||||
|
||||
If the host attempts to exceed this limit, the driver should report this error
|
||||
with BLK_STS_ZONE_ACTIVE_RESOURCE, which user space may see as the EOVERFLOW
|
||||
errno.
|
||||
|
||||
max_open_zones (RO)
|
||||
-------------------
|
||||
For zoned block devices (zoned attribute indicating "host-managed" or
|
||||
"host-aware"), the sum of zones belonging to any of the zone states:
|
||||
EXPLICIT OPEN or IMPLICIT OPEN, is limited by this value.
|
||||
If this value is 0, there is no limit.
|
||||
|
||||
If the host attempts to exceed this limit, the driver should report this error
|
||||
with BLK_STS_ZONE_OPEN_RESOURCE, which user space may see as the ETOOMANYREFS
|
||||
errno.
|
||||
|
||||
max_sectors_kb (RW)
|
||||
-------------------
|
||||
This is the maximum number of kilobytes that the block layer will allow
|
||||
for a filesystem request. Must be smaller than or equal to the maximum
|
||||
size allowed by the hardware.
|
||||
|
||||
max_segments (RO)
|
||||
-----------------
|
||||
Maximum number of elements in a DMA scatter/gather list that is submitted
|
||||
to the associated block driver.
|
||||
|
||||
max_segment_size (RO)
|
||||
---------------------
|
||||
Maximum size in bytes of a single element in a DMA scatter/gather list.
|
||||
|
||||
minimum_io_size (RO)
|
||||
--------------------
|
||||
This is the smallest preferred IO size reported by the device.
|
||||
|
||||
nomerges (RW)
|
||||
-------------
|
||||
This enables the user to disable the lookup logic involved with IO
|
||||
merging requests in the block layer. By default (0) all merges are
|
||||
enabled. When set to 1 only simple one-hit merges will be tried. When
|
||||
set to 2 no merge algorithms will be tried (including one-hit or more
|
||||
complex tree/hash lookups).
|
||||
|
||||
nr_requests (RW)
|
||||
----------------
|
||||
This controls how many requests may be allocated in the block layer for
|
||||
read or write requests. Note that the total allocated number may be twice
|
||||
this amount, since it applies only to reads or writes (not the accumulated
|
||||
sum).
|
||||
|
||||
To avoid priority inversion through request starvation, a request
|
||||
queue maintains a separate request pool per each cgroup when
|
||||
CONFIG_BLK_CGROUP is enabled, and this parameter applies to each such
|
||||
per-block-cgroup request pool. IOW, if there are N block cgroups,
|
||||
each request queue may have up to N request pools, each independently
|
||||
regulated by nr_requests.
|
||||
|
||||
nr_zones (RO)
|
||||
-------------
|
||||
For zoned block devices (zoned attribute indicating "host-managed" or
|
||||
"host-aware"), this indicates the total number of zones of the device.
|
||||
This is always 0 for regular block devices.
|
||||
|
||||
optimal_io_size (RO)
|
||||
--------------------
|
||||
This is the optimal IO size reported by the device.
|
||||
|
||||
physical_block_size (RO)
|
||||
------------------------
|
||||
This is the physical block size of device, in bytes.
|
||||
|
||||
read_ahead_kb (RW)
|
||||
------------------
|
||||
Maximum number of kilobytes to read-ahead for filesystems on this block
|
||||
device.
|
||||
|
||||
rotational (RW)
|
||||
---------------
|
||||
This file is used to stat if the device is of rotational type or
|
||||
non-rotational type.
|
||||
|
||||

rq_affinity (RW)
----------------
If this option is '1', the block layer will migrate request completions to the
cpu "group" that originally submitted the request. For some workloads this
provides a significant reduction in CPU cycles due to caching effects.

For storage configurations that need to maximize distribution of completion
processing, setting this option to '2' forces the completion to run on the
requesting cpu (bypassing the "group" aggregation logic).

scheduler (RW)
--------------
When read, this file will display the current and available IO schedulers
for this block device. The currently active IO scheduler will be enclosed
in [] brackets. Writing an IO scheduler name to this file will switch
control of this block device to that new IO scheduler. Note that writing
an IO scheduler name to this file will attempt to load that IO scheduler
module, if it isn't already present in the system.
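
As a sketch only, a user-space C program along these lines could display the
available schedulers and request a switch (the device name "sda" and the
"mq-deadline" target are placeholders; as described above, the kernel will try
to load the named scheduler module if it is not already present)::

	/* Show the available I/O schedulers and ask for a switch. */
	#include <stdio.h>

	int main(void)
	{
		const char *path = "/sys/block/sda/queue/scheduler";
		char line[256];
		FILE *f;

		f = fopen(path, "r");
		if (!f)
			return 1;
		if (fgets(line, sizeof(line), f))
			printf("schedulers: %s", line);	/* active one is in [] */
		fclose(f);

		f = fopen(path, "w");
		if (!f)
			return 1;
		fputs("mq-deadline\n", f);	/* placeholder scheduler name */
		return fclose(f) ? 1 : 0;
	}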

write_cache (RW)
----------------
When read, this file will display whether the device has write back
caching enabled or not. It will return "write back" for the former
case, and "write through" for the latter. Writing to this file can
change the kernel's view of the device, but it doesn't alter the
device state. This means that it might not be safe to toggle the
setting from "write back" to "write through", since that will also
eliminate cache flushes issued by the kernel.

write_same_max_bytes (RO)
-------------------------
This is the number of bytes the device can write in a single write-same
command. A value of '0' means write-same is not supported by this
device.

wbt_lat_usec (RW)
-----------------
If the device is registered for writeback throttling, then this file shows
the target minimum read latency. If this latency is exceeded in a given
window of time (see wb_window_usec), then the writeback throttling will start
scaling back writes. Writing a value of '0' to this file disables the
feature. Writing a value of '-1' to this file resets the value to the
default setting.
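
A minimal sketch of driving this knob from user space might look as follows
(the device name "sda" and the 75000 microsecond target are arbitrary
example values)::

	/* Set a writeback throttling latency target of 75ms (75000us). */
	#include <stdio.h>

	int main(void)
	{
		FILE *f = fopen("/sys/block/sda/queue/wbt_lat_usec", "w");

		if (!f)
			return 1;
		fprintf(f, "%d\n", 75000);	/* "0" disables, "-1" resets */
		return fclose(f) ? 1 : 0;
	}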

throttle_sample_time (RW)
-------------------------
This is the time window that blk-throttle samples data, in milliseconds.
blk-throttle makes decisions based on these samples. A lower sample time means
cgroups see smoother throughput, at the cost of higher CPU overhead. This
exists only when CONFIG_BLK_DEV_THROTTLING_LOW is enabled.

write_zeroes_max_bytes (RO)
---------------------------
For block drivers that support REQ_OP_WRITE_ZEROES, the maximum number of
bytes that can be zeroed at once. The value 0 means that REQ_OP_WRITE_ZEROES
is not supported.

zone_append_max_bytes (RO)
--------------------------
This is the maximum number of bytes that can be written to a sequential
zone of a zoned block device using a zone append write operation
(REQ_OP_ZONE_APPEND). This value is always 0 for regular block devices.

zoned (RO)
----------
This indicates if the device is a zoned block device and the zone model of the
device if it is indeed zoned. The possible values indicated by zoned are
"none" for regular block devices and "host-aware" or "host-managed" for zoned
block devices. The characteristics of host-aware and host-managed zoned block
devices are described in the ZBC (Zoned Block Commands) and ZAC
(Zoned Device ATA Command Set) standards. These standards also define the
"drive-managed" zone model. However, since drive-managed zoned block devices
do not support zone commands, they will be treated as regular block devices
and zoned will report "none".

zone_write_granularity (RO)
---------------------------
This indicates the alignment constraint, in bytes, for write operations in
sequential zones of zoned block devices (devices with a zoned attribute
that reports "host-managed" or "host-aware"). This value is always 0 for
regular block devices.

independent_access_ranges (RO)
------------------------------

The presence of this sub-directory of the /sys/block/xxx/queue/ directory
indicates that the device is capable of executing requests targeting
different sector ranges in parallel. For instance, single LUN multi-actuator
hard-disks will have an independent_access_ranges directory if the device
correctly advertises the sector ranges of its actuators.

The independent_access_ranges directory contains one directory per access
range, with each range described using the sector (RO) attribute file to
indicate the first sector of the range and the nr_sectors (RO) attribute file
to indicate the total number of sectors in the range starting from the first
sector of the range. For example, a dual-actuator hard-disk will have the
following independent_access_ranges entries::

	$ tree /sys/block/<device>/queue/independent_access_ranges/
	/sys/block/<device>/queue/independent_access_ranges/
	|-- 0
	|   |-- nr_sectors
	|   `-- sector
	`-- 1
	    |-- nr_sectors
	    `-- sector

The sector and nr_sectors attributes use 512B sector units, regardless of
the actual block size of the device. Independent access ranges do not
overlap and include all sectors within the device capacity. The access
ranges are numbered in increasing order of the range start sector,
that is, the sector attribute of range 0 always has the value 0.
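
A user-space consumer could discover these ranges with a short C sketch like
the following (the device name "sda" is a placeholder; a device that does not
advertise independent access ranges simply has no such directory)::

	/* Print the start and length of each independent access range. */
	#include <dirent.h>
	#include <stdio.h>

	static long read_attr(const char *dir, const char *name)
	{
		char path[512];
		long v = -1;
		FILE *f;

		snprintf(path, sizeof(path), "%s/%s", dir, name);
		f = fopen(path, "r");
		if (f) {
			if (fscanf(f, "%ld", &v) != 1)
				v = -1;
			fclose(f);
		}
		return v;
	}

	int main(void)
	{
		const char *base = "/sys/block/sda/queue/independent_access_ranges";
		char range_dir[384];
		struct dirent *de;
		DIR *d = opendir(base);

		if (!d)
			return 1;	/* device does not advertise ranges */
		while ((de = readdir(d)) != NULL) {
			if (de->d_name[0] == '.')
				continue;
			snprintf(range_dir, sizeof(range_dir), "%s/%s",
				 base, de->d_name);
			printf("range %s: sector %ld, nr_sectors %ld\n",
			       de->d_name, read_attr(range_dir, "sector"),
			       read_attr(range_dir, "nr_sectors"));
		}
		closedir(d);
		return 0;
	}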

Jens Axboe <jens.axboe@oracle.com>, February 2009

@@ -294,9 +294,6 @@ Block Devices
.. kernel-doc:: block/blk-settings.c
   :export:

.. kernel-doc:: block/blk-exec.c
   :export:

.. kernel-doc:: block/blk-flush.c
   :export:

@@ -292,8 +292,6 @@ block/blk-sysfs.c

block/blk-settings.c

block/blk-exec.c

block/blk-flush.c

block/blk-lib.c

@@ -3416,6 +3416,8 @@ M: Jens Axboe <axboe@kernel.dk>
L: linux-block@vger.kernel.org
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git
F: Documentation/ABI/stable/sysfs-block
F: Documentation/block/
F: block/
F: drivers/block/
F: include/linux/blk*

@@ -35,6 +35,9 @@ config BLK_CGROUP_RWSTAT
config BLK_DEV_BSG_COMMON
	tristate

config BLK_ICQ
	bool

config BLK_DEV_BSGLIB
	bool "Block layer SG support v4 helper lib"
	select BLK_DEV_BSG_COMMON

@@ -18,6 +18,7 @@ config MQ_IOSCHED_KYBER

config IOSCHED_BFQ
	tristate "BFQ I/O scheduler"
	select BLK_ICQ
	help
	  BFQ I/O scheduler for BLK-MQ. BFQ distributes the bandwidth of
	  the device among all processes according to their weights,

@@ -5,7 +5,7 @@

obj-y		:= bdev.o fops.o bio.o elevator.o blk-core.o blk-sysfs.o \
			blk-flush.o blk-settings.o blk-ioc.o blk-map.o \
			blk-exec.o blk-merge.o blk-timeout.o \
			blk-merge.o blk-timeout.o \
			blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \
			blk-mq-sysfs.o blk-mq-cpumap.o blk-mq-sched.o ioctl.o \
			genhd.o ioprio.o badblocks.o partitions/ blk-rq-qos.o \

block/bdev.c
@ -665,7 +665,7 @@ static void blkdev_flush_mapping(struct block_device *bdev)
|
||||
static int blkdev_get_whole(struct block_device *bdev, fmode_t mode)
|
||||
{
|
||||
struct gendisk *disk = bdev->bd_disk;
|
||||
int ret = 0;
|
||||
int ret;
|
||||
|
||||
if (disk->fops->open) {
|
||||
ret = disk->fops->open(bdev, mode);
|
||||
@ -750,14 +750,6 @@ struct block_device *blkdev_get_no_open(dev_t dev)
|
||||
if (!kobject_get_unless_zero(&bdev->bd_device.kobj))
|
||||
bdev = NULL;
|
||||
iput(inode);
|
||||
|
||||
if (!bdev)
|
||||
return NULL;
|
||||
if ((bdev->bd_disk->flags & GENHD_FL_HIDDEN)) {
|
||||
put_device(&bdev->bd_device);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return bdev;
|
||||
}
|
||||
|
||||
@ -837,7 +829,7 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
|
||||
* used in blkdev_get/put().
|
||||
*/
|
||||
if ((mode & FMODE_WRITE) && !bdev->bd_write_holder &&
|
||||
(disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE)) {
|
||||
(disk->event_flags & DISK_EVENT_FLAG_BLOCK_ON_EXCL_WRITE)) {
|
||||
bdev->bd_write_holder = true;
|
||||
unblock_events = false;
|
||||
}
|
||||
@ -963,15 +955,15 @@ void blkdev_put(struct block_device *bdev, fmode_t mode)
|
||||
EXPORT_SYMBOL(blkdev_put);
|
||||
|
||||
/**
|
||||
* lookup_bdev - lookup a struct block_device by name
|
||||
* @pathname: special file representing the block device
|
||||
* @dev: return value of the block device's dev_t
|
||||
* lookup_bdev() - Look up a struct block_device by name.
|
||||
* @pathname: Name of the block device in the filesystem.
|
||||
* @dev: Pointer to the block device's dev_t, if found.
|
||||
*
|
||||
* Lookup the block device's dev_t at @pathname in the current
|
||||
* namespace if possible and return it by @dev.
|
||||
* namespace if possible and return it in @dev.
|
||||
*
|
||||
* RETURNS:
|
||||
* 0 if succeeded, errno otherwise.
|
||||
* Context: May sleep.
|
||||
* Return: 0 if succeeded, negative errno otherwise.
|
||||
*/
|
||||
int lookup_bdev(const char *pathname, dev_t *dev)
|
||||
{
|
||||
|
@ -433,26 +433,21 @@ static struct bfq_io_cq *icq_to_bic(struct io_cq *icq)
|
||||
|
||||
/**
|
||||
* bfq_bic_lookup - search into @ioc a bic associated to @bfqd.
|
||||
* @bfqd: the lookup key.
|
||||
* @ioc: the io_context of the process doing I/O.
|
||||
* @q: the request queue.
|
||||
*/
|
||||
static struct bfq_io_cq *bfq_bic_lookup(struct bfq_data *bfqd,
|
||||
struct io_context *ioc,
|
||||
struct request_queue *q)
|
||||
static struct bfq_io_cq *bfq_bic_lookup(struct request_queue *q)
|
||||
{
|
||||
if (ioc) {
|
||||
unsigned long flags;
|
||||
struct bfq_io_cq *icq;
|
||||
struct bfq_io_cq *icq;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&q->queue_lock, flags);
|
||||
icq = icq_to_bic(ioc_lookup_icq(ioc, q));
|
||||
spin_unlock_irqrestore(&q->queue_lock, flags);
|
||||
if (!current->io_context)
|
||||
return NULL;
|
||||
|
||||
return icq;
|
||||
}
|
||||
spin_lock_irqsave(&q->queue_lock, flags);
|
||||
icq = icq_to_bic(ioc_lookup_icq(q));
|
||||
spin_unlock_irqrestore(&q->queue_lock, flags);
|
||||
|
||||
return NULL;
|
||||
return icq;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -565,26 +560,134 @@ static struct request *bfq_choose_req(struct bfq_data *bfqd,
|
||||
}
|
||||
}
|
||||
|
||||
#define BFQ_LIMIT_INLINE_DEPTH 16
|
||||
|
||||
#ifdef CONFIG_BFQ_GROUP_IOSCHED
|
||||
static bool bfqq_request_over_limit(struct bfq_queue *bfqq, int limit)
|
||||
{
|
||||
struct bfq_data *bfqd = bfqq->bfqd;
|
||||
struct bfq_entity *entity = &bfqq->entity;
|
||||
struct bfq_entity *inline_entities[BFQ_LIMIT_INLINE_DEPTH];
|
||||
struct bfq_entity **entities = inline_entities;
|
||||
int depth, level;
|
||||
int class_idx = bfqq->ioprio_class - 1;
|
||||
struct bfq_sched_data *sched_data;
|
||||
unsigned long wsum;
|
||||
bool ret = false;
|
||||
|
||||
if (!entity->on_st_or_in_serv)
|
||||
return false;
|
||||
|
||||
/* +1 for bfqq entity, root cgroup not included */
|
||||
depth = bfqg_to_blkg(bfqq_group(bfqq))->blkcg->css.cgroup->level + 1;
|
||||
if (depth > BFQ_LIMIT_INLINE_DEPTH) {
|
||||
entities = kmalloc_array(depth, sizeof(*entities), GFP_NOIO);
|
||||
if (!entities)
|
||||
return false;
|
||||
}
|
||||
|
||||
spin_lock_irq(&bfqd->lock);
|
||||
sched_data = entity->sched_data;
|
||||
/* Gather our ancestors as we need to traverse them in reverse order */
|
||||
level = 0;
|
||||
for_each_entity(entity) {
|
||||
/*
|
||||
* If at some level entity is not even active, allow request
|
||||
* queueing so that BFQ knows there's work to do and activate
|
||||
* entities.
|
||||
*/
|
||||
if (!entity->on_st_or_in_serv)
|
||||
goto out;
|
||||
/* Uh, more parents than cgroup subsystem thinks? */
|
||||
if (WARN_ON_ONCE(level >= depth))
|
||||
break;
|
||||
entities[level++] = entity;
|
||||
}
|
||||
WARN_ON_ONCE(level != depth);
|
||||
for (level--; level >= 0; level--) {
|
||||
entity = entities[level];
|
||||
if (level > 0) {
|
||||
wsum = bfq_entity_service_tree(entity)->wsum;
|
||||
} else {
|
||||
int i;
|
||||
/*
|
||||
* For bfqq itself we take into account service trees
|
||||
* of all higher priority classes and multiply their
|
||||
* weights so that low prio queue from higher class
|
||||
* gets more requests than high prio queue from lower
|
||||
* class.
|
||||
*/
|
||||
wsum = 0;
|
||||
for (i = 0; i <= class_idx; i++) {
|
||||
wsum = wsum * IOPRIO_BE_NR +
|
||||
sched_data->service_tree[i].wsum;
|
||||
}
|
||||
}
|
||||
limit = DIV_ROUND_CLOSEST(limit * entity->weight, wsum);
|
||||
if (entity->allocated >= limit) {
|
||||
bfq_log_bfqq(bfqq->bfqd, bfqq,
|
||||
"too many requests: allocated %d limit %d level %d",
|
||||
entity->allocated, limit, level);
|
||||
ret = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
out:
|
||||
spin_unlock_irq(&bfqd->lock);
|
||||
if (entities != inline_entities)
|
||||
kfree(entities);
|
||||
return ret;
|
||||
}
|
||||
#else
|
||||
static bool bfqq_request_over_limit(struct bfq_queue *bfqq, int limit)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Async I/O can easily starve sync I/O (both sync reads and sync
|
||||
* writes), by consuming all tags. Similarly, storms of sync writes,
|
||||
* such as those that sync(2) may trigger, can starve sync reads.
|
||||
* Limit depths of async I/O and sync writes so as to counter both
|
||||
* problems.
|
||||
*
|
||||
* Also if a bfq queue or its parent cgroup consume more tags than would be
|
||||
* appropriate for their weight, we trim the available tag depth to 1. This
|
||||
* avoids a situation where one cgroup can starve another cgroup from tags and
|
||||
* thus block service differentiation among cgroups. Note that because the
|
||||
* queue / cgroup already has many requests allocated and queued, this does not
|
||||
* significantly affect service guarantees coming from the BFQ scheduling
|
||||
* algorithm.
|
||||
*/
|
||||
static void bfq_limit_depth(unsigned int op, struct blk_mq_alloc_data *data)
|
||||
{
|
||||
struct bfq_data *bfqd = data->q->elevator->elevator_data;
|
||||
struct bfq_io_cq *bic = bfq_bic_lookup(data->q);
|
||||
struct bfq_queue *bfqq = bic ? bic_to_bfqq(bic, op_is_sync(op)) : NULL;
|
||||
int depth;
|
||||
unsigned limit = data->q->nr_requests;
|
||||
|
||||
if (op_is_sync(op) && !op_is_write(op))
|
||||
return;
|
||||
/* Sync reads have full depth available */
|
||||
if (op_is_sync(op) && !op_is_write(op)) {
|
||||
depth = 0;
|
||||
} else {
|
||||
depth = bfqd->word_depths[!!bfqd->wr_busy_queues][op_is_sync(op)];
|
||||
limit = (limit * depth) >> bfqd->full_depth_shift;
|
||||
}
|
||||
|
||||
data->shallow_depth =
|
||||
bfqd->word_depths[!!bfqd->wr_busy_queues][op_is_sync(op)];
|
||||
/*
|
||||
* Does queue (or any parent entity) exceed number of requests that
|
||||
* should be available to it? Heavily limit depth so that it cannot
|
||||
* consume more available requests and thus starve other entities.
|
||||
*/
|
||||
if (bfqq && bfqq_request_over_limit(bfqq, limit))
|
||||
depth = 1;
|
||||
|
||||
bfq_log(bfqd, "[%s] wr_busy %d sync %d depth %u",
|
||||
__func__, bfqd->wr_busy_queues, op_is_sync(op),
|
||||
data->shallow_depth);
|
||||
__func__, bfqd->wr_busy_queues, op_is_sync(op), depth);
|
||||
if (depth)
|
||||
data->shallow_depth = depth;
|
||||
}
|
||||
|
||||
static struct bfq_queue *
|
||||
@ -1113,7 +1216,8 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_data *bfqd,
|
||||
|
||||
static int bfqq_process_refs(struct bfq_queue *bfqq)
|
||||
{
|
||||
return bfqq->ref - bfqq->allocated - bfqq->entity.on_st_or_in_serv -
|
||||
return bfqq->ref - bfqq->entity.allocated -
|
||||
bfqq->entity.on_st_or_in_serv -
|
||||
(bfqq->weight_counter != NULL) - bfqq->stable_ref;
|
||||
}
|
||||
|
||||
@ -1982,20 +2086,19 @@ static void bfq_update_io_intensity(struct bfq_queue *bfqq, u64 now_ns)
|
||||
* aspect, see the comments on the choice of the queue for injection
|
||||
* in bfq_select_queue().
|
||||
*
|
||||
* Turning back to the detection of a waker queue, a queue Q is deemed
|
||||
* as a waker queue for bfqq if, for three consecutive times, bfqq
|
||||
* happens to become non empty right after a request of Q has been
|
||||
* completed. In this respect, even if bfqq is empty, we do not check
|
||||
* for a waker if it still has some in-flight I/O. In fact, in this
|
||||
* case bfqq is actually still being served by the drive, and may
|
||||
* receive new I/O on the completion of some of the in-flight
|
||||
* requests. In particular, on the first time, Q is tentatively set as
|
||||
* a candidate waker queue, while on the third consecutive time that Q
|
||||
* is detected, the field waker_bfqq is set to Q, to confirm that Q is
|
||||
* a waker queue for bfqq. These detection steps are performed only if
|
||||
* bfqq has a long think time, so as to make it more likely that
|
||||
* bfqq's I/O is actually being blocked by a synchronization. This
|
||||
* last filter, plus the above three-times requirement, make false
|
||||
* Turning back to the detection of a waker queue, a queue Q is deemed as a
|
||||
* waker queue for bfqq if, for three consecutive times, bfqq happens to become
|
||||
* non empty right after a request of Q has been completed within given
|
||||
* timeout. In this respect, even if bfqq is empty, we do not check for a waker
|
||||
* if it still has some in-flight I/O. In fact, in this case bfqq is actually
|
||||
* still being served by the drive, and may receive new I/O on the completion
|
||||
* of some of the in-flight requests. In particular, on the first time, Q is
|
||||
* tentatively set as a candidate waker queue, while on the third consecutive
|
||||
* time that Q is detected, the field waker_bfqq is set to Q, to confirm that Q
|
||||
* is a waker queue for bfqq. These detection steps are performed only if bfqq
|
||||
* has a long think time, so as to make it more likely that bfqq's I/O is
|
||||
* actually being blocked by a synchronization. This last filter, plus the
|
||||
* above three-times requirement and time limit for detection, make false
|
||||
* positives less likely.
|
||||
*
|
||||
* NOTE
|
||||
@ -2019,6 +2122,8 @@ static void bfq_update_io_intensity(struct bfq_queue *bfqq, u64 now_ns)
|
||||
static void bfq_check_waker(struct bfq_data *bfqd, struct bfq_queue *bfqq,
|
||||
u64 now_ns)
|
||||
{
|
||||
char waker_name[MAX_BFQQ_NAME_LENGTH];
|
||||
|
||||
if (!bfqd->last_completed_rq_bfqq ||
|
||||
bfqd->last_completed_rq_bfqq == bfqq ||
|
||||
bfq_bfqq_has_short_ttime(bfqq) ||
|
||||
@ -2027,8 +2132,16 @@ static void bfq_check_waker(struct bfq_data *bfqd, struct bfq_queue *bfqq,
|
||||
bfqd->last_completed_rq_bfqq == bfqq->waker_bfqq)
|
||||
return;
|
||||
|
||||
/*
|
||||
* We reset waker detection logic also if too much time has passed
|
||||
* since the first detection. If wakeups are rare, pointless idling
|
||||
* doesn't hurt throughput that much. The condition below makes sure
|
||||
* we do not uselessly idle blocking waker in more than 1/64 cases.
|
||||
*/
|
||||
if (bfqd->last_completed_rq_bfqq !=
|
||||
bfqq->tentative_waker_bfqq) {
|
||||
bfqq->tentative_waker_bfqq ||
|
||||
now_ns > bfqq->waker_detection_started +
|
||||
128 * (u64)bfqd->bfq_slice_idle) {
|
||||
/*
|
||||
* First synchronization detected with a
|
||||
* candidate waker queue, or with a different
|
||||
@ -2037,12 +2150,19 @@ static void bfq_check_waker(struct bfq_data *bfqd, struct bfq_queue *bfqq,
|
||||
bfqq->tentative_waker_bfqq =
|
||||
bfqd->last_completed_rq_bfqq;
|
||||
bfqq->num_waker_detections = 1;
|
||||
bfqq->waker_detection_started = now_ns;
|
||||
bfq_bfqq_name(bfqq->tentative_waker_bfqq, waker_name,
|
||||
MAX_BFQQ_NAME_LENGTH);
|
||||
bfq_log_bfqq(bfqd, bfqq, "set tenative waker %s", waker_name);
|
||||
} else /* Same tentative waker queue detected again */
|
||||
bfqq->num_waker_detections++;
|
||||
|
||||
if (bfqq->num_waker_detections == 3) {
|
||||
bfqq->waker_bfqq = bfqd->last_completed_rq_bfqq;
|
||||
bfqq->tentative_waker_bfqq = NULL;
|
||||
bfq_bfqq_name(bfqq->waker_bfqq, waker_name,
|
||||
MAX_BFQQ_NAME_LENGTH);
|
||||
bfq_log_bfqq(bfqd, bfqq, "set waker %s", waker_name);
|
||||
|
||||
/*
|
||||
* If the waker queue disappears, then
|
||||
@ -2332,7 +2452,7 @@ static bool bfq_bio_merge(struct request_queue *q, struct bio *bio,
|
||||
* returned by bfq_bic_lookup does not go away before
|
||||
* bfqd->lock is taken.
|
||||
*/
|
||||
struct bfq_io_cq *bic = bfq_bic_lookup(bfqd, current->io_context, q);
|
||||
struct bfq_io_cq *bic = bfq_bic_lookup(q);
|
||||
bool ret;
|
||||
|
||||
spin_lock_irq(&bfqd->lock);
|
||||
@ -5878,6 +5998,22 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
|
||||
}
|
||||
}
|
||||
|
||||
static void bfqq_request_allocated(struct bfq_queue *bfqq)
|
||||
{
|
||||
struct bfq_entity *entity = &bfqq->entity;
|
||||
|
||||
for_each_entity(entity)
|
||||
entity->allocated++;
|
||||
}
|
||||
|
||||
static void bfqq_request_freed(struct bfq_queue *bfqq)
|
||||
{
|
||||
struct bfq_entity *entity = &bfqq->entity;
|
||||
|
||||
for_each_entity(entity)
|
||||
entity->allocated--;
|
||||
}
|
||||
|
||||
/* returns true if it causes the idle timer to be disabled */
|
||||
static bool __bfq_insert_request(struct bfq_data *bfqd, struct request *rq)
|
||||
{
|
||||
@ -5891,8 +6027,8 @@ static bool __bfq_insert_request(struct bfq_data *bfqd, struct request *rq)
|
||||
* Release the request's reference to the old bfqq
|
||||
* and make sure one is taken to the shared queue.
|
||||
*/
|
||||
new_bfqq->allocated++;
|
||||
bfqq->allocated--;
|
||||
bfqq_request_allocated(new_bfqq);
|
||||
bfqq_request_freed(bfqq);
|
||||
new_bfqq->ref++;
|
||||
/*
|
||||
* If the bic associated with the process
|
||||
@ -5991,48 +6127,7 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
|
||||
|
||||
spin_lock_irq(&bfqd->lock);
|
||||
bfqq = bfq_init_rq(rq);
|
||||
|
||||
/*
|
||||
* Reqs with at_head or passthrough flags set are to be put
|
||||
* directly into dispatch list. Additional case for putting rq
|
||||
* directly into the dispatch queue: the only active
|
||||
* bfq_queues are bfqq and either its waker bfq_queue or one
|
||||
* of its woken bfq_queues. The rationale behind this
|
||||
* additional condition is as follows:
|
||||
* - consider a bfq_queue, say Q1, detected as a waker of
|
||||
* another bfq_queue, say Q2
|
||||
* - by definition of a waker, Q1 blocks the I/O of Q2, i.e.,
|
||||
* some I/O of Q1 needs to be completed for new I/O of Q2
|
||||
* to arrive. A notable example of waker is journald
|
||||
* - so, Q1 and Q2 are in any respect the queues of two
|
||||
* cooperating processes (or of two cooperating sets of
|
||||
* processes): the goal of Q1's I/O is doing what needs to
|
||||
* be done so that new Q2's I/O can finally be
|
||||
* issued. Therefore, if the service of Q1's I/O is delayed,
|
||||
* then Q2's I/O is delayed too. Conversely, if Q2's I/O is
|
||||
* delayed, the goal of Q1's I/O is hindered.
|
||||
* - as a consequence, if some I/O of Q1/Q2 arrives while
|
||||
* Q2/Q1 is the only queue in service, there is absolutely
|
||||
* no point in delaying the service of such an I/O. The
|
||||
* only possible result is a throughput loss
|
||||
* - so, when the above condition holds, the best option is to
|
||||
* have the new I/O dispatched as soon as possible
|
||||
* - the most effective and efficient way to attain the above
|
||||
* goal is to put the new I/O directly in the dispatch
|
||||
* list
|
||||
* - as an additional restriction, Q1 and Q2 must be the only
|
||||
* busy queues for this commit to put the I/O of Q2/Q1 in
|
||||
* the dispatch list. This is necessary, because, if also
|
||||
* other queues are waiting for service, then putting new
|
||||
* I/O directly in the dispatch list may evidently cause a
|
||||
* violation of service guarantees for the other queues
|
||||
*/
|
||||
if (!bfqq ||
|
||||
(bfqq != bfqd->in_service_queue &&
|
||||
bfqd->in_service_queue != NULL &&
|
||||
bfq_tot_busy_queues(bfqd) == 1 + bfq_bfqq_busy(bfqq) &&
|
||||
(bfqq->waker_bfqq == bfqd->in_service_queue ||
|
||||
bfqd->in_service_queue->waker_bfqq == bfqq)) || at_head) {
|
||||
if (!bfqq || at_head) {
|
||||
if (at_head)
|
||||
list_add(&rq->queuelist, &bfqd->dispatch);
|
||||
else
|
||||
@ -6059,7 +6154,6 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
|
||||
* merge).
|
||||
*/
|
||||
cmd_flags = rq->cmd_flags;
|
||||
|
||||
spin_unlock_irq(&bfqd->lock);
|
||||
|
||||
bfq_update_insert_stats(q, bfqq, idle_timer_disabled,
|
||||
@ -6251,8 +6345,7 @@ static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd)
|
||||
|
||||
static void bfq_finish_requeue_request_body(struct bfq_queue *bfqq)
|
||||
{
|
||||
bfqq->allocated--;
|
||||
|
||||
bfqq_request_freed(bfqq);
|
||||
bfq_put_queue(bfqq);
|
||||
}
|
||||
|
||||
@ -6476,6 +6569,16 @@ static void bfq_finish_requeue_request(struct request *rq)
|
||||
rq->elv.priv[1] = NULL;
|
||||
}
|
||||
|
||||
static void bfq_finish_request(struct request *rq)
|
||||
{
|
||||
bfq_finish_requeue_request(rq);
|
||||
|
||||
if (rq->elv.icq) {
|
||||
put_io_context(rq->elv.icq->ioc);
|
||||
rq->elv.icq = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Removes the association between the current task and bfqq, assuming
|
||||
* that bic points to the bfq iocontext of the task.
|
||||
@ -6573,6 +6676,8 @@ static struct bfq_queue *bfq_get_bfqq_handle_split(struct bfq_data *bfqd,
|
||||
*/
|
||||
static void bfq_prepare_request(struct request *rq)
|
||||
{
|
||||
rq->elv.icq = ioc_find_get_icq(rq->q);
|
||||
|
||||
/*
|
||||
* Regardless of whether we have an icq attached, we have to
|
||||
* clear the scheduler pointers, as they might point to
|
||||
@ -6672,7 +6777,7 @@ static struct bfq_queue *bfq_init_rq(struct request *rq)
|
||||
}
|
||||
}
|
||||
|
||||
bfqq->allocated++;
|
||||
bfqq_request_allocated(bfqq);
|
||||
bfqq->ref++;
|
||||
bfq_log_bfqq(bfqd, bfqq, "get_request %p: bfqq %p, %d",
|
||||
rq, bfqq, bfqq->ref);
|
||||
@ -6835,11 +6940,11 @@ void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg)
|
||||
* See the comments on bfq_limit_depth for the purpose of
|
||||
* the depths set in the function. Return minimum shallow depth we'll use.
|
||||
*/
|
||||
static unsigned int bfq_update_depths(struct bfq_data *bfqd,
|
||||
struct sbitmap_queue *bt)
|
||||
static void bfq_update_depths(struct bfq_data *bfqd, struct sbitmap_queue *bt)
|
||||
{
|
||||
unsigned int i, j, min_shallow = UINT_MAX;
|
||||
unsigned int depth = 1U << bt->sb.shift;
|
||||
|
||||
bfqd->full_depth_shift = bt->sb.shift;
|
||||
/*
|
||||
* In-word depths if no bfq_queue is being weight-raised:
|
||||
* leaving 25% of tags only for sync reads.
|
||||
@ -6851,13 +6956,13 @@ static unsigned int bfq_update_depths(struct bfq_data *bfqd,
|
||||
* limit 'something'.
|
||||
*/
|
||||
/* no more than 50% of tags for async I/O */
|
||||
bfqd->word_depths[0][0] = max((1U << bt->sb.shift) >> 1, 1U);
|
||||
bfqd->word_depths[0][0] = max(depth >> 1, 1U);
|
||||
/*
|
||||
* no more than 75% of tags for sync writes (25% extra tags
|
||||
* w.r.t. async I/O, to prevent async I/O from starving sync
|
||||
* writes)
|
||||
*/
|
||||
bfqd->word_depths[0][1] = max(((1U << bt->sb.shift) * 3) >> 2, 1U);
|
||||
bfqd->word_depths[0][1] = max((depth * 3) >> 2, 1U);
|
||||
|
||||
/*
|
||||
* In-word depths in case some bfq_queue is being weight-
|
||||
@ -6867,25 +6972,18 @@ static unsigned int bfq_update_depths(struct bfq_data *bfqd,
|
||||
* shortage.
|
||||
*/
|
||||
/* no more than ~18% of tags for async I/O */
|
||||
bfqd->word_depths[1][0] = max(((1U << bt->sb.shift) * 3) >> 4, 1U);
|
||||
bfqd->word_depths[1][0] = max((depth * 3) >> 4, 1U);
|
||||
/* no more than ~37% of tags for sync writes (~20% extra tags) */
|
||||
bfqd->word_depths[1][1] = max(((1U << bt->sb.shift) * 6) >> 4, 1U);
|
||||
|
||||
for (i = 0; i < 2; i++)
|
||||
for (j = 0; j < 2; j++)
|
||||
min_shallow = min(min_shallow, bfqd->word_depths[i][j]);
|
||||
|
||||
return min_shallow;
|
||||
bfqd->word_depths[1][1] = max((depth * 6) >> 4, 1U);
|
||||
}
|
||||
|
||||
static void bfq_depth_updated(struct blk_mq_hw_ctx *hctx)
|
||||
{
|
||||
struct bfq_data *bfqd = hctx->queue->elevator->elevator_data;
|
||||
struct blk_mq_tags *tags = hctx->sched_tags;
|
||||
unsigned int min_shallow;
|
||||
|
||||
min_shallow = bfq_update_depths(bfqd, &tags->bitmap_tags);
|
||||
sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, min_shallow);
|
||||
bfq_update_depths(bfqd, &tags->bitmap_tags);
|
||||
sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, 1);
|
||||
}
|
||||
|
||||
static int bfq_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int index)
|
||||
@ -7300,7 +7398,7 @@ static struct elevator_type iosched_bfq_mq = {
|
||||
.limit_depth = bfq_limit_depth,
|
||||
.prepare_request = bfq_prepare_request,
|
||||
.requeue_request = bfq_finish_requeue_request,
|
||||
.finish_request = bfq_finish_requeue_request,
|
||||
.finish_request = bfq_finish_request,
|
||||
.exit_icq = bfq_exit_icq,
|
||||
.insert_requests = bfq_insert_requests,
|
||||
.dispatch_request = bfq_dispatch_request,
|
||||
|
@ -25,7 +25,7 @@
|
||||
#define BFQ_DEFAULT_GRP_IOPRIO 0
|
||||
#define BFQ_DEFAULT_GRP_CLASS IOPRIO_CLASS_BE
|
||||
|
||||
#define MAX_PID_STR_LENGTH 12
|
||||
#define MAX_BFQQ_NAME_LENGTH 16
|
||||
|
||||
/*
|
||||
* Soft real-time applications are extremely more latency sensitive
|
||||
@ -170,6 +170,9 @@ struct bfq_entity {
|
||||
/* budget, used also to calculate F_i: F_i = S_i + @budget / @weight */
|
||||
int budget;
|
||||
|
||||
/* Number of requests allocated in the subtree of this entity */
|
||||
int allocated;
|
||||
|
||||
/* device weight, if non-zero, it overrides the default weight of
|
||||
* bfq_group_data */
|
||||
int dev_weight;
|
||||
@ -266,8 +269,6 @@ struct bfq_queue {
|
||||
struct request *next_rq;
|
||||
/* number of sync and async requests queued */
|
||||
int queued[2];
|
||||
/* number of requests currently allocated */
|
||||
int allocated;
|
||||
/* number of pending metadata requests */
|
||||
int meta_pending;
|
||||
/* fifo list of requests in sort_list */
|
||||
@ -387,6 +388,8 @@ struct bfq_queue {
|
||||
struct bfq_queue *tentative_waker_bfqq;
|
||||
/* number of times the same tentative waker has been detected */
|
||||
unsigned int num_waker_detections;
|
||||
/* time when we started considering this waker */
|
||||
u64 waker_detection_started;
|
||||
|
||||
/* node for woken_list, see below */
|
||||
struct hlist_node woken_list_node;
|
||||
@ -768,6 +771,7 @@ struct bfq_data {
|
||||
* function)
|
||||
*/
|
||||
unsigned int word_depths[2][2];
|
||||
unsigned int full_depth_shift;
|
||||
};
|
||||
|
||||
enum bfqq_state_flags {
|
||||
@ -1079,26 +1083,27 @@ void bfq_add_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq);
|
||||
/* --------------- end of interface of B-WF2Q+ ---------------- */
|
||||
|
||||
/* Logging facilities. */
|
||||
static inline void bfq_pid_to_str(int pid, char *str, int len)
|
||||
static inline void bfq_bfqq_name(struct bfq_queue *bfqq, char *str, int len)
|
||||
{
|
||||
if (pid != -1)
|
||||
snprintf(str, len, "%d", pid);
|
||||
char type = bfq_bfqq_sync(bfqq) ? 'S' : 'A';
|
||||
|
||||
if (bfqq->pid != -1)
|
||||
snprintf(str, len, "bfq%d%c", bfqq->pid, type);
|
||||
else
|
||||
snprintf(str, len, "SHARED-");
|
||||
snprintf(str, len, "bfqSHARED-%c", type);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_BFQ_GROUP_IOSCHED
|
||||
struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
|
||||
|
||||
#define bfq_log_bfqq(bfqd, bfqq, fmt, args...) do { \
|
||||
char pid_str[MAX_PID_STR_LENGTH]; \
|
||||
char pid_str[MAX_BFQQ_NAME_LENGTH]; \
|
||||
if (likely(!blk_trace_note_message_enabled((bfqd)->queue))) \
|
||||
break; \
|
||||
bfq_pid_to_str((bfqq)->pid, pid_str, MAX_PID_STR_LENGTH); \
|
||||
bfq_bfqq_name((bfqq), pid_str, MAX_BFQQ_NAME_LENGTH); \
|
||||
blk_add_cgroup_trace_msg((bfqd)->queue, \
|
||||
bfqg_to_blkg(bfqq_group(bfqq))->blkcg, \
|
||||
"bfq%s%c " fmt, pid_str, \
|
||||
bfq_bfqq_sync((bfqq)) ? 'S' : 'A', ##args); \
|
||||
"%s " fmt, pid_str, ##args); \
|
||||
} while (0)
|
||||
|
||||
#define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do { \
|
||||
@ -1109,13 +1114,11 @@ struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
|
||||
#else /* CONFIG_BFQ_GROUP_IOSCHED */
|
||||
|
||||
#define bfq_log_bfqq(bfqd, bfqq, fmt, args...) do { \
|
||||
char pid_str[MAX_PID_STR_LENGTH]; \
|
||||
char pid_str[MAX_BFQQ_NAME_LENGTH]; \
|
||||
if (likely(!blk_trace_note_message_enabled((bfqd)->queue))) \
|
||||
break; \
|
||||
bfq_pid_to_str((bfqq)->pid, pid_str, MAX_PID_STR_LENGTH); \
|
||||
blk_add_trace_msg((bfqd)->queue, "bfq%s%c " fmt, pid_str, \
|
||||
bfq_bfqq_sync((bfqq)) ? 'S' : 'A', \
|
||||
##args); \
|
||||
bfq_bfqq_name((bfqq), pid_str, MAX_BFQQ_NAME_LENGTH); \
|
||||
blk_add_trace_msg((bfqd)->queue, "%s " fmt, pid_str, ##args); \
|
||||
} while (0)
|
||||
#define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do {} while (0)
|
||||
|
||||
|
block/bio.c
@ -26,7 +26,7 @@
|
||||
#include "blk-rq-qos.h"
|
||||
|
||||
struct bio_alloc_cache {
|
||||
struct bio_list free_list;
|
||||
struct bio *free_list;
|
||||
unsigned int nr;
|
||||
};
|
||||
|
||||
@ -630,7 +630,8 @@ static void bio_alloc_cache_prune(struct bio_alloc_cache *cache,
|
||||
unsigned int i = 0;
|
||||
struct bio *bio;
|
||||
|
||||
while ((bio = bio_list_pop(&cache->free_list)) != NULL) {
|
||||
while ((bio = cache->free_list) != NULL) {
|
||||
cache->free_list = bio->bi_next;
|
||||
cache->nr--;
|
||||
bio_free(bio);
|
||||
if (++i == nr)
|
||||
@ -689,7 +690,8 @@ void bio_put(struct bio *bio)
|
||||
|
||||
bio_uninit(bio);
|
||||
cache = per_cpu_ptr(bio->bi_pool->cache, get_cpu());
|
||||
bio_list_add_head(&cache->free_list, bio);
|
||||
bio->bi_next = cache->free_list;
|
||||
cache->free_list = bio;
|
||||
if (++cache->nr > ALLOC_CACHE_MAX + ALLOC_CACHE_SLACK)
|
||||
bio_alloc_cache_prune(cache, ALLOC_CACHE_SLACK);
|
||||
put_cpu();
|
||||
@ -1704,8 +1706,9 @@ struct bio *bio_alloc_kiocb(struct kiocb *kiocb, unsigned short nr_vecs,
|
||||
return bio_alloc_bioset(GFP_KERNEL, nr_vecs, bs);
|
||||
|
||||
cache = per_cpu_ptr(bs->cache, get_cpu());
|
||||
bio = bio_list_pop(&cache->free_list);
|
||||
if (bio) {
|
||||
if (cache->free_list) {
|
||||
bio = cache->free_list;
|
||||
cache->free_list = bio->bi_next;
|
||||
cache->nr--;
|
||||
put_cpu();
|
||||
bio_init(bio, nr_vecs ? bio->bi_inline_vecs : NULL, nr_vecs);
|
||||
|
@ -30,6 +30,7 @@
|
||||
#include <linux/blk-cgroup.h>
|
||||
#include <linux/tracehook.h>
|
||||
#include <linux/psi.h>
|
||||
#include <linux/part_stat.h>
|
||||
#include "blk.h"
|
||||
#include "blk-ioprio.h"
|
||||
#include "blk-throttle.h"
|
||||
|
block/blk-core.c
@ -16,7 +16,6 @@
|
||||
#include <linux/module.h>
|
||||
#include <linux/bio.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/blk-mq.h>
|
||||
#include <linux/blk-pm.h>
|
||||
#include <linux/blk-integrity.h>
|
||||
#include <linux/highmem.h>
|
||||
@ -40,6 +39,7 @@
|
||||
#include <linux/debugfs.h>
|
||||
#include <linux/bpf.h>
|
||||
#include <linux/psi.h>
|
||||
#include <linux/part_stat.h>
|
||||
#include <linux/sched/sysctl.h>
|
||||
#include <linux/blk-crypto.h>
|
||||
|
||||
@ -47,7 +47,6 @@
|
||||
#include <trace/events/block.h>
|
||||
|
||||
#include "blk.h"
|
||||
#include "blk-mq.h"
|
||||
#include "blk-mq-sched.h"
|
||||
#include "blk-pm.h"
|
||||
#include "blk-throttle.h"
|
||||
@ -67,6 +66,7 @@ DEFINE_IDA(blk_queue_ida);
|
||||
* For queue allocation
|
||||
*/
|
||||
struct kmem_cache *blk_requestq_cachep;
|
||||
struct kmem_cache *blk_requestq_srcu_cachep;
|
||||
|
||||
/*
|
||||
* Controlling structure to kblockd
|
||||
@ -109,23 +109,6 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_queue_flag_test_and_set);
|
||||
|
||||
void blk_rq_init(struct request_queue *q, struct request *rq)
|
||||
{
|
||||
memset(rq, 0, sizeof(*rq));
|
||||
|
||||
INIT_LIST_HEAD(&rq->queuelist);
|
||||
rq->q = q;
|
||||
rq->__sector = (sector_t) -1;
|
||||
INIT_HLIST_NODE(&rq->hash);
|
||||
RB_CLEAR_NODE(&rq->rb_node);
|
||||
rq->tag = BLK_MQ_NO_TAG;
|
||||
rq->internal_tag = BLK_MQ_NO_TAG;
|
||||
rq->start_time_ns = ktime_get_ns();
|
||||
rq->part = NULL;
|
||||
blk_crypto_rq_set_defaults(rq);
|
||||
}
|
||||
EXPORT_SYMBOL(blk_rq_init);
|
||||
|
||||
#define REQ_OP_NAME(name) [REQ_OP_##name] = #name
|
||||
static const char *const blk_op_name[] = {
|
||||
REQ_OP_NAME(READ),
|
||||
@ -216,38 +199,15 @@ int blk_status_to_errno(blk_status_t status)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_status_to_errno);
|
||||
|
||||
void blk_print_req_error(struct request *req, blk_status_t status)
|
||||
const char *blk_status_to_str(blk_status_t status)
|
||||
{
|
||||
int idx = (__force int)status;
|
||||
|
||||
if (WARN_ON_ONCE(idx >= ARRAY_SIZE(blk_errors)))
|
||||
return;
|
||||
|
||||
printk_ratelimited(KERN_ERR
|
||||
"%s error, dev %s, sector %llu op 0x%x:(%s) flags 0x%x "
|
||||
"phys_seg %u prio class %u\n",
|
||||
blk_errors[idx].name,
|
||||
req->rq_disk ? req->rq_disk->disk_name : "?",
|
||||
blk_rq_pos(req), req_op(req), blk_op_str(req_op(req)),
|
||||
req->cmd_flags & ~REQ_OP_MASK,
|
||||
req->nr_phys_segments,
|
||||
IOPRIO_PRIO_CLASS(req->ioprio));
|
||||
return "<null>";
|
||||
return blk_errors[idx].name;
|
||||
}
|
||||
|
||||
void blk_dump_rq_flags(struct request *rq, char *msg)
|
||||
{
|
||||
printk(KERN_INFO "%s: dev %s: flags=%llx\n", msg,
|
||||
rq->rq_disk ? rq->rq_disk->disk_name : "?",
|
||||
(unsigned long long) rq->cmd_flags);
|
||||
|
||||
printk(KERN_INFO " sector %llu, nr/cnr %u/%u\n",
|
||||
(unsigned long long)blk_rq_pos(rq),
|
||||
blk_rq_sectors(rq), blk_rq_cur_sectors(rq));
|
||||
printk(KERN_INFO " bio %p, biotail %p, len %u\n",
|
||||
rq->bio, rq->biotail, blk_rq_bytes(rq));
|
||||
}
|
||||
EXPORT_SYMBOL(blk_dump_rq_flags);
|
||||
|
||||
/**
|
||||
* blk_sync_queue - cancel any pending callbacks on a queue
|
||||
* @q: the queue
|
||||
@ -478,21 +438,27 @@ static void blk_timeout_work(struct work_struct *work)
|
||||
{
|
||||
}
|
||||
|
||||
struct request_queue *blk_alloc_queue(int node_id)
|
||||
struct request_queue *blk_alloc_queue(int node_id, bool alloc_srcu)
|
||||
{
|
||||
struct request_queue *q;
|
||||
int ret;
|
||||
|
||||
q = kmem_cache_alloc_node(blk_requestq_cachep,
|
||||
GFP_KERNEL | __GFP_ZERO, node_id);
|
||||
q = kmem_cache_alloc_node(blk_get_queue_kmem_cache(alloc_srcu),
|
||||
GFP_KERNEL | __GFP_ZERO, node_id);
|
||||
if (!q)
|
||||
return NULL;
|
||||
|
||||
if (alloc_srcu) {
|
||||
blk_queue_flag_set(QUEUE_FLAG_HAS_SRCU, q);
|
||||
if (init_srcu_struct(q->srcu) != 0)
|
||||
goto fail_q;
|
||||
}
|
||||
|
||||
q->last_merge = NULL;
|
||||
|
||||
q->id = ida_simple_get(&blk_queue_ida, 0, 0, GFP_KERNEL);
|
||||
if (q->id < 0)
|
||||
goto fail_q;
|
||||
goto fail_srcu;
|
||||
|
||||
ret = bioset_init(&q->bio_split, BIO_POOL_SIZE, 0, 0);
|
||||
if (ret)
|
||||
@ -549,8 +515,11 @@ fail_split:
|
||||
bioset_exit(&q->bio_split);
|
||||
fail_id:
|
||||
ida_simple_remove(&blk_queue_ida, q->id);
|
||||
fail_srcu:
|
||||
if (alloc_srcu)
|
||||
cleanup_srcu_struct(q->srcu);
|
||||
fail_q:
|
||||
kmem_cache_free(blk_requestq_cachep, q);
|
||||
kmem_cache_free(blk_get_queue_kmem_cache(alloc_srcu), q);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -594,7 +563,7 @@ static int __init setup_fail_make_request(char *str)
|
||||
}
|
||||
__setup("fail_make_request=", setup_fail_make_request);
|
||||
|
||||
static bool should_fail_request(struct block_device *part, unsigned int bytes)
|
||||
bool should_fail_request(struct block_device *part, unsigned int bytes)
|
||||
{
|
||||
return part->bd_make_it_fail && should_fail(&fail_make_request, bytes);
|
||||
}
|
||||
@ -608,15 +577,6 @@ static int __init fail_make_request_debugfs(void)
|
||||
}
|
||||
|
||||
late_initcall(fail_make_request_debugfs);
|
||||
|
||||
#else /* CONFIG_FAIL_MAKE_REQUEST */
|
||||
|
||||
static inline bool should_fail_request(struct block_device *part,
|
||||
unsigned int bytes)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
#endif /* CONFIG_FAIL_MAKE_REQUEST */
|
||||
|
||||
static inline bool bio_check_ro(struct bio *bio)
|
||||
@ -802,15 +762,6 @@ noinline_for_stack bool submit_bio_checks(struct bio *bio)
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* Various block parts want %current->io_context, so allocate it up
|
||||
* front rather than dealing with lots of pain to allocate it only
|
||||
* where needed. This may fail and the block layer knows how to live
|
||||
* with it.
|
||||
*/
|
||||
if (unlikely(!current->io_context))
|
||||
create_task_io_context(current, GFP_ATOMIC, q->node);
|
||||
|
||||
if (blk_throtl_bio(bio))
|
||||
return false;
|
||||
|
||||
@ -836,17 +787,21 @@ end_io:
|
||||
|
||||
static void __submit_bio_fops(struct gendisk *disk, struct bio *bio)
|
||||
{
|
||||
if (unlikely(bio_queue_enter(bio) != 0))
|
||||
return;
|
||||
if (submit_bio_checks(bio) && blk_crypto_bio_prep(&bio))
|
||||
disk->fops->submit_bio(bio);
|
||||
blk_queue_exit(disk->queue);
|
||||
if (blk_crypto_bio_prep(&bio)) {
|
||||
if (likely(bio_queue_enter(bio) == 0)) {
|
||||
disk->fops->submit_bio(bio);
|
||||
blk_queue_exit(disk->queue);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void __submit_bio(struct bio *bio)
|
||||
{
|
||||
struct gendisk *disk = bio->bi_bdev->bd_disk;
|
||||
|
||||
if (unlikely(!submit_bio_checks(bio)))
|
||||
return;
|
||||
|
||||
if (!disk->fops->submit_bio)
|
||||
blk_mq_submit_bio(bio);
|
||||
else
|
||||
@ -1090,135 +1045,7 @@ int iocb_bio_iopoll(struct kiocb *kiocb, struct io_comp_batch *iob,
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(iocb_bio_iopoll);
|
||||
|
||||
/**
|
||||
* blk_cloned_rq_check_limits - Helper function to check a cloned request
|
||||
* for the new queue limits
|
||||
* @q: the queue
|
||||
* @rq: the request being checked
|
||||
*
|
||||
* Description:
|
||||
* @rq may have been made based on weaker limitations of upper-level queues
|
||||
* in request stacking drivers, and it may violate the limitation of @q.
|
||||
* Since the block layer and the underlying device driver trust @rq
|
||||
* after it is inserted to @q, it should be checked against @q before
|
||||
* the insertion using this generic function.
|
||||
*
|
||||
* Request stacking drivers like request-based dm may change the queue
|
||||
* limits when retrying requests on other queues. Those requests need
|
||||
* to be checked against the new queue limits again during dispatch.
|
||||
*/
|
||||
static blk_status_t blk_cloned_rq_check_limits(struct request_queue *q,
|
||||
struct request *rq)
|
||||
{
|
||||
unsigned int max_sectors = blk_queue_get_max_sectors(q, req_op(rq));
|
||||
|
||||
if (blk_rq_sectors(rq) > max_sectors) {
|
||||
/*
|
||||
* SCSI device does not have a good way to return if
|
||||
* Write Same/Zero is actually supported. If a device rejects
|
||||
* a non-read/write command (discard, write same,etc.) the
|
||||
* low-level device driver will set the relevant queue limit to
|
||||
* 0 to prevent blk-lib from issuing more of the offending
|
||||
* operations. Commands queued prior to the queue limit being
|
||||
* reset need to be completed with BLK_STS_NOTSUPP to avoid I/O
|
||||
* errors being propagated to upper layers.
|
||||
*/
|
||||
if (max_sectors == 0)
|
||||
return BLK_STS_NOTSUPP;
|
||||
|
||||
printk(KERN_ERR "%s: over max size limit. (%u > %u)\n",
|
||||
__func__, blk_rq_sectors(rq), max_sectors);
|
||||
return BLK_STS_IOERR;
|
||||
}
|
||||
|
||||
/*
|
||||
* The queue settings related to segment counting may differ from the
|
||||
* original queue.
|
||||
*/
|
||||
rq->nr_phys_segments = blk_recalc_rq_segments(rq);
|
||||
if (rq->nr_phys_segments > queue_max_segments(q)) {
|
||||
printk(KERN_ERR "%s: over max segments limit. (%hu > %hu)\n",
|
||||
__func__, rq->nr_phys_segments, queue_max_segments(q));
|
||||
return BLK_STS_IOERR;
|
||||
}
|
||||
|
||||
return BLK_STS_OK;
|
||||
}
|
||||
|
||||
/**
|
||||
* blk_insert_cloned_request - Helper for stacking drivers to submit a request
|
||||
* @q: the queue to submit the request
|
||||
* @rq: the request being queued
|
||||
*/
|
||||
blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq)
|
||||
{
|
||||
blk_status_t ret;
|
||||
|
||||
ret = blk_cloned_rq_check_limits(q, rq);
|
||||
if (ret != BLK_STS_OK)
|
||||
return ret;
|
||||
|
||||
if (rq->rq_disk &&
|
||||
should_fail_request(rq->rq_disk->part0, blk_rq_bytes(rq)))
|
||||
return BLK_STS_IOERR;
|
||||
|
||||
if (blk_crypto_insert_cloned_request(rq))
|
||||
return BLK_STS_IOERR;
|
||||
|
||||
blk_account_io_start(rq);
|
||||
|
||||
/*
|
||||
* Since we have a scheduler attached on the top device,
|
||||
* bypass a potential scheduler on the bottom device for
|
||||
* insert.
|
||||
*/
|
||||
return blk_mq_request_issue_directly(rq, true);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_insert_cloned_request);
|
||||
|
||||
/**
|
||||
* blk_rq_err_bytes - determine number of bytes till the next failure boundary
|
||||
* @rq: request to examine
|
||||
*
|
||||
* Description:
|
||||
* A request could be merge of IOs which require different failure
|
||||
* handling. This function determines the number of bytes which
|
||||
* can be failed from the beginning of the request without
|
||||
* crossing into area which need to be retried further.
|
||||
*
|
||||
* Return:
|
||||
* The number of bytes to fail.
|
||||
*/
|
||||
unsigned int blk_rq_err_bytes(const struct request *rq)
|
||||
{
|
||||
unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK;
|
||||
unsigned int bytes = 0;
|
||||
struct bio *bio;
|
||||
|
||||
if (!(rq->rq_flags & RQF_MIXED_MERGE))
|
||||
return blk_rq_bytes(rq);
|
||||
|
||||
/*
|
||||
* Currently the only 'mixing' which can happen is between
|
||||
* different fastfail types. We can safely fail portions
|
||||
* which have all the failfast bits that the first one has -
|
||||
* the ones which are at least as eager to fail as the first
|
||||
* one.
|
||||
*/
|
||||
for (bio = rq->bio; bio; bio = bio->bi_next) {
|
||||
if ((bio->bi_opf & ff) != ff)
|
||||
break;
|
||||
bytes += bio->bi_iter.bi_size;
|
||||
}
|
||||
|
||||
/* this could lead to infinite loop */
|
||||
BUG_ON(blk_rq_bytes(rq) && !bytes);
|
||||
return bytes;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_rq_err_bytes);
|
||||
|
||||
static void update_io_ticks(struct block_device *part, unsigned long now,
|
||||
bool end)
|
||||
void update_io_ticks(struct block_device *part, unsigned long now, bool end)
|
||||
{
|
||||
unsigned long stamp;
|
||||
again:
|
||||
@ -1233,30 +1060,6 @@ again:
|
||||
}
|
||||
}
|
||||
|
||||
void __blk_account_io_done(struct request *req, u64 now)
|
||||
{
|
||||
const int sgrp = op_stat_group(req_op(req));
|
||||
|
||||
part_stat_lock();
|
||||
update_io_ticks(req->part, jiffies, true);
|
||||
part_stat_inc(req->part, ios[sgrp]);
|
||||
part_stat_add(req->part, nsecs[sgrp], now - req->start_time_ns);
|
||||
part_stat_unlock();
|
||||
}
|
||||
|
||||
void __blk_account_io_start(struct request *rq)
|
||||
{
|
||||
/* passthrough requests can hold bios that do not have ->bi_bdev set */
|
||||
if (rq->bio && rq->bio->bi_bdev)
|
||||
rq->part = rq->bio->bi_bdev;
|
||||
else
|
||||
rq->part = rq->rq_disk->part0;
|
||||
|
||||
part_stat_lock();
|
||||
update_io_ticks(rq->part, jiffies, false);
|
||||
part_stat_unlock();
|
||||
}
|
||||
|
||||
static unsigned long __part_start_io_acct(struct block_device *part,
|
||||
unsigned int sectors, unsigned int op)
|
||||
{
|
||||
@ -1320,46 +1123,6 @@ void disk_end_io_acct(struct gendisk *disk, unsigned int op,
|
||||
}
|
||||
EXPORT_SYMBOL(disk_end_io_acct);
|
||||
|
||||
/*
|
||||
* Steal bios from a request and add them to a bio list.
|
||||
* The request must not have been partially completed before.
|
||||
*/
|
||||
void blk_steal_bios(struct bio_list *list, struct request *rq)
|
||||
{
|
||||
if (rq->bio) {
|
||||
if (list->tail)
|
||||
list->tail->bi_next = rq->bio;
|
||||
else
|
||||
list->head = rq->bio;
|
||||
list->tail = rq->biotail;
|
||||
|
||||
rq->bio = NULL;
|
||||
rq->biotail = NULL;
|
||||
}
|
||||
|
||||
rq->__data_len = 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_steal_bios);
|
||||
|
||||
#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
|
||||
/**
|
||||
* rq_flush_dcache_pages - Helper function to flush all pages in a request
|
||||
* @rq: the request to be flushed
|
||||
*
|
||||
* Description:
|
||||
* Flush all pages in @rq.
|
||||
*/
|
||||
void rq_flush_dcache_pages(struct request *rq)
|
||||
{
|
||||
struct req_iterator iter;
|
||||
struct bio_vec bvec;
|
||||
|
||||
rq_for_each_segment(bvec, rq, iter)
|
||||
flush_dcache_page(bvec.bv_page);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rq_flush_dcache_pages);
|
||||
#endif
|
||||
|
||||
/**
|
||||
* blk_lld_busy - Check if underlying low-level drivers of a device are busy
|
||||
* @q : the queue of the device being checked
|
||||
@ -1388,93 +1151,6 @@ int blk_lld_busy(struct request_queue *q)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_lld_busy);
|
||||
|
||||
/**
|
||||
* blk_rq_unprep_clone - Helper function to free all bios in a cloned request
|
||||
* @rq: the clone request to be cleaned up
|
||||
*
|
||||
* Description:
|
||||
* Free all bios in @rq for a cloned request.
|
||||
*/
|
||||
void blk_rq_unprep_clone(struct request *rq)
|
||||
{
|
||||
struct bio *bio;
|
||||
|
||||
while ((bio = rq->bio) != NULL) {
|
||||
rq->bio = bio->bi_next;
|
||||
|
||||
bio_put(bio);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_rq_unprep_clone);
|
||||
|
||||
/**
|
||||
* blk_rq_prep_clone - Helper function to setup clone request
|
||||
* @rq: the request to be setup
|
||||
* @rq_src: original request to be cloned
|
||||
* @bs: bio_set that bios for clone are allocated from
|
||||
* @gfp_mask: memory allocation mask for bio
|
||||
* @bio_ctr: setup function to be called for each clone bio.
|
||||
* Returns %0 for success, non %0 for failure.
|
||||
* @data: private data to be passed to @bio_ctr
|
||||
*
|
||||
* Description:
|
||||
* Clones bios in @rq_src to @rq, and copies attributes of @rq_src to @rq.
|
||||
* Also, pages which the original bios are pointing to are not copied
|
||||
* and the cloned bios just point same pages.
|
||||
* So cloned bios must be completed before original bios, which means
|
||||
* the caller must complete @rq before @rq_src.
|
||||
*/
|
||||
int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
|
||||
struct bio_set *bs, gfp_t gfp_mask,
|
||||
int (*bio_ctr)(struct bio *, struct bio *, void *),
|
||||
void *data)
|
||||
{
|
||||
struct bio *bio, *bio_src;
|
||||
|
||||
if (!bs)
|
||||
bs = &fs_bio_set;
|
||||
|
||||
__rq_for_each_bio(bio_src, rq_src) {
|
||||
bio = bio_clone_fast(bio_src, gfp_mask, bs);
|
||||
if (!bio)
|
||||
goto free_and_out;
|
||||
|
||||
if (bio_ctr && bio_ctr(bio, bio_src, data))
|
||||
goto free_and_out;
|
||||
|
||||
if (rq->bio) {
|
||||
rq->biotail->bi_next = bio;
|
||||
rq->biotail = bio;
|
||||
} else {
|
||||
rq->bio = rq->biotail = bio;
|
||||
}
|
||||
bio = NULL;
|
||||
}
|
||||
|
||||
/* Copy attributes of the original request to the clone request. */
|
||||
rq->__sector = blk_rq_pos(rq_src);
|
||||
rq->__data_len = blk_rq_bytes(rq_src);
|
||||
if (rq_src->rq_flags & RQF_SPECIAL_PAYLOAD) {
|
||||
rq->rq_flags |= RQF_SPECIAL_PAYLOAD;
|
||||
rq->special_vec = rq_src->special_vec;
|
||||
}
|
||||
rq->nr_phys_segments = rq_src->nr_phys_segments;
|
||||
rq->ioprio = rq_src->ioprio;
|
||||
|
||||
if (rq->bio && blk_crypto_rq_bio_prep(rq, rq->bio, gfp_mask) < 0)
|
||||
goto free_and_out;
|
||||
|
||||
return 0;
|
||||
|
||||
free_and_out:
|
||||
if (bio)
|
||||
bio_put(bio);
|
||||
blk_rq_unprep_clone(rq);
|
||||
|
||||
return -ENOMEM;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_rq_prep_clone);
|
||||
|
||||
int kblockd_schedule_work(struct work_struct *work)
|
||||
{
|
||||
return queue_work(kblockd_workqueue, work);
|
||||
@ -1639,6 +1315,9 @@ int __init blk_dev_init(void)
|
||||
sizeof_field(struct request, cmd_flags));
|
||||
BUILD_BUG_ON(REQ_OP_BITS + REQ_FLAG_BITS > 8 *
|
||||
sizeof_field(struct bio, bi_opf));
|
||||
BUILD_BUG_ON(ALIGN(offsetof(struct request_queue, srcu),
|
||||
__alignof__(struct request_queue)) !=
|
||||
sizeof(struct request_queue));
|
||||
|
||||
/* used for unplugging and affects IO latency/throughput - HIGHPRI */
|
||||
kblockd_workqueue = alloc_workqueue("kblockd",
|
||||
@ -1649,6 +1328,10 @@ int __init blk_dev_init(void)
|
||||
blk_requestq_cachep = kmem_cache_create("request_queue",
|
||||
sizeof(struct request_queue), 0, SLAB_PANIC, NULL);
|
||||
|
||||
blk_requestq_srcu_cachep = kmem_cache_create("request_queue_srcu",
|
||||
sizeof(struct request_queue) +
|
||||
sizeof(struct srcu_struct), 0, SLAB_PANIC, NULL);
|
||||
|
||||
blk_debugfs_root = debugfs_create_dir("block", NULL);
|
||||
|
||||
return 0;
|
||||
|
@ -463,11 +463,6 @@ bool blk_crypto_register(struct blk_crypto_profile *profile,
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_crypto_register);
|
||||
|
||||
void blk_crypto_unregister(struct request_queue *q)
|
||||
{
|
||||
q->crypto_profile = NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* blk_crypto_intersect_capabilities() - restrict supported crypto capabilities
|
||||
* by child device
|
||||
|
block/blk-exec.c
@ -1,116 +0,0 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Functions related to setting various queue properties from drivers
|
||||
*/
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/bio.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/blk-mq.h>
|
||||
#include <linux/sched/sysctl.h>
|
||||
|
||||
#include "blk.h"
|
||||
#include "blk-mq-sched.h"
|
||||
|
||||
/**
|
||||
* blk_end_sync_rq - executes a completion event on a request
|
||||
* @rq: request to complete
|
||||
* @error: end I/O status of the request
|
||||
*/
|
||||
static void blk_end_sync_rq(struct request *rq, blk_status_t error)
|
||||
{
|
||||
struct completion *waiting = rq->end_io_data;
|
||||
|
||||
rq->end_io_data = (void *)(uintptr_t)error;
|
||||
|
||||
/*
|
||||
* complete last, if this is a stack request the process (and thus
|
||||
* the rq pointer) could be invalid right after this complete()
|
||||
*/
|
||||
complete(waiting);
|
||||
}
|
||||
|
||||
/**
|
||||
* blk_execute_rq_nowait - insert a request to I/O scheduler for execution
|
||||
* @bd_disk: matching gendisk
|
||||
* @rq: request to insert
|
||||
* @at_head: insert request at head or tail of queue
|
||||
* @done: I/O completion handler
|
||||
*
|
||||
* Description:
|
||||
* Insert a fully prepared request at the back of the I/O scheduler queue
|
||||
* for execution. Don't wait for completion.
|
||||
*
|
||||
* Note:
|
||||
* This function will invoke @done directly if the queue is dead.
|
||||
*/
|
||||
void blk_execute_rq_nowait(struct gendisk *bd_disk, struct request *rq,
|
||||
int at_head, rq_end_io_fn *done)
|
||||
{
|
||||
WARN_ON(irqs_disabled());
|
||||
WARN_ON(!blk_rq_is_passthrough(rq));
|
||||
|
||||
rq->rq_disk = bd_disk;
|
||||
rq->end_io = done;
|
||||
|
||||
blk_account_io_start(rq);
|
||||
|
||||
/*
|
||||
* don't check dying flag for MQ because the request won't
|
||||
* be reused after dying flag is set
|
||||
*/
|
||||
blk_mq_sched_insert_request(rq, at_head, true, false);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_execute_rq_nowait);
|
||||
|
||||
static bool blk_rq_is_poll(struct request *rq)
|
||||
{
|
||||
if (!rq->mq_hctx)
|
||||
return false;
|
||||
if (rq->mq_hctx->type != HCTX_TYPE_POLL)
|
||||
return false;
|
||||
if (WARN_ON_ONCE(!rq->bio))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
static void blk_rq_poll_completion(struct request *rq, struct completion *wait)
|
||||
{
|
||||
do {
|
||||
bio_poll(rq->bio, NULL, 0);
|
||||
cond_resched();
|
||||
} while (!completion_done(wait));
|
||||
}
|
||||
|
||||
/**
|
||||
* blk_execute_rq - insert a request into queue for execution
|
||||
* @bd_disk: matching gendisk
|
||||
* @rq: request to insert
|
||||
* @at_head: insert request at head or tail of queue
|
||||
*
|
||||
* Description:
|
||||
* Insert a fully prepared request at the back of the I/O scheduler queue
|
||||
* for execution and wait for completion.
|
||||
* Return: The blk_status_t result provided to blk_mq_end_request().
|
||||
*/
|
||||
blk_status_t blk_execute_rq(struct gendisk *bd_disk, struct request *rq, int at_head)
|
||||
{
|
||||
DECLARE_COMPLETION_ONSTACK(wait);
|
||||
unsigned long hang_check;
|
||||
|
||||
rq->end_io_data = &wait;
|
||||
blk_execute_rq_nowait(bd_disk, rq, at_head, blk_end_sync_rq);
|
||||
|
||||
/* Prevent hang_check timer from firing at us during very long I/O */
|
||||
hang_check = sysctl_hung_task_timeout_secs;
|
||||
|
||||
if (blk_rq_is_poll(rq))
|
||||
blk_rq_poll_completion(rq, &wait);
|
||||
else if (hang_check)
|
||||
while (!wait_for_completion_io_timeout(&wait, hang_check * (HZ/2)));
|
||||
else
|
||||
wait_for_completion_io(&wait);
|
||||
|
||||
return (blk_status_t)(uintptr_t)rq->end_io_data;
|
||||
}
|
||||
EXPORT_SYMBOL(blk_execute_rq);
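The synchronous helper removed above is a thin wait layer over the asynchronous path: blk_execute_rq_nowait() stashes an on-stack completion in end_io_data, blk_end_sync_rq() completes it from the end_io callback, and blk_execute_rq() either sleeps on the completion or, for a polled hardware queue, spins until completion_done() reports that the callback has run. A rough userspace sketch of that shape, with invented names and pthreads standing in for kernel completions:

/*
 * Rough userspace sketch of the sync-over-async pattern above; all names
 * are invented and pthreads stand in for kernel completions.
 */
#include <pthread.h>
#include <sched.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

struct completion {
	pthread_mutex_t lock;
	pthread_cond_t cond;
	bool done;
};

struct fake_request {
	void (*end_io)(struct fake_request *rq);
	void *end_io_data;	/* the waiter's on-stack completion */
	bool polled;		/* analogue of an HCTX_TYPE_POLL queue */
};

static void complete(struct completion *c)
{
	pthread_mutex_lock(&c->lock);
	c->done = true;
	pthread_cond_signal(&c->cond);
	pthread_mutex_unlock(&c->lock);
}

static bool completion_done(struct completion *c)
{
	pthread_mutex_lock(&c->lock);
	bool done = c->done;
	pthread_mutex_unlock(&c->lock);
	return done;
}

/* end_io handler: complete last, the waiter's stack may vanish after this */
static void end_sync_rq(struct fake_request *rq)
{
	complete(rq->end_io_data);
}

static void *io_worker(void *arg)		/* pretend device-side work */
{
	struct fake_request *rq = arg;

	usleep(1000);
	rq->end_io(rq);
	return NULL;
}

static void execute_rq(struct fake_request *rq)
{
	struct completion wait = {
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.cond = PTHREAD_COND_INITIALIZER,
	};
	pthread_t worker;

	rq->end_io = end_sync_rq;
	rq->end_io_data = &wait;
	pthread_create(&worker, NULL, io_worker, rq);	/* async submit */

	if (rq->polled) {
		while (!completion_done(&wait))		/* busy-poll path */
			sched_yield();
	} else {
		pthread_mutex_lock(&wait.lock);		/* sleeping path */
		while (!wait.done)
			pthread_cond_wait(&wait.cond, &wait.lock);
		pthread_mutex_unlock(&wait.lock);
	}
	pthread_join(worker, NULL);
}

int main(void)
{
	struct fake_request rq = { .polled = false };

	execute_rq(&rq);
	puts("request completed");
	return 0;
}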
|
@ -69,6 +69,7 @@
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/gfp.h>
|
||||
#include <linux/blk-mq.h>
|
||||
#include <linux/part_stat.h>
|
||||
|
||||
#include "blk.h"
|
||||
#include "blk-mq.h"
|
||||
@ -95,6 +96,12 @@ enum {
|
||||
static void blk_kick_flush(struct request_queue *q,
|
||||
struct blk_flush_queue *fq, unsigned int flags);
|
||||
|
||||
static inline struct blk_flush_queue *
|
||||
blk_get_flush_queue(struct request_queue *q, struct blk_mq_ctx *ctx)
|
||||
{
|
||||
return blk_mq_map_queue(q, REQ_OP_FLUSH, ctx)->fq;
|
||||
}
|
||||
|
||||
static unsigned int blk_flush_policy(unsigned long fflags, struct request *rq)
|
||||
{
|
||||
unsigned int policy = 0;
|
||||
@ -138,7 +145,7 @@ static void blk_flush_queue_rq(struct request *rq, bool add_front)
|
||||
|
||||
static void blk_account_io_flush(struct request *rq)
|
||||
{
|
||||
struct block_device *part = rq->rq_disk->part0;
|
||||
struct block_device *part = rq->q->disk->part0;
|
||||
|
||||
part_stat_lock();
|
||||
part_stat_inc(part, ios[STAT_FLUSH]);
|
||||
@ -222,7 +229,7 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error)
|
||||
/* release the tag's ownership to the req cloned from */
|
||||
spin_lock_irqsave(&fq->mq_flush_lock, flags);
|
||||
|
||||
if (!refcount_dec_and_test(&flush_rq->ref)) {
|
||||
if (!req_ref_put_and_test(flush_rq)) {
|
||||
fq->rq_status = error;
|
||||
spin_unlock_irqrestore(&fq->mq_flush_lock, flags);
|
||||
return;
|
||||
@ -235,8 +242,10 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error)
|
||||
* avoiding use-after-free.
|
||||
*/
|
||||
WRITE_ONCE(flush_rq->state, MQ_RQ_IDLE);
|
||||
if (fq->rq_status != BLK_STS_OK)
|
||||
if (fq->rq_status != BLK_STS_OK) {
|
||||
error = fq->rq_status;
|
||||
fq->rq_status = BLK_STS_OK;
|
||||
}
|
||||
|
||||
if (!q->elevator) {
|
||||
flush_rq->tag = BLK_MQ_NO_TAG;
|
||||
@ -332,7 +341,6 @@ static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq,
|
||||
flush_rq->cmd_flags = REQ_OP_FLUSH | REQ_PREFLUSH;
|
||||
flush_rq->cmd_flags |= (flags & REQ_DRV) | (flags & REQ_FAILFAST_MASK);
|
||||
flush_rq->rq_flags |= RQF_FLUSH_SEQ;
|
||||
flush_rq->rq_disk = first_rq->rq_disk;
|
||||
flush_rq->end_io = flush_end_io;
|
||||
/*
|
||||
* Order WRITE ->end_io and WRITE rq->ref, and its pair is the one
|
||||
@ -341,7 +349,7 @@ static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq,
|
||||
* and READ flush_rq->end_io
|
||||
*/
|
||||
smp_wmb();
|
||||
refcount_set(&flush_rq->ref, 1);
|
||||
req_ref_set(flush_rq, 1);
|
||||
|
||||
blk_flush_queue_rq(flush_rq, false);
|
||||
}
|
||||
|
@ -411,7 +411,7 @@ void blk_integrity_register(struct gendisk *disk, struct blk_integrity *template
|
||||
#ifdef CONFIG_BLK_INLINE_ENCRYPTION
|
||||
if (disk->queue->crypto_profile) {
|
||||
pr_warn("blk-integrity: Integrity and hardware inline encryption are not supported together. Disabling hardware inline encryption.\n");
|
||||
blk_crypto_unregister(disk->queue);
|
||||
disk->queue->crypto_profile = NULL;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
block/blk-ioc.c (348 lines changed)
@ -8,22 +8,25 @@
|
||||
#include <linux/bio.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/security.h>
|
||||
#include <linux/sched/task.h>
|
||||
|
||||
#include "blk.h"
|
||||
#include "blk-mq-sched.h"
|
||||
|
||||
/*
|
||||
* For io context allocations
|
||||
*/
|
||||
static struct kmem_cache *iocontext_cachep;
|
||||
|
||||
#ifdef CONFIG_BLK_ICQ
|
||||
/**
|
||||
* get_io_context - increment reference count to io_context
|
||||
* @ioc: io_context to get
|
||||
*
|
||||
* Increment reference count to @ioc.
|
||||
*/
|
||||
void get_io_context(struct io_context *ioc)
|
||||
static void get_io_context(struct io_context *ioc)
|
||||
{
|
||||
BUG_ON(atomic_long_read(&ioc->refcount) <= 0);
|
||||
atomic_long_inc(&ioc->refcount);
|
||||
@ -53,6 +56,16 @@ static void ioc_exit_icq(struct io_cq *icq)
|
||||
icq->flags |= ICQ_EXITED;
|
||||
}
|
||||
|
||||
static void ioc_exit_icqs(struct io_context *ioc)
|
||||
{
|
||||
struct io_cq *icq;
|
||||
|
||||
spin_lock_irq(&ioc->lock);
|
||||
hlist_for_each_entry(icq, &ioc->icq_list, ioc_node)
|
||||
ioc_exit_icq(icq);
|
||||
spin_unlock_irq(&ioc->lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Release an icq. Called with ioc locked for blk-mq, and with both ioc
|
||||
* and queue locked for legacy.
|
||||
@ -132,102 +145,22 @@ static void ioc_release_fn(struct work_struct *work)
|
||||
kmem_cache_free(iocontext_cachep, ioc);
|
||||
}
|
||||
|
||||
/**
|
||||
* put_io_context - put a reference of io_context
|
||||
* @ioc: io_context to put
|
||||
*
|
||||
* Decrement reference count of @ioc and release it if the count reaches
|
||||
* zero.
|
||||
/*
|
||||
* Releasing icqs requires reverse order double locking and we may already be
|
||||
* holding a queue_lock. Do it asynchronously from a workqueue.
|
||||
*/
|
||||
void put_io_context(struct io_context *ioc)
|
||||
{
|
||||
unsigned long flags;
|
||||
bool free_ioc = false;
|
||||
|
||||
if (ioc == NULL)
|
||||
return;
|
||||
|
||||
BUG_ON(atomic_long_read(&ioc->refcount) <= 0);
|
||||
|
||||
/*
|
||||
* Releasing ioc requires reverse order double locking and we may
|
||||
* already be holding a queue_lock. Do it asynchronously from wq.
|
||||
*/
|
||||
if (atomic_long_dec_and_test(&ioc->refcount)) {
|
||||
spin_lock_irqsave(&ioc->lock, flags);
|
||||
if (!hlist_empty(&ioc->icq_list))
|
||||
queue_work(system_power_efficient_wq,
|
||||
&ioc->release_work);
|
||||
else
|
||||
free_ioc = true;
|
||||
spin_unlock_irqrestore(&ioc->lock, flags);
|
||||
}
|
||||
|
||||
if (free_ioc)
|
||||
kmem_cache_free(iocontext_cachep, ioc);
|
||||
}
|
||||
|
||||
/**
|
||||
* put_io_context_active - put active reference on ioc
|
||||
* @ioc: ioc of interest
|
||||
*
|
||||
* Undo get_io_context_active(). If active reference reaches zero after
|
||||
* put, @ioc can never issue further IOs and ioscheds are notified.
|
||||
*/
|
||||
void put_io_context_active(struct io_context *ioc)
|
||||
{
|
||||
struct io_cq *icq;
|
||||
|
||||
if (!atomic_dec_and_test(&ioc->active_ref)) {
|
||||
put_io_context(ioc);
|
||||
return;
|
||||
}
|
||||
|
||||
spin_lock_irq(&ioc->lock);
|
||||
hlist_for_each_entry(icq, &ioc->icq_list, ioc_node) {
|
||||
if (icq->flags & ICQ_EXITED)
|
||||
continue;
|
||||
|
||||
ioc_exit_icq(icq);
|
||||
}
|
||||
spin_unlock_irq(&ioc->lock);
|
||||
|
||||
put_io_context(ioc);
|
||||
}
|
||||
|
||||
/* Called by the exiting task */
|
||||
void exit_io_context(struct task_struct *task)
|
||||
{
|
||||
struct io_context *ioc;
|
||||
|
||||
task_lock(task);
|
||||
ioc = task->io_context;
|
||||
task->io_context = NULL;
|
||||
task_unlock(task);
|
||||
|
||||
atomic_dec(&ioc->nr_tasks);
|
||||
put_io_context_active(ioc);
|
||||
}
|
||||
|
||||
static void __ioc_clear_queue(struct list_head *icq_list)
|
||||
static bool ioc_delay_free(struct io_context *ioc)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
rcu_read_lock();
|
||||
while (!list_empty(icq_list)) {
|
||||
struct io_cq *icq = list_entry(icq_list->next,
|
||||
struct io_cq, q_node);
|
||||
struct io_context *ioc = icq->ioc;
|
||||
|
||||
spin_lock_irqsave(&ioc->lock, flags);
|
||||
if (icq->flags & ICQ_DESTROYED) {
|
||||
spin_unlock_irqrestore(&ioc->lock, flags);
|
||||
continue;
|
||||
}
|
||||
ioc_destroy_icq(icq);
|
||||
spin_lock_irqsave(&ioc->lock, flags);
|
||||
if (!hlist_empty(&ioc->icq_list)) {
|
||||
queue_work(system_power_efficient_wq, &ioc->release_work);
|
||||
spin_unlock_irqrestore(&ioc->lock, flags);
|
||||
return true;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
spin_unlock_irqrestore(&ioc->lock, flags);
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -244,93 +177,156 @@ void ioc_clear_queue(struct request_queue *q)
|
||||
list_splice_init(&q->icq_list, &icq_list);
|
||||
spin_unlock_irq(&q->queue_lock);
|
||||
|
||||
__ioc_clear_queue(&icq_list);
|
||||
}
|
||||
rcu_read_lock();
|
||||
while (!list_empty(&icq_list)) {
|
||||
struct io_cq *icq =
|
||||
list_entry(icq_list.next, struct io_cq, q_node);
|
||||
|
||||
int create_task_io_context(struct task_struct *task, gfp_t gfp_flags, int node)
|
||||
spin_lock_irq(&icq->ioc->lock);
|
||||
if (!(icq->flags & ICQ_DESTROYED))
|
||||
ioc_destroy_icq(icq);
|
||||
spin_unlock_irq(&icq->ioc->lock);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
}
|
||||
#else /* CONFIG_BLK_ICQ */
|
||||
static inline void ioc_exit_icqs(struct io_context *ioc)
|
||||
{
|
||||
}
|
||||
static inline bool ioc_delay_free(struct io_context *ioc)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
#endif /* CONFIG_BLK_ICQ */
|
||||
|
||||
/**
|
||||
* put_io_context - put a reference of io_context
|
||||
* @ioc: io_context to put
|
||||
*
|
||||
* Decrement reference count of @ioc and release it if the count reaches
|
||||
* zero.
|
||||
*/
|
||||
void put_io_context(struct io_context *ioc)
|
||||
{
|
||||
BUG_ON(atomic_long_read(&ioc->refcount) <= 0);
|
||||
if (atomic_long_dec_and_test(&ioc->refcount) && !ioc_delay_free(ioc))
|
||||
kmem_cache_free(iocontext_cachep, ioc);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(put_io_context);
|
||||
|
||||
/* Called by the exiting task */
|
||||
void exit_io_context(struct task_struct *task)
|
||||
{
|
||||
struct io_context *ioc;
|
||||
|
||||
task_lock(task);
|
||||
ioc = task->io_context;
|
||||
task->io_context = NULL;
|
||||
task_unlock(task);
|
||||
|
||||
if (atomic_dec_and_test(&ioc->active_ref)) {
|
||||
ioc_exit_icqs(ioc);
|
||||
put_io_context(ioc);
|
||||
}
|
||||
}
|
||||
|
||||
static struct io_context *alloc_io_context(gfp_t gfp_flags, int node)
|
||||
{
|
||||
struct io_context *ioc;
|
||||
int ret;
|
||||
|
||||
ioc = kmem_cache_alloc_node(iocontext_cachep, gfp_flags | __GFP_ZERO,
|
||||
node);
|
||||
if (unlikely(!ioc))
|
||||
return -ENOMEM;
|
||||
return NULL;
|
||||
|
||||
/* initialize */
|
||||
atomic_long_set(&ioc->refcount, 1);
|
||||
atomic_set(&ioc->nr_tasks, 1);
|
||||
atomic_set(&ioc->active_ref, 1);
|
||||
#ifdef CONFIG_BLK_ICQ
|
||||
spin_lock_init(&ioc->lock);
|
||||
INIT_RADIX_TREE(&ioc->icq_tree, GFP_ATOMIC);
|
||||
INIT_HLIST_HEAD(&ioc->icq_list);
|
||||
INIT_WORK(&ioc->release_work, ioc_release_fn);
|
||||
#endif
|
||||
return ioc;
|
||||
}
|
||||
|
||||
int set_task_ioprio(struct task_struct *task, int ioprio)
|
||||
{
|
||||
int err;
|
||||
const struct cred *cred = current_cred(), *tcred;
|
||||
|
||||
rcu_read_lock();
|
||||
tcred = __task_cred(task);
|
||||
if (!uid_eq(tcred->uid, cred->euid) &&
|
||||
!uid_eq(tcred->uid, cred->uid) && !capable(CAP_SYS_NICE)) {
|
||||
rcu_read_unlock();
|
||||
return -EPERM;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
err = security_task_setioprio(task, ioprio);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
task_lock(task);
|
||||
if (unlikely(!task->io_context)) {
|
||||
struct io_context *ioc;
|
||||
|
||||
task_unlock(task);
|
||||
|
||||
ioc = alloc_io_context(GFP_ATOMIC, NUMA_NO_NODE);
|
||||
if (!ioc)
|
||||
return -ENOMEM;
|
||||
|
||||
task_lock(task);
|
||||
if (task->flags & PF_EXITING) {
|
||||
err = -ESRCH;
|
||||
kmem_cache_free(iocontext_cachep, ioc);
|
||||
goto out;
|
||||
}
|
||||
if (task->io_context)
|
||||
kmem_cache_free(iocontext_cachep, ioc);
|
||||
else
|
||||
task->io_context = ioc;
|
||||
}
|
||||
task->io_context->ioprio = ioprio;
|
||||
out:
|
||||
task_unlock(task);
|
||||
return err;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(set_task_ioprio);
|
||||
|
||||
int __copy_io(unsigned long clone_flags, struct task_struct *tsk)
|
||||
{
|
||||
struct io_context *ioc = current->io_context;
|
||||
|
||||
/*
|
||||
* Try to install. ioc shouldn't be installed if someone else
|
||||
* already did or @task, which isn't %current, is exiting. Note
|
||||
* that we need to allow ioc creation on exiting %current as exit
|
||||
* path may issue IOs from e.g. exit_files(). The exit path is
|
||||
* responsible for not issuing IO after exit_io_context().
|
||||
* Share io context with parent, if CLONE_IO is set
|
||||
*/
|
||||
task_lock(task);
|
||||
if (!task->io_context &&
|
||||
(task == current || !(task->flags & PF_EXITING)))
|
||||
task->io_context = ioc;
|
||||
else
|
||||
kmem_cache_free(iocontext_cachep, ioc);
|
||||
if (clone_flags & CLONE_IO) {
|
||||
atomic_inc(&ioc->active_ref);
|
||||
tsk->io_context = ioc;
|
||||
} else if (ioprio_valid(ioc->ioprio)) {
|
||||
tsk->io_context = alloc_io_context(GFP_KERNEL, NUMA_NO_NODE);
|
||||
if (!tsk->io_context)
|
||||
return -ENOMEM;
|
||||
tsk->io_context->ioprio = ioc->ioprio;
|
||||
}
|
||||
|
||||
ret = task->io_context ? 0 : -EBUSY;
|
||||
|
||||
task_unlock(task);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* get_task_io_context - get io_context of a task
|
||||
* @task: task of interest
|
||||
* @gfp_flags: allocation flags, used if allocation is necessary
|
||||
* @node: allocation node, used if allocation is necessary
|
||||
*
|
||||
* Return io_context of @task. If it doesn't exist, it is created with
|
||||
* @gfp_flags and @node. The returned io_context has its reference count
|
||||
* incremented.
|
||||
*
|
||||
* This function always goes through task_lock() and it's better to use
|
||||
* %current->io_context + get_io_context() for %current.
|
||||
*/
|
||||
struct io_context *get_task_io_context(struct task_struct *task,
|
||||
gfp_t gfp_flags, int node)
|
||||
{
|
||||
struct io_context *ioc;
|
||||
|
||||
might_sleep_if(gfpflags_allow_blocking(gfp_flags));
|
||||
|
||||
do {
|
||||
task_lock(task);
|
||||
ioc = task->io_context;
|
||||
if (likely(ioc)) {
|
||||
get_io_context(ioc);
|
||||
task_unlock(task);
|
||||
return ioc;
|
||||
}
|
||||
task_unlock(task);
|
||||
} while (!create_task_io_context(task, gfp_flags, node));
|
||||
|
||||
return NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_BLK_ICQ
|
||||
/**
|
||||
* ioc_lookup_icq - lookup io_cq from ioc
|
||||
* @ioc: the associated io_context
|
||||
* @q: the associated request_queue
|
||||
*
|
||||
* Look up io_cq associated with @ioc - @q pair from @ioc. Must be called
|
||||
* with @q->queue_lock held.
|
||||
*/
|
||||
struct io_cq *ioc_lookup_icq(struct io_context *ioc, struct request_queue *q)
|
||||
struct io_cq *ioc_lookup_icq(struct request_queue *q)
|
||||
{
|
||||
struct io_context *ioc = current->io_context;
|
||||
struct io_cq *icq;
|
||||
|
||||
lockdep_assert_held(&q->queue_lock);
|
||||
@ -359,9 +355,7 @@ EXPORT_SYMBOL(ioc_lookup_icq);
|
||||
|
||||
/**
|
||||
* ioc_create_icq - create and link io_cq
|
||||
* @ioc: io_context of interest
|
||||
* @q: request_queue of interest
|
||||
* @gfp_mask: allocation mask
|
||||
*
|
||||
* Make sure io_cq linking @ioc and @q exists. If icq doesn't exist, they
|
||||
* will be created using @gfp_mask.
|
||||
@ -369,19 +363,19 @@ EXPORT_SYMBOL(ioc_lookup_icq);
|
||||
* The caller is responsible for ensuring @ioc won't go away and @q is
|
||||
* alive and will stay alive until this function returns.
|
||||
*/
|
||||
struct io_cq *ioc_create_icq(struct io_context *ioc, struct request_queue *q,
|
||||
gfp_t gfp_mask)
|
||||
static struct io_cq *ioc_create_icq(struct request_queue *q)
|
||||
{
|
||||
struct io_context *ioc = current->io_context;
|
||||
struct elevator_type *et = q->elevator->type;
|
||||
struct io_cq *icq;
|
||||
|
||||
/* allocate stuff */
|
||||
icq = kmem_cache_alloc_node(et->icq_cache, gfp_mask | __GFP_ZERO,
|
||||
icq = kmem_cache_alloc_node(et->icq_cache, GFP_ATOMIC | __GFP_ZERO,
|
||||
q->node);
|
||||
if (!icq)
|
||||
return NULL;
|
||||
|
||||
if (radix_tree_maybe_preload(gfp_mask) < 0) {
|
||||
if (radix_tree_maybe_preload(GFP_ATOMIC) < 0) {
|
||||
kmem_cache_free(et->icq_cache, icq);
|
||||
return NULL;
|
||||
}
|
||||
@ -402,7 +396,7 @@ struct io_cq *ioc_create_icq(struct io_context *ioc, struct request_queue *q,
|
||||
et->ops.init_icq(icq);
|
||||
} else {
|
||||
kmem_cache_free(et->icq_cache, icq);
|
||||
icq = ioc_lookup_icq(ioc, q);
|
||||
icq = ioc_lookup_icq(q);
|
||||
if (!icq)
|
||||
printk(KERN_ERR "cfq: icq link failed!\n");
|
||||
}
|
||||
@ -413,6 +407,46 @@ struct io_cq *ioc_create_icq(struct io_context *ioc, struct request_queue *q,
|
||||
return icq;
|
||||
}
|
||||
|
||||
struct io_cq *ioc_find_get_icq(struct request_queue *q)
|
||||
{
|
||||
struct io_context *ioc = current->io_context;
|
||||
struct io_cq *icq = NULL;
|
||||
|
||||
if (unlikely(!ioc)) {
|
||||
ioc = alloc_io_context(GFP_ATOMIC, q->node);
|
||||
if (!ioc)
|
||||
return NULL;
|
||||
|
||||
task_lock(current);
|
||||
if (current->io_context) {
|
||||
kmem_cache_free(iocontext_cachep, ioc);
|
||||
ioc = current->io_context;
|
||||
} else {
|
||||
current->io_context = ioc;
|
||||
}
|
||||
|
||||
get_io_context(ioc);
|
||||
task_unlock(current);
|
||||
} else {
|
||||
get_io_context(ioc);
|
||||
|
||||
spin_lock_irq(&q->queue_lock);
|
||||
icq = ioc_lookup_icq(q);
|
||||
spin_unlock_irq(&q->queue_lock);
|
||||
}
|
||||
|
||||
if (!icq) {
|
||||
icq = ioc_create_icq(q);
|
||||
if (!icq) {
|
||||
put_io_context(ioc);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
return icq;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(ioc_find_get_icq);
|
||||
#endif /* CONFIG_BLK_ICQ */
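ioc_find_get_icq() above follows the usual allocate-outside-the-lock pattern: if the current task has no io_context yet, one is allocated first, and the task lock then decides whether to install it or to discard it because another path won the race. A small sketch of that install race, with invented names and a pthread mutex in place of task_lock():

/*
 * Sketch of "allocate outside the lock, resolve the install race under
 * it"; the names and types here are invented for the example.
 */
#include <pthread.h>
#include <stdlib.h>

struct io_ctx { int ioprio; };

static struct io_ctx *task_ctx;		/* stand-in for task->io_context */
static pthread_mutex_t task_lock = PTHREAD_MUTEX_INITIALIZER;

static struct io_ctx *find_or_create_ctx(void)
{
	struct io_ctx *ctx = task_ctx;

	if (!ctx) {
		/* allocate before taking the lock; this may fail or sleep */
		ctx = calloc(1, sizeof(*ctx));
		if (!ctx)
			return NULL;

		pthread_mutex_lock(&task_lock);
		if (task_ctx) {
			/* somebody beat us to it: drop ours, use theirs */
			free(ctx);
			ctx = task_ctx;
		} else {
			task_ctx = ctx;
		}
		pthread_mutex_unlock(&task_lock);
	}
	return ctx;
}

int main(void)
{
	return find_or_create_ctx() ? 0 : 1;
}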
|
||||
|
||||
static int __init blk_ioc_init(void)
|
||||
{
|
||||
iocontext_cachep = kmem_cache_create("blkdev_ioc",
|
||||
|
@ -62,6 +62,7 @@ struct ioprio_blkg {
|
||||
struct ioprio_blkcg {
|
||||
struct blkcg_policy_data cpd;
|
||||
enum prio_policy prio_policy;
|
||||
bool prio_set;
|
||||
};
|
||||
|
||||
static inline struct ioprio_blkg *pd_to_ioprio(struct blkg_policy_data *pd)
|
||||
@ -112,7 +113,7 @@ static ssize_t ioprio_set_prio_policy(struct kernfs_open_file *of, char *buf,
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
blkcg->prio_policy = ret;
|
||||
|
||||
blkcg->prio_set = true;
|
||||
return nbytes;
|
||||
}
|
||||
|
||||
@ -190,6 +191,10 @@ static void blkcg_ioprio_track(struct rq_qos *rqos, struct request *rq,
|
||||
struct bio *bio)
|
||||
{
|
||||
struct ioprio_blkcg *blkcg = ioprio_blkcg_from_bio(bio);
|
||||
u16 prio;
|
||||
|
||||
if (!blkcg->prio_set)
|
||||
return;
|
||||
|
||||
/*
|
||||
* Except for IOPRIO_CLASS_NONE, higher I/O priority numbers
|
||||
@ -199,8 +204,10 @@ static void blkcg_ioprio_track(struct rq_qos *rqos, struct request *rq,
|
||||
* bio I/O priority is not modified. If the bio I/O priority equals
|
||||
* IOPRIO_CLASS_NONE, the cgroup I/O priority is assigned to the bio.
|
||||
*/
|
||||
bio->bi_ioprio = max_t(u16, bio->bi_ioprio,
|
||||
IOPRIO_PRIO_VALUE(blkcg->prio_policy, 0));
|
||||
prio = max_t(u16, bio->bi_ioprio,
|
||||
IOPRIO_PRIO_VALUE(blkcg->prio_policy, 0));
|
||||
if (prio > bio->bi_ioprio)
|
||||
bio->bi_ioprio = prio;
|
||||
}
|
||||
|
||||
static void blkcg_ioprio_exit(struct rq_qos *rqos)
|
||||
|
@ -8,10 +8,12 @@
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/blk-integrity.h>
|
||||
#include <linux/scatterlist.h>
|
||||
#include <linux/part_stat.h>
|
||||
|
||||
#include <trace/events/block.h>
|
||||
|
||||
#include "blk.h"
|
||||
#include "blk-mq-sched.h"
|
||||
#include "blk-rq-qos.h"
|
||||
#include "blk-throttle.h"
|
||||
|
||||
@ -775,8 +777,7 @@ static struct request *attempt_merge(struct request_queue *q,
|
||||
if (req_op(req) != req_op(next))
|
||||
return NULL;
|
||||
|
||||
if (rq_data_dir(req) != rq_data_dir(next)
|
||||
|| req->rq_disk != next->rq_disk)
|
||||
if (rq_data_dir(req) != rq_data_dir(next))
|
||||
return NULL;
|
||||
|
||||
if (req_op(req) == REQ_OP_WRITE_SAME &&
|
||||
@ -903,10 +904,6 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
|
||||
if (bio_data_dir(bio) != rq_data_dir(rq))
|
||||
return false;
|
||||
|
||||
/* must be same device */
|
||||
if (rq->rq_disk != bio->bi_bdev->bd_disk)
|
||||
return false;
|
||||
|
||||
/* only merge integrity protected bio into ditto rq */
|
||||
if (blk_integrity_merge_bio(rq->q, rq, bio) == false)
|
||||
return false;
|
||||
@ -1067,7 +1064,6 @@ static enum bio_merge_status blk_attempt_bio_merge(struct request_queue *q,
|
||||
* @q: request_queue new bio is being queued at
|
||||
* @bio: new bio being queued
|
||||
* @nr_segs: number of segments in @bio
|
||||
* @same_queue_rq: output value, will be true if there's an existing request
|
||||
* from the passed in @q already in the plug list
|
||||
*
|
||||
* Determine whether @bio being queued on @q can be merged with the previous
|
||||
@ -1084,7 +1080,7 @@ static enum bio_merge_status blk_attempt_bio_merge(struct request_queue *q,
|
||||
* Caller must ensure !blk_queue_nomerges(q) beforehand.
|
||||
*/
|
||||
bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
|
||||
unsigned int nr_segs, bool *same_queue_rq)
|
||||
unsigned int nr_segs)
|
||||
{
|
||||
struct blk_plug *plug;
|
||||
struct request *rq;
|
||||
@ -1096,12 +1092,6 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
|
||||
/* check the previously added entry for a quick merge attempt */
|
||||
rq = rq_list_peek(&plug->mq_list);
|
||||
if (rq->q == q) {
|
||||
/*
|
||||
* Only blk-mq multiple hardware queues case checks the rq in
|
||||
* the same queue, there should be only one such rq in a queue
|
||||
*/
|
||||
*same_queue_rq = true;
|
||||
|
||||
if (blk_attempt_bio_merge(q, rq, bio, nr_segs, false) ==
|
||||
BIO_MERGE_OK)
|
||||
return true;
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include "blk.h"
|
||||
#include "blk-mq.h"
|
||||
#include "blk-mq-debugfs.h"
|
||||
#include "blk-mq-sched.h"
|
||||
#include "blk-mq-tag.h"
|
||||
#include "blk-rq-qos.h"
|
||||
|
||||
@ -29,6 +30,9 @@ static int queue_poll_stat_show(void *data, struct seq_file *m)
|
||||
struct request_queue *q = data;
|
||||
int bucket;
|
||||
|
||||
if (!q->poll_stat)
|
||||
return 0;
|
||||
|
||||
for (bucket = 0; bucket < (BLK_MQ_POLL_STATS_BKTS / 2); bucket++) {
|
||||
seq_printf(m, "read (%d Bytes): ", 1 << (9 + bucket));
|
||||
print_stat(m, &q->poll_stat[2 * bucket]);
|
||||
@ -122,7 +126,6 @@ static const char *const blk_queue_flag_name[] = {
|
||||
QUEUE_FLAG_NAME(FUA),
|
||||
QUEUE_FLAG_NAME(DAX),
|
||||
QUEUE_FLAG_NAME(STATS),
|
||||
QUEUE_FLAG_NAME(POLL_STATS),
|
||||
QUEUE_FLAG_NAME(REGISTERED),
|
||||
QUEUE_FLAG_NAME(QUIESCED),
|
||||
QUEUE_FLAG_NAME(PCI_P2PDMA),
|
||||
|
@ -18,32 +18,6 @@
|
||||
#include "blk-mq-tag.h"
|
||||
#include "blk-wbt.h"
|
||||
|
||||
void blk_mq_sched_assign_ioc(struct request *rq)
|
||||
{
|
||||
struct request_queue *q = rq->q;
|
||||
struct io_context *ioc;
|
||||
struct io_cq *icq;
|
||||
|
||||
/*
|
||||
* May not have an IO context if it's a passthrough request
|
||||
*/
|
||||
ioc = current->io_context;
|
||||
if (!ioc)
|
||||
return;
|
||||
|
||||
spin_lock_irq(&q->queue_lock);
|
||||
icq = ioc_lookup_icq(ioc, q);
|
||||
spin_unlock_irq(&q->queue_lock);
|
||||
|
||||
if (!icq) {
|
||||
icq = ioc_create_icq(ioc, q, GFP_ATOMIC);
|
||||
if (!icq)
|
||||
return;
|
||||
}
|
||||
get_io_context(icq->ioc);
|
||||
rq->elv.icq = icq;
|
||||
}
|
||||
|
||||
/*
|
||||
* Mark a hardware queue as needing a restart. For shared queues, maintain
|
||||
* a count of how many hardware queues are marked for restart.
|
||||
@ -501,7 +475,8 @@ void blk_mq_sched_insert_requests(struct blk_mq_hw_ctx *hctx,
|
||||
* us one extra enqueue & dequeue to sw queue.
|
||||
*/
|
||||
if (!hctx->dispatch_busy && !run_queue_async) {
|
||||
blk_mq_try_issue_list_directly(hctx, list);
|
||||
blk_mq_run_dispatch_ops(hctx->queue,
|
||||
blk_mq_try_issue_list_directly(hctx, list));
|
||||
if (list_empty(list))
|
||||
goto out;
|
||||
}
|
||||
|
@ -8,8 +8,6 @@
|
||||
|
||||
#define MAX_SCHED_RQ (16 * BLKDEV_DEFAULT_RQ)
|
||||
|
||||
void blk_mq_sched_assign_ioc(struct request *rq);
|
||||
|
||||
bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
|
||||
unsigned int nr_segs, struct request **merged_request);
|
||||
bool blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio,
|
||||
|
@ -36,8 +36,6 @@ static void blk_mq_hw_sysfs_release(struct kobject *kobj)
|
||||
struct blk_mq_hw_ctx *hctx = container_of(kobj, struct blk_mq_hw_ctx,
|
||||
kobj);
|
||||
|
||||
if (hctx->flags & BLK_MQ_F_BLOCKING)
|
||||
cleanup_srcu_struct(hctx->srcu);
|
||||
blk_free_flush_queue(hctx->fq);
|
||||
sbitmap_free(&hctx->ctx_map);
|
||||
free_cpumask_var(hctx->cpumask);
|
||||
|
@ -215,7 +215,8 @@ void blk_mq_put_tags(struct blk_mq_tags *tags, int *tag_array, int nr_tags)
|
||||
|
||||
struct bt_iter_data {
|
||||
struct blk_mq_hw_ctx *hctx;
|
||||
busy_iter_fn *fn;
|
||||
struct request_queue *q;
|
||||
busy_tag_iter_fn *fn;
|
||||
void *data;
|
||||
bool reserved;
|
||||
};
|
||||
@ -228,7 +229,7 @@ static struct request *blk_mq_find_and_get_req(struct blk_mq_tags *tags,
|
||||
|
||||
spin_lock_irqsave(&tags->lock, flags);
|
||||
rq = tags->rqs[bitnr];
|
||||
if (!rq || rq->tag != bitnr || !refcount_inc_not_zero(&rq->ref))
|
||||
if (!rq || rq->tag != bitnr || !req_ref_inc_not_zero(rq))
|
||||
rq = NULL;
|
||||
spin_unlock_irqrestore(&tags->lock, flags);
|
||||
return rq;
|
||||
@ -238,11 +239,18 @@ static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
|
||||
{
|
||||
struct bt_iter_data *iter_data = data;
|
||||
struct blk_mq_hw_ctx *hctx = iter_data->hctx;
|
||||
struct blk_mq_tags *tags = hctx->tags;
|
||||
struct request_queue *q = iter_data->q;
|
||||
struct blk_mq_tag_set *set = q->tag_set;
|
||||
bool reserved = iter_data->reserved;
|
||||
struct blk_mq_tags *tags;
|
||||
struct request *rq;
|
||||
bool ret = true;
|
||||
|
||||
if (blk_mq_is_shared_tags(set->flags))
|
||||
tags = set->shared_tags;
|
||||
else
|
||||
tags = hctx->tags;
|
||||
|
||||
if (!reserved)
|
||||
bitnr += tags->nr_reserved_tags;
|
||||
/*
|
||||
@ -253,8 +261,8 @@ static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
|
||||
if (!rq)
|
||||
return true;
|
||||
|
||||
if (rq->q == hctx->queue && rq->mq_hctx == hctx)
|
||||
ret = iter_data->fn(hctx, rq, iter_data->data, reserved);
|
||||
if (rq->q == q && (!hctx || rq->mq_hctx == hctx))
|
||||
ret = iter_data->fn(rq, iter_data->data, reserved);
|
||||
blk_mq_put_rq_ref(rq);
|
||||
return ret;
|
||||
}
|
||||
@ -262,6 +270,7 @@ static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
|
||||
/**
|
||||
* bt_for_each - iterate over the requests associated with a hardware queue
|
||||
* @hctx: Hardware queue to examine.
|
||||
* @q: Request queue to examine.
|
||||
* @bt: sbitmap to examine. This is either the breserved_tags member
|
||||
* or the bitmap_tags member of struct blk_mq_tags.
|
||||
* @fn: Pointer to the function that will be called for each request
|
||||
@ -273,14 +282,16 @@ static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
|
||||
* @reserved: Indicates whether @bt is the breserved_tags member or the
|
||||
* bitmap_tags member of struct blk_mq_tags.
|
||||
*/
|
||||
static void bt_for_each(struct blk_mq_hw_ctx *hctx, struct sbitmap_queue *bt,
|
||||
busy_iter_fn *fn, void *data, bool reserved)
|
||||
static void bt_for_each(struct blk_mq_hw_ctx *hctx, struct request_queue *q,
|
||||
struct sbitmap_queue *bt, busy_tag_iter_fn *fn,
|
||||
void *data, bool reserved)
|
||||
{
|
||||
struct bt_iter_data iter_data = {
|
||||
.hctx = hctx,
|
||||
.fn = fn,
|
||||
.data = data,
|
||||
.reserved = reserved,
|
||||
.q = q,
|
||||
};
|
||||
|
||||
sbitmap_for_each_set(&bt->sb, bt_iter, &iter_data);
|
||||
@ -457,12 +468,9 @@ EXPORT_SYMBOL(blk_mq_tagset_wait_completed_request);
|
||||
* called for all requests on all queues that share that tag set and not only
|
||||
* for requests associated with @q.
|
||||
*/
|
||||
void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn,
|
||||
void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_tag_iter_fn *fn,
|
||||
void *priv)
|
||||
{
|
||||
struct blk_mq_hw_ctx *hctx;
|
||||
int i;
|
||||
|
||||
/*
|
||||
* __blk_mq_update_nr_hw_queues() updates nr_hw_queues and queue_hw_ctx
|
||||
* while the queue is frozen. So we can use q_usage_counter to avoid
|
||||
@ -471,19 +479,34 @@ void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn,
|
||||
if (!percpu_ref_tryget(&q->q_usage_counter))
|
||||
return;
|
||||
|
||||
queue_for_each_hw_ctx(q, hctx, i) {
|
||||
struct blk_mq_tags *tags = hctx->tags;
|
||||
|
||||
/*
|
||||
* If no software queues are currently mapped to this
|
||||
* hardware queue, there's nothing to check
|
||||
*/
|
||||
if (!blk_mq_hw_queue_mapped(hctx))
|
||||
continue;
|
||||
if (blk_mq_is_shared_tags(q->tag_set->flags)) {
|
||||
struct blk_mq_tags *tags = q->tag_set->shared_tags;
|
||||
struct sbitmap_queue *bresv = &tags->breserved_tags;
|
||||
struct sbitmap_queue *btags = &tags->bitmap_tags;
|
||||
|
||||
if (tags->nr_reserved_tags)
|
||||
bt_for_each(hctx, &tags->breserved_tags, fn, priv, true);
|
||||
bt_for_each(hctx, &tags->bitmap_tags, fn, priv, false);
|
||||
bt_for_each(NULL, q, bresv, fn, priv, true);
|
||||
bt_for_each(NULL, q, btags, fn, priv, false);
|
||||
} else {
|
||||
struct blk_mq_hw_ctx *hctx;
|
||||
int i;
|
||||
|
||||
queue_for_each_hw_ctx(q, hctx, i) {
|
||||
struct blk_mq_tags *tags = hctx->tags;
|
||||
struct sbitmap_queue *bresv = &tags->breserved_tags;
|
||||
struct sbitmap_queue *btags = &tags->bitmap_tags;
|
||||
|
||||
/*
|
||||
* If no software queues are currently mapped to this
|
||||
* hardware queue, there's nothing to check
|
||||
*/
|
||||
if (!blk_mq_hw_queue_mapped(hctx))
|
||||
continue;
|
||||
|
||||
if (tags->nr_reserved_tags)
|
||||
bt_for_each(hctx, q, bresv, fn, priv, true);
|
||||
bt_for_each(hctx, q, btags, fn, priv, false);
|
||||
}
|
||||
}
|
||||
blk_queue_exit(q);
|
||||
}
|
||||
|
@ -28,7 +28,7 @@ extern void blk_mq_tag_resize_shared_tags(struct blk_mq_tag_set *set,
|
||||
extern void blk_mq_tag_update_sched_shared_tags(struct request_queue *q);
|
||||
|
||||
extern void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool);
|
||||
void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn,
|
||||
void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_tag_iter_fn *fn,
|
||||
void *priv);
|
||||
void blk_mq_all_tag_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn,
|
||||
void *priv);
|
||||
|
block/blk-mq.c (990 lines changed): file diff suppressed because it is too large
@ -65,9 +65,6 @@ void blk_mq_request_bypass_insert(struct request *rq, bool at_head,
|
||||
bool run_queue);
|
||||
void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
|
||||
struct list_head *list);
|
||||
|
||||
/* Used by blk_insert_cloned_request() to issue request directly */
|
||||
blk_status_t blk_mq_request_issue_directly(struct request *rq, bool last);
|
||||
void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
|
||||
struct list_head *list);
|
||||
|
||||
@ -377,5 +374,24 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
|
||||
return __blk_mq_active_requests(hctx) < depth;
|
||||
}
|
||||
|
||||
/* run the code block in @dispatch_ops with rcu/srcu read lock held */
#define __blk_mq_run_dispatch_ops(q, check_sleep, dispatch_ops)	\
do {								\
	if (!blk_queue_has_srcu(q)) {				\
		rcu_read_lock();				\
		(dispatch_ops);					\
		rcu_read_unlock();				\
	} else {						\
		int srcu_idx;					\
								\
		might_sleep_if(check_sleep);			\
		srcu_idx = srcu_read_lock((q)->srcu);		\
		(dispatch_ops);					\
		srcu_read_unlock((q)->srcu, srcu_idx);		\
	}							\
} while (0)

#define blk_mq_run_dispatch_ops(q, dispatch_ops)		\
	__blk_mq_run_dispatch_ops(q, true, dispatch_ops)	\

#endif
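blk_mq_run_dispatch_ops() wraps an arbitrary statement in whichever read-side protection the queue needs, chosen at run time (plain RCU for most queues, SRCU for blocking ones). The shape is just a do/while(0) macro around the block; a toy version in plain C, choosing between a reader lock and a mutex purely for illustration:

/* Illustrative macro sketch: run a code block under one of two locks,
 * picked at run time. Not the kernel macro; names are invented. */
#include <pthread.h>
#include <stdio.h>

struct queue {
	int needs_writer_lock;
	pthread_rwlock_t rwlock;
	pthread_mutex_t mutex;
};

#define run_dispatch_ops(q, ops)				\
do {								\
	if (!(q)->needs_writer_lock) {				\
		pthread_rwlock_rdlock(&(q)->rwlock);		\
		(ops);						\
		pthread_rwlock_unlock(&(q)->rwlock);		\
	} else {						\
		pthread_mutex_lock(&(q)->mutex);		\
		(ops);						\
		pthread_mutex_unlock(&(q)->mutex);		\
	}							\
} while (0)

int main(void)
{
	struct queue q = {
		.rwlock = PTHREAD_RWLOCK_INITIALIZER,
		.mutex = PTHREAD_MUTEX_INITIALIZER,
	};

	run_dispatch_ops(&q, printf("dispatching under the chosen lock\n"));
	return 0;
}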
|
||||
|
@ -15,7 +15,7 @@
|
||||
struct blk_queue_stats {
|
||||
struct list_head callbacks;
|
||||
spinlock_t lock;
|
||||
bool enable_accounting;
|
||||
int accounting;
|
||||
};
|
||||
|
||||
void blk_rq_stat_init(struct blk_rq_stat *stat)
|
||||
@ -161,7 +161,7 @@ void blk_stat_remove_callback(struct request_queue *q,
|
||||
|
||||
spin_lock_irqsave(&q->stats->lock, flags);
|
||||
list_del_rcu(&cb->list);
|
||||
if (list_empty(&q->stats->callbacks) && !q->stats->enable_accounting)
|
||||
if (list_empty(&q->stats->callbacks) && !q->stats->accounting)
|
||||
blk_queue_flag_clear(QUEUE_FLAG_STATS, q);
|
||||
spin_unlock_irqrestore(&q->stats->lock, flags);
|
||||
|
||||
@ -184,13 +184,24 @@ void blk_stat_free_callback(struct blk_stat_callback *cb)
|
||||
call_rcu(&cb->rcu, blk_stat_free_callback_rcu);
|
||||
}
|
||||
|
||||
void blk_stat_disable_accounting(struct request_queue *q)
{
	unsigned long flags;

	spin_lock_irqsave(&q->stats->lock, flags);
	if (!--q->stats->accounting)
		blk_queue_flag_clear(QUEUE_FLAG_STATS, q);
	spin_unlock_irqrestore(&q->stats->lock, flags);
}
EXPORT_SYMBOL_GPL(blk_stat_disable_accounting);

void blk_stat_enable_accounting(struct request_queue *q)
{
	unsigned long flags;

	spin_lock_irqsave(&q->stats->lock, flags);
	q->stats->enable_accounting = true;
	blk_queue_flag_set(QUEUE_FLAG_STATS, q);
	if (!q->stats->accounting++)
		blk_queue_flag_set(QUEUE_FLAG_STATS, q);
	spin_unlock_irqrestore(&q->stats->lock, flags);
}
EXPORT_SYMBOL_GPL(blk_stat_enable_accounting);
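The change above turns the enable_accounting bool into a nesting counter: the first enabler sets QUEUE_FLAG_STATS and the last disabler clears it, so independent users of the statistics no longer switch the flag off under one another. A minimal sketch of that counting idiom (invented names):

/* Sketch of a nested enable/disable counter guarding a single flag;
 * names are invented for the example. */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t stats_lock = PTHREAD_MUTEX_INITIALIZER;
static int accounting;			/* how many users want stats on */
static bool stats_flag;			/* the actual on/off switch */

static void stats_enable(void)
{
	pthread_mutex_lock(&stats_lock);
	if (!accounting++)		/* first user turns the flag on */
		stats_flag = true;
	pthread_mutex_unlock(&stats_lock);
}

static void stats_disable(void)
{
	pthread_mutex_lock(&stats_lock);
	if (!--accounting)		/* last user turns it off */
		stats_flag = false;
	pthread_mutex_unlock(&stats_lock);
}

int main(void)
{
	stats_enable();
	stats_enable();
	stats_disable();
	printf("flag after one disable: %d\n", stats_flag);	/* still 1 */
	stats_disable();
	printf("flag after both disable: %d\n", stats_flag);	/* now 0 */
	return 0;
}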
|
||||
@ -205,7 +216,7 @@ struct blk_queue_stats *blk_alloc_queue_stats(void)
|
||||
|
||||
INIT_LIST_HEAD(&stats->callbacks);
|
||||
spin_lock_init(&stats->lock);
|
||||
stats->enable_accounting = false;
|
||||
stats->accounting = 0;
|
||||
|
||||
return stats;
|
||||
}
|
||||
@ -219,3 +230,21 @@ void blk_free_queue_stats(struct blk_queue_stats *stats)
|
||||
|
||||
kfree(stats);
|
||||
}
|
||||
|
||||
bool blk_stats_alloc_enable(struct request_queue *q)
{
	struct blk_rq_stat *poll_stat;

	poll_stat = kcalloc(BLK_MQ_POLL_STATS_BKTS, sizeof(*poll_stat),
				GFP_ATOMIC);
	if (!poll_stat)
		return false;

	if (cmpxchg(&q->poll_stat, NULL, poll_stat) != NULL) {
		kfree(poll_stat);
		return true;
	}

	blk_stat_add_callback(q, q->poll_cb);
	return false;
}
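blk_stats_alloc_enable() allocates the poll statistics lazily and publishes the buffer with cmpxchg(), so exactly one racing caller installs its allocation and everyone else frees their copy. A hedged C11 sketch of the same publish-once idiom, with invented names:

/* Sketch of publish-once allocation with compare-and-swap (C11 atomics);
 * invented names, not the kernel helper. */
#include <stdatomic.h>
#include <stdlib.h>
#include <stdio.h>

struct poll_stat { long nsamples; };

static _Atomic(struct poll_stat *) poll_stat;	/* NULL until first use */

static struct poll_stat *poll_stat_get(void)
{
	struct poll_stat *expected = NULL;
	struct poll_stat *new = calloc(1, sizeof(*new));

	if (!new)
		return atomic_load(&poll_stat);	/* may still be NULL */

	/* install our buffer only if nobody else did; otherwise free it */
	if (!atomic_compare_exchange_strong(&poll_stat, &expected, new)) {
		free(new);
		return expected;		/* the winner's buffer */
	}
	return new;
}

int main(void)
{
	printf("poll stats at %p\n", (void *)poll_stat_get());
	return 0;
}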
|
||||
|
@ -64,11 +64,13 @@ struct blk_stat_callback {
|
||||
|
||||
struct blk_queue_stats *blk_alloc_queue_stats(void);
|
||||
void blk_free_queue_stats(struct blk_queue_stats *);
|
||||
bool blk_stats_alloc_enable(struct request_queue *q);
|
||||
|
||||
void blk_stat_add(struct request *rq, u64 now);
|
||||
|
||||
/* record time/size info in request but not add a callback */
|
||||
void blk_stat_enable_accounting(struct request_queue *q);
|
||||
void blk_stat_disable_accounting(struct request_queue *q);
|
||||
|
||||
/**
|
||||
* blk_stat_alloc_callback() - Allocate a block statistics callback.
|
||||
|
@ -16,6 +16,7 @@
|
||||
#include "blk.h"
|
||||
#include "blk-mq.h"
|
||||
#include "blk-mq-debugfs.h"
|
||||
#include "blk-mq-sched.h"
|
||||
#include "blk-wbt.h"
|
||||
#include "blk-throttle.h"
|
||||
|
||||
@ -734,7 +735,8 @@ static void blk_free_queue_rcu(struct rcu_head *rcu_head)
|
||||
{
|
||||
struct request_queue *q = container_of(rcu_head, struct request_queue,
|
||||
rcu_head);
|
||||
kmem_cache_free(blk_requestq_cachep, q);
|
||||
|
||||
kmem_cache_free(blk_get_queue_kmem_cache(blk_queue_has_srcu(q)), q);
|
||||
}
|
||||
|
||||
/* Unconfigure the I/O scheduler and dissociate from the cgroup controller. */
|
||||
@ -747,7 +749,7 @@ static void blk_exit_queue(struct request_queue *q)
|
||||
*/
|
||||
if (q->elevator) {
|
||||
ioc_clear_queue(q);
|
||||
__elevator_exit(q, q->elevator);
|
||||
elevator_exit(q);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -785,14 +787,15 @@ static void blk_release_queue(struct kobject *kobj)
|
||||
|
||||
might_sleep();
|
||||
|
||||
if (test_bit(QUEUE_FLAG_POLL_STATS, &q->queue_flags))
|
||||
if (q->poll_stat)
|
||||
blk_stat_remove_callback(q, q->poll_cb);
|
||||
blk_stat_free_callback(q->poll_cb);
|
||||
|
||||
blk_free_queue_stats(q->stats);
|
||||
|
||||
blk_exit_queue(q);
|
||||
|
||||
blk_free_queue_stats(q->stats);
|
||||
kfree(q->poll_stat);
|
||||
|
||||
blk_queue_free_zone_bitmaps(q);
|
||||
|
||||
if (queue_is_mq(q))
|
||||
|
@ -13,6 +13,7 @@
|
||||
#include <linux/blk-cgroup.h>
|
||||
#include "blk.h"
|
||||
#include "blk-cgroup-rwstat.h"
|
||||
#include "blk-stat.h"
|
||||
#include "blk-throttle.h"
|
||||
|
||||
/* Max dispatch from a group in 1 round */
|
||||
|
block/blk.h (115 lines changed)
@ -2,15 +2,10 @@
|
||||
#ifndef BLK_INTERNAL_H
|
||||
#define BLK_INTERNAL_H
|
||||
|
||||
#include <linux/idr.h>
|
||||
#include <linux/blk-mq.h>
|
||||
#include <linux/part_stat.h>
|
||||
#include <linux/blk-crypto.h>
|
||||
#include <linux/memblock.h> /* for max_pfn/max_low_pfn */
|
||||
#include <xen/xen.h>
|
||||
#include "blk-crypto-internal.h"
|
||||
#include "blk-mq.h"
|
||||
#include "blk-mq-sched.h"
|
||||
|
||||
struct elevator_type;
|
||||
|
||||
@ -32,15 +27,10 @@ struct blk_flush_queue {
|
||||
};
|
||||
|
||||
extern struct kmem_cache *blk_requestq_cachep;
|
||||
extern struct kmem_cache *blk_requestq_srcu_cachep;
|
||||
extern struct kobj_type blk_queue_ktype;
|
||||
extern struct ida blk_queue_ida;
|
||||
|
||||
static inline struct blk_flush_queue *
|
||||
blk_get_flush_queue(struct request_queue *q, struct blk_mq_ctx *ctx)
|
||||
{
|
||||
return blk_mq_map_queue(q, REQ_OP_FLUSH, ctx)->fq;
|
||||
}
|
||||
|
||||
static inline void __blk_get_queue(struct request_queue *q)
|
||||
{
|
||||
kobject_get(&q->kobj);
|
||||
@ -250,16 +240,13 @@ static inline void blk_integrity_del(struct gendisk *disk)
|
||||
|
||||
unsigned long blk_rq_timeout(unsigned long timeout);
|
||||
void blk_add_timer(struct request *req);
|
||||
void blk_print_req_error(struct request *req, blk_status_t status);
|
||||
const char *blk_status_to_str(blk_status_t status);
|
||||
|
||||
bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
|
||||
unsigned int nr_segs, bool *same_queue_rq);
|
||||
unsigned int nr_segs);
|
||||
bool blk_bio_list_merge(struct request_queue *q, struct list_head *list,
|
||||
struct bio *bio, unsigned int nr_segs);
|
||||
|
||||
void __blk_account_io_start(struct request *req);
|
||||
void __blk_account_io_done(struct request *req, u64 now);
|
||||
|
||||
/*
|
||||
* Plug flush limits
|
||||
*/
|
||||
@ -275,19 +262,10 @@ void blk_insert_flush(struct request *rq);
|
||||
|
||||
int elevator_switch_mq(struct request_queue *q,
|
||||
struct elevator_type *new_e);
|
||||
void __elevator_exit(struct request_queue *, struct elevator_queue *);
|
||||
void elevator_exit(struct request_queue *q);
|
||||
int elv_register_queue(struct request_queue *q, bool uevent);
|
||||
void elv_unregister_queue(struct request_queue *q);
|
||||
|
||||
static inline void elevator_exit(struct request_queue *q,
|
||||
struct elevator_queue *e)
|
||||
{
|
||||
lockdep_assert_held(&q->sysfs_lock);
|
||||
|
||||
blk_mq_sched_free_rqs(q);
|
||||
__elevator_exit(q, e);
|
||||
}
|
||||
|
||||
ssize_t part_size_show(struct device *dev, struct device_attribute *attr,
|
||||
char *buf);
|
||||
ssize_t part_stat_show(struct device *dev, struct device_attribute *attr,
|
||||
@ -347,26 +325,10 @@ int blk_dev_init(void);
|
||||
*/
|
||||
static inline bool blk_do_io_stat(struct request *rq)
|
||||
{
|
||||
return (rq->rq_flags & RQF_IO_STAT) && rq->rq_disk;
|
||||
return (rq->rq_flags & RQF_IO_STAT) && rq->q->disk;
|
||||
}
|
||||
|
||||
static inline void blk_account_io_done(struct request *req, u64 now)
|
||||
{
|
||||
/*
|
||||
* Account IO completion. flush_rq isn't accounted as a
|
||||
* normal IO on queueing nor completion. Accounting the
|
||||
* containing request is enough.
|
||||
*/
|
||||
if (blk_do_io_stat(req) && req->part &&
|
||||
!(req->rq_flags & RQF_FLUSH_SEQ))
|
||||
__blk_account_io_done(req, now);
|
||||
}
|
||||
|
||||
static inline void blk_account_io_start(struct request *req)
|
||||
{
|
||||
if (blk_do_io_stat(req))
|
||||
__blk_account_io_start(req);
|
||||
}
|
||||
void update_io_ticks(struct block_device *part, unsigned long now, bool end);
|
||||
|
||||
static inline void req_set_nomerge(struct request_queue *q, struct request *req)
|
||||
{
|
||||
@ -402,13 +364,15 @@ static inline unsigned int bio_aligned_discard_max_sectors(
|
||||
/*
|
||||
* Internal io_context interface
|
||||
*/
|
||||
void get_io_context(struct io_context *ioc);
|
||||
struct io_cq *ioc_lookup_icq(struct io_context *ioc, struct request_queue *q);
|
||||
struct io_cq *ioc_create_icq(struct io_context *ioc, struct request_queue *q,
|
||||
gfp_t gfp_mask);
|
||||
struct io_cq *ioc_find_get_icq(struct request_queue *q);
|
||||
struct io_cq *ioc_lookup_icq(struct request_queue *q);
|
||||
#ifdef CONFIG_BLK_ICQ
|
||||
void ioc_clear_queue(struct request_queue *q);
|
||||
|
||||
int create_task_io_context(struct task_struct *task, gfp_t gfp_mask, int node);
|
||||
#else
|
||||
static inline void ioc_clear_queue(struct request_queue *q)
|
||||
{
|
||||
}
|
||||
#endif /* CONFIG_BLK_ICQ */
|
||||
|
||||
#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
|
||||
extern ssize_t blk_throtl_sample_time_show(struct request_queue *q, char *page);
|
||||
@ -467,7 +431,15 @@ int bio_add_hw_page(struct request_queue *q, struct bio *bio,
|
||||
struct page *page, unsigned int len, unsigned int offset,
|
||||
unsigned int max_sectors, bool *same_page);
|
||||
|
||||
struct request_queue *blk_alloc_queue(int node_id);
|
||||
static inline struct kmem_cache *blk_get_queue_kmem_cache(bool srcu)
|
||||
{
|
||||
if (srcu)
|
||||
return blk_requestq_srcu_cachep;
|
||||
return blk_requestq_cachep;
|
||||
}
|
||||
struct request_queue *blk_alloc_queue(int node_id, bool alloc_srcu);
|
||||
|
||||
int disk_scan_partitions(struct gendisk *disk, fmode_t mode);
|
||||
|
||||
int disk_alloc_events(struct gendisk *disk);
|
||||
void disk_add_events(struct gendisk *disk);
|
||||
@ -493,4 +465,45 @@ int disk_register_independent_access_ranges(struct gendisk *disk,
|
||||
struct blk_independent_access_ranges *new_iars);
|
||||
void disk_unregister_independent_access_ranges(struct gendisk *disk);
|
||||
|
||||
#ifdef CONFIG_FAIL_MAKE_REQUEST
|
||||
bool should_fail_request(struct block_device *part, unsigned int bytes);
|
||||
#else /* CONFIG_FAIL_MAKE_REQUEST */
|
||||
static inline bool should_fail_request(struct block_device *part,
|
||||
unsigned int bytes)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
#endif /* CONFIG_FAIL_MAKE_REQUEST */
|
||||
|
||||
/*
 * Optimized request reference counting. Ideally we'd make timeouts be more
 * clever, as that's the only reason we need references at all... But until
 * this happens, this is faster than using refcount_t. Also see:
 *
 * abc54d634334 ("io_uring: switch to atomic_t for io_kiocb reference count")
 */
#define req_ref_zero_or_close_to_overflow(req)	\
	((unsigned int) atomic_read(&(req->ref)) + 127u <= 127u)

static inline bool req_ref_inc_not_zero(struct request *req)
{
	return atomic_inc_not_zero(&req->ref);
}

static inline bool req_ref_put_and_test(struct request *req)
{
	WARN_ON_ONCE(req_ref_zero_or_close_to_overflow(req));
	return atomic_dec_and_test(&req->ref);
}

static inline void req_ref_set(struct request *req, int value)
{
	atomic_set(&req->ref, value);
}

static inline int req_ref_read(struct request *req)
{
	return atomic_read(&req->ref);
}

#endif /* BLK_INTERNAL_H */
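The req_ref_*() helpers above trade refcount_t for a plain atomic_t guarded by an overflow sanity check. The two operations that matter are "take a reference only if the count is still non-zero" and "drop a reference and learn whether it was the last one"; a standalone C11 sketch of those semantics (illustrative only, not the kernel helpers):

/* Sketch of inc-not-zero / put-and-test reference counting with C11
 * atomics; illustrative only, not the kernel helpers. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct obj { atomic_int ref; };

/* Take a reference only if the object has not already hit zero. */
static bool obj_ref_inc_not_zero(struct obj *o)
{
	int old = atomic_load(&o->ref);

	while (old != 0) {
		if (atomic_compare_exchange_weak(&o->ref, &old, old + 1))
			return true;
	}
	return false;
}

/* Drop a reference; returns true if the caller dropped the last one. */
static bool obj_ref_put_and_test(struct obj *o)
{
	return atomic_fetch_sub(&o->ref, 1) == 1;
}

int main(void)
{
	struct obj o = { .ref = 1 };

	obj_ref_inc_not_zero(&o);		/* ref: 1 -> 2 */
	obj_ref_put_and_test(&o);		/* 2 -> 1, not the last ref */
	if (obj_ref_put_and_test(&o))		/* 1 -> 0, last ref */
		puts("last reference dropped; safe to free");
	return 0;
}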
|
||||
|
@ -92,7 +92,7 @@ static int bsg_transport_sg_io_fn(struct request_queue *q, struct sg_io_v4 *hdr,
|
||||
goto out_unmap_bidi_rq;
|
||||
|
||||
bio = rq->bio;
|
||||
blk_execute_rq(NULL, rq, !(hdr->flags & BSG_FLAG_Q_AT_TAIL));
|
||||
blk_execute_rq(rq, !(hdr->flags & BSG_FLAG_Q_AT_TAIL));
|
||||
|
||||
/*
|
||||
* The assignments below don't make much sense, but are kept for
|
||||
|
@ -188,8 +188,10 @@ static void elevator_release(struct kobject *kobj)
|
||||
kfree(e);
|
||||
}
|
||||
|
||||
void __elevator_exit(struct request_queue *q, struct elevator_queue *e)
|
||||
void elevator_exit(struct request_queue *q)
|
||||
{
|
||||
struct elevator_queue *e = q->elevator;
|
||||
|
||||
mutex_lock(&e->sysfs_lock);
|
||||
blk_mq_exit_sched(q, e);
|
||||
mutex_unlock(&e->sysfs_lock);
|
||||
@ -595,7 +597,8 @@ int elevator_switch_mq(struct request_queue *q,
|
||||
elv_unregister_queue(q);
|
||||
|
||||
ioc_clear_queue(q);
|
||||
elevator_exit(q, q->elevator);
|
||||
blk_mq_sched_free_rqs(q);
|
||||
elevator_exit(q);
|
||||
}
|
||||
|
||||
ret = blk_mq_init_sched(q, new_e);
|
||||
@ -605,7 +608,8 @@ int elevator_switch_mq(struct request_queue *q,
|
||||
if (new_e) {
|
||||
ret = elv_register_queue(q, true);
|
||||
if (ret) {
|
||||
elevator_exit(q, q->elevator);
|
||||
blk_mq_sched_free_rqs(q);
|
||||
elevator_exit(q);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
block/fops.c (37 lines changed)
@ -566,21 +566,48 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
|
||||
{
|
||||
struct block_device *bdev = iocb->ki_filp->private_data;
|
||||
loff_t size = bdev_nr_bytes(bdev);
|
||||
size_t count = iov_iter_count(to);
|
||||
loff_t pos = iocb->ki_pos;
|
||||
size_t shorted = 0;
|
||||
ssize_t ret;
|
||||
ssize_t ret = 0;
|
||||
|
||||
if (unlikely(pos + iov_iter_count(to) > size)) {
|
||||
if (unlikely(pos + count > size)) {
|
||||
if (pos >= size)
|
||||
return 0;
|
||||
size -= pos;
|
||||
if (iov_iter_count(to) > size) {
|
||||
shorted = iov_iter_count(to) - size;
|
||||
if (count > size) {
|
||||
shorted = count - size;
|
||||
iov_iter_truncate(to, size);
|
||||
}
|
||||
}
|
||||
|
||||
ret = generic_file_read_iter(iocb, to);
|
||||
if (iocb->ki_flags & IOCB_DIRECT) {
|
||||
struct address_space *mapping = iocb->ki_filp->f_mapping;
|
||||
|
||||
if (iocb->ki_flags & IOCB_NOWAIT) {
|
||||
if (filemap_range_needs_writeback(mapping, iocb->ki_pos,
|
||||
iocb->ki_pos + count - 1))
|
||||
return -EAGAIN;
|
||||
} else {
|
||||
ret = filemap_write_and_wait_range(mapping,
|
||||
iocb->ki_pos,
|
||||
iocb->ki_pos + count - 1);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
|
||||
file_accessed(iocb->ki_filp);
|
||||
|
||||
ret = blkdev_direct_IO(iocb, to);
|
||||
if (ret >= 0) {
|
||||
iocb->ki_pos += ret;
|
||||
count -= ret;
|
||||
}
|
||||
if (ret < 0 || !count)
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = filemap_read(iocb, to, ret);
|
||||
|
||||
if (unlikely(shorted))
|
||||
iov_iter_reexpand(to, iov_iter_count(to) + shorted);
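The reworked blkdev_read_iter() above handles IOCB_DIRECT itself: check or flush the page cache for the range, attempt the direct read, and fall back to a buffered read for whatever the direct attempt did not cover. A much-simplified userspace analogue of try-direct-then-buffered, using POSIX I/O (the alignment rules of real O_DIRECT are deliberately ignored here; a failed direct attempt simply falls through to the buffered path):

/* Sketch of a direct-I/O read with buffered fallback; simplified and
 * not the kernel path (O_DIRECT alignment handling is omitted). */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

static ssize_t read_direct_then_buffered(const char *path, void *buf,
					 size_t count, off_t pos)
{
	ssize_t done = 0;
	int fd = open(path, O_RDONLY | O_DIRECT);

	if (fd >= 0) {
		ssize_t ret = pread(fd, buf, count, pos);

		close(fd);
		if (ret > 0) {
			done = ret;
			if ((size_t)done == count)
				return done;	/* direct path got it all */
		}
	}

	/* buffered fallback for whatever the direct attempt left over */
	fd = open(path, O_RDONLY);
	if (fd < 0)
		return done ? done : -1;
	ssize_t ret = pread(fd, (char *)buf + done, count - done, pos + done);
	close(fd);
	if (ret > 0)
		done += ret;
	return done ? done : ret;
}

int main(void)
{
	char buf[4096];
	ssize_t n = read_direct_then_buffered("/etc/hostname", buf,
					      sizeof(buf), 0);

	printf("read %zd bytes\n", n);
	return 0;
}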
|
||||
|
@ -25,8 +25,10 @@
|
||||
#include <linux/log2.h>
|
||||
#include <linux/pm_runtime.h>
|
||||
#include <linux/badblocks.h>
|
||||
#include <linux/part_stat.h>
|
||||
|
||||
#include "blk.h"
|
||||
#include "blk-mq-sched.h"
|
||||
#include "blk-rq-qos.h"
|
||||
|
||||
static struct kobject *block_depr;
|
||||
@ -372,17 +374,21 @@ void disk_uevent(struct gendisk *disk, enum kobject_action action)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(disk_uevent);
|
||||
|
||||
static void disk_scan_partitions(struct gendisk *disk)
|
||||
int disk_scan_partitions(struct gendisk *disk, fmode_t mode)
|
||||
{
|
||||
struct block_device *bdev;
|
||||
|
||||
if (!get_capacity(disk) || !disk_part_scan_enabled(disk))
|
||||
return;
|
||||
if (disk->flags & (GENHD_FL_NO_PART | GENHD_FL_HIDDEN))
|
||||
return -EINVAL;
|
||||
if (disk->open_partitions)
|
||||
return -EBUSY;
|
||||
|
||||
set_bit(GD_NEED_PART_SCAN, &disk->state);
|
||||
bdev = blkdev_get_by_dev(disk_devt(disk), FMODE_READ, NULL);
|
||||
if (!IS_ERR(bdev))
|
||||
blkdev_put(bdev, FMODE_READ);
|
||||
bdev = blkdev_get_by_dev(disk_devt(disk), mode, NULL);
|
||||
if (IS_ERR(bdev))
|
||||
return PTR_ERR(bdev);
|
||||
blkdev_put(bdev, mode);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -425,6 +431,8 @@ int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
|
||||
DISK_MAX_PARTS);
|
||||
disk->minors = DISK_MAX_PARTS;
|
||||
}
|
||||
if (disk->first_minor + disk->minors > MINORMASK + 1)
|
||||
return -EINVAL;
|
||||
} else {
|
||||
if (WARN_ON(disk->minors))
|
||||
return -EINVAL;
|
||||
@ -434,13 +442,8 @@ int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
|
||||
return ret;
|
||||
disk->major = BLOCK_EXT_MAJOR;
|
||||
disk->first_minor = ret;
|
||||
disk->flags |= GENHD_FL_EXT_DEVT;
|
||||
}
|
||||
|
||||
ret = disk_alloc_events(disk);
|
||||
if (ret)
|
||||
goto out_free_ext_minor;
|
||||
|
||||
/* delay uevents, until we scanned partition table */
|
||||
dev_set_uevent_suppress(ddev, 1);
|
||||
|
||||
@ -451,7 +454,12 @@ int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
|
||||
ddev->devt = MKDEV(disk->major, disk->first_minor);
|
||||
ret = device_add(ddev);
|
||||
if (ret)
|
||||
goto out_disk_release_events;
|
||||
goto out_free_ext_minor;
|
||||
|
||||
ret = disk_alloc_events(disk);
|
||||
if (ret)
|
||||
goto out_device_del;
|
||||
|
||||
if (!sysfs_deprecated) {
|
||||
ret = sysfs_create_link(block_depr, &ddev->kobj,
|
||||
kobject_name(&ddev->kobj));
|
||||
@ -490,14 +498,7 @@ int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
|
||||
if (ret)
|
||||
goto out_put_slave_dir;
|
||||
|
||||
if (disk->flags & GENHD_FL_HIDDEN) {
|
||||
/*
|
||||
* Don't let hidden disks show up in /proc/partitions,
|
||||
* and don't bother scanning for partitions either.
|
||||
*/
|
||||
disk->flags |= GENHD_FL_SUPPRESS_PARTITION_INFO;
|
||||
disk->flags |= GENHD_FL_NO_PART_SCAN;
|
||||
} else {
|
||||
if (!(disk->flags & GENHD_FL_HIDDEN)) {
|
||||
ret = bdi_register(disk->bdi, "%u:%u",
|
||||
disk->major, disk->first_minor);
|
||||
if (ret)
|
||||
@ -509,7 +510,8 @@ int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
|
||||
goto out_unregister_bdi;
|
||||
|
||||
bdev_add(disk->part0, ddev->devt);
|
||||
disk_scan_partitions(disk);
|
||||
if (get_capacity(disk))
|
||||
disk_scan_partitions(disk, FMODE_READ);
|
||||
|
||||
/*
|
||||
* Announce the disk and partitions after all partitions are
|
||||
@ -539,8 +541,6 @@ out_del_block_link:
|
||||
sysfs_remove_link(block_depr, dev_name(ddev));
|
||||
out_device_del:
|
||||
device_del(ddev);
|
||||
out_disk_release_events:
|
||||
disk_release_events(disk);
|
||||
out_free_ext_minor:
|
||||
if (disk->major == BLOCK_EXT_MAJOR)
|
||||
blk_free_ext_minor(disk->first_minor);
|
||||
@ -720,8 +720,7 @@ void __init printk_all_partitions(void)
|
||||
* Don't show empty devices or things that have been
|
||||
* suppressed
|
||||
*/
|
||||
if (get_capacity(disk) == 0 ||
|
||||
(disk->flags & GENHD_FL_SUPPRESS_PARTITION_INFO))
|
||||
if (get_capacity(disk) == 0 || (disk->flags & GENHD_FL_HIDDEN))
|
||||
continue;
|
||||
|
||||
/*
|
||||
@ -814,11 +813,7 @@ static int show_partition(struct seq_file *seqf, void *v)
|
||||
struct block_device *part;
|
||||
unsigned long idx;
|
||||
|
||||
/* Don't show non-partitionable removeable devices or empty devices */
|
||||
if (!get_capacity(sgp) || (!disk_max_parts(sgp) &&
|
||||
(sgp->flags & GENHD_FL_REMOVABLE)))
|
||||
return 0;
|
||||
if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO)
|
||||
if (!get_capacity(sgp) || (sgp->flags & GENHD_FL_HIDDEN))
|
||||
return 0;
|
||||
|
||||
rcu_read_lock();
|
||||
@ -874,7 +869,8 @@ static ssize_t disk_ext_range_show(struct device *dev,
|
||||
{
|
||||
struct gendisk *disk = dev_to_disk(dev);
|
||||
|
||||
return sprintf(buf, "%d\n", disk_max_parts(disk));
|
||||
return sprintf(buf, "%d\n",
|
||||
(disk->flags & GENHD_FL_NO_PART) ? 1 : DISK_MAX_PARTS);
|
||||
}
|
||||
|
||||
static ssize_t disk_removable_show(struct device *dev,
|
||||
@ -1343,7 +1339,7 @@ struct gendisk *__blk_alloc_disk(int node, struct lock_class_key *lkclass)
|
||||
struct request_queue *q;
|
||||
struct gendisk *disk;
|
||||
|
||||
q = blk_alloc_queue(node);
|
||||
q = blk_alloc_queue(node, false);
|
||||
if (!q)
|
||||
return NULL;
|
||||
|
||||
|
@ -82,31 +82,6 @@ static int compat_blkpg_ioctl(struct block_device *bdev,
|
||||
}
|
||||
#endif
|
||||
|
||||
static int blkdev_reread_part(struct block_device *bdev, fmode_t mode)
|
||||
{
|
||||
struct block_device *tmp;
|
||||
|
||||
if (!disk_part_scan_enabled(bdev->bd_disk) || bdev_is_partition(bdev))
|
||||
return -EINVAL;
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EACCES;
|
||||
if (bdev->bd_disk->open_partitions)
|
||||
return -EBUSY;
|
||||
|
||||
/*
|
||||
* Reopen the device to revalidate the driver state and force a
|
||||
* partition rescan.
|
||||
*/
|
||||
mode &= ~FMODE_EXCL;
|
||||
set_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state);
|
||||
|
||||
tmp = blkdev_get_by_dev(bdev->bd_dev, mode, NULL);
|
||||
if (IS_ERR(tmp))
|
||||
return PTR_ERR(tmp);
|
||||
blkdev_put(tmp, mode);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode,
|
||||
unsigned long arg, unsigned long flags)
|
||||
{
|
||||
@ -522,7 +497,11 @@ static int blkdev_common_ioctl(struct block_device *bdev, fmode_t mode,
|
||||
bdev->bd_disk->bdi->ra_pages = (arg * 512) / PAGE_SIZE;
|
||||
return 0;
|
||||
case BLKRRPART:
|
||||
return blkdev_reread_part(bdev, mode);
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EACCES;
|
||||
if (bdev_is_partition(bdev))
|
||||
return -EINVAL;
|
||||
return disk_scan_partitions(bdev->bd_disk, mode & ~FMODE_EXCL);
|
||||
case BLKTRACESTART:
|
||||
case BLKTRACESTOP:
|
||||
case BLKTRACETEARDOWN:
|
||||
|
@ -22,46 +22,14 @@
|
||||
*/
|
||||
#include <linux/gfp.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/export.h>
|
||||
#include <linux/ioprio.h>
|
||||
#include <linux/cred.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/capability.h>
|
||||
#include <linux/sched/user.h>
|
||||
#include <linux/sched/task.h>
|
||||
#include <linux/syscalls.h>
|
||||
#include <linux/security.h>
|
||||
#include <linux/pid_namespace.h>
|
||||
|
||||
int set_task_ioprio(struct task_struct *task, int ioprio)
|
||||
{
|
||||
int err;
|
||||
struct io_context *ioc;
|
||||
const struct cred *cred = current_cred(), *tcred;
|
||||
|
||||
rcu_read_lock();
|
||||
tcred = __task_cred(task);
|
||||
if (!uid_eq(tcred->uid, cred->euid) &&
|
||||
!uid_eq(tcred->uid, cred->uid) && !capable(CAP_SYS_NICE)) {
|
||||
rcu_read_unlock();
|
||||
return -EPERM;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
err = security_task_setioprio(task, ioprio);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE);
|
||||
if (ioc) {
|
||||
ioc->ioprio = ioprio;
|
||||
put_io_context(ioc);
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(set_task_ioprio);
|
||||
|
||||
int ioprio_check_cap(int ioprio)
|
||||
{
|
||||
int class = IOPRIO_PRIO_CLASS(ioprio);
|
||||
|
@ -433,6 +433,7 @@ static void kyber_exit_sched(struct elevator_queue *e)
|
||||
int i;
|
||||
|
||||
del_timer_sync(&kqd->timer);
|
||||
blk_stat_disable_accounting(kqd->q);
|
||||
|
||||
for (i = 0; i < KYBER_NUM_DOMAINS; i++)
|
||||
sbitmap_queue_free(&kqd->domain_tokens[i]);
|
||||
|
@ -98,13 +98,12 @@ static void bdev_set_nr_sectors(struct block_device *bdev, sector_t sectors)
|
||||
static struct parsed_partitions *allocate_partitions(struct gendisk *hd)
|
||||
{
|
||||
struct parsed_partitions *state;
|
||||
int nr;
|
||||
int nr = DISK_MAX_PARTS;
|
||||
|
||||
state = kzalloc(sizeof(*state), GFP_KERNEL);
|
||||
if (!state)
|
||||
return NULL;
|
||||
|
||||
nr = disk_max_parts(hd);
|
||||
state->parts = vzalloc(array_size(nr, sizeof(state->parts[0])));
|
||||
if (!state->parts) {
|
||||
kfree(state);
|
||||
@ -326,7 +325,7 @@ static struct block_device *add_partition(struct gendisk *disk, int partno,
|
||||
|
||||
lockdep_assert_held(&disk->open_mutex);
|
||||
|
||||
if (partno >= disk_max_parts(disk))
|
||||
if (partno >= DISK_MAX_PARTS)
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
/*
|
||||
@@ -527,18 +526,15 @@ out_unlock:

static bool disk_unlock_native_capacity(struct gendisk *disk)
{
        const struct block_device_operations *bdops = disk->fops;

        if (bdops->unlock_native_capacity &&
            !(disk->flags & GENHD_FL_NATIVE_CAPACITY)) {
                printk(KERN_CONT "enabling native capacity\n");
                bdops->unlock_native_capacity(disk);
                disk->flags |= GENHD_FL_NATIVE_CAPACITY;
                return true;
        } else {
        if (!disk->fops->unlock_native_capacity ||
            test_and_set_bit(GD_NATIVE_CAPACITY, &disk->state)) {
                printk(KERN_CONT "truncated\n");
                return false;
        }

        printk(KERN_CONT "enabling native capacity\n");
        disk->fops->unlock_native_capacity(disk);
        return true;
}

void blk_drop_partitions(struct gendisk *disk)
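
The rewrite above drops the GENHD_FL_NATIVE_CAPACITY flag in favour of an atomic test_and_set_bit() on disk->state, so the unlock is attempted at most once per disk without extra locking. A standalone user-space model of that one-shot idiom, using C11 atomics as a stand-in for the kernel bitops (all names here are invented for the sketch):

/* Standalone model of a test_and_set_bit()-style one-shot action. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct fake_disk {
        atomic_ulong state;     /* stand-in for disk->state */
        void (*unlock_native_capacity)(struct fake_disk *);
};

#define FAKE_GD_NATIVE_CAPACITY 0       /* bit number, as in the kernel enum */

/* Returns the previous value of the bit, like test_and_set_bit(). */
static bool test_and_set_bit_model(int nr, atomic_ulong *addr)
{
        unsigned long mask = 1UL << nr;

        return atomic_fetch_or(addr, mask) & mask;
}

static bool disk_unlock_native_capacity_model(struct fake_disk *disk)
{
        if (!disk->unlock_native_capacity ||
            test_and_set_bit_model(FAKE_GD_NATIVE_CAPACITY, &disk->state)) {
                printf("truncated\n");
                return false;
        }

        printf("enabling native capacity\n");
        disk->unlock_native_capacity(disk);
        return true;
}

static void fake_unlock(struct fake_disk *disk) { (void)disk; }

int main(void)
{
        struct fake_disk d = { .state = 0, .unlock_native_capacity = fake_unlock };

        disk_unlock_native_capacity_model(&d);  /* first call enables, returns true */
        disk_unlock_native_capacity_model(&d);  /* bit already set, returns false */
        return 0;
}
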
@ -607,7 +603,7 @@ static int blk_add_partitions(struct gendisk *disk)
|
||||
struct parsed_partitions *state;
|
||||
int ret = -EAGAIN, p;
|
||||
|
||||
if (!disk_part_scan_enabled(disk))
|
||||
if (disk->flags & GENHD_FL_NO_PART)
|
||||
return 0;
|
||||
|
||||
state = check_partition(disk);
|
||||
@ -690,7 +686,7 @@ rescan:
|
||||
* userspace for this particular setup.
|
||||
*/
|
||||
if (invalidate) {
|
||||
if (disk_part_scan_enabled(disk) ||
|
||||
if (!(disk->flags & GENHD_FL_NO_PART) ||
|
||||
!(disk->flags & GENHD_FL_REMOVABLE))
|
||||
set_capacity(disk, 0);
|
||||
}
|
||||
|
@ -1505,7 +1505,7 @@ static blk_status_t amiflop_queue_rq(struct blk_mq_hw_ctx *hctx,
|
||||
const struct blk_mq_queue_data *bd)
|
||||
{
|
||||
struct request *rq = bd->rq;
|
||||
struct amiga_floppy_struct *floppy = rq->rq_disk->private_data;
|
||||
struct amiga_floppy_struct *floppy = rq->q->disk->private_data;
|
||||
blk_status_t err;
|
||||
|
||||
if (!spin_trylock_irq(&amiflop_lock))
|
||||
@ -1790,6 +1790,7 @@ static int fd_alloc_disk(int drive, int system)
|
||||
disk->first_minor = drive + system;
|
||||
disk->minors = 1;
|
||||
disk->fops = &floppy_fops;
|
||||
disk->flags |= GENHD_FL_NO_PART;
|
||||
disk->events = DISK_EVENT_MEDIA_CHANGE;
|
||||
if (system)
|
||||
sprintf(disk->disk_name, "fd%d_msdos", drive);
|
||||
|
@ -1502,7 +1502,7 @@ static void setup_req_params( int drive )
|
||||
static blk_status_t ataflop_queue_rq(struct blk_mq_hw_ctx *hctx,
|
||||
const struct blk_mq_queue_data *bd)
|
||||
{
|
||||
struct atari_floppy_struct *floppy = bd->rq->rq_disk->private_data;
|
||||
struct atari_floppy_struct *floppy = bd->rq->q->disk->private_data;
|
||||
int drive = floppy - unit;
|
||||
int type = floppy->type;
|
||||
|
||||
@ -1538,7 +1538,7 @@ static blk_status_t ataflop_queue_rq(struct blk_mq_hw_ctx *hctx,
|
||||
if (!UDT) {
|
||||
Probing = 1;
|
||||
UDT = atari_disk_type + StartDiskType[DriveType];
|
||||
set_capacity(bd->rq->rq_disk, UDT->blocks);
|
||||
set_capacity(bd->rq->q->disk, UDT->blocks);
|
||||
UD.autoprobe = 1;
|
||||
}
|
||||
}
|
||||
@ -1558,7 +1558,7 @@ static blk_status_t ataflop_queue_rq(struct blk_mq_hw_ctx *hctx,
|
||||
}
|
||||
type = minor2disktype[type].index;
|
||||
UDT = &atari_disk_type[type];
|
||||
set_capacity(bd->rq->rq_disk, UDT->blocks);
|
||||
set_capacity(bd->rq->q->disk, UDT->blocks);
|
||||
UD.autoprobe = 0;
|
||||
}
|
||||
|
||||
@ -2000,6 +2000,7 @@ static int ataflop_alloc_disk(unsigned int drive, unsigned int type)
|
||||
disk->minors = 1;
|
||||
sprintf(disk->disk_name, "fd%d", drive);
|
||||
disk->fops = &floppy_fops;
|
||||
disk->flags |= GENHD_FL_NO_PART;
|
||||
disk->events = DISK_EVENT_MEDIA_CHANGE;
|
||||
disk->private_data = &unit[drive];
|
||||
set_capacity(disk, MAX_DISK_SIZE * 2);
|
||||
|
@ -405,7 +405,6 @@ static int brd_alloc(int i)
|
||||
disk->minors = max_part;
|
||||
disk->fops = &brd_fops;
|
||||
disk->private_data = brd;
|
||||
disk->flags = GENHD_FL_EXT_DEVT;
|
||||
strlcpy(disk->disk_name, buf, DISK_NAME_LEN);
|
||||
set_capacity(disk, rd_size * 2);
|
||||
|
||||
|
@ -2734,6 +2734,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
|
||||
disk->first_minor = minor;
|
||||
disk->minors = 1;
|
||||
disk->fops = &drbd_ops;
|
||||
disk->flags |= GENHD_FL_NO_PART;
|
||||
sprintf(disk->disk_name, "drbd%d", minor);
|
||||
disk->private_data = device;
|
||||
|
||||
|
@ -2259,7 +2259,7 @@ static int do_format(int drive, struct format_descr *tmp_format_req)
|
||||
static void floppy_end_request(struct request *req, blk_status_t error)
|
||||
{
|
||||
unsigned int nr_sectors = current_count_sectors;
|
||||
unsigned int drive = (unsigned long)req->rq_disk->private_data;
|
||||
unsigned int drive = (unsigned long)req->q->disk->private_data;
|
||||
|
||||
/* current_count_sectors can be zero if transfer failed */
|
||||
if (error)
|
||||
@ -2550,7 +2550,7 @@ static int make_raw_rw_request(void)
|
||||
if (WARN(max_buffer_sectors == 0, "VFS: Block I/O scheduled on unopened device\n"))
|
||||
return 0;
|
||||
|
||||
set_fdc((long)current_req->rq_disk->private_data);
|
||||
set_fdc((long)current_req->q->disk->private_data);
|
||||
|
||||
raw_cmd = &default_raw_cmd;
|
||||
raw_cmd->flags = FD_RAW_SPIN | FD_RAW_NEED_DISK | FD_RAW_NEED_SEEK;
|
||||
@ -2792,7 +2792,7 @@ do_request:
|
||||
return;
|
||||
}
|
||||
}
|
||||
drive = (long)current_req->rq_disk->private_data;
|
||||
drive = (long)current_req->q->disk->private_data;
|
||||
set_fdc(drive);
|
||||
reschedule_timeout(current_drive, "redo fd request");
|
||||
|
||||
@ -4503,6 +4503,7 @@ static int floppy_alloc_disk(unsigned int drive, unsigned int type)
|
||||
disk->first_minor = TOMINOR(drive) | (type << 2);
|
||||
disk->minors = 1;
|
||||
disk->fops = &floppy_fops;
|
||||
disk->flags |= GENHD_FL_NO_PART;
|
||||
disk->events = DISK_EVENT_MEDIA_CHANGE;
|
||||
if (type)
|
||||
sprintf(disk->disk_name, "fd%d_type%d", drive, type);
|
||||
|
@ -1061,7 +1061,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
|
||||
lo->lo_flags |= LO_FLAGS_PARTSCAN;
|
||||
partscan = lo->lo_flags & LO_FLAGS_PARTSCAN;
|
||||
if (partscan)
|
||||
lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN;
|
||||
lo->lo_disk->flags &= ~GENHD_FL_NO_PART;
|
||||
|
||||
loop_global_unlock(lo, is_loop);
|
||||
if (partscan)
|
||||
@ -1191,7 +1191,7 @@ out_unlock:
|
||||
mutex_lock(&lo->lo_mutex);
|
||||
lo->lo_flags = 0;
|
||||
if (!part_shift)
|
||||
lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN;
|
||||
lo->lo_disk->flags |= GENHD_FL_NO_PART;
|
||||
lo->lo_state = Lo_unbound;
|
||||
mutex_unlock(&lo->lo_mutex);
|
||||
|
||||
@ -1301,7 +1301,7 @@ out_unfreeze:
|
||||
|
||||
if (!err && (lo->lo_flags & LO_FLAGS_PARTSCAN) &&
|
||||
!(prev_lo_flags & LO_FLAGS_PARTSCAN)) {
|
||||
lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN;
|
||||
lo->lo_disk->flags &= ~GENHD_FL_NO_PART;
|
||||
partscan = true;
|
||||
}
|
||||
out_unlock:
|
||||
@ -2032,8 +2032,7 @@ static int loop_add(int i)
|
||||
* userspace tools. Parameters like this in general should be avoided.
|
||||
*/
|
||||
if (!part_shift)
|
||||
disk->flags |= GENHD_FL_NO_PART_SCAN;
|
||||
disk->flags |= GENHD_FL_EXT_DEVT;
|
||||
disk->flags |= GENHD_FL_NO_PART;
|
||||
atomic_set(&lo->lo_refcnt, 0);
|
||||
mutex_init(&lo->lo_mutex);
|
||||
lo->lo_number = i;
|
||||
|
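
The loop hunks above fold LO_FLAGS_PARTSCAN handling onto the new GENHD_FL_NO_PART flag; the user-visible way of requesting a partition scan stays the same. A hedged user-space sketch of attaching a backing file with partition scanning via LOOP_CONFIGURE (backing path is a placeholder, error handling kept minimal, needs Linux 5.8+):

/* Illustrative only: attach a backing file to a free loop device and
 * request partition scanning on attach.
 */
#include <fcntl.h>
#include <linux/loop.h>         /* LOOP_CTL_GET_FREE, LOOP_CONFIGURE, LO_FLAGS_PARTSCAN */
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(void)
{
        const char *backing = "/tmp/disk.img"; /* placeholder backing file */
        struct loop_config cfg;
        char loopdev[32];
        int ctl, devnr, loopfd, backfd;

        ctl = open("/dev/loop-control", O_RDWR);
        if (ctl < 0 || (devnr = ioctl(ctl, LOOP_CTL_GET_FREE)) < 0) {
                perror("loop-control");
                return 1;
        }
        snprintf(loopdev, sizeof(loopdev), "/dev/loop%d", devnr);

        loopfd = open(loopdev, O_RDWR);
        backfd = open(backing, O_RDWR);
        if (loopfd < 0 || backfd < 0) {
                perror("open");
                return 1;
        }

        memset(&cfg, 0, sizeof(cfg));
        cfg.fd = backfd;
        cfg.info.lo_flags = LO_FLAGS_PARTSCAN; /* scan for partitions on attach */

        if (ioctl(loopfd, LOOP_CONFIGURE, &cfg) < 0) {
                perror("LOOP_CONFIGURE");
                return 1;
        }
        printf("attached %s with partition scanning enabled\n", loopdev);
        return 0;
}
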
@ -1015,7 +1015,7 @@ static int mtip_exec_internal_command(struct mtip_port *port,
|
||||
rq->timeout = timeout;
|
||||
|
||||
/* insert request and run queue */
|
||||
blk_execute_rq(NULL, rq, true);
|
||||
blk_execute_rq(rq, true);
|
||||
|
||||
if (int_cmd->status) {
|
||||
dev_err(&dd->pdev->dev, "Internal command [%02X] failed %d\n",
|
||||
|
@ -136,7 +136,7 @@ static int __init n64cart_probe(struct platform_device *pdev)
|
||||
goto out;
|
||||
|
||||
disk->first_minor = 0;
|
||||
disk->flags = GENHD_FL_NO_PART_SCAN;
|
||||
disk->flags = GENHD_FL_NO_PART;
|
||||
disk->fops = &n64cart_fops;
|
||||
disk->private_data = &pdev->dev;
|
||||
strcpy(disk->disk_name, "n64cart");
|
||||
|
@ -1850,7 +1850,6 @@ static int null_gendisk_register(struct nullb *nullb)
|
||||
|
||||
set_capacity(disk, size);
|
||||
|
||||
disk->flags |= GENHD_FL_EXT_DEVT | GENHD_FL_SUPPRESS_PARTITION_INFO;
|
||||
disk->major = null_major;
|
||||
disk->first_minor = nullb->index;
|
||||
disk->minors = 1;
|
||||
|
@ -44,7 +44,7 @@ TRACE_EVENT(nullb_zone_op,
|
||||
__entry->op = req_op(cmd->rq);
|
||||
__entry->zone_no = zone_no;
|
||||
__entry->zone_cond = zone_cond;
|
||||
__assign_disk_name(__entry->disk, cmd->rq->rq_disk);
|
||||
__assign_disk_name(__entry->disk, cmd->rq->q->disk);
|
||||
),
|
||||
TP_printk("%s req=%-15s zone_no=%u zone_cond=%-10s",
|
||||
__print_disk_name(__entry->disk),
|
||||
|
@ -690,7 +690,7 @@ static void pcd_request(void)
|
||||
if (!pcd_req && !set_next_request())
|
||||
return;
|
||||
|
||||
cd = pcd_req->rq_disk->private_data;
|
||||
cd = pcd_req->q->disk->private_data;
|
||||
if (cd != pcd_current)
|
||||
pcd_bufblk = -1;
|
||||
pcd_current = cd;
|
||||
@ -928,8 +928,9 @@ static int pcd_init_unit(struct pcd_unit *cd, bool autoprobe, int port,
|
||||
disk->minors = 1;
|
||||
strcpy(disk->disk_name, cd->name); /* umm... */
|
||||
disk->fops = &pcd_bdops;
|
||||
disk->flags = GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE;
|
||||
disk->flags |= GENHD_FL_NO_PART;
|
||||
disk->events = DISK_EVENT_MEDIA_CHANGE;
|
||||
disk->event_flags = DISK_EVENT_FLAG_BLOCK_ON_EXCL_WRITE;
|
||||
|
||||
if (!pi_init(cd->pi, autoprobe, port, mode, unit, protocol, delay,
|
||||
pcd_buffer, PI_PCD, verbose, cd->name)) {
|
||||
|
@ -430,7 +430,7 @@ static void run_fsm(void)
|
||||
int stop = 0;
|
||||
|
||||
if (!phase) {
|
||||
pd_current = pd_req->rq_disk->private_data;
|
||||
pd_current = pd_req->q->disk->private_data;
|
||||
pi_current = pd_current->pi;
|
||||
phase = do_pd_io_start;
|
||||
}
|
||||
@ -492,7 +492,7 @@ static enum action do_pd_io_start(void)
|
||||
case REQ_OP_WRITE:
|
||||
pd_block = blk_rq_pos(pd_req);
|
||||
pd_count = blk_rq_cur_sectors(pd_req);
|
||||
if (pd_block + pd_count > get_capacity(pd_req->rq_disk))
|
||||
if (pd_block + pd_count > get_capacity(pd_req->q->disk))
|
||||
return Fail;
|
||||
pd_run = blk_rq_sectors(pd_req);
|
||||
pd_buf = bio_data(pd_req->bio);
|
||||
@ -781,7 +781,7 @@ static int pd_special_command(struct pd_unit *disk,
|
||||
req = blk_mq_rq_to_pdu(rq);
|
||||
|
||||
req->func = func;
|
||||
blk_execute_rq(disk->gd, rq, 0);
|
||||
blk_execute_rq(rq, false);
|
||||
blk_mq_free_request(rq);
|
||||
return 0;
|
||||
}
|
||||
|
@ -746,12 +746,12 @@ repeat:
|
||||
if (!pf_req && !set_next_request())
|
||||
return;
|
||||
|
||||
pf_current = pf_req->rq_disk->private_data;
|
||||
pf_current = pf_req->q->disk->private_data;
|
||||
pf_block = blk_rq_pos(pf_req);
|
||||
pf_run = blk_rq_sectors(pf_req);
|
||||
pf_count = blk_rq_cur_sectors(pf_req);
|
||||
|
||||
if (pf_block + pf_count > get_capacity(pf_req->rq_disk)) {
|
||||
if (pf_block + pf_count > get_capacity(pf_req->q->disk)) {
|
||||
pf_end_request(BLK_STS_IOERR);
|
||||
goto repeat;
|
||||
}
|
||||
@ -942,6 +942,7 @@ static int __init pf_init_unit(struct pf_unit *pf, bool autoprobe, int port,
|
||||
disk->minors = 1;
|
||||
strcpy(disk->disk_name, pf->name);
|
||||
disk->fops = &pf_fops;
|
||||
disk->flags |= GENHD_FL_NO_PART;
|
||||
disk->events = DISK_EVENT_MEDIA_CHANGE;
|
||||
disk->private_data = pf;
|
||||
|
||||
|
@ -722,7 +722,7 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *
|
||||
if (cgc->quiet)
|
||||
rq->rq_flags |= RQF_QUIET;
|
||||
|
||||
blk_execute_rq(pd->bdev->bd_disk, rq, 0);
|
||||
blk_execute_rq(rq, false);
|
||||
if (scsi_req(rq)->result)
|
||||
ret = -EIO;
|
||||
out:
|
||||
@ -2719,7 +2719,7 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev)
|
||||
disk->first_minor = idx;
|
||||
disk->minors = 1;
|
||||
disk->fops = &pktcdvd_ops;
|
||||
disk->flags = GENHD_FL_REMOVABLE;
|
||||
disk->flags = GENHD_FL_REMOVABLE | GENHD_FL_NO_PART;
|
||||
strcpy(disk->disk_name, pd->name);
|
||||
disk->private_data = pd;
|
||||
|
||||
|
@ -742,6 +742,7 @@ static int ps3vram_probe(struct ps3_system_bus_device *dev)
|
||||
priv->gendisk = gendisk;
|
||||
gendisk->major = ps3vram_major;
|
||||
gendisk->minors = 1;
|
||||
gendisk->flags |= GENHD_FL_NO_PART;
|
||||
gendisk->fops = &ps3vram_fops;
|
||||
gendisk->private_data = dev;
|
||||
strlcpy(gendisk->disk_name, DEVICE_NAME, sizeof(gendisk->disk_name));
|
||||
|
@ -4924,12 +4924,10 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
|
||||
rbd_dev->dev_id);
|
||||
disk->major = rbd_dev->major;
|
||||
disk->first_minor = rbd_dev->minor;
|
||||
if (single_major) {
|
||||
if (single_major)
|
||||
disk->minors = (1 << RBD_SINGLE_MAJOR_PART_SHIFT);
|
||||
disk->flags |= GENHD_FL_EXT_DEVT;
|
||||
} else {
|
||||
else
|
||||
disk->minors = RBD_MINORS_PER_MAJOR;
|
||||
}
|
||||
disk->fops = &rbd_bd_ops;
|
||||
disk->private_data = rbd_dev;
|
||||
|
||||
|
@ -393,7 +393,7 @@ static void rnbd_put_iu(struct rnbd_clt_session *sess, struct rnbd_iu *iu)
|
||||
|
||||
static void rnbd_softirq_done_fn(struct request *rq)
|
||||
{
|
||||
struct rnbd_clt_dev *dev = rq->rq_disk->private_data;
|
||||
struct rnbd_clt_dev *dev = rq->q->disk->private_data;
|
||||
struct rnbd_clt_session *sess = dev->sess;
|
||||
struct rnbd_iu *iu;
|
||||
|
||||
@ -1133,7 +1133,7 @@ static blk_status_t rnbd_queue_rq(struct blk_mq_hw_ctx *hctx,
|
||||
const struct blk_mq_queue_data *bd)
|
||||
{
|
||||
struct request *rq = bd->rq;
|
||||
struct rnbd_clt_dev *dev = rq->rq_disk->private_data;
|
||||
struct rnbd_clt_dev *dev = rq->q->disk->private_data;
|
||||
struct rnbd_iu *iu = blk_mq_rq_to_pdu(rq);
|
||||
int err;
|
||||
blk_status_t ret = BLK_STS_IOERR;
|
||||
|
@ -143,8 +143,8 @@ static int vdc_getgeo(struct block_device *bdev, struct hd_geometry *geo)
|
||||
static int vdc_ioctl(struct block_device *bdev, fmode_t mode,
|
||||
unsigned command, unsigned long argument)
|
||||
{
|
||||
struct vdc_port *port = bdev->bd_disk->private_data;
|
||||
int i;
|
||||
struct gendisk *disk;
|
||||
|
||||
switch (command) {
|
||||
case CDROMMULTISESSION:
|
||||
@ -155,12 +155,15 @@ static int vdc_ioctl(struct block_device *bdev, fmode_t mode,
|
||||
return 0;
|
||||
|
||||
case CDROM_GET_CAPABILITY:
|
||||
disk = bdev->bd_disk;
|
||||
|
||||
if (bdev->bd_disk && (disk->flags & GENHD_FL_CD))
|
||||
if (!vdc_version_supported(port, 1, 1))
|
||||
return -EINVAL;
|
||||
switch (port->vdisk_mtype) {
|
||||
case VD_MEDIA_TYPE_CD:
|
||||
case VD_MEDIA_TYPE_DVD:
|
||||
return 0;
|
||||
return -EINVAL;
|
||||
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
default:
|
||||
pr_debug(PFX "ioctl %08x not supported\n", command);
|
||||
return -EINVAL;
|
||||
@ -459,7 +462,7 @@ static int __vdc_tx_trigger(struct vdc_port *port)
|
||||
|
||||
static int __send_request(struct request *req)
|
||||
{
|
||||
struct vdc_port *port = req->rq_disk->private_data;
|
||||
struct vdc_port *port = req->q->disk->private_data;
|
||||
struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
|
||||
struct scatterlist sg[MAX_RING_COOKIES];
|
||||
struct vdc_req_entry *rqe;
|
||||
@ -854,14 +857,12 @@ static int probe_disk(struct vdc_port *port)
|
||||
switch (port->vdisk_mtype) {
|
||||
case VD_MEDIA_TYPE_CD:
|
||||
pr_info(PFX "Virtual CDROM %s\n", port->disk_name);
|
||||
g->flags |= GENHD_FL_CD;
|
||||
g->flags |= GENHD_FL_REMOVABLE;
|
||||
set_disk_ro(g, 1);
|
||||
break;
|
||||
|
||||
case VD_MEDIA_TYPE_DVD:
|
||||
pr_info(PFX "Virtual DVD %s\n", port->disk_name);
|
||||
g->flags |= GENHD_FL_CD;
|
||||
g->flags |= GENHD_FL_REMOVABLE;
|
||||
set_disk_ro(g, 1);
|
||||
break;
|
||||
|
@ -840,6 +840,7 @@ static int swim_floppy_init(struct swim_priv *swd)
|
||||
swd->unit[drive].disk->minors = 1;
|
||||
sprintf(swd->unit[drive].disk->disk_name, "fd%d", drive);
|
||||
swd->unit[drive].disk->fops = &floppy_fops;
|
||||
swd->unit[drive].disk->flags |= GENHD_FL_NO_PART;
|
||||
swd->unit[drive].disk->events = DISK_EVENT_MEDIA_CHANGE;
|
||||
swd->unit[drive].disk->private_data = &swd->unit[drive];
|
||||
set_capacity(swd->unit[drive].disk, 2880);
|
||||
|
@ -1227,7 +1227,7 @@ static int swim3_attach(struct macio_dev *mdev,
|
||||
disk->fops = &floppy_fops;
|
||||
disk->private_data = fs;
|
||||
disk->events = DISK_EVENT_MEDIA_CHANGE;
|
||||
disk->flags |= GENHD_FL_REMOVABLE;
|
||||
disk->flags |= GENHD_FL_REMOVABLE | GENHD_FL_NO_PART;
|
||||
sprintf(disk->disk_name, "fd%d", floppy_count);
|
||||
set_capacity(disk, 2880);
|
||||
rc = add_disk(disk);
|
||||
|
@ -540,7 +540,7 @@ static int carm_array_info (struct carm_host *host, unsigned int array_idx)
|
||||
spin_unlock_irq(&host->lock);
|
||||
|
||||
DPRINTK("blk_execute_rq_nowait, tag == %u\n", rq->tag);
|
||||
blk_execute_rq_nowait(NULL, rq, true, NULL);
|
||||
blk_execute_rq_nowait(rq, true, NULL);
|
||||
|
||||
return 0;
|
||||
|
||||
@ -579,7 +579,7 @@ static int carm_send_special (struct carm_host *host, carm_sspc_t func)
|
||||
crq->msg_bucket = (u32) rc;
|
||||
|
||||
DPRINTK("blk_execute_rq_nowait, tag == %u\n", rq->tag);
|
||||
blk_execute_rq_nowait(NULL, rq, true, NULL);
|
||||
blk_execute_rq_nowait(rq, true, NULL);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -384,7 +384,7 @@ static int virtblk_get_id(struct gendisk *disk, char *id_str)
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
blk_execute_rq(vblk->disk, req, false);
|
||||
blk_execute_rq(req, false);
|
||||
err = blk_status_to_errno(virtblk_result(blk_mq_rq_to_pdu(req)));
|
||||
out:
|
||||
blk_mq_free_request(req);
|
||||
@ -843,7 +843,6 @@ static int virtblk_probe(struct virtio_device *vdev)
|
||||
vblk->disk->minors = 1 << PART_BITS;
|
||||
vblk->disk->private_data = vblk;
|
||||
vblk->disk->fops = &virtblk_fops;
|
||||
vblk->disk->flags |= GENHD_FL_EXT_DEVT;
|
||||
vblk->index = index;
|
||||
|
||||
/* configure queue flush support */
|
||||
|
@ -510,7 +510,7 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
|
||||
}
|
||||
vbd->size = vbd_sz(vbd);
|
||||
|
||||
if (vbd->bdev->bd_disk->flags & GENHD_FL_CD || cdrom)
|
||||
if (cdrom || disk_to_cdi(vbd->bdev->bd_disk))
|
||||
vbd->type |= VDISK_CDROM;
|
||||
if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE)
|
||||
vbd->type |= VDISK_REMOVABLE;
|
||||
|
@ -198,6 +198,7 @@ struct blkfront_info
|
||||
struct gendisk *gd;
|
||||
u16 sector_size;
|
||||
unsigned int physical_sector_size;
|
||||
unsigned long vdisk_info;
|
||||
int vdevice;
|
||||
blkif_vdev_t handle;
|
||||
enum blkif_state connected;
|
||||
@ -505,6 +506,7 @@ static int blkif_getgeo(struct block_device *bd, struct hd_geometry *hg)
|
||||
static int blkif_ioctl(struct block_device *bdev, fmode_t mode,
|
||||
unsigned command, unsigned long argument)
|
||||
{
|
||||
struct blkfront_info *info = bdev->bd_disk->private_data;
|
||||
int i;
|
||||
|
||||
switch (command) {
|
||||
@ -514,9 +516,9 @@ static int blkif_ioctl(struct block_device *bdev, fmode_t mode,
|
||||
return -EFAULT;
|
||||
return 0;
|
||||
case CDROM_GET_CAPABILITY:
|
||||
if (bdev->bd_disk->flags & GENHD_FL_CD)
|
||||
return 0;
|
||||
return -EINVAL;
|
||||
if (!(info->vdisk_info & VDISK_CDROM))
|
||||
return -EINVAL;
|
||||
return 0;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
@ -1057,9 +1059,8 @@ static char *encode_disk_name(char *ptr, unsigned int n)
|
||||
}
|
||||
|
||||
static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
|
||||
struct blkfront_info *info,
|
||||
u16 vdisk_info, u16 sector_size,
|
||||
unsigned int physical_sector_size)
|
||||
struct blkfront_info *info, u16 sector_size,
|
||||
unsigned int physical_sector_size)
|
||||
{
|
||||
struct gendisk *gd;
|
||||
int nr_minors = 1;
|
||||
@ -1157,15 +1158,11 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
|
||||
|
||||
xlvbd_flush(info);
|
||||
|
||||
if (vdisk_info & VDISK_READONLY)
|
||||
if (info->vdisk_info & VDISK_READONLY)
|
||||
set_disk_ro(gd, 1);
|
||||
|
||||
if (vdisk_info & VDISK_REMOVABLE)
|
||||
if (info->vdisk_info & VDISK_REMOVABLE)
|
||||
gd->flags |= GENHD_FL_REMOVABLE;
|
||||
|
||||
if (vdisk_info & VDISK_CDROM)
|
||||
gd->flags |= GENHD_FL_CD;
|
||||
|
||||
return 0;
|
||||
|
||||
out_free_tag_set:
|
||||
@ -2313,7 +2310,6 @@ static void blkfront_connect(struct blkfront_info *info)
|
||||
unsigned long long sectors;
|
||||
unsigned long sector_size;
|
||||
unsigned int physical_sector_size;
|
||||
unsigned int binfo;
|
||||
int err, i;
|
||||
struct blkfront_ring_info *rinfo;
|
||||
|
||||
@ -2351,7 +2347,7 @@ static void blkfront_connect(struct blkfront_info *info)
|
||||
|
||||
err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
|
||||
"sectors", "%llu", §ors,
|
||||
"info", "%u", &binfo,
|
||||
"info", "%u", &info->vdisk_info,
|
||||
"sector-size", "%lu", §or_size,
|
||||
NULL);
|
||||
if (err) {
|
||||
@ -2380,7 +2376,7 @@ static void blkfront_connect(struct blkfront_info *info)
|
||||
}
|
||||
}
|
||||
|
||||
err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size,
|
||||
err = xlvbd_alloc_gendisk(sectors, info, sector_size,
|
||||
physical_sector_size);
|
||||
if (err) {
|
||||
xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
|
||||
|
@ -327,6 +327,7 @@ static int z2ram_register_disk(int minor)
|
||||
disk->major = Z2RAM_MAJOR;
|
||||
disk->first_minor = minor;
|
||||
disk->minors = 1;
|
||||
disk->flags |= GENHD_FL_NO_PART;
|
||||
disk->fops = &z2_fops;
|
||||
if (minor)
|
||||
sprintf(disk->disk_name, "z2ram%d", minor);
|
||||
|
@ -1947,6 +1947,7 @@ static int zram_add(void)
|
||||
zram->disk->major = zram_major;
|
||||
zram->disk->first_minor = device_id;
|
||||
zram->disk->minors = 1;
|
||||
zram->disk->flags |= GENHD_FL_NO_PART;
|
||||
zram->disk->fops = &zram_devops;
|
||||
zram->disk->private_data = zram;
|
||||
snprintf(zram->disk->disk_name, 16, "zram%d", device_id);
|
||||
|
@ -719,6 +719,7 @@ static void probe_gdrom_setupdisk(void)
|
||||
gd.disk->major = gdrom_major;
|
||||
gd.disk->first_minor = 1;
|
||||
gd.disk->minors = 1;
|
||||
gd.disk->flags |= GENHD_FL_NO_PART;
|
||||
strcpy(gd.disk->disk_name, GDROM_DEV_NAME);
|
||||
}
|
||||
|
||||
|
@ -425,7 +425,7 @@ static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
|
||||
}
|
||||
#endif
|
||||
|
||||
static void copy_io(u32 __iomem *piobuf, struct rvt_sge_state *ss,
|
||||
static void qib_copy_io(u32 __iomem *piobuf, struct rvt_sge_state *ss,
|
||||
u32 length, unsigned flush_wc)
|
||||
{
|
||||
u32 extra = 0;
|
||||
@ -975,7 +975,7 @@ static int qib_verbs_send_pio(struct rvt_qp *qp, struct ib_header *ibhdr,
|
||||
qib_pio_copy(piobuf, addr, dwords);
|
||||
goto done;
|
||||
}
|
||||
copy_io(piobuf, ss, len, flush_wc);
|
||||
qib_copy_io(piobuf, ss, len, flush_wc);
|
||||
done:
|
||||
if (dd->flags & QIB_USE_SPCL_TRIG) {
|
||||
u32 spcl_off = (pbufn >= dd->piobcnt2k) ? 2047 : 1023;
|
||||
|
@ -550,7 +550,6 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq,
|
||||
return DM_MAPIO_REQUEUE;
|
||||
}
|
||||
clone->bio = clone->biotail = NULL;
|
||||
clone->rq_disk = bdev->bd_disk;
|
||||
clone->cmd_flags |= REQ_FAILFAST_TRANSPORT;
|
||||
*__clone = clone;
|
||||
|
||||
|
@ -1778,6 +1778,7 @@ static struct mapped_device *alloc_dev(int minor)
|
||||
md->disk->major = _major;
|
||||
md->disk->first_minor = minor;
|
||||
md->disk->minors = 1;
|
||||
md->disk->flags |= GENHD_FL_NO_PART;
|
||||
md->disk->fops = &dm_blk_dops;
|
||||
md->disk->queue = md->queue;
|
||||
md->disk->private_data = md;
|
||||
|
@ -5708,11 +5708,6 @@ static int md_alloc(dev_t dev, char *name)
|
||||
mddev->queue = disk->queue;
|
||||
blk_set_stacking_limits(&mddev->queue->limits);
|
||||
blk_queue_write_cache(mddev->queue, true, true);
|
||||
/* Allow extended partitions. This makes the
|
||||
* 'mdp' device redundant, but we can't really
|
||||
* remove it now.
|
||||
*/
|
||||
disk->flags |= GENHD_FL_EXT_DEVT;
|
||||
disk->events |= DISK_EVENT_MEDIA_CHANGE;
|
||||
mddev->gendisk = disk;
|
||||
error = add_disk(disk);
|
||||
|
@ -264,7 +264,7 @@ static ssize_t power_ro_lock_store(struct device *dev,
|
||||
goto out_put;
|
||||
}
|
||||
req_to_mmc_queue_req(req)->drv_op = MMC_DRV_OP_BOOT_WP;
|
||||
blk_execute_rq(NULL, req, 0);
|
||||
blk_execute_rq(req, false);
|
||||
ret = req_to_mmc_queue_req(req)->drv_op_result;
|
||||
blk_mq_free_request(req);
|
||||
|
||||
@ -657,7 +657,7 @@ static int mmc_blk_ioctl_cmd(struct mmc_blk_data *md,
|
||||
rpmb ? MMC_DRV_OP_IOCTL_RPMB : MMC_DRV_OP_IOCTL;
|
||||
req_to_mmc_queue_req(req)->drv_op_data = idatas;
|
||||
req_to_mmc_queue_req(req)->ioc_count = 1;
|
||||
blk_execute_rq(NULL, req, 0);
|
||||
blk_execute_rq(req, false);
|
||||
ioc_err = req_to_mmc_queue_req(req)->drv_op_result;
|
||||
err = mmc_blk_ioctl_copy_to_user(ic_ptr, idata);
|
||||
blk_mq_free_request(req);
|
||||
@ -726,7 +726,7 @@ static int mmc_blk_ioctl_multi_cmd(struct mmc_blk_data *md,
|
||||
rpmb ? MMC_DRV_OP_IOCTL_RPMB : MMC_DRV_OP_IOCTL;
|
||||
req_to_mmc_queue_req(req)->drv_op_data = idata;
|
||||
req_to_mmc_queue_req(req)->ioc_count = num_of_cmds;
|
||||
blk_execute_rq(NULL, req, 0);
|
||||
blk_execute_rq(req, false);
|
||||
ioc_err = req_to_mmc_queue_req(req)->drv_op_result;
|
||||
|
||||
/* copy to user if data and response */
|
||||
@ -1837,7 +1837,7 @@ static void mmc_blk_mq_rw_recovery(struct mmc_queue *mq, struct request *req)
|
||||
/* Reset if the card is in a bad state */
|
||||
if (!mmc_host_is_spi(mq->card->host) &&
|
||||
err && mmc_blk_reset(md, card->host, type)) {
|
||||
pr_err("%s: recovery failed!\n", req->rq_disk->disk_name);
|
||||
pr_err("%s: recovery failed!\n", req->q->disk->disk_name);
|
||||
mqrq->retries = MMC_NO_RETRIES;
|
||||
return;
|
||||
}
|
||||
@ -2051,7 +2051,8 @@ static void mmc_blk_mq_dec_in_flight(struct mmc_queue *mq, struct request *req)
|
||||
mmc_put_card(mq->card, &mq->ctx);
|
||||
}
|
||||
|
||||
static void mmc_blk_mq_post_req(struct mmc_queue *mq, struct request *req)
|
||||
static void mmc_blk_mq_post_req(struct mmc_queue *mq, struct request *req,
|
||||
bool can_sleep)
|
||||
{
|
||||
struct mmc_queue_req *mqrq = req_to_mmc_queue_req(req);
|
||||
struct mmc_request *mrq = &mqrq->brq.mrq;
|
||||
@ -2063,10 +2064,14 @@ static void mmc_blk_mq_post_req(struct mmc_queue *mq, struct request *req)
|
||||
* Block layer timeouts race with completions which means the normal
|
||||
* completion path cannot be used during recovery.
|
||||
*/
|
||||
if (mq->in_recovery)
|
||||
if (mq->in_recovery) {
|
||||
mmc_blk_mq_complete_rq(mq, req);
|
||||
else if (likely(!blk_should_fake_timeout(req->q)))
|
||||
blk_mq_complete_request(req);
|
||||
} else if (likely(!blk_should_fake_timeout(req->q))) {
|
||||
if (can_sleep)
|
||||
blk_mq_complete_request_direct(req, mmc_blk_mq_complete);
|
||||
else
|
||||
blk_mq_complete_request(req);
|
||||
}
|
||||
|
||||
mmc_blk_mq_dec_in_flight(mq, req);
|
||||
}
|
||||
@ -2087,7 +2092,7 @@ void mmc_blk_mq_recovery(struct mmc_queue *mq)
|
||||
|
||||
mmc_blk_urgent_bkops(mq, mqrq);
|
||||
|
||||
mmc_blk_mq_post_req(mq, req);
|
||||
mmc_blk_mq_post_req(mq, req, true);
|
||||
}
|
||||
|
||||
static void mmc_blk_mq_complete_prev_req(struct mmc_queue *mq,
|
||||
@ -2106,7 +2111,7 @@ static void mmc_blk_mq_complete_prev_req(struct mmc_queue *mq,
|
||||
if (prev_req)
|
||||
*prev_req = mq->complete_req;
|
||||
else
|
||||
mmc_blk_mq_post_req(mq, mq->complete_req);
|
||||
mmc_blk_mq_post_req(mq, mq->complete_req, true);
|
||||
|
||||
mq->complete_req = NULL;
|
||||
|
||||
@ -2178,7 +2183,8 @@ static void mmc_blk_mq_req_done(struct mmc_request *mrq)
|
||||
mq->rw_wait = false;
|
||||
wake_up(&mq->wait);
|
||||
|
||||
mmc_blk_mq_post_req(mq, req);
|
||||
/* context unknown */
|
||||
mmc_blk_mq_post_req(mq, req, false);
|
||||
}
|
||||
|
||||
static bool mmc_blk_rw_wait_cond(struct mmc_queue *mq, int *err)
|
||||
@ -2238,7 +2244,7 @@ static int mmc_blk_mq_issue_rw_rq(struct mmc_queue *mq,
|
||||
err = mmc_start_request(host, &mqrq->brq.mrq);
|
||||
|
||||
if (prev_req)
|
||||
mmc_blk_mq_post_req(mq, prev_req);
|
||||
mmc_blk_mq_post_req(mq, prev_req, true);
|
||||
|
||||
if (err)
|
||||
mq->rw_wait = false;
|
||||
@ -2395,10 +2401,8 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card,
|
||||
md->disk->private_data = md;
|
||||
md->parent = parent;
|
||||
set_disk_ro(md->disk, md->read_only || default_ro);
|
||||
md->disk->flags = GENHD_FL_EXT_DEVT;
|
||||
if (area_type & (MMC_BLK_DATA_AREA_RPMB | MMC_BLK_DATA_AREA_BOOT))
|
||||
md->disk->flags |= GENHD_FL_NO_PART_SCAN
|
||||
| GENHD_FL_SUPPRESS_PARTITION_INFO;
|
||||
md->disk->flags |= GENHD_FL_NO_PART;
|
||||
|
||||
/*
|
||||
* As discussed on lkml, GENHD_FL_REMOVABLE should:
|
||||
@ -2739,7 +2743,7 @@ static int mmc_dbg_card_status_get(void *data, u64 *val)
|
||||
if (IS_ERR(req))
|
||||
return PTR_ERR(req);
|
||||
req_to_mmc_queue_req(req)->drv_op = MMC_DRV_OP_GET_CARD_STATUS;
|
||||
blk_execute_rq(NULL, req, 0);
|
||||
blk_execute_rq(req, false);
|
||||
ret = req_to_mmc_queue_req(req)->drv_op_result;
|
||||
if (ret >= 0) {
|
||||
*val = ret;
|
||||
@ -2778,7 +2782,7 @@ static int mmc_ext_csd_open(struct inode *inode, struct file *filp)
|
||||
}
|
||||
req_to_mmc_queue_req(req)->drv_op = MMC_DRV_OP_GET_EXT_CSD;
|
||||
req_to_mmc_queue_req(req)->drv_op_data = &ext_csd;
|
||||
blk_execute_rq(NULL, req, 0);
|
||||
blk_execute_rq(req, false);
|
||||
err = req_to_mmc_queue_req(req)->drv_op_result;
|
||||
blk_mq_free_request(req);
|
||||
if (err) {
|
||||
|
@ -46,23 +46,19 @@ static blk_status_t do_blktrans_request(struct mtd_blktrans_ops *tr,
|
||||
struct mtd_blktrans_dev *dev,
|
||||
struct request *req)
|
||||
{
|
||||
struct req_iterator iter;
|
||||
struct bio_vec bvec;
|
||||
unsigned long block, nsect;
|
||||
char *buf;
|
||||
|
||||
block = blk_rq_pos(req) << 9 >> tr->blkshift;
|
||||
nsect = blk_rq_cur_bytes(req) >> tr->blkshift;
|
||||
|
||||
if (req_op(req) == REQ_OP_FLUSH) {
|
||||
switch (req_op(req)) {
|
||||
case REQ_OP_FLUSH:
|
||||
if (tr->flush(dev))
|
||||
return BLK_STS_IOERR;
|
||||
return BLK_STS_OK;
|
||||
}
|
||||
|
||||
if (blk_rq_pos(req) + blk_rq_cur_sectors(req) >
|
||||
get_capacity(req->rq_disk))
|
||||
return BLK_STS_IOERR;
|
||||
|
||||
switch (req_op(req)) {
|
||||
case REQ_OP_DISCARD:
|
||||
if (tr->discard(dev, block, nsect))
|
||||
return BLK_STS_IOERR;
|
||||
@ -76,13 +72,17 @@ static blk_status_t do_blktrans_request(struct mtd_blktrans_ops *tr,
|
||||
}
|
||||
}
|
||||
kunmap(bio_page(req->bio));
|
||||
rq_flush_dcache_pages(req);
|
||||
|
||||
rq_for_each_segment(bvec, req, iter)
|
||||
flush_dcache_page(bvec.bv_page);
|
||||
return BLK_STS_OK;
|
||||
case REQ_OP_WRITE:
|
||||
if (!tr->writesect)
|
||||
return BLK_STS_IOERR;
|
||||
|
||||
rq_flush_dcache_pages(req);
|
||||
rq_for_each_segment(bvec, req, iter)
|
||||
flush_dcache_page(bvec.bv_page);
|
||||
|
||||
buf = kmap(bio_page(req->bio)) + bio_offset(req->bio);
|
||||
for (; nsect > 0; nsect--, block++, buf += tr->blksize) {
|
||||
if (tr->writesect(dev, block, buf)) {
|
||||
@ -346,7 +346,7 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
|
||||
gd->minors = 1 << tr->part_bits;
|
||||
gd->fops = &mtd_block_ops;
|
||||
|
||||
if (tr->part_bits)
|
||||
if (tr->part_bits) {
|
||||
if (new->devnum < 26)
|
||||
snprintf(gd->disk_name, sizeof(gd->disk_name),
|
||||
"%s%c", tr->name, 'a' + new->devnum);
|
||||
@ -355,9 +355,11 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
|
||||
"%s%c%c", tr->name,
|
||||
'a' - 1 + new->devnum / 26,
|
||||
'a' + new->devnum % 26);
|
||||
else
|
||||
} else {
|
||||
snprintf(gd->disk_name, sizeof(gd->disk_name),
|
||||
"%s%d", tr->name, new->devnum);
|
||||
gd->flags |= GENHD_FL_NO_PART;
|
||||
}
|
||||
|
||||
set_capacity(gd, ((u64)new->size * tr->blksize) >> 9);
|
||||
|
||||
|
@ -294,6 +294,8 @@ static void ubiblock_do_work(struct work_struct *work)
|
||||
int ret;
|
||||
struct ubiblock_pdu *pdu = container_of(work, struct ubiblock_pdu, work);
|
||||
struct request *req = blk_mq_rq_from_pdu(pdu);
|
||||
struct req_iterator iter;
|
||||
struct bio_vec bvec;
|
||||
|
||||
blk_mq_start_request(req);
|
||||
|
||||
@ -305,7 +307,9 @@ static void ubiblock_do_work(struct work_struct *work)
|
||||
blk_rq_map_sg(req->q, req, pdu->usgl.sg);
|
||||
|
||||
ret = ubiblock_read(pdu);
|
||||
rq_flush_dcache_pages(req);
|
||||
|
||||
rq_for_each_segment(bvec, req, iter)
|
||||
flush_dcache_page(bvec.bv_page);
|
||||
|
||||
blk_mq_end_request(req, errno_to_blk_status(ret));
|
||||
}
|
||||
@ -426,6 +430,7 @@ int ubiblock_create(struct ubi_volume_info *vi)
|
||||
ret = -ENODEV;
|
||||
goto out_cleanup_disk;
|
||||
}
|
||||
gd->flags |= GENHD_FL_NO_PART;
|
||||
gd->private_data = dev;
|
||||
sprintf(gd->disk_name, "ubiblock%d_%d", dev->ubi_num, dev->vol_id);
|
||||
set_capacity(gd, disk_capacity);
|
||||
|
@ -1057,7 +1057,7 @@ static int nvme_execute_rq(struct gendisk *disk, struct request *rq,
|
||||
{
|
||||
blk_status_t status;
|
||||
|
||||
status = blk_execute_rq(disk, rq, at_head);
|
||||
status = blk_execute_rq(rq, at_head);
|
||||
if (nvme_req(rq)->flags & NVME_REQ_CANCELLED)
|
||||
return -EINTR;
|
||||
if (nvme_req(rq)->status)
|
||||
@ -1284,7 +1284,7 @@ static void nvme_keep_alive_work(struct work_struct *work)
|
||||
|
||||
rq->timeout = ctrl->kato * HZ;
|
||||
rq->end_io_data = ctrl;
|
||||
blk_execute_rq_nowait(NULL, rq, 0, nvme_keep_alive_end_io);
|
||||
blk_execute_rq_nowait(rq, false, nvme_keep_alive_end_io);
|
||||
}
|
||||
|
||||
static void nvme_start_keep_alive(struct nvme_ctrl *ctrl)
|
||||
|
@ -56,7 +56,7 @@ void nvme_fault_inject_fini(struct nvme_fault_inject *fault_inject)
|
||||
|
||||
void nvme_should_fail(struct request *req)
|
||||
{
|
||||
struct gendisk *disk = req->rq_disk;
|
||||
struct gendisk *disk = req->q->disk;
|
||||
struct nvme_fault_inject *fault_inject = NULL;
|
||||
u16 status;
|
||||
|
||||
|
@ -500,22 +500,13 @@ static inline void nvme_write_sq_db(struct nvme_queue *nvmeq, bool write_sq)
|
||||
nvmeq->last_sq_tail = nvmeq->sq_tail;
|
||||
}
|
||||
|
||||
/**
|
||||
* nvme_submit_cmd() - Copy a command into a queue and ring the doorbell
|
||||
* @nvmeq: The queue to use
|
||||
* @cmd: The command to send
|
||||
* @write_sq: whether to write to the SQ doorbell
|
||||
*/
|
||||
static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd,
|
||||
bool write_sq)
|
||||
static inline void nvme_sq_copy_cmd(struct nvme_queue *nvmeq,
|
||||
struct nvme_command *cmd)
|
||||
{
|
||||
spin_lock(&nvmeq->sq_lock);
|
||||
memcpy(nvmeq->sq_cmds + (nvmeq->sq_tail << nvmeq->sqes),
|
||||
cmd, sizeof(*cmd));
|
||||
absolute_pointer(cmd), sizeof(*cmd));
|
||||
if (++nvmeq->sq_tail == nvmeq->q_depth)
|
||||
nvmeq->sq_tail = 0;
|
||||
nvme_write_sq_db(nvmeq, write_sq);
|
||||
spin_unlock(&nvmeq->sq_lock);
|
||||
}
|
||||
|
||||
static void nvme_commit_rqs(struct blk_mq_hw_ctx *hctx)
|
||||
@ -912,24 +903,52 @@ static blk_status_t nvme_map_metadata(struct nvme_dev *dev, struct request *req,
|
||||
return BLK_STS_OK;
|
||||
}
|
||||
|
||||
static blk_status_t nvme_prep_rq(struct nvme_dev *dev, struct request *req)
|
||||
{
|
||||
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
|
||||
blk_status_t ret;
|
||||
|
||||
iod->aborted = 0;
|
||||
iod->npages = -1;
|
||||
iod->nents = 0;
|
||||
|
||||
ret = nvme_setup_cmd(req->q->queuedata, req);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (blk_rq_nr_phys_segments(req)) {
|
||||
ret = nvme_map_data(dev, req, &iod->cmd);
|
||||
if (ret)
|
||||
goto out_free_cmd;
|
||||
}
|
||||
|
||||
if (blk_integrity_rq(req)) {
|
||||
ret = nvme_map_metadata(dev, req, &iod->cmd);
|
||||
if (ret)
|
||||
goto out_unmap_data;
|
||||
}
|
||||
|
||||
blk_mq_start_request(req);
|
||||
return BLK_STS_OK;
|
||||
out_unmap_data:
|
||||
nvme_unmap_data(dev, req);
|
||||
out_free_cmd:
|
||||
nvme_cleanup_cmd(req);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* NOTE: ns is NULL when called on the admin queue.
|
||||
*/
|
||||
static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
|
||||
const struct blk_mq_queue_data *bd)
|
||||
{
|
||||
struct nvme_ns *ns = hctx->queue->queuedata;
|
||||
struct nvme_queue *nvmeq = hctx->driver_data;
|
||||
struct nvme_dev *dev = nvmeq->dev;
|
||||
struct request *req = bd->rq;
|
||||
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
|
||||
struct nvme_command *cmnd = &iod->cmd;
|
||||
blk_status_t ret;
|
||||
|
||||
iod->aborted = 0;
|
||||
iod->npages = -1;
|
||||
iod->nents = 0;
|
||||
|
||||
/*
|
||||
* We should not need to do this, but we're still using this to
|
||||
* ensure we can drain requests on a dying queue.
|
||||
@ -937,33 +956,75 @@ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
|
||||
if (unlikely(!test_bit(NVMEQ_ENABLED, &nvmeq->flags)))
|
||||
return BLK_STS_IOERR;
|
||||
|
||||
if (!nvme_check_ready(&dev->ctrl, req, true))
|
||||
if (unlikely(!nvme_check_ready(&dev->ctrl, req, true)))
|
||||
return nvme_fail_nonready_command(&dev->ctrl, req);
|
||||
|
||||
ret = nvme_setup_cmd(ns, req);
|
||||
if (ret)
|
||||
ret = nvme_prep_rq(dev, req);
|
||||
if (unlikely(ret))
|
||||
return ret;
|
||||
|
||||
if (blk_rq_nr_phys_segments(req)) {
|
||||
ret = nvme_map_data(dev, req, cmnd);
|
||||
if (ret)
|
||||
goto out_free_cmd;
|
||||
}
|
||||
|
||||
if (blk_integrity_rq(req)) {
|
||||
ret = nvme_map_metadata(dev, req, cmnd);
|
||||
if (ret)
|
||||
goto out_unmap_data;
|
||||
}
|
||||
|
||||
blk_mq_start_request(req);
|
||||
nvme_submit_cmd(nvmeq, cmnd, bd->last);
|
||||
spin_lock(&nvmeq->sq_lock);
|
||||
nvme_sq_copy_cmd(nvmeq, &iod->cmd);
|
||||
nvme_write_sq_db(nvmeq, bd->last);
|
||||
spin_unlock(&nvmeq->sq_lock);
|
||||
return BLK_STS_OK;
|
||||
out_unmap_data:
|
||||
nvme_unmap_data(dev, req);
|
||||
out_free_cmd:
|
||||
nvme_cleanup_cmd(req);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void nvme_submit_cmds(struct nvme_queue *nvmeq, struct request **rqlist)
{
        spin_lock(&nvmeq->sq_lock);
        while (!rq_list_empty(*rqlist)) {
                struct request *req = rq_list_pop(rqlist);
                struct nvme_iod *iod = blk_mq_rq_to_pdu(req);

                nvme_sq_copy_cmd(nvmeq, &iod->cmd);
        }
        nvme_write_sq_db(nvmeq, true);
        spin_unlock(&nvmeq->sq_lock);
}

static bool nvme_prep_rq_batch(struct nvme_queue *nvmeq, struct request *req)
{
        /*
         * We should not need to do this, but we're still using this to
         * ensure we can drain requests on a dying queue.
         */
        if (unlikely(!test_bit(NVMEQ_ENABLED, &nvmeq->flags)))
                return false;
        if (unlikely(!nvme_check_ready(&nvmeq->dev->ctrl, req, true)))
                return false;

        req->mq_hctx->tags->rqs[req->tag] = req;
        return nvme_prep_rq(nvmeq->dev, req) == BLK_STS_OK;
}

static void nvme_queue_rqs(struct request **rqlist)
{
        struct request *req, *next, *prev = NULL;
        struct request *requeue_list = NULL;

        rq_list_for_each_safe(rqlist, req, next) {
                struct nvme_queue *nvmeq = req->mq_hctx->driver_data;

                if (!nvme_prep_rq_batch(nvmeq, req)) {
                        /* detach 'req' and add to remainder list */
                        rq_list_move(rqlist, &requeue_list, req, prev);

                        req = prev;
                        if (!req)
                                continue;
                }

                if (!next || req->mq_hctx != next->mq_hctx) {
                        /* detach rest of list, and submit */
                        req->rq_next = NULL;
                        nvme_submit_cmds(nvmeq, rqlist);
                        *rqlist = next;
                        prev = NULL;
                } else
                        prev = req;
        }

        *rqlist = requeue_list;
}

static __always_inline void nvme_pci_unmap_rq(struct request *req)
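
nvme_queue_rqs() above makes one pass over the plugged request list, moves requests that fail preparation onto a private requeue list with rq_list_move(), and flushes each run of requests that share a hardware queue as a single batch under one SQ lock. Below is a standalone model of that split-and-batch walk over a plain singly linked list; the types and helpers are simplified stand-ins, not the kernel's rq_list macros:

/* Standalone model of the split-and-batch walk used by nvme_queue_rqs(). */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct rq {
        int tag;
        int hctx;       /* which hardware queue this request belongs to */
        bool ok;        /* stand-in for "preparation succeeded" */
        struct rq *next;
};

static void rq_push(struct rq **list, struct rq *rq)
{
        rq->next = *list;
        *list = rq;
}

/* Unlink rq (whose predecessor is prev, or the list head if prev == NULL)
 * from *src and push it onto *dst, like rq_list_move(). */
static void rq_move(struct rq **src, struct rq **dst, struct rq *rq,
                    struct rq *prev)
{
        if (prev)
                prev->next = rq->next;
        else
                *src = rq->next;
        rq_push(dst, rq);
}

static void submit_batch(struct rq *batch)
{
        printf("batch for hctx %d:", batch->hctx);
        for (; batch; batch = batch->next)
                printf(" %d", batch->tag);
        printf("\n");
}

static void queue_rqs(struct rq **rqlist)
{
        struct rq *req, *next, *prev = NULL;
        struct rq *requeue_list = NULL;

        for (req = *rqlist; req; req = next) {
                next = req->next;

                if (!req->ok) {
                        /* detach 'req' and add it to the requeue list */
                        rq_move(rqlist, &requeue_list, req, prev);
                        req = prev;
                        if (!req)
                                continue;
                }

                if (!next || req->hctx != next->hctx) {
                        /* cut off this same-hctx run and submit it as one batch */
                        req->next = NULL;
                        submit_batch(*rqlist);
                        *rqlist = next;
                        prev = NULL;
                } else {
                        prev = req;
                }
        }

        *rqlist = requeue_list;   /* leftovers go back to the caller */
}

int main(void)
{
        struct rq rqs[] = {
                { 1, 0, true,  NULL }, { 2, 0, false, NULL },
                { 3, 0, true,  NULL }, { 4, 1, true,  NULL },
        };
        struct rq *list = NULL;
        int i;

        for (i = 3; i >= 0; i--)        /* build 1 -> 2 -> 3 -> 4 */
                rq_push(&list, &rqs[i]);

        queue_rqs(&list);               /* submits [1 3] for hctx 0, [4] for hctx 1 */
        for (; list; list = list->next)
                printf("requeued: %d\n", list->tag);    /* request 2 */
        return 0;
}
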
@ -1140,7 +1201,11 @@ static void nvme_pci_submit_async_event(struct nvme_ctrl *ctrl)
|
||||
|
||||
c.common.opcode = nvme_admin_async_event;
|
||||
c.common.command_id = NVME_AQ_BLK_MQ_DEPTH;
|
||||
nvme_submit_cmd(nvmeq, &c, true);
|
||||
|
||||
spin_lock(&nvmeq->sq_lock);
|
||||
nvme_sq_copy_cmd(nvmeq, &c);
|
||||
nvme_write_sq_db(nvmeq, true);
|
||||
spin_unlock(&nvmeq->sq_lock);
|
||||
}
|
||||
|
||||
static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
|
||||
@ -1371,7 +1436,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
|
||||
}
|
||||
|
||||
abort_req->end_io_data = NULL;
|
||||
blk_execute_rq_nowait(NULL, abort_req, 0, abort_endio);
|
||||
blk_execute_rq_nowait(abort_req, false, abort_endio);
|
||||
|
||||
/*
|
||||
* The aborted req will be completed on receiving the abort req.
|
||||
@ -1663,6 +1728,7 @@ static const struct blk_mq_ops nvme_mq_admin_ops = {
|
||||
|
||||
static const struct blk_mq_ops nvme_mq_ops = {
|
||||
.queue_rq = nvme_queue_rq,
|
||||
.queue_rqs = nvme_queue_rqs,
|
||||
.complete = nvme_pci_complete_rq,
|
||||
.commit_rqs = nvme_commit_rqs,
|
||||
.init_hctx = nvme_init_hctx,
|
||||
@ -2416,9 +2482,8 @@ static int nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode)
|
||||
req->end_io_data = nvmeq;
|
||||
|
||||
init_completion(&nvmeq->delete_done);
|
||||
blk_execute_rq_nowait(NULL, req, false,
|
||||
opcode == nvme_admin_delete_cq ?
|
||||
nvme_del_cq_end : nvme_del_queue_end);
|
||||
blk_execute_rq_nowait(req, false, opcode == nvme_admin_delete_cq ?
|
||||
nvme_del_cq_end : nvme_del_queue_end);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -68,7 +68,7 @@ TRACE_EVENT(nvme_setup_cmd,
|
||||
__entry->nsid = le32_to_cpu(cmd->common.nsid);
|
||||
__entry->metadata = !!blk_integrity_rq(req);
|
||||
__entry->fctype = cmd->fabrics.fctype;
|
||||
__assign_disk_name(__entry->disk, req->rq_disk);
|
||||
__assign_disk_name(__entry->disk, req->q->disk);
|
||||
memcpy(__entry->cdw10, &cmd->common.cdw10,
|
||||
sizeof(__entry->cdw10));
|
||||
),
|
||||
@ -103,7 +103,7 @@ TRACE_EVENT(nvme_complete_rq,
|
||||
__entry->retries = nvme_req(req)->retries;
|
||||
__entry->flags = nvme_req(req)->flags;
|
||||
__entry->status = nvme_req(req)->status;
|
||||
__assign_disk_name(__entry->disk, req->rq_disk);
|
||||
__assign_disk_name(__entry->disk, req->q->disk);
|
||||
),
|
||||
TP_printk("nvme%d: %sqid=%d, cmdid=%u, res=%#llx, retries=%u, flags=0x%x, status=%#x",
|
||||
__entry->ctrl_id, __print_disk_name(__entry->disk),
|
||||
@ -153,7 +153,7 @@ TRACE_EVENT(nvme_sq,
|
||||
),
|
||||
TP_fast_assign(
|
||||
__entry->ctrl_id = nvme_req(req)->ctrl->instance;
|
||||
__assign_disk_name(__entry->disk, req->rq_disk);
|
||||
__assign_disk_name(__entry->disk, req->q->disk);
|
||||
__entry->qid = nvme_req_qid(req);
|
||||
__entry->sq_head = le16_to_cpu(sq_head);
|
||||
__entry->sq_tail = sq_tail;
|
||||
|
@ -284,8 +284,7 @@ static void nvmet_passthru_execute_cmd(struct nvmet_req *req)
|
||||
schedule_work(&req->p.work);
|
||||
} else {
|
||||
rq->end_io_data = req;
|
||||
blk_execute_rq_nowait(ns ? ns->disk : NULL, rq, 0,
|
||||
nvmet_passthru_req_done);
|
||||
blk_execute_rq_nowait(rq, false, nvmet_passthru_req_done);
|
||||
}
|
||||
|
||||
if (ns)
|
||||
|
@ -877,7 +877,7 @@ static long ch_ioctl(struct file *file,
|
||||
}
|
||||
|
||||
default:
|
||||
return scsi_ioctl(ch->device, NULL, file->f_mode, cmd, argp);
|
||||
return scsi_ioctl(ch->device, file->f_mode, cmd, argp);
|
||||
|
||||
}
|
||||
}
|
||||
|
@ -60,7 +60,7 @@ static int scsi_bsg_sg_io_fn(struct request_queue *q, struct sg_io_v4 *hdr,
|
||||
goto out_free_cmd;
|
||||
|
||||
bio = rq->bio;
|
||||
blk_execute_rq(NULL, rq, !(hdr->flags & BSG_FLAG_Q_AT_TAIL));
|
||||
blk_execute_rq(rq, !(hdr->flags & BSG_FLAG_Q_AT_TAIL));
|
||||
|
||||
/*
|
||||
* fill in all the output members
|
||||
|
@ -2040,7 +2040,7 @@ static void scsi_eh_lock_door(struct scsi_device *sdev)
|
||||
req->timeout = 10 * HZ;
|
||||
rq->retries = 5;
|
||||
|
||||
blk_execute_rq_nowait(NULL, req, 1, eh_lock_door_done);
|
||||
blk_execute_rq_nowait(req, true, eh_lock_door_done);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -408,8 +408,7 @@ static int scsi_complete_sghdr_rq(struct request *rq, struct sg_io_hdr *hdr,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int sg_io(struct scsi_device *sdev, struct gendisk *disk,
|
||||
struct sg_io_hdr *hdr, fmode_t mode)
|
||||
static int sg_io(struct scsi_device *sdev, struct sg_io_hdr *hdr, fmode_t mode)
|
||||
{
|
||||
unsigned long start_time;
|
||||
ssize_t ret = 0;
|
||||
@ -483,7 +482,7 @@ static int sg_io(struct scsi_device *sdev, struct gendisk *disk,
|
||||
|
||||
start_time = jiffies;
|
||||
|
||||
blk_execute_rq(disk, rq, at_head);
|
||||
blk_execute_rq(rq, at_head);
|
||||
|
||||
hdr->duration = jiffies_to_msecs(jiffies - start_time);
|
||||
|
||||
@ -499,19 +498,12 @@ out_put_request:
|
||||
/**
|
||||
* sg_scsi_ioctl -- handle deprecated SCSI_IOCTL_SEND_COMMAND ioctl
|
||||
* @q: request queue to send scsi commands down
|
||||
* @disk: gendisk to operate on (option)
|
||||
* @mode: mode used to open the file through which the ioctl has been
|
||||
* submitted
|
||||
* @sic: userspace structure describing the command to perform
|
||||
*
|
||||
* Send down the scsi command described by @sic to the device below
|
||||
* the request queue @q. If @file is non-NULL it's used to perform
|
||||
* fine-grained permission checks that allow users to send down
|
||||
* non-destructive SCSI commands. If the caller has a struct gendisk
|
||||
* available it should be passed in as @disk to allow the low level
|
||||
* driver to use the information contained in it. A non-NULL @disk
|
||||
* is only allowed if the caller knows that the low level driver doesn't
|
||||
* need it (e.g. in the scsi subsystem).
|
||||
* the request queue @q.
|
||||
*
|
||||
* Notes:
|
||||
* - This interface is deprecated - users should use the SG_IO
|
||||
@ -530,8 +522,8 @@ out_put_request:
|
||||
* Positive numbers returned are the compacted SCSI error codes (4
|
||||
* bytes in one int) where the lowest byte is the SCSI status.
|
||||
*/
|
||||
static int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk,
|
||||
fmode_t mode, struct scsi_ioctl_command __user *sic)
|
||||
static int sg_scsi_ioctl(struct request_queue *q, fmode_t mode,
|
||||
struct scsi_ioctl_command __user *sic)
|
||||
{
|
||||
enum { OMAX_SB_LEN = 16 }; /* For backward compatibility */
|
||||
struct request *rq;
|
||||
@ -620,7 +612,7 @@ static int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk,
|
||||
goto error;
|
||||
}
|
||||
|
||||
blk_execute_rq(disk, rq, 0);
|
||||
blk_execute_rq(rq, false);
|
||||
|
||||
err = req->result & 0xff; /* only 8 bit SCSI status */
|
||||
if (err) {
|
||||
@ -806,8 +798,8 @@ static int scsi_put_cdrom_generic_arg(const struct cdrom_generic_command *cgc,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int scsi_cdrom_send_packet(struct scsi_device *sdev, struct gendisk *disk,
|
||||
fmode_t mode, void __user *arg)
|
||||
static int scsi_cdrom_send_packet(struct scsi_device *sdev, fmode_t mode,
|
||||
void __user *arg)
|
||||
{
|
||||
struct cdrom_generic_command cgc;
|
||||
struct sg_io_hdr hdr;
|
||||
@ -847,7 +839,7 @@ static int scsi_cdrom_send_packet(struct scsi_device *sdev, struct gendisk *disk
|
||||
hdr.cmdp = ((struct cdrom_generic_command __user *) arg)->cmd;
|
||||
hdr.cmd_len = sizeof(cgc.cmd);
|
||||
|
||||
err = sg_io(sdev, disk, &hdr, mode);
|
||||
err = sg_io(sdev, &hdr, mode);
|
||||
if (err == -EFAULT)
|
||||
return -EFAULT;
|
||||
|
||||
@ -862,8 +854,8 @@ static int scsi_cdrom_send_packet(struct scsi_device *sdev, struct gendisk *disk
|
||||
return err;
|
||||
}
|
||||
|
||||
static int scsi_ioctl_sg_io(struct scsi_device *sdev, struct gendisk *disk,
|
||||
fmode_t mode, void __user *argp)
|
||||
static int scsi_ioctl_sg_io(struct scsi_device *sdev, fmode_t mode,
|
||||
void __user *argp)
|
||||
{
|
||||
struct sg_io_hdr hdr;
|
||||
int error;
|
||||
@ -871,7 +863,7 @@ static int scsi_ioctl_sg_io(struct scsi_device *sdev, struct gendisk *disk,
|
||||
error = get_sg_io_hdr(&hdr, argp);
|
||||
if (error)
|
||||
return error;
|
||||
error = sg_io(sdev, disk, &hdr, mode);
|
||||
error = sg_io(sdev, &hdr, mode);
|
||||
if (error == -EFAULT)
|
||||
return error;
|
||||
if (put_sg_io_hdr(&hdr, argp))
|
||||
@ -882,7 +874,6 @@ static int scsi_ioctl_sg_io(struct scsi_device *sdev, struct gendisk *disk,
|
||||
/**
|
||||
* scsi_ioctl - Dispatch ioctl to scsi device
|
||||
* @sdev: scsi device receiving ioctl
|
||||
* @disk: disk receiving the ioctl
|
||||
* @mode: mode the block/char device is opened with
|
||||
* @cmd: which ioctl is it
|
||||
* @arg: data associated with ioctl
|
||||
@ -891,8 +882,8 @@ static int scsi_ioctl_sg_io(struct scsi_device *sdev, struct gendisk *disk,
|
||||
* does not take a major/minor number as the dev field. Rather, it takes
|
||||
* a pointer to a &struct scsi_device.
|
||||
*/
|
||||
int scsi_ioctl(struct scsi_device *sdev, struct gendisk *disk, fmode_t mode,
|
||||
int cmd, void __user *arg)
|
||||
int scsi_ioctl(struct scsi_device *sdev, fmode_t mode, int cmd,
|
||||
void __user *arg)
|
||||
{
|
||||
struct request_queue *q = sdev->request_queue;
|
||||
struct scsi_sense_hdr sense_hdr;
|
||||
@ -927,11 +918,11 @@ int scsi_ioctl(struct scsi_device *sdev, struct gendisk *disk, fmode_t mode,
|
||||
case SG_EMULATED_HOST:
|
||||
return sg_emulated_host(q, arg);
|
||||
case SG_IO:
|
||||
return scsi_ioctl_sg_io(sdev, disk, mode, arg);
|
||||
return scsi_ioctl_sg_io(sdev, mode, arg);
|
||||
case SCSI_IOCTL_SEND_COMMAND:
|
||||
return sg_scsi_ioctl(q, disk, mode, arg);
|
||||
return sg_scsi_ioctl(q, mode, arg);
|
||||
case CDROM_SEND_PACKET:
|
||||
return scsi_cdrom_send_packet(sdev, disk, mode, arg);
|
||||
return scsi_cdrom_send_packet(sdev, mode, arg);
|
||||
case CDROMCLOSETRAY:
|
||||
return scsi_send_start_stop(sdev, 3);
|
||||
case CDROMEJECT:
|
||||
|
@ -241,7 +241,7 @@ int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
|
||||
/*
|
||||
* head injection *required* here otherwise quiesce won't work
|
||||
*/
|
||||
blk_execute_rq(NULL, req, 1);
|
||||
blk_execute_rq(req, true);
|
||||
|
||||
/*
|
||||
* Some devices (USB mass-storage in particular) may transfer
|
||||
@ -543,8 +543,9 @@ static bool scsi_end_request(struct request *req, blk_status_t error,
|
||||
if (blk_update_request(req, error, bytes))
|
||||
return true;
|
||||
|
||||
// XXX:
|
||||
if (blk_queue_add_random(q))
|
||||
add_disk_randomness(req->rq_disk);
|
||||
add_disk_randomness(req->q->disk);
|
||||
|
||||
if (!blk_rq_is_passthrough(req)) {
|
||||
WARN_ON_ONCE(!(cmd->flags & SCMD_INITIALIZED));
|
||||
@@ -617,6 +618,46 @@ static blk_status_t scsi_result_to_blk_status(struct scsi_cmnd *cmd, int result)
        }
}

/**
 * scsi_rq_err_bytes - determine number of bytes till the next failure boundary
 * @rq: request to examine
 *
 * Description:
 *     A request could be merge of IOs which require different failure
 *     handling.  This function determines the number of bytes which
 *     can be failed from the beginning of the request without
 *     crossing into area which need to be retried further.
 *
 * Return:
 *     The number of bytes to fail.
 */
static unsigned int scsi_rq_err_bytes(const struct request *rq)
{
        unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK;
        unsigned int bytes = 0;
        struct bio *bio;

        if (!(rq->rq_flags & RQF_MIXED_MERGE))
                return blk_rq_bytes(rq);

        /*
         * Currently the only 'mixing' which can happen is between
         * different fastfail types.  We can safely fail portions
         * which have all the failfast bits that the first one has -
         * the ones which are at least as eager to fail as the first
         * one.
         */
        for (bio = rq->bio; bio; bio = bio->bi_next) {
                if ((bio->bi_opf & ff) != ff)
                        break;
                bytes += bio->bi_iter.bi_size;
        }

        /* this could lead to infinite loop */
        BUG_ON(blk_rq_bytes(rq) && !bytes);
        return bytes;
}

/* Helper for scsi_io_completion() when "reprep" action required. */
static void scsi_io_completion_reprep(struct scsi_cmnd *cmd,
                                      struct request_queue *q)
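
scsi_rq_err_bytes() above only fails the leading bios of a mixed-merge request whose failfast bits cover the request's own, so a partial failure never crosses into bios that still need to be retried. A standalone model of that boundary computation over a simple bio chain (types and flag values are made up for the sketch):

/* Standalone model of the failure-boundary walk in scsi_rq_err_bytes(). */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define FF_DEV          0x1     /* stand-ins for the REQ_FAILFAST_* bits */
#define FF_TRANSPORT    0x2
#define FF_DRIVER       0x4
#define FF_MASK         (FF_DEV | FF_TRANSPORT | FF_DRIVER)

struct bio {
        unsigned int opf;       /* per-bio flags */
        unsigned int size;      /* bytes */
        struct bio *next;
};

struct request {
        bool mixed_merge;       /* stand-in for RQF_MIXED_MERGE */
        unsigned int cmd_flags;
        unsigned int total_bytes;
        struct bio *bio;
};

static unsigned int rq_err_bytes(const struct request *rq)
{
        unsigned int ff = rq->cmd_flags & FF_MASK;
        unsigned int bytes = 0;
        const struct bio *bio;

        if (!rq->mixed_merge)
                return rq->total_bytes;

        /* Fail only the leading bios that are at least as eager to fail
         * as the request itself (their failfast bits cover 'ff'). */
        for (bio = rq->bio; bio; bio = bio->next) {
                if ((bio->opf & ff) != ff)
                        break;
                bytes += bio->size;
        }

        return bytes;
}

int main(void)
{
        struct bio b3 = { 0,                     4096, NULL };
        struct bio b2 = { FF_DEV | FF_TRANSPORT, 4096, &b3 };
        struct bio b1 = { FF_DEV | FF_TRANSPORT, 8192, &b2 };
        struct request rq = {
                .mixed_merge = true,
                .cmd_flags = FF_DEV | FF_TRANSPORT,
                .total_bytes = 16384,
                .bio = &b1,
        };

        printf("%u bytes can be failed\n", rq_err_bytes(&rq)); /* prints 12288 */
        return 0;
}
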
@ -794,7 +835,7 @@ static void scsi_io_completion_action(struct scsi_cmnd *cmd, int result)
|
||||
scsi_print_command(cmd);
|
||||
}
|
||||
}
|
||||
if (!scsi_end_request(req, blk_stat, blk_rq_err_bytes(req)))
|
||||
if (!scsi_end_request(req, blk_stat, scsi_rq_err_bytes(req)))
|
||||
return;
|
||||
fallthrough;
|
||||
case ACTION_REPREP:
|
||||
|
@ -30,7 +30,9 @@ static inline const char *scmd_name(const struct scsi_cmnd *scmd)
|
||||
{
|
||||
struct request *rq = scsi_cmd_to_rq((struct scsi_cmnd *)scmd);
|
||||
|
||||
return rq->rq_disk ? rq->rq_disk->disk_name : NULL;
|
||||
if (!rq->q->disk)
|
||||
return NULL;
|
||||
return rq->q->disk->disk_name;
|
||||
}
|
||||
|
||||
static size_t sdev_format_header(char *logbuf, size_t logbuf_len,
|
||||
|
@ -872,7 +872,7 @@ static blk_status_t sd_setup_unmap_cmnd(struct scsi_cmnd *cmd)
|
||||
{
|
||||
struct scsi_device *sdp = cmd->device;
|
||||
struct request *rq = scsi_cmd_to_rq(cmd);
|
||||
struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
|
||||
struct scsi_disk *sdkp = scsi_disk(rq->q->disk);
|
||||
u64 lba = sectors_to_logical(sdp, blk_rq_pos(rq));
|
||||
u32 nr_blocks = sectors_to_logical(sdp, blk_rq_sectors(rq));
|
||||
unsigned int data_len = 24;
|
||||
@ -908,7 +908,7 @@ static blk_status_t sd_setup_write_same16_cmnd(struct scsi_cmnd *cmd,
|
||||
{
|
||||
struct scsi_device *sdp = cmd->device;
|
||||
struct request *rq = scsi_cmd_to_rq(cmd);
|
||||
struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
|
||||
+	struct scsi_disk *sdkp = scsi_disk(rq->q->disk);
 	u64 lba = sectors_to_logical(sdp, blk_rq_pos(rq));
 	u32 nr_blocks = sectors_to_logical(sdp, blk_rq_sectors(rq));
 	u32 data_len = sdp->sector_size;
@@ -940,7 +940,7 @@ static blk_status_t sd_setup_write_same10_cmnd(struct scsi_cmnd *cmd,
 {
 	struct scsi_device *sdp = cmd->device;
 	struct request *rq = scsi_cmd_to_rq(cmd);
-	struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
+	struct scsi_disk *sdkp = scsi_disk(rq->q->disk);
 	u64 lba = sectors_to_logical(sdp, blk_rq_pos(rq));
 	u32 nr_blocks = sectors_to_logical(sdp, blk_rq_sectors(rq));
 	u32 data_len = sdp->sector_size;
@@ -971,7 +971,7 @@ static blk_status_t sd_setup_write_zeroes_cmnd(struct scsi_cmnd *cmd)
 {
 	struct request *rq = scsi_cmd_to_rq(cmd);
 	struct scsi_device *sdp = cmd->device;
-	struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
+	struct scsi_disk *sdkp = scsi_disk(rq->q->disk);
 	u64 lba = sectors_to_logical(sdp, blk_rq_pos(rq));
 	u32 nr_blocks = sectors_to_logical(sdp, blk_rq_sectors(rq));

@@ -1068,7 +1068,7 @@ static blk_status_t sd_setup_write_same_cmnd(struct scsi_cmnd *cmd)
 {
 	struct request *rq = scsi_cmd_to_rq(cmd);
 	struct scsi_device *sdp = cmd->device;
-	struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
+	struct scsi_disk *sdkp = scsi_disk(rq->q->disk);
 	struct bio *bio = rq->bio;
 	u64 lba = sectors_to_logical(sdp, blk_rq_pos(rq));
 	u32 nr_blocks = sectors_to_logical(sdp, blk_rq_sectors(rq));
@@ -1116,7 +1116,7 @@ static blk_status_t sd_setup_write_same_cmnd(struct scsi_cmnd *cmd)
 static blk_status_t sd_setup_flush_cmnd(struct scsi_cmnd *cmd)
 {
 	struct request *rq = scsi_cmd_to_rq(cmd);
-	struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
+	struct scsi_disk *sdkp = scsi_disk(rq->q->disk);

 	/* flush requests don't perform I/O, zero the S/G table */
 	memset(&cmd->sdb, 0, sizeof(cmd->sdb));
@@ -1215,7 +1215,7 @@ static blk_status_t sd_setup_read_write_cmnd(struct scsi_cmnd *cmd)
 {
 	struct request *rq = scsi_cmd_to_rq(cmd);
 	struct scsi_device *sdp = cmd->device;
-	struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
+	struct scsi_disk *sdkp = scsi_disk(rq->q->disk);
 	sector_t lba = sectors_to_logical(sdp, blk_rq_pos(rq));
 	sector_t threshold;
 	unsigned int nr_blocks = sectors_to_logical(sdp, blk_rq_sectors(rq));
@@ -1236,7 +1236,7 @@ static blk_status_t sd_setup_read_write_cmnd(struct scsi_cmnd *cmd)
 		goto fail;
 	}

-	if (blk_rq_pos(rq) + blk_rq_sectors(rq) > get_capacity(rq->rq_disk)) {
+	if (blk_rq_pos(rq) + blk_rq_sectors(rq) > get_capacity(rq->q->disk)) {
 		scmd_printk(KERN_ERR, cmd, "access beyond end of device\n");
 		goto fail;
 	}
@@ -1331,7 +1331,7 @@ static blk_status_t sd_init_command(struct scsi_cmnd *cmd)

 	switch (req_op(rq)) {
 	case REQ_OP_DISCARD:
-		switch (scsi_disk(rq->rq_disk)->provisioning_mode) {
+		switch (scsi_disk(rq->q->disk)->provisioning_mode) {
 		case SD_LBP_UNMAP:
 			return sd_setup_unmap_cmnd(cmd);
 		case SD_LBP_WS16:
@@ -1574,7 +1574,7 @@ static int sd_ioctl(struct block_device *bdev, fmode_t mode,

 	if (is_sed_ioctl(cmd))
 		return sed_ioctl(sdkp->opal_dev, cmd, p);
-	return scsi_ioctl(sdp, disk, mode, cmd, p);
+	return scsi_ioctl(sdp, mode, cmd, p);
 }

 static void set_media_not_present(struct scsi_disk *sdkp)
@@ -1917,7 +1917,7 @@ static const struct block_device_operations sd_fops = {
  **/
 static void sd_eh_reset(struct scsi_cmnd *scmd)
 {
-	struct scsi_disk *sdkp = scsi_disk(scsi_cmd_to_rq(scmd)->rq_disk);
+	struct scsi_disk *sdkp = scsi_disk(scsi_cmd_to_rq(scmd)->q->disk);

 	/* New SCSI EH run, reset gate variable */
 	sdkp->ignore_medium_access_errors = false;
@@ -1937,7 +1937,7 @@ static void sd_eh_reset(struct scsi_cmnd *scmd)
  **/
 static int sd_eh_action(struct scsi_cmnd *scmd, int eh_disp)
 {
-	struct scsi_disk *sdkp = scsi_disk(scsi_cmd_to_rq(scmd)->rq_disk);
+	struct scsi_disk *sdkp = scsi_disk(scsi_cmd_to_rq(scmd)->q->disk);
 	struct scsi_device *sdev = scmd->device;

 	if (!scsi_device_online(sdev) ||
@@ -2034,7 +2034,7 @@ static int sd_done(struct scsi_cmnd *SCpnt)
 	unsigned int resid;
 	struct scsi_sense_hdr sshdr;
 	struct request *req = scsi_cmd_to_rq(SCpnt);
-	struct scsi_disk *sdkp = scsi_disk(req->rq_disk);
+	struct scsi_disk *sdkp = scsi_disk(req->q->disk);
 	int sense_valid = 0;
 	int sense_deferred = 0;

@@ -3566,7 +3566,6 @@ static int sd_probe(struct device *dev)

 	sd_revalidate_disk(gd);

-	gd->flags = GENHD_FL_EXT_DEVT;
 	if (sdp->removable) {
 		gd->flags |= GENHD_FL_REMOVABLE;
 		gd->events |= DISK_EVENT_MEDIA_CHANGE;

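Every sd.c hunk above makes the same substitution: struct request no longer carries its own gendisk pointer, so the disk is reached through the owning queue as rq->q->disk, and helpers such as scsi_disk() and get_capacity() are fed that instead of the old rq->rq_disk. A minimal sketch of the new access pattern, assuming only the rq->q->disk dereference shown in the hunks (my_rq_past_end_of_disk() is an invented name, not part of the series):

#include <linux/blk-mq.h>	/* struct request, blk_rq_pos(), blk_rq_sectors() */
#include <linux/genhd.h>	/* struct gendisk, get_capacity() */

/* Hypothetical helper mirroring the capacity check in sd_setup_read_write_cmnd() */
static bool my_rq_past_end_of_disk(struct request *rq)
{
	/* before this series: get_capacity(rq->rq_disk) */
	return blk_rq_pos(rq) + blk_rq_sectors(rq) > get_capacity(rq->q->disk);
}
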
@@ -244,7 +244,7 @@ out:
 static blk_status_t sd_zbc_cmnd_checks(struct scsi_cmnd *cmd)
 {
 	struct request *rq = scsi_cmd_to_rq(cmd);
-	struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
+	struct scsi_disk *sdkp = scsi_disk(rq->q->disk);
 	sector_t sector = blk_rq_pos(rq);

 	if (!sd_is_zoned(sdkp))
@@ -322,7 +322,7 @@ blk_status_t sd_zbc_prepare_zone_append(struct scsi_cmnd *cmd, sector_t *lba,
 					unsigned int nr_blocks)
 {
 	struct request *rq = scsi_cmd_to_rq(cmd);
-	struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
+	struct scsi_disk *sdkp = scsi_disk(rq->q->disk);
 	unsigned int wp_offset, zno = blk_rq_zone_no(rq);
 	unsigned long flags;
 	blk_status_t ret;
@@ -388,7 +388,7 @@ blk_status_t sd_zbc_setup_zone_mgmt_cmnd(struct scsi_cmnd *cmd,
 {
 	struct request *rq = scsi_cmd_to_rq(cmd);
 	sector_t sector = blk_rq_pos(rq);
-	struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
+	struct scsi_disk *sdkp = scsi_disk(rq->q->disk);
 	sector_t block = sectors_to_logical(sdkp->device, sector);
 	blk_status_t ret;

@@ -443,7 +443,7 @@ static unsigned int sd_zbc_zone_wp_update(struct scsi_cmnd *cmd,
 {
 	int result = cmd->result;
 	struct request *rq = scsi_cmd_to_rq(cmd);
-	struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
+	struct scsi_disk *sdkp = scsi_disk(rq->q->disk);
 	unsigned int zno = blk_rq_zone_no(rq);
 	enum req_opf op = req_op(rq);
 	unsigned long flags;

@@ -833,7 +833,7 @@ sg_common_write(Sg_fd * sfp, Sg_request * srp,

 	srp->rq->timeout = timeout;
 	kref_get(&sfp->f_ref); /* sg_rq_end_io() does kref_put(). */
-	blk_execute_rq_nowait(NULL, srp->rq, at_head, sg_rq_end_io);
+	blk_execute_rq_nowait(srp->rq, at_head, sg_rq_end_io);
 	return 0;
 }

@@ -1109,7 +1109,7 @@ sg_ioctl_common(struct file *filp, Sg_device *sdp, Sg_fd *sfp,
 	case SCSI_IOCTL_SEND_COMMAND:
 		if (atomic_read(&sdp->detaching))
 			return -ENODEV;
-		return scsi_ioctl(sdp->device, NULL, filp->f_mode, cmd_in, p);
+		return scsi_ioctl(sdp->device, filp->f_mode, cmd_in, p);
 	case SG_SET_DEBUG:
 		result = get_user(val, ip);
 		if (result)
@@ -1165,7 +1165,7 @@ sg_ioctl(struct file *filp, unsigned int cmd_in, unsigned long arg)
 	ret = sg_ioctl_common(filp, sdp, sfp, cmd_in, p);
 	if (ret != -ENOIOCTLCMD)
 		return ret;
-	return scsi_ioctl(sdp->device, NULL, filp->f_mode, cmd_in, p);
+	return scsi_ioctl(sdp->device, filp->f_mode, cmd_in, p);
 }

 static __poll_t

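The sg hunks show the second recurring change: scsi_ioctl() has lost its struct gendisk * parameter, so callers drop the NULL (or disk) argument and pass only the device, the open mode, the command, and the user pointer. A hedged sketch of a pass-through ioctl fallback in that style; the function name and surrounding plumbing are invented, only the scsi_ioctl() call shape comes from the hunks:

#include <linux/fs.h>		/* struct file, fmode_t */
#include <scsi/scsi_device.h>	/* struct scsi_device */
#include <scsi/scsi_ioctl.h>	/* scsi_ioctl() */

static long my_passthrough_ioctl(struct file *filp, struct scsi_device *sdev,
				 unsigned int cmd, void __user *arg)
{
	/* before this series: scsi_ioctl(sdev, NULL, filp->f_mode, cmd, arg) */
	return scsi_ioctl(sdev, filp->f_mode, cmd, arg);
}

sd_ioctl(), sr_block_ioctl(), and st_ioctl() in the surrounding hunks make the identical substitution.
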
@@ -335,7 +335,7 @@ static int sr_done(struct scsi_cmnd *SCpnt)
 	int block_sectors = 0;
 	long error_sector;
 	struct request *rq = scsi_cmd_to_rq(SCpnt);
-	struct scsi_cd *cd = scsi_cd(rq->rq_disk);
+	struct scsi_cd *cd = scsi_cd(rq->q->disk);

 #ifdef DEBUG
 	scmd_printk(KERN_INFO, SCpnt, "done: %x\n", result);
@@ -402,7 +402,7 @@ static blk_status_t sr_init_command(struct scsi_cmnd *SCpnt)
 	ret = scsi_alloc_sgtables(SCpnt);
 	if (ret != BLK_STS_OK)
 		return ret;
-	cd = scsi_cd(rq->rq_disk);
+	cd = scsi_cd(rq->q->disk);

 	SCSI_LOG_HLQUEUE(1, scmd_printk(KERN_INFO, SCpnt,
 			"Doing sr request, block = %d\n", block));
@@ -561,8 +561,7 @@ static void sr_block_release(struct gendisk *disk, fmode_t mode)
 static int sr_block_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
 			  unsigned long arg)
 {
-	struct gendisk *disk = bdev->bd_disk;
-	struct scsi_cd *cd = scsi_cd(disk);
+	struct scsi_cd *cd = scsi_cd(bdev->bd_disk);
 	struct scsi_device *sdev = cd->device;
 	void __user *argp = (void __user *)arg;
 	int ret;
@@ -584,7 +583,7 @@ static int sr_block_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
 		if (ret != -ENOSYS)
 			goto put;
 	}
-	ret = scsi_ioctl(sdev, disk, mode, cmd, argp);
+	ret = scsi_ioctl(sdev, mode, cmd, argp);

 put:
 	scsi_autopm_put_device(sdev);
@@ -684,9 +683,10 @@ static int sr_probe(struct device *dev)
 	disk->minors = 1;
 	sprintf(disk->disk_name, "sr%d", minor);
 	disk->fops = &sr_bdops;
-	disk->flags = GENHD_FL_CD | GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE;
+	disk->flags |= GENHD_FL_REMOVABLE | GENHD_FL_NO_PART;
 	disk->events = DISK_EVENT_MEDIA_CHANGE | DISK_EVENT_EJECT_REQUEST;
-	disk->event_flags = DISK_EVENT_FLAG_POLL | DISK_EVENT_FLAG_UEVENT;
+	disk->event_flags = DISK_EVENT_FLAG_POLL | DISK_EVENT_FLAG_UEVENT |
+			    DISK_EVENT_FLAG_BLOCK_ON_EXCL_WRITE;

 	blk_queue_rq_timeout(sdev->request_queue, SR_TIMEOUT);

@@ -725,7 +725,6 @@ static int sr_probe(struct device *dev)
 	blk_pm_runtime_init(sdev->request_queue, dev);

 	dev_set_drvdata(dev, cd);
-	disk->flags |= GENHD_FL_REMOVABLE;
 	sr_revalidate_disk(cd);

 	error = device_add_disk(&sdev->sdev_gendev, disk, NULL);
@@ -994,7 +993,7 @@ static int sr_read_cdda_bpc(struct cdrom_device_info *cdi, void __user *ubuf,
 	rq->timeout = 60 * HZ;
 	bio = rq->bio;

-	blk_execute_rq(disk, rq, 0);
+	blk_execute_rq(rq, false);
 	if (scsi_req(rq)->result) {
 		struct scsi_sense_hdr sshdr;

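Besides the rq->q->disk conversion, the sr hunks (together with the sg, st, ufshpb, and pscsi hunks) rework the passthrough execution helpers: blk_execute_rq() and blk_execute_rq_nowait() drop their leading struct gendisk * argument, and the at_head parameter becomes a bool. The sr_probe() hunks also fold GENHD_FL_REMOVABLE into the initial disk->flags assignment, add GENHD_FL_NO_PART, and move the block-on-exclusive-write behaviour into disk->event_flags. Below is a minimal sketch of the new call shapes; my_end_io(), my_submit_async(), and my_submit_sync() are invented names, and only the blk_execute_rq*() signatures are taken from this diff:

#include <linux/blk-mq.h>	/* blk_execute_rq(), blk_execute_rq_nowait(), blk_mq_free_request() */

/* completion callback for the asynchronous path */
static void my_end_io(struct request *rq, blk_status_t status)
{
	blk_mq_free_request(rq);
}

static void my_submit_async(struct request *rq)
{
	/* before this series: blk_execute_rq_nowait(NULL, rq, 1, my_end_io) */
	blk_execute_rq_nowait(rq, true, my_end_io);
}

static void my_submit_sync(struct request *rq)
{
	/* before this series: blk_execute_rq(disk, rq, 0); the gendisk argument
	 * is gone and at_head is now a bool */
	blk_execute_rq(rq, false);
}
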
@@ -581,7 +581,7 @@ static int st_scsi_execute(struct st_request *SRpnt, const unsigned char *cmd,
 	rq->retries = retries;
 	req->end_io_data = SRpnt;

-	blk_execute_rq_nowait(NULL, req, 1, st_scsi_execute_end);
+	blk_execute_rq_nowait(req, true, st_scsi_execute_end);
 	return 0;
 }

@@ -3829,7 +3829,7 @@ static long st_ioctl(struct file *file, unsigned int cmd_in, unsigned long arg)
 		break;
 	}

-	retval = scsi_ioctl(STp->device, NULL, file->f_mode, cmd_in, p);
+	retval = scsi_ioctl(STp->device, file->f_mode, cmd_in, p);
 	if (!retval && cmd_in == SCSI_IOCTL_STOP_UNIT) {
 		/* unload */
 		STp->rew_at_close = 0;

@@ -677,7 +677,7 @@ static void ufshpb_execute_umap_req(struct ufshpb_lu *hpb,
 	ufshpb_set_unmap_cmd(rq->cmd, rgn);
 	rq->cmd_len = HPB_WRITE_BUFFER_CMD_LENGTH;

-	blk_execute_rq_nowait(NULL, req, 1, ufshpb_umap_req_compl_fn);
+	blk_execute_rq_nowait(req, true, ufshpb_umap_req_compl_fn);

 	hpb->stats.umap_req_cnt++;
 }
@@ -719,7 +719,7 @@ static int ufshpb_execute_map_req(struct ufshpb_lu *hpb,
 				  map_req->rb.srgn_idx, mem_size);
 	rq->cmd_len = HPB_READ_BUFFER_CMD_LENGTH;

-	blk_execute_rq_nowait(NULL, req, 1, ufshpb_map_req_compl_fn);
+	blk_execute_rq_nowait(req, true, ufshpb_map_req_compl_fn);

 	hpb->stats.map_req_cnt++;
 	return 0;

@@ -528,7 +528,7 @@ static void virtio_scsi_init_hdr_pi(struct virtio_device *vdev,
 	if (!rq || !scsi_prot_sg_count(sc))
 		return;

-	bi = blk_get_integrity(rq->rq_disk);
+	bi = blk_get_integrity(rq->q->disk);

 	if (sc->sc_data_direction == DMA_TO_DEVICE)
 		cmd_pi->pi_bytesout = cpu_to_virtio32(vdev,

@@ -1005,7 +1005,7 @@ pscsi_execute_cmd(struct se_cmd *cmd)
 	req->timeout = PS_TIMEOUT_OTHER;
 	scsi_req(req)->retries = PS_RETRY;

-	blk_execute_rq_nowait(NULL, req, (cmd->sam_task_attr == TCM_HEAD_TAG),
+	blk_execute_rq_nowait(req, cmd->sam_task_attr == TCM_HEAD_TAG,
			pscsi_req_done);

 	return 0;

@@ -551,7 +551,7 @@ static void last_sector_hacks(struct us_data *us, struct scsi_cmnd *srb)
 	/* Did this command access the last sector? */
 	sector = (srb->cmnd[2] << 24) | (srb->cmnd[3] << 16) |
 			(srb->cmnd[4] << 8) | (srb->cmnd[5]);
-	disk = scsi_cmd_to_rq(srb)->rq_disk;
+	disk = scsi_cmd_to_rq(srb)->q->disk;
 	if (!disk)
 		goto done;
 	sdkp = scsi_disk(disk);

@@ -57,7 +57,7 @@
 #include <linux/mman.h>
 #include <linux/percpu.h>
 #include <linux/slab.h>
-#include <linux/blkdev.h>
+#include <linux/blk-mq.h>
 #include <linux/bvec.h>
 #include <linux/net.h>
 #include <net/sock.h>

@@ -6,6 +6,7 @@
 #include <linux/module.h>
 #include <linux/compiler.h>
 #include <linux/fs.h>
+#include <linux/pagemap.h>
 #include <linux/iomap.h>
 #include <linux/backing-dev.h>
 #include <linux/uio.h>

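The final two hunks are header-only adjustments: one file swaps <linux/blkdev.h> for <linux/blk-mq.h>, the other adds <linux/pagemap.h>, presumably for page-cache declarations that were previously pulled in indirectly. With the block headers split up, struct request and its helpers live in <linux/blk-mq.h>, so code that works on requests includes that header directly. A tiny illustration (invented function name; the include choice mirrors the hunk above):

#include <linux/blk-mq.h>	/* struct request and helpers such as blk_rq_pos() */

static sector_t my_rq_start_sector(const struct request *rq)
{
	return blk_rq_pos(rq);
}
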
Some files were not shown because too many files have changed in this diff.