2005-04-16 22:20:36 +00:00
|
|
|
/*
|
|
|
|
linear.c : Multiple Devices driver for Linux
|
|
|
|
Copyright (C) 1994-96 Marc ZYNGIER
|
|
|
|
<zyngier@ufr-info-p7.ibp.fr> or
|
|
|
|
<maz@gloups.fdn.fr>
|
|
|
|
|
|
|
|
Linear mode management functions.
|
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
|
|
it under the terms of the GNU General Public License as published by
|
|
|
|
the Free Software Foundation; either version 2, or (at your option)
|
|
|
|
any later version.
|
2014-09-30 04:23:59 +00:00
|
|
|
|
2005-04-16 22:20:36 +00:00
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
(for example /usr/src/linux/COPYING); if not, write to the Free
|
2014-09-30 04:23:59 +00:00
|
|
|
Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
2005-04-16 22:20:36 +00:00
|
|
|
*/
|
|
|
|
|
2009-03-31 03:33:13 +00:00
|
|
|
#include <linux/blkdev.h>
|
|
|
|
#include <linux/raid/md_u.h>
|
|
|
|
#include <linux/seq_file.h>
|
2011-07-03 17:58:33 +00:00
|
|
|
#include <linux/module.h>
|
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h
percpu.h is included by sched.h and module.h and thus ends up being
included when building most .c files. percpu.h includes slab.h which
in turn includes gfp.h making everything defined by the two files
universally available and complicating inclusion dependencies.
percpu.h -> slab.h dependency is about to be removed. Prepare for
this change by updating users of gfp and slab facilities include those
headers directly instead of assuming availability. As this conversion
needs to touch large number of source files, the following script is
used as the basis of conversion.
http://userweb.kernel.org/~tj/misc/slabh-sweep.py
The script does the following.
* Scan files for gfp and slab usages and update includes such that
only the necessary includes are there. ie. if only gfp is used,
gfp.h, if slab is used, slab.h.
* When the script inserts a new include, it looks at the include
blocks and try to put the new include such that its order conforms
to its surrounding. It's put in the include block which contains
core kernel includes, in the same order that the rest are ordered -
alphabetical, Christmas tree, rev-Xmas-tree or at the end if there
doesn't seem to be any matching order.
* If the script can't find a place to put a new include (mostly
because the file doesn't have fitting include block), it prints out
an error message indicating which .h file needs to be added to the
file.
The conversion was done in the following steps.
1. The initial automatic conversion of all .c files updated slightly
over 4000 files, deleting around 700 includes and adding ~480 gfp.h
and ~3000 slab.h inclusions. The script emitted errors for ~400
files.
2. Each error was manually checked. Some didn't need the inclusion,
some needed manual addition while adding it to implementation .h or
embedding .c file was more appropriate for others. This step added
inclusions to around 150 files.
3. The script was run again and the output was compared to the edits
from #2 to make sure no file was left behind.
4. Several build tests were done and a couple of problems were fixed.
e.g. lib/decompress_*.c used malloc/free() wrappers around slab
APIs requiring slab.h to be added manually.
5. The script was run on all .h files but without automatically
editing them as sprinkling gfp.h and slab.h inclusions around .h
files could easily lead to inclusion dependency hell. Most gfp.h
inclusion directives were ignored as stuff from gfp.h was usually
widely available and often used in preprocessor macros. Each
slab.h inclusion directive was examined and added manually as
necessary.
6. percpu.h was updated not to include slab.h.
7. Build test were done on the following configurations and failures
were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my
distributed build env didn't work with gcov compiles) and a few
more options had to be turned off depending on archs to make things
build (like ipr on powerpc/64 which failed due to missing writeq).
* x86 and x86_64 UP and SMP allmodconfig and a custom test config.
* powerpc and powerpc64 SMP allmodconfig
* sparc and sparc64 SMP allmodconfig
* ia64 SMP allmodconfig
* s390 SMP allmodconfig
* alpha SMP allmodconfig
* um on x86_64 SMP allmodconfig
8. percpu.h modifications were reverted so that it could be applied as
a separate patch and serve as bisection point.
Given the fact that I had only a couple of failures from tests on step
6, I'm fairly confident about the coverage of this conversion patch.
If there is a breakage, it's likely to be something in one of the arch
headers which should be easily discoverable on most builds of
the specific arch.
Signed-off-by: Tejun Heo <tj@kernel.org>
Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 08:04:11 +00:00
|
|
|
#include <linux/slab.h>
|
2016-11-18 02:22:04 +00:00
|
|
|
#include <trace/events/block.h>
|
2009-03-31 03:33:13 +00:00
|
|
|
#include "md.h"
|
2009-03-31 03:27:03 +00:00
|
|
|
#include "linear.h"
|
2005-04-16 22:20:36 +00:00
|
|
|
|
|
|
|
/*
|
2014-09-30 04:23:59 +00:00
|
|
|
* find which device holds a particular offset
|
2005-04-16 22:20:36 +00:00
|
|
|
*/
|
2011-10-11 05:48:49 +00:00
|
|
|
static inline struct dev_info *which_dev(struct mddev *mddev, sector_t sector)
|
2005-04-16 22:20:36 +00:00
|
|
|
{
|
2009-06-16 06:57:08 +00:00
|
|
|
int lo, mid, hi;
|
2011-10-11 05:48:54 +00:00
|
|
|
struct linear_conf *conf;
|
2005-04-16 22:20:36 +00:00
|
|
|
|
2009-06-16 06:57:08 +00:00
|
|
|
lo = 0;
|
|
|
|
hi = mddev->raid_disks - 1;
|
2014-12-15 01:56:57 +00:00
|
|
|
conf = mddev->private;
|
2005-04-16 22:20:36 +00:00
|
|
|
|
2009-06-16 06:57:08 +00:00
|
|
|
/*
|
|
|
|
* Binary Search
|
|
|
|
*/
|
|
|
|
|
|
|
|
while (hi > lo) {
|
|
|
|
|
|
|
|
mid = (hi + lo) / 2;
|
|
|
|
if (sector < conf->disks[mid].end_sector)
|
|
|
|
hi = mid;
|
|
|
|
else
|
|
|
|
lo = mid + 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
return conf->disks + lo;
|
2005-04-16 22:20:36 +00:00
|
|
|
}
|
|
|
|
|
md linear: fix a race between linear_add() and linear_congested()
Recently I received a bug report that on a Linux v3.0 based kernel, hot-adding a
disk to an md linear device causes a kernel crash in linear_congested(). From
the crash image analysis, I find that in linear_congested(), mddev->raid_disks
contains value N, but conf->disks[] only has N-1 pointers available. Then
a NULL pointer dereference crashes the kernel.
There is a race between linear_add() and linear_congested(); the RCU code
used in these two functions cannot avoid the race. Since Linux v4.0 the
RCU code has been replaced by mddev_suspend(). After checking the
upstream code, it seems linear_congested() is not called in the
generic_make_request() code path, so mddev_suspend() cannot prevent it
from being called. The possible race still exists.
Here I explain how the race still exists in current code. For a machine
has many CPUs, on one CPU, linear_add() is called to add a hard disk to a
md linear device; at the same time on other CPU, linear_congested() is
called to detect whether this md linear device is congested before issuing
an I/O request onto it.
Now I use a possible code execution time sequence to demo how the possible
race happens,
seq linear_add() linear_congested()
0 conf=mddev->private
1 oldconf=mddev->private
2 mddev->raid_disks++
3 for (i=0; i<mddev->raid_disks;i++)
4 bdev_get_queue(conf->disks[i].rdev->bdev)
5 mddev->private=newconf
In linear_add() mddev->raid_disks is increased at time seq 2, and on
another CPU in linear_congested() the for-loop iterates conf->disks[i] by
the increased mddev->raid_disks at time seq 3,4. But conf with one more
element (which is a pointer to struct dev_info type) in conf->disks[] is
not updated yet, so accessing its structure member at time seq 4 will cause a
NULL pointer dereference fault.
To fix this race, there are 2 parts of modification in the patch,
1) Add 'int raid_disks' in struct linear_conf, as a copy of
mddev->raid_disks. It is initialized in linear_conf(), always being
consistent with pointers number of 'struct dev_info disks[]'. When
iterating conf->disks[] in linear_congested(), use conf->raid_disks to
replace mddev->raid_disks in the for-loop, then NULL pointer deference
will not happen again.
2) RCU stuffs are back again, and use kfree_rcu() in linear_add() to
free oldconf memory. Because oldconf may be referenced as mddev->private
in linear_congested(), kfree_rcu() makes sure that its memory will not
be released until no one uses it any more.
Also some code comments are added in this patch, to make this modification
to be easier understandable.
This patch can be applied for kernels since v4.0 after commit:
3be260cc18f8 ("md/linear: remove rcu protections in favour of
suspend/resume"). But this bug was reported on a Linux v3.0 based kernel; people
who maintain kernels before Linux v4.0 will need to backport
this patch.
Changelog:
- V3: add 'int raid_disks' in struct linear_conf, and use kfree_rcu() to
replace rcu_call() in linear_add().
- v2: add RCU stuffs by suggestion from Shaohua and Neil.
- v1: initial effort.
Signed-off-by: Coly Li <colyli@suse.de>
Cc: Shaohua Li <shli@fb.com>
Cc: Neil Brown <neilb@suse.com>
Cc: stable@vger.kernel.org
Signed-off-by: Shaohua Li <shli@fb.com>
2017-01-28 13:11:49 +00:00
|
|
|
/*
|
|
|
|
* In linear_congested() conf->raid_disks is used as a copy of
|
|
|
|
* mddev->raid_disks to iterate conf->disks[], because conf->raid_disks
|
|
|
|
* and conf->disks[] are created in linear_conf(), they are always
|
|
|
|
* consitent with each other, but mddev->raid_disks does not.
|
|
|
|
*/
|
2014-12-15 01:56:56 +00:00
|
|
|
static int linear_congested(struct mddev *mddev, int bits)
|
2006-10-03 08:15:53 +00:00
|
|
|
{
|
2011-10-11 05:48:54 +00:00
|
|
|
struct linear_conf *conf;
|
2006-10-03 08:15:53 +00:00
|
|
|
int i, ret = 0;
|
|
|
|
|
md linear: fix a race between linear_add() and linear_congested()
Recently I receive a bug report that on Linux v3.0 based kerenl, hot add
disk to a md linear device causes kernel crash at linear_congested(). From
the crash image analysis, I find in linear_congested(), mddev->raid_disks
contains value N, but conf->disks[] only has N-1 pointers available. Then
a NULL pointer deference crashes the kernel.
There is a race between linear_add() and linear_congested(), RCU stuffs
used in these two functions cannot avoid the race. Since Linuv v4.0
RCU code is replaced by introducing mddev_suspend(). After checking the
upstream code, it seems linear_congested() is not called in
generic_make_request() code patch, so mddev_suspend() cannot provent it
from being called. The possible race still exists.
Here I explain how the race still exists in current code. For a machine
has many CPUs, on one CPU, linear_add() is called to add a hard disk to a
md linear device; at the same time on other CPU, linear_congested() is
called to detect whether this md linear device is congested before issuing
an I/O request onto it.
Now I use a possible code execution time sequence to demo how the possible
race happens,
seq linear_add() linear_congested()
0 conf=mddev->private
1 oldconf=mddev->private
2 mddev->raid_disks++
3 for (i=0; i<mddev->raid_disks;i++)
4 bdev_get_queue(conf->disks[i].rdev->bdev)
5 mddev->private=newconf
In linear_add() mddev->raid_disks is increased in time seq 2, and on
another CPU in linear_congested() the for-loop iterates conf->disks[i] by
the increased mddev->raid_disks in time seq 3,4. But conf with one more
element (which is a pointer to struct dev_info type) to conf->disks[] is
not updated yet, accessing its structure member in time seq 4 will cause a
NULL pointer deference fault.
To fix this race, there are 2 parts of modification in the patch,
1) Add 'int raid_disks' in struct linear_conf, as a copy of
mddev->raid_disks. It is initialized in linear_conf(), always being
consistent with pointers number of 'struct dev_info disks[]'. When
iterating conf->disks[] in linear_congested(), use conf->raid_disks to
replace mddev->raid_disks in the for-loop, then NULL pointer deference
will not happen again.
2) RCU stuffs are back again, and use kfree_rcu() in linear_add() to
free oldconf memory. Because oldconf may be referenced as mddev->private
in linear_congested(), kfree_rcu() makes sure that its memory will not
be released until no one uses it any more.
Also some code comments are added in this patch, to make this modification
to be easier understandable.
This patch can be applied for kernels since v4.0 after commit:
3be260cc18f8 ("md/linear: remove rcu protections in favour of
suspend/resume"). But this bug is reported on Linux v3.0 based kernel, for
people who maintain kernels before Linux v4.0, they need to do some back
back port to this patch.
Changelog:
- V3: add 'int raid_disks' in struct linear_conf, and use kfree_rcu() to
replace rcu_call() in linear_add().
- v2: add RCU stuffs by suggestion from Shaohua and Neil.
- v1: initial effort.
Signed-off-by: Coly Li <colyli@suse.de>
Cc: Shaohua Li <shli@fb.com>
Cc: Neil Brown <neilb@suse.com>
Cc: stable@vger.kernel.org
Signed-off-by: Shaohua Li <shli@fb.com>
2017-01-28 13:11:49 +00:00
|
|
|
rcu_read_lock();
|
|
|
|
conf = rcu_dereference(mddev->private);
|
2009-06-17 22:49:35 +00:00
|
|
|
|
md linear: fix a race between linear_add() and linear_congested()
Recently I receive a bug report that on Linux v3.0 based kerenl, hot add
disk to a md linear device causes kernel crash at linear_congested(). From
the crash image analysis, I find in linear_congested(), mddev->raid_disks
contains value N, but conf->disks[] only has N-1 pointers available. Then
a NULL pointer deference crashes the kernel.
There is a race between linear_add() and linear_congested(), RCU stuffs
used in these two functions cannot avoid the race. Since Linuv v4.0
RCU code is replaced by introducing mddev_suspend(). After checking the
upstream code, it seems linear_congested() is not called in
generic_make_request() code patch, so mddev_suspend() cannot provent it
from being called. The possible race still exists.
Here I explain how the race still exists in current code. For a machine
has many CPUs, on one CPU, linear_add() is called to add a hard disk to a
md linear device; at the same time on other CPU, linear_congested() is
called to detect whether this md linear device is congested before issuing
an I/O request onto it.
Now I use a possible code execution time sequence to demo how the possible
race happens,
seq linear_add() linear_congested()
0 conf=mddev->private
1 oldconf=mddev->private
2 mddev->raid_disks++
3 for (i=0; i<mddev->raid_disks;i++)
4 bdev_get_queue(conf->disks[i].rdev->bdev)
5 mddev->private=newconf
In linear_add() mddev->raid_disks is increased in time seq 2, and on
another CPU in linear_congested() the for-loop iterates conf->disks[i] by
the increased mddev->raid_disks in time seq 3,4. But conf with one more
element (which is a pointer to struct dev_info type) to conf->disks[] is
not updated yet, accessing its structure member in time seq 4 will cause a
NULL pointer deference fault.
To fix this race, there are 2 parts of modification in the patch,
1) Add 'int raid_disks' in struct linear_conf, as a copy of
mddev->raid_disks. It is initialized in linear_conf(), always being
consistent with pointers number of 'struct dev_info disks[]'. When
iterating conf->disks[] in linear_congested(), use conf->raid_disks to
replace mddev->raid_disks in the for-loop, then NULL pointer deference
will not happen again.
2) RCU stuffs are back again, and use kfree_rcu() in linear_add() to
free oldconf memory. Because oldconf may be referenced as mddev->private
in linear_congested(), kfree_rcu() makes sure that its memory will not
be released until no one uses it any more.
Also some code comments are added in this patch, to make this modification
to be easier understandable.
This patch can be applied for kernels since v4.0 after commit:
3be260cc18f8 ("md/linear: remove rcu protections in favour of
suspend/resume"). But this bug is reported on Linux v3.0 based kernel, for
people who maintain kernels before Linux v4.0, they need to do some back
back port to this patch.
Changelog:
- V3: add 'int raid_disks' in struct linear_conf, and use kfree_rcu() to
replace rcu_call() in linear_add().
- v2: add RCU stuffs by suggestion from Shaohua and Neil.
- v1: initial effort.
Signed-off-by: Coly Li <colyli@suse.de>
Cc: Shaohua Li <shli@fb.com>
Cc: Neil Brown <neilb@suse.com>
Cc: stable@vger.kernel.org
Signed-off-by: Shaohua Li <shli@fb.com>
2017-01-28 13:11:49 +00:00
|
|
|
for (i = 0; i < conf->raid_disks && !ret ; i++) {
|
2007-07-24 07:28:11 +00:00
|
|
|
struct request_queue *q = bdev_get_queue(conf->disks[i].rdev->bdev);
|
2006-10-03 08:15:53 +00:00
|
|
|
ret |= bdi_congested(&q->backing_dev_info, bits);
|
|
|
|
}
|
2009-06-17 22:49:35 +00:00
|
|
|
|
md linear: fix a race between linear_add() and linear_congested()
Recently I receive a bug report that on Linux v3.0 based kerenl, hot add
disk to a md linear device causes kernel crash at linear_congested(). From
the crash image analysis, I find in linear_congested(), mddev->raid_disks
contains value N, but conf->disks[] only has N-1 pointers available. Then
a NULL pointer deference crashes the kernel.
There is a race between linear_add() and linear_congested(), RCU stuffs
used in these two functions cannot avoid the race. Since Linuv v4.0
RCU code is replaced by introducing mddev_suspend(). After checking the
upstream code, it seems linear_congested() is not called in
generic_make_request() code patch, so mddev_suspend() cannot provent it
from being called. The possible race still exists.
Here I explain how the race still exists in current code. For a machine
has many CPUs, on one CPU, linear_add() is called to add a hard disk to a
md linear device; at the same time on other CPU, linear_congested() is
called to detect whether this md linear device is congested before issuing
an I/O request onto it.
Now I use a possible code execution time sequence to demo how the possible
race happens,
seq linear_add() linear_congested()
0 conf=mddev->private
1 oldconf=mddev->private
2 mddev->raid_disks++
3 for (i=0; i<mddev->raid_disks;i++)
4 bdev_get_queue(conf->disks[i].rdev->bdev)
5 mddev->private=newconf
In linear_add() mddev->raid_disks is increased in time seq 2, and on
another CPU in linear_congested() the for-loop iterates conf->disks[i] by
the increased mddev->raid_disks in time seq 3,4. But conf with one more
element (which is a pointer to struct dev_info type) to conf->disks[] is
not updated yet, accessing its structure member in time seq 4 will cause a
NULL pointer deference fault.
To fix this race, there are 2 parts of modification in the patch,
1) Add 'int raid_disks' in struct linear_conf, as a copy of
mddev->raid_disks. It is initialized in linear_conf(), always being
consistent with pointers number of 'struct dev_info disks[]'. When
iterating conf->disks[] in linear_congested(), use conf->raid_disks to
replace mddev->raid_disks in the for-loop, then NULL pointer deference
will not happen again.
2) RCU stuffs are back again, and use kfree_rcu() in linear_add() to
free oldconf memory. Because oldconf may be referenced as mddev->private
in linear_congested(), kfree_rcu() makes sure that its memory will not
be released until no one uses it any more.
Also some code comments are added in this patch, to make this modification
to be easier understandable.
This patch can be applied for kernels since v4.0 after commit:
3be260cc18f8 ("md/linear: remove rcu protections in favour of
suspend/resume"). But this bug is reported on Linux v3.0 based kernel, for
people who maintain kernels before Linux v4.0, they need to do some back
back port to this patch.
Changelog:
- V3: add 'int raid_disks' in struct linear_conf, and use kfree_rcu() to
replace rcu_call() in linear_add().
- v2: add RCU stuffs by suggestion from Shaohua and Neil.
- v1: initial effort.
Signed-off-by: Coly Li <colyli@suse.de>
Cc: Shaohua Li <shli@fb.com>
Cc: Neil Brown <neilb@suse.com>
Cc: stable@vger.kernel.org
Signed-off-by: Shaohua Li <shli@fb.com>
2017-01-28 13:11:49 +00:00
|
|
|
rcu_read_unlock();
|
2006-10-03 08:15:53 +00:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2011-10-11 05:47:53 +00:00
|
|
|
/*
 * Report the size of the array in sectors.
 *
 * Linear arrays do not support generic reshape, so a non-zero @sectors or
 * @raid_disks triggers a one-time warning; the value returned is always
 * the array_sectors recorded in the current linear_conf.
 */
static sector_t linear_size(struct mddev *mddev, sector_t sectors, int raid_disks)
{
	struct linear_conf *conf = mddev->private;

	WARN_ONCE(sectors || raid_disks,
		  "%s does not support generic reshape\n", __func__);

	return conf->array_sectors;
}
|
|
|
|
|
2011-10-11 05:48:54 +00:00
|
|
|
static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks)
|
2005-04-16 22:20:36 +00:00
|
|
|
{
|
2011-10-11 05:48:54 +00:00
|
|
|
struct linear_conf *conf;
|
2011-10-11 05:45:26 +00:00
|
|
|
struct md_rdev *rdev;
|
2009-06-16 06:55:26 +00:00
|
|
|
int i, cnt;
|
2012-10-11 02:08:44 +00:00
|
|
|
bool discard_supported = false;
|
2005-04-16 22:20:36 +00:00
|
|
|
|
2011-10-11 05:48:49 +00:00
|
|
|
conf = kzalloc (sizeof (*conf) + raid_disks*sizeof(struct dev_info),
|
2005-04-16 22:20:36 +00:00
|
|
|
GFP_KERNEL);
|
|
|
|
if (!conf)
|
2006-06-26 07:27:41 +00:00
|
|
|
return NULL;
|
|
|
|
|
2005-04-16 22:20:36 +00:00
|
|
|
cnt = 0;
|
2008-07-21 07:05:25 +00:00
|
|
|
conf->array_sectors = 0;
|
2005-04-16 22:20:36 +00:00
|
|
|
|
2012-03-19 01:46:39 +00:00
|
|
|
rdev_for_each(rdev, mddev) {
|
2005-04-16 22:20:36 +00:00
|
|
|
int j = rdev->raid_disk;
|
2011-10-11 05:48:49 +00:00
|
|
|
struct dev_info *disk = conf->disks + j;
|
2009-06-17 22:48:55 +00:00
|
|
|
sector_t sectors;
|
2005-04-16 22:20:36 +00:00
|
|
|
|
2008-06-27 22:31:19 +00:00
|
|
|
if (j < 0 || j >= raid_disks || disk->rdev) {
|
2016-11-02 03:16:49 +00:00
|
|
|
pr_warn("md/linear:%s: disk numbering problem. Aborting!\n",
|
|
|
|
mdname(mddev));
|
2005-04-16 22:20:36 +00:00
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
disk->rdev = rdev;
|
2009-06-17 22:48:55 +00:00
|
|
|
if (mddev->chunk_sectors) {
|
|
|
|
sectors = rdev->sectors;
|
|
|
|
sector_div(sectors, mddev->chunk_sectors);
|
|
|
|
rdev->sectors = sectors * mddev->chunk_sectors;
|
|
|
|
}
|
2005-04-16 22:20:36 +00:00
|
|
|
|
2009-07-01 01:13:45 +00:00
|
|
|
disk_stack_limits(mddev->gendisk, rdev->bdev,
|
|
|
|
rdev->data_offset << 9);
|
2005-04-16 22:20:36 +00:00
|
|
|
|
2009-03-31 03:33:13 +00:00
|
|
|
conf->array_sectors += rdev->sectors;
|
2005-04-16 22:20:36 +00:00
|
|
|
cnt++;
|
2009-06-16 06:56:13 +00:00
|
|
|
|
2012-10-11 02:08:44 +00:00
|
|
|
if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
|
|
|
|
discard_supported = true;
|
2005-04-16 22:20:36 +00:00
|
|
|
}
|
2006-06-26 07:27:41 +00:00
|
|
|
if (cnt != raid_disks) {
|
2016-11-02 03:16:49 +00:00
|
|
|
pr_warn("md/linear:%s: not enough drives present. Aborting!\n",
|
|
|
|
mdname(mddev));
|
2005-04-16 22:20:36 +00:00
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
2012-10-11 02:08:44 +00:00
|
|
|
if (!discard_supported)
|
|
|
|
queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
|
|
|
|
else
|
|
|
|
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
|
|
|
|
|
2005-04-16 22:20:36 +00:00
|
|
|
/*
|
2009-06-16 06:55:26 +00:00
|
|
|
* Here we calculate the device offsets.
|
2005-04-16 22:20:36 +00:00
|
|
|
*/
|
2009-06-16 06:56:13 +00:00
|
|
|
conf->disks[0].end_sector = conf->disks[0].rdev->sectors;
|
|
|
|
|
2007-05-23 20:58:10 +00:00
|
|
|
for (i = 1; i < raid_disks; i++)
|
2009-06-16 06:56:13 +00:00
|
|
|
conf->disks[i].end_sector =
|
|
|
|
conf->disks[i-1].end_sector +
|
|
|
|
conf->disks[i].rdev->sectors;
|
2005-09-09 23:23:47 +00:00
|
|
|
|
md linear: fix a race between linear_add() and linear_congested()
Recently I receive a bug report that on Linux v3.0 based kerenl, hot add
disk to a md linear device causes kernel crash at linear_congested(). From
the crash image analysis, I find in linear_congested(), mddev->raid_disks
contains value N, but conf->disks[] only has N-1 pointers available. Then
a NULL pointer deference crashes the kernel.
There is a race between linear_add() and linear_congested(), RCU stuffs
used in these two functions cannot avoid the race. Since Linuv v4.0
RCU code is replaced by introducing mddev_suspend(). After checking the
upstream code, it seems linear_congested() is not called in
generic_make_request() code patch, so mddev_suspend() cannot provent it
from being called. The possible race still exists.
Here I explain how the race still exists in current code. For a machine
has many CPUs, on one CPU, linear_add() is called to add a hard disk to a
md linear device; at the same time on other CPU, linear_congested() is
called to detect whether this md linear device is congested before issuing
an I/O request onto it.
Now I use a possible code execution time sequence to demo how the possible
race happens,
seq linear_add() linear_congested()
0 conf=mddev->private
1 oldconf=mddev->private
2 mddev->raid_disks++
3 for (i=0; i<mddev->raid_disks;i++)
4 bdev_get_queue(conf->disks[i].rdev->bdev)
5 mddev->private=newconf
In linear_add() mddev->raid_disks is increased in time seq 2, and on
another CPU in linear_congested() the for-loop iterates conf->disks[i] by
the increased mddev->raid_disks in time seq 3,4. But conf with one more
element (which is a pointer to struct dev_info type) to conf->disks[] is
not updated yet, accessing its structure member in time seq 4 will cause a
NULL pointer deference fault.
To fix this race, there are 2 parts of modification in the patch,
1) Add 'int raid_disks' in struct linear_conf, as a copy of
mddev->raid_disks. It is initialized in linear_conf(), always being
consistent with pointers number of 'struct dev_info disks[]'. When
iterating conf->disks[] in linear_congested(), use conf->raid_disks to
replace mddev->raid_disks in the for-loop, then NULL pointer deference
will not happen again.
2) RCU stuffs are back again, and use kfree_rcu() in linear_add() to
free oldconf memory. Because oldconf may be referenced as mddev->private
in linear_congested(), kfree_rcu() makes sure that its memory will not
be released until no one uses it any more.
Also some code comments are added in this patch, to make this modification
to be easier understandable.
This patch can be applied for kernels since v4.0 after commit:
3be260cc18f8 ("md/linear: remove rcu protections in favour of
suspend/resume"). But this bug is reported on Linux v3.0 based kernel, for
people who maintain kernels before Linux v4.0, they need to do some back
back port to this patch.
Changelog:
- V3: add 'int raid_disks' in struct linear_conf, and use kfree_rcu() to
replace rcu_call() in linear_add().
- v2: add RCU stuffs by suggestion from Shaohua and Neil.
- v1: initial effort.
Signed-off-by: Coly Li <colyli@suse.de>
Cc: Shaohua Li <shli@fb.com>
Cc: Neil Brown <neilb@suse.com>
Cc: stable@vger.kernel.org
Signed-off-by: Shaohua Li <shli@fb.com>
2017-01-28 13:11:49 +00:00
|
|
|
/*
|
|
|
|
* conf->raid_disks is copy of mddev->raid_disks. The reason to
|
|
|
|
* keep a copy of mddev->raid_disks in struct linear_conf is,
|
|
|
|
* mddev->raid_disks may not be consistent with pointers number of
|
|
|
|
* conf->disks[] when it is updated in linear_add() and used to
|
|
|
|
* iterate old conf->disks[] earray in linear_congested().
|
|
|
|
* Here conf->raid_disks is always consitent with number of
|
|
|
|
* pointers in conf->disks[] array, and mddev->private is updated
|
|
|
|
* with rcu_assign_pointer() in linear_addr(), such race can be
|
|
|
|
* avoided.
|
|
|
|
*/
|
|
|
|
conf->raid_disks = raid_disks;
|
|
|
|
|
2006-06-26 07:27:41 +00:00
|
|
|
return conf;
|
|
|
|
|
|
|
|
out:
|
|
|
|
kfree(conf);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2011-10-11 05:47:53 +00:00
|
|
|
static int linear_run (struct mddev *mddev)
|
2006-06-26 07:27:41 +00:00
|
|
|
{
|
2011-10-11 05:48:54 +00:00
|
|
|
struct linear_conf *conf;
|
2012-04-01 23:48:37 +00:00
|
|
|
int ret;
|
2006-06-26 07:27:41 +00:00
|
|
|
|
2009-06-17 22:49:23 +00:00
|
|
|
if (md_check_no_bitmap(mddev))
|
|
|
|
return -EINVAL;
|
2006-06-26 07:27:41 +00:00
|
|
|
conf = linear_conf(mddev, mddev->raid_disks);
|
|
|
|
|
|
|
|
if (!conf)
|
|
|
|
return 1;
|
|
|
|
mddev->private = conf;
|
2009-03-31 03:59:03 +00:00
|
|
|
md_set_array_sectors(mddev, linear_size(mddev, 0, 0));
|
2006-06-26 07:27:41 +00:00
|
|
|
|
2012-04-01 23:48:37 +00:00
|
|
|
ret = md_integrity_register(mddev);
|
|
|
|
if (ret) {
|
|
|
|
kfree(conf);
|
|
|
|
mddev->private = NULL;
|
|
|
|
}
|
|
|
|
return ret;
|
2006-06-26 07:27:41 +00:00
|
|
|
}
|
2005-04-16 22:20:36 +00:00
|
|
|
|
2011-10-11 05:47:53 +00:00
|
|
|
static int linear_add(struct mddev *mddev, struct md_rdev *rdev)
|
2006-06-26 07:27:41 +00:00
|
|
|
{
|
|
|
|
/* Adding a drive to a linear array allows the array to grow.
|
|
|
|
* It is permitted if the new drive has a matching superblock
|
|
|
|
* already on it, with raid_disk equal to raid_disks.
|
|
|
|
* It is achieved by creating a new linear_private_data structure
|
|
|
|
* and swapping it in in-place of the current one.
|
|
|
|
* The current one is never freed until the array is stopped.
|
|
|
|
* This avoids races.
|
|
|
|
*/
|
2011-10-11 05:48:54 +00:00
|
|
|
struct linear_conf *newconf, *oldconf;
|
2006-06-26 07:27:41 +00:00
|
|
|
|
2007-05-23 20:58:10 +00:00
|
|
|
if (rdev->saved_raid_disk != mddev->raid_disks)
|
2006-06-26 07:27:41 +00:00
|
|
|
return -EINVAL;
|
|
|
|
|
2007-05-23 20:58:10 +00:00
|
|
|
rdev->raid_disk = rdev->saved_raid_disk;
|
2011-12-22 22:56:55 +00:00
|
|
|
rdev->saved_raid_disk = -1;
|
2007-05-23 20:58:10 +00:00
|
|
|
|
2006-06-26 07:27:41 +00:00
|
|
|
newconf = linear_conf(mddev,mddev->raid_disks+1);
|
|
|
|
|
|
|
|
if (!newconf)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
md linear: fix a race between linear_add() and linear_congested()
Recently I receive a bug report that on Linux v3.0 based kerenl, hot add
disk to a md linear device causes kernel crash at linear_congested(). From
the crash image analysis, I find in linear_congested(), mddev->raid_disks
contains value N, but conf->disks[] only has N-1 pointers available. Then
a NULL pointer deference crashes the kernel.
There is a race between linear_add() and linear_congested(), RCU stuffs
used in these two functions cannot avoid the race. Since Linuv v4.0
RCU code is replaced by introducing mddev_suspend(). After checking the
upstream code, it seems linear_congested() is not called in
generic_make_request() code patch, so mddev_suspend() cannot provent it
from being called. The possible race still exists.
Here I explain how the race still exists in current code. For a machine
has many CPUs, on one CPU, linear_add() is called to add a hard disk to a
md linear device; at the same time on other CPU, linear_congested() is
called to detect whether this md linear device is congested before issuing
an I/O request onto it.
Now I use a possible code execution time sequence to demo how the possible
race happens,
seq linear_add() linear_congested()
0 conf=mddev->private
1 oldconf=mddev->private
2 mddev->raid_disks++
3 for (i=0; i<mddev->raid_disks;i++)
4 bdev_get_queue(conf->disks[i].rdev->bdev)
5 mddev->private=newconf
In linear_add() mddev->raid_disks is increased in time seq 2, and on
another CPU in linear_congested() the for-loop iterates conf->disks[i] by
the increased mddev->raid_disks in time seq 3,4. But conf with one more
element (which is a pointer to struct dev_info type) to conf->disks[] is
not updated yet, accessing its structure member in time seq 4 will cause a
NULL pointer deference fault.
To fix this race, there are 2 parts of modification in the patch,
1) Add 'int raid_disks' in struct linear_conf, as a copy of
mddev->raid_disks. It is initialized in linear_conf(), always being
consistent with pointers number of 'struct dev_info disks[]'. When
iterating conf->disks[] in linear_congested(), use conf->raid_disks to
replace mddev->raid_disks in the for-loop, then NULL pointer deference
will not happen again.
2) RCU stuffs are back again, and use kfree_rcu() in linear_add() to
free oldconf memory. Because oldconf may be referenced as mddev->private
in linear_congested(), kfree_rcu() makes sure that its memory will not
be released until no one uses it any more.
Also some code comments are added in this patch, to make this modification
to be easier understandable.
This patch can be applied for kernels since v4.0 after commit:
3be260cc18f8 ("md/linear: remove rcu protections in favour of
suspend/resume"). But this bug is reported on Linux v3.0 based kernel, for
people who maintain kernels before Linux v4.0, they need to do some back
back port to this patch.
Changelog:
- V3: add 'int raid_disks' in struct linear_conf, and use kfree_rcu() to
replace rcu_call() in linear_add().
- v2: add RCU stuffs by suggestion from Shaohua and Neil.
- v1: initial effort.
Signed-off-by: Coly Li <colyli@suse.de>
Cc: Shaohua Li <shli@fb.com>
Cc: Neil Brown <neilb@suse.com>
Cc: stable@vger.kernel.org
Signed-off-by: Shaohua Li <shli@fb.com>
2017-01-28 13:11:49 +00:00
|
|
|
/* newconf->raid_disks already keeps a copy of * the increased
|
|
|
|
* value of mddev->raid_disks, WARN_ONCE() is just used to make
|
|
|
|
* sure of this. It is possible that oldconf is still referenced
|
|
|
|
* in linear_congested(), therefore kfree_rcu() is used to free
|
|
|
|
* oldconf until no one uses it anymore.
|
|
|
|
*/
|
2014-12-15 01:56:57 +00:00
|
|
|
mddev_suspend(mddev);
|
md linear: fix a race between linear_add() and linear_congested()
Recently I receive a bug report that on Linux v3.0 based kerenl, hot add
disk to a md linear device causes kernel crash at linear_congested(). From
the crash image analysis, I find in linear_congested(), mddev->raid_disks
contains value N, but conf->disks[] only has N-1 pointers available. Then
a NULL pointer deference crashes the kernel.
There is a race between linear_add() and linear_congested(), RCU stuffs
used in these two functions cannot avoid the race. Since Linuv v4.0
RCU code is replaced by introducing mddev_suspend(). After checking the
upstream code, it seems linear_congested() is not called in
generic_make_request() code patch, so mddev_suspend() cannot provent it
from being called. The possible race still exists.
Here I explain how the race still exists in current code. For a machine
has many CPUs, on one CPU, linear_add() is called to add a hard disk to a
md linear device; at the same time on other CPU, linear_congested() is
called to detect whether this md linear device is congested before issuing
an I/O request onto it.
Now I use a possible code execution time sequence to demo how the possible
race happens,
seq linear_add() linear_congested()
0 conf=mddev->private
1 oldconf=mddev->private
2 mddev->raid_disks++
3 for (i=0; i<mddev->raid_disks;i++)
4 bdev_get_queue(conf->disks[i].rdev->bdev)
5 mddev->private=newconf
In linear_add() mddev->raid_disks is increased in time seq 2, and on
another CPU in linear_congested() the for-loop iterates conf->disks[i] by
the increased mddev->raid_disks in time seq 3,4. But conf with one more
element (which is a pointer to struct dev_info type) to conf->disks[] is
not updated yet, accessing its structure member in time seq 4 will cause a
NULL pointer deference fault.
To fix this race, there are 2 parts of modification in the patch,
1) Add 'int raid_disks' in struct linear_conf, as a copy of
mddev->raid_disks. It is initialized in linear_conf(), always being
consistent with pointers number of 'struct dev_info disks[]'. When
iterating conf->disks[] in linear_congested(), use conf->raid_disks to
replace mddev->raid_disks in the for-loop, then NULL pointer deference
will not happen again.
2) RCU stuffs are back again, and use kfree_rcu() in linear_add() to
free oldconf memory. Because oldconf may be referenced as mddev->private
in linear_congested(), kfree_rcu() makes sure that its memory will not
be released until no one uses it any more.
Also some code comments are added in this patch, to make this modification
to be easier understandable.
This patch can be applied for kernels since v4.0 after commit:
3be260cc18f8 ("md/linear: remove rcu protections in favour of
suspend/resume"). But this bug is reported on Linux v3.0 based kernel, for
people who maintain kernels before Linux v4.0, they need to do some back
back port to this patch.
Changelog:
- V3: add 'int raid_disks' in struct linear_conf, and use kfree_rcu() to
replace rcu_call() in linear_add().
- v2: add RCU stuffs by suggestion from Shaohua and Neil.
- v1: initial effort.
Signed-off-by: Coly Li <colyli@suse.de>
Cc: Shaohua Li <shli@fb.com>
Cc: Neil Brown <neilb@suse.com>
Cc: stable@vger.kernel.org
Signed-off-by: Shaohua Li <shli@fb.com>
2017-01-28 13:11:49 +00:00
|
|
|
oldconf = rcu_dereference(mddev->private);
|
2006-06-26 07:27:41 +00:00
|
|
|
mddev->raid_disks++;
|
md linear: fix a race between linear_add() and linear_congested()
Recently I receive a bug report that on Linux v3.0 based kerenl, hot add
disk to a md linear device causes kernel crash at linear_congested(). From
the crash image analysis, I find in linear_congested(), mddev->raid_disks
contains value N, but conf->disks[] only has N-1 pointers available. Then
a NULL pointer deference crashes the kernel.
There is a race between linear_add() and linear_congested(), RCU stuffs
used in these two functions cannot avoid the race. Since Linuv v4.0
RCU code is replaced by introducing mddev_suspend(). After checking the
upstream code, it seems linear_congested() is not called in
generic_make_request() code patch, so mddev_suspend() cannot provent it
from being called. The possible race still exists.
Here I explain how the race still exists in current code. For a machine
has many CPUs, on one CPU, linear_add() is called to add a hard disk to a
md linear device; at the same time on other CPU, linear_congested() is
called to detect whether this md linear device is congested before issuing
an I/O request onto it.
Now I use a possible code execution time sequence to demo how the possible
race happens,
seq linear_add() linear_congested()
0 conf=mddev->private
1 oldconf=mddev->private
2 mddev->raid_disks++
3 for (i=0; i<mddev->raid_disks;i++)
4 bdev_get_queue(conf->disks[i].rdev->bdev)
5 mddev->private=newconf
In linear_add() mddev->raid_disks is increased in time seq 2, and on
another CPU in linear_congested() the for-loop iterates conf->disks[i] by
the increased mddev->raid_disks in time seq 3,4. But conf with one more
element (which is a pointer to struct dev_info type) to conf->disks[] is
not updated yet, accessing its structure member in time seq 4 will cause a
NULL pointer deference fault.
To fix this race, there are 2 parts of modification in the patch,
1) Add 'int raid_disks' in struct linear_conf, as a copy of
mddev->raid_disks. It is initialized in linear_conf(), always being
consistent with pointers number of 'struct dev_info disks[]'. When
iterating conf->disks[] in linear_congested(), use conf->raid_disks to
replace mddev->raid_disks in the for-loop, then NULL pointer deference
will not happen again.
2) RCU stuffs are back again, and use kfree_rcu() in linear_add() to
free oldconf memory. Because oldconf may be referenced as mddev->private
in linear_congested(), kfree_rcu() makes sure that its memory will not
be released until no one uses it any more.
Also some code comments are added in this patch, to make this modification
to be easier understandable.
This patch can be applied for kernels since v4.0 after commit:
3be260cc18f8 ("md/linear: remove rcu protections in favour of
suspend/resume"). But this bug is reported on Linux v3.0 based kernel, for
people who maintain kernels before Linux v4.0, they need to do some back
back port to this patch.
Changelog:
- V3: add 'int raid_disks' in struct linear_conf, and use kfree_rcu() to
replace rcu_call() in linear_add().
- v2: add RCU stuffs by suggestion from Shaohua and Neil.
- v1: initial effort.
Signed-off-by: Coly Li <colyli@suse.de>
Cc: Shaohua Li <shli@fb.com>
Cc: Neil Brown <neilb@suse.com>
Cc: stable@vger.kernel.org
Signed-off-by: Shaohua Li <shli@fb.com>
2017-01-28 13:11:49 +00:00
|
|
|
WARN_ONCE(mddev->raid_disks != newconf->raid_disks,
|
|
|
|
"copied raid_disks doesn't match mddev->raid_disks");
|
|
|
|
rcu_assign_pointer(mddev->private, newconf);
|
2009-03-31 03:59:03 +00:00
|
|
|
md_set_array_sectors(mddev, linear_size(mddev, 0, 0));
|
2008-07-21 07:05:22 +00:00
|
|
|
set_capacity(mddev->gendisk, mddev->array_sectors);
|
2014-12-15 01:56:57 +00:00
|
|
|
mddev_resume(mddev);
|
2009-08-03 00:59:58 +00:00
|
|
|
revalidate_disk(mddev->gendisk);
|
md linear: fix a race between linear_add() and linear_congested()
Recently I receive a bug report that on Linux v3.0 based kerenl, hot add
disk to a md linear device causes kernel crash at linear_congested(). From
the crash image analysis, I find in linear_congested(), mddev->raid_disks
contains value N, but conf->disks[] only has N-1 pointers available. Then
a NULL pointer deference crashes the kernel.
There is a race between linear_add() and linear_congested(), RCU stuffs
used in these two functions cannot avoid the race. Since Linuv v4.0
RCU code is replaced by introducing mddev_suspend(). After checking the
upstream code, it seems linear_congested() is not called in
generic_make_request() code patch, so mddev_suspend() cannot provent it
from being called. The possible race still exists.
Here I explain how the race still exists in current code. For a machine
has many CPUs, on one CPU, linear_add() is called to add a hard disk to a
md linear device; at the same time on other CPU, linear_congested() is
called to detect whether this md linear device is congested before issuing
an I/O request onto it.
Now I use a possible code execution time sequence to demo how the possible
race happens,
seq linear_add() linear_congested()
0 conf=mddev->private
1 oldconf=mddev->private
2 mddev->raid_disks++
3 for (i=0; i<mddev->raid_disks;i++)
4 bdev_get_queue(conf->disks[i].rdev->bdev)
5 mddev->private=newconf
In linear_add() mddev->raid_disks is increased in time seq 2, and on
another CPU in linear_congested() the for-loop iterates conf->disks[i] by
the increased mddev->raid_disks in time seq 3,4. But conf with one more
element (which is a pointer to struct dev_info type) to conf->disks[] is
not updated yet, accessing its structure member in time seq 4 will cause a
NULL pointer deference fault.
To fix this race, there are 2 parts of modification in the patch,
1) Add 'int raid_disks' in struct linear_conf, as a copy of
mddev->raid_disks. It is initialized in linear_conf(), always being
consistent with pointers number of 'struct dev_info disks[]'. When
iterating conf->disks[] in linear_congested(), use conf->raid_disks to
replace mddev->raid_disks in the for-loop, then NULL pointer deference
will not happen again.
2) RCU stuffs are back again, and use kfree_rcu() in linear_add() to
free oldconf memory. Because oldconf may be referenced as mddev->private
in linear_congested(), kfree_rcu() makes sure that its memory will not
be released until no one uses it any more.
Also some code comments are added in this patch, to make this modification
to be easier understandable.
This patch can be applied for kernels since v4.0 after commit:
3be260cc18f8 ("md/linear: remove rcu protections in favour of
suspend/resume"). But this bug is reported on Linux v3.0 based kernel, for
people who maintain kernels before Linux v4.0, they need to do some back
back port to this patch.
Changelog:
- V3: add 'int raid_disks' in struct linear_conf, and use kfree_rcu() to
replace rcu_call() in linear_add().
- v2: add RCU stuffs by suggestion from Shaohua and Neil.
- v1: initial effort.
Signed-off-by: Coly Li <colyli@suse.de>
Cc: Shaohua Li <shli@fb.com>
Cc: Neil Brown <neilb@suse.com>
Cc: stable@vger.kernel.org
Signed-off-by: Shaohua Li <shli@fb.com>
2017-01-28 13:11:49 +00:00
|
|
|
kfree_rcu(oldconf, rcu);
|
2006-06-26 07:27:41 +00:00
|
|
|
return 0;
|
2005-04-16 22:20:36 +00:00
|
|
|
}
|
|
|
|
|
2014-12-15 01:56:58 +00:00
|
|
|
/*
 * linear_free() - release the personality's private configuration.
 * @mddev: owning array (not used here).
 * @priv:  the struct linear_conf to free, passed as an opaque pointer.
 */
static void linear_free(struct mddev *mddev, void *priv)
{
	/* priv is the struct linear_conf; kfree() takes it as void * directly. */
	kfree(priv);
}
|
|
|
|
|
2011-11-05 00:06:58 +00:00
|
|
|
static void linear_make_request(struct mddev *mddev, struct bio *bio)
|
2005-04-16 22:20:36 +00:00
|
|
|
{
|
2013-11-24 02:21:01 +00:00
|
|
|
char b[BDEVNAME_SIZE];
|
2011-10-11 05:48:49 +00:00
|
|
|
struct dev_info *tmp_dev;
|
2013-11-24 02:21:01 +00:00
|
|
|
struct bio *split;
|
|
|
|
sector_t start_sector, end_sector, data_offset;
|
2005-04-16 22:20:36 +00:00
|
|
|
|
2016-08-05 21:35:16 +00:00
|
|
|
if (unlikely(bio->bi_opf & REQ_PREFLUSH)) {
|
2010-09-03 09:56:18 +00:00
|
|
|
md_flush_request(mddev, bio);
|
2011-09-12 10:12:01 +00:00
|
|
|
return;
|
2005-09-09 23:23:41 +00:00
|
|
|
}
|
|
|
|
|
2013-11-24 02:21:01 +00:00
|
|
|
do {
|
2016-11-18 02:22:04 +00:00
|
|
|
sector_t bio_sector = bio->bi_iter.bi_sector;
|
|
|
|
tmp_dev = which_dev(mddev, bio_sector);
|
2013-11-24 02:21:01 +00:00
|
|
|
start_sector = tmp_dev->end_sector - tmp_dev->rdev->sectors;
|
|
|
|
end_sector = tmp_dev->end_sector;
|
|
|
|
data_offset = tmp_dev->rdev->data_offset;
|
|
|
|
bio->bi_bdev = tmp_dev->rdev->bdev;
|
2008-10-13 00:55:12 +00:00
|
|
|
|
2016-11-18 02:22:04 +00:00
|
|
|
if (unlikely(bio_sector >= end_sector ||
|
|
|
|
bio_sector < start_sector))
|
2013-11-24 02:21:01 +00:00
|
|
|
goto out_of_bounds;
|
|
|
|
|
|
|
|
if (unlikely(bio_end_sector(bio) > end_sector)) {
|
|
|
|
/* This bio crosses a device boundary, so we have to
|
|
|
|
* split it.
|
|
|
|
*/
|
2016-11-18 02:22:04 +00:00
|
|
|
split = bio_split(bio, end_sector - bio_sector,
|
2013-11-24 02:21:01 +00:00
|
|
|
GFP_NOIO, fs_bio_set);
|
|
|
|
bio_chain(split, bio);
|
|
|
|
} else {
|
|
|
|
split = bio;
|
|
|
|
}
|
2012-10-11 02:08:44 +00:00
|
|
|
|
2013-11-24 02:21:01 +00:00
|
|
|
split->bi_iter.bi_sector = split->bi_iter.bi_sector -
|
|
|
|
start_sector + data_offset;
|
|
|
|
|
2016-06-05 19:32:07 +00:00
|
|
|
if (unlikely((bio_op(split) == REQ_OP_DISCARD) &&
|
2013-11-24 02:21:01 +00:00
|
|
|
!blk_queue_discard(bdev_get_queue(split->bi_bdev)))) {
|
|
|
|
/* Just ignore it */
|
2015-07-20 13:29:37 +00:00
|
|
|
bio_endio(split);
|
2016-11-18 02:22:04 +00:00
|
|
|
} else {
|
|
|
|
if (mddev->gendisk)
|
|
|
|
trace_block_bio_remap(bdev_get_queue(split->bi_bdev),
|
|
|
|
split, disk_devt(mddev->gendisk),
|
|
|
|
bio_sector);
|
2017-02-14 00:21:49 +00:00
|
|
|
mddev_check_writesame(mddev, split);
|
2013-11-24 02:21:01 +00:00
|
|
|
generic_make_request(split);
|
2016-11-18 02:22:04 +00:00
|
|
|
}
|
2013-11-24 02:21:01 +00:00
|
|
|
} while (split != bio);
|
|
|
|
return;
|
|
|
|
|
|
|
|
out_of_bounds:
|
2016-11-02 03:16:49 +00:00
|
|
|
pr_err("md/linear:%s: make_request: Sector %llu out of bounds on dev %s: %llu sectors, offset %llu\n",
|
2013-11-24 02:21:01 +00:00
|
|
|
mdname(mddev),
|
|
|
|
(unsigned long long)bio->bi_iter.bi_sector,
|
|
|
|
bdevname(tmp_dev->rdev->bdev, b),
|
|
|
|
(unsigned long long)tmp_dev->rdev->sectors,
|
|
|
|
(unsigned long long)start_sector);
|
|
|
|
bio_io_error(bio);
|
2005-04-16 22:20:36 +00:00
|
|
|
}
|
|
|
|
|
2011-10-11 05:47:53 +00:00
|
|
|
static void linear_status (struct seq_file *seq, struct mddev *mddev)
|
2005-04-16 22:20:36 +00:00
|
|
|
{
|
2009-06-17 22:45:01 +00:00
|
|
|
seq_printf(seq, " %dk rounding", mddev->chunk_sectors / 2);
|
2005-04-16 22:20:36 +00:00
|
|
|
}
|
|
|
|
|
2014-12-15 01:56:57 +00:00
|
|
|
/*
 * linear_quiesce() - quiesce hook for the linear personality.
 * @mddev: the array.
 * @state: requested quiesce state (ignored).
 *
 * Intentionally a no-op: this personality does nothing on quiesce.
 */
static void linear_quiesce(struct mddev *mddev, int state)
{
	/* Nothing to do. */
}
|
|
|
|
|
2011-10-11 05:49:58 +00:00
|
|
|
static struct md_personality linear_personality =
|
2005-04-16 22:20:36 +00:00
|
|
|
{
|
|
|
|
.name = "linear",
|
2006-01-06 08:20:36 +00:00
|
|
|
.level = LEVEL_LINEAR,
|
2005-04-16 22:20:36 +00:00
|
|
|
.owner = THIS_MODULE,
|
|
|
|
.make_request = linear_make_request,
|
|
|
|
.run = linear_run,
|
2014-12-15 01:56:58 +00:00
|
|
|
.free = linear_free,
|
2005-04-16 22:20:36 +00:00
|
|
|
.status = linear_status,
|
2006-06-26 07:27:41 +00:00
|
|
|
.hot_add_disk = linear_add,
|
2009-03-18 01:10:40 +00:00
|
|
|
.size = linear_size,
|
2014-12-15 01:56:57 +00:00
|
|
|
.quiesce = linear_quiesce,
|
2014-12-15 01:56:56 +00:00
|
|
|
.congested = linear_congested,
|
2005-04-16 22:20:36 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * linear_init() - module entry point.
 *
 * Registers linear_personality and returns whatever
 * register_md_personality() reports.
 */
static int __init linear_init(void)
{
	return register_md_personality(&linear_personality);
}
|
|
|
|
|
|
|
|
static void linear_exit (void)
|
|
|
|
{
|
2006-01-06 08:20:36 +00:00
|
|
|
unregister_md_personality (&linear_personality);
|
2005-04-16 22:20:36 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
module_init(linear_init);
|
|
|
|
module_exit(linear_exit);
|
|
|
|
MODULE_LICENSE("GPL");
|
2009-12-14 01:49:58 +00:00
|
|
|
MODULE_DESCRIPTION("Linear device concatenation personality for MD");
|
2006-01-06 08:20:51 +00:00
|
|
|
MODULE_ALIAS("md-personality-1"); /* LINEAR - deprecated*/
|
|
|
|
MODULE_ALIAS("md-linear");
|
2006-01-06 08:20:36 +00:00
|
|
|
MODULE_ALIAS("md-level--1");
|