mirror of
https://github.com/torvalds/linux.git
synced 2024-12-28 22:02:28 +00:00
block: Introduce new bio_split()
The new bio_split() can split arbitrary bios - it's not restricted to single page bios, like the old bio_split() (previously renamed to bio_pair_split()). It also has different semantics - it doesn't allocate a struct bio_pair, leaving it up to the caller to handle completions. Then convert the existing bio_pair_split() users to the new bio_split() - and also nvme, which was open coding bio splitting. (We have to take that BUG_ON() out of bio_integrity_trim() because this bio_split() needs to use it, and there's no reason it has to be used on bios marked as cloned; BIO_CLONED doesn't seem to have clearly documented semantics anyways.) Signed-off-by: Kent Overstreet <kmo@daterainc.com> Cc: Jens Axboe <axboe@kernel.dk> Cc: Martin K. Petersen <martin.petersen@oracle.com> Cc: Matthew Wilcox <matthew.r.wilcox@intel.com> Cc: Keith Busch <keith.busch@intel.com> Cc: Vishal Verma <vishal.l.verma@intel.com> Cc: Jiri Kosina <jkosina@suse.cz> Cc: Neil Brown <neilb@suse.de>
This commit is contained in:
parent
ee67891bf1
commit
20d0189b10
@ -441,104 +441,19 @@ int nvme_setup_prps(struct nvme_dev *dev, struct nvme_common_command *cmd,
|
||||
return total_len;
|
||||
}
|
||||
|
||||
struct nvme_bio_pair {
|
||||
struct bio b1, b2, *parent;
|
||||
struct bio_vec *bv1, *bv2;
|
||||
int err;
|
||||
atomic_t cnt;
|
||||
};
|
||||
|
||||
static void nvme_bio_pair_endio(struct bio *bio, int err)
|
||||
{
|
||||
struct nvme_bio_pair *bp = bio->bi_private;
|
||||
|
||||
if (err)
|
||||
bp->err = err;
|
||||
|
||||
if (atomic_dec_and_test(&bp->cnt)) {
|
||||
bio_endio(bp->parent, bp->err);
|
||||
kfree(bp->bv1);
|
||||
kfree(bp->bv2);
|
||||
kfree(bp);
|
||||
}
|
||||
}
|
||||
|
||||
static struct nvme_bio_pair *nvme_bio_split(struct bio *bio, int idx,
|
||||
int len, int offset)
|
||||
{
|
||||
struct nvme_bio_pair *bp;
|
||||
|
||||
BUG_ON(len > bio->bi_iter.bi_size);
|
||||
BUG_ON(idx > bio->bi_vcnt);
|
||||
|
||||
bp = kmalloc(sizeof(*bp), GFP_ATOMIC);
|
||||
if (!bp)
|
||||
return NULL;
|
||||
bp->err = 0;
|
||||
|
||||
bp->b1 = *bio;
|
||||
bp->b2 = *bio;
|
||||
|
||||
bp->b1.bi_iter.bi_size = len;
|
||||
bp->b2.bi_iter.bi_size -= len;
|
||||
bp->b1.bi_vcnt = idx;
|
||||
bp->b2.bi_iter.bi_idx = idx;
|
||||
bp->b2.bi_iter.bi_sector += len >> 9;
|
||||
|
||||
if (offset) {
|
||||
bp->bv1 = kmalloc(bio->bi_max_vecs * sizeof(struct bio_vec),
|
||||
GFP_ATOMIC);
|
||||
if (!bp->bv1)
|
||||
goto split_fail_1;
|
||||
|
||||
bp->bv2 = kmalloc(bio->bi_max_vecs * sizeof(struct bio_vec),
|
||||
GFP_ATOMIC);
|
||||
if (!bp->bv2)
|
||||
goto split_fail_2;
|
||||
|
||||
memcpy(bp->bv1, bio->bi_io_vec,
|
||||
bio->bi_max_vecs * sizeof(struct bio_vec));
|
||||
memcpy(bp->bv2, bio->bi_io_vec,
|
||||
bio->bi_max_vecs * sizeof(struct bio_vec));
|
||||
|
||||
bp->b1.bi_io_vec = bp->bv1;
|
||||
bp->b2.bi_io_vec = bp->bv2;
|
||||
bp->b2.bi_io_vec[idx].bv_offset += offset;
|
||||
bp->b2.bi_io_vec[idx].bv_len -= offset;
|
||||
bp->b1.bi_io_vec[idx].bv_len = offset;
|
||||
bp->b1.bi_vcnt++;
|
||||
} else
|
||||
bp->bv1 = bp->bv2 = NULL;
|
||||
|
||||
bp->b1.bi_private = bp;
|
||||
bp->b2.bi_private = bp;
|
||||
|
||||
bp->b1.bi_end_io = nvme_bio_pair_endio;
|
||||
bp->b2.bi_end_io = nvme_bio_pair_endio;
|
||||
|
||||
bp->parent = bio;
|
||||
atomic_set(&bp->cnt, 2);
|
||||
|
||||
return bp;
|
||||
|
||||
split_fail_2:
|
||||
kfree(bp->bv1);
|
||||
split_fail_1:
|
||||
kfree(bp);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static int nvme_split_and_submit(struct bio *bio, struct nvme_queue *nvmeq,
|
||||
int idx, int len, int offset)
|
||||
int len)
|
||||
{
|
||||
struct nvme_bio_pair *bp = nvme_bio_split(bio, idx, len, offset);
|
||||
if (!bp)
|
||||
struct bio *split = bio_split(bio, len >> 9, GFP_ATOMIC, NULL);
|
||||
if (!split)
|
||||
return -ENOMEM;
|
||||
|
||||
bio_chain(split, bio);
|
||||
|
||||
if (bio_list_empty(&nvmeq->sq_cong))
|
||||
add_wait_queue(&nvmeq->sq_full, &nvmeq->sq_cong_wait);
|
||||
bio_list_add(&nvmeq->sq_cong, &bp->b1);
|
||||
bio_list_add(&nvmeq->sq_cong, &bp->b2);
|
||||
bio_list_add(&nvmeq->sq_cong, split);
|
||||
bio_list_add(&nvmeq->sq_cong, bio);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -568,8 +483,7 @@ static int nvme_map_bio(struct nvme_queue *nvmeq, struct nvme_iod *iod,
|
||||
} else {
|
||||
if (!first && BIOVEC_NOT_VIRT_MERGEABLE(&bvprv, &bvec))
|
||||
return nvme_split_and_submit(bio, nvmeq,
|
||||
iter.bi_idx,
|
||||
length, 0);
|
||||
length);
|
||||
|
||||
sg = sg ? sg + 1 : iod->sg;
|
||||
sg_set_page(sg, bvec.bv_page,
|
||||
@ -578,9 +492,7 @@ static int nvme_map_bio(struct nvme_queue *nvmeq, struct nvme_iod *iod,
|
||||
}
|
||||
|
||||
if (split_len - length < bvec.bv_len)
|
||||
return nvme_split_and_submit(bio, nvmeq, iter.bi_idx,
|
||||
split_len,
|
||||
split_len - length);
|
||||
return nvme_split_and_submit(bio, nvmeq, split_len);
|
||||
length += bvec.bv_len;
|
||||
bvprv = bvec;
|
||||
first = 0;
|
||||
|
@ -2338,26 +2338,8 @@ static void pkt_end_io_read_cloned(struct bio *bio, int err)
|
||||
pkt_bio_finished(pd);
|
||||
}
|
||||
|
||||
static void pkt_make_request(struct request_queue *q, struct bio *bio)
|
||||
static void pkt_make_request_read(struct pktcdvd_device *pd, struct bio *bio)
|
||||
{
|
||||
struct pktcdvd_device *pd;
|
||||
char b[BDEVNAME_SIZE];
|
||||
sector_t zone;
|
||||
struct packet_data *pkt;
|
||||
int was_empty, blocked_bio;
|
||||
struct pkt_rb_node *node;
|
||||
|
||||
pd = q->queuedata;
|
||||
if (!pd) {
|
||||
pr_err("%s incorrect request queue\n",
|
||||
bdevname(bio->bi_bdev, b));
|
||||
goto end_io;
|
||||
}
|
||||
|
||||
/*
|
||||
* Clone READ bios so we can have our own bi_end_io callback.
|
||||
*/
|
||||
if (bio_data_dir(bio) == READ) {
|
||||
struct bio *cloned_bio = bio_clone(bio, GFP_NOIO);
|
||||
struct packet_stacked_data *psd = mempool_alloc(psd_pool, GFP_NOIO);
|
||||
|
||||
@ -2368,45 +2350,17 @@ static void pkt_make_request(struct request_queue *q, struct bio *bio)
|
||||
cloned_bio->bi_end_io = pkt_end_io_read_cloned;
|
||||
pd->stats.secs_r += bio_sectors(bio);
|
||||
pkt_queue_bio(pd, cloned_bio);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (!test_bit(PACKET_WRITABLE, &pd->flags)) {
|
||||
pkt_notice(pd, "WRITE for ro device (%llu)\n",
|
||||
(unsigned long long)bio->bi_iter.bi_sector);
|
||||
goto end_io;
|
||||
}
|
||||
|
||||
if (!bio->bi_iter.bi_size || (bio->bi_iter.bi_size % CD_FRAMESIZE)) {
|
||||
pkt_err(pd, "wrong bio size\n");
|
||||
goto end_io;
|
||||
}
|
||||
|
||||
blk_queue_bounce(q, &bio);
|
||||
static void pkt_make_request_write(struct request_queue *q, struct bio *bio)
|
||||
{
|
||||
struct pktcdvd_device *pd = q->queuedata;
|
||||
sector_t zone;
|
||||
struct packet_data *pkt;
|
||||
int was_empty, blocked_bio;
|
||||
struct pkt_rb_node *node;
|
||||
|
||||
zone = get_zone(bio->bi_iter.bi_sector, pd);
|
||||
pkt_dbg(2, pd, "start = %6llx stop = %6llx\n",
|
||||
(unsigned long long)bio->bi_iter.bi_sector,
|
||||
(unsigned long long)bio_end_sector(bio));
|
||||
|
||||
/* Check if we have to split the bio */
|
||||
{
|
||||
struct bio_pair *bp;
|
||||
sector_t last_zone;
|
||||
int first_sectors;
|
||||
|
||||
last_zone = get_zone(bio_end_sector(bio) - 1, pd);
|
||||
if (last_zone != zone) {
|
||||
BUG_ON(last_zone != zone + pd->settings.size);
|
||||
first_sectors = last_zone - bio->bi_iter.bi_sector;
|
||||
bp = bio_pair_split(bio, first_sectors);
|
||||
BUG_ON(!bp);
|
||||
pkt_make_request(q, &bp->bio1);
|
||||
pkt_make_request(q, &bp->bio2);
|
||||
bio_pair_release(bp);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* If we find a matching packet in state WAITING or READ_WAIT, we can
|
||||
@ -2480,6 +2434,64 @@ static void pkt_make_request(struct request_queue *q, struct bio *bio)
|
||||
*/
|
||||
wake_up(&pd->wqueue);
|
||||
}
|
||||
}
|
||||
|
||||
static void pkt_make_request(struct request_queue *q, struct bio *bio)
|
||||
{
|
||||
struct pktcdvd_device *pd;
|
||||
char b[BDEVNAME_SIZE];
|
||||
struct bio *split;
|
||||
|
||||
pd = q->queuedata;
|
||||
if (!pd) {
|
||||
pr_err("%s incorrect request queue\n",
|
||||
bdevname(bio->bi_bdev, b));
|
||||
goto end_io;
|
||||
}
|
||||
|
||||
pkt_dbg(2, pd, "start = %6llx stop = %6llx\n",
|
||||
(unsigned long long)bio->bi_iter.bi_sector,
|
||||
(unsigned long long)bio_end_sector(bio));
|
||||
|
||||
/*
|
||||
* Clone READ bios so we can have our own bi_end_io callback.
|
||||
*/
|
||||
if (bio_data_dir(bio) == READ) {
|
||||
pkt_make_request_read(pd, bio);
|
||||
return;
|
||||
}
|
||||
|
||||
if (!test_bit(PACKET_WRITABLE, &pd->flags)) {
|
||||
pkt_notice(pd, "WRITE for ro device (%llu)\n",
|
||||
(unsigned long long)bio->bi_iter.bi_sector);
|
||||
goto end_io;
|
||||
}
|
||||
|
||||
if (!bio->bi_iter.bi_size || (bio->bi_iter.bi_size % CD_FRAMESIZE)) {
|
||||
pkt_err(pd, "wrong bio size\n");
|
||||
goto end_io;
|
||||
}
|
||||
|
||||
blk_queue_bounce(q, &bio);
|
||||
|
||||
do {
|
||||
sector_t zone = get_zone(bio->bi_iter.bi_sector, pd);
|
||||
sector_t last_zone = get_zone(bio_end_sector(bio) - 1, pd);
|
||||
|
||||
if (last_zone != zone) {
|
||||
BUG_ON(last_zone != zone + pd->settings.size);
|
||||
|
||||
split = bio_split(bio, last_zone -
|
||||
bio->bi_iter.bi_sector,
|
||||
GFP_NOIO, fs_bio_set);
|
||||
bio_chain(split, bio);
|
||||
} else {
|
||||
split = bio;
|
||||
}
|
||||
|
||||
pkt_make_request_write(q, split);
|
||||
} while (split != bio);
|
||||
|
||||
return;
|
||||
end_io:
|
||||
bio_io_error(bio);
|
||||
|
@ -901,7 +901,6 @@ void bch_bbio_endio(struct cache_set *, struct bio *, int, const char *);
|
||||
void bch_bbio_free(struct bio *, struct cache_set *);
|
||||
struct bio *bch_bbio_alloc(struct cache_set *);
|
||||
|
||||
struct bio *bch_bio_split(struct bio *, int, gfp_t, struct bio_set *);
|
||||
void bch_generic_make_request(struct bio *, struct bio_split_pool *);
|
||||
void __bch_submit_bbio(struct bio *, struct cache_set *);
|
||||
void bch_submit_bbio(struct bio *, struct cache_set *, struct bkey *, unsigned);
|
||||
|
@ -11,84 +11,6 @@
|
||||
|
||||
#include <linux/blkdev.h>
|
||||
|
||||
/**
|
||||
* bch_bio_split - split a bio
|
||||
* @bio: bio to split
|
||||
* @sectors: number of sectors to split from the front of @bio
|
||||
* @gfp: gfp mask
|
||||
* @bs: bio set to allocate from
|
||||
*
|
||||
* Allocates and returns a new bio which represents @sectors from the start of
|
||||
* @bio, and updates @bio to represent the remaining sectors.
|
||||
*
|
||||
* If bio_sectors(@bio) was less than or equal to @sectors, returns @bio
|
||||
* unchanged.
|
||||
*
|
||||
* The newly allocated bio will point to @bio's bi_io_vec, if the split was on a
|
||||
* bvec boundary; it is the caller's responsibility to ensure that @bio is not
|
||||
* freed before the split.
|
||||
*/
|
||||
struct bio *bch_bio_split(struct bio *bio, int sectors,
|
||||
gfp_t gfp, struct bio_set *bs)
|
||||
{
|
||||
unsigned vcnt = 0, nbytes = sectors << 9;
|
||||
struct bio_vec bv;
|
||||
struct bvec_iter iter;
|
||||
struct bio *ret = NULL;
|
||||
|
||||
BUG_ON(sectors <= 0);
|
||||
|
||||
if (sectors >= bio_sectors(bio))
|
||||
return bio;
|
||||
|
||||
if (bio->bi_rw & REQ_DISCARD) {
|
||||
ret = bio_alloc_bioset(gfp, 1, bs);
|
||||
if (!ret)
|
||||
return NULL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
bio_for_each_segment(bv, bio, iter) {
|
||||
vcnt++;
|
||||
|
||||
if (nbytes <= bv.bv_len)
|
||||
break;
|
||||
|
||||
nbytes -= bv.bv_len;
|
||||
}
|
||||
|
||||
ret = bio_alloc_bioset(gfp, vcnt, bs);
|
||||
if (!ret)
|
||||
return NULL;
|
||||
|
||||
bio_for_each_segment(bv, bio, iter) {
|
||||
ret->bi_io_vec[ret->bi_vcnt++] = bv;
|
||||
|
||||
if (ret->bi_vcnt == vcnt)
|
||||
break;
|
||||
}
|
||||
|
||||
ret->bi_io_vec[ret->bi_vcnt - 1].bv_len = nbytes;
|
||||
out:
|
||||
ret->bi_bdev = bio->bi_bdev;
|
||||
ret->bi_iter.bi_sector = bio->bi_iter.bi_sector;
|
||||
ret->bi_iter.bi_size = sectors << 9;
|
||||
ret->bi_rw = bio->bi_rw;
|
||||
|
||||
if (bio_integrity(bio)) {
|
||||
if (bio_integrity_clone(ret, bio, gfp)) {
|
||||
bio_put(ret);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
bio_integrity_trim(ret, 0, bio_sectors(ret));
|
||||
}
|
||||
|
||||
bio_advance(bio, ret->bi_iter.bi_size);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static unsigned bch_bio_max_sectors(struct bio *bio)
|
||||
{
|
||||
struct request_queue *q = bdev_get_queue(bio->bi_bdev);
|
||||
@ -172,7 +94,7 @@ void bch_generic_make_request(struct bio *bio, struct bio_split_pool *p)
|
||||
bio_get(bio);
|
||||
|
||||
do {
|
||||
n = bch_bio_split(bio, bch_bio_max_sectors(bio),
|
||||
n = bio_next_split(bio, bch_bio_max_sectors(bio),
|
||||
GFP_NOIO, s->p->bio_split);
|
||||
|
||||
n->bi_end_io = bch_bio_submit_split_endio;
|
||||
|
@ -371,7 +371,7 @@ static void bch_data_insert_start(struct closure *cl)
|
||||
op->writeback))
|
||||
goto err;
|
||||
|
||||
n = bch_bio_split(bio, KEY_SIZE(k), GFP_NOIO, split);
|
||||
n = bio_next_split(bio, KEY_SIZE(k), GFP_NOIO, split);
|
||||
|
||||
n->bi_end_io = bch_data_insert_endio;
|
||||
n->bi_private = cl;
|
||||
@ -679,7 +679,7 @@ static int cache_lookup_fn(struct btree_op *op, struct btree *b, struct bkey *k)
|
||||
if (KEY_DIRTY(k))
|
||||
s->read_dirty_data = true;
|
||||
|
||||
n = bch_bio_split(bio, min_t(uint64_t, INT_MAX,
|
||||
n = bio_next_split(bio, min_t(uint64_t, INT_MAX,
|
||||
KEY_OFFSET(k) - bio->bi_iter.bi_sector),
|
||||
GFP_NOIO, s->d->bio_split);
|
||||
|
||||
@ -920,7 +920,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
|
||||
struct bio *miss, *cache_bio;
|
||||
|
||||
if (s->cache_miss || s->iop.bypass) {
|
||||
miss = bch_bio_split(bio, sectors, GFP_NOIO, s->d->bio_split);
|
||||
miss = bio_next_split(bio, sectors, GFP_NOIO, s->d->bio_split);
|
||||
ret = miss == bio ? MAP_DONE : MAP_CONTINUE;
|
||||
goto out_submit;
|
||||
}
|
||||
@ -943,7 +943,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
|
||||
|
||||
s->iop.replace = true;
|
||||
|
||||
miss = bch_bio_split(bio, sectors, GFP_NOIO, s->d->bio_split);
|
||||
miss = bio_next_split(bio, sectors, GFP_NOIO, s->d->bio_split);
|
||||
|
||||
/* btree_search_recurse()'s btree iterator is no good anymore */
|
||||
ret = miss == bio ? MAP_DONE : -EINTR;
|
||||
|
@ -288,23 +288,56 @@ static int linear_stop (struct mddev *mddev)
|
||||
|
||||
static void linear_make_request(struct mddev *mddev, struct bio *bio)
|
||||
{
|
||||
char b[BDEVNAME_SIZE];
|
||||
struct dev_info *tmp_dev;
|
||||
sector_t start_sector;
|
||||
struct bio *split;
|
||||
sector_t start_sector, end_sector, data_offset;
|
||||
|
||||
if (unlikely(bio->bi_rw & REQ_FLUSH)) {
|
||||
md_flush_request(mddev, bio);
|
||||
return;
|
||||
}
|
||||
|
||||
do {
|
||||
rcu_read_lock();
|
||||
|
||||
tmp_dev = which_dev(mddev, bio->bi_iter.bi_sector);
|
||||
start_sector = tmp_dev->end_sector - tmp_dev->rdev->sectors;
|
||||
end_sector = tmp_dev->end_sector;
|
||||
data_offset = tmp_dev->rdev->data_offset;
|
||||
bio->bi_bdev = tmp_dev->rdev->bdev;
|
||||
|
||||
rcu_read_unlock();
|
||||
|
||||
if (unlikely(bio->bi_iter.bi_sector >= (tmp_dev->end_sector)
|
||||
|| (bio->bi_iter.bi_sector < start_sector))) {
|
||||
char b[BDEVNAME_SIZE];
|
||||
if (unlikely(bio->bi_iter.bi_sector >= end_sector ||
|
||||
bio->bi_iter.bi_sector < start_sector))
|
||||
goto out_of_bounds;
|
||||
|
||||
if (unlikely(bio_end_sector(bio) > end_sector)) {
|
||||
/* This bio crosses a device boundary, so we have to
|
||||
* split it.
|
||||
*/
|
||||
split = bio_split(bio, end_sector -
|
||||
bio->bi_iter.bi_sector,
|
||||
GFP_NOIO, fs_bio_set);
|
||||
bio_chain(split, bio);
|
||||
} else {
|
||||
split = bio;
|
||||
}
|
||||
|
||||
split->bi_iter.bi_sector = split->bi_iter.bi_sector -
|
||||
start_sector + data_offset;
|
||||
|
||||
if (unlikely((split->bi_rw & REQ_DISCARD) &&
|
||||
!blk_queue_discard(bdev_get_queue(split->bi_bdev)))) {
|
||||
/* Just ignore it */
|
||||
bio_endio(split, 0);
|
||||
} else
|
||||
generic_make_request(split);
|
||||
} while (split != bio);
|
||||
return;
|
||||
|
||||
out_of_bounds:
|
||||
printk(KERN_ERR
|
||||
"md/linear:%s: make_request: Sector %llu out of bounds on "
|
||||
"dev %s: %llu sectors, offset %llu\n",
|
||||
@ -313,40 +346,7 @@ static void linear_make_request(struct mddev *mddev, struct bio *bio)
|
||||
bdevname(tmp_dev->rdev->bdev, b),
|
||||
(unsigned long long)tmp_dev->rdev->sectors,
|
||||
(unsigned long long)start_sector);
|
||||
rcu_read_unlock();
|
||||
bio_io_error(bio);
|
||||
return;
|
||||
}
|
||||
if (unlikely(bio_end_sector(bio) > tmp_dev->end_sector)) {
|
||||
/* This bio crosses a device boundary, so we have to
|
||||
* split it.
|
||||
*/
|
||||
struct bio_pair *bp;
|
||||
sector_t end_sector = tmp_dev->end_sector;
|
||||
|
||||
rcu_read_unlock();
|
||||
|
||||
bp = bio_pair_split(bio, end_sector - bio->bi_iter.bi_sector);
|
||||
|
||||
linear_make_request(mddev, &bp->bio1);
|
||||
linear_make_request(mddev, &bp->bio2);
|
||||
bio_pair_release(bp);
|
||||
return;
|
||||
}
|
||||
|
||||
bio->bi_bdev = tmp_dev->rdev->bdev;
|
||||
bio->bi_iter.bi_sector = bio->bi_iter.bi_sector - start_sector
|
||||
+ tmp_dev->rdev->data_offset;
|
||||
rcu_read_unlock();
|
||||
|
||||
if (unlikely((bio->bi_rw & REQ_DISCARD) &&
|
||||
!blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) {
|
||||
/* Just ignore it */
|
||||
bio_endio(bio, 0);
|
||||
return;
|
||||
}
|
||||
|
||||
generic_make_request(bio);
|
||||
}
|
||||
|
||||
static void linear_status (struct seq_file *seq, struct mddev *mddev)
|
||||
|
@ -513,65 +513,44 @@ static inline int is_io_in_chunk_boundary(struct mddev *mddev,
|
||||
|
||||
static void raid0_make_request(struct mddev *mddev, struct bio *bio)
|
||||
{
|
||||
unsigned int chunk_sects;
|
||||
sector_t sector_offset;
|
||||
struct strip_zone *zone;
|
||||
struct md_rdev *tmp_dev;
|
||||
struct bio *split;
|
||||
|
||||
if (unlikely(bio->bi_rw & REQ_FLUSH)) {
|
||||
md_flush_request(mddev, bio);
|
||||
return;
|
||||
}
|
||||
|
||||
chunk_sects = mddev->chunk_sectors;
|
||||
if (unlikely(!is_io_in_chunk_boundary(mddev, chunk_sects, bio))) {
|
||||
do {
|
||||
sector_t sector = bio->bi_iter.bi_sector;
|
||||
struct bio_pair *bp;
|
||||
/* Sanity check -- queue functions should prevent this happening */
|
||||
if (bio_multiple_segments(bio))
|
||||
goto bad_map;
|
||||
/* This is a one page bio that upper layers
|
||||
* refuse to split for us, so we need to split it.
|
||||
*/
|
||||
if (likely(is_power_of_2(chunk_sects)))
|
||||
bp = bio_pair_split(bio, chunk_sects - (sector &
|
||||
(chunk_sects-1)));
|
||||
else
|
||||
bp = bio_pair_split(bio, chunk_sects -
|
||||
sector_div(sector, chunk_sects));
|
||||
raid0_make_request(mddev, &bp->bio1);
|
||||
raid0_make_request(mddev, &bp->bio2);
|
||||
bio_pair_release(bp);
|
||||
return;
|
||||
unsigned chunk_sects = mddev->chunk_sectors;
|
||||
|
||||
unsigned sectors = chunk_sects -
|
||||
(likely(is_power_of_2(chunk_sects))
|
||||
? (sector & (chunk_sects-1))
|
||||
: sector_div(sector, chunk_sects));
|
||||
|
||||
if (sectors < bio_sectors(bio)) {
|
||||
split = bio_split(bio, sectors, GFP_NOIO, fs_bio_set);
|
||||
bio_chain(split, bio);
|
||||
} else {
|
||||
split = bio;
|
||||
}
|
||||
|
||||
sector_offset = bio->bi_iter.bi_sector;
|
||||
zone = find_zone(mddev->private, &sector_offset);
|
||||
tmp_dev = map_sector(mddev, zone, bio->bi_iter.bi_sector,
|
||||
&sector_offset);
|
||||
bio->bi_bdev = tmp_dev->bdev;
|
||||
bio->bi_iter.bi_sector = sector_offset + zone->dev_start +
|
||||
zone = find_zone(mddev->private, &sector);
|
||||
tmp_dev = map_sector(mddev, zone, sector, &sector);
|
||||
split->bi_bdev = tmp_dev->bdev;
|
||||
split->bi_iter.bi_sector = sector + zone->dev_start +
|
||||
tmp_dev->data_offset;
|
||||
|
||||
if (unlikely((bio->bi_rw & REQ_DISCARD) &&
|
||||
!blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) {
|
||||
if (unlikely((split->bi_rw & REQ_DISCARD) &&
|
||||
!blk_queue_discard(bdev_get_queue(split->bi_bdev)))) {
|
||||
/* Just ignore it */
|
||||
bio_endio(bio, 0);
|
||||
return;
|
||||
}
|
||||
|
||||
generic_make_request(bio);
|
||||
return;
|
||||
|
||||
bad_map:
|
||||
printk("md/raid0:%s: make_request bug: can't convert block across chunks"
|
||||
" or bigger than %dk %llu %d\n",
|
||||
mdname(mddev), chunk_sects / 2,
|
||||
(unsigned long long)bio->bi_iter.bi_sector,
|
||||
bio_sectors(bio) / 2);
|
||||
|
||||
bio_io_error(bio);
|
||||
return;
|
||||
bio_endio(split, 0);
|
||||
} else
|
||||
generic_make_request(split);
|
||||
} while (split != bio);
|
||||
}
|
||||
|
||||
static void raid0_status(struct seq_file *seq, struct mddev *mddev)
|
||||
|
@ -1152,14 +1152,12 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
|
||||
kfree(plug);
|
||||
}
|
||||
|
||||
static void make_request(struct mddev *mddev, struct bio * bio)
|
||||
static void __make_request(struct mddev *mddev, struct bio *bio)
|
||||
{
|
||||
struct r10conf *conf = mddev->private;
|
||||
struct r10bio *r10_bio;
|
||||
struct bio *read_bio;
|
||||
int i;
|
||||
sector_t chunk_mask = (conf->geo.chunk_mask & conf->prev.chunk_mask);
|
||||
int chunk_sects = chunk_mask + 1;
|
||||
const int rw = bio_data_dir(bio);
|
||||
const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
|
||||
const unsigned long do_fua = (bio->bi_rw & REQ_FUA);
|
||||
@ -1174,69 +1172,6 @@ static void make_request(struct mddev *mddev, struct bio * bio)
|
||||
int max_sectors;
|
||||
int sectors;
|
||||
|
||||
if (unlikely(bio->bi_rw & REQ_FLUSH)) {
|
||||
md_flush_request(mddev, bio);
|
||||
return;
|
||||
}
|
||||
|
||||
/* If this request crosses a chunk boundary, we need to
|
||||
* split it. This will only happen for 1 PAGE (or less) requests.
|
||||
*/
|
||||
if (unlikely((bio->bi_iter.bi_sector & chunk_mask) + bio_sectors(bio)
|
||||
> chunk_sects
|
||||
&& (conf->geo.near_copies < conf->geo.raid_disks
|
||||
|| conf->prev.near_copies < conf->prev.raid_disks))) {
|
||||
struct bio_pair *bp;
|
||||
/* Sanity check -- queue functions should prevent this happening */
|
||||
if (bio_multiple_segments(bio))
|
||||
goto bad_map;
|
||||
/* This is a one page bio that upper layers
|
||||
* refuse to split for us, so we need to split it.
|
||||
*/
|
||||
bp = bio_pair_split(bio, chunk_sects -
|
||||
(bio->bi_iter.bi_sector & (chunk_sects - 1)));
|
||||
|
||||
/* Each of these 'make_request' calls will call 'wait_barrier'.
|
||||
* If the first succeeds but the second blocks due to the resync
|
||||
* thread raising the barrier, we will deadlock because the
|
||||
* IO to the underlying device will be queued in generic_make_request
|
||||
* and will never complete, so will never reduce nr_pending.
|
||||
* So increment nr_waiting here so no new raise_barriers will
|
||||
* succeed, and so the second wait_barrier cannot block.
|
||||
*/
|
||||
spin_lock_irq(&conf->resync_lock);
|
||||
conf->nr_waiting++;
|
||||
spin_unlock_irq(&conf->resync_lock);
|
||||
|
||||
make_request(mddev, &bp->bio1);
|
||||
make_request(mddev, &bp->bio2);
|
||||
|
||||
spin_lock_irq(&conf->resync_lock);
|
||||
conf->nr_waiting--;
|
||||
wake_up(&conf->wait_barrier);
|
||||
spin_unlock_irq(&conf->resync_lock);
|
||||
|
||||
bio_pair_release(bp);
|
||||
return;
|
||||
bad_map:
|
||||
printk("md/raid10:%s: make_request bug: can't convert block across chunks"
|
||||
" or bigger than %dk %llu %d\n", mdname(mddev), chunk_sects/2,
|
||||
(unsigned long long)bio->bi_iter.bi_sector,
|
||||
bio_sectors(bio) / 2);
|
||||
|
||||
bio_io_error(bio);
|
||||
return;
|
||||
}
|
||||
|
||||
md_write_start(mddev, bio);
|
||||
|
||||
/*
|
||||
* Register the new request and wait if the reconstruction
|
||||
* thread has put up a bar for new requests.
|
||||
* Continue immediately if no resync is active currently.
|
||||
*/
|
||||
wait_barrier(conf);
|
||||
|
||||
sectors = bio_sectors(bio);
|
||||
while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
|
||||
bio->bi_iter.bi_sector < conf->reshape_progress &&
|
||||
@ -1600,6 +1535,52 @@ retry_write:
|
||||
goto retry_write;
|
||||
}
|
||||
one_write_done(r10_bio);
|
||||
}
|
||||
|
||||
static void make_request(struct mddev *mddev, struct bio *bio)
|
||||
{
|
||||
struct r10conf *conf = mddev->private;
|
||||
sector_t chunk_mask = (conf->geo.chunk_mask & conf->prev.chunk_mask);
|
||||
int chunk_sects = chunk_mask + 1;
|
||||
|
||||
struct bio *split;
|
||||
|
||||
if (unlikely(bio->bi_rw & REQ_FLUSH)) {
|
||||
md_flush_request(mddev, bio);
|
||||
return;
|
||||
}
|
||||
|
||||
md_write_start(mddev, bio);
|
||||
|
||||
/*
|
||||
* Register the new request and wait if the reconstruction
|
||||
* thread has put up a bar for new requests.
|
||||
* Continue immediately if no resync is active currently.
|
||||
*/
|
||||
wait_barrier(conf);
|
||||
|
||||
do {
|
||||
|
||||
/*
|
||||
* If this request crosses a chunk boundary, we need to split
|
||||
* it.
|
||||
*/
|
||||
if (unlikely((bio->bi_iter.bi_sector & chunk_mask) +
|
||||
bio_sectors(bio) > chunk_sects
|
||||
&& (conf->geo.near_copies < conf->geo.raid_disks
|
||||
|| conf->prev.near_copies <
|
||||
conf->prev.raid_disks))) {
|
||||
split = bio_split(bio, chunk_sects -
|
||||
(bio->bi_iter.bi_sector &
|
||||
(chunk_sects - 1)),
|
||||
GFP_NOIO, fs_bio_set);
|
||||
bio_chain(split, bio);
|
||||
} else {
|
||||
split = bio;
|
||||
}
|
||||
|
||||
__make_request(mddev, split);
|
||||
} while (split != bio);
|
||||
|
||||
/* In case raid10d snuck in to freeze_array */
|
||||
wake_up(&conf->wait_barrier);
|
||||
|
36
fs/bio.c
36
fs/bio.c
@ -1793,6 +1793,42 @@ void bio_endio_nodec(struct bio *bio, int error)
|
||||
}
|
||||
EXPORT_SYMBOL(bio_endio_nodec);
|
||||
|
||||
/**
|
||||
* bio_split - split a bio
|
||||
* @bio: bio to split
|
||||
* @sectors: number of sectors to split from the front of @bio
|
||||
* @gfp: gfp mask
|
||||
* @bs: bio set to allocate from
|
||||
*
|
||||
* Allocates and returns a new bio which represents @sectors from the start of
|
||||
* @bio, and updates @bio to represent the remaining sectors.
|
||||
*
|
||||
* The newly allocated bio will point to @bio's bi_io_vec; it is the caller's
|
||||
* responsibility to ensure that @bio is not freed before the split.
|
||||
*/
|
||||
struct bio *bio_split(struct bio *bio, int sectors,
|
||||
gfp_t gfp, struct bio_set *bs)
|
||||
{
|
||||
struct bio *split = NULL;
|
||||
|
||||
BUG_ON(sectors <= 0);
|
||||
BUG_ON(sectors >= bio_sectors(bio));
|
||||
|
||||
split = bio_clone_fast(bio, gfp, bs);
|
||||
if (!split)
|
||||
return NULL;
|
||||
|
||||
split->bi_iter.bi_size = sectors << 9;
|
||||
|
||||
if (bio_integrity(split))
|
||||
bio_integrity_trim(split, 0, sectors);
|
||||
|
||||
bio_advance(bio, split->bi_iter.bi_size);
|
||||
|
||||
return split;
|
||||
}
|
||||
EXPORT_SYMBOL(bio_split);
|
||||
|
||||
void bio_pair_release(struct bio_pair *bp)
|
||||
{
|
||||
if (atomic_dec_and_test(&bp->cnt)) {
|
||||
|
@ -321,6 +321,28 @@ extern struct bio_pair *bio_pair_split(struct bio *bi, int first_sectors);
|
||||
extern void bio_pair_release(struct bio_pair *dbio);
|
||||
extern void bio_trim(struct bio *bio, int offset, int size);
|
||||
|
||||
extern struct bio *bio_split(struct bio *bio, int sectors,
|
||||
gfp_t gfp, struct bio_set *bs);
|
||||
|
||||
/**
|
||||
* bio_next_split - get next @sectors from a bio, splitting if necessary
|
||||
* @bio: bio to split
|
||||
* @sectors: number of sectors to split from the front of @bio
|
||||
* @gfp: gfp mask
|
||||
* @bs: bio set to allocate from
|
||||
*
|
||||
* Returns a bio representing the next @sectors of @bio - if the bio is smaller
|
||||
* than @sectors, returns the original bio unchanged.
|
||||
*/
|
||||
static inline struct bio *bio_next_split(struct bio *bio, int sectors,
|
||||
gfp_t gfp, struct bio_set *bs)
|
||||
{
|
||||
if (sectors >= bio_sectors(bio))
|
||||
return bio;
|
||||
|
||||
return bio_split(bio, sectors, gfp, bs);
|
||||
}
|
||||
|
||||
extern struct bio_set *bioset_create(unsigned int, unsigned int);
|
||||
extern void bioset_free(struct bio_set *);
|
||||
extern mempool_t *biovec_create_pool(struct bio_set *bs, int pool_entries);
|
||||
|
Loading…
Reference in New Issue
Block a user