exofs: convert io_state to use pages array instead of bio at input

* inode.c operations are full-page based, not true scatter-gather
* Lets us use more pages at once, up to 512 (from 249) on 64-bit
  (see the arithmetic sketch after this list)
* Brings us much closer to being able to use exofs's io_state engine
  from the objlayout driver (once I decide where to put the common code)
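
The 249 -> 512 jump follows directly from the two allocation limits in
inode.c; here is a quick back-of-the-envelope check as a userspace
sketch (not driver code; the sizeof() values are assumptions for a
64-bit build with 4K pages):

    /* Back-of-the-envelope check of the 249 -> 512 jump.  Userspace sketch;
     * the sizeof() values are assumptions for a 64-bit build with 4K pages.
     */
    #include <stdio.h>

    int main(void)
    {
            const unsigned page_size       = 4096;  /* PAGE_SIZE */
            const unsigned sizeof_bio      = 112;   /* assumed sizeof(struct bio) */
            const unsigned sizeof_bio_vec  = 16;    /* assumed sizeof(struct bio_vec) */
            const unsigned sizeof_page_ptr = 8;     /* sizeof(struct page *) */

            /* old limit: struct bio plus its bio_vec array in one kmalloc'd page */
            printf("BIO_MAX_PAGES_KMALLOC = %u\n",
                   (page_size - sizeof_bio) / sizeof_bio_vec);  /* -> 249 */

            /* new limit: one kmalloc'd page holding nothing but page pointers */
            printf("MAX_PAGES_KMALLOC     = %u\n",
                   page_size / sizeof_page_ptr);                /* -> 512 */
            return 0;
    }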

After the RAID0 patch the outer (input) bio was never used as a bio; it
was simply a page carrier into the RAID engine. Even in the simple
mirror/single-device arrangement the page info was copied into a second
bio. It is now easier to just pass a pages array into the io_state and
prepare the bio(s) once.
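
As a rough illustration of the new calling convention, here is a sketch
based on the fields in the diff below (not a verbatim excerpt of the
driver; exofs_sbi_write() is assumed as the ios.c entry point): a
submitter now describes its pages directly and the striping code builds
the per-device bios.

    /*
     * Sketch only: how a submitter feeds the reworked io_state.
     * exofs_sbi_write() is assumed to be the ios.c entry point that runs
     * _prepare_for_striping().
     */
    static int submit_pages_sketch(struct exofs_io_state *ios,
                                   struct page **pages, unsigned nr_pages,
                                   loff_t offset, unsigned long length)
    {
            ios->pages    = pages;                  /* plain page array, no bio */
            ios->nr_pages = nr_pages;
            ios->pgbase   = offset & ~PAGE_MASK;    /* byte offset inside pages[0] */
            ios->offset   = offset;
            ios->length   = length;

            /*
             * The engine walks ios->pages[] in _add_stripe_unit() and
             * bio_add_pc_page()s each page into the per-device bio that is
             * finally handed to the OSD layer.
             */
            return exofs_sbi_write(ios);
    }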

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
Boaz Harrosh, 2010-01-28 18:24:06 +02:00
commit 86093aaff5 (parent 5d952b8391)
3 changed files with 71 additions and 61 deletions

fs/exofs/exofs.h

@@ -128,7 +128,10 @@ struct exofs_io_state {
 	loff_t			offset;
 	unsigned long		length;
 	void			*kern_buff;
-	struct bio		*bio;
+	struct page		**pages;
+	unsigned		nr_pages;
+	unsigned		pgbase;

 	/* Attributes */
 	unsigned		in_attr_len;

fs/exofs/inode.c

@@ -41,16 +41,18 @@
 enum { BIO_MAX_PAGES_KMALLOC	=
 		(PAGE_SIZE - sizeof(struct bio)) / sizeof(struct bio_vec),
+	MAX_PAGES_KMALLOC	=
+		PAGE_SIZE / sizeof(struct page *),
 };

 struct page_collect {
 	struct exofs_sb_info *sbi;
-	struct request_queue *req_q;
 	struct inode *inode;
 	unsigned expected_pages;
 	struct exofs_io_state *ios;

-	struct bio *bio;
+	struct page **pages;
+	unsigned alloc_pages;
 	unsigned nr_pages;
 	unsigned long length;
 	loff_t pg_first; /* keep 64bit also in 32-arches */
@@ -62,15 +64,12 @@ static void _pcol_init(struct page_collect *pcol, unsigned expected_pages,
 	struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;

 	pcol->sbi = sbi;
-	/* Create master bios on first Q, later on cloning, each clone will be
-	 * allocated on it's destination Q
-	 */
-	pcol->req_q = osd_request_queue(sbi->layout.s_ods[0]);
 	pcol->inode = inode;
 	pcol->expected_pages = expected_pages;

 	pcol->ios = NULL;
-	pcol->bio = NULL;
+	pcol->pages = NULL;
+	pcol->alloc_pages = 0;
 	pcol->nr_pages = 0;
 	pcol->length = 0;
 	pcol->pg_first = -1;
@@ -80,7 +79,8 @@ static void _pcol_reset(struct page_collect *pcol)
 {
 	pcol->expected_pages -= min(pcol->nr_pages, pcol->expected_pages);

-	pcol->bio = NULL;
+	pcol->pages = NULL;
+	pcol->alloc_pages = 0;
 	pcol->nr_pages = 0;
 	pcol->length = 0;
 	pcol->pg_first = -1;
@@ -90,13 +90,13 @@ static void _pcol_reset(struct page_collect *pcol)
 	 * it might not end here. don't be left with nothing
 	 */
 	if (!pcol->expected_pages)
-		pcol->expected_pages = BIO_MAX_PAGES_KMALLOC;
+		pcol->expected_pages = MAX_PAGES_KMALLOC;
 }

 static int pcol_try_alloc(struct page_collect *pcol)
 {
-	int pages = min_t(unsigned, pcol->expected_pages,
-			  BIO_MAX_PAGES_KMALLOC);
+	unsigned pages = min_t(unsigned, pcol->expected_pages,
+			       MAX_PAGES_KMALLOC);

 	if (!pcol->ios) { /* First time allocate io_state */
 		int ret = exofs_get_io_state(&pcol->sbi->layout, &pcol->ios);
@@ -105,23 +105,28 @@ static int pcol_try_alloc(struct page_collect *pcol)
 			return ret;
 	}

+	/* TODO: easily support bio chaining */
+	pages = min_t(unsigned, pages,
+		      pcol->sbi->layout.group_width * BIO_MAX_PAGES_KMALLOC);
+
 	for (; pages; pages >>= 1) {
-		pcol->bio = bio_kmalloc(GFP_KERNEL, pages);
-		if (likely(pcol->bio))
+		pcol->pages = kmalloc(pages * sizeof(struct page *),
+				      GFP_KERNEL);
+		if (likely(pcol->pages)) {
+			pcol->alloc_pages = pages;
 			return 0;
+		}
 	}

-	EXOFS_ERR("Failed to bio_kmalloc expected_pages=%u\n",
+	EXOFS_ERR("Failed to kmalloc expected_pages=%u\n",
 		  pcol->expected_pages);
 	return -ENOMEM;
 }

 static void pcol_free(struct page_collect *pcol)
 {
-	if (pcol->bio) {
-		bio_put(pcol->bio);
-		pcol->bio = NULL;
-	}
+	kfree(pcol->pages);
+	pcol->pages = NULL;

 	if (pcol->ios) {
 		exofs_put_io_state(pcol->ios);
@@ -132,11 +137,10 @@ static void pcol_free(struct page_collect *pcol)
 static int pcol_add_page(struct page_collect *pcol, struct page *page,
 			 unsigned len)
 {
-	int added_len = bio_add_pc_page(pcol->req_q, pcol->bio, page, len, 0);
-	if (unlikely(len != added_len))
+	if (unlikely(pcol->nr_pages >= pcol->alloc_pages))
 		return -ENOMEM;

-	++pcol->nr_pages;
+	pcol->pages[pcol->nr_pages++] = page;
 	pcol->length += len;
 	return 0;
 }
@@ -181,7 +185,6 @@ static void update_write_page(struct page *page, int ret)
  */
 static int __readpages_done(struct page_collect *pcol, bool do_unlock)
 {
-	struct bio_vec *bvec;
 	int i;
 	u64 resid;
 	u64 good_bytes;
@@ -198,8 +201,8 @@ static int __readpages_done(struct page_collect *pcol, bool do_unlock)
 		     pcol->inode->i_ino, _LLU(good_bytes), pcol->length,
 		     pcol->nr_pages);

-	__bio_for_each_segment(bvec, pcol->bio, i, 0) {
-		struct page *page = bvec->bv_page;
+	for (i = 0; i < pcol->nr_pages; i++) {
+		struct page *page = pcol->pages[i];
 		struct inode *inode = page->mapping->host;
 		int page_stat;
@@ -218,7 +221,7 @@ static int __readpages_done(struct page_collect *pcol, bool do_unlock)
 			ret = update_read_page(page, page_stat);
 		if (do_unlock)
 			unlock_page(page);
-		length += bvec->bv_len;
+		length += PAGE_SIZE;
 	}

 	pcol_free(pcol);
@@ -238,11 +241,10 @@ static void readpages_done(struct exofs_io_state *ios, void *p)
 static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw)
 {
-	struct bio_vec *bvec;
 	int i;

-	__bio_for_each_segment(bvec, pcol->bio, i, 0) {
-		struct page *page = bvec->bv_page;
+	for (i = 0; i < pcol->nr_pages; i++) {
+		struct page *page = pcol->pages[i];

 		if (rw == READ)
 			update_read_page(page, ret);
@@ -260,13 +262,14 @@ static int read_exec(struct page_collect *pcol, bool is_sync)
 	struct page_collect *pcol_copy = NULL;
 	int ret;

-	if (!pcol->bio)
+	if (!pcol->pages)
 		return 0;

 	/* see comment in _readpage() about sync reads */
 	WARN_ON(is_sync && (pcol->nr_pages != 1));

-	ios->bio = pcol->bio;
+	ios->pages = pcol->pages;
+	ios->nr_pages = pcol->nr_pages;
 	ios->length = pcol->length;
 	ios->offset = pcol->pg_first << PAGE_CACHE_SHIFT;
@@ -366,7 +369,7 @@ try_again:
 			goto try_again;
 	}

-	if (!pcol->bio) {
+	if (!pcol->pages) {
 		ret = pcol_try_alloc(pcol);
 		if (unlikely(ret))
 			goto fail;
@@ -448,7 +451,6 @@ static int exofs_readpage(struct file *file, struct page *page)
 static void writepages_done(struct exofs_io_state *ios, void *p)
 {
 	struct page_collect *pcol = p;
-	struct bio_vec *bvec;
 	int i;
 	u64 resid;
 	u64 good_bytes;
@@ -467,8 +469,8 @@ static void writepages_done(struct exofs_io_state *ios, void *p)
 		     pcol->inode->i_ino, _LLU(good_bytes), pcol->length,
 		     pcol->nr_pages);

-	__bio_for_each_segment(bvec, pcol->bio, i, 0) {
-		struct page *page = bvec->bv_page;
+	for (i = 0; i < pcol->nr_pages; i++) {
+		struct page *page = pcol->pages[i];
 		struct inode *inode = page->mapping->host;
 		int page_stat;
@@ -485,7 +487,7 @@ static void writepages_done(struct exofs_io_state *ios, void *p)
 		EXOFS_DBGMSG2("    writepages_done(0x%lx, 0x%lx) status=%d\n",
 			      inode->i_ino, page->index, page_stat);

-		length += bvec->bv_len;
+		length += PAGE_SIZE;
 	}

 	pcol_free(pcol);
@@ -500,7 +502,7 @@ static int write_exec(struct page_collect *pcol)
 	struct page_collect *pcol_copy = NULL;
 	int ret;

-	if (!pcol->bio)
+	if (!pcol->pages)
 		return 0;

 	pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL);
@@ -512,9 +514,8 @@ static int write_exec(struct page_collect *pcol)
 	*pcol_copy = *pcol;

-	pcol_copy->bio->bi_rw |= (1 << BIO_RW); /* FIXME: bio_set_dir() */
-
-	ios->bio = pcol_copy->bio;
+	ios->pages = pcol_copy->pages;
+	ios->nr_pages = pcol_copy->nr_pages;
 	ios->offset = pcol_copy->pg_first << PAGE_CACHE_SHIFT;
 	ios->length = pcol_copy->length;
 	ios->done = writepages_done;
@@ -605,7 +606,7 @@ try_again:
 			goto try_again;
 	}

-	if (!pcol->bio) {
+	if (!pcol->pages) {
 		ret = pcol_try_alloc(pcol);
 		if (unlikely(ret))
 			goto fail;

fs/exofs/ios.c

@@ -283,10 +283,11 @@ static void _offset_dev_unit_off(struct exofs_io_state *ios, u64 file_offset,
 	*dev = stripe_mod / stripe_unit * ios->layout->mirrors_p1;
 }

-static int _add_stripe_unit(struct exofs_io_state *ios, unsigned *cur_bvec,
-			    struct exofs_per_dev_state *per_dev, int cur_len)
+static int _add_stripe_unit(struct exofs_io_state *ios, unsigned *cur_pg,
+			    unsigned pgbase, struct exofs_per_dev_state *per_dev,
+			    int cur_len)
 {
-	unsigned bv = *cur_bvec;
+	unsigned pg = *cur_pg;
 	struct request_queue *q =
 			osd_request_queue(exofs_ios_od(ios, per_dev->dev));

@@ -295,7 +296,7 @@ static int _add_stripe_unit(struct exofs_io_state *ios, unsigned *cur_bvec,
 	if (per_dev->bio == NULL) {
 		unsigned pages_in_stripe = ios->layout->group_width *
 					(ios->layout->stripe_unit / PAGE_SIZE);
-		unsigned bio_size = (ios->bio->bi_vcnt + pages_in_stripe) /
+		unsigned bio_size = (ios->nr_pages + pages_in_stripe) /
 					ios->layout->group_width;

 		per_dev->bio = bio_kmalloc(GFP_KERNEL, bio_size);
@@ -307,21 +308,22 @@ static int _add_stripe_unit(struct exofs_io_state *ios, unsigned *cur_bvec,
 	}

 	while (cur_len > 0) {
-		int added_len;
-		struct bio_vec *bvec = &ios->bio->bi_io_vec[bv];
+		unsigned pglen = min_t(unsigned, PAGE_SIZE - pgbase, cur_len);
+		unsigned added_len;

-		BUG_ON(ios->bio->bi_vcnt <= bv);
-		cur_len -= bvec->bv_len;
+		BUG_ON(ios->nr_pages <= pg);
+		cur_len -= pglen;

-		added_len = bio_add_pc_page(q, per_dev->bio, bvec->bv_page,
-					    bvec->bv_len, bvec->bv_offset);
-		if (unlikely(bvec->bv_len != added_len))
+		added_len = bio_add_pc_page(q, per_dev->bio, ios->pages[pg],
+					    pglen, pgbase);
+		if (unlikely(pglen != added_len))
 			return -ENOMEM;
-		++bv;
+		pgbase = 0;
+		++pg;
 	}
 	BUG_ON(cur_len);

-	*cur_bvec = bv;
+	*cur_pg = pg;
 	return 0;
 }

@@ -332,10 +334,10 @@ static int _prepare_for_striping(struct exofs_io_state *ios)
 	unsigned stripe_unit = ios->layout->stripe_unit;
 	unsigned comp = 0;
 	unsigned stripes = 0;
-	unsigned cur_bvec = 0;
-	int ret;
+	unsigned cur_pg = 0;
+	int ret = 0;

-	if (!ios->bio) {
+	if (!ios->pages) {
 		if (ios->kern_buff) {
 			struct exofs_per_dev_state *per_dev = &ios->per_dev[0];
 			unsigned unit_off;
@@ -352,7 +354,7 @@ static int _prepare_for_striping(struct exofs_io_state *ios)

 	while (length) {
 		struct exofs_per_dev_state *per_dev = &ios->per_dev[comp];
-		unsigned cur_len;
+		unsigned cur_len, page_off;

 		if (!per_dev->length) {
 			unsigned unit_off;
@@ -362,11 +364,15 @@ static int _prepare_for_striping(struct exofs_io_state *ios)
 			stripes++;
 			cur_len = min_t(u64, stripe_unit - unit_off, length);
 			offset += cur_len;
+			page_off = unit_off & ~PAGE_MASK;
+			BUG_ON(page_off != ios->pgbase);
 		} else {
 			cur_len = min_t(u64, stripe_unit, length);
+			page_off = 0;
 		}

-		ret = _add_stripe_unit(ios, &cur_bvec, per_dev, cur_len);
+		ret = _add_stripe_unit(ios, &cur_pg, page_off , per_dev,
+				       cur_len);
 		if (unlikely(ret))
 			goto out;

@@ -448,7 +454,7 @@ static int _sbi_write_mirror(struct exofs_io_state *ios, int cur_comp)
 		per_dev->or = or;
 		per_dev->offset = master_dev->offset;

-		if (ios->bio) {
+		if (ios->pages) {
 			struct bio *bio;

 			if (per_dev != master_dev) {
@@ -541,7 +547,7 @@ static int _sbi_read_mirror(struct exofs_io_state *ios, unsigned cur_comp)
 	}
 	per_dev->or = or;

-	if (ios->bio) {
+	if (ios->pages) {
 		osd_req_read(or, &ios->obj, per_dev->offset,
 			     per_dev->bio, per_dev->length);
 		EXOFS_DBGMSG("read(0x%llx) offset=0x%llx length=0x%llx"