NFS: rewrite directio read to use async coalesce code

This also has the advantage that it allows directio to use pnfs.

Signed-off-by: Fred Isaman <iisaman@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Author: Fred Isaman, 2012-04-20 14:47:51 -04:00 (committed by Trond Myklebust)
parent 1825a0d08f
commit 584aa810b6
6 changed files with 139 additions and 145 deletions
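
The diff below drops the direct-read path's private RPC plumbing (one rpc_task per rsize'd chunk) in favour of the generic pageio coalescing machinery: per-page nfs_page requests are queued on an nfs_pageio_descriptor, and a per-header completion-ops table fires once per coalesced batch. As a rough orientation aid, here is a minimal user-space model of that callback pattern; it is not kernel code, and every type and function name in it is invented for illustration.

/*
 * Toy model of the coalesce/completion-ops pattern (NOT kernel code).
 * Requests are queued on a header, then a single completion callback
 * walks the whole batch and credits only hdr->good_bytes.
 */
#include <stdio.h>
#include <stdlib.h>

struct toy_req { size_t bytes; struct toy_req *next; };

struct toy_hdr {                 /* stands in for nfs_pgio_header */
	struct toy_req *pages;   /* coalesced requests            */
	size_t good_bytes;       /* bytes the "server" returned   */
};

struct toy_completion_ops {      /* stands in for nfs_pgio_completion_ops */
	void (*init_hdr)(struct toy_hdr *hdr);
	void (*completion)(struct toy_hdr *hdr);
};

static void toy_init_hdr(struct toy_hdr *hdr)
{
	(void)hdr;               /* the real hook takes a reference on the dreq */
	printf("init_hdr: pin the request for the in-flight batch\n");
}

static void toy_completion(struct toy_hdr *hdr)
{
	size_t bytes = 0;
	struct toy_req *req;

	/* walk the coalesced requests, crediting only good_bytes */
	while ((req = hdr->pages) != NULL) {
		hdr->pages = req->next;
		printf("req of %zu bytes: %s\n", req->bytes,
		       bytes < hdr->good_bytes ? "completed" : "short/zeroed");
		bytes += req->bytes;
		free(req);
	}
}

int main(void)
{
	const struct toy_completion_ops ops = {
		.init_hdr = toy_init_hdr,
		.completion = toy_completion,
	};
	struct toy_hdr hdr = { .good_bytes = 6144 };

	for (int i = 0; i < 3; i++) {        /* queue three 4096-byte requests */
		struct toy_req *req = malloc(sizeof(*req));
		req->bytes = 4096;
		req->next = hdr.pages;
		hdr.pages = req;
	}
	ops.init_hdr(&hdr);
	ops.completion(&hdr);                /* one callback for the whole batch */
	return 0;
}

The real table added by this patch is nfs_direct_read_completion_ops (error_cleanup / init_hdr / completion) in the first file below.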

View File

@@ -124,22 +124,6 @@ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_
 	return -EINVAL;
 }
 
-static void nfs_direct_dirty_pages(struct page **pages, unsigned int pgbase, size_t count)
-{
-	unsigned int npages;
-	unsigned int i;
-
-	if (count == 0)
-		return;
-	pages += (pgbase >> PAGE_SHIFT);
-	npages = (count + (pgbase & ~PAGE_MASK) + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	for (i = 0; i < npages; i++) {
-		struct page *page = pages[i];
-		if (!PageCompound(page))
-			set_page_dirty(page);
-	}
-}
-
 static void nfs_direct_release_pages(struct page **pages, unsigned int npages)
 {
 	unsigned int i;
@@ -226,58 +210,92 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq)
 	nfs_direct_req_release(dreq);
 }
 
-/*
- * We must hold a reference to all the pages in this direct read request
- * until the RPCs complete. This could be long *after* we are woken up in
- * nfs_direct_wait (for instance, if someone hits ^C on a slow server).
- */
-static void nfs_direct_read_result(struct rpc_task *task, void *calldata)
+void nfs_direct_readpage_release(struct nfs_page *req)
 {
-	struct nfs_read_data *data = calldata;
-
-	nfs_readpage_result(task, data);
+	dprintk("NFS: direct read done (%s/%lld %d@%lld)\n",
+		req->wb_context->dentry->d_inode->i_sb->s_id,
+		(long long)NFS_FILEID(req->wb_context->dentry->d_inode),
+		req->wb_bytes,
+		(long long)req_offset(req));
+	nfs_release_request(req);
 }
 
-static void nfs_direct_read_release(void *calldata)
+static void nfs_direct_read_completion(struct nfs_pgio_header *hdr)
 {
-	struct nfs_read_data *data = calldata;
-	struct nfs_direct_req *dreq = (struct nfs_direct_req *)data->header->req;
-	int status = data->task.tk_status;
+	unsigned long bytes = 0;
+	struct nfs_direct_req *dreq = hdr->dreq;
+
+	if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
+		goto out_put;
 
 	spin_lock(&dreq->lock);
-	if (unlikely(status < 0)) {
-		dreq->error = status;
-		spin_unlock(&dreq->lock);
-	} else {
-		dreq->count += data->res.count;
-		spin_unlock(&dreq->lock);
-		nfs_direct_dirty_pages(data->pages.pagevec,
-				data->args.pgbase,
-				data->res.count);
-	}
-	nfs_direct_release_pages(data->pages.pagevec, data->pages.npages);
+	if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) && (hdr->good_bytes == 0))
+		dreq->error = hdr->error;
+	else
+		dreq->count += hdr->good_bytes;
+	spin_unlock(&dreq->lock);
 
+	if (!test_bit(NFS_IOHDR_ERROR, &hdr->flags)) {
+		while (!list_empty(&hdr->pages)) {
+			struct nfs_page *req = nfs_list_entry(hdr->pages.next);
+			struct page *page = req->wb_page;
+
+			if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) {
+				if (bytes > hdr->good_bytes)
+					zero_user(page, 0, PAGE_SIZE);
+				else if (hdr->good_bytes - bytes < PAGE_SIZE)
+					zero_user_segment(page,
+						hdr->good_bytes & ~PAGE_MASK,
+						PAGE_SIZE);
+			}
+			bytes += req->wb_bytes;
+			nfs_list_remove_request(req);
+			nfs_direct_readpage_release(req);
+			if (!PageCompound(page))
+				set_page_dirty(page);
+			page_cache_release(page);
+		}
+	} else {
+		while (!list_empty(&hdr->pages)) {
+			struct nfs_page *req = nfs_list_entry(hdr->pages.next);
+
+			if (bytes < hdr->good_bytes)
+				if (!PageCompound(req->wb_page))
+					set_page_dirty(req->wb_page);
+			bytes += req->wb_bytes;
+			page_cache_release(req->wb_page);
+			nfs_list_remove_request(req);
+			nfs_direct_readpage_release(req);
+		}
+	}
+out_put:
 	if (put_dreq(dreq))
 		nfs_direct_complete(dreq);
-	nfs_readdata_release(data);
+	hdr->release(hdr);
 }
 
-static const struct rpc_call_ops nfs_read_direct_ops = {
-	.rpc_call_prepare = nfs_read_prepare,
-	.rpc_call_done = nfs_direct_read_result,
-	.rpc_release = nfs_direct_read_release,
-};
-
-static void nfs_direct_readhdr_release(struct nfs_read_header *rhdr)
+static void nfs_sync_pgio_error(struct list_head *head)
 {
-	struct nfs_read_data *data = &rhdr->rpc_data;
+	struct nfs_page *req;
 
-	if (data->pages.pagevec != data->pages.page_array)
-		kfree(data->pages.pagevec);
-	nfs_readhdr_free(&rhdr->header);
+	while (!list_empty(head)) {
+		req = nfs_list_entry(head->next);
+		nfs_list_remove_request(req);
+		nfs_release_request(req);
+	}
 }
 
+static void nfs_direct_pgio_init(struct nfs_pgio_header *hdr)
+{
+	get_dreq(hdr->dreq);
+}
+
+static const struct nfs_pgio_completion_ops nfs_direct_read_completion_ops = {
+	.error_cleanup = nfs_sync_pgio_error,
+	.init_hdr = nfs_direct_pgio_init,
+	.completion = nfs_direct_read_completion,
+};
+
 /*
  * For each rsize'd chunk of the user's buffer, dispatch an NFS READ
  * operation. If nfs_readdata_alloc() or get_user_pages() fails,
@@ -285,118 +303,85 @@ static void nfs_direct_readhdr_release(struct nfs_read_header *rhdr)
  * handled automatically by nfs_direct_read_result(). Otherwise, if
  * no requests have been sent, just return an error.
  */
-static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
+static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *desc,
 						const struct iovec *iov,
 						loff_t pos)
 {
+	struct nfs_direct_req *dreq = desc->pg_dreq;
 	struct nfs_open_context *ctx = dreq->ctx;
 	struct inode *inode = ctx->dentry->d_inode;
 	unsigned long user_addr = (unsigned long)iov->iov_base;
 	size_t count = iov->iov_len;
 	size_t rsize = NFS_SERVER(inode)->rsize;
-	struct rpc_task *task;
-	struct rpc_message msg = {
-		.rpc_cred = ctx->cred,
-	};
-	struct rpc_task_setup task_setup_data = {
-		.rpc_client = NFS_CLIENT(inode),
-		.rpc_message = &msg,
-		.callback_ops = &nfs_read_direct_ops,
-		.workqueue = nfsiod_workqueue,
-		.flags = RPC_TASK_ASYNC,
-	};
 	unsigned int pgbase;
 	int result;
 	ssize_t started = 0;
+	struct page **pagevec = NULL;
+	unsigned int npages;
 
 	do {
-		struct nfs_read_header *rhdr;
-		struct nfs_read_data *data;
-		struct nfs_page_array *pages;
 		size_t bytes;
+		int i;
 
 		pgbase = user_addr & ~PAGE_MASK;
-		bytes = min(rsize,count);
+		bytes = min(max(rsize, PAGE_SIZE), count);
 
 		result = -ENOMEM;
-		rhdr = nfs_readhdr_alloc();
-		if (unlikely(!rhdr))
+		npages = nfs_page_array_len(pgbase, bytes);
+		if (!pagevec)
+			pagevec = kmalloc(npages * sizeof(struct page *),
+					  GFP_KERNEL);
+		if (!pagevec)
 			break;
-		data = nfs_readdata_alloc(&rhdr->header, nfs_page_array_len(pgbase, bytes));
-		if (!data) {
-			nfs_readhdr_free(&rhdr->header);
-			break;
-		}
-		data->header = &rhdr->header;
-		atomic_inc(&data->header->refcnt);
-		pages = &data->pages;
-
 		down_read(&current->mm->mmap_sem);
 		result = get_user_pages(current, current->mm, user_addr,
-					pages->npages, 1, 0, pages->pagevec, NULL);
+					npages, 1, 0, pagevec, NULL);
 		up_read(&current->mm->mmap_sem);
-		if (result < 0) {
-			nfs_direct_readhdr_release(rhdr);
+		if (result < 0)
 			break;
-		}
-		if ((unsigned)result < pages->npages) {
+		if ((unsigned)result < npages) {
 			bytes = result * PAGE_SIZE;
 			if (bytes <= pgbase) {
-				nfs_direct_release_pages(pages->pagevec, result);
-				nfs_direct_readhdr_release(rhdr);
+				nfs_direct_release_pages(pagevec, result);
 				break;
 			}
 			bytes -= pgbase;
-			pages->npages = result;
+			npages = result;
 		}
 
-		get_dreq(dreq);
-
-		rhdr->header.req = (struct nfs_page *) dreq;
-		rhdr->header.inode = inode;
-		rhdr->header.cred = msg.rpc_cred;
-		data->args.fh = NFS_FH(inode);
-		data->args.context = get_nfs_open_context(ctx);
-		data->args.lock_context = dreq->l_ctx;
-		data->args.offset = pos;
-		data->args.pgbase = pgbase;
-		data->args.pages = pages->pagevec;
-		data->args.count = bytes;
-		data->res.fattr = &data->fattr;
-		data->res.eof = 0;
-		data->res.count = bytes;
-		nfs_fattr_init(&data->fattr);
-		msg.rpc_argp = &data->args;
-		msg.rpc_resp = &data->res;
-
-		task_setup_data.task = &data->task;
-		task_setup_data.callback_data = data;
-		NFS_PROTO(inode)->read_setup(data, &msg);
-
-		task = rpc_run_task(&task_setup_data);
-		if (IS_ERR(task))
-			break;
-
-		dprintk("NFS: %5u initiated direct read call "
-			"(req %s/%Ld, %zu bytes @ offset %Lu)\n",
-				task->tk_pid,
-				inode->i_sb->s_id,
-				(long long)NFS_FILEID(inode),
-				bytes,
-				(unsigned long long)data->args.offset);
-		rpc_put_task(task);
-
-		started += bytes;
-		user_addr += bytes;
-		pos += bytes;
-		/* FIXME: Remove this unnecessary math from final patch */
-		pgbase += bytes;
-		pgbase &= ~PAGE_MASK;
-		BUG_ON(pgbase != (user_addr & ~PAGE_MASK));
-
-		count -= bytes;
+		for (i = 0; i < npages; i++) {
+			struct nfs_page *req;
+			unsigned int req_len = min(bytes, PAGE_SIZE - pgbase);
+			/* XXX do we need to do the eof zeroing found in async_filler? */
+			req = nfs_create_request(dreq->ctx, dreq->inode,
+						 pagevec[i],
+						 pgbase, req_len);
+			if (IS_ERR(req)) {
+				nfs_direct_release_pages(pagevec + i,
+							 npages - i);
+				result = PTR_ERR(req);
+				break;
+			}
+			req->wb_index = pos >> PAGE_SHIFT;
+			req->wb_offset = pos & ~PAGE_MASK;
+			if (!nfs_pageio_add_request(desc, req)) {
+				result = desc->pg_error;
+				nfs_release_request(req);
+				nfs_direct_release_pages(pagevec + i,
+							 npages - i);
+				break;
+			}
+			pgbase = 0;
+			bytes -= req_len;
+			started += req_len;
+			user_addr += req_len;
+			pos += req_len;
+			count -= req_len;
+		}
 	} while (count != 0);
+	kfree(pagevec);
 
 	if (started)
 		return started;
 	return result < 0 ? (ssize_t) result : -EFAULT;
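
A quick worked example of the page-carving arithmetic used by the new loop above: the first request covers only the tail of the first (possibly misaligned) page, every later request starts at page offset 0, and the page count comes from rounding pgbase + bytes up to whole pages. The program below is an illustrative user-space sketch, assuming nfs_page_array_len() rounds up the same way as the deleted nfs_direct_dirty_pages() helper ((pgbase + count + PAGE_SIZE - 1) >> PAGE_SHIFT); the address and size are made up.

/* Illustrative only, not kernel code. */
#include <assert.h>
#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define PAGE_MASK  (~(PAGE_SIZE - 1))

static unsigned int page_array_len(unsigned long pgbase, size_t len)
{
	return (pgbase + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
}

int main(void)
{
	unsigned long user_addr = 0x1000f00;   /* misaligned user buffer */
	size_t bytes = 10000;                  /* one rsize'd chunk */
	unsigned long pgbase = user_addr & ~PAGE_MASK;
	unsigned int npages = page_array_len(pgbase, bytes);
	size_t remaining = bytes, total = 0;

	/* one nfs_page-style request per pinned page, as in the new loop */
	for (unsigned int i = 0; i < npages; i++) {
		size_t req_len = remaining < PAGE_SIZE - pgbase ?
				 remaining : PAGE_SIZE - pgbase;
		printf("page %u: request of %zu bytes at page offset %lu\n",
		       i, req_len, pgbase);
		pgbase = 0;            /* only the first page is misaligned */
		remaining -= req_len;
		total += req_len;
	}
	assert(total == bytes);
	printf("npages=%u, total=%zu\n", npages, total);
	return 0;
}

With pgbase = 0xf00 and 10000 bytes this pins 4 pages and emits requests of 256, 4096, 4096 and 1552 bytes.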
@@ -407,15 +392,19 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
 					      unsigned long nr_segs,
 					      loff_t pos)
 {
+	struct nfs_pageio_descriptor desc;
 	ssize_t result = -EINVAL;
 	size_t requested_bytes = 0;
 	unsigned long seg;
 
+	nfs_pageio_init_read(&desc, dreq->inode,
+			     &nfs_direct_read_completion_ops);
 	get_dreq(dreq);
+	desc.pg_dreq = dreq;
 
 	for (seg = 0; seg < nr_segs; seg++) {
 		const struct iovec *vec = &iov[seg];
-		result = nfs_direct_read_schedule_segment(dreq, vec, pos);
+		result = nfs_direct_read_schedule_segment(&desc, vec, pos);
 		if (result < 0)
 			break;
 		requested_bytes += result;
@@ -424,6 +413,8 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
 		pos += vec->iov_len;
 	}
 
+	nfs_pageio_complete(&desc);
+
 	/*
 	 * If no bytes were started, return the error, and let the
 	 * generic layer handle the completion.
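
For the short-read case handled by nfs_direct_read_completion() earlier in this diff: when the header carries the EOF flag, pages lying entirely beyond hdr->good_bytes are zeroed in full, and the page containing the boundary is zeroed from (good_bytes & ~PAGE_MASK) to the end of the page. A small illustrative user-space calculation (made-up sizes, not kernel code):

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define PAGE_MASK  (~(PAGE_SIZE - 1))

int main(void)
{
	unsigned long good_bytes = 9300;         /* server returned a short read */
	unsigned long bytes = 0;                 /* running total, as in the loop */

	for (int i = 0; i < 4; i++) {            /* four 4096-byte requests */
		if (bytes > good_bytes)
			printf("page %d: zero whole page\n", i);
		else if (good_bytes - bytes < PAGE_SIZE)
			printf("page %d: zero from offset %lu to %lu\n",
			       i, good_bytes & ~PAGE_MASK, PAGE_SIZE);
		else
			printf("page %d: fully valid, no zeroing\n", i);
		bytes += 4096;
	}
	return 0;
}

With good_bytes = 9300, pages 0 and 1 stay untouched, page 2 is zeroed from offset 1108 to the end, and page 3 is zeroed in full.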

View File

@@ -304,8 +304,9 @@ struct nfs_pgio_completion_ops;
 /* read.c */
 extern struct nfs_read_header *nfs_readhdr_alloc(void);
 extern void nfs_readhdr_free(struct nfs_pgio_header *hdr);
-extern struct nfs_read_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr,
-						unsigned int pagecount);
+extern void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
+			struct inode *inode,
+			const struct nfs_pgio_completion_ops *compl_ops);
 extern int nfs_initiate_read(struct rpc_clnt *clnt,
 			     struct nfs_read_data *data,
 			     const struct rpc_call_ops *call_ops);

View File

@@ -48,8 +48,11 @@ void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
 	hdr->cred = hdr->req->wb_context->cred;
 	hdr->io_start = req_offset(hdr->req);
 	hdr->good_bytes = desc->pg_count;
+	hdr->dreq = desc->pg_dreq;
 	hdr->release = release;
 	hdr->completion_ops = desc->pg_completion_ops;
+	if (hdr->completion_ops->init_hdr)
+		hdr->completion_ops->init_hdr(hdr);
 }
 
 void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos)
@@ -116,9 +119,6 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
 	req->wb_page    = page;
 	req->wb_index   = page->index;
 	page_cache_get(page);
-	BUG_ON(PagePrivate(page));
-	BUG_ON(!PageLocked(page));
-	BUG_ON(page->mapping->host != inode);
 	req->wb_offset  = offset;
 	req->wb_pgbase  = offset;
 	req->wb_bytes   = count;
@@ -257,6 +257,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
 	desc->pg_ioflags = io_flags;
 	desc->pg_error = 0;
 	desc->pg_lseg = NULL;
+	desc->pg_dreq = NULL;
 }
 
 /**

View File

@@ -51,8 +51,8 @@ struct nfs_read_header *nfs_readhdr_alloc()
 	return rhdr;
 }
 
-struct nfs_read_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr,
-					 unsigned int pagecount)
+static struct nfs_read_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr,
+						unsigned int pagecount)
 {
 	struct nfs_read_data *data, *prealloc;
@@ -123,9 +123,9 @@ void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio)
 }
 EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds);
 
-static void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
-			struct inode *inode,
-			const struct nfs_pgio_completion_ops *compl_ops)
+void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
+			  struct inode *inode,
+			  const struct nfs_pgio_completion_ops *compl_ops)
 {
 	if (!pnfs_pageio_init_read(pgio, inode, compl_ops))
 		nfs_pageio_init_read_mds(pgio, inode, compl_ops);

View File

@@ -68,6 +68,7 @@ struct nfs_pageio_descriptor {
 	const struct rpc_call_ops *pg_rpc_callops;
 	const struct nfs_pgio_completion_ops *pg_completion_ops;
 	struct pnfs_layout_segment *pg_lseg;
+	struct nfs_direct_req	*pg_dreq;
 };
 
 #define NFS_WBACK_BUSY(req)	(test_bit(PG_BUSY,&(req)->wb_flags))

View File

@@ -1208,6 +1208,7 @@ struct nfs_pgio_header {
 	const struct rpc_call_ops *mds_ops;
 	void (*release) (struct nfs_pgio_header *hdr);
 	const struct nfs_pgio_completion_ops *completion_ops;
+	struct nfs_direct_req	*dreq;
 	spinlock_t		lock;
 	/* fields protected by lock */
 	int			pnfs_error;
@@ -1221,8 +1222,6 @@ struct nfs_read_header {
 	struct nfs_read_data	rpc_data;
 };
 
-struct nfs_direct_req;
-
 struct nfs_write_data {
 	struct nfs_pgio_header	*header;
 	struct list_head	list;
@@ -1264,6 +1263,7 @@ struct nfs_commit_data {
 
 struct nfs_pgio_completion_ops {
 	void	(*error_cleanup)(struct list_head *head);
+	void	(*init_hdr)(struct nfs_pgio_header *hdr);
 	void	(*completion)(struct nfs_pgio_header *hdr);
 };