forked from Minki/linux
[PATCH] add AOP_TRUNCATED_PAGE, prepend AOP_ to WRITEPAGE_ACTIVATE
readpage(), prepare_write(), and commit_write() callers are updated to understand the special return code AOP_TRUNCATED_PAGE in the style of writepage() and WRITEPAGE_ACTIVATE. AOP_TRUNCATED_PAGE tells the caller that the callee has unlocked the page and that the operation should be tried again with a new page. OCFS2 uses this to detect and work around a lock inversion in its aop methods. There should be no change in behaviour for methods that don't return AOP_TRUNCATED_PAGE. WRITEPAGE_ACTIVATE is also prepended with AOP_ for consistency and they are made enums so that kerneldoc can be used to document their semantics. Signed-off-by: Zach Brown <zach.brown@oracle.com>
This commit is contained in:
parent
7063fbf226
commit
994fc28c7b
@ -213,7 +213,7 @@ static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec,
|
||||
struct address_space_operations *aops = mapping->a_ops;
|
||||
pgoff_t index;
|
||||
unsigned offset, bv_offs;
|
||||
int len, ret = 0;
|
||||
int len, ret;
|
||||
|
||||
down(&mapping->host->i_sem);
|
||||
index = pos >> PAGE_CACHE_SHIFT;
|
||||
@ -232,9 +232,15 @@ static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec,
|
||||
page = grab_cache_page(mapping, index);
|
||||
if (unlikely(!page))
|
||||
goto fail;
|
||||
if (unlikely(aops->prepare_write(file, page, offset,
|
||||
offset + size)))
|
||||
ret = aops->prepare_write(file, page, offset,
|
||||
offset + size);
|
||||
if (unlikely(ret)) {
|
||||
if (ret == AOP_TRUNCATED_PAGE) {
|
||||
page_cache_release(page);
|
||||
continue;
|
||||
}
|
||||
goto unlock;
|
||||
}
|
||||
transfer_result = lo_do_transfer(lo, WRITE, page, offset,
|
||||
bvec->bv_page, bv_offs, size, IV);
|
||||
if (unlikely(transfer_result)) {
|
||||
@ -251,9 +257,15 @@ static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec,
|
||||
kunmap_atomic(kaddr, KM_USER0);
|
||||
}
|
||||
flush_dcache_page(page);
|
||||
if (unlikely(aops->commit_write(file, page, offset,
|
||||
offset + size)))
|
||||
ret = aops->commit_write(file, page, offset,
|
||||
offset + size);
|
||||
if (unlikely(ret)) {
|
||||
if (ret == AOP_TRUNCATED_PAGE) {
|
||||
page_cache_release(page);
|
||||
continue;
|
||||
}
|
||||
goto unlock;
|
||||
}
|
||||
if (unlikely(transfer_result))
|
||||
goto unlock;
|
||||
bv_offs += size;
|
||||
@ -264,6 +276,7 @@ static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec,
|
||||
unlock_page(page);
|
||||
page_cache_release(page);
|
||||
}
|
||||
ret = 0;
|
||||
out:
|
||||
up(&mapping->host->i_sem);
|
||||
return ret;
|
||||
|
@ -154,7 +154,7 @@ static int ramdisk_commit_write(struct file *file, struct page *page,
|
||||
|
||||
/*
|
||||
* ->writepage to the the blockdev's mapping has to redirty the page so that the
|
||||
* VM doesn't go and steal it. We return WRITEPAGE_ACTIVATE so that the VM
|
||||
* VM doesn't go and steal it. We return AOP_WRITEPAGE_ACTIVATE so that the VM
|
||||
* won't try to (pointlessly) write the page again for a while.
|
||||
*
|
||||
* Really, these pages should not be on the LRU at all.
|
||||
@ -165,7 +165,7 @@ static int ramdisk_writepage(struct page *page, struct writeback_control *wbc)
|
||||
make_page_uptodate(page);
|
||||
SetPageDirty(page);
|
||||
if (wbc->for_reclaim)
|
||||
return WRITEPAGE_ACTIVATE;
|
||||
return AOP_WRITEPAGE_ACTIVATE;
|
||||
unlock_page(page);
|
||||
return 0;
|
||||
}
|
||||
|
@ -721,7 +721,7 @@ retry:
|
||||
&last_block_in_bio, &ret, wbc,
|
||||
page->mapping->a_ops->writepage);
|
||||
}
|
||||
if (unlikely(ret == WRITEPAGE_ACTIVATE))
|
||||
if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE))
|
||||
unlock_page(page);
|
||||
if (ret || (--(wbc->nr_to_write) <= 0))
|
||||
done = 1;
|
||||
|
@ -302,6 +302,37 @@ struct iattr {
|
||||
*/
|
||||
#include <linux/quota.h>
|
||||
|
||||
/**
|
||||
* enum positive_aop_returns - aop return codes with specific semantics
|
||||
*
|
||||
* @AOP_WRITEPAGE_ACTIVATE: Informs the caller that page writeback has
|
||||
* completed, that the page is still locked, and
|
||||
* should be considered active. The VM uses this hint
|
||||
* to return the page to the active list -- it won't
|
||||
* be a candidate for writeback again in the near
|
||||
* future. Other callers must be careful to unlock
|
||||
* the page if they get this return. Returned by
|
||||
* writepage();
|
||||
*
|
||||
* @AOP_TRUNCATED_PAGE: The AOP method that was handed a locked page has
|
||||
* unlocked it and the page might have been truncated.
|
||||
* The caller should back up to acquiring a new page and
|
||||
* trying again. The aop will be taking reasonable
|
||||
* precautions not to livelock. If the caller held a page
|
||||
* reference, it should drop it before retrying. Returned
|
||||
* by readpage(), prepare_write(), and commit_write().
|
||||
*
|
||||
* address_space_operation functions return these large constants to indicate
|
||||
* special semantics to the caller. These are much larger than the bytes in a
|
||||
* page to allow for functions that return the number of bytes operated on in a
|
||||
* given page.
|
||||
*/
|
||||
|
||||
enum positive_aop_returns {
|
||||
AOP_WRITEPAGE_ACTIVATE = 0x80000,
|
||||
AOP_TRUNCATED_PAGE = 0x80001,
|
||||
};
|
||||
|
||||
/*
|
||||
* oh the beauties of C type declarations.
|
||||
*/
|
||||
|
@ -59,12 +59,6 @@ struct writeback_control {
|
||||
unsigned for_reclaim:1; /* Invoked from the page allocator */
|
||||
};
|
||||
|
||||
/*
|
||||
* ->writepage() return values (make these much larger than a pagesize, in
|
||||
* case some fs is returning number-of-bytes-written from writepage)
|
||||
*/
|
||||
#define WRITEPAGE_ACTIVATE 0x80000 /* IO was not started: activate page */
|
||||
|
||||
/*
|
||||
* fs/fs-writeback.c
|
||||
*/
|
||||
|
73
mm/filemap.c
73
mm/filemap.c
@ -831,8 +831,13 @@ readpage:
|
||||
/* Start the actual read. The read will unlock the page. */
|
||||
error = mapping->a_ops->readpage(filp, page);
|
||||
|
||||
if (unlikely(error))
|
||||
if (unlikely(error)) {
|
||||
if (error == AOP_TRUNCATED_PAGE) {
|
||||
page_cache_release(page);
|
||||
goto find_page;
|
||||
}
|
||||
goto readpage_error;
|
||||
}
|
||||
|
||||
if (!PageUptodate(page)) {
|
||||
lock_page(page);
|
||||
@ -1152,26 +1157,24 @@ static int fastcall page_cache_read(struct file * file, unsigned long offset)
|
||||
{
|
||||
struct address_space *mapping = file->f_mapping;
|
||||
struct page *page;
|
||||
int error;
|
||||
int ret;
|
||||
|
||||
page = page_cache_alloc_cold(mapping);
|
||||
if (!page)
|
||||
return -ENOMEM;
|
||||
do {
|
||||
page = page_cache_alloc_cold(mapping);
|
||||
if (!page)
|
||||
return -ENOMEM;
|
||||
|
||||
ret = add_to_page_cache_lru(page, mapping, offset, GFP_KERNEL);
|
||||
if (ret == 0)
|
||||
ret = mapping->a_ops->readpage(file, page);
|
||||
else if (ret == -EEXIST)
|
||||
ret = 0; /* losing race to add is OK */
|
||||
|
||||
error = add_to_page_cache_lru(page, mapping, offset, GFP_KERNEL);
|
||||
if (!error) {
|
||||
error = mapping->a_ops->readpage(file, page);
|
||||
page_cache_release(page);
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
|
||||
* We arrive here in the unlikely event that someone
|
||||
* raced with us and added our page to the cache first
|
||||
* or we are out of memory for radix-tree nodes.
|
||||
*/
|
||||
page_cache_release(page);
|
||||
return error == -EEXIST ? 0 : error;
|
||||
} while (ret == AOP_TRUNCATED_PAGE);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
#define MMAP_LOTSAMISS (100)
|
||||
@ -1331,10 +1334,14 @@ page_not_uptodate:
|
||||
goto success;
|
||||
}
|
||||
|
||||
if (!mapping->a_ops->readpage(file, page)) {
|
||||
error = mapping->a_ops->readpage(file, page);
|
||||
if (!error) {
|
||||
wait_on_page_locked(page);
|
||||
if (PageUptodate(page))
|
||||
goto success;
|
||||
} else if (error == AOP_TRUNCATED_PAGE) {
|
||||
page_cache_release(page);
|
||||
goto retry_find;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1358,10 +1365,14 @@ page_not_uptodate:
|
||||
goto success;
|
||||
}
|
||||
ClearPageError(page);
|
||||
if (!mapping->a_ops->readpage(file, page)) {
|
||||
error = mapping->a_ops->readpage(file, page);
|
||||
if (!error) {
|
||||
wait_on_page_locked(page);
|
||||
if (PageUptodate(page))
|
||||
goto success;
|
||||
} else if (error == AOP_TRUNCATED_PAGE) {
|
||||
page_cache_release(page);
|
||||
goto retry_find;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1444,10 +1455,14 @@ page_not_uptodate:
|
||||
goto success;
|
||||
}
|
||||
|
||||
if (!mapping->a_ops->readpage(file, page)) {
|
||||
error = mapping->a_ops->readpage(file, page);
|
||||
if (!error) {
|
||||
wait_on_page_locked(page);
|
||||
if (PageUptodate(page))
|
||||
goto success;
|
||||
} else if (error == AOP_TRUNCATED_PAGE) {
|
||||
page_cache_release(page);
|
||||
goto retry_find;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1470,10 +1485,14 @@ page_not_uptodate:
|
||||
}
|
||||
|
||||
ClearPageError(page);
|
||||
if (!mapping->a_ops->readpage(file, page)) {
|
||||
error = mapping->a_ops->readpage(file, page);
|
||||
if (!error) {
|
||||
wait_on_page_locked(page);
|
||||
if (PageUptodate(page))
|
||||
goto success;
|
||||
} else if (error == AOP_TRUNCATED_PAGE) {
|
||||
page_cache_release(page);
|
||||
goto retry_find;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1934,12 +1953,16 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
|
||||
status = a_ops->prepare_write(file, page, offset, offset+bytes);
|
||||
if (unlikely(status)) {
|
||||
loff_t isize = i_size_read(inode);
|
||||
|
||||
if (status != AOP_TRUNCATED_PAGE)
|
||||
unlock_page(page);
|
||||
page_cache_release(page);
|
||||
if (status == AOP_TRUNCATED_PAGE)
|
||||
continue;
|
||||
/*
|
||||
* prepare_write() may have instantiated a few blocks
|
||||
* outside i_size. Trim these off again.
|
||||
*/
|
||||
unlock_page(page);
|
||||
page_cache_release(page);
|
||||
if (pos + bytes > isize)
|
||||
vmtruncate(inode, isize);
|
||||
break;
|
||||
@ -1952,6 +1975,10 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
|
||||
cur_iov, iov_base, bytes);
|
||||
flush_dcache_page(page);
|
||||
status = a_ops->commit_write(file, page, offset, offset+bytes);
|
||||
if (status == AOP_TRUNCATED_PAGE) {
|
||||
page_cache_release(page);
|
||||
continue;
|
||||
}
|
||||
if (likely(copied > 0)) {
|
||||
if (!status)
|
||||
status = copied;
|
||||
|
@ -158,7 +158,7 @@ static int read_pages(struct address_space *mapping, struct file *filp,
|
||||
{
|
||||
unsigned page_idx;
|
||||
struct pagevec lru_pvec;
|
||||
int ret = 0;
|
||||
int ret;
|
||||
|
||||
if (mapping->a_ops->readpages) {
|
||||
ret = mapping->a_ops->readpages(filp, mapping, pages, nr_pages);
|
||||
@ -171,14 +171,17 @@ static int read_pages(struct address_space *mapping, struct file *filp,
|
||||
list_del(&page->lru);
|
||||
if (!add_to_page_cache(page, mapping,
|
||||
page->index, GFP_KERNEL)) {
|
||||
mapping->a_ops->readpage(filp, page);
|
||||
if (!pagevec_add(&lru_pvec, page))
|
||||
__pagevec_lru_add(&lru_pvec);
|
||||
} else {
|
||||
page_cache_release(page);
|
||||
ret = mapping->a_ops->readpage(filp, page);
|
||||
if (ret != AOP_TRUNCATED_PAGE) {
|
||||
if (!pagevec_add(&lru_pvec, page))
|
||||
__pagevec_lru_add(&lru_pvec);
|
||||
continue;
|
||||
} /* else fall through to release */
|
||||
}
|
||||
page_cache_release(page);
|
||||
}
|
||||
pagevec_lru_add(&lru_pvec);
|
||||
ret = 0;
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
@ -855,7 +855,7 @@ unlock:
|
||||
swap_free(swap);
|
||||
redirty:
|
||||
set_page_dirty(page);
|
||||
return WRITEPAGE_ACTIVATE; /* Return with the page locked */
|
||||
return AOP_WRITEPAGE_ACTIVATE; /* Return with the page locked */
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NUMA
|
||||
|
@ -367,7 +367,7 @@ static pageout_t pageout(struct page *page, struct address_space *mapping)
|
||||
res = mapping->a_ops->writepage(page, &wbc);
|
||||
if (res < 0)
|
||||
handle_write_error(mapping, page, res);
|
||||
if (res == WRITEPAGE_ACTIVATE) {
|
||||
if (res == AOP_WRITEPAGE_ACTIVATE) {
|
||||
ClearPageReclaim(page);
|
||||
return PAGE_ACTIVATE;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user