forked from Minki/linux
mm: lockless pagecache
Combine page_cache_get_speculative with lockless radix tree lookups to introduce lockless page cache lookups (i.e. no mapping->tree_lock on the read side). The only atomicity change this introduces is that the gang pagecache lookup functions now behave as if they were implemented with multiple find_get_page calls, rather than operating on a snapshot of the pages. In practice, this snapshot guarantee is not relied upon anyway, and the gang lookups exist to replace individual lookups, so these semantics are natural. Signed-off-by: Nick Piggin <npiggin@suse.de> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Hugh Dickins <hugh@veritas.com> Cc: "Paul E. McKenney" <paulmck@us.ibm.com> Reviewed-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
e286781d5f
commit
a60637c858
179
mm/filemap.c
179
mm/filemap.c
@ -637,15 +637,35 @@ void __lock_page_nosync(struct page *page)
|
||||
* Is there a pagecache struct page at the given (mapping, offset) tuple?
|
||||
* If yes, increment its refcount and return it; if no, return NULL.
|
||||
*/
|
||||
struct page * find_get_page(struct address_space *mapping, pgoff_t offset)
|
||||
struct page *find_get_page(struct address_space *mapping, pgoff_t offset)
|
||||
{
|
||||
void **pagep;
|
||||
struct page *page;
|
||||
|
||||
read_lock_irq(&mapping->tree_lock);
|
||||
page = radix_tree_lookup(&mapping->page_tree, offset);
|
||||
if (page)
|
||||
page_cache_get(page);
|
||||
read_unlock_irq(&mapping->tree_lock);
|
||||
rcu_read_lock();
|
||||
repeat:
|
||||
page = NULL;
|
||||
pagep = radix_tree_lookup_slot(&mapping->page_tree, offset);
|
||||
if (pagep) {
|
||||
page = radix_tree_deref_slot(pagep);
|
||||
if (unlikely(!page || page == RADIX_TREE_RETRY))
|
||||
goto repeat;
|
||||
|
||||
if (!page_cache_get_speculative(page))
|
||||
goto repeat;
|
||||
|
||||
/*
|
||||
* Has the page moved?
|
||||
* This is part of the lockless pagecache protocol. See
|
||||
* include/linux/pagemap.h for details.
|
||||
*/
|
||||
if (unlikely(page != *pagep)) {
|
||||
page_cache_release(page);
|
||||
goto repeat;
|
||||
}
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
return page;
|
||||
}
|
||||
EXPORT_SYMBOL(find_get_page);
|
||||
@ -660,32 +680,22 @@ EXPORT_SYMBOL(find_get_page);
|
||||
*
|
||||
* Returns zero if the page was not present. find_lock_page() may sleep.
|
||||
*/
|
||||
struct page *find_lock_page(struct address_space *mapping,
|
||||
pgoff_t offset)
|
||||
struct page *find_lock_page(struct address_space *mapping, pgoff_t offset)
|
||||
{
|
||||
struct page *page;
|
||||
|
||||
repeat:
|
||||
read_lock_irq(&mapping->tree_lock);
|
||||
page = radix_tree_lookup(&mapping->page_tree, offset);
|
||||
page = find_get_page(mapping, offset);
|
||||
if (page) {
|
||||
page_cache_get(page);
|
||||
if (TestSetPageLocked(page)) {
|
||||
read_unlock_irq(&mapping->tree_lock);
|
||||
__lock_page(page);
|
||||
|
||||
/* Has the page been truncated while we slept? */
|
||||
if (unlikely(page->mapping != mapping)) {
|
||||
unlock_page(page);
|
||||
page_cache_release(page);
|
||||
goto repeat;
|
||||
}
|
||||
VM_BUG_ON(page->index != offset);
|
||||
goto out;
|
||||
lock_page(page);
|
||||
/* Has the page been truncated? */
|
||||
if (unlikely(page->mapping != mapping)) {
|
||||
unlock_page(page);
|
||||
page_cache_release(page);
|
||||
goto repeat;
|
||||
}
|
||||
VM_BUG_ON(page->index != offset);
|
||||
}
|
||||
read_unlock_irq(&mapping->tree_lock);
|
||||
out:
|
||||
return page;
|
||||
}
|
||||
EXPORT_SYMBOL(find_lock_page);
|
||||
@ -751,13 +761,39 @@ unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
|
||||
{
|
||||
unsigned int i;
|
||||
unsigned int ret;
|
||||
unsigned int nr_found;
|
||||
|
||||
read_lock_irq(&mapping->tree_lock);
|
||||
ret = radix_tree_gang_lookup(&mapping->page_tree,
|
||||
(void **)pages, start, nr_pages);
|
||||
for (i = 0; i < ret; i++)
|
||||
page_cache_get(pages[i]);
|
||||
read_unlock_irq(&mapping->tree_lock);
|
||||
rcu_read_lock();
|
||||
restart:
|
||||
nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
|
||||
(void ***)pages, start, nr_pages);
|
||||
ret = 0;
|
||||
for (i = 0; i < nr_found; i++) {
|
||||
struct page *page;
|
||||
repeat:
|
||||
page = radix_tree_deref_slot((void **)pages[i]);
|
||||
if (unlikely(!page))
|
||||
continue;
|
||||
/*
|
||||
* this can only trigger if nr_found == 1, making livelock
|
||||
* a non issue.
|
||||
*/
|
||||
if (unlikely(page == RADIX_TREE_RETRY))
|
||||
goto restart;
|
||||
|
||||
if (!page_cache_get_speculative(page))
|
||||
goto repeat;
|
||||
|
||||
/* Has the page moved? */
|
||||
if (unlikely(page != *((void **)pages[i]))) {
|
||||
page_cache_release(page);
|
||||
goto repeat;
|
||||
}
|
||||
|
||||
pages[ret] = page;
|
||||
ret++;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -778,19 +814,44 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
|
||||
{
|
||||
unsigned int i;
|
||||
unsigned int ret;
|
||||
unsigned int nr_found;
|
||||
|
||||
read_lock_irq(&mapping->tree_lock);
|
||||
ret = radix_tree_gang_lookup(&mapping->page_tree,
|
||||
(void **)pages, index, nr_pages);
|
||||
for (i = 0; i < ret; i++) {
|
||||
if (pages[i]->mapping == NULL || pages[i]->index != index)
|
||||
rcu_read_lock();
|
||||
restart:
|
||||
nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
|
||||
(void ***)pages, index, nr_pages);
|
||||
ret = 0;
|
||||
for (i = 0; i < nr_found; i++) {
|
||||
struct page *page;
|
||||
repeat:
|
||||
page = radix_tree_deref_slot((void **)pages[i]);
|
||||
if (unlikely(!page))
|
||||
continue;
|
||||
/*
|
||||
* this can only trigger if nr_found == 1, making livelock
|
||||
* a non issue.
|
||||
*/
|
||||
if (unlikely(page == RADIX_TREE_RETRY))
|
||||
goto restart;
|
||||
|
||||
if (page->mapping == NULL || page->index != index)
|
||||
break;
|
||||
|
||||
page_cache_get(pages[i]);
|
||||
if (!page_cache_get_speculative(page))
|
||||
goto repeat;
|
||||
|
||||
/* Has the page moved? */
|
||||
if (unlikely(page != *((void **)pages[i]))) {
|
||||
page_cache_release(page);
|
||||
goto repeat;
|
||||
}
|
||||
|
||||
pages[ret] = page;
|
||||
ret++;
|
||||
index++;
|
||||
}
|
||||
read_unlock_irq(&mapping->tree_lock);
|
||||
return i;
|
||||
rcu_read_unlock();
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(find_get_pages_contig);
|
||||
|
||||
@ -810,15 +871,43 @@ unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
|
||||
{
|
||||
unsigned int i;
|
||||
unsigned int ret;
|
||||
unsigned int nr_found;
|
||||
|
||||
rcu_read_lock();
|
||||
restart:
|
||||
nr_found = radix_tree_gang_lookup_tag_slot(&mapping->page_tree,
|
||||
(void ***)pages, *index, nr_pages, tag);
|
||||
ret = 0;
|
||||
for (i = 0; i < nr_found; i++) {
|
||||
struct page *page;
|
||||
repeat:
|
||||
page = radix_tree_deref_slot((void **)pages[i]);
|
||||
if (unlikely(!page))
|
||||
continue;
|
||||
/*
|
||||
* this can only trigger if nr_found == 1, making livelock
|
||||
* a non issue.
|
||||
*/
|
||||
if (unlikely(page == RADIX_TREE_RETRY))
|
||||
goto restart;
|
||||
|
||||
if (!page_cache_get_speculative(page))
|
||||
goto repeat;
|
||||
|
||||
/* Has the page moved? */
|
||||
if (unlikely(page != *((void **)pages[i]))) {
|
||||
page_cache_release(page);
|
||||
goto repeat;
|
||||
}
|
||||
|
||||
pages[ret] = page;
|
||||
ret++;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
read_lock_irq(&mapping->tree_lock);
|
||||
ret = radix_tree_gang_lookup_tag(&mapping->page_tree,
|
||||
(void **)pages, *index, nr_pages, tag);
|
||||
for (i = 0; i < ret; i++)
|
||||
page_cache_get(pages[i]);
|
||||
if (ret)
|
||||
*index = pages[ret - 1]->index + 1;
|
||||
read_unlock_irq(&mapping->tree_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(find_get_pages_tag);
|
||||
|
Loading…
Reference in New Issue
Block a user