mirror of
https://github.com/torvalds/linux.git
synced 2024-12-26 12:52:30 +00:00
09cbfeaf1a
PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} macros were introduced *long* time ago with promise that one day it will be possible to implement page cache with bigger chunks than PAGE_SIZE. This promise never materialized. And unlikely will. We have many places where PAGE_CACHE_SIZE assumed to be equal to PAGE_SIZE. And it's constant source of confusion on whether PAGE_CACHE_* or PAGE_* constant should be used in a particular case, especially on the border between fs and mm. Global switching to PAGE_CACHE_SIZE != PAGE_SIZE would cause to much breakage to be doable. Let's stop pretending that pages in page cache are special. They are not. The changes are pretty straight-forward: - <foo> << (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>; - <foo> >> (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>; - PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} -> PAGE_{SIZE,SHIFT,MASK,ALIGN}; - page_cache_get() -> get_page(); - page_cache_release() -> put_page(); This patch contains automated changes generated with coccinelle using script below. For some reason, coccinelle doesn't patch header files. I've called spatch for them manually. The only adjustment after coccinelle is revert of changes to PAGE_CAHCE_ALIGN definition: we are going to drop it later. There are few places in the code where coccinelle didn't reach. I'll fix them manually in a separate patch. Comments and documentation also will be addressed with the separate patch. virtual patch @@ expression E; @@ - E << (PAGE_CACHE_SHIFT - PAGE_SHIFT) + E @@ expression E; @@ - E >> (PAGE_CACHE_SHIFT - PAGE_SHIFT) + E @@ @@ - PAGE_CACHE_SHIFT + PAGE_SHIFT @@ @@ - PAGE_CACHE_SIZE + PAGE_SIZE @@ @@ - PAGE_CACHE_MASK + PAGE_MASK @@ expression E; @@ - PAGE_CACHE_ALIGN(E) + PAGE_ALIGN(E) @@ expression E; @@ - page_cache_get(E) + get_page(E) @@ expression E; @@ - page_cache_release(E) + put_page(E) Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Acked-by: Michal Hocko <mhocko@suse.com> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
163 lines
3.7 KiB
C
163 lines
3.7 KiB
C
/*
|
|
* mm/fadvise.c
|
|
*
|
|
* Copyright (C) 2002, Linus Torvalds
|
|
*
|
|
* 11Jan2003 Andrew Morton
|
|
* Initial version.
|
|
*/
|
|
|
|
#include <linux/kernel.h>
|
|
#include <linux/file.h>
|
|
#include <linux/fs.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/pagemap.h>
|
|
#include <linux/backing-dev.h>
|
|
#include <linux/pagevec.h>
|
|
#include <linux/fadvise.h>
|
|
#include <linux/writeback.h>
|
|
#include <linux/syscalls.h>
|
|
#include <linux/swap.h>
|
|
|
|
#include <asm/unistd.h>
|
|
|
|
/*
|
|
* POSIX_FADV_WILLNEED could set PG_Referenced, and POSIX_FADV_NOREUSE could
|
|
* deactivate the pages and clear PG_Referenced.
|
|
*/
|
|
SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice)
|
|
{
|
|
struct fd f = fdget(fd);
|
|
struct inode *inode;
|
|
struct address_space *mapping;
|
|
struct backing_dev_info *bdi;
|
|
loff_t endbyte; /* inclusive */
|
|
pgoff_t start_index;
|
|
pgoff_t end_index;
|
|
unsigned long nrpages;
|
|
int ret = 0;
|
|
|
|
if (!f.file)
|
|
return -EBADF;
|
|
|
|
inode = file_inode(f.file);
|
|
if (S_ISFIFO(inode->i_mode)) {
|
|
ret = -ESPIPE;
|
|
goto out;
|
|
}
|
|
|
|
mapping = f.file->f_mapping;
|
|
if (!mapping || len < 0) {
|
|
ret = -EINVAL;
|
|
goto out;
|
|
}
|
|
|
|
if (IS_DAX(inode)) {
|
|
switch (advice) {
|
|
case POSIX_FADV_NORMAL:
|
|
case POSIX_FADV_RANDOM:
|
|
case POSIX_FADV_SEQUENTIAL:
|
|
case POSIX_FADV_WILLNEED:
|
|
case POSIX_FADV_NOREUSE:
|
|
case POSIX_FADV_DONTNEED:
|
|
/* no bad return value, but ignore advice */
|
|
break;
|
|
default:
|
|
ret = -EINVAL;
|
|
}
|
|
goto out;
|
|
}
|
|
|
|
/* Careful about overflows. Len == 0 means "as much as possible" */
|
|
endbyte = offset + len;
|
|
if (!len || endbyte < len)
|
|
endbyte = -1;
|
|
else
|
|
endbyte--; /* inclusive */
|
|
|
|
bdi = inode_to_bdi(mapping->host);
|
|
|
|
switch (advice) {
|
|
case POSIX_FADV_NORMAL:
|
|
f.file->f_ra.ra_pages = bdi->ra_pages;
|
|
spin_lock(&f.file->f_lock);
|
|
f.file->f_mode &= ~FMODE_RANDOM;
|
|
spin_unlock(&f.file->f_lock);
|
|
break;
|
|
case POSIX_FADV_RANDOM:
|
|
spin_lock(&f.file->f_lock);
|
|
f.file->f_mode |= FMODE_RANDOM;
|
|
spin_unlock(&f.file->f_lock);
|
|
break;
|
|
case POSIX_FADV_SEQUENTIAL:
|
|
f.file->f_ra.ra_pages = bdi->ra_pages * 2;
|
|
spin_lock(&f.file->f_lock);
|
|
f.file->f_mode &= ~FMODE_RANDOM;
|
|
spin_unlock(&f.file->f_lock);
|
|
break;
|
|
case POSIX_FADV_WILLNEED:
|
|
/* First and last PARTIAL page! */
|
|
start_index = offset >> PAGE_SHIFT;
|
|
end_index = endbyte >> PAGE_SHIFT;
|
|
|
|
/* Careful about overflow on the "+1" */
|
|
nrpages = end_index - start_index + 1;
|
|
if (!nrpages)
|
|
nrpages = ~0UL;
|
|
|
|
/*
|
|
* Ignore return value because fadvise() shall return
|
|
* success even if filesystem can't retrieve a hint,
|
|
*/
|
|
force_page_cache_readahead(mapping, f.file, start_index,
|
|
nrpages);
|
|
break;
|
|
case POSIX_FADV_NOREUSE:
|
|
break;
|
|
case POSIX_FADV_DONTNEED:
|
|
if (!inode_write_congested(mapping->host))
|
|
__filemap_fdatawrite_range(mapping, offset, endbyte,
|
|
WB_SYNC_NONE);
|
|
|
|
/*
|
|
* First and last FULL page! Partial pages are deliberately
|
|
* preserved on the expectation that it is better to preserve
|
|
* needed memory than to discard unneeded memory.
|
|
*/
|
|
start_index = (offset+(PAGE_SIZE-1)) >> PAGE_SHIFT;
|
|
end_index = (endbyte >> PAGE_SHIFT);
|
|
|
|
if (end_index >= start_index) {
|
|
unsigned long count = invalidate_mapping_pages(mapping,
|
|
start_index, end_index);
|
|
|
|
/*
|
|
* If fewer pages were invalidated than expected then
|
|
* it is possible that some of the pages were on
|
|
* a per-cpu pagevec for a remote CPU. Drain all
|
|
* pagevecs and try again.
|
|
*/
|
|
if (count < (end_index - start_index + 1)) {
|
|
lru_add_drain_all();
|
|
invalidate_mapping_pages(mapping, start_index,
|
|
end_index);
|
|
}
|
|
}
|
|
break;
|
|
default:
|
|
ret = -EINVAL;
|
|
}
|
|
out:
|
|
fdput(f);
|
|
return ret;
|
|
}
|
|
|
|
#ifdef __ARCH_WANT_SYS_FADVISE64
|
|
|
|
SYSCALL_DEFINE4(fadvise64, int, fd, loff_t, offset, size_t, len, int, advice)
|
|
{
|
|
return sys_fadvise64_64(fd, offset, len, advice);
|
|
}
|
|
|
|
#endif
|