linux/fs/nfs/nfs4super.c

362 lines
8.7 KiB
C
Raw Normal View History

/*
* Copyright (c) 2012 Bryan Schumaker <bjschuma@netapp.com>
*/
#include <linux/init.h>
#include <linux/module.h>
#include <linux/nfs4_mount.h>
#include <linux/nfs_fs.h>
#include "delegation.h"
#include "internal.h"
#include "nfs4_fs.h"
#include "nfs4idmap.h"
#include "dns_resolve.h"
#include "pnfs.h"
#include "nfs.h"
#define NFSDBG_FACILITY NFSDBG_VFS
static int nfs4_write_inode(struct inode *inode, struct writeback_control *wbc);
static void nfs4_evict_inode(struct inode *inode);
static struct dentry *nfs4_remote_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *raw_data);
static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *raw_data);
static struct dentry *nfs4_remote_referral_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *raw_data);
static struct file_system_type nfs4_remote_fs_type = {
.owner = THIS_MODULE,
.name = "nfs4",
.mount = nfs4_remote_mount,
.kill_sb = nfs_kill_super,
vfs: kill FS_REVAL_DOT by adding a d_weak_revalidate dentry op The following set of operations on a NFS client and server will cause server# mkdir a client# cd a server# mv a a.bak client# sleep 30 # (or whatever the dir attrcache timeout is) client# stat . stat: cannot stat `.': Stale NFS file handle Obviously, we should not be getting an ESTALE error back there since the inode still exists on the server. The problem is that the lookup code will call d_revalidate on the dentry that "." refers to, because NFS has FS_REVAL_DOT set. nfs_lookup_revalidate will see that the parent directory has changed and will try to reverify the dentry by redoing a LOOKUP. That of course fails, so the lookup code returns ESTALE. The problem here is that d_revalidate is really a bad fit for this case. What we really want to know at this point is whether the inode is still good or not, but we don't really care what name it goes by or whether the dcache is still valid. Add a new d_op->d_weak_revalidate operation and have complete_walk call that instead of d_revalidate. The intent there is to allow for a "weaker" d_revalidate that just checks to see whether the inode is still good. This is also gives us an opportunity to kill off the FS_REVAL_DOT special casing. [AV: changed method name, added note in porting, fixed confusion re having it possibly called from RCU mode (it won't be)] Cc: NeilBrown <neilb@suse.de> Signed-off-by: Jeff Layton <jlayton@redhat.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2013-02-20 16:19:05 +00:00
.fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
};
static struct file_system_type nfs4_remote_referral_fs_type = {
.owner = THIS_MODULE,
.name = "nfs4",
.mount = nfs4_remote_referral_mount,
.kill_sb = nfs_kill_super,
vfs: kill FS_REVAL_DOT by adding a d_weak_revalidate dentry op The following set of operations on a NFS client and server will cause server# mkdir a client# cd a server# mv a a.bak client# sleep 30 # (or whatever the dir attrcache timeout is) client# stat . stat: cannot stat `.': Stale NFS file handle Obviously, we should not be getting an ESTALE error back there since the inode still exists on the server. The problem is that the lookup code will call d_revalidate on the dentry that "." refers to, because NFS has FS_REVAL_DOT set. nfs_lookup_revalidate will see that the parent directory has changed and will try to reverify the dentry by redoing a LOOKUP. That of course fails, so the lookup code returns ESTALE. The problem here is that d_revalidate is really a bad fit for this case. What we really want to know at this point is whether the inode is still good or not, but we don't really care what name it goes by or whether the dcache is still valid. Add a new d_op->d_weak_revalidate operation and have complete_walk call that instead of d_revalidate. The intent there is to allow for a "weaker" d_revalidate that just checks to see whether the inode is still good. This is also gives us an opportunity to kill off the FS_REVAL_DOT special casing. [AV: changed method name, added note in porting, fixed confusion re having it possibly called from RCU mode (it won't be)] Cc: NeilBrown <neilb@suse.de> Signed-off-by: Jeff Layton <jlayton@redhat.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2013-02-20 16:19:05 +00:00
.fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
};
struct file_system_type nfs4_referral_fs_type = {
.owner = THIS_MODULE,
.name = "nfs4",
.mount = nfs4_referral_mount,
.kill_sb = nfs_kill_super,
vfs: kill FS_REVAL_DOT by adding a d_weak_revalidate dentry op The following set of operations on a NFS client and server will cause server# mkdir a client# cd a server# mv a a.bak client# sleep 30 # (or whatever the dir attrcache timeout is) client# stat . stat: cannot stat `.': Stale NFS file handle Obviously, we should not be getting an ESTALE error back there since the inode still exists on the server. The problem is that the lookup code will call d_revalidate on the dentry that "." refers to, because NFS has FS_REVAL_DOT set. nfs_lookup_revalidate will see that the parent directory has changed and will try to reverify the dentry by redoing a LOOKUP. That of course fails, so the lookup code returns ESTALE. The problem here is that d_revalidate is really a bad fit for this case. What we really want to know at this point is whether the inode is still good or not, but we don't really care what name it goes by or whether the dcache is still valid. Add a new d_op->d_weak_revalidate operation and have complete_walk call that instead of d_revalidate. The intent there is to allow for a "weaker" d_revalidate that just checks to see whether the inode is still good. This is also gives us an opportunity to kill off the FS_REVAL_DOT special casing. [AV: changed method name, added note in porting, fixed confusion re having it possibly called from RCU mode (it won't be)] Cc: NeilBrown <neilb@suse.de> Signed-off-by: Jeff Layton <jlayton@redhat.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2013-02-20 16:19:05 +00:00
.fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
};
static const struct super_operations nfs4_sops = {
.alloc_inode = nfs_alloc_inode,
.destroy_inode = nfs_destroy_inode,
.write_inode = nfs4_write_inode,
.drop_inode = nfs_drop_inode,
.statfs = nfs_statfs,
.evict_inode = nfs4_evict_inode,
.umount_begin = nfs_umount_begin,
.show_options = nfs_show_options,
.show_devname = nfs_show_devname,
.show_path = nfs_show_path,
.show_stats = nfs_show_stats,
.remount_fs = nfs_remount,
};
struct nfs_subversion nfs_v4 = {
.owner = THIS_MODULE,
.nfs_fs = &nfs4_fs_type,
.rpc_vers = &nfs_version4,
.rpc_ops = &nfs_v4_clientops,
.sops = &nfs4_sops,
.xattr = nfs4_xattr_handlers,
};
static int nfs4_write_inode(struct inode *inode, struct writeback_control *wbc)
{
int ret = nfs_write_inode(inode, wbc);
if (ret == 0)
ret = pnfs_layoutcommit_inode(inode,
wbc->sync_mode == WB_SYNC_ALL);
return ret;
}
/*
* Clean out any remaining NFSv4 state that might be left over due
* to open() calls that passed nfs_atomic_lookup, but failed to call
* nfs_open().
*/
static void nfs4_evict_inode(struct inode *inode)
{
mm + fs: store shadow entries in page cache Reclaim will be leaving shadow entries in the page cache radix tree upon evicting the real page. As those pages are found from the LRU, an iput() can lead to the inode being freed concurrently. At this point, reclaim must no longer install shadow pages because the inode freeing code needs to ensure the page tree is really empty. Add an address_space flag, AS_EXITING, that the inode freeing code sets under the tree lock before doing the final truncate. Reclaim will check for this flag before installing shadow pages. Signed-off-by: Johannes Weiner <hannes@cmpxchg.org> Reviewed-by: Rik van Riel <riel@redhat.com> Reviewed-by: Minchan Kim <minchan@kernel.org> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: Bob Liu <bob.liu@oracle.com> Cc: Christoph Hellwig <hch@infradead.org> Cc: Dave Chinner <david@fromorbit.com> Cc: Greg Thelen <gthelen@google.com> Cc: Hugh Dickins <hughd@google.com> Cc: Jan Kara <jack@suse.cz> Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Cc: Luigi Semenzato <semenzato@google.com> Cc: Mel Gorman <mgorman@suse.de> Cc: Metin Doslu <metin@citusdata.com> Cc: Michel Lespinasse <walken@google.com> Cc: Ozgun Erdogan <ozgun@citusdata.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Roman Gushchin <klamm@yandex-team.ru> Cc: Ryan Mallon <rmallon@gmail.com> Cc: Tejun Heo <tj@kernel.org> Cc: Vlastimil Babka <vbabka@suse.cz> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-04-03 21:47:49 +00:00
truncate_inode_pages_final(&inode->i_data);
clear_inode(inode);
/* If we are holding a delegation, return it! */
nfs_inode_return_delegation_noreclaim(inode);
/* Note that above delegreturn would trigger pnfs return-on-close */
pnfs_return_layout(inode);
pnfs_destroy_layout(NFS_I(inode));
/* First call standard NFS clear_inode() code */
nfs_clear_inode(inode);
}
/*
* Get the superblock for the NFS4 root partition
*/
static struct dentry *
nfs4_remote_mount(struct file_system_type *fs_type, int flags,
const char *dev_name, void *info)
{
struct nfs_mount_info *mount_info = info;
struct nfs_server *server;
struct dentry *mntroot = ERR_PTR(-ENOMEM);
mount_info->set_security = nfs_set_sb_security;
/* Get a volume representation */
server = nfs4_create_server(mount_info, &nfs_v4);
if (IS_ERR(server)) {
mntroot = ERR_CAST(server);
goto out;
}
mntroot = nfs_fs_mount_common(server, flags, dev_name, mount_info, &nfs_v4);
out:
return mntroot;
}
static struct vfsmount *nfs_do_root_mount(struct file_system_type *fs_type,
int flags, void *data, const char *hostname)
{
struct vfsmount *root_mnt;
char *root_devname;
size_t len;
len = strlen(hostname) + 5;
root_devname = kmalloc(len, GFP_KERNEL);
if (root_devname == NULL)
return ERR_PTR(-ENOMEM);
/* Does hostname needs to be enclosed in brackets? */
if (strchr(hostname, ':'))
snprintf(root_devname, len, "[%s]:/", hostname);
else
snprintf(root_devname, len, "%s:/", hostname);
root_mnt = vfs_kern_mount(fs_type, flags, root_devname, data);
kfree(root_devname);
return root_mnt;
}
struct nfs_referral_count {
struct list_head list;
const struct task_struct *task;
unsigned int referral_count;
};
static LIST_HEAD(nfs_referral_count_list);
static DEFINE_SPINLOCK(nfs_referral_count_list_lock);
static struct nfs_referral_count *nfs_find_referral_count(void)
{
struct nfs_referral_count *p;
list_for_each_entry(p, &nfs_referral_count_list, list) {
if (p->task == current)
return p;
}
return NULL;
}
#define NFS_MAX_NESTED_REFERRALS 2
static int nfs_referral_loop_protect(void)
{
struct nfs_referral_count *p, *new;
int ret = -ENOMEM;
new = kmalloc(sizeof(*new), GFP_KERNEL);
if (!new)
goto out;
new->task = current;
new->referral_count = 1;
ret = 0;
spin_lock(&nfs_referral_count_list_lock);
p = nfs_find_referral_count();
if (p != NULL) {
if (p->referral_count >= NFS_MAX_NESTED_REFERRALS)
ret = -ELOOP;
else
p->referral_count++;
} else {
list_add(&new->list, &nfs_referral_count_list);
new = NULL;
}
spin_unlock(&nfs_referral_count_list_lock);
kfree(new);
out:
return ret;
}
static void nfs_referral_loop_unprotect(void)
{
struct nfs_referral_count *p;
spin_lock(&nfs_referral_count_list_lock);
p = nfs_find_referral_count();
p->referral_count--;
if (p->referral_count == 0)
list_del(&p->list);
else
p = NULL;
spin_unlock(&nfs_referral_count_list_lock);
kfree(p);
}
static struct dentry *nfs_follow_remote_path(struct vfsmount *root_mnt,
const char *export_path)
{
struct dentry *dentry;
int err;
if (IS_ERR(root_mnt))
return ERR_CAST(root_mnt);
err = nfs_referral_loop_protect();
if (err) {
mntput(root_mnt);
return ERR_PTR(err);
}
dentry = mount_subtree(root_mnt, export_path);
nfs_referral_loop_unprotect();
return dentry;
}
struct dentry *nfs4_try_mount(int flags, const char *dev_name,
struct nfs_mount_info *mount_info,
struct nfs_subversion *nfs_mod)
{
char *export_path;
struct vfsmount *root_mnt;
struct dentry *res;
struct nfs_parsed_mount_data *data = mount_info->parsed;
dfprintk(MOUNT, "--> nfs4_try_mount()\n");
export_path = data->nfs_server.export_path;
data->nfs_server.export_path = "/";
root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, flags, mount_info,
data->nfs_server.hostname);
data->nfs_server.export_path = export_path;
res = nfs_follow_remote_path(root_mnt, export_path);
dfprintk(MOUNT, "<-- nfs4_try_mount() = %d%s\n",
PTR_ERR_OR_ZERO(res),
IS_ERR(res) ? " [error]" : "");
return res;
}
static struct dentry *
nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags,
const char *dev_name, void *raw_data)
{
struct nfs_mount_info mount_info = {
.fill_super = nfs_fill_super,
.set_security = nfs_clone_sb_security,
.cloned = raw_data,
};
struct nfs_server *server;
struct dentry *mntroot = ERR_PTR(-ENOMEM);
dprintk("--> nfs4_referral_get_sb()\n");
mount_info.mntfh = nfs_alloc_fhandle();
if (mount_info.cloned == NULL || mount_info.mntfh == NULL)
goto out;
/* create a new volume representation */
server = nfs4_create_referral_server(mount_info.cloned, mount_info.mntfh);
if (IS_ERR(server)) {
mntroot = ERR_CAST(server);
goto out;
}
mntroot = nfs_fs_mount_common(server, flags, dev_name, &mount_info, &nfs_v4);
out:
nfs_free_fhandle(mount_info.mntfh);
return mntroot;
}
/*
* Create an NFS4 server record on referral traversal
*/
static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *raw_data)
{
struct nfs_clone_mount *data = raw_data;
char *export_path;
struct vfsmount *root_mnt;
struct dentry *res;
dprintk("--> nfs4_referral_mount()\n");
export_path = data->mnt_path;
data->mnt_path = "/";
root_mnt = nfs_do_root_mount(&nfs4_remote_referral_fs_type,
flags, data, data->hostname);
data->mnt_path = export_path;
res = nfs_follow_remote_path(root_mnt, export_path);
dprintk("<-- nfs4_referral_mount() = %d%s\n",
PTR_ERR_OR_ZERO(res),
IS_ERR(res) ? " [error]" : "");
return res;
}
static int __init init_nfs_v4(void)
{
int err;
err = nfs_dns_resolver_init();
if (err)
goto out;
err = nfs_idmap_init();
if (err)
goto out1;
err = nfs4_register_sysctl();
if (err)
goto out2;
register_nfs_version(&nfs_v4);
return 0;
out2:
nfs_idmap_quit();
out1:
nfs_dns_resolver_destroy();
out:
return err;
}
static void __exit exit_nfs_v4(void)
{
/* Not called in the _init(), conditionally loaded */
nfs4_pnfs_v3_ds_connect_unload();
unregister_nfs_version(&nfs_v4);
nfs4_unregister_sysctl();
nfs_idmap_quit();
nfs_dns_resolver_destroy();
}
MODULE_LICENSE("GPL");
module_init(init_nfs_v4);
module_exit(exit_nfs_v4);