Merge git://git.linux-nfs.org/pub/linux/nfs-2.6
* git://git.linux-nfs.org/pub/linux/nfs-2.6: (131 commits)
  NFSv4: Fix a typo in nfs_inode_reclaim_delegation
  NFS: Add a boot parameter to disable 64 bit inode numbers
  NFS: nfs_refresh_inode should clear cache_validity flags on success
  NFS: Fix a connectathon regression in NFSv3 and NFSv4
  NFS: Use nfs_refresh_inode() in ops that aren't expected to change the inode
  SUNRPC: Don't call xprt_release in call refresh
  SUNRPC: Don't call xprt_release() if call_allocate fails
  SUNRPC: Fix buggy UDP transmission
  [23/37] Clean up duplicate includes in
  [2.6 patch] net/sunrpc/rpcb_clnt.c: make struct rpcb_program static
  SUNRPC: Use correct type in buffer length calculations
  SUNRPC: Fix default hostname created in rpc_create()
  nfs: add server port to rpc_pipe info file
  NFS: Get rid of some obsolete macros
  NFS: Simplify filehandle revalidation
  NFS: Ensure that nfs_link() returns a hashed dentry
  NFS: Be strict about dentry revalidation when doing exclusive create
  NFS: Don't zap the readdir caches upon error
  NFS: Remove the redundant nfs_reval_fsid()
  NFSv3: Always use directory post-op attributes in nfs3_proc_lookup
  ...

Fix up trivial conflict due to sock_owned_by_user() cleanup manually in
net/sunrpc/xprtsock.c
This commit is contained in:
commit
f4921aff5b
@ -1083,6 +1083,13 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
[NFS] set the maximum lifetime for idmapper cache
|
||||
entries.
|
||||
|
||||
nfs.enable_ino64=
|
||||
[NFS] enable 64-bit inode numbers.
|
||||
If zero, the NFS client will fake up a 32-bit inode
|
||||
number for the readdir() and stat() syscalls instead
|
||||
of returning the full 64-bit number.
|
||||
The default is to return 64-bit inode numbers.
|
||||
|
||||
nmi_watchdog= [KNL,BUGS=X86-32] Debugging features for SMP kernels
|
||||
|
||||
no387 [BUGS=X86-32] Tells the kernel to use the 387 maths
|
||||
|
@ -1755,6 +1755,14 @@ config SUNRPC
|
||||
config SUNRPC_GSS
|
||||
tristate
|
||||
|
||||
config SUNRPC_XPRT_RDMA
|
||||
tristate "RDMA transport for sunrpc (EXPERIMENTAL)"
|
||||
depends on SUNRPC && INFINIBAND && EXPERIMENTAL
|
||||
default m
|
||||
help
|
||||
Adds a client RPC transport for supporting kernel NFS over RDMA
|
||||
mounts, including Infiniband and iWARP. Experimental.
|
||||
|
||||
config SUNRPC_BIND34
|
||||
bool "Support for rpcbind versions 3 & 4 (EXPERIMENTAL)"
|
||||
depends on SUNRPC && EXPERIMENTAL
|
||||
|
@ -10,6 +10,7 @@
|
||||
#include <linux/utsname.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/sunrpc/clnt.h>
|
||||
#include <linux/sunrpc/xprtsock.h>
|
||||
#include <linux/sunrpc/svc.h>
|
||||
#include <linux/lockd/lockd.h>
|
||||
#include <linux/lockd/sm_inter.h>
|
||||
@ -132,7 +133,7 @@ nsm_create(void)
|
||||
.sin_port = 0,
|
||||
};
|
||||
struct rpc_create_args args = {
|
||||
.protocol = IPPROTO_UDP,
|
||||
.protocol = XPRT_TRANSPORT_UDP,
|
||||
.address = (struct sockaddr *)&sin,
|
||||
.addrsize = sizeof(sin),
|
||||
.servername = "localhost",
|
||||
|
@ -62,8 +62,9 @@ static __be32 *nlm_decode_cookie(__be32 *p, struct nlm_cookie *c)
|
||||
}
|
||||
else
|
||||
{
|
||||
printk(KERN_NOTICE
|
||||
"lockd: bad cookie size %d (only cookies under %d bytes are supported.)\n", len, NLM_MAXCOOKIELEN);
|
||||
dprintk("lockd: bad cookie size %d (only cookies under "
|
||||
"%d bytes are supported.)\n",
|
||||
len, NLM_MAXCOOKIELEN);
|
||||
return NULL;
|
||||
}
|
||||
return p;
|
||||
@ -84,8 +85,7 @@ nlm_decode_fh(__be32 *p, struct nfs_fh *f)
|
||||
unsigned int len;
|
||||
|
||||
if ((len = ntohl(*p++)) != NFS2_FHSIZE) {
|
||||
printk(KERN_NOTICE
|
||||
"lockd: bad fhandle size %d (should be %d)\n",
|
||||
dprintk("lockd: bad fhandle size %d (should be %d)\n",
|
||||
len, NFS2_FHSIZE);
|
||||
return NULL;
|
||||
}
|
||||
|
@ -64,8 +64,9 @@ nlm4_decode_cookie(__be32 *p, struct nlm_cookie *c)
|
||||
}
|
||||
else
|
||||
{
|
||||
printk(KERN_NOTICE
|
||||
"lockd: bad cookie size %d (only cookies under %d bytes are supported.)\n", len, NLM_MAXCOOKIELEN);
|
||||
dprintk("lockd: bad cookie size %d (only cookies under "
|
||||
"%d bytes are supported.)\n",
|
||||
len, NLM_MAXCOOKIELEN);
|
||||
return NULL;
|
||||
}
|
||||
return p;
|
||||
@ -86,8 +87,7 @@ nlm4_decode_fh(__be32 *p, struct nfs_fh *f)
|
||||
memset(f->data, 0, sizeof(f->data));
|
||||
f->size = ntohl(*p++);
|
||||
if (f->size > NFS_MAXFHSIZE) {
|
||||
printk(KERN_NOTICE
|
||||
"lockd: bad fhandle size %d (should be <=%d)\n",
|
||||
dprintk("lockd: bad fhandle size %d (should be <=%d)\n",
|
||||
f->size, NFS_MAXFHSIZE);
|
||||
return NULL;
|
||||
}
|
||||
|
@ -16,4 +16,3 @@ nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \
|
||||
nfs4namespace.o
|
||||
nfs-$(CONFIG_NFS_DIRECTIO) += direct.o
|
||||
nfs-$(CONFIG_SYSCTL) += sysctl.o
|
||||
nfs-objs := $(nfs-y)
|
||||
|
@ -23,6 +23,8 @@
|
||||
#include <linux/sunrpc/clnt.h>
|
||||
#include <linux/sunrpc/stats.h>
|
||||
#include <linux/sunrpc/metrics.h>
|
||||
#include <linux/sunrpc/xprtsock.h>
|
||||
#include <linux/sunrpc/xprtrdma.h>
|
||||
#include <linux/nfs_fs.h>
|
||||
#include <linux/nfs_mount.h>
|
||||
#include <linux/nfs4_mount.h>
|
||||
@ -340,7 +342,8 @@ static void nfs_init_timeout_values(struct rpc_timeout *to, int proto,
|
||||
to->to_retries = 2;
|
||||
|
||||
switch (proto) {
|
||||
case IPPROTO_TCP:
|
||||
case XPRT_TRANSPORT_TCP:
|
||||
case XPRT_TRANSPORT_RDMA:
|
||||
if (!to->to_initval)
|
||||
to->to_initval = 60 * HZ;
|
||||
if (to->to_initval > NFS_MAX_TCP_TIMEOUT)
|
||||
@ -349,7 +352,7 @@ static void nfs_init_timeout_values(struct rpc_timeout *to, int proto,
|
||||
to->to_maxval = to->to_initval + (to->to_increment * to->to_retries);
|
||||
to->to_exponential = 0;
|
||||
break;
|
||||
case IPPROTO_UDP:
|
||||
case XPRT_TRANSPORT_UDP:
|
||||
default:
|
||||
if (!to->to_initval)
|
||||
to->to_initval = 11 * HZ / 10;
|
||||
@ -501,9 +504,9 @@ static int nfs_init_server_rpcclient(struct nfs_server *server, rpc_authflavor_t
|
||||
/*
|
||||
* Initialise an NFS2 or NFS3 client
|
||||
*/
|
||||
static int nfs_init_client(struct nfs_client *clp, const struct nfs_mount_data *data)
|
||||
static int nfs_init_client(struct nfs_client *clp,
|
||||
const struct nfs_parsed_mount_data *data)
|
||||
{
|
||||
int proto = (data->flags & NFS_MOUNT_TCP) ? IPPROTO_TCP : IPPROTO_UDP;
|
||||
int error;
|
||||
|
||||
if (clp->cl_cons_state == NFS_CS_READY) {
|
||||
@ -522,8 +525,8 @@ static int nfs_init_client(struct nfs_client *clp, const struct nfs_mount_data *
|
||||
* Create a client RPC handle for doing FSSTAT with UNIX auth only
|
||||
* - RFC 2623, sec 2.3.2
|
||||
*/
|
||||
error = nfs_create_rpc_client(clp, proto, data->timeo, data->retrans,
|
||||
RPC_AUTH_UNIX, 0);
|
||||
error = nfs_create_rpc_client(clp, data->nfs_server.protocol,
|
||||
data->timeo, data->retrans, RPC_AUTH_UNIX, 0);
|
||||
if (error < 0)
|
||||
goto error;
|
||||
nfs_mark_client_ready(clp, NFS_CS_READY);
|
||||
@ -538,7 +541,8 @@ error:
|
||||
/*
|
||||
* Create a version 2 or 3 client
|
||||
*/
|
||||
static int nfs_init_server(struct nfs_server *server, const struct nfs_mount_data *data)
|
||||
static int nfs_init_server(struct nfs_server *server,
|
||||
const struct nfs_parsed_mount_data *data)
|
||||
{
|
||||
struct nfs_client *clp;
|
||||
int error, nfsvers = 2;
|
||||
@ -551,7 +555,8 @@ static int nfs_init_server(struct nfs_server *server, const struct nfs_mount_dat
|
||||
#endif
|
||||
|
||||
/* Allocate or find a client reference we can use */
|
||||
clp = nfs_get_client(data->hostname, &data->addr, nfsvers);
|
||||
clp = nfs_get_client(data->nfs_server.hostname,
|
||||
&data->nfs_server.address, nfsvers);
|
||||
if (IS_ERR(clp)) {
|
||||
dprintk("<-- nfs_init_server() = error %ld\n", PTR_ERR(clp));
|
||||
return PTR_ERR(clp);
|
||||
@ -581,7 +586,7 @@ static int nfs_init_server(struct nfs_server *server, const struct nfs_mount_dat
|
||||
if (error < 0)
|
||||
goto error;
|
||||
|
||||
error = nfs_init_server_rpcclient(server, data->pseudoflavor);
|
||||
error = nfs_init_server_rpcclient(server, data->auth_flavors[0]);
|
||||
if (error < 0)
|
||||
goto error;
|
||||
|
||||
@ -760,7 +765,7 @@ void nfs_free_server(struct nfs_server *server)
|
||||
* Create a version 2 or 3 volume record
|
||||
* - keyed on server and FSID
|
||||
*/
|
||||
struct nfs_server *nfs_create_server(const struct nfs_mount_data *data,
|
||||
struct nfs_server *nfs_create_server(const struct nfs_parsed_mount_data *data,
|
||||
struct nfs_fh *mntfh)
|
||||
{
|
||||
struct nfs_server *server;
|
||||
@ -906,7 +911,7 @@ error:
|
||||
* Create a version 4 volume record
|
||||
*/
|
||||
static int nfs4_init_server(struct nfs_server *server,
|
||||
const struct nfs4_mount_data *data, rpc_authflavor_t authflavour)
|
||||
const struct nfs_parsed_mount_data *data)
|
||||
{
|
||||
int error;
|
||||
|
||||
@ -926,7 +931,7 @@ static int nfs4_init_server(struct nfs_server *server,
|
||||
server->acdirmin = data->acdirmin * HZ;
|
||||
server->acdirmax = data->acdirmax * HZ;
|
||||
|
||||
error = nfs_init_server_rpcclient(server, authflavour);
|
||||
error = nfs_init_server_rpcclient(server, data->auth_flavors[0]);
|
||||
|
||||
/* Done */
|
||||
dprintk("<-- nfs4_init_server() = %d\n", error);
|
||||
@ -937,12 +942,7 @@ static int nfs4_init_server(struct nfs_server *server,
|
||||
* Create a version 4 volume record
|
||||
* - keyed on server and FSID
|
||||
*/
|
||||
struct nfs_server *nfs4_create_server(const struct nfs4_mount_data *data,
|
||||
const char *hostname,
|
||||
const struct sockaddr_in *addr,
|
||||
const char *mntpath,
|
||||
const char *ip_addr,
|
||||
rpc_authflavor_t authflavour,
|
||||
struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data,
|
||||
struct nfs_fh *mntfh)
|
||||
{
|
||||
struct nfs_fattr fattr;
|
||||
@ -956,13 +956,18 @@ struct nfs_server *nfs4_create_server(const struct nfs4_mount_data *data,
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
/* Get a client record */
|
||||
error = nfs4_set_client(server, hostname, addr, ip_addr, authflavour,
|
||||
data->proto, data->timeo, data->retrans);
|
||||
error = nfs4_set_client(server,
|
||||
data->nfs_server.hostname,
|
||||
&data->nfs_server.address,
|
||||
data->client_address,
|
||||
data->auth_flavors[0],
|
||||
data->nfs_server.protocol,
|
||||
data->timeo, data->retrans);
|
||||
if (error < 0)
|
||||
goto error;
|
||||
|
||||
/* set up the general RPC client */
|
||||
error = nfs4_init_server(server, data, authflavour);
|
||||
error = nfs4_init_server(server, data);
|
||||
if (error < 0)
|
||||
goto error;
|
||||
|
||||
@ -971,7 +976,7 @@ struct nfs_server *nfs4_create_server(const struct nfs4_mount_data *data,
|
||||
BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
|
||||
|
||||
/* Probe the root fh to retrieve its FSID */
|
||||
error = nfs4_path_walk(server, mntfh, mntpath);
|
||||
error = nfs4_path_walk(server, mntfh, data->nfs_server.export_path);
|
||||
if (error < 0)
|
||||
goto error;
|
||||
|
||||
|
@ -52,7 +52,7 @@ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_
|
||||
for (fl = inode->i_flock; fl != 0; fl = fl->fl_next) {
|
||||
if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK)))
|
||||
continue;
|
||||
if ((struct nfs_open_context *)fl->fl_file->private_data != ctx)
|
||||
if (nfs_file_open_context(fl->fl_file) != ctx)
|
||||
continue;
|
||||
status = nfs4_lock_delegation_recall(state, fl);
|
||||
if (status >= 0)
|
||||
@ -109,6 +109,7 @@ again:
|
||||
void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res)
|
||||
{
|
||||
struct nfs_delegation *delegation = NFS_I(inode)->delegation;
|
||||
struct rpc_cred *oldcred;
|
||||
|
||||
if (delegation == NULL)
|
||||
return;
|
||||
@ -116,11 +117,12 @@ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, st
|
||||
sizeof(delegation->stateid.data));
|
||||
delegation->type = res->delegation_type;
|
||||
delegation->maxsize = res->maxsize;
|
||||
put_rpccred(cred);
|
||||
oldcred = delegation->cred;
|
||||
delegation->cred = get_rpccred(cred);
|
||||
delegation->flags &= ~NFS_DELEGATION_NEED_RECLAIM;
|
||||
NFS_I(inode)->delegation_state = delegation->type;
|
||||
smp_wmb();
|
||||
put_rpccred(oldcred);
|
||||
}
|
||||
|
||||
/*
|
||||
|
263
fs/nfs/dir.c
263
fs/nfs/dir.c
@ -200,9 +200,6 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page)
|
||||
desc->timestamp = timestamp;
|
||||
desc->timestamp_valid = 1;
|
||||
SetPageUptodate(page);
|
||||
spin_lock(&inode->i_lock);
|
||||
NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME;
|
||||
spin_unlock(&inode->i_lock);
|
||||
/* Ensure consistent page alignment of the data.
|
||||
* Note: assumes we have exclusive access to this mapping either
|
||||
* through inode->i_mutex or some other mechanism.
|
||||
@ -214,9 +211,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page)
|
||||
unlock_page(page);
|
||||
return 0;
|
||||
error:
|
||||
SetPageError(page);
|
||||
unlock_page(page);
|
||||
nfs_zap_caches(inode);
|
||||
desc->error = error;
|
||||
return -EIO;
|
||||
}
|
||||
@ -407,7 +402,7 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
|
||||
struct file *file = desc->file;
|
||||
struct nfs_entry *entry = desc->entry;
|
||||
struct dentry *dentry = NULL;
|
||||
unsigned long fileid;
|
||||
u64 fileid;
|
||||
int loop_count = 0,
|
||||
res;
|
||||
|
||||
@ -418,7 +413,7 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
|
||||
unsigned d_type = DT_UNKNOWN;
|
||||
/* Note: entry->prev_cookie contains the cookie for
|
||||
* retrieving the current dirent on the server */
|
||||
fileid = nfs_fileid_to_ino_t(entry->ino);
|
||||
fileid = entry->ino;
|
||||
|
||||
/* Get a dentry if we have one */
|
||||
if (dentry != NULL)
|
||||
@ -428,11 +423,12 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
|
||||
/* Use readdirplus info */
|
||||
if (dentry != NULL && dentry->d_inode != NULL) {
|
||||
d_type = dt_type(dentry->d_inode);
|
||||
fileid = dentry->d_inode->i_ino;
|
||||
fileid = NFS_FILEID(dentry->d_inode);
|
||||
}
|
||||
|
||||
res = filldir(dirent, entry->name, entry->len,
|
||||
file->f_pos, fileid, d_type);
|
||||
file->f_pos, nfs_compat_user_ino64(fileid),
|
||||
d_type);
|
||||
if (res < 0)
|
||||
break;
|
||||
file->f_pos++;
|
||||
@ -490,9 +486,6 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
|
||||
page,
|
||||
NFS_SERVER(inode)->dtsize,
|
||||
desc->plus);
|
||||
spin_lock(&inode->i_lock);
|
||||
NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME;
|
||||
spin_unlock(&inode->i_lock);
|
||||
desc->page = page;
|
||||
desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */
|
||||
if (desc->error >= 0) {
|
||||
@ -558,7 +551,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
|
||||
memset(desc, 0, sizeof(*desc));
|
||||
|
||||
desc->file = filp;
|
||||
desc->dir_cookie = &((struct nfs_open_context *)filp->private_data)->dir_cookie;
|
||||
desc->dir_cookie = &nfs_file_open_context(filp)->dir_cookie;
|
||||
desc->decode = NFS_PROTO(inode)->decode_dirent;
|
||||
desc->plus = NFS_USE_READDIRPLUS(inode);
|
||||
|
||||
@ -623,7 +616,7 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin)
|
||||
}
|
||||
if (offset != filp->f_pos) {
|
||||
filp->f_pos = offset;
|
||||
((struct nfs_open_context *)filp->private_data)->dir_cookie = 0;
|
||||
nfs_file_open_context(filp)->dir_cookie = 0;
|
||||
}
|
||||
out:
|
||||
mutex_unlock(&filp->f_path.dentry->d_inode->i_mutex);
|
||||
@ -650,36 +643,18 @@ static int nfs_fsync_dir(struct file *filp, struct dentry *dentry, int datasync)
|
||||
*/
|
||||
static int nfs_check_verifier(struct inode *dir, struct dentry *dentry)
|
||||
{
|
||||
unsigned long verf;
|
||||
|
||||
if (IS_ROOT(dentry))
|
||||
return 1;
|
||||
verf = dentry->d_time;
|
||||
if (nfs_caches_unstable(dir)
|
||||
|| verf != NFS_I(dir)->cache_change_attribute)
|
||||
if (!nfs_verify_change_attribute(dir, dentry->d_time))
|
||||
return 0;
|
||||
/* Revalidate nfsi->cache_change_attribute before we declare a match */
|
||||
if (nfs_revalidate_inode(NFS_SERVER(dir), dir) < 0)
|
||||
return 0;
|
||||
if (!nfs_verify_change_attribute(dir, dentry->d_time))
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
static inline void nfs_set_verifier(struct dentry * dentry, unsigned long verf)
|
||||
{
|
||||
dentry->d_time = verf;
|
||||
}
|
||||
|
||||
static void nfs_refresh_verifier(struct dentry * dentry, unsigned long verf)
|
||||
{
|
||||
nfs_set_verifier(dentry, verf);
|
||||
}
|
||||
|
||||
/*
|
||||
* Whenever an NFS operation succeeds, we know that the dentry
|
||||
* is valid, so we update the revalidation timestamp.
|
||||
*/
|
||||
static inline void nfs_renew_times(struct dentry * dentry)
|
||||
{
|
||||
dentry->d_time = jiffies;
|
||||
}
|
||||
|
||||
/*
|
||||
* Return the intent data that applies to this particular path component
|
||||
*
|
||||
@ -694,6 +669,19 @@ static inline unsigned int nfs_lookup_check_intent(struct nameidata *nd, unsigne
|
||||
return nd->flags & mask;
|
||||
}
|
||||
|
||||
/*
|
||||
* Use intent information to check whether or not we're going to do
|
||||
* an O_EXCL create using this path component.
|
||||
*/
|
||||
static int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd)
|
||||
{
|
||||
if (NFS_PROTO(dir)->version == 2)
|
||||
return 0;
|
||||
if (nd == NULL || nfs_lookup_check_intent(nd, LOOKUP_CREATE) == 0)
|
||||
return 0;
|
||||
return (nd->intent.open.flags & O_EXCL) != 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Inode and filehandle revalidation for lookups.
|
||||
*
|
||||
@ -717,6 +705,7 @@ int nfs_lookup_verify_inode(struct inode *inode, struct nameidata *nd)
|
||||
(S_ISREG(inode->i_mode) ||
|
||||
S_ISDIR(inode->i_mode)))
|
||||
goto out_force;
|
||||
return 0;
|
||||
}
|
||||
return nfs_revalidate_inode(server, inode);
|
||||
out_force:
|
||||
@ -759,7 +748,6 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
|
||||
int error;
|
||||
struct nfs_fh fhandle;
|
||||
struct nfs_fattr fattr;
|
||||
unsigned long verifier;
|
||||
|
||||
parent = dget_parent(dentry);
|
||||
lock_kernel();
|
||||
@ -767,10 +755,6 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
|
||||
nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE);
|
||||
inode = dentry->d_inode;
|
||||
|
||||
/* Revalidate parent directory attribute cache */
|
||||
if (nfs_revalidate_inode(NFS_SERVER(dir), dir) < 0)
|
||||
goto out_zap_parent;
|
||||
|
||||
if (!inode) {
|
||||
if (nfs_neg_need_reval(dir, dentry, nd))
|
||||
goto out_bad;
|
||||
@ -785,7 +769,7 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
|
||||
}
|
||||
|
||||
/* Force a full look up iff the parent directory has changed */
|
||||
if (nfs_check_verifier(dir, dentry)) {
|
||||
if (!nfs_is_exclusive_create(dir, nd) && nfs_check_verifier(dir, dentry)) {
|
||||
if (nfs_lookup_verify_inode(inode, nd))
|
||||
goto out_zap_parent;
|
||||
goto out_valid;
|
||||
@ -794,7 +778,6 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
|
||||
if (NFS_STALE(inode))
|
||||
goto out_bad;
|
||||
|
||||
verifier = nfs_save_change_attribute(dir);
|
||||
error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr);
|
||||
if (error)
|
||||
goto out_bad;
|
||||
@ -803,8 +786,7 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
|
||||
if ((error = nfs_refresh_inode(inode, &fattr)) != 0)
|
||||
goto out_bad;
|
||||
|
||||
nfs_renew_times(dentry);
|
||||
nfs_refresh_verifier(dentry, verifier);
|
||||
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
|
||||
out_valid:
|
||||
unlock_kernel();
|
||||
dput(parent);
|
||||
@ -815,7 +797,7 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
|
||||
out_zap_parent:
|
||||
nfs_zap_caches(dir);
|
||||
out_bad:
|
||||
NFS_CACHEINV(dir);
|
||||
nfs_mark_for_revalidate(dir);
|
||||
if (inode && S_ISDIR(inode->i_mode)) {
|
||||
/* Purge readdir caches. */
|
||||
nfs_zap_caches(inode);
|
||||
@ -872,8 +854,6 @@ static void nfs_dentry_iput(struct dentry *dentry, struct inode *inode)
|
||||
nfs_complete_unlink(dentry, inode);
|
||||
unlock_kernel();
|
||||
}
|
||||
/* When creating a negative dentry, we want to renew d_time */
|
||||
nfs_renew_times(dentry);
|
||||
iput(inode);
|
||||
}
|
||||
|
||||
@ -883,30 +863,6 @@ struct dentry_operations nfs_dentry_operations = {
|
||||
.d_iput = nfs_dentry_iput,
|
||||
};
|
||||
|
||||
/*
|
||||
* Use intent information to check whether or not we're going to do
|
||||
* an O_EXCL create using this path component.
|
||||
*/
|
||||
static inline
|
||||
int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd)
|
||||
{
|
||||
if (NFS_PROTO(dir)->version == 2)
|
||||
return 0;
|
||||
if (nd == NULL || nfs_lookup_check_intent(nd, LOOKUP_CREATE) == 0)
|
||||
return 0;
|
||||
return (nd->intent.open.flags & O_EXCL) != 0;
|
||||
}
|
||||
|
||||
static inline int nfs_reval_fsid(struct inode *dir, const struct nfs_fattr *fattr)
|
||||
{
|
||||
struct nfs_server *server = NFS_SERVER(dir);
|
||||
|
||||
if (!nfs_fsid_equal(&server->fsid, &fattr->fsid))
|
||||
/* Revalidate fsid using the parent directory */
|
||||
return __nfs_revalidate_inode(server, dir);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
|
||||
{
|
||||
struct dentry *res;
|
||||
@ -945,11 +901,6 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
|
||||
res = ERR_PTR(error);
|
||||
goto out_unlock;
|
||||
}
|
||||
error = nfs_reval_fsid(dir, &fattr);
|
||||
if (error < 0) {
|
||||
res = ERR_PTR(error);
|
||||
goto out_unlock;
|
||||
}
|
||||
inode = nfs_fhget(dentry->d_sb, &fhandle, &fattr);
|
||||
res = (struct dentry *)inode;
|
||||
if (IS_ERR(res))
|
||||
@ -958,17 +909,10 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
|
||||
no_entry:
|
||||
res = d_materialise_unique(dentry, inode);
|
||||
if (res != NULL) {
|
||||
struct dentry *parent;
|
||||
if (IS_ERR(res))
|
||||
goto out_unlock;
|
||||
/* Was a directory renamed! */
|
||||
parent = dget_parent(res);
|
||||
if (!IS_ROOT(parent))
|
||||
nfs_mark_for_revalidate(parent->d_inode);
|
||||
dput(parent);
|
||||
dentry = res;
|
||||
}
|
||||
nfs_renew_times(dentry);
|
||||
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
|
||||
out_unlock:
|
||||
unlock_kernel();
|
||||
@ -1020,28 +964,16 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
|
||||
}
|
||||
dentry->d_op = NFS_PROTO(dir)->dentry_ops;
|
||||
|
||||
/* Let vfs_create() deal with O_EXCL */
|
||||
/* Let vfs_create() deal with O_EXCL. Instantiate, but don't hash
|
||||
* the dentry. */
|
||||
if (nd->intent.open.flags & O_EXCL) {
|
||||
d_add(dentry, NULL);
|
||||
d_instantiate(dentry, NULL);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Open the file on the server */
|
||||
lock_kernel();
|
||||
/* Revalidate parent directory attribute cache */
|
||||
error = nfs_revalidate_inode(NFS_SERVER(dir), dir);
|
||||
if (error < 0) {
|
||||
res = ERR_PTR(error);
|
||||
unlock_kernel();
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (nd->intent.open.flags & O_CREAT) {
|
||||
nfs_begin_data_update(dir);
|
||||
res = nfs4_atomic_open(dir, dentry, nd);
|
||||
nfs_end_data_update(dir);
|
||||
} else
|
||||
res = nfs4_atomic_open(dir, dentry, nd);
|
||||
res = nfs4_atomic_open(dir, dentry, nd);
|
||||
unlock_kernel();
|
||||
if (IS_ERR(res)) {
|
||||
error = PTR_ERR(res);
|
||||
@ -1063,8 +995,6 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
|
||||
}
|
||||
} else if (res != NULL)
|
||||
dentry = res;
|
||||
nfs_renew_times(dentry);
|
||||
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
|
||||
out:
|
||||
return res;
|
||||
no_open:
|
||||
@ -1076,7 +1006,6 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
|
||||
struct dentry *parent = NULL;
|
||||
struct inode *inode = dentry->d_inode;
|
||||
struct inode *dir;
|
||||
unsigned long verifier;
|
||||
int openflags, ret = 0;
|
||||
|
||||
parent = dget_parent(dentry);
|
||||
@ -1086,8 +1015,12 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
|
||||
/* We can't create new files in nfs_open_revalidate(), so we
|
||||
* optimize away revalidation of negative dentries.
|
||||
*/
|
||||
if (inode == NULL)
|
||||
if (inode == NULL) {
|
||||
if (!nfs_neg_need_reval(dir, dentry, nd))
|
||||
ret = 1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* NFS only supports OPEN on regular files */
|
||||
if (!S_ISREG(inode->i_mode))
|
||||
goto no_open;
|
||||
@ -1104,10 +1037,7 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
|
||||
* change attribute *before* we do the RPC call.
|
||||
*/
|
||||
lock_kernel();
|
||||
verifier = nfs_save_change_attribute(dir);
|
||||
ret = nfs4_open_revalidate(dir, dentry, openflags, nd);
|
||||
if (!ret)
|
||||
nfs_refresh_verifier(dentry, verifier);
|
||||
unlock_kernel();
|
||||
out:
|
||||
dput(parent);
|
||||
@ -1133,6 +1063,7 @@ static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc)
|
||||
.len = entry->len,
|
||||
};
|
||||
struct inode *inode;
|
||||
unsigned long verf = nfs_save_change_attribute(dir);
|
||||
|
||||
switch (name.len) {
|
||||
case 2:
|
||||
@ -1143,6 +1074,14 @@ static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc)
|
||||
if (name.name[0] == '.')
|
||||
return dget(parent);
|
||||
}
|
||||
|
||||
spin_lock(&dir->i_lock);
|
||||
if (NFS_I(dir)->cache_validity & NFS_INO_INVALID_DATA) {
|
||||
spin_unlock(&dir->i_lock);
|
||||
return NULL;
|
||||
}
|
||||
spin_unlock(&dir->i_lock);
|
||||
|
||||
name.hash = full_name_hash(name.name, name.len);
|
||||
dentry = d_lookup(parent, &name);
|
||||
if (dentry != NULL) {
|
||||
@ -1183,12 +1122,8 @@ static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc)
|
||||
dentry = alias;
|
||||
}
|
||||
|
||||
nfs_renew_times(dentry);
|
||||
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
|
||||
return dentry;
|
||||
out_renew:
|
||||
nfs_renew_times(dentry);
|
||||
nfs_refresh_verifier(dentry, nfs_save_change_attribute(dir));
|
||||
nfs_set_verifier(dentry, verf);
|
||||
return dentry;
|
||||
}
|
||||
|
||||
@ -1198,32 +1133,40 @@ out_renew:
|
||||
int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
|
||||
struct nfs_fattr *fattr)
|
||||
{
|
||||
struct dentry *parent = dget_parent(dentry);
|
||||
struct inode *dir = parent->d_inode;
|
||||
struct inode *inode;
|
||||
int error = -EACCES;
|
||||
|
||||
d_drop(dentry);
|
||||
|
||||
/* We may have been initialized further down */
|
||||
if (dentry->d_inode)
|
||||
return 0;
|
||||
goto out;
|
||||
if (fhandle->size == 0) {
|
||||
struct inode *dir = dentry->d_parent->d_inode;
|
||||
error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr);
|
||||
if (error)
|
||||
return error;
|
||||
goto out_error;
|
||||
}
|
||||
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
|
||||
if (!(fattr->valid & NFS_ATTR_FATTR)) {
|
||||
struct nfs_server *server = NFS_SB(dentry->d_sb);
|
||||
error = server->nfs_client->rpc_ops->getattr(server, fhandle, fattr);
|
||||
if (error < 0)
|
||||
return error;
|
||||
goto out_error;
|
||||
}
|
||||
inode = nfs_fhget(dentry->d_sb, fhandle, fattr);
|
||||
error = PTR_ERR(inode);
|
||||
if (IS_ERR(inode))
|
||||
return error;
|
||||
d_instantiate(dentry, inode);
|
||||
if (d_unhashed(dentry))
|
||||
d_rehash(dentry);
|
||||
goto out_error;
|
||||
d_add(dentry, inode);
|
||||
out:
|
||||
dput(parent);
|
||||
return 0;
|
||||
out_error:
|
||||
nfs_mark_for_revalidate(dir);
|
||||
dput(parent);
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1249,13 +1192,9 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode,
|
||||
open_flags = nd->intent.open.flags;
|
||||
|
||||
lock_kernel();
|
||||
nfs_begin_data_update(dir);
|
||||
error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, nd);
|
||||
nfs_end_data_update(dir);
|
||||
if (error != 0)
|
||||
goto out_err;
|
||||
nfs_renew_times(dentry);
|
||||
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
|
||||
unlock_kernel();
|
||||
return 0;
|
||||
out_err:
|
||||
@ -1283,13 +1222,9 @@ nfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
|
||||
attr.ia_valid = ATTR_MODE;
|
||||
|
||||
lock_kernel();
|
||||
nfs_begin_data_update(dir);
|
||||
status = NFS_PROTO(dir)->mknod(dir, dentry, &attr, rdev);
|
||||
nfs_end_data_update(dir);
|
||||
if (status != 0)
|
||||
goto out_err;
|
||||
nfs_renew_times(dentry);
|
||||
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
|
||||
unlock_kernel();
|
||||
return 0;
|
||||
out_err:
|
||||
@ -1313,13 +1248,9 @@ static int nfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
|
||||
attr.ia_mode = mode | S_IFDIR;
|
||||
|
||||
lock_kernel();
|
||||
nfs_begin_data_update(dir);
|
||||
error = NFS_PROTO(dir)->mkdir(dir, dentry, &attr);
|
||||
nfs_end_data_update(dir);
|
||||
if (error != 0)
|
||||
goto out_err;
|
||||
nfs_renew_times(dentry);
|
||||
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
|
||||
unlock_kernel();
|
||||
return 0;
|
||||
out_err:
|
||||
@ -1336,12 +1267,10 @@ static int nfs_rmdir(struct inode *dir, struct dentry *dentry)
|
||||
dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
|
||||
|
||||
lock_kernel();
|
||||
nfs_begin_data_update(dir);
|
||||
error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name);
|
||||
/* Ensure the VFS deletes this inode */
|
||||
if (error == 0 && dentry->d_inode != NULL)
|
||||
clear_nlink(dentry->d_inode);
|
||||
nfs_end_data_update(dir);
|
||||
unlock_kernel();
|
||||
|
||||
return error;
|
||||
@ -1350,9 +1279,9 @@ static int nfs_rmdir(struct inode *dir, struct dentry *dentry)
|
||||
static int nfs_sillyrename(struct inode *dir, struct dentry *dentry)
|
||||
{
|
||||
static unsigned int sillycounter;
|
||||
const int i_inosize = sizeof(dir->i_ino)*2;
|
||||
const int fileidsize = sizeof(NFS_FILEID(dentry->d_inode))*2;
|
||||
const int countersize = sizeof(sillycounter)*2;
|
||||
const int slen = sizeof(".nfs") + i_inosize + countersize - 1;
|
||||
const int slen = sizeof(".nfs")+fileidsize+countersize-1;
|
||||
char silly[slen+1];
|
||||
struct qstr qsilly;
|
||||
struct dentry *sdentry;
|
||||
@ -1370,8 +1299,9 @@ static int nfs_sillyrename(struct inode *dir, struct dentry *dentry)
|
||||
if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
|
||||
goto out;
|
||||
|
||||
sprintf(silly, ".nfs%*.*lx",
|
||||
i_inosize, i_inosize, dentry->d_inode->i_ino);
|
||||
sprintf(silly, ".nfs%*.*Lx",
|
||||
fileidsize, fileidsize,
|
||||
(unsigned long long)NFS_FILEID(dentry->d_inode));
|
||||
|
||||
/* Return delegation in anticipation of the rename */
|
||||
nfs_inode_return_delegation(dentry->d_inode);
|
||||
@ -1398,19 +1328,14 @@ static int nfs_sillyrename(struct inode *dir, struct dentry *dentry)
|
||||
|
||||
qsilly.name = silly;
|
||||
qsilly.len = strlen(silly);
|
||||
nfs_begin_data_update(dir);
|
||||
if (dentry->d_inode) {
|
||||
nfs_begin_data_update(dentry->d_inode);
|
||||
error = NFS_PROTO(dir)->rename(dir, &dentry->d_name,
|
||||
dir, &qsilly);
|
||||
nfs_mark_for_revalidate(dentry->d_inode);
|
||||
nfs_end_data_update(dentry->d_inode);
|
||||
} else
|
||||
error = NFS_PROTO(dir)->rename(dir, &dentry->d_name,
|
||||
dir, &qsilly);
|
||||
nfs_end_data_update(dir);
|
||||
if (!error) {
|
||||
nfs_renew_times(dentry);
|
||||
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
|
||||
d_move(dentry, sdentry);
|
||||
error = nfs_async_unlink(dir, dentry);
|
||||
@ -1443,19 +1368,15 @@ static int nfs_safe_remove(struct dentry *dentry)
|
||||
goto out;
|
||||
}
|
||||
|
||||
nfs_begin_data_update(dir);
|
||||
if (inode != NULL) {
|
||||
nfs_inode_return_delegation(inode);
|
||||
nfs_begin_data_update(inode);
|
||||
error = NFS_PROTO(dir)->remove(dir, &dentry->d_name);
|
||||
/* The VFS may want to delete this inode */
|
||||
if (error == 0)
|
||||
drop_nlink(inode);
|
||||
nfs_mark_for_revalidate(inode);
|
||||
nfs_end_data_update(inode);
|
||||
} else
|
||||
error = NFS_PROTO(dir)->remove(dir, &dentry->d_name);
|
||||
nfs_end_data_update(dir);
|
||||
out:
|
||||
return error;
|
||||
}
|
||||
@ -1493,7 +1414,6 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry)
|
||||
spin_unlock(&dcache_lock);
|
||||
error = nfs_safe_remove(dentry);
|
||||
if (!error) {
|
||||
nfs_renew_times(dentry);
|
||||
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
|
||||
} else if (need_rehash)
|
||||
d_rehash(dentry);
|
||||
@ -1548,9 +1468,7 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *sym
|
||||
memset(kaddr + pathlen, 0, PAGE_SIZE - pathlen);
|
||||
kunmap_atomic(kaddr, KM_USER0);
|
||||
|
||||
nfs_begin_data_update(dir);
|
||||
error = NFS_PROTO(dir)->symlink(dir, dentry, page, pathlen, &attr);
|
||||
nfs_end_data_update(dir);
|
||||
if (error != 0) {
|
||||
dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s) error %d\n",
|
||||
dir->i_sb->s_id, dir->i_ino,
|
||||
@ -1590,15 +1508,12 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
|
||||
dentry->d_parent->d_name.name, dentry->d_name.name);
|
||||
|
||||
lock_kernel();
|
||||
nfs_begin_data_update(dir);
|
||||
nfs_begin_data_update(inode);
|
||||
d_drop(dentry);
|
||||
error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name);
|
||||
if (error == 0) {
|
||||
atomic_inc(&inode->i_count);
|
||||
d_instantiate(dentry, inode);
|
||||
d_add(dentry, inode);
|
||||
}
|
||||
nfs_end_data_update(inode);
|
||||
nfs_end_data_update(dir);
|
||||
unlock_kernel();
|
||||
return error;
|
||||
}
|
||||
@ -1701,22 +1616,16 @@ go_ahead:
|
||||
d_delete(new_dentry);
|
||||
}
|
||||
|
||||
nfs_begin_data_update(old_dir);
|
||||
nfs_begin_data_update(new_dir);
|
||||
nfs_begin_data_update(old_inode);
|
||||
error = NFS_PROTO(old_dir)->rename(old_dir, &old_dentry->d_name,
|
||||
new_dir, &new_dentry->d_name);
|
||||
nfs_mark_for_revalidate(old_inode);
|
||||
nfs_end_data_update(old_inode);
|
||||
nfs_end_data_update(new_dir);
|
||||
nfs_end_data_update(old_dir);
|
||||
out:
|
||||
if (rehash)
|
||||
d_rehash(rehash);
|
||||
if (!error) {
|
||||
d_move(old_dentry, new_dentry);
|
||||
nfs_renew_times(new_dentry);
|
||||
nfs_refresh_verifier(new_dentry, nfs_save_change_attribute(new_dir));
|
||||
nfs_set_verifier(new_dentry,
|
||||
nfs_save_change_attribute(new_dir));
|
||||
}
|
||||
|
||||
/* new dentry created? */
|
||||
@ -1842,7 +1751,7 @@ static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, st
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res)
|
||||
static int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res)
|
||||
{
|
||||
struct nfs_inode *nfsi = NFS_I(inode);
|
||||
struct nfs_access_entry *cache;
|
||||
@ -1854,7 +1763,7 @@ int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs
|
||||
cache = nfs_access_search_rbtree(inode, cred);
|
||||
if (cache == NULL)
|
||||
goto out;
|
||||
if (time_after(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode)))
|
||||
if (!time_in_range(jiffies, cache->jiffies, cache->jiffies + nfsi->attrtimeo))
|
||||
goto out_stale;
|
||||
res->jiffies = cache->jiffies;
|
||||
res->cred = cache->cred;
|
||||
@ -1909,7 +1818,7 @@ found:
|
||||
nfs_access_free_entry(entry);
|
||||
}
|
||||
|
||||
void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
|
||||
static void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
|
||||
{
|
||||
struct nfs_access_entry *cache = kmalloc(sizeof(*cache), GFP_KERNEL);
|
||||
if (cache == NULL)
|
||||
@ -1957,6 +1866,24 @@ out:
|
||||
return -EACCES;
|
||||
}
|
||||
|
||||
static int nfs_open_permission_mask(int openflags)
|
||||
{
|
||||
int mask = 0;
|
||||
|
||||
if (openflags & FMODE_READ)
|
||||
mask |= MAY_READ;
|
||||
if (openflags & FMODE_WRITE)
|
||||
mask |= MAY_WRITE;
|
||||
if (openflags & FMODE_EXEC)
|
||||
mask |= MAY_EXEC;
|
||||
return mask;
|
||||
}
|
||||
|
||||
/*
 * Check whether @cred may open @inode with the given open flags: the flags
 * are converted to a MAY_* mask and handed to nfs_do_access(), whose result
 * is returned unchanged.
 */
int nfs_may_open(struct inode *inode, struct rpc_cred *cred, int openflags)
{
	const int access_mask = nfs_open_permission_mask(openflags);

	return nfs_do_access(inode, cred, access_mask);
}
|
||||
|
||||
int nfs_permission(struct inode *inode, int mask, struct nameidata *nd)
|
||||
{
|
||||
struct rpc_cred *cred;
|
||||
|
@ -368,7 +368,7 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size
|
||||
return -ENOMEM;
|
||||
|
||||
dreq->inode = inode;
|
||||
dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data);
|
||||
dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
|
||||
if (!is_sync_kiocb(iocb))
|
||||
dreq->iocb = iocb;
|
||||
|
||||
@ -510,7 +510,6 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode
|
||||
nfs_direct_write_reschedule(dreq);
|
||||
break;
|
||||
default:
|
||||
nfs_end_data_update(inode);
|
||||
if (dreq->commit_data != NULL)
|
||||
nfs_commit_free(dreq->commit_data);
|
||||
nfs_direct_free_writedata(dreq);
|
||||
@ -533,7 +532,6 @@ static inline void nfs_alloc_commit_data(struct nfs_direct_req *dreq)
|
||||
|
||||
static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
|
||||
{
|
||||
nfs_end_data_update(inode);
|
||||
nfs_direct_free_writedata(dreq);
|
||||
nfs_zap_mapping(inode, inode->i_mapping);
|
||||
nfs_direct_complete(dreq);
|
||||
@ -718,14 +716,12 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz
|
||||
sync = FLUSH_STABLE;
|
||||
|
||||
dreq->inode = inode;
|
||||
dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data);
|
||||
dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
|
||||
if (!is_sync_kiocb(iocb))
|
||||
dreq->iocb = iocb;
|
||||
|
||||
nfs_add_stats(inode, NFSIOS_DIRECTWRITTENBYTES, count);
|
||||
|
||||
nfs_begin_data_update(inode);
|
||||
|
||||
rpc_clnt_sigmask(clnt, &oldset);
|
||||
result = nfs_direct_write_schedule(dreq, user_addr, count, pos, sync);
|
||||
if (!result)
|
||||
|
105
fs/nfs/file.c
105
fs/nfs/file.c
@ -33,6 +33,7 @@
|
||||
#include <asm/system.h>
|
||||
|
||||
#include "delegation.h"
|
||||
#include "internal.h"
|
||||
#include "iostat.h"
|
||||
|
||||
#define NFSDBG_FACILITY NFSDBG_FILE
|
||||
@ -55,6 +56,8 @@ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl);
|
||||
static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl);
|
||||
static int nfs_setlease(struct file *file, long arg, struct file_lock **fl);
|
||||
|
||||
static struct vm_operations_struct nfs_file_vm_ops;
|
||||
|
||||
const struct file_operations nfs_file_operations = {
|
||||
.llseek = nfs_file_llseek,
|
||||
.read = do_sync_read,
|
||||
@ -173,6 +176,31 @@ static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin)
|
||||
return remote_llseek(filp, offset, origin);
|
||||
}
|
||||
|
||||
/*
|
||||
* Helper for nfs_file_flush() and nfs_fsync()
|
||||
*
|
||||
* Notice that it clears the NFS_CONTEXT_ERROR_WRITE before synching to
|
||||
* disk, but it retrieves and clears ctx->error after synching, despite
|
||||
* the two being set at the same time in nfs_context_set_write_error().
|
||||
* This is because the former is used to notify the _next_ call to
|
||||
* nfs_file_write() that a write error occured, and hence cause it to
|
||||
* fall back to doing a synchronous write.
|
||||
*/
|
||||
static int nfs_do_fsync(struct nfs_open_context *ctx, struct inode *inode)
|
||||
{
|
||||
int have_error, status;
|
||||
int ret = 0;
|
||||
|
||||
have_error = test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
|
||||
status = nfs_wb_all(inode);
|
||||
have_error |= test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
|
||||
if (have_error)
|
||||
ret = xchg(&ctx->error, 0);
|
||||
if (!ret)
|
||||
ret = status;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Flush all dirty pages, and check for write errors.
|
||||
*
|
||||
@ -180,7 +208,7 @@ static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin)
|
||||
static int
|
||||
nfs_file_flush(struct file *file, fl_owner_t id)
|
||||
{
|
||||
struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data;
|
||||
struct nfs_open_context *ctx = nfs_file_open_context(file);
|
||||
struct inode *inode = file->f_path.dentry->d_inode;
|
||||
int status;
|
||||
|
||||
@ -189,16 +217,11 @@ nfs_file_flush(struct file *file, fl_owner_t id)
|
||||
if ((file->f_mode & FMODE_WRITE) == 0)
|
||||
return 0;
|
||||
nfs_inc_stats(inode, NFSIOS_VFSFLUSH);
|
||||
lock_kernel();
|
||||
|
||||
/* Ensure that data+attribute caches are up to date after close() */
|
||||
status = nfs_wb_all(inode);
|
||||
if (!status) {
|
||||
status = ctx->error;
|
||||
ctx->error = 0;
|
||||
if (!status)
|
||||
nfs_revalidate_inode(NFS_SERVER(inode), inode);
|
||||
}
|
||||
unlock_kernel();
|
||||
status = nfs_do_fsync(ctx, inode);
|
||||
if (!status)
|
||||
nfs_revalidate_inode(NFS_SERVER(inode), inode);
|
||||
return status;
|
||||
}
|
||||
|
||||
@ -257,8 +280,11 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma)
|
||||
dentry->d_parent->d_name.name, dentry->d_name.name);
|
||||
|
||||
status = nfs_revalidate_mapping(inode, file->f_mapping);
|
||||
if (!status)
|
||||
status = generic_file_mmap(file, vma);
|
||||
if (!status) {
|
||||
vma->vm_ops = &nfs_file_vm_ops;
|
||||
vma->vm_flags |= VM_CAN_NONLINEAR;
|
||||
file_accessed(file);
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
@ -270,21 +296,13 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma)
|
||||
static int
|
||||
nfs_fsync(struct file *file, struct dentry *dentry, int datasync)
|
||||
{
|
||||
struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data;
|
||||
struct nfs_open_context *ctx = nfs_file_open_context(file);
|
||||
struct inode *inode = dentry->d_inode;
|
||||
int status;
|
||||
|
||||
dfprintk(VFS, "nfs: fsync(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino);
|
||||
|
||||
nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
|
||||
lock_kernel();
|
||||
status = nfs_wb_all(inode);
|
||||
if (!status) {
|
||||
status = ctx->error;
|
||||
ctx->error = 0;
|
||||
}
|
||||
unlock_kernel();
|
||||
return status;
|
||||
return nfs_do_fsync(ctx, inode);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -333,7 +351,7 @@ static int nfs_launder_page(struct page *page)
|
||||
const struct address_space_operations nfs_file_aops = {
|
||||
.readpage = nfs_readpage,
|
||||
.readpages = nfs_readpages,
|
||||
.set_page_dirty = nfs_set_page_dirty,
|
||||
.set_page_dirty = __set_page_dirty_nobuffers,
|
||||
.writepage = nfs_writepage,
|
||||
.writepages = nfs_writepages,
|
||||
.prepare_write = nfs_prepare_write,
|
||||
@ -346,6 +364,43 @@ const struct address_space_operations nfs_file_aops = {
|
||||
.launder_page = nfs_launder_page,
|
||||
};
|
||||
|
||||
static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
|
||||
{
|
||||
struct file *filp = vma->vm_file;
|
||||
unsigned pagelen;
|
||||
int ret = -EINVAL;
|
||||
|
||||
lock_page(page);
|
||||
if (page->mapping != vma->vm_file->f_path.dentry->d_inode->i_mapping)
|
||||
goto out_unlock;
|
||||
pagelen = nfs_page_length(page);
|
||||
if (pagelen == 0)
|
||||
goto out_unlock;
|
||||
ret = nfs_prepare_write(filp, page, 0, pagelen);
|
||||
if (!ret)
|
||||
ret = nfs_commit_write(filp, page, 0, pagelen);
|
||||
out_unlock:
|
||||
unlock_page(page);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * VM operations installed on a vma by nfs_file_mmap(): faults are handled
 * by filemap_fault, and page_mkwrite goes to nfs_vm_page_mkwrite.
 */
static struct vm_operations_struct nfs_file_vm_ops = {
|
||||
.fault = filemap_fault,
|
||||
.page_mkwrite = nfs_vm_page_mkwrite,
|
||||
};
|
||||
|
||||
static int nfs_need_sync_write(struct file *filp, struct inode *inode)
|
||||
{
|
||||
struct nfs_open_context *ctx;
|
||||
|
||||
if (IS_SYNC(inode) || (filp->f_flags & O_SYNC))
|
||||
return 1;
|
||||
ctx = nfs_file_open_context(filp);
|
||||
if (test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags))
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
|
||||
unsigned long nr_segs, loff_t pos)
|
||||
{
|
||||
@ -382,8 +437,8 @@ static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
|
||||
nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count);
|
||||
result = generic_file_aio_write(iocb, iov, nr_segs, pos);
|
||||
/* Return error values for O_SYNC and IS_SYNC() */
|
||||
if (result >= 0 && (IS_SYNC(inode) || (iocb->ki_filp->f_flags & O_SYNC))) {
|
||||
int err = nfs_fsync(iocb->ki_filp, dentry, 1);
|
||||
if (result >= 0 && nfs_need_sync_write(iocb->ki_filp, inode)) {
|
||||
int err = nfs_do_fsync(nfs_file_open_context(iocb->ki_filp), inode);
|
||||
if (err < 0)
|
||||
result = err;
|
||||
}
|
||||
|
273
fs/nfs/inode.c
273
fs/nfs/inode.c
@ -49,6 +49,11 @@
|
||||
|
||||
#define NFSDBG_FACILITY NFSDBG_VFS
|
||||
|
||||
#define NFS_64_BIT_INODE_NUMBERS_ENABLED 1
|
||||
|
||||
/* Default is to see 64-bit inode numbers */
|
||||
static int enable_ino64 = NFS_64_BIT_INODE_NUMBERS_ENABLED;
|
||||
|
||||
static void nfs_invalidate_inode(struct inode *);
|
||||
static int nfs_update_inode(struct inode *, struct nfs_fattr *);
|
||||
|
||||
@ -62,6 +67,25 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr)
|
||||
return nfs_fileid_to_ino_t(fattr->fileid);
|
||||
}
|
||||
|
||||
/**
|
||||
* nfs_compat_user_ino64 - returns the user-visible inode number
|
||||
* @fileid: 64-bit fileid
|
||||
*
|
||||
* This function returns a 32-bit inode number if the boot parameter
|
||||
* nfs.enable_ino64 is zero.
|
||||
*/
|
||||
u64 nfs_compat_user_ino64(u64 fileid)
|
||||
{
|
||||
int ino;
|
||||
|
||||
if (enable_ino64)
|
||||
return fileid;
|
||||
ino = fileid;
|
||||
if (sizeof(ino) < sizeof(fileid))
|
||||
ino ^= fileid >> (sizeof(fileid)-sizeof(ino)) * 8;
|
||||
return ino;
|
||||
}
|
||||
|
||||
int nfs_write_inode(struct inode *inode, int sync)
|
||||
{
|
||||
int ret;
|
||||
@ -85,7 +109,6 @@ void nfs_clear_inode(struct inode *inode)
|
||||
*/
|
||||
BUG_ON(nfs_have_writebacks(inode));
|
||||
BUG_ON(!list_empty(&NFS_I(inode)->open_files));
|
||||
BUG_ON(atomic_read(&NFS_I(inode)->data_updates) != 0);
|
||||
nfs_zap_acl_cache(inode);
|
||||
nfs_access_zap_cache(inode);
|
||||
}
|
||||
@ -118,8 +141,8 @@ static void nfs_zap_caches_locked(struct inode *inode)
|
||||
|
||||
nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE);
|
||||
|
||||
NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode);
|
||||
NFS_ATTRTIMEO_UPDATE(inode) = jiffies;
|
||||
nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
|
||||
nfsi->attrtimeo_timestamp = jiffies;
|
||||
|
||||
memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode)));
|
||||
if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))
|
||||
@ -156,6 +179,13 @@ static void nfs_zap_acl_cache(struct inode *inode)
|
||||
spin_unlock(&inode->i_lock);
|
||||
}
|
||||
|
||||
void nfs_invalidate_atime(struct inode *inode)
|
||||
{
|
||||
spin_lock(&inode->i_lock);
|
||||
NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME;
|
||||
spin_unlock(&inode->i_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Invalidate, but do not unhash, the inode.
|
||||
* NB: must be called with inode->i_lock held!
|
||||
@ -338,7 +368,6 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
|
||||
return 0;
|
||||
|
||||
lock_kernel();
|
||||
nfs_begin_data_update(inode);
|
||||
/* Write all dirty data */
|
||||
if (S_ISREG(inode->i_mode)) {
|
||||
filemap_write_and_wait(inode->i_mapping);
|
||||
@ -352,7 +381,6 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
|
||||
error = NFS_PROTO(inode)->setattr(dentry, &fattr, attr);
|
||||
if (error == 0)
|
||||
nfs_refresh_inode(inode, &fattr);
|
||||
nfs_end_data_update(inode);
|
||||
unlock_kernel();
|
||||
return error;
|
||||
}
|
||||
@ -431,7 +459,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
|
||||
|
||||
/* Flush out writes to the server in order to update c/mtime */
|
||||
if (S_ISREG(inode->i_mode))
|
||||
nfs_sync_mapping_range(inode->i_mapping, 0, 0, FLUSH_NOCOMMIT);
|
||||
nfs_wb_nocommit(inode);
|
||||
|
||||
/*
|
||||
* We may force a getattr if the user cares about atime.
|
||||
@ -450,8 +478,10 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
|
||||
err = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
|
||||
else
|
||||
err = nfs_revalidate_inode(NFS_SERVER(inode), inode);
|
||||
if (!err)
|
||||
if (!err) {
|
||||
generic_fillattr(inode, stat);
|
||||
stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode));
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
@ -536,7 +566,7 @@ struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_c
|
||||
static void nfs_file_clear_open_context(struct file *filp)
|
||||
{
|
||||
struct inode *inode = filp->f_path.dentry->d_inode;
|
||||
struct nfs_open_context *ctx = (struct nfs_open_context *)filp->private_data;
|
||||
struct nfs_open_context *ctx = nfs_file_open_context(filp);
|
||||
|
||||
if (ctx) {
|
||||
filp->private_data = NULL;
|
||||
@ -598,16 +628,10 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
|
||||
status = nfs_wait_on_inode(inode);
|
||||
if (status < 0)
|
||||
goto out;
|
||||
if (NFS_STALE(inode)) {
|
||||
status = -ESTALE;
|
||||
/* Do we trust the cached ESTALE? */
|
||||
if (NFS_ATTRTIMEO(inode) != 0) {
|
||||
if (nfsi->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME)) {
|
||||
/* no */
|
||||
} else
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
status = -ESTALE;
|
||||
if (NFS_STALE(inode))
|
||||
goto out;
|
||||
|
||||
status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), &fattr);
|
||||
if (status != 0) {
|
||||
@ -654,7 +678,7 @@ int nfs_attribute_timeout(struct inode *inode)
|
||||
|
||||
if (nfs_have_delegation(inode, FMODE_READ))
|
||||
return 0;
|
||||
return time_after(jiffies, nfsi->read_cache_jiffies+nfsi->attrtimeo);
|
||||
return !time_in_range(jiffies, nfsi->read_cache_jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -683,11 +707,8 @@ static int nfs_invalidate_mapping_nolock(struct inode *inode, struct address_spa
|
||||
}
|
||||
spin_lock(&inode->i_lock);
|
||||
nfsi->cache_validity &= ~NFS_INO_INVALID_DATA;
|
||||
if (S_ISDIR(inode->i_mode)) {
|
||||
if (S_ISDIR(inode->i_mode))
|
||||
memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
|
||||
/* This ensures we revalidate child dentries */
|
||||
nfsi->cache_change_attribute = jiffies;
|
||||
}
|
||||
spin_unlock(&inode->i_lock);
|
||||
nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE);
|
||||
dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n",
|
||||
@ -756,56 +777,27 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* nfs_begin_data_update
|
||||
* @inode - pointer to inode
|
||||
* Declare that a set of operations will update file data on the server
|
||||
*/
|
||||
void nfs_begin_data_update(struct inode *inode)
|
||||
{
|
||||
atomic_inc(&NFS_I(inode)->data_updates);
|
||||
}
|
||||
|
||||
/**
|
||||
* nfs_end_data_update
|
||||
* @inode - pointer to inode
|
||||
* Declare end of the operations that will update file data
|
||||
* This will mark the inode as immediately needing revalidation
|
||||
* of its attribute cache.
|
||||
*/
|
||||
void nfs_end_data_update(struct inode *inode)
|
||||
{
|
||||
struct nfs_inode *nfsi = NFS_I(inode);
|
||||
|
||||
/* Directories: invalidate page cache */
|
||||
if (S_ISDIR(inode->i_mode)) {
|
||||
spin_lock(&inode->i_lock);
|
||||
nfsi->cache_validity |= NFS_INO_INVALID_DATA;
|
||||
spin_unlock(&inode->i_lock);
|
||||
}
|
||||
nfsi->cache_change_attribute = jiffies;
|
||||
atomic_dec(&nfsi->data_updates);
|
||||
}
|
||||
|
||||
static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr)
|
||||
{
|
||||
struct nfs_inode *nfsi = NFS_I(inode);
|
||||
unsigned long now = jiffies;
|
||||
|
||||
if ((fattr->valid & NFS_ATTR_WCC_V4) != 0 &&
|
||||
nfsi->change_attr == fattr->pre_change_attr) {
|
||||
nfsi->change_attr = fattr->change_attr;
|
||||
if (S_ISDIR(inode->i_mode))
|
||||
nfsi->cache_validity |= NFS_INO_INVALID_DATA;
|
||||
}
|
||||
/* If we have atomic WCC data, we may update some attributes */
|
||||
if ((fattr->valid & NFS_ATTR_WCC) != 0) {
|
||||
if (timespec_equal(&inode->i_ctime, &fattr->pre_ctime)) {
|
||||
if (timespec_equal(&inode->i_ctime, &fattr->pre_ctime))
|
||||
memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
|
||||
nfsi->cache_change_attribute = now;
|
||||
}
|
||||
if (timespec_equal(&inode->i_mtime, &fattr->pre_mtime)) {
|
||||
memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
|
||||
nfsi->cache_change_attribute = now;
|
||||
if (S_ISDIR(inode->i_mode))
|
||||
nfsi->cache_validity |= NFS_INO_INVALID_DATA;
|
||||
}
|
||||
if (inode->i_size == fattr->pre_size && nfsi->npages == 0) {
|
||||
if (inode->i_size == fattr->pre_size && nfsi->npages == 0)
|
||||
inode->i_size = fattr->size;
|
||||
nfsi->cache_change_attribute = now;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -822,7 +814,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
|
||||
{
|
||||
struct nfs_inode *nfsi = NFS_I(inode);
|
||||
loff_t cur_size, new_isize;
|
||||
int data_unstable;
|
||||
unsigned long invalid = 0;
|
||||
|
||||
|
||||
/* Has the inode gone and changed behind our back? */
|
||||
@ -831,37 +823,41 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
/* Are we in the process of updating data on the server? */
|
||||
data_unstable = nfs_caches_unstable(inode);
|
||||
|
||||
/* Do atomic weak cache consistency updates */
|
||||
nfs_wcc_update_inode(inode, fattr);
|
||||
|
||||
if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 &&
|
||||
nfsi->change_attr != fattr->change_attr)
|
||||
nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
|
||||
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
|
||||
|
||||
/* Verify a few of the more important attributes */
|
||||
if (!timespec_equal(&inode->i_mtime, &fattr->mtime))
|
||||
nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
|
||||
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
|
||||
|
||||
cur_size = i_size_read(inode);
|
||||
new_isize = nfs_size_to_loff_t(fattr->size);
|
||||
if (cur_size != new_isize && nfsi->npages == 0)
|
||||
nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
|
||||
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
|
||||
|
||||
/* Have any file permissions changed? */
|
||||
if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)
|
||||
|| inode->i_uid != fattr->uid
|
||||
|| inode->i_gid != fattr->gid)
|
||||
nfsi->cache_validity |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
|
||||
invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
|
||||
|
||||
/* Has the link count changed? */
|
||||
if (inode->i_nlink != fattr->nlink)
|
||||
nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
|
||||
invalid |= NFS_INO_INVALID_ATTR;
|
||||
|
||||
if (!timespec_equal(&inode->i_atime, &fattr->atime))
|
||||
nfsi->cache_validity |= NFS_INO_INVALID_ATIME;
|
||||
invalid |= NFS_INO_INVALID_ATIME;
|
||||
|
||||
if (invalid != 0)
|
||||
nfsi->cache_validity |= invalid;
|
||||
else
|
||||
nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR
|
||||
| NFS_INO_INVALID_ATIME
|
||||
| NFS_INO_REVAL_PAGECACHE);
|
||||
|
||||
nfsi->read_cache_jiffies = fattr->time_start;
|
||||
return 0;
|
||||
@ -911,17 +907,41 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
|
||||
int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr)
|
||||
{
|
||||
struct nfs_inode *nfsi = NFS_I(inode);
|
||||
int status = 0;
|
||||
|
||||
spin_lock(&inode->i_lock);
|
||||
if (unlikely((fattr->valid & NFS_ATTR_FATTR) == 0)) {
|
||||
nfsi->cache_validity |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
|
||||
goto out;
|
||||
}
|
||||
status = nfs_update_inode(inode, fattr);
|
||||
out:
|
||||
nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
|
||||
if (S_ISDIR(inode->i_mode))
|
||||
nfsi->cache_validity |= NFS_INO_INVALID_DATA;
|
||||
spin_unlock(&inode->i_lock);
|
||||
return status;
|
||||
return nfs_refresh_inode(inode, fattr);
|
||||
}
|
||||
|
||||
/**
|
||||
* nfs_post_op_update_inode_force_wcc - try to update the inode attribute cache
|
||||
* @inode - pointer to inode
|
||||
* @fattr - updated attributes
|
||||
*
|
||||
* After an operation that has changed the inode metadata, mark the
|
||||
* attribute cache as being invalid, then try to update it. Fake up
|
||||
* weak cache consistency data, if none exist.
|
||||
*
|
||||
* This function is mainly designed to be used by the ->write_done() functions.
|
||||
*/
|
||||
int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr)
|
||||
{
|
||||
if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 &&
|
||||
(fattr->valid & NFS_ATTR_WCC_V4) == 0) {
|
||||
fattr->pre_change_attr = NFS_I(inode)->change_attr;
|
||||
fattr->valid |= NFS_ATTR_WCC_V4;
|
||||
}
|
||||
if ((fattr->valid & NFS_ATTR_FATTR) != 0 &&
|
||||
(fattr->valid & NFS_ATTR_WCC) == 0) {
|
||||
memcpy(&fattr->pre_ctime, &inode->i_ctime, sizeof(fattr->pre_ctime));
|
||||
memcpy(&fattr->pre_mtime, &inode->i_mtime, sizeof(fattr->pre_mtime));
|
||||
fattr->pre_size = inode->i_size;
|
||||
fattr->valid |= NFS_ATTR_WCC;
|
||||
}
|
||||
return nfs_post_op_update_inode(inode, fattr);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -941,9 +961,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
|
||||
struct nfs_server *server;
|
||||
struct nfs_inode *nfsi = NFS_I(inode);
|
||||
loff_t cur_isize, new_isize;
|
||||
unsigned int invalid = 0;
|
||||
unsigned long invalid = 0;
|
||||
unsigned long now = jiffies;
|
||||
int data_stable;
|
||||
|
||||
dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n",
|
||||
__FUNCTION__, inode->i_sb->s_id, inode->i_ino,
|
||||
@ -968,57 +987,51 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
|
||||
* Update the read time so we don't revalidate too often.
|
||||
*/
|
||||
nfsi->read_cache_jiffies = fattr->time_start;
|
||||
nfsi->last_updated = now;
|
||||
|
||||
/* Fix a wraparound issue with nfsi->cache_change_attribute */
|
||||
if (time_before(now, nfsi->cache_change_attribute))
|
||||
nfsi->cache_change_attribute = now - 600*HZ;
|
||||
|
||||
/* Are we racing with known updates of the metadata on the server? */
|
||||
data_stable = nfs_verify_change_attribute(inode, fattr->time_start);
|
||||
if (data_stable)
|
||||
nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_ATIME);
|
||||
nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ATIME
|
||||
| NFS_INO_REVAL_PAGECACHE);
|
||||
|
||||
/* Do atomic weak cache consistency updates */
|
||||
nfs_wcc_update_inode(inode, fattr);
|
||||
|
||||
/* More cache consistency checks */
|
||||
if (!(fattr->valid & NFS_ATTR_FATTR_V4)) {
|
||||
/* NFSv2/v3: Check if the mtime agrees */
|
||||
if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) {
|
||||
dprintk("NFS: mtime change on server for file %s/%ld\n",
|
||||
inode->i_sb->s_id, inode->i_ino);
|
||||
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
|
||||
nfsi->cache_change_attribute = now;
|
||||
}
|
||||
/* If ctime has changed we should definitely clear access+acl caches */
|
||||
if (!timespec_equal(&inode->i_ctime, &fattr->ctime))
|
||||
invalid |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
|
||||
} else if (nfsi->change_attr != fattr->change_attr) {
|
||||
dprintk("NFS: change_attr change on server for file %s/%ld\n",
|
||||
inode->i_sb->s_id, inode->i_ino);
|
||||
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
|
||||
nfsi->cache_change_attribute = now;
|
||||
}
|
||||
|
||||
/* Check if our cached file size is stale */
|
||||
new_isize = nfs_size_to_loff_t(fattr->size);
|
||||
cur_isize = i_size_read(inode);
|
||||
if (new_isize != cur_isize) {
|
||||
/* Do we perhaps have any outstanding writes? */
|
||||
if (nfsi->npages == 0) {
|
||||
/* No, but did we race with nfs_end_data_update()? */
|
||||
if (data_stable) {
|
||||
inode->i_size = new_isize;
|
||||
invalid |= NFS_INO_INVALID_DATA;
|
||||
}
|
||||
invalid |= NFS_INO_INVALID_ATTR;
|
||||
} else if (new_isize > cur_isize) {
|
||||
/* Do we perhaps have any outstanding writes, or has
|
||||
* the file grown beyond our last write? */
|
||||
if (nfsi->npages == 0 || new_isize > cur_isize) {
|
||||
inode->i_size = new_isize;
|
||||
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
|
||||
}
|
||||
nfsi->cache_change_attribute = now;
|
||||
dprintk("NFS: isize change on server for file %s/%ld\n",
|
||||
inode->i_sb->s_id, inode->i_ino);
|
||||
}
|
||||
|
||||
/* Check if the mtime agrees */
|
||||
if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) {
|
||||
memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
|
||||
dprintk("NFS: mtime change on server for file %s/%ld\n",
|
||||
inode->i_sb->s_id, inode->i_ino);
|
||||
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
|
||||
nfsi->cache_change_attribute = now;
|
||||
}
|
||||
|
||||
/* If ctime has changed we should definitely clear access+acl caches */
|
||||
if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) {
|
||||
invalid |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
|
||||
memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
|
||||
nfsi->cache_change_attribute = now;
|
||||
}
|
||||
memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
|
||||
memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
|
||||
memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime));
|
||||
nfsi->change_attr = fattr->change_attr;
|
||||
|
||||
if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) ||
|
||||
inode->i_uid != fattr->uid ||
|
||||
@ -1039,31 +1052,29 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
|
||||
inode->i_blocks = fattr->du.nfs2.blocks;
|
||||
}
|
||||
|
||||
if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 &&
|
||||
nfsi->change_attr != fattr->change_attr) {
|
||||
dprintk("NFS: change_attr change on server for file %s/%ld\n",
|
||||
inode->i_sb->s_id, inode->i_ino);
|
||||
nfsi->change_attr = fattr->change_attr;
|
||||
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
|
||||
nfsi->cache_change_attribute = now;
|
||||
}
|
||||
|
||||
/* Update attrtimeo value if we're out of the unstable period */
|
||||
if (invalid & NFS_INO_INVALID_ATTR) {
|
||||
nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE);
|
||||
nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
|
||||
nfsi->attrtimeo_timestamp = now;
|
||||
} else if (time_after(now, nfsi->attrtimeo_timestamp+nfsi->attrtimeo)) {
|
||||
if ((nfsi->attrtimeo <<= 1) > NFS_MAXATTRTIMEO(inode))
|
||||
nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode);
|
||||
nfsi->attrtimeo_timestamp = now;
|
||||
nfsi->last_updated = now;
|
||||
} else {
|
||||
if (!time_in_range(now, nfsi->attrtimeo_timestamp, nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) {
|
||||
if ((nfsi->attrtimeo <<= 1) > NFS_MAXATTRTIMEO(inode))
|
||||
nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode);
|
||||
nfsi->attrtimeo_timestamp = now;
|
||||
}
|
||||
/*
|
||||
* Avoid jiffy wraparound issues with nfsi->last_updated
|
||||
*/
|
||||
if (!time_in_range(nfsi->last_updated, nfsi->read_cache_jiffies, now))
|
||||
nfsi->last_updated = nfsi->read_cache_jiffies;
|
||||
}
|
||||
invalid &= ~NFS_INO_INVALID_ATTR;
|
||||
/* Don't invalidate the data if we were to blame */
|
||||
if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)
|
||||
|| S_ISLNK(inode->i_mode)))
|
||||
invalid &= ~NFS_INO_INVALID_DATA;
|
||||
if (data_stable)
|
||||
invalid &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME|NFS_INO_REVAL_PAGECACHE);
|
||||
if (!nfs_have_delegation(inode, FMODE_READ) ||
|
||||
(nfsi->cache_validity & NFS_INO_REVAL_FORCED))
|
||||
nfsi->cache_validity |= invalid;
|
||||
@ -1152,7 +1163,6 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag
|
||||
INIT_LIST_HEAD(&nfsi->access_cache_entry_lru);
|
||||
INIT_LIST_HEAD(&nfsi->access_cache_inode_lru);
|
||||
INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC);
|
||||
atomic_set(&nfsi->data_updates, 0);
|
||||
nfsi->ncommit = 0;
|
||||
nfsi->npages = 0;
|
||||
nfs4_init_once(nfsi);
|
||||
@ -1249,6 +1259,7 @@ static void __exit exit_nfs_fs(void)
|
||||
/* Not quite true; I just maintain it */
|
||||
MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>");
|
||||
MODULE_LICENSE("GPL");
|
||||
module_param(enable_ino64, bool, 0644);
|
||||
|
||||
module_init(init_nfs_fs)
|
||||
module_exit(exit_nfs_fs)
|
||||
|
@ -5,8 +5,6 @@
|
||||
#include <linux/mount.h>
|
||||
|
||||
struct nfs_string;
|
||||
struct nfs_mount_data;
|
||||
struct nfs4_mount_data;
|
||||
|
||||
/* Maximum number of readahead requests
|
||||
* FIXME: this should really be a sysctl so that users may tune it to suit
|
||||
@ -27,20 +25,50 @@ struct nfs_clone_mount {
|
||||
rpc_authflavor_t authflavor;
|
||||
};
|
||||
|
||||
/*
|
||||
* In-kernel mount arguments, handed to nfs_create_server() and nfs4_create_server()
|
||||
*/
|
||||
struct nfs_parsed_mount_data {
|
||||
int flags; /* NOTE(review): presumably NFS_MOUNT_* option bits - confirm */
|
||||
int rsize, wsize; /* NOTE(review): presumably read/write transfer sizes - confirm units */
|
||||
int timeo, retrans; /* NOTE(review): presumably RPC timeout and retry count - confirm units */
|
||||
int acregmin, acregmax,
|
||||
acdirmin, acdirmax; /* NOTE(review): presumably attribute-cache min/max for files and dirs - confirm */
|
||||
int namlen;
|
||||
unsigned int bsize;
|
||||
unsigned int auth_flavor_len; /* NOTE(review): presumably number of valid auth_flavors[] entries - confirm */
|
||||
rpc_authflavor_t auth_flavors[1]; /* fixed-size array: room for exactly one flavor */
|
||||
char *client_address;
|
||||
|
||||
struct { /* NOTE(review): presumably the MOUNT service endpoint - confirm */
|
||||
struct sockaddr_in address;
|
||||
char *hostname;
|
||||
unsigned int program;
|
||||
unsigned int version;
|
||||
unsigned short port;
|
||||
int protocol;
|
||||
} mount_server;
|
||||
|
||||
struct { /* NOTE(review): presumably the NFS service endpoint - confirm */
|
||||
struct sockaddr_in address;
|
||||
char *hostname;
|
||||
char *export_path;
|
||||
unsigned int program;
|
||||
int protocol;
|
||||
} nfs_server;
|
||||
};
|
||||
|
||||
/* client.c */
|
||||
extern struct rpc_program nfs_program;
|
||||
|
||||
extern void nfs_put_client(struct nfs_client *);
|
||||
extern struct nfs_client *nfs_find_client(const struct sockaddr_in *, int);
|
||||
extern struct nfs_server *nfs_create_server(const struct nfs_mount_data *,
|
||||
struct nfs_fh *);
|
||||
extern struct nfs_server *nfs4_create_server(const struct nfs4_mount_data *,
|
||||
const char *,
|
||||
const struct sockaddr_in *,
|
||||
const char *,
|
||||
const char *,
|
||||
rpc_authflavor_t,
|
||||
struct nfs_fh *);
|
||||
extern struct nfs_server *nfs_create_server(
|
||||
const struct nfs_parsed_mount_data *,
|
||||
struct nfs_fh *);
|
||||
extern struct nfs_server *nfs4_create_server(
|
||||
const struct nfs_parsed_mount_data *,
|
||||
struct nfs_fh *);
|
||||
extern struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *,
|
||||
struct nfs_fh *);
|
||||
extern void nfs_free_server(struct nfs_server *server);
|
||||
|
@ -251,6 +251,7 @@ nfs_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args)
|
||||
replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readres_sz) << 2;
|
||||
xdr_inline_pages(&req->rq_rcv_buf, replen,
|
||||
args->pages, args->pgbase, count);
|
||||
req->rq_rcv_buf.flags |= XDRBUF_READ;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -271,7 +272,7 @@ nfs_xdr_readres(struct rpc_rqst *req, __be32 *p, struct nfs_readres *res)
|
||||
res->eof = 0;
|
||||
hdrlen = (u8 *) p - (u8 *) iov->iov_base;
|
||||
if (iov->iov_len < hdrlen) {
|
||||
printk(KERN_WARNING "NFS: READ reply header overflowed:"
|
||||
dprintk("NFS: READ reply header overflowed:"
|
||||
"length %d > %Zu\n", hdrlen, iov->iov_len);
|
||||
return -errno_NFSERR_IO;
|
||||
} else if (iov->iov_len != hdrlen) {
|
||||
@ -281,7 +282,7 @@ nfs_xdr_readres(struct rpc_rqst *req, __be32 *p, struct nfs_readres *res)
|
||||
|
||||
recvd = req->rq_rcv_buf.len - hdrlen;
|
||||
if (count > recvd) {
|
||||
printk(KERN_WARNING "NFS: server cheating in read reply: "
|
||||
dprintk("NFS: server cheating in read reply: "
|
||||
"count %d > recvd %d\n", count, recvd);
|
||||
count = recvd;
|
||||
}
|
||||
@ -313,6 +314,7 @@ nfs_xdr_writeargs(struct rpc_rqst *req, __be32 *p, struct nfs_writeargs *args)
|
||||
|
||||
/* Copy the page array */
|
||||
xdr_encode_pages(sndbuf, args->pages, args->pgbase, count);
|
||||
sndbuf->flags |= XDRBUF_WRITE;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -431,7 +433,7 @@ nfs_xdr_readdirres(struct rpc_rqst *req, __be32 *p, void *dummy)
|
||||
|
||||
hdrlen = (u8 *) p - (u8 *) iov->iov_base;
|
||||
if (iov->iov_len < hdrlen) {
|
||||
printk(KERN_WARNING "NFS: READDIR reply header overflowed:"
|
||||
dprintk("NFS: READDIR reply header overflowed:"
|
||||
"length %d > %Zu\n", hdrlen, iov->iov_len);
|
||||
return -errno_NFSERR_IO;
|
||||
} else if (iov->iov_len != hdrlen) {
|
||||
@ -454,7 +456,7 @@ nfs_xdr_readdirres(struct rpc_rqst *req, __be32 *p, void *dummy)
|
||||
len = ntohl(*p++);
|
||||
p += XDR_QUADLEN(len) + 1; /* name plus cookie */
|
||||
if (len > NFS2_MAXNAMLEN) {
|
||||
printk(KERN_WARNING "NFS: giant filename in readdir (len 0x%x)!\n",
|
||||
dprintk("NFS: giant filename in readdir (len 0x%x)!\n",
|
||||
len);
|
||||
goto err_unmap;
|
||||
}
|
||||
@ -471,7 +473,7 @@ nfs_xdr_readdirres(struct rpc_rqst *req, __be32 *p, void *dummy)
|
||||
entry[0] = entry[1] = 0;
|
||||
/* truncate listing ? */
|
||||
if (!nr) {
|
||||
printk(KERN_NOTICE "NFS: readdir reply truncated!\n");
|
||||
dprintk("NFS: readdir reply truncated!\n");
|
||||
entry[1] = 1;
|
||||
}
|
||||
goto out;
|
||||
@ -583,12 +585,12 @@ nfs_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, void *dummy)
|
||||
/* Convert length of symlink */
|
||||
len = ntohl(*p++);
|
||||
if (len >= rcvbuf->page_len || len <= 0) {
|
||||
dprintk(KERN_WARNING "nfs: server returned giant symlink!\n");
|
||||
dprintk("nfs: server returned giant symlink!\n");
|
||||
return -ENAMETOOLONG;
|
||||
}
|
||||
hdrlen = (u8 *) p - (u8 *) iov->iov_base;
|
||||
if (iov->iov_len < hdrlen) {
|
||||
printk(KERN_WARNING "NFS: READLINK reply header overflowed:"
|
||||
dprintk("NFS: READLINK reply header overflowed:"
|
||||
"length %d > %Zu\n", hdrlen, iov->iov_len);
|
||||
return -errno_NFSERR_IO;
|
||||
} else if (iov->iov_len != hdrlen) {
|
||||
@ -597,7 +599,7 @@ nfs_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, void *dummy)
|
||||
}
|
||||
recvd = req->rq_rcv_buf.len - hdrlen;
|
||||
if (recvd < len) {
|
||||
printk(KERN_WARNING "NFS: server cheating in readlink reply: "
|
||||
dprintk("NFS: server cheating in readlink reply: "
|
||||
"count %u > recvd %u\n", len, recvd);
|
||||
return -EIO;
|
||||
}
|
||||
@ -695,7 +697,7 @@ nfs_stat_to_errno(int stat)
|
||||
if (nfs_errtbl[i].stat == stat)
|
||||
return nfs_errtbl[i].errno;
|
||||
}
|
||||
printk(KERN_ERR "nfs_stat_to_errno: bad nfs status return value: %d\n", stat);
|
||||
dprintk("nfs_stat_to_errno: bad nfs status return value: %d\n", stat);
|
||||
return nfs_errtbl[i].errno;
|
||||
}
|
||||
|
||||
|
@ -317,13 +317,11 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
|
||||
}
|
||||
|
||||
dprintk("NFS call setacl\n");
|
||||
nfs_begin_data_update(inode);
|
||||
msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_SETACL];
|
||||
status = rpc_call_sync(server->client_acl, &msg, 0);
|
||||
spin_lock(&inode->i_lock);
|
||||
NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS;
|
||||
spin_unlock(&inode->i_lock);
|
||||
nfs_end_data_update(inode);
|
||||
dprintk("NFS reply setacl: %d\n", status);
|
||||
|
||||
/* pages may have been allocated at the xdr layer. */
|
||||
|
@ -166,6 +166,7 @@ nfs3_proc_lookup(struct inode *dir, struct qstr *name,
|
||||
nfs_fattr_init(&dir_attr);
|
||||
nfs_fattr_init(fattr);
|
||||
status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
|
||||
nfs_refresh_inode(dir, &dir_attr);
|
||||
if (status >= 0 && !(fattr->valid & NFS_ATTR_FATTR)) {
|
||||
msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR];
|
||||
msg.rpc_argp = fhandle;
|
||||
@ -173,8 +174,6 @@ nfs3_proc_lookup(struct inode *dir, struct qstr *name,
|
||||
status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
|
||||
}
|
||||
dprintk("NFS reply lookup: %d\n", status);
|
||||
if (status >= 0)
|
||||
status = nfs_refresh_inode(dir, &dir_attr);
|
||||
return status;
|
||||
}
|
||||
|
||||
@ -607,6 +606,9 @@ nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
|
||||
|
||||
nfs_fattr_init(&dir_attr);
|
||||
status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
|
||||
|
||||
nfs_invalidate_atime(dir);
|
||||
|
||||
nfs_refresh_inode(dir, &dir_attr);
|
||||
dprintk("NFS reply readdir: %d\n", status);
|
||||
return status;
|
||||
@ -724,9 +726,9 @@ static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data)
|
||||
{
|
||||
if (nfs3_async_handle_jukebox(task, data->inode))
|
||||
return -EAGAIN;
|
||||
/* Call back common NFS readpage processing */
|
||||
if (task->tk_status >= 0)
|
||||
nfs_refresh_inode(data->inode, &data->fattr);
|
||||
|
||||
nfs_invalidate_atime(data->inode);
|
||||
nfs_refresh_inode(data->inode, &data->fattr);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -747,7 +749,7 @@ static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data)
|
||||
if (nfs3_async_handle_jukebox(task, data->inode))
|
||||
return -EAGAIN;
|
||||
if (task->tk_status >= 0)
|
||||
nfs_post_op_update_inode(data->inode, data->res.fattr);
|
||||
nfs_post_op_update_inode_force_wcc(data->inode, data->res.fattr);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -775,8 +777,7 @@ static int nfs3_commit_done(struct rpc_task *task, struct nfs_write_data *data)
|
||||
{
|
||||
if (nfs3_async_handle_jukebox(task, data->inode))
|
||||
return -EAGAIN;
|
||||
if (task->tk_status >= 0)
|
||||
nfs_post_op_update_inode(data->inode, data->res.fattr);
|
||||
nfs_refresh_inode(data->inode, data->res.fattr);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -346,6 +346,7 @@ nfs3_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args)
|
||||
replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS3_readres_sz) << 2;
|
||||
xdr_inline_pages(&req->rq_rcv_buf, replen,
|
||||
args->pages, args->pgbase, count);
|
||||
req->rq_rcv_buf.flags |= XDRBUF_READ;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -367,6 +368,7 @@ nfs3_xdr_writeargs(struct rpc_rqst *req, __be32 *p, struct nfs_writeargs *args)
|
||||
|
||||
/* Copy the page array */
|
||||
xdr_encode_pages(sndbuf, args->pages, args->pgbase, count);
|
||||
sndbuf->flags |= XDRBUF_WRITE;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -524,7 +526,7 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res
|
||||
|
||||
hdrlen = (u8 *) p - (u8 *) iov->iov_base;
|
||||
if (iov->iov_len < hdrlen) {
|
||||
printk(KERN_WARNING "NFS: READDIR reply header overflowed:"
|
||||
dprintk("NFS: READDIR reply header overflowed:"
|
||||
"length %d > %Zu\n", hdrlen, iov->iov_len);
|
||||
return -errno_NFSERR_IO;
|
||||
} else if (iov->iov_len != hdrlen) {
|
||||
@ -547,7 +549,7 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res
|
||||
len = ntohl(*p++); /* string length */
|
||||
p += XDR_QUADLEN(len) + 2; /* name + cookie */
|
||||
if (len > NFS3_MAXNAMLEN) {
|
||||
printk(KERN_WARNING "NFS: giant filename in readdir (len %x)!\n",
|
||||
dprintk("NFS: giant filename in readdir (len %x)!\n",
|
||||
len);
|
||||
goto err_unmap;
|
||||
}
|
||||
@ -567,7 +569,7 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res
|
||||
goto short_pkt;
|
||||
len = ntohl(*p++);
|
||||
if (len > NFS3_FHSIZE) {
|
||||
printk(KERN_WARNING "NFS: giant filehandle in "
|
||||
dprintk("NFS: giant filehandle in "
|
||||
"readdir (len %x)!\n", len);
|
||||
goto err_unmap;
|
||||
}
|
||||
@ -588,7 +590,7 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res
|
||||
entry[0] = entry[1] = 0;
|
||||
/* truncate listing ? */
|
||||
if (!nr) {
|
||||
printk(KERN_NOTICE "NFS: readdir reply truncated!\n");
|
||||
dprintk("NFS: readdir reply truncated!\n");
|
||||
entry[1] = 1;
|
||||
}
|
||||
goto out;
|
||||
@ -826,22 +828,23 @@ nfs3_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr)
|
||||
/* Convert length of symlink */
|
||||
len = ntohl(*p++);
|
||||
if (len >= rcvbuf->page_len || len <= 0) {
|
||||
dprintk(KERN_WARNING "nfs: server returned giant symlink!\n");
|
||||
dprintk("nfs: server returned giant symlink!\n");
|
||||
return -ENAMETOOLONG;
|
||||
}
|
||||
|
||||
hdrlen = (u8 *) p - (u8 *) iov->iov_base;
|
||||
if (iov->iov_len < hdrlen) {
|
||||
printk(KERN_WARNING "NFS: READLINK reply header overflowed:"
|
||||
dprintk("NFS: READLINK reply header overflowed:"
|
||||
"length %d > %Zu\n", hdrlen, iov->iov_len);
|
||||
return -errno_NFSERR_IO;
|
||||
} else if (iov->iov_len != hdrlen) {
|
||||
dprintk("NFS: READLINK header is short. iovec will be shifted.\n");
|
||||
dprintk("NFS: READLINK header is short. "
|
||||
"iovec will be shifted.\n");
|
||||
xdr_shift_buf(rcvbuf, iov->iov_len - hdrlen);
|
||||
}
|
||||
recvd = req->rq_rcv_buf.len - hdrlen;
|
||||
if (recvd < len) {
|
||||
printk(KERN_WARNING "NFS: server cheating in readlink reply: "
|
||||
dprintk("NFS: server cheating in readlink reply: "
|
||||
"count %u > recvd %u\n", len, recvd);
|
||||
return -EIO;
|
||||
}
|
||||
@ -876,13 +879,13 @@ nfs3_xdr_readres(struct rpc_rqst *req, __be32 *p, struct nfs_readres *res)
|
||||
ocount = ntohl(*p++);
|
||||
|
||||
if (ocount != count) {
|
||||
printk(KERN_WARNING "NFS: READ count doesn't match RPC opaque count.\n");
|
||||
dprintk("NFS: READ count doesn't match RPC opaque count.\n");
|
||||
return -errno_NFSERR_IO;
|
||||
}
|
||||
|
||||
hdrlen = (u8 *) p - (u8 *) iov->iov_base;
|
||||
if (iov->iov_len < hdrlen) {
|
||||
printk(KERN_WARNING "NFS: READ reply header overflowed:"
|
||||
dprintk("NFS: READ reply header overflowed:"
|
||||
"length %d > %Zu\n", hdrlen, iov->iov_len);
|
||||
return -errno_NFSERR_IO;
|
||||
} else if (iov->iov_len != hdrlen) {
|
||||
@ -892,7 +895,7 @@ nfs3_xdr_readres(struct rpc_rqst *req, __be32 *p, struct nfs_readres *res)
|
||||
|
||||
recvd = req->rq_rcv_buf.len - hdrlen;
|
||||
if (count > recvd) {
|
||||
printk(KERN_WARNING "NFS: server cheating in read reply: "
|
||||
dprintk("NFS: server cheating in read reply: "
|
||||
"count %d > recvd %d\n", count, recvd);
|
||||
count = recvd;
|
||||
res->eof = 0;
|
||||
|
@ -62,10 +62,8 @@ struct nfs4_opendata;
|
||||
static int _nfs4_proc_open(struct nfs4_opendata *data);
|
||||
static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
|
||||
static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *);
|
||||
static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry);
|
||||
static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception);
|
||||
static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs_client *clp);
|
||||
static int _nfs4_do_access(struct inode *inode, struct rpc_cred *cred, int openflags);
|
||||
static int _nfs4_proc_lookup(struct inode *dir, const struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr);
|
||||
static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr);
|
||||
|
||||
@ -177,7 +175,7 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent
|
||||
*p++ = xdr_one; /* bitmap length */
|
||||
*p++ = htonl(FATTR4_WORD0_FILEID); /* bitmap */
|
||||
*p++ = htonl(8); /* attribute buffer length */
|
||||
p = xdr_encode_hyper(p, dentry->d_inode->i_ino);
|
||||
p = xdr_encode_hyper(p, NFS_FILEID(dentry->d_inode));
|
||||
}
|
||||
|
||||
*p++ = xdr_one; /* next */
|
||||
@ -189,7 +187,7 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent
|
||||
*p++ = xdr_one; /* bitmap length */
|
||||
*p++ = htonl(FATTR4_WORD0_FILEID); /* bitmap */
|
||||
*p++ = htonl(8); /* attribute buffer length */
|
||||
p = xdr_encode_hyper(p, dentry->d_parent->d_inode->i_ino);
|
||||
p = xdr_encode_hyper(p, NFS_FILEID(dentry->d_parent->d_inode));
|
||||
|
||||
readdir->pgbase = (char *)p - (char *)start;
|
||||
readdir->count -= readdir->pgbase;
|
||||
@ -211,8 +209,9 @@ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo)
|
||||
|
||||
spin_lock(&dir->i_lock);
|
||||
nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA;
|
||||
if (cinfo->before == nfsi->change_attr && cinfo->atomic)
|
||||
nfsi->change_attr = cinfo->after;
|
||||
if (!cinfo->atomic || cinfo->before != nfsi->change_attr)
|
||||
nfsi->cache_change_attribute = jiffies;
|
||||
nfsi->change_attr = cinfo->after;
|
||||
spin_unlock(&dir->i_lock);
|
||||
}
|
||||
|
||||
@ -454,7 +453,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata)
|
||||
memcpy(stateid.data, delegation->stateid.data, sizeof(stateid.data));
|
||||
rcu_read_unlock();
|
||||
lock_kernel();
|
||||
ret = _nfs4_do_access(state->inode, state->owner->so_cred, open_mode);
|
||||
ret = nfs_may_open(state->inode, state->owner->so_cred, open_mode);
|
||||
unlock_kernel();
|
||||
if (ret != 0)
|
||||
goto out;
|
||||
@ -948,36 +947,6 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Check whether @cred holds, on @inode, every access right implied by the
 * FMODE_READ / FMODE_WRITE / FMODE_EXEC bits set in @openflags.
 *
 * Returns 0 when all requested rights are present in the access entry,
 * -EACCES when at least one is missing, or the non-zero status of
 * _nfs4_proc_access() if that call fails.
 */
static int _nfs4_do_access(struct inode *inode, struct rpc_cred *cred, int openflags)
|
||||
{
|
||||
struct nfs_access_entry cache;
|
||||
int mask = 0;
|
||||
int status;
|
||||
|
||||
if (openflags & FMODE_READ) /* translate open-mode bits into MAY_* permission bits */
|
||||
mask |= MAY_READ;
|
||||
if (openflags & FMODE_WRITE)
|
||||
mask |= MAY_WRITE;
|
||||
if (openflags & FMODE_EXEC)
|
||||
mask |= MAY_EXEC;
|
||||
status = nfs_access_get_cached(inode, cred, &cache);
|
||||
if (status == 0) /* status 0 is treated as a usable entry in 'cache': skip the server call */
|
||||
goto out;
|
||||
|
||||
/* Be clever: ask server to check for all possible rights */
|
||||
cache.mask = MAY_EXEC | MAY_WRITE | MAY_READ;
|
||||
cache.cred = cred;
|
||||
cache.jiffies = jiffies;
|
||||
status = _nfs4_proc_access(inode, &cache);
|
||||
if (status != 0)
|
||||
return status;
|
||||
nfs_access_add_cache(inode, &cache); /* hand the freshly obtained entry to the access cache */
|
||||
out:
|
||||
if ((cache.mask & mask) == mask) /* every requested bit must be present in the entry */
|
||||
return 0;
|
||||
return -EACCES;
|
||||
}
|
||||
|
||||
static int nfs4_recover_expired_lease(struct nfs_server *server)
|
||||
{
|
||||
struct nfs_client *clp = server->nfs_client;
|
||||
@ -1381,7 +1350,7 @@ static int nfs4_intent_set_file(struct nameidata *nd, struct path *path, struct
|
||||
|
||||
/* If the open_intent is for execute, we have an extra check to make */
|
||||
if (nd->intent.open.flags & FMODE_EXEC) {
|
||||
ret = _nfs4_do_access(state->inode,
|
||||
ret = nfs_may_open(state->inode,
|
||||
state->owner->so_cred,
|
||||
nd->intent.open.flags);
|
||||
if (ret < 0)
|
||||
@ -1390,7 +1359,7 @@ static int nfs4_intent_set_file(struct nameidata *nd, struct path *path, struct
|
||||
filp = lookup_instantiate_filp(nd, path->dentry, NULL);
|
||||
if (!IS_ERR(filp)) {
|
||||
struct nfs_open_context *ctx;
|
||||
ctx = (struct nfs_open_context *)filp->private_data;
|
||||
ctx = nfs_file_open_context(filp);
|
||||
ctx->state = state;
|
||||
return 0;
|
||||
}
|
||||
@ -1428,13 +1397,16 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
|
||||
state = nfs4_do_open(dir, &path, nd->intent.open.flags, &attr, cred);
|
||||
put_rpccred(cred);
|
||||
if (IS_ERR(state)) {
|
||||
if (PTR_ERR(state) == -ENOENT)
|
||||
if (PTR_ERR(state) == -ENOENT) {
|
||||
d_add(dentry, NULL);
|
||||
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
|
||||
}
|
||||
return (struct dentry *)state;
|
||||
}
|
||||
res = d_add_unique(dentry, igrab(state->inode));
|
||||
if (res != NULL)
|
||||
path.dentry = res;
|
||||
nfs_set_verifier(path.dentry, nfs_save_change_attribute(dir));
|
||||
nfs4_intent_set_file(nd, &path, state);
|
||||
return res;
|
||||
}
|
||||
@ -1468,6 +1440,7 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, st
|
||||
}
|
||||
}
|
||||
if (state->inode == dentry->d_inode) {
|
||||
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
|
||||
nfs4_intent_set_file(nd, &path, state);
|
||||
return 1;
|
||||
}
|
||||
@ -1757,10 +1730,16 @@ static int nfs4_proc_lookup(struct inode *dir, struct qstr *name, struct nfs_fh
|
||||
|
||||
static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry)
|
||||
{
|
||||
struct nfs_server *server = NFS_SERVER(inode);
|
||||
struct nfs_fattr fattr;
|
||||
struct nfs4_accessargs args = {
|
||||
.fh = NFS_FH(inode),
|
||||
.bitmask = server->attr_bitmask,
|
||||
};
|
||||
struct nfs4_accessres res = {
|
||||
.server = server,
|
||||
.fattr = &fattr,
|
||||
};
|
||||
struct nfs4_accessres res = { 0 };
|
||||
struct rpc_message msg = {
|
||||
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_ACCESS],
|
||||
.rpc_argp = &args,
|
||||
@ -1786,6 +1765,7 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry
|
||||
if (mode & MAY_EXEC)
|
||||
args.access |= NFS4_ACCESS_EXECUTE;
|
||||
}
|
||||
nfs_fattr_init(&fattr);
|
||||
status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
|
||||
if (!status) {
|
||||
entry->mask = 0;
|
||||
@ -1795,6 +1775,7 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry
|
||||
entry->mask |= MAY_WRITE;
|
||||
if (res.access & (NFS4_ACCESS_LOOKUP|NFS4_ACCESS_EXECUTE))
|
||||
entry->mask |= MAY_EXEC;
|
||||
nfs_refresh_inode(inode, &fattr);
|
||||
}
|
||||
return status;
|
||||
}
|
||||
@ -1900,11 +1881,13 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
|
||||
}
|
||||
state = nfs4_do_open(dir, &path, flags, sattr, cred);
|
||||
put_rpccred(cred);
|
||||
d_drop(dentry);
|
||||
if (IS_ERR(state)) {
|
||||
status = PTR_ERR(state);
|
||||
goto out;
|
||||
}
|
||||
d_instantiate(dentry, igrab(state->inode));
|
||||
d_add(dentry, igrab(state->inode));
|
||||
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
|
||||
if (flags & O_EXCL) {
|
||||
struct nfs_fattr fattr;
|
||||
status = nfs4_do_setattr(state->inode, &fattr, sattr, state);
|
||||
@ -2218,6 +2201,9 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
|
||||
status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
|
||||
if (status == 0)
|
||||
memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE);
|
||||
|
||||
nfs_invalidate_atime(dir);
|
||||
|
||||
dprintk("%s: returns %d\n", __FUNCTION__, status);
|
||||
return status;
|
||||
}
|
||||
@ -2414,6 +2400,8 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
|
||||
rpc_restart_call(task);
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
nfs_invalidate_atime(data->inode);
|
||||
if (task->tk_status > 0)
|
||||
renew_lease(server, data->timestamp);
|
||||
return 0;
|
||||
@ -2443,7 +2431,7 @@ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
|
||||
}
|
||||
if (task->tk_status >= 0) {
|
||||
renew_lease(NFS_SERVER(inode), data->timestamp);
|
||||
nfs_post_op_update_inode(inode, data->res.fattr);
|
||||
nfs_post_op_update_inode_force_wcc(inode, data->res.fattr);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
@ -2485,8 +2473,7 @@ static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data)
|
||||
rpc_restart_call(task);
|
||||
return -EAGAIN;
|
||||
}
|
||||
if (task->tk_status >= 0)
|
||||
nfs_post_op_update_inode(inode, data->res.fattr);
|
||||
nfs_refresh_inode(inode, data->res.fattr);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -3056,7 +3043,7 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
|
||||
if (status == 0) {
|
||||
status = data->rpc_status;
|
||||
if (status == 0)
|
||||
nfs_post_op_update_inode(inode, &data->fattr);
|
||||
nfs_refresh_inode(inode, &data->fattr);
|
||||
}
|
||||
rpc_put_task(task);
|
||||
return status;
|
||||
@ -3303,7 +3290,7 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *
|
||||
status = -ENOMEM;
|
||||
if (seqid == NULL)
|
||||
goto out;
|
||||
task = nfs4_do_unlck(request, request->fl_file->private_data, lsp, seqid);
|
||||
task = nfs4_do_unlck(request, nfs_file_open_context(request->fl_file), lsp, seqid);
|
||||
status = PTR_ERR(task);
|
||||
if (IS_ERR(task))
|
||||
goto out;
|
||||
@ -3447,7 +3434,7 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
|
||||
int ret;
|
||||
|
||||
dprintk("%s: begin!\n", __FUNCTION__);
|
||||
data = nfs4_alloc_lockdata(fl, fl->fl_file->private_data,
|
||||
data = nfs4_alloc_lockdata(fl, nfs_file_open_context(fl->fl_file),
|
||||
fl->fl_u.nfs4_fl.owner);
|
||||
if (data == NULL)
|
||||
return -ENOMEM;
|
||||
@ -3573,7 +3560,7 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request)
|
||||
int status;
|
||||
|
||||
/* verify open state */
|
||||
ctx = (struct nfs_open_context *)filp->private_data;
|
||||
ctx = nfs_file_open_context(filp);
|
||||
state = ctx->state;
|
||||
|
||||
if (request->fl_start < 0 || request->fl_end < 0)
|
||||
|
@ -774,7 +774,7 @@ static int nfs4_reclaim_locks(struct nfs4_state_recovery_ops *ops, struct nfs4_s
|
||||
for (fl = inode->i_flock; fl != 0; fl = fl->fl_next) {
|
||||
if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK)))
|
||||
continue;
|
||||
if (((struct nfs_open_context *)fl->fl_file->private_data)->state != state)
|
||||
if (nfs_file_open_context(fl->fl_file)->state != state)
|
||||
continue;
|
||||
status = ops->recover_lock(state, fl);
|
||||
if (status >= 0)
|
||||
|
@ -376,10 +376,12 @@ static int nfs4_stat_to_errno(int);
|
||||
decode_locku_maxsz)
|
||||
#define NFS4_enc_access_sz (compound_encode_hdr_maxsz + \
|
||||
encode_putfh_maxsz + \
|
||||
encode_access_maxsz)
|
||||
encode_access_maxsz + \
|
||||
encode_getattr_maxsz)
|
||||
#define NFS4_dec_access_sz (compound_decode_hdr_maxsz + \
|
||||
decode_putfh_maxsz + \
|
||||
decode_access_maxsz)
|
||||
decode_access_maxsz + \
|
||||
decode_getattr_maxsz)
|
||||
#define NFS4_enc_getattr_sz (compound_encode_hdr_maxsz + \
|
||||
encode_putfh_maxsz + \
|
||||
encode_getattr_maxsz)
|
||||
@ -562,7 +564,6 @@ struct compound_hdr {
|
||||
|
||||
#define RESERVE_SPACE(nbytes) do { \
|
||||
p = xdr_reserve_space(xdr, nbytes); \
|
||||
if (!p) printk("RESERVE_SPACE(%d) failed in function %s\n", (int) (nbytes), __FUNCTION__); \
|
||||
BUG_ON(!p); \
|
||||
} while (0)
|
||||
|
||||
@ -628,8 +629,8 @@ static int encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const s
|
||||
if (iap->ia_valid & ATTR_UID) {
|
||||
owner_namelen = nfs_map_uid_to_name(server->nfs_client, iap->ia_uid, owner_name);
|
||||
if (owner_namelen < 0) {
|
||||
printk(KERN_WARNING "nfs: couldn't resolve uid %d to string\n",
|
||||
iap->ia_uid);
|
||||
dprintk("nfs: couldn't resolve uid %d to string\n",
|
||||
iap->ia_uid);
|
||||
/* XXX */
|
||||
strcpy(owner_name, "nobody");
|
||||
owner_namelen = sizeof("nobody") - 1;
|
||||
@ -640,8 +641,8 @@ static int encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const s
|
||||
if (iap->ia_valid & ATTR_GID) {
|
||||
owner_grouplen = nfs_map_gid_to_group(server->nfs_client, iap->ia_gid, owner_group);
|
||||
if (owner_grouplen < 0) {
|
||||
printk(KERN_WARNING "nfs4: couldn't resolve gid %d to string\n",
|
||||
iap->ia_gid);
|
||||
dprintk("nfs: couldn't resolve gid %d to string\n",
|
||||
iap->ia_gid);
|
||||
strcpy(owner_group, "nobody");
|
||||
owner_grouplen = sizeof("nobody") - 1;
|
||||
/* goto out; */
|
||||
@ -711,7 +712,7 @@ static int encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const s
|
||||
* Now we backfill the bitmap and the attribute buffer length.
|
||||
*/
|
||||
if (len != ((char *)p - (char *)q) + 4) {
|
||||
printk ("encode_attr: Attr length calculation error! %u != %Zu\n",
|
||||
printk(KERN_ERR "nfs: Attr length error, %u != %Zu\n",
|
||||
len, ((char *)p - (char *)q) + 4);
|
||||
BUG();
|
||||
}
|
||||
@ -1376,14 +1377,20 @@ static int nfs4_xdr_enc_access(struct rpc_rqst *req, __be32 *p, const struct nfs
|
||||
{
|
||||
struct xdr_stream xdr;
|
||||
struct compound_hdr hdr = {
|
||||
.nops = 2,
|
||||
.nops = 3,
|
||||
};
|
||||
int status;
|
||||
|
||||
xdr_init_encode(&xdr, &req->rq_snd_buf, p);
|
||||
encode_compound_hdr(&xdr, &hdr);
|
||||
if ((status = encode_putfh(&xdr, args->fh)) == 0)
|
||||
status = encode_access(&xdr, args->access);
|
||||
status = encode_putfh(&xdr, args->fh);
|
||||
if (status != 0)
|
||||
goto out;
|
||||
status = encode_access(&xdr, args->access);
|
||||
if (status != 0)
|
||||
goto out;
|
||||
status = encode_getfattr(&xdr, args->bitmask);
|
||||
out:
|
||||
return status;
|
||||
}
|
||||
|
||||
@ -1857,6 +1864,7 @@ static int nfs4_xdr_enc_read(struct rpc_rqst *req, __be32 *p, struct nfs_readarg
|
||||
replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS4_dec_read_sz) << 2;
|
||||
xdr_inline_pages(&req->rq_rcv_buf, replen,
|
||||
args->pages, args->pgbase, args->count);
|
||||
req->rq_rcv_buf.flags |= XDRBUF_READ;
|
||||
out:
|
||||
return status;
|
||||
}
|
||||
@ -1933,6 +1941,7 @@ static int nfs4_xdr_enc_write(struct rpc_rqst *req, __be32 *p, struct nfs_writea
|
||||
status = encode_write(&xdr, args);
|
||||
if (status)
|
||||
goto out;
|
||||
req->rq_snd_buf.flags |= XDRBUF_WRITE;
|
||||
status = encode_getfattr(&xdr, args->bitmask);
|
||||
out:
|
||||
return status;
|
||||
@ -2180,9 +2189,9 @@ out:
|
||||
#define READ_BUF(nbytes) do { \
|
||||
p = xdr_inline_decode(xdr, nbytes); \
|
||||
if (unlikely(!p)) { \
|
||||
printk(KERN_INFO "%s: prematurely hit end of receive" \
|
||||
dprintk("nfs: %s: prematurely hit end of receive" \
|
||||
" buffer\n", __FUNCTION__); \
|
||||
printk(KERN_INFO "%s: xdr->p=%p, bytes=%u, xdr->end=%p\n", \
|
||||
dprintk("nfs: %s: xdr->p=%p, bytes=%u, xdr->end=%p\n", \
|
||||
__FUNCTION__, xdr->p, nbytes, xdr->end); \
|
||||
return -EIO; \
|
||||
} \
|
||||
@ -2223,9 +2232,8 @@ static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
|
||||
READ_BUF(8);
|
||||
READ32(opnum);
|
||||
if (opnum != expected) {
|
||||
printk(KERN_NOTICE
|
||||
"nfs4_decode_op_hdr: Server returned operation"
|
||||
" %d but we issued a request for %d\n",
|
||||
dprintk("nfs: Server returned operation"
|
||||
" %d but we issued a request for %d\n",
|
||||
opnum, expected);
|
||||
return -EIO;
|
||||
}
|
||||
@ -2758,7 +2766,7 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nf
|
||||
dprintk("%s: nfs_map_name_to_uid failed!\n",
|
||||
__FUNCTION__);
|
||||
} else
|
||||
printk(KERN_WARNING "%s: name too long (%u)!\n",
|
||||
dprintk("%s: name too long (%u)!\n",
|
||||
__FUNCTION__, len);
|
||||
bitmap[1] &= ~FATTR4_WORD1_OWNER;
|
||||
}
|
||||
@ -2783,7 +2791,7 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nf
|
||||
dprintk("%s: nfs_map_group_to_gid failed!\n",
|
||||
__FUNCTION__);
|
||||
} else
|
||||
printk(KERN_WARNING "%s: name too long (%u)!\n",
|
||||
dprintk("%s: name too long (%u)!\n",
|
||||
__FUNCTION__, len);
|
||||
bitmap[1] &= ~FATTR4_WORD1_OWNER_GROUP;
|
||||
}
|
||||
@ -2950,7 +2958,8 @@ static int verify_attr_len(struct xdr_stream *xdr, __be32 *savep, uint32_t attrl
|
||||
unsigned int nwords = xdr->p - savep;
|
||||
|
||||
if (unlikely(attrwords != nwords)) {
|
||||
printk(KERN_WARNING "%s: server returned incorrect attribute length: %u %c %u\n",
|
||||
dprintk("%s: server returned incorrect attribute length: "
|
||||
"%u %c %u\n",
|
||||
__FUNCTION__,
|
||||
attrwords << 2,
|
||||
(attrwords < nwords) ? '<' : '>',
|
||||
@ -3451,7 +3460,7 @@ static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs_
|
||||
hdrlen = (u8 *) p - (u8 *) iov->iov_base;
|
||||
recvd = req->rq_rcv_buf.len - hdrlen;
|
||||
if (count > recvd) {
|
||||
printk(KERN_WARNING "NFS: server cheating in read reply: "
|
||||
dprintk("NFS: server cheating in read reply: "
|
||||
"count %u > recvd %u\n", count, recvd);
|
||||
count = recvd;
|
||||
eof = 0;
|
||||
@ -3500,7 +3509,8 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n
|
||||
p += 2; /* cookie */
|
||||
len = ntohl(*p++); /* filename length */
|
||||
if (len > NFS4_MAXNAMLEN) {
|
||||
printk(KERN_WARNING "NFS: giant filename in readdir (len 0x%x)\n", len);
|
||||
dprintk("NFS: giant filename in readdir (len 0x%x)\n",
|
||||
len);
|
||||
goto err_unmap;
|
||||
}
|
||||
xlen = XDR_QUADLEN(len);
|
||||
@ -3528,7 +3538,7 @@ short_pkt:
|
||||
entry[0] = entry[1] = 0;
|
||||
/* truncate listing ? */
|
||||
if (!nr) {
|
||||
printk(KERN_NOTICE "NFS: readdir reply truncated!\n");
|
||||
dprintk("NFS: readdir reply truncated!\n");
|
||||
entry[1] = 1;
|
||||
}
|
||||
goto out;
|
||||
@ -3554,13 +3564,13 @@ static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req)
|
||||
READ_BUF(4);
|
||||
READ32(len);
|
||||
if (len >= rcvbuf->page_len || len <= 0) {
|
||||
dprintk(KERN_WARNING "nfs: server returned giant symlink!\n");
|
||||
dprintk("nfs: server returned giant symlink!\n");
|
||||
return -ENAMETOOLONG;
|
||||
}
|
||||
hdrlen = (char *) xdr->p - (char *) iov->iov_base;
|
||||
recvd = req->rq_rcv_buf.len - hdrlen;
|
||||
if (recvd < len) {
|
||||
printk(KERN_WARNING "NFS: server cheating in readlink reply: "
|
||||
dprintk("NFS: server cheating in readlink reply: "
|
||||
"count %u > recvd %u\n", len, recvd);
|
||||
return -EIO;
|
||||
}
|
||||
@ -3643,7 +3653,7 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req,
|
||||
hdrlen = (u8 *)xdr->p - (u8 *)iov->iov_base;
|
||||
recvd = req->rq_rcv_buf.len - hdrlen;
|
||||
if (attrlen > recvd) {
|
||||
printk(KERN_WARNING "NFS: server cheating in getattr"
|
||||
dprintk("NFS: server cheating in getattr"
|
||||
" acl reply: attrlen %u > recvd %u\n",
|
||||
attrlen, recvd);
|
||||
return -EINVAL;
|
||||
@ -3688,8 +3698,7 @@ static int decode_setclientid(struct xdr_stream *xdr, struct nfs_client *clp)
|
||||
READ_BUF(8);
|
||||
READ32(opnum);
|
||||
if (opnum != OP_SETCLIENTID) {
|
||||
printk(KERN_NOTICE
|
||||
"nfs4_decode_setclientid: Server returned operation"
|
||||
dprintk("nfs: decode_setclientid: Server returned operation"
|
||||
" %d\n", opnum);
|
||||
return -EIO;
|
||||
}
|
||||
@ -3783,8 +3792,13 @@ static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_ac
|
||||
xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
|
||||
if ((status = decode_compound_hdr(&xdr, &hdr)) != 0)
|
||||
goto out;
|
||||
if ((status = decode_putfh(&xdr)) == 0)
|
||||
status = decode_access(&xdr, res);
|
||||
status = decode_putfh(&xdr);
|
||||
if (status != 0)
|
||||
goto out;
|
||||
status = decode_access(&xdr, res);
|
||||
if (status != 0)
|
||||
goto out;
|
||||
decode_getfattr(&xdr, res->fattr, res->server);
|
||||
out:
|
||||
return status;
|
||||
}
|
||||
|
@ -76,6 +76,7 @@
|
||||
#include <linux/fs.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/sunrpc/clnt.h>
|
||||
#include <linux/sunrpc/xprtsock.h>
|
||||
#include <linux/nfs.h>
|
||||
#include <linux/nfs_fs.h>
|
||||
#include <linux/nfs_mount.h>
|
||||
@ -491,7 +492,7 @@ static int __init root_nfs_get_handle(void)
|
||||
struct sockaddr_in sin;
|
||||
int status;
|
||||
int protocol = (nfs_data.flags & NFS_MOUNT_TCP) ?
|
||||
IPPROTO_TCP : IPPROTO_UDP;
|
||||
XPRT_TRANSPORT_TCP : XPRT_TRANSPORT_UDP;
|
||||
int version = (nfs_data.flags & NFS_MOUNT_VER3) ?
|
||||
NFS_MNT3_VERSION : NFS_MNT_VERSION;
|
||||
|
||||
|
@ -476,6 +476,8 @@ nfs_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
|
||||
dprintk("NFS call readdir %d\n", (unsigned int)cookie);
|
||||
status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
|
||||
|
||||
nfs_invalidate_atime(dir);
|
||||
|
||||
dprintk("NFS reply readdir: %d\n", status);
|
||||
return status;
|
||||
}
|
||||
@ -550,6 +552,7 @@ nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
|
||||
|
||||
static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data)
|
||||
{
|
||||
nfs_invalidate_atime(data->inode);
|
||||
if (task->tk_status >= 0) {
|
||||
nfs_refresh_inode(data->inode, data->res.fattr);
|
||||
/* Emulate the eof flag, which isn't normally needed in NFSv2
|
||||
@ -576,7 +579,7 @@ static void nfs_proc_read_setup(struct nfs_read_data *data)
|
||||
static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data)
|
||||
{
|
||||
if (task->tk_status >= 0)
|
||||
nfs_post_op_update_inode(data->inode, data->res.fattr);
|
||||
nfs_post_op_update_inode_force_wcc(data->inode, data->res.fattr);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -341,9 +341,6 @@ int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
|
||||
set_bit(NFS_INO_STALE, &NFS_FLAGS(data->inode));
|
||||
nfs_mark_for_revalidate(data->inode);
|
||||
}
|
||||
spin_lock(&data->inode->i_lock);
|
||||
NFS_I(data->inode)->cache_validity |= NFS_INO_INVALID_ATIME;
|
||||
spin_unlock(&data->inode->i_lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -497,8 +494,7 @@ int nfs_readpage(struct file *file, struct page *page)
|
||||
if (ctx == NULL)
|
||||
goto out_unlock;
|
||||
} else
|
||||
ctx = get_nfs_open_context((struct nfs_open_context *)
|
||||
file->private_data);
|
||||
ctx = get_nfs_open_context(nfs_file_open_context(file));
|
||||
|
||||
error = nfs_readpage_async(ctx, inode, page);
|
||||
|
||||
@ -576,8 +572,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
|
||||
if (desc.ctx == NULL)
|
||||
return -EBADF;
|
||||
} else
|
||||
desc.ctx = get_nfs_open_context((struct nfs_open_context *)
|
||||
filp->private_data);
|
||||
desc.ctx = get_nfs_open_context(nfs_file_open_context(filp));
|
||||
if (rsize < PAGE_CACHE_SIZE)
|
||||
nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0);
|
||||
else
|
||||
|
389
fs/nfs/super.c
389
fs/nfs/super.c
@ -33,6 +33,8 @@
|
||||
#include <linux/sunrpc/clnt.h>
|
||||
#include <linux/sunrpc/stats.h>
|
||||
#include <linux/sunrpc/metrics.h>
|
||||
#include <linux/sunrpc/xprtsock.h>
|
||||
#include <linux/sunrpc/xprtrdma.h>
|
||||
#include <linux/nfs_fs.h>
|
||||
#include <linux/nfs_mount.h>
|
||||
#include <linux/nfs4_mount.h>
|
||||
@ -58,36 +60,6 @@
|
||||
|
||||
#define NFSDBG_FACILITY NFSDBG_VFS
|
||||
|
||||
|
||||
struct nfs_parsed_mount_data {
|
||||
int flags;
|
||||
int rsize, wsize;
|
||||
int timeo, retrans;
|
||||
int acregmin, acregmax,
|
||||
acdirmin, acdirmax;
|
||||
int namlen;
|
||||
unsigned int bsize;
|
||||
unsigned int auth_flavor_len;
|
||||
rpc_authflavor_t auth_flavors[1];
|
||||
char *client_address;
|
||||
|
||||
struct {
|
||||
struct sockaddr_in address;
|
||||
unsigned int program;
|
||||
unsigned int version;
|
||||
unsigned short port;
|
||||
int protocol;
|
||||
} mount_server;
|
||||
|
||||
struct {
|
||||
struct sockaddr_in address;
|
||||
char *hostname;
|
||||
char *export_path;
|
||||
unsigned int program;
|
||||
int protocol;
|
||||
} nfs_server;
|
||||
};
|
||||
|
||||
enum {
|
||||
/* Mount options that take no arguments */
|
||||
Opt_soft, Opt_hard,
|
||||
@ -97,7 +69,7 @@ enum {
|
||||
Opt_ac, Opt_noac,
|
||||
Opt_lock, Opt_nolock,
|
||||
Opt_v2, Opt_v3,
|
||||
Opt_udp, Opt_tcp,
|
||||
Opt_udp, Opt_tcp, Opt_rdma,
|
||||
Opt_acl, Opt_noacl,
|
||||
Opt_rdirplus, Opt_nordirplus,
|
||||
Opt_sharecache, Opt_nosharecache,
|
||||
@ -116,7 +88,7 @@ enum {
|
||||
|
||||
/* Mount options that take string arguments */
|
||||
Opt_sec, Opt_proto, Opt_mountproto,
|
||||
Opt_addr, Opt_mounthost, Opt_clientaddr,
|
||||
Opt_addr, Opt_mountaddr, Opt_clientaddr,
|
||||
|
||||
/* Mount options that are ignored */
|
||||
Opt_userspace, Opt_deprecated,
|
||||
@ -143,6 +115,7 @@ static match_table_t nfs_mount_option_tokens = {
|
||||
{ Opt_v3, "v3" },
|
||||
{ Opt_udp, "udp" },
|
||||
{ Opt_tcp, "tcp" },
|
||||
{ Opt_rdma, "rdma" },
|
||||
{ Opt_acl, "acl" },
|
||||
{ Opt_noacl, "noacl" },
|
||||
{ Opt_rdirplus, "rdirplus" },
|
||||
@ -175,13 +148,14 @@ static match_table_t nfs_mount_option_tokens = {
|
||||
{ Opt_mountproto, "mountproto=%s" },
|
||||
{ Opt_addr, "addr=%s" },
|
||||
{ Opt_clientaddr, "clientaddr=%s" },
|
||||
{ Opt_mounthost, "mounthost=%s" },
|
||||
{ Opt_userspace, "mounthost=%s" },
|
||||
{ Opt_mountaddr, "mountaddr=%s" },
|
||||
|
||||
{ Opt_err, NULL }
|
||||
};
|
||||
|
||||
enum {
|
||||
Opt_xprt_udp, Opt_xprt_tcp,
|
||||
Opt_xprt_udp, Opt_xprt_tcp, Opt_xprt_rdma,
|
||||
|
||||
Opt_xprt_err
|
||||
};
|
||||
@ -189,6 +163,7 @@ enum {
|
||||
static match_table_t nfs_xprt_protocol_tokens = {
|
||||
{ Opt_xprt_udp, "udp" },
|
||||
{ Opt_xprt_tcp, "tcp" },
|
||||
{ Opt_xprt_rdma, "rdma" },
|
||||
|
||||
{ Opt_xprt_err, NULL }
|
||||
};
|
||||
@ -449,7 +424,7 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
|
||||
const char *nostr;
|
||||
} nfs_info[] = {
|
||||
{ NFS_MOUNT_SOFT, ",soft", ",hard" },
|
||||
{ NFS_MOUNT_INTR, ",intr", "" },
|
||||
{ NFS_MOUNT_INTR, ",intr", ",nointr" },
|
||||
{ NFS_MOUNT_NOCTO, ",nocto", "" },
|
||||
{ NFS_MOUNT_NOAC, ",noac", "" },
|
||||
{ NFS_MOUNT_NONLM, ",nolock", "" },
|
||||
@ -460,8 +435,6 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
|
||||
};
|
||||
const struct proc_nfs_info *nfs_infop;
|
||||
struct nfs_client *clp = nfss->nfs_client;
|
||||
char buf[12];
|
||||
const char *proto;
|
||||
|
||||
seq_printf(m, ",vers=%d", clp->rpc_ops->version);
|
||||
seq_printf(m, ",rsize=%d", nfss->rsize);
|
||||
@ -480,18 +453,8 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
|
||||
else
|
||||
seq_puts(m, nfs_infop->nostr);
|
||||
}
|
||||
switch (nfss->client->cl_xprt->prot) {
|
||||
case IPPROTO_TCP:
|
||||
proto = "tcp";
|
||||
break;
|
||||
case IPPROTO_UDP:
|
||||
proto = "udp";
|
||||
break;
|
||||
default:
|
||||
snprintf(buf, sizeof(buf), "%u", nfss->client->cl_xprt->prot);
|
||||
proto = buf;
|
||||
}
|
||||
seq_printf(m, ",proto=%s", proto);
|
||||
seq_printf(m, ",proto=%s",
|
||||
rpc_peeraddr2str(nfss->client, RPC_DISPLAY_PROTO));
|
||||
seq_printf(m, ",timeo=%lu", 10U * clp->retrans_timeo / HZ);
|
||||
seq_printf(m, ",retrans=%u", clp->retrans_count);
|
||||
seq_printf(m, ",sec=%s", nfs_pseudoflavour_to_name(nfss->client->cl_auth->au_flavor));
|
||||
@ -506,8 +469,8 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
|
||||
|
||||
nfs_show_mount_options(m, nfss, 0);
|
||||
|
||||
seq_puts(m, ",addr=");
|
||||
seq_escape(m, nfss->nfs_client->cl_hostname, " \t\n\\");
|
||||
seq_printf(m, ",addr="NIPQUAD_FMT,
|
||||
NIPQUAD(nfss->nfs_client->cl_addr.sin_addr));
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -698,13 +661,19 @@ static int nfs_parse_mount_options(char *raw,
|
||||
break;
|
||||
case Opt_udp:
|
||||
mnt->flags &= ~NFS_MOUNT_TCP;
|
||||
mnt->nfs_server.protocol = IPPROTO_UDP;
|
||||
mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP;
|
||||
mnt->timeo = 7;
|
||||
mnt->retrans = 5;
|
||||
break;
|
||||
case Opt_tcp:
|
||||
mnt->flags |= NFS_MOUNT_TCP;
|
||||
mnt->nfs_server.protocol = IPPROTO_TCP;
|
||||
mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP;
|
||||
mnt->timeo = 600;
|
||||
mnt->retrans = 2;
|
||||
break;
|
||||
case Opt_rdma:
|
||||
mnt->flags |= NFS_MOUNT_TCP; /* for side protocols */
|
||||
mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA;
|
||||
mnt->timeo = 600;
|
||||
mnt->retrans = 2;
|
||||
break;
|
||||
@ -913,13 +882,20 @@ static int nfs_parse_mount_options(char *raw,
|
||||
switch (token) {
|
||||
case Opt_xprt_udp:
|
||||
mnt->flags &= ~NFS_MOUNT_TCP;
|
||||
mnt->nfs_server.protocol = IPPROTO_UDP;
|
||||
mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP;
|
||||
mnt->timeo = 7;
|
||||
mnt->retrans = 5;
|
||||
break;
|
||||
case Opt_xprt_tcp:
|
||||
mnt->flags |= NFS_MOUNT_TCP;
|
||||
mnt->nfs_server.protocol = IPPROTO_TCP;
|
||||
mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP;
|
||||
mnt->timeo = 600;
|
||||
mnt->retrans = 2;
|
||||
break;
|
||||
case Opt_xprt_rdma:
|
||||
/* vector side protocols to TCP */
|
||||
mnt->flags |= NFS_MOUNT_TCP;
|
||||
mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA;
|
||||
mnt->timeo = 600;
|
||||
mnt->retrans = 2;
|
||||
break;
|
||||
@ -937,11 +913,12 @@ static int nfs_parse_mount_options(char *raw,
|
||||
|
||||
switch (token) {
|
||||
case Opt_xprt_udp:
|
||||
mnt->mount_server.protocol = IPPROTO_UDP;
|
||||
mnt->mount_server.protocol = XPRT_TRANSPORT_UDP;
|
||||
break;
|
||||
case Opt_xprt_tcp:
|
||||
mnt->mount_server.protocol = IPPROTO_TCP;
|
||||
mnt->mount_server.protocol = XPRT_TRANSPORT_TCP;
|
||||
break;
|
||||
case Opt_xprt_rdma: /* not used for side protocols */
|
||||
default:
|
||||
goto out_unrec_xprt;
|
||||
}
|
||||
@ -961,7 +938,7 @@ static int nfs_parse_mount_options(char *raw,
|
||||
goto out_nomem;
|
||||
mnt->client_address = string;
|
||||
break;
|
||||
case Opt_mounthost:
|
||||
case Opt_mountaddr:
|
||||
string = match_strdup(args);
|
||||
if (string == NULL)
|
||||
goto out_nomem;
|
||||
@ -1027,16 +1004,10 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args,
|
||||
sin = args->mount_server.address;
|
||||
else
|
||||
sin = args->nfs_server.address;
|
||||
if (args->mount_server.port == 0) {
|
||||
status = rpcb_getport_sync(&sin,
|
||||
args->mount_server.program,
|
||||
args->mount_server.version,
|
||||
args->mount_server.protocol);
|
||||
if (status < 0)
|
||||
goto out_err;
|
||||
sin.sin_port = htons(status);
|
||||
} else
|
||||
sin.sin_port = htons(args->mount_server.port);
|
||||
/*
|
||||
* autobind will be used if mount_server.port == 0
|
||||
*/
|
||||
sin.sin_port = htons(args->mount_server.port);
|
||||
|
||||
/*
|
||||
* Now ask the mount server to map our export path
|
||||
@ -1049,14 +1020,11 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args,
|
||||
args->mount_server.version,
|
||||
args->mount_server.protocol,
|
||||
root_fh);
|
||||
if (status < 0)
|
||||
goto out_err;
|
||||
if (status == 0)
|
||||
return 0;
|
||||
|
||||
return status;
|
||||
|
||||
out_err:
|
||||
dfprintk(MOUNT, "NFS: unable to contact server on host "
|
||||
NIPQUAD_FMT "\n", NIPQUAD(sin.sin_addr.s_addr));
|
||||
dfprintk(MOUNT, "NFS: unable to mount server " NIPQUAD_FMT
|
||||
", error %d\n", NIPQUAD(sin.sin_addr.s_addr), status);
|
||||
return status;
|
||||
}
|
||||
|
||||
@ -1079,15 +1047,31 @@ out_err:
|
||||
* XXX: as far as I can tell, changing the NFS program number is not
|
||||
* supported in the NFS client.
|
||||
*/
|
||||
static int nfs_validate_mount_data(struct nfs_mount_data **options,
|
||||
static int nfs_validate_mount_data(void *options,
|
||||
struct nfs_parsed_mount_data *args,
|
||||
struct nfs_fh *mntfh,
|
||||
const char *dev_name)
|
||||
{
|
||||
struct nfs_mount_data *data = *options;
|
||||
struct nfs_mount_data *data = (struct nfs_mount_data *)options;
|
||||
|
||||
if (data == NULL)
|
||||
goto out_no_data;
|
||||
|
||||
memset(args, 0, sizeof(*args));
|
||||
args->flags = (NFS_MOUNT_VER3 | NFS_MOUNT_TCP);
|
||||
args->rsize = NFS_MAX_FILE_IO_SIZE;
|
||||
args->wsize = NFS_MAX_FILE_IO_SIZE;
|
||||
args->timeo = 600;
|
||||
args->retrans = 2;
|
||||
args->acregmin = 3;
|
||||
args->acregmax = 60;
|
||||
args->acdirmin = 30;
|
||||
args->acdirmax = 60;
|
||||
args->mount_server.protocol = XPRT_TRANSPORT_UDP;
|
||||
args->mount_server.program = NFS_MNT_PROGRAM;
|
||||
args->nfs_server.protocol = XPRT_TRANSPORT_TCP;
|
||||
args->nfs_server.program = NFS_PROGRAM;
|
||||
|
||||
switch (data->version) {
|
||||
case 1:
|
||||
data->namlen = 0;
|
||||
@ -1116,92 +1100,73 @@ static int nfs_validate_mount_data(struct nfs_mount_data **options,
|
||||
if (mntfh->size < sizeof(mntfh->data))
|
||||
memset(mntfh->data + mntfh->size, 0,
|
||||
sizeof(mntfh->data) - mntfh->size);
|
||||
|
||||
if (!nfs_verify_server_address((struct sockaddr *) &data->addr))
|
||||
goto out_no_address;
|
||||
|
||||
/*
|
||||
* Translate to nfs_parsed_mount_data, which nfs_fill_super
|
||||
* can deal with.
|
||||
*/
|
||||
args->flags = data->flags;
|
||||
args->rsize = data->rsize;
|
||||
args->wsize = data->wsize;
|
||||
args->flags = data->flags;
|
||||
args->timeo = data->timeo;
|
||||
args->retrans = data->retrans;
|
||||
args->acregmin = data->acregmin;
|
||||
args->acregmax = data->acregmax;
|
||||
args->acdirmin = data->acdirmin;
|
||||
args->acdirmax = data->acdirmax;
|
||||
args->nfs_server.address = data->addr;
|
||||
if (!(data->flags & NFS_MOUNT_TCP))
|
||||
args->nfs_server.protocol = XPRT_TRANSPORT_UDP;
|
||||
/* N.B. caller will free nfs_server.hostname in all cases */
|
||||
args->nfs_server.hostname = kstrdup(data->hostname, GFP_KERNEL);
|
||||
args->namlen = data->namlen;
|
||||
args->bsize = data->bsize;
|
||||
args->auth_flavors[0] = data->pseudoflavor;
|
||||
break;
|
||||
default: {
|
||||
unsigned int len;
|
||||
char *c;
|
||||
int status;
|
||||
struct nfs_parsed_mount_data args = {
|
||||
.flags = (NFS_MOUNT_VER3 | NFS_MOUNT_TCP),
|
||||
.rsize = NFS_MAX_FILE_IO_SIZE,
|
||||
.wsize = NFS_MAX_FILE_IO_SIZE,
|
||||
.timeo = 600,
|
||||
.retrans = 2,
|
||||
.acregmin = 3,
|
||||
.acregmax = 60,
|
||||
.acdirmin = 30,
|
||||
.acdirmax = 60,
|
||||
.mount_server.protocol = IPPROTO_UDP,
|
||||
.mount_server.program = NFS_MNT_PROGRAM,
|
||||
.nfs_server.protocol = IPPROTO_TCP,
|
||||
.nfs_server.program = NFS_PROGRAM,
|
||||
};
|
||||
|
||||
if (nfs_parse_mount_options((char *) *options, &args) == 0)
|
||||
if (nfs_parse_mount_options((char *)options, args) == 0)
|
||||
return -EINVAL;
|
||||
|
||||
data = kzalloc(sizeof(*data), GFP_KERNEL);
|
||||
if (data == NULL)
|
||||
return -ENOMEM;
|
||||
|
||||
/*
|
||||
* NB: after this point, caller will free "data"
|
||||
* if we return an error
|
||||
*/
|
||||
*options = data;
|
||||
if (!nfs_verify_server_address((struct sockaddr *)
|
||||
&args->nfs_server.address))
|
||||
goto out_no_address;
|
||||
|
||||
c = strchr(dev_name, ':');
|
||||
if (c == NULL)
|
||||
return -EINVAL;
|
||||
len = c - dev_name;
|
||||
if (len > sizeof(data->hostname))
|
||||
return -ENAMETOOLONG;
|
||||
strncpy(data->hostname, dev_name, len);
|
||||
args.nfs_server.hostname = data->hostname;
|
||||
/* N.B. caller will free nfs_server.hostname in all cases */
|
||||
args->nfs_server.hostname = kstrndup(dev_name, len, GFP_KERNEL);
|
||||
|
||||
c++;
|
||||
if (strlen(c) > NFS_MAXPATHLEN)
|
||||
return -ENAMETOOLONG;
|
||||
args.nfs_server.export_path = c;
|
||||
args->nfs_server.export_path = c;
|
||||
|
||||
status = nfs_try_mount(&args, mntfh);
|
||||
status = nfs_try_mount(args, mntfh);
|
||||
if (status)
|
||||
return status;
|
||||
|
||||
/*
|
||||
* Translate to nfs_mount_data, which nfs_fill_super
|
||||
* can deal with.
|
||||
*/
|
||||
data->version = 6;
|
||||
data->flags = args.flags;
|
||||
data->rsize = args.rsize;
|
||||
data->wsize = args.wsize;
|
||||
data->timeo = args.timeo;
|
||||
data->retrans = args.retrans;
|
||||
data->acregmin = args.acregmin;
|
||||
data->acregmax = args.acregmax;
|
||||
data->acdirmin = args.acdirmin;
|
||||
data->acdirmax = args.acdirmax;
|
||||
data->addr = args.nfs_server.address;
|
||||
data->namlen = args.namlen;
|
||||
data->bsize = args.bsize;
|
||||
data->pseudoflavor = args.auth_flavors[0];
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!(data->flags & NFS_MOUNT_SECFLAVOUR))
|
||||
data->pseudoflavor = RPC_AUTH_UNIX;
|
||||
if (!(args->flags & NFS_MOUNT_SECFLAVOUR))
|
||||
args->auth_flavors[0] = RPC_AUTH_UNIX;
|
||||
|
||||
#ifndef CONFIG_NFS_V3
|
||||
if (data->flags & NFS_MOUNT_VER3)
|
||||
if (args->flags & NFS_MOUNT_VER3)
|
||||
goto out_v3_not_compiled;
|
||||
#endif /* !CONFIG_NFS_V3 */
|
||||
|
||||
if (!nfs_verify_server_address((struct sockaddr *) &data->addr))
|
||||
goto out_no_address;
|
||||
|
||||
return 0;
|
||||
|
||||
out_no_data:
|
||||
@ -1258,7 +1223,8 @@ static inline void nfs_initialise_sb(struct super_block *sb)
|
||||
/*
|
||||
* Finish setting up an NFS2/3 superblock
|
||||
*/
|
||||
static void nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data)
|
||||
static void nfs_fill_super(struct super_block *sb,
|
||||
struct nfs_parsed_mount_data *data)
|
||||
{
|
||||
struct nfs_server *server = NFS_SB(sb);
|
||||
|
||||
@ -1379,7 +1345,7 @@ static int nfs_get_sb(struct file_system_type *fs_type,
|
||||
struct nfs_server *server = NULL;
|
||||
struct super_block *s;
|
||||
struct nfs_fh mntfh;
|
||||
struct nfs_mount_data *data = raw_data;
|
||||
struct nfs_parsed_mount_data data;
|
||||
struct dentry *mntroot;
|
||||
int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
|
||||
struct nfs_sb_mountdata sb_mntdata = {
|
||||
@ -1388,12 +1354,12 @@ static int nfs_get_sb(struct file_system_type *fs_type,
|
||||
int error;
|
||||
|
||||
/* Validate the mount data */
|
||||
error = nfs_validate_mount_data(&data, &mntfh, dev_name);
|
||||
error = nfs_validate_mount_data(raw_data, &data, &mntfh, dev_name);
|
||||
if (error < 0)
|
||||
goto out;
|
||||
|
||||
/* Get a volume representation */
|
||||
server = nfs_create_server(data, &mntfh);
|
||||
server = nfs_create_server(&data, &mntfh);
|
||||
if (IS_ERR(server)) {
|
||||
error = PTR_ERR(server);
|
||||
goto out;
|
||||
@ -1417,7 +1383,7 @@ static int nfs_get_sb(struct file_system_type *fs_type,
|
||||
|
||||
if (!s->s_root) {
|
||||
/* initial superblock/root creation */
|
||||
nfs_fill_super(s, data);
|
||||
nfs_fill_super(s, &data);
|
||||
}
|
||||
|
||||
mntroot = nfs_get_root(s, &mntfh);
|
||||
@ -1432,8 +1398,7 @@ static int nfs_get_sb(struct file_system_type *fs_type,
|
||||
error = 0;
|
||||
|
||||
out:
|
||||
if (data != raw_data)
|
||||
kfree(data);
|
||||
kfree(data.nfs_server.hostname);
|
||||
return error;
|
||||
|
||||
out_err_nosb:
|
||||
@ -1559,38 +1524,49 @@ static void nfs4_fill_super(struct super_block *sb)
|
||||
/*
|
||||
* Validate NFSv4 mount options
|
||||
*/
|
||||
static int nfs4_validate_mount_data(struct nfs4_mount_data **options,
|
||||
const char *dev_name,
|
||||
struct sockaddr_in *addr,
|
||||
rpc_authflavor_t *authflavour,
|
||||
char **hostname,
|
||||
char **mntpath,
|
||||
char **ip_addr)
|
||||
static int nfs4_validate_mount_data(void *options,
|
||||
struct nfs_parsed_mount_data *args,
|
||||
const char *dev_name)
|
||||
{
|
||||
struct nfs4_mount_data *data = *options;
|
||||
struct nfs4_mount_data *data = (struct nfs4_mount_data *)options;
|
||||
char *c;
|
||||
|
||||
if (data == NULL)
|
||||
goto out_no_data;
|
||||
|
||||
memset(args, 0, sizeof(*args));
|
||||
args->rsize = NFS_MAX_FILE_IO_SIZE;
|
||||
args->wsize = NFS_MAX_FILE_IO_SIZE;
|
||||
args->timeo = 600;
|
||||
args->retrans = 2;
|
||||
args->acregmin = 3;
|
||||
args->acregmax = 60;
|
||||
args->acdirmin = 30;
|
||||
args->acdirmax = 60;
|
||||
args->nfs_server.protocol = XPRT_TRANSPORT_TCP;
|
||||
|
||||
switch (data->version) {
|
||||
case 1:
|
||||
if (data->host_addrlen != sizeof(*addr))
|
||||
if (data->host_addrlen != sizeof(args->nfs_server.address))
|
||||
goto out_no_address;
|
||||
if (copy_from_user(addr, data->host_addr, sizeof(*addr)))
|
||||
if (copy_from_user(&args->nfs_server.address,
|
||||
data->host_addr,
|
||||
sizeof(args->nfs_server.address)))
|
||||
return -EFAULT;
|
||||
if (addr->sin_port == 0)
|
||||
addr->sin_port = htons(NFS_PORT);
|
||||
if (!nfs_verify_server_address((struct sockaddr *) addr))
|
||||
if (args->nfs_server.address.sin_port == 0)
|
||||
args->nfs_server.address.sin_port = htons(NFS_PORT);
|
||||
if (!nfs_verify_server_address((struct sockaddr *)
|
||||
&args->nfs_server.address))
|
||||
goto out_no_address;
|
||||
|
||||
switch (data->auth_flavourlen) {
|
||||
case 0:
|
||||
*authflavour = RPC_AUTH_UNIX;
|
||||
args->auth_flavors[0] = RPC_AUTH_UNIX;
|
||||
break;
|
||||
case 1:
|
||||
if (copy_from_user(authflavour, data->auth_flavours,
|
||||
sizeof(*authflavour)))
|
||||
if (copy_from_user(&args->auth_flavors[0],
|
||||
data->auth_flavours,
|
||||
sizeof(args->auth_flavors[0])))
|
||||
return -EFAULT;
|
||||
break;
|
||||
default:
|
||||
@ -1600,74 +1576,56 @@ static int nfs4_validate_mount_data(struct nfs4_mount_data **options,
|
||||
c = strndup_user(data->hostname.data, NFS4_MAXNAMLEN);
|
||||
if (IS_ERR(c))
|
||||
return PTR_ERR(c);
|
||||
*hostname = c;
|
||||
args->nfs_server.hostname = c;
|
||||
|
||||
c = strndup_user(data->mnt_path.data, NFS4_MAXPATHLEN);
|
||||
if (IS_ERR(c))
|
||||
return PTR_ERR(c);
|
||||
*mntpath = c;
|
||||
dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", *mntpath);
|
||||
args->nfs_server.export_path = c;
|
||||
dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", c);
|
||||
|
||||
c = strndup_user(data->client_addr.data, 16);
|
||||
if (IS_ERR(c))
|
||||
return PTR_ERR(c);
|
||||
*ip_addr = c;
|
||||
args->client_address = c;
|
||||
|
||||
/*
|
||||
* Translate to nfs_parsed_mount_data, which nfs4_fill_super
|
||||
* can deal with.
|
||||
*/
|
||||
|
||||
args->flags = data->flags & NFS4_MOUNT_FLAGMASK;
|
||||
args->rsize = data->rsize;
|
||||
args->wsize = data->wsize;
|
||||
args->timeo = data->timeo;
|
||||
args->retrans = data->retrans;
|
||||
args->acregmin = data->acregmin;
|
||||
args->acregmax = data->acregmax;
|
||||
args->acdirmin = data->acdirmin;
|
||||
args->acdirmax = data->acdirmax;
|
||||
args->nfs_server.protocol = data->proto;
|
||||
|
||||
break;
|
||||
default: {
|
||||
unsigned int len;
|
||||
struct nfs_parsed_mount_data args = {
|
||||
.rsize = NFS_MAX_FILE_IO_SIZE,
|
||||
.wsize = NFS_MAX_FILE_IO_SIZE,
|
||||
.timeo = 600,
|
||||
.retrans = 2,
|
||||
.acregmin = 3,
|
||||
.acregmax = 60,
|
||||
.acdirmin = 30,
|
||||
.acdirmax = 60,
|
||||
.nfs_server.protocol = IPPROTO_TCP,
|
||||
};
|
||||
|
||||
if (nfs_parse_mount_options((char *) *options, &args) == 0)
|
||||
if (nfs_parse_mount_options((char *)options, args) == 0)
|
||||
return -EINVAL;
|
||||
|
||||
if (!nfs_verify_server_address((struct sockaddr *)
|
||||
&args.nfs_server.address))
|
||||
&args->nfs_server.address))
|
||||
return -EINVAL;
|
||||
*addr = args.nfs_server.address;
|
||||
|
||||
switch (args.auth_flavor_len) {
|
||||
switch (args->auth_flavor_len) {
|
||||
case 0:
|
||||
*authflavour = RPC_AUTH_UNIX;
|
||||
args->auth_flavors[0] = RPC_AUTH_UNIX;
|
||||
break;
|
||||
case 1:
|
||||
*authflavour = (rpc_authflavor_t) args.auth_flavors[0];
|
||||
break;
|
||||
default:
|
||||
goto out_inval_auth;
|
||||
}
|
||||
|
||||
/*
|
||||
* Translate to nfs4_mount_data, which nfs4_fill_super
|
||||
* can deal with.
|
||||
*/
|
||||
data = kzalloc(sizeof(*data), GFP_KERNEL);
|
||||
if (data == NULL)
|
||||
return -ENOMEM;
|
||||
*options = data;
|
||||
|
||||
data->version = 1;
|
||||
data->flags = args.flags & NFS4_MOUNT_FLAGMASK;
|
||||
data->rsize = args.rsize;
|
||||
data->wsize = args.wsize;
|
||||
data->timeo = args.timeo;
|
||||
data->retrans = args.retrans;
|
||||
data->acregmin = args.acregmin;
|
||||
data->acregmax = args.acregmax;
|
||||
data->acdirmin = args.acdirmin;
|
||||
data->acdirmax = args.acdirmax;
|
||||
data->proto = args.nfs_server.protocol;
|
||||
|
||||
/*
|
||||
* Split "dev_name" into "hostname:mntpath".
|
||||
*/
|
||||
@ -1678,27 +1636,25 @@ static int nfs4_validate_mount_data(struct nfs4_mount_data **options,
|
||||
len = c - dev_name;
|
||||
if (len > NFS4_MAXNAMLEN)
|
||||
return -ENAMETOOLONG;
|
||||
*hostname = kzalloc(len, GFP_KERNEL);
|
||||
if (*hostname == NULL)
|
||||
args->nfs_server.hostname = kzalloc(len, GFP_KERNEL);
|
||||
if (args->nfs_server.hostname == NULL)
|
||||
return -ENOMEM;
|
||||
strncpy(*hostname, dev_name, len - 1);
|
||||
strncpy(args->nfs_server.hostname, dev_name, len - 1);
|
||||
|
||||
c++; /* step over the ':' */
|
||||
len = strlen(c);
|
||||
if (len > NFS4_MAXPATHLEN)
|
||||
return -ENAMETOOLONG;
|
||||
*mntpath = kzalloc(len + 1, GFP_KERNEL);
|
||||
if (*mntpath == NULL)
|
||||
args->nfs_server.export_path = kzalloc(len + 1, GFP_KERNEL);
|
||||
if (args->nfs_server.export_path == NULL)
|
||||
return -ENOMEM;
|
||||
strncpy(*mntpath, c, len);
|
||||
strncpy(args->nfs_server.export_path, c, len);
|
||||
|
||||
dprintk("MNTPATH: %s\n", *mntpath);
|
||||
dprintk("MNTPATH: %s\n", args->nfs_server.export_path);
|
||||
|
||||
if (args.client_address == NULL)
|
||||
if (args->client_address == NULL)
|
||||
goto out_no_client_address;
|
||||
|
||||
*ip_addr = args.client_address;
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -1729,14 +1685,11 @@ out_no_client_address:
|
||||
static int nfs4_get_sb(struct file_system_type *fs_type,
|
||||
int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
|
||||
{
|
||||
struct nfs4_mount_data *data = raw_data;
|
||||
struct nfs_parsed_mount_data data;
|
||||
struct super_block *s;
|
||||
struct nfs_server *server;
|
||||
struct sockaddr_in addr;
|
||||
rpc_authflavor_t authflavour;
|
||||
struct nfs_fh mntfh;
|
||||
struct dentry *mntroot;
|
||||
char *mntpath = NULL, *hostname = NULL, *ip_addr = NULL;
|
||||
int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
|
||||
struct nfs_sb_mountdata sb_mntdata = {
|
||||
.mntflags = flags,
|
||||
@ -1744,14 +1697,12 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
|
||||
int error;
|
||||
|
||||
/* Validate the mount data */
|
||||
error = nfs4_validate_mount_data(&data, dev_name, &addr, &authflavour,
|
||||
&hostname, &mntpath, &ip_addr);
|
||||
error = nfs4_validate_mount_data(raw_data, &data, dev_name);
|
||||
if (error < 0)
|
||||
goto out;
|
||||
|
||||
/* Get a volume representation */
|
||||
server = nfs4_create_server(data, hostname, &addr, mntpath, ip_addr,
|
||||
authflavour, &mntfh);
|
||||
server = nfs4_create_server(&data, &mntfh);
|
||||
if (IS_ERR(server)) {
|
||||
error = PTR_ERR(server);
|
||||
goto out;
|
||||
@ -1790,9 +1741,9 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
|
||||
error = 0;
|
||||
|
||||
out:
|
||||
kfree(ip_addr);
|
||||
kfree(mntpath);
|
||||
kfree(hostname);
|
||||
kfree(data.client_address);
|
||||
kfree(data.nfs_server.export_path);
|
||||
kfree(data.nfs_server.hostname);
|
||||
return error;
|
||||
|
||||
out_free:
|
||||
|
@ -66,7 +66,6 @@ static void nfs_async_unlink_init(struct rpc_task *task, void *calldata)
|
||||
.rpc_cred = data->cred,
|
||||
};
|
||||
|
||||
nfs_begin_data_update(dir);
|
||||
NFS_PROTO(dir)->unlink_setup(&msg, dir);
|
||||
rpc_call_setup(task, &msg, 0);
|
||||
}
|
||||
@ -84,8 +83,6 @@ static void nfs_async_unlink_done(struct rpc_task *task, void *calldata)
|
||||
|
||||
if (!NFS_PROTO(dir)->unlink_done(task, dir))
|
||||
rpc_restart_call(task);
|
||||
else
|
||||
nfs_end_data_update(dir);
|
||||
}
|
||||
|
||||
/**
|
||||
|
213
fs/nfs/write.c
213
fs/nfs/write.c
@ -110,6 +110,13 @@ void nfs_writedata_release(void *wdata)
|
||||
nfs_writedata_free(wdata);
|
||||
}
|
||||
|
||||
static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
|
||||
{
|
||||
ctx->error = error;
|
||||
smp_wmb();
|
||||
set_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
|
||||
}
|
||||
|
||||
static struct nfs_page *nfs_page_find_request_locked(struct page *page)
|
||||
{
|
||||
struct nfs_page *req = NULL;
|
||||
@ -243,10 +250,7 @@ static void nfs_end_page_writeback(struct page *page)
|
||||
|
||||
/*
|
||||
* Find an associated nfs write request, and prepare to flush it out
|
||||
* Returns 1 if there was no write request, or if the request was
|
||||
* already tagged by nfs_set_page_dirty.Returns 0 if the request
|
||||
* was not tagged.
|
||||
* May also return an error if the user signalled nfs_wait_on_request().
|
||||
* May return an error if the user signalled nfs_wait_on_request().
|
||||
*/
|
||||
static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
|
||||
struct page *page)
|
||||
@ -261,7 +265,7 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
|
||||
req = nfs_page_find_request_locked(page);
|
||||
if (req == NULL) {
|
||||
spin_unlock(&inode->i_lock);
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
if (nfs_lock_request_dontget(req))
|
||||
break;
|
||||
@ -282,7 +286,7 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
|
||||
spin_unlock(&inode->i_lock);
|
||||
nfs_unlock_request(req);
|
||||
nfs_pageio_complete(pgio);
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
if (nfs_set_page_writeback(page) != 0) {
|
||||
spin_unlock(&inode->i_lock);
|
||||
@ -290,10 +294,20 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
|
||||
}
|
||||
radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index,
|
||||
NFS_PAGE_TAG_LOCKED);
|
||||
ret = test_bit(PG_NEED_FLUSH, &req->wb_flags);
|
||||
spin_unlock(&inode->i_lock);
|
||||
nfs_pageio_add_request(pgio, req);
|
||||
return ret;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, struct nfs_pageio_descriptor *pgio)
|
||||
{
|
||||
struct inode *inode = page->mapping->host;
|
||||
|
||||
nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
|
||||
nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1);
|
||||
|
||||
nfs_pageio_cond_complete(pgio, page->index);
|
||||
return nfs_page_async_flush(pgio, page);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -301,59 +315,35 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
|
||||
*/
|
||||
static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc)
|
||||
{
|
||||
struct nfs_pageio_descriptor mypgio, *pgio;
|
||||
struct nfs_open_context *ctx;
|
||||
struct inode *inode = page->mapping->host;
|
||||
unsigned offset;
|
||||
struct nfs_pageio_descriptor pgio;
|
||||
int err;
|
||||
|
||||
nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
|
||||
nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1);
|
||||
|
||||
if (wbc->for_writepages)
|
||||
pgio = wbc->fs_private;
|
||||
else {
|
||||
nfs_pageio_init_write(&mypgio, inode, wb_priority(wbc));
|
||||
pgio = &mypgio;
|
||||
}
|
||||
|
||||
nfs_pageio_cond_complete(pgio, page->index);
|
||||
|
||||
err = nfs_page_async_flush(pgio, page);
|
||||
if (err <= 0)
|
||||
goto out;
|
||||
err = 0;
|
||||
offset = nfs_page_length(page);
|
||||
if (!offset)
|
||||
goto out;
|
||||
|
||||
nfs_pageio_cond_complete(pgio, page->index);
|
||||
|
||||
ctx = nfs_find_open_context(inode, NULL, FMODE_WRITE);
|
||||
if (ctx == NULL) {
|
||||
err = -EBADF;
|
||||
goto out;
|
||||
}
|
||||
err = nfs_writepage_setup(ctx, page, 0, offset);
|
||||
put_nfs_open_context(ctx);
|
||||
if (err != 0)
|
||||
goto out;
|
||||
err = nfs_page_async_flush(pgio, page);
|
||||
if (err > 0)
|
||||
err = 0;
|
||||
out:
|
||||
if (!wbc->for_writepages)
|
||||
nfs_pageio_complete(pgio);
|
||||
return err;
|
||||
nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc));
|
||||
err = nfs_do_writepage(page, wbc, &pgio);
|
||||
nfs_pageio_complete(&pgio);
|
||||
if (err < 0)
|
||||
return err;
|
||||
if (pgio.pg_error < 0)
|
||||
return pgio.pg_error;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int nfs_writepage(struct page *page, struct writeback_control *wbc)
|
||||
{
|
||||
int err;
|
||||
int ret;
|
||||
|
||||
err = nfs_writepage_locked(page, wbc);
|
||||
ret = nfs_writepage_locked(page, wbc);
|
||||
unlock_page(page);
|
||||
return err;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int nfs_writepages_callback(struct page *page, struct writeback_control *wbc, void *data)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = nfs_do_writepage(page, wbc, data);
|
||||
unlock_page(page);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
|
||||
@ -365,12 +355,11 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
|
||||
nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
|
||||
|
||||
nfs_pageio_init_write(&pgio, inode, wb_priority(wbc));
|
||||
wbc->fs_private = &pgio;
|
||||
err = generic_writepages(mapping, wbc);
|
||||
err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio);
|
||||
nfs_pageio_complete(&pgio);
|
||||
if (err)
|
||||
if (err < 0)
|
||||
return err;
|
||||
if (pgio.pg_error)
|
||||
if (pgio.pg_error < 0)
|
||||
return pgio.pg_error;
|
||||
return 0;
|
||||
}
|
||||
@ -389,14 +378,11 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
|
||||
return error;
|
||||
if (!nfsi->npages) {
|
||||
igrab(inode);
|
||||
nfs_begin_data_update(inode);
|
||||
if (nfs_have_delegation(inode, FMODE_WRITE))
|
||||
nfsi->change_attr++;
|
||||
}
|
||||
SetPagePrivate(req->wb_page);
|
||||
set_page_private(req->wb_page, (unsigned long)req);
|
||||
if (PageDirty(req->wb_page))
|
||||
set_bit(PG_NEED_FLUSH, &req->wb_flags);
|
||||
nfsi->npages++;
|
||||
kref_get(&req->wb_kref);
|
||||
return 0;
|
||||
@ -416,12 +402,9 @@ static void nfs_inode_remove_request(struct nfs_page *req)
|
||||
set_page_private(req->wb_page, 0);
|
||||
ClearPagePrivate(req->wb_page);
|
||||
radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index);
|
||||
if (test_and_clear_bit(PG_NEED_FLUSH, &req->wb_flags))
|
||||
__set_page_dirty_nobuffers(req->wb_page);
|
||||
nfsi->npages--;
|
||||
if (!nfsi->npages) {
|
||||
spin_unlock(&inode->i_lock);
|
||||
nfs_end_data_update(inode);
|
||||
iput(inode);
|
||||
} else
|
||||
spin_unlock(&inode->i_lock);
|
||||
@ -682,7 +665,7 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
|
||||
|
||||
int nfs_flush_incompatible(struct file *file, struct page *page)
|
||||
{
|
||||
struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data;
|
||||
struct nfs_open_context *ctx = nfs_file_open_context(file);
|
||||
struct nfs_page *req;
|
||||
int do_flush, status;
|
||||
/*
|
||||
@ -716,7 +699,7 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
|
||||
int nfs_updatepage(struct file *file, struct page *page,
|
||||
unsigned int offset, unsigned int count)
|
||||
{
|
||||
struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data;
|
||||
struct nfs_open_context *ctx = nfs_file_open_context(file);
|
||||
struct inode *inode = page->mapping->host;
|
||||
int status = 0;
|
||||
|
||||
@ -967,7 +950,7 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
|
||||
|
||||
if (task->tk_status < 0) {
|
||||
nfs_set_pageerror(page);
|
||||
req->wb_context->error = task->tk_status;
|
||||
nfs_context_set_write_error(req->wb_context, task->tk_status);
|
||||
dprintk(", error = %d\n", task->tk_status);
|
||||
goto out;
|
||||
}
|
||||
@ -1030,7 +1013,7 @@ static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
|
||||
|
||||
if (task->tk_status < 0) {
|
||||
nfs_set_pageerror(page);
|
||||
req->wb_context->error = task->tk_status;
|
||||
nfs_context_set_write_error(req->wb_context, task->tk_status);
|
||||
dprintk(", error = %d\n", task->tk_status);
|
||||
goto remove_request;
|
||||
}
|
||||
@ -1244,7 +1227,7 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
|
||||
req->wb_bytes,
|
||||
(long long)req_offset(req));
|
||||
if (task->tk_status < 0) {
|
||||
req->wb_context->error = task->tk_status;
|
||||
nfs_context_set_write_error(req->wb_context, task->tk_status);
|
||||
nfs_inode_remove_request(req);
|
||||
dprintk(", error = %d\n", task->tk_status);
|
||||
goto next;
|
||||
@ -1347,53 +1330,52 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* flush the inode to disk.
|
||||
*/
|
||||
int nfs_wb_all(struct inode *inode)
|
||||
static int __nfs_write_mapping(struct address_space *mapping, struct writeback_control *wbc, int how)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = nfs_writepages(mapping, wbc);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
ret = nfs_sync_mapping_wait(mapping, wbc, how);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
return 0;
|
||||
out:
|
||||
__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Two pass sync: first using WB_SYNC_NONE, then WB_SYNC_ALL */
|
||||
static int nfs_write_mapping(struct address_space *mapping, int how)
|
||||
{
|
||||
struct address_space *mapping = inode->i_mapping;
|
||||
struct writeback_control wbc = {
|
||||
.bdi = mapping->backing_dev_info,
|
||||
.sync_mode = WB_SYNC_ALL,
|
||||
.sync_mode = WB_SYNC_NONE,
|
||||
.nr_to_write = LONG_MAX,
|
||||
.for_writepages = 1,
|
||||
.range_cyclic = 1,
|
||||
};
|
||||
int ret;
|
||||
|
||||
ret = nfs_writepages(mapping, &wbc);
|
||||
ret = __nfs_write_mapping(mapping, &wbc, how);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
ret = nfs_sync_mapping_wait(mapping, &wbc, 0);
|
||||
if (ret >= 0)
|
||||
return 0;
|
||||
out:
|
||||
__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
|
||||
return ret;
|
||||
return ret;
|
||||
wbc.sync_mode = WB_SYNC_ALL;
|
||||
return __nfs_write_mapping(mapping, &wbc, how);
|
||||
}
|
||||
|
||||
int nfs_sync_mapping_range(struct address_space *mapping, loff_t range_start, loff_t range_end, int how)
|
||||
/*
|
||||
* flush the inode to disk.
|
||||
*/
|
||||
int nfs_wb_all(struct inode *inode)
|
||||
{
|
||||
struct writeback_control wbc = {
|
||||
.bdi = mapping->backing_dev_info,
|
||||
.sync_mode = WB_SYNC_ALL,
|
||||
.nr_to_write = LONG_MAX,
|
||||
.range_start = range_start,
|
||||
.range_end = range_end,
|
||||
.for_writepages = 1,
|
||||
};
|
||||
int ret;
|
||||
return nfs_write_mapping(inode->i_mapping, 0);
|
||||
}
|
||||
|
||||
ret = nfs_writepages(mapping, &wbc);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
ret = nfs_sync_mapping_wait(mapping, &wbc, how);
|
||||
if (ret >= 0)
|
||||
return 0;
|
||||
out:
|
||||
__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
|
||||
return ret;
|
||||
int nfs_wb_nocommit(struct inode *inode)
|
||||
{
|
||||
return nfs_write_mapping(inode->i_mapping, FLUSH_NOCOMMIT);
|
||||
}
|
||||
|
||||
int nfs_wb_page_cancel(struct inode *inode, struct page *page)
|
||||
@ -1477,35 +1459,6 @@ int nfs_wb_page(struct inode *inode, struct page* page)
|
||||
return nfs_wb_page_priority(inode, page, FLUSH_STABLE);
|
||||
}
|
||||
|
||||
int nfs_set_page_dirty(struct page *page)
|
||||
{
|
||||
struct address_space *mapping = page->mapping;
|
||||
struct inode *inode;
|
||||
struct nfs_page *req;
|
||||
int ret;
|
||||
|
||||
if (!mapping)
|
||||
goto out_raced;
|
||||
inode = mapping->host;
|
||||
if (!inode)
|
||||
goto out_raced;
|
||||
spin_lock(&inode->i_lock);
|
||||
req = nfs_page_find_request_locked(page);
|
||||
if (req != NULL) {
|
||||
/* Mark any existing write requests for flushing */
|
||||
ret = !test_and_set_bit(PG_NEED_FLUSH, &req->wb_flags);
|
||||
spin_unlock(&inode->i_lock);
|
||||
nfs_release_request(req);
|
||||
return ret;
|
||||
}
|
||||
ret = __set_page_dirty_nobuffers(page);
|
||||
spin_unlock(&inode->i_lock);
|
||||
return ret;
|
||||
out_raced:
|
||||
return !TestSetPageDirty(page);
|
||||
}
|
||||
|
||||
|
||||
int __init nfs_init_writepagecache(void)
|
||||
{
|
||||
nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
|
||||
|
@ -102,7 +102,8 @@ check_filename(char *str, int len, __be32 err)
|
||||
out: \
|
||||
return status; \
|
||||
xdr_error: \
|
||||
printk(KERN_NOTICE "xdr error! (%s:%d)\n", __FILE__, __LINE__); \
|
||||
dprintk("NFSD: xdr error (%s:%d)\n", \
|
||||
__FILE__, __LINE__); \
|
||||
status = nfserr_bad_xdr; \
|
||||
goto out
|
||||
|
||||
@ -124,7 +125,8 @@ xdr_error: \
|
||||
if (!(x = (p==argp->tmp || p == argp->tmpp) ? \
|
||||
savemem(argp, p, nbytes) : \
|
||||
(char *)p)) { \
|
||||
printk(KERN_NOTICE "xdr error! (%s:%d)\n", __FILE__, __LINE__); \
|
||||
dprintk("NFSD: xdr error (%s:%d)\n", \
|
||||
__FILE__, __LINE__); \
|
||||
goto xdr_error; \
|
||||
} \
|
||||
p += XDR_QUADLEN(nbytes); \
|
||||
@ -140,7 +142,8 @@ xdr_error: \
|
||||
p = argp->p; \
|
||||
argp->p += XDR_QUADLEN(nbytes); \
|
||||
} else if (!(p = read_buf(argp, nbytes))) { \
|
||||
printk(KERN_NOTICE "xdr error! (%s:%d)\n", __FILE__, __LINE__); \
|
||||
dprintk("NFSD: xdr error (%s:%d)\n", \
|
||||
__FILE__, __LINE__); \
|
||||
goto xdr_error; \
|
||||
} \
|
||||
} while (0)
|
||||
@ -948,7 +951,8 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write)
|
||||
*/
|
||||
avail = (char*)argp->end - (char*)argp->p;
|
||||
if (avail + argp->pagelen < write->wr_buflen) {
|
||||
printk(KERN_NOTICE "xdr error! (%s:%d)\n", __FILE__, __LINE__);
|
||||
dprintk("NFSD: xdr error (%s:%d)\n",
|
||||
__FILE__, __LINE__);
|
||||
goto xdr_error;
|
||||
}
|
||||
argp->rqstp->rq_vec[0].iov_base = p;
|
||||
@ -1019,7 +1023,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
|
||||
argp->ops = kmalloc(argp->opcnt * sizeof(*argp->ops), GFP_KERNEL);
|
||||
if (!argp->ops) {
|
||||
argp->ops = argp->iops;
|
||||
printk(KERN_INFO "nfsd: couldn't allocate room for COMPOUND\n");
|
||||
dprintk("nfsd: couldn't allocate room for COMPOUND\n");
|
||||
goto xdr_error;
|
||||
}
|
||||
}
|
||||
@ -1326,7 +1330,7 @@ static char *nfsd4_path(struct svc_rqst *rqstp, struct svc_export *exp, __be32 *
|
||||
path = exp->ex_path;
|
||||
|
||||
if (strncmp(path, rootpath, strlen(rootpath))) {
|
||||
printk("nfsd: fs_locations failed;"
|
||||
dprintk("nfsd: fs_locations failed;"
|
||||
"%s is not contained in %s\n", path, rootpath);
|
||||
*stat = nfserr_notsupp;
|
||||
return NULL;
|
||||
|
@ -109,6 +109,10 @@ static inline u64 get_jiffies_64(void)
|
||||
((long)(a) - (long)(b) >= 0))
|
||||
#define time_before_eq(a,b) time_after_eq(b,a)
|
||||
|
||||
/*
 * time_in_range(a,b,c) is true when a is at or after b and at or before c,
 * i.e. both end points are included.  It is built from time_after_eq() and
 * time_before_eq(); note that the argument a appears (and is evaluated)
 * twice in the expansion.
 */
#define time_in_range(a,b,c) \
	(time_after_eq(a,b) && time_before_eq(a,c))
|
||||
|
||||
/* Same as above, but does so with platform independent 64bit types.
|
||||
* These must be used when utilizing jiffies_64 (i.e. return value of
|
||||
* get_jiffies_64()) */
|
||||
|
@ -47,10 +47,8 @@
|
||||
#include <linux/nfs3.h>
|
||||
#include <linux/nfs4.h>
|
||||
#include <linux/nfs_xdr.h>
|
||||
|
||||
#include <linux/nfs_fs_sb.h>
|
||||
|
||||
#include <linux/rwsem.h>
|
||||
#include <linux/mempool.h>
|
||||
|
||||
/*
|
||||
@ -77,6 +75,9 @@ struct nfs_open_context {
|
||||
struct nfs4_state *state;
|
||||
fl_owner_t lockowner;
|
||||
int mode;
|
||||
|
||||
unsigned long flags;
|
||||
#define NFS_CONTEXT_ERROR_WRITE (0)
|
||||
int error;
|
||||
|
||||
struct list_head list;
|
||||
@ -133,11 +134,6 @@ struct nfs_inode {
|
||||
* server.
|
||||
*/
|
||||
unsigned long cache_change_attribute;
|
||||
/*
|
||||
* Counter indicating the number of outstanding requests that
|
||||
* will cause a file data update.
|
||||
*/
|
||||
atomic_t data_updates;
|
||||
|
||||
struct rb_root access_cache;
|
||||
struct list_head access_cache_entry_lru;
|
||||
@ -205,27 +201,18 @@ static inline struct nfs_inode *NFS_I(struct inode *inode)
|
||||
#define NFS_CLIENT(inode) (NFS_SERVER(inode)->client)
|
||||
#define NFS_PROTO(inode) (NFS_SERVER(inode)->nfs_client->rpc_ops)
|
||||
#define NFS_COOKIEVERF(inode) (NFS_I(inode)->cookieverf)
|
||||
#define NFS_READTIME(inode) (NFS_I(inode)->read_cache_jiffies)
|
||||
#define NFS_CHANGE_ATTR(inode) (NFS_I(inode)->change_attr)
|
||||
#define NFS_ATTRTIMEO(inode) (NFS_I(inode)->attrtimeo)
|
||||
#define NFS_MINATTRTIMEO(inode) \
|
||||
(S_ISDIR(inode->i_mode)? NFS_SERVER(inode)->acdirmin \
|
||||
: NFS_SERVER(inode)->acregmin)
|
||||
#define NFS_MAXATTRTIMEO(inode) \
|
||||
(S_ISDIR(inode->i_mode)? NFS_SERVER(inode)->acdirmax \
|
||||
: NFS_SERVER(inode)->acregmax)
|
||||
#define NFS_ATTRTIMEO_UPDATE(inode) (NFS_I(inode)->attrtimeo_timestamp)
|
||||
|
||||
#define NFS_FLAGS(inode) (NFS_I(inode)->flags)
|
||||
#define NFS_STALE(inode) (test_bit(NFS_INO_STALE, &NFS_FLAGS(inode)))
|
||||
|
||||
#define NFS_FILEID(inode) (NFS_I(inode)->fileid)
|
||||
|
||||
static inline int nfs_caches_unstable(struct inode *inode)
|
||||
{
|
||||
return atomic_read(&NFS_I(inode)->data_updates) != 0;
|
||||
}
|
||||
|
||||
static inline void nfs_mark_for_revalidate(struct inode *inode)
|
||||
{
|
||||
struct nfs_inode *nfsi = NFS_I(inode);
|
||||
@ -237,12 +224,6 @@ static inline void nfs_mark_for_revalidate(struct inode *inode)
|
||||
spin_unlock(&inode->i_lock);
|
||||
}
|
||||
|
||||
static inline void NFS_CACHEINV(struct inode *inode)
|
||||
{
|
||||
if (!nfs_caches_unstable(inode))
|
||||
nfs_mark_for_revalidate(inode);
|
||||
}
|
||||
|
||||
static inline int nfs_server_capable(struct inode *inode, int cap)
|
||||
{
|
||||
return NFS_SERVER(inode)->caps & cap;
|
||||
@ -253,28 +234,33 @@ static inline int NFS_USE_READDIRPLUS(struct inode *inode)
|
||||
return test_bit(NFS_INO_ADVISE_RDPLUS, &NFS_FLAGS(inode));
|
||||
}
|
||||
|
||||
/**
|
||||
* nfs_save_change_attribute - Returns the inode attribute change cookie
|
||||
* @inode - pointer to inode
|
||||
* The "change attribute" is updated every time we finish an operation
|
||||
* that will result in a metadata change on the server.
|
||||
*/
|
||||
static inline long nfs_save_change_attribute(struct inode *inode)
|
||||
static inline void nfs_set_verifier(struct dentry * dentry, unsigned long verf)
|
||||
{
|
||||
return NFS_I(inode)->cache_change_attribute;
|
||||
dentry->d_time = verf;
|
||||
}
|
||||
|
||||
/**
|
||||
* nfs_verify_change_attribute - Detects NFS inode cache updates
|
||||
* @inode - pointer to inode
|
||||
* @chattr - previously saved change attribute
|
||||
* Return "false" if metadata has been updated (or is in the process of
|
||||
* being updated) since the change attribute was saved.
|
||||
* nfs_save_change_attribute - Returns the inode attribute change cookie
|
||||
* @dir - pointer to parent directory inode
|
||||
* The "change attribute" is updated every time we finish an operation
|
||||
* that will result in a metadata change on the server.
|
||||
*/
|
||||
static inline int nfs_verify_change_attribute(struct inode *inode, unsigned long chattr)
|
||||
static inline unsigned long nfs_save_change_attribute(struct inode *dir)
|
||||
{
|
||||
return !nfs_caches_unstable(inode)
|
||||
&& time_after_eq(chattr, NFS_I(inode)->cache_change_attribute);
|
||||
return NFS_I(dir)->cache_change_attribute;
|
||||
}
|
||||
|
||||
/**
|
||||
* nfs_verify_change_attribute - Detects NFS remote directory changes
|
||||
* @dir - pointer to parent directory inode
|
||||
* @chattr - previously saved change attribute
|
||||
* Return "false" if the verifier doesn't match the change attribute.
|
||||
* This would usually indicate that the directory contents have changed on
|
||||
* the server, and that any dentries need revalidating.
|
||||
*/
|
||||
static inline int nfs_verify_change_attribute(struct inode *dir, unsigned long chattr)
|
||||
{
|
||||
return chattr == NFS_I(dir)->cache_change_attribute;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -283,15 +269,14 @@ static inline int nfs_verify_change_attribute(struct inode *inode, unsigned long
|
||||
extern int nfs_sync_mapping(struct address_space *mapping);
|
||||
extern void nfs_zap_mapping(struct inode *inode, struct address_space *mapping);
|
||||
extern void nfs_zap_caches(struct inode *);
|
||||
extern void nfs_invalidate_atime(struct inode *);
|
||||
extern struct inode *nfs_fhget(struct super_block *, struct nfs_fh *,
|
||||
struct nfs_fattr *);
|
||||
extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *);
|
||||
extern int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr);
|
||||
extern int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr);
|
||||
extern int nfs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
|
||||
extern int nfs_permission(struct inode *, int, struct nameidata *);
|
||||
extern int nfs_access_get_cached(struct inode *, struct rpc_cred *, struct nfs_access_entry *);
|
||||
extern void nfs_access_add_cache(struct inode *, struct nfs_access_entry *);
|
||||
extern void nfs_access_zap_cache(struct inode *inode);
|
||||
extern int nfs_open(struct inode *, struct file *);
|
||||
extern int nfs_release(struct inode *, struct file *);
|
||||
extern int nfs_attribute_timeout(struct inode *inode);
|
||||
@ -301,13 +286,10 @@ extern int nfs_revalidate_mapping(struct inode *inode, struct address_space *map
|
||||
extern int nfs_revalidate_mapping_nolock(struct inode *inode, struct address_space *mapping);
|
||||
extern int nfs_setattr(struct dentry *, struct iattr *);
|
||||
extern void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr);
|
||||
extern void nfs_begin_attr_update(struct inode *);
|
||||
extern void nfs_end_attr_update(struct inode *);
|
||||
extern void nfs_begin_data_update(struct inode *);
|
||||
extern void nfs_end_data_update(struct inode *);
|
||||
extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx);
|
||||
extern void put_nfs_open_context(struct nfs_open_context *ctx);
|
||||
extern struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, int mode);
|
||||
extern u64 nfs_compat_user_ino64(u64 fileid);
|
||||
|
||||
/* linux/net/ipv4/ipconfig.c: trims ip addr off front of name, too. */
|
||||
extern __be32 root_nfs_parse_addr(char *name); /*__init*/
|
||||
@ -328,14 +310,15 @@ extern const struct inode_operations nfs3_file_inode_operations;
|
||||
extern const struct file_operations nfs_file_operations;
|
||||
extern const struct address_space_operations nfs_file_aops;
|
||||
|
||||
static inline struct nfs_open_context *nfs_file_open_context(struct file *filp)
|
||||
{
|
||||
return filp->private_data;
|
||||
}
|
||||
|
||||
static inline struct rpc_cred *nfs_file_cred(struct file *file)
|
||||
{
|
||||
if (file != NULL) {
|
||||
struct nfs_open_context *ctx;
|
||||
|
||||
ctx = (struct nfs_open_context*)file->private_data;
|
||||
return ctx->cred;
|
||||
}
|
||||
if (file != NULL)
|
||||
return nfs_file_open_context(file)->cred;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -378,6 +361,8 @@ extern const struct file_operations nfs_dir_operations;
|
||||
extern struct dentry_operations nfs_dentry_operations;
|
||||
|
||||
extern int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fh, struct nfs_fattr *fattr);
|
||||
extern int nfs_may_open(struct inode *inode, struct rpc_cred *cred, int openflags);
|
||||
extern void nfs_access_zap_cache(struct inode *inode);
|
||||
|
||||
/*
|
||||
* linux/fs/nfs/symlink.c
|
||||
@ -420,15 +405,14 @@ extern int nfs_flush_incompatible(struct file *file, struct page *page);
|
||||
extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int);
|
||||
extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *);
|
||||
extern void nfs_writedata_release(void *);
|
||||
extern int nfs_set_page_dirty(struct page *);
|
||||
|
||||
/*
|
||||
* Try to write back everything synchronously (but check the
|
||||
* return value!)
|
||||
*/
|
||||
extern long nfs_sync_mapping_wait(struct address_space *, struct writeback_control *, int);
|
||||
extern int nfs_sync_mapping_range(struct address_space *, loff_t, loff_t, int);
|
||||
extern int nfs_wb_all(struct inode *inode);
|
||||
extern int nfs_wb_nocommit(struct inode *inode);
|
||||
extern int nfs_wb_page(struct inode *inode, struct page* page);
|
||||
extern int nfs_wb_page_priority(struct inode *inode, struct page* page, int how);
|
||||
extern int nfs_wb_page_cancel(struct inode *inode, struct page* page);
|
||||
|
@ -30,7 +30,6 @@
|
||||
#define PG_BUSY 0
|
||||
#define PG_NEED_COMMIT 1
|
||||
#define PG_NEED_RESCHED 2
|
||||
#define PG_NEED_FLUSH 3
|
||||
|
||||
struct nfs_inode;
|
||||
struct nfs_page {
|
||||
|
@ -62,7 +62,8 @@ struct nfs_fattr {
|
||||
#define NFS_ATTR_FATTR 0x0002 /* post-op attributes */
|
||||
#define NFS_ATTR_FATTR_V3 0x0004 /* NFSv3 attributes */
|
||||
#define NFS_ATTR_FATTR_V4 0x0008 /* NFSv4 change attribute */
|
||||
#define NFS_ATTR_FATTR_V4_REFERRAL 0x0010 /* NFSv4 referral */
|
||||
#define NFS_ATTR_WCC_V4 0x0010 /* pre-op change attribute */
|
||||
#define NFS_ATTR_FATTR_V4_REFERRAL 0x0020 /* NFSv4 referral */
|
||||
|
||||
/*
|
||||
* Info on the file system
|
||||
@ -538,10 +539,13 @@ typedef u64 clientid4;
|
||||
|
||||
struct nfs4_accessargs {
|
||||
const struct nfs_fh * fh;
|
||||
const u32 * bitmask;
|
||||
u32 access;
|
||||
};
|
||||
|
||||
struct nfs4_accessres {
|
||||
const struct nfs_server * server;
|
||||
struct nfs_fattr * fattr;
|
||||
u32 supported;
|
||||
u32 access;
|
||||
};
|
||||
|
@ -117,7 +117,7 @@ struct rpc_create_args {
|
||||
|
||||
struct rpc_clnt *rpc_create(struct rpc_create_args *args);
|
||||
struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *,
|
||||
struct rpc_program *, int);
|
||||
struct rpc_program *, u32);
|
||||
struct rpc_clnt *rpc_clone_client(struct rpc_clnt *);
|
||||
void rpc_shutdown_client(struct rpc_clnt *);
|
||||
void rpc_release_client(struct rpc_clnt *);
|
||||
|
@ -88,6 +88,11 @@ enum {
|
||||
CTL_SLOTTABLE_TCP,
|
||||
CTL_MIN_RESVPORT,
|
||||
CTL_MAX_RESVPORT,
|
||||
CTL_SLOTTABLE_RDMA,
|
||||
CTL_RDMA_MAXINLINEREAD,
|
||||
CTL_RDMA_MAXINLINEWRITE,
|
||||
CTL_RDMA_WRITEPADDING,
|
||||
CTL_RDMA_MEMREG,
|
||||
};
|
||||
|
||||
#endif /* _LINUX_SUNRPC_DEBUG_H_ */
|
||||
|
@ -138,6 +138,19 @@ typedef __be32 rpc_fraghdr;
|
||||
#define RPC_MAX_HEADER_WITH_AUTH \
|
||||
(RPC_CALLHDRSIZE + 2*(2+RPC_MAX_AUTH_SIZE/4))
|
||||
|
||||
/*
|
||||
* RFC1833/RFC3530 rpcbind (v3+) well-known netid's.
|
||||
*/
|
||||
#define RPCBIND_NETID_UDP "udp"
|
||||
#define RPCBIND_NETID_TCP "tcp"
|
||||
#define RPCBIND_NETID_UDP6 "udp6"
|
||||
#define RPCBIND_NETID_TCP6 "tcp6"
|
||||
|
||||
/*
|
||||
* Note that RFC 1833 does not put any size restrictions on the
|
||||
* netid string, but all currently defined netid's fit in 4 bytes.
|
||||
*/
|
||||
#define RPCBIND_MAXNETIDLEN (4u)
|
||||
|
||||
#endif /* __KERNEL__ */
|
||||
#endif /* _LINUX_SUNRPC_MSGPROT_H_ */
|
||||
|
116
include/linux/sunrpc/rpc_rdma.h
Normal file
116
include/linux/sunrpc/rpc_rdma.h
Normal file
@ -0,0 +1,116 @@
|
||||
/*
|
||||
* Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
|
||||
*
|
||||
* This software is available to you under a choice of one of two
|
||||
* licenses. You may choose to be licensed under the terms of the GNU
|
||||
* General Public License (GPL) Version 2, available from the file
|
||||
* COPYING in the main directory of this source tree, or the BSD-type
|
||||
* license below:
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* Redistributions in binary form must reproduce the above
|
||||
* copyright notice, this list of conditions and the following
|
||||
* disclaimer in the documentation and/or other materials provided
|
||||
* with the distribution.
|
||||
*
|
||||
* Neither the name of the Network Appliance, Inc. nor the names of
|
||||
* its contributors may be used to endorse or promote products
|
||||
* derived from this software without specific prior written
|
||||
* permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef _LINUX_SUNRPC_RPC_RDMA_H
|
||||
#define _LINUX_SUNRPC_RPC_RDMA_H
|
||||
|
||||
/*
 * Description of one registered memory segment: a handle, a byte length
 * and a 64-bit address/offset.
 * NOTE(review): the fields are declared as plain uint32_t/uint64_t; the
 * byte order used when these are encoded is not visible from this header --
 * confirm at the encode/decode sites.
 */
struct rpcrdma_segment {
	uint32_t rs_handle;	/* Registered memory handle */
	uint32_t rs_length;	/* Length of the chunk in bytes */
	uint64_t rs_offset;	/* Chunk virtual address or offset */
};

/*
 * read chunk(s), encoded as a linked list.
 */
struct rpcrdma_read_chunk {
	uint32_t rc_discrim;	/* 1 indicates presence */
	uint32_t rc_position;	/* Position in XDR stream */
	struct rpcrdma_segment rc_target;
};

/*
 * write chunk, and reply chunk.
 */
struct rpcrdma_write_chunk {
	struct rpcrdma_segment wc_target;
};

/*
 * write chunk(s), encoded as a counted array.
 *
 * wc_array is a C99 flexible array member holding wc_nchunks entries; it
 * contributes nothing to sizeof(struct rpcrdma_write_array), exactly as the
 * previous zero-length "[0]" declaration did, so existing size and offset
 * calculations are unaffected.
 */
struct rpcrdma_write_array {
	uint32_t wc_discrim;	/* 1 indicates presence */
	uint32_t wc_nchunks;	/* Array count */
	struct rpcrdma_write_chunk wc_array[];
};
|
||||
|
||||
/*
 * RPC-over-RDMA message header: four fixed 32-bit words followed by a
 * union (rm_body) of the possible body layouts.
 */
struct rpcrdma_msg {
	/* Mirrors the RPC header xid */
	uint32_t rm_xid;
	/* Version of this protocol */
	uint32_t rm_vers;
	/* Buffers requested/granted */
	uint32_t rm_credit;
	/* Type of message (enum rpcrdma_proc) */
	uint32_t rm_type;

	union {
		/* no chunks: 3 empty chunk lists */
		struct {
			uint32_t rm_empty[3];
		} rm_nochunks;

		/* no chunks and padded */
		struct {
			/* Padding alignment */
			uint32_t rm_align;
			/* Padding threshold */
			uint32_t rm_thresh;
			/* 3 empty chunk lists */
			uint32_t rm_pempty[3];
		} rm_padded;

		/* read, write and reply chunks */
		uint32_t rm_chunks[0];
	} rm_body;
};
|
||||
|
||||
#define RPCRDMA_HDRLEN_MIN 28
|
||||
|
||||
/*
 * RPC/RDMA error codes.  The numeric values are spelled out explicitly
 * and must not be renumbered.
 */
enum rpcrdma_errcode {
	ERR_VERS	= 1,
	ERR_CHUNK	= 2,
};
|
||||
|
||||
/*
 * Version range supported by the peer: lowest version first, then the
 * highest.
 */
struct rpcrdma_err_vers {
	uint32_t rdma_vers_low;
	uint32_t rdma_vers_high;
};
|
||||
|
||||
/*
 * Message types for an RPC-over-RDMA header (stored in rm_type of
 * struct rpcrdma_msg).  Values are explicit and must not be renumbered.
 */
enum rpcrdma_proc {
	/* An RPC call or reply msg */
	RDMA_MSG	= 0,
	/* An RPC call or reply msg - separate body */
	RDMA_NOMSG	= 1,
	/* An RPC call or reply msg with padding */
	RDMA_MSGP	= 2,
	/* Client signals reply completion */
	RDMA_DONE	= 3,
	/* An RPC RDMA encoding error */
	RDMA_ERROR	= 4,
};
|
||||
|
||||
#endif /* _LINUX_SUNRPC_RPC_RDMA_H */
|
@ -70,7 +70,10 @@ struct xdr_buf {
|
||||
|
||||
struct page ** pages; /* Array of contiguous pages */
|
||||
unsigned int page_base, /* Start of page data */
|
||||
page_len; /* Length of page data */
|
||||
page_len, /* Length of page data */
|
||||
flags; /* Flags for data disposition */
|
||||
#define XDRBUF_READ 0x01 /* target of file read */
|
||||
#define XDRBUF_WRITE 0x02 /* source of file write */
|
||||
|
||||
unsigned int buflen, /* Total length of storage buffer */
|
||||
len; /* Length of XDR encoded message */
|
||||
|
@ -19,24 +19,10 @@
|
||||
|
||||
#ifdef __KERNEL__
|
||||
|
||||
extern unsigned int xprt_udp_slot_table_entries;
|
||||
extern unsigned int xprt_tcp_slot_table_entries;
|
||||
|
||||
#define RPC_MIN_SLOT_TABLE (2U)
|
||||
#define RPC_DEF_SLOT_TABLE (16U)
|
||||
#define RPC_MAX_SLOT_TABLE (128U)
|
||||
|
||||
/*
|
||||
* Parameters for choosing a free port
|
||||
*/
|
||||
extern unsigned int xprt_min_resvport;
|
||||
extern unsigned int xprt_max_resvport;
|
||||
|
||||
#define RPC_MIN_RESVPORT (1U)
|
||||
#define RPC_MAX_RESVPORT (65535U)
|
||||
#define RPC_DEF_MIN_RESVPORT (665U)
|
||||
#define RPC_DEF_MAX_RESVPORT (1023U)
|
||||
|
||||
/*
|
||||
* This describes a timeout strategy
|
||||
*/
|
||||
@ -53,6 +39,10 @@ enum rpc_display_format_t {
|
||||
RPC_DISPLAY_PORT,
|
||||
RPC_DISPLAY_PROTO,
|
||||
RPC_DISPLAY_ALL,
|
||||
RPC_DISPLAY_HEX_ADDR,
|
||||
RPC_DISPLAY_HEX_PORT,
|
||||
RPC_DISPLAY_UNIVERSAL_ADDR,
|
||||
RPC_DISPLAY_NETID,
|
||||
RPC_DISPLAY_MAX,
|
||||
};
|
||||
|
||||
@ -196,14 +186,22 @@ struct rpc_xprt {
|
||||
char * address_strings[RPC_DISPLAY_MAX];
|
||||
};
|
||||
|
||||
struct rpc_xprtsock_create {
|
||||
int proto; /* IPPROTO_UDP or IPPROTO_TCP */
|
||||
struct xprt_create {
|
||||
int ident; /* XPRT_TRANSPORT identifier */
|
||||
struct sockaddr * srcaddr; /* optional local address */
|
||||
struct sockaddr * dstaddr; /* remote peer address */
|
||||
size_t addrlen;
|
||||
struct rpc_timeout * timeout; /* optional timeout parameters */
|
||||
};
|
||||
|
||||
struct xprt_class {
|
||||
struct list_head list;
|
||||
int ident; /* XPRT_TRANSPORT identifier */
|
||||
struct rpc_xprt * (*setup)(struct xprt_create *);
|
||||
struct module *owner;
|
||||
char name[32];
|
||||
};
|
||||
|
||||
/*
|
||||
* Transport operations used by ULPs
|
||||
*/
|
||||
@ -212,7 +210,7 @@ void xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long
|
||||
/*
|
||||
* Generic internal transport functions
|
||||
*/
|
||||
struct rpc_xprt * xprt_create_transport(struct rpc_xprtsock_create *args);
|
||||
struct rpc_xprt *xprt_create_transport(struct xprt_create *args);
|
||||
void xprt_connect(struct rpc_task *task);
|
||||
void xprt_reserve(struct rpc_task *task);
|
||||
int xprt_reserve_xprt(struct rpc_task *task);
|
||||
@ -235,6 +233,8 @@ static inline __be32 *xprt_skip_transport_header(struct rpc_xprt *xprt, __be32 *
|
||||
/*
|
||||
* Transport switch helper functions
|
||||
*/
|
||||
int xprt_register_transport(struct xprt_class *type);
|
||||
int xprt_unregister_transport(struct xprt_class *type);
|
||||
void xprt_set_retrans_timeout_def(struct rpc_task *task);
|
||||
void xprt_set_retrans_timeout_rtt(struct rpc_task *task);
|
||||
void xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status);
|
||||
@ -247,14 +247,6 @@ void xprt_complete_rqst(struct rpc_task *task, int copied);
|
||||
void xprt_release_rqst_cong(struct rpc_task *task);
|
||||
void xprt_disconnect(struct rpc_xprt *xprt);
|
||||
|
||||
/*
|
||||
* Socket transport setup operations
|
||||
*/
|
||||
struct rpc_xprt * xs_setup_udp(struct rpc_xprtsock_create *args);
|
||||
struct rpc_xprt * xs_setup_tcp(struct rpc_xprtsock_create *args);
|
||||
int init_socket_xprt(void);
|
||||
void cleanup_socket_xprt(void);
|
||||
|
||||
/*
|
||||
* Reserved bit positions in xprt->state
|
||||
*/
|
||||
|
85
include/linux/sunrpc/xprtrdma.h
Normal file
85
include/linux/sunrpc/xprtrdma.h
Normal file
@ -0,0 +1,85 @@
|
||||
/*
|
||||
* Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
|
||||
*
|
||||
* This software is available to you under a choice of one of two
|
||||
* licenses. You may choose to be licensed under the terms of the GNU
|
||||
* General Public License (GPL) Version 2, available from the file
|
||||
* COPYING in the main directory of this source tree, or the BSD-type
|
||||
* license below:
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* Redistributions in binary form must reproduce the above
|
||||
* copyright notice, this list of conditions and the following
|
||||
* disclaimer in the documentation and/or other materials provided
|
||||
* with the distribution.
|
||||
*
|
||||
* Neither the name of the Network Appliance, Inc. nor the names of
|
||||
* its contributors may be used to endorse or promote products
|
||||
* derived from this software without specific prior written
|
||||
* permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef _LINUX_SUNRPC_XPRTRDMA_H
|
||||
#define _LINUX_SUNRPC_XPRTRDMA_H
|
||||
|
||||
/*
|
||||
* RPC transport identifier for RDMA
|
||||
*/
|
||||
#define XPRT_TRANSPORT_RDMA 256
|
||||
|
||||
/*
|
||||
* rpcbind (v3+) RDMA netid.
|
||||
*/
|
||||
#define RPCBIND_NETID_RDMA "rdma"
|
||||
|
||||
/*
|
||||
* Constants. Max RPC/NFS header is big enough to account for
|
||||
* additional marshaling buffers passed down by Linux client.
|
||||
*
|
||||
* RDMA header is currently fixed max size, and is big enough for a
|
||||
* fully-chunked NFS message (read chunks are the largest). Note only
|
||||
* a single chunk type per message is supported currently.
|
||||
*/
|
||||
#define RPCRDMA_MIN_SLOT_TABLE (2U)
|
||||
#define RPCRDMA_DEF_SLOT_TABLE (32U)
|
||||
#define RPCRDMA_MAX_SLOT_TABLE (256U)
|
||||
|
||||
#define RPCRDMA_DEF_INLINE (1024) /* default inline max */
|
||||
|
||||
#define RPCRDMA_INLINE_PAD_THRESH (512)/* payload threshold to pad (bytes) */
|
||||
|
||||
#define RDMA_RESOLVE_TIMEOUT (5*HZ) /* TBD 5 seconds */
|
||||
#define RDMA_CONNECT_RETRY_MAX (2) /* retries if no listener backlog */
|
||||
|
||||
/* memory registration strategies */
|
||||
#define RPCRDMA_PERSISTENT_REGISTRATION (1)
|
||||
|
||||
enum rpcrdma_memreg {
|
||||
RPCRDMA_BOUNCEBUFFERS = 0,
|
||||
RPCRDMA_REGISTER,
|
||||
RPCRDMA_MEMWINDOWS,
|
||||
RPCRDMA_MEMWINDOWS_ASYNC,
|
||||
RPCRDMA_MTHCAFMR,
|
||||
RPCRDMA_ALLPHYSICAL,
|
||||
RPCRDMA_LAST
|
||||
};
|
||||
|
||||
#endif /* _LINUX_SUNRPC_XPRTRDMA_H */
|
51
include/linux/sunrpc/xprtsock.h
Normal file
51
include/linux/sunrpc/xprtsock.h
Normal file
@ -0,0 +1,51 @@
|
||||
/*
|
||||
* linux/include/linux/sunrpc/xprtsock.h
|
||||
*
|
||||
* Declarations for the RPC transport socket provider.
|
||||
*/
|
||||
|
||||
#ifndef _LINUX_SUNRPC_XPRTSOCK_H
|
||||
#define _LINUX_SUNRPC_XPRTSOCK_H
|
||||
|
||||
#ifdef __KERNEL__
|
||||
|
||||
/*
|
||||
* Socket transport setup operations
|
||||
*/
|
||||
struct rpc_xprt *xs_setup_udp(struct xprt_create *args);
|
||||
struct rpc_xprt *xs_setup_tcp(struct xprt_create *args);
|
||||
|
||||
int init_socket_xprt(void);
|
||||
void cleanup_socket_xprt(void);
|
||||
|
||||
/*
|
||||
* RPC transport identifiers for UDP, TCP
|
||||
*
|
||||
* To preserve compatibility with the historical use of raw IP protocol
|
||||
* IDs for transport selection, these are specified with the previous
|
||||
* values. No such restriction exists for new transports, except that
|
||||
* they may not collide with these values (17 and 6, respectively).
|
||||
*/
|
||||
#define XPRT_TRANSPORT_UDP IPPROTO_UDP
|
||||
#define XPRT_TRANSPORT_TCP IPPROTO_TCP
|
||||
|
||||
/*
|
||||
* RPC slot table sizes for UDP, TCP transports
|
||||
*/
|
||||
extern unsigned int xprt_udp_slot_table_entries;
|
||||
extern unsigned int xprt_tcp_slot_table_entries;
|
||||
|
||||
/*
|
||||
* Parameters for choosing a free port
|
||||
*/
|
||||
extern unsigned int xprt_min_resvport;
|
||||
extern unsigned int xprt_max_resvport;
|
||||
|
||||
#define RPC_MIN_RESVPORT (1U)
|
||||
#define RPC_MAX_RESVPORT (65535U)
|
||||
#define RPC_DEF_MIN_RESVPORT (665U)
|
||||
#define RPC_DEF_MAX_RESVPORT (1023U)
|
||||
|
||||
#endif /* __KERNEL__ */
|
||||
|
||||
#endif /* _LINUX_SUNRPC_XPRTSOCK_H */
|
@ -62,8 +62,6 @@ struct writeback_control {
|
||||
unsigned for_reclaim:1; /* Invoked from the page allocator */
|
||||
unsigned for_writepages:1; /* This is a writepages() call */
|
||||
unsigned range_cyclic:1; /* range_start is cyclic */
|
||||
|
||||
void *fs_private; /* For use by ->writepages() */
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -1525,6 +1525,7 @@ add_names:
|
||||
context->names[idx].ino = (unsigned long)-1;
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__audit_inode_child);
|
||||
|
||||
/**
|
||||
* auditsc_get_stamp - get local copies of audit_context values
|
||||
|
@ -5,6 +5,7 @@
|
||||
|
||||
obj-$(CONFIG_SUNRPC) += sunrpc.o
|
||||
obj-$(CONFIG_SUNRPC_GSS) += auth_gss/
|
||||
obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma/
|
||||
|
||||
sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \
|
||||
auth.o auth_null.o auth_unix.o \
|
||||
|
@ -42,7 +42,7 @@ gss_krb5_remove_padding(struct xdr_buf *buf, int blocksize)
|
||||
{
|
||||
u8 *ptr;
|
||||
u8 pad;
|
||||
int len = buf->len;
|
||||
size_t len = buf->len;
|
||||
|
||||
if (len <= buf->head[0].iov_len) {
|
||||
pad = *(u8 *)(buf->head[0].iov_base + len - 1);
|
||||
@ -53,9 +53,9 @@ gss_krb5_remove_padding(struct xdr_buf *buf, int blocksize)
|
||||
} else
|
||||
len -= buf->head[0].iov_len;
|
||||
if (len <= buf->page_len) {
|
||||
int last = (buf->page_base + len - 1)
|
||||
unsigned int last = (buf->page_base + len - 1)
|
||||
>>PAGE_CACHE_SHIFT;
|
||||
int offset = (buf->page_base + len - 1)
|
||||
unsigned int offset = (buf->page_base + len - 1)
|
||||
& (PAGE_CACHE_SIZE - 1);
|
||||
ptr = kmap_atomic(buf->pages[last], KM_USER0);
|
||||
pad = *(ptr + offset);
|
||||
|
@ -127,7 +127,14 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s
|
||||
struct rpc_clnt *clnt = NULL;
|
||||
struct rpc_auth *auth;
|
||||
int err;
|
||||
int len;
|
||||
size_t len;
|
||||
|
||||
/* sanity check the name before trying to print it */
|
||||
err = -EINVAL;
|
||||
len = strlen(servname);
|
||||
if (len > RPC_MAXNETNAMELEN)
|
||||
goto out_no_rpciod;
|
||||
len++;
|
||||
|
||||
dprintk("RPC: creating %s client for %s (xprt %p)\n",
|
||||
program->name, servname, xprt);
|
||||
@ -148,7 +155,6 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s
|
||||
clnt->cl_parent = clnt;
|
||||
|
||||
clnt->cl_server = clnt->cl_inline_name;
|
||||
len = strlen(servname) + 1;
|
||||
if (len > sizeof(clnt->cl_inline_name)) {
|
||||
char *buf = kmalloc(len, GFP_KERNEL);
|
||||
if (buf != 0)
|
||||
@ -234,8 +240,8 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
|
||||
{
|
||||
struct rpc_xprt *xprt;
|
||||
struct rpc_clnt *clnt;
|
||||
struct rpc_xprtsock_create xprtargs = {
|
||||
.proto = args->protocol,
|
||||
struct xprt_create xprtargs = {
|
||||
.ident = args->protocol,
|
||||
.srcaddr = args->saddress,
|
||||
.dstaddr = args->address,
|
||||
.addrlen = args->addrsize,
|
||||
@ -253,7 +259,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
|
||||
*/
|
||||
if (args->servername == NULL) {
|
||||
struct sockaddr_in *addr =
|
||||
(struct sockaddr_in *) &args->address;
|
||||
(struct sockaddr_in *) args->address;
|
||||
snprintf(servername, sizeof(servername), NIPQUAD_FMT,
|
||||
NIPQUAD(addr->sin_addr.s_addr));
|
||||
args->servername = servername;
|
||||
@ -269,9 +275,6 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
|
||||
if (args->flags & RPC_CLNT_CREATE_NONPRIVPORT)
|
||||
xprt->resvport = 0;
|
||||
|
||||
dprintk("RPC: creating %s client for %s (xprt %p)\n",
|
||||
args->program->name, args->servername, xprt);
|
||||
|
||||
clnt = rpc_new_client(xprt, args->servername, args->program,
|
||||
args->version, args->authflavor);
|
||||
if (IS_ERR(clnt))
|
||||
@ -439,7 +442,7 @@ rpc_release_client(struct rpc_clnt *clnt)
|
||||
*/
|
||||
struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *old,
|
||||
struct rpc_program *program,
|
||||
int vers)
|
||||
u32 vers)
|
||||
{
|
||||
struct rpc_clnt *clnt;
|
||||
struct rpc_version *version;
|
||||
@ -843,8 +846,7 @@ call_allocate(struct rpc_task *task)
|
||||
dprintk("RPC: %5u rpc_buffer allocation failed\n", task->tk_pid);
|
||||
|
||||
if (RPC_IS_ASYNC(task) || !signalled()) {
|
||||
xprt_release(task);
|
||||
task->tk_action = call_reserve;
|
||||
task->tk_action = call_allocate;
|
||||
rpc_delay(task, HZ>>4);
|
||||
return;
|
||||
}
|
||||
@ -871,6 +873,7 @@ rpc_xdr_buf_init(struct xdr_buf *buf, void *start, size_t len)
|
||||
buf->head[0].iov_len = len;
|
||||
buf->tail[0].iov_len = 0;
|
||||
buf->page_len = 0;
|
||||
buf->flags = 0;
|
||||
buf->len = 0;
|
||||
buf->buflen = len;
|
||||
}
|
||||
@ -937,7 +940,7 @@ call_bind(struct rpc_task *task)
|
||||
static void
|
||||
call_bind_status(struct rpc_task *task)
|
||||
{
|
||||
int status = -EACCES;
|
||||
int status = -EIO;
|
||||
|
||||
if (task->tk_status >= 0) {
|
||||
dprint_status(task);
|
||||
@ -947,9 +950,20 @@ call_bind_status(struct rpc_task *task)
|
||||
}
|
||||
|
||||
switch (task->tk_status) {
|
||||
case -EAGAIN:
|
||||
dprintk("RPC: %5u rpcbind waiting for another request "
|
||||
"to finish\n", task->tk_pid);
|
||||
/* avoid busy-waiting here -- could be a network outage. */
|
||||
rpc_delay(task, 5*HZ);
|
||||
goto retry_timeout;
|
||||
case -EACCES:
|
||||
dprintk("RPC: %5u remote rpcbind: RPC program/version "
|
||||
"unavailable\n", task->tk_pid);
|
||||
/* fail immediately if this is an RPC ping */
|
||||
if (task->tk_msg.rpc_proc->p_proc == 0) {
|
||||
status = -EOPNOTSUPP;
|
||||
break;
|
||||
}
|
||||
rpc_delay(task, 3*HZ);
|
||||
goto retry_timeout;
|
||||
case -ETIMEDOUT:
|
||||
@ -957,6 +971,7 @@ call_bind_status(struct rpc_task *task)
|
||||
task->tk_pid);
|
||||
goto retry_timeout;
|
||||
case -EPFNOSUPPORT:
|
||||
/* server doesn't support any rpcbind version we know of */
|
||||
dprintk("RPC: %5u remote rpcbind service unavailable\n",
|
||||
task->tk_pid);
|
||||
break;
|
||||
@ -969,7 +984,6 @@ call_bind_status(struct rpc_task *task)
|
||||
default:
|
||||
dprintk("RPC: %5u unrecognized rpcbind error (%d)\n",
|
||||
task->tk_pid, -task->tk_status);
|
||||
status = -EIO;
|
||||
}
|
||||
|
||||
rpc_exit(task, status);
|
||||
@ -1257,7 +1271,6 @@ call_refresh(struct rpc_task *task)
|
||||
{
|
||||
dprint_status(task);
|
||||
|
||||
xprt_release(task); /* Must do to obtain new XID */
|
||||
task->tk_action = call_refreshresult;
|
||||
task->tk_status = 0;
|
||||
task->tk_client->cl_stats->rpcauthrefresh++;
|
||||
@ -1375,6 +1388,8 @@ call_verify(struct rpc_task *task)
|
||||
dprintk("RPC: %5u %s: retry stale creds\n",
|
||||
task->tk_pid, __FUNCTION__);
|
||||
rpcauth_invalcred(task);
|
||||
/* Ensure we obtain a new XID! */
|
||||
xprt_release(task);
|
||||
task->tk_action = call_refresh;
|
||||
goto out_retry;
|
||||
case RPC_AUTH_BADCRED:
|
||||
@ -1523,13 +1538,18 @@ void rpc_show_tasks(void)
|
||||
spin_lock(&clnt->cl_lock);
|
||||
list_for_each_entry(t, &clnt->cl_tasks, tk_task) {
|
||||
const char *rpc_waitq = "none";
|
||||
int proc;
|
||||
|
||||
if (t->tk_msg.rpc_proc)
|
||||
proc = t->tk_msg.rpc_proc->p_proc;
|
||||
else
|
||||
proc = -1;
|
||||
|
||||
if (RPC_IS_QUEUED(t))
|
||||
rpc_waitq = rpc_qname(t->u.tk_wait.rpc_waitq);
|
||||
|
||||
printk("%5u %04d %04x %6d %8p %6d %8p %8ld %8s %8p %8p\n",
|
||||
t->tk_pid,
|
||||
(t->tk_msg.rpc_proc ? t->tk_msg.rpc_proc->p_proc : -1),
|
||||
t->tk_pid, proc,
|
||||
t->tk_flags, t->tk_status,
|
||||
t->tk_client,
|
||||
(t->tk_client ? t->tk_client->cl_prog : 0),
|
||||
|
@ -14,7 +14,7 @@
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/mount.h>
|
||||
#include <linux/namei.h>
|
||||
#include <linux/dnotify.h>
|
||||
#include <linux/fsnotify.h>
|
||||
#include <linux/kernel.h>
|
||||
|
||||
#include <asm/ioctls.h>
|
||||
@ -329,6 +329,7 @@ rpc_show_info(struct seq_file *m, void *v)
|
||||
clnt->cl_prog, clnt->cl_vers);
|
||||
seq_printf(m, "address: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR));
|
||||
seq_printf(m, "protocol: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_PROTO));
|
||||
seq_printf(m, "port: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_PORT));
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -585,6 +586,7 @@ rpc_populate(struct dentry *parent,
|
||||
if (S_ISDIR(mode))
|
||||
inc_nlink(dir);
|
||||
d_add(dentry, inode);
|
||||
fsnotify_create(dir, dentry);
|
||||
}
|
||||
mutex_unlock(&dir->i_mutex);
|
||||
return 0;
|
||||
@ -606,7 +608,7 @@ __rpc_mkdir(struct inode *dir, struct dentry *dentry)
|
||||
inode->i_ino = iunique(dir->i_sb, 100);
|
||||
d_instantiate(dentry, inode);
|
||||
inc_nlink(dir);
|
||||
inode_dir_notify(dir, DN_CREATE);
|
||||
fsnotify_mkdir(dir, dentry);
|
||||
return 0;
|
||||
out_err:
|
||||
printk(KERN_WARNING "%s: %s failed to allocate inode for dentry %s\n",
|
||||
@ -748,7 +750,7 @@ rpc_mkpipe(struct dentry *parent, const char *name, void *private, struct rpc_pi
|
||||
rpci->flags = flags;
|
||||
rpci->ops = ops;
|
||||
rpci->nkern_readwriters = 1;
|
||||
inode_dir_notify(dir, DN_CREATE);
|
||||
fsnotify_create(dir, dentry);
|
||||
dget(dentry);
|
||||
out:
|
||||
mutex_unlock(&dir->i_mutex);
|
||||
|
@ -16,11 +16,14 @@
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <linux/socket.h>
|
||||
#include <linux/in.h>
|
||||
#include <linux/in6.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/errno.h>
|
||||
|
||||
#include <linux/sunrpc/clnt.h>
|
||||
#include <linux/sunrpc/sched.h>
|
||||
#include <linux/sunrpc/xprtsock.h>
|
||||
|
||||
#ifdef RPC_DEBUG
|
||||
# define RPCDBG_FACILITY RPCDBG_BIND
|
||||
@ -90,26 +93,6 @@ enum {
|
||||
*/
|
||||
#define RPCB_MAXADDRLEN (128u)
|
||||
|
||||
/*
|
||||
* r_netid
|
||||
*
|
||||
* Quoting RFC 3530, section 2.2:
|
||||
*
|
||||
* For TCP over IPv4 the value of r_netid is the string "tcp". For UDP
|
||||
* over IPv4 the value of r_netid is the string "udp".
|
||||
*
|
||||
* ...
|
||||
*
|
||||
* For TCP over IPv6 the value of r_netid is the string "tcp6". For UDP
|
||||
* over IPv6 the value of r_netid is the string "udp6".
|
||||
*/
|
||||
#define RPCB_NETID_UDP "\165\144\160" /* "udp" */
|
||||
#define RPCB_NETID_TCP "\164\143\160" /* "tcp" */
|
||||
#define RPCB_NETID_UDP6 "\165\144\160\066" /* "udp6" */
|
||||
#define RPCB_NETID_TCP6 "\164\143\160\066" /* "tcp6" */
|
||||
|
||||
#define RPCB_MAXNETIDLEN (4u)
|
||||
|
||||
/*
|
||||
* r_owner
|
||||
*
|
||||
@ -120,7 +103,7 @@ enum {
|
||||
#define RPCB_MAXOWNERLEN sizeof(RPCB_OWNER_STRING)
|
||||
|
||||
static void rpcb_getport_done(struct rpc_task *, void *);
|
||||
extern struct rpc_program rpcb_program;
|
||||
static struct rpc_program rpcb_program;
|
||||
|
||||
struct rpcbind_args {
|
||||
struct rpc_xprt * r_xprt;
|
||||
@ -137,10 +120,13 @@ struct rpcbind_args {
|
||||
static struct rpc_procinfo rpcb_procedures2[];
|
||||
static struct rpc_procinfo rpcb_procedures3[];
|
||||
|
||||
static struct rpcb_info {
|
||||
struct rpcb_info {
|
||||
int rpc_vers;
|
||||
struct rpc_procinfo * rpc_proc;
|
||||
} rpcb_next_version[];
|
||||
};
|
||||
|
||||
static struct rpcb_info rpcb_next_version[];
|
||||
static struct rpcb_info rpcb_next_version6[];
|
||||
|
||||
static void rpcb_getport_prepare(struct rpc_task *task, void *calldata)
|
||||
{
|
||||
@ -190,7 +176,17 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr,
|
||||
RPC_CLNT_CREATE_INTR),
|
||||
};
|
||||
|
||||
((struct sockaddr_in *)srvaddr)->sin_port = htons(RPCBIND_PORT);
|
||||
switch (srvaddr->sa_family) {
|
||||
case AF_INET:
|
||||
((struct sockaddr_in *)srvaddr)->sin_port = htons(RPCBIND_PORT);
|
||||
break;
|
||||
case AF_INET6:
|
||||
((struct sockaddr_in6 *)srvaddr)->sin6_port = htons(RPCBIND_PORT);
|
||||
break;
|
||||
default:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (!privileged)
|
||||
args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
|
||||
return rpc_create(&args);
|
||||
@ -234,7 +230,7 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
|
||||
prog, vers, prot, port);
|
||||
|
||||
rpcb_clnt = rpcb_create("localhost", (struct sockaddr *) &sin,
|
||||
IPPROTO_UDP, 2, 1);
|
||||
XPRT_TRANSPORT_UDP, 2, 1);
|
||||
if (IS_ERR(rpcb_clnt))
|
||||
return PTR_ERR(rpcb_clnt);
|
||||
|
||||
@ -316,6 +312,7 @@ void rpcb_getport_async(struct rpc_task *task)
|
||||
struct rpc_task *child;
|
||||
struct sockaddr addr;
|
||||
int status;
|
||||
struct rpcb_info *info;
|
||||
|
||||
dprintk("RPC: %5u %s(%s, %u, %u, %d)\n",
|
||||
task->tk_pid, __FUNCTION__,
|
||||
@ -325,7 +322,7 @@ void rpcb_getport_async(struct rpc_task *task)
|
||||
BUG_ON(clnt->cl_parent != clnt);
|
||||
|
||||
if (xprt_test_and_set_binding(xprt)) {
|
||||
status = -EACCES; /* tell caller to check again */
|
||||
status = -EAGAIN; /* tell caller to check again */
|
||||
dprintk("RPC: %5u %s: waiting for another binder\n",
|
||||
task->tk_pid, __FUNCTION__);
|
||||
goto bailout_nowake;
|
||||
@ -343,18 +340,43 @@ void rpcb_getport_async(struct rpc_task *task)
|
||||
goto bailout_nofree;
|
||||
}
|
||||
|
||||
if (rpcb_next_version[xprt->bind_index].rpc_proc == NULL) {
|
||||
rpc_peeraddr(clnt, (void *)&addr, sizeof(addr));
|
||||
|
||||
/* Don't ever use rpcbind v2 for AF_INET6 requests */
|
||||
switch (addr.sa_family) {
|
||||
case AF_INET:
|
||||
info = rpcb_next_version;
|
||||
break;
|
||||
case AF_INET6:
|
||||
info = rpcb_next_version6;
|
||||
break;
|
||||
default:
|
||||
status = -EAFNOSUPPORT;
|
||||
dprintk("RPC: %5u %s: bad address family\n",
|
||||
task->tk_pid, __FUNCTION__);
|
||||
goto bailout_nofree;
|
||||
}
|
||||
if (info[xprt->bind_index].rpc_proc == NULL) {
|
||||
xprt->bind_index = 0;
|
||||
status = -EACCES; /* tell caller to try again later */
|
||||
status = -EPFNOSUPPORT;
|
||||
dprintk("RPC: %5u %s: no more getport versions available\n",
|
||||
task->tk_pid, __FUNCTION__);
|
||||
goto bailout_nofree;
|
||||
}
|
||||
bind_version = rpcb_next_version[xprt->bind_index].rpc_vers;
|
||||
bind_version = info[xprt->bind_index].rpc_vers;
|
||||
|
||||
dprintk("RPC: %5u %s: trying rpcbind version %u\n",
|
||||
task->tk_pid, __FUNCTION__, bind_version);
|
||||
|
||||
rpcb_clnt = rpcb_create(clnt->cl_server, &addr, xprt->prot,
|
||||
bind_version, 0);
|
||||
if (IS_ERR(rpcb_clnt)) {
|
||||
status = PTR_ERR(rpcb_clnt);
|
||||
dprintk("RPC: %5u %s: rpcb_create failed, error %ld\n",
|
||||
task->tk_pid, __FUNCTION__, PTR_ERR(rpcb_clnt));
|
||||
goto bailout_nofree;
|
||||
}
|
||||
|
||||
map = kzalloc(sizeof(struct rpcbind_args), GFP_ATOMIC);
|
||||
if (!map) {
|
||||
status = -ENOMEM;
|
||||
@ -367,28 +389,19 @@ void rpcb_getport_async(struct rpc_task *task)
|
||||
map->r_prot = xprt->prot;
|
||||
map->r_port = 0;
|
||||
map->r_xprt = xprt_get(xprt);
|
||||
map->r_netid = (xprt->prot == IPPROTO_TCP) ? RPCB_NETID_TCP :
|
||||
RPCB_NETID_UDP;
|
||||
memcpy(&map->r_addr, rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR),
|
||||
sizeof(map->r_addr));
|
||||
map->r_netid = rpc_peeraddr2str(clnt, RPC_DISPLAY_NETID);
|
||||
memcpy(map->r_addr,
|
||||
rpc_peeraddr2str(rpcb_clnt, RPC_DISPLAY_UNIVERSAL_ADDR),
|
||||
sizeof(map->r_addr));
|
||||
map->r_owner = RPCB_OWNER_STRING; /* ignored for GETADDR */
|
||||
|
||||
rpc_peeraddr(clnt, (void *)&addr, sizeof(addr));
|
||||
rpcb_clnt = rpcb_create(clnt->cl_server, &addr, xprt->prot, bind_version, 0);
|
||||
if (IS_ERR(rpcb_clnt)) {
|
||||
status = PTR_ERR(rpcb_clnt);
|
||||
dprintk("RPC: %5u %s: rpcb_create failed, error %ld\n",
|
||||
task->tk_pid, __FUNCTION__, PTR_ERR(rpcb_clnt));
|
||||
goto bailout;
|
||||
}
|
||||
|
||||
child = rpc_run_task(rpcb_clnt, RPC_TASK_ASYNC, &rpcb_getport_ops, map);
|
||||
rpc_release_client(rpcb_clnt);
|
||||
if (IS_ERR(child)) {
|
||||
status = -EIO;
|
||||
dprintk("RPC: %5u %s: rpc_run_task failed\n",
|
||||
task->tk_pid, __FUNCTION__);
|
||||
goto bailout_nofree;
|
||||
goto bailout;
|
||||
}
|
||||
rpc_put_task(child);
|
||||
|
||||
@ -403,6 +416,7 @@ bailout_nofree:
|
||||
bailout_nowake:
|
||||
task->tk_status = status;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rpcb_getport_async);
|
||||
|
||||
/*
|
||||
* Rpcbind child task calls this callback via tk_exit.
|
||||
@ -413,6 +427,10 @@ static void rpcb_getport_done(struct rpc_task *child, void *data)
|
||||
struct rpc_xprt *xprt = map->r_xprt;
|
||||
int status = child->tk_status;
|
||||
|
||||
/* Garbage reply: retry with a lesser rpcbind version */
|
||||
if (status == -EIO)
|
||||
status = -EPROTONOSUPPORT;
|
||||
|
||||
/* rpcbind server doesn't support this rpcbind protocol version */
|
||||
if (status == -EPROTONOSUPPORT)
|
||||
xprt->bind_index++;
|
||||
@ -490,16 +508,24 @@ static int rpcb_decode_getaddr(struct rpc_rqst *req, __be32 *p,
|
||||
unsigned short *portp)
|
||||
{
|
||||
char *addr;
|
||||
int addr_len, c, i, f, first, val;
|
||||
u32 addr_len;
|
||||
int c, i, f, first, val;
|
||||
|
||||
*portp = 0;
|
||||
addr_len = (unsigned int) ntohl(*p++);
|
||||
if (addr_len > RPCB_MAXADDRLEN) /* sanity */
|
||||
return -EINVAL;
|
||||
addr_len = ntohl(*p++);
|
||||
|
||||
dprintk("RPC: rpcb_decode_getaddr returned string: '%s'\n",
|
||||
(char *) p);
|
||||
/*
|
||||
* Simple sanity check. The smallest possible universal
|
||||
* address is an IPv4 address string containing 11 bytes.
|
||||
*/
|
||||
if (addr_len < 11 || addr_len > RPCB_MAXADDRLEN)
|
||||
goto out_err;
|
||||
|
||||
/*
|
||||
* Start at the end and walk backwards until the first dot
|
||||
* is encountered. When the second dot is found, we have
|
||||
* both parts of the port number.
|
||||
*/
|
||||
addr = (char *)p;
|
||||
val = 0;
|
||||
first = 1;
|
||||
@ -521,8 +547,19 @@ static int rpcb_decode_getaddr(struct rpc_rqst *req, __be32 *p,
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Simple sanity check. If we never saw a dot in the reply,
|
||||
* then this was probably just garbage.
|
||||
*/
|
||||
if (first)
|
||||
goto out_err;
|
||||
|
||||
dprintk("RPC: rpcb_decode_getaddr port=%u\n", *portp);
|
||||
return 0;
|
||||
|
||||
out_err:
|
||||
dprintk("RPC: rpcbind server returned malformed reply\n");
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
#define RPCB_program_sz (1u)
|
||||
@ -531,7 +568,7 @@ static int rpcb_decode_getaddr(struct rpc_rqst *req, __be32 *p,
|
||||
#define RPCB_port_sz (1u)
|
||||
#define RPCB_boolean_sz (1u)
|
||||
|
||||
#define RPCB_netid_sz (1+XDR_QUADLEN(RPCB_MAXNETIDLEN))
|
||||
#define RPCB_netid_sz (1+XDR_QUADLEN(RPCBIND_MAXNETIDLEN))
|
||||
#define RPCB_addr_sz (1+XDR_QUADLEN(RPCB_MAXADDRLEN))
|
||||
#define RPCB_ownerstring_sz (1+XDR_QUADLEN(RPCB_MAXOWNERLEN))
|
||||
|
||||
@ -593,6 +630,14 @@ static struct rpcb_info rpcb_next_version[] = {
|
||||
{ 0, NULL },
|
||||
};
|
||||
|
||||
static struct rpcb_info rpcb_next_version6[] = {
|
||||
#ifdef CONFIG_SUNRPC_BIND34
|
||||
{ 4, &rpcb_procedures4[RPCBPROC_GETVERSADDR] },
|
||||
{ 3, &rpcb_procedures3[RPCBPROC_GETADDR] },
|
||||
#endif
|
||||
{ 0, NULL },
|
||||
};
|
||||
|
||||
static struct rpc_version rpcb_version2 = {
|
||||
.number = 2,
|
||||
.nrprocs = RPCB_HIGHPROC_2,
|
||||
@ -621,7 +666,7 @@ static struct rpc_version *rpcb_version[] = {
|
||||
|
||||
static struct rpc_stat rpcb_stats;
|
||||
|
||||
struct rpc_program rpcb_program = {
|
||||
static struct rpc_program rpcb_program = {
|
||||
.name = "rpcbind",
|
||||
.number = RPCBIND_PROGRAM,
|
||||
.nrvers = ARRAY_SIZE(rpcb_version),
|
||||
|
@ -777,6 +777,7 @@ void *rpc_malloc(struct rpc_task *task, size_t size)
|
||||
task->tk_pid, size, buf);
|
||||
return &buf->data;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rpc_malloc);
|
||||
|
||||
/**
|
||||
* rpc_free - free buffer allocated via rpc_malloc
|
||||
@ -802,6 +803,7 @@ void rpc_free(void *buffer)
|
||||
else
|
||||
kfree(buf);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rpc_free);
|
||||
|
||||
/*
|
||||
* Creation and deletion of RPC task structures
|
||||
|
@ -34,6 +34,7 @@ size_t xdr_skb_read_bits(struct xdr_skb_reader *desc, void *to, size_t len)
|
||||
desc->offset += len;
|
||||
return len;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xdr_skb_read_bits);
|
||||
|
||||
/**
|
||||
* xdr_skb_read_and_csum_bits - copy and checksum from skb to buffer
|
||||
@ -137,6 +138,7 @@ copy_tail:
|
||||
out:
|
||||
return copied;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xdr_partial_copy_from_skb);
|
||||
|
||||
/**
|
||||
* csum_partial_copy_to_xdr - checksum and copy data
|
||||
@ -179,3 +181,4 @@ no_checksum:
|
||||
return -1;
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(csum_partial_copy_to_xdr);
|
||||
|
@ -20,7 +20,7 @@
|
||||
#include <linux/sunrpc/auth.h>
|
||||
#include <linux/workqueue.h>
|
||||
#include <linux/sunrpc/rpc_pipe_fs.h>
|
||||
|
||||
#include <linux/sunrpc/xprtsock.h>
|
||||
|
||||
/* RPC scheduler */
|
||||
EXPORT_SYMBOL(rpc_execute);
|
||||
|
@ -17,6 +17,7 @@
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <linux/unistd.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
#include <linux/sunrpc/clnt.h>
|
||||
|
||||
@ -40,6 +41,7 @@ rpc_init_rtt(struct rpc_rtt *rt, unsigned long timeo)
|
||||
rt->ntimeouts[i] = 0;
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rpc_init_rtt);
|
||||
|
||||
/*
|
||||
* NB: When computing the smoothed RTT and standard deviation,
|
||||
@ -75,6 +77,7 @@ rpc_update_rtt(struct rpc_rtt *rt, unsigned timer, long m)
|
||||
if (*sdrtt < RPC_RTO_MIN)
|
||||
*sdrtt = RPC_RTO_MIN;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rpc_update_rtt);
|
||||
|
||||
/*
|
||||
* Estimate rto for an nfs rpc sent via an unreliable datagram.
|
||||
@ -103,3 +106,4 @@ rpc_calc_rto(struct rpc_rtt *rt, unsigned timer)
|
||||
|
||||
return res;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rpc_calc_rto);
|
||||
|
@ -62,6 +62,9 @@ static inline void do_xprt_reserve(struct rpc_task *);
|
||||
static void xprt_connect_status(struct rpc_task *task);
|
||||
static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *);
|
||||
|
||||
/*
 * Serializes access to xprt_list, the list of registered transport
 * classes.  DEFINE_SPINLOCK() is used rather than the deprecated
 * SPIN_LOCK_UNLOCKED initializer so that lockdep can give this lock
 * its own class.
 */
static DEFINE_SPINLOCK(xprt_list_lock);
|
||||
static LIST_HEAD(xprt_list);
|
||||
|
||||
/*
|
||||
* The transport code maintains an estimate on the maximum number of out-
|
||||
* standing RPC requests, using a smoothed version of the congestion
|
||||
@ -80,6 +83,78 @@ static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *);
|
||||
|
||||
#define RPCXPRT_CONGESTED(xprt) ((xprt)->cong >= (xprt)->cwnd)
|
||||
|
||||
/**
|
||||
* xprt_register_transport - register a transport implementation
|
||||
* @transport: transport to register
|
||||
*
|
||||
* If a transport implementation is loaded as a kernel module, it can
|
||||
* call this interface to make itself known to the RPC client.
|
||||
*
|
||||
* Returns:
|
||||
* 0: transport successfully registered
|
||||
* -EEXIST: transport already registered
|
||||
* -EINVAL: transport module being unloaded
|
||||
*/
|
||||
int xprt_register_transport(struct xprt_class *transport)
|
||||
{
|
||||
struct xprt_class *t;
|
||||
int result;
|
||||
|
||||
result = -EEXIST;
|
||||
spin_lock(&xprt_list_lock);
|
||||
list_for_each_entry(t, &xprt_list, list) {
|
||||
/* don't register the same transport class twice */
|
||||
if (t->ident == transport->ident)
|
||||
goto out;
|
||||
}
|
||||
|
||||
result = -EINVAL;
|
||||
if (try_module_get(THIS_MODULE)) {
|
||||
list_add_tail(&transport->list, &xprt_list);
|
||||
printk(KERN_INFO "RPC: Registered %s transport module.\n",
|
||||
transport->name);
|
||||
result = 0;
|
||||
}
|
||||
|
||||
out:
|
||||
spin_unlock(&xprt_list_lock);
|
||||
return result;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xprt_register_transport);
|
||||
|
||||
/**
|
||||
* xprt_unregister_transport - unregister a transport implementation
|
||||
* @transport: transport to unregister
|
||||
*
|
||||
* Returns:
|
||||
* 0: transport successfully unregistered
|
||||
* -ENOENT: transport never registered
|
||||
*/
|
||||
int xprt_unregister_transport(struct xprt_class *transport)
|
||||
{
|
||||
struct xprt_class *t;
|
||||
int result;
|
||||
|
||||
result = 0;
|
||||
spin_lock(&xprt_list_lock);
|
||||
list_for_each_entry(t, &xprt_list, list) {
|
||||
if (t == transport) {
|
||||
printk(KERN_INFO
|
||||
"RPC: Unregistered %s transport module.\n",
|
||||
transport->name);
|
||||
list_del_init(&transport->list);
|
||||
module_put(THIS_MODULE);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
result = -ENOENT;
|
||||
|
||||
out:
|
||||
spin_unlock(&xprt_list_lock);
|
||||
return result;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xprt_unregister_transport);
|
||||
|
||||
/**
|
||||
* xprt_reserve_xprt - serialize write access to transports
|
||||
* @task: task that is requesting access to the transport
|
||||
@ -118,6 +193,7 @@ out_sleep:
|
||||
rpc_sleep_on(&xprt->sending, task, NULL, NULL);
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xprt_reserve_xprt);
|
||||
|
||||
static void xprt_clear_locked(struct rpc_xprt *xprt)
|
||||
{
|
||||
@ -167,6 +243,7 @@ out_sleep:
|
||||
rpc_sleep_on(&xprt->sending, task, NULL, NULL);
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xprt_reserve_xprt_cong);
|
||||
|
||||
static inline int xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task)
|
||||
{
|
||||
@ -246,6 +323,7 @@ void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task)
|
||||
__xprt_lock_write_next(xprt);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xprt_release_xprt);
|
||||
|
||||
/**
|
||||
* xprt_release_xprt_cong - allow other requests to use a transport
|
||||
@ -262,6 +340,7 @@ void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task)
|
||||
__xprt_lock_write_next_cong(xprt);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xprt_release_xprt_cong);
|
||||
|
||||
static inline void xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task)
|
||||
{
|
||||
@ -314,6 +393,7 @@ void xprt_release_rqst_cong(struct rpc_task *task)
|
||||
{
|
||||
__xprt_put_cong(task->tk_xprt, task->tk_rqstp);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xprt_release_rqst_cong);
|
||||
|
||||
/**
|
||||
* xprt_adjust_cwnd - adjust transport congestion window
|
||||
@ -345,6 +425,7 @@ void xprt_adjust_cwnd(struct rpc_task *task, int result)
|
||||
xprt->cwnd = cwnd;
|
||||
__xprt_put_cong(xprt, req);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xprt_adjust_cwnd);
|
||||
|
||||
/**
|
||||
* xprt_wake_pending_tasks - wake all tasks on a transport's pending queue
|
||||
@ -359,6 +440,7 @@ void xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status)
|
||||
else
|
||||
rpc_wake_up(&xprt->pending);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xprt_wake_pending_tasks);
|
||||
|
||||
/**
|
||||
* xprt_wait_for_buffer_space - wait for transport output buffer to clear
|
||||
@ -373,6 +455,7 @@ void xprt_wait_for_buffer_space(struct rpc_task *task)
|
||||
task->tk_timeout = req->rq_timeout;
|
||||
rpc_sleep_on(&xprt->pending, task, NULL, NULL);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xprt_wait_for_buffer_space);
|
||||
|
||||
/**
|
||||
* xprt_write_space - wake the task waiting for transport output buffer space
|
||||
@ -393,6 +476,7 @@ void xprt_write_space(struct rpc_xprt *xprt)
|
||||
}
|
||||
spin_unlock_bh(&xprt->transport_lock);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xprt_write_space);
|
||||
|
||||
/**
|
||||
* xprt_set_retrans_timeout_def - set a request's retransmit timeout
|
||||
@ -406,6 +490,7 @@ void xprt_set_retrans_timeout_def(struct rpc_task *task)
|
||||
{
|
||||
task->tk_timeout = task->tk_rqstp->rq_timeout;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xprt_set_retrans_timeout_def);
|
||||
|
||||
/*
|
||||
* xprt_set_retrans_timeout_rtt - set a request's retransmit timeout
|
||||
@ -425,6 +510,7 @@ void xprt_set_retrans_timeout_rtt(struct rpc_task *task)
|
||||
if (task->tk_timeout > max_timeout || task->tk_timeout == 0)
|
||||
task->tk_timeout = max_timeout;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xprt_set_retrans_timeout_rtt);
|
||||
|
||||
static void xprt_reset_majortimeo(struct rpc_rqst *req)
|
||||
{
|
||||
@ -500,6 +586,7 @@ void xprt_disconnect(struct rpc_xprt *xprt)
|
||||
xprt_wake_pending_tasks(xprt, -ENOTCONN);
|
||||
spin_unlock_bh(&xprt->transport_lock);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xprt_disconnect);
|
||||
|
||||
static void
|
||||
xprt_init_autodisconnect(unsigned long data)
|
||||
@ -610,6 +697,7 @@ struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid)
|
||||
xprt->stat.bad_xids++;
|
||||
return NULL;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xprt_lookup_rqst);
|
||||
|
||||
/**
|
||||
* xprt_update_rtt - update an RPC client's RTT state after receiving a reply
|
||||
@ -629,6 +717,7 @@ void xprt_update_rtt(struct rpc_task *task)
|
||||
rpc_set_timeo(rtt, timer, req->rq_ntrans - 1);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xprt_update_rtt);
|
||||
|
||||
/**
|
||||
* xprt_complete_rqst - called when reply processing is complete
|
||||
@ -653,6 +742,7 @@ void xprt_complete_rqst(struct rpc_task *task, int copied)
|
||||
req->rq_received = req->rq_private_buf.len = copied;
|
||||
rpc_wake_up_task(task);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xprt_complete_rqst);
|
||||
|
||||
static void xprt_timer(struct rpc_task *task)
|
||||
{
|
||||
@ -889,23 +979,25 @@ void xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long i
|
||||
* @args: rpc transport creation arguments
|
||||
*
|
||||
*/
|
||||
struct rpc_xprt *xprt_create_transport(struct rpc_xprtsock_create *args)
|
||||
struct rpc_xprt *xprt_create_transport(struct xprt_create *args)
|
||||
{
|
||||
struct rpc_xprt *xprt;
|
||||
struct rpc_rqst *req;
|
||||
struct xprt_class *t;
|
||||
|
||||
switch (args->proto) {
|
||||
case IPPROTO_UDP:
|
||||
xprt = xs_setup_udp(args);
|
||||
break;
|
||||
case IPPROTO_TCP:
|
||||
xprt = xs_setup_tcp(args);
|
||||
break;
|
||||
default:
|
||||
printk(KERN_ERR "RPC: unrecognized transport protocol: %d\n",
|
||||
args->proto);
|
||||
return ERR_PTR(-EIO);
|
||||
spin_lock(&xprt_list_lock);
|
||||
list_for_each_entry(t, &xprt_list, list) {
|
||||
if (t->ident == args->ident) {
|
||||
spin_unlock(&xprt_list_lock);
|
||||
goto found;
|
||||
}
|
||||
}
|
||||
spin_unlock(&xprt_list_lock);
|
||||
printk(KERN_ERR "RPC: transport (%d) not supported\n", args->ident);
|
||||
return ERR_PTR(-EIO);
|
||||
|
||||
found:
|
||||
xprt = t->setup(args);
|
||||
if (IS_ERR(xprt)) {
|
||||
dprintk("RPC: xprt_create_transport: failed, %ld\n",
|
||||
-PTR_ERR(xprt));
|
||||
|
3
net/sunrpc/xprtrdma/Makefile
Normal file
3
net/sunrpc/xprtrdma/Makefile
Normal file
@ -0,0 +1,3 @@
|
||||
obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma.o
|
||||
|
||||
xprtrdma-y := transport.o rpc_rdma.o verbs.o
|
868
net/sunrpc/xprtrdma/rpc_rdma.c
Normal file
868
net/sunrpc/xprtrdma/rpc_rdma.c
Normal file
@ -0,0 +1,868 @@
|
||||
/*
|
||||
* Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
|
||||
*
|
||||
* This software is available to you under a choice of one of two
|
||||
* licenses. You may choose to be licensed under the terms of the GNU
|
||||
* General Public License (GPL) Version 2, available from the file
|
||||
* COPYING in the main directory of this source tree, or the BSD-type
|
||||
* license below:
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* Redistributions in binary form must reproduce the above
|
||||
* copyright notice, this list of conditions and the following
|
||||
* disclaimer in the documentation and/or other materials provided
|
||||
* with the distribution.
|
||||
*
|
||||
* Neither the name of the Network Appliance, Inc. nor the names of
|
||||
* its contributors may be used to endorse or promote products
|
||||
* derived from this software without specific prior written
|
||||
* permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* rpc_rdma.c
|
||||
*
|
||||
* This file contains the guts of the RPC RDMA protocol, and
|
||||
* does marshaling/unmarshaling, etc. It is also where interfacing
|
||||
* to the Linux RPC framework lives.
|
||||
*/
|
||||
|
||||
#include "xprt_rdma.h"
|
||||
|
||||
#include <linux/highmem.h>
|
||||
|
||||
#ifdef RPC_DEBUG
|
||||
# define RPCDBG_FACILITY RPCDBG_TRANS
|
||||
#endif
|
||||
|
||||
/*
 * How the bulk of an RPC message is transferred. The values are used
 * as indices into the transfertypes[] debug name table, so the order
 * here must match the order of that table.
 */
enum rpcrdma_chunktype {
	rpcrdma_noch = 0,	/* pure inline, no chunks */
	rpcrdma_readch,		/* some argument via rdma read */
	rpcrdma_areadch,	/* entire request via rdma read */
	rpcrdma_writech,	/* some result via rdma write */
	rpcrdma_replych		/* entire reply via rdma write */
};
|
||||
|
||||
#ifdef RPC_DEBUG
|
||||
static const char transfertypes[][12] = {
|
||||
"pure inline", /* no chunks */
|
||||
" read chunk", /* some argument via rdma read */
|
||||
"*read chunk", /* entire request via rdma read */
|
||||
"write chunk", /* some result via rdma write */
|
||||
"reply chunk" /* entire reply via rdma write */
|
||||
};
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Chunk assembly from upper layer xdr_buf.
|
||||
*
|
||||
* Prepare the passed-in xdr_buf into representation as RPC/RDMA chunk
|
||||
* elements. Segments are then coalesced when registered, if possible
|
||||
* within the selected memreg mode.
|
||||
*
|
||||
* Note, this routine is never called if the connection's memory
|
||||
* registration strategy is 0 (bounce buffers).
|
||||
*/
|
||||
|
||||
static int
|
||||
rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, int pos,
|
||||
enum rpcrdma_chunktype type, struct rpcrdma_mr_seg *seg, int nsegs)
|
||||
{
|
||||
int len, n = 0, p;
|
||||
|
||||
if (pos == 0 && xdrbuf->head[0].iov_len) {
|
||||
seg[n].mr_page = NULL;
|
||||
seg[n].mr_offset = xdrbuf->head[0].iov_base;
|
||||
seg[n].mr_len = xdrbuf->head[0].iov_len;
|
||||
pos += xdrbuf->head[0].iov_len;
|
||||
++n;
|
||||
}
|
||||
|
||||
if (xdrbuf->page_len && (xdrbuf->pages[0] != NULL)) {
|
||||
if (n == nsegs)
|
||||
return 0;
|
||||
seg[n].mr_page = xdrbuf->pages[0];
|
||||
seg[n].mr_offset = (void *)(unsigned long) xdrbuf->page_base;
|
||||
seg[n].mr_len = min_t(u32,
|
||||
PAGE_SIZE - xdrbuf->page_base, xdrbuf->page_len);
|
||||
len = xdrbuf->page_len - seg[n].mr_len;
|
||||
pos += len;
|
||||
++n;
|
||||
p = 1;
|
||||
while (len > 0) {
|
||||
if (n == nsegs)
|
||||
return 0;
|
||||
seg[n].mr_page = xdrbuf->pages[p];
|
||||
seg[n].mr_offset = NULL;
|
||||
seg[n].mr_len = min_t(u32, PAGE_SIZE, len);
|
||||
len -= seg[n].mr_len;
|
||||
++n;
|
||||
++p;
|
||||
}
|
||||
}
|
||||
|
||||
if (pos < xdrbuf->len && xdrbuf->tail[0].iov_len) {
|
||||
if (n == nsegs)
|
||||
return 0;
|
||||
seg[n].mr_page = NULL;
|
||||
seg[n].mr_offset = xdrbuf->tail[0].iov_base;
|
||||
seg[n].mr_len = xdrbuf->tail[0].iov_len;
|
||||
pos += xdrbuf->tail[0].iov_len;
|
||||
++n;
|
||||
}
|
||||
|
||||
if (pos < xdrbuf->len)
|
||||
dprintk("RPC: %s: marshaled only %d of %d\n",
|
||||
__func__, pos, xdrbuf->len);
|
||||
|
||||
return n;
|
||||
}
|
||||
|
||||
/*
|
||||
* Create read/write chunk lists, and reply chunks, for RDMA
|
||||
*
|
||||
* Assume check against THRESHOLD has been done, and chunks are required.
|
||||
* Assume only encoding one list entry for read|write chunks. The NFSv3
|
||||
* protocol is simple enough to allow this as it only has a single "bulk
|
||||
* result" in each procedure - complicated NFSv4 COMPOUNDs are not. (The
|
||||
* RDMA/Sessions NFSv4 proposal addresses this for future v4 revs.)
|
||||
*
|
||||
* When used for a single reply chunk (which is a special write
|
||||
* chunk used for the entire reply, rather than just the data), it
|
||||
* is used primarily for READDIR and READLINK which would otherwise
|
||||
* be severely size-limited by a small rdma inline read max. The server
|
||||
* response will come back as an RDMA Write, followed by a message
|
||||
* of type RDMA_NOMSG carrying the xid and length. As a result, reply
|
||||
* chunks do not provide data alignment, however they do not require
|
||||
* "fixup" (moving the response to the upper layer buffer) either.
|
||||
*
|
||||
* Encoding key for single-list chunks (HLOO = Handle32 Length32 Offset64):
|
||||
*
|
||||
* Read chunklist (a linked list):
|
||||
* N elements, position P (same P for all chunks of same arg!):
|
||||
* 1 - PHLOO - 1 - PHLOO - ... - 1 - PHLOO - 0
|
||||
*
|
||||
* Write chunklist (a list of (one) counted array):
|
||||
* N elements:
|
||||
* 1 - N - HLOO - HLOO - ... - HLOO - 0
|
||||
*
|
||||
* Reply chunk (a counted array):
|
||||
* N elements:
|
||||
* 1 - N - HLOO - HLOO - ... - HLOO
|
||||
*/
|
||||
|
||||
static unsigned int
|
||||
rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
|
||||
struct rpcrdma_msg *headerp, enum rpcrdma_chunktype type)
|
||||
{
|
||||
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
|
||||
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_task->tk_xprt);
|
||||
int nsegs, nchunks = 0;
|
||||
int pos;
|
||||
struct rpcrdma_mr_seg *seg = req->rl_segments;
|
||||
struct rpcrdma_read_chunk *cur_rchunk = NULL;
|
||||
struct rpcrdma_write_array *warray = NULL;
|
||||
struct rpcrdma_write_chunk *cur_wchunk = NULL;
|
||||
u32 *iptr = headerp->rm_body.rm_chunks;
|
||||
|
||||
if (type == rpcrdma_readch || type == rpcrdma_areadch) {
|
||||
/* a read chunk - server will RDMA Read our memory */
|
||||
cur_rchunk = (struct rpcrdma_read_chunk *) iptr;
|
||||
} else {
|
||||
/* a write or reply chunk - server will RDMA Write our memory */
|
||||
*iptr++ = xdr_zero; /* encode a NULL read chunk list */
|
||||
if (type == rpcrdma_replych)
|
||||
*iptr++ = xdr_zero; /* a NULL write chunk list */
|
||||
warray = (struct rpcrdma_write_array *) iptr;
|
||||
cur_wchunk = (struct rpcrdma_write_chunk *) (warray + 1);
|
||||
}
|
||||
|
||||
if (type == rpcrdma_replych || type == rpcrdma_areadch)
|
||||
pos = 0;
|
||||
else
|
||||
pos = target->head[0].iov_len;
|
||||
|
||||
nsegs = rpcrdma_convert_iovs(target, pos, type, seg, RPCRDMA_MAX_SEGS);
|
||||
if (nsegs == 0)
|
||||
return 0;
|
||||
|
||||
do {
|
||||
/* bind/register the memory, then build chunk from result. */
|
||||
int n = rpcrdma_register_external(seg, nsegs,
|
||||
cur_wchunk != NULL, r_xprt);
|
||||
if (n <= 0)
|
||||
goto out;
|
||||
if (cur_rchunk) { /* read */
|
||||
cur_rchunk->rc_discrim = xdr_one;
|
||||
/* all read chunks have the same "position" */
|
||||
cur_rchunk->rc_position = htonl(pos);
|
||||
cur_rchunk->rc_target.rs_handle = htonl(seg->mr_rkey);
|
||||
cur_rchunk->rc_target.rs_length = htonl(seg->mr_len);
|
||||
xdr_encode_hyper(
|
||||
(u32 *)&cur_rchunk->rc_target.rs_offset,
|
||||
seg->mr_base);
|
||||
dprintk("RPC: %s: read chunk "
|
||||
"elem %d@0x%llx:0x%x pos %d (%s)\n", __func__,
|
||||
seg->mr_len, seg->mr_base, seg->mr_rkey, pos,
|
||||
n < nsegs ? "more" : "last");
|
||||
cur_rchunk++;
|
||||
r_xprt->rx_stats.read_chunk_count++;
|
||||
} else { /* write/reply */
|
||||
cur_wchunk->wc_target.rs_handle = htonl(seg->mr_rkey);
|
||||
cur_wchunk->wc_target.rs_length = htonl(seg->mr_len);
|
||||
xdr_encode_hyper(
|
||||
(u32 *)&cur_wchunk->wc_target.rs_offset,
|
||||
seg->mr_base);
|
||||
dprintk("RPC: %s: %s chunk "
|
||||
"elem %d@0x%llx:0x%x (%s)\n", __func__,
|
||||
(type == rpcrdma_replych) ? "reply" : "write",
|
||||
seg->mr_len, seg->mr_base, seg->mr_rkey,
|
||||
n < nsegs ? "more" : "last");
|
||||
cur_wchunk++;
|
||||
if (type == rpcrdma_replych)
|
||||
r_xprt->rx_stats.reply_chunk_count++;
|
||||
else
|
||||
r_xprt->rx_stats.write_chunk_count++;
|
||||
r_xprt->rx_stats.total_rdma_request += seg->mr_len;
|
||||
}
|
||||
nchunks++;
|
||||
seg += n;
|
||||
nsegs -= n;
|
||||
} while (nsegs);
|
||||
|
||||
/* success. all failures return above */
|
||||
req->rl_nchunks = nchunks;
|
||||
|
||||
BUG_ON(nchunks == 0);
|
||||
|
||||
/*
|
||||
* finish off header. If write, marshal discrim and nchunks.
|
||||
*/
|
||||
if (cur_rchunk) {
|
||||
iptr = (u32 *) cur_rchunk;
|
||||
*iptr++ = xdr_zero; /* finish the read chunk list */
|
||||
*iptr++ = xdr_zero; /* encode a NULL write chunk list */
|
||||
*iptr++ = xdr_zero; /* encode a NULL reply chunk */
|
||||
} else {
|
||||
warray->wc_discrim = xdr_one;
|
||||
warray->wc_nchunks = htonl(nchunks);
|
||||
iptr = (u32 *) cur_wchunk;
|
||||
if (type == rpcrdma_writech) {
|
||||
*iptr++ = xdr_zero; /* finish the write chunk list */
|
||||
*iptr++ = xdr_zero; /* encode a NULL reply chunk */
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Return header size.
|
||||
*/
|
||||
return (unsigned char *)iptr - (unsigned char *)headerp;
|
||||
|
||||
out:
|
||||
for (pos = 0; nchunks--;)
|
||||
pos += rpcrdma_deregister_external(
|
||||
&req->rl_segments[pos], r_xprt, NULL);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Copy write data inline.
|
||||
* This function is used for "small" requests. Data which is passed
|
||||
* to RPC via iovecs (or page list) is copied directly into the
|
||||
* pre-registered memory buffer for this request. For small amounts
|
||||
* of data, this is efficient. The cutoff value is tunable.
|
||||
*/
|
||||
static int
|
||||
rpcrdma_inline_pullup(struct rpc_rqst *rqst, int pad)
|
||||
{
|
||||
int i, npages, curlen;
|
||||
int copy_len;
|
||||
unsigned char *srcp, *destp;
|
||||
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
|
||||
|
||||
destp = rqst->rq_svec[0].iov_base;
|
||||
curlen = rqst->rq_svec[0].iov_len;
|
||||
destp += curlen;
|
||||
/*
|
||||
* Do optional padding where it makes sense. Alignment of write
|
||||
* payload can help the server, if our setting is accurate.
|
||||
*/
|
||||
pad -= (curlen + 36/*sizeof(struct rpcrdma_msg_padded)*/);
|
||||
if (pad < 0 || rqst->rq_slen - curlen < RPCRDMA_INLINE_PAD_THRESH)
|
||||
pad = 0; /* don't pad this request */
|
||||
|
||||
dprintk("RPC: %s: pad %d destp 0x%p len %d hdrlen %d\n",
|
||||
__func__, pad, destp, rqst->rq_slen, curlen);
|
||||
|
||||
copy_len = rqst->rq_snd_buf.page_len;
|
||||
r_xprt->rx_stats.pullup_copy_count += copy_len;
|
||||
npages = PAGE_ALIGN(rqst->rq_snd_buf.page_base+copy_len) >> PAGE_SHIFT;
|
||||
for (i = 0; copy_len && i < npages; i++) {
|
||||
if (i == 0)
|
||||
curlen = PAGE_SIZE - rqst->rq_snd_buf.page_base;
|
||||
else
|
||||
curlen = PAGE_SIZE;
|
||||
if (curlen > copy_len)
|
||||
curlen = copy_len;
|
||||
dprintk("RPC: %s: page %d destp 0x%p len %d curlen %d\n",
|
||||
__func__, i, destp, copy_len, curlen);
|
||||
srcp = kmap_atomic(rqst->rq_snd_buf.pages[i],
|
||||
KM_SKB_SUNRPC_DATA);
|
||||
if (i == 0)
|
||||
memcpy(destp, srcp+rqst->rq_snd_buf.page_base, curlen);
|
||||
else
|
||||
memcpy(destp, srcp, curlen);
|
||||
kunmap_atomic(srcp, KM_SKB_SUNRPC_DATA);
|
||||
rqst->rq_svec[0].iov_len += curlen;
|
||||
destp += curlen;
|
||||
copy_len -= curlen;
|
||||
}
|
||||
if (rqst->rq_snd_buf.tail[0].iov_len) {
|
||||
curlen = rqst->rq_snd_buf.tail[0].iov_len;
|
||||
if (destp != rqst->rq_snd_buf.tail[0].iov_base) {
|
||||
memcpy(destp,
|
||||
rqst->rq_snd_buf.tail[0].iov_base, curlen);
|
||||
r_xprt->rx_stats.pullup_copy_count += curlen;
|
||||
}
|
||||
dprintk("RPC: %s: tail destp 0x%p len %d curlen %d\n",
|
||||
__func__, destp, copy_len, curlen);
|
||||
rqst->rq_svec[0].iov_len += curlen;
|
||||
}
|
||||
/* header now contains entire send message */
|
||||
return pad;
|
||||
}
|
||||
|
||||
/*
|
||||
* Marshal a request: the primary job of this routine is to choose
|
||||
* the transfer modes. See comments below.
|
||||
*
|
||||
* Uses multiple RDMA IOVs for a request:
|
||||
* [0] -- RPC RDMA header, which uses memory from the *start* of the
|
||||
* preregistered buffer that already holds the RPC data in
|
||||
* its middle.
|
||||
* [1] -- the RPC header/data, marshaled by RPC and the NFS protocol.
|
||||
* [2] -- optional padding.
|
||||
* [3] -- if padded, header only in [1] and data here.
|
||||
*/
|
||||
|
||||
int
|
||||
rpcrdma_marshal_req(struct rpc_rqst *rqst)
|
||||
{
|
||||
struct rpc_xprt *xprt = rqst->rq_task->tk_xprt;
|
||||
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
|
||||
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
|
||||
char *base;
|
||||
size_t hdrlen, rpclen, padlen;
|
||||
enum rpcrdma_chunktype rtype, wtype;
|
||||
struct rpcrdma_msg *headerp;
|
||||
|
||||
/*
|
||||
* rpclen gets amount of data in first buffer, which is the
|
||||
* pre-registered buffer.
|
||||
*/
|
||||
base = rqst->rq_svec[0].iov_base;
|
||||
rpclen = rqst->rq_svec[0].iov_len;
|
||||
|
||||
/* build RDMA header in private area at front */
|
||||
headerp = (struct rpcrdma_msg *) req->rl_base;
|
||||
/* don't htonl XID, it's already done in request */
|
||||
headerp->rm_xid = rqst->rq_xid;
|
||||
headerp->rm_vers = xdr_one;
|
||||
headerp->rm_credit = htonl(r_xprt->rx_buf.rb_max_requests);
|
||||
headerp->rm_type = __constant_htonl(RDMA_MSG);
|
||||
|
||||
/*
|
||||
* Chunks needed for results?
|
||||
*
|
||||
* o If the expected result is under the inline threshold, all ops
|
||||
* return as inline (but see later).
|
||||
* o Large non-read ops return as a single reply chunk.
|
||||
* o Large read ops return data as write chunk(s), header as inline.
|
||||
*
|
||||
* Note: the NFS code sending down multiple result segments implies
|
||||
* the op is one of read, readdir[plus], readlink or NFSv4 getacl.
|
||||
*/
|
||||
|
||||
/*
|
||||
* This code can handle read chunks, write chunks OR reply
|
||||
* chunks -- only one type. If the request is too big to fit
|
||||
* inline, then we will choose read chunks. If the request is
|
||||
* a READ, then use write chunks to separate the file data
|
||||
* into pages; otherwise use reply chunks.
|
||||
*/
|
||||
if (rqst->rq_rcv_buf.buflen <= RPCRDMA_INLINE_READ_THRESHOLD(rqst))
|
||||
wtype = rpcrdma_noch;
|
||||
else if (rqst->rq_rcv_buf.page_len == 0)
|
||||
wtype = rpcrdma_replych;
|
||||
else if (rqst->rq_rcv_buf.flags & XDRBUF_READ)
|
||||
wtype = rpcrdma_writech;
|
||||
else
|
||||
wtype = rpcrdma_replych;
|
||||
|
||||
/*
|
||||
* Chunks needed for arguments?
|
||||
*
|
||||
* o If the total request is under the inline threshold, all ops
|
||||
* are sent as inline.
|
||||
* o Large non-write ops are sent with the entire message as a
|
||||
* single read chunk (protocol 0-position special case).
|
||||
* o Large write ops transmit data as read chunk(s), header as
|
||||
* inline.
|
||||
*
|
||||
* Note: the NFS code sending down multiple argument segments
|
||||
* implies the op is a write.
|
||||
* TBD check NFSv4 setacl
|
||||
*/
|
||||
if (rqst->rq_snd_buf.len <= RPCRDMA_INLINE_WRITE_THRESHOLD(rqst))
|
||||
rtype = rpcrdma_noch;
|
||||
else if (rqst->rq_snd_buf.page_len == 0)
|
||||
rtype = rpcrdma_areadch;
|
||||
else
|
||||
rtype = rpcrdma_readch;
|
||||
|
||||
/* The following simplification is not true forever */
|
||||
if (rtype != rpcrdma_noch && wtype == rpcrdma_replych)
|
||||
wtype = rpcrdma_noch;
|
||||
BUG_ON(rtype != rpcrdma_noch && wtype != rpcrdma_noch);
|
||||
|
||||
if (r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_BOUNCEBUFFERS &&
|
||||
(rtype != rpcrdma_noch || wtype != rpcrdma_noch)) {
|
||||
/* forced to "pure inline"? */
|
||||
dprintk("RPC: %s: too much data (%d/%d) for inline\n",
|
||||
__func__, rqst->rq_rcv_buf.len, rqst->rq_snd_buf.len);
|
||||
return -1;
|
||||
}
|
||||
|
||||
hdrlen = 28; /*sizeof *headerp;*/
|
||||
padlen = 0;
|
||||
|
||||
/*
|
||||
* Pull up any extra send data into the preregistered buffer.
|
||||
* When padding is in use and applies to the transfer, insert
|
||||
* it and change the message type.
|
||||
*/
|
||||
if (rtype == rpcrdma_noch) {
|
||||
|
||||
padlen = rpcrdma_inline_pullup(rqst,
|
||||
RPCRDMA_INLINE_PAD_VALUE(rqst));
|
||||
|
||||
if (padlen) {
|
||||
headerp->rm_type = __constant_htonl(RDMA_MSGP);
|
||||
headerp->rm_body.rm_padded.rm_align =
|
||||
htonl(RPCRDMA_INLINE_PAD_VALUE(rqst));
|
||||
headerp->rm_body.rm_padded.rm_thresh =
|
||||
__constant_htonl(RPCRDMA_INLINE_PAD_THRESH);
|
||||
headerp->rm_body.rm_padded.rm_pempty[0] = xdr_zero;
|
||||
headerp->rm_body.rm_padded.rm_pempty[1] = xdr_zero;
|
||||
headerp->rm_body.rm_padded.rm_pempty[2] = xdr_zero;
|
||||
hdrlen += 2 * sizeof(u32); /* extra words in padhdr */
|
||||
BUG_ON(wtype != rpcrdma_noch);
|
||||
|
||||
} else {
|
||||
headerp->rm_body.rm_nochunks.rm_empty[0] = xdr_zero;
|
||||
headerp->rm_body.rm_nochunks.rm_empty[1] = xdr_zero;
|
||||
headerp->rm_body.rm_nochunks.rm_empty[2] = xdr_zero;
|
||||
/* new length after pullup */
|
||||
rpclen = rqst->rq_svec[0].iov_len;
|
||||
/*
|
||||
* Currently we try to not actually use read inline.
|
||||
* Reply chunks have the desirable property that
|
||||
* they land, packed, directly in the target buffers
|
||||
* without headers, so they require no fixup. The
|
||||
* additional RDMA Write op sends the same amount
|
||||
* of data, streams on-the-wire and adds no overhead
|
||||
* on receive. Therefore, we request a reply chunk
|
||||
* for non-writes wherever feasible and efficient.
|
||||
*/
|
||||
if (wtype == rpcrdma_noch &&
|
||||
r_xprt->rx_ia.ri_memreg_strategy > RPCRDMA_REGISTER)
|
||||
wtype = rpcrdma_replych;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Marshal chunks. This routine will return the header length
|
||||
* consumed by marshaling.
|
||||
*/
|
||||
if (rtype != rpcrdma_noch) {
|
||||
hdrlen = rpcrdma_create_chunks(rqst,
|
||||
&rqst->rq_snd_buf, headerp, rtype);
|
||||
wtype = rtype; /* simplify dprintk */
|
||||
|
||||
} else if (wtype != rpcrdma_noch) {
|
||||
hdrlen = rpcrdma_create_chunks(rqst,
|
||||
&rqst->rq_rcv_buf, headerp, wtype);
|
||||
}
|
||||
|
||||
if (hdrlen == 0)
|
||||
return -1;
|
||||
|
||||
dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd\n"
|
||||
" headerp 0x%p base 0x%p lkey 0x%x\n",
|
||||
__func__, transfertypes[wtype], hdrlen, rpclen, padlen,
|
||||
headerp, base, req->rl_iov.lkey);
|
||||
|
||||
/*
|
||||
* initialize send_iov's - normally only two: rdma chunk header and
|
||||
* single preregistered RPC header buffer, but if padding is present,
|
||||
* then use a preregistered (and zeroed) pad buffer between the RPC
|
||||
* header and any write data. In all non-rdma cases, any following
|
||||
* data has been copied into the RPC header buffer.
|
||||
*/
|
||||
req->rl_send_iov[0].addr = req->rl_iov.addr;
|
||||
req->rl_send_iov[0].length = hdrlen;
|
||||
req->rl_send_iov[0].lkey = req->rl_iov.lkey;
|
||||
|
||||
req->rl_send_iov[1].addr = req->rl_iov.addr + (base - req->rl_base);
|
||||
req->rl_send_iov[1].length = rpclen;
|
||||
req->rl_send_iov[1].lkey = req->rl_iov.lkey;
|
||||
|
||||
req->rl_niovs = 2;
|
||||
|
||||
if (padlen) {
|
||||
struct rpcrdma_ep *ep = &r_xprt->rx_ep;
|
||||
|
||||
req->rl_send_iov[2].addr = ep->rep_pad.addr;
|
||||
req->rl_send_iov[2].length = padlen;
|
||||
req->rl_send_iov[2].lkey = ep->rep_pad.lkey;
|
||||
|
||||
req->rl_send_iov[3].addr = req->rl_send_iov[1].addr + rpclen;
|
||||
req->rl_send_iov[3].length = rqst->rq_slen - rpclen;
|
||||
req->rl_send_iov[3].lkey = req->rl_iov.lkey;
|
||||
|
||||
req->rl_niovs = 4;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Chase down a received write or reply chunklist to get length
|
||||
* RDMA'd by server. See map at rpcrdma_create_chunks()! :-)
|
||||
*/
|
||||
static int
|
||||
rpcrdma_count_chunks(struct rpcrdma_rep *rep, int max, int wrchunk, u32 **iptrp)
|
||||
{
|
||||
unsigned int i, total_len;
|
||||
struct rpcrdma_write_chunk *cur_wchunk;
|
||||
|
||||
i = ntohl(**iptrp); /* get array count */
|
||||
if (i > max)
|
||||
return -1;
|
||||
cur_wchunk = (struct rpcrdma_write_chunk *) (*iptrp + 1);
|
||||
total_len = 0;
|
||||
while (i--) {
|
||||
struct rpcrdma_segment *seg = &cur_wchunk->wc_target;
|
||||
ifdebug(FACILITY) {
|
||||
u64 off;
|
||||
xdr_decode_hyper((u32 *)&seg->rs_offset, &off);
|
||||
dprintk("RPC: %s: chunk %d@0x%llx:0x%x\n",
|
||||
__func__,
|
||||
ntohl(seg->rs_length),
|
||||
off,
|
||||
ntohl(seg->rs_handle));
|
||||
}
|
||||
total_len += ntohl(seg->rs_length);
|
||||
++cur_wchunk;
|
||||
}
|
||||
/* check and adjust for properly terminated write chunk */
|
||||
if (wrchunk) {
|
||||
u32 *w = (u32 *) cur_wchunk;
|
||||
if (*w++ != xdr_zero)
|
||||
return -1;
|
||||
cur_wchunk = (struct rpcrdma_write_chunk *) w;
|
||||
}
|
||||
if ((char *) cur_wchunk > rep->rr_base + rep->rr_len)
|
||||
return -1;
|
||||
|
||||
*iptrp = (u32 *) cur_wchunk;
|
||||
return total_len;
|
||||
}
|
||||
|
||||
/*
|
||||
* Scatter inline received data back into provided iov's.
|
||||
*/
|
||||
static void
|
||||
rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len)
|
||||
{
|
||||
int i, npages, curlen, olen;
|
||||
char *destp;
|
||||
|
||||
curlen = rqst->rq_rcv_buf.head[0].iov_len;
|
||||
if (curlen > copy_len) { /* write chunk header fixup */
|
||||
curlen = copy_len;
|
||||
rqst->rq_rcv_buf.head[0].iov_len = curlen;
|
||||
}
|
||||
|
||||
dprintk("RPC: %s: srcp 0x%p len %d hdrlen %d\n",
|
||||
__func__, srcp, copy_len, curlen);
|
||||
|
||||
/* Shift pointer for first receive segment only */
|
||||
rqst->rq_rcv_buf.head[0].iov_base = srcp;
|
||||
srcp += curlen;
|
||||
copy_len -= curlen;
|
||||
|
||||
olen = copy_len;
|
||||
i = 0;
|
||||
rpcx_to_rdmax(rqst->rq_xprt)->rx_stats.fixup_copy_count += olen;
|
||||
if (copy_len && rqst->rq_rcv_buf.page_len) {
|
||||
npages = PAGE_ALIGN(rqst->rq_rcv_buf.page_base +
|
||||
rqst->rq_rcv_buf.page_len) >> PAGE_SHIFT;
|
||||
for (; i < npages; i++) {
|
||||
if (i == 0)
|
||||
curlen = PAGE_SIZE - rqst->rq_rcv_buf.page_base;
|
||||
else
|
||||
curlen = PAGE_SIZE;
|
||||
if (curlen > copy_len)
|
||||
curlen = copy_len;
|
||||
dprintk("RPC: %s: page %d"
|
||||
" srcp 0x%p len %d curlen %d\n",
|
||||
__func__, i, srcp, copy_len, curlen);
|
||||
destp = kmap_atomic(rqst->rq_rcv_buf.pages[i],
|
||||
KM_SKB_SUNRPC_DATA);
|
||||
if (i == 0)
|
||||
memcpy(destp + rqst->rq_rcv_buf.page_base,
|
||||
srcp, curlen);
|
||||
else
|
||||
memcpy(destp, srcp, curlen);
|
||||
flush_dcache_page(rqst->rq_rcv_buf.pages[i]);
|
||||
kunmap_atomic(destp, KM_SKB_SUNRPC_DATA);
|
||||
srcp += curlen;
|
||||
copy_len -= curlen;
|
||||
if (copy_len == 0)
|
||||
break;
|
||||
}
|
||||
rqst->rq_rcv_buf.page_len = olen - copy_len;
|
||||
} else
|
||||
rqst->rq_rcv_buf.page_len = 0;
|
||||
|
||||
if (copy_len && rqst->rq_rcv_buf.tail[0].iov_len) {
|
||||
curlen = copy_len;
|
||||
if (curlen > rqst->rq_rcv_buf.tail[0].iov_len)
|
||||
curlen = rqst->rq_rcv_buf.tail[0].iov_len;
|
||||
if (rqst->rq_rcv_buf.tail[0].iov_base != srcp)
|
||||
memcpy(rqst->rq_rcv_buf.tail[0].iov_base, srcp, curlen);
|
||||
dprintk("RPC: %s: tail srcp 0x%p len %d curlen %d\n",
|
||||
__func__, srcp, copy_len, curlen);
|
||||
rqst->rq_rcv_buf.tail[0].iov_len = curlen;
|
||||
copy_len -= curlen; ++i;
|
||||
} else
|
||||
rqst->rq_rcv_buf.tail[0].iov_len = 0;
|
||||
|
||||
if (copy_len)
|
||||
dprintk("RPC: %s: %d bytes in"
|
||||
" %d extra segments (%d lost)\n",
|
||||
__func__, olen, i, copy_len);
|
||||
|
||||
/* TBD avoid a warning from call_decode() */
|
||||
rqst->rq_private_buf = rqst->rq_rcv_buf;
|
||||
}
|
||||
|
||||
/*
|
||||
* This function is called when an async event is posted to
|
||||
* the connection which changes the connection state. All it
|
||||
* does at this point is mark the connection up/down, the rpc
|
||||
* timers do the rest.
|
||||
*/
|
||||
void
|
||||
rpcrdma_conn_func(struct rpcrdma_ep *ep)
|
||||
{
|
||||
struct rpc_xprt *xprt = ep->rep_xprt;
|
||||
|
||||
spin_lock_bh(&xprt->transport_lock);
|
||||
if (ep->rep_connected > 0) {
|
||||
if (!xprt_test_and_set_connected(xprt))
|
||||
xprt_wake_pending_tasks(xprt, 0);
|
||||
} else {
|
||||
if (xprt_test_and_clear_connected(xprt))
|
||||
xprt_wake_pending_tasks(xprt, ep->rep_connected);
|
||||
}
|
||||
spin_unlock_bh(&xprt->transport_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* This function is called when memory window unbind which we are waiting
|
||||
* for completes. Just use rr_func (zeroed by upcall) to signal completion.
|
||||
*/
|
||||
static void
|
||||
rpcrdma_unbind_func(struct rpcrdma_rep *rep)
|
||||
{
|
||||
wake_up(&rep->rr_unbind);
|
||||
}
|
||||
|
||||
/*
|
||||
* Called as a tasklet to do req/reply match and complete a request
|
||||
* Errors must result in the RPC task either being awakened, or
|
||||
* allowed to timeout, to discover the errors at that time.
|
||||
*/
|
||||
void
|
||||
rpcrdma_reply_handler(struct rpcrdma_rep *rep)
|
||||
{
|
||||
struct rpcrdma_msg *headerp;
|
||||
struct rpcrdma_req *req;
|
||||
struct rpc_rqst *rqst;
|
||||
struct rpc_xprt *xprt = rep->rr_xprt;
|
||||
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
|
||||
u32 *iptr;
|
||||
int i, rdmalen, status;
|
||||
|
||||
/* Check status. If bad, signal disconnect and return rep to pool */
|
||||
if (rep->rr_len == ~0U) {
|
||||
rpcrdma_recv_buffer_put(rep);
|
||||
if (r_xprt->rx_ep.rep_connected == 1) {
|
||||
r_xprt->rx_ep.rep_connected = -EIO;
|
||||
rpcrdma_conn_func(&r_xprt->rx_ep);
|
||||
}
|
||||
return;
|
||||
}
|
||||
if (rep->rr_len < 28) {
|
||||
dprintk("RPC: %s: short/invalid reply\n", __func__);
|
||||
goto repost;
|
||||
}
|
||||
headerp = (struct rpcrdma_msg *) rep->rr_base;
|
||||
if (headerp->rm_vers != xdr_one) {
|
||||
dprintk("RPC: %s: invalid version %d\n",
|
||||
__func__, ntohl(headerp->rm_vers));
|
||||
goto repost;
|
||||
}
|
||||
|
||||
/* Get XID and try for a match. */
|
||||
spin_lock(&xprt->transport_lock);
|
||||
rqst = xprt_lookup_rqst(xprt, headerp->rm_xid);
|
||||
if (rqst == NULL) {
|
||||
spin_unlock(&xprt->transport_lock);
|
||||
dprintk("RPC: %s: reply 0x%p failed "
|
||||
"to match any request xid 0x%08x len %d\n",
|
||||
__func__, rep, headerp->rm_xid, rep->rr_len);
|
||||
repost:
|
||||
r_xprt->rx_stats.bad_reply_count++;
|
||||
rep->rr_func = rpcrdma_reply_handler;
|
||||
if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, &r_xprt->rx_ep, rep))
|
||||
rpcrdma_recv_buffer_put(rep);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
/* get request object */
|
||||
req = rpcr_to_rdmar(rqst);
|
||||
|
||||
dprintk("RPC: %s: reply 0x%p completes request 0x%p\n"
|
||||
" RPC request 0x%p xid 0x%08x\n",
|
||||
__func__, rep, req, rqst, headerp->rm_xid);
|
||||
|
||||
BUG_ON(!req || req->rl_reply);
|
||||
|
||||
/* from here on, the reply is no longer an orphan */
|
||||
req->rl_reply = rep;
|
||||
|
||||
/* check for expected message types */
|
||||
/* The order of some of these tests is important. */
|
||||
switch (headerp->rm_type) {
|
||||
case __constant_htonl(RDMA_MSG):
|
||||
/* never expect read chunks */
|
||||
/* never expect reply chunks (two ways to check) */
|
||||
/* never expect write chunks without having offered RDMA */
|
||||
if (headerp->rm_body.rm_chunks[0] != xdr_zero ||
|
||||
(headerp->rm_body.rm_chunks[1] == xdr_zero &&
|
||||
headerp->rm_body.rm_chunks[2] != xdr_zero) ||
|
||||
(headerp->rm_body.rm_chunks[1] != xdr_zero &&
|
||||
req->rl_nchunks == 0))
|
||||
goto badheader;
|
||||
if (headerp->rm_body.rm_chunks[1] != xdr_zero) {
|
||||
/* count any expected write chunks in read reply */
|
||||
/* start at write chunk array count */
|
||||
iptr = &headerp->rm_body.rm_chunks[2];
|
||||
rdmalen = rpcrdma_count_chunks(rep,
|
||||
req->rl_nchunks, 1, &iptr);
|
||||
/* check for validity, and no reply chunk after */
|
||||
if (rdmalen < 0 || *iptr++ != xdr_zero)
|
||||
goto badheader;
|
||||
rep->rr_len -=
|
||||
((unsigned char *)iptr - (unsigned char *)headerp);
|
||||
status = rep->rr_len + rdmalen;
|
||||
r_xprt->rx_stats.total_rdma_reply += rdmalen;
|
||||
} else {
|
||||
/* else ordinary inline */
|
||||
iptr = (u32 *)((unsigned char *)headerp + 28);
|
||||
rep->rr_len -= 28; /*sizeof *headerp;*/
|
||||
status = rep->rr_len;
|
||||
}
|
||||
/* Fix up the rpc results for upper layer */
|
||||
rpcrdma_inline_fixup(rqst, (char *)iptr, rep->rr_len);
|
||||
break;
|
||||
|
||||
case __constant_htonl(RDMA_NOMSG):
|
||||
/* never expect read or write chunks, always reply chunks */
|
||||
if (headerp->rm_body.rm_chunks[0] != xdr_zero ||
|
||||
headerp->rm_body.rm_chunks[1] != xdr_zero ||
|
||||
headerp->rm_body.rm_chunks[2] != xdr_one ||
|
||||
req->rl_nchunks == 0)
|
||||
goto badheader;
|
||||
iptr = (u32 *)((unsigned char *)headerp + 28);
|
||||
rdmalen = rpcrdma_count_chunks(rep, req->rl_nchunks, 0, &iptr);
|
||||
if (rdmalen < 0)
|
||||
goto badheader;
|
||||
r_xprt->rx_stats.total_rdma_reply += rdmalen;
|
||||
/* Reply chunk buffer already is the reply vector - no fixup. */
|
||||
status = rdmalen;
|
||||
break;
|
||||
|
||||
badheader:
|
||||
default:
|
||||
dprintk("%s: invalid rpcrdma reply header (type %d):"
|
||||
" chunks[012] == %d %d %d"
|
||||
" expected chunks <= %d\n",
|
||||
__func__, ntohl(headerp->rm_type),
|
||||
headerp->rm_body.rm_chunks[0],
|
||||
headerp->rm_body.rm_chunks[1],
|
||||
headerp->rm_body.rm_chunks[2],
|
||||
req->rl_nchunks);
|
||||
status = -EIO;
|
||||
r_xprt->rx_stats.bad_reply_count++;
|
||||
break;
|
||||
}
|
||||
|
||||
/* If using mw bind, start the deregister process now. */
|
||||
/* (Note: if mr_free(), cannot perform it here, in tasklet context) */
|
||||
if (req->rl_nchunks) switch (r_xprt->rx_ia.ri_memreg_strategy) {
|
||||
case RPCRDMA_MEMWINDOWS:
|
||||
for (i = 0; req->rl_nchunks-- > 1;)
|
||||
i += rpcrdma_deregister_external(
|
||||
&req->rl_segments[i], r_xprt, NULL);
|
||||
/* Optionally wait (not here) for unbinds to complete */
|
||||
rep->rr_func = rpcrdma_unbind_func;
|
||||
(void) rpcrdma_deregister_external(&req->rl_segments[i],
|
||||
r_xprt, rep);
|
||||
break;
|
||||
case RPCRDMA_MEMWINDOWS_ASYNC:
|
||||
for (i = 0; req->rl_nchunks--;)
|
||||
i += rpcrdma_deregister_external(&req->rl_segments[i],
|
||||
r_xprt, NULL);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
dprintk("RPC: %s: xprt_complete_rqst(0x%p, 0x%p, %d)\n",
|
||||
__func__, xprt, rqst, status);
|
||||
xprt_complete_rqst(rqst->rq_task, status);
|
||||
spin_unlock(&xprt->transport_lock);
|
||||
}
|
800
net/sunrpc/xprtrdma/transport.c
Normal file
800
net/sunrpc/xprtrdma/transport.c
Normal file
@ -0,0 +1,800 @@
|
||||
/*
|
||||
* Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
|
||||
*
|
||||
* This software is available to you under a choice of one of two
|
||||
* licenses. You may choose to be licensed under the terms of the GNU
|
||||
* General Public License (GPL) Version 2, available from the file
|
||||
* COPYING in the main directory of this source tree, or the BSD-type
|
||||
* license below:
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* Redistributions in binary form must reproduce the above
|
||||
* copyright notice, this list of conditions and the following
|
||||
* disclaimer in the documentation and/or other materials provided
|
||||
* with the distribution.
|
||||
*
|
||||
* Neither the name of the Network Appliance, Inc. nor the names of
|
||||
* its contributors may be used to endorse or promote products
|
||||
* derived from this software without specific prior written
|
||||
* permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* transport.c
|
||||
*
|
||||
* This file contains the top-level implementation of an RPC RDMA
|
||||
* transport.
|
||||
*
|
||||
* Naming convention: functions beginning with xprt_ are part of the
|
||||
* transport switch. All others are RPC RDMA internal.
|
||||
*/
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/seq_file.h>
|
||||
|
||||
#include "xprt_rdma.h"
|
||||
|
||||
#ifdef RPC_DEBUG
|
||||
# define RPCDBG_FACILITY RPCDBG_TRANS
|
||||
#endif
|
||||
|
||||
MODULE_LICENSE("Dual BSD/GPL");
|
||||
|
||||
MODULE_DESCRIPTION("RPC/RDMA Transport for Linux kernel NFS");
|
||||
MODULE_AUTHOR("Network Appliance, Inc.");
|
||||
|
||||
/*
|
||||
* tunables
|
||||
*/
|
||||
|
||||
static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE;
|
||||
static unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
|
||||
static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
|
||||
static unsigned int xprt_rdma_inline_write_padding;
|
||||
#if !RPCRDMA_PERSISTENT_REGISTRATION
|
||||
static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_REGISTER; /* FMR? */
|
||||
#else
|
||||
static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_ALLPHYSICAL;
|
||||
#endif
|
||||
|
||||
#ifdef RPC_DEBUG
|
||||
|
||||
static unsigned int min_slot_table_size = RPCRDMA_MIN_SLOT_TABLE;
|
||||
static unsigned int max_slot_table_size = RPCRDMA_MAX_SLOT_TABLE;
|
||||
static unsigned int zero;
|
||||
static unsigned int max_padding = PAGE_SIZE;
|
||||
static unsigned int min_memreg = RPCRDMA_BOUNCEBUFFERS;
|
||||
static unsigned int max_memreg = RPCRDMA_LAST - 1;
|
||||
|
||||
static struct ctl_table_header *sunrpc_table_header;
|
||||
|
||||
static ctl_table xr_tunables_table[] = {
|
||||
{
|
||||
.ctl_name = CTL_SLOTTABLE_RDMA,
|
||||
.procname = "rdma_slot_table_entries",
|
||||
.data = &xprt_rdma_slot_table_entries,
|
||||
.maxlen = sizeof(unsigned int),
|
||||
.mode = 0644,
|
||||
.proc_handler = &proc_dointvec_minmax,
|
||||
.strategy = &sysctl_intvec,
|
||||
.extra1 = &min_slot_table_size,
|
||||
.extra2 = &max_slot_table_size
|
||||
},
|
||||
{
|
||||
.ctl_name = CTL_RDMA_MAXINLINEREAD,
|
||||
.procname = "rdma_max_inline_read",
|
||||
.data = &xprt_rdma_max_inline_read,
|
||||
.maxlen = sizeof(unsigned int),
|
||||
.mode = 0644,
|
||||
.proc_handler = &proc_dointvec,
|
||||
.strategy = &sysctl_intvec,
|
||||
},
|
||||
{
|
||||
.ctl_name = CTL_RDMA_MAXINLINEWRITE,
|
||||
.procname = "rdma_max_inline_write",
|
||||
.data = &xprt_rdma_max_inline_write,
|
||||
.maxlen = sizeof(unsigned int),
|
||||
.mode = 0644,
|
||||
.proc_handler = &proc_dointvec,
|
||||
.strategy = &sysctl_intvec,
|
||||
},
|
||||
{
|
||||
.ctl_name = CTL_RDMA_WRITEPADDING,
|
||||
.procname = "rdma_inline_write_padding",
|
||||
.data = &xprt_rdma_inline_write_padding,
|
||||
.maxlen = sizeof(unsigned int),
|
||||
.mode = 0644,
|
||||
.proc_handler = &proc_dointvec_minmax,
|
||||
.strategy = &sysctl_intvec,
|
||||
.extra1 = &zero,
|
||||
.extra2 = &max_padding,
|
||||
},
|
||||
{
|
||||
.ctl_name = CTL_RDMA_MEMREG,
|
||||
.procname = "rdma_memreg_strategy",
|
||||
.data = &xprt_rdma_memreg_strategy,
|
||||
.maxlen = sizeof(unsigned int),
|
||||
.mode = 0644,
|
||||
.proc_handler = &proc_dointvec_minmax,
|
||||
.strategy = &sysctl_intvec,
|
||||
.extra1 = &min_memreg,
|
||||
.extra2 = &max_memreg,
|
||||
},
|
||||
{
|
||||
.ctl_name = 0,
|
||||
},
|
||||
};
|
||||
|
||||
static ctl_table sunrpc_table[] = {
|
||||
{
|
||||
.ctl_name = CTL_SUNRPC,
|
||||
.procname = "sunrpc",
|
||||
.mode = 0555,
|
||||
.child = xr_tunables_table
|
||||
},
|
||||
{
|
||||
.ctl_name = 0,
|
||||
},
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
static struct rpc_xprt_ops xprt_rdma_procs; /* forward reference */
|
||||
|
||||
static void
|
||||
xprt_rdma_format_addresses(struct rpc_xprt *xprt)
|
||||
{
|
||||
struct sockaddr_in *addr = (struct sockaddr_in *)
|
||||
&rpcx_to_rdmad(xprt).addr;
|
||||
char *buf;
|
||||
|
||||
buf = kzalloc(20, GFP_KERNEL);
|
||||
if (buf)
|
||||
snprintf(buf, 20, NIPQUAD_FMT, NIPQUAD(addr->sin_addr.s_addr));
|
||||
xprt->address_strings[RPC_DISPLAY_ADDR] = buf;
|
||||
|
||||
buf = kzalloc(8, GFP_KERNEL);
|
||||
if (buf)
|
||||
snprintf(buf, 8, "%u", ntohs(addr->sin_port));
|
||||
xprt->address_strings[RPC_DISPLAY_PORT] = buf;
|
||||
|
||||
xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma";
|
||||
|
||||
buf = kzalloc(48, GFP_KERNEL);
|
||||
if (buf)
|
||||
snprintf(buf, 48, "addr="NIPQUAD_FMT" port=%u proto=%s",
|
||||
NIPQUAD(addr->sin_addr.s_addr),
|
||||
ntohs(addr->sin_port), "rdma");
|
||||
xprt->address_strings[RPC_DISPLAY_ALL] = buf;
|
||||
|
||||
buf = kzalloc(10, GFP_KERNEL);
|
||||
if (buf)
|
||||
snprintf(buf, 10, "%02x%02x%02x%02x",
|
||||
NIPQUAD(addr->sin_addr.s_addr));
|
||||
xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = buf;
|
||||
|
||||
buf = kzalloc(8, GFP_KERNEL);
|
||||
if (buf)
|
||||
snprintf(buf, 8, "%4hx", ntohs(addr->sin_port));
|
||||
xprt->address_strings[RPC_DISPLAY_HEX_PORT] = buf;
|
||||
|
||||
buf = kzalloc(30, GFP_KERNEL);
|
||||
if (buf)
|
||||
snprintf(buf, 30, NIPQUAD_FMT".%u.%u",
|
||||
NIPQUAD(addr->sin_addr.s_addr),
|
||||
ntohs(addr->sin_port) >> 8,
|
||||
ntohs(addr->sin_port) & 0xff);
|
||||
xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf;
|
||||
|
||||
/* netid */
|
||||
xprt->address_strings[RPC_DISPLAY_NETID] = "rdma";
|
||||
}
|
||||
|
||||
static void
|
||||
xprt_rdma_free_addresses(struct rpc_xprt *xprt)
|
||||
{
|
||||
kfree(xprt->address_strings[RPC_DISPLAY_ADDR]);
|
||||
kfree(xprt->address_strings[RPC_DISPLAY_PORT]);
|
||||
kfree(xprt->address_strings[RPC_DISPLAY_ALL]);
|
||||
kfree(xprt->address_strings[RPC_DISPLAY_HEX_ADDR]);
|
||||
kfree(xprt->address_strings[RPC_DISPLAY_HEX_PORT]);
|
||||
kfree(xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR]);
|
||||
}
|
||||
|
||||
static void
|
||||
xprt_rdma_connect_worker(struct work_struct *work)
|
||||
{
|
||||
struct rpcrdma_xprt *r_xprt =
|
||||
container_of(work, struct rpcrdma_xprt, rdma_connect.work);
|
||||
struct rpc_xprt *xprt = &r_xprt->xprt;
|
||||
int rc = 0;
|
||||
|
||||
if (!xprt->shutdown) {
|
||||
xprt_clear_connected(xprt);
|
||||
|
||||
dprintk("RPC: %s: %sconnect\n", __func__,
|
||||
r_xprt->rx_ep.rep_connected != 0 ? "re" : "");
|
||||
rc = rpcrdma_ep_connect(&r_xprt->rx_ep, &r_xprt->rx_ia);
|
||||
if (rc)
|
||||
goto out;
|
||||
}
|
||||
goto out_clear;
|
||||
|
||||
out:
|
||||
xprt_wake_pending_tasks(xprt, rc);
|
||||
|
||||
out_clear:
|
||||
dprintk("RPC: %s: exit\n", __func__);
|
||||
xprt_clear_connecting(xprt);
|
||||
}
|
||||
|
||||
/*
|
||||
* xprt_rdma_destroy
|
||||
*
|
||||
* Destroy the xprt.
|
||||
* Free all memory associated with the object, including its own.
|
||||
* NOTE: none of the *destroy methods free memory for their top-level
|
||||
* objects, even though they may have allocated it (they do free
|
||||
* private memory). It's up to the caller to handle it. In this
|
||||
* case (RDMA transport), all structure memory is inlined with the
|
||||
* struct rpcrdma_xprt.
|
||||
*/
|
||||
static void
|
||||
xprt_rdma_destroy(struct rpc_xprt *xprt)
|
||||
{
|
||||
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
|
||||
int rc;
|
||||
|
||||
dprintk("RPC: %s: called\n", __func__);
|
||||
|
||||
cancel_delayed_work(&r_xprt->rdma_connect);
|
||||
flush_scheduled_work();
|
||||
|
||||
xprt_clear_connected(xprt);
|
||||
|
||||
rpcrdma_buffer_destroy(&r_xprt->rx_buf);
|
||||
rc = rpcrdma_ep_destroy(&r_xprt->rx_ep, &r_xprt->rx_ia);
|
||||
if (rc)
|
||||
dprintk("RPC: %s: rpcrdma_ep_destroy returned %i\n",
|
||||
__func__, rc);
|
||||
rpcrdma_ia_close(&r_xprt->rx_ia);
|
||||
|
||||
xprt_rdma_free_addresses(xprt);
|
||||
|
||||
kfree(xprt->slot);
|
||||
xprt->slot = NULL;
|
||||
kfree(xprt);
|
||||
|
||||
dprintk("RPC: %s: returning\n", __func__);
|
||||
|
||||
module_put(THIS_MODULE);
|
||||
}
|
||||
|
||||
/**
|
||||
* xprt_setup_rdma - Set up transport to use RDMA
|
||||
*
|
||||
* @args: rpc transport arguments
|
||||
*/
|
||||
static struct rpc_xprt *
|
||||
xprt_setup_rdma(struct xprt_create *args)
|
||||
{
|
||||
struct rpcrdma_create_data_internal cdata;
|
||||
struct rpc_xprt *xprt;
|
||||
struct rpcrdma_xprt *new_xprt;
|
||||
struct rpcrdma_ep *new_ep;
|
||||
struct sockaddr_in *sin;
|
||||
int rc;
|
||||
|
||||
if (args->addrlen > sizeof(xprt->addr)) {
|
||||
dprintk("RPC: %s: address too large\n", __func__);
|
||||
return ERR_PTR(-EBADF);
|
||||
}
|
||||
|
||||
xprt = kzalloc(sizeof(struct rpcrdma_xprt), GFP_KERNEL);
|
||||
if (xprt == NULL) {
|
||||
dprintk("RPC: %s: couldn't allocate rpcrdma_xprt\n",
|
||||
__func__);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
xprt->max_reqs = xprt_rdma_slot_table_entries;
|
||||
xprt->slot = kcalloc(xprt->max_reqs,
|
||||
sizeof(struct rpc_rqst), GFP_KERNEL);
|
||||
if (xprt->slot == NULL) {
|
||||
kfree(xprt);
|
||||
dprintk("RPC: %s: couldn't allocate %d slots\n",
|
||||
__func__, xprt->max_reqs);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
/* 60 second timeout, no retries */
|
||||
xprt_set_timeout(&xprt->timeout, 0, 60UL * HZ);
|
||||
xprt->bind_timeout = (60U * HZ);
|
||||
xprt->connect_timeout = (60U * HZ);
|
||||
xprt->reestablish_timeout = (5U * HZ);
|
||||
xprt->idle_timeout = (5U * 60 * HZ);
|
||||
|
||||
xprt->resvport = 0; /* privileged port not needed */
|
||||
xprt->tsh_size = 0; /* RPC-RDMA handles framing */
|
||||
xprt->max_payload = RPCRDMA_MAX_DATA_SEGS * PAGE_SIZE;
|
||||
xprt->ops = &xprt_rdma_procs;
|
||||
|
||||
/*
|
||||
* Set up RDMA-specific connect data.
|
||||
*/
|
||||
|
||||
/* Put server RDMA address in local cdata */
|
||||
memcpy(&cdata.addr, args->dstaddr, args->addrlen);
|
||||
|
||||
/* Ensure xprt->addr holds valid server TCP (not RDMA)
|
||||
* address, for any side protocols which peek at it */
|
||||
xprt->prot = IPPROTO_TCP;
|
||||
xprt->addrlen = args->addrlen;
|
||||
memcpy(&xprt->addr, &cdata.addr, xprt->addrlen);
|
||||
|
||||
sin = (struct sockaddr_in *)&cdata.addr;
|
||||
if (ntohs(sin->sin_port) != 0)
|
||||
xprt_set_bound(xprt);
|
||||
|
||||
dprintk("RPC: %s: %u.%u.%u.%u:%u\n", __func__,
|
||||
NIPQUAD(sin->sin_addr.s_addr), ntohs(sin->sin_port));
|
||||
|
||||
/* Set max requests */
|
||||
cdata.max_requests = xprt->max_reqs;
|
||||
|
||||
/* Set some length limits */
|
||||
cdata.rsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA write max */
|
||||
cdata.wsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA read max */
|
||||
|
||||
cdata.inline_wsize = xprt_rdma_max_inline_write;
|
||||
if (cdata.inline_wsize > cdata.wsize)
|
||||
cdata.inline_wsize = cdata.wsize;
|
||||
|
||||
cdata.inline_rsize = xprt_rdma_max_inline_read;
|
||||
if (cdata.inline_rsize > cdata.rsize)
|
||||
cdata.inline_rsize = cdata.rsize;
|
||||
|
||||
cdata.padding = xprt_rdma_inline_write_padding;
|
||||
|
||||
/*
|
||||
* Create new transport instance, which includes initialized
|
||||
* o ia
|
||||
* o endpoint
|
||||
* o buffers
|
||||
*/
|
||||
|
||||
new_xprt = rpcx_to_rdmax(xprt);
|
||||
|
||||
rc = rpcrdma_ia_open(new_xprt, (struct sockaddr *) &cdata.addr,
|
||||
xprt_rdma_memreg_strategy);
|
||||
if (rc)
|
||||
goto out1;
|
||||
|
||||
/*
|
||||
* initialize and create ep
|
||||
*/
|
||||
new_xprt->rx_data = cdata;
|
||||
new_ep = &new_xprt->rx_ep;
|
||||
new_ep->rep_remote_addr = cdata.addr;
|
||||
|
||||
rc = rpcrdma_ep_create(&new_xprt->rx_ep,
|
||||
&new_xprt->rx_ia, &new_xprt->rx_data);
|
||||
if (rc)
|
||||
goto out2;
|
||||
|
||||
/*
|
||||
* Allocate pre-registered send and receive buffers for headers and
|
||||
* any inline data. Also specify any padding which will be provided
|
||||
* from a preregistered zero buffer.
|
||||
*/
|
||||
rc = rpcrdma_buffer_create(&new_xprt->rx_buf, new_ep, &new_xprt->rx_ia,
|
||||
&new_xprt->rx_data);
|
||||
if (rc)
|
||||
goto out3;
|
||||
|
||||
/*
|
||||
* Register a callback for connection events. This is necessary because
|
||||
* connection loss notification is async. We also catch connection loss
|
||||
* when reaping receives.
|
||||
*/
|
||||
INIT_DELAYED_WORK(&new_xprt->rdma_connect, xprt_rdma_connect_worker);
|
||||
new_ep->rep_func = rpcrdma_conn_func;
|
||||
new_ep->rep_xprt = xprt;
|
||||
|
||||
xprt_rdma_format_addresses(xprt);
|
||||
|
||||
if (!try_module_get(THIS_MODULE))
|
||||
goto out4;
|
||||
|
||||
return xprt;
|
||||
|
||||
out4:
|
||||
xprt_rdma_free_addresses(xprt);
|
||||
rc = -EINVAL;
|
||||
out3:
|
||||
(void) rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia);
|
||||
out2:
|
||||
rpcrdma_ia_close(&new_xprt->rx_ia);
|
||||
out1:
|
||||
kfree(xprt->slot);
|
||||
kfree(xprt);
|
||||
return ERR_PTR(rc);
|
||||
}
|
||||
|
||||
/*
|
||||
* Close a connection, during shutdown or timeout/reconnect
|
||||
*/
|
||||
static void
|
||||
xprt_rdma_close(struct rpc_xprt *xprt)
|
||||
{
|
||||
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
|
||||
|
||||
dprintk("RPC: %s: closing\n", __func__);
|
||||
xprt_disconnect(xprt);
|
||||
(void) rpcrdma_ep_disconnect(&r_xprt->rx_ep, &r_xprt->rx_ia);
|
||||
}
|
||||
|
||||
static void
|
||||
xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port)
|
||||
{
|
||||
struct sockaddr_in *sap;
|
||||
|
||||
sap = (struct sockaddr_in *)&xprt->addr;
|
||||
sap->sin_port = htons(port);
|
||||
sap = (struct sockaddr_in *)&rpcx_to_rdmad(xprt).addr;
|
||||
sap->sin_port = htons(port);
|
||||
dprintk("RPC: %s: %u\n", __func__, port);
|
||||
}
|
||||
|
||||
static void
|
||||
xprt_rdma_connect(struct rpc_task *task)
|
||||
{
|
||||
struct rpc_xprt *xprt = (struct rpc_xprt *)task->tk_xprt;
|
||||
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
|
||||
|
||||
if (!xprt_test_and_set_connecting(xprt)) {
|
||||
if (r_xprt->rx_ep.rep_connected != 0) {
|
||||
/* Reconnect */
|
||||
schedule_delayed_work(&r_xprt->rdma_connect,
|
||||
xprt->reestablish_timeout);
|
||||
} else {
|
||||
schedule_delayed_work(&r_xprt->rdma_connect, 0);
|
||||
if (!RPC_IS_ASYNC(task))
|
||||
flush_scheduled_work();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
xprt_rdma_reserve_xprt(struct rpc_task *task)
|
||||
{
|
||||
struct rpc_xprt *xprt = task->tk_xprt;
|
||||
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
|
||||
int credits = atomic_read(&r_xprt->rx_buf.rb_credits);
|
||||
|
||||
/* == RPC_CWNDSCALE @ init, but *after* setup */
|
||||
if (r_xprt->rx_buf.rb_cwndscale == 0UL) {
|
||||
r_xprt->rx_buf.rb_cwndscale = xprt->cwnd;
|
||||
dprintk("RPC: %s: cwndscale %lu\n", __func__,
|
||||
r_xprt->rx_buf.rb_cwndscale);
|
||||
BUG_ON(r_xprt->rx_buf.rb_cwndscale <= 0);
|
||||
}
|
||||
xprt->cwnd = credits * r_xprt->rx_buf.rb_cwndscale;
|
||||
return xprt_reserve_xprt_cong(task);
|
||||
}
|
||||
|
||||
/*
|
||||
* The RDMA allocate/free functions need the task structure as a place
|
||||
* to hide the struct rpcrdma_req, which is necessary for the actual send/recv
|
||||
* sequence. For this reason, the recv buffers are attached to send
|
||||
* buffers for portions of the RPC. Note that the RPC layer allocates
|
||||
* both send and receive buffers in the same call. We may register
|
||||
* the receive buffer portion when using reply chunks.
|
||||
*/
|
||||
static void *
|
||||
xprt_rdma_allocate(struct rpc_task *task, size_t size)
|
||||
{
|
||||
struct rpc_xprt *xprt = task->tk_xprt;
|
||||
struct rpcrdma_req *req, *nreq;
|
||||
|
||||
req = rpcrdma_buffer_get(&rpcx_to_rdmax(xprt)->rx_buf);
|
||||
BUG_ON(NULL == req);
|
||||
|
||||
if (size > req->rl_size) {
|
||||
dprintk("RPC: %s: size %zd too large for buffer[%zd]: "
|
||||
"prog %d vers %d proc %d\n",
|
||||
__func__, size, req->rl_size,
|
||||
task->tk_client->cl_prog, task->tk_client->cl_vers,
|
||||
task->tk_msg.rpc_proc->p_proc);
|
||||
/*
|
||||
* Outgoing length shortage. Our inline write max must have
|
||||
* been configured to perform direct i/o.
|
||||
*
|
||||
* This is therefore a large metadata operation, and the
|
||||
* allocate call was made on the maximum possible message,
|
||||
* e.g. containing long filename(s) or symlink data. In
|
||||
* fact, while these metadata operations *might* carry
|
||||
* large outgoing payloads, they rarely *do*. However, we
|
||||
* have to commit to the request here, so reallocate and
|
||||
* register it now. The data path will never require this
|
||||
* reallocation.
|
||||
*
|
||||
* If the allocation or registration fails, the RPC framework
|
||||
* will (doggedly) retry.
|
||||
*/
|
||||
if (rpcx_to_rdmax(xprt)->rx_ia.ri_memreg_strategy ==
|
||||
RPCRDMA_BOUNCEBUFFERS) {
|
||||
/* forced to "pure inline" */
|
||||
dprintk("RPC: %s: too much data (%zd) for inline "
|
||||
"(r/w max %d/%d)\n", __func__, size,
|
||||
rpcx_to_rdmad(xprt).inline_rsize,
|
||||
rpcx_to_rdmad(xprt).inline_wsize);
|
||||
size = req->rl_size;
|
||||
rpc_exit(task, -EIO); /* fail the operation */
|
||||
rpcx_to_rdmax(xprt)->rx_stats.failed_marshal_count++;
|
||||
goto out;
|
||||
}
|
||||
if (task->tk_flags & RPC_TASK_SWAPPER)
|
||||
nreq = kmalloc(sizeof *req + size, GFP_ATOMIC);
|
||||
else
|
||||
nreq = kmalloc(sizeof *req + size, GFP_NOFS);
|
||||
if (nreq == NULL)
|
||||
goto outfail;
|
||||
|
||||
if (rpcrdma_register_internal(&rpcx_to_rdmax(xprt)->rx_ia,
|
||||
nreq->rl_base, size + sizeof(struct rpcrdma_req)
|
||||
- offsetof(struct rpcrdma_req, rl_base),
|
||||
&nreq->rl_handle, &nreq->rl_iov)) {
|
||||
kfree(nreq);
|
||||
goto outfail;
|
||||
}
|
||||
rpcx_to_rdmax(xprt)->rx_stats.hardway_register_count += size;
|
||||
nreq->rl_size = size;
|
||||
nreq->rl_niovs = 0;
|
||||
nreq->rl_nchunks = 0;
|
||||
nreq->rl_buffer = (struct rpcrdma_buffer *)req;
|
||||
nreq->rl_reply = req->rl_reply;
|
||||
memcpy(nreq->rl_segments,
|
||||
req->rl_segments, sizeof nreq->rl_segments);
|
||||
/* flag the swap with an unused field */
|
||||
nreq->rl_iov.length = 0;
|
||||
req->rl_reply = NULL;
|
||||
req = nreq;
|
||||
}
|
||||
dprintk("RPC: %s: size %zd, request 0x%p\n", __func__, size, req);
|
||||
out:
|
||||
return req->rl_xdr_buf;
|
||||
|
||||
outfail:
|
||||
rpcrdma_buffer_put(req);
|
||||
rpcx_to_rdmax(xprt)->rx_stats.failed_marshal_count++;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* This function returns all RDMA resources to the pool.
|
||||
*/
|
||||
static void
|
||||
xprt_rdma_free(void *buffer)
|
||||
{
|
||||
struct rpcrdma_req *req;
|
||||
struct rpcrdma_xprt *r_xprt;
|
||||
struct rpcrdma_rep *rep;
|
||||
int i;
|
||||
|
||||
if (buffer == NULL)
|
||||
return;
|
||||
|
||||
req = container_of(buffer, struct rpcrdma_req, rl_xdr_buf[0]);
|
||||
r_xprt = container_of(req->rl_buffer, struct rpcrdma_xprt, rx_buf);
|
||||
rep = req->rl_reply;
|
||||
|
||||
dprintk("RPC: %s: called on 0x%p%s\n",
|
||||
__func__, rep, (rep && rep->rr_func) ? " (with waiter)" : "");
|
||||
|
||||
/*
|
||||
* Finish the deregistration. When using mw bind, this was
|
||||
* begun in rpcrdma_reply_handler(). In all other modes, we
|
||||
* do it here, in thread context. The process is considered
|
||||
* complete when the rr_func vector becomes NULL - this
|
||||
* was put in place during rpcrdma_reply_handler() - the wait
|
||||
* call below will not block if the dereg is "done". If
|
||||
* interrupted, our framework will clean up.
|
||||
*/
|
||||
for (i = 0; req->rl_nchunks;) {
|
||||
--req->rl_nchunks;
|
||||
i += rpcrdma_deregister_external(
|
||||
&req->rl_segments[i], r_xprt, NULL);
|
||||
}
|
||||
|
||||
if (rep && wait_event_interruptible(rep->rr_unbind, !rep->rr_func)) {
|
||||
rep->rr_func = NULL; /* abandon the callback */
|
||||
req->rl_reply = NULL;
|
||||
}
|
||||
|
||||
if (req->rl_iov.length == 0) { /* see allocate above */
|
||||
struct rpcrdma_req *oreq = (struct rpcrdma_req *)req->rl_buffer;
|
||||
oreq->rl_reply = req->rl_reply;
|
||||
(void) rpcrdma_deregister_internal(&r_xprt->rx_ia,
|
||||
req->rl_handle,
|
||||
&req->rl_iov);
|
||||
kfree(req);
|
||||
req = oreq;
|
||||
}
|
||||
|
||||
/* Put back request+reply buffers */
|
||||
rpcrdma_buffer_put(req);
|
||||
}
|
||||
|
||||
/*
|
||||
* send_request invokes the meat of RPC RDMA. It must do the following:
|
||||
* 1. Marshal the RPC request into an RPC RDMA request, which means
|
||||
* putting a header in front of data, and creating IOVs for RDMA
|
||||
* from those in the request.
|
||||
* 2. In marshaling, detect opportunities for RDMA, and use them.
|
||||
* 3. Post a recv message to set up asynch completion, then send
|
||||
* the request (rpcrdma_ep_post).
|
||||
* 4. No partial sends are possible in the RPC-RDMA protocol (as in UDP).
|
||||
*/
|
||||
|
||||
static int
|
||||
xprt_rdma_send_request(struct rpc_task *task)
|
||||
{
|
||||
struct rpc_rqst *rqst = task->tk_rqstp;
|
||||
struct rpc_xprt *xprt = task->tk_xprt;
|
||||
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
|
||||
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
|
||||
|
||||
/* marshal the send itself */
|
||||
if (req->rl_niovs == 0 && rpcrdma_marshal_req(rqst) != 0) {
|
||||
r_xprt->rx_stats.failed_marshal_count++;
|
||||
dprintk("RPC: %s: rpcrdma_marshal_req failed\n",
|
||||
__func__);
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
if (req->rl_reply == NULL) /* e.g. reconnection */
|
||||
rpcrdma_recv_buffer_get(req);
|
||||
|
||||
if (req->rl_reply) {
|
||||
req->rl_reply->rr_func = rpcrdma_reply_handler;
|
||||
/* this need only be done once, but... */
|
||||
req->rl_reply->rr_xprt = xprt;
|
||||
}
|
||||
|
||||
if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) {
|
||||
xprt_disconnect(xprt);
|
||||
return -ENOTCONN; /* implies disconnect */
|
||||
}
|
||||
|
||||
rqst->rq_bytes_sent = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
|
||||
{
|
||||
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
|
||||
long idle_time = 0;
|
||||
|
||||
if (xprt_connected(xprt))
|
||||
idle_time = (long)(jiffies - xprt->last_used) / HZ;
|
||||
|
||||
seq_printf(seq,
|
||||
"\txprt:\trdma %u %lu %lu %lu %ld %lu %lu %lu %Lu %Lu "
|
||||
"%lu %lu %lu %Lu %Lu %Lu %Lu %lu %lu %lu\n",
|
||||
|
||||
0, /* need a local port? */
|
||||
xprt->stat.bind_count,
|
||||
xprt->stat.connect_count,
|
||||
xprt->stat.connect_time,
|
||||
idle_time,
|
||||
xprt->stat.sends,
|
||||
xprt->stat.recvs,
|
||||
xprt->stat.bad_xids,
|
||||
xprt->stat.req_u,
|
||||
xprt->stat.bklog_u,
|
||||
|
||||
r_xprt->rx_stats.read_chunk_count,
|
||||
r_xprt->rx_stats.write_chunk_count,
|
||||
r_xprt->rx_stats.reply_chunk_count,
|
||||
r_xprt->rx_stats.total_rdma_request,
|
||||
r_xprt->rx_stats.total_rdma_reply,
|
||||
r_xprt->rx_stats.pullup_copy_count,
|
||||
r_xprt->rx_stats.fixup_copy_count,
|
||||
r_xprt->rx_stats.hardway_register_count,
|
||||
r_xprt->rx_stats.failed_marshal_count,
|
||||
r_xprt->rx_stats.bad_reply_count);
|
||||
}
|
||||
|
||||
/*
|
||||
* Plumbing for rpc transport switch and kernel module
|
||||
*/
|
||||
|
||||
static struct rpc_xprt_ops xprt_rdma_procs = {
|
||||
.reserve_xprt = xprt_rdma_reserve_xprt,
|
||||
.release_xprt = xprt_release_xprt_cong, /* sunrpc/xprt.c */
|
||||
.release_request = xprt_release_rqst_cong, /* ditto */
|
||||
.set_retrans_timeout = xprt_set_retrans_timeout_def, /* ditto */
|
||||
.rpcbind = rpcb_getport_async, /* sunrpc/rpcb_clnt.c */
|
||||
.set_port = xprt_rdma_set_port,
|
||||
.connect = xprt_rdma_connect,
|
||||
.buf_alloc = xprt_rdma_allocate,
|
||||
.buf_free = xprt_rdma_free,
|
||||
.send_request = xprt_rdma_send_request,
|
||||
.close = xprt_rdma_close,
|
||||
.destroy = xprt_rdma_destroy,
|
||||
.print_stats = xprt_rdma_print_stats
|
||||
};
|
||||
|
||||
static struct xprt_class xprt_rdma = {
|
||||
.list = LIST_HEAD_INIT(xprt_rdma.list),
|
||||
.name = "rdma",
|
||||
.owner = THIS_MODULE,
|
||||
.ident = XPRT_TRANSPORT_RDMA,
|
||||
.setup = xprt_setup_rdma,
|
||||
};
|
||||
|
||||
/*
 * Module exit handler.
 *
 * When RPC_DEBUG is defined, drops the sysctl table if one is registered,
 * then unregisters the "rdma" transport class.  A failure to unregister
 * is only reported through dprintk(); nothing is returned to the caller.
 */
static void __exit xprt_rdma_cleanup(void)
{
	int err;

	dprintk("RPCRDMA Module Removed, deregister RPC RDMA transport\n");

#ifdef RPC_DEBUG
	if (sunrpc_table_header != NULL) {
		unregister_sysctl_table(sunrpc_table_header);
		/* clear the handle so a stale pointer is never reused */
		sunrpc_table_header = NULL;
	}
#endif

	err = xprt_unregister_transport(&xprt_rdma);
	if (err != 0)
		dprintk("RPC: %s: xprt_unregister returned %i\n",
			__func__, err);
}
|
||||
|
||||
/*
 * Module init handler.
 *
 * Registers the "rdma" transport class, logs the default tunables via
 * dprintk(), and (when RPC_DEBUG is defined) registers the sysctl table
 * if that has not been done yet.
 *
 * Returns 0 on success, or the error from xprt_register_transport().
 */
static int __init xprt_rdma_init(void)
{
	int err = xprt_register_transport(&xprt_rdma);

	if (err != 0)
		return err;

	dprintk(KERN_INFO "RPCRDMA Module Init, register RPC RDMA transport\n");

	dprintk(KERN_INFO "Defaults:\n");
	dprintk(KERN_INFO "\tSlots %d\n"
		"\tMaxInlineRead %d\n\tMaxInlineWrite %d\n",
		xprt_rdma_slot_table_entries,
		xprt_rdma_max_inline_read,
		xprt_rdma_max_inline_write);
	dprintk(KERN_INFO "\tPadding %d\n\tMemreg %d\n",
		xprt_rdma_inline_write_padding,
		xprt_rdma_memreg_strategy);

#ifdef RPC_DEBUG
	/* register the sysctl table only once */
	if (sunrpc_table_header == NULL)
		sunrpc_table_header = register_sysctl_table(sunrpc_table);
#endif

	return 0;
}
|
||||
|
||||
module_init(xprt_rdma_init);
|
||||
module_exit(xprt_rdma_cleanup);
|
1626
net/sunrpc/xprtrdma/verbs.c
Normal file
1626
net/sunrpc/xprtrdma/verbs.c
Normal file
File diff suppressed because it is too large
Load Diff
330
net/sunrpc/xprtrdma/xprt_rdma.h
Normal file
330
net/sunrpc/xprtrdma/xprt_rdma.h
Normal file
@ -0,0 +1,330 @@
|
||||
/*
|
||||
* Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
|
||||
*
|
||||
* This software is available to you under a choice of one of two
|
||||
* licenses. You may choose to be licensed under the terms of the GNU
|
||||
* General Public License (GPL) Version 2, available from the file
|
||||
* COPYING in the main directory of this source tree, or the BSD-type
|
||||
* license below:
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* Redistributions in binary form must reproduce the above
|
||||
* copyright notice, this list of conditions and the following
|
||||
* disclaimer in the documentation and/or other materials provided
|
||||
* with the distribution.
|
||||
*
|
||||
* Neither the name of the Network Appliance, Inc. nor the names of
|
||||
* its contributors may be used to endorse or promote products
|
||||
* derived from this software without specific prior written
|
||||
* permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef _LINUX_SUNRPC_XPRT_RDMA_H
|
||||
#define _LINUX_SUNRPC_XPRT_RDMA_H
|
||||
|
||||
#include <linux/wait.h> /* wait_queue_head_t, etc */
|
||||
#include <linux/spinlock.h> /* spinlock_t, etc */
|
||||
#include <asm/atomic.h> /* atomic_t, etc */
|
||||
|
||||
#include <rdma/rdma_cm.h> /* RDMA connection api */
|
||||
#include <rdma/ib_verbs.h> /* RDMA verbs api */
|
||||
|
||||
#include <linux/sunrpc/clnt.h> /* rpc_xprt */
|
||||
#include <linux/sunrpc/rpc_rdma.h> /* RPC/RDMA protocol */
|
||||
#include <linux/sunrpc/xprtrdma.h> /* xprt parameters */
|
||||
|
||||
/*
|
||||
* Interface Adapter -- one per transport instance
|
||||
*/
|
||||
/* Per-transport interface adapter state; embedded in struct rpcrdma_xprt as rx_ia. */
struct rpcrdma_ia {
|
||||
struct rdma_cm_id *ri_id; /* RDMA connection manager identifier */
|
||||
struct ib_pd *ri_pd; /* presumably the protection domain -- confirm in verbs.c */
|
||||
struct ib_mr *ri_bind_mem; /* memory region handle; usage not visible in this header */
|
||||
struct completion ri_done; /* NOTE(review): presumably completed by async CM events -- confirm */
|
||||
int ri_async_rc; /* NOTE(review): presumably the result of the async operation -- confirm */
|
||||
enum rpcrdma_memreg ri_memreg_strategy; /* memory registration strategy */
|
||||
};
|
||||
|
||||
/*
|
||||
* RDMA Endpoint -- one per transport instance
|
||||
*/
|
||||
|
||||
/* Per-transport endpoint state; embedded in struct rpcrdma_xprt as rx_ep. */
struct rpcrdma_ep {
|
||||
atomic_t rep_cqcount; /* counter reloaded by INIT_CQCOUNT, decremented by DECR_CQCOUNT */
|
||||
int rep_cqinit; /* reload value that INIT_CQCOUNT stores into rep_cqcount */
|
||||
int rep_connected; /* NOTE(review): connection state/status; encoding not visible here */
|
||||
struct rpcrdma_ia *rep_ia; /* interface adapter used by this endpoint */
|
||||
struct ib_cq *rep_cq; /* completion queue handle */
|
||||
struct ib_qp_init_attr rep_attr; /* queue pair creation attributes */
|
||||
wait_queue_head_t rep_connect_wait; /* presumably waited on for connect state changes -- confirm */
|
||||
struct ib_sge rep_pad; /* holds zeroed pad */
|
||||
struct ib_mr *rep_pad_mr; /* holds zeroed pad */
|
||||
void (*rep_func)(struct rpcrdma_ep *); /* endpoint callback; receives this endpoint */
|
||||
struct rpc_xprt *rep_xprt; /* for rep_func */
|
||||
struct rdma_conn_param rep_remote_cma; /* RDMA CM connection parameters */
|
||||
struct sockaddr_storage rep_remote_addr; /* remote address storage */
|
||||
};
|
||||
|
||||
/* Reload the endpoint's rep_cqcount counter from its rep_cqinit value. */
#define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit)
|
||||
/* Subtract one from rep_cqcount; evaluates to the result of atomic_sub_return(). */
#define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount)
|
||||
|
||||
/*
|
||||
* struct rpcrdma_rep -- this structure encapsulates state required to recv
|
||||
* and complete a reply, asynchronously. It needs several pieces of
|
||||
* state:
|
||||
* o recv buffer (posted to provider)
|
||||
* o ib_sge (also donated to provider)
|
||||
* o status of reply (length, success or not)
|
||||
* o bookkeeping state to get run by tasklet (list, etc)
|
||||
*
|
||||
* These are allocated during initialization, per-transport instance;
|
||||
* however, the tasklet execution list itself is global, as it should
|
||||
* always be pretty short.
|
||||
*
|
||||
* N of these are associated with a transport instance, and stored in
|
||||
* struct rpcrdma_buffer. N is the max number of outstanding requests.
|
||||
*/
|
||||
|
||||
/* temporary static scatter/gather max */
|
||||
#define RPCRDMA_MAX_DATA_SEGS (8) /* max scatter/gather */
|
||||
#define RPCRDMA_MAX_SEGS (RPCRDMA_MAX_DATA_SEGS + 2) /* head+tail = 2 */
|
||||
#define MAX_RPCRDMAHDR (\
|
||||
/* max supported RPC/RDMA header */ \
|
||||
sizeof(struct rpcrdma_msg) + (2 * sizeof(u32)) + \
|
||||
(sizeof(struct rpcrdma_read_chunk) * RPCRDMA_MAX_SEGS) + sizeof(u32))
|
||||
|
||||
struct rpcrdma_buffer;
|
||||
|
||||
/*
 * Reply context: one inline receive buffer (rr_base) together with the
 * bookkeeping needed to post it and to run rr_func on completion.
 */
struct rpcrdma_rep {
|
||||
unsigned int rr_len; /* actual received reply length */
|
||||
struct rpcrdma_buffer *rr_buffer; /* home base for this structure */
|
||||
struct rpc_xprt *rr_xprt; /* needed for request/reply matching */
|
||||
void (*rr_func)(struct rpcrdma_rep *);/* called by tasklet in softint */
|
||||
struct list_head rr_list; /* tasklet list */
|
||||
wait_queue_head_t rr_unbind; /* optional unbind wait */
|
||||
struct ib_sge rr_iov; /* for posting */
|
||||
struct ib_mr *rr_handle; /* handle for mem in rr_iov */
|
||||
char rr_base[MAX_RPCRDMAHDR]; /* minimal inline receive buffer */
|
||||
};
|
||||
|
||||
/*
|
||||
* struct rpcrdma_req -- structure central to the request/reply sequence.
|
||||
*
|
||||
* N of these are associated with a transport instance, and stored in
|
||||
* struct rpcrdma_buffer. N is the max number of outstanding requests.
|
||||
*
|
||||
* It includes pre-registered buffer memory for send AND recv.
|
||||
* The recv buffer, however, is not owned by this structure, and
|
||||
* is "donated" to the hardware when a recv is posted. When a
|
||||
* reply is handled, the recv buffer used is given back to the
|
||||
* struct rpcrdma_req associated with the request.
|
||||
*
|
||||
* In addition to the basic memory, this structure includes an array
|
||||
* of iovs for send operations. The reason is that the iovs passed to
|
||||
* ib_post_{send,recv} must not be modified until the work request
|
||||
* completes.
|
||||
*
|
||||
* NOTES:
|
||||
* o RPCRDMA_MAX_SEGS is the max number of addressable chunk elements we
|
||||
* marshal. The number needed varies depending on the iov lists that
|
||||
* are passed to us, the memory registration mode we are in, and if
|
||||
* physical addressing is used, the layout.
|
||||
*/
|
||||
|
||||
/*
 * One chunk segment: the memory handle used to register it, the result
 * of that registration, and the DMA mapping of the underlying memory.
 * struct rpcrdma_req holds RPCRDMA_MAX_SEGS of these in rl_segments[].
 */
struct rpcrdma_mr_seg { /* chunk descriptors */
|
||||
union { /* chunk memory handles */
|
||||
struct ib_mr *rl_mr; /* if registered directly */
|
||||
struct rpcrdma_mw { /* if registered from region */
|
||||
union {
|
||||
struct ib_mw *mw; /* memory window handle */
|
||||
struct ib_fmr *fmr; /* FMR handle */
|
||||
} r;
|
||||
struct list_head mw_list; /* list linkage; presumably on rpcrdma_buffer.rb_mws -- confirm */
|
||||
} *rl_mw;
|
||||
} mr_chunk;
|
||||
u64 mr_base; /* registration result */
|
||||
u32 mr_rkey; /* registration result */
|
||||
u32 mr_len; /* length of chunk or segment */
|
||||
int mr_nsegs; /* number of segments in chunk or 0 */
|
||||
enum dma_data_direction mr_dir; /* segment mapping direction */
|
||||
dma_addr_t mr_dma; /* segment mapping address */
|
||||
size_t mr_dmalen; /* segment mapping length */
|
||||
struct page *mr_page; /* owning page, if any */
|
||||
char *mr_offset; /* kva if no page, else offset */
|
||||
};
|
||||
|
||||
/*
 * Request context.  rl_xdr_buf is a zero-length trailing array and must
 * stay the last member: rpcr_to_rdmar() recovers the enclosing
 * rpcrdma_req from a pointer to rl_xdr_buf[0] via container_of().
 */
struct rpcrdma_req {
|
||||
size_t rl_size; /* actual length of buffer */
|
||||
unsigned int rl_niovs; /* 0, 2 or 4 */
|
||||
unsigned int rl_nchunks; /* non-zero if chunks */
|
||||
struct rpcrdma_buffer *rl_buffer; /* home base for this structure */
|
||||
struct rpcrdma_rep *rl_reply;/* holder for reply buffer */
|
||||
struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS];/* chunk segments */
|
||||
struct ib_sge rl_send_iov[4]; /* for active requests */
|
||||
struct ib_sge rl_iov; /* for posting */
|
||||
struct ib_mr *rl_handle; /* handle for mem in rl_iov */
|
||||
char rl_base[MAX_RPCRDMAHDR]; /* start of actual buffer */
|
||||
__u32 rl_xdr_buf[0]; /* start of returned rpc rq_buffer */
|
||||
};
|
||||
/*
 * Map r (anything with an rq_buffer member) to the struct rpcrdma_req
 * whose rl_xdr_buf[0] that rq_buffer points at.  Only valid when
 * rq_buffer really is the rl_xdr_buf of an rpcrdma_req.
 */
#define rpcr_to_rdmar(r) \
|
||||
container_of((r)->rq_buffer, struct rpcrdma_req, rl_xdr_buf[0])
|
||||
|
||||
/*
|
||||
* struct rpcrdma_buffer -- holds list/queue of pre-registered memory for
|
||||
* inline requests/replies, and client/server credits.
|
||||
*
|
||||
* One of these is associated with a transport instance
|
||||
*/
|
||||
/* Per-transport buffer pool state; embedded in struct rpcrdma_xprt as rx_buf. */
struct rpcrdma_buffer {
|
||||
spinlock_t rb_lock; /* protects indexes */
|
||||
atomic_t rb_credits; /* most recent server credits */
|
||||
unsigned long rb_cwndscale; /* cached framework rpc_cwndscale */
|
||||
int rb_max_requests;/* client max requests */
|
||||
struct list_head rb_mws; /* optional memory windows/fmrs */
|
||||
int rb_send_index; /* presumably the current position in rb_send_bufs -- confirm */
|
||||
struct rpcrdma_req **rb_send_bufs; /* array of request pointers */
|
||||
int rb_recv_index; /* presumably the current position in rb_recv_bufs -- confirm */
|
||||
struct rpcrdma_rep **rb_recv_bufs; /* array of reply pointers */
|
||||
char *rb_pool; /* NOTE(review): presumably the backing allocation -- confirm in verbs.c */
|
||||
};
|
||||
/*
 * Given a pointer to the rx_buf member of a struct rpcrdma_xprt, yield a
 * pointer to that same transport's rx_ia.
 */
#define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia)
|
||||
|
||||
/*
|
||||
* Internal structure for transport instance creation. This
|
||||
* exists primarily for modularity.
|
||||
*
|
||||
* This data should be set with mount options
|
||||
*/
|
||||
/*
 * Creation-time parameters, stored in struct rpcrdma_xprt as rx_data and
 * read back through rpcx_to_rdmad() by the RPCRDMA_INLINE_* macros.
 */
struct rpcrdma_create_data_internal {
|
||||
struct sockaddr_storage addr; /* RDMA server address */
|
||||
unsigned int max_requests; /* max requests (slots) in flight */
|
||||
unsigned int rsize; /* mount rsize - max read hdr+data */
|
||||
unsigned int wsize; /* mount wsize - max write hdr+data */
|
||||
unsigned int inline_rsize; /* max non-rdma read data payload */
|
||||
unsigned int inline_wsize; /* max non-rdma write data payload */
|
||||
unsigned int padding; /* non-rdma write header padding */
|
||||
};
|
||||
|
||||
/*
 * Accessors for the per-transport inline limits and pad size, reached
 * from a request via rq->rq_task->tk_xprt and rpcx_to_rdmad().
 *
 * The argument is parenthesized so that an expression argument binds as
 * intended, and every expansion is fully parenthesized so that it can be
 * used inside a larger expression.
 */
#define RPCRDMA_INLINE_READ_THRESHOLD(rq) \
	(rpcx_to_rdmad((rq)->rq_task->tk_xprt).inline_rsize)

#define RPCRDMA_INLINE_WRITE_THRESHOLD(rq) \
	(rpcx_to_rdmad((rq)->rq_task->tk_xprt).inline_wsize)

#define RPCRDMA_INLINE_PAD_VALUE(rq) \
	(rpcx_to_rdmad((rq)->rq_task->tk_xprt).padding)
|
||||
|
||||
/*
|
||||
* Statistics for RPCRDMA
|
||||
*/
|
||||
/*
 * Per-transport counters, stored in struct rpcrdma_xprt as rx_stats.
 * xprt_rdma_print_stats() emits them in this order on its "xprt: rdma"
 * line, after the generic transport statistics.
 */
struct rpcrdma_stats {
|
||||
unsigned long read_chunk_count;
|
||||
unsigned long write_chunk_count;
|
||||
unsigned long reply_chunk_count;
|
||||
|
||||
/* the next four are 64-bit and printed with %Lu */
unsigned long long total_rdma_request;
|
||||
unsigned long long total_rdma_reply;
|
||||
|
||||
unsigned long long pullup_copy_count;
|
||||
unsigned long long fixup_copy_count;
|
||||
unsigned long hardway_register_count;
|
||||
unsigned long failed_marshal_count;
|
||||
unsigned long bad_reply_count;
|
||||
};
|
||||
|
||||
/*
|
||||
* RPCRDMA transport -- encapsulates the structures above for
|
||||
* integration with RPC.
|
||||
*
|
||||
* The contained structures are embedded, not pointers,
|
||||
* for convenience. This structure need not be visible externally.
|
||||
*
|
||||
* It is allocated and initialized during mount, and released
|
||||
* during unmount.
|
||||
*/
|
||||
/*
 * Container for one RDMA transport instance.  The members are embedded
 * (not pointers), which is what lets rpcx_to_rdmax() and rdmab_to_ia()
 * use container_of() on them.
 */
struct rpcrdma_xprt {
|
||||
struct rpc_xprt xprt; /* generic RPC transport; rpcx_to_rdmax() maps it back here */
|
||||
struct rpcrdma_ia rx_ia; /* interface adapter */
|
||||
struct rpcrdma_ep rx_ep; /* endpoint */
|
||||
struct rpcrdma_buffer rx_buf; /* buffer pool; rdmab_to_ia() maps it to rx_ia */
|
||||
struct rpcrdma_create_data_internal rx_data; /* creation parameters; see rpcx_to_rdmad() */
|
||||
struct delayed_work rdma_connect; /* NOTE(review): presumably the deferred connect work -- confirm */
|
||||
struct rpcrdma_stats rx_stats; /* statistics counters */
|
||||
};
|
||||
|
||||
/* Map a pointer to the embedded rpc_xprt back to its enclosing rpcrdma_xprt. */
#define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, xprt)
|
||||
/* The rx_data member (an lvalue, not a pointer) of the rpcrdma_xprt enclosing x. */
#define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data)
|
||||
|
||||
/*
|
||||
* Interface Adapter calls - xprtrdma/verbs.c
|
||||
*/
|
||||
int rpcrdma_ia_open(struct rpcrdma_xprt *, struct sockaddr *, int);
|
||||
void rpcrdma_ia_close(struct rpcrdma_ia *);
|
||||
|
||||
/*
|
||||
* Endpoint calls - xprtrdma/verbs.c
|
||||
*/
|
||||
int rpcrdma_ep_create(struct rpcrdma_ep *, struct rpcrdma_ia *,
|
||||
struct rpcrdma_create_data_internal *);
|
||||
int rpcrdma_ep_destroy(struct rpcrdma_ep *, struct rpcrdma_ia *);
|
||||
int rpcrdma_ep_connect(struct rpcrdma_ep *, struct rpcrdma_ia *);
|
||||
int rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *);
|
||||
|
||||
int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *,
|
||||
struct rpcrdma_req *);
|
||||
int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_ep *,
|
||||
struct rpcrdma_rep *);
|
||||
|
||||
/*
|
||||
* Buffer calls - xprtrdma/verbs.c
|
||||
*/
|
||||
int rpcrdma_buffer_create(struct rpcrdma_buffer *, struct rpcrdma_ep *,
|
||||
struct rpcrdma_ia *,
|
||||
struct rpcrdma_create_data_internal *);
|
||||
void rpcrdma_buffer_destroy(struct rpcrdma_buffer *);
|
||||
|
||||
struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *);
|
||||
void rpcrdma_buffer_put(struct rpcrdma_req *);
|
||||
void rpcrdma_recv_buffer_get(struct rpcrdma_req *);
|
||||
void rpcrdma_recv_buffer_put(struct rpcrdma_rep *);
|
||||
|
||||
int rpcrdma_register_internal(struct rpcrdma_ia *, void *, int,
|
||||
struct ib_mr **, struct ib_sge *);
|
||||
int rpcrdma_deregister_internal(struct rpcrdma_ia *,
|
||||
struct ib_mr *, struct ib_sge *);
|
||||
|
||||
int rpcrdma_register_external(struct rpcrdma_mr_seg *,
|
||||
int, int, struct rpcrdma_xprt *);
|
||||
int rpcrdma_deregister_external(struct rpcrdma_mr_seg *,
|
||||
struct rpcrdma_xprt *, void *);
|
||||
|
||||
/*
|
||||
* RPC/RDMA connection management calls - xprtrdma/rpc_rdma.c
|
||||
*/
|
||||
void rpcrdma_conn_func(struct rpcrdma_ep *);
|
||||
void rpcrdma_reply_handler(struct rpcrdma_rep *);
|
||||
|
||||
/*
|
||||
* RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c
|
||||
*/
|
||||
int rpcrdma_marshal_req(struct rpc_rqst *);
|
||||
|
||||
#endif /* _LINUX_SUNRPC_XPRT_RDMA_H */
|
@ -13,10 +13,14 @@
|
||||
* (C) 1999 Trond Myklebust <trond.myklebust@fys.uio.no>
|
||||
*
|
||||
* IP socket transport implementation, (C) 2005 Chuck Lever <cel@netapp.com>
|
||||
*
|
||||
* IPv6 support contributed by Gilles Quillard, Bull Open Source, 2005.
|
||||
* <gilles.quillard@bull.net>
|
||||
*/
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/capability.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/errno.h>
|
||||
@ -28,6 +32,7 @@
|
||||
#include <linux/tcp.h>
|
||||
#include <linux/sunrpc/clnt.h>
|
||||
#include <linux/sunrpc/sched.h>
|
||||
#include <linux/sunrpc/xprtsock.h>
|
||||
#include <linux/file.h>
|
||||
|
||||
#include <net/sock.h>
|
||||
@ -260,14 +265,29 @@ struct sock_xprt {
|
||||
#define TCP_RCV_COPY_XID (1UL << 2)
|
||||
#define TCP_RCV_COPY_DATA (1UL << 3)
|
||||
|
||||
static void xs_format_peer_addresses(struct rpc_xprt *xprt)
|
||||
static inline struct sockaddr *xs_addr(struct rpc_xprt *xprt)
|
||||
{
|
||||
struct sockaddr_in *addr = (struct sockaddr_in *) &xprt->addr;
|
||||
return (struct sockaddr *) &xprt->addr;
|
||||
}
|
||||
|
||||
static inline struct sockaddr_in *xs_addr_in(struct rpc_xprt *xprt)
|
||||
{
|
||||
return (struct sockaddr_in *) &xprt->addr;
|
||||
}
|
||||
|
||||
static inline struct sockaddr_in6 *xs_addr_in6(struct rpc_xprt *xprt)
|
||||
{
|
||||
return (struct sockaddr_in6 *) &xprt->addr;
|
||||
}
|
||||
|
||||
static void xs_format_ipv4_peer_addresses(struct rpc_xprt *xprt)
|
||||
{
|
||||
struct sockaddr_in *addr = xs_addr_in(xprt);
|
||||
char *buf;
|
||||
|
||||
buf = kzalloc(20, GFP_KERNEL);
|
||||
if (buf) {
|
||||
snprintf(buf, 20, "%u.%u.%u.%u",
|
||||
snprintf(buf, 20, NIPQUAD_FMT,
|
||||
NIPQUAD(addr->sin_addr.s_addr));
|
||||
}
|
||||
xprt->address_strings[RPC_DISPLAY_ADDR] = buf;
|
||||
@ -279,26 +299,123 @@ static void xs_format_peer_addresses(struct rpc_xprt *xprt)
|
||||
}
|
||||
xprt->address_strings[RPC_DISPLAY_PORT] = buf;
|
||||
|
||||
if (xprt->prot == IPPROTO_UDP)
|
||||
xprt->address_strings[RPC_DISPLAY_PROTO] = "udp";
|
||||
else
|
||||
xprt->address_strings[RPC_DISPLAY_PROTO] = "tcp";
|
||||
buf = kzalloc(8, GFP_KERNEL);
|
||||
if (buf) {
|
||||
if (xprt->prot == IPPROTO_UDP)
|
||||
snprintf(buf, 8, "udp");
|
||||
else
|
||||
snprintf(buf, 8, "tcp");
|
||||
}
|
||||
xprt->address_strings[RPC_DISPLAY_PROTO] = buf;
|
||||
|
||||
buf = kzalloc(48, GFP_KERNEL);
|
||||
if (buf) {
|
||||
snprintf(buf, 48, "addr=%u.%u.%u.%u port=%u proto=%s",
|
||||
snprintf(buf, 48, "addr="NIPQUAD_FMT" port=%u proto=%s",
|
||||
NIPQUAD(addr->sin_addr.s_addr),
|
||||
ntohs(addr->sin_port),
|
||||
xprt->prot == IPPROTO_UDP ? "udp" : "tcp");
|
||||
}
|
||||
xprt->address_strings[RPC_DISPLAY_ALL] = buf;
|
||||
|
||||
buf = kzalloc(10, GFP_KERNEL);
|
||||
if (buf) {
|
||||
snprintf(buf, 10, "%02x%02x%02x%02x",
|
||||
NIPQUAD(addr->sin_addr.s_addr));
|
||||
}
|
||||
xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = buf;
|
||||
|
||||
buf = kzalloc(8, GFP_KERNEL);
|
||||
if (buf) {
|
||||
snprintf(buf, 8, "%4hx",
|
||||
ntohs(addr->sin_port));
|
||||
}
|
||||
xprt->address_strings[RPC_DISPLAY_HEX_PORT] = buf;
|
||||
|
||||
buf = kzalloc(30, GFP_KERNEL);
|
||||
if (buf) {
|
||||
snprintf(buf, 30, NIPQUAD_FMT".%u.%u",
|
||||
NIPQUAD(addr->sin_addr.s_addr),
|
||||
ntohs(addr->sin_port) >> 8,
|
||||
ntohs(addr->sin_port) & 0xff);
|
||||
}
|
||||
xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf;
|
||||
|
||||
xprt->address_strings[RPC_DISPLAY_NETID] =
|
||||
kstrdup(xprt->prot == IPPROTO_UDP ?
|
||||
RPCBIND_NETID_UDP : RPCBIND_NETID_TCP, GFP_KERNEL);
|
||||
}
|
||||
|
||||
static void xs_format_ipv6_peer_addresses(struct rpc_xprt *xprt)
|
||||
{
|
||||
struct sockaddr_in6 *addr = xs_addr_in6(xprt);
|
||||
char *buf;
|
||||
|
||||
buf = kzalloc(40, GFP_KERNEL);
|
||||
if (buf) {
|
||||
snprintf(buf, 40, NIP6_FMT,
|
||||
NIP6(addr->sin6_addr));
|
||||
}
|
||||
xprt->address_strings[RPC_DISPLAY_ADDR] = buf;
|
||||
|
||||
buf = kzalloc(8, GFP_KERNEL);
|
||||
if (buf) {
|
||||
snprintf(buf, 8, "%u",
|
||||
ntohs(addr->sin6_port));
|
||||
}
|
||||
xprt->address_strings[RPC_DISPLAY_PORT] = buf;
|
||||
|
||||
buf = kzalloc(8, GFP_KERNEL);
|
||||
if (buf) {
|
||||
if (xprt->prot == IPPROTO_UDP)
|
||||
snprintf(buf, 8, "udp");
|
||||
else
|
||||
snprintf(buf, 8, "tcp");
|
||||
}
|
||||
xprt->address_strings[RPC_DISPLAY_PROTO] = buf;
|
||||
|
||||
buf = kzalloc(64, GFP_KERNEL);
|
||||
if (buf) {
|
||||
snprintf(buf, 64, "addr="NIP6_FMT" port=%u proto=%s",
|
||||
NIP6(addr->sin6_addr),
|
||||
ntohs(addr->sin6_port),
|
||||
xprt->prot == IPPROTO_UDP ? "udp" : "tcp");
|
||||
}
|
||||
xprt->address_strings[RPC_DISPLAY_ALL] = buf;
|
||||
|
||||
buf = kzalloc(36, GFP_KERNEL);
|
||||
if (buf) {
|
||||
snprintf(buf, 36, NIP6_SEQFMT,
|
||||
NIP6(addr->sin6_addr));
|
||||
}
|
||||
xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = buf;
|
||||
|
||||
buf = kzalloc(8, GFP_KERNEL);
|
||||
if (buf) {
|
||||
snprintf(buf, 8, "%4hx",
|
||||
ntohs(addr->sin6_port));
|
||||
}
|
||||
xprt->address_strings[RPC_DISPLAY_HEX_PORT] = buf;
|
||||
|
||||
buf = kzalloc(50, GFP_KERNEL);
|
||||
if (buf) {
|
||||
snprintf(buf, 50, NIP6_FMT".%u.%u",
|
||||
NIP6(addr->sin6_addr),
|
||||
ntohs(addr->sin6_port) >> 8,
|
||||
ntohs(addr->sin6_port) & 0xff);
|
||||
}
|
||||
xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf;
|
||||
|
||||
xprt->address_strings[RPC_DISPLAY_NETID] =
|
||||
kstrdup(xprt->prot == IPPROTO_UDP ?
|
||||
RPCBIND_NETID_UDP6 : RPCBIND_NETID_TCP6, GFP_KERNEL);
|
||||
}
|
||||
|
||||
static void xs_free_peer_addresses(struct rpc_xprt *xprt)
|
||||
{
|
||||
kfree(xprt->address_strings[RPC_DISPLAY_ADDR]);
|
||||
kfree(xprt->address_strings[RPC_DISPLAY_PORT]);
|
||||
kfree(xprt->address_strings[RPC_DISPLAY_ALL]);
|
||||
int i;
|
||||
|
||||
for (i = 0; i < RPC_DISPLAY_MAX; i++)
|
||||
kfree(xprt->address_strings[i]);
|
||||
}
|
||||
|
||||
#define XS_SENDMSG_FLAGS (MSG_DONTWAIT | MSG_NOSIGNAL)
|
||||
@ -463,19 +580,20 @@ static int xs_udp_send_request(struct rpc_task *task)
|
||||
|
||||
req->rq_xtime = jiffies;
|
||||
status = xs_sendpages(transport->sock,
|
||||
(struct sockaddr *) &xprt->addr,
|
||||
xs_addr(xprt),
|
||||
xprt->addrlen, xdr,
|
||||
req->rq_bytes_sent);
|
||||
|
||||
dprintk("RPC: xs_udp_send_request(%u) = %d\n",
|
||||
xdr->len - req->rq_bytes_sent, status);
|
||||
|
||||
if (likely(status >= (int) req->rq_slen))
|
||||
return 0;
|
||||
|
||||
/* Still some bytes left; set up for a retry later. */
|
||||
if (status > 0)
|
||||
if (status >= 0) {
|
||||
task->tk_bytes_sent += status;
|
||||
if (status >= req->rq_slen)
|
||||
return 0;
|
||||
/* Still some bytes left; set up for a retry later. */
|
||||
status = -EAGAIN;
|
||||
}
|
||||
|
||||
switch (status) {
|
||||
case -ENETUNREACH:
|
||||
@ -523,7 +641,8 @@ static int xs_tcp_send_request(struct rpc_task *task)
|
||||
struct rpc_xprt *xprt = req->rq_xprt;
|
||||
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
|
||||
struct xdr_buf *xdr = &req->rq_snd_buf;
|
||||
int status, retry = 0;
|
||||
int status;
|
||||
unsigned int retry = 0;
|
||||
|
||||
xs_encode_tcp_record_marker(&req->rq_snd_buf);
|
||||
|
||||
@ -661,6 +780,7 @@ static void xs_destroy(struct rpc_xprt *xprt)
|
||||
xs_free_peer_addresses(xprt);
|
||||
kfree(xprt->slot);
|
||||
kfree(xprt);
|
||||
module_put(THIS_MODULE);
|
||||
}
|
||||
|
||||
static inline struct rpc_xprt *xprt_from_sock(struct sock *sk)
|
||||
@ -1139,14 +1259,23 @@ static unsigned short xs_get_random_port(void)
|
||||
*/
|
||||
static void xs_set_port(struct rpc_xprt *xprt, unsigned short port)
|
||||
{
|
||||
struct sockaddr_in *sap = (struct sockaddr_in *) &xprt->addr;
|
||||
struct sockaddr *addr = xs_addr(xprt);
|
||||
|
||||
dprintk("RPC: setting port for xprt %p to %u\n", xprt, port);
|
||||
|
||||
sap->sin_port = htons(port);
|
||||
switch (addr->sa_family) {
|
||||
case AF_INET:
|
||||
((struct sockaddr_in *)addr)->sin_port = htons(port);
|
||||
break;
|
||||
case AF_INET6:
|
||||
((struct sockaddr_in6 *)addr)->sin6_port = htons(port);
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
}
|
||||
|
||||
static int xs_bind(struct sock_xprt *transport, struct socket *sock)
|
||||
static int xs_bind4(struct sock_xprt *transport, struct socket *sock)
|
||||
{
|
||||
struct sockaddr_in myaddr = {
|
||||
.sin_family = AF_INET,
|
||||
@ -1174,8 +1303,42 @@ static int xs_bind(struct sock_xprt *transport, struct socket *sock)
|
||||
else
|
||||
port--;
|
||||
} while (err == -EADDRINUSE && port != transport->port);
|
||||
dprintk("RPC: xs_bind "NIPQUAD_FMT":%u: %s (%d)\n",
|
||||
NIPQUAD(myaddr.sin_addr), port, err ? "failed" : "ok", err);
|
||||
dprintk("RPC: %s "NIPQUAD_FMT":%u: %s (%d)\n",
|
||||
__FUNCTION__, NIPQUAD(myaddr.sin_addr),
|
||||
port, err ? "failed" : "ok", err);
|
||||
return err;
|
||||
}
|
||||
|
||||
static int xs_bind6(struct sock_xprt *transport, struct socket *sock)
|
||||
{
|
||||
struct sockaddr_in6 myaddr = {
|
||||
.sin6_family = AF_INET6,
|
||||
};
|
||||
struct sockaddr_in6 *sa;
|
||||
int err;
|
||||
unsigned short port = transport->port;
|
||||
|
||||
if (!transport->xprt.resvport)
|
||||
port = 0;
|
||||
sa = (struct sockaddr_in6 *)&transport->addr;
|
||||
myaddr.sin6_addr = sa->sin6_addr;
|
||||
do {
|
||||
myaddr.sin6_port = htons(port);
|
||||
err = kernel_bind(sock, (struct sockaddr *) &myaddr,
|
||||
sizeof(myaddr));
|
||||
if (!transport->xprt.resvport)
|
||||
break;
|
||||
if (err == 0) {
|
||||
transport->port = port;
|
||||
break;
|
||||
}
|
||||
if (port <= xprt_min_resvport)
|
||||
port = xprt_max_resvport;
|
||||
else
|
||||
port--;
|
||||
} while (err == -EADDRINUSE && port != transport->port);
|
||||
dprintk("RPC: xs_bind6 "NIP6_FMT":%u: %s (%d)\n",
|
||||
NIP6(myaddr.sin6_addr), port, err ? "failed" : "ok", err);
|
||||
return err;
|
||||
}
|
||||
|
||||
@ -1183,64 +1346,36 @@ static int xs_bind(struct sock_xprt *transport, struct socket *sock)
|
||||
static struct lock_class_key xs_key[2];
|
||||
static struct lock_class_key xs_slock_key[2];
|
||||
|
||||
static inline void xs_reclassify_socket(struct socket *sock)
|
||||
static inline void xs_reclassify_socket4(struct socket *sock)
|
||||
{
|
||||
struct sock *sk = sock->sk;
|
||||
|
||||
BUG_ON(sock_owned_by_user(sk));
|
||||
switch (sk->sk_family) {
|
||||
case AF_INET:
|
||||
sock_lock_init_class_and_name(sk, "slock-AF_INET-NFS",
|
||||
&xs_slock_key[0], "sk_lock-AF_INET-NFS", &xs_key[0]);
|
||||
break;
|
||||
sock_lock_init_class_and_name(sk, "slock-AF_INET-RPC",
|
||||
&xs_slock_key[0], "sk_lock-AF_INET-RPC", &xs_key[0]);
|
||||
}
|
||||
|
||||
case AF_INET6:
|
||||
sock_lock_init_class_and_name(sk, "slock-AF_INET6-NFS",
|
||||
&xs_slock_key[1], "sk_lock-AF_INET6-NFS", &xs_key[1]);
|
||||
break;
|
||||
static inline void xs_reclassify_socket6(struct socket *sock)
|
||||
{
|
||||
struct sock *sk = sock->sk;
|
||||
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
BUG_ON(sock_owned_by_user(sk));
|
||||
sock_lock_init_class_and_name(sk, "slock-AF_INET6-RPC",
|
||||
&xs_slock_key[1], "sk_lock-AF_INET6-RPC", &xs_key[1]);
|
||||
}
|
||||
#else
|
||||
static inline void xs_reclassify_socket(struct socket *sock)
|
||||
static inline void xs_reclassify_socket4(struct socket *sock)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void xs_reclassify_socket6(struct socket *sock)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* xs_udp_connect_worker - set up a UDP socket
|
||||
* @work: RPC transport to connect
|
||||
*
|
||||
* Invoked by a work queue tasklet.
|
||||
*/
|
||||
static void xs_udp_connect_worker(struct work_struct *work)
|
||||
static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
|
||||
{
|
||||
struct sock_xprt *transport =
|
||||
container_of(work, struct sock_xprt, connect_worker.work);
|
||||
struct rpc_xprt *xprt = &transport->xprt;
|
||||
struct socket *sock = transport->sock;
|
||||
int err, status = -EIO;
|
||||
|
||||
if (xprt->shutdown || !xprt_bound(xprt))
|
||||
goto out;
|
||||
|
||||
/* Start by resetting any existing state */
|
||||
xs_close(xprt);
|
||||
|
||||
if ((err = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock)) < 0) {
|
||||
dprintk("RPC: can't create UDP transport socket (%d).\n", -err);
|
||||
goto out;
|
||||
}
|
||||
xs_reclassify_socket(sock);
|
||||
|
||||
if (xs_bind(transport, sock)) {
|
||||
sock_release(sock);
|
||||
goto out;
|
||||
}
|
||||
|
||||
dprintk("RPC: worker connecting xprt %p to address: %s\n",
|
||||
xprt, xprt->address_strings[RPC_DISPLAY_ALL]);
|
||||
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
|
||||
|
||||
if (!transport->inet) {
|
||||
struct sock *sk = sock->sk;
|
||||
@ -1265,6 +1400,84 @@ static void xs_udp_connect_worker(struct work_struct *work)
|
||||
write_unlock_bh(&sk->sk_callback_lock);
|
||||
}
|
||||
xs_udp_do_set_buffer_size(xprt);
|
||||
}
|
||||
|
||||
/**
|
||||
* xs_udp_connect_worker4 - set up a UDP socket
|
||||
* @work: RPC transport to connect
|
||||
*
|
||||
* Invoked by a work queue tasklet.
|
||||
*/
|
||||
static void xs_udp_connect_worker4(struct work_struct *work)
|
||||
{
|
||||
struct sock_xprt *transport =
|
||||
container_of(work, struct sock_xprt, connect_worker.work);
|
||||
struct rpc_xprt *xprt = &transport->xprt;
|
||||
struct socket *sock = transport->sock;
|
||||
int err, status = -EIO;
|
||||
|
||||
if (xprt->shutdown || !xprt_bound(xprt))
|
||||
goto out;
|
||||
|
||||
/* Start by resetting any existing state */
|
||||
xs_close(xprt);
|
||||
|
||||
if ((err = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock)) < 0) {
|
||||
dprintk("RPC: can't create UDP transport socket (%d).\n", -err);
|
||||
goto out;
|
||||
}
|
||||
xs_reclassify_socket4(sock);
|
||||
|
||||
if (xs_bind4(transport, sock)) {
|
||||
sock_release(sock);
|
||||
goto out;
|
||||
}
|
||||
|
||||
dprintk("RPC: worker connecting xprt %p to address: %s\n",
|
||||
xprt, xprt->address_strings[RPC_DISPLAY_ALL]);
|
||||
|
||||
xs_udp_finish_connecting(xprt, sock);
|
||||
status = 0;
|
||||
out:
|
||||
xprt_wake_pending_tasks(xprt, status);
|
||||
xprt_clear_connecting(xprt);
|
||||
}
|
||||
|
||||
/**
|
||||
* xs_udp_connect_worker6 - set up a UDP socket
|
||||
* @work: RPC transport to connect
|
||||
*
|
||||
* Invoked by a work queue tasklet.
|
||||
*/
|
||||
static void xs_udp_connect_worker6(struct work_struct *work)
|
||||
{
|
||||
struct sock_xprt *transport =
|
||||
container_of(work, struct sock_xprt, connect_worker.work);
|
||||
struct rpc_xprt *xprt = &transport->xprt;
|
||||
struct socket *sock = transport->sock;
|
||||
int err, status = -EIO;
|
||||
|
||||
if (xprt->shutdown || !xprt_bound(xprt))
|
||||
goto out;
|
||||
|
||||
/* Start by resetting any existing state */
|
||||
xs_close(xprt);
|
||||
|
||||
if ((err = sock_create_kern(PF_INET6, SOCK_DGRAM, IPPROTO_UDP, &sock)) < 0) {
|
||||
dprintk("RPC: can't create UDP transport socket (%d).\n", -err);
|
||||
goto out;
|
||||
}
|
||||
xs_reclassify_socket6(sock);
|
||||
|
||||
if (xs_bind6(transport, sock) < 0) {
|
||||
sock_release(sock);
|
||||
goto out;
|
||||
}
|
||||
|
||||
dprintk("RPC: worker connecting xprt %p to address: %s\n",
|
||||
xprt, xprt->address_strings[RPC_DISPLAY_ALL]);
|
||||
|
||||
xs_udp_finish_connecting(xprt, sock);
|
||||
status = 0;
|
||||
out:
|
||||
xprt_wake_pending_tasks(xprt, status);
|
||||
@ -1295,42 +1508,9 @@ static void xs_tcp_reuse_connection(struct rpc_xprt *xprt)
|
||||
result);
|
||||
}
|
||||
|
||||
/**
|
||||
* xs_tcp_connect_worker - connect a TCP socket to a remote endpoint
|
||||
* @work: RPC transport to connect
|
||||
*
|
||||
* Invoked by a work queue tasklet.
|
||||
*/
|
||||
static void xs_tcp_connect_worker(struct work_struct *work)
|
||||
static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
|
||||
{
|
||||
struct sock_xprt *transport =
|
||||
container_of(work, struct sock_xprt, connect_worker.work);
|
||||
struct rpc_xprt *xprt = &transport->xprt;
|
||||
struct socket *sock = transport->sock;
|
||||
int err, status = -EIO;
|
||||
|
||||
if (xprt->shutdown || !xprt_bound(xprt))
|
||||
goto out;
|
||||
|
||||
if (!sock) {
|
||||
/* start from scratch */
|
||||
if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) {
|
||||
dprintk("RPC: can't create TCP transport "
|
||||
"socket (%d).\n", -err);
|
||||
goto out;
|
||||
}
|
||||
xs_reclassify_socket(sock);
|
||||
|
||||
if (xs_bind(transport, sock)) {
|
||||
sock_release(sock);
|
||||
goto out;
|
||||
}
|
||||
} else
|
||||
/* "close" the socket, preserving the local port */
|
||||
xs_tcp_reuse_connection(xprt);
|
||||
|
||||
dprintk("RPC: worker connecting xprt %p to address: %s\n",
|
||||
xprt, xprt->address_strings[RPC_DISPLAY_ALL]);
|
||||
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
|
||||
|
||||
if (!transport->inet) {
|
||||
struct sock *sk = sock->sk;
|
||||
@ -1364,8 +1544,46 @@ static void xs_tcp_connect_worker(struct work_struct *work)
|
||||
/* Tell the socket layer to start connecting... */
|
||||
xprt->stat.connect_count++;
|
||||
xprt->stat.connect_start = jiffies;
|
||||
status = kernel_connect(sock, (struct sockaddr *) &xprt->addr,
|
||||
xprt->addrlen, O_NONBLOCK);
|
||||
return kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK);
|
||||
}
|
||||
|
||||
/**
|
||||
* xs_tcp_connect_worker4 - connect a TCP socket to a remote endpoint
|
||||
* @work: RPC transport to connect
|
||||
*
|
||||
* Runs as delayed work on a work queue (it is not a tasklet).
|
||||
*/
|
||||
static void xs_tcp_connect_worker4(struct work_struct *work)
|
||||
{
|
||||
struct sock_xprt *transport =
|
||||
container_of(work, struct sock_xprt, connect_worker.work);
|
||||
struct rpc_xprt *xprt = &transport->xprt;
|
||||
struct socket *sock = transport->sock;
|
||||
int err, status = -EIO;
|
||||
|
||||
if (xprt->shutdown || !xprt_bound(xprt))
|
||||
goto out;
|
||||
|
||||
if (!sock) {
|
||||
/* start from scratch */
|
||||
if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) {
|
||||
dprintk("RPC: can't create TCP transport socket (%d).\n", -err);
|
||||
goto out;
|
||||
}
|
||||
xs_reclassify_socket4(sock);
|
||||
|
||||
if (xs_bind4(transport, sock) < 0) {
|
||||
sock_release(sock);
|
||||
goto out;
|
||||
}
|
||||
} else
|
||||
/* "close" the socket, preserving the local port */
|
||||
xs_tcp_reuse_connection(xprt);
|
||||
|
||||
dprintk("RPC: worker connecting xprt %p to address: %s\n",
|
||||
xprt, xprt->address_strings[RPC_DISPLAY_ALL]);
|
||||
|
||||
status = xs_tcp_finish_connecting(xprt, sock);
|
||||
dprintk("RPC: %p connect status %d connected %d sock state %d\n",
|
||||
xprt, -status, xprt_connected(xprt),
|
||||
sock->sk->sk_state);
|
||||
@ -1390,6 +1608,66 @@ out_clear:
|
||||
xprt_clear_connecting(xprt);
|
||||
}
|
||||
|
||||
/**
 * xs_tcp_connect_worker6 - connect an IPv6 TCP socket to a remote endpoint
 * @work: work item embedded in the sock_xprt that is to be connected
 *
 * Work-queue handler (it receives a struct work_struct). If the transport
 * has no socket yet, a PF_INET6/SOCK_STREAM/IPPROTO_TCP socket is created,
 * reclassified and bound; otherwise the existing socket is recycled through
 * xs_tcp_reuse_connection(). The connect itself is started by
 * xs_tcp_finish_connecting().
 *
 * When the connect attempt reports -EINPROGRESS or -EALREADY, pending tasks
 * are not woken here; on every other path they are woken with the resulting
 * status. xprt_clear_connecting() is called on all paths.
 */
static void xs_tcp_connect_worker6(struct work_struct *work)
{
	struct sock_xprt *sx =
		container_of(work, struct sock_xprt, connect_worker.work);
	struct rpc_xprt *xprt = &sx->xprt;
	struct socket *sock = sx->sock;
	int status = -EIO;
	int err;

	if (xprt->shutdown || !xprt_bound(xprt))
		goto wake;

	if (sock) {
		/* "close" the socket, preserving the local port */
		xs_tcp_reuse_connection(xprt);
	} else {
		/* no socket yet: build one from scratch */
		err = sock_create_kern(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &sock);
		if (err < 0) {
			dprintk("RPC: can't create TCP transport socket (%d).\n", -err);
			goto wake;
		}
		xs_reclassify_socket6(sock);

		if (xs_bind6(sx, sock) < 0) {
			sock_release(sock);
			goto wake;
		}
	}

	dprintk("RPC: worker connecting xprt %p to address: %s\n",
			xprt, xprt->address_strings[RPC_DISPLAY_ALL]);

	status = xs_tcp_finish_connecting(xprt, sock);
	dprintk("RPC: %p connect status %d connected %d sock state %d\n",
			xprt, -status, xprt_connected(xprt), sock->sk->sk_state);

	/* Connect still in flight: leave the pending tasks asleep. */
	if (status == -EINPROGRESS || status == -EALREADY)
		goto clear;

	/*
	 * -ECONNREFUSED / -ECONNRESET keep the existing socket so it can be
	 * retried after a delay; any other failure gets rid of the socket
	 * before the retry.
	 */
	if (status < 0 && status != -ECONNREFUSED && status != -ECONNRESET)
		xs_close(xprt);

wake:
	xprt_wake_pending_tasks(xprt, status);
clear:
	xprt_clear_connecting(xprt);
}
|
||||
|
||||
/**
|
||||
* xs_connect - connect a socket to a remote endpoint
|
||||
* @task: address of RPC task that manages state of connect request
|
||||
@ -1508,7 +1786,8 @@ static struct rpc_xprt_ops xs_tcp_ops = {
|
||||
.print_stats = xs_tcp_print_stats,
|
||||
};
|
||||
|
||||
static struct rpc_xprt *xs_setup_xprt(struct rpc_xprtsock_create *args, unsigned int slot_table_size)
|
||||
static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args,
|
||||
unsigned int slot_table_size)
|
||||
{
|
||||
struct rpc_xprt *xprt;
|
||||
struct sock_xprt *new;
|
||||
@ -1549,8 +1828,9 @@ static struct rpc_xprt *xs_setup_xprt(struct rpc_xprtsock_create *args, unsigned
|
||||
* @args: rpc transport creation arguments
|
||||
*
|
||||
*/
|
||||
struct rpc_xprt *xs_setup_udp(struct rpc_xprtsock_create *args)
|
||||
struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
|
||||
{
|
||||
struct sockaddr *addr = args->dstaddr;
|
||||
struct rpc_xprt *xprt;
|
||||
struct sock_xprt *transport;
|
||||
|
||||
@ -1559,15 +1839,11 @@ struct rpc_xprt *xs_setup_udp(struct rpc_xprtsock_create *args)
|
||||
return xprt;
|
||||
transport = container_of(xprt, struct sock_xprt, xprt);
|
||||
|
||||
if (ntohs(((struct sockaddr_in *)args->dstaddr)->sin_port) != 0)
|
||||
xprt_set_bound(xprt);
|
||||
|
||||
xprt->prot = IPPROTO_UDP;
|
||||
xprt->tsh_size = 0;
|
||||
/* XXX: header size can vary due to auth type, IPv6, etc. */
|
||||
xprt->max_payload = (1U << 16) - (MAX_HEADER << 3);
|
||||
|
||||
INIT_DELAYED_WORK(&transport->connect_worker, xs_udp_connect_worker);
|
||||
xprt->bind_timeout = XS_BIND_TO;
|
||||
xprt->connect_timeout = XS_UDP_CONN_TO;
|
||||
xprt->reestablish_timeout = XS_UDP_REEST_TO;
|
||||
@ -1580,11 +1856,37 @@ struct rpc_xprt *xs_setup_udp(struct rpc_xprtsock_create *args)
|
||||
else
|
||||
xprt_set_timeout(&xprt->timeout, 5, 5 * HZ);
|
||||
|
||||
xs_format_peer_addresses(xprt);
|
||||
switch (addr->sa_family) {
|
||||
case AF_INET:
|
||||
if (((struct sockaddr_in *)addr)->sin_port != htons(0))
|
||||
xprt_set_bound(xprt);
|
||||
|
||||
INIT_DELAYED_WORK(&transport->connect_worker,
|
||||
xs_udp_connect_worker4);
|
||||
xs_format_ipv4_peer_addresses(xprt);
|
||||
break;
|
||||
case AF_INET6:
|
||||
if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0))
|
||||
xprt_set_bound(xprt);
|
||||
|
||||
INIT_DELAYED_WORK(&transport->connect_worker,
|
||||
xs_udp_connect_worker6);
|
||||
xs_format_ipv6_peer_addresses(xprt);
|
||||
break;
|
||||
default:
|
||||
kfree(xprt);
|
||||
return ERR_PTR(-EAFNOSUPPORT);
|
||||
}
|
||||
|
||||
dprintk("RPC: set up transport to address %s\n",
|
||||
xprt->address_strings[RPC_DISPLAY_ALL]);
|
||||
|
||||
return xprt;
|
||||
if (try_module_get(THIS_MODULE))
|
||||
return xprt;
|
||||
|
||||
kfree(xprt->slot);
|
||||
kfree(xprt);
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -1592,8 +1894,9 @@ struct rpc_xprt *xs_setup_udp(struct rpc_xprtsock_create *args)
|
||||
* @args: rpc transport creation arguments
|
||||
*
|
||||
*/
|
||||
struct rpc_xprt *xs_setup_tcp(struct rpc_xprtsock_create *args)
|
||||
struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
|
||||
{
|
||||
struct sockaddr *addr = args->dstaddr;
|
||||
struct rpc_xprt *xprt;
|
||||
struct sock_xprt *transport;
|
||||
|
||||
@ -1602,14 +1905,10 @@ struct rpc_xprt *xs_setup_tcp(struct rpc_xprtsock_create *args)
|
||||
return xprt;
|
||||
transport = container_of(xprt, struct sock_xprt, xprt);
|
||||
|
||||
if (ntohs(((struct sockaddr_in *)args->dstaddr)->sin_port) != 0)
|
||||
xprt_set_bound(xprt);
|
||||
|
||||
xprt->prot = IPPROTO_TCP;
|
||||
xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32);
|
||||
xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
|
||||
|
||||
INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_connect_worker);
|
||||
xprt->bind_timeout = XS_BIND_TO;
|
||||
xprt->connect_timeout = XS_TCP_CONN_TO;
|
||||
xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
|
||||
@ -1622,15 +1921,55 @@ struct rpc_xprt *xs_setup_tcp(struct rpc_xprtsock_create *args)
|
||||
else
|
||||
xprt_set_timeout(&xprt->timeout, 2, 60 * HZ);
|
||||
|
||||
xs_format_peer_addresses(xprt);
|
||||
switch (addr->sa_family) {
|
||||
case AF_INET:
|
||||
if (((struct sockaddr_in *)addr)->sin_port != htons(0))
|
||||
xprt_set_bound(xprt);
|
||||
|
||||
INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_connect_worker4);
|
||||
xs_format_ipv4_peer_addresses(xprt);
|
||||
break;
|
||||
case AF_INET6:
|
||||
if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0))
|
||||
xprt_set_bound(xprt);
|
||||
|
||||
INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_connect_worker6);
|
||||
xs_format_ipv6_peer_addresses(xprt);
|
||||
break;
|
||||
default:
|
||||
kfree(xprt);
|
||||
return ERR_PTR(-EAFNOSUPPORT);
|
||||
}
|
||||
|
||||
dprintk("RPC: set up transport to address %s\n",
|
||||
xprt->address_strings[RPC_DISPLAY_ALL]);
|
||||
|
||||
return xprt;
|
||||
if (try_module_get(THIS_MODULE))
|
||||
return xprt;
|
||||
|
||||
kfree(xprt->slot);
|
||||
kfree(xprt);
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
|
||||
static struct xprt_class xs_udp_transport = {
|
||||
.list = LIST_HEAD_INIT(xs_udp_transport.list),
|
||||
.name = "udp",
|
||||
.owner = THIS_MODULE,
|
||||
.ident = IPPROTO_UDP,
|
||||
.setup = xs_setup_udp,
|
||||
};
|
||||
|
||||
static struct xprt_class xs_tcp_transport = {
|
||||
.list = LIST_HEAD_INIT(xs_tcp_transport.list),
|
||||
.name = "tcp",
|
||||
.owner = THIS_MODULE,
|
||||
.ident = IPPROTO_TCP,
|
||||
.setup = xs_setup_tcp,
|
||||
};
|
||||
|
||||
/**
|
||||
* init_socket_xprt - set up xprtsock's sysctls
|
||||
* init_socket_xprt - set up xprtsock's sysctls, register with RPC client
|
||||
*
|
||||
*/
|
||||
int init_socket_xprt(void)
|
||||
@ -1640,11 +1979,14 @@ int init_socket_xprt(void)
|
||||
sunrpc_table_header = register_sysctl_table(sunrpc_table);
|
||||
#endif
|
||||
|
||||
xprt_register_transport(&xs_udp_transport);
|
||||
xprt_register_transport(&xs_tcp_transport);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* cleanup_socket_xprt - remove xprtsock's sysctls
|
||||
* cleanup_socket_xprt - remove xprtsock's sysctls, unregister
|
||||
*
|
||||
*/
|
||||
void cleanup_socket_xprt(void)
|
||||
@ -1655,4 +1997,7 @@ void cleanup_socket_xprt(void)
|
||||
sunrpc_table_header = NULL;
|
||||
}
|
||||
#endif
|
||||
|
||||
xprt_unregister_transport(&xs_udp_transport);
|
||||
xprt_unregister_transport(&xs_tcp_transport);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user