NFS client updates for Linux 4.18

Highlights include:
 
 Stable fixes:
 - Fix a 1-byte stack overflow in nfs_idmap_read_and_verify_message
 - Fix a hang due to incorrect error returns in rpcrdma_convert_iovs()
 - Revert an incorrect change to the NFSv4.1 callback channel
 - Fix a bug in the NFSv4.1 sequence error handling
 
 Features and optimisations:
 - Support for piggybacking a LAYOUTGET operation to the OPEN compound
 - RDMA performance enhancements to deal with transport congestion
 - Add proper SPDX tags for NetApp-contributed RDMA source
 - Do not request delegated file attributes (size+change) from the server
 - Optimise away a GETATTR in the lookup revalidate code when doing NFSv4 OPEN
 - Optimise away unnecessary lookups for rename targets
 - Misc performance improvements when freeing NFSv4 delegations
 
 Bugfixes and cleanups:
 - Try to fail quickly if proto=rdma
 - Clean up RDMA receive trace points
 - Fix sillyrename to return the delegation when appropriate
 - Misc attribute revalidation fixes
 - Immediately clear the pNFS layout on a file when the server returns ESTALE
 - Return NFS4ERR_DELAY when delegation/layout recalls fail due to igrab()
 - Fix the client behaviour on NFS4ERR_SEQ_FALSE_RETRY
 -----BEGIN PGP SIGNATURE-----
 
 iQIcBAABAgAGBQJbH8gIAAoJEA4mA3inWBJcpzYQAJYY3ykt9oLQgm/2b/D/weDe
 6890M9W5nIeuZq5soWSpYsZTxqIFbGV4laG/eCTW1gUN1TitSZsoOp7kqhRHXOjq
 Rv3ZvjlZsP2qv2SnzsEmhJsynfyB46d19smSTJhgQ8dnXhaZv04Wsd4krLHx0z6p
 uUUis5Q1m+vL7HsFPp3iUareO/DFKeSkw2cQ2V5ksTIEiAzX7GC+Ex/KKWf82nrJ
 hm7+Nq7rLf1QHJkQvsc3fYCMR4gIzEwUu6F8RyxCoAVgD6O90Hx6NbxnINaHDD4N
 U0nRP5LwCyN9hbPWvwcH7Sn4ePDTos2yj2tFO5NP9btTLDVLFSGYZ2a74d9PRdAf
 9jn6f6juSDwI7T6NXvkHzzkJG6Or9ABAUZo+yX5JoD6lmgOcPUJpLRy6fu7UxAuN
 a5OZ7d9edYpOi0Kys8sDSIlLlxZtFkvybOMVuI3dSHsI+c0g39w8oarpqT2wXWMs
 /ZtFz0FCreHhKkNtz7Z49z1UQHDv/XYM0WkcO+eaeK58RLIEE0pZHoMvPKP63lkI
 nbbgHvBRAu38Jtvvu65Hpb/VpBcqNGM5hjN1cfW/BOqAPKW23s4vWVj+/1silfW/
 uw0MkNrDC9endoALp/YMCcMwPvEw9Awt9y4KjMgfVgSnKwXd0HaSZ2zE6aJU3Wry
 Fy2Tv0e0OH3z9Bi/LNuJ
 =YWSl
 -----END PGP SIGNATURE-----

Merge tag 'nfs-for-4.18-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs

Pull NFS client updates from Trond Myklebust:
 "Highlights include:

  Stable fixes:

   - Fix a 1-byte stack overflow in nfs_idmap_read_and_verify_message

   - Fix a hang due to incorrect error returns in rpcrdma_convert_iovs()

   - Revert an incorrect change to the NFSv4.1 callback channel

   - Fix a bug in the NFSv4.1 sequence error handling

  Features and optimisations:

   - Support for piggybacking a LAYOUTGET operation to the OPEN compound

   - RDMA performance enhancements to deal with transport congestion

   - Add proper SPDX tags for NetApp-contributed RDMA source

   - Do not request delegated file attributes (size+change) from the
     server

   - Optimise away a GETATTR in the lookup revalidate code when doing
     NFSv4 OPEN

   - Optimise away unnecessary lookups for rename targets

   - Misc performance improvements when freeing NFSv4 delegations

  Bugfixes and cleanups:

   - Try to fail quickly if proto=rdma

   - Clean up RDMA receive trace points

   - Fix sillyrename to return the delegation when appropriate

   - Misc attribute revalidation fixes

   - Immediately clear the pNFS layout on a file when the server returns
     ESTALE

   - Return NFS4ERR_DELAY when delegation/layout recalls fail due to
     igrab()

   - Fix the client behaviour on NFS4ERR_SEQ_FALSE_RETRY"

* tag 'nfs-for-4.18-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (80 commits)
  skip LAYOUTRETURN if layout is invalid
  NFSv4.1: Fix the client behaviour on NFS4ERR_SEQ_FALSE_RETRY
  NFSv4: Fix a typo in nfs41_sequence_process
  NFSv4: Revert commit 5f83d86cf5 ("NFSv4.x: Fix wraparound issues..")
  NFSv4: Return NFS4ERR_DELAY when a layout recall fails due to igrab()
  NFSv4: Return NFS4ERR_DELAY when a delegation recall fails due to igrab()
  NFSv4.0: Remove transport protocol name from non-UCS client ID
  NFSv4.0: Remove cl_ipaddr from non-UCS client ID
  NFSv4: Fix a compiler warning when CONFIG_NFS_V4_1 is undefined
  NFS: Filter cache invalidation when holding a delegation
  NFS: Ignore NFS_INO_REVAL_FORCED in nfs_check_inode_attributes()
  NFS: Improve caching while holding a delegation
  NFS: Fix attribute revalidation
  NFS: fix up nfs_setattr_update_inode
  NFSv4: Ensure the inode is clean when we set a delegation
  NFSv4: Ignore NFS_INO_REVAL_FORCED in nfs4_proc_access
  NFSv4: Don't ask for delegated attributes when adding a hard link
  NFSv4: Don't ask for delegated attributes when revalidating the inode
  NFS: Pass the inode down to the getattr() callback
  NFSv4: Don't request size+change attribute if they are delegated to us
  ...
This commit is contained in:
Linus Torvalds 2018-06-12 10:09:03 -07:00
commit 0725d4e1b8
38 changed files with 1244 additions and 737 deletions

View File

@ -40,7 +40,9 @@ __be32 nfs4_callback_getattr(void *argp, void *resp,
rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR)); rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR));
inode = nfs_delegation_find_inode(cps->clp, &args->fh); inode = nfs_delegation_find_inode(cps->clp, &args->fh);
if (inode == NULL) { if (IS_ERR(inode)) {
if (inode == ERR_PTR(-EAGAIN))
res->status = htonl(NFS4ERR_DELAY);
trace_nfs4_cb_getattr(cps->clp, &args->fh, NULL, trace_nfs4_cb_getattr(cps->clp, &args->fh, NULL,
-ntohl(res->status)); -ntohl(res->status));
goto out; goto out;
@ -86,7 +88,9 @@ __be32 nfs4_callback_recall(void *argp, void *resp,
res = htonl(NFS4ERR_BADHANDLE); res = htonl(NFS4ERR_BADHANDLE);
inode = nfs_delegation_find_inode(cps->clp, &args->fh); inode = nfs_delegation_find_inode(cps->clp, &args->fh);
if (inode == NULL) { if (IS_ERR(inode)) {
if (inode == ERR_PTR(-EAGAIN))
res = htonl(NFS4ERR_DELAY);
trace_nfs4_cb_recall(cps->clp, &args->fh, NULL, trace_nfs4_cb_recall(cps->clp, &args->fh, NULL,
&args->stateid, -ntohl(res)); &args->stateid, -ntohl(res));
goto out; goto out;
@ -124,7 +128,6 @@ static struct inode *nfs_layout_find_inode_by_stateid(struct nfs_client *clp,
struct inode *inode; struct inode *inode;
struct pnfs_layout_hdr *lo; struct pnfs_layout_hdr *lo;
restart:
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
list_for_each_entry(lo, &server->layouts, plh_layouts) { list_for_each_entry(lo, &server->layouts, plh_layouts) {
if (stateid != NULL && if (stateid != NULL &&
@ -132,20 +135,20 @@ restart:
continue; continue;
inode = igrab(lo->plh_inode); inode = igrab(lo->plh_inode);
if (!inode) if (!inode)
continue; return ERR_PTR(-EAGAIN);
if (!nfs_sb_active(inode->i_sb)) { if (!nfs_sb_active(inode->i_sb)) {
rcu_read_unlock(); rcu_read_unlock();
spin_unlock(&clp->cl_lock); spin_unlock(&clp->cl_lock);
iput(inode); iput(inode);
spin_lock(&clp->cl_lock); spin_lock(&clp->cl_lock);
rcu_read_lock(); rcu_read_lock();
goto restart; return ERR_PTR(-EAGAIN);
} }
return inode; return inode;
} }
} }
return NULL; return ERR_PTR(-ENOENT);
} }
/* /*
@ -162,7 +165,6 @@ static struct inode *nfs_layout_find_inode_by_fh(struct nfs_client *clp,
struct inode *inode; struct inode *inode;
struct pnfs_layout_hdr *lo; struct pnfs_layout_hdr *lo;
restart:
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
list_for_each_entry(lo, &server->layouts, plh_layouts) { list_for_each_entry(lo, &server->layouts, plh_layouts) {
nfsi = NFS_I(lo->plh_inode); nfsi = NFS_I(lo->plh_inode);
@ -172,20 +174,20 @@ restart:
continue; continue;
inode = igrab(lo->plh_inode); inode = igrab(lo->plh_inode);
if (!inode) if (!inode)
continue; return ERR_PTR(-EAGAIN);
if (!nfs_sb_active(inode->i_sb)) { if (!nfs_sb_active(inode->i_sb)) {
rcu_read_unlock(); rcu_read_unlock();
spin_unlock(&clp->cl_lock); spin_unlock(&clp->cl_lock);
iput(inode); iput(inode);
spin_lock(&clp->cl_lock); spin_lock(&clp->cl_lock);
rcu_read_lock(); rcu_read_lock();
goto restart; return ERR_PTR(-EAGAIN);
} }
return inode; return inode;
} }
} }
return NULL; return ERR_PTR(-ENOENT);
} }
static struct inode *nfs_layout_find_inode(struct nfs_client *clp, static struct inode *nfs_layout_find_inode(struct nfs_client *clp,
@ -197,7 +199,7 @@ static struct inode *nfs_layout_find_inode(struct nfs_client *clp,
spin_lock(&clp->cl_lock); spin_lock(&clp->cl_lock);
rcu_read_lock(); rcu_read_lock();
inode = nfs_layout_find_inode_by_stateid(clp, stateid); inode = nfs_layout_find_inode_by_stateid(clp, stateid);
if (!inode) if (inode == ERR_PTR(-ENOENT))
inode = nfs_layout_find_inode_by_fh(clp, fh); inode = nfs_layout_find_inode_by_fh(clp, fh);
rcu_read_unlock(); rcu_read_unlock();
spin_unlock(&clp->cl_lock); spin_unlock(&clp->cl_lock);
@ -252,8 +254,11 @@ static u32 initiate_file_draining(struct nfs_client *clp,
LIST_HEAD(free_me_list); LIST_HEAD(free_me_list);
ino = nfs_layout_find_inode(clp, &args->cbl_fh, &args->cbl_stateid); ino = nfs_layout_find_inode(clp, &args->cbl_fh, &args->cbl_stateid);
if (!ino) if (IS_ERR(ino)) {
goto out; if (ino == ERR_PTR(-EAGAIN))
rv = NFS4ERR_DELAY;
goto out_noput;
}
pnfs_layoutcommit_inode(ino, false); pnfs_layoutcommit_inode(ino, false);
@ -299,9 +304,10 @@ unlock:
nfs_commit_inode(ino, 0); nfs_commit_inode(ino, 0);
pnfs_put_layout_hdr(lo); pnfs_put_layout_hdr(lo);
out: out:
nfs_iput_and_deactive(ino);
out_noput:
trace_nfs4_cb_layoutrecall_file(clp, &args->cbl_fh, ino, trace_nfs4_cb_layoutrecall_file(clp, &args->cbl_fh, ino,
&args->cbl_stateid, -rv); &args->cbl_stateid, -rv);
nfs_iput_and_deactive(ino);
return rv; return rv;
} }
@ -322,6 +328,8 @@ static u32 initiate_bulk_draining(struct nfs_client *clp,
static u32 do_callback_layoutrecall(struct nfs_client *clp, static u32 do_callback_layoutrecall(struct nfs_client *clp,
struct cb_layoutrecallargs *args) struct cb_layoutrecallargs *args)
{ {
write_seqcount_begin(&clp->cl_callback_count);
write_seqcount_end(&clp->cl_callback_count);
if (args->cbl_recall_type == RETURN_FILE) if (args->cbl_recall_type == RETURN_FILE)
return initiate_file_draining(clp, args); return initiate_file_draining(clp, args);
return initiate_bulk_draining(clp, args); return initiate_bulk_draining(clp, args);
@ -420,11 +428,8 @@ validate_seqid(const struct nfs4_slot_table *tbl, const struct nfs4_slot *slot,
return htonl(NFS4ERR_SEQ_FALSE_RETRY); return htonl(NFS4ERR_SEQ_FALSE_RETRY);
} }
/* Wraparound */ /* Note: wraparound relies on seq_nr being of type u32 */
if (unlikely(slot->seq_nr == 0xFFFFFFFFU)) { if (likely(args->csa_sequenceid == slot->seq_nr + 1))
if (args->csa_sequenceid == 1)
return htonl(NFS4_OK);
} else if (likely(args->csa_sequenceid == slot->seq_nr + 1))
return htonl(NFS4_OK); return htonl(NFS4_OK);
/* Misordered request */ /* Misordered request */

View File

@ -969,7 +969,8 @@ struct nfs_server *nfs_create_server(struct nfs_mount_info *mount_info,
} }
if (!(fattr->valid & NFS_ATTR_FATTR)) { if (!(fattr->valid & NFS_ATTR_FATTR)) {
error = nfs_mod->rpc_ops->getattr(server, mount_info->mntfh, fattr, NULL); error = nfs_mod->rpc_ops->getattr(server, mount_info->mntfh,
fattr, NULL, NULL);
if (error < 0) { if (error < 0) {
dprintk("nfs_create_server: getattr error = %d\n", -error); dprintk("nfs_create_server: getattr error = %d\n", -error);
goto error; goto error;

View File

@ -404,6 +404,10 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred,
trace_nfs4_set_delegation(inode, type); trace_nfs4_set_delegation(inode, type);
spin_lock(&inode->i_lock);
if (NFS_I(inode)->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME))
NFS_I(inode)->cache_validity |= NFS_INO_REVAL_FORCED;
spin_unlock(&inode->i_lock);
out: out:
spin_unlock(&clp->cl_lock); spin_unlock(&clp->cl_lock);
if (delegation != NULL) if (delegation != NULL)
@ -483,38 +487,88 @@ out:
int nfs_client_return_marked_delegations(struct nfs_client *clp) int nfs_client_return_marked_delegations(struct nfs_client *clp)
{ {
struct nfs_delegation *delegation; struct nfs_delegation *delegation;
struct nfs_delegation *prev;
struct nfs_server *server; struct nfs_server *server;
struct inode *inode; struct inode *inode;
struct inode *place_holder = NULL;
struct nfs_delegation *place_holder_deleg = NULL;
int err = 0; int err = 0;
restart: restart:
/*
* To avoid quadratic looping we hold a reference
* to an inode place_holder. Each time we restart, we
* resume walking the list of nfs_servers from the server of
* that inode, and the list of delegations on that server from
* the delegation of that inode.
* prev is an RCU-protected pointer to a delegation which
* wasn't marked for return and might be a good choice for
* the next place_holder.
*/
rcu_read_lock(); rcu_read_lock();
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { prev = NULL;
list_for_each_entry_rcu(delegation, &server->delegations, if (place_holder)
super_list) { server = NFS_SERVER(place_holder);
if (!nfs_delegation_need_return(delegation)) else
server = list_entry_rcu(clp->cl_superblocks.next,
struct nfs_server, client_link);
list_for_each_entry_from_rcu(server, &clp->cl_superblocks, client_link) {
delegation = NULL;
if (place_holder && server == NFS_SERVER(place_holder))
delegation = rcu_dereference(NFS_I(place_holder)->delegation);
if (!delegation || delegation != place_holder_deleg)
delegation = list_entry_rcu(server->delegations.next,
struct nfs_delegation, super_list);
list_for_each_entry_from_rcu(delegation, &server->delegations, super_list) {
struct inode *to_put = NULL;
if (!nfs_delegation_need_return(delegation)) {
prev = delegation;
continue; continue;
}
if (!nfs_sb_active(server->super)) if (!nfs_sb_active(server->super))
continue; break; /* continue in outer loop */
if (prev) {
struct inode *tmp;
tmp = nfs_delegation_grab_inode(prev);
if (tmp) {
to_put = place_holder;
place_holder = tmp;
place_holder_deleg = prev;
}
}
inode = nfs_delegation_grab_inode(delegation); inode = nfs_delegation_grab_inode(delegation);
if (inode == NULL) { if (inode == NULL) {
rcu_read_unlock(); rcu_read_unlock();
if (to_put)
iput(to_put);
nfs_sb_deactive(server->super); nfs_sb_deactive(server->super);
goto restart; goto restart;
} }
delegation = nfs_start_delegation_return_locked(NFS_I(inode)); delegation = nfs_start_delegation_return_locked(NFS_I(inode));
rcu_read_unlock(); rcu_read_unlock();
if (to_put)
iput(to_put);
err = nfs_end_delegation_return(inode, delegation, 0); err = nfs_end_delegation_return(inode, delegation, 0);
iput(inode); iput(inode);
nfs_sb_deactive(server->super); nfs_sb_deactive(server->super);
cond_resched();
if (!err) if (!err)
goto restart; goto restart;
set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state); set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state);
if (place_holder)
iput(place_holder);
return err; return err;
} }
} }
rcu_read_unlock(); rcu_read_unlock();
if (place_holder)
iput(place_holder);
return 0; return 0;
} }
@ -802,12 +856,14 @@ nfs_delegation_find_inode_server(struct nfs_server *server,
if (delegation->inode != NULL && if (delegation->inode != NULL &&
nfs_compare_fh(fhandle, &NFS_I(delegation->inode)->fh) == 0) { nfs_compare_fh(fhandle, &NFS_I(delegation->inode)->fh) == 0) {
res = igrab(delegation->inode); res = igrab(delegation->inode);
spin_unlock(&delegation->lock);
if (res != NULL)
return res;
return ERR_PTR(-EAGAIN);
} }
spin_unlock(&delegation->lock); spin_unlock(&delegation->lock);
if (res != NULL)
break;
} }
return res; return ERR_PTR(-ENOENT);
} }
/** /**
@ -822,16 +878,16 @@ struct inode *nfs_delegation_find_inode(struct nfs_client *clp,
const struct nfs_fh *fhandle) const struct nfs_fh *fhandle)
{ {
struct nfs_server *server; struct nfs_server *server;
struct inode *res = NULL; struct inode *res;
rcu_read_lock(); rcu_read_lock();
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
res = nfs_delegation_find_inode_server(server, fhandle); res = nfs_delegation_find_inode_server(server, fhandle);
if (res != NULL) if (res != ERR_PTR(-ENOENT))
break; return res;
} }
rcu_read_unlock(); rcu_read_unlock();
return res; return ERR_PTR(-ENOENT);
} }
static void nfs_delegation_mark_reclaim_server(struct nfs_server *server) static void nfs_delegation_mark_reclaim_server(struct nfs_server *server)
@ -887,7 +943,7 @@ restart:
&delegation->flags) == 0) &delegation->flags) == 0)
continue; continue;
if (!nfs_sb_active(server->super)) if (!nfs_sb_active(server->super))
continue; break; /* continue in outer loop */
inode = nfs_delegation_grab_inode(delegation); inode = nfs_delegation_grab_inode(delegation);
if (inode == NULL) { if (inode == NULL) {
rcu_read_unlock(); rcu_read_unlock();
@ -904,6 +960,7 @@ restart:
} }
iput(inode); iput(inode);
nfs_sb_deactive(server->super); nfs_sb_deactive(server->super);
cond_resched();
goto restart; goto restart;
} }
} }
@ -995,7 +1052,7 @@ restart:
&delegation->flags) == 0) &delegation->flags) == 0)
continue; continue;
if (!nfs_sb_active(server->super)) if (!nfs_sb_active(server->super))
continue; break; /* continue in outer loop */
inode = nfs_delegation_grab_inode(delegation); inode = nfs_delegation_grab_inode(delegation);
if (inode == NULL) { if (inode == NULL) {
rcu_read_unlock(); rcu_read_unlock();
@ -1020,6 +1077,7 @@ restart:
} }
iput(inode); iput(inode);
nfs_sb_deactive(server->super); nfs_sb_deactive(server->super);
cond_resched();
goto restart; goto restart;
} }
} }

View File

@ -1012,13 +1012,25 @@ int nfs_lookup_verify_inode(struct inode *inode, unsigned int flags)
if (IS_AUTOMOUNT(inode)) if (IS_AUTOMOUNT(inode))
return 0; return 0;
if (flags & LOOKUP_OPEN) {
switch (inode->i_mode & S_IFMT) {
case S_IFREG:
/* An NFSv4 OPEN will revalidate later */
if (server->caps & NFS_CAP_ATOMIC_OPEN)
goto out;
/* Fallthrough */
case S_IFDIR:
if (server->flags & NFS_MOUNT_NOCTO)
break;
/* NFS close-to-open cache consistency validation */
goto out_force;
}
}
/* VFS wants an on-the-wire revalidation */ /* VFS wants an on-the-wire revalidation */
if (flags & LOOKUP_REVAL) if (flags & LOOKUP_REVAL)
goto out_force; goto out_force;
/* This is an open(2) */
if ((flags & LOOKUP_OPEN) && !(server->flags & NFS_MOUNT_NOCTO) &&
(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)))
goto out_force;
out: out:
return (inode->i_nlink == 0) ? -ENOENT : 0; return (inode->i_nlink == 0) ? -ENOENT : 0;
out_force: out_force:
@ -1039,13 +1051,15 @@ out_force:
* *
* If LOOKUP_RCU prevents us from performing a full check, return 1 * If LOOKUP_RCU prevents us from performing a full check, return 1
* suggesting a reval is needed. * suggesting a reval is needed.
*
* Note that when creating a new file, or looking up a rename target,
* then it shouldn't be necessary to revalidate a negative dentry.
*/ */
static inline static inline
int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry, int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
unsigned int flags) unsigned int flags)
{ {
/* Don't revalidate a negative dentry if we're creating a new file */ if (flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET))
if (flags & LOOKUP_CREATE)
return 0; return 0;
if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG) if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG)
return 1; return 1;
@ -1106,7 +1120,7 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
goto out_set_verifier; goto out_set_verifier;
/* Force a full look up iff the parent directory has changed */ /* Force a full look up iff the parent directory has changed */
if (!nfs_is_exclusive_create(dir, flags) && if (!(flags & (LOOKUP_EXCL | LOOKUP_REVAL)) &&
nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU)) { nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU)) {
error = nfs_lookup_verify_inode(inode, flags); error = nfs_lookup_verify_inode(inode, flags);
if (error) { if (error) {
@ -1270,11 +1284,13 @@ static void nfs_drop_nlink(struct inode *inode)
{ {
spin_lock(&inode->i_lock); spin_lock(&inode->i_lock);
/* drop the inode if we're reasonably sure this is the last link */ /* drop the inode if we're reasonably sure this is the last link */
if (inode->i_nlink == 1) if (inode->i_nlink > 0)
clear_nlink(inode); drop_nlink(inode);
NFS_I(inode)->attr_gencount = nfs_inc_attr_generation_counter();
NFS_I(inode)->cache_validity |= NFS_INO_INVALID_CHANGE NFS_I(inode)->cache_validity |= NFS_INO_INVALID_CHANGE
| NFS_INO_INVALID_CTIME | NFS_INO_INVALID_CTIME
| NFS_INO_INVALID_OTHER; | NFS_INO_INVALID_OTHER
| NFS_INO_REVAL_FORCED;
spin_unlock(&inode->i_lock); spin_unlock(&inode->i_lock);
} }
@ -1335,7 +1351,7 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in
* If we're doing an exclusive create, optimize away the lookup * If we're doing an exclusive create, optimize away the lookup
* but don't hash the dentry. * but don't hash the dentry.
*/ */
if (nfs_is_exclusive_create(dir, flags)) if (nfs_is_exclusive_create(dir, flags) || flags & LOOKUP_RENAME_TARGET)
return NULL; return NULL;
res = ERR_PTR(-ENOMEM); res = ERR_PTR(-ENOMEM);
@ -1640,7 +1656,8 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
if (!(fattr->valid & NFS_ATTR_FATTR)) { if (!(fattr->valid & NFS_ATTR_FATTR)) {
struct nfs_server *server = NFS_SB(dentry->d_sb); struct nfs_server *server = NFS_SB(dentry->d_sb);
error = server->nfs_client->rpc_ops->getattr(server, fhandle, fattr, NULL); error = server->nfs_client->rpc_ops->getattr(server, fhandle,
fattr, NULL, NULL);
if (error < 0) if (error < 0)
goto out_error; goto out_error;
} }
@ -2036,7 +2053,15 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
} else } else
error = task->tk_status; error = task->tk_status;
rpc_put_task(task); rpc_put_task(task);
nfs_mark_for_revalidate(old_inode); /* Ensure the inode attributes are revalidated */
if (error == 0) {
spin_lock(&old_inode->i_lock);
NFS_I(old_inode)->attr_gencount = nfs_inc_attr_generation_counter();
NFS_I(old_inode)->cache_validity |= NFS_INO_INVALID_CHANGE
| NFS_INO_INVALID_CTIME
| NFS_INO_REVAL_FORCED;
spin_unlock(&old_inode->i_lock);
}
out: out:
if (rehash) if (rehash)
d_rehash(rehash); d_rehash(rehash);

View File

@ -102,7 +102,7 @@ nfs_fh_to_dentry(struct super_block *sb, struct fid *fid,
} }
rpc_ops = NFS_SB(sb)->nfs_client->rpc_ops; rpc_ops = NFS_SB(sb)->nfs_client->rpc_ops;
ret = rpc_ops->getattr(NFS_SB(sb), server_fh, fattr, label); ret = rpc_ops->getattr(NFS_SB(sb), server_fh, fattr, label, NULL);
if (ret) { if (ret) {
dprintk("%s: getattr failed %d\n", __func__, ret); dprintk("%s: getattr failed %d\n", __func__, ret);
dentry = ERR_PTR(ret); dentry = ERR_PTR(ret);

View File

@ -2347,6 +2347,7 @@ static struct pnfs_layoutdriver_type flexfilelayout_type = {
.id = LAYOUT_FLEX_FILES, .id = LAYOUT_FLEX_FILES,
.name = "LAYOUT_FLEX_FILES", .name = "LAYOUT_FLEX_FILES",
.owner = THIS_MODULE, .owner = THIS_MODULE,
.flags = PNFS_LAYOUTGET_ON_OPEN,
.set_layoutdriver = ff_layout_set_layoutdriver, .set_layoutdriver = ff_layout_set_layoutdriver,
.alloc_layout_hdr = ff_layout_alloc_layout_hdr, .alloc_layout_hdr = ff_layout_alloc_layout_hdr,
.free_layout_hdr = ff_layout_free_layout_hdr, .free_layout_hdr = ff_layout_free_layout_hdr,

View File

@ -195,10 +195,16 @@ bool nfs_check_cache_invalid(struct inode *inode, unsigned long flags)
static void nfs_set_cache_invalid(struct inode *inode, unsigned long flags) static void nfs_set_cache_invalid(struct inode *inode, unsigned long flags)
{ {
struct nfs_inode *nfsi = NFS_I(inode); struct nfs_inode *nfsi = NFS_I(inode);
bool have_delegation = nfs_have_delegated_attributes(inode); bool have_delegation = NFS_PROTO(inode)->have_delegation(inode, FMODE_READ);
if (have_delegation) {
if (!(flags & NFS_INO_REVAL_FORCED))
flags &= ~NFS_INO_INVALID_OTHER;
flags &= ~(NFS_INO_INVALID_CHANGE
| NFS_INO_INVALID_SIZE
| NFS_INO_REVAL_PAGECACHE);
}
if (have_delegation)
flags &= ~(NFS_INO_INVALID_CHANGE|NFS_INO_REVAL_PAGECACHE);
if (inode->i_mapping->nrpages == 0) if (inode->i_mapping->nrpages == 0)
flags &= ~NFS_INO_INVALID_DATA; flags &= ~NFS_INO_INVALID_DATA;
nfsi->cache_validity |= flags; nfsi->cache_validity |= flags;
@ -448,6 +454,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st
/* We can't support update_atime(), since the server will reset it */ /* We can't support update_atime(), since the server will reset it */
inode->i_flags |= S_NOATIME|S_NOCMTIME; inode->i_flags |= S_NOATIME|S_NOCMTIME;
inode->i_mode = fattr->mode; inode->i_mode = fattr->mode;
nfsi->cache_validity = 0;
if ((fattr->valid & NFS_ATTR_FATTR_MODE) == 0 if ((fattr->valid & NFS_ATTR_FATTR_MODE) == 0
&& nfs_server_capable(inode, NFS_CAP_MODE)) && nfs_server_capable(inode, NFS_CAP_MODE))
nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER); nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER);
@ -534,6 +541,9 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st
inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used); inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used);
} }
if (nfsi->cache_validity != 0)
nfsi->cache_validity |= NFS_INO_REVAL_FORCED;
nfs_setsecurity(inode, fattr, label); nfs_setsecurity(inode, fattr, label);
nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
@ -667,9 +677,13 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr,
spin_lock(&inode->i_lock); spin_lock(&inode->i_lock);
NFS_I(inode)->attr_gencount = fattr->gencount; NFS_I(inode)->attr_gencount = fattr->gencount;
nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE if ((attr->ia_valid & ATTR_SIZE) != 0) {
| NFS_INO_INVALID_CTIME); nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME);
nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC);
nfs_vmtruncate(inode, attr->ia_size);
}
if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) { if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) {
NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_CTIME;
if ((attr->ia_valid & ATTR_MODE) != 0) { if ((attr->ia_valid & ATTR_MODE) != 0) {
int mode = attr->ia_mode & S_IALLUGO; int mode = attr->ia_mode & S_IALLUGO;
mode |= inode->i_mode & ~S_IALLUGO; mode |= inode->i_mode & ~S_IALLUGO;
@ -679,13 +693,45 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr,
inode->i_uid = attr->ia_uid; inode->i_uid = attr->ia_uid;
if ((attr->ia_valid & ATTR_GID) != 0) if ((attr->ia_valid & ATTR_GID) != 0)
inode->i_gid = attr->ia_gid; inode->i_gid = attr->ia_gid;
if (fattr->valid & NFS_ATTR_FATTR_CTIME)
inode->i_ctime = fattr->ctime;
else
nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE
| NFS_INO_INVALID_CTIME);
nfs_set_cache_invalid(inode, NFS_INO_INVALID_ACCESS nfs_set_cache_invalid(inode, NFS_INO_INVALID_ACCESS
| NFS_INO_INVALID_ACL); | NFS_INO_INVALID_ACL);
} }
if ((attr->ia_valid & ATTR_SIZE) != 0) { if (attr->ia_valid & (ATTR_ATIME_SET|ATTR_ATIME)) {
nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME); NFS_I(inode)->cache_validity &= ~(NFS_INO_INVALID_ATIME
nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC); | NFS_INO_INVALID_CTIME);
nfs_vmtruncate(inode, attr->ia_size); if (fattr->valid & NFS_ATTR_FATTR_ATIME)
inode->i_atime = fattr->atime;
else if (attr->ia_valid & ATTR_ATIME_SET)
inode->i_atime = attr->ia_atime;
else
nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATIME);
if (fattr->valid & NFS_ATTR_FATTR_CTIME)
inode->i_ctime = fattr->ctime;
else
nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE
| NFS_INO_INVALID_CTIME);
}
if (attr->ia_valid & (ATTR_MTIME_SET|ATTR_MTIME)) {
NFS_I(inode)->cache_validity &= ~(NFS_INO_INVALID_MTIME
| NFS_INO_INVALID_CTIME);
if (fattr->valid & NFS_ATTR_FATTR_MTIME)
inode->i_mtime = fattr->mtime;
else if (attr->ia_valid & ATTR_MTIME_SET)
inode->i_mtime = attr->ia_mtime;
else
nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME);
if (fattr->valid & NFS_ATTR_FATTR_CTIME)
inode->i_ctime = fattr->ctime;
else
nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE
| NFS_INO_INVALID_CTIME);
} }
if (fattr->valid) if (fattr->valid)
nfs_update_inode(inode, fattr); nfs_update_inode(inode, fattr);
@ -1097,7 +1143,8 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
goto out; goto out;
} }
status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), fattr, label); status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), fattr,
label, inode);
if (status != 0) { if (status != 0) {
dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Lu) getattr failed, error=%d\n", dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Lu) getattr failed, error=%d\n",
inode->i_sb->s_id, inode->i_sb->s_id,
@ -1349,8 +1396,9 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
unsigned long invalid = 0; unsigned long invalid = 0;
if (nfs_have_delegated_attributes(inode)) if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ))
return 0; return 0;
/* Has the inode gone and changed behind our back? */ /* Has the inode gone and changed behind our back? */
if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid) if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid)
return -ESTALE; return -ESTALE;
@ -1400,7 +1448,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
invalid |= NFS_INO_INVALID_ATIME; invalid |= NFS_INO_INVALID_ATIME;
if (invalid != 0) if (invalid != 0)
nfs_set_cache_invalid(inode, invalid | NFS_INO_REVAL_FORCED); nfs_set_cache_invalid(inode, invalid);
nfsi->read_cache_jiffies = fattr->time_start; nfsi->read_cache_jiffies = fattr->time_start;
return 0; return 0;
@ -1629,7 +1677,8 @@ int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr)
nfs_fattr_set_barrier(fattr); nfs_fattr_set_barrier(fattr);
status = nfs_post_op_update_inode_locked(inode, fattr, status = nfs_post_op_update_inode_locked(inode, fattr,
NFS_INO_INVALID_CHANGE NFS_INO_INVALID_CHANGE
| NFS_INO_INVALID_CTIME); | NFS_INO_INVALID_CTIME
| NFS_INO_REVAL_FORCED);
spin_unlock(&inode->i_lock); spin_unlock(&inode->i_lock);
return status; return status;
@ -1746,6 +1795,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
unsigned long save_cache_validity; unsigned long save_cache_validity;
bool have_writers = nfs_file_has_buffered_writers(nfsi); bool have_writers = nfs_file_has_buffered_writers(nfsi);
bool cache_revalidated = true; bool cache_revalidated = true;
bool attr_changed = false;
bool have_delegation;
dfprintk(VFS, "NFS: %s(%s/%lu fh_crc=0x%08x ct=%d info=0x%x)\n", dfprintk(VFS, "NFS: %s(%s/%lu fh_crc=0x%08x ct=%d info=0x%x)\n",
__func__, inode->i_sb->s_id, inode->i_ino, __func__, inode->i_sb->s_id, inode->i_ino,
@ -1780,6 +1831,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
!IS_AUTOMOUNT(inode)) !IS_AUTOMOUNT(inode))
server->fsid = fattr->fsid; server->fsid = fattr->fsid;
/* Save the delegation state before clearing cache_validity */
have_delegation = nfs_have_delegated_attributes(inode);
/* /*
* Update the read time so we don't revalidate too often. * Update the read time so we don't revalidate too often.
*/ */
@ -1802,12 +1856,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
/* More cache consistency checks */ /* More cache consistency checks */
if (fattr->valid & NFS_ATTR_FATTR_CHANGE) { if (fattr->valid & NFS_ATTR_FATTR_CHANGE) {
if (!inode_eq_iversion_raw(inode, fattr->change_attr)) { if (!inode_eq_iversion_raw(inode, fattr->change_attr)) {
dprintk("NFS: change_attr change on server for file %s/%ld\n",
inode->i_sb->s_id, inode->i_ino);
/* Could it be a race with writeback? */ /* Could it be a race with writeback? */
if (!have_writers) { if (!(have_writers || have_delegation)) {
invalid |= NFS_INO_INVALID_CHANGE invalid |= NFS_INO_INVALID_DATA
| NFS_INO_INVALID_DATA
| NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACCESS
| NFS_INO_INVALID_ACL; | NFS_INO_INVALID_ACL;
/* Force revalidate of all attributes */ /* Force revalidate of all attributes */
@ -1817,8 +1868,12 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
| NFS_INO_INVALID_OTHER; | NFS_INO_INVALID_OTHER;
if (S_ISDIR(inode->i_mode)) if (S_ISDIR(inode->i_mode))
nfs_force_lookup_revalidate(inode); nfs_force_lookup_revalidate(inode);
dprintk("NFS: change_attr change on server for file %s/%ld\n",
inode->i_sb->s_id,
inode->i_ino);
} }
inode_set_iversion_raw(inode, fattr->change_attr); inode_set_iversion_raw(inode, fattr->change_attr);
attr_changed = true;
} }
} else { } else {
nfsi->cache_validity |= save_cache_validity & nfsi->cache_validity |= save_cache_validity &
@ -1850,13 +1905,14 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
if (fattr->valid & NFS_ATTR_FATTR_SIZE) { if (fattr->valid & NFS_ATTR_FATTR_SIZE) {
new_isize = nfs_size_to_loff_t(fattr->size); new_isize = nfs_size_to_loff_t(fattr->size);
cur_isize = i_size_read(inode); cur_isize = i_size_read(inode);
if (new_isize != cur_isize) { if (new_isize != cur_isize && !have_delegation) {
/* Do we perhaps have any outstanding writes, or has /* Do we perhaps have any outstanding writes, or has
* the file grown beyond our last write? */ * the file grown beyond our last write? */
if (!nfs_have_writebacks(inode) || new_isize > cur_isize) { if (!nfs_have_writebacks(inode) || new_isize > cur_isize) {
i_size_write(inode, new_isize); i_size_write(inode, new_isize);
if (!have_writers) if (!have_writers)
invalid |= NFS_INO_INVALID_DATA; invalid |= NFS_INO_INVALID_DATA;
attr_changed = true;
} }
dprintk("NFS: isize change on server for file %s/%ld " dprintk("NFS: isize change on server for file %s/%ld "
"(%Ld to %Ld)\n", "(%Ld to %Ld)\n",
@ -1889,14 +1945,12 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
newmode |= fattr->mode & S_IALLUGO; newmode |= fattr->mode & S_IALLUGO;
inode->i_mode = newmode; inode->i_mode = newmode;
invalid |= NFS_INO_INVALID_ACCESS invalid |= NFS_INO_INVALID_ACCESS
| NFS_INO_INVALID_ACL | NFS_INO_INVALID_ACL;
| NFS_INO_INVALID_OTHER; attr_changed = true;
} }
} else if (server->caps & NFS_CAP_MODE) { } else if (server->caps & NFS_CAP_MODE) {
nfsi->cache_validity |= save_cache_validity & nfsi->cache_validity |= save_cache_validity &
(NFS_INO_INVALID_ACCESS (NFS_INO_INVALID_OTHER
| NFS_INO_INVALID_ACL
| NFS_INO_INVALID_OTHER
| NFS_INO_REVAL_FORCED); | NFS_INO_REVAL_FORCED);
cache_revalidated = false; cache_revalidated = false;
} }
@ -1904,15 +1958,13 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
if (fattr->valid & NFS_ATTR_FATTR_OWNER) { if (fattr->valid & NFS_ATTR_FATTR_OWNER) {
if (!uid_eq(inode->i_uid, fattr->uid)) { if (!uid_eq(inode->i_uid, fattr->uid)) {
invalid |= NFS_INO_INVALID_ACCESS invalid |= NFS_INO_INVALID_ACCESS
| NFS_INO_INVALID_ACL | NFS_INO_INVALID_ACL;
| NFS_INO_INVALID_OTHER;
inode->i_uid = fattr->uid; inode->i_uid = fattr->uid;
attr_changed = true;
} }
} else if (server->caps & NFS_CAP_OWNER) { } else if (server->caps & NFS_CAP_OWNER) {
nfsi->cache_validity |= save_cache_validity & nfsi->cache_validity |= save_cache_validity &
(NFS_INO_INVALID_ACCESS (NFS_INO_INVALID_OTHER
| NFS_INO_INVALID_ACL
| NFS_INO_INVALID_OTHER
| NFS_INO_REVAL_FORCED); | NFS_INO_REVAL_FORCED);
cache_revalidated = false; cache_revalidated = false;
} }
@ -1920,25 +1972,23 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
if (fattr->valid & NFS_ATTR_FATTR_GROUP) { if (fattr->valid & NFS_ATTR_FATTR_GROUP) {
if (!gid_eq(inode->i_gid, fattr->gid)) { if (!gid_eq(inode->i_gid, fattr->gid)) {
invalid |= NFS_INO_INVALID_ACCESS invalid |= NFS_INO_INVALID_ACCESS
| NFS_INO_INVALID_ACL | NFS_INO_INVALID_ACL;
| NFS_INO_INVALID_OTHER;
inode->i_gid = fattr->gid; inode->i_gid = fattr->gid;
attr_changed = true;
} }
} else if (server->caps & NFS_CAP_OWNER_GROUP) { } else if (server->caps & NFS_CAP_OWNER_GROUP) {
nfsi->cache_validity |= save_cache_validity & nfsi->cache_validity |= save_cache_validity &
(NFS_INO_INVALID_ACCESS (NFS_INO_INVALID_OTHER
| NFS_INO_INVALID_ACL
| NFS_INO_INVALID_OTHER
| NFS_INO_REVAL_FORCED); | NFS_INO_REVAL_FORCED);
cache_revalidated = false; cache_revalidated = false;
} }
if (fattr->valid & NFS_ATTR_FATTR_NLINK) { if (fattr->valid & NFS_ATTR_FATTR_NLINK) {
if (inode->i_nlink != fattr->nlink) { if (inode->i_nlink != fattr->nlink) {
invalid |= NFS_INO_INVALID_OTHER;
if (S_ISDIR(inode->i_mode)) if (S_ISDIR(inode->i_mode))
invalid |= NFS_INO_INVALID_DATA; invalid |= NFS_INO_INVALID_DATA;
set_nlink(inode, fattr->nlink); set_nlink(inode, fattr->nlink);
attr_changed = true;
} }
} else if (server->caps & NFS_CAP_NLINK) { } else if (server->caps & NFS_CAP_NLINK) {
nfsi->cache_validity |= save_cache_validity & nfsi->cache_validity |= save_cache_validity &
@ -1958,7 +2008,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
cache_revalidated = false; cache_revalidated = false;
/* Update attrtimeo value if we're out of the unstable period */ /* Update attrtimeo value if we're out of the unstable period */
if (invalid & NFS_INO_INVALID_ATTR) { if (attr_changed) {
invalid &= ~NFS_INO_INVALID_ATTR; invalid &= ~NFS_INO_INVALID_ATTR;
nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE);
nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
@ -1984,9 +2034,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)
|| S_ISLNK(inode->i_mode))) || S_ISLNK(inode->i_mode)))
invalid &= ~NFS_INO_INVALID_DATA; invalid &= ~NFS_INO_INVALID_DATA;
if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ) || nfs_set_cache_invalid(inode, invalid);
(save_cache_validity & NFS_INO_REVAL_FORCED))
nfs_set_cache_invalid(inode, invalid);
return 0; return 0;
out_err: out_err:

View File

@ -101,7 +101,8 @@ nfs3_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
*/ */
static int static int
nfs3_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, nfs3_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fattr *fattr, struct nfs4_label *label) struct nfs_fattr *fattr, struct nfs4_label *label,
struct inode *inode)
{ {
struct rpc_message msg = { struct rpc_message msg = {
.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR], .rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR],
@ -414,7 +415,9 @@ out:
} }
static void static void
nfs3_proc_unlink_setup(struct rpc_message *msg, struct dentry *dentry) nfs3_proc_unlink_setup(struct rpc_message *msg,
struct dentry *dentry,
struct inode *inode)
{ {
msg->rpc_proc = &nfs3_procedures[NFS3PROC_REMOVE]; msg->rpc_proc = &nfs3_procedures[NFS3PROC_REMOVE];
} }
@ -823,7 +826,8 @@ static int nfs3_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
} }
static void nfs3_proc_write_setup(struct nfs_pgio_header *hdr, static void nfs3_proc_write_setup(struct nfs_pgio_header *hdr,
struct rpc_message *msg) struct rpc_message *msg,
struct rpc_clnt **clnt)
{ {
msg->rpc_proc = &nfs3_procedures[NFS3PROC_WRITE]; msg->rpc_proc = &nfs3_procedures[NFS3PROC_WRITE];
} }
@ -844,7 +848,8 @@ static int nfs3_commit_done(struct rpc_task *task, struct nfs_commit_data *data)
return 0; return 0;
} }
static void nfs3_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg) static void nfs3_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg,
struct rpc_clnt **clnt)
{ {
msg->rpc_proc = &nfs3_procedures[NFS3PROC_COMMIT]; msg->rpc_proc = &nfs3_procedures[NFS3PROC_COMMIT];
} }

View File

@ -370,6 +370,10 @@ nfs42_layoutstat_done(struct rpc_task *task, void *calldata)
switch (task->tk_status) { switch (task->tk_status) {
case 0: case 0:
break; break;
case -NFS4ERR_BADHANDLE:
case -ESTALE:
pnfs_destroy_layout(NFS_I(inode));
break;
case -NFS4ERR_EXPIRED: case -NFS4ERR_EXPIRED:
case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_DELEG_REVOKED: case -NFS4ERR_DELEG_REVOKED:
@ -462,7 +466,7 @@ int nfs42_proc_layoutstats_generic(struct nfs_server *server,
nfs42_layoutstat_release(data); nfs42_layoutstat_release(data);
return -EAGAIN; return -EAGAIN;
} }
nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0, 0);
task = rpc_run_task(&task_setup); task = rpc_run_task(&task_setup);
if (IS_ERR(task)) if (IS_ERR(task))
return PTR_ERR(task); return PTR_ERR(task);

View File

@ -212,6 +212,31 @@ struct nfs4_state_recovery_ops {
struct rpc_cred *); struct rpc_cred *);
}; };
/*
 * Per-call state for one NFSv4 OPEN and its optional OPEN_CONFIRM.
 *
 * The structure is reference counted through @kref; nfs4_opendata_free()
 * is the kref release function, and the code that launches the OPEN and
 * OPEN_CONFIRM RPC tasks takes an extra reference with kref_get() before
 * calling rpc_run_task().
 */
struct nfs4_opendata {
/* Reference count; released via nfs4_opendata_free(). */
struct kref kref;
/* Arguments and results of the OPEN call itself. */
struct nfs_openargs o_arg;
struct nfs_openres o_res;
/* Arguments and results of the follow-up OPEN_CONFIRM call. */
struct nfs_open_confirmargs c_arg;
struct nfs_open_confirmres c_res;
struct nfs4_string owner_name;
struct nfs4_string group_name;
struct nfs4_label *a_label;
struct nfs_fattr f_attr;
struct nfs4_label *f_label;
struct dentry *dir;
struct dentry *dentry;
struct nfs4_state_owner *owner;
struct nfs4_state *state;
struct iattr attrs;
/*
 * LAYOUTGET state piggybacked on the OPEN: set up by pnfs_lgopen_prepare()
 * for non-recovery opens, consumed by pnfs_parse_lgopen(), and dropped with
 * nfs4_lgopen_release() when the opendata is freed.
 */
struct nfs4_layoutget *lgp;
/* Set to jiffies just before the OPEN_CONFIRM task is started. */
unsigned long timestamp;
/* rpc_done, cancelled and rpc_status are reset before each task run. */
bool rpc_done;
bool file_created;
/*
 * True when the open is issued without an open context (state recovery);
 * also passed as the "privileged" flag when initialising the
 * OPEN_CONFIRM sequence arguments.
 */
bool is_recover;
bool cancelled;
int rpc_status;
};
struct nfs4_add_xprt_data { struct nfs4_add_xprt_data {
struct nfs_client *clp; struct nfs_client *clp;
struct rpc_cred *cred; struct rpc_cred *cred;
@ -251,7 +276,7 @@ extern int nfs4_handle_exception(struct nfs_server *, int, struct nfs4_exception
extern int nfs4_call_sync(struct rpc_clnt *, struct nfs_server *, extern int nfs4_call_sync(struct rpc_clnt *, struct nfs_server *,
struct rpc_message *, struct nfs4_sequence_args *, struct rpc_message *, struct nfs4_sequence_args *,
struct nfs4_sequence_res *, int); struct nfs4_sequence_res *, int);
extern void nfs4_init_sequence(struct nfs4_sequence_args *, struct nfs4_sequence_res *, int); extern void nfs4_init_sequence(struct nfs4_sequence_args *, struct nfs4_sequence_res *, int, int);
extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *); extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *);
extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *); extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *);
extern int nfs4_proc_get_rootfh(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *, bool); extern int nfs4_proc_get_rootfh(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *, bool);

View File

@ -343,7 +343,7 @@ static ssize_t nfs_idmap_lookup_name(__u32 id, const char *type, char *buf,
int id_len; int id_len;
ssize_t ret; ssize_t ret;
id_len = snprintf(id_str, sizeof(id_str), "%u", id); id_len = nfs_map_numeric_to_string(id, id_str, sizeof(id_str));
ret = nfs_idmap_get_key(id_str, id_len, type, buf, buflen, idmap); ret = nfs_idmap_get_key(id_str, id_len, type, buf, buflen, idmap);
if (ret < 0) if (ret < 0)
return -EINVAL; return -EINVAL;
@ -627,7 +627,8 @@ static int nfs_idmap_read_and_verify_message(struct idmap_msg *im,
if (strcmp(upcall->im_name, im->im_name) != 0) if (strcmp(upcall->im_name, im->im_name) != 0)
break; break;
/* Note: here we store the NUL terminator too */ /* Note: here we store the NUL terminator too */
len = sprintf(id_str, "%d", im->im_id) + 1; len = 1 + nfs_map_numeric_to_string(im->im_id, id_str,
sizeof(id_str));
ret = nfs_idmap_instantiate(key, authkey, id_str, len); ret = nfs_idmap_instantiate(key, authkey, id_str, len);
break; break;
case IDMAP_CONV_IDTONAME: case IDMAP_CONV_IDTONAME:

View File

@ -71,6 +71,8 @@
#define NFSDBG_FACILITY NFSDBG_PROC #define NFSDBG_FACILITY NFSDBG_PROC
#define NFS4_BITMASK_SZ 3
#define NFS4_POLL_RETRY_MIN (HZ/10) #define NFS4_POLL_RETRY_MIN (HZ/10)
#define NFS4_POLL_RETRY_MAX (15*HZ) #define NFS4_POLL_RETRY_MAX (15*HZ)
@ -86,12 +88,11 @@
| ATTR_MTIME_SET) | ATTR_MTIME_SET)
struct nfs4_opendata; struct nfs4_opendata;
static int _nfs4_proc_open(struct nfs4_opendata *data);
static int _nfs4_recover_proc_open(struct nfs4_opendata *data); static int _nfs4_recover_proc_open(struct nfs4_opendata *data);
static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr); static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr);
static int nfs4_proc_getattr(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *, struct nfs4_label *label); static int nfs4_proc_getattr(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *, struct nfs4_label *label, struct inode *inode);
static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr, struct nfs4_label *label); static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr, struct nfs4_label *label, struct inode *inode);
static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
struct nfs_fattr *fattr, struct iattr *sattr, struct nfs_fattr *fattr, struct iattr *sattr,
struct nfs_open_context *ctx, struct nfs4_label *ilabel, struct nfs_open_context *ctx, struct nfs4_label *ilabel,
@ -274,6 +275,33 @@ const u32 nfs4_fs_locations_bitmap[3] = {
| FATTR4_WORD1_MOUNTED_ON_FILEID, | FATTR4_WORD1_MOUNTED_ON_FILEID,
}; };
/*
 * nfs4_bitmap_copy_adjust - copy an attribute request bitmap, dropping
 * attributes that do not need to be fetched for a delegated inode.
 * @dst:   destination bitmap; must hold NFS4_BITMASK_SZ words
 * @src:   source bitmap; NFS4_BITMASK_SZ words are read from it
 * @inode: inode the request is for, or NULL
 *
 * The bitmap is always copied verbatim first.  If @inode is NULL or no
 * read delegation is held on it, the copy is returned unchanged.
 * Otherwise FATTR4_WORD0_SIZE and FATTR4_WORD0_CHANGE are cleared from
 * dst[0] unless the corresponding NFS_INO_INVALID_* bit says the cached
 * value still has to be refreshed.
 */
static void nfs4_bitmap_copy_adjust(__u32 *dst, const __u32 *src,
struct inode *inode)
{
unsigned long cache_validity;
memcpy(dst, src, NFS4_BITMASK_SZ*sizeof(*dst));
if (!inode || !nfs4_have_delegation(inode, FMODE_READ))
return;
/* Work on a single snapshot of the flags; only the local copy is edited. */
cache_validity = READ_ONCE(NFS_I(inode)->cache_validity);
/*
 * Without NFS_INO_REVAL_FORCED, the change and size invalid bits are
 * ignored, so both attributes end up masked out of the request below.
 */
if (!(cache_validity & NFS_INO_REVAL_FORCED))
cache_validity &= ~(NFS_INO_INVALID_CHANGE
| NFS_INO_INVALID_SIZE);
if (!(cache_validity & NFS_INO_INVALID_SIZE))
dst[0] &= ~FATTR4_WORD0_SIZE;
if (!(cache_validity & NFS_INO_INVALID_CHANGE))
dst[0] &= ~FATTR4_WORD0_CHANGE;
}
/*
 * nfs4_bitmap_copy_adjust_setattr - build the attribute bitmap for
 * operations that modify the inode (used by the SETATTR and LINK paths).
 *
 * Currently a plain pass-through to nfs4_bitmap_copy_adjust(); @dst, @src
 * and @inode have the same meaning as there.
 */
static void nfs4_bitmap_copy_adjust_setattr(__u32 *dst,
const __u32 *src, struct inode *inode)
{
nfs4_bitmap_copy_adjust(dst, src, inode);
}
static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dentry, static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dentry,
struct nfs4_readdir_arg *readdir) struct nfs4_readdir_arg *readdir)
{ {
@ -407,6 +435,11 @@ static int nfs4_do_handle_exception(struct nfs_server *server,
switch(errorcode) { switch(errorcode) {
case 0: case 0:
return 0; return 0;
case -NFS4ERR_BADHANDLE:
case -ESTALE:
if (inode != NULL && S_ISREG(inode->i_mode))
pnfs_destroy_layout(NFS_I(inode));
break;
case -NFS4ERR_DELEG_REVOKED: case -NFS4ERR_DELEG_REVOKED:
case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_EXPIRED: case -NFS4ERR_EXPIRED:
@ -608,20 +641,16 @@ struct nfs4_call_sync_data {
}; };
void nfs4_init_sequence(struct nfs4_sequence_args *args, void nfs4_init_sequence(struct nfs4_sequence_args *args,
struct nfs4_sequence_res *res, int cache_reply) struct nfs4_sequence_res *res, int cache_reply,
int privileged)
{ {
args->sa_slot = NULL; args->sa_slot = NULL;
args->sa_cache_this = cache_reply; args->sa_cache_this = cache_reply;
args->sa_privileged = 0; args->sa_privileged = privileged;
res->sr_slot = NULL; res->sr_slot = NULL;
} }
static void nfs4_set_sequence_privileged(struct nfs4_sequence_args *args)
{
args->sa_privileged = 1;
}
static void nfs40_sequence_free_slot(struct nfs4_sequence_res *res) static void nfs40_sequence_free_slot(struct nfs4_sequence_res *res)
{ {
struct nfs4_slot *slot = res->sr_slot; struct nfs4_slot *slot = res->sr_slot;
@ -746,12 +775,19 @@ static int nfs41_sequence_process(struct rpc_task *task,
slot->slot_nr, slot->slot_nr,
slot->seq_nr); slot->seq_nr);
goto out_retry; goto out_retry;
case -NFS4ERR_RETRY_UNCACHED_REP:
case -NFS4ERR_SEQ_FALSE_RETRY:
/*
* The server thinks we tried to replay a request.
* Retry the call after bumping the sequence ID.
*/
goto retry_new_seq;
case -NFS4ERR_BADSLOT: case -NFS4ERR_BADSLOT:
/* /*
* The slot id we used was probably retired. Try again * The slot id we used was probably retired. Try again
* using a different slot id. * using a different slot id.
*/ */
if (slot->seq_nr < slot->table->target_highest_slotid) if (slot->slot_nr < slot->table->target_highest_slotid)
goto session_recover; goto session_recover;
goto retry_nowait; goto retry_nowait;
case -NFS4ERR_SEQ_MISORDERED: case -NFS4ERR_SEQ_MISORDERED:
@ -770,10 +806,6 @@ static int nfs41_sequence_process(struct rpc_task *task,
goto retry_nowait; goto retry_nowait;
} }
goto session_recover; goto session_recover;
case -NFS4ERR_SEQ_FALSE_RETRY:
if (interrupted)
goto retry_new_seq;
goto session_recover;
default: default:
/* Just update the slot sequence no. */ /* Just update the slot sequence no. */
slot->seq_done = 1; slot->seq_done = 1;
@ -1035,7 +1067,7 @@ int nfs4_call_sync(struct rpc_clnt *clnt,
struct nfs4_sequence_res *res, struct nfs4_sequence_res *res,
int cache_reply) int cache_reply)
{ {
nfs4_init_sequence(args, res, cache_reply); nfs4_init_sequence(args, res, cache_reply, 0);
return nfs4_call_sync_sequence(clnt, server, msg, args, res); return nfs4_call_sync_sequence(clnt, server, msg, args, res);
} }
@ -1064,30 +1096,6 @@ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo,
spin_unlock(&dir->i_lock); spin_unlock(&dir->i_lock);
} }
struct nfs4_opendata {
struct kref kref;
struct nfs_openargs o_arg;
struct nfs_openres o_res;
struct nfs_open_confirmargs c_arg;
struct nfs_open_confirmres c_res;
struct nfs4_string owner_name;
struct nfs4_string group_name;
struct nfs4_label *a_label;
struct nfs_fattr f_attr;
struct nfs4_label *f_label;
struct dentry *dir;
struct dentry *dentry;
struct nfs4_state_owner *owner;
struct nfs4_state *state;
struct iattr attrs;
unsigned long timestamp;
bool rpc_done;
bool file_created;
bool is_recover;
bool cancelled;
int rpc_status;
};
struct nfs4_open_createattrs { struct nfs4_open_createattrs {
struct nfs4_label *label; struct nfs4_label *label;
struct iattr *sattr; struct iattr *sattr;
@ -1268,6 +1276,7 @@ static void nfs4_opendata_free(struct kref *kref)
struct nfs4_opendata, kref); struct nfs4_opendata, kref);
struct super_block *sb = p->dentry->d_sb; struct super_block *sb = p->dentry->d_sb;
nfs4_lgopen_release(p->lgp);
nfs_free_seqid(p->o_arg.seqid); nfs_free_seqid(p->o_arg.seqid);
nfs4_sequence_free_slot(&p->o_res.seq_res); nfs4_sequence_free_slot(&p->o_res.seq_res);
if (p->state != NULL) if (p->state != NULL)
@ -2187,13 +2196,12 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data)
}; };
int status; int status;
nfs4_init_sequence(&data->c_arg.seq_args, &data->c_res.seq_res, 1); nfs4_init_sequence(&data->c_arg.seq_args, &data->c_res.seq_res, 1,
data->is_recover);
kref_get(&data->kref); kref_get(&data->kref);
data->rpc_done = false; data->rpc_done = false;
data->rpc_status = 0; data->rpc_status = 0;
data->timestamp = jiffies; data->timestamp = jiffies;
if (data->is_recover)
nfs4_set_sequence_privileged(&data->c_arg.seq_args);
task = rpc_run_task(&task_setup_data); task = rpc_run_task(&task_setup_data);
if (IS_ERR(task)) if (IS_ERR(task))
return PTR_ERR(task); return PTR_ERR(task);
@ -2327,7 +2335,8 @@ static const struct rpc_call_ops nfs4_open_ops = {
.rpc_release = nfs4_open_release, .rpc_release = nfs4_open_release,
}; };
static int nfs4_run_open_task(struct nfs4_opendata *data, int isrecover) static int nfs4_run_open_task(struct nfs4_opendata *data,
struct nfs_open_context *ctx)
{ {
struct inode *dir = d_inode(data->dir); struct inode *dir = d_inode(data->dir);
struct nfs_server *server = NFS_SERVER(dir); struct nfs_server *server = NFS_SERVER(dir);
@ -2350,15 +2359,17 @@ static int nfs4_run_open_task(struct nfs4_opendata *data, int isrecover)
}; };
int status; int status;
nfs4_init_sequence(&o_arg->seq_args, &o_res->seq_res, 1);
kref_get(&data->kref); kref_get(&data->kref);
data->rpc_done = false; data->rpc_done = false;
data->rpc_status = 0; data->rpc_status = 0;
data->cancelled = false; data->cancelled = false;
data->is_recover = false; data->is_recover = false;
if (isrecover) { if (!ctx) {
nfs4_set_sequence_privileged(&o_arg->seq_args); nfs4_init_sequence(&o_arg->seq_args, &o_res->seq_res, 1, 1);
data->is_recover = true; data->is_recover = true;
} else {
nfs4_init_sequence(&o_arg->seq_args, &o_res->seq_res, 1, 0);
pnfs_lgopen_prepare(data, ctx);
} }
task = rpc_run_task(&task_setup_data); task = rpc_run_task(&task_setup_data);
if (IS_ERR(task)) if (IS_ERR(task))
@ -2380,7 +2391,7 @@ static int _nfs4_recover_proc_open(struct nfs4_opendata *data)
struct nfs_openres *o_res = &data->o_res; struct nfs_openres *o_res = &data->o_res;
int status; int status;
status = nfs4_run_open_task(data, 1); status = nfs4_run_open_task(data, NULL);
if (status != 0 || !data->rpc_done) if (status != 0 || !data->rpc_done)
return status; return status;
@ -2441,7 +2452,8 @@ static int nfs4_opendata_access(struct rpc_cred *cred,
/* /*
* Note: On error, nfs4_proc_open will free the struct nfs4_opendata * Note: On error, nfs4_proc_open will free the struct nfs4_opendata
*/ */
static int _nfs4_proc_open(struct nfs4_opendata *data) static int _nfs4_proc_open(struct nfs4_opendata *data,
struct nfs_open_context *ctx)
{ {
struct inode *dir = d_inode(data->dir); struct inode *dir = d_inode(data->dir);
struct nfs_server *server = NFS_SERVER(dir); struct nfs_server *server = NFS_SERVER(dir);
@ -2449,7 +2461,7 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
struct nfs_openres *o_res = &data->o_res; struct nfs_openres *o_res = &data->o_res;
int status; int status;
status = nfs4_run_open_task(data, 0); status = nfs4_run_open_task(data, ctx);
if (!data->rpc_done) if (!data->rpc_done)
return status; return status;
if (status != 0) { if (status != 0) {
@ -2480,7 +2492,8 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
} }
if (!(o_res->f_attr->valid & NFS_ATTR_FATTR)) { if (!(o_res->f_attr->valid & NFS_ATTR_FATTR)) {
nfs4_sequence_free_slot(&o_res->seq_res); nfs4_sequence_free_slot(&o_res->seq_res);
nfs4_proc_getattr(server, &o_res->fh, o_res->f_attr, o_res->f_label); nfs4_proc_getattr(server, &o_res->fh, o_res->f_attr,
o_res->f_label, NULL);
} }
return 0; return 0;
} }
@ -2800,11 +2813,11 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
seq = raw_seqcount_begin(&sp->so_reclaim_seqcount); seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
ret = _nfs4_proc_open(opendata); ret = _nfs4_proc_open(opendata, ctx);
if (ret != 0) if (ret != 0)
goto out; goto out;
state = nfs4_opendata_to_nfs4_state(opendata); state = _nfs4_opendata_to_nfs4_state(opendata);
ret = PTR_ERR(state); ret = PTR_ERR(state);
if (IS_ERR(state)) if (IS_ERR(state))
goto out; goto out;
@ -2838,8 +2851,12 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
nfs_inode_attach_open_context(ctx); nfs_inode_attach_open_context(ctx);
if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq))
nfs4_schedule_stateid_recovery(server, state); nfs4_schedule_stateid_recovery(server, state);
else
pnfs_parse_lgopen(state->inode, opendata->lgp, ctx);
} }
out: out:
nfs4_sequence_free_slot(&opendata->o_res.seq_res);
return ret; return ret;
} }
@ -3039,7 +3056,6 @@ static int _nfs4_do_setattr(struct inode *inode,
}; };
struct rpc_cred *delegation_cred = NULL; struct rpc_cred *delegation_cred = NULL;
unsigned long timestamp = jiffies; unsigned long timestamp = jiffies;
fmode_t fmode;
bool truncate; bool truncate;
int status; int status;
@ -3047,11 +3063,12 @@ static int _nfs4_do_setattr(struct inode *inode,
/* Servers should only apply open mode checks for file size changes */ /* Servers should only apply open mode checks for file size changes */
truncate = (arg->iap->ia_valid & ATTR_SIZE) ? true : false; truncate = (arg->iap->ia_valid & ATTR_SIZE) ? true : false;
fmode = truncate ? FMODE_WRITE : FMODE_READ; if (!truncate)
goto zero_stateid;
if (nfs4_copy_delegation_stateid(inode, fmode, &arg->stateid, &delegation_cred)) { if (nfs4_copy_delegation_stateid(inode, FMODE_WRITE, &arg->stateid, &delegation_cred)) {
/* Use that stateid */ /* Use that stateid */
} else if (truncate && ctx != NULL) { } else if (ctx != NULL) {
struct nfs_lock_context *l_ctx; struct nfs_lock_context *l_ctx;
if (!nfs4_valid_open_stateid(ctx->state)) if (!nfs4_valid_open_stateid(ctx->state))
return -EBADF; return -EBADF;
@ -3063,8 +3080,10 @@ static int _nfs4_do_setattr(struct inode *inode,
nfs_put_lock_context(l_ctx); nfs_put_lock_context(l_ctx);
if (status == -EIO) if (status == -EIO)
return -EBADF; return -EBADF;
} else } else {
zero_stateid:
nfs4_stateid_copy(&arg->stateid, &zero_stateid); nfs4_stateid_copy(&arg->stateid, &zero_stateid);
}
if (delegation_cred) if (delegation_cred)
msg.rpc_cred = delegation_cred; msg.rpc_cred = delegation_cred;
@ -3083,12 +3102,13 @@ static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
struct nfs4_label *olabel) struct nfs4_label *olabel)
{ {
struct nfs_server *server = NFS_SERVER(inode); struct nfs_server *server = NFS_SERVER(inode);
__u32 bitmask[NFS4_BITMASK_SZ];
struct nfs4_state *state = ctx ? ctx->state : NULL; struct nfs4_state *state = ctx ? ctx->state : NULL;
struct nfs_setattrargs arg = { struct nfs_setattrargs arg = {
.fh = NFS_FH(inode), .fh = NFS_FH(inode),
.iap = sattr, .iap = sattr,
.server = server, .server = server,
.bitmask = server->attr_bitmask, .bitmask = bitmask,
.label = ilabel, .label = ilabel,
}; };
struct nfs_setattrres res = { struct nfs_setattrres res = {
@ -3103,11 +3123,11 @@ static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
}; };
int err; int err;
arg.bitmask = nfs4_bitmask(server, ilabel);
if (ilabel)
arg.bitmask = nfs4_bitmask(server, olabel);
do { do {
nfs4_bitmap_copy_adjust_setattr(bitmask,
nfs4_bitmask(server, olabel),
inode);
err = _nfs4_do_setattr(inode, &arg, &res, cred, ctx); err = _nfs4_do_setattr(inode, &arg, &res, cred, ctx);
switch (err) { switch (err) {
case -NFS4ERR_OPENMODE: case -NFS4ERR_OPENMODE:
@ -3393,7 +3413,7 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait)
calldata = kzalloc(sizeof(*calldata), gfp_mask); calldata = kzalloc(sizeof(*calldata), gfp_mask);
if (calldata == NULL) if (calldata == NULL)
goto out; goto out;
nfs4_init_sequence(&calldata->arg.seq_args, &calldata->res.seq_res, 1); nfs4_init_sequence(&calldata->arg.seq_args, &calldata->res.seq_res, 1, 0);
calldata->inode = state->inode; calldata->inode = state->inode;
calldata->state = state; calldata->state = state;
calldata->arg.fh = NFS_FH(state->inode); calldata->arg.fh = NFS_FH(state->inode);
@ -3742,7 +3762,7 @@ static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *mntfh,
if (IS_ERR(label)) if (IS_ERR(label))
return PTR_ERR(label); return PTR_ERR(label);
error = nfs4_proc_getattr(server, mntfh, fattr, label); error = nfs4_proc_getattr(server, mntfh, fattr, label, NULL);
if (error < 0) { if (error < 0) {
dprintk("nfs4_get_root: getattr error = %d\n", -error); dprintk("nfs4_get_root: getattr error = %d\n", -error);
goto err_free_label; goto err_free_label;
@ -3807,11 +3827,13 @@ out:
} }
static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fattr *fattr, struct nfs4_label *label) struct nfs_fattr *fattr, struct nfs4_label *label,
struct inode *inode)
{ {
__u32 bitmask[NFS4_BITMASK_SZ];
struct nfs4_getattr_arg args = { struct nfs4_getattr_arg args = {
.fh = fhandle, .fh = fhandle,
.bitmask = server->attr_bitmask, .bitmask = bitmask,
}; };
struct nfs4_getattr_res res = { struct nfs4_getattr_res res = {
.fattr = fattr, .fattr = fattr,
@ -3824,19 +3846,20 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
.rpc_resp = &res, .rpc_resp = &res,
}; };
args.bitmask = nfs4_bitmask(server, label); nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, label), inode);
nfs_fattr_init(fattr); nfs_fattr_init(fattr);
return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
} }
static int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, static int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fattr *fattr, struct nfs4_label *label) struct nfs_fattr *fattr, struct nfs4_label *label,
struct inode *inode)
{ {
struct nfs4_exception exception = { }; struct nfs4_exception exception = { };
int err; int err;
do { do {
err = _nfs4_proc_getattr(server, fhandle, fattr, label); err = _nfs4_proc_getattr(server, fhandle, fattr, label, inode);
trace_nfs4_getattr(server, fhandle, fattr, err); trace_nfs4_getattr(server, fhandle, fattr, err);
err = nfs4_handle_exception(server, err, err = nfs4_handle_exception(server, err,
&exception); &exception);
@ -4089,7 +4112,7 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry
}; };
int status = 0; int status = 0;
if (!nfs_have_delegated_attributes(inode)) { if (!nfs4_have_delegation(inode, FMODE_READ)) {
res.fattr = nfs_alloc_fattr(); res.fattr = nfs_alloc_fattr();
if (res.fattr == NULL) if (res.fattr == NULL)
return -ENOMEM; return -ENOMEM;
@ -4265,15 +4288,16 @@ static int nfs4_proc_rmdir(struct inode *dir, const struct qstr *name)
return err; return err;
} }
static void nfs4_proc_unlink_setup(struct rpc_message *msg, struct dentry *dentry) static void nfs4_proc_unlink_setup(struct rpc_message *msg,
struct dentry *dentry,
struct inode *inode)
{ {
struct nfs_removeargs *args = msg->rpc_argp; struct nfs_removeargs *args = msg->rpc_argp;
struct nfs_removeres *res = msg->rpc_resp; struct nfs_removeres *res = msg->rpc_resp;
struct inode *inode = d_inode(dentry);
res->server = NFS_SB(dentry->d_sb); res->server = NFS_SB(dentry->d_sb);
msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE]; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE];
nfs4_init_sequence(&args->seq_args, &res->seq_res, 1); nfs4_init_sequence(&args->seq_args, &res->seq_res, 1, 0);
nfs_fattr_init(res->dir_attr); nfs_fattr_init(res->dir_attr);
@ -4319,7 +4343,7 @@ static void nfs4_proc_rename_setup(struct rpc_message *msg,
nfs4_inode_return_delegation(new_inode); nfs4_inode_return_delegation(new_inode);
msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME]; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME];
res->server = NFS_SB(old_dentry->d_sb); res->server = NFS_SB(old_dentry->d_sb);
nfs4_init_sequence(&arg->seq_args, &res->seq_res, 1); nfs4_init_sequence(&arg->seq_args, &res->seq_res, 1, 0);
} }
static void nfs4_proc_rename_rpc_prepare(struct rpc_task *task, struct nfs_renamedata *data) static void nfs4_proc_rename_rpc_prepare(struct rpc_task *task, struct nfs_renamedata *data)
@ -4352,11 +4376,12 @@ static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
static int _nfs4_proc_link(struct inode *inode, struct inode *dir, const struct qstr *name) static int _nfs4_proc_link(struct inode *inode, struct inode *dir, const struct qstr *name)
{ {
struct nfs_server *server = NFS_SERVER(inode); struct nfs_server *server = NFS_SERVER(inode);
__u32 bitmask[NFS4_BITMASK_SZ];
struct nfs4_link_arg arg = { struct nfs4_link_arg arg = {
.fh = NFS_FH(inode), .fh = NFS_FH(inode),
.dir_fh = NFS_FH(dir), .dir_fh = NFS_FH(dir),
.name = name, .name = name,
.bitmask = server->attr_bitmask, .bitmask = bitmask,
}; };
struct nfs4_link_res res = { struct nfs4_link_res res = {
.server = server, .server = server,
@ -4378,9 +4403,9 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, const struct
status = PTR_ERR(res.label); status = PTR_ERR(res.label);
goto out; goto out;
} }
arg.bitmask = nfs4_bitmask(server, res.label);
nfs4_inode_make_writeable(inode); nfs4_inode_make_writeable(inode);
nfs4_bitmap_copy_adjust_setattr(bitmask, nfs4_bitmask(server, res.label), inode);
status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
if (!status) { if (!status) {
@ -4895,7 +4920,7 @@ static void nfs4_proc_read_setup(struct nfs_pgio_header *hdr,
if (!hdr->pgio_done_cb) if (!hdr->pgio_done_cb)
hdr->pgio_done_cb = nfs4_read_done_cb; hdr->pgio_done_cb = nfs4_read_done_cb;
msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 0); nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 0, 0);
} }
static int nfs4_proc_pgio_rpc_prepare(struct rpc_task *task, static int nfs4_proc_pgio_rpc_prepare(struct rpc_task *task,
@ -4979,7 +5004,8 @@ bool nfs4_write_need_cache_consistency_data(struct nfs_pgio_header *hdr)
} }
static void nfs4_proc_write_setup(struct nfs_pgio_header *hdr, static void nfs4_proc_write_setup(struct nfs_pgio_header *hdr,
struct rpc_message *msg) struct rpc_message *msg,
struct rpc_clnt **clnt)
{ {
struct nfs_server *server = NFS_SERVER(hdr->inode); struct nfs_server *server = NFS_SERVER(hdr->inode);
@ -4995,7 +5021,8 @@ static void nfs4_proc_write_setup(struct nfs_pgio_header *hdr,
hdr->timestamp = jiffies; hdr->timestamp = jiffies;
msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE]; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE];
nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 1); nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 1, 0);
nfs4_state_protect_write(server->nfs_client, clnt, msg, hdr);
} }
static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
@ -5026,7 +5053,8 @@ static int nfs4_commit_done(struct rpc_task *task, struct nfs_commit_data *data)
return data->commit_done_cb(task, data); return data->commit_done_cb(task, data);
} }
static void nfs4_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg) static void nfs4_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg,
struct rpc_clnt **clnt)
{ {
struct nfs_server *server = NFS_SERVER(data->inode); struct nfs_server *server = NFS_SERVER(data->inode);
@ -5034,7 +5062,8 @@ static void nfs4_proc_commit_setup(struct nfs_commit_data *data, struct rpc_mess
data->commit_done_cb = nfs4_commit_done_cb; data->commit_done_cb = nfs4_commit_done_cb;
data->res.server = server; data->res.server = server;
msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT]; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT];
nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1, 0);
nfs4_state_protect(server->nfs_client, NFS_SP4_MACH_CRED_COMMIT, clnt, msg);
} }
struct nfs4_renewdata { struct nfs4_renewdata {
@ -5391,7 +5420,8 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl
*/ */
spin_lock(&inode->i_lock); spin_lock(&inode->i_lock);
NFS_I(inode)->cache_validity |= NFS_INO_INVALID_CHANGE NFS_I(inode)->cache_validity |= NFS_INO_INVALID_CHANGE
| NFS_INO_INVALID_CTIME; | NFS_INO_INVALID_CTIME
| NFS_INO_REVAL_FORCED;
spin_unlock(&inode->i_lock); spin_unlock(&inode->i_lock);
nfs_access_zap_cache(inode); nfs_access_zap_cache(inode);
nfs_zap_acl_cache(inode); nfs_zap_acl_cache(inode);
@ -5591,13 +5621,14 @@ nfs4_init_nonuniform_client_string(struct nfs_client *clp)
return 0; return 0;
rcu_read_lock(); rcu_read_lock();
len = 14 + strlen(clp->cl_ipaddr) + 1 + len = 14 +
strlen(rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR)) + strlen(clp->cl_rpcclient->cl_nodename) +
1 + 1 +
strlen(rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_PROTO)) + strlen(rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR)) +
1; 1;
rcu_read_unlock(); rcu_read_unlock();
if (nfs4_client_id_uniquifier[0] != '\0')
len += strlen(nfs4_client_id_uniquifier) + 1;
if (len > NFS4_OPAQUE_LIMIT + 1) if (len > NFS4_OPAQUE_LIMIT + 1)
return -EINVAL; return -EINVAL;
@ -5611,10 +5642,17 @@ nfs4_init_nonuniform_client_string(struct nfs_client *clp)
return -ENOMEM; return -ENOMEM;
rcu_read_lock(); rcu_read_lock();
scnprintf(str, len, "Linux NFSv4.0 %s/%s %s", if (nfs4_client_id_uniquifier[0] != '\0')
clp->cl_ipaddr, scnprintf(str, len, "Linux NFSv4.0 %s/%s/%s",
rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR), clp->cl_rpcclient->cl_nodename,
rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_PROTO)); nfs4_client_id_uniquifier,
rpc_peeraddr2str(clp->cl_rpcclient,
RPC_DISPLAY_ADDR));
else
scnprintf(str, len, "Linux NFSv4.0 %s/%s",
clp->cl_rpcclient->cl_nodename,
rpc_peeraddr2str(clp->cl_rpcclient,
RPC_DISPLAY_ADDR));
rcu_read_unlock(); rcu_read_unlock();
clp->cl_owner_id = str; clp->cl_owner_id = str;
@ -5972,7 +6010,7 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
data = kzalloc(sizeof(*data), GFP_NOFS); data = kzalloc(sizeof(*data), GFP_NOFS);
if (data == NULL) if (data == NULL)
return -ENOMEM; return -ENOMEM;
nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1, 0);
nfs4_state_protect(server->nfs_client, nfs4_state_protect(server->nfs_client,
NFS_SP4_MACH_CRED_CLEANUP, NFS_SP4_MACH_CRED_CLEANUP,
@ -6247,7 +6285,7 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
} }
nfs4_init_sequence(&data->arg.seq_args, &data->res.seq_res, 1); nfs4_init_sequence(&data->arg.seq_args, &data->res.seq_res, 1, 0);
msg.rpc_argp = &data->arg; msg.rpc_argp = &data->arg;
msg.rpc_resp = &data->res; msg.rpc_resp = &data->res;
task_setup_data.callback_data = data; task_setup_data.callback_data = data;
@ -6411,32 +6449,36 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
case 0: case 0:
renew_lease(NFS_SERVER(d_inode(data->ctx->dentry)), renew_lease(NFS_SERVER(d_inode(data->ctx->dentry)),
data->timestamp); data->timestamp);
if (data->arg.new_lock) { if (data->arg.new_lock && !data->cancelled) {
data->fl.fl_flags &= ~(FL_SLEEP | FL_ACCESS); data->fl.fl_flags &= ~(FL_SLEEP | FL_ACCESS);
if (locks_lock_inode_wait(lsp->ls_state->inode, &data->fl) < 0) { if (locks_lock_inode_wait(lsp->ls_state->inode, &data->fl) < 0)
rpc_restart_call_prepare(task);
break; break;
}
} }
if (data->arg.new_lock_owner != 0) { if (data->arg.new_lock_owner != 0) {
nfs_confirm_seqid(&lsp->ls_seqid, 0); nfs_confirm_seqid(&lsp->ls_seqid, 0);
nfs4_stateid_copy(&lsp->ls_stateid, &data->res.stateid); nfs4_stateid_copy(&lsp->ls_stateid, &data->res.stateid);
set_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags); set_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags);
} else if (!nfs4_update_lock_stateid(lsp, &data->res.stateid)) goto out_done;
rpc_restart_call_prepare(task); } else if (nfs4_update_lock_stateid(lsp, &data->res.stateid))
goto out_done;
break; break;
case -NFS4ERR_BAD_STATEID: case -NFS4ERR_BAD_STATEID:
case -NFS4ERR_OLD_STATEID: case -NFS4ERR_OLD_STATEID:
case -NFS4ERR_STALE_STATEID: case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_EXPIRED: case -NFS4ERR_EXPIRED:
if (data->arg.new_lock_owner != 0) { if (data->arg.new_lock_owner != 0) {
if (!nfs4_stateid_match(&data->arg.open_stateid, if (nfs4_stateid_match(&data->arg.open_stateid,
&lsp->ls_state->open_stateid)) &lsp->ls_state->open_stateid))
rpc_restart_call_prepare(task); goto out_done;
} else if (!nfs4_stateid_match(&data->arg.lock_stateid, } else if (nfs4_stateid_match(&data->arg.lock_stateid,
&lsp->ls_stateid)) &lsp->ls_stateid))
rpc_restart_call_prepare(task); goto out_done;
} }
if (!data->cancelled)
rpc_restart_call_prepare(task);
out_done:
dprintk("%s: done, ret = %d!\n", __func__, data->rpc_status); dprintk("%s: done, ret = %d!\n", __func__, data->rpc_status);
} }
@ -6509,14 +6551,14 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
return -ENOMEM; return -ENOMEM;
if (IS_SETLKW(cmd)) if (IS_SETLKW(cmd))
data->arg.block = 1; data->arg.block = 1;
nfs4_init_sequence(&data->arg.seq_args, &data->res.seq_res, 1); nfs4_init_sequence(&data->arg.seq_args, &data->res.seq_res, 1,
recovery_type > NFS_LOCK_NEW);
msg.rpc_argp = &data->arg; msg.rpc_argp = &data->arg;
msg.rpc_resp = &data->res; msg.rpc_resp = &data->res;
task_setup_data.callback_data = data; task_setup_data.callback_data = data;
if (recovery_type > NFS_LOCK_NEW) { if (recovery_type > NFS_LOCK_NEW) {
if (recovery_type == NFS_LOCK_RECLAIM) if (recovery_type == NFS_LOCK_RECLAIM)
data->arg.reclaim = NFS_LOCK_RECLAIM; data->arg.reclaim = NFS_LOCK_RECLAIM;
nfs4_set_sequence_privileged(&data->arg.seq_args);
} else } else
data->arg.new_lock = 1; data->arg.new_lock = 1;
task = rpc_run_task(&task_setup_data); task = rpc_run_task(&task_setup_data);
@ -6911,7 +6953,7 @@ nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_state *lsp)
msg.rpc_argp = &data->args; msg.rpc_argp = &data->args;
msg.rpc_resp = &data->res; msg.rpc_resp = &data->res;
nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0, 0);
rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, data); rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, data);
} }
@ -7107,8 +7149,7 @@ static int _nfs40_proc_get_locations(struct inode *inode,
locations->server = server; locations->server = server;
locations->nlocations = 0; locations->nlocations = 0;
nfs4_init_sequence(&args.seq_args, &res.seq_res, 0); nfs4_init_sequence(&args.seq_args, &res.seq_res, 0, 1);
nfs4_set_sequence_privileged(&args.seq_args);
status = nfs4_call_sync_sequence(clnt, server, &msg, status = nfs4_call_sync_sequence(clnt, server, &msg,
&args.seq_args, &res.seq_res); &args.seq_args, &res.seq_res);
if (status) if (status)
@ -7161,8 +7202,7 @@ static int _nfs41_proc_get_locations(struct inode *inode,
locations->server = server; locations->server = server;
locations->nlocations = 0; locations->nlocations = 0;
nfs4_init_sequence(&args.seq_args, &res.seq_res, 0); nfs4_init_sequence(&args.seq_args, &res.seq_res, 0, 1);
nfs4_set_sequence_privileged(&args.seq_args);
status = nfs4_call_sync_sequence(clnt, server, &msg, status = nfs4_call_sync_sequence(clnt, server, &msg,
&args.seq_args, &res.seq_res); &args.seq_args, &res.seq_res);
if (status == NFS4_OK && if (status == NFS4_OK &&
@ -7249,8 +7289,7 @@ static int _nfs40_proc_fsid_present(struct inode *inode, struct rpc_cred *cred)
if (res.fh == NULL) if (res.fh == NULL)
return -ENOMEM; return -ENOMEM;
nfs4_init_sequence(&args.seq_args, &res.seq_res, 0); nfs4_init_sequence(&args.seq_args, &res.seq_res, 0, 1);
nfs4_set_sequence_privileged(&args.seq_args);
status = nfs4_call_sync_sequence(clnt, server, &msg, status = nfs4_call_sync_sequence(clnt, server, &msg,
&args.seq_args, &res.seq_res); &args.seq_args, &res.seq_res);
nfs_free_fhandle(res.fh); nfs_free_fhandle(res.fh);
@ -7291,8 +7330,7 @@ static int _nfs41_proc_fsid_present(struct inode *inode, struct rpc_cred *cred)
if (res.fh == NULL) if (res.fh == NULL)
return -ENOMEM; return -ENOMEM;
nfs4_init_sequence(&args.seq_args, &res.seq_res, 0); nfs4_init_sequence(&args.seq_args, &res.seq_res, 0, 1);
nfs4_set_sequence_privileged(&args.seq_args);
status = nfs4_call_sync_sequence(clnt, server, &msg, status = nfs4_call_sync_sequence(clnt, server, &msg,
&args.seq_args, &res.seq_res); &args.seq_args, &res.seq_res);
nfs_free_fhandle(res.fh); nfs_free_fhandle(res.fh);
@ -8070,8 +8108,7 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo)
}; };
int status; int status;
nfs4_init_sequence(&args.la_seq_args, &res.lr_seq_res, 0); nfs4_init_sequence(&args.la_seq_args, &res.lr_seq_res, 0, 1);
nfs4_set_sequence_privileged(&args.la_seq_args);
task = rpc_run_task(&task_setup); task = rpc_run_task(&task_setup);
if (IS_ERR(task)) if (IS_ERR(task))
@ -8408,10 +8445,8 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp,
calldata = kzalloc(sizeof(*calldata), GFP_NOFS); calldata = kzalloc(sizeof(*calldata), GFP_NOFS);
if (calldata == NULL) if (calldata == NULL)
goto out_put_clp; goto out_put_clp;
nfs4_init_sequence(&calldata->args, &calldata->res, 0); nfs4_init_sequence(&calldata->args, &calldata->res, 0, is_privileged);
nfs4_sequence_attach_slot(&calldata->args, &calldata->res, slot); nfs4_sequence_attach_slot(&calldata->args, &calldata->res, slot);
if (is_privileged)
nfs4_set_sequence_privileged(&calldata->args);
msg.rpc_argp = &calldata->args; msg.rpc_argp = &calldata->args;
msg.rpc_resp = &calldata->res; msg.rpc_resp = &calldata->res;
calldata->clp = clp; calldata->clp = clp;
@ -8563,8 +8598,7 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp,
calldata->clp = clp; calldata->clp = clp;
calldata->arg.one_fs = 0; calldata->arg.one_fs = 0;
nfs4_init_sequence(&calldata->arg.seq_args, &calldata->res.seq_res, 0); nfs4_init_sequence(&calldata->arg.seq_args, &calldata->res.seq_res, 0, 1);
nfs4_set_sequence_privileged(&calldata->arg.seq_args);
msg.rpc_argp = &calldata->arg; msg.rpc_argp = &calldata->arg;
msg.rpc_resp = &calldata->res; msg.rpc_resp = &calldata->res;
task_setup_data.callback_data = calldata; task_setup_data.callback_data = calldata;
@ -8693,63 +8727,19 @@ out:
return status; return status;
} }
static size_t max_response_pages(struct nfs_server *server) size_t max_response_pages(struct nfs_server *server)
{ {
u32 max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; u32 max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
return nfs_page_array_len(0, max_resp_sz); return nfs_page_array_len(0, max_resp_sz);
} }
static void nfs4_free_pages(struct page **pages, size_t size)
{
int i;
if (!pages)
return;
for (i = 0; i < size; i++) {
if (!pages[i])
break;
__free_page(pages[i]);
}
kfree(pages);
}
static struct page **nfs4_alloc_pages(size_t size, gfp_t gfp_flags)
{
struct page **pages;
int i;
pages = kcalloc(size, sizeof(struct page *), gfp_flags);
if (!pages) {
dprintk("%s: can't alloc array of %zu pages\n", __func__, size);
return NULL;
}
for (i = 0; i < size; i++) {
pages[i] = alloc_page(gfp_flags);
if (!pages[i]) {
dprintk("%s: failed to allocate page\n", __func__);
nfs4_free_pages(pages, size);
return NULL;
}
}
return pages;
}
static void nfs4_layoutget_release(void *calldata) static void nfs4_layoutget_release(void *calldata)
{ {
struct nfs4_layoutget *lgp = calldata; struct nfs4_layoutget *lgp = calldata;
struct inode *inode = lgp->args.inode;
struct nfs_server *server = NFS_SERVER(inode);
size_t max_pages = max_response_pages(server);
dprintk("--> %s\n", __func__); dprintk("--> %s\n", __func__);
nfs4_sequence_free_slot(&lgp->res.seq_res); nfs4_sequence_free_slot(&lgp->res.seq_res);
nfs4_free_pages(lgp->args.layout.pages, max_pages); pnfs_layoutget_free(lgp);
pnfs_put_layout_hdr(NFS_I(inode)->layout);
put_nfs_open_context(lgp->args.ctx);
kfree(calldata);
dprintk("<-- %s\n", __func__); dprintk("<-- %s\n", __func__);
} }
@ -8760,11 +8750,10 @@ static const struct rpc_call_ops nfs4_layoutget_call_ops = {
}; };
struct pnfs_layout_segment * struct pnfs_layout_segment *
nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout, gfp_t gfp_flags) nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout)
{ {
struct inode *inode = lgp->args.inode; struct inode *inode = lgp->args.inode;
struct nfs_server *server = NFS_SERVER(inode); struct nfs_server *server = NFS_SERVER(inode);
size_t max_pages = max_response_pages(server);
struct rpc_task *task; struct rpc_task *task;
struct rpc_message msg = { struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTGET], .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTGET],
@ -8791,16 +8780,7 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout, gfp_t gfp_flags)
/* nfs4_layoutget_release calls pnfs_put_layout_hdr */ /* nfs4_layoutget_release calls pnfs_put_layout_hdr */
pnfs_get_layout_hdr(NFS_I(inode)->layout); pnfs_get_layout_hdr(NFS_I(inode)->layout);
lgp->args.layout.pages = nfs4_alloc_pages(max_pages, gfp_flags); nfs4_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0, 0);
if (!lgp->args.layout.pages) {
nfs4_layoutget_release(lgp);
return ERR_PTR(-ENOMEM);
}
lgp->args.layout.pglen = max_pages * PAGE_SIZE;
lgp->res.layoutp = &lgp->args.layout;
lgp->res.seq_res.sr_slot = NULL;
nfs4_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0);
task = rpc_run_task(&task_setup_data); task = rpc_run_task(&task_setup_data);
if (IS_ERR(task)) if (IS_ERR(task))
@ -8927,7 +8907,7 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync)
} }
task_setup_data.flags |= RPC_TASK_ASYNC; task_setup_data.flags |= RPC_TASK_ASYNC;
} }
nfs4_init_sequence(&lrp->args.seq_args, &lrp->res.seq_res, 1); nfs4_init_sequence(&lrp->args.seq_args, &lrp->res.seq_res, 1, 0);
task = rpc_run_task(&task_setup_data); task = rpc_run_task(&task_setup_data);
if (IS_ERR(task)) if (IS_ERR(task))
return PTR_ERR(task); return PTR_ERR(task);
@ -9074,7 +9054,7 @@ nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync)
} }
task_setup_data.flags = RPC_TASK_ASYNC; task_setup_data.flags = RPC_TASK_ASYNC;
} }
nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1, 0);
task = rpc_run_task(&task_setup_data); task = rpc_run_task(&task_setup_data);
if (IS_ERR(task)) if (IS_ERR(task))
return PTR_ERR(task); return PTR_ERR(task);
@ -9254,8 +9234,7 @@ static int _nfs41_test_stateid(struct nfs_server *server,
&rpc_client, &msg); &rpc_client, &msg);
dprintk("NFS call test_stateid %p\n", stateid); dprintk("NFS call test_stateid %p\n", stateid);
nfs4_init_sequence(&args.seq_args, &res.seq_res, 0); nfs4_init_sequence(&args.seq_args, &res.seq_res, 0, 1);
nfs4_set_sequence_privileged(&args.seq_args);
status = nfs4_call_sync_sequence(rpc_client, server, &msg, status = nfs4_call_sync_sequence(rpc_client, server, &msg,
&args.seq_args, &res.seq_res); &args.seq_args, &res.seq_res);
if (status != NFS_OK) { if (status != NFS_OK) {
@ -9347,7 +9326,17 @@ static const struct rpc_call_ops nfs41_free_stateid_ops = {
.rpc_release = nfs41_free_stateid_release, .rpc_release = nfs41_free_stateid_release,
}; };
static struct rpc_task *_nfs41_free_stateid(struct nfs_server *server, /**
* nfs41_free_stateid - perform a FREE_STATEID operation
*
* @server: server / transport on which to perform the operation
* @stateid: state ID to release
* @cred: credential
* @privileged: set to true if this call needs to be privileged
*
* Note: this function is always asynchronous.
*/
static int nfs41_free_stateid(struct nfs_server *server,
const nfs4_stateid *stateid, const nfs4_stateid *stateid,
struct rpc_cred *cred, struct rpc_cred *cred,
bool privileged) bool privileged)
@ -9363,6 +9352,7 @@ static struct rpc_task *_nfs41_free_stateid(struct nfs_server *server,
.flags = RPC_TASK_ASYNC, .flags = RPC_TASK_ASYNC,
}; };
struct nfs_free_stateid_data *data; struct nfs_free_stateid_data *data;
struct rpc_task *task;
nfs4_state_protect(server->nfs_client, NFS_SP4_MACH_CRED_STATEID, nfs4_state_protect(server->nfs_client, NFS_SP4_MACH_CRED_STATEID,
&task_setup.rpc_client, &msg); &task_setup.rpc_client, &msg);
@ -9370,7 +9360,7 @@ static struct rpc_task *_nfs41_free_stateid(struct nfs_server *server,
dprintk("NFS call free_stateid %p\n", stateid); dprintk("NFS call free_stateid %p\n", stateid);
data = kmalloc(sizeof(*data), GFP_NOFS); data = kmalloc(sizeof(*data), GFP_NOFS);
if (!data) if (!data)
return ERR_PTR(-ENOMEM); return -ENOMEM;
data->server = server; data->server = server;
nfs4_stateid_copy(&data->args.stateid, stateid); nfs4_stateid_copy(&data->args.stateid, stateid);
@ -9378,31 +9368,8 @@ static struct rpc_task *_nfs41_free_stateid(struct nfs_server *server,
msg.rpc_argp = &data->args; msg.rpc_argp = &data->args;
msg.rpc_resp = &data->res; msg.rpc_resp = &data->res;
nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1, privileged);
if (privileged) task = rpc_run_task(&task_setup);
nfs4_set_sequence_privileged(&data->args.seq_args);
return rpc_run_task(&task_setup);
}
/**
* nfs41_free_stateid - perform a FREE_STATEID operation
*
* @server: server / transport on which to perform the operation
* @stateid: state ID to release
* @cred: credential
* @is_recovery: set to true if this call needs to be privileged
*
* Note: this function is always asynchronous.
*/
static int nfs41_free_stateid(struct nfs_server *server,
const nfs4_stateid *stateid,
struct rpc_cred *cred,
bool is_recovery)
{
struct rpc_task *task;
task = _nfs41_free_stateid(server, stateid, cred, is_recovery);
if (IS_ERR(task)) if (IS_ERR(task))
return PTR_ERR(task); return PTR_ERR(task);
rpc_put_task(task); rpc_put_task(task);
@ -9539,7 +9506,8 @@ static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = {
| NFS_CAP_ATOMIC_OPEN | NFS_CAP_ATOMIC_OPEN
| NFS_CAP_POSIX_LOCK | NFS_CAP_POSIX_LOCK
| NFS_CAP_STATEID_NFSV41 | NFS_CAP_STATEID_NFSV41
| NFS_CAP_ATOMIC_OPEN_V1, | NFS_CAP_ATOMIC_OPEN_V1
| NFS_CAP_LGOPEN,
.init_client = nfs41_init_client, .init_client = nfs41_init_client,
.shutdown_client = nfs41_shutdown_client, .shutdown_client = nfs41_shutdown_client,
.match_stateid = nfs41_match_stateid, .match_stateid = nfs41_match_stateid,
@ -9564,6 +9532,7 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = {
| NFS_CAP_POSIX_LOCK | NFS_CAP_POSIX_LOCK
| NFS_CAP_STATEID_NFSV41 | NFS_CAP_STATEID_NFSV41
| NFS_CAP_ATOMIC_OPEN_V1 | NFS_CAP_ATOMIC_OPEN_V1
| NFS_CAP_LGOPEN
| NFS_CAP_ALLOCATE | NFS_CAP_ALLOCATE
| NFS_CAP_COPY | NFS_CAP_COPY
| NFS_CAP_DEALLOCATE | NFS_CAP_DEALLOCATE

View File

@ -77,6 +77,14 @@ const nfs4_stateid invalid_stateid = {
.type = NFS4_INVALID_STATEID_TYPE, .type = NFS4_INVALID_STATEID_TYPE,
}; };
const nfs4_stateid current_stateid = {
{
/* Funky initialiser keeps older gcc versions happy */
.data = { 0x0, 0x0, 0x0, 0x1, 0 },
},
.type = NFS4_SPECIAL_STATEID_TYPE,
};
static DEFINE_MUTEX(nfs_clid_init_mutex); static DEFINE_MUTEX(nfs_clid_init_mutex);
int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred) int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)

View File

@ -65,7 +65,13 @@
/* Mapping from NFS error code to "errno" error code. */ /* Mapping from NFS error code to "errno" error code. */
#define errno_NFSERR_IO EIO #define errno_NFSERR_IO EIO
struct compound_hdr;
static int nfs4_stat_to_errno(int); static int nfs4_stat_to_errno(int);
static void encode_layoutget(struct xdr_stream *xdr,
const struct nfs4_layoutget_args *args,
struct compound_hdr *hdr);
static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
struct nfs4_layoutget_res *res);
/* NFSv4 COMPOUND tags are only wanted for debugging purposes */ /* NFSv4 COMPOUND tags are only wanted for debugging purposes */
#ifdef DEBUG #ifdef DEBUG
@ -424,6 +430,8 @@ static int nfs4_stat_to_errno(int);
#define decode_sequence_maxsz 0 #define decode_sequence_maxsz 0
#define encode_layoutreturn_maxsz 0 #define encode_layoutreturn_maxsz 0
#define decode_layoutreturn_maxsz 0 #define decode_layoutreturn_maxsz 0
#define encode_layoutget_maxsz 0
#define decode_layoutget_maxsz 0
#endif /* CONFIG_NFS_V4_1 */ #endif /* CONFIG_NFS_V4_1 */
#define NFS4_enc_compound_sz (1024) /* XXX: large enough? */ #define NFS4_enc_compound_sz (1024) /* XXX: large enough? */
@ -476,14 +484,16 @@ static int nfs4_stat_to_errno(int);
encode_open_maxsz + \ encode_open_maxsz + \
encode_access_maxsz + \ encode_access_maxsz + \
encode_getfh_maxsz + \ encode_getfh_maxsz + \
encode_getattr_maxsz) encode_getattr_maxsz + \
encode_layoutget_maxsz)
#define NFS4_dec_open_sz (compound_decode_hdr_maxsz + \ #define NFS4_dec_open_sz (compound_decode_hdr_maxsz + \
decode_sequence_maxsz + \ decode_sequence_maxsz + \
decode_putfh_maxsz + \ decode_putfh_maxsz + \
decode_open_maxsz + \ decode_open_maxsz + \
decode_access_maxsz + \ decode_access_maxsz + \
decode_getfh_maxsz + \ decode_getfh_maxsz + \
decode_getattr_maxsz) decode_getattr_maxsz + \
decode_layoutget_maxsz)
#define NFS4_enc_open_confirm_sz \ #define NFS4_enc_open_confirm_sz \
(compound_encode_hdr_maxsz + \ (compound_encode_hdr_maxsz + \
encode_putfh_maxsz + \ encode_putfh_maxsz + \
@ -497,13 +507,15 @@ static int nfs4_stat_to_errno(int);
encode_putfh_maxsz + \ encode_putfh_maxsz + \
encode_open_maxsz + \ encode_open_maxsz + \
encode_access_maxsz + \ encode_access_maxsz + \
encode_getattr_maxsz) encode_getattr_maxsz + \
encode_layoutget_maxsz)
#define NFS4_dec_open_noattr_sz (compound_decode_hdr_maxsz + \ #define NFS4_dec_open_noattr_sz (compound_decode_hdr_maxsz + \
decode_sequence_maxsz + \ decode_sequence_maxsz + \
decode_putfh_maxsz + \ decode_putfh_maxsz + \
decode_open_maxsz + \ decode_open_maxsz + \
decode_access_maxsz + \ decode_access_maxsz + \
decode_getattr_maxsz) decode_getattr_maxsz + \
decode_layoutget_maxsz)
#define NFS4_enc_open_downgrade_sz \ #define NFS4_enc_open_downgrade_sz \
(compound_encode_hdr_maxsz + \ (compound_encode_hdr_maxsz + \
encode_sequence_maxsz + \ encode_sequence_maxsz + \
@ -2070,6 +2082,13 @@ encode_layoutreturn(struct xdr_stream *xdr,
struct compound_hdr *hdr) struct compound_hdr *hdr)
{ {
} }
static void
encode_layoutget(struct xdr_stream *xdr,
const struct nfs4_layoutget_args *args,
struct compound_hdr *hdr)
{
}
#endif /* CONFIG_NFS_V4_1 */ #endif /* CONFIG_NFS_V4_1 */
/* /*
@ -2316,6 +2335,12 @@ static void nfs4_xdr_enc_open(struct rpc_rqst *req, struct xdr_stream *xdr,
if (args->access) if (args->access)
encode_access(xdr, args->access, &hdr); encode_access(xdr, args->access, &hdr);
encode_getfattr_open(xdr, args->bitmask, args->open_bitmap, &hdr); encode_getfattr_open(xdr, args->bitmask, args->open_bitmap, &hdr);
if (args->lg_args) {
encode_layoutget(xdr, args->lg_args, &hdr);
xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2,
args->lg_args->layout.pages,
0, args->lg_args->layout.pglen);
}
encode_nops(&hdr); encode_nops(&hdr);
} }
@ -2356,6 +2381,12 @@ static void nfs4_xdr_enc_open_noattr(struct rpc_rqst *req,
if (args->access) if (args->access)
encode_access(xdr, args->access, &hdr); encode_access(xdr, args->access, &hdr);
encode_getfattr_open(xdr, args->bitmask, args->open_bitmap, &hdr); encode_getfattr_open(xdr, args->bitmask, args->open_bitmap, &hdr);
if (args->lg_args) {
encode_layoutget(xdr, args->lg_args, &hdr);
xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2,
args->lg_args->layout.pages,
0, args->lg_args->layout.pglen);
}
encode_nops(&hdr); encode_nops(&hdr);
} }
@ -6024,7 +6055,7 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
status = decode_op_hdr(xdr, OP_LAYOUTGET); status = decode_op_hdr(xdr, OP_LAYOUTGET);
if (status) if (status)
return status; goto out;
p = xdr_inline_decode(xdr, 4); p = xdr_inline_decode(xdr, 4);
if (unlikely(!p)) if (unlikely(!p))
goto out_overflow; goto out_overflow;
@ -6037,7 +6068,8 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
if (!layout_count) { if (!layout_count) {
dprintk("%s: server responded with empty layout array\n", dprintk("%s: server responded with empty layout array\n",
__func__); __func__);
return -EINVAL; status = -EINVAL;
goto out;
} }
p = xdr_inline_decode(xdr, 28); p = xdr_inline_decode(xdr, 28);
@ -6062,7 +6094,8 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
dprintk("NFS: server cheating in layoutget reply: " dprintk("NFS: server cheating in layoutget reply: "
"layout len %u > recvd %u\n", "layout len %u > recvd %u\n",
res->layoutp->len, recvd); res->layoutp->len, recvd);
return -EINVAL; status = -EINVAL;
goto out;
} }
if (layout_count > 1) { if (layout_count > 1) {
@ -6075,10 +6108,13 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
__func__, layout_count); __func__, layout_count);
} }
return 0; out:
res->status = status;
return status;
out_overflow: out_overflow:
print_overflow_msg(__func__, xdr); print_overflow_msg(__func__, xdr);
return -EIO; status = -EIO;
goto out;
} }
static int decode_layoutreturn(struct xdr_stream *xdr, static int decode_layoutreturn(struct xdr_stream *xdr,
@ -6177,6 +6213,13 @@ int decode_layoutreturn(struct xdr_stream *xdr,
{ {
return 0; return 0;
} }
static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
struct nfs4_layoutget_res *res)
{
return 0;
}
#endif /* CONFIG_NFS_V4_1 */ #endif /* CONFIG_NFS_V4_1 */
/* /*
@ -6623,6 +6666,8 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
if (res->access_request) if (res->access_request)
decode_access(xdr, &res->access_supported, &res->access_result); decode_access(xdr, &res->access_supported, &res->access_result);
decode_getfattr_label(xdr, res->f_attr, res->f_label, res->server); decode_getfattr_label(xdr, res->f_attr, res->f_label, res->server);
if (res->lg_res)
decode_layoutget(xdr, rqstp, res->lg_res);
out: out:
return status; return status;
} }
@ -6675,6 +6720,8 @@ static int nfs4_xdr_dec_open_noattr(struct rpc_rqst *rqstp,
if (res->access_request) if (res->access_request)
decode_access(xdr, &res->access_supported, &res->access_result); decode_access(xdr, &res->access_supported, &res->access_result);
decode_getfattr(xdr, res->f_attr, res->server); decode_getfattr(xdr, res->f_attr, res->server);
if (res->lg_res)
decode_layoutget(xdr, rqstp, res->lg_res);
out: out:
return status; return status;
} }

View File

@ -37,6 +37,7 @@
#include "nfs4trace.h" #include "nfs4trace.h"
#include "delegation.h" #include "delegation.h"
#include "nfs42.h" #include "nfs42.h"
#include "nfs4_fs.h"
#define NFSDBG_FACILITY NFSDBG_PNFS #define NFSDBG_FACILITY NFSDBG_PNFS
#define PNFS_LAYOUTGET_RETRY_TIMEOUT (120*HZ) #define PNFS_LAYOUTGET_RETRY_TIMEOUT (120*HZ)
@ -915,45 +916,99 @@ pnfs_layoutgets_blocked(const struct pnfs_layout_hdr *lo)
test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
} }
/* static struct nfs_server *
* Get layout from server. pnfs_find_server(struct inode *inode, struct nfs_open_context *ctx)
* for now, assume that whole file layouts are requested.
* arg->offset: 0
* arg->length: all ones
*/
static struct pnfs_layout_segment *
send_layoutget(struct pnfs_layout_hdr *lo,
struct nfs_open_context *ctx,
nfs4_stateid *stateid,
const struct pnfs_layout_range *range,
long *timeout, gfp_t gfp_flags)
{ {
struct inode *ino = lo->plh_inode; struct nfs_server *server;
struct nfs_server *server = NFS_SERVER(ino);
if (inode) {
server = NFS_SERVER(inode);
} else {
struct dentry *parent_dir = dget_parent(ctx->dentry);
server = NFS_SERVER(parent_dir->d_inode);
dput(parent_dir);
}
return server;
}
static void nfs4_free_pages(struct page **pages, size_t size)
{
int i;
if (!pages)
return;
for (i = 0; i < size; i++) {
if (!pages[i])
break;
__free_page(pages[i]);
}
kfree(pages);
}
static struct page **nfs4_alloc_pages(size_t size, gfp_t gfp_flags)
{
struct page **pages;
int i;
pages = kcalloc(size, sizeof(struct page *), gfp_flags);
if (!pages) {
dprintk("%s: can't alloc array of %zu pages\n", __func__, size);
return NULL;
}
for (i = 0; i < size; i++) {
pages[i] = alloc_page(gfp_flags);
if (!pages[i]) {
dprintk("%s: failed to allocate page\n", __func__);
nfs4_free_pages(pages, size);
return NULL;
}
}
return pages;
}
static struct nfs4_layoutget *
pnfs_alloc_init_layoutget_args(struct inode *ino,
struct nfs_open_context *ctx,
const nfs4_stateid *stateid,
const struct pnfs_layout_range *range,
gfp_t gfp_flags)
{
struct nfs_server *server = pnfs_find_server(ino, ctx);
size_t max_pages = max_response_pages(server);
struct nfs4_layoutget *lgp; struct nfs4_layoutget *lgp;
loff_t i_size;
dprintk("--> %s\n", __func__); dprintk("--> %s\n", __func__);
/*
* Synchronously retrieve layout information from server and
* store in lseg. If we race with a concurrent seqid morphing
* op, then re-send the LAYOUTGET.
*/
lgp = kzalloc(sizeof(*lgp), gfp_flags); lgp = kzalloc(sizeof(*lgp), gfp_flags);
if (lgp == NULL) if (lgp == NULL)
return ERR_PTR(-ENOMEM); return NULL;
i_size = i_size_read(ino); lgp->args.layout.pages = nfs4_alloc_pages(max_pages, gfp_flags);
if (!lgp->args.layout.pages) {
kfree(lgp);
return NULL;
}
lgp->args.layout.pglen = max_pages * PAGE_SIZE;
lgp->res.layoutp = &lgp->args.layout;
/* Don't confuse uninitialised result and success */
lgp->res.status = -NFS4ERR_DELAY;
lgp->args.minlength = PAGE_SIZE; lgp->args.minlength = PAGE_SIZE;
if (lgp->args.minlength > range->length) if (lgp->args.minlength > range->length)
lgp->args.minlength = range->length; lgp->args.minlength = range->length;
if (range->iomode == IOMODE_READ) { if (ino) {
if (range->offset >= i_size) loff_t i_size = i_size_read(ino);
lgp->args.minlength = 0;
else if (i_size - range->offset < lgp->args.minlength) if (range->iomode == IOMODE_READ) {
lgp->args.minlength = i_size - range->offset; if (range->offset >= i_size)
lgp->args.minlength = 0;
else if (i_size - range->offset < lgp->args.minlength)
lgp->args.minlength = i_size - range->offset;
}
} }
lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE; lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
pnfs_copy_range(&lgp->args.range, range); pnfs_copy_range(&lgp->args.range, range);
@ -962,9 +1017,21 @@ send_layoutget(struct pnfs_layout_hdr *lo,
lgp->args.ctx = get_nfs_open_context(ctx); lgp->args.ctx = get_nfs_open_context(ctx);
nfs4_stateid_copy(&lgp->args.stateid, stateid); nfs4_stateid_copy(&lgp->args.stateid, stateid);
lgp->gfp_flags = gfp_flags; lgp->gfp_flags = gfp_flags;
lgp->cred = lo->plh_lc_cred; lgp->cred = get_rpccred(ctx->cred);
lgp->callback_count = raw_seqcount_begin(&server->nfs_client->cl_callback_count);
return lgp;
}
return nfs4_proc_layoutget(lgp, timeout, gfp_flags); void pnfs_layoutget_free(struct nfs4_layoutget *lgp)
{
size_t max_pages = lgp->args.layout.pglen / PAGE_SIZE;
nfs4_free_pages(lgp->args.layout.pages, max_pages);
if (lgp->args.inode)
pnfs_put_layout_hdr(NFS_I(lgp->args.inode)->layout);
put_rpccred(lgp->cred);
put_nfs_open_context(lgp->args.ctx);
kfree(lgp);
} }
static void pnfs_clear_layoutcommit(struct inode *inode, static void pnfs_clear_layoutcommit(struct inode *inode,
@ -1144,7 +1211,7 @@ _pnfs_return_layout(struct inode *ino)
LIST_HEAD(tmp_list); LIST_HEAD(tmp_list);
nfs4_stateid stateid; nfs4_stateid stateid;
int status = 0; int status = 0;
bool send; bool send, valid_layout;
dprintk("NFS: %s for inode %lu\n", __func__, ino->i_ino); dprintk("NFS: %s for inode %lu\n", __func__, ino->i_ino);
@ -1165,6 +1232,7 @@ _pnfs_return_layout(struct inode *ino)
goto out_put_layout_hdr; goto out_put_layout_hdr;
spin_lock(&ino->i_lock); spin_lock(&ino->i_lock);
} }
valid_layout = pnfs_layout_is_valid(lo);
pnfs_clear_layoutcommit(ino, &tmp_list); pnfs_clear_layoutcommit(ino, &tmp_list);
pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL, 0); pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL, 0);
@ -1178,7 +1246,8 @@ _pnfs_return_layout(struct inode *ino)
} }
/* Don't send a LAYOUTRETURN if list was initially empty */ /* Don't send a LAYOUTRETURN if list was initially empty */
if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) { if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags) ||
!valid_layout) {
spin_unlock(&ino->i_lock); spin_unlock(&ino->i_lock);
dprintk("NFS: %s no layout segments to return\n", __func__); dprintk("NFS: %s no layout segments to return\n", __func__);
goto out_put_layout_hdr; goto out_put_layout_hdr;
@ -1671,6 +1740,22 @@ static void pnfs_clear_first_layoutget(struct pnfs_layout_hdr *lo)
wake_up_bit(bitlock, NFS_LAYOUT_FIRST_LAYOUTGET); wake_up_bit(bitlock, NFS_LAYOUT_FIRST_LAYOUTGET);
} }
/*
 * Put @lo on @server's list of layouts unless it is already on a list.
 *
 * The lo must be on the clp list if there is any chance of a
 * CB_LAYOUTRECALL(FILE) coming in.
 */
static void _add_to_server_list(struct pnfs_layout_hdr *lo,
				struct nfs_server *server)
{
	struct nfs_client *client;

	/* Unlocked check first: nothing to do if already linked */
	if (!list_empty(&lo->plh_layouts))
		return;

	client = server->nfs_client;
	spin_lock(&client->cl_lock);
	/* Check again now that cl_lock is held before linking */
	if (list_empty(&lo->plh_layouts))
		list_add_tail(&lo->plh_layouts, &server->layouts);
	spin_unlock(&client->cl_lock);
}
/* /*
* Layout segment is retrieved from the server if not cached. * Layout segment is retrieved from the server if not cached.
* The appropriate layout segment is referenced and returned to the caller. * The appropriate layout segment is referenced and returned to the caller.
@ -1694,6 +1779,7 @@ pnfs_update_layout(struct inode *ino,
struct nfs_client *clp = server->nfs_client; struct nfs_client *clp = server->nfs_client;
struct pnfs_layout_hdr *lo = NULL; struct pnfs_layout_hdr *lo = NULL;
struct pnfs_layout_segment *lseg = NULL; struct pnfs_layout_segment *lseg = NULL;
struct nfs4_layoutget *lgp;
nfs4_stateid stateid; nfs4_stateid stateid;
long timeout = 0; long timeout = 0;
unsigned long giveup = jiffies + (clp->cl_lease_time << 1); unsigned long giveup = jiffies + (clp->cl_lease_time << 1);
@ -1820,15 +1906,7 @@ lookup_again:
atomic_inc(&lo->plh_outstanding); atomic_inc(&lo->plh_outstanding);
spin_unlock(&ino->i_lock); spin_unlock(&ino->i_lock);
if (list_empty(&lo->plh_layouts)) { _add_to_server_list(lo, server);
/* The lo must be on the clp list if there is any
* chance of a CB_LAYOUTRECALL(FILE) coming in.
*/
spin_lock(&clp->cl_lock);
if (list_empty(&lo->plh_layouts))
list_add_tail(&lo->plh_layouts, &server->layouts);
spin_unlock(&clp->cl_lock);
}
pg_offset = arg.offset & ~PAGE_MASK; pg_offset = arg.offset & ~PAGE_MASK;
if (pg_offset) { if (pg_offset) {
@ -1838,7 +1916,15 @@ lookup_again:
if (arg.length != NFS4_MAX_UINT64) if (arg.length != NFS4_MAX_UINT64)
arg.length = PAGE_ALIGN(arg.length); arg.length = PAGE_ALIGN(arg.length);
lseg = send_layoutget(lo, ctx, &stateid, &arg, &timeout, gfp_flags); lgp = pnfs_alloc_init_layoutget_args(ino, ctx, &stateid, &arg, gfp_flags);
if (!lgp) {
trace_pnfs_update_layout(ino, pos, count, iomode, lo, NULL,
PNFS_UPDATE_LAYOUT_NOMEM);
atomic_dec(&lo->plh_outstanding);
goto out_put_layout_hdr;
}
lseg = nfs4_proc_layoutget(lgp, &timeout);
trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET); PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET);
atomic_dec(&lo->plh_outstanding); atomic_dec(&lo->plh_outstanding);
@ -1919,6 +2005,171 @@ pnfs_sanity_check_layout_range(struct pnfs_layout_range *range)
return true; return true;
} }
static struct pnfs_layout_hdr *
_pnfs_grab_empty_layout(struct inode *ino, struct nfs_open_context *ctx)
{
struct pnfs_layout_hdr *lo;
spin_lock(&ino->i_lock);
lo = pnfs_find_alloc_layout(ino, ctx, GFP_KERNEL);
if (!lo)
goto out_unlock;
if (!test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags))
goto out_unlock;
if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
goto out_unlock;
if (pnfs_layoutgets_blocked(lo))
goto out_unlock;
if (test_and_set_bit(NFS_LAYOUT_FIRST_LAYOUTGET, &lo->plh_flags))
goto out_unlock;
atomic_inc(&lo->plh_outstanding);
spin_unlock(&ino->i_lock);
_add_to_server_list(lo, NFS_SERVER(ino));
return lo;
out_unlock:
spin_unlock(&ino->i_lock);
pnfs_put_layout_hdr(lo);
return NULL;
}
extern const nfs4_stateid current_stateid;
static void _lgopen_prepare_attached(struct nfs4_opendata *data,
struct nfs_open_context *ctx)
{
struct inode *ino = data->dentry->d_inode;
struct pnfs_layout_range rng = {
.iomode = (data->o_arg.fmode & FMODE_WRITE) ?
IOMODE_RW: IOMODE_READ,
.offset = 0,
.length = NFS4_MAX_UINT64,
};
struct nfs4_layoutget *lgp;
struct pnfs_layout_hdr *lo;
/* Heuristic: don't send layoutget if we have cached data */
if (rng.iomode == IOMODE_READ &&
(i_size_read(ino) == 0 || ino->i_mapping->nrpages != 0))
return;
lo = _pnfs_grab_empty_layout(ino, ctx);
if (!lo)
return;
lgp = pnfs_alloc_init_layoutget_args(ino, ctx, &current_stateid,
&rng, GFP_KERNEL);
if (!lgp) {
pnfs_clear_first_layoutget(lo);
pnfs_put_layout_hdr(lo);
return;
}
data->lgp = lgp;
data->o_arg.lg_args = &lgp->args;
data->o_res.lg_res = &lgp->res;
}
static void _lgopen_prepare_floating(struct nfs4_opendata *data,
struct nfs_open_context *ctx)
{
struct pnfs_layout_range rng = {
.iomode = (data->o_arg.fmode & FMODE_WRITE) ?
IOMODE_RW: IOMODE_READ,
.offset = 0,
.length = NFS4_MAX_UINT64,
};
struct nfs4_layoutget *lgp;
lgp = pnfs_alloc_init_layoutget_args(NULL, ctx, &current_stateid,
&rng, GFP_KERNEL);
if (!lgp)
return;
data->lgp = lgp;
data->o_arg.lg_args = &lgp->args;
data->o_res.lg_res = &lgp->res;
}
/*
 * Decide whether a LAYOUTGET should accompany this OPEN and, if so,
 * prepare it.
 *
 * Nothing is done unless pNFS is enabled on the server of the parent
 * directory, its layout driver sets PNFS_LAYOUTGET_ON_OPEN, and the
 * server still has NFS_CAP_LGOPEN.  The "attached" variant is used when
 * data->state is set, the "floating" variant otherwise.
 */
void pnfs_lgopen_prepare(struct nfs4_opendata *data,
			 struct nfs_open_context *ctx)
{
	struct inode *dir = data->dir->d_inode;
	struct nfs_server *server = NFS_SERVER(dir);

	if (!pnfs_enabled_sb(server))
		return;
	/* Only reached with pNFS enabled, as in the original && chain */
	if (!(server->pnfs_curr_ld->flags & PNFS_LAYOUTGET_ON_OPEN))
		return;
	/* Could check on max_ops, but currently hardcoded high enough */
	if (!nfs_server_capable(dir, NFS_CAP_LGOPEN))
		return;

	if (data->state)
		_lgopen_prepare_attached(data, ctx);
	else
		_lgopen_prepare_floating(data, ctx);
}
void pnfs_parse_lgopen(struct inode *ino, struct nfs4_layoutget *lgp,
struct nfs_open_context *ctx)
{
struct pnfs_layout_hdr *lo;
struct pnfs_layout_segment *lseg;
struct nfs_server *srv = NFS_SERVER(ino);
u32 iomode;
if (!lgp)
return;
dprintk("%s: entered with status %i\n", __func__, lgp->res.status);
if (lgp->res.status) {
switch (lgp->res.status) {
default:
break;
/*
* Halt lgopen attempts if the server doesn't recognise
* the "current stateid" value, the layout type, or the
* layoutget operation as being valid.
* Also if it complains about too many ops in the compound
* or of the request/reply being too big.
*/
case -NFS4ERR_BAD_STATEID:
case -NFS4ERR_NOTSUPP:
case -NFS4ERR_REP_TOO_BIG:
case -NFS4ERR_REP_TOO_BIG_TO_CACHE:
case -NFS4ERR_REQ_TOO_BIG:
case -NFS4ERR_TOO_MANY_OPS:
case -NFS4ERR_UNKNOWN_LAYOUTTYPE:
srv->caps &= ~NFS_CAP_LGOPEN;
}
return;
}
if (!lgp->args.inode) {
lo = _pnfs_grab_empty_layout(ino, ctx);
if (!lo)
return;
lgp->args.inode = ino;
} else
lo = NFS_I(lgp->args.inode)->layout;
if (read_seqcount_retry(&srv->nfs_client->cl_callback_count,
lgp->callback_count))
return;
lseg = pnfs_layout_process(lgp);
if (!IS_ERR(lseg)) {
iomode = lgp->args.range.iomode;
pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode));
pnfs_put_lseg(lseg);
}
}
/*
 * Tear down the layoutget that was attached to an OPEN.
 *
 * @lgp: layoutget to release; NULL is accepted and ignored
 *
 * If the layoutget has an inode recorded, the layout header's
 * plh_outstanding count is decremented and the first-layoutget state is
 * cleared before the layoutget itself is freed.
 */
void nfs4_lgopen_release(struct nfs4_layoutget *lgp)
{
	struct inode *ino;

	if (lgp == NULL)
		return;

	ino = lgp->args.inode;
	if (ino != NULL) {
		struct pnfs_layout_hdr *hdr = NFS_I(ino)->layout;

		atomic_dec(&hdr->plh_outstanding);
		pnfs_clear_first_layoutget(hdr);
	}
	pnfs_layoutget_free(lgp);
}
struct pnfs_layout_segment * struct pnfs_layout_segment *
pnfs_layout_process(struct nfs4_layoutget *lgp) pnfs_layout_process(struct nfs4_layoutget *lgp)
{ {
@ -1984,8 +2235,6 @@ out_forget:
spin_unlock(&ino->i_lock); spin_unlock(&ino->i_lock);
lseg->pls_layout = lo; lseg->pls_layout = lo;
NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg); NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
if (!pnfs_layout_is_valid(lo))
nfs_commit_inode(ino, 0);
return ERR_PTR(-EAGAIN); return ERR_PTR(-EAGAIN);
} }

View File

@ -35,6 +35,8 @@
#include <linux/nfs_page.h> #include <linux/nfs_page.h>
#include <linux/workqueue.h> #include <linux/workqueue.h>
struct nfs4_opendata;
enum { enum {
NFS_LSEG_VALID = 0, /* cleared when lseg is recalled/returned */ NFS_LSEG_VALID = 0, /* cleared when lseg is recalled/returned */
NFS_LSEG_ROC, /* roc bit received from server */ NFS_LSEG_ROC, /* roc bit received from server */
@ -110,6 +112,7 @@ enum layoutdriver_policy_flags {
PNFS_LAYOUTRET_ON_SETATTR = 1 << 0, PNFS_LAYOUTRET_ON_SETATTR = 1 << 0,
PNFS_LAYOUTRET_ON_ERROR = 1 << 1, PNFS_LAYOUTRET_ON_ERROR = 1 << 1,
PNFS_READ_WHOLE_PAGE = 1 << 2, PNFS_READ_WHOLE_PAGE = 1 << 2,
PNFS_LAYOUTGET_ON_OPEN = 1 << 3,
}; };
struct nfs4_deviceid_node; struct nfs4_deviceid_node;
@ -223,10 +226,11 @@ extern int pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *);
extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *); extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *);
/* nfs4proc.c */ /* nfs4proc.c */
extern size_t max_response_pages(struct nfs_server *server);
extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, extern int nfs4_proc_getdeviceinfo(struct nfs_server *server,
struct pnfs_device *dev, struct pnfs_device *dev,
struct rpc_cred *cred); struct rpc_cred *cred);
extern struct pnfs_layout_segment* nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout, gfp_t gfp_flags); extern struct pnfs_layout_segment* nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout);
extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync); extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync);
/* pnfs.c */ /* pnfs.c */
@ -246,6 +250,7 @@ size_t pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio,
struct nfs_page *prev, struct nfs_page *req); struct nfs_page *prev, struct nfs_page *req);
void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg); void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg);
struct pnfs_layout_segment *pnfs_layout_process(struct nfs4_layoutget *lgp); struct pnfs_layout_segment *pnfs_layout_process(struct nfs4_layoutget *lgp);
void pnfs_layoutget_free(struct nfs4_layoutget *lgp);
void pnfs_free_lseg_list(struct list_head *tmp_list); void pnfs_free_lseg_list(struct list_head *tmp_list);
void pnfs_destroy_layout(struct nfs_inode *); void pnfs_destroy_layout(struct nfs_inode *);
void pnfs_destroy_all_layouts(struct nfs_client *); void pnfs_destroy_all_layouts(struct nfs_client *);
@ -375,6 +380,11 @@ void pnfs_layout_mark_request_commit(struct nfs_page *req,
struct pnfs_layout_segment *lseg, struct pnfs_layout_segment *lseg,
struct nfs_commit_info *cinfo, struct nfs_commit_info *cinfo,
u32 ds_commit_idx); u32 ds_commit_idx);
void pnfs_lgopen_prepare(struct nfs4_opendata *data,
struct nfs_open_context *ctx);
void pnfs_parse_lgopen(struct inode *ino, struct nfs4_layoutget *lgp,
struct nfs_open_context *ctx);
void nfs4_lgopen_release(struct nfs4_layoutget *lgp);
static inline bool nfs_have_layout(struct inode *inode) static inline bool nfs_have_layout(struct inode *inode)
{ {
@ -775,6 +785,22 @@ static inline bool nfs4_refresh_layout_stateid(nfs4_stateid *dst,
{ {
return false; return false;
} }
/*
 * No-op variant of pnfs_lgopen_prepare(): does nothing with @data or @ctx.
 * NOTE(review): presumably the build without CONFIG_NFS_V4_1 (the closing
 * #endif for that symbol follows these stubs) -- confirm against the full
 * header.
 */
static inline void pnfs_lgopen_prepare(struct nfs4_opendata *data,
struct nfs_open_context *ctx)
{
}
/*
 * No-op variant of pnfs_parse_lgopen(): all three arguments are ignored.
 * NOTE(review): presumably the build without CONFIG_NFS_V4_1 -- confirm
 * against the full header.
 */
static inline void pnfs_parse_lgopen(struct inode *ino,
struct nfs4_layoutget *lgp,
struct nfs_open_context *ctx)
{
}
/*
 * No-op variant of nfs4_lgopen_release(): @lgp is ignored and not freed.
 * NOTE(review): presumably the build without CONFIG_NFS_V4_1 -- confirm
 * against the full header.
 */
static inline void nfs4_lgopen_release(struct nfs4_layoutget *lgp)
{
}
#endif /* CONFIG_NFS_V4_1 */ #endif /* CONFIG_NFS_V4_1 */
#if IS_ENABLED(CONFIG_NFS_V4_2) #if IS_ENABLED(CONFIG_NFS_V4_2)

View File

@ -99,7 +99,8 @@ nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
*/ */
static int static int
nfs_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, nfs_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fattr *fattr, struct nfs4_label *label) struct nfs_fattr *fattr, struct nfs4_label *label,
struct inode *inode)
{ {
struct rpc_message msg = { struct rpc_message msg = {
.rpc_proc = &nfs_procedures[NFSPROC_GETATTR], .rpc_proc = &nfs_procedures[NFSPROC_GETATTR],
@ -321,7 +322,9 @@ nfs_proc_remove(struct inode *dir, struct dentry *dentry)
} }
static void static void
nfs_proc_unlink_setup(struct rpc_message *msg, struct dentry *dentry) nfs_proc_unlink_setup(struct rpc_message *msg,
struct dentry *dentry,
struct inode *inode)
{ {
msg->rpc_proc = &nfs_procedures[NFSPROC_REMOVE]; msg->rpc_proc = &nfs_procedures[NFSPROC_REMOVE];
} }
@ -618,7 +621,8 @@ static int nfs_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
} }
static void nfs_proc_write_setup(struct nfs_pgio_header *hdr, static void nfs_proc_write_setup(struct nfs_pgio_header *hdr,
struct rpc_message *msg) struct rpc_message *msg,
struct rpc_clnt **clnt)
{ {
/* Note: NFSv2 ignores @stable and always uses NFS_FILE_SYNC */ /* Note: NFSv2 ignores @stable and always uses NFS_FILE_SYNC */
hdr->args.stable = NFS_FILE_SYNC; hdr->args.stable = NFS_FILE_SYNC;
@ -631,7 +635,8 @@ static void nfs_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit
} }
static void static void
nfs_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg) nfs_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg,
struct rpc_clnt **clnt)
{ {
BUG(); BUG();
} }

View File

@ -85,7 +85,7 @@ static const struct rpc_call_ops nfs_unlink_ops = {
.rpc_call_prepare = nfs_unlink_prepare, .rpc_call_prepare = nfs_unlink_prepare,
}; };
static void nfs_do_call_unlink(struct nfs_unlinkdata *data) static void nfs_do_call_unlink(struct inode *inode, struct nfs_unlinkdata *data)
{ {
struct rpc_message msg = { struct rpc_message msg = {
.rpc_argp = &data->args, .rpc_argp = &data->args,
@ -105,7 +105,7 @@ static void nfs_do_call_unlink(struct nfs_unlinkdata *data)
data->args.fh = NFS_FH(dir); data->args.fh = NFS_FH(dir);
nfs_fattr_init(data->res.dir_attr); nfs_fattr_init(data->res.dir_attr);
NFS_PROTO(dir)->unlink_setup(&msg, data->dentry); NFS_PROTO(dir)->unlink_setup(&msg, data->dentry, inode);
task_setup_data.rpc_client = NFS_CLIENT(dir); task_setup_data.rpc_client = NFS_CLIENT(dir);
task = rpc_run_task(&task_setup_data); task = rpc_run_task(&task_setup_data);
@ -113,7 +113,7 @@ static void nfs_do_call_unlink(struct nfs_unlinkdata *data)
rpc_put_task_async(task); rpc_put_task_async(task);
} }
static int nfs_call_unlink(struct dentry *dentry, struct nfs_unlinkdata *data) static int nfs_call_unlink(struct dentry *dentry, struct inode *inode, struct nfs_unlinkdata *data)
{ {
struct inode *dir = d_inode(dentry->d_parent); struct inode *dir = d_inode(dentry->d_parent);
struct dentry *alias; struct dentry *alias;
@ -153,7 +153,7 @@ static int nfs_call_unlink(struct dentry *dentry, struct nfs_unlinkdata *data)
return ret; return ret;
} }
data->dentry = alias; data->dentry = alias;
nfs_do_call_unlink(data); nfs_do_call_unlink(inode, data);
return 1; return 1;
} }
@ -231,7 +231,7 @@ nfs_complete_unlink(struct dentry *dentry, struct inode *inode)
dentry->d_fsdata = NULL; dentry->d_fsdata = NULL;
spin_unlock(&dentry->d_lock); spin_unlock(&dentry->d_lock);
if (NFS_STALE(inode) || !nfs_call_unlink(dentry, data)) if (NFS_STALE(inode) || !nfs_call_unlink(dentry, inode, data))
nfs_free_unlinkdata(data); nfs_free_unlinkdata(data);
} }
@ -448,6 +448,7 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry)
unsigned char silly[SILLYNAME_LEN + 1]; unsigned char silly[SILLYNAME_LEN + 1];
unsigned long long fileid; unsigned long long fileid;
struct dentry *sdentry; struct dentry *sdentry;
struct inode *inode = d_inode(dentry);
struct rpc_task *task; struct rpc_task *task;
int error = -EBUSY; int error = -EBUSY;
@ -485,6 +486,8 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry)
goto out; goto out;
} while (d_inode(sdentry) != NULL); /* need negative lookup */ } while (d_inode(sdentry) != NULL); /* need negative lookup */
ihold(inode);
/* queue unlink first. Can't do this from rpc_release as it /* queue unlink first. Can't do this from rpc_release as it
* has to allocate memory * has to allocate memory
*/ */
@ -509,6 +512,12 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry)
case 0: case 0:
/* The rename succeeded */ /* The rename succeeded */
nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
spin_lock(&inode->i_lock);
NFS_I(inode)->attr_gencount = nfs_inc_attr_generation_counter();
NFS_I(inode)->cache_validity |= NFS_INO_INVALID_CHANGE
| NFS_INO_INVALID_CTIME
| NFS_INO_REVAL_FORCED;
spin_unlock(&inode->i_lock);
d_move(dentry, sdentry); d_move(dentry, sdentry);
break; break;
case -ERESTARTSYS: case -ERESTARTSYS:
@ -519,6 +528,7 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry)
} }
rpc_put_task(task); rpc_put_task(task);
out_dput: out_dput:
iput(inode);
dput(sdentry); dput(sdentry);
out: out:
return error; return error;

View File

@ -1375,12 +1375,9 @@ static void nfs_initiate_write(struct nfs_pgio_header *hdr,
int priority = flush_task_priority(how); int priority = flush_task_priority(how);
task_setup_data->priority = priority; task_setup_data->priority = priority;
rpc_ops->write_setup(hdr, msg); rpc_ops->write_setup(hdr, msg, &task_setup_data->rpc_client);
trace_nfs_initiate_write(hdr->inode, hdr->io_start, hdr->good_bytes, trace_nfs_initiate_write(hdr->inode, hdr->io_start, hdr->good_bytes,
hdr->args.stable); hdr->args.stable);
nfs4_state_protect_write(NFS_SERVER(hdr->inode)->nfs_client,
&task_setup_data->rpc_client, msg, hdr);
} }
/* If a nfs_flush_* function fails, it should remove reqs from @head and /* If a nfs_flush_* function fails, it should remove reqs from @head and
@ -1669,14 +1666,11 @@ int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data,
.priority = priority, .priority = priority,
}; };
/* Set up the initial task struct. */ /* Set up the initial task struct. */
nfs_ops->commit_setup(data, &msg); nfs_ops->commit_setup(data, &msg, &task_setup_data.rpc_client);
trace_nfs_initiate_commit(data); trace_nfs_initiate_commit(data);
dprintk("NFS: initiated commit call\n"); dprintk("NFS: initiated commit call\n");
nfs4_state_protect(NFS_SERVER(data->inode)->nfs_client,
NFS_SP4_MACH_CRED_COMMIT, &task_setup_data.rpc_client, &msg);
task = rpc_run_task(&task_setup_data); task = rpc_run_task(&task_setup_data);
if (IS_ERR(task)) if (IS_ERR(task))
return PTR_ERR(task); return PTR_ERR(task);

View File

@ -28,6 +28,7 @@ struct nfs41_impl_id;
struct nfs_client { struct nfs_client {
refcount_t cl_count; refcount_t cl_count;
atomic_t cl_mds_count; atomic_t cl_mds_count;
seqcount_t cl_callback_count;
int cl_cons_state; /* current construction state (-ve: init error) */ int cl_cons_state; /* current construction state (-ve: init error) */
#define NFS_CS_READY 0 /* ready to be used */ #define NFS_CS_READY 0 /* ready to be used */
#define NFS_CS_INITING 1 /* busy initialising */ #define NFS_CS_INITING 1 /* busy initialising */
@ -235,6 +236,7 @@ struct nfs_server {
#define NFS_CAP_ACLS (1U << 3) #define NFS_CAP_ACLS (1U << 3)
#define NFS_CAP_ATOMIC_OPEN (1U << 4) #define NFS_CAP_ATOMIC_OPEN (1U << 4)
/* #define NFS_CAP_CHANGE_ATTR (1U << 5) */ /* #define NFS_CAP_CHANGE_ATTR (1U << 5) */
#define NFS_CAP_LGOPEN (1U << 5)
#define NFS_CAP_FILEID (1U << 6) #define NFS_CAP_FILEID (1U << 6)
#define NFS_CAP_MODE (1U << 7) #define NFS_CAP_MODE (1U << 7)
#define NFS_CAP_NLINK (1U << 8) #define NFS_CAP_NLINK (1U << 8)

View File

@ -259,6 +259,7 @@ struct nfs4_layoutget_args {
struct nfs4_layoutget_res { struct nfs4_layoutget_res {
struct nfs4_sequence_res seq_res; struct nfs4_sequence_res seq_res;
int status;
__u32 return_on_close; __u32 return_on_close;
struct pnfs_layout_range range; struct pnfs_layout_range range;
__u32 type; __u32 type;
@ -270,6 +271,7 @@ struct nfs4_layoutget {
struct nfs4_layoutget_args args; struct nfs4_layoutget_args args;
struct nfs4_layoutget_res res; struct nfs4_layoutget_res res;
struct rpc_cred *cred; struct rpc_cred *cred;
unsigned callback_count;
gfp_t gfp_flags; gfp_t gfp_flags;
}; };
@ -435,6 +437,7 @@ struct nfs_openargs {
enum createmode4 createmode; enum createmode4 createmode;
const struct nfs4_label *label; const struct nfs4_label *label;
umode_t umask; umode_t umask;
struct nfs4_layoutget_args *lg_args;
}; };
struct nfs_openres { struct nfs_openres {
@ -457,6 +460,7 @@ struct nfs_openres {
__u32 access_request; __u32 access_request;
__u32 access_supported; __u32 access_supported;
__u32 access_result; __u32 access_result;
struct nfs4_layoutget_res *lg_res;
}; };
/* /*
@ -1577,7 +1581,8 @@ struct nfs_rpc_ops {
struct dentry *(*try_mount) (int, const char *, struct nfs_mount_info *, struct dentry *(*try_mount) (int, const char *, struct nfs_mount_info *,
struct nfs_subversion *); struct nfs_subversion *);
int (*getattr) (struct nfs_server *, struct nfs_fh *, int (*getattr) (struct nfs_server *, struct nfs_fh *,
struct nfs_fattr *, struct nfs4_label *); struct nfs_fattr *, struct nfs4_label *,
struct inode *);
int (*setattr) (struct dentry *, struct nfs_fattr *, int (*setattr) (struct dentry *, struct nfs_fattr *,
struct iattr *); struct iattr *);
int (*lookup) (struct inode *, const struct qstr *, int (*lookup) (struct inode *, const struct qstr *,
@ -1591,7 +1596,7 @@ struct nfs_rpc_ops {
int (*create) (struct inode *, struct dentry *, int (*create) (struct inode *, struct dentry *,
struct iattr *, int); struct iattr *, int);
int (*remove) (struct inode *, struct dentry *); int (*remove) (struct inode *, struct dentry *);
void (*unlink_setup) (struct rpc_message *, struct dentry *); void (*unlink_setup) (struct rpc_message *, struct dentry *, struct inode *);
void (*unlink_rpc_prepare) (struct rpc_task *, struct nfs_unlinkdata *); void (*unlink_rpc_prepare) (struct rpc_task *, struct nfs_unlinkdata *);
int (*unlink_done) (struct rpc_task *, struct inode *); int (*unlink_done) (struct rpc_task *, struct inode *);
void (*rename_setup) (struct rpc_message *msg, void (*rename_setup) (struct rpc_message *msg,
@ -1620,9 +1625,11 @@ struct nfs_rpc_ops {
struct nfs_pgio_header *); struct nfs_pgio_header *);
void (*read_setup)(struct nfs_pgio_header *, struct rpc_message *); void (*read_setup)(struct nfs_pgio_header *, struct rpc_message *);
int (*read_done)(struct rpc_task *, struct nfs_pgio_header *); int (*read_done)(struct rpc_task *, struct nfs_pgio_header *);
void (*write_setup)(struct nfs_pgio_header *, struct rpc_message *); void (*write_setup)(struct nfs_pgio_header *, struct rpc_message *,
struct rpc_clnt **);
int (*write_done)(struct rpc_task *, struct nfs_pgio_header *); int (*write_done)(struct rpc_task *, struct nfs_pgio_header *);
void (*commit_setup) (struct nfs_commit_data *, struct rpc_message *); void (*commit_setup) (struct nfs_commit_data *, struct rpc_message *,
struct rpc_clnt **);
void (*commit_rpc_prepare)(struct rpc_task *, struct nfs_commit_data *); void (*commit_rpc_prepare)(struct rpc_task *, struct nfs_commit_data *);
int (*commit_done) (struct rpc_task *, struct nfs_commit_data *); int (*commit_done) (struct rpc_task *, struct nfs_commit_data *);
int (*lock)(struct file *, int, struct file_lock *); int (*lock)(struct file *, int, struct file_lock *);

View File

@ -403,6 +403,19 @@ static inline void list_splice_tail_init_rcu(struct list_head *list,
&pos->member != (head); \ &pos->member != (head); \
pos = list_entry_rcu(pos->member.next, typeof(*pos), member)) pos = list_entry_rcu(pos->member.next, typeof(*pos), member))
/**
 * list_for_each_entry_from_rcu - iterate over a list from current point
 * @pos:	the type * to use as a loop cursor.
 * @head:	the head for your list.
 * @member:	the name of the list_head within the struct.
 *
 * Iterate over the tail of a list starting from a given position,
 * which must have been in the list when the RCU read lock was taken.
 *
 * @pos is evaluated several times and assigned to, so it must be an
 * lvalue without side effects.  Every use of @pos as an operand is
 * parenthesized so that an lvalue expression such as *pp works as well
 * as a plain variable name.
 */
#define list_for_each_entry_from_rcu(pos, head, member)			\
	for (; &(pos)->member != (head);				\
		pos = list_entry_rcu((pos)->member.next, typeof(*(pos)), member))
/** /**
* hlist_del_rcu - deletes entry from hash list without re-initialization * hlist_del_rcu - deletes entry from hash list without re-initialization
* @n: the element to delete from the hash list. * @n: the element to delete from the hash list.

View File

@ -1,3 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/* /*
* Copyright (c) 2015-2017 Oracle. All rights reserved. * Copyright (c) 2015-2017 Oracle. All rights reserved.
* Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.

View File

@ -84,7 +84,6 @@ struct rpc_rqst {
void (*rq_release_snd_buf)(struct rpc_rqst *); /* release rq_enc_pages */ void (*rq_release_snd_buf)(struct rpc_rqst *); /* release rq_enc_pages */
struct list_head rq_list; struct list_head rq_list;
void *rq_xprtdata; /* Per-xprt private data */
void *rq_buffer; /* Call XDR encode buffer */ void *rq_buffer; /* Call XDR encode buffer */
size_t rq_callsize; size_t rq_callsize;
void *rq_rbuffer; /* Reply XDR decode buffer */ void *rq_rbuffer; /* Reply XDR decode buffer */
@ -127,6 +126,8 @@ struct rpc_xprt_ops {
int (*reserve_xprt)(struct rpc_xprt *xprt, struct rpc_task *task); int (*reserve_xprt)(struct rpc_xprt *xprt, struct rpc_task *task);
void (*release_xprt)(struct rpc_xprt *xprt, struct rpc_task *task); void (*release_xprt)(struct rpc_xprt *xprt, struct rpc_task *task);
void (*alloc_slot)(struct rpc_xprt *xprt, struct rpc_task *task); void (*alloc_slot)(struct rpc_xprt *xprt, struct rpc_task *task);
void (*free_slot)(struct rpc_xprt *xprt,
struct rpc_rqst *req);
void (*rpcbind)(struct rpc_task *task); void (*rpcbind)(struct rpc_task *task);
void (*set_port)(struct rpc_xprt *xprt, unsigned short port); void (*set_port)(struct rpc_xprt *xprt, unsigned short port);
void (*connect)(struct rpc_xprt *xprt, struct rpc_task *task); void (*connect)(struct rpc_xprt *xprt, struct rpc_task *task);
@ -324,10 +325,13 @@ struct xprt_class {
struct rpc_xprt *xprt_create_transport(struct xprt_create *args); struct rpc_xprt *xprt_create_transport(struct xprt_create *args);
void xprt_connect(struct rpc_task *task); void xprt_connect(struct rpc_task *task);
void xprt_reserve(struct rpc_task *task); void xprt_reserve(struct rpc_task *task);
void xprt_request_init(struct rpc_task *task);
void xprt_retry_reserve(struct rpc_task *task); void xprt_retry_reserve(struct rpc_task *task);
int xprt_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task); int xprt_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task);
int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task); int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task);
void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task); void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task);
void xprt_free_slot(struct rpc_xprt *xprt,
struct rpc_rqst *req);
void xprt_lock_and_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task); void xprt_lock_and_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task);
bool xprt_prepare_transmit(struct rpc_task *task); bool xprt_prepare_transmit(struct rpc_task *task);
void xprt_transmit(struct rpc_task *task); void xprt_transmit(struct rpc_task *task);

View File

@ -1,3 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/* /*
* Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
* *

View File

@ -530,24 +530,54 @@ TRACE_EVENT(xprtrdma_post_send,
TRACE_EVENT(xprtrdma_post_recv, TRACE_EVENT(xprtrdma_post_recv,
TP_PROTO( TP_PROTO(
const struct rpcrdma_rep *rep, const struct ib_cqe *cqe
int status
), ),
TP_ARGS(rep, status), TP_ARGS(cqe),
TP_STRUCT__entry( TP_STRUCT__entry(
__field(const void *, rep) __field(const void *, cqe)
__field(int, status)
), ),
TP_fast_assign( TP_fast_assign(
__entry->rep = rep; __entry->cqe = cqe;
__entry->status = status;
), ),
TP_printk("rep=%p status=%d", TP_printk("cqe=%p",
__entry->rep, __entry->status __entry->cqe
)
);
TRACE_EVENT(xprtrdma_post_recvs,
TP_PROTO(
const struct rpcrdma_xprt *r_xprt,
unsigned int count,
int status
),
TP_ARGS(r_xprt, count, status),
TP_STRUCT__entry(
__field(const void *, r_xprt)
__field(unsigned int, count)
__field(int, status)
__field(int, posted)
__string(addr, rpcrdma_addrstr(r_xprt))
__string(port, rpcrdma_portstr(r_xprt))
),
TP_fast_assign(
__entry->r_xprt = r_xprt;
__entry->count = count;
__entry->status = status;
__entry->posted = r_xprt->rx_buf.rb_posted_receives;
__assign_str(addr, rpcrdma_addrstr(r_xprt));
__assign_str(port, rpcrdma_portstr(r_xprt));
),
TP_printk("peer=[%s]:%s r_xprt=%p: %u new recvs, %d active (rc %d)",
__get_str(addr), __get_str(port), __entry->r_xprt,
__entry->count, __entry->posted, __entry->status
) )
); );
@ -586,28 +616,32 @@ TRACE_EVENT(xprtrdma_wc_send,
TRACE_EVENT(xprtrdma_wc_receive, TRACE_EVENT(xprtrdma_wc_receive,
TP_PROTO( TP_PROTO(
const struct rpcrdma_rep *rep,
const struct ib_wc *wc const struct ib_wc *wc
), ),
TP_ARGS(rep, wc), TP_ARGS(wc),
TP_STRUCT__entry( TP_STRUCT__entry(
__field(const void *, rep) __field(const void *, cqe)
__field(unsigned int, byte_len) __field(u32, byte_len)
__field(unsigned int, status) __field(unsigned int, status)
__field(unsigned int, vendor_err) __field(u32, vendor_err)
), ),
TP_fast_assign( TP_fast_assign(
__entry->rep = rep; __entry->cqe = wc->wr_cqe;
__entry->byte_len = wc->byte_len;
__entry->status = wc->status; __entry->status = wc->status;
__entry->vendor_err = __entry->status ? wc->vendor_err : 0; if (wc->status) {
__entry->byte_len = 0;
__entry->vendor_err = wc->vendor_err;
} else {
__entry->byte_len = wc->byte_len;
__entry->vendor_err = 0;
}
), ),
TP_printk("rep=%p, %u bytes: %s (%u/0x%x)", TP_printk("cqe=%p %u bytes: %s (%u/0x%x)",
__entry->rep, __entry->byte_len, __entry->cqe, __entry->byte_len,
rdma_show_wc_status(__entry->status), rdma_show_wc_status(__entry->status),
__entry->status, __entry->vendor_err __entry->status, __entry->vendor_err
) )
@ -618,6 +652,7 @@ DEFINE_FRWR_DONE_EVENT(xprtrdma_wc_li);
DEFINE_FRWR_DONE_EVENT(xprtrdma_wc_li_wake); DEFINE_FRWR_DONE_EVENT(xprtrdma_wc_li_wake);
DEFINE_MR_EVENT(xprtrdma_localinv); DEFINE_MR_EVENT(xprtrdma_localinv);
DEFINE_MR_EVENT(xprtrdma_dma_map);
DEFINE_MR_EVENT(xprtrdma_dma_unmap); DEFINE_MR_EVENT(xprtrdma_dma_unmap);
DEFINE_MR_EVENT(xprtrdma_remoteinv); DEFINE_MR_EVENT(xprtrdma_remoteinv);
DEFINE_MR_EVENT(xprtrdma_recover_mr); DEFINE_MR_EVENT(xprtrdma_recover_mr);
@ -801,7 +836,6 @@ TRACE_EVENT(xprtrdma_allocate,
__field(unsigned int, task_id) __field(unsigned int, task_id)
__field(unsigned int, client_id) __field(unsigned int, client_id)
__field(const void *, req) __field(const void *, req)
__field(const void *, rep)
__field(size_t, callsize) __field(size_t, callsize)
__field(size_t, rcvsize) __field(size_t, rcvsize)
), ),
@ -810,15 +844,13 @@ TRACE_EVENT(xprtrdma_allocate,
__entry->task_id = task->tk_pid; __entry->task_id = task->tk_pid;
__entry->client_id = task->tk_client->cl_clid; __entry->client_id = task->tk_client->cl_clid;
__entry->req = req; __entry->req = req;
__entry->rep = req ? req->rl_reply : NULL;
__entry->callsize = task->tk_rqstp->rq_callsize; __entry->callsize = task->tk_rqstp->rq_callsize;
__entry->rcvsize = task->tk_rqstp->rq_rcvsize; __entry->rcvsize = task->tk_rqstp->rq_rcvsize;
), ),
TP_printk("task:%u@%u req=%p rep=%p (%zu, %zu)", TP_printk("task:%u@%u req=%p (%zu, %zu)",
__entry->task_id, __entry->client_id, __entry->task_id, __entry->client_id,
__entry->req, __entry->rep, __entry->req, __entry->callsize, __entry->rcvsize
__entry->callsize, __entry->rcvsize
) )
); );
@ -850,8 +882,6 @@ TRACE_EVENT(xprtrdma_rpc_done,
) )
); );
DEFINE_RXPRT_EVENT(xprtrdma_noreps);
/** /**
** Callback events ** Callback events
**/ **/

View File

@ -1546,6 +1546,7 @@ call_reserveresult(struct rpc_task *task)
task->tk_status = 0; task->tk_status = 0;
if (status >= 0) { if (status >= 0) {
if (task->tk_rqstp) { if (task->tk_rqstp) {
xprt_request_init(task);
task->tk_action = call_refresh; task->tk_action = call_refresh;
return; return;
} }

View File

@ -66,7 +66,7 @@
* Local functions * Local functions
*/ */
static void xprt_init(struct rpc_xprt *xprt, struct net *net); static void xprt_init(struct rpc_xprt *xprt, struct net *net);
static void xprt_request_init(struct rpc_task *, struct rpc_xprt *); static __be32 xprt_alloc_xid(struct rpc_xprt *xprt);
static void xprt_connect_status(struct rpc_task *task); static void xprt_connect_status(struct rpc_task *task);
static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *); static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *);
static void __xprt_put_cong(struct rpc_xprt *, struct rpc_rqst *); static void __xprt_put_cong(struct rpc_xprt *, struct rpc_rqst *);
@ -987,6 +987,8 @@ bool xprt_prepare_transmit(struct rpc_task *task)
task->tk_status = -EAGAIN; task->tk_status = -EAGAIN;
goto out_unlock; goto out_unlock;
} }
if (!bc_prealloc(req) && !req->rq_xmit_bytes_sent)
req->rq_xid = xprt_alloc_xid(xprt);
ret = true; ret = true;
out_unlock: out_unlock:
spin_unlock_bh(&xprt->transport_lock); spin_unlock_bh(&xprt->transport_lock);
@ -1163,10 +1165,10 @@ void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task)
out_init_req: out_init_req:
xprt->stat.max_slots = max_t(unsigned int, xprt->stat.max_slots, xprt->stat.max_slots = max_t(unsigned int, xprt->stat.max_slots,
xprt->num_reqs); xprt->num_reqs);
spin_unlock(&xprt->reserve_lock);
task->tk_status = 0; task->tk_status = 0;
task->tk_rqstp = req; task->tk_rqstp = req;
xprt_request_init(task, xprt);
spin_unlock(&xprt->reserve_lock);
} }
EXPORT_SYMBOL_GPL(xprt_alloc_slot); EXPORT_SYMBOL_GPL(xprt_alloc_slot);
@ -1184,7 +1186,7 @@ void xprt_lock_and_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task)
} }
EXPORT_SYMBOL_GPL(xprt_lock_and_alloc_slot); EXPORT_SYMBOL_GPL(xprt_lock_and_alloc_slot);
static void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req) void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req)
{ {
spin_lock(&xprt->reserve_lock); spin_lock(&xprt->reserve_lock);
if (!xprt_dynamic_free_slot(xprt, req)) { if (!xprt_dynamic_free_slot(xprt, req)) {
@ -1194,6 +1196,7 @@ static void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req)
xprt_wake_up_backlog(xprt); xprt_wake_up_backlog(xprt);
spin_unlock(&xprt->reserve_lock); spin_unlock(&xprt->reserve_lock);
} }
EXPORT_SYMBOL_GPL(xprt_free_slot);
static void xprt_free_all_slots(struct rpc_xprt *xprt) static void xprt_free_all_slots(struct rpc_xprt *xprt)
{ {
@ -1303,8 +1306,9 @@ static inline void xprt_init_xid(struct rpc_xprt *xprt)
xprt->xid = prandom_u32(); xprt->xid = prandom_u32();
} }
static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt) void xprt_request_init(struct rpc_task *task)
{ {
struct rpc_xprt *xprt = task->tk_xprt;
struct rpc_rqst *req = task->tk_rqstp; struct rpc_rqst *req = task->tk_rqstp;
INIT_LIST_HEAD(&req->rq_list); INIT_LIST_HEAD(&req->rq_list);
@ -1312,7 +1316,6 @@ static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt)
req->rq_task = task; req->rq_task = task;
req->rq_xprt = xprt; req->rq_xprt = xprt;
req->rq_buffer = NULL; req->rq_buffer = NULL;
req->rq_xid = xprt_alloc_xid(xprt);
req->rq_connect_cookie = xprt->connect_cookie - 1; req->rq_connect_cookie = xprt->connect_cookie - 1;
req->rq_bytes_sent = 0; req->rq_bytes_sent = 0;
req->rq_snd_buf.len = 0; req->rq_snd_buf.len = 0;
@ -1373,7 +1376,7 @@ void xprt_release(struct rpc_task *task)
dprintk("RPC: %5u release request %p\n", task->tk_pid, req); dprintk("RPC: %5u release request %p\n", task->tk_pid, req);
if (likely(!bc_prealloc(req))) if (likely(!bc_prealloc(req)))
xprt_free_slot(xprt, req); xprt->ops->free_slot(xprt, req);
else else
xprt_free_bc_request(req); xprt_free_bc_request(req);
} }

View File

@ -31,29 +31,41 @@ static void rpcrdma_bc_free_rqst(struct rpcrdma_xprt *r_xprt,
spin_unlock(&buf->rb_reqslock); spin_unlock(&buf->rb_reqslock);
rpcrdma_destroy_req(req); rpcrdma_destroy_req(req);
kfree(rqst);
} }
static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt, static int rpcrdma_bc_setup_reqs(struct rpcrdma_xprt *r_xprt,
struct rpc_rqst *rqst) unsigned int count)
{ {
struct rpcrdma_regbuf *rb; struct rpc_xprt *xprt = &r_xprt->rx_xprt;
struct rpcrdma_req *req; struct rpc_rqst *rqst;
size_t size; unsigned int i;
req = rpcrdma_create_req(r_xprt); for (i = 0; i < (count << 1); i++) {
if (IS_ERR(req)) struct rpcrdma_regbuf *rb;
return PTR_ERR(req); struct rpcrdma_req *req;
size_t size;
size = r_xprt->rx_data.inline_rsize; req = rpcrdma_create_req(r_xprt);
rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, GFP_KERNEL); if (IS_ERR(req))
if (IS_ERR(rb)) return PTR_ERR(req);
goto out_fail; rqst = &req->rl_slot;
req->rl_sendbuf = rb;
xdr_buf_init(&rqst->rq_snd_buf, rb->rg_base, rqst->rq_xprt = xprt;
min_t(size_t, size, PAGE_SIZE)); INIT_LIST_HEAD(&rqst->rq_list);
rpcrdma_set_xprtdata(rqst, req); INIT_LIST_HEAD(&rqst->rq_bc_list);
__set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
spin_lock_bh(&xprt->bc_pa_lock);
list_add(&rqst->rq_bc_pa_list, &xprt->bc_pa_list);
spin_unlock_bh(&xprt->bc_pa_lock);
size = r_xprt->rx_data.inline_rsize;
rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, GFP_KERNEL);
if (IS_ERR(rb))
goto out_fail;
req->rl_sendbuf = rb;
xdr_buf_init(&rqst->rq_snd_buf, rb->rg_base,
min_t(size_t, size, PAGE_SIZE));
}
return 0; return 0;
out_fail: out_fail:
@ -61,23 +73,6 @@ out_fail:
return -ENOMEM; return -ENOMEM;
} }
/* Create @count additional receive buffers and add them to the
 * rpcrdma_buffer's existing list of rep's. These are released
 * when the transport is destroyed.
 *
 * Returns zero when every rep was created (including @count == 0),
 * otherwise the first non-zero value from rpcrdma_create_rep();
 * no further reps are attempted after a failure.
 */
static int rpcrdma_bc_setup_reps(struct rpcrdma_xprt *r_xprt,
				 unsigned int count)
{
	unsigned int i;

	for (i = 0; i < count; i++) {
		int err = rpcrdma_create_rep(r_xprt);

		/* Give up on the first failure and report it as-is */
		if (err)
			return err;
	}
	return 0;
}
/** /**
* xprt_rdma_bc_setup - Pre-allocate resources for handling backchannel requests * xprt_rdma_bc_setup - Pre-allocate resources for handling backchannel requests
* @xprt: transport associated with these backchannel resources * @xprt: transport associated with these backchannel resources
@ -88,9 +83,6 @@ static int rpcrdma_bc_setup_reps(struct rpcrdma_xprt *r_xprt,
int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs) int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs)
{ {
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
struct rpcrdma_buffer *buffer = &r_xprt->rx_buf;
struct rpc_rqst *rqst;
unsigned int i;
int rc; int rc;
/* The backchannel reply path returns each rpc_rqst to the /* The backchannel reply path returns each rpc_rqst to the
@ -105,35 +97,11 @@ int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs)
if (reqs > RPCRDMA_BACKWARD_WRS >> 1) if (reqs > RPCRDMA_BACKWARD_WRS >> 1)
goto out_err; goto out_err;
for (i = 0; i < (reqs << 1); i++) { rc = rpcrdma_bc_setup_reqs(r_xprt, reqs);
rqst = kzalloc(sizeof(*rqst), GFP_KERNEL);
if (!rqst)
goto out_free;
dprintk("RPC: %s: new rqst %p\n", __func__, rqst);
rqst->rq_xprt = &r_xprt->rx_xprt;
INIT_LIST_HEAD(&rqst->rq_list);
INIT_LIST_HEAD(&rqst->rq_bc_list);
__set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
if (rpcrdma_bc_setup_rqst(r_xprt, rqst))
goto out_free;
spin_lock_bh(&xprt->bc_pa_lock);
list_add(&rqst->rq_bc_pa_list, &xprt->bc_pa_list);
spin_unlock_bh(&xprt->bc_pa_lock);
}
rc = rpcrdma_bc_setup_reps(r_xprt, reqs);
if (rc) if (rc)
goto out_free; goto out_free;
rc = rpcrdma_ep_post_extra_recv(r_xprt, reqs); r_xprt->rx_buf.rb_bc_srv_max_requests = reqs;
if (rc)
goto out_free;
buffer->rb_bc_srv_max_requests = reqs;
request_module("svcrdma"); request_module("svcrdma");
trace_xprtrdma_cb_setup(r_xprt, reqs); trace_xprtrdma_cb_setup(r_xprt, reqs);
return 0; return 0;
@ -237,6 +205,7 @@ int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst)
if (rc < 0) if (rc < 0)
goto failed_marshal; goto failed_marshal;
rpcrdma_post_recvs(r_xprt, true);
if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req))
goto drop_connection; goto drop_connection;
return 0; return 0;
@ -277,10 +246,14 @@ void xprt_rdma_bc_destroy(struct rpc_xprt *xprt, unsigned int reqs)
*/ */
void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst) void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst)
{ {
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
struct rpc_xprt *xprt = rqst->rq_xprt; struct rpc_xprt *xprt = rqst->rq_xprt;
dprintk("RPC: %s: freeing rqst %p (req %p)\n", dprintk("RPC: %s: freeing rqst %p (req %p)\n",
__func__, rqst, rpcr_to_rdmar(rqst)); __func__, rqst, req);
rpcrdma_recv_buffer_put(req->rl_reply);
req->rl_reply = NULL;
spin_lock_bh(&xprt->bc_pa_lock); spin_lock_bh(&xprt->bc_pa_lock);
list_add_tail(&rqst->rq_bc_pa_list, &xprt->bc_pa_list); list_add_tail(&rqst->rq_bc_pa_list, &xprt->bc_pa_list);

View File

@ -159,10 +159,32 @@ out_release:
fmr_op_release_mr(mr); fmr_op_release_mr(mr);
} }
/* On success, sets:
* ep->rep_attr.cap.max_send_wr
* ep->rep_attr.cap.max_recv_wr
* cdata->max_requests
* ia->ri_max_segs
*/
static int static int
fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
struct rpcrdma_create_data_internal *cdata) struct rpcrdma_create_data_internal *cdata)
{ {
int max_qp_wr;
max_qp_wr = ia->ri_device->attrs.max_qp_wr;
max_qp_wr -= RPCRDMA_BACKWARD_WRS;
max_qp_wr -= 1;
if (max_qp_wr < RPCRDMA_MIN_SLOT_TABLE)
return -ENOMEM;
if (cdata->max_requests > max_qp_wr)
cdata->max_requests = max_qp_wr;
ep->rep_attr.cap.max_send_wr = cdata->max_requests;
ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS;
ep->rep_attr.cap.max_send_wr += 1; /* for ib_drain_sq */
ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
ep->rep_attr.cap.max_recv_wr += 1; /* for ib_drain_rq */
ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS / ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS /
RPCRDMA_MAX_FMR_SGES); RPCRDMA_MAX_FMR_SGES);
return 0; return 0;
@ -222,6 +244,7 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
mr->mr_sg, i, mr->mr_dir); mr->mr_sg, i, mr->mr_dir);
if (!mr->mr_nents) if (!mr->mr_nents)
goto out_dmamap_err; goto out_dmamap_err;
trace_xprtrdma_dma_map(mr);
for (i = 0, dma_pages = mr->fmr.fm_physaddrs; i < mr->mr_nents; i++) for (i = 0, dma_pages = mr->fmr.fm_physaddrs; i < mr->mr_nents; i++)
dma_pages[i] = sg_dma_address(&mr->mr_sg[i]); dma_pages[i] = sg_dma_address(&mr->mr_sg[i]);

View File

@ -204,12 +204,22 @@ out_release:
frwr_op_release_mr(mr); frwr_op_release_mr(mr);
} }
/* On success, sets:
* ep->rep_attr.cap.max_send_wr
* ep->rep_attr.cap.max_recv_wr
* cdata->max_requests
* ia->ri_max_segs
*
* And these FRWR-related fields:
* ia->ri_max_frwr_depth
* ia->ri_mrtype
*/
static int static int
frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
struct rpcrdma_create_data_internal *cdata) struct rpcrdma_create_data_internal *cdata)
{ {
struct ib_device_attr *attrs = &ia->ri_device->attrs; struct ib_device_attr *attrs = &ia->ri_device->attrs;
int depth, delta; int max_qp_wr, depth, delta;
ia->ri_mrtype = IB_MR_TYPE_MEM_REG; ia->ri_mrtype = IB_MR_TYPE_MEM_REG;
if (attrs->device_cap_flags & IB_DEVICE_SG_GAPS_REG) if (attrs->device_cap_flags & IB_DEVICE_SG_GAPS_REG)
@ -243,14 +253,26 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
} while (delta > 0); } while (delta > 0);
} }
ep->rep_attr.cap.max_send_wr *= depth; max_qp_wr = ia->ri_device->attrs.max_qp_wr;
if (ep->rep_attr.cap.max_send_wr > attrs->max_qp_wr) { max_qp_wr -= RPCRDMA_BACKWARD_WRS;
cdata->max_requests = attrs->max_qp_wr / depth; max_qp_wr -= 1;
if (max_qp_wr < RPCRDMA_MIN_SLOT_TABLE)
return -ENOMEM;
if (cdata->max_requests > max_qp_wr)
cdata->max_requests = max_qp_wr;
ep->rep_attr.cap.max_send_wr = cdata->max_requests * depth;
if (ep->rep_attr.cap.max_send_wr > max_qp_wr) {
cdata->max_requests = max_qp_wr / depth;
if (!cdata->max_requests) if (!cdata->max_requests)
return -EINVAL; return -EINVAL;
ep->rep_attr.cap.max_send_wr = cdata->max_requests * ep->rep_attr.cap.max_send_wr = cdata->max_requests *
depth; depth;
} }
ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS;
ep->rep_attr.cap.max_send_wr += 1; /* for ib_drain_sq */
ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
ep->rep_attr.cap.max_recv_wr += 1; /* for ib_drain_rq */
ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS / ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS /
ia->ri_max_frwr_depth); ia->ri_max_frwr_depth);
@ -395,6 +417,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
mr->mr_nents = ib_dma_map_sg(ia->ri_device, mr->mr_sg, i, mr->mr_dir); mr->mr_nents = ib_dma_map_sg(ia->ri_device, mr->mr_sg, i, mr->mr_dir);
if (!mr->mr_nents) if (!mr->mr_nents)
goto out_dmamap_err; goto out_dmamap_err;
trace_xprtrdma_dma_map(mr);
ibmr = frwr->fr_mr; ibmr = frwr->fr_mr;
n = ib_map_mr_sg(ibmr, mr->mr_sg, mr->mr_nents, NULL, PAGE_SIZE); n = ib_map_mr_sg(ibmr, mr->mr_sg, mr->mr_nents, NULL, PAGE_SIZE);

View File

@ -1,3 +1,4 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/* /*
* Copyright (c) 2015, 2017 Oracle. All rights reserved. * Copyright (c) 2015, 2017 Oracle. All rights reserved.
*/ */

View File

@ -1,3 +1,4 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/* /*
* Copyright (c) 2014-2017 Oracle. All rights reserved. * Copyright (c) 2014-2017 Oracle. All rights reserved.
* Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
@ -57,14 +58,6 @@
# define RPCDBG_FACILITY RPCDBG_TRANS # define RPCDBG_FACILITY RPCDBG_TRANS
#endif #endif
static const char transfertypes[][12] = {
"inline", /* no chunks */
"read list", /* some argument via rdma read */
"*read list", /* entire request via rdma read */
"write list", /* some result via rdma write */
"reply chunk" /* entire reply via rdma write */
};
/* Returns size of largest RPC-over-RDMA header in a Call message /* Returns size of largest RPC-over-RDMA header in a Call message
* *
* The largest Call header contains a full-size Read list and a * The largest Call header contains a full-size Read list and a
@ -233,7 +226,7 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
*/ */
*ppages = alloc_page(GFP_ATOMIC); *ppages = alloc_page(GFP_ATOMIC);
if (!*ppages) if (!*ppages)
return -EAGAIN; return -ENOBUFS;
} }
seg->mr_page = *ppages; seg->mr_page = *ppages;
seg->mr_offset = (char *)page_base; seg->mr_offset = (char *)page_base;
@ -368,7 +361,7 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
false, &mr); false, &mr);
if (IS_ERR(seg)) if (IS_ERR(seg))
goto out_maperr; return PTR_ERR(seg);
rpcrdma_mr_push(mr, &req->rl_registered); rpcrdma_mr_push(mr, &req->rl_registered);
if (encode_read_segment(xdr, mr, pos) < 0) if (encode_read_segment(xdr, mr, pos) < 0)
@ -380,11 +373,6 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
} while (nsegs); } while (nsegs);
return 0; return 0;
out_maperr:
if (PTR_ERR(seg) == -EAGAIN)
xprt_wait_for_buffer_space(rqst->rq_task, NULL);
return PTR_ERR(seg);
} }
/* Register and XDR encode the Write list. Supports encoding a list /* Register and XDR encode the Write list. Supports encoding a list
@ -431,7 +419,7 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
true, &mr); true, &mr);
if (IS_ERR(seg)) if (IS_ERR(seg))
goto out_maperr; return PTR_ERR(seg);
rpcrdma_mr_push(mr, &req->rl_registered); rpcrdma_mr_push(mr, &req->rl_registered);
if (encode_rdma_segment(xdr, mr) < 0) if (encode_rdma_segment(xdr, mr) < 0)
@ -448,11 +436,6 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
*segcount = cpu_to_be32(nchunks); *segcount = cpu_to_be32(nchunks);
return 0; return 0;
out_maperr:
if (PTR_ERR(seg) == -EAGAIN)
xprt_wait_for_buffer_space(rqst->rq_task, NULL);
return PTR_ERR(seg);
} }
/* Register and XDR encode the Reply chunk. Supports encoding an array /* Register and XDR encode the Reply chunk. Supports encoding an array
@ -494,7 +477,7 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
true, &mr); true, &mr);
if (IS_ERR(seg)) if (IS_ERR(seg))
goto out_maperr; return PTR_ERR(seg);
rpcrdma_mr_push(mr, &req->rl_registered); rpcrdma_mr_push(mr, &req->rl_registered);
if (encode_rdma_segment(xdr, mr) < 0) if (encode_rdma_segment(xdr, mr) < 0)
@ -511,11 +494,6 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
*segcount = cpu_to_be32(nchunks); *segcount = cpu_to_be32(nchunks);
return 0; return 0;
out_maperr:
if (PTR_ERR(seg) == -EAGAIN)
xprt_wait_for_buffer_space(rqst->rq_task, NULL);
return PTR_ERR(seg);
} }
/** /**
@ -712,7 +690,7 @@ rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt,
{ {
req->rl_sendctx = rpcrdma_sendctx_get_locked(&r_xprt->rx_buf); req->rl_sendctx = rpcrdma_sendctx_get_locked(&r_xprt->rx_buf);
if (!req->rl_sendctx) if (!req->rl_sendctx)
return -ENOBUFS; return -EAGAIN;
req->rl_sendctx->sc_wr.num_sge = 0; req->rl_sendctx->sc_wr.num_sge = 0;
req->rl_sendctx->sc_unmap_count = 0; req->rl_sendctx->sc_unmap_count = 0;
req->rl_sendctx->sc_req = req; req->rl_sendctx->sc_req = req;
@ -886,7 +864,15 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
return 0; return 0;
out_err: out_err:
r_xprt->rx_stats.failed_marshal_count++; switch (ret) {
case -EAGAIN:
xprt_wait_for_buffer_space(rqst->rq_task, NULL);
break;
case -ENOBUFS:
break;
default:
r_xprt->rx_stats.failed_marshal_count++;
}
return ret; return ret;
} }
@ -1029,8 +1015,6 @@ rpcrdma_is_bcall(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep)
out_short: out_short:
pr_warn("RPC/RDMA short backward direction call\n"); pr_warn("RPC/RDMA short backward direction call\n");
if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, rep))
xprt_disconnect_done(&r_xprt->rx_xprt);
return true; return true;
} }
#else /* CONFIG_SUNRPC_BACKCHANNEL */ #else /* CONFIG_SUNRPC_BACKCHANNEL */
@ -1336,13 +1320,14 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
u32 credits; u32 credits;
__be32 *p; __be32 *p;
--buf->rb_posted_receives;
if (rep->rr_hdrbuf.head[0].iov_len == 0) if (rep->rr_hdrbuf.head[0].iov_len == 0)
goto out_badstatus; goto out_badstatus;
/* Fixed transport header fields */
xdr_init_decode(&rep->rr_stream, &rep->rr_hdrbuf, xdr_init_decode(&rep->rr_stream, &rep->rr_hdrbuf,
rep->rr_hdrbuf.head[0].iov_base); rep->rr_hdrbuf.head[0].iov_base);
/* Fixed transport header fields */
p = xdr_inline_decode(&rep->rr_stream, 4 * sizeof(*p)); p = xdr_inline_decode(&rep->rr_stream, 4 * sizeof(*p));
if (unlikely(!p)) if (unlikely(!p))
goto out_shortreply; goto out_shortreply;
@ -1381,17 +1366,10 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
trace_xprtrdma_reply(rqst->rq_task, rep, req, credits); trace_xprtrdma_reply(rqst->rq_task, rep, req, credits);
rpcrdma_post_recvs(r_xprt, false);
queue_work(rpcrdma_receive_wq, &rep->rr_work); queue_work(rpcrdma_receive_wq, &rep->rr_work);
return; return;
out_badstatus:
rpcrdma_recv_buffer_put(rep);
if (r_xprt->rx_ep.rep_connected == 1) {
r_xprt->rx_ep.rep_connected = -EIO;
rpcrdma_conn_func(&r_xprt->rx_ep);
}
return;
out_badversion: out_badversion:
trace_xprtrdma_reply_vers(rep); trace_xprtrdma_reply_vers(rep);
goto repost; goto repost;
@ -1411,7 +1389,7 @@ out_shortreply:
* receive buffer before returning. * receive buffer before returning.
*/ */
repost: repost:
r_xprt->rx_stats.bad_reply_count++; rpcrdma_post_recvs(r_xprt, false);
if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, rep)) out_badstatus:
rpcrdma_recv_buffer_put(rep); rpcrdma_recv_buffer_put(rep);
} }

View File

@ -263,6 +263,7 @@ static const struct rpc_xprt_ops xprt_rdma_bc_procs = {
.reserve_xprt = xprt_reserve_xprt_cong, .reserve_xprt = xprt_reserve_xprt_cong,
.release_xprt = xprt_release_xprt_cong, .release_xprt = xprt_release_xprt_cong,
.alloc_slot = xprt_alloc_slot, .alloc_slot = xprt_alloc_slot,
.free_slot = xprt_free_slot,
.release_request = xprt_release_rqst_cong, .release_request = xprt_release_rqst_cong,
.buf_alloc = xprt_rdma_bc_allocate, .buf_alloc = xprt_rdma_bc_allocate,
.buf_free = xprt_rdma_bc_free, .buf_free = xprt_rdma_bc_free,

View File

@ -1,3 +1,4 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/* /*
* Copyright (c) 2014-2017 Oracle. All rights reserved. * Copyright (c) 2014-2017 Oracle. All rights reserved.
* Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
@ -334,9 +335,7 @@ xprt_setup_rdma(struct xprt_create *args)
return ERR_PTR(-EBADF); return ERR_PTR(-EBADF);
} }
xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt), xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt), 0, 0);
xprt_rdma_slot_table_entries,
xprt_rdma_slot_table_entries);
if (xprt == NULL) { if (xprt == NULL) {
dprintk("RPC: %s: couldn't allocate rpcrdma_xprt\n", dprintk("RPC: %s: couldn't allocate rpcrdma_xprt\n",
__func__); __func__);
@ -368,7 +367,7 @@ xprt_setup_rdma(struct xprt_create *args)
xprt_set_bound(xprt); xprt_set_bound(xprt);
xprt_rdma_format_addresses(xprt, sap); xprt_rdma_format_addresses(xprt, sap);
cdata.max_requests = xprt->max_reqs; cdata.max_requests = xprt_rdma_slot_table_entries;
cdata.rsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA write max */ cdata.rsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA write max */
cdata.wsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA read max */ cdata.wsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA read max */
@ -541,6 +540,47 @@ xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task)
} }
} }
/**
* xprt_rdma_alloc_slot - allocate an rpc_rqst
* @xprt: controlling RPC transport
* @task: RPC task requesting a fresh rpc_rqst
*
* tk_status values:
* %0 if task->tk_rqstp points to a fresh rpc_rqst
* %-EAGAIN if no rpc_rqst is available; queued on backlog
*/
static void
xprt_rdma_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task)
{
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
struct rpcrdma_req *req;
req = rpcrdma_buffer_get(&r_xprt->rx_buf);
if (!req)
goto out_sleep;
task->tk_rqstp = &req->rl_slot;
task->tk_status = 0;
return;
out_sleep:
rpc_sleep_on(&xprt->backlog, task, NULL);
task->tk_status = -EAGAIN;
}
/**
 * xprt_rdma_free_slot - release an rpc_rqst
 * @xprt: controlling RPC transport
 * @rqst: rpc_rqst to release
 *
 * Zeroes @rqst, returns the rpcrdma_req associated with it via
 * rpcrdma_buffer_put(), then wakes the next task waiting on the
 * transport's backlog queue.
 */
static void
xprt_rdma_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *rqst)
{
	struct rpcrdma_req *req;

	memset(rqst, 0, sizeof *rqst);

	/* Look up the owning req only after the wipe; this keeps the
	 * same call order as before.
	 */
	req = rpcr_to_rdmar(rqst);
	rpcrdma_buffer_put(req);

	rpc_wake_up_next(&xprt->backlog);
}
static bool static bool
rpcrdma_get_sendbuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, rpcrdma_get_sendbuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
size_t size, gfp_t flags) size_t size, gfp_t flags)
@ -611,13 +651,9 @@ xprt_rdma_allocate(struct rpc_task *task)
{ {
struct rpc_rqst *rqst = task->tk_rqstp; struct rpc_rqst *rqst = task->tk_rqstp;
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
struct rpcrdma_req *req; struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
gfp_t flags; gfp_t flags;
req = rpcrdma_buffer_get(&r_xprt->rx_buf);
if (req == NULL)
goto out_get;
flags = RPCRDMA_DEF_GFP; flags = RPCRDMA_DEF_GFP;
if (RPC_IS_SWAPPER(task)) if (RPC_IS_SWAPPER(task))
flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN; flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN;
@ -627,15 +663,12 @@ xprt_rdma_allocate(struct rpc_task *task)
if (!rpcrdma_get_recvbuf(r_xprt, req, rqst->rq_rcvsize, flags)) if (!rpcrdma_get_recvbuf(r_xprt, req, rqst->rq_rcvsize, flags))
goto out_fail; goto out_fail;
rpcrdma_set_xprtdata(rqst, req);
rqst->rq_buffer = req->rl_sendbuf->rg_base; rqst->rq_buffer = req->rl_sendbuf->rg_base;
rqst->rq_rbuffer = req->rl_recvbuf->rg_base; rqst->rq_rbuffer = req->rl_recvbuf->rg_base;
trace_xprtrdma_allocate(task, req); trace_xprtrdma_allocate(task, req);
return 0; return 0;
out_fail: out_fail:
rpcrdma_buffer_put(req);
out_get:
trace_xprtrdma_allocate(task, NULL); trace_xprtrdma_allocate(task, NULL);
return -ENOMEM; return -ENOMEM;
} }
@ -656,7 +689,6 @@ xprt_rdma_free(struct rpc_task *task)
if (test_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags)) if (test_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags))
rpcrdma_release_rqst(r_xprt, req); rpcrdma_release_rqst(r_xprt, req);
trace_xprtrdma_rpc_done(task, req); trace_xprtrdma_rpc_done(task, req);
rpcrdma_buffer_put(req);
} }
/** /**
@ -694,9 +726,6 @@ xprt_rdma_send_request(struct rpc_task *task)
if (rc < 0) if (rc < 0)
goto failed_marshal; goto failed_marshal;
if (req->rl_reply == NULL) /* e.g. reconnection */
rpcrdma_recv_buffer_get(req);
/* Must suppress retransmit to maintain credits */ /* Must suppress retransmit to maintain credits */
if (rqst->rq_connect_cookie == xprt->connect_cookie) if (rqst->rq_connect_cookie == xprt->connect_cookie)
goto drop_connection; goto drop_connection;
@ -783,7 +812,8 @@ xprt_rdma_disable_swap(struct rpc_xprt *xprt)
static const struct rpc_xprt_ops xprt_rdma_procs = { static const struct rpc_xprt_ops xprt_rdma_procs = {
.reserve_xprt = xprt_reserve_xprt_cong, .reserve_xprt = xprt_reserve_xprt_cong,
.release_xprt = xprt_release_xprt_cong, /* sunrpc/xprt.c */ .release_xprt = xprt_release_xprt_cong, /* sunrpc/xprt.c */
.alloc_slot = xprt_alloc_slot, .alloc_slot = xprt_rdma_alloc_slot,
.free_slot = xprt_rdma_free_slot,
.release_request = xprt_release_rqst_cong, /* ditto */ .release_request = xprt_release_rqst_cong, /* ditto */
.set_retrans_timeout = xprt_set_retrans_timeout_def, /* ditto */ .set_retrans_timeout = xprt_set_retrans_timeout_def, /* ditto */
.timer = xprt_rdma_timer, .timer = xprt_rdma_timer,

View File

@ -1,3 +1,4 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/* /*
* Copyright (c) 2014-2017 Oracle. All rights reserved. * Copyright (c) 2014-2017 Oracle. All rights reserved.
* Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
@ -72,8 +73,10 @@
/* /*
* internal functions * internal functions
*/ */
static void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc);
static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt); static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf); static void rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf);
static int rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt, bool temp);
static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb); static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb);
struct workqueue_struct *rpcrdma_receive_wq __read_mostly; struct workqueue_struct *rpcrdma_receive_wq __read_mostly;
@ -160,7 +163,7 @@ rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
rr_cqe); rr_cqe);
/* WARNING: Only wr_id and status are reliable at this point */ /* WARNING: Only wr_id and status are reliable at this point */
trace_xprtrdma_wc_receive(rep, wc); trace_xprtrdma_wc_receive(wc);
if (wc->status != IB_WC_SUCCESS) if (wc->status != IB_WC_SUCCESS)
goto out_fail; goto out_fail;
@ -232,7 +235,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
complete(&ia->ri_done); complete(&ia->ri_done);
break; break;
case RDMA_CM_EVENT_ADDR_ERROR: case RDMA_CM_EVENT_ADDR_ERROR:
ia->ri_async_rc = -EHOSTUNREACH; ia->ri_async_rc = -EPROTO;
complete(&ia->ri_done); complete(&ia->ri_done);
break; break;
case RDMA_CM_EVENT_ROUTE_ERROR: case RDMA_CM_EVENT_ROUTE_ERROR:
@ -263,7 +266,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
connstate = -ENOTCONN; connstate = -ENOTCONN;
goto connected; goto connected;
case RDMA_CM_EVENT_UNREACHABLE: case RDMA_CM_EVENT_UNREACHABLE:
connstate = -ENETDOWN; connstate = -ENETUNREACH;
goto connected; goto connected;
case RDMA_CM_EVENT_REJECTED: case RDMA_CM_EVENT_REJECTED:
dprintk("rpcrdma: connection to %s:%s rejected: %s\n", dprintk("rpcrdma: connection to %s:%s rejected: %s\n",
@ -306,8 +309,8 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, struct rpcrdma_ia *ia)
init_completion(&ia->ri_done); init_completion(&ia->ri_done);
init_completion(&ia->ri_remove_done); init_completion(&ia->ri_remove_done);
id = rdma_create_id(&init_net, rpcrdma_conn_upcall, xprt, RDMA_PS_TCP, id = rdma_create_id(xprt->rx_xprt.xprt_net, rpcrdma_conn_upcall,
IB_QPT_RC); xprt, RDMA_PS_TCP, IB_QPT_RC);
if (IS_ERR(id)) { if (IS_ERR(id)) {
rc = PTR_ERR(id); rc = PTR_ERR(id);
dprintk("RPC: %s: rdma_create_id() failed %i\n", dprintk("RPC: %s: rdma_create_id() failed %i\n",
@ -501,8 +504,8 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
struct rpcrdma_create_data_internal *cdata) struct rpcrdma_create_data_internal *cdata)
{ {
struct rpcrdma_connect_private *pmsg = &ep->rep_cm_private; struct rpcrdma_connect_private *pmsg = &ep->rep_cm_private;
unsigned int max_qp_wr, max_sge;
struct ib_cq *sendcq, *recvcq; struct ib_cq *sendcq, *recvcq;
unsigned int max_sge;
int rc; int rc;
max_sge = min_t(unsigned int, ia->ri_device->attrs.max_sge, max_sge = min_t(unsigned int, ia->ri_device->attrs.max_sge,
@ -513,29 +516,13 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
} }
ia->ri_max_send_sges = max_sge; ia->ri_max_send_sges = max_sge;
if (ia->ri_device->attrs.max_qp_wr <= RPCRDMA_BACKWARD_WRS) { rc = ia->ri_ops->ro_open(ia, ep, cdata);
dprintk("RPC: %s: insufficient wqe's available\n", if (rc)
__func__); return rc;
return -ENOMEM;
}
max_qp_wr = ia->ri_device->attrs.max_qp_wr - RPCRDMA_BACKWARD_WRS - 1;
/* check provider's send/recv wr limits */
if (cdata->max_requests > max_qp_wr)
cdata->max_requests = max_qp_wr;
ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall; ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall;
ep->rep_attr.qp_context = ep; ep->rep_attr.qp_context = ep;
ep->rep_attr.srq = NULL; ep->rep_attr.srq = NULL;
ep->rep_attr.cap.max_send_wr = cdata->max_requests;
ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS;
ep->rep_attr.cap.max_send_wr += 1; /* drain cqe */
rc = ia->ri_ops->ro_open(ia, ep, cdata);
if (rc)
return rc;
ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
ep->rep_attr.cap.max_recv_wr += 1; /* drain cqe */
ep->rep_attr.cap.max_send_sge = max_sge; ep->rep_attr.cap.max_send_sge = max_sge;
ep->rep_attr.cap.max_recv_sge = 1; ep->rep_attr.cap.max_recv_sge = 1;
ep->rep_attr.cap.max_inline_data = 0; ep->rep_attr.cap.max_inline_data = 0;
@ -742,7 +729,6 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{ {
struct rpcrdma_xprt *r_xprt = container_of(ia, struct rpcrdma_xprt, struct rpcrdma_xprt *r_xprt = container_of(ia, struct rpcrdma_xprt,
rx_ia); rx_ia);
unsigned int extras;
int rc; int rc;
retry: retry:
@ -786,9 +772,8 @@ retry:
} }
dprintk("RPC: %s: connected\n", __func__); dprintk("RPC: %s: connected\n", __func__);
extras = r_xprt->rx_buf.rb_bc_srv_max_requests;
if (extras) rpcrdma_post_recvs(r_xprt, true);
rpcrdma_ep_post_extra_recv(r_xprt, extras);
out: out:
if (rc) if (rc)
@ -894,6 +879,7 @@ static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt)
sc->sc_xprt = r_xprt; sc->sc_xprt = r_xprt;
buf->rb_sc_ctxs[i] = sc; buf->rb_sc_ctxs[i] = sc;
} }
buf->rb_flags = 0;
return 0; return 0;
@ -951,7 +937,7 @@ out_emptyq:
* completions recently. This is a sign the Send Queue is * completions recently. This is a sign the Send Queue is
* backing up. Cause the caller to pause and try again. * backing up. Cause the caller to pause and try again.
*/ */
dprintk("RPC: %s: empty sendctx queue\n", __func__); set_bit(RPCRDMA_BUF_F_EMPTY_SCQ, &buf->rb_flags);
r_xprt = container_of(buf, struct rpcrdma_xprt, rx_buf); r_xprt = container_of(buf, struct rpcrdma_xprt, rx_buf);
r_xprt->rx_stats.empty_sendctx_q++; r_xprt->rx_stats.empty_sendctx_q++;
return NULL; return NULL;
@ -966,7 +952,8 @@ out_emptyq:
* *
* The caller serializes calls to this function (per rpcrdma_buffer). * The caller serializes calls to this function (per rpcrdma_buffer).
*/ */
void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc) static void
rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc)
{ {
struct rpcrdma_buffer *buf = &sc->sc_xprt->rx_buf; struct rpcrdma_buffer *buf = &sc->sc_xprt->rx_buf;
unsigned long next_tail; unsigned long next_tail;
@ -985,6 +972,11 @@ void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc)
/* Paired with READ_ONCE */ /* Paired with READ_ONCE */
smp_store_release(&buf->rb_sc_tail, next_tail); smp_store_release(&buf->rb_sc_tail, next_tail);
if (test_and_clear_bit(RPCRDMA_BUF_F_EMPTY_SCQ, &buf->rb_flags)) {
smp_mb__after_atomic();
xprt_write_space(&sc->sc_xprt->rx_xprt);
}
} }
static void static void
@ -1098,14 +1090,8 @@ rpcrdma_create_req(struct rpcrdma_xprt *r_xprt)
return req; return req;
} }
/** static int
* rpcrdma_create_rep - Allocate an rpcrdma_rep object rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt, bool temp)
* @r_xprt: controlling transport
*
* Returns 0 on success or a negative errno on failure.
*/
int
rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
{ {
struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
struct rpcrdma_buffer *buf = &r_xprt->rx_buf; struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
@ -1133,6 +1119,7 @@ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
rep->rr_recv_wr.wr_cqe = &rep->rr_cqe; rep->rr_recv_wr.wr_cqe = &rep->rr_cqe;
rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov; rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov;
rep->rr_recv_wr.num_sge = 1; rep->rr_recv_wr.num_sge = 1;
rep->rr_temp = temp;
spin_lock(&buf->rb_lock); spin_lock(&buf->rb_lock);
list_add(&rep->rr_list, &buf->rb_recv_bufs); list_add(&rep->rr_list, &buf->rb_recv_bufs);
@ -1184,12 +1171,8 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
list_add(&req->rl_list, &buf->rb_send_bufs); list_add(&req->rl_list, &buf->rb_send_bufs);
} }
buf->rb_posted_receives = 0;
INIT_LIST_HEAD(&buf->rb_recv_bufs); INIT_LIST_HEAD(&buf->rb_recv_bufs);
for (i = 0; i <= buf->rb_max_requests; i++) {
rc = rpcrdma_create_rep(r_xprt);
if (rc)
goto out;
}
rc = rpcrdma_sendctxs_create(r_xprt); rc = rpcrdma_sendctxs_create(r_xprt);
if (rc) if (rc)
@ -1201,28 +1184,6 @@ out:
return rc; return rc;
} }
static struct rpcrdma_req *
rpcrdma_buffer_get_req_locked(struct rpcrdma_buffer *buf)
{
struct rpcrdma_req *req;
req = list_first_entry(&buf->rb_send_bufs,
struct rpcrdma_req, rl_list);
list_del_init(&req->rl_list);
return req;
}
static struct rpcrdma_rep *
rpcrdma_buffer_get_rep_locked(struct rpcrdma_buffer *buf)
{
struct rpcrdma_rep *rep;
rep = list_first_entry(&buf->rb_recv_bufs,
struct rpcrdma_rep, rr_list);
list_del(&rep->rr_list);
return rep;
}
static void static void
rpcrdma_destroy_rep(struct rpcrdma_rep *rep) rpcrdma_destroy_rep(struct rpcrdma_rep *rep)
{ {
@ -1281,10 +1242,11 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
while (!list_empty(&buf->rb_recv_bufs)) { while (!list_empty(&buf->rb_recv_bufs)) {
struct rpcrdma_rep *rep; struct rpcrdma_rep *rep;
rep = rpcrdma_buffer_get_rep_locked(buf); rep = list_first_entry(&buf->rb_recv_bufs,
struct rpcrdma_rep, rr_list);
list_del(&rep->rr_list);
rpcrdma_destroy_rep(rep); rpcrdma_destroy_rep(rep);
} }
buf->rb_send_count = 0;
spin_lock(&buf->rb_reqslock); spin_lock(&buf->rb_reqslock);
while (!list_empty(&buf->rb_allreqs)) { while (!list_empty(&buf->rb_allreqs)) {
@ -1299,7 +1261,6 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
spin_lock(&buf->rb_reqslock); spin_lock(&buf->rb_reqslock);
} }
spin_unlock(&buf->rb_reqslock); spin_unlock(&buf->rb_reqslock);
buf->rb_recv_count = 0;
rpcrdma_mrs_destroy(buf); rpcrdma_mrs_destroy(buf);
} }
@ -1372,27 +1333,11 @@ rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr)
__rpcrdma_mr_put(&r_xprt->rx_buf, mr); __rpcrdma_mr_put(&r_xprt->rx_buf, mr);
} }
static struct rpcrdma_rep * /**
rpcrdma_buffer_get_rep(struct rpcrdma_buffer *buffers) * rpcrdma_buffer_get - Get a request buffer
{ * @buffers: Buffer pool from which to obtain a buffer
/* If an RPC previously completed without a reply (say, a
* credential problem or a soft timeout occurs) then hold off
* on supplying more Receive buffers until the number of new
* pending RPCs catches up to the number of posted Receives.
*/
if (unlikely(buffers->rb_send_count < buffers->rb_recv_count))
return NULL;
if (unlikely(list_empty(&buffers->rb_recv_bufs)))
return NULL;
buffers->rb_recv_count++;
return rpcrdma_buffer_get_rep_locked(buffers);
}
/*
* Get a set of request/reply buffers.
* *
* Reply buffer (if available) is attached to send buffer upon return. * Returns a fresh rpcrdma_req, or NULL if none are available.
*/ */
struct rpcrdma_req * struct rpcrdma_req *
rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
@ -1400,23 +1345,18 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
struct rpcrdma_req *req; struct rpcrdma_req *req;
spin_lock(&buffers->rb_lock); spin_lock(&buffers->rb_lock);
if (list_empty(&buffers->rb_send_bufs)) req = list_first_entry_or_null(&buffers->rb_send_bufs,
goto out_reqbuf; struct rpcrdma_req, rl_list);
buffers->rb_send_count++; if (req)
req = rpcrdma_buffer_get_req_locked(buffers); list_del_init(&req->rl_list);
req->rl_reply = rpcrdma_buffer_get_rep(buffers);
spin_unlock(&buffers->rb_lock); spin_unlock(&buffers->rb_lock);
return req; return req;
out_reqbuf:
spin_unlock(&buffers->rb_lock);
return NULL;
} }
/* /**
* Put request/reply buffers back into pool. * rpcrdma_buffer_put - Put request/reply buffers back into pool
* Pre-decrement counter/array index. * @req: object to return
*
*/ */
void void
rpcrdma_buffer_put(struct rpcrdma_req *req) rpcrdma_buffer_put(struct rpcrdma_req *req)
@ -1427,27 +1367,16 @@ rpcrdma_buffer_put(struct rpcrdma_req *req)
req->rl_reply = NULL; req->rl_reply = NULL;
spin_lock(&buffers->rb_lock); spin_lock(&buffers->rb_lock);
buffers->rb_send_count--; list_add(&req->rl_list, &buffers->rb_send_bufs);
list_add_tail(&req->rl_list, &buffers->rb_send_bufs);
if (rep) { if (rep) {
buffers->rb_recv_count--; if (!rep->rr_temp) {
list_add_tail(&rep->rr_list, &buffers->rb_recv_bufs); list_add(&rep->rr_list, &buffers->rb_recv_bufs);
rep = NULL;
}
} }
spin_unlock(&buffers->rb_lock); spin_unlock(&buffers->rb_lock);
} if (rep)
rpcrdma_destroy_rep(rep);
/*
* Recover reply buffers from pool.
* This happens when recovering from disconnect.
*/
void
rpcrdma_recv_buffer_get(struct rpcrdma_req *req)
{
struct rpcrdma_buffer *buffers = req->rl_buffer;
spin_lock(&buffers->rb_lock);
req->rl_reply = rpcrdma_buffer_get_rep(buffers);
spin_unlock(&buffers->rb_lock);
} }
/* /*
@ -1459,10 +1388,13 @@ rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
{ {
struct rpcrdma_buffer *buffers = &rep->rr_rxprt->rx_buf; struct rpcrdma_buffer *buffers = &rep->rr_rxprt->rx_buf;
spin_lock(&buffers->rb_lock); if (!rep->rr_temp) {
buffers->rb_recv_count--; spin_lock(&buffers->rb_lock);
list_add_tail(&rep->rr_list, &buffers->rb_recv_bufs); list_add(&rep->rr_list, &buffers->rb_recv_bufs);
spin_unlock(&buffers->rb_lock); spin_unlock(&buffers->rb_lock);
} else {
rpcrdma_destroy_rep(rep);
}
} }
/** /**
@ -1558,13 +1490,6 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
struct ib_send_wr *send_wr = &req->rl_sendctx->sc_wr; struct ib_send_wr *send_wr = &req->rl_sendctx->sc_wr;
int rc; int rc;
if (req->rl_reply) {
rc = rpcrdma_ep_post_recv(ia, req->rl_reply);
if (rc)
return rc;
req->rl_reply = NULL;
}
if (!ep->rep_send_count || if (!ep->rep_send_count ||
test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags)) { test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags)) {
send_wr->send_flags |= IB_SEND_SIGNALED; send_wr->send_flags |= IB_SEND_SIGNALED;
@ -1581,61 +1506,69 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
return 0; return 0;
} }
int
rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
struct rpcrdma_rep *rep)
{
struct ib_recv_wr *recv_wr_fail;
int rc;
if (!rpcrdma_dma_map_regbuf(ia, rep->rr_rdmabuf))
goto out_map;
rc = ib_post_recv(ia->ri_id->qp, &rep->rr_recv_wr, &recv_wr_fail);
trace_xprtrdma_post_recv(rep, rc);
if (rc)
return -ENOTCONN;
return 0;
out_map:
pr_err("rpcrdma: failed to DMA map the Receive buffer\n");
return -EIO;
}
/** /**
* rpcrdma_ep_post_extra_recv - Post buffers for incoming backchannel requests * rpcrdma_post_recvs - Maybe post some Receive buffers
* @r_xprt: transport associated with these backchannel resources * @r_xprt: controlling transport
* @count: minimum number of incoming requests expected * @temp: when true, allocate temp rpcrdma_rep objects
* *
* Returns zero if all requested buffers were posted, or a negative errno.
*/ */
int void
rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *r_xprt, unsigned int count) rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp)
{ {
struct rpcrdma_buffer *buffers = &r_xprt->rx_buf; struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
struct rpcrdma_ia *ia = &r_xprt->rx_ia; struct ib_recv_wr *wr, *bad_wr;
struct rpcrdma_rep *rep; int needed, count, rc;
int rc;
while (count--) { needed = buf->rb_credits + (buf->rb_bc_srv_max_requests << 1);
spin_lock(&buffers->rb_lock); if (buf->rb_posted_receives > needed)
if (list_empty(&buffers->rb_recv_bufs)) return;
goto out_reqbuf; needed -= buf->rb_posted_receives;
rep = rpcrdma_buffer_get_rep_locked(buffers);
spin_unlock(&buffers->rb_lock);
rc = rpcrdma_ep_post_recv(ia, rep); count = 0;
if (rc) wr = NULL;
goto out_rc; while (needed) {
struct rpcrdma_regbuf *rb;
struct rpcrdma_rep *rep;
spin_lock(&buf->rb_lock);
rep = list_first_entry_or_null(&buf->rb_recv_bufs,
struct rpcrdma_rep, rr_list);
if (likely(rep))
list_del(&rep->rr_list);
spin_unlock(&buf->rb_lock);
if (!rep) {
if (rpcrdma_create_rep(r_xprt, temp))
break;
continue;
}
rb = rep->rr_rdmabuf;
if (!rpcrdma_regbuf_is_mapped(rb)) {
if (!__rpcrdma_dma_map_regbuf(&r_xprt->rx_ia, rb)) {
rpcrdma_recv_buffer_put(rep);
break;
}
}
trace_xprtrdma_post_recv(rep->rr_recv_wr.wr_cqe);
rep->rr_recv_wr.next = wr;
wr = &rep->rr_recv_wr;
++count;
--needed;
} }
if (!count)
return;
return 0; rc = ib_post_recv(r_xprt->rx_ia.ri_id->qp, wr, &bad_wr);
if (rc) {
for (wr = bad_wr; wr; wr = wr->next) {
struct rpcrdma_rep *rep;
out_reqbuf: rep = container_of(wr, struct rpcrdma_rep, rr_recv_wr);
spin_unlock(&buffers->rb_lock); rpcrdma_recv_buffer_put(rep);
trace_xprtrdma_noreps(r_xprt); --count;
return -ENOMEM; }
}
out_rc: buf->rb_posted_receives += count;
rpcrdma_recv_buffer_put(rep); trace_xprtrdma_post_recvs(r_xprt, count, rc);
return rc;
} }

View File

@ -1,3 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/* /*
* Copyright (c) 2014-2017 Oracle. All rights reserved. * Copyright (c) 2014-2017 Oracle. All rights reserved.
* Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
@ -196,6 +197,7 @@ struct rpcrdma_rep {
__be32 rr_proc; __be32 rr_proc;
int rr_wc_flags; int rr_wc_flags;
u32 rr_inv_rkey; u32 rr_inv_rkey;
bool rr_temp;
struct rpcrdma_regbuf *rr_rdmabuf; struct rpcrdma_regbuf *rr_rdmabuf;
struct rpcrdma_xprt *rr_rxprt; struct rpcrdma_xprt *rr_rxprt;
struct work_struct rr_work; struct work_struct rr_work;
@ -334,6 +336,7 @@ enum {
struct rpcrdma_buffer; struct rpcrdma_buffer;
struct rpcrdma_req { struct rpcrdma_req {
struct list_head rl_list; struct list_head rl_list;
struct rpc_rqst rl_slot;
struct rpcrdma_buffer *rl_buffer; struct rpcrdma_buffer *rl_buffer;
struct rpcrdma_rep *rl_reply; struct rpcrdma_rep *rl_reply;
struct xdr_stream rl_stream; struct xdr_stream rl_stream;
@ -356,16 +359,10 @@ enum {
RPCRDMA_REQ_F_TX_RESOURCES, RPCRDMA_REQ_F_TX_RESOURCES,
}; };
static inline void
rpcrdma_set_xprtdata(struct rpc_rqst *rqst, struct rpcrdma_req *req)
{
rqst->rq_xprtdata = req;
}
static inline struct rpcrdma_req * static inline struct rpcrdma_req *
rpcr_to_rdmar(const struct rpc_rqst *rqst) rpcr_to_rdmar(const struct rpc_rqst *rqst)
{ {
return rqst->rq_xprtdata; return container_of(rqst, struct rpcrdma_req, rl_slot);
} }
static inline void static inline void
@ -401,11 +398,12 @@ struct rpcrdma_buffer {
struct rpcrdma_sendctx **rb_sc_ctxs; struct rpcrdma_sendctx **rb_sc_ctxs;
spinlock_t rb_lock; /* protect buf lists */ spinlock_t rb_lock; /* protect buf lists */
int rb_send_count, rb_recv_count;
struct list_head rb_send_bufs; struct list_head rb_send_bufs;
struct list_head rb_recv_bufs; struct list_head rb_recv_bufs;
unsigned long rb_flags;
u32 rb_max_requests; u32 rb_max_requests;
u32 rb_credits; /* most recent credit grant */ u32 rb_credits; /* most recent credit grant */
int rb_posted_receives;
u32 rb_bc_srv_max_requests; u32 rb_bc_srv_max_requests;
spinlock_t rb_reqslock; /* protect rb_allreqs */ spinlock_t rb_reqslock; /* protect rb_allreqs */
@ -420,6 +418,11 @@ struct rpcrdma_buffer {
}; };
#define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia) #define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia)
/* rb_flags */
enum {
RPCRDMA_BUF_F_EMPTY_SCQ = 0,
};
/* /*
* Internal structure for transport instance creation. This * Internal structure for transport instance creation. This
* exists primarily for modularity. * exists primarily for modularity.
@ -561,18 +564,16 @@ void rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *);
int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *, int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *,
struct rpcrdma_req *); struct rpcrdma_req *);
int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_rep *); void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp);
/* /*
* Buffer calls - xprtrdma/verbs.c * Buffer calls - xprtrdma/verbs.c
*/ */
struct rpcrdma_req *rpcrdma_create_req(struct rpcrdma_xprt *); struct rpcrdma_req *rpcrdma_create_req(struct rpcrdma_xprt *);
void rpcrdma_destroy_req(struct rpcrdma_req *); void rpcrdma_destroy_req(struct rpcrdma_req *);
int rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt);
int rpcrdma_buffer_create(struct rpcrdma_xprt *); int rpcrdma_buffer_create(struct rpcrdma_xprt *);
void rpcrdma_buffer_destroy(struct rpcrdma_buffer *); void rpcrdma_buffer_destroy(struct rpcrdma_buffer *);
struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf); struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf);
void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc);
struct rpcrdma_mr *rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt); struct rpcrdma_mr *rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt);
void rpcrdma_mr_put(struct rpcrdma_mr *mr); void rpcrdma_mr_put(struct rpcrdma_mr *mr);
@ -581,7 +582,6 @@ void rpcrdma_mr_defer_recovery(struct rpcrdma_mr *mr);
struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *); struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *);
void rpcrdma_buffer_put(struct rpcrdma_req *); void rpcrdma_buffer_put(struct rpcrdma_req *);
void rpcrdma_recv_buffer_get(struct rpcrdma_req *);
void rpcrdma_recv_buffer_put(struct rpcrdma_rep *); void rpcrdma_recv_buffer_put(struct rpcrdma_rep *);
struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(size_t, enum dma_data_direction, struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(size_t, enum dma_data_direction,
@ -603,8 +603,6 @@ rpcrdma_dma_map_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb)
return __rpcrdma_dma_map_regbuf(ia, rb); return __rpcrdma_dma_map_regbuf(ia, rb);
} }
int rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *, unsigned int);
int rpcrdma_alloc_wq(void); int rpcrdma_alloc_wq(void);
void rpcrdma_destroy_wq(void); void rpcrdma_destroy_wq(void);

View File

@ -2763,6 +2763,7 @@ static const struct rpc_xprt_ops xs_local_ops = {
.reserve_xprt = xprt_reserve_xprt, .reserve_xprt = xprt_reserve_xprt,
.release_xprt = xs_tcp_release_xprt, .release_xprt = xs_tcp_release_xprt,
.alloc_slot = xprt_alloc_slot, .alloc_slot = xprt_alloc_slot,
.free_slot = xprt_free_slot,
.rpcbind = xs_local_rpcbind, .rpcbind = xs_local_rpcbind,
.set_port = xs_local_set_port, .set_port = xs_local_set_port,
.connect = xs_local_connect, .connect = xs_local_connect,
@ -2782,6 +2783,7 @@ static const struct rpc_xprt_ops xs_udp_ops = {
.reserve_xprt = xprt_reserve_xprt_cong, .reserve_xprt = xprt_reserve_xprt_cong,
.release_xprt = xprt_release_xprt_cong, .release_xprt = xprt_release_xprt_cong,
.alloc_slot = xprt_alloc_slot, .alloc_slot = xprt_alloc_slot,
.free_slot = xprt_free_slot,
.rpcbind = rpcb_getport_async, .rpcbind = rpcb_getport_async,
.set_port = xs_set_port, .set_port = xs_set_port,
.connect = xs_connect, .connect = xs_connect,
@ -2803,6 +2805,7 @@ static const struct rpc_xprt_ops xs_tcp_ops = {
.reserve_xprt = xprt_reserve_xprt, .reserve_xprt = xprt_reserve_xprt,
.release_xprt = xs_tcp_release_xprt, .release_xprt = xs_tcp_release_xprt,
.alloc_slot = xprt_lock_and_alloc_slot, .alloc_slot = xprt_lock_and_alloc_slot,
.free_slot = xprt_free_slot,
.rpcbind = rpcb_getport_async, .rpcbind = rpcb_getport_async,
.set_port = xs_set_port, .set_port = xs_set_port,
.connect = xs_connect, .connect = xs_connect,
@ -2834,6 +2837,7 @@ static const struct rpc_xprt_ops bc_tcp_ops = {
.reserve_xprt = xprt_reserve_xprt, .reserve_xprt = xprt_reserve_xprt,
.release_xprt = xprt_release_xprt, .release_xprt = xprt_release_xprt,
.alloc_slot = xprt_alloc_slot, .alloc_slot = xprt_alloc_slot,
.free_slot = xprt_free_slot,
.buf_alloc = bc_malloc, .buf_alloc = bc_malloc,
.buf_free = bc_free, .buf_free = bc_free,
.send_request = bc_send_request, .send_request = bc_send_request,