NFS client updates for Linux 5.2

Stable bugfixes:
 - Fall back to MDS if no deviceid is found rather than aborting   # v4.11+
 - NFS4: Fix v4.0 client state corruption when mounting
 
 Features:
 - Much improved handling of soft mounts with NFS v4.0
   - Reduce risk of false positive timeouts
   - Faster failover of reads and writes after a timeout
   - Added a "softerr" mount option to return ETIMEDOUT instead of
     EIO to the application after a timeout
 - Increase number of xprtrdma backchannel requests
 - Add additional xprtrdma tracepoints
 - Improved send completion batching for xprtrdma
 
 Other bugfixes and cleanups:
 - Return -EINVAL when NFS v4.2 is passed an invalid dedup mode
 - Reduce usage of GFP_ATOMIC pages in SUNRPC
 - Various minor NFS over RDMA cleanups and bugfixes
 - Use the correct container namespace for upcalls
 - Don't share superblocks between user namespaces
 - Various other container fixes
 - Make nfs_match_client() killable to prevent soft lockups
 - Don't mark all open state for recovery when handling recallable state revoked flag
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCAAdFiEEnZ5MQTpR7cLU7KEp18tUv7ClQOsFAlzUjdcACgkQ18tUv7Cl
 QOsUiw/+OirzlZI7XeHfpZ/CwS7A+tSk3AAg9PDS1gjbfylER0g++GpA08tXnmDt
 JdUnBKYC5ujLyAqxN1j7QK+EvmXZQro8rucJxhEdPJMIQDC65fQQnmW7efl2bAEv
 CAWNDCf9Xe4g6X8LSR5jrnaMV4kuOQBYX4wqrrmaV8I+g/A/GKXW262KWnAv+w1M
 Y1ZlX+d1Gm8hODXhvqz4lldW6bkyrpWpU9BKUtYSYnSR0x1fam6PLPuCTm74fEDR
 N/Tgy5XvJi4xgti4SOZ/dI2O/Oqu6ut81PEPlhs8sTX04G8bLhr+hl3rSksCZFlu
 Afz9Hcnxg6XYB3Va7j7AO67H5SbyX4Zyj5cRMipXQE7Ebc1iXo5lu3vdhAEOAtNx
 fdNJlqD86MC/XWbtM+DfWlD+KjtpZ+lkxN+xuMgC/kVaPTeFI7nEWM796hJP/4no
 EYtnSLbSpJyH6F7wH9IL5V2EJYFxbzTvnPSTxV+QNZ0HgF17gTY0AGmQBzDE5bF0
 tfQteOG6MYXMHg64pTEzjlowlXOWdnE5TnuaFpt64/yP+hVznZMepBMSkxZO1xYt
 jc1wQlJkv/SyVH7cMGsj5lw3A6zwTrLManDUUmrLjIsVVmh4dk8WKlNtWQmvf1v6
 nFBklUa2GzH8LWKRT2ftNGcUeEiCuw/QF9oE5T/V7/7SQ/wmmvA=
 =skb2
 -----END PGP SIGNATURE-----

Merge tag 'nfs-for-5.2-1' of git://git.linux-nfs.org/projects/anna/linux-nfs

Pull NFS client updates from Anna Schumaker:
 "Highlights include:

  Stable bugfixes:
   - Fall back to MDS if no deviceid is found rather than aborting   # v4.11+
   - NFS4: Fix v4.0 client state corruption when mounting

  Features:
   - Much improved handling of soft mounts with NFS v4.0:
       - Reduce risk of false positive timeouts
       - Faster failover of reads and writes after a timeout
       - Added a "softerr" mount option to return ETIMEDOUT instead of
         EIO to the application after a timeout
   - Increase number of xprtrdma backchannel requests
   - Add additional xprtrdma tracepoints
   - Improved send completion batching for xprtrdma

  Other bugfixes and cleanups:
   - Return -EINVAL when NFS v4.2 is passed an invalid dedup mode
   - Reduce usage of GFP_ATOMIC pages in SUNRPC
   - Various minor NFS over RDMA cleanups and bugfixes
   - Use the correct container namespace for upcalls
   - Don't share superblocks between user namespaces
   - Various other container fixes
   - Make nfs_match_client() killable to prevent soft lockups
   - Don't mark all open state for recovery when handling recallable
     state revoked flag"

* tag 'nfs-for-5.2-1' of git://git.linux-nfs.org/projects/anna/linux-nfs: (69 commits)
  SUNRPC: Rebalance a kref in auth_gss.c
  NFS: Fix a double unlock from nfs_match,get_client
  nfs: pass the correct prototype to read_cache_page
  NFSv4: don't mark all open state for recovery when handling recallable state revoked flag
  SUNRPC: Fix an error code in gss_alloc_msg()
  SUNRPC: task should be exit if encode return EKEYEXPIRED more times
  NFS4: Fix v4.0 client state corruption when mount
  PNFS fallback to MDS if no deviceid found
  NFS: make nfs_match_client killable
  lockd: Store the lockd client credential in struct nlm_host
  NFS: When mounting, don't share filesystems between different user namespaces
  NFS: Convert NFSv2 to use the container user namespace
  NFSv4: Convert the NFS client idmapper to use the container user namespace
  NFS: Convert NFSv3 to use the container user namespace
  SUNRPC: Use namespace of listening daemon in the client AUTH_GSS upcall
  SUNRPC: Use the client user namespace when encoding creds
  NFS: Store the credential of the mount process in the nfs_server
  SUNRPC: Cache cred of process creating the rpc_client
  xprtrdma: Remove stale comment
  xprtrdma: Update comments that reference ib_drain_qp
  ...
This commit is contained in:
Linus Torvalds 2019-05-09 14:33:15 -07:00
commit 06cbd26d31
59 changed files with 1369 additions and 949 deletions

View File

@ -63,7 +63,7 @@ struct nlm_host *nlmclnt_init(const struct nlmclnt_initdata *nlm_init)
host = nlmclnt_lookup_host(nlm_init->address, nlm_init->addrlen,
nlm_init->protocol, nlm_version,
nlm_init->hostname, nlm_init->noresvport,
nlm_init->net);
nlm_init->net, nlm_init->cred);
if (host == NULL)
goto out_nohost;
if (host->h_rpcclnt == NULL && nlm_bind_host(host) == NULL)

View File

@ -715,7 +715,7 @@ static void nlmclnt_unlock_callback(struct rpc_task *task, void *data)
struct nlm_rqst *req = data;
u32 status = ntohl(req->a_res.status);
if (RPC_ASSASSINATED(task))
if (RPC_SIGNALLED(task))
goto die;
if (task->tk_status < 0) {
@ -783,7 +783,7 @@ static void nlmclnt_cancel_callback(struct rpc_task *task, void *data)
struct nlm_rqst *req = data;
u32 status = ntohl(req->a_res.status);
if (RPC_ASSASSINATED(task))
if (RPC_SIGNALLED(task))
goto die;
if (task->tk_status < 0) {

View File

@ -60,6 +60,7 @@ struct nlm_lookup_host_info {
const size_t hostname_len; /* it's length */
const int noresvport; /* use non-priv port */
struct net *net; /* network namespace to bind */
const struct cred *cred;
};
/*
@ -162,6 +163,7 @@ static struct nlm_host *nlm_alloc_host(struct nlm_lookup_host_info *ni,
host->h_nsmhandle = nsm;
host->h_addrbuf = nsm->sm_addrbuf;
host->net = ni->net;
host->h_cred = get_cred(ni->cred),
strlcpy(host->nodename, utsname()->nodename, sizeof(host->nodename));
out:
@ -188,6 +190,7 @@ static void nlm_destroy_host_locked(struct nlm_host *host)
clnt = host->h_rpcclnt;
if (clnt != NULL)
rpc_shutdown_client(clnt);
put_cred(host->h_cred);
kfree(host);
ln->nrhosts--;
@ -202,6 +205,8 @@ static void nlm_destroy_host_locked(struct nlm_host *host)
* @version: NLM protocol version
* @hostname: '\0'-terminated hostname of server
* @noresvport: 1 if non-privileged port should be used
* @net: pointer to net namespace
* @cred: pointer to cred
*
* Returns an nlm_host structure that matches the passed-in
* [server address, transport protocol, NLM version, server hostname].
@ -214,7 +219,8 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap,
const u32 version,
const char *hostname,
int noresvport,
struct net *net)
struct net *net,
const struct cred *cred)
{
struct nlm_lookup_host_info ni = {
.server = 0,
@ -226,6 +232,7 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap,
.hostname_len = strlen(hostname),
.noresvport = noresvport,
.net = net,
.cred = cred,
};
struct hlist_head *chain;
struct nlm_host *host;
@ -458,6 +465,7 @@ nlm_bind_host(struct nlm_host *host)
.authflavor = RPC_AUTH_UNIX,
.flags = (RPC_CLNT_CREATE_NOPING |
RPC_CLNT_CREATE_AUTOBIND),
.cred = host->h_cred,
};
/*

View File

@ -82,6 +82,7 @@ static struct rpc_clnt *nsm_create(struct net *net, const char *nodename)
.version = NSM_VERSION,
.authflavor = RPC_AUTH_NULL,
.flags = RPC_CLNT_CREATE_NOPING,
.cred = current_cred(),
};
return rpc_create(&args);

View File

@ -284,6 +284,7 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat
struct nfs_client *clp;
const struct sockaddr *sap = data->addr;
struct nfs_net *nn = net_generic(data->net, nfs_net_id);
int error;
again:
list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) {
@ -296,9 +297,11 @@ again:
if (clp->cl_cons_state > NFS_CS_READY) {
refcount_inc(&clp->cl_count);
spin_unlock(&nn->nfs_client_lock);
nfs_wait_client_init_complete(clp);
error = nfs_wait_client_init_complete(clp);
nfs_put_client(clp);
spin_lock(&nn->nfs_client_lock);
if (error < 0)
return ERR_PTR(error);
goto again;
}
@ -407,6 +410,8 @@ struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init)
clp = nfs_match_client(cl_init);
if (clp) {
spin_unlock(&nn->nfs_client_lock);
if (IS_ERR(clp))
return clp;
if (new)
new->rpc_ops->free_client(new);
return nfs_found_client(cl_init, clp);
@ -500,6 +505,7 @@ int nfs_create_rpc_client(struct nfs_client *clp,
.program = &nfs_program,
.version = clp->rpc_ops->version,
.authflavor = flavor,
.cred = cl_init->cred,
};
if (test_bit(NFS_CS_DISCRTRY, &clp->cl_flags))
@ -598,6 +604,8 @@ int nfs_init_server_rpcclient(struct nfs_server *server,
sizeof(server->client->cl_timeout_default));
server->client->cl_timeout = &server->client->cl_timeout_default;
server->client->cl_softrtry = 0;
if (server->flags & NFS_MOUNT_SOFTERR)
server->client->cl_softerr = 1;
if (server->flags & NFS_MOUNT_SOFT)
server->client->cl_softrtry = 1;
@ -652,6 +660,7 @@ static int nfs_init_server(struct nfs_server *server,
.proto = data->nfs_server.protocol,
.net = data->net,
.timeparms = &timeparms,
.cred = server->cred,
};
struct nfs_client *clp;
int error;
@ -920,6 +929,7 @@ void nfs_free_server(struct nfs_server *server)
ida_destroy(&server->lockowner_id);
ida_destroy(&server->openowner_id);
nfs_free_iostats(server->io_stats);
put_cred(server->cred);
kfree(server);
nfs_release_automount_timer();
}
@ -940,6 +950,8 @@ struct nfs_server *nfs_create_server(struct nfs_mount_info *mount_info,
if (!server)
return ERR_PTR(-ENOMEM);
server->cred = get_cred(current_cred());
error = -ENOMEM;
fattr = nfs_alloc_fattr();
if (fattr == NULL)
@ -1006,6 +1018,8 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source,
if (!server)
return ERR_PTR(-ENOMEM);
server->cred = get_cred(source->cred);
error = -ENOMEM;
fattr_fsinfo = nfs_alloc_fattr();
if (fattr_fsinfo == NULL)

View File

@ -1033,6 +1033,18 @@ void nfs_mark_test_expired_all_delegations(struct nfs_client *clp)
rcu_read_unlock();
}
/**
* nfs_test_expired_all_delegations - test all delegations for a client
* @clp: nfs_client to process
*
* Helper for handling "recallable state revoked" status from server.
* Calls nfs_mark_test_expired_all_delegations() on @clp and then
* nfs4_schedule_state_manager() for the same client.
*/
void nfs_test_expired_all_delegations(struct nfs_client *clp)
{
nfs_mark_test_expired_all_delegations(clp);
/* NOTE(review): presumably the state manager performs the actual tests - confirm */
nfs4_schedule_state_manager(clp);
}
/**
* nfs_reap_expired_delegations - reap expired delegations
* @clp: nfs_client to process

View File

@ -58,6 +58,7 @@ void nfs_delegation_mark_reclaim(struct nfs_client *clp);
void nfs_delegation_reap_unclaimed(struct nfs_client *clp);
void nfs_mark_test_expired_all_delegations(struct nfs_client *clp);
void nfs_test_expired_all_delegations(struct nfs_client *clp);
void nfs_reap_expired_delegations(struct nfs_client *clp);
/* NFSv4 delegation-related procedures */

View File

@ -714,8 +714,9 @@ out:
* We only need to convert from xdr once so future lookups are much simpler
*/
static
int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page* page)
int nfs_readdir_filler(void *data, struct page* page)
{
nfs_readdir_descriptor_t *desc = data;
struct inode *inode = file_inode(desc->file);
int ret;
@ -762,8 +763,8 @@ void cache_page_release(nfs_readdir_descriptor_t *desc)
static
struct page *get_cache_page(nfs_readdir_descriptor_t *desc)
{
return read_cache_page(desc->file->f_mapping,
desc->page_index, (filler_t *)nfs_readdir_filler, desc);
return read_cache_page(desc->file->f_mapping, desc->page_index,
nfs_readdir_filler, desc);
}
/*

View File

@ -492,7 +492,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
struct nfs_page *req;
unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
/* XXX do we need to do the eof zeroing found in async_filler? */
req = nfs_create_request(dreq->ctx, pagevec[i], NULL,
req = nfs_create_request(dreq->ctx, pagevec[i],
pgbase, req_len);
if (IS_ERR(req)) {
result = PTR_ERR(req);
@ -663,6 +663,8 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
}
list_for_each_entry_safe(req, tmp, &reqs, wb_list) {
/* Bump the transmission count */
req->wb_nio++;
if (!nfs_pageio_add_request(&desc, req)) {
nfs_list_move_request(req, &failed);
spin_lock(&cinfo.inode->i_lock);
@ -703,6 +705,11 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data)
req = nfs_list_entry(data->pages.next);
nfs_list_remove_request(req);
if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) {
/*
* Despite the reboot, the write was successful,
* so reset wb_nio.
*/
req->wb_nio = 0;
/* Note the rewrite will go through mds */
nfs_mark_request_commit(req, NULL, &cinfo, 0);
} else
@ -899,7 +906,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
struct nfs_page *req;
unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
req = nfs_create_request(dreq->ctx, pagevec[i], NULL,
req = nfs_create_request(dreq->ctx, pagevec[i],
pgbase, req_len);
if (IS_ERR(req)) {
result = PTR_ERR(req);

View File

@ -147,7 +147,7 @@ nfs_file_flush(struct file *file, fl_owner_t id)
return 0;
/* Flush writes to the server and return any errors */
return vfs_fsync(file, 0);
return nfs_wb_all(inode);
}
ssize_t
@ -199,13 +199,6 @@ EXPORT_SYMBOL_GPL(nfs_file_mmap);
* Flush any dirty pages for this process, and check for write errors.
* The return status from this call provides a reliable indication of
* whether any write errors occurred for this process.
*
* Notice that it clears the NFS_CONTEXT_ERROR_WRITE before synching to
* disk, but it retrieves and clears ctx->error after synching, despite
* the two being set at the same time in nfs_context_set_write_error().
* This is because the former is used to notify the _next_ call to
* nfs_file_write() that a write error occurred, and hence cause it to
* fall back to doing a synchronous write.
*/
static int
nfs_file_fsync_commit(struct file *file, int datasync)
@ -220,11 +213,8 @@ nfs_file_fsync_commit(struct file *file, int datasync)
nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
do_resend = test_and_clear_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags);
status = nfs_commit_inode(inode, FLUSH_SYNC);
if (test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags)) {
ret = xchg(&ctx->error, 0);
if (ret)
goto out;
}
if (status == 0)
status = file_check_and_advance_wb_err(file);
if (status < 0) {
ret = status;
goto out;
@ -245,13 +235,7 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
trace_nfs_fsync_enter(inode);
do {
struct nfs_open_context *ctx = nfs_file_open_context(file);
ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
if (test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags)) {
int ret2 = xchg(&ctx->error, 0);
if (ret2)
ret = ret2;
}
ret = file_write_and_wait_range(file, start, end);
if (ret != 0)
break;
ret = nfs_file_fsync_commit(file, datasync);
@ -600,8 +584,7 @@ static int nfs_need_check_write(struct file *filp, struct inode *inode)
struct nfs_open_context *ctx;
ctx = nfs_file_open_context(filp);
if (test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags) ||
nfs_ctx_key_to_expire(ctx, inode))
if (nfs_ctx_key_to_expire(ctx, inode))
return 1;
return 0;
}
@ -655,7 +638,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
/* Return error values */
if (nfs_need_check_write(file, inode)) {
int err = vfs_fsync(file, 0);
int err = nfs_wb_all(inode);
if (err < 0)
result = err;
}
@ -709,7 +692,7 @@ do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
* Flush all pending writes before doing anything
* with locks..
*/
vfs_fsync(filp, 0);
nfs_wb_all(inode);
l_ctx = nfs_get_lock_context(nfs_file_open_context(filp));
if (!IS_ERR(l_ctx)) {

View File

@ -904,7 +904,7 @@ fl_pnfs_update_layout(struct inode *ino,
status = filelayout_check_deviceid(lo, fl, gfp_flags);
if (status) {
pnfs_put_lseg(lseg);
lseg = ERR_PTR(status);
lseg = NULL;
}
out:
return lseg;
@ -917,7 +917,7 @@ filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio,
pnfs_generic_pg_check_layout(pgio);
if (!pgio->pg_lseg) {
pgio->pg_lseg = fl_pnfs_update_layout(pgio->pg_inode,
req->wb_context,
nfs_req_openctx(req),
0,
NFS4_MAX_UINT64,
IOMODE_READ,
@ -944,7 +944,7 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
pnfs_generic_pg_check_layout(pgio);
if (!pgio->pg_lseg) {
pgio->pg_lseg = fl_pnfs_update_layout(pgio->pg_inode,
req->wb_context,
nfs_req_openctx(req),
0,
NFS4_MAX_UINT64,
IOMODE_RW,

View File

@ -28,6 +28,8 @@
#define FF_LAYOUT_POLL_RETRY_MAX (15*HZ)
#define FF_LAYOUTRETURN_MAXERR 20
static unsigned short io_maxretrans;
static void ff_layout_read_record_layoutstats_done(struct rpc_task *task,
struct nfs_pgio_header *hdr);
static int ff_layout_mirror_prepare_stats(struct pnfs_layout_hdr *lo,
@ -871,7 +873,7 @@ ff_layout_pg_get_read(struct nfs_pageio_descriptor *pgio,
{
pnfs_put_lseg(pgio->pg_lseg);
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
req->wb_context,
nfs_req_openctx(req),
0,
NFS4_MAX_UINT64,
IOMODE_READ,
@ -925,6 +927,7 @@ retry:
pgm = &pgio->pg_mirrors[0];
pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].rsize;
pgio->pg_maxretrans = io_maxretrans;
return;
out_nolseg:
if (pgio->pg_error < 0)
@ -950,7 +953,7 @@ retry:
pnfs_generic_pg_check_layout(pgio);
if (!pgio->pg_lseg) {
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
req->wb_context,
nfs_req_openctx(req),
0,
NFS4_MAX_UINT64,
IOMODE_RW,
@ -992,6 +995,7 @@ retry:
pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].wsize;
}
pgio->pg_maxretrans = io_maxretrans;
return;
out_mds:
@ -1006,7 +1010,7 @@ ff_layout_pg_get_mirror_count_write(struct nfs_pageio_descriptor *pgio,
{
if (!pgio->pg_lseg) {
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
req->wb_context,
nfs_req_openctx(req),
0,
NFS4_MAX_UINT64,
IOMODE_RW,
@ -2515,3 +2519,7 @@ MODULE_DESCRIPTION("The NFSv4 flexfile layout driver");
module_init(nfs4flexfilelayout_init);
module_exit(nfs4flexfilelayout_exit);
module_param(io_maxretrans, ushort, 0644);
MODULE_PARM_DESC(io_maxretrans, "The number of times the NFSv4.1 client "
"retries an I/O request before returning an error. ");

View File

@ -885,10 +885,14 @@ struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx)
spin_lock(&inode->i_lock);
res = __nfs_find_lock_context(ctx);
if (res == NULL) {
list_add_tail_rcu(&new->list, &ctx->lock_context.list);
new->open_context = ctx;
res = new;
new = NULL;
new->open_context = get_nfs_open_context(ctx);
if (new->open_context) {
list_add_tail_rcu(&new->list,
&ctx->lock_context.list);
res = new;
new = NULL;
} else
res = ERR_PTR(-EBADF);
}
spin_unlock(&inode->i_lock);
kfree(new);
@ -906,6 +910,7 @@ void nfs_put_lock_context(struct nfs_lock_context *l_ctx)
return;
list_del_rcu(&l_ctx->list);
spin_unlock(&inode->i_lock);
put_nfs_open_context(ctx);
kfree_rcu(l_ctx, rcu_head);
}
EXPORT_SYMBOL_GPL(nfs_put_lock_context);

View File

@ -84,6 +84,7 @@ struct nfs_client_initdata {
u32 minorversion;
struct net *net;
const struct rpc_timeout *timeparms;
const struct cred *cred;
};
/*
@ -766,15 +767,10 @@ static inline bool nfs_error_is_fatal(int err)
case -ESTALE:
case -E2BIG:
case -ENOMEM:
case -ETIMEDOUT:
return true;
default:
return false;
}
}
static inline void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
{
ctx->error = error;
smp_wmb();
set_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
}

View File

@ -163,6 +163,7 @@ int nfs_mount(struct nfs_mount_request *info)
.program = &mnt_program,
.version = info->version,
.authflavor = RPC_AUTH_UNIX,
.cred = current_cred(),
};
struct rpc_clnt *mnt_clnt;
int status;
@ -249,6 +250,7 @@ void nfs_umount(const struct nfs_mount_request *info)
.version = info->version,
.authflavor = RPC_AUTH_UNIX,
.flags = RPC_CLNT_CREATE_NOPING,
.cred = current_cred(),
};
struct rpc_message msg = {
.rpc_argp = info->dirpath,

View File

@ -76,6 +76,20 @@ static int nfs_stat_to_errno(enum nfs_stat);
* or decoded inline.
*/
/*
* Return the user namespace of the credential attached to @clnt
* (clnt->cl_cred->user_ns). Falls back to &init_user_ns when @clnt is
* NULL or has no cl_cred set.
*/
static struct user_namespace *rpc_userns(const struct rpc_clnt *clnt)
{
if (clnt && clnt->cl_cred)
return clnt->cl_cred->user_ns;
return &init_user_ns;
}
/*
* Return the user namespace to use for @rqstp: the result of rpc_userns()
* on the client of the request's task (rq_task->tk_client), or
* &init_user_ns when the request has no rq_task.
*/
static struct user_namespace *rpc_rqst_userns(const struct rpc_rqst *rqstp)
{
if (rqstp->rq_task)
return rpc_userns(rqstp->rq_task->tk_client);
return &init_user_ns;
}
/*
* typedef opaque nfsdata<>;
*/
@ -248,7 +262,8 @@ static __be32 *xdr_decode_time(__be32 *p, struct timespec *timep)
* };
*
*/
static int decode_fattr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
static int decode_fattr(struct xdr_stream *xdr, struct nfs_fattr *fattr,
struct user_namespace *userns)
{
u32 rdev, type;
__be32 *p;
@ -263,10 +278,10 @@ static int decode_fattr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
fattr->mode = be32_to_cpup(p++);
fattr->nlink = be32_to_cpup(p++);
fattr->uid = make_kuid(&init_user_ns, be32_to_cpup(p++));
fattr->uid = make_kuid(userns, be32_to_cpup(p++));
if (!uid_valid(fattr->uid))
goto out_uid;
fattr->gid = make_kgid(&init_user_ns, be32_to_cpup(p++));
fattr->gid = make_kgid(userns, be32_to_cpup(p++));
if (!gid_valid(fattr->gid))
goto out_gid;
@ -321,7 +336,8 @@ static __be32 *xdr_time_not_set(__be32 *p)
return p;
}
static void encode_sattr(struct xdr_stream *xdr, const struct iattr *attr)
static void encode_sattr(struct xdr_stream *xdr, const struct iattr *attr,
struct user_namespace *userns)
{
struct timespec ts;
__be32 *p;
@ -333,11 +349,11 @@ static void encode_sattr(struct xdr_stream *xdr, const struct iattr *attr)
else
*p++ = cpu_to_be32(NFS2_SATTR_NOT_SET);
if (attr->ia_valid & ATTR_UID)
*p++ = cpu_to_be32(from_kuid(&init_user_ns, attr->ia_uid));
*p++ = cpu_to_be32(from_kuid_munged(userns, attr->ia_uid));
else
*p++ = cpu_to_be32(NFS2_SATTR_NOT_SET);
if (attr->ia_valid & ATTR_GID)
*p++ = cpu_to_be32(from_kgid(&init_user_ns, attr->ia_gid));
*p++ = cpu_to_be32(from_kgid_munged(userns, attr->ia_gid));
else
*p++ = cpu_to_be32(NFS2_SATTR_NOT_SET);
if (attr->ia_valid & ATTR_SIZE)
@ -451,7 +467,8 @@ out_cheating:
* };
*/
static int decode_attrstat(struct xdr_stream *xdr, struct nfs_fattr *result,
__u32 *op_status)
__u32 *op_status,
struct user_namespace *userns)
{
enum nfs_stat status;
int error;
@ -463,7 +480,7 @@ static int decode_attrstat(struct xdr_stream *xdr, struct nfs_fattr *result,
*op_status = status;
if (status != NFS_OK)
goto out_default;
error = decode_fattr(xdr, result);
error = decode_fattr(xdr, result, userns);
out:
return error;
out_default:
@ -498,19 +515,21 @@ static void encode_diropargs(struct xdr_stream *xdr, const struct nfs_fh *fh,
* void;
* };
*/
static int decode_diropok(struct xdr_stream *xdr, struct nfs_diropok *result)
static int decode_diropok(struct xdr_stream *xdr, struct nfs_diropok *result,
struct user_namespace *userns)
{
int error;
error = decode_fhandle(xdr, result->fh);
if (unlikely(error))
goto out;
error = decode_fattr(xdr, result->fattr);
error = decode_fattr(xdr, result->fattr, userns);
out:
return error;
}
static int decode_diropres(struct xdr_stream *xdr, struct nfs_diropok *result)
static int decode_diropres(struct xdr_stream *xdr, struct nfs_diropok *result,
struct user_namespace *userns)
{
enum nfs_stat status;
int error;
@ -520,7 +539,7 @@ static int decode_diropres(struct xdr_stream *xdr, struct nfs_diropok *result)
goto out;
if (status != NFS_OK)
goto out_default;
error = decode_diropok(xdr, result);
error = decode_diropok(xdr, result, userns);
out:
return error;
out_default:
@ -559,7 +578,7 @@ static void nfs2_xdr_enc_sattrargs(struct rpc_rqst *req,
const struct nfs_sattrargs *args = data;
encode_fhandle(xdr, args->fh);
encode_sattr(xdr, args->sattr);
encode_sattr(xdr, args->sattr, rpc_rqst_userns(req));
}
static void nfs2_xdr_enc_diropargs(struct rpc_rqst *req,
@ -674,7 +693,7 @@ static void nfs2_xdr_enc_createargs(struct rpc_rqst *req,
const struct nfs_createargs *args = data;
encode_diropargs(xdr, args->fh, args->name, args->len);
encode_sattr(xdr, args->sattr);
encode_sattr(xdr, args->sattr, rpc_rqst_userns(req));
}
static void nfs2_xdr_enc_removeargs(struct rpc_rqst *req,
@ -741,7 +760,7 @@ static void nfs2_xdr_enc_symlinkargs(struct rpc_rqst *req,
encode_diropargs(xdr, args->fromfh, args->fromname, args->fromlen);
encode_path(xdr, args->pages, args->pathlen);
encode_sattr(xdr, args->sattr);
encode_sattr(xdr, args->sattr, rpc_rqst_userns(req));
}
/*
@ -803,13 +822,13 @@ out_default:
static int nfs2_xdr_dec_attrstat(struct rpc_rqst *req, struct xdr_stream *xdr,
void *result)
{
return decode_attrstat(xdr, result, NULL);
return decode_attrstat(xdr, result, NULL, rpc_rqst_userns(req));
}
static int nfs2_xdr_dec_diropres(struct rpc_rqst *req, struct xdr_stream *xdr,
void *result)
{
return decode_diropres(xdr, result);
return decode_diropres(xdr, result, rpc_rqst_userns(req));
}
/*
@ -864,7 +883,7 @@ static int nfs2_xdr_dec_readres(struct rpc_rqst *req, struct xdr_stream *xdr,
result->op_status = status;
if (status != NFS_OK)
goto out_default;
error = decode_fattr(xdr, result->fattr);
error = decode_fattr(xdr, result->fattr, rpc_rqst_userns(req));
if (unlikely(error))
goto out;
error = decode_nfsdata(xdr, result);
@ -881,7 +900,8 @@ static int nfs2_xdr_dec_writeres(struct rpc_rqst *req, struct xdr_stream *xdr,
/* All NFSv2 writes are "file sync" writes */
result->verf->committed = NFS_FILE_SYNC;
return decode_attrstat(xdr, result->fattr, &result->op_status);
return decode_attrstat(xdr, result->fattr, &result->op_status,
rpc_rqst_userns(req));
}
/**

View File

@ -91,6 +91,7 @@ struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv,
.proto = ds_proto,
.net = mds_clp->cl_net,
.timeparms = &ds_timeout,
.cred = mds_srv->cred,
};
struct nfs_client *clp;
char buf[INET6_ADDRSTRLEN + 1];

View File

@ -104,6 +104,20 @@ static const umode_t nfs_type2fmt[] = {
[NF3FIFO] = S_IFIFO,
};
/*
* Return the user namespace of the credential attached to @clnt
* (clnt->cl_cred->user_ns). Falls back to &init_user_ns when @clnt is
* NULL or has no cl_cred set.
*/
static struct user_namespace *rpc_userns(const struct rpc_clnt *clnt)
{
if (clnt && clnt->cl_cred)
return clnt->cl_cred->user_ns;
return &init_user_ns;
}
/*
* Return the user namespace to use for @rqstp: the result of rpc_userns()
* on the client of the request's task (rq_task->tk_client), or
* &init_user_ns when the request has no rq_task.
*/
static struct user_namespace *rpc_rqst_userns(const struct rpc_rqst *rqstp)
{
if (rqstp->rq_task)
return rpc_userns(rqstp->rq_task->tk_client);
return &init_user_ns;
}
/*
* Encode/decode NFSv3 basic data types
*
@ -516,7 +530,8 @@ static __be32 *xdr_decode_nfstime3(__be32 *p, struct timespec *timep)
* set_mtime mtime;
* };
*/
static void encode_sattr3(struct xdr_stream *xdr, const struct iattr *attr)
static void encode_sattr3(struct xdr_stream *xdr, const struct iattr *attr,
struct user_namespace *userns)
{
struct timespec ts;
u32 nbytes;
@ -551,13 +566,13 @@ static void encode_sattr3(struct xdr_stream *xdr, const struct iattr *attr)
if (attr->ia_valid & ATTR_UID) {
*p++ = xdr_one;
*p++ = cpu_to_be32(from_kuid(&init_user_ns, attr->ia_uid));
*p++ = cpu_to_be32(from_kuid_munged(userns, attr->ia_uid));
} else
*p++ = xdr_zero;
if (attr->ia_valid & ATTR_GID) {
*p++ = xdr_one;
*p++ = cpu_to_be32(from_kgid(&init_user_ns, attr->ia_gid));
*p++ = cpu_to_be32(from_kgid_munged(userns, attr->ia_gid));
} else
*p++ = xdr_zero;
@ -606,7 +621,8 @@ static void encode_sattr3(struct xdr_stream *xdr, const struct iattr *attr)
* nfstime3 ctime;
* };
*/
static int decode_fattr3(struct xdr_stream *xdr, struct nfs_fattr *fattr)
static int decode_fattr3(struct xdr_stream *xdr, struct nfs_fattr *fattr,
struct user_namespace *userns)
{
umode_t fmode;
__be32 *p;
@ -619,10 +635,10 @@ static int decode_fattr3(struct xdr_stream *xdr, struct nfs_fattr *fattr)
fattr->mode = (be32_to_cpup(p++) & ~S_IFMT) | fmode;
fattr->nlink = be32_to_cpup(p++);
fattr->uid = make_kuid(&init_user_ns, be32_to_cpup(p++));
fattr->uid = make_kuid(userns, be32_to_cpup(p++));
if (!uid_valid(fattr->uid))
goto out_uid;
fattr->gid = make_kgid(&init_user_ns, be32_to_cpup(p++));
fattr->gid = make_kgid(userns, be32_to_cpup(p++));
if (!gid_valid(fattr->gid))
goto out_gid;
@ -659,7 +675,8 @@ out_gid:
* void;
* };
*/
static int decode_post_op_attr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
static int decode_post_op_attr(struct xdr_stream *xdr, struct nfs_fattr *fattr,
struct user_namespace *userns)
{
__be32 *p;
@ -667,7 +684,7 @@ static int decode_post_op_attr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
if (unlikely(!p))
return -EIO;
if (*p != xdr_zero)
return decode_fattr3(xdr, fattr);
return decode_fattr3(xdr, fattr, userns);
return 0;
}
@ -728,14 +745,15 @@ static int decode_pre_op_attr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
return 0;
}
static int decode_wcc_data(struct xdr_stream *xdr, struct nfs_fattr *fattr)
static int decode_wcc_data(struct xdr_stream *xdr, struct nfs_fattr *fattr,
struct user_namespace *userns)
{
int error;
error = decode_pre_op_attr(xdr, fattr);
if (unlikely(error))
goto out;
error = decode_post_op_attr(xdr, fattr);
error = decode_post_op_attr(xdr, fattr, userns);
out:
return error;
}
@ -837,7 +855,7 @@ static void nfs3_xdr_enc_setattr3args(struct rpc_rqst *req,
{
const struct nfs3_sattrargs *args = data;
encode_nfs_fh3(xdr, args->fh);
encode_sattr3(xdr, args->sattr);
encode_sattr3(xdr, args->sattr, rpc_rqst_userns(req));
encode_sattrguard3(xdr, args);
}
@ -998,13 +1016,14 @@ static void nfs3_xdr_enc_write3args(struct rpc_rqst *req,
* };
*/
static void encode_createhow3(struct xdr_stream *xdr,
const struct nfs3_createargs *args)
const struct nfs3_createargs *args,
struct user_namespace *userns)
{
encode_uint32(xdr, args->createmode);
switch (args->createmode) {
case NFS3_CREATE_UNCHECKED:
case NFS3_CREATE_GUARDED:
encode_sattr3(xdr, args->sattr);
encode_sattr3(xdr, args->sattr, userns);
break;
case NFS3_CREATE_EXCLUSIVE:
encode_createverf3(xdr, args->verifier);
@ -1021,7 +1040,7 @@ static void nfs3_xdr_enc_create3args(struct rpc_rqst *req,
const struct nfs3_createargs *args = data;
encode_diropargs3(xdr, args->fh, args->name, args->len);
encode_createhow3(xdr, args);
encode_createhow3(xdr, args, rpc_rqst_userns(req));
}
/*
@ -1039,7 +1058,7 @@ static void nfs3_xdr_enc_mkdir3args(struct rpc_rqst *req,
const struct nfs3_mkdirargs *args = data;
encode_diropargs3(xdr, args->fh, args->name, args->len);
encode_sattr3(xdr, args->sattr);
encode_sattr3(xdr, args->sattr, rpc_rqst_userns(req));
}
/*
@ -1056,11 +1075,12 @@ static void nfs3_xdr_enc_mkdir3args(struct rpc_rqst *req,
* };
*/
static void encode_symlinkdata3(struct xdr_stream *xdr,
const void *data)
const void *data,
struct user_namespace *userns)
{
const struct nfs3_symlinkargs *args = data;
encode_sattr3(xdr, args->sattr);
encode_sattr3(xdr, args->sattr, userns);
encode_nfspath3(xdr, args->pages, args->pathlen);
}
@ -1071,7 +1091,7 @@ static void nfs3_xdr_enc_symlink3args(struct rpc_rqst *req,
const struct nfs3_symlinkargs *args = data;
encode_diropargs3(xdr, args->fromfh, args->fromname, args->fromlen);
encode_symlinkdata3(xdr, args);
encode_symlinkdata3(xdr, args, rpc_rqst_userns(req));
xdr->buf->flags |= XDRBUF_WRITE;
}
@ -1100,24 +1120,26 @@ static void nfs3_xdr_enc_symlink3args(struct rpc_rqst *req,
* };
*/
static void encode_devicedata3(struct xdr_stream *xdr,
const struct nfs3_mknodargs *args)
const struct nfs3_mknodargs *args,
struct user_namespace *userns)
{
encode_sattr3(xdr, args->sattr);
encode_sattr3(xdr, args->sattr, userns);
encode_specdata3(xdr, args->rdev);
}
static void encode_mknoddata3(struct xdr_stream *xdr,
const struct nfs3_mknodargs *args)
const struct nfs3_mknodargs *args,
struct user_namespace *userns)
{
encode_ftype3(xdr, args->type);
switch (args->type) {
case NF3CHR:
case NF3BLK:
encode_devicedata3(xdr, args);
encode_devicedata3(xdr, args, userns);
break;
case NF3SOCK:
case NF3FIFO:
encode_sattr3(xdr, args->sattr);
encode_sattr3(xdr, args->sattr, userns);
break;
case NF3REG:
case NF3DIR:
@ -1134,7 +1156,7 @@ static void nfs3_xdr_enc_mknod3args(struct rpc_rqst *req,
const struct nfs3_mknodargs *args = data;
encode_diropargs3(xdr, args->fh, args->name, args->len);
encode_mknoddata3(xdr, args);
encode_mknoddata3(xdr, args, rpc_rqst_userns(req));
}
/*
@ -1379,7 +1401,7 @@ static int nfs3_xdr_dec_getattr3res(struct rpc_rqst *req,
goto out;
if (status != NFS3_OK)
goto out_default;
error = decode_fattr3(xdr, result);
error = decode_fattr3(xdr, result, rpc_rqst_userns(req));
out:
return error;
out_default:
@ -1414,7 +1436,7 @@ static int nfs3_xdr_dec_setattr3res(struct rpc_rqst *req,
error = decode_nfsstat3(xdr, &status);
if (unlikely(error))
goto out;
error = decode_wcc_data(xdr, result);
error = decode_wcc_data(xdr, result, rpc_rqst_userns(req));
if (unlikely(error))
goto out;
if (status != NFS3_OK)
@ -1449,6 +1471,7 @@ static int nfs3_xdr_dec_lookup3res(struct rpc_rqst *req,
struct xdr_stream *xdr,
void *data)
{
struct user_namespace *userns = rpc_rqst_userns(req);
struct nfs3_diropres *result = data;
enum nfs_stat status;
int error;
@ -1461,14 +1484,14 @@ static int nfs3_xdr_dec_lookup3res(struct rpc_rqst *req,
error = decode_nfs_fh3(xdr, result->fh);
if (unlikely(error))
goto out;
error = decode_post_op_attr(xdr, result->fattr);
error = decode_post_op_attr(xdr, result->fattr, userns);
if (unlikely(error))
goto out;
error = decode_post_op_attr(xdr, result->dir_attr);
error = decode_post_op_attr(xdr, result->dir_attr, userns);
out:
return error;
out_default:
error = decode_post_op_attr(xdr, result->dir_attr);
error = decode_post_op_attr(xdr, result->dir_attr, userns);
if (unlikely(error))
goto out;
return nfs3_stat_to_errno(status);
@ -1504,7 +1527,7 @@ static int nfs3_xdr_dec_access3res(struct rpc_rqst *req,
error = decode_nfsstat3(xdr, &status);
if (unlikely(error))
goto out;
error = decode_post_op_attr(xdr, result->fattr);
error = decode_post_op_attr(xdr, result->fattr, rpc_rqst_userns(req));
if (unlikely(error))
goto out;
if (status != NFS3_OK)
@ -1545,7 +1568,7 @@ static int nfs3_xdr_dec_readlink3res(struct rpc_rqst *req,
error = decode_nfsstat3(xdr, &status);
if (unlikely(error))
goto out;
error = decode_post_op_attr(xdr, result);
error = decode_post_op_attr(xdr, result, rpc_rqst_userns(req));
if (unlikely(error))
goto out;
if (status != NFS3_OK)
@ -1623,7 +1646,7 @@ static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr,
error = decode_nfsstat3(xdr, &status);
if (unlikely(error))
goto out;
error = decode_post_op_attr(xdr, result->fattr);
error = decode_post_op_attr(xdr, result->fattr, rpc_rqst_userns(req));
if (unlikely(error))
goto out;
result->op_status = status;
@ -1694,7 +1717,7 @@ static int nfs3_xdr_dec_write3res(struct rpc_rqst *req, struct xdr_stream *xdr,
error = decode_nfsstat3(xdr, &status);
if (unlikely(error))
goto out;
error = decode_wcc_data(xdr, result->fattr);
error = decode_wcc_data(xdr, result->fattr, rpc_rqst_userns(req));
if (unlikely(error))
goto out;
result->op_status = status;
@ -1728,14 +1751,15 @@ out_status:
* };
*/
static int decode_create3resok(struct xdr_stream *xdr,
struct nfs3_diropres *result)
struct nfs3_diropres *result,
struct user_namespace *userns)
{
int error;
error = decode_post_op_fh3(xdr, result->fh);
if (unlikely(error))
goto out;
error = decode_post_op_attr(xdr, result->fattr);
error = decode_post_op_attr(xdr, result->fattr, userns);
if (unlikely(error))
goto out;
/* The server isn't required to return a file handle.
@ -1744,7 +1768,7 @@ static int decode_create3resok(struct xdr_stream *xdr,
* values for the new object. */
if (result->fh->size == 0)
result->fattr->valid = 0;
error = decode_wcc_data(xdr, result->dir_attr);
error = decode_wcc_data(xdr, result->dir_attr, userns);
out:
return error;
}
@ -1753,6 +1777,7 @@ static int nfs3_xdr_dec_create3res(struct rpc_rqst *req,
struct xdr_stream *xdr,
void *data)
{
struct user_namespace *userns = rpc_rqst_userns(req);
struct nfs3_diropres *result = data;
enum nfs_stat status;
int error;
@ -1762,11 +1787,11 @@ static int nfs3_xdr_dec_create3res(struct rpc_rqst *req,
goto out;
if (status != NFS3_OK)
goto out_default;
error = decode_create3resok(xdr, result);
error = decode_create3resok(xdr, result, userns);
out:
return error;
out_default:
error = decode_wcc_data(xdr, result->dir_attr);
error = decode_wcc_data(xdr, result->dir_attr, userns);
if (unlikely(error))
goto out;
return nfs3_stat_to_errno(status);
@ -1801,7 +1826,7 @@ static int nfs3_xdr_dec_remove3res(struct rpc_rqst *req,
error = decode_nfsstat3(xdr, &status);
if (unlikely(error))
goto out;
error = decode_wcc_data(xdr, result->dir_attr);
error = decode_wcc_data(xdr, result->dir_attr, rpc_rqst_userns(req));
if (unlikely(error))
goto out;
if (status != NFS3_OK)
@ -1836,6 +1861,7 @@ static int nfs3_xdr_dec_rename3res(struct rpc_rqst *req,
struct xdr_stream *xdr,
void *data)
{
struct user_namespace *userns = rpc_rqst_userns(req);
struct nfs_renameres *result = data;
enum nfs_stat status;
int error;
@ -1843,10 +1869,10 @@ static int nfs3_xdr_dec_rename3res(struct rpc_rqst *req,
error = decode_nfsstat3(xdr, &status);
if (unlikely(error))
goto out;
error = decode_wcc_data(xdr, result->old_fattr);
error = decode_wcc_data(xdr, result->old_fattr, userns);
if (unlikely(error))
goto out;
error = decode_wcc_data(xdr, result->new_fattr);
error = decode_wcc_data(xdr, result->new_fattr, userns);
if (unlikely(error))
goto out;
if (status != NFS3_OK)
@ -1880,6 +1906,7 @@ out_status:
static int nfs3_xdr_dec_link3res(struct rpc_rqst *req, struct xdr_stream *xdr,
void *data)
{
struct user_namespace *userns = rpc_rqst_userns(req);
struct nfs3_linkres *result = data;
enum nfs_stat status;
int error;
@ -1887,10 +1914,10 @@ static int nfs3_xdr_dec_link3res(struct rpc_rqst *req, struct xdr_stream *xdr,
error = decode_nfsstat3(xdr, &status);
if (unlikely(error))
goto out;
error = decode_post_op_attr(xdr, result->fattr);
error = decode_post_op_attr(xdr, result->fattr, userns);
if (unlikely(error))
goto out;
error = decode_wcc_data(xdr, result->dir_attr);
error = decode_wcc_data(xdr, result->dir_attr, userns);
if (unlikely(error))
goto out;
if (status != NFS3_OK)
@ -1939,6 +1966,7 @@ out_status:
int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
bool plus)
{
struct user_namespace *userns = rpc_userns(entry->server->client);
struct nfs_entry old = *entry;
__be32 *p;
int error;
@ -1973,7 +2001,7 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
if (plus) {
entry->fattr->valid = 0;
error = decode_post_op_attr(xdr, entry->fattr);
error = decode_post_op_attr(xdr, entry->fattr, userns);
if (unlikely(error))
return error;
if (entry->fattr->valid & NFS_ATTR_FATTR_V3)
@ -2045,11 +2073,12 @@ static int decode_dirlist3(struct xdr_stream *xdr)
}
static int decode_readdir3resok(struct xdr_stream *xdr,
struct nfs3_readdirres *result)
struct nfs3_readdirres *result,
struct user_namespace *userns)
{
int error;
error = decode_post_op_attr(xdr, result->dir_attr);
error = decode_post_op_attr(xdr, result->dir_attr, userns);
if (unlikely(error))
goto out;
/* XXX: do we need to check if result->verf != NULL ? */
@ -2074,11 +2103,11 @@ static int nfs3_xdr_dec_readdir3res(struct rpc_rqst *req,
goto out;
if (status != NFS3_OK)
goto out_default;
error = decode_readdir3resok(xdr, result);
error = decode_readdir3resok(xdr, result, rpc_rqst_userns(req));
out:
return error;
out_default:
error = decode_post_op_attr(xdr, result->dir_attr);
error = decode_post_op_attr(xdr, result->dir_attr, rpc_rqst_userns(req));
if (unlikely(error))
goto out;
return nfs3_stat_to_errno(status);
@ -2138,7 +2167,7 @@ static int nfs3_xdr_dec_fsstat3res(struct rpc_rqst *req,
error = decode_nfsstat3(xdr, &status);
if (unlikely(error))
goto out;
error = decode_post_op_attr(xdr, result->fattr);
error = decode_post_op_attr(xdr, result->fattr, rpc_rqst_userns(req));
if (unlikely(error))
goto out;
if (status != NFS3_OK)
@ -2212,7 +2241,7 @@ static int nfs3_xdr_dec_fsinfo3res(struct rpc_rqst *req,
error = decode_nfsstat3(xdr, &status);
if (unlikely(error))
goto out;
error = decode_post_op_attr(xdr, result->fattr);
error = decode_post_op_attr(xdr, result->fattr, rpc_rqst_userns(req));
if (unlikely(error))
goto out;
if (status != NFS3_OK)
@ -2273,7 +2302,7 @@ static int nfs3_xdr_dec_pathconf3res(struct rpc_rqst *req,
error = decode_nfsstat3(xdr, &status);
if (unlikely(error))
goto out;
error = decode_post_op_attr(xdr, result->fattr);
error = decode_post_op_attr(xdr, result->fattr, rpc_rqst_userns(req));
if (unlikely(error))
goto out;
if (status != NFS3_OK)
@ -2315,7 +2344,7 @@ static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req,
error = decode_nfsstat3(xdr, &status);
if (unlikely(error))
goto out;
error = decode_wcc_data(xdr, result->fattr);
error = decode_wcc_data(xdr, result->fattr, rpc_rqst_userns(req));
if (unlikely(error))
goto out;
result->op_status = status;
@ -2331,14 +2360,15 @@ out_status:
#ifdef CONFIG_NFS_V3_ACL
static inline int decode_getacl3resok(struct xdr_stream *xdr,
struct nfs3_getaclres *result)
struct nfs3_getaclres *result,
struct user_namespace *userns)
{
struct posix_acl **acl;
unsigned int *aclcnt;
size_t hdrlen;
int error;
error = decode_post_op_attr(xdr, result->fattr);
error = decode_post_op_attr(xdr, result->fattr, userns);
if (unlikely(error))
goto out;
error = decode_uint32(xdr, &result->mask);
@ -2386,7 +2416,7 @@ static int nfs3_xdr_dec_getacl3res(struct rpc_rqst *req,
goto out;
if (status != NFS3_OK)
goto out_default;
error = decode_getacl3resok(xdr, result);
error = decode_getacl3resok(xdr, result, rpc_rqst_userns(req));
out:
return error;
out_default:
@ -2405,7 +2435,7 @@ static int nfs3_xdr_dec_setacl3res(struct rpc_rqst *req,
goto out;
if (status != NFS3_OK)
goto out_default;
error = decode_post_op_attr(xdr, result);
error = decode_post_op_attr(xdr, result, rpc_rqst_userns(req));
out:
return error;
out_default:

View File

@ -206,6 +206,7 @@ struct nfs4_exception {
unsigned char delay : 1,
recovering : 1,
retry : 1;
bool interruptible;
};
struct nfs4_state_recovery_ops {

View File

@ -870,6 +870,7 @@ static int nfs4_set_client(struct nfs_server *server,
.minorversion = minorversion,
.net = net,
.timeparms = timeparms,
.cred = server->cred,
};
struct nfs_client *clp;
@ -931,6 +932,7 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv,
.minorversion = minor_version,
.net = mds_clp->cl_net,
.timeparms = &ds_timeout,
.cred = mds_srv->cred,
};
char buf[INET6_ADDRSTRLEN + 1];
@ -1107,6 +1109,8 @@ struct nfs_server *nfs4_create_server(struct nfs_mount_info *mount_info,
if (!server)
return ERR_PTR(-ENOMEM);
server->cred = get_cred(current_cred());
auth_probe = mount_info->parsed->auth_info.flavor_len < 1;
/* set up the general RPC client */
@ -1143,6 +1147,8 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
parent_server = NFS_SB(data->sb);
parent_client = parent_server->nfs_client;
server->cred = get_cred(parent_server->cred);
/* Initialise the client representation from the parent server */
nfs_server_copy_userdata(server, parent_server);

View File

@ -125,7 +125,7 @@ nfs4_file_flush(struct file *file, fl_owner_t id)
return filemap_fdatawrite(file->f_mapping);
/* Flush writes to the server and return any errors */
return vfs_fsync(file, 0);
return nfs_wb_all(inode);
}
#ifdef CONFIG_NFS_V4_2
@ -187,7 +187,7 @@ static loff_t nfs42_remap_file_range(struct file *src_file, loff_t src_off,
bool same_inode = false;
int ret;
if (remap_flags & ~REMAP_FILE_ADVISORY)
if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
return -EINVAL;
/* check alignment w.r.t. clone_blksize */

View File

@ -69,8 +69,16 @@ struct idmap {
struct rpc_pipe *idmap_pipe;
struct idmap_legacy_upcalldata *idmap_upcall_data;
struct mutex idmap_mutex;
const struct cred *cred;
};
/*
 * Pick the user namespace used for id translation by this idmap.
 *
 * Returns the namespace of the idmap's credential when both the idmap
 * and its credential are present; otherwise falls back to &init_user_ns.
 */
static struct user_namespace *idmap_userns(const struct idmap *idmap)
{
	if (!idmap || !idmap->cred)
		return &init_user_ns;

	return idmap->cred->user_ns;
}
/**
* nfs_fattr_init_names - initialise the nfs_fattr owner_name/group_name fields
* @fattr: fully initialised struct nfs_fattr
@ -271,14 +279,15 @@ static struct key *nfs_idmap_request_key(const char *name, size_t namelen,
const char *type, struct idmap *idmap)
{
char *desc;
struct key *rkey;
struct key *rkey = ERR_PTR(-EAGAIN);
ssize_t ret;
ret = nfs_idmap_get_desc(name, namelen, type, strlen(type), &desc);
if (ret < 0)
return ERR_PTR(ret);
rkey = request_key(&key_type_id_resolver, desc, "");
if (!idmap->cred || idmap->cred->user_ns == &init_user_ns)
rkey = request_key(&key_type_id_resolver, desc, "");
if (IS_ERR(rkey)) {
mutex_lock(&idmap->idmap_mutex);
rkey = request_key_with_auxdata(&key_type_id_resolver_legacy,
@ -452,6 +461,9 @@ nfs_idmap_new(struct nfs_client *clp)
if (idmap == NULL)
return -ENOMEM;
mutex_init(&idmap->idmap_mutex);
idmap->cred = get_cred(clp->cl_rpcclient->cl_cred);
rpc_init_pipe_dir_object(&idmap->idmap_pdo,
&nfs_idmap_pipe_dir_object_ops,
idmap);
@ -462,7 +474,6 @@ nfs_idmap_new(struct nfs_client *clp)
goto err;
}
idmap->idmap_pipe = pipe;
mutex_init(&idmap->idmap_mutex);
error = rpc_add_pipe_dir_object(clp->cl_net,
&clp->cl_rpcclient->cl_pipedir_objects,
@ -475,6 +486,7 @@ nfs_idmap_new(struct nfs_client *clp)
err_destroy_pipe:
rpc_destroy_pipe_data(idmap->idmap_pipe);
err:
put_cred(idmap->cred);
kfree(idmap);
return error;
}
@ -491,6 +503,7 @@ nfs_idmap_delete(struct nfs_client *clp)
&clp->cl_rpcclient->cl_pipedir_objects,
&idmap->idmap_pdo);
rpc_destroy_pipe_data(idmap->idmap_pipe);
put_cred(idmap->cred);
kfree(idmap);
}
@ -735,7 +748,7 @@ int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_
if (!nfs_map_string_to_numeric(name, namelen, &id))
ret = nfs_idmap_lookup_id(name, namelen, "uid", &id, idmap);
if (ret == 0) {
*uid = make_kuid(&init_user_ns, id);
*uid = make_kuid(idmap_userns(idmap), id);
if (!uid_valid(*uid))
ret = -ERANGE;
}
@ -752,7 +765,7 @@ int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size
if (!nfs_map_string_to_numeric(name, namelen, &id))
ret = nfs_idmap_lookup_id(name, namelen, "gid", &id, idmap);
if (ret == 0) {
*gid = make_kgid(&init_user_ns, id);
*gid = make_kgid(idmap_userns(idmap), id);
if (!gid_valid(*gid))
ret = -ERANGE;
}
@ -766,7 +779,7 @@ int nfs_map_uid_to_name(const struct nfs_server *server, kuid_t uid, char *buf,
int ret = -EINVAL;
__u32 id;
id = from_kuid(&init_user_ns, uid);
id = from_kuid_munged(idmap_userns(idmap), uid);
if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
ret = nfs_idmap_lookup_name(id, "user", buf, buflen, idmap);
if (ret < 0)
@ -780,7 +793,7 @@ int nfs_map_gid_to_group(const struct nfs_server *server, kgid_t gid, char *buf,
int ret = -EINVAL;
__u32 id;
id = from_kgid(&init_user_ns, gid);
id = from_kgid_munged(idmap_userns(idmap), gid);
if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
ret = nfs_idmap_lookup_name(id, "group", buf, buflen, idmap);
if (ret < 0)

View File

@ -400,17 +400,32 @@ static long nfs4_update_delay(long *timeout)
return ret;
}
static int nfs4_delay(struct rpc_clnt *clnt, long *timeout)
static int nfs4_delay_killable(long *timeout)
{
int res = 0;
might_sleep();
freezable_schedule_timeout_killable_unsafe(
nfs4_update_delay(timeout));
if (fatal_signal_pending(current))
res = -ERESTARTSYS;
return res;
if (!__fatal_signal_pending(current))
return 0;
return -EINTR;
}
/*
 * Sleep for the interval produced by nfs4_update_delay(@timeout) using
 * the interruptible freezable timeout helper.
 *
 * Returns 0 when no signal is pending after the sleep, -EINTR when the
 * pending signal is fatal, and -ERESTARTSYS for any other pending signal.
 */
static int nfs4_delay_interruptible(long *timeout)
{
	might_sleep();
	freezable_schedule_timeout_interruptible(nfs4_update_delay(timeout));

	if (signal_pending(current)) {
		if (__fatal_signal_pending(current))
			return -EINTR;
		return -ERESTARTSYS;
	}
	return 0;
}
/*
 * Dispatch to the interruptible or the killable delay helper.
 *
 * @timeout:       passed through unchanged to the selected helper
 * @interruptible: true selects nfs4_delay_interruptible(), false selects
 *                 nfs4_delay_killable()
 *
 * Returns whatever the selected helper returns.
 */
static int nfs4_delay(long *timeout, bool interruptible)
{
	return interruptible ? nfs4_delay_interruptible(timeout)
			     : nfs4_delay_killable(timeout);
}
/* This is the error handling routine for processes that are allowed
@ -546,7 +561,8 @@ int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_
ret = nfs4_do_handle_exception(server, errorcode, exception);
if (exception->delay) {
ret = nfs4_delay(server->client, &exception->timeout);
ret = nfs4_delay(&exception->timeout,
exception->interruptible);
goto out_retry;
}
if (exception->recovering) {
@ -978,10 +994,8 @@ int nfs4_setup_sequence(struct nfs_client *client,
if (res->sr_slot != NULL)
goto out_start;
if (session) {
if (session)
tbl = &session->fc_slot_table;
task->tk_timeout = 0;
}
spin_lock(&tbl->slot_tbl_lock);
/* The state manager will wait until the slot table is empty */
@ -990,9 +1004,8 @@ int nfs4_setup_sequence(struct nfs_client *client,
slot = nfs4_alloc_slot(tbl);
if (IS_ERR(slot)) {
/* Try again in 1/4 second */
if (slot == ERR_PTR(-ENOMEM))
task->tk_timeout = HZ >> 2;
goto out_sleep_timeout;
goto out_sleep;
}
spin_unlock(&tbl->slot_tbl_lock);
@ -1004,11 +1017,20 @@ out_start:
nfs41_sequence_res_init(res);
rpc_call_start(task);
return 0;
out_sleep_timeout:
/* Try again in 1/4 second */
if (args->sa_privileged)
rpc_sleep_on_priority_timeout(&tbl->slot_tbl_waitq, task,
jiffies + (HZ >> 2), RPC_PRIORITY_PRIVILEGED);
else
rpc_sleep_on_timeout(&tbl->slot_tbl_waitq, task,
NULL, jiffies + (HZ >> 2));
spin_unlock(&tbl->slot_tbl_lock);
return -EAGAIN;
out_sleep:
if (args->sa_privileged)
rpc_sleep_on_priority(&tbl->slot_tbl_waitq, task,
NULL, RPC_PRIORITY_PRIVILEGED);
RPC_PRIORITY_PRIVILEGED);
else
rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL);
spin_unlock(&tbl->slot_tbl_lock);
@ -3060,7 +3082,9 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir,
int *opened)
{
struct nfs_server *server = NFS_SERVER(dir);
struct nfs4_exception exception = { };
struct nfs4_exception exception = {
.interruptible = true,
};
struct nfs4_state *res;
struct nfs4_open_createattrs c = {
.label = label,
@ -3673,7 +3697,9 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle)
{
struct nfs4_exception exception = { };
struct nfs4_exception exception = {
.interruptible = true,
};
int err;
do {
err = nfs4_handle_exception(server,
@ -3715,7 +3741,9 @@ static int _nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fsinfo *info)
{
struct nfs4_exception exception = { };
struct nfs4_exception exception = {
.interruptible = true,
};
int err;
do {
err = _nfs4_lookup_root(server, fhandle, info);
@ -3942,7 +3970,9 @@ static int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fattr *fattr, struct nfs4_label *label,
struct inode *inode)
{
struct nfs4_exception exception = { };
struct nfs4_exception exception = {
.interruptible = true,
};
int err;
do {
err = _nfs4_proc_getattr(server, fhandle, fattr, label, inode);
@ -4065,7 +4095,9 @@ static int nfs4_proc_lookup_common(struct rpc_clnt **clnt, struct inode *dir,
const struct qstr *name, struct nfs_fh *fhandle,
struct nfs_fattr *fattr, struct nfs4_label *label)
{
struct nfs4_exception exception = { };
struct nfs4_exception exception = {
.interruptible = true,
};
struct rpc_clnt *client = *clnt;
int err;
do {
@ -4169,7 +4201,9 @@ static int _nfs4_proc_lookupp(struct inode *inode,
static int nfs4_proc_lookupp(struct inode *inode, struct nfs_fh *fhandle,
struct nfs_fattr *fattr, struct nfs4_label *label)
{
struct nfs4_exception exception = { };
struct nfs4_exception exception = {
.interruptible = true,
};
int err;
do {
err = _nfs4_proc_lookupp(inode, fhandle, fattr, label);
@ -4216,7 +4250,9 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry
static int nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry)
{
struct nfs4_exception exception = { };
struct nfs4_exception exception = {
.interruptible = true,
};
int err;
do {
err = _nfs4_proc_access(inode, entry);
@ -4271,7 +4307,9 @@ static int _nfs4_proc_readlink(struct inode *inode, struct page *page,
static int nfs4_proc_readlink(struct inode *inode, struct page *page,
unsigned int pgbase, unsigned int pglen)
{
struct nfs4_exception exception = { };
struct nfs4_exception exception = {
.interruptible = true,
};
int err;
do {
err = _nfs4_proc_readlink(inode, page, pgbase, pglen);
@ -4347,7 +4385,9 @@ _nfs4_proc_remove(struct inode *dir, const struct qstr *name, u32 ftype)
static int nfs4_proc_remove(struct inode *dir, struct dentry *dentry)
{
struct nfs4_exception exception = { };
struct nfs4_exception exception = {
.interruptible = true,
};
struct inode *inode = d_inode(dentry);
int err;
@ -4368,7 +4408,9 @@ static int nfs4_proc_remove(struct inode *dir, struct dentry *dentry)
static int nfs4_proc_rmdir(struct inode *dir, const struct qstr *name)
{
struct nfs4_exception exception = { };
struct nfs4_exception exception = {
.interruptible = true,
};
int err;
do {
@ -4527,7 +4569,9 @@ out:
static int nfs4_proc_link(struct inode *inode, struct inode *dir, const struct qstr *name)
{
struct nfs4_exception exception = { };
struct nfs4_exception exception = {
.interruptible = true,
};
int err;
do {
err = nfs4_handle_exception(NFS_SERVER(inode),
@ -4634,7 +4678,9 @@ out:
static int nfs4_proc_symlink(struct inode *dir, struct dentry *dentry,
struct page *page, unsigned int len, struct iattr *sattr)
{
struct nfs4_exception exception = { };
struct nfs4_exception exception = {
.interruptible = true,
};
struct nfs4_label l, *label = NULL;
int err;
@ -4673,7 +4719,9 @@ static int nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry,
struct iattr *sattr)
{
struct nfs_server *server = NFS_SERVER(dir);
struct nfs4_exception exception = { };
struct nfs4_exception exception = {
.interruptible = true,
};
struct nfs4_label l, *label = NULL;
int err;
@ -4733,7 +4781,9 @@ static int _nfs4_proc_readdir(struct dentry *dentry, const struct cred *cred,
static int nfs4_proc_readdir(struct dentry *dentry, const struct cred *cred,
u64 cookie, struct page **pages, unsigned int count, bool plus)
{
struct nfs4_exception exception = { };
struct nfs4_exception exception = {
.interruptible = true,
};
int err;
do {
err = _nfs4_proc_readdir(dentry, cred, cookie,
@ -4784,7 +4834,9 @@ static int nfs4_proc_mknod(struct inode *dir, struct dentry *dentry,
struct iattr *sattr, dev_t rdev)
{
struct nfs_server *server = NFS_SERVER(dir);
struct nfs4_exception exception = { };
struct nfs4_exception exception = {
.interruptible = true,
};
struct nfs4_label l, *label = NULL;
int err;
@ -4826,7 +4878,9 @@ static int _nfs4_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
static int nfs4_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsstat *fsstat)
{
struct nfs4_exception exception = { };
struct nfs4_exception exception = {
.interruptible = true,
};
int err;
do {
err = nfs4_handle_exception(server,
@ -4857,7 +4911,9 @@ static int _nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
static int nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *fsinfo)
{
struct nfs4_exception exception = { };
struct nfs4_exception exception = {
.interruptible = true,
};
unsigned long now = jiffies;
int err;
@ -4919,7 +4975,9 @@ static int _nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle
static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_pathconf *pathconf)
{
struct nfs4_exception exception = { };
struct nfs4_exception exception = {
.interruptible = true,
};
int err;
do {
@ -5488,7 +5546,9 @@ out_free:
static ssize_t nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen)
{
struct nfs4_exception exception = { };
struct nfs4_exception exception = {
.interruptible = true,
};
ssize_t ret;
do {
ret = __nfs4_get_acl_uncached(inode, buf, buflen);
@ -5622,7 +5682,9 @@ static int _nfs4_get_security_label(struct inode *inode, void *buf,
static int nfs4_get_security_label(struct inode *inode, void *buf,
size_t buflen)
{
struct nfs4_exception exception = { };
struct nfs4_exception exception = {
.interruptible = true,
};
int err;
if (!nfs_server_capable(inode, NFS_CAP_SECURITY_LABEL))
@ -6263,7 +6325,9 @@ out:
static int nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock *request)
{
struct nfs4_exception exception = { };
struct nfs4_exception exception = {
.interruptible = true,
};
int err;
do {
@ -6827,6 +6891,7 @@ static int nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *
struct nfs4_exception exception = {
.state = state,
.inode = state->inode,
.interruptible = true,
};
int err;
@ -7240,7 +7305,9 @@ int nfs4_proc_fs_locations(struct rpc_clnt *client, struct inode *dir,
struct nfs4_fs_locations *fs_locations,
struct page *page)
{
struct nfs4_exception exception = { };
struct nfs4_exception exception = {
.interruptible = true,
};
int err;
do {
err = _nfs4_proc_fs_locations(client, dir, name,
@ -7383,7 +7450,9 @@ int nfs4_proc_get_locations(struct inode *inode,
struct nfs_client *clp = server->nfs_client;
const struct nfs4_mig_recovery_ops *ops =
clp->cl_mvops->mig_recovery_ops;
struct nfs4_exception exception = { };
struct nfs4_exception exception = {
.interruptible = true,
};
int status;
dprintk("%s: FSID %llx:%llx on \"%s\"\n", __func__,
@ -7507,7 +7576,9 @@ int nfs4_proc_fsid_present(struct inode *inode, const struct cred *cred)
struct nfs_client *clp = server->nfs_client;
const struct nfs4_mig_recovery_ops *ops =
clp->cl_mvops->mig_recovery_ops;
struct nfs4_exception exception = { };
struct nfs4_exception exception = {
.interruptible = true,
};
int status;
dprintk("%s: FSID %llx:%llx on \"%s\"\n", __func__,
@ -7573,7 +7644,9 @@ static int _nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct
int nfs4_proc_secinfo(struct inode *dir, const struct qstr *name,
struct nfs4_secinfo_flavors *flavors)
{
struct nfs4_exception exception = { };
struct nfs4_exception exception = {
.interruptible = true,
};
int err;
do {
err = -NFS4ERR_WRONGSEC;
@ -9263,7 +9336,9 @@ static int
nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fsinfo *info, struct nfs4_secinfo_flavors *flavors)
{
struct nfs4_exception exception = { };
struct nfs4_exception exception = {
.interruptible = true,
};
int err;
do {
/* first try using integrity protection */
@ -9430,7 +9505,9 @@ static int nfs41_test_stateid(struct nfs_server *server,
nfs4_stateid *stateid,
const struct cred *cred)
{
struct nfs4_exception exception = { };
struct nfs4_exception exception = {
.interruptible = true,
};
int err;
do {
err = _nfs41_test_stateid(server, stateid, cred);

View File

@ -159,6 +159,10 @@ int nfs40_discover_server_trunking(struct nfs_client *clp,
/* Sustain the lease, even if it's empty. If the clientid4
* goes stale it's of no use for trunking discovery. */
nfs4_schedule_state_renewal(*result);
/* If the client state needs to be recovered, do it. */
if (clp->cl_state)
nfs4_schedule_state_manager(clp);
}
out:
return status;
@ -2346,8 +2350,7 @@ static void nfs41_handle_recallable_state_revoked(struct nfs_client *clp)
{
/* FIXME: For now, we destroy all layouts. */
pnfs_destroy_all_layouts(clp);
/* FIXME: For now, we test all delegations+open state+locks. */
nfs41_handle_some_state_revoked(clp);
nfs_test_expired_all_delegations(clp);
dprintk("%s: Recallable state revoked on server %s!\n", __func__,
clp->cl_hostname);
}

View File

@ -16,8 +16,8 @@
#include <linux/nfs.h>
#include <linux/nfs3.h>
#include <linux/nfs4.h>
#include <linux/nfs_page.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
#include <linux/nfs_mount.h>
#include <linux/export.h>
@ -47,7 +47,7 @@ void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
hdr->req = nfs_list_entry(mirror->pg_list.next);
hdr->inode = desc->pg_inode;
hdr->cred = hdr->req->wb_context->cred;
hdr->cred = nfs_req_openctx(hdr->req)->cred;
hdr->io_start = req_offset(hdr->req);
hdr->good_bytes = mirror->pg_count;
hdr->io_completion = desc->pg_io_completion;
@ -295,25 +295,13 @@ out:
nfs_release_request(head);
}
/**
* nfs_create_request - Create an NFS read/write request.
* @ctx: open context to use
* @page: page to write
* @last: last nfs request created for this page group or NULL if head
* @offset: starting offset within the page for the write
* @count: number of bytes to read/write
*
* The page must be locked by the caller. This makes sure we never
* create two different requests for the same page.
* User should ensure it is safe to sleep in this function.
*/
struct nfs_page *
nfs_create_request(struct nfs_open_context *ctx, struct page *page,
struct nfs_page *last, unsigned int offset,
static struct nfs_page *
__nfs_create_request(struct nfs_lock_context *l_ctx, struct page *page,
unsigned int pgbase, unsigned int offset,
unsigned int count)
{
struct nfs_page *req;
struct nfs_lock_context *l_ctx;
struct nfs_open_context *ctx = l_ctx->open_context;
if (test_bit(NFS_CONTEXT_BAD, &ctx->flags))
return ERR_PTR(-EBADF);
@ -322,13 +310,8 @@ nfs_create_request(struct nfs_open_context *ctx, struct page *page,
if (req == NULL)
return ERR_PTR(-ENOMEM);
/* get lock context early so we can deal with alloc failures */
l_ctx = nfs_get_lock_context(ctx);
if (IS_ERR(l_ctx)) {
nfs_page_free(req);
return ERR_CAST(l_ctx);
}
req->wb_lock_context = l_ctx;
refcount_inc(&l_ctx->count);
atomic_inc(&l_ctx->io_count);
/* Initialize the request struct. Initially, we assume a
@ -340,14 +323,58 @@ nfs_create_request(struct nfs_open_context *ctx, struct page *page,
get_page(page);
}
req->wb_offset = offset;
req->wb_pgbase = offset;
req->wb_pgbase = pgbase;
req->wb_bytes = count;
req->wb_context = get_nfs_open_context(ctx);
kref_init(&req->wb_kref);
nfs_page_group_init(req, last);
req->wb_nio = 0;
return req;
}
/**
 * nfs_create_request - Create an NFS read/write request.
 * @ctx: open context the request is issued under
 * @page: page to read into or write from
 * @offset: starting offset within the page; also used as the page base
 * @count: number of bytes to read/write
 *
 * The caller must hold the page lock, which guarantees that two
 * different requests are never created for the same page.
 * The caller must also make sure that sleeping is safe here.
 *
 * Returns the new request as a page-group head, or an ERR_PTR() value
 * if either the lock context lookup or the request creation fails.
 */
struct nfs_page *
nfs_create_request(struct nfs_open_context *ctx, struct page *page,
		   unsigned int offset, unsigned int count)
{
	struct nfs_lock_context *lock_ctx;
	struct nfs_page *req;

	lock_ctx = nfs_get_lock_context(ctx);
	if (IS_ERR(lock_ctx))
		return ERR_CAST(lock_ctx);

	/* A head request starts with pgbase equal to its offset. */
	req = __nfs_create_request(lock_ctx, page, offset, offset, count);
	if (!IS_ERR(req))
		nfs_page_group_init(req, NULL);

	/* Release the reference obtained by nfs_get_lock_context() above. */
	nfs_put_lock_context(lock_ctx);
	return req;
}
/*
 * Create a sub-request that shares @req's lock context and page.
 *
 * @req:    existing request supplying the lock context, page, index
 *          and wb_nio value
 * @last:   request handed to nfs_page_group_init() as the previous
 *          group member
 * @pgbase: page base for the new request
 * @offset: offset within the page for the new request
 * @count:  number of bytes covered by the new request
 *
 * On success the new request is locked via nfs_lock_request() before
 * being joined to the page group. On failure the ERR_PTR() value from
 * __nfs_create_request() is returned untouched.
 */
static struct nfs_page *
nfs_create_subreq(struct nfs_page *req, struct nfs_page *last,
		  unsigned int pgbase, unsigned int offset,
		  unsigned int count)
{
	struct nfs_page *subreq;

	subreq = __nfs_create_request(req->wb_lock_context, req->wb_page,
				      pgbase, offset, count);
	if (IS_ERR(subreq))
		return subreq;

	nfs_lock_request(subreq);
	subreq->wb_index = req->wb_index;
	nfs_page_group_init(subreq, last);
	subreq->wb_nio = req->wb_nio;
	return subreq;
}
/**
* nfs_unlock_request - Unlock request and wake up sleepers.
* @req: pointer to request
@ -386,8 +413,8 @@ void nfs_unlock_and_release_request(struct nfs_page *req)
static void nfs_clear_request(struct nfs_page *req)
{
struct page *page = req->wb_page;
struct nfs_open_context *ctx = req->wb_context;
struct nfs_lock_context *l_ctx = req->wb_lock_context;
struct nfs_open_context *ctx;
if (page != NULL) {
put_page(page);
@ -396,16 +423,13 @@ static void nfs_clear_request(struct nfs_page *req)
if (l_ctx != NULL) {
if (atomic_dec_and_test(&l_ctx->io_count)) {
wake_up_var(&l_ctx->io_count);
ctx = l_ctx->open_context;
if (test_bit(NFS_CONTEXT_UNLOCK, &ctx->flags))
rpc_wake_up(&NFS_SERVER(d_inode(ctx->dentry))->uoc_rpcwaitq);
}
nfs_put_lock_context(l_ctx);
req->wb_lock_context = NULL;
}
if (ctx != NULL) {
put_nfs_open_context(ctx);
req->wb_context = NULL;
}
}
/**
@ -550,7 +574,7 @@ static void nfs_pgio_rpcsetup(struct nfs_pgio_header *hdr,
hdr->args.pgbase = req->wb_pgbase;
hdr->args.pages = hdr->page_array.pagevec;
hdr->args.count = count;
hdr->args.context = get_nfs_open_context(req->wb_context);
hdr->args.context = get_nfs_open_context(nfs_req_openctx(req));
hdr->args.lock_context = req->wb_lock_context;
hdr->args.stable = NFS_UNSTABLE;
switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) {
@ -698,6 +722,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
desc->pg_mirrors_dynamic = NULL;
desc->pg_mirrors = desc->pg_mirrors_static;
nfs_pageio_mirror_init(&desc->pg_mirrors[0], bsize);
desc->pg_maxretrans = 0;
}
/**
@ -906,9 +931,9 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev,
struct file_lock_context *flctx;
if (prev) {
if (!nfs_match_open_context(req->wb_context, prev->wb_context))
if (!nfs_match_open_context(nfs_req_openctx(req), nfs_req_openctx(prev)))
return false;
flctx = d_inode(req->wb_context->dentry)->i_flctx;
flctx = d_inode(nfs_req_openctx(req)->dentry)->i_flctx;
if (flctx != NULL &&
!(list_empty_careful(&flctx->flc_posix) &&
list_empty_careful(&flctx->flc_flock)) &&
@ -957,6 +982,15 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
return 0;
mirror->pg_base = req->wb_pgbase;
}
if (desc->pg_maxretrans && req->wb_nio > desc->pg_maxretrans) {
if (NFS_SERVER(desc->pg_inode)->flags & NFS_MOUNT_SOFTERR)
desc->pg_error = -ETIMEDOUT;
else
desc->pg_error = -EIO;
return 0;
}
if (!nfs_can_coalesce_requests(prev, req, desc))
return 0;
nfs_list_move_request(req, &mirror->pg_list);
@ -1049,14 +1083,10 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
pgbase += subreq->wb_bytes;
if (bytes_left) {
subreq = nfs_create_request(req->wb_context,
req->wb_page,
subreq, pgbase, bytes_left);
subreq = nfs_create_subreq(req, subreq, pgbase,
offset, bytes_left);
if (IS_ERR(subreq))
goto err_ptr;
nfs_lock_request(subreq);
subreq->wb_offset = offset;
subreq->wb_index = req->wb_index;
}
} while (bytes_left > 0);
@ -1158,19 +1188,14 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
lastreq = lastreq->wb_this_page)
;
dupreq = nfs_create_request(req->wb_context,
req->wb_page, lastreq, pgbase, bytes);
dupreq = nfs_create_subreq(req, lastreq,
pgbase, offset, bytes);
nfs_page_group_unlock(req);
if (IS_ERR(dupreq)) {
nfs_page_group_unlock(req);
desc->pg_error = PTR_ERR(dupreq);
goto out_failed;
}
nfs_lock_request(dupreq);
nfs_page_group_unlock(req);
dupreq->wb_offset = offset;
dupreq->wb_index = req->wb_index;
} else
dupreq = req;

View File

@ -2436,7 +2436,7 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r
rd_size = nfs_dreq_bytes_left(pgio->pg_dreq);
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
req->wb_context,
nfs_req_openctx(req),
req_offset(req),
rd_size,
IOMODE_READ,
@ -2463,7 +2463,7 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
pnfs_generic_pg_check_range(pgio, req);
if (pgio->pg_lseg == NULL) {
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
req->wb_context,
nfs_req_openctx(req),
req_offset(req),
wb_size,
IOMODE_RW,

View File

@ -459,7 +459,7 @@ static inline bool
pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
struct nfs_commit_info *cinfo, u32 ds_commit_idx)
{
struct inode *inode = d_inode(req->wb_context->dentry);
struct inode *inode = d_inode(nfs_req_openctx(req)->dentry);
struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
if (lseg == NULL || ld->mark_request_commit == NULL)
@ -471,7 +471,7 @@ pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
static inline bool
pnfs_clear_request_commit(struct nfs_page *req, struct nfs_commit_info *cinfo)
{
struct inode *inode = d_inode(req->wb_context->dentry);
struct inode *inode = d_inode(nfs_req_openctx(req)->dentry);
struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
if (ld == NULL || ld->clear_request_commit == NULL)

View File

@ -92,7 +92,7 @@ EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds);
static void nfs_readpage_release(struct nfs_page *req)
{
struct inode *inode = d_inode(req->wb_context->dentry);
struct inode *inode = d_inode(nfs_req_openctx(req)->dentry);
dprintk("NFS: read done (%s/%llu %d@%lld)\n", inode->i_sb->s_id,
(unsigned long long)NFS_FILEID(inode), req->wb_bytes,
@ -118,7 +118,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
len = nfs_page_length(page);
if (len == 0)
return nfs_return_empty_page(page);
new = nfs_create_request(ctx, page, NULL, 0, len);
new = nfs_create_request(ctx, page, 0, len);
if (IS_ERR(new)) {
unlock_page(page);
return PTR_ERR(new);
@ -363,7 +363,7 @@ readpage_async_filler(void *data, struct page *page)
if (len == 0)
return nfs_return_empty_page(page);
new = nfs_create_request(desc->ctx, page, NULL, 0, len);
new = nfs_create_request(desc->ctx, page, 0, len);
if (IS_ERR(new))
goto out_error;

View File

@ -78,7 +78,7 @@
enum {
/* Mount options that take no arguments */
Opt_soft, Opt_hard,
Opt_soft, Opt_softerr, Opt_hard,
Opt_posix, Opt_noposix,
Opt_cto, Opt_nocto,
Opt_ac, Opt_noac,
@ -125,6 +125,7 @@ static const match_table_t nfs_mount_option_tokens = {
{ Opt_sloppy, "sloppy" },
{ Opt_soft, "soft" },
{ Opt_softerr, "softerr" },
{ Opt_hard, "hard" },
{ Opt_deprecated, "intr" },
{ Opt_deprecated, "nointr" },
@ -628,7 +629,8 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
const char *str;
const char *nostr;
} nfs_info[] = {
{ NFS_MOUNT_SOFT, ",soft", ",hard" },
{ NFS_MOUNT_SOFT, ",soft", "" },
{ NFS_MOUNT_SOFTERR, ",softerr", "" },
{ NFS_MOUNT_POSIX, ",posix", "" },
{ NFS_MOUNT_NOCTO, ",nocto", "" },
{ NFS_MOUNT_NOAC, ",noac", "" },
@ -658,6 +660,8 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
seq_printf(m, ",acdirmin=%u", nfss->acdirmin/HZ);
if (nfss->acdirmax != NFS_DEF_ACDIRMAX*HZ || showdefaults)
seq_printf(m, ",acdirmax=%u", nfss->acdirmax/HZ);
if (!(nfss->flags & (NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR)))
seq_puts(m, ",hard");
for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) {
if (nfss->flags & nfs_infop->flag)
seq_puts(m, nfs_infop->str);
@ -1239,9 +1243,14 @@ static int nfs_parse_mount_options(char *raw,
*/
case Opt_soft:
mnt->flags |= NFS_MOUNT_SOFT;
mnt->flags &= ~NFS_MOUNT_SOFTERR;
break;
case Opt_softerr:
mnt->flags |= NFS_MOUNT_SOFTERR;
mnt->flags &= ~NFS_MOUNT_SOFT;
break;
case Opt_hard:
mnt->flags &= ~NFS_MOUNT_SOFT;
mnt->flags &= ~(NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR);
break;
case Opt_posix:
mnt->flags |= NFS_MOUNT_POSIX;
@ -2476,6 +2485,21 @@ static int nfs_compare_super_address(struct nfs_server *server1,
return 1;
}
/*
 * Report whether two nfs_server instances resolve to the same user
 * namespace.
 *
 * The namespace for a server is read from client->cl_cred->user_ns.  When
 * the server has no client, or the client carries no credential, the
 * server is treated as belonging to init_user_ns.
 *
 * Returns 1 if both servers map to the same namespace, 0 if they differ.
 */
static int nfs_compare_userns(const struct nfs_server *old,
			      const struct nfs_server *new)
{
	const struct user_namespace *ns_old;
	const struct user_namespace *ns_new;

	/* Fall back to the initial namespace when no credential is attached. */
	ns_old = (old->client && old->client->cl_cred) ?
			old->client->cl_cred->user_ns : &init_user_ns;
	ns_new = (new->client && new->client->cl_cred) ?
			new->client->cl_cred->user_ns : &init_user_ns;

	return ns_old == ns_new;
}
static int nfs_compare_super(struct super_block *sb, void *data)
{
struct nfs_sb_mountdata *sb_mntdata = data;
@ -2489,6 +2513,8 @@ static int nfs_compare_super(struct super_block *sb, void *data)
return 0;
if (memcmp(&old->fsid, &server->fsid, sizeof(old->fsid)) != 0)
return 0;
if (!nfs_compare_userns(old, server))
return 0;
return nfs_compare_mount_options(sb, server, mntflags);
}

View File

@ -26,8 +26,9 @@
* and straight-forward than readdir caching.
*/
static int nfs_symlink_filler(struct inode *inode, struct page *page)
static int nfs_symlink_filler(void *data, struct page *page)
{
struct inode *inode = data;
int error;
error = NFS_PROTO(inode)->readlink(inode, page, 0, PAGE_SIZE);
@ -65,8 +66,8 @@ static const char *nfs_get_link(struct dentry *dentry,
err = ERR_PTR(nfs_revalidate_mapping(inode, inode->i_mapping));
if (err)
return err;
page = read_cache_page(&inode->i_data, 0,
(filler_t *)nfs_symlink_filler, inode);
page = read_cache_page(&inode->i_data, 0, nfs_symlink_filler,
inode);
if (IS_ERR(page))
return ERR_CAST(page);
}

View File

@ -244,6 +244,12 @@ static void nfs_set_pageerror(struct address_space *mapping)
nfs_zap_mapping(mapping->host, mapping);
}
/*
 * nfs_mapping_set_error - record an error against a page and its mapping
 * @page: page the error applies to
 * @error: error code handed to mapping_set_error()
 *
 * Flags the page itself via SetPageError(), then passes @error to
 * mapping_set_error() for the mapping returned by page_file_mapping(@page).
 */
static void nfs_mapping_set_error(struct page *page, int error)
{
/* Mark the individual page first ... */
SetPageError(page);
/* ... then record the error on the mapping that owns the page. */
mapping_set_error(page_file_mapping(page), error);
}
/*
* nfs_page_group_search_locked
* @head - head request of page group
@ -582,11 +588,10 @@ release_request:
return ERR_PTR(ret);
}
static void nfs_write_error_remove_page(struct nfs_page *req)
static void nfs_write_error(struct nfs_page *req, int error)
{
nfs_mapping_set_error(req->wb_page, error);
nfs_end_page_writeback(req);
generic_error_remove_page(page_file_mapping(req->wb_page),
req->wb_page);
nfs_release_request(req);
}
@ -609,6 +614,7 @@ nfs_error_is_fatal_on_server(int err)
static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
struct page *page)
{
struct address_space *mapping;
struct nfs_page *req;
int ret = 0;
@ -622,19 +628,19 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
nfs_set_page_writeback(page);
WARN_ON_ONCE(test_bit(PG_CLEAN, &req->wb_flags));
ret = req->wb_context->error;
/* If there is a fatal error that covers this write, just exit */
if (nfs_error_is_fatal_on_server(ret))
ret = 0;
mapping = page_file_mapping(page);
if (test_bit(AS_ENOSPC, &mapping->flags) ||
test_bit(AS_EIO, &mapping->flags))
goto out_launder;
ret = 0;
if (!nfs_pageio_add_request(pgio, req)) {
ret = pgio->pg_error;
/*
* Remove the problematic req upon fatal errors on the server
*/
if (nfs_error_is_fatal(ret)) {
nfs_context_set_write_error(req->wb_context, ret);
if (nfs_error_is_fatal_on_server(ret))
goto out_launder;
} else
@ -646,8 +652,8 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
out:
return ret;
out_launder:
nfs_write_error_remove_page(req);
return ret;
nfs_write_error(req, ret);
return 0;
}
static int nfs_do_writepage(struct page *page, struct writeback_control *wbc,
@ -958,7 +964,8 @@ static void
nfs_clear_request_commit(struct nfs_page *req)
{
if (test_bit(PG_CLEAN, &req->wb_flags)) {
struct inode *inode = d_inode(req->wb_context->dentry);
struct nfs_open_context *ctx = nfs_req_openctx(req);
struct inode *inode = d_inode(ctx->dentry);
struct nfs_commit_info cinfo;
nfs_init_cinfo_from_inode(&cinfo, inode);
@ -999,10 +1006,12 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr)
if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) &&
(hdr->good_bytes < bytes)) {
nfs_set_pageerror(page_file_mapping(req->wb_page));
nfs_context_set_write_error(req->wb_context, hdr->error);
nfs_mapping_set_error(req->wb_page, hdr->error);
goto remove_req;
}
if (nfs_write_need_commit(hdr)) {
/* Reset wb_nio, since the write was successful. */
req->wb_nio = 0;
memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf));
nfs_mark_request_commit(req, hdr->lseg, &cinfo,
hdr->pgio_mirror_idx);
@ -1136,6 +1145,7 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
req->wb_bytes = end - req->wb_offset;
else
req->wb_bytes = rqend - req->wb_offset;
req->wb_nio = 0;
return req;
out_flushme:
/*
@ -1165,7 +1175,7 @@ static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx,
req = nfs_try_to_update_request(inode, page, offset, bytes);
if (req != NULL)
goto out;
req = nfs_create_request(ctx, page, NULL, offset, bytes);
req = nfs_create_request(ctx, page, offset, bytes);
if (IS_ERR(req))
goto out;
nfs_inode_add_request(inode, req);
@ -1210,7 +1220,7 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
return 0;
l_ctx = req->wb_lock_context;
do_flush = req->wb_page != page ||
!nfs_match_open_context(req->wb_context, ctx);
!nfs_match_open_context(nfs_req_openctx(req), ctx);
if (l_ctx && flctx &&
!(list_empty_careful(&flctx->flc_posix) &&
list_empty_careful(&flctx->flc_flock))) {
@ -1410,8 +1420,10 @@ static void nfs_initiate_write(struct nfs_pgio_header *hdr,
*/
static void nfs_redirty_request(struct nfs_page *req)
{
/* Bump the transmission count */
req->wb_nio++;
nfs_mark_request_dirty(req);
set_bit(NFS_CONTEXT_RESEND_WRITES, &req->wb_context->flags);
set_bit(NFS_CONTEXT_RESEND_WRITES, &nfs_req_openctx(req)->flags);
nfs_end_page_writeback(req);
nfs_release_request(req);
}
@ -1423,14 +1435,10 @@ static void nfs_async_write_error(struct list_head *head, int error)
while (!list_empty(head)) {
req = nfs_list_entry(head->next);
nfs_list_remove_request(req);
if (nfs_error_is_fatal(error)) {
nfs_context_set_write_error(req->wb_context, error);
if (nfs_error_is_fatal_on_server(error)) {
nfs_write_error_remove_page(req);
continue;
}
}
nfs_redirty_request(req);
if (nfs_error_is_fatal(error))
nfs_write_error(req, error);
else
nfs_redirty_request(req);
}
}
@ -1735,7 +1743,8 @@ void nfs_init_commit(struct nfs_commit_data *data,
struct nfs_commit_info *cinfo)
{
struct nfs_page *first = nfs_list_entry(head->next);
struct inode *inode = d_inode(first->wb_context->dentry);
struct nfs_open_context *ctx = nfs_req_openctx(first);
struct inode *inode = d_inode(ctx->dentry);
/* Set up the RPC argument and reply structs
* NB: take care not to mess about with data->commit et al. */
@ -1743,7 +1752,7 @@ void nfs_init_commit(struct nfs_commit_data *data,
list_splice_init(head, &data->pages);
data->inode = inode;
data->cred = first->wb_context->cred;
data->cred = ctx->cred;
data->lseg = lseg; /* reference transferred */
/* only set lwb for pnfs commit */
if (lseg)
@ -1756,7 +1765,7 @@ void nfs_init_commit(struct nfs_commit_data *data,
/* Note: we always request a commit of the entire inode */
data->args.offset = 0;
data->args.count = 0;
data->context = get_nfs_open_context(first->wb_context);
data->context = get_nfs_open_context(ctx);
data->res.fattr = &data->fattr;
data->res.verf = &data->verf;
nfs_fattr_init(&data->fattr);
@ -1839,14 +1848,15 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
nfs_clear_page_commit(req->wb_page);
dprintk("NFS: commit (%s/%llu %d@%lld)",
req->wb_context->dentry->d_sb->s_id,
(unsigned long long)NFS_FILEID(d_inode(req->wb_context->dentry)),
nfs_req_openctx(req)->dentry->d_sb->s_id,
(unsigned long long)NFS_FILEID(d_inode(nfs_req_openctx(req)->dentry)),
req->wb_bytes,
(long long)req_offset(req));
if (status < 0) {
nfs_context_set_write_error(req->wb_context, status);
if (req->wb_page)
if (req->wb_page) {
nfs_mapping_set_error(req->wb_page, status);
nfs_inode_remove_request(req);
}
dprintk_cont(", error = %d\n", status);
goto next;
}
@ -1863,7 +1873,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
/* We have a mismatch. Write the page again */
dprintk_cont(" mismatch\n");
nfs_mark_request_dirty(req);
set_bit(NFS_CONTEXT_RESEND_WRITES, &req->wb_context->flags);
set_bit(NFS_CONTEXT_RESEND_WRITES, &nfs_req_openctx(req)->flags);
next:
nfs_unlock_and_release_request(req);
/* Latency breaker */

View File

@ -868,6 +868,7 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
.program = &cb_program,
.version = 1,
.flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET),
.cred = current_cred(),
};
struct rpc_clnt *client;
const struct cred *cred;
@ -1033,7 +1034,7 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback
* the submission code will error out, so we don't need to
* handle that case here.
*/
if (task->tk_flags & RPC_TASK_KILLED)
if (RPC_SIGNALLED(task))
goto need_restart;
return true;
@ -1086,7 +1087,7 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback
dprintk("%s: freed slot, new seqid=%d\n", __func__,
clp->cl_cb_session->se_cb_seq_nr);
if (task->tk_flags & RPC_TASK_KILLED)
if (RPC_SIGNALLED(task))
goto need_restart;
out:
return ret;

View File

@ -46,6 +46,7 @@ struct nlmclnt_initdata {
int noresvport;
struct net *net;
const struct nlmclnt_operations *nlmclnt_ops;
const struct cred *cred;
};
/*

View File

@ -70,6 +70,7 @@ struct nlm_host {
struct nsm_handle *h_nsmhandle; /* NSM status handle */
char *h_addrbuf; /* address eyecatcher */
struct net *net; /* host net */
const struct cred *h_cred;
char nodename[UNX_MAXNODENAME + 1];
const struct nlmclnt_operations *h_nlmclnt_ops; /* Callback ops for NLM users */
};
@ -229,7 +230,8 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap,
const u32 version,
const char *hostname,
int noresvport,
struct net *net);
struct net *net,
const struct cred *cred);
void nlmclnt_release_host(struct nlm_host *);
struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp,
const char *hostname,

View File

@ -76,7 +76,6 @@ struct nfs_open_context {
fmode_t mode;
unsigned long flags;
#define NFS_CONTEXT_ERROR_WRITE (0)
#define NFS_CONTEXT_RESEND_WRITES (1)
#define NFS_CONTEXT_BAD (2)
#define NFS_CONTEXT_UNLOCK (3)

View File

@ -139,6 +139,16 @@ struct nfs_server {
struct nfs_iostats __percpu *io_stats; /* I/O statistics */
atomic_long_t writeback; /* number of writeback pages */
int flags; /* various flags */
/* The following are for internal use only. Also see uapi/linux/nfs_mount.h */
#define NFS_MOUNT_LOOKUP_CACHE_NONEG 0x10000
#define NFS_MOUNT_LOOKUP_CACHE_NONE 0x20000
#define NFS_MOUNT_NORESVPORT 0x40000
#define NFS_MOUNT_LEGACY_INTERFACE 0x80000
#define NFS_MOUNT_LOCAL_FLOCK 0x100000
#define NFS_MOUNT_LOCAL_FCNTL 0x200000
#define NFS_MOUNT_SOFTERR 0x400000
unsigned int caps; /* server capabilities */
unsigned int rsize; /* read size */
unsigned int rpages; /* read size (in pages) */
@ -231,6 +241,9 @@ struct nfs_server {
/* XDR related information */
unsigned int read_hdrsize;
/* User namespace info */
const struct cred *cred;
};
/* Server capabilities */

View File

@ -42,7 +42,6 @@ struct nfs_inode;
struct nfs_page {
struct list_head wb_list; /* Defines state of page: */
struct page *wb_page; /* page to read in/write out */
struct nfs_open_context *wb_context; /* File state context info */
struct nfs_lock_context *wb_lock_context; /* lock context info */
pgoff_t wb_index; /* Offset >> PAGE_SHIFT */
unsigned int wb_offset, /* Offset & ~PAGE_MASK */
@ -53,6 +52,7 @@ struct nfs_page {
struct nfs_write_verifier wb_verf; /* Commit cookie */
struct nfs_page *wb_this_page; /* list of reqs for this page */
struct nfs_page *wb_head; /* head pointer for req list */
unsigned short wb_nio; /* Number of I/O attempts */
};
struct nfs_pageio_descriptor;
@ -87,7 +87,6 @@ struct nfs_pgio_mirror {
};
struct nfs_pageio_descriptor {
unsigned char pg_moreio : 1;
struct inode *pg_inode;
const struct nfs_pageio_ops *pg_ops;
const struct nfs_rw_ops *pg_rw_ops;
@ -105,6 +104,8 @@ struct nfs_pageio_descriptor {
struct nfs_pgio_mirror pg_mirrors_static[1];
struct nfs_pgio_mirror *pg_mirrors_dynamic;
u32 pg_mirror_idx; /* current mirror */
unsigned short pg_maxretrans;
unsigned char pg_moreio : 1;
};
/* arbitrarily selected limit to number of mirrors */
@ -114,7 +115,6 @@ struct nfs_pageio_descriptor {
extern struct nfs_page *nfs_create_request(struct nfs_open_context *ctx,
struct page *page,
struct nfs_page *last,
unsigned int offset,
unsigned int count);
extern void nfs_release_request(struct nfs_page *);
@ -199,4 +199,10 @@ loff_t req_offset(struct nfs_page *req)
return (((loff_t)req->wb_index) << PAGE_SHIFT) + req->wb_offset;
}
/*
 * nfs_req_openctx - look up the open context associated with a request
 * @req: request to inspect
 *
 * Returns the open_context pointer stored in the request's lock context
 * (req->wb_lock_context).
 *
 * NOTE(review): wb_lock_context is dereferenced without a NULL check, so
 * callers presumably must only use this while the request still holds its
 * lock context - confirm against the request teardown path.
 */
static inline struct nfs_open_context *
nfs_req_openctx(struct nfs_page *req)
{
return req->wb_lock_context->open_context;
}
#endif /* _LINUX_NFS_PAGE_H */

View File

@ -50,6 +50,7 @@ struct rpc_clnt {
struct rpc_iostats * cl_metrics; /* per-client statistics */
unsigned int cl_softrtry : 1,/* soft timeouts */
cl_softerr : 1,/* Timeouts return errors */
cl_discrtry : 1,/* disconnect before retry */
cl_noretranstimeo: 1,/* No retransmit timeouts */
cl_autobind : 1,/* use getport() */
@ -71,6 +72,7 @@ struct rpc_clnt {
struct dentry *cl_debugfs; /* debugfs directory */
#endif
struct rpc_xprt_iter cl_xpi;
const struct cred *cl_cred;
};
/*
@ -125,6 +127,7 @@ struct rpc_create_args {
unsigned long flags;
char *client_name;
struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */
const struct cred *cred;
};
struct rpc_add_xprt_test {
@ -144,6 +147,7 @@ struct rpc_add_xprt_test {
#define RPC_CLNT_CREATE_INFINITE_SLOTS (1UL << 7)
#define RPC_CLNT_CREATE_NO_IDLE_TIMEOUT (1UL << 8)
#define RPC_CLNT_CREATE_NO_RETRANS_TIMEOUT (1UL << 9)
#define RPC_CLNT_CREATE_SOFTERR (1UL << 10)
struct rpc_clnt *rpc_create(struct rpc_create_args *args);
struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *,

View File

@ -35,7 +35,6 @@ struct rpc_wait {
struct list_head list; /* wait queue links */
struct list_head links; /* Links to related tasks */
struct list_head timer_list; /* Timer list */
unsigned long expires;
};
/*
@ -62,6 +61,8 @@ struct rpc_task {
struct rpc_wait tk_wait; /* RPC wait */
} u;
int tk_rpc_status; /* Result of last RPC operation */
/*
* RPC call state
*/
@ -125,7 +126,6 @@ struct rpc_task_setup {
#define RPC_CALL_MAJORSEEN 0x0020 /* major timeout seen */
#define RPC_TASK_ROOTCREDS 0x0040 /* force root creds */
#define RPC_TASK_DYNAMIC 0x0080 /* task was kmalloc'ed */
#define RPC_TASK_KILLED 0x0100 /* task was killed */
#define RPC_TASK_SOFT 0x0200 /* Use soft timeouts */
#define RPC_TASK_SOFTCONN 0x0400 /* Fail if can't connect */
#define RPC_TASK_SENT 0x0800 /* message was sent */
@ -135,7 +135,6 @@ struct rpc_task_setup {
#define RPC_IS_ASYNC(t) ((t)->tk_flags & RPC_TASK_ASYNC)
#define RPC_IS_SWAPPER(t) ((t)->tk_flags & RPC_TASK_SWAPPER)
#define RPC_ASSASSINATED(t) ((t)->tk_flags & RPC_TASK_KILLED)
#define RPC_IS_SOFT(t) ((t)->tk_flags & (RPC_TASK_SOFT|RPC_TASK_TIMEOUT))
#define RPC_IS_SOFTCONN(t) ((t)->tk_flags & RPC_TASK_SOFTCONN)
#define RPC_WAS_SENT(t) ((t)->tk_flags & RPC_TASK_SENT)
@ -146,6 +145,7 @@ struct rpc_task_setup {
#define RPC_TASK_NEED_XMIT 3
#define RPC_TASK_NEED_RECV 4
#define RPC_TASK_MSG_PIN_WAIT 5
#define RPC_TASK_SIGNALLED 6
#define RPC_IS_RUNNING(t) test_bit(RPC_TASK_RUNNING, &(t)->tk_runstate)
#define rpc_set_running(t) set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate)
@ -169,6 +169,8 @@ struct rpc_task_setup {
#define RPC_IS_ACTIVATED(t) test_bit(RPC_TASK_ACTIVE, &(t)->tk_runstate)
#define RPC_SIGNALLED(t) test_bit(RPC_TASK_SIGNALLED, &(t)->tk_runstate)
/*
* Task priorities.
* Note: if you change these, you must also change
@ -183,7 +185,6 @@ struct rpc_task_setup {
struct rpc_timer {
struct timer_list timer;
struct list_head list;
unsigned long expires;
};
/*
@ -217,6 +218,7 @@ struct rpc_task *rpc_run_task(const struct rpc_task_setup *);
struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req);
void rpc_put_task(struct rpc_task *);
void rpc_put_task_async(struct rpc_task *);
void rpc_signal_task(struct rpc_task *);
void rpc_exit_task(struct rpc_task *);
void rpc_exit(struct rpc_task *, int);
void rpc_release_calldata(const struct rpc_call_ops *, void *);
@ -225,11 +227,19 @@ void rpc_execute(struct rpc_task *);
void rpc_init_priority_wait_queue(struct rpc_wait_queue *, const char *);
void rpc_init_wait_queue(struct rpc_wait_queue *, const char *);
void rpc_destroy_wait_queue(struct rpc_wait_queue *);
unsigned long rpc_task_timeout(const struct rpc_task *task);
void rpc_sleep_on_timeout(struct rpc_wait_queue *queue,
struct rpc_task *task,
rpc_action action,
unsigned long timeout);
void rpc_sleep_on(struct rpc_wait_queue *, struct rpc_task *,
rpc_action action);
void rpc_sleep_on_priority_timeout(struct rpc_wait_queue *queue,
struct rpc_task *task,
unsigned long timeout,
int priority);
void rpc_sleep_on_priority(struct rpc_wait_queue *,
struct rpc_task *,
rpc_action action,
int priority);
void rpc_wake_up_queued_task_on_wq(struct workqueue_struct *wq,
struct rpc_wait_queue *queue,

View File

@ -143,7 +143,7 @@ struct rpc_xprt_ops {
void (*buf_free)(struct rpc_task *task);
void (*prepare_request)(struct rpc_rqst *req);
int (*send_request)(struct rpc_rqst *req);
void (*set_retrans_timeout)(struct rpc_task *task);
void (*wait_for_reply_request)(struct rpc_task *task);
void (*timer)(struct rpc_xprt *xprt, struct rpc_task *task);
void (*release_request)(struct rpc_task *task);
void (*close)(struct rpc_xprt *xprt);
@ -378,8 +378,8 @@ xprt_disable_swap(struct rpc_xprt *xprt)
int xprt_register_transport(struct xprt_class *type);
int xprt_unregister_transport(struct xprt_class *type);
int xprt_load_transport(const char *);
void xprt_set_retrans_timeout_def(struct rpc_task *task);
void xprt_set_retrans_timeout_rtt(struct rpc_task *task);
void xprt_wait_for_reply_request_def(struct rpc_task *task);
void xprt_wait_for_reply_request_rtt(struct rpc_task *task);
void xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status);
void xprt_wait_for_buffer_space(struct rpc_xprt *xprt);
bool xprt_write_space(struct rpc_xprt *xprt);

View File

@ -511,6 +511,33 @@ TRACE_EVENT(xprtrdma_marshal,
)
);
/*
 * xprtrdma_marshal_failed - trace event carrying a marshaling result code
 *
 * Takes the rpc_rqst and an integer result.  The recorded entry holds the
 * owning task's tk_pid, the cl_clid of the task's client, the request XID
 * converted from big-endian to CPU byte order, and the result code.
 * Printed as "task:<task_id>@<client_id> xid=0x<xid>: ret=<ret>".
 */
TRACE_EVENT(xprtrdma_marshal_failed,
TP_PROTO(const struct rpc_rqst *rqst,
int ret
),
TP_ARGS(rqst, ret),
TP_STRUCT__entry(
__field(unsigned int, task_id)
__field(unsigned int, client_id)
__field(u32, xid)
__field(int, ret)
),
TP_fast_assign(
__entry->task_id = rqst->rq_task->tk_pid;
__entry->client_id = rqst->rq_task->tk_client->cl_clid;
/* rq_xid is stored big-endian; convert for display. */
__entry->xid = be32_to_cpu(rqst->rq_xid);
__entry->ret = ret;
),
TP_printk("task:%u@%u xid=0x%08x: ret=%d",
__entry->task_id, __entry->client_id, __entry->xid,
__entry->ret
)
);
TRACE_EVENT(xprtrdma_post_send,
TP_PROTO(
const struct rpcrdma_req *req,

View File

@ -82,7 +82,6 @@ TRACE_DEFINE_ENUM(RPC_TASK_SWAPPER);
TRACE_DEFINE_ENUM(RPC_CALL_MAJORSEEN);
TRACE_DEFINE_ENUM(RPC_TASK_ROOTCREDS);
TRACE_DEFINE_ENUM(RPC_TASK_DYNAMIC);
TRACE_DEFINE_ENUM(RPC_TASK_KILLED);
TRACE_DEFINE_ENUM(RPC_TASK_SOFT);
TRACE_DEFINE_ENUM(RPC_TASK_SOFTCONN);
TRACE_DEFINE_ENUM(RPC_TASK_SENT);
@ -97,7 +96,6 @@ TRACE_DEFINE_ENUM(RPC_TASK_NO_RETRANS_TIMEOUT);
{ RPC_CALL_MAJORSEEN, "MAJORSEEN" }, \
{ RPC_TASK_ROOTCREDS, "ROOTCREDS" }, \
{ RPC_TASK_DYNAMIC, "DYNAMIC" }, \
{ RPC_TASK_KILLED, "KILLED" }, \
{ RPC_TASK_SOFT, "SOFT" }, \
{ RPC_TASK_SOFTCONN, "SOFTCONN" }, \
{ RPC_TASK_SENT, "SENT" }, \
@ -111,6 +109,7 @@ TRACE_DEFINE_ENUM(RPC_TASK_ACTIVE);
TRACE_DEFINE_ENUM(RPC_TASK_NEED_XMIT);
TRACE_DEFINE_ENUM(RPC_TASK_NEED_RECV);
TRACE_DEFINE_ENUM(RPC_TASK_MSG_PIN_WAIT);
TRACE_DEFINE_ENUM(RPC_TASK_SIGNALLED);
#define rpc_show_runstate(flags) \
__print_flags(flags, "|", \
@ -119,7 +118,8 @@ TRACE_DEFINE_ENUM(RPC_TASK_MSG_PIN_WAIT);
{ (1UL << RPC_TASK_ACTIVE), "ACTIVE" }, \
{ (1UL << RPC_TASK_NEED_XMIT), "NEED_XMIT" }, \
{ (1UL << RPC_TASK_NEED_RECV), "NEED_RECV" }, \
{ (1UL << RPC_TASK_MSG_PIN_WAIT), "MSG_PIN_WAIT" })
{ (1UL << RPC_TASK_MSG_PIN_WAIT), "MSG_PIN_WAIT" }, \
{ (1UL << RPC_TASK_SIGNALLED), "SIGNALLED" })
DECLARE_EVENT_CLASS(rpc_task_running,
@ -186,7 +186,7 @@ DECLARE_EVENT_CLASS(rpc_task_queued,
__entry->client_id = task->tk_client ?
task->tk_client->cl_clid : -1;
__entry->task_id = task->tk_pid;
__entry->timeout = task->tk_timeout;
__entry->timeout = rpc_task_timeout(task);
__entry->runstate = task->tk_runstate;
__entry->status = task->tk_status;
__entry->flags = task->tk_flags;

View File

@ -66,13 +66,4 @@ struct nfs_mount_data {
#define NFS_MOUNT_UNSHARED 0x8000 /* 5 */
#define NFS_MOUNT_FLAGMASK 0xFFFF
/* The following are for internal use only */
#define NFS_MOUNT_LOOKUP_CACHE_NONEG 0x10000
#define NFS_MOUNT_LOOKUP_CACHE_NONE 0x20000
#define NFS_MOUNT_NORESVPORT 0x40000
#define NFS_MOUNT_LEGACY_INTERFACE 0x80000
#define NFS_MOUNT_LOCAL_FLOCK 0x100000
#define NFS_MOUNT_LOCAL_FCNTL 0x200000
#endif

View File

@ -269,6 +269,7 @@ err:
struct gss_upcall_msg {
refcount_t count;
kuid_t uid;
const char *service_name;
struct rpc_pipe_msg msg;
struct list_head list;
struct gss_auth *auth;
@ -316,6 +317,7 @@ gss_release_msg(struct gss_upcall_msg *gss_msg)
gss_put_ctx(gss_msg->ctx);
rpc_destroy_wait_queue(&gss_msg->rpc_waitqueue);
gss_put_auth(gss_msg->auth);
kfree_const(gss_msg->service_name);
kfree(gss_msg);
}
@ -410,9 +412,12 @@ gss_upcall_callback(struct rpc_task *task)
gss_release_msg(gss_msg);
}
static void gss_encode_v0_msg(struct gss_upcall_msg *gss_msg)
static void gss_encode_v0_msg(struct gss_upcall_msg *gss_msg,
const struct cred *cred)
{
uid_t uid = from_kuid(&init_user_ns, gss_msg->uid);
struct user_namespace *userns = cred->user_ns;
uid_t uid = from_kuid_munged(userns, gss_msg->uid);
memcpy(gss_msg->databuf, &uid, sizeof(uid));
gss_msg->msg.data = gss_msg->databuf;
gss_msg->msg.len = sizeof(uid);
@ -420,17 +425,31 @@ static void gss_encode_v0_msg(struct gss_upcall_msg *gss_msg)
BUILD_BUG_ON(sizeof(uid) > sizeof(gss_msg->databuf));
}
/*
 * Pipe ->upcall handler for the v0 gss upcall format.
 *
 * While msg->copied is still zero the message payload is (re)encoded with
 * gss_encode_v0_msg() using the credential of the file performing the
 * upcall (file->f_cred).  The request is then handed to
 * rpc_pipe_generic_upcall(), whose return value is passed straight back.
 */
static ssize_t
gss_v0_upcall(struct file *file, struct rpc_pipe_msg *msg,
	      char __user *buf, size_t buflen)
{
	if (!msg->copied) {
		struct gss_upcall_msg *upcall;

		/* Recover the enclosing upcall message from its embedded msg. */
		upcall = container_of(msg, struct gss_upcall_msg, msg);
		gss_encode_v0_msg(upcall, file->f_cred);
	}

	return rpc_pipe_generic_upcall(file, msg, buf, buflen);
}
static int gss_encode_v1_msg(struct gss_upcall_msg *gss_msg,
const char *service_name,
const char *target_name)
const char *target_name,
const struct cred *cred)
{
struct user_namespace *userns = cred->user_ns;
struct gss_api_mech *mech = gss_msg->auth->mech;
char *p = gss_msg->databuf;
size_t buflen = sizeof(gss_msg->databuf);
int len;
len = scnprintf(p, buflen, "mech=%s uid=%d", mech->gm_name,
from_kuid(&init_user_ns, gss_msg->uid));
from_kuid_munged(userns, gss_msg->uid));
buflen -= len;
p += len;
gss_msg->msg.len = len;
@ -491,6 +510,25 @@ out_overflow:
return -ENOMEM;
}
/*
 * Pipe ->upcall handler for the v1 gss upcall format.
 *
 * While msg->copied is still zero the message text is encoded with
 * gss_encode_v1_msg(), using the service name stored in the upcall message,
 * the target name of its auth, and the credential of the file performing
 * the upcall (file->f_cred).  A non-zero encoding result is returned to the
 * caller as-is; otherwise the request is handed to
 * rpc_pipe_generic_upcall() and its return value is passed back.
 */
static ssize_t
gss_v1_upcall(struct file *file, struct rpc_pipe_msg *msg,
	      char __user *buf, size_t buflen)
{
	int err = 0;

	if (!msg->copied) {
		struct gss_upcall_msg *upcall;

		/* Recover the enclosing upcall message from its embedded msg. */
		upcall = container_of(msg, struct gss_upcall_msg, msg);
		err = gss_encode_v1_msg(upcall,
					upcall->service_name,
					upcall->auth->target_name,
					file->f_cred);
	}
	if (err)
		return err;

	return rpc_pipe_generic_upcall(file, msg, buf, buflen);
}
static struct gss_upcall_msg *
gss_alloc_msg(struct gss_auth *gss_auth,
kuid_t uid, const char *service_name)
@ -513,16 +551,14 @@ gss_alloc_msg(struct gss_auth *gss_auth,
refcount_set(&gss_msg->count, 1);
gss_msg->uid = uid;
gss_msg->auth = gss_auth;
switch (vers) {
case 0:
gss_encode_v0_msg(gss_msg);
break;
default:
err = gss_encode_v1_msg(gss_msg, service_name, gss_auth->target_name);
if (err)
goto err_put_pipe_version;
}
kref_get(&gss_auth->kref);
if (service_name) {
gss_msg->service_name = kstrdup_const(service_name, GFP_NOFS);
if (!gss_msg->service_name) {
err = -ENOMEM;
goto err_put_pipe_version;
}
}
return gss_msg;
err_put_pipe_version:
put_pipe_version(gss_auth->net);
@ -581,8 +617,8 @@ gss_refresh_upcall(struct rpc_task *task)
/* XXX: warning on the first, under the assumption we
* shouldn't normally hit this case on a refresh. */
warn_gssd();
task->tk_timeout = 15*HZ;
rpc_sleep_on(&pipe_version_rpc_waitqueue, task, NULL);
rpc_sleep_on_timeout(&pipe_version_rpc_waitqueue,
task, NULL, jiffies + (15 * HZ));
err = -EAGAIN;
goto out;
}
@ -595,7 +631,6 @@ gss_refresh_upcall(struct rpc_task *task)
if (gss_cred->gc_upcall != NULL)
rpc_sleep_on(&gss_cred->gc_upcall->rpc_waitqueue, task, NULL);
else if (gss_msg->ctx == NULL && gss_msg->msg.errno >= 0) {
task->tk_timeout = 0;
gss_cred->gc_upcall = gss_msg;
/* gss_upcall_callback will release the reference to gss_upcall_msg */
refcount_inc(&gss_msg->count);
@ -707,7 +742,7 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
goto err;
}
uid = make_kuid(&init_user_ns, id);
uid = make_kuid(current_user_ns(), id);
if (!uid_valid(uid)) {
err = -EINVAL;
goto err;
@ -2116,7 +2151,7 @@ static const struct rpc_credops gss_nullops = {
};
static const struct rpc_pipe_ops gss_upcall_ops_v0 = {
.upcall = rpc_pipe_generic_upcall,
.upcall = gss_v0_upcall,
.downcall = gss_pipe_downcall,
.destroy_msg = gss_pipe_destroy_msg,
.open_pipe = gss_pipe_open_v0,
@ -2124,7 +2159,7 @@ static const struct rpc_pipe_ops gss_upcall_ops_v0 = {
};
static const struct rpc_pipe_ops gss_upcall_ops_v1 = {
.upcall = rpc_pipe_generic_upcall,
.upcall = gss_v1_upcall,
.downcall = gss_pipe_downcall,
.destroy_msg = gss_pipe_destroy_msg,
.open_pipe = gss_pipe_open_v1,

View File

@ -107,6 +107,8 @@ unx_marshal(struct rpc_task *task, struct xdr_stream *xdr)
__be32 *p, *cred_len, *gidarr_len;
int i;
struct group_info *gi = cred->cr_cred->group_info;
struct user_namespace *userns = clnt->cl_cred ?
clnt->cl_cred->user_ns : &init_user_ns;
/* Credential */
@ -122,14 +124,13 @@ unx_marshal(struct rpc_task *task, struct xdr_stream *xdr)
p = xdr_reserve_space(xdr, 3 * sizeof(*p));
if (!p)
goto marshal_failed;
*p++ = cpu_to_be32(from_kuid(&init_user_ns, cred->cr_cred->fsuid));
*p++ = cpu_to_be32(from_kgid(&init_user_ns, cred->cr_cred->fsgid));
*p++ = cpu_to_be32(from_kuid_munged(userns, cred->cr_cred->fsuid));
*p++ = cpu_to_be32(from_kgid_munged(userns, cred->cr_cred->fsgid));
gidarr_len = p++;
if (gi)
for (i = 0; i < UNX_NGROUPS && i < gi->ngroups; i++)
*p++ = cpu_to_be32(from_kgid(&init_user_ns,
gi->gid[i]));
*p++ = cpu_to_be32(from_kgid_munged(userns, gi->gid[i]));
*gidarr_len = cpu_to_be32(p - gidarr_len - 1);
*cred_len = cpu_to_be32((p - cred_len - 1) << 2);
p = xdr_reserve_space(xdr, (p - gidarr_len - 1) << 2);

View File

@ -394,6 +394,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args,
if (err)
goto out_no_clid;
clnt->cl_cred = get_cred(args->cred);
clnt->cl_procinfo = version->procs;
clnt->cl_maxproc = version->nrprocs;
clnt->cl_prog = args->prognumber ? : program->number;
@ -439,6 +440,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args,
out_no_path:
rpc_free_iostats(clnt->cl_metrics);
out_no_stats:
put_cred(clnt->cl_cred);
rpc_free_clid(clnt);
out_no_clid:
kfree(clnt);
@ -484,8 +486,11 @@ static struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args,
}
clnt->cl_softrtry = 1;
if (args->flags & RPC_CLNT_CREATE_HARDRTRY)
if (args->flags & (RPC_CLNT_CREATE_HARDRTRY|RPC_CLNT_CREATE_SOFTERR)) {
clnt->cl_softrtry = 0;
if (args->flags & RPC_CLNT_CREATE_SOFTERR)
clnt->cl_softerr = 1;
}
if (args->flags & RPC_CLNT_CREATE_AUTOBIND)
clnt->cl_autobind = 1;
@ -623,10 +628,12 @@ static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args,
/* Turn off autobind on clones */
new->cl_autobind = 0;
new->cl_softrtry = clnt->cl_softrtry;
new->cl_softerr = clnt->cl_softerr;
new->cl_noretranstimeo = clnt->cl_noretranstimeo;
new->cl_discrtry = clnt->cl_discrtry;
new->cl_chatty = clnt->cl_chatty;
new->cl_principal = clnt->cl_principal;
new->cl_cred = get_cred(clnt->cl_cred);
return new;
out_err:
@ -648,6 +655,7 @@ struct rpc_clnt *rpc_clone_client(struct rpc_clnt *clnt)
.prognumber = clnt->cl_prog,
.version = clnt->cl_vers,
.authflavor = clnt->cl_auth->au_flavor,
.cred = clnt->cl_cred,
};
return __rpc_clone_client(&args, clnt);
}
@ -669,6 +677,7 @@ rpc_clone_client_set_auth(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
.prognumber = clnt->cl_prog,
.version = clnt->cl_vers,
.authflavor = flavor,
.cred = clnt->cl_cred,
};
return __rpc_clone_client(&args, clnt);
}
@ -827,14 +836,8 @@ void rpc_killall_tasks(struct rpc_clnt *clnt)
* Spin lock all_tasks to prevent changes...
*/
spin_lock(&clnt->cl_lock);
list_for_each_entry(rovr, &clnt->cl_tasks, tk_task) {
if (!RPC_IS_ACTIVATED(rovr))
continue;
if (!(rovr->tk_flags & RPC_TASK_KILLED)) {
rovr->tk_flags |= RPC_TASK_KILLED;
rpc_exit(rovr, -EIO);
}
}
list_for_each_entry(rovr, &clnt->cl_tasks, tk_task)
rpc_signal_task(rovr);
spin_unlock(&clnt->cl_lock);
}
EXPORT_SYMBOL_GPL(rpc_killall_tasks);
@ -882,6 +885,7 @@ rpc_free_client(struct rpc_clnt *clnt)
xprt_put(rcu_dereference_raw(clnt->cl_xprt));
xprt_iter_destroy(&clnt->cl_xpi);
rpciod_down();
put_cred(clnt->cl_cred);
rpc_free_clid(clnt);
kfree(clnt);
return parent;
@ -946,6 +950,7 @@ struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *old,
.prognumber = program->number,
.version = vers,
.authflavor = old->cl_auth->au_flavor,
.cred = old->cl_cred,
};
struct rpc_clnt *clnt;
int err;
@ -1007,6 +1012,8 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt)
atomic_inc(&clnt->cl_count);
if (clnt->cl_softrtry)
task->tk_flags |= RPC_TASK_SOFT;
if (clnt->cl_softerr)
task->tk_flags |= RPC_TASK_TIMEOUT;
if (clnt->cl_noretranstimeo)
task->tk_flags |= RPC_TASK_NO_RETRANS_TIMEOUT;
if (atomic_read(&clnt->cl_swapper))
@ -1470,22 +1477,14 @@ void rpc_force_rebind(struct rpc_clnt *clnt)
}
EXPORT_SYMBOL_GPL(rpc_force_rebind);
/*
* Restart an (async) RPC call from the call_prepare state.
* Usually called from within the exit handler.
*/
int
rpc_restart_call_prepare(struct rpc_task *task)
static int
__rpc_restart_call(struct rpc_task *task, void (*action)(struct rpc_task *))
{
if (RPC_ASSASSINATED(task))
return 0;
task->tk_action = call_start;
task->tk_status = 0;
if (task->tk_ops->rpc_call_prepare != NULL)
task->tk_action = rpc_prepare_task;
task->tk_rpc_status = 0;
task->tk_action = action;
return 1;
}
EXPORT_SYMBOL_GPL(rpc_restart_call_prepare);
/*
* Restart an (async) RPC call. Usually called from within the
@ -1494,14 +1493,23 @@ EXPORT_SYMBOL_GPL(rpc_restart_call_prepare);
int
rpc_restart_call(struct rpc_task *task)
{
if (RPC_ASSASSINATED(task))
return 0;
task->tk_action = call_start;
task->tk_status = 0;
return 1;
return __rpc_restart_call(task, call_start);
}
EXPORT_SYMBOL_GPL(rpc_restart_call);
/*
 * Restart an (async) RPC call from the call_prepare state.
 * Usually called from within the exit handler.
 *
 * When the task's ops supply an rpc_call_prepare callback, the task is
 * restarted at rpc_prepare_task; otherwise this behaves exactly like
 * rpc_restart_call().  The return value is that of the restart helper
 * that was used.
 */
int
rpc_restart_call_prepare(struct rpc_task *task)
{
	if (task->tk_ops->rpc_call_prepare == NULL)
		return rpc_restart_call(task);

	return __rpc_restart_call(task, rpc_prepare_task);
}
EXPORT_SYMBOL_GPL(rpc_restart_call_prepare);
const char
*rpc_proc_name(const struct rpc_task *task)
{
@ -1516,6 +1524,19 @@ const char
return "no proc";
}
/*
 * Terminate @task with separate task and RPC-level status codes.
 *
 * @rpc_status is stored in task->tk_rpc_status first, then the task is
 * ended through rpc_exit() with @tk_status.  The two values may differ:
 * rpc_check_timeout(), for instance, passes -EIO as the task status and
 * -ETIMEDOUT as the RPC status.
 */
static void
__rpc_call_rpcerror(struct rpc_task *task, int tk_status, int rpc_status)
{
/* Record the RPC status before rpc_exit() is invoked on the task. */
task->tk_rpc_status = rpc_status;
rpc_exit(task, tk_status);
}
/*
 * Terminate @task using @status as both the task status and the RPC
 * status (convenience wrapper around __rpc_call_rpcerror()).
 */
static void
rpc_call_rpcerror(struct rpc_task *task, int status)
{
__rpc_call_rpcerror(task, status, status);
}
/*
* 0. Initial state
*
@ -1580,7 +1601,7 @@ call_reserveresult(struct rpc_task *task)
printk(KERN_ERR "%s: status=%d, but no request slot, exiting\n",
__func__, status);
rpc_exit(task, -EIO);
rpc_call_rpcerror(task, -EIO);
return;
}
@ -1608,7 +1629,7 @@ call_reserveresult(struct rpc_task *task)
__func__, status);
break;
}
rpc_exit(task, status);
rpc_call_rpcerror(task, status);
}
/*
@ -1676,7 +1697,7 @@ call_refreshresult(struct rpc_task *task)
}
dprintk("RPC: %5u %s: refresh creds failed with error %d\n",
task->tk_pid, __func__, status);
rpc_exit(task, status);
rpc_call_rpcerror(task, status);
}
/*
@ -1727,7 +1748,7 @@ call_allocate(struct rpc_task *task)
if (status == 0)
return;
if (status != -ENOMEM) {
rpc_exit(task, status);
rpc_call_rpcerror(task, status);
return;
}
@ -1793,10 +1814,17 @@ call_encode(struct rpc_task *task)
rpc_delay(task, HZ >> 4);
break;
case -EKEYEXPIRED:
task->tk_action = call_refresh;
if (!task->tk_cred_retry) {
rpc_exit(task, task->tk_status);
} else {
task->tk_action = call_refresh;
task->tk_cred_retry--;
dprintk("RPC: %5u %s: retry refresh creds\n",
task->tk_pid, __func__);
}
break;
default:
rpc_exit(task, task->tk_status);
rpc_call_rpcerror(task, task->tk_status);
}
return;
} else {
@ -1857,7 +1885,6 @@ call_bind(struct rpc_task *task)
if (!xprt_prepare_transmit(task))
return;
task->tk_timeout = xprt->bind_timeout;
xprt->ops->rpcbind(task);
}
@ -1938,7 +1965,7 @@ call_bind_status(struct rpc_task *task)
task->tk_pid, -task->tk_status);
}
rpc_exit(task, status);
rpc_call_rpcerror(task, status);
return;
retry_timeout:
@ -1973,7 +2000,7 @@ call_connect(struct rpc_task *task)
if (task->tk_status < 0)
return;
if (task->tk_flags & RPC_TASK_NOCONNECT) {
rpc_exit(task, -ENOTCONN);
rpc_call_rpcerror(task, -ENOTCONN);
return;
}
if (!xprt_prepare_transmit(task))
@ -2033,7 +2060,7 @@ call_connect_status(struct rpc_task *task)
task->tk_action = call_transmit;
return;
}
rpc_exit(task, status);
rpc_call_rpcerror(task, status);
return;
out_retry:
/* Check for timeouts before looping back to call_bind */
@ -2118,7 +2145,7 @@ call_transmit_status(struct rpc_task *task)
if (!task->tk_msg.rpc_proc->p_proc)
trace_xprt_ping(task->tk_xprt,
task->tk_status);
rpc_exit(task, task->tk_status);
rpc_call_rpcerror(task, task->tk_status);
return;
}
/* fall through */
@ -2282,7 +2309,7 @@ call_status(struct rpc_task *task)
rpc_check_timeout(task);
return;
out_exit:
rpc_exit(task, status);
rpc_call_rpcerror(task, status);
}
static bool
@ -2306,29 +2333,40 @@ rpc_check_timeout(struct rpc_task *task)
task->tk_timeouts++;
if (RPC_IS_SOFTCONN(task) && !rpc_check_connected(task->tk_rqstp)) {
rpc_exit(task, -ETIMEDOUT);
rpc_call_rpcerror(task, -ETIMEDOUT);
return;
}
if (RPC_IS_SOFT(task)) {
/*
* Once a "no retrans timeout" soft task (a.k.a. NFSv4) has
* been sent, it should time out only if the transport
* connection gets terminally broken.
*/
if ((task->tk_flags & RPC_TASK_NO_RETRANS_TIMEOUT) &&
rpc_check_connected(task->tk_rqstp))
return;
if (clnt->cl_chatty) {
printk(KERN_NOTICE "%s: server %s not responding, timed out\n",
pr_notice_ratelimited(
"%s: server %s not responding, timed out\n",
clnt->cl_program->name,
task->tk_xprt->servername);
}
if (task->tk_flags & RPC_TASK_TIMEOUT)
rpc_exit(task, -ETIMEDOUT);
rpc_call_rpcerror(task, -ETIMEDOUT);
else
rpc_exit(task, -EIO);
__rpc_call_rpcerror(task, -EIO, -ETIMEDOUT);
return;
}
if (!(task->tk_flags & RPC_CALL_MAJORSEEN)) {
task->tk_flags |= RPC_CALL_MAJORSEEN;
if (clnt->cl_chatty) {
printk(KERN_NOTICE "%s: server %s not responding, still trying\n",
clnt->cl_program->name,
task->tk_xprt->servername);
pr_notice_ratelimited(
"%s: server %s not responding, still trying\n",
clnt->cl_program->name,
task->tk_xprt->servername);
}
}
rpc_force_rebind(clnt);
@ -2358,7 +2396,7 @@ call_decode(struct rpc_task *task)
if (task->tk_flags & RPC_CALL_MAJORSEEN) {
if (clnt->cl_chatty) {
printk(KERN_NOTICE "%s: server %s OK\n",
pr_notice_ratelimited("%s: server %s OK\n",
clnt->cl_program->name,
task->tk_xprt->servername);
}
@ -2881,7 +2919,7 @@ static void rpc_show_task(const struct rpc_clnt *clnt,
printk(KERN_INFO "%5u %04x %6d %8p %8p %8ld %8p %sv%u %s a:%ps q:%s\n",
task->tk_pid, task->tk_flags, task->tk_status,
clnt, task->tk_rqstp, task->tk_timeout, task->tk_ops,
clnt, task->tk_rqstp, rpc_task_timeout(task), task->tk_ops,
clnt->cl_program->name, clnt->cl_vers, rpc_proc_name(task),
task->tk_action, rpc_waitq);
}

View File

@ -33,7 +33,7 @@ tasks_show(struct seq_file *f, void *v)
seq_printf(f, "%5u %04x %6d 0x%x 0x%x %8ld %ps %sv%u %s a:%ps q:%s\n",
task->tk_pid, task->tk_flags, task->tk_status,
clnt->cl_clid, xid, task->tk_timeout, task->tk_ops,
clnt->cl_clid, xid, rpc_task_timeout(task), task->tk_ops,
clnt->cl_program->name, clnt->cl_vers, rpc_proc_name(task),
task->tk_action, rpc_waitq);
return 0;

View File

@ -240,6 +240,7 @@ static int rpcb_create_local_unix(struct net *net)
.program = &rpcb_program,
.version = RPCBVERS_2,
.authflavor = RPC_AUTH_NULL,
.cred = current_cred(),
/*
* We turn off the idle timeout to prevent the kernel
* from automatically disconnecting the socket.
@ -299,6 +300,7 @@ static int rpcb_create_local_net(struct net *net)
.program = &rpcb_program,
.version = RPCBVERS_2,
.authflavor = RPC_AUTH_UNIX,
.cred = current_cred(),
.flags = RPC_CLNT_CREATE_NOPING,
};
struct rpc_clnt *clnt, *clnt4;
@ -358,7 +360,8 @@ out:
static struct rpc_clnt *rpcb_create(struct net *net, const char *nodename,
const char *hostname,
struct sockaddr *srvaddr, size_t salen,
int proto, u32 version)
int proto, u32 version,
const struct cred *cred)
{
struct rpc_create_args args = {
.net = net,
@ -370,6 +373,7 @@ static struct rpc_clnt *rpcb_create(struct net *net, const char *nodename,
.program = &rpcb_program,
.version = version,
.authflavor = RPC_AUTH_UNIX,
.cred = cred,
.flags = (RPC_CLNT_CREATE_NOPING |
RPC_CLNT_CREATE_NONPRIVPORT),
};
@ -694,7 +698,8 @@ void rpcb_getport_async(struct rpc_task *task)
/* Put self on the wait queue to ensure we get notified if
* some other task is already attempting to bind the port */
rpc_sleep_on(&xprt->binding, task, NULL);
rpc_sleep_on_timeout(&xprt->binding, task,
NULL, jiffies + xprt->bind_timeout);
if (xprt_test_and_set_binding(xprt)) {
dprintk("RPC: %5u %s: waiting for another binder\n",
@ -744,7 +749,8 @@ void rpcb_getport_async(struct rpc_task *task)
rpcb_clnt = rpcb_create(xprt->xprt_net,
clnt->cl_nodename,
xprt->servername, sap, salen,
xprt->prot, bind_version);
xprt->prot, bind_version,
clnt->cl_cred);
if (IS_ERR(rpcb_clnt)) {
status = PTR_ERR(rpcb_clnt);
dprintk("RPC: %5u %s: rpcb_create failed, error %ld\n",

View File

@ -58,6 +58,20 @@ static struct rpc_wait_queue delay_queue;
struct workqueue_struct *rpciod_workqueue __read_mostly;
struct workqueue_struct *xprtiod_workqueue __read_mostly;
/*
 * rpc_task_timeout - time left before a task's timeout fires
 * @task: task to inspect
 *
 * task->tk_timeout is sampled once and compared against the current
 * jiffies value, i.e. it is treated as an absolute expiry time.
 *
 * Returns the number of jiffies remaining, or 0 if no timeout is set
 * (tk_timeout == 0) or the expiry is not in the future.
 */
unsigned long
rpc_task_timeout(const struct rpc_task *task)
{
	unsigned long expires = READ_ONCE(task->tk_timeout);
	unsigned long cur;

	if (expires == 0)
		return 0;

	cur = jiffies;
	return time_before(cur, expires) ? expires - cur : 0;
}
EXPORT_SYMBOL_GPL(rpc_task_timeout);
/*
* Disable the timer for a given RPC task. Should be called with
* queue->lock and bh_disabled in order to avoid races within
@ -66,7 +80,7 @@ struct workqueue_struct *xprtiod_workqueue __read_mostly;
static void
__rpc_disable_timer(struct rpc_wait_queue *queue, struct rpc_task *task)
{
if (task->tk_timeout == 0)
if (list_empty(&task->u.tk_wait.timer_list))
return;
dprintk("RPC: %5u disabling timer\n", task->tk_pid);
task->tk_timeout = 0;
@ -78,25 +92,21 @@ __rpc_disable_timer(struct rpc_wait_queue *queue, struct rpc_task *task)
static void
rpc_set_queue_timer(struct rpc_wait_queue *queue, unsigned long expires)
{
queue->timer_list.expires = expires;
mod_timer(&queue->timer_list.timer, expires);
timer_reduce(&queue->timer_list.timer, expires);
}
/*
* Set up a timer for the current task.
*/
static void
__rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task)
__rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task,
unsigned long timeout)
{
if (!task->tk_timeout)
return;
dprintk("RPC: %5u setting alarm for %u ms\n",
task->tk_pid, jiffies_to_msecs(task->tk_timeout));
task->tk_pid, jiffies_to_msecs(timeout - jiffies));
task->u.tk_wait.expires = jiffies + task->tk_timeout;
if (list_empty(&queue->timer_list.list) || time_before(task->u.tk_wait.expires, queue->timer_list.expires))
rpc_set_queue_timer(queue, task->u.tk_wait.expires);
task->tk_timeout = timeout;
rpc_set_queue_timer(queue, timeout);
list_add(&task->u.tk_wait.timer_list, &queue->timer_list.list);
}
@ -188,6 +198,7 @@ static void __rpc_add_wait_queue(struct rpc_wait_queue *queue,
if (RPC_IS_QUEUED(task))
return;
INIT_LIST_HEAD(&task->u.tk_wait.timer_list);
if (RPC_IS_PRIORITY(queue))
__rpc_add_wait_queue_priority(queue, task, queue_priority);
else if (RPC_IS_SWAPPER(task))
@ -238,7 +249,9 @@ static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const c
queue->maxpriority = nr_queues - 1;
rpc_reset_waitqueue_priority(queue);
queue->qlen = 0;
timer_setup(&queue->timer_list.timer, __rpc_queue_timer_fn, 0);
timer_setup(&queue->timer_list.timer,
__rpc_queue_timer_fn,
TIMER_DEFERRABLE);
INIT_LIST_HEAD(&queue->timer_list.list);
rpc_assign_waitqueue_name(queue, qname);
}
@ -362,7 +375,6 @@ static void rpc_make_runnable(struct workqueue_struct *wq,
*/
static void __rpc_sleep_on_priority(struct rpc_wait_queue *q,
struct rpc_task *task,
rpc_action action,
unsigned char queue_priority)
{
dprintk("RPC: %5u sleep_on(queue \"%s\" time %lu)\n",
@ -372,47 +384,100 @@ static void __rpc_sleep_on_priority(struct rpc_wait_queue *q,
__rpc_add_wait_queue(q, task, queue_priority);
WARN_ON_ONCE(task->tk_callback != NULL);
task->tk_callback = action;
__rpc_add_timer(q, task);
}
/*
 * Queue @task on @q at @queue_priority and arm its timer for the
 * absolute expiry @timeout (in jiffies).
 *
 * If @timeout is not after the current jiffies, the task is not queued
 * at all; its tk_status is set to -ETIMEDOUT instead.
 */
static void __rpc_sleep_on_priority_timeout(struct rpc_wait_queue *q,
		struct rpc_task *task, unsigned long timeout,
		unsigned char queue_priority)
{
	if (!time_is_after_jiffies(timeout)) {
		task->tk_status = -ETIMEDOUT;
		return;
	}

	__rpc_sleep_on_priority(q, task, queue_priority);
	__rpc_add_timer(q, task, timeout);
}
/*
 * Install @action as the task's tk_callback.
 *
 * A NULL @action leaves the task untouched.  If a callback is already
 * installed, a one-time warning is emitted and the existing callback
 * is kept.
 */
static void rpc_set_tk_callback(struct rpc_task *task, rpc_action action)
{
	if (!action)
		return;
	if (WARN_ON_ONCE(task->tk_callback != NULL))
		return;

	task->tk_callback = action;
}
/*
 * Check that @task may be put to sleep.
 *
 * Returns true when the task is activated.  Otherwise a one-time
 * warning is emitted, tk_status is set to -EIO, rpc_put_task_async()
 * is called on the task, and false is returned.
 */
static bool rpc_sleep_check_activated(struct rpc_task *task)
{
	/* We shouldn't ever put an inactive task to sleep */
	if (!WARN_ON_ONCE(!RPC_IS_ACTIVATED(task)))
		return true;

	task->tk_status = -EIO;
	rpc_put_task_async(task);
	return false;
}
/*
 * rpc_sleep_on_timeout - put a task to sleep on a wait queue with a timeout
 * @q: wait queue to sleep on
 * @task: task to queue
 * @action: callback to install as tk_callback (may be NULL)
 * @timeout: absolute expiry time, in jiffies
 *
 * Does nothing further if rpc_sleep_check_activated() rejects the task.
 * The task is queued at its own tk_priority.
 */
void rpc_sleep_on_timeout(struct rpc_wait_queue *q, struct rpc_task *task,
				rpc_action action, unsigned long timeout)
{
	if (rpc_sleep_check_activated(task)) {
		rpc_set_tk_callback(task, action);

		/*
		 * Protect the queue operations.
		 */
		spin_lock_bh(&q->lock);
		__rpc_sleep_on_priority_timeout(q, task, timeout,
						task->tk_priority);
		spin_unlock_bh(&q->lock);
	}
}
EXPORT_SYMBOL_GPL(rpc_sleep_on_timeout);
void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
rpc_action action)
{
/* We shouldn't ever put an inactive task to sleep */
WARN_ON_ONCE(!RPC_IS_ACTIVATED(task));
if (!RPC_IS_ACTIVATED(task)) {
task->tk_status = -EIO;
rpc_put_task_async(task);
if (!rpc_sleep_check_activated(task))
return;
}
rpc_set_tk_callback(task, action);
WARN_ON_ONCE(task->tk_timeout != 0);
/*
* Protect the queue operations.
*/
spin_lock_bh(&q->lock);
__rpc_sleep_on_priority(q, task, action, task->tk_priority);
__rpc_sleep_on_priority(q, task, task->tk_priority);
spin_unlock_bh(&q->lock);
}
EXPORT_SYMBOL_GPL(rpc_sleep_on);
void rpc_sleep_on_priority(struct rpc_wait_queue *q, struct rpc_task *task,
rpc_action action, int priority)
void rpc_sleep_on_priority_timeout(struct rpc_wait_queue *q,
struct rpc_task *task, unsigned long timeout, int priority)
{
/* We shouldn't ever put an inactive task to sleep */
WARN_ON_ONCE(!RPC_IS_ACTIVATED(task));
if (!RPC_IS_ACTIVATED(task)) {
task->tk_status = -EIO;
rpc_put_task_async(task);
if (!rpc_sleep_check_activated(task))
return;
}
priority -= RPC_PRIORITY_LOW;
/*
* Protect the queue operations.
*/
spin_lock_bh(&q->lock);
__rpc_sleep_on_priority(q, task, action, priority - RPC_PRIORITY_LOW);
__rpc_sleep_on_priority_timeout(q, task, timeout, priority);
spin_unlock_bh(&q->lock);
}
EXPORT_SYMBOL_GPL(rpc_sleep_on_priority_timeout);
/*
 * rpc_sleep_on_priority - put a task to sleep at an explicit priority
 * @q: wait queue to sleep on
 * @task: task to queue
 * @priority: RPC priority; RPC_PRIORITY_LOW is subtracted from it to
 *            form the queue priority passed to __rpc_sleep_on_priority()
 *
 * No timer is armed here; a one-time warning is emitted if the task
 * still carries a non-zero tk_timeout.
 */
void rpc_sleep_on_priority(struct rpc_wait_queue *q, struct rpc_task *task,
		int priority)
{
	if (!rpc_sleep_check_activated(task))
		return;

	WARN_ON_ONCE(task->tk_timeout != 0);

	/*
	 * Protect the queue operations.
	 */
	spin_lock_bh(&q->lock);
	__rpc_sleep_on_priority(q, task, priority - RPC_PRIORITY_LOW);
	spin_unlock_bh(&q->lock);
}
EXPORT_SYMBOL_GPL(rpc_sleep_on_priority);
@ -704,7 +769,7 @@ static void __rpc_queue_timer_fn(struct timer_list *t)
spin_lock(&queue->lock);
expires = now = jiffies;
list_for_each_entry_safe(task, n, &queue->timer_list.list, u.tk_wait.timer_list) {
timeo = task->u.tk_wait.expires;
timeo = task->tk_timeout;
if (time_after_eq(now, timeo)) {
dprintk("RPC: %5u timeout\n", task->tk_pid);
task->tk_status = -ETIMEDOUT;
@ -730,8 +795,7 @@ static void __rpc_atrun(struct rpc_task *task)
*/
void rpc_delay(struct rpc_task *task, unsigned long delay)
{
task->tk_timeout = delay;
rpc_sleep_on(&delay_queue, task, __rpc_atrun);
rpc_sleep_on_timeout(&delay_queue, task, __rpc_atrun, jiffies + delay);
}
EXPORT_SYMBOL_GPL(rpc_delay);
@ -759,8 +823,7 @@ static void
rpc_reset_task_statistics(struct rpc_task *task)
{
task->tk_timeouts = 0;
task->tk_flags &= ~(RPC_CALL_MAJORSEEN|RPC_TASK_KILLED|RPC_TASK_SENT);
task->tk_flags &= ~(RPC_CALL_MAJORSEEN|RPC_TASK_SENT);
rpc_init_task_statistics(task);
}
@ -773,7 +836,6 @@ void rpc_exit_task(struct rpc_task *task)
if (task->tk_ops->rpc_call_done != NULL) {
task->tk_ops->rpc_call_done(task, task->tk_calldata);
if (task->tk_action != NULL) {
WARN_ON(RPC_ASSASSINATED(task));
/* Always release the RPC slot and buffer memory */
xprt_release(task);
rpc_reset_task_statistics(task);
@ -781,6 +843,19 @@ void rpc_exit_task(struct rpc_task *task)
}
}
void rpc_signal_task(struct rpc_task *task)
{
struct rpc_wait_queue *queue;
if (!RPC_IS_ACTIVATED(task))
return;
set_bit(RPC_TASK_SIGNALLED, &task->tk_runstate);
smp_mb__after_atomic();
queue = READ_ONCE(task->tk_waitqueue);
if (queue)
rpc_wake_up_queued_task_set_status(queue, task, -ERESTARTSYS);
}
void rpc_exit(struct rpc_task *task, int status)
{
task->tk_status = status;
@ -836,6 +911,13 @@ static void __rpc_execute(struct rpc_task *task)
*/
if (!RPC_IS_QUEUED(task))
continue;
/*
* Signalled tasks should exit rather than sleep.
*/
if (RPC_SIGNALLED(task))
rpc_exit(task, -ERESTARTSYS);
/*
* The queue->lock protects against races with
* rpc_make_runnable().
@ -861,7 +943,7 @@ static void __rpc_execute(struct rpc_task *task)
status = out_of_line_wait_on_bit(&task->tk_runstate,
RPC_TASK_QUEUED, rpc_wait_bit_killable,
TASK_KILLABLE);
if (status == -ERESTARTSYS) {
if (status < 0) {
/*
* When a sync task receives a signal, it exits with
* -ERESTARTSYS. In order to catch any callbacks that
@ -869,7 +951,7 @@ static void __rpc_execute(struct rpc_task *task)
* break the loop here, but go around once more.
*/
dprintk("RPC: %5u got signal\n", task->tk_pid);
task->tk_flags |= RPC_TASK_KILLED;
set_bit(RPC_TASK_SIGNALLED, &task->tk_runstate);
rpc_exit(task, -ERESTARTSYS);
}
dprintk("RPC: %5u sync task resuming\n", task->tk_pid);

View File

@ -106,7 +106,7 @@ xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, struct xdr_skb
/* ACL likes to be lazy in allocating pages - ACLs
* are small by default but can get huge. */
if ((xdr->flags & XDRBUF_SPARSE_PAGES) && *ppage == NULL) {
*ppage = alloc_page(GFP_ATOMIC);
*ppage = alloc_page(GFP_NOWAIT | __GFP_NOWARN);
if (unlikely(*ppage == NULL)) {
if (copied == 0)
copied = -ENOMEM;

View File

@ -73,6 +73,15 @@ static void xprt_destroy(struct rpc_xprt *xprt);
static DEFINE_SPINLOCK(xprt_list_lock);
static LIST_HEAD(xprt_list);
/*
 * Compute the absolute expiry (in jiffies) for waiting on @req:
 * now + rq_timeout, capped at rq_majortimeo.
 */
static unsigned long xprt_request_timeout(const struct rpc_rqst *req)
{
	unsigned long retrans = jiffies + req->rq_timeout;

	return time_before(retrans, req->rq_majortimeo) ?
		retrans : req->rq_majortimeo;
}
/**
* xprt_register_transport - register a transport implementation
* @transport: transport to register
@ -209,9 +218,12 @@ out_unlock:
out_sleep:
dprintk("RPC: %5u failed to lock transport %p\n",
task->tk_pid, xprt);
task->tk_timeout = RPC_IS_SOFT(task) ? req->rq_timeout : 0;
task->tk_status = -EAGAIN;
rpc_sleep_on(&xprt->sending, task, NULL);
if (RPC_IS_SOFT(task))
rpc_sleep_on_timeout(&xprt->sending, task, NULL,
xprt_request_timeout(req));
else
rpc_sleep_on(&xprt->sending, task, NULL);
return 0;
}
EXPORT_SYMBOL_GPL(xprt_reserve_xprt);
@ -273,9 +285,12 @@ out_unlock:
xprt_clear_locked(xprt);
out_sleep:
dprintk("RPC: %5u failed to lock transport %p\n", task->tk_pid, xprt);
task->tk_timeout = RPC_IS_SOFT(task) ? req->rq_timeout : 0;
task->tk_status = -EAGAIN;
rpc_sleep_on(&xprt->sending, task, NULL);
if (RPC_IS_SOFT(task))
rpc_sleep_on_timeout(&xprt->sending, task, NULL,
xprt_request_timeout(req));
else
rpc_sleep_on(&xprt->sending, task, NULL);
return 0;
}
EXPORT_SYMBOL_GPL(xprt_reserve_xprt_cong);
@ -554,53 +569,44 @@ bool xprt_write_space(struct rpc_xprt *xprt)
}
EXPORT_SYMBOL_GPL(xprt_write_space);
/**
* xprt_set_retrans_timeout_def - set a request's retransmit timeout
* @task: task whose timeout is to be set
*
* Set a request's retransmit timeout based on the transport's
* default timeout parameters. Used by transports that don't adjust
* the retransmit timeout based on round-trip time estimation.
*/
void xprt_set_retrans_timeout_def(struct rpc_task *task)
static unsigned long xprt_abs_ktime_to_jiffies(ktime_t abstime)
{
task->tk_timeout = task->tk_rqstp->rq_timeout;
s64 delta = ktime_to_ns(ktime_get() - abstime);
return likely(delta >= 0) ?
jiffies - nsecs_to_jiffies(delta) :
jiffies + nsecs_to_jiffies(-delta);
}
EXPORT_SYMBOL_GPL(xprt_set_retrans_timeout_def);
/**
* xprt_set_retrans_timeout_rtt - set a request's retransmit timeout
* @task: task whose timeout is to be set
*
* Set a request's retransmit timeout using the RTT estimator.
*/
void xprt_set_retrans_timeout_rtt(struct rpc_task *task)
static unsigned long xprt_calc_majortimeo(struct rpc_rqst *req)
{
int timer = task->tk_msg.rpc_proc->p_timer;
struct rpc_clnt *clnt = task->tk_client;
struct rpc_rtt *rtt = clnt->cl_rtt;
struct rpc_rqst *req = task->tk_rqstp;
unsigned long max_timeout = clnt->cl_timeout->to_maxval;
const struct rpc_timeout *to = req->rq_task->tk_client->cl_timeout;
unsigned long majortimeo = req->rq_timeout;
task->tk_timeout = rpc_calc_rto(rtt, timer);
task->tk_timeout <<= rpc_ntimeo(rtt, timer) + req->rq_retries;
if (task->tk_timeout > max_timeout || task->tk_timeout == 0)
task->tk_timeout = max_timeout;
if (to->to_exponential)
majortimeo <<= to->to_retries;
else
majortimeo += to->to_increment * to->to_retries;
if (majortimeo > to->to_maxval || majortimeo == 0)
majortimeo = to->to_maxval;
return majortimeo;
}
EXPORT_SYMBOL_GPL(xprt_set_retrans_timeout_rtt);
static void xprt_reset_majortimeo(struct rpc_rqst *req)
{
const struct rpc_timeout *to = req->rq_task->tk_client->cl_timeout;
req->rq_majortimeo += xprt_calc_majortimeo(req);
}
req->rq_majortimeo = req->rq_timeout;
if (to->to_exponential)
req->rq_majortimeo <<= to->to_retries;
static void xprt_init_majortimeo(struct rpc_task *task, struct rpc_rqst *req)
{
unsigned long time_init;
struct rpc_xprt *xprt = req->rq_xprt;
if (likely(xprt && xprt_connected(xprt)))
time_init = jiffies;
else
req->rq_majortimeo += to->to_increment * to->to_retries;
if (req->rq_majortimeo > to->to_maxval || req->rq_majortimeo == 0)
req->rq_majortimeo = to->to_maxval;
req->rq_majortimeo += jiffies;
time_init = xprt_abs_ktime_to_jiffies(task->tk_start);
req->rq_timeout = task->tk_client->cl_timeout->to_initval;
req->rq_majortimeo = time_init + xprt_calc_majortimeo(req);
}
/**
@ -822,9 +828,9 @@ void xprt_connect(struct rpc_task *task)
xprt->ops->close(xprt);
if (!xprt_connected(xprt)) {
task->tk_timeout = task->tk_rqstp->rq_timeout;
task->tk_rqstp->rq_connect_cookie = xprt->connect_cookie;
rpc_sleep_on(&xprt->pending, task, NULL);
rpc_sleep_on_timeout(&xprt->pending, task, NULL,
xprt_request_timeout(task->tk_rqstp));
if (test_bit(XPRT_CLOSING, &xprt->state))
return;
@ -949,7 +955,7 @@ xprt_is_pinned_rqst(struct rpc_rqst *req)
* @req: Request to pin
*
* Caller must ensure this is atomic with the call to xprt_lookup_rqst()
* so should be holding the xprt receive lock.
* so should be holding xprt->queue_lock.
*/
void xprt_pin_rqst(struct rpc_rqst *req)
{
@ -961,7 +967,7 @@ EXPORT_SYMBOL_GPL(xprt_pin_rqst);
* xprt_unpin_rqst - Unpin a request on the transport receive list
* @req: Request to unpin
*
* Caller should be holding the xprt receive lock.
* Caller should be holding xprt->queue_lock.
*/
void xprt_unpin_rqst(struct rpc_rqst *req)
{
@ -1017,7 +1023,6 @@ xprt_request_enqueue_receive(struct rpc_task *task)
set_bit(RPC_TASK_NEED_RECV, &task->tk_runstate);
spin_unlock(&xprt->queue_lock);
xprt_reset_majortimeo(req);
/* Turn off autodisconnect */
del_singleshot_timer_sync(&xprt->timer);
}
@ -1102,6 +1107,49 @@ static void xprt_timer(struct rpc_task *task)
task->tk_status = 0;
}
/**
 * xprt_wait_for_reply_request_def - wait for reply
 * @task: pointer to rpc_task
 *
 * Put the task to sleep on the transport's pending queue, with
 * xprt_timer as the wakeup callback.  The expiry is derived from the
 * request's default timeout parameters via xprt_request_timeout().
 * Used by transports that don't adjust the retransmit timeout based
 * on round-trip time estimation.
 */
void xprt_wait_for_reply_request_def(struct rpc_task *task)
{
	struct rpc_rqst *rqst = task->tk_rqstp;
	unsigned long expires = xprt_request_timeout(rqst);

	rpc_sleep_on_timeout(&rqst->rq_xprt->pending, task, xprt_timer,
			     expires);
}
EXPORT_SYMBOL_GPL(xprt_wait_for_reply_request_def);
/**
 * xprt_wait_for_reply_request_rtt - wait for reply using RTT estimator
 * @task: pointer to rpc_task
 *
 * Compute a retransmit timeout from the client's RTT estimator for the
 * procedure's timer class, scaled up by the estimator's timeout count
 * plus the request's retry count and clamped to the client's to_maxval
 * (which is also used when the result is 0).  Then put the task to
 * sleep on the transport's pending queue until jiffies + that timeout,
 * with xprt_timer as the wakeup callback.
 */
void xprt_wait_for_reply_request_rtt(struct rpc_task *task)
{
	struct rpc_clnt *clnt = task->tk_client;
	struct rpc_rqst *req = task->tk_rqstp;
	struct rpc_rtt *rtt = clnt->cl_rtt;
	int timer = task->tk_msg.rpc_proc->p_timer;
	unsigned long limit = clnt->cl_timeout->to_maxval;
	unsigned long rto;

	rto = rpc_calc_rto(rtt, timer);
	rto <<= rpc_ntimeo(rtt, timer) + req->rq_retries;
	if (rto == 0 || rto > limit)
		rto = limit;

	rpc_sleep_on_timeout(&req->rq_xprt->pending, task, xprt_timer,
			     jiffies + rto);
}
EXPORT_SYMBOL_GPL(xprt_wait_for_reply_request_rtt);
/**
* xprt_request_wait_receive - wait for the reply to an RPC request
* @task: RPC task about to send a request
@ -1121,8 +1169,7 @@ void xprt_request_wait_receive(struct rpc_task *task)
*/
spin_lock(&xprt->queue_lock);
if (test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate)) {
xprt->ops->set_retrans_timeout(task);
rpc_sleep_on(&xprt->pending, task, xprt_timer);
xprt->ops->wait_for_reply_request(task);
/*
* Send an extra queue wakeup call if the
* connection was dropped in case the call to
@ -1337,6 +1384,10 @@ xprt_request_transmit(struct rpc_rqst *req, struct rpc_task *snd_task)
if (status < 0)
goto out_dequeue;
}
if (RPC_SIGNALLED(task)) {
status = -ERESTARTSYS;
goto out_dequeue;
}
}
/*
@ -1605,7 +1656,6 @@ xprt_request_init(struct rpc_task *task)
struct rpc_xprt *xprt = task->tk_xprt;
struct rpc_rqst *req = task->tk_rqstp;
req->rq_timeout = task->tk_client->cl_timeout->to_initval;
req->rq_task = task;
req->rq_xprt = xprt;
req->rq_buffer = NULL;
@ -1618,7 +1668,7 @@ xprt_request_init(struct rpc_task *task)
req->rq_snd_buf.bvec = NULL;
req->rq_rcv_buf.bvec = NULL;
req->rq_release_snd_buf = NULL;
xprt_reset_majortimeo(req);
xprt_init_majortimeo(task, req);
dprintk("RPC: %5u reserved req %p xid %08x\n", task->tk_pid,
req, ntohl(req->rq_xid));
}
@ -1647,7 +1697,6 @@ void xprt_reserve(struct rpc_task *task)
if (task->tk_rqstp != NULL)
return;
task->tk_timeout = 0;
task->tk_status = -EAGAIN;
if (!xprt_throttle_congested(xprt, task))
xprt_do_reserve(xprt, task);
@ -1670,7 +1719,6 @@ void xprt_retry_reserve(struct rpc_task *task)
if (task->tk_rqstp != NULL)
return;
task->tk_timeout = 0;
task->tk_status = -EAGAIN;
xprt_do_reserve(xprt, task);
}
@ -1827,7 +1875,9 @@ found:
xprt->idle_timeout = 0;
INIT_WORK(&xprt->task_cleanup, xprt_autoclose);
if (xprt_has_timer(xprt))
timer_setup(&xprt->timer, xprt_init_autodisconnect, 0);
timer_setup(&xprt->timer,
xprt_init_autodisconnect,
TIMER_DEFERRABLE);
else
timer_setup(&xprt->timer, NULL, 0);

View File

@ -19,45 +19,6 @@
#undef RPCRDMA_BACKCHANNEL_DEBUG
static int rpcrdma_bc_setup_reqs(struct rpcrdma_xprt *r_xprt,
unsigned int count)
{
struct rpc_xprt *xprt = &r_xprt->rx_xprt;
struct rpcrdma_req *req;
struct rpc_rqst *rqst;
unsigned int i;
for (i = 0; i < (count << 1); i++) {
struct rpcrdma_regbuf *rb;
size_t size;
req = rpcrdma_create_req(r_xprt);
if (IS_ERR(req))
return PTR_ERR(req);
rqst = &req->rl_slot;
rqst->rq_xprt = xprt;
INIT_LIST_HEAD(&rqst->rq_bc_list);
__set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
spin_lock(&xprt->bc_pa_lock);
list_add(&rqst->rq_bc_pa_list, &xprt->bc_pa_list);
spin_unlock(&xprt->bc_pa_lock);
size = r_xprt->rx_data.inline_rsize;
rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, GFP_KERNEL);
if (IS_ERR(rb))
goto out_fail;
req->rl_sendbuf = rb;
xdr_buf_init(&rqst->rq_snd_buf, rb->rg_base,
min_t(size_t, size, PAGE_SIZE));
}
return 0;
out_fail:
rpcrdma_req_destroy(req);
return -ENOMEM;
}
/**
* xprt_rdma_bc_setup - Pre-allocate resources for handling backchannel requests
* @xprt: transport associated with these backchannel resources
@ -68,34 +29,10 @@ out_fail:
int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs)
{
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
int rc;
/* The backchannel reply path returns each rpc_rqst to the
* bc_pa_list _after_ the reply is sent. If the server is
* faster than the client, it can send another backward
* direction request before the rpc_rqst is returned to the
* list. The client rejects the request in this case.
*
* Twice as many rpc_rqsts are prepared to ensure there is
* always an rpc_rqst available as soon as a reply is sent.
*/
if (reqs > RPCRDMA_BACKWARD_WRS >> 1)
goto out_err;
rc = rpcrdma_bc_setup_reqs(r_xprt, reqs);
if (rc)
goto out_free;
r_xprt->rx_buf.rb_bc_srv_max_requests = reqs;
r_xprt->rx_buf.rb_bc_srv_max_requests = RPCRDMA_BACKWARD_WRS >> 1;
trace_xprtrdma_cb_setup(r_xprt, reqs);
return 0;
out_free:
xprt_rdma_bc_destroy(xprt, reqs);
out_err:
pr_err("RPC: %s: setup backchannel transport failed\n", __func__);
return -ENOMEM;
}
/**
@ -107,10 +44,10 @@ out_err:
size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *xprt)
{
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
struct rpcrdma_ep *ep = &r_xprt->rx_ep;
size_t maxmsg;
maxmsg = min_t(unsigned int, cdata->inline_rsize, cdata->inline_wsize);
maxmsg = min_t(unsigned int, ep->rep_inline_send, ep->rep_inline_recv);
maxmsg = min_t(unsigned int, maxmsg, PAGE_SIZE);
return maxmsg - RPCRDMA_HDRLEN_MIN;
}
@ -123,7 +60,7 @@ static int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0);
xdr_init_encode(&req->rl_stream, &req->rl_hdrbuf,
req->rl_rdmabuf->rg_base, rqst);
rdmab_data(req->rl_rdmabuf), rqst);
p = xdr_reserve_space(&req->rl_stream, 28);
if (unlikely(!p))
@ -223,6 +160,43 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst)
spin_unlock(&xprt->bc_pa_lock);
}
/* Obtain an rpc_rqst for a backward direction call.
 *
 * @r_xprt: transport the rqst is obtained for
 *
 * The first entry on the transport's bc_pa_list is unlinked and
 * returned if the list is not empty. Otherwise a new request is
 * created, unless bc_alloc_count has already reached
 * RPCRDMA_BACKWARD_WRS.
 *
 * Returns an rpc_rqst, or NULL if the limit has been reached or the
 * request could not be created.
 */
static struct rpc_rqst *rpcrdma_bc_rqst_get(struct rpcrdma_xprt *r_xprt)
{
	struct rpc_xprt *xprt = &r_xprt->rx_xprt;
	struct rpc_rqst *rqst;
	struct rpcrdma_req *req;
	size_t bufsize;

	/* Fast path: take a parked rqst off the list, if there is one */
	spin_lock(&xprt->bc_pa_lock);
	rqst = list_first_entry_or_null(&xprt->bc_pa_list, struct rpc_rqst,
					rq_bc_pa_list);
	if (rqst)
		list_del(&rqst->rq_bc_pa_list);
	spin_unlock(&xprt->bc_pa_lock);
	if (rqst)
		return rqst;

	/* Set a limit to prevent a remote from overrunning our resources.
	 */
	if (xprt->bc_alloc_count >= RPCRDMA_BACKWARD_WRS)
		return NULL;

	bufsize = min_t(size_t, r_xprt->rx_ep.rep_inline_recv, PAGE_SIZE);
	req = rpcrdma_req_create(r_xprt, bufsize, GFP_KERNEL);
	if (!req)
		return NULL;
	xprt->bc_alloc_count++;

	/* The rpc_rqst is embedded in the newly created request */
	rqst = &req->rl_slot;
	rqst->rq_xprt = xprt;
	__set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
	xdr_buf_init(&rqst->rq_snd_buf, rdmab_data(req->rl_sendbuf), bufsize);
	return rqst;
}
/**
* rpcrdma_bc_receive_call - Handle a backward direction call
* @r_xprt: transport receiving the call
@ -254,18 +228,10 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
pr_info("RPC: %s: %*ph\n", __func__, size, p);
#endif
/* Grab a free bc rqst */
spin_lock(&xprt->bc_pa_lock);
if (list_empty(&xprt->bc_pa_list)) {
spin_unlock(&xprt->bc_pa_lock);
rqst = rpcrdma_bc_rqst_get(r_xprt);
if (!rqst)
goto out_overflow;
}
rqst = list_first_entry(&xprt->bc_pa_list,
struct rpc_rqst, rq_bc_pa_list);
list_del(&rqst->rq_bc_pa_list);
spin_unlock(&xprt->bc_pa_lock);
/* Prepare rqst */
rqst->rq_reply_bytes_recvd = 0;
rqst->rq_xid = *p;

View File

@ -82,13 +82,13 @@
/**
* frwr_is_supported - Check if device supports FRWR
* @ia: interface adapter to check
* @device: interface adapter to check
*
* Returns true if device supports FRWR, otherwise false
*/
bool frwr_is_supported(struct rpcrdma_ia *ia)
bool frwr_is_supported(struct ib_device *device)
{
struct ib_device_attr *attrs = &ia->ri_device->attrs;
struct ib_device_attr *attrs = &device->attrs;
if (!(attrs->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS))
goto out_not_supported;
@ -98,7 +98,7 @@ bool frwr_is_supported(struct rpcrdma_ia *ia)
out_not_supported:
pr_info("rpcrdma: 'frwr' mode is not supported by device %s\n",
ia->ri_device->name);
device->name);
return false;
}
@ -131,7 +131,7 @@ frwr_mr_recycle_worker(struct work_struct *work)
if (mr->mr_dir != DMA_NONE) {
trace_xprtrdma_mr_unmap(mr);
ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
ib_dma_unmap_sg(r_xprt->rx_ia.ri_id->device,
mr->mr_sg, mr->mr_nents, mr->mr_dir);
mr->mr_dir = DMA_NONE;
}
@ -194,12 +194,11 @@ out_list_err:
* frwr_open - Prepare an endpoint for use with FRWR
* @ia: interface adapter this endpoint will use
* @ep: endpoint to prepare
* @cdata: transport parameters
*
* On success, sets:
* ep->rep_attr.cap.max_send_wr
* ep->rep_attr.cap.max_recv_wr
* cdata->max_requests
* ep->rep_max_requests
* ia->ri_max_segs
*
* And these FRWR-related fields:
@ -208,10 +207,9 @@ out_list_err:
*
* On failure, a negative errno is returned.
*/
int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
struct rpcrdma_create_data_internal *cdata)
int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep)
{
struct ib_device_attr *attrs = &ia->ri_device->attrs;
struct ib_device_attr *attrs = &ia->ri_id->device->attrs;
int max_qp_wr, depth, delta;
ia->ri_mrtype = IB_MR_TYPE_MEM_REG;
@ -253,24 +251,23 @@ int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
} while (delta > 0);
}
max_qp_wr = ia->ri_device->attrs.max_qp_wr;
max_qp_wr = ia->ri_id->device->attrs.max_qp_wr;
max_qp_wr -= RPCRDMA_BACKWARD_WRS;
max_qp_wr -= 1;
if (max_qp_wr < RPCRDMA_MIN_SLOT_TABLE)
return -ENOMEM;
if (cdata->max_requests > max_qp_wr)
cdata->max_requests = max_qp_wr;
ep->rep_attr.cap.max_send_wr = cdata->max_requests * depth;
if (ep->rep_max_requests > max_qp_wr)
ep->rep_max_requests = max_qp_wr;
ep->rep_attr.cap.max_send_wr = ep->rep_max_requests * depth;
if (ep->rep_attr.cap.max_send_wr > max_qp_wr) {
cdata->max_requests = max_qp_wr / depth;
if (!cdata->max_requests)
ep->rep_max_requests = max_qp_wr / depth;
if (!ep->rep_max_requests)
return -EINVAL;
ep->rep_attr.cap.max_send_wr = cdata->max_requests *
depth;
ep->rep_attr.cap.max_send_wr = ep->rep_max_requests * depth;
}
ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS;
ep->rep_attr.cap.max_send_wr += 1; /* for ib_drain_sq */
ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
ep->rep_attr.cap.max_recv_wr = ep->rep_max_requests;
ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
ep->rep_attr.cap.max_recv_wr += 1; /* for ib_drain_rq */
@ -300,15 +297,6 @@ size_t frwr_maxpages(struct rpcrdma_xprt *r_xprt)
(ia->ri_max_segs - 2) * ia->ri_max_frwr_depth);
}
/* Report a send-side work completion status on the kernel log.
 *
 * @wc: work completion whose status is examined
 * @wr: label for the kind of work request, printed in the message
 *
 * Nothing is logged when the status is IB_WC_WR_FLUSH_ERR; any other
 * status is printed along with the vendor error code.
 */
static void
__frwr_sendcompletion_flush(struct ib_wc *wc, const char *wr)
{
	if (wc->status == IB_WC_WR_FLUSH_ERR)
		return;

	pr_err("rpcrdma: %s: %s (%u/0x%x)\n",
	       wr, ib_wc_status_msg(wc->status),
	       wc->status, wc->vendor_err);
}
/**
* frwr_wc_fastreg - Invoked by RDMA provider for a flushed FastReg WC
* @cq: completion queue (ignored)
@ -323,10 +311,8 @@ frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
container_of(cqe, struct rpcrdma_frwr, fr_cqe);
/* WARNING: Only wr_cqe and status are reliable at this point */
if (wc->status != IB_WC_SUCCESS) {
if (wc->status != IB_WC_SUCCESS)
frwr->fr_state = FRWR_FLUSHED_FR;
__frwr_sendcompletion_flush(wc, "fastreg");
}
trace_xprtrdma_wc_fastreg(wc, frwr);
}
@ -344,10 +330,8 @@ frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
fr_cqe);
/* WARNING: Only wr_cqe and status are reliable at this point */
if (wc->status != IB_WC_SUCCESS) {
if (wc->status != IB_WC_SUCCESS)
frwr->fr_state = FRWR_FLUSHED_LI;
__frwr_sendcompletion_flush(wc, "localinv");
}
trace_xprtrdma_wc_li(wc, frwr);
}
@ -366,12 +350,10 @@ frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
fr_cqe);
/* WARNING: Only wr_cqe and status are reliable at this point */
if (wc->status != IB_WC_SUCCESS) {
if (wc->status != IB_WC_SUCCESS)
frwr->fr_state = FRWR_FLUSHED_LI;
__frwr_sendcompletion_flush(wc, "localinv");
}
complete(&frwr->fr_linv_done);
trace_xprtrdma_wc_li_wake(wc, frwr);
complete(&frwr->fr_linv_done);
}
/**
@ -436,7 +418,8 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
}
mr->mr_dir = rpcrdma_data_dir(writing);
mr->mr_nents = ib_dma_map_sg(ia->ri_device, mr->mr_sg, i, mr->mr_dir);
mr->mr_nents =
ib_dma_map_sg(ia->ri_id->device, mr->mr_sg, i, mr->mr_dir);
if (!mr->mr_nents)
goto out_dmamap_err;
@ -466,7 +449,7 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
return seg;
out_dmamap_err:
frwr->fr_state = FRWR_IS_INVALID;
mr->mr_dir = DMA_NONE;
trace_xprtrdma_frwr_sgerr(mr, i);
rpcrdma_mr_put(mr);
return ERR_PTR(-EIO);

View File

@ -105,16 +105,23 @@ static unsigned int rpcrdma_max_reply_header_size(unsigned int maxsegs)
return size;
}
/**
* rpcrdma_set_max_header_sizes - Initialize inline payload sizes
* @r_xprt: transport instance to initialize
*
* The max_inline fields contain the maximum size of an RPC message
* so the marshaling code doesn't have to repeat this calculation
* for every RPC.
*/
void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *r_xprt)
{
struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
unsigned int maxsegs = ia->ri_max_segs;
unsigned int maxsegs = r_xprt->rx_ia.ri_max_segs;
struct rpcrdma_ep *ep = &r_xprt->rx_ep;
ia->ri_max_inline_write = cdata->inline_wsize -
rpcrdma_max_call_header_size(maxsegs);
ia->ri_max_inline_read = cdata->inline_rsize -
rpcrdma_max_reply_header_size(maxsegs);
ep->rep_max_inline_send =
ep->rep_inline_send - rpcrdma_max_call_header_size(maxsegs);
ep->rep_max_inline_recv =
ep->rep_inline_recv - rpcrdma_max_reply_header_size(maxsegs);
}
/* The client can send a request inline as long as the RPCRDMA header
@ -131,7 +138,7 @@ static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt,
struct xdr_buf *xdr = &rqst->rq_snd_buf;
unsigned int count, remaining, offset;
if (xdr->len > r_xprt->rx_ia.ri_max_inline_write)
if (xdr->len > r_xprt->rx_ep.rep_max_inline_send)
return false;
if (xdr->page_len) {
@ -159,9 +166,7 @@ static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt,
static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
struct rpc_rqst *rqst)
{
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
return rqst->rq_rcv_buf.buflen <= ia->ri_max_inline_read;
return rqst->rq_rcv_buf.buflen <= r_xprt->rx_ep.rep_max_inline_recv;
}
/* The client is required to provide a Reply chunk if the maximum
@ -173,10 +178,9 @@ rpcrdma_nonpayload_inline(const struct rpcrdma_xprt *r_xprt,
const struct rpc_rqst *rqst)
{
const struct xdr_buf *buf = &rqst->rq_rcv_buf;
const struct rpcrdma_ia *ia = &r_xprt->rx_ia;
return buf->head[0].iov_len + buf->tail[0].iov_len <
ia->ri_max_inline_read;
return (buf->head[0].iov_len + buf->tail[0].iov_len) <
r_xprt->rx_ep.rep_max_inline_recv;
}
/* Split @vec on page boundaries into SGEs. FMR registers pages, not
@ -238,7 +242,7 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
*/
if (unlikely(xdrbuf->flags & XDRBUF_SPARSE_PAGES)) {
if (!*ppages)
*ppages = alloc_page(GFP_ATOMIC);
*ppages = alloc_page(GFP_NOWAIT | __GFP_NOWARN);
if (!*ppages)
return -ENOBUFS;
}
@ -508,50 +512,45 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
}
/**
* rpcrdma_unmap_sendctx - DMA-unmap Send buffers
* rpcrdma_sendctx_unmap - DMA-unmap Send buffer
* @sc: sendctx containing SGEs to unmap
*
*/
void
rpcrdma_unmap_sendctx(struct rpcrdma_sendctx *sc)
void rpcrdma_sendctx_unmap(struct rpcrdma_sendctx *sc)
{
struct rpcrdma_ia *ia = &sc->sc_xprt->rx_ia;
struct ib_sge *sge;
unsigned int count;
/* The first two SGEs contain the transport header and
* the inline buffer. These are always left mapped so
* they can be cheaply re-used.
*/
sge = &sc->sc_sges[2];
for (count = sc->sc_unmap_count; count; ++sge, --count)
ib_dma_unmap_page(ia->ri_device,
sge->addr, sge->length, DMA_TO_DEVICE);
for (sge = &sc->sc_sges[2]; sc->sc_unmap_count;
++sge, --sc->sc_unmap_count)
ib_dma_unmap_page(sc->sc_device, sge->addr, sge->length,
DMA_TO_DEVICE);
if (test_and_clear_bit(RPCRDMA_REQ_F_TX_RESOURCES, &sc->sc_req->rl_flags)) {
smp_mb__after_atomic();
if (test_and_clear_bit(RPCRDMA_REQ_F_TX_RESOURCES,
&sc->sc_req->rl_flags))
wake_up_bit(&sc->sc_req->rl_flags, RPCRDMA_REQ_F_TX_RESOURCES);
}
}
/* Prepare an SGE for the RPC-over-RDMA transport header.
*/
static bool
rpcrdma_prepare_hdr_sge(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
u32 len)
static bool rpcrdma_prepare_hdr_sge(struct rpcrdma_xprt *r_xprt,
struct rpcrdma_req *req, u32 len)
{
struct rpcrdma_sendctx *sc = req->rl_sendctx;
struct rpcrdma_regbuf *rb = req->rl_rdmabuf;
struct ib_sge *sge = sc->sc_sges;
if (!rpcrdma_dma_map_regbuf(ia, rb))
if (!rpcrdma_regbuf_dma_map(r_xprt, rb))
goto out_regbuf;
sge->addr = rdmab_addr(rb);
sge->length = len;
sge->lkey = rdmab_lkey(rb);
ib_dma_sync_single_for_device(rdmab_device(rb), sge->addr,
sge->length, DMA_TO_DEVICE);
ib_dma_sync_single_for_device(rdmab_device(rb), sge->addr, sge->length,
DMA_TO_DEVICE);
sc->sc_wr.num_sge++;
return true;
@ -563,23 +562,23 @@ out_regbuf:
/* Prepare the Send SGEs. The head and tail iovec, and each entry
* in the page list, gets its own SGE.
*/
static bool
rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
struct xdr_buf *xdr, enum rpcrdma_chunktype rtype)
static bool rpcrdma_prepare_msg_sges(struct rpcrdma_xprt *r_xprt,
struct rpcrdma_req *req,
struct xdr_buf *xdr,
enum rpcrdma_chunktype rtype)
{
struct rpcrdma_sendctx *sc = req->rl_sendctx;
unsigned int sge_no, page_base, len, remaining;
struct rpcrdma_regbuf *rb = req->rl_sendbuf;
struct ib_device *device = ia->ri_device;
struct ib_sge *sge = sc->sc_sges;
u32 lkey = ia->ri_pd->local_dma_lkey;
struct page *page, **ppages;
/* The head iovec is straightforward, as it is already
* DMA-mapped. Sync the content that has changed.
*/
if (!rpcrdma_dma_map_regbuf(ia, rb))
if (!rpcrdma_regbuf_dma_map(r_xprt, rb))
goto out_regbuf;
sc->sc_device = rdmab_device(rb);
sge_no = 1;
sge[sge_no].addr = rdmab_addr(rb);
sge[sge_no].length = xdr->head[0].iov_len;
@ -626,13 +625,14 @@ rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
goto out_mapping_overflow;
len = min_t(u32, PAGE_SIZE - page_base, remaining);
sge[sge_no].addr = ib_dma_map_page(device, *ppages,
page_base, len,
DMA_TO_DEVICE);
if (ib_dma_mapping_error(device, sge[sge_no].addr))
sge[sge_no].addr =
ib_dma_map_page(rdmab_device(rb), *ppages,
page_base, len, DMA_TO_DEVICE);
if (ib_dma_mapping_error(rdmab_device(rb),
sge[sge_no].addr))
goto out_mapping_err;
sge[sge_no].length = len;
sge[sge_no].lkey = lkey;
sge[sge_no].lkey = rdmab_lkey(rb);
sc->sc_unmap_count++;
ppages++;
@ -653,13 +653,13 @@ rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
map_tail:
sge_no++;
sge[sge_no].addr = ib_dma_map_page(device, page,
page_base, len,
DMA_TO_DEVICE);
if (ib_dma_mapping_error(device, sge[sge_no].addr))
sge[sge_no].addr =
ib_dma_map_page(rdmab_device(rb), page, page_base, len,
DMA_TO_DEVICE);
if (ib_dma_mapping_error(rdmab_device(rb), sge[sge_no].addr))
goto out_mapping_err;
sge[sge_no].length = len;
sge[sge_no].lkey = lkey;
sge[sge_no].lkey = rdmab_lkey(rb);
sc->sc_unmap_count++;
}
@ -674,12 +674,12 @@ out_regbuf:
return false;
out_mapping_overflow:
rpcrdma_unmap_sendctx(sc);
rpcrdma_sendctx_unmap(sc);
pr_err("rpcrdma: too many Send SGEs (%u)\n", sge_no);
return false;
out_mapping_err:
rpcrdma_unmap_sendctx(sc);
rpcrdma_sendctx_unmap(sc);
trace_xprtrdma_dma_maperr(sge[sge_no].addr);
return false;
}
@ -699,7 +699,7 @@ rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt,
struct rpcrdma_req *req, u32 hdrlen,
struct xdr_buf *xdr, enum rpcrdma_chunktype rtype)
{
req->rl_sendctx = rpcrdma_sendctx_get_locked(&r_xprt->rx_buf);
req->rl_sendctx = rpcrdma_sendctx_get_locked(r_xprt);
if (!req->rl_sendctx)
return -EAGAIN;
req->rl_sendctx->sc_wr.num_sge = 0;
@ -707,11 +707,11 @@ rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt,
req->rl_sendctx->sc_req = req;
__clear_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags);
if (!rpcrdma_prepare_hdr_sge(&r_xprt->rx_ia, req, hdrlen))
if (!rpcrdma_prepare_hdr_sge(r_xprt, req, hdrlen))
return -EIO;
if (rtype != rpcrdma_areadch)
if (!rpcrdma_prepare_msg_sges(&r_xprt->rx_ia, req, xdr, rtype))
if (!rpcrdma_prepare_msg_sges(r_xprt, req, xdr, rtype))
return -EIO;
return 0;
@ -747,8 +747,8 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
int ret;
rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0);
xdr_init_encode(xdr, &req->rl_hdrbuf,
req->rl_rdmabuf->rg_base, rqst);
xdr_init_encode(xdr, &req->rl_hdrbuf, rdmab_data(req->rl_rdmabuf),
rqst);
/* Fixed header fields */
ret = -EMSGSIZE;
@ -876,6 +876,7 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
return 0;
out_err:
trace_xprtrdma_marshal_failed(rqst, ret);
switch (ret) {
case -EAGAIN:
xprt_wait_for_buffer_space(rqst->rq_xprt);

View File

@ -261,7 +261,7 @@ static const struct rpc_xprt_ops xprt_rdma_bc_procs = {
.buf_alloc = xprt_rdma_bc_allocate,
.buf_free = xprt_rdma_bc_free,
.send_request = xprt_rdma_bc_send_request,
.set_retrans_timeout = xprt_set_retrans_timeout_def,
.wait_for_reply_request = xprt_wait_for_reply_request_def,
.close = xprt_rdma_bc_close,
.destroy = xprt_rdma_bc_put,
.print_stats = xprt_rdma_print_stats

View File

@ -68,9 +68,9 @@
* tunables
*/
static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE;
unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE;
unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRWR;
int xprt_rdma_pad_optimize;
@ -288,7 +288,7 @@ xprt_rdma_destroy(struct rpc_xprt *xprt)
cancel_delayed_work_sync(&r_xprt->rx_connect_worker);
rpcrdma_ep_destroy(&r_xprt->rx_ep, &r_xprt->rx_ia);
rpcrdma_ep_destroy(r_xprt);
rpcrdma_buffer_destroy(&r_xprt->rx_buf);
rpcrdma_ia_close(&r_xprt->rx_ia);
@ -311,10 +311,8 @@ static const struct rpc_timeout xprt_rdma_default_timeout = {
static struct rpc_xprt *
xprt_setup_rdma(struct xprt_create *args)
{
struct rpcrdma_create_data_internal cdata;
struct rpc_xprt *xprt;
struct rpcrdma_xprt *new_xprt;
struct rpcrdma_ep *new_ep;
struct sockaddr *sap;
int rc;
@ -349,40 +347,12 @@ xprt_setup_rdma(struct xprt_create *args)
xprt_set_bound(xprt);
xprt_rdma_format_addresses(xprt, sap);
cdata.max_requests = xprt_rdma_slot_table_entries;
cdata.rsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA write max */
cdata.wsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA read max */
cdata.inline_wsize = xprt_rdma_max_inline_write;
if (cdata.inline_wsize > cdata.wsize)
cdata.inline_wsize = cdata.wsize;
cdata.inline_rsize = xprt_rdma_max_inline_read;
if (cdata.inline_rsize > cdata.rsize)
cdata.inline_rsize = cdata.rsize;
/*
* Create new transport instance, which includes initialized
* o ia
* o endpoint
* o buffers
*/
new_xprt = rpcx_to_rdmax(xprt);
rc = rpcrdma_ia_open(new_xprt);
if (rc)
goto out1;
/*
* initialize and create ep
*/
new_xprt->rx_data = cdata;
new_ep = &new_xprt->rx_ep;
rc = rpcrdma_ep_create(&new_xprt->rx_ep,
&new_xprt->rx_ia, &new_xprt->rx_data);
rc = rpcrdma_ep_create(new_xprt);
if (rc)
goto out2;
@ -413,7 +383,7 @@ out4:
rpcrdma_buffer_destroy(&new_xprt->rx_buf);
rc = -ENODEV;
out3:
rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia);
rpcrdma_ep_destroy(new_xprt);
out2:
rpcrdma_ia_close(&new_xprt->rx_ia);
out1:
@ -585,52 +555,15 @@ xprt_rdma_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *rqst)
rpc_wake_up_next(&xprt->backlog);
}
static bool
rpcrdma_get_sendbuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
size_t size, gfp_t flags)
static bool rpcrdma_check_regbuf(struct rpcrdma_xprt *r_xprt,
struct rpcrdma_regbuf *rb, size_t size,
gfp_t flags)
{
struct rpcrdma_regbuf *rb;
if (req->rl_sendbuf && rdmab_length(req->rl_sendbuf) >= size)
return true;
rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, flags);
if (IS_ERR(rb))
return false;
rpcrdma_free_regbuf(req->rl_sendbuf);
r_xprt->rx_stats.hardway_register_count += size;
req->rl_sendbuf = rb;
return true;
}
/* The rq_rcv_buf is used only if a Reply chunk is necessary.
* The decision to use a Reply chunk is made later in
* rpcrdma_marshal_req. This buffer is registered at that time.
*
* Otherwise, the associated RPC Reply arrives in a separate
* Receive buffer, arbitrarily chosen by the HCA. The buffer
* allocated here for the RPC Reply is not utilized in that
* case. See rpcrdma_inline_fixup.
*
* A regbuf is used here to remember the buffer size.
*/
static bool
rpcrdma_get_recvbuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
size_t size, gfp_t flags)
{
struct rpcrdma_regbuf *rb;
if (req->rl_recvbuf && rdmab_length(req->rl_recvbuf) >= size)
return true;
rb = rpcrdma_alloc_regbuf(size, DMA_NONE, flags);
if (IS_ERR(rb))
return false;
rpcrdma_free_regbuf(req->rl_recvbuf);
r_xprt->rx_stats.hardway_register_count += size;
req->rl_recvbuf = rb;
if (unlikely(rdmab_length(rb) < size)) {
if (!rpcrdma_regbuf_realloc(rb, size, flags))
return false;
r_xprt->rx_stats.hardway_register_count += size;
}
return true;
}
@ -655,13 +588,15 @@ xprt_rdma_allocate(struct rpc_task *task)
if (RPC_IS_SWAPPER(task))
flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN;
if (!rpcrdma_get_sendbuf(r_xprt, req, rqst->rq_callsize, flags))
if (!rpcrdma_check_regbuf(r_xprt, req->rl_sendbuf, rqst->rq_callsize,
flags))
goto out_fail;
if (!rpcrdma_get_recvbuf(r_xprt, req, rqst->rq_rcvsize, flags))
if (!rpcrdma_check_regbuf(r_xprt, req->rl_recvbuf, rqst->rq_rcvsize,
flags))
goto out_fail;
rqst->rq_buffer = req->rl_sendbuf->rg_base;
rqst->rq_rbuffer = req->rl_recvbuf->rg_base;
rqst->rq_buffer = rdmab_data(req->rl_sendbuf);
rqst->rq_rbuffer = rdmab_data(req->rl_recvbuf);
trace_xprtrdma_op_allocate(task, req);
return 0;
@ -815,7 +750,7 @@ static const struct rpc_xprt_ops xprt_rdma_procs = {
.alloc_slot = xprt_rdma_alloc_slot,
.free_slot = xprt_rdma_free_slot,
.release_request = xprt_release_rqst_cong, /* ditto */
.set_retrans_timeout = xprt_set_retrans_timeout_def, /* ditto */
.wait_for_reply_request = xprt_wait_for_reply_request_def, /* ditto */
.timer = xprt_rdma_timer,
.rpcbind = rpcb_getport_async, /* sunrpc/rpcb_clnt.c */
.set_port = xprt_rdma_set_port,

View File

@ -76,11 +76,16 @@
static void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc);
static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf);
static int rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt, bool temp);
static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb);
static struct rpcrdma_regbuf *
rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction,
gfp_t flags);
static void rpcrdma_regbuf_dma_unmap(struct rpcrdma_regbuf *rb);
static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb);
static void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp);
/* Wait for outstanding transport work to finish.
/* Wait for outstanding transport work to finish. ib_drain_qp
* handles the drains in the wrong order for us, so open code
* them here.
*/
static void rpcrdma_xprt_drain(struct rpcrdma_xprt *r_xprt)
{
@ -132,11 +137,6 @@ rpcrdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
/* WARNING: Only wr_cqe and status are reliable at this point */
trace_xprtrdma_wc_send(sc, wc);
if (wc->status != IB_WC_SUCCESS && wc->status != IB_WC_WR_FLUSH_ERR)
pr_err("rpcrdma: Send: %s (%u/0x%x)\n",
ib_wc_status_msg(wc->status),
wc->status, wc->vendor_err);
rpcrdma_sendctx_put_locked(sc);
}
@ -174,10 +174,6 @@ rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
return;
out_flushed:
if (wc->status != IB_WC_WR_FLUSH_ERR)
pr_err("rpcrdma: Recv: %s (%u/0x%x)\n",
ib_wc_status_msg(wc->status),
wc->status, wc->vendor_err);
rpcrdma_recv_buffer_put(rep);
}
@ -185,7 +181,6 @@ static void
rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt,
struct rdma_conn_param *param)
{
struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
const struct rpcrdma_connect_private *pmsg = param->private_data;
unsigned int rsize, wsize;
@ -202,12 +197,13 @@ rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt,
wsize = rpcrdma_decode_buffer_size(pmsg->cp_recv_size);
}
if (rsize < cdata->inline_rsize)
cdata->inline_rsize = rsize;
if (wsize < cdata->inline_wsize)
cdata->inline_wsize = wsize;
dprintk("RPC: %s: max send %u, max recv %u\n",
__func__, cdata->inline_wsize, cdata->inline_rsize);
if (rsize < r_xprt->rx_ep.rep_inline_recv)
r_xprt->rx_ep.rep_inline_recv = rsize;
if (wsize < r_xprt->rx_ep.rep_inline_send)
r_xprt->rx_ep.rep_inline_send = wsize;
dprintk("RPC: %s: max send %u, max recv %u\n", __func__,
r_xprt->rx_ep.rep_inline_send,
r_xprt->rx_ep.rep_inline_recv);
rpcrdma_set_max_header_sizes(r_xprt);
}
@ -247,7 +243,7 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
case RDMA_CM_EVENT_DEVICE_REMOVAL:
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
pr_info("rpcrdma: removing device %s for %s:%s\n",
ia->ri_device->name,
ia->ri_id->device->name,
rpcrdma_addrstr(r_xprt), rpcrdma_portstr(r_xprt));
#endif
set_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags);
@ -256,7 +252,6 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
wait_for_completion(&ia->ri_remove_done);
ia->ri_id = NULL;
ia->ri_device = NULL;
/* Return 1 to ensure the core destroys the id. */
return 1;
case RDMA_CM_EVENT_ESTABLISHED:
@ -291,7 +286,7 @@ disconnected:
dprintk("RPC: %s: %s:%s on %s/frwr: %s\n", __func__,
rpcrdma_addrstr(r_xprt), rpcrdma_portstr(r_xprt),
ia->ri_device->name, rdma_event_msg(event->event));
ia->ri_id->device->name, rdma_event_msg(event->event));
return 0;
}
@ -370,9 +365,8 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt)
rc = PTR_ERR(ia->ri_id);
goto out_err;
}
ia->ri_device = ia->ri_id->device;
ia->ri_pd = ib_alloc_pd(ia->ri_device, 0);
ia->ri_pd = ib_alloc_pd(ia->ri_id->device, 0);
if (IS_ERR(ia->ri_pd)) {
rc = PTR_ERR(ia->ri_pd);
pr_err("rpcrdma: ib_alloc_pd() returned %d\n", rc);
@ -381,12 +375,12 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt)
switch (xprt_rdma_memreg_strategy) {
case RPCRDMA_FRWR:
if (frwr_is_supported(ia))
if (frwr_is_supported(ia->ri_id->device))
break;
/*FALLTHROUGH*/
default:
pr_err("rpcrdma: Device %s does not support memreg mode %d\n",
ia->ri_device->name, xprt_rdma_memreg_strategy);
ia->ri_id->device->name, xprt_rdma_memreg_strategy);
rc = -EINVAL;
goto out_err;
}
@ -438,11 +432,11 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia)
* mappings and MRs are gone.
*/
list_for_each_entry(rep, &buf->rb_recv_bufs, rr_list)
rpcrdma_dma_unmap_regbuf(rep->rr_rdmabuf);
rpcrdma_regbuf_dma_unmap(rep->rr_rdmabuf);
list_for_each_entry(req, &buf->rb_allreqs, rl_all) {
rpcrdma_dma_unmap_regbuf(req->rl_rdmabuf);
rpcrdma_dma_unmap_regbuf(req->rl_sendbuf);
rpcrdma_dma_unmap_regbuf(req->rl_recvbuf);
rpcrdma_regbuf_dma_unmap(req->rl_rdmabuf);
rpcrdma_regbuf_dma_unmap(req->rl_sendbuf);
rpcrdma_regbuf_dma_unmap(req->rl_recvbuf);
}
rpcrdma_mrs_destroy(buf);
ib_dealloc_pd(ia->ri_pd);
@ -468,7 +462,6 @@ rpcrdma_ia_close(struct rpcrdma_ia *ia)
rdma_destroy_id(ia->ri_id);
}
ia->ri_id = NULL;
ia->ri_device = NULL;
/* If the pd is still busy, xprtrdma missed freeing a resource */
if (ia->ri_pd && !IS_ERR(ia->ri_pd))
@ -476,19 +469,26 @@ rpcrdma_ia_close(struct rpcrdma_ia *ia)
ia->ri_pd = NULL;
}
/*
* Create unconnected endpoint.
/**
* rpcrdma_ep_create - Create unconnected endpoint
* @r_xprt: transport to instantiate
*
* Returns zero on success, or a negative errno.
*/
int
rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
struct rpcrdma_create_data_internal *cdata)
int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt)
{
struct rpcrdma_ep *ep = &r_xprt->rx_ep;
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
struct rpcrdma_connect_private *pmsg = &ep->rep_cm_private;
struct ib_cq *sendcq, *recvcq;
unsigned int max_sge;
int rc;
max_sge = min_t(unsigned int, ia->ri_device->attrs.max_send_sge,
ep->rep_max_requests = xprt_rdma_slot_table_entries;
ep->rep_inline_send = xprt_rdma_max_inline_write;
ep->rep_inline_recv = xprt_rdma_max_inline_read;
max_sge = min_t(unsigned int, ia->ri_id->device->attrs.max_send_sge,
RPCRDMA_MAX_SEND_SGES);
if (max_sge < RPCRDMA_MIN_SEND_SGES) {
pr_warn("rpcrdma: HCA provides only %d send SGEs\n", max_sge);
@ -496,7 +496,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
}
ia->ri_max_send_sges = max_sge;
rc = frwr_open(ia, ep, cdata);
rc = frwr_open(ia, ep);
if (rc)
return rc;
@ -518,23 +518,21 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
ep->rep_attr.cap.max_send_sge,
ep->rep_attr.cap.max_recv_sge);
/* set trigger for requesting send completion */
ep->rep_send_batch = min_t(unsigned int, RPCRDMA_MAX_SEND_BATCH,
cdata->max_requests >> 2);
ep->rep_send_batch = ep->rep_max_requests >> 3;
ep->rep_send_count = ep->rep_send_batch;
init_waitqueue_head(&ep->rep_connect_wait);
ep->rep_receive_count = 0;
sendcq = ib_alloc_cq(ia->ri_device, NULL,
sendcq = ib_alloc_cq(ia->ri_id->device, NULL,
ep->rep_attr.cap.max_send_wr + 1,
ia->ri_device->num_comp_vectors > 1 ? 1 : 0,
ia->ri_id->device->num_comp_vectors > 1 ? 1 : 0,
IB_POLL_WORKQUEUE);
if (IS_ERR(sendcq)) {
rc = PTR_ERR(sendcq);
goto out1;
}
recvcq = ib_alloc_cq(ia->ri_device, NULL,
recvcq = ib_alloc_cq(ia->ri_id->device, NULL,
ep->rep_attr.cap.max_recv_wr + 1,
0, IB_POLL_WORKQUEUE);
if (IS_ERR(recvcq)) {
@ -552,15 +550,15 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
pmsg->cp_magic = rpcrdma_cmp_magic;
pmsg->cp_version = RPCRDMA_CMP_VERSION;
pmsg->cp_flags |= RPCRDMA_CMP_F_SND_W_INV_OK;
pmsg->cp_send_size = rpcrdma_encode_buffer_size(cdata->inline_wsize);
pmsg->cp_recv_size = rpcrdma_encode_buffer_size(cdata->inline_rsize);
pmsg->cp_send_size = rpcrdma_encode_buffer_size(ep->rep_inline_send);
pmsg->cp_recv_size = rpcrdma_encode_buffer_size(ep->rep_inline_recv);
ep->rep_remote_cma.private_data = pmsg;
ep->rep_remote_cma.private_data_len = sizeof(*pmsg);
/* Client offers RDMA Read but does not initiate */
ep->rep_remote_cma.initiator_depth = 0;
ep->rep_remote_cma.responder_resources =
min_t(int, U8_MAX, ia->ri_device->attrs.max_qp_rd_atom);
min_t(int, U8_MAX, ia->ri_id->device->attrs.max_qp_rd_atom);
/* Limit transport retries so client can detect server
* GID changes quickly. RPC layer handles re-establishing
@ -583,16 +581,16 @@ out1:
return rc;
}
/*
* rpcrdma_ep_destroy
/**
* rpcrdma_ep_destroy - Disconnect and destroy endpoint.
* @r_xprt: transport instance to shut down
*
* Disconnect and destroy endpoint. After this, the only
* valid operations on the ep are to free it (if dynamically
* allocated) or re-create it.
*/
void
rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
void rpcrdma_ep_destroy(struct rpcrdma_xprt *r_xprt)
{
struct rpcrdma_ep *ep = &r_xprt->rx_ep;
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
if (ia->ri_id && ia->ri_id->qp) {
rpcrdma_ep_disconnect(ep, ia);
rdma_destroy_qp(ia->ri_id);
@ -622,7 +620,7 @@ rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt,
goto out1;
rc = -ENOMEM;
err = rpcrdma_ep_create(ep, ia, &r_xprt->rx_data);
err = rpcrdma_ep_create(r_xprt);
if (err) {
pr_err("rpcrdma: rpcrdma_ep_create returned %d\n", err);
goto out2;
@ -639,7 +637,7 @@ rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt,
return 0;
out3:
rpcrdma_ep_destroy(ep, ia);
rpcrdma_ep_destroy(r_xprt);
out2:
rpcrdma_ia_close(ia);
out1:
@ -672,7 +670,7 @@ rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt, struct rpcrdma_ep *ep,
*/
old = id;
rc = -ENETUNREACH;
if (ia->ri_device != id->device) {
if (ia->ri_id->device != id->device) {
pr_err("rpcrdma: can't reconnect on different device!\n");
goto out_destroy;
}
@ -796,8 +794,8 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
*/
/* rpcrdma_sendctxs_destroy() assumes caller has already quiesced
* queue activity, and ib_drain_qp has flushed all remaining Send
* requests.
* queue activity, and rpcrdma_xprt_drain has flushed all remaining
* Send requests.
*/
static void rpcrdma_sendctxs_destroy(struct rpcrdma_buffer *buf)
{
@ -867,20 +865,20 @@ static unsigned long rpcrdma_sendctx_next(struct rpcrdma_buffer *buf,
/**
* rpcrdma_sendctx_get_locked - Acquire a send context
* @buf: transport buffers from which to acquire an unused context
* @r_xprt: controlling transport instance
*
* Returns pointer to a free send completion context; or NULL if
* the queue is empty.
*
* Usage: Called to acquire an SGE array before preparing a Send WR.
*
* The caller serializes calls to this function (per rpcrdma_buffer),
* and provides an effective memory barrier that flushes the new value
* The caller serializes calls to this function (per transport), and
* provides an effective memory barrier that flushes the new value
* of rb_sc_head.
*/
struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf)
struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_xprt *r_xprt)
{
struct rpcrdma_xprt *r_xprt;
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
struct rpcrdma_sendctx *sc;
unsigned long next_head;
@ -905,7 +903,6 @@ out_emptyq:
* backing up. Cause the caller to pause and try again.
*/
set_bit(RPCRDMA_BUF_F_EMPTY_SCQ, &buf->rb_flags);
r_xprt = container_of(buf, struct rpcrdma_xprt, rx_buf);
r_xprt->rx_stats.empty_sendctx_q++;
return NULL;
}
@ -917,7 +914,7 @@ out_emptyq:
* Usage: Called from Send completion to return a sendctx
* to the queue.
*
* The caller serializes calls to this function (per rpcrdma_buffer).
* The caller serializes calls to this function (per transport).
*/
static void
rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc)
@ -925,7 +922,7 @@ rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc)
struct rpcrdma_buffer *buf = &sc->sc_xprt->rx_buf;
unsigned long next_tail;
/* Unmap SGEs of previously completed by unsignaled
/* Unmap SGEs of previously completed but unsignaled
* Sends by walking up the queue until @sc is found.
*/
next_tail = buf->rb_sc_tail;
@ -933,7 +930,7 @@ rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc)
next_tail = rpcrdma_sendctx_next(buf, next_tail);
/* ORDER: item must be accessed _before_ tail is updated */
rpcrdma_unmap_sendctx(buf->rb_sc_ctxs[next_tail]);
rpcrdma_sendctx_unmap(buf->rb_sc_ctxs[next_tail]);
} while (buf->rb_sc_ctxs[next_tail] != sc);
@ -996,54 +993,70 @@ rpcrdma_mr_refresh_worker(struct work_struct *work)
rpcrdma_mrs_create(r_xprt);
}
/**
 * rpcrdma_req_create - Allocate an rpcrdma_req object
 * @r_xprt: controlling r_xprt
 * @size: initial size, in bytes, of send and receive buffers
 * @flags: GFP flags passed to memory allocators
 *
 * Allocates the req itself plus three regbufs: a transport header
 * buffer of RPCRDMA_HDRBUF_SIZE bytes, a send buffer and a receive
 * buffer of @size bytes each. On success the req is linked onto the
 * transport's rb_allreqs list under rb_lock.
 *
 * Returns an allocated and fully initialized rpcrdma_req or NULL.
 */
struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, size_t size,
				       gfp_t flags)
{
	struct rpcrdma_buffer *buffer = &r_xprt->rx_buf;
	struct rpcrdma_regbuf *rb;
	struct rpcrdma_req *req;

	req = kzalloc(sizeof(*req), flags);
	if (req == NULL)
		goto out1;

	rb = rpcrdma_regbuf_alloc(RPCRDMA_HDRBUF_SIZE, DMA_TO_DEVICE, flags);
	if (!rb)
		goto out2;
	req->rl_rdmabuf = rb;
	xdr_buf_init(&req->rl_hdrbuf, rdmab_data(rb), rdmab_length(rb));

	req->rl_sendbuf = rpcrdma_regbuf_alloc(size, DMA_TO_DEVICE, flags);
	if (!req->rl_sendbuf)
		goto out3;

	req->rl_recvbuf = rpcrdma_regbuf_alloc(size, DMA_NONE, flags);
	if (!req->rl_recvbuf)
		goto out4;

	req->rl_buffer = buffer;
	INIT_LIST_HEAD(&req->rl_registered);
	spin_lock(&buffer->rb_lock);
	list_add(&req->rl_all, &buffer->rb_allreqs);
	spin_unlock(&buffer->rb_lock);
	return req;

	/* A regbuf owns a separately allocated rg_data payload, so a bare
	 * kfree() of the regbuf would leak it. rpcrdma_regbuf_free()
	 * releases both; the regbufs here were never DMA-mapped
	 * (rg_device is still NULL), so its unmap step is a no-op.
	 */
out4:
	rpcrdma_regbuf_free(req->rl_sendbuf);
out3:
	rpcrdma_regbuf_free(req->rl_rdmabuf);
out2:
	kfree(req);
out1:
	return NULL;
}
static int
rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt, bool temp)
static bool rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt, bool temp)
{
struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
struct rpcrdma_rep *rep;
int rc;
rc = -ENOMEM;
rep = kzalloc(sizeof(*rep), GFP_KERNEL);
if (rep == NULL)
goto out;
rep->rr_rdmabuf = rpcrdma_alloc_regbuf(cdata->inline_rsize,
rep->rr_rdmabuf = rpcrdma_regbuf_alloc(r_xprt->rx_ep.rep_inline_recv,
DMA_FROM_DEVICE, GFP_KERNEL);
if (IS_ERR(rep->rr_rdmabuf)) {
rc = PTR_ERR(rep->rr_rdmabuf);
if (!rep->rr_rdmabuf)
goto out_free;
}
xdr_buf_init(&rep->rr_hdrbuf, rep->rr_rdmabuf->rg_base,
xdr_buf_init(&rep->rr_hdrbuf, rdmab_data(rep->rr_rdmabuf),
rdmab_length(rep->rr_rdmabuf));
rep->rr_cqe.done = rpcrdma_wc_receive;
@ -1058,22 +1071,27 @@ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt, bool temp)
spin_lock(&buf->rb_lock);
list_add(&rep->rr_list, &buf->rb_recv_bufs);
spin_unlock(&buf->rb_lock);
return 0;
return true;
out_free:
kfree(rep);
out:
return rc;
return false;
}
int
rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
/**
* rpcrdma_buffer_create - Create initial set of req/rep objects
* @r_xprt: transport instance to (re)initialize
*
* Returns zero on success, otherwise a negative errno.
*/
int rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
{
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
int i, rc;
buf->rb_flags = 0;
buf->rb_max_requests = r_xprt->rx_data.max_requests;
buf->rb_max_requests = r_xprt->rx_ep.rep_max_requests;
buf->rb_bc_srv_max_requests = 0;
spin_lock_init(&buf->rb_mrlock);
spin_lock_init(&buf->rb_lock);
@ -1086,16 +1104,15 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
INIT_LIST_HEAD(&buf->rb_send_bufs);
INIT_LIST_HEAD(&buf->rb_allreqs);
rc = -ENOMEM;
for (i = 0; i < buf->rb_max_requests; i++) {
struct rpcrdma_req *req;
req = rpcrdma_create_req(r_xprt);
if (IS_ERR(req)) {
dprintk("RPC: %s: request buffer %d alloc"
" failed\n", __func__, i);
rc = PTR_ERR(req);
req = rpcrdma_req_create(r_xprt, RPCRDMA_V1_DEF_INLINE_SIZE,
GFP_KERNEL);
if (!req)
goto out;
}
list_add(&req->rl_list, &buf->rb_send_bufs);
}
@ -1121,10 +1138,9 @@ out:
return rc;
}
static void
rpcrdma_destroy_rep(struct rpcrdma_rep *rep)
static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep)
{
rpcrdma_free_regbuf(rep->rr_rdmabuf);
rpcrdma_regbuf_free(rep->rr_rdmabuf);
kfree(rep);
}
@ -1140,9 +1156,9 @@ rpcrdma_req_destroy(struct rpcrdma_req *req)
{
list_del(&req->rl_all);
rpcrdma_free_regbuf(req->rl_recvbuf);
rpcrdma_free_regbuf(req->rl_sendbuf);
rpcrdma_free_regbuf(req->rl_rdmabuf);
rpcrdma_regbuf_free(req->rl_recvbuf);
rpcrdma_regbuf_free(req->rl_sendbuf);
rpcrdma_regbuf_free(req->rl_rdmabuf);
kfree(req);
}
@ -1180,7 +1196,7 @@ rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf)
* rpcrdma_buffer_destroy - Release all hw resources
* @buf: root control block for resources
*
* ORDERING: relies on a prior ib_drain_qp :
* ORDERING: relies on a prior rpcrdma_xprt_drain :
* - No more Send or Receive completions can occur
* - All MRs, reps, and reqs are returned to their free lists
*/
@ -1202,7 +1218,7 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
rep = list_first_entry(&buf->rb_recv_bufs,
struct rpcrdma_rep, rr_list);
list_del(&rep->rr_list);
rpcrdma_destroy_rep(rep);
rpcrdma_rep_destroy(rep);
}
while (!list_empty(&buf->rb_send_bufs)) {
@ -1281,7 +1297,7 @@ rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr)
if (mr->mr_dir != DMA_NONE) {
trace_xprtrdma_mr_unmap(mr);
ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
ib_dma_unmap_sg(r_xprt->rx_ia.ri_id->device,
mr->mr_sg, mr->mr_nents, mr->mr_dir);
mr->mr_dir = DMA_NONE;
}
@ -1331,7 +1347,7 @@ rpcrdma_buffer_put(struct rpcrdma_req *req)
}
spin_unlock(&buffers->rb_lock);
if (rep)
rpcrdma_destroy_rep(rep);
rpcrdma_rep_destroy(rep);
}
/*
@ -1348,69 +1364,90 @@ rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
list_add(&rep->rr_list, &buffers->rb_recv_bufs);
spin_unlock(&buffers->rb_lock);
} else {
rpcrdma_destroy_rep(rep);
rpcrdma_rep_destroy(rep);
}
}
/**
* rpcrdma_alloc_regbuf - allocate and DMA-map memory for SEND/RECV buffers
* @size: size of buffer to be allocated, in bytes
* @direction: direction of data movement
* @flags: GFP flags
*
* Returns an ERR_PTR, or a pointer to a regbuf, a buffer that
* can be persistently DMA-mapped for I/O.
/* Returns a pointer to an rpcrdma_regbuf object, or NULL.
*
* xprtrdma uses a regbuf for posting an outgoing RDMA SEND, or for
* receiving the payload of RDMA RECV operations. During Long Calls
* or Replies they may be registered externally via frwr_map.
*/
struct rpcrdma_regbuf *
rpcrdma_alloc_regbuf(size_t size, enum dma_data_direction direction,
static struct rpcrdma_regbuf *
rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction,
gfp_t flags)
{
struct rpcrdma_regbuf *rb;
rb = kmalloc(sizeof(*rb) + size, flags);
if (rb == NULL)
return ERR_PTR(-ENOMEM);
rb = kmalloc(sizeof(*rb), flags);
if (!rb)
return NULL;
rb->rg_data = kmalloc(size, flags);
if (!rb->rg_data) {
kfree(rb);
return NULL;
}
rb->rg_device = NULL;
rb->rg_direction = direction;
rb->rg_iov.length = size;
return rb;
}
/**
* __rpcrdma_map_regbuf - DMA-map a regbuf
* @ia: controlling rpcrdma_ia
* @rb: regbuf to be mapped
* rpcrdma_regbuf_realloc - re-allocate a SEND/RECV buffer
* @rb: regbuf to reallocate
* @size: size of buffer to be allocated, in bytes
* @flags: GFP flags
*
* Returns true if reallocation was successful. If false is
* returned, @rb is left untouched.
*/
bool
__rpcrdma_dma_map_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb)
bool rpcrdma_regbuf_realloc(struct rpcrdma_regbuf *rb, size_t size, gfp_t flags)
{
struct ib_device *device = ia->ri_device;
void *buf;
buf = kmalloc(size, flags);
if (!buf)
return false;
rpcrdma_regbuf_dma_unmap(rb);
kfree(rb->rg_data);
rb->rg_data = buf;
rb->rg_iov.length = size;
return true;
}
/**
* __rpcrdma_regbuf_dma_map - DMA-map a regbuf
* @r_xprt: controlling transport instance
* @rb: regbuf to be mapped
*
* Returns true if the buffer is now DMA mapped to @r_xprt's device
*/
bool __rpcrdma_regbuf_dma_map(struct rpcrdma_xprt *r_xprt,
struct rpcrdma_regbuf *rb)
{
struct ib_device *device = r_xprt->rx_ia.ri_id->device;
if (rb->rg_direction == DMA_NONE)
return false;
rb->rg_iov.addr = ib_dma_map_single(device,
(void *)rb->rg_base,
rdmab_length(rb),
rb->rg_direction);
rb->rg_iov.addr = ib_dma_map_single(device, rdmab_data(rb),
rdmab_length(rb), rb->rg_direction);
if (ib_dma_mapping_error(device, rdmab_addr(rb))) {
trace_xprtrdma_dma_maperr(rdmab_addr(rb));
return false;
}
rb->rg_device = device;
rb->rg_iov.lkey = ia->ri_pd->local_dma_lkey;
rb->rg_iov.lkey = r_xprt->rx_ia.ri_pd->local_dma_lkey;
return true;
}
static void
rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb)
static void rpcrdma_regbuf_dma_unmap(struct rpcrdma_regbuf *rb)
{
if (!rb)
return;
@ -1418,19 +1455,16 @@ rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb)
if (!rpcrdma_regbuf_is_mapped(rb))
return;
ib_dma_unmap_single(rb->rg_device, rdmab_addr(rb),
rdmab_length(rb), rb->rg_direction);
ib_dma_unmap_single(rb->rg_device, rdmab_addr(rb), rdmab_length(rb),
rb->rg_direction);
rb->rg_device = NULL;
}
/**
* rpcrdma_free_regbuf - deregister and free registered buffer
* @rb: regbuf to be deregistered and freed
*/
void
rpcrdma_free_regbuf(struct rpcrdma_regbuf *rb)
static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb)
{
rpcrdma_dma_unmap_regbuf(rb);
rpcrdma_regbuf_dma_unmap(rb);
if (rb)
kfree(rb->rg_data);
kfree(rb);
}
@ -1497,17 +1531,15 @@ rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp)
list_del(&rep->rr_list);
spin_unlock(&buf->rb_lock);
if (!rep) {
if (rpcrdma_create_rep(r_xprt, temp))
if (!rpcrdma_rep_create(r_xprt, temp))
break;
continue;
}
rb = rep->rr_rdmabuf;
if (!rpcrdma_regbuf_is_mapped(rb)) {
if (!__rpcrdma_dma_map_regbuf(&r_xprt->rx_ia, rb)) {
rpcrdma_recv_buffer_put(rep);
break;
}
if (!rpcrdma_regbuf_dma_map(r_xprt, rb)) {
rpcrdma_recv_buffer_put(rep);
break;
}
trace_xprtrdma_post_recv(rep->rr_recv_wr.wr_cqe);

View File

@ -66,20 +66,17 @@
* Interface Adapter -- one per transport instance
*/
struct rpcrdma_ia {
struct ib_device *ri_device;
struct rdma_cm_id *ri_id;
struct ib_pd *ri_pd;
struct completion ri_done;
struct completion ri_remove_done;
int ri_async_rc;
unsigned int ri_max_segs;
unsigned int ri_max_frwr_depth;
unsigned int ri_max_inline_write;
unsigned int ri_max_inline_read;
unsigned int ri_max_send_sges;
bool ri_implicit_roundup;
enum ib_mr_type ri_mrtype;
unsigned long ri_flags;
struct completion ri_done;
struct completion ri_remove_done;
};
enum {
@ -93,22 +90,29 @@ enum {
struct rpcrdma_ep {
unsigned int rep_send_count;
unsigned int rep_send_batch;
unsigned int rep_max_inline_send;
unsigned int rep_max_inline_recv;
int rep_connected;
struct ib_qp_init_attr rep_attr;
wait_queue_head_t rep_connect_wait;
struct rpcrdma_connect_private rep_cm_private;
struct rdma_conn_param rep_remote_cma;
unsigned int rep_max_requests; /* set by /proc */
unsigned int rep_inline_send; /* negotiated */
unsigned int rep_inline_recv; /* negotiated */
int rep_receive_count;
};
/* Pre-allocate extra Work Requests for handling backward receives
* and sends. This is a fixed value because the Work Queues are
* allocated when the forward channel is set up.
* allocated when the forward channel is set up, long before the
* backchannel is provisioned. This value is two times
* NFS4_DEF_CB_SLOT_TABLE_SIZE.
*/
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
#define RPCRDMA_BACKWARD_WRS (8)
#define RPCRDMA_BACKWARD_WRS (32)
#else
#define RPCRDMA_BACKWARD_WRS (0)
#define RPCRDMA_BACKWARD_WRS (0)
#endif
/* Registered buffer -- registered kmalloc'd memory for RDMA SEND/RECV
@ -121,33 +125,34 @@ struct rpcrdma_regbuf {
struct ib_sge rg_iov;
struct ib_device *rg_device;
enum dma_data_direction rg_direction;
__be32 rg_base[0] __attribute__ ((aligned(256)));
void *rg_data;
};
static inline u64
rdmab_addr(struct rpcrdma_regbuf *rb)
/* Returns rg_iov.addr, the value __rpcrdma_regbuf_dma_map() stores
 * from ib_dma_map_single() when it maps the regbuf.
 */
static inline u64 rdmab_addr(struct rpcrdma_regbuf *rb)
{
return rb->rg_iov.addr;
}
static inline u32
rdmab_length(struct rpcrdma_regbuf *rb)
/* Returns rg_iov.length, which rpcrdma_regbuf_alloc() and
 * rpcrdma_regbuf_realloc() set to the requested buffer size in bytes.
 */
static inline u32 rdmab_length(struct rpcrdma_regbuf *rb)
{
return rb->rg_iov.length;
}
static inline u32
rdmab_lkey(struct rpcrdma_regbuf *rb)
/* Returns rg_iov.lkey, which __rpcrdma_regbuf_dma_map() fills in from
 * the protection domain's local_dma_lkey after a successful mapping.
 */
static inline u32 rdmab_lkey(struct rpcrdma_regbuf *rb)
{
return rb->rg_iov.lkey;
}
static inline struct ib_device *
rdmab_device(struct rpcrdma_regbuf *rb)
/* Returns rg_device: the device the regbuf is DMA-mapped to, or NULL
 * while the regbuf is unmapped (see rpcrdma_regbuf_is_mapped()).
 */
static inline struct ib_device *rdmab_device(struct rpcrdma_regbuf *rb)
{
return rb->rg_device;
}
/* Returns rg_data, the payload buffer that rpcrdma_regbuf_alloc()
 * allocates separately from the regbuf structure itself.
 */
static inline void *rdmab_data(const struct rpcrdma_regbuf *rb)
{
return rb->rg_data;
}
#define RPCRDMA_DEF_GFP (GFP_NOIO | __GFP_NOWARN)
/* To ensure a transport can always make forward progress,
@ -222,34 +227,18 @@ struct rpcrdma_xprt;
struct rpcrdma_sendctx {
struct ib_send_wr sc_wr;
struct ib_cqe sc_cqe;
struct ib_device *sc_device;
struct rpcrdma_xprt *sc_xprt;
struct rpcrdma_req *sc_req;
unsigned int sc_unmap_count;
struct ib_sge sc_sges[];
};
/* Limit the number of SGEs that can be unmapped during one
* Send completion. This caps the amount of work a single
* completion can do before returning to the provider.
*
* Setting this to zero disables Send completion batching.
*/
enum {
RPCRDMA_MAX_SEND_BATCH = 7,
};
/*
* struct rpcrdma_mr - external memory region metadata
*
* An external memory region is any buffer or page that is registered
* on the fly (i.e., not pre-registered).
*
* Each rpcrdma_buffer has a list of free MWs anchored in rb_mrs. During
* call_allocate, rpcrdma_buffer_get() assigns one to each segment in
* an rpcrdma_req. Then rpcrdma_register_external() grabs these to keep
* track of registration metadata while each RPC is pending.
* rpcrdma_deregister_external() uses this metadata to unmap and
* release these resources when an RPC is complete.
*/
enum rpcrdma_frwr_state {
FRWR_IS_INVALID, /* ready to be used */
@ -418,20 +407,6 @@ enum {
RPCRDMA_BUF_F_EMPTY_SCQ = 0,
};
/*
* Internal structure for transport instance creation. This
* exists primarily for modularity.
*
* This data should be set with mount options
*/
struct rpcrdma_create_data_internal {
unsigned int max_requests; /* max requests (slots) in flight */
unsigned int rsize; /* mount rsize - max read hdr+data */
unsigned int wsize; /* mount wsize - max write hdr+data */
unsigned int inline_rsize; /* max non-rdma read data payload */
unsigned int inline_wsize; /* max non-rdma write data payload */
};
/*
* Statistics for RPCRDMA
*/
@ -476,13 +451,11 @@ struct rpcrdma_xprt {
struct rpcrdma_ia rx_ia;
struct rpcrdma_ep rx_ep;
struct rpcrdma_buffer rx_buf;
struct rpcrdma_create_data_internal rx_data;
struct delayed_work rx_connect_worker;
struct rpcrdma_stats rx_stats;
};
#define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, rx_xprt)
#define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data)
static inline const char *
rpcrdma_addrstr(const struct rpcrdma_xprt *r_xprt)
@ -516,9 +489,8 @@ void rpcrdma_ia_close(struct rpcrdma_ia *);
/*
* Endpoint calls - xprtrdma/verbs.c
*/
int rpcrdma_ep_create(struct rpcrdma_ep *, struct rpcrdma_ia *,
struct rpcrdma_create_data_internal *);
void rpcrdma_ep_destroy(struct rpcrdma_ep *, struct rpcrdma_ia *);
int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt);
void rpcrdma_ep_destroy(struct rpcrdma_xprt *r_xprt);
int rpcrdma_ep_connect(struct rpcrdma_ep *, struct rpcrdma_ia *);
void rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *);
@ -528,11 +500,12 @@ int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *,
/*
* Buffer calls - xprtrdma/verbs.c
*/
struct rpcrdma_req *rpcrdma_create_req(struct rpcrdma_xprt *);
struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, size_t size,
gfp_t flags);
void rpcrdma_req_destroy(struct rpcrdma_req *req);
int rpcrdma_buffer_create(struct rpcrdma_xprt *);
void rpcrdma_buffer_destroy(struct rpcrdma_buffer *);
struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf);
struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_xprt *r_xprt);
struct rpcrdma_mr *rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt);
void rpcrdma_mr_put(struct rpcrdma_mr *mr);
@ -548,23 +521,34 @@ struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *);
void rpcrdma_buffer_put(struct rpcrdma_req *);
void rpcrdma_recv_buffer_put(struct rpcrdma_rep *);
struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(size_t, enum dma_data_direction,
gfp_t);
bool __rpcrdma_dma_map_regbuf(struct rpcrdma_ia *, struct rpcrdma_regbuf *);
void rpcrdma_free_regbuf(struct rpcrdma_regbuf *);
bool rpcrdma_regbuf_realloc(struct rpcrdma_regbuf *rb, size_t size,
gfp_t flags);
bool __rpcrdma_regbuf_dma_map(struct rpcrdma_xprt *r_xprt,
struct rpcrdma_regbuf *rb);
static inline bool
rpcrdma_regbuf_is_mapped(struct rpcrdma_regbuf *rb)
/**
* rpcrdma_regbuf_is_mapped - check if buffer is DMA mapped
* @rb: regbuf to be checked
*
* A non-NULL rg_device is the "mapped" marker: it is set by
* __rpcrdma_regbuf_dma_map() and cleared again when the regbuf
* is unmapped.
*
* Returns true if the buffer is now mapped to rb->rg_device.
*/
static inline bool rpcrdma_regbuf_is_mapped(struct rpcrdma_regbuf *rb)
{
return rb->rg_device != NULL;
}
static inline bool
rpcrdma_dma_map_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb)
/**
* rpcrdma_regbuf_dma_map - DMA-map a regbuf
* @r_xprt: controlling transport instance
* @rb: regbuf to be mapped
*
* Returns true if the buffer is currently DMA mapped.
*/
static inline bool rpcrdma_regbuf_dma_map(struct rpcrdma_xprt *r_xprt,
struct rpcrdma_regbuf *rb)
{
if (likely(rpcrdma_regbuf_is_mapped(rb)))
return true;
return __rpcrdma_dma_map_regbuf(ia, rb);
return __rpcrdma_regbuf_dma_map(r_xprt, rb);
}
/*
@ -579,9 +563,8 @@ rpcrdma_data_dir(bool writing)
/* Memory registration calls - xprtrdma/frwr_ops.c
*/
bool frwr_is_supported(struct rpcrdma_ia *);
int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
struct rpcrdma_create_data_internal *cdata);
bool frwr_is_supported(struct ib_device *device);
int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep);
int frwr_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr);
void frwr_release_mr(struct rpcrdma_mr *mr);
size_t frwr_maxpages(struct rpcrdma_xprt *r_xprt);
@ -610,7 +593,7 @@ int rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt,
struct rpcrdma_req *req, u32 hdrlen,
struct xdr_buf *xdr,
enum rpcrdma_chunktype rtype);
void rpcrdma_unmap_sendctx(struct rpcrdma_sendctx *sc);
void rpcrdma_sendctx_unmap(struct rpcrdma_sendctx *sc);
int rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst);
void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *);
void rpcrdma_complete_rqst(struct rpcrdma_rep *rep);
@ -627,7 +610,9 @@ static inline void rpcrdma_set_xdrlen(struct xdr_buf *xdr, size_t len)
/* RPC/RDMA module init - xprtrdma/transport.c
*/
extern unsigned int xprt_rdma_slot_table_entries;
extern unsigned int xprt_rdma_max_inline_read;
extern unsigned int xprt_rdma_max_inline_write;
void xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap);
void xprt_rdma_free_addresses(struct rpc_xprt *xprt);
void xprt_rdma_close(struct rpc_xprt *xprt);

View File

@ -2017,6 +2017,7 @@ static void xs_local_connect(struct rpc_xprt *xprt, struct rpc_task *task)
* we'll need to figure out how to pass a namespace to
* connect.
*/
task->tk_rpc_status = -ENOTCONN;
rpc_exit(task, -ENOTCONN);
return;
}
@ -2690,7 +2691,7 @@ static const struct rpc_xprt_ops xs_local_ops = {
.buf_free = rpc_free,
.prepare_request = xs_stream_prepare_request,
.send_request = xs_local_send_request,
.set_retrans_timeout = xprt_set_retrans_timeout_def,
.wait_for_reply_request = xprt_wait_for_reply_request_def,
.close = xs_close,
.destroy = xs_destroy,
.print_stats = xs_local_print_stats,
@ -2710,7 +2711,7 @@ static const struct rpc_xprt_ops xs_udp_ops = {
.buf_alloc = rpc_malloc,
.buf_free = rpc_free,
.send_request = xs_udp_send_request,
.set_retrans_timeout = xprt_set_retrans_timeout_rtt,
.wait_for_reply_request = xprt_wait_for_reply_request_rtt,
.timer = xs_udp_timer,
.release_request = xprt_release_rqst_cong,
.close = xs_close,
@ -2733,7 +2734,7 @@ static const struct rpc_xprt_ops xs_tcp_ops = {
.buf_free = rpc_free,
.prepare_request = xs_stream_prepare_request,
.send_request = xs_tcp_send_request,
.set_retrans_timeout = xprt_set_retrans_timeout_def,
.wait_for_reply_request = xprt_wait_for_reply_request_def,
.close = xs_tcp_shutdown,
.destroy = xs_destroy,
.set_connect_timeout = xs_tcp_set_connect_timeout,
@ -2761,7 +2762,7 @@ static const struct rpc_xprt_ops bc_tcp_ops = {
.buf_alloc = bc_malloc,
.buf_free = bc_free,
.send_request = bc_send_request,
.set_retrans_timeout = xprt_set_retrans_timeout_def,
.wait_for_reply_request = xprt_wait_for_reply_request_def,
.close = bc_close,
.destroy = bc_destroy,
.print_stats = xs_tcp_print_stats,